[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
bug#36597: 27.0.50; rehash hash tables eagerly in pdumper
From: |
Eli Zaretskii |
Subject: |
bug#36597: 27.0.50; rehash hash tables eagerly in pdumper |
Date: |
Thu, 18 Jul 2019 08:39:20 +0300 |
> From: Paul Eggert <eggert@cs.ucla.edu>
> Date: Sun, 14 Jul 2019 07:39:08 -0700
> Cc: 36597@debbugs.gnu.org
>
> Although I like the simplicity of eager rehashing, I'm not yet sold on the
> performance implications. On my usual (cd lisp && make compile-always)
> benchmark, the patch hurt user+system CPU time performance by 1.5%.
> Admittedly
> just one benchmark, but still....
>
> Also, must we expose Vpdumper_hash_tables to Lisp? Surely it'd be better to
> keep
> it private to pdumper.c.
>
> I'll CC: to Daniel to see whether he has any insights about improvements in
> this
> area.
You didn't CC Daniel, AFAICT, so I did that now.
> PS. I ran that benchmark on my home desktop, an Intel Xeon E3-1225 v2 running
> Ubunto 18.04.2. To run it, I rebased your patch and also removed the
> no-longer-used PDUMPER_CHECK_REHASHING macro that my GCC complained about
> (wonder why that didn't happen for you?), resulting in the attached patch
> against current master 8ff09154a29a1151afb2902267ca35f89ebda73c.
>
> >From 96a36697b2f472f3a6b3138444659babd0d73f32 Mon Sep 17 00:00:00 2001
> From: Pip Cet <pipcet@gmail.com>
> Date: Sun, 14 Jul 2019 07:22:01 -0700
> Subject: [PATCH] Rehash hash tables eagerly after loading a portable dump
>
> * src/lisp.h (hash_rehash_needed_p): Remove. All uses removed.
> (hash_rehash_if_needed): Remove. All uses removed.
> (struct Lisp_Hash_Table): Remove comment about rehashing hash tables.
> * src/pdumper.c (thaw_hash_tables): New function.
> (hash_table_thaw): New function.
> (hash_table_freeze): New function.
> (dump_hash_table): Simplify.
> (dump_hash_table_list): New function.
> (hash_table_contents): New function.
> (Fdump_emacs_portable): Handle hash tables by eager rehashing.
> (pdumper_load): Restore hash tables.
> (init_pdumper_once): New function.
> (PDUMPER_CHECK_REHASHING): Remove.
> ---
> src/bytecode.c | 1 -
> src/composite.c | 1 -
> src/emacs.c | 1 +
> src/fns.c | 54 ++++----------
> src/lisp.h | 19 +----
> src/minibuf.c | 3 -
> src/pdumper.c | 188 ++++++++++++++++++++++++------------------------
> src/pdumper.h | 1 +
> 8 files changed, 108 insertions(+), 160 deletions(-)
>
> diff --git a/src/bytecode.c b/src/bytecode.c
> index 29dff44f00..9c72429e0c 100644
> --- a/src/bytecode.c
> +++ b/src/bytecode.c
> @@ -1402,7 +1402,6 @@ #define DEFINE(name, value) LABEL (name) ,
> Lisp_Object v1 = POP;
> ptrdiff_t i;
> struct Lisp_Hash_Table *h = XHASH_TABLE (jmp_table);
> - hash_rehash_if_needed (h);
>
> /* h->count is a faster approximation for HASH_TABLE_SIZE (h)
> here. */
> diff --git a/src/composite.c b/src/composite.c
> index 183062de46..49a285cff0 100644
> --- a/src/composite.c
> +++ b/src/composite.c
> @@ -654,7 +654,6 @@ gstring_lookup_cache (Lisp_Object header)
> composition_gstring_put_cache (Lisp_Object gstring, ptrdiff_t len)
> {
> struct Lisp_Hash_Table *h = XHASH_TABLE (gstring_hash_table);
> - hash_rehash_if_needed (h);
> Lisp_Object header = LGSTRING_HEADER (gstring);
> EMACS_UINT hash = h->test.hashfn (&h->test, header);
> if (len < 0)
> diff --git a/src/emacs.c b/src/emacs.c
> index ad661a081b..855b2c6715 100644
> --- a/src/emacs.c
> +++ b/src/emacs.c
> @@ -1560,6 +1560,7 @@ main (int argc, char **argv)
> if (!initialized)
> {
> init_alloc_once ();
> + init_pdumper_once ();
> init_obarray_once ();
> init_eval_once ();
> init_charset_once ();
> diff --git a/src/fns.c b/src/fns.c
> index 0497588689..b6134a314c 100644
> --- a/src/fns.c
> +++ b/src/fns.c
> @@ -4241,43 +4241,24 @@ hash_table_rehash (struct Lisp_Hash_Table *h)
> {
> ptrdiff_t size = HASH_TABLE_SIZE (h);
>
> - /* These structures may have been purecopied and shared
> - (bug#36447). */
> - h->next = Fcopy_sequence (h->next);
> - h->index = Fcopy_sequence (h->index);
> - h->hash = Fcopy_sequence (h->hash);
> -
> /* Recompute the actual hash codes for each entry in the table.
> Order is still invalid. */
> for (ptrdiff_t i = 0; i < size; ++i)
> - if (!NILP (HASH_HASH (h, i)))
> - {
> - Lisp_Object key = HASH_KEY (h, i);
> - EMACS_UINT hash_code = h->test.hashfn (&h->test, key);
> - set_hash_hash_slot (h, i, make_fixnum (hash_code));
> - }
> -
> - /* Reset the index so that any slot we don't fill below is marked
> - invalid. */
> - Ffillarray (h->index, make_fixnum (-1));
> + {
> + Lisp_Object key = HASH_KEY (h, i);
> + EMACS_UINT hash_code = h->test.hashfn (&h->test, key);
> + set_hash_hash_slot (h, i, make_fixnum (hash_code));
> + }
>
> /* Rebuild the collision chains. */
> for (ptrdiff_t i = 0; i < size; ++i)
> - if (!NILP (HASH_HASH (h, i)))
> - {
> - EMACS_UINT hash_code = XUFIXNUM (HASH_HASH (h, i));
> - ptrdiff_t start_of_bucket = hash_code % ASIZE (h->index);
> - set_hash_next_slot (h, i, HASH_INDEX (h, start_of_bucket));
> - set_hash_index_slot (h, start_of_bucket, i);
> - eassert (HASH_NEXT (h, i) != i); /* Stop loops. */
> - }
> -
> - /* Finally, mark the hash table as having a valid hash order.
> - Do this last so that if we're interrupted, we retry on next
> - access. */
> - eassert (h->count < 0);
> - h->count = -h->count;
> - eassert (!hash_rehash_needed_p (h));
> + {
> + EMACS_UINT hash_code = XUFIXNUM (HASH_HASH (h, i));
> + ptrdiff_t start_of_bucket = hash_code % ASIZE (h->index);
> + set_hash_next_slot (h, i, HASH_INDEX (h, start_of_bucket));
> + set_hash_index_slot (h, start_of_bucket, i);
> + eassert (HASH_NEXT (h, i) != i); /* Stop loops. */
> + }
> }
>
> /* Lookup KEY in hash table H. If HASH is non-null, return in *HASH
> @@ -4290,8 +4271,6 @@ hash_lookup (struct Lisp_Hash_Table *h, Lisp_Object
> key, EMACS_UINT *hash)
> EMACS_UINT hash_code;
> ptrdiff_t start_of_bucket, i;
>
> - hash_rehash_if_needed (h);
> -
> hash_code = h->test.hashfn (&h->test, key);
> eassert ((hash_code & ~INTMASK) == 0);
> if (hash)
> @@ -4320,8 +4299,6 @@ hash_put (struct Lisp_Hash_Table *h, Lisp_Object key,
> Lisp_Object value,
> {
> ptrdiff_t start_of_bucket, i;
>
> - hash_rehash_if_needed (h);
> -
> eassert ((hash & ~INTMASK) == 0);
>
> /* Increment count after resizing because resizing may fail. */
> @@ -4355,8 +4332,6 @@ hash_remove_from_table (struct Lisp_Hash_Table *h,
> Lisp_Object key)
> ptrdiff_t start_of_bucket = hash_code % ASIZE (h->index);
> ptrdiff_t prev = -1;
>
> - hash_rehash_if_needed (h);
> -
> for (ptrdiff_t i = HASH_INDEX (h, start_of_bucket);
> 0 <= i;
> i = HASH_NEXT (h, i))
> @@ -4434,9 +4409,7 @@ sweep_weak_table (struct Lisp_Hash_Table *h, bool
> remove_entries_p)
> for (ptrdiff_t bucket = 0; bucket < n; ++bucket)
> {
> /* Follow collision chain, removing entries that don't survive
> - this garbage collection. It's okay if hash_rehash_needed_p
> - (h) is true, since we're operating entirely on the cached
> - hash values. */
> + this garbage collection. */
> ptrdiff_t prev = -1;
> ptrdiff_t next;
> for (ptrdiff_t i = HASH_INDEX (h, bucket); 0 <= i; i = next)
> @@ -4881,7 +4854,6 @@ DEFUN ("hash-table-count", Fhash_table_count,
> Shash_table_count, 1, 1, 0,
> (Lisp_Object table)
> {
> struct Lisp_Hash_Table *h = check_hash_table (table);
> - hash_rehash_if_needed (h);
> return make_fixnum (h->count);
> }
>
> diff --git a/src/lisp.h b/src/lisp.h
> index 13014c82dc..d0e5c43c41 100644
> --- a/src/lisp.h
> +++ b/src/lisp.h
> @@ -2245,11 +2245,7 @@ #define DEFSYM(sym, name) /* empty */
>
> struct Lisp_Hash_Table
> {
> - /* Change pdumper.c if you change the fields here.
> -
> - IMPORTANT!!!!!!!
> -
> - Call hash_rehash_if_needed() before accessing. */
> + /* Change pdumper.c if you change the fields here. */
>
> /* This is for Lisp; the hash table code does not refer to it. */
> union vectorlike_header header;
> @@ -2363,19 +2359,6 @@ HASH_TABLE_SIZE (const struct Lisp_Hash_Table *h)
>
> void hash_table_rehash (struct Lisp_Hash_Table *h);
>
> -INLINE bool
> -hash_rehash_needed_p (const struct Lisp_Hash_Table *h)
> -{
> - return h->count < 0;
> -}
> -
> -INLINE void
> -hash_rehash_if_needed (struct Lisp_Hash_Table *h)
> -{
> - if (hash_rehash_needed_p (h))
> - hash_table_rehash (h);
> -}
> -
> /* Default size for hash tables if not specified. */
>
> enum DEFAULT_HASH_SIZE { DEFAULT_HASH_SIZE = 65 };
> diff --git a/src/minibuf.c b/src/minibuf.c
> index d9a6e15b05..e923ce2a43 100644
> --- a/src/minibuf.c
> +++ b/src/minibuf.c
> @@ -1203,9 +1203,6 @@ DEFUN ("try-completion", Ftry_completion,
> Stry_completion, 2, 3, 0,
> bucket = AREF (collection, idx);
> }
>
> - if (HASH_TABLE_P (collection))
> - hash_rehash_if_needed (XHASH_TABLE (collection));
> -
> while (1)
> {
> /* Get the next element of the alist, obarray, or hash-table. */
> diff --git a/src/pdumper.c b/src/pdumper.c
> index 03c00bf27b..d35d296d32 100644
> --- a/src/pdumper.c
> +++ b/src/pdumper.c
> @@ -107,17 +107,6 @@ #define VM_MS_WINDOWS 2
>
> #define DANGEROUS 0
>
> -/* PDUMPER_CHECK_REHASHING being true causes the portable dumper to
> - check, for each hash table it dumps, that the hash table means the
> - same thing after rehashing. */
> -#ifndef PDUMPER_CHECK_REHASHING
> -# if ENABLE_CHECKING
> -# define PDUMPER_CHECK_REHASHING 1
> -# else
> -# define PDUMPER_CHECK_REHASHING 0
> -# endif
> -#endif
> -
> /* We require an architecture in which all pointers are the same size
> and have the same layout, where pointers are either 32 or 64 bits
> long, and where bytes have eight bits --- that is, a
> @@ -393,6 +382,8 @@ dump_fingerprint (char const *label,
> The start of the cold region is always aligned on a page
> boundary. */
> dump_off cold_start;
> +
> + dump_off hash_list;
> };
>
> /* Double-ended singly linked list. */
> @@ -550,6 +541,8 @@ dump_fingerprint (char const *label,
> heap objects. */
> Lisp_Object bignum_data;
>
> + Lisp_Object hash_tables;
> +
> unsigned number_hot_relocations;
> unsigned number_discardable_relocations;
> };
> @@ -2622,68 +2615,64 @@ dump_vectorlike_generic (struct dump_context *ctx,
> return offset;
> }
>
> -/* Determine whether the hash table's hash order is stable
> - across dump and load. If it is, we don't have to trigger
> - a rehash on access. */
> -static bool
> -dump_hash_table_stable_p (const struct Lisp_Hash_Table *hash)
> +/* Return a list of (KEY . VALUE) pairs in the given hash table. */
> +static Lisp_Object
> +hash_table_contents (struct Lisp_Hash_Table *h)
> {
> - bool is_eql = hash->test.hashfn == hashfn_eql;
> - bool is_equal = hash->test.hashfn == hashfn_equal;
> - ptrdiff_t size = HASH_TABLE_SIZE (hash);
> - for (ptrdiff_t i = 0; i < size; ++i)
> - if (!NILP (HASH_HASH (hash, i)))
> + Lisp_Object contents = Qnil;
> + /* Make sure key_and_value ends up in the same order, charset.c
> + relies on it by expecting hash table indices to stay constant
> + across the dump. */
> + for (ptrdiff_t i = HASH_TABLE_SIZE (h) - 1; i >= 0; --i)
> + if (!NILP (HASH_HASH (h, i)))
> {
> - Lisp_Object key = HASH_KEY (hash, i);
> - bool key_stable = (dump_builtin_symbol_p (key)
> - || FIXNUMP (key)
> - || (is_equal && STRINGP (key))
> - || ((is_equal || is_eql) && FLOATP (key)));
> - if (!key_stable)
> - return false;
> + dump_push (&contents, HASH_VALUE (h, i));
> + dump_push (&contents, HASH_KEY (h, i));
> }
> + return CALLN (Fapply, Qvector, contents);
> +}
>
> - return true;
> +static dump_off
> +dump_hash_table_list (struct dump_context *ctx)
> +{
> + if (CONSP (ctx->hash_tables))
> + return dump_object (ctx, CALLN (Fapply, Qvector, ctx->hash_tables));
> + else
> + return 0;
> }
>
> -/* Return a list of (KEY . VALUE) pairs in the given hash table. */
> -static Lisp_Object
> -hash_table_contents (Lisp_Object table)
> +static void
> +hash_table_freeze (struct Lisp_Hash_Table *h)
> {
> - Lisp_Object contents = Qnil;
> - struct Lisp_Hash_Table *h = XHASH_TABLE (table);
> - for (ptrdiff_t i = 0; i < HASH_TABLE_SIZE (h); ++i)
> - if (!NILP (HASH_HASH (h, i)))
> - dump_push (&contents, Fcons (HASH_KEY (h, i), HASH_VALUE (h, i)));
> - return Fnreverse (contents);
> + h->key_and_value = hash_table_contents (h);
> + ptrdiff_t nkeys = XFIXNAT (Flength (h->key_and_value)) / 2;
> + h->count = nkeys;
> + if (nkeys == 0)
> + nkeys = 1;
> + h->index = h->next = h->hash = make_fixnum (nkeys);
> }
>
> -/* Copy the given hash table, rehash it, and make sure that we can
> - look up all the values in the original. */
> static void
> -check_hash_table_rehash (Lisp_Object table_orig)
> -{
> - hash_rehash_if_needed (XHASH_TABLE (table_orig));
> - Lisp_Object table_rehashed = Fcopy_hash_table (table_orig);
> - eassert (XHASH_TABLE (table_rehashed)->count >= 0);
> - XHASH_TABLE (table_rehashed)->count *= -1;
> - eassert (XHASH_TABLE (table_rehashed)->count <= 0);
> - hash_rehash_if_needed (XHASH_TABLE (table_rehashed));
> - eassert (XHASH_TABLE (table_rehashed)->count >= 0);
> - Lisp_Object expected_contents = hash_table_contents (table_orig);
> - while (!NILP (expected_contents))
> +hash_table_thaw (struct Lisp_Hash_Table *h)
> +{
> + Lisp_Object count = h->index;
> + h->index = Fmake_vector (h->index, make_fixnum (-1));
> + h->hash = Fmake_vector (h->hash, Qnil);
> + h->next = Fmake_vector (h->next, make_fixnum (-1));
> + Lisp_Object key_and_value = h->key_and_value;
> + h->next_free = -1;
> + if (XFIXNAT (count) <= 1)
> {
> - Lisp_Object key_value_pair = dump_pop (&expected_contents);
> - Lisp_Object key = XCAR (key_value_pair);
> - Lisp_Object expected_value = XCDR (key_value_pair);
> - Lisp_Object arbitrary = Qdump_emacs_portable__sort_predicate_copied;
> - Lisp_Object found_value = Fgethash (key, table_rehashed, arbitrary);
> - eassert (EQ (expected_value, found_value));
> - Fremhash (key, table_rehashed);
> + h->key_and_value = Fmake_vector (make_fixnum (2 * XFIXNAT (count)),
> Qnil);
> + ptrdiff_t i = 0;
> + while (i < ASIZE (key_and_value))
> + {
> + ASET (h->key_and_value, i, AREF (key_and_value, i));
> + i++;
> + }
> }
>
> - eassert (EQ (Fhash_table_count (table_rehashed),
> - make_fixnum (0)));
> + hash_table_rehash (h);
> }
>
> static dump_off
> @@ -2695,45 +2684,11 @@ dump_hash_table (struct dump_context *ctx,
> # error "Lisp_Hash_Table changed. See CHECK_STRUCTS comment in config.h."
> #endif
> const struct Lisp_Hash_Table *hash_in = XHASH_TABLE (object);
> - bool is_stable = dump_hash_table_stable_p (hash_in);
> - /* If the hash table is likely to be modified in memory (either
> - because we need to rehash, and thus toggle hash->count, or
> - because we need to assemble a list of weak tables) punt the hash
> - table to the end of the dump, where we can lump all such hash
> - tables together. */
> - if (!(is_stable || !NILP (hash_in->weak))
> - && ctx->flags.defer_hash_tables)
> - {
> - if (offset != DUMP_OBJECT_ON_HASH_TABLE_QUEUE)
> - {
> - eassert (offset == DUMP_OBJECT_ON_NORMAL_QUEUE
> - || offset == DUMP_OBJECT_NOT_SEEN);
> - /* We still want to dump the actual keys and values now. */
> - dump_enqueue_object (ctx, hash_in->key_and_value, WEIGHT_NONE);
> - /* We'll get to the rest later. */
> - offset = DUMP_OBJECT_ON_HASH_TABLE_QUEUE;
> - dump_remember_object (ctx, object, offset);
> - dump_push (&ctx->deferred_hash_tables, object);
> - }
> - return offset;
> - }
> -
> - if (PDUMPER_CHECK_REHASHING)
> - check_hash_table_rehash (make_lisp_ptr ((void *) hash_in,
> Lisp_Vectorlike));
> -
> struct Lisp_Hash_Table hash_munged = *hash_in;
> struct Lisp_Hash_Table *hash = &hash_munged;
>
> - /* Remember to rehash this hash table on first access. After a
> - dump reload, the hash table values will have changed, so we'll
> - need to rebuild the index.
> -
> - TODO: for EQ and EQL hash tables, it should be possible to rehash
> - here using the preferred load address of the dump, eliminating
> - the need to rehash-on-access if we can load the dump where we
> - want. */
> - if (hash->count > 0 && !is_stable)
> - hash->count = -hash->count;
> + hash_table_freeze (hash);
> + dump_push (&ctx->hash_tables, object);
>
> START_DUMP_PVEC (ctx, &hash->header, struct Lisp_Hash_Table, out);
> dump_pseudovector_lisp_fields (ctx, &out->header, &hash->header);
> @@ -4140,6 +4095,19 @@ DEFUN ("dump-emacs-portable",
> || !NILP (ctx->deferred_hash_tables)
> || !NILP (ctx->deferred_symbols));
>
> + ctx->header.hash_list = ctx->offset;
> + dump_hash_table_list (ctx);
> +
> + do
> + {
> + dump_drain_deferred_hash_tables (ctx);
> + dump_drain_deferred_symbols (ctx);
> + dump_drain_normal_queue (ctx);
> + }
> + while (!dump_queue_empty_p (&ctx->dump_queue)
> + || !NILP (ctx->deferred_hash_tables)
> + || !NILP (ctx->deferred_symbols));
> +
> dump_sort_copied_objects (ctx);
>
> /* While we copy built-in symbols into the Emacs image, these
> @@ -5431,6 +5399,13 @@ pdumper_load (const char *dump_filename)
> for (int i = 0; i < ARRAYELTS (sections); ++i)
> dump_mmap_reset (§ions[i]);
>
> + if (header->hash_list)
> + {
> + struct Lisp_Vector *hash_tables =
> + ((struct Lisp_Vector *)(dump_base + header->hash_list));
> + XSETVECTOR (Vpdumper_hash_tables, hash_tables);
> + }
> +
> /* Run the functions Emacs registered for doing post-dump-load
> initialization. */
> for (int i = 0; i < nr_dump_hooks; ++i)
> @@ -5502,10 +5477,31 @@ DEFUN ("pdumper-stats", Fpdumper_stats,
> Spdumper_stats, 0, 0, 0,
>
>
>
> +static void thaw_hash_tables (void)
> +{
> + Lisp_Object hash_tables = Vpdumper_hash_tables;
> + ptrdiff_t i = 0;
> + while (i < ASIZE (hash_tables))
> + {
> + hash_table_thaw (XHASH_TABLE (AREF (hash_tables, i)));
> + i++;
> + }
> + Vpdumper_hash_tables = zero_vector;
> +}
> +
> +void
> +init_pdumper_once (void)
> +{
> + Vpdumper_hash_tables = zero_vector;
> + pdumper_do_now_and_after_load (thaw_hash_tables);
> +}
> +
> void
> syms_of_pdumper (void)
> {
> #ifdef HAVE_PDUMPER
> + DEFVAR_LISP ("pdumper-hash-tables", Vpdumper_hash_tables,
> + doc: /* A list of hash tables that need to be thawed after
> loading the pdump. */);
> defsubr (&Sdump_emacs_portable);
> defsubr (&Sdump_emacs_portable__sort_predicate);
> defsubr (&Sdump_emacs_portable__sort_predicate_copied);
> diff --git a/src/pdumper.h b/src/pdumper.h
> index ab2f426c1e..cfea06d33d 100644
> --- a/src/pdumper.h
> +++ b/src/pdumper.h
> @@ -248,6 +248,7 @@ pdumper_clear_marks (void)
> file was loaded. */
> extern void pdumper_record_wd (const char *);
>
> +void init_pdumper_once (void);
> void syms_of_pdumper (void);
>
> INLINE_HEADER_END
> --
> 2.17.1
>
- bug#36597: 27.0.50; rehash hash tables eagerly in pdumper, (continued)
- bug#36597: 27.0.50; rehash hash tables eagerly in pdumper, Paul Eggert, 2019/07/14
- bug#36597: 27.0.50; rehash hash tables eagerly in pdumper, Pip Cet, 2019/07/14
- bug#36597: 27.0.50; rehash hash tables eagerly in pdumper, Pip Cet, 2019/07/15
- bug#36597: 27.0.50; rehash hash tables eagerly in pdumper, Pip Cet, 2019/07/19
- bug#36597: 27.0.50; rehash hash tables eagerly in pdumper, Eli Zaretskii, 2019/07/19
- bug#36597: 27.0.50; rehash hash tables eagerly in pdumper, Pip Cet, 2019/07/20
- bug#36597: 27.0.50; rehash hash tables eagerly in pdumper, Paul Eggert, 2019/07/20
- bug#36597: 27.0.50; rehash hash tables eagerly in pdumper, Pip Cet, 2019/07/21
- bug#36597: 27.0.50; rehash hash tables eagerly in pdumper, Paul Eggert, 2019/07/21
- bug#36597: 27.0.50; rehash hash tables eagerly in pdumper, Pip Cet, 2019/07/21
bug#36597: 27.0.50; rehash hash tables eagerly in pdumper,
Eli Zaretskii <=