bug-gnulib
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

alt hash implementation (was: [PATCH] hash: declare some functions with the warn_unused_result attribute)


From: Eric Blake
Subject: alt hash implementation (was: [PATCH] hash: declare some functions with the warn_unused_result attribute)
Date: Wed, 17 Jun 2009 18:01:51 +0000 (UTC)
User-agent: Loom/3.14 (http://gmane.org/)

Eric Blake <ebb9 <at> byu.net> writes:

> 
> I got bored and attempted this.  This implementation still passes the m4 
> testsuite, although there might be corner cases not exercised by m4 (such as 
> shrinking the table on hash_delete) that I got wrong, so I'm posting it for 
> review now.  In particular, I think I need to use xalloc_oversized in more 
> places.

In fact, the m4 testsuite never even triggered a rehash (so guess what I'm 
patching m4 to do today ;), but running autoconf on coreutils caused a 
coredump, due to a rather stupid bug in my placement of the failure label.  
Here's what I squashed to fix things:

diff --git i/lib/hash.c w/lib/hash.c
index f52c2ef..8c43b0f 100644
--- i/lib/hash.c
+++ w/lib/hash.c
@@ -714,7 +714,7 @@ allocate_entry (Hash_table *table)
        {
          new_size = table->overflow_size;
          new_size += new_size / 2;
-         if (new_size < table->overflow_size)
+         if (xalloc_oversized (new_size, sizeof *table->overflow))
            return NULL;
        }
       new = realloc (table->overflow, new_size * sizeof *table->overflow);
@@ -888,10 +888,10 @@ hash_rehash (Hash_table *table, size_t candidate)
        needed += table->overflow_size;
        if (needed < table->overflow_size + table->overflow_size / 2)
          needed = table->overflow_size + table->overflow_size / 2;
-       if (needed < table->overflow_size)
-         goto fail;
        if (needed < DEFAULT_MXFAST / sizeof *cursor)
          needed = DEFAULT_MXFAST / sizeof *cursor;
+       if (xalloc_oversized (needed, sizeof *cursor))
+         goto fail;
        cursor = realloc (table->overflow, needed * sizeof *cursor);
        if (!cursor)
          goto fail;
@@ -905,9 +905,6 @@ hash_rehash (Hash_table *table, size_t candidate)
        table->overflow = cursor;
        table->overflow_size = needed;
       }
-  fail:
-    free (new_table);
-    return false;
   }

   /* Merely reuse the extra old space into the new table.  */
@@ -916,7 +913,7 @@ hash_rehash (Hash_table *table, size_t candidate)

   for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
     if (bucket->data)
-      for (cursor = bucket; cursor; cursor = next)
+      for (cursor = bucket; cursor != table->overflow; cursor = next)
        {
          void *data = cursor->data;
          struct hash_entry *new_bucket
@@ -934,7 +931,7 @@ hash_rehash (Hash_table *table, size_t candidate)
                {
                  /* Allocate or recycle an entry, when moving from a bucket
                     header into a bucket overflow.  */
-                 struct hash_entry *new_entry = allocate_entry (new_table);
+                 struct hash_entry *new_entry = allocate_entry (table);

                  if (new_entry == NULL
                      || table->overflow_size <= new_entry - table->overflow)
@@ -961,7 +958,7 @@ hash_rehash (Hash_table *table, size_t candidate)
              new_bucket->data = data;
              new_table->n_buckets_used++;
              if (cursor != bucket)
-               free_entry (new_table, cursor);
+               free_entry (table, cursor);
            }
        }

@@ -975,6 +972,10 @@ hash_rehash (Hash_table *table, size_t candidate)
   free (new_table);

   return true;
+
+ fail:
+  free (new_table);
+  return false;
 }

 /* If ENTRY matches an entry already in the hash table, return the pointer





And the current state of the patch (but more on that in the next email):

From: Eric Blake <address@hidden>
Date: Tue, 16 Jun 2009 16:38:52 -0600
Subject: [PATCH] hash: manage bucket overflows more efficiently

* lib/hash.c [USE_OBSTACK]: Delete; the optimizations provided by
an obstack have been folded in to mainline code.
(hash_entry): Change type of next.
(struct hash_table): Alter how overflow entries are tracked.
(hash_get_max_bucket_length, hash_table_ok, hash_lookup)
(hash_get_next, hash_get_entries, hash_do_for_each)
(hash_initialize, hash_clear, hash_free, allocate_entry)
(free_entry, hash_find_entry, hash_rehash, hash_insert)
(hash_delete, hash_print): Adjust all clients.

Signed-off-by: Eric Blake <address@hidden>
---
 ChangeLog  |   13 +++
 lib/hash.c |  293 ++++++++++++++++++++++++++++++++---------------------------
 2 files changed, 172 insertions(+), 134 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 48b7013..fa0b986 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,18 @@
 2009-06-16  Eric Blake  <address@hidden>

+       hash: manage bucket overflows more efficiently
+       * lib/hash.c [USE_OBSTACK]: Delete; the optimizations provided by
+       an obstack have been folded in to mainline code.
+       (hash_entry): Change type of next.
+       (struct hash_table): Alter how overflow entries are tracked.
+       (hash_get_max_bucket_length, hash_table_ok, hash_lookup)
+       (hash_get_next, hash_get_entries, hash_do_for_each)
+       (hash_initialize, hash_clear, hash_free, allocate_entry)
+       (free_entry, hash_find_entry, hash_rehash, hash_insert)
+       (hash_delete, hash_print): Adjust all clients.
+
+2009-06-16  Eric Blake  <address@hidden>
+
        hash: more minor cleanups
        * lib/hash.h (hash_entry): Make opaque, by moving...
        * lib/hash.c (hash_entry): ...here.
diff --git a/lib/hash.c b/lib/hash.c
index 7115932..8c43b0f 100644
--- a/lib/hash.c
+++ b/lib/hash.c
@@ -20,10 +20,6 @@

 /* A generic hash table package.  */

-/* Define USE_OBSTACK to 1 if you want the allocator to use obstacks instead
-   of malloc.  If you change USE_OBSTACK, you have to recompile!  Also,
-   memory allocation failures in the obstack are not reliably tracked.  */
-
 #include <config.h>

 #include "hash.h"
@@ -33,24 +29,18 @@
 #include <stdio.h>
 #include <stdlib.h>

-#if USE_OBSTACK
-# include "obstack.h"
-# ifndef obstack_chunk_alloc
-#  define obstack_chunk_alloc malloc
-# endif
-# ifndef obstack_chunk_free
-#  define obstack_chunk_free free
-# endif
-#endif
-
 #ifndef SIZE_MAX
 # define SIZE_MAX ((size_t) -1)
 #endif

+/* The approximate size to use for initial allocation.  64 bytes is
+   the largest "small" request for the GNU C library malloc.  */
+enum { DEFAULT_MXFAST = 64 };
+
 struct hash_entry
   {
     void *data;
-    struct hash_entry *next;
+    size_t next; /* Index into overflow array, or 0 if end of chain.  */
   };

 struct hash_table
@@ -76,16 +66,13 @@ struct hash_table
     Hash_comparator comparator;
     Hash_data_freer data_freer;

-    /* A linked list of freed struct hash_entry structs.  */
-    struct hash_entry *free_entry_list;
-
-#if USE_OBSTACK
-    /* Whenever obstacks are used, it is possible to allocate all overflowed
-       entries into a single stack, so they all can be freed in a single
-       operation.  It is not clear if the speedup is worth the trouble.
-       Also, allocation failure is not reliably detected.  */
-    struct obstack entry_stack;
-#endif
+    /* An array holding all overflowed entries, and size of that
+       array.  If overflow_size is 0, then there is no overflow or
+       free entry storage; otherwise, overflow[0].next starts a list
+       of freed entries ready to be recycled, and overflow[0].data is
+       the address of the first unallocated entry.  */
+    struct hash_entry *overflow;
+    size_t overflow_size;
   };

 /* A hash table contains many internal entries, each holding a pointer to
@@ -188,7 +175,7 @@ hash_get_max_bucket_length (const Hash_table *table)
          struct hash_entry const *cursor = bucket;
          size_t bucket_length = 1;

-         while (cursor = cursor->next, cursor)
+         while (cursor->next && (cursor = table->overflow + cursor->next))
            bucket_length++;

          if (bucket_length > max_bucket_length)
@@ -199,7 +186,7 @@ hash_get_max_bucket_length (const Hash_table *table)
   return max_bucket_length;
 }

-/* Do a mild validation of a hash table, by traversing it and checking two
+/* Do a mild validation of a hash table, by traversing it and checking several
    statistics.  */

 bool
@@ -208,7 +195,16 @@ hash_table_ok (const Hash_table *table)
   struct hash_entry const *bucket;
   size_t n_buckets_used = 0;
   size_t n_entries = 0;
+  size_t max_length = hash_get_max_bucket_length (table);
+  size_t max_index = 0;

+  if (table->overflow)
+    {
+      bucket = (struct hash_entry *) table->overflow->data;
+      max_index = bucket - table->overflow;
+      if (table->overflow_size < max_index)
+       return false;
+    }
   for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
     {
       if (bucket->data)
@@ -218,14 +214,21 @@ hash_table_ok (const Hash_table *table)
          /* Count bucket head.  */
          n_buckets_used++;
          n_entries++;
+         if (max_index < cursor->next)
+           return false;

          /* Count bucket overflow.  */
-         while (cursor = cursor->next, cursor)
-           n_entries++;
+         while (cursor->next && (cursor = table->overflow + cursor->next))
+           {
+             if (max_index < cursor->next)
+               return false;
+             n_entries++;
+           }
        }
     }

-  if (n_buckets_used == table->n_buckets_used && n_entries == table->n_entries)
+  if (n_buckets_used == table->n_buckets_used && n_entries == table->n_entries
+      && (max_length <= 1 || max_length < table->overflow_size))
     return true;

   return false;
@@ -264,7 +267,8 @@ hash_lookup (const Hash_table *table, const void *entry)
   if (bucket->data == NULL)
     return NULL;

-  for (cursor = bucket; cursor; cursor = cursor->next)
+  for (cursor = bucket; cursor != table->overflow;
+       cursor = table->overflow + cursor->next)
     if (entry == cursor->data || table->comparator (entry, cursor->data))
       return cursor->data;

@@ -312,9 +316,10 @@ hash_get_next (const Hash_table *table, const void *entry)
     abort ();

   /* Find next entry in the same bucket.  */
-  for (cursor = bucket; cursor; cursor = cursor->next)
+  for (cursor = bucket; cursor != table->overflow;
+       cursor = table->overflow + cursor->next)
     if (cursor->data == entry && cursor->next)
-      return cursor->next->data;
+      return table->overflow[cursor->next].data;

   /* Find first entry in any subsequent bucket.  */
   while (++bucket < table->bucket_limit)
@@ -341,7 +346,8 @@ hash_get_entries (const Hash_table *table, void **buffer,
     {
       if (bucket->data)
        {
-         for (cursor = bucket; cursor; cursor = cursor->next)
+         for (cursor = bucket; cursor != table->overflow;
+              cursor = table->overflow + cursor->next)
            {
              if (counter >= buffer_size)
                return counter;
@@ -373,7 +379,8 @@ hash_do_for_each (const Hash_table *table, Hash_processor 
processor,
     {
       if (bucket->data)
        {
-         for (cursor = bucket; cursor; cursor = cursor->next)
+         for (cursor = bucket; cursor != table->overflow;
+              cursor = table->overflow + cursor->next)
            {
              if (!(*processor) (cursor->data, processor_data))
                return counter;
@@ -595,10 +602,8 @@ hash_initialize (size_t candidate, const Hash_tuning 
*tuning,
   table->comparator = comparator;
   table->data_freer = data_freer;

-  table->free_entry_list = NULL;
-#if USE_OBSTACK
-  obstack_init (&table->entry_stack);
-#endif
+  table->overflow = NULL;
+  table->overflow_size = 0;
   return table;

  fail:
@@ -620,10 +625,12 @@ hash_clear (Hash_table *table)
       if (bucket->data)
        {
          struct hash_entry *cursor;
-         struct hash_entry *next;
+         size_t next;

          /* Free the bucket overflow.  */
-         for (cursor = bucket->next; cursor; cursor = next)
+         for (cursor = table->overflow + bucket->next;
+              cursor != table->overflow;
+              cursor = table->overflow + next)
            {
              if (table->data_freer)
                (*table->data_freer) (cursor->data);
@@ -632,15 +639,15 @@ hash_clear (Hash_table *table)
              next = cursor->next;
              /* Relinking is done one entry at a time, as it is to be expected
                 that overflows are either rare or short.  */
-             cursor->next = table->free_entry_list;
-             table->free_entry_list = cursor;
+             cursor->next = table->overflow->next;
+             table->overflow->next = cursor - table->overflow;
            }

          /* Free the bucket head.  */
          if (table->data_freer)
            (*table->data_freer) (bucket->data);
          bucket->data = NULL;
-         bucket->next = NULL;
+         bucket->next = 0;
        }
     }

@@ -658,7 +665,6 @@ hash_free (Hash_table *table)
 {
   struct hash_entry *bucket;
   struct hash_entry *cursor;
-  struct hash_entry *next;

   /* Call the user data_freer function.  */
   if (table->data_freer && table->n_entries)
@@ -667,7 +673,8 @@ hash_free (Hash_table *table)
        {
          if (bucket->data)
            {
-             for (cursor = bucket; cursor; cursor = cursor->next)
+             for (cursor = bucket; cursor != table->overflow;
+                  cursor = table->overflow + cursor->next)
                {
                  (*table->data_freer) (cursor->data);
                }
@@ -675,30 +682,8 @@ hash_free (Hash_table *table)
        }
     }

-#if USE_OBSTACK
-
-  obstack_free (&table->entry_stack, NULL);
-
-#else
-
-  /* Free all bucket overflowed entries.  */
-  for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
-    {
-      for (cursor = bucket->next; cursor; cursor = next)
-       {
-         next = cursor->next;
-         free (cursor);
-       }
-    }
-
-  /* Also reclaim the internal list of previously freed entries.  */
-  for (cursor = table->free_entry_list; cursor; cursor = next)
-    {
-      next = cursor->next;
-      free (cursor);
-    }
-
-#endif
+  /* Bulk free all overflows and free entry list.  */
+  free (table->overflow);

   /* Free the remainder of the hash table structure.  */
   free (table->bucket);
@@ -715,20 +700,50 @@ allocate_entry (Hash_table *table)
 {
   struct hash_entry *new;

-  if (table->free_entry_list)
+  if (!table->overflow ||
+      (table->overflow->next == 0
+       && ((struct hash_entry *) table->overflow->data
+          == table->overflow + table->overflow_size)))
+    {
+      /* Must reallocate a larger overflow table.  */
+      size_t new_size;
+
+      if (!table->overflow)
+       new_size = DEFAULT_MXFAST / sizeof *table->overflow;
+      else
+       {
+         new_size = table->overflow_size;
+         new_size += new_size / 2;
+         if (xalloc_oversized (new_size, sizeof *table->overflow))
+           return NULL;
+       }
+      new = realloc (table->overflow, new_size * sizeof *table->overflow);
+      if (!new)
+       return NULL;
+      if (!table->overflow)
+       {
+         new->next = 0;
+         new->data = new + 1;
+       }
+      else
+       new->data = new + table->overflow_size;
+      table->overflow = new;
+      table->overflow_size = new_size;
+    }
+
+  if (table->overflow->next)
     {
-      new = table->free_entry_list;
-      table->free_entry_list = new->next;
+      /* Recycle from the free list.  */
+      new = table->overflow + table->overflow->next;
+      table->overflow->next = new->next;
     }
   else
     {
-#if USE_OBSTACK
-      new = obstack_alloc (&table->entry_stack, sizeof *new);
-#else
-      new = malloc (sizeof *new);
-#endif
+      /* Allocate from the tail of overflow.  */
+      new = (struct hash_entry *) table->overflow->data;
+      table->overflow->data = new + 1;
     }
-
+  new->next = 0;
   return new;
 }

@@ -738,9 +753,12 @@ allocate_entry (Hash_table *table)
 static void
 free_entry (Hash_table *table, struct hash_entry *entry)
 {
+  size_t index = entry - table->overflow;
+  if (table->overflow_size <= index)
+    abort ();
   entry->data = NULL;
-  entry->next = table->free_entry_list;
-  table->free_entry_list = entry;
+  entry->next = table->overflow->next;
+  table->overflow->next = index;
 }

 /* This private function is used to help with insertion and deletion.  When
@@ -775,7 +793,7 @@ hash_find_entry (Hash_table *table, const void *entry,
        {
          if (bucket->next)
            {
-             struct hash_entry *next = bucket->next;
+             struct hash_entry *next = table->overflow + bucket->next;

              /* Bump the first overflow entry into the bucket head, then save
                 the previous first overflow entry for later recycling.  */
@@ -792,15 +810,15 @@ hash_find_entry (Hash_table *table, const void *entry,
     }

   /* Scan the bucket overflow.  */
-  for (cursor = bucket; cursor->next; cursor = cursor->next)
+  for (cursor = bucket; cursor->next; cursor = table->overflow + cursor->next)
     {
-      if ((*table->comparator) (entry, cursor->next->data))
+      if ((*table->comparator) (entry, table->overflow[cursor->next].data))
        {
-         void *data = cursor->next->data;
+         void *data = table->overflow[cursor->next].data;

          if (delete)
            {
-             struct hash_entry *next = cursor->next;
+             struct hash_entry *next = table->overflow + cursor->next;

              /* Unlink the entry to delete, then save the freed entry for later
                 recycling.  */
@@ -847,39 +865,55 @@ hash_rehash (Hash_table *table, size_t candidate)
      combined into one bucket during the rehash).  */
   {
     size_t needed = table->n_buckets_used - 1;
-    cursor = table->free_entry_list;
-    while (cursor)
+    size_t available;
+    if (table->overflow)
       {
-       cursor = cursor->next;
-       needed--;
+       cursor = (struct hash_entry *) table->overflow->data;
+       available = table->overflow_size - (cursor - table->overflow);
+       if (available < needed)
+         needed -= available;
+       else
+         needed = 0;
+
+       cursor = table->overflow;
+       while (needed && cursor->next
+              && (cursor = table->overflow + cursor->next))
+         needed--;
       }
-    while (needed--)
+    if (needed)
       {
-#if USE_OBSTACK
-       cursor = obstack_alloc (&table->entry_stack, sizeof *cursor);
-#else
-       cursor = malloc (sizeof *cursor);
-#endif
+       /* Reallocate to the largest among table->overflow_size+needed,
+          table->overflow_size*1.5, and DEFAULT_MXFAST, to guarantee
+          O(N) overall cost.  */
+       needed += table->overflow_size;
+       if (needed < table->overflow_size + table->overflow_size / 2)
+         needed = table->overflow_size + table->overflow_size / 2;
+       if (needed < DEFAULT_MXFAST / sizeof *cursor)
+         needed = DEFAULT_MXFAST / sizeof *cursor;
+       if (xalloc_oversized (needed, sizeof *cursor))
+         goto fail;
+       cursor = realloc (table->overflow, needed * sizeof *cursor);
        if (!cursor)
+         goto fail;
+       if (!table->overflow)
          {
-           free (new_table);
-           return false;
+           cursor->next = 0;
+           cursor->data = cursor + 1;
          }
-       cursor->next = table->free_entry_list;
-       table->free_entry_list = cursor;
+       else
+         cursor->data = cursor + table->overflow_size;
+       table->overflow = cursor;
+       table->overflow_size = needed;
       }
   }

   /* Merely reuse the extra old space into the new table.  */
-#if USE_OBSTACK
-  obstack_free (&new_table->entry_stack, NULL);
-  new_table->entry_stack = table->entry_stack;
-#endif
-  new_table->free_entry_list = table->free_entry_list;
+  new_table->overflow = table->overflow;
+  new_table->overflow_size = table->overflow_size;

   for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
     if (bucket->data)
-      for (cursor = bucket; cursor; cursor = next)
+      for (cursor = bucket; cursor != table->overflow; cursor = next)
        {
          void *data = cursor->data;
          struct hash_entry *new_bucket
@@ -889,7 +923,7 @@ hash_rehash (Hash_table *table, size_t candidate)
          if (! (new_bucket < new_table->bucket_limit))
            abort ();

-         next = cursor->next;
+         next = table->overflow + cursor->next;

          if (new_bucket->data)
            {
@@ -897,21 +931,22 @@ hash_rehash (Hash_table *table, size_t candidate)
                {
                  /* Allocate or recycle an entry, when moving from a bucket
                     header into a bucket overflow.  */
-                 struct hash_entry *new_entry = allocate_entry (new_table);
+                 struct hash_entry *new_entry = allocate_entry (table);

-                 if (new_entry == NULL)
+                 if (new_entry == NULL
+                     || table->overflow_size <= new_entry - table->overflow)
                    abort ();

                  new_entry->data = data;
                  new_entry->next = new_bucket->next;
-                 new_bucket->next = new_entry;
+                 new_bucket->next = new_entry - table->overflow;
                }
              else
                {
                  /* Merely relink an existing entry, when moving from a
                     bucket overflow into a bucket overflow.  */
                  cursor->next = new_bucket->next;
-                 new_bucket->next = cursor;
+                 new_bucket->next = cursor - table->overflow;
                }
            }
          else
@@ -923,7 +958,7 @@ hash_rehash (Hash_table *table, size_t candidate)
              new_bucket->data = data;
              new_table->n_buckets_used++;
              if (cursor != bucket)
-               free_entry (new_table, cursor);
+               free_entry (table, cursor);
            }
        }

@@ -932,14 +967,15 @@ hash_rehash (Hash_table *table, size_t candidate)
   table->bucket_limit = new_table->bucket_limit;
   table->n_buckets = new_table->n_buckets;
   table->n_buckets_used = new_table->n_buckets_used;
-  table->free_entry_list = new_table->free_entry_list;
-  /* table->n_entries already holds its value.  */
-#if USE_OBSTACK
-  table->entry_stack = new_table->entry_stack;
-#endif
+  /* table->n_entries, table->overflow, and table->overflow_size
+     already hold their values.  */
   free (new_table);

   return true;
+
+ fail:
+  free (new_table);
+  return false;
 }

 /* If ENTRY matches an entry already in the hash table, return the pointer
@@ -968,12 +1004,14 @@ hash_insert (Hash_table *table, const void *entry)

       if (new_entry == NULL)
        return NULL;
+      if (table->overflow_size <= new_entry - table->overflow)
+       abort ();

       /* Add ENTRY in the overflow of the bucket.  */

       new_entry->data = (void *) entry;
       new_entry->next = bucket->next;
-      bucket->next = new_entry;
+      bucket->next = new_entry - table->overflow;
       table->n_entries++;
       return (void *) entry;
     }
@@ -1060,21 +1098,7 @@ hash_delete (Hash_table *table, const void *entry)
              if (!hash_rehash (table, candidate))
                {
                  /* Failure to allocate memory in an attempt to
-                    shrink the table is not fatal.  But since memory
-                    is low, we can at least be kind and free any
-                    spare entries, rather than keeping them tied up
-                    in the free entry list.  */
-#if ! USE_OBSTACK
-                 struct hash_entry *cursor = table->free_entry_list;
-                 struct hash_entry *next;
-                 while (cursor)
-                   {
-                     next = cursor->next;
-                     free (cursor);
-                     cursor = next;
-                   }
-                 table->free_entry_list = NULL;
-#endif
+                    shrink the table is not fatal.  */
                }
            }
        }
@@ -1099,7 +1123,8 @@ hash_print (const Hash_table *table)
       if (bucket)
        printf ("%lu:\n", (unsigned long int) (bucket - table->bucket));

-      for (cursor = bucket; cursor; cursor = cursor->next)
+      for (cursor = bucket; cursor != table->overflow;
+          cursor = table->overflow + cursor->next)
        {
          char const *s = cursor->data;
          /* FIXME */
-- 
1.6.3.2







reply via email to

[Prev in Thread] Current Thread [Next in Thread]