bug-hurd
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH 3/7] libihash: use linear probing and fast modulo operation


From: Justus Winter
Subject: [PATCH 3/7] libihash: use linear probing and fast modulo operation
Date: Tue, 13 May 2014 21:02:52 +0200

libihash uses open addressing.  Previously, quadratic probing in both
directions was used to resolve collisions.  Quadratic probing might
result in a less efficient use of caches.

Also, prime numbers of the form 4 * i + 3 were used as array sizes.
This was used in combination with the integer modulo operation for
hashing.  It has been known for some time that libihash suffers from
collisions, so a integer hash function is now applied to the keys to
derive the index.

Use linear probing instead.  Also, use powers of two for the array
sizes, so a bit mask can be used for the modulo operation.

* libihash/ihash.c (ihash_sizes, ihash_nsizes): Remove.
(find_index): Use linear probing and fast modulo operation.
(add_one): Likewise.
* libihash/ihash.h (HURD_IHASH_MIN_SIZE): New macro.
---
 libihash/ihash.c | 125 +++++++------------------------------------------------
 libihash/ihash.h |   4 ++
 2 files changed, 19 insertions(+), 110 deletions(-)

diff --git a/libihash/ihash.c b/libihash/ihash.c
index 71ddc26..d628d75 100644
--- a/libihash/ihash.c
+++ b/libihash/ihash.c
@@ -31,55 +31,6 @@
 #include <assert.h>
 
 #include "ihash.h"
-
-
-/* The prime numbers of the form 4 * i + 3 for some i, all greater
-   than twice the previous one and smaller than 2^40 (for now).  */
-static const uint64_t ihash_sizes[] =
-{
-  3,
-  7,
-  19,
-  43,
-  103,
-  211,
-  431,
-  863,
-  1747,
-  3499,
-  7019,
-  14051,
-  28111,
-  56239,
-  112507,
-  225023,
-  450067,
-  900139,
-  1800311,
-  3600659,
-  7201351,
-  14402743,
-  28805519,
-  57611039,
-  115222091,
-  230444239,
-  460888499,
-  921777067,
-  1843554151,
-  UINT64_C (3687108307),
-  UINT64_C (7374216631),
-  UINT64_C (14748433279),
-  UINT64_C (29496866579),
-  UINT64_C (58993733159),
-  UINT64_C (117987466379),
-  UINT64_C (235974932759),
-  UINT64_C (471949865531),
-  UINT64_C (943899731087)
-};
-
-static const unsigned int ihash_nsizes = (sizeof ihash_sizes
-                                         / sizeof ihash_sizes[0]);
-
 
 /* This is the integer finalizer from MurmurHash3.  */
 static inline uint32_t
@@ -120,40 +71,24 @@ static inline int
 find_index (hurd_ihash_t ht, hurd_ihash_key_t key)
 {
   unsigned int idx;
-  unsigned int i;
   unsigned int up_idx;
-  unsigned int down_idx;
+  unsigned int mask = ht->size - 1;
 
-  idx = murmur3_mix32 (key, 32) % ht->size;
+  idx = murmur3_mix32 (key, __builtin_ctz (ht->size));
 
   if (ht->items[idx].value == _HURD_IHASH_EMPTY || ht->items[idx].key == key)
     return idx;
 
-  /* Instead of calculating idx + 1, idx + 4, idx + 9, ..., idx + i^2,
-     we add 1, 3, 5, 7, etc to the previous index.  We do this in both
-     directions separately.  */
-  i = 1;
   up_idx = idx;
-  down_idx = idx;
 
   do
     {
-      up_idx = (up_idx + i) % ht->size;
+      up_idx = (up_idx + 1) & mask;
       if (ht->items[up_idx].value == _HURD_IHASH_EMPTY
          || ht->items[up_idx].key == key)
        return up_idx;
-
-      if (down_idx < i)
-       down_idx += ht->size;
-      down_idx = (down_idx - i) % ht->size;
-      if (ht->items[down_idx].value == _HURD_IHASH_EMPTY
-         || ht->items[down_idx].key == key)
-       return down_idx;
-
-      /* After (ht->size - 1) / 2 iterations, this will be 0.  */
-      i = (i + 2) % ht->size;
     }
-  while (i);
+  while (up_idx != idx);
 
   /* If we end up here, the item could not be found.  Return any
      invalid index.  */
@@ -277,52 +212,25 @@ add_one (hurd_ihash_t ht, hurd_ihash_key_t key, 
hurd_ihash_value_t value)
   unsigned int idx;
   unsigned int first_free;
 
-  idx = murmur3_mix32 (key, 32) % ht->size;
+  idx = murmur3_mix32 (key, __builtin_ctz (ht->size));
   first_free = idx;
 
   if (ht->items[idx].value != _HURD_IHASH_EMPTY && ht->items[idx].key != key)
     {
-      /* Instead of calculating idx + 1, idx + 4, idx + 9, ..., idx +
-         i^2, we add 1, 3, 5, 7, ... 2 * i - 1 to the previous index.
-         We do this in both directions separately.  */
-      unsigned int i = 1;
+      unsigned int mask = ht->size - 1;
       unsigned int up_idx = idx;
-      unsigned int down_idx = idx;
- 
+
       do
        {
-         up_idx = (up_idx + i) % ht->size;
+        up_idx = (up_idx + 1) & mask;
          if (ht->items[up_idx].value == _HURD_IHASH_EMPTY
              || ht->items[up_idx].key == key)
            {
              idx = up_idx;
              break;
            }
-         if (first_free == idx
-             && ht->items[up_idx].value == _HURD_IHASH_DELETED)
-           first_free = up_idx;
-
-         if (down_idx < i)
-           down_idx += ht->size;
-         down_idx = (down_idx - i) % ht->size;
-         if (down_idx < 0)
-           down_idx += ht->size;
-         else
-           down_idx %= ht->size;
-         if (ht->items[down_idx].value == _HURD_IHASH_EMPTY
-             || ht->items[down_idx].key == key)
-           {
-             idx = down_idx;
-             break;
-           }
-         if (first_free == idx
-             && ht->items[down_idx].value == _HURD_IHASH_DELETED)
-           first_free = down_idx;
-
-         /* After (ht->size - 1) / 2 iterations, this will be 0.  */
-         i = (i + 2) % ht->size;
        }
-      while (i);
+      while (up_idx != idx);
     }
 
   /* Remove the old entry for this key if necessary.  */
@@ -365,21 +273,18 @@ hurd_ihash_add (hurd_ihash_t ht, hurd_ihash_key_t key, 
hurd_ihash_value_t item)
   if (ht->size)
     {
       /* Only fill the hash table up to its maximum load factor.  */
-      if (ht->nr_items * 100 / ht->size <= ht->max_load)
+      if ((ht->nr_items * 100) >> __builtin_ctz (ht->size) <= ht->max_load)
        if (add_one (ht, key, item))
          return 0;
     }
 
   /* The hash table is too small, and we have to increase it.  */
-  for (i = 0; i < ihash_nsizes; i++)
-    if (ihash_sizes[i] > old_ht.size)
-      break;
-  if (i == ihash_nsizes
-      || ihash_sizes[i] > SIZE_MAX / sizeof (struct _hurd_ihash_item))
-    return ENOMEM;             /* Surely will be true momentarily.  */
-
   ht->nr_items = 0;
-  ht->size = ihash_sizes[i];
+  if (ht->size == 0)
+      ht->size = HURD_IHASH_MIN_SIZE;
+  else
+      ht->size <<= 1;
+
   /* calloc() will initialize all values to _HURD_IHASH_EMPTY implicitely.  */
   ht->items = calloc (ht->size, sizeof (struct _hurd_ihash_item));
 
diff --git a/libihash/ihash.h b/libihash/ihash.h
index a24f384..8829e51 100644
--- a/libihash/ihash.h
+++ b/libihash/ihash.h
@@ -93,6 +93,10 @@ typedef struct hurd_ihash *hurd_ihash_t;
 
 /* Construction and destruction of hash tables.  */
 
+/* The size of the initial allocation in number of items.  This must
+   be a power of two.  */
+#define HURD_IHASH_MIN_SIZE    32
+
 /* The default value for the maximum load factor in percent.  */
 #define HURD_IHASH_MAX_LOAD_DEFAULT 75
 
-- 
2.0.0.rc0




reply via email to

[Prev in Thread] Current Thread [Next in Thread]