[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [PATCH 3/7] libihash: use linear probing and fast modulo operation
From: |
Samuel Thibault |
Subject: |
Re: [PATCH 3/7] libihash: use linear probing and fast modulo operation |
Date: |
Tue, 13 May 2014 23:12:33 +0200 |
User-agent: |
Mutt/1.5.21+34 (58baf7c9f32f) (2010-12-30) |
Justus Winter, le Tue 13 May 2014 21:02:52 +0200, a écrit :
> libihash uses open addressing. Previously, quadratic probing in both
> directions was used to resolve collisions. Quadratic probing might
> result in a less efficient use of caches.
>
> Also, prime numbers of the form 4 * i + 3 were used as array sizes.
> This was used in combination with the integer modulo operation for
> hashing. It has been known for some time that libihash suffers from
> collisions, so a integer hash function is now applied to the keys to
> derive the index.
>
> Use linear probing instead. Also, use powers of two for the array
> sizes, so a bit mask can be used for the modulo operation.
Ack.
> * libihash/ihash.c (ihash_sizes, ihash_nsizes): Remove.
> (find_index): Use linear probing and fast modulo operation.
> (add_one): Likewise.
> * libihash/ihash.h (HURD_IHASH_MIN_SIZE): New macro.
> ---
> libihash/ihash.c | 125
> +++++++------------------------------------------------
> libihash/ihash.h | 4 ++
> 2 files changed, 19 insertions(+), 110 deletions(-)
>
> diff --git a/libihash/ihash.c b/libihash/ihash.c
> index 71ddc26..d628d75 100644
> --- a/libihash/ihash.c
> +++ b/libihash/ihash.c
> @@ -31,55 +31,6 @@
> #include <assert.h>
>
> #include "ihash.h"
> -
> -
> -/* The prime numbers of the form 4 * i + 3 for some i, all greater
> - than twice the previous one and smaller than 2^40 (for now). */
> -static const uint64_t ihash_sizes[] =
> -{
> - 3,
> - 7,
> - 19,
> - 43,
> - 103,
> - 211,
> - 431,
> - 863,
> - 1747,
> - 3499,
> - 7019,
> - 14051,
> - 28111,
> - 56239,
> - 112507,
> - 225023,
> - 450067,
> - 900139,
> - 1800311,
> - 3600659,
> - 7201351,
> - 14402743,
> - 28805519,
> - 57611039,
> - 115222091,
> - 230444239,
> - 460888499,
> - 921777067,
> - 1843554151,
> - UINT64_C (3687108307),
> - UINT64_C (7374216631),
> - UINT64_C (14748433279),
> - UINT64_C (29496866579),
> - UINT64_C (58993733159),
> - UINT64_C (117987466379),
> - UINT64_C (235974932759),
> - UINT64_C (471949865531),
> - UINT64_C (943899731087)
> -};
> -
> -static const unsigned int ihash_nsizes = (sizeof ihash_sizes
> - / sizeof ihash_sizes[0]);
> -
>
> /* This is the integer finalizer from MurmurHash3. */
> static inline uint32_t
> @@ -120,40 +71,24 @@ static inline int
> find_index (hurd_ihash_t ht, hurd_ihash_key_t key)
> {
> unsigned int idx;
> - unsigned int i;
> unsigned int up_idx;
> - unsigned int down_idx;
> + unsigned int mask = ht->size - 1;
>
> - idx = murmur3_mix32 (key, 32) % ht->size;
> + idx = murmur3_mix32 (key, __builtin_ctz (ht->size));
>
> if (ht->items[idx].value == _HURD_IHASH_EMPTY || ht->items[idx].key == key)
> return idx;
>
> - /* Instead of calculating idx + 1, idx + 4, idx + 9, ..., idx + i^2,
> - we add 1, 3, 5, 7, etc to the previous index. We do this in both
> - directions separately. */
> - i = 1;
> up_idx = idx;
> - down_idx = idx;
>
> do
> {
> - up_idx = (up_idx + i) % ht->size;
> + up_idx = (up_idx + 1) & mask;
> if (ht->items[up_idx].value == _HURD_IHASH_EMPTY
> || ht->items[up_idx].key == key)
> return up_idx;
> -
> - if (down_idx < i)
> - down_idx += ht->size;
> - down_idx = (down_idx - i) % ht->size;
> - if (ht->items[down_idx].value == _HURD_IHASH_EMPTY
> - || ht->items[down_idx].key == key)
> - return down_idx;
> -
> - /* After (ht->size - 1) / 2 iterations, this will be 0. */
> - i = (i + 2) % ht->size;
> }
> - while (i);
> + while (up_idx != idx);
>
> /* If we end up here, the item could not be found. Return any
> invalid index. */
> @@ -277,52 +212,25 @@ add_one (hurd_ihash_t ht, hurd_ihash_key_t key,
> hurd_ihash_value_t value)
> unsigned int idx;
> unsigned int first_free;
>
> - idx = murmur3_mix32 (key, 32) % ht->size;
> + idx = murmur3_mix32 (key, __builtin_ctz (ht->size));
> first_free = idx;
>
> if (ht->items[idx].value != _HURD_IHASH_EMPTY && ht->items[idx].key != key)
> {
> - /* Instead of calculating idx + 1, idx + 4, idx + 9, ..., idx +
> - i^2, we add 1, 3, 5, 7, ... 2 * i - 1 to the previous index.
> - We do this in both directions separately. */
> - unsigned int i = 1;
> + unsigned int mask = ht->size - 1;
> unsigned int up_idx = idx;
> - unsigned int down_idx = idx;
> -
> +
> do
> {
> - up_idx = (up_idx + i) % ht->size;
> + up_idx = (up_idx + 1) & mask;
> if (ht->items[up_idx].value == _HURD_IHASH_EMPTY
> || ht->items[up_idx].key == key)
> {
> idx = up_idx;
> break;
> }
> - if (first_free == idx
> - && ht->items[up_idx].value == _HURD_IHASH_DELETED)
> - first_free = up_idx;
> -
> - if (down_idx < i)
> - down_idx += ht->size;
> - down_idx = (down_idx - i) % ht->size;
> - if (down_idx < 0)
> - down_idx += ht->size;
> - else
> - down_idx %= ht->size;
> - if (ht->items[down_idx].value == _HURD_IHASH_EMPTY
> - || ht->items[down_idx].key == key)
> - {
> - idx = down_idx;
> - break;
> - }
> - if (first_free == idx
> - && ht->items[down_idx].value == _HURD_IHASH_DELETED)
> - first_free = down_idx;
> -
> - /* After (ht->size - 1) / 2 iterations, this will be 0. */
> - i = (i + 2) % ht->size;
> }
> - while (i);
> + while (up_idx != idx);
> }
>
> /* Remove the old entry for this key if necessary. */
> @@ -365,21 +273,18 @@ hurd_ihash_add (hurd_ihash_t ht, hurd_ihash_key_t key,
> hurd_ihash_value_t item)
> if (ht->size)
> {
> /* Only fill the hash table up to its maximum load factor. */
> - if (ht->nr_items * 100 / ht->size <= ht->max_load)
> + if ((ht->nr_items * 100) >> __builtin_ctz (ht->size) <= ht->max_load)
> if (add_one (ht, key, item))
> return 0;
> }
>
> /* The hash table is too small, and we have to increase it. */
> - for (i = 0; i < ihash_nsizes; i++)
> - if (ihash_sizes[i] > old_ht.size)
> - break;
> - if (i == ihash_nsizes
> - || ihash_sizes[i] > SIZE_MAX / sizeof (struct _hurd_ihash_item))
> - return ENOMEM; /* Surely will be true momentarily. */
> -
> ht->nr_items = 0;
> - ht->size = ihash_sizes[i];
> + if (ht->size == 0)
> + ht->size = HURD_IHASH_MIN_SIZE;
> + else
> + ht->size <<= 1;
> +
> /* calloc() will initialize all values to _HURD_IHASH_EMPTY implicitely.
> */
> ht->items = calloc (ht->size, sizeof (struct _hurd_ihash_item));
>
> diff --git a/libihash/ihash.h b/libihash/ihash.h
> index a24f384..8829e51 100644
> --- a/libihash/ihash.h
> +++ b/libihash/ihash.h
> @@ -93,6 +93,10 @@ typedef struct hurd_ihash *hurd_ihash_t;
>
> /* Construction and destruction of hash tables. */
>
> +/* The size of the initial allocation in number of items. This must
> + be a power of two. */
> +#define HURD_IHASH_MIN_SIZE 32
> +
> /* The default value for the maximum load factor in percent. */
> #define HURD_IHASH_MAX_LOAD_DEFAULT 75
>
> --
> 2.0.0.rc0
>
--
Samuel
<b> il faut combien de chevaux pour tirer une doloréan à 88 morph ?
***b vient de remarque que 88 mph c'est 142 km/h
<y> aaaaah
<y> c'est pour ça qu'ils limitent à 130 km/h sur les autoroutes
<y> c'est pour éviter que les gens voyagent dans le temps
<b> probablement
- [PATCH 1/7] libihash: fix type of max_load, Justus Winter, 2014/05/13
- [PATCH 3/7] libihash: use linear probing and fast modulo operation, Justus Winter, 2014/05/13
- Re: [PATCH 3/7] libihash: use linear probing and fast modulo operation,
Samuel Thibault <=
- [PATCH 2/7] libihash: use an integer hash function on the keys, Justus Winter, 2014/05/13
- [PATCH 4/7] libihash: use fast binary scaling to determine the load, Justus Winter, 2014/05/13
- [PATCH 5/7] include: add lock-less reference counting primitives, Justus Winter, 2014/05/13
- [PATCH 7/7] libtrivfs: lock-less reference counting for trivfs_peropen objects, Justus Winter, 2014/05/13