From db21b3e975ad7526e0db531fd4936354d8f031ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc=20Nieper-Wi=C3=9Fkirchen?= Date: Sat, 10 Oct 2020 23:38:21 +0200 Subject: [PATCH] hamt: New module. This module provides (persistent) hash array mapped tries. * MODULES.html.sh: Add hamt. * lib/hamt.c: New file. * lib/hamt.h: New file. * modules/hamt: New file. * modules/hamt-tests: New file. * tests/test-hamt.c: New file. --- ChangeLog | 11 + MODULES.html.sh | 1 + lib/hamt.c | 984 +++++++++++++++++++++++++++++++++++++++++++++ lib/hamt.h | 182 +++++++++ modules/hamt | 29 ++ modules/hamt-tests | 11 + tests/test-hamt.c | 352 ++++++++++++++++ 7 files changed, 1570 insertions(+) create mode 100644 lib/hamt.c create mode 100644 lib/hamt.h create mode 100644 modules/hamt create mode 100644 modules/hamt-tests create mode 100644 tests/test-hamt.c diff --git a/ChangeLog b/ChangeLog index 2aba2b0c7..b9e4f9970 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,14 @@ +2020-10-10 Marc Nieper-Wißkirchen + + hamt: New module. + This module provides (persistent) hash array mapped tries. + * MODULES.html.sh: Add hamt. + * lib/hamt.c: New file. + * lib/hamt.h: New file. + * modules/hamt: New file. + * modules/hamt-tests: New file. + * tests/test-hamt.c: New file. + 2020-10-10 Bruno Haible *-list, *-oset, *-omap: Avoid possible compiler warnings. diff --git a/MODULES.html.sh b/MODULES.html.sh index 7e7cdae3e..2907eb741 100755 --- a/MODULES.html.sh +++ b/MODULES.html.sh @@ -2028,6 +2028,7 @@ func_all_modules () func_module hash-pjw func_module hash-pjw-bare func_module hash + func_module hamt func_module readline func_module readtokens func_module readtokens0 diff --git a/lib/hamt.c b/lib/hamt.c new file mode 100644 index 000000000..eb15e19c1 --- /dev/null +++ b/lib/hamt.c @@ -0,0 +1,984 @@ +/* (Persistent) hash array mapped tries. + Copyright (C) 2020 Free Software Foundation, Inc. 
+ + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Marc Nieper-Wißkirchen , 2020. */ + +#include +#include "hamt.h" + +#include +#include +#include +#include +#include "count-one-bits.h" +#include "verify.h" +#include "xalloc.h" + +/* Reference counters can be shared between different threads if the + entry they belong to is shared between different threads. + Operations on them therefore have to be atomic so that no invalid + state is observable. + + A thread must not modify an entry or its children (!) if its + reference count implies that the entry is shared by at least two + hamts. */ +typedef +#if GL_HAMT_THREAD_SAFE +_Atomic +#endif +size_t ref_counter; + +/* Hash values are of type size_t. For each level of the trie, we use + 5 bits (corresponding to lg2 of the width of a 32-bit word. */ +#define MAX_DEPTH ((SIZE_WIDTH + 4) / 5) + +/***************/ +/* Entry Types */ +/***************/ + +/* Leaf nodes are of type element. Non-leaf nodes are either subtries + or, if at maximal depth, buckets. The entry type is stored in the + lower two bits of the reference counter, whereas reference counters + for entries are incremented and decremented in multiples of 4. */ +enum entry_type +{ + element_entry = 0, + subtrie_entry = 1, + bucket_entry = 2 +}; + +/* Return the type an entry. 
*/ +static enum entry_type +entry_type (const Hamt_entry *entry) +{ + return entry->ref_count & 3; +} + +/********************/ +/* Reference Counts */ +/********************/ + +/* Initialize the reference counter, storing its type. */ +static void +init_ref_counter (ref_counter *counter, enum entry_type type) +{ + *counter = 4 + type; +} + +/* Increase the reference counter of an entry. */ +static void +inc_ref_counter (ref_counter *counter) +{ + *counter += 4; +} + +/* Decrease the entry reference counter. Return false if the entry + can be deleted. */ +static bool +dec_ref_counter (ref_counter *counter) +{ + *counter -= 4; + return *counter >= 4; +} + +/**************/ +/* Structures */ +/**************/ + +/* Different generations of a hamt share a function table. */ +struct function_table +{ + Hamt_hasher *hasher; + Hamt_comparator *comparator; + Hamt_freer *freer; + ref_counter ref_count; +}; + +/* Different generations of a hamt share subtries. A singleton + subtrie is modelled as a single element. */ +struct subtrie +{ + ref_counter ref_count; + /* Nodes carry labels from 0 to 31. The i-th bit in MAP is set if + the node labelled i is present. */ + uint32_t map; + /* The length of the NODES array is the population count of MAP. + The order of the nodes corresponds to the order of the 1-bits in + MAP. */ + Hamt_entry *nodes [FLEXIBLE_ARRAY_MEMBER]; +}; + +/* Buckets are used when different elements have the same hash values. */ +struct bucket +{ + ref_counter ref_counter; + size_t elt_count; + Hamt_entry *elts [FLEXIBLE_ARRAY_MEMBER]; +}; + +/* A hamt consists of its function table and the root entry. */ +struct hamt +{ + struct function_table *functions; + /* The root entry is NULL for an empty HAMT. */ + Hamt_entry *root; +}; + +/*******************/ +/* Function Tables */ +/*******************/ + +/* Allocate and initialize a function table. 
*/ +static struct function_table * +create_function_table (Hamt_hasher *hasher, Hamt_comparator *comparator, + Hamt_freer *freer) +{ + struct function_table *functions = XMALLOC (struct function_table); + functions->hasher = hasher; + functions->comparator = comparator; + functions->freer = freer; + functions->ref_count = 1; + return functions; +} + +/* Increment the reference count and return the function table. */ +static struct function_table * +copy_function_table (struct function_table *function_table) +{ + ++function_table->ref_count; + return function_table; +} + +/* Decrease the reference count and free the function table if the + reference count drops to zero. */ +static void +free_function_table (struct function_table *function_table) +{ + if (--function_table->ref_count) + return; + free (function_table); +} + +/************/ +/* Elements */ +/************/ + +/* Return an element's hash. */ +static size_t +hash_element (const struct function_table *functions, const Hamt_entry *elt) +{ + return functions->hasher (elt); +} + +/* Compare two elements. */ +static bool +compare_elements (const struct function_table *functions, + const Hamt_entry *elt1, const Hamt_entry *elt2) +{ + return functions->comparator (elt1, elt2); +} + +/* Free an element. */ +static void +free_element (const struct function_table *functions, Hamt_entry *elt) +{ + if (dec_ref_counter (&elt->ref_count)) + return; + functions->freer (elt); +} + +/* Return the initialized element. */ +static Hamt_entry * +init_element (Hamt_entry *elt) +{ + init_ref_counter (&elt->ref_count, element_entry); + return elt; +} + +/***********/ +/* Buckets */ +/***********/ + +/* Allocate a partially initialized bucket with a given number of elements. 
*/
+static struct bucket *
+alloc_bucket (size_t elt_count)
+{
+  struct bucket *bucket
+    = xmalloc (FLEXSIZEOF (struct bucket, elts,
+                           sizeof (Hamt_entry *) * elt_count));
+  init_ref_counter (&bucket->ref_counter, bucket_entry);
+  bucket->elt_count = elt_count;
+  return bucket;
+}
+
+/***********/
+/* Entries */
+/***********/
+
+/* Return true if the entry is shared between two or more hamts.
+   Otherwise, return false.
+
+   This procedure is used for destructive updates.  If an entry and
+   all its parents are not shared, it can be updated destructively
+   without affecting other hamts.  */
+static bool
+is_shared (const Hamt_entry *entry)
+{
+  return entry->ref_count >= 8;
+}
+
+/* Calculate and return the number of nodes in a subtrie.  */
+static int
+trienode_count (const struct subtrie *subtrie)
+{
+  return count_one_bits_l (subtrie->map);
+}
+
+/* Allocate a partially initialized subtrie with a given number of nodes.  */
+static struct subtrie *
+alloc_subtrie (int node_count)
+{
+  struct subtrie *subtrie
+    = xmalloc (FLEXSIZEOF (struct subtrie, nodes,
+                           sizeof (Hamt_entry *) * node_count));
+  init_ref_counter (&subtrie->ref_count, subtrie_entry);
+  return subtrie;
+}
+
+/* Return a conceptual copy of an entry.  */
+static Hamt_entry *
+copy_entry (Hamt_entry *entry)
+{
+  inc_ref_counter (&entry->ref_count);
+  return entry;
+}
+
+/* Return a new bucket that has the j-th element replaced.  */
+static struct bucket *
+replace_bucket_element (struct bucket *bucket, int j, Hamt_entry *elt)
+{
+  int n = bucket->elt_count;
+  struct bucket *new_bucket = alloc_bucket (n);
+  for (int k = 0; k < n; ++k)
+    if (k == j)
+      new_bucket->elts [k] = elt;
+    else
+      new_bucket->elts [k] = copy_entry (bucket->elts [k]);
+  return new_bucket;
+}
+
+/* Return a new subtrie that has the j-th node replaced. 
*/ +static struct subtrie * +replace_entry (struct subtrie *subtrie, int j, Hamt_entry *entry) +{ + int n = trienode_count (subtrie); + struct subtrie *new_subtrie = alloc_subtrie (n); + new_subtrie->map = subtrie->map; + for (int k = 0; k < n; ++k) + if (k == j) + new_subtrie->nodes [k] = entry; + else + new_subtrie->nodes [k] = copy_entry (subtrie->nodes [k]); + return new_subtrie; +} + +/* Return a new subtrie that has an entry labelled i inserted at + the j-th position. */ +static struct subtrie * +insert_entry (struct subtrie *subtrie, int i, int j, Hamt_entry *entry) +{ + int n = trienode_count (subtrie) + 1; + struct subtrie *new_subtrie = alloc_subtrie (n); + new_subtrie->map = subtrie->map | (1 << i); + for (int k = 0; k < n; ++k) + { + if (k < j) + new_subtrie->nodes [k] = copy_entry (subtrie->nodes [k]); + else if (k > j) + new_subtrie->nodes [k] = copy_entry (subtrie->nodes [k - 1]); + else + new_subtrie->nodes [k] = entry; + } + return new_subtrie; +} + +/* Return a new entry that has the entry labelled i removed from + position j. */ +static Hamt_entry * +remove_subtrie_entry (struct subtrie *subtrie, int i, int j) +{ + int n = trienode_count (subtrie) - 1; + if (n == 1) + { + if (j == 0) + return copy_entry (subtrie->nodes [1]); + return copy_entry (subtrie->nodes [0]); + } + struct subtrie *new_subtrie = alloc_subtrie (n); + new_subtrie->map = subtrie->map & ~(1 << i); + for (int k = 0; k < n; ++k) + { + if (k < j) + new_subtrie->nodes [k] = copy_entry (subtrie->nodes [k]); + else if (k >= j) + new_subtrie->nodes [k] = copy_entry (subtrie->nodes [k + 1]); + } + return (Hamt_entry *) new_subtrie; +} + +/* Return a new entry that has the entry at position j removed. 
*/ +static Hamt_entry * +remove_bucket_entry (struct bucket *bucket, int j) +{ + int n = bucket->elt_count - 1; + if (n == 1) + { + if (j == 0) + return copy_entry (bucket->elts [1]); + return copy_entry (bucket->elts [0]); + } + struct bucket *new_bucket = alloc_bucket (n); + for (int k = 0; k < n; ++k) + { + if (k < j) + new_bucket->elts [k] = copy_entry (bucket->elts [k]); + else if (k >= j) + new_bucket->elts [k] = copy_entry (bucket->elts [k + 1]); + } + return (Hamt_entry *) new_bucket; +} + +/****************************/ +/* Creation and Destruction */ +/****************************/ + +/* Create a new, empty hash array mapped trie. */ +Hamt * +hamt_create (Hamt_hasher *hasher, Hamt_comparator *comparator, + Hamt_freer *freer) +{ + struct function_table *functions + = create_function_table (hasher, comparator, freer); + Hamt *hamt = XMALLOC (Hamt); + hamt->functions = functions; + hamt->root = NULL; + return hamt; +} + +/* Return a copy of the hamt. */ +Hamt * +hamt_copy (Hamt *hamt) +{ + Hamt *new_hamt = XMALLOC (Hamt); + new_hamt->functions = copy_function_table (hamt->functions); + new_hamt->root = hamt->root == NULL ? NULL : copy_entry (hamt->root); + return new_hamt; +} + +/* Free a bucket. */ +static void +free_bucket (struct function_table const *functions, struct bucket *bucket) +{ + if (dec_ref_counter (&bucket->ref_counter)) + return; + size_t elt_count = bucket->elt_count; + Hamt_entry *const *elts = bucket->elts; + for (size_t i = 0; i < elt_count; ++i) + free_element (functions, elts [i]); + free (bucket); +} + +/* Forward declaration. */ +static void free_subtrie (struct function_table const *functions, + struct subtrie *subtrie); + +/* Free an entry. 
*/
+static void
+free_entry (struct function_table const *functions, Hamt_entry *entry)
+{
+  switch (entry_type (entry))
+    {
+    case element_entry:
+      free_element (functions, entry);
+      break;
+    case subtrie_entry:
+      free_subtrie (functions, (struct subtrie *) entry);
+      break;
+    case bucket_entry:
+      free_bucket (functions, (struct bucket *) entry);
+      break;
+    default:
+      assume (0);
+    }
+}
+
+/* Free a trie recursively.  */
+static void
+free_subtrie (struct function_table const *functions, struct subtrie *subtrie)
+{
+  if (dec_ref_counter (&subtrie->ref_count))
+    return;
+  int n = trienode_count (subtrie);
+  Hamt_entry **node_ptr = subtrie->nodes;
+  for (int j = 0; j < n; ++j)
+    free_entry (functions, *node_ptr++);
+  free (subtrie);
+}
+
+/* Free a hamt.  */
+void
+hamt_free (Hamt *hamt)
+{
+  if (hamt->root != NULL)
+    free_entry (hamt->functions, hamt->root);
+  free_function_table (hamt->functions);
+  free (hamt);
+}
+
+/**********/
+/* Lookup */
+/**********/
+
+/* Lookup an element in a bucket.  */
+static const Hamt_entry *
+bucket_lookup (const struct function_table *functions,
+               const struct bucket *bucket, const Hamt_entry *elt)
+{
+  size_t elt_count = bucket->elt_count;
+  Hamt_entry *const *elts = bucket->elts;
+  for (size_t i = 0; i < elt_count; ++i)
+    {
+      if (compare_elements (functions, elt, elts [i]))
+        return elts [i];
+    }
+  return NULL;
+}
+
+/* Forward declaration.  */
+static const Hamt_entry *entry_lookup (const struct function_table *functions,
+                                       const Hamt_entry *entry,
+                                       const Hamt_entry *elt, size_t hash);
+
+/* Lookup an element in a subtrie.  */
+static const Hamt_entry *
+subtrie_lookup (const struct function_table *functions,
+                const struct subtrie *subtrie, const Hamt_entry *elt,
+                size_t hash)
+{
+  uint32_t map = subtrie->map;
+  int i = hash & 31;
+
+  if (! (map & (1 << i)))
+    return NULL;
+
+  int j = i == 0 ? 
0 : count_one_bits (map << (32 - i)); + return entry_lookup (functions, subtrie->nodes [j], elt, hash >> 5); +} + +/* Lookup an element in an entry. */ +static const Hamt_entry * +entry_lookup (const struct function_table *functions, const Hamt_entry *entry, + const Hamt_entry *elt, size_t hash) +{ + switch (entry_type (entry)) + { + case element_entry: + if (compare_elements (functions, elt, entry)) + return entry; + return NULL; + case subtrie_entry: + return subtrie_lookup (functions, (struct subtrie *) entry, elt, hash); + case bucket_entry: + return bucket_lookup (functions, (struct bucket *) entry, elt); + default: + assume (0); + } +} + +/* If ELT matches an entry in HAMT, return this entry. Otherwise, + return NULL. */ +const Hamt_entry * +hamt_lookup (const Hamt *hamt, const Hamt_entry *elt) +{ + if (hamt->root == NULL) + return NULL; + + return entry_lookup (hamt->functions, hamt->root, elt, + hash_element (hamt->functions, elt)); +} + +/**************************/ +/* Insertion and Deletion */ +/**************************/ + +/* Create a bucket populated with two elements. */ +static struct bucket * +create_populated_bucket (Hamt_entry *elt1, Hamt_entry *elt2) +{ + struct bucket *bucket = alloc_bucket (2); + bucket->elts [0] = elt1; + bucket->elts [1] = elt2; + return bucket; +} + +/* Create a chain of subtrie nodes so that the resulting trie is + populated with exactly two elements. 
*/ +static Hamt_entry * +create_populated_subtrie (Hamt_entry *elt1, Hamt_entry *elt2, size_t hash1, + size_t hash2, int depth) +{ + if (depth >= MAX_DEPTH) + return (Hamt_entry *) create_populated_bucket (elt1, elt2); + + struct subtrie *subtrie; + int i1 = hash1 & 31; + int i2 = hash2 & 31; + if (i1 != i2) + { + subtrie = alloc_subtrie (2); + subtrie->map = (1 << i1) | (1 << i2); + if (i1 < i2) + { + subtrie->nodes [0] = elt1; + subtrie->nodes [1] = elt2; + } + else + { + subtrie->nodes [0] = elt2; + subtrie->nodes [1] = elt1; + } + } + else + { + subtrie = alloc_subtrie (1); + subtrie->map = 1 << i1; + subtrie->nodes [0] + = create_populated_subtrie (elt1, elt2, hash1 >> 5, hash2 >> 5, + depth + 1); + } + return (Hamt_entry *) subtrie; +} + +/* Insert or replace an element in a bucket. */ +static struct bucket * +bucket_insert (const struct function_table *functions, struct bucket *bucket, + Hamt_entry **elt_ptr, bool replace, bool shared) +{ + size_t elt_count = bucket->elt_count; + Hamt_entry **elts = bucket->elts; + for (size_t i = 0; i < elt_count; ++i) + { + if (compare_elements (functions, *elt_ptr, elts [i])) + { + if (replace) + { + if (shared) + { + struct bucket *new_bucket + = replace_bucket_element (bucket, i, + init_element (*elt_ptr)); + *elt_ptr = elts [i]; + return new_bucket; + } + free_element (functions, elts [i]); + elts [i] = init_element (*elt_ptr); + return bucket; + } + *elt_ptr = *elt_ptr == elts [i] ? NULL : elts [i]; + return bucket; + } + } + struct bucket *new_bucket = alloc_bucket (elt_count + 1); + new_bucket->elts [0] = init_element (*elt_ptr); + for (size_t i = 0; i < elt_count; ++i) + { + new_bucket->elts [i + 1] = copy_entry (bucket->elts [i]); + } + if (replace) + *elt_ptr = NULL; + return new_bucket; +} + +/* Forward declaration. 
*/ +static Hamt_entry *entry_insert (const struct function_table *functions, + Hamt_entry *subtrie, Hamt_entry **elt_ptr, + size_t hash, int depth, bool replace, + bool shared); + +/* Insert or replace an element in a subtrie. */ +static struct subtrie * +subtrie_insert (const struct function_table *functions, struct subtrie *subtrie, + Hamt_entry **elt_ptr, size_t hash, int depth, bool replace, + bool shared) +{ + uint32_t map = subtrie->map; + int i = hash & 31; + int j = i == 0 ? 0 : count_one_bits (map << (32 - i)); + if (map & (1 << i)) + { + Hamt_entry *entry = subtrie->nodes [j]; + Hamt_entry *new_entry + = entry_insert (functions, entry, elt_ptr, hash >> 5, depth + 1, + replace, shared); + if (new_entry != entry) + { + if (shared) + return replace_entry (subtrie, j, new_entry); + free_entry (functions, entry); + subtrie->nodes [j] = new_entry; + } + return subtrie; + } + Hamt_entry *entry = init_element (*elt_ptr); + if (replace) + *elt_ptr = NULL; + return insert_entry (subtrie, i, j, entry); +} + +/* Insert or replace an element in an entry. + + REPLACE is true if we want replace instead of insert semantics. + SHARED is false if a destructive update has been requested and none + of the parent nodes are shared. If an entry cannot be inserted + because the same entry with respect to pointer equality is already + present, *ELT_PTR is set to NULL to mark this special case. */ +static Hamt_entry * +entry_insert (const struct function_table *functions, Hamt_entry *entry, + Hamt_entry **elt_ptr, size_t hash, int depth, bool replace, + bool shared) +{ + shared |= is_shared (entry); + switch (entry_type (entry)) + { + case element_entry: + if (compare_elements (functions, *elt_ptr, entry)) + { + if (replace) + { + if (shared) + { + Hamt_entry *new_entry = init_element (*elt_ptr); + *elt_ptr = entry; + return new_entry; + } + return init_element (*elt_ptr); + } + *elt_ptr = *elt_ptr == entry ? 
NULL : entry; + return entry; + } + Hamt_entry *new_entry = init_element (*elt_ptr); + if (replace) + *elt_ptr = NULL; + return create_populated_subtrie (new_entry, copy_entry (entry), hash, + (hash_element (functions, entry) + >> (5 * depth)), depth); + case subtrie_entry: + return (Hamt_entry *) + subtrie_insert (functions, (struct subtrie *) entry, elt_ptr, hash, + depth, replace, shared); + case bucket_entry: + return (Hamt_entry *) + bucket_insert (functions, (struct bucket *) entry, elt_ptr, replace, + shared); + default: + assume (0); + } +} + +/* Insert or replace an element in the root. */ +static Hamt_entry * +root_insert (const struct function_table *functions, Hamt_entry *root, + Hamt_entry **elt_ptr, bool replace, bool shared) +{ + if (root == NULL) + return init_element (*elt_ptr); + + return entry_insert (functions, root, elt_ptr, + hash_element (functions, *elt_ptr), 0, replace, shared); +} + +/* If *ELT_PTR matches an element already in HAMT, set *ELT_PTR to the + element from the table and return HAMT. Otherwise, insert *ELT_PTR + into a copy of the HAMT and return the copy. */ +Hamt * +hamt_insert (Hamt *hamt, Hamt_entry **elt_ptr) +{ + Hamt_entry *elt = *elt_ptr; + Hamt_entry *new_entry = root_insert (hamt->functions, hamt->root, + elt_ptr, false, true); + if (*elt_ptr == NULL) + *elt_ptr = elt; + + if (new_entry == hamt->root) + return hamt; + + Hamt *new_hamt = XMALLOC (Hamt); + new_hamt->functions = copy_function_table (hamt->functions); + new_hamt->root = new_entry; + return new_hamt; +} + +/* Insert *ELT_PTR into a copy of HAMT and return the copy. If an + existing element was replaced, set *ELT_PTR to this element, and to + NULL otherwise. 
*/ +Hamt * +hamt_replace (Hamt *hamt, Hamt_entry **elt_ptr) +{ + Hamt *new_hamt = XMALLOC (Hamt); + new_hamt->functions = copy_function_table (hamt->functions); + new_hamt->root = root_insert (hamt->functions, hamt->root, elt_ptr, true, + true); + return new_hamt; +} + +/* Delete an element in a bucket if found. */ +static Hamt_entry * +bucket_delete (const struct function_table *functions, struct bucket *bucket, + Hamt_entry **elt_ptr) +{ + size_t elt_count = bucket->elt_count; + Hamt_entry *const *elts = bucket->elts; + for (size_t i = 0; i < elt_count; ++i) + { + if (compare_elements (functions, *elt_ptr, elts [i])) + { + *elt_ptr = elts [i]; + return remove_bucket_entry (bucket, i); + } + } + *elt_ptr = NULL; + return (Hamt_entry *) bucket; +} + +/* Forward declaration. */ +static Hamt_entry *entry_delete (const struct function_table *functions, + Hamt_entry *entry, Hamt_entry **elt_ptr, + size_t hash, int depth, bool shared); + +/* Delete an element in a subtrie if found. */ +static Hamt_entry * +subtrie_delete (const struct function_table *functions, struct subtrie *subtrie, + Hamt_entry **elt_ptr, size_t hash, int depth, bool shared) +{ + uint32_t map = subtrie->map; + int i = hash & 31; + int j = i == 0 ? 0 : count_one_bits (map << (32 - i)); + if (map & (1 << i)) + { + Hamt_entry *entry = subtrie->nodes [j]; + Hamt_entry *new_entry + = entry_delete (functions, entry, elt_ptr, hash >> 5, depth + 1, + shared); + if (new_entry == NULL) + return remove_subtrie_entry (subtrie, i, j); + if (new_entry != entry) + { + if (shared) + return (Hamt_entry *) replace_entry (subtrie, j, new_entry); + free_entry (functions, entry); + subtrie->nodes [j] = new_entry; + } + return (Hamt_entry *) subtrie; + } + *elt_ptr = NULL; + return (Hamt_entry *) subtrie; +} + +/* Delete an element in an entry if found. + + SHARED is false if a destructive update has been requested and none + of the parent nodes are shared. If an entry cannot be + deleted, *ELT_PTR is set to NULL. 
*/ +static Hamt_entry * +entry_delete (const struct function_table *functions, Hamt_entry *entry, + Hamt_entry **elt_ptr, size_t hash, int depth, bool shared) +{ + shared |= is_shared (entry); + switch (entry_type (entry)) + { + case element_entry: + if (compare_elements (functions, *elt_ptr, entry)) + { + *elt_ptr = entry; + return NULL; + } + *elt_ptr = NULL; + return entry; + case subtrie_entry: + return subtrie_delete (functions, (struct subtrie *) entry, elt_ptr, hash, + depth, shared); + case bucket_entry: + return bucket_delete (functions, (struct bucket *) entry, elt_ptr); + default: + assume (0); + } +} + +/* Delete an element in the root. */ +static Hamt_entry * +root_delete (const struct function_table *functions, Hamt_entry *root, + Hamt_entry **elt_ptr, bool shared) +{ + if (root == NULL) + return NULL; + + return entry_delete (functions, root, elt_ptr, + hash_element (functions, *elt_ptr), 0, shared); +} + +/* If *ELT_PTR matches an element already in HAMT, set *ELT_PTR to the +element from the table, remove the element from a copy of the hamt and +return the copy. Otherwise, return HAMT. */ +Hamt * +hamt_delete (Hamt *hamt, Hamt_entry **elt_ptr) +{ + Hamt_entry *elt = *elt_ptr; + Hamt_entry *new_entry = root_delete (hamt->functions, hamt->root, elt_ptr, + true); + if (*elt_ptr == NULL) + *elt_ptr = elt; + + if (new_entry == hamt->root) + return hamt; + + Hamt *new_hamt = XMALLOC (Hamt); + new_hamt->functions = copy_function_table (hamt->functions); + new_hamt->root = new_entry; + return new_hamt; +} + +/*************/ +/* Iteration */ +/*************/ + +/* Walk a bucket. 
*/
+static size_t
+bucket_do_while (const struct bucket *bucket, Hamt_processor *proc, void *data,
+                 bool *success)
+{
+  size_t cnt = 0;
+  size_t elt_count = bucket->elt_count;
+  Hamt_entry *const *elts = bucket->elts;
+  for (size_t i = 0; i < elt_count; ++i)
+    {
+      *success = proc (elts [i], data);
+      if (!*success)
+        return cnt;
+      ++cnt;
+    }
+  return cnt;
+}
+
+/* Forward declaration.  */
+static size_t entry_do_while (const Hamt_entry *entry, Hamt_processor *proc,
+                              void *data, bool *success);
+
+/* Walk a subtrie.  */
+static size_t subtrie_do_while (const struct subtrie *subtrie,
+                                Hamt_processor *proc, void *data, bool *success)
+{
+  size_t cnt = 0;
+  int n = trienode_count (subtrie);
+  Hamt_entry *const *node_ptr = subtrie->nodes;
+  for (int j = 0; j < n; ++j)
+    {
+      cnt += entry_do_while (*node_ptr++, proc, data, success);
+      if (!*success)
+        return cnt;
+    }
+  return cnt;
+}
+
+/* Walk an entry.  */
+static size_t
+entry_do_while (const Hamt_entry *entry, Hamt_processor *proc, void *data,
+                bool *success)
+{
+  switch (entry_type (entry))
+    {
+    case element_entry:
+      *success = proc (entry, data);
+      return *success ? 1 : 0;
+    case subtrie_entry:
+      return subtrie_do_while ((struct subtrie *) entry, proc, data, success);
+    case bucket_entry:
+      return bucket_do_while ((struct bucket *) entry, proc, data, success);
+    default:
+      assume (0);
+    }
+}
+
+/* Call PROC for every entry of the hamt until it returns false.  The
+   first argument of PROC is the entry, the second argument is the value
+   of DATA as received.  Return the number of calls that returned
+   true.  */
+size_t
+hamt_do_while (const Hamt *hamt, Hamt_processor *proc, void *data)
+{
+  if (hamt->root == NULL)
+    return 0;
+
+  bool success = true;
+  return entry_do_while (hamt->root, proc, data, &success);
+}
+
+/***********************/
+/* Destructive Updates */
+/***********************/
+
+/* If *ELT_PTR matches an element already in HAMT, set *ELT_PTR to the
+   element from the table and return false. 
Otherwise, insert *ELT_PTR + destructively into the hamt and return true. */ +bool +hamt_insert_x (Hamt *hamt, Hamt_entry **elt_ptr) +{ + Hamt_entry *elt = *elt_ptr; + Hamt_entry *old_root = hamt->root; + hamt->root = root_insert (hamt->functions, old_root, elt_ptr, false, false); + if (old_root != hamt->root && old_root != NULL) + free_entry (hamt->functions, old_root); + if (*elt_ptr == NULL) + { + *elt_ptr = elt; + return false; + } + return *elt_ptr == elt; +} + +/* Insert ELT destructively into HAMT. If an existing element was + replaced, return true. Otherwise, return false. */ +bool +hamt_replace_x (Hamt *hamt, Hamt_entry *elt) +{ + Hamt_entry *old_root = hamt->root; + hamt->root = root_insert (hamt->functions, old_root, &elt, true, false); + if (old_root != hamt->root && old_root != NULL) + free_entry (hamt->functions, old_root); + return elt != NULL; +} + +/* If ELT matches an element already in HAMT, remove the element + destructively from the hamt and return true. Otherwise, return + false. */ +bool +hamt_delete_x (Hamt *hamt, Hamt_entry *elt) +{ + Hamt_entry *old_root = hamt->root; + hamt->root = root_delete (hamt->functions, old_root, &elt, false); + if (old_root != hamt->root) + free_entry (hamt->functions, old_root); + return elt != NULL; +} diff --git a/lib/hamt.h b/lib/hamt.h new file mode 100644 index 000000000..c37314f55 --- /dev/null +++ b/lib/hamt.h @@ -0,0 +1,182 @@ +/* (Persistent) hash array mapped tries. + Copyright (C) 2020 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
+
+/* Written by Marc Nieper-Wißkirchen, 2020.  */
+
+/* This module provides a persistent version of hash array mapped
+   tries (hamts) that can be used in place of hash tables when pure
+   (functional) operations are needed.
+
+   A hash function and an equivalence predicate have to be provided for
+   the elements that can be inserted, replaced and deleted in a hamt.
+   A hamt cannot contain duplicates that compare equal.
+
+   Each non-destructive updating operation returns a new hamt, which
+   shares structure with the original one.  Destructive updates only
+   affect the hamt, on which the destructive operation is applied.
+   For example, given a hamt HAMT1, any non-destructive update
+   operation (e.g. hamt_insert) will result in a new hamt HAMT2.
+   Whatever further operations (destructive or not, including freeing
+   a hamt) are applied to HAMT1 won't change HAMT2 and vice versa.  To
+   free all the memory, hamt_free has therefore to be applied to both
+   HAMT1 and HAMT2.
+
+   If persistence is not needed, transient hash tables are probably
+   faster.
+
+   See also: Phil Bagwell (2000).  Ideal Hash Trees (Report).  Infoscience
+   Department, École Polytechnique Fédérale de Lausanne.
+
+   http://infoscience.epfl.ch/record/64398/files/idealhashtrees.pdf  */
+
+#ifndef _GL_HAMT_H
+#define _GL_HAMT_H
+
+/* The GL_HAMT_THREAD_SAFE flag is set if the implementation of hamts
+   is thread-safe as long as two threads do not simultaneously access
+   the same hamt.  This is non-trivial as different hamts may share
+   some structure.  */
+#if __STDC_VERSION__ < 201112 || defined __STDC_NO_ATOMICS__
+# define GL_HAMT_THREAD_SAFE 0
+#else
+# define GL_HAMT_THREAD_SAFE 1
+#endif
+
+#include <stdbool.h>
+#include <stddef.h>
+
+/* A hamt stores pointers to elements.  Each element has to be a
+   struct whose initial member is of the type Hamt_entry.  An element
+   is conceptually owned by a hamt as soon as it is inserted. 
It will
+   be automatically freed as soon as the last hamt containing it is
+   freed.  */
+typedef struct
+{
+#if GL_HAMT_THREAD_SAFE
+  _Atomic
+#endif
+  size_t ref_count;
+} Hamt_entry;
+
+/*************************/
+/* Opaque Hamt Structure */
+/*************************/
+
+/* In user-code, hamts are accessed through pointers to the opaque
+   Hamt type.  Two hamts are said to be the same if and only if their
+   pointers are equal.  */
+typedef struct hamt Hamt;
+
+/******************/
+/* Function Types */
+/******************/
+
+/* A hash function has to be pure, and two elements that compare equal
+   have to have the same hash value.  For a hash function to be a good
+   one, it is important that it uses all SIZE_WIDTH bits in
+   size_t.  */
+typedef size_t (Hamt_hasher) (const Hamt_entry *elt);
+
+/* A comparison function has to be pure, and two elements that have
+   equal pointers have to compare equal.  */
+typedef bool (Hamt_comparator) (const Hamt_entry *elt1, const Hamt_entry *elt2);
+
+/* A user-defined function that is called when the last hamt
+   containing a particular element is freed.  */
+typedef void (Hamt_freer) (Hamt_entry *elt);
+
+/* A processor function is called during walking of a hamt.  */
+typedef bool (Hamt_processor) (const Hamt_entry *elt, void *data);
+
+
+/****************************/
+/* Creation and Destruction */
+/****************************/
+
+/* Create and return a new and empty hash array mapped trie.  */
+extern Hamt *hamt_create (Hamt_hasher *hasher, Hamt_comparator *comparator,
+                          Hamt_freer *freer)
+  _GL_ATTRIBUTE_NODISCARD;
+
+/* Return a copy of HAMT, which is not the same in the sense above.
+   This procedure can be used, for example, so that two threads can
+   access the same data independently.  */
+extern Hamt *hamt_copy (Hamt *hamt) _GL_ATTRIBUTE_NODISCARD;
+
+/* Free the resources solely allocated by HAMT and all elements solely
+   contained in it. 
*/ +extern void hamt_free (Hamt *hamt); + +/**********/ +/* Lookup */ +/**********/ + +/* If ELT matches an entry in HAMT, return this entry. Otherwise, + return NULL. */ +extern const Hamt_entry *hamt_lookup (const Hamt *hamt, const Hamt_entry *elt); + +/**************************/ +/* Insertion and Deletion */ +/**************************/ + +/* If *ELT_PTR matches an element already in HAMT, set *ELT_PTR to the + existing element and return the original hamt. Otherwise, insert + *ELT_PTR into a copy of the hamt and return the copy. */ +extern Hamt *hamt_insert (Hamt *hamt, Hamt_entry **elt_ptr) + _GL_ATTRIBUTE_NODISCARD; + +/* If *ELT_PTR matches an element already in HAMT, set *ELT_PTR to the +existing element, remove the element from a copy of the hamt and +return the copy. Otherwise, return the original hamt. */ +extern Hamt *hamt_delete (Hamt *hamt, Hamt_entry **elt_ptr) + _GL_ATTRIBUTE_NODISCARD; + +/* Insert *ELT_PTR into a copy of HAMT and return the copy. If an + existing element was replaced, set *ELT_PTR to this element, and to + NULL otherwise. */ +extern Hamt *hamt_replace (Hamt *hamt, Hamt_entry **elt_ptr) + _GL_ATTRIBUTE_NODISCARD; + +/*************/ +/* Iteration */ +/*************/ + +/* Call PROC for every entry of the hamt until it returns false. The + first argument to the processor is the entry, the second argument + is the value of DATA as received. Return the number of calls that + returned true. */ +extern size_t hamt_do_while (const Hamt *hamt, Hamt_processor *proc, + void *data); + +/***********************/ +/* Destructive Updates */ +/***********************/ + +/* If *ELT_PTR matches an element already in HAMT, set *ELT_PTR to the + element from the table and return false. Otherwise, insert *ELT_PTR + destructively into the hamt and return true. */ +extern bool hamt_insert_x (Hamt *hamt, Hamt_entry **elt_ptr); + +/* Insert ELT destructively into HAMT. If an existing element was + replaced, return true. Otherwise, return false. 
*/ +extern bool hamt_replace_x (Hamt *hamt, Hamt_entry *elt); + +/* If ELT matches an element already in HAMT, remove the element + destructively from the hamt and return true. Otherwise, return + false. */ +extern bool hamt_delete_x (Hamt *hamt, Hamt_entry *elt); + +#endif /* _GL_HAMT_H */ diff --git a/modules/hamt b/modules/hamt new file mode 100644 index 000000000..d73f09c2d --- /dev/null +++ b/modules/hamt @@ -0,0 +1,29 @@ +Description: +Persistent hash array mapped tries. + +Files: +lib/hamt.h +lib/hamt.c + +Depends-on: +count-one-bits +flexmember +inttypes-incomplete +stdbool +stdint +verify +xalloc + +configure.ac: + +Makefile.am: +lib_SOURCES += hamt.c + +Include: +"hamt.h" + +License: +GPL + +Maintainer: +Marc Nieper-Wisskirchen diff --git a/modules/hamt-tests b/modules/hamt-tests new file mode 100644 index 000000000..f4f0ea4e0 --- /dev/null +++ b/modules/hamt-tests @@ -0,0 +1,11 @@ +Files: +tests/test-hamt.c +tests/macros.h + +Depends-on: + +configure.ac: + +Makefile.am: +TESTS += test-hamt +check_PROGRAMS += test-hamt diff --git a/tests/test-hamt.c b/tests/test-hamt.c new file mode 100644 index 000000000..9c068c404 --- /dev/null +++ b/tests/test-hamt.c @@ -0,0 +1,352 @@ +/* Test of persistent hash array mapped trie implementation. + Copyright (C) 2020 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Marc Nieper-Wißkirchen , 2020. 
*/ + +#include + +#include "hamt.h" +#include "macros.h" +#include "xalloc.h" + +typedef struct +{ + Hamt_entry entry; + int val; +} Element; + +static int +entry_value (const Hamt_entry *elt) +{ + return ((Element *) elt)->val; +} + +static size_t +hash_element (const Hamt_entry *elt) +{ + return entry_value (elt) & ~3; /* We drop the last bits so that we + can test hash collisions. */ +} + +static bool +compare_element (const Hamt_entry *elt1, const Hamt_entry *elt2) +{ + return entry_value (elt1) == entry_value (elt2); +} + +static void +free_element (Hamt_entry *elt) +{ + free (elt); +} + +static Hamt_entry * +make_element (int n) +{ + Element *elt = XMALLOC (Element); + elt->val = n; + return &elt->entry; +} + +static Hamt * +test_hamt_create (void) +{ + return hamt_create (hash_element, compare_element, free_element); +} + + +static int sum = 0; +static int flag; + +static bool +proc (const Hamt_entry *elt, void *data) +{ + if (data == &flag) + { + sum += entry_value (elt); + return true; + } + if (sum > 0) + { + sum = 0; + return true; + } + return false; +} + +static void +test_general (void) +{ + Hamt *hamt = test_hamt_create (); + + Hamt_entry *x5 = make_element (5); + Hamt_entry *p = x5; + Hamt *hamt1 = hamt_insert (hamt, &p); + ASSERT (hamt1 != hamt); + ASSERT (hamt_lookup (hamt, x5) == NULL); + ASSERT (hamt_lookup (hamt1, x5) == x5); + hamt_free (hamt); + + Hamt_entry *y5 = make_element (5); + p = y5; + Hamt *hamt2 = hamt_insert (hamt1, &p); + ASSERT (hamt2 == hamt1); + ASSERT (p == x5); + ASSERT (hamt_lookup (hamt1, y5) == x5); + + p = y5; + hamt = hamt_replace (hamt1, &p); + ASSERT (p == x5); + ASSERT (hamt_lookup (hamt, y5) == y5); + hamt_free (hamt); + y5 = make_element (5); + + Hamt_entry *z37 = make_element (37); + p = z37; + hamt2 = hamt_insert (hamt1, &p); + ASSERT (hamt2 != hamt1); + ASSERT (p == z37); + ASSERT (hamt_lookup (hamt1, z37) == NULL); + ASSERT (hamt_lookup (hamt2, z37) == z37); + hamt_free (hamt1); + + ASSERT (hamt_lookup (hamt2, 
x5) == x5); + ASSERT (hamt_lookup (hamt2, z37) == z37); + + ASSERT (hamt_do_while (hamt2, proc, &flag) == 2); + ASSERT (sum == 42); + ASSERT (hamt_do_while (hamt2, proc, NULL) == 1); + ASSERT (sum == 0); + + p = y5; + hamt1 = hamt_delete (hamt2, &p); + ASSERT (hamt1 != hamt2); + ASSERT (p == x5); + + ASSERT (hamt_lookup (hamt1, x5) == NULL); + ASSERT (hamt_lookup (hamt2, x5) == x5); + + hamt_free (hamt1); + Hamt_entry *x4 = make_element (4); + hamt1 = hamt_insert (hamt2, &x4); + hamt_free (hamt2); + Hamt_entry *x6 = make_element (6); + hamt2 = hamt_insert (hamt1, &x6); + hamt_free (hamt1); + ASSERT (hamt_do_while (hamt2, proc, &flag) == 4); + ASSERT (sum == 52); + + hamt1 = hamt_delete (hamt2, &x4); + sum = 0; + ASSERT (hamt_do_while (hamt2, proc, &flag) == 4); + ASSERT (sum = 52); + sum = 0; + ASSERT (hamt_do_while (hamt1, proc, &flag) == 3); + ASSERT (sum = 48); + + hamt_free (hamt1); + hamt_free (hamt2); + free_element (y5); +} + +static bool +true_processor (_GL_ATTRIBUTE_MAYBE_UNUSED const Hamt_entry *elt, + _GL_ATTRIBUTE_MAYBE_UNUSED void *data) +{ + return true; +} + +static size_t +element_count (Hamt *hamt) +{ + return hamt_do_while (hamt, true_processor, NULL); +} + +struct find_values_context +{ + size_t n; + int *elts; + bool *found; +}; + +static bool +find_values_processor (const Hamt_entry *entry, void *data) +{ + struct find_values_context *ctx = data; + int val = entry_value (entry); + for (size_t i = 0; i < ctx->n; ++i) + if (ctx->elts [i] == val && !ctx->found [i]) + { + ctx->found [i] = true; + return true; + } + return false; +} + +static bool +find_values (Hamt *hamt, size_t n, int *elts) +{ + bool *found = XCALLOC (n, bool); + struct find_values_context ctx = {n, elts, found}; + bool res = hamt_do_while (hamt, find_values_processor, &ctx) == n; + free (found); + return res; +} + +static size_t +insert_values (Hamt **hamt, size_t n, int *elts, bool destructive) +{ + size_t cnt = 0; + for (size_t i = 0; i < n; ++i) + { + Hamt_entry *p = 
make_element (elts [i]); + Hamt_entry *q = p; + if (destructive) + { + if (hamt_insert_x (*hamt, &p)) + ++cnt; + else + free_element (q); + } + else + { + Hamt *new_hamt = hamt_insert (*hamt, &p); + if (new_hamt != *hamt) + { + hamt_free (*hamt); + *hamt = new_hamt; + ++cnt; + } + else + { + free_element (q); + } + } + } + return cnt; +} + +static size_t +replace_values (Hamt **hamt, size_t n, int *elts, bool destructive) +{ + size_t cnt = 0; + for (size_t i = 0; i < n; ++i) + { + Hamt_entry *p = make_element (elts [i]); + if (destructive) + { + if (hamt_replace_x (*hamt, p)) + ++cnt; + } + else + { + Hamt *new_hamt = hamt_replace (*hamt, &p); + hamt_free (*hamt); + *hamt = new_hamt; + if (p != NULL) + ++cnt; + } + } + return cnt; +} + +static size_t +delete_values (Hamt **hamt, size_t n, int *elts, bool destructive) +{ + size_t cnt = 0; + for (size_t i = 0; i < n; ++i) + { + Hamt_entry *p = make_element (elts [i]); + Hamt_entry *q = p; + if (destructive) + { + if (hamt_delete_x (*hamt, p)) + ++cnt; + } + else + { + Hamt *new_hamt = hamt_delete (*hamt, &p); + if (new_hamt != *hamt) + { + hamt_free (*hamt); + *hamt = new_hamt; + ++cnt; + } + } + free (q); + } + return cnt; +} + +static int val_array1 [10] = {1, 2, 3, 4, 33, 34, 35, 36, 1024, 1025}; +static int val_array2 [10] = {1, 2, 34, 36, 1025, 32768, 32769, 32770, 32771, + 32772}; + +static void +test_functional_update (void) +{ + Hamt *hamt = test_hamt_create (); + + ASSERT (insert_values (&hamt, 10, val_array1, false) == 10); + ASSERT (element_count (hamt) == 10); + ASSERT (find_values (hamt, 10, val_array1)); + ASSERT (insert_values (&hamt, 10, val_array2, false) == 5); + ASSERT (element_count (hamt) == 15); + ASSERT (delete_values (&hamt, 10, val_array1, false) == 10); + ASSERT (element_count (hamt) == 5); + ASSERT (delete_values (&hamt, 10, val_array2, false) == 5); + ASSERT (element_count (hamt) == 0); + + ASSERT (replace_values (&hamt, 10, val_array1, false) == 0); + ASSERT (element_count (hamt) == 10); 
+ ASSERT (find_values (hamt, 10, val_array1)); + ASSERT (replace_values (&hamt, 10, val_array2, false) == 5); + ASSERT (element_count (hamt) == 15); + + hamt_free (hamt); +} + +static void +test_destructive_update (void) +{ + Hamt *hamt = test_hamt_create (); + + ASSERT (insert_values (&hamt, 10, val_array1, true) == 10); + ASSERT (element_count (hamt) == 10); + ASSERT (find_values (hamt, 10, val_array1)); + ASSERT (insert_values (&hamt, 10, val_array2, true) == 5); + ASSERT (element_count (hamt) == 15); + ASSERT (delete_values (&hamt, 10, val_array1, true) == 10); + ASSERT (element_count (hamt) == 5); + ASSERT (delete_values (&hamt, 10, val_array2, true) == 5); + ASSERT (element_count (hamt) == 0); + + ASSERT (replace_values (&hamt, 10, val_array1, true) == 0); + ASSERT (element_count (hamt) == 10); + ASSERT (find_values (hamt, 10, val_array1)); + ASSERT (replace_values (&hamt, 10, val_array2, true) == 5); + ASSERT (element_count (hamt) == 15); + + hamt_free (hamt); +} + +int +main (void) +{ + test_general (); + test_functional_update (); + test_destructive_update (); +} -- 2.25.1