>From 7159b249f613ec38fea1d29103b03de0e18834ad Mon Sep 17 00:00:00 2001 From: Bruno Haible Date: Sun, 11 Oct 2020 23:24:22 +0200 Subject: [PATCH 1/3] hash, xhash: Move comments to the .h file. * lib/hash.c: Move comments meant for the user from here... * lib/xhash.c: ... and here... * lib/hash.h: ... to here. --- ChangeLog | 7 ++ lib/hash.c | 125 ------------------------------- lib/hash.h | 239 ++++++++++++++++++++++++++++++++++++++++++++++++++---------- lib/xhash.c | 6 -- 4 files changed, 206 insertions(+), 171 deletions(-) diff --git a/ChangeLog b/ChangeLog index 6a0e15d..83518c7 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,10 @@ +2020-10-11 Bruno Haible + + hash, xhash: Move comments to the .h file. + * lib/hash.c: Move comments meant for the user from here... + * lib/xhash.c: ... and here... + * lib/hash.h: ... to here. + 2020-10-11 Benji Wiebe getprogname: Add support for OpenServer 6 and UnixWare 7. diff --git a/lib/hash.c b/lib/hash.c index 7aaf106..6b7b76a 100644 --- a/lib/hash.c +++ b/lib/hash.c @@ -138,38 +138,24 @@ static const Hash_tuning default_tuning = /* Information and lookup. */ -/* The following few functions provide information about the overall hash - table organization: the number of entries, number of buckets and maximum - length of buckets. */ - -/* Return the number of buckets in the hash table. The table size, the total - number of buckets (used plus unused), or the maximum number of slots, are - the same quantity. */ - size_t hash_get_n_buckets (const Hash_table *table) { return table->n_buckets; } -/* Return the number of slots in use (non-empty buckets). */ - size_t hash_get_n_buckets_used (const Hash_table *table) { return table->n_buckets_used; } -/* Return the number of active entries. */ - size_t hash_get_n_entries (const Hash_table *table) { return table->n_entries; } -/* Return the length of the longest chain (bucket). */ - size_t hash_get_max_bucket_length (const Hash_table *table) { @@ -194,9 +180,6 @@ hash_get_max_bucket_length (const Hash_table *table) return max_bucket_length; } -/* Do a mild validation of a hash table, by traversing it and checking two - statistics. */ - bool hash_table_ok (const Hash_table *table) { @@ -254,9 +237,6 @@ safe_hasher (const Hash_table *table, const void *key) return table->bucket + n; } -/* If ENTRY matches an entry already in the hash table, return the - entry from the table. Otherwise, return NULL. */ - void * hash_lookup (const Hash_table *table, const void *entry) { @@ -275,15 +255,6 @@ hash_lookup (const Hash_table *table, const void *entry) /* Walking. */ -/* The functions in this page traverse the hash table and process the - contained entries. For the traversal to work properly, the hash table - should not be resized nor modified while any particular entry is being - processed. In particular, entries should not be added, and an entry - may be removed only if there is no shrink threshold and the entry being - removed has already been passed to hash_get_next. */ - -/* Return the first data in the table, or NULL if the table is empty. */ - void * hash_get_first (const Hash_table *table) { @@ -299,10 +270,6 @@ hash_get_first (const Hash_table *table) return bucket->data; } -/* Return the user data for the entry following ENTRY, where ENTRY has been - returned by a previous call to either 'hash_get_first' or 'hash_get_next'. - Return NULL if there are no more entries. */ - void * hash_get_next (const Hash_table *table, const void *entry) { @@ -328,10 +295,6 @@ hash_get_next (const Hash_table *table, const void *entry) return NULL; } -/* Fill BUFFER with pointers to active user entries in the hash table, then - return the number of pointers copied. Do not copy more than BUFFER_SIZE - pointers. */ - size_t hash_get_entries (const Hash_table *table, void **buffer, size_t buffer_size) @@ -356,14 +319,6 @@ hash_get_entries (const Hash_table *table, void **buffer, return counter; } -/* Call a PROCESSOR function for each entry of a hash table, and return the - number of entries for which the processor function returned success. A - pointer to some PROCESSOR_DATA which will be made available to each call to - the processor function. The PROCESSOR accepts two arguments: the first is - the user entry being walked into, the second is the value of PROCESSOR_DATA - as received. The walking continue for as long as the PROCESSOR function - returns nonzero. When it returns zero, the walking is interrupted. */ - size_t hash_do_for_each (const Hash_table *table, Hash_processor processor, void *processor_data) @@ -390,9 +345,6 @@ hash_do_for_each (const Hash_table *table, Hash_processor processor, /* Allocation and clean-up. */ -/* Return a hash index for a NUL-terminated STRING between 0 and N_BUCKETS-1. - This is a convenience routine for constructing other hashing functions. */ - #if USE_DIFF_HASH /* About hashings, Paul Eggert writes to me (FP), on 1994-01-01: "Please see @@ -556,40 +508,6 @@ compute_bucket_size (size_t candidate, const Hash_tuning *tuning) return candidate; } -/* Allocate and return a new hash table, or NULL upon failure. The initial - number of buckets is automatically selected so as to _guarantee_ that you - may insert at least CANDIDATE different user entries before any growth of - the hash table size occurs. So, if have a reasonably tight a-priori upper - bound on the number of entries you intend to insert in the hash table, you - may save some table memory and insertion time, by specifying it here. If - the IS_N_BUCKETS field of the TUNING structure is true, the CANDIDATE - argument has its meaning changed to the wanted number of buckets. - - TUNING points to a structure of user-supplied values, in case some fine - tuning is wanted over the default behavior of the hasher. If TUNING is - NULL, the default tuning parameters are used instead. If TUNING is - provided but the values requested are out of bounds or might cause - rounding errors, return NULL. - - The user-supplied HASHER function, when not NULL, accepts two - arguments ENTRY and TABLE_SIZE. It computes, by hashing ENTRY contents, a - slot number for that entry which should be in the range 0..TABLE_SIZE-1. - This slot number is then returned. - - The user-supplied COMPARATOR function, when not NULL, accepts two - arguments pointing to user data, it then returns true for a pair of entries - that compare equal, or false otherwise. This function is internally called - on entries which are already known to hash to the same bucket index, - but which are distinct pointers. - - The user-supplied DATA_FREER function, when not NULL, may be later called - with the user data as an argument, just before the entry containing the - data gets freed. This happens from within 'hash_free' or 'hash_clear'. - You should specify this function only if you want these functions to free - all of your 'data' data. This is typically the case when your data is - simply an auxiliary struct that you have malloc'd to aggregate several - values. */ - Hash_table * hash_initialize (size_t candidate, const Hash_tuning *tuning, Hash_hasher hasher, Hash_comparator comparator, @@ -645,10 +563,6 @@ hash_initialize (size_t candidate, const Hash_tuning *tuning, return NULL; } -/* Make all buckets empty, placing any chained entries on the free list. - Apply the user-specified function data_freer (if any) to the datas of any - affected entries. */ - void hash_clear (Hash_table *table) { @@ -687,11 +601,6 @@ hash_clear (Hash_table *table) table->n_entries = 0; } -/* Reclaim all storage associated with a hash table. If a data_freer - function has been supplied by the user when the hash table was created, - this function applies it to the data of each entry before freeing that - entry. */ - void hash_free (Hash_table *table) { @@ -931,14 +840,6 @@ transfer_entries (Hash_table *dst, Hash_table *src, bool safe) return true; } -/* For an already existing hash table, change the number of buckets through - specifying CANDIDATE. The contents of the hash table are preserved. The - new number of buckets is automatically selected so as to _guarantee_ that - the table may receive at least CANDIDATE different user entries, including - those already in the table, before any other growth of the hash table size - occurs. If TUNING->IS_N_BUCKETS is true, then CANDIDATE specifies the - exact number of buckets desired. Return true iff the rehash succeeded. */ - bool hash_rehash (Hash_table *table, size_t candidate) { @@ -1018,22 +919,6 @@ hash_rehash (Hash_table *table, size_t candidate) return false; } -/* Insert ENTRY into hash TABLE if there is not already a matching entry. - - Return -1 upon memory allocation failure. - Return 1 if insertion succeeded. - Return 0 if there is already a matching entry in the table, - and in that case, if MATCHED_ENT is non-NULL, set *MATCHED_ENT - to that entry. - - This interface is easier to use than hash_insert when you must - distinguish between the latter two cases. More importantly, - hash_insert is unusable for some types of ENTRY values. When using - hash_insert, the only way to distinguish those cases is to compare - the return value and ENTRY. That works only when you can have two - different ENTRY values that point to data that compares "equal". Thus, - when the ENTRY value is a simple scalar, you must use - hash_insert_if_absent. ENTRY must not be NULL. */ int hash_insert_if_absent (Hash_table *table, void const *entry, void const **matched_ent) @@ -1116,12 +1001,6 @@ hash_insert_if_absent (Hash_table *table, void const *entry, return 1; } -/* If ENTRY matches an entry already in the hash table, return the pointer - to the entry from the table. Otherwise, insert ENTRY and return ENTRY. - Return NULL if the storage required for insertion cannot be allocated. - This implementation does not support duplicate entries or insertion of - NULL. */ - void * hash_insert (Hash_table *table, void const *entry) { @@ -1132,10 +1011,6 @@ hash_insert (Hash_table *table, void const *entry) : (void *) (err == 0 ? matched_ent : entry)); } -/* If ENTRY is already in the table, remove it and return the just-deleted - data (the user may want to deallocate its storage). If ENTRY is not in the - table, don't modify the table and return NULL. */ - void * hash_delete (Hash_table *table, const void *entry) { diff --git a/lib/hash.h b/lib/hash.h index e5af43c..848117d 100644 --- a/lib/hash.h +++ b/lib/hash.h @@ -27,11 +27,6 @@ # include # include -typedef size_t (*Hash_hasher) (const void *, size_t); -typedef bool (*Hash_comparator) (const void *, const void *); -typedef void (*Hash_data_freer) (void *); -typedef bool (*Hash_processor) (void *, void *); - struct hash_tuning { /* This structure is mainly used for 'hash_initialize', see the block @@ -50,40 +45,204 @@ struct hash_table; typedef struct hash_table Hash_table; -/* Information and lookup. */ -size_t hash_get_n_buckets (const Hash_table *) _GL_ATTRIBUTE_PURE; -size_t hash_get_n_buckets_used (const Hash_table *) _GL_ATTRIBUTE_PURE; -size_t hash_get_n_entries (const Hash_table *) _GL_ATTRIBUTE_PURE; -size_t hash_get_max_bucket_length (const Hash_table *) _GL_ATTRIBUTE_PURE; -bool hash_table_ok (const Hash_table *) _GL_ATTRIBUTE_PURE; -void hash_print_statistics (const Hash_table *, FILE *); -void *hash_lookup (const Hash_table *, const void *); - -/* Walking. */ -void *hash_get_first (const Hash_table *) _GL_ATTRIBUTE_PURE; -void *hash_get_next (const Hash_table *, const void *); -size_t hash_get_entries (const Hash_table *, void **, size_t); -size_t hash_do_for_each (const Hash_table *, Hash_processor, void *); - -/* Allocation and clean-up. */ -size_t hash_string (const char *, size_t) _GL_ATTRIBUTE_PURE; -void hash_reset_tuning (Hash_tuning *); -Hash_table *hash_initialize (size_t, const Hash_tuning *, - Hash_hasher, Hash_comparator, - Hash_data_freer) _GL_ATTRIBUTE_NODISCARD; -Hash_table *hash_xinitialize (size_t, const Hash_tuning *, - Hash_hasher, Hash_comparator, - Hash_data_freer) _GL_ATTRIBUTE_NODISCARD; -void hash_clear (Hash_table *); -void hash_free (Hash_table *); - -/* Insertion and deletion. */ -bool hash_rehash (Hash_table *, size_t) _GL_ATTRIBUTE_NODISCARD; -void *hash_insert (Hash_table *, const void *) _GL_ATTRIBUTE_NODISCARD; -void *hash_xinsert (Hash_table *, const void *); - -int hash_insert_if_absent (Hash_table *table, const void *entry, - const void **matched_ent); -void *hash_delete (Hash_table *, const void *); +/* + * Information and lookup. + */ + +/* The following few functions provide information about the overall hash + table organization: the number of entries, number of buckets and maximum + length of buckets. */ + +/* Return the number of buckets in the hash table. The table size, the total + number of buckets (used plus unused), or the maximum number of slots, are + the same quantity. */ +extern size_t hash_get_n_buckets (const Hash_table *table) + _GL_ATTRIBUTE_PURE; + +/* Return the number of slots in use (non-empty buckets). */ +extern size_t hash_get_n_buckets_used (const Hash_table *table) + _GL_ATTRIBUTE_PURE; + +/* Return the number of active entries. */ +extern size_t hash_get_n_entries (const Hash_table *table) + _GL_ATTRIBUTE_PURE; + +/* Return the length of the longest chain (bucket). */ +extern size_t hash_get_max_bucket_length (const Hash_table *table) + _GL_ATTRIBUTE_PURE; + +/* Do a mild validation of a hash table, by traversing it and checking two + statistics. */ +extern bool hash_table_ok (const Hash_table *table) + _GL_ATTRIBUTE_PURE; + +extern void hash_print_statistics (const Hash_table *table, FILE *stream); + +/* If ENTRY matches an entry already in the hash table, return the + entry from the table. Otherwise, return NULL. */ +extern void *hash_lookup (const Hash_table *table, const void *entry); + +/* + * Walking. + */ + +/* The functions in this page traverse the hash table and process the + contained entries. For the traversal to work properly, the hash table + should not be resized nor modified while any particular entry is being + processed. In particular, entries should not be added, and an entry + may be removed only if there is no shrink threshold and the entry being + removed has already been passed to hash_get_next. */ + +/* Return the first data in the table, or NULL if the table is empty. */ +extern void *hash_get_first (const Hash_table *table) + _GL_ATTRIBUTE_PURE; + +/* Return the user data for the entry following ENTRY, where ENTRY has been + returned by a previous call to either 'hash_get_first' or 'hash_get_next'. + Return NULL if there are no more entries. */ +extern void *hash_get_next (const Hash_table *table, const void *entry); + +/* Fill BUFFER with pointers to active user entries in the hash table, then + return the number of pointers copied. Do not copy more than BUFFER_SIZE + pointers. */ +extern size_t hash_get_entries (const Hash_table *table, void **buffer, + size_t buffer_size); + +typedef bool (*Hash_processor) (void *entry, void *processor_data); + +/* Call a PROCESSOR function for each entry of a hash table, and return the + number of entries for which the processor function returned success. A + pointer to some PROCESSOR_DATA which will be made available to each call to + the processor function. The PROCESSOR accepts two arguments: the first is + the user entry being walked into, the second is the value of PROCESSOR_DATA + as received. The walking continue for as long as the PROCESSOR function + returns nonzero. When it returns zero, the walking is interrupted. */ +extern size_t hash_do_for_each (const Hash_table *table, + Hash_processor processor, void *processor_data); + +/* + * Allocation and clean-up. + */ + +/* Return a hash index for a NUL-terminated STRING between 0 and N_BUCKETS-1. + This is a convenience routine for constructing other hashing functions. */ +extern size_t hash_string (const char *string, size_t n_buckets) + _GL_ATTRIBUTE_PURE; + +extern void hash_reset_tuning (Hash_tuning *tuning); + +typedef size_t (*Hash_hasher) (const void *entry, size_t table_size); +typedef bool (*Hash_comparator) (const void *entry1, const void *entry2); +typedef void (*Hash_data_freer) (void *entry); + +/* Allocate and return a new hash table, or NULL upon failure. The initial + number of buckets is automatically selected so as to _guarantee_ that you + may insert at least CANDIDATE different user entries before any growth of + the hash table size occurs. So, if have a reasonably tight a-priori upper + bound on the number of entries you intend to insert in the hash table, you + may save some table memory and insertion time, by specifying it here. If + the IS_N_BUCKETS field of the TUNING structure is true, the CANDIDATE + argument has its meaning changed to the wanted number of buckets. + + TUNING points to a structure of user-supplied values, in case some fine + tuning is wanted over the default behavior of the hasher. If TUNING is + NULL, the default tuning parameters are used instead. If TUNING is + provided but the values requested are out of bounds or might cause + rounding errors, return NULL. + + The user-supplied HASHER function, when not NULL, accepts two + arguments ENTRY and TABLE_SIZE. It computes, by hashing ENTRY contents, a + slot number for that entry which should be in the range 0..TABLE_SIZE-1. + This slot number is then returned. + + The user-supplied COMPARATOR function, when not NULL, accepts two + arguments pointing to user data, it then returns true for a pair of entries + that compare equal, or false otherwise. This function is internally called + on entries which are already known to hash to the same bucket index, + but which are distinct pointers. + + The user-supplied DATA_FREER function, when not NULL, may be later called + with the user data as an argument, just before the entry containing the + data gets freed. This happens from within 'hash_free' or 'hash_clear'. + You should specify this function only if you want these functions to free + all of your 'data' data. This is typically the case when your data is + simply an auxiliary struct that you have malloc'd to aggregate several + values. */ +extern Hash_table *hash_initialize (size_t candidate, + const Hash_tuning *tuning, + Hash_hasher hasher, + Hash_comparator comparator, + Hash_data_freer data_freer) + _GL_ATTRIBUTE_NODISCARD; + +/* Same as hash_initialize, but invokes xalloc_die on memory exhaustion. */ +/* This function is defined by module 'xhash'. */ +extern Hash_table *hash_xinitialize (size_t candidate, + const Hash_tuning *tuning, + Hash_hasher hasher, + Hash_comparator comparator, + Hash_data_freer data_freer) + _GL_ATTRIBUTE_NODISCARD; + +/* Make all buckets empty, placing any chained entries on the free list. + Apply the user-specified function data_freer (if any) to the datas of any + affected entries. */ +extern void hash_clear (Hash_table *table); + +/* Reclaim all storage associated with a hash table. If a data_freer + function has been supplied by the user when the hash table was created, + this function applies it to the data of each entry before freeing that + entry. */ +extern void hash_free (Hash_table *table); + +/* + * Insertion and deletion. + */ + +/* For an already existing hash table, change the number of buckets through + specifying CANDIDATE. The contents of the hash table are preserved. The + new number of buckets is automatically selected so as to _guarantee_ that + the table may receive at least CANDIDATE different user entries, including + those already in the table, before any other growth of the hash table size + occurs. If TUNING->IS_N_BUCKETS is true, then CANDIDATE specifies the + exact number of buckets desired. Return true iff the rehash succeeded. */ +extern bool hash_rehash (Hash_table *table, size_t candidate) + _GL_ATTRIBUTE_NODISCARD; + +/* If ENTRY matches an entry already in the hash table, return the pointer + to the entry from the table. Otherwise, insert ENTRY and return ENTRY. + Return NULL if the storage required for insertion cannot be allocated. + This implementation does not support duplicate entries or insertion of + NULL. */ +extern void *hash_insert (Hash_table *table, const void *entry) + _GL_ATTRIBUTE_NODISCARD; + +/* Same as hash_insert, but invokes xalloc_die on memory exhaustion. */ +/* This function is defined by module 'xhash'. */ +extern void *hash_xinsert (Hash_table *table, const void *entry); + +/* Insert ENTRY into hash TABLE if there is not already a matching entry. + + Return -1 upon memory allocation failure. + Return 1 if insertion succeeded. + Return 0 if there is already a matching entry in the table, + and in that case, if MATCHED_ENT is non-NULL, set *MATCHED_ENT + to that entry. + + This interface is easier to use than hash_insert when you must + distinguish between the latter two cases. More importantly, + hash_insert is unusable for some types of ENTRY values. When using + hash_insert, the only way to distinguish those cases is to compare + the return value and ENTRY. That works only when you can have two + different ENTRY values that point to data that compares "equal". Thus, + when the ENTRY value is a simple scalar, you must use + hash_insert_if_absent. ENTRY must not be NULL. */ +extern int hash_insert_if_absent (Hash_table *table, const void *entry, + const void **matched_ent); + +/* If ENTRY is already in the table, remove it and return the just-deleted + data (the user may want to deallocate its storage). If ENTRY is not in the + table, don't modify the table and return NULL. */ +extern void *hash_delete (Hash_table *table, const void *entry); #endif diff --git a/lib/xhash.c b/lib/xhash.c index 95df545..d358f4e 100644 --- a/lib/xhash.c +++ b/lib/xhash.c @@ -22,9 +22,6 @@ #include "xalloc.h" -/* Same as hash_initialize, but invokes xalloc_die on memory - exhaustion. */ - Hash_table * hash_xinitialize (size_t candidate, const Hash_tuning *tuning, Hash_hasher hasher, Hash_comparator comparator, @@ -37,9 +34,6 @@ hash_xinitialize (size_t candidate, const Hash_tuning *tuning, return res; } -/* Same as hash_insert, but invokes xalloc_die on memory - exhaustion. */ - void * hash_xinsert (Hash_table *table, void const *entry) { -- 2.7.4