>From 7159b249f613ec38fea1d29103b03de0e18834ad Mon Sep 17 00:00:00 2001
From: Bruno Haible <bruno@clisp.org>
Date: Sun, 11 Oct 2020 23:24:22 +0200
Subject: [PATCH 1/3] hash, xhash: Move comments to the .h file.

* lib/hash.c: Move comments meant for the user from here...
* lib/xhash.c: ... and here...
* lib/hash.h: ... to here.
---
 ChangeLog   |   7 ++
 lib/hash.c  | 125 -------------------------------
 lib/hash.h  | 239 ++++++++++++++++++++++++++++++++++++++++++++++++++----------
 lib/xhash.c |   6 --
 4 files changed, 206 insertions(+), 171 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 6a0e15d..83518c7 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2020-10-11  Bruno Haible  <bruno@clisp.org>
+
+	hash, xhash: Move comments to the .h file.
+	* lib/hash.c: Move comments meant for the user from here...
+	* lib/xhash.c: ... and here...
+	* lib/hash.h: ... to here.
+
 2020-10-11  Benji Wiebe  <benjiwiebe14@gmail.com>
 
 	getprogname: Add support for OpenServer 6 and UnixWare 7.
diff --git a/lib/hash.c b/lib/hash.c
index 7aaf106..6b7b76a 100644
--- a/lib/hash.c
+++ b/lib/hash.c
@@ -138,38 +138,24 @@ static const Hash_tuning default_tuning =
 
 /* Information and lookup.  */
 
-/* The following few functions provide information about the overall hash
-   table organization: the number of entries, number of buckets and maximum
-   length of buckets.  */
-
-/* Return the number of buckets in the hash table.  The table size, the total
-   number of buckets (used plus unused), or the maximum number of slots, are
-   the same quantity.  */
-
 size_t
 hash_get_n_buckets (const Hash_table *table)
 {
   return table->n_buckets;
 }
 
-/* Return the number of slots in use (non-empty buckets).  */
-
 size_t
 hash_get_n_buckets_used (const Hash_table *table)
 {
   return table->n_buckets_used;
 }
 
-/* Return the number of active entries.  */
-
 size_t
 hash_get_n_entries (const Hash_table *table)
 {
   return table->n_entries;
 }
 
-/* Return the length of the longest chain (bucket).  */
-
 size_t
 hash_get_max_bucket_length (const Hash_table *table)
 {
@@ -194,9 +180,6 @@ hash_get_max_bucket_length (const Hash_table *table)
   return max_bucket_length;
 }
 
-/* Do a mild validation of a hash table, by traversing it and checking two
-   statistics.  */
-
 bool
 hash_table_ok (const Hash_table *table)
 {
@@ -254,9 +237,6 @@ safe_hasher (const Hash_table *table, const void *key)
   return table->bucket + n;
 }
 
-/* If ENTRY matches an entry already in the hash table, return the
-   entry from the table.  Otherwise, return NULL.  */
-
 void *
 hash_lookup (const Hash_table *table, const void *entry)
 {
@@ -275,15 +255,6 @@ hash_lookup (const Hash_table *table, const void *entry)
 
 /* Walking.  */
 
-/* The functions in this page traverse the hash table and process the
-   contained entries.  For the traversal to work properly, the hash table
-   should not be resized nor modified while any particular entry is being
-   processed.  In particular, entries should not be added, and an entry
-   may be removed only if there is no shrink threshold and the entry being
-   removed has already been passed to hash_get_next.  */
-
-/* Return the first data in the table, or NULL if the table is empty.  */
-
 void *
 hash_get_first (const Hash_table *table)
 {
@@ -299,10 +270,6 @@ hash_get_first (const Hash_table *table)
       return bucket->data;
 }
 
-/* Return the user data for the entry following ENTRY, where ENTRY has been
-   returned by a previous call to either 'hash_get_first' or 'hash_get_next'.
-   Return NULL if there are no more entries.  */
-
 void *
 hash_get_next (const Hash_table *table, const void *entry)
 {
@@ -328,10 +295,6 @@ hash_get_next (const Hash_table *table, const void *entry)
   return NULL;
 }
 
-/* Fill BUFFER with pointers to active user entries in the hash table, then
-   return the number of pointers copied.  Do not copy more than BUFFER_SIZE
-   pointers.  */
-
 size_t
 hash_get_entries (const Hash_table *table, void **buffer,
                   size_t buffer_size)
@@ -356,14 +319,6 @@ hash_get_entries (const Hash_table *table, void **buffer,
   return counter;
 }
 
-/* Call a PROCESSOR function for each entry of a hash table, and return the
-   number of entries for which the processor function returned success.  A
-   pointer to some PROCESSOR_DATA which will be made available to each call to
-   the processor function.  The PROCESSOR accepts two arguments: the first is
-   the user entry being walked into, the second is the value of PROCESSOR_DATA
-   as received.  The walking continue for as long as the PROCESSOR function
-   returns nonzero.  When it returns zero, the walking is interrupted.  */
-
 size_t
 hash_do_for_each (const Hash_table *table, Hash_processor processor,
                   void *processor_data)
@@ -390,9 +345,6 @@ hash_do_for_each (const Hash_table *table, Hash_processor processor,
 
 /* Allocation and clean-up.  */
 
-/* Return a hash index for a NUL-terminated STRING between 0 and N_BUCKETS-1.
-   This is a convenience routine for constructing other hashing functions.  */
-
 #if USE_DIFF_HASH
 
 /* About hashings, Paul Eggert writes to me (FP), on 1994-01-01: "Please see
@@ -556,40 +508,6 @@ compute_bucket_size (size_t candidate, const Hash_tuning *tuning)
   return candidate;
 }
 
-/* Allocate and return a new hash table, or NULL upon failure.  The initial
-   number of buckets is automatically selected so as to _guarantee_ that you
-   may insert at least CANDIDATE different user entries before any growth of
-   the hash table size occurs.  So, if have a reasonably tight a-priori upper
-   bound on the number of entries you intend to insert in the hash table, you
-   may save some table memory and insertion time, by specifying it here.  If
-   the IS_N_BUCKETS field of the TUNING structure is true, the CANDIDATE
-   argument has its meaning changed to the wanted number of buckets.
-
-   TUNING points to a structure of user-supplied values, in case some fine
-   tuning is wanted over the default behavior of the hasher.  If TUNING is
-   NULL, the default tuning parameters are used instead.  If TUNING is
-   provided but the values requested are out of bounds or might cause
-   rounding errors, return NULL.
-
-   The user-supplied HASHER function, when not NULL, accepts two
-   arguments ENTRY and TABLE_SIZE.  It computes, by hashing ENTRY contents, a
-   slot number for that entry which should be in the range 0..TABLE_SIZE-1.
-   This slot number is then returned.
-
-   The user-supplied COMPARATOR function, when not NULL, accepts two
-   arguments pointing to user data, it then returns true for a pair of entries
-   that compare equal, or false otherwise.  This function is internally called
-   on entries which are already known to hash to the same bucket index,
-   but which are distinct pointers.
-
-   The user-supplied DATA_FREER function, when not NULL, may be later called
-   with the user data as an argument, just before the entry containing the
-   data gets freed.  This happens from within 'hash_free' or 'hash_clear'.
-   You should specify this function only if you want these functions to free
-   all of your 'data' data.  This is typically the case when your data is
-   simply an auxiliary struct that you have malloc'd to aggregate several
-   values.  */
-
 Hash_table *
 hash_initialize (size_t candidate, const Hash_tuning *tuning,
                  Hash_hasher hasher, Hash_comparator comparator,
@@ -645,10 +563,6 @@ hash_initialize (size_t candidate, const Hash_tuning *tuning,
   return NULL;
 }
 
-/* Make all buckets empty, placing any chained entries on the free list.
-   Apply the user-specified function data_freer (if any) to the datas of any
-   affected entries.  */
-
 void
 hash_clear (Hash_table *table)
 {
@@ -687,11 +601,6 @@ hash_clear (Hash_table *table)
   table->n_entries = 0;
 }
 
-/* Reclaim all storage associated with a hash table.  If a data_freer
-   function has been supplied by the user when the hash table was created,
-   this function applies it to the data of each entry before freeing that
-   entry.  */
-
 void
 hash_free (Hash_table *table)
 {
@@ -931,14 +840,6 @@ transfer_entries (Hash_table *dst, Hash_table *src, bool safe)
   return true;
 }
 
-/* For an already existing hash table, change the number of buckets through
-   specifying CANDIDATE.  The contents of the hash table are preserved.  The
-   new number of buckets is automatically selected so as to _guarantee_ that
-   the table may receive at least CANDIDATE different user entries, including
-   those already in the table, before any other growth of the hash table size
-   occurs.  If TUNING->IS_N_BUCKETS is true, then CANDIDATE specifies the
-   exact number of buckets desired.  Return true iff the rehash succeeded.  */
-
 bool
 hash_rehash (Hash_table *table, size_t candidate)
 {
@@ -1018,22 +919,6 @@ hash_rehash (Hash_table *table, size_t candidate)
   return false;
 }
 
-/* Insert ENTRY into hash TABLE if there is not already a matching entry.
-
-   Return -1 upon memory allocation failure.
-   Return 1 if insertion succeeded.
-   Return 0 if there is already a matching entry in the table,
-   and in that case, if MATCHED_ENT is non-NULL, set *MATCHED_ENT
-   to that entry.
-
-   This interface is easier to use than hash_insert when you must
-   distinguish between the latter two cases.  More importantly,
-   hash_insert is unusable for some types of ENTRY values.  When using
-   hash_insert, the only way to distinguish those cases is to compare
-   the return value and ENTRY.  That works only when you can have two
-   different ENTRY values that point to data that compares "equal".  Thus,
-   when the ENTRY value is a simple scalar, you must use
-   hash_insert_if_absent.  ENTRY must not be NULL.  */
 int
 hash_insert_if_absent (Hash_table *table, void const *entry,
                        void const **matched_ent)
@@ -1116,12 +1001,6 @@ hash_insert_if_absent (Hash_table *table, void const *entry,
   return 1;
 }
 
-/* If ENTRY matches an entry already in the hash table, return the pointer
-   to the entry from the table.  Otherwise, insert ENTRY and return ENTRY.
-   Return NULL if the storage required for insertion cannot be allocated.
-   This implementation does not support duplicate entries or insertion of
-   NULL.  */
-
 void *
 hash_insert (Hash_table *table, void const *entry)
 {
@@ -1132,10 +1011,6 @@ hash_insert (Hash_table *table, void const *entry)
           : (void *) (err == 0 ? matched_ent : entry));
 }
 
-/* If ENTRY is already in the table, remove it and return the just-deleted
-   data (the user may want to deallocate its storage).  If ENTRY is not in the
-   table, don't modify the table and return NULL.  */
-
 void *
 hash_delete (Hash_table *table, const void *entry)
 {
diff --git a/lib/hash.h b/lib/hash.h
index e5af43c..848117d 100644
--- a/lib/hash.h
+++ b/lib/hash.h
@@ -27,11 +27,6 @@
 # include <stdio.h>
 # include <stdbool.h>
 
-typedef size_t (*Hash_hasher) (const void *, size_t);
-typedef bool (*Hash_comparator) (const void *, const void *);
-typedef void (*Hash_data_freer) (void *);
-typedef bool (*Hash_processor) (void *, void *);
-
 struct hash_tuning
   {
     /* This structure is mainly used for 'hash_initialize', see the block
@@ -50,40 +45,204 @@ struct hash_table;
 
 typedef struct hash_table Hash_table;
 
-/* Information and lookup.  */
-size_t hash_get_n_buckets (const Hash_table *) _GL_ATTRIBUTE_PURE;
-size_t hash_get_n_buckets_used (const Hash_table *) _GL_ATTRIBUTE_PURE;
-size_t hash_get_n_entries (const Hash_table *) _GL_ATTRIBUTE_PURE;
-size_t hash_get_max_bucket_length (const Hash_table *) _GL_ATTRIBUTE_PURE;
-bool hash_table_ok (const Hash_table *) _GL_ATTRIBUTE_PURE;
-void hash_print_statistics (const Hash_table *, FILE *);
-void *hash_lookup (const Hash_table *, const void *);
-
-/* Walking.  */
-void *hash_get_first (const Hash_table *) _GL_ATTRIBUTE_PURE;
-void *hash_get_next (const Hash_table *, const void *);
-size_t hash_get_entries (const Hash_table *, void **, size_t);
-size_t hash_do_for_each (const Hash_table *, Hash_processor, void *);
-
-/* Allocation and clean-up.  */
-size_t hash_string (const char *, size_t) _GL_ATTRIBUTE_PURE;
-void hash_reset_tuning (Hash_tuning *);
-Hash_table *hash_initialize (size_t, const Hash_tuning *,
-                             Hash_hasher, Hash_comparator,
-                             Hash_data_freer) _GL_ATTRIBUTE_NODISCARD;
-Hash_table *hash_xinitialize (size_t, const Hash_tuning *,
-                              Hash_hasher, Hash_comparator,
-                              Hash_data_freer) _GL_ATTRIBUTE_NODISCARD;
-void hash_clear (Hash_table *);
-void hash_free (Hash_table *);
-
-/* Insertion and deletion.  */
-bool hash_rehash (Hash_table *, size_t) _GL_ATTRIBUTE_NODISCARD;
-void *hash_insert (Hash_table *, const void *) _GL_ATTRIBUTE_NODISCARD;
-void *hash_xinsert (Hash_table *, const void *);
-
-int hash_insert_if_absent (Hash_table *table, const void *entry,
-                           const void **matched_ent);
-void *hash_delete (Hash_table *, const void *);
+/*
+ * Information and lookup.
+ */
+
+/* The following few functions provide information about the overall hash
+   table organization: the number of entries, number of buckets and maximum
+   length of buckets.  */
+
+/* Return the number of buckets in the hash table.  The table size, the total
+   number of buckets (used plus unused), or the maximum number of slots, are
+   the same quantity.  */
+extern size_t hash_get_n_buckets (const Hash_table *table)
+       _GL_ATTRIBUTE_PURE;
+
+/* Return the number of slots in use (non-empty buckets).  */
+extern size_t hash_get_n_buckets_used (const Hash_table *table)
+       _GL_ATTRIBUTE_PURE;
+
+/* Return the number of active entries.  */
+extern size_t hash_get_n_entries (const Hash_table *table)
+       _GL_ATTRIBUTE_PURE;
+
+/* Return the length of the longest chain (bucket).  */
+extern size_t hash_get_max_bucket_length (const Hash_table *table)
+       _GL_ATTRIBUTE_PURE;
+
+/* Do a mild validation of a hash table, by traversing it and checking two
+   statistics.  */
+extern bool hash_table_ok (const Hash_table *table)
+       _GL_ATTRIBUTE_PURE;
+
+extern void hash_print_statistics (const Hash_table *table, FILE *stream);
+
+/* If ENTRY matches an entry already in the hash table, return the
+   entry from the table.  Otherwise, return NULL.  */
+extern void *hash_lookup (const Hash_table *table, const void *entry);
+
+/*
+ * Walking.
+ */
+
+/* The functions in this page traverse the hash table and process the
+   contained entries.  For the traversal to work properly, the hash table
+   should not be resized nor modified while any particular entry is being
+   processed.  In particular, entries should not be added, and an entry
+   may be removed only if there is no shrink threshold and the entry being
+   removed has already been passed to hash_get_next.  */
+
+/* Return the first data in the table, or NULL if the table is empty.  */
+extern void *hash_get_first (const Hash_table *table)
+       _GL_ATTRIBUTE_PURE;
+
+/* Return the user data for the entry following ENTRY, where ENTRY has been
+   returned by a previous call to either 'hash_get_first' or 'hash_get_next'.
+   Return NULL if there are no more entries.  */
+extern void *hash_get_next (const Hash_table *table, const void *entry);
+
+/* Fill BUFFER with pointers to active user entries in the hash table, then
+   return the number of pointers copied.  Do not copy more than BUFFER_SIZE
+   pointers.  */
+extern size_t hash_get_entries (const Hash_table *table, void **buffer,
+                                size_t buffer_size);
+
+typedef bool (*Hash_processor) (void *entry, void *processor_data);
+
+/* Call a PROCESSOR function for each entry of a hash table, and return the
+   number of entries for which the processor function returned success.  A
+   pointer to some PROCESSOR_DATA which will be made available to each call to
+   the processor function.  The PROCESSOR accepts two arguments: the first is
+   the user entry being walked into, the second is the value of PROCESSOR_DATA
+   as received.  The walking continue for as long as the PROCESSOR function
+   returns nonzero.  When it returns zero, the walking is interrupted.  */
+extern size_t hash_do_for_each (const Hash_table *table,
+                                Hash_processor processor, void *processor_data);
+
+/*
+ * Allocation and clean-up.
+ */
+
+/* Return a hash index for a NUL-terminated STRING between 0 and N_BUCKETS-1.
+   This is a convenience routine for constructing other hashing functions.  */
+extern size_t hash_string (const char *string, size_t n_buckets)
+       _GL_ATTRIBUTE_PURE;
+
+extern void hash_reset_tuning (Hash_tuning *tuning);
+
+typedef size_t (*Hash_hasher) (const void *entry, size_t table_size);
+typedef bool (*Hash_comparator) (const void *entry1, const void *entry2);
+typedef void (*Hash_data_freer) (void *entry);
+
+/* Allocate and return a new hash table, or NULL upon failure.  The initial
+   number of buckets is automatically selected so as to _guarantee_ that you
+   may insert at least CANDIDATE different user entries before any growth of
+   the hash table size occurs.  So, if have a reasonably tight a-priori upper
+   bound on the number of entries you intend to insert in the hash table, you
+   may save some table memory and insertion time, by specifying it here.  If
+   the IS_N_BUCKETS field of the TUNING structure is true, the CANDIDATE
+   argument has its meaning changed to the wanted number of buckets.
+
+   TUNING points to a structure of user-supplied values, in case some fine
+   tuning is wanted over the default behavior of the hasher.  If TUNING is
+   NULL, the default tuning parameters are used instead.  If TUNING is
+   provided but the values requested are out of bounds or might cause
+   rounding errors, return NULL.
+
+   The user-supplied HASHER function, when not NULL, accepts two
+   arguments ENTRY and TABLE_SIZE.  It computes, by hashing ENTRY contents, a
+   slot number for that entry which should be in the range 0..TABLE_SIZE-1.
+   This slot number is then returned.
+
+   The user-supplied COMPARATOR function, when not NULL, accepts two
+   arguments pointing to user data, it then returns true for a pair of entries
+   that compare equal, or false otherwise.  This function is internally called
+   on entries which are already known to hash to the same bucket index,
+   but which are distinct pointers.
+
+   The user-supplied DATA_FREER function, when not NULL, may be later called
+   with the user data as an argument, just before the entry containing the
+   data gets freed.  This happens from within 'hash_free' or 'hash_clear'.
+   You should specify this function only if you want these functions to free
+   all of your 'data' data.  This is typically the case when your data is
+   simply an auxiliary struct that you have malloc'd to aggregate several
+   values.  */
+extern Hash_table *hash_initialize (size_t candidate,
+                                    const Hash_tuning *tuning,
+                                    Hash_hasher hasher,
+                                    Hash_comparator comparator,
+                                    Hash_data_freer data_freer)
+       _GL_ATTRIBUTE_NODISCARD;
+
+/* Same as hash_initialize, but invokes xalloc_die on memory exhaustion.  */
+/* This function is defined by module 'xhash'.  */
+extern Hash_table *hash_xinitialize (size_t candidate,
+                                     const Hash_tuning *tuning,
+                                     Hash_hasher hasher,
+                                     Hash_comparator comparator,
+                                     Hash_data_freer data_freer)
+       _GL_ATTRIBUTE_NODISCARD;
+
+/* Make all buckets empty, placing any chained entries on the free list.
+   Apply the user-specified function data_freer (if any) to the datas of any
+   affected entries.  */
+extern void hash_clear (Hash_table *table);
+
+/* Reclaim all storage associated with a hash table.  If a data_freer
+   function has been supplied by the user when the hash table was created,
+   this function applies it to the data of each entry before freeing that
+   entry.  */
+extern void hash_free (Hash_table *table);
+
+/*
+ * Insertion and deletion.
+ */
+
+/* For an already existing hash table, change the number of buckets through
+   specifying CANDIDATE.  The contents of the hash table are preserved.  The
+   new number of buckets is automatically selected so as to _guarantee_ that
+   the table may receive at least CANDIDATE different user entries, including
+   those already in the table, before any other growth of the hash table size
+   occurs.  If TUNING->IS_N_BUCKETS is true, then CANDIDATE specifies the
+   exact number of buckets desired.  Return true iff the rehash succeeded.  */
+extern bool hash_rehash (Hash_table *table, size_t candidate)
+       _GL_ATTRIBUTE_NODISCARD;
+
+/* If ENTRY matches an entry already in the hash table, return the pointer
+   to the entry from the table.  Otherwise, insert ENTRY and return ENTRY.
+   Return NULL if the storage required for insertion cannot be allocated.
+   This implementation does not support duplicate entries or insertion of
+   NULL.  */
+extern void *hash_insert (Hash_table *table, const void *entry)
+       _GL_ATTRIBUTE_NODISCARD;
+
+/* Same as hash_insert, but invokes xalloc_die on memory exhaustion.  */
+/* This function is defined by module 'xhash'.  */
+extern void *hash_xinsert (Hash_table *table, const void *entry);
+
+/* Insert ENTRY into hash TABLE if there is not already a matching entry.
+
+   Return -1 upon memory allocation failure.
+   Return 1 if insertion succeeded.
+   Return 0 if there is already a matching entry in the table,
+   and in that case, if MATCHED_ENT is non-NULL, set *MATCHED_ENT
+   to that entry.
+
+   This interface is easier to use than hash_insert when you must
+   distinguish between the latter two cases.  More importantly,
+   hash_insert is unusable for some types of ENTRY values.  When using
+   hash_insert, the only way to distinguish those cases is to compare
+   the return value and ENTRY.  That works only when you can have two
+   different ENTRY values that point to data that compares "equal".  Thus,
+   when the ENTRY value is a simple scalar, you must use
+   hash_insert_if_absent.  ENTRY must not be NULL.  */
+extern int hash_insert_if_absent (Hash_table *table, const void *entry,
+                                  const void **matched_ent);
+
+/* If ENTRY is already in the table, remove it and return the just-deleted
+   data (the user may want to deallocate its storage).  If ENTRY is not in the
+   table, don't modify the table and return NULL.  */
+extern void *hash_delete (Hash_table *table, const void *entry);
 
 #endif
diff --git a/lib/xhash.c b/lib/xhash.c
index 95df545..d358f4e 100644
--- a/lib/xhash.c
+++ b/lib/xhash.c
@@ -22,9 +22,6 @@
 
 #include "xalloc.h"
 
-/* Same as hash_initialize, but invokes xalloc_die on memory
-   exhaustion.  */
-
 Hash_table *
 hash_xinitialize (size_t candidate, const Hash_tuning *tuning,
                   Hash_hasher hasher, Hash_comparator comparator,
@@ -37,9 +34,6 @@ hash_xinitialize (size_t candidate, const Hash_tuning *tuning,
   return res;
 }
 
-/* Same as hash_insert, but invokes xalloc_die on memory
-   exhaustion.  */
-
 void *
 hash_xinsert (Hash_table *table, void const *entry)
 {
-- 
2.7.4