From c792c5e657737a794893ae3505fba633b93e44b7 Mon Sep 17 00:00:00 2001
From: Michal Vaner <vorner@ucw.cz>
Date: Sun, 11 Jan 2009 22:53:05 +0100
Subject: [PATCH] ucw docs: hashtables

---
 ucw/doc/Makefile      |   2 +-
 ucw/doc/generic.txt   |   1 +
 ucw/doc/hashtable.txt | 176 ++++++++++++++++++++++++++++++++++++++++++
 ucw/doc/index.txt     |   7 +-
 ucw/hashtable.h       |  60 ++++++++++++--
 5 files changed, 233 insertions(+), 13 deletions(-)
 create mode 100644 ucw/doc/hashtable.txt

diff --git a/ucw/doc/Makefile b/ucw/doc/Makefile
index b1364848..88e59d5d 100644
--- a/ucw/doc/Makefile
+++ b/ucw/doc/Makefile
@@ -2,7 +2,7 @@
 
 DIRS+=ucw/doc
 
-UCW_DOCS=fastbuf index config configure install basecode hash docsys conf mempool eltpool mainloop generic growbuf unaligned lists chartype unicode prime binsearch heap binheap compress sort
+UCW_DOCS=fastbuf index config configure install basecode hash docsys conf mempool eltpool mainloop generic growbuf unaligned lists chartype unicode prime binsearch heap binheap compress sort hashtable
 UCW_INDEX=$(o)/ucw/doc/def_index.html
 UCW_DOCS_HTML=$(addprefix $(o)/ucw/doc/,$(addsuffix .html,$(UCW_DOCS)))
 
diff --git a/ucw/doc/generic.txt b/ucw/doc/generic.txt
index 2c73f94d..330cad8c 100644
--- a/ucw/doc/generic.txt
+++ b/ucw/doc/generic.txt
@@ -13,6 +13,7 @@ libUCW, and also hints for use of these structures.
   * <<growbuf:gbuf,`gbuf.h`>>
   * <<sort:,Sorting>>
   * <<binheap:,`binheap.h`>>
+  * <<hashtable:,`hashtable.h`>>
 
 // TODO The module list
 
diff --git a/ucw/doc/hashtable.txt b/ucw/doc/hashtable.txt
new file mode 100644
index 00000000..35dbe28a
--- /dev/null
+++ b/ucw/doc/hashtable.txt
@@ -0,0 +1,176 @@
+Hash tables
+===========
+
+A hash table is very universal data structure. It does most of it's
+operations in O(1) average time. The library contains a header to
+generate hash tables suiting to your needs.
+
+They are <<generic:,generic data structures>>.
+
+- <<mandatory,Mandatory macros>>
+- <<functions,Optional function switches>>
+- <<params,Optional parameters>>
+- <<wants,Functionality switches>>
+- <<generated,Generated functions>>
+- <<iterator,Iterator>>
+
+[[mandatory]]
+Mandatory macros
+----------------
+
+- `HASH_NODE` -- a data type where a node dwells. It is usually a
+  structure.
+- `HASH_PREFIX(x)` -- the name generating macro.
+- Key type and name. Must be one of following.
+  [[key_atomic]]
+  * `HASH_KEY_ATOMIC` -- the key (`node\->HASH_KEY_ATOMIC`) is an
+    atomic type which can be compared using `==`.
+  [[key_string]]
+  * `HASH_KEY_STRING` -- the key is a zero-terminated string,
+    allocated separately from the rest of the node.
+  [[key_endstring]]
+  * `HASH_KEY_ENDSTRING` -- a zero-terminated string which lives at
+    the end of node (it is allocated together with the node). It
+    should be declared as `char key[1]`.
+  * `HASH_KEY_MEMORY` -- the `node\->HASH_KEY_MEMORY` is to be compared
+    using memcmp() function. In this case, you need to provide
+    `HASH_KEY_SIZE` macro as well, to specify the length of the key.
+  [[key_complex]]
+  * `HASH_KEY_COMPLEX(x)` -- the key is compound of more than one
+    component. The macro should expand to `x key1, x key2, ..., x kn`.
+    Furthermore, you need to provide a `HASH_KEY_DECL` macro. It is
+    used to define function parameters. Therefore it should expand to
+    `type1 key1, type2 key2, ..., typen keyn`. And
+    <<give_hashfn,`HASH_GIVE_HASHFN`>> and <<give_eq,`HASH_GIVE_EQ`>>
+    are mandatory for this key type.
+
+[[functions]]
+Optional function switches
+--------------------------
+
+You can define any of these macros and provide corresponding functions
+to customize the behaviour. The macros are:
+
+[[give_hashfn]]
+- `HASH_GIVE_HASHFN` -- the table will use `uns
+  HASH_PREFIX(hash)(key)` to calculate hash of `key`.
+  There is a sensible default for integers and strings.
+  In the case of <<key_complex,`HASH_KEY_COMPLEX`>>, it is mandatory
+  to provide this macro and function.
+[[give_eq]]
+- `HASH_GIVE_EQ` -- tells the table to use `int HASH_PREFIX(eq)(key1,
+  key2)` function to decide if `key1` and `key2` are equal. Default
+  for atomic types is `==` and strcmp() or strcasecmp() for strings
+  (depends on <<nocase,`HASH_NOCASE`>> switch).
+  It is mandatory when you use <<key_complex,`HASH_KEY_COMPLEX`>>.
+- `HASH_GIVE_EXTRA_SIZE` -- function `int HASH_PREFIX(extra_size)(key)`
+  returns how many bytes after the node should be allocated. It
+  defaults to `0` or to length of key in case of
+  <<key_endstring,`HASH_KEY_ENDSTRING`>>.
+- `HASH_GIVE_INIT_KEY` -- function
+  `void HASH_PREFIX(init_key)(node *, key)` is used to initialize key
+  in newly created node. The default is assignment for atomic keys and
+  static strings (<<key_atomic,`HASH_KEY_ATOMIC`>>,
+  <<key_string,`HASH_KEY_STRING`>>) and strcpy() for
+  <<key_endstr,`HASH_KEY_ENDSTRING`>>.
+[[give_init_data]]
+- `HASH_GIVE_INIT_DATA` -- function `void HASH_PREFIX(init_data)(node
+  *)` is used to initialize the rest of node. Useful if you use
+  <<fun_HASH_PREFIX_OPEN_PAREN_lookup_CLOSE_PAREN_,`HASH_PREFIX(lookup())`>>
+- `HASH_GIVE_ALLOC` -- you need to provide `void
+  \*HASH_PREFIX(alloc)(uns size` and `void HASH_PREFIX(free)(void \*)`
+  to allocate and deallocate the nodes. Default uses
+  <<memory:xmalloc()>> and <<memory:xfree()>> or <<mempool:,mempool
+  routines>>, depending on <<use_pool,`HASH_USE_POOL`>> and
+  <<auto_pool,`HASH_AUTO_POOL>> switches.
+
+[[params]]
+Optional parameters
+-------------------
+
+You can customize the hash table a little more by these macros:
+
+[[nocase]]
+- `HASH_NOCASE` -- use case-insensitive comparison for strings.
+- `HASH_DEFAULT_SIZE` -- use approximately this many elements when
+  creating the hash table.
+- `HASH_CONSERVE_SPACE` -- use as little space as possible.
+- `HASH_FN_BITS` -- the hash function only provides this many
+  significants bits.
+- `HASH_ATOMIC_TYPE` -- the type of atomic key
+  (<<key_atomic,`HASH_KEY_ATOMIC`>>) is not `int`, but this type.
+[[use_pool]]
+- `HASH_USE_POOL` -- tells to use <<mempool:,mempool allocation>> to
+  allocate the nodes. You should define it to the name of mempool
+  variable to be used for this purpose.
+[[auto_pool]]
+- `HASH_AUTO_POOL` -- like above, but it creates it's own mempool.
+  Define it to the block size of the pool.
+- `HASH_ZERO_FILL` -- initialize new nodes to all zeroes.
+- `HASH_TABLE_ALLOC` -- allocate the table the same way as nodes. If
+  not provided, <<mempory:xmalloc()>> is used.
+[[table_dynamic]]
+- `HASH_TABLE_DYNAMIC` -- By default, only one global hash table is
+  used. With this macro defined, all functions gain new first
+  parameter of type `HASH_PREFIX(table) *` to allow them work with
+  multiple hash tables.
+
+[[wants]]
+Functionality switches
+----------------------
+
+Each of these macros enables some of the functionality the table has.
+
+[[want_cleanup]]
+- `HASH_WANT_CLEANUP` --
+  <<fun_HASH_PREFIX_OPEN_PAREN_cleanup_CLOSE_PAREN_,`HASH_PREFIX((cleanup()`>>
+[[want_find]]
+- `HASH_WANT_FIND` --
+  <<fun_HASH_PREFIX_OPEN_PAREN_find_CLOSE_PAREN_,`HASH_PREFIX((find()`>>
+[[want_find_next]]
+- `HASH_WANT_FIND_NEXT` --
+  <<fun_HASH_PREFIX_OPEN_PAREN_find_next_CLOSE_PAREN_,`HASH_PREFIX((find_next()`>>
+[[want_new]]
+- `HASH_WANT_NEW` --
+  <<fun_HASH_PREFIX_OPEN_PAREN_new_CLOSE_PAREN_,`HASH_PREFIX((new()`>>
+[[want_lookup]]
+- `HASH_WANT_LOOKUP` --
+  <<fun_HASH_PREFIX_OPEN_PAREN_lookup_CLOSE_PAREN_,`HASH_PREFIX((lookup()`>>
+[[want_delete]]
+- `HASH_WANT_DELETE` --
+  <<fun_HASH_PREFIX_OPEN_PAREN_delete_CLOSE_PAREN_,`HASH_PREFIX((delete()`>>
+[[want_remove]]
+- `HASH_WANT_REMOVE` --
+  <<fun_HASH_PREFIX_OPEN_PAREN_remove_CLOSE_PAREN_,`HASH_PREFIX((remove()`>>
+
+[[generated]]
+Generated functions
+-------------------
+
+These are the function that the header file generates for you. The
+strange first parameter of each function is a place where the
+`HASH_PREFIX(table) *` resides when you define
+<<table_dynamic,`HASH_TABLE_DYNAMIC`>>. If you do not, the parameter
+is empty.
+
+!!ucw/hashtable.h HASH_PREFIX
+
+[[iterator]]
+Iterator
+--------
+
+You can use the `HASH_FOR_ALL` iterator macro to run trough all the
+nodes. Lets say your `HASH_PREFIX(x)` macro is defined as
+`prefix_##x`. Then you would do something like:
+
+  HASH_FOR_ALL(prefix, node_variable)
+  {
+     do_something_with_node(node_variable);
+  }
+  HASH_END_FOR;
+
+If you use <<table_dynamic,`HASH_TABLE_DYNAMIC`>>, use
+`HASH_FOR_ALL_DYNAMIC(prefix, table, node_variable)` instead.
+
+You may not modify the table inside the block. Use `HASH_BREAK` and
+`HASH_CONTINUE` instead of `break` and `continue` statements.
diff --git a/ucw/doc/index.txt b/ucw/doc/index.txt
index 946aff67..2c69d6ae 100644
--- a/ucw/doc/index.txt
+++ b/ucw/doc/index.txt
@@ -23,14 +23,15 @@ Modules
 - <<mainloop:,Mainloop>>
 - <<unaligned:,Unaligned data>>
 - <<lists:,Link lists>>
+- <<heap:,Binary heaps>>
+- <<binheap:,Binomial heaps>>
+- <<hashtable:,Hash tables>>
 - <<growbuf:,Growing buffers>>
 - <<chartype:,Single-byte characters>>
 - <<unicode:,Multi-byte characters>>
 - <<prime:,Prime numbers>>
 - <<sort:,Sorting>>
 - <<binsearch:,Binary search>>
-- <<heap:,Binary heaps>>
-- <<binheap:,Binomial heaps>>
 - <<compress:,Compression>>
 
 Other features
@@ -42,8 +43,6 @@ Other features
 
 Yet undocumented modules
 ------------------------
-- Hash tables
-  * `hashtable.h`
 - Trie
   * `trie.h`
 - Red-black trees
diff --git a/ucw/hashtable.h b/ucw/hashtable.h
index afccbd06..4f8ca1a3 100644
--- a/ucw/hashtable.h
+++ b/ucw/hashtable.h
@@ -394,7 +394,11 @@ static void P(alloc_table) (TAU)
     T.hash_min = 0;
 }
 
-static void P(init) (TA)
+/**
+ * Initializes the hash table.
+ * This one is available no matter what `HASH_WANT_` macros you defined or not.
+ **/
+static void HASH_PREFIX(init)(TA)
 {
   T.hash_count = 0;
   T.hash_size = HASH_DEFAULT_SIZE;
@@ -408,7 +412,11 @@ static void P(init) (TA)
 }
 
 #ifdef HASH_WANT_CLEANUP
-static void P(cleanup) (TA)
+/**
+ * Deallocates the hash table, including the nodes.
+ * It is available if you defined <<want_cleanup,`HASH_WANT_CLEANUP`>>.
+ **/
+static void HASH_PREFIX(cleanup)(TA)
 {
 #ifndef HASH_USE_POOL
   uns i;
@@ -462,7 +470,13 @@ static void P(rehash) (TAC uns size)
 }
 
 #ifdef HASH_WANT_FIND
-static P(node) * P(find) (TAC HASH_KEY_DECL)
+/**
+ * Finds a node with given key (specified in the @HAS_KEY_DECL parameter).
+ * If it does not exist, NULL is returned.
+ *
+ * Enabled by the <<want_find,`HASH_WANT_FIND`>> macro.
+ **/
+static HASH_NODE* HASH_PREFIX(find)(TAC HASH_KEY_DECL)
 {
   uns h0 = P(hash) (TTC HASH_KEY( ));
   uns h = h0 % T.hash_size;
@@ -482,7 +496,12 @@ static P(node) * P(find) (TAC HASH_KEY_DECL)
 #endif
 
 #ifdef HASH_WANT_FIND_NEXT
-static P(node) * P(find_next) (TAC P(node) *start)
+/**
+ * Finds next node with the same key. Returns NULL if it does not exist.
+ *
+ * Enabled by the <<want_find_next,`HASH_WANT_FIND_NEXT`>> macro.
+ **/
+static HASH_NODE* HASH_PREFIX(find_next)(TAC P(node) *start)
 {
 #ifndef HASH_CONSERVE_SPACE
   uns h0 = P(hash) (TTC HASH_KEY(start->));
@@ -503,7 +522,12 @@ static P(node) * P(find_next) (TAC P(node) *start)
 #endif
 
 #ifdef HASH_WANT_NEW
-static P(node) * P(new) (TAC HASH_KEY_DECL)
+/**
+ * Generates a new node with a given key.
+ *
+ * Enabled by the <<want_new,`HASH_WANT_NEW`>> macro.
+ **/
+static HASH_NODE * HASH_PREFIX(new)(TAC HASH_KEY_DECL)
 {
   uns h0, h;
   P(bucket) *b;
@@ -525,7 +549,13 @@ static P(node) * P(new) (TAC HASH_KEY_DECL)
 #endif
 
 #ifdef HASH_WANT_LOOKUP
-static P(node) * P(lookup) (TAC HASH_KEY_DECL)
+/**
+ * Finds a node with a given key. If it does not exist, a new one is created.
+ * It is strongly recommended to use <<give_init_data,`HASH_GIVE_INIT_DATA`>>.
+ *
+ * This one is enabled by the <<want_lookup,`HASH_WANT_LOOKUP`>> macro.
+ **/
+static HASH_NODE* HASH_PREFIX(lookup)(TAC HASH_KEY_DECL)
 {
   uns h0 = P(hash) (TTC HASH_KEY( ));
   uns h = h0 % T.hash_size;
@@ -556,7 +586,14 @@ static P(node) * P(lookup) (TAC HASH_KEY_DECL)
 #endif
 
 #ifdef HASH_WANT_DELETE
-static int P(delete) (TAC HASH_KEY_DECL)
+/**
+ * Removes a node with the given key from hash table and deallocates it.
+ *
+ * Success is returned.
+ *
+ * This one is enabled by <<want_delete,`HASH_WANT_DELETE`>> macro.
+ **/
+static int HASH_PREFIX(delete)(TAC HASH_KEY_DECL)
 {
   uns h0 = P(hash) (TTC HASH_KEY( ));
   uns h = h0 % T.hash_size;
@@ -582,7 +619,14 @@ static int P(delete) (TAC HASH_KEY_DECL)
 #endif
 
 #ifdef HASH_WANT_REMOVE
-static void P(remove) (TAC P(node) *n)
+/**
+ * Removes a given node and deallocates it.
+ * It differs from <<fun__GENERIC_LINK|HASH_PREFIX|delete,`HASH_PREFIX(delete)()`>>
+ * in its type of parameter -- this one deletes a specific node, that one looks for it by a key.
+ *
+ * Enabled by <<want_remove,`HASH_WANT_REMOVE`>> macro.
+ **/
+static void HASH_PREFIX(remove)(TAC HASH_NODE *n)
 {
   P(bucket) *x = SKIP_BACK(struct P(bucket), n, n);
   uns h0 = P(bucket_hash)(TTC x);
-- 
2.39.5