From c792c5e657737a794893ae3505fba633b93e44b7 Mon Sep 17 00:00:00 2001 From: Michal Vaner Date: Sun, 11 Jan 2009 22:53:05 +0100 Subject: [PATCH] ucw docs: hashtables --- ucw/doc/Makefile | 2 +- ucw/doc/generic.txt | 1 + ucw/doc/hashtable.txt | 176 ++++++++++++++++++++++++++++++++++++++++++ ucw/doc/index.txt | 7 +- ucw/hashtable.h | 60 ++++++++++++-- 5 files changed, 233 insertions(+), 13 deletions(-) create mode 100644 ucw/doc/hashtable.txt diff --git a/ucw/doc/Makefile b/ucw/doc/Makefile index b1364848..88e59d5d 100644 --- a/ucw/doc/Makefile +++ b/ucw/doc/Makefile @@ -2,7 +2,7 @@ DIRS+=ucw/doc -UCW_DOCS=fastbuf index config configure install basecode hash docsys conf mempool eltpool mainloop generic growbuf unaligned lists chartype unicode prime binsearch heap binheap compress sort +UCW_DOCS=fastbuf index config configure install basecode hash docsys conf mempool eltpool mainloop generic growbuf unaligned lists chartype unicode prime binsearch heap binheap compress sort hashtable UCW_INDEX=$(o)/ucw/doc/def_index.html UCW_DOCS_HTML=$(addprefix $(o)/ucw/doc/,$(addsuffix .html,$(UCW_DOCS))) diff --git a/ucw/doc/generic.txt b/ucw/doc/generic.txt index 2c73f94d..330cad8c 100644 --- a/ucw/doc/generic.txt +++ b/ucw/doc/generic.txt @@ -13,6 +13,7 @@ libUCW, and also hints for use of these structures. * <> * <> * <> + * <> // TODO The module list diff --git a/ucw/doc/hashtable.txt b/ucw/doc/hashtable.txt new file mode 100644 index 00000000..35dbe28a --- /dev/null +++ b/ucw/doc/hashtable.txt @@ -0,0 +1,176 @@ +Hash tables +=========== + +A hash table is very universal data structure. It does most of it's +operations in O(1) average time. The library contains a header to +generate hash tables suiting to your needs. + +They are <>. + +- <> +- <> +- <> +- <> +- <> +- <> + +[[mandatory]] +Mandatory macros +---------------- + +- `HASH_NODE` -- a data type where a node dwells. It is usually a + structure. +- `HASH_PREFIX(x)` -- the name generating macro. +- Key type and name. Must be one of following. + [[key_atomic]] + * `HASH_KEY_ATOMIC` -- the key (`node\->HASH_KEY_ATOMIC`) is an + atomic type which can be compared using `==`. + [[key_string]] + * `HASH_KEY_STRING` -- the key is a zero-terminated string, + allocated separately from the rest of the node. + [[key_endstring]] + * `HASH_KEY_ENDSTRING` -- a zero-terminated string which lives at + the end of node (it is allocated together with the node). It + should be declared as `char key[1]`. + * `HASH_KEY_MEMORY` -- the `node\->HASH_KEY_MEMORY` is to be compared + using memcmp() function. In this case, you need to provide + `HASH_KEY_SIZE` macro as well, to specify the length of the key. + [[key_complex]] + * `HASH_KEY_COMPLEX(x)` -- the key is compound of more than one + component. The macro should expand to `x key1, x key2, ..., x kn`. + Furthermore, you need to provide a `HASH_KEY_DECL` macro. It is + used to define function parameters. Therefore it should expand to + `type1 key1, type2 key2, ..., typen keyn`. And + <> and <> + are mandatory for this key type. + +[[functions]] +Optional function switches +-------------------------- + +You can define any of these macros and provide corresponding functions +to customize the behaviour. The macros are: + +[[give_hashfn]] +- `HASH_GIVE_HASHFN` -- the table will use `uns + HASH_PREFIX(hash)(key)` to calculate hash of `key`. + There is a sensible default for integers and strings. + In the case of <>, it is mandatory + to provide this macro and function. +[[give_eq]] +- `HASH_GIVE_EQ` -- tells the table to use `int HASH_PREFIX(eq)(key1, + key2)` function to decide if `key1` and `key2` are equal. Default + for atomic types is `==` and strcmp() or strcasecmp() for strings + (depends on <> switch). + It is mandatory when you use <>. +- `HASH_GIVE_EXTRA_SIZE` -- function `int HASH_PREFIX(extra_size)(key)` + returns how many bytes after the node should be allocated. It + defaults to `0` or to length of key in case of + <>. +- `HASH_GIVE_INIT_KEY` -- function + `void HASH_PREFIX(init_key)(node *, key)` is used to initialize key + in newly created node. The default is assignment for atomic keys and + static strings (<>, + <>) and strcpy() for + <>. +[[give_init_data]] +- `HASH_GIVE_INIT_DATA` -- function `void HASH_PREFIX(init_data)(node + *)` is used to initialize the rest of node. Useful if you use + <> +- `HASH_GIVE_ALLOC` -- you need to provide `void + \*HASH_PREFIX(alloc)(uns size` and `void HASH_PREFIX(free)(void \*)` + to allocate and deallocate the nodes. Default uses + <> and <> or <>, depending on <> and + <> switches. + +[[params]] +Optional parameters +------------------- + +You can customize the hash table a little more by these macros: + +[[nocase]] +- `HASH_NOCASE` -- use case-insensitive comparison for strings. +- `HASH_DEFAULT_SIZE` -- use approximately this many elements when + creating the hash table. +- `HASH_CONSERVE_SPACE` -- use as little space as possible. +- `HASH_FN_BITS` -- the hash function only provides this many + significants bits. +- `HASH_ATOMIC_TYPE` -- the type of atomic key + (<>) is not `int`, but this type. +[[use_pool]] +- `HASH_USE_POOL` -- tells to use <> to + allocate the nodes. You should define it to the name of mempool + variable to be used for this purpose. +[[auto_pool]] +- `HASH_AUTO_POOL` -- like above, but it creates it's own mempool. + Define it to the block size of the pool. +- `HASH_ZERO_FILL` -- initialize new nodes to all zeroes. +- `HASH_TABLE_ALLOC` -- allocate the table the same way as nodes. If + not provided, <> is used. +[[table_dynamic]] +- `HASH_TABLE_DYNAMIC` -- By default, only one global hash table is + used. With this macro defined, all functions gain new first + parameter of type `HASH_PREFIX(table) *` to allow them work with + multiple hash tables. + +[[wants]] +Functionality switches +---------------------- + +Each of these macros enables some of the functionality the table has. + +[[want_cleanup]] +- `HASH_WANT_CLEANUP` -- + <> +[[want_find]] +- `HASH_WANT_FIND` -- + <> +[[want_find_next]] +- `HASH_WANT_FIND_NEXT` -- + <> +[[want_new]] +- `HASH_WANT_NEW` -- + <> +[[want_lookup]] +- `HASH_WANT_LOOKUP` -- + <> +[[want_delete]] +- `HASH_WANT_DELETE` -- + <> +[[want_remove]] +- `HASH_WANT_REMOVE` -- + <> + +[[generated]] +Generated functions +------------------- + +These are the function that the header file generates for you. The +strange first parameter of each function is a place where the +`HASH_PREFIX(table) *` resides when you define +<>. If you do not, the parameter +is empty. + +!!ucw/hashtable.h HASH_PREFIX + +[[iterator]] +Iterator +-------- + +You can use the `HASH_FOR_ALL` iterator macro to run trough all the +nodes. Lets say your `HASH_PREFIX(x)` macro is defined as +`prefix_##x`. Then you would do something like: + + HASH_FOR_ALL(prefix, node_variable) + { + do_something_with_node(node_variable); + } + HASH_END_FOR; + +If you use <>, use +`HASH_FOR_ALL_DYNAMIC(prefix, table, node_variable)` instead. + +You may not modify the table inside the block. Use `HASH_BREAK` and +`HASH_CONTINUE` instead of `break` and `continue` statements. diff --git a/ucw/doc/index.txt b/ucw/doc/index.txt index 946aff67..2c69d6ae 100644 --- a/ucw/doc/index.txt +++ b/ucw/doc/index.txt @@ -23,14 +23,15 @@ Modules - <> - <> - <> +- <> +- <> +- <> - <> - <> - <> - <> - <> - <> -- <> -- <> - <> Other features @@ -42,8 +43,6 @@ Other features Yet undocumented modules ------------------------ -- Hash tables - * `hashtable.h` - Trie * `trie.h` - Red-black trees diff --git a/ucw/hashtable.h b/ucw/hashtable.h index afccbd06..4f8ca1a3 100644 --- a/ucw/hashtable.h +++ b/ucw/hashtable.h @@ -394,7 +394,11 @@ static void P(alloc_table) (TAU) T.hash_min = 0; } -static void P(init) (TA) +/** + * Initializes the hash table. + * This one is available no matter what `HASH_WANT_` macros you defined or not. + **/ +static void HASH_PREFIX(init)(TA) { T.hash_count = 0; T.hash_size = HASH_DEFAULT_SIZE; @@ -408,7 +412,11 @@ static void P(init) (TA) } #ifdef HASH_WANT_CLEANUP -static void P(cleanup) (TA) +/** + * Deallocates the hash table, including the nodes. + * It is available if you defined <>. + **/ +static void HASH_PREFIX(cleanup)(TA) { #ifndef HASH_USE_POOL uns i; @@ -462,7 +470,13 @@ static void P(rehash) (TAC uns size) } #ifdef HASH_WANT_FIND -static P(node) * P(find) (TAC HASH_KEY_DECL) +/** + * Finds a node with given key (specified in the @HAS_KEY_DECL parameter). + * If it does not exist, NULL is returned. + * + * Enabled by the <> macro. + **/ +static HASH_NODE* HASH_PREFIX(find)(TAC HASH_KEY_DECL) { uns h0 = P(hash) (TTC HASH_KEY( )); uns h = h0 % T.hash_size; @@ -482,7 +496,12 @@ static P(node) * P(find) (TAC HASH_KEY_DECL) #endif #ifdef HASH_WANT_FIND_NEXT -static P(node) * P(find_next) (TAC P(node) *start) +/** + * Finds next node with the same key. Returns NULL if it does not exist. + * + * Enabled by the <> macro. + **/ +static HASH_NODE* HASH_PREFIX(find_next)(TAC P(node) *start) { #ifndef HASH_CONSERVE_SPACE uns h0 = P(hash) (TTC HASH_KEY(start->)); @@ -503,7 +522,12 @@ static P(node) * P(find_next) (TAC P(node) *start) #endif #ifdef HASH_WANT_NEW -static P(node) * P(new) (TAC HASH_KEY_DECL) +/** + * Generates a new node with a given key. + * + * Enabled by the <> macro. + **/ +static HASH_NODE * HASH_PREFIX(new)(TAC HASH_KEY_DECL) { uns h0, h; P(bucket) *b; @@ -525,7 +549,13 @@ static P(node) * P(new) (TAC HASH_KEY_DECL) #endif #ifdef HASH_WANT_LOOKUP -static P(node) * P(lookup) (TAC HASH_KEY_DECL) +/** + * Finds a node with a given key. If it does not exist, a new one is created. + * It is strongly recommended to use <>. + * + * This one is enabled by the <> macro. + **/ +static HASH_NODE* HASH_PREFIX(lookup)(TAC HASH_KEY_DECL) { uns h0 = P(hash) (TTC HASH_KEY( )); uns h = h0 % T.hash_size; @@ -556,7 +586,14 @@ static P(node) * P(lookup) (TAC HASH_KEY_DECL) #endif #ifdef HASH_WANT_DELETE -static int P(delete) (TAC HASH_KEY_DECL) +/** + * Removes a node with the given key from hash table and deallocates it. + * + * Success is returned. + * + * This one is enabled by <> macro. + **/ +static int HASH_PREFIX(delete)(TAC HASH_KEY_DECL) { uns h0 = P(hash) (TTC HASH_KEY( )); uns h = h0 % T.hash_size; @@ -582,7 +619,14 @@ static int P(delete) (TAC HASH_KEY_DECL) #endif #ifdef HASH_WANT_REMOVE -static void P(remove) (TAC P(node) *n) +/** + * Removes a given node and deallocates it. + * It differs from <> + * in its type of parameter -- this one deletes a specific node, that one looks for it by a key. + * + * Enabled by <> macro. + **/ +static void HASH_PREFIX(remove)(TAC HASH_NODE *n) { P(bucket) *x = SKIP_BACK(struct P(bucket), n, n); uns h0 = P(bucket_hash)(TTC x); -- 2.39.5