From e1980ce3bfe2bf0d46bcc6e42fc4f1f35d541fe2 Mon Sep 17 00:00:00 2001 From: Robert Spalek Date: Mon, 3 Jun 2002 16:02:00 +0000 Subject: [PATCH] - str_hash.[ch] renamed to hashfunc.[ch], the functions renamed - deleted hash-{block,istring,string}.c, their functionality merged into hashfunc.[ch] - str-test.c rewritten to use the new name-style, char->byte, more tests added --- lib/Makefile | 2 +- lib/hash-block.c | 17 ------ lib/hash-istring.c | 20 ------- lib/hash-string.c | 19 ------- lib/{str_hash.c => hashfunc.c} | 95 +++++++++++++++++++++++++++++----- lib/hashfunc.h | 31 +++++++++-- lib/str-test.c | 35 +++++++++---- lib/str_hash.h | 23 -------- 8 files changed, 132 insertions(+), 110 deletions(-) delete mode 100644 lib/hash-block.c delete mode 100644 lib/hash-istring.c delete mode 100644 lib/hash-string.c rename lib/{str_hash.c => hashfunc.c} (58%) delete mode 100644 lib/str_hash.h diff --git a/lib/Makefile b/lib/Makefile index f732d0e0..131f3308 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -8,7 +8,7 @@ SHLIB_OBJS=alloc.o alloc_str.o ctmatch.o db.o fastbuf.o fb-file.o fb-mem.o lists prime.o random.o realloc.o regex.o timer.o url.o wildmatch.o \ wordsplit.o str_ctype.o str_upper.o bucket.o conf.o object.o sorter.o \ finger.o proctitle.o ipaccess.o profile.o bitsig.o randomkey.o \ - hash-string.o hash-istring.o hash-block.o custom.o base224.o str_hash.o fb-temp.o + hashfunc.o custom.o base224.o fb-temp.o obj/lib/libsh.a: $(addprefix obj/lib/,$(SHLIB_OBJS)) diff --git a/lib/hash-block.c b/lib/hash-block.c deleted file mode 100644 index 82bf4e2d..00000000 --- a/lib/hash-block.c +++ /dev/null @@ -1,17 +0,0 @@ -/* - * Sherlock Library -- Block Hash Function - * - * (c) 2002 Martin Mares - */ - -#include "lib/lib.h" -#include "lib/hashfunc.h" - -uns -hash_block(byte *k, uns len) -{ - uns h = len; - while (len--) - h = h*37 + *k++; - return h; -} diff --git a/lib/hash-istring.c b/lib/hash-istring.c deleted file mode 100644 index e95de44d..00000000 --- a/lib/hash-istring.c +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Sherlock Library -- Case-Insensitive String Hash Function - * - * (c) 2002 Martin Mares - */ - -#include "lib/lib.h" -#include "lib/hashfunc.h" -#include "lib/chartype.h" - -#include - -uns -hash_string_nocase(byte *k) -{ - uns h = strlen(k); - while (*k) - h = h*37 + Cupcase(*k++); - return h; -} diff --git a/lib/hash-string.c b/lib/hash-string.c deleted file mode 100644 index 00623ec0..00000000 --- a/lib/hash-string.c +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Sherlock Library -- String Hash Function - * - * (c) 2002 Martin Mares - */ - -#include "lib/lib.h" -#include "lib/hashfunc.h" - -#include - -uns -hash_string(byte *k) -{ - uns h = strlen(k); - while (*k) - h = h*37 + *k++; - return h; -} diff --git a/lib/str_hash.c b/lib/hashfunc.c similarity index 58% rename from lib/str_hash.c rename to lib/hashfunc.c index 72dbb695..631a0bbe 100644 --- a/lib/str_hash.c +++ b/lib/hashfunc.c @@ -1,5 +1,5 @@ /* - * Hyper-super-meta-alt-control-shift extra fast str_len() and str_hash() + * Hyper-super-meta-alt-control-shift extra fast str_len() and hash_*() * routines * * It is always at least as fast as the classical strlen() routine and for @@ -9,9 +9,10 @@ */ #include "lib/lib.h" -#include "lib/str_hash.h" +#include "lib/hashfunc.h" +#include "lib/chartype.h" -/* The number of bits the hash in the function str_hash() is rotated by after +/* The number of bits the hash in the function hash_*() is rotated by after * every pass. It should be prime with the word size. */ #define SHIFT_BITS 5 @@ -19,13 +20,13 @@ static uns mask_higher_bits[sizeof(uns)]; static void CONSTRUCTOR -str_hash_init(void) +hashfunc_init(void) { uns i, j; - char *str; + byte *str; for (i=0; i= sizeof(uns)) + { + hash = ROL(hash, SHIFT_BITS) ^ *u++; + len -= sizeof(uns); + } + hash = ROL(hash, SHIFT_BITS) ^ (*u & mask_higher_bits[len]); + return hash; +} + #ifndef CPU_ALLOW_UNALIGNED uns -str_len(const char *str) +str_len(const byte *str) { uns shift = UNALIGNED_PART(str, uns); if (!shift) @@ -111,11 +126,11 @@ str_len(const char *str) } uns -str_hash(const char *str) +hash_string(const byte *str) { uns shift = UNALIGNED_PART(str, uns); if (!shift) - return str_hash_aligned(str); + return hash_string_aligned(str); else { uns hash = 0; @@ -133,9 +148,61 @@ str_hash(const char *str) hash = ROL(hash, SHIFT_BITS); if (!str[i]) break; - hash ^= ((unsigned char) str[i]) << (shift * 8); + hash ^= str[i] << (shift * 8); + } + return hash; + } +} + +uns +hash_block(const byte *str, uns len) +{ + uns shift = UNALIGNED_PART(str, uns); + if (!shift) + return hash_block_aligned(str, len); + else + { + uns hash = 0; + uns i; + for (i=0; ; i++) + { + uns modulo = i % sizeof(uns); + uns shift; +#ifdef CPU_LITTLE_ENDIAN + shift = modulo; +#else + shift = sizeof(uns) - 1 - modulo; +#endif + if (!modulo) + hash = ROL(hash, SHIFT_BITS); + if (i >= len) + break; + hash ^= str[i] << (shift * 8); } return hash; } } #endif + +uns +hash_string_nocase(const byte *str) +{ + uns hash = 0; + uns i; + for (i=0; ; i++) + { + uns modulo = i % sizeof(uns); + uns shift; +#ifdef CPU_LITTLE_ENDIAN + shift = modulo; +#else + shift = sizeof(uns) - 1 - modulo; +#endif + if (!modulo) + hash = ROL(hash, SHIFT_BITS); + if (!str[i]) + break; + hash ^= Cupcase(str[i]) << (shift * 8); + } + return hash; +} diff --git a/lib/hashfunc.h b/lib/hashfunc.h index 8e49e4ca..1767d81b 100644 --- a/lib/hashfunc.h +++ b/lib/hashfunc.h @@ -1,15 +1,36 @@ /* - * Sherlock Library -- Hash Functions + * Hyper-super-meta-alt-control-shift extra fast str_len() and hash_*() + * routines * - * (c) 2002 Martin Mares + * (c) 2002, Robert Spalek */ #ifndef _SHERLOCK_HASHFUNC_H #define _SHERLOCK_HASHFUNC_H -uns hash_string(byte *x); -uns hash_string_nocase(byte *x); +#include "lib/lib.h" + +/* An equivalent of the Intel's rol instruction. */ +#define ROL(x, bits) (((x) << (bits)) | ((x) >> (sizeof(uns)*8 - (bits)))) + +/* The following functions need str to be aligned to uns. */ +uns str_len_aligned(const byte *str) CONST; +uns hash_string_aligned(const byte *str) CONST; +uns hash_block_aligned(const byte *str, uns len) CONST; + +#ifdef CPU_ALLOW_UNALIGNED +#define str_len(str) str_len_aligned(str) +#define hash_string(str) hash_string_aligned(str) +#define hash_block(str, len) hash_block_aligned(str, len) +#else +uns str_len(const byte *str) CONST; +uns hash_string(const byte *str) CONST; +uns hash_block(const byte *str, uns len) CONST; +#endif + +uns hash_string_nocase(const byte *str) CONST; + +static inline uns hash_int(uns x) CONST; static inline uns hash_int(uns x) { return 6442450967*x; } -uns hash_block(byte *x, uns len); #endif diff --git a/lib/str-test.c b/lib/str-test.c index 16cd9291..48d246fb 100644 --- a/lib/str-test.c +++ b/lib/str-test.c @@ -1,9 +1,9 @@ /* - * Checking the correctness of str_len() and str_hash() and proving, that + * Checking the correctness of str_len() and hash_*() and proving, that * it is faster than the classical version ;-) */ -#include "lib/str_hash.h" +#include "lib/hashfunc.h" #include #include @@ -17,7 +17,7 @@ static uns alignment = 0; static void -random_string(char *str, int len) +random_string(byte *str, int len) { int i; for (i=0; i= 0; i++) { - char str[lengths[i] + 1 + alignment]; + byte str[lengths[i] + 1 + alignment]; uns count = TEST_TIME / (lengths[i] + 10); - uns el1 = 0, el2 = 0, elh = 0; - uns tot1 = 0, tot2 = 0, hash = 0; + uns el1 = 0, el2 = 0, elh = 0, elhn = 0; + uns tot1 = 0, tot2 = 0, hash = 0, hashn = 0; uns j; for (j=0; j - */ - -#include "lib/lib.h" - -/* An equivalent of the Intel's rol instruction. */ -#define ROL(x, bits) (((x) << (bits)) | ((x) >> (sizeof(uns)*8 - (bits)))) - -/* The following functions need str to be aligned to uns. */ -uns str_len_aligned(const char *str) CONST; -uns str_hash_aligned(const char *str) CONST; - -#ifdef CPU_ALLOW_UNALIGNED -#define str_len(str) str_len_aligned(str) -#define str_hash(str) str_hash_aligned(str) -#else -uns str_len(const char *str) CONST; -uns str_hash(const char *str) CONST; -#endif -- 2.39.2