From: Robert Spalek Date: Mon, 3 Jun 2002 16:02:00 +0000 (+0000) Subject: - str_hash.[ch] renamed to hashfunc.[ch], the functions renamed X-Git-Tag: holmes-import~1407 X-Git-Url: http://mj.ucw.cz/gitweb/?a=commitdiff_plain;h=e1980ce3bfe2bf0d46bcc6e42fc4f1f35d541fe2;p=libucw.git - str_hash.[ch] renamed to hashfunc.[ch], the functions renamed - deleted hash-{block,istring,string}.c, their functionality merged into hashfunc.[ch] - str-test.c rewritten to use the new name-style, char->byte, more tests added --- diff --git a/lib/Makefile b/lib/Makefile index f732d0e0..131f3308 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -8,7 +8,7 @@ SHLIB_OBJS=alloc.o alloc_str.o ctmatch.o db.o fastbuf.o fb-file.o fb-mem.o lists prime.o random.o realloc.o regex.o timer.o url.o wildmatch.o \ wordsplit.o str_ctype.o str_upper.o bucket.o conf.o object.o sorter.o \ finger.o proctitle.o ipaccess.o profile.o bitsig.o randomkey.o \ - hash-string.o hash-istring.o hash-block.o custom.o base224.o str_hash.o fb-temp.o + hashfunc.o custom.o base224.o fb-temp.o obj/lib/libsh.a: $(addprefix obj/lib/,$(SHLIB_OBJS)) diff --git a/lib/hash-block.c b/lib/hash-block.c deleted file mode 100644 index 82bf4e2d..00000000 --- a/lib/hash-block.c +++ /dev/null @@ -1,17 +0,0 @@ -/* - * Sherlock Library -- Block Hash Function - * - * (c) 2002 Martin Mares - */ - -#include "lib/lib.h" -#include "lib/hashfunc.h" - -uns -hash_block(byte *k, uns len) -{ - uns h = len; - while (len--) - h = h*37 + *k++; - return h; -} diff --git a/lib/hash-istring.c b/lib/hash-istring.c deleted file mode 100644 index e95de44d..00000000 --- a/lib/hash-istring.c +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Sherlock Library -- Case-Insensitive String Hash Function - * - * (c) 2002 Martin Mares - */ - -#include "lib/lib.h" -#include "lib/hashfunc.h" -#include "lib/chartype.h" - -#include - -uns -hash_string_nocase(byte *k) -{ - uns h = strlen(k); - while (*k) - h = h*37 + Cupcase(*k++); - return h; -} diff --git a/lib/hash-string.c b/lib/hash-string.c deleted file mode 100644 index 00623ec0..00000000 --- a/lib/hash-string.c +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Sherlock Library -- String Hash Function - * - * (c) 2002 Martin Mares - */ - -#include "lib/lib.h" -#include "lib/hashfunc.h" - -#include - -uns -hash_string(byte *k) -{ - uns h = strlen(k); - while (*k) - h = h*37 + *k++; - return h; -} diff --git a/lib/hashfunc.c b/lib/hashfunc.c new file mode 100644 index 00000000..631a0bbe --- /dev/null +++ b/lib/hashfunc.c @@ -0,0 +1,208 @@ +/* + * Hyper-super-meta-alt-control-shift extra fast str_len() and hash_*() + * routines + * + * It is always at least as fast as the classical strlen() routine and for + * strings longer than 100 characters, it is substantially faster. + * + * (c) 2002, Robert Spalek + */ + +#include "lib/lib.h" +#include "lib/hashfunc.h" +#include "lib/chartype.h" + +/* The number of bits the hash in the function hash_*() is rotated by after + * every pass. It should be prime with the word size. */ +#define SHIFT_BITS 5 + +/* A bit-mask which clears higher bytes than a given threshold. */ +static uns mask_higher_bits[sizeof(uns)]; + +static void CONSTRUCTOR +hashfunc_init(void) +{ + uns i, j; + byte *str; + for (i=0; i= sizeof(uns)) + { + hash = ROL(hash, SHIFT_BITS) ^ *u++; + len -= sizeof(uns); + } + hash = ROL(hash, SHIFT_BITS) ^ (*u & mask_higher_bits[len]); + return hash; +} + +#ifndef CPU_ALLOW_UNALIGNED +uns +str_len(const byte *str) +{ + uns shift = UNALIGNED_PART(str, uns); + if (!shift) + return str_len_aligned(str); + else + { + uns i; + shift = sizeof(uns) - shift; + for (i=0; i= len) + break; + hash ^= str[i] << (shift * 8); + } + return hash; + } +} +#endif + +uns +hash_string_nocase(const byte *str) +{ + uns hash = 0; + uns i; + for (i=0; ; i++) + { + uns modulo = i % sizeof(uns); + uns shift; +#ifdef CPU_LITTLE_ENDIAN + shift = modulo; +#else + shift = sizeof(uns) - 1 - modulo; +#endif + if (!modulo) + hash = ROL(hash, SHIFT_BITS); + if (!str[i]) + break; + hash ^= Cupcase(str[i]) << (shift * 8); + } + return hash; +} diff --git a/lib/hashfunc.h b/lib/hashfunc.h index 8e49e4ca..1767d81b 100644 --- a/lib/hashfunc.h +++ b/lib/hashfunc.h @@ -1,15 +1,36 @@ /* - * Sherlock Library -- Hash Functions + * Hyper-super-meta-alt-control-shift extra fast str_len() and hash_*() + * routines * - * (c) 2002 Martin Mares + * (c) 2002, Robert Spalek */ #ifndef _SHERLOCK_HASHFUNC_H #define _SHERLOCK_HASHFUNC_H -uns hash_string(byte *x); -uns hash_string_nocase(byte *x); +#include "lib/lib.h" + +/* An equivalent of the Intel's rol instruction. */ +#define ROL(x, bits) (((x) << (bits)) | ((x) >> (sizeof(uns)*8 - (bits)))) + +/* The following functions need str to be aligned to uns. */ +uns str_len_aligned(const byte *str) CONST; +uns hash_string_aligned(const byte *str) CONST; +uns hash_block_aligned(const byte *str, uns len) CONST; + +#ifdef CPU_ALLOW_UNALIGNED +#define str_len(str) str_len_aligned(str) +#define hash_string(str) hash_string_aligned(str) +#define hash_block(str, len) hash_block_aligned(str, len) +#else +uns str_len(const byte *str) CONST; +uns hash_string(const byte *str) CONST; +uns hash_block(const byte *str, uns len) CONST; +#endif + +uns hash_string_nocase(const byte *str) CONST; + +static inline uns hash_int(uns x) CONST; static inline uns hash_int(uns x) { return 6442450967*x; } -uns hash_block(byte *x, uns len); #endif diff --git a/lib/str-test.c b/lib/str-test.c index 16cd9291..48d246fb 100644 --- a/lib/str-test.c +++ b/lib/str-test.c @@ -1,9 +1,9 @@ /* - * Checking the correctness of str_len() and str_hash() and proving, that + * Checking the correctness of str_len() and hash_*() and proving, that * it is faster than the classical version ;-) */ -#include "lib/str_hash.h" +#include "lib/hashfunc.h" #include #include @@ -17,7 +17,7 @@ static uns alignment = 0; static void -random_string(char *str, int len) +random_string(byte *str, int len) { int i; for (i=0; i= 0; i++) { - char str[lengths[i] + 1 + alignment]; + byte str[lengths[i] + 1 + alignment]; uns count = TEST_TIME / (lengths[i] + 10); - uns el1 = 0, el2 = 0, elh = 0; - uns tot1 = 0, tot2 = 0, hash = 0; + uns el1 = 0, el2 = 0, elh = 0, elhn = 0; + uns tot1 = 0, tot2 = 0, hash = 0, hashn = 0; uns j; for (j=0; j - */ - -#include "lib/lib.h" -#include "lib/str_hash.h" - -/* The number of bits the hash in the function str_hash() is rotated by after - * every pass. It should be prime with the word size. */ -#define SHIFT_BITS 5 - -/* A bit-mask which clears higher bytes than a given threshold. */ -static uns mask_higher_bits[sizeof(uns)]; - -static void CONSTRUCTOR -str_hash_init(void) -{ - uns i, j; - char *str; - for (i=0; i - */ - -#include "lib/lib.h" - -/* An equivalent of the Intel's rol instruction. */ -#define ROL(x, bits) (((x) << (bits)) | ((x) >> (sizeof(uns)*8 - (bits)))) - -/* The following functions need str to be aligned to uns. */ -uns str_len_aligned(const char *str) CONST; -uns str_hash_aligned(const char *str) CONST; - -#ifdef CPU_ALLOW_UNALIGNED -#define str_len(str) str_len_aligned(str) -#define str_hash(str) str_hash_aligned(str) -#else -uns str_len(const char *str) CONST; -uns str_hash(const char *str) CONST; -#endif