prime.o random.o realloc.o regex.o timer.o url.o wildmatch.o \
wordsplit.o str_ctype.o str_upper.o bucket.o conf.o object.o sorter.o \
finger.o proctitle.o ipaccess.o profile.o bitsig.o randomkey.o \
- hash-string.o hash-istring.o hash-block.o custom.o base224.o str_hash.o fb-temp.o
+ hashfunc.o custom.o base224.o fb-temp.o
obj/lib/libsh.a: $(addprefix obj/lib/,$(SHLIB_OBJS))
+++ /dev/null
-/*
- * Sherlock Library -- Block Hash Function
- *
- * (c) 2002 Martin Mares <mj@ucw.cz>
- */
-
-#include "lib/lib.h"
-#include "lib/hashfunc.h"
-
-uns
-hash_block(byte *k, uns len)
-{
- uns h = len;
- while (len--)
- h = h*37 + *k++;
- return h;
-}
+++ /dev/null
-/*
- * Sherlock Library -- Case-Insensitive String Hash Function
- *
- * (c) 2002 Martin Mares <mj@ucw.cz>
- */
-
-#include "lib/lib.h"
-#include "lib/hashfunc.h"
-#include "lib/chartype.h"
-
-#include <string.h>
-
-uns
-hash_string_nocase(byte *k)
-{
- uns h = strlen(k);
- while (*k)
- h = h*37 + Cupcase(*k++);
- return h;
-}
+++ /dev/null
-/*
- * Sherlock Library -- String Hash Function
- *
- * (c) 2002 Martin Mares <mj@ucw.cz>
- */
-
-#include "lib/lib.h"
-#include "lib/hashfunc.h"
-
-#include <string.h>
-
-uns
-hash_string(byte *k)
-{
- uns h = strlen(k);
- while (*k)
- h = h*37 + *k++;
- return h;
-}
--- /dev/null
+/*
+ * Hyper-super-meta-alt-control-shift extra fast str_len() and hash_*()
+ * routines
+ *
+ * It is always at least as fast as the classical strlen() routine and for
+ * strings longer than 100 characters, it is substantially faster.
+ *
+ * (c) 2002, Robert Spalek <robert@ucw.cz>
+ */
+
+#include "lib/lib.h"
+#include "lib/hashfunc.h"
+#include "lib/chartype.h"
+
+/* The number of bits by which the hash in the hash_*() functions is rotated
+ * after every pass. It should be relatively prime to the word size. */
+#define SHIFT_BITS 5
+
+/* Bit-masks for the last word of a string or block: entry i keeps the first
+ * i bytes (in memory order) and clears all the following ones. */
+static uns mask_higher_bits[sizeof(uns)];
+
+/*
+ * Fill in mask_higher_bits[]: entry number idx gets 0xff in its first idx
+ * bytes (in memory order) and zero in all the remaining ones, so the table
+ * is independent of the byte order of the machine.
+ * NOTE(review): CONSTRUCTOR presumably makes this run at program start-up,
+ * before any hashing is done -- confirm in lib/lib.h.
+ */
+static void CONSTRUCTOR
+hashfunc_init(void)
+{
+	uns idx, pos;
+	for (idx=0; idx<sizeof(uns); idx++)
+	{
+		byte *mask = (byte *) &mask_higher_bits[idx];
+		for (pos=0; pos<sizeof(uns); pos++)
+			mask[pos] = (pos < idx) ? 0xff : 0;
+	}
+}
+
+static inline uns str_len_uns(uns x) CONST;
+
+/*
+ * Count the non-zero bytes at the beginning of x, taken in memory order.
+ * The result is sizeof(uns) when x contains no zero byte at all.
+ *
+ * A quick test comes first.  Let ones = 0x0101...01 and
+ * high_bits = 0x8080...80.  If x contains a zero byte, subtracting ones
+ * borrows at the lowest such byte and turns it into 0xff, so the top bit
+ * of that byte differs between x and x - ones.  Hence when
+ * (x ^ (x - ones)) & high_bits is zero, x surely has no zero byte.
+ * A non-zero value can be a false alarm (for example a 0x80 byte), so in
+ * that case the bytes are examined one by one.
+ */
+static inline uns
+str_len_uns(uns x)
+{
+	const uns ones = ((uns) -1) / 0xff;
+	const uns high_bits = ones * 0x80;
+	const byte *p = (const byte *) &x;
+	uns n = 0;
+	if (!((x ^ (x - ones)) & high_bits))
+		return sizeof(uns);
+	while (n < sizeof(uns) && p[n])
+		n++;
+	return n;
+}
+
+/*
+ * Length of a zero-terminated string whose start is aligned to uns.
+ * The string is read one whole word at a time; the scan stops at the first
+ * word in which str_len_uns() finds a zero byte.
+ */
+inline uns
+str_len_aligned(const byte *str)
+{
+	const uns *word = (const uns *) str;
+	uns total = 0;
+	uns part;
+	do
+	{
+		part = str_len_uns(*word++);
+		total += part;
+	}
+	while (part >= sizeof(uns));
+	return total;
+}
+
+/*
+ * Hash a zero-terminated string whose start is aligned to uns.
+ * For every word of the string the hash is rotated left by SHIFT_BITS and
+ * the word is XORed in.  In the word holding the terminating zero, only the
+ * bytes preceding the terminator are used; the rest is masked out.
+ */
+inline uns
+hash_string_aligned(const byte *str)
+{
+	const uns *word = (const uns *) str;
+	uns hash = 0;
+	for (;; word++)
+	{
+		uns valid = str_len_uns(*word);
+		hash = ROL(hash, SHIFT_BITS);
+		if (valid < sizeof(uns))
+			return hash ^ (*word & mask_higher_bits[valid]);
+		hash ^= *word;
+	}
+}
+
+/*
+ * Hash a block of len bytes starting at str, which must be aligned to uns.
+ * Every whole word is folded in by rotating the hash left by SHIFT_BITS and
+ * XORing the word.  The trailing len % sizeof(uns) bytes are taken from the
+ * last word with the remaining bytes masked out.  The final rotation is
+ * done even when there is no tail, so the result agrees with the
+ * byte-by-byte computation in hash_block().
+ *
+ * The tail word is loaded only when the tail is non-empty.  A block whose
+ * length is a multiple of sizeof(uns) (including an empty block) is thus
+ * never read past its end.  A partial tail still loads the whole aligned
+ * word containing it.
+ */
+inline uns
+hash_block_aligned(const byte *str, uns len)
+{
+	const uns *u = (const uns *) str;
+	uns hash = 0;
+	while (len >= sizeof(uns))
+	{
+		hash = ROL(hash, SHIFT_BITS) ^ *u++;
+		len -= sizeof(uns);
+	}
+	hash = ROL(hash, SHIFT_BITS);
+	if (len)
+		hash ^= *u & mask_higher_bits[len];
+	return hash;
+}
+
+#ifndef CPU_ALLOW_UNALIGNED
+/*
+ * Length of a zero-terminated string of any alignment.
+ * The bytes in front of the next uns boundary are checked one by one and
+ * the rest is left to str_len_aligned().
+ * NOTE(review): UNALIGNED_PART(str, uns) is presumably the offset of str
+ * from the preceding uns boundary -- confirm in lib/lib.h.
+ */
+uns
+str_len(const byte *str)
+{
+	uns misalign = UNALIGNED_PART(str, uns);
+	uns head, i;
+	if (!misalign)
+		return str_len_aligned(str);
+	head = sizeof(uns) - misalign;
+	for (i=0; i<head; i++)
+		if (!str[i])
+			return i;
+	return head + str_len_aligned(str + head);
+}
+
+/*
+ * Hash a zero-terminated string of any alignment.
+ * An aligned string is passed to hash_string_aligned().  Otherwise the same
+ * value is computed byte by byte: the hash is rotated left by SHIFT_BITS
+ * at the start of every group of sizeof(uns) bytes (also when the
+ * terminator is the first byte of a group) and each byte is XORed in at
+ * the position it would occupy in a word loaded from memory.
+ */
+uns
+hash_string(const byte *str)
+{
+	uns hash = 0;
+	uns i;
+	if (!UNALIGNED_PART(str, uns))
+		return hash_string_aligned(str);
+	for (i=0; ; i++)
+	{
+		uns modulo = i % sizeof(uns);
+		uns pos;
+#ifdef CPU_LITTLE_ENDIAN
+		pos = modulo;
+#else
+		pos = sizeof(uns) - 1 - modulo;
+#endif
+		if (!modulo)
+			hash = ROL(hash, SHIFT_BITS);
+		if (!str[i])
+			break;
+		/* Widen to uns before shifting: the byte would otherwise be
+		 * promoted to int and shifted into or past the sign bit. */
+		hash ^= (uns) str[i] << (pos * 8);
+	}
+	return hash;
+}
+
+/*
+ * Hash a block of len bytes of any alignment.
+ * An aligned block is passed to hash_block_aligned().  Otherwise the same
+ * value is computed byte by byte: the hash is rotated left by SHIFT_BITS
+ * at the start of every group of sizeof(uns) bytes (also when the block
+ * ends exactly at a group boundary) and each byte is XORed in at the
+ * position it would occupy in a word loaded from memory.
+ */
+uns
+hash_block(const byte *str, uns len)
+{
+	uns hash = 0;
+	uns i;
+	if (!UNALIGNED_PART(str, uns))
+		return hash_block_aligned(str, len);
+	for (i=0; ; i++)
+	{
+		uns modulo = i % sizeof(uns);
+		uns pos;
+#ifdef CPU_LITTLE_ENDIAN
+		pos = modulo;
+#else
+		pos = sizeof(uns) - 1 - modulo;
+#endif
+		if (!modulo)
+			hash = ROL(hash, SHIFT_BITS);
+		if (i >= len)
+			break;
+		/* Widen to uns before shifting: the byte would otherwise be
+		 * promoted to int and shifted into or past the sign bit. */
+		hash ^= (uns) str[i] << (pos * 8);
+	}
+	return hash;
+}
+#endif
+
+/*
+ * Case-insensitive hash of a zero-terminated string of any alignment.
+ * Works like the byte-by-byte branch of hash_string(), except that every
+ * byte is passed through Cupcase() first.  The hash is rotated left by
+ * SHIFT_BITS at the start of every group of sizeof(uns) bytes and each
+ * converted byte is XORed in at the position it would occupy in a word
+ * loaded from memory.
+ * NOTE(review): Cupcase() is presumably the upper-case conversion from
+ * lib/chartype.h yielding an unsigned byte value -- confirm.
+ */
+uns
+hash_string_nocase(const byte *str)
+{
+	uns hash = 0;
+	uns i;
+	for (i=0; ; i++)
+	{
+		uns modulo = i % sizeof(uns);
+		uns pos;
+#ifdef CPU_LITTLE_ENDIAN
+		pos = modulo;
+#else
+		pos = sizeof(uns) - 1 - modulo;
+#endif
+		if (!modulo)
+			hash = ROL(hash, SHIFT_BITS);
+		if (!str[i])
+			break;
+		/* Widen to uns before shifting: the value would otherwise be
+		 * shifted as an int, into or past the sign bit. */
+		hash ^= (uns) Cupcase(str[i]) << (pos * 8);
+	}
+	return hash;
+}
/*
- * Sherlock Library -- Hash Functions
+ * Hyper-super-meta-alt-control-shift extra fast str_len() and hash_*()
+ * routines
*
- * (c) 2002 Martin Mares <mj@ucw.cz>
+ * (c) 2002, Robert Spalek <robert@ucw.cz>
*/
#ifndef _SHERLOCK_HASHFUNC_H
#define _SHERLOCK_HASHFUNC_H
-uns hash_string(byte *x);
-uns hash_string_nocase(byte *x);
+#include "lib/lib.h"
+
+/*
+ * An equivalent of the Intel's rol instruction: rotate x left by the given
+ * number of bits within a word of sizeof(uns)*8 bits.  bits must be
+ * non-zero and smaller than that width, otherwise one of the two shifts
+ * is by the full width or more.  x is expected to be of type uns; both
+ * arguments are evaluated more than once.
+ */
+#define ROL(x, bits) (((x) << (bits)) | ((x) >> (sizeof(uns)*8 - (bits))))
+
+/* The following functions need str to be aligned to uns. */
+uns str_len_aligned(const byte *str) CONST;
+uns hash_string_aligned(const byte *str) CONST;
+uns hash_block_aligned(const byte *str, uns len) CONST;
+
+#ifdef CPU_ALLOW_UNALIGNED
+#define str_len(str) str_len_aligned(str)
+#define hash_string(str) hash_string_aligned(str)
+#define hash_block(str, len) hash_block_aligned(str, len)
+#else
+uns str_len(const byte *str) CONST;
+uns hash_string(const byte *str) CONST;
+uns hash_block(const byte *str, uns len) CONST;
+#endif
+
+uns hash_string_nocase(const byte *str) CONST;
+
+static inline uns hash_int(uns x) CONST;
static inline uns hash_int(uns x) { return 6442450967*x; }
-uns hash_block(byte *x, uns len);
#endif
/*
- * Checking the correctness of str_len() and str_hash() and proving, that
+ * Checking the correctness of str_len() and hash_*() and proving, that
* it is faster than the classical version ;-)
*/
-#include "lib/str_hash.h"
+#include "lib/hashfunc.h"
#include <stdlib.h>
#include <stdio.h>
static uns alignment = 0;
static void
-random_string(char *str, int len)
+random_string(byte *str, int len)
{
int i;
for (i=0; i<len; i++)
int
main(int argc, char **argv)
{
- char *strings[] = {
+ byte *strings[] = {
"",
"a",
"aa",
"aaaaaaaa",
"aaaaaaaaa",
"aaaaaaaaaa",
+ "AHOJ",
"\200aaaa",
"\200",
"\200\200",
printf("Alignment set to %d\n", alignment);
for (i=0; strings[i]; i++)
if (strlen(strings[i]) != str_len(strings[i]))
- die("Internal error on string %d", i);
+ die("Internal str_len() error on string %d", i);
printf("%d strings tested OK\n", i);
for (i=0; strings[i]; i++)
- printf("hash %2d = %08x\n", i, str_hash(strings[i]));
+ {
+ uns h1, h2;
+ h1 = hash_string(strings[i]);
+ h2 = hash_string_nocase(strings[i]);
+ if (h1 != hash_block(strings[i], str_len(strings[i])))
+ die("Internal hash_string() error on string %d", i);
+ printf("hash %2d = %08x %08x", i, h1, h2);
+ if (h1 == h2)
+ printf(" upper case?");
+ printf("\n");
+ }
for (i=0; lengths[i] >= 0; i++)
{
- char str[lengths[i] + 1 + alignment];
+ byte str[lengths[i] + 1 + alignment];
uns count = TEST_TIME / (lengths[i] + 10);
- uns el1 = 0, el2 = 0, elh = 0;
- uns tot1 = 0, tot2 = 0, hash = 0;
+ uns el1 = 0, el2 = 0, elh = 0, elhn = 0;
+ uns tot1 = 0, tot2 = 0, hash = 0, hashn = 0;
uns j;
for (j=0; j<count; j++)
{
el1 += elapsed_time();
tot2 += str_len(str + alignment);
el2 += elapsed_time();
- hash ^= str_hash(str + alignment);
+ hash ^= hash_string(str + alignment);
elh += elapsed_time();
+ hashn ^= hash_string_nocase(str + alignment);
+ elhn += elapsed_time();
}
if (tot1 != tot2)
die("Internal error during test %d", i);
printf("Test %d: strlen = %d, passes = %d, classical = %d usec, speedup = %.4f\n",
i, lengths[i], count, el1, (el1 + 0.) / el2);
- printf("\t\t total hash = %08x, hash time = %d usec\n", hash, elh);
+ printf("\t\t total hash = %08x/%08x, hash time = %d/%d usec\n", hash, hashn, elh, elhn);
}
/*
printf("test1: %d\n", hash_modify(10000000, 10000000, 99777555));
+++ /dev/null
-/*
- * Hyper-super-meta-alt-control-shift extra fast str_len() and str_hash()
- * routines
- *
- * It is always at least as fast as the classical strlen() routine and for
- * strings longer than 100 characters, it is substantially faster.
- *
- * (c) 2002, Robert Spalek <robert@ucw.cz>
- */
-
-#include "lib/lib.h"
-#include "lib/str_hash.h"
-
-/* The number of bits the hash in the function str_hash() is rotated by after
- * every pass. It should be prime with the word size. */
-#define SHIFT_BITS 5
-
-/* A bit-mask which clears higher bytes than a given threshold. */
-static uns mask_higher_bits[sizeof(uns)];
-
-static void CONSTRUCTOR
-str_hash_init(void)
-{
- uns i, j;
- char *str;
- for (i=0; i<sizeof(uns); i++)
- {
- str = (char *) (mask_higher_bits + i);
- for (j=0; j<i; j++)
- str[j] = -1;
- for (j=i; j<sizeof(uns); j++)
- str[j] = 0;
- }
-}
-
-static inline uns str_len_uns(uns x) CONST;
-
-static inline uns
-str_len_uns(uns x)
-{
- const uns sub = ((uns) -1) / 0xff;
- const uns and = sub * 0x80;
- uns a, i;
- char *bytes;
- a = (x ^ (x - sub)) & and;
- /*
- * x_2 = x - 0x01010101;
- * x_3 = x ^ x_2;
- * a = x_3 & 0x80808080;
- *
- * If none byte of x is in {0, 0x80}, then the highest bit of each byte
- * of x_2 is the same as of x. Hence x_3 has all these highest bits
- * cleared. If a == 0, then we are sure there is no zero byte in x.
- */
- if (!a)
- return sizeof(uns);
- bytes = (char *) &x;
- for (i=0; i<sizeof(uns) && bytes[i]; i++);
- return i;
-}
-
-inline uns
-str_len_aligned(const char *str)
-{
- const uns *u = (const uns *) str;
- uns len = 0;
- while (1)
- {
- uns l = str_len_uns(*u++);
- len += l;
- if (l < sizeof(uns))
- return len;
- }
-}
-
-inline uns
-str_hash_aligned(const char *str)
-{
- const uns *u = (const uns *) str;
- uns hash = 0;
- while (1)
- {
- uns last_len = str_len_uns(*u);
- hash = ROL(hash, SHIFT_BITS);
- if (last_len < sizeof(uns))
- {
- uns tmp = *u & mask_higher_bits[last_len];
- hash ^= tmp;
- return hash;
- }
- hash ^= *u++;
- }
-}
-
-#ifndef CPU_ALLOW_UNALIGNED
-uns
-str_len(const char *str)
-{
- uns shift = UNALIGNED_PART(str, uns);
- if (!shift)
- return str_len_aligned(str);
- else
- {
- uns i;
- shift = sizeof(uns) - shift;
- for (i=0; i<shift; i++)
- if (!str[i])
- return i;
- return shift + str_len_aligned(str + shift);
- }
-}
-
-uns
-str_hash(const char *str)
-{
- uns shift = UNALIGNED_PART(str, uns);
- if (!shift)
- return str_hash_aligned(str);
- else
- {
- uns hash = 0;
- uns i;
- for (i=0; ; i++)
- {
- uns modulo = i % sizeof(uns);
- uns shift;
-#ifdef CPU_LITTLE_ENDIAN
- shift = modulo;
-#else
- shift = sizeof(uns) - 1 - modulo;
-#endif
- if (!modulo)
- hash = ROL(hash, SHIFT_BITS);
- if (!str[i])
- break;
- hash ^= ((unsigned char) str[i]) << (shift * 8);
- }
- return hash;
- }
-}
-#endif
+++ /dev/null
-/*
- * Hyper-super-meta-alt-control-shift extra fast str_len() and str_hash()
- * routines
- *
- * (c) 2002, Robert Spalek <robert@ucw.cz>
- */
-
-#include "lib/lib.h"
-
-/* An equivalent of the Intel's rol instruction. */
-#define ROL(x, bits) (((x) << (bits)) | ((x) >> (sizeof(uns)*8 - (bits))))
-
-/* The following functions need str to be aligned to uns. */
-uns str_len_aligned(const char *str) CONST;
-uns str_hash_aligned(const char *str) CONST;
-
-#ifdef CPU_ALLOW_UNALIGNED
-#define str_len(str) str_len_aligned(str)
-#define str_hash(str) str_hash_aligned(str)
-#else
-uns str_len(const char *str) CONST;
-uns str_hash(const char *str) CONST;
-#endif