2 * Hyper-super-meta-alt-control-shift extra fast str_len() and hash_*()
5 * It is always at least as fast as the classical strlen() routine and for
6 * strings longer than 100 characters, it is substantially faster.
8 * (c) 2002, Robert Spalek <robert@ucw.cz>
12 #include "lib/hashfunc.h"
13 #include "lib/chartype.h"
15 /* The number of bits the hash in the function hash_*() is rotated by after
16 * every pass. It should be prime with the word size. */
19 /* A bit-mask which clears higher bytes than a given threshold. */
20 static uns mask_higher_bits[sizeof(uns)];
22 static void CONSTRUCTOR
27 for (i=0; i<sizeof(uns); i++)
29 str = (byte *) (mask_higher_bits + i);
32 for (j=i; j<sizeof(uns); j++)
37 static inline uns CONST
40 const uns sub = ((uns) -1) / 0xff;
41 const uns and = sub * 0x80;
44 a = (x ^ (x - sub)) & and;
46 * x_2 = x - 0x01010101;
48 * a = x_3 & 0x80808080;
50 * If no byte of x is in {0, 0x80}, then the highest bit of each byte
51 * of x_2 is the same as of x. Hence x_3 has all these highest bits
52 * cleared. If a == 0, then we are sure there is no zero byte in x.
57 for (i=0; i<sizeof(uns) && bytes[i]; i++);
62 str_len_aligned(const byte *str)
64 const uns *u = (const uns *) str;
68 uns l = str_len_uns(*u++);
76 hash_string_aligned(const byte *str)
78 const uns *u = (const uns *) str;
82 uns last_len = str_len_uns(*u);
83 hash = ROL(hash, SHIFT_BITS);
84 if (last_len < sizeof(uns))
86 uns tmp = *u & mask_higher_bits[last_len];
95 hash_block_aligned(const byte *str, uns len)
97 const uns *u = (const uns *) str;
99 while (len >= sizeof(uns))
101 hash = ROL(hash, SHIFT_BITS) ^ *u++;
104 hash = ROL(hash, SHIFT_BITS) ^ (*u & mask_higher_bits[len]);
108 #ifndef CPU_ALLOW_UNALIGNED
110 str_len(const byte *str)
112 uns shift = UNALIGNED_PART(str, uns);
114 return str_len_aligned(str);
118 shift = sizeof(uns) - shift;
119 for (i=0; i<shift; i++)
122 return shift + str_len_aligned(str + shift);
127 hash_string(const byte *str)
129 uns shift = UNALIGNED_PART(str, uns);
131 return hash_string_aligned(str);
138 uns modulo = i % sizeof(uns);
140 #ifdef CPU_LITTLE_ENDIAN
143 shift = sizeof(uns) - 1 - modulo;
146 hash = ROL(hash, SHIFT_BITS);
149 hash ^= str[i] << (shift * 8);
156 hash_block(const byte *str, uns len)
158 uns shift = UNALIGNED_PART(str, uns);
160 return hash_block_aligned(str, len);
167 uns modulo = i % sizeof(uns);
169 #ifdef CPU_LITTLE_ENDIAN
172 shift = sizeof(uns) - 1 - modulo;
175 hash = ROL(hash, SHIFT_BITS);
178 hash ^= str[i] << (shift * 8);
186 hash_string_nocase(const byte *str)
192 uns modulo = i % sizeof(uns);
194 #ifdef CPU_LITTLE_ENDIAN
197 shift = sizeof(uns) - 1 - modulo;
200 hash = ROL(hash, SHIFT_BITS);
203 hash ^= Cupcase(str[i]) << (shift * 8);