From 735c750e2f4e958ad4de51b43e857df1b6b03e28 Mon Sep 17 00:00:00 2001 From: Martin Mares Date: Thu, 31 Jan 2002 15:12:08 +0000 Subject: [PATCH] Added a reference to the original article. Improved the random generator as suggested by Robert. --- lib/bitsig.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/lib/bitsig.c b/lib/bitsig.c index 1f76fa91..2e8d5c1f 100644 --- a/lib/bitsig.c +++ b/lib/bitsig.c @@ -3,7 +3,9 @@ * * (c) 2002 Martin Mares * - * FIXME: Ref to the original article. + * Greatly inspired by: Faloutsos, C. and Christodoulakis, S.: Signature files + * (An access method for documents and its analytical performance evaluation), + * ACM Trans. Office Inf. Syst., 2(4):267--288, Oct. 1984. * * This data structure provides a very compact representation * of a set of strings with insertion and membership search, @@ -44,7 +46,8 @@ struct bitsig { uns l, m, n, maxn, max_m_mult; - u32 tmp; + u32 hash[4]; + uns hindex; byte array[0]; }; @@ -74,24 +77,25 @@ static void bitsig_hash_init(struct bitsig *b, byte *item) { struct MD5Context c; - u32 digest[4]; MD5Init(&c); MD5Update(&c, item, strlen(item)); - MD5Final((byte *) digest, &c); - b->tmp = digest[0]; + MD5Final((byte *) b->hash, &c); + b->hindex = 0; } static inline uns bitsig_hash_bit(struct bitsig *b) { + u32 h; do { - /* FIXME: Check */ - b->tmp *= 3006477127U; + h = b->hash[b->hindex]; + b->hash[b->hindex] *= 3006477127U; + b->hindex = (b->hindex+1) % 4; } - while (b->tmp >= b->max_m_mult); - return b->tmp % b->m; + while (h >= b->max_m_mult); + return h % b->m; } int @@ -133,10 +137,11 @@ bitsig_insert(struct bitsig *b, byte *item) #ifdef TEST #include +#include -int main(void) +int main(int argc, char **argv) { - struct bitsig *b = bitsig_init(10, 23000); + struct bitsig *b = bitsig_init(atol(argv[1]), atol(argv[2])); byte buf[1024]; while (fgets(buf, 1024, stdin)) -- 2.39.2