X-Git-Url: http://mj.ucw.cz/gitweb/?a=blobdiff_plain;f=lib%2Fbitsig.c;h=ed510fad0d4988c2fe9679d482f89bee715a9fde;hb=b42162f5526360acc6930e3d2e296af1fef08e63;hp=1f76fa912c1e58148bc7928840963c560c3fe658;hpb=fa5fc267286efc42f0c610c4e54a117a9bc07232;p=libucw.git diff --git a/lib/bitsig.c b/lib/bitsig.c index 1f76fa91..ed510fad 100644 --- a/lib/bitsig.c +++ b/lib/bitsig.c @@ -3,7 +3,9 @@ * * (c) 2002 Martin Mares * - * FIXME: Ref to the original article. + * Greatly inspired by: Faloutsos, C. and Christodoulakis, S.: Signature files + * (An access method for documents and its analytical performance evaluation), + * ACM Trans. Office Inf. Syst., 2(4):267--288, Oct. 1984. * * This data structure provides a very compact representation * of a set of strings with insertion and membership search, @@ -34,6 +36,9 @@ * We leave L and an upper bound for N as parameters set during * creation of the structure. Currently, the structure is limited * to 4 Gb = 512 MB. + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. */ #include "lib/lib.h" @@ -41,10 +46,12 @@ #include "lib/md5.h" #include +#include struct bitsig { uns l, m, n, maxn, max_m_mult; - u32 tmp; + u32 hash[4]; + uns hindex; byte array[0]; }; @@ -70,28 +77,35 @@ bitsig_init(uns perrlog, uns maxn) return b; } +void +bitsig_free(struct bitsig *b) +{ + xfree(b); +} + static void bitsig_hash_init(struct bitsig *b, byte *item) { struct MD5Context c; - u32 digest[4]; MD5Init(&c); MD5Update(&c, item, strlen(item)); - MD5Final((byte *) digest, &c); - b->tmp = digest[0]; + MD5Final((byte *) b->hash, &c); + b->hindex = 0; } static inline uns bitsig_hash_bit(struct bitsig *b) { + u32 h; do { - /* FIXME: Check */ - b->tmp *= 3006477127U; + h = b->hash[b->hindex]; + b->hash[b->hindex] *= 3006477127U; + b->hindex = (b->hindex+1) % 4; } - while (b->tmp >= b->max_m_mult); - return b->tmp % b->m; + while (h >= b->max_m_mult); + return h % b->m; } int @@ -133,10 +147,11 @@ bitsig_insert(struct bitsig *b, byte *item) #ifdef TEST #include +#include -int main(void) +int main(int argc, char **argv) { - struct bitsig *b = bitsig_init(10, 23000); + struct bitsig *b = bitsig_init(atol(argv[1]), atol(argv[2])); byte buf[1024]; while (fgets(buf, 1024, stdin))