X-Git-Url: http://mj.ucw.cz/gitweb/?a=blobdiff_plain;ds=sidebyside;f=lib%2Fbitsig.c;h=9b072e1fc3dcc5c038bfd00645779b5cd4fd101e;hb=bd1f0b7994687ae6f327033b770713ad2085aeab;hp=1f76fa912c1e58148bc7928840963c560c3fe658;hpb=fa5fc267286efc42f0c610c4e54a117a9bc07232;p=libucw.git diff --git a/lib/bitsig.c b/lib/bitsig.c index 1f76fa91..9b072e1f 100644 --- a/lib/bitsig.c +++ b/lib/bitsig.c @@ -1,9 +1,11 @@ /* - * Bit Array Signatures -- A Dubious Detector of Duplicates + * UCW Library -- Bit Array Signatures -- A Dubious Detector of Duplicates * * (c) 2002 Martin Mares * - * FIXME: Ref to the original article. + * Greatly inspired by: Faloutsos, C. and Christodoulakis, S.: Signature files + * (An access method for documents and its analytical performance evaluation), + * ACM Trans. Office Inf. Syst., 2(4):267--288, Oct. 1984. * * This data structure provides a very compact representation * of a set of strings with insertion and membership search, @@ -34,6 +36,9 @@ * We leave L and an upper bound for N as parameters set during * creation of the structure. Currently, the structure is limited * to 4 Gb = 512 MB. + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. */ #include "lib/lib.h" @@ -44,7 +49,8 @@ struct bitsig { uns l, m, n, maxn, max_m_mult; - u32 tmp; + u32 hash[4]; + uns hindex; byte array[0]; }; @@ -70,28 +76,35 @@ bitsig_init(uns perrlog, uns maxn) return b; } +void +bitsig_free(struct bitsig *b) +{ + xfree(b); +} + static void bitsig_hash_init(struct bitsig *b, byte *item) { struct MD5Context c; - u32 digest[4]; MD5Init(&c); MD5Update(&c, item, strlen(item)); - MD5Final((byte *) digest, &c); - b->tmp = digest[0]; + MD5Final((byte *) b->hash, &c); + b->hindex = 0; } static inline uns bitsig_hash_bit(struct bitsig *b) { + u32 h; do { - /* FIXME: Check */ - b->tmp *= 3006477127U; + h = b->hash[b->hindex]; + b->hash[b->hindex] *= 3006477127U; + b->hindex = (b->hindex+1) % 4; } - while (b->tmp >= b->max_m_mult); - return b->tmp % b->m; + while (h >= b->max_m_mult); + return h % b->m; } int @@ -133,10 +146,11 @@ bitsig_insert(struct bitsig *b, byte *item) #ifdef TEST #include +#include -int main(void) +int main(int argc, char **argv) { - struct bitsig *b = bitsig_init(10, 23000); + struct bitsig *b = bitsig_init(atol(argv[1]), atol(argv[2])); byte buf[1024]; while (fgets(buf, 1024, stdin))