From: Pavel Charvat Date: Tue, 8 Jan 2008 11:22:45 +0000 (+0100) Subject: Slightly simplified the URL loop detector and fixed one bug. X-Git-Tag: holmes-import~475^2 X-Git-Url: http://mj.ucw.cz/gitweb/?a=commitdiff_plain;h=6fa5bb3a94bcef8bc27b5fad202e4e0eadaaeba8;p=libucw.git Slightly simplified the URL loop detector and fixed one bug. --- diff --git a/lib/url.c b/lib/url.c index ecc18b00..15a731fd 100644 --- a/lib/url.c +++ b/lib/url.c @@ -706,10 +706,10 @@ repeat_count(struct component *comp, uns count, uns len) int url_has_repeated_component(const byte *url) { - struct component *comp, **hash; - uns comps, comp_len, rep_prefix, hash_size; + struct component *comp; + uns comps, comp_len, rep_prefix, hash_size, *hash, *next; const byte *c; - uns i, j; + uns i, j, k; for (comps=0, c=url; c; comps++) { @@ -737,22 +737,26 @@ url_has_repeated_component(const byte *url) comp[i].hash = hashf(comp[i].start, comp[i].length); if (comps > url_max_occurences) { - hash_size = next_table_prime(comps * 2); + hash_size = next_table_prime(comps); hash = alloca(hash_size * sizeof(*hash)); - bzero(hash, hash_size * sizeof(*hash)); + next = alloca(comps * sizeof(*next)); + memset(hash, 255, hash_size * sizeof(*hash)); for (i=0; istart, comp[i].start, comp[i].length)) - if (++j == hash_size) - j = 0; - if (!hash[j]) + for (k = hash[j]; ~k && (comp[i].hash != comp[k].hash || comp[i].length != comp[k].length || + memcmp(comp[k].start, comp[i].start, comp[i].length)); k = next[k]); + if (!~k) { - hash[j] = &comp[i]; + next[i] = hash[j]; + hash[j] = i; comp[i].count = 1; } - else if (hash[j]->count++ >= url_max_occurences) - return 1; + else + { + if (comp[k].count++ >= url_max_occurences) + return 1; + } } } for (comp_len = 1; comp_len <= url_max_repeat_length && comp_len <= comps; comp_len++)