X-Git-Url: http://mj.ucw.cz/gitweb/?a=blobdiff_plain;f=lib%2Furl.c;h=15a731fde164f2fab86a2707e77f0187d2a71927;hb=c4bf633211b0424492b5a3937d6a6d2e0d79a4cf;hp=c4662372c99fc5afc493e83e486496a756b46890;hpb=a86e328ab75068b1c27e17e3fb33a9b5845f63aa;p=libucw.git diff --git a/lib/url.c b/lib/url.c index c4662372..15a731fd 100644 --- a/lib/url.c +++ b/lib/url.c @@ -29,9 +29,10 @@ static uns url_ignore_spaces; static uns url_ignore_underflow; -static byte *url_component_separators = ""; +static char *url_component_separators = ""; static uns url_min_repeat_count = 0x7fffffff; static uns url_max_repeat_length = 0; +static uns url_max_occurences = ~0U; static struct cf_section url_config = { CF_ITEMS { @@ -40,6 +41,7 @@ static struct cf_section url_config = { CF_STRING("ComponentSeparators", &url_component_separators), CF_UNS("MinRepeatCount", &url_min_repeat_count), CF_UNS("MaxRepeatLength", &url_max_repeat_length), + CF_UNS("MaxOccurences", &url_max_occurences), CF_END } }; @@ -58,7 +60,7 @@ enhex(uns x) } int -url_deescape(byte *s, byte *d) +url_deescape(const byte *s, byte *d) { byte *dstart = d; byte *end = d + MAX_URL_SIZE - 10; @@ -100,7 +102,7 @@ url_deescape(byte *s, byte *d) *d++ = *s++; else if (Cspace(*s)) { - byte *s0 = s; + const byte *s0 = s; while (Cspace(*s)) s++; if (!url_ignore_spaces || !(!*s || d == dstart)) @@ -121,7 +123,7 @@ url_deescape(byte *s, byte *d) } int -url_enescape(byte *s, byte *d) +url_enescape(const byte *s, byte *d) { byte *end = d + MAX_URL_SIZE - 10; unsigned int c; @@ -151,7 +153,7 @@ url_enescape(byte *s, byte *d) } int -url_enescape_friendly(byte *src, byte *dest) +url_enescape_friendly(const byte *src, byte *dest) { byte *end = dest + MAX_URL_SIZE - 10; while (*src) @@ -179,7 +181,7 @@ byte *url_proto_names[URL_PROTO_MAX] = URL_PNAMES; static int url_proto_path_flags[URL_PROTO_MAX] = URL_PATH_FLAGS; uns -identify_protocol(byte *p) +identify_protocol(const byte *p) { uns i; @@ -492,7 +494,7 @@ url_canonicalize(struct url *u) /* Pack a broken-down URL */ static byte * -append(byte *d, byte *s, byte *e) +append(byte *d, const byte *s, byte *e) { if (d) while (*s) @@ -571,7 +573,7 @@ url_error(uns err) /* Standard cookbook recipes */ int -url_canon_split_rel(byte *u, byte *buf1, byte *buf2, struct url *url, struct url *base) +url_canon_split_rel(const byte *u, byte *buf1, byte *buf2, struct url *url, struct url *base) { int err; @@ -585,7 +587,7 @@ url_canon_split_rel(byte *u, byte *buf1, byte *buf2, struct url *url, struct url } int -url_auto_canonicalize_rel(byte *src, byte *dst, struct url *base) +url_auto_canonicalize_rel(const byte *src, byte *dst, struct url *base) { byte buf1[MAX_URL_SIZE], buf2[MAX_URL_SIZE], buf3[MAX_URL_SIZE]; int err; @@ -665,13 +667,14 @@ int main(int argc, char **argv) #endif struct component { - byte *start; + const byte *start; int length; + uns count; u32 hash; }; static inline u32 -hashf(byte *start, int length) +hashf(const byte *start, int length) { u32 hf = length; while (length-- > 0) @@ -701,12 +704,12 @@ repeat_count(struct component *comp, uns count, uns len) } int -url_has_repeated_component(byte *url) +url_has_repeated_component(const byte *url) { struct component *comp; - uns comps, comp_len, rep_prefix; - byte *c; - uns i; + uns comps, comp_len, rep_prefix, hash_size, *hash, *next; + const byte *c; + uns i, j, k; for (comps=0, c=url; c; comps++) { @@ -714,9 +717,9 @@ url_has_repeated_component(byte *url) if (c) c++; } - if (comps < url_min_repeat_count) + if (comps < url_min_repeat_count && comps <= url_max_occurences) return 0; - comp = alloca(comps * sizeof(struct component)); + comp = alloca(comps * sizeof(*comp)); for (i=0, c=url; c; i++) { comp[i].start = c; @@ -732,6 +735,30 @@ url_has_repeated_component(byte *url) ASSERT(i == comps); for (i=0; i url_max_occurences) + { + hash_size = next_table_prime(comps); + hash = alloca(hash_size * sizeof(*hash)); + next = alloca(comps * sizeof(*next)); + memset(hash, 255, hash_size * sizeof(*hash)); + for (i=0; i= url_max_occurences) + return 1; + } + } + } for (comp_len = 1; comp_len <= url_max_repeat_length && comp_len <= comps; comp_len++) for (rep_prefix = 0; rep_prefix <= comps - comp_len; rep_prefix++) if (repeat_count(comp + rep_prefix, comps - rep_prefix, comp_len) >= url_min_repeat_count)