X-Git-Url: http://mj.ucw.cz/gitweb/?a=blobdiff_plain;f=lib%2Furl.c;h=d98c3a79832edda5e780ab03445a2b8fb9ffce35;hb=c79a2911da7be63aaf4abec6b3ffad36dd5bcfb1;hp=c345a784cf917d3e4c96d635aa5dc123ab2f20d6;hpb=2fe336a3b80deaedf0134bf1a820538bfbe463c5;p=libucw.git diff --git a/lib/url.c b/lib/url.c index c345a784..d98c3a79 100644 --- a/lib/url.c +++ b/lib/url.c @@ -1,7 +1,11 @@ /* * Sherlock Library -- URL Functions (according to RFC 1738 and 1808) * - * (c) 1997--2001 Martin Mares + * (c) 1997--2002 Martin Mares + * (c) 2001 Robert Spalek + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. */ #include "lib/lib.h" @@ -12,16 +16,23 @@ #include #include #include +#include /* Configuration */ static uns url_ignore_spaces; static uns url_ignore_underflow; +static byte *url_component_separators = ""; +static uns url_min_repeat_count = 0x7fffffff; +static uns url_max_repeat_length = 0; static struct cfitem url_config[] = { { "URL", CT_SECTION, NULL }, { "IgnoreSpaces", CT_INT, &url_ignore_spaces }, { "IgnoreUnderflow", CT_INT, &url_ignore_underflow }, + { "ComponentSeparators", CT_STRING, &url_component_separators }, + { "MinRepeatCount", CT_INT, &url_min_repeat_count }, + { "MaxRepeatLength", CT_INT, &url_max_repeat_length }, { NULL, CT_STOP, NULL } }; @@ -196,6 +207,11 @@ url_split(byte *s, struct url *u, byte *d) { *w++ = 0; u->user = q; + if (e = strchr(q, ':')) + { + *e++ = 0; + u->pass = e; + } } else w = q; @@ -348,6 +364,7 @@ url_normalize(struct url *u, struct url *b) if (url_proto_path_flags[u->protoid] && !u->host || u->host && !*u->host || !u->host && u->user || + !u->user && u->pass || !u->rest) return URL_SYNTAX_ERROR; @@ -364,6 +381,7 @@ url_normalize(struct url *u, struct url *b) { u->host = b->host; u->user = b->user; + u->pass = b->pass; u->port = b->port; if (err = relpath_merge(u, b)) return err; @@ -399,8 +417,8 @@ kill_end_dot(byte *b) if (b) { k = b + strlen(b) - 1; - if (k > b && *k == '.') - *k = 0; + while (k > b && *k == '.') + *k-- = 0; } } @@ -451,6 +469,11 @@ url_pack(struct url *u, byte *d) if (u->user) { d = append(d, u->user, e); + if (u->pass) + { + d = append(d, ":", e); + d = append(d, u->pass, e); + } d = append(d, "@", e); } d = append(d, u->host, e); @@ -493,7 +516,7 @@ url_error(uns err) return errmsg[err]; } -/* A "macro" for canonical split */ +/* Standard cookbook recipes */ int url_canon_split(byte *u, byte *buf1, byte *buf2, struct url *url) @@ -509,6 +532,19 @@ url_canon_split(byte *u, byte *buf1, byte *buf2, struct url *url) return url_canonicalize(url); } +int +url_auto_canonicalize(byte *src, byte *dst) +{ + byte buf1[MAX_URL_SIZE], buf2[MAX_URL_SIZE], buf3[MAX_URL_SIZE]; + int err; + struct url ur; + + (void)((err = url_canon_split(src, buf1, buf2, &ur)) || + (err = url_pack(&ur, buf3)) || + (err = url_enescape(buf3, dst))); + return err; +} + /* Testing */ #ifdef TEST @@ -532,7 +568,7 @@ int main(int argc, char **argv) printf("split: error %d\n", err); return 1; } - printf("split: @%s@%s@%s@%d@%s\n", url.protocol, url.user, url.host, url.port, url.rest); + printf("split: @%s@%s@%s@%s@%d@%s\n", url.protocol, url.user, url.pass, url.host, url.port, url.rest); if (err = url_split("http://mj@www.hell.org/123/sub_dir/index.html;param?query&zzz/subquery#fragment", &url0, buf3)) { printf("split base: error %d\n", err); @@ -543,19 +579,19 @@ int main(int argc, char **argv) printf("normalize base: error %d\n", err); return 1; } - printf("base: @%s@%s@%s@%d@%s\n", url0.protocol, url0.user, url0.host, url0.port, url0.rest); + printf("base: @%s@%s@%s@%s@%d@%s\n", url0.protocol, url0.user, url0.pass, url0.host, url0.port, url0.rest); if (err = url_normalize(&url, &url0)) { printf("normalize: error %d\n", err); return 1; } - printf("normalize: @%s@%s@%s@%d@%s\n", url.protocol, url.user, url.host, url.port, url.rest); + printf("normalize: @%s@%s@%s@%s@%d@%s\n", url.protocol, url.user, url.pass, url.host, url.port, url.rest); if (err = url_canonicalize(&url)) { printf("canonicalize: error %d\n", err); return 1; } - printf("canonicalize: @%s@%s@%s@%d@%s\n", url.protocol, url.user, url.host, url.port, url.rest); + printf("canonicalize: @%s@%s@%s@%s@%d@%s\n", url.protocol, url.user, url.pass, url.host, url.port, url.rest); if (err = url_pack(&url, buf4)) { printf("pack: error %d\n", err); @@ -572,3 +608,78 @@ int main(int argc, char **argv) } #endif + +struct component { + byte *start; + int length; + u32 hash; +}; + +static inline u32 +hashf(byte *start, int length) +{ + u32 hf = length; + while (length-- > 0) + hf = (hf << 8 | hf >> 24) ^ *start++; + return hf; +} + +static inline uns +repeat_count(struct component *comp, uns count, uns len) +{ + struct component *orig_comp = comp; + uns found = 0; + while (1) + { + uns i; + comp += len; + count -= len; + found++; + if (count < len) + return found; + for (i=0; i= url_min_repeat_count) + return comp_len; + return 0; +}