X-Git-Url: http://mj.ucw.cz/gitweb/?a=blobdiff_plain;f=lib%2Furl.c;h=a6dcf7aa06fe4f1335edbe921359136b17bed1dc;hb=318a88602c0c1b46da4a5aea6d7f376c2fcd8829;hp=9b031694ef09613f7ca36327399c3dc6475cbb45;hpb=bf285ff821b06bf89d0301bc2f1623ce4ee44869;p=libucw.git diff --git a/lib/url.c b/lib/url.c index 9b031694..a6dcf7aa 100644 --- a/lib/url.c +++ b/lib/url.c @@ -1,16 +1,41 @@ /* * Sherlock Library -- URL Functions (according to RFC 1738 and 1808) * - * (c) 1997--1999 Martin Mares, + * (c) 1997--2001 Martin Mares + * (c) 2001 Robert Spalek */ +#include "lib/lib.h" +#include "lib/url.h" +#include "lib/chartype.h" +#include "lib/conf.h" + #include #include #include -#include "lib.h" -#include "url.h" -#include "string.h" +/* Configuration */ + +static uns url_ignore_spaces; +static uns url_ignore_underflow; +static byte *url_component_separators = ""; +static uns url_min_repeat_count = 0x7fffffff; +static uns url_max_repeat_length = 0; + +static struct cfitem url_config[] = { + { "URL", CT_SECTION, NULL }, + { "IgnoreSpaces", CT_INT, &url_ignore_spaces }, + { "IgnoreUnderflow", CT_INT, &url_ignore_underflow }, + { "ComponentSeparators", CT_STRING, &url_component_separators }, + { "MinRepeatCount", CT_INT, &url_min_repeat_count }, + { "MaxRepeatLength", CT_INT, &url_max_repeat_length }, + { NULL, CT_STOP, NULL } +}; + +static void CONSTRUCTOR url_init_config(void) +{ + cf_register(url_config); +} /* Escaping and de-escaping */ @@ -23,6 +48,7 @@ enhex(uns x) int url_deescape(byte *s, byte *d) { + byte *dstart = d; byte *end = d + MAX_URL_SIZE - 10; while (*s) { @@ -58,8 +84,23 @@ url_deescape(byte *s, byte *d) *d++ = val; s += 3; } - else if (*s >= 0x20) + else if (*s > 0x20) *d++ = *s++; + else if (Cspace(*s)) + { + byte *s0 = s; + while (Cspace(*s)) + s++; + if (!url_ignore_spaces || !(!*s || d == dstart)) + { + while (Cspace(*s0)) + { + if (d >= end) + return URL_ERR_TOO_LONG; + *d++ = *s0++; + } + } + } else return URL_ERR_INVALID_CHAR; } @@ -256,17 +297,23 @@ relpath_merge(struct url *u, struct url *b) else if (a[1] == '.' && (a[2] == '/' || !a[2])) /* "../" */ { a += 2; - if (d <= u->buf + 1) - /* - * RFC 1808 says we should leave ".." as a path segment, but - * we intentionally break the rule and refuse the URL. - */ - return URL_PATH_UNDERFLOW; - d--; /* Discard trailing slash */ - while (d[-1] != '/') - d--; if (a[0]) a++; + if (d <= u->buf + 1) + { + /* + * RFC 1808 says we should leave ".." as a path segment, but + * we intentionally break the rule and refuse the URL. + */ + if (!url_ignore_underflow) + return URL_PATH_UNDERFLOW; + } + else + { + d--; /* Discard trailing slash */ + while (d[-1] != '/') + d--; + } continue; } } @@ -302,7 +349,6 @@ copy: /* Combine part of old URL with the new one */ int url_normalize(struct url *u, struct url *b) { - byte *k; int err; /* Basic checks */ @@ -375,14 +421,14 @@ url_canonicalize(struct url *u) kill_end_dot(u->host); if ((!u->rest || !*u->rest) && url_proto_path_flags[u->protoid]) u->rest = "/"; - if (c = strchr(u->rest, '#')) /* Kill fragment reference */ + if (u->rest && (c = strchr(u->rest, '#'))) /* Kill fragment reference */ *c = 0; return 0; } /* Pack a broken-down URL */ -byte * +static byte * append(byte *d, byte *s, byte *e) { if (d) @@ -454,7 +500,7 @@ url_error(uns err) return errmsg[err]; } -/* A "macro" for canonical split */ +/* Standard cookbook recipes */ int url_canon_split(byte *u, byte *buf1, byte *buf2, struct url *url) @@ -470,6 +516,19 @@ url_canon_split(byte *u, byte *buf1, byte *buf2, struct url *url) return url_canonicalize(url); } +int +url_auto_canonicalize(byte *src, byte *dst) +{ + byte buf1[MAX_URL_SIZE], buf2[MAX_URL_SIZE], buf3[MAX_URL_SIZE]; + int err; + struct url ur; + + (void)((err = url_canon_split(src, buf1, buf2, &ur)) || + (err = url_pack(&ur, buf3)) || + (err = url_enescape(buf3, dst))); + return err; +} + /* Testing */ #ifdef TEST @@ -533,3 +592,78 @@ int main(int argc, char **argv) } #endif + +struct component { + byte *start; + int length; + u32 hash; +}; + +static inline u32 +hashf(byte *start, int length) +{ + u32 hf = length; + while (length-- > 0) + hf = (hf << 8 | hf >> 24) ^ *start++; + return hf; +} + +static inline uns +repeat_count(struct component *comp, uns count, uns len) +{ + struct component *orig_comp = comp; + uns found = 0; + while (1) + { + uns i; + comp += len; + count -= len; + found++; + if (count < len) + return found; + for (i=0; i= url_min_repeat_count) + return comp_len; + return 0; +}