X-Git-Url: http://mj.ucw.cz/gitweb/?a=blobdiff_plain;ds=inline;f=lib%2Furl.c;h=d5cbcea21a34d718b7e9b70f6ee09131574deaa4;hb=4394a6c303de4d94972f9a7b37a52f6cd799690c;hp=73e3b50134b1d3e6a75c7a0385a19be8c99944b6;hpb=b3b94b1896934e04eeeb6a07ab4065fd8f4ca4b3;p=libucw.git diff --git a/lib/url.c b/lib/url.c index 73e3b501..d5cbcea2 100644 --- a/lib/url.c +++ b/lib/url.c @@ -1,8 +1,18 @@ /* - * Sherlock Library -- URL Functions (according to RFC 1738 and 1808) + * UCW Library -- URL Functions * - * (c) 1997--2001 Martin Mares - * (c) 2001 Robert Spalek + * (c) 1997--2004 Martin Mares + * (c) 2001--2005 Robert Spalek + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + * + * The URL syntax corresponds to RFC 2396 with several exceptions: + * + * o Escaping of special characters still follows RFC 1738. + * o Interpretation of path parameters follows RFC 1808. + * + * XXX: The buffer handling in this module is really horrible, but it works. */ #include "lib/lib.h" @@ -13,6 +23,7 @@ #include #include #include +#include /* Configuration */ @@ -23,13 +34,13 @@ static uns url_min_repeat_count = 0x7fffffff; static uns url_max_repeat_length = 0; static struct cfitem url_config[] = { - { "URL", CT_SECTION, NULL }, - { "IgnoreSpaces", CT_INT, &url_ignore_spaces }, - { "IgnoreUnderflow", CT_INT, &url_ignore_underflow }, - { "ComponentSeparators", CT_STRING, &url_component_separators }, - { "MinRepeatCount", CT_INT, &url_min_repeat_count }, - { "MaxRepeatLength", CT_INT, &url_max_repeat_length }, - { NULL, CT_STOP, NULL } + { "URL", CT_SECTION, NULL }, + { "IgnoreSpaces", CT_INT, &url_ignore_spaces }, + { "IgnoreUnderflow", CT_INT, &url_ignore_underflow }, + { "ComponentSeparators", CT_STRING, &url_component_separators }, + { "MinRepeatCount", CT_INT, &url_min_repeat_count }, + { "MaxRepeatLength", CT_INT, &url_max_repeat_length }, + { NULL, CT_STOP, NULL } }; static void CONSTRUCTOR url_init_config(void) @@ -138,6 +149,29 @@ url_enescape(byte *s, byte *d) return 0; } +int +url_enescape_friendly(byte *src, byte *dest) +{ + byte *end = dest + MAX_URL_SIZE - 10; + while (*src) + { + if (dest >= end) + return URL_ERR_TOO_LONG; + if (*src < NCC_MAX) + *dest++ = NCC_CHARS[*src++]; + else if (*src >= 0x20 && *src < 0x7f) + *dest++ = *src++; + else + { + *dest++ = '%'; + *dest++ = enhex(*src >> 4); + *dest++ = enhex(*src++ & 0x0f); + } + } + *dest = 0; + return 0; +} + /* Split an URL (several parts may be copied to the destination buffer) */ byte *url_proto_names[URL_PROTO_MAX] = URL_PNAMES; @@ -190,23 +224,38 @@ url_split(byte *s, struct url *u, byte *d) { if (s[1] == '/') /* Host spec */ { - byte *q, *w, *e; + byte *q, *e; + byte *at = NULL; char *ep; s += 2; q = d; - while (*s && *s != '/') /* Copy user:passwd@host:port */ - *d++ = *s++; + while (*s && *s != '/' && *s != '?') /* Copy user:passwd@host:port */ + { + if (*s != '@') + *d++ = *s; + else if (!at) + { + *d++ = 0; + at = d; + } + else /* This shouldn't happen with sane URL's, but we need to be sure */ + *d++ = NCC_AT; + s++; + } *d++ = 0; - w = strchr(q, '@'); - if (w) /* user:passwd present */ + if (at) /* user:passwd present */ { - *w++ = 0; u->user = q; + if (e = strchr(q, ':')) + { + *e++ = 0; + u->pass = e; + } } else - w = q; - e = strchr(w, ':'); + at = q; + e = strchr(at, ':'); if (e) /* host:port present */ { uns p; @@ -217,7 +266,7 @@ url_split(byte *s, struct url *u, byte *d) else if (p) /* Port 0 (e.g. in :/) is treated as default port */ u->port = p; } - u->host = w; + u->host = at; } } @@ -241,7 +290,7 @@ relpath_merge(struct url *u, struct url *b) if (a[0] == '/') /* Absolute path => OK */ return 0; - if (o[0] != '/') + if (o[0] != '/' && o[0] != '?') return URL_PATH_UNDERFLOW; if (!a[0]) /* Empty URL -> inherit everything */ @@ -352,9 +401,9 @@ url_normalize(struct url *u, struct url *b) int err; /* Basic checks */ - if (url_proto_path_flags[u->protoid] && !u->host || - u->host && !*u->host || + if (url_proto_path_flags[u->protoid] && (!u->host || !*u->host) || !u->host && u->user || + !u->user && u->pass || !u->rest) return URL_SYNTAX_ERROR; @@ -371,12 +420,25 @@ url_normalize(struct url *u, struct url *b) { u->host = b->host; u->user = b->user; + u->pass = b->pass; u->port = b->port; if (err = relpath_merge(u, b)) return err; } } + /* Change path "?" to "/?" because it's the true meaning */ + if (u->rest[0] == '?') + { + int l = strlen(u->rest); + if (u->bufend - u->buf < l+1) + return URL_ERR_TOO_LONG; + u->buf[0] = '/'; + memcpy(u->buf+1, u->rest, l+1); + u->rest = u->buf; + u->buf += l+2; + } + /* Fill in missing info */ if (u->port == ~0U) u->port = std_ports[u->protoid]; @@ -406,8 +468,8 @@ kill_end_dot(byte *b) if (b) { k = b + strlen(b) - 1; - if (k > b && *k == '.') - *k = 0; + while (k > b && *k == '.') + *k-- = 0; } } @@ -458,6 +520,11 @@ url_pack(struct url *u, byte *d) if (u->user) { d = append(d, u->user, e); + if (u->pass) + { + d = append(d, ":", e); + d = append(d, u->pass, e); + } d = append(d, "@", e); } d = append(d, u->host, e); @@ -500,10 +567,10 @@ url_error(uns err) return errmsg[err]; } -/* A "macro" for canonical split */ +/* Standard cookbook recipes */ int -url_canon_split(byte *u, byte *buf1, byte *buf2, struct url *url) +url_canon_split_rel(byte *u, byte *buf1, byte *buf2, struct url *url, struct url *base) { int err; @@ -511,11 +578,24 @@ url_canon_split(byte *u, byte *buf1, byte *buf2, struct url *url) return err; if (err = url_split(buf1, url, buf2)) return err; - if (err = url_normalize(url, NULL)) + if (err = url_normalize(url, base)) return err; return url_canonicalize(url); } +int +url_auto_canonicalize_rel(byte *src, byte *dst, struct url *base) +{ + byte buf1[MAX_URL_SIZE], buf2[MAX_URL_SIZE], buf3[MAX_URL_SIZE]; + int err; + struct url ur; + + (void)((err = url_canon_split_rel(src, buf1, buf2, &ur, base)) || + (err = url_pack(&ur, buf3)) || + (err = url_enescape(buf3, dst))); + return err; +} + /* Testing */ #ifdef TEST @@ -525,9 +605,12 @@ int main(int argc, char **argv) char buf1[MAX_URL_SIZE], buf2[MAX_URL_SIZE], buf3[MAX_URL_SIZE], buf4[MAX_URL_SIZE]; int err; struct url url, url0; + char *base = "http://mj@www.hell.org/123/sub_dir/index.html;param?query&zzz/subquery#fragment"; - if (argc != 2) + if (argc != 2 && argc != 3) return 1; + if (argc == 3) + base = argv[2]; if (err = url_deescape(argv[1], buf1)) { printf("deesc: error %d\n", err); @@ -539,8 +622,8 @@ int main(int argc, char **argv) printf("split: error %d\n", err); return 1; } - printf("split: @%s@%s@%s@%d@%s\n", url.protocol, url.user, url.host, url.port, url.rest); - if (err = url_split("http://mj@www.hell.org/123/sub_dir/index.html;param?query&zzz/subquery#fragment", &url0, buf3)) + printf("split: @%s@%s@%s@%s@%d@%s\n", url.protocol, url.user, url.pass, url.host, url.port, url.rest); + if (err = url_split(base, &url0, buf3)) { printf("split base: error %d\n", err); return 1; @@ -550,19 +633,19 @@ int main(int argc, char **argv) printf("normalize base: error %d\n", err); return 1; } - printf("base: @%s@%s@%s@%d@%s\n", url0.protocol, url0.user, url0.host, url0.port, url0.rest); + printf("base: @%s@%s@%s@%s@%d@%s\n", url0.protocol, url0.user, url0.pass, url0.host, url0.port, url0.rest); if (err = url_normalize(&url, &url0)) { printf("normalize: error %d\n", err); return 1; } - printf("normalize: @%s@%s@%s@%d@%s\n", url.protocol, url.user, url.host, url.port, url.rest); + printf("normalize: @%s@%s@%s@%s@%d@%s\n", url.protocol, url.user, url.pass, url.host, url.port, url.rest); if (err = url_canonicalize(&url)) { printf("canonicalize: error %d\n", err); return 1; } - printf("canonicalize: @%s@%s@%s@%d@%s\n", url.protocol, url.user, url.host, url.port, url.rest); + printf("canonicalize: @%s@%s@%s@%s@%d@%s\n", url.protocol, url.user, url.pass, url.host, url.port, url.rest); if (err = url_pack(&url, buf4)) { printf("pack: error %d\n", err);