X-Git-Url: http://mj.ucw.cz/gitweb/?a=blobdiff_plain;f=ucw%2Furl.c;h=6d940018d2165acbb27124bb094a16ff958bcc0a;hb=39b28bffc195348b93294b5fa0a8b9e87ea7317a;hp=74d8c1d5facf68f976360052d31891019b298f7d;hpb=031256ad2e123eec58521f8e3eb9496c197641d2;p=libucw.git diff --git a/ucw/url.c b/ucw/url.c index 74d8c1d5..6d940018 100644 --- a/ucw/url.c +++ b/ucw/url.c @@ -7,18 +7,14 @@ * This software may be freely distributed and used according to the terms * of the GNU Lesser General Public License. * - * The URL syntax corresponds to RFC 2396 with several exceptions: - * - * o Escaping of special characters still follows RFC 1738. - * o Interpretation of path parameters follows RFC 1808. - * * XXX: The buffer handling in this module is really horrible, but it works. */ -#include "ucw/lib.h" -#include "ucw/url.h" -#include "ucw/chartype.h" -#include "ucw/conf.h" +#include +#include +#include +#include +#include #include #include @@ -34,6 +30,7 @@ static uns url_min_repeat_count = 0x7fffffff; static uns url_max_repeat_length = 0; static uns url_max_occurences = ~0U; +#ifndef TEST static struct cf_section url_config = { CF_ITEMS { CF_UNS("IgnoreSpaces", &url_ignore_spaces), @@ -50,6 +47,7 @@ static void CONSTRUCTOR url_init_config(void) { cf_declare_section("URL", &url_config, 0); } +#endif /* Escaping and de-escaping */ @@ -60,10 +58,10 @@ enhex(uns x) } int -url_deescape(const byte *s, byte *d) +url_deescape(const char *s, char *d) { - byte *dstart = d; - byte *end = d + MAX_URL_SIZE - 10; + char *dstart = d; + char *end = d + MAX_URL_SIZE - 10; while (*s) { if (d >= end) @@ -94,15 +92,21 @@ url_deescape(const byte *s, byte *d) val = NCC_AND; break; case '#': val = NCC_HASH; break; + case '$': + val = NCC_DOLLAR; break; + case '+': + val = NCC_PLUS; break; + case ',': + val = NCC_COMMA; break; } *d++ = val; s += 3; } - else if (*s > 0x20) + else if ((byte) *s > 0x20) *d++ = *s++; else if (Cspace(*s)) { - const byte *s0 = s; + const char *s0 = s; while (Cspace(*s)) s++; if (!url_ignore_spaces || !(!*s || d == dstart)) @@ -123,25 +127,25 @@ url_deescape(const byte *s, byte *d) } int -url_enescape(const byte *s, byte *d) +url_enescape(const char *s, char *d) { - byte *end = d + MAX_URL_SIZE - 10; + char *end = d + MAX_URL_SIZE - 10; unsigned int c; while (c = *s) { if (d >= end) return URL_ERR_TOO_LONG; - if (Calnum(c) || /* RFC 1738(2.2): Only alphanumerics ... */ - c == '$' || c == '-' || c == '_' || c == '.' || c == '+' || /* ... and several other exceptions ... */ - c == '!' || c == '*' || c == '\'' || c == '(' || c == ')' || - c == ',' || - c == '/' || c == '?' || c == ':' || c == '@' || /* ... and reserved chars used for reserved purpose */ - c == '=' || c == '&' || c == '#' || c == ';') + if (Calnum(c) || /* RFC 2396 (2.1-2.3): Only alphanumerics ... */ + c == '!' || c == '*' || c == '\'' || c == '(' || c == ')' || /* ... and some exceptions and reserved chars */ + c == '$' || c == '-' || c == '_' || c == '.' || c == '+' || + c == ',' || c == '=' || c == '&' || c == '#' || c == ';' || + c == '/' || c == '?' || c == ':' || c == '@' || c == '~' + ) *d++ = *s++; else { - uns val = (*s < NCC_MAX) ? NCC_CHARS[*s] : *s; + uns val = (byte)(((byte)*s < NCC_MAX) ? NCC_CHARS[(byte)*s] : *s); *d++ = '%'; *d++ = enhex(val >> 4); *d++ = enhex(val & 0x0f); @@ -153,22 +157,23 @@ url_enescape(const byte *s, byte *d) } int -url_enescape_friendly(const byte *src, byte *dest) +url_enescape_friendly(const char *src, char *dest) { - byte *end = dest + MAX_URL_SIZE - 10; - while (*src) + char *end = dest + MAX_URL_SIZE - 10; + const byte *srcb = src; + while (*srcb) { if (dest >= end) return URL_ERR_TOO_LONG; - if (*src < NCC_MAX) - *dest++ = NCC_CHARS[*src++]; - else if (*src >= 0x20 && *src < 0x7f) - *dest++ = *src++; + if ((byte)*srcb < NCC_MAX) + *dest++ = NCC_CHARS[*srcb++]; + else if (*srcb >= 0x20 && *srcb < 0x7f) + *dest++ = *srcb++; else { *dest++ = '%'; - *dest++ = enhex(*src >> 4); - *dest++ = enhex(*src++ & 0x0f); + *dest++ = enhex((byte)*srcb >> 4); + *dest++ = enhex(*srcb++ & 0x0f); } } *dest = 0; @@ -177,11 +182,11 @@ url_enescape_friendly(const byte *src, byte *dest) /* Split an URL (several parts may be copied to the destination buffer) */ -byte *url_proto_names[URL_PROTO_MAX] = URL_PNAMES; +char *url_proto_names[URL_PROTO_MAX] = URL_PNAMES; static int url_proto_path_flags[URL_PROTO_MAX] = URL_PATH_FLAGS; uns -identify_protocol(const byte *p) +url_identify_protocol(const char *p) { uns i; @@ -192,7 +197,7 @@ identify_protocol(const byte *p) } int -url_split(byte *s, struct url *u, byte *d) +url_split(char *s, struct url *u, char *d) { bzero(u, sizeof(struct url)); u->port = ~0; @@ -200,7 +205,7 @@ url_split(byte *s, struct url *u, byte *d) if (s[0] != '/') /* Seek for "protocol:" */ { - byte *p = s; + char *p = s; while (*p && Calnum(*p)) p++; if (p != s && *p == ':') @@ -209,7 +214,7 @@ url_split(byte *s, struct url *u, byte *d) while (s < p) *d++ = *s++; *d++ = 0; - u->protoid = identify_protocol(u->protocol); + u->protoid = url_identify_protocol(u->protocol); s++; if (url_proto_path_flags[u->protoid] && (s[0] != '/' || s[1] != '/')) { @@ -227,8 +232,8 @@ url_split(byte *s, struct url *u, byte *d) { if (s[1] == '/') /* Host spec */ { - byte *q, *e; - byte *at = NULL; + char *q, *e; + char *at = NULL; char *ep; s += 2; @@ -285,11 +290,11 @@ static uns std_ports[] = URL_DEFPORTS; /* Default port numbers */ static int relpath_merge(struct url *u, struct url *b) { - byte *a = u->rest; - byte *o = b->rest; - byte *d = u->buf; - byte *e = u->bufend; - byte *p; + char *a = u->rest; + char *o = b->rest; + char *d = u->buf; + char *e = u->bufend; + char *p; if (a[0] == '/') /* Absolute path => OK */ return 0; @@ -316,15 +321,9 @@ relpath_merge(struct url *u, struct url *b) ; goto copy; } - if (a[0] == ';') /* Change parameters */ - { - for(p=o; *p && *p != ';' && *p != '?' && *p != '#'; p++) - ; - goto copy; - } p = NULL; /* Copy original path and find the last slash */ - while (*o && *o != ';' && *o != '?' && *o != '#') + while (*o && *o != '?' && *o != '#') { if (d >= e) return URL_ERR_TOO_LONG; @@ -452,7 +451,7 @@ url_normalize(struct url *u, struct url *b) /* Name canonicalization */ static void -lowercase(byte *b) +lowercase(char *b) { if (b) while (*b) @@ -464,9 +463,9 @@ lowercase(byte *b) } static void -kill_end_dot(byte *b) +kill_end_dot(char *b) { - byte *k; + char *k; if (b) { @@ -493,8 +492,8 @@ url_canonicalize(struct url *u) /* Pack a broken-down URL */ -static byte * -append(byte *d, const byte *s, byte *e) +static char * +append(char *d, const char *s, char *e) { if (d) while (*s) @@ -507,15 +506,15 @@ append(byte *d, const byte *s, byte *e) } int -url_pack(struct url *u, byte *d) +url_pack(struct url *u, char *d) { - byte *e = d + MAX_URL_SIZE - 10; + char *e = d + MAX_URL_SIZE - 10; if (u->protocol) { d = append(d, u->protocol, e); d = append(d, ":", e); - u->protoid = identify_protocol(u->protocol); + u->protoid = url_identify_protocol(u->protocol); } if (u->host) { @@ -573,7 +572,7 @@ url_error(uns err) /* Standard cookbook recipes */ int -url_canon_split_rel(const byte *u, byte *buf1, byte *buf2, struct url *url, struct url *base) +url_canon_split_rel(const char *u, char *buf1, char *buf2, struct url *url, struct url *base) { int err; @@ -587,9 +586,9 @@ url_canon_split_rel(const byte *u, byte *buf1, byte *buf2, struct url *url, stru } int -url_auto_canonicalize_rel(const byte *src, byte *dst, struct url *base) +url_auto_canonicalize_rel(const char *src, char *dst, struct url *base) { - byte buf1[MAX_URL_SIZE], buf2[MAX_URL_SIZE], buf3[MAX_URL_SIZE]; + char buf1[MAX_URL_SIZE], buf2[MAX_URL_SIZE], buf3[MAX_URL_SIZE]; int err; struct url ur; @@ -608,7 +607,7 @@ int main(int argc, char **argv) char buf1[MAX_URL_SIZE], buf2[MAX_URL_SIZE], buf3[MAX_URL_SIZE], buf4[MAX_URL_SIZE]; int err; struct url url, url0; - char *base = "http://mj@www.hell.org/123/sub_dir/index.html;param?query&zzz/subquery#fragment"; + char *base = "http://mj@www.hell.org/123/sub_dir;param/index.html;param?query&zzz/sub;query+#fragment?"; if (argc != 2 && argc != 3) return 1; @@ -667,14 +666,14 @@ int main(int argc, char **argv) #endif struct component { - const byte *start; + const char *start; int length; uns count; u32 hash; }; static inline u32 -hashf(const byte *start, int length) +hashf(const char *start, int length) { u32 hf = length; while (length-- > 0) @@ -704,11 +703,11 @@ repeat_count(struct component *comp, uns count, uns len) } int -url_has_repeated_component(const byte *url) +url_has_repeated_component(const char *url) { struct component *comp; uns comps, comp_len, rep_prefix, hash_size, *hash, *next; - const byte *c; + const char *c; uns i, j, k; for (comps=0, c=url; c; comps++)