X-Git-Url: http://mj.ucw.cz/gitweb/?a=blobdiff_plain;ds=sidebyside;f=ucw%2Furl.c;h=8bdf581d5442596de8b465813af610929e637734;hb=0053f1a862cd0c2f359a4d12f0fdd35b17d18906;hp=75b96b063fc2b890a87a3d93fafdc0b3c6cafedb;hpb=c6c6c66f35c76a1e7dfec03c30af6b6c501357ff;p=libucw.git diff --git a/ucw/url.c b/ucw/url.c index 75b96b06..8bdf581d 100644 --- a/ucw/url.c +++ b/ucw/url.c @@ -7,18 +7,14 @@ * This software may be freely distributed and used according to the terms * of the GNU Lesser General Public License. * - * The URL syntax corresponds to RFC 2396 with several exceptions: - * - * o Escaping of special characters still follows RFC 1738. - * o Interpretation of path parameters follows RFC 1808. - * * XXX: The buffer handling in this module is really horrible, but it works. */ -#include "ucw/lib.h" -#include "ucw/url.h" -#include "ucw/chartype.h" -#include "ucw/conf.h" +#include +#include +#include +#include +#include #include #include @@ -27,21 +23,22 @@ /* Configuration */ -static uns url_ignore_spaces; -static uns url_ignore_underflow; +static uint url_ignore_spaces; +static uint url_ignore_underflow; static char *url_component_separators = ""; -static uns url_min_repeat_count = 0x7fffffff; -static uns url_max_repeat_length = 0; -static uns url_max_occurences = ~0U; +static uint url_min_repeat_count = 0x7fffffff; +static uint url_max_repeat_length = 0; +static uint url_max_occurences = ~0U; +#ifndef TEST static struct cf_section url_config = { CF_ITEMS { - CF_UNS("IgnoreSpaces", &url_ignore_spaces), - CF_UNS("IgnoreUnderflow", &url_ignore_underflow), + CF_UINT("IgnoreSpaces", &url_ignore_spaces), + CF_UINT("IgnoreUnderflow", &url_ignore_underflow), CF_STRING("ComponentSeparators", &url_component_separators), - CF_UNS("MinRepeatCount", &url_min_repeat_count), - CF_UNS("MaxRepeatLength", &url_max_repeat_length), - CF_UNS("MaxOccurences", &url_max_occurences), + CF_UINT("MinRepeatCount", &url_min_repeat_count), + CF_UINT("MaxRepeatLength", &url_max_repeat_length), + CF_UINT("MaxOccurences", &url_max_occurences), CF_END } }; @@ -50,11 +47,12 @@ static void CONSTRUCTOR url_init_config(void) { cf_declare_section("URL", &url_config, 0); } +#endif /* Escaping and de-escaping */ -static uns -enhex(uns x) +static uint +enhex(uint x) { return (x<10) ? (x + '0') : (x - 10 + 'A'); } @@ -70,7 +68,7 @@ url_deescape(const char *s, char *d) return URL_ERR_TOO_LONG; if (*s == '%') { - unsigned int val; + uint val; if (!Cxdigit(s[1]) || !Cxdigit(s[2])) return URL_ERR_INVALID_ESCAPE; val = Cxvalue(s[1])*16 + Cxvalue(s[2]); @@ -94,6 +92,12 @@ url_deescape(const char *s, char *d) val = NCC_AND; break; case '#': val = NCC_HASH; break; + case '$': + val = NCC_DOLLAR; break; + case '+': + val = NCC_PLUS; break; + case ',': + val = NCC_COMMA; break; } *d++ = val; s += 3; @@ -126,22 +130,22 @@ int url_enescape(const char *s, char *d) { char *end = d + MAX_URL_SIZE - 10; - unsigned int c; + uint c; while (c = *s) { if (d >= end) return URL_ERR_TOO_LONG; - if (Calnum(c) || /* RFC 1738(2.2): Only alphanumerics ... */ - c == '$' || c == '-' || c == '_' || c == '.' || c == '+' || /* ... and several other exceptions ... */ - c == '!' || c == '*' || c == '\'' || c == '(' || c == ')' || - c == ',' || - c == '/' || c == '?' || c == ':' || c == '@' || /* ... and reserved chars used for reserved purpose */ - c == '=' || c == '&' || c == '#' || c == ';') + if (Calnum(c) || /* RFC 2396 (2.1-2.3): Only alphanumerics ... */ + c == '!' || c == '*' || c == '\'' || c == '(' || c == ')' || /* ... and some exceptions and reserved chars */ + c == '$' || c == '-' || c == '_' || c == '.' || c == '+' || + c == ',' || c == '=' || c == '&' || c == '#' || c == ';' || + c == '/' || c == '?' || c == ':' || c == '@' || c == '~' + ) *d++ = *s++; else { - uns val = ((byte)*s < NCC_MAX) ? NCC_CHARS[(byte)*s] : *s; + uint val = (byte)(((byte)*s < NCC_MAX) ? NCC_CHARS[(byte)*s] : *s); *d++ = '%'; *d++ = enhex(val >> 4); *d++ = enhex(val & 0x0f); @@ -161,14 +165,14 @@ url_enescape_friendly(const char *src, char *dest) { if (dest >= end) return URL_ERR_TOO_LONG; - if (*srcb < NCC_MAX) + if ((byte)*srcb < NCC_MAX) *dest++ = NCC_CHARS[*srcb++]; else if (*srcb >= 0x20 && *srcb < 0x7f) *dest++ = *srcb++; else { *dest++ = '%'; - *dest++ = enhex(*srcb >> 4); + *dest++ = enhex((byte)*srcb >> 4); *dest++ = enhex(*srcb++ & 0x0f); } } @@ -181,10 +185,10 @@ url_enescape_friendly(const char *src, char *dest) char *url_proto_names[URL_PROTO_MAX] = URL_PNAMES; static int url_proto_path_flags[URL_PROTO_MAX] = URL_PATH_FLAGS; -uns -identify_protocol(const char *p) +uint +url_identify_protocol(const char *p) { - uns i; + uint i; for(i=1; iprotoid = identify_protocol(u->protocol); + u->protoid = url_identify_protocol(u->protocol); s++; if (url_proto_path_flags[u->protoid] && (s[0] != '/' || s[1] != '/')) { @@ -262,7 +266,7 @@ url_split(char *s, struct url *u, char *d) e = strchr(at, ':'); if (e) /* host:port present */ { - uns p; + uint p; *e++ = 0; p = strtoul(e, &ep, 10); if (ep && *ep || p > 65535) @@ -281,7 +285,7 @@ url_split(char *s, struct url *u, char *d) /* Normalization according to given base URL */ -static uns std_ports[] = URL_DEFPORTS; /* Default port numbers */ +static uint std_ports[] = URL_DEFPORTS; /* Default port numbers */ static int relpath_merge(struct url *u, struct url *b) @@ -317,15 +321,9 @@ relpath_merge(struct url *u, struct url *b) ; goto copy; } - if (a[0] == ';') /* Change parameters */ - { - for(p=o; *p && *p != ';' && *p != '?' && *p != '#'; p++) - ; - goto copy; - } p = NULL; /* Copy original path and find the last slash */ - while (*o && *o != ';' && *o != '?' && *o != '#') + while (*o && *o != '?' && *o != '#') { if (d >= e) return URL_ERR_TOO_LONG; @@ -516,7 +514,7 @@ url_pack(struct url *u, char *d) { d = append(d, u->protocol, e); d = append(d, ":", e); - u->protoid = identify_protocol(u->protocol); + u->protoid = url_identify_protocol(u->protocol); } if (u->host) { @@ -564,7 +562,7 @@ static char *errmsg[] = { }; char * -url_error(uns err) +url_error(uint err) { if (err >= sizeof(errmsg) / sizeof(char *)) err = 0; @@ -609,7 +607,7 @@ int main(int argc, char **argv) char buf1[MAX_URL_SIZE], buf2[MAX_URL_SIZE], buf3[MAX_URL_SIZE], buf4[MAX_URL_SIZE]; int err; struct url url, url0; - char *base = "http://mj@www.hell.org/123/sub_dir/index.html;param?query&zzz/subquery#fragment"; + char *base = "http://mj@www.hell.org/123/sub_dir;param/index.html;param?query&zzz/sub;query+#fragment?"; if (argc != 2 && argc != 3) return 1; @@ -670,7 +668,7 @@ int main(int argc, char **argv) struct component { const char *start; int length; - uns count; + uint count; u32 hash; }; @@ -683,14 +681,14 @@ hashf(const char *start, int length) return hf; } -static inline uns -repeat_count(struct component *comp, uns count, uns len) +static inline uint +repeat_count(struct component *comp, uint count, uint len) { struct component *orig_comp = comp; - uns found = 0; + uint found = 0; while (1) { - uns i; + uint i; comp += len; count -= len; found++; @@ -708,9 +706,9 @@ int url_has_repeated_component(const char *url) { struct component *comp; - uns comps, comp_len, rep_prefix, hash_size, *hash, *next; + uint comps, comp_len, rep_prefix, hash_size, *hash, *next; const char *c; - uns i, j, k; + uint i, j, k; for (comps=0, c=url; c; comps++) {