Logging: Let log_close_all() ASSERT that all references are gone.

[libucw.git] / ucw / url.c
diff --git a/ucw/url.c b/ucw/url.c

index 74d8c1d5facf68f976360052d31891019b298f7d..39e52946e2bd086cd3cbdaefa4985fe217b56fde 100644 (file)
--- a/ucw/url.c
+++ b/ucw/url.c
@@ -7,11 +7,6 @@
   *     This software may be freely distributed and used according to the terms
   *     of the GNU Lesser General Public License.
   *
- *     The URL syntax corresponds to RFC 2396 with several exceptions:
- *
- *        o  Escaping of special characters still follows RFC 1738.
- *        o  Interpretation of path parameters follows RFC 1808.
- *
   *     XXX: The buffer handling in this module is really horrible, but it works.
   */
  
@@ -19,6 +14,7 @@
  #include "ucw/url.h"
  #include "ucw/chartype.h"
  #include "ucw/conf.h"
+#include "ucw/prime.h"
  
  #include <string.h>
  #include <stdlib.h>
@@ -34,6 +30,7 @@ static uns url_min_repeat_count = 0x7fffffff;
  static uns url_max_repeat_length = 0;
  static uns url_max_occurences = ~0U;
  
+#ifndef TEST
  static struct cf_section url_config = {
    CF_ITEMS {
      CF_UNS("IgnoreSpaces", &url_ignore_spaces),
@@ -50,6 +47,7 @@ static void CONSTRUCTOR url_init_config(void)
  {
    cf_declare_section("URL", &url_config, 0);
  }
+#endif
  
  /* Escaping and de-escaping */
  
@@ -60,10 +58,10 @@ enhex(uns x)
  }
  
  int
-url_deescape(const byte *s, byte *d)
+url_deescape(const char *s, char *d)
  {
-  byte *dstart = d;
-  byte *end = d + MAX_URL_SIZE - 10;
+  char *dstart = d;
+  char *end = d + MAX_URL_SIZE - 10;
    while (*s)
      {
        if (d >= end)
@@ -94,15 +92,23 @@ url_deescape(const byte *s, byte *d)
               val = NCC_AND; break;
             case '#':
               val = NCC_HASH; break;
+#ifndef CONFIG_URL_ESCAPE_COMPAT
+           case '$':
+             val = NCC_DOLLAR; break;
+           case '+':
+             val = NCC_PLUS; break;
+           case ',':
+             val = NCC_COMMA; break;
+#endif
             }
           *d++ = val;
           s += 3;
         }
-      else if (*s > 0x20)
+      else if ((byte) *s > 0x20)
         *d++ = *s++;
        else if (Cspace(*s))
         {
-         const byte *s0 = s;
+         const char *s0 = s;
           while (Cspace(*s))
             s++;
           if (!url_ignore_spaces || !(!*s || d == dstart))
@@ -123,25 +129,28 @@ url_deescape(const byte *s, byte *d)
  }
  
  int
-url_enescape(const byte *s, byte *d)
+url_enescape(const char *s, char *d)
  {
-  byte *end = d + MAX_URL_SIZE - 10;
+  char *end = d + MAX_URL_SIZE - 10;
    unsigned int c;
  
    while (c = *s)
      {
        if (d >= end)
         return URL_ERR_TOO_LONG;
-      if (Calnum(c) ||                                                 /* RFC 1738(2.2): Only alphanumerics ... */
-         c == '$' || c == '-' || c == '_' || c == '.' || c == '+' ||   /* ... and several other exceptions ... */
-         c == '!' || c == '*' || c == '\'' || c == '(' || c == ')' ||
-         c == ',' ||
-         c == '/' || c == '?' || c == ':' || c == '@' ||               /* ... and reserved chars used for reserved purpose */
-         c == '=' || c == '&' || c == '#' || c == ';')
+      if (Calnum(c) ||                                                 /* RFC 2396 (2.1-2.3): Only alphanumerics ... */
+         c == '!' || c == '*' || c == '\'' || c == '(' || c == ')' ||  /* ... and some exceptions and reserved chars */
+         c == '$' || c == '-' || c == '_' || c == '.' || c == '+' ||
+         c == ',' || c == '=' || c == '&' || c == '#' || c == ';' ||
+         c == '/' || c == '?' || c == ':' || c == '@'
+#ifndef CONFIG_URL_ESCAPE_COMPAT
+         || c == '~'
+#endif
+       )
         *d++ = *s++;
        else
         {
-         uns val = (*s < NCC_MAX) ? NCC_CHARS[*s] : *s;
+         uns val = (byte)(((byte)*s < NCC_MAX) ? NCC_CHARS[(byte)*s] : *s);
           *d++ = '%';
           *d++ = enhex(val >> 4);
           *d++ = enhex(val & 0x0f);
@@ -153,22 +162,23 @@ url_enescape(const byte *s, byte *d)
  }
  
  int
-url_enescape_friendly(const byte *src, byte *dest)
+url_enescape_friendly(const char *src, char *dest)
  {
-  byte *end = dest + MAX_URL_SIZE - 10;
-  while (*src)
+  char *end = dest + MAX_URL_SIZE - 10;
+  const byte *srcb = src;
+  while (*srcb)
      {
        if (dest >= end)
         return URL_ERR_TOO_LONG;
-      if (*src < NCC_MAX)
-       *dest++ = NCC_CHARS[*src++];
-      else if (*src >= 0x20 && *src < 0x7f)
-       *dest++ = *src++;
+      if ((byte)*srcb < NCC_MAX)
+       *dest++ = NCC_CHARS[*srcb++];
+      else if (*srcb >= 0x20 && *srcb < 0x7f)
+       *dest++ = *srcb++;
        else
         {
           *dest++ = '%';
-         *dest++ = enhex(*src >> 4);
-         *dest++ = enhex(*src++ & 0x0f);
+         *dest++ = enhex((byte)*srcb >> 4);
+         *dest++ = enhex(*srcb++ & 0x0f);
         }
      }
    *dest = 0;
@@ -177,11 +187,11 @@ url_enescape_friendly(const byte *src, byte *dest)
  
  /* Split an URL (several parts may be copied to the destination buffer) */
  
-byte *url_proto_names[URL_PROTO_MAX] = URL_PNAMES;
+char *url_proto_names[URL_PROTO_MAX] = URL_PNAMES;
  static int url_proto_path_flags[URL_PROTO_MAX] = URL_PATH_FLAGS;
  
  uns
-identify_protocol(const byte *p)
+url_identify_protocol(const char *p)
  {
    uns i;
  
@@ -192,7 +202,7 @@ identify_protocol(const byte *p)
  }
  
  int
-url_split(byte *s, struct url *u, byte *d)
+url_split(char *s, struct url *u, char *d)
  {
    bzero(u, sizeof(struct url));
    u->port = ~0;
@@ -200,7 +210,7 @@ url_split(byte *s, struct url *u, byte *d)
  
    if (s[0] != '/')                     /* Seek for "protocol:" */
      {
-      byte *p = s;
+      char *p = s;
        while (*p && Calnum(*p))
         p++;
        if (p != s && *p == ':')
@@ -209,7 +219,7 @@ url_split(byte *s, struct url *u, byte *d)
           while (s < p)
             *d++ = *s++;
           *d++ = 0;
-         u->protoid = identify_protocol(u->protocol);
+         u->protoid = url_identify_protocol(u->protocol);
           s++;
           if (url_proto_path_flags[u->protoid] && (s[0] != '/' || s[1] != '/'))
             {
@@ -227,8 +237,8 @@ url_split(byte *s, struct url *u, byte *d)
      {
        if (s[1] == '/')                 /* Host spec */
         {
-         byte *q, *e;
-         byte *at = NULL;
+         char *q, *e;
+         char *at = NULL;
           char *ep;
  
           s += 2;
@@ -285,11 +295,11 @@ static uns std_ports[] = URL_DEFPORTS;    /* Default port numbers */
  static int
  relpath_merge(struct url *u, struct url *b)
  {
-  byte *a = u->rest;
-  byte *o = b->rest;
-  byte *d = u->buf;
-  byte *e = u->bufend;
-  byte *p;
+  char *a = u->rest;
+  char *o = b->rest;
+  char *d = u->buf;
+  char *e = u->bufend;
+  char *p;
  
    if (a[0] == '/')                     /* Absolute path => OK */
      return 0;
@@ -316,15 +326,9 @@ relpath_merge(struct url *u, struct url *b)
         ;
        goto copy;
      }
-  if (a[0] == ';')                     /* Change parameters */
-    {
-      for(p=o; *p && *p != ';' && *p != '?' && *p != '#'; p++)
-       ;
-      goto copy;
-    }
  
    p = NULL;                            /* Copy original path and find the last slash */
-  while (*o && *o != ';' && *o != '?' && *o != '#')
+  while (*o && *o != '?' && *o != '#')
      {
        if (d >= e)
         return URL_ERR_TOO_LONG;
@@ -452,7 +456,7 @@ url_normalize(struct url *u, struct url *b)
  /* Name canonicalization */
  
  static void
-lowercase(byte *b)
+lowercase(char *b)
  {
    if (b)
      while (*b)
@@ -464,9 +468,9 @@ lowercase(byte *b)
  }
  
  static void
-kill_end_dot(byte *b)
+kill_end_dot(char *b)
  {
-  byte *k;
+  char *k;
  
    if (b)
      {
@@ -493,8 +497,8 @@ url_canonicalize(struct url *u)
  
  /* Pack a broken-down URL */
  
-static byte *
-append(byte *d, const byte *s, byte *e)
+static char *
+append(char *d, const char *s, char *e)
  {
    if (d)
      while (*s)
@@ -507,15 +511,15 @@ append(byte *d, const byte *s, byte *e)
  }
  
  int
-url_pack(struct url *u, byte *d)
+url_pack(struct url *u, char *d)
  {
-  byte *e = d + MAX_URL_SIZE - 10;
+  char *e = d + MAX_URL_SIZE - 10;
  
    if (u->protocol)
      {
        d = append(d, u->protocol, e);
        d = append(d, ":", e);
-      u->protoid = identify_protocol(u->protocol);
+      u->protoid = url_identify_protocol(u->protocol);
      }
    if (u->host)
      {
@@ -573,7 +577,7 @@ url_error(uns err)
  /* Standard cookbook recipes */
  
  int
-url_canon_split_rel(const byte *u, byte *buf1, byte *buf2, struct url *url, struct url *base)
+url_canon_split_rel(const char *u, char *buf1, char *buf2, struct url *url, struct url *base)
  {
    int err;
  
@@ -587,9 +591,9 @@ url_canon_split_rel(const byte *u, byte *buf1, byte *buf2, struct url *url, stru
  }
  
  int
-url_auto_canonicalize_rel(const byte *src, byte *dst, struct url *base)
+url_auto_canonicalize_rel(const char *src, char *dst, struct url *base)
  {
-  byte buf1[MAX_URL_SIZE], buf2[MAX_URL_SIZE], buf3[MAX_URL_SIZE];
+  char buf1[MAX_URL_SIZE], buf2[MAX_URL_SIZE], buf3[MAX_URL_SIZE];
    int err;
    struct url ur;
  
@@ -608,7 +612,7 @@ int main(int argc, char **argv)
    char buf1[MAX_URL_SIZE], buf2[MAX_URL_SIZE], buf3[MAX_URL_SIZE], buf4[MAX_URL_SIZE];
    int err;
    struct url url, url0;
-  char *base = "http://mj@www.hell.org/123/sub_dir/index.html;param?query&zzz/subquery#fragment";
+  char *base = "http://mj@www.hell.org/123/sub_dir;param/index.html;param?query&zzz/sub;query+#fragment?";
  
    if (argc != 2 && argc != 3)
      return 1;
@@ -667,14 +671,14 @@ int main(int argc, char **argv)
  #endif
  
  struct component {
-       const byte *start;
+       const char *start;
         int length;
         uns count;
         u32 hash;
  };
  
  static inline u32
-hashf(const byte *start, int length)
+hashf(const char *start, int length)
  {
         u32 hf = length;
         while (length-- > 0)
@@ -704,11 +708,11 @@ repeat_count(struct component *comp, uns count, uns len)
  }
  
  int
-url_has_repeated_component(const byte *url)
+url_has_repeated_component(const char *url)
  {
         struct component *comp;
         uns comps, comp_len, rep_prefix, hash_size, *hash, *next;
-       const byte *c;
+       const char *c;
         uns i, j, k;
  
         for (comps=0, c=url; c; comps++)