tableprinter: manpage is now installed into /usr/share/man/man1

[libucw.git] / ucw / url.c
diff --git a/ucw/url.c b/ucw/url.c

index 75b96b063fc2b890a87a3d93fafdc0b3c6cafedb..8bdf581d5442596de8b465813af610929e637734 100644 (file)
--- a/ucw/url.c
+++ b/ucw/url.c
@@ -7,18 +7,14 @@
   *     This software may be freely distributed and used according to the terms
   *     of the GNU Lesser General Public License.
   *
- *     The URL syntax corresponds to RFC 2396 with several exceptions:
- *
- *        o  Escaping of special characters still follows RFC 1738.
- *        o  Interpretation of path parameters follows RFC 1808.
- *
   *     XXX: The buffer handling in this module is really horrible, but it works.
   */
  
-#include "ucw/lib.h"
-#include "ucw/url.h"
-#include "ucw/chartype.h"
-#include "ucw/conf.h"
+#include <ucw/lib.h>
+#include <ucw/url.h>
+#include <ucw/chartype.h>
+#include <ucw/conf.h>
+#include <ucw/prime.h>
  
  #include <string.h>
  #include <stdlib.h>
@@ -27,21 +23,22 @@
  
  /* Configuration */
  
-static uns url_ignore_spaces;
-static uns url_ignore_underflow;
+static uint url_ignore_spaces;
+static uint url_ignore_underflow;
  static char *url_component_separators = "";
-static uns url_min_repeat_count = 0x7fffffff;
-static uns url_max_repeat_length = 0;
-static uns url_max_occurences = ~0U;
+static uint url_min_repeat_count = 0x7fffffff;
+static uint url_max_repeat_length = 0;
+static uint url_max_occurences = ~0U;
  
+#ifndef TEST
  static struct cf_section url_config = {
    CF_ITEMS {
-    CF_UNS("IgnoreSpaces", &url_ignore_spaces),
-    CF_UNS("IgnoreUnderflow", &url_ignore_underflow),
+    CF_UINT("IgnoreSpaces", &url_ignore_spaces),
+    CF_UINT("IgnoreUnderflow", &url_ignore_underflow),
      CF_STRING("ComponentSeparators", &url_component_separators),
-    CF_UNS("MinRepeatCount", &url_min_repeat_count),
-    CF_UNS("MaxRepeatLength", &url_max_repeat_length),
-    CF_UNS("MaxOccurences", &url_max_occurences),
+    CF_UINT("MinRepeatCount", &url_min_repeat_count),
+    CF_UINT("MaxRepeatLength", &url_max_repeat_length),
+    CF_UINT("MaxOccurences", &url_max_occurences),
      CF_END
    }
  };
@@ -50,11 +47,12 @@ static void CONSTRUCTOR url_init_config(void)
  {
    cf_declare_section("URL", &url_config, 0);
  }
+#endif
  
  /* Escaping and de-escaping */
  
-static uns
-enhex(uns x)
+static uint
+enhex(uint x)
  {
    return (x<10) ? (x + '0') : (x - 10 + 'A');
  }
@@ -70,7 +68,7 @@ url_deescape(const char *s, char *d)
         return URL_ERR_TOO_LONG;
        if (*s == '%')
         {
-         unsigned int val;
+         uint val;
           if (!Cxdigit(s[1]) || !Cxdigit(s[2]))
             return URL_ERR_INVALID_ESCAPE;
           val = Cxvalue(s[1])*16 + Cxvalue(s[2]);
@@ -94,6 +92,12 @@ url_deescape(const char *s, char *d)
               val = NCC_AND; break;
             case '#':
               val = NCC_HASH; break;
+           case '$':
+             val = NCC_DOLLAR; break;
+           case '+':
+             val = NCC_PLUS; break;
+           case ',':
+             val = NCC_COMMA; break;
             }
           *d++ = val;
           s += 3;
@@ -126,22 +130,22 @@ int
  url_enescape(const char *s, char *d)
  {
    char *end = d + MAX_URL_SIZE - 10;
-  unsigned int c;
+  uint c;
  
    while (c = *s)
      {
        if (d >= end)
         return URL_ERR_TOO_LONG;
-      if (Calnum(c) ||                                                 /* RFC 1738(2.2): Only alphanumerics ... */
-         c == '$' || c == '-' || c == '_' || c == '.' || c == '+' ||   /* ... and several other exceptions ... */
-         c == '!' || c == '*' || c == '\'' || c == '(' || c == ')' ||
-         c == ',' ||
-         c == '/' || c == '?' || c == ':' || c == '@' ||               /* ... and reserved chars used for reserved purpose */
-         c == '=' || c == '&' || c == '#' || c == ';')
+      if (Calnum(c) ||                                                 /* RFC 2396 (2.1-2.3): Only alphanumerics ... */
+         c == '!' || c == '*' || c == '\'' || c == '(' || c == ')' ||  /* ... and some exceptions and reserved chars */
+         c == '$' || c == '-' || c == '_' || c == '.' || c == '+' ||
+         c == ',' || c == '=' || c == '&' || c == '#' || c == ';' ||
+         c == '/' || c == '?' || c == ':' || c == '@' || c == '~'
+       )
         *d++ = *s++;
        else
         {
-         uns val = ((byte)*s < NCC_MAX) ? NCC_CHARS[(byte)*s] : *s;
+         uint val = (byte)(((byte)*s < NCC_MAX) ? NCC_CHARS[(byte)*s] : *s);
           *d++ = '%';
           *d++ = enhex(val >> 4);
           *d++ = enhex(val & 0x0f);
@@ -161,14 +165,14 @@ url_enescape_friendly(const char *src, char *dest)
      {
        if (dest >= end)
         return URL_ERR_TOO_LONG;
-      if (*srcb < NCC_MAX)
+      if ((byte)*srcb < NCC_MAX)
         *dest++ = NCC_CHARS[*srcb++];
        else if (*srcb >= 0x20 && *srcb < 0x7f)
         *dest++ = *srcb++;
        else
         {
           *dest++ = '%';
-         *dest++ = enhex(*srcb >> 4);
+         *dest++ = enhex((byte)*srcb >> 4);
           *dest++ = enhex(*srcb++ & 0x0f);
         }
      }
@@ -181,10 +185,10 @@ url_enescape_friendly(const char *src, char *dest)
  char *url_proto_names[URL_PROTO_MAX] = URL_PNAMES;
  static int url_proto_path_flags[URL_PROTO_MAX] = URL_PATH_FLAGS;
  
-uns
-identify_protocol(const char *p)
+uint
+url_identify_protocol(const char *p)
  {
-  uns i;
+  uint i;
  
    for(i=1; i<URL_PROTO_MAX; i++)
      if (!strcasecmp(p, url_proto_names[i]))
@@ -210,7 +214,7 @@ url_split(char *s, struct url *u, char *d)
           while (s < p)
             *d++ = *s++;
           *d++ = 0;
-         u->protoid = identify_protocol(u->protocol);
+         u->protoid = url_identify_protocol(u->protocol);
           s++;
           if (url_proto_path_flags[u->protoid] && (s[0] != '/' || s[1] != '/'))
             {
@@ -262,7 +266,7 @@ url_split(char *s, struct url *u, char *d)
           e = strchr(at, ':');
           if (e)                        /* host:port present */
             {
-             uns p;
+             uint p;
               *e++ = 0;
               p = strtoul(e, &ep, 10);
               if (ep && *ep || p > 65535)
@@ -281,7 +285,7 @@ url_split(char *s, struct url *u, char *d)
  
  /* Normalization according to given base URL */
  
-static uns std_ports[] = URL_DEFPORTS; /* Default port numbers */
+static uint std_ports[] = URL_DEFPORTS;        /* Default port numbers */
  
  static int
  relpath_merge(struct url *u, struct url *b)
@@ -317,15 +321,9 @@ relpath_merge(struct url *u, struct url *b)
         ;
        goto copy;
      }
-  if (a[0] == ';')                     /* Change parameters */
-    {
-      for(p=o; *p && *p != ';' && *p != '?' && *p != '#'; p++)
-       ;
-      goto copy;
-    }
  
    p = NULL;                            /* Copy original path and find the last slash */
-  while (*o && *o != ';' && *o != '?' && *o != '#')
+  while (*o && *o != '?' && *o != '#')
      {
        if (d >= e)
         return URL_ERR_TOO_LONG;
@@ -516,7 +514,7 @@ url_pack(struct url *u, char *d)
      {
        d = append(d, u->protocol, e);
        d = append(d, ":", e);
-      u->protoid = identify_protocol(u->protocol);
+      u->protoid = url_identify_protocol(u->protocol);
      }
    if (u->host)
      {
@@ -564,7 +562,7 @@ static char *errmsg[] = {
  };
  
  char *
-url_error(uns err)
+url_error(uint err)
  {
    if (err >= sizeof(errmsg) / sizeof(char *))
      err = 0;
@@ -609,7 +607,7 @@ int main(int argc, char **argv)
    char buf1[MAX_URL_SIZE], buf2[MAX_URL_SIZE], buf3[MAX_URL_SIZE], buf4[MAX_URL_SIZE];
    int err;
    struct url url, url0;
-  char *base = "http://mj@www.hell.org/123/sub_dir/index.html;param?query&zzz/subquery#fragment";
+  char *base = "http://mj@www.hell.org/123/sub_dir;param/index.html;param?query&zzz/sub;query+#fragment?";
  
    if (argc != 2 && argc != 3)
      return 1;
@@ -670,7 +668,7 @@ int main(int argc, char **argv)
  struct component {
         const char *start;
         int length;
-       uns count;
+       uint count;
         u32 hash;
  };
  
@@ -683,14 +681,14 @@ hashf(const char *start, int length)
         return hf;
  }
  
-static inline uns
-repeat_count(struct component *comp, uns count, uns len)
+static inline uint
+repeat_count(struct component *comp, uint count, uint len)
  {
         struct component *orig_comp = comp;
-       uns found = 0;
+       uint found = 0;
         while (1)
         {
-               uns i;
+               uint i;
                 comp += len;
                 count -= len;
                 found++;
@@ -708,9 +706,9 @@ int
  url_has_repeated_component(const char *url)
  {
         struct component *comp;
-       uns comps, comp_len, rep_prefix, hash_size, *hash, *next;
+       uint comps, comp_len, rep_prefix, hash_size, *hash, *next;
         const char *c;
-       uns i, j, k;
+       uint i, j, k;
  
         for (comps=0, c=url; c; comps++)
         {