* This software may be freely distributed and used according to the terms
* of the GNU Lesser General Public License.
*
- * The URL syntax corresponds to RFC 2396 with several exceptions:
- *
- * o Escaping of special characters still follows RFC 1738.
- * o Interpretation of path parameters follows RFC 1808.
- *
* XXX: The buffer handling in this module is really horrible, but it works.
*/
#include "ucw/url.h"
#include "ucw/chartype.h"
#include "ucw/conf.h"
+#include "ucw/prime.h"
#include <string.h>
#include <stdlib.h>
static uns url_max_repeat_length = 0;
static uns url_max_occurences = ~0U;
+#ifndef TEST
static struct cf_section url_config = {
CF_ITEMS {
CF_UNS("IgnoreSpaces", &url_ignore_spaces),
{
cf_declare_section("URL", &url_config, 0);
}
+#endif
/* Escaping and de-escaping */
}
int
-url_deescape(const byte *s, byte *d)
+url_deescape(const char *s, char *d)
{
- byte *dstart = d;
- byte *end = d + MAX_URL_SIZE - 10;
+ char *dstart = d;
+ char *end = d + MAX_URL_SIZE - 10;
while (*s)
{
if (d >= end)
val = NCC_AND; break;
case '#':
val = NCC_HASH; break;
+#ifndef CONFIG_URL_ESCAPE_COMPAT
+ case '$':
+ val = NCC_DOLLAR; break;
+ case '+':
+ val = NCC_PLUS; break;
+ case ',':
+ val = NCC_COMMA; break;
+#endif
}
*d++ = val;
s += 3;
}
- else if (*s > 0x20)
+ else if ((byte) *s > 0x20)
*d++ = *s++;
else if (Cspace(*s))
{
- const byte *s0 = s;
+ const char *s0 = s;
while (Cspace(*s))
s++;
if (!url_ignore_spaces || !(!*s || d == dstart))
}
int
-url_enescape(const byte *s, byte *d)
+url_enescape(const char *s, char *d)
{
- byte *end = d + MAX_URL_SIZE - 10;
+ char *end = d + MAX_URL_SIZE - 10;
unsigned int c;
while (c = *s)
{
if (d >= end)
return URL_ERR_TOO_LONG;
- if (Calnum(c) || /* RFC 1738(2.2): Only alphanumerics ... */
- c == '$' || c == '-' || c == '_' || c == '.' || c == '+' || /* ... and several other exceptions ... */
- c == '!' || c == '*' || c == '\'' || c == '(' || c == ')' ||
- c == ',' ||
- c == '/' || c == '?' || c == ':' || c == '@' || /* ... and reserved chars used for reserved purpose */
- c == '=' || c == '&' || c == '#' || c == ';')
+ if (Calnum(c) || /* RFC 2396 (2.1-2.3): Only alphanumerics ... */
+ c == '!' || c == '*' || c == '\'' || c == '(' || c == ')' || /* ... and some exceptions and reserved chars */
+ c == '$' || c == '-' || c == '_' || c == '.' || c == '+' ||
+ c == ',' || c == '=' || c == '&' || c == '#' || c == ';' ||
+ c == '/' || c == '?' || c == ':' || c == '@'
+#ifndef CONFIG_URL_ESCAPE_COMPAT
+ || c == '~'
+#endif
+ )
*d++ = *s++;
else
{
- uns val = (*s < NCC_MAX) ? NCC_CHARS[*s] : *s;
+ uns val = (byte)(((byte)*s < NCC_MAX) ? NCC_CHARS[(byte)*s] : *s);
*d++ = '%';
*d++ = enhex(val >> 4);
*d++ = enhex(val & 0x0f);
}
int
-url_enescape_friendly(const byte *src, byte *dest)
+url_enescape_friendly(const char *src, char *dest)
{
- byte *end = dest + MAX_URL_SIZE - 10;
- while (*src)
+ char *end = dest + MAX_URL_SIZE - 10;
+ const byte *srcb = src;
+ while (*srcb)
{
if (dest >= end)
return URL_ERR_TOO_LONG;
- if (*src < NCC_MAX)
- *dest++ = NCC_CHARS[*src++];
- else if (*src >= 0x20 && *src < 0x7f)
- *dest++ = *src++;
+ if ((byte)*srcb < NCC_MAX)
+ *dest++ = NCC_CHARS[*srcb++];
+ else if (*srcb >= 0x20 && *srcb < 0x7f)
+ *dest++ = *srcb++;
else
{
*dest++ = '%';
- *dest++ = enhex(*src >> 4);
- *dest++ = enhex(*src++ & 0x0f);
+ *dest++ = enhex((byte)*srcb >> 4);
+ *dest++ = enhex(*srcb++ & 0x0f);
}
}
*dest = 0;
/* Split an URL (several parts may be copied to the destination buffer) */
-byte *url_proto_names[URL_PROTO_MAX] = URL_PNAMES;
+char *url_proto_names[URL_PROTO_MAX] = URL_PNAMES;
static int url_proto_path_flags[URL_PROTO_MAX] = URL_PATH_FLAGS;
uns
-identify_protocol(const byte *p)
+url_identify_protocol(const char *p)
{
uns i;
}
int
-url_split(byte *s, struct url *u, byte *d)
+url_split(char *s, struct url *u, char *d)
{
bzero(u, sizeof(struct url));
u->port = ~0;
if (s[0] != '/') /* Seek for "protocol:" */
{
- byte *p = s;
+ char *p = s;
while (*p && Calnum(*p))
p++;
if (p != s && *p == ':')
while (s < p)
*d++ = *s++;
*d++ = 0;
- u->protoid = identify_protocol(u->protocol);
+ u->protoid = url_identify_protocol(u->protocol);
s++;
if (url_proto_path_flags[u->protoid] && (s[0] != '/' || s[1] != '/'))
{
{
if (s[1] == '/') /* Host spec */
{
- byte *q, *e;
- byte *at = NULL;
+ char *q, *e;
+ char *at = NULL;
char *ep;
s += 2;
static int
relpath_merge(struct url *u, struct url *b)
{
- byte *a = u->rest;
- byte *o = b->rest;
- byte *d = u->buf;
- byte *e = u->bufend;
- byte *p;
+ char *a = u->rest;
+ char *o = b->rest;
+ char *d = u->buf;
+ char *e = u->bufend;
+ char *p;
if (a[0] == '/') /* Absolute path => OK */
return 0;
;
goto copy;
}
- if (a[0] == ';') /* Change parameters */
- {
- for(p=o; *p && *p != ';' && *p != '?' && *p != '#'; p++)
- ;
- goto copy;
- }
p = NULL; /* Copy original path and find the last slash */
- while (*o && *o != ';' && *o != '?' && *o != '#')
+ while (*o && *o != '?' && *o != '#')
{
if (d >= e)
return URL_ERR_TOO_LONG;
/* Name canonicalization */
static void
-lowercase(byte *b)
+lowercase(char *b)
{
if (b)
while (*b)
}
static void
-kill_end_dot(byte *b)
+kill_end_dot(char *b)
{
- byte *k;
+ char *k;
if (b)
{
/* Pack a broken-down URL */
-static byte *
-append(byte *d, const byte *s, byte *e)
+static char *
+append(char *d, const char *s, char *e)
{
if (d)
while (*s)
}
int
-url_pack(struct url *u, byte *d)
+url_pack(struct url *u, char *d)
{
- byte *e = d + MAX_URL_SIZE - 10;
+ char *e = d + MAX_URL_SIZE - 10;
if (u->protocol)
{
d = append(d, u->protocol, e);
d = append(d, ":", e);
- u->protoid = identify_protocol(u->protocol);
+ u->protoid = url_identify_protocol(u->protocol);
}
if (u->host)
{
/* Standard cookbook recipes */
int
-url_canon_split_rel(const byte *u, byte *buf1, byte *buf2, struct url *url, struct url *base)
+url_canon_split_rel(const char *u, char *buf1, char *buf2, struct url *url, struct url *base)
{
int err;
}
int
-url_auto_canonicalize_rel(const byte *src, byte *dst, struct url *base)
+url_auto_canonicalize_rel(const char *src, char *dst, struct url *base)
{
- byte buf1[MAX_URL_SIZE], buf2[MAX_URL_SIZE], buf3[MAX_URL_SIZE];
+ char buf1[MAX_URL_SIZE], buf2[MAX_URL_SIZE], buf3[MAX_URL_SIZE];
int err;
struct url ur;
char buf1[MAX_URL_SIZE], buf2[MAX_URL_SIZE], buf3[MAX_URL_SIZE], buf4[MAX_URL_SIZE];
int err;
struct url url, url0;
- char *base = "http://mj@www.hell.org/123/sub_dir/index.html;param?query&zzz/subquery#fragment";
+ char *base = "http://mj@www.hell.org/123/sub_dir;param/index.html;param?query&zzz/sub;query+#fragment?";
if (argc != 2 && argc != 3)
return 1;
#endif
/*
 * One record of the component table that url_has_repeated_component()
 * works with (it declares a `struct component *comp`).
 *
 * NOTE(review): the field meanings below are inferred from the field names
 * and from the hashf(start, length) signature only -- confirm against the
 * body of url_has_repeated_component():
 *   start  -- presumably points at the first character of the component
 *   length -- presumably the length of the component in characters
 *   count  -- presumably the number of occurrences of the component
 *   hash   -- presumably the hashf() value of the component
 */
struct component {
- const byte *start;
+ const char *start;
int length;
uns count;
u32 hash;
};
static inline u32
-hashf(const byte *start, int length)
+hashf(const char *start, int length)
{
u32 hf = length;
while (length-- > 0)
}
int
-url_has_repeated_component(const byte *url)
+url_has_repeated_component(const char *url)
{
struct component *comp;
uns comps, comp_len, rep_prefix, hash_size, *hash, *next;
- const byte *c;
+ const char *c;
uns i, j, k;
for (comps=0, c=url; c; comps++)