* This software may be freely distributed and used according to the terms
* of the GNU Lesser General Public License.
*
- * The URL syntax corresponds to RFC 2396 with several exceptions:
- *
- * o Escaping of special characters still follows RFC 1738.
- * o Interpretation of path parameters follows RFC 1808.
- *
* XXX: The buffer handling in this module is really horrible, but it works.
*/
-#include "ucw/lib.h"
-#include "ucw/url.h"
-#include "ucw/chartype.h"
-#include "ucw/conf.h"
+#include <ucw/lib.h>
+#include <ucw/url.h>
+#include <ucw/chartype.h>
+#include <ucw/conf.h>
+#include <ucw/prime.h>
#include <string.h>
#include <stdlib.h>
/* Configuration */
-static uns url_ignore_spaces;
-static uns url_ignore_underflow;
+static uint url_ignore_spaces;		/* Bound to the "IgnoreSpaces" item of url_config; zero unless configured */
+static uint url_ignore_underflow;	/* Bound to the "IgnoreUnderflow" item of url_config; zero unless configured */
static char *url_component_separators = "";
-static uns url_min_repeat_count = 0x7fffffff;
-static uns url_max_repeat_length = 0;
-static uns url_max_occurences = ~0U;
+static uint url_min_repeat_count = 0x7fffffff;	/* Bound to "MinRepeatCount" */
+static uint url_max_repeat_length = 0;	/* Bound to "MaxRepeatLength" */
+static uint url_max_occurences = ~0U;	/* Bound to "MaxOccurences" (key spelling is part of the config interface) */
+#ifndef TEST
static struct cf_section url_config = {
CF_ITEMS {
- CF_UNS("IgnoreSpaces", &url_ignore_spaces),
- CF_UNS("IgnoreUnderflow", &url_ignore_underflow),
+ CF_UINT("IgnoreSpaces", &url_ignore_spaces),	/* Items of the section declared under the name "URL"; each binds a key to a file-scope variable */
+ CF_UINT("IgnoreUnderflow", &url_ignore_underflow),
CF_STRING("ComponentSeparators", &url_component_separators),
- CF_UNS("MinRepeatCount", &url_min_repeat_count),
- CF_UNS("MaxRepeatLength", &url_max_repeat_length),
- CF_UNS("MaxOccurences", &url_max_occurences),
+ CF_UINT("MinRepeatCount", &url_min_repeat_count),
+ CF_UINT("MaxRepeatLength", &url_max_repeat_length),
+ CF_UINT("MaxOccurences", &url_max_occurences),
CF_END
}
};
{
cf_declare_section("URL", &url_config, 0);
}
+#endif
/* Escaping and de-escaping */
-static uns
-enhex(uns x)
+static uint	/* Returns the character code of an upper-case hexadecimal digit for x */
+enhex(uint x)	/* x: value to encode; the result is a hex digit only for x < 16 (callers pass a single nibble) */
{
return (x<10) ? (x + '0') : (x - 10 + 'A');
}
return URL_ERR_TOO_LONG;
if (*s == '%')
{
- unsigned int val;
+ uint val;
if (!Cxdigit(s[1]) || !Cxdigit(s[2]))
return URL_ERR_INVALID_ESCAPE;
val = Cxvalue(s[1])*16 + Cxvalue(s[2]);
val = NCC_AND; break;
case '#':
val = NCC_HASH; break;
+ case '$':
+ val = NCC_DOLLAR; break;
+ case '+':
+ val = NCC_PLUS; break;
+ case ',':
+ val = NCC_COMMA; break;
}
*d++ = val;
s += 3;
url_enescape(const char *s, char *d)
{
char *end = d + MAX_URL_SIZE - 10;
- unsigned int c;
+ uint c;
while (c = *s)
{
if (d >= end)
return URL_ERR_TOO_LONG;
- if (Calnum(c) || /* RFC 1738(2.2): Only alphanumerics ... */
- c == '$' || c == '-' || c == '_' || c == '.' || c == '+' || /* ... and several other exceptions ... */
- c == '!' || c == '*' || c == '\'' || c == '(' || c == ')' ||
- c == ',' ||
- c == '/' || c == '?' || c == ':' || c == '@' || /* ... and reserved chars used for reserved purpose */
- c == '=' || c == '&' || c == '#' || c == ';')
+ if (Calnum(c) || /* RFC 2396 (2.1-2.3): Only alphanumerics ... */
+ c == '!' || c == '*' || c == '\'' || c == '(' || c == ')' || /* ... and some exceptions and reserved chars */
+ c == '$' || c == '-' || c == '_' || c == '.' || c == '+' ||
+ c == ',' || c == '=' || c == '&' || c == '#' || c == ';' ||
+ c == '/' || c == '?' || c == ':' || c == '@' || c == '~'
+ )
*d++ = *s++;
else
{
- uns val = ((byte)*s < NCC_MAX) ? NCC_CHARS[(byte)*s] : *s;
+ uint val = (byte)(((byte)*s < NCC_MAX) ? NCC_CHARS[(byte)*s] : *s);
*d++ = '%';
*d++ = enhex(val >> 4);
*d++ = enhex(val & 0x0f);
{
if (dest >= end)
return URL_ERR_TOO_LONG;
- if (*srcb < NCC_MAX)
+ if ((byte)*srcb < NCC_MAX)
*dest++ = NCC_CHARS[*srcb++];
else if (*srcb >= 0x20 && *srcb < 0x7f)
*dest++ = *srcb++;
else
{
*dest++ = '%';
- *dest++ = enhex(*srcb >> 4);
+ *dest++ = enhex((byte)*srcb >> 4);
*dest++ = enhex(*srcb++ & 0x0f);
}
}
char *url_proto_names[URL_PROTO_MAX] = URL_PNAMES;
static int url_proto_path_flags[URL_PROTO_MAX] = URL_PATH_FLAGS;
-uns
-identify_protocol(const char *p)
+uint
+url_identify_protocol(const char *p)
{
- uns i;
+ uint i;
for(i=1; i<URL_PROTO_MAX; i++)
if (!strcasecmp(p, url_proto_names[i]))
while (s < p)
*d++ = *s++;
*d++ = 0;
- u->protoid = identify_protocol(u->protocol);
+ u->protoid = url_identify_protocol(u->protocol);
s++;
if (url_proto_path_flags[u->protoid] && (s[0] != '/' || s[1] != '/'))
{
e = strchr(at, ':');
if (e) /* host:port present */
{
- uns p;
+ uint p;
*e++ = 0;
p = strtoul(e, &ep, 10);
if (ep && *ep || p > 65535)
/* Normalization according to given base URL */
-static uns std_ports[] = URL_DEFPORTS; /* Default port numbers */
+static uint std_ports[] = URL_DEFPORTS; /* Default port numbers */
static int
relpath_merge(struct url *u, struct url *b)
;
goto copy;
}
- if (a[0] == ';') /* Change parameters */
- {
- for(p=o; *p && *p != ';' && *p != '?' && *p != '#'; p++)
- ;
- goto copy;
- }
p = NULL; /* Copy original path and find the last slash */
- while (*o && *o != ';' && *o != '?' && *o != '#')
+ while (*o && *o != '?' && *o != '#')
{
if (d >= e)
return URL_ERR_TOO_LONG;
{
d = append(d, u->protocol, e);
d = append(d, ":", e);
- u->protoid = identify_protocol(u->protocol);
+ u->protoid = url_identify_protocol(u->protocol);
}
if (u->host)
{
};
char *
-url_error(uns err)
+url_error(uint err)
{
if (err >= sizeof(errmsg) / sizeof(char *))
err = 0;
char buf1[MAX_URL_SIZE], buf2[MAX_URL_SIZE], buf3[MAX_URL_SIZE], buf4[MAX_URL_SIZE];
int err;
struct url url, url0;
- char *base = "http://mj@www.hell.org/123/sub_dir/index.html;param?query&zzz/subquery#fragment";
+ char *base = "http://mj@www.hell.org/123/sub_dir;param/index.html;param?query&zzz/sub;query+#fragment?";
if (argc != 2 && argc != 3)
return 1;
struct component {
const char *start;
int length;
- uns count;
+ uint count;	/* NOTE(review): presumably how many times this component occurs in the URL -- confirm in url_has_repeated_component() */
u32 hash;
};
return hf;
}
-static inline uns
-repeat_count(struct component *comp, uns count, uns len)
+static inline uint
+repeat_count(struct component *comp, uint count, uint len)
{
struct component *orig_comp = comp;
- uns found = 0;
+ uint found = 0;
while (1)
{
- uns i;
+ uint i;
comp += len;
count -= len;
found++;
url_has_repeated_component(const char *url)
{
struct component *comp;
- uns comps, comp_len, rep_prefix, hash_size, *hash, *next;
+ uint comps, comp_len, rep_prefix, hash_size, *hash, *next;
const char *c;
- uns i, j, k;
+ uint i, j, k;
for (comps=0, c=url; c; comps++)
{