/*
- * Sherlock Library -- URL Functions (according to RFC 1738 and 1808)
+ * UCW Library -- URL Functions
*
- * (c) 1997--2001 Martin Mares <mj@ucw.cz>
+ * (c) 1997--2004 Martin Mares <mj@ucw.cz>
* (c) 2001 Robert Spalek <robert@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ *
+ * The URL syntax corresponds to RFC 2396 with several exceptions:
+ *
+ * o Escaping of special characters still follows RFC 1738.
+ * o Interpretation of path parameters follows RFC 1808.
+ *
+ * XXX: The buffer handling in this module is really horrible, but it works.
*/
#include "lib/lib.h"
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
+#include <alloca.h>
/* Configuration */
static uns url_max_repeat_length = 0;
static struct cfitem url_config[] = {
- { "URL", CT_SECTION, NULL },
- { "IgnoreSpaces", CT_INT, &url_ignore_spaces },
- { "IgnoreUnderflow", CT_INT, &url_ignore_underflow },
- { "ComponentSeparators", CT_STRING, &url_component_separators },
- { "MinRepeatCount", CT_INT, &url_min_repeat_count },
- { "MaxRepeatLength", CT_INT, &url_max_repeat_length },
- { NULL, CT_STOP, NULL }
+ { "URL", CT_SECTION, NULL },
+ { "IgnoreSpaces", CT_INT, &url_ignore_spaces },
+ { "IgnoreUnderflow", CT_INT, &url_ignore_underflow },
+ { "ComponentSeparators", CT_STRING, &url_component_separators },
+ { "MinRepeatCount", CT_INT, &url_min_repeat_count },
+ { "MaxRepeatLength", CT_INT, &url_max_repeat_length },
+ { NULL, CT_STOP, NULL }
};
static void CONSTRUCTOR url_init_config(void)
s += 2;
q = d;
- while (*s && *s != '/') /* Copy user:passwd@host:port */
+ while (*s && *s != '/' && *s != '?') /* Copy user:passwd@host:port */
*d++ = *s++;
*d++ = 0;
w = strchr(q, '@');
{
*w++ = 0;
u->user = q;
+ if (e = strchr(q, ':'))
+ {
+ *e++ = 0;
+ u->pass = e;
+ }
}
else
w = q;
if (a[0] == '/') /* Absolute path => OK */
return 0;
- if (o[0] != '/')
+ if (o[0] != '/' && o[0] != '?')
return URL_PATH_UNDERFLOW;
if (!a[0]) /* Empty URL -> inherit everything */
int err;
/* Basic checks */
- if (url_proto_path_flags[u->protoid] && !u->host ||
- u->host && !*u->host ||
+ if (url_proto_path_flags[u->protoid] && (!u->host || !*u->host) ||
!u->host && u->user ||
+ !u->user && u->pass ||
!u->rest)
return URL_SYNTAX_ERROR;
{
u->host = b->host;
u->user = b->user;
+ u->pass = b->pass;
u->port = b->port;
if (err = relpath_merge(u, b))
return err;
}
}
+ /* Change path "?" to "/?" because it's the true meaning */
+ if (u->rest[0] == '?')
+ {
+ int l = strlen(u->rest);
+ if (u->bufend - u->buf < l+2) /* Need l+2 bytes: '/', the l characters of rest, and the NUL */
+ return URL_ERR_TOO_LONG;
+ u->buf[0] = '/';
+ memcpy(u->buf+1, u->rest, l+1);
+ u->rest = u->buf;
+ u->buf += l+2;
+ }
+
/* Fill in missing info */
if (u->port == ~0U)
u->port = std_ports[u->protoid];
if (b)
{
k = b + strlen(b) - 1;
- if (k > b && *k == '.')
- *k = 0;
+ while (k > b && *k == '.')
+ *k-- = 0;
}
}
if (u->user)
{
d = append(d, u->user, e);
+ if (u->pass)
+ {
+ d = append(d, ":", e);
+ d = append(d, u->pass, e);
+ }
d = append(d, "@", e);
}
d = append(d, u->host, e);
/* Standard cookbook recipes */
int
-url_canon_split(byte *u, byte *buf1, byte *buf2, struct url *url)
+url_canon_split_rel(byte *u, byte *buf1, byte *buf2, struct url *url, struct url *base)
{
int err;
return err;
if (err = url_split(buf1, url, buf2))
return err;
- if (err = url_normalize(url, NULL))
+ if (err = url_normalize(url, base))
return err;
return url_canonicalize(url);
}
int
-url_auto_canonicalize(byte *src, byte *dst)
+url_auto_canonicalize_rel(byte *src, byte *dst, struct url *base)
{
byte buf1[MAX_URL_SIZE], buf2[MAX_URL_SIZE], buf3[MAX_URL_SIZE];
int err;
struct url ur;
- (void)((err = url_canon_split(src, buf1, buf2, &ur)) ||
+ (void)((err = url_canon_split_rel(src, buf1, buf2, &ur, base)) ||
(err = url_pack(&ur, buf3)) ||
(err = url_enescape(buf3, dst)));
return err;
char buf1[MAX_URL_SIZE], buf2[MAX_URL_SIZE], buf3[MAX_URL_SIZE], buf4[MAX_URL_SIZE];
int err;
struct url url, url0;
+ char *base = "http://mj@www.hell.org/123/sub_dir/index.html;param?query&zzz/subquery#fragment";
- if (argc != 2)
+ if (argc != 2 && argc != 3)
return 1;
+ if (argc == 3)
+ base = argv[2];
if (err = url_deescape(argv[1], buf1))
{
printf("deesc: error %d\n", err);
printf("split: error %d\n", err);
return 1;
}
- printf("split: @%s@%s@%s@%d@%s\n", url.protocol, url.user, url.host, url.port, url.rest);
- if (err = url_split("http://mj@www.hell.org/123/sub_dir/index.html;param?query&zzz/subquery#fragment", &url0, buf3))
+ printf("split: @%s@%s@%s@%s@%d@%s\n", url.protocol, url.user, url.pass, url.host, url.port, url.rest);
+ if (err = url_split(base, &url0, buf3))
{
printf("split base: error %d\n", err);
return 1;
printf("normalize base: error %d\n", err);
return 1;
}
- printf("base: @%s@%s@%s@%d@%s\n", url0.protocol, url0.user, url0.host, url0.port, url0.rest);
+ printf("base: @%s@%s@%s@%s@%d@%s\n", url0.protocol, url0.user, url0.pass, url0.host, url0.port, url0.rest);
if (err = url_normalize(&url, &url0))
{
printf("normalize: error %d\n", err);
return 1;
}
- printf("normalize: @%s@%s@%s@%d@%s\n", url.protocol, url.user, url.host, url.port, url.rest);
+ printf("normalize: @%s@%s@%s@%s@%d@%s\n", url.protocol, url.user, url.pass, url.host, url.port, url.rest);
if (err = url_canonicalize(&url))
{
printf("canonicalize: error %d\n", err);
return 1;
}
- printf("canonicalize: @%s@%s@%s@%d@%s\n", url.protocol, url.user, url.host, url.port, url.rest);
+ printf("canonicalize: @%s@%s@%s@%s@%d@%s\n", url.protocol, url.user, url.pass, url.host, url.port, url.rest);
if (err = url_pack(&url, buf4))
{
printf("pack: error %d\n", err);