From cdfd7b57bf6a48708ea5e490a0b5f61d387fb41a Mon Sep 17 00:00:00 2001 From: Martin Mares Date: Thu, 12 Feb 2015 23:12:03 +0100 Subject: [PATCH] XML: Implementation of XML namespaces The basic logic of namespaces and qualified names works, but unfortunately the attribute data structures are broken. --- ucw-xml/Makefile | 12 +-- ucw-xml/common.c | 2 + ucw-xml/internals.h | 7 ++ ucw-xml/ns.c | 240 ++++++++++++++++++++++++++++++++++++++++++++ ucw-xml/parse.c | 70 ++++++++++--- ucw-xml/xml-test.c | 61 ++++++++--- ucw-xml/xml.h | 45 ++++++++- 7 files changed, 396 insertions(+), 41 deletions(-) create mode 100644 ucw-xml/ns.c diff --git a/ucw-xml/Makefile b/ucw-xml/Makefile index 28083ccd..5b391889 100644 --- a/ucw-xml/Makefile +++ b/ucw-xml/Makefile @@ -4,7 +4,7 @@ DIRS+=ucw-xml PROGS+=$(o)/ucw-xml/xml-test -LIBXML_MODS=common source parse dtd +LIBXML_MODS=common source parse dtd ns LIBXML_MOD_PATHS=$(addprefix $(o)/ucw-xml/,$(LIBXML_MODS)) LIBXML_INCLUDES=xml.h dtd.h LIBXML_DEPS=$(LIBUCW) $(LIBCHARSET) @@ -18,14 +18,8 @@ ifdef CONFIG_INSTALL_API $(o)/ucw-xml/libucw-xml.pc: $(addprefix $(o)/ucw-xml/libucw-xml$(LV),.a .so) endif -$(o)/ucw-xml/common.o: $(o)/ucw-xml/unicat.h -$(o)/ucw-xml/common.oo: $(o)/ucw-xml/unicat.h -$(o)/ucw-xml/source.o: $(o)/ucw-xml/unicat.h -$(o)/ucw-xml/source.oo: $(o)/ucw-xml/unicat.h -$(o)/ucw-xml/dtd.o: $(o)/ucw-xml/unicat.h -$(o)/ucw-xml/dtd.oo: $(o)/ucw-xml/unicat.h -$(o)/ucw-xml/parse.o: $(o)/ucw-xml/unicat.h -$(o)/ucw-xml/parse.oo: $(o)/ucw-xml/unicat.h +$(addsuffix .o,$(LIBXML_MOD_PATHS)): $(o)/ucw-xml/unicat.h +$(addsuffix .oo,$(LIBXML_MOD_PATHS)): $(o)/ucw-xml/unicat.h $(o)/ucw-xml/unicat.h: $(s)/ucw-xml/unicat.pl $(M)GEN $(addprefix $(o)/ucw-xml/unicat,.h .c) $(Q)$< $(addprefix $(o)/ucw-xml/unicat,.h .c) diff --git a/ucw-xml/common.c b/ucw-xml/common.c index 8b37d597..d6614496 100644 --- a/ucw-xml/common.c +++ b/ucw-xml/common.c @@ -119,6 +119,7 @@ xml_cleanup(struct xml_context *ctx) xml_attrs_table_cleanup(ctx); xml_dtd_cleanup(ctx); xml_sources_cleanup(ctx); + xml_ns_cleanup(ctx); mp_delete(ctx->pool); mp_delete(ctx->stack); } @@ -136,5 +137,6 @@ xml_reset(struct xml_context *ctx) *ctx = xml_defaults; ctx->pool = pool; ctx->stack = stack; + xml_ns_reset(ctx); xml_do_init(ctx); } diff --git a/ucw-xml/internals.h b/ucw-xml/internals.h index a67cd8eb..54101d69 100644 --- a/ucw-xml/internals.h +++ b/ucw-xml/internals.h @@ -323,4 +323,11 @@ void xml_attrs_table_cleanup(struct xml_context *ctx); void xml_validate_attr(struct xml_context *ctx, struct xml_dtd_attr *dtd, char *value); +/*** Namespaces ***/ + +void xml_ns_cleanup(struct xml_context *ctx); +void xml_ns_reset(struct xml_context *ctx); +void xml_ns_push_element(struct xml_context *ctx); +void xml_ns_pop_element(struct xml_context *ctx); + #endif diff --git a/ucw-xml/ns.c b/ucw-xml/ns.c new file mode 100644 index 00000000..87745908 --- /dev/null +++ b/ucw-xml/ns.c @@ -0,0 +1,240 @@ +/* + * UCW Library -- A simple XML parser -- Namespaces + * + * (c) 2015 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#undef LOCAL_DEBUG + +#include +#include +#include +#include + +/* + * This is an implementation of XML namespaces according to + * http://www.w3.org/TR/REC-xml-names/. + * + * Currently, we assume that the document does not contain a plethora + * of namespaces and prefixes. So we keep them in memory until the + * document ends. + */ + +struct ns_hash_entry { + uint ns; + char name[1]; +}; + +#define HASH_NODE struct ns_hash_entry +#define HASH_PREFIX(x) ns_hash_##x +#define HASH_KEY_ENDSTRING name +#define HASH_WANT_CLEANUP +#define HASH_WANT_FIND +#define HASH_WANT_LOOKUP +#define HASH_TABLE_DYNAMIC +#define HASH_LOOKUP_DETECT_NEW +#define HASH_GIVE_ALLOC +XML_HASH_GIVE_ALLOC +#include + +struct xml_ns_prefix { + struct xml_ns_prefix *prev; + struct xml_node *e; /* Which element defined this prefix */ + struct ns_hash_entry *he; /* NULL if changing default NS */ + uint prev_ns; /* Previous NS ID assigned to this prefix */ +}; + +static bool +ns_enabled(struct xml_context *ctx) +{ + return (ctx->flags & XML_NAMESPACES); +} + +void +xml_ns_enable(struct xml_context *ctx) +{ + if (ns_enabled(ctx)) + return; + + TRACE(ctx, "NS: Enabling"); + ctx->flags |= XML_NAMESPACES; + if (!ctx->ns_pool) + { + TRACE(ctx, "NS: Allocating data structures"); + ctx->ns_pool = mp_new(4096); + GARY_INIT(ctx->ns_by_id, 16); + } + + ctx->ns_by_name = xml_hash_new(ctx->ns_pool, sizeof(struct ns_hash_table)); + ns_hash_init(ctx->ns_by_name); + + ctx->ns_by_prefix = xml_hash_new(ctx->ns_pool, sizeof(struct ns_hash_table)); + ns_hash_init(ctx->ns_by_prefix); + + /* Intern well-known namespaces */ + GARY_RESIZE(ctx->ns_by_id, 0); + uint none_ns = xml_ns_by_name(ctx, ""); + uint xmlns_ns = xml_ns_by_name(ctx, "http://www.w3.org/2000/xmlns/"); + uint xml_ns = xml_ns_by_name(ctx, "http://www.w3.org/XML/1998/namespace"); + ASSERT(none_ns == XML_NS_NONE && xmlns_ns == XML_NS_XMLNS && xml_ns == XML_NS_XML); + + /* Intern standard prefixes */ + int new_xmlns, new_xml; + ns_hash_lookup(ctx->ns_by_prefix, "xmlns", &new_xmlns)->ns = xmlns_ns; + ns_hash_lookup(ctx->ns_by_prefix, "xml", &new_xml)->ns = xml_ns; + ASSERT(new_xmlns && new_xml); +} + +void +xml_ns_cleanup(struct xml_context *ctx) +{ + if (!ctx->ns_pool) + return; + + TRACE(ctx, "NS: Cleanup"); + ns_hash_cleanup(ctx->ns_by_prefix); + ns_hash_cleanup(ctx->ns_by_name); + GARY_FREE(ctx->ns_by_id); + mp_delete(ctx->ns_pool); +} + +void +xml_ns_reset(struct xml_context *ctx) +{ + if (!ns_enabled(ctx)) + return; + + TRACE(ctx, "NS: Reset"); + GARY_RESIZE(ctx->ns_by_id, 1); + ctx->ns_by_id[0] = ""; + mp_flush(ctx->ns_pool); +} + +const char * +xml_ns_by_id(struct xml_context *ctx, uint ns) +{ + ASSERT(ns < GARY_SIZE(ctx->ns_by_id)); + return ctx->ns_by_id[ns]; +} + +uint +xml_ns_by_name(struct xml_context *ctx, const char *name) +{ + int new_p; + struct ns_hash_entry *he = ns_hash_lookup(ctx->ns_by_name, (char *) name, &new_p); + if (new_p) + { + he->ns = GARY_SIZE(ctx->ns_by_id); + ASSERT(he->ns < ~0U); + *GARY_PUSH(ctx->ns_by_id) = he->name; + TRACE(ctx, "NS: New namespace <%s> with ID %u", he->name, he->ns); + } + return he->ns; +} + +static struct xml_ns_prefix * +ns_push_prefix(struct xml_context *ctx) +{ + struct xml_ns_prefix *px = mp_alloc(ctx->stack, sizeof(*px)); + px->prev = ctx->ns_prefix_stack; + ctx->ns_prefix_stack = px; + px->e = ctx->node; + return px; +} + +static uint +ns_resolve(struct xml_context *ctx, char **namep, uint default_ns) +{ + char *name = *namep; + char *colon = strchr(name, ':'); + if (colon) + { + *colon = 0; + struct ns_hash_entry *he = ns_hash_find(ctx->ns_by_prefix, name); + *colon = ':'; + if (he && he->ns) + { + *namep = colon + 1; + return he->ns; + } + else + { + xml_error(ctx, "Unknown namespace prefix for %s", name); + return 0; + } + } + else + return default_ns; +} + +void xml_ns_push_element(struct xml_context *ctx) +{ + struct xml_node *e = ctx->node; + if (!ns_enabled(ctx)) + { + e->ns = 0; + return; + } + + /* Scan attributes for prefix definitions */ + XML_ATTR_FOR_EACH(a, e) + if (!memcmp(a->name, "xmlns", 5)) + { + struct xml_ns_prefix *px = ns_push_prefix(ctx); + uint ns = xml_ns_by_name(ctx, a->val); + if (a->name[5] == ':') + { + if (a->name[6]) + { + /* New NS prefix */ + int new_p; + struct ns_hash_entry *he = ns_hash_lookup(ctx->ns_by_prefix, a->name + 6, &new_p); + if (new_p) + he->ns = 0; + px->he = he; + px->prev_ns = he->ns; + he->ns = ns; + TRACE(ctx, "NS: New prefix <%s> -> ID %u", he->name, he->ns); + } + } + else + { + /* New default NS */ + px->he = NULL; + px->prev_ns = ctx->ns_default; + ctx->ns_default = ns; + TRACE(ctx, "New default NS -> ID %u", ns); + } + } + + /* Resolve namespaces */ + e->ns = ns_resolve(ctx, &e->name, ctx->ns_default); + XML_ATTR_FOR_EACH(a, e) + a->ns = ns_resolve(ctx, &a->name, 0); +} + +void xml_ns_pop_element(struct xml_context *ctx) +{ + if (!ns_enabled(ctx)) + return; + + struct xml_ns_prefix *px; + while ((px = ctx->ns_prefix_stack) && px->e == ctx->node) + { + struct ns_hash_entry *he = px->he; + if (he) + { + TRACE(ctx, "NS: Restoring prefix <%s> -> ID %u", he->name, px->prev_ns); + he->ns = px->prev_ns; + } + else + { + TRACE(ctx, "NS: Restoring default NS -> ID %u", px->prev_ns); + ctx->ns_default = px->prev_ns; + } + ctx->ns_prefix_stack = px->prev; + } +} diff --git a/ucw-xml/parse.c b/ucw-xml/parse.c index 3402b732..48ad7021 100644 --- a/ucw-xml/parse.c +++ b/ucw-xml/parse.c @@ -2,6 +2,7 @@ * UCW Library -- A simple XML parser * * (c) 2007--2008 Pavel Charvat + * (c) 2015 Martin Mares * * This software may be freely distributed and used according to the terms * of the GNU Lesser General Public License. @@ -58,7 +59,8 @@ xml_parse_eq(struct xml_context *ctx) static char * xml_parse_string(struct xml_context *ctx, struct mempool *pool, uint first_cat, uint next_cat, char *err) { - char *p = mp_start_noalign(pool, 1); + char *p = mp_start_noalign(pool, 2); + *p++ = '<'; /* We always prepend a '<', so we can seek backwards in the string */ if (unlikely(!(xml_peek_cat(ctx) & first_cat))) xml_fatal(ctx, "%s", err); do @@ -68,7 +70,7 @@ xml_parse_string(struct xml_context *ctx, struct mempool *pool, uint first_cat, } while (xml_peek_cat(ctx) & next_cat); *p++ = 0; - return mp_end(pool, p); + return mp_end(pool, p) + 1; } static void @@ -587,21 +589,22 @@ xml_normalize_white(struct xml_context *ctx UNUSED, char *text) struct xml_attrs_table; static inline uint -xml_attrs_hash(struct xml_attrs_table *t UNUSED, struct xml_node *e, char *n) +xml_attrs_hash(struct xml_attrs_table *t UNUSED, struct xml_node *e, uint ns, char *n) { - return hash_pointer(e) ^ hash_string(n); + return hash_pointer(e) ^ hash_string(n) ^ hash_u32(ns); } static inline int -xml_attrs_eq(struct xml_attrs_table *t UNUSED, struct xml_node *e1, char *n1, struct xml_node *e2, char *n2) +xml_attrs_eq(struct xml_attrs_table *t UNUSED, struct xml_node *e1, uint ns1, char *n1, struct xml_node *e2, uint ns2, char *n2) { - return (e1 == e2) && !strcmp(n1, n2); + return (e1 == e2) && (ns1 == ns2) && !strcmp(n1, n2); } static inline void -xml_attrs_init_key(struct xml_attrs_table *t UNUSED, struct xml_attr *a, struct xml_node *e, char *name) +xml_attrs_init_key(struct xml_attrs_table *t UNUSED, struct xml_attr *a, struct xml_node *e, uint ns, char *name) { a->elem = e; + a->ns = ns; a->name = name; a->val = NULL; a->user = NULL; @@ -610,8 +613,8 @@ xml_attrs_init_key(struct xml_attrs_table *t UNUSED, struct xml_attr *a, struct #define HASH_PREFIX(x) xml_attrs_##x #define HASH_NODE struct xml_attr -#define HASH_KEY_COMPLEX(x) x elem, x name -#define HASH_KEY_DECL struct xml_node *elem, char *name +#define HASH_KEY_COMPLEX(x) x elem, x ns, x name +#define HASH_KEY_DECL struct xml_node *elem, uint ns, char *name #define HASH_TABLE_DYNAMIC #define HASH_GIVE_EQ #define HASH_GIVE_HASHFN @@ -631,7 +634,8 @@ xml_parse_attr(struct xml_context *ctx) /* Attribute ::= Name Eq AttValue */ struct xml_node *e = ctx->node; char *n = xml_parse_name(ctx, ctx->pool); - struct xml_attr *a = xml_attrs_lookup(ctx->tab_attrs, e, n); + // FIXME: This is wrong! This way, we never find attributes in a non-default NS. + struct xml_attr *a = xml_attrs_lookup(ctx->tab_attrs, e, 0, n); xml_parse_eq(ctx); char *v = xml_parse_attr_value(ctx, NULL); if (a->val) @@ -651,13 +655,19 @@ xml_parse_attr(struct xml_context *ctx) struct xml_attr * xml_attr_find(struct xml_context *ctx, struct xml_node *node, char *name) { - return xml_attrs_find(ctx->tab_attrs, node, name); + return xml_attrs_find(ctx->tab_attrs, node, 0, name); +} + +struct xml_attr * +xml_attr_find_ns(struct xml_context *ctx, struct xml_node *node, uint ns, char *name) +{ + return xml_attrs_find(ctx->tab_attrs, node, ns, name); } char * xml_attr_value(struct xml_context *ctx, struct xml_node *node, char *name) { - struct xml_attr *attr = xml_attrs_find(ctx->tab_attrs, node, name); + struct xml_attr *attr = xml_attrs_find(ctx->tab_attrs, node, 0, name); if (attr) return attr->val; if (!node->dtd) @@ -678,6 +688,15 @@ xml_attrs_table_cleanup(struct xml_context *ctx) xml_attrs_cleanup(ctx->tab_attrs); } +char * +xml_attr_qname(struct xml_context *ctx UNUSED, struct xml_attr *attr) +{ + char *n = attr->name; + while (n[-1] != '<') + n--; + return n; +} + /*** Elements ***/ static uint @@ -705,12 +724,14 @@ xml_push_element(struct xml_context *ctx) e->type = XML_NODE_ELEM; e->name = xml_parse_name(ctx, ctx->pool); slist_init(&e->attrs); + if (!e->parent) { ctx->dom = e; if (ctx->doctype && strcmp(e->name, ctx->doctype)) xml_error(ctx, "The root element <%s> does not match the document type <%s>", e->name, ctx->doctype); } + if (!ctx->dtd) e->dtd = NULL; else if (!(e->dtd = xml_dtd_find_elem(ctx, e->name))) @@ -732,6 +753,7 @@ xml_push_element(struct xml_context *ctx) xml_error(ctx, "Unexpected element <%s>", e->name); } } + while (1) { uint white = xml_parse_white(ctx, 0); @@ -749,16 +771,20 @@ xml_push_element(struct xml_context *ctx) xml_unget_char(ctx); xml_parse_attr(ctx); } + + xml_ns_push_element(ctx); + + /* FIXME: DTD logic is not namespace-aware */ if (e->dtd) SLIST_FOR_EACH(struct xml_dtd_attr *, a, e->dtd->attrs) if (a->default_mode == XML_ATTR_REQUIRED) { - if (!xml_attrs_find(ctx->tab_attrs, e, a->name)) + if (!xml_attrs_find(ctx->tab_attrs, e, 0, a->name)) xml_error(ctx, "Missing required attribute %s in element <%s>", a->name, e->name); } else if (a->default_mode != XML_ATTR_IMPLIED && ctx->flags & XML_ALLOC_DEFAULT_ATTRS) { - struct xml_attr *attr = xml_attrs_lookup(ctx->tab_attrs, e, a->name); + struct xml_attr *attr = xml_attrs_lookup(ctx->tab_attrs, e, 0, a->name); if (!attr->val) attr->val = a->default_value; } @@ -772,6 +798,9 @@ xml_pop_element(struct xml_context *ctx) TRACE(ctx, "pop_element"); if ((ctx->flags & XML_REPORT_TAGS) && ctx->h_etag) ctx->h_etag(ctx); + + xml_ns_pop_element(ctx); + struct xml_node *e = ctx->node; uint free = !(ctx->flags & XML_ALLOC_TAGS); if (free) @@ -793,6 +822,7 @@ xml_pop_element(struct xml_context *ctx) clist_remove(&n->n); } } + xml_pop_dom(ctx, free); xml_dec(ctx); } @@ -804,7 +834,7 @@ xml_parse_etag(struct xml_context *ctx) * Already parsed: '<' */ struct xml_node *e = ctx->node; ASSERT(e); - char *n = e->name; + char *n = xml_node_qname(ctx, e); while (*n) { uint c; @@ -822,6 +852,16 @@ recover: xml_dec(ctx); } +char * +xml_node_qname(struct xml_context *ctx UNUSED, struct xml_node *node) +{ + ASSERT(node->type == XML_NODE_ELEM); + char *n = node->name; + while (n[-1] != '<') + n--; + return n; +} + /*** Document type declaration ***/ static void diff --git a/ucw-xml/xml-test.c b/ucw-xml/xml-test.c index 758b0373..7b7fcd1c 100644 --- a/ucw-xml/xml-test.c +++ b/ucw-xml/xml-test.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -25,21 +26,24 @@ enum { WANT_REPORT_BLOCKS, WANT_REPORT_IGNORABLE, WANT_FILE_ENTITIES, + WANT_QNAMES, }; -static char *shortopts = "spdt" CF_SHORT_OPTS; +static char *shortopts = "spdtn" CF_SHORT_OPTS; static struct option longopts[] = { CF_LONG_OPTS { "sax", 0, 0, 's' }, { "pull", 0, 0, 'p' }, { "dom", 0, 0, 't' }, { "dtd", 0, 0, 'd' }, + { "namespaces", 0, 0, 'n' }, { "hide-errors", 0, 0, WANT_HIDE_ERRORS }, { "ignore-comments", 0, 0, WANT_IGNORE_COMMENTS }, { "ignore-pis", 0, 0, WANT_IGNORE_PIS }, { "report-blocks", 0, 0, WANT_REPORT_BLOCKS }, { "report-ignorable", 0, 0, WANT_REPORT_IGNORABLE }, { "file-entities", 0, 0, WANT_FILE_ENTITIES }, + { "qnames", 0, 0, WANT_QNAMES }, { NULL, 0, 0, 0 } }; @@ -56,12 +60,14 @@ CF_USAGE -s, --sax Test SAX interface\n\ -t, --dom Test DOM interface\n\ -d, --dtd Enable parsing of DTD\n\ +-n, --namespaces Resolve namespaces\n\ --hide-errors Hide warnings and error messages\n\ --ignore-comments Ignore comments\n\ --ignore-pis Ignore processing instructions\n\ --report-blocks Report blocks or characters and CDATA sections\n\ --report-ignorable Report ignorable whitespace\n\ --file-entities Resolve file external entities (not fully normative)\n\ + --qnames Display qualified names including namespace prefixes\n\ \n", stderr); exit(1); } @@ -69,6 +75,7 @@ CF_USAGE static uint want_sax; static uint want_pull; static uint want_dom; +static uint want_ns; static uint want_parse_dtd; static uint want_hide_errors; static uint want_ignore_comments; @@ -76,6 +83,7 @@ static uint want_ignore_pis; static uint want_report_blocks; static uint want_report_ignorable; static uint want_file_entities; +static uint want_qnames; static struct fastbuf *out; @@ -93,14 +101,20 @@ node_type(struct xml_node *node) } static void -show_node(struct xml_node *node) +show_node(struct xml_context *ctx, struct xml_node *node) { switch (node->type) { case XML_NODE_ELEM: - bprintf(out, " <%s>", node->name); + if (want_ns) + bprintf(out, " (ns%u)<%s>", node->ns, (want_qnames ? xml_node_qname(ctx, node) : node->name)); + else + bprintf(out, " <%s>", node->name); XML_ATTR_FOR_EACH(a, node) - bprintf(out, " %s='%s'", a->name, a->val); + if (want_ns) + bprintf(out, " (ns%u)%s='%s'", a->ns, (want_qnames ? xml_attr_qname(ctx, a) : a->name), a->val); + else + bprintf(out, " %s='%s'", a->name, a->val); bputc(out, '\n'); break; case XML_NODE_COMMENT: @@ -118,7 +132,7 @@ show_node(struct xml_node *node) } static void -show_tree(struct xml_node *node, uint level) +show_tree(struct xml_context *ctx, struct xml_node *node, uint level) { if (!node) return; @@ -126,10 +140,10 @@ show_tree(struct xml_node *node, uint level) for (uint i = 0; i < level; i++) bputs(out, " "); bputs(out, node_type(node)); - show_node(node); + show_node(ctx, node); if (node->type == XML_NODE_ELEM) XML_NODE_FOR_EACH(son, node) - show_tree(son, level + 1); + show_tree(ctx, son, level + 1); } static void @@ -168,21 +182,21 @@ static void h_comment(struct xml_context *ctx) { bputs(out, "SAX: comment"); - show_node(ctx->node); + show_node(ctx, ctx->node); } static void h_pi(struct xml_context *ctx) { bputs(out, "SAX: pi"); - show_node(ctx->node); + show_node(ctx, ctx->node); } static void h_stag(struct xml_context *ctx) { bputs(out, "SAX: stag"); - show_node(ctx->node); + show_node(ctx, ctx->node); } static void @@ -195,7 +209,7 @@ static void h_chars(struct xml_context *ctx) { bputs(out, "SAX: chars"); - show_node(ctx->node); + show_node(ctx, ctx->node); } static void @@ -255,6 +269,9 @@ main(int argc, char **argv) case 'd': want_parse_dtd++; break; + case 'n': + want_ns++; + break; case WANT_HIDE_ERRORS: want_hide_errors++; break; @@ -273,6 +290,9 @@ main(int argc, char **argv) case WANT_FILE_ENTITIES: want_file_entities++; break; + case WANT_QNAMES: + want_qnames++; + break; default: usage(); } @@ -315,6 +335,8 @@ main(int argc, char **argv) ctx.flags &= ~(XML_REPORT_PIS | XML_ALLOC_PIS); if (want_file_entities) ctx.h_resolve_entity = h_resolve_entity; + if (want_ns) + xml_ns_enable(&ctx); xml_push_fastbuf(&ctx, bfdopen_shared(0, 4096)); bputs(out, "PULL: start\n"); if (want_pull) @@ -326,22 +348,22 @@ main(int argc, char **argv) { case XML_STATE_CHARS: bputs(out, "PULL: chars"); - show_node(ctx.node); + show_node(&ctx, ctx.node); break; case XML_STATE_STAG: bputs(out, "PULL: stag"); - show_node(ctx.node); + show_node(&ctx, ctx.node); break; case XML_STATE_ETAG: bprintf(out, "PULL: etag \n", ctx.node->name); break; case XML_STATE_COMMENT: bputs(out, "PULL: comment"); - show_node(ctx.node); + show_node(&ctx, ctx.node); break; case XML_STATE_PI: bputs(out, "PULL: pi"); - show_node(ctx.node); + show_node(&ctx, ctx.node); break; default: bputs(out, "PULL: unknown\n"); @@ -356,7 +378,14 @@ main(int argc, char **argv) { bputs(out, "PULL: eof\n"); if (want_dom) - show_tree(ctx.dom, 0); + show_tree(&ctx, ctx.dom, 0); + } + + if (want_ns) + { + bputs(out, "Known namespaces:\n"); + for (uns i=0; i < GARY_SIZE(ctx.ns_by_id); i++) + bprintf(out, "%u\t%s\n", i, ctx.ns_by_id[i]); } xml_cleanup(&ctx); diff --git a/ucw-xml/xml.h b/ucw-xml/xml.h index c048f56c..0c335c65 100644 --- a/ucw-xml/xml.h +++ b/ucw-xml/xml.h @@ -2,6 +2,7 @@ * UCW Library -- A simple XML parser * * (c) 2007--2008 Pavel Charvat + * (c) 2015 Martin Mares * * This software may be freely distributed and used according to the terms * of the GNU Lesser General Public License. @@ -104,6 +105,7 @@ enum xml_flags { XML_PARSE_DTD = 0x00000200, /* Enable parsing of DTD */ XML_NO_CHARS = 0x00000400, /* The current element must not contain character data (filled automaticaly if using DTD) */ XML_ALLOC_DEFAULT_ATTRS = 0x00000800, /* Allocate default attribute values so they can be found by XML_ATTR_FOR_EACH */ + XML_NAMESPACES = 0x00001000, /* Parse namespaces, use xml_ns_enable() to set this */ /* Internals, do not change! */ XML_EMPTY_ELEM_TAG = 0x00010000, /* The current element match EmptyElemTag */ @@ -131,6 +133,14 @@ struct xml_node { cnode n; /* Node for list of parent's sons */ uint type; /* XML_NODE_x */ struct xml_node *parent; /* Parent node */ + /* + * If namespaces are enabled, node->name points to the local part of the name + * and node->ns is the resolved namespace ID. + * + * However, the namespace prefix is kept in memory just before the local part, + * so you can use xml_node_qname() to find out the full qualified name. + * The same applies to attributes, but the function is xml_attr_qname(). + */ char *name; /* Element name / PI target */ clist sons; /* Children nodes */ union { @@ -139,6 +149,7 @@ struct xml_node { uint len; /* Text length in bytes */ }; struct { + uint ns; /* Namespace ID */ struct xml_dtd_elem *dtd; /* Element DTD */ slist attrs; /* Link list of element attributes */ }; @@ -150,7 +161,8 @@ struct xml_attr { snode n; /* Node for elem->attrs */ struct xml_node *elem; /* Parent element */ struct xml_dtd_attr *dtd; /* Attribute DTD */ - char *name; /* Attribute name */ + uint ns; /* Namespace ID */ + char *name; /* Attribute name without NS prefix */ char *val; /* Attribute value */ void *user; /* User-defined (initialized to NULL) */ }; @@ -228,6 +240,14 @@ struct xml_context { struct xml_node *dom; /* DOM root */ struct xml_node *node; /* Current DOM node */ + /* Namespaces */ + struct mempool *ns_pool; /* Memory pool for NS definitions */ + const char **ns_by_id; /* A growing array translating NS IDs to their names */ + void *ns_by_name; /* Hash table translating NS names to their IDs */ + void *ns_by_prefix; /* Hash table translating current prefixes to NS IDs, allocated from xml->stack */ + struct xml_ns_prefix *ns_prefix_stack; /* A stack of prefix definitions, allocated from xml->stack */ + uint ns_default; /* Current default namespace */ + char *version_str; uint standalone; char *doctype; /* The document type (or NULL if unknown) */ @@ -265,9 +285,18 @@ uint xml_skip_element(struct xml_context *ctx); /* Returns the current row number in the document entity */ uint xml_row(struct xml_context *ctx); +/* Finds a qualified name (including namespace prefix) of a given element node. */ +char *xml_node_qname(struct xml_context *ctx, struct xml_node *node); + +/* Finds a qualified name (including namespace prefix) of a given attribute. */ +char *xml_attr_qname(struct xml_context *ctx, struct xml_attr *node); + /* Finds a given attribute value in a XML_NODE_ELEM node */ struct xml_attr *xml_attr_find(struct xml_context *ctx, struct xml_node *node, char *name); +/* The same, but namespace-aware */ +struct xml_attr *xml_attr_find_ns(struct xml_context *ctx, struct xml_node *node, uint ns, char *name); + /* Similar to xml_attr_find, but it deals also with default values */ char *xml_attr_value(struct xml_context *ctx, struct xml_node *node, char *name); @@ -291,4 +320,18 @@ void xml_warn(struct xml_context *ctx, const char *format, ...); void xml_error(struct xml_context *ctx, const char *format, ...); void NONRET xml_fatal(struct xml_context *ctx, const char *format, ...); +/* Request processing of namespaces */ +void xml_ns_enable(struct xml_context *ctx); + +/* Looks up namespace by its ID, dies on an invalid ID */ +const char *xml_ns_by_id(struct xml_context *ctx, uint ns); + +/* Looks up namespace by its name and returns its ID. Creates a new ID if necessary. */ +uint xml_ns_by_name(struct xml_context *ctx, const char *name); + +/* Well-known namespaces */ +#define XML_NS_NONE 0 /* This element has no namespace */ +#define XML_NS_XMLNS 1 /* xmlns: */ +#define XML_NS_XML 2 /* xml: */ + #endif -- 2.39.2