From 637533a60b2201eaadedcb00fc66ef1e20237432 Mon Sep 17 00:00:00 2001 From: Pavel Charvat Date: Tue, 11 Dec 2007 12:17:47 +0100 Subject: [PATCH] XML: Updates to the XML parser. --- sherlock/xml/Makefile | 20 +- sherlock/xml/dtd.h | 148 +++++++ sherlock/xml/libshxml.pc | 11 + sherlock/xml/{xml-ucat.pl => unicat.pl} | 2 - sherlock/xml/xml.c | 562 +++++++++++++++--------- sherlock/xml/xml.h | 130 +----- 6 files changed, 528 insertions(+), 345 deletions(-) create mode 100644 sherlock/xml/dtd.h create mode 100644 sherlock/xml/libshxml.pc rename sherlock/xml/{xml-ucat.pl => unicat.pl} (98%) diff --git a/sherlock/xml/Makefile b/sherlock/xml/Makefile index e3acc181..f721b500 100644 --- a/sherlock/xml/Makefile +++ b/sherlock/xml/Makefile @@ -3,17 +3,21 @@ DIRS+=sherlock/xml -LIBSH_MODS+=xml/xml -LIBSH_XML_INCLUDES=xml.h +LIBSHXML_MODS=xml +LIBSHXML_INCLUDES=xml.h dtd.h -$(o)/sherlock/xml/xml-t: $(LIBSH) $(LIBCHARSET) -$(o)/sherlock/xml/xml.o: $(o)/sherlock/xml/xml-ucat.h -$(o)/sherlock/xml/xml-ucat.h: $(s)/sherlock/xml/xml-ucat.pl +LIBSHXML_MOD_PATHS=$(addprefix $(o)/sherlock/xml/,$(LIBSHXML_MODS)) + +$(o)/sherlock/xml/libshxml.a: $(addsuffix .o,$(LIBSHXML_MOD_PATHS)) +$(o)/sherlock/xml/libshxml.so: $(addsuffix .oo,$(LIBSHXML_MOD_PATHS)) +$(o)/sherlock/xml/libshxml.pc: $(LIBUCW) $(LIBCHARSET) + +$(o)/sherlock/xml/xml-t: $(LIBSHXML) +$(o)/sherlock/xml/xml.o: $(o)/sherlock/xml/unicat.h +$(o)/sherlock/xml/unicat.h: $(s)/sherlock/xml/unicat.pl $(M)GEN $@ $(Q)$< >$@ API_INCLUDES+=$(o)/sherlock/xml/.include-stamp -$(o)/sherlock/xml/.include-stamp: $(addprefix $(s)/sherlock/xml/,$(LIBSH_XML_INCLUDES)) +$(o)/sherlock/xml/.include-stamp: $(addprefix $(s)/sherlock/xml/,$(LIBSHXML_INCLUDES)) $(o)/sherlock/xml/.include-stamp: IDST=sherlock/xml - -include $(s)/sherlock/perl/Makefile diff --git a/sherlock/xml/dtd.h b/sherlock/xml/dtd.h new file mode 100644 index 00000000..bf95b872 --- /dev/null +++ b/sherlock/xml/dtd.h @@ -0,0 +1,148 @@ +/* + * Sherlock Library -- A simple XML parser + 
* + * (c) 2007 Pavel Charvat + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#ifndef _SHERLOCK_XML_DTD_H +#define _SHERLOCK_XML_DTD_H + +#include "sherlock/xml/xml.h" + +struct xml_dtd { + struct mempool *pool; /* Memory pool where to allocate DTD */ + slist gents; /* Linked list of general entities */ + slist pents; /* Linked list of parameter entities */ + slist notns; /* Linked list of notations */ + slist elems; /* Linked list of elements */ + void *tab_gents; /* Hash table of general entities */ + void *tab_pents; /* Hash table of parameter entities */ + void *tab_notns; /* Hash table of notations */ + void *tab_elems; /* Hash table of elements */ + void *tab_enodes; /* Hash table of element sons */ + void *tab_attrs; /* Hash table of element attributes */ + void *tab_evals; /* Hash table of enumerated attribute values */ + void *tab_enotns; /* Hash table of enumerated attribute notations */ +}; + +/* Notations */ + +enum xml_dtd_notn_flags { + XML_DTD_NOTN_DECLARED = 0x1, /* The notation has been declared (internal usage) */ +}; + +struct xml_dtd_notn { + snode n; /* Node in xml_dtd.notns */ + uns flags; /* XML_DTD_NOTN_x */ + char *name; /* Notation name */ + struct xml_ext_id eid; /* External id */ +}; + +/* Entities */ + +enum xml_dtd_ent_flags { + XML_DTD_ENT_DECLARED = 0x1, /* The entity has been declared (internal usage) */ + XML_DTD_ENT_VISITED = 0x2, /* Cycle detection (internal usage) */ + XML_DTD_ENT_PARAMETER = 0x4, /* Parameter entity, general otherwise */ + XML_DTD_ENT_EXTERNAL = 0x8, /* External entity, internal otherwise */ + XML_DTD_ENT_UNPARSED = 0x10, /* Unparsed entity, parsed otherwise */ + XML_DTD_ENT_TRIVIAL = 0x20, /* Replacement text is a sequence of characters and character references */ +}; + +struct xml_dtd_ent { + snode n; /* Node in xml_dtd.[gp]ents */ + uns flags; /* XML_DTD_ENT_x */ + char *name; /* Entity name */ + char *text; /* Replacement text / 
expanded replacement text (XML_DTD_ENT_TRIVIAL) */ + uns len; /* Text length */ + struct xml_ext_id eid; /* External ID */ + struct xml_dtd_notn *notn; /* Notation (XML_DTD_ENT_UNPARSED only) */ +}; + +/* Elements */ + +enum xml_dtd_elem_flags { + XML_DTD_ELEM_DECLARED = 0x1, /* The element has been declared (internal usage) */ +}; + +enum xml_dtd_elem_type { /* Content specification of an element declaration */ + XML_DTD_ELEM_EMPTY, /* 'EMPTY' */ + XML_DTD_ELEM_ANY, /* 'ANY' */ + XML_DTD_ELEM_MIXED, /* Mixed content: '(' '#PCDATA' ... ')' */ + XML_DTD_ELEM_CHILDREN, /* Element children: a choice or a sequence */ +}; + +struct xml_dtd_elem { + snode n; /* List node; presumably in xml_dtd.elems -- TODO confirm */ + uns flags; /* XML_DTD_ELEM_x */ + uns type; /* enum xml_dtd_elem_type */ + char *name; /* Element name */ + struct xml_dtd_elem_node *node; /* Root of the content model tree (allocated for '(' ... ')' content) */ +}; + +struct xml_dtd_elem_node { + snode n; /* Node in the parent's list of sons */ + struct xml_dtd_elem_node *parent; /* Enclosing group, NULL for the root node */ + struct xml_dtd_elem *elem; /* Referenced element (set for name nodes) */ + slist sons; /* List of child nodes */ + uns type; /* enum xml_dtd_elem_node_type */ + uns occur; /* XML_DTD_ELEM_OCCUR_x */ +}; + +enum xml_dtd_elem_node_type { + XML_DTD_ELEM_PCDATA, /* '#PCDATA' list of Mixed content; also the initial type of a group whose operator has not been seen yet */ + XML_DTD_ELEM_SEQ, /* Sequence: sons separated by ',' */ + XML_DTD_ELEM_OR, /* Choice: sons separated by '|' */ +}; + +enum xml_dtd_elem_node_occur { + XML_DTD_ELEM_OCCUR_ONCE, /* No suffix */ + XML_DTD_ELEM_OCCUR_OPT, /* '?' */ + XML_DTD_ELEM_OCCUR_MULT, /* '*' */ + XML_DTD_ELEM_OCCUR_PLUS, /* '+' */ +}; + +/* Attributes */ + +enum xml_dtd_attribute_default { + XML_ATTR_NONE, /* No '#' keyword, a default value follows */ + XML_ATTR_REQUIRED, /* '#REQUIRED' */ + XML_ATTR_IMPLIED, /* '#IMPLIED' */ + XML_ATTR_FIXED, /* '#FIXED' followed by the value */ +}; + +enum xml_dtd_attribute_type { + XML_ATTR_CDATA, /* 'CDATA' */ + XML_ATTR_ID, /* 'ID' */ + XML_ATTR_IDREF, /* 'IDREF' */ + XML_ATTR_IDREFS, /* 'IDREFS' */ + XML_ATTR_ENTITY, /* 'ENTITY' */ + XML_ATTR_ENTITIES, /* 'ENTITIES' */ + XML_ATTR_NMTOKEN, /* 'NMTOKEN' */ + XML_ATTR_NMTOKENS, /* 'NMTOKENS' */ + XML_ATTR_ENUM, /* Enumeration: '(' Nmtoken ('|' Nmtoken)* ')' */ + XML_ATTR_NOTATION, /* 'NOTATION' '(' Name ('|' Name)* ')' */ +}; + +struct xml_dtd_attr { + char *name; /* Attribute name */ + struct xml_dtd_elem *elem; /* Presumably the element the attribute is declared for -- TODO confirm against the attrs hash key */ + enum xml_dtd_attribute_type type; /* Declared attribute type */ + enum xml_dtd_attribute_default default_mode; /* Default declaration keyword */ + char *default_value; /* Default value; parsed unless '#REQUIRED' or '#IMPLIED' */ +}; + +struct xml_dtd_eval { + struct xml_dtd_attr *attr; /* Presumably the enumerated attribute (hash key) -- TODO confirm */ + char *val; /* Presumably one allowed Nmtoken value (hash key) -- TODO confirm */ +}; + +struct xml_dtd_enotn { + struct xml_dtd_attr *attr; /* Presumably the NOTATION attribute (hash key) -- TODO confirm */ + struct xml_dtd_notn *notn; /* Presumably one allowed notation (hash key) -- TODO confirm */ +}; + +#endif diff --git a/sherlock/xml/libshxml.pc b/sherlock/xml/libshxml.pc new file mode 100644 index 00000000..c2172b39 --- /dev/null +++ b/sherlock/xml/libshxml.pc @@ -0,0 +1,11 @@ +# pkg-config metadata for libshxml + +libdir=@LIBDIR@ +incdir=. 
+ +Name: libshxml +Description: XML parser for Sherlock project +Version: @SHERLOCK_VERSION@ +Cflags: -I${incdir} +Libs: -L${libdir} -lshxml +Requires: @DEPS@ diff --git a/sherlock/xml/xml-ucat.pl b/sherlock/xml/unicat.pl similarity index 98% rename from sherlock/xml/xml-ucat.pl rename to sherlock/xml/unicat.pl index eeb948e6..fc39bba7 100755 --- a/sherlock/xml/xml-ucat.pl +++ b/sherlock/xml/unicat.pl @@ -86,10 +86,8 @@ set("SNAME_1_0", "[_:]", @base_char_1_0, @ideographic_1_0); set("NAME_1_0", "[-_:.]", @base_char_1_0, @ideographic_1_0, @combining_char_1_0, @digit_1_0, @extender_1_0); set("SNAME_1_1", @sname_1_1); set("NAME_1_1", @sname_1_1, "[-.0-9]", 0xB7, [0x0300,0x036F], [0x203F,0x2040]); -set("DECL", @white, [0x20,0x7E]); set("GT", "[>]"); -print "/* Automatically generated by xml-ucat.pl */\n\n"; find_cls(); gen_enum(); gen_tabs(); diff --git a/sherlock/xml/xml.c b/sherlock/xml/xml.c index 2de0e818..1d9f0f45 100644 --- a/sherlock/xml/xml.c +++ b/sherlock/xml/xml.c @@ -27,6 +27,7 @@ #include "charset/charconv.h" #include "charset/fb-charconv.h" #include "sherlock/xml/xml.h" +#include "sherlock/xml/dtd.h" #include @@ -97,7 +98,7 @@ xml_fatal(struct xml_context *ctx, const char *format, ...) 
/*** Charecter categorization ***/ -#include "obj/sherlock/xml/xml-ucat.h" +#include "obj/sherlock/xml/unicat.h" static inline uns xml_char_cat(uns c) @@ -941,6 +942,45 @@ xml_dtd_find_pent(struct xml_context *ctx, char *name) XML_HASH_GIVE_ALLOC #include "lib/hashtable.h" +/* Element sons */ + +struct xml_dtd_enodes_table; + +static inline uns +xml_dtd_enodes_hash(struct xml_dtd_enodes_table *tab UNUSED, struct xml_dtd_elem_node *parent, struct xml_dtd_elem *elem) +{ + return hash_pointer(parent) ^ hash_pointer(elem); +} + +static inline int +xml_dtd_enodes_eq(struct xml_dtd_enodes_table *tab UNUSED, struct xml_dtd_elem_node *parent1, struct xml_dtd_elem *elem1, struct xml_dtd_elem_node *parent2, struct xml_dtd_elem *elem2) +{ + return (parent1 == parent2) && (elem1 == elem2); +} + +static inline void +xml_dtd_enodes_init_key(struct xml_dtd_enodes_table *tab UNUSED, struct xml_dtd_elem_node *node, struct xml_dtd_elem_node *parent, struct xml_dtd_elem *elem) +{ + node->parent = parent; + node->elem = elem; +} + +#define HASH_PREFIX(x) xml_dtd_enodes_##x +#define HASH_NODE struct xml_dtd_elem_node +#define HASH_KEY_COMPLEX(x) x parent, x elem +#define HASH_KEY_DECL struct xml_dtd_elem_node *parent, struct xml_dtd_elem *elem +#define HASH_GIVE_HASHFN +#define HASH_GIVE_EQ +#define HASH_GIVE_INIT_KEY +#define HASH_TABLE_DYNAMIC +#define HASH_ZERO_FILL +#define HASH_WANT_FIND +#define HASH_WANT_NEW +#define HASH_GIVE_ALLOC +#define HASH_TABLE_ALLOC +XML_HASH_GIVE_ALLOC +#include "lib/hashtable.h" + /* Element attributes */ struct xml_dtd_attrs_table; @@ -1070,6 +1110,7 @@ xml_dtd_init(struct xml_context *ctx) xml_dtd_ents_init(dtd->tab_pents = xml_hash_new(pool, sizeof(struct xml_dtd_ents_table))); xml_dtd_notns_init(dtd->tab_notns = xml_hash_new(pool, sizeof(struct xml_dtd_notns_table))); xml_dtd_elems_init(dtd->tab_elems = xml_hash_new(pool, sizeof(struct xml_dtd_elems_table))); + xml_dtd_enodes_init(dtd->tab_enodes = xml_hash_new(pool, sizeof(struct 
xml_dtd_enodes_table))); xml_dtd_attrs_init(dtd->tab_attrs = xml_hash_new(pool, sizeof(struct xml_dtd_attrs_table))); xml_dtd_evals_init(dtd->tab_evals = xml_hash_new(pool, sizeof(struct xml_dtd_evals_table))); xml_dtd_enotns_init(dtd->tab_enotns = xml_hash_new(pool, sizeof(struct xml_dtd_enotns_table))); @@ -1412,7 +1453,7 @@ xml_parse_external_id(struct xml_context *ctx, struct xml_ext_id *eid, uns allow xml_fatal(ctx, "Expected an external ID"); } -/* DTD: Notation declaration */ +/* DTD: */ static void xml_parse_notation_decl(struct xml_context *ctx) @@ -1441,6 +1482,8 @@ xml_parse_notation_decl(struct xml_context *ctx) xml_dec(ctx); } +/* DTD: */ + static void xml_parse_entity_decl(struct xml_context *ctx) { @@ -1530,6 +1573,314 @@ xml_parse_entity_decl(struct xml_context *ctx) xml_dec(ctx); } +/* DTD: */ + +static void +xml_parse_element_decl(struct xml_context *ctx) +{ + /* Elementdecl ::= '' + * Already parsed: 'dtd; + struct xml_dtd_elem *elem = xml_dtd_elems_lookup(dtd->tab_elems, name); + if (elem->flags & XML_DTD_ELEM_DECLARED) + xml_fatal(ctx, "Element <%s> already declared", name); + + /* contentspec ::= 'EMPTY' | 'ANY' | Mixed | children */ + uns c = xml_peek_char(ctx); + if (c == 'E') + { + xml_parse_seq(ctx, "EMPTY"); + elem->type = XML_DTD_ELEM_EMPTY; + } + else if (c == 'A') + { + xml_parse_seq(ctx, "ANY"); + elem->type = XML_DTD_ELEM_ANY; + } + else if (c == '(') + { + xml_skip_char(ctx); + xml_inc(ctx); + xml_parse_dtd_white(ctx, 0); + struct xml_dtd_elem_node *parent = elem->node = mp_alloc_zero(dtd->pool, sizeof(*parent)); + if (xml_peek_char(ctx) == '#') + { + /* Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | '(' S? '#PCDATA' S? 
')' */ + xml_skip_char(ctx); + xml_parse_seq(ctx, "PCDATA"); + elem->type = XML_DTD_ELEM_MIXED; + parent->type = XML_DTD_ELEM_PCDATA; + while (1) + { + xml_parse_dtd_white(ctx, 0); + if ((c = xml_get_char(ctx)) == ')') + break; + else if (c != '|') + xml_fatal_expected(ctx, ')'); + xml_parse_dtd_white(ctx, 0); + struct xml_dtd_elem *son_elem = xml_dtd_elems_lookup(dtd->tab_elems, xml_parse_name(ctx)); + if (xml_dtd_enodes_find(dtd->tab_enodes, parent, son_elem)) + xml_error(ctx, "Duplicate content '%s'", son_elem->name); + else + { + struct xml_dtd_elem_node *son = xml_dtd_enodes_new(dtd->tab_enodes, parent, son_elem); + slist_add_tail(&parent->sons, &son->n); + } + } + xml_dec(ctx); + if (xml_peek_char(ctx) == '*') + { + xml_skip_char(ctx); + parent->occur = XML_DTD_ELEM_OCCUR_MULT; + } + else if (!slist_head(&parent->sons)) + parent->occur = XML_DTD_ELEM_OCCUR_ONCE; + else + xml_fatal_expected(ctx, '*'); + } + else + { + /* children ::= (choice | seq) ('?' | '*' | '+')? + * cp ::= (Name | choice | seq) ('?' | '*' | '+')? + * choice ::= '(' S? cp ( S? '|' S? cp )+ S? ')' + * seq ::= '(' S? cp ( S? ',' S? cp )* S? 
')' */ + + elem->type = XML_DTD_ELEM_CHILDREN; + parent->type = XML_DTD_ELEM_PCDATA; /* Placeholder: the group operator (',' or '|') is not known yet */ + uns c; + goto first; + + while (1) + { + /* After name */ + xml_parse_dtd_white(ctx, 0); + if ((c = xml_get_char(ctx)) == ')') + { + xml_dec(ctx); + if (parent->type == XML_DTD_ELEM_PCDATA) /* No operator seen: a single-item group is a sequence */ + parent->type = XML_DTD_ELEM_SEQ; + if ((c = xml_get_char(ctx)) == '?') + parent->occur = XML_DTD_ELEM_OCCUR_OPT; + else if (c == '*') + parent->occur = XML_DTD_ELEM_OCCUR_MULT; + else if (c == '+') + parent->occur = XML_DTD_ELEM_OCCUR_PLUS; + else + { + xml_unget_char(ctx); + parent->occur = XML_DTD_ELEM_OCCUR_ONCE; + } + if (!parent->parent) /* The root group has been closed */ + break; + parent = parent->parent; /* Return to the enclosing group */ + continue; + } + else if (c == '|') + { + if (parent->type == XML_DTD_ELEM_PCDATA) + parent->type = XML_DTD_ELEM_OR; + else if (parent->type != XML_DTD_ELEM_OR) + xml_fatal(ctx, "Mixed operators in the list of element children"); + } + else if (c == ',') + { + if (parent->type == XML_DTD_ELEM_PCDATA) + parent->type = XML_DTD_ELEM_SEQ; + else if (parent->type != XML_DTD_ELEM_SEQ) + xml_fatal(ctx, "Mixed operators in the list of element children"); + } + else if (c == '(') + { + xml_inc(ctx); + struct xml_dtd_elem_node *son = mp_alloc_zero(dtd->pool, sizeof(*son)); + son->parent = parent; + slist_add_tail(&parent->sons, &son->n); + parent = son; /* Descend into the nested group (son->parent was a no-op and made the nested ')' close the root) */ + son->type = XML_DTD_ELEM_PCDATA; /* Node-type placeholder as for the root; XML_DTD_ELEM_MIXED is an element type aliasing XML_DTD_ELEM_OR */ + } + else + xml_unget_char(ctx); + + /* Before name */ + xml_parse_dtd_white(ctx, 0); +first:; + struct xml_dtd_elem *son_elem = xml_dtd_elems_lookup(dtd->tab_elems, xml_parse_name(ctx)); + // FIXME: duplicates, occurrence + //struct xml_dtd_elem_node *son = xml_dtd_enodes_new(dtd->tab_enodes, parent, son_elem); + struct xml_dtd_elem_node *son = mp_alloc_zero(dtd->pool, sizeof(*son)); + son->parent = parent; + son->elem = son_elem; + slist_add_tail(&parent->sons, &son->n); + } + } + } + else + xml_fatal(ctx, "Expected element content specification"); + + xml_parse_dtd_white(ctx, 0); + xml_parse_char(ctx, '>'); + xml_dec(ctx); +} + 
+static char * +xml_parse_attr_value(struct xml_context *ctx, struct xml_dtd_attr *attr UNUSED) +{ + uns quote = xml_parse_quote(ctx); + xml_push(ctx); + struct fastbuf *out = ctx->value; + while (1) + { + uns c = xml_get_char(ctx); + if (c == '&') + { + xml_inc(ctx); + xml_parse_ge_ref(ctx, out); + } + else if (c == quote) // FIXME: beware quotes inside parsed + break; + else if (c == '<') + xml_error(ctx, "Attribute value must not contain '<'"); + else + bput_utf8_32(out, c); + } + xml_pop(ctx); + bputc(out, 0); + fbgrow_rewind(out); + char *value = mp_memdup(ctx->pool, out->bptr, out->bstop - out->bptr); + // FIXME: check value constraints / normalize value + fbgrow_reset(out); + return value; +} + +static void +xml_parse_attr_list_decl(struct xml_context *ctx) +{ + /* AttlistDecl ::= '' + * AttDef ::= S Name S AttType S DefaultDecl + * Already parsed: 'dtd->tab_elems, xml_parse_name(ctx)); + + while (xml_parse_dtd_white(ctx, 0) && xml_peek_char(ctx) != '>') + { + char *name = xml_parse_name(ctx); + struct xml_dtd_attr *attr = xml_dtd_attrs_find(ctx->dtd->tab_attrs, elem, name); + uns ignored = 0; + if (attr) + { + xml_warn(ctx, "Duplicate attribute definition"); + ignored++; + } + else + attr = xml_dtd_attrs_new(ctx->dtd->tab_attrs, elem, name); + xml_parse_dtd_white(ctx, 1); + if (xml_peek_char(ctx) == '(') + { + xml_skip_char(ctx); // FIXME: xml_inc/dec ? 
+ if (!ignored) + attr->type = XML_ATTR_ENUM; + do + { + xml_parse_dtd_white(ctx, 0); + char *value = xml_parse_nmtoken(ctx); + if (!ignored) + if (xml_dtd_evals_find(ctx->dtd->tab_evals, attr, value)) + xml_error(ctx, "Duplicate enumeration value"); + else + xml_dtd_evals_new(ctx->dtd->tab_evals, attr, value); + xml_parse_dtd_white(ctx, 0); + } + while (xml_get_char(ctx) == '|'); + xml_unget_char(ctx); + xml_parse_char(ctx, ')'); + } + else + { + char *type = xml_parse_name(ctx); + enum xml_dtd_attribute_type t; + if (!strcmp(type, "CDATA")) + t = XML_ATTR_CDATA; + else if (!strcmp(type, "ID")) + t = XML_ATTR_ID; + else if (!strcmp(type, "IDREF")) + t = XML_ATTR_IDREF; + else if (!strcmp(type, "IDREFS")) + t = XML_ATTR_IDREFS; + else if (!strcmp(type, "ENTITY")) + t = XML_ATTR_ENTITY; + else if (!strcmp(type, "ENTITIES")) + t = XML_ATTR_ENTITIES; + else if (!strcmp(type, "NMTOKEN")) + t = XML_ATTR_NMTOKEN; + else if (!strcmp(type, "NMTOKENS")) + t = XML_ATTR_NMTOKENS; + else if (!strcmp(type, "NOTATION")) + { + if (elem->type == XML_DTD_ELEM_EMPTY) + xml_fatal(ctx, "Empty element must not have notation attribute"); + // FIXME: An element type MUST NOT have more than one NOTATION attribute specified. 
+ t = XML_ATTR_NOTATION; + xml_parse_dtd_white(ctx, 1); + xml_parse_char(ctx, '('); + do + { + xml_parse_dtd_white(ctx, 0); + struct xml_dtd_notn *n = xml_dtd_notns_lookup(ctx->dtd->tab_notns, xml_parse_name(ctx)); + if (!ignored) + if (xml_dtd_enotns_find(ctx->dtd->tab_enotns, attr, n)) + xml_error(ctx, "Duplicate enumerated notation"); + else + xml_dtd_enotns_new(ctx->dtd->tab_enotns, attr, n); + xml_parse_dtd_white(ctx, 0); + } + while (xml_get_char(ctx) == '|'); + xml_unget_char(ctx); + xml_parse_char(ctx, ')'); + } + else + xml_fatal(ctx, "Unknown attribute type"); + if (!ignored) + attr->type = t; + } + xml_parse_dtd_white(ctx, 1); + enum xml_dtd_attribute_default def = XML_ATTR_NONE; + if (xml_get_char(ctx) == '#') + switch (xml_peek_char(ctx)) + { + case 'R': + xml_parse_seq(ctx, "REQUIRED"); + def = XML_ATTR_REQUIRED; + break; + case 'I': + xml_parse_seq(ctx, "IMPLIED"); + def = XML_ATTR_IMPLIED; + break; + case 'F': + xml_parse_seq(ctx, "FIXED"); + def = XML_ATTR_FIXED; + xml_parse_dtd_white(ctx, 1); + break; + default: + xml_fatal(ctx, "Expected a modifier for default attribute value"); + } + else + xml_unget_char(ctx); + if (def != XML_ATTR_REQUIRED && def != XML_ATTR_IMPLIED) + { + char *v = xml_parse_attr_value(ctx, attr); + if (!ignored) + attr->default_value = v; + } + if (!ignored) + attr->default_mode = def; + } + xml_skip_char(ctx); + xml_dec(ctx); +} + /* DTD: Internal subset */ static void @@ -1565,14 +1916,14 @@ xml_parse_internal_subset(struct xml_context *ctx) else if (c == 'L') { xml_parse_seq(ctx, "EMENT"); - // FIXME: Element + xml_parse_element_decl(ctx); } else goto invalid_markup; break; case 'A': xml_parse_seq(ctx, "TTLIST"); - // FIXME: AttList + xml_parse_attr_list_decl(ctx); break; default: goto invalid_markup; @@ -1813,209 +2164,6 @@ xml_pop_element(struct xml_context *ctx) #endif } -static void -xml_parse_element_decl(struct xml_context *ctx) -{ - // FIXME - mp_push(ctx->pool); - xml_parse_seq(ctx, "'); - mp_pop(ctx->pool); -} - 
-#if 0 -static void -xml_parse_attr_list_decl(struct xml_context *ctx) -{ - /* AttlistDecl ::= '' - * AttDef ::= S Name S AttType S DefaultDecl */ - xml_parse_seq(ctx, "ATTLIST"); - xml_parse_white(ctx, 1); - struct xml_dtd_elem *e = xml_dtd_elems_lookup(ctx->dtd->tab_elems, xml_parse_name(ctx)); - e->attlist_declared = 1; - - while (xml_parse_white(ctx, 0) && xml_get_char(ctx) != '>') - { - xml_unget_char(ctx); - char *name = xml_parse_name(ctx); - struct xml_dtd_attr *a = xml_dtd_attrs_find(ctx->dtd->tab_attrs, e, name); - uns ignored = 0; - if (a) - { - xml_warn(ctx, "Duplicate attribute definition"); - ignored++; - } - else - a = xml_dtd_attrs_new(ctx->dtd->tab_attrs, e, name); - xml_parse_white(ctx, 1); - if (xml_get_char(ctx) == '(') - { - if (!ignored) - a->type = XML_ATTR_ENUM; - do - { - xml_parse_white(ctx, 0); - char *value = xml_parse_nmtoken(ctx); - if (!ignored) - if (xml_dtd_evals_find(ctx->dtd->tab_evals, a, value)) - xml_error(ctx, "Duplicate enumeration value"); - else - xml_dtd_evals_new(ctx->dtd->tab_evals, a, value); - xml_parse_white(ctx, 0); - } - while (xml_get_char(ctx) == '|'); - xml_unget_char(ctx); - xml_parse_char(ctx, ')'); - } - else - { - xml_unget_char(ctx); - char *type = xml_parse_name(ctx); - enum xml_dtd_attribute_type t; - if (!strcmp(type, "CDATA")) - t = XML_ATTR_CDATA; - else if (!strcmp(type, "ID")) - t = XML_ATTR_ID; - else if (!strcmp(type, "IDREF")) - t = XML_ATTR_IDREF; - else if (!strcmp(type, "IDREFS")) - t = XML_ATTR_IDREFS; - else if (!strcmp(type, "ENTITY")) - t = XML_ATTR_ENTITY; - else if (!strcmp(type, "ENTITIES")) - t = XML_ATTR_ENTITIES; - else if (!strcmp(type, "NMTOKEN")) - t = XML_ATTR_NMTOKEN; - else if (!strcmp(type, "NMTOKENS")) - t = XML_ATTR_NMTOKENS; - else if (!strcmp(type, "NOTATION")) - { - t = XML_ATTR_NOTATION; - xml_parse_white(ctx, 1); - xml_parse_char(ctx, '('); - do - { - xml_parse_white(ctx, 0); - struct xml_dtd_notn *n = xml_dtd_notns_lookup(ctx->dtd->tab_notns, xml_parse_name(ctx)); - if 
(!ignored) - if (xml_dtd_enotns_find(ctx->dtd->tab_enotns, a, n)) - xml_error(ctx, "Duplicate enumerated notation"); - else - xml_dtd_enotns_new(ctx->dtd->tab_enotns, a, n); - xml_parse_white(ctx, 0); - } - while (xml_get_char(ctx) == '|'); - xml_unget_char(ctx); - xml_parse_char(ctx, ')'); - } - else - xml_fatal(ctx, "Unknown attribute type"); - if (!ignored) - a->type = t; - } - xml_parse_white(ctx, 1); - enum xml_dtd_attribute_default def = XML_ATTR_NONE; - if (xml_get_char(ctx) == '#') - switch (xml_get_char(ctx)) - { - case 'R': - xml_parse_seq(ctx, "EQUIRED"); - def = XML_ATTR_REQUIRED; - break; - case 'I': - xml_parse_seq(ctx, "MPLIED"); - def = XML_ATTR_IMPLIED; - break; - case 'F': - xml_parse_seq(ctx, "IXED"); - def = XML_ATTR_FIXED; - break; - default: - xml_fatal(ctx, "Expected a modifier for default attribute value"); - } - else - xml_unget_char(ctx); - if (def != XML_ATTR_REQUIRED && def != XML_ATTR_IMPLIED) - { - xml_parse_system_literal(ctx); - // FIXME - } - } -} -#endif - static void xml_parse_doctype_decl(struct xml_context *ctx) { diff --git a/sherlock/xml/xml.h b/sherlock/xml/xml.h index 87cdff91..7e83f65a 100644 --- a/sherlock/xml/xml.h +++ b/sherlock/xml/xml.h @@ -7,8 +7,8 @@ * of the GNU Lesser General Public License. 
*/ -#ifndef _SHERLOCK_XML_H -#define _SHERLOCK_XML_H +#ifndef _SHERLOCK_XML_XML_H +#define _SHERLOCK_XML_XML_H #include "lib/clists.h" #include "lib/slists.h" @@ -204,132 +204,6 @@ struct xml_context { void (*unparsed_entity_decl)(struct xml_context *ctx); }; -/*** Document Type Definition (DTD) ***/ - -struct xml_dtd { - struct mempool *pool; /* Memory pool where to allocate DTD */ - slist gents; /* Link list of general entities */ - slist pents; /* Link list of parapeter entities */ - slist notns; /* Link list of notations */ - slist elems; /* Link list of elements */ - void *tab_gents; /* Hash table of general entities */ - void *tab_pents; /* Hash table of parameter entities */ - void *tab_notns; /* Hash table of notations */ - void *tab_elems; /* Hash table of elements */ - void *tab_attrs; /* Hash table of element attributes */ - void *tab_evals; /* Hash table of enumerated attribute values */ - void *tab_enotns; /* hash table of enumerated attribute notations */ -}; - -/* Notations */ - -enum xml_dtd_notn_flags { - XML_DTD_NOTN_DECLARED = 0x1, /* The notation has been declared (interbal usage) */ -}; - -struct xml_dtd_notn { - snode n; /* Node in xml_dtd.notns */ - uns flags; /* XML_DTD_NOTN_x */ - char *name; /* Notation name */ - struct xml_ext_id eid; /* External id */ -}; - -/* Entities */ - -enum xml_dtd_ent_flags { - XML_DTD_ENT_DECLARED = 0x1, /* The entity has been declared (internal usage) */ - XML_DTD_ENT_VISITED = 0x2, /* Cycle detection (internal usage) */ - XML_DTD_ENT_PARAMETER = 0x4, /* Parameter entity, general otherwise */ - XML_DTD_ENT_EXTERNAL = 0x8, /* External entity, internal otherwise */ - XML_DTD_ENT_UNPARSED = 0x10, /* Unparsed entity, parsed otherwise */ - XML_DTD_ENT_TRIVIAL = 0x20, /* Replacement text is a sequence of characters and character references */ -}; - -struct xml_dtd_ent { - snode n; /* Node in xml_dtd.[gp]ents */ - uns flags; /* XML_DTD_ENT_x */ - char *name; /* Entity name */ - char *text; /* Replacement text / 
expanded replacement text (XML_DTD_ENT_TRIVIAL) */ - uns len; /* Text length */ - struct xml_ext_id eid; /* External ID */ - struct xml_dtd_notn *notn; /* Notation (XML_DTD_ENT_UNPARSED only) */ -}; - -/* Elements */ - -enum xml_dtd_elem_flags { - XML_DTD_ELEM_DECLARED = 0x1, /* The element has been declared (internal usage) */ -}; - -struct xml_dtd_elem { - snode n; - uns flags; - char *name; - struct xml_dtd_elem_node *node; -}; - -struct xml_dtd_elem_node { - snode n; - struct xml_dtd_elem_node *parent; - slist sons; - uns type; - uns occur; -}; - -enum xml_dtd_elem_node_type { - XML_DTD_ELEM_PCDATA, - XML_DTD_ELEM_SEQ, - XML_DTD_ELEM_OR, -}; - -enum xml_dtd_elem_node_occur { - XML_DTD_ELEM_OCCUR_ONCE, - XML_DTD_ELEM_OCCUR_OPT, - XML_DTD_ELEM_OCCUR_MULT, - XML_DTD_ELEM_OCCUR_PLUS, -}; - -/* Attributes */ - - -enum xml_dtd_attribute_default { - XML_ATTR_NONE, - XML_ATTR_REQUIRED, - XML_ATTR_IMPLIED, - XML_ATTR_FIXED, -}; - -enum xml_dtd_attribute_type { - XML_ATTR_CDATA, - XML_ATTR_ID, - XML_ATTR_IDREF, - XML_ATTR_IDREFS, - XML_ATTR_ENTITY, - XML_ATTR_ENTITIES, - XML_ATTR_NMTOKEN, - XML_ATTR_NMTOKENS, - XML_ATTR_ENUM, - XML_ATTR_NOTATION, -}; - -struct xml_dtd_attr { - char *name; - struct xml_dtd_elem *elem; - enum xml_dtd_attribute_type type; - enum xml_dtd_attribute_default default_mode; - char *default_value; -}; - -struct xml_dtd_eval { - struct xml_dtd_attr *attr; - char *val; -}; - -struct xml_dtd_enotn { - struct xml_dtd_attr *attr; - struct xml_dtd_notn *notn; -}; - void xml_init(struct xml_context *ctx); void xml_cleanup(struct xml_context *ctx); void xml_set_source(struct xml_context *ctx, struct fastbuf *fb); -- 2.39.2