X-Git-Url: http://mj.ucw.cz/gitweb/?a=blobdiff_plain;f=xml%2Fdtd.c;h=8c50a53265785502445376e1f27bebc46a8d43ac;hb=HEAD;hp=84bbff7c9c5841adfe1b7d85114e24584b5b5fee;hpb=6912c77a66798de162fe31f79581cd7b5c97f12c;p=libucw.git diff --git a/xml/dtd.c b/xml/dtd.c deleted file mode 100644 index 84bbff7c..00000000 --- a/xml/dtd.c +++ /dev/null @@ -1,1003 +0,0 @@ -/* - * Sherlock Library -- A simple XML parser - * - * (c) 2007--2008 Pavel Charvat - * - * This software may be freely distributed and used according to the terms - * of the GNU Lesser General Public License. - */ - -#undef LOCAL_DEBUG - -#include -#include -#include -#include -#include -#include -#include - -/* Notations */ - -#define HASH_PREFIX(x) xml_dtd_notns_##x -#define HASH_NODE struct xml_dtd_notn -#define HASH_KEY_STRING name -#define HASH_ZERO_FILL -#define HASH_TABLE_DYNAMIC -#define HASH_WANT_LOOKUP -#define HASH_WANT_FIND -#define HASH_GIVE_ALLOC -#define HASH_TABLE_ALLOC -XML_HASH_GIVE_ALLOC -#include - -struct xml_dtd_notn * -xml_dtd_find_notn(struct xml_context *ctx, char *name) -{ - struct xml_dtd *dtd = ctx->dtd; - struct xml_dtd_notn *notn = xml_dtd_notns_find(dtd->tab_notns, name); - return !notn ? NULL : (notn->flags & XML_DTD_NOTN_DECLARED) ? notn : NULL; -} - -/* General entities */ - -#define HASH_PREFIX(x) xml_dtd_ents_##x -#define HASH_NODE struct xml_dtd_entity -#define HASH_KEY_STRING name -#define HASH_ZERO_FILL -#define HASH_TABLE_DYNAMIC -#define HASH_WANT_FIND -#define HASH_WANT_LOOKUP -#define HASH_GIVE_ALLOC -#define HASH_TABLE_ALLOC -XML_HASH_GIVE_ALLOC -#include - -static struct xml_dtd_entity * -xml_dtd_declare_trivial_entity(struct xml_context *ctx, char *name, char *text) -{ - struct xml_dtd *dtd = ctx->dtd; - struct xml_dtd_entity *ent = xml_dtd_ents_lookup(dtd->tab_ents, name); - if (ent->flags & XML_DTD_ENTITY_DECLARED) - { - xml_warn(ctx, "Entity &%s; already declared", name); - return NULL; - } - slist_add_tail(&dtd->ents, &ent->n); - ent->flags = XML_DTD_ENTITY_DECLARED | XML_DTD_ENTITY_TRIVIAL; - ent->text = text; - return ent; -} - -static void -xml_dtd_declare_default_entities(struct xml_context *ctx) -{ - xml_dtd_declare_trivial_entity(ctx, "lt", "<"); - xml_dtd_declare_trivial_entity(ctx, "gt", ">"); - xml_dtd_declare_trivial_entity(ctx, "amp", "&"); - xml_dtd_declare_trivial_entity(ctx, "apos", "'"); - xml_dtd_declare_trivial_entity(ctx, "quot", "\""); -} - -struct xml_dtd_entity * -xml_def_find_entity(struct xml_context *ctx UNUSED, char *name) -{ -#define ENT(n, t) ent_##n = { .name = #n, .text = t, .flags = XML_DTD_ENTITY_DECLARED | XML_DTD_ENTITY_TRIVIAL } - static struct xml_dtd_entity ENT(lt, "<"), ENT(gt, ">"), ENT(amp, "&"), ENT(apos, "'"), ENT(quot, "\""); -#undef ENT - switch (name[0]) - { - case 'l': - if (!strcmp(name, "lt")) - return &ent_lt; - break; - case 'g': - if (!strcmp(name, "gt")) - return &ent_gt; - break; - case 'a': - if (!strcmp(name, "amp")) - return &ent_amp; - if (!strcmp(name, "apos")) - return &ent_apos; - break; - case 'q': - if (!strcmp(name, "quot")) - return &ent_quot; - break; - } - return NULL; -} - -struct xml_dtd_entity * -xml_dtd_find_entity(struct xml_context *ctx, char *name) -{ - struct xml_dtd *dtd = ctx->dtd; - if (ctx->h_find_entity) - return ctx->h_find_entity(ctx, name); - else if (dtd) - { - struct xml_dtd_entity *ent = xml_dtd_ents_find(dtd->tab_ents, name); - return !ent ? NULL : (ent->flags & XML_DTD_ENTITY_DECLARED) ? ent : NULL; - } - else - return xml_def_find_entity(ctx, name); -} - -/* Parameter entities */ - -static struct xml_dtd_entity * -xml_dtd_find_pentity(struct xml_context *ctx, char *name) -{ - struct xml_dtd *dtd = ctx->dtd; - struct xml_dtd_entity *ent = xml_dtd_ents_find(dtd->tab_pents, name); - return !ent ? NULL : (ent->flags & XML_DTD_ENTITY_DECLARED) ? ent : NULL; -} - -/* Elements */ - -struct xml_dtd_elems_table; - -static void -xml_dtd_elems_init_data(struct xml_dtd_elems_table *tab UNUSED, struct xml_dtd_elem *e) -{ - slist_init(&e->attrs); -} - -#define HASH_PREFIX(x) xml_dtd_elems_##x -#define HASH_NODE struct xml_dtd_elem -#define HASH_KEY_STRING name -#define HASH_TABLE_DYNAMIC -#define HASH_ZERO_FILL -#define HASH_WANT_FIND -#define HASH_WANT_LOOKUP -#define HASH_GIVE_ALLOC -#define HASH_GIVE_INIT_DATA -#define HASH_TABLE_ALLOC -XML_HASH_GIVE_ALLOC -#include - -struct xml_dtd_elem * -xml_dtd_find_elem(struct xml_context *ctx, char *name) -{ - return ctx->dtd ? xml_dtd_elems_find(ctx->dtd->tab_elems, name) : NULL; -} - -/* Element sons */ - -struct xml_dtd_enodes_table; - -static inline uint -xml_dtd_enodes_hash(struct xml_dtd_enodes_table *tab UNUSED, struct xml_dtd_elem_node *parent, struct xml_dtd_elem *elem) -{ - return hash_pointer(parent) ^ hash_pointer(elem); -} - -static inline int -xml_dtd_enodes_eq(struct xml_dtd_enodes_table *tab UNUSED, struct xml_dtd_elem_node *parent1, struct xml_dtd_elem *elem1, struct xml_dtd_elem_node *parent2, struct xml_dtd_elem *elem2) -{ - return (parent1 == parent2) && (elem1 == elem2); -} - -static inline void -xml_dtd_enodes_init_key(struct xml_dtd_enodes_table *tab UNUSED, struct xml_dtd_elem_node *node, struct xml_dtd_elem_node *parent, struct xml_dtd_elem *elem) -{ - node->parent = parent; - node->elem = elem; -} - -#define HASH_PREFIX(x) xml_dtd_enodes_##x -#define HASH_NODE struct xml_dtd_elem_node -#define HASH_KEY_COMPLEX(x) x parent, x elem -#define HASH_KEY_DECL struct xml_dtd_elem_node *parent, struct xml_dtd_elem *elem -#define HASH_GIVE_HASHFN -#define HASH_GIVE_EQ -#define HASH_GIVE_INIT_KEY -#define HASH_TABLE_DYNAMIC -#define HASH_ZERO_FILL -#define HASH_WANT_FIND -#define HASH_WANT_NEW -#define HASH_GIVE_ALLOC -#define HASH_TABLE_ALLOC -XML_HASH_GIVE_ALLOC -#include - -/* Element attributes */ - -struct xml_dtd_attrs_table; - -static inline uint -xml_dtd_attrs_hash(struct xml_dtd_attrs_table *tab UNUSED, struct xml_dtd_elem *elem, char *name) -{ - return hash_pointer(elem) ^ hash_string(name); -} - -static inline int -xml_dtd_attrs_eq(struct xml_dtd_attrs_table *tab UNUSED, struct xml_dtd_elem *elem1, char *name1, struct xml_dtd_elem *elem2, char *name2) -{ - return (elem1 == elem2) && !strcmp(name1, name2); -} - -static inline void -xml_dtd_attrs_init_key(struct xml_dtd_attrs_table *tab UNUSED, struct xml_dtd_attr *attr, struct xml_dtd_elem *elem, char *name) -{ - attr->elem = elem; - attr->name = name; - slist_add_tail(&elem->attrs, &attr->n); -} - -#define HASH_PREFIX(x) xml_dtd_attrs_##x -#define HASH_NODE struct xml_dtd_attr -#define HASH_ZERO_FILL -#define HASH_TABLE_DYNAMIC -#define HASH_KEY_COMPLEX(x) x elem, x name -#define HASH_KEY_DECL struct xml_dtd_elem *elem, char *name -#define HASH_GIVE_HASHFN -#define HASH_GIVE_EQ -#define HASH_GIVE_INIT_KEY -#define HASH_WANT_FIND -#define HASH_WANT_NEW -#define HASH_GIVE_ALLOC -#define HASH_TABLE_ALLOC -XML_HASH_GIVE_ALLOC -#include - -struct xml_dtd_attr * -xml_dtd_find_attr(struct xml_context *ctx, struct xml_dtd_elem *elem, char *name) -{ - return ctx->dtd ? xml_dtd_attrs_find(ctx->dtd->tab_attrs, elem, name) : NULL; -} - -/* Enumerated attribute values */ - -struct xml_dtd_evals_table; - -static inline uint -xml_dtd_evals_hash(struct xml_dtd_evals_table *tab UNUSED, struct xml_dtd_attr *attr, char *val) -{ - return hash_pointer(attr) ^ hash_string(val); -} - -static inline int -xml_dtd_evals_eq(struct xml_dtd_evals_table *tab UNUSED, struct xml_dtd_attr *attr1, char *val1, struct xml_dtd_attr *attr2, char *val2) -{ - return (attr1 == attr2) && !strcmp(val1, val2); -} - -static inline void -xml_dtd_evals_init_key(struct xml_dtd_evals_table *tab UNUSED, struct xml_dtd_eval *eval, struct xml_dtd_attr *attr, char *val) -{ - eval->attr = attr; - eval->val = val; -} - -#define HASH_PREFIX(x) xml_dtd_evals_##x -#define HASH_NODE struct xml_dtd_eval -#define HASH_TABLE_DYNAMIC -#define HASH_KEY_COMPLEX(x) x attr, x val -#define HASH_KEY_DECL struct xml_dtd_attr *attr, char *val -#define HASH_GIVE_HASHFN -#define HASH_GIVE_EQ -#define HASH_GIVE_INIT_KEY -#define HASH_WANT_FIND -#define HASH_WANT_NEW -#define HASH_GIVE_ALLOC -#define HASH_TABLE_ALLOC -XML_HASH_GIVE_ALLOC -#include - -/* Enumerated attribute notations */ - -struct xml_dtd_enotns_table; - -static inline uint -xml_dtd_enotns_hash(struct xml_dtd_enotns_table *tab UNUSED, struct xml_dtd_attr *attr, struct xml_dtd_notn *notn) -{ - return hash_pointer(attr) ^ hash_pointer(notn); -} - -static inline int -xml_dtd_enotns_eq(struct xml_dtd_enotns_table *tab UNUSED, struct xml_dtd_attr *attr1, struct xml_dtd_notn *notn1, struct xml_dtd_attr *attr2, struct xml_dtd_notn *notn2) -{ - return (attr1 == attr2) && (notn1 == notn2); -} - -static inline void -xml_dtd_enotns_init_key(struct xml_dtd_enotns_table *tab UNUSED, struct xml_dtd_enotn *enotn, struct xml_dtd_attr *attr, struct xml_dtd_notn *notn) -{ - enotn->attr = attr; - enotn->notn = notn; -} - -#define HASH_PREFIX(x) xml_dtd_enotns_##x -#define HASH_NODE struct xml_dtd_enotn -#define HASH_TABLE_DYNAMIC -#define HASH_KEY_COMPLEX(x) x attr, x notn -#define HASH_KEY_DECL struct xml_dtd_attr *attr, struct xml_dtd_notn *notn -#define HASH_GIVE_HASHFN -#define HASH_GIVE_EQ -#define HASH_GIVE_INIT_KEY -#define HASH_WANT_FIND -#define HASH_WANT_NEW -#define HASH_GIVE_ALLOC -#define HASH_TABLE_ALLOC -XML_HASH_GIVE_ALLOC -#include - -/* DTD initialization/cleanup */ - -void -xml_dtd_init(struct xml_context *ctx) -{ - if (ctx->dtd) - return; - struct mempool *pool = mp_new(4096); - struct xml_dtd *dtd = ctx->dtd = mp_alloc_zero(pool, sizeof(*ctx->dtd)); - dtd->pool = pool; - xml_dtd_ents_init(dtd->tab_ents = xml_hash_new(pool, sizeof(struct xml_dtd_ents_table))); - xml_dtd_ents_init(dtd->tab_pents = xml_hash_new(pool, sizeof(struct xml_dtd_ents_table))); - xml_dtd_notns_init(dtd->tab_notns = xml_hash_new(pool, sizeof(struct xml_dtd_notns_table))); - xml_dtd_elems_init(dtd->tab_elems = xml_hash_new(pool, sizeof(struct xml_dtd_elems_table))); - xml_dtd_enodes_init(dtd->tab_enodes = xml_hash_new(pool, sizeof(struct xml_dtd_enodes_table))); - xml_dtd_attrs_init(dtd->tab_attrs = xml_hash_new(pool, sizeof(struct xml_dtd_attrs_table))); - xml_dtd_evals_init(dtd->tab_evals = xml_hash_new(pool, sizeof(struct xml_dtd_evals_table))); - xml_dtd_enotns_init(dtd->tab_enotns = xml_hash_new(pool, sizeof(struct xml_dtd_enotns_table))); - xml_dtd_declare_default_entities(ctx); -} - -void -xml_dtd_cleanup(struct xml_context *ctx) -{ - if (!ctx->dtd) - return; - mp_delete(ctx->dtd->pool); - ctx->dtd = NULL; -} - -void -xml_dtd_finish(struct xml_context *ctx) -{ - if (!ctx->dtd) - return; - // FIXME: validity checks -} - -/*** Parsing functions ***/ - -/* References to parameter entities */ - -void -xml_parse_pe_ref(struct xml_context *ctx) -{ - /* PEReference ::= '%' Name ';' - * Already parsed: '%' */ - struct mempool_state state; - mp_save(ctx->stack, &state); - char *name = xml_parse_name(ctx, ctx->stack); - xml_parse_char(ctx, ';'); - struct xml_dtd_entity *ent = xml_dtd_find_pentity(ctx, name); - if (!ent) - xml_error(ctx, "Unknown entity %%%s;", name); - else - { - TRACE(ctx, "Pushed entity %%%s;", name); - mp_restore(ctx->stack, &state); - xml_dec(ctx); - xml_push_entity(ctx, ent); - return; - } - mp_restore(ctx->stack, &state); - xml_dec(ctx); -} - -static uint -xml_parse_dtd_pe(struct xml_context *ctx, uint entity_decl) -{ - /* Already parsed: '%' */ - do - { - xml_inc(ctx); - if (!~entity_decl && (xml_peek_cat(ctx) & XML_CHAR_WHITE)) - { - xml_dec(ctx); - return ~0U; - } - xml_parse_pe_ref(ctx); - while (xml_peek_cat(ctx) & XML_CHAR_WHITE) - xml_skip_char(ctx); - } - while (xml_get_char(ctx) == '%'); - xml_unget_char(ctx); - return 1; -} - -static inline uint -xml_parse_dtd_white(struct xml_context *ctx, uint mandatory) -{ - /* Whitespace or parameter entity, - * mandatory==~0U has a special maening of the whitespace before the '%' character in an parameter entity declaration */ - uint cnt = 0; - while (xml_peek_cat(ctx) & XML_CHAR_WHITE) - { - xml_skip_char(ctx); - cnt = 1; - } - if (xml_peek_char(ctx) == '%') - { - xml_skip_char(ctx); - return xml_parse_dtd_pe(ctx, mandatory); - } - else if (unlikely(mandatory && !cnt)) - xml_fatal_expected_white(ctx); - return cnt; -} - -static void -xml_dtd_parse_external_id(struct xml_context *ctx, char **system_id, char **public_id, uint allow_public) -{ - struct xml_dtd *dtd = ctx->dtd; - uint c = xml_peek_char(ctx); - if (c == 'S') - { - xml_parse_seq(ctx, "SYSTEM"); - xml_parse_dtd_white(ctx, 1); - *public_id = NULL; - *system_id = xml_parse_system_literal(ctx, dtd->pool); - } - else if (c == 'P') - { - xml_parse_seq(ctx, "PUBLIC"); - xml_parse_dtd_white(ctx, 1); - *system_id = NULL; - *public_id = xml_parse_pubid_literal(ctx, dtd->pool); - if (xml_parse_dtd_white(ctx, !allow_public)) - if ((c = xml_peek_char(ctx)) == '\'' || c == '"' || !allow_public) - *system_id = xml_parse_system_literal(ctx, dtd->pool); - } - else - xml_fatal(ctx, "Expected an external ID"); -} - -/* DTD: */ - -void -xml_parse_notation_decl(struct xml_context *ctx) -{ - /* NotationDecl ::= '' - * Already parsed: 'dtd; - xml_parse_dtd_white(ctx, 1); - - struct xml_dtd_notn *notn = xml_dtd_notns_lookup(dtd->tab_notns, xml_parse_name(ctx, dtd->pool)); - xml_parse_dtd_white(ctx, 1); - char *system_id, *public_id; - xml_dtd_parse_external_id(ctx, &system_id, &public_id, 1); - xml_parse_dtd_white(ctx, 0); - xml_parse_char(ctx, '>'); - - if (notn->flags & XML_DTD_NOTN_DECLARED) - xml_warn(ctx, "Notation %s already declared", notn->name); - else - { - notn->flags = XML_DTD_NOTN_DECLARED; - notn->system_id = system_id; - notn->public_id = public_id; - slist_add_tail(&dtd->notns, ¬n->n); - } - xml_dec(ctx); -} - -/* DTD: */ - -void -xml_parse_entity_decl(struct xml_context *ctx) -{ - /* Already parsed: 'dtd; - uint flags = ~xml_parse_dtd_white(ctx, ~0U) ? 0 : XML_DTD_ENTITY_PARAMETER; - if (flags) - xml_parse_dtd_white(ctx, 1); - struct xml_dtd_entity *ent = xml_dtd_ents_lookup(flags ? dtd->tab_pents : dtd->tab_ents, xml_parse_name(ctx, dtd->pool)); - xml_parse_dtd_white(ctx, 1); - slist *list = flags ? &dtd->pents : &dtd->ents; - if (ent->flags & XML_DTD_ENTITY_DECLARED) - { - xml_fatal(ctx, "Entity &%s; already declared, skipping not implemented", ent->name); - // FIXME: should be only warning - } - uint c, sep = xml_get_char(ctx); - if (sep == '\'' || sep == '"') - { - /* Internal entity: - * EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | "'" ([^%&'] | PEReference | Reference)* "'" */ - char *p = mp_start_noalign(dtd->pool, 1); - while (1) - { - if ((c = xml_get_char(ctx)) == sep) - break; - if (c == '%') - { - // FIXME - ASSERT(0); - //xml_parse_parameter_ref(ctx); - continue; - } - if (c == '&') - { - xml_inc(ctx); - if (xml_peek_char(ctx) != '#') - { - /* Bypass references to general entities */ - struct mempool_state state; - mp_save(ctx->stack, &state); - char *n = xml_parse_name(ctx, ctx->stack); - xml_parse_char(ctx, ';'); - xml_dec(ctx); - uint l = strlen(n); - p = mp_spread(dtd->pool, p, 3 + l); - *p++ = '&'; - memcpy(p, n, l); - p += l; - *p++ = ';';; - mp_restore(ctx->stack, &state); - continue; - } - else - { - xml_skip_char(ctx); - c = xml_parse_char_ref(ctx); - } - } - p = mp_spread(dtd->pool, p, 5); - p = utf8_32_put(p, c); - } - *p = 0; - ent->len = p - (char *)mp_ptr(dtd->pool); - ent->text = mp_end(dtd->pool, p + 1); - slist_add_tail(list, &ent->n); - ent->flags = flags | XML_DTD_ENTITY_DECLARED; - } - else - { - /* External entity */ - struct xml_dtd_notn *notn = NULL; - char *system_id, *public_id; - xml_unget_char(ctx); - xml_dtd_parse_external_id(ctx, &system_id, &public_id, 0); - if (xml_parse_dtd_white(ctx, 0) && flags && xml_peek_char(ctx) != '>') - { - /* General external unparsed entity */ - flags |= XML_DTD_ENTITY_UNPARSED; - xml_parse_seq(ctx, "NDATA"); - xml_parse_dtd_white(ctx, 1); - notn = xml_dtd_notns_lookup(dtd->tab_notns, xml_parse_name(ctx, dtd->pool)); - } - slist_add_tail(list, &ent->n); - ent->flags = flags | XML_DTD_ENTITY_DECLARED | XML_DTD_ENTITY_EXTERNAL; - ent->system_id = system_id; - ent->public_id = public_id; - ent->notn = notn; - } - xml_parse_dtd_white(ctx, 0); - xml_parse_char(ctx, '>'); - xml_dec(ctx); -} - -/* DTD: */ - -void -xml_parse_element_decl(struct xml_context *ctx) -{ - /* Elementdecl ::= '' - * Already parsed: 'dtd; - xml_parse_dtd_white(ctx, 1); - char *name = xml_parse_name(ctx, dtd->pool); - xml_parse_dtd_white(ctx, 1); - struct xml_dtd_elem *elem = xml_dtd_elems_lookup(dtd->tab_elems, name); - if (elem->flags & XML_DTD_ELEM_DECLARED) - xml_fatal(ctx, "Element <%s> already declared", name); - - /* contentspec ::= 'EMPTY' | 'ANY' | Mixed | children */ - uint c = xml_peek_char(ctx); - if (c == 'E') - { - xml_parse_seq(ctx, "EMPTY"); - elem->type = XML_DTD_ELEM_EMPTY; - } - else if (c == 'A') - { - xml_parse_seq(ctx, "ANY"); - elem->type = XML_DTD_ELEM_ANY; - } - else if (c == '(') - { - xml_skip_char(ctx); - xml_inc(ctx); - xml_parse_dtd_white(ctx, 0); - struct xml_dtd_elem_node *parent = elem->node = mp_alloc_zero(dtd->pool, sizeof(*parent)); - if (xml_peek_char(ctx) == '#') - { - /* Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | '(' S? '#PCDATA' S? ')' */ - xml_skip_char(ctx); - xml_parse_seq(ctx, "PCDATA"); - elem->type = XML_DTD_ELEM_MIXED; - parent->type = XML_DTD_ELEM_PCDATA; - while (1) - { - xml_parse_dtd_white(ctx, 0); - if ((c = xml_get_char(ctx)) == ')') - break; - else if (c != '|') - xml_fatal_expected(ctx, ')'); - xml_parse_dtd_white(ctx, 0); - struct xml_dtd_elem *son_elem = xml_dtd_elems_lookup(dtd->tab_elems, xml_parse_name(ctx, dtd->pool)); - if (xml_dtd_enodes_find(dtd->tab_enodes, parent, son_elem)) - xml_error(ctx, "Duplicate content '%s'", son_elem->name); - else - { - struct xml_dtd_elem_node *son = xml_dtd_enodes_new(dtd->tab_enodes, parent, son_elem); - slist_add_tail(&parent->sons, &son->n); - } - } - xml_dec(ctx); - if (xml_peek_char(ctx) == '*') - { - xml_skip_char(ctx); - parent->occur = XML_DTD_ELEM_OCCUR_MULT; - } - else if (!slist_head(&parent->sons)) - parent->occur = XML_DTD_ELEM_OCCUR_ONCE; - else - xml_fatal_expected(ctx, '*'); - } - else - { - /* children ::= (choice | seq) ('?' | '*' | '+')? - * cp ::= (Name | choice | seq) ('?' | '*' | '+')? - * choice ::= '(' S? cp ( S? '|' S? cp )+ S? ')' - * seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' */ - - elem->type = XML_DTD_ELEM_CHILDREN; - parent->type = XML_DTD_ELEM_PCDATA; - uint c; - goto first; - - while (1) - { - /* After name */ - xml_parse_dtd_white(ctx, 0); - if ((c = xml_get_char(ctx)) == ')') - { - xml_dec(ctx); - if (parent->type == XML_DTD_ELEM_PCDATA) - parent->type = XML_DTD_ELEM_SEQ; - if ((c = xml_get_char(ctx)) == '?') - parent->occur = XML_DTD_ELEM_OCCUR_OPT; - else if (c == '*') - parent->occur = XML_DTD_ELEM_OCCUR_MULT; - else if (c == '+') - parent->occur = XML_DTD_ELEM_OCCUR_PLUS; - else - { - xml_unget_char(ctx); - parent->occur = XML_DTD_ELEM_OCCUR_ONCE; - } - if (!parent->parent) - break; - parent = parent->parent; - continue; - } - else if (c == '|') - { - if (parent->type == XML_DTD_ELEM_PCDATA) - parent->type = XML_DTD_ELEM_OR; - else if (parent->type != XML_DTD_ELEM_OR) - xml_fatal(ctx, "Mixed operators in the list of element children"); - } - else if (c == ',') - { - if (parent->type == XML_DTD_ELEM_PCDATA) - parent->type = XML_DTD_ELEM_SEQ; - else if (parent->type != XML_DTD_ELEM_SEQ) - xml_fatal(ctx, "Mixed operators in the list of element children"); - } - else if (c == '(') - { - xml_inc(ctx); - struct xml_dtd_elem_node *son = mp_alloc_zero(dtd->pool, sizeof(*son)); - son->parent = parent; - slist_add_tail(&parent->sons, &son->n); - parent = son->parent; - son->type = XML_DTD_ELEM_MIXED; - } - else - xml_unget_char(ctx); - - /* Before name */ - xml_parse_dtd_white(ctx, 0); -first:; - struct xml_dtd_elem *son_elem = xml_dtd_elems_lookup(dtd->tab_elems, xml_parse_name(ctx, dtd->pool)); - // FIXME: duplicates, occurance - //struct xml_dtd_elem_node *son = xml_dtd_enodes_new(dtd->tab_enodes, parent, son_elem); - struct xml_dtd_elem_node *son = mp_alloc_zero(dtd->pool, sizeof(*son)); - son->parent = parent; - son->elem = son_elem; - slist_add_tail(&parent->sons, &son->n); - } - } - } - else - xml_fatal(ctx, "Expected element content specification"); - - xml_parse_dtd_white(ctx, 0); - xml_parse_char(ctx, '>'); - xml_dec(ctx); -} - -void -xml_parse_attr_list_decl(struct xml_context *ctx) -{ - /* AttlistDecl ::= '' - * AttDef ::= S Name S AttType S DefaultDecl - * Already parsed: 'dtd; - xml_parse_dtd_white(ctx, 1); - struct xml_dtd_elem *elem = xml_dtd_elems_lookup(ctx->dtd->tab_elems, xml_parse_name(ctx, dtd->pool)); - - while (xml_parse_dtd_white(ctx, 0) && xml_peek_char(ctx) != '>') - { - char *name = xml_parse_name(ctx, dtd->pool); - struct xml_dtd_attr *attr = xml_dtd_attrs_find(dtd->tab_attrs, elem, name); - uint ignored = 0; - if (attr) - { - xml_warn(ctx, "Duplicate attribute definition"); - ignored++; - } - else - attr = xml_dtd_attrs_new(ctx->dtd->tab_attrs, elem, name); - xml_parse_dtd_white(ctx, 1); - if (xml_peek_char(ctx) == '(') - { - xml_skip_char(ctx); // FIXME: xml_inc/dec ? - if (!ignored) - attr->type = XML_ATTR_ENUM; - do - { - xml_parse_dtd_white(ctx, 0); - char *value = xml_parse_nmtoken(ctx, dtd->pool); - if (!ignored) - if (xml_dtd_evals_find(ctx->dtd->tab_evals, attr, value)) - xml_error(ctx, "Duplicate enumeration value"); - else - xml_dtd_evals_new(ctx->dtd->tab_evals, attr, value); - xml_parse_dtd_white(ctx, 0); - } - while (xml_get_char(ctx) == '|'); - xml_unget_char(ctx); - xml_parse_char(ctx, ')'); - } - else - { - char *type = xml_parse_name(ctx, dtd->pool); - enum xml_dtd_attr_type t = XML_ATTR_CDATA; - if (!strcmp(type, "CDATA")) - t = XML_ATTR_CDATA; - else if (!strcmp(type, "ID")) - t = XML_ATTR_ID; - else if (!strcmp(type, "IDREF")) - t = XML_ATTR_IDREF; - else if (!strcmp(type, "IDREFS")) - t = XML_ATTR_IDREFS; - else if (!strcmp(type, "ENTITY")) - t = XML_ATTR_ENTITY; - else if (!strcmp(type, "ENTITIES")) - t = XML_ATTR_ENTITIES; - else if (!strcmp(type, "NMTOKEN")) - t = XML_ATTR_NMTOKEN; - else if (!strcmp(type, "NMTOKENS")) - t = XML_ATTR_NMTOKENS; - else if (!strcmp(type, "NOTATION")) - { - if (elem->type == XML_DTD_ELEM_EMPTY) - xml_fatal(ctx, "Empty element must not have notation attribute"); - // FIXME: An element type MUST NOT have more than one NOTATION attribute specified. - t = XML_ATTR_NOTATION; - xml_parse_dtd_white(ctx, 1); - xml_parse_char(ctx, '('); - do - { - xml_parse_dtd_white(ctx, 0); - struct xml_dtd_notn *n = xml_dtd_notns_lookup(ctx->dtd->tab_notns, xml_parse_name(ctx, dtd->pool)); - if (!ignored) - if (xml_dtd_enotns_find(ctx->dtd->tab_enotns, attr, n)) - xml_error(ctx, "Duplicate enumerated notation"); - else - xml_dtd_enotns_new(ctx->dtd->tab_enotns, attr, n); - xml_parse_dtd_white(ctx, 0); - } - while (xml_get_char(ctx) == '|'); - xml_unget_char(ctx); - xml_parse_char(ctx, ')'); - } - else - xml_fatal(ctx, "Unknown attribute type"); - if (!ignored) - attr->type = t; - } - xml_parse_dtd_white(ctx, 1); - enum xml_dtd_attr_default def = XML_ATTR_NONE; - if (xml_get_char(ctx) == '#') - switch (xml_peek_char(ctx)) - { - case 'R': - xml_parse_seq(ctx, "REQUIRED"); - def = XML_ATTR_REQUIRED; - break; - case 'I': - xml_parse_seq(ctx, "IMPLIED"); - def = XML_ATTR_IMPLIED; - break; - case 'F': - xml_parse_seq(ctx, "FIXED"); - def = XML_ATTR_FIXED; - xml_parse_dtd_white(ctx, 1); - break; - default: - xml_fatal(ctx, "Expected a modifier for default attribute value"); - } - else - xml_unget_char(ctx); - if (def != XML_ATTR_REQUIRED && def != XML_ATTR_IMPLIED) - { - char *v = xml_parse_attr_value(ctx, attr); - if (!ignored) - attr->default_value = v; - } - if (!ignored) - attr->default_mode = def; - } - xml_skip_char(ctx); - xml_dec(ctx); -} - -void -xml_skip_internal_subset(struct xml_context *ctx) -{ - TRACE(ctx, "skip_internal_subset"); - /* AlreadyParsed: '[' */ - uint c; - while ((c = xml_get_char(ctx)) != ']') - { - if (c != '<') - continue; - if ((c = xml_get_char(ctx)) == '?') - { - xml_inc(ctx); - xml_skip_pi(ctx); - } - else if (c != '!') - xml_dec(ctx); - else if (xml_get_char(ctx) == '-') - { - xml_inc(ctx); - xml_skip_comment(ctx); - } - else - while ((c = xml_get_char(ctx)) != '>') - if (c == '\'' || c == '"') - while (xml_get_char(ctx) != c); - } - xml_dec(ctx); -} - -/*** Validation of attribute values ***/ - -static uint -xml_check_tokens(char *value, uint first_cat, uint next_cat, uint seq) -{ - char *p = value; - uint u; - while (1) - { - p = utf8_32_get(p, &u); - if (!(xml_char_cat(u) & first_cat)) - return 0; - while (*p & ~0x20) - { - p = utf8_32_get(p, &u); - if (!(xml_char_cat(u) & next_cat)) - return 0; - } - if (!*p) - return 1; - if (!seq) - return 0; - p++; - } -} - -static uint -xml_is_name(struct xml_context *ctx, char *value) -{ - /* Name ::= NameStartChar (NameChar)* */ - return xml_check_tokens(value, ctx->cat_sname, ctx->cat_name, 0); -} - -static uint -xml_is_names(struct xml_context *ctx, char *value) -{ - /* Names ::= Name (#x20 Name)* */ - return xml_check_tokens(value, ctx->cat_sname, ctx->cat_name, 1); -} - -static uint -xml_is_nmtoken(struct xml_context *ctx, char *value) -{ - /* Nmtoken ::= (NameChar)+ */ - return xml_check_tokens(value, ctx->cat_name, ctx->cat_name, 0); -} - -static uint -xml_is_nmtokens(struct xml_context *ctx, char *value) -{ - /* Nmtokens ::= Nmtoken (#x20 Nmtoken)* */ - return xml_check_tokens(value, ctx->cat_name, ctx->cat_name, 1); -} - -static void -xml_err_attr_format(struct xml_context *ctx, struct xml_dtd_attr *dtd, char *type) -{ - xml_error(ctx, "Attribute %s in <%s> does not match the production of %s", dtd->name, dtd->elem->name, type); -} - -void -xml_validate_attr(struct xml_context *ctx, struct xml_dtd_attr *dtd, char *value) -{ - if (dtd->type == XML_ATTR_CDATA) - return; - xml_normalize_white(ctx, value); - switch (dtd->type) - { - case XML_ATTR_ID: - if (!xml_is_name(ctx, value)) - xml_err_attr_format(ctx, dtd, "NAME"); - //FIXME: add to a hash table - break; - case XML_ATTR_IDREF: - if (!xml_is_name(ctx, value)) - xml_err_attr_format(ctx, dtd, "NAME"); - // FIXME: find in hash table (beware forward references) - break; - case XML_ATTR_IDREFS: - if (!xml_is_names(ctx, value)) - xml_err_attr_format(ctx, dtd, "NAMES"); - // FIXME: find - break; - case XML_ATTR_ENTITY: - // FIXME - break; - case XML_ATTR_ENTITIES: - // FIXME - break; - case XML_ATTR_NMTOKEN: - if (!xml_is_nmtoken(ctx, value)) - xml_err_attr_format(ctx, dtd, "NMTOKEN"); - break; - case XML_ATTR_NMTOKENS: - if (!xml_is_nmtokens(ctx, value)) - xml_err_attr_format(ctx, dtd, "NMTOKENS"); - break; - case XML_ATTR_ENUM: - if (!xml_dtd_evals_find(ctx->dtd->tab_evals, dtd, value)) - xml_error(ctx, "Attribute %s in <%s> contains an undefined enumeration value", dtd->name, dtd->elem->name); - break; - case XML_ATTR_NOTATION: - if (!xml_dtd_find_notn(ctx, value)) - xml_error(ctx, "Attribute %s in <%s> contains an undefined notation", dtd->name, dtd->elem->name); - break; - } -}