2 * Sherlock Library -- A simple XML parser
4 * (c) 2007--2008 Pavel Charvat <pchar@ucw.cz>
6 * This software may be freely distributed and used according to the terms
7 * of the GNU Lesser General Public License.
12 #include "sherlock/sherlock.h"
13 #include "sherlock/xml/xml.h"
14 #include "sherlock/xml/dtd.h"
15 #include "sherlock/xml/common.h"
16 #include "lib/fastbuf.h"
17 #include "lib/ff-unicode.h"
18 #include "lib/unicode.h"
/* Hash table of notations: nodes are struct xml_dtd_notn, keyed by their
 * `name` string.  Including lib/hashtable.h with these settings generates the
 * xml_dtd_notns_*() functions; both the lookup and the find operation are
 * requested. */
22 #define HASH_PREFIX(x) xml_dtd_notns_##x
23 #define HASH_NODE struct xml_dtd_notn
24 #define HASH_KEY_STRING name
25 #define HASH_ZERO_FILL
26 #define HASH_TABLE_DYNAMIC
27 #define HASH_WANT_LOOKUP
28 #define HASH_WANT_FIND
29 #define HASH_GIVE_ALLOC
30 #define HASH_TABLE_ALLOC
32 #include "lib/hashtable.h"
/* Find the notation called `name` in the DTD's notation table.
 * A node is returned only when it carries XML_DTD_NOTN_DECLARED; a missing
 * node, or one without that flag, yields NULL.
 * NOTE(review): ctx->dtd is dereferenced without a NULL check, unlike
 * xml_dtd_find_elem() and xml_dtd_find_attr() -- confirm that every caller
 * guarantees a DTD is present. */
35 xml_dtd_find_notn(struct xml_context *ctx, char *name)
37 struct xml_dtd *dtd = ctx->dtd;
38 struct xml_dtd_notn *notn = xml_dtd_notns_find(dtd->tab_notns, name);
39 return !notn ? NULL : (notn->flags & XML_DTD_NOTN_DECLARED) ? notn : NULL;
42 /* General entities */
/* Hash table of entities: nodes are struct xml_dtd_entity, keyed by their
 * `name` string.  The generated xml_dtd_ents_*() functions are used in this
 * file for both the general-entity table (tab_ents) and the parameter-entity
 * table (tab_pents). */
44 #define HASH_PREFIX(x) xml_dtd_ents_##x
45 #define HASH_NODE struct xml_dtd_entity
46 #define HASH_KEY_STRING name
47 #define HASH_ZERO_FILL
48 #define HASH_TABLE_DYNAMIC
49 #define HASH_WANT_FIND
50 #define HASH_WANT_LOOKUP
51 #define HASH_GIVE_ALLOC
52 #define HASH_TABLE_ALLOC
54 #include "lib/hashtable.h"
/* Register a built-in ("trivial") general entity called `name`.
 * The node is obtained with xml_dtd_ents_lookup() on the general-entity table
 * (presumably created when absent -- confirm in lib/hashtable.h).  An entity
 * that is already flagged as declared produces a warning; otherwise the node
 * is appended to dtd->ents and flagged DECLARED | TRIVIAL.
 * NOTE(review): no statement visible here stores `text` in the entity or
 * produces the return value -- confirm both against the full source. */
56 static struct xml_dtd_entity *
57 xml_dtd_declare_trivial_entity(struct xml_context *ctx, char *name, char *text)
59 struct xml_dtd *dtd = ctx->dtd;
60 struct xml_dtd_entity *ent = xml_dtd_ents_lookup(dtd->tab_ents, name);
61 if (ent->flags & XML_DTD_ENTITY_DECLARED)
63 xml_warn(ctx, "Entity &%s; already declared", name);
66 slist_add_tail(&dtd->ents, &ent->n);
67 ent->flags = XML_DTD_ENTITY_DECLARED | XML_DTD_ENTITY_TRIVIAL;
/* Declare the five predefined XML entities (lt, gt, amp, apos, quot) in the
 * DTD of `ctx`, each with its single-character replacement text. */
73 xml_dtd_declare_default_entities(struct xml_context *ctx)
75 xml_dtd_declare_trivial_entity(ctx, "lt", "<");
76 xml_dtd_declare_trivial_entity(ctx, "gt", ">");
77 xml_dtd_declare_trivial_entity(ctx, "amp", "&");
78 xml_dtd_declare_trivial_entity(ctx, "apos", "'");
79 xml_dtd_declare_trivial_entity(ctx, "quot", "\"");
/* Default entity resolver that does not consult any DTD (`ctx` is unused).
 * The five predefined entities live in function-local static structures built
 * by the ENT() helper macro, already flagged DECLARED | TRIVIAL; `name` is
 * compared against each of them.  Presumably the matching static entity is
 * returned and NULL otherwise -- the return statements are not visible here. */
82 struct xml_dtd_entity *
83 xml_def_find_entity(struct xml_context *ctx UNUSED, char *name)
85 #define ENT(n, t) ent_##n = { .name = #n, .text = t, .flags = XML_DTD_ENTITY_DECLARED | XML_DTD_ENTITY_TRIVIAL }
86 static struct xml_dtd_entity ENT(lt, "<"), ENT(gt, ">"), ENT(amp, "&"), ENT(apos, "'"), ENT(quot, "\"");
91 if (!strcmp(name, "lt"))
95 if (!strcmp(name, "gt"))
99 if (!strcmp(name, "amp"))
101 if (!strcmp(name, "apos"))
105 if (!strcmp(name, "quot"))
/* Resolve the general entity called `name`.
 * A user hook in ctx->h_find_entity, when set, takes precedence and its result
 * is returned as is.  Otherwise the DTD's general-entity table is searched and
 * only an entity flagged as declared is returned.  xml_def_find_entity() is the
 * remaining fallback (presumably taken when no DTD exists -- the condition
 * selecting between the last two paths is not visible here). */
112 struct xml_dtd_entity *
113 xml_dtd_find_entity(struct xml_context *ctx, char *name)
115 struct xml_dtd *dtd = ctx->dtd;
116 if (ctx->h_find_entity)
117 return ctx->h_find_entity(ctx, name);
120 struct xml_dtd_entity *ent = xml_dtd_ents_find(dtd->tab_ents, name);
121 return !ent ? NULL : (ent->flags & XML_DTD_ENTITY_DECLARED) ? ent : NULL;
124 return xml_def_find_entity(ctx, name);
127 /* Parameter entities */
/* Find the parameter entity called `name` in dtd->tab_pents.
 * Returns the entity only if it is flagged XML_DTD_ENTITY_DECLARED, else NULL.
 * ctx->dtd is dereferenced unconditionally. */
129 static struct xml_dtd_entity *
130 xml_dtd_find_pentity(struct xml_context *ctx, char *name)
132 struct xml_dtd *dtd = ctx->dtd;
133 struct xml_dtd_entity *ent = xml_dtd_ents_find(dtd->tab_pents, name);
134 return !ent ? NULL : (ent->flags & XML_DTD_ENTITY_DECLARED) ? ent : NULL;
/* Hash table of element types: nodes are struct xml_dtd_elem, keyed by their
 * `name` string.  Generates the xml_dtd_elems_*() functions (find and lookup
 * are requested). */
139 #define HASH_PREFIX(x) xml_dtd_elems_##x
140 #define HASH_NODE struct xml_dtd_elem
141 #define HASH_KEY_STRING name
142 #define HASH_TABLE_DYNAMIC
143 #define HASH_ZERO_FILL
144 #define HASH_WANT_FIND
145 #define HASH_WANT_LOOKUP
146 #define HASH_GIVE_ALLOC
147 #define HASH_TABLE_ALLOC
149 #include "lib/hashtable.h"
/* Find the element type called `name`; returns NULL when the context has no
 * DTD or the table holds no such element.  Unlike the entity and notation
 * finders, no "declared" flag is tested here. */
151 struct xml_dtd_elem *
152 xml_dtd_find_elem(struct xml_context *ctx, char *name)
154 return ctx->dtd ? xml_dtd_elems_find(ctx->dtd->tab_elems, name) : NULL;
/* Forward declaration of the table type; the helper below names it in its
 * signature before the type is used further down. */
159 struct xml_dtd_enodes_table;
/* Hash of an element-node key: the (parent node, element) pointer pair, hashed
 * per pointer and combined with XOR. */
162 xml_dtd_enodes_hash(struct xml_dtd_enodes_table *tab UNUSED, struct xml_dtd_elem_node *parent, struct xml_dtd_elem *elem)
164 return hash_pointer(parent) ^ hash_pointer(elem);
/* Two element-node keys are equal when both the parent pointers and the
 * element pointers are identical. */
168 xml_dtd_enodes_eq(struct xml_dtd_enodes_table *tab UNUSED, struct xml_dtd_elem_node *parent1, struct xml_dtd_elem *elem1, struct xml_dtd_elem_node *parent2, struct xml_dtd_elem *elem2)
170 return (parent1 == parent2) && (elem1 == elem2);
/* Store the key in a newly created element node.
 * NOTE(review): only the assignment of `parent` is visible here -- confirm that
 * `elem` is stored on the node as well, since it is the other half of the key. */
174 xml_dtd_enodes_init_key(struct xml_dtd_enodes_table *tab UNUSED, struct xml_dtd_elem_node *node, struct xml_dtd_elem_node *parent, struct xml_dtd_elem *elem)
176 node->parent = parent;
/* Hash table of element-content nodes (struct xml_dtd_elem_node) with a
 * two-part key (parent, elem).  The hash and key-initialisation functions are
 * supplied by this file (HASH_GIVE_HASHFN, HASH_GIVE_INIT_KEY); find and new
 * operations are requested as xml_dtd_enodes_*(). */
180 #define HASH_PREFIX(x) xml_dtd_enodes_##x
181 #define HASH_NODE struct xml_dtd_elem_node
182 #define HASH_KEY_COMPLEX(x) x parent, x elem
183 #define HASH_KEY_DECL struct xml_dtd_elem_node *parent, struct xml_dtd_elem *elem
184 #define HASH_GIVE_HASHFN
186 #define HASH_GIVE_INIT_KEY
187 #define HASH_TABLE_DYNAMIC
188 #define HASH_ZERO_FILL
189 #define HASH_WANT_FIND
190 #define HASH_WANT_NEW
191 #define HASH_GIVE_ALLOC
192 #define HASH_TABLE_ALLOC
194 #include "lib/hashtable.h"
196 /* Element attributes */
/* Forward declaration of the table type named in the helper signatures. */
198 struct xml_dtd_attrs_table;
/* Hash of an attribute key: the owning element's pointer hash XORed with the
 * string hash of the attribute name. */
201 xml_dtd_attrs_hash(struct xml_dtd_attrs_table *tab UNUSED, struct xml_dtd_elem *elem, char *name)
203 return hash_pointer(elem) ^ hash_string(name);
/* Attribute keys are equal when they refer to the same element (pointer
 * identity) and the names compare equal with strcmp(). */
207 xml_dtd_attrs_eq(struct xml_dtd_attrs_table *tab UNUSED, struct xml_dtd_elem *elem1, char *name1, struct xml_dtd_elem *elem2, char *name2)
209 return (elem1 == elem2) && !strcmp(name1, name2);
/* Key initialiser for a new attribute node; receives the node plus the
 * (elem, name) key.  The body is not visible here -- presumably it stores both
 * key parts on `attr`; TODO confirm. */
213 xml_dtd_attrs_init_key(struct xml_dtd_attrs_table *tab UNUSED, struct xml_dtd_attr *attr, struct xml_dtd_elem *elem, char *name)
/* Hash table of attribute definitions (struct xml_dtd_attr) keyed by the pair
 * (elem, name).  Hash and key-initialisation functions come from this file;
 * find and new operations are requested as xml_dtd_attrs_*(). */
219 #define HASH_PREFIX(x) xml_dtd_attrs_##x
220 #define HASH_NODE struct xml_dtd_attr
221 #define HASH_ZERO_FILL
222 #define HASH_TABLE_DYNAMIC
223 #define HASH_KEY_COMPLEX(x) x elem, x name
224 #define HASH_KEY_DECL struct xml_dtd_elem *elem, char *name
225 #define HASH_GIVE_HASHFN
227 #define HASH_GIVE_INIT_KEY
228 #define HASH_WANT_FIND
229 #define HASH_WANT_NEW
230 #define HASH_GIVE_ALLOC
231 #define HASH_TABLE_ALLOC
233 #include "lib/hashtable.h"
/* Find the definition of attribute `name` on element type `elem`; returns NULL
 * when the context has no DTD or no such attribute is recorded. */
235 struct xml_dtd_attr *
236 xml_dtd_find_attr(struct xml_context *ctx, struct xml_dtd_elem *elem, char *name)
238 return ctx->dtd ? xml_dtd_attrs_find(ctx->dtd->tab_attrs, elem, name) : NULL;
241 /* Enumerated attribute values */
/* Forward declaration of the table type named in the helper signatures. */
243 struct xml_dtd_evals_table;
/* Hash of an enumerated-value key: the attribute's pointer hash XORed with the
 * string hash of the value. */
246 xml_dtd_evals_hash(struct xml_dtd_evals_table *tab UNUSED, struct xml_dtd_attr *attr, char *val)
248 return hash_pointer(attr) ^ hash_string(val);
/* Enumerated-value keys are equal when they belong to the same attribute
 * (pointer identity) and the value strings compare equal with strcmp(). */
252 xml_dtd_evals_eq(struct xml_dtd_evals_table *tab UNUSED, struct xml_dtd_attr *attr1, char *val1, struct xml_dtd_attr *attr2, char *val2)
254 return (attr1 == attr2) && !strcmp(val1, val2);
/* Key initialiser for a new enumerated-value node; receives the node plus the
 * (attr, val) key.  The body is not visible here -- presumably it stores both
 * key parts on `eval`; TODO confirm. */
258 xml_dtd_evals_init_key(struct xml_dtd_evals_table *tab UNUSED, struct xml_dtd_eval *eval, struct xml_dtd_attr *attr, char *val)
/* Hash table of enumerated attribute values (struct xml_dtd_eval) keyed by the
 * pair (attr, val).  Hash and key-initialisation functions come from this
 * file; find and new operations are requested as xml_dtd_evals_*().
 * Note that, unlike the tables above, HASH_ZERO_FILL is not set here. */
264 #define HASH_PREFIX(x) xml_dtd_evals_##x
265 #define HASH_NODE struct xml_dtd_eval
266 #define HASH_TABLE_DYNAMIC
267 #define HASH_KEY_COMPLEX(x) x attr, x val
268 #define HASH_KEY_DECL struct xml_dtd_attr *attr, char *val
269 #define HASH_GIVE_HASHFN
271 #define HASH_GIVE_INIT_KEY
272 #define HASH_WANT_FIND
273 #define HASH_WANT_NEW
274 #define HASH_GIVE_ALLOC
275 #define HASH_TABLE_ALLOC
277 #include "lib/hashtable.h"
279 /* Enumerated attribute notations */
/* Forward declaration of the table type named in the helper signatures. */
281 struct xml_dtd_enotns_table;
/* Hash of an enumerated-notation key: the (attribute, notation) pointer pair,
 * hashed per pointer and combined with XOR. */
284 xml_dtd_enotns_hash(struct xml_dtd_enotns_table *tab UNUSED, struct xml_dtd_attr *attr, struct xml_dtd_notn *notn)
286 return hash_pointer(attr) ^ hash_pointer(notn);
/* Enumerated-notation keys are equal when both the attribute pointers and the
 * notation pointers are identical. */
290 xml_dtd_enotns_eq(struct xml_dtd_enotns_table *tab UNUSED, struct xml_dtd_attr *attr1, struct xml_dtd_notn *notn1, struct xml_dtd_attr *attr2, struct xml_dtd_notn *notn2)
292 return (attr1 == attr2) && (notn1 == notn2);
/* Key initialiser for a new enumerated-notation node; receives the node plus
 * the (attr, notn) key.  The body is not visible here -- presumably it stores
 * both key parts on `enotn`; TODO confirm. */
296 xml_dtd_enotns_init_key(struct xml_dtd_enotns_table *tab UNUSED, struct xml_dtd_enotn *enotn, struct xml_dtd_attr *attr, struct xml_dtd_notn *notn)
/* Hash table of enumerated attribute notations (struct xml_dtd_enotn) keyed by
 * the pair (attr, notn).  Hash and key-initialisation functions come from this
 * file; find and new operations are requested as xml_dtd_enotns_*().
 * HASH_ZERO_FILL is not set for this table. */
302 #define HASH_PREFIX(x) xml_dtd_enotns_##x
303 #define HASH_NODE struct xml_dtd_enotn
304 #define HASH_TABLE_DYNAMIC
305 #define HASH_KEY_COMPLEX(x) x attr, x notn
306 #define HASH_KEY_DECL struct xml_dtd_attr *attr, struct xml_dtd_notn *notn
307 #define HASH_GIVE_HASHFN
309 #define HASH_GIVE_INIT_KEY
310 #define HASH_WANT_FIND
311 #define HASH_WANT_NEW
312 #define HASH_GIVE_ALLOC
313 #define HASH_TABLE_ALLOC
315 #include "lib/hashtable.h"
317 /* DTD initialization/cleanup */
/* Create the DTD of `ctx`.
 * A fresh memory pool is created with mp_new(4096); the zero-filled struct
 * xml_dtd is allocated from it and published as ctx->dtd.  All eight hash
 * tables (general and parameter entities, notations, elements, element nodes,
 * attributes, enumerated values, enumerated notations) are allocated through
 * xml_hash_new() from the same pool and initialised, then the predefined
 * entities are declared.
 * NOTE(review): no statement visible here stores `pool` in dtd->pool, yet
 * xml_dtd_cleanup() deletes ctx->dtd->pool and the parsers allocate from
 * dtd->pool -- confirm the assignment exists in the full source. */
320 xml_dtd_init(struct xml_context *ctx)
324 struct mempool *pool = mp_new(4096);
325 struct xml_dtd *dtd = ctx->dtd = mp_alloc_zero(pool, sizeof(*ctx->dtd));
327 xml_dtd_ents_init(dtd->tab_ents = xml_hash_new(pool, sizeof(struct xml_dtd_ents_table)));
328 xml_dtd_ents_init(dtd->tab_pents = xml_hash_new(pool, sizeof(struct xml_dtd_ents_table)));
329 xml_dtd_notns_init(dtd->tab_notns = xml_hash_new(pool, sizeof(struct xml_dtd_notns_table)));
330 xml_dtd_elems_init(dtd->tab_elems = xml_hash_new(pool, sizeof(struct xml_dtd_elems_table)));
331 xml_dtd_enodes_init(dtd->tab_enodes = xml_hash_new(pool, sizeof(struct xml_dtd_enodes_table)));
332 xml_dtd_attrs_init(dtd->tab_attrs = xml_hash_new(pool, sizeof(struct xml_dtd_attrs_table)));
333 xml_dtd_evals_init(dtd->tab_evals = xml_hash_new(pool, sizeof(struct xml_dtd_evals_table)));
334 xml_dtd_enotns_init(dtd->tab_enotns = xml_hash_new(pool, sizeof(struct xml_dtd_enotns_table)));
335 xml_dtd_declare_default_entities(ctx);
/* Release the DTD of `ctx` by deleting its memory pool.  The struct xml_dtd and
 * its hash tables are allocated from that pool in xml_dtd_init(), so the pool
 * deletion is the only release visible here.
 * NOTE(review): any guard for a missing DTD or reset of ctx->dtd is not visible
 * in this view -- confirm against the full source. */
339 xml_dtd_cleanup(struct xml_context *ctx)
343 mp_delete(ctx->dtd->pool);
/* Hook called when the DTD is finished.  The only content visible here is the
 * FIXME below: validity checks of the DTD are not implemented. */
348 xml_dtd_finish(struct xml_context *ctx)
352 // FIXME: validity checks
355 /*** Parsing functions ***/
357 /* References to parameter entities */
/* Parse a parameter-entity reference whose leading '%' was already consumed.
 * The name is parsed into the temporary ctx->stack pool, so the pool state is
 * saved first and restored on both visible paths.  An unknown entity is
 * reported with xml_error(); a known one is pushed with xml_push_entity() so
 * that parsing continues inside it. */
360 xml_parse_pe_ref(struct xml_context *ctx)
362 /* PEReference ::= '%' Name ';'
363 * Already parsed: '%' */
364 struct mempool_state state;
365 mp_save(ctx->stack, &state);
366 char *name = xml_parse_name(ctx, ctx->stack);
367 xml_parse_char(ctx, ';');
368 struct xml_dtd_entity *ent = xml_dtd_find_pentity(ctx, name);
370 xml_error(ctx, "Unknown entity %%%s;", name);
373 TRACE(ctx, "Pushed entity %%%s;", name);
/* `name` lives on ctx->stack: it must not be used after this restore. */
374 mp_restore(ctx->stack, &state);
376 xml_push_entity(ctx, ent);
379 mp_restore(ctx->stack, &state);
/* Handle parameter-entity reference(s) met inside a DTD declaration: optional
 * whitespace is examined, then xml_parse_pe_ref() parses the reference.
 * NOTE(review): the trailing loop condition keeps going while the next
 * character is NOT '%'.  If the intent is to consume a run of consecutive
 * PE references, `== '%'` would be expected -- confirm against the full loop,
 * whose opening lines are not visible here. */
384 xml_parse_dtd_pe(struct xml_context *ctx)
390 while (xml_peek_cat(ctx) & XML_CHAR_WHITE)
392 xml_parse_pe_ref(ctx);
394 while (xml_peek_char(ctx) != '%');
/* Skip whitespace and parameter-entity references inside the DTD.
 * When `mandatory` is non-zero and nothing was counted in `cnt`, the parser
 * fails through xml_fatal_expected_white().  Callers in this file use the
 * return value as a truth value (presumably the number of items skipped --
 * the declaration of `cnt` and the return statement are not visible here). */
398 xml_parse_dtd_white(struct xml_context *ctx, uns mandatory)
400 /* Whitespace or parameter entity */
402 while (xml_peek_cat(ctx) & XML_CHAR_WHITE)
407 if (xml_peek_char(ctx) == '%')
409 xml_parse_dtd_pe(ctx);
412 else if (unlikely(mandatory && !cnt))
413 xml_fatal_expected_white(ctx);
/* Parse an external identifier: either SYSTEM followed by a system literal, or
 * PUBLIC followed by a public-id literal and a system literal.
 * Results are allocated from dtd->pool and written to *system_id / *public_id.
 * With `allow_public` set the system literal after PUBLIC is optional: it is
 * parsed only when whitespace was found and a quote character follows.
 * Without it, the whitespace is mandatory and the system literal is always
 * parsed.  Any other input is fatal.
 * NOTE(review): the statements clearing the output not used by a branch are
 * not visible here -- confirm both outputs are always initialised. */
418 xml_dtd_parse_external_id(struct xml_context *ctx, char **system_id, char **public_id, uns allow_public)
420 struct xml_dtd *dtd = ctx->dtd;
421 uns c = xml_peek_char(ctx);
424 xml_parse_seq(ctx, "SYSTEM");
425 xml_parse_dtd_white(ctx, 1);
427 *system_id = xml_parse_system_literal(ctx, dtd->pool);
431 xml_parse_seq(ctx, "PUBLIC");
432 xml_parse_dtd_white(ctx, 1);
434 *public_id = xml_parse_pubid_literal(ctx, dtd->pool);
435 if (xml_parse_dtd_white(ctx, !allow_public))
436 if ((c = xml_peek_char(ctx)) == '\'' || c == '"' || !allow_public)
437 *system_id = xml_parse_system_literal(ctx, dtd->pool);
440 xml_fatal(ctx, "Expected an external ID");
443 /* DTD: <!NOTATION ...> */
/* Parse a <!NOTATION ...> declaration (the keyword itself is already consumed).
 * The notation name is allocated from dtd->pool and looked up in the notation
 * table, then the external/public identifier and the closing '>' are parsed
 * (a public id alone is accepted: allow_public = 1).  A notation that is
 * already declared only triggers a warning; otherwise the node is flagged as
 * declared, receives both identifiers and is appended to dtd->notns. */
446 xml_parse_notation_decl(struct xml_context *ctx)
448 /* NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
449 * Already parsed: '<!NOTATION' */
450 TRACE(ctx, "parse_notation_decl");
451 struct xml_dtd *dtd = ctx->dtd;
452 xml_parse_dtd_white(ctx, 1);
454 struct xml_dtd_notn *notn = xml_dtd_notns_lookup(dtd->tab_notns, xml_parse_name(ctx, dtd->pool));
455 xml_parse_dtd_white(ctx, 1);
456 char *system_id, *public_id;
457 xml_dtd_parse_external_id(ctx, &system_id, &public_id, 1);
458 xml_parse_dtd_white(ctx, 0);
459 xml_parse_char(ctx, '>');
461 if (notn->flags & XML_DTD_NOTN_DECLARED)
462 xml_warn(ctx, "Notation %s already declared", notn->name);
465 notn->flags = XML_DTD_NOTN_DECLARED;
466 notn->system_id = system_id;
467 notn->public_id = public_id;
/* NOTE(review): the second argument below looks garbled; presumably it is the
 * address of notn->n, matching the other slist_add_tail() calls -- confirm. */
468 slist_add_tail(&dtd->notns, ¬n->n);
473 /* DTD: <!ENTITY ...> */
/* Parse an <!ENTITY ...> declaration (the keyword itself is already consumed).
 * A '%' after the keyword selects a parameter entity: `flags` then holds
 * XML_DTD_ENTITY_PARAMETER and the entity goes to tab_pents / dtd->pents,
 * otherwise to tab_ents / dtd->ents.  Redeclaring an entity is currently fatal.
 * A quoted value defines an internal entity whose text is accumulated in
 * dtd->pool: references to general entities are copied through unexpanded,
 * character references are decoded and stored as UTF-8.  Anything else is
 * parsed as an external entity with system/public identifiers. */
476 xml_parse_entity_decl(struct xml_context *ctx)
478 /* Already parsed: '<!ENTITY' */
479 TRACE(ctx, "parse_entity_decl");
480 struct xml_dtd *dtd = ctx->dtd;
481 xml_parse_dtd_white(ctx, 1);
483 uns flags = (xml_get_char(ctx) == '%') ? XML_DTD_ENTITY_PARAMETER : 0;
485 xml_parse_dtd_white(ctx, 1);
489 struct xml_dtd_entity *ent = xml_dtd_ents_lookup(flags ? dtd->tab_pents : dtd->tab_ents, xml_parse_name(ctx, dtd->pool));
490 slist *list = flags ? &dtd->pents : &dtd->ents;
491 xml_parse_dtd_white(ctx, 1);
492 if (ent->flags & XML_DTD_ENTITY_DECLARED)
494 xml_fatal(ctx, "Entity &%s; already declared, skipping not implemented", ent->name);
495 // FIXME: should be only warning
498 uns c, sep = xml_get_char(ctx);
499 if (sep == '\'' || sep == '"')
502 * EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | "'" ([^%&'] | PEReference | Reference)* "'" */
503 char *p = mp_start_noalign(dtd->pool, 1);
506 if ((c = xml_get_char(ctx)) == sep)
512 //xml_parse_parameter_ref(ctx);
518 if (xml_peek_char(ctx) != '#')
520 /* Bypass references to general entities */
521 struct mempool_state state;
522 mp_save(ctx->stack, &state);
523 char *n = xml_parse_name(ctx, ctx->stack);
524 xml_parse_char(ctx, ';');
527 p = mp_spread(dtd->pool, p, 3 + l);
532 mp_restore(ctx->stack, &state);
538 c = xml_parse_char_ref(ctx);
541 p = mp_spread(dtd->pool, p, 5);
542 p = utf8_32_put(p, c);
545 ent->len = p - (char *)mp_ptr(dtd->pool);
546 ent->text = mp_end(dtd->pool, p + 1);
547 slist_add_tail(list, &ent->n);
548 ent->flags = flags | XML_DTD_ENTITY_DECLARED;
552 /* External entity */
553 struct xml_dtd_notn *notn = NULL;
554 char *system_id, *public_id;
556 xml_dtd_parse_external_id(ctx, &system_id, &public_id, 0);
/* NOTE(review): `flags` is non-zero only for PARAMETER entities, so this NDATA
 * branch runs for parameter entities, while the comment below (and the XML
 * grammar, where NDataDecl belongs to general entities only) suggests the
 * test should be `!flags` -- confirm. */
557 if (xml_parse_dtd_white(ctx, 0) && flags && xml_peek_char(ctx) != '>')
559 /* General external unparsed entity */
560 flags |= XML_DTD_ENTITY_UNPARSED;
561 xml_parse_seq(ctx, "NDATA");
562 xml_parse_dtd_white(ctx, 1);
563 notn = xml_dtd_notns_lookup(dtd->tab_notns, xml_parse_name(ctx, dtd->pool));
565 slist_add_tail(list, &ent->n);
566 ent->flags = flags | XML_DTD_ENTITY_DECLARED | XML_DTD_ENTITY_EXTERNAL;
567 ent->system_id = system_id;
568 ent->public_id = public_id;
571 xml_parse_dtd_white(ctx, 0);
572 xml_parse_char(ctx, '>');
576 /* DTD: <!ELEMENT ...> */
/* Parse an <!ELEMENT ...> declaration (the keyword itself is already consumed).
 * The element is looked up by name in tab_elems; a second declaration of the
 * same element is fatal.  The content specification is one of:
 *   EMPTY / ANY  -- recorded directly in elem->type;
 *   Mixed        -- '(#PCDATA | Name ...)': each named child is registered in
 *                   tab_enodes under the root node, duplicates are reported;
 *   children     -- a nested choice/sequence tree of struct xml_dtd_elem_node
 *                   allocated from dtd->pool.
 * While a children group is open, XML_DTD_ELEM_PCDATA on its node serves as the
 * "operator not yet known" marker: the first '|' or ',' turns it into OR or SEQ,
 * and a ')' with no operator seen defaults to SEQ. */
579 xml_parse_element_decl(struct xml_context *ctx)
581 /* Elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
582 * Already parsed: '<!ELEMENT' */
583 struct xml_dtd *dtd = ctx->dtd;
584 xml_parse_dtd_white(ctx, 1);
585 char *name = xml_parse_name(ctx, dtd->pool);
586 xml_parse_dtd_white(ctx, 1);
587 struct xml_dtd_elem *elem = xml_dtd_elems_lookup(dtd->tab_elems, name);
588 if (elem->flags & XML_DTD_ELEM_DECLARED)
589 xml_fatal(ctx, "Element <%s> already declared", name);
591 /* contentspec ::= 'EMPTY' | 'ANY' | Mixed | children */
592 uns c = xml_peek_char(ctx);
595 xml_parse_seq(ctx, "EMPTY");
596 elem->type = XML_DTD_ELEM_EMPTY;
600 xml_parse_seq(ctx, "ANY");
601 elem->type = XML_DTD_ELEM_ANY;
607 xml_parse_dtd_white(ctx, 0);
608 struct xml_dtd_elem_node *parent = elem->node = mp_alloc_zero(dtd->pool, sizeof(*parent));
609 if (xml_peek_char(ctx) == '#')
611 /* Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | '(' S? '#PCDATA' S? ')' */
613 xml_parse_seq(ctx, "PCDATA");
614 elem->type = XML_DTD_ELEM_MIXED;
615 parent->type = XML_DTD_ELEM_PCDATA;
618 xml_parse_dtd_white(ctx, 0);
619 if ((c = xml_get_char(ctx)) == ')')
622 xml_fatal_expected(ctx, ')');
623 xml_parse_dtd_white(ctx, 0);
624 struct xml_dtd_elem *son_elem = xml_dtd_elems_lookup(dtd->tab_elems, xml_parse_name(ctx, dtd->pool));
625 if (xml_dtd_enodes_find(dtd->tab_enodes, parent, son_elem))
626 xml_error(ctx, "Duplicate content '%s'", son_elem->name);
629 struct xml_dtd_elem_node *son = xml_dtd_enodes_new(dtd->tab_enodes, parent, son_elem);
630 slist_add_tail(&parent->sons, &son->n);
634 if (xml_peek_char(ctx) == '*')
637 parent->occur = XML_DTD_ELEM_OCCUR_MULT;
/* '(#PCDATA)' with no named children may omit the trailing '*'. */
639 else if (!slist_head(&parent->sons))
640 parent->occur = XML_DTD_ELEM_OCCUR_ONCE;
642 xml_fatal_expected(ctx, '*');
646 /* children ::= (choice | seq) ('?' | '*' | '+')?
647 * cp ::= (Name | choice | seq) ('?' | '*' | '+')?
648 * choice ::= '(' S? cp ( S? '|' S? cp )+ S? ')'
649 * seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' */
651 elem->type = XML_DTD_ELEM_CHILDREN;
652 parent->type = XML_DTD_ELEM_PCDATA;
659 xml_parse_dtd_white(ctx, 0);
660 if ((c = xml_get_char(ctx)) == ')')
663 if (parent->type == XML_DTD_ELEM_PCDATA)
664 parent->type = XML_DTD_ELEM_SEQ;
665 if ((c = xml_get_char(ctx)) == '?')
666 parent->occur = XML_DTD_ELEM_OCCUR_OPT;
668 parent->occur = XML_DTD_ELEM_OCCUR_MULT;
670 parent->occur = XML_DTD_ELEM_OCCUR_PLUS;
674 parent->occur = XML_DTD_ELEM_OCCUR_ONCE;
/* Group closed: continue in the enclosing group. */
678 parent = parent->parent;
683 if (parent->type == XML_DTD_ELEM_PCDATA)
684 parent->type = XML_DTD_ELEM_OR;
685 else if (parent->type != XML_DTD_ELEM_OR)
686 xml_fatal(ctx, "Mixed operators in the list of element children");
690 if (parent->type == XML_DTD_ELEM_PCDATA)
691 parent->type = XML_DTD_ELEM_SEQ;
692 else if (parent->type != XML_DTD_ELEM_SEQ)
693 xml_fatal(ctx, "Mixed operators in the list of element children");
698 struct xml_dtd_elem_node *son = mp_alloc_zero(dtd->pool, sizeof(*son));
699 son->parent = parent;
700 slist_add_tail(&parent->sons, &son->n);
/* NOTE(review): son->parent was just set to `parent`, so the assignment below
 * leaves `parent` unchanged and the parser never descends into the nested
 * group; `parent = son` looks intended.  Likewise the new group is seeded with
 * XML_DTD_ELEM_MIXED rather than the XML_DTD_ELEM_PCDATA marker the operator
 * checks above test for -- confirm both. */
701 parent = son->parent;
702 son->type = XML_DTD_ELEM_MIXED;
708 xml_parse_dtd_white(ctx, 0);
710 struct xml_dtd_elem *son_elem = xml_dtd_elems_lookup(dtd->tab_elems, xml_parse_name(ctx, dtd->pool));
711 // FIXME: duplicates, occurance
712 //struct xml_dtd_elem_node *son = xml_dtd_enodes_new(dtd->tab_enodes, parent, son_elem);
713 struct xml_dtd_elem_node *son = mp_alloc_zero(dtd->pool, sizeof(*son));
714 son->parent = parent;
715 son->elem = son_elem;
716 slist_add_tail(&parent->sons, &son->n);
721 xml_fatal(ctx, "Expected element content specification");
723 xml_parse_dtd_white(ctx, 0);
724 xml_parse_char(ctx, '>');
/* Parse an <!ATTLIST ...> declaration (the keyword itself is already consumed).
 * The element is looked up by name in tab_elems, then attribute definitions are
 * read until '>' is met.  For each definition:
 *   - the (elem, name) pair is searched in tab_attrs; an existing definition
 *     produces a "Duplicate attribute definition" warning, a new one is created
 *     with xml_dtd_attrs_new();
 *   - the type is either an enumeration '(a|b|...)' whose values go to
 *     tab_evals, or a keyword (CDATA, ID, IDREF, IDREFS, ENTITY, ENTITIES,
 *     NMTOKEN, NMTOKENS, NOTATION); NOTATION is followed by a parenthesised
 *     list of notation names recorded in tab_enotns;
 *   - the default declaration is #REQUIRED, #IMPLIED, #FIXED plus a value, or a
 *     bare value; a value is parsed unless the mode is REQUIRED or IMPLIED.
 * NOTE(review): how a duplicate definition is skipped is not visible in this
 * view -- confirm that the rest of the definition is still consumed. */
729 xml_parse_attr_list_decl(struct xml_context *ctx)
731 /* AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
732 * AttDef ::= S Name S AttType S DefaultDecl
733 * Already parsed: '<!ATTLIST' */
734 struct xml_dtd *dtd = ctx->dtd;
735 xml_parse_dtd_white(ctx, 1);
736 struct xml_dtd_elem *elem = xml_dtd_elems_lookup(ctx->dtd->tab_elems, xml_parse_name(ctx, dtd->pool));
738 while (xml_parse_dtd_white(ctx, 0) && xml_peek_char(ctx) != '>')
740 char *name = xml_parse_name(ctx, dtd->pool);
741 struct xml_dtd_attr *attr = xml_dtd_attrs_find(dtd->tab_attrs, elem, name);
745 xml_warn(ctx, "Duplicate attribute definition");
749 attr = xml_dtd_attrs_new(ctx->dtd->tab_attrs, elem, name);
750 xml_parse_dtd_white(ctx, 1);
751 if (xml_peek_char(ctx) == '(')
753 xml_skip_char(ctx); // FIXME: xml_inc/dec ?
755 attr->type = XML_ATTR_ENUM;
758 xml_parse_dtd_white(ctx, 0);
759 char *value = xml_parse_nmtoken(ctx, dtd->pool);
761 if (xml_dtd_evals_find(ctx->dtd->tab_evals, attr, value))
762 xml_error(ctx, "Duplicate enumeration value");
764 xml_dtd_evals_new(ctx->dtd->tab_evals, attr, value);
765 xml_parse_dtd_white(ctx, 0);
767 while (xml_get_char(ctx) == '|');
769 xml_parse_char(ctx, ')');
773 char *type = xml_parse_name(ctx, dtd->pool);
774 enum xml_dtd_attr_type t = XML_ATTR_CDATA;
775 if (!strcmp(type, "CDATA"))
777 else if (!strcmp(type, "ID"))
779 else if (!strcmp(type, "IDREF"))
781 else if (!strcmp(type, "IDREFS"))
783 else if (!strcmp(type, "ENTITY"))
785 else if (!strcmp(type, "ENTITIES"))
786 t = XML_ATTR_ENTITIES;
787 else if (!strcmp(type, "NMTOKEN"))
788 t = XML_ATTR_NMTOKEN;
789 else if (!strcmp(type, "NMTOKENS"))
790 t = XML_ATTR_NMTOKENS;
791 else if (!strcmp(type, "NOTATION"))
/* This check only sees elements whose EMPTY declaration was parsed before
 * this ATTLIST; the element may have been created by the lookup above. */
793 if (elem->type == XML_DTD_ELEM_EMPTY)
794 xml_fatal(ctx, "Empty element must not have notation attribute");
795 // FIXME: An element type MUST NOT have more than one NOTATION attribute specified.
796 t = XML_ATTR_NOTATION;
797 xml_parse_dtd_white(ctx, 1);
798 xml_parse_char(ctx, '(');
801 xml_parse_dtd_white(ctx, 0);
802 struct xml_dtd_notn *n = xml_dtd_notns_lookup(ctx->dtd->tab_notns, xml_parse_name(ctx, dtd->pool));
804 if (xml_dtd_enotns_find(ctx->dtd->tab_enotns, attr, n))
805 xml_error(ctx, "Duplicate enumerated notation");
807 xml_dtd_enotns_new(ctx->dtd->tab_enotns, attr, n);
808 xml_parse_dtd_white(ctx, 0);
810 while (xml_get_char(ctx) == '|');
812 xml_parse_char(ctx, ')');
815 xml_fatal(ctx, "Unknown attribute type");
819 xml_parse_dtd_white(ctx, 1);
820 enum xml_dtd_attr_default def = XML_ATTR_NONE;
821 if (xml_get_char(ctx) == '#')
822 switch (xml_peek_char(ctx))
825 xml_parse_seq(ctx, "REQUIRED");
826 def = XML_ATTR_REQUIRED;
829 xml_parse_seq(ctx, "IMPLIED");
830 def = XML_ATTR_IMPLIED;
833 xml_parse_seq(ctx, "FIXED");
834 def = XML_ATTR_FIXED;
835 xml_parse_dtd_white(ctx, 1);
838 xml_fatal(ctx, "Expected a modifier for default attribute value");
/* Only FIXED and plain defaults carry a literal value. */
842 if (def != XML_ATTR_REQUIRED && def != XML_ATTR_IMPLIED)
844 char *v = xml_parse_attr_value(ctx, attr);
846 attr->default_value = v;
849 attr->default_mode = def;
/* Skip the internal DTD subset without interpreting it; the opening '[' is
 * already consumed and characters are read until the closing ']'.
 * Comments are skipped with xml_skip_comment(); inside other markup, quoted
 * literals are stepped over as a whole so that a '>' inside quotes does not
 * end the declaration.
 * NOTE(review): the quote-skipping loop has no visible end-of-input test --
 * presumably xml_get_char() fails on EOF; confirm. */
856 xml_skip_internal_subset(struct xml_context *ctx)
858 TRACE(ctx, "skip_internal_subset");
859 /* AlreadyParsed: '[' */
861 while ((c = xml_get_char(ctx)) != ']')
865 if ((c = xml_get_char(ctx)) == '?')
872 else if (xml_get_char(ctx) == '-')
875 xml_skip_comment(ctx);
878 while ((c = xml_get_char(ctx)) != '>')
879 if (c == '\'' || c == '"')
880 while (xml_get_char(ctx) != c);
885 /*** Validation of attribute values ***/
/* Check that `value` consists of well-formed token(s).
 * Characters are decoded from UTF-8 one at a time; one decoded character is
 * tested against the category mask `first_cat` and another against `next_cat`
 * (presumably the first character of a token and the following ones).
 * `seq` is passed as 1 by the plural validators and 0 by the singular ones in
 * this file -- presumably it permits a space-separated list of tokens; the loop
 * structure and return values are not visible here, TODO confirm. */
888 xml_check_tokens(char *value, uns first_cat, uns next_cat, uns seq)
894 p = utf8_32_get(p, &u);
895 if (!(xml_char_cat(u) & first_cat))
899 p = utf8_32_get(p, &u);
900 if (!(xml_char_cat(u) & next_cat))
/* Test `value` against the Name production, using the context's name-start
 * (cat_sname) and name (cat_name) character categories; single token only. */
912 xml_is_name(struct xml_context *ctx, char *value)
914 /* Name ::= NameStartChar (NameChar)* */
915 return xml_check_tokens(value, ctx->cat_sname, ctx->cat_name, 0);
/* Test `value` against the Names production: as xml_is_name(), but with the
 * sequence flag of xml_check_tokens() set. */
919 xml_is_names(struct xml_context *ctx, char *value)
921 /* Names ::= Name (#x20 Name)* */
922 return xml_check_tokens(value, ctx->cat_sname, ctx->cat_name, 1);
/* Test `value` against the Nmtoken production: every character, including the
 * first, is checked against the name category (cat_name); single token only. */
926 xml_is_nmtoken(struct xml_context *ctx, char *value)
928 /* Nmtoken ::= (NameChar)+ */
929 return xml_check_tokens(value, ctx->cat_name, ctx->cat_name, 0);
/* Test `value` against the Nmtokens production: as xml_is_nmtoken(), but with
 * the sequence flag of xml_check_tokens() set. */
933 xml_is_nmtokens(struct xml_context *ctx, char *value)
935 /* Nmtokens ::= Nmtoken (#x20 Nmtoken)* */
936 return xml_check_tokens(value, ctx->cat_name, ctx->cat_name, 1);
/* Report through xml_error() that the value of attribute `dtd` does not match
 * the production called `type`; the message names the attribute and its
 * owning element. */
940 xml_err_attr_format(struct xml_context *ctx, struct xml_dtd_attr *dtd, char *type)
942 xml_error(ctx, "Attribute %s in <%s> does not match the production of %s", dtd->name, dtd->elem->name, type);
/* Validate an attribute value against its DTD definition `dtd`.
 * The CDATA test comes before xml_normalize_white(), so presumably CDATA values
 * are left alone and every other type is whitespace-normalised first (the body
 * of the CDATA branch is not visible here).  The remaining checks are per
 * type: name/names/nmtoken(s) syntax, membership in the attribute's enumerated
 * values, or existence of a declared notation.  Failures are reported with
 * xml_error().  Uniqueness of IDs and resolution of IDREFs are not implemented
 * (see the FIXMEs), and no check is visible for ENTITY / ENTITIES. */
946 xml_validate_attr(struct xml_context *ctx, struct xml_dtd_attr *dtd, char *value)
948 if (dtd->type == XML_ATTR_CDATA)
950 xml_normalize_white(ctx, value);
954 if (!xml_is_name(ctx, value))
955 xml_err_attr_format(ctx, dtd, "NAME");
956 //FIXME: add to a hash table
959 if (!xml_is_name(ctx, value))
960 xml_err_attr_format(ctx, dtd, "NAME");
961 // FIXME: find in hash table (beware forward references)
963 case XML_ATTR_IDREFS:
964 if (!xml_is_names(ctx, value))
965 xml_err_attr_format(ctx, dtd, "NAMES");
968 case XML_ATTR_ENTITY:
971 case XML_ATTR_ENTITIES:
974 case XML_ATTR_NMTOKEN:
975 if (!xml_is_nmtoken(ctx, value))
976 xml_err_attr_format(ctx, dtd, "NMTOKEN");
978 case XML_ATTR_NMTOKENS:
979 if (!xml_is_nmtokens(ctx, value))
980 xml_err_attr_format(ctx, dtd, "NMTOKENS");
983 if (!xml_dtd_evals_find(ctx->dtd->tab_evals, dtd, value))
984 xml_error(ctx, "Attribute %s in <%s> contains an undefined enumeration value", dtd->name, dtd->elem->name);
986 case XML_ATTR_NOTATION:
/* NOTE(review): this only checks that a notation of that name is declared in
 * the DTD; the attribute's own enumerated-notation list (tab_enotns) is not
 * consulted here -- confirm whether that is intended. */
987 if (!xml_dtd_find_notn(ctx, value))
988 xml_error(ctx, "Attribute %s in <%s> contains an undefined notation", dtd->name, dtd->elem->name);