2 * Sherlock Library -- A simple XML parser
4 * (c) 2007--2008 Pavel Charvat <pchar@ucw.cz>
6 * This software may be freely distributed and used according to the terms
7 * of the GNU Lesser General Public License.
15 #include <xml/internals.h>
16 #include <ucw/fastbuf.h>
17 #include <ucw/ff-unicode.h>
18 #include <ucw/unicode.h>
/* Notations: parameters for the generic hash table template pulled in by
 * <ucw/hashtable.h> below.  Names generated with the xml_dtd_notns_ prefix
 * (find, lookup, init and struct xml_dtd_notns_table) are used later in
 * this file.  Presumably nodes are keyed by their string member name --
 * confirm against the hashtable.h documentation. */
22 #define HASH_PREFIX(x) xml_dtd_notns_##x
23 #define HASH_NODE struct xml_dtd_notn
24 #define HASH_KEY_STRING name
25 #define HASH_ZERO_FILL
26 #define HASH_TABLE_DYNAMIC
27 #define HASH_WANT_LOOKUP
28 #define HASH_WANT_FIND
29 #define HASH_GIVE_ALLOC
30 #define HASH_TABLE_ALLOC
32 #include <ucw/hashtable.h>
/* Find a declared notation by name in ctx->dtd.
 * Returns the node found in tab_notns only when its XML_DTD_NOTN_DECLARED
 * flag is set; a missing node or a node lacking the flag yields NULL.
 * ctx->dtd is dereferenced without a NULL check. */
35 xml_dtd_find_notn(struct xml_context *ctx, char *name)
37 struct xml_dtd *dtd = ctx->dtd;
38 struct xml_dtd_notn *notn = xml_dtd_notns_find(dtd->tab_notns, name);
39 return !notn ? NULL : (notn->flags & XML_DTD_NOTN_DECLARED) ? notn : NULL;
42 /* General entities */
/* Entities: parameters for the <ucw/hashtable.h> template included below.
 * The generated xml_dtd_ents_ functions serve both the general entity table
 * (tab_ents) and the parameter entity table (tab_pents) in this file.
 * Nodes are struct xml_dtd_entity; presumably keyed by the string member
 * name -- confirm against the hashtable.h documentation. */
44 #define HASH_PREFIX(x) xml_dtd_ents_##x
45 #define HASH_NODE struct xml_dtd_entity
46 #define HASH_KEY_STRING name
47 #define HASH_ZERO_FILL
48 #define HASH_TABLE_DYNAMIC
49 #define HASH_WANT_FIND
50 #define HASH_WANT_LOOKUP
51 #define HASH_GIVE_ALLOC
52 #define HASH_TABLE_ALLOC
54 #include <ucw/hashtable.h>
/* Declare a general entity flagged as trivial in the DTD of ctx.
 * Looks the name up in tab_ents, warns through xml_warn when the node is
 * already flagged XML_DTD_ENTITY_DECLARED, and otherwise appends it to the
 * dtd->ents list and marks it DECLARED | TRIVIAL.
 * NOTE(review): how the text argument is stored and what is returned is on
 * lines not visible in this view. */
56 static struct xml_dtd_entity *
57 xml_dtd_declare_trivial_entity(struct xml_context *ctx, char *name, char *text)
59 struct xml_dtd *dtd = ctx->dtd;
60 struct xml_dtd_entity *ent = xml_dtd_ents_lookup(dtd->tab_ents, name);
61 if (ent->flags & XML_DTD_ENTITY_DECLARED)
63 xml_warn(ctx, "Entity &%s; already declared", name);
66 slist_add_tail(&dtd->ents, &ent->n);
67 ent->flags = XML_DTD_ENTITY_DECLARED | XML_DTD_ENTITY_TRIVIAL;
/* Register the five predefined entities (lt, gt, amp, apos, quot) with
 * their single-character replacement texts in the DTD of ctx. */
73 xml_dtd_declare_default_entities(struct xml_context *ctx)
75 xml_dtd_declare_trivial_entity(ctx, "lt", "<");
76 xml_dtd_declare_trivial_entity(ctx, "gt", ">");
77 xml_dtd_declare_trivial_entity(ctx, "amp", "&");
78 xml_dtd_declare_trivial_entity(ctx, "apos", "'");
79 xml_dtd_declare_trivial_entity(ctx, "quot", "\"");
/* Default entity lookup that does not consult any DTD (ctx is unused).
 * Defines static entity records for lt, gt, amp, apos and quot, each flagged
 * DECLARED | TRIVIAL, and compares name against those five names.
 * NOTE(review): the return statements are on lines not visible in this
 * view; presumably the matching static record or NULL is returned. */
82 struct xml_dtd_entity *
83 xml_def_find_entity(struct xml_context *ctx UNUSED, char *name)
/* ENT(n, t) expands to a declarator ent_n initialized with name n and text t. */
85 #define ENT(n, t) ent_##n = { .name = #n, .text = t, .flags = XML_DTD_ENTITY_DECLARED | XML_DTD_ENTITY_TRIVIAL }
86 static struct xml_dtd_entity ENT(lt, "<"), ENT(gt, ">"), ENT(amp, "&"), ENT(apos, "'"), ENT(quot, "\"");
91 if (!strcmp(name, "lt"))
95 if (!strcmp(name, "gt"))
99 if (!strcmp(name, "amp"))
101 if (!strcmp(name, "apos"))
105 if (!strcmp(name, "quot"))
/* Resolve a general entity by name.
 * When the ctx->h_find_entity hook is set, its result is returned directly.
 * Otherwise the visible code either searches tab_ents (returning the node
 * only if it is flagged XML_DTD_ENTITY_DECLARED) or falls back to
 * xml_def_find_entity.
 * NOTE(review): the condition choosing between the table search and the
 * fallback is on lines not visible in this view. */
112 struct xml_dtd_entity *
113 xml_dtd_find_entity(struct xml_context *ctx, char *name)
115 struct xml_dtd *dtd = ctx->dtd;
116 if (ctx->h_find_entity)
117 return ctx->h_find_entity(ctx, name);
120 struct xml_dtd_entity *ent = xml_dtd_ents_find(dtd->tab_ents, name);
121 return !ent ? NULL : (ent->flags & XML_DTD_ENTITY_DECLARED) ? ent : NULL;
124 return xml_def_find_entity(ctx, name);
127 /* Parameter entities */
/* Find a declared parameter entity by name.
 * Searches tab_pents and returns the node only if it carries the
 * XML_DTD_ENTITY_DECLARED flag; otherwise NULL.
 * ctx->dtd is dereferenced without a NULL check. */
129 static struct xml_dtd_entity *
130 xml_dtd_find_pentity(struct xml_context *ctx, char *name)
132 struct xml_dtd *dtd = ctx->dtd;
133 struct xml_dtd_entity *ent = xml_dtd_ents_find(dtd->tab_pents, name);
134 return !ent ? NULL : (ent->flags & XML_DTD_ENTITY_DECLARED) ? ent : NULL;
/* Forward declaration so the callback below can name the table type before
 * the hash table template defines it. */
139 struct xml_dtd_elems_table;
/* Node initializer for the element table (tab is unused): starts the new
 * element with an empty attribute list.  Presumably invoked by the hash
 * table template because HASH_GIVE_INIT_DATA is defined -- confirm. */
142 xml_dtd_elems_init_data(struct xml_dtd_elems_table *tab UNUSED, struct xml_dtd_elem *e)
144 slist_init(&e->attrs);
/* Elements: parameters for the <ucw/hashtable.h> template included below.
 * Generated xml_dtd_elems_ functions (find, lookup, init) are used later in
 * this file; nodes are struct xml_dtd_elem, presumably keyed by the string
 * member name -- confirm against the hashtable.h documentation. */
147 #define HASH_PREFIX(x) xml_dtd_elems_##x
148 #define HASH_NODE struct xml_dtd_elem
149 #define HASH_KEY_STRING name
150 #define HASH_TABLE_DYNAMIC
151 #define HASH_ZERO_FILL
152 #define HASH_WANT_FIND
153 #define HASH_WANT_LOOKUP
154 #define HASH_GIVE_ALLOC
155 #define HASH_GIVE_INIT_DATA
156 #define HASH_TABLE_ALLOC
158 #include <ucw/hashtable.h>
/* Find an element type by name in the DTD of ctx.
 * Returns NULL when ctx has no DTD; otherwise the result of searching
 * tab_elems.  Unlike the notation and entity finders, no DECLARED flag is
 * checked here. */
160 struct xml_dtd_elem *
161 xml_dtd_find_elem(struct xml_context *ctx, char *name)
163 return ctx->dtd ? xml_dtd_elems_find(ctx->dtd->tab_elems, name) : NULL;
/* Forward declaration of the table type used by the callbacks below. */
168 struct xml_dtd_enodes_table;
/* Hash of an element node key (parent node, element): XOR of the two
 * pointer hashes.  tab is unused. */
171 xml_dtd_enodes_hash(struct xml_dtd_enodes_table *tab UNUSED, struct xml_dtd_elem_node *parent, struct xml_dtd_elem *elem)
173 return hash_pointer(parent) ^ hash_pointer(elem);
/* Key equality for element nodes: both the parent pointers and the element
 * pointers must be identical.  tab is unused. */
177 xml_dtd_enodes_eq(struct xml_dtd_enodes_table *tab UNUSED, struct xml_dtd_elem_node *parent1, struct xml_dtd_elem *elem1, struct xml_dtd_elem_node *parent2, struct xml_dtd_elem *elem2)
179 return (parent1 == parent2) && (elem1 == elem2);
/* Store the key into a new element node.  The visible line records the
 * parent; NOTE(review): storing of elem is presumably on a line not visible
 * in this view -- confirm. */
183 xml_dtd_enodes_init_key(struct xml_dtd_enodes_table *tab UNUSED, struct xml_dtd_elem_node *node, struct xml_dtd_elem_node *parent, struct xml_dtd_elem *elem)
185 node->parent = parent;
/* Element nodes: parameters for the <ucw/hashtable.h> template included
 * below.  The key is the pair (parent, elem) and the hash and key-init
 * callbacks are supplied by this file; generated xml_dtd_enodes_find and
 * xml_dtd_enodes_new are used by the element declaration parser. */
189 #define HASH_PREFIX(x) xml_dtd_enodes_##x
190 #define HASH_NODE struct xml_dtd_elem_node
191 #define HASH_KEY_COMPLEX(x) x parent, x elem
192 #define HASH_KEY_DECL struct xml_dtd_elem_node *parent, struct xml_dtd_elem *elem
193 #define HASH_GIVE_HASHFN
195 #define HASH_GIVE_INIT_KEY
196 #define HASH_TABLE_DYNAMIC
197 #define HASH_ZERO_FILL
198 #define HASH_WANT_FIND
199 #define HASH_WANT_NEW
200 #define HASH_GIVE_ALLOC
201 #define HASH_TABLE_ALLOC
203 #include <ucw/hashtable.h>
205 /* Element attributes */
/* Forward declaration of the table type used by the callbacks below. */
207 struct xml_dtd_attrs_table;
/* Hash of an attribute key (owning element, attribute name): pointer hash
 * of the element XORed with the string hash of the name.  tab is unused. */
210 xml_dtd_attrs_hash(struct xml_dtd_attrs_table *tab UNUSED, struct xml_dtd_elem *elem, char *name)
212 return hash_pointer(elem) ^ hash_string(name);
/* Key equality for attributes: same element pointer and names that compare
 * equal with strcmp.  tab is unused. */
216 xml_dtd_attrs_eq(struct xml_dtd_attrs_table *tab UNUSED, struct xml_dtd_elem *elem1, char *name1, struct xml_dtd_elem *elem2, char *name2)
218 return (elem1 == elem2) && !strcmp(name1, name2);
/* Key initializer for a new attribute node.  The visible line appends the
 * attribute to the attrs list of its element; NOTE(review): storing of elem
 * and name into attr is presumably on lines not visible in this view. */
222 xml_dtd_attrs_init_key(struct xml_dtd_attrs_table *tab UNUSED, struct xml_dtd_attr *attr, struct xml_dtd_elem *elem, char *name)
226 slist_add_tail(&elem->attrs, &attr->n);
/* Attributes: parameters for the <ucw/hashtable.h> template included below.
 * The key is the pair (elem, name) with hash and key-init callbacks supplied
 * by this file; generated xml_dtd_attrs_find and xml_dtd_attrs_new are used
 * later in this file. */
229 #define HASH_PREFIX(x) xml_dtd_attrs_##x
230 #define HASH_NODE struct xml_dtd_attr
231 #define HASH_ZERO_FILL
232 #define HASH_TABLE_DYNAMIC
233 #define HASH_KEY_COMPLEX(x) x elem, x name
234 #define HASH_KEY_DECL struct xml_dtd_elem *elem, char *name
235 #define HASH_GIVE_HASHFN
237 #define HASH_GIVE_INIT_KEY
238 #define HASH_WANT_FIND
239 #define HASH_WANT_NEW
240 #define HASH_GIVE_ALLOC
241 #define HASH_TABLE_ALLOC
243 #include <ucw/hashtable.h>
/* Find the attribute called name that belongs to elem.
 * Returns NULL when ctx has no DTD; otherwise the result of searching
 * tab_attrs with the (elem, name) key. */
245 struct xml_dtd_attr *
246 xml_dtd_find_attr(struct xml_context *ctx, struct xml_dtd_elem *elem, char *name)
248 return ctx->dtd ? xml_dtd_attrs_find(ctx->dtd->tab_attrs, elem, name) : NULL;
251 /* Enumerated attribute values */
/* Forward declaration of the table type used by the callbacks below. */
253 struct xml_dtd_evals_table;
/* Hash of an enumerated value key (attribute, value string): pointer hash
 * of the attribute XORed with the string hash of the value.  tab is unused. */
256 xml_dtd_evals_hash(struct xml_dtd_evals_table *tab UNUSED, struct xml_dtd_attr *attr, char *val)
258 return hash_pointer(attr) ^ hash_string(val);
/* Key equality for enumerated values: same attribute pointer and values
 * that compare equal with strcmp.  tab is unused. */
262 xml_dtd_evals_eq(struct xml_dtd_evals_table *tab UNUSED, struct xml_dtd_attr *attr1, char *val1, struct xml_dtd_attr *attr2, char *val2)
264 return (attr1 == attr2) && !strcmp(val1, val2);
/* Key initializer for a new enumerated value node (tab is unused).
 * NOTE(review): the body is not visible in this view; presumably it stores
 * attr and val into eval -- confirm. */
268 xml_dtd_evals_init_key(struct xml_dtd_evals_table *tab UNUSED, struct xml_dtd_eval *eval, struct xml_dtd_attr *attr, char *val)
/* Enumerated attribute values: parameters for the <ucw/hashtable.h>
 * template included below.  The key is the pair (attr, val); generated
 * xml_dtd_evals_find and xml_dtd_evals_new are used later in this file.
 * Unlike the preceding tables, HASH_ZERO_FILL is not requested here. */
274 #define HASH_PREFIX(x) xml_dtd_evals_##x
275 #define HASH_NODE struct xml_dtd_eval
276 #define HASH_TABLE_DYNAMIC
277 #define HASH_KEY_COMPLEX(x) x attr, x val
278 #define HASH_KEY_DECL struct xml_dtd_attr *attr, char *val
279 #define HASH_GIVE_HASHFN
281 #define HASH_GIVE_INIT_KEY
282 #define HASH_WANT_FIND
283 #define HASH_WANT_NEW
284 #define HASH_GIVE_ALLOC
285 #define HASH_TABLE_ALLOC
287 #include <ucw/hashtable.h>
289 /* Enumerated attribute notations */
/* Forward declaration of the table type used by the callbacks below. */
291 struct xml_dtd_enotns_table;
/* Hash of an enumerated notation key (attribute, notation): XOR of the two
 * pointer hashes.  tab is unused. */
294 xml_dtd_enotns_hash(struct xml_dtd_enotns_table *tab UNUSED, struct xml_dtd_attr *attr, struct xml_dtd_notn *notn)
296 return hash_pointer(attr) ^ hash_pointer(notn);
/* Key equality for enumerated notations: both the attribute pointers and
 * the notation pointers must be identical.  tab is unused. */
300 xml_dtd_enotns_eq(struct xml_dtd_enotns_table *tab UNUSED, struct xml_dtd_attr *attr1, struct xml_dtd_notn *notn1, struct xml_dtd_attr *attr2, struct xml_dtd_notn *notn2)
302 return (attr1 == attr2) && (notn1 == notn2);
/* Key initializer for a new enumerated notation node (tab is unused).
 * NOTE(review): the body is not visible in this view; presumably it stores
 * attr and notn into enotn -- confirm. */
306 xml_dtd_enotns_init_key(struct xml_dtd_enotns_table *tab UNUSED, struct xml_dtd_enotn *enotn, struct xml_dtd_attr *attr, struct xml_dtd_notn *notn)
/* Enumerated attribute notations: parameters for the <ucw/hashtable.h>
 * template included below.  The key is the pair (attr, notn); generated
 * xml_dtd_enotns_find and xml_dtd_enotns_new are used later in this file.
 * HASH_ZERO_FILL is not requested here. */
312 #define HASH_PREFIX(x) xml_dtd_enotns_##x
313 #define HASH_NODE struct xml_dtd_enotn
314 #define HASH_TABLE_DYNAMIC
315 #define HASH_KEY_COMPLEX(x) x attr, x notn
316 #define HASH_KEY_DECL struct xml_dtd_attr *attr, struct xml_dtd_notn *notn
317 #define HASH_GIVE_HASHFN
319 #define HASH_GIVE_INIT_KEY
320 #define HASH_WANT_FIND
321 #define HASH_WANT_NEW
322 #define HASH_GIVE_ALLOC
323 #define HASH_TABLE_ALLOC
325 #include <ucw/hashtable.h>
327 /* DTD initialization/cleanup */
/* Create the DTD of ctx.
 * Allocates a new memory pool, places a zeroed struct xml_dtd in it and
 * stores it in ctx->dtd, then creates and initializes all eight hash tables
 * from the same pool and declares the predefined entities.
 * NOTE(review): xml_dtd_cleanup reads dtd->pool, so the pool is presumably
 * stored on a line not visible in this view -- confirm. */
330 xml_dtd_init(struct xml_context *ctx)
334 struct mempool *pool = mp_new(4096);
335 struct xml_dtd *dtd = ctx->dtd = mp_alloc_zero(pool, sizeof(*ctx->dtd));
/* tab_ents and tab_pents share the xml_dtd_ents table implementation. */
337 xml_dtd_ents_init(dtd->tab_ents = xml_hash_new(pool, sizeof(struct xml_dtd_ents_table)));
338 xml_dtd_ents_init(dtd->tab_pents = xml_hash_new(pool, sizeof(struct xml_dtd_ents_table)));
339 xml_dtd_notns_init(dtd->tab_notns = xml_hash_new(pool, sizeof(struct xml_dtd_notns_table)));
340 xml_dtd_elems_init(dtd->tab_elems = xml_hash_new(pool, sizeof(struct xml_dtd_elems_table)));
341 xml_dtd_enodes_init(dtd->tab_enodes = xml_hash_new(pool, sizeof(struct xml_dtd_enodes_table)));
342 xml_dtd_attrs_init(dtd->tab_attrs = xml_hash_new(pool, sizeof(struct xml_dtd_attrs_table)));
343 xml_dtd_evals_init(dtd->tab_evals = xml_hash_new(pool, sizeof(struct xml_dtd_evals_table)));
344 xml_dtd_enotns_init(dtd->tab_enotns = xml_hash_new(pool, sizeof(struct xml_dtd_enotns_table)));
345 xml_dtd_declare_default_entities(ctx);
/* Release the DTD of ctx by deleting its memory pool.
 * NOTE(review): any guard against a missing DTD and any reset of ctx->dtd
 * are on lines not visible in this view. */
349 xml_dtd_cleanup(struct xml_context *ctx)
353 mp_delete(ctx->dtd->pool);
/* Hook called when the DTD is complete.  The only visible content is a
 * FIXME: validity checks are not implemented yet. */
358 xml_dtd_finish(struct xml_context *ctx)
362 // FIXME: validity checks
365 /*** Parsing functions ***/
367 /* References to parameter entities */
/* Parse a parameter entity reference and push the referenced entity.
 * The name is parsed into the temporary ctx->stack pool, whose state is
 * saved on entry and restored on every visible path.  An unknown name is
 * reported through xml_error; a known entity is pushed with xml_push_entity.
 * NOTE(review): the branch structure between these steps is on lines not
 * visible in this view. */
370 xml_parse_pe_ref(struct xml_context *ctx)
372 /* PEReference ::= '%' Name ';'
373 * Already parsed: '%' */
374 struct mempool_state state;
375 mp_save(ctx->stack, &state);
376 char *name = xml_parse_name(ctx, ctx->stack);
377 xml_parse_char(ctx, ';');
378 struct xml_dtd_entity *ent = xml_dtd_find_pentity(ctx, name);
380 xml_error(ctx, "Unknown entity %%%s;", name);
383 TRACE(ctx, "Pushed entity %%%s;", name);
384 mp_restore(ctx->stack, &state);
386 xml_push_entity(ctx, ent);
389 mp_restore(ctx->stack, &state);
/* Handle a percent sign met while skipping DTD whitespace.
 * When entity_decl is ~0U and the next character is whitespace, the special
 * case on the first visible test applies (its action is not visible here).
 * Otherwise parameter entity references are expanded in a loop, skipping
 * whitespace after each one, for as long as the next character read is
 * another percent sign.
 * NOTE(review): return values are on lines not visible in this view. */
394 xml_parse_dtd_pe(struct xml_context *ctx, uint entity_decl)
396 /* Already parsed: '%' */
/* !~entity_decl is true only when entity_decl has all bits set (~0U). */
400 if (!~entity_decl && (xml_peek_cat(ctx) & XML_CHAR_WHITE))
405 xml_parse_pe_ref(ctx);
406 while (xml_peek_cat(ctx) & XML_CHAR_WHITE)
409 while (xml_get_char(ctx) == '%');
/* Skip whitespace inside the DTD, expanding parameter entity references.
 * Consumes whitespace characters; if a percent sign follows, the result of
 * xml_parse_dtd_pe is returned.  When mandatory is non-zero and no
 * whitespace was counted, xml_fatal_expected_white is raised.
 * NOTE(review): the counter update and the final return are on lines not
 * visible in this view. */
415 xml_parse_dtd_white(struct xml_context *ctx, uint mandatory)
417 /* Whitespace or parameter entity,
418 * mandatory==~0U has a special meaning of the whitespace before the '%' character in a parameter entity declaration */
420 while (xml_peek_cat(ctx) & XML_CHAR_WHITE)
425 if (xml_peek_char(ctx) == '%')
428 return xml_parse_dtd_pe(ctx, mandatory);
430 else if (unlikely(mandatory && !cnt))
431 xml_fatal_expected_white(ctx);
/* Parse an external identifier and store the literals via system_id and
 * public_id; both literals are allocated from dtd->pool.
 * Two forms are visible: SYSTEM followed by a system literal, and PUBLIC
 * followed by a public ID literal and then a system literal.  With
 * allow_public non-zero the whitespace after the public ID is optional and
 * the system literal is parsed only when a quote character follows; with
 * allow_public zero both are required.  Anything else is fatal.
 * NOTE(review): the dispatch on c and any defaulting of the output pointers
 * are on lines not visible in this view. */
436 xml_dtd_parse_external_id(struct xml_context *ctx, char **system_id, char **public_id, uint allow_public)
438 struct xml_dtd *dtd = ctx->dtd;
439 uint c = xml_peek_char(ctx);
442 xml_parse_seq(ctx, "SYSTEM");
443 xml_parse_dtd_white(ctx, 1);
445 *system_id = xml_parse_system_literal(ctx, dtd->pool);
449 xml_parse_seq(ctx, "PUBLIC");
450 xml_parse_dtd_white(ctx, 1);
452 *public_id = xml_parse_pubid_literal(ctx, dtd->pool);
453 if (xml_parse_dtd_white(ctx, !allow_public))
454 if ((c = xml_peek_char(ctx)) == '\'' || c == '"' || !allow_public)
455 *system_id = xml_parse_system_literal(ctx, dtd->pool);
458 xml_fatal(ctx, "Expected an external ID");
461 /* DTD: <!NOTATION ...> */
/* Parse a notation declaration and register the notation in the DTD.
 * The name is looked up (and created if needed) in tab_notns, then the
 * external or public ID and the closing angle bracket are parsed.  A
 * notation already flagged XML_DTD_NOTN_DECLARED only produces a warning;
 * otherwise the node is flagged, receives the parsed IDs and is appended to
 * the dtd->notns list.
 * NOTE(review): the else separating the warning from the registration is on
 * a line not visible in this view. */
464 xml_parse_notation_decl(struct xml_context *ctx)
466 /* NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
467 * Already parsed: '<!NOTATION' */
468 TRACE(ctx, "parse_notation_decl");
469 struct xml_dtd *dtd = ctx->dtd;
470 xml_parse_dtd_white(ctx, 1);
472 struct xml_dtd_notn *notn = xml_dtd_notns_lookup(dtd->tab_notns, xml_parse_name(ctx, dtd->pool));
473 xml_parse_dtd_white(ctx, 1);
474 char *system_id, *public_id;
/* allow_public = 1: a public ID without a system literal is accepted. */
475 xml_dtd_parse_external_id(ctx, &system_id, &public_id, 1);
476 xml_parse_dtd_white(ctx, 0);
477 xml_parse_char(ctx, '>');
479 if (notn->flags & XML_DTD_NOTN_DECLARED)
480 xml_warn(ctx, "Notation %s already declared", notn->name);
483 notn->flags = XML_DTD_NOTN_DECLARED;
484 notn->system_id = system_id;
485 notn->public_id = public_id;
/* Link through the list node embedded in the notation (address of notn->n). */
486 slist_add_tail(&dtd->notns, &notn->n);
491 /* DTD: <!ENTITY ...> */
/* Parse an entity declaration (general or parameter) and record it.
 * flags becomes XML_DTD_ENTITY_PARAMETER when xml_parse_dtd_white(ctx, ~0U)
 * returns ~0U and 0 otherwise; it selects tab_pents and the pents list
 * versus tab_ents and the ents list.  A quoted entity value is collected
 * into dtd->pool and stored as the entity text; otherwise an external ID is
 * parsed, optionally followed by NDATA and a notation name.  Declaring an
 * entity twice is currently fatal (see the FIXME).
 * NOTE(review): many lines of the value loop are not visible in this view. */
494 xml_parse_entity_decl(struct xml_context *ctx)
496 /* Already parsed: '<!ENTITY' */
497 TRACE(ctx, "parse_entity_decl");
498 struct xml_dtd *dtd = ctx->dtd;
499 uint flags = ~xml_parse_dtd_white(ctx, ~0U) ? 0 : XML_DTD_ENTITY_PARAMETER;
501 xml_parse_dtd_white(ctx, 1);
502 struct xml_dtd_entity *ent = xml_dtd_ents_lookup(flags ? dtd->tab_pents : dtd->tab_ents, xml_parse_name(ctx, dtd->pool));
503 xml_parse_dtd_white(ctx, 1);
504 slist *list = flags ? &dtd->pents : &dtd->ents;
505 if (ent->flags & XML_DTD_ENTITY_DECLARED)
507 xml_fatal(ctx, "Entity &%s; already declared, skipping not implemented", ent->name);
508 // FIXME: should be only warning
510 uint c, sep = xml_get_char(ctx);
511 if (sep == '\'' || sep == '"')
514 * EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | "'" ([^%&'] | PEReference | Reference)* "'" */
515 char *p = mp_start_noalign(dtd->pool, 1);
518 if ((c = xml_get_char(ctx)) == sep)
524 //xml_parse_parameter_ref(ctx);
530 if (xml_peek_char(ctx) != '#')
532 /* Bypass references to general entities */
533 struct mempool_state state;
534 mp_save(ctx->stack, &state);
535 char *n = xml_parse_name(ctx, ctx->stack);
536 xml_parse_char(ctx, ';');
539 p = mp_spread(dtd->pool, p, 3 + l);
544 mp_restore(ctx->stack, &state);
550 c = xml_parse_char_ref(ctx);
553 p = mp_spread(dtd->pool, p, 5);
554 p = utf8_32_put(p, c);
557 ent->len = p - (char *)mp_ptr(dtd->pool);
558 ent->text = mp_end(dtd->pool, p + 1);
559 slist_add_tail(list, &ent->n);
560 ent->flags = flags | XML_DTD_ENTITY_DECLARED;
564 /* External entity */
565 struct xml_dtd_notn *notn = NULL;
566 char *system_id, *public_id;
568 xml_dtd_parse_external_id(ctx, &system_id, &public_id, 0);
/* NDATA marks an unparsed entity, which the XML grammar permits only for
 * general entities (flags == 0); a parameter entity ends after the
 * external ID. */
569 if (xml_parse_dtd_white(ctx, 0) && !flags && xml_peek_char(ctx) != '>')
571 /* General external unparsed entity */
572 flags |= XML_DTD_ENTITY_UNPARSED;
573 xml_parse_seq(ctx, "NDATA");
574 xml_parse_dtd_white(ctx, 1);
575 notn = xml_dtd_notns_lookup(dtd->tab_notns, xml_parse_name(ctx, dtd->pool));
577 slist_add_tail(list, &ent->n);
578 ent->flags = flags | XML_DTD_ENTITY_DECLARED | XML_DTD_ENTITY_EXTERNAL;
579 ent->system_id = system_id;
580 ent->public_id = public_id;
583 xml_parse_dtd_white(ctx, 0);
584 xml_parse_char(ctx, '>');
588 /* DTD: <!ELEMENT ...> */
/* Parse an element type declaration and build its content model.
 * The element is looked up (created if needed) in tab_elems; declaring it
 * twice is fatal.  The content specification is EMPTY, ANY, a mixed model
 * starting with #PCDATA, or a children model.  For the latter two a root
 * struct xml_dtd_elem_node is allocated from dtd->pool and child nodes are
 * appended to the sons list of the current parent.
 * In the children branch XML_DTD_ELEM_PCDATA serves as the initial type of
 * a group; the visible code replaces it with SEQ or OR once an operator or
 * the end of the group is seen, and mixing both operators in one group is
 * fatal.
 * NOTE(review): the dispatch on the characters read and the loop structure
 * are on lines not visible in this view. */
591 xml_parse_element_decl(struct xml_context *ctx)
593 /* Elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
594 * Already parsed: '<!ELEMENT' */
595 struct xml_dtd *dtd = ctx->dtd;
596 xml_parse_dtd_white(ctx, 1);
597 char *name = xml_parse_name(ctx, dtd->pool);
598 xml_parse_dtd_white(ctx, 1);
599 struct xml_dtd_elem *elem = xml_dtd_elems_lookup(dtd->tab_elems, name);
600 if (elem->flags & XML_DTD_ELEM_DECLARED)
601 xml_fatal(ctx, "Element <%s> already declared", name);
603 /* contentspec ::= 'EMPTY' | 'ANY' | Mixed | children */
604 uint c = xml_peek_char(ctx);
607 xml_parse_seq(ctx, "EMPTY");
608 elem->type = XML_DTD_ELEM_EMPTY;
612 xml_parse_seq(ctx, "ANY");
613 elem->type = XML_DTD_ELEM_ANY;
619 xml_parse_dtd_white(ctx, 0);
/* Root node of the content model, shared by the mixed and children forms. */
620 struct xml_dtd_elem_node *parent = elem->node = mp_alloc_zero(dtd->pool, sizeof(*parent));
621 if (xml_peek_char(ctx) == '#')
623 /* Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | '(' S? '#PCDATA' S? ')' */
625 xml_parse_seq(ctx, "PCDATA");
626 elem->type = XML_DTD_ELEM_MIXED;
627 parent->type = XML_DTD_ELEM_PCDATA;
630 xml_parse_dtd_white(ctx, 0);
631 if ((c = xml_get_char(ctx)) == ')')
634 xml_fatal_expected(ctx, ')');
635 xml_parse_dtd_white(ctx, 0);
636 struct xml_dtd_elem *son_elem = xml_dtd_elems_lookup(dtd->tab_elems, xml_parse_name(ctx, dtd->pool));
/* Each element name may appear only once in a mixed model. */
637 if (xml_dtd_enodes_find(dtd->tab_enodes, parent, son_elem))
638 xml_error(ctx, "Duplicate content '%s'", son_elem->name);
641 struct xml_dtd_elem_node *son = xml_dtd_enodes_new(dtd->tab_enodes, parent, son_elem);
642 slist_add_tail(&parent->sons, &son->n);
646 if (xml_peek_char(ctx) == '*')
649 parent->occur = XML_DTD_ELEM_OCCUR_MULT;
/* Without a trailing star the model is accepted only when no names were listed. */
651 else if (!slist_head(&parent->sons))
652 parent->occur = XML_DTD_ELEM_OCCUR_ONCE;
654 xml_fatal_expected(ctx, '*');
658 /* children ::= (choice | seq) ('?' | '*' | '+')?
659 * cp ::= (Name | choice | seq) ('?' | '*' | '+')?
660 * choice ::= '(' S? cp ( S? '|' S? cp )+ S? ')'
661 * seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' */
663 elem->type = XML_DTD_ELEM_CHILDREN;
664 parent->type = XML_DTD_ELEM_PCDATA;
671 xml_parse_dtd_white(ctx, 0);
672 if ((c = xml_get_char(ctx)) == ')')
675 if (parent->type == XML_DTD_ELEM_PCDATA)
676 parent->type = XML_DTD_ELEM_SEQ;
677 if ((c = xml_get_char(ctx)) == '?')
678 parent->occur = XML_DTD_ELEM_OCCUR_OPT;
680 parent->occur = XML_DTD_ELEM_OCCUR_MULT;
682 parent->occur = XML_DTD_ELEM_OCCUR_PLUS;
686 parent->occur = XML_DTD_ELEM_OCCUR_ONCE;
690 parent = parent->parent;
695 if (parent->type == XML_DTD_ELEM_PCDATA)
696 parent->type = XML_DTD_ELEM_OR;
697 else if (parent->type != XML_DTD_ELEM_OR)
698 xml_fatal(ctx, "Mixed operators in the list of element children");
702 if (parent->type == XML_DTD_ELEM_PCDATA)
703 parent->type = XML_DTD_ELEM_SEQ;
704 else if (parent->type != XML_DTD_ELEM_SEQ)
705 xml_fatal(ctx, "Mixed operators in the list of element children");
710 struct xml_dtd_elem_node *son = mp_alloc_zero(dtd->pool, sizeof(*son));
711 son->parent = parent;
712 slist_add_tail(&parent->sons, &son->n);
/* NOTE(review): son->parent was just set to parent, so this assignment
 * leaves parent unchanged; descending into the new nested group would
 * need parent = son.  The type set below is also not the PCDATA marker the
 * operator checks above expect for a fresh group.  Confirm intent. */
713 parent = son->parent;
714 son->type = XML_DTD_ELEM_MIXED;
720 xml_parse_dtd_white(ctx, 0);
722 struct xml_dtd_elem *son_elem = xml_dtd_elems_lookup(dtd->tab_elems, xml_parse_name(ctx, dtd->pool));
723 // FIXME: duplicates, occurrence
724 //struct xml_dtd_elem_node *son = xml_dtd_enodes_new(dtd->tab_enodes, parent, son_elem);
725 struct xml_dtd_elem_node *son = mp_alloc_zero(dtd->pool, sizeof(*son));
726 son->parent = parent;
727 son->elem = son_elem;
728 slist_add_tail(&parent->sons, &son->n);
733 xml_fatal(ctx, "Expected element content specification");
735 xml_parse_dtd_white(ctx, 0);
736 xml_parse_char(ctx, '>');
/* Parse an attribute list declaration for one element type.
 * The element is looked up (created if needed) in tab_elems.  For every
 * attribute definition the name is parsed, an existing definition is
 * reported with a warning, and a new attribute node is created in
 * tab_attrs.  The type is either an enumeration in parentheses (values
 * stored in tab_evals), one of the keyword types, or NOTATION followed by a
 * parenthesized list of notation names (stored in tab_enotns).  Then the
 * default declaration is parsed: #REQUIRED, #IMPLIED, #FIXED plus a value,
 * or a plain default value.
 * NOTE(review): several branch bodies and the handling of a duplicate
 * attribute are on lines not visible in this view. */
741 xml_parse_attr_list_decl(struct xml_context *ctx)
743 /* AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
744 * AttDef ::= S Name S AttType S DefaultDecl
745 * Already parsed: '<!ATTLIST' */
746 struct xml_dtd *dtd = ctx->dtd;
747 xml_parse_dtd_white(ctx, 1);
748 struct xml_dtd_elem *elem = xml_dtd_elems_lookup(ctx->dtd->tab_elems, xml_parse_name(ctx, dtd->pool));
/* One iteration per attribute definition, until the closing bracket. */
750 while (xml_parse_dtd_white(ctx, 0) && xml_peek_char(ctx) != '>')
752 char *name = xml_parse_name(ctx, dtd->pool);
753 struct xml_dtd_attr *attr = xml_dtd_attrs_find(dtd->tab_attrs, elem, name);
757 xml_warn(ctx, "Duplicate attribute definition");
761 attr = xml_dtd_attrs_new(ctx->dtd->tab_attrs, elem, name);
762 xml_parse_dtd_white(ctx, 1);
763 if (xml_peek_char(ctx) == '(')
765 xml_skip_char(ctx); // FIXME: xml_inc/dec ?
767 attr->type = XML_ATTR_ENUM;
770 xml_parse_dtd_white(ctx, 0);
771 char *value = xml_parse_nmtoken(ctx, dtd->pool);
773 if (xml_dtd_evals_find(ctx->dtd->tab_evals, attr, value))
774 xml_error(ctx, "Duplicate enumeration value");
776 xml_dtd_evals_new(ctx->dtd->tab_evals, attr, value);
777 xml_parse_dtd_white(ctx, 0);
779 while (xml_get_char(ctx) == '|');
781 xml_parse_char(ctx, ')');
785 char *type = xml_parse_name(ctx, dtd->pool);
786 enum xml_dtd_attr_type t = XML_ATTR_CDATA;
787 if (!strcmp(type, "CDATA"))
789 else if (!strcmp(type, "ID"))
791 else if (!strcmp(type, "IDREF"))
793 else if (!strcmp(type, "IDREFS"))
795 else if (!strcmp(type, "ENTITY"))
797 else if (!strcmp(type, "ENTITIES"))
798 t = XML_ATTR_ENTITIES;
799 else if (!strcmp(type, "NMTOKEN"))
800 t = XML_ATTR_NMTOKEN;
801 else if (!strcmp(type, "NMTOKENS"))
802 t = XML_ATTR_NMTOKENS;
803 else if (!strcmp(type, "NOTATION"))
805 if (elem->type == XML_DTD_ELEM_EMPTY)
806 xml_fatal(ctx, "Empty element must not have notation attribute");
807 // FIXME: An element type MUST NOT have more than one NOTATION attribute specified.
808 t = XML_ATTR_NOTATION;
809 xml_parse_dtd_white(ctx, 1);
810 xml_parse_char(ctx, '(');
813 xml_parse_dtd_white(ctx, 0);
814 struct xml_dtd_notn *n = xml_dtd_notns_lookup(ctx->dtd->tab_notns, xml_parse_name(ctx, dtd->pool));
816 if (xml_dtd_enotns_find(ctx->dtd->tab_enotns, attr, n))
817 xml_error(ctx, "Duplicate enumerated notation");
819 xml_dtd_enotns_new(ctx->dtd->tab_enotns, attr, n);
820 xml_parse_dtd_white(ctx, 0);
822 while (xml_get_char(ctx) == '|');
824 xml_parse_char(ctx, ')');
827 xml_fatal(ctx, "Unknown attribute type");
831 xml_parse_dtd_white(ctx, 1);
832 enum xml_dtd_attr_default def = XML_ATTR_NONE;
833 if (xml_get_char(ctx) == '#')
834 switch (xml_peek_char(ctx))
837 xml_parse_seq(ctx, "REQUIRED");
838 def = XML_ATTR_REQUIRED;
841 xml_parse_seq(ctx, "IMPLIED");
842 def = XML_ATTR_IMPLIED;
845 xml_parse_seq(ctx, "FIXED");
846 def = XML_ATTR_FIXED;
847 xml_parse_dtd_white(ctx, 1);
850 xml_fatal(ctx, "Expected a modifier for default attribute value");
/* A default value literal follows unless the attribute is REQUIRED or IMPLIED. */
854 if (def != XML_ATTR_REQUIRED && def != XML_ATTR_IMPLIED)
856 char *v = xml_parse_attr_value(ctx, attr);
858 attr->default_value = v;
861 attr->default_mode = def;
/* Skip the internal DTD subset without interpreting it.
 * Reads characters up to the closing square bracket.  Inside, the visible
 * code distinguishes a construct whose second character is a question mark,
 * a comment (skipped with xml_skip_comment) and other declarations, which
 * are skipped up to the closing angle bracket while stepping over quoted
 * literals so that a bracket inside quotes does not end the declaration.
 * NOTE(review): the tests selecting these cases are partly on lines not
 * visible in this view. */
868 xml_skip_internal_subset(struct xml_context *ctx)
870 TRACE(ctx, "skip_internal_subset");
871 /* AlreadyParsed: '[' */
873 while ((c = xml_get_char(ctx)) != ']')
877 if ((c = xml_get_char(ctx)) == '?')
884 else if (xml_get_char(ctx) == '-')
887 xml_skip_comment(ctx);
890 while ((c = xml_get_char(ctx)) != '>')
891 if (c == '\'' || c == '"')
892 while (xml_get_char(ctx) != c);
897 /*** Validation of attribute values ***/
/* Check that value consists of tokens built from given character classes.
 * Characters are decoded from UTF-8 with utf8_32_get; the first character
 * of a token is tested against first_cat and the following ones against
 * next_cat using xml_char_cat.
 * NOTE(review): callers pass seq = 1 for space separated lists and 0 for a
 * single token; the loop structure, the use of seq and the return values
 * are on lines not visible in this view. */
900 xml_check_tokens(char *value, uint first_cat, uint next_cat, uint seq)
906 p = utf8_32_get(p, &u);
907 if (!(xml_char_cat(u) & first_cat))
911 p = utf8_32_get(p, &u);
912 if (!(xml_char_cat(u) & next_cat))
/* Test whether value is a single Name, using the name start and name
 * character categories configured in ctx (cat_sname, cat_name). */
924 xml_is_name(struct xml_context *ctx, char *value)
926 /* Name ::= NameStartChar (NameChar)* */
927 return xml_check_tokens(value, ctx->cat_sname, ctx->cat_name, 0);
/* Test whether value is a list of Names; same categories as xml_is_name
 * but with the seq argument of xml_check_tokens set to 1. */
931 xml_is_names(struct xml_context *ctx, char *value)
933 /* Names ::= Name (#x20 Name)* */
934 return xml_check_tokens(value, ctx->cat_sname, ctx->cat_name, 1);
/* Test whether value is a single Nmtoken: every character, including the
 * first, is checked against the name character category ctx->cat_name. */
938 xml_is_nmtoken(struct xml_context *ctx, char *value)
940 /* Nmtoken ::= (NameChar)+ */
941 return xml_check_tokens(value, ctx->cat_name, ctx->cat_name, 0);
/* Test whether value is a list of Nmtokens; same categories as
 * xml_is_nmtoken but with the seq argument of xml_check_tokens set to 1. */
945 xml_is_nmtokens(struct xml_context *ctx, char *value)
947 /* Nmtokens ::= Nmtoken (#x20 Nmtoken)* */
948 return xml_check_tokens(value, ctx->cat_name, ctx->cat_name, 1);
/* Report through xml_error that an attribute value does not match the
 * grammar production named by type.  Note that the parameter called dtd is
 * the attribute definition (struct xml_dtd_attr), not the whole DTD; its
 * name and the name of its element appear in the message. */
952 xml_err_attr_format(struct xml_context *ctx, struct xml_dtd_attr *dtd, char *type)
954 xml_error(ctx, "Attribute %s in <%s> does not match the production of %s", dtd->name, dtd->elem->name, type);
958 xml_validate_attr(struct xml_context *ctx, struct xml_dtd_attr *dtd, char *value)
960 if (dtd->type == XML_ATTR_CDATA)
962 xml_normalize_white(ctx, value);
966 if (!xml_is_name(ctx, value))
967 xml_err_attr_format(ctx, dtd, "NAME");
968 //FIXME: add to a hash table
971 if (!xml_is_name(ctx, value))
972 xml_err_attr_format(ctx, dtd, "NAME");
973 // FIXME: find in hash table (beware forward references)
975 case XML_ATTR_IDREFS:
976 if (!xml_is_names(ctx, value))
977 xml_err_attr_format(ctx, dtd, "NAMES");
980 case XML_ATTR_ENTITY:
983 case XML_ATTR_ENTITIES:
986 case XML_ATTR_NMTOKEN:
987 if (!xml_is_nmtoken(ctx, value))
988 xml_err_attr_format(ctx, dtd, "NMTOKEN");
990 case XML_ATTR_NMTOKENS:
991 if (!xml_is_nmtokens(ctx, value))
992 xml_err_attr_format(ctx, dtd, "NMTOKENS");
995 if (!xml_dtd_evals_find(ctx->dtd->tab_evals, dtd, value))
996 xml_error(ctx, "Attribute %s in <%s> contains an undefined enumeration value", dtd->name, dtd->elem->name);
998 case XML_ATTR_NOTATION:
999 if (!xml_dtd_find_notn(ctx, value))
1000 xml_error(ctx, "Attribute %s in <%s> contains an undefined notation", dtd->name, dtd->elem->name);