2 * Sherlock Library -- A simple XML parser
4 * (c) 2007 Pavel Charvat <pchar@ucw.cz>
6 * This software may be freely distributed and used according to the terms
7 * of the GNU Lesser General Public License.
12 #include "sherlock/sherlock.h"
13 #include "sherlock/xml/xml.h"
14 #include "sherlock/xml/dtd.h"
15 #include "sherlock/xml/common.h"
16 #include "lib/fastbuf.h"
17 #include "lib/ff-unicode.h"
18 #include "lib/unicode.h"
19 #include "lib/chartype.h"
20 #include "lib/hashfunc.h"
24 /*** Basic parsing ***/
/* Report through xml_fatal() which character the parser expected.
 * Code points in the range [32, 128) are formatted with %c; the other
 * visible call formats the value as U+xxxx.
 * NOTE(review): the return type, the braces and presumably an `else`
 * before the second call are not visible in this view -- confirm. */
27 xml_fatal_expected(struct xml_context *ctx, uns c)
29 if (c >= 32 && c < 128)
30 xml_fatal(ctx, "Expected '%c'", c);
32 xml_fatal(ctx, "Expected U+%04x", c);
/* Report through xml_fatal() that a white space was expected at the
 * current position. */
36 xml_fatal_expected_white(struct xml_context *ctx)
38 xml_fatal(ctx, "Expected a white space");
/* Report through xml_fatal() that a quotation mark was expected at the
 * current position. */
42 xml_fatal_expected_quot(struct xml_context *ctx)
44 xml_fatal(ctx, "Expected a quotation mark");
/* Parse the Eq production: white space, the '=' character, white space.
 * NOTE(review): the second argument 0 of xml_parse_white() presumably
 * marks the white space as optional (matching S? in the grammar below);
 * xml_parse_white() is defined elsewhere -- confirm. */
48 xml_parse_eq(struct xml_context *ctx)
50 /* Eq ::= S? '=' S? */
51 xml_parse_white(ctx, 0);
52 xml_parse_char(ctx, '=');
53 xml_parse_white(ctx, 0);
56 /*** Names and nmtokens ***/
/* Collect a run of input characters into `pool` as UTF-8 text.
 *
 * ctx       -- parser context the characters are read from
 * pool      -- mempool that receives the growing string
 * first_cat -- category mask tested against the first (peeked) character
 * next_cat  -- category mask tested against each following peeked character
 * err       -- message passed to xml_fatal() when the first test fails
 *
 * Returns whatever mp_end(pool, p) returns for the collected bytes.
 * NOTE(review): the loop header and any statement between the loop and the
 * return are not visible in this view; the trailing `while (...)` suggests a
 * do-while loop -- confirm against the full file. */
59 xml_parse_string(struct xml_context *ctx, struct mempool *pool, uns first_cat, uns next_cat, char *err)
61 char *p = mp_start_noalign(pool, 1);
62 if (unlikely(!(xml_peek_cat(ctx) & first_cat)))
63 xml_fatal(ctx, "%s", err);
/* Ask for 5 more bytes before each character is stored with utf8_32_put(). */
66 p = mp_spread(pool, p, 5);
67 p = utf8_32_put(p, xml_skip_char(ctx));
69 while (xml_peek_cat(ctx) & next_cat);
71 return mp_end(pool, p);
/* Skip a run of input characters without storing them.
 * The first character is fetched with xml_get_cat() and must match
 * first_cat, otherwise xml_fatal() is called with err. The loop then runs
 * while the peeked category matches next_cat.
 * NOTE(review): the loop body is not visible in this view; it presumably
 * consumes one character per iteration -- confirm. */
75 xml_skip_string(struct xml_context *ctx, uns first_cat, uns next_cat, char *err)
77 if (unlikely(!(xml_get_cat(ctx) & first_cat)))
78 xml_fatal(ctx, "%s", err);
79 while (xml_peek_cat(ctx) & next_cat)
/* Parse a Name into `pool` and return it.
 * Delegates to xml_parse_string() with ctx->cat_sname as the mask for the
 * first character and ctx->cat_name as the mask for the rest. */
84 xml_parse_name(struct xml_context *ctx, struct mempool *pool)
86 /* Name ::= NameStartChar (NameChar)* */
87 return xml_parse_string(ctx, pool, ctx->cat_sname, ctx->cat_name, "Expected a name");
/* Skip a Name without storing it; uses the same category masks as
 * xml_parse_name() but goes through xml_skip_string(). */
91 xml_skip_name(struct xml_context *ctx)
93 xml_skip_string(ctx, ctx->cat_sname, ctx->cat_name, "Expected a name");
/* Parse an Nmtoken into `pool` and return it.
 * Unlike a Name, the first character is tested against the same mask
 * (ctx->cat_name) as the following ones. */
97 xml_parse_nmtoken(struct xml_context *ctx, struct mempool *pool)
99 /* Nmtoken ::= (NameChar)+ */
100 return xml_parse_string(ctx, pool, ctx->cat_name, ctx->cat_name, "Expected a nmtoken");
103 /*** Simple literals ***/
/* Parse a quoted system literal into `pool` and return it.
 * The opening quote is obtained from xml_parse_quote(); characters are then
 * read with xml_get_char() until the same quote character is seen again,
 * each one being appended to the pool as UTF-8.
 * NOTE(review): braces and any statement between the loop and the return
 * are not visible in this view -- confirm. */
106 xml_parse_system_literal(struct xml_context *ctx, struct mempool *pool)
108 /* SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") */
109 char *p = mp_start_noalign(pool, 1);
110 uns q = xml_parse_quote(ctx), c;
111 while ((c = xml_get_char(ctx)) != q)
113 p = mp_spread(pool, p, 5);
114 p = utf8_32_put(p, c);
117 return mp_end(pool, p);
/* Parse a quoted public identifier literal into `pool` and return it.
 * Works like the system literal parser, except that every character read
 * before the closing quote must carry the XML_CHAR_PUBID category;
 * otherwise xml_fatal() is called.
 * NOTE(review): the statement that stores the character after mp_spread()
 * is not visible in this view -- confirm. */
121 xml_parse_pubid_literal(struct xml_context *ctx, struct mempool *pool)
123 /* PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" */
124 char *p = mp_start_noalign(pool, 1);
125 uns q = xml_parse_quote(ctx), c;
126 while ((c = xml_get_char(ctx)) != q)
128 if (unlikely(!(xml_last_cat(ctx) & XML_CHAR_PUBID)))
129 xml_fatal(ctx, "Expected a pubid character");
130 p = mp_spread(pool, p, 2);
134 return mp_end(pool, p);
/* Parse a comment and push it as a new node.
 * Steps visible here: consume the second '-' of the opener, create a node
 * with xml_push_dom() and mark it XML_NODE_COMMENT, accumulate the comment
 * text in ctx->pool as UTF-8, require '>' after the terminating dashes, then
 * store the byte length in n->len and the finished buffer in n->text.
 * NOTE(review): the loop header, the branch bodies of the two '-' tests,
 * the handler call after the final `if`, and whatever is written at *p
 * before mp_end(..., p + 1) are not visible in this view -- confirm. */
140 xml_push_comment(struct xml_context *ctx)
142 TRACE(ctx, "push_comment");
143 /* Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
144 * Already parsed: '<!-' */
145 xml_parse_char(ctx, '-');
146 struct xml_node *n = xml_push_dom(ctx);
147 n->type = XML_NODE_COMMENT;
148 char *p = mp_start_noalign(ctx->pool, 6);
151 if (xml_get_char(ctx) == '-')
152 if (xml_get_char(ctx) == '-')
156 p = utf8_32_put(p, xml_last_char(ctx));
157 p = mp_spread(ctx->pool, p, 6);
159 xml_parse_char(ctx, '>');
161 n->len = p - (char *)mp_ptr(ctx->pool);
162 n->text = mp_end(ctx->pool, p + 1);
163 if ((ctx->flags & XML_REPORT_COMMENTS) && ctx->h_comment)
/* Pop the comment node pushed by xml_push_comment().
 * The second argument of xml_pop_dom() is non-zero when XML_ALLOC_COMMENTS
 * is not set; xml_pop_element() passes a local named `free` in the same
 * position, so it presumably asks for the node to be released -- confirm. */
168 xml_pop_comment(struct xml_context *ctx)
170 xml_pop_dom(ctx, !(ctx->flags & XML_ALLOC_COMMENTS));
172 TRACE(ctx, "pop_comment");
/* Skip over a comment without creating a node.
 * Consumes the second '-' of the opener, reads characters until two
 * consecutive '-' have been seen, and then requires '>'. */
176 xml_skip_comment(struct xml_context *ctx)
178 TRACE(ctx, "skip_comment");
179 xml_parse_char(ctx, '-');
/* Empty-bodied loop: the condition itself consumes the input. */
180 while (xml_get_char(ctx) != '-' || xml_get_char(ctx) != '-');
181 xml_parse_char(ctx, '>');
185 /*** Processing instructions ***/
/* Parse a processing instruction and push it as a new node.
 * Steps visible here: create a node with xml_push_dom() and mark it
 * XML_NODE_PI, parse the target name into ctx->pool, report a (non-fatal)
 * xml_error() when the target equals "xml" ignoring case, then collect the
 * instruction text in ctx->pool as UTF-8 and store it in n->text / n->len.
 * When no white space follows the target, the "?>" terminator is required
 * right away.
 * NOTE(review): the else branch, the loop header, the bodies of the '?' /
 * '>' tests and the handler call after the final `if` are not visible in
 * this view -- confirm. */
188 xml_push_pi(struct xml_context *ctx)
190 TRACE(ctx, "push_pi");
191 /* Parses a PI to ctx->value and ctx->name:
192 * PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
193 * PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
194 * Already parsed: '<?' */
195 struct xml_node *n = xml_push_dom(ctx);
196 n->type = XML_NODE_PI;
197 n->name = xml_parse_name(ctx, ctx->pool);
198 if (unlikely(!strcasecmp(n->name, "xml")))
199 xml_error(ctx, "Reserved PI target");
200 char *p = mp_start_noalign(ctx->pool, 5);
201 if (!xml_parse_white(ctx, 0))
202 xml_parse_seq(ctx, "?>");
206 if (xml_get_char(ctx) == '?')
207 if (xml_peek_char(ctx) == '>')
215 p = utf8_32_put(p, xml_last_char(ctx));
216 p = mp_spread(ctx->pool, p, 5);
219 n->len = p - (char *)mp_ptr(ctx->pool);
220 n->text = mp_end(ctx->pool, p + 1);
221 if ((ctx->flags & XML_REPORT_PIS) && ctx->h_pi)
/* Pop the processing-instruction node pushed by xml_push_pi().
 * xml_pop_dom() receives a non-zero second argument when XML_ALLOC_PIS is
 * not set (presumably a request to release the node -- confirm). */
226 xml_pop_pi(struct xml_context *ctx)
228 xml_pop_dom(ctx, !(ctx->flags & XML_ALLOC_PIS));
230 TRACE(ctx, "pop_pi");
/* Skip over a processing instruction without creating a node.
 * When XML_VALIDATING is set, the target name is parsed into ctx->stack
 * only to report a reserved "xml" target; the stack is saved before and
 * restored after, so the temporary name does not persist.
 * NOTE(review): the braces, the non-validating branch and the bodies of
 * the terminator tests are not visible in this view -- confirm. */
234 xml_skip_pi(struct xml_context *ctx)
236 TRACE(ctx, "skip_pi");
237 if (ctx->flags & XML_VALIDATING)
239 struct mempool_state state;
240 mp_save(ctx->stack, &state);
241 if (unlikely(!strcasecmp(xml_parse_name(ctx, ctx->stack), "xml")))
242 xml_error(ctx, "Reserved PI target");
243 mp_restore(ctx->stack, &state);
244 if (!xml_parse_white(ctx, 0))
246 xml_parse_seq(ctx, "?>");
252 if (xml_get_char(ctx) == '?')
253 if (xml_peek_char(ctx) == '>')
259 /*** Character data ***/
/* Finish the character data buffered in ctx->chars and attach it to the
 * current node (ctx->node).
 * The first test compares fb->bufend with fb->buffer; the statement it
 * guards is not visible here (callers test the result of this function, so
 * it presumably returns 0 for an empty buffer -- confirm).
 * NOTE(review): n->len is assigned twice -- once through the out-parameter
 * of xml_end_chars() and again from fb->bufend - fb->buffer. Confirm that
 * the second assignment is intended and yields the same length. */
262 xml_flush_chars(struct xml_context *ctx)
264 struct fastbuf *fb = &ctx->chars;
265 if (fb->bufend == fb->buffer)
267 TRACE(ctx, "flush_chars");
268 struct xml_node *n = ctx->node;
269 n->text = xml_end_chars(ctx, &n->len);
270 n->len = fb->bufend - fb->buffer;
271 if ((ctx->flags & XML_REPORT_CHARS) && ctx->h_chars)
/* Pop a character-data node.
 * xml_pop_dom() receives a non-zero second argument when XML_ALLOC_CHARS
 * is not set (presumably a request to release the node -- confirm). */
277 xml_pop_chars(struct xml_context *ctx)
279 xml_pop_dom(ctx, !(ctx->flags & XML_ALLOC_CHARS));
280 TRACE(ctx, "pop_chars");
/* Append character data to ctx->chars until a '<' is read.
 * An '&' is treated specially; other characters are written to the
 * fastbuf as UTF-8 with bput_utf8_32().
 * NOTE(review): the body of the '&' branch and whatever happens with the
 * consumed '<' after the loop are not visible in this view -- confirm. */
284 xml_append_chars(struct xml_context *ctx)
286 TRACE(ctx, "append_chars");
287 struct fastbuf *out = &ctx->chars;
288 while (xml_get_char(ctx) != '<')
289 if (xml_last_char(ctx) == '&')
295 bput_utf8_32(out, xml_last_char(ctx));
299 /*** CDATA sections ***/
/* Parse a CDATA section and push it as a new node.
 * Steps visible here: require the rest of the opener ("CDATA["), create a
 * node with xml_push_dom() and mark it XML_NODE_CHARS, collect the content
 * in ctx->pool as UTF-8, then store it in n->text / n->len. The nested
 * tests look for the "]]>" terminator.
 * Note that the handler test uses XML_REPORT_CHARS together with
 * ctx->h_cdata.
 * NOTE(review): the loop header, the bodies of the terminator tests and the
 * handler call after the final `if` are not visible in this view -- confirm. */
302 xml_push_cdata(struct xml_context *ctx)
304 TRACE(ctx, "push_cdata");
305 /* CDSect :== '<![CDATA[' (Char* - (Char* ']]>' Char*)) ']]>'
306 * Already parsed: '<![' */
307 xml_parse_seq(ctx, "CDATA[");
308 struct xml_node *n = xml_push_dom(ctx);
309 n->type = XML_NODE_CHARS;
310 char *p = mp_start_noalign(ctx->pool, 7);
313 if (xml_get_char(ctx) == ']')
315 if (xml_get_char(ctx) == ']')
316 if (xml_get_char(ctx) == '>')
322 p = utf8_32_put(p, xml_last_char(ctx));
323 p = mp_spread(ctx->pool, p, 7);
326 n->len = p - (char *)mp_ptr(ctx->pool);
327 n->text = mp_end(ctx->pool, p + 1);
328 if ((ctx->flags & XML_REPORT_CHARS) && ctx->h_cdata)
/* Pop the node pushed by xml_push_cdata().
 * Uses the same XML_ALLOC_CHARS flag as plain character data to decide the
 * second argument of xml_pop_dom(). */
333 xml_pop_cdata(struct xml_context *ctx)
335 xml_pop_dom(ctx, !(ctx->flags & XML_ALLOC_CHARS));
337 TRACE(ctx, "pop_cdata");
/* Append the content of a CDATA section to ctx->chars instead of creating
 * a separate node.
 * Requires the rest of the opener ("CDATA["), then copies characters to the
 * fastbuf as UTF-8 while the nested tests look for the "]]>" terminator.
 * NOTE(review): the loop header and the bodies of the terminator tests are
 * not visible in this view -- confirm. */
341 xml_append_cdata(struct xml_context *ctx)
343 TRACE(ctx, "append_cdata");
344 xml_parse_seq(ctx, "CDATA[");
345 struct fastbuf *out = &ctx->chars;
348 if (xml_get_char(ctx) == ']')
350 if (xml_get_char(ctx) == ']')
351 if (xml_get_char(ctx) == '>')
357 bput_utf8_32(out, xml_last_char(ctx));
/* Skip over a CDATA section without storing its content.
 * Requires the rest of the opener ("CDATA[") and then consumes input until
 * the three comparisons in the loop condition all succeed in a row.
 * NOTE(review): every failed comparison restarts the match at the first
 * ']' with a freshly read character, so for an input such as "]]]>" the
 * third ']' fails the '>' test and the following '>' is then compared with
 * ']'. The terminator appears to be missed in that case -- confirm. */
363 xml_skip_cdata(struct xml_context *ctx)
365 TRACE(ctx, "skip_cdata");
366 xml_parse_seq(ctx, "CDATA[");
367 while (xml_get_char(ctx) != ']' || xml_get_char(ctx) != ']' || xml_get_char(ctx) != '>');
371 /*** Character references ***/
/* Parse a numeric character reference and produce its code point.
 * A leading 'x' selects hexadecimal digits; otherwise the character already
 * read by that first xml_get_char() is checked as the first decimal digit
 * (hence xml_last_cat() in the decimal branch versus xml_get_cat() in the
 * hexadecimal one). Both digit loops stop once v reaches 0x110000. The
 * value must belong to ctx->cat_unrestricted and be followed by ';'.
 * Problems are reported with the non-fatal xml_error(); the tail of the
 * function skips to the next ';' and returns UNI_REPLACEMENT.
 * NOTE(review): the declaration of v, the `do`/`else` keywords, the jumps
 * to the recovery code and the successful return are not visible in this
 * view -- confirm. */
374 xml_parse_char_ref(struct xml_context *ctx)
376 TRACE(ctx, "parse_char_ref");
377 /* CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
378 * Already parsed: '&#' */
380 if (xml_get_char(ctx) == 'x')
382 if (!(xml_get_cat(ctx) & XML_CHAR_XDIGIT))
384 xml_error(ctx, "Expected a hexadecimal value of character reference");
389 v = (v << 4) + Cxvalue(xml_last_char(ctx));
391 while (v < 0x110000 && (xml_get_cat(ctx) & XML_CHAR_XDIGIT));
395 if (!(xml_last_cat(ctx) & XML_CHAR_DIGIT))
397 xml_error(ctx, "Expected a numeric value of character reference");
402 v = v * 10 + xml_last_char(ctx) - '0';
404 while (v < 0x110000 && (xml_get_cat(ctx) & XML_CHAR_DIGIT));
406 uns cat = xml_char_cat(v);
407 if (!(cat & ctx->cat_unrestricted))
409 xml_error(ctx, "Character reference out of range");
412 if (xml_last_char(ctx) == ';')
417 xml_error(ctx, "Expected ';'");
419 while (xml_last_char(ctx) != ';')
422 return UNI_REPLACEMENT;
425 /*** References to general entities ***/
/* Parse a reference that follows an already consumed '&'.
 * If the next character is '#', the result of xml_parse_char_ref() is
 * written to ctx->chars as UTF-8. Otherwise the entity name is parsed into
 * ctx->stack, ';' is required, and the entity is looked up with
 * xml_dtd_find_ent():
 *   - the "Unknown entity" error is reported on one path,
 *   - an entity flagged XML_DTD_ENT_TRIVIAL has its text copied to
 *     ctx->chars with bwrite(),
 *   - any other entity is handed to xml_push_entity().
 * ctx->stack is saved before the name is parsed and restored afterwards, so
 * the temporary name does not persist.
 * NOTE(review): the braces, the `else` keywords, the condition guarding the
 * "Unknown entity" error and any early returns are not visible in this view
 * -- confirm which mp_restore() belongs to which path. */
428 xml_parse_ref(struct xml_context *ctx)
430 /* Reference ::= EntityRef | CharRef
431 * EntityRef ::= '&' Name ';'
432 * Already parsed: '&' */
433 struct fastbuf *out = &ctx->chars;
434 if (xml_peek_char(ctx) == '#')
437 bput_utf8_32(out, xml_parse_char_ref(ctx));
441 TRACE(ctx, "parse_ge_ref");
442 struct mempool_state state;
443 mp_save(ctx->stack, &state);
444 char *name = xml_parse_name(ctx, ctx->stack);
445 xml_parse_char(ctx, ';');
446 struct xml_dtd_ent *ent = xml_dtd_find_ent(ctx, name);
449 xml_error(ctx, "Unknown entity &%s;", name);
454 else if (ent->flags & XML_DTD_ENT_TRIVIAL)
456 TRACE(ctx, "Trivial entity &%s;", name);
457 bwrite(out, ent->text, ent->len);
461 TRACE(ctx, "Pushed entity &%s;", name);
462 mp_restore(ctx->stack, &state);
464 xml_push_entity(ctx, ent);
467 mp_restore(ctx->stack, &state);
472 /*** Attribute values ***/
/* Parse a quoted attribute value and return it.
 * The opening quote comes from xml_parse_quote(); the value is assembled in
 * ctx->chars (started with xml_start_chars(), finished with
 * xml_end_chars()). The visible branches stop at the matching quote,
 * report '<' as an error, give white-space characters their own branch, and
 * copy everything else as UTF-8. `attr` is marked UNUSED.
 * NOTE(review): the loop header, the first branch (before the quote test),
 * the bodies of the quote and white-space branches and the declaration of
 * `len` are not visible in this view. The three lines after the grammar
 * comment are the tail of a to-do comment whose opening line is missing
 * here. */
475 xml_parse_attr_value(struct xml_context *ctx, struct xml_dtd_attr *attr UNUSED)
477 TRACE(ctx, "parse_attr_value");
478 /* AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'" */
480 * -- copying from ctx->chars to ctx->pool is not necessary, we could directly write to ctx->pool
481 * -- berare quotes inside parased entities
482 * -- check value constrains / normalize value */
483 struct mempool_state state;
484 uns quote = xml_parse_quote(ctx);
485 mp_save(ctx->stack, &state);
486 xml_start_chars(ctx);
487 struct fastbuf *out = &ctx->chars;
490 uns c = xml_get_char(ctx);
496 else if (c == quote) // FIXME: beware quotes inside parsed entities
499 xml_error(ctx, "Attribute value must not contain '<'");
500 else if (xml_last_cat(ctx) & XML_CHAR_WHITE)
503 bput_utf8_32(out, c);
505 mp_restore(ctx->stack, &state);
507 return xml_end_chars(ctx, &len);
/* Forward declaration so that the hash callbacks below can take a pointer
 * to the table type before lib/hashtable.h is included. */
512 struct xml_attrs_table;
/* Hash callback for the attribute table: combines the hash of the owning
 * element pointer with the hash of the attribute name using XOR.
 * The table argument is unused. */
515 xml_attrs_hash(struct xml_attrs_table *t UNUSED, struct xml_node *e, char *n)
517 return hash_pointer(e) ^ hash_string(n);
/* Equality callback for the attribute table: two keys match when they
 * refer to the same element (pointer comparison) and their names compare
 * equal with strcmp(). The table argument is unused. */
521 xml_attrs_eq(struct xml_attrs_table *t UNUSED, struct xml_node *e1, char *n1, struct xml_node *e2, char *n2)
523 return (e1 == e2) && !strcmp(n1, n2);
/* Key-initialisation callback for the attribute table.
 * The only statement visible here appends the new attribute to the tail of
 * its element's attribute list.
 * NOTE(review): the lines that presumably store `e` and `name` into the
 * attribute are not visible in this view -- confirm. */
527 xml_attrs_init_key(struct xml_attrs_table *t UNUSED, struct xml_attr *a, struct xml_node *e, char *name)
532 slist_add_tail(&e->attrs, &a->n);
/* Configuration of the generic hash table template in lib/hashtable.h.
 * Generated identifiers get the xml_attrs_ prefix, nodes are struct
 * xml_attr, and the key is the pair (element, attribute name). The GIVE_*
 * switches announce callbacks supplied by this file; the WANT_* switches
 * request the cleanup, remove, lookup and find operations used further
 * below.
 * NOTE(review): some configuration lines are missing from this view (the
 * original numbering has gaps), so the list may be incomplete. */
535 #define HASH_PREFIX(x) xml_attrs_##x
536 #define HASH_NODE struct xml_attr
537 #define HASH_KEY_COMPLEX(x) x elem, x name
538 #define HASH_KEY_DECL struct xml_node *elem, char *name
539 #define HASH_TABLE_DYNAMIC
541 #define HASH_GIVE_HASHFN
542 #define HASH_GIVE_INIT_KEY
543 #define HASH_WANT_CLEANUP
544 #define HASH_WANT_REMOVE
545 #define HASH_WANT_LOOKUP
546 #define HASH_WANT_FIND
547 #define HASH_GIVE_ALLOC
549 #include "lib/hashtable.h"
/* Parse one attribute of the current element (ctx->node).
 * The name is parsed into ctx->pool and looked up (or created) in the
 * attribute hash table under the key (element, name); the value is then
 * parsed with xml_parse_attr_value(). A "not unique" error is reported on
 * one path.
 * NOTE(review): the call that parses Eq, the condition guarding the
 * uniqueness error and the assignment of the value to the attribute are not
 * visible in this view -- confirm. The line after the grammar comment is the
 * remainder of a to-do comment whose opening line is missing here. */
552 xml_parse_attr(struct xml_context *ctx)
554 TRACE(ctx, "parse_attr");
555 /* Attribute ::= Name Eq AttValue */
557 * -- memory management
559 struct xml_node *e = ctx->node;
560 char *n = xml_parse_name(ctx, ctx->pool);
561 struct xml_attr *a = xml_attrs_lookup(ctx->tab_attrs, e, n);
563 char *v = xml_parse_attr_value(ctx, NULL);
565 xml_error(ctx, "Attribute %s is not unique", n);
/* Look up the attribute `name` of element `node` in the attribute hash
 * table of `ctx`; returns whatever xml_attrs_find() returns. */
571 xml_attr_find(struct xml_context *ctx, struct xml_node *node, char *name)
573 return xml_attrs_find(ctx->tab_attrs, node, name);
/* Create the attribute hash table: obtain it from xml_hash_new() using
 * ctx->pool, store it in ctx->tab_attrs and initialise it with
 * xml_attrs_init(). */
577 xml_attrs_table_init(struct xml_context *ctx)
579 xml_attrs_init(ctx->tab_attrs = xml_hash_new(ctx->pool, sizeof(struct xml_attrs_table)));
/* Tear down the attribute hash table stored in ctx->tab_attrs through the
 * generated xml_attrs_cleanup(). */
583 xml_attrs_table_cleanup(struct xml_context *ctx)
585 xml_attrs_cleanup(ctx->tab_attrs);
/* Parse a start tag (or empty-element tag) and push the element node.
 * Steps visible here: create the node with xml_push_dom(), initialise its
 * child and attribute lists, mark it XML_NODE_ELEM and parse its name into
 * ctx->pool. A mismatch between the element name and ctx->doctype is
 * reported as a (non-fatal) error. The tag body is then scanned: one path
 * requires '>' and sets XML_EMPTY_ELEM_TAG in ctx->flags, and another
 * raises the fatal "expected white space" error.
 * NOTE(review): the condition restricting the doctype check to the root
 * element, the loop over attributes, the tests on `c` and `white`, and the
 * handler call after the final `if` are not visible in this view --
 * confirm. */
591 xml_push_element(struct xml_context *ctx)
593 TRACE(ctx, "push_element");
594 /* EmptyElemTag | STag
595 * EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
596 * STag ::= '<' Name (S Attribute)* S? '>'
597 * Already parsed: '<' */
598 struct xml_node *e = xml_push_dom(ctx);
599 clist_init(&e->sons);
600 e->type = XML_NODE_ELEM;
601 e->name = xml_parse_name(ctx, ctx->pool);
602 slist_init(&e->attrs);
606 if (ctx->doctype && strcmp(e->name, ctx->doctype))
607 xml_error(ctx, "The root element %s does not match the document type %s", e->name, ctx->doctype);
611 uns white = xml_parse_white(ctx, 0);
612 uns c = xml_get_char(ctx);
615 xml_parse_char(ctx, '>');
616 ctx->flags |= XML_EMPTY_ELEM_TAG;
622 xml_fatal_expected_white(ctx);
626 if ((ctx->flags & XML_REPORT_TAGS) && ctx->h_stag)
/* Pop the current element node.
 * `free` is non-zero when XML_ALLOC_TAGS is not set and is forwarded to
 * xml_pop_dom(). The visible clean-up removes the element's attributes from
 * the attribute hash table and then walks its children: attributes of child
 * elements are removed too, and each child's own children are spliced into
 * the list after it, so the whole subtree is visited without recursion.
 * NOTE(review): the handler call after the first `if`, the condition under
 * which the clean-up runs (presumably only when `free` is set), the
 * declaration of `n` and the removal of `n` from the list are not visible
 * in this view -- confirm. */
631 xml_pop_element(struct xml_context *ctx)
633 TRACE(ctx, "pop_element");
634 if ((ctx->flags & XML_REPORT_TAGS) && ctx->h_etag)
636 struct xml_node *e = ctx->node;
637 uns free = !(ctx->flags & XML_ALLOC_TAGS);
642 /* Restore hash table of attributes */
643 SLIST_FOR_EACH(struct xml_attr *, a, e->attrs)
644 xml_attrs_remove(ctx->tab_attrs, a);
646 while (n = clist_head(&e->sons))
648 if (n->type == XML_NODE_ELEM)
650 SLIST_FOR_EACH(struct xml_attr *, a, n->attrs)
651 xml_attrs_remove(ctx->tab_attrs, a);
652 clist_insert_list_after(&n->sons, &n->n);
657 xml_pop_dom(ctx, free);
/* Parse an end tag and check that it closes the current element.
 * The expected name is decoded code point by code point with utf8_32_get()
 * and compared with the characters read from the input; optional white
 * space and '>' must follow. On a mismatch a (non-fatal) error naming the
 * expected tag is reported and input is skipped up to the next '>'.
 * NOTE(review): the declarations of `n` and `c`, the loop over the name,
 * the jumps to the recovery code and the successful exit are not visible in
 * this view -- confirm. */
662 xml_parse_etag(struct xml_context *ctx)
664 /* ETag ::= '</' Name S? '>'
665 * Already parsed: '<' */
666 struct xml_node *e = ctx->node;
672 n = utf8_32_get(n, &c);
673 if (xml_get_char(ctx) != c)
676 xml_parse_white(ctx, 0);
677 if (xml_get_char(ctx) != '>')
680 xml_error(ctx, "Invalid ETag, expected </%s>", e->name);
681 while (xml_get_char(ctx) != '>');
686 /*** Document type declaration ***/
/* Parse the head of a document type declaration.
 * Requires "DOCTYPE", mandatory white space and a name stored in
 * ctx->doctype. If white space is followed by 'S' or 'P', an external
 * identifier is parsed: "SYSTEM" with a system literal, or "PUBLIC" with a
 * public identifier literal followed by a system literal; the results go to
 * ctx->system_id / ctx->public_id and XML_HAS_EXTERNAL_SUBSET is set.
 * A following '[' sets XML_HAS_INTERNAL_SUBSET. Finally ctx->h_doctype_decl
 * is called when it is set. Per the comment below, parsing stops before
 * '[' or '>'.
 * NOTE(review): the condition guarding the "Multiple document types" fatal
 * error, the declaration of `c` and the test choosing between the SYSTEM
 * and PUBLIC branches are not visible in this view -- confirm. */
689 xml_parse_doctype_decl(struct xml_context *ctx)
691 TRACE(ctx, "parse_doctype_decl");
692 /* doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>'
693 * Already parsed: '<!'
694 * Terminated before '[' or '>' */
696 xml_fatal(ctx, "Multiple document types not allowed");
697 xml_parse_seq(ctx, "DOCTYPE");
698 xml_parse_white(ctx, 1);
699 ctx->doctype = xml_parse_name(ctx, ctx->pool);
700 TRACE(ctx, "doctype=%s", ctx->doctype);
702 if (xml_parse_white(ctx, 0) && ((c = xml_peek_char(ctx)) == 'S' || c == 'P'))
706 xml_parse_seq(ctx, "SYSTEM");
707 xml_parse_white(ctx, 1);
708 ctx->system_id = xml_parse_system_literal(ctx, ctx->pool);
712 xml_parse_seq(ctx, "PUBLIC");
713 xml_parse_white(ctx, 1);
714 ctx->public_id = xml_parse_pubid_literal(ctx, ctx->pool);
715 xml_parse_white(ctx, 1);
716 ctx->system_id = xml_parse_system_literal(ctx, ctx->pool);
718 xml_parse_white(ctx, 0);
719 ctx->flags |= XML_HAS_EXTERNAL_SUBSET;
721 if (xml_peek_char(ctx) == '[')
722 ctx->flags |= XML_HAS_INTERNAL_SUBSET;
723 if (ctx->h_doctype_decl)
724 ctx->h_doctype_decl(ctx);
729 ///////////////////////////////////////////////////////////////////////////////////////////////////////////
731 /* DTD: Internal subset */
/* Parse the internal DTD subset.
 * After optional white space, the next characters select a declaration:
 * following "<!" the visible branches handle a comment (pushed and popped
 * immediately), a NOTATION declaration, an ENTITY declaration, an ELEMENT
 * declaration and an ATTLIST declaration; a parameter-entity reference is
 * handled through xml_parse_pe_ref(). Anything else ends in the fatal
 * "Invalid markup in the internal subset" error.
 * NOTE(review): the loop, the `case` labels, the tests that choose between
 * the branches and the exit condition are not visible in this view --
 * confirm. The two lines after the FIXME are the tail of a grammar comment
 * whose opening line is missing here. */
734 xml_parse_internal_subset(struct xml_context *ctx)
736 // FIXME: comments/pi have no parent
738 * intSubset :== (markupdecl | DeclSep)
739 * Already parsed: ']' */
742 xml_parse_white(ctx, 0);
743 uns c = xml_get_char(ctx);
746 if ((c = xml_get_char(ctx)) == '!')
747 switch (c = xml_get_char(ctx))
750 xml_push_comment(ctx);
751 xml_pop_comment(ctx);
754 xml_parse_seq(ctx, "OTATION");
755 xml_parse_notation_decl(ctx);
758 if ((c = xml_get_char(ctx)) == 'N')
760 xml_parse_seq(ctx, "TITY");
761 xml_parse_entity_decl(ctx);
765 xml_parse_seq(ctx, "EMENT");
766 xml_parse_element_decl(ctx);
772 xml_parse_seq(ctx, "TTLIST");
773 xml_parse_attr_list_decl(ctx);
786 xml_parse_pe_ref(ctx);
796 xml_fatal(ctx, "Invalid markup in the internal subset");
799 /*** The State Machine ***/
/* Advance the parser by one step and return a state code.
 *
 * The function is a resumable state machine. The PULL and PULL_STATE macros
 * return to the caller when the matching bit is set in ctx->pull, and they
 * place a `case` label right after the return so that the next call can
 * continue from the same spot (the enclosing switch on the saved state is
 * not visible in this view).
 *
 * Error handling uses setjmp: a jump buffer is installed in ctx->throw_buf
 * before parsing starts. When control comes back through it, the visible
 * lines trace "raised fatal error" and return with ctx->state set to
 * XML_STATE_EOF.
 *
 * Visible phases:
 *   - prolog: document-start and XML-declaration handlers, then PIs,
 *     comments and an optional document type declaration (with internal
 *     subset) until the root tag is found;
 *   - content (XML_STATE_CHARS onwards): character data, PIs, comments,
 *     CDATA sections, start tags and end tags; pending character data is
 *     flushed before each of the other node kinds;
 *   - epilog: setjmp is armed again so that XML_ERR_EOF is treated as a
 *     normal end of document (state set to XML_STATE_EOF, h_document_end
 *     called when set, ctx->err_msg cleared, XML_STATE_EOF returned).
 *
 * NOTE(review): many lines of this function (braces, `else` keywords,
 * several case labels, declarations of `c` and `throw_buf`, loop headers)
 * are not visible in this view; the outline above covers only what the
 * visible lines show. */
802 xml_next(struct xml_context *ctx)
804 /* A nasty state machine */
806 #define PULL(x) do { if (ctx->pull & XML_PULL_##x) return ctx->state = XML_STATE_##x; case XML_STATE_##x: ; } while (0)
807 #define PULL_STATE(x, s) do { if (ctx->pull & XML_PULL_##x) return ctx->state = XML_STATE_##s, XML_STATE_##x; case XML_STATE_##s: ; } while (0)
809 TRACE(ctx, "xml_next (state=%u)", ctx->state);
811 ctx->throw_buf = &throw_buf;
812 if (setjmp(throw_buf))
815 if (ctx->err_code == XML_ERR_EOF && ctx->h_fatal)
817 TRACE(ctx, "raised fatal error");
818 return ctx->state = XML_STATE_EOF;
823 case XML_STATE_START:
824 TRACE(ctx, "entering prolog");
825 if (ctx->h_document_start)
826 ctx->h_document_start(ctx);
830 ctx->h_xml_decl(ctx);
833 /* Misc* (doctypedecl Misc*)? */
836 xml_parse_white(ctx, 0);
837 xml_parse_char(ctx, '<');
838 if ((c = xml_get_char(ctx)) == '?')
839 /* Processing instruction */
840 if (!(ctx->flags & XML_REPORT_PIS))
845 PULL_STATE(PI, PROLOG_PI);
850 /* Found the root tag */
854 else if (xml_get_char(ctx) == '-')
855 if (!(ctx->flags & XML_REPORT_COMMENTS))
856 xml_skip_comment(ctx);
859 xml_push_comment(ctx);
860 PULL_STATE(COMMENT, PROLOG_COMMENT);
861 xml_pop_comment(ctx);
867 xml_parse_doctype_decl(ctx);
869 if (xml_peek_char(ctx) == '[')
871 // FIXME: ability to skip the subset
875 if (ctx->h_dtd_start)
876 ctx->h_dtd_start(ctx);
877 xml_parse_internal_subset(ctx);
878 // FIXME: external subset
881 xml_parse_white(ctx, 0);
883 xml_parse_char(ctx, '>');
887 case XML_STATE_CHARS:
891 if (xml_peek_char(ctx) != '<')
894 xml_append_chars(ctx);
902 if ((c = xml_get_char(ctx)) == '?')
905 if (!(ctx->flags & (XML_REPORT_PIS | XML_ALLOC_PIS)))
909 if (xml_flush_chars(ctx))
911 PULL_STATE(CHARS, CHARS_BEFORE_PI);
921 if ((c = xml_get_char(ctx)) == '-')
924 if (!(ctx->flags & (XML_REPORT_COMMENTS | XML_ALLOC_COMMENTS)))
925 xml_skip_comment(ctx);
928 if (xml_flush_chars(ctx))
930 PULL_STATE(CHARS, CHARS_BEFORE_COMMENT);
933 xml_push_comment(ctx);
935 xml_pop_comment(ctx);
941 if (!(ctx->flags & XML_UNFOLD_CDATA))
942 xml_append_cdata(ctx);
945 if (xml_flush_chars(ctx))
947 PULL_STATE(CHARS, CHARS_BEFORE_CDATA);
956 xml_fatal(ctx, "Unexpected character after '<!'");
960 /* STag | EmptyElemTag */
962 if (xml_flush_chars(ctx))
964 PULL_STATE(CHARS, CHARS_BEFORE_STAG);
968 xml_push_element(ctx);
970 if (ctx->flags & XML_EMPTY_ELEM_TAG)
977 if (xml_flush_chars(ctx))
979 PULL_STATE(CHARS, CHARS_BEFORE_ETAG);
986 xml_pop_element(ctx);
994 TRACE(ctx, "entering epilog");
997 /* Epilog whitespace is the only place, where a valid document can reach EOF */
998 if (setjmp(throw_buf))
999 if (ctx->err_code == XML_ERR_EOF)
1001 TRACE(ctx, "reached EOF");
1002 ctx->state = XML_STATE_EOF;
1003 if (ctx->h_document_end)
1004 ctx->h_document_end(ctx);
1007 ctx->err_msg = NULL;
1008 return XML_STATE_EOF;
1012 xml_parse_white(ctx, 0);
1013 if (setjmp(throw_buf))
1017 xml_parse_char(ctx, '<');
1019 if ((c = xml_get_char(ctx)) == '?')
1020 /* Processing instruction */
1021 if (!(ctx->flags & XML_REPORT_PIS))
1026 PULL_STATE(PI, EPILOG_PI);
1031 xml_parse_char(ctx, '-');
1033 if (!(ctx->flags & XML_REPORT_COMMENTS))
1034 xml_skip_comment(ctx);
1037 xml_push_comment(ctx);
1038 PULL_STATE(COMMENT, EPILOG_COMMENT);
1039 xml_pop_comment(ctx);
1043 xml_fatal(ctx, "Syntax error in the epilog");
/* Run the parser and return ctx->err_code.
 * NOTE(review): the statements between the signature and the return are
 * not visible in this view; they presumably drive xml_next() until the end
 * of the document -- confirm. */
1051 xml_parse(struct xml_context *ctx)
1055 return ctx->err_code;