2 * Sherlock Library -- A simple XML parser
4 * (c) 2007 Pavel Charvat <pchar@ucw.cz>
6 * This software may be freely distributed and used according to the terms
7 * of the GNU Lesser General Public License.
12 #include "sherlock/sherlock.h"
13 #include "sherlock/xml/xml.h"
14 #include "sherlock/xml/dtd.h"
15 #include "sherlock/xml/common.h"
16 #include "lib/fastbuf.h"
17 #include "lib/ff-unicode.h"
18 #include "lib/unicode.h"
19 #include "lib/chartype.h"
20 #include "lib/hashfunc.h"
24 /*** Basic parsing ***/
/* Raise a fatal error naming the character that was expected at this point.
 * Code points 32..127 are shown literally inside quotes; the second message
 * prints the value in U+XXXX notation instead. */
27 xml_fatal_expected(struct xml_context *ctx, uns c)
29 if (c >= 32 && c < 128)
30 xml_fatal(ctx, "Expected '%c'", c);
32 xml_fatal(ctx, "Expected U+%04x", c);
/* Raise a fatal error: white space was required at the current position. */
36 xml_fatal_expected_white(struct xml_context *ctx)
38 xml_fatal(ctx, "Expected a white space");
/* Raise a fatal error: a quotation mark was required at the current position. */
42 xml_fatal_expected_quot(struct xml_context *ctx)
44 xml_fatal(ctx, "Expected a quotation mark");
/* Consume the Eq production: white space, the equals sign, white space.
 * The 0 passed to xml_parse_white() presumably marks the white space as
 * optional, matching S? in the grammar below -- TODO confirm. */
48 xml_parse_eq(struct xml_context *ctx)
50 /* Eq ::= S? '=' S? */
51 xml_parse_white(ctx, 0);
52 xml_parse_char(ctx, '=');
53 xml_parse_white(ctx, 0);
56 /*** Names and nmtokens ***/
/* Read a run of characters from the input and build it as a string in pool.
 * The first character must belong to first_cat, otherwise the fatal error err
 * is raised; characters keep being appended, UTF-8 encoded, for as long as the
 * upcoming one belongs to next_cat.  Returns whatever mp_end() yields for the
 * finished buffer. */
59 xml_parse_string(struct xml_context *ctx, struct mempool *pool, uns first_cat, uns next_cat, char *err)
61 char *p = mp_start_noalign(pool, 1);
62 if (unlikely(!(xml_peek_cat(ctx) & first_cat)))
63 xml_fatal(ctx, "%s", err);
/* Request 5 more bytes before storing the next character (presumably room
 * for one encoded character -- TODO confirm against utf8_32_put()). */
66 p = mp_spread(pool, p, 5);
67 p = utf8_32_put(p, xml_skip_char(ctx));
69 while (xml_peek_cat(ctx) & next_cat);
71 return mp_end(pool, p);
/* Counterpart of xml_parse_string() that builds no string: raise the fatal
 * error err unless the first character (consumed by xml_get_cat()) belongs to
 * first_cat, then keep looping while the upcoming character belongs to
 * next_cat. */
75 xml_skip_string(struct xml_context *ctx, uns first_cat, uns next_cat, char *err)
77 if (unlikely(!(xml_get_cat(ctx) & first_cat)))
78 xml_fatal(ctx, "%s", err);
79 while (xml_peek_cat(ctx) & next_cat)
/* Parse a Name and store it in pool via xml_parse_string().  The
 * start-character and name-character classes are the _1_0 variants unless
 * XML_VERSION_1_1 is set in ctx->flags, in which case the _1_1 ones apply. */
84 xml_parse_name(struct xml_context *ctx, struct mempool *pool)
86 /* Name ::= NameStartChar (NameChar)* */
87 return xml_parse_string(ctx, pool,
88 !(ctx->flags & XML_VERSION_1_1) ? XML_CHAR_SNAME_1_0 : XML_CHAR_SNAME_1_1,
89 !(ctx->flags & XML_VERSION_1_1) ? XML_CHAR_NAME_1_0 : XML_CHAR_NAME_1_1,
/* Skip over a Name.  The character classes are selected by XML_VERSION_1_1
 * with the same two expressions that xml_parse_name() uses. */
94 xml_skip_name(struct xml_context *ctx)
97 !(ctx->flags & XML_VERSION_1_1) ? XML_CHAR_SNAME_1_0 : XML_CHAR_SNAME_1_1,
98 !(ctx->flags & XML_VERSION_1_1) ? XML_CHAR_NAME_1_0 : XML_CHAR_NAME_1_1,
/* Parse an Nmtoken into pool.  Unlike a Name, the first character is checked
 * against the same name-character class as the following ones; the class is
 * the XML 1.0 or 1.1 variant depending on XML_VERSION_1_1. */
103 xml_parse_nmtoken(struct xml_context *ctx, struct mempool *pool)
105 /* Nmtoken ::= (NameChar)+ */
106 uns cat = !(ctx->flags & XML_VERSION_1_1) ? XML_CHAR_NAME_1_0 : XML_CHAR_NAME_1_1;
107 return xml_parse_string(ctx, pool, cat, cat, "Expected a nmtoken");
110 /*** Simple literals ***/
/* Parse a quoted SystemLiteral into pool.  xml_parse_quote() supplies the
 * opening quote character q; input characters are appended, UTF-8 encoded,
 * until q is read again.  Returns the buffer finished by mp_end(). */
113 xml_parse_system_literal(struct xml_context *ctx, struct mempool *pool)
115 /* SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") */
116 char *p = mp_start_noalign(pool, 1);
117 uns q = xml_parse_quote(ctx), c;
118 while ((c = xml_get_char(ctx)) != q)
120 p = mp_spread(pool, p, 5);
121 p = utf8_32_put(p, c);
124 return mp_end(pool, p);
/* Parse a quoted PubidLiteral into pool.  Works like the system literal
 * parser, but every character before the closing quote must carry the
 * XML_CHAR_PUBID category; anything else is a fatal error. */
128 xml_parse_pubid_literal(struct xml_context *ctx, struct mempool *pool)
130 /* PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" */
131 char *p = mp_start_noalign(pool, 1);
132 uns q = xml_parse_quote(ctx), c;
133 while ((c = xml_get_char(ctx)) != q)
135 if (unlikely(!(xml_last_cat(ctx) & XML_CHAR_PUBID)))
136 xml_fatal(ctx, "Expected a pubid character");
/* Only 2 bytes are requested here, versus 5 in the system literal parser
 * (presumably because pubid characters are single-byte -- TODO confirm). */
137 p = mp_spread(pool, p, 2);
141 return mp_end(pool, p);
/* Parse a comment into a freshly pushed DOM node of type XML_NODE_COMMENT.
 * The text is accumulated in ctx->pool; n->len receives its length in bytes
 * and n->text the finished buffer.  The closing test selects the case where
 * XML_REPORT_COMMENTS is set and an h_comment hook is installed. */
147 xml_push_comment(struct xml_context *ctx)
149 TRACE(ctx, "push_comment");
150 /* Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
151 * Already parsed: '<!-' */
152 xml_parse_char(ctx, '-');
153 struct xml_node *n = xml_push_dom(ctx);
154 n->type = XML_NODE_COMMENT;
155 char *p = mp_start_noalign(ctx->pool, 6);
158 if (xml_get_char(ctx) == '-')
159 if (xml_get_char(ctx) == '-')
163 p = utf8_32_put(p, xml_last_char(ctx));
164 p = mp_spread(ctx->pool, p, 6);
166 xml_parse_char(ctx, '>');
/* Length is measured from the start of the growing pool buffer. */
168 n->len = p - (char *)mp_ptr(ctx->pool);
/* p + 1: presumably one extra byte for a terminator -- TODO confirm. */
169 n->text = mp_end(ctx->pool, p + 1);
170 if ((ctx->flags & XML_REPORT_COMMENTS) && ctx->h_comment)
/* Leave the comment node.  The second argument of xml_pop_dom() is non-zero
 * unless XML_ALLOC_COMMENTS is set (presumably a request to free the node;
 * xml_pop_element() names the equivalent value "free"). */
175 xml_pop_comment(struct xml_context *ctx)
177 xml_pop_dom(ctx, !(ctx->flags & XML_ALLOC_COMMENTS));
179 TRACE(ctx, "pop_comment");
/* Skip a comment without storing it: require the second dash of the opener,
 * read characters until two dashes arrive back to back, then require the
 * closing angle bracket. */
183 xml_skip_comment(struct xml_context *ctx)
185 TRACE(ctx, "skip_comment");
186 xml_parse_char(ctx, '-');
/* The second xml_get_char() only runs after a dash was just read, so the
 * loop ends exactly when a dash is immediately followed by another dash. */
187 while (xml_get_char(ctx) != '-' || xml_get_char(ctx) != '-');
188 xml_parse_char(ctx, '>');
192 /*** Processing instructions ***/
/* Parse a processing instruction into a freshly pushed DOM node of type
 * XML_NODE_PI.  The target name goes to n->name; a target equal to "xml" in
 * any letter case is reported with xml_error().  The remaining text is
 * accumulated in ctx->pool and ends up in n->text with its byte length in
 * n->len.  The closing test selects the case where XML_REPORT_PIS is set and
 * an h_pi hook is installed. */
195 xml_push_pi(struct xml_context *ctx)
197 TRACE(ctx, "push_pi");
198 /* Parses a PI into the node's name and text:
199 * PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
200 * PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
201 * Already parsed: '<?' */
202 struct xml_node *n = xml_push_dom(ctx);
203 n->type = XML_NODE_PI;
204 n->name = xml_parse_name(ctx, ctx->pool);
205 if (unlikely(!strcasecmp(n->name, "xml")))
206 xml_error(ctx, "Reserved PI target");
207 char *p = mp_start_noalign(ctx->pool, 5);
/* No white space after the target: the PI has to end right here. */
208 if (!xml_parse_white(ctx, 0))
209 xml_parse_seq(ctx, "?>");
213 if (xml_get_char(ctx) == '?')
214 if (xml_peek_char(ctx) == '>')
222 p = utf8_32_put(p, xml_last_char(ctx));
223 p = mp_spread(ctx->pool, p, 5);
226 n->len = p - (char *)mp_ptr(ctx->pool);
/* p + 1: presumably one extra byte for a terminator -- TODO confirm. */
227 n->text = mp_end(ctx->pool, p + 1);
228 if ((ctx->flags & XML_REPORT_PIS) && ctx->h_pi)
/* Leave the processing-instruction node.  xml_pop_dom() receives a non-zero
 * second argument unless XML_ALLOC_PIS is set (presumably a request to free
 * the node when PIs are not kept). */
233 xml_pop_pi(struct xml_context *ctx)
235 xml_pop_dom(ctx, !(ctx->flags & XML_ALLOC_PIS));
237 TRACE(ctx, "pop_pi");
/* Skip a processing instruction without building a node.  When
 * XML_VALIDATING is set, the target name is parsed into ctx->stack just long
 * enough to compare it with the reserved name "xml" (any letter case) and the
 * stack is rolled back afterwards. */
241 xml_skip_pi(struct xml_context *ctx)
243 TRACE(ctx, "skip_pi");
244 if (ctx->flags & XML_VALIDATING)
246 struct mempool_state state;
/* Remember the stack state so the temporary name can be released again. */
247 mp_save(ctx->stack, &state);
248 if (unlikely(!strcasecmp(xml_parse_name(ctx, ctx->stack), "xml")))
249 xml_error(ctx, "Reserved PI target");
250 mp_restore(ctx->stack, &state);
251 if (!xml_parse_white(ctx, 0))
253 xml_parse_seq(ctx, "?>");
259 if (xml_get_char(ctx) == '?')
260 if (xml_peek_char(ctx) == '>')
266 /*** Character data ***/
/* Finish the character data collected in ctx->chars and attach it to the
 * current node (ctx->node) as n->text / n->len.  The first test detects an
 * empty buffer (bufend equal to buffer).  Callers in xml_next() use the
 * return value as a truth value. */
269 xml_flush_chars(struct xml_context *ctx)
271 struct fastbuf *fb = &ctx->chars;
272 if (fb->bufend == fb->buffer)
274 TRACE(ctx, "flush_chars");
275 struct xml_node *n = ctx->node;
/* NOTE(review): n->len is written twice -- once through the pointer handed
 * to xml_end_chars() and again on the following line.  Confirm which of the
 * two values is the intended one. */
276 n->text = xml_end_chars(ctx, &n->len);
277 n->len = fb->bufend - fb->buffer;
278 if ((ctx->flags & XML_REPORT_CHARS) && ctx->h_chars)
/* Leave a character-data node.  xml_pop_dom() receives a non-zero second
 * argument unless XML_ALLOC_CHARS is set (presumably a request to free the
 * node when character data is not kept). */
284 xml_pop_chars(struct xml_context *ctx)
286 xml_pop_dom(ctx, !(ctx->flags & XML_ALLOC_CHARS));
287 TRACE(ctx, "pop_chars");
/* Append character data to the ctx->chars buffer.  Input is consumed until a
 * left angle bracket is read; an ampersand takes a separate branch
 * (presumably reference expansion -- TODO confirm), and the visible write
 * stores the character just read, UTF-8 encoded. */
291 xml_append_chars(struct xml_context *ctx)
293 TRACE(ctx, "append_chars");
294 struct fastbuf *out = &ctx->chars;
295 while (xml_get_char(ctx) != '<')
296 if (xml_last_char(ctx) == '&')
302 bput_utf8_32(out, xml_last_char(ctx));
306 /*** CDATA sections ***/
/* Parse a CDATA section into a freshly pushed DOM node.  The node is typed
 * XML_NODE_CHARS, i.e. the same type as ordinary character data.  The text is
 * accumulated in ctx->pool and stored in n->text with its byte length in
 * n->len.  The closing test selects the case where XML_REPORT_CHARS is set
 * and an h_cdata hook is installed. */
309 xml_push_cdata(struct xml_context *ctx)
311 TRACE(ctx, "push_cdata");
312 /* CDSect ::= '<![CDATA[' (Char* - (Char* ']]>' Char*)) ']]>'
313 * Already parsed: '<![' */
314 xml_parse_seq(ctx, "CDATA[");
315 struct xml_node *n = xml_push_dom(ctx);
316 n->type = XML_NODE_CHARS;
317 char *p = mp_start_noalign(ctx->pool, 7);
320 if (xml_get_char(ctx) == ']')
322 if (xml_get_char(ctx) == ']')
323 if (xml_get_char(ctx) == '>')
329 p = utf8_32_put(p, xml_last_char(ctx));
330 p = mp_spread(ctx->pool, p, 7);
333 n->len = p - (char *)mp_ptr(ctx->pool);
/* p + 1: presumably one extra byte for a terminator -- TODO confirm. */
334 n->text = mp_end(ctx->pool, p + 1);
335 if ((ctx->flags & XML_REPORT_CHARS) && ctx->h_cdata)
/* Leave a CDATA node.  The decision passed to xml_pop_dom() is governed by
 * XML_ALLOC_CHARS, the same flag that xml_pop_chars() consults. */
340 xml_pop_cdata(struct xml_context *ctx)
342 xml_pop_dom(ctx, !(ctx->flags & XML_ALLOC_CHARS));
344 TRACE(ctx, "pop_cdata");
/* Append the content of a CDATA section to the ctx->chars buffer instead of
 * creating a separate node.  After the "CDATA[" keyword, characters are read
 * while watching for the bracket-bracket-angle terminator; the visible write
 * stores the character just read, UTF-8 encoded. */
348 xml_append_cdata(struct xml_context *ctx)
350 TRACE(ctx, "append_cdata");
351 xml_parse_seq(ctx, "CDATA[");
352 struct fastbuf *out = &ctx->chars;
355 if (xml_get_char(ctx) == ']')
357 if (xml_get_char(ctx) == ']')
358 if (xml_get_char(ctx) == '>')
364 bput_utf8_32(out, xml_last_char(ctx));
/* Skip a CDATA section without storing anything: require the "CDATA["
 * keyword, then read characters until the terminator has been consumed. */
370 xml_skip_cdata(struct xml_context *ctx)
372 TRACE(ctx, "skip_cdata");
373 xml_parse_seq(ctx, "CDATA[");
/* NOTE(review): the scan restarts from scratch after any mismatch, so a
 * terminator preceded by an extra closing bracket (three brackets, then the
 * angle bracket) is not recognised: the third bracket fails the test for the
 * angle bracket and the angle bracket then fails the test for a bracket. */
374 while (xml_get_char(ctx) != ']' || xml_get_char(ctx) != ']' || xml_get_char(ctx) != '>');
378 /*** Character references ***/
/* Parse a numeric character reference; the caller in xml_parse_ref() writes
 * the returned value to the output as a character.  A leading x selects
 * hexadecimal digits, otherwise decimal ones; accumulation stops as soon as
 * the value reaches 0x110000.  The value is classified with xml_char_cat()
 * and reported as out of range when it is not XML_CHAR_UNRESTRICTED_1_1 and
 * either the document is XML 1.1 or the value is not XML_CHAR_VALID_1_0.
 * Problems are reported through xml_error(); the final return yields
 * UNI_REPLACEMENT. */
381 xml_parse_char_ref(struct xml_context *ctx)
383 TRACE(ctx, "parse_char_ref");
384 /* CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
385 * Already parsed: '&#' */
387 if (xml_get_char(ctx) == 'x')
/* Hexadecimal form: the first digit is a newly read character. */
389 if (!(xml_get_cat(ctx) & XML_CHAR_XDIGIT))
391 xml_error(ctx, "Expected a hexadecimal value of character reference");
396 v = (v << 4) + Cxvalue(xml_last_char(ctx));
398 while (v < 0x110000 && (xml_get_cat(ctx) & XML_CHAR_XDIGIT));
/* Decimal form: xml_last_cat() re-tests the character that the x check
 * above already consumed, so no digit is lost. */
402 if (!(xml_last_cat(ctx) & XML_CHAR_DIGIT))
404 xml_error(ctx, "Expected a numeric value of character reference");
409 v = v * 10 + xml_last_char(ctx) - '0';
411 while (v < 0x110000 && (xml_get_cat(ctx) & XML_CHAR_DIGIT));
413 uns cat = xml_char_cat(v);
414 if (!(cat & XML_CHAR_UNRESTRICTED_1_1) && ((ctx->flags & XML_VERSION_1_1) || !(cat & XML_CHAR_VALID_1_0)))
416 xml_error(ctx, "Character reference out of range");
/* The digit loops leave the first non-digit as the last character read. */
419 if (xml_last_char(ctx) == ';')
424 xml_error(ctx, "Expected ';'");
/* Error recovery: keep going until the terminating semicolon has been seen. */
426 while (xml_last_char(ctx) != ';')
429 return UNI_REPLACEMENT;
432 /*** References to general entities ***/
/* Handle a reference found in content, writing to the ctx->chars buffer.
 * A hash sign after the ampersand means a character reference, whose value is
 * written UTF-8 encoded.  Otherwise the entity name is parsed into ctx->stack,
 * the semicolon is required, and the entity is looked up with
 * xml_dtd_find_ent(): an unknown entity is reported with xml_error(), an
 * entity flagged XML_DTD_ENT_TRIVIAL has its text copied straight to the
 * output, and any other entity is pushed with xml_push_entity(). */
435 xml_parse_ref(struct xml_context *ctx)
437 /* Reference ::= EntityRef | CharRef
438 * EntityRef ::= '&' Name ';'
439 * Already parsed: '&' */
440 struct fastbuf *out = &ctx->chars;
441 if (xml_peek_char(ctx) == '#')
444 bput_utf8_32(out, xml_parse_char_ref(ctx));
448 TRACE(ctx, "parse_ge_ref");
449 struct mempool_state state;
/* The name only lives on ctx->stack until the matching mp_restore(). */
450 mp_save(ctx->stack, &state);
451 char *name = xml_parse_name(ctx, ctx->stack);
452 xml_parse_char(ctx, ';');
453 struct xml_dtd_ent *ent = xml_dtd_find_ent(ctx, name);
456 xml_error(ctx, "Unknown entity &%s;", name);
461 else if (ent->flags & XML_DTD_ENT_TRIVIAL)
463 TRACE(ctx, "Trivial entity &%s;", name);
464 bwrite(out, ent->text, ent->len);
468 TRACE(ctx, "Pushed entity &%s;", name);
/* The stack is released before the entity is pushed; name is not used
 * after this point in the visible code. */
469 mp_restore(ctx->stack, &state);
471 xml_push_entity(ctx, ent);
474 mp_restore(ctx->stack, &state);
479 /*** Attribute values ***/
482 xml_parse_attr_value(struct xml_context *ctx, struct xml_dtd_attr *attr UNUSED)
484 TRACE(ctx, "parse_attr_value");
485 /* AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'" */
487 * -- copying from ctx->chars to ctx->pool is not necessary, we could directly write to ctx->pool
488 * -- beware quotes inside parsed entities
489 * -- check value constraints / normalize value */
490 struct mempool_state state;
491 uns quote = xml_parse_quote(ctx);
492 mp_save(ctx->stack, &state);
493 xml_start_chars(ctx);
494 struct fastbuf *out = &ctx->chars;
497 uns c = xml_get_char(ctx);
503 else if (c == quote) // FIXME: beware quotes inside parsed entities
506 xml_error(ctx, "Attribute value must not contain '<'");
507 else if (xml_last_cat(ctx) & XML_CHAR_WHITE)
510 bput_utf8_32(out, c);
512 mp_restore(ctx->stack, &state);
514 return xml_end_chars(ctx, &len);
519 struct xml_attrs_table;
/* Hash function for the attribute table: combines the hash of the owning
 * element pointer with the hash of the attribute name by XOR.  The table
 * argument is unused. */
522 xml_attrs_hash(struct xml_attrs_table *t UNUSED, struct xml_node *e, char *n)
524 return hash_pointer(e) ^ hash_string(n);
/* Key equality for the attribute table: two keys match when they name the
 * same element node (pointer identity) and their attribute names compare
 * equal with strcmp().  The table argument is unused. */
528 xml_attrs_eq(struct xml_attrs_table *t UNUSED, struct xml_node *e1, char *n1, struct xml_node *e2, char *n2)
530 return (e1 == e2) && !strcmp(n1, n2);
/* Key initialiser for a new attribute entry.  The visible statement appends
 * the attribute to the tail of its element's attrs list, so the element keeps
 * its attributes in insertion order. */
534 xml_attrs_init_key(struct xml_attrs_table *t UNUSED, struct xml_attr *a, struct xml_node *e, char *name)
539 slist_add_tail(&e->attrs, &a->n);
/* Parameters for the generic hash table brought in by the include that closes
 * this run (presumably lib/hashtable.h reads these macros -- TODO confirm).
 * Generated identifiers carry the xml_attrs_ prefix, nodes are struct
 * xml_attr, and the key is the pair (element node, attribute name).  The GIVE
 * macros correspond to the hash and key-initialisation callbacks supplied by
 * this file; the WANT macros correspond to the cleanup, remove, lookup and
 * find operations used further on. */
542 #define HASH_PREFIX(x) xml_attrs_##x
543 #define HASH_NODE struct xml_attr
544 #define HASH_KEY_COMPLEX(x) x elem, x name
545 #define HASH_KEY_DECL struct xml_node *elem, char *name
546 #define HASH_TABLE_DYNAMIC
548 #define HASH_GIVE_HASHFN
549 #define HASH_GIVE_INIT_KEY
550 #define HASH_WANT_CLEANUP
551 #define HASH_WANT_REMOVE
552 #define HASH_WANT_LOOKUP
553 #define HASH_WANT_FIND
554 #define HASH_GIVE_ALLOC
556 #include "lib/hashtable.h"
/* Parse one attribute of the current element (ctx->node).  The name is
 * stored in ctx->pool and looked up in the attribute table under the key
 * (element, name); the value is then parsed with xml_parse_attr_value().  A
 * repeated attribute name is reported with xml_error(). */
559 xml_parse_attr(struct xml_context *ctx)
561 TRACE(ctx, "parse_attr");
562 /* Attribute ::= Name Eq AttValue */
564 * -- memory management
566 struct xml_node *e = ctx->node;
567 char *n = xml_parse_name(ctx, ctx->pool);
568 struct xml_attr *a = xml_attrs_lookup(ctx->tab_attrs, e, n);
570 char *v = xml_parse_attr_value(ctx, NULL);
572 xml_error(ctx, "Attribute %s is not unique", n);
/* Look up the attribute called name on the given element node in the
 * context's attribute table; returns whatever xml_attrs_find() returns. */
578 xml_attr_find(struct xml_context *ctx, struct xml_node *node, char *name)
580 return xml_attrs_find(ctx->tab_attrs, node, name);
/* Create the attribute table: obtain storage for it with xml_hash_new() from
 * ctx->pool, remember it in ctx->tab_attrs and initialise it. */
584 xml_attrs_table_init(struct xml_context *ctx)
586 xml_attrs_init(ctx->tab_attrs = xml_hash_new(ctx->pool, sizeof(struct xml_attrs_table)));
/* Clean up the attribute table referenced by ctx->tab_attrs. */
590 xml_attrs_table_cleanup(struct xml_context *ctx)
592 xml_attrs_cleanup(ctx->tab_attrs);
/* Parse a start tag or empty-element tag into a freshly pushed DOM node of
 * type XML_NODE_ELEM with empty sons and attrs lists; the element name is
 * stored in ctx->pool.  A name that differs from ctx->doctype is reported
 * with xml_error() (the message shows this check is meant for the root
 * element).  XML_EMPTY_ELEM_TAG is raised in ctx->flags on the branch that
 * goes on to require the closing angle bracket.  The closing test selects the
 * case where XML_REPORT_TAGS is set and an h_stag hook is installed. */
598 xml_push_element(struct xml_context *ctx)
600 TRACE(ctx, "push_element");
601 /* EmptyElemTag | STag
602 * EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
603 * STag ::= '<' Name (S Attribute)* S? '>'
604 * Already parsed: '<' */
605 struct xml_node *e = xml_push_dom(ctx);
606 clist_init(&e->sons);
607 e->type = XML_NODE_ELEM;
608 e->name = xml_parse_name(ctx, ctx->pool);
609 slist_init(&e->attrs);
613 if (ctx->doctype && strcmp(e->name, ctx->doctype))
614 xml_error(ctx, "The root element %s does not match the document type %s", e->name, ctx->doctype);
/* Remember whether white space preceded the next token; the grammar demands
 * it in front of every attribute. */
618 uns white = xml_parse_white(ctx, 0);
619 uns c = xml_get_char(ctx);
622 xml_parse_char(ctx, '>');
623 ctx->flags |= XML_EMPTY_ELEM_TAG;
629 xml_fatal_expected_white(ctx);
633 if ((ctx->flags & XML_REPORT_TAGS) && ctx->h_stag)
/* Leave the current element.  The opening test selects the case where
 * XML_REPORT_TAGS is set and an h_etag hook is installed.  The "free" local
 * is non-zero unless XML_ALLOC_TAGS is set and is handed to xml_pop_dom() at
 * the end.  Before that, the visible code removes the attributes of the
 * element, and of element nodes found while walking its sons, from the
 * attribute table. */
638 xml_pop_element(struct xml_context *ctx)
640 TRACE(ctx, "pop_element");
641 if ((ctx->flags & XML_REPORT_TAGS) && ctx->h_etag)
643 struct xml_node *e = ctx->node;
644 uns free = !(ctx->flags & XML_ALLOC_TAGS);
649 /* Restore hash table of attributes */
650 SLIST_FOR_EACH(struct xml_attr *, a, e->attrs)
651 xml_attrs_remove(ctx->tab_attrs, a);
/* Intentional assignment in the condition: each round takes the current
 * head of the sons list. */
653 while (n = clist_head(&e->sons))
655 if (n->type == XML_NODE_ELEM)
657 SLIST_FOR_EACH(struct xml_attr *, a, n->attrs)
658 xml_attrs_remove(ctx->tab_attrs, a);
/* Splice the sons of n into the list right after n, so they are visited by
 * this same loop rather than by recursion. */
659 clist_insert_list_after(&n->sons, &n->n);
664 xml_pop_dom(ctx, free);
/* Parse an end tag and check it against the current element (ctx->node).
 * The element name is decoded one UTF-8 character at a time and compared with
 * the characters read from the input; optional white space and the closing
 * angle bracket follow.  A mismatch is reported with xml_error(), after which
 * input is consumed up to and including the next closing angle bracket. */
669 xml_parse_etag(struct xml_context *ctx)
671 /* ETag ::= '</' Name S? '>'
672 * Already parsed: '<' */
673 struct xml_node *e = ctx->node;
679 n = utf8_32_get(n, &c);
680 if (xml_get_char(ctx) != c)
683 xml_parse_white(ctx, 0);
684 if (xml_get_char(ctx) != '>')
687 xml_error(ctx, "Invalid ETag, expected </%s>", e->name);
688 while (xml_get_char(ctx) != '>');
693 /*** Document type declaration ***/
/* Parse the head of a document type declaration.  A second declaration is a
 * fatal error.  The document type name is stored in ctx->doctype (allocated
 * from ctx->pool).  If white space is followed by S or P, an external
 * identifier is parsed: SYSTEM fills ctx->system_id, PUBLIC fills
 * ctx->public_id and then ctx->system_id.  XML_HAS_EXTERNAL_SUBSET and
 * XML_HAS_INTERNAL_SUBSET are recorded in ctx->flags, and the h_doctype_decl
 * hook is called when installed.  As the comment below says, parsing stops in
 * front of the internal subset or the closing angle bracket. */
696 xml_parse_doctype_decl(struct xml_context *ctx)
698 TRACE(ctx, "parse_doctype_decl");
699 /* doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>'
700 * Already parsed: '<!'
701 * Terminated before '[' or '>' */
703 xml_fatal(ctx, "Multiple document types not allowed");
704 xml_parse_seq(ctx, "DOCTYPE");
/* Here the white space is mandatory (argument 1), unlike the optional
 * variant used elsewhere -- presumably; confirm against xml_parse_white(). */
705 xml_parse_white(ctx, 1);
706 ctx->doctype = xml_parse_name(ctx, ctx->pool);
707 TRACE(ctx, "doctype=%s", ctx->doctype);
/* Only the first letter is peeked to choose between SYSTEM and PUBLIC; the
 * full keyword is verified by xml_parse_seq() afterwards. */
709 if (xml_parse_white(ctx, 0) && ((c = xml_peek_char(ctx)) == 'S' || c == 'P'))
713 xml_parse_seq(ctx, "SYSTEM");
714 xml_parse_white(ctx, 1);
715 ctx->system_id = xml_parse_system_literal(ctx, ctx->pool);
719 xml_parse_seq(ctx, "PUBLIC");
720 xml_parse_white(ctx, 1);
721 ctx->public_id = xml_parse_pubid_literal(ctx, ctx->pool);
722 xml_parse_white(ctx, 1);
723 ctx->system_id = xml_parse_system_literal(ctx, ctx->pool);
725 xml_parse_white(ctx, 0);
726 ctx->flags |= XML_HAS_EXTERNAL_SUBSET;
728 if (xml_peek_char(ctx) == '[')
729 ctx->flags |= XML_HAS_INTERNAL_SUBSET;
730 if (ctx->h_doctype_decl)
731 ctx->h_doctype_decl(ctx);
736 ///////////////////////////////////////////////////////////////////////////////////////////////////////////
738 /* DTD: Internal subset */
741 xml_parse_internal_subset(struct xml_context *ctx)
743 // FIXME: comments/pi have no parent
745 * intSubset ::= (markupdecl | DeclSep)*
746 * Already parsed: '[' */
749 xml_parse_white(ctx, 0);
750 uns c = xml_get_char(ctx);
753 if ((c = xml_get_char(ctx)) == '!')
754 switch (c = xml_get_char(ctx))
757 xml_push_comment(ctx);
758 xml_pop_comment(ctx);
761 xml_parse_seq(ctx, "OTATION");
762 xml_parse_notation_decl(ctx);
765 if ((c = xml_get_char(ctx)) == 'N')
767 xml_parse_seq(ctx, "TITY");
768 xml_parse_entity_decl(ctx);
772 xml_parse_seq(ctx, "EMENT");
773 xml_parse_element_decl(ctx);
779 xml_parse_seq(ctx, "TTLIST");
780 xml_parse_attr_list_decl(ctx);
793 xml_parse_pe_ref(ctx);
803 xml_fatal(ctx, "Invalid markup in the internal subset");
806 /*** The State Machine ***/
/* Advance the parser by one step.  The body is one large state machine that
 * is re-entered through the case labels planted by the PULL and PULL_STATE
 * macros: when the matching XML_PULL_* bit is set in ctx->pull, the macro
 * returns to the caller after storing in ctx->state the state at which the
 * next call resumes.  PULL returns that same state; PULL_STATE(x, s) resumes
 * at state s but returns XML_STATE_x.
 *
 * Errors are caught with setjmp() on throw_buf, which is published through
 * ctx->throw_buf (presumably the target of a longjmp in the error routines --
 * TODO confirm).  The first handler sets ctx->state to XML_STATE_EOF and
 * returns it; the epilog installs its own handler so that hitting the end of
 * input there counts as a regular end of document. */
809 xml_next(struct xml_context *ctx)
811 /* A nasty state machine */
813 #define PULL(x) do { if (ctx->pull & XML_PULL_##x) return ctx->state = XML_STATE_##x; case XML_STATE_##x: ; } while (0)
814 #define PULL_STATE(x, s) do { if (ctx->pull & XML_PULL_##x) return ctx->state = XML_STATE_##s, XML_STATE_##x; case XML_STATE_##s: ; } while (0)
816 TRACE(ctx, "xml_next (state=%u)", ctx->state);
818 ctx->throw_buf = &throw_buf;
819 if (setjmp(throw_buf))
822 if (ctx->err_code == XML_ERR_EOF && ctx->h_fatal)
824 TRACE(ctx, "raised fatal error");
825 return ctx->state = XML_STATE_EOF;
830 case XML_STATE_START:
831 TRACE(ctx, "entering prolog");
832 if (ctx->h_document_start)
833 ctx->h_document_start(ctx);
837 ctx->h_xml_decl(ctx);
840 /* Misc* (doctypedecl Misc*)? */
843 xml_parse_white(ctx, 0);
844 xml_parse_char(ctx, '<');
845 if ((c = xml_get_char(ctx)) == '?')
846 /* Processing instruction */
847 if (!(ctx->flags & XML_REPORT_PIS))
852 PULL_STATE(PI, PROLOG_PI);
857 /* Found the root tag */
861 else if (xml_get_char(ctx) == '-')
862 if (!(ctx->flags & XML_REPORT_COMMENTS))
863 xml_skip_comment(ctx);
866 xml_push_comment(ctx);
867 PULL_STATE(COMMENT, PROLOG_COMMENT);
868 xml_pop_comment(ctx);
874 xml_parse_doctype_decl(ctx);
876 if (xml_peek_char(ctx) == '[')
878 // FIXME: ability to skip the subset
882 if (ctx->h_dtd_start)
883 ctx->h_dtd_start(ctx);
884 xml_parse_internal_subset(ctx);
885 // FIXME: external subset
888 xml_parse_white(ctx, 0);
890 xml_parse_char(ctx, '>');
894 case XML_STATE_CHARS:
898 if (xml_peek_char(ctx) != '<')
901 xml_append_chars(ctx);
909 if ((c = xml_get_char(ctx)) == '?')
/* In content, a PI is skipped only when it is neither reported nor kept
 * in the tree (the prolog and epilog test XML_REPORT_PIS alone). */
912 if (!(ctx->flags & (XML_REPORT_PIS | XML_ALLOC_PIS)))
/* Pending character data is flushed, and offered to the caller, before
 * the markup that follows it is processed. */
916 if (xml_flush_chars(ctx))
918 PULL_STATE(CHARS, CHARS_BEFORE_PI);
928 if ((c = xml_get_char(ctx)) == '-')
931 if (!(ctx->flags & (XML_REPORT_COMMENTS | XML_ALLOC_COMMENTS)))
932 xml_skip_comment(ctx);
935 if (xml_flush_chars(ctx))
937 PULL_STATE(CHARS, CHARS_BEFORE_COMMENT);
940 xml_push_comment(ctx);
942 xml_pop_comment(ctx);
/* Without XML_UNFOLD_CDATA the section is merged into the surrounding
 * character data. */
948 if (!(ctx->flags & XML_UNFOLD_CDATA))
949 xml_append_cdata(ctx);
952 if (xml_flush_chars(ctx))
954 PULL_STATE(CHARS, CHARS_BEFORE_CDATA);
963 xml_fatal(ctx, "Unexpected character after '<!'");
967 /* STag | EmptyElemTag */
969 if (xml_flush_chars(ctx))
971 PULL_STATE(CHARS, CHARS_BEFORE_STAG);
975 xml_push_element(ctx);
977 if (ctx->flags & XML_EMPTY_ELEM_TAG)
984 if (xml_flush_chars(ctx))
986 PULL_STATE(CHARS, CHARS_BEFORE_ETAG);
993 xml_pop_element(ctx);
1001 TRACE(ctx, "entering epilog");
1004 /* Epilog whitespace is the only place where a valid document can reach EOF */
1005 if (setjmp(throw_buf))
1006 if (ctx->err_code == XML_ERR_EOF)
1008 TRACE(ctx, "reached EOF");
1009 ctx->state = XML_STATE_EOF;
1010 if (ctx->h_document_end)
1011 ctx->h_document_end(ctx);
1014 ctx->err_msg = NULL;
1015 return XML_STATE_EOF;
1019 xml_parse_white(ctx, 0);
/* A second setjmp() follows the white-space scan, replacing the handler
 * installed just above. */
1020 if (setjmp(throw_buf))
1024 xml_parse_char(ctx, '<');
1026 if ((c = xml_get_char(ctx)) == '?')
1027 /* Processing instruction */
1028 if (!(ctx->flags & XML_REPORT_PIS))
1033 PULL_STATE(PI, EPILOG_PI);
1038 xml_parse_char(ctx, '-');
1040 if (!(ctx->flags & XML_REPORT_COMMENTS))
1041 xml_skip_comment(ctx);
1044 xml_push_comment(ctx);
1045 PULL_STATE(COMMENT, EPILOG_COMMENT);
1046 xml_pop_comment(ctx);
1050 xml_fatal(ctx, "Syntax error in the epilog");
/* Entry point that reports the outcome of parsing as the context's error
 * code (ctx->err_code). */
1058 xml_parse(struct xml_context *ctx)
1062 return ctx->err_code;