2 * Sherlock Library -- A simple XML parser
4 * (c) 2007 Pavel Charvat <pchar@ucw.cz>
6 * This software may be freely distributed and used according to the terms
7 * of the GNU Lesser General Public License.
12 * - stack-like memory handling where possible
18 #include "lib/mempool.h"
19 #include "lib/fastbuf.h"
20 #include "lib/ff-unicode.h"
21 #include "lib/ff-binary.h"
22 #include "lib/chartype.h"
23 #include "lib/unicode.h"
24 #include "lib/hashfunc.h"
25 #include "lib/stkstring.h"
26 #include "lib/unaligned.h"
27 #include "charset/charconv.h"
28 #include "charset/fb-charconv.h"
29 #include "sherlock/xml/xml.h"
30 #include "sherlock/xml/dtd.h"
37 #define TRACE(c, f, p...) do { DBG("XML %u: " f, xml_row(c), ##p); } while(0)
39 #define TRACE(c, f, p...) do {} while(0)
42 static uns xml_row(struct xml_context *ctx);
44 /*** Error handling ***/
/* Non-local exit from the parser: jumps to the jmp_buf stored in ctx->throw_buf,
 * passing ctx->err_code as the longjmp() value.  Both fields must be set
 * beforehand; this is asserted. */
47 xml_throw(struct xml_context *ctx)
49 ASSERT(ctx->err_code && ctx->throw_buf);
50 longjmp(*(jmp_buf *)ctx->throw_buf, ctx->err_code);
54 xml_warn(struct xml_context *ctx, const char *format, ...)
59 va_start(args, format);
60 ctx->err_msg = stk_vprintf(format, args);
61 ctx->err_code = XML_ERR_WARN;
65 ctx->err_code = XML_ERR_OK;
70 xml_error(struct xml_context *ctx, const char *format, ...)
75 va_start(args, format);
76 ctx->err_msg = stk_vprintf(format, args);
77 ctx->err_code = XML_ERR_ERROR;
81 ctx->err_code = XML_ERR_OK;
86 xml_fatal(struct xml_context *ctx, const char *format, ...)
89 va_start(args, format);
90 ctx->err_msg = mp_vprintf(ctx->pool, format, args);
91 ctx->err_code = XML_ERR_FATAL;
92 ctx->state = XML_STATE_FATAL;
99 /*** Character categorization ***/
101 #include "obj/sherlock/xml/unicat.h"
107 return 1U << xml_char_tab2[(c & 0xff) + xml_char_tab1[c >> 8]];
108 else if (likely(c < 0x110000))
109 return 1U << xml_char_tab3[c >> 16];
114 /*** Memory management ***/
117 xml_fatal_nested(struct xml_context *ctx)
119 xml_fatal(ctx, "Entity not nested correctly");
123 xml_inc(struct xml_context *ctx)
125 /* Called after the first character of a block */
131 xml_dec(struct xml_context *ctx)
133 /* Called after the last character of a block */
135 if (unlikely(!ctx->depth--))
136 xml_fatal_nested(ctx);
140 xml_push(struct xml_context *ctx)
143 struct xml_stack *s = mp_alloc(ctx->pool, sizeof(*s));
144 mp_save(ctx->pool, &s->saved_pool);
145 s->saved_flags = ctx->flags;
146 s->next = ctx->stack;
152 xml_pop(struct xml_context *ctx)
156 struct xml_stack *s = ctx->stack;
158 ctx->stack = s->next;
159 ctx->flags = s->saved_flags;
160 mp_restore(ctx->pool, &s->saved_pool);
163 #define XML_HASH_HDR_SIZE ALIGN_TO(sizeof(void *), CPU_STRUCT_ALIGN)
164 #define XML_HASH_GIVE_ALLOC struct HASH_PREFIX(table); \
165 static inline void *HASH_PREFIX(alloc)(struct HASH_PREFIX(table) *t, uns size) \
166 { return mp_alloc(*(void **)((void *)t - XML_HASH_HDR_SIZE), size); } \
167 static inline void HASH_PREFIX(free)(struct HASH_PREFIX(table) *t UNUSED, void *p UNUSED) {}
/* Allocate a zero-filled hash table of `size` bytes from `pool`, preceded by a
 * hidden header of XML_HASH_HDR_SIZE bytes.  The first word of the header stores
 * the pool pointer (the HASH_PREFIX(alloc) hook reads it back from that offset);
 * the returned address points just past the header. */
170 xml_hash_new(struct mempool *pool, uns size)
172 void *tab = mp_alloc_zero(pool, size + XML_HASH_HDR_SIZE);
173 *(void **)tab = pool;
174 return tab + XML_HASH_HDR_SIZE;
177 /*** Reading of document/external entities ***/
180 xml_eof(struct xml_context *ctx)
182 ctx->err_msg = "Unexpected EOF";
183 ctx->err_code = XML_ERR_EOF;
188 xml_add_char(u32 **bstop, uns c)
191 *(*bstop)++ = xml_char_cat(c);
194 static struct xml_source *
195 xml_push_source(struct xml_context *ctx, uns flags)
198 struct xml_source *src = ctx->src;
201 src->bptr = ctx->bptr;
202 src->bstop = ctx->bstop;
204 src = mp_alloc_zero(ctx->pool, sizeof(*src));
205 src->next = ctx->src;
206 src->saved_depth = ctx->depth;
208 ctx->flags = (ctx->flags & ~(XML_FLAG_SRC_EOF | XML_FLAG_SRC_EXPECTED_DECL | XML_FLAG_SRC_NEW_LINE | XML_FLAG_SRC_SURROUND | XML_FLAG_SRC_DOCUMENT)) | flags;
209 ctx->bstop = ctx->bptr = src->buf;
211 if (flags & XML_FLAG_SRC_SURROUND)
212 xml_add_char(&ctx->bstop, 0x20);
217 xml_pop_source(struct xml_context *ctx)
219 TRACE(ctx, "xml_pop_source");
220 if (unlikely(ctx->depth != 0))
221 xml_fatal_nested(ctx);
222 struct xml_source *src = ctx->src;
225 ctx->depth = src->saved_depth;
226 ctx->src = src = src->next;
229 ctx->bptr = src->bptr;
230 ctx->bstop = src->bstop;
237 static void xml_refill_utf8(struct xml_context *ctx);
240 xml_push_entity(struct xml_context *ctx, struct xml_dtd_ent *ent)
242 TRACE(ctx, "xml_push_entity");
243 uns cat1 = ctx->src->refill_cat1;
244 uns cat2 = ctx->src->refill_cat2;
245 struct xml_source *src = xml_push_source(ctx, 0);
246 src->refill_cat1 = cat1;
247 src->refill_cat2 = cat2;
248 if (ent->flags & XML_DTD_ENT_EXTERNAL)
249 xml_fatal(ctx, "External entities not implemented"); // FIXME
252 fbbuf_init_read(src->fb = &src->wrap_fb, ent->text, ent->len, 0);
253 src->refill = xml_refill_utf8;
258 xml_set_source(struct xml_context *ctx, struct fastbuf *fb)
260 TRACE(ctx, "xml_set_source");
262 struct xml_source *src = xml_push_source(ctx, XML_FLAG_SRC_DOCUMENT | XML_FLAG_SRC_EXPECTED_DECL);
267 xml_error_restricted(struct xml_context *ctx, uns c)
270 xml_error(ctx, "Corrupted encoding");
272 xml_error(ctx, "Restricted char U+%04X", c);
273 return UNI_REPLACEMENT;
276 static void xml_parse_decl(struct xml_context *ctx);
278 #define REFILL(ctx, func, params...) \
279 struct xml_source *src = ctx->src; \
280 struct fastbuf *fb = src->fb; \
281 if (ctx->bptr == ctx->bstop) \
282 ctx->bptr = ctx->bstop = src->buf; \
283 uns f = ctx->flags, c, t1 = src->refill_cat1, t2 = src->refill_cat2, row = src->row; \
284 u32 *bend = src->buf + ARRAY_SIZE(src->buf), *bstop = ctx->bstop, \
285 *last_0xd = (f & XML_FLAG_SRC_NEW_LINE) ? bstop : bend; \
288 c = func(fb, ##params); \
289 uns t = xml_char_cat(c); \
291 /* Typical branch */ \
292 *bstop++ = c, *bstop++ = t; \
296 /* XML 1.0: 0xA | 0xD | 0xD 0xA */ \
297 /* XML 1.1: 0xA | 0xD | 0xD 0xA | 0x85 | 0xD 0x85 | 0x2028 */ \
299 last_0xd = bstop + 2; \
300 else if (c != 0x2028 && last_0xd == bstop) \
305 xml_add_char(&bstop, 0xa), row++; \
309 /* Used only in XML/TextDecl to switch the encoding */ \
310 *bstop++ = c, *bstop++ = t; \
314 /* Restricted character */ \
315 xml_add_char(&bstop, xml_error_restricted(ctx, c)); \
319 if (f & XML_FLAG_SRC_SURROUND) \
320 xml_add_char(&bstop, 0x20); \
321 f |= XML_FLAG_SRC_EOF; \
325 while (bstop < bend); \
326 ctx->flags = (last_0xd == bstop) ? f | XML_FLAG_SRC_NEW_LINE : f & ~XML_FLAG_SRC_NEW_LINE; \
327 ctx->bstop = bstop; \
331 xml_refill_utf8(struct xml_context *ctx)
333 REFILL(ctx, bget_utf8_repl, ~1U);
337 xml_refill_utf16_le(struct xml_context *ctx)
339 REFILL(ctx, bget_utf16_le_repl, ~1U);
343 xml_refill_utf16_be(struct xml_context *ctx)
345 REFILL(ctx, bget_utf16_be_repl, ~1U);
/* Read one byte from `fb` and map it through the `in_to_x` table to a Unicode
 * code point via conv_x_to_ucs().  A negative bgetc() result is returned
 * unconverted so that it is never used as a table index. */
350 xml_refill_libcharset_bget(struct fastbuf *fb, unsigned short int *in_to_x)
/* The assignment must be parenthesized: `c = bgetc(fb) < 0` would store the
 * result of the comparison (0 or 1) in c instead of the byte that was read. */
354 return (unlikely((c = bgetc(fb)) < 0)) ? c : (int)conv_x_to_ucs(in_to_x[c]);
358 xml_refill_libcharset(struct xml_context *ctx)
360 unsigned short int *in_to_x = ctx->src->refill_in_to_x;
361 REFILL(ctx, xml_refill_libcharset_bget, in_to_x);
368 xml_refill(struct xml_context *ctx)
372 if (ctx->flags & XML_FLAG_SRC_EOF)
374 else if (ctx->flags & XML_FLAG_SRC_EXPECTED_DECL)
378 ctx->src->refill(ctx);
379 TRACE(ctx, "refilled %u characters", (uns)((ctx->bstop - ctx->bptr) / 2));
382 while (ctx->bptr == ctx->bstop);
386 xml_peek_char(struct xml_context *ctx)
388 if (ctx->bptr == ctx->bstop)
394 xml_peek_cat(struct xml_context *ctx)
396 if (ctx->bptr == ctx->bstop)
402 xml_get_char(struct xml_context *ctx)
404 uns c = xml_peek_char(ctx);
410 xml_get_cat(struct xml_context *ctx)
412 uns c = xml_peek_cat(ctx);
418 xml_last_char(struct xml_context *ctx)
420 return ctx->bptr[-2];
424 xml_last_cat(struct xml_context *ctx)
426 return ctx->bptr[-1];
430 xml_skip_char(struct xml_context *ctx)
432 uns c = ctx->bptr[0];
438 xml_unget_char(struct xml_context *ctx)
440 return *(ctx->bptr -= 2);
444 xml_row(struct xml_context *ctx)
446 struct xml_source *src = ctx->src;
450 for (u32 *p = ctx->bstop; p != ctx->bptr; p -= 2)
451 if (p[-1] & src->refill_cat2)
456 /*** Basic parsing ***/
459 xml_fatal_expected(struct xml_context *ctx, uns c)
461 xml_fatal(ctx, "Expected '%c'", c);
465 xml_fatal_expected_white(struct xml_context *ctx)
467 xml_fatal(ctx, "Expected a white space");
471 xml_fatal_expected_quot(struct xml_context *ctx)
473 xml_fatal(ctx, "Expected a quotation mark");
477 xml_parse_white(struct xml_context *ctx, uns mandatory)
479 /* mandatory=1 -> S ::= (#x20 | #x9 | #xD | #xA)+
480 * mandatory=0 -> S? */
482 while (xml_peek_cat(ctx) & XML_CHAR_WHITE)
487 if (unlikely(mandatory && !cnt))
488 xml_fatal_expected_white(ctx);
493 xml_parse_char(struct xml_context *ctx, uns c)
495 /* Consumes a given Unicode character */
496 if (unlikely(c != xml_get_char(ctx)))
497 xml_fatal_expected(ctx, c);
501 xml_parse_seq(struct xml_context *ctx, const char *seq)
503 /* Consumes a given sequence of ASCII characters */
505 xml_parse_char(ctx, *seq++);
509 xml_parse_eq(struct xml_context *ctx)
511 /* Eq ::= S? '=' S? */
512 xml_parse_white(ctx, 0);
513 xml_parse_char(ctx, '=');
514 xml_parse_white(ctx, 0);
518 xml_parse_quote(struct xml_context *ctx)
521 uns c = xml_get_char(ctx);
522 if (unlikely(c != '\'' && c != '\"'))
523 xml_fatal_expected_quot(ctx);
527 /* Names and nmtokens */
530 xml_parse_string(struct xml_context *ctx, uns first_cat, uns next_cat, char *err)
532 char *p = mp_start_noalign(ctx->pool, 1);
533 if (unlikely(!(xml_peek_cat(ctx) & first_cat)))
534 xml_fatal(ctx, "%s", err);
537 p = mp_spread(ctx->pool, p, 5);
538 p = utf8_32_put(p, xml_skip_char(ctx));
540 while (xml_peek_cat(ctx) & next_cat);
542 return mp_end(ctx->pool, p);
546 xml_skip_string(struct xml_context *ctx, uns first_cat, uns next_cat, char *err)
548 if (unlikely(!(xml_get_cat(ctx) & first_cat)))
549 xml_fatal(ctx, "%s", err);
550 while (xml_peek_cat(ctx) & next_cat)
555 xml_parse_name(struct xml_context *ctx)
557 /* Name ::= NameStartChar (NameChar)* */
558 return xml_parse_string(ctx,
559 !(ctx->flags & XML_FLAG_VERSION_1_1) ? XML_CHAR_SNAME_1_0 : XML_CHAR_SNAME_1_1,
560 !(ctx->flags & XML_FLAG_VERSION_1_1) ? XML_CHAR_NAME_1_0 : XML_CHAR_NAME_1_1,
565 xml_skip_name(struct xml_context *ctx)
568 !(ctx->flags & XML_FLAG_VERSION_1_1) ? XML_CHAR_SNAME_1_0 : XML_CHAR_SNAME_1_1,
569 !(ctx->flags & XML_FLAG_VERSION_1_1) ? XML_CHAR_NAME_1_0 : XML_CHAR_NAME_1_1,
574 xml_parse_nmtoken(struct xml_context *ctx)
576 /* Nmtoken ::= (NameChar)+ */
577 uns cat = !(ctx->flags & XML_FLAG_VERSION_1_1) ? XML_CHAR_NAME_1_0 : XML_CHAR_NAME_1_1;
578 return xml_parse_string(ctx, cat, cat, "Expected a nmtoken");
581 /* Simple literals */
584 xml_parse_system_literal(struct xml_context *ctx)
586 /* SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") */
587 char *p = mp_start_noalign(ctx->pool, 1);
588 uns q = xml_parse_quote(ctx), c;
589 while ((c = xml_get_char(ctx)) != q)
591 p = mp_spread(ctx->pool, p, 5);
592 p = utf8_32_put(p, c);
595 return mp_end(ctx->pool, p);
599 xml_parse_pubid_literal(struct xml_context *ctx)
601 /* PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" */
602 char *p = mp_start_noalign(ctx->pool, 1);
603 uns q = xml_parse_quote(ctx), c;
604 while ((c = xml_get_char(ctx)) != q)
606 if (unlikely(!(xml_last_cat(ctx) & XML_CHAR_PUBID)))
607 xml_fatal(ctx, "Expected a pubid character");
608 p = mp_spread(ctx->pool, p, 2);
612 return mp_end(ctx->pool, p);
616 xml_parse_encoding_name(struct xml_context *ctx)
618 /* EncName ::= '"' [A-Za-z] ([A-Za-z0-9._] | '-')* '"' | "'" [A-Za-z] ([A-Za-z0-9._] | '-')* "'" */
619 char *p = mp_start_noalign(ctx->pool, 1);
620 uns q = xml_parse_quote(ctx);
621 if (unlikely(!(xml_get_cat(ctx) & XML_CHAR_ENC_SNAME)))
622 xml_fatal(ctx, "Invalid character in the encoding name");
625 p = mp_spread(ctx->pool, p, 2);
626 *p++ = xml_last_char(ctx);
627 if (xml_get_char(ctx) == q)
629 if (unlikely(!(xml_last_cat(ctx) & XML_CHAR_ENC_NAME)))
630 xml_fatal(ctx, "Invalid character in the encoding name");
633 return mp_end(ctx->pool, p);
636 /* Document/external entity header */
639 xml_init_cats(struct xml_context *ctx, uns mask)
641 if (!(ctx->flags & XML_FLAG_VERSION_1_1))
643 ctx->src->refill_cat1 = XML_CHAR_VALID_1_0 & ~XML_CHAR_NEW_LINE_1_0 & ~mask;
644 ctx->src->refill_cat2 = XML_CHAR_NEW_LINE_1_0;
648 ctx->src->refill_cat1 = XML_CHAR_UNRESTRICTED_1_1 & ~XML_CHAR_NEW_LINE_1_1 & ~mask;
649 ctx->src->refill_cat2 = XML_CHAR_NEW_LINE_1_1;
654 xml_init_charconv(struct xml_context *ctx, int cs)
657 struct xml_source *src = ctx->src;
658 TRACE(ctx, "wrapping charset %s", charset_name(cs));
660 struct conv_context conv;
661 conv_set_charset(&conv, cs, CONV_CHARSET_UTF8);
662 src->refill = xml_refill_libcharset;
663 src->refill_in_to_x = conv.in_to_x;
665 src->fb = fb_wrap_charconv_in(src->fb, cs, CONV_CHARSET_UTF8);
666 // FIXME: memory leak
/* Parse the optional XMLDecl/TextDecl at the start of the current source and
 * select the input decoder: first from the externally supplied encoding
 * (src->fb_encoding) or the BOM, then from the encoding declared in the header.
 * Clears XML_FLAG_SRC_EXPECTED_DECL and re-initializes the character category
 * masks once the header has been processed. */
671 xml_parse_decl(struct xml_context *ctx)
673 TRACE(ctx, "xml_parse_decl");
674 struct xml_source *src = ctx->src;
675 ctx->flags &= ~XML_FLAG_SRC_EXPECTED_DECL;
677 /* Setup valid Unicode ranges and force the reader to abort refill() after each '>', where we can switch encoding or XML version */
678 xml_init_cats(ctx, XML_CHAR_GT);
680 /* Initialize the supplied charset (if any) or try to guess it */
681 char *expected_encoding = src->expected_encoding ? : src->fb_encoding;
682 src->refill = xml_refill_utf8;
683 int bom = bpeekc(src->fb);
685 ctx->flags |= XML_FLAG_SRC_EOF;
686 if (!src->fb_encoding)
689 src->refill = xml_refill_utf16_be;
690 else if (bom == 0xff)
691 src->refill = xml_refill_utf16_le;
695 int cs = find_charset_by_name(src->fb_encoding);
696 if (cs == CONV_CHARSET_UTF8)
700 xml_init_charconv(ctx, cs);
/* strcasecmp() returns 0 on a match, so the UTF-16 family tests must be negated;
 * otherwise every unrecognized name is treated as UTF-16 and the
 * "Unknown encoding" branch below can never be reached. */
703 else if (!strcasecmp(src->fb_encoding, "UTF-16"))
705 src->refill = xml_refill_utf16_be;
707 src->refill = xml_refill_utf16_le;
708 if (!src->expected_encoding)
709 expected_encoding = (bom == 0xff) ? "UTF-16LE" : "UTF-16BE";
711 else if (!strcasecmp(src->fb_encoding, "UTF-16BE"))
712 src->refill = xml_refill_utf16_be;
713 else if (!strcasecmp(src->fb_encoding, "UTF-16LE"))
714 src->refill = xml_refill_utf16_le;
717 xml_error(ctx, "Unknown encoding '%s'", src->fb_encoding);
718 expected_encoding = NULL;
721 uns utf16 = src->refill == xml_refill_utf16_le || src->refill == xml_refill_utf16_be;
722 if (bom > 0 && xml_peek_char(ctx) == 0xfeff)
725 xml_error(ctx, "Missing or corrupted BOM");
727 /* Look ahead for presence of XMLDecl or optional TextDecl */
728 if (!(ctx->flags & XML_FLAG_SRC_EOF) && ctx->bstop != src->buf + ARRAY_SIZE(src->buf))
730 uns doc = ctx->flags & XML_FLAG_SRC_DOCUMENT;
731 u32 *bptr = ctx->bptr;
732 uns have_decl = (12 <= ctx->bstop - ctx->bptr && (bptr[11] & XML_CHAR_WHITE) &&
733 bptr[0] == '<' && bptr[2] == '?' && (bptr[4] & 0xdf) == 'X' && (bptr[6] & 0xdf) == 'M' && (bptr[8] & 0xdf) == 'L');
737 xml_fatal(ctx, "Missing or corrupted XML header");
/* The local expected_encoding may have been taken from src->fb_encoding, in which
 * case src->expected_encoding is NULL and must not be passed to strcasecmp(). */
738 else if (expected_encoding && src->expected_encoding && strcasecmp(src->expected_encoding, "UTF-8") && !utf16)
739 xml_error(ctx, "Missing or corrupted entity header");
742 ctx->bptr = bptr + 12;
743 xml_parse_white(ctx, 0);
745 /* Parse version string (mandatory in XMLDecl, optional in TextDecl) */
746 if (xml_peek_char(ctx) == 'v')
748 xml_parse_seq(ctx, "version");
750 char *version = xml_parse_pubid_literal(ctx);
751 TRACE(ctx, "version=%s", version);
753 if (!strcmp(version, "1.1"))
754 v = XML_FLAG_VERSION_1_1;
755 else if (strcmp(version, "1.0"))
757 xml_error(ctx, "Unknown XML version string '%s'", version);
762 ctx->version_str = version;
765 else if (v > (ctx->flags & XML_FLAG_VERSION_1_1))
766 xml_error(ctx, "XML 1.1 external entity included from XML 1.0 document");
767 if (!xml_parse_white(ctx, !doc))
772 xml_error(ctx, "Expected XML version");
773 ctx->version_str = "1.0";
776 /* Parse encoding string (optional in XMLDecl, mandatory in TextDecl) */
777 if (xml_peek_char(ctx) == 'e')
779 xml_parse_seq(ctx, "encoding");
781 src->decl_encoding = xml_parse_encoding_name(ctx);
782 TRACE(ctx, "encoding=%s", src->decl_encoding);
783 if (!xml_parse_white(ctx, 0))
787 xml_error(ctx, "Expected XML encoding");
789 /* Parse whether the document is standalone (optional in XMLDecl) */
790 if (doc && xml_peek_char(ctx) == 's')
792 xml_parse_seq(ctx, "standalone");
794 uns c = xml_parse_quote(ctx);
795 if (ctx->standalone = (xml_peek_char(ctx) == 'y'))
796 xml_parse_seq(ctx, "yes");
798 xml_parse_seq(ctx, "no");
799 xml_parse_char(ctx, c);
800 TRACE(ctx, "standalone=%d", ctx->standalone);
801 xml_parse_white(ctx, 0);
804 xml_parse_seq(ctx, "?>");
806 /* Switch to the final encoding */
807 if (src->decl_encoding)
809 int cs = find_charset_by_name(src->decl_encoding);
810 if (cs < 0 && !expected_encoding)
811 xml_error(ctx, "Unknown encoding '%s'", src->decl_encoding);
812 else if (!src->fb_encoding && cs >= 0 && cs != CONV_CHARSET_UTF8)
813 xml_init_charconv(ctx, cs);
814 else if (expected_encoding && strcasecmp(src->decl_encoding, expected_encoding) && (!utf16 ||
815 !(!strcasecmp(src->decl_encoding, "UTF-16") ||
816 (!strcasecmp(src->decl_encoding, "UTF-16BE") && strcasecmp(expected_encoding, "UTF-16LE")) ||
817 (!strcasecmp(src->decl_encoding, "UTF-16LE") && strcasecmp(expected_encoding, "UTF-16BE")))))
818 xml_error(ctx, "The header contains encoding '%s' instead of expected '%s'", src->decl_encoding, expected_encoding);
822 /* Update valid Unicode ranges */
823 xml_init_cats(ctx, 0);
826 /*** Document Type Definition (DTD) ***/
830 #define HASH_PREFIX(x) xml_dtd_notns_##x
831 #define HASH_NODE struct xml_dtd_notn
832 #define HASH_KEY_STRING name
833 #define HASH_ZERO_FILL
834 #define HASH_TABLE_DYNAMIC
835 #define HASH_WANT_FIND
836 #define HASH_WANT_LOOKUP
837 #define HASH_GIVE_ALLOC
838 #define HASH_TABLE_ALLOC
840 #include "lib/hashtable.h"
842 /* General entities */
844 #define HASH_PREFIX(x) xml_dtd_ents_##x
845 #define HASH_NODE struct xml_dtd_ent
846 #define HASH_KEY_STRING name
847 #define HASH_ZERO_FILL
848 #define HASH_TABLE_DYNAMIC
849 #define HASH_WANT_FIND
850 #define HASH_WANT_LOOKUP
851 #define HASH_GIVE_ALLOC
852 #define HASH_TABLE_ALLOC
854 #include "lib/hashtable.h"
856 static struct xml_dtd_ent *
857 xml_dtd_declare_trivial_gent(struct xml_context *ctx, char *name, char *text)
859 struct xml_dtd *dtd = ctx->dtd;
860 struct xml_dtd_ent *ent = xml_dtd_ents_lookup(dtd->tab_gents, name);
861 if (ent->flags & XML_DTD_ENT_DECLARED)
863 xml_warn(ctx, "Entity &%s; already declared", name);
866 slist_add_tail(&dtd->gents, &ent->n);
867 ent->flags = XML_DTD_ENT_DECLARED | XML_DTD_ENT_TRIVIAL;
869 ent->len = strlen(text);
/* Declare the five predefined general entities (lt, gt, amp, apos, quot) as
 * trivial entities with their single-character replacement texts. */
874 xml_dtd_declare_default_gents(struct xml_context *ctx)
876 xml_dtd_declare_trivial_gent(ctx, "lt", "<");
877 xml_dtd_declare_trivial_gent(ctx, "gt", ">");
878 xml_dtd_declare_trivial_gent(ctx, "amp", "&");
879 xml_dtd_declare_trivial_gent(ctx, "apos", "'");
880 xml_dtd_declare_trivial_gent(ctx, "quot", "\"");
883 static struct xml_dtd_ent *
884 xml_dtd_find_gent(struct xml_context *ctx, char *name)
886 struct xml_dtd *dtd = ctx->dtd;
889 struct xml_dtd_ent *ent = xml_dtd_ents_find(dtd->tab_gents, name);
890 return !ent ? NULL : (ent->flags & XML_DTD_ENT_DECLARED) ? ent : NULL;
894 #define ENT(n, t) ent_##n = { .name = #n, .text = t, .len = 1, .flags = XML_DTD_ENT_DECLARED | XML_DTD_ENT_TRIVIAL }
895 static struct xml_dtd_ent ENT(lt, "<"), ENT(gt, ">"), ENT(amp, "&"), ENT(apos, "'"), ENT(quot, "\"");
900 if (!strcmp(name, "lt"))
904 if (!strcmp(name, "gt"))
908 if (!strcmp(name, "amp"))
910 if (!strcmp(name, "apos"))
914 if (!strcmp(name, "quot"))
922 /* Parameter entities */
/* Look up the parameter entity `name` in the DTD's tab_pents hash table.
 * Returns the entity only if it is flagged XML_DTD_ENT_DECLARED; returns NULL
 * when it is missing or present but not declared. */
924 static struct xml_dtd_ent *
925 xml_dtd_find_pent(struct xml_context *ctx, char *name)
927 struct xml_dtd *dtd = ctx->dtd;
928 struct xml_dtd_ent *ent = xml_dtd_ents_find(dtd->tab_pents, name);
929 return !ent ? NULL : (ent->flags & XML_DTD_ENT_DECLARED) ? ent : NULL;
934 #define HASH_PREFIX(x) xml_dtd_elems_##x
935 #define HASH_NODE struct xml_dtd_elem
936 #define HASH_KEY_STRING name
937 #define HASH_TABLE_DYNAMIC
938 #define HASH_ZERO_FILL
939 #define HASH_WANT_LOOKUP
940 #define HASH_GIVE_ALLOC
941 #define HASH_TABLE_ALLOC
943 #include "lib/hashtable.h"
947 struct xml_dtd_enodes_table;
950 xml_dtd_enodes_hash(struct xml_dtd_enodes_table *tab UNUSED, struct xml_dtd_elem_node *parent, struct xml_dtd_elem *elem)
952 return hash_pointer(parent) ^ hash_pointer(elem);
956 xml_dtd_enodes_eq(struct xml_dtd_enodes_table *tab UNUSED, struct xml_dtd_elem_node *parent1, struct xml_dtd_elem *elem1, struct xml_dtd_elem_node *parent2, struct xml_dtd_elem *elem2)
958 return (parent1 == parent2) && (elem1 == elem2);
962 xml_dtd_enodes_init_key(struct xml_dtd_enodes_table *tab UNUSED, struct xml_dtd_elem_node *node, struct xml_dtd_elem_node *parent, struct xml_dtd_elem *elem)
964 node->parent = parent;
968 #define HASH_PREFIX(x) xml_dtd_enodes_##x
969 #define HASH_NODE struct xml_dtd_elem_node
970 #define HASH_KEY_COMPLEX(x) x parent, x elem
971 #define HASH_KEY_DECL struct xml_dtd_elem_node *parent, struct xml_dtd_elem *elem
972 #define HASH_GIVE_HASHFN
974 #define HASH_GIVE_INIT_KEY
975 #define HASH_TABLE_DYNAMIC
976 #define HASH_ZERO_FILL
977 #define HASH_WANT_FIND
978 #define HASH_WANT_NEW
979 #define HASH_GIVE_ALLOC
980 #define HASH_TABLE_ALLOC
982 #include "lib/hashtable.h"
984 /* Element attributes */
986 struct xml_dtd_attrs_table;
989 xml_dtd_attrs_hash(struct xml_dtd_attrs_table *tab UNUSED, struct xml_dtd_elem *elem, char *name)
991 return hash_pointer(elem) ^ hash_string(name);
995 xml_dtd_attrs_eq(struct xml_dtd_attrs_table *tab UNUSED, struct xml_dtd_elem *elem1, char *name1, struct xml_dtd_elem *elem2, char *name2)
997 return (elem1 == elem2) && !strcmp(name1, name2);
1001 xml_dtd_attrs_init_key(struct xml_dtd_attrs_table *tab UNUSED, struct xml_dtd_attr *attr, struct xml_dtd_elem *elem, char *name)
1007 #define HASH_PREFIX(x) xml_dtd_attrs_##x
1008 #define HASH_NODE struct xml_dtd_attr
1009 #define HASH_ZERO_FILL
1010 #define HASH_TABLE_DYNAMIC
1011 #define HASH_KEY_COMPLEX(x) x elem, x name
1012 #define HASH_KEY_DECL struct xml_dtd_elem *elem, char *name
1013 #define HASH_GIVE_HASHFN
1014 #define HASH_GIVE_EQ
1015 #define HASH_GIVE_INIT_KEY
1016 #define HASH_WANT_FIND
1017 #define HASH_WANT_NEW
1018 #define HASH_GIVE_ALLOC
1019 #define HASH_TABLE_ALLOC
1021 #include "lib/hashtable.h"
1023 /* Enumerated attribute values */
1025 struct xml_dtd_evals_table;
1028 xml_dtd_evals_hash(struct xml_dtd_evals_table *tab UNUSED, struct xml_dtd_attr *attr, char *val)
1030 return hash_pointer(attr) ^ hash_string(val);
1034 xml_dtd_evals_eq(struct xml_dtd_evals_table *tab UNUSED, struct xml_dtd_attr *attr1, char *val1, struct xml_dtd_attr *attr2, char *val2)
1036 return (attr1 == attr2) && !strcmp(val1, val2);
1040 xml_dtd_evals_init_key(struct xml_dtd_evals_table *tab UNUSED, struct xml_dtd_eval *eval, struct xml_dtd_attr *attr, char *val)
1046 #define HASH_PREFIX(x) xml_dtd_evals_##x
1047 #define HASH_NODE struct xml_dtd_eval
1048 #define HASH_TABLE_DYNAMIC
1049 #define HASH_KEY_COMPLEX(x) x attr, x val
1050 #define HASH_KEY_DECL struct xml_dtd_attr *attr, char *val
1051 #define HASH_GIVE_HASHFN
1052 #define HASH_GIVE_EQ
1053 #define HASH_GIVE_INIT_KEY
1054 #define HASH_WANT_FIND
1055 #define HASH_WANT_NEW
1056 #define HASH_GIVE_ALLOC
1057 #define HASH_TABLE_ALLOC
1059 #include "lib/hashtable.h"
1061 /* Enumerated attribute notations */
1063 struct xml_dtd_enotns_table;
1066 xml_dtd_enotns_hash(struct xml_dtd_enotns_table *tab UNUSED, struct xml_dtd_attr *attr, struct xml_dtd_notn *notn)
1068 return hash_pointer(attr) ^ hash_pointer(notn);
1072 xml_dtd_enotns_eq(struct xml_dtd_enotns_table *tab UNUSED, struct xml_dtd_attr *attr1, struct xml_dtd_notn *notn1, struct xml_dtd_attr *attr2, struct xml_dtd_notn *notn2)
1074 return (attr1 == attr2) && (notn1 == notn2);
1078 xml_dtd_enotns_init_key(struct xml_dtd_enotns_table *tab UNUSED, struct xml_dtd_enotn *enotn, struct xml_dtd_attr *attr, struct xml_dtd_notn *notn)
1084 #define HASH_PREFIX(x) xml_dtd_enotns_##x
1085 #define HASH_NODE struct xml_dtd_enotn
1086 #define HASH_TABLE_DYNAMIC
1087 #define HASH_KEY_COMPLEX(x) x attr, x notn
1088 #define HASH_KEY_DECL struct xml_dtd_attr *attr, struct xml_dtd_notn *notn
1089 #define HASH_GIVE_HASHFN
1090 #define HASH_GIVE_EQ
1091 #define HASH_GIVE_INIT_KEY
1092 #define HASH_WANT_FIND
1093 #define HASH_WANT_NEW
1094 #define HASH_GIVE_ALLOC
1095 #define HASH_TABLE_ALLOC
1097 #include "lib/hashtable.h"
1099 /* DTD initialization/cleanup */
1102 xml_dtd_init(struct xml_context *ctx)
1106 struct mempool *pool = mp_new(4096);
1107 struct xml_dtd *dtd = ctx->dtd = mp_alloc_zero(pool, sizeof(*ctx->dtd));
1109 xml_dtd_ents_init(dtd->tab_gents = xml_hash_new(pool, sizeof(struct xml_dtd_ents_table)));
1110 xml_dtd_ents_init(dtd->tab_pents = xml_hash_new(pool, sizeof(struct xml_dtd_ents_table)));
1111 xml_dtd_notns_init(dtd->tab_notns = xml_hash_new(pool, sizeof(struct xml_dtd_notns_table)));
1112 xml_dtd_elems_init(dtd->tab_elems = xml_hash_new(pool, sizeof(struct xml_dtd_elems_table)));
1113 xml_dtd_enodes_init(dtd->tab_enodes = xml_hash_new(pool, sizeof(struct xml_dtd_enodes_table)));
1114 xml_dtd_attrs_init(dtd->tab_attrs = xml_hash_new(pool, sizeof(struct xml_dtd_attrs_table)));
1115 xml_dtd_evals_init(dtd->tab_evals = xml_hash_new(pool, sizeof(struct xml_dtd_evals_table)));
1116 xml_dtd_enotns_init(dtd->tab_enotns = xml_hash_new(pool, sizeof(struct xml_dtd_enotns_table)));
1117 xml_dtd_declare_default_gents(ctx);
1121 xml_dtd_cleanup(struct xml_context *ctx)
1125 mp_delete(ctx->dtd->pool);
1130 xml_dtd_finish(struct xml_context *ctx)
1137 /*** Parsing functions ***/
1142 xml_push_comment(struct xml_context *ctx)
1144 /* Parse a comment to ctx->value:
1145 * Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
1146 * Already parsed: '<!-' */
1147 struct fastbuf *out = ctx->value;
1149 xml_parse_char(ctx, '-');
1152 if ((c = xml_get_char(ctx)) == '-')
1153 if ((c = xml_get_char(ctx)) == '-')
1157 bput_utf8_32(out, c);
1159 xml_parse_char(ctx, '>');
1163 ctx->h_comment(ctx);
1167 xml_pop_comment(struct xml_context *ctx)
1169 fbgrow_rewind(ctx->value);
1173 xml_skip_comment(struct xml_context *ctx)
1175 xml_parse_char(ctx, '-');
1176 while (xml_get_char(ctx) != '-' || xml_get_char(ctx) != '-');
1177 xml_parse_char(ctx, '>');
1181 /* Processing instructions */
1184 xml_push_pi(struct xml_context *ctx)
1186 /* Parses a PI to ctx->value and ctx->name:
1187 * PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
1188 * PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
1189 * Already parsed: '<?' */
1191 ctx->name = xml_parse_name(ctx);
1192 if (unlikely(!strcasecmp(ctx->name, "xml")))
1193 xml_fatal(ctx, "Reserved PI target");
1194 struct fastbuf *out = ctx->value;
1195 if (xml_parse_white(ctx, 0))
1196 xml_parse_seq(ctx, "?>");
1202 if ((c = xml_get_char(ctx)) == '?')
1203 if (xml_get_char(ctx) == '>')
1207 xml_unget_char(ctx);
1211 bput_utf8_32(out, c);
1221 xml_pop_pi(struct xml_context *ctx)
1223 fbgrow_reset(ctx->value);
1227 xml_skip_pi(struct xml_context *ctx)
1229 if (ctx->flags & XML_FLAG_VALIDATING)
1232 if (unlikely(!strcasecmp(xml_parse_name(ctx), "xml")))
1233 xml_fatal(ctx, "Reserved PI target");
1235 if (!xml_parse_white(ctx, 0))
1237 xml_parse_seq(ctx, "?>");
1243 if (xml_get_char(ctx) == '?')
1244 if (xml_get_char(ctx) == '>')
1247 xml_unget_char(ctx);
1251 /* Character references */
1254 xml_parse_char_ref(struct xml_context *ctx)
1256 TRACE(ctx, "parse_char_ref");
1257 /* CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
1258 * Already parsed: '&#' */
1260 if (xml_get_char(ctx) == 'x')
1262 if (!(xml_get_cat(ctx) & XML_CHAR_XDIGIT))
1264 xml_error(ctx, "Expected a hexadecimal value of character reference");
1269 v = (v << 4) + Cxvalue(xml_last_char(ctx));
1271 while (v < 0x110000 && (xml_get_cat(ctx) & XML_CHAR_XDIGIT));
1275 if (!(xml_last_cat(ctx) & XML_CHAR_DIGIT))
1277 xml_error(ctx, "Expected a numeric value of character reference");
1282 v = v * 10 + xml_last_char(ctx) - '0';
1284 while (v < 0x110000 && (xml_get_cat(ctx) & XML_CHAR_DIGIT));
1286 uns cat = xml_char_cat(v);
1287 if (!(cat & XML_CHAR_UNRESTRICTED_1_1) && ((ctx->flags & XML_FLAG_VERSION_1_1) || !(cat & XML_CHAR_VALID_1_0)))
1289 xml_error(ctx, "Character reference out of range");
1292 if (xml_last_char(ctx) == ';')
1297 xml_error(ctx, "Expected ';'");
1299 while (xml_last_char(ctx) != ';')
1302 return UNI_REPLACEMENT;
1305 /* References to general entities */
1308 xml_parse_ge_ref(struct xml_context *ctx, struct fastbuf *out)
1310 /* Reference ::= EntityRef | CharRef
1311 * EntityRef ::= '&' Name ';'
1312 * Already parsed: '&' */
1313 if (xml_peek_char(ctx) == '#')
1316 uns c = xml_parse_char_ref(ctx);
1317 bput_utf8_32(out, c);
1321 struct mempool_state state;
1322 mp_save(ctx->pool, &state);
1323 char *name = xml_parse_name(ctx);
1324 xml_parse_char(ctx, ';');
1325 struct xml_dtd_ent *ent = xml_dtd_find_gent(ctx, name);
1328 xml_error(ctx, "Unknown entity &%s;", name);
1333 else if (ent->flags & XML_DTD_ENT_TRIVIAL)
1335 TRACE(ctx, "Trivial entity &%s;", name);
1336 bwrite(out, ent->text, ent->len);
1340 TRACE(ctx, "Pushed entity &%s;", name);
1341 mp_restore(ctx->pool, &state);
1343 xml_push_entity(ctx, ent);
1346 mp_restore(ctx->pool, &state);
1351 /* References to parameter entities */
1354 xml_parse_pe_ref(struct xml_context *ctx)
1356 /* PEReference ::= '%' Name ';'
1357 * Already parsed: '%' */
1358 struct mempool_state state;
1359 mp_save(ctx->pool, &state);
1360 char *name = xml_parse_name(ctx);
1361 xml_parse_char(ctx, ';');
1362 struct xml_dtd_ent *ent = xml_dtd_find_pent(ctx, name);
1364 xml_error(ctx, "Unknown entity %%%s;", name);
1367 TRACE(ctx, "Pushed entity %%%s;", name);
1368 mp_restore(ctx->pool, &state);
1370 xml_push_entity(ctx, ent);
1373 mp_restore(ctx->pool, &state);
1378 xml_parse_dtd_pe(struct xml_context *ctx)
1384 while (xml_peek_cat(ctx) & XML_CHAR_WHITE)
1386 xml_parse_pe_ref(ctx);
1388 while (xml_peek_char(ctx) != '%');
1392 xml_parse_dtd_white(struct xml_context *ctx, uns mandatory)
1394 /* Whitespace or parameter entity */
1396 while (xml_peek_cat(ctx) & XML_CHAR_WHITE)
1401 if (xml_peek_char(ctx) == '%')
1403 xml_parse_dtd_pe(ctx);
1406 else if (unlikely(mandatory && !cnt))
1407 xml_fatal_expected_white(ctx);
1412 xml_check_dtd_pe(struct xml_context *ctx)
1414 if (xml_peek_char(ctx) == '%')
1416 xml_parse_dtd_pe(ctx);
1425 xml_parse_external_id(struct xml_context *ctx, struct xml_ext_id *eid, uns allow_public, uns dtd)
1427 bzero(eid, sizeof(*eid));
1429 xml_check_dtd_pe(ctx);
1430 uns c = xml_peek_char(ctx);
1433 xml_parse_seq(ctx, "SYSTEM");
1435 xml_parse_dtd_white(ctx, 1);
1437 xml_parse_white(ctx, 1);
1438 eid->system_id = xml_parse_system_literal(ctx);
1442 xml_parse_seq(ctx, "PUBLIC");
1444 xml_parse_dtd_white(ctx, 1);
1446 xml_parse_white(ctx, 1);
1447 eid->public_id = xml_parse_pubid_literal(ctx);
1448 if (dtd ? xml_parse_dtd_white(ctx, 0) : xml_parse_white(ctx, 0))
1449 if ((c = xml_peek_char(ctx)) == '\'' || c == '"' || !allow_public)
1450 eid->system_id = xml_parse_system_literal(ctx);
1453 xml_fatal(ctx, "Expected an external ID");
1456 /* DTD: <!NOTATION ...> */
/* Parse a <!NOTATION ...> declaration: reads the notation name and its
 * external/public ID, warns if the notation is already flagged as declared,
 * and otherwise marks it declared and appends it to dtd->notns. */
1459 xml_parse_notation_decl(struct xml_context *ctx)
1461 /* NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
1462 * Already parsed: '<!NOTATION' */
1463 TRACE(ctx, "parse_notation_decl");
1464 struct xml_dtd *dtd = ctx->dtd;
1465 xml_parse_dtd_white(ctx, 1);
1467 struct xml_dtd_notn *notn = xml_dtd_notns_lookup(dtd->tab_notns, xml_parse_name(ctx));
1468 xml_parse_dtd_white(ctx, 1);
1469 struct xml_ext_id eid;
1470 xml_parse_external_id(ctx, &eid, 1, 1);
1471 xml_parse_dtd_white(ctx, 0);
1472 xml_parse_char(ctx, '>');
1474 if (notn->flags & XML_DTD_NOTN_DECLARED)
1475 xml_warn(ctx, "Notation %s already declared", notn->name);
1478 notn->flags = XML_DTD_NOTN_DECLARED;
/* Link the notation's list node (address of notn->n) into the DTD's notation list. */
1480 slist_add_tail(&dtd->notns, &notn->n);
1485 /* DTD: <!ENTITY ...> */
/* Parse one entity declaration from the DTD.
 * Handles parameter entities (leading '%') and general entities, each with
 * either a quoted internal value or an external identifier. The entity is
 * appended to the matching list and marked XML_DTD_ENT_DECLARED.
 * Redeclaring an entity is currently a fatal error (see the FIXME below).
 * NOTE(review): this listing omits some original lines, so branch
 * conditions and short statements between the numbered lines are missing. */
1488 xml_parse_entity_decl(struct xml_context *ctx)
1490 /* Already parsed: '<!ENTITY' */
1491 TRACE(ctx, "parse_entity_decl");
1492 struct xml_dtd *dtd = ctx->dtd;
1493 xml_parse_dtd_white(ctx, 1);
/* A '%' right after the whitespace marks a parameter entity; flags stays 0
 * for a general entity. */
1495 uns flags = (xml_get_char(ctx) == '%') ? XML_DTD_ENT_PARAMETER : 0;
1497 xml_parse_dtd_white(ctx, 1);
1499 xml_unget_char(ctx);
/* Parameter and general entities are kept in separate tables and lists. */
1501 struct xml_dtd_ent *ent = xml_dtd_ents_lookup(flags ? dtd->tab_pents : dtd->tab_gents, xml_parse_name(ctx));
1502 slist *list = flags ? &dtd->pents : &dtd->gents;
1503 xml_parse_white(ctx, 1);
1504 if (ent->flags & XML_DTD_ENT_DECLARED)
1506 xml_fatal(ctx, "Entity &%s; already declared, skipping not implemented", ent->name);
1507 // FIXME: should be only warning
/* A quote starts an internal entity value, which is collected into
 * ctx->value until the matching quote (sep) is read. */
1510 uns c, sep = xml_get_char(ctx);
1511 if (sep == '\'' || sep == '"')
1514 * EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | "'" ([^%&'] | PEReference | Reference)* "'" */
1515 struct fastbuf *out = ctx->value;
1518 if ((c = xml_get_char(ctx)) == sep)
1524 //xml_parse_parameter_ref(ctx);
1527 bput_utf8_32(out, c);
/* '#' introduces a character reference, which is resolved right away. */
1528 else if ((c = xml_get_char(ctx)) == '#')
1529 c = xml_parse_char_ref(ctx);
1532 /* Bypass references to general entities */
1535 xml_unget_char(ctx);
1536 bputs(out, xml_parse_name(ctx));
1537 xml_parse_char(ctx, ';');
/* Register the entity and copy the collected text into the context pool;
 * ent->len + 1 bytes are copied and the stored length excludes the last. */
1544 slist_add_tail(list, &ent->n);
1545 ent->flags = flags | XML_DTD_ENT_DECLARED;
1546 ent->len = out->bstop - out->bptr - 1;
1547 ent->text = mp_memdup(ctx->pool, out->bptr, ent->len + 1);
1552 /* External entity */
1553 struct xml_ext_id eid;
1554 struct xml_dtd_notn *notn = NULL;
/* allow_public = 0: a system literal is required; dtd = 0: plain whitespace. */
1555 xml_parse_external_id(ctx, &eid, 0, 0);
/* NOTE(review): '>' is demanded when no whitespace follows OR when flags is
 * zero (general entity), so the NDATA branch below is only reachable for
 * parameter entities. Its own comment says "General"; confirm whether
 * "!flags" is inverted. */
1556 if (!xml_parse_white(ctx, 0) || !flags)
1557 xml_parse_char(ctx, '>');
1558 else if (xml_get_char(ctx) != '>')
1560 /* General external unparsed entity */
1561 flags |= XML_DTD_ENT_UNPARSED;
1562 xml_parse_seq(ctx, "NDATA");
1563 xml_parse_white(ctx, 1);
1564 notn = xml_dtd_notns_lookup(dtd->tab_notns, xml_parse_name(ctx));
1566 slist_add_tail(list, &ent->n);
1567 ent->flags = flags | XML_DTD_ENT_DECLARED | XML_DTD_ENT_EXTERNAL;
1571 xml_parse_dtd_white(ctx, 0);
1572 xml_parse_char(ctx, '>');
1576 /* DTD: <!ELEMENT ...> */
/* Parse one element type declaration and record its content model.
 * The element is looked up by name in dtd->tab_elems; a second declaration
 * is fatal. The content specification is stored as elem->type plus, for
 * Mixed and children models, a tree of xml_dtd_elem_node rooted at
 * elem->node and allocated from dtd->pool.
 * NOTE(review): this listing omits some original lines, so the loop
 * headers and several branch conditions are not visible. */
1579 xml_parse_element_decl(struct xml_context *ctx)
1581 /* Elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
1582 * Already parsed: '<!ELEMENT' */
1583 xml_parse_dtd_white(ctx, 1);
1584 char *name = xml_parse_name(ctx);
1585 xml_parse_dtd_white(ctx, 1);
1586 struct xml_dtd *dtd = ctx->dtd;
1587 struct xml_dtd_elem *elem = xml_dtd_elems_lookup(dtd->tab_elems, name);
1588 if (elem->flags & XML_DTD_ELEM_DECLARED)
1589 xml_fatal(ctx, "Element <%s> already declared", name);
1591 /* contentspec ::= 'EMPTY' | 'ANY' | Mixed | children */
1592 uns c = xml_peek_char(ctx);
1595 xml_parse_seq(ctx, "EMPTY");
1596 elem->type = XML_DTD_ELEM_EMPTY;
1600 xml_parse_seq(ctx, "ANY");
1601 elem->type = XML_DTD_ELEM_ANY;
/* Parenthesised content: allocate the root node of the content tree. */
1607 xml_parse_dtd_white(ctx, 0);
1608 struct xml_dtd_elem_node *parent = elem->node = mp_alloc_zero(dtd->pool, sizeof(*parent));
1609 if (xml_peek_char(ctx) == '#')
1611 /* Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | '(' S? '#PCDATA' S? ')' */
1613 xml_parse_seq(ctx, "PCDATA");
1614 elem->type = XML_DTD_ELEM_MIXED;
1615 parent->type = XML_DTD_ELEM_PCDATA;
1618 xml_parse_dtd_white(ctx, 0);
1619 if ((c = xml_get_char(ctx)) == ')')
1622 xml_fatal_expected(ctx, ')');
1623 xml_parse_dtd_white(ctx, 0);
/* Each name in the Mixed list becomes a son node; repeats are reported as
 * an error. */
1624 struct xml_dtd_elem *son_elem = xml_dtd_elems_lookup(dtd->tab_elems, xml_parse_name(ctx));
1625 if (xml_dtd_enodes_find(dtd->tab_enodes, parent, son_elem))
1626 xml_error(ctx, "Duplicate content '%s'", son_elem->name);
1629 struct xml_dtd_elem_node *son = xml_dtd_enodes_new(dtd->tab_enodes, parent, son_elem);
1630 slist_add_tail(&parent->sons, &son->n);
/* The trailing '*' may only be left out when no names were listed. */
1634 if (xml_peek_char(ctx) == '*')
1637 parent->occur = XML_DTD_ELEM_OCCUR_MULT;
1639 else if (!slist_head(&parent->sons))
1640 parent->occur = XML_DTD_ELEM_OCCUR_ONCE;
1642 xml_fatal_expected(ctx, '*');
1646 /* children ::= (choice | seq) ('?' | '*' | '+')?
1647 * cp ::= (Name | choice | seq) ('?' | '*' | '+')?
1648 * choice ::= '(' S? cp ( S? '|' S? cp )+ S? ')'
1649 * seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' */
1651 elem->type = XML_DTD_ELEM_CHILDREN;
/* XML_DTD_ELEM_PCDATA serves as an "operator not yet known" placeholder for
 * a group; it is replaced by SEQ or OR below. */
1652 parent->type = XML_DTD_ELEM_PCDATA;
1659 xml_parse_dtd_white(ctx, 0);
1660 if ((c = xml_get_char(ctx)) == ')')
/* Closing a group whose operator was never fixed makes it a sequence. */
1663 if (parent->type == XML_DTD_ELEM_PCDATA)
1664 parent->type = XML_DTD_ELEM_SEQ;
/* Optional occurrence suffix after ')'; any other character is pushed back
 * and the group occurs exactly once. */
1665 if ((c = xml_get_char(ctx)) == '?')
1666 parent->occur = XML_DTD_ELEM_OCCUR_OPT;
1668 parent->occur = XML_DTD_ELEM_OCCUR_MULT;
1670 parent->occur = XML_DTD_ELEM_OCCUR_PLUS;
1673 xml_unget_char(ctx);
1674 parent->occur = XML_DTD_ELEM_OCCUR_ONCE;
/* The outermost group has no parent; otherwise continue in the enclosing one. */
1676 if (!parent->parent)
1678 parent = parent->parent;
/* Separator handling: one group must use a single operator throughout
 * (presumably '|' selects OR and ',' selects SEQ; the tests on c are not
 * visible in this listing). */
1683 if (parent->type == XML_DTD_ELEM_PCDATA)
1684 parent->type = XML_DTD_ELEM_OR;
1685 else if (parent->type != XML_DTD_ELEM_OR)
1686 xml_fatal(ctx, "Mixed operators in the list of element children");
1690 if (parent->type == XML_DTD_ELEM_PCDATA)
1691 parent->type = XML_DTD_ELEM_SEQ;
1692 else if (parent->type != XML_DTD_ELEM_SEQ)
1693 xml_fatal(ctx, "Mixed operators in the list of element children");
/* Nested group: a new node is linked under the current parent.
 * NOTE(review): son->parent was just set to parent, so the assignment
 * "parent = son->parent" leaves parent unchanged; descending into the new
 * group would need "parent = son". Confirm the intent. */
1698 struct xml_dtd_elem_node *son = mp_alloc_zero(dtd->pool, sizeof(*son));
1699 son->parent = parent;
1700 slist_add_tail(&parent->sons, &son->n);
1701 parent = son->parent;
1702 son->type = XML_DTD_ELEM_MIXED;
1705 xml_unget_char(ctx);
1708 xml_parse_dtd_white(ctx, 0);
/* A plain element name: append a leaf node that refers to the named element. */
1710 struct xml_dtd_elem *son_elem = xml_dtd_elems_lookup(dtd->tab_elems, xml_parse_name(ctx));
1711 // FIXME: duplicates, occurrence
1712 //struct xml_dtd_elem_node *son = xml_dtd_enodes_new(dtd->tab_enodes, parent, son_elem);
1713 struct xml_dtd_elem_node *son = mp_alloc_zero(dtd->pool, sizeof(*son));
1714 son->parent = parent;
1715 son->elem = son_elem;
1716 slist_add_tail(&parent->sons, &son->n);
1721 xml_fatal(ctx, "Expected element content specification");
1723 xml_parse_dtd_white(ctx, 0);
1724 xml_parse_char(ctx, '>');
/* Parse a quoted attribute value.
 * Characters are appended to ctx->value as UTF-8 until the opening quote
 * character is seen again; general entity references are expanded through
 * xml_parse_ge_ref() and a literal '<' is reported as an error. The
 * collected bytes are then copied into ctx->pool.
 *   attr - currently unused (marked UNUSED)
 * NOTE(review): the return statement is not visible in this listing;
 * presumably the pool copy is what gets returned. */
1729 xml_parse_attr_value(struct xml_context *ctx, struct xml_dtd_attr *attr UNUSED)
1731 uns quote = xml_parse_quote(ctx);
1733 struct fastbuf *out = ctx->value;
1736 uns c = xml_get_char(ctx);
1740 xml_parse_ge_ref(ctx, out);
1742 else if (c == quote) // FIXME: beware quotes inside parsed
1745 xml_error(ctx, "Attribute value must not contain '<'");
1747 bput_utf8_32(out, c);
1752 char *value = mp_memdup(ctx->pool, out->bptr, out->bstop - out->bptr);
1753 // FIXME: check value constraints / normalize value
/* Parse one attribute-list declaration from the DTD.
 * For every attribute definition this reads the name, the type (an
 * enumeration in parentheses, a keyword type, or NOTATION with a list of
 * notation names) and the default declaration, then stores the result in
 * the attribute record kept in ctx->dtd->tab_attrs.
 * NOTE(review): this listing omits some original lines, so several branch
 * conditions, loop headers and short assignments are not visible. */
1759 xml_parse_attr_list_decl(struct xml_context *ctx)
1761 /* AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
1762 * AttDef ::= S Name S AttType S DefaultDecl
1763 * Already parsed: '<!ATTLIST' */
1764 xml_parse_dtd_white(ctx, 1);
1765 struct xml_dtd_elem *elem = xml_dtd_elems_lookup(ctx->dtd->tab_elems, xml_parse_name(ctx));
/* One iteration per AttDef; the loop stops at '>' or when no whitespace
 * follows. */
1767 while (xml_parse_dtd_white(ctx, 0) && xml_peek_char(ctx) != '>')
1769 char *name = xml_parse_name(ctx);
1770 struct xml_dtd_attr *attr = xml_dtd_attrs_find(ctx->dtd->tab_attrs, elem, name);
1774 xml_warn(ctx, "Duplicate attribute definition");
1778 attr = xml_dtd_attrs_new(ctx->dtd->tab_attrs, elem, name);
1779 xml_parse_dtd_white(ctx, 1);
/* '(' starts an enumerated type: name tokens separated by '|'. */
1780 if (xml_peek_char(ctx) == '(')
1782 xml_skip_char(ctx); // FIXME: xml_inc/dec ?
1784 attr->type = XML_ATTR_ENUM;
1787 xml_parse_dtd_white(ctx, 0);
1788 char *value = xml_parse_nmtoken(ctx);
1790 if (xml_dtd_evals_find(ctx->dtd->tab_evals, attr, value))
1791 xml_error(ctx, "Duplicate enumeration value");
1793 xml_dtd_evals_new(ctx->dtd->tab_evals, attr, value);
1794 xml_parse_dtd_white(ctx, 0);
/* The character that ended the list is pushed back and must be ')'. */
1796 while (xml_get_char(ctx) == '|');
1797 xml_unget_char(ctx);
1798 xml_parse_char(ctx, ')');
/* Otherwise the type is given by keyword. */
1802 char *type = xml_parse_name(ctx);
1803 enum xml_dtd_attribute_type t;
1804 if (!strcmp(type, "CDATA"))
1806 else if (!strcmp(type, "ID"))
1808 else if (!strcmp(type, "IDREF"))
1810 else if (!strcmp(type, "IDREFS"))
1811 t = XML_ATTR_IDREFS;
1812 else if (!strcmp(type, "ENTITY"))
1813 t = XML_ATTR_ENTITY;
1814 else if (!strcmp(type, "ENTITIES"))
1815 t = XML_ATTR_ENTITIES;
1816 else if (!strcmp(type, "NMTOKEN"))
1817 t = XML_ATTR_NMTOKEN;
1818 else if (!strcmp(type, "NMTOKENS"))
1819 t = XML_ATTR_NMTOKENS;
1820 else if (!strcmp(type, "NOTATION"))
1822 if (elem->type == XML_DTD_ELEM_EMPTY)
1823 xml_fatal(ctx, "Empty element must not have notation attribute");
1824 // FIXME: An element type MUST NOT have more than one NOTATION attribute specified.
1825 t = XML_ATTR_NOTATION;
/* NOTATION is followed by a parenthesised, '|'-separated list of notation
 * names. */
1826 xml_parse_dtd_white(ctx, 1);
1827 xml_parse_char(ctx, '(');
1830 xml_parse_dtd_white(ctx, 0);
1831 struct xml_dtd_notn *n = xml_dtd_notns_lookup(ctx->dtd->tab_notns, xml_parse_name(ctx));
1833 if (xml_dtd_enotns_find(ctx->dtd->tab_enotns, attr, n))
1834 xml_error(ctx, "Duplicate enumerated notation");
1836 xml_dtd_enotns_new(ctx->dtd->tab_enotns, attr, n);
1837 xml_parse_dtd_white(ctx, 0);
1839 while (xml_get_char(ctx) == '|');
1840 xml_unget_char(ctx);
1841 xml_parse_char(ctx, ')');
1844 xml_fatal(ctx, "Unknown attribute type");
/* DefaultDecl: '#' introduces REQUIRED, IMPLIED or FIXED, selected by
 * the character that follows it. */
1848 xml_parse_dtd_white(ctx, 1);
1849 enum xml_dtd_attribute_default def = XML_ATTR_NONE;
1850 if (xml_get_char(ctx) == '#')
1851 switch (xml_peek_char(ctx))
1854 xml_parse_seq(ctx, "REQUIRED");
1855 def = XML_ATTR_REQUIRED;
1858 xml_parse_seq(ctx, "IMPLIED");
1859 def = XML_ATTR_IMPLIED;
1862 xml_parse_seq(ctx, "FIXED");
1863 def = XML_ATTR_FIXED;
1864 xml_parse_dtd_white(ctx, 1);
1867 xml_fatal(ctx, "Expected a modifier for default attribute value");
1870 xml_unget_char(ctx);
/* Only REQUIRED and IMPLIED carry no value; FIXED and the plain form are
 * followed by a quoted default. */
1871 if (def != XML_ATTR_REQUIRED && def != XML_ATTR_IMPLIED)
1873 char *v = xml_parse_attr_value(ctx, attr);
1875 attr->default_value = v;
1878 attr->default_mode = def;
1884 /* DTD: Internal subset */
/* Parse the internal DTD subset.
 * After optional whitespace each item is dispatched on its leading
 * characters: comments, NOTATION, ENTITY, ELEMENT and ATTLIST declarations
 * go to their parsers and parameter-entity references go to
 * xml_parse_dtd_pe(). Anything else ends at the "invalid_markup" label,
 * which raises a fatal error.
 * NOTE(review): this listing omits some original lines, so the loop and
 * the case labels of the switch are not visible. */
1887 xml_parse_internal_subset(struct xml_context *ctx)
1889 // FIXME: comments/pi have no parent
1890 /* '[' intSubset ']'
1891 * intSubset ::= (markupdecl | DeclSep)
1892 * Already parsed: ']' (NOTE(review): presumably the opening '[' is meant) */
1895 xml_parse_white(ctx, 0);
1896 uns c = xml_get_char(ctx);
/* The first letter of each keyword is consumed by the switch, so only the
 * rest of the keyword is matched with xml_parse_seq(). */
1899 if ((c = xml_get_char(ctx)) == '!')
1900 switch (c = xml_get_char(ctx))
1903 xml_push_comment(ctx);
1904 xml_pop_comment(ctx);
1907 xml_parse_seq(ctx, "OTATION");
1908 xml_parse_notation_decl(ctx);
/* ENTITY and ELEMENT both start with 'E'; the second letter decides. */
1911 if ((c = xml_get_char(ctx)) == 'N')
1913 xml_parse_seq(ctx, "TITY");
1914 xml_parse_entity_decl(ctx);
1918 xml_parse_seq(ctx, "EMENT");
1919 xml_parse_element_decl(ctx);
1922 goto invalid_markup;
1925 xml_parse_seq(ctx, "TTLIST");
1926 xml_parse_attr_list_decl(ctx);
1929 goto invalid_markup;
1937 goto invalid_markup;
1939 xml_parse_dtd_pe(ctx);
1943 goto invalid_markup;
1949 xml_fatal(ctx, "Invalid markup in the internal subset");
1952 ///////////////////////////////////////////////////////////////////////////////////////////////////////////
/* Parse a CDATA section and append its content to ctx->chars as UTF-8.
 * The keyword "CDATA[" is matched first; the input is then read character
 * by character while looking for the "]]>" terminator.
 * NOTE(review): this listing omits the loop header and the handling of a
 * partial "]" / "]]" match. */
1955 xml_parse_cdata(struct xml_context *ctx)
1957 struct fastbuf *out = ctx->chars;
1958 xml_parse_seq(ctx, "CDATA[");
1962 if ((c = xml_get_char(ctx)) == ']')
1964 if ((c = xml_get_char(ctx)) == ']')
1965 if ((c = xml_get_char(ctx)) == '>')
1971 bput_utf8_32(out, c);
/* Skip a CDATA section. The only visible statement delegates to
 * xml_parse_cdata(), so the content is still appended to ctx->chars. */
1976 xml_skip_cdata(struct xml_context *ctx)
1978 xml_parse_cdata(ctx);
/* Parse character data up to the next '<'.
 * Characters are appended to ctx->chars as UTF-8 and general entity
 * references are expanded into the same buffer through xml_parse_ge_ref().
 * The terminating '<' is pushed back for the caller.
 * NOTE(review): the test that selects the reference branch is not visible
 * in this listing. */
1982 xml_parse_chars(struct xml_context *ctx)
1984 TRACE(ctx, "parse_chars");
1985 struct fastbuf *out = ctx->chars;
1987 while ((c = xml_get_char(ctx)) != '<')
1991 xml_parse_ge_ref(ctx, out);
1994 bput_utf8_32(out, c);
1995 xml_unget_char(ctx);
1998 /*----------------------------------------------*/
/* Forward declaration of the table type used by the hash callbacks below. */
2000 struct xml_attrs_table;
/* Hash function for the attribute table: the key is the pair (owning
 * element, attribute name), combined by XOR of the pointer hash and the
 * string hash. */
2003 xml_attrs_hash(struct xml_attrs_table *t UNUSED, struct xml_elem *e, char *n)
2005 return hash_pointer(e) ^ hash_string(n);
/* Key equality for the attribute table: two keys match when they refer to
 * the same element (pointer identity) and the names compare equal with
 * strcmp(). */
2009 xml_attrs_eq(struct xml_attrs_table *t UNUSED, struct xml_elem *e1, char *n1, struct xml_elem *e2, char *n2)
2011 return (e1 == e2) && !strcmp(n1, n2);
/* Initialise a freshly created attribute node for key (e, name). The
 * visible statement appends the node to the element's attrs list.
 * NOTE(review): original lines 2017-2019 are not in this listing;
 * presumably they store the key fields in *a. */
2015 xml_attrs_init_key(struct xml_attrs_table *t UNUSED, struct xml_attr *a, struct xml_elem *e, char *name)
2020 slist_add_tail(&e->attrs, &a->n);
/* Instantiate the generic hash table from lib/hashtable.h for attributes.
 * Generated identifiers get the xml_attrs_ prefix, nodes are
 * struct xml_attr and the key is the (elem, name) pair. The GIVE_* macros
 * state that this file provides the equality, hash, key-initialisation and
 * allocation hooks; the WANT_* macros request the cleanup, remove, lookup
 * and find operations. */
2023 #define HASH_PREFIX(x) xml_attrs_##x
2024 #define HASH_NODE struct xml_attr
2025 #define HASH_KEY_COMPLEX(x) x elem, x name
2026 #define HASH_KEY_DECL struct xml_elem *elem, char *name
2027 #define HASH_TABLE_DYNAMIC
2028 #define HASH_GIVE_EQ
2029 #define HASH_GIVE_HASHFN
2030 #define HASH_GIVE_INIT_KEY
2031 #define HASH_WANT_CLEANUP
2032 #define HASH_WANT_REMOVE
2033 #define HASH_WANT_LOOKUP
2034 #define HASH_WANT_FIND
2035 #define HASH_GIVE_ALLOC
2037 #include "lib/hashtable.h"
/* Initialise a parser context.
 * The structure is zeroed, then a memory pool and two growing buffers
 * (ctx->chars for character data, ctx->value for values) are created and
 * the attribute hash table is allocated and initialised.
 * NOTE(review): original line 2046 is not in this listing. */
2040 xml_init(struct xml_context *ctx)
2042 bzero(ctx, sizeof(*ctx));
2043 ctx->pool = mp_new(65536);
2044 ctx->chars = fbgrow_create(4096);
2045 ctx->value = fbgrow_create(4096);
2047 xml_attrs_init(ctx->tab_attrs = xml_hash_new(ctx->pool, sizeof(struct xml_attrs_table)));
/* Release the resources owned by a parser context: the attribute hash
 * table, the DTD data and finally the memory pool.
 * NOTE(review): original lines 2055-2056 are not in this listing;
 * presumably they close ctx->chars and ctx->value. */
2051 xml_cleanup(struct xml_context *ctx)
2053 xml_attrs_cleanup(ctx->tab_attrs);
2054 xml_dtd_cleanup(ctx);
2057 mp_delete(ctx->pool);
/* Parse one attribute of the current element (ctx->elem).
 * The attribute is looked up in ctx->tab_attrs under the key (element,
 * name) and its value is read with xml_parse_system_literal(). A repeated
 * attribute name is reported with "Attribute is not unique".
 * NOTE(review): the parsing of '=' and the uniqueness test are on lines
 * missing from this listing. */
2061 xml_parse_attr(struct xml_context *ctx)
2063 // FIXME: memory management, dtd, literal
2064 TRACE(ctx, "parse_attr");
2065 struct xml_elem *e = ctx->elem;
2066 char *name = xml_parse_name(ctx);
2067 struct xml_attr *a = xml_attrs_lookup(ctx->tab_attrs, e, name);
2069 char *val =xml_parse_system_literal(ctx);
2071 xml_error(ctx, "Attribute is not unique");
/* Parse a start tag or an empty-element tag.
 * A zeroed xml_elem is allocated from ctx->pool, linked under the current
 * element (ctx->elem) and named. The attributes are then parsed until the
 * tag ends; an empty-element tag sets XML_FLAG_EMPTY_ELEM in ctx->flags.
 * At the end ctx->h_element_start is called if it is set.
 * NOTE(review): this listing omits some original lines, so the attribute
 * loop and the root-element test are not visible. */
2077 xml_parse_stag(struct xml_context *ctx)
2080 TRACE(ctx, "parse_stag");
2082 struct xml_elem *e = mp_alloc_zero(ctx->pool, sizeof(*e));
2083 struct xml_elem *parent = ctx->elem;
2084 clist_init(&e->sons);
2085 e->node.parent = (void *)parent;
2087 e->name = xml_parse_name(ctx);
2089 clist_add_tail(&parent->sons, &e->node.n);
/* The element name is checked against the DOCTYPE name, if one was given. */
2093 if (ctx->document_type && strcmp(e->name, ctx->document_type))
2094 xml_error(ctx, "The root element does not match the document type");
2098 uns white = xml_parse_white(ctx, 0);
2099 uns c = xml_get_char(ctx);
2102 xml_parse_char(ctx, '>');
2103 ctx->flags |= XML_FLAG_EMPTY_ELEM;
/* An attribute must be separated from what precedes it by whitespace. */
2109 xml_fatal_expected_white(ctx);
2110 xml_unget_char(ctx);
2111 xml_parse_attr(ctx);
2113 if (ctx->h_element_start)
2114 ctx->h_element_start(ctx);
/* Parse an end tag. The name must equal the name of the current element
 * (ctx->elem), otherwise a fatal error is raised; optional whitespace and
 * the closing '>' follow. */
2118 xml_parse_etag(struct xml_context *ctx)
2120 TRACE(ctx, "parse_etag");
2121 struct xml_elem *e = ctx->elem;
2123 char *name = xml_parse_name(ctx);
2124 if (strcmp(name, e->name))
2125 xml_fatal(ctx, "Invalid ETag, expected '%s'", e->name);
2126 xml_parse_white(ctx, 0);
2127 xml_parse_char(ctx, '>');
/* Leave the current element.
 * ctx->h_element_end is called first, if set. With XML_DOM_FREE in
 * ctx->flags the element is unlinked from its parent, and its attributes
 * and those of all descendant elements are removed from ctx->tab_attrs.
 * Finally ctx->node is moved to the parent and xml_pop() is called.
 * NOTE(review): this listing omits some original lines. */
2132 xml_pop_element(struct xml_context *ctx)
2134 TRACE(ctx, "pop_element");
2135 if (ctx->h_element_end)
2136 ctx->h_element_end(ctx);
2137 struct xml_elem *e = ctx->elem;
2138 if (ctx->flags & XML_DOM_FREE)
2141 clist_remove(&e->node.n);
2144 SLIST_FOR_EACH(struct xml_attr *, a, e->attrs)
2145 xml_attrs_remove(ctx->tab_attrs, a);
/* Descendants are walked without recursion: presumably each child element's
 * own sons list is spliced into e->sons right after the child before the
 * child is removed, so the loop reaches the grandchildren as well. */
2147 while (n = clist_head(&e->sons))
2149 if (n->type == XML_NODE_ELEM)
2151 SLIST_FOR_EACH(struct xml_attr *, a, ((struct xml_elem *)n)->attrs)
2152 xml_attrs_remove(ctx->tab_attrs, a);
2153 clist_insert_list_after(&((struct xml_elem *)n)->sons, &n->n);
2155 clist_remove(&n->n);
2158 ctx->node = e->node.parent;
2159 xml_pop(ctx); // FIXME: memory management without XML_DOM_FREE
/* NOTE(review): the two lines below use identifiers (struct xml_attribute,
 * xml_attribute_remove, ctx->attribute_table) that differ from the
 * xml_attr / xml_attrs_remove / ctx->tab_attrs names used above; they look
 * like older code, possibly disabled by preprocessor lines that are not
 * visible here. */
2162 for (struct xml_attribute *a = e->attrs; a; a = a->next)
2163 xml_attribute_remove(ctx->attribute_table, a);
/* Parse the beginning of a document type declaration.
 * The document type name is stored in ctx->document_type; a second
 * declaration is fatal. An external ID, if present, is parsed into
 * ctx->eid and XML_FLAG_HAS_EXTERNAL_SUBSET is set. A following '[' is only
 * peeked at and recorded as XML_FLAG_HAS_INTERNAL_SUBSET. At the end
 * ctx->h_doctype_decl is called if it is set. */
2168 xml_parse_doctype_decl(struct xml_context *ctx)
2170 if (ctx->document_type)
2171 xml_fatal(ctx, "Multiple document types not allowed");
2172 xml_parse_seq(ctx, "DOCTYPE");
2173 xml_parse_white(ctx, 1);
2174 ctx->document_type = xml_parse_name(ctx);
/* NOTE(review): "doctyype" in the trace string looks like a typo for "doctype". */
2175 TRACE(ctx, "doctyype=%s", ctx->document_type);
2176 uns white = xml_parse_white(ctx, 0);
2177 uns c = xml_peek_char(ctx);
/* An external ID is only accepted after whitespace and when neither '>'
 * nor '[' comes next. */
2178 if (c != '>' && c != '[' && white)
2180 xml_parse_external_id(ctx, &ctx->eid, 0, 0);
2181 xml_parse_white(ctx, 0);
2182 ctx->flags |= XML_FLAG_HAS_EXTERNAL_SUBSET;
2184 if (xml_peek_char(ctx) == '[')
2185 ctx->flags |= XML_FLAG_HAS_INTERNAL_SUBSET;
2186 if (ctx->h_doctype_decl)
2187 ctx->h_doctype_decl(ctx);
/* Advance the parser to the next event and return its state code.
 * The function is a resumable state machine. Whenever the caller asked
 * for an event (bits in ctx->want), the state to resume from is saved in
 * ctx->state and the event code is returned; the next call continues at
 * the matching case label, which sits in the middle of the parsing code.
 * Errors arrive through longjmp() into the jmp_buf registered in
 * ctx->throw_buf (see xml_throw()).
 * NOTE(review): this listing omits many original lines (the switch header,
 * loops, braces, several branches), so the control flow between the
 * numbered lines is incomplete. */
2191 xml_next(struct xml_context *ctx)
2193 /* A nasty state machine */
2195 TRACE(ctx, "xml_next (state=%u)", ctx->state);
/* Register the jump target for errors; the branch below runs after a throw. */
2197 ctx->throw_buf = &throw_buf;
2198 if (setjmp(throw_buf))
2201 if (ctx->err_code == XML_ERR_EOF && ctx->h_fatal)
2203 ctx->state = XML_STATE_FATAL;
2204 TRACE(ctx, "raised fatal error");
2210 case XML_STATE_FATAL:
2213 case XML_STATE_START:
2214 TRACE(ctx, "entering prolog");
2215 if (ctx->h_document_start)
2216 ctx->h_document_start(ctx);
2219 if (ctx->h_xml_decl)
2220 ctx->h_xml_decl(ctx);
2221 if (ctx->want & XML_WANT_DECL)
2222 return ctx->state = XML_STATE_DECL;
2223 case XML_STATE_DECL:
2225 /* Misc* (doctypedecl Misc*)? */
2228 xml_parse_white(ctx, 0);
2229 xml_parse_char(ctx, '<');
2230 if ((c = xml_get_char(ctx)) == '?')
2231 /* Processing instruction */
2232 if (!(ctx->want & XML_WANT_PI))
/* Report the PI now and resume in the prolog on the next call. */
2237 ctx->state = XML_STATE_PROLOG_PI;
2238 return XML_STATE_PI;
2239 case XML_STATE_PROLOG_PI:
2244 /* Found the root tag */
2245 xml_unget_char(ctx);
2248 else if (xml_get_char(ctx) == '-')
2249 if (!(ctx->want & XML_WANT_COMMENT))
2250 xml_skip_comment(ctx);
2253 xml_push_comment(ctx);
2254 ctx->state = XML_STATE_PROLOG_COMMENT;
2255 return XML_STATE_COMMENT;
2256 case XML_STATE_PROLOG_COMMENT:
2257 xml_pop_comment(ctx);
2262 xml_unget_char(ctx);
2263 xml_parse_doctype_decl(ctx);
2264 if (ctx->want & XML_WANT_DOCUMENT_TYPE)
2265 return ctx->state = XML_STATE_DOCUMENT_TYPE;
2266 case XML_STATE_DOCUMENT_TYPE:
/* The internal subset is parsed only after the doctype event was reported. */
2267 if (xml_peek_char(ctx) == '[')
2271 xml_parse_internal_subset(ctx);
2272 xml_parse_white(ctx, 0);
2274 xml_parse_char(ctx, '>');
2280 case XML_STATE_COMMENT:
2281 fbgrow_reset(ctx->value);
2283 case XML_STATE_CHARS:
/* Element content: character data is gathered in ctx->chars. Before any
 * markup event the pending text is reported first, via one of the
 * XML_STATE_CHARS_BEFORE_* resume states. */
2287 if (xml_peek_char(ctx) != '<')
2290 xml_parse_chars(ctx);
2298 if ((c = xml_get_char(ctx)) == '?')
2301 if (!(ctx->want & XML_WANT_PI))
2305 if (btell(ctx->chars))
2307 fbgrow_rewind(ctx->chars);
2308 ctx->state = XML_STATE_CHARS_BEFORE_PI;
/* NOTE(review): every parallel CHARS_BEFORE_* path below returns
 * XML_STATE_CHARS at this point; returning XML_STATE_PI here looks
 * inconsistent. Confirm. */
2309 return XML_STATE_PI;
2310 case XML_STATE_CHARS_BEFORE_PI:
2311 fbgrow_reset(ctx->chars);
2314 return ctx->state = XML_STATE_PI;
2319 if ((c = xml_get_char(ctx)) == '-')
2322 if (!(ctx->want & XML_WANT_COMMENT))
2323 xml_skip_comment(ctx);
2326 if (btell(ctx->chars))
2328 fbgrow_rewind(ctx->chars);
2329 ctx->state = XML_STATE_CHARS_BEFORE_COMMENT;
2330 return XML_STATE_CHARS;
2331 case XML_STATE_CHARS_BEFORE_COMMENT:
2332 fbgrow_reset(ctx->chars);
2334 xml_push_comment(ctx);
2335 return ctx->state = XML_STATE_COMMENT;
2341 if (!(ctx->want & XML_WANT_CDATA))
2342 xml_skip_cdata(ctx);
2345 if (btell(ctx->chars))
2347 fbgrow_rewind(ctx->chars);
2348 ctx->state = XML_STATE_CHARS_BEFORE_CDATA;
2349 return XML_STATE_CHARS;
2350 case XML_STATE_CHARS_BEFORE_CDATA:
2351 fbgrow_reset(ctx->chars);
2353 xml_parse_cdata(ctx);
2354 if (btell(ctx->chars))
2356 fbgrow_rewind(ctx->chars);
2357 return ctx->state = XML_STATE_CDATA;
2359 case XML_STATE_CDATA:
2360 fbgrow_reset(ctx->chars);
2364 xml_fatal(ctx, "Unexpected character after '<!'");
2368 /* STag | EmptyElemTag */
2369 xml_unget_char(ctx);
2370 if (btell(ctx->chars))
2372 fbgrow_rewind(ctx->chars);
2373 ctx->state = XML_STATE_CHARS_BEFORE_STAG;
2374 return XML_STATE_CHARS;
2375 case XML_STATE_CHARS_BEFORE_STAG:
2376 fbgrow_reset(ctx->chars);
2379 xml_parse_stag(ctx);
2380 if (ctx->want & XML_WANT_STAG)
2381 return ctx->state = XML_STATE_STAG;
2382 case XML_STATE_STAG:
2383 if (ctx->flags & XML_FLAG_EMPTY_ELEM)
2390 if (btell(ctx->chars))
2392 fbgrow_rewind(ctx->chars);
2393 ctx->state = XML_STATE_CHARS_BEFORE_ETAG;
2394 return XML_STATE_CHARS;
2395 case XML_STATE_CHARS_BEFORE_ETAG:
2396 fbgrow_reset(ctx->chars);
2399 xml_parse_etag(ctx);
2401 if (ctx->want & XML_WANT_ETAG)
2402 return ctx->state = XML_STATE_ETAG;
2403 case XML_STATE_ETAG:
2404 xml_pop_element(ctx);
2412 TRACE(ctx, "entering epilog");
2415 /* Epilog whitespace is the only place, where a valid document can reach EOF */
/* A second setjmp() on the same buffer turns an EOF raised while skipping
 * epilog whitespace into a regular XML_STATE_EOF result. */
2416 if (setjmp(throw_buf))
2417 if (ctx->err_code == XML_ERR_EOF)
2419 TRACE(ctx, "reached EOF");
2420 ctx->state = XML_STATE_EOF;
2421 if (ctx->h_document_end)
2422 ctx->h_document_end(ctx);
2424 return XML_STATE_EOF;
2428 xml_parse_white(ctx, 0);
/* The jump target is re-armed once the whitespace has been consumed. */
2429 if (setjmp(throw_buf))
2433 xml_parse_char(ctx, '<');
2434 if ((c = xml_get_char(ctx)) == '?')
2435 /* Processing instruction */
2436 if (!(ctx->want & XML_WANT_PI))
/* Comma expression: store the resume state, then return the event code
 * XML_STATE_PI. */
2441 return ctx->state = XML_STATE_EPILOG_PI, XML_STATE_PI;
2442 case XML_STATE_EPILOG_PI:
2447 if (!(ctx->want & XML_WANT_COMMENT))
2448 xml_skip_comment(ctx);
2451 xml_push_comment(ctx);
2452 return ctx->state = XML_STATE_EPILOG_COMMENT, XML_STATE_COMMENT;
2453 case XML_STATE_EPILOG_COMMENT:
2454 xml_pop_comment(ctx);
2457 xml_fatal(ctx, "Syntax error in the epilog");
/* Test-program callback for warnings, errors and fatal errors: logs the
 * current row and ctx->err_msg, using the warning level for codes below
 * XML_ERR_ERROR and the error level otherwise. */
2467 error(struct xml_context *ctx)
2469 msg((ctx->err_code < XML_ERR_ERROR) ? L_WARN_R : L_ERROR_R, "XML %u: %s", xml_row(ctx), ctx->err_msg);
/* Test driver: parse the document read from `in` and write one line per
 * parser event to `out`.
 * All three error hooks point at error(), every event type is requested
 * (XML_WANT_ALL) and XML_DOM_FREE is set in the context flags.
 * NOTE(review): this listing omits some original lines (context
 * initialisation, the switch header, break statements, cleanup). */
2473 test(struct fastbuf *in, struct fastbuf *out)
2475 struct xml_context ctx;
2477 ctx.h_warn = ctx.h_error = ctx.h_fatal = error;
2478 ctx.want = XML_WANT_ALL;
2479 ctx.flags |= XML_DOM_FREE;
2480 xml_set_source(&ctx, in);
/* Events are pulled until xml_next() returns a negative state. */
2482 while ((state = xml_next(&ctx)) >= 0)
2485 case XML_STATE_CHARS:
/* Text is printed with an explicit length, from the buffer start to bstop. */
2486 bprintf(out, "CHARS [%.*s]\n", (int)(ctx.chars->bstop - ctx.chars->buffer), ctx.chars->buffer);
2488 case XML_STATE_STAG:
2489 bprintf(out, "STAG <%s>\n", ctx.elem->name);
2490 SLIST_FOR_EACH(struct xml_attr *, a, ctx.elem->attrs)
2491 bprintf(out, " ATTR %s=[%s]\n", a->name, a->val);
2493 case XML_STATE_ETAG:
2494 bprintf(out, "ETAG </%s>\n", ctx.elem->name);
2496 case XML_STATE_COMMENT:
2497 bprintf(out, "COMMENT [%.*s]\n", (int)(ctx.value->bstop - ctx.value->buffer), ctx.value->buffer);
2500 bprintf(out, "PI [%s] [%.*s]\n", ctx.name, (int)(ctx.value->bstop - ctx.value->buffer), ctx.value->buffer);
2502 case XML_STATE_CDATA:
2503 bprintf(out, "CDATA [%.*s]\n", (int)(ctx.chars->bstop - ctx.chars->buffer), ctx.chars->buffer);
2506 bprintf(out, "EOF\n");
2517 struct fastbuf *in = bfdopen_shared(0, 1024);
2518 struct fastbuf *out = bfdopen_shared(1, 1024);