From d998b1961061c93132531d6d9cd2772a0c51ea1a Mon Sep 17 00:00:00 2001
From: Pavel Charvat <pavel.charvat@netcentrum.cz>
Date: Wed, 12 Dec 2007 01:57:04 +0100
Subject: [PATCH] XML: Split to several files, revised part of iface and     
 memory handling, added a testing utility.

---
 sherlock/xml/Makefile   |   19 +-
 sherlock/xml/common.c   |  652 ++++++++++
 sherlock/xml/common.h   |  327 +++++
 sherlock/xml/dtd.c      |  823 +++++++++++++
 sherlock/xml/dtd.h      |    6 +
 sherlock/xml/parse.c    | 1004 ++++++++++++++++
 sherlock/xml/unicat.pl  |   30 +-
 sherlock/xml/xml-test.c |  253 ++++
 sherlock/xml/xml.c      | 2524 ---------------------------------------
 sherlock/xml/xml.h      |   57 +-
 10 files changed, 3122 insertions(+), 2573 deletions(-)
 create mode 100644 sherlock/xml/common.c
 create mode 100644 sherlock/xml/common.h
 create mode 100644 sherlock/xml/dtd.c
 create mode 100644 sherlock/xml/parse.c
 create mode 100644 sherlock/xml/xml-test.c
 delete mode 100644 sherlock/xml/xml.c

diff --git a/sherlock/xml/Makefile b/sherlock/xml/Makefile
index f721b500..cc9fda50 100644
--- a/sherlock/xml/Makefile
+++ b/sherlock/xml/Makefile
@@ -2,22 +2,27 @@
 # (c) 2007 Pavel Charvat <pchar@ucw.cz>
 
 DIRS+=sherlock/xml
+PROGS+=$(o)/sherlock/xml/xml-test
 
-LIBSHXML_MODS=xml
+LIBSHXML_MODS=common parse dtd
 LIBSHXML_INCLUDES=xml.h dtd.h
 
 LIBSHXML_MOD_PATHS=$(addprefix $(o)/sherlock/xml/,$(LIBSHXML_MODS))
 
 $(o)/sherlock/xml/libshxml.a: $(addsuffix .o,$(LIBSHXML_MOD_PATHS))
 $(o)/sherlock/xml/libshxml.so: $(addsuffix .oo,$(LIBSHXML_MOD_PATHS))
-$(o)/sherlock/xml/libshxml.pc: $(LIBUCW) $(LIBCHARSET)
+$(o)/sherlock/xml/libshxml.pc: $(LIBSH) $(LIBCHARSET)
 
-$(o)/sherlock/xml/xml-t: $(LIBSHXML)
-$(o)/sherlock/xml/xml.o: $(o)/sherlock/xml/unicat.h
-$(o)/sherlock/xml/unicat.h: $(s)/sherlock/xml/unicat.pl
-	$(M)GEN $@
-	$(Q)$< >$@
+$(o)/sherlock/xml/common.o $(o)/sherlock/xml/unicat.h: $(o)/sherlock/xml/unicat.stamp
+$(o)/sherlock/xml/unicat.stamp: $(s)/sherlock/xml/unicat.pl
+	$(M)GEN $(addprefix $(o)/sherlock/xml/unicat,.h .c)
+	$(Q)$< $(addprefix $(o)/sherlock/xml/unicat,.h .c)
+	$(Q)touch $@
 
+$(o)/sherlock/xml/xml-test: $(o)/sherlock/xml/xml-test.o $(LIBSHXML)
+
+API_LIBS+=libshxml
 API_INCLUDES+=$(o)/sherlock/xml/.include-stamp
 $(o)/sherlock/xml/.include-stamp: $(addprefix $(s)/sherlock/xml/,$(LIBSHXML_INCLUDES))
 $(o)/sherlock/xml/.include-stamp: IDST=sherlock/xml
+run/lib/pkgconfig/libshxml.pc: $(o)/sherlock/xml/libshxml.pc
diff --git a/sherlock/xml/common.c b/sherlock/xml/common.c
new file mode 100644
index 00000000..4d96cecc
--- /dev/null
+++ b/sherlock/xml/common.c
@@ -0,0 +1,652 @@
+/*
+ *	Sherlock Library -- A simple XML parser
+ *
+ *	(c) 2007 Pavel Charvat <pchar@ucw.cz>
+ *
+ *	This software may be freely distributed and used according to the terms
+ *	of the GNU Lesser General Public License.
+ */
+
+#define LOCAL_DEBUG
+
+#include "lib/lib.h"
+#include "lib/mempool.h"
+#include "lib/fastbuf.h"
+#include "lib/ff-unicode.h"
+#include "lib/ff-binary.h"
+#include "lib/chartype.h"
+#include "lib/unicode.h"
+#include "lib/hashfunc.h"
+#include "lib/stkstring.h"
+#include "lib/unaligned.h"
+#include "charset/charconv.h"
+#include "charset/fb-charconv.h"
+#include "sherlock/xml/xml.h"
+#include "sherlock/xml/dtd.h"
+#include "sherlock/xml/common.h"
+
+#include <setjmp.h>
+
+/*** Error handling ***/
+
+void NONRET
+xml_throw(struct xml_context *ctx)	/* Unwind to the jump buffer stored in ctx->throw_buf; does not return */
+{
+  ASSERT(ctx->err_code && ctx->throw_buf);	/* Both an error code and a jump target must be set beforehand */
+  longjmp(*(jmp_buf *)ctx->throw_buf, ctx->err_code);	/* The error code becomes the value seen at the setjmp() site */
+}
+
+void
+xml_warn(struct xml_context *ctx, const char *format, ...)
+{
+  if (ctx->h_warn)
+    {
+      va_list args;
+      va_start(args, format);
+      ctx->err_msg = stk_vprintf(format, args);
+      ctx->err_code = XML_ERR_WARN;
+      va_end(args);
+      ctx->h_warn(ctx);
+      ctx->err_msg = NULL;
+      ctx->err_code = XML_ERR_OK;
+    }
+}
+
+void
+xml_error(struct xml_context *ctx, const char *format, ...)
+{
+  if (ctx->h_error)
+    {
+      va_list args;
+      va_start(args, format);
+      ctx->err_msg = stk_vprintf(format, args);
+      ctx->err_code = XML_ERR_ERROR;
+      va_end(args);
+      ctx->h_error(ctx);
+      ctx->err_msg = NULL;
+      ctx->err_code = XML_ERR_OK;
+    }
+}
+
+void NONRET
+xml_fatal(struct xml_context *ctx, const char *format, ...)	/* Report a fatal error (printf-style message) and throw */
+{
+  va_list args;
+  va_start(args, format);
+  ctx->err_msg = mp_vprintf(ctx->stack, format, args);	/* Formatted into the ctx->stack pool, unlike xml_warn()/xml_error() which use stk_vprintf() */
+  ctx->err_code = XML_ERR_FATAL;
+  ctx->state = XML_STATE_FATAL;	/* The context is left marked as fatally failed */
+  va_end(args);
+  if (ctx->h_fatal)	/* The user hook is optional and runs before unwinding */
+    ctx->h_fatal(ctx);
+  xml_throw(ctx);	/* Never returns: longjmp()s via ctx->throw_buf */
+}
+
+/*** Character categorization ***/
+
+#include "obj/sherlock/xml/unicat.c"
+
+/*** Memory management ***/
+
+void NONRET
+xml_fatal_nested(struct xml_context *ctx)
+{
+  xml_fatal(ctx, "Entity not nested correctly");
+}
+
+void *
+xml_hash_new(struct mempool *pool, uns size)	/* Allocate a zeroed table of `size` bytes preceded by a hidden header */
+{
+  void *tab = mp_alloc_zero(pool, size + XML_HASH_HDR_SIZE);	/* Header and table come from the same pool allocation */
+  *(void **)tab = pool;	/* The header stores the owning pool; XML_HASH_GIVE_ALLOC reads it back at (table - XML_HASH_HDR_SIZE) */
+  return tab + XML_HASH_HDR_SIZE;	/* Callers get the address just past the header */
+}
+
+/*** Reading of document/external entities ***/
+
+static void NONRET
+xml_eof(struct xml_context *ctx)
+{
+  ctx->err_msg = "Unexpected EOF";
+  ctx->err_code = XML_ERR_EOF;
+  xml_throw(ctx);
+}
+
+static inline void
+xml_add_char(u32 **bstop, uns c)
+{
+  *(*bstop)++ = c;
+  *(*bstop)++ = xml_char_cat(c);
+}
+
+struct xml_source *
+xml_push_source(struct xml_context *ctx, uns flags)
+{
+  xml_push(ctx);
+  struct xml_source *src = ctx->src;
+  if (src)
+    {
+      src->bptr = ctx->bptr;
+      src->bstop = ctx->bstop;
+    }
+  src = mp_alloc_zero(ctx->stack, sizeof(*src));
+  src->next = ctx->src;
+  src->saved_depth = ctx->depth;
+  ctx->src = src;
+  ctx->flags = (ctx->flags & ~(XML_FLAG_SRC_EOF | XML_FLAG_SRC_EXPECTED_DECL | XML_FLAG_SRC_NEW_LINE | XML_FLAG_SRC_SURROUND | XML_FLAG_SRC_DOCUMENT)) | flags;
+  ctx->bstop = ctx->bptr = src->buf;
+  ctx->depth = 0;
+  if (flags & XML_FLAG_SRC_SURROUND)
+    xml_add_char(&ctx->bstop, 0x20);
+  return src;
+}
+
+static void
+xml_pop_source(struct xml_context *ctx)	/* Leave the current input source and resume its parent, if any */
+{
+  TRACE(ctx, "pop_source");
+  if (unlikely(ctx->depth != 0))	/* A source must end at the same nesting depth at which it started (depth is zeroed in xml_push_source) */
+    xml_fatal_nested(ctx);
+  struct xml_source *src = ctx->src;
+  ASSERT(src);
+  bclose(src->fb);	/* Closes the fastbuf of the source being left */
+  ctx->depth = src->saved_depth;	/* Restore the depth recorded by xml_push_source() */
+  ctx->src = src = src->next;	/* From here on `src` is the parent source (or NULL) */
+  if (src)
+    {
+      ctx->bptr = src->bptr;	/* Resume reading the parent's buffer where it was suspended */
+      ctx->bstop = src->bstop;
+    }
+  xml_pop(ctx);	/* Undo the xml_push() from xml_push_source(): restores ctx->flags and the ctx->stack pool state */
+  if (unlikely(!src))	/* No parent source left: signal EOF by throwing */
+    xml_eof(ctx);
+}
+
+static void xml_refill_utf8(struct xml_context *ctx);
+
+void
+xml_push_entity(struct xml_context *ctx, struct xml_dtd_ent *ent)
+{
+  TRACE(ctx, "xml_push_entity");
+  uns cat1 = ctx->src->refill_cat1;
+  uns cat2 = ctx->src->refill_cat2;
+  struct xml_source *src = xml_push_source(ctx, 0);
+  src->refill_cat1 = cat1;
+  src->refill_cat2 = cat2;
+  if (ent->flags & XML_DTD_ENT_EXTERNAL)
+    xml_fatal(ctx, "External entities not implemented"); // FIXME
+  else
+    {
+      fbbuf_init_read(src->fb = &src->wrap_fb, ent->text, ent->len, 0);
+      src->refill = xml_refill_utf8;
+    }
+}
+
+void
+xml_set_source(struct xml_context *ctx, struct fastbuf *fb)
+{
+  TRACE(ctx, "xml_set_source");
+  ASSERT(!ctx->src);
+  struct xml_source *src = xml_push_source(ctx, XML_FLAG_SRC_DOCUMENT | XML_FLAG_SRC_EXPECTED_DECL);
+  src->fb = fb;
+}
+
+static uns
+xml_error_restricted(struct xml_context *ctx, uns c)
+{
+  if (c == ~1U)
+    xml_error(ctx, "Corrupted encoding");
+  else
+    xml_error(ctx, "Restricted char U+%04X", c);
+  return UNI_REPLACEMENT;
+}
+
+void xml_parse_decl(struct xml_context *ctx);
+
+#define REFILL(ctx, func, params...)							\
+  struct xml_source *src = ctx->src;							\
+  struct fastbuf *fb = src->fb;								\
+  if (ctx->bptr == ctx->bstop)								\
+    ctx->bptr = ctx->bstop = src->buf;							\
+  uns f = ctx->flags, c, t1 = src->refill_cat1, t2 = src->refill_cat2, row = src->row;	\
+  u32 *bend = src->buf + ARRAY_SIZE(src->buf), *bstop = ctx->bstop,			\
+      *last_0xd = (f & XML_FLAG_SRC_NEW_LINE) ? bstop : bend;				\
+  do											\
+    {											\
+      c = func(fb, ##params);								\
+      uns t = xml_char_cat(c);								\
+      if (t & t1)									\
+        /* Typical branch */								\
+	*bstop++ = c, *bstop++ = t;							\
+      else if (t & t2)									\
+        {										\
+	  /* New line */								\
+	  /* XML 1.0: 0xA | 0xD | 0xD 0xA */						\
+	  /* XML 1.1: 0xA | 0xD | 0xD 0xA | 0x85 | 0xD 0x85 | 0x2028 */			\
+	  if (c == 0xd)									\
+	    last_0xd = bstop + 2;							\
+	  else if (c != 0x2028 && last_0xd == bstop)					\
+	    {										\
+	      last_0xd = bend;								\
+	      continue;									\
+	    }										\
+	  xml_add_char(&bstop, 0xa), row++;						\
+	}										\
+      else if (c == '>')								\
+        {										\
+	  /* Used only in XML/TextDecl to switch the encoding */			\
+	  *bstop++ = c, *bstop++ = t;							\
+	  break;									\
+	}										\
+      else if (~c)									\
+        /* Restricted character */							\
+        xml_add_char(&bstop, xml_error_restricted(ctx, c));				\
+      else										\
+        {										\
+	  /* EOF */									\
+	  if (f & XML_FLAG_SRC_SURROUND)						\
+	    xml_add_char(&bstop, 0x20);							\
+          f |= XML_FLAG_SRC_EOF;							\
+          break;									\
+	}										\
+    }											\
+  while (bstop < bend);									\
+  ctx->flags = (last_0xd == bstop) ? f | XML_FLAG_SRC_NEW_LINE : f & ~XML_FLAG_SRC_NEW_LINE; \
+  ctx->bstop = bstop;									\
+  src->row = row;
+
+static void
+xml_refill_utf8(struct xml_context *ctx)
+{
+  REFILL(ctx, bget_utf8_repl, ~1U);
+}
+
+static void
+xml_refill_utf16_le(struct xml_context *ctx)
+{
+  REFILL(ctx, bget_utf16_le_repl, ~1U);
+}
+
+static void
+xml_refill_utf16_be(struct xml_context *ctx)
+{
+  REFILL(ctx, bget_utf16_be_repl, ~1U);
+}
+
+#if 0
+static inline uns
+xml_refill_libcharset_bget(struct fastbuf *fb, unsigned short int *in_to_x)
+{
+  // FIXME: slow; reads one byte, passes a negative bgetc() result through, otherwise maps the byte via in_to_x to UCS
+  int c;
+  return (unlikely((c = bgetc(fb)) < 0)) ? c : (int)conv_x_to_ucs(in_to_x[c]);
+}
+
+static void
+xml_refill_libcharset(struct xml_context *ctx)
+{
+  unsigned short int *in_to_x = ctx->src->refill_in_to_x;
+  REFILL(ctx, xml_refill_libcharset_bget, in_to_x);
+}
+#endif
+
+#undef REFILL
+
+void
+xml_refill(struct xml_context *ctx)	/* Make at least one unread character available in the context buffer */
+{
+  do
+    {
+      if (ctx->flags & XML_FLAG_SRC_EOF)	/* Current source exhausted: go back to the parent (throws when there is none) */
+	xml_pop_source(ctx);
+      else if (ctx->flags & XML_FLAG_SRC_EXPECTED_DECL)	/* New source: detect the encoding and parse its XMLDecl/TextDecl first */
+	xml_parse_decl(ctx);
+      else
+        {
+	  ctx->src->refill(ctx);	/* Encoding-specific reader chosen for this source */
+	  TRACE(ctx, "refilled %u characters", (uns)((ctx->bstop - ctx->bptr) / 2));	/* Two u32 slots (char, category) per character */
+	}
+    }
+  while (ctx->bptr == ctx->bstop);	/* Repeat while the buffer is still empty */
+}
+
+uns
+xml_row(struct xml_context *ctx)	/* Row number of the current read position; 0 when no source is set */
+{
+  struct xml_source *src = ctx->src;
+  if (!src)
+    return 0;
+  uns row = src->row;	/* Count of new lines placed into the buffer so far by the refill code */
+  for (u32 *p = ctx->bstop; p != ctx->bptr; p -= 2)	/* Walk back over the buffered but not yet consumed (char, category) pairs */
+    if (p[-1] & src->refill_cat2)	/* p[-1] is the category slot; refill_cat2 holds the new-line categories */
+      row--;
+  return row + 1;	/* Rows are reported 1-based */
+}
+
+/*** Basic parsing ***/
+
+void NONRET
+xml_fatal_expected(struct xml_context *ctx, uns c)
+{
+  xml_fatal(ctx, "Expected '%c'", c);
+}
+
+void NONRET
+xml_fatal_expected_white(struct xml_context *ctx)
+{
+  xml_fatal(ctx, "Expected a white space");
+}
+
+void NONRET
+xml_fatal_expected_quot(struct xml_context *ctx)
+{
+  xml_fatal(ctx, "Expected a quotation mark");
+}
+
+void
+xml_parse_eq(struct xml_context *ctx)
+{
+  /* Eq ::= S? '=' S? */
+  xml_parse_white(ctx, 0);
+  xml_parse_char(ctx, '=');
+  xml_parse_white(ctx, 0);
+}
+
+/* Names and nmtokens */
+
+static char *
+xml_parse_string(struct xml_context *ctx, struct mempool *pool, uns first_cat, uns next_cat, char *err)
+{
+  char *p = mp_start_noalign(pool, 1);
+  if (unlikely(!(xml_peek_cat(ctx) & first_cat)))
+    xml_fatal(ctx, "%s", err);
+  do
+    {
+      p = mp_spread(pool, p, 5);
+      p = utf8_32_put(p, xml_skip_char(ctx));
+    }
+  while (xml_peek_cat(ctx) & next_cat);
+  *p++ = 0;
+  return mp_end(pool, p);
+}
+
+static void
+xml_skip_string(struct xml_context *ctx, uns first_cat, uns next_cat, char *err)
+{
+  if (unlikely(!(xml_get_cat(ctx) & first_cat)))
+    xml_fatal(ctx, "%s", err);
+  while (xml_peek_cat(ctx) & next_cat)
+    xml_skip_char(ctx);
+}
+
+char *
+xml_parse_name(struct xml_context *ctx, struct mempool *pool)
+{
+  /* Name ::= NameStartChar (NameChar)* */
+  return xml_parse_string(ctx, pool,
+    !(ctx->flags & XML_FLAG_VERSION_1_1) ? XML_CHAR_SNAME_1_0 : XML_CHAR_SNAME_1_1,
+    !(ctx->flags & XML_FLAG_VERSION_1_1) ? XML_CHAR_NAME_1_0 : XML_CHAR_NAME_1_1,
+    "Expected a name");
+}
+
+void
+xml_skip_name(struct xml_context *ctx)
+{
+  xml_skip_string(ctx,
+    !(ctx->flags & XML_FLAG_VERSION_1_1) ? XML_CHAR_SNAME_1_0 : XML_CHAR_SNAME_1_1,
+    !(ctx->flags & XML_FLAG_VERSION_1_1) ? XML_CHAR_NAME_1_0 : XML_CHAR_NAME_1_1,
+    "Expected a name");
+}
+
+char *
+xml_parse_nmtoken(struct xml_context *ctx, struct mempool *pool)
+{
+  /* Nmtoken ::= (NameChar)+ */
+  uns cat = !(ctx->flags & XML_FLAG_VERSION_1_1) ? XML_CHAR_NAME_1_0 : XML_CHAR_NAME_1_1;
+  return xml_parse_string(ctx, pool, cat, cat, "Expected a nmtoken");
+}
+
+/* Simple literals */
+
+char *
+xml_parse_system_literal(struct xml_context *ctx, struct mempool *pool)
+{
+  /* SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") */
+  char *p = mp_start_noalign(pool, 1);
+  uns q = xml_parse_quote(ctx), c;
+  while ((c = xml_get_char(ctx)) != q)
+    {
+      p = mp_spread(pool, p, 5);
+      p = utf8_32_put(p, c);
+    }
+  *p++ = 0;
+  return mp_end(pool, p);
+}
+
+char *
+xml_parse_pubid_literal(struct xml_context *ctx, struct mempool *pool)
+{
+  /* PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" */
+  char *p = mp_start_noalign(pool, 1);
+  uns q = xml_parse_quote(ctx), c;
+  while ((c = xml_get_char(ctx)) != q)
+    {
+      if (unlikely(!(xml_last_cat(ctx) & XML_CHAR_PUBID)))
+	xml_fatal(ctx, "Expected a pubid character");
+      p = mp_spread(pool, p, 2);
+      *p++ = c;
+    }
+  *p++ = 0;
+  return mp_end(pool, p);
+}
+
+static char *
+xml_parse_encoding_name(struct xml_context *ctx)
+{
+  /* EncName ::= '"' [A-Za-z] ([A-Za-z0-9._] | '-')* '"' | "'" [A-Za-z] ([A-Za-z0-9._] | '-')* "'" */
+  char *p = mp_start_noalign(ctx->pool, 1);
+  uns q = xml_parse_quote(ctx);
+  if (unlikely(!(xml_get_cat(ctx) & XML_CHAR_ENC_SNAME)))
+    xml_fatal(ctx, "Invalid character in the encoding name");
+  while (1)
+    {
+      p = mp_spread(ctx->pool, p, 2);
+      *p++ = xml_last_char(ctx);
+      if (xml_get_char(ctx) == q)
+	break;
+      if (unlikely(!(xml_last_cat(ctx) & XML_CHAR_ENC_NAME)))
+	xml_fatal(ctx, "Invalid character in the encoding name");
+    }
+  *p++ = 0;
+  return mp_end(ctx->pool, p);
+}
+
+/* Document/external entity header */
+
+static inline void
+xml_init_cats(struct xml_context *ctx, uns mask)
+{
+  if (!(ctx->flags & XML_FLAG_VERSION_1_1))
+    {
+      ctx->src->refill_cat1 = XML_CHAR_VALID_1_0 & ~XML_CHAR_NEW_LINE_1_0 & ~mask;
+      ctx->src->refill_cat2 = XML_CHAR_NEW_LINE_1_0;
+    }
+  else
+    {
+      ctx->src->refill_cat1 = XML_CHAR_UNRESTRICTED_1_1 & ~XML_CHAR_NEW_LINE_1_1 & ~mask;
+      ctx->src->refill_cat2 = XML_CHAR_NEW_LINE_1_1;
+    }
+}
+
+static void
+xml_init_charconv(struct xml_context *ctx, int cs)
+{
+  // FIXME: hack
+  struct xml_source *src = ctx->src;
+  TRACE(ctx, "wrapping charset %s", charset_name(cs));
+#if 0
+  struct conv_context conv;
+  conv_set_charset(&conv, cs, CONV_CHARSET_UTF8);
+  src->refill = xml_refill_libcharset;
+  src->refill_in_to_x = conv.in_to_x;
+#else
+  src->fb = fb_wrap_charconv_in(src->fb, cs, CONV_CHARSET_UTF8);
+  // FIXME: memory leak
+#endif
+}
+
+void
+xml_parse_decl(struct xml_context *ctx)	/* Detect the source encoding and parse an optional XMLDecl/TextDecl header */
+{
+  TRACE(ctx, "xml_parse_decl");
+  struct xml_source *src = ctx->src;
+  ctx->flags &= ~XML_FLAG_SRC_EXPECTED_DECL;
+
+  /* Setup valid Unicode ranges and force the reader to abort refill() after each '>', where we can switch encoding or XML version */
+  xml_init_cats(ctx, XML_CHAR_GT);
+
+  /* Initialize the supplied charset (if any) or try to guess it */
+  char *expected_encoding = src->expected_encoding ? : src->fb_encoding;
+  src->refill = xml_refill_utf8;
+  int bom = bpeekc(src->fb);
+  if (bom < 0)
+    ctx->flags |= XML_FLAG_SRC_EOF;
+  if (!src->fb_encoding)
+    {
+      if (bom == 0xfe)
+	src->refill = xml_refill_utf16_be;
+      else if (bom == 0xff)
+	src->refill = xml_refill_utf16_le;
+    }
+  else
+    {
+      int cs = find_charset_by_name(src->fb_encoding);
+      if (cs == CONV_CHARSET_UTF8)
+        {}
+      else if (cs >= 0)
+        {
+	  xml_init_charconv(ctx, cs);
+	  bom = 0;
+	}
+      else if (!strcasecmp(src->fb_encoding, "UTF-16"))	/* strcasecmp() returns 0 on a match */
+        {
+	  src->refill = xml_refill_utf16_be;
+	  if (bom == 0xff)
+	    src->refill = xml_refill_utf16_le;
+	  if (!src->expected_encoding)
+	    expected_encoding = (bom == 0xff) ? "UTF-16LE" : "UTF-16BE";
+	}
+      else if (!strcasecmp(src->fb_encoding, "UTF-16BE"))
+	src->refill = xml_refill_utf16_be;
+      else if (!strcasecmp(src->fb_encoding, "UTF-16LE"))
+	src->refill = xml_refill_utf16_le;
+      else
+        {
+	  xml_error(ctx, "Unknown encoding '%s'", src->fb_encoding);
+	  expected_encoding = NULL;
+	}
+    }
+  uns utf16 = src->refill == xml_refill_utf16_le || src->refill == xml_refill_utf16_be;
+  if (bom > 0 && xml_peek_char(ctx) == 0xfeff)
+    xml_skip_char(ctx);
+  else if (utf16)
+    xml_error(ctx, "Missing or corrupted BOM");
+
+  /* Look ahead for presence of XMLDecl or optional TextDecl */
+  if (!(ctx->flags & XML_FLAG_SRC_EOF) && ctx->bstop != src->buf + ARRAY_SIZE(src->buf))
+    xml_refill(ctx);
+  uns doc = ctx->flags & XML_FLAG_SRC_DOCUMENT;
+  u32 *bptr = ctx->bptr;
+  uns have_decl = (12 <= ctx->bstop - ctx->bptr && (bptr[11] & XML_CHAR_WHITE) &&
+    bptr[0] == '<' && bptr[2] == '?' && (bptr[4] & 0xdf) == 'X' && (bptr[6] & 0xdf) == 'M' && (bptr[8] & 0xdf) == 'L');
+  if (!have_decl)
+    {
+      if (doc)
+        xml_fatal(ctx, "Missing or corrupted XML header");
+      else if (expected_encoding && strcasecmp(expected_encoding, "UTF-8") && !utf16)	/* Use the checked local; src->expected_encoding may be NULL */
+	xml_error(ctx, "Missing or corrupted entity header");
+      goto exit;
+    }
+  ctx->bptr = bptr + 12;
+  xml_parse_white(ctx, 0);
+
+  /* Parse version string (mandatory in XMLDecl, optional in TextDecl) */
+  if (xml_peek_char(ctx) == 'v')
+    {
+      xml_parse_seq(ctx, "version");
+      xml_parse_eq(ctx);
+      char *version = xml_parse_pubid_literal(ctx, ctx->pool);
+      TRACE(ctx, "version=%s", version);
+      uns v = 0;
+      if (!strcmp(version, "1.1"))
+	v = XML_FLAG_VERSION_1_1;
+      else if (strcmp(version, "1.0"))
+        {
+	  xml_error(ctx, "Unknown XML version string '%s'", version);
+	  version = "1.0";
+	}
+      if (doc)
+        {
+	  ctx->version_str = version;
+	  ctx->flags |= v;
+	}
+      else if (v > (ctx->flags & XML_FLAG_VERSION_1_1))
+        xml_error(ctx, "XML 1.1 external entity included from XML 1.0 document");
+      if (!xml_parse_white(ctx, !doc))
+        goto end;
+    }
+  else if (doc)
+    {
+      xml_error(ctx, "Expected XML version");
+      ctx->version_str = "1.0";
+    }
+
+  /* Parse encoding string (optional in XMLDecl, mandatory in TextDecl) */
+  if (xml_peek_char(ctx) == 'e')
+    {
+      xml_parse_seq(ctx, "encoding");
+      xml_parse_eq(ctx);
+      src->decl_encoding = xml_parse_encoding_name(ctx);
+      TRACE(ctx, "encoding=%s", src->decl_encoding);
+      if (!xml_parse_white(ctx, 0))
+	goto end;
+    }
+  else if (!doc)
+    xml_error(ctx, "Expected XML encoding");
+
+  /* Parse whether the document is standalone (optional in XMLDecl) */
+  if (doc && xml_peek_char(ctx) == 's')
+    {
+      xml_parse_seq(ctx, "standalone");
+      xml_parse_eq(ctx);
+      uns c = xml_parse_quote(ctx);
+      if (ctx->standalone = (xml_peek_char(ctx) == 'y'))
+	xml_parse_seq(ctx, "yes");
+      else
+        xml_parse_seq(ctx, "no");
+      xml_parse_char(ctx, c);
+      TRACE(ctx, "standalone=%d", ctx->standalone);
+      xml_parse_white(ctx, 0);
+    }
+end:
+  xml_parse_seq(ctx, "?>");
+
+  /* Switch to the final encoding */
+  if (src->decl_encoding)
+    {
+      int cs = find_charset_by_name(src->decl_encoding);
+      if (cs < 0 && !expected_encoding)
+	xml_error(ctx, "Unknown encoding '%s'", src->decl_encoding);
+      else if (!src->fb_encoding && cs >= 0 && cs != CONV_CHARSET_UTF8)
+	xml_init_charconv(ctx, cs);
+      else if (expected_encoding && strcasecmp(src->decl_encoding, expected_encoding) && (!utf16 ||
+	!(!strcasecmp(src->decl_encoding, "UTF-16") ||
+	 (!strcasecmp(src->decl_encoding, "UTF-16BE") && strcasecmp(expected_encoding, "UTF-16LE")) ||
+	 (!strcasecmp(src->decl_encoding, "UTF-16LE") && strcasecmp(expected_encoding, "UTF-16BE")))))
+	xml_error(ctx, "The header contains encoding '%s' instead of expected '%s'", src->decl_encoding, expected_encoding);
+    }
+
+exit:
+  /* Update valid Unicode ranges */
+  xml_init_cats(ctx, 0);
+}
diff --git a/sherlock/xml/common.h b/sherlock/xml/common.h
new file mode 100644
index 00000000..ed18e8af
--- /dev/null
+++ b/sherlock/xml/common.h
@@ -0,0 +1,327 @@
+/*
+ *	Sherlock Library -- A simple XML parser
+ *
+ *	(c) 2007 Pavel Charvat <pchar@ucw.cz>
+ *
+ *	This software may be freely distributed and used according to the terms
+ *	of the GNU Lesser General Public License.
+ */
+
+#ifndef _SHERLOCK_XML_COMMON_H
+#define _SHERLOCK_XML_COMMON_H
+
+#include "sherlock/xml/xml.h"
+#include "sherlock/xml/dtd.h"
+
+/*** Debugging ***/
+
+#ifdef LOCAL_DEBUG
+#define TRACE(c, f, p...) do { DBG("XML %u: " f, xml_row(c), ##p); } while(0)
+#else
+#define TRACE(c, f, p...) do {} while(0)
+#endif
+
+/*** Error handling ***/
+
+void NONRET xml_throw(struct xml_context *ctx);	/* longjmp() to ctx->throw_buf with ctx->err_code */
+void xml_warn(struct xml_context *ctx, const char *format, ...);	/* Calls ctx->h_warn (if set) with a formatted message */
+void xml_error(struct xml_context *ctx, const char *format, ...);	/* Calls ctx->h_error (if set) with a formatted message */
+void NONRET xml_fatal(struct xml_context *ctx, const char *format, ...);	/* Calls ctx->h_fatal (if set), then throws; matches the NONRET definition */
+
+/*** Character categorization ***/
+
+#include "obj/sherlock/xml/unicat.h"
+
+static inline uns
+xml_char_cat(uns c)	/* Return the category of code point c as a single-bit mask */
+{
+  if (c < 0x10000)	/* Below U+10000: two-level lookup, xml_char_tab2 indexed by the high byte gives an offset into xml_char_tab1 */
+    return 1U << xml_char_tab1[(c & 0xff) + xml_char_tab2[c >> 8]];
+  else if (likely(c < 0x110000))	/* U+10000..U+10FFFF: one table entry per 64K block */
+    return 1U << xml_char_tab3[c >> 16];
+  else
+    return 1;	/* Anything at or above 0x110000 (including the ~0U/~1U reader markers) gets bit 0 */
+}
+
+static inline uns
+xml_ascii_cat(uns c)	/* Direct xml_char_tab1 lookup for c; no range check is done here */
+{
+  return xml_char_tab1[c];	/* NOTE(review): returns the raw table entry, while xml_char_cat() returns 1U << entry and adds xml_char_tab2[c >> 8] -- confirm intent */
+}
+
+/*** Memory management ***/
+
+void NONRET xml_fatal_nested(struct xml_context *ctx);
+
+static inline void
+xml_inc(struct xml_context *ctx)
+{
+  /* Called after the first character of a block */
+  TRACE(ctx, "inc");
+  ctx->depth++;
+}
+
+static inline void
+xml_dec(struct xml_context *ctx)
+{
+  /* Called after the last character of a block */
+  TRACE(ctx, "dec");
+  if (unlikely(!ctx->depth--))
+    xml_fatal_nested(ctx);
+}
+
+struct xml_stack {
+  struct xml_stack *next;
+  struct mempool_state state;
+  uns flags;
+};
+
+static inline void *
+xml_do_push(struct xml_context *ctx, uns size)
+{
+  /* Saves ctx->stack and ctx->flags state */
+  struct mempool_state state;
+  mp_save(ctx->stack, &state);
+  struct xml_stack *s = mp_alloc(ctx->stack, size);
+  s->state = state;
+  s->flags = ctx->flags;
+  s->next = ctx->stack_list;
+  ctx->stack_list = s;
+  return s;
+}
+
+static inline void
+xml_do_pop(struct xml_context *ctx, struct xml_stack *s)
+{
+  /* Restore ctx->stack and ctx->flags state */
+  ctx->stack_list = s->next;
+  ctx->flags = s->flags;
+  mp_restore(ctx->stack, &s->state);
+}
+
+static inline void
+xml_push(struct xml_context *ctx)
+{
+  TRACE(ctx, "push");
+  xml_do_push(ctx, sizeof(struct xml_stack));
+}
+
+static inline void
+xml_pop(struct xml_context *ctx)
+{
+  TRACE(ctx, "pop");
+  ASSERT(ctx->stack_list);
+  xml_do_pop(ctx, ctx->stack_list);
+}
+
+struct xml_dom_stack {
+  struct xml_stack stack;
+  struct mempool_state state;
+};
+
+static inline struct xml_node *
+xml_push_dom(struct xml_context *ctx)
+{
+  /* Create a new DOM node */
+  TRACE(ctx, "push_dom");
+  struct xml_dom_stack *s = xml_do_push(ctx, sizeof(*s));
+  mp_save(ctx->pool, &s->state);
+  struct xml_node *n = mp_alloc(ctx->pool, sizeof(*n));
+  if (n->parent = ctx->node)
+    clist_add_tail(&n->parent->sons, &n->n);
+  return ctx->node = n;
+}
+
+static inline void
+xml_pop_dom(struct xml_context *ctx)
+{
+  /* Leave DOM subtree */
+  TRACE(ctx, "pop_dom");
+  ASSERT(ctx->node);
+  struct xml_node *p = ctx->node->parent;
+  struct xml_dom_stack *s = (void *)ctx->stack_list;
+  if (ctx->flags & XML_DOM_FREE)
+    {
+      /* See xml_pop_element() for cleanup of attribute hash table */
+      if (p)
+        clist_remove(&ctx->node->n);
+      mp_restore(ctx->pool, &s->state);
+    }
+  ctx->node = p;
+  xml_do_pop(ctx, &s->stack);
+}
+
+#define XML_HASH_HDR_SIZE ALIGN_TO(sizeof(void *), CPU_STRUCT_ALIGN)
+#define XML_HASH_GIVE_ALLOC struct HASH_PREFIX(table); \
+  static inline void *HASH_PREFIX(alloc)(struct HASH_PREFIX(table) *t, uns size) \
+  { return mp_alloc(*(void **)((void *)t - XML_HASH_HDR_SIZE), size); } \
+  static inline void HASH_PREFIX(free)(struct HASH_PREFIX(table) *t UNUSED, void *p UNUSED) {}
+
+void *xml_hash_new(struct mempool *pool, uns size);
+
+static inline void
+xml_start_chars(struct xml_context *ctx)
+{
+  struct fastbuf *fb = &ctx->chars;
+  fb->bstop = fb->bptr = fb->buffer = mp_start_noalign(ctx->pool, 1);
+  fb->bufend = fb->buffer + mp_avail(ctx->pool);
+}
+
+static inline char *
+xml_end_chars(struct xml_context *ctx, uns *len)	/* Terminate the text collected in ctx->chars; returns it and stores its byte length in *len */
+{
+  struct fastbuf *fb = &ctx->chars;
+  uns l = fb->bptr - fb->buffer;	/* Bytes actually written (bufend - buffer would be the buffer capacity) */
+  if (fb->bptr == fb->bufend)	/* No room left for the terminating zero */
+    fb->bptr = mp_expand(ctx->pool) + l;	/* mp_expand()'s result is taken as the new buffer start; continue at offset l */
+  *fb->bptr = 0;
+  char *c = mp_end(ctx->pool, fb->bptr + 1);	/* Close the growing pool block just past the terminator */
+  fb->bptr = fb->bstop = fb->buffer = fb->bufend = NULL;	/* Detach the fastbuf until the next xml_start_chars() */
+  *len = l;	/* Length excludes the terminator */
+  return c;
+}
+
+/*** Reading of document/external entities ***/
+
+struct xml_source *xml_push_source(struct xml_context *ctx, uns flags);
+void xml_push_entity(struct xml_context *ctx, struct xml_dtd_ent *ent);
+
+void xml_refill(struct xml_context *ctx);
+
+static inline uns
+xml_peek_char(struct xml_context *ctx)
+{
+  if (ctx->bptr == ctx->bstop)
+    xml_refill(ctx);
+  return ctx->bptr[0];
+}
+
+static inline uns
+xml_peek_cat(struct xml_context *ctx)
+{
+  if (ctx->bptr == ctx->bstop)
+    xml_refill(ctx);
+  return ctx->bptr[1];
+}
+
+static inline uns
+xml_get_char(struct xml_context *ctx)
+{
+  uns c = xml_peek_char(ctx);
+  ctx->bptr += 2;
+  return c;
+}
+
+static inline uns
+xml_get_cat(struct xml_context *ctx)
+{
+  uns c = xml_peek_cat(ctx);
+  ctx->bptr += 2;
+  return c;
+}
+
+static inline uns
+xml_last_char(struct xml_context *ctx)
+{
+  return ctx->bptr[-2];
+}
+
+static inline uns
+xml_last_cat(struct xml_context *ctx)
+{
+  return ctx->bptr[-1];
+}
+
+static inline uns
+xml_skip_char(struct xml_context *ctx)
+{
+  uns c = ctx->bptr[0];
+  ctx->bptr += 2;
+  return c;
+}
+
+static inline uns
+xml_unget_char(struct xml_context *ctx)
+{
+  return *(ctx->bptr -= 2);
+}
+
+/*** Basic parsing ***/
+
+void NONRET xml_fatal_expected(struct xml_context *ctx, uns c);
+void NONRET xml_fatal_expected_white(struct xml_context *ctx);
+void NONRET xml_fatal_expected_quot(struct xml_context *ctx);
+
+static inline uns
+xml_parse_white(struct xml_context *ctx, uns mandatory)
+{
+  /* mandatory=1 -> S ::= (#x20 | #x9 | #xD | #xA)+
+   * mandatory=0 -> S? */
+  uns cnt = 0;
+  while (xml_peek_cat(ctx) & XML_CHAR_WHITE)
+    {
+      xml_skip_char(ctx);
+      cnt++;
+    }
+  if (unlikely(mandatory && !cnt))
+    xml_fatal_expected_white(ctx);
+  return cnt;
+}
+
+static inline void
+xml_parse_char(struct xml_context *ctx, uns c)
+{
+  /* Consumes a given Unicode character */
+  if (unlikely(c != xml_get_char(ctx)))
+    xml_fatal_expected(ctx, c);
+}
+
+static inline void
+xml_parse_seq(struct xml_context *ctx, const char *seq)
+{
+  /* Consumes a given sequence of ASCII characters */
+  while (*seq)
+    xml_parse_char(ctx, *seq++);
+}
+
+void xml_parse_eq(struct xml_context *ctx);
+
+static inline uns
+xml_parse_quote(struct xml_context *ctx)
+{
+  /* "'" | '"' */
+  uns c = xml_get_char(ctx);
+  if (unlikely(c != '\'' && c != '\"'))
+    xml_fatal_expected_quot(ctx);
+  return c;
+}
+
+/* Names and nmtokens */
+
+char *xml_parse_name(struct xml_context *ctx, struct mempool *pool);
+void xml_skip_name(struct xml_context *ctx);
+char *xml_parse_nmtoken(struct xml_context *ctx, struct mempool *pool);
+
+/* Simple literals */
+
+char *xml_parse_system_literal(struct xml_context *ctx, struct mempool *pool);
+char *xml_parse_pubid_literal(struct xml_context *ctx, struct mempool *pool);
+
+/* Parsing */
+
+uns xml_parse_char_ref(struct xml_context *ctx);
+void xml_parse_ref(struct xml_context *ctx);
+void xml_parse_pe_ref(struct xml_context *ctx);
+char *xml_parse_attr_value(struct xml_context *ctx, struct xml_dtd_attr *attr);
+void xml_parse_notation_decl(struct xml_context *ctx);
+void xml_parse_entity_decl(struct xml_context *ctx);
+void xml_parse_element_decl(struct xml_context *ctx);
+void xml_parse_attr_list_decl(struct xml_context *ctx);
+void xml_push_comment(struct xml_context *ctx);
+void xml_pop_comment(struct xml_context *ctx);
+void xml_skip_comment(struct xml_context *ctx);
+void xml_push_pi(struct xml_context *ctx);
+void xml_pop_pi(struct xml_context *ctx);
+void xml_skip_pi(struct xml_context *ctx);
+
+#endif
diff --git a/sherlock/xml/dtd.c b/sherlock/xml/dtd.c
new file mode 100644
index 00000000..07f030a4
--- /dev/null
+++ b/sherlock/xml/dtd.c
@@ -0,0 +1,823 @@
+/*
+ *	Sherlock Library -- A simple XML parser
+ *
+ *	(c) 2007 Pavel Charvat <pchar@ucw.cz>
+ *
+ *	This software may be freely distributed and used according to the terms
+ *	of the GNU Lesser General Public License.
+ */
+
+#define LOCAL_DEBUG
+
+#include "sherlock/sherlock.h"
+#include "sherlock/xml/xml.h"
+#include "sherlock/xml/dtd.h"
+#include "sherlock/xml/common.h"
+#include "lib/fastbuf.h"
+#include "lib/ff-unicode.h"
+
+/* Notations */
+
+#define HASH_PREFIX(x) xml_dtd_notns_##x	/* Prefix of the generated names, e.g. xml_dtd_notns_lookup() */
+#define HASH_NODE struct xml_dtd_notn		/* Type of the stored nodes */
+#define HASH_KEY_STRING name			/* Nodes are keyed by their `name' string member */
+#define HASH_ZERO_FILL				/* NOTE(review): presumably new nodes start zeroed -- callers test ->flags right after lookup */
+#define HASH_TABLE_DYNAMIC			/* The table instance is passed as the first argument of every call */
+#define HASH_WANT_FIND				/* Presumably requests xml_dtd_notns_find() */
+#define HASH_WANT_LOOKUP			/* Requests xml_dtd_notns_lookup(); its result is used without a NULL check */
+#define HASH_GIVE_ALLOC				/* Allocation hooks are supplied by XML_HASH_GIVE_ALLOC below */
+#define HASH_TABLE_ALLOC			/* NOTE(review): assumed to apply the same hooks to the table itself -- confirm in lib/hashtable.h */
+XML_HASH_GIVE_ALLOC
+#include "lib/hashtable.h"
+
+/* General entities */
+
+#define HASH_PREFIX(x) xml_dtd_ents_##x		/* Prefix of the generated names, e.g. xml_dtd_ents_find() */
+#define HASH_NODE struct xml_dtd_ent		/* One table type serves both tab_gents and tab_pents */
+#define HASH_KEY_STRING name			/* Entities are keyed by their `name' string member */
+#define HASH_ZERO_FILL				/* NOTE(review): presumably new nodes start zeroed -- XML_DTD_ENT_DECLARED is tested right after lookup */
+#define HASH_TABLE_DYNAMIC			/* The table instance is passed as the first argument of every call */
+#define HASH_WANT_FIND				/* Requests xml_dtd_ents_find(); may return NULL (callers check) */
+#define HASH_WANT_LOOKUP			/* Requests xml_dtd_ents_lookup(); its result is used without a NULL check */
+#define HASH_GIVE_ALLOC				/* Allocation hooks are supplied by XML_HASH_GIVE_ALLOC below */
+#define HASH_TABLE_ALLOC			/* NOTE(review): assumed to apply the same hooks to the table itself -- confirm in lib/hashtable.h */
+XML_HASH_GIVE_ALLOC
+#include "lib/hashtable.h"
+
+static struct xml_dtd_ent *
+xml_dtd_declare_trivial_gent(struct xml_context *ctx, char *name, char *text)
+{
+  /* Declares &name; with a fixed replacement text; returns it, or NULL (after a warning) if already declared */
+  struct xml_dtd_ent *e = xml_dtd_ents_lookup(ctx->dtd->tab_gents, name);
+  if (!(e->flags & XML_DTD_ENT_DECLARED))
+    {
+      slist_add_tail(&ctx->dtd->gents, &e->n);
+      e->flags = XML_DTD_ENT_DECLARED | XML_DTD_ENT_TRIVIAL;
+      e->text = text;
+      e->len = strlen(text);
+      return e;
+    }
+  xml_warn(ctx, "Entity &%s; already declared", name);
+  return NULL;
+}
+
+static void
+xml_dtd_declare_default_gents(struct xml_context *ctx)
+{
+  /* The five predefined entities as { name, replacement text }, declared in this order */
+  static char *predefined[][2] =
+    { { "lt", "<" }, { "gt", ">" }, { "amp", "&" }, { "apos", "'" }, { "quot", "\"" } };
+  for (uns i = 0; i < sizeof(predefined) / sizeof(predefined[0]); i++)
+    xml_dtd_declare_trivial_gent(ctx, predefined[i][0], predefined[i][1]);
+}
+
+struct xml_dtd_ent *
+xml_dtd_find_gent(struct xml_context *ctx, char *name)
+{
+  /* Returns the declared general entity called name, or NULL if there is none.
+   * With a DTD attached the answer comes from its general-entity table and only
+   * entries flagged XML_DTD_ENT_DECLARED count; without a DTD just the five
+   * predefined entities (lt, gt, amp, apos, quot) are known. */
+  if (ctx->dtd)
+    {
+      struct xml_dtd_ent *found = xml_dtd_ents_find(ctx->dtd->tab_gents, name);
+      if (found && (found->flags & XML_DTD_ENT_DECLARED))
+	return found;
+      return NULL;
+    }
+
+  /* Static stand-ins for the predefined entities; every replacement text is one byte long */
+#define ENT(n, t) ent_##n = { .name = #n, .text = t, .len = 1, .flags = XML_DTD_ENT_DECLARED | XML_DTD_ENT_TRIVIAL }
+  static struct xml_dtd_ent ENT(lt, "<"), ENT(gt, ">"), ENT(amp, "&"), ENT(apos, "'"), ENT(quot, "\"");
+#undef ENT
+  static struct xml_dtd_ent * const builtin[] =
+    {
+      &ent_lt,
+      &ent_gt,
+      &ent_amp,
+      &ent_apos,
+      &ent_quot,
+    };
+
+  for (uns i = 0; i < sizeof(builtin) / sizeof(builtin[0]); i++)
+    {
+      /* Exact, case-sensitive comparison with the entity name */
+      if (!strcmp(name, builtin[i]->name))
+	return builtin[i];
+    }
+
+  /* Not one of the predefined names */
+  return NULL;
+}
+
+/* Parameter entities */
+
+static struct xml_dtd_ent *
+xml_dtd_find_pent(struct xml_context *ctx, char *name)
+{
+  /* Parameter entity called name, or NULL unless it exists and is flagged XML_DTD_ENT_DECLARED */
+  struct xml_dtd_ent *found = xml_dtd_ents_find(ctx->dtd->tab_pents, name);
+  return (found && (found->flags & XML_DTD_ENT_DECLARED)) ? found : NULL;
+}
+
+/* Elements */
+
+#define HASH_PREFIX(x) xml_dtd_elems_##x	/* Prefix of the generated names, e.g. xml_dtd_elems_lookup() */
+#define HASH_NODE struct xml_dtd_elem		/* Type of the stored nodes */
+#define HASH_KEY_STRING name			/* Elements are keyed by their `name' string member */
+#define HASH_TABLE_DYNAMIC			/* The table instance is passed as the first argument of every call */
+#define HASH_ZERO_FILL				/* NOTE(review): presumably new nodes start zeroed -- ->flags is tested right after lookup */
+#define HASH_WANT_LOOKUP			/* Requests xml_dtd_elems_lookup(); its result is used without a NULL check */
+#define HASH_GIVE_ALLOC				/* Allocation hooks are supplied by XML_HASH_GIVE_ALLOC below */
+#define HASH_TABLE_ALLOC			/* NOTE(review): assumed to apply the same hooks to the table itself -- confirm in lib/hashtable.h */
+XML_HASH_GIVE_ALLOC
+#include "lib/hashtable.h"
+
+/* Element sons */
+
+struct xml_dtd_enodes_table;	/* Forward declaration; the callbacks below are defined before lib/hashtable.h is included */
+
+static inline uns
+xml_dtd_enodes_hash(struct xml_dtd_enodes_table *tab UNUSED, struct xml_dtd_elem_node *parent, struct xml_dtd_elem *elem)
+{
+  return hash_pointer(parent) ^ hash_pointer(elem);	/* The key is the (parent node, element) pointer pair */
+}
+
+static inline int
+xml_dtd_enodes_eq(struct xml_dtd_enodes_table *tab UNUSED, struct xml_dtd_elem_node *parent1, struct xml_dtd_elem *elem1, struct xml_dtd_elem_node *parent2, struct xml_dtd_elem *elem2)
+{
+  return (parent1 == parent2) && (elem1 == elem2);	/* Keys match only if both pointers are identical */
+}
+
+static inline void
+xml_dtd_enodes_init_key(struct xml_dtd_enodes_table *tab UNUSED, struct xml_dtd_elem_node *node, struct xml_dtd_elem_node *parent, struct xml_dtd_elem *elem)
+{
+  node->parent = parent;	/* Stores both halves of the key in the node */
+  node->elem = elem;
+}
+
+#define HASH_PREFIX(x) xml_dtd_enodes_##x	/* Prefix of the generated names, e.g. xml_dtd_enodes_find() */
+#define HASH_NODE struct xml_dtd_elem_node	/* Type of the stored nodes */
+#define HASH_KEY_COMPLEX(x) x parent, x elem	/* Two-part key: parent node and element */
+#define HASH_KEY_DECL struct xml_dtd_elem_node *parent, struct xml_dtd_elem *elem
+#define HASH_GIVE_HASHFN			/* xml_dtd_enodes_hash() above */
+#define HASH_GIVE_EQ				/* xml_dtd_enodes_eq() above */
+#define HASH_GIVE_INIT_KEY			/* xml_dtd_enodes_init_key() above */
+#define HASH_TABLE_DYNAMIC			/* The table instance is passed as the first argument of every call */
+#define HASH_ZERO_FILL				/* NOTE(review): presumably zeroes the non-key members of new nodes -- confirm */
+#define HASH_WANT_FIND				/* Requests xml_dtd_enodes_find() */
+#define HASH_WANT_NEW				/* Requests xml_dtd_enodes_new() */
+#define HASH_GIVE_ALLOC				/* Allocation hooks are supplied by XML_HASH_GIVE_ALLOC below */
+#define HASH_TABLE_ALLOC			/* NOTE(review): assumed to apply the same hooks to the table itself -- confirm in lib/hashtable.h */
+XML_HASH_GIVE_ALLOC
+#include "lib/hashtable.h"
+
+/* Element attributes */
+
+struct xml_dtd_attrs_table;	/* Forward declaration; the callbacks below are defined before lib/hashtable.h is included */
+
+static inline uns
+xml_dtd_attrs_hash(struct xml_dtd_attrs_table *tab UNUSED, struct xml_dtd_elem *elem, char *name)
+{
+  return hash_pointer(elem) ^ hash_string(name);	/* The key is (owning element pointer, attribute name) */
+}
+
+static inline int
+xml_dtd_attrs_eq(struct xml_dtd_attrs_table *tab UNUSED, struct xml_dtd_elem *elem1, char *name1, struct xml_dtd_elem *elem2, char *name2)
+{
+  return (elem1 == elem2) && !strcmp(name1, name2);	/* Same element pointer and equal name strings */
+}
+
+static inline void
+xml_dtd_attrs_init_key(struct xml_dtd_attrs_table *tab UNUSED, struct xml_dtd_attr *attr, struct xml_dtd_elem *elem, char *name)
+{
+  attr->elem = elem;
+  attr->name = name;	/* The pointer is stored, not a copy: the string must outlive the node */
+}
+
+#define HASH_PREFIX(x) xml_dtd_attrs_##x	/* Prefix of the generated names, e.g. xml_dtd_attrs_find() */
+#define HASH_NODE struct xml_dtd_attr		/* Type of the stored nodes */
+#define HASH_ZERO_FILL				/* NOTE(review): presumably zeroes the non-key members of new nodes -- confirm */
+#define HASH_TABLE_DYNAMIC			/* The table instance is passed as the first argument of every call */
+#define HASH_KEY_COMPLEX(x) x elem, x name	/* Two-part key: element and attribute name */
+#define HASH_KEY_DECL struct xml_dtd_elem *elem, char *name
+#define HASH_GIVE_HASHFN			/* xml_dtd_attrs_hash() above */
+#define HASH_GIVE_EQ				/* xml_dtd_attrs_eq() above */
+#define HASH_GIVE_INIT_KEY			/* xml_dtd_attrs_init_key() above */
+#define HASH_WANT_FIND				/* Requests xml_dtd_attrs_find() */
+#define HASH_WANT_NEW				/* Requests xml_dtd_attrs_new() */
+#define HASH_GIVE_ALLOC				/* Allocation hooks are supplied by XML_HASH_GIVE_ALLOC below */
+#define HASH_TABLE_ALLOC			/* NOTE(review): assumed to apply the same hooks to the table itself -- confirm in lib/hashtable.h */
+XML_HASH_GIVE_ALLOC
+#include "lib/hashtable.h"
+
+/* Enumerated attribute values */
+
+struct xml_dtd_evals_table;	/* Forward declaration; the callbacks below are defined before lib/hashtable.h is included */
+
+static inline uns
+xml_dtd_evals_hash(struct xml_dtd_evals_table *tab UNUSED, struct xml_dtd_attr *attr, char *val)
+{
+  return hash_pointer(attr) ^ hash_string(val);	/* The key is (attribute pointer, enumerated value string) */
+}
+
+static inline int
+xml_dtd_evals_eq(struct xml_dtd_evals_table *tab UNUSED, struct xml_dtd_attr *attr1, char *val1, struct xml_dtd_attr *attr2, char *val2)
+{
+  return (attr1 == attr2) && !strcmp(val1, val2);	/* Same attribute pointer and equal value strings */
+}
+
+static inline void
+xml_dtd_evals_init_key(struct xml_dtd_evals_table *tab UNUSED, struct xml_dtd_eval *eval, struct xml_dtd_attr *attr, char *val)
+{
+  eval->attr = attr;
+  eval->val = val;	/* The pointer is stored, not a copy: the string must outlive the node */
+}
+
+#define HASH_PREFIX(x) xml_dtd_evals_##x	/* Prefix of the generated names, e.g. xml_dtd_evals_find() */
+#define HASH_NODE struct xml_dtd_eval		/* Type of the stored nodes */
+#define HASH_TABLE_DYNAMIC			/* The table instance is passed as the first argument of every call */
+#define HASH_KEY_COMPLEX(x) x attr, x val	/* Two-part key: attribute and value; note: no HASH_ZERO_FILL for this table */
+#define HASH_KEY_DECL struct xml_dtd_attr *attr, char *val
+#define HASH_GIVE_HASHFN			/* xml_dtd_evals_hash() above */
+#define HASH_GIVE_EQ				/* xml_dtd_evals_eq() above */
+#define HASH_GIVE_INIT_KEY			/* xml_dtd_evals_init_key() above */
+#define HASH_WANT_FIND				/* Requests xml_dtd_evals_find() */
+#define HASH_WANT_NEW				/* Requests xml_dtd_evals_new() */
+#define HASH_GIVE_ALLOC				/* Allocation hooks are supplied by XML_HASH_GIVE_ALLOC below */
+#define HASH_TABLE_ALLOC			/* NOTE(review): assumed to apply the same hooks to the table itself -- confirm in lib/hashtable.h */
+XML_HASH_GIVE_ALLOC
+#include "lib/hashtable.h"
+
+/* Enumerated attribute notations */
+
+struct xml_dtd_enotns_table;	/* Forward declaration; the callbacks below are defined before lib/hashtable.h is included */
+
+static inline uns
+xml_dtd_enotns_hash(struct xml_dtd_enotns_table *tab UNUSED, struct xml_dtd_attr *attr, struct xml_dtd_notn *notn)
+{
+  return hash_pointer(attr) ^ hash_pointer(notn);	/* The key is the (attribute, notation) pointer pair */
+}
+
+static inline int
+xml_dtd_enotns_eq(struct xml_dtd_enotns_table *tab UNUSED, struct xml_dtd_attr *attr1, struct xml_dtd_notn *notn1, struct xml_dtd_attr *attr2, struct xml_dtd_notn *notn2)
+{
+  return (attr1 == attr2) && (notn1 == notn2);	/* Keys match only if both pointers are identical */
+}
+
+static inline void
+xml_dtd_enotns_init_key(struct xml_dtd_enotns_table *tab UNUSED, struct xml_dtd_enotn *enotn, struct xml_dtd_attr *attr, struct xml_dtd_notn *notn)
+{
+  enotn->attr = attr;	/* Stores both halves of the key in the node */
+  enotn->notn = notn;
+}
+
+#define HASH_PREFIX(x) xml_dtd_enotns_##x	/* Prefix of the generated names, e.g. xml_dtd_enotns_find() */
+#define HASH_NODE struct xml_dtd_enotn		/* Type of the stored nodes */
+#define HASH_TABLE_DYNAMIC			/* The table instance is passed as the first argument of every call */
+#define HASH_KEY_COMPLEX(x) x attr, x notn	/* Two-part key: attribute and notation; note: no HASH_ZERO_FILL for this table */
+#define HASH_KEY_DECL struct xml_dtd_attr *attr, struct xml_dtd_notn *notn
+#define HASH_GIVE_HASHFN			/* xml_dtd_enotns_hash() above */
+#define HASH_GIVE_EQ				/* xml_dtd_enotns_eq() above */
+#define HASH_GIVE_INIT_KEY			/* xml_dtd_enotns_init_key() above */
+#define HASH_WANT_FIND				/* Requests xml_dtd_enotns_find() */
+#define HASH_WANT_NEW				/* Requests xml_dtd_enotns_new() */
+#define HASH_GIVE_ALLOC				/* Allocation hooks are supplied by XML_HASH_GIVE_ALLOC below */
+#define HASH_TABLE_ALLOC			/* NOTE(review): assumed to apply the same hooks to the table itself -- confirm in lib/hashtable.h */
+XML_HASH_GIVE_ALLOC
+#include "lib/hashtable.h"
+
+/* DTD initialization/cleanup */
+
+void
+xml_dtd_init(struct xml_context *ctx)
+{
+  if (ctx->dtd)		/* A DTD is already attached: nothing to do */
+    return;
+  struct mempool *pool = mp_new(4096);
+  struct xml_dtd *dtd = ctx->dtd = mp_alloc_zero(pool, sizeof(*ctx->dtd));	/* The DTD structure itself lives in its own pool */
+  dtd->pool = pool;	/* Remembered so that xml_dtd_cleanup() can drop everything with one mp_delete() */
+  xml_dtd_ents_init(dtd->tab_gents = xml_hash_new(pool, sizeof(struct xml_dtd_ents_table)));	/* general entities */
+  xml_dtd_ents_init(dtd->tab_pents = xml_hash_new(pool, sizeof(struct xml_dtd_ents_table)));	/* parameter entities */
+  xml_dtd_notns_init(dtd->tab_notns = xml_hash_new(pool, sizeof(struct xml_dtd_notns_table)));	/* notations */
+  xml_dtd_elems_init(dtd->tab_elems = xml_hash_new(pool, sizeof(struct xml_dtd_elems_table)));	/* element types */
+  xml_dtd_enodes_init(dtd->tab_enodes = xml_hash_new(pool, sizeof(struct xml_dtd_enodes_table)));	/* (parent node, element) content nodes */
+  xml_dtd_attrs_init(dtd->tab_attrs = xml_hash_new(pool, sizeof(struct xml_dtd_attrs_table)));	/* (element, name) attributes */
+  xml_dtd_evals_init(dtd->tab_evals = xml_hash_new(pool, sizeof(struct xml_dtd_evals_table)));	/* (attribute, value) enumerated values */
+  xml_dtd_enotns_init(dtd->tab_enotns = xml_hash_new(pool, sizeof(struct xml_dtd_enotns_table)));	/* (attribute, notation) enumerated notations */
+  xml_dtd_declare_default_gents(ctx);	/* lt, gt, amp, apos, quot; needs tab_gents, hence last */
+}
+
+void
+xml_dtd_cleanup(struct xml_context *ctx)
+{
+  struct xml_dtd *dtd = ctx->dtd;	/* The DTD is allocated inside dtd->pool, so deleting the pool frees it too */
+  ctx->dtd = NULL;			/* Detach first; a no-op when no DTD was attached */
+  if (dtd)
+    mp_delete(dtd->pool);
+}
+
+void
+xml_dtd_finish(struct xml_context *ctx)
+{
+  if (!ctx->dtd)	/* Nothing to finish without a DTD */
+    return;
+  // FIXME: validity checks (currently a stub: the function has no effect)
+}
+
+/*** Parsing functions ***/
+
+/* References to parameter entities */
+
+void
+xml_parse_pe_ref(struct xml_context *ctx)
+{
+  /* PEReference ::= '%' Name ';'
+   * Already parsed: '%'
+   * A declared entity is handed to xml_push_entity(); an unknown one is only reported. */
+  struct mempool_state saved;
+  mp_save(ctx->stack, &saved);
+  char *name = xml_parse_name(ctx, ctx->stack);
+  xml_parse_char(ctx, ';');
+  struct xml_dtd_ent *ent = xml_dtd_find_pent(ctx, name);
+  if (ent)
+    {
+      TRACE(ctx, "Pushed entity %%%s;", name);
+    }
+  else
+    xml_error(ctx, "Unknown entity %%%s;", name);
+  /* The name was parsed into ctx->stack, so it must not be used past the restore */
+  mp_restore(ctx->stack, &saved);
+  xml_dec(ctx);
+  if (ent)
+    xml_push_entity(ctx, ent);
+}
+
+static void
+xml_parse_dtd_pe(struct xml_context *ctx)
+{
+  do	/* Entered with '%' as the next character; handles a run of consecutive PE references */
+    {
+      xml_skip_char(ctx);	/* The '%' itself */
+      xml_inc(ctx);		/* xml_parse_pe_ref() does the matching xml_dec() */
+      while (xml_peek_cat(ctx) & XML_CHAR_WHITE)
+	xml_skip_char(ctx);
+      xml_parse_pe_ref(ctx);
+    }
+  while (xml_peek_char(ctx) == '%');	/* Fixed: was `!=', which looped on (and ate) any character other than '%' */
+}
+
+static inline uns
+xml_parse_dtd_white(struct xml_context *ctx, uns mandatory)
+{
+  /* Skips whitespace inside a declaration and expands a parameter-entity reference
+   * that follows it.  Returns nonzero if whitespace was skipped or a reference was
+   * expanded; with mandatory set, finding neither goes to xml_fatal_expected_white(). */
+  uns seen = 0;
+  for (; xml_peek_cat(ctx) & XML_CHAR_WHITE; seen = 1)
+    xml_skip_char(ctx);
+  if (xml_peek_char(ctx) == '%')
+    {
+      xml_parse_dtd_pe(ctx);
+      return 1;
+    }
+  /* No reference: whitespace alone decides the result */
+  if (unlikely(!seen && mandatory))
+    xml_fatal_expected_white(ctx);
+  return seen;
+}
+
+static void	/* Parses 'SYSTEM' S SystemLiteral or 'PUBLIC' S PubidLiteral [S SystemLiteral] into *eid */
+xml_dtd_parse_external_id(struct xml_context *ctx, struct xml_ext_id *eid, uns allow_public)
+{
+  struct xml_dtd *dtd = ctx->dtd;	/* Both literals are allocated from the DTD pool */
+  bzero(eid, sizeof(*eid));		/* Members not parsed below stay zeroed */
+  uns c = xml_peek_char(ctx);
+  if (c == 'S')
+    {
+      xml_parse_seq(ctx, "SYSTEM");
+      xml_parse_dtd_white(ctx, 1);
+      eid->system_id = xml_parse_system_literal(ctx, dtd->pool);
+    }
+  else if (c == 'P')
+    {
+      xml_parse_seq(ctx, "PUBLIC");
+      xml_parse_dtd_white(ctx, 1);
+      eid->public_id = xml_parse_pubid_literal(ctx, dtd->pool);
+      /* A bare PublicID is fine only if allow_public; otherwise S SystemLiteral is required (whitespace now mandatory) */
+      if (xml_parse_dtd_white(ctx, !allow_public) && ((c = xml_peek_char(ctx)) == '\'' || c == '"' || !allow_public))
+	eid->system_id = xml_parse_system_literal(ctx, dtd->pool);
+    }
+  else
+    xml_fatal(ctx, "Expected an external ID");
+}
+
+/* DTD: <!NOTATION ...> */
+
+void
+xml_parse_notation_decl(struct xml_context *ctx)
+{
+  /* Parses the rest of a notation declaration and records it in the DTD:
+   *   NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
+   * Already parsed: '<!NOTATION' */
+  TRACE(ctx, "parse_notation_decl");
+  struct xml_dtd *dtd = ctx->dtd;
+  struct xml_ext_id id;
+  xml_parse_dtd_white(ctx, 1);
+  char *name = xml_parse_name(ctx, dtd->pool);
+  struct xml_dtd_notn *notn = xml_dtd_notns_lookup(dtd->tab_notns, name);
+  xml_parse_dtd_white(ctx, 1);
+  xml_dtd_parse_external_id(ctx, &id, 1);
+  xml_parse_dtd_white(ctx, 0);
+  xml_parse_char(ctx, '>');
+  if (!(notn->flags & XML_DTD_NOTN_DECLARED))
+    {
+      notn->flags = XML_DTD_NOTN_DECLARED;
+      notn->eid = id;
+      slist_add_tail(&dtd->notns, &notn->n);
+    }
+  else
+    xml_warn(ctx, "Notation %s already declared", notn->name);
+  xml_dec(ctx);
+}
+
+/* DTD: <!ENTITY ...> */
+
+void
+xml_parse_entity_decl(struct xml_context *ctx)
+{
+  /* '<!ENTITY' S ['%' S] Name S (EntityValue | ExternalID [S 'NDATA' S Name]) S? '>';  already parsed: '<!ENTITY' */
+  TRACE(ctx, "parse_entity_decl");
+  struct xml_dtd *dtd = ctx->dtd;
+  xml_parse_white(ctx, 1);	/* Plain whitespace only: xml_parse_dtd_white() would take the '%' below for a PE reference */
+
+  uns flags = (xml_get_char(ctx) == '%') ? XML_DTD_ENT_PARAMETER : 0;
+  if (flags)
+    xml_parse_dtd_white(ctx, 1);
+  else
+    xml_unget_char(ctx);
+
+  struct xml_dtd_ent *ent = xml_dtd_ents_lookup(flags ? dtd->tab_pents : dtd->tab_gents, xml_parse_name(ctx, dtd->pool));
+  slist *list = flags ? &dtd->pents : &dtd->gents;
+  xml_parse_dtd_white(ctx, 1);
+  if (ent->flags & XML_DTD_ENT_DECLARED)
+    {
+       xml_fatal(ctx, "Entity &%s; already declared, skipping not implemented", ent->name);
+       // FIXME: should be only warning
+    }
+
+  uns c, sep = xml_get_char(ctx);
+  if (sep == '\'' || sep == '"')
+    {
+      /* Internal entity:
+       * EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | "'" ([^%&'] | PEReference | Reference)* "'" */
+      char *p = mp_start_noalign(dtd->pool, 1);
+      while (1)
+        {
+	  if ((c = xml_get_char(ctx)) == sep)
+	    break;
+	  if (c == '%')
+	    {
+	      // FIXME: parameter-entity references inside EntityValue are not implemented
+	      ASSERT(0);
+	      continue;
+	    }
+	  if (c == '&')
+	    {
+	      xml_inc(ctx);
+	      if (xml_peek_char(ctx) != '#')
+	        {
+	          /* Bypass references to general entities: copy "&name;" verbatim */
+	          struct mempool_state state;
+	          mp_save(ctx->stack, &state);
+	          char *n = xml_parse_name(ctx, ctx->stack);
+	          xml_parse_char(ctx, ';');
+		  xml_dec(ctx);
+		  uns l = strlen(n);
+		  p = mp_spread(dtd->pool, p, 3 + l);	/* '&' + name + ';' + room for the final zero */
+		  *p++ = '&';
+		  memcpy(p, n, l);
+		  p += l;		/* Fixed: without this the ';' overwrote the first byte of the name */
+		  *p++ = ';';
+		  mp_restore(ctx->stack, &state);
+		  continue;
+	        }
+	      else
+	        {
+		  xml_skip_char(ctx);
+	          c = xml_parse_char_ref(ctx);	/* Character references are expanded immediately */
+		}
+	    }
+	  p = mp_spread(dtd->pool, p, 5);
+	  p = utf8_32_put(p, c);
+	}
+      *p = 0;
+      ent->len = p - (char *)mp_ptr(dtd->pool);
+      ent->text = mp_end(dtd->pool, p + 1);
+      slist_add_tail(list, &ent->n);
+      ent->flags = flags | XML_DTD_ENT_DECLARED;
+    }
+  else
+    {
+      /* External entity; sep was the first character of the external ID, so give it back first */
+      struct xml_ext_id eid;
+      struct xml_dtd_notn *notn = NULL;
+      xml_unget_char(ctx);
+      xml_dtd_parse_external_id(ctx, &eid, 0);
+      /* NDataDecl ::= S 'NDATA' S Name -- general entities only; the closing '>' is left to the common tail */
+      if (xml_parse_dtd_white(ctx, 0) && !flags && xml_peek_char(ctx) != '>')
+        {
+	  /* General external unparsed entity */
+	  flags |= XML_DTD_ENT_UNPARSED;
+	  xml_parse_seq(ctx, "NDATA");
+	  xml_parse_dtd_white(ctx, 1);
+	  notn = xml_dtd_notns_lookup(dtd->tab_notns, xml_parse_name(ctx, dtd->pool));
+	}
+      slist_add_tail(list, &ent->n);
+      ent->flags = flags | XML_DTD_ENT_DECLARED | XML_DTD_ENT_EXTERNAL;
+      ent->eid = eid;
+      ent->notn = notn;
+    }
+  xml_parse_dtd_white(ctx, 0);
+  xml_parse_char(ctx, '>');
+  xml_dec(ctx);
+}
+
+/* DTD: <!ELEMENT ...> */
+
+void
+xml_parse_element_decl(struct xml_context *ctx)
+{
+  /* Elementdecl ::= '<!ELEMENT' S  Name  S  contentspec  S? '>'
+   * Already parsed: '<!ELEMENT' */
+  struct xml_dtd *dtd = ctx->dtd;
+  xml_parse_dtd_white(ctx, 1);
+  char *name = xml_parse_name(ctx, dtd->pool);
+  xml_parse_dtd_white(ctx, 1);
+  struct xml_dtd_elem *elem = xml_dtd_elems_lookup(dtd->tab_elems, name);
+  if (elem->flags & XML_DTD_ELEM_DECLARED)
+    xml_fatal(ctx, "Element <%s> already declared", name);
+  elem->flags |= XML_DTD_ELEM_DECLARED;	/* Fixed: the flag was tested above but never set */
+
+  /* contentspec ::= 'EMPTY' | 'ANY' | Mixed | children */
+  uns c = xml_peek_char(ctx);
+  if (c == 'E')
+    {
+      xml_parse_seq(ctx, "EMPTY");
+      elem->type = XML_DTD_ELEM_EMPTY;
+    }
+  else if (c == 'A')
+    {
+      xml_parse_seq(ctx, "ANY");
+      elem->type = XML_DTD_ELEM_ANY;
+    }
+  else if (c == '(')
+    {
+      xml_skip_char(ctx);
+      xml_inc(ctx);
+      xml_parse_dtd_white(ctx, 0);
+      struct xml_dtd_elem_node *parent = elem->node = mp_alloc_zero(dtd->pool, sizeof(*parent));
+      if (xml_peek_char(ctx) == '#')
+        {
+	  /* Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | '(' S? '#PCDATA' S? ')' */
+	  xml_skip_char(ctx);
+	  xml_parse_seq(ctx, "PCDATA");
+	  elem->type = XML_DTD_ELEM_MIXED;
+          parent->type = XML_DTD_ELEM_PCDATA;
+	  while (1)
+	    {
+	      xml_parse_dtd_white(ctx, 0);
+	      if ((c = xml_get_char(ctx)) == ')')
+		break;
+	      else if (c != '|')
+		xml_fatal_expected(ctx, ')');
+	      xml_parse_dtd_white(ctx, 0);
+	      struct xml_dtd_elem *son_elem = xml_dtd_elems_lookup(dtd->tab_elems, xml_parse_name(ctx, dtd->pool));
+	      if (xml_dtd_enodes_find(dtd->tab_enodes, parent, son_elem))
+		xml_error(ctx, "Duplicate content '%s'", son_elem->name);
+	      else
+	        {
+		  struct xml_dtd_elem_node *son = xml_dtd_enodes_new(dtd->tab_enodes, parent, son_elem);
+		  slist_add_tail(&parent->sons, &son->n);
+		}
+	    }
+	  xml_dec(ctx);
+	  if (xml_peek_char(ctx) == '*')
+	    {
+	      xml_skip_char(ctx);
+	      parent->occur = XML_DTD_ELEM_OCCUR_MULT;
+	    }
+	  else if (!slist_head(&parent->sons))
+	    parent->occur = XML_DTD_ELEM_OCCUR_ONCE;	/* '(#PCDATA)' without names needs no '*' */
+	  else
+	    xml_fatal_expected(ctx, '*');
+	}
+      else
+        {
+	  /* children ::= (choice | seq) ('?' | '*' | '+')?
+	   * cp ::= (Name | choice | seq) ('?' | '*' | '+')?
+	   * choice ::= '(' S? cp ( S? '|' S? cp )+ S? ')'
+	   * seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' */
+	  elem->type = XML_DTD_ELEM_CHILDREN;
+	  parent->type = XML_DTD_ELEM_PCDATA;	/* Placeholder meaning "operator not seen yet" */
+	  uns c;
+	  goto first;
+	  while (1)
+	    {
+	      /* After name */
+	      xml_parse_dtd_white(ctx, 0);
+	      if ((c = xml_get_char(ctx)) ==  ')')
+	        {
+		  xml_dec(ctx);
+		  if (parent->type == XML_DTD_ELEM_PCDATA)
+		    parent->type = XML_DTD_ELEM_SEQ;
+		  if ((c = xml_get_char(ctx)) == '?')
+		    parent->occur = XML_DTD_ELEM_OCCUR_OPT;
+		  else if (c == '*')
+		    parent->occur = XML_DTD_ELEM_OCCUR_MULT;
+		  else if (c == '+')
+		    parent->occur = XML_DTD_ELEM_OCCUR_PLUS;
+		  else
+		    {
+		      xml_unget_char(ctx);
+		      parent->occur = XML_DTD_ELEM_OCCUR_ONCE;
+		    }
+		  if (!parent->parent)
+		    break;	/* The outermost group is closed */
+		  parent = parent->parent;
+		  continue;
+		}
+	      else if (c == '|')
+	        {
+		  if (parent->type == XML_DTD_ELEM_PCDATA)
+		    parent->type = XML_DTD_ELEM_OR;
+		  else if (parent->type != XML_DTD_ELEM_OR)
+		    xml_fatal(ctx, "Mixed operators in the list of element children");
+		}
+	      else if (c == ',')
+	        {
+		  if (parent->type == XML_DTD_ELEM_PCDATA)
+		    parent->type = XML_DTD_ELEM_SEQ;
+		  else if (parent->type != XML_DTD_ELEM_SEQ)
+		    xml_fatal(ctx, "Mixed operators in the list of element children");
+		}
+	      else
+	        xml_unget_char(ctx);
+
+	      /* Before name */
+	      xml_parse_dtd_white(ctx, 0);
+first:
+	      while (xml_peek_char(ctx) == '(')	/* Fixed: nested groups are opened where a cp may start, and descended into */
+	        {
+		  xml_skip_char(ctx);
+		  xml_inc(ctx);
+		  struct xml_dtd_elem_node *group = mp_alloc_zero(dtd->pool, sizeof(*group));
+		  group->parent = parent;
+		  group->type = XML_DTD_ELEM_PCDATA;	/* Same placeholder as for the outermost group */
+		  slist_add_tail(&parent->sons, &group->n);
+		  parent = group;
+		  xml_parse_dtd_white(ctx, 0);
+		}
+	      struct xml_dtd_elem *son_elem = xml_dtd_elems_lookup(dtd->tab_elems, xml_parse_name(ctx, dtd->pool));
+	      // FIXME: duplicates, occurrence indicators ('?', '*', '+') after names
+	      struct xml_dtd_elem_node *son = mp_alloc_zero(dtd->pool, sizeof(*son));
+	      son->parent = parent;
+	      son->elem = son_elem;
+	      slist_add_tail(&parent->sons, &son->n);
+	    }
+	}
+    }
+  else
+    xml_fatal(ctx, "Expected element content specification");
+
+  xml_parse_dtd_white(ctx, 0);
+  xml_parse_char(ctx, '>');
+  xml_dec(ctx);
+}
+
+void
+xml_parse_attr_list_decl(struct xml_context *ctx)
+{
+  /* AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
+   * AttDef ::= S Name S AttType S DefaultDecl
+   * Already parsed: '<!ATTLIST' */
+  struct xml_dtd *dtd = ctx->dtd;
+  xml_parse_dtd_white(ctx, 1);
+  struct xml_dtd_elem *elem = xml_dtd_elems_lookup(dtd->tab_elems, xml_parse_name(ctx, dtd->pool));
+
+  while (xml_parse_dtd_white(ctx, 0) && xml_peek_char(ctx) != '>')
+    {
+      char *name = xml_parse_name(ctx, dtd->pool);
+      struct xml_dtd_attr *attr = xml_dtd_attrs_find(dtd->tab_attrs, elem, name);
+      uns ignored = 0;	/* Nonzero: a repeated definition is parsed but nothing is stored */
+      if (attr)
+        {
+	  xml_warn(ctx, "Duplicate attribute definition");
+	  ignored++;
+	}
+      else
+	attr = xml_dtd_attrs_new(dtd->tab_attrs, elem, name);
+      xml_parse_dtd_white(ctx, 1);
+      if (xml_peek_char(ctx) == '(')
+        {
+	  xml_skip_char(ctx); // FIXME: xml_inc/dec ?
+	  if (!ignored)
+	    attr->type = XML_ATTR_ENUM;
+	  do
+	    {
+	      xml_parse_dtd_white(ctx, 0);
+	      char *value = xml_parse_nmtoken(ctx, dtd->pool);
+	      /* Each value may be listed only once per attribute */
+	      if (!ignored && xml_dtd_evals_find(dtd->tab_evals, attr, value))
+		xml_error(ctx, "Duplicate enumeration value");
+	      else if (!ignored)
+		xml_dtd_evals_new(dtd->tab_evals, attr, value);
+	      xml_parse_dtd_white(ctx, 0);
+	    }
+	  while (xml_get_char(ctx) == '|');
+	  xml_unget_char(ctx);
+	  xml_parse_char(ctx, ')');
+	}
+      else
+        {
+	  char *type = xml_parse_name(ctx, dtd->pool);
+	  enum xml_dtd_attribute_type t = XML_ATTR_CDATA;
+	  if (!strcmp(type, "CDATA"))
+	    t = XML_ATTR_CDATA;
+	  else if (!strcmp(type, "ID"))
+	    t = XML_ATTR_ID;
+	  else if (!strcmp(type, "IDREF"))
+	    t = XML_ATTR_IDREF;
+	  else if (!strcmp(type, "IDREFS"))
+	    t = XML_ATTR_IDREFS;
+	  else if (!strcmp(type, "ENTITY"))
+	    t = XML_ATTR_ENTITY;
+	  else if (!strcmp(type, "ENTITIES"))
+	    t = XML_ATTR_ENTITIES;
+	  else if (!strcmp(type, "NMTOKEN"))
+	    t = XML_ATTR_NMTOKEN;
+	  else if (!strcmp(type, "NMTOKENS"))
+	    t = XML_ATTR_NMTOKENS;
+	  else if (!strcmp(type, "NOTATION"))
+	    {
+	      if (elem->type == XML_DTD_ELEM_EMPTY)
+		xml_fatal(ctx, "Empty element must not have notation attribute");
+	      // FIXME: An element type MUST NOT have more than one NOTATION attribute specified.
+	      t = XML_ATTR_NOTATION;
+	      xml_parse_dtd_white(ctx, 1);
+	      xml_parse_char(ctx, '(');
+	      do
+	        {
+		  xml_parse_dtd_white(ctx, 0);
+		  struct xml_dtd_notn *n = xml_dtd_notns_lookup(dtd->tab_notns, xml_parse_name(ctx, dtd->pool));
+		  /* Each notation may be listed only once per attribute */
+		  if (!ignored && xml_dtd_enotns_find(dtd->tab_enotns, attr, n))
+		    xml_error(ctx, "Duplicate enumerated notation");
+		  else if (!ignored)
+		    xml_dtd_enotns_new(dtd->tab_enotns, attr, n);
+		  xml_parse_dtd_white(ctx, 0);
+		}
+	      while (xml_get_char(ctx) == '|');
+	      xml_unget_char(ctx);
+	      xml_parse_char(ctx, ')');
+	    }
+	  else
+	    xml_fatal(ctx, "Unknown attribute type");
+	  if (!ignored)
+	    attr->type = t;
+	}
+      xml_parse_dtd_white(ctx, 1);
+      enum xml_dtd_attribute_default def = XML_ATTR_NONE;
+      if (xml_get_char(ctx) == '#')
+	switch (xml_peek_char(ctx))
+          {
+	    case 'R':
+	      xml_parse_seq(ctx, "REQUIRED");
+	      def = XML_ATTR_REQUIRED;
+	      break;
+	    case 'I':
+	      xml_parse_seq(ctx, "IMPLIED");
+	      def = XML_ATTR_IMPLIED;
+	      break;
+	    case 'F':
+	      xml_parse_seq(ctx, "FIXED");
+	      def = XML_ATTR_FIXED;
+	      xml_parse_dtd_white(ctx, 1);
+	      break;
+	    default:
+	      xml_fatal(ctx, "Expected a modifier for default attribute value");
+	  }
+      else
+	xml_unget_char(ctx);
+      if (def != XML_ATTR_REQUIRED && def != XML_ATTR_IMPLIED)	/* #FIXED and plain defaults carry a value */
+        {
+	  char *v = xml_parse_attr_value(ctx, attr);
+	  if (!ignored)
+	    attr->default_value = v;
+	}
+      if (!ignored)
+	attr->default_mode = def;
+    }
+  xml_parse_char(ctx, '>');	/* Fixed: was xml_skip_char(), which swallowed any character without checking it */
+  xml_dec(ctx);
+}
diff --git a/sherlock/xml/dtd.h b/sherlock/xml/dtd.h
index bf95b872..9b4c6d98 100644
--- a/sherlock/xml/dtd.h
+++ b/sherlock/xml/dtd.h
@@ -62,6 +62,8 @@ struct xml_dtd_ent {
   struct xml_dtd_notn *notn;		/* Notation (XML_DTD_ENT_UNPARSED only) */
 };
 
+struct xml_dtd_ent *xml_dtd_find_gent(struct xml_context *ctx, char *name);
+
 /* Elements */
 
 enum xml_dtd_elem_flags {
@@ -145,4 +147,8 @@ struct xml_dtd_enotn {
   struct xml_dtd_notn *notn;
 };
 
+void xml_dtd_init(struct xml_context *ctx);
+void xml_dtd_cleanup(struct xml_context *ctx);
+void xml_dtd_finish(struct xml_context *ctx);
+
 #endif
diff --git a/sherlock/xml/parse.c b/sherlock/xml/parse.c
new file mode 100644
index 00000000..6f2e7e00
--- /dev/null
+++ b/sherlock/xml/parse.c
@@ -0,0 +1,1004 @@
+/*
+ *	Sherlock Library -- A simple XML parser
+ *
+ *	(c) 2007 Pavel Charvat <pchar@ucw.cz>
+ *
+ *	This software may be freely distributed and used according to the terms
+ *	of the GNU Lesser General Public License.
+ */
+
+#define LOCAL_DEBUG
+
+#include "sherlock/sherlock.h"
+#include "sherlock/xml/xml.h"
+#include "sherlock/xml/dtd.h"
+#include "sherlock/xml/common.h"
+#include "lib/fastbuf.h"
+#include "lib/ff-unicode.h"
+#include "lib/unicode.h"
+#include "lib/chartype.h"
+#include "lib/hashfunc.h"
+
+#include <setjmp.h>
+
+/*** Comments ***/
+
+void
+xml_push_comment(struct xml_context *ctx)
+{
+  TRACE(ctx, "push_comment");
+  /* Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
+   * Already parsed: '<!-'.  The text ends up in a new XML_NODE_COMMENT node. */
+  xml_parse_char(ctx, '-');
+  struct xml_node *n = xml_push_dom(ctx);
+  n->type = XML_NODE_COMMENT;
+  char *out = mp_start_noalign(ctx->pool, 6);
+  for (;;)
+    {
+      uns c = xml_get_char(ctx), dash = (c == '-');
+      if (dash && (c = xml_get_char(ctx)) == '-')
+	break;			/* "--" ends the text; the '>' is checked below */
+      if (dash)
+	*out++ = '-';		/* A lone '-' is ordinary data */
+      out = utf8_32_put(out, c);
+      out = mp_spread(ctx->pool, out, 6);
+    }
+  xml_parse_char(ctx, '>');
+  *out = 0;
+  n->len = out - (char *)mp_ptr(ctx->pool);
+  n->text = mp_end(ctx->pool, out + 1);
+  if (ctx->h_comment)
+    ctx->h_comment(ctx);
+}
+
+void
+xml_pop_comment(struct xml_context *ctx)
+{
+  xml_pop_dom(ctx);	/* Counterpart of the xml_push_dom() in xml_push_comment() */
+  xml_dec(ctx);		/* NOTE(review): presumably balances an xml_inc() done by the caller before the push -- confirm */
+  TRACE(ctx, "pop_comment");
+}
+
+void
+xml_skip_comment(struct xml_context *ctx)
+{
+  TRACE(ctx, "skip_comment");
+  xml_parse_char(ctx, '-');	/* Second '-' of '<!--' (same starting point as xml_push_comment()) */
+  while (xml_get_char(ctx) != '-' || xml_get_char(ctx) != '-');	/* Discard input until two consecutive '-' have been read */
+  xml_parse_char(ctx, '>');	/* "--" has to be followed by '>' */
+  xml_dec(ctx);
+}
+
+/*** Processing instructions ***/
+
+void
+xml_push_pi(struct xml_context *ctx)
+{
+  TRACE(ctx, "push_pi");
+  /* Parses a PI into a new XML_NODE_PI node (name = target, text = content):
+   *   PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
+   *   PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
+   * Already parsed: '<?' */
+  struct xml_node *n = xml_push_dom(ctx);
+  n->type = XML_NODE_PI;
+  n->name = xml_parse_name(ctx, ctx->pool);
+  if (unlikely(!strcasecmp(n->name, "xml")))
+    xml_error(ctx, "Reserved PI target");
+  char *out = mp_start_noalign(ctx->pool, 5);
+  if (xml_parse_white(ctx, 0))
+    for (;;)
+      {
+	uns c = xml_get_char(ctx);
+	if (c != '?')
+	  out = utf8_32_put(out, c);
+	else if (xml_peek_char(ctx) != '>')
+	  *out++ = '?';		/* A '?' not followed by '>' is ordinary data */
+	else
+	  {
+	    xml_skip_char(ctx);	/* The '>' of the closing '?>' */
+	    break;
+	  }
+	out = mp_spread(ctx->pool, out, 5);
+      }
+  else
+    xml_parse_seq(ctx, "?>");	/* No whitespace after the target: the PI has to end right here */
+  *out = 0;
+  n->len = out - (char *)mp_ptr(ctx->pool);
+  n->text = mp_end(ctx->pool, out + 1);
+  if (ctx->h_pi)
+    ctx->h_pi(ctx);
+}
+
+void
+xml_pop_pi(struct xml_context *ctx)
+{
+  xml_pop_dom(ctx);	/* Counterpart of the xml_push_dom() in xml_push_pi() */
+  xml_dec(ctx);		/* NOTE(review): presumably balances an xml_inc() done by the caller before the push -- confirm */
+  TRACE(ctx, "pop_pi");
+}
+
+void
+xml_skip_pi(struct xml_context *ctx)
+{
+  /* Discards a processing instruction through its closing '?>'; the target is checked only when validating */
+  TRACE(ctx, "skip_pi");
+  if (ctx->flags & XML_FLAG_VALIDATING)
+    {
+      struct mempool_state saved;
+      mp_save(ctx->stack, &saved);
+      char *target = xml_parse_name(ctx, ctx->stack);
+      if (unlikely(!strcasecmp(target, "xml")))
+	xml_error(ctx, "Reserved PI target");
+      mp_restore(ctx->stack, &saved);
+      if (!xml_parse_white(ctx, 0))
+        {
+	  xml_parse_seq(ctx, "?>");
+	  xml_dec(ctx);
+	  return;
+	}
+    }
+  while (xml_get_char(ctx) != '?' || xml_peek_char(ctx) != '>')
+    ;				/* Stops right after a '?' that is followed by '>' */
+  xml_skip_char(ctx);		/* The '>' itself */
+  xml_dec(ctx);
+}
+
+/*** Character data ***/
+
+static void
+xml_chars_spout(struct fastbuf *fb)	/* Installed as the spout callback of ctx->chars by xml_init_chars() */
+{
+  if (fb->bptr >= fb->bufend)	/* Act only when the write pointer has reached the end of the buffer */
+    {
+      struct xml_context *ctx = SKIP_BACK(struct xml_context, chars, fb);	/* fb is the `chars' member of a struct xml_context */
+      struct mempool *pool = ctx->pool;
+      if (fb->bufend != fb->buffer)
+        {
+          /* A buffer is already open: enlarge it in the pool, keeping the bytes written so far */
+          uns len = fb->bufend - fb->buffer;
+          TRACE(ctx, "grow_chars");
+          fb->buffer = mp_expand(pool);
+          fb->bufend = fb->buffer + mp_avail(pool);
+          fb->bstop = fb->buffer;
+          fb->bptr = fb->buffer + len;	/* Continue writing right after the old contents */
+	}
+      else
+        {
+	  /* No buffer yet (buffer == bufend): open a new character-data node */
+	  TRACE(ctx, "push_chars");
+          struct xml_node *n = xml_push_dom(ctx);
+	  n->type = XML_NODE_CDATA;
+	  xml_start_chars(ctx);	/* NOTE(review): presumably sets up the fastbuf pointers -- defined outside this chunk */
+	}
+    }
+}
+
+static void
+xml_init_chars(struct xml_context *ctx)
+{
+  struct fastbuf *fb = &ctx->chars;	/* The fastbuf is embedded in the context; xml_chars_spout() relies on that */
+  fb->name = "<xml-chars>";
+  fb->spout = xml_chars_spout;
+  fb->can_overwrite_buffer = 1;
+  fb->bptr = fb->bstop = fb->buffer = fb->bufend = NULL;	/* buffer == bufend means "no character data open" */
+}
+
+static inline uns
+xml_flush_chars(struct xml_context *ctx)	/* Returns 1 if pending character data was finalized, 0 if there was none */
+{
+  struct fastbuf *fb = &ctx->chars;
+  if (fb->bufend == fb->buffer)	/* Nothing open (see xml_init_chars()) */
+    return 0;
+  TRACE(ctx, "flush_chars");
+  struct xml_node *n = ctx->node;
+  n->text = xml_end_chars(ctx, &n->len);
+  n->len = fb->bufend - fb->buffer;	/* NOTE(review): overwrites the length just stored by xml_end_chars() -- verify this is intended */
+  if (ctx->h_chars)
+    ctx->h_chars(ctx);	/* Optional user callback */
+  return 1;
+}
+
+static inline void
+xml_pop_chars(struct xml_context *ctx)
+{
+  xml_pop_dom(ctx);	/* Counterpart of the xml_push_dom() in xml_chars_spout(); unlike the other pop functions, no xml_dec() */
+  TRACE(ctx, "pop_chars");
+}
+
+static inline void
+xml_append_chars(struct xml_context *ctx)
+{
+  TRACE(ctx, "append_chars");
+  struct fastbuf *out = &ctx->chars;	/* Character data is collected in ctx->chars */
+  for (uns c; (c = xml_get_char(ctx)) != '<'; )
+    if (c != '&')
+      bput_utf8_32(out, c);
+    else
+      {
+	xml_inc(ctx);
+        xml_parse_ref(ctx);
+      }
+  xml_unget_char(ctx);	/* The '<' is left for the caller */
+}
+
+/*** CDATA sections ***/
+
+static void
+xml_push_cdata(struct xml_context *ctx)
+{
+  TRACE(ctx, "push_cdata");
+  /* CDSect ::= '<![CDATA[' (Char* - (Char* ']]>' Char*)) ']]>';  already parsed: '<![' */
+  xml_parse_seq(ctx, "CDATA[");
+  struct xml_node *n = xml_push_dom(ctx);
+  n->type = XML_NODE_CDATA;
+  char *p = mp_start_noalign(ctx->pool, 7);
+  uns c, held = 0;	/* held = trailing ']' read but not copied yet (0..2) */
+  /* Fixed: "]]]>" now ends the section with one ']' as data; it used to be copied as text */
+  while ((c = xml_get_char(ctx)) != '>' || held < 2)
+    {
+      if (c == ']' && held < 2)
+	held++;
+      else if (c == ']')
+	*p++ = ']';		/* A third ']' in a row: the oldest one is certainly data */
+      else
+        {
+	  for (; held; held--)	/* The brackets held back were not a terminator */
+	    *p++ = ']';
+	  p = utf8_32_put(p, c);
+	}
+      p = mp_spread(ctx->pool, p, 7);
+    }
+  *p = 0;
+  n->len = p - (char *)mp_ptr(ctx->pool);
+  n->text = mp_end(ctx->pool, p + 1);
+  if (ctx->h_cdata) ctx->h_cdata(ctx);
+}
+
+static void
+xml_pop_cdata(struct xml_context *ctx)
+{
+  xml_pop_dom(ctx);	/* Counterpart of the xml_push_dom() in xml_push_cdata() */
+  xml_dec(ctx);		/* NOTE(review): presumably balances an xml_inc() done by the caller before the push -- confirm */
+  TRACE(ctx, "pop_cdata");
+}
+
+static void
+xml_append_cdata(struct xml_context *ctx)
+{
+  /* Appends a CDATA section to ctx->chars; "]]]>" now ends it with one ']' as data (it was missed before) */
+  TRACE(ctx, "append_cdata");
+  xml_parse_seq(ctx, "CDATA[");
+  struct fastbuf *out = &ctx->chars;
+  uns c, held = 0;	/* held = trailing ']' read but not written yet (0..2) */
+  while ((c = xml_get_char(ctx)) != '>' || held < 2)
+    if (c == ']' && held < 2)
+      held++;
+    else if (c == ']')
+      bputc(out, ']');		/* A third ']' in a row: the oldest one is certainly data */
+    else
+      {
+	for (; held; held--)	/* The brackets held back were not a terminator */
+	  bputc(out, ']');
+	bput_utf8_32(out, c);
+      }
+  xml_dec(ctx);
+}
+
+static void UNUSED
+xml_skip_cdata(struct xml_context *ctx)
+{
+  TRACE(ctx, "skip_cdata");
+  xml_parse_seq(ctx, "CDATA[");	/* run = consecutive ']' just read (capped at 2); fixed: "]]]>" is now seen as the end */
+  for (uns run = 0; xml_get_char(ctx) != '>' || run < 2; run = (xml_last_char(ctx) != ']') ? 0 : (run < 2) ? run + 1 : 2);
+  xml_dec(ctx);
+}
+
+/*** Character references ***/
+
+uns
+xml_parse_char_ref(struct xml_context *ctx)
+{
+  TRACE(ctx, "parse_char_ref");
+  /* CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
+   * Already parsed: '&#'.  Returns the code point, or UNI_REPLACEMENT after a reported error. */
+  uns v = 0;
+  if (xml_get_char(ctx) == 'x')
+    {
+      if (!(xml_get_cat(ctx) & XML_CHAR_XDIGIT))	/* At least one hexadecimal digit is required */
+        {
+	  xml_error(ctx, "Expected a hexadecimal value of character reference");
+	  goto recover;
+	}
+      do
+        {
+	  v = (v << 4) + Cxvalue(xml_last_char(ctx));
+	}
+      while (v < 0x110000 && (xml_get_cat(ctx) & XML_CHAR_XDIGIT));	/* Stops once v leaves the Unicode range, so v cannot overflow */
+    }
+  else
+    {
+      if (!(xml_last_cat(ctx) & XML_CHAR_DIGIT))	/* The character read by the test above has to be a decimal digit */
+        {
+	  xml_error(ctx, "Expected a numeric value of character reference");
+	  goto recover;
+	}
+      do
+        {
+	  v = v * 10 + xml_last_char(ctx) - '0';
+	}
+      while (v < 0x110000 && (xml_get_cat(ctx) & XML_CHAR_DIGIT));	/* Same range guard as in the hexadecimal branch */
+    }
+  uns cat = xml_char_cat(v);
+  if (!(cat & XML_CHAR_UNRESTRICTED_1_1) && ((ctx->flags & XML_FLAG_VERSION_1_1) || !(cat & XML_CHAR_VALID_1_0)))
+    {
+      xml_error(ctx, "Character reference out of range");
+      goto recover;
+    }
+  if (xml_last_char(ctx) == ';')	/* The character that ended the digit loop has to be the terminator */
+    {
+      xml_dec(ctx);
+      return v;
+    }
+  xml_error(ctx, "Expected ';'");
+recover:
+  while (xml_last_char(ctx) != ';')	/* Error recovery: discard input up to and including the next ';' */
+    xml_get_char(ctx);
+  xml_dec(ctx);
+  return UNI_REPLACEMENT;
+}
+
+/*** References to general entities ***/
+
+void
+xml_parse_ref(struct xml_context *ctx)
+{
+  /* Reference ::= EntityRef | CharRef
+   * EntityRef ::= '&' Name ';'
+   * Already parsed: '&'; the expansion is written to ctx->chars or handed to xml_push_entity() */
+  struct fastbuf *out = &ctx->chars;
+  if (xml_peek_char(ctx) == '#')
+    {
+      xml_skip_char(ctx);
+      bput_utf8_32(out, xml_parse_char_ref(ctx));
+    }
+  else
+    {
+      TRACE(ctx, "parse_ge_ref");
+      struct mempool_state state;
+      mp_save(ctx->stack, &state);	/* the entity name is only a temporary on ctx->stack */
+      char *name = xml_parse_name(ctx, ctx->stack);
+      xml_parse_char(ctx, ';');
+      struct xml_dtd_ent *ent = xml_dtd_find_gent(ctx, name);
+      if (!ent)
+        {
+	  xml_error(ctx, "Unknown entity &%s;", name);
+	  bputc(out, '&');	/* keep the unresolved reference verbatim in the output */
+	  bputs(out, name);
+	  bputc(out, ';');
+	}
+      else if (ent->flags & XML_DTD_ENT_TRIVIAL)
+        {
+	  TRACE(ctx, "Trivial entity &%s;", name);
+	  bwrite(out, ent->text, ent->len);	/* trivial entities are copied directly, without re-parsing */
+	}
+      else
+        {
+	  TRACE(ctx, "Pushed entity &%s;", name);
+	  mp_restore(ctx->stack, &state);	/* `name` must not be used past this point */
+          xml_dec(ctx);
+	  xml_push_entity(ctx, ent);
+	  return;
+	}
+      mp_restore(ctx->stack, &state);
+      xml_dec(ctx);
+    }
+}
+
+/*** Attribute values ***/
+
+char *
+xml_parse_attr_value(struct xml_context *ctx, struct xml_dtd_attr *attr UNUSED)
+{
+  TRACE(ctx, "parse_attr_value");
+  /* AttValue ::= '"' ([^<&"] | Reference)* '"'	| "'" ([^<&'] | Reference)* "'" */
+  /* FIXME:
+   * -- copying from ctx->chars to ctx->pool is not necessary, we could directly write to ctx->pool
+   * -- beware of quotes inside parsed entities
+   * -- check value constraints / normalize value */
+  struct mempool_state state;
+  uns quote = xml_parse_quote(ctx);
+  mp_save(ctx->stack, &state);
+  xml_start_chars(ctx);
+  struct fastbuf *out = &ctx->chars;
+  while (1)
+    {
+      uns c = xml_get_char(ctx);
+      if (c == '&')
+        {
+	  xml_inc(ctx);
+	  xml_parse_ref(ctx);
+	}
+      else if (c == quote) // FIXME: beware quotes inside parsed entities
+	break;
+      else if (c == '<')
+	xml_error(ctx, "Attribute value must not contain '<'");	/* reported and the '<' is dropped from the value */
+      else if (xml_last_cat(ctx) & XML_CHAR_WHITE)
+	bputc(out, ' ');	/* every whitespace character is replaced by a single space */
+      else
+	bput_utf8_32(out, c);
+    }
+  mp_restore(ctx->stack, &state);
+  uns len;	/* required by xml_end_chars(), the length itself is not used here */
+  return xml_end_chars(ctx, &len);
+}
+
+/*** Attributes ***/
+
+struct xml_attrs_table;
+
+static inline uns
+xml_attrs_hash(struct xml_attrs_table *t UNUSED, struct xml_node *e, char *n)	/* Hash of the (element, attribute name) key */
+{
+  return hash_pointer(e) ^ hash_string(n);
+}
+
+static inline int
+xml_attrs_eq(struct xml_attrs_table *t UNUSED, struct xml_node *e1, char *n1, struct xml_node *e2, char *n2)	/* Keys match when both the element pointer and the name string are equal */
+{
+  return (e1 == e2) && !strcmp(n1, n2);
+}
+
+static inline void
+xml_attrs_init_key(struct xml_attrs_table *t UNUSED, struct xml_attr *a, struct xml_node *e, char *name)	/* Initialize a new attribute node for the given key */
+{
+  a->elem = e;
+  a->name = name;
+  a->val = NULL;	/* xml_parse_attr() uses val == NULL to recognize a not-yet-seen attribute */
+  slist_add_tail(&e->attrs, &a->n);	/* also link the attribute to its element's list */
+}
+
+#define HASH_PREFIX(x) xml_attrs_##x	/* instantiate the generic hash table as xml_attrs_*() */
+#define HASH_NODE struct xml_attr
+#define HASH_KEY_COMPLEX(x) x elem, x name	/* the key is the pair (elem, name) */
+#define HASH_KEY_DECL struct xml_node *elem, char *name
+#define HASH_TABLE_DYNAMIC
+#define HASH_GIVE_EQ	/* xml_attrs_eq() above */
+#define HASH_GIVE_HASHFN	/* xml_attrs_hash() above */
+#define HASH_GIVE_INIT_KEY	/* xml_attrs_init_key() above */
+#define HASH_WANT_CLEANUP
+#define HASH_WANT_REMOVE
+#define HASH_WANT_LOOKUP
+#define HASH_WANT_FIND
+#define HASH_GIVE_ALLOC
+XML_HASH_GIVE_ALLOC
+#include "lib/hashtable.h"
+
+static void
+xml_parse_attr(struct xml_context *ctx)
+{
+  TRACE(ctx, "parse_attr");
+  /* Attribute ::= Name Eq AttValue -- parsed for the current element ctx->node */
+  /* FIXME:
+   * -- memory management
+   * -- DTD */
+  struct xml_node *e = ctx->node;
+  char *n = xml_parse_name(ctx, ctx->pool);
+  struct xml_attr *a = xml_attrs_lookup(ctx->tab_attrs, e, n);	/* NOTE(review): presumably creates the node (val == NULL) when missing -- confirm in lib/hashtable.h */
+  xml_parse_eq(ctx);
+  char *v = xml_parse_attr_value(ctx, NULL);
+  if (a->val)	/* a value is already stored, so this name was seen before on this element */
+    xml_error(ctx, "Attribute %s is not unique", n);
+  else
+    a->val = v;
+}
+
+/*** Elements ***/
+
+static void
+xml_push_element(struct xml_context *ctx)
+{
+  TRACE(ctx, "push_element");
+  /* EmptyElemTag | STag
+   * EmptyElemTag ::= '<' Name (S  Attribute)* S? '/>'
+   * STag ::= '<' Name (S  Attribute)* S? '>'
+   * Already parsed: '<' -- creates the element node, parses its attributes and calls h_element_start */
+  struct xml_node *e = xml_push_dom(ctx);
+  clist_init(&e->sons);
+  e->type = XML_NODE_ELEM;
+  e->name = xml_parse_name(ctx, ctx->pool);
+  slist_init(&e->attrs);
+  if (!e->parent)	/* an element without a parent is the document root */
+    {
+      ctx->root = e;
+      if (ctx->document_type && strcmp(e->name, ctx->document_type))
+	xml_error(ctx, "The root element %s does not match the document type %s", e->name, ctx->document_type);
+    }
+  while (1)
+    {
+      uns white = xml_parse_white(ctx, 0);
+      uns c = xml_get_char(ctx);
+      if (c == '/')
+        {
+	  xml_parse_char(ctx, '>');
+	  ctx->flags |= XML_FLAG_EMPTY_ELEM;	/* xml_next() tests this flag to pop the element right away */
+	  break;
+	}
+      else if (c == '>')
+	break;
+      else if (!white)	/* an attribute must be preceded by whitespace */
+	xml_fatal_expected_white(ctx);
+      xml_unget_char(ctx);	/* give the first character of the attribute name back to the input */
+      xml_parse_attr(ctx);
+    }
+  if (ctx->h_element_start)
+    ctx->h_element_start(ctx);
+}
+
+static void
+xml_pop_element(struct xml_context *ctx)	/* Call h_element_end and leave the current element */
+{
+  TRACE(ctx, "pop_element");
+  if (ctx->h_element_end)
+    ctx->h_element_end(ctx);
+  struct xml_node *e = ctx->node;
+  if (ctx->flags & XML_DOM_FREE)	/* the DOM is not kept: forget the element and everything below it */
+    {
+      if (!e->parent)
+	ctx->root = NULL;
+      /* Restore hash table of attributes */
+      SLIST_FOR_EACH(struct xml_attr *, a, e->attrs)
+	xml_attrs_remove(ctx->tab_attrs, a);
+      struct xml_node *n;
+      while (n = clist_head(&e->sons))	/* loops until the list of sons is empty */
+        {
+	  if (n->type == XML_NODE_ELEM)
+	    {
+	      SLIST_FOR_EACH(struct xml_attr *, a, n->attrs)
+		xml_attrs_remove(ctx->tab_attrs, a);
+	      clist_insert_list_after(&n->sons, &n->n);	/* presumably splices n's sons into e->sons, so the subtree is visited without recursion */
+	    }
+	  clist_remove(&n->n);
+	}
+    }
+  xml_pop_dom(ctx);
+  xml_dec(ctx);
+}
+
+static void
+xml_parse_etag(struct xml_context *ctx)
+{
+ /* ETag ::= '</' Name S? '>'
+  * Already parsed: '</' -- the name must match the name of the current element ctx->node */
+  struct xml_node *e = ctx->node;
+  ASSERT(e);
+  char *n = e->name;
+  for (uns c; *n; )
+    {
+      n = utf8_32_get(n, &c);
+      if (xml_get_char(ctx) != c)
+	goto recover;
+    }
+  xml_parse_white(ctx, 0);
+  if (xml_get_char(ctx) != '>')
+    {
+recover:
+      xml_error(ctx, "Invalid ETag, expected </%s>", e->name);
+      while (xml_last_char(ctx) != '>')	/* the mismatching char may itself be the closing '>' (e.g. a truncated name) */
+	xml_get_char(ctx);
+    }
+  xml_dec(ctx);
+}
+
+/*** Document type declaration ***/
+
+static void
+xml_parse_doctype_decl(struct xml_context *ctx)
+{
+  TRACE(ctx, "parse_doctype_decl");
+  /* doctypedecl ::= '<!DOCTYPE' S  Name (S  ExternalID)? S? ('[' intSubset ']' S?)? '>'
+   * Already parsed: '<!'
+   * Terminated before '[' or '>' -- the caller parses the internal subset and the final '>' */
+  if (ctx->document_type)
+    xml_fatal(ctx, "Multiple document types not allowed");
+  xml_parse_seq(ctx, "DOCTYPE");
+  xml_parse_white(ctx, 1);
+  ctx->document_type = xml_parse_name(ctx, ctx->pool);
+  TRACE(ctx, "doctyype=%s", ctx->document_type);
+  uns c;
+  if (xml_parse_white(ctx, 0) && ((c = xml_peek_char(ctx)) == 'S' || c == 'P'))	/* optional ExternalID */
+    {
+      if (c == 'S')
+        {
+	  xml_parse_seq(ctx, "SYSTEM");
+	  xml_parse_white(ctx, 1);
+	  ctx->eid.system_id = xml_parse_system_literal(ctx, ctx->pool);
+	}
+      else
+        {
+	  xml_parse_seq(ctx, "PUBLIC");
+	  xml_parse_white(ctx, 1);
+	  ctx->eid.public_id = xml_parse_pubid_literal(ctx, ctx->pool);
+	  xml_parse_white(ctx, 1);
+	  ctx->eid.system_id = xml_parse_system_literal(ctx, ctx->pool);
+	}
+      xml_parse_white(ctx, 0);
+      ctx->flags |= XML_FLAG_HAS_EXTERNAL_SUBSET;
+    }
+  if (xml_peek_char(ctx) == '[')	/* only peeked: the '[' itself is consumed by the caller */
+    ctx->flags |= XML_FLAG_HAS_INTERNAL_SUBSET;
+  if (ctx->h_doctype_decl)
+    ctx->h_doctype_decl(ctx);
+}
+
+
+
+///////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+/* DTD: Internal subset */
+
+static void
+xml_parse_internal_subset(struct xml_context *ctx)
+{
+  // FIXME: comments/pi have no parent
+  /* '[' intSubset ']'
+   * intSubset ::= (markupdecl | DeclSep)*
+   * Already parsed: '[' */
+  while (1)
+    {
+      xml_parse_white(ctx, 0);
+      uns c = xml_get_char(ctx);
+      xml_inc(ctx);
+      if (c == '<')
+	if ((c = xml_get_char(ctx)) == '!')
+	  switch (c = xml_get_char(ctx))	/* the first letter after '<!' selects the declaration */
+	    {
+	      case '-':
+		xml_push_comment(ctx);
+		xml_pop_comment(ctx);
+		break;
+	      case 'N':
+		xml_parse_seq(ctx, "OTATION");
+		xml_parse_notation_decl(ctx);
+		break;
+	      case 'E':
+		if ((c = xml_get_char(ctx)) == 'N')
+		  {
+		    xml_parse_seq(ctx, "TITY");
+		    xml_parse_entity_decl(ctx);
+		  }
+		else if (c == 'L')
+		  {
+		    xml_parse_seq(ctx, "EMENT");
+		    xml_parse_element_decl(ctx);
+		  }
+		else
+		  goto invalid_markup;
+		break;
+	      case 'A':
+		xml_parse_seq(ctx, "TTLIST");
+		xml_parse_attr_list_decl(ctx);
+		break;
+	      default:
+		goto invalid_markup;
+	    }
+        else if (c == '?')
+	  {
+	    xml_push_pi(ctx);
+	    xml_pop_pi(ctx);
+	  }
+        else
+	  goto invalid_markup;
+      else if (c == '%')
+	xml_parse_pe_ref(ctx);
+      else if (c == ']')
+	break;
+      else
+	goto invalid_markup;
+    }
+  xml_dec(ctx);	/* balances the xml_inc() of the last loop iteration (the one that read ']') */
+  xml_dec(ctx);	/* balances the xml_inc() done in xml_next() after '[' */
+  return;
+invalid_markup:
+  xml_fatal(ctx, "Invalid markup in the internal subset");
+}
+
+
+/*----------------------------------------------*/
+
+void
+xml_init(struct xml_context *ctx)	/* Initialize a caller-provided context; release it with xml_cleanup() */
+{
+  bzero(ctx, sizeof(*ctx));
+  ctx->pool = mp_new(65536);
+  ctx->stack = mp_new(65536);
+  ctx->flags = XML_DOM_FREE;	/* default: xml_pop_element() discards popped elements; clear the flag to keep the DOM */
+  xml_init_chars(ctx);
+  xml_dtd_init(ctx);
+  xml_attrs_init(ctx->tab_attrs = xml_hash_new(ctx->pool, sizeof(struct xml_attrs_table)));
+}
+
+void
+xml_cleanup(struct xml_context *ctx)	/* Release everything allocated by xml_init() and by parsing */
+{
+  xml_attrs_cleanup(ctx->tab_attrs);	/* must precede mp_delete(ctx->pool): the table was allocated from ctx->pool in xml_init() */
+  xml_dtd_cleanup(ctx);
+  mp_delete(ctx->pool);
+  mp_delete(ctx->stack);
+}
+
+int
+xml_next(struct xml_context *ctx)
+{
+  /* A nasty state machine: each `case` label inside the loops resumes parsing at the point where the previous call returned */
+
+  TRACE(ctx, "xml_next (state=%u)", ctx->state);
+  jmp_buf throw_buf;
+  ctx->throw_buf = &throw_buf;
+  if (setjmp(throw_buf))	/* presumably reached by longjmp() whenever the parser raises a fatal error or EOF */
+    {
+error:
+      if (ctx->err_code == XML_ERR_EOF && ctx->h_fatal)
+	ctx->h_fatal(ctx);
+      ctx->state = XML_STATE_FATAL;
+      TRACE(ctx, "raised fatal error");
+      return -1;
+    }
+  uns c;
+  switch (ctx->state)
+    {
+      case XML_STATE_FATAL:
+	return -1;
+
+      case XML_STATE_START:
+	TRACE(ctx, "entering prolog");
+	if (ctx->h_document_start)
+	  ctx->h_document_start(ctx);
+	/* XMLDecl */
+	xml_refill(ctx);
+	if (ctx->h_xml_decl)
+	  ctx->h_xml_decl(ctx);
+	if (ctx->want & XML_WANT_DECL)
+	  return ctx->state = XML_STATE_DECL;
+      case XML_STATE_DECL:
+
+	/* Misc* (doctypedecl Misc*)? */
+        while (1)
+	  {
+	    xml_parse_white(ctx, 0);
+	    xml_parse_char(ctx, '<');
+	    if ((c = xml_get_char(ctx)) == '?')
+	      /* Processing instruction */
+	      if (!(ctx->want & XML_WANT_PI))
+	        xml_skip_pi(ctx);
+	      else
+	        {
+		  xml_push_pi(ctx);
+		  ctx->state = XML_STATE_PROLOG_PI;
+		  return XML_STATE_PI;
+      case XML_STATE_PROLOG_PI:
+		  xml_pop_pi(ctx);
+	        }
+	    else if (c != '!')
+	      {
+		/* Found the root tag */
+		xml_unget_char(ctx);
+		goto first_tag;
+	      }
+	    else if (xml_get_char(ctx) == '-')
+	      if (!(ctx->want & XML_WANT_COMMENT))
+		xml_skip_comment(ctx);
+	      else
+	        {
+		  xml_push_comment(ctx);
+		  ctx->state = XML_STATE_PROLOG_COMMENT;
+		  return XML_STATE_COMMENT;
+      case XML_STATE_PROLOG_COMMENT:
+		  xml_pop_comment(ctx);
+		}
+	    else
+	      {
+		/* DocTypeDecl */
+		xml_unget_char(ctx);
+		xml_parse_doctype_decl(ctx);
+		if (ctx->want & XML_WANT_DOCUMENT_TYPE)
+		  return ctx->state = XML_STATE_DOCUMENT_TYPE;
+      case XML_STATE_DOCUMENT_TYPE:
+		if (xml_peek_char(ctx) == '[')
+		  {
+		    xml_skip_char(ctx);
+		    xml_inc(ctx);
+		    xml_parse_internal_subset(ctx);
+		    xml_parse_white(ctx, 0);
+		  }
+		xml_parse_char(ctx, '>');
+	      }
+	  }
+
+      case XML_STATE_CHARS:
+
+	while (1)
+	  {
+	    if (xml_peek_char(ctx) != '<')
+	      {
+		/* CharData */
+	        xml_append_chars(ctx);
+		continue;
+	      }
+	    else
+	      xml_skip_char(ctx);
+first_tag: ;
+
+	    xml_inc(ctx);
+	    if ((c = xml_get_char(ctx)) == '?')
+	      {
+		/* PI */
+	        if (!(ctx->want & XML_WANT_PI))
+	          xml_skip_pi(ctx);
+	        else
+		  {
+		    if (xml_flush_chars(ctx))
+		      {
+			if (ctx->want & XML_WANT_CHARS)
+			  {
+			    ctx->state = XML_STATE_CHARS_BEFORE_PI;
+			    return XML_STATE_CHARS;
+			  }
+      case XML_STATE_CHARS_BEFORE_PI:
+			xml_pop_chars(ctx);
+		      }
+		    xml_push_pi(ctx);
+		    return ctx->state = XML_STATE_PI;
+      case XML_STATE_PI:
+		    xml_pop_pi(ctx);
+		  }
+	      }
+
+	    else if (c == '!')
+	      if ((c = xml_get_char(ctx)) == '-')
+	        {
+		  /* Comment */
+		  if (!(ctx->want & XML_WANT_COMMENT))
+		    xml_skip_comment(ctx);
+		  else
+		    {
+		      if (xml_flush_chars(ctx))
+		        {
+			  if (ctx->want & XML_WANT_CHARS)
+			    {
+			      ctx->state = XML_STATE_CHARS_BEFORE_COMMENT;
+			      return XML_STATE_CHARS;
+			    }
+      case XML_STATE_CHARS_BEFORE_COMMENT:
+			  xml_pop_chars(ctx);
+			}
+		      xml_push_comment(ctx);
+		      return ctx->state = XML_STATE_COMMENT;
+      case XML_STATE_COMMENT:
+		      xml_pop_comment(ctx);
+		    }
+		}
+	      else if (c == '[')
+	        {
+		  /* CDATA */
+		  if (!(ctx->want & XML_WANT_CDATA))
+		    xml_append_cdata(ctx);
+		  else
+		    {
+		      if (xml_flush_chars(ctx))
+		        {
+			  if (ctx->want & XML_WANT_CHARS)
+			    {
+			      ctx->state = XML_STATE_CHARS_BEFORE_CDATA;
+			      return XML_STATE_CHARS;
+			    }
+      case XML_STATE_CHARS_BEFORE_CDATA:
+			  xml_pop_chars(ctx);
+			}
+		      xml_push_cdata(ctx);
+		      return ctx->state = XML_STATE_CDATA;
+      case XML_STATE_CDATA:
+		      xml_pop_cdata(ctx);
+		    }
+		}
+	      else
+		xml_fatal(ctx, "Unexpected character after '<!'");
+
+	    else if (c != '/')
+	      {
+		/* STag | EmptyElemTag */
+		xml_unget_char(ctx);
+		if (xml_flush_chars(ctx))
+		  {
+		    if (ctx->want & XML_WANT_CHARS)
+		      {
+		        ctx->state = XML_STATE_CHARS_BEFORE_STAG;
+		        return XML_STATE_CHARS;
+		      }
+      case XML_STATE_CHARS_BEFORE_STAG:
+		    xml_pop_chars(ctx);
+		  }
+
+		xml_push_element(ctx);
+		if (ctx->want & XML_WANT_STAG)
+		  return ctx->state = XML_STATE_STAG;
+      case XML_STATE_STAG:
+		if (ctx->flags & XML_FLAG_EMPTY_ELEM)
+		  goto pop_element;
+	      }
+
+	    else
+	      {
+		/* ETag */
+		if (xml_flush_chars(ctx))
+		  {
+		    if (ctx->want & XML_WANT_CHARS)
+		      {
+		        ctx->state = XML_STATE_CHARS_BEFORE_ETAG;
+		        return XML_STATE_CHARS;
+		      }
+      case XML_STATE_CHARS_BEFORE_ETAG:
+		    xml_pop_chars(ctx);
+		  }
+
+		xml_parse_etag(ctx);
+pop_element:
+		if (ctx->want & XML_WANT_ETAG)
+		  return ctx->state = XML_STATE_ETAG;
+      case XML_STATE_ETAG:
+		xml_pop_element(ctx);
+		if (!ctx->node)
+		  goto epilog;
+	      }
+	  }
+
+epilog:
+	/* Misc* */
+        TRACE(ctx, "entering epilog");
+	while (1)
+	  {
+	    /* Epilog whitespace is the only place, where a valid document can reach EOF */
+	    if (setjmp(throw_buf))
+	      if (ctx->err_code == XML_ERR_EOF)
+	        {
+		  TRACE(ctx, "reached EOF");
+		  ctx->state = XML_STATE_EOF;
+		  if (ctx->h_document_end)
+		    ctx->h_document_end(ctx);
+      case XML_STATE_EOF:
+		  return XML_STATE_EOF;
+		}
+	      else
+		goto error;
+	    xml_parse_white(ctx, 0);
+	    if (setjmp(throw_buf))
+	      goto error;
+
+	    /* Misc */
+	    xml_parse_char(ctx, '<');
+	    if ((c = xml_get_char(ctx)) == '?')
+	      /* Processing instruction */
+	      if (!(ctx->want & XML_WANT_PI))
+	        xml_skip_pi(ctx);
+	      else
+	        {
+		  xml_push_pi(ctx);
+		  return ctx->state = XML_STATE_EPILOG_PI, XML_STATE_PI;
+      case XML_STATE_EPILOG_PI:
+		  xml_pop_pi(ctx);
+	        }
+	    else if (c == '!')
+	      /* Comment -- NOTE(review): unlike the prolog and content branches, the char after '!' is not read here; confirm what the comment parsers expect */
+	      if (!(ctx->want & XML_WANT_COMMENT))
+		xml_skip_comment(ctx);
+	      else
+	        {
+		  xml_push_comment(ctx);
+		  return ctx->state = XML_STATE_EPILOG_COMMENT, XML_STATE_COMMENT;
+      case XML_STATE_EPILOG_COMMENT:
+		  xml_pop_comment(ctx);
+		}
+	    else
+	      xml_fatal(ctx, "Syntax error in the epilog");
+	  }
+
+    }
+  return -1;
+}
diff --git a/sherlock/xml/unicat.pl b/sherlock/xml/unicat.pl
index fc39bba7..b86106f2 100755
--- a/sherlock/xml/unicat.pl
+++ b/sherlock/xml/unicat.pl
@@ -88,9 +88,14 @@ set("SNAME_1_1", @sname_1_1);
 set("NAME_1_1", @sname_1_1, "[-.0-9]", 0xB7, [0x0300,0x036F], [0x203F,0x2040]);
 set("GT", "[>]");
 
+($ARGV[0] eq "" || $ARGV[1] eq "") && die("Invalid usage");
 find_cls();
+open(H, ">", $ARGV[0]) or die("Cannot create $ARGV[0]: $!");	# H receives the enum and the extern declarations
+open(C, ">", $ARGV[1]) or die("Cannot create $ARGV[1]: $!");	# C receives the table definitions
 gen_enum();
 gen_tabs();
+close(H) or die("Cannot write $ARGV[0]: $!");	# errors of buffered writes surface at close
+close(C) or die("Cannot write $ARGV[1]: $!");
 
 sub set {
   my $id = shift;
@@ -113,21 +118,26 @@ sub find_cls {
 }
 
 sub gen_enum {
-  print "enum xml_char_type {\n";
+  print H "enum xml_char_type {\n";
   foreach my $id (sort keys %ids) {
     my $mask = 0;
     foreach my $i (keys %cls) {
       $mask |= 1 << $cls{$i} if $cls{$i} && ($i & (1 << $ids{$id}));
     }
-    printf "  XML_CHAR_%-20s = 0x%08x,\n", $id, $mask;
+    printf H "  XML_CHAR_%-20s = 0x%08x,\n", $id, $mask;
   }
-  print "};\n\n";
+  print H "};\n\n";
 }
 
 sub gen_tabs {
   my @tab = ();
   my %hash = ();
-  print "static const uns xml_char_tab1[] = {\n  ";
+
+  print H "extern const byte xml_char_tab1[];\n";
+  print H "extern const uns xml_char_tab2[];\n";
+  print H "extern const byte xml_char_tab3[];\n";
+
+  print C "const uns xml_char_tab2[] = {\n  ";
   for (my $t=0; $t<256; $t++) {
     my $i = $t * 256;
     my @x = ();
@@ -139,17 +149,17 @@ sub gen_tabs {
       $hash{$sub} = 256 * scalar @tab;
       push @tab, $sub;
     }
-    printf("0x%x", $hash{$sub});
-    print((~$t & 15) ? "," : ($t < 255) ? ",\n  " : "\n};\n\n");
+    printf C "0x%x", $hash{$sub};
+    print C ((~$t & 15) ? "," : ($t < 255) ? ",\n  " : "\n};\n\n");
   }
 
-  print "static const byte xml_char_tab2[] = {\n";
-  print join(",\n\n", @tab);
-  print "\n};\n\n";
+  print C "const byte xml_char_tab1[] = {\n";
+  print C join(",\n\n", @tab);
+  print C "\n};\n\n";
 
   my @l = ();
   for (my $i=0; $i<0x11; $i++) {
     push @l, sprintf("%d", $cls{$lcat[$i]});
   }
-  print "static const byte xml_char_tab3[] = {" . join(",", @l) . "};\n";
+  print C "const byte xml_char_tab3[] = {" . join(",", @l) . "};\n";
 }
diff --git a/sherlock/xml/xml-test.c b/sherlock/xml/xml-test.c
new file mode 100644
index 00000000..cca5ad8a
--- /dev/null
+++ b/sherlock/xml/xml-test.c
@@ -0,0 +1,253 @@
+/*
+ *	Sherlock Library -- A simple XML parser
+ *
+ *	(c) 2007 Pavel Charvat <pchar@ucw.cz>
+ *
+ *	This software may be freely distributed and used according to the terms
+ *	of the GNU Lesser General Public License.
+ */
+
+#include "sherlock/sherlock.h"
+#include "sherlock/xml/xml.h"
+#include "lib/getopt.h"
+#include "lib/fastbuf.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+static char *shortopts = "spd" CF_SHORT_OPTS;	/* 'd' was missing although usage() and main() both offer -d */
+static struct option longopts[] = {
+  CF_LONG_OPTS
+  { "sax",	0, 0, 's' },
+  { "pull",	0, 0, 'p' },
+  { "dom",	0, 0, 'd' },
+  { NULL,	0, 0, 0 }
+};
+
+static void NONRET
+usage(void)	/* Print the command-line help to stderr and exit with status 1 */
+{
+  fputs("\
+Usage: xml-test [options] < in.xml\n\
+\n\
+Options:\n"
+CF_USAGE
+"\
+-p, --pull  Test PULL interface\n\
+-s, --sax   Test SAX interface\n\
+-d, --dom   Test DOM interface\n\
+\n", stderr);
+  exit(1);
+}
+
+static uns want_sax;	/* incremented by -s: install the SAX handlers */
+static uns want_pull;	/* incremented by -p: request PULL events through ctx.want */
+static uns want_dom;	/* incremented by -d: keep the DOM and dump it after parsing */
+static struct fastbuf *out;	/* all output of the test goes here (opened on fd 1 in main) */
+
+static char *
+node_type(struct xml_node *node)
+{
+  /* Map the type of a node to the label printed by show_tree() */
+  if (node->type == XML_NODE_ELEM)
+    return "element";
+  if (node->type == XML_NODE_COMMENT)
+    return "comment";
+  if (node->type == XML_NODE_PI)
+    return "pi";
+  return (node->type == XML_NODE_CDATA) ? "chars" : "unknown";
+}
+
+static void
+show_node(struct xml_node *node)	/* Print the details of one node, terminated by a newline, to `out` */
+{
+  switch (node->type)
+    {
+      case XML_NODE_ELEM:
+	bprintf(out, " <%s>", node->name);
+        SLIST_FOR_EACH(struct xml_attr *, a, node->attrs)
+          bprintf(out, " %s='%s'", a->name, a->val);
+	bputc(out, '\n');
+	break;
+      case XML_NODE_COMMENT:
+	bprintf(out, " text='%s'\n", node->text);
+	break;
+      case XML_NODE_PI:
+	bprintf(out, " target=%s text='%s'\n", node->name, node->text);
+	break;
+      case XML_NODE_CDATA:
+	bprintf(out, " text='%s'\n", node->text);
+	break;
+      default:
+        bputc(out, '\n');	/* unknown node types get an empty description */
+    }
+}
+
+static void
+show_tree(struct xml_node *node, uns level)	/* Recursively dump the subtree rooted at `node`, indented by `level` */
+{
+  if (!node)	/* main() passes ctx.root, which may be NULL */
+    return;
+  bputs(out, "DOM:  ");
+  for (uns i = 0; i < level; i++)
+    bputs(out, "    ");
+  bputs(out, node_type(node));
+  show_node(node);
+  if (node->type == XML_NODE_ELEM)	/* only element nodes get their `sons` list initialized by the parser */
+    CLIST_FOR_EACH(struct xml_node *, son, node->sons)
+      show_tree(son, level + 1);
+}
+
+static void
+h_error(struct xml_context *ctx)	/* Shared handler installed as h_warn, h_error and h_fatal in main() */
+{
+  bprintf(out, "SAX:  %s at %u: %s\n", (ctx->err_code < XML_ERR_ERROR) ? "warn" : "error", xml_row(ctx), ctx->err_msg);
+}
+
+static void
+h_document_start(struct xml_context *ctx UNUSED)	/* SAX hook: log the start of the document */
+{
+  bputs(out, "SAX:  document_start\n");
+}
+
+static void
+h_document_end(struct xml_context *ctx UNUSED)	/* SAX hook: log the end of the document */
+{
+  bputs(out, "SAX:  document_end\n");
+}
+
+static void
+h_xml_decl(struct xml_context *ctx)	/* SAX hook: log the version and standalone flag stored in the context */
+{
+  bprintf(out, "SAX:  xml_decl version=%s standalone=%d\n", ctx->version_str, ctx->standalone);	/* NOTE(review): version_str may be NULL without an XML declaration -- confirm */
+}
+
+static void
+h_doctype_decl(struct xml_context *ctx)	/* SAX hook: log the document type, its external ID and the subset flags */
+{
+  bprintf(out, "SAX:  doctype_decl type=%s public='%s' system='%s' extsub=%d intsub=%d\n",
+    ctx->document_type, ctx->eid.public_id ? : "", ctx->eid.system_id ? : "",	/* `a ? : b` is the GNU shorthand for `a ? a : b` */
+    !!(ctx->flags & XML_FLAG_HAS_EXTERNAL_SUBSET), !!(ctx->flags & XML_FLAG_HAS_INTERNAL_SUBSET));
+}
+
+static void
+h_comment(struct xml_context *ctx)	/* SAX hook: log the current comment node */
+{
+  bputs(out, "SAX:  comment");
+  show_node(ctx->node);
+}
+
+static void
+h_pi(struct xml_context *ctx)	/* SAX hook: log the current processing instruction node */
+{
+  bprintf(out, "SAX:  pi");
+  show_node(ctx->node);
+}
+
+static void
+h_element_start(struct xml_context *ctx)	/* SAX hook: log the current element with its attributes */
+{
+  bprintf(out, "SAX:  element_start");
+  show_node(ctx->node);
+}
+
+static void
+h_element_end(struct xml_context *ctx)	/* SAX hook: log the name of the element being closed */
+{
+  bprintf(out, "SAX:  element_end </%s>\n", ctx->node->name);
+}
+
+static void
+h_chars(struct xml_context *ctx)	/* SAX hook: log the current character data node */
+{
+  bprintf(out, "SAX:  chars");
+  show_node(ctx->node);
+}
+
+int
+main(int argc, char **argv)	/* Parse XML from stdin and log SAX/PULL/DOM events to stdout */
+{
+  int opt;
+  cf_def_file = NULL; // FIXME 
+  log_init(argv[0]);
+  while ((opt = cf_getopt(argc, argv, shortopts, longopts, NULL)) >= 0)
+    switch (opt)
+      {
+	case 's':
+	  want_sax++;
+	  break;
+	case 'p':
+	  want_pull++;
+	  break;
+	case 'd':
+	  want_dom++;
+	  break;
+	default:
+	  usage();
+      }
+  if (optind != argc)	/* no positional arguments are accepted */
+    usage();
+
+  out = bfdopen_shared(1, 4096);
+  struct xml_context ctx;
+  xml_init(&ctx);
+  ctx.h_warn = ctx.h_error = ctx.h_fatal = h_error;
+  if (want_sax)
+    {
+      ctx.h_document_start = h_document_start;
+      ctx.h_document_end = h_document_end;
+      ctx.h_xml_decl = h_xml_decl;
+      ctx.h_doctype_decl = h_doctype_decl;
+      ctx.h_comment = h_comment;
+      ctx.h_pi = h_pi;
+      ctx.h_element_start = h_element_start;
+      ctx.h_element_end = h_element_end;
+      ctx.h_chars = h_chars;
+    }
+  if (want_pull)
+    ctx.want = XML_WANT_CHARS | XML_WANT_STAG | XML_WANT_ETAG | XML_WANT_COMMENT | XML_WANT_PI;
+  if (want_dom)
+    ctx.flags &= ~XML_DOM_FREE;	/* keep the nodes so that show_tree() can print them afterwards */
+  xml_set_source(&ctx, bfdopen_shared(0, 4096));
+  int state;
+  bprintf(out, "PULL: start\n");
+  while ((state = xml_next(&ctx)) >= 0 && state != XML_STATE_EOF)	/* xml_next() returns -1 on a fatal error */
+    switch (state)
+      {
+	case XML_STATE_CHARS:
+	  bprintf(out, "PULL: chars");
+	  show_node(ctx.node);
+	  break;
+	case XML_STATE_STAG:
+	  bprintf(out, "PULL: element_start");
+	  show_node(ctx.node);
+	  break;
+	case XML_STATE_ETAG:
+	  bprintf(out, "PULL: element_end </%s>\n", ctx.node->name);
+	  break;
+	case XML_STATE_COMMENT:
+	  bprintf(out, "PULL: comment");
+	  show_node(ctx.node);
+	  break;
+	case XML_STATE_PI:
+	  bprintf(out, "PULL: pi");
+	  show_node(ctx.node);
+	  break;
+#if 0
+	case XML_STATE_CDATA:
+	  bprintf(out, "PULL: cdata [%s]\n", ctx.node->text);
+	  break;
+#endif
+      }
+  if (state != XML_STATE_EOF)
+    bprintf(out, "PULL: fatal error\n");
+  else
+    bprintf(out, "PULL: eof\n");
+
+  if (want_dom)
+    show_tree(ctx.root, 0);
+
+  xml_cleanup(&ctx);
+  bclose(out);
+  return 0;
+}
diff --git a/sherlock/xml/xml.c b/sherlock/xml/xml.c
deleted file mode 100644
index 1d9f0f45..00000000
--- a/sherlock/xml/xml.c
+++ /dev/null
@@ -1,2524 +0,0 @@
-/*
- *	Sherlock Library -- A simple XML parser
- *
- *	(c) 2007 Pavel Charvat <pchar@ucw.cz>
- *
- *	This software may be freely distributed and used according to the terms
- *	of the GNU Lesser General Public License.
- */
-
-/* TODO:
- * - iface
- * - stack-like memory handling where possible
- */
-
-#define LOCAL_DEBUG
-
-#include "lib/lib.h"
-#include "lib/mempool.h"
-#include "lib/fastbuf.h"
-#include "lib/ff-unicode.h"
-#include "lib/ff-binary.h"
-#include "lib/chartype.h"
-#include "lib/unicode.h"
-#include "lib/hashfunc.h"
-#include "lib/stkstring.h"
-#include "lib/unaligned.h"
-#include "charset/charconv.h"
-#include "charset/fb-charconv.h"
-#include "sherlock/xml/xml.h"
-#include "sherlock/xml/dtd.h"
-
-#include <setjmp.h>
-
-/*** Debugging ***/
-
-#ifdef LOCAL_DEBUG
-#define TRACE(c, f, p...) do { DBG("XML %u: " f, xml_row(c), ##p); } while(0)
-#else
-#define TRACE(c, f, p...) do {} while(0)
-#endif
-
-static uns xml_row(struct xml_context *ctx);
-
-/*** Error handling ***/
-
-static void NONRET
-xml_throw(struct xml_context *ctx)
-{
-  ASSERT(ctx->err_code && ctx->throw_buf);
-  longjmp(*(jmp_buf *)ctx->throw_buf, ctx->err_code);
-}
-
-static void
-xml_warn(struct xml_context *ctx, const char *format, ...)
-{
-  if (ctx->h_warn)
-    {
-      va_list args;
-      va_start(args, format);
-      ctx->err_msg = stk_vprintf(format, args);
-      ctx->err_code = XML_ERR_WARN;
-      va_end(args);
-      ctx->h_warn(ctx);
-      ctx->err_msg = NULL;
-      ctx->err_code = XML_ERR_OK;
-    }
-}
-
-static void
-xml_error(struct xml_context *ctx, const char *format, ...)
-{
-  if (ctx->h_error)
-    {
-      va_list args;
-      va_start(args, format);
-      ctx->err_msg = stk_vprintf(format, args);
-      ctx->err_code = XML_ERR_ERROR;
-      va_end(args);
-      ctx->h_error(ctx);
-      ctx->err_msg = NULL;
-      ctx->err_code = XML_ERR_OK;
-    }
-}
-
-static void NONRET
-xml_fatal(struct xml_context *ctx, const char *format, ...)
-{
-  va_list args;
-  va_start(args, format);
-  ctx->err_msg = mp_vprintf(ctx->pool, format, args);
-  ctx->err_code = XML_ERR_FATAL;
-  ctx->state = XML_STATE_FATAL;
-  va_end(args);
-  if (ctx->h_fatal)
-    ctx->h_fatal(ctx);
-  xml_throw(ctx);
-}
-
-/*** Charecter categorization ***/
-
-#include "obj/sherlock/xml/unicat.h"
-
-static inline uns
-xml_char_cat(uns c)
-{
-  if (c < 0x10000)
-    return 1U << xml_char_tab2[(c & 0xff) + xml_char_tab1[c >> 8]];
-  else if (likely(c < 0x110000))
-    return 1U << xml_char_tab3[c >> 16];
-  else
-    return 1;
-}
-
-/*** Memory management ***/
-
-static void NONRET
-xml_fatal_nested(struct xml_context *ctx)
-{
-  xml_fatal(ctx, "Entity not nested correctly");
-}
-
-static inline void
-xml_inc(struct xml_context *ctx)
-{
-  /* Called after the first character of a block */
-  TRACE(ctx, "inc");
-  ctx->depth++;
-}
-
-static inline void
-xml_dec(struct xml_context *ctx)
-{
-  /* Called after the last character of a block */
-  TRACE(ctx, "dec");
-  if (unlikely(!ctx->depth--))
-    xml_fatal_nested(ctx);
-}
-
-static inline void
-xml_push(struct xml_context *ctx)
-{
-  TRACE(ctx, "push");
-  struct xml_stack *s = mp_alloc(ctx->pool, sizeof(*s));
-  mp_save(ctx->pool, &s->saved_pool);
-  s->saved_flags = ctx->flags;
-  s->next = ctx->stack;
-  ctx->stack = s;
-  xml_inc(ctx);
-}
-
-static inline void
-xml_pop(struct xml_context *ctx)
-{
-  TRACE(ctx, "pop");
-  xml_dec(ctx);
-  struct xml_stack *s = ctx->stack;
-  ASSERT(s);
-  ctx->stack = s->next;
-  ctx->flags = s->saved_flags;
-  mp_restore(ctx->pool, &s->saved_pool);
-}
-
-#define XML_HASH_HDR_SIZE ALIGN_TO(sizeof(void *), CPU_STRUCT_ALIGN)
-#define XML_HASH_GIVE_ALLOC struct HASH_PREFIX(table); \
-  static inline void *HASH_PREFIX(alloc)(struct HASH_PREFIX(table) *t, uns size) \
-  { return mp_alloc(*(void **)((void *)t - XML_HASH_HDR_SIZE), size); } \
-  static inline void HASH_PREFIX(free)(struct HASH_PREFIX(table) *t UNUSED, void *p UNUSED) {}
-
-static void *
-xml_hash_new(struct mempool *pool, uns size)
-{
-  void *tab = mp_alloc_zero(pool, size + XML_HASH_HDR_SIZE);
-  *(void **)tab = pool;
-  return tab + XML_HASH_HDR_SIZE;
-}
-
-/*** Reading of document/external entities ***/
-
-static void NONRET
-xml_eof(struct xml_context *ctx)
-{
-  ctx->err_msg = "Unexpected EOF";
-  ctx->err_code = XML_ERR_EOF;
-  xml_throw(ctx);
-}
-
-static inline void
-xml_add_char(u32 **bstop, uns c)
-{
-  *(*bstop)++ = c;
-  *(*bstop)++ = xml_char_cat(c);
-}
-
-static struct xml_source *
-xml_push_source(struct xml_context *ctx, uns flags)
-{
-  xml_push(ctx);
-  struct xml_source *src = ctx->src;
-  if (src)
-    {
-      src->bptr = ctx->bptr;
-      src->bstop = ctx->bstop;
-    }
-  src = mp_alloc_zero(ctx->pool, sizeof(*src));
-  src->next = ctx->src;
-  src->saved_depth = ctx->depth;
-  ctx->src = src;
-  ctx->flags = (ctx->flags & ~(XML_FLAG_SRC_EOF | XML_FLAG_SRC_EXPECTED_DECL | XML_FLAG_SRC_NEW_LINE | XML_FLAG_SRC_SURROUND | XML_FLAG_SRC_DOCUMENT)) | flags;
-  ctx->bstop = ctx->bptr = src->buf;
-  ctx->depth = 0;
-  if (flags & XML_FLAG_SRC_SURROUND)
-    xml_add_char(&ctx->bstop, 0x20);
-  return src;
-}
-
-static void
-xml_pop_source(struct xml_context *ctx)
-{
-  TRACE(ctx, "xml_pop_source");
-  if (unlikely(ctx->depth != 0))
-    xml_fatal_nested(ctx);
-  struct xml_source *src = ctx->src;
-  ASSERT(src);
-  bclose(src->fb);
-  ctx->depth = src->saved_depth;
-  ctx->src = src = src->next;
-  if (src)
-    {
-      ctx->bptr = src->bptr;
-      ctx->bstop = src->bstop;
-    }
-  xml_pop(ctx);
-  if (unlikely(!src))
-    xml_eof(ctx);
-}
-
-static void xml_refill_utf8(struct xml_context *ctx);
-
-static void
-xml_push_entity(struct xml_context *ctx, struct xml_dtd_ent *ent)
-{
-  TRACE(ctx, "xml_push_entity");
-  uns cat1 = ctx->src->refill_cat1;
-  uns cat2 = ctx->src->refill_cat2;
-  struct xml_source *src = xml_push_source(ctx, 0);
-  src->refill_cat1 = cat1;
-  src->refill_cat2 = cat2;
-  if (ent->flags & XML_DTD_ENT_EXTERNAL)
-    xml_fatal(ctx, "External entities not implemented"); // FIXME
-  else
-    {
-      fbbuf_init_read(src->fb = &src->wrap_fb, ent->text, ent->len, 0);
-      src->refill = xml_refill_utf8;
-    }
-}
-
-void
-xml_set_source(struct xml_context *ctx, struct fastbuf *fb)
-{
-  TRACE(ctx, "xml_set_source");
-  ASSERT(!ctx->src);
-  struct xml_source *src = xml_push_source(ctx, XML_FLAG_SRC_DOCUMENT | XML_FLAG_SRC_EXPECTED_DECL);
-  src->fb = fb;
-}
-
-static uns
-xml_error_restricted(struct xml_context *ctx, uns c)
-{
-  if (c == ~1U)
-    xml_error(ctx, "Corrupted encoding");
-  else
-    xml_error(ctx, "Restricted char U+%04X", c);
-  return UNI_REPLACEMENT;
-}
-
-static void xml_parse_decl(struct xml_context *ctx);
-
-#define REFILL(ctx, func, params...)							\
-  struct xml_source *src = ctx->src;							\
-  struct fastbuf *fb = src->fb;								\
-  if (ctx->bptr == ctx->bstop)								\
-    ctx->bptr = ctx->bstop = src->buf;							\
-  uns f = ctx->flags, c, t1 = src->refill_cat1, t2 = src->refill_cat2, row = src->row;	\
-  u32 *bend = src->buf + ARRAY_SIZE(src->buf), *bstop = ctx->bstop,			\
-      *last_0xd = (f & XML_FLAG_SRC_NEW_LINE) ? bstop : bend;				\
-  do											\
-    {											\
-      c = func(fb, ##params);								\
-      uns t = xml_char_cat(c);								\
-      if (t & t1)									\
-        /* Typical branch */								\
-	*bstop++ = c, *bstop++ = t;							\
-      else if (t & t2)									\
-        {										\
-	  /* New line */								\
-	  /* XML 1.0: 0xA | 0xD | 0xD 0xA */						\
-	  /* XML 1.1: 0xA | 0xD | 0xD 0xA | 0x85 | 0xD 0x85 | 0x2028 */			\
-	  if (c == 0xd)									\
-	    last_0xd = bstop + 2;							\
-	  else if (c != 0x2028 && last_0xd == bstop)					\
-	    {										\
-	      last_0xd = bend;								\
-	      continue;									\
-	    }										\
-	  xml_add_char(&bstop, 0xa), row++;						\
-	}										\
-      else if (c == '>')								\
-        {										\
-	  /* Used only in XML/TextDecl to switch the encoding */			\
-	  *bstop++ = c, *bstop++ = t;							\
-	  break;									\
-	}										\
-      else if (~c)									\
-        /* Restricted character */							\
-        xml_add_char(&bstop, xml_error_restricted(ctx, c));				\
-      else										\
-        {										\
-	  /* EOF */									\
-	  if (f & XML_FLAG_SRC_SURROUND)						\
-	    xml_add_char(&bstop, 0x20);							\
-          f |= XML_FLAG_SRC_EOF;							\
-          break;									\
-	}										\
-    }											\
-  while (bstop < bend);									\
-  ctx->flags = (last_0xd == bstop) ? f | XML_FLAG_SRC_NEW_LINE : f & ~XML_FLAG_SRC_NEW_LINE; \
-  ctx->bstop = bstop;									\
-  src->row = row;
-
-static void
-xml_refill_utf8(struct xml_context *ctx)
-{
-  REFILL(ctx, bget_utf8_repl, ~1U);
-}
-
-static void
-xml_refill_utf16_le(struct xml_context *ctx)
-{
-  REFILL(ctx, bget_utf16_le_repl, ~1U);
-}
-
-static void
-xml_refill_utf16_be(struct xml_context *ctx)
-{
-  REFILL(ctx, bget_utf16_be_repl, ~1U);
-}
-
-#if 0
-static inline uns
-xml_refill_libcharset_bget(struct fastbuf *fb, unsigned short int *in_to_x)
-{
-  // FIXME: slow
-  int c;
-  return (unlikely(c = bgetc(fb) < 0)) ? c : (int)conv_x_to_ucs(in_to_x[c]);
-}
-
-static void
-xml_refill_libcharset(struct xml_context *ctx)
-{
-  unsigned short int *in_to_x = ctx->src->refill_in_to_x;
-  REFILL(ctx, xml_refill_libcharset_bget, in_to_x);
-}
-#endif
-
-#undef REFILL
-
-static void
-xml_refill(struct xml_context *ctx)
-{
-  do
-    {
-      if (ctx->flags & XML_FLAG_SRC_EOF)
-	xml_pop_source(ctx);
-      else if (ctx->flags & XML_FLAG_SRC_EXPECTED_DECL)
-	xml_parse_decl(ctx);
-      else
-        {
-	  ctx->src->refill(ctx);
-	  TRACE(ctx, "refilled %u characters", (uns)((ctx->bstop - ctx->bptr) / 2));
-	}
-    }
-  while (ctx->bptr == ctx->bstop);
-}
-
-static inline uns
-xml_peek_char(struct xml_context *ctx)
-{
-  if (ctx->bptr == ctx->bstop)
-    xml_refill(ctx);
-  return ctx->bptr[0];
-}
-
-static inline uns
-xml_peek_cat(struct xml_context *ctx)
-{
-  if (ctx->bptr == ctx->bstop)
-    xml_refill(ctx);
-  return ctx->bptr[1];
-}
-
-static inline uns
-xml_get_char(struct xml_context *ctx)
-{
-  uns c = xml_peek_char(ctx);
-  ctx->bptr += 2;
-  return c;
-}
-
-static inline uns
-xml_get_cat(struct xml_context *ctx)
-{
-  uns c = xml_peek_cat(ctx);
-  ctx->bptr += 2;
-  return c;
-}
-
-static inline uns
-xml_last_char(struct xml_context *ctx)
-{
-  return ctx->bptr[-2];
-}
-
-static inline uns
-xml_last_cat(struct xml_context *ctx)
-{
-  return ctx->bptr[-1];
-}
-
-static inline uns
-xml_skip_char(struct xml_context *ctx)
-{
-  uns c = ctx->bptr[0];
-  ctx->bptr += 2;
-  return c;
-}
-
-static inline uns
-xml_unget_char(struct xml_context *ctx)
-{
-  return *(ctx->bptr -= 2);
-}
-
-static uns
-xml_row(struct xml_context *ctx)
-{
-  struct xml_source *src = ctx->src;
-  if (!src)
-    return 0;
-  uns row = src->row;
-  for (u32 *p = ctx->bstop; p != ctx->bptr; p -= 2)
-    if (p[-1] & src->refill_cat2)
-      row--;
-  return row + 1;
-}
-
-/*** Basic parsing ***/
-
-static void NONRET
-xml_fatal_expected(struct xml_context *ctx, uns c)
-{
-  xml_fatal(ctx, "Expected '%c'", c);
-}
-
-static void NONRET
-xml_fatal_expected_white(struct xml_context *ctx)
-{
-  xml_fatal(ctx, "Expected a white space");
-}
-
-static void NONRET
-xml_fatal_expected_quot(struct xml_context *ctx)
-{
-  xml_fatal(ctx, "Expected a quotation mark");
-}
-
-static inline uns
-xml_parse_white(struct xml_context *ctx, uns mandatory)
-{
-  /* mandatory=1 -> S ::= (#x20 | #x9 | #xD | #xA)+
-   * mandatory=0 -> S? */
-  uns cnt = 0;
-  while (xml_peek_cat(ctx) & XML_CHAR_WHITE)
-    {
-      xml_skip_char(ctx);
-      cnt++;
-    }
-  if (unlikely(mandatory && !cnt))
-    xml_fatal_expected_white(ctx);
-  return cnt;
-}
-
-static inline void
-xml_parse_char(struct xml_context *ctx, uns c)
-{
-  /* Consumes a given Unicode character */
-  if (unlikely(c != xml_get_char(ctx)))
-    xml_fatal_expected(ctx, c);
-}
-
-static inline void
-xml_parse_seq(struct xml_context *ctx, const char *seq)
-{
-  /* Consumes a given sequence of ASCII characters */
-  while (*seq)
-    xml_parse_char(ctx, *seq++);
-}
-
-static void
-xml_parse_eq(struct xml_context *ctx)
-{
-  /* Eq ::= S? '=' S? */
-  xml_parse_white(ctx, 0);
-  xml_parse_char(ctx, '=');
-  xml_parse_white(ctx, 0);
-}
-
-static inline uns
-xml_parse_quote(struct xml_context *ctx)
-{
-  /* "'" | '"' */
-  uns c = xml_get_char(ctx);
-  if (unlikely(c != '\'' && c != '\"'))
-    xml_fatal_expected_quot(ctx);
-  return c;
-}
-
-/* Names and nmtokens */
-
-static char *
-xml_parse_string(struct xml_context *ctx, uns first_cat, uns next_cat, char *err)
-{
-  char *p = mp_start_noalign(ctx->pool, 1);
-  if (unlikely(!(xml_peek_cat(ctx) & first_cat)))
-    xml_fatal(ctx, "%s", err);
-  do
-    {
-      p = mp_spread(ctx->pool, p, 5);
-      p = utf8_32_put(p, xml_skip_char(ctx));
-    }
-  while (xml_peek_cat(ctx) & next_cat);
-  *p++ = 0;
-  return mp_end(ctx->pool, p);
-}
-
-static void
-xml_skip_string(struct xml_context *ctx, uns first_cat, uns next_cat, char *err)
-{
-  if (unlikely(!(xml_get_cat(ctx) & first_cat)))
-    xml_fatal(ctx, "%s", err);
-  while (xml_peek_cat(ctx) & next_cat)
-    xml_skip_char(ctx);
-}
-
-static char *
-xml_parse_name(struct xml_context *ctx)
-{
-  /* Name ::= NameStartChar (NameChar)* */
-  return xml_parse_string(ctx,
-    !(ctx->flags & XML_FLAG_VERSION_1_1) ? XML_CHAR_SNAME_1_0 : XML_CHAR_SNAME_1_1,
-    !(ctx->flags & XML_FLAG_VERSION_1_1) ? XML_CHAR_NAME_1_0 : XML_CHAR_NAME_1_1,
-    "Expected a name");
-}
-
-static void
-xml_skip_name(struct xml_context *ctx)
-{
-  xml_skip_string(ctx,
-    !(ctx->flags & XML_FLAG_VERSION_1_1) ? XML_CHAR_SNAME_1_0 : XML_CHAR_SNAME_1_1,
-    !(ctx->flags & XML_FLAG_VERSION_1_1) ? XML_CHAR_NAME_1_0 : XML_CHAR_NAME_1_1,
-    "Expected a name");
-}
-
-static char *
-xml_parse_nmtoken(struct xml_context *ctx)
-{
-  /* Nmtoken ::= (NameChar)+ */
-  uns cat = !(ctx->flags & XML_FLAG_VERSION_1_1) ? XML_CHAR_NAME_1_0 : XML_CHAR_NAME_1_1;
-  return xml_parse_string(ctx, cat, cat, "Expected a nmtoken");
-}
-
-/* Simple literals */
-
-static char *
-xml_parse_system_literal(struct xml_context *ctx)
-{
-  /* SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") */
-  char *p = mp_start_noalign(ctx->pool, 1);
-  uns q = xml_parse_quote(ctx), c;
-  while ((c = xml_get_char(ctx)) != q)
-    {
-      p = mp_spread(ctx->pool, p, 5);
-      p = utf8_32_put(p, c);
-    }
-  *p++ = 0;
-  return mp_end(ctx->pool, p);
-}
-
-static char *
-xml_parse_pubid_literal(struct xml_context *ctx)
-{
-  /* PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" */
-  char *p = mp_start_noalign(ctx->pool, 1);
-  uns q = xml_parse_quote(ctx), c;
-  while ((c = xml_get_char(ctx)) != q)
-    {
-      if (unlikely(!(xml_last_cat(ctx) & XML_CHAR_PUBID)))
-	xml_fatal(ctx, "Expected a pubid character");
-      p = mp_spread(ctx->pool, p, 2);
-      *p++ = c;
-    }
-  *p++ = 0;
-  return mp_end(ctx->pool, p);
-}
-
-static char *
-xml_parse_encoding_name(struct xml_context *ctx)
-{
-  /* EncName ::= '"' [A-Za-z] ([A-Za-z0-9._] | '-')* '"' | "'" [A-Za-z] ([A-Za-z0-9._] | '-')* "'" */
-  char *p = mp_start_noalign(ctx->pool, 1);
-  uns q = xml_parse_quote(ctx);
-  if (unlikely(!(xml_get_cat(ctx) & XML_CHAR_ENC_SNAME)))
-    xml_fatal(ctx, "Invalid character in the encoding name");
-  while (1)
-    {
-      p = mp_spread(ctx->pool, p, 2);
-      *p++ = xml_last_char(ctx);
-      if (xml_get_char(ctx) == q)
-	break;
-      if (unlikely(!(xml_last_cat(ctx) & XML_CHAR_ENC_NAME)))
-	xml_fatal(ctx, "Invalid character in the encoding name");
-    }
-  *p++ = 0;
-  return mp_end(ctx->pool, p);
-}
-
-/* Document/external entity header */
-
-static inline void
-xml_init_cats(struct xml_context *ctx, uns mask)
-{
-  if (!(ctx->flags & XML_FLAG_VERSION_1_1))
-    {
-      ctx->src->refill_cat1 = XML_CHAR_VALID_1_0 & ~XML_CHAR_NEW_LINE_1_0 & ~mask;
-      ctx->src->refill_cat2 = XML_CHAR_NEW_LINE_1_0;
-    }
-  else
-    {
-      ctx->src->refill_cat1 = XML_CHAR_UNRESTRICTED_1_1 & ~XML_CHAR_NEW_LINE_1_1 & ~mask;
-      ctx->src->refill_cat2 = XML_CHAR_NEW_LINE_1_1;
-    }
-}
-
-static void
-xml_init_charconv(struct xml_context *ctx, int cs)
-{
-  // FIXME: hack
-  struct xml_source *src = ctx->src;
-  TRACE(ctx, "wrapping charset %s", charset_name(cs));
-#if 0
-  struct conv_context conv;
-  conv_set_charset(&conv, cs, CONV_CHARSET_UTF8);
-  src->refill = xml_refill_libcharset;
-  src->refill_in_to_x = conv.in_to_x;
-#else
-  src->fb = fb_wrap_charconv_in(src->fb, cs, CONV_CHARSET_UTF8);
-  // FIXME: memory leak
-#endif
-}
-
-static void
-xml_parse_decl(struct xml_context *ctx)
-{
-  TRACE(ctx, "xml_parse_decl");
-  struct xml_source *src = ctx->src;
-  ctx->flags &= ~XML_FLAG_SRC_EXPECTED_DECL;
-
-  /* Setup valid Unicode ranges and force the reader to abort refill() after each '>', where we can switch encoding or XML version */
-  xml_init_cats(ctx, XML_CHAR_GT);
-
-  /* Initialize the supplied charset (if any) or try to guess it */
-  char *expected_encoding = src->expected_encoding ? : src->fb_encoding;
-  src->refill = xml_refill_utf8;
-  int bom = bpeekc(src->fb);
-  if (bom < 0)
-    ctx->flags |= XML_FLAG_SRC_EOF;
-  if (!src->fb_encoding)
-    {
-      if (bom == 0xfe)
-	src->refill = xml_refill_utf16_be;
-      else if (bom == 0xff)
-	src->refill = xml_refill_utf16_le;
-    }
-  else
-    {
-      int cs = find_charset_by_name(src->fb_encoding);
-      if (cs == CONV_CHARSET_UTF8)
-        {}
-      else if (cs >= 0)
-        {
-	  xml_init_charconv(ctx, cs);
-	  bom = 0;
-	}
-      else if (strcasecmp(src->fb_encoding, "UTF-16"))
-        {
-	  src->refill = xml_refill_utf16_be;
-	  if (bom == 0xff)
-	    src->refill = xml_refill_utf16_le;
-	  if (!src->expected_encoding)
-	    expected_encoding = (bom == 0xff) ? "UTF-16LE" : "UTF-16BE";
-	}
-      else if (strcasecmp(src->fb_encoding, "UTF-16BE"))
-	src->refill = xml_refill_utf16_be;
-      else if (strcasecmp(src->fb_encoding, "UTF-16LE"))
-	src->refill = xml_refill_utf16_le;
-      else
-        {
-	  xml_error(ctx, "Unknown encoding '%s'", src->fb_encoding);
-	  expected_encoding = NULL;
-	}
-    }
-  uns utf16 = src->refill == xml_refill_utf16_le || src->refill == xml_refill_utf16_be;
-  if (bom > 0 && xml_peek_char(ctx) == 0xfeff)
-    xml_skip_char(ctx);
-  else if (utf16)
-    xml_error(ctx, "Missing or corrupted BOM");
-
-  /* Look ahead for presence of XMLDecl or optional TextDecl */
-  if (!(ctx->flags & XML_FLAG_SRC_EOF) && ctx->bstop != src->buf + ARRAY_SIZE(src->buf))
-    xml_refill(ctx);
-  uns doc = ctx->flags & XML_FLAG_SRC_DOCUMENT;
-  u32 *bptr = ctx->bptr;
-  uns have_decl = (12 <= ctx->bstop - ctx->bptr && (bptr[11] & XML_CHAR_WHITE) &&
-    bptr[0] == '<' && bptr[2] == '?' && (bptr[4] & 0xdf) == 'X' && (bptr[6] & 0xdf) == 'M' && (bptr[8] & 0xdf) == 'L');
-  if (!have_decl)
-    {
-      if (doc)
-        xml_fatal(ctx, "Missing or corrupted XML header");
-      else if (expected_encoding && strcasecmp(src->expected_encoding, "UTF-8") && !utf16)
-	xml_error(ctx, "Missing or corrupted entity header");
-      goto exit;
-    }
-  ctx->bptr = bptr + 12;
-  xml_parse_white(ctx, 0);
-
-  /* Parse version string (mandatory in XMLDecl, optional in TextDecl) */
-  if (xml_peek_char(ctx) == 'v')
-    {
-      xml_parse_seq(ctx, "version");
-      xml_parse_eq(ctx);
-      char *version = xml_parse_pubid_literal(ctx);
-      TRACE(ctx, "version=%s", version);
-      uns v = 0;
-      if (!strcmp(version, "1.1"))
-	v = XML_FLAG_VERSION_1_1;
-      else if (strcmp(version, "1.0"))
-        {
-	  xml_error(ctx, "Unknown XML version string '%s'", version);
-	  version = "1.0";
-	}
-      if (doc)
-        {
-	  ctx->version_str = version;
-	  ctx->flags |= v;
-	}
-      else if (v > (ctx->flags & XML_FLAG_VERSION_1_1))
-        xml_error(ctx, "XML 1.1 external entity included from XML 1.0 document");
-      if (!xml_parse_white(ctx, !doc))
-        goto end;
-    }
-  else if (doc)
-    {
-      xml_error(ctx, "Expected XML version");
-      ctx->version_str = "1.0";
-    }
-
-  /* Parse encoding string (optional in XMLDecl, mandatory in TextDecl) */
-  if (xml_peek_char(ctx) == 'e')
-    {
-      xml_parse_seq(ctx, "encoding");
-      xml_parse_eq(ctx);
-      src->decl_encoding = xml_parse_encoding_name(ctx);
-      TRACE(ctx, "encoding=%s", src->decl_encoding);
-      if (!xml_parse_white(ctx, 0))
-	goto end;
-    }
-  else if (!doc)
-    xml_error(ctx, "Expected XML encoding");
-
-  /* Parse whether the document is standalone (optional in XMLDecl) */
-  if (doc && xml_peek_char(ctx) == 's')
-    {
-      xml_parse_seq(ctx, "standalone");
-      xml_parse_eq(ctx);
-      uns c = xml_parse_quote(ctx);
-      if (ctx->standalone = (xml_peek_char(ctx) == 'y'))
-	xml_parse_seq(ctx, "yes");
-      else
-        xml_parse_seq(ctx, "no");
-      xml_parse_char(ctx, c);
-      TRACE(ctx, "standalone=%d", ctx->standalone);
-      xml_parse_white(ctx, 0);
-    }
-end:
-  xml_parse_seq(ctx, "?>");
-
-  /* Switch to the final encoding */
-  if (src->decl_encoding)
-    {
-      int cs = find_charset_by_name(src->decl_encoding);
-      if (cs < 0 && !expected_encoding)
-	xml_error(ctx, "Unknown encoding '%s'", src->decl_encoding);
-      else if (!src->fb_encoding && cs >= 0 && cs != CONV_CHARSET_UTF8)
-	xml_init_charconv(ctx, cs);
-      else if (expected_encoding && strcasecmp(src->decl_encoding, expected_encoding) && (!utf16 ||
-	!(!strcasecmp(src->decl_encoding, "UTF-16") ||
-	 (!strcasecmp(src->decl_encoding, "UTF-16BE") && strcasecmp(expected_encoding, "UTF-16LE")) ||
-	 (!strcasecmp(src->decl_encoding, "UTF-16LE") && strcasecmp(expected_encoding, "UTF-16BE")))))
-	xml_error(ctx, "The header contains encoding '%s' instead of expected '%s'", src->decl_encoding, expected_encoding);
-    }
-
-exit:
-  /* Update valid Unicode ranges */
-  xml_init_cats(ctx, 0);
-}
-
-/*** Document Type Definition (DTD) ***/
-
-/* Notations */
-
-#define HASH_PREFIX(x) xml_dtd_notns_##x
-#define HASH_NODE struct xml_dtd_notn
-#define HASH_KEY_STRING name
-#define HASH_ZERO_FILL
-#define HASH_TABLE_DYNAMIC
-#define HASH_WANT_FIND
-#define HASH_WANT_LOOKUP
-#define HASH_GIVE_ALLOC
-#define HASH_TABLE_ALLOC
-XML_HASH_GIVE_ALLOC
-#include "lib/hashtable.h"
-
-/* General entities */
-
-#define HASH_PREFIX(x) xml_dtd_ents_##x
-#define HASH_NODE struct xml_dtd_ent
-#define HASH_KEY_STRING name
-#define HASH_ZERO_FILL
-#define HASH_TABLE_DYNAMIC
-#define HASH_WANT_FIND
-#define HASH_WANT_LOOKUP
-#define HASH_GIVE_ALLOC
-#define HASH_TABLE_ALLOC
-XML_HASH_GIVE_ALLOC
-#include "lib/hashtable.h"
-
-static struct xml_dtd_ent *
-xml_dtd_declare_trivial_gent(struct xml_context *ctx, char *name, char *text)
-{
-  struct xml_dtd *dtd = ctx->dtd;
-  struct xml_dtd_ent *ent = xml_dtd_ents_lookup(dtd->tab_gents, name);
-  if (ent->flags & XML_DTD_ENT_DECLARED)
-    {
-      xml_warn(ctx, "Entity &%s; already declared", name);
-      return NULL;
-    }
-  slist_add_tail(&dtd->gents, &ent->n);
-  ent->flags = XML_DTD_ENT_DECLARED | XML_DTD_ENT_TRIVIAL;
-  ent->text = text;
-  ent->len = strlen(text);
-  return ent;
-}
-
-static void
-xml_dtd_declare_default_gents(struct xml_context *ctx)
-{
-  xml_dtd_declare_trivial_gent(ctx, "lt", "<");
-  xml_dtd_declare_trivial_gent(ctx, "gt", ">");
-  xml_dtd_declare_trivial_gent(ctx, "amp", "&");
-  xml_dtd_declare_trivial_gent(ctx, "apos", "'");
-  xml_dtd_declare_trivial_gent(ctx, "quot", "\"");
-}
-
-static struct xml_dtd_ent *
-xml_dtd_find_gent(struct xml_context *ctx, char *name)
-{
-  struct xml_dtd *dtd = ctx->dtd;
-  if (dtd)
-    {
-      struct xml_dtd_ent *ent = xml_dtd_ents_find(dtd->tab_gents, name);
-      return !ent ? NULL : (ent->flags & XML_DTD_ENT_DECLARED) ? ent : NULL;
-    }
-  else
-    {
-#define ENT(n, t) ent_##n = { .name = #n, .text = t, .len = 1, .flags = XML_DTD_ENT_DECLARED | XML_DTD_ENT_TRIVIAL }
-      static struct xml_dtd_ent ENT(lt, "<"), ENT(gt, ">"), ENT(amp, "&"), ENT(apos, "'"), ENT(quot, "\"");
-#undef ENT
-      switch (name[0])
-        {
-	  case 'l':
-	    if (!strcmp(name, "lt"))
-	      return &ent_lt;
-	    break;
-	  case 'g':
-	    if (!strcmp(name, "gt"))
-	      return &ent_gt;
-	    break;
-	  case 'a':
-	    if (!strcmp(name, "amp"))
-	      return &ent_amp;
-	    if (!strcmp(name, "apos"))
-	      return &ent_apos;
-	    break;
-	  case 'q':
-	    if (!strcmp(name, "quot"))
-	      return &ent_quot;
-	    break;
-	}
-      return NULL;
-    }
-}
-
-/* Parameter entities */
-
-static struct xml_dtd_ent *
-xml_dtd_find_pent(struct xml_context *ctx, char *name)
-{
-  struct xml_dtd *dtd = ctx->dtd;
-  struct xml_dtd_ent *ent = xml_dtd_ents_find(dtd->tab_pents, name);
-  return !ent ? NULL : (ent->flags & XML_DTD_ENT_DECLARED) ? ent : NULL;
-}
-
-/* Elements */
-
-#define HASH_PREFIX(x) xml_dtd_elems_##x
-#define HASH_NODE struct xml_dtd_elem
-#define HASH_KEY_STRING name
-#define HASH_TABLE_DYNAMIC
-#define HASH_ZERO_FILL
-#define HASH_WANT_LOOKUP
-#define HASH_GIVE_ALLOC
-#define HASH_TABLE_ALLOC
-XML_HASH_GIVE_ALLOC
-#include "lib/hashtable.h"
-
-/* Element sons */
-
-struct xml_dtd_enodes_table;
-
-static inline uns
-xml_dtd_enodes_hash(struct xml_dtd_enodes_table *tab UNUSED, struct xml_dtd_elem_node *parent, struct xml_dtd_elem *elem)
-{
-  return hash_pointer(parent) ^ hash_pointer(elem);
-}
-
-static inline int
-xml_dtd_enodes_eq(struct xml_dtd_enodes_table *tab UNUSED, struct xml_dtd_elem_node *parent1, struct xml_dtd_elem *elem1, struct xml_dtd_elem_node *parent2, struct xml_dtd_elem *elem2)
-{
-  return (parent1 == parent2) && (elem1 == elem2);
-}
-
-static inline void
-xml_dtd_enodes_init_key(struct xml_dtd_enodes_table *tab UNUSED, struct xml_dtd_elem_node *node, struct xml_dtd_elem_node *parent, struct xml_dtd_elem *elem)
-{
-  node->parent = parent;
-  node->elem = elem;
-}
-
-#define HASH_PREFIX(x) xml_dtd_enodes_##x
-#define HASH_NODE struct xml_dtd_elem_node
-#define HASH_KEY_COMPLEX(x) x parent, x elem
-#define HASH_KEY_DECL struct xml_dtd_elem_node *parent, struct xml_dtd_elem *elem
-#define HASH_GIVE_HASHFN
-#define HASH_GIVE_EQ
-#define HASH_GIVE_INIT_KEY
-#define HASH_TABLE_DYNAMIC
-#define HASH_ZERO_FILL
-#define HASH_WANT_FIND
-#define HASH_WANT_NEW
-#define HASH_GIVE_ALLOC
-#define HASH_TABLE_ALLOC
-XML_HASH_GIVE_ALLOC
-#include "lib/hashtable.h"
-
-/* Element attributes */
-
-struct xml_dtd_attrs_table;
-
-static inline uns
-xml_dtd_attrs_hash(struct xml_dtd_attrs_table *tab UNUSED, struct xml_dtd_elem *elem, char *name)
-{
-  return hash_pointer(elem) ^ hash_string(name);
-}
-
-static inline int
-xml_dtd_attrs_eq(struct xml_dtd_attrs_table *tab UNUSED, struct xml_dtd_elem *elem1, char *name1, struct xml_dtd_elem *elem2, char *name2)
-{
-  return (elem1 == elem2) && !strcmp(name1, name2);
-}
-
-static inline void
-xml_dtd_attrs_init_key(struct xml_dtd_attrs_table *tab UNUSED, struct xml_dtd_attr *attr, struct xml_dtd_elem *elem, char *name)
-{
-  attr->elem = elem;
-  attr->name = name;
-}
-
-#define HASH_PREFIX(x) xml_dtd_attrs_##x
-#define HASH_NODE struct xml_dtd_attr
-#define HASH_ZERO_FILL
-#define HASH_TABLE_DYNAMIC
-#define HASH_KEY_COMPLEX(x) x elem, x name
-#define HASH_KEY_DECL struct xml_dtd_elem *elem, char *name
-#define HASH_GIVE_HASHFN
-#define HASH_GIVE_EQ
-#define HASH_GIVE_INIT_KEY
-#define HASH_WANT_FIND
-#define HASH_WANT_NEW
-#define HASH_GIVE_ALLOC
-#define HASH_TABLE_ALLOC
-XML_HASH_GIVE_ALLOC
-#include "lib/hashtable.h"
-
-/* Enumerated attribute values */
-
-struct xml_dtd_evals_table;
-
-static inline uns
-xml_dtd_evals_hash(struct xml_dtd_evals_table *tab UNUSED, struct xml_dtd_attr *attr, char *val)
-{
-  return hash_pointer(attr) ^ hash_string(val);
-}
-
-static inline int
-xml_dtd_evals_eq(struct xml_dtd_evals_table *tab UNUSED, struct xml_dtd_attr *attr1, char *val1, struct xml_dtd_attr *attr2, char *val2)
-{
-  return (attr1 == attr2) && !strcmp(val1, val2);
-}
-
-static inline void
-xml_dtd_evals_init_key(struct xml_dtd_evals_table *tab UNUSED, struct xml_dtd_eval *eval, struct xml_dtd_attr *attr, char *val)
-{
-  eval->attr = attr;
-  eval->val = val;
-}
-
-#define HASH_PREFIX(x) xml_dtd_evals_##x
-#define HASH_NODE struct xml_dtd_eval
-#define HASH_TABLE_DYNAMIC
-#define HASH_KEY_COMPLEX(x) x attr, x val
-#define HASH_KEY_DECL struct xml_dtd_attr *attr, char *val
-#define HASH_GIVE_HASHFN
-#define HASH_GIVE_EQ
-#define HASH_GIVE_INIT_KEY
-#define HASH_WANT_FIND
-#define HASH_WANT_NEW
-#define HASH_GIVE_ALLOC
-#define HASH_TABLE_ALLOC
-XML_HASH_GIVE_ALLOC
-#include "lib/hashtable.h"
-
-/* Enumerated attribute notations */
-
-struct xml_dtd_enotns_table;
-
-static inline uns
-xml_dtd_enotns_hash(struct xml_dtd_enotns_table *tab UNUSED, struct xml_dtd_attr *attr, struct xml_dtd_notn *notn)
-{
-  return hash_pointer(attr) ^ hash_pointer(notn);
-}
-
-static inline int
-xml_dtd_enotns_eq(struct xml_dtd_enotns_table *tab UNUSED, struct xml_dtd_attr *attr1, struct xml_dtd_notn *notn1, struct xml_dtd_attr *attr2, struct xml_dtd_notn *notn2)
-{
-  return (attr1 == attr2) && (notn1 == notn2);
-}
-
-static inline void
-xml_dtd_enotns_init_key(struct xml_dtd_enotns_table *tab UNUSED, struct xml_dtd_enotn *enotn, struct xml_dtd_attr *attr, struct xml_dtd_notn *notn)
-{
-  enotn->attr = attr;
-  enotn->notn = notn;
-}
-
-#define HASH_PREFIX(x) xml_dtd_enotns_##x
-#define HASH_NODE struct xml_dtd_enotn
-#define HASH_TABLE_DYNAMIC
-#define HASH_KEY_COMPLEX(x) x attr, x notn
-#define HASH_KEY_DECL struct xml_dtd_attr *attr, struct xml_dtd_notn *notn
-#define HASH_GIVE_HASHFN
-#define HASH_GIVE_EQ
-#define HASH_GIVE_INIT_KEY
-#define HASH_WANT_FIND
-#define HASH_WANT_NEW
-#define HASH_GIVE_ALLOC
-#define HASH_TABLE_ALLOC
-XML_HASH_GIVE_ALLOC
-#include "lib/hashtable.h"
-
-/* DTD initialization/cleanup */
-
-static void
-xml_dtd_init(struct xml_context *ctx)
-{
-  if (ctx->dtd)
-    return;
-  struct mempool *pool = mp_new(4096);
-  struct xml_dtd *dtd = ctx->dtd = mp_alloc_zero(pool, sizeof(*ctx->dtd));
-  dtd->pool = pool;
-  xml_dtd_ents_init(dtd->tab_gents = xml_hash_new(pool, sizeof(struct xml_dtd_ents_table)));
-  xml_dtd_ents_init(dtd->tab_pents = xml_hash_new(pool, sizeof(struct xml_dtd_ents_table)));
-  xml_dtd_notns_init(dtd->tab_notns = xml_hash_new(pool, sizeof(struct xml_dtd_notns_table)));
-  xml_dtd_elems_init(dtd->tab_elems = xml_hash_new(pool, sizeof(struct xml_dtd_elems_table)));
-  xml_dtd_enodes_init(dtd->tab_enodes = xml_hash_new(pool, sizeof(struct xml_dtd_enodes_table)));
-  xml_dtd_attrs_init(dtd->tab_attrs = xml_hash_new(pool, sizeof(struct xml_dtd_attrs_table)));
-  xml_dtd_evals_init(dtd->tab_evals = xml_hash_new(pool, sizeof(struct xml_dtd_evals_table)));
-  xml_dtd_enotns_init(dtd->tab_enotns = xml_hash_new(pool, sizeof(struct xml_dtd_enotns_table)));
-  xml_dtd_declare_default_gents(ctx);
-}
-
-static void
-xml_dtd_cleanup(struct xml_context *ctx)
-{
-  if (!ctx->dtd)
-    return;
-  mp_delete(ctx->dtd->pool);
-  ctx->dtd = NULL;
-}
-
-static void
-xml_dtd_finish(struct xml_context *ctx)
-{
-  if (!ctx->dtd)
-    return;
-  // FIXME
-}
-
-/*** Parsing functions ***/
-
-/* Comments */
-
-static void
-xml_push_comment(struct xml_context *ctx)
-{
-  /* Parse a comment to ctx->value:
-   *   Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
-   * Already parsed: '<!-' */
-  struct fastbuf *out = ctx->value;
-  uns c;
-  xml_parse_char(ctx, '-');
-  while (1)
-    {
-      if ((c = xml_get_char(ctx)) == '-')
-	if ((c = xml_get_char(ctx)) == '-')
-	  break;
-	else
-	  bputc(out, '-');
-      bput_utf8_32(out, c);
-    }
-  xml_parse_char(ctx, '>');
-  xml_dec(ctx);
-  fbgrow_rewind(out);
-  if (ctx->h_comment)
-    ctx->h_comment(ctx);
-}
-
-static void
-xml_pop_comment(struct xml_context *ctx)
-{
-  fbgrow_rewind(ctx->value);
-}
-
-static void
-xml_skip_comment(struct xml_context *ctx)
-{
-  xml_parse_char(ctx, '-');
-  while (xml_get_char(ctx) != '-' || xml_get_char(ctx) != '-');
-  xml_parse_char(ctx, '>');
-  xml_dec(ctx);
-}
-
-/* Processing instructions */
-
-static void
-xml_push_pi(struct xml_context *ctx)
-{
-  /* Parses a PI to ctx->value and ctx->name:
-   *   PI       ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
-   *   PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
-   * Already parsed: '<?' */
-
-  ctx->name = xml_parse_name(ctx);
-  if (unlikely(!strcasecmp(ctx->name, "xml")))
-    xml_fatal(ctx, "Reserved PI target");
-  struct fastbuf *out = ctx->value;
-  if (xml_parse_white(ctx, 0))
-    xml_parse_seq(ctx, "?>");
-  else
-    {
-      while (1)
-        {
-	  uns c;
-	  if ((c = xml_get_char(ctx)) == '?')
-	    if (xml_get_char(ctx) == '>')
-	      break;
-	    else
-	      {
-	        xml_unget_char(ctx);
-		bputc(out, '?');
-	      }
-	  else
-	    bput_utf8_32(out, c);
-	}
-      fbgrow_rewind(out);
-    }
-  xml_dec(ctx);
-  if (ctx->h_pi)
-    ctx->h_pi(ctx);
-}
-
-static void
-xml_pop_pi(struct xml_context *ctx)
-{
-  fbgrow_reset(ctx->value);
-}
-
-static void
-xml_skip_pi(struct xml_context *ctx)
-{
-  if (ctx->flags & XML_FLAG_VALIDATING)
-    {
-      mp_push(ctx->pool);
-      if (unlikely(!strcasecmp(xml_parse_name(ctx), "xml")))
-	xml_fatal(ctx, "Reserved PI target");
-      mp_pop(ctx->pool);
-      if (!xml_parse_white(ctx, 0))
-        {
-	  xml_parse_seq(ctx, "?>");
-	  xml_dec(ctx);
-	  return;
-	}
-    }
-  while (1)
-    if (xml_get_char(ctx) == '?')
-      if (xml_get_char(ctx) == '>')
-	break;
-      else
-	xml_unget_char(ctx);
-  xml_dec(ctx);
-}
-
-/* Character references */
-
-static uns
-xml_parse_char_ref(struct xml_context *ctx)
-{
-  TRACE(ctx, "parse_char_ref");
-  /* CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
-   * Already parsed: '&#' */
-  uns v = 0;
-  if (xml_get_char(ctx) == 'x')
-    {
-      if (!(xml_get_cat(ctx) & XML_CHAR_XDIGIT))
-        {
-	  xml_error(ctx, "Expected a hexadecimal value of character reference");
-	  goto recover;
-	}
-      do
-        {
-	  v = (v << 4) + Cxvalue(xml_last_char(ctx));
-	}
-      while (v < 0x110000 && (xml_get_cat(ctx) & XML_CHAR_XDIGIT));
-    }
-  else
-    {
-      if (!(xml_last_cat(ctx) & XML_CHAR_DIGIT))
-        {
-	  xml_error(ctx, "Expected a numeric value of character reference");
-	  goto recover;
-	}
-      do
-        {
-	  v = v * 10 + xml_last_char(ctx) - '0';
-	}
-      while (v < 0x110000 && (xml_get_cat(ctx) & XML_CHAR_DIGIT));
-    }
-  uns cat = xml_char_cat(v);
-  if (!(cat & XML_CHAR_UNRESTRICTED_1_1) && ((ctx->flags & XML_FLAG_VERSION_1_1) || !(cat & XML_CHAR_VALID_1_0)))
-    {
-      xml_error(ctx, "Character reference out of range");
-      goto recover;
-    }
-  if (xml_last_char(ctx) == ';')
-    {
-      xml_dec(ctx);
-      return v;
-    }
-  xml_error(ctx, "Expected ';'");
-recover:
-  while (xml_last_char(ctx) != ';')
-    xml_get_char(ctx);
-  xml_dec(ctx);
-  return UNI_REPLACEMENT;
-}
-
-/* References to general entities */
-
-static void
-xml_parse_ge_ref(struct xml_context *ctx, struct fastbuf *out)
-{
-  /* Reference ::= EntityRef | CharRef
-   * EntityRef ::= '&' Name ';'
-   * Already parsed: '&' */
-  if (xml_peek_char(ctx) == '#')
-    {
-      xml_skip_char(ctx);
-      uns c = xml_parse_char_ref(ctx);
-      bput_utf8_32(out, c);
-    }
-  else
-    {
-      struct mempool_state state;
-      mp_save(ctx->pool, &state);
-      char *name = xml_parse_name(ctx);
-      xml_parse_char(ctx, ';');
-      struct xml_dtd_ent *ent = xml_dtd_find_gent(ctx, name);
-      if (!ent)
-        {
-	  xml_error(ctx, "Unknown entity &%s;", name);
-	  bputc(out, '&');
-	  bputs(out, name);
-	  bputc(out, ';');
-	}
-      else if (ent->flags & XML_DTD_ENT_TRIVIAL)
-        {
-	  TRACE(ctx, "Trivial entity &%s;", name);
-	  bwrite(out, ent->text, ent->len);
-	}
-      else
-        {
-	  TRACE(ctx, "Pushed entity &%s;", name);
-	  mp_restore(ctx->pool, &state);
-          xml_dec(ctx);
-	  xml_push_entity(ctx, ent);
-	  return;
-	}
-      mp_restore(ctx->pool, &state);
-      xml_dec(ctx);
-    }
-}
-
-/* References to parameter entities */
-
-static void
-xml_parse_pe_ref(struct xml_context *ctx)
-{
-  /* PEReference ::= '%' Name ';'
-   * Already parsed: '%' */
-  struct mempool_state state;
-  mp_save(ctx->pool, &state);
-  char *name = xml_parse_name(ctx);
-  xml_parse_char(ctx, ';');
-  struct xml_dtd_ent *ent = xml_dtd_find_pent(ctx, name);
-  if (!ent)
-    xml_error(ctx, "Unknown entity %%%s;", name);
-  else
-    {
-      TRACE(ctx, "Pushed entity %%%s;", name);
-      mp_restore(ctx->pool, &state);
-      xml_dec(ctx);
-      xml_push_entity(ctx, ent);
-      return;
-    }
-  mp_restore(ctx->pool, &state);
-  xml_dec(ctx);
-}
-
-static void
-xml_parse_dtd_pe(struct xml_context *ctx)
-{
-  do
-    {
-      xml_skip_char(ctx);
-      xml_inc(ctx);
-      while (xml_peek_cat(ctx) & XML_CHAR_WHITE)
-	xml_skip_char(ctx);
-      xml_parse_pe_ref(ctx);
-    }
-  while (xml_peek_char(ctx) != '%');
-}
-
-static inline uns
-xml_parse_dtd_white(struct xml_context *ctx, uns mandatory)
-{
-  /* Whitespace or parameter entity */
-  uns cnt = 0;
-  while (xml_peek_cat(ctx) & XML_CHAR_WHITE)
-    {
-      xml_skip_char(ctx);
-      cnt = 1;
-    }
-  if (xml_peek_char(ctx) == '%')
-    {
-      xml_parse_dtd_pe(ctx);
-      return 1;
-    }
-  else if (unlikely(mandatory && !cnt))
-    xml_fatal_expected_white(ctx);
-  return cnt;
-}
-
-static inline uns
-xml_check_dtd_pe(struct xml_context *ctx)
-{
-  if (xml_peek_char(ctx) == '%')
-    {
-      xml_parse_dtd_pe(ctx);
-      return 1;
-    }
-  return 0;
-}
-
-/* External ID */
-
-static void
-xml_parse_external_id(struct xml_context *ctx, struct xml_ext_id *eid, uns allow_public, uns dtd)
-{
-  bzero(eid, sizeof(*eid));
-  if (dtd)
-    xml_check_dtd_pe(ctx);
-  uns c = xml_peek_char(ctx);
-  if (c == 'S')
-    {
-      xml_parse_seq(ctx, "SYSTEM");
-      if (dtd)
-	xml_parse_dtd_white(ctx, 1);
-      else
-	xml_parse_white(ctx, 1);
-      eid->system_id = xml_parse_system_literal(ctx);
-    }
-  else if (c == 'P')
-    {
-      xml_parse_seq(ctx, "PUBLIC");
-      if (dtd)
-	xml_parse_dtd_white(ctx, 1);
-      else
-	xml_parse_white(ctx, 1);
-      eid->public_id = xml_parse_pubid_literal(ctx);
-      if (dtd ? xml_parse_dtd_white(ctx, 0) : xml_parse_white(ctx, 0))
-	if ((c = xml_peek_char(ctx)) == '\'' || c == '"' || !allow_public)
-          eid->system_id = xml_parse_system_literal(ctx);
-    }
-  else
-    xml_fatal(ctx, "Expected an external ID");
-}
-
-/* DTD: <!NOTATION ...> */
-
-static void
-xml_parse_notation_decl(struct xml_context *ctx)
-{
-  /* NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
-   * Already parsed: '<!NOTATION' */
-  TRACE(ctx, "parse_notation_decl");
-  struct xml_dtd *dtd = ctx->dtd;
-  xml_parse_dtd_white(ctx, 1);
-
-  struct xml_dtd_notn *notn = xml_dtd_notns_lookup(dtd->tab_notns, xml_parse_name(ctx));
-  xml_parse_dtd_white(ctx, 1);
-  struct xml_ext_id eid;
-  xml_parse_external_id(ctx, &eid, 1, 1);
-  xml_parse_dtd_white(ctx, 0);
-  xml_parse_char(ctx, '>');
-
-  if (notn->flags & XML_DTD_NOTN_DECLARED)
-    xml_warn(ctx, "Notation %s already declared", notn->name);
-  else
-    {
-      notn->flags = XML_DTD_NOTN_DECLARED;
-      notn->eid = eid;
-      slist_add_tail(&dtd->notns, &notn->n);
-    }
-  xml_dec(ctx);
-}
-
-/* DTD: <!ENTITY ...> */
-
-static void
-xml_parse_entity_decl(struct xml_context *ctx)
-{
-  /* Already parsed: '<!ENTITY' */
-  TRACE(ctx, "parse_entity_decl");
-  struct xml_dtd *dtd = ctx->dtd;
-  xml_parse_dtd_white(ctx, 1);
-
-  uns flags = (xml_get_char(ctx) == '%') ? XML_DTD_ENT_PARAMETER : 0;
-  if (flags)
-    xml_parse_dtd_white(ctx, 1);
-  else
-    xml_unget_char(ctx);
-
-  struct xml_dtd_ent *ent = xml_dtd_ents_lookup(flags ? dtd->tab_pents : dtd->tab_gents, xml_parse_name(ctx));
-  slist *list = flags ? &dtd->pents : &dtd->gents;
-  xml_parse_white(ctx, 1);
-  if (ent->flags & XML_DTD_ENT_DECLARED)
-    {
-       xml_fatal(ctx, "Entity &%s; already declared, skipping not implemented", ent->name);
-       // FIXME: should be only warning
-    }
-
-  uns c, sep = xml_get_char(ctx);
-  if (sep == '\'' || sep == '"')
-    {
-      /* Internal entity:
-       * EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | "'" ([^%&'] | PEReference | Reference)* "'" */
-      struct fastbuf *out = ctx->value;
-      while (1)
-        {
-	  if ((c = xml_get_char(ctx)) == sep)
-	    break;
-	  else if (c == '%')
-	    {
-	      // FIXME
-	      ASSERT(0);
-	      //xml_parse_parameter_ref(ctx);
-	    }
-	  else if (c != '&')
-	    bput_utf8_32(out, c);
-	  else if ((c = xml_get_char(ctx)) == '#')
-	    c = xml_parse_char_ref(ctx);
-	  else
-	    {
-	      /* Bypass references to general entities */
-	      mp_push(ctx->pool);
-	      bputc(out, '&');
-	      xml_unget_char(ctx);
-	      bputs(out, xml_parse_name(ctx));
-	      xml_parse_char(ctx, ';');
-	      bputc(out, ';');
-	      mp_pop(ctx->pool);
-	    }
-	}
-      bputc(out, 0);
-      fbgrow_rewind(out);
-      slist_add_tail(list, &ent->n);
-      ent->flags = flags | XML_DTD_ENT_DECLARED;
-      ent->len = out->bstop - out->bptr - 1;
-      ent->text = mp_memdup(ctx->pool, out->bptr, ent->len + 1);
-      fbgrow_reset(out);
-    }
-  else
-    {
-      /* External entity */
-      struct xml_ext_id eid;
-      struct xml_dtd_notn *notn = NULL;
-      xml_parse_external_id(ctx, &eid, 0, 0);
-      if (!xml_parse_white(ctx, 0) || !flags)
-	xml_parse_char(ctx, '>');
-      else if (xml_get_char(ctx) != '>')
-        {
-	  /* General external unparsed entity */
-	  flags |= XML_DTD_ENT_UNPARSED;
-	  xml_parse_seq(ctx, "NDATA");
-	  xml_parse_white(ctx, 1);
-	  notn = xml_dtd_notns_lookup(dtd->tab_notns, xml_parse_name(ctx));
-	}
-      slist_add_tail(list, &ent->n);
-      ent->flags = flags | XML_DTD_ENT_DECLARED | XML_DTD_ENT_EXTERNAL;
-      ent->eid = eid;
-      ent->notn = notn;
-    }
-  xml_parse_dtd_white(ctx, 0);
-  xml_parse_char(ctx, '>');
-  xml_dec(ctx);
-}
-
-/* DTD: <!ELEMENT ...> */
-
-static void
-xml_parse_element_decl(struct xml_context *ctx)
-{
-  /* Elementdecl ::= '<!ELEMENT' S  Name  S  contentspec  S? '>'
-   * Already parsed: '<!ELEMENT' */
-  xml_parse_dtd_white(ctx, 1);
-  char *name = xml_parse_name(ctx);
-  xml_parse_dtd_white(ctx, 1);
-  struct xml_dtd *dtd = ctx->dtd;
-  struct xml_dtd_elem *elem = xml_dtd_elems_lookup(dtd->tab_elems, name);
-  if (elem->flags & XML_DTD_ELEM_DECLARED)
-    xml_fatal(ctx, "Element <%s> already declared", name);
-
-  /* contentspec ::= 'EMPTY' | 'ANY' | Mixed | children */
-  uns c = xml_peek_char(ctx);
-  if (c == 'E')
-    {
-      xml_parse_seq(ctx, "EMPTY");
-      elem->type = XML_DTD_ELEM_EMPTY;
-    }
-  else if (c == 'A')
-    {
-      xml_parse_seq(ctx, "ANY");
-      elem->type = XML_DTD_ELEM_ANY;
-    }
-  else if (c == '(')
-    {
-      xml_skip_char(ctx);
-      xml_inc(ctx);
-      xml_parse_dtd_white(ctx, 0);
-      struct xml_dtd_elem_node *parent = elem->node = mp_alloc_zero(dtd->pool, sizeof(*parent));
-      if (xml_peek_char(ctx) == '#')
-        {
-	  /* Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | '(' S? '#PCDATA' S? ')' */
-	  xml_skip_char(ctx);
-	  xml_parse_seq(ctx, "PCDATA");
-	  elem->type = XML_DTD_ELEM_MIXED;
-          parent->type = XML_DTD_ELEM_PCDATA;
-	  while (1)
-	    {
-	      xml_parse_dtd_white(ctx, 0);
-	      if ((c = xml_get_char(ctx)) == ')')
-		break;
-	      else if (c != '|')
-		xml_fatal_expected(ctx, ')');
-	      xml_parse_dtd_white(ctx, 0);
-	      struct xml_dtd_elem *son_elem = xml_dtd_elems_lookup(dtd->tab_elems, xml_parse_name(ctx));
-	      if (xml_dtd_enodes_find(dtd->tab_enodes, parent, son_elem))
-		xml_error(ctx, "Duplicate content '%s'", son_elem->name);
-	      else
-	        {
-		  struct xml_dtd_elem_node *son = xml_dtd_enodes_new(dtd->tab_enodes, parent, son_elem);
-		  slist_add_tail(&parent->sons, &son->n);
-		}
-	    }
-	  xml_dec(ctx);
-	  if (xml_peek_char(ctx) == '*')
-	    {
-	      xml_skip_char(ctx);
-	      parent->occur = XML_DTD_ELEM_OCCUR_MULT;
-	    }
-	  else if (!slist_head(&parent->sons))
-	    parent->occur = XML_DTD_ELEM_OCCUR_ONCE;
-	  else
-	    xml_fatal_expected(ctx, '*');
-	}
-      else
-        {
-	  /* children ::= (choice | seq) ('?' | '*' | '+')?
-	   * cp ::= (Name | choice | seq) ('?' | '*' | '+')?
-	   * choice ::= '(' S? cp ( S? '|' S? cp )+ S? ')'
-	   * seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' */
-
-	  elem->type = XML_DTD_ELEM_CHILDREN;
-	  parent->type = XML_DTD_ELEM_PCDATA;
-	  uns c;
-	  goto first;
-
-	  while (1)
-	    {
-	      /* After name */
-	      xml_parse_dtd_white(ctx, 0);
-	      if ((c = xml_get_char(ctx)) ==  ')')
-	        {
-		  xml_dec(ctx);
-		  if (parent->type == XML_DTD_ELEM_PCDATA)
-		    parent->type = XML_DTD_ELEM_SEQ;
-		  if ((c = xml_get_char(ctx)) == '?')
-		    parent->occur = XML_DTD_ELEM_OCCUR_OPT;
-		  else if (c == '*')
-		    parent->occur = XML_DTD_ELEM_OCCUR_MULT;
-		  else if (c == '+')
-		    parent->occur = XML_DTD_ELEM_OCCUR_PLUS;
-		  else
-		    {
-		      xml_unget_char(ctx);
-		      parent->occur = XML_DTD_ELEM_OCCUR_ONCE;
-		    }
-		  if (!parent->parent)
-		    break;
-		  parent = parent->parent;
-		  continue;
-		}
-	      else if (c == '|')
-	        {
-		  if (parent->type == XML_DTD_ELEM_PCDATA)
-		    parent->type = XML_DTD_ELEM_OR;
-		  else if (parent->type != XML_DTD_ELEM_OR)
-		    xml_fatal(ctx, "Mixed operators in the list of element children");
-		}
-	      else if (c == ',')
-	        {
-		  if (parent->type == XML_DTD_ELEM_PCDATA)
-		    parent->type = XML_DTD_ELEM_SEQ;
-		  else if (parent->type != XML_DTD_ELEM_SEQ)
-		    xml_fatal(ctx, "Mixed operators in the list of element children");
-		}
-	      else if (c == '(')
-	        {
-		  xml_inc(ctx);
-		  struct xml_dtd_elem_node *son = mp_alloc_zero(dtd->pool, sizeof(*son));
-		  son->parent = parent;
-		  slist_add_tail(&parent->sons, &son->n);
-		  parent = son->parent;
-		  son->type = XML_DTD_ELEM_MIXED;
-		}
-	      else
-	        xml_unget_char(ctx);
-
-	      /* Before name */
-	      xml_parse_dtd_white(ctx, 0);
-first:;
-	      struct xml_dtd_elem *son_elem = xml_dtd_elems_lookup(dtd->tab_elems, xml_parse_name(ctx));
-	      // FIXME: duplicates, occurance
-	      //struct xml_dtd_elem_node *son = xml_dtd_enodes_new(dtd->tab_enodes, parent, son_elem);
-	      struct xml_dtd_elem_node *son = mp_alloc_zero(dtd->pool, sizeof(*son));
-	      son->parent = parent;
-	      son->elem = son_elem;
-	      slist_add_tail(&parent->sons, &son->n);
-	    }
-	}
-    }
-  else
-    xml_fatal(ctx, "Expected element content specification");
-
-  xml_parse_dtd_white(ctx, 0);
-  xml_parse_char(ctx, '>');
-  xml_dec(ctx);
-}
-
-static char *
-xml_parse_attr_value(struct xml_context *ctx, struct xml_dtd_attr *attr UNUSED)
-{
-  uns quote = xml_parse_quote(ctx);
-  xml_push(ctx);
-  struct fastbuf *out = ctx->value;
-  while (1)
-    {
-      uns c = xml_get_char(ctx);
-      if (c == '&')
-        {
-	  xml_inc(ctx);
-	  xml_parse_ge_ref(ctx, out);
-	}
-      else if (c == quote) // FIXME: beware quotes inside parsed
-	break;
-      else if (c == '<')
-	xml_error(ctx, "Attribute value must not contain '<'"); 
-      else
-	bput_utf8_32(out, c);
-    }
-  xml_pop(ctx);
-  bputc(out, 0);
-  fbgrow_rewind(out);
-  char *value = mp_memdup(ctx->pool, out->bptr, out->bstop - out->bptr);
-  // FIXME: check value constraints / normalize value
-  fbgrow_reset(out);
-  return value;
-}
-
-static void
-xml_parse_attr_list_decl(struct xml_context *ctx)
-{
-  /* AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
-   * AttDef ::= S Name S AttType S DefaultDecl
-   * Already parsed: '<!ATTLIST' */
-  xml_parse_dtd_white(ctx, 1);
-  struct xml_dtd_elem *elem = xml_dtd_elems_lookup(ctx->dtd->tab_elems, xml_parse_name(ctx));
-
-  while (xml_parse_dtd_white(ctx, 0) && xml_peek_char(ctx) != '>')
-    {
-      char *name = xml_parse_name(ctx);
-      struct xml_dtd_attr *attr = xml_dtd_attrs_find(ctx->dtd->tab_attrs, elem, name);
-      uns ignored = 0;
-      if (attr)
-        {
-	  xml_warn(ctx, "Duplicate attribute definition");
-	  ignored++;
-	}
-      else
-	attr = xml_dtd_attrs_new(ctx->dtd->tab_attrs, elem, name);
-      xml_parse_dtd_white(ctx, 1);
-      if (xml_peek_char(ctx) == '(')
-        {
-	  xml_skip_char(ctx); // FIXME: xml_inc/dec ?
-	  if (!ignored)
-	    attr->type = XML_ATTR_ENUM;
-	  do
-	    {
-	      xml_parse_dtd_white(ctx, 0);
-	      char *value = xml_parse_nmtoken(ctx);
-	      if (!ignored)
-		if (xml_dtd_evals_find(ctx->dtd->tab_evals, attr, value))
-		  xml_error(ctx, "Duplicate enumeration value");
-	        else
-		  xml_dtd_evals_new(ctx->dtd->tab_evals, attr, value);
-	      xml_parse_dtd_white(ctx, 0);
-	    }
-	  while (xml_get_char(ctx) == '|');
-	  xml_unget_char(ctx);
-	  xml_parse_char(ctx, ')');
-	}
-      else
-        {
-	  char *type = xml_parse_name(ctx);
-	  enum xml_dtd_attribute_type t;
-	  if (!strcmp(type, "CDATA"))
-	    t = XML_ATTR_CDATA;
-	  else if (!strcmp(type, "ID"))
-	    t = XML_ATTR_ID;
-	  else if (!strcmp(type, "IDREF"))
-	    t = XML_ATTR_IDREF;
-	  else if (!strcmp(type, "IDREFS"))
-	    t = XML_ATTR_IDREFS;
-	  else if (!strcmp(type, "ENTITY"))
-	    t = XML_ATTR_ENTITY;
-	  else if (!strcmp(type, "ENTITIES"))
-	    t = XML_ATTR_ENTITIES;
-	  else if (!strcmp(type, "NMTOKEN"))
-	    t = XML_ATTR_NMTOKEN;
-	  else if (!strcmp(type, "NMTOKENS"))
-	    t = XML_ATTR_NMTOKENS;
-	  else if (!strcmp(type, "NOTATION"))
-	    {
-	      if (elem->type == XML_DTD_ELEM_EMPTY)
-		xml_fatal(ctx, "Empty element must not have notation attribute");
-	      // FIXME: An element type MUST NOT have more than one NOTATION attribute specified.
-	      t = XML_ATTR_NOTATION;
-	      xml_parse_dtd_white(ctx, 1);
-	      xml_parse_char(ctx, '(');
-	      do
-	        {
-		  xml_parse_dtd_white(ctx, 0);
-		  struct xml_dtd_notn *n = xml_dtd_notns_lookup(ctx->dtd->tab_notns, xml_parse_name(ctx));
-		  if (!ignored)
-		    if (xml_dtd_enotns_find(ctx->dtd->tab_enotns, attr, n))
-		      xml_error(ctx, "Duplicate enumerated notation");
-		    else
-		      xml_dtd_enotns_new(ctx->dtd->tab_enotns, attr, n);
-		  xml_parse_dtd_white(ctx, 0);
-		}
-	      while (xml_get_char(ctx) == '|');
-	      xml_unget_char(ctx);
-	      xml_parse_char(ctx, ')');
-	    }
-	  else
-	    xml_fatal(ctx, "Unknown attribute type");
-	  if (!ignored)
-	    attr->type = t;
-	}
-      xml_parse_dtd_white(ctx, 1);
-      enum xml_dtd_attribute_default def = XML_ATTR_NONE;
-      if (xml_get_char(ctx) == '#')
-	switch (xml_peek_char(ctx))
-          {
-	    case 'R':
-	      xml_parse_seq(ctx, "REQUIRED");
-	      def = XML_ATTR_REQUIRED;
-	      break;
-	    case 'I':
-	      xml_parse_seq(ctx, "IMPLIED");
-	      def = XML_ATTR_IMPLIED;
-	      break;
-	    case 'F':
-	      xml_parse_seq(ctx, "FIXED");
-	      def = XML_ATTR_FIXED;
-	      xml_parse_dtd_white(ctx, 1);
-	      break;
-	    default:
-	      xml_fatal(ctx, "Expected a modifier for default attribute value");
-	  }
-      else
-	xml_unget_char(ctx);
-      if (def != XML_ATTR_REQUIRED && def != XML_ATTR_IMPLIED)
-        {
-	  char *v = xml_parse_attr_value(ctx, attr);
-	  if (!ignored)
-	    attr->default_value = v;
-	}
-      if (!ignored)
-	attr->default_mode = def;
-    }
-  xml_skip_char(ctx);
-  xml_dec(ctx);
-}
-
-/* DTD: Internal subset */
-
-static void
-xml_parse_internal_subset(struct xml_context *ctx)
-{
-  // FIXME: comments/pi have no parent
-  /* '[' intSubset ']'
-   * intSubset :== (markupdecl | DeclSep)
-   * Already parsed: ']' */
-  while (1)
-    {
-      xml_parse_white(ctx, 0);
-      uns c = xml_get_char(ctx);
-      xml_inc(ctx);
-      if (c == '<')
-	if ((c = xml_get_char(ctx)) == '!')
-	  switch (c = xml_get_char(ctx))
-	    {
-	      case '-':
-		xml_push_comment(ctx);
-		xml_pop_comment(ctx);
-		break;
-	      case 'N':
-		xml_parse_seq(ctx, "OTATION");
-		xml_parse_notation_decl(ctx);
-		break;
-	      case 'E':
-		if ((c = xml_get_char(ctx)) == 'N')
-		  {
-		    xml_parse_seq(ctx, "TITY");
-		    xml_parse_entity_decl(ctx);
-		  }
-		else if (c == 'L')
-		  {
-		    xml_parse_seq(ctx, "EMENT");
-		    xml_parse_element_decl(ctx);
-		  }
-		else
-		  goto invalid_markup;
-		break;
-	      case 'A':
-		xml_parse_seq(ctx, "TTLIST");
-		xml_parse_attr_list_decl(ctx);
-		break;
-	      default:
-		goto invalid_markup;
-	    }
-        else if (c == '?')
-	  {
-	    xml_push_pi(ctx);
-	    xml_pop_pi(ctx);
-	  }
-        else
-	  goto invalid_markup;
-      else if (c == '%')
-	xml_parse_dtd_pe(ctx);
-      else if (c == ']')
-	break;
-      else
-	goto invalid_markup;
-    }
-  xml_dec(ctx);
-  xml_dec(ctx);
-  return;
-invalid_markup:
-  xml_fatal(ctx, "Invalid markup in the internal subset");
-}
-
-///////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-static void
-xml_parse_cdata(struct xml_context *ctx)
-{
-  struct fastbuf *out = ctx->chars;
-  xml_parse_seq(ctx, "CDATA[");
-  while (1)
-    {
-      uns c;
-      if ((c = xml_get_char(ctx)) == ']')
-        {
-          if ((c = xml_get_char(ctx)) == ']')
-	    if ((c = xml_get_char(ctx)) == '>')
-	      break;
-	    else
-	      bputc(out, ']');
-	  bputc(out, ']');
-	}
-      bput_utf8_32(out, c);
-    }
-}
-
-static void
-xml_skip_cdata(struct xml_context *ctx)
-{
-  xml_parse_cdata(ctx);
-}
-
-static void
-xml_parse_chars(struct xml_context *ctx)
-{
-  TRACE(ctx, "parse_chars");
-  struct fastbuf *out = ctx->chars;
-  uns c;
-  while ((c = xml_get_char(ctx)) != '<')
-    if (c == '&')
-      {
-	xml_inc(ctx);
-        xml_parse_ge_ref(ctx, out);
-      }
-    else
-      bput_utf8_32(out, c);
-  xml_unget_char(ctx);
-}
-
-/*----------------------------------------------*/
-
-struct xml_attrs_table;
-
-static inline uns
-xml_attrs_hash(struct xml_attrs_table *t UNUSED, struct xml_elem *e, char *n)
-{
-  return hash_pointer(e) ^ hash_string(n);
-}
-
-static inline int
-xml_attrs_eq(struct xml_attrs_table *t UNUSED, struct xml_elem *e1, char *n1, struct xml_elem *e2, char *n2)
-{
-  return (e1 == e2) && !strcmp(n1, n2);
-}
-
-static inline void
-xml_attrs_init_key(struct xml_attrs_table *t UNUSED, struct xml_attr *a, struct xml_elem *e, char *name)
-{
-  a->elem = e;
-  a->name = name;
-  a->val = NULL;
-  slist_add_tail(&e->attrs, &a->n);
-}
-
-#define HASH_PREFIX(x) xml_attrs_##x
-#define HASH_NODE struct xml_attr
-#define HASH_KEY_COMPLEX(x) x elem, x name
-#define HASH_KEY_DECL struct xml_elem *elem, char *name
-#define HASH_TABLE_DYNAMIC
-#define HASH_GIVE_EQ
-#define HASH_GIVE_HASHFN
-#define HASH_GIVE_INIT_KEY
-#define HASH_WANT_CLEANUP
-#define HASH_WANT_REMOVE
-#define HASH_WANT_LOOKUP
-#define HASH_WANT_FIND
-#define HASH_GIVE_ALLOC
-XML_HASH_GIVE_ALLOC
-#include "lib/hashtable.h"
-
-void
-xml_init(struct xml_context *ctx)
-{
-  bzero(ctx, sizeof(*ctx));
-  ctx->pool = mp_new(65536);
-  ctx->chars = fbgrow_create(4096);
-  ctx->value = fbgrow_create(4096);
-  xml_dtd_init(ctx);
-  xml_attrs_init(ctx->tab_attrs = xml_hash_new(ctx->pool, sizeof(struct xml_attrs_table)));
-}
-
-void
-xml_cleanup(struct xml_context *ctx)
-{
-  xml_attrs_cleanup(ctx->tab_attrs);
-  xml_dtd_cleanup(ctx);
-  bclose(ctx->value);
-  bclose(ctx->chars);
-  mp_delete(ctx->pool);
-}
-
-static void
-xml_parse_attr(struct xml_context *ctx)
-{
-  // FIXME: memory management, dtd, literal
-  TRACE(ctx, "parse_attr");
-  struct xml_elem *e = ctx->elem;
-  char *name = xml_parse_name(ctx);
-  struct xml_attr *a = xml_attrs_lookup(ctx->tab_attrs, e, name);
-  xml_parse_eq(ctx);
-  char *val =xml_parse_system_literal(ctx);
-  if (a->val)
-    xml_error(ctx, "Attribute is not unique");
-  else
-    a->val = val;
-}
-
-static void
-xml_parse_stag(struct xml_context *ctx)
-{
-  // FIXME: dtd
-  TRACE(ctx, "parse_stag");
-  xml_push(ctx);
-  struct xml_elem *e = mp_alloc_zero(ctx->pool, sizeof(*e));
-  struct xml_elem *parent = ctx->elem;
-  clist_init(&e->sons);
-  e->node.parent = (void *)parent;
-  ctx->elem = e;
-  e->name = xml_parse_name(ctx);
-  if (parent)
-    clist_add_tail(&parent->sons, &e->node.n);
-  else
-    {
-      ctx->root = e;
-      if (ctx->document_type && strcmp(e->name, ctx->document_type))
-	xml_error(ctx, "The root element does not match the document type");
-    }
-  while (1)
-    {
-      uns white = xml_parse_white(ctx, 0);
-      uns c = xml_get_char(ctx);
-      if (c == '/')
-        {
-	  xml_parse_char(ctx, '>');
-	  ctx->flags |= XML_FLAG_EMPTY_ELEM;
-	  break;
-	}
-      else if (c == '>')
-	break;
-      else if (!white)
-	xml_fatal_expected_white(ctx);
-      xml_unget_char(ctx);
-      xml_parse_attr(ctx);
-    }
-  if (ctx->h_element_start)
-    ctx->h_element_start(ctx);
-}
-
-static void
-xml_parse_etag(struct xml_context *ctx)
-{
-  TRACE(ctx, "parse_etag");
-  struct xml_elem *e = ctx->elem;
-  ASSERT(e);
-  char *name = xml_parse_name(ctx);
-  if (strcmp(name, e->name))
-    xml_fatal(ctx, "Invalid ETag, expected '%s'", e->name);
-  xml_parse_white(ctx, 0);
-  xml_parse_char(ctx, '>');
-  xml_dec(ctx);
-}
-
-static void
-xml_pop_element(struct xml_context *ctx)
-{
-  TRACE(ctx, "pop_element");
-  if (ctx->h_element_end)
-    ctx->h_element_end(ctx);
-  struct xml_elem *e = ctx->elem;
-  if (ctx->flags & XML_DOM_FREE)
-    {
-      if (e->node.parent)
-        clist_remove(&e->node.n);
-      else
-	ctx->root = NULL;
-      SLIST_FOR_EACH(struct xml_attr *, a, e->attrs)
-	xml_attrs_remove(ctx->tab_attrs, a);
-      struct xml_node *n;
-      while (n = clist_head(&e->sons))
-        {
-	  if (n->type == XML_NODE_ELEM)
-	    {
-	      SLIST_FOR_EACH(struct xml_attr *, a, ((struct xml_elem *)n)->attrs)
-		xml_attrs_remove(ctx->tab_attrs, a);
-	      clist_insert_list_after(&((struct xml_elem *)n)->sons, &n->n);
-	    }
-	  clist_remove(&n->n);
-	}
-    }
-  ctx->node = e->node.parent;
-  xml_pop(ctx); // FIXME: memory management without XML_DOM_FREE
-  xml_dec(ctx);
-#if 0
-  for (struct xml_attribute *a = e->attrs; a; a = a->next)
-    xml_attribute_remove(ctx->attribute_table, a);
-#endif
-}
-
-static void
-xml_parse_doctype_decl(struct xml_context *ctx)
-{
-  if (ctx->document_type)
-    xml_fatal(ctx, "Multiple document types not allowed");
-  xml_parse_seq(ctx, "DOCTYPE");
-  xml_parse_white(ctx, 1);
-  ctx->document_type = xml_parse_name(ctx);
-  TRACE(ctx, "doctyype=%s", ctx->document_type);
-  uns white = xml_parse_white(ctx, 0);
-  uns c = xml_peek_char(ctx);
-  if (c != '>' && c != '[' && white)
-    {
-      xml_parse_external_id(ctx, &ctx->eid, 0, 0);
-      xml_parse_white(ctx, 0);
-      ctx->flags |= XML_FLAG_HAS_EXTERNAL_SUBSET;
-    }
-  if (xml_peek_char(ctx) == '[')
-    ctx->flags |= XML_FLAG_HAS_INTERNAL_SUBSET;
-  if (ctx->h_doctype_decl)
-    ctx->h_doctype_decl(ctx);
-}
-
-int
-xml_next(struct xml_context *ctx)
-{
-  /* A nasty state machine */
-
-  TRACE(ctx, "xml_next (state=%u)", ctx->state);
-  jmp_buf throw_buf;
-  ctx->throw_buf = &throw_buf;
-  if (setjmp(throw_buf))
-    {
-error:
-      if (ctx->err_code == XML_ERR_EOF && ctx->h_fatal)
-	ctx->h_fatal(ctx);
-      ctx->state = XML_STATE_FATAL;
-      TRACE(ctx, "raised fatal error");
-      return -1;
-    }
-  uns c;
-  switch (ctx->state)
-    {
-      case XML_STATE_FATAL:
-	return -1;
-
-      case XML_STATE_START:
-	TRACE(ctx, "entering prolog");
-	if (ctx->h_document_start)
-	  ctx->h_document_start(ctx);
-	/* XMLDecl */
-	xml_refill(ctx);
-	if (ctx->h_xml_decl)
-	  ctx->h_xml_decl(ctx);
-	if (ctx->want & XML_WANT_DECL)
-	  return ctx->state = XML_STATE_DECL;
-      case XML_STATE_DECL:
-
-	/* Misc* (doctypedecl Misc*)? */
-        while (1)
-	  {
-	    xml_parse_white(ctx, 0);
-	    xml_parse_char(ctx, '<');
-	    if ((c = xml_get_char(ctx)) == '?')
-	      /* Processing intruction */
-	      if (!(ctx->want & XML_WANT_PI))
-	        xml_skip_pi(ctx);
-	      else
-	        {
-		  xml_push_pi(ctx);
-		  ctx->state = XML_STATE_PROLOG_PI;
-		  return XML_STATE_PI;
-      case XML_STATE_PROLOG_PI:
-		  xml_pop_pi(ctx);
-	        }
-	    else if (c != '!')
-	      {
-		/* Found the root tag */
-		xml_unget_char(ctx);
-		goto first_tag;
-	      }
-	    else if (xml_get_char(ctx) == '-')
-	      if (!(ctx->want & XML_WANT_COMMENT))
-		xml_skip_comment(ctx);
-	      else
-	        {
-		  xml_push_comment(ctx);
-		  ctx->state = XML_STATE_PROLOG_COMMENT;
-		  return XML_STATE_COMMENT;
-      case XML_STATE_PROLOG_COMMENT:
-		  xml_pop_comment(ctx);
-		}
-	    else
-	      {
-		/* DocTypeDecl */
-		xml_unget_char(ctx);
-		xml_parse_doctype_decl(ctx);
-		if (ctx->want & XML_WANT_DOCUMENT_TYPE)
-		  return ctx->state = XML_STATE_DOCUMENT_TYPE;
-      case XML_STATE_DOCUMENT_TYPE:
-		if (xml_peek_char(ctx) == '[')
-		  {
-		    xml_skip_char(ctx);
-		    xml_inc(ctx);
-		    xml_parse_internal_subset(ctx);
-		    xml_parse_white(ctx, 0);
-		  }
-		xml_parse_char(ctx, '>');
-	      }
-	  }
-
-      case XML_STATE_PI:
-	mp_pop(ctx->pool);
-      case XML_STATE_COMMENT:
-	fbgrow_reset(ctx->value);
-
-      case XML_STATE_CHARS:
-
-	while (1)
-	  {
-	    if (xml_peek_char(ctx) != '<')
-	      {
-		/* CharData */
-	        xml_parse_chars(ctx);
-		continue;
-	      }
-	    else
-	      xml_skip_char(ctx);
-first_tag: ;
-
-	    xml_inc(ctx);
-	    if ((c = xml_get_char(ctx)) == '?')
-	      {
-		/* PI */
-	        if (!(ctx->want & XML_WANT_PI))
-	          xml_skip_pi(ctx);
-	        else
-		  {
-		    if (btell(ctx->chars))
-		      {
-			fbgrow_rewind(ctx->chars);
-			ctx->state = XML_STATE_CHARS_BEFORE_PI;
-			return XML_STATE_PI;
-      case XML_STATE_CHARS_BEFORE_PI:
-			fbgrow_reset(ctx->chars);
-		      }
-		    xml_push_pi(ctx);
-		    return ctx->state = XML_STATE_PI;
-		  }
-	      }
-
-	    else if (c == '!')
-	      if ((c = xml_get_char(ctx)) == '-')
-	        {
-		  /* Comment */
-		  if (!(ctx->want & XML_WANT_COMMENT))
-		    xml_skip_comment(ctx);
-		  else
-		    {
-		      if (btell(ctx->chars))
-		        {
-			  fbgrow_rewind(ctx->chars);
-			  ctx->state = XML_STATE_CHARS_BEFORE_COMMENT;
-			  return XML_STATE_CHARS;
-      case XML_STATE_CHARS_BEFORE_COMMENT:
-			  fbgrow_reset(ctx->chars);
-			}
-		      xml_push_comment(ctx);
-		      return ctx->state = XML_STATE_COMMENT;
-		    }
-		}
-	      else if (c == '[')
-	        {
-		  /* CDATA */
-		  if (!(ctx->want & XML_WANT_CDATA))
-		    xml_skip_cdata(ctx);
-		  else
-		    {
-		      if (btell(ctx->chars))
-		        {
-			  fbgrow_rewind(ctx->chars);
-			  ctx->state = XML_STATE_CHARS_BEFORE_CDATA;
-			  return XML_STATE_CHARS;
-      case XML_STATE_CHARS_BEFORE_CDATA:
-			  fbgrow_reset(ctx->chars);
-			}
-		      xml_parse_cdata(ctx);
-		      if (btell(ctx->chars))
-		        {
-			  fbgrow_rewind(ctx->chars);
-			  return ctx->state = XML_STATE_CDATA;
-			}
-      case XML_STATE_CDATA:
-		      fbgrow_reset(ctx->chars);
-		    }
-		}
-	      else
-		xml_fatal(ctx, "Unexpected character after '<!'");
-
-	    else if (c != '/')
-	      {
-		/* STag | EmptyElemTag */
-		xml_unget_char(ctx);
-		if (btell(ctx->chars))
-		  {
-		    fbgrow_rewind(ctx->chars);
-		    ctx->state = XML_STATE_CHARS_BEFORE_STAG;
-		    return XML_STATE_CHARS;
-      case XML_STATE_CHARS_BEFORE_STAG:
-		    fbgrow_reset(ctx->chars);
-		  }
-
-		xml_parse_stag(ctx);
-		if (ctx->want & XML_WANT_STAG)
-		  return ctx->state = XML_STATE_STAG;
-      case XML_STATE_STAG:
-		if (ctx->flags & XML_FLAG_EMPTY_ELEM)
-		  goto pop_element;
-	      }
-
-	    else
-	      {
-		/* ETag */
-		if (btell(ctx->chars))
-		  {
-		    fbgrow_rewind(ctx->chars);
-		    ctx->state = XML_STATE_CHARS_BEFORE_ETAG;
-		    return XML_STATE_CHARS;
-      case XML_STATE_CHARS_BEFORE_ETAG:
-		    fbgrow_reset(ctx->chars);
-		  }
-
-		xml_parse_etag(ctx);
-pop_element:
-		if (ctx->want & XML_WANT_ETAG)
-		  return ctx->state = XML_STATE_ETAG;
-      case XML_STATE_ETAG:
-		xml_pop_element(ctx);
-		if (!ctx->elem)
-		  goto epilog;
-	      }
-	  }
-
-epilog:
-	/* Misc* */
-        TRACE(ctx, "entering epilog");
-	while (1)
-	  {
-	    /* Epilog whitespace is the only place, where a valid document can reach EOF */
-	    if (setjmp(throw_buf))
-	      if (ctx->err_code == XML_ERR_EOF)
-	        {
-		  TRACE(ctx, "reached EOF");
-		  ctx->state = XML_STATE_EOF;
-		  if (ctx->h_document_end)
-		    ctx->h_document_end(ctx);
-      case XML_STATE_EOF:
-		  return XML_STATE_EOF;
-		}
-	      else
-		goto error;
-	    xml_parse_white(ctx, 0);
-	    if (setjmp(throw_buf))
-	      goto error;
-
-	    /* Misc */
-	    xml_parse_char(ctx, '<');
-	    if ((c = xml_get_char(ctx)) == '?')
-	      /* Processing instruction */
-	      if (!(ctx->want & XML_WANT_PI))
-	        xml_skip_pi(ctx);
-	      else
-	        {
-		  xml_push_pi(ctx);
-		  return ctx->state = XML_STATE_EPILOG_PI, XML_STATE_PI;
-      case XML_STATE_EPILOG_PI:
-		  xml_pop_pi(ctx);
-	        }
-	    else if (c == '!')
-	      /* Comment */
-	      if (!(ctx->want & XML_WANT_COMMENT))
-		xml_skip_comment(ctx);
-	      else
-	        {
-		  xml_push_comment(ctx);
-		  return ctx->state = XML_STATE_EPILOG_COMMENT, XML_STATE_COMMENT;
-      case XML_STATE_EPILOG_COMMENT:
-		  xml_pop_comment(ctx);
-		}
-	    else
-	      xml_fatal(ctx, "Syntax error in the epilog");
-	  }
-
-    }
-  return -1;
-}
-
-#ifdef TEST
-
-static void
-error(struct xml_context *ctx)
-{
-  msg((ctx->err_code < XML_ERR_ERROR) ? L_WARN_R : L_ERROR_R, "XML %u: %s", xml_row(ctx), ctx->err_msg);
-}
-
-static void
-test(struct fastbuf *in, struct fastbuf *out)
-{
-  struct xml_context ctx;
-  xml_init(&ctx);
-  ctx.h_warn = ctx.h_error = ctx.h_fatal = error;
-  ctx.want = XML_WANT_ALL;
-  ctx.flags |= XML_DOM_FREE;
-  xml_set_source(&ctx, in);
-  int state;
-  while ((state = xml_next(&ctx)) >= 0)
-    switch (state)
-      {
-	case XML_STATE_CHARS:
-	  bprintf(out, "CHARS [%.*s]\n", (int)(ctx.chars->bstop - ctx.chars->buffer), ctx.chars->buffer);
-	  break;
-	case XML_STATE_STAG:
-	  bprintf(out, "STAG <%s>\n", ctx.elem->name);
-	  SLIST_FOR_EACH(struct xml_attr *, a, ctx.elem->attrs)
-	    bprintf(out, "  ATTR %s=[%s]\n", a->name, a->val);
-	  break;
-	case XML_STATE_ETAG:
-	  bprintf(out, "ETAG </%s>\n", ctx.elem->name);
-	  break;
-	case XML_STATE_COMMENT:
-	  bprintf(out, "COMMENT [%.*s]\n", (int)(ctx.value->bstop - ctx.value->buffer), ctx.value->buffer);
-	  break;
-	case XML_STATE_PI:
-	  bprintf(out, "PI [%s] [%.*s]\n", ctx.name, (int)(ctx.value->bstop - ctx.value->buffer), ctx.value->buffer);
-	  break;
-	case XML_STATE_CDATA:
-	  bprintf(out, "CDATA [%.*s]\n", (int)(ctx.chars->bstop - ctx.chars->buffer), ctx.chars->buffer);
-	  break;
-	case XML_STATE_EOF:
-	  bprintf(out, "EOF\n");
-	  goto end;
-	  break;
-      }
-end:
-  xml_cleanup(&ctx);
-}
-
-int
-main(void)
-{
-  struct fastbuf *in = bfdopen_shared(0, 1024);
-  struct fastbuf *out = bfdopen_shared(1, 1024);
-  test(in, out);
-  bclose(out);
-  return 0;
-}
-
-#endif
diff --git a/sherlock/xml/xml.h b/sherlock/xml/xml.h
index 7e83f65a..db6ab6c6 100644
--- a/sherlock/xml/xml.h
+++ b/sherlock/xml/xml.h
@@ -13,6 +13,7 @@
 #include "lib/clists.h"
 #include "lib/slists.h"
 #include "lib/mempool.h"
+#include "lib/fastbuf.h"
 
 enum xml_error {
   XML_ERR_OK = 0,
@@ -98,31 +99,29 @@ struct xml_node {
   cnode n;					/* Node for list of parent's sons */
   uns type;					/* XML_NODE_x */
   struct xml_node *parent;			/* Parent node */
-};
-
-struct xml_elem {
-  struct xml_node node;
-  char *name;					/* Element name */
-  clist sons;					/* List of subnodes */
-  struct xml_dtd_elem *dtd;			/* Element DTD */
-  slist attrs;					/* Link list of attributes */
+  char *name;					/* Element name / PI target */
+  clist sons;					/* Children nodes */
+  union {
+    struct {
+      char *text;				/* PI text / Comment / CDATA */
+      uns len;					/* Text length in bytes */
+    };
+    struct {
+      struct xml_dtd_elem *dtd;			/* Element DTD */
+      slist attrs;				/* Link list of element attributes */
+    };
+  };
 };
 
 struct xml_attr {
   snode n;
-  struct xml_elem *elem;
+  struct xml_node *elem;
   char *name;
   char *val;
 };
 
 struct xml_context;
 
-struct xml_stack {
-  struct xml_stack *next;			/* Link list of stack records */
-  uns saved_flags;				/* Saved ctx->flags */
-  struct mempool_state saved_pool;		/* Saved ctx->pool state */
-};
-
 #define XML_BUF_SIZE 32				/* At least 16 -- hardcoded */
 
 struct xml_source {
@@ -152,16 +151,13 @@ struct xml_context {
   void (*h_fatal)(struct xml_context *ctx);		/* Unrecoverable error callback */
 
   /* Memory management */
-  struct mempool *pool;					/* Most data */
-  struct fastbuf *chars;				/* Character data */
-  struct fastbuf *value;				/* Attribute value / comment / processing instruction data */
-  char *name;						/* Attribute name, processing instruction target */
-  void *tab_attrs;
-
-  /* Stack */
-  struct xml_stack *stack;				/* See xml_push(), xml_pop() */
+  struct mempool *pool;					/* DOM pool */
+  struct mempool *stack;				/* Stack pool (freed as soon as possible) */
+  struct xml_stack *stack_list;				/* See xml_push(), xml_pop() */
   uns flags;						/* XML_FLAG_x (restored on xml_pop()) */
   uns depth;						/* Nesting level */
+  struct fastbuf chars;					/* Character data / attribute value */
+  void *tab_attrs;
 
   /* Input */
   struct xml_source *src;				/* Current source */
@@ -172,17 +168,16 @@ struct xml_context {
   void (*h_document_end)(struct xml_context *ctx);	/* Called after leaving epilog */
   void (*h_xml_decl)(struct xml_context *ctx);		/* Called after the XML declaration */
   void (*h_doctype_decl)(struct xml_context *ctx);	/* Called in the doctype declaration just before internal subset */
-  void (*h_pi)(struct xml_context *ctx);		/* Called after a processing instruction */
   void (*h_comment)(struct xml_context *ctx);		/* Called after a comment */
+  void (*h_pi)(struct xml_context *ctx);		/* Called after a processing instruction */
   void (*h_element_start)(struct xml_context *ctx);	/* Called after STag or EmptyElemTag */
   void (*h_element_end)(struct xml_context *ctx);	/* Called before ETag or after EmptyElemTag */
+  void (*h_chars)(struct xml_context *ctx);		/* Called after some characters */
+  void (*h_cdata)(struct xml_context *ctx);		/* Called after a CDATA section */
 
   /* DOM */
-  struct xml_elem *root;				/* DOM root */
-  union {
-    struct xml_node *node;				/* Current DOM node */
-    struct xml_elem *elem;				/* Current element */
-  };
+  struct xml_node *root;				/* DOM root */
+  struct xml_node *node;				/* Current DOM node */
 
   char *version_str;
   uns standalone;
@@ -194,11 +189,8 @@ struct xml_context {
 
   void (*start_dtd)(struct xml_context *ctx);
   void (*end_dtd)(struct xml_context *ctx);
-  void (*start_cdata)(struct xml_context *ctx);
-  void (*end_cdata)(struct xml_context *ctx);
   void (*start_entity)(struct xml_context *ctx);
   void (*end_entity)(struct xml_context *ctx);
-  void (*chacacters)(struct xml_context *ctx);
   struct fastbuf *(*resolve_entity)(struct xml_context *ctx);
   void (*notation_decl)(struct xml_context *ctx);
   void (*unparsed_entity_decl)(struct xml_context *ctx);
@@ -208,5 +200,6 @@ void xml_init(struct xml_context *ctx);
 void xml_cleanup(struct xml_context *ctx);
 void xml_set_source(struct xml_context *ctx, struct fastbuf *fb);
 int xml_next(struct xml_context *ctx);
+uns xml_row(struct xml_context *ctx);
 
 #endif
-- 
2.39.5