Cleanup: Moved libshxml to shxml/

author Martin Mares <mj@ucw.cz>

Tue, 13 Jul 2010 11:45:30 +0000 (13:45 +0200)

committer Martin Mares <mj@ucw.cz>

Tue, 13 Jul 2010 11:45:30 +0000 (13:45 +0200)
author Martin Mares <mj@ucw.cz>
Tue, 13 Jul 2010 11:45:30 +0000 (13:45 +0200)
committer Martin Mares <mj@ucw.cz>
Tue, 13 Jul 2010 11:45:30 +0000 (13:45 +0200)
diff --git a/sherlock/xml/Makefile b/sherlock/xml/Makefile

deleted file mode 100644 (file)

index 23e08b2..0000000
--- a/sherlock/xml/Makefile
+++ /dev/null
@@ -1,46 +0,0 @@
-# Makefile for the XML parser
-# (c) 2007 Pavel Charvat <pchar@ucw.cz>
-# Append this directory and the xml-test program to the build-wide DIRS and PROGS lists.
-DIRS+=sherlock/xml
-PROGS+=$(o)/sherlock/xml/xml-test
-# Object modules linked into libshxml and the headers listed for installation.
-LIBSHXML_MODS=common source parse dtd
-LIBSHXML_INCLUDES=xml.h dtd.h
-# The module names prefixed with their location in the object tree $(o).
-LIBSHXML_MOD_PATHS=$(addprefix $(o)/sherlock/xml/,$(LIBSHXML_MODS))
-# Static library (.o objects), shared library (.oo objects) and the pkg-config file.
-$(o)/sherlock/xml/libshxml.a: $(addsuffix .o,$(LIBSHXML_MOD_PATHS))
-$(o)/sherlock/xml/libshxml.so: $(addsuffix .oo,$(LIBSHXML_MOD_PATHS))
-$(o)/sherlock/xml/libshxml.pc: $(LIBSH) $(LIBCHARSET)
-# All four modules depend on unicat.h; the rule below runs unicat.pl with the names of unicat.h and unicat.c.
-$(o)/sherlock/xml/common.o: $(o)/sherlock/xml/unicat.h
-$(o)/sherlock/xml/common.oo: $(o)/sherlock/xml/unicat.h
-$(o)/sherlock/xml/source.o: $(o)/sherlock/xml/unicat.h
-$(o)/sherlock/xml/source.oo: $(o)/sherlock/xml/unicat.h
-$(o)/sherlock/xml/dtd.o: $(o)/sherlock/xml/unicat.h
-$(o)/sherlock/xml/dtd.oo: $(o)/sherlock/xml/unicat.h
-$(o)/sherlock/xml/parse.o: $(o)/sherlock/xml/unicat.h
-$(o)/sherlock/xml/parse.oo: $(o)/sherlock/xml/unicat.h
-$(o)/sherlock/xml/unicat.h: $(s)/sherlock/xml/unicat.pl
-       $(M)GEN $(addprefix $(o)/sherlock/xml/unicat,.h .c)
-       $(Q)$< $(addprefix $(o)/sherlock/xml/unicat,.h .c)
-       $(Q)touch $@
-# The xml-test program, linked against $(LIBSHXML), and its .test target.
-TESTS+=$(o)/sherlock/xml/xml-test.test
-$(o)/sherlock/xml/xml-test: $(o)/sherlock/xml/xml-test.o $(LIBSHXML)
-$(o)/sherlock/xml/xml-test.test: $(o)/sherlock/xml/xml-test
-# Register the library, its include stamp and its pkg-config file in the API_* lists.
-API_LIBS+=libshxml
-API_INCLUDES+=$(o)/sherlock/xml/.include-stamp
-$(o)/sherlock/xml/.include-stamp: $(addprefix $(s)/sherlock/xml/,$(LIBSHXML_INCLUDES))
-$(o)/sherlock/xml/.include-stamp: IDST=sherlock/xml
-run/lib/pkgconfig/libshxml.pc: $(o)/sherlock/xml/libshxml.pc
-# Installation: create the target directories, then copy the headers, the pkg-config file and the library.
-INSTALL_TARGETS+=install-sh-xml
-install-sh-xml:
-       install -d -m 755 $(DESTDIR)$(INSTALL_INCLUDE_DIR)/sherlock/xml $(DESTDIR)$(INSTALL_LIB_DIR) $(DESTDIR)$(INSTALL_PKGCONFIG_DIR)
-       install -m 644 $(addprefix run/include/sherlock/xml/,$(LIBSHXML_INCLUDES)) $(DESTDIR)$(INSTALL_INCLUDE_DIR)/sherlock/xml
-       install -m 644 run/lib/pkgconfig/libshxml.pc $(DESTDIR)$(INSTALL_PKGCONFIG_DIR)
-       install -m 644 run/lib/libshxml.$(LS) $(DESTDIR)$(INSTALL_LIB_DIR)
-
-.PHONY: install-sh-xml
diff --git a/sherlock/xml/TODO b/sherlock/xml/TODO

deleted file mode 100644 (file)

index b8dbc29..0000000
--- a/sherlock/xml/TODO
+++ /dev/null
@@ -1,15 +0,0 @@
-Non-normative / not-implemented:
--- introduce numeric error codes
--- cycle detection in internal entities (and possibly external?)
--- conditional sections in DTD
--- validation of elements (regular expressions, non-cdata)
--- validation of attributes (unfinished)
--- notations
--- URI normalization
--- support for xml:space
--- support for xml:lang
--- full support for standalone documents
--- Unicode normalization
-
-Optimizations:
--- detect definitions of trivial entities
diff --git a/sherlock/xml/common.c b/sherlock/xml/common.c

deleted file mode 100644 (file)

index 6bb2737..0000000
--- a/sherlock/xml/common.c
+++ /dev/null
@@ -1,140 +0,0 @@
-/*
- *     Sherlock Library -- A simple XML parser
- *
- *     (c) 2007 Pavel Charvat <pchar@ucw.cz>
- *
- *     This software may be freely distributed and used according to the terms
- *     of the GNU Lesser General Public License.
- */
-
-#undef LOCAL_DEBUG
-
-#include "sherlock/sherlock.h"
-#include "sherlock/xml/xml.h"
-#include "sherlock/xml/dtd.h"
-#include "sherlock/xml/internals.h"
-#include "ucw/stkstring.h"
-#include "ucw/ff-unicode.h"
-
-#include <setjmp.h>
-
-/*** Error handling ***/
-
-/*
- * Jump back to the handler whose jmp_buf is stored in ctx->throw_buf,
- * passing ctx->err_code as the longjmp() value. Both must be set beforehand;
- * the function never returns.
- */
-void NONRET
-xml_throw(struct xml_context *ctx)
-{
-  ASSERT(ctx->err_code && ctx->throw_buf);
-
-  jmp_buf *env = (jmp_buf *)ctx->throw_buf;
-  longjmp(*env, ctx->err_code);
-}
-
-/*
- * Report a warning through the ctx->h_warn hook; does nothing when no hook
- * is installed. The printf-style message is exposed in ctx->err_msg together
- * with ctx->err_code == XML_ERR_WARN only while the hook runs; both fields
- * are reset afterwards.
- */
-void
-xml_warn(struct xml_context *ctx, const char *format, ...)
-{
-  if (!ctx->h_warn)
-    return;
-
-  va_list ap;
-  va_start(ap, format);
-  ctx->err_msg = stk_vprintf(format, ap);
-  ctx->err_code = XML_ERR_WARN;
-  va_end(ap);
-
-  ctx->h_warn(ctx);
-
-  ctx->err_msg = NULL;
-  ctx->err_code = XML_ERR_OK;
-}
-
-/*
- * Report a recoverable error through the ctx->h_error hook; does nothing
- * when no hook is installed. The printf-style message is exposed in
- * ctx->err_msg together with ctx->err_code == XML_ERR_ERROR only while the
- * hook runs; both fields are reset afterwards.
- */
-void
-xml_error(struct xml_context *ctx, const char *format, ...)
-{
-  if (!ctx->h_error)
-    return;
-
-  va_list ap;
-  va_start(ap, format);
-  ctx->err_msg = stk_vprintf(format, ap);
-  ctx->err_code = XML_ERR_ERROR;
-  va_end(ap);
-
-  ctx->h_error(ctx);
-
-  ctx->err_msg = NULL;
-  ctx->err_code = XML_ERR_OK;
-}
-
-/*
- * Raise a fatal error: format the message into the ctx->stack pool, set
- * ctx->err_code to XML_ERR_FATAL, switch the parser state to XML_STATE_EOF,
- * call the optional ctx->h_fatal hook and finally leave via xml_throw().
- * Never returns.
- */
-void NONRET
-xml_fatal(struct xml_context *ctx, const char *format, ...)
-{
-  va_list ap;
-
-  va_start(ap, format);
-  ctx->err_msg = mp_vprintf(ctx->stack, format, ap);
-  va_end(ap);
-
-  ctx->err_code = XML_ERR_FATAL;
-  ctx->state = XML_STATE_EOF;
-  if (ctx->h_fatal)
-    ctx->h_fatal(ctx);
-  xml_throw(ctx);
-}
-
-/*** Memory management ***/
-
-/*
- * Allocate a zero-filled hash table structure of @size bytes from @pool,
- * preceded by a header of XML_HASH_HDR_SIZE bytes. The pool pointer is
- * stored at the very beginning of the header; the returned pointer refers
- * to the first byte after the header, i.e. to the table itself.
- */
-void *
-xml_hash_new(struct mempool *pool, uns size)
-{
-  char *block = mp_alloc_zero(pool, size + XML_HASH_HDR_SIZE);
-  *(void **)block = pool;
-  /* Skip the header with byte arithmetic; adding to a void * is only a GNU extension */
-  return block + XML_HASH_HDR_SIZE;
-}
-
-/*** Initialization ***/
-
-static struct xml_context xml_defaults = {     /* template copied into a context by xml_init() and xml_reset() */
-  .flags = XML_SRC_EOF | XML_REPORT_ALL,
-  .state = XML_STATE_START,
-  .h_resolve_entity = xml_def_resolve_entity,  /* hook preset to the xml_def_resolve_entity() implementation */
-  .chars = {                                   /* embedded stream structure driven by xml_spout_chars() */
-    .name = "<xml_chars>",
-    .spout = xml_spout_chars,
-    .can_overwrite_buffer = 1,
-  },
-};
-
-static void
-xml_do_init(struct xml_context *ctx)
-{ /* Initialization step shared by xml_init() and xml_reset(), run after *ctx has been loaded from xml_defaults. */
-  xml_attrs_table_init(ctx);
-}
-
-/*
- * Initialize a context: load the defaults, create the two memory pools
- * (ctx->pool and ctx->stack, 65536 bytes each as passed to mp_new()) and
- * run the common initialization step.
- */
-void
-xml_init(struct xml_context *ctx)
-{
-  struct mempool *pool = mp_new(65536);
-  struct mempool *stack = mp_new(65536);
-
-  *ctx = xml_defaults;
-  ctx->pool = pool;
-  ctx->stack = stack;
-  xml_do_init(ctx);
-  TRACE(ctx, "init");
-}
-
-void
-xml_cleanup(struct xml_context *ctx)
-{ /* Tear down a context set up by xml_init(): sub-systems first, then both memory pools. */
-  TRACE(ctx, "cleanup");
-  xml_attrs_table_cleanup(ctx);
-  xml_dtd_cleanup(ctx);                /* deletes the DTD pool, if a DTD exists */
-  xml_sources_cleanup(ctx);
-  mp_delete(ctx->pool);
-  mp_delete(ctx->stack);
-}
-
-void
-xml_reset(struct xml_context *ctx)
-{ /* Bring the context back to its initial state while keeping both memory pools. */
-  TRACE(ctx, "reset");
-  struct mempool *pool = ctx->pool, *stack = ctx->stack;       /* remembered because *ctx is overwritten below */
-  xml_attrs_table_cleanup(ctx);
-  xml_dtd_cleanup(ctx);
-  xml_sources_cleanup(ctx);
-  mp_flush(pool);              /* the pools are flushed here, not deleted as in xml_cleanup() */
-  mp_flush(stack);
-  *ctx = xml_defaults;
-  ctx->pool = pool;
-  ctx->stack = stack;
-  xml_do_init(ctx);
-}
diff --git a/sherlock/xml/dtd.c b/sherlock/xml/dtd.c

deleted file mode 100644 (file)

index 67cb7cc..0000000
--- a/sherlock/xml/dtd.c
+++ /dev/null
@@ -1,1003 +0,0 @@
-/*
- *     Sherlock Library -- A simple XML parser
- *
- *     (c) 2007--2008 Pavel Charvat <pchar@ucw.cz>
- *
- *     This software may be freely distributed and used according to the terms
- *     of the GNU Lesser General Public License.
- */
-
-#undef LOCAL_DEBUG
-
-#include "sherlock/sherlock.h"
-#include "sherlock/xml/xml.h"
-#include "sherlock/xml/dtd.h"
-#include "sherlock/xml/internals.h"
-#include "ucw/fastbuf.h"
-#include "ucw/ff-unicode.h"
-#include "ucw/unicode.h"
-
-/* Notations
- *
- * Instantiation of the generic hash table from ucw/hashtable.h with the
- * xml_dtd_notns_ prefix. Nodes are struct xml_dtd_notn keyed by their `name'
- * string. Both find() and lookup() are requested; callers in this file use
- * the result of lookup() without a NULL check, so it presumably creates a
- * missing node (zero-filled because of HASH_ZERO_FILL). */
-
-#define HASH_PREFIX(x) xml_dtd_notns_##x
-#define HASH_NODE struct xml_dtd_notn
-#define HASH_KEY_STRING name
-#define HASH_ZERO_FILL
-#define HASH_TABLE_DYNAMIC
-#define HASH_WANT_LOOKUP
-#define HASH_WANT_FIND
-#define HASH_GIVE_ALLOC
-#define HASH_TABLE_ALLOC
-XML_HASH_GIVE_ALLOC    /* allocator glue defined outside this file -- presumably tied to the pool stored by xml_hash_new() */
-#include "ucw/hashtable.h"
-
-/*
- * Find a declared notation by @name.
- *
- * Returns NULL when the context has no DTD, when no node of that name
- * exists, or when the node exists but lacks XML_DTD_NOTN_DECLARED (a node is
- * also created by a mere reference, e.g. from an NDATA clause).
- */
-struct xml_dtd_notn *
-xml_dtd_find_notn(struct xml_context *ctx, char *name)
-{
-  struct xml_dtd *dtd = ctx->dtd;
-  /* Same guard as in xml_dtd_find_elem() and xml_dtd_find_attr() */
-  if (!dtd)
-    return NULL;
-  struct xml_dtd_notn *notn = xml_dtd_notns_find(dtd->tab_notns, name);
-  if (!notn || !(notn->flags & XML_DTD_NOTN_DECLARED))
-    return NULL;
-  return notn;
-}
-
-/* General entities
- *
- * Instantiation of the generic hash table from ucw/hashtable.h with the
- * xml_dtd_ents_ prefix. Nodes are struct xml_dtd_entity keyed by their `name'
- * string. The same table type serves two tables of the DTD: tab_ents
- * (general entities) and tab_pents (parameter entities). */
-
-#define HASH_PREFIX(x) xml_dtd_ents_##x
-#define HASH_NODE struct xml_dtd_entity
-#define HASH_KEY_STRING name
-#define HASH_ZERO_FILL
-#define HASH_TABLE_DYNAMIC
-#define HASH_WANT_FIND
-#define HASH_WANT_LOOKUP
-#define HASH_GIVE_ALLOC
-#define HASH_TABLE_ALLOC
-XML_HASH_GIVE_ALLOC    /* allocator glue defined outside this file -- presumably tied to the pool stored by xml_hash_new() */
-#include "ucw/hashtable.h"
-
-/*
- * Declare the general entity @name with the replacement @text and mark it
- * as XML_DTD_ENTITY_TRIVIAL. The entity is appended to dtd->ents.
- * If the entity has been declared already, a warning is issued and NULL is
- * returned; otherwise the entity node is returned.
- */
-static struct xml_dtd_entity *
-xml_dtd_declare_trivial_entity(struct xml_context *ctx, char *name, char *text)
-{
-  struct xml_dtd *dtd = ctx->dtd;
-  struct xml_dtd_entity *e = xml_dtd_ents_lookup(dtd->tab_ents, name);
-
-  if (!(e->flags & XML_DTD_ENTITY_DECLARED))
-    {
-      slist_add_tail(&dtd->ents, &e->n);
-      e->flags = XML_DTD_ENTITY_DECLARED | XML_DTD_ENTITY_TRIVIAL;
-      e->text = text;
-      return e;
-    }
-
-  xml_warn(ctx, "Entity &%s; already declared", name);
-  return NULL;
-}
-
-/* Declare the five predefined entities (lt, gt, amp, apos, quot) in the DTD of @ctx. */
-static void
-xml_dtd_declare_default_entities(struct xml_context *ctx)
-{
-  static char * const predefined[][2] = {
-    { "lt", "<" },
-    { "gt", ">" },
-    { "amp", "&" },
-    { "apos", "'" },
-    { "quot", "\"" },
-  };
-
-  for (uns i = 0; i < sizeof(predefined) / sizeof(predefined[0]); i++)
-    xml_dtd_declare_trivial_entity(ctx, predefined[i][0], predefined[i][1]);
-}
-
-/*
- * Entity lookup that works without any DTD: it knows only the five
- * predefined entities (lt, gt, amp, apos, quot), kept in static storage.
- * Returns NULL for every other @name.
- */
-struct xml_dtd_entity *
-xml_def_find_entity(struct xml_context *ctx UNUSED, char *name)
-{
-  static struct xml_dtd_entity predefined[] = {
-    { .name = "lt", .text = "<", .flags = XML_DTD_ENTITY_DECLARED | XML_DTD_ENTITY_TRIVIAL },
-    { .name = "gt", .text = ">", .flags = XML_DTD_ENTITY_DECLARED | XML_DTD_ENTITY_TRIVIAL },
-    { .name = "amp", .text = "&", .flags = XML_DTD_ENTITY_DECLARED | XML_DTD_ENTITY_TRIVIAL },
-    { .name = "apos", .text = "'", .flags = XML_DTD_ENTITY_DECLARED | XML_DTD_ENTITY_TRIVIAL },
-    { .name = "quot", .text = "\"", .flags = XML_DTD_ENTITY_DECLARED | XML_DTD_ENTITY_TRIVIAL },
-  };
-
-  for (uns i = 0; i < sizeof(predefined) / sizeof(predefined[0]); i++)
-    if (!strcmp(name, predefined[i].name))
-      return &predefined[i];
-  return NULL;
-}
-
-/*
- * Find a general entity by @name. The sources are tried in this order:
- *   1. the user hook ctx->h_find_entity, whose answer is final,
- *   2. the DTD of the context (only entities flagged as declared count),
- *   3. the built-in table of xml_def_find_entity() when there is no DTD.
- * Returns NULL if the entity is unknown.
- */
-struct xml_dtd_entity *
-xml_dtd_find_entity(struct xml_context *ctx, char *name)
-{
-  if (ctx->h_find_entity)
-    return ctx->h_find_entity(ctx, name);
-
-  struct xml_dtd *dtd = ctx->dtd;
-  if (!dtd)
-    return xml_def_find_entity(ctx, name);
-
-  struct xml_dtd_entity *ent = xml_dtd_ents_find(dtd->tab_ents, name);
-  if (ent && (ent->flags & XML_DTD_ENTITY_DECLARED))
-    return ent;
-  return NULL;
-}
-
-/* Parameter entities */
-
-/*
- * Find a declared parameter entity by @name in dtd->tab_pents.
- * Returns NULL when the context has no DTD, when the name is unknown or
- * when the node exists but is not flagged XML_DTD_ENTITY_DECLARED.
- */
-static struct xml_dtd_entity *
-xml_dtd_find_pentity(struct xml_context *ctx, char *name)
-{
-  struct xml_dtd *dtd = ctx->dtd;
-  /* Same guard as in xml_dtd_find_entity(), xml_dtd_find_elem() and xml_dtd_find_attr() */
-  if (!dtd)
-    return NULL;
-  struct xml_dtd_entity *ent = xml_dtd_ents_find(dtd->tab_pents, name);
-  if (!ent || !(ent->flags & XML_DTD_ENTITY_DECLARED))
-    return NULL;
-  return ent;
-}
-
-/* Elements
- *
- * Hash table of struct xml_dtd_elem nodes keyed by their `name' string,
- * generated by ucw/hashtable.h with the xml_dtd_elems_ prefix. The only
- * hand-written part is the init_data callback announced by
- * HASH_GIVE_INIT_DATA. */
-
-struct xml_dtd_elems_table;
-
-static void
-xml_dtd_elems_init_data(struct xml_dtd_elems_table *tab UNUSED, struct xml_dtd_elem *e)
-{
-  slist_init(&e->attrs);       /* the element starts with an empty list of attributes */
-}
-
-#define HASH_PREFIX(x) xml_dtd_elems_##x
-#define HASH_NODE struct xml_dtd_elem
-#define HASH_KEY_STRING name
-#define HASH_TABLE_DYNAMIC
-#define HASH_ZERO_FILL
-#define HASH_WANT_FIND
-#define HASH_WANT_LOOKUP
-#define HASH_GIVE_ALLOC
-#define HASH_GIVE_INIT_DATA    /* xml_dtd_elems_init_data() above */
-#define HASH_TABLE_ALLOC
-XML_HASH_GIVE_ALLOC    /* allocator glue defined outside this file */
-#include "ucw/hashtable.h"
-
-/*
- * Find the element called @name in the DTD of @ctx.
- * Returns NULL when the context has no DTD or the element has no node.
- * No "declared" flag is tested here, so a node created by a mere reference
- * is returned as well.
- */
-struct xml_dtd_elem *
-xml_dtd_find_elem(struct xml_context *ctx, char *name)
-{
-  if (!ctx->dtd)
-    return NULL;
-  return xml_dtd_elems_find(ctx->dtd->tab_elems, name);
-}
-
-/* Element sons
- *
- * Hash table of struct xml_dtd_elem_node keyed by the pair (parent node,
- * element), both compared as pointers. It is generated by ucw/hashtable.h
- * with the xml_dtd_enodes_ prefix; the three callbacks below supply the hash
- * function, the key comparison and the key initialization. In this file the
- * table is used to detect duplicate names in mixed content. */
-
-struct xml_dtd_enodes_table;
-
-static inline uns
-xml_dtd_enodes_hash(struct xml_dtd_enodes_table *tab UNUSED, struct xml_dtd_elem_node *parent, struct xml_dtd_elem *elem)
-{
-  return hash_pointer(parent) ^ hash_pointer(elem);    /* combine the hashes of both key pointers */
-}
-
-static inline int
-xml_dtd_enodes_eq(struct xml_dtd_enodes_table *tab UNUSED, struct xml_dtd_elem_node *parent1, struct xml_dtd_elem *elem1, struct xml_dtd_elem_node *parent2, struct xml_dtd_elem *elem2)
-{
-  return (parent1 == parent2) && (elem1 == elem2);     /* keys are equal iff both pointers are identical */
-}
-
-static inline void
-xml_dtd_enodes_init_key(struct xml_dtd_enodes_table *tab UNUSED, struct xml_dtd_elem_node *node, struct xml_dtd_elem_node *parent, struct xml_dtd_elem *elem)
-{
-  node->parent = parent;       /* store the key inside the new node */
-  node->elem = elem;
-}
-
-#define HASH_PREFIX(x) xml_dtd_enodes_##x
-#define HASH_NODE struct xml_dtd_elem_node
-#define HASH_KEY_COMPLEX(x) x parent, x elem
-#define HASH_KEY_DECL struct xml_dtd_elem_node *parent, struct xml_dtd_elem *elem
-#define HASH_GIVE_HASHFN       /* xml_dtd_enodes_hash() */
-#define HASH_GIVE_EQ           /* xml_dtd_enodes_eq() */
-#define HASH_GIVE_INIT_KEY     /* xml_dtd_enodes_init_key() */
-#define HASH_TABLE_DYNAMIC
-#define HASH_ZERO_FILL
-#define HASH_WANT_FIND
-#define HASH_WANT_NEW
-#define HASH_GIVE_ALLOC
-#define HASH_TABLE_ALLOC
-XML_HASH_GIVE_ALLOC    /* allocator glue defined outside this file */
-#include "ucw/hashtable.h"
-
-/* Element attributes
- *
- * Hash table of struct xml_dtd_attr keyed by the pair (element, attribute
- * name): the element is compared as a pointer, the name as a string. It is
- * generated by ucw/hashtable.h with the xml_dtd_attrs_ prefix; the callbacks
- * below supply the hash function, the comparison and the key
- * initialization. */
-
-struct xml_dtd_attrs_table;
-
-static inline uns
-xml_dtd_attrs_hash(struct xml_dtd_attrs_table *tab UNUSED, struct xml_dtd_elem *elem, char *name)
-{
-  return hash_pointer(elem) ^ hash_string(name);
-}
-
-static inline int
-xml_dtd_attrs_eq(struct xml_dtd_attrs_table *tab UNUSED, struct xml_dtd_elem *elem1, char *name1, struct xml_dtd_elem *elem2, char *name2)
-{
-  return (elem1 == elem2) && !strcmp(name1, name2);
-}
-
-static inline void
-xml_dtd_attrs_init_key(struct xml_dtd_attrs_table *tab UNUSED, struct xml_dtd_attr *attr, struct xml_dtd_elem *elem, char *name)
-{
-  attr->elem = elem;
-  attr->name = name;   /* the pointer is stored as is, the string is not copied */
-  slist_add_tail(&elem->attrs, &attr->n);      /* side effect: the new attribute is also linked into the list of its element */
-}
-
-#define HASH_PREFIX(x) xml_dtd_attrs_##x
-#define HASH_NODE struct xml_dtd_attr
-#define HASH_ZERO_FILL
-#define HASH_TABLE_DYNAMIC
-#define HASH_KEY_COMPLEX(x) x elem, x name
-#define HASH_KEY_DECL struct xml_dtd_elem *elem, char *name
-#define HASH_GIVE_HASHFN       /* xml_dtd_attrs_hash() */
-#define HASH_GIVE_EQ           /* xml_dtd_attrs_eq() */
-#define HASH_GIVE_INIT_KEY     /* xml_dtd_attrs_init_key() */
-#define HASH_WANT_FIND
-#define HASH_WANT_NEW
-#define HASH_GIVE_ALLOC
-#define HASH_TABLE_ALLOC
-XML_HASH_GIVE_ALLOC    /* allocator glue defined outside this file */
-#include "ucw/hashtable.h"
-
-/*
- * Find the attribute @name of the element @elem in the DTD of @ctx.
- * Returns NULL when the context has no DTD or no such attribute exists.
- */
-struct xml_dtd_attr *
-xml_dtd_find_attr(struct xml_context *ctx, struct xml_dtd_elem *elem, char *name)
-{
-  if (!ctx->dtd)
-    return NULL;
-  return xml_dtd_attrs_find(ctx->dtd->tab_attrs, elem, name);
-}
-
-/* Enumerated attribute values
- *
- * Hash table of struct xml_dtd_eval keyed by the pair (attribute, value):
- * the attribute is compared as a pointer, the value as a string. It is
- * generated by ucw/hashtable.h with the xml_dtd_evals_ prefix. Unlike the
- * tables above, this one does not define HASH_ZERO_FILL. */
-
-struct xml_dtd_evals_table;
-
-static inline uns
-xml_dtd_evals_hash(struct xml_dtd_evals_table *tab UNUSED, struct xml_dtd_attr *attr, char *val)
-{
-  return hash_pointer(attr) ^ hash_string(val);
-}
-
-static inline int
-xml_dtd_evals_eq(struct xml_dtd_evals_table *tab UNUSED, struct xml_dtd_attr *attr1, char *val1, struct xml_dtd_attr *attr2, char *val2)
-{
-  return (attr1 == attr2) && !strcmp(val1, val2);
-}
-
-static inline void
-xml_dtd_evals_init_key(struct xml_dtd_evals_table *tab UNUSED, struct xml_dtd_eval *eval, struct xml_dtd_attr *attr, char *val)
-{
-  eval->attr = attr;
-  eval->val = val;     /* the pointer is stored as is, the string is not copied */
-}
-
-#define HASH_PREFIX(x) xml_dtd_evals_##x
-#define HASH_NODE struct xml_dtd_eval
-#define HASH_TABLE_DYNAMIC
-#define HASH_KEY_COMPLEX(x) x attr, x val
-#define HASH_KEY_DECL struct xml_dtd_attr *attr, char *val
-#define HASH_GIVE_HASHFN       /* xml_dtd_evals_hash() */
-#define HASH_GIVE_EQ           /* xml_dtd_evals_eq() */
-#define HASH_GIVE_INIT_KEY     /* xml_dtd_evals_init_key() */
-#define HASH_WANT_FIND
-#define HASH_WANT_NEW
-#define HASH_GIVE_ALLOC
-#define HASH_TABLE_ALLOC
-XML_HASH_GIVE_ALLOC    /* allocator glue defined outside this file */
-#include "ucw/hashtable.h"
-
-/* Enumerated attribute notations
- *
- * Hash table of struct xml_dtd_enotn keyed by the pair (attribute,
- * notation), both compared as pointers. It is generated by ucw/hashtable.h
- * with the xml_dtd_enotns_ prefix. Like the table of enumerated values, it
- * does not define HASH_ZERO_FILL. */
-
-struct xml_dtd_enotns_table;
-
-static inline uns
-xml_dtd_enotns_hash(struct xml_dtd_enotns_table *tab UNUSED, struct xml_dtd_attr *attr, struct xml_dtd_notn *notn)
-{
-  return hash_pointer(attr) ^ hash_pointer(notn);
-}
-
-static inline int
-xml_dtd_enotns_eq(struct xml_dtd_enotns_table *tab UNUSED, struct xml_dtd_attr *attr1, struct xml_dtd_notn *notn1, struct xml_dtd_attr *attr2, struct xml_dtd_notn *notn2)
-{
-  return (attr1 == attr2) && (notn1 == notn2);
-}
-
-static inline void
-xml_dtd_enotns_init_key(struct xml_dtd_enotns_table *tab UNUSED, struct xml_dtd_enotn *enotn, struct xml_dtd_attr *attr, struct xml_dtd_notn *notn)
-{
-  enotn->attr = attr;  /* store the key inside the new node */
-  enotn->notn = notn;
-}
-
-#define HASH_PREFIX(x) xml_dtd_enotns_##x
-#define HASH_NODE struct xml_dtd_enotn
-#define HASH_TABLE_DYNAMIC
-#define HASH_KEY_COMPLEX(x) x attr, x notn
-#define HASH_KEY_DECL struct xml_dtd_attr *attr, struct xml_dtd_notn *notn
-#define HASH_GIVE_HASHFN       /* xml_dtd_enotns_hash() */
-#define HASH_GIVE_EQ           /* xml_dtd_enotns_eq() */
-#define HASH_GIVE_INIT_KEY     /* xml_dtd_enotns_init_key() */
-#define HASH_WANT_FIND
-#define HASH_WANT_NEW
-#define HASH_GIVE_ALLOC
-#define HASH_TABLE_ALLOC
-XML_HASH_GIVE_ALLOC    /* allocator glue defined outside this file */
-#include "ucw/hashtable.h"
-
-/* DTD initialization/cleanup */
-
-/*
- * Create the DTD of a context unless it already exists.
- *
- * The DTD lives in a memory pool of its own (released by xml_dtd_cleanup()).
- * All eight hash tables are allocated from that pool and initialized, and
- * the predefined entities are declared at the end.
- */
-void
-xml_dtd_init(struct xml_context *ctx)
-{
-  if (ctx->dtd)
-    return;
-
-  struct mempool *pool = mp_new(4096);
-  struct xml_dtd *dtd = mp_alloc_zero(pool, sizeof(*dtd));
-  ctx->dtd = dtd;
-  dtd->pool = pool;
-
-  /* General and parameter entities share one table type */
-  dtd->tab_ents = xml_hash_new(pool, sizeof(struct xml_dtd_ents_table));
-  xml_dtd_ents_init(dtd->tab_ents);
-  dtd->tab_pents = xml_hash_new(pool, sizeof(struct xml_dtd_ents_table));
-  xml_dtd_ents_init(dtd->tab_pents);
-
-  dtd->tab_notns = xml_hash_new(pool, sizeof(struct xml_dtd_notns_table));
-  xml_dtd_notns_init(dtd->tab_notns);
-  dtd->tab_elems = xml_hash_new(pool, sizeof(struct xml_dtd_elems_table));
-  xml_dtd_elems_init(dtd->tab_elems);
-  dtd->tab_enodes = xml_hash_new(pool, sizeof(struct xml_dtd_enodes_table));
-  xml_dtd_enodes_init(dtd->tab_enodes);
-  dtd->tab_attrs = xml_hash_new(pool, sizeof(struct xml_dtd_attrs_table));
-  xml_dtd_attrs_init(dtd->tab_attrs);
-  dtd->tab_evals = xml_hash_new(pool, sizeof(struct xml_dtd_evals_table));
-  xml_dtd_evals_init(dtd->tab_evals);
-  dtd->tab_enotns = xml_hash_new(pool, sizeof(struct xml_dtd_enotns_table));
-  xml_dtd_enotns_init(dtd->tab_enotns);
-
-  xml_dtd_declare_default_entities(ctx);
-}
-
-/*
- * Destroy the DTD of a context, if there is one: its memory pool is deleted
- * (the DTD structure itself is allocated from it) and ctx->dtd is cleared.
- */
-void
-xml_dtd_cleanup(struct xml_context *ctx)
-{
-  struct xml_dtd *dtd = ctx->dtd;
-
-  if (dtd)
-    {
-      mp_delete(dtd->pool);
-      ctx->dtd = NULL;
-    }
-}
-
-/*
- * Hook called with a context whose DTD may or may not exist.
- * It is a placeholder so far: nothing is done in either case.
- */
-void
-xml_dtd_finish(struct xml_context *ctx)
-{
-  if (ctx->dtd)
-    {
-      // FIXME: validity checks
-    }
-}
-
-/*** Parsing functions ***/
-
-/* References to parameter entities */
-
-/*
- * Parse a reference to a parameter entity and push the entity, if known.
- *
- * PEReference ::= '%' Name ';'
- * Already parsed: '%'
- *
- * The name is parsed into ctx->stack and released again before returning.
- * An unknown entity is reported by xml_error() and otherwise skipped.
- * In both cases xml_dec() is called once before the entity is pushed.
- */
-void
-xml_parse_pe_ref(struct xml_context *ctx)
-{
-  struct mempool_state saved;
-  mp_save(ctx->stack, &saved);
-  char *name = xml_parse_name(ctx, ctx->stack);
-  xml_parse_char(ctx, ';');
-
-  /* The name is still needed for the messages, so report before releasing it */
-  struct xml_dtd_entity *ent = xml_dtd_find_pentity(ctx, name);
-  if (ent)
-    {
-      TRACE(ctx, "Pushed entity %%%s;", name);
-    }
-  else
-    xml_error(ctx, "Unknown entity %%%s;", name);
-
-  mp_restore(ctx->stack, &saved);
-  xml_dec(ctx);
-  if (ent)
-    xml_push_entity(ctx, ent);
-}
-
-static uns
-xml_parse_dtd_pe(struct xml_context *ctx, uns entity_decl)
-{
-  /* Processes a run of parameter entity references: each one is handled by
-   * xml_parse_pe_ref(), the white space following it is skipped, and the
-   * loop goes on as long as the next character is another '%'.
-   *
-   * entity_decl == ~0U is a special mode used while parsing '<!ENTITY':
-   * a '%' followed by white space is not a reference, but the marker of
-   * a parameter entity declaration. In that case ~0U is returned and the
-   * white space is left unread.
-   *
-   * Otherwise returns 1.
-   *
-   * Already parsed: '%' */
-  do
-    {
-      xml_inc(ctx);    /* paired with the xml_dec() below or with the one inside xml_parse_pe_ref() */
-      if (!~entity_decl && (xml_peek_cat(ctx) & XML_CHAR_WHITE))       /* !~x holds only for x == ~0U */
-        {
-         xml_dec(ctx);
-         return ~0U;
-       }
-      xml_parse_pe_ref(ctx);
-      while (xml_peek_cat(ctx) & XML_CHAR_WHITE)
-       xml_skip_char(ctx);
-    }
-  while (xml_get_char(ctx) == '%');
-  xml_unget_char(ctx); /* give back the character which ended the run */
-  return 1;
-}
-
-/*
- * Skip white space and/or parameter entity references inside the DTD.
- *
- * If @mandatory is non-zero and neither white space nor a '%' is found,
- * a fatal error is raised. The value ~0U has a special meaning: it denotes
- * the white space before the '%' character in a parameter entity
- * declaration (see xml_parse_dtd_pe()).
- *
- * Returns the result of xml_parse_dtd_pe() if a '%' follows the white
- * space; otherwise 1 if any white space was skipped and 0 if not.
- */
-static inline uns
-xml_parse_dtd_white(struct xml_context *ctx, uns mandatory)
-{
-  uns seen = 0;
-  for (; xml_peek_cat(ctx) & XML_CHAR_WHITE; seen = 1)
-    xml_skip_char(ctx);
-
-  if (xml_peek_char(ctx) != '%')
-    {
-      if (unlikely(mandatory && !seen))
-        xml_fatal_expected_white(ctx);
-      return seen;
-    }
-
-  xml_skip_char(ctx);
-  return xml_parse_dtd_pe(ctx, mandatory);
-}
-
-/*
- * Parse an external identifier and store its parts, allocated from the DTD
- * pool, to *@system_id and *@public_id; a missing part is set to NULL.
- *
- *   'SYSTEM' S SystemLiteral
- *   'PUBLIC' S PubidLiteral S SystemLiteral
- *   'PUBLIC' S PubidLiteral                    (only if @allow_public is set)
- *
- * When @allow_public is zero, the white space and the system literal after
- * the public literal are mandatory. Anything not starting with 'S' or 'P'
- * is a fatal error.
- */
-static void
-xml_dtd_parse_external_id(struct xml_context *ctx, char **system_id, char **public_id, uns allow_public)
-{
-  struct xml_dtd *dtd = ctx->dtd;
-
-  switch (xml_peek_char(ctx))
-    {
-    case 'S':
-      xml_parse_seq(ctx, "SYSTEM");
-      xml_parse_dtd_white(ctx, 1);
-      *public_id = NULL;
-      *system_id = xml_parse_system_literal(ctx, dtd->pool);
-      break;
-    case 'P':
-      {
-        xml_parse_seq(ctx, "PUBLIC");
-        xml_parse_dtd_white(ctx, 1);
-        *system_id = NULL;
-        *public_id = xml_parse_pubid_literal(ctx, dtd->pool);
-        if (!xml_parse_dtd_white(ctx, !allow_public))
-          break;
-        /* The system literal is optional only if a bare public ID is allowed */
-        uns quote = xml_peek_char(ctx);
-        if (quote == '\'' || quote == '"' || !allow_public)
-          *system_id = xml_parse_system_literal(ctx, dtd->pool);
-        break;
-      }
-    default:
-      xml_fatal(ctx, "Expected an external ID");
-    }
-}
-
-/* DTD: <!NOTATION ...> */
-
-/*
- * Parse a notation declaration and record it in the DTD.
- *
- * NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
- * Already parsed: '<!NOTATION'
- *
- * The first declaration of a notation is appended to dtd->notns. A repeated
- * declaration only produces a warning and leaves the first one untouched.
- */
-void
-xml_parse_notation_decl(struct xml_context *ctx)
-{
-  TRACE(ctx, "parse_notation_decl");
-  struct xml_dtd *dtd = ctx->dtd;
-  char *system_id, *public_id;
-
-  xml_parse_dtd_white(ctx, 1);
-  char *name = xml_parse_name(ctx, dtd->pool);
-  struct xml_dtd_notn *notn = xml_dtd_notns_lookup(dtd->tab_notns, name);
-  xml_parse_dtd_white(ctx, 1);
-  xml_dtd_parse_external_id(ctx, &system_id, &public_id, 1);
-  xml_parse_dtd_white(ctx, 0);
-  xml_parse_char(ctx, '>');
-
-  if (!(notn->flags & XML_DTD_NOTN_DECLARED))
-    {
-      notn->flags = XML_DTD_NOTN_DECLARED;
-      notn->system_id = system_id;
-      notn->public_id = public_id;
-      slist_add_tail(&dtd->notns, &notn->n);
-    }
-  else
-    xml_warn(ctx, "Notation %s already declared", notn->name);
-
-  xml_dec(ctx);
-}
-
-/* DTD: <!ENTITY ...> */
-
-/*
- * Parse an entity declaration and record the entity in the DTD.
- *
- * GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
- * PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
- * EntityDef ::= EntityValue | (ExternalID NDataDecl?)
- * PEDef ::= EntityValue | ExternalID
- * NDataDecl ::= S 'NDATA' S Name
- * Already parsed: '<!ENTITY'
- *
- * General entities go to dtd->tab_ents / dtd->ents, parameter entities to
- * dtd->tab_pents / dtd->pents. In the value of an internal entity, character
- * references are expanded, while references to general entities are copied
- * verbatim. Limitations: a repeated declaration is a fatal error and so is
- * a reference to a parameter entity inside an entity value.
- */
-void
-xml_parse_entity_decl(struct xml_context *ctx)
-{
-  TRACE(ctx, "parse_entity_decl");
-  struct xml_dtd *dtd = ctx->dtd;
-
-  /* In the ~0U mode, xml_parse_dtd_white() returns ~0U iff it has met the '%' of a parameter entity declaration */
-  uns flags = ~xml_parse_dtd_white(ctx, ~0U) ? 0 : XML_DTD_ENTITY_PARAMETER;
-  if (flags)
-    xml_parse_dtd_white(ctx, 1);
-  struct xml_dtd_entity *ent = xml_dtd_ents_lookup(flags ? dtd->tab_pents : dtd->tab_ents, xml_parse_name(ctx, dtd->pool));
-  xml_parse_dtd_white(ctx, 1);
-  slist *list = flags ? &dtd->pents : &dtd->ents;
-  if (ent->flags & XML_DTD_ENTITY_DECLARED)
-    {
-      // FIXME: should be only warning
-      xml_fatal(ctx, "Entity &%s; already declared, skipping not implemented", ent->name);
-    }
-
-  uns c, sep = xml_get_char(ctx);
-  if (sep == '\'' || sep == '"')
-    {
-      /* Internal entity:
-       * EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | "'" ([^%&'] | PEReference | Reference)* "'" */
-      char *p = mp_start_noalign(dtd->pool, 1);
-      while ((c = xml_get_char(ctx)) != sep)
-        {
-          if (c == '%')
-            {
-              /* FIXME: PEReference inside EntityValue; report it instead of aborting on an assertion */
-              xml_fatal(ctx, "References to parameter entities in entity values are not implemented");
-            }
-          if (c == '&')
-            {
-              xml_inc(ctx);
-              if (xml_peek_char(ctx) != '#')
-                {
-                  /* Bypass references to general entities: copy '&' Name ';' as it stands */
-                  struct mempool_state state;
-                  mp_save(ctx->stack, &state);
-                  char *n = xml_parse_name(ctx, ctx->stack);
-                  xml_parse_char(ctx, ';');
-                  xml_dec(ctx);
-                  uns l = strlen(n);
-                  /* '&' + name + ';' and one spare byte for the terminating zero */
-                  p = mp_spread(dtd->pool, p, 3 + l);
-                  *p++ = '&';
-                  memcpy(p, n, l);
-                  p += l;
-                  *p++ = ';';
-                  mp_restore(ctx->stack, &state);
-                  continue;
-                }
-              /* Character reference: store the referenced character itself */
-              xml_skip_char(ctx);
-              c = xml_parse_char_ref(ctx);
-            }
-          p = mp_spread(dtd->pool, p, 5);
-          p = utf8_32_put(p, c);
-        }
-      *p = 0;
-      ent->len = p - (char *)mp_ptr(dtd->pool);
-      ent->text = mp_end(dtd->pool, p + 1);
-      slist_add_tail(list, &ent->n);
-      ent->flags = flags | XML_DTD_ENTITY_DECLARED;
-    }
-  else
-    {
-      /* External entity */
-      struct xml_dtd_notn *notn = NULL;
-      char *system_id, *public_id;
-      xml_unget_char(ctx);
-      xml_dtd_parse_external_id(ctx, &system_id, &public_id, 0);
-      /* NDataDecl belongs to EntityDef only, so it is accepted for general (flags == 0) entities, never for parameter ones */
-      if (xml_parse_dtd_white(ctx, 0) && !flags && xml_peek_char(ctx) != '>')
-        {
-          /* General external unparsed entity */
-          flags |= XML_DTD_ENTITY_UNPARSED;
-          xml_parse_seq(ctx, "NDATA");
-          xml_parse_dtd_white(ctx, 1);
-          notn = xml_dtd_notns_lookup(dtd->tab_notns, xml_parse_name(ctx, dtd->pool));
-        }
-      slist_add_tail(list, &ent->n);
-      ent->flags = flags | XML_DTD_ENTITY_DECLARED | XML_DTD_ENTITY_EXTERNAL;
-      ent->system_id = system_id;
-      ent->public_id = public_id;
-      ent->notn = notn;
-    }
-  xml_parse_dtd_white(ctx, 0);
-  xml_parse_char(ctx, '>');
-  xml_dec(ctx);
-}
-
-/* DTD: <!ELEMENT ...> */
-
-/*
- * Parse an optional occurrence indicator following a content particle or
- * a closed group: '?', '*' or '+'. Any other character is pushed back and
- * stands for "exactly once". Returns the matching XML_DTD_ELEM_OCCUR_* value.
- */
-static uns
-xml_dtd_parse_occurrence(struct xml_context *ctx)
-{
-  switch (xml_get_char(ctx))
-    {
-    case '?':
-      return XML_DTD_ELEM_OCCUR_OPT;
-    case '*':
-      return XML_DTD_ELEM_OCCUR_MULT;
-    case '+':
-      return XML_DTD_ELEM_OCCUR_PLUS;
-    default:
-      xml_unget_char(ctx);
-      return XML_DTD_ELEM_OCCUR_ONCE;
-    }
-}
-
-/*
- * Parse an element type declaration and store its content model in the DTD.
- *
- * Elementdecl ::= '<!ELEMENT' S  Name  S  contentspec  S? '>'
- * contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
- * Already parsed: '<!ELEMENT'
- *
- * For Mixed and children content, elem->node becomes the root of a tree of
- * struct xml_dtd_elem_node: inner nodes are groups (type SEQ or OR, with an
- * occurrence), leaves refer to elements by son->elem. Nested groups and
- * occurrence indicators on names are supported. Syntax errors are fatal.
- */
-void
-xml_parse_element_decl(struct xml_context *ctx)
-{
-  struct xml_dtd *dtd = ctx->dtd;
-  xml_parse_dtd_white(ctx, 1);
-  char *name = xml_parse_name(ctx, dtd->pool);
-  xml_parse_dtd_white(ctx, 1);
-  struct xml_dtd_elem *elem = xml_dtd_elems_lookup(dtd->tab_elems, name);
-  /* NOTE(review): this function never sets XML_DTD_ELEM_DECLARED itself, so unless the flag
-   * is set elsewhere, this check cannot fire -- confirm before relying on it */
-  if (elem->flags & XML_DTD_ELEM_DECLARED)
-    xml_fatal(ctx, "Element <%s> already declared", name);
-
-  uns c = xml_peek_char(ctx);
-  if (c == 'E')
-    {
-      xml_parse_seq(ctx, "EMPTY");
-      elem->type = XML_DTD_ELEM_EMPTY;
-    }
-  else if (c == 'A')
-    {
-      xml_parse_seq(ctx, "ANY");
-      elem->type = XML_DTD_ELEM_ANY;
-    }
-  else if (c == '(')
-    {
-      xml_skip_char(ctx);
-      xml_inc(ctx);
-      xml_parse_dtd_white(ctx, 0);
-      struct xml_dtd_elem_node *parent = elem->node = mp_alloc_zero(dtd->pool, sizeof(*parent));
-      if (xml_peek_char(ctx) == '#')
-        {
-          /* Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | '(' S? '#PCDATA' S? ')' */
-          xml_skip_char(ctx);
-          xml_parse_seq(ctx, "PCDATA");
-          elem->type = XML_DTD_ELEM_MIXED;
-          parent->type = XML_DTD_ELEM_PCDATA;
-          while (1)
-            {
-              xml_parse_dtd_white(ctx, 0);
-              if ((c = xml_get_char(ctx)) == ')')
-                break;
-              else if (c != '|')
-                xml_fatal_expected(ctx, ')');
-              xml_parse_dtd_white(ctx, 0);
-              struct xml_dtd_elem *son_elem = xml_dtd_elems_lookup(dtd->tab_elems, xml_parse_name(ctx, dtd->pool));
-              if (xml_dtd_enodes_find(dtd->tab_enodes, parent, son_elem))
-                xml_error(ctx, "Duplicate content '%s'", son_elem->name);
-              else
-                {
-                  struct xml_dtd_elem_node *son = xml_dtd_enodes_new(dtd->tab_enodes, parent, son_elem);
-                  slist_add_tail(&parent->sons, &son->n);
-                }
-            }
-          xml_dec(ctx);
-          /* The trailing '*' may be omitted only if no element names were listed */
-          if (xml_peek_char(ctx) == '*')
-            {
-              xml_skip_char(ctx);
-              parent->occur = XML_DTD_ELEM_OCCUR_MULT;
-            }
-          else if (!slist_head(&parent->sons))
-            parent->occur = XML_DTD_ELEM_OCCUR_ONCE;
-          else
-            xml_fatal_expected(ctx, '*');
-        }
-      else
-        {
-          /* children ::= (choice | seq) ('?' | '*' | '+')?
-           * cp ::= (Name | choice | seq) ('?' | '*' | '+')?
-           * choice ::= '(' S? cp ( S? '|' S? cp )+ S? ')'
-           * seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' */
-
-          elem->type = XML_DTD_ELEM_CHILDREN;
-          /* For a group node, XML_DTD_ELEM_PCDATA serves as the "operator not known yet" marker;
-           * it is replaced by OR or SEQ as soon as the first '|', ',' or ')' is seen */
-          parent->type = XML_DTD_ELEM_PCDATA;
-          uns finished = 0;
-          while (!finished)
-            {
-              /* Before a content particle: a nested group or a name */
-              xml_parse_dtd_white(ctx, 0);
-              if (xml_peek_char(ctx) == '(')
-                {
-                  xml_skip_char(ctx);
-                  xml_inc(ctx);
-                  struct xml_dtd_elem_node *group = mp_alloc_zero(dtd->pool, sizeof(*group));
-                  group->parent = parent;
-                  group->type = XML_DTD_ELEM_PCDATA;
-                  slist_add_tail(&parent->sons, &group->n);
-                  /* Descend: the following particles belong to the nested group */
-                  parent = group;
-                  continue;
-                }
-              struct xml_dtd_elem *son_elem = xml_dtd_elems_lookup(dtd->tab_elems, xml_parse_name(ctx, dtd->pool));
-              // FIXME: duplicates
-              struct xml_dtd_elem_node *son = mp_alloc_zero(dtd->pool, sizeof(*son));
-              son->parent = parent;
-              son->elem = son_elem;
-              son->occur = xml_dtd_parse_occurrence(ctx);
-              slist_add_tail(&parent->sons, &son->n);
-
-              /* After a content particle: close any number of groups ... */
-              while (1)
-                {
-                  xml_parse_dtd_white(ctx, 0);
-                  if ((c = xml_get_char(ctx)) != ')')
-                    break;
-                  xml_dec(ctx);
-                  if (parent->type == XML_DTD_ELEM_PCDATA)
-                    parent->type = XML_DTD_ELEM_SEQ;
-                  parent->occur = xml_dtd_parse_occurrence(ctx);
-                  if (!parent->parent)
-                    {
-                      /* The outermost group has been closed */
-                      finished = 1;
-                      break;
-                    }
-                  parent = parent->parent;
-                }
-              if (finished)
-                break;
-
-              /* ... and then expect an operator consistent with the rest of the group */
-              if (c == '|')
-                {
-                  if (parent->type == XML_DTD_ELEM_PCDATA)
-                    parent->type = XML_DTD_ELEM_OR;
-                  else if (parent->type != XML_DTD_ELEM_OR)
-                    xml_fatal(ctx, "Mixed operators in the list of element children");
-                }
-              else if (c == ',')
-                {
-                  if (parent->type == XML_DTD_ELEM_PCDATA)
-                    parent->type = XML_DTD_ELEM_SEQ;
-                  else if (parent->type != XML_DTD_ELEM_SEQ)
-                    xml_fatal(ctx, "Mixed operators in the list of element children");
-                }
-              else
-                xml_fatal_expected(ctx, ')');
-            }
-        }
-    }
-  else
-    xml_fatal(ctx, "Expected element content specification");
-
-  xml_parse_dtd_white(ctx, 0);
-  xml_parse_char(ctx, '>');
-  xml_dec(ctx);
-}
-
-void
-xml_parse_attr_list_decl(struct xml_context *ctx)
-{
-  /* AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
-   * AttDef ::= S Name S AttType S DefaultDecl
-   * Already parsed: '<!ATTLIST' */
-  struct xml_dtd *dtd = ctx->dtd;
-  xml_parse_dtd_white(ctx, 1);
-  struct xml_dtd_elem *elem = xml_dtd_elems_lookup(ctx->dtd->tab_elems, xml_parse_name(ctx, dtd->pool));
-
-  while (xml_parse_dtd_white(ctx, 0) && xml_peek_char(ctx) != '>')
-    {
-      char *name = xml_parse_name(ctx, dtd->pool);
-      struct xml_dtd_attr *attr = xml_dtd_attrs_find(dtd->tab_attrs, elem, name);
-      uns ignored = 0;
-      if (attr)
-        {
-         xml_warn(ctx, "Duplicate attribute definition");
-         ignored++;
-       }
-      else
-       attr = xml_dtd_attrs_new(ctx->dtd->tab_attrs, elem, name);
-      xml_parse_dtd_white(ctx, 1);
-      if (xml_peek_char(ctx) == '(')
-        {
-         xml_skip_char(ctx); // FIXME: xml_inc/dec ?
-         if (!ignored)
-           attr->type = XML_ATTR_ENUM;
-         do
-           {
-             xml_parse_dtd_white(ctx, 0);
-             char *value = xml_parse_nmtoken(ctx, dtd->pool);
-             if (!ignored)
-               if (xml_dtd_evals_find(ctx->dtd->tab_evals, attr, value))
-                 xml_error(ctx, "Duplicate enumeration value");
-               else
-                 xml_dtd_evals_new(ctx->dtd->tab_evals, attr, value);
-             xml_parse_dtd_white(ctx, 0);
-           }
-         while (xml_get_char(ctx) == '|');
-         xml_unget_char(ctx);
-         xml_parse_char(ctx, ')');
-       }
-      else
-        {
-         char *type = xml_parse_name(ctx, dtd->pool);
-         enum xml_dtd_attr_type t = XML_ATTR_CDATA;
-         if (!strcmp(type, "CDATA"))
-           t = XML_ATTR_CDATA;
-         else if (!strcmp(type, "ID"))
-           t = XML_ATTR_ID;
-         else if (!strcmp(type, "IDREF"))
-           t = XML_ATTR_IDREF;
-         else if (!strcmp(type, "IDREFS"))
-           t = XML_ATTR_IDREFS;
-         else if (!strcmp(type, "ENTITY"))
-           t = XML_ATTR_ENTITY;
-         else if (!strcmp(type, "ENTITIES"))
-           t = XML_ATTR_ENTITIES;
-         else if (!strcmp(type, "NMTOKEN"))
-           t = XML_ATTR_NMTOKEN;
-         else if (!strcmp(type, "NMTOKENS"))
-           t = XML_ATTR_NMTOKENS;
-         else if (!strcmp(type, "NOTATION"))
-           {
-             if (elem->type == XML_DTD_ELEM_EMPTY)
-               xml_fatal(ctx, "Empty element must not have notation attribute");
-             // FIXME: An element type MUST NOT have more than one NOTATION attribute specified.
-             t = XML_ATTR_NOTATION;
-             xml_parse_dtd_white(ctx, 1);
-             xml_parse_char(ctx, '(');
-             do
-               {
-                 xml_parse_dtd_white(ctx, 0);
-                 struct xml_dtd_notn *n = xml_dtd_notns_lookup(ctx->dtd->tab_notns, xml_parse_name(ctx, dtd->pool));
-                 if (!ignored)
-                   if (xml_dtd_enotns_find(ctx->dtd->tab_enotns, attr, n))
-                     xml_error(ctx, "Duplicate enumerated notation");
-                   else
-                     xml_dtd_enotns_new(ctx->dtd->tab_enotns, attr, n);
-                 xml_parse_dtd_white(ctx, 0);
-               }
-             while (xml_get_char(ctx) == '|');
-             xml_unget_char(ctx);
-             xml_parse_char(ctx, ')');
-           }
-         else
-           xml_fatal(ctx, "Unknown attribute type");
-         if (!ignored)
-           attr->type = t;
-       }
-      xml_parse_dtd_white(ctx, 1);
-      enum xml_dtd_attr_default def = XML_ATTR_NONE;
-      if (xml_get_char(ctx) == '#')
-       switch (xml_peek_char(ctx))
-          {
-           case 'R':
-             xml_parse_seq(ctx, "REQUIRED");
-             def = XML_ATTR_REQUIRED;
-             break;
-           case 'I':
-             xml_parse_seq(ctx, "IMPLIED");
-             def = XML_ATTR_IMPLIED;
-             break;
-           case 'F':
-             xml_parse_seq(ctx, "FIXED");
-             def = XML_ATTR_FIXED;
-             xml_parse_dtd_white(ctx, 1);
-             break;
-           default:
-             xml_fatal(ctx, "Expected a modifier for default attribute value");
-         }
-      else
-       xml_unget_char(ctx);
-      if (def != XML_ATTR_REQUIRED && def != XML_ATTR_IMPLIED)
-        {
-         char *v = xml_parse_attr_value(ctx, attr);
-         if (!ignored)
-           attr->default_value = v;
-       }
-      if (!ignored)
-       attr->default_mode = def;
-    }
-  xml_skip_char(ctx);
-  xml_dec(ctx);
-}
-
-void
-xml_skip_internal_subset(struct xml_context *ctx)
-{
-  TRACE(ctx, "skip_internal_subset");
-  /* AlreadyParsed: '[' */
-  uns c;
-  while ((c = xml_get_char(ctx)) != ']')
-    {
-      if (c != '<')
-       continue;
-      if ((c = xml_get_char(ctx)) == '?')
-        {
-          xml_inc(ctx);
-         xml_skip_pi(ctx);
-       }
-      else if (c != '!')
-       xml_dec(ctx);
-      else if (xml_get_char(ctx) == '-')
-        {
-         xml_inc(ctx);
-         xml_skip_comment(ctx);
-       }
-      else
-       while ((c = xml_get_char(ctx)) != '>')
-         if (c == '\'' || c == '"')
-           while (xml_get_char(ctx) != c);
-    }
-  xml_dec(ctx);
-}
-
-/*** Validation of attribute values ***/
-
-static uns
-xml_check_tokens(char *value, uns first_cat, uns next_cat, uns seq)
-{
-  char *p = value;
-  uns u;
-  while (1)
-    {
-      p = utf8_32_get(p, &u);
-      if (!(xml_char_cat(u) & first_cat))
-        return 0;
-      while (*p & ~0x20)
-        {
-         p = utf8_32_get(p, &u);
-         if (!(xml_char_cat(u) & next_cat))
-           return 0;
-       }
-      if (!*p)
-       return 1;
-      if (!seq)
-       return 0;
-      p++;
-    }
-}
-
-static uns
-xml_is_name(struct xml_context *ctx, char *value)
-{
-  /* Name ::= NameStartChar (NameChar)* */
-  return xml_check_tokens(value, ctx->cat_sname, ctx->cat_name, 0);
-}
-
-static uns
-xml_is_names(struct xml_context *ctx, char *value)
-{
-  /* Names ::= Name (#x20 Name)* */
-  return xml_check_tokens(value, ctx->cat_sname, ctx->cat_name, 1);
-}
-
-static uns
-xml_is_nmtoken(struct xml_context *ctx, char *value)
-{
-  /* Nmtoken ::= (NameChar)+ */
-  return xml_check_tokens(value, ctx->cat_name, ctx->cat_name, 0);
-}
-
-static uns
-xml_is_nmtokens(struct xml_context *ctx, char *value)
-{
-  /* Nmtokens ::= Nmtoken (#x20 Nmtoken)* */
-  return xml_check_tokens(value, ctx->cat_name, ctx->cat_name, 1);
-}
-
-static void
-xml_err_attr_format(struct xml_context *ctx, struct xml_dtd_attr *dtd, char *type)
-{
-  xml_error(ctx, "Attribute %s in <%s> does not match the production of %s", dtd->name, dtd->elem->name, type);
-}
-
-void
-xml_validate_attr(struct xml_context *ctx, struct xml_dtd_attr *dtd, char *value)
-{
-  if (dtd->type == XML_ATTR_CDATA)
-    return;
-  xml_normalize_white(ctx, value);
-  switch (dtd->type)
-    {
-      case XML_ATTR_ID:
-       if (!xml_is_name(ctx, value))
-         xml_err_attr_format(ctx, dtd, "NAME");
-       //FIXME: add to a hash table
-       break;
-      case XML_ATTR_IDREF:
-       if (!xml_is_name(ctx, value))
-         xml_err_attr_format(ctx, dtd, "NAME");
-       // FIXME: find in hash table (beware forward references)
-       break;
-      case XML_ATTR_IDREFS:
-       if (!xml_is_names(ctx, value))
-         xml_err_attr_format(ctx, dtd, "NAMES");
-       // FIXME: find
-       break;
-      case XML_ATTR_ENTITY:
-       // FIXME
-       break;
-      case XML_ATTR_ENTITIES:
-       // FIXME
-       break;
-      case XML_ATTR_NMTOKEN:
-       if (!xml_is_nmtoken(ctx, value))
-         xml_err_attr_format(ctx, dtd, "NMTOKEN");
-       break;
-      case XML_ATTR_NMTOKENS:
-       if (!xml_is_nmtokens(ctx, value))
-         xml_err_attr_format(ctx, dtd, "NMTOKENS");
-       break;
-      case XML_ATTR_ENUM:
-       if (!xml_dtd_evals_find(ctx->dtd->tab_evals, dtd, value))
-         xml_error(ctx, "Attribute %s in <%s> contains an undefined enumeration value", dtd->name, dtd->elem->name);
-       break;
-      case XML_ATTR_NOTATION:
-       if (!xml_dtd_find_notn(ctx, value))
-         xml_error(ctx, "Attribute %s in <%s> contains an undefined notation", dtd->name, dtd->elem->name);
-       break;
-    }
-}
diff --git a/sherlock/xml/dtd.h b/sherlock/xml/dtd.h

deleted file mode 100644 (file)

index e2caf98..0000000
--- a/sherlock/xml/dtd.h
+++ /dev/null
@@ -1,168 +0,0 @@
-/*
- *     Sherlock Library -- A simple XML parser
- *
- *     (c) 2007--2008 Pavel Charvat <pchar@ucw.cz>
- *
- *     This software may be freely distributed and used according to the terms
- *     of the GNU Lesser General Public License.
- */
-
-#ifndef _SHERLOCK_XML_DTD_H
-#define _SHERLOCK_XML_DTD_H
-
-#include "sherlock/xml/xml.h"
-
-struct xml_dtd {
-  struct mempool *pool;                        /* Memory pool where to allocate DTD */
-  slist ents;                          /* Link list of general entities */
-  slist pents;                         /* Link list of parameter entities */
-  slist notns;                         /* Link list of notations */
-  slist elems;                         /* Link list of elements */
-  void *tab_ents;                      /* Hash table of general entities */
-  void *tab_pents;                     /* Hash table of parameter entities */
-  void *tab_notns;                     /* Hash table of notations */
-  void *tab_elems;                     /* Hash table of elements */
-  void *tab_enodes;                    /* Hash table of element sons */
-  void *tab_attrs;                     /* Hash table of element attributes */
-  void *tab_evals;                     /* Hash table of enumerated attribute values */
-  void *tab_enotns;                    /* hash table of enumerated attribute notations */
-};
-
-/* Notations */
-
-enum xml_dtd_notn_flags {
-  XML_DTD_NOTN_DECLARED = 0x1,         /* The notation has been declared (internal usage) */
-};
-
-struct xml_dtd_notn {
-  snode n;                             /* Node in xml_dtd.notns */
-  uns flags;                           /* XML_DTD_NOTN_x */
-  char *name;                          /* Notation name */
-  char *system_id;                     /* External ID */
-  char *public_id;
-  void *user;                          /* User-defined */
-};
-
-struct xml_dtd_notn *xml_dtd_find_notn(struct xml_context *ctx, char *name);
-
-/* Entities */
-
-enum xml_dtd_entity_flags {
-  XML_DTD_ENTITY_DECLARED = 0x1,       /* The entity has been declared (internal usage) */
-  XML_DTD_ENTITY_VISITED = 0x2,                /* Cycle detection (internal usage) */
-  XML_DTD_ENTITY_PARAMETER = 0x4,      /* Parameter entity, general otherwise */
-  XML_DTD_ENTITY_EXTERNAL = 0x8,       /* External entity, internal otherwise */
-  XML_DTD_ENTITY_UNPARSED = 0x10,      /* Unparsed entity, parsed otherwise */
-  XML_DTD_ENTITY_TRIVIAL = 0x20,       /* Replacement text is a sequence of characters and character references */
-};
-
-struct xml_dtd_entity {
-  snode n;                             /* Node in xml_dtd.[gp]ents */
-  uns flags;                           /* XML_DTD_ENT_x */
-  char *name;                          /* Entity name */
-  char *text;                          /* Replacement text / expanded replacement text (XML_DTD_ENT_TRIVIAL) */
-  uns len;                             /* Text length */
-  char *system_id;                     /* External ID */
-  char *public_id;
-  struct xml_dtd_notn *notn;           /* Notation (XML_DTD_ENT_UNPARSED only) */
-  void *user;                          /* User-defined */
-};
-
-struct xml_dtd_entity *xml_dtd_find_entity(struct xml_context *ctx, char *name);
-
-/* Elements */
-
-enum xml_dtd_elem_flags {
-  XML_DTD_ELEM_DECLARED = 0x1,         /* The element has been declared (internal usage) */
-};
-
-enum xml_dtd_elem_type {
-  XML_DTD_ELEM_EMPTY,
-  XML_DTD_ELEM_ANY,
-  XML_DTD_ELEM_MIXED,
-  XML_DTD_ELEM_CHILDREN,
-};
-
-struct xml_dtd_elem {
-  snode n;
-  uns flags;
-  uns type;
-  char *name;
-  struct xml_dtd_elem_node *node;
-  slist attrs;
-  void *user;                          /* User-defined */
-};
-
-struct xml_dtd_elem_node {
-  snode n;
-  struct xml_dtd_elem_node *parent;
-  struct xml_dtd_elem *elem;
-  slist sons;
-  uns type;
-  uns occur;
-  void *user;                          /* User-defined */
-};
-
-enum xml_dtd_elem_node_type {
-  XML_DTD_ELEM_PCDATA,
-  XML_DTD_ELEM_SEQ,
-  XML_DTD_ELEM_OR,
-};
-
-enum xml_dtd_elem_node_occur {
-  XML_DTD_ELEM_OCCUR_ONCE,
-  XML_DTD_ELEM_OCCUR_OPT,
-  XML_DTD_ELEM_OCCUR_MULT,
-  XML_DTD_ELEM_OCCUR_PLUS,
-};
-
-struct xml_dtd_elem *xml_dtd_find_elem(struct xml_context *ctx, char *name);
-
-/* Attributes */
-
-enum xml_dtd_attr_default {
-  XML_ATTR_NONE,
-  XML_ATTR_REQUIRED,
-  XML_ATTR_IMPLIED,
-  XML_ATTR_FIXED,
-};
-
-enum xml_dtd_attr_type {
-  XML_ATTR_CDATA,
-  XML_ATTR_ID,
-  XML_ATTR_IDREF,
-  XML_ATTR_IDREFS,
-  XML_ATTR_ENTITY,
-  XML_ATTR_ENTITIES,
-  XML_ATTR_NMTOKEN,
-  XML_ATTR_NMTOKENS,
-  XML_ATTR_ENUM,
-  XML_ATTR_NOTATION,
-};
-
-struct xml_dtd_attr {
-  snode n;
-  char *name;                          /* Attribute name */
-  struct xml_dtd_elem *elem;           /* Owner element */
-  uns type;                            /* See enum xml_dtd_attr_type */
-  uns default_mode;                    /* See enum xml_dtd_attr_default */
-  char *default_value;                 /* The default value defined in DTD (or NULL) */
-};
-
-struct xml_dtd_eval {
-  struct xml_dtd_attr *attr;
-  char *val;
-};
-
-struct xml_dtd_enotn {
-  struct xml_dtd_attr *attr;
-  struct xml_dtd_notn *notn;
-};
-
-void xml_dtd_init(struct xml_context *ctx);
-void xml_dtd_cleanup(struct xml_context *ctx);
-void xml_dtd_finish(struct xml_context *ctx);
-
-struct xml_dtd_attr *xml_dtd_find_attr(struct xml_context *ctx, struct xml_dtd_elem *elem, char *name);
-
-#endif
diff --git a/sherlock/xml/internals.h b/sherlock/xml/internals.h

deleted file mode 100644 (file)

index bbf28c0..0000000
--- a/sherlock/xml/internals.h
+++ /dev/null
@@ -1,311 +0,0 @@
-/*
- *     Sherlock Library -- A simple XML parser
- *
- *     (c) 2007--2008 Pavel Charvat <pchar@ucw.cz>
- *
- *     This software may be freely distributed and used according to the terms
- *     of the GNU Lesser General Public License.
- */
-
-#ifndef _SHERLOCK_XML_INTERNALS_H
-#define _SHERLOCK_XML_INTERNALS_H
-
-#include "sherlock/xml/xml.h"
-#include "sherlock/xml/dtd.h"
-
-/*** Debugging ***/
-
-#ifdef LOCAL_DEBUG
-#define TRACE(c, f, p...) do { DBG("XML %u: " f, xml_row(c), ##p); } while(0)
-#else
-#define TRACE(c, f, p...) do {} while(0)
-#endif
-
-/*** Error handling ***/
-
-void NONRET xml_throw(struct xml_context *ctx);
-
-/*** Memory management ***/
-
-struct xml_stack {
-  struct xml_stack *next;
-  struct mempool_state state;
-  uns flags;
-};
-
-static inline void *
-xml_do_push(struct xml_context *ctx, uns size)
-{
-  /* Saves ctx->stack and ctx->flags state */
-  struct mempool_state state;
-  mp_save(ctx->stack, &state);
-  struct xml_stack *s = mp_alloc(ctx->stack, size);
-  s->state = state;
-  s->flags = ctx->flags;
-  s->next = ctx->stack_list;
-  ctx->stack_list = s;
-  return s;
-}
-
-static inline void
-xml_do_pop(struct xml_context *ctx, struct xml_stack *s)
-{
-  /* Restore ctx->stack and ctx->flags state */
-  ctx->stack_list = s->next;
-  ctx->flags = s->flags;
-  mp_restore(ctx->stack, &s->state);
-}
-
-static inline void
-xml_push(struct xml_context *ctx)
-{
-  TRACE(ctx, "push");
-  xml_do_push(ctx, sizeof(struct xml_stack));
-}
-
-static inline void
-xml_pop(struct xml_context *ctx)
-{
-  TRACE(ctx, "pop");
-  ASSERT(ctx->stack_list);
-  xml_do_pop(ctx, ctx->stack_list);
-}
-
-struct xml_dom_stack {
-  struct xml_stack stack;
-  struct mempool_state state;
-};
-
-static inline struct xml_node *
-xml_push_dom(struct xml_context *ctx, struct mempool_state *state)
-{
-  /* Create a new DOM node */
-  TRACE(ctx, "push_dom");
-  struct xml_dom_stack *s = xml_do_push(ctx, sizeof(*s));
-  if (state)
-    s->state = *state;
-  else
-    mp_save(ctx->pool, &s->state);
-  struct xml_node *n = mp_alloc(ctx->pool, sizeof(*n));
-  n->user = NULL;
-  if (n->parent = ctx->node)
-    clist_add_tail(&n->parent->sons, &n->n);
-  return ctx->node = n;
-}
-
-static inline void
-xml_pop_dom(struct xml_context *ctx, uns free)
-{
-  /* Leave DOM subtree */
-  TRACE(ctx, "pop_dom");
-  ASSERT(ctx->node);
-  struct xml_node *p = ctx->node->parent;
-  struct xml_dom_stack *s = (void *)ctx->stack_list;
-  if (free)
-    {
-      /* See xml_pop_element() for cleanup of attribute hash table */
-      if (p)
-        clist_remove(&ctx->node->n);
-      mp_restore(ctx->pool, &s->state);
-    }
-  ctx->node = p;
-  xml_do_pop(ctx, &s->stack);
-}
-
-#define XML_HASH_HDR_SIZE ALIGN_TO(sizeof(void *), CPU_STRUCT_ALIGN)
-#define XML_HASH_GIVE_ALLOC struct HASH_PREFIX(table); \
-  static inline void *HASH_PREFIX(alloc)(struct HASH_PREFIX(table) *t, uns size) \
-  { return mp_alloc(*(void **)((void *)t - XML_HASH_HDR_SIZE), size); } \
-  static inline void HASH_PREFIX(free)(struct HASH_PREFIX(table) *t UNUSED, void *p UNUSED) {}
-
-void *xml_hash_new(struct mempool *pool, uns size);
-
-void xml_spout_chars(struct fastbuf *fb);
-
-/*** Reading of document/external entities ***/
-
-void NONRET xml_fatal_nested(struct xml_context *ctx);
-
-static inline void
-xml_inc(struct xml_context *ctx)
-{
-  /* Called after the first character of a block */
-  TRACE(ctx, "inc");
-  ctx->depth++;
-}
-
-static inline void
-xml_dec(struct xml_context *ctx)
-{
-  /* Called after the last character of a block */
-  TRACE(ctx, "dec");
-  if (unlikely(!ctx->depth--))
-    xml_fatal_nested(ctx);
-}
-
-#include "obj/sherlock/xml/unicat.h"
-
-static inline uns
-xml_char_cat(uns c)
-{
-  if (c < 0x10000)
-    return 1U << xml_char_tab1[(c & 0xff) + xml_char_tab2[c >> 8]];
-  else if (likely(c < 0x110000))
-    return 1U << xml_char_tab3[c >> 16];
-  else
-    return 1;
-}
-
-static inline uns
-xml_ascii_cat(uns c)
-{
-  return xml_char_tab1[c];
-}
-
-struct xml_source *xml_push_source(struct xml_context *ctx);
-void xml_push_entity(struct xml_context *ctx, struct xml_dtd_entity *ent);
-
-void xml_refill(struct xml_context *ctx);
-
-static inline uns
-xml_peek_char(struct xml_context *ctx)
-{
-  if (ctx->bptr == ctx->bstop)
-    xml_refill(ctx);
-  return ctx->bptr[0];
-}
-
-static inline uns
-xml_peek_cat(struct xml_context *ctx)
-{
-  if (ctx->bptr == ctx->bstop)
-    xml_refill(ctx);
-  return ctx->bptr[1];
-}
-
-static inline uns
-xml_get_char(struct xml_context *ctx)
-{
-  uns c = xml_peek_char(ctx);
-  ctx->bptr += 2;
-  return c;
-}
-
-static inline uns
-xml_get_cat(struct xml_context *ctx)
-{
-  uns c = xml_peek_cat(ctx);
-  ctx->bptr += 2;
-  return c;
-}
-
-static inline uns
-xml_last_char(struct xml_context *ctx)
-{
-  return ctx->bptr[-2];
-}
-
-static inline uns
-xml_last_cat(struct xml_context *ctx)
-{
-  return ctx->bptr[-1];
-}
-
-static inline uns
-xml_skip_char(struct xml_context *ctx)
-{
-  uns c = ctx->bptr[0];
-  ctx->bptr += 2;
-  return c;
-}
-
-static inline uns
-xml_unget_char(struct xml_context *ctx)
-{
-  return *(ctx->bptr -= 2);
-}
-
-void xml_sources_cleanup(struct xml_context *ctx);
-
-/*** Parsing ***/
-
-void NONRET xml_fatal_expected(struct xml_context *ctx, uns c);
-void NONRET xml_fatal_expected_white(struct xml_context *ctx);
-void NONRET xml_fatal_expected_quot(struct xml_context *ctx);
-
-static inline uns
-xml_parse_white(struct xml_context *ctx, uns mandatory)
-{
-  /* mandatory=1 -> S ::= (#x20 | #x9 | #xD | #xA)+
-   * mandatory=0 -> S? */
-  uns cnt = 0;
-  while (xml_peek_cat(ctx) & XML_CHAR_WHITE)
-    {
-      xml_skip_char(ctx);
-      cnt++;
-    }
-  if (unlikely(mandatory && !cnt))
-    xml_fatal_expected_white(ctx);
-  return cnt;
-}
-
-static inline void
-xml_parse_char(struct xml_context *ctx, uns c)
-{
-  /* Consumes a given Unicode character */
-  if (unlikely(c != xml_get_char(ctx)))
-    xml_fatal_expected(ctx, c);
-}
-
-static inline void
-xml_parse_seq(struct xml_context *ctx, const char *seq)
-{
-  /* Consumes a given sequence of ASCII characters */
-  while (*seq)
-    xml_parse_char(ctx, *seq++);
-}
-
-void xml_parse_eq(struct xml_context *ctx);
-
-static inline uns
-xml_parse_quote(struct xml_context *ctx)
-{
-  /* "'" | '"' */
-  uns c = xml_get_char(ctx);
-  if (unlikely(c != '\'' && c != '\"'))
-    xml_fatal_expected_quot(ctx);
-  return c;
-}
-
-char *xml_parse_name(struct xml_context *ctx, struct mempool *pool);
-void xml_skip_name(struct xml_context *ctx);
-char *xml_parse_nmtoken(struct xml_context *ctx, struct mempool *pool);
-
-char *xml_parse_system_literal(struct xml_context *ctx, struct mempool *pool);
-char *xml_parse_pubid_literal(struct xml_context *ctx, struct mempool *pool);
-
-uns xml_parse_char_ref(struct xml_context *ctx);
-void xml_parse_pe_ref(struct xml_context *ctx);
-
-char *xml_parse_attr_value(struct xml_context *ctx, struct xml_dtd_attr *attr);
-
-void xml_skip_internal_subset(struct xml_context *ctx);
-void xml_parse_notation_decl(struct xml_context *ctx);
-void xml_parse_entity_decl(struct xml_context *ctx);
-void xml_parse_element_decl(struct xml_context *ctx);
-void xml_parse_attr_list_decl(struct xml_context *ctx);
-
-void xml_push_comment(struct xml_context *ctx);
-void xml_pop_comment(struct xml_context *ctx);
-void xml_skip_comment(struct xml_context *ctx);
-
-void xml_push_pi(struct xml_context *ctx);
-void xml_pop_pi(struct xml_context *ctx);
-void xml_skip_pi(struct xml_context *ctx);
-
-void xml_attrs_table_init(struct xml_context *ctx);
-void xml_attrs_table_cleanup(struct xml_context *ctx);
-
-void xml_validate_attr(struct xml_context *ctx, struct xml_dtd_attr *dtd, char *value);
-
-#endif
diff --git a/sherlock/xml/libshxml.pc b/sherlock/xml/libshxml.pc

deleted file mode 100644 (file)

index c2172b3..0000000
--- a/sherlock/xml/libshxml.pc
+++ /dev/null
@@ -1,11 +0,0 @@
-# pkg-config metadata for libshxml
-
-libdir=@LIBDIR@
-incdir=.
-
-Name: libshxml
-Description: XML parser for Sherlock project
-Version: @SHERLOCK_VERSION@
-Cflags: -I${incdir}
-Libs: -L${libdir} -lshxml
-Requires: @DEPS@
diff --git a/sherlock/xml/parse.c b/sherlock/xml/parse.c

deleted file mode 100644 (file)

index 27141b1..0000000
--- a/sherlock/xml/parse.c
+++ /dev/null
@@ -1,1287 +0,0 @@
-/*
- *     Sherlock Library -- A simple XML parser
- *
- *     (c) 2007--2008 Pavel Charvat <pchar@ucw.cz>
- *
- *     This software may be freely distributed and used according to the terms
- *     of the GNU Lesser General Public License.
- */
-
-#undef LOCAL_DEBUG
-
-#include "sherlock/sherlock.h"
-#include "sherlock/xml/xml.h"
-#include "sherlock/xml/dtd.h"
-#include "sherlock/xml/internals.h"
-#include "ucw/fastbuf.h"
-#include "ucw/ff-unicode.h"
-#include "ucw/unicode.h"
-#include "ucw/chartype.h"
-#include "ucw/hashfunc.h"
-
-#include <setjmp.h>
-
-/*** Basic parsing ***/
-
-void NONRET
-xml_fatal_expected(struct xml_context *ctx, uns c)
-{
-  if (c >= 32 && c < 128)
-    xml_fatal(ctx, "Expected '%c'", c);
-  else
-    xml_fatal(ctx, "Expected U+%04x", c);
-}
-
-void NONRET
-xml_fatal_expected_white(struct xml_context *ctx)
-{
-  xml_fatal(ctx, "Expected a white space");
-}
-
-void NONRET
-xml_fatal_expected_quot(struct xml_context *ctx)
-{
-  xml_fatal(ctx, "Expected a quotation mark");
-}
-
-void
-xml_parse_eq(struct xml_context *ctx)
-{
-  /* Eq ::= S? '=' S? */
-  xml_parse_white(ctx, 0);
-  xml_parse_char(ctx, '=');
-  xml_parse_white(ctx, 0);
-}
-
-/*** Names and nmtokens ***/
-
-static char *
-xml_parse_string(struct xml_context *ctx, struct mempool *pool, uns first_cat, uns next_cat, char *err)
-{
-  char *p = mp_start_noalign(pool, 1);
-  if (unlikely(!(xml_peek_cat(ctx) & first_cat)))
-    xml_fatal(ctx, "%s", err);
-  do
-    {
-      p = mp_spread(pool, p, 5);
-      p = utf8_32_put(p, xml_skip_char(ctx));
-    }
-  while (xml_peek_cat(ctx) & next_cat);
-  *p++ = 0;
-  return mp_end(pool, p);
-}
-
-static void
-xml_skip_string(struct xml_context *ctx, uns first_cat, uns next_cat, char *err)
-{
-  if (unlikely(!(xml_get_cat(ctx) & first_cat)))
-    xml_fatal(ctx, "%s", err);
-  while (xml_peek_cat(ctx) & next_cat)
-    xml_skip_char(ctx);
-}
-
-char *
-xml_parse_name(struct xml_context *ctx, struct mempool *pool)
-{
-  /* Name ::= NameStartChar (NameChar)* */
-  return xml_parse_string(ctx, pool, ctx->cat_sname, ctx->cat_name, "Expected a name");
-}
-
-void
-xml_skip_name(struct xml_context *ctx)
-{
-  xml_skip_string(ctx, ctx->cat_sname, ctx->cat_name, "Expected a name");
-}
-
-char *
-xml_parse_nmtoken(struct xml_context *ctx, struct mempool *pool)
-{
-  /* Nmtoken ::= (NameChar)+ */
-  return xml_parse_string(ctx, pool, ctx->cat_name, ctx->cat_name, "Expected a nmtoken");
-}
-
-/*** Simple literals ***/
-
-char *
-xml_parse_system_literal(struct xml_context *ctx, struct mempool *pool)
-{
-  /* SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") */
-  char *p = mp_start_noalign(pool, 1);
-  uns q = xml_parse_quote(ctx), c;
-  while ((c = xml_get_char(ctx)) != q)
-    {
-      p = mp_spread(pool, p, 5);
-      p = utf8_32_put(p, c);
-    }
-  *p++ = 0;
-  return mp_end(pool, p);
-}
-
-char *
-xml_parse_pubid_literal(struct xml_context *ctx, struct mempool *pool)
-{
-  /* PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" */
-  char *p = mp_start_noalign(pool, 1);
-  uns q = xml_parse_quote(ctx), c;
-  while ((c = xml_get_char(ctx)) != q)
-    {
-      if (unlikely(!(xml_last_cat(ctx) & XML_CHAR_PUBID)))
-       xml_fatal(ctx, "Expected a pubid character");
-      p = mp_spread(pool, p, 2);
-      *p++ = c;
-    }
-  *p++ = 0;
-  return mp_end(pool, p);
-}
-
-/*** Comments ***/
-
-void
-xml_push_comment(struct xml_context *ctx)
-{
-  TRACE(ctx, "push_comment");
-  /* Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
-   * Already parsed: '<!-' */
-  xml_parse_char(ctx, '-');
-  struct xml_node *n = xml_push_dom(ctx, NULL);
-  n->type = XML_NODE_COMMENT;
-  char *p = mp_start_noalign(ctx->pool, 6);
-  while (1)
-    {
-      if (xml_get_char(ctx) == '-')
-       if (xml_get_char(ctx) == '-')
-         break;
-       else
-         *p++ = '-';
-      p = utf8_32_put(p, xml_last_char(ctx));
-      p = mp_spread(ctx->pool, p, 6);
-    }
-  xml_parse_char(ctx, '>');
-  *p = 0;
-  n->len = p - (char *)mp_ptr(ctx->pool);
-  n->text = mp_end(ctx->pool, p + 1);
-  if ((ctx->flags & XML_REPORT_COMMENTS) && ctx->h_comment)
-    ctx->h_comment(ctx);
-}
-
-void
-xml_pop_comment(struct xml_context *ctx)
-{
-  xml_pop_dom(ctx, !(ctx->flags & XML_ALLOC_COMMENTS));
-  xml_dec(ctx);
-  TRACE(ctx, "pop_comment");
-}
-
-void
-xml_skip_comment(struct xml_context *ctx)
-{
-  TRACE(ctx, "skip_comment");
-  xml_parse_char(ctx, '-');
-  while (xml_get_char(ctx) != '-' || xml_get_char(ctx) != '-');
-  xml_parse_char(ctx, '>');
-  xml_dec(ctx);
-}
-
-/*** Processing instructions ***/
-
-void
-xml_push_pi(struct xml_context *ctx)
-{
-  TRACE(ctx, "push_pi");
-  /* Parses a PI to ctx->value and ctx->name:
-   *   PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
-   *   PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
-   * Already parsed: '<?' */
-  struct xml_node *n = xml_push_dom(ctx, NULL);
-  n->type = XML_NODE_PI;
-  n->name = xml_parse_name(ctx, ctx->pool);
-  if (unlikely(!strcasecmp(n->name, "xml")))
-    xml_error(ctx, "Reserved PI target");
-  char *p = mp_start_noalign(ctx->pool, 5);
-  if (!xml_parse_white(ctx, 0))
-    xml_parse_seq(ctx, "?>");
-  else
-    while (1)
-      {
-       if (xml_get_char(ctx) == '?')
-         if (xml_peek_char(ctx) == '>')
-           {
-             xml_skip_char(ctx);
-             break;
-           }
-         else
-           *p++ = '?';
-       else
-         p = utf8_32_put(p, xml_last_char(ctx));
-       p = mp_spread(ctx->pool, p, 5);
-      }
-  *p = 0;
-  n->len = p - (char *)mp_ptr(ctx->pool);
-  n->text = mp_end(ctx->pool, p + 1);
-  if ((ctx->flags & XML_REPORT_PIS) && ctx->h_pi)
-    ctx->h_pi(ctx);
-}
-
-void
-xml_pop_pi(struct xml_context *ctx)
-{
-  xml_pop_dom(ctx, !(ctx->flags & XML_ALLOC_PIS));
-  xml_dec(ctx);
-  TRACE(ctx, "pop_pi");
-}
-
-void
-xml_skip_pi(struct xml_context *ctx)
-{
-  TRACE(ctx, "skip_pi");
-  if (ctx->flags & XML_VALIDATING)
-    {
-      struct mempool_state state;
-      mp_save(ctx->stack, &state);
-      if (unlikely(!strcasecmp(xml_parse_name(ctx, ctx->stack), "xml")))
-       xml_error(ctx, "Reserved PI target");
-      mp_restore(ctx->stack, &state);
-      if (!xml_parse_white(ctx, 0))
-        {
-         xml_parse_seq(ctx, "?>");
-         xml_dec(ctx);
-         return;
-       }
-    }
-  while (1)
-    if (xml_get_char(ctx) == '?')
-      if (xml_peek_char(ctx) == '>')
-       break;
-  xml_skip_char(ctx);
-  xml_dec(ctx);
-}
-
-/*** Character references ***/
-
-uns
-xml_parse_char_ref(struct xml_context *ctx)
-{
-  TRACE(ctx, "parse_char_ref");
-  /* CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
-   * Already parsed: '&#' */
-  uns v = 0;
-  if (xml_get_char(ctx) == 'x')
-    {
-      if (!(xml_get_cat(ctx) & XML_CHAR_XDIGIT))
-        {
-         xml_error(ctx, "Expected a hexadecimal value of character reference");
-         goto recover;
-       }
-      do
-        {
-         v = (v << 4) + Cxvalue(xml_last_char(ctx));
-       }
-      while (v < 0x110000 && (xml_get_cat(ctx) & XML_CHAR_XDIGIT));
-    }
-  else
-    {
-      if (!(xml_last_cat(ctx) & XML_CHAR_DIGIT))
-        {
-         xml_error(ctx, "Expected a numeric value of character reference");
-         goto recover;
-       }
-      do
-        {
-         v = v * 10 + xml_last_char(ctx) - '0';
-       }
-      while (v < 0x110000 && (xml_get_cat(ctx) & XML_CHAR_DIGIT));
-    }
-  uns cat = xml_char_cat(v);
-  if (!(cat & ctx->cat_unrestricted))
-    {
-      xml_error(ctx, "Character reference out of range");
-      goto recover;
-    }
-  if (xml_last_char(ctx) == ';')
-    {
-      xml_dec(ctx);
-      return v;
-    }
-  xml_error(ctx, "Expected ';'");
-recover:
-  while (xml_last_char(ctx) != ';')
-    xml_get_char(ctx);
-  xml_dec(ctx);
-  return UNI_REPLACEMENT;
-}
-
-/*** References to general entities ***/
-
-static void
-xml_parse_ref(struct xml_context *ctx)
-{
-  /* Reference ::= EntityRef | CharRef
-   * EntityRef ::= '&' Name ';'
-   * Already parsed: '&' */
-  struct fastbuf *out = &ctx->chars;
-  if (xml_peek_char(ctx) == '#')
-    {
-      xml_skip_char(ctx);
-      bput_utf8_32(out, xml_parse_char_ref(ctx));
-    }
-  else
-    {
-      TRACE(ctx, "parse_ge_ref");
-      struct mempool_state state;
-      mp_save(ctx->stack, &state);
-      char *name = xml_parse_name(ctx, ctx->stack);
-      xml_parse_char(ctx, ';');
-      struct xml_dtd_entity *ent = xml_dtd_find_entity(ctx, name);
-      if (!ent)
-        {
-         xml_error(ctx, "Unknown entity &%s;", name);
-         bputc(out, '&');
-         bputs(out, name);
-         bputc(out, ';');
-       }
-      else if (ent->flags & XML_DTD_ENTITY_TRIVIAL)
-        {
-         TRACE(ctx, "Trivial entity &%s;", name);
-         bputs(out, ent->text);
-       }
-      else
-        {
-         TRACE(ctx, "Pushed entity &%s;", name);
-         mp_restore(ctx->stack, &state);
-          xml_dec(ctx);
-         xml_push_entity(ctx, ent);
-         return;
-       }
-      mp_restore(ctx->stack, &state);
-      xml_dec(ctx);
-    }
-}
-
-/*** Character data ***/
-
-void
-xml_spout_chars(struct fastbuf *fb)
-{
-  if (fb->bptr < fb->bufend)
-    return;
-  struct xml_context *ctx = SKIP_BACK(struct xml_context, chars, fb);
-  struct mempool *pool = ctx->pool;
-  if (fb->bufend != fb->buffer)
-    {
-      TRACE(ctx, "growing chars");
-      uns len = fb->bufend - fb->buffer;
-      uns reported = fb->bstop - fb->buffer;
-      fb->buffer = mp_expand(pool);
-      fb->bufend = fb->buffer + mp_avail(pool);
-      fb->bptr = fb->buffer + len;
-      fb->bstop = fb->buffer + reported;
-    }
-  else
-    {
-      TRACE(ctx, "starting chars");
-      mp_save(pool, &ctx->chars_state);
-      fb->bptr = fb->buffer = fb->bstop = mp_start_noalign(pool, 2);
-      fb->bufend = fb->buffer + mp_avail(pool) - 1;
-    }
-}
-
-static inline uns
-xml_end_chars(struct xml_context *ctx, char **out)
-{
-  struct fastbuf *fb = &ctx->chars;
-  uns len = fb->bptr - fb->buffer;
-  if (len)
-    {
-      TRACE(ctx, "ending chars");
-      *fb->bptr = 0;
-      *out = mp_end(ctx->pool, fb->bptr + 1);
-      fb->bufend = fb->bstop = fb->bptr = fb->buffer;
-    }
-  return len;
-}
-
-static inline uns
-xml_report_chars(struct xml_context *ctx, char **out)
-{
-  struct fastbuf *fb = &ctx->chars;
-  uns len = fb->bptr - fb->buffer;
-  if (len)
-    {
-      *fb->bptr = 0;
-      *out = fb->bstop;
-      fb->bstop = fb->bptr;
-    }
-  return len;
-}
-
-static inline uns
-xml_flush_chars(struct xml_context *ctx)
-{
-  char *text, *rtext;
-  uns len = xml_end_chars(ctx, &text), rlen;
-  if (len)
-    {
-      if (ctx->flags & XML_NO_CHARS)
-        {
-          if ((ctx->flags & XML_REPORT_CHARS) && ctx->h_ignorable)
-            ctx->h_ignorable(ctx, text, len);
-         mp_restore(ctx->pool, &ctx->chars_state);
-         return 0;
-       }
-      if ((ctx->flags & XML_REPORT_CHARS) && ctx->h_block && (rlen = xml_report_chars(ctx, &rtext)))
-       ctx->h_block(ctx, rtext, rlen);
-      if (!(ctx->flags & XML_ALLOC_CHARS) && !(ctx->flags & XML_REPORT_CHARS))
-        {
-         mp_restore(ctx->pool, &ctx->chars_state);
-         return 0;
-       }
-      struct xml_node *n = xml_push_dom(ctx, &ctx->chars_state);
-      n->type = XML_NODE_CHARS;
-      n->text = text;
-      n->len = len;
-      if ((ctx->flags & XML_REPORT_CHARS) && ctx->h_chars)
-        ctx->h_chars(ctx);
-    }
-  return len;
-}
-
-static inline void
-xml_pop_chars(struct xml_context *ctx)
-{
-  xml_pop_dom(ctx, !(ctx->flags & XML_ALLOC_CHARS));
-  TRACE(ctx, "pop_chars");
-}
-
-/* Read character data up to the next '<' (which is pushed back) and append it to ctx->chars.
- * With XML_NO_CHARS set only white space is accepted; anything else is reported as an error
- * and the rest of the data up to '<' is skipped. Otherwise references introduced by '&'
- * are handed to xml_parse_ref(). */
-static inline void
-xml_append_chars(struct xml_context *ctx)
-{
-  TRACE(ctx, "append_chars");
-  struct fastbuf *out = &ctx->chars;
-  if (!(ctx->flags & XML_NO_CHARS))
-    {
-      while (xml_get_char(ctx) != '<')
-        {
-          if (xml_last_char(ctx) != '&')
-            bput_utf8_32(out, xml_last_char(ctx));
-          else
-            {
-              xml_inc(ctx);
-              xml_parse_ref(ctx);
-            }
-        }
-    }
-  else
-    {
-      while (xml_get_char(ctx) != '<')
-        {
-          if (!(xml_last_cat(ctx) & XML_CHAR_WHITE))
-            {
-              xml_error(ctx, "This element must not contain character data");
-              /* Skip the rest of the forbidden text */
-              while (xml_get_char(ctx) != '<')
-                ;
-              break;
-            }
-          bput_utf8_32(out, xml_last_char(ctx));
-        }
-    }
-  xml_unget_char(ctx);
-}
-
-/*** CDATA sections ***/
-
-/* Skip a CDATA section without storing its contents.
- * Already parsed: '<![' ; consumes everything up to and including the terminating ']]>'.
- * The terminator is recognized even when it is preceded by further ']' characters
- * (e.g. "]]]>"), which belong to the section contents. */
-static void
-xml_skip_cdata(struct xml_context *ctx)
-{
-  TRACE(ctx, "skip_cdata");
-  xml_parse_seq(ctx, "CDATA[");
-  /* Number of consecutive ']' characters seen immediately before the current one */
-  uns brackets = 0;
-  while (1)
-    {
-      uns c = xml_get_char(ctx);
-      if (c == ']')
-        brackets++;
-      else if (c == '>' && brackets >= 2)
-        break;
-      else
-        brackets = 0;
-    }
-  xml_dec(ctx);
-}
-
-/* Append the contents of a CDATA section to ctx->chars.
- * With XML_NO_CHARS set the section is reported as an error and skipped.
- * Text accumulated before the section is reported through h_block and the section
- * itself through h_cdata (both only with XML_REPORT_CHARS).
- * The terminator ']]>' is recognized even when preceded by further ']' characters:
- * "]]]>" yields a single ']' of data followed by the end of the section. */
-static void
-xml_append_cdata(struct xml_context *ctx)
-{
-  /* CDSect :== '<![CDATA[' (Char* - (Char* ']]>' Char*)) ']]>'
-   * Already parsed: '<![' */
-  TRACE(ctx, "append_cdata");
-  if (ctx->flags & XML_NO_CHARS)
-    {
-      xml_error(ctx, "This element must not contain CDATA");
-      xml_skip_cdata(ctx);
-      return;
-    }
-  xml_parse_seq(ctx, "CDATA[");
-  struct fastbuf *out = &ctx->chars;
-  uns rlen;
-  char *rtext;
-  if ((ctx->flags & XML_REPORT_CHARS) && ctx->h_block && (rlen = xml_report_chars(ctx, &rtext)))
-    ctx->h_block(ctx, rtext, rlen);
-  /* Number of ']' characters read but not written yet (at most 2), they may start the terminator */
-  uns pending = 0;
-  while (1)
-    {
-      uns c = xml_get_char(ctx);
-      if (c == ']')
-        {
-          if (pending == 2)
-            /* The oldest of three brackets cannot be a part of ']]>' any more */
-            bputc(out, ']');
-          else
-            pending++;
-          continue;
-        }
-      if (c == '>' && pending == 2)
-        break;
-      /* Ordinary character: the withheld brackets were plain data */
-      for (; pending; pending--)
-        bputc(out, ']');
-      bput_utf8_32(out, c);
-    }
-  if ((ctx->flags & XML_REPORT_CHARS) && ctx->h_cdata && (rlen = xml_report_chars(ctx, &rtext)))
-    ctx->h_cdata(ctx, rtext, rlen);
-  xml_dec(ctx);
-}
-
-/*** Attribute values ***/
-
-/* Parse a quoted attribute value and return it as a zero-terminated string
- * (an empty string literal when the value has no characters).
- * References are expanded by xml_parse_ref(), white space characters are replaced
- * by a single space each and '<' is reported as an error and dropped.
- * The closing quote is accepted only in the source where the value started. */
-char *
-xml_parse_attr_value(struct xml_context *ctx, struct xml_dtd_attr *attr UNUSED)
-{
-  TRACE(ctx, "parse_attr_value");
-  /* AttValue ::= '"' ([^<&"] | Reference)* '"'        | "'" ([^<&'] | Reference)* "'" */
-  /* FIXME: -- check value constrains / normalize leading/trailing WS and repeated WS */
-  struct mempool_state state;
-  uns quote = xml_parse_quote(ctx);
-  mp_save(ctx->stack, &state);
-  struct fastbuf *out = &ctx->chars;
-  struct xml_source *src = ctx->src;
-  for (;;)
-    {
-      uns c = xml_get_char(ctx);
-      if (c == '&')
-        {
-          xml_inc(ctx);
-          xml_parse_ref(ctx);
-          continue;
-        }
-      if (c == quote && src == ctx->src)
-        break;
-      if (c == '<')
-        xml_error(ctx, "Attribute value must not contain '<'");
-      else if (xml_last_cat(ctx) & XML_CHAR_WHITE)
-        bputc(out, ' ');
-      else
-        bput_utf8_32(out, c);
-    }
-  mp_restore(ctx->stack, &state);
-  char *text;
-  if (!xml_end_chars(ctx, &text))
-    return "";
-  return text;
-}
-
-/* Normalize spaces (0x20) in a zero-terminated string in place: leading and trailing
- * spaces are removed and every inner run of spaces is squeezed to a single one.
- * Returns the length of the resulting string. */
-uns
-xml_normalize_white(struct xml_context *ctx UNUSED, char *text)
-{
-  char *src = text, *dst = text;
-  /* Leading spaces */
-  while (*src == 0x20)
-    src++;
-  for (;;)
-    {
-      /* Copy one word */
-      while (*src && *src != 0x20)
-        *dst++ = *src++;
-      if (!*src)
-        break;
-      /* Replace the following run of spaces by a single space */
-      do
-        src++;
-      while (*src == 0x20);
-      *dst++ = 0x20;
-    }
-  /* A run of spaces at the very end has left one space behind */
-  if (dst != text && dst[-1] == 0x20)
-    dst--;
-  *dst = 0;
-  return dst - text;
-}
-
-/*** Attributes ***/
-
-struct xml_attrs_table;
-
-/* Hash function of the attribute table: combines the hash of the element pointer
- * with the hash of the attribute name. */
-static inline uns
-xml_attrs_hash(struct xml_attrs_table *t UNUSED, struct xml_node *e, char *n)
-{
-  uns elem_hash = hash_pointer(e);
-  uns name_hash = hash_string(n);
-  return elem_hash ^ name_hash;
-}
-
-/* Key comparison of the attribute table: two keys are equal when they refer
- * to the same element and their names are equal strings. */
-static inline int
-xml_attrs_eq(struct xml_attrs_table *t UNUSED, struct xml_node *e1, char *n1, struct xml_node *e2, char *n2)
-{
-  if (e1 != e2)
-    return 0;
-  return !strcmp(n1, n2);
-}
-
-/* Initialize a new node of the attribute table for the key (e, name)
- * and append it to the list of attributes of the element.
- * The value, the DTD declaration and the user pointer start as NULL; the DTD pointer
- * has to be cleared here, because nodes created for default attribute values
- * in xml_push_element() never get it assigned. */
-static inline void
-xml_attrs_init_key(struct xml_attrs_table *t UNUSED, struct xml_attr *a, struct xml_node *e, char *name)
-{
-  a->elem = e;
-  a->name = name;
-  a->val = NULL;
-  a->dtd = NULL;
-  a->user = NULL;
-  slist_add_tail(&e->attrs, &a->n);
-}
-
-/* Instantiate the generic hash table from ucw/hashtable.h as xml_attrs_*:
- * nodes are struct xml_attr, the key is the pair (element, attribute name) and
- * hashing, comparison and key initialization are supplied by the functions above.
- * Allocation is provided by the XML_HASH_GIVE_ALLOC macro. */
-#define HASH_PREFIX(x) xml_attrs_##x
-#define HASH_NODE struct xml_attr
-#define HASH_KEY_COMPLEX(x) x elem, x name
-#define HASH_KEY_DECL struct xml_node *elem, char *name
-#define HASH_TABLE_DYNAMIC
-#define HASH_GIVE_EQ
-#define HASH_GIVE_HASHFN
-#define HASH_GIVE_INIT_KEY
-#define HASH_WANT_CLEANUP
-#define HASH_WANT_REMOVE
-#define HASH_WANT_LOOKUP
-#define HASH_WANT_FIND
-#define HASH_GIVE_ALLOC
-XML_HASH_GIVE_ALLOC
-#include "ucw/hashtable.h"
-
-/* Parse one attribute of the current element (ctx->node) and store its value
- * in the attribute table. A repeated attribute is reported as an error and its
- * new value is dropped. If the element has a DTD declaration, the attribute
- * is looked up there and validated. */
-static void
-xml_parse_attr(struct xml_context *ctx)
-{
-  TRACE(ctx, "parse_attr");
-  /* Attribute ::= Name Eq AttValue */
-  struct xml_node *elem = ctx->node;
-  char *name = xml_parse_name(ctx, ctx->pool);
-  struct xml_attr *attr = xml_attrs_lookup(ctx->tab_attrs, elem, name);
-  xml_parse_eq(ctx);
-  char *value = xml_parse_attr_value(ctx, NULL);
-  if (attr->val)
-    {
-      xml_error(ctx, "Attribute %s is not unique in element <%s>", name, elem->name);
-      return;
-    }
-  attr->val = value;
-  if (!elem->dtd)
-    {
-      attr->dtd = NULL;
-      return;
-    }
-  attr->dtd = xml_dtd_find_attr(ctx, elem->dtd, attr->name);
-  if (attr->dtd)
-    xml_validate_attr(ctx, attr->dtd, attr->val);
-  else
-    xml_error(ctx, "Undefined attribute %s in element <%s>", name, elem->name);
-}
-
-/* Look up the attribute `name` of the element `node` in the attribute table
- * of the context; returns whatever xml_attrs_find() returns for that key. */
-struct xml_attr *
-xml_attr_find(struct xml_context *ctx, struct xml_node *node, char *name)
-{
-  return xml_attrs_find(ctx->tab_attrs, node, name);
-}
-
-/* Return the value of the attribute `name` of the element `node`.
- * If the attribute is not present in the attribute table, the default value from
- * the element's DTD declaration is returned instead; NULL when neither is available. */
-char *
-xml_attr_value(struct xml_context *ctx, struct xml_node *node, char *name)
-{
-  struct xml_attr *attr = xml_attrs_find(ctx->tab_attrs, node, name);
-  if (attr)
-    return attr->val;
-  if (node->dtd)
-    {
-      struct xml_dtd_attr *dtd = xml_dtd_find_attr(ctx, node->dtd, name);
-      if (dtd)
-        return dtd->default_value;
-    }
-  return NULL;
-}
-
-/* Create the attribute table of the context: the table structure is obtained
- * from xml_hash_new() on ctx->pool, stored in ctx->tab_attrs and initialized. */
-void
-xml_attrs_table_init(struct xml_context *ctx)
-{
-  xml_attrs_init(ctx->tab_attrs = xml_hash_new(ctx->pool, sizeof(struct xml_attrs_table)));
-}
-
-/* Clean up the attribute table stored in ctx->tab_attrs. */
-void
-xml_attrs_table_cleanup(struct xml_context *ctx)
-{
-  xml_attrs_cleanup(ctx->tab_attrs);
-}
-
-/*** Elements ***/
-
-/* Check whether `elem` occurs anywhere in the content model tree rooted at `root`:
- * a node with an element set matches only that element, other nodes match
- * when any of their sons does. Returns non-zero on a match. */
-static uns
-xml_validate_element(struct xml_dtd_elem_node *root, struct xml_dtd_elem *elem)
-{
-  if (root->elem)
-    return elem == root->elem;
-  SLIST_FOR_EACH(struct xml_dtd_elem_node *, son, root->sons)
-    {
-      if (xml_validate_element(son, elem))
-        return 1;
-    }
-  return 0;
-}
-
-/* Parse a start tag or an empty-element tag and push a new XML_NODE_ELEM node.
- * An element without a parent becomes ctx->dom and is compared with the declared
- * document type. When a DTD is loaded, the element is looked up in it, XML_NO_CHARS
- * is set unless the element has mixed content and the element is checked against
- * the parent's content model. Attributes are parsed up to '>' or '/>' (the latter sets
- * XML_EMPTY_ELEM_TAG), required attributes are checked, default values are filled in
- * when XML_ALLOC_DEFAULT_ATTRS is set and finally h_stag is called if XML_REPORT_TAGS is set. */
-static void
-xml_push_element(struct xml_context *ctx)
-{
-  TRACE(ctx, "push_element");
-  /* EmptyElemTag | STag
-   * EmptyElemTag ::= '<' Name (S  Attribute)* S? '/>'
-   * STag ::= '<' Name (S  Attribute)* S? '>'
-   * Already parsed: '<' */
-  struct xml_node *e = xml_push_dom(ctx, NULL);
-  clist_init(&e->sons);
-  e->type = XML_NODE_ELEM;
-  e->name = xml_parse_name(ctx, ctx->pool);
-  slist_init(&e->attrs);
-  if (!e->parent)
-    {
-      /* The root element */
-      ctx->dom = e;
-      if (ctx->doctype && strcmp(e->name, ctx->doctype))
-       xml_error(ctx, "The root element <%s> does not match the document type <%s>", e->name, ctx->doctype);
-    }
-  if (!ctx->dtd)
-    e->dtd = NULL;
-  else if (!(e->dtd = xml_dtd_find_elem(ctx, e->name)))
-    xml_error(ctx, "Undefined element <%s>", e->name);
-  else
-    {
-      struct xml_dtd_elem *dtd = e->dtd, *parent_dtd = e->parent ? e->parent->dtd : NULL;
-      /* Character data are allowed only in elements with mixed content */
-      if (dtd->type == XML_DTD_ELEM_MIXED)
-        ctx->flags &= ~XML_NO_CHARS;
-      else
-       ctx->flags |= XML_NO_CHARS;
-      if (parent_dtd)
-        if (parent_dtd->type == XML_DTD_ELEM_EMPTY)
-         xml_error(ctx, "Empty element must not contain children");
-        else if (parent_dtd->type != XML_DTD_ELEM_ANY)
-         {
-           // FIXME: validate regular expressions
-           if (!xml_validate_element(parent_dtd->node, dtd))
-             xml_error(ctx, "Unexpected element <%s>", e->name);
-         }
-    }
-  /* Attributes and the end of the tag */
-  while (1)
-    {
-      uns white = xml_parse_white(ctx, 0);
-      uns c = xml_get_char(ctx);
-      if (c == '/')
-        {
-         xml_parse_char(ctx, '>');
-         ctx->flags |= XML_EMPTY_ELEM_TAG;
-         break;
-       }
-      else if (c == '>')
-       break;
-      else if (!white)
-       xml_fatal_expected_white(ctx);
-      xml_unget_char(ctx);
-      xml_parse_attr(ctx);
-    }
-  /* Required and default attributes declared in the DTD */
-  if (e->dtd)
-    SLIST_FOR_EACH(struct xml_dtd_attr *, a, e->dtd->attrs)
-      if (a->default_mode == XML_ATTR_REQUIRED)
-        {
-         if (!xml_attrs_find(ctx->tab_attrs, e, a->name))
-           xml_error(ctx, "Missing required attribute %s in element <%s>", a->name, e->name);
-       }
-      else if (a->default_mode != XML_ATTR_IMPLIED && ctx->flags & XML_ALLOC_DEFAULT_ATTRS)
-        {
-         struct xml_attr *attr = xml_attrs_lookup(ctx->tab_attrs, e, a->name);
-         if (!attr->val)
-           attr->val = a->default_value;
-       }
-  if ((ctx->flags & XML_REPORT_TAGS) && ctx->h_stag)
-    ctx->h_stag(ctx);
-}
-
-/* Leave the current element (ctx->node): call h_etag if XML_REPORT_TAGS is set and pop
- * the node. When XML_ALLOC_TAGS is not set, the attributes of the element and of all
- * element nodes below it are removed from the attribute table and the sons are unlinked
- * before the node is popped with the second argument of xml_pop_dom() set. */
-static void
-xml_pop_element(struct xml_context *ctx)
-{
-  TRACE(ctx, "pop_element");
-  if ((ctx->flags & XML_REPORT_TAGS) && ctx->h_etag)
-    ctx->h_etag(ctx);
-  struct xml_node *e = ctx->node;
-  uns free = !(ctx->flags & XML_ALLOC_TAGS);
-  if (free)
-    {
-      if (!e->parent)
-       ctx->dom = NULL;
-      /* Restore hash table of attributes */
-      SLIST_FOR_EACH(struct xml_attr *, a, e->attrs)
-       xml_attrs_remove(ctx->tab_attrs, a);
-      struct xml_node *n;
-      while (n = clist_head(&e->sons))
-        {
-         if (n->type == XML_NODE_ELEM)
-           {
-             SLIST_FOR_EACH(struct xml_attr *, a, n->attrs)
-               xml_attrs_remove(ctx->tab_attrs, a);
-             /* Splice the grandsons into the list of sons, so that this loop visits them, too */
-             clist_insert_list_after(&n->sons, &n->n);
-           }
-         clist_remove(&n->n);
-       }
-    }
-  xml_pop_dom(ctx, free);
-  xml_dec(ctx);
-}
-
-/* Parse an end tag and check that its name matches the current element (ctx->node).
- * A mismatch is reported as a (non-fatal) error and the input is skipped up to
- * the closing '>'. If the mismatching character is the '>' itself (e.g. the tag
- * carries only a prefix of the expected name), nothing more is skipped, so that
- * the recovery does not swallow the text following the tag. */
-static void
-xml_parse_etag(struct xml_context *ctx)
-{
- /* ETag ::= '</' Name S? '>'
-  * Already parsed: '</' */
-  struct xml_node *e = ctx->node;
-  ASSERT(e);
-  char *n = e->name;
-  uns matched = 1;
-  while (*n)
-    {
-      uns c;
-      n = utf8_32_get(n, &c);
-      if (xml_get_char(ctx) != c)
-        {
-          matched = 0;
-          break;
-        }
-    }
-  if (matched)
-    {
-      xml_parse_white(ctx, 0);
-      matched = (xml_get_char(ctx) == '>');
-    }
-  if (!matched)
-    {
-      xml_error(ctx, "Invalid ETag, expected </%s>", e->name);
-      /* Recover at the end of the tag; the offending character may already be the '>' */
-      while (xml_last_char(ctx) != '>')
-        xml_get_char(ctx);
-    }
-  xml_dec(ctx);
-}
-
-/*** Document type declaration ***/
-
-/* Parse the beginning of a document type declaration: the name of the document type
- * and the optional external identifier (SYSTEM or PUBLIC). Sets XML_HAS_EXTERNAL_SUBSET
- * and XML_HAS_INTERNAL_SUBSET accordingly and calls h_doctype_decl if it is set.
- * A second declaration in one document is a fatal error. */
-static void
-xml_parse_doctype_decl(struct xml_context *ctx)
-{
-  TRACE(ctx, "parse_doctype_decl");
-  /* doctypedecl ::= '<!DOCTYPE' S  Name (S  ExternalID)? S? ('[' intSubset ']' S?)? '>'
-   * Already parsed: '<!'
-   * Terminated before '[' or '>' */
-  if (ctx->doctype)
-    xml_fatal(ctx, "Multiple document types not allowed");
-  xml_parse_seq(ctx, "DOCTYPE");
-  xml_parse_white(ctx, 1);
-  ctx->doctype = xml_parse_name(ctx, ctx->pool);
-  TRACE(ctx, "doctype=%s", ctx->doctype);
-
-  /* The external identifier must be separated by white space */
-  uns kind = 0;
-  if (xml_parse_white(ctx, 0))
-    kind = xml_peek_char(ctx);
-  if (kind == 'S' || kind == 'P')
-    {
-      if (kind == 'P')
-        {
-          xml_parse_seq(ctx, "PUBLIC");
-          xml_parse_white(ctx, 1);
-          ctx->public_id = xml_parse_pubid_literal(ctx, ctx->pool);
-          xml_parse_white(ctx, 1);
-        }
-      else
-        {
-          xml_parse_seq(ctx, "SYSTEM");
-          xml_parse_white(ctx, 1);
-        }
-      /* Both forms end with a system literal */
-      ctx->system_id = xml_parse_system_literal(ctx, ctx->pool);
-      xml_parse_white(ctx, 0);
-      ctx->flags |= XML_HAS_EXTERNAL_SUBSET;
-    }
-
-  if (xml_peek_char(ctx) == '[')
-    {
-      ctx->flags |= XML_HAS_INTERNAL_SUBSET;
-      xml_skip_char(ctx);
-      xml_inc(ctx);
-    }
-  if (ctx->h_doctype_decl)
-    ctx->h_doctype_decl(ctx);
-}
-
-
-
-///////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-/* DTD: Internal subset */
-
-/* Parse the declarations of a DTD subset: comments, processing instructions,
- * NOTATION, ENTITY, ELEMENT and ATTLIST declarations and parameter entity references.
- * The internal subset (external == 0) ends at ']', the external one at '>'.
- * Anything else raises a fatal error. */
-static void
-xml_parse_subset(struct xml_context *ctx, uns external)
-{
-  // FIXME:
-  // -- comments/pi have no parent
-  // -- conditional sections in external subset
-  // -- check correctness of parameter entities
-
-  /* '[' intSubset ']'
-   * intSubset :== (markupdecl | DeclSep)
-   * Already parsed: '['
-   *
-   * extSubsetDecl ::= ( markupdecl | conditionalSect | DeclSep)*
-   */
-  while (1)
-    {
-      xml_parse_white(ctx, 0);
-      uns c = xml_get_char(ctx);
-      xml_inc(ctx);
-      if (c == '<')
-       if ((c = xml_get_char(ctx)) == '!')
-         switch (c = xml_get_char(ctx))
-           {
-             case '-':
-               xml_push_comment(ctx);
-               xml_pop_comment(ctx);
-               break;
-             case 'N':
-               xml_parse_seq(ctx, "OTATION");
-               xml_parse_notation_decl(ctx);
-               break;
-             case 'E':
-               /* Either <!ENTITY or <!ELEMENT */
-               if ((c = xml_get_char(ctx)) == 'N')
-                 {
-                   xml_parse_seq(ctx, "TITY");
-                   xml_parse_entity_decl(ctx);
-                 }
-               else if (c == 'L')
-                 {
-                   xml_parse_seq(ctx, "EMENT");
-                   xml_parse_element_decl(ctx);
-                 }
-               else
-                 goto invalid_markup;
-               break;
-             case 'A':
-               xml_parse_seq(ctx, "TTLIST");
-               xml_parse_attr_list_decl(ctx);
-               break;
-             default:
-               goto invalid_markup;
-           }
-        else if (c == '?')
-         {
-           xml_push_pi(ctx);
-           xml_pop_pi(ctx);
-         }
-        else
-         goto invalid_markup;
-      else if (c == '%')
-       xml_parse_pe_ref(ctx);
-      else if (c == ']' && !external)
-        {
-         break;
-       }
-      else if (c == '>' && external)
-        {
-         break;
-       }
-      else
-       goto invalid_markup;
-    }
-  xml_dec(ctx);
-  return;
-invalid_markup: ;
-  xml_fatal(ctx, "Invalid markup in the %s subset", external ? "external" : "internal");
-}
-
-/*** The State Machine ***/
-
-/* Advance the parser to the next state requested in ctx->pull and return that state;
- * XML_STATE_EOF is returned at the end of the document and after a fatal error.
- *
- * The function is a resumable state machine: PULL(x) returns with ctx->state set
- * to XML_STATE_x when the corresponding XML_PULL_x bit is set in ctx->pull, and the
- * case label embedded in the macro lets the next call continue right behind it.
- * PULL_STATE(x, s) returns XML_STATE_x, but remembers XML_STATE_s for the resumption.
- * Fatal errors arrive through longjmp() to throw_buf. */
-uns
-xml_next(struct xml_context *ctx)
-{
-  /* A nasty state machine */
-
-#define PULL(x) do { if (ctx->pull & XML_PULL_##x) return ctx->state = XML_STATE_##x; case XML_STATE_##x: ; } while (0)
-#define PULL_STATE(x, s) do { if (ctx->pull & XML_PULL_##x) return ctx->state = XML_STATE_##s, XML_STATE_##x; case XML_STATE_##s: ; } while (0)
-
-  TRACE(ctx, "xml_next (state=%u)", ctx->state);
-  jmp_buf throw_buf;
-  ctx->throw_buf = &throw_buf;
-  if (setjmp(throw_buf))
-    {
-error:
-      /* h_fatal is called here only for the EOF error code */
-      if (ctx->err_code == XML_ERR_EOF && ctx->h_fatal)
-       ctx->h_fatal(ctx);
-      TRACE(ctx, "raised fatal error");
-      return ctx->state = XML_STATE_EOF;
-    }
-  uns c;
-  switch (ctx->state)
-    {
-      case XML_STATE_START:
-       TRACE(ctx, "entering prolog");
-       ctx->flags |= XML_SRC_DOCUMENT | XML_SRC_EXPECTED_DECL;
-       if (ctx->h_document_start)
-         ctx->h_document_start(ctx);
-       /* XMLDecl */
-       xml_refill(ctx);
-       if (ctx->h_xml_decl)
-         ctx->h_xml_decl(ctx);
-       PULL(XML_DECL);
-
-       /* Misc* (doctypedecl Misc*)? */
-        while (1)
-         {
-           xml_parse_white(ctx, 0);
-           xml_parse_char(ctx, '<');
-           xml_inc(ctx);
-           if ((c = xml_get_char(ctx)) == '?')
-             /* Processing instruction */
-             if (!(ctx->flags & XML_REPORT_PIS))
-               xml_skip_pi(ctx);
-             else
-               {
-                 xml_push_pi(ctx);
-                 PULL_STATE(PI, PROLOG_PI);
-                 xml_pop_pi(ctx);
-               }
-           else if (c != '!')
-             {
-               /* Found the root tag */
-               xml_unget_char(ctx);
-               goto first_tag;
-             }
-           else if (xml_get_char(ctx) == '-')
-             if (!(ctx->flags & XML_REPORT_COMMENTS))
-               xml_skip_comment(ctx);
-             else
-               {
-                 xml_push_comment(ctx);
-                 PULL_STATE(COMMENT, PROLOG_COMMENT);
-                 xml_pop_comment(ctx);
-               }
-           else
-             {
-               /* DocTypeDecl */
-               xml_unget_char(ctx);
-               xml_parse_doctype_decl(ctx);
-               PULL(DOCTYPE_DECL);
-               if (ctx->flags & XML_HAS_DTD)
-                 if (ctx->flags & XML_PARSE_DTD)
-                   {
-                     xml_dtd_init(ctx);
-                     if (ctx->h_dtd_start)
-                       ctx->h_dtd_start(ctx);
-                     if (ctx->flags & XML_HAS_INTERNAL_SUBSET)
-                       {
-                         xml_parse_subset(ctx, 0);
-                         xml_dec(ctx);
-                       }
-                     if (ctx->flags & XML_HAS_EXTERNAL_SUBSET)
-                       {
-                         struct xml_dtd_entity ent = {
-                           .system_id = ctx->system_id,
-                           .public_id = ctx->public_id,
-                         };
-                         xml_parse_white(ctx, 0);
-                         xml_parse_char(ctx, '>');
-                         xml_unget_char(ctx);
-                         ASSERT(ctx->h_resolve_entity);
-                         ctx->h_resolve_entity(ctx, &ent);
-                         ctx->flags |= XML_SRC_EXPECTED_DECL;
-                         xml_parse_subset(ctx, 1);
-                         xml_unget_char(ctx);;
-                       }
-                     if (ctx->h_dtd_end)
-                       ctx->h_dtd_end(ctx);
-                   }
-                 else if (ctx->flags & XML_HAS_INTERNAL_SUBSET)
-                   xml_skip_internal_subset(ctx);
-               xml_parse_white(ctx, 0);
-               xml_parse_char(ctx, '>');
-               xml_dec(ctx);
-             }
-         }
-
-      case XML_STATE_CHARS:
-
-       /* Contents of the root element */
-       while (1)
-         {
-           if (xml_peek_char(ctx) != '<')
-             {
-               /* CharData */
-               xml_append_chars(ctx);
-               continue;
-             }
-           else
-             xml_skip_char(ctx);
-           xml_inc(ctx);
-first_tag:
-
-           if ((c = xml_get_char(ctx)) == '?')
-             {
-               /* PI */
-               if (!(ctx->flags & (XML_REPORT_PIS | XML_ALLOC_PIS)))
-                 xml_skip_pi(ctx);
-               else
-                 {
-                   if (xml_flush_chars(ctx))
-                     {
-                       PULL_STATE(CHARS, CHARS_BEFORE_PI);
-                       xml_pop_chars(ctx);
-                     }
-                   xml_push_pi(ctx);
-                   PULL(PI);
-                   xml_pop_pi(ctx);
-                 }
-             }
-
-           else if (c == '!')
-             if ((c = xml_get_char(ctx)) == '-')
-               {
-                 /* Comment */
-                 if (!(ctx->flags & (XML_REPORT_COMMENTS | XML_ALLOC_COMMENTS)))
-                   xml_skip_comment(ctx);
-                 else
-                   {
-                     if (xml_flush_chars(ctx))
-                       {
-                         PULL_STATE(CHARS, CHARS_BEFORE_COMMENT);
-                         xml_pop_chars(ctx);
-                       }
-                     xml_push_comment(ctx);
-                     PULL(COMMENT);
-                     xml_pop_comment(ctx);
-                   }
-               }
-             else if (c == '[')
-               {
-                 /* CDATA */
-                 xml_append_cdata(ctx);
-               }
-             else
-               xml_fatal(ctx, "Unexpected character after '<!'");
-
-           else if (c != '/')
-             {
-               /* STag | EmptyElemTag */
-               xml_unget_char(ctx);
-               if (xml_flush_chars(ctx))
-                 {
-                   PULL_STATE(CHARS, CHARS_BEFORE_STAG);
-                   xml_pop_chars(ctx);
-                 }
-
-               xml_push_element(ctx);
-               PULL(STAG);
-               if (ctx->flags & XML_EMPTY_ELEM_TAG)
-                 goto pop_element;
-             }
-
-           else
-             {
-               /* ETag */
-               if (xml_flush_chars(ctx))
-                 {
-                   PULL_STATE(CHARS, CHARS_BEFORE_ETAG);
-                   xml_pop_chars(ctx);
-                 }
-
-               xml_parse_etag(ctx);
-pop_element:
-               PULL(ETAG);
-               xml_pop_element(ctx);
-               /* The root element has been closed */
-               if (!ctx->node)
-                 goto epilog;
-             }
-         }
-
-epilog:
-       /* Misc* */
-        TRACE(ctx, "entering epilog");
-       while (1)
-         {
-           /* Epilog whitespace is the only place, where a valid document can reach EOF */
-           if (setjmp(throw_buf))
-             if (ctx->err_code == XML_ERR_EOF)
-               {
-                 TRACE(ctx, "reached EOF");
-                 ctx->state = XML_STATE_EOF;
-                 if (ctx->h_document_end)
-                   ctx->h_document_end(ctx);
-      case XML_STATE_EOF:
-                 ctx->err_code = 0;
-                 ctx->err_msg = NULL;
-                 return XML_STATE_EOF;
-               }
-             else
-               goto error;
-           xml_parse_white(ctx, 0);
-           /* From now on, EOF is an error again */
-           if (setjmp(throw_buf))
-             goto error;
-
-           /* Misc */
-           xml_parse_char(ctx, '<');
-           xml_inc(ctx);
-           if ((c = xml_get_char(ctx)) == '?')
-             /* Processing instruction */
-             if (!(ctx->flags & XML_REPORT_PIS))
-               xml_skip_pi(ctx);
-             else
-               {
-                 xml_push_pi(ctx);
-                 PULL_STATE(PI, EPILOG_PI);
-                 xml_pop_pi(ctx);
-               }
-           else if (c == '!')
-             {
-               xml_parse_char(ctx, '-');
-               /* Comment */
-               if (!(ctx->flags & XML_REPORT_COMMENTS))
-                 xml_skip_comment(ctx);
-               else
-                 {
-                   xml_push_comment(ctx);
-                   PULL_STATE(COMMENT, EPILOG_COMMENT);
-                   xml_pop_comment(ctx);
-                 }
-             }
-           else
-             xml_fatal(ctx, "Syntax error in the epilog");
-         }
-
-    }
-  ASSERT(0);
-}
-
-/* Run xml_next() with ctx->pull temporarily replaced by `pull`
- * and return the state reached; the previous value of ctx->pull is restored. */
-uns
-xml_next_state(struct xml_context *ctx, uns pull)
-{
-  uns prev_pull = ctx->pull;
-  ctx->pull = pull;
-  uns state = xml_next(ctx);
-  ctx->pull = prev_pull;
-  return state;
-}
-
-/* Skip the rest of the element whose start tag has just been reported
- * (the parser must be in XML_STATE_STAG): only end tags are pulled until the one
- * belonging to the current node is reached or xml_next() returns zero.
- * Returns the last result of xml_next(); ctx->pull is preserved. */
-uns
-xml_skip_element(struct xml_context *ctx)
-{
-  ASSERT(ctx->state == XML_STATE_STAG);
-  struct xml_node *target = ctx->node;
-  uns prev_pull = ctx->pull;
-  uns state;
-  ctx->pull = XML_PULL_ETAG;
-  do
-    state = xml_next(ctx);
-  while (state && ctx->node != target);
-  ctx->pull = prev_pull;
-  return state;
-}
-
-/* Parse the whole document with all pull requests switched off, i.e. all reporting
- * goes through the handlers. Returns ctx->err_code as left by the last xml_next() call. */
-uns
-xml_parse(struct xml_context *ctx)
-{
-  /* The loop should make only one pass unless a SAX handler overrides ctx->pull */
-  for (;;)
-    {
-      ctx->pull = 0;
-      if (!xml_next(ctx))
-        break;
-    }
-  return ctx->err_code;
-}
-
-/* Concatenate the texts of all XML_NODE_CHARS sons of an element
- * into a single zero-terminated string built in `pool`. */
-char *
-xml_merge_chars(struct xml_context *ctx UNUSED, struct xml_node *node, struct mempool *pool)
-{
-  ASSERT(node->type == XML_NODE_ELEM);
-  char *dst = mp_start_noalign(pool, 1);
-  XML_NODE_FOR_EACH(son, node)
-    {
-      if (son->type == XML_NODE_CHARS)
-        {
-          /* Keep room for the text and the final zero */
-          dst = mp_spread(pool, dst, son->len + 1);
-          memcpy(dst, son->text, son->len);
-          dst += son->len;
-        }
-    }
-  *dst = 0;
-  return mp_end(pool, dst + 1);
-}
-
-/* Append the texts of all XML_NODE_CHARS nodes in the subtree of `node` (in document
- * order) to the growing pool buffer ending at `p`; returns the new end of the buffer. */
-static char *
-xml_append_dom_chars(char *p, struct mempool *pool, struct xml_node *node)
-{
-  XML_NODE_FOR_EACH(son, node)
-    {
-      if (son->type == XML_NODE_ELEM)
-        {
-          /* Descend into the nested element */
-          p = xml_append_dom_chars(p, pool, son);
-        }
-      else if (son->type == XML_NODE_CHARS)
-        {
-          p = mp_spread(pool, p, son->len + 1);
-          memcpy(p, son->text, son->len);
-          p += son->len;
-        }
-    }
-  return p;
-}
-
-/* Concatenate the texts of all XML_NODE_CHARS nodes in the whole subtree
- * of an element into a single zero-terminated string built in `pool`. */
-char *
-xml_merge_dom_chars(struct xml_context *ctx UNUSED, struct xml_node *node, struct mempool *pool)
-{
-  ASSERT(node->type == XML_NODE_ELEM);
-  char *start = mp_start_noalign(pool, 1);
-  char *end = xml_append_dom_chars(start, pool, node);
-  *end = 0;
-  return mp_end(pool, end + 1);
-}
diff --git a/sherlock/xml/source.c b/sherlock/xml/source.c

deleted file mode 100644 (file)

index 29226f0..0000000
--- a/sherlock/xml/source.c
+++ /dev/null
@@ -1,486 +0,0 @@
-/*
- *     Sherlock Library -- A simple XML parser
- *
- *     (c) 2007--2008 Pavel Charvat <pchar@ucw.cz>
- *
- *     This software may be freely distributed and used according to the terms
- *     of the GNU Lesser General Public License.
- */
-
-#undef LOCAL_DEBUG
-
-#include "sherlock/sherlock.h"
-#include "sherlock/xml/xml.h"
-#include "sherlock/xml/dtd.h"
-#include "sherlock/xml/internals.h"
-#include "ucw/unicode.h"
-#include "ucw/ff-unicode.h"
-#include "charset/charconv.h"
-#include "charset/fb-charconv.h"
-
-/*** Character categorization ***/
-
-#include "obj/sherlock/xml/unicat.c"
-
-/* Select the character category masks used by the parser according to
- * the version of XML (XML_VERSION_1_1 set in ctx->flags or not). */
-static void
-xml_init_cats(struct xml_context *ctx)
-{
-  if (ctx->flags & XML_VERSION_1_1)
-    {
-      ctx->cat_chars = XML_CHAR_VALID_1_1;
-      ctx->cat_unrestricted = XML_CHAR_UNRESTRICTED_1_1;
-      ctx->cat_new_line = XML_CHAR_NEW_LINE_1_1;
-      ctx->cat_name = XML_CHAR_NAME_1_1;
-      ctx->cat_sname = XML_CHAR_SNAME_1_1;
-      return;
-    }
-  /* XML 1.0 has no restricted characters, so both masks are the same */
-  ctx->cat_chars = XML_CHAR_VALID_1_0;
-  ctx->cat_unrestricted = XML_CHAR_VALID_1_0;
-  ctx->cat_new_line = XML_CHAR_NEW_LINE_1_0;
-  ctx->cat_name = XML_CHAR_NAME_1_0;
-  ctx->cat_sname = XML_CHAR_SNAME_1_0;
-}
-
-/*** Reading of document/external entities ***/
-
-/* Record the "Unexpected EOF" error (XML_ERR_EOF) in the context and throw it
- * by xml_throw(); the function does not return. */
-static void NONRET
-xml_eof(struct xml_context *ctx)
-{
-  ctx->err_msg = "Unexpected EOF";
-  ctx->err_code = XML_ERR_EOF;
-  xml_throw(ctx);
-}
-
-/* Raise the fatal error reporting an incorrectly nested entity; does not return. */
-void NONRET
-xml_fatal_nested(struct xml_context *ctx)
-{
-  xml_fatal(ctx, "Entity is not nested correctly");
-}
-
-/* Store the character `c` followed by its category at *bstop
- * and advance *bstop behind the pair. */
-static inline void
-xml_add_char(u32 **bstop, uns c)
-{
-  u32 *dst = *bstop;
-  dst[0] = c;
-  dst[1] = xml_char_cat(c);
-  *bstop = dst + 2;
-}
-
-/* Push a new, zero-initialized input source on the top of the stack of sources.
- * The buffer pointers of the previous source (if any) are saved in it, the per-source
- * flags are cleared and the nesting depth restarts from zero; the previous depth
- * is remembered in the new source. Returns the new source. */
-struct xml_source *
-xml_push_source(struct xml_context *ctx)
-{
-  xml_push(ctx);
-  struct xml_source *prev = ctx->src;
-  if (prev)
-    {
-      prev->bptr = ctx->bptr;
-      prev->bstop = ctx->bstop;
-    }
-  struct xml_source *src = mp_alloc_zero(ctx->stack, sizeof(*src));
-  src->next = prev;
-  src->saved_depth = ctx->depth;
-  ctx->src = src;
-  ctx->flags &= ~(XML_SRC_EOF | XML_SRC_EXPECTED_DECL | XML_SRC_DOCUMENT);
-  /* The new source starts with an empty buffer */
-  ctx->bptr = src->buf;
-  ctx->bstop = src->buf;
-  ctx->depth = 0;
-  return src;
-}
-
-/* Push a new input source reading from the fastbuf `fb` and return it.
- * The fastbuf is closed by xml_close_source() when the source is released. */
-struct xml_source *
-xml_push_fastbuf(struct xml_context *ctx, struct fastbuf *fb)
-{
-  struct xml_source *src = xml_push_source(ctx);
-  src->fb = fb;
-  return src;
-}
-
-/* Close the fastbuf of a source and also its wrapped fastbuf, if there is one. */
-static void
-xml_close_source(struct xml_source *src)
-{
-  bclose(src->fb);
-  if (src->wrapped_fb)
-    bclose(src->wrapped_fb);
-}
-
-/* Close the current input source and return to the previous one, restoring its
- * buffer pointers and the saved nesting depth. Ending a source with a non-zero
- * nesting depth is a fatal error. When no source remains, xml_eof() is thrown. */
-static void
-xml_pop_source(struct xml_context *ctx)
-{
-  TRACE(ctx, "pop_source");
-  if (unlikely(ctx->depth != 0))
-    xml_fatal(ctx, "Unexpected end of entity");
-  struct xml_source *top = ctx->src;
-  if (!top)
-    xml_fatal(ctx, "Undefined source");
-  xml_close_source(top);
-  ctx->depth = top->saved_depth;
-  struct xml_source *below = top->next;
-  ctx->src = below;
-  if (below)
-    {
-      ctx->bptr = below->bptr;
-      ctx->bstop = below->bstop;
-    }
-  xml_pop(ctx);
-  if (unlikely(!below))
-    xml_eof(ctx);
-}
-
-/* Unlink and close all input sources remaining on the stack of sources. */
-void
-xml_sources_cleanup(struct xml_context *ctx)
-{
-  for (struct xml_source *s = ctx->src; s; s = ctx->src)
-    {
-      ctx->src = s->next;
-      xml_close_source(s);
-    }
-}
-
-static void xml_refill_utf8(struct xml_context *ctx);
-
-/* An entity resolver which resolves nothing: it only reports an error
- * saying that external entities are not supported. */
-void
-xml_def_resolve_entity(struct xml_context *ctx, struct xml_dtd_entity *ent UNUSED)
-{
-  xml_error(ctx, "References to external entities are not supported");
-}
-
-/* Start reading the replacement text of an entity.
- * Internal entities get a new source reading ent->text from memory; for external ones
- * the h_resolve_entity handler is called, the current source (ctx->src) is used
- * afterwards and a text declaration is expected. In both cases the source is set up
- * for UTF-8 refilling with the current character categories. */
-void
-xml_push_entity(struct xml_context *ctx, struct xml_dtd_entity *ent)
-{
-  TRACE(ctx, "xml_push_entity");
-  struct xml_source *src;
-  if (!(ent->flags & XML_DTD_ENTITY_EXTERNAL))
-    {
-      src = xml_push_source(ctx);
-      src->fb = &src->wrap_fb;
-      fbbuf_init_read(src->fb, ent->text, strlen(ent->text), 0);
-    }
-  else
-    {
-      ASSERT(ctx->h_resolve_entity);
-      ctx->h_resolve_entity(ctx, ent);
-      ctx->flags |= XML_SRC_EXPECTED_DECL;
-      src = ctx->src;
-    }
-  src->refill = xml_refill_utf8;
-  src->refill_cat1 = ctx->cat_unrestricted & ~ctx->cat_new_line;
-  src->refill_cat2 = ctx->cat_new_line;
-}
-
-/* Report a character which must not occur in the input: ~1U denotes a sequence
- * which could not be decoded, any other value is a restricted character.
- * Returns UNI_REPLACEMENT. */
-static uns
-xml_error_restricted(struct xml_context *ctx, uns c)
-{
-  if (c != ~1U)
-    xml_error(ctx, "Restricted char U+%04X", c);
-  else
-    xml_error(ctx, "Corrupted encoding");
-  return UNI_REPLACEMENT;
-}
-
-void xml_parse_decl(struct xml_context *ctx);
-
-/* The common body of all refill functions. Characters are read by func(fb, params...)
- * from the fastbuf of the current source and stored to the source buffer as pairs
- * (character, category) until the buffer is full, a '>' outside of both category masks
- * is read or the input ends (all bits of the character set), which sets XML_SRC_EOF.
- * Characters matching refill_cat1 are copied as they are; line breaks (refill_cat2)
- * are translated to a single 0xA and counted in src->row, where a line break directly
- * following 0xD is dropped as the second half of a two-character sequence (except 0x2028).
- * Any other character is reported by xml_error_restricted() and replaced by its result. */
-#define REFILL(ctx, func, params...)                                                   \
-  struct xml_source *src = ctx->src;                                                   \
-  struct fastbuf *fb = src->fb;                                                                \
-  if (ctx->bptr == ctx->bstop)                                                         \
-    ctx->bptr = ctx->bstop = src->buf;                                                 \
-  uns c, t1 = src->refill_cat1, t2 = src->refill_cat2, row = src->row;         \
-  u32 *bend = src->buf + ARRAY_SIZE(src->buf), *bstop = ctx->bstop,                    \
-      *last_0xd = src->pending_0xd ? bstop : NULL;                                     \
-  do                                                                                   \
-    {                                                                                  \
-      c = func(fb, ##params);                                                          \
-      uns t = xml_char_cat(c);                                                         \
-      if (t & t1)                                                                      \
-        /* Typical branch */                                                           \
-       *bstop++ = c, *bstop++ = t;                                                     \
-      else if (t & t2)                                                                 \
-        {                                                                              \
-         /* New line */                                                                \
-         /* XML 1.0: 0xA | 0xD | 0xD 0xA */                                            \
-         /* XML 1.1: 0xA | 0xD | 0xD 0xA | 0x85 | 0xD 0x85 | 0x2028 */                 \
-         if (c == 0xd)                                                                 \
-           last_0xd = bstop + 2;                                                       \
-         else if (c != 0x2028 && last_0xd == bstop)                                    \
-           {                                                                           \
-             last_0xd = NULL;                                                          \
-             continue;                                                                 \
-           }                                                                           \
-         xml_add_char(&bstop, 0xa), row++;                                             \
-       }                                                                               \
-      else if (c == '>')                                                               \
-        {                                                                              \
-         /* Used only in XML/TextDecl to switch the encoding */                        \
-         *bstop++ = c, *bstop++ = t;                                                   \
-         break;                                                                        \
-       }                                                                               \
-      else if (~c)                                                                     \
-        /* Restricted character */                                                     \
-        xml_add_char(&bstop, xml_error_restricted(ctx, c));                            \
-      else                                                                             \
-        {                                                                              \
-         /* EOF */                                                                     \
-          ctx->flags |= XML_SRC_EOF;                                                   \
-          break;                                                                       \
-       }                                                                               \
-    }                                                                                  \
-  while (bstop < bend);                                                                        \
-  src->pending_0xd = (last_0xd == bstop);                                              \
-  ctx->bstop = bstop;                                                                  \
-  src->row = row;
-
-static void
-xml_refill_utf8(struct xml_context *ctx)
-{
-  REFILL(ctx, bget_utf8_repl, ~1U);
-}
-
-static void
-xml_refill_utf16_le(struct xml_context *ctx)
-{
-  REFILL(ctx, bget_utf16_le_repl, ~1U);
-}
-
-static void
-xml_refill_utf16_be(struct xml_context *ctx)
-{
-  REFILL(ctx, bget_utf16_be_repl, ~1U);
-}
-
-#undef REFILL
-
-void
-xml_refill(struct xml_context *ctx)
-{
-  do
-    {
-      if (ctx->flags & XML_SRC_EOF)
-       xml_pop_source(ctx);
-      else if (ctx->flags & XML_SRC_EXPECTED_DECL)
-       xml_parse_decl(ctx);
-      else
-        {
-         ctx->src->refill(ctx);
-         TRACE(ctx, "refilled %u characters", (uns)((ctx->bstop - ctx->bptr) / 2));
-       }
-    }
-  while (ctx->bptr == ctx->bstop);
-}
-
-static uns
-xml_source_row(struct xml_context *ctx, struct xml_source *src)
-{
-  uns row = src->row;
-  for (u32 *p = ctx->bstop; p != ctx->bptr; p -= 2)
-    if (p[-1] & src->refill_cat2)
-      row--;
-  return row + 1;
-}
-
-uns
-xml_row(struct xml_context *ctx)
-{
-  return ctx->src ? xml_source_row(ctx, ctx->src) : 0;
-}
-
-/* Document/external entity header */
-
-static char *
-xml_parse_encoding_name(struct xml_context *ctx)
-{
-  /* EncName ::= '"' [A-Za-z] ([A-Za-z0-9._] | '-')* '"' | "'" [A-Za-z] ([A-Za-z0-9._] | '-')* "'" */
-  char *p = mp_start_noalign(ctx->pool, 1);
-  uns q = xml_parse_quote(ctx);
-  if (unlikely(!(xml_get_cat(ctx) & XML_CHAR_ENC_SNAME)))
-    xml_fatal(ctx, "Invalid character in the encoding name");
-  while (1)
-    {
-      p = mp_spread(ctx->pool, p, 2);
-      *p++ = xml_last_char(ctx);
-      if (xml_get_char(ctx) == q)
-       break;
-      if (unlikely(!(xml_last_cat(ctx) & XML_CHAR_ENC_NAME)))
-       xml_fatal(ctx, "Invalid character in the encoding name");
-    }
-  *p++ = 0;
-  return mp_end(ctx->pool, p);
-}
-
-static void
-xml_init_charconv(struct xml_context *ctx, int cs)
-{
-  // XXX: with a direct access to libcharset tables could be faster
-  struct xml_source *src = ctx->src;
-  TRACE(ctx, "wrapping charset %s", charset_name(cs));
-  src->wrapped_fb = src->fb;
-  src->fb = fb_wrap_charconv_in(src->fb, cs, CONV_CHARSET_UTF8);
-}
-
-void
-xml_parse_decl(struct xml_context *ctx)
-{
-  TRACE(ctx, "xml_parse_decl");
-  struct xml_source *src = ctx->src;
-  ctx->flags &= ~XML_SRC_EXPECTED_DECL;
-  uns doc = ctx->flags & XML_SRC_DOCUMENT;
-
-  /* Setup valid Unicode ranges and force the reader to abort refill() after each '>', where we can switch encoding or XML version */
-  if (doc)
-    xml_init_cats(ctx);
-  src->refill_cat1 = ctx->cat_unrestricted & ~ctx->cat_new_line & ~XML_CHAR_GT;
-  src->refill_cat2 = ctx->cat_new_line;
-
-  /* Initialize the supplied charset (if any) or try to guess it */
-  char *expected_encoding = src->expected_encoding;
-  src->refill = xml_refill_utf8;
-  int bom = bpeekc(src->fb);
-  if (bom < 0)
-    ctx->flags |= XML_SRC_EOF;
-  if (!src->fb_encoding)
-    {
-      if (bom == 0xfe)
-       src->refill = xml_refill_utf16_be;
-      else if (bom == 0xff)
-       src->refill = xml_refill_utf16_le;
-    }
-  else
-    {
-      int cs = find_charset_by_name(src->fb_encoding);
-      if (cs == CONV_CHARSET_UTF8)
-        {}
-      else if (cs >= 0)
-        {
-         xml_init_charconv(ctx, cs);
-         bom = 0;
-       }
-      else if (strcasecmp(src->fb_encoding, "UTF-16"))
-        {
-         src->refill = xml_refill_utf16_be;
-         if (bom == 0xff)
-           src->refill = xml_refill_utf16_le;
-       }
-      else if (strcasecmp(src->fb_encoding, "UTF-16BE"))
-       src->refill = xml_refill_utf16_be;
-      else if (strcasecmp(src->fb_encoding, "UTF-16LE"))
-       src->refill = xml_refill_utf16_le;
-      else
-        {
-         xml_error(ctx, "Unknown encoding '%s'", src->fb_encoding);
-         expected_encoding = NULL;
-       }
-    }
-  uns utf16 = src->refill == xml_refill_utf16_le || src->refill == xml_refill_utf16_be;
-  if (utf16)
-    src->fb_encoding = (src->refill == xml_refill_utf16_be) ? "UTF-16BE" : "UTF-16LE";
-  if (!expected_encoding)
-    expected_encoding = src->fb_encoding;
-  if (bom > 0 && xml_peek_char(ctx) == 0xfeff)
-    xml_skip_char(ctx);
-  else if (utf16)
-    xml_error(ctx, "Missing or corrupted BOM");
-  TRACE(ctx, "Initial encoding=%s", src->fb_encoding ? : "?");
-
-  /* Look ahead for presence of XMLDecl or optional TextDecl */
-  if (!(ctx->flags & XML_SRC_EOF) && ctx->bstop != src->buf + ARRAY_SIZE(src->buf))
-    xml_refill(ctx);
-  u32 *bptr = ctx->bptr;
-  uns have_decl = (12 <= ctx->bstop - ctx->bptr && (bptr[11] & XML_CHAR_WHITE) &&
-    bptr[0] == '<' && bptr[2] == '?' && (bptr[4] & 0xdf) == 'X' && (bptr[6] & 0xdf) == 'M' && (bptr[8] & 0xdf) == 'L');
-  if (!have_decl)
-    {
-      if (doc)
-        xml_fatal(ctx, "Missing or corrupted XML header");
-      else if (expected_encoding && strcasecmp(src->expected_encoding, "UTF-8") && !utf16)
-       xml_error(ctx, "Missing or corrupted entity header");
-      goto exit;
-    }
-  ctx->bptr = bptr + 12;
-  xml_parse_white(ctx, 0);
-
-  /* Parse version string (mandatory in XMLDecl, optional in TextDecl) */
-  if (xml_peek_char(ctx) == 'v')
-    {
-      xml_parse_seq(ctx, "version");
-      xml_parse_eq(ctx);
-      char *version = xml_parse_pubid_literal(ctx, ctx->pool);
-      TRACE(ctx, "version=%s", version);
-      uns v = 0;
-      if (!strcmp(version, "1.1"))
-       v = XML_VERSION_1_1;
-      else if (strcmp(version, "1.0"))
-        {
-         xml_error(ctx, "Unknown XML version string '%s'", version);
-         version = "1.0";
-       }
-      if (doc)
-        {
-         ctx->version_str = version;
-         ctx->flags |= v;
-       }
-      else if (v > (ctx->flags & XML_VERSION_1_1))
-        xml_error(ctx, "XML 1.1 external entity included from XML 1.0 document");
-      if (!xml_parse_white(ctx, !doc))
-        goto end;
-    }
-  else if (doc)
-    {
-      xml_error(ctx, "Expected XML version");
-      ctx->version_str = "1.0";
-    }
-
-  /* Parse encoding string (optional in XMLDecl, mandatory in TextDecl) */
-  if (xml_peek_char(ctx) == 'e')
-    {
-      xml_parse_seq(ctx, "encoding");
-      xml_parse_eq(ctx);
-      src->decl_encoding = xml_parse_encoding_name(ctx);
-      TRACE(ctx, "encoding=%s", src->decl_encoding);
-      if (!xml_parse_white(ctx, 0))
-       goto end;
-    }
-  else if (!doc)
-    xml_error(ctx, "Expected XML encoding");
-
-  /* Parse whether the document is standalone (optional in XMLDecl) */
-  if (doc && xml_peek_char(ctx) == 's')
-    {
-      xml_parse_seq(ctx, "standalone");
-      xml_parse_eq(ctx);
-      uns c = xml_parse_quote(ctx);
-      if (ctx->standalone = (xml_peek_char(ctx) == 'y'))
-       xml_parse_seq(ctx, "yes");
-      else
-        xml_parse_seq(ctx, "no");
-      xml_parse_char(ctx, c);
-      TRACE(ctx, "standalone=%d", ctx->standalone);
-      xml_parse_white(ctx, 0);
-    }
-end:
-  xml_parse_seq(ctx, "?>");
-
-  /* Switch to the final encoding */
-  if (src->decl_encoding)
-    {
-      int cs = find_charset_by_name(src->decl_encoding);
-      if (cs < 0 && !expected_encoding)
-       xml_error(ctx, "Unknown encoding '%s'", src->decl_encoding);
-      else if (!src->fb_encoding && cs >= 0 && cs != CONV_CHARSET_UTF8)
-        {
-         xml_init_charconv(ctx, cs);
-         src->fb_encoding = src->decl_encoding;
-       }
-      else if (expected_encoding && strcasecmp(src->decl_encoding, expected_encoding) && (!utf16 ||
-       !(!strcasecmp(src->decl_encoding, "UTF-16") ||
-        (!strcasecmp(src->decl_encoding, "UTF-16BE") && strcasecmp(expected_encoding, "UTF-16LE")) ||
-        (!strcasecmp(src->decl_encoding, "UTF-16LE") && strcasecmp(expected_encoding, "UTF-16BE")))))
-       xml_error(ctx, "The header contains encoding '%s' instead of expected '%s'", src->decl_encoding, expected_encoding);
-    }
-  if (!src->fb_encoding)
-    src->fb_encoding = "UTF-8";
-  TRACE(ctx, "Final encoding=%s", src->fb_encoding);
-
-exit:
-  /* Update valid Unicode ranges */
-  if (doc)
-    xml_init_cats(ctx);
-  src->refill_cat1 = ctx->cat_unrestricted & ~ctx->cat_new_line;
-  src->refill_cat2 = ctx->cat_new_line;
-}
diff --git a/sherlock/xml/unicat.pl b/sherlock/xml/unicat.pl

deleted file mode 100755 (executable)

index b86106f..0000000
--- a/sherlock/xml/unicat.pl
+++ /dev/null
@@ -1,165 +0,0 @@
-#!/usr/bin/perl
-#
-#      UCW Library -- Character map for the XML parser
-#
-#      (c) 2007 Pavel Charvat <pchar@ucw.cz>
-#
-#      This software may be freely distributed and used according to the terms
-#      of the GNU Lesser General Public License.
-#
-
-my @cat = ();
-my @lcat = ();
-my %ids = ();
-my %cls = ();
-for (my $i = 0; $i < 0x10000; $i++) { $cat[$i] = 0; }
-for (my $i = 0; $i < 0x11; $i++) { $lcat[$i] = 0; }
-
-my @white = (0x9, 0xA, 0xD, 0x20);
-my @base_char_1_0 = (
-  [0x0041,0x005A], [0x0061,0x007A], [0x00C0,0x00D6], [0x00D8,0x00F6], [0x00F8,0x00FF], [0x0100,0x0131],
-  [0x0134,0x013E], [0x0141,0x0148], [0x014A,0x017E], [0x0180,0x01C3], [0x01CD,0x01F0], [0x01F4,0x01F5],
-  [0x01FA,0x0217], [0x0250,0x02A8], [0x02BB,0x02C1], 0x0386, [0x0388,0x038A], 0x038C, [0x038E,0x03A1],
-  [0x03A3,0x03CE], [0x03D0,0x03D6], 0x03DA, 0x03DC, 0x03DE, 0x03E0, [0x03E2,0x03F3], [0x0401,0x040C],
-  [0x040E,0x044F], [0x0451,0x045C], [0x045E,0x0481], [0x0490,0x04C4], [0x04C7,0x04C8], [0x04CB,0x04CC],
-  [0x04D0,0x04EB], [0x04EE,0x04F5], [0x04F8,0x04F9], [0x0531,0x0556], 0x0559, [0x0561,0x0586], [0x05D0,0x05EA],
-  [0x05F0,0x05F2], [0x0621,0x063A], [0x0641,0x064A], [0x0671,0x06B7], [0x06BA,0x06BE], [0x06C0,0x06CE],
-  [0x06D0,0x06D3], 0x06D5, [0x06E5,0x06E6], [0x0905,0x0939], 0x093D, [0x0958,0x0961], [0x0985,0x098C],
-  [0x098F,0x0990], [0x0993,0x09A8], [0x09AA,0x09B0], 0x09B2, [0x09B6,0x09B9], [0x09DC,0x09DD], [0x09DF,0x09E1],
-  [0x09F0,0x09F1], [0x0A05,0x0A0A], [0x0A0F,0x0A10], [0x0A13,0x0A28], [0x0A2A,0x0A30], [0x0A32,0x0A33],
-  [0x0A35,0x0A36], [0x0A38,0x0A39], [0x0A59,0x0A5C], 0x0A5E, [0x0A72,0x0A74], [0x0A85,0x0A8B], 0x0A8D,
-  [0x0A8F,0x0A91], [0x0A93,0x0AA8], [0x0AAA,0x0AB0], [0x0AB2,0x0AB3], [0x0AB5,0x0AB9], 0x0ABD, 0x0AE0,
-  [0x0B05,0x0B0C], [0x0B0F,0x0B10], [0x0B13,0x0B28], [0x0B2A,0x0B30], [0x0B32,0x0B33], [0x0B36,0x0B39],
-  0x0B3D, [0x0B5C,0x0B5D], [0x0B5F,0x0B61], [0x0B85,0x0B8A], [0x0B8E,0x0B90], [0x0B92,0x0B95], [0x0B99,0x0B9A],
-  0x0B9C, [0x0B9E,0x0B9F], [0x0BA3,0x0BA4], [0x0BA8,0x0BAA], [0x0BAE,0x0BB5], [0x0BB7,0x0BB9], [0x0C05,0x0C0C],
-  [0x0C0E,0x0C10], [0x0C12,0x0C28], [0x0C2A,0x0C33], [0x0C35,0x0C39], [0x0C60,0x0C61], [0x0C85,0x0C8C],
-  [0x0C8E,0x0C90], [0x0C92,0x0CA8], [0x0CAA,0x0CB3], [0x0CB5,0x0CB9], 0x0CDE, [0x0CE0,0x0CE1], [0x0D05,0x0D0C],
-  [0x0D0E,0x0D10], [0x0D12,0x0D28], [0x0D2A,0x0D39], [0x0D60,0x0D61], [0x0E01,0x0E2E], 0x0E30, [0x0E32,0x0E33],
-  [0x0E40,0x0E45], [0x0E81,0x0E82], 0x0E84, [0x0E87,0x0E88], 0x0E8A, 0x0E8D, [0x0E94,0x0E97], [0x0E99,0x0E9F],
-  [0x0EA1,0x0EA3], 0x0EA5, 0x0EA7, [0x0EAA,0x0EAB], [0x0EAD,0x0EAE], 0x0EB0, [0x0EB2,0x0EB3], 0x0EBD,
-  [0x0EC0,0x0EC4], [0x0F40,0x0F47], [0x0F49,0x0F69], [0x10A0,0x10C5], [0x10D0,0x10F6], 0x1100, [0x1102,0x1103],
-  [0x1105,0x1107], 0x1109, [0x110B,0x110C], [0x110E,0x1112], 0x113C, 0x113E, 0x1140, 0x114C, 0x114E, 0x1150,
-  [0x1154,0x1155], 0x1159, [0x115F,0x1161], 0x1163, 0x1165, 0x1167, 0x1169, [0x116D,0x116E], [0x1172,0x1173],
-  0x1175, 0x119E, 0x11A8, 0x11AB, [0x11AE,0x11AF], [0x11B7,0x11B8], 0x11BA, [0x11BC,0x11C2], 0x11EB, 0x11F0,
-  0x11F9, [0x1E00,0x1E9B], [0x1EA0,0x1EF9], [0x1F00,0x1F15], [0x1F18,0x1F1D], [0x1F20,0x1F45], [0x1F48,0x1F4D],
-  [0x1F50,0x1F57], 0x1F59, 0x1F5B, 0x1F5D, [0x1F5F,0x1F7D], [0x1F80,0x1FB4], [0x1FB6,0x1FBC], 0x1FBE,
-  [0x1FC2,0x1FC4], [0x1FC6,0x1FCC], [0x1FD0,0x1FD3], [0x1FD6,0x1FDB], [0x1FE0,0x1FEC], [0x1FF2,0x1FF4],
-  [0x1FF6,0x1FFC], 0x2126, [0x212A,0x212B], 0x212E, [0x2180,0x2182], [0x3041,0x3094], [0x30A1,0x30FA],
-  [0x3105,0x312C], [0xAC00,0xD7A3]);
-my @ideographic_1_0 = ([0x4E00,0x9FA5], 0x3007, [0x3021,0x3029]);
-my @combining_char_1_0 = (
-  [0x0300,0x0345], [0x0360,0x0361], [0x0483,0x0486], [0x0591,0x05A1], [0x05A3,0x05B9], [0x05BB,0x05BD],
-  0x05BF, [0x05C1,0x05C2], 0x05C4, [0x064B,0x0652], 0x0670, [0x06D6,0x06DC], [0x06DD,0x06DF], [0x06E0,0x06E4],
-  [0x06E7,0x06E8], [0x06EA,0x06ED], [0x0901,0x0903], 0x093C, [0x093E,0x094C], 0x094D, [0x0951,0x0954],
-  [0x0962,0x0963], [0x0981,0x0983], 0x09BC, 0x09BE, 0x09BF, [0x09C0,0x09C4], [0x09C7,0x09C8], [0x09CB,0x09CD],
-  0x09D7, [0x09E2,0x09E3], 0x0A02, 0x0A3C, 0x0A3E, 0x0A3F, [0x0A40,0x0A42], [0x0A47,0x0A48], [0x0A4B,0x0A4D],
-  [0x0A70,0x0A71], [0x0A81,0x0A83], 0x0ABC, [0x0ABE,0x0AC5], [0x0AC7,0x0AC9], [0x0ACB,0x0ACD], [0x0B01,0x0B03],
-  0x0B3C, [0x0B3E,0x0B43], [0x0B47,0x0B48], [0x0B4B,0x0B4D], [0x0B56,0x0B57], [0x0B82,0x0B83], [0x0BBE,0x0BC2],
-  [0x0BC6,0x0BC8], [0x0BCA,0x0BCD], 0x0BD7, [0x0C01,0x0C03], [0x0C3E,0x0C44], [0x0C46,0x0C48], [0x0C4A,0x0C4D],
-  [0x0C55,0x0C56], [0x0C82,0x0C83], [0x0CBE,0x0CC4], [0x0CC6,0x0CC8], [0x0CCA,0x0CCD], [0x0CD5,0x0CD6],
-  [0x0D02,0x0D03], [0x0D3E,0x0D43], [0x0D46,0x0D48], [0x0D4A,0x0D4D], 0x0D57, 0x0E31, [0x0E34,0x0E3A],
-  [0x0E47,0x0E4E], 0x0EB1, [0x0EB4,0x0EB9], [0x0EBB,0x0EBC], [0x0EC8,0x0ECD], [0x0F18,0x0F19], 0x0F35,
-  0x0F37, 0x0F39, 0x0F3E, 0x0F3F, [0x0F71,0x0F84], [0x0F86,0x0F8B], [0x0F90,0x0F95], 0x0F97, [0x0F99,0x0FAD],
-  [0x0FB1,0x0FB7], 0x0FB9, [0x20D0,0x20DC], 0x20E1, [0x302A,0x302F], 0x3099, 0x309A);
-my @digit_1_0 = (
-  [0x0030,0x0039], [0x0660,0x0669], [0x06F0,0x06F9], [0x0966,0x096F], [0x09E6,0x09EF], [0x0A66,0x0A6F],
-  [0x0AE6,0x0AEF], [0x0B66,0x0B6F], [0x0BE7,0x0BEF], [0x0C66,0x0C6F], [0x0CE6,0x0CEF], [0x0D66,0x0D6F],
-  [0x0E50,0x0E59], [0x0ED0,0x0ED9], [0x0F20,0x0F29]);
-my @extender_1_0 = (
-  0x00B7, 0x02D0, 0x02D1, 0x0387, 0x0640, 0x0E46, 0x0EC6, 0x3005, [0x3031,0x3035], [0x309D,0x309E], [0x30FC,0x30FE]);
-my @sname_1_1 = (
-  "[:A-Z_a-z]", [0xC0,0xD6], [0xD8,0xF6], [0xF8,0x2FF], [0x370,0x37D], [0x37F,0x1FFF],
-  [0x200C,0x200D], [0x2070,0x218F], [0x2C00,0x2FEF], [0x3001,0xD7FF], [0xF900,0xFDCF], [0xFDF0,0xFFFD], [0x10000,0xEFFFF]);
-
-set("WHITE", @white);
-set("NEW_LINE_1_0", 0xA, 0xD);
-set("NEW_LINE_1_1", 0xA, 0xD, 0x85, 0x2028);
-set("DIGIT", "[0-9]");
-set("XDIGIT", "[0-9a-fA-F]");
-set("VALID_1_0", @white, [0x20,0xD7FF], [0xE000,0xFFFD], [0x10000,0x10FFFF]);
-set("VALID_1_1", [0x1,0xD7FF], [0xE000,0xFFFD], [0x10000,0x10FFFF]);
-set("UNRESTRICTED_1_1", @white, [0x20,0x7E], 0x85, [0xA0,0xD7FF], [0xE000,0xFFFD], [0x10000,0x10FFFF]);
-set("PUBID", 0x20, 0xD, 0xA, "[-a-zA-Z0-9'()+,./:=?:!*#@\$_%]");
-set("ENC_SNAME", "[a-zA-Z]");
-set("ENC_NAME", "[-a-zA-Z0-9._]");
-set("SNAME_1_0", "[_:]", @base_char_1_0, @ideographic_1_0);
-set("NAME_1_0", "[-_:.]", @base_char_1_0, @ideographic_1_0, @combining_char_1_0, @digit_1_0, @extender_1_0);
-set("SNAME_1_1", @sname_1_1);
-set("NAME_1_1", @sname_1_1, "[-.0-9]", 0xB7, [0x0300,0x036F], [0x203F,0x2040]);
-set("GT", "[>]");
-
-($ARGV[0] eq "" || $ARGV[1] eq "") && die("Invalid usage");
-find_cls();
-open(H, ">", $ARGV[0]) or die("Cannot create $ARGV[0]");
-open(C, ">", $ARGV[1]) or die("Cannot create $ARGV[1]");
-gen_enum();
-gen_tabs();
-close(H);
-close(C);
-
-sub set {
-  my $id = shift;
-  $ids{$id} = scalar keys(%ids) if !defined($ids{$id});
-  my $mask = 1 << $ids{$id};
-  foreach my $i (@_) {
-    if (ref($i) eq "ARRAY") {
-      my $j = $i->[0];
-      for (; $j <= $i->[1] && $j < 0x10000; $j++) { $cat[$j] |= $mask; }
-      for (; $j <= $i->[1]; $j += 0x10000) { $lcat[$j >> 16] |= $mask; }
-    }
-    elsif ($i =~ /^\[/) { for (my $j=0; $j < 128; $j++) { if (chr($j) =~ /$i/) { $cat[$j] |= $mask; } } }
-    else { $cat[$i] |= $mask; }
-  }
-}
-
-sub find_cls {
-  foreach (my $i=0; $i<@cat; $i++) { $cls{$cat[$i]} = scalar keys(%cls) if !defined($cls{$cat[$i]}); }
-  foreach (my $i=0; $i<@lcat; $i++) { $cls{$lcat[$i]} = scalar keys(%cls) if !defined($cls{$lcat[$i]}); }
-}
-
-sub gen_enum {
-  print H "enum xml_char_type {\n";
-  foreach my $id (sort keys %ids) {
-    my $mask = 0;
-    foreach my $i (keys %cls) {
-      $mask |= 1 << $cls{$i} if $cls{$i} && ($i & (1 << $ids{$id}));
-    }
-    printf H "  XML_CHAR_%-20s = 0x%08x,\n", $id, $mask;
-  }
-  print H "};\n\n";
-}
-
-sub gen_tabs {
-  my @tab = ();
-  my %hash = ();
-
-  print H "extern const byte xml_char_tab1[];\n";
-  print H "extern const uns xml_char_tab2[];\n";
-  print H "extern const byte xml_char_tab3[];\n";
-
-  print C "const uns xml_char_tab2[] = {\n  ";
-  for (my $t=0; $t<256; $t++) {
-    my $i = $t * 256;
-    my @x = ();
-    for (my $j=0; $j<256; $j += 32) {
-      push @x, join(",", map($cls{$_}, @cat[$i+$j..$i+$j+31]));
-    }
-    my $sub = "  " . join(",\n  ", @x);
-    if (!defined($hash{$sub})) {
-      $hash{$sub} = 256 * scalar @tab;
-      push @tab, $sub;
-    }
-    printf C "0x%x", $hash{$sub};
-    print C ((~$t & 15) ? "," : ($t < 255) ? ",\n  " : "\n};\n\n");
-  }
-
-  print C "const byte xml_char_tab1[] = {\n";
-  print C join(",\n\n", @tab);
-  print C "\n};\n\n";
-
-  my @l = ();
-  for (my $i=0; $i<0x11; $i++) {
-    push @l, sprintf("%d", $cls{$lcat[$i]});
-  }
-  print C "const byte xml_char_tab3[] = {" . join(",", @l) . "};\n";
-}
diff --git a/sherlock/xml/xml-test.c b/sherlock/xml/xml-test.c

deleted file mode 100644 (file)

index f6738c5..0000000
--- a/sherlock/xml/xml-test.c
+++ /dev/null
@@ -1,365 +0,0 @@
-/*
- *     Sherlock Library -- A simple XML parser
- *
- *     (c) 2007--2008 Pavel Charvat <pchar@ucw.cz>
- *
- *     This software may be freely distributed and used according to the terms
- *     of the GNU Lesser General Public License.
- */
-
-#include "sherlock/sherlock.h"
-#include "sherlock/xml/xml.h"
-#include "sherlock/xml/dtd.h"
-#include "ucw/getopt.h"
-#include "ucw/fastbuf.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <fcntl.h>
-
-enum {
-  WANT_FIRST = 0x100,
-  WANT_HIDE_ERRORS,
-  WANT_IGNORE_COMMENTS,
-  WANT_IGNORE_PIS,
-  WANT_REPORT_BLOCKS,
-  WANT_REPORT_IGNORABLE,
-  WANT_FILE_ENTITIES,
-};
-
-static char *shortopts = "spdt" CF_SHORT_OPTS;
-static struct option longopts[] = {
-  CF_LONG_OPTS
-  { "sax",             0, 0, 's' },
-  { "pull",            0, 0, 'p' },
-  { "dom",             0, 0, 't' },
-  { "dtd",             0, 0, 'd' },
-  { "hide-errors",     0, 0, WANT_HIDE_ERRORS },
-  { "ignore-comments", 0, 0, WANT_IGNORE_COMMENTS },
-  { "ignore-pis",      0, 0, WANT_IGNORE_PIS },
-  { "report-blocks",   0, 0, WANT_REPORT_BLOCKS },
-  { "report-ignorable",        0, 0, WANT_REPORT_IGNORABLE },
-  { "file-entities",   0, 0, WANT_FILE_ENTITIES },
-  { NULL,              0, 0, 0 }
-};
-
-static void NONRET
-usage(void)
-{
-  fputs("\
-Usage: xml-test [options] < input.xml\n\
-\n\
-Options:\n"
-CF_USAGE
-"\
--p, --pull              Test PULL interface\n\
--s, --sax               Test SAX interface\n\
--t, --dom               Test DOM interface\n\
--d, --dtd               Enable parsing of DTD\n\
-    --hide-errors       Hide warnings and error messages\n\
-    --ignore-comments   Ignore comments\n\
-    --ignore-pis        Ignore processing instructions\n\
-    --report-blocks    Report blocks or characters and CDATA sections\n\
-    --report-ignorable  Report ignorable whitespace\n\
-    --file-entities     Resolve file external entities (not fully normative)\n\
-\n", stderr);
-  exit(1);
-}
-
-static uns want_sax;
-static uns want_pull;
-static uns want_dom;
-static uns want_parse_dtd;
-static uns want_hide_errors;
-static uns want_ignore_comments;
-static uns want_ignore_pis;
-static uns want_report_blocks;
-static uns want_report_ignorable;
-static uns want_file_entities;
-
-static struct fastbuf *out;
-
-static char *
-node_type(struct xml_node *node)
-{
-  switch (node->type)
-    {
-      case XML_NODE_ELEM: return "element";
-      case XML_NODE_COMMENT: return "comment";
-      case XML_NODE_PI: return "pi";
-      case XML_NODE_CHARS: return "chars";
-      default: return "unknown";
-    }
-}
-
-static void
-show_node(struct xml_node *node)
-{
-  switch (node->type)
-    {
-      case XML_NODE_ELEM:
-       bprintf(out, " <%s>", node->name);
-        XML_ATTR_FOR_EACH(a, node)
-          bprintf(out, " %s='%s'", a->name, a->val);
-       bputc(out, '\n');
-       break;
-      case XML_NODE_COMMENT:
-       bprintf(out, " text='%s'\n", node->text);
-       break;
-      case XML_NODE_PI:
-       bprintf(out, " target=%s text='%s'\n", node->name, node->text);
-       break;
-      case XML_NODE_CHARS:
-       bprintf(out, " text='%s'\n", node->text);
-       break;
-      default:
-        bputc(out, '\n');
-    }
-}
-
-static void
-show_tree(struct xml_node *node, uns level)
-{
-  if (!node)
-    return;
-  bputs(out, "DOM:  ");
-  for (uns i = 0; i < level; i++)
-    bputs(out, "    ");
-  bputs(out, node_type(node));
-  show_node(node);
-  if (node->type == XML_NODE_ELEM)
-    XML_NODE_FOR_EACH(son, node)
-      show_tree(son, level + 1);
-}
-
-static void
-h_error(struct xml_context *ctx)
-{
-  bprintf(out, "SAX:  %s at %u: %s\n", (ctx->err_code < XML_ERR_ERROR) ? "warn" : "error", xml_row(ctx), ctx->err_msg);
-}
-
-static void
-h_document_start(struct xml_context *ctx UNUSED)
-{
-  bputs(out, "SAX:  document_start\n");
-}
-
-static void
-h_document_end(struct xml_context *ctx UNUSED)
-{
-  bputs(out, "SAX:  document_end\n");
-}
-
-static void
-h_xml_decl(struct xml_context *ctx)
-{
-  bprintf(out, "SAX:  xml_decl version=%s standalone=%d fb_encoding=%s\n", ctx->version_str, ctx->standalone, ctx->src->fb_encoding);
-}
-
-static void
-h_doctype_decl(struct xml_context *ctx)
-{
-  bprintf(out, "SAX:  doctype_decl type=%s public='%s' system='%s' extsub=%d intsub=%d\n",
-    ctx->doctype, ctx->public_id ? : "", ctx->system_id ? : "",
-    !!(ctx->flags & XML_HAS_EXTERNAL_SUBSET), !!(ctx->flags & XML_HAS_INTERNAL_SUBSET));
-}
-
-static void
-h_comment(struct xml_context *ctx)
-{
-  bputs(out, "SAX:  comment");
-  show_node(ctx->node);
-}
-
-static void
-h_pi(struct xml_context *ctx)
-{
-  bputs(out, "SAX:  pi");
-  show_node(ctx->node);
-}
-
-static void
-h_stag(struct xml_context *ctx)
-{
-  bputs(out, "SAX:  stag");
-  show_node(ctx->node);
-}
-
-static void
-h_etag(struct xml_context *ctx)
-{
-  bprintf(out, "SAX:  etag </%s>\n", ctx->node->name);
-}
-
-static void
-h_chars(struct xml_context *ctx)
-{
-  bputs(out, "SAX:  chars");
-  show_node(ctx->node);
-}
-
-static void
-h_block(struct xml_context *ctx UNUSED, char *text, uns len UNUSED)
-{
-  bprintf(out, "SAX:  block text='%s'\n", text);
-}
-
-static void
-h_cdata(struct xml_context *ctx UNUSED, char *text, uns len UNUSED)
-{
-  bprintf(out, "SAX:  cdata text='%s'\n", text);
-}
-
-static void
-h_ignorable(struct xml_context *ctx UNUSED, char *text, uns len UNUSED)
-{
-  bprintf(out, "SAX:  ignorable text='%s'\n", text);
-}
-
-static void
-h_dtd_start(struct xml_context *ctx UNUSED)
-{
-  bputs(out, "SAX:  dtd_start\n");
-}
-
-static void
-h_dtd_end(struct xml_context *ctx UNUSED)
-{
-  bputs(out, "SAX:  dtd_end\n");
-}
-
-static void
-h_resolve_entity(struct xml_context *ctx, struct xml_dtd_entity *e)
-{
-  xml_push_fastbuf(ctx, bopen(e->system_id, O_RDONLY, 4096));
-}
-
-int
-main(int argc, char **argv)
-{
-  int opt;
-  cf_def_file = NULL;
-  log_init(argv[0]);
-  while ((opt = cf_getopt(argc, argv, shortopts, longopts, NULL)) >= 0)
-    switch (opt)
-      {
-       case 's':
-         want_sax++;
-         break;
-       case 'p':
-         want_pull++;
-         break;
-       case 't':
-         want_dom++;
-         break;
-       case 'd':
-         want_parse_dtd++;
-         break;
-       case WANT_HIDE_ERRORS:
-         want_hide_errors++;
-         break;
-       case WANT_IGNORE_COMMENTS:
-         want_ignore_comments++;
-         break;
-       case WANT_IGNORE_PIS:
-         want_ignore_pis++;
-         break;
-       case WANT_REPORT_BLOCKS:
-         want_report_blocks++;
-         break;
-       case WANT_REPORT_IGNORABLE:
-         want_report_ignorable++;
-         break;
-       case WANT_FILE_ENTITIES:
-         want_file_entities++;
-         break;
-       default:
-         usage();
-      }
-  if (optind != argc)
-    usage();
-
-  out = bfdopen_shared(1, 4096);
-  struct xml_context ctx;
-  xml_init(&ctx);
-  if (!want_hide_errors)
-    ctx.h_warn = ctx.h_error = ctx.h_fatal = h_error;
-  if (want_sax)
-    {
-      ctx.h_document_start = h_document_start;
-      ctx.h_document_end = h_document_end;
-      ctx.h_xml_decl = h_xml_decl;
-      ctx.h_doctype_decl = h_doctype_decl;
-      ctx.h_comment = h_comment;
-      ctx.h_pi = h_pi;
-      ctx.h_stag = h_stag;
-      ctx.h_etag = h_etag;
-      ctx.h_chars = h_chars;
-      if (want_report_blocks)
-        {
-          ctx.h_block = h_block;
-          ctx.h_cdata = h_cdata;
-       }
-      if (want_report_ignorable)
-        ctx.h_ignorable = h_ignorable;
-      ctx.h_dtd_start = h_dtd_start;
-      ctx.h_dtd_end = h_dtd_end;
-    }
-  if (want_dom)
-    ctx.flags |= XML_ALLOC_ALL;
-  if (want_parse_dtd)
-    ctx.flags |= XML_PARSE_DTD;
-  if (want_ignore_comments)
-    ctx.flags &= ~(XML_REPORT_COMMENTS | XML_ALLOC_COMMENTS);
-  if (want_ignore_pis)
-    ctx.flags &= ~(XML_REPORT_PIS | XML_ALLOC_PIS);
-  if (want_file_entities)
-    ctx.h_resolve_entity = h_resolve_entity;
-  xml_push_fastbuf(&ctx, bfdopen_shared(0, 4096));
-  bputs(out, "PULL: start\n");
-  if (want_pull)
-    {
-      ctx.pull = XML_PULL_CHARS | XML_PULL_STAG | XML_PULL_ETAG | XML_PULL_COMMENT | XML_PULL_PI;
-      uns state;
-      while (state = xml_next(&ctx))
-       switch (state)
-         {
-           case XML_STATE_CHARS:
-             bputs(out, "PULL: chars");
-             show_node(ctx.node);
-             break;
-           case XML_STATE_STAG:
-             bputs(out, "PULL: stag");
-             show_node(ctx.node);
-             break;
-           case XML_STATE_ETAG:
-             bprintf(out, "PULL: etag </%s>\n", ctx.node->name);
-             break;
-           case XML_STATE_COMMENT:
-             bputs(out, "PULL: comment");
-             show_node(ctx.node);
-             break;
-           case XML_STATE_PI:
-             bputs(out, "PULL: pi");
-             show_node(ctx.node);
-             break;
-           default:
-             bputs(out, "PULL: unknown\n");
-             break;
-         }
-    }
-  else
-    xml_parse(&ctx);
-  if (ctx.err_code)
-    bprintf(out, "PULL: fatal error at %u: %s\n", xml_row(&ctx), ctx.err_msg);
-  else
-    {
-      bputs(out, "PULL: eof\n");
-      if (want_dom)
-       show_tree(ctx.dom, 0);
-    }
-
-  xml_cleanup(&ctx);
-  bclose(out);
-  return 0;
-}
diff --git a/sherlock/xml/xml-test.t b/sherlock/xml/xml-test.t

deleted file mode 100644 (file)

index aad3d43..0000000
--- a/sherlock/xml/xml-test.t
+++ /dev/null
@@ -1,58 +0,0 @@
-# Tests for the XML parser
-# (c) 2008 Pavel Charvat <pchar@ucw.cz>
-
-Run:   ../obj/sherlock/xml/xml-test
-In:    <?xml version="1.0"?>
-       <html></html>
-Out:   PULL: start
-       PULL: eof
-
-Run:   ../obj/sherlock/xml/xml-test -s
-In:    <?xml version="1.0" encoding="ISO-8859-1"?>
-       <html><a a1="val1" a2="val2">text1&amp;amp;&lt;</a>text2</html>
-Out:   PULL: start
-       SAX:  document_start
-       SAX:  xml_decl version=1.0 standalone=0 fb_encoding=ISO-8859-1
-       SAX:  stag <html>
-       SAX:  stag <a> a1='val1' a2='val2'
-       SAX:  chars text='text1&amp;<'
-       SAX:  etag </a>
-       SAX:  chars text='text2'
-       SAX:  etag </html>
-       SAX:  document_end
-       PULL: eof
-
-Run:   ../obj/sherlock/xml/xml-test -sptd
-In:    <?xml version="1.0"?>
-       <!DOCTYPE root [
-       <!ELEMENT root (#PCDATA|a)*>
-       <!ENTITY % pe1 "<!ENTITY e1 'text'>">
-       %pe1;
-       <!ENTITY e2 '&lt;&e1;&gt;'>
-       <!ELEMENT a (#PCDATA)*>
-       ]>
-       <root>&e1;<a>&e2;</a></root>
-Out:   PULL: start
-       SAX:  document_start
-       SAX:  xml_decl version=1.0 standalone=0 fb_encoding=UTF-8
-       SAX:  doctype_decl type=root public='' system='' extsub=0 intsub=1
-       SAX:  dtd_start
-       SAX:  dtd_end
-       SAX:  stag <root>
-       PULL: stag <root>
-       SAX:  chars text='text'
-       PULL: chars text='text'
-       SAX:  stag <a>
-       PULL: stag <a>
-       SAX:  chars text='<text>'
-       PULL: chars text='<text>'
-       PULL: etag </a>
-       SAX:  etag </a>
-       PULL: etag </root>
-       SAX:  etag </root>
-       SAX:  document_end
-       PULL: eof
-       DOM:  element <root>
-       DOM:      chars text='text'
-       DOM:      element <a>
-       DOM:          chars text='<text>'
diff --git a/sherlock/xml/xml.h b/sherlock/xml/xml.h

deleted file mode 100644 (file)

index e945888..0000000
--- a/sherlock/xml/xml.h
+++ /dev/null
@@ -1,272 +0,0 @@
-/*
- *     Sherlock Library -- A simple XML parser
- *
- *     (c) 2007--2008 Pavel Charvat <pchar@ucw.cz>
- *
- *     This software may be freely distributed and used according to the terms
- *     of the GNU Lesser General Public License.
- */
-
-#ifndef _SHERLOCK_XML_XML_H
-#define _SHERLOCK_XML_XML_H
-
-#include "ucw/clists.h"
-#include "ucw/slists.h"
-#include "ucw/mempool.h"
-#include "ucw/fastbuf.h"
-
-struct xml_context;
-struct xml_dtd_entity;
-
-enum xml_error {
-  XML_ERR_OK = 0,
-  XML_ERR_WARN = 1000,                                 /* Warning */
-  XML_ERR_ERROR = 2000,                                        /* Recoverable error */
-  XML_ERR_FATAL = 3000,                                        /* Unrecoverable error */
-  XML_ERR_EOF,
-};
-
-enum xml_state {
-  XML_STATE_EOF,                                       /* EOF or a fatal error */
-  XML_STATE_START,                                     /* Initial state */
-  XML_STATE_XML_DECL,                                  /* XML_PULL_XML_DECL */
-  XML_STATE_DOCTYPE_DECL,                              /* XML_PULL_DOCTYPE_DECL */
-  XML_STATE_CHARS,                                     /* XML_PULL_CHARS */
-  XML_STATE_STAG,                                      /* XML_PULL_STAG */
-  XML_STATE_ETAG,                                      /* XML_PULL_ETAG */
-  XML_STATE_COMMENT,                                   /* XML_PULL_COMMENT */
-  XML_STATE_PI,                                                /* XML_PULL_PI */
-
-  /* Internal states */
-  XML_STATE_CHARS_BEFORE_STAG,
-  XML_STATE_CHARS_BEFORE_ETAG,
-  XML_STATE_CHARS_BEFORE_CDATA,
-  XML_STATE_CHARS_BEFORE_COMMENT,
-  XML_STATE_CHARS_BEFORE_PI,
-  XML_STATE_PROLOG_COMMENT,
-  XML_STATE_PROLOG_PI,
-  XML_STATE_EPILOG_COMMENT,
-  XML_STATE_EPILOG_PI,
-};
-
-enum xml_pull {
-  XML_PULL_XML_DECL =                  0x00000001,     /* Stop after the XML declaration */
-  XML_PULL_DOCTYPE_DECL =              0x00000002,     /* Stop in the doctype declaration (before optional internal subset) */
-  XML_PULL_CHARS =                     0x00000004,
-  XML_PULL_STAG =                      0x00000008,
-  XML_PULL_ETAG =                      0x00000010,
-  XML_PULL_COMMENT =                   0x00000020,
-  XML_PULL_PI =                                0x00000040,
-  XML_PULL_ALL =                       0xffffffff,
-};
-
-enum xml_flags {
-  /* Enable reporting of various events via SAX and/or PUSH interface */
-  XML_REPORT_COMMENTS =                        0x00000001,     /* Report comments */
-  XML_REPORT_PIS =                     0x00000002,     /* Report processing instructions */
-  XML_REPORT_CHARS =                   0x00000004,     /* Report characters */
-  XML_REPORT_TAGS =                    0x00000008,     /* Report element starts/ends */
-  XML_REPORT_MISC = XML_REPORT_COMMENTS | XML_REPORT_PIS,
-  XML_REPORT_ALL = XML_REPORT_MISC | XML_REPORT_CHARS | XML_REPORT_TAGS,
-
-  /* Enable construction of DOM for these types */
-  XML_ALLOC_COMMENTS =                 0x00000010,     /* Create comment nodes */
-  XML_ALLOC_PIS =                      0x00000020,     /* Create processing instruction nodes */
-  XML_ALLOC_CHARS =                    0x00000040,     /* Create character nodes */
-  XML_ALLOC_TAGS =                     0x00000080,     /* Create element nodes */
-  XML_ALLOC_MISC = XML_ALLOC_COMMENTS | XML_ALLOC_PIS,
-  XML_ALLOC_ALL = XML_ALLOC_MISC | XML_ALLOC_CHARS | XML_ALLOC_TAGS,
-
-  /* Other parameters */
-  XML_VALIDATING =                     0x00000100,     /* Validate everything (not fully implemented!) */
-  XML_PARSE_DTD =                      0x00000200,     /* Enable parsing of DTD */
-  XML_NO_CHARS =                       0x00000400,     /* The current element must not contain character data (filled automaticaly if using DTD) */
-  XML_ALLOC_DEFAULT_ATTRS =            0x00000800,     /* Allocate default attribute values so they can be found by XML_ATTR_FOR_EACH */
-
-  /* Internals, do not change! */
-  XML_EMPTY_ELEM_TAG =                 0x00010000,     /* The current element match EmptyElemTag */
-  XML_VERSION_1_1 =                    0x00020000,     /* XML version is 1.1, otherwise 1.0 */
-  XML_HAS_EXTERNAL_SUBSET =            0x00040000,     /* The document contains a reference to external DTD subset */
-  XML_HAS_INTERNAL_SUBSET =            0x00080000,     /* The document contains an internal subset */
-  XML_HAS_DTD =        XML_HAS_EXTERNAL_SUBSET | XML_HAS_INTERNAL_SUBSET,
-  XML_SRC_EOF =                                0x00100000,     /* EOF reached */
-  XML_SRC_EXPECTED_DECL =              0x00200000,     /* Just before optional or required XMLDecl/TextDecl */
-  XML_SRC_DOCUMENT =                   0x00400000,     /* The document entity */
-  XML_SRC_EXTERNAL =                   0x00800000,     /* An external entity */
-};
-
-enum xml_node_type {
-  XML_NODE_ELEM,
-  XML_NODE_COMMENT,
-  XML_NODE_CHARS,
-  XML_NODE_PI,
-};
-
-#define XML_NODE_FOR_EACH(var, node) CLIST_FOR_EACH(struct xml_node *, var, (node)->sons)
-#define XML_ATTR_FOR_EACH(var, node) SLIST_FOR_EACH(struct xml_attr *, var, (node)->attrs)
-
-struct xml_node {
-  cnode n;                                             /* Node for list of parent's sons */
-  uns type;                                            /* XML_NODE_x */
-  struct xml_node *parent;                             /* Parent node */
-  char *name;                                          /* Element name / PI target */
-  clist sons;                                          /* Children nodes */
-  union {
-    struct {
-      char *text;                                      /* PI text / Comment / CDATA */
-      uns len;                                         /* Text length in bytes */
-    };
-    struct {
-      struct xml_dtd_elem *dtd;                                /* Element DTD */
-      slist attrs;                                     /* Link list of element attributes */
-    };
-  };
-  void *user;                                          /* User-defined (initialized to NULL) */
-};
-
-struct xml_attr {
-  snode n;                                             /* Node for elem->attrs */
-  struct xml_node *elem;                               /* Parent element */
-  struct xml_dtd_attr *dtd;                            /* Attribute DTD */
-  char *name;                                          /* Attribute name */
-  char *val;                                           /* Attribute value */
-  void *user;                                          /* User-defined (initialized to NULL) */
-};
-
-#define XML_BUF_SIZE 32                                 /* At least 8 -- hardcoded */
-
-struct xml_source {
-  struct xml_source *next;                             /* Link list of pending fastbufs (xml_context.sources) */
-  struct fastbuf *fb;                                  /* Source fastbuf */
-  struct fastbuf *wrapped_fb;                          /* Original wrapped fastbuf (needed for cleanup) */
-  struct fastbuf wrap_fb;                              /* Fbmem wrapper */
-  u32 buf[2 * XML_BUF_SIZE];                           /* Read buffer with Unicode values and categories */
-  u32 *bptr, *bstop;                                   /* Current state of the buffer */
-  uns row;                                             /* File position */
-  char *expected_encoding;                             /* Initial encoding before any transformation has been made (expected in XMLDecl/TextDecl) */
-  char *fb_encoding;                                   /* Encoding of the source fastbuf */
-  char *decl_encoding;                                 /* Encoding read from the XMLDecl/TextDecl */
-  uns refill_cat1;                                     /* Character categories, which should be directly passed to the buffer */
-  uns refill_cat2;                                     /* Character categories, which should be processed as newlines (possibly in some built-in
-                                                          sequences) */
-  void (*refill)(struct xml_context *ctx);             /* Callback to decode source characters to the buffer */
-  unsigned short *refill_in_to_x;                      /* Libcharset input table */
-  uns saved_depth;                                     /* Saved ctx->depth */
-  uns pending_0xd;                                     /* The last read character is 0xD */
-};
-
-struct xml_context {
-  /* Error handling */
-  char *err_msg;                                       /* Last error message */
-  enum xml_error err_code;                             /* Last error code */
-  void *throw_buf;                                     /* Where to jump on error */
-  void (*h_warn)(struct xml_context *ctx);             /* Warning callback */
-  void (*h_error)(struct xml_context *ctx);            /* Recoverable error callback */
-  void (*h_fatal)(struct xml_context *ctx);            /* Unrecoverable error callback */
-
-  /* Memory management */
-  struct mempool *pool;                                        /* DOM pool */
-  struct mempool *stack;                               /* Stack pool (freed as soon as possible) */
-  struct xml_stack *stack_list;                                /* See xml_push(), xml_pop() */
-  uns flags;                                           /* XML_FLAG_x (restored on xml_pop()) */
-  uns depth;                                           /* Nesting level (for checking of valid source nesting -> valid pushes/pops on memory pools) */
-  struct fastbuf chars;                                        /* Character data / attribute value */
-  struct mempool_state chars_state;                    /* Mempool state before the current character block has started */
-  char *chars_trivial;                                 /* If not empty, it will be appended to chars */
-  void *tab_attrs;                                     /* Hash table of element attributes */
-
-  /* Input */
-  struct xml_source *src;                              /* Current source */
-  u32 *bptr, *bstop;                                   /* Buffer with preprocessed characters (validated UCS-4 + category flags) */
-  uns cat_chars;                                       /* Unicode range of supported characters (cdata, attribute values, ...) */
-  uns cat_unrestricted;                                        /* Unrestricted characters (may appear in document/external entities) */
-  uns cat_new_line;                                    /* New line characters */
-  uns cat_name;                                                /* Characters that may appear in names */
-  uns cat_sname;                                       /* Characters that may begin a name */
-
-  /* SAX-like interface */
-  void (*h_document_start)(struct xml_context *ctx);   /* Called before entering prolog */
-  void (*h_document_end)(struct xml_context *ctx);     /* Called after leaving epilog */
-  void (*h_xml_decl)(struct xml_context *ctx);         /* Called after the XML declaration */
-  void (*h_doctype_decl)(struct xml_context *ctx);     /* Called in the doctype declaration (before optional internal subset) */
-  void (*h_comment)(struct xml_context *ctx);          /* Called after a comment (only with XML_REPORT_COMMENTS) */
-  void (*h_pi)(struct xml_context *ctx);               /* Called after a processing instruction (only with XML_REPORT_PIS) */
-  void (*h_stag)(struct xml_context *ctx);             /* Called after STag or EmptyElemTag (only with XML_REPORT_TAGS) */
-  void (*h_etag)(struct xml_context *ctx);             /* Called before ETag or after EmptyElemTag (only with XML_REPORT_TAGS) */
-  void (*h_chars)(struct xml_context *ctx);            /* Called after some characters (only with XML_REPORT_CHARS) */
-  void (*h_block)(struct xml_context *ctx, char *text, uns len);       /* Called for each continuous block of characters not reported by h_cdata() (only with XML_REPORT_CHARS) */
-  void (*h_cdata)(struct xml_context *ctx, char *text, uns len);       /* Called for each CDATA section (only with XML_REPORT_CHARS) */
-  void (*h_ignorable)(struct xml_context *ctx, char *text, uns len);   /* Called for ignorable whitespace (content in tags without #PCDATA) */
-  void (*h_dtd_start)(struct xml_context *ctx);                /* Called just after the DTD structure is initialized */
-  void (*h_dtd_end)(struct xml_context *ctx);          /* Called after DTD subsets subsets */
-  struct xml_dtd_entity *(*h_find_entity)(struct xml_context *ctx, char *name);                /* Called when needed to resolve a general entity */
-  void (*h_resolve_entity)(struct xml_context *ctx, struct xml_dtd_entity *ent);       /* User should push source fastbuf for a parsed external entity (either general or parameter) */
-
-  /* DOM */
-  struct xml_node *dom;                                        /* DOM root */
-  struct xml_node *node;                               /* Current DOM node */
-
-  char *version_str;
-  uns standalone;
-  char *doctype;                                       /* The document type (or NULL if unknown) */
-  char *system_id;                                     /* DTD external id */
-  char *public_id;                                     /* DTD public id */
-  struct xml_dtd *dtd;                                 /* The DTD structure (or NULL) */
-  uns state;                                           /* Current state for the PULL interface (XML_STATE_x) */
-  uns pull;                                            /* Parameters for the PULL interface (XML_PULL_x) */
-};
-
-/* Initialize XML context */
-void xml_init(struct xml_context *ctx);
-
-/* Clean up all internal structures */
-void xml_cleanup(struct xml_context *ctx);
-
-/* Reuse XML context, equivalent to xml_cleanup() and xml_init() */
-void xml_reset(struct xml_context *ctx);
-
-/* Add XML source (fastbuf will be automatically closed) */
-struct xml_source *xml_push_fastbuf(struct xml_context *ctx, struct fastbuf *fb);
-
-/* Parse without the PUSH interface, return XML_ERR_x code (zero on success) */
-uns xml_parse(struct xml_context *ctx);
-
-/* Parse with the PUSH interface, return XML_STATE_x (zero on EOF or fatal error) */
-uns xml_next(struct xml_context *ctx);
-
-/* Equivalent to xml_next, but with temporarily changed ctx->pull value */
-uns xml_next_state(struct xml_context *ctx, uns pull);
-
-/* May be called on XML_STATE_STAG to skip it's content; can return XML_STATE_ETAG or XML_STATE_EOF on fatal error */
-uns xml_skip_element(struct xml_context *ctx);
-
-/* Returns the current row number in the document entity */
-uns xml_row(struct xml_context *ctx);
-
-/* Finds a given attribute value in a XML_NODE_ELEM node */
-struct xml_attr *xml_attr_find(struct xml_context *ctx, struct xml_node *node, char *name);
-
-/* Similar to xml_attr_find, but it deals also with default values */
-char *xml_attr_value(struct xml_context *ctx, struct xml_node *node, char *name);
-
-/* The default value of h_find_entity(), knows &lt;, &gt;, &amp;, &apos; and &quot; */
-struct xml_dtd_entity *xml_def_find_entity(struct xml_context *ctx, char *name);
-
-/* The default value of h_resolve_entity(), throws an error */
-void xml_def_resolve_entity(struct xml_context *ctx, struct xml_dtd_entity *ent);
-
-/* Remove leading/trailing spaces and replaces sequences of spaces to a single space character (non-CDATA attribute normalization) */
-uns xml_normalize_white(struct xml_context *ctx, char *value);
-
-/* Merge character contents of a given element to a single string (not recursive) */
-char *xml_merge_chars(struct xml_context *ctx, struct xml_node *node, struct mempool *pool);
-
-/* Merge character contents of a given subtree to a single string */
-char *xml_merge_dom_chars(struct xml_context *ctx, struct xml_node *node, struct mempool *pool);
-
-/* Public part of error handling */
-void xml_warn(struct xml_context *ctx, const char *format, ...);
-void xml_error(struct xml_context *ctx, const char *format, ...);
-void NONRET xml_fatal(struct xml_context *ctx, const char *format, ...);
-
-#endif
diff --git a/shxml/Makefile b/shxml/Makefile

new file mode 100644 (file)

index 0000000..23e08b2
--- /dev/null
+++ b/shxml/Makefile
@@ -0,0 +1,46 @@
+# Makefile for the XML parser
+# (c) 2007 Pavel Charvat <pchar@ucw.cz>
+#
+# NOTE(review): this file is added as shxml/Makefile, yet every path below still
+# refers to sherlock/xml -- confirm how the build system maps the new location.
+
+DIRS+=sherlock/xml
+PROGS+=$(o)/sherlock/xml/xml-test
+
+# Object modules and public headers that make up libshxml
+LIBSHXML_MODS=common source parse dtd
+LIBSHXML_INCLUDES=xml.h dtd.h
+
+LIBSHXML_MOD_PATHS=$(addprefix $(o)/sherlock/xml/,$(LIBSHXML_MODS))
+
+# The static library is linked from the .o objects, the shared one from the .oo objects
+$(o)/sherlock/xml/libshxml.a: $(addsuffix .o,$(LIBSHXML_MOD_PATHS))
+$(o)/sherlock/xml/libshxml.so: $(addsuffix .oo,$(LIBSHXML_MOD_PATHS))
+$(o)/sherlock/xml/libshxml.pc: $(LIBSH) $(LIBCHARSET)
+
+# Every module depends on the generated unicat.h. The rule runs unicat.pl with
+# the paths of unicat.h and unicat.c as arguments and then touches the header.
+$(o)/sherlock/xml/common.o: $(o)/sherlock/xml/unicat.h
+$(o)/sherlock/xml/common.oo: $(o)/sherlock/xml/unicat.h
+$(o)/sherlock/xml/source.o: $(o)/sherlock/xml/unicat.h
+$(o)/sherlock/xml/source.oo: $(o)/sherlock/xml/unicat.h
+$(o)/sherlock/xml/dtd.o: $(o)/sherlock/xml/unicat.h
+$(o)/sherlock/xml/dtd.oo: $(o)/sherlock/xml/unicat.h
+$(o)/sherlock/xml/parse.o: $(o)/sherlock/xml/unicat.h
+$(o)/sherlock/xml/parse.oo: $(o)/sherlock/xml/unicat.h
+$(o)/sherlock/xml/unicat.h: $(s)/sherlock/xml/unicat.pl
+       $(M)GEN $(addprefix $(o)/sherlock/xml/unicat,.h .c)
+       $(Q)$< $(addprefix $(o)/sherlock/xml/unicat,.h .c)
+       $(Q)touch $@
+
+# Test program and its test script
+TESTS+=$(o)/sherlock/xml/xml-test.test
+$(o)/sherlock/xml/xml-test: $(o)/sherlock/xml/xml-test.o $(LIBSHXML)
+$(o)/sherlock/xml/xml-test.test: $(o)/sherlock/xml/xml-test
+
+# Exported API: library name, public headers (copied under sherlock/xml) and the pkg-config file
+API_LIBS+=libshxml
+API_INCLUDES+=$(o)/sherlock/xml/.include-stamp
+$(o)/sherlock/xml/.include-stamp: $(addprefix $(s)/sherlock/xml/,$(LIBSHXML_INCLUDES))
+$(o)/sherlock/xml/.include-stamp: IDST=sherlock/xml
+run/lib/pkgconfig/libshxml.pc: $(o)/sherlock/xml/libshxml.pc
+
+# Installation: creates the destination directories, then copies the headers,
+# the pkg-config file and the library (libshxml.$(LS)) from the run/ tree.
+INSTALL_TARGETS+=install-sh-xml
+install-sh-xml:
+       install -d -m 755 $(DESTDIR)$(INSTALL_INCLUDE_DIR)/sherlock/xml $(DESTDIR)$(INSTALL_LIB_DIR) $(DESTDIR)$(INSTALL_PKGCONFIG_DIR)
+       install -m 644 $(addprefix run/include/sherlock/xml/,$(LIBSHXML_INCLUDES)) $(DESTDIR)$(INSTALL_INCLUDE_DIR)/sherlock/xml
+       install -m 644 run/lib/pkgconfig/libshxml.pc $(DESTDIR)$(INSTALL_PKGCONFIG_DIR)
+       install -m 644 run/lib/libshxml.$(LS) $(DESTDIR)$(INSTALL_LIB_DIR)
+
+.PHONY: install-sh-xml
diff --git a/shxml/TODO b/shxml/TODO

new file mode 100644 (file)

index 0000000..b8dbc29
--- /dev/null
+++ b/shxml/TODO
@@ -0,0 +1,15 @@
+Non-normative / not-implemented:
+-- introduce numeric error codes
+-- cycle detection in internal entities (and possibly external?)
+-- conditional sections in DTD
+-- validation of elements (regular expressions, non-cdata)
+-- validation of attributes (unfinished)
+-- notations
+-- URI normalization
+-- support for xml:space
+-- support for xml:lang
+-- full support for standalone documents
+-- Unicode normalization
+
+Optimizations:
+-- detect definitions of trivial entities
diff --git a/shxml/common.c b/shxml/common.c

new file mode 100644 (file)

index 0000000..6bb2737
--- /dev/null
+++ b/shxml/common.c
@@ -0,0 +1,140 @@
+/*
+ *     Sherlock Library -- A simple XML parser
+ *
+ *     (c) 2007 Pavel Charvat <pchar@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#undef LOCAL_DEBUG
+
+#include "sherlock/sherlock.h"
+#include "sherlock/xml/xml.h"
+#include "sherlock/xml/dtd.h"
+#include "sherlock/xml/internals.h"
+#include "ucw/stkstring.h"
+#include "ucw/ff-unicode.h"
+
+#include <setjmp.h>
+
+/*** Error handling ***/
+
+/* Jump to the recovery point stored in ctx->throw_buf, handing over the
+ * pending error code. Both the error code and the throw buffer must be set. */
+void NONRET
+xml_throw(struct xml_context *ctx)
+{
+  ASSERT(ctx->err_code && ctx->throw_buf);
+  jmp_buf *target = ctx->throw_buf;
+  longjmp(*target, ctx->err_code);
+}
+
+/* Report a warning through the h_warn callback; does nothing when no callback
+ * is installed. err_msg and err_code are set only while the callback runs and
+ * are cleared again afterwards. */
+void
+xml_warn(struct xml_context *ctx, const char *format, ...)
+{
+  if (!ctx->h_warn)
+    return;
+
+  va_list ap;
+  va_start(ap, format);
+  ctx->err_msg = stk_vprintf(format, ap);
+  ctx->err_code = XML_ERR_WARN;
+  va_end(ap);
+
+  ctx->h_warn(ctx);
+
+  ctx->err_msg = NULL;
+  ctx->err_code = XML_ERR_OK;
+}
+
+/* Report a recoverable error through the h_error callback; does nothing when
+ * no callback is installed. err_msg and err_code are set only while the
+ * callback runs and are cleared again afterwards. */
+void
+xml_error(struct xml_context *ctx, const char *format, ...)
+{
+  if (!ctx->h_error)
+    return;
+
+  va_list ap;
+  va_start(ap, format);
+  ctx->err_msg = stk_vprintf(format, ap);
+  ctx->err_code = XML_ERR_ERROR;
+  va_end(ap);
+
+  ctx->h_error(ctx);
+
+  ctx->err_msg = NULL;
+  ctx->err_code = XML_ERR_OK;
+}
+
+/* Raise an unrecoverable error: the message is formatted into the stack pool,
+ * the context is switched to XML_STATE_EOF, the optional h_fatal callback is
+ * notified and control is transferred via xml_throw(). Never returns. */
+void NONRET
+xml_fatal(struct xml_context *ctx, const char *format, ...)
+{
+  va_list ap;
+  va_start(ap, format);
+  char *msg = mp_vprintf(ctx->stack, format, ap);
+  va_end(ap);
+
+  ctx->err_msg = msg;
+  ctx->err_code = XML_ERR_FATAL;
+  ctx->state = XML_STATE_EOF;
+
+  if (ctx->h_fatal)
+    ctx->h_fatal(ctx);
+  xml_throw(ctx);
+}
+
+/*** Memory management ***/
+
+/* Allocate zeroed storage for a hash table of the given size, preceded by a
+ * header of XML_HASH_HDR_SIZE bytes. The owning pool is stored at the start of
+ * the header; the returned pointer addresses the table itself, just past the
+ * header. */
+void *
+xml_hash_new(struct mempool *pool, uns size)
+{
+  void *tab = mp_alloc_zero(pool, size + XML_HASH_HDR_SIZE);
+  *(void **)tab = pool;
+  /* Byte arithmetic through char *: arithmetic on void * is only a GNU extension */
+  return (char *)tab + XML_HASH_HDR_SIZE;
+}
+
+/*** Initialization ***/
+
+/* Template assigned to a context by xml_init() and xml_reset(); members that
+ * are not listed here start out zeroed. */
+static struct xml_context xml_defaults = {
+  .flags = XML_SRC_EOF | XML_REPORT_ALL,
+  .state = XML_STATE_START,
+  .h_resolve_entity = xml_def_resolve_entity,
+  /* Fastbuf for character data / attribute values, flushed by xml_spout_chars */
+  .chars = {
+    .name = "<xml_chars>",
+    .spout = xml_spout_chars,
+    .can_overwrite_buffer = 1,
+  },
+};
+
+/* Initialization steps shared by xml_init() and xml_reset(); run after the
+ * context has been filled from xml_defaults and its pools are in place. */
+static void
+xml_do_init(struct xml_context *ctx)
+{
+  xml_attrs_table_init(ctx);
+}
+
+/* Initialize a context: copy the defaults, create the DOM pool and the stack
+ * pool (both via mp_new(65536)) and run the shared initialization. */
+void
+xml_init(struct xml_context *ctx)
+{
+  *ctx = xml_defaults;
+  ctx->pool = mp_new(65536);
+  ctx->stack = mp_new(65536);
+  xml_do_init(ctx);
+  TRACE(ctx, "init");
+}
+
+/* Release everything owned by the context: attribute table, DTD, sources and
+ * finally both memory pools. The context must be re-initialized before reuse. */
+void
+xml_cleanup(struct xml_context *ctx)
+{
+  TRACE(ctx, "cleanup");
+  xml_attrs_table_cleanup(ctx);
+  xml_dtd_cleanup(ctx);
+  xml_sources_cleanup(ctx);
+  mp_delete(ctx->pool);
+  mp_delete(ctx->stack);
+}
+
+/* Return the context to its freshly initialized state while keeping its two
+ * memory pools: they are flushed instead of being deleted and re-created. */
+void
+xml_reset(struct xml_context *ctx)
+{
+  TRACE(ctx, "reset");
+  /* Remember the pools, the assignment from xml_defaults below overwrites them */
+  struct mempool *pool = ctx->pool, *stack = ctx->stack;
+  xml_attrs_table_cleanup(ctx);
+  xml_dtd_cleanup(ctx);
+  xml_sources_cleanup(ctx);
+  mp_flush(pool);
+  mp_flush(stack);
+  *ctx = xml_defaults;
+  ctx->pool = pool;
+  ctx->stack = stack;
+  xml_do_init(ctx);
+}
diff --git a/shxml/dtd.c b/shxml/dtd.c

new file mode 100644 (file)

index 0000000..67cb7cc
--- /dev/null
+++ b/shxml/dtd.c
@@ -0,0 +1,1003 @@
+/*
+ *     Sherlock Library -- A simple XML parser
+ *
+ *     (c) 2007--2008 Pavel Charvat <pchar@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#undef LOCAL_DEBUG
+
+#include "sherlock/sherlock.h"
+#include "sherlock/xml/xml.h"
+#include "sherlock/xml/dtd.h"
+#include "sherlock/xml/internals.h"
+#include "ucw/fastbuf.h"
+#include "ucw/ff-unicode.h"
+#include "ucw/unicode.h"
+
+/* Notations */
+
+/* Instantiation of the generic hash table from ucw/hashtable.h for notations,
+ * keyed by the string member `name` of struct xml_dtd_notn. The template is
+ * configured by the HASH_* macros and names everything it generates
+ * xml_dtd_notns_* (this file uses xml_dtd_notns_init(), xml_dtd_notns_find()
+ * and struct xml_dtd_notns_table). XML_HASH_GIVE_ALLOC is defined outside this
+ * file -- presumably it provides the allocation hooks requested by
+ * HASH_GIVE_ALLOC and HASH_TABLE_ALLOC. */
+#define HASH_PREFIX(x) xml_dtd_notns_##x
+#define HASH_NODE struct xml_dtd_notn
+#define HASH_KEY_STRING name
+#define HASH_ZERO_FILL
+#define HASH_TABLE_DYNAMIC
+#define HASH_WANT_LOOKUP
+#define HASH_WANT_FIND
+#define HASH_GIVE_ALLOC
+#define HASH_TABLE_ALLOC
+XML_HASH_GIVE_ALLOC
+#include "ucw/hashtable.h"
+
+/* Look up a notation by name.
+ * Returns the notation only if it is present in the DTD's notation table and
+ * carries XML_DTD_NOTN_DECLARED; otherwise NULL. Also returns NULL when the
+ * context has no DTD (same convention as xml_dtd_find_elem()), instead of
+ * dereferencing a NULL ctx->dtd. */
+struct xml_dtd_notn *
+xml_dtd_find_notn(struct xml_context *ctx, char *name)
+{
+  struct xml_dtd *dtd = ctx->dtd;
+  if (!dtd)
+    return NULL;
+  struct xml_dtd_notn *notn = xml_dtd_notns_find(dtd->tab_notns, name);
+  if (!notn || !(notn->flags & XML_DTD_NOTN_DECLARED))
+    return NULL;
+  return notn;
+}
+
+/* General entities */
+
+/* Instantiation of the generic hash table from ucw/hashtable.h for entities,
+ * keyed by the string member `name` of struct xml_dtd_entity. Generated names
+ * are xml_dtd_ents_* (this file uses xml_dtd_ents_init(), xml_dtd_ents_find(),
+ * xml_dtd_ents_lookup() and struct xml_dtd_ents_table). The same table type
+ * serves both general entities (tab_ents) and parameter entities (tab_pents).
+ * XML_HASH_GIVE_ALLOC is defined outside this file -- presumably it provides
+ * the allocation hooks requested by HASH_GIVE_ALLOC and HASH_TABLE_ALLOC. */
+#define HASH_PREFIX(x) xml_dtd_ents_##x
+#define HASH_NODE struct xml_dtd_entity
+#define HASH_KEY_STRING name
+#define HASH_ZERO_FILL
+#define HASH_TABLE_DYNAMIC
+#define HASH_WANT_FIND
+#define HASH_WANT_LOOKUP
+#define HASH_GIVE_ALLOC
+#define HASH_TABLE_ALLOC
+XML_HASH_GIVE_ALLOC
+#include "ucw/hashtable.h"
+
+/* Declare a general entity with a fixed replacement text.
+ * The entity is looked up in the DTD's entity table (xml_dtd_ents_lookup);
+ * if it is already marked as declared, a warning is issued and NULL is
+ * returned. Otherwise it is appended to the DTD's entity list, flagged as
+ * declared and trivial, and returned. */
+static struct xml_dtd_entity *
+xml_dtd_declare_trivial_entity(struct xml_context *ctx, char *name, char *text)
+{
+  struct xml_dtd *dtd = ctx->dtd;
+  struct xml_dtd_entity *entity = xml_dtd_ents_lookup(dtd->tab_ents, name);
+  if (!(entity->flags & XML_DTD_ENTITY_DECLARED))
+    {
+      slist_add_tail(&dtd->ents, &entity->n);
+      entity->flags = XML_DTD_ENTITY_DECLARED | XML_DTD_ENTITY_TRIVIAL;
+      entity->text = text;
+      return entity;
+    }
+  xml_warn(ctx, "Entity &%s; already declared", name);
+  return NULL;
+}
+
+/* Declare the five predefined entities (lt, gt, amp, apos, quot) in the DTD,
+ * in that order. */
+static void
+xml_dtd_declare_default_entities(struct xml_context *ctx)
+{
+  static char *const predefined[][2] = {
+    { "lt", "<" },
+    { "gt", ">" },
+    { "amp", "&" },
+    { "apos", "'" },
+    { "quot", "\"" },
+  };
+  for (uns i = 0; i < sizeof(predefined) / sizeof(predefined[0]); i++)
+    xml_dtd_declare_trivial_entity(ctx, predefined[i][0], predefined[i][1]);
+}
+
+/* Built-in entity resolver that needs no DTD: maps the names lt, gt, amp,
+ * apos and quot to static entity records flagged as declared and trivial.
+ * Returns NULL for any other name. The context argument is unused. */
+struct xml_dtd_entity *
+xml_def_find_entity(struct xml_context *ctx UNUSED, char *name)
+{
+  static struct xml_dtd_entity predefined[] = {
+    { .name = "lt", .text = "<", .flags = XML_DTD_ENTITY_DECLARED | XML_DTD_ENTITY_TRIVIAL },
+    { .name = "gt", .text = ">", .flags = XML_DTD_ENTITY_DECLARED | XML_DTD_ENTITY_TRIVIAL },
+    { .name = "amp", .text = "&", .flags = XML_DTD_ENTITY_DECLARED | XML_DTD_ENTITY_TRIVIAL },
+    { .name = "apos", .text = "'", .flags = XML_DTD_ENTITY_DECLARED | XML_DTD_ENTITY_TRIVIAL },
+    { .name = "quot", .text = "\"", .flags = XML_DTD_ENTITY_DECLARED | XML_DTD_ENTITY_TRIVIAL },
+  };
+  for (uns i = 0; i < sizeof(predefined) / sizeof(predefined[0]); i++)
+    if (!strcmp(name, predefined[i].name))
+      return &predefined[i];
+  return NULL;
+}
+
+/* Resolve a general entity by name. Order of precedence:
+ *   1. the user-supplied h_find_entity callback, if set;
+ *   2. the DTD's entity table, if a DTD exists (only declared entities count);
+ *   3. the built-in resolver xml_def_find_entity().
+ * Returns NULL when the entity is not found. */
+struct xml_dtd_entity *
+xml_dtd_find_entity(struct xml_context *ctx, char *name)
+{
+  if (ctx->h_find_entity)
+    return ctx->h_find_entity(ctx, name);
+
+  struct xml_dtd *dtd = ctx->dtd;
+  if (!dtd)
+    return xml_def_find_entity(ctx, name);
+
+  struct xml_dtd_entity *found = xml_dtd_ents_find(dtd->tab_ents, name);
+  if (found && (found->flags & XML_DTD_ENTITY_DECLARED))
+    return found;
+  return NULL;
+}
+
+/* Parameter entities */
+
+/* Look up a parameter entity in the DTD's tab_pents table; only entities
+ * flagged as declared are returned, otherwise NULL. Expects ctx->dtd to be set. */
+static struct xml_dtd_entity *
+xml_dtd_find_pentity(struct xml_context *ctx, char *name)
+{
+  struct xml_dtd_entity *pent = xml_dtd_ents_find(ctx->dtd->tab_pents, name);
+  if (pent && (pent->flags & XML_DTD_ENTITY_DECLARED))
+    return pent;
+  return NULL;
+}
+
+/* Elements */
+
+/* Hash table of element declarations, keyed by the string member `name` of
+ * struct xml_dtd_elem. The table type is only forward-declared here; its
+ * definition comes from the template included below. */
+struct xml_dtd_elems_table;
+
+/* Per-node initialization hook (requested via HASH_GIVE_INIT_DATA): gives each
+ * new element an empty attribute list. */
+static void
+xml_dtd_elems_init_data(struct xml_dtd_elems_table *tab UNUSED, struct xml_dtd_elem *e)
+{
+  slist_init(&e->attrs);
+}
+
+/* Instantiation of the generic hash table from ucw/hashtable.h; generated
+ * names are xml_dtd_elems_* (this file uses xml_dtd_elems_init() and
+ * xml_dtd_elems_find()). XML_HASH_GIVE_ALLOC is defined outside this file --
+ * presumably it provides the allocation hooks requested by HASH_GIVE_ALLOC
+ * and HASH_TABLE_ALLOC. */
+#define HASH_PREFIX(x) xml_dtd_elems_##x
+#define HASH_NODE struct xml_dtd_elem
+#define HASH_KEY_STRING name
+#define HASH_TABLE_DYNAMIC
+#define HASH_ZERO_FILL
+#define HASH_WANT_FIND
+#define HASH_WANT_LOOKUP
+#define HASH_GIVE_ALLOC
+#define HASH_GIVE_INIT_DATA
+#define HASH_TABLE_ALLOC
+XML_HASH_GIVE_ALLOC
+#include "ucw/hashtable.h"
+
+/* Find the DTD record of an element by name; returns NULL when the context
+ * has no DTD or the element is not in the table. */
+struct xml_dtd_elem *
+xml_dtd_find_elem(struct xml_context *ctx, char *name)
+{
+  if (!ctx->dtd)
+    return NULL;
+  return xml_dtd_elems_find(ctx->dtd->tab_elems, name);
+}
+
+/* Element sons */
+
+/* Hash table of element-node sons, keyed by the pair (parent node, element).
+ * The table type is only forward-declared here; its definition comes from the
+ * template included below. */
+struct xml_dtd_enodes_table;
+
+/* Hash of the key: XOR of the pointer hashes of both components. */
+static inline uns
+xml_dtd_enodes_hash(struct xml_dtd_enodes_table *tab UNUSED, struct xml_dtd_elem_node *parent, struct xml_dtd_elem *elem)
+{
+  return hash_pointer(parent) ^ hash_pointer(elem);
+}
+
+/* Two keys are equal when both pointers are identical. */
+static inline int
+xml_dtd_enodes_eq(struct xml_dtd_enodes_table *tab UNUSED, struct xml_dtd_elem_node *parent1, struct xml_dtd_elem *elem1, struct xml_dtd_elem_node *parent2, struct xml_dtd_elem *elem2)
+{
+  return (parent1 == parent2) && (elem1 == elem2);
+}
+
+/* Store the key into a newly created node. */
+static inline void
+xml_dtd_enodes_init_key(struct xml_dtd_enodes_table *tab UNUSED, struct xml_dtd_elem_node *node, struct xml_dtd_elem_node *parent, struct xml_dtd_elem *elem)
+{
+  node->parent = parent;
+  node->elem = elem;
+}
+
+/* Instantiation of the generic hash table from ucw/hashtable.h with a complex
+ * key; the three functions above are the hooks announced by HASH_GIVE_HASHFN,
+ * HASH_GIVE_EQ and HASH_GIVE_INIT_KEY. Generated names are xml_dtd_enodes_*.
+ * XML_HASH_GIVE_ALLOC is defined outside this file -- presumably it provides
+ * the allocation hooks requested by HASH_GIVE_ALLOC and HASH_TABLE_ALLOC. */
+#define HASH_PREFIX(x) xml_dtd_enodes_##x
+#define HASH_NODE struct xml_dtd_elem_node
+#define HASH_KEY_COMPLEX(x) x parent, x elem
+#define HASH_KEY_DECL struct xml_dtd_elem_node *parent, struct xml_dtd_elem *elem
+#define HASH_GIVE_HASHFN
+#define HASH_GIVE_EQ
+#define HASH_GIVE_INIT_KEY
+#define HASH_TABLE_DYNAMIC
+#define HASH_ZERO_FILL
+#define HASH_WANT_FIND
+#define HASH_WANT_NEW
+#define HASH_GIVE_ALLOC
+#define HASH_TABLE_ALLOC
+XML_HASH_GIVE_ALLOC
+#include "ucw/hashtable.h"
+
+/* Element attributes */
+
+/* Hash table of attribute declarations, keyed by the pair (element, attribute
+ * name). The table type is only forward-declared here; its definition comes
+ * from the template included below. */
+struct xml_dtd_attrs_table;
+
+/* Hash of the key: pointer hash of the element XOR string hash of the name. */
+static inline uns
+xml_dtd_attrs_hash(struct xml_dtd_attrs_table *tab UNUSED, struct xml_dtd_elem *elem, char *name)
+{
+  return hash_pointer(elem) ^ hash_string(name);
+}
+
+/* Keys are equal when the element pointers match and the names compare equal. */
+static inline int
+xml_dtd_attrs_eq(struct xml_dtd_attrs_table *tab UNUSED, struct xml_dtd_elem *elem1, char *name1, struct xml_dtd_elem *elem2, char *name2)
+{
+  return (elem1 == elem2) && !strcmp(name1, name2);
+}
+
+/* Store the key into a newly created attribute. As a side effect the attribute
+ * is also appended to the owning element's attrs list. The name pointer is
+ * stored as is, not copied. */
+static inline void
+xml_dtd_attrs_init_key(struct xml_dtd_attrs_table *tab UNUSED, struct xml_dtd_attr *attr, struct xml_dtd_elem *elem, char *name)
+{
+  attr->elem = elem;
+  attr->name = name;
+  slist_add_tail(&elem->attrs, &attr->n);
+}
+
+/* Instantiation of the generic hash table from ucw/hashtable.h with a complex
+ * key; the three functions above are the hooks announced by HASH_GIVE_HASHFN,
+ * HASH_GIVE_EQ and HASH_GIVE_INIT_KEY. Generated names are xml_dtd_attrs_*
+ * (this file uses xml_dtd_attrs_init() and xml_dtd_attrs_find()).
+ * XML_HASH_GIVE_ALLOC is defined outside this file -- presumably it provides
+ * the allocation hooks requested by HASH_GIVE_ALLOC and HASH_TABLE_ALLOC. */
+#define HASH_PREFIX(x) xml_dtd_attrs_##x
+#define HASH_NODE struct xml_dtd_attr
+#define HASH_ZERO_FILL
+#define HASH_TABLE_DYNAMIC
+#define HASH_KEY_COMPLEX(x) x elem, x name
+#define HASH_KEY_DECL struct xml_dtd_elem *elem, char *name
+#define HASH_GIVE_HASHFN
+#define HASH_GIVE_EQ
+#define HASH_GIVE_INIT_KEY
+#define HASH_WANT_FIND
+#define HASH_WANT_NEW
+#define HASH_GIVE_ALLOC
+#define HASH_TABLE_ALLOC
+XML_HASH_GIVE_ALLOC
+#include "ucw/hashtable.h"
+
+/* Find the DTD record of attribute `name` of element `elem`; returns NULL
+ * when the context has no DTD or no such attribute is in the table. */
+struct xml_dtd_attr *
+xml_dtd_find_attr(struct xml_context *ctx, struct xml_dtd_elem *elem, char *name)
+{
+  if (!ctx->dtd)
+    return NULL;
+  return xml_dtd_attrs_find(ctx->dtd->tab_attrs, elem, name);
+}
+
+/* Enumerated attribute values */
+
+/* Hash table of enumerated attribute values, keyed by the pair (attribute,
+ * value string). The table type is only forward-declared here; its definition
+ * comes from the template included below. */
+struct xml_dtd_evals_table;
+
+/* Hash of the key: pointer hash of the attribute XOR string hash of the value. */
+static inline uns
+xml_dtd_evals_hash(struct xml_dtd_evals_table *tab UNUSED, struct xml_dtd_attr *attr, char *val)
+{
+  return hash_pointer(attr) ^ hash_string(val);
+}
+
+/* Keys are equal when the attribute pointers match and the values compare equal. */
+static inline int
+xml_dtd_evals_eq(struct xml_dtd_evals_table *tab UNUSED, struct xml_dtd_attr *attr1, char *val1, struct xml_dtd_attr *attr2, char *val2)
+{
+  return (attr1 == attr2) && !strcmp(val1, val2);
+}
+
+/* Store the key into a newly created node; the value pointer is not copied. */
+static inline void
+xml_dtd_evals_init_key(struct xml_dtd_evals_table *tab UNUSED, struct xml_dtd_eval *eval, struct xml_dtd_attr *attr, char *val)
+{
+  eval->attr = attr;
+  eval->val = val;
+}
+
+/* Instantiation of the generic hash table from ucw/hashtable.h with a complex
+ * key; the three functions above are the hooks announced by HASH_GIVE_HASHFN,
+ * HASH_GIVE_EQ and HASH_GIVE_INIT_KEY. Generated names are xml_dtd_evals_*.
+ * XML_HASH_GIVE_ALLOC is defined outside this file -- presumably it provides
+ * the allocation hooks requested by HASH_GIVE_ALLOC and HASH_TABLE_ALLOC. */
+#define HASH_PREFIX(x) xml_dtd_evals_##x
+#define HASH_NODE struct xml_dtd_eval
+#define HASH_TABLE_DYNAMIC
+#define HASH_KEY_COMPLEX(x) x attr, x val
+#define HASH_KEY_DECL struct xml_dtd_attr *attr, char *val
+#define HASH_GIVE_HASHFN
+#define HASH_GIVE_EQ
+#define HASH_GIVE_INIT_KEY
+#define HASH_WANT_FIND
+#define HASH_WANT_NEW
+#define HASH_GIVE_ALLOC
+#define HASH_TABLE_ALLOC
+XML_HASH_GIVE_ALLOC
+#include "ucw/hashtable.h"
+
+/* Enumerated attribute notations */
+
+/* Hash table of enumerated attribute notations, keyed by the pair (attribute,
+ * notation). The table type is only forward-declared here; its definition
+ * comes from the template included below. */
+struct xml_dtd_enotns_table;
+
+/* Hash of the key: XOR of the pointer hashes of both components. */
+static inline uns
+xml_dtd_enotns_hash(struct xml_dtd_enotns_table *tab UNUSED, struct xml_dtd_attr *attr, struct xml_dtd_notn *notn)
+{
+  return hash_pointer(attr) ^ hash_pointer(notn);
+}
+
+/* Two keys are equal when both pointers are identical. */
+static inline int
+xml_dtd_enotns_eq(struct xml_dtd_enotns_table *tab UNUSED, struct xml_dtd_attr *attr1, struct xml_dtd_notn *notn1, struct xml_dtd_attr *attr2, struct xml_dtd_notn *notn2)
+{
+  return (attr1 == attr2) && (notn1 == notn2);
+}
+
+/* Store the key into a newly created node. */
+static inline void
+xml_dtd_enotns_init_key(struct xml_dtd_enotns_table *tab UNUSED, struct xml_dtd_enotn *enotn, struct xml_dtd_attr *attr, struct xml_dtd_notn *notn)
+{
+  enotn->attr = attr;
+  enotn->notn = notn;
+}
+
+/* Instantiation of the generic hash table from ucw/hashtable.h with a complex
+ * key; the three functions above are the hooks announced by HASH_GIVE_HASHFN,
+ * HASH_GIVE_EQ and HASH_GIVE_INIT_KEY. Generated names are xml_dtd_enotns_*.
+ * XML_HASH_GIVE_ALLOC is defined outside this file -- presumably it provides
+ * the allocation hooks requested by HASH_GIVE_ALLOC and HASH_TABLE_ALLOC. */
+#define HASH_PREFIX(x) xml_dtd_enotns_##x
+#define HASH_NODE struct xml_dtd_enotn
+#define HASH_TABLE_DYNAMIC
+#define HASH_KEY_COMPLEX(x) x attr, x notn
+#define HASH_KEY_DECL struct xml_dtd_attr *attr, struct xml_dtd_notn *notn
+#define HASH_GIVE_HASHFN
+#define HASH_GIVE_EQ
+#define HASH_GIVE_INIT_KEY
+#define HASH_WANT_FIND
+#define HASH_WANT_NEW
+#define HASH_GIVE_ALLOC
+#define HASH_TABLE_ALLOC
+XML_HASH_GIVE_ALLOC
+#include "ucw/hashtable.h"
+
+/* DTD initialization/cleanup */
+
+/* Create the DTD structure of a context; a no-op if one already exists.
+ * The DTD gets its own memory pool, from which the structure itself and all
+ * of its hash tables are allocated, so deleting that pool (see
+ * xml_dtd_cleanup()) releases everything at once. Finally the predefined
+ * entities are declared. */
+void
+xml_dtd_init(struct xml_context *ctx)
+{
+  if (ctx->dtd)
+    return;
+  struct mempool *pool = mp_new(4096);
+  struct xml_dtd *dtd = ctx->dtd = mp_alloc_zero(pool, sizeof(*ctx->dtd));
+  dtd->pool = pool;
+  /* General and parameter entities use two separate tables of the same type */
+  xml_dtd_ents_init(dtd->tab_ents = xml_hash_new(pool, sizeof(struct xml_dtd_ents_table)));
+  xml_dtd_ents_init(dtd->tab_pents = xml_hash_new(pool, sizeof(struct xml_dtd_ents_table)));
+  xml_dtd_notns_init(dtd->tab_notns = xml_hash_new(pool, sizeof(struct xml_dtd_notns_table)));
+  xml_dtd_elems_init(dtd->tab_elems = xml_hash_new(pool, sizeof(struct xml_dtd_elems_table)));
+  xml_dtd_enodes_init(dtd->tab_enodes = xml_hash_new(pool, sizeof(struct xml_dtd_enodes_table)));
+  xml_dtd_attrs_init(dtd->tab_attrs = xml_hash_new(pool, sizeof(struct xml_dtd_attrs_table)));
+  xml_dtd_evals_init(dtd->tab_evals = xml_hash_new(pool, sizeof(struct xml_dtd_evals_table)));
+  xml_dtd_enotns_init(dtd->tab_enotns = xml_hash_new(pool, sizeof(struct xml_dtd_enotns_table)));
+  xml_dtd_declare_default_entities(ctx);
+}
+
+void
+xml_dtd_cleanup(struct xml_context *ctx)
+{
+  /* Releases the whole DTD (if any) by deleting its pool and forgets it */
+  struct xml_dtd *dtd = ctx->dtd;
+  if (dtd)
+    {
+      mp_delete(dtd->pool);
+      ctx->dtd = NULL;
+    }
+}
+
+void
+xml_dtd_finish(struct xml_context *ctx)
+{
+  /* Hook for the end of the DTD; currently a no-op */
+  if (ctx->dtd)
+    {
+      // FIXME: validity checks
+    }
+}
+
+/*** Parsing functions ***/
+
+/* References to parameter entities */
+
+void
+xml_parse_pe_ref(struct xml_context *ctx)
+{
+  /* PEReference ::= '%' Name ';'
+   * Already parsed: '%'
+   * The name is kept only temporarily on ctx->stack. A known entity is pushed
+   * as a new input source, an unknown one is reported by xml_error(). */
+  struct mempool_state saved;
+  mp_save(ctx->stack, &saved);
+  char *name = xml_parse_name(ctx, ctx->stack);
+  xml_parse_char(ctx, ';');
+  struct xml_dtd_entity *ent = xml_dtd_find_pentity(ctx, name);
+  if (ent)
+    TRACE(ctx, "Pushed entity %%%s;", name);
+  else
+    xml_error(ctx, "Unknown entity %%%s;", name);
+  /* The name must not be touched after this point */
+  mp_restore(ctx->stack, &saved);
+  xml_dec(ctx);
+  if (ent)
+    xml_push_entity(ctx, ent);
+}
+
+static uns
+xml_parse_dtd_pe(struct xml_context *ctx, uns entity_decl)
+{
+  /* Processes a run of parameter entity references, each optionally followed by white space.
+   * Already parsed: '%'
+   * If entity_decl is ~0U and the '%' is directly followed by white space, nothing
+   * is parsed as a reference and ~0U is returned; otherwise the result is 1. */
+  uns next;
+  do
+    {
+      xml_inc(ctx);
+      if (!~entity_decl)
+        if (xml_peek_cat(ctx) & XML_CHAR_WHITE)
+          {
+            xml_dec(ctx);
+            return ~0U;
+          }
+      xml_parse_pe_ref(ctx);
+      while (xml_peek_cat(ctx) & XML_CHAR_WHITE)
+        xml_skip_char(ctx);
+      next = xml_get_char(ctx);
+    }
+  while (next == '%');
+  /* Give back the character which did not start another reference */
+  xml_unget_char(ctx);
+  return 1;
+}
+
+static inline uns
+xml_parse_dtd_white(struct xml_context *ctx, uns mandatory)
+{
+  /* White space or a parameter entity reference.
+   * mandatory == ~0U has the special meaning of the white space before the '%' character
+   * in a parameter entity declaration (see xml_parse_dtd_pe()).
+   * Returns non-zero if any white space was skipped; when a '%' follows, the result
+   * of xml_parse_dtd_pe() is returned instead. */
+  uns seen = 0;
+  for (; xml_peek_cat(ctx) & XML_CHAR_WHITE; seen = 1)
+    xml_skip_char(ctx);
+  if (xml_peek_char(ctx) != '%')
+    {
+      if (unlikely(mandatory && !seen))
+        xml_fatal_expected_white(ctx);
+      return seen;
+    }
+  xml_skip_char(ctx);
+  return xml_parse_dtd_pe(ctx, mandatory);
+}
+
+static void
+xml_dtd_parse_external_id(struct xml_context *ctx, char **system_id, char **public_id, uns allow_public)
+{
+  /* ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral
+   * PublicID ::= 'PUBLIC' S PubidLiteral  (accepted only if allow_public is set)
+   * The literals are allocated from the DTD pool; an identifier which is not present is set to NULL. */
+  struct xml_dtd *dtd = ctx->dtd;
+  switch (xml_peek_char(ctx))
+    {
+      case 'S':
+        xml_parse_seq(ctx, "SYSTEM");
+        xml_parse_dtd_white(ctx, 1);
+        *public_id = NULL;
+        *system_id = xml_parse_system_literal(ctx, dtd->pool);
+        break;
+      case 'P':
+        xml_parse_seq(ctx, "PUBLIC");
+        xml_parse_dtd_white(ctx, 1);
+        *system_id = NULL;
+        *public_id = xml_parse_pubid_literal(ctx, dtd->pool);
+        /* The system literal is mandatory unless a bare PublicID is allowed */
+        if (xml_parse_dtd_white(ctx, !allow_public))
+          {
+            uns quote = xml_peek_char(ctx);
+            if (quote == '\'' || quote == '"' || !allow_public)
+              *system_id = xml_parse_system_literal(ctx, dtd->pool);
+          }
+        break;
+      default:
+        xml_fatal(ctx, "Expected an external ID");
+    }
+}
+
+/* DTD: <!NOTATION ...> */
+
+void
+xml_parse_notation_decl(struct xml_context *ctx)
+{
+  /* NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
+   * Already parsed: '<!NOTATION'
+   * The first declaration of a notation is recorded and linked to dtd->notns,
+   * a repeated one only produces a warning. */
+  TRACE(ctx, "parse_notation_decl");
+  struct xml_dtd *dtd = ctx->dtd;
+  char *system_id, *public_id;
+
+  xml_parse_dtd_white(ctx, 1);
+  char *name = xml_parse_name(ctx, dtd->pool);
+  struct xml_dtd_notn *notn = xml_dtd_notns_lookup(dtd->tab_notns, name);
+  xml_parse_dtd_white(ctx, 1);
+  xml_dtd_parse_external_id(ctx, &system_id, &public_id, 1);
+  xml_parse_dtd_white(ctx, 0);
+  xml_parse_char(ctx, '>');
+
+  if (!(notn->flags & XML_DTD_NOTN_DECLARED))
+    {
+      notn->flags = XML_DTD_NOTN_DECLARED;
+      notn->system_id = system_id;
+      notn->public_id = public_id;
+      slist_add_tail(&dtd->notns, &notn->n);
+    }
+  else
+    xml_warn(ctx, "Notation %s already declared", notn->name);
+  xml_dec(ctx);
+}
+
+/* DTD: <!ENTITY ...> */
+
+void
+xml_parse_entity_decl(struct xml_context *ctx)
+{
+  /* GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
+   * PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
+   * EntityDef ::= EntityValue | (ExternalID NDataDecl?)
+   * PEDef ::= EntityValue | ExternalID
+   * NDataDecl ::= S 'NDATA' S Name
+   * Already parsed: '<!ENTITY'
+   *
+   * The entity node is obtained from the table of general or parameter entities,
+   * filled in from the declaration and appended to the corresponding list of the DTD. */
+  TRACE(ctx, "parse_entity_decl");
+  struct xml_dtd *dtd = ctx->dtd;
+
+  /* xml_parse_dtd_white(ctx, ~0U) returns ~0U exactly when it has met the '%' of a parameter entity declaration */
+  uns flags = ~xml_parse_dtd_white(ctx, ~0U) ? 0 : XML_DTD_ENTITY_PARAMETER;
+  if (flags)
+    xml_parse_dtd_white(ctx, 1);
+  struct xml_dtd_entity *ent = xml_dtd_ents_lookup(flags ? dtd->tab_pents : dtd->tab_ents, xml_parse_name(ctx, dtd->pool));
+  xml_parse_dtd_white(ctx, 1);
+  slist *list = flags ? &dtd->pents : &dtd->ents;
+  if (ent->flags & XML_DTD_ENTITY_DECLARED)
+    {
+      xml_fatal(ctx, "Entity &%s; already declared, skipping not implemented", ent->name);
+      // FIXME: should be only warning
+    }
+  uns c, sep = xml_get_char(ctx);
+  if (sep == '\'' || sep == '"')
+    {
+      /* Internal entity:
+       * EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | "'" ([^%&'] | PEReference | Reference)* "'"
+       * The replacement text is built as a growing buffer in the DTD pool. */
+      char *p = mp_start_noalign(dtd->pool, 1);
+      while (1)
+        {
+          if ((c = xml_get_char(ctx)) == sep)
+            break;
+          if (c == '%')
+            {
+              // FIXME
+              ASSERT(0);
+              //xml_parse_parameter_ref(ctx);
+              continue;
+            }
+          if (c == '&')
+            {
+              xml_inc(ctx);
+              if (xml_peek_char(ctx) != '#')
+                {
+                  /* Bypass references to general entities: copy "&name;" verbatim */
+                  struct mempool_state state;
+                  mp_save(ctx->stack, &state);
+                  char *n = xml_parse_name(ctx, ctx->stack);
+                  xml_parse_char(ctx, ';');
+                  xml_dec(ctx);
+                  uns l = strlen(n);
+                  p = mp_spread(dtd->pool, p, 3 + l);
+                  *p++ = '&';
+                  memcpy(p, n, l);
+                  p += l;
+                  *p++ = ';';
+                  mp_restore(ctx->stack, &state);
+                  continue;
+                }
+              else
+                {
+                  /* Character references are expanded immediately */
+                  xml_skip_char(ctx);
+                  c = xml_parse_char_ref(ctx);
+                }
+            }
+          p = mp_spread(dtd->pool, p, 5);
+          p = utf8_32_put(p, c);
+        }
+      *p = 0;
+      ent->len = p - (char *)mp_ptr(dtd->pool);
+      ent->text = mp_end(dtd->pool, p + 1);
+      slist_add_tail(list, &ent->n);
+      ent->flags = flags | XML_DTD_ENTITY_DECLARED;
+    }
+  else
+    {
+      /* External entity */
+      struct xml_dtd_notn *notn = NULL;
+      char *system_id, *public_id;
+      xml_unget_char(ctx);
+      xml_dtd_parse_external_id(ctx, &system_id, &public_id, 0);
+      /* NDataDecl is a part of EntityDef only, so it can follow the external ID
+       * of a general entity (flags == 0), never that of a parameter entity. */
+      if (xml_parse_dtd_white(ctx, 0) && !flags && xml_peek_char(ctx) != '>')
+        {
+          /* General external unparsed entity */
+          flags |= XML_DTD_ENTITY_UNPARSED;
+          xml_parse_seq(ctx, "NDATA");
+          xml_parse_dtd_white(ctx, 1);
+          notn = xml_dtd_notns_lookup(dtd->tab_notns, xml_parse_name(ctx, dtd->pool));
+        }
+      slist_add_tail(list, &ent->n);
+      ent->flags = flags | XML_DTD_ENTITY_DECLARED | XML_DTD_ENTITY_EXTERNAL;
+      ent->system_id = system_id;
+      ent->public_id = public_id;
+      ent->notn = notn;
+    }
+  xml_parse_dtd_white(ctx, 0);
+  xml_parse_char(ctx, '>');
+  xml_dec(ctx);
+}
+
+/* DTD: <!ELEMENT ...> */
+
+static uns
+xml_dtd_parse_occur(struct xml_context *ctx)
+{
+  /* Parses an optional occurrence indicator ('?' | '*' | '+')? and returns XML_DTD_ELEM_OCCUR_x */
+  switch (xml_get_char(ctx))
+    {
+      case '?':
+        return XML_DTD_ELEM_OCCUR_OPT;
+      case '*':
+        return XML_DTD_ELEM_OCCUR_MULT;
+      case '+':
+        return XML_DTD_ELEM_OCCUR_PLUS;
+      default:
+        xml_unget_char(ctx);
+        return XML_DTD_ELEM_OCCUR_ONCE;
+    }
+}
+
+static void
+xml_dtd_parse_children(struct xml_context *ctx, struct xml_dtd_elem_node *parent)
+{
+  /* children ::= (choice | seq) ('?' | '*' | '+')?
+   * cp ::= (Name | choice | seq) ('?' | '*' | '+')?
+   * choice ::= '(' S? cp ( S? '|' S? cp )+ S? ')'
+   * seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
+   * Already parsed: '(' S? of the outermost group, which is represented by @parent.
+   * Builds the tree of groups and names below @parent and returns after the
+   * occurrence indicator of the outermost group. */
+  struct xml_dtd *dtd = ctx->dtd;
+  uns c;
+
+  /* XML_DTD_ELEM_PCDATA marks a group whose operator has not been seen yet */
+  parent->type = XML_DTD_ELEM_PCDATA;
+  while (1)
+    {
+      /* Before cp: either a nested group or a name */
+      if (xml_peek_char(ctx) == '(')
+        {
+          xml_skip_char(ctx);
+          xml_inc(ctx);
+          struct xml_dtd_elem_node *group = mp_alloc_zero(dtd->pool, sizeof(*group));
+          group->parent = parent;
+          group->type = XML_DTD_ELEM_PCDATA;
+          slist_add_tail(&parent->sons, &group->n);
+          /* Descend: the following content particles belong to the new group */
+          parent = group;
+          xml_parse_dtd_white(ctx, 0);
+          continue;
+        }
+      struct xml_dtd_elem *son_elem = xml_dtd_elems_lookup(dtd->tab_elems, xml_parse_name(ctx, dtd->pool));
+      // FIXME: duplicates
+      //struct xml_dtd_elem_node *son = xml_dtd_enodes_new(dtd->tab_enodes, parent, son_elem);
+      struct xml_dtd_elem_node *son = mp_alloc_zero(dtd->pool, sizeof(*son));
+      son->parent = parent;
+      son->elem = son_elem;
+      son->occur = xml_dtd_parse_occur(ctx);
+      slist_add_tail(&parent->sons, &son->n);
+
+      /* After cp: close all groups which end here, then expect a separator */
+      while (1)
+        {
+          xml_parse_dtd_white(ctx, 0);
+          if ((c = xml_get_char(ctx)) != ')')
+            break;
+          xml_dec(ctx);
+          /* A group with a single member is treated as a sequence */
+          if (parent->type == XML_DTD_ELEM_PCDATA)
+            parent->type = XML_DTD_ELEM_SEQ;
+          parent->occur = xml_dtd_parse_occur(ctx);
+          if (!parent->parent)
+            return;
+          parent = parent->parent;
+        }
+      if (c == '|')
+        {
+          if (parent->type == XML_DTD_ELEM_PCDATA)
+            parent->type = XML_DTD_ELEM_OR;
+          else if (parent->type != XML_DTD_ELEM_OR)
+            xml_fatal(ctx, "Mixed operators in the list of element children");
+        }
+      else if (c == ',')
+        {
+          if (parent->type == XML_DTD_ELEM_PCDATA)
+            parent->type = XML_DTD_ELEM_SEQ;
+          else if (parent->type != XML_DTD_ELEM_SEQ)
+            xml_fatal(ctx, "Mixed operators in the list of element children");
+        }
+      else
+        xml_fatal(ctx, "Expected ',', '|' or ')' in the list of element children");
+      xml_parse_dtd_white(ctx, 0);
+    }
+}
+
+void
+xml_parse_element_decl(struct xml_context *ctx)
+{
+  /* Elementdecl ::= '<!ELEMENT' S  Name  S  contentspec  S? '>'
+   * Already parsed: '<!ELEMENT'
+   * Stores the content type in the element and, for mixed and element content,
+   * the content model tree in elem->node. */
+  struct xml_dtd *dtd = ctx->dtd;
+  xml_parse_dtd_white(ctx, 1);
+  char *name = xml_parse_name(ctx, dtd->pool);
+  xml_parse_dtd_white(ctx, 1);
+  struct xml_dtd_elem *elem = xml_dtd_elems_lookup(dtd->tab_elems, name);
+  /* NOTE(review): nothing in this function sets XML_DTD_ELEM_DECLARED, so this check
+   * fires only if the flag is set somewhere else -- confirm. */
+  if (elem->flags & XML_DTD_ELEM_DECLARED)
+    xml_fatal(ctx, "Element <%s> already declared", name);
+
+  /* contentspec ::= 'EMPTY' | 'ANY' | Mixed | children */
+  uns c = xml_peek_char(ctx);
+  if (c == 'E')
+    {
+      xml_parse_seq(ctx, "EMPTY");
+      elem->type = XML_DTD_ELEM_EMPTY;
+    }
+  else if (c == 'A')
+    {
+      xml_parse_seq(ctx, "ANY");
+      elem->type = XML_DTD_ELEM_ANY;
+    }
+  else if (c == '(')
+    {
+      xml_skip_char(ctx);
+      xml_inc(ctx);
+      xml_parse_dtd_white(ctx, 0);
+      struct xml_dtd_elem_node *parent = elem->node = mp_alloc_zero(dtd->pool, sizeof(*parent));
+      if (xml_peek_char(ctx) == '#')
+        {
+          /* Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | '(' S? '#PCDATA' S? ')' */
+          xml_skip_char(ctx);
+          xml_parse_seq(ctx, "PCDATA");
+          elem->type = XML_DTD_ELEM_MIXED;
+          parent->type = XML_DTD_ELEM_PCDATA;
+          while (1)
+            {
+              xml_parse_dtd_white(ctx, 0);
+              if ((c = xml_get_char(ctx)) == ')')
+                break;
+              else if (c != '|')
+                xml_fatal_expected(ctx, ')');
+              xml_parse_dtd_white(ctx, 0);
+              struct xml_dtd_elem *son_elem = xml_dtd_elems_lookup(dtd->tab_elems, xml_parse_name(ctx, dtd->pool));
+              if (xml_dtd_enodes_find(dtd->tab_enodes, parent, son_elem))
+                xml_error(ctx, "Duplicate content '%s'", son_elem->name);
+              else
+                {
+                  struct xml_dtd_elem_node *son = xml_dtd_enodes_new(dtd->tab_enodes, parent, son_elem);
+                  slist_add_tail(&parent->sons, &son->n);
+                }
+            }
+          xml_dec(ctx);
+          /* The trailing '*' may be omitted only if no element names were listed */
+          if (xml_peek_char(ctx) == '*')
+            {
+              xml_skip_char(ctx);
+              parent->occur = XML_DTD_ELEM_OCCUR_MULT;
+            }
+          else if (!slist_head(&parent->sons))
+            parent->occur = XML_DTD_ELEM_OCCUR_ONCE;
+          else
+            xml_fatal_expected(ctx, '*');
+        }
+      else
+        {
+          elem->type = XML_DTD_ELEM_CHILDREN;
+          xml_dtd_parse_children(ctx, parent);
+        }
+    }
+  else
+    xml_fatal(ctx, "Expected element content specification");
+
+  xml_parse_dtd_white(ctx, 0);
+  xml_parse_char(ctx, '>');
+  xml_dec(ctx);
+}
+
+void
+xml_parse_attr_list_decl(struct xml_context *ctx)
+{
+  /* AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
+   * AttDef ::= S Name S AttType S DefaultDecl
+   * Already parsed: '<!ATTLIST'
+   * Every attribute definition is parsed completely, but if the attribute has
+   * already been defined for the element, the new definition is only reported
+   * by a warning and otherwise ignored. */
+  struct xml_dtd *dtd = ctx->dtd;
+  xml_parse_dtd_white(ctx, 1);
+  struct xml_dtd_elem *elem = xml_dtd_elems_lookup(dtd->tab_elems, xml_parse_name(ctx, dtd->pool));
+
+  while (xml_parse_dtd_white(ctx, 0) && xml_peek_char(ctx) != '>')
+    {
+      char *name = xml_parse_name(ctx, dtd->pool);
+      struct xml_dtd_attr *attr = xml_dtd_attrs_find(dtd->tab_attrs, elem, name);
+      uns ignored = 0;
+      if (attr)
+        {
+          xml_warn(ctx, "Duplicate attribute definition");
+          ignored++;
+        }
+      else
+        attr = xml_dtd_attrs_new(dtd->tab_attrs, elem, name);
+      xml_parse_dtd_white(ctx, 1);
+      if (xml_peek_char(ctx) == '(')
+        {
+          /* Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')' */
+          xml_skip_char(ctx); // FIXME: xml_inc/dec ?
+          if (!ignored)
+            attr->type = XML_ATTR_ENUM;
+          do
+            {
+              xml_parse_dtd_white(ctx, 0);
+              char *value = xml_parse_nmtoken(ctx, dtd->pool);
+              if (!ignored)
+                {
+                  if (xml_dtd_evals_find(dtd->tab_evals, attr, value))
+                    xml_error(ctx, "Duplicate enumeration value");
+                  else
+                    xml_dtd_evals_new(dtd->tab_evals, attr, value);
+                }
+              xml_parse_dtd_white(ctx, 0);
+            }
+          while (xml_get_char(ctx) == '|');
+          xml_unget_char(ctx);
+          xml_parse_char(ctx, ')');
+        }
+      else
+        {
+          char *type = xml_parse_name(ctx, dtd->pool);
+          enum xml_dtd_attr_type t = XML_ATTR_CDATA;
+          if (!strcmp(type, "CDATA"))
+            t = XML_ATTR_CDATA;
+          else if (!strcmp(type, "ID"))
+            t = XML_ATTR_ID;
+          else if (!strcmp(type, "IDREF"))
+            t = XML_ATTR_IDREF;
+          else if (!strcmp(type, "IDREFS"))
+            t = XML_ATTR_IDREFS;
+          else if (!strcmp(type, "ENTITY"))
+            t = XML_ATTR_ENTITY;
+          else if (!strcmp(type, "ENTITIES"))
+            t = XML_ATTR_ENTITIES;
+          else if (!strcmp(type, "NMTOKEN"))
+            t = XML_ATTR_NMTOKEN;
+          else if (!strcmp(type, "NMTOKENS"))
+            t = XML_ATTR_NMTOKENS;
+          else if (!strcmp(type, "NOTATION"))
+            {
+              /* NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' */
+              /* NOTE(review): XML_DTD_ELEM_EMPTY is 0, so an element which has not been declared
+               * yet may look empty here as well -- confirm how fresh elements are initialized. */
+              if (elem->type == XML_DTD_ELEM_EMPTY)
+                xml_fatal(ctx, "Empty element must not have notation attribute");
+              // FIXME: An element type MUST NOT have more than one NOTATION attribute specified.
+              t = XML_ATTR_NOTATION;
+              xml_parse_dtd_white(ctx, 1);
+              xml_parse_char(ctx, '(');
+              do
+                {
+                  xml_parse_dtd_white(ctx, 0);
+                  struct xml_dtd_notn *n = xml_dtd_notns_lookup(dtd->tab_notns, xml_parse_name(ctx, dtd->pool));
+                  if (!ignored)
+                    {
+                      if (xml_dtd_enotns_find(dtd->tab_enotns, attr, n))
+                        xml_error(ctx, "Duplicate enumerated notation");
+                      else
+                        xml_dtd_enotns_new(dtd->tab_enotns, attr, n);
+                    }
+                  xml_parse_dtd_white(ctx, 0);
+                }
+              while (xml_get_char(ctx) == '|');
+              xml_unget_char(ctx);
+              xml_parse_char(ctx, ')');
+            }
+          else
+            xml_fatal(ctx, "Unknown attribute type");
+          if (!ignored)
+            attr->type = t;
+        }
+      xml_parse_dtd_white(ctx, 1);
+      /* DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue) */
+      enum xml_dtd_attr_default def = XML_ATTR_NONE;
+      if (xml_get_char(ctx) == '#')
+        switch (xml_peek_char(ctx))
+          {
+            case 'R':
+              xml_parse_seq(ctx, "REQUIRED");
+              def = XML_ATTR_REQUIRED;
+              break;
+            case 'I':
+              xml_parse_seq(ctx, "IMPLIED");
+              def = XML_ATTR_IMPLIED;
+              break;
+            case 'F':
+              xml_parse_seq(ctx, "FIXED");
+              def = XML_ATTR_FIXED;
+              xml_parse_dtd_white(ctx, 1);
+              break;
+            default:
+              xml_fatal(ctx, "Expected a modifier for default attribute value");
+          }
+      else
+        xml_unget_char(ctx);
+      if (def != XML_ATTR_REQUIRED && def != XML_ATTR_IMPLIED)
+        {
+          char *v = xml_parse_attr_value(ctx, attr);
+          if (!ignored)
+            attr->default_value = v;
+        }
+      if (!ignored)
+        attr->default_mode = def;
+    }
+  /* The loop also stops when no white space follows, whatever the next character is,
+   * so the closing '>' has to be checked instead of skipping a character blindly. */
+  xml_parse_char(ctx, '>');
+  xml_dec(ctx);
+}
+
+void
+xml_skip_internal_subset(struct xml_context *ctx)
+{
+  /* Skips the internal subset up to the closing ']' without interpreting the declarations.
+   * Already parsed: '[' */
+  TRACE(ctx, "skip_internal_subset");
+  uns c;
+  while ((c = xml_get_char(ctx)) != ']')
+    {
+      if (c != '<')
+        continue;
+      switch (xml_get_char(ctx))
+        {
+          case '?':
+            /* Processing instruction */
+            xml_inc(ctx);
+            xml_skip_pi(ctx);
+            break;
+          case '!':
+            if (xml_get_char(ctx) == '-')
+              {
+                /* Comment */
+                xml_inc(ctx);
+                xml_skip_comment(ctx);
+              }
+            else
+              {
+                /* Any other declaration: find its '>' outside of quoted literals */
+                uns quote;
+                while ((quote = xml_get_char(ctx)) != '>')
+                  if (quote == '\'' || quote == '"')
+                    while (xml_get_char(ctx) != quote)
+                      ;
+              }
+            break;
+          default:
+            xml_dec(ctx);
+        }
+    }
+  xml_dec(ctx);
+}
+
+/*** Validation of attribute values ***/
+
+static uns
+xml_check_tokens(char *value, uns first_cat, uns next_cat, uns seq)
+{
+  /* Checks that @value is a token (or, if @seq is set, a list of tokens separated
+   * by single spaces) whose first character belongs to @first_cat and all other
+   * characters to @next_cat. Returns 1 on match, 0 otherwise. */
+  char *pos = value;
+  uns code;
+  for (;;)
+    {
+      /* The first character of a token */
+      pos = utf8_32_get(pos, &code);
+      if (!(xml_char_cat(code) & first_cat))
+        return 0;
+      /* The rest of the token, which ends by a space or by the end of the string */
+      while (*pos != 0 && *pos != 0x20)
+        {
+          pos = utf8_32_get(pos, &code);
+          if (!(xml_char_cat(code) & next_cat))
+            return 0;
+        }
+      if (!*pos)
+        return 1;
+      if (!seq)
+        return 0;
+      /* Skip the separating space */
+      pos++;
+    }
+}
+
+static uns
+xml_is_name(struct xml_context *ctx, char *value)
+{
+  /* Name ::= NameStartChar (NameChar)* */
+  uns start_cat = ctx->cat_sname;
+  uns rest_cat = ctx->cat_name;
+  return xml_check_tokens(value, start_cat, rest_cat, 0);
+}
+
+static uns
+xml_is_names(struct xml_context *ctx, char *value)
+{
+  /* Names ::= Name (#x20 Name)* */
+  uns start_cat = ctx->cat_sname;
+  uns rest_cat = ctx->cat_name;
+  return xml_check_tokens(value, start_cat, rest_cat, 1);
+}
+
+static uns
+xml_is_nmtoken(struct xml_context *ctx, char *value)
+{
+  /* Nmtoken ::= (NameChar)+ */
+  uns name_cat = ctx->cat_name;
+  return xml_check_tokens(value, name_cat, name_cat, 0);
+}
+
+static uns
+xml_is_nmtokens(struct xml_context *ctx, char *value)
+{
+  /* Nmtokens ::= Nmtoken (#x20 Nmtoken)* */
+  uns name_cat = ctx->cat_name;
+  return xml_check_tokens(value, name_cat, name_cat, 1);
+}
+
+static void
+xml_err_attr_format(struct xml_context *ctx, struct xml_dtd_attr *dtd, char *type)
+{ /* Reports that the value of attribute @dtd does not match the production named by @type */
+  xml_error(ctx, "Attribute %s in <%s> does not match the production of %s", dtd->name, dtd->elem->name, type);
+}
+
+void
+xml_validate_attr(struct xml_context *ctx, struct xml_dtd_attr *dtd, char *value)
+{
+  /* Checks an attribute value against the type declared for the attribute in the DTD.
+   * CDATA values are accepted as they are; values of all other types are first
+   * normalized in place by xml_normalize_white(). Violations are reported by xml_error(). */
+  if (dtd->type == XML_ATTR_CDATA)
+    return;
+  xml_normalize_white(ctx, value);
+  switch (dtd->type)
+    {
+      case XML_ATTR_ID:
+        if (!xml_is_name(ctx, value))
+          xml_err_attr_format(ctx, dtd, "NAME");
+        //FIXME: add to a hash table
+        break;
+      case XML_ATTR_IDREF:
+        if (!xml_is_name(ctx, value))
+          xml_err_attr_format(ctx, dtd, "NAME");
+        // FIXME: find in hash table (beware forward references)
+        break;
+      case XML_ATTR_IDREFS:
+        if (!xml_is_names(ctx, value))
+          xml_err_attr_format(ctx, dtd, "NAMES");
+        // FIXME: find
+        break;
+      case XML_ATTR_ENTITY:
+        // FIXME
+        break;
+      case XML_ATTR_ENTITIES:
+        // FIXME
+        break;
+      case XML_ATTR_NMTOKEN:
+        if (!xml_is_nmtoken(ctx, value))
+          xml_err_attr_format(ctx, dtd, "NMTOKEN");
+        break;
+      case XML_ATTR_NMTOKENS:
+        if (!xml_is_nmtokens(ctx, value))
+          xml_err_attr_format(ctx, dtd, "NMTOKENS");
+        break;
+      case XML_ATTR_ENUM:
+        if (!xml_dtd_evals_find(ctx->dtd->tab_evals, dtd, value))
+          xml_error(ctx, "Attribute %s in <%s> contains an undefined enumeration value", dtd->name, dtd->elem->name);
+        break;
+      case XML_ATTR_NOTATION:
+        {
+          /* The notation must be declared and also listed in the declaration of this attribute.
+           * NOTE(review): assumes xml_dtd_find_notn() returns the same node which
+           * xml_parse_attr_list_decl() has stored in tab_enotns -- confirm. */
+          struct xml_dtd_notn *notn = xml_dtd_find_notn(ctx, value);
+          if (!notn)
+            xml_error(ctx, "Attribute %s in <%s> contains an undefined notation", dtd->name, dtd->elem->name);
+          else if (!xml_dtd_enotns_find(ctx->dtd->tab_enotns, dtd, notn))
+            xml_error(ctx, "Attribute %s in <%s> contains a notation not listed in its declaration", dtd->name, dtd->elem->name);
+        }
+        break;
+    }
+}
diff --git a/shxml/dtd.h b/shxml/dtd.h

new file mode 100644 (file)

index 0000000..e2caf98
--- /dev/null
+++ b/shxml/dtd.h
@@ -0,0 +1,168 @@
+/*
+ *     Sherlock Library -- A simple XML parser
+ *
+ *     (c) 2007--2008 Pavel Charvat <pchar@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#ifndef _SHERLOCK_XML_DTD_H
+#define _SHERLOCK_XML_DTD_H
+
+#include "sherlock/xml/xml.h"
+
+struct xml_dtd {                       /* DTD of a context; created by xml_dtd_init(), released by xml_dtd_cleanup() */
+  struct mempool *pool;                        /* Memory pool where to allocate DTD */
+  slist ents;                          /* Linked list of general entities */
+  slist pents;                         /* Linked list of parameter entities */
+  slist notns;                         /* Linked list of notations */
+  slist elems;                         /* Linked list of elements */
+  void *tab_ents;                      /* Hash table of general entities */
+  void *tab_pents;                     /* Hash table of parameter entities */
+  void *tab_notns;                     /* Hash table of notations */
+  void *tab_elems;                     /* Hash table of elements */
+  void *tab_enodes;                    /* Hash table of element sons */
+  void *tab_attrs;                     /* Hash table of element attributes */
+  void *tab_evals;                     /* Hash table of enumerated attribute values */
+  void *tab_enotns;                    /* Hash table of enumerated attribute notations */
+};
+
+/* Notations */
+
+enum xml_dtd_notn_flags {              /* Bits of xml_dtd_notn.flags */
+  XML_DTD_NOTN_DECLARED = 0x1,         /* The notation has been declared (internal usage) */
+};
+
+struct xml_dtd_notn {                  /* A notation known to the DTD */
+  snode n;                             /* Node in xml_dtd.notns */
+  uns flags;                           /* XML_DTD_NOTN_x */
+  char *name;                          /* Notation name */
+  char *system_id;                     /* External ID: system literal (NULL if not given) */
+  char *public_id;                     /* External ID: public literal (NULL if not given) */
+  void *user;                          /* User-defined */
+};
+
+struct xml_dtd_notn *xml_dtd_find_notn(struct xml_context *ctx, char *name);
+
+/* Entities */
+
+enum xml_dtd_entity_flags {            /* Bits of xml_dtd_entity.flags */
+  XML_DTD_ENTITY_DECLARED = 0x1,       /* The entity has been declared (internal usage) */
+  XML_DTD_ENTITY_VISITED = 0x2,                /* Cycle detection (internal usage) */
+  XML_DTD_ENTITY_PARAMETER = 0x4,      /* Parameter entity, general otherwise */
+  XML_DTD_ENTITY_EXTERNAL = 0x8,       /* External entity, internal otherwise */
+  XML_DTD_ENTITY_UNPARSED = 0x10,      /* Unparsed entity, parsed otherwise */
+  XML_DTD_ENTITY_TRIVIAL = 0x20,       /* Replacement text is a sequence of characters and character references */
+};
+
+struct xml_dtd_entity {                        /* A general or parameter entity */
+  snode n;                             /* Node in xml_dtd.ents or xml_dtd.pents */
+  uns flags;                           /* XML_DTD_ENTITY_x */
+  char *name;                          /* Entity name */
+  char *text;                          /* Replacement text / expanded replacement text (XML_DTD_ENTITY_TRIVIAL) */
+  uns len;                             /* Text length */
+  char *system_id;                     /* External ID: system literal */
+  char *public_id;                     /* External ID: public literal (NULL if not given) */
+  struct xml_dtd_notn *notn;           /* Notation (XML_DTD_ENTITY_UNPARSED only) */
+  void *user;                          /* User-defined */
+};
+
+struct xml_dtd_entity *xml_dtd_find_entity(struct xml_context *ctx, char *name);
+
+/* Elements */
+
+enum xml_dtd_elem_flags {              /* Bits of xml_dtd_elem.flags */
+  XML_DTD_ELEM_DECLARED = 0x1,         /* The element has been declared (internal usage) */
+};
+
+enum xml_dtd_elem_type {               /* Values of xml_dtd_elem.type (kind of the content specification) */
+  XML_DTD_ELEM_EMPTY,                  /* 'EMPTY' */
+  XML_DTD_ELEM_ANY,                    /* 'ANY' */
+  XML_DTD_ELEM_MIXED,                  /* Mixed content: '(#PCDATA ...)' */
+  XML_DTD_ELEM_CHILDREN,               /* Element content: a group of children */
+};
+
+struct xml_dtd_elem {                  /* An element type known to the DTD */
+  snode n;                             /* List node; presumably in xml_dtd.elems -- TODO confirm */
+  uns flags;                           /* XML_DTD_ELEM_x flags (see enum xml_dtd_elem_flags) */
+  uns type;                            /* See enum xml_dtd_elem_type */
+  char *name;                          /* Element name */
+  struct xml_dtd_elem_node *node;      /* Root of the content model (set for mixed and element content) */
+  slist attrs;                         /* Presumably the list of declared attributes -- TODO confirm */
+  void *user;                          /* User-defined */
+};
+
+struct xml_dtd_elem_node {             /* A node of the content model tree of an element */
+  snode n;                             /* Node in the list of sons of the parent */
+  struct xml_dtd_elem_node *parent;    /* Enclosing group (NULL for the root) */
+  struct xml_dtd_elem *elem;           /* Referenced element if the node stands for a name */
+  slist sons;                          /* Members of the group */
+  uns type;                            /* See enum xml_dtd_elem_node_type */
+  uns occur;                           /* See enum xml_dtd_elem_node_occur */
+  void *user;                          /* User-defined */
+};
+
+enum xml_dtd_elem_node_type {          /* Values of xml_dtd_elem_node.type */
+  XML_DTD_ELEM_PCDATA,                 /* Root of mixed content; the parser also uses it for a group whose operator is not known yet */
+  XML_DTD_ELEM_SEQ,                    /* Members separated by ',' */
+  XML_DTD_ELEM_OR,                     /* Members separated by '|' */
+};
+
+enum xml_dtd_elem_node_occur {         /* Values of xml_dtd_elem_node.occur */
+  XML_DTD_ELEM_OCCUR_ONCE,             /* No occurrence indicator */
+  XML_DTD_ELEM_OCCUR_OPT,              /* '?' */
+  XML_DTD_ELEM_OCCUR_MULT,             /* '*' */
+  XML_DTD_ELEM_OCCUR_PLUS,             /* '+' */
+};
+
+struct xml_dtd_elem *xml_dtd_find_elem(struct xml_context *ctx, char *name);
+
+/* Attributes */
+
+enum xml_dtd_attr_default {            /* Values of xml_dtd_attr.default_mode */
+  XML_ATTR_NONE,                       /* Plain default value without any modifier */
+  XML_ATTR_REQUIRED,                   /* '#REQUIRED' */
+  XML_ATTR_IMPLIED,                    /* '#IMPLIED' */
+  XML_ATTR_FIXED,                      /* '#FIXED' followed by the value */
+};
+
+enum xml_dtd_attr_type {               /* Values of xml_dtd_attr.type */
+  XML_ATTR_CDATA,
+  XML_ATTR_ID,
+  XML_ATTR_IDREF,
+  XML_ATTR_IDREFS,
+  XML_ATTR_ENTITY,
+  XML_ATTR_ENTITIES,
+  XML_ATTR_NMTOKEN,
+  XML_ATTR_NMTOKENS,
+  XML_ATTR_ENUM,                       /* Enumeration of name tokens; the values are kept in xml_dtd.tab_evals */
+  XML_ATTR_NOTATION,                   /* Enumeration of notations; the notations are kept in xml_dtd.tab_enotns */
+};
+
+struct xml_dtd_attr {                  /* An attribute declared for an element */
+  snode n;                             /* List node; presumably in xml_dtd_elem.attrs -- TODO confirm */
+  char *name;                          /* Attribute name */
+  struct xml_dtd_elem *elem;           /* Owner element */
+  uns type;                            /* See enum xml_dtd_attr_type */
+  uns default_mode;                    /* See enum xml_dtd_attr_default */
+  char *default_value;                 /* The default value defined in DTD (or NULL) */
+};
+
+struct xml_dtd_eval {                  /* Node of xml_dtd.tab_evals: one allowed value of an enumerated attribute */
+  struct xml_dtd_attr *attr;           /* The attribute (first part of the key) */
+  char *val;                           /* The value (second part of the key) */
+};
+
+struct xml_dtd_enotn {                 /* Node of xml_dtd.tab_enotns: one notation allowed for a NOTATION attribute */
+  struct xml_dtd_attr *attr;           /* The attribute (first part of the key) */
+  struct xml_dtd_notn *notn;           /* The notation (second part of the key) */
+};
+
+void xml_dtd_init(struct xml_context *ctx);
+void xml_dtd_cleanup(struct xml_context *ctx);
+void xml_dtd_finish(struct xml_context *ctx);
+
+struct xml_dtd_attr *xml_dtd_find_attr(struct xml_context *ctx, struct xml_dtd_elem *elem, char *name);
+
+#endif
diff --git a/shxml/internals.h b/shxml/internals.h

new file mode 100644 (file)

index 0000000..bbf28c0
--- /dev/null
+++ b/shxml/internals.h
@@ -0,0 +1,311 @@
+/*
+ *     Sherlock Library -- A simple XML parser
+ *
+ *     (c) 2007--2008 Pavel Charvat <pchar@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#ifndef _SHERLOCK_XML_INTERNALS_H
+#define _SHERLOCK_XML_INTERNALS_H
+
+#include "sherlock/xml/xml.h"
+#include "sherlock/xml/dtd.h"
+
+/*** Debugging ***/
+
+#ifdef LOCAL_DEBUG /* TRACE(ctx, format, args...) logs via DBG() together with xml_row(ctx) */
+#define TRACE(c, f, p...) do { DBG("XML %u: " f, xml_row(c), ##p); } while(0)
+#else /* Without LOCAL_DEBUG, TRACE() expands to an empty statement */
+#define TRACE(c, f, p...) do {} while(0)
+#endif
+
+/*** Error handling ***/
+
+void NONRET xml_throw(struct xml_context *ctx);
+
+/*** Memory management ***/
+
+struct xml_stack {                     /* A frame of the parser stack, see xml_do_push() and xml_do_pop() */
+  struct xml_stack *next;              /* The frame which was on the top before (ctx->stack_list at the time of the push) */
+  struct mempool_state state;          /* State of ctx->stack saved before the frame was allocated */
+  uns flags;                           /* ctx->flags at the time of the push */
+};
+
+static inline void *
+xml_do_push(struct xml_context *ctx, uns size)
+{
+  /* Allocates a frame of @size bytes (starting with struct xml_stack) on ctx->stack,
+   * remembers the previous state of ctx->stack and ctx->flags in it and makes it
+   * the new top of ctx->stack_list. Returns the frame. */
+  struct mempool_state saved;
+  mp_save(ctx->stack, &saved);
+  struct xml_stack *frame = mp_alloc(ctx->stack, size);
+  frame->next = ctx->stack_list;
+  frame->flags = ctx->flags;
+  frame->state = saved;
+  ctx->stack_list = frame;
+  return frame;
+}
+
+static inline void
+xml_do_pop(struct xml_context *ctx, struct xml_stack *s)
+{
+  /* Undoes xml_do_push(): unlinks the frame, then restores ctx->flags and the state of ctx->stack */
+  struct xml_stack *below = s->next;
+  uns old_flags = s->flags;
+  ctx->stack_list = below;
+  ctx->flags = old_flags;
+  mp_restore(ctx->stack, &s->state);
+}
+
+static inline void
+xml_push(struct xml_context *ctx)
+{
+  /* Pushes a plain frame without any extra data */
+  TRACE(ctx, "push");
+  uns frame_size = sizeof(struct xml_stack);
+  xml_do_push(ctx, frame_size);
+}
+
+static inline void
+xml_pop(struct xml_context *ctx)
+{
+  /* Pops the top frame; the stack must not be empty */
+  TRACE(ctx, "pop");
+  ASSERT(ctx->stack_list);
+  struct xml_stack *top = ctx->stack_list;
+  xml_do_pop(ctx, top);
+}
+
+struct xml_dom_stack {                 /* A stack frame accompanying a DOM node, see xml_push_dom() */
+  struct xml_stack stack;              /* The generic frame; xml_pop_dom() casts ctx->stack_list to this structure */
+  struct mempool_state state;          /* State of ctx->pool to restore if the node gets freed */
+};
+
+static inline struct xml_node *
+xml_push_dom(struct xml_context *ctx, struct mempool_state *state)
+{
+  /* Creates a new DOM node in ctx->pool as the last son of the current node
+   * and makes it current. The pool state to return to when the node is freed
+   * is either *state or, if @state is NULL, the state right before the allocation. */
+  TRACE(ctx, "push_dom");
+  struct xml_dom_stack *frame = xml_do_push(ctx, sizeof(*frame));
+  if (!state)
+    mp_save(ctx->pool, &frame->state);
+  else
+    frame->state = *state;
+  struct xml_node *node = mp_alloc(ctx->pool, sizeof(*node));
+  struct xml_node *parent = ctx->node;
+  node->user = NULL;
+  node->parent = parent;
+  if (parent)
+    clist_add_tail(&parent->sons, &node->n);
+  ctx->node = node;
+  return node;
+}
+
+static inline void
+xml_pop_dom(struct xml_context *ctx, uns free)
+{
+  /* Leaves the current DOM node and returns to its parent. If @free is set,
+   * the node is also unlinked from the parent and ctx->pool is rolled back
+   * to the state remembered by xml_push_dom(). */
+  TRACE(ctx, "pop_dom");
+  ASSERT(ctx->node);
+  struct xml_node *node = ctx->node;
+  struct xml_node *up = node->parent;
+  struct xml_dom_stack *frame = (void *)ctx->stack_list;
+  if (free)
+    {
+      /* See xml_pop_element() for cleanup of attribute hash table */
+      if (up)
+        clist_remove(&node->n);
+      mp_restore(ctx->pool, &frame->state);
+    }
+  ctx->node = up;
+  xml_do_pop(ctx, &frame->stack);
+}
+
+#define XML_HASH_HDR_SIZE ALIGN_TO(sizeof(void *), CPU_STRUCT_ALIGN) /* Size of the header in front of a hash table; it starts with a pointer used as the pool for mp_alloc() */
+#define XML_HASH_GIVE_ALLOC struct HASH_PREFIX(table); \
+  static inline void *HASH_PREFIX(alloc)(struct HASH_PREFIX(table) *t, uns size) \
+  { return mp_alloc(*(void **)((void *)t - XML_HASH_HDR_SIZE), size); } \
+  static inline void HASH_PREFIX(free)(struct HASH_PREFIX(table) *t UNUSED, void *p UNUSED) {} /* alloc() takes memory from the pool found in the header, free() does nothing; the header is presumably set up by xml_hash_new() -- TODO confirm */
+
+void *xml_hash_new(struct mempool *pool, uns size);
+
+void xml_spout_chars(struct fastbuf *fb);
+
+/*** Reading of document/external entities ***/
+
+void NONRET xml_fatal_nested(struct xml_context *ctx);
+
+static inline void
+xml_inc(struct xml_context *ctx)
+{
+  /* Called after the first character of a block: one more level of nesting */
+  TRACE(ctx, "inc");
+  ctx->depth += 1;
+}
+
+static inline void
+xml_dec(struct xml_context *ctx)
+{
+  /* Called after the last character of a block: one level of nesting less.
+   * Leaving a block which has not been entered in the current input is fatal. */
+  TRACE(ctx, "dec");
+  int underflow = !ctx->depth;
+  ctx->depth--;
+  if (unlikely(underflow))
+    xml_fatal_nested(ctx);
+}
+
+#include "obj/sherlock/xml/unicat.h"
+
+static inline uns
+xml_char_cat(uns c)
+{
+  /* Returns the category of a character as a single-bit mask taken from the generated
+   * unicat tables; codes of 0x110000 and above yield 1 */
+  if (c >= 0x110000)
+    return 1;
+  uns cat;
+  if (c >= 0x10000)
+    cat = xml_char_tab3[c >> 16];
+  else
+    cat = xml_char_tab1[(c & 0xff) + xml_char_tab2[c >> 8]];
+  return 1U << cat;
+}
+
+static inline uns
+xml_ascii_cat(uns c)
+{
+  /* Direct lookup in the first-level table.
+   * NOTE(review): returns the raw table entry, whereas xml_char_cat() returns
+   * 1U << entry -- confirm which form the callers expect. */
+  uns entry = xml_char_tab1[c];
+  return entry;
+}
+
+struct xml_source *xml_push_source(struct xml_context *ctx);
+void xml_push_entity(struct xml_context *ctx, struct xml_dtd_entity *ent);
+
+void xml_refill(struct xml_context *ctx);
+
+static inline uns
+xml_peek_char(struct xml_context *ctx)
+{
+  /* Returns the next character without consuming it, refilling the buffer if it is exhausted */
+  int exhausted = (ctx->bptr == ctx->bstop);
+  if (exhausted)
+    xml_refill(ctx);
+  return *ctx->bptr;
+}
+
+static inline uns
+xml_peek_cat(struct xml_context *ctx)
+{
+  /* Returns the category stored next to the next character, refilling the buffer if it is exhausted */
+  int exhausted = (ctx->bptr == ctx->bstop);
+  if (exhausted)
+    xml_refill(ctx);
+  return *(ctx->bptr + 1);
+}
+
+static inline uns
+xml_get_char(struct xml_context *ctx)
+{
+  /* Reads the next character and advances over its (character, category) pair */
+  uns ch = xml_peek_char(ctx);
+  ctx->bptr = ctx->bptr + 2;
+  return ch;
+}
+
+static inline uns
+xml_get_cat(struct xml_context *ctx)
+{
+  /* Reads the category of the next character and advances over its (character, category) pair */
+  uns cat = xml_peek_cat(ctx);
+  ctx->bptr = ctx->bptr + 2;
+  return cat;
+}
+
+static inline uns
+xml_last_char(struct xml_context *ctx)
+{
+  /* The character of the pair just before the current position */
+  return *(ctx->bptr - 2);
+}
+
+static inline uns
+xml_last_cat(struct xml_context *ctx)
+{
+  /* The category of the pair just before the current position */
+  return *(ctx->bptr - 1);
+}
+
+static inline uns
+xml_skip_char(struct xml_context *ctx)
+{
+  /* Consumes the character at the current position without checking the buffer for a refill */
+  uns ch = *ctx->bptr;
+  ctx->bptr = ctx->bptr + 2;
+  return ch;
+}
+
+static inline uns
+xml_unget_char(struct xml_context *ctx)
+{
+  /* Steps one (character, category) pair back and returns the character there */
+  ctx->bptr -= 2;
+  return ctx->bptr[0];
+}
+
+void xml_sources_cleanup(struct xml_context *ctx);
+
+/*** Parsing ***/
+
+void NONRET xml_fatal_expected(struct xml_context *ctx, uns c);
+void NONRET xml_fatal_expected_white(struct xml_context *ctx);
+void NONRET xml_fatal_expected_quot(struct xml_context *ctx);
+
+static inline uns
+xml_parse_white(struct xml_context *ctx, uns mandatory)
+{
+  /* mandatory=1 -> S ::= (#x20 | #x9 | #xD | #xA)+
+   * mandatory=0 -> S?
+   * Returns the number of skipped white space characters. */
+  uns count;
+  for (count = 0; xml_peek_cat(ctx) & XML_CHAR_WHITE; count++)
+    xml_skip_char(ctx);
+  if (unlikely(!count && mandatory))
+    xml_fatal_expected_white(ctx);
+  return count;
+}
+
+static inline void
+xml_parse_char(struct xml_context *ctx, uns c)
+{
+  /* Consumes one character, which must be the given Unicode character */
+  uns got = xml_get_char(ctx);
+  if (unlikely(got != c))
+    xml_fatal_expected(ctx, c);
+}
+
+static inline void
+xml_parse_seq(struct xml_context *ctx, const char *seq)
+{
+  /* Consumes the given sequence of ASCII characters, one xml_parse_char() per character */
+  for (const char *pos = seq; *pos; pos++)
+    xml_parse_char(ctx, *pos);
+}
+
+void xml_parse_eq(struct xml_context *ctx);
+
+static inline uns
+xml_parse_quote(struct xml_context *ctx)
+{
+  /* "'" | '"'
+   * Returns the quotation mark which has been read. */
+  uns quote = xml_get_char(ctx);
+  switch (quote)
+    {
+      case '\'':
+      case '\"':
+        break;
+      default:
+        xml_fatal_expected_quot(ctx);
+    }
+  return quote;
+}
+
+char *xml_parse_name(struct xml_context *ctx, struct mempool *pool);
+void xml_skip_name(struct xml_context *ctx);
+char *xml_parse_nmtoken(struct xml_context *ctx, struct mempool *pool);
+
+char *xml_parse_system_literal(struct xml_context *ctx, struct mempool *pool);
+char *xml_parse_pubid_literal(struct xml_context *ctx, struct mempool *pool);
+
+uns xml_parse_char_ref(struct xml_context *ctx);
+void xml_parse_pe_ref(struct xml_context *ctx);
+
+char *xml_parse_attr_value(struct xml_context *ctx, struct xml_dtd_attr *attr);
+
+void xml_skip_internal_subset(struct xml_context *ctx);
+void xml_parse_notation_decl(struct xml_context *ctx);
+void xml_parse_entity_decl(struct xml_context *ctx);
+void xml_parse_element_decl(struct xml_context *ctx);
+void xml_parse_attr_list_decl(struct xml_context *ctx);
+
+void xml_push_comment(struct xml_context *ctx);
+void xml_pop_comment(struct xml_context *ctx);
+void xml_skip_comment(struct xml_context *ctx);
+
+void xml_push_pi(struct xml_context *ctx);
+void xml_pop_pi(struct xml_context *ctx);
+void xml_skip_pi(struct xml_context *ctx);
+
+void xml_attrs_table_init(struct xml_context *ctx);
+void xml_attrs_table_cleanup(struct xml_context *ctx);
+
+void xml_validate_attr(struct xml_context *ctx, struct xml_dtd_attr *dtd, char *value);
+
+#endif
diff --git a/shxml/libshxml.pc b/shxml/libshxml.pc

new file mode 100644 (file)

index 0000000..c2172b3
--- /dev/null
+++ b/shxml/libshxml.pc
@@ -0,0 +1,11 @@
+# pkg-config metadata for libshxml
+
+libdir=@LIBDIR@
+incdir=.
+
+Name: libshxml
+Description: XML parser for Sherlock project
+Version: @SHERLOCK_VERSION@
+Cflags: -I${incdir}
+Libs: -L${libdir} -lshxml
+Requires: @DEPS@
diff --git a/shxml/parse.c b/shxml/parse.c

new file mode 100644 (file)

index 0000000..27141b1
--- /dev/null
+++ b/shxml/parse.c
@@ -0,0 +1,1287 @@
+/*
+ *     Sherlock Library -- A simple XML parser
+ *
+ *     (c) 2007--2008 Pavel Charvat <pchar@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#undef LOCAL_DEBUG
+
+#include "sherlock/sherlock.h"
+#include "sherlock/xml/xml.h"
+#include "sherlock/xml/dtd.h"
+#include "sherlock/xml/internals.h"
+#include "ucw/fastbuf.h"
+#include "ucw/ff-unicode.h"
+#include "ucw/unicode.h"
+#include "ucw/chartype.h"
+#include "ucw/hashfunc.h"
+
+#include <setjmp.h>
+
+/*** Basic parsing ***/
+
+void NONRET
+xml_fatal_expected(struct xml_context *ctx, uns c)
+{
+  /* Characters outside the range 32..127 are shown as a code point,
+   * the rest is quoted literally. */
+  if (c < 32 || c >= 128)
+    xml_fatal(ctx, "Expected U+%04x", c);
+  else
+    xml_fatal(ctx, "Expected '%c'", c);
+}
+
+/* Raise a fatal error saying that white space was expected at this point. */
+void NONRET
+xml_fatal_expected_white(struct xml_context *ctx)
+{
+  xml_fatal(ctx, "Expected a white space");
+}
+
+/* Raise a fatal error saying that a quotation mark was expected at this point. */
+void NONRET
+xml_fatal_expected_quot(struct xml_context *ctx)
+{
+  xml_fatal(ctx, "Expected a quotation mark");
+}
+
+/* Parse an equals sign, allowing white space on either side of it. */
+void
+xml_parse_eq(struct xml_context *ctx)
+{
+  /* Eq ::= S? '=' S? */
+  xml_parse_white(ctx, 0);
+  xml_parse_char(ctx, '=');
+  xml_parse_white(ctx, 0);
+}
+
+/*** Names and nmtokens ***/
+
+static char *
+xml_parse_string(struct xml_context *ctx, struct mempool *pool, uns first_cat, uns next_cat, char *err)
+{
+  /* Reads a token whose first character belongs to a category in `first_cat'
+   * and whose following characters belong to `next_cat'. The token is stored
+   * in `pool' as a zero-terminated UTF-8 string; `err' is the fatal error
+   * message used when even the first character does not fit. */
+  char *out = mp_start_noalign(pool, 1);
+  if (unlikely((xml_peek_cat(ctx) & first_cat) == 0))
+    xml_fatal(ctx, "%s", err);
+  for (;;)
+    {
+      /* Make room for one encoded character plus the final zero */
+      out = mp_spread(pool, out, 5);
+      out = utf8_32_put(out, xml_skip_char(ctx));
+      if (!(xml_peek_cat(ctx) & next_cat))
+        break;
+    }
+  *out = 0;
+  return mp_end(pool, out + 1);
+}
+
+static void
+xml_skip_string(struct xml_context *ctx, uns first_cat, uns next_cat, char *err)
+{
+  /* Like xml_parse_string(), but the token is only consumed, not stored. */
+  uns cat = xml_get_cat(ctx);
+  if (unlikely((cat & first_cat) == 0))
+    xml_fatal(ctx, "%s", err);
+  while (xml_peek_cat(ctx) & next_cat)
+    {
+      xml_skip_char(ctx);
+    }
+}
+
+/* Parse a Name and return it as a zero-terminated string allocated from `pool'.
+ * The character categories that form a name are taken from the context. */
+char *
+xml_parse_name(struct xml_context *ctx, struct mempool *pool)
+{
+  /* Name ::= NameStartChar (NameChar)* */
+  return xml_parse_string(ctx, pool, ctx->cat_sname, ctx->cat_name, "Expected a name");
+}
+
+/* Consume a Name without storing it. */
+void
+xml_skip_name(struct xml_context *ctx)
+{
+  xml_skip_string(ctx, ctx->cat_sname, ctx->cat_name, "Expected a name");
+}
+
+/* Parse a Nmtoken and return it as a zero-terminated string allocated from `pool'.
+ * Unlike a Name, its first character may be any name character. */
+char *
+xml_parse_nmtoken(struct xml_context *ctx, struct mempool *pool)
+{
+  /* Nmtoken ::= (NameChar)+ */
+  return xml_parse_string(ctx, pool, ctx->cat_name, ctx->cat_name, "Expected a nmtoken");
+}
+
+/*** Simple literals ***/
+
+char *
+xml_parse_system_literal(struct xml_context *ctx, struct mempool *pool)
+{
+  /* SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
+   * Returns the text between the quotes, zero-terminated, allocated from `pool'. */
+  char *dst = mp_start_noalign(pool, 1);
+  uns quote = xml_parse_quote(ctx);
+  for (;;)
+    {
+      uns c = xml_get_char(ctx);
+      if (c == quote)
+        break;
+      dst = mp_spread(pool, dst, 5);
+      dst = utf8_32_put(dst, c);
+    }
+  *dst = 0;
+  return mp_end(pool, dst + 1);
+}
+
+char *
+xml_parse_pubid_literal(struct xml_context *ctx, struct mempool *pool)
+{
+  /* PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
+   * Returns the text between the quotes, zero-terminated, allocated from `pool'.
+   * Every character is stored as a single byte. */
+  char *dst = mp_start_noalign(pool, 1);
+  uns quote = xml_parse_quote(ctx);
+  for (;;)
+    {
+      uns c = xml_get_char(ctx);
+      if (c == quote)
+        break;
+      if (unlikely((xml_last_cat(ctx) & XML_CHAR_PUBID) == 0))
+        xml_fatal(ctx, "Expected a pubid character");
+      dst = mp_spread(pool, dst, 2);
+      *dst++ = c;
+    }
+  *dst = 0;
+  return mp_end(pool, dst + 1);
+}
+
+/*** Comments ***/
+
+void
+xml_push_comment(struct xml_context *ctx)
+{
+  TRACE(ctx, "push_comment");
+  /* Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
+   * Already parsed: '<!-'
+   * Pushes a XML_NODE_COMMENT node holding the comment's text and calls
+   * the h_comment hook if comments are to be reported. */
+  xml_parse_char(ctx, '-');
+  struct xml_node *node = xml_push_dom(ctx, NULL);
+  node->type = XML_NODE_COMMENT;
+  char *dst = mp_start_noalign(ctx->pool, 6);
+  for (;;)
+    {
+      if (xml_get_char(ctx) == '-')
+        {
+          /* Two dashes in a row can only start the terminator */
+          if (xml_get_char(ctx) == '-')
+            break;
+          *dst++ = '-';
+        }
+      dst = utf8_32_put(dst, xml_last_char(ctx));
+      dst = mp_spread(ctx->pool, dst, 6);
+    }
+  xml_parse_char(ctx, '>');
+  *dst = 0;
+  node->len = dst - (char *)mp_ptr(ctx->pool);
+  node->text = mp_end(ctx->pool, dst + 1);
+  if (!(ctx->flags & XML_REPORT_COMMENTS))
+    return;
+  if (ctx->h_comment)
+    ctx->h_comment(ctx);
+}
+
+void
+xml_pop_comment(struct xml_context *ctx)
+{
+  /* Leaves the comment node pushed by xml_push_comment(); the node is
+   * released unless XML_ALLOC_COMMENTS asks for it to be kept. */
+  uns discard = !(ctx->flags & XML_ALLOC_COMMENTS);
+  xml_pop_dom(ctx, discard);
+  xml_dec(ctx);
+  TRACE(ctx, "pop_comment");
+}
+
+void
+xml_skip_comment(struct xml_context *ctx)
+{
+  /* Consumes a comment without storing its text.
+   * Already parsed: '<!-' */
+  TRACE(ctx, "skip_comment");
+  xml_parse_char(ctx, '-');
+  for (;;)
+    {
+      /* Stop after the first pair of adjacent dashes */
+      if (xml_get_char(ctx) == '-' && xml_get_char(ctx) == '-')
+        break;
+    }
+  xml_parse_char(ctx, '>');
+  xml_dec(ctx);
+}
+
+/*** Processing instructions ***/
+
+void
+xml_push_pi(struct xml_context *ctx)
+{
+  TRACE(ctx, "push_pi");
+  /* Parses a processing instruction into a new XML_NODE_PI node
+   * (target in node->name, content in node->text):
+   *   PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
+   *   PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
+   * Already parsed: '<?' */
+  struct xml_node *node = xml_push_dom(ctx, NULL);
+  node->type = XML_NODE_PI;
+  node->name = xml_parse_name(ctx, ctx->pool);
+  if (unlikely(strcasecmp(node->name, "xml") == 0))
+    xml_error(ctx, "Reserved PI target");
+  char *dst = mp_start_noalign(ctx->pool, 5);
+  if (xml_parse_white(ctx, 0))
+    {
+      for (;;)
+        {
+          if (xml_get_char(ctx) != '?')
+            dst = utf8_32_put(dst, xml_last_char(ctx));
+          else if (xml_peek_char(ctx) != '>')
+            *dst++ = '?';
+          else
+            {
+              /* Found the terminating '?>' */
+              xml_skip_char(ctx);
+              break;
+            }
+          dst = mp_spread(ctx->pool, dst, 5);
+        }
+    }
+  else
+    {
+      /* No white space after the target, so the PI has to end right here */
+      xml_parse_seq(ctx, "?>");
+    }
+  *dst = 0;
+  node->len = dst - (char *)mp_ptr(ctx->pool);
+  node->text = mp_end(ctx->pool, dst + 1);
+  if (!(ctx->flags & XML_REPORT_PIS))
+    return;
+  if (ctx->h_pi)
+    ctx->h_pi(ctx);
+}
+
+void
+xml_pop_pi(struct xml_context *ctx)
+{
+  /* Leaves the PI node pushed by xml_push_pi(); the node is released
+   * unless XML_ALLOC_PIS asks for it to be kept. */
+  uns discard = !(ctx->flags & XML_ALLOC_PIS);
+  xml_pop_dom(ctx, discard);
+  xml_dec(ctx);
+  TRACE(ctx, "pop_pi");
+}
+
+void
+xml_skip_pi(struct xml_context *ctx)
+{
+  /* Consumes a processing instruction without storing it. The target
+   * is checked only when validating.
+   * Already parsed: '<?' */
+  TRACE(ctx, "skip_pi");
+  if (ctx->flags & XML_VALIDATING)
+    {
+      /* The target name is needed only for the check, so keep it on the stack pool */
+      struct mempool_state state;
+      mp_save(ctx->stack, &state);
+      char *target = xml_parse_name(ctx, ctx->stack);
+      if (unlikely(strcasecmp(target, "xml") == 0))
+        xml_error(ctx, "Reserved PI target");
+      mp_restore(ctx->stack, &state);
+      if (!xml_parse_white(ctx, 0))
+        {
+          xml_parse_seq(ctx, "?>");
+          xml_dec(ctx);
+          return;
+        }
+    }
+  /* Search for '?' immediately followed by '>' */
+  while (xml_get_char(ctx) != '?' || xml_peek_char(ctx) != '>')
+    ;
+  xml_skip_char(ctx);
+  xml_dec(ctx);
+}
+
+/*** Character references ***/
+
+/* Parse a character reference and return the referenced code point.
+ * On a malformed or out-of-range reference an error is reported via xml_error(),
+ * the input is skipped up to the next ';' and UNI_REPLACEMENT is returned. */
+uns
+xml_parse_char_ref(struct xml_context *ctx)
+{
+  TRACE(ctx, "parse_char_ref");
+  /* CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
+   * Already parsed: '&#' */
+  uns v = 0;
+  if (xml_get_char(ctx) == 'x')
+    {
+      /* Hexadecimal form: the first digit is read here, the others in the loop condition */
+      if (!(xml_get_cat(ctx) & XML_CHAR_XDIGIT))
+        {
+         xml_error(ctx, "Expected a hexadecimal value of character reference");
+         goto recover;
+       }
+      do
+        {
+         v = (v << 4) + Cxvalue(xml_last_char(ctx));
+       }
+      /* Stop accumulating once the value leaves the Unicode range, so that v cannot overflow */
+      while (v < 0x110000 && (xml_get_cat(ctx) & XML_CHAR_XDIGIT));
+    }
+  else
+    {
+      /* Decimal form: the character read above (which was not 'x') is the first digit */
+      if (!(xml_last_cat(ctx) & XML_CHAR_DIGIT))
+        {
+         xml_error(ctx, "Expected a numeric value of character reference");
+         goto recover;
+       }
+      do
+        {
+         v = v * 10 + xml_last_char(ctx) - '0';
+       }
+      while (v < 0x110000 && (xml_get_cat(ctx) & XML_CHAR_DIGIT));
+    }
+  /* The referenced character must belong to the categories the context accepts */
+  uns cat = xml_char_cat(v);
+  if (!(cat & ctx->cat_unrestricted))
+    {
+      xml_error(ctx, "Character reference out of range");
+      goto recover;
+    }
+  /* The character which ended the digit loop has to be the terminator */
+  if (xml_last_char(ctx) == ';')
+    {
+      xml_dec(ctx);
+      return v;
+    }
+  xml_error(ctx, "Expected ';'");
+recover:
+  /* Checks the last character first, because it may already be the ';' */
+  while (xml_last_char(ctx) != ';')
+    xml_get_char(ctx);
+  xml_dec(ctx);
+  return UNI_REPLACEMENT;
+}
+
+/*** References to general entities ***/
+
+static void
+xml_parse_ref(struct xml_context *ctx)
+{
+  /* Reference ::= EntityRef | CharRef
+   * EntityRef ::= '&' Name ';'
+   * Already parsed: '&'
+   * Character references and trivial entities are appended to ctx->chars,
+   * other known entities are pushed by xml_push_entity(). An unknown entity
+   * is reported and copied to the output verbatim. */
+  struct fastbuf *out = &ctx->chars;
+  if (xml_peek_char(ctx) == '#')
+    {
+      xml_skip_char(ctx);
+      bput_utf8_32(out, xml_parse_char_ref(ctx));
+      return;
+    }
+
+  TRACE(ctx, "parse_ge_ref");
+  struct mempool_state state;
+  mp_save(ctx->stack, &state);
+  char *name = xml_parse_name(ctx, ctx->stack);
+  xml_parse_char(ctx, ';');
+  struct xml_dtd_entity *ent = xml_dtd_find_entity(ctx, name);
+  if (ent && !(ent->flags & XML_DTD_ENTITY_TRIVIAL))
+    {
+      TRACE(ctx, "Pushed entity &%s;", name);
+      mp_restore(ctx->stack, &state);
+      xml_dec(ctx);
+      xml_push_entity(ctx, ent);
+      return;
+    }
+  if (ent)
+    {
+      TRACE(ctx, "Trivial entity &%s;", name);
+      bputs(out, ent->text);
+    }
+  else
+    {
+      xml_error(ctx, "Unknown entity &%s;", name);
+      bputc(out, '&');
+      bputs(out, name);
+      bputc(out, ';');
+    }
+  mp_restore(ctx->stack, &state);
+  xml_dec(ctx);
+}
+
+/*** Character data ***/
+
+/* Makes room in the ctx->chars buffer, which lives in the growing part
+ * of ctx->pool. Does nothing while there is still free space. On first use
+ * it saves the pool state to ctx->chars_state and starts a new buffer;
+ * later it expands the existing one, keeping the write position (bptr)
+ * and the already reported part (bstop) at the same offsets.
+ *
+ * In both cases one byte is kept in reserve behind bufend, because
+ * xml_end_chars() and xml_report_chars() store a terminating zero at bptr
+ * even when the buffer is completely full. */
+void
+xml_spout_chars(struct fastbuf *fb)
+{
+  if (fb->bptr < fb->bufend)
+    return;
+  /* The fastbuf is embedded in the context as its `chars' member */
+  struct xml_context *ctx = SKIP_BACK(struct xml_context, chars, fb);
+  struct mempool *pool = ctx->pool;
+  if (fb->bufend != fb->buffer)
+    {
+      TRACE(ctx, "growing chars");
+      uns len = fb->bufend - fb->buffer;
+      uns reported = fb->bstop - fb->buffer;
+      fb->buffer = mp_expand(pool);
+      /* Reserve the byte for the terminator here as well; without it
+       * a text filling the expanded buffer exactly would be terminated
+       * one byte behind the available space. */
+      fb->bufend = fb->buffer + mp_avail(pool) - 1;
+      fb->bptr = fb->buffer + len;
+      fb->bstop = fb->buffer + reported;
+    }
+  else
+    {
+      TRACE(ctx, "starting chars");
+      mp_save(pool, &ctx->chars_state);
+      fb->bptr = fb->buffer = fb->bstop = mp_start_noalign(pool, 2);
+      fb->bufend = fb->buffer + mp_avail(pool) - 1;
+    }
+}
+
+static inline uns
+xml_end_chars(struct xml_context *ctx, char **out)
+{
+  /* Closes the buffer of collected characters. If it is non-empty, the text
+   * is zero-terminated, turned into a regular pool allocation stored to *out
+   * and the fastbuf is reset to the empty state. Returns the text length
+   * (0 leaves *out untouched). */
+  struct fastbuf *fb = &ctx->chars;
+  uns len = fb->bptr - fb->buffer;
+  if (!len)
+    return 0;
+  TRACE(ctx, "ending chars");
+  *fb->bptr = 0;
+  *out = mp_end(ctx->pool, fb->bptr + 1);
+  fb->bptr = fb->buffer;
+  fb->bstop = fb->buffer;
+  fb->bufend = fb->buffer;
+  return len;
+}
+
+static inline uns
+xml_report_chars(struct xml_context *ctx, char **out)
+{
+  /* Hands out the characters collected since the previous report.
+   * If there are any, the buffer is zero-terminated, *out is set to the start
+   * of the not yet reported part and that part is marked as reported.
+   * Returns the length of the text at *out; 0 means there is nothing new
+   * and *out was left untouched.
+   *
+   * The length is measured from bstop (where *out points), not from
+   * the start of the buffer: otherwise every report but the first one
+   * would claim more bytes than the returned text actually has. */
+  struct fastbuf *fb = &ctx->chars;
+  uns len = fb->bptr - fb->bstop;
+  if (len)
+    {
+      *fb->bptr = 0;
+      *out = fb->bstop;
+      fb->bstop = fb->bptr;
+    }
+  return len;
+}
+
+/* Finishes the character data collected in ctx->chars and decides what to do
+ * with it according to the context flags:
+ *   - inside an element flagged XML_NO_CHARS the text is offered to
+ *     h_ignorable (if reporting is on) and thrown away;
+ *   - if it is neither to be allocated nor reported, it is thrown away;
+ *   - otherwise a XML_NODE_CHARS node is pushed and h_chars is called
+ *     (if reporting is on).
+ * Returns the text length if a node was pushed (the caller is then expected
+ * to pop it, see the uses of xml_pop_chars()), 0 otherwise. */
+static inline uns
+xml_flush_chars(struct xml_context *ctx)
+{
+  char *text, *rtext;
+  uns len = xml_end_chars(ctx, &text), rlen;
+  if (len)
+    {
+      if (ctx->flags & XML_NO_CHARS)
+        {
+          if ((ctx->flags & XML_REPORT_CHARS) && ctx->h_ignorable)
+            ctx->h_ignorable(ctx, text, len);
+         /* Release the text by returning the pool to the state saved when the buffer was started */
+         mp_restore(ctx->pool, &ctx->chars_state);
+         return 0;
+       }
+      /* NOTE(review): xml_end_chars() above has already reset the fastbuf to empty,
+       * so xml_report_chars() returns 0 here and h_block does not seem to be
+       * reachable from this function -- confirm whether the last block
+       * was meant to be reported before ending the buffer. */
+      if ((ctx->flags & XML_REPORT_CHARS) && ctx->h_block && (rlen = xml_report_chars(ctx, &rtext)))
+       ctx->h_block(ctx, rtext, rlen);
+      if (!(ctx->flags & XML_ALLOC_CHARS) && !(ctx->flags & XML_REPORT_CHARS))
+        {
+         mp_restore(ctx->pool, &ctx->chars_state);
+         return 0;
+       }
+      /* The node gets the pool state from before the text was allocated */
+      struct xml_node *n = xml_push_dom(ctx, &ctx->chars_state);
+      n->type = XML_NODE_CHARS;
+      n->text = text;
+      n->len = len;
+      if ((ctx->flags & XML_REPORT_CHARS) && ctx->h_chars)
+        ctx->h_chars(ctx);
+    }
+  return len;
+}
+
+static inline void
+xml_pop_chars(struct xml_context *ctx)
+{
+  /* Leaves the node pushed by xml_flush_chars(); the node is released
+   * unless XML_ALLOC_CHARS asks for it to be kept. */
+  uns discard = !(ctx->flags & XML_ALLOC_CHARS);
+  xml_pop_dom(ctx, discard);
+  TRACE(ctx, "pop_chars");
+}
+
+static inline void
+xml_append_chars(struct xml_context *ctx)
+{
+  /* Collects character data up to the next '<' (which is left unread)
+   * into ctx->chars. References are expanded by xml_parse_ref(). Where
+   * character data is not allowed (XML_NO_CHARS), only white space is
+   * collected and anything else is reported and skipped. */
+  TRACE(ctx, "append_chars");
+  struct fastbuf *out = &ctx->chars;
+  if (ctx->flags & XML_NO_CHARS)
+    {
+      while (xml_get_char(ctx) != '<')
+        {
+          if (xml_last_cat(ctx) & XML_CHAR_WHITE)
+            {
+              bput_utf8_32(out, xml_last_char(ctx));
+              continue;
+            }
+          xml_error(ctx, "This element must not contain character data");
+          /* Drop the rest of the text */
+          while (xml_get_char(ctx) != '<')
+            ;
+          break;
+        }
+    }
+  else
+    {
+      while (xml_get_char(ctx) != '<')
+        {
+          if (xml_last_char(ctx) != '&')
+            bput_utf8_32(out, xml_last_char(ctx));
+          else
+            {
+              xml_inc(ctx);
+              xml_parse_ref(ctx);
+            }
+        }
+    }
+  xml_unget_char(ctx);
+}
+
+/*** CDATA sections ***/
+
+static void
+xml_skip_cdata(struct xml_context *ctx)
+{
+  /* Consumes a CDATA section without storing its content.
+   * Already parsed: '<![' */
+  TRACE(ctx, "skip_cdata");
+  xml_parse_seq(ctx, "CDATA[");
+  while (1)
+    {
+      uns c = xml_get_char(ctx);
+      if (c != ']')
+        continue;
+      /* Read the whole run of ']' and remember whether it had at least two
+       * of them. This way the terminator is recognized even if the content
+       * itself ends with brackets (e.g. "]]]>"), which a plain chain of
+       * three comparisons would miss. */
+      uns run = 1;
+      while ((c = xml_get_char(ctx)) == ']')
+        run = 2;
+      if (run == 2 && c == '>')
+        break;
+    }
+  xml_dec(ctx);
+}
+
+static void
+xml_append_cdata(struct xml_context *ctx)
+{
+  /* CDSect :== '<![CDATA[' (Char* - (Char* ']]>' Char*)) ']]>'
+   * Already parsed: '<!['
+   *
+   * Appends the content of the section to ctx->chars. If reporting is on,
+   * the text collected before the section is passed to h_block first
+   * and the content of the section to h_cdata afterwards. Where character
+   * data is not allowed (XML_NO_CHARS), the section is reported as an error
+   * and skipped. */
+  TRACE(ctx, "append_cdata");
+  if (ctx->flags & XML_NO_CHARS)
+    {
+      xml_error(ctx, "This element must not contain CDATA");
+      xml_skip_cdata(ctx);
+      return;
+    }
+  xml_parse_seq(ctx, "CDATA[");
+  struct fastbuf *out = &ctx->chars;
+  uns rlen;
+  char *rtext;
+  if ((ctx->flags & XML_REPORT_CHARS) && ctx->h_block && (rlen = xml_report_chars(ctx, &rtext)))
+    ctx->h_block(ctx, rtext, rlen);
+  while (1)
+    {
+      uns c = xml_get_char(ctx);
+      if (c == ']')
+        {
+          /* Up to two brackets are held back, since they might belong
+           * to the terminator; every further bracket of the same run pushes
+           * the oldest one to the output. So "]]]>" yields "]" and ends
+           * the section instead of being copied as data. */
+          uns pending = 1;
+          while ((c = xml_get_char(ctx)) == ']')
+            {
+              if (pending < 2)
+                pending++;
+              else
+                bputc(out, ']');
+            }
+          if (pending == 2 && c == '>')
+            break;
+          /* Not a terminator, the held brackets are ordinary content */
+          while (pending--)
+            bputc(out, ']');
+        }
+      bput_utf8_32(out, c);
+    }
+  if ((ctx->flags & XML_REPORT_CHARS) && ctx->h_cdata && (rlen = xml_report_chars(ctx, &rtext)))
+    ctx->h_cdata(ctx, rtext, rlen);
+  xml_dec(ctx);
+}
+
+/*** Attribute values ***/
+
+char *
+xml_parse_attr_value(struct xml_context *ctx, struct xml_dtd_attr *attr UNUSED)
+{
+  TRACE(ctx, "parse_attr_value");
+  /* AttValue ::= '"' ([^<&"] | Reference)* '"'        | "'" ([^<&'] | Reference)* "'"
+   * Returns the value with references expanded and every white-space character
+   * replaced by a space; an empty value is returned as "". */
+  /* FIXME: -- check value constrains / normalize leading/trailing WS and repeated WS */
+  struct mempool_state state;
+  uns quote = xml_parse_quote(ctx);
+  mp_save(ctx->stack, &state);
+  struct fastbuf *out = &ctx->chars;
+  struct xml_source *src = ctx->src;
+  for (;;)
+    {
+      uns c = xml_get_char(ctx);
+      /* The closing quote counts only when read from the source the value started in */
+      if (c == quote && src == ctx->src)
+        break;
+      if (c == '&')
+        {
+          xml_inc(ctx);
+          xml_parse_ref(ctx);
+          continue;
+        }
+      if (c == '<')
+        {
+          xml_error(ctx, "Attribute value must not contain '<'");
+          continue;
+        }
+      if (xml_last_cat(ctx) & XML_CHAR_WHITE)
+        bputc(out, ' ');
+      else
+        bput_utf8_32(out, c);
+    }
+  mp_restore(ctx->stack, &state);
+  char *text;
+  if (!xml_end_chars(ctx, &text))
+    return "";
+  return text;
+}
+
+uns
+xml_normalize_white(struct xml_context *ctx UNUSED, char *text)
+{
+  /* Normalizes spaces (0x20) in `text' in place: leading and trailing spaces
+   * are removed, every other run of spaces is replaced by a single one.
+   * Returns the new length of the string. */
+  char *src = text, *dst = text;
+  while (*src == 0x20)
+    src++;
+  for (;;)
+    {
+      /* Copy a word, i.e. everything up to a space or the end of the string */
+      for (; *src & ~0x20; src++)
+        *dst++ = *src;
+      if (*src == 0)
+        break;
+      /* Squeeze the following run of spaces */
+      do
+        src++;
+      while (*src == 0x20);
+      *dst++ = 0x20;
+    }
+  /* A run of spaces at the very end has left one space behind */
+  if (dst != text && dst[-1] == 0x20)
+    dst--;
+  *dst = 0;
+  return dst - text;
+}
+
+/*** Attributes ***/
+
+struct xml_attrs_table;
+
+/* Hash function of the table of attributes: the key is a pair
+ * (element node, attribute name), so both hashes are combined. */
+static inline uns
+xml_attrs_hash(struct xml_attrs_table *t UNUSED, struct xml_node *e, char *n)
+{
+  return hash_pointer(e) ^ hash_string(n);
+}
+
+static inline int
+xml_attrs_eq(struct xml_attrs_table *t UNUSED, struct xml_node *e1, char *n1, struct xml_node *e2, char *n2)
+{
+  /* Two keys are equal if they refer to the very same element node
+   * and the attribute names match. */
+  if (e1 != e2)
+    return 0;
+  return strcmp(n1, n2) == 0;
+}
+
+static inline void
+xml_attrs_init_key(struct xml_attrs_table *t UNUSED, struct xml_attr *a, struct xml_node *e, char *name)
+{
+  /* Initializes a freshly created attribute: it has no value yet (which is
+   * how xml_parse_attr() detects duplicates) and it is appended to the list
+   * of attributes of its element. */
+  a->name = name;
+  a->elem = e;
+  a->user = NULL;
+  a->val = NULL;
+  slist_add_tail(&e->attrs, &a->n);
+}
+
+/* Parameters for the generic hash table included from ucw/hashtable.h below.
+ * The generated functions carry the xml_attrs_ prefix (xml_attrs_lookup(),
+ * xml_attrs_find(), xml_attrs_remove(), ... are used later in this file),
+ * the nodes are struct xml_attr and the key is the pair (elem, name).
+ * Hashing, comparison and key initialization are supplied by the functions
+ * defined above; allocation comes from XML_HASH_GIVE_ALLOC, which is defined
+ * outside this file. */
+#define HASH_PREFIX(x) xml_attrs_##x
+#define HASH_NODE struct xml_attr
+#define HASH_KEY_COMPLEX(x) x elem, x name
+#define HASH_KEY_DECL struct xml_node *elem, char *name
+#define HASH_TABLE_DYNAMIC
+#define HASH_GIVE_EQ
+#define HASH_GIVE_HASHFN
+#define HASH_GIVE_INIT_KEY
+#define HASH_WANT_CLEANUP
+#define HASH_WANT_REMOVE
+#define HASH_WANT_LOOKUP
+#define HASH_WANT_FIND
+#define HASH_GIVE_ALLOC
+XML_HASH_GIVE_ALLOC
+#include "ucw/hashtable.h"
+
+static void
+xml_parse_attr(struct xml_context *ctx)
+{
+  TRACE(ctx, "parse_attr");
+  /* Attribute ::= Name Eq AttValue
+   * The attribute is attached to the current element (ctx->node). A repeated
+   * attribute is reported and its new value ignored. If the element has a DTD
+   * declaration, the attribute is looked up there and validated. */
+  struct xml_node *elem = ctx->node;
+  char *name = xml_parse_name(ctx, ctx->pool);
+  struct xml_attr *attr = xml_attrs_lookup(ctx->tab_attrs, elem, name);
+  xml_parse_eq(ctx);
+  char *value = xml_parse_attr_value(ctx, NULL);
+  if (attr->val)
+    {
+      xml_error(ctx, "Attribute %s is not unique in element <%s>", name, elem->name);
+      return;
+    }
+  attr->val = value;
+  if (!elem->dtd)
+    {
+      attr->dtd = NULL;
+      return;
+    }
+  attr->dtd = xml_dtd_find_attr(ctx, elem->dtd, attr->name);
+  if (attr->dtd)
+    xml_validate_attr(ctx, attr->dtd, attr->val);
+  else
+    xml_error(ctx, "Undefined attribute %s in element <%s>", name, elem->name);
+}
+
+/* Look up the attribute `name' of the element `node' in the table of attributes.
+ * The result of xml_attrs_find() is passed through unchanged. */
+struct xml_attr *
+xml_attr_find(struct xml_context *ctx, struct xml_node *node, char *name)
+{
+  return xml_attrs_find(ctx->tab_attrs, node, name);
+}
+
+char *
+xml_attr_value(struct xml_context *ctx, struct xml_node *node, char *name)
+{
+  /* Returns the value of the attribute `name' of the element `node'.
+   * If the element does not carry the attribute, the default value from
+   * the DTD is used; NULL is returned if there is no such declaration
+   * (or no DTD for the element at all). */
+  struct xml_attr *attr = xml_attrs_find(ctx->tab_attrs, node, name);
+  if (attr)
+    return attr->val;
+  struct xml_dtd_attr *decl = node->dtd ? xml_dtd_find_attr(ctx, node->dtd, name) : NULL;
+  if (!decl)
+    return NULL;
+  return decl->default_value;
+}
+
+void
+xml_attrs_table_init(struct xml_context *ctx)
+{
+  /* Creates the table of attributes in the context's pool and initializes it */
+  ctx->tab_attrs = xml_hash_new(ctx->pool, sizeof(struct xml_attrs_table));
+  xml_attrs_init(ctx->tab_attrs);
+}
+
+/* Clean up the table of attributes created by xml_attrs_table_init(). */
+void
+xml_attrs_table_cleanup(struct xml_context *ctx)
+{
+  xml_attrs_cleanup(ctx->tab_attrs);
+}
+
+/*** Elements ***/
+
+static uns
+xml_validate_element(struct xml_dtd_elem_node *root, struct xml_dtd_elem *elem)
+{
+  /* Checks whether `elem' occurs anywhere in the content model rooted
+   * at `root': a node referring to an element is compared directly,
+   * other nodes are searched recursively through their sons. */
+  if (root->elem)
+    return root->elem == elem;
+  SLIST_FOR_EACH(struct xml_dtd_elem_node *, son, root->sons)
+    {
+      if (xml_validate_element(son, elem))
+        return 1;
+    }
+  return 0;
+}
+
+/* Parses a start tag (or an empty-element tag) and pushes a XML_NODE_ELEM
+ * node for it. If a DTD is loaded, the element and its attributes are checked
+ * against it. An empty-element tag sets XML_EMPTY_ELEM_TAG in ctx->flags.
+ * Calls the h_stag hook if tags are to be reported. */
+static void
+xml_push_element(struct xml_context *ctx)
+{
+  TRACE(ctx, "push_element");
+  /* EmptyElemTag | STag
+   * EmptyElemTag ::= '<' Name (S  Attribute)* S? '/>'
+   * STag ::= '<' Name (S  Attribute)* S? '>'
+   * Already parsed: '<' */
+  struct xml_node *e = xml_push_dom(ctx, NULL);
+  clist_init(&e->sons);
+  e->type = XML_NODE_ELEM;
+  e->name = xml_parse_name(ctx, ctx->pool);
+  slist_init(&e->attrs);
+  if (!e->parent)
+    {
+      /* An element without a parent is the root of the document */
+      ctx->dom = e;
+      if (ctx->doctype && strcmp(e->name, ctx->doctype))
+       xml_error(ctx, "The root element <%s> does not match the document type <%s>", e->name, ctx->doctype);
+    }
+  if (!ctx->dtd)
+    e->dtd = NULL;
+  else if (!(e->dtd = xml_dtd_find_elem(ctx, e->name)))
+    xml_error(ctx, "Undefined element <%s>", e->name);
+  else
+    {
+      struct xml_dtd_elem *dtd = e->dtd, *parent_dtd = e->parent ? e->parent->dtd : NULL;
+      /* Character data is accepted only in elements declared with mixed content */
+      if (dtd->type == XML_DTD_ELEM_MIXED)
+        ctx->flags &= ~XML_NO_CHARS;
+      else
+       ctx->flags |= XML_NO_CHARS;
+      if (parent_dtd)
+        if (parent_dtd->type == XML_DTD_ELEM_EMPTY)
+         xml_error(ctx, "Empty element must not contain children");
+        else if (parent_dtd->type != XML_DTD_ELEM_ANY)
+         {
+           /* Only checks that the element occurs somewhere in the parent's content model */
+           // FIXME: validate regular expressions
+           if (!xml_validate_element(parent_dtd->node, dtd))
+             xml_error(ctx, "Unexpected element <%s>", e->name);
+         }
+    }
+  /* Attributes: each of them must be preceded by white space */
+  while (1)
+    {
+      uns white = xml_parse_white(ctx, 0);
+      uns c = xml_get_char(ctx);
+      if (c == '/')
+        {
+         xml_parse_char(ctx, '>');
+         ctx->flags |= XML_EMPTY_ELEM_TAG;
+         break;
+       }
+      else if (c == '>')
+       break;
+      else if (!white)
+       xml_fatal_expected_white(ctx);
+      /* The character belongs to the attribute's name, return it to the input */
+      xml_unget_char(ctx);
+      xml_parse_attr(ctx);
+    }
+  /* Check required attributes and, if requested, materialize the defaults from the DTD */
+  if (e->dtd)
+    SLIST_FOR_EACH(struct xml_dtd_attr *, a, e->dtd->attrs)
+      if (a->default_mode == XML_ATTR_REQUIRED)
+        {
+         if (!xml_attrs_find(ctx->tab_attrs, e, a->name))
+           xml_error(ctx, "Missing required attribute %s in element <%s>", a->name, e->name);
+       }
+      else if (a->default_mode != XML_ATTR_IMPLIED && ctx->flags & XML_ALLOC_DEFAULT_ATTRS)
+        {
+         /* The lookup creates the attribute if the tag did not specify it */
+         struct xml_attr *attr = xml_attrs_lookup(ctx->tab_attrs, e, a->name);
+         if (!attr->val)
+           attr->val = a->default_value;
+       }
+  if ((ctx->flags & XML_REPORT_TAGS) && ctx->h_stag)
+    ctx->h_stag(ctx);
+}
+
+/* Leaves the current element: calls the h_etag hook if tags are to be reported
+ * and pops the node. Unless XML_ALLOC_TAGS asks for the tree to be kept,
+ * the attributes of the element and of the element nodes still linked below it
+ * are removed from the table of attributes before the node is released. */
+static void
+xml_pop_element(struct xml_context *ctx)
+{
+  TRACE(ctx, "pop_element");
+  if ((ctx->flags & XML_REPORT_TAGS) && ctx->h_etag)
+    ctx->h_etag(ctx);
+  struct xml_node *e = ctx->node;
+  uns free = !(ctx->flags & XML_ALLOC_TAGS);
+  if (free)
+    {
+      if (!e->parent)
+       ctx->dom = NULL;
+      /* Restore hash table of attributes */
+      SLIST_FOR_EACH(struct xml_attr *, a, e->attrs)
+       xml_attrs_remove(ctx->tab_attrs, a);
+      struct xml_node *n;
+      /* Walk the subtree without recursion: the sons of every visited element
+       * are moved to the list being processed (presumably right behind
+       * the element itself) before the element is unlinked */
+      while (n = clist_head(&e->sons))
+        {
+         if (n->type == XML_NODE_ELEM)
+           {
+             SLIST_FOR_EACH(struct xml_attr *, a, n->attrs)
+               xml_attrs_remove(ctx->tab_attrs, a);
+             clist_insert_list_after(&n->sons, &n->n);
+           }
+         clist_remove(&n->n);
+       }
+    }
+  xml_pop_dom(ctx, free);
+  xml_dec(ctx);
+}
+
+static void
+xml_parse_etag(struct xml_context *ctx)
+{
+ /* ETag ::= '</' Name S? '>'
+  * Already parsed: '</'
+  *
+  * The name is not parsed on its own, the input is compared character
+  * by character with the name of the current element (ctx->node) instead.
+  * A tag which does not match is reported by xml_error() and skipped
+  * up to its closing '>'. */
+  struct xml_node *e = ctx->node;
+  ASSERT(e);
+  char *n = e->name;
+  while (*n)
+    {
+      uns c;
+      n = utf8_32_get(n, &c);
+      if (xml_get_char(ctx) != c)
+       goto recover;
+    }
+  xml_parse_white(ctx, 0);
+  if (xml_get_char(ctx) != '>')
+    {
+recover:
+      xml_error(ctx, "Invalid ETag, expected </%s>", e->name);
+      /* The character which did not match can be the closing '>' itself
+       * (e.g. a tag with a shorter name), so test the last character
+       * before reading on; otherwise everything up to the '>' of the next
+       * tag would be thrown away. */
+      while (xml_last_char(ctx) != '>')
+       xml_get_char(ctx);
+    }
+  xml_dec(ctx);
+}
+
+/*** Document type declaration ***/
+
+/* Parses the beginning of a document type declaration: the name of the document
+ * type (stored to ctx->doctype) and the optional external ID (ctx->system_id,
+ * ctx->public_id). The flags XML_HAS_EXTERNAL_SUBSET and XML_HAS_INTERNAL_SUBSET
+ * record which subsets are present. The subsets themselves and the closing '>'
+ * are left to the caller. Calls the h_doctype_decl hook if it is set. */
+static void
+xml_parse_doctype_decl(struct xml_context *ctx)
+{
+  TRACE(ctx, "parse_doctype_decl");
+  /* doctypedecl ::= '<!DOCTYPE' S  Name (S  ExternalID)? S? ('[' intSubset ']' S?)? '>'
+   * Already parsed: '<!'
+   * Terminated before '[' or '>' */
+  if (ctx->doctype)
+    xml_fatal(ctx, "Multiple document types not allowed");
+  xml_parse_seq(ctx, "DOCTYPE");
+  xml_parse_white(ctx, 1);
+  ctx->doctype = xml_parse_name(ctx, ctx->pool);
+  TRACE(ctx, "doctype=%s", ctx->doctype);
+  uns c;
+  /* An external ID must be separated by white space and starts with SYSTEM or PUBLIC;
+   * the first letter is only peeked at, the keyword is verified below */
+  if (xml_parse_white(ctx, 0) && ((c = xml_peek_char(ctx)) == 'S' || c == 'P'))
+    {
+      if (c == 'S')
+        {
+         xml_parse_seq(ctx, "SYSTEM");
+         xml_parse_white(ctx, 1);
+         ctx->system_id = xml_parse_system_literal(ctx, ctx->pool);
+       }
+      else
+        {
+         xml_parse_seq(ctx, "PUBLIC");
+         xml_parse_white(ctx, 1);
+         ctx->public_id = xml_parse_pubid_literal(ctx, ctx->pool);
+         xml_parse_white(ctx, 1);
+         ctx->system_id = xml_parse_system_literal(ctx, ctx->pool);
+       }
+      xml_parse_white(ctx, 0);
+      ctx->flags |= XML_HAS_EXTERNAL_SUBSET;
+    }
+  if (xml_peek_char(ctx) == '[')
+    {
+      /* Only the opening bracket is consumed here */
+      ctx->flags |= XML_HAS_INTERNAL_SUBSET;
+      xml_skip_char(ctx);
+      xml_inc(ctx);
+    }
+  if (ctx->h_doctype_decl)
+    ctx->h_doctype_decl(ctx);
+}
+
+
+
+///////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+/* DTD: Internal subset */
+
+/* Parses the declarations of a DTD subset and dispatches them to the parsers
+ * of the individual declarations. The internal subset (external == 0) ends
+ * at ']', the external one (external != 0) at '>'. Anything unexpected
+ * is a fatal error. */
+static void
+xml_parse_subset(struct xml_context *ctx, uns external)
+{
+  // FIXME:
+  // -- comments/pi have no parent
+  // -- conditional sections in external subset
+  // -- check correctness of parameter entities
+
+  /* '[' intSubset ']'
+   * intSubset :== (markupdecl | DeclSep)
+   * Already parsed: '['
+   *
+   * extSubsetDecl ::= ( markupdecl | conditionalSect | DeclSep)*
+   */
+  while (1)
+    {
+      xml_parse_white(ctx, 0);
+      uns c = xml_get_char(ctx);
+      xml_inc(ctx);
+      if (c == '<')
+       if ((c = xml_get_char(ctx)) == '!')
+         /* The first letter(s) select the kind of declaration, the rest of the keyword is verified by xml_parse_seq() */
+         switch (c = xml_get_char(ctx))
+           {
+             case '-':
+               xml_push_comment(ctx);
+               xml_pop_comment(ctx);
+               break;
+             case 'N':
+               xml_parse_seq(ctx, "OTATION");
+               xml_parse_notation_decl(ctx);
+               break;
+             case 'E':
+               /* Either ENTITY or ELEMENT */
+               if ((c = xml_get_char(ctx)) == 'N')
+                 {
+                   xml_parse_seq(ctx, "TITY");
+                   xml_parse_entity_decl(ctx);
+                 }
+               else if (c == 'L')
+                 {
+                   xml_parse_seq(ctx, "EMENT");
+                   xml_parse_element_decl(ctx);
+                 }
+               else
+                 goto invalid_markup;
+               break;
+             case 'A':
+               xml_parse_seq(ctx, "TTLIST");
+               xml_parse_attr_list_decl(ctx);
+               break;
+             default:
+               goto invalid_markup;
+           }
+        else if (c == '?')
+         {
+           xml_push_pi(ctx);
+           xml_pop_pi(ctx);
+         }
+        else
+         goto invalid_markup;
+      else if (c == '%')
+       xml_parse_pe_ref(ctx);
+      else if (c == ']' && !external)
+        {
+         break;
+       }
+      else if (c == '>' && external)
+        {
+         break;
+       }
+      else
+       goto invalid_markup;
+    }
+  /* Pairs with the xml_inc() done for the terminating character */
+  xml_dec(ctx);
+  return;
+invalid_markup: ;
+  xml_fatal(ctx, "Invalid markup in the %s subset", external ? "external" : "internal");
+}
+
+/*** The State Machine ***/
+
+uns
+xml_next(struct xml_context *ctx)
+{
+  /* A nasty state machine */
+
+#define PULL(x) do { if (ctx->pull & XML_PULL_##x) return ctx->state = XML_STATE_##x; case XML_STATE_##x: ; } while (0)
+#define PULL_STATE(x, s) do { if (ctx->pull & XML_PULL_##x) return ctx->state = XML_STATE_##s, XML_STATE_##x; case XML_STATE_##s: ; } while (0)
+
+  TRACE(ctx, "xml_next (state=%u)", ctx->state);
+  jmp_buf throw_buf;
+  ctx->throw_buf = &throw_buf;
+  if (setjmp(throw_buf))
+    {
+error:
+      if (ctx->err_code == XML_ERR_EOF && ctx->h_fatal)
+       ctx->h_fatal(ctx);
+      TRACE(ctx, "raised fatal error");
+      return ctx->state = XML_STATE_EOF;
+    }
+  uns c;
+  switch (ctx->state)
+    {
+      case XML_STATE_START:
+       TRACE(ctx, "entering prolog");
+       ctx->flags |= XML_SRC_DOCUMENT | XML_SRC_EXPECTED_DECL;
+       if (ctx->h_document_start)
+         ctx->h_document_start(ctx);
+       /* XMLDecl */
+       xml_refill(ctx);
+       if (ctx->h_xml_decl)
+         ctx->h_xml_decl(ctx);
+       PULL(XML_DECL);
+
+       /* Misc* (doctypedecl Misc*)? */
+        while (1)
+         {
+           xml_parse_white(ctx, 0);
+           xml_parse_char(ctx, '<');
+           xml_inc(ctx);
+           if ((c = xml_get_char(ctx)) == '?')
+             /* Processing instruction */
+             if (!(ctx->flags & XML_REPORT_PIS))
+               xml_skip_pi(ctx);
+             else
+               {
+                 xml_push_pi(ctx);
+                 PULL_STATE(PI, PROLOG_PI);
+                 xml_pop_pi(ctx);
+               }
+           else if (c != '!')
+             {
+               /* Found the root tag */
+               xml_unget_char(ctx);
+               goto first_tag;
+             }
+           else if (xml_get_char(ctx) == '-')
+             if (!(ctx->flags & XML_REPORT_COMMENTS))
+               xml_skip_comment(ctx);
+             else
+               {
+                 xml_push_comment(ctx);
+                 PULL_STATE(COMMENT, PROLOG_COMMENT);
+                 xml_pop_comment(ctx);
+               }
+           else
+             {
+               /* DocTypeDecl */
+               xml_unget_char(ctx);
+               xml_parse_doctype_decl(ctx);
+               PULL(DOCTYPE_DECL);
+               if (ctx->flags & XML_HAS_DTD)
+                 if (ctx->flags & XML_PARSE_DTD)
+                   {
+                     xml_dtd_init(ctx);
+                     if (ctx->h_dtd_start)
+                       ctx->h_dtd_start(ctx);
+                     if (ctx->flags & XML_HAS_INTERNAL_SUBSET)
+                       {
+                         xml_parse_subset(ctx, 0);
+                         xml_dec(ctx);
+                       }
+                     if (ctx->flags & XML_HAS_EXTERNAL_SUBSET)
+                       {
+                         struct xml_dtd_entity ent = {
+                           .system_id = ctx->system_id,
+                           .public_id = ctx->public_id,
+                         };
+                         xml_parse_white(ctx, 0);
+                         xml_parse_char(ctx, '>');
+                         xml_unget_char(ctx);
+                         ASSERT(ctx->h_resolve_entity);
+                         ctx->h_resolve_entity(ctx, &ent);
+                         ctx->flags |= XML_SRC_EXPECTED_DECL;
+                         xml_parse_subset(ctx, 1);
+                         xml_unget_char(ctx);;
+                       }
+                     if (ctx->h_dtd_end)
+                       ctx->h_dtd_end(ctx);
+                   }
+                 else if (ctx->flags & XML_HAS_INTERNAL_SUBSET)
+                   xml_skip_internal_subset(ctx);
+               xml_parse_white(ctx, 0);
+               xml_parse_char(ctx, '>');
+               xml_dec(ctx);
+             }
+         }
+
+      case XML_STATE_CHARS:
+
+       while (1)
+         {
+           if (xml_peek_char(ctx) != '<')
+             {
+               /* CharData */
+               xml_append_chars(ctx);
+               continue;
+             }
+           else
+             xml_skip_char(ctx);
+           xml_inc(ctx);
+first_tag:
+
+           if ((c = xml_get_char(ctx)) == '?')
+             {
+               /* PI */
+               if (!(ctx->flags & (XML_REPORT_PIS | XML_ALLOC_PIS)))
+                 xml_skip_pi(ctx);
+               else
+                 {
+                   if (xml_flush_chars(ctx))
+                     {
+                       PULL_STATE(CHARS, CHARS_BEFORE_PI);
+                       xml_pop_chars(ctx);
+                     }
+                   xml_push_pi(ctx);
+                   PULL(PI);
+                   xml_pop_pi(ctx);
+                 }
+             }
+
+           else if (c == '!')
+             if ((c = xml_get_char(ctx)) == '-')
+               {
+                 /* Comment */
+                 if (!(ctx->flags & (XML_REPORT_COMMENTS | XML_ALLOC_COMMENTS)))
+                   xml_skip_comment(ctx);
+                 else
+                   {
+                     if (xml_flush_chars(ctx))
+                       {
+                         PULL_STATE(CHARS, CHARS_BEFORE_COMMENT);
+                         xml_pop_chars(ctx);
+                       }
+                     xml_push_comment(ctx);
+                     PULL(COMMENT);
+                     xml_pop_comment(ctx);
+                   }
+               }
+             else if (c == '[')
+               {
+                 /* CDATA */
+                 xml_append_cdata(ctx);
+               }
+             else
+               xml_fatal(ctx, "Unexpected character after '<!'");
+
+           else if (c != '/')
+             {
+               /* STag | EmptyElemTag */
+               xml_unget_char(ctx);
+               if (xml_flush_chars(ctx))
+                 {
+                   PULL_STATE(CHARS, CHARS_BEFORE_STAG);
+                   xml_pop_chars(ctx);
+                 }
+
+               xml_push_element(ctx);
+               PULL(STAG);
+               if (ctx->flags & XML_EMPTY_ELEM_TAG)
+                 goto pop_element;
+             }
+
+           else
+             {
+               /* ETag */
+               if (xml_flush_chars(ctx))
+                 {
+                   PULL_STATE(CHARS, CHARS_BEFORE_ETAG);
+                   xml_pop_chars(ctx);
+                 }
+
+               xml_parse_etag(ctx);
+pop_element:
+               PULL(ETAG);
+               xml_pop_element(ctx);
+               if (!ctx->node)
+                 goto epilog;
+             }
+         }
+
+epilog:
+       /* Misc* */
+        TRACE(ctx, "entering epilog");
+       while (1)
+         {
+           /* Epilog whitespace is the only place, where a valid document can reach EOF */
+           if (setjmp(throw_buf))
+             if (ctx->err_code == XML_ERR_EOF)
+               {
+                 TRACE(ctx, "reached EOF");
+                 ctx->state = XML_STATE_EOF;
+                 if (ctx->h_document_end)
+                   ctx->h_document_end(ctx);
+      case XML_STATE_EOF:
+                 ctx->err_code = 0;
+                 ctx->err_msg = NULL;
+                 return XML_STATE_EOF;
+               }
+             else
+               goto error;
+           xml_parse_white(ctx, 0);
+           if (setjmp(throw_buf))
+             goto error;
+
+           /* Misc */
+           xml_parse_char(ctx, '<');
+           xml_inc(ctx);
+           if ((c = xml_get_char(ctx)) == '?')
+             /* Processing instruction */
+             if (!(ctx->flags & XML_REPORT_PIS))
+               xml_skip_pi(ctx);
+             else
+               {
+                 xml_push_pi(ctx);
+                 PULL_STATE(PI, EPILOG_PI);
+                 xml_pop_pi(ctx);
+               }
+           else if (c == '!')
+             {
+               xml_parse_char(ctx, '-');
+               /* Comment */
+               if (!(ctx->flags & XML_REPORT_COMMENTS))
+                 xml_skip_comment(ctx);
+               else
+                 {
+                   xml_push_comment(ctx);
+                   PULL_STATE(COMMENT, EPILOG_COMMENT);
+                   xml_pop_comment(ctx);
+                 }
+             }
+           else
+             xml_fatal(ctx, "Syntax error in the epilog");
+         }
+
+    }
+  ASSERT(0);
+}
+
+/*
+ * Perform one xml_next() step with ctx->pull temporarily replaced by @pull.
+ * The previous pull mask is put back before the result of xml_next() is returned.
+ */
+uns
+xml_next_state(struct xml_context *ctx, uns pull)
+{
+  const uns caller_pull = ctx->pull;
+  uns state;
+
+  ctx->pull = pull;
+  state = xml_next(ctx);
+  ctx->pull = caller_pull;
+  return state;
+}
+
+/*
+ * Skip the rest of the element whose start tag has just been reported.
+ * Must be called in XML_STATE_STAG. With only XML_PULL_ETAG requested, xml_next()
+ * is called repeatedly until it returns zero or ctx->node is again the node
+ * that was current on entry. The caller's pull mask is restored and the last
+ * xml_next() result is returned.
+ */
+uns
+xml_skip_element(struct xml_context *ctx)
+{
+  ASSERT(ctx->state == XML_STATE_STAG);
+  struct xml_node *target = ctx->node;
+  const uns caller_pull = ctx->pull;
+  uns state;
+
+  ctx->pull = XML_PULL_ETAG;
+  do
+    state = xml_next(ctx);
+  while (state && ctx->node != target);
+  ctx->pull = caller_pull;
+  return state;
+}
+
+/*
+ * Run the parser with an empty pull mask until xml_next() returns zero,
+ * then return ctx->err_code.
+ */
+uns
+xml_parse(struct xml_context *ctx)
+{
+  /* A single pass is expected; it repeats only if the user overrides ctx->pull in a SAX handler */
+  for (;;)
+    {
+      ctx->pull = 0;
+      if (!xml_next(ctx))
+        break;
+    }
+  return ctx->err_code;
+}
+
+/*
+ * Concatenate the text of every XML_NODE_CHARS node visited by
+ * XML_NODE_FOR_EACH() on the element @node into one zero-terminated string
+ * grown in @pool. Nodes of other types are ignored (no recursion).
+ */
+char *
+xml_merge_chars(struct xml_context *ctx UNUSED, struct xml_node *node, struct mempool *pool)
+{
+  ASSERT(node->type == XML_NODE_ELEM);
+  char *out = mp_start_noalign(pool, 1);
+  XML_NODE_FOR_EACH(child, node)
+    {
+      if (child->type == XML_NODE_CHARS)
+        {
+          /* Always keep one spare byte for the final terminator */
+          out = mp_spread(pool, out, child->len + 1);
+          memcpy(out, child->text, child->len);
+          out += child->len;
+        }
+    }
+  *out++ = 0;
+  return mp_end(pool, out);
+}
+
+/*
+ * Recursive worker of xml_merge_dom_chars(): append the text of all
+ * XML_NODE_CHARS nodes below @node, descending into XML_NODE_ELEM nodes, to the
+ * string being grown in @pool. @p is the current write position; the new write
+ * position is returned. One spare byte is always kept available after it.
+ */
+static char *
+xml_append_dom_chars(char *p, struct mempool *pool, struct xml_node *node)
+{
+  XML_NODE_FOR_EACH(child, node)
+    {
+      if (child->type == XML_NODE_CHARS)
+        {
+          p = mp_spread(pool, p, child->len + 1);
+          memcpy(p, child->text, child->len);
+          p += child->len;
+        }
+      else if (child->type == XML_NODE_ELEM)
+        {
+          p = xml_append_dom_chars(p, pool, child);
+        }
+    }
+  return p;
+}
+
+/*
+ * Like xml_merge_chars(), but also collects text from nested elements:
+ * the result is a zero-terminated string in @pool built by
+ * xml_append_dom_chars() over the subtree of the element @node.
+ */
+char *
+xml_merge_dom_chars(struct xml_context *ctx UNUSED, struct xml_node *node, struct mempool *pool)
+{
+  ASSERT(node->type == XML_NODE_ELEM);
+  char *end = xml_append_dom_chars(mp_start_noalign(pool, 1), pool, node);
+  *end++ = 0;
+  return mp_end(pool, end);
+}
diff --git a/shxml/source.c b/shxml/source.c

new file mode 100644 (file)

index 0000000..29226f0
--- /dev/null
+++ b/shxml/source.c
@@ -0,0 +1,486 @@
+/*
+ *     Sherlock Library -- A simple XML parser
+ *
+ *     (c) 2007--2008 Pavel Charvat <pchar@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#undef LOCAL_DEBUG
+
+#include "sherlock/sherlock.h"
+#include "sherlock/xml/xml.h"
+#include "sherlock/xml/dtd.h"
+#include "sherlock/xml/internals.h"
+#include "ucw/unicode.h"
+#include "ucw/ff-unicode.h"
+#include "charset/charconv.h"
+#include "charset/fb-charconv.h"
+
+/*** Character categorization ***/
+
+#include "obj/sherlock/xml/unicat.c"
+
+/*
+ * Select the character category masks matching the XML version recorded in
+ * ctx->flags (XML_VERSION_1_1 set => XML 1.1 masks, otherwise XML 1.0 masks).
+ * For XML 1.0 the "unrestricted" mask equals the "valid" mask.
+ */
+static void
+xml_init_cats(struct xml_context *ctx)
+{
+  if (ctx->flags & XML_VERSION_1_1)
+    {
+      ctx->cat_chars = XML_CHAR_VALID_1_1;
+      ctx->cat_unrestricted = XML_CHAR_UNRESTRICTED_1_1;
+      ctx->cat_new_line = XML_CHAR_NEW_LINE_1_1;
+      ctx->cat_name = XML_CHAR_NAME_1_1;
+      ctx->cat_sname = XML_CHAR_SNAME_1_1;
+    }
+  else
+    {
+      ctx->cat_chars = XML_CHAR_VALID_1_0;
+      ctx->cat_unrestricted = XML_CHAR_VALID_1_0;
+      ctx->cat_new_line = XML_CHAR_NEW_LINE_1_0;
+      ctx->cat_name = XML_CHAR_NAME_1_0;
+      ctx->cat_sname = XML_CHAR_SNAME_1_0;
+    }
+}
+
+/*** Reading of document/external entities ***/
+
+/* Record XML_ERR_EOF with the message "Unexpected EOF" in the context and throw; never returns. */
+static void NONRET
+xml_eof(struct xml_context *ctx)
+{
+  ctx->err_code = XML_ERR_EOF;
+  ctx->err_msg = "Unexpected EOF";
+  xml_throw(ctx);
+}
+
+/* Report the fatal error "Entity is not nested correctly" through xml_fatal(); declared NONRET. */
+void NONRET
+xml_fatal_nested(struct xml_context *ctx)
+{
+  xml_fatal(ctx, "Entity is not nested correctly");
+}
+
+/*
+ * Append one buffer entry at *bstop and advance *bstop past it.
+ * An entry is a pair of u32 words: the character followed by its xml_char_cat() value.
+ */
+static inline void
+xml_add_char(u32 **bstop, uns c)
+{
+  u32 *slot = *bstop;
+  slot[0] = c;
+  slot[1] = xml_char_cat(c);
+  *bstop = slot + 2;
+}
+
+/*
+ * Push a new, zero-initialized input source on top of the source stack.
+ *
+ * The read window (bptr/bstop) of the source that was current so far is stored
+ * in that source, the element depth is remembered in the new source and reset
+ * to zero, the per-source flags are cleared and the read window is pointed at
+ * the (empty) buffer of the new source. Returns the new source; the caller
+ * still has to attach a fastbuf to it.
+ */
+struct xml_source *
+xml_push_source(struct xml_context *ctx)
+{
+  xml_push(ctx);
+
+  /* Park the read window of the source being suspended (if any) */
+  struct xml_source *outer = ctx->src;
+  if (outer)
+    {
+      outer->bptr = ctx->bptr;
+      outer->bstop = ctx->bstop;
+    }
+
+  struct xml_source *inner = mp_alloc_zero(ctx->stack, sizeof(*inner));
+  inner->next = outer;
+  inner->saved_depth = ctx->depth;
+  ctx->src = inner;
+  ctx->depth = 0;
+  ctx->bptr = ctx->bstop = inner->buf;
+  ctx->flags &= ~(XML_SRC_EOF | XML_SRC_EXPECTED_DECL | XML_SRC_DOCUMENT);
+  return inner;
+}
+
+/*
+ * Push a new source (see xml_push_source()) reading from the fastbuf @fb.
+ * The fastbuf is later closed by xml_close_source(), i.e. the source takes it over.
+ */
+struct xml_source *
+xml_push_fastbuf(struct xml_context *ctx, struct fastbuf *fb)
+{
+  struct xml_source *src = xml_push_source(ctx);
+  src->fb = fb;
+  return src;
+}
+
+/*
+ * Close the fastbuf(s) of a source: src->fb always, and src->wrapped_fb when set
+ * (xml_init_charconv() stores the original stream there after wrapping it).
+ */
+static void
+xml_close_source(struct xml_source *src)
+{
+  bclose(src->fb);
+  if (src->wrapped_fb)
+    bclose(src->wrapped_fb);
+}
+
+/*
+ * Leave the current source after it has been read completely.
+ *
+ * It is a fatal error if the element depth inside the source is not zero or
+ * if there is no source at all. Otherwise the source is closed, the saved depth
+ * and the read window of the enclosing source are restored and the parser
+ * stack is popped. When no enclosing source exists, xml_eof() is thrown.
+ */
+static void
+xml_pop_source(struct xml_context *ctx)
+{
+  TRACE(ctx, "pop_source");
+  if (unlikely(ctx->depth != 0))
+    xml_fatal(ctx, "Unexpected end of entity");
+
+  struct xml_source *done = ctx->src;
+  if (!done)
+    xml_fatal(ctx, "Undefined source");
+  xml_close_source(done);
+  ctx->depth = done->saved_depth;
+
+  struct xml_source *outer = done->next;
+  ctx->src = outer;
+  if (outer)
+    {
+      ctx->bptr = outer->bptr;
+      ctx->bstop = outer->bstop;
+    }
+  xml_pop(ctx);
+  if (unlikely(!outer))
+    xml_eof(ctx);
+}
+
+/* Unlink and close all sources remaining on the source stack; ctx->src is NULL afterwards. */
+void
+xml_sources_cleanup(struct xml_context *ctx)
+{
+  for (struct xml_source *s = ctx->src; s; s = ctx->src)
+    {
+      /* Unlink first, so the list head is valid while the source is being closed */
+      ctx->src = s->next;
+      xml_close_source(s);
+    }
+}
+
+static void xml_refill_utf8(struct xml_context *ctx);
+
+/*
+ * Entity resolver that supports nothing: it only reports an error through
+ * xml_error() and does not push any source. The name suggests it is the default
+ * h_resolve_entity hook -- the assignment is not visible here.
+ */
+void
+xml_def_resolve_entity(struct xml_context *ctx, struct xml_dtd_entity *ent UNUSED)
+{
+  xml_error(ctx, "References to external entities are not supported");
+}
+
+/*
+ * Start reading the replacement text of an entity.
+ *
+ * External entities are handed to the h_resolve_entity hook and
+ * XML_SRC_EXPECTED_DECL is set, so that the next xml_refill() runs xml_parse_decl().
+ * Internal entities get a freshly pushed source reading ent->text through
+ * a memory fastbuf embedded in the source. In both cases the source that is
+ * current afterwards is set up for UTF-8 refill with the current categories.
+ *
+ * NOTE(review): the external branch presumably relies on the hook pushing a new
+ * source. If the hook does not (xml_def_resolve_entity() in this file only reports
+ * an error), `src` is the previously current source and its refill settings and
+ * the EXPECTED_DECL flag are applied to it -- confirm this is intended.
+ */
+void
+xml_push_entity(struct xml_context *ctx, struct xml_dtd_entity *ent)
+{
+  TRACE(ctx, "xml_push_entity");
+  struct xml_source *src;
+  if (ent->flags & XML_DTD_ENTITY_EXTERNAL)
+    {
+      ASSERT(ctx->h_resolve_entity);
+      ctx->h_resolve_entity(ctx, ent);
+      ctx->flags |= XML_SRC_EXPECTED_DECL;
+      src = ctx->src;
+    }
+  else
+    {
+      src = xml_push_source(ctx);
+      fbbuf_init_read(src->fb = &src->wrap_fb, ent->text, strlen(ent->text), 0);
+    }
+  src->refill = xml_refill_utf8;
+  src->refill_cat1 = ctx->cat_unrestricted & ~ctx->cat_new_line;
+  src->refill_cat2 = ctx->cat_new_line;
+}
+
+/*
+ * Report a character rejected by the refill loop and return UNI_REPLACEMENT
+ * as its substitute. The value ~1U (the replacement passed to the bget_*_repl
+ * readers by the refill functions) is reported as "Corrupted encoding",
+ * anything else as a restricted character.
+ */
+static uns
+xml_error_restricted(struct xml_context *ctx, uns c)
+{
+  if (c != ~1U)
+    xml_error(ctx, "Restricted char U+%04X", c);
+  else
+    xml_error(ctx, "Corrupted encoding");
+  return UNI_REPLACEMENT;
+}
+
+void xml_parse_decl(struct xml_context *ctx);
+
+/*
+ * REFILL(ctx, func, params...) -- body shared by the xml_refill_*() functions.
+ *
+ * Reads characters with func(fb, params...) from the fastbuf of the current
+ * source and appends them to the source buffer as (character, category) pairs
+ * of u32 words. If the buffer has been fully consumed (bptr == bstop), it is
+ * restarted from the beginning first. Each character is classified by
+ * xml_char_cat():
+ *
+ *  - category in refill_cat1: stored as is,
+ *  - category in refill_cat2 (new-line characters): stored as a single 0xA and
+ *    the row counter is incremented; a new-line character other than 0xD and
+ *    0x2028 directly following a 0xD is dropped, so that the two-character
+ *    sequences count as one line break (last_0xd marks the position right
+ *    after the 0xA emitted for a 0xD),
+ *  - '>' not covered by the masks: stored and the loop is left (xml_parse_decl()
+ *    removes XML_CHAR_GT from refill_cat1 to get this behaviour),
+ *  - any other value except ~0U: reported by xml_error_restricted() and replaced
+ *    by its return value,
+ *  - ~0U: taken as end of input, XML_SRC_EOF is set and the loop is left.
+ *
+ * The loop also ends when the buffer is full. Whether the last character read
+ * was a 0xD is carried over to the next call in src->pending_0xd.
+ */
+#define REFILL(ctx, func, params...)                                                   \
+  struct xml_source *src = ctx->src;                                                   \
+  struct fastbuf *fb = src->fb;                                                                \
+  if (ctx->bptr == ctx->bstop)                                                         \
+    ctx->bptr = ctx->bstop = src->buf;                                                 \
+  uns c, t1 = src->refill_cat1, t2 = src->refill_cat2, row = src->row;         \
+  u32 *bend = src->buf + ARRAY_SIZE(src->buf), *bstop = ctx->bstop,                    \
+      *last_0xd = src->pending_0xd ? bstop : NULL;                                     \
+  do                                                                                   \
+    {                                                                                  \
+      c = func(fb, ##params);                                                          \
+      uns t = xml_char_cat(c);                                                         \
+      if (t & t1)                                                                      \
+        /* Typical branch */                                                           \
+       *bstop++ = c, *bstop++ = t;                                                     \
+      else if (t & t2)                                                                 \
+        {                                                                              \
+         /* New line */                                                                \
+         /* XML 1.0: 0xA | 0xD | 0xD 0xA */                                            \
+         /* XML 1.1: 0xA | 0xD | 0xD 0xA | 0x85 | 0xD 0x85 | 0x2028 */                 \
+         if (c == 0xd)                                                                 \
+           last_0xd = bstop + 2;                                                       \
+         else if (c != 0x2028 && last_0xd == bstop)                                    \
+           {                                                                           \
+             last_0xd = NULL;                                                          \
+             continue;                                                                 \
+           }                                                                           \
+         xml_add_char(&bstop, 0xa), row++;                                             \
+       }                                                                               \
+      else if (c == '>')                                                               \
+        {                                                                              \
+         /* Used only in XML/TextDecl to switch the encoding */                        \
+         *bstop++ = c, *bstop++ = t;                                                   \
+         break;                                                                        \
+       }                                                                               \
+      else if (~c)                                                                     \
+        /* Restricted character */                                                     \
+        xml_add_char(&bstop, xml_error_restricted(ctx, c));                            \
+      else                                                                             \
+        {                                                                              \
+         /* EOF */                                                                     \
+          ctx->flags |= XML_SRC_EOF;                                                   \
+          break;                                                                       \
+       }                                                                               \
+    }                                                                                  \
+  while (bstop < bend);                                                                        \
+  src->pending_0xd = (last_0xd == bstop);                                              \
+  ctx->bstop = bstop;                                                                  \
+  src->row = row;
+
+/* Refill the buffer of the current source using bget_utf8_repl(); undecodable input yields ~1U. */
+static void
+xml_refill_utf8(struct xml_context *ctx)
+{
+  REFILL(ctx, bget_utf8_repl, ~1U);
+}
+
+/* Refill the buffer of the current source using bget_utf16_le_repl(); undecodable input yields ~1U. */
+static void
+xml_refill_utf16_le(struct xml_context *ctx)
+{
+  REFILL(ctx, bget_utf16_le_repl, ~1U);
+}
+
+/* Refill the buffer of the current source using bget_utf16_be_repl(); undecodable input yields ~1U. */
+static void
+xml_refill_utf16_be(struct xml_context *ctx)
+{
+  REFILL(ctx, bget_utf16_be_repl, ~1U);
+}
+
+#undef REFILL
+
+/*
+ * Make at least one character available in the read window.
+ *
+ * Each round does exactly one of: leave an exhausted source (XML_SRC_EOF),
+ * parse a pending XML/text declaration (XML_SRC_EXPECTED_DECL), or call the
+ * refill function of the current source. Rounds repeat while the window
+ * is still empty. Running out of sources ends in a throw from xml_pop_source().
+ */
+void
+xml_refill(struct xml_context *ctx)
+{
+  for (;;)
+    {
+      const uns flags = ctx->flags;
+      if (flags & XML_SRC_EOF)
+        xml_pop_source(ctx);
+      else if (flags & XML_SRC_EXPECTED_DECL)
+        xml_parse_decl(ctx);
+      else
+        {
+          ctx->src->refill(ctx);
+          TRACE(ctx, "refilled %u characters", (uns)((ctx->bstop - ctx->bptr) / 2));
+        }
+      if (ctx->bptr != ctx->bstop)
+        break;
+    }
+}
+
+/*
+ * Compute the 1-based row of the current read position.
+ * src->row counts the line breaks among all characters buffered so far, so the
+ * breaks still waiting between ctx->bptr and ctx->bstop are subtracted again.
+ */
+static uns
+xml_source_row(struct xml_context *ctx, struct xml_source *src)
+{
+  uns unread_breaks = 0;
+  /* Entries are (character, category) pairs; the category is the second word */
+  for (u32 *entry = ctx->bptr; entry != ctx->bstop; entry += 2)
+    if (entry[1] & src->refill_cat2)
+      unread_breaks++;
+  return src->row - unread_breaks + 1;
+}
+
+/* Return the 1-based row in the current source, or 0 when there is no source. */
+uns
+xml_row(struct xml_context *ctx)
+{
+  if (!ctx->src)
+    return 0;
+  return xml_source_row(ctx, ctx->src);
+}
+
+/* Document/external entity header */
+
+/*
+ * Parse a quoted encoding name and return it as a zero-terminated string
+ * allocated from ctx->pool. A character outside of the allowed classes is a
+ * fatal error.
+ */
+static char *
+xml_parse_encoding_name(struct xml_context *ctx)
+{
+  /* EncName ::= '"' [A-Za-z] ([A-Za-z0-9._] | '-')* '"' | "'" [A-Za-z] ([A-Za-z0-9._] | '-')* "'" */
+  char *out = mp_start_noalign(ctx->pool, 1);
+  const uns quote = xml_parse_quote(ctx);
+  if (unlikely(!(xml_get_cat(ctx) & XML_CHAR_ENC_SNAME)))
+    xml_fatal(ctx, "Invalid character in the encoding name");
+  for (;;)
+    {
+      /* Room for the accepted character and for the final terminator */
+      out = mp_spread(ctx->pool, out, 2);
+      *out++ = xml_last_char(ctx);
+      if (xml_get_char(ctx) == quote)
+        break;
+      if (unlikely(!(xml_last_cat(ctx) & XML_CHAR_ENC_NAME)))
+        xml_fatal(ctx, "Invalid character in the encoding name");
+    }
+  *out++ = 0;
+  return mp_end(ctx->pool, out);
+}
+
+/*
+ * Put a charset-converting fastbuf (charset @cs -> UTF-8) in front of the
+ * stream of the current source. The original stream is kept in
+ * src->wrapped_fb, from where xml_close_source() closes it.
+ */
+static void
+xml_init_charconv(struct xml_context *ctx, int cs)
+{
+  // XXX: with a direct access to libcharset tables could be faster
+  struct xml_source *src = ctx->src;
+  struct fastbuf *raw = src->fb;
+  TRACE(ctx, "wrapping charset %s", charset_name(cs));
+  src->wrapped_fb = raw;
+  src->fb = fb_wrap_charconv_in(raw, cs, CONV_CHARSET_UTF8);
+}
+
+/*
+ * Parse the header of the current source: the XMLDecl of the document entity
+ * (XML_SRC_DOCUMENT set) or the optional TextDecl of an external entity.
+ *
+ *  1. Pick the initial refill function from src->fb_encoding or, when no
+ *     encoding was given, from the first byte of the stream (0xfe / 0xff are
+ *     taken as the start of a UTF-16 byte order mark).
+ *  2. Look ahead for "<?xml" followed by a white-space character.
+ *  3. Parse the version, encoding and standalone pseudo-attributes.
+ *  4. Switch to the declared encoding if no encoding has been forced.
+ *  5. Reinitialize the categories used by the refill functions.
+ *
+ * While the header is being read, '>' is removed from refill_cat1, which makes
+ * the refill functions stop after each '>', so that nothing behind the header
+ * is decoded with the wrong encoding or XML version.
+ *
+ * Problems are reported by xml_error() or xml_fatal().
+ */
+void
+xml_parse_decl(struct xml_context *ctx)
+{
+  TRACE(ctx, "xml_parse_decl");
+  struct xml_source *src = ctx->src;
+  ctx->flags &= ~XML_SRC_EXPECTED_DECL;
+  uns doc = ctx->flags & XML_SRC_DOCUMENT;
+
+  /* Setup valid Unicode ranges and force the reader to abort refill() after each '>', where we can switch encoding or XML version */
+  if (doc)
+    xml_init_cats(ctx);
+  src->refill_cat1 = ctx->cat_unrestricted & ~ctx->cat_new_line & ~XML_CHAR_GT;
+  src->refill_cat2 = ctx->cat_new_line;
+
+  /* Initialize the supplied charset (if any) or try to guess it */
+  char *expected_encoding = src->expected_encoding;
+  src->refill = xml_refill_utf8;
+  int bom = bpeekc(src->fb);
+  if (bom < 0)
+    ctx->flags |= XML_SRC_EOF;
+  if (!src->fb_encoding)
+    {
+      if (bom == 0xfe)
+        src->refill = xml_refill_utf16_be;
+      else if (bom == 0xff)
+        src->refill = xml_refill_utf16_le;
+    }
+  else
+    {
+      int cs = find_charset_by_name(src->fb_encoding);
+      if (cs == CONV_CHARSET_UTF8)
+        {}
+      else if (cs >= 0)
+        {
+          xml_init_charconv(ctx, cs);
+          bom = 0;
+        }
+      /* strcasecmp() returns zero on a match, so the UTF-16 names must be tested with '!' */
+      else if (!strcasecmp(src->fb_encoding, "UTF-16"))
+        {
+          src->refill = xml_refill_utf16_be;
+          if (bom == 0xff)
+            src->refill = xml_refill_utf16_le;
+        }
+      else if (!strcasecmp(src->fb_encoding, "UTF-16BE"))
+        src->refill = xml_refill_utf16_be;
+      else if (!strcasecmp(src->fb_encoding, "UTF-16LE"))
+        src->refill = xml_refill_utf16_le;
+      else
+        {
+          xml_error(ctx, "Unknown encoding '%s'", src->fb_encoding);
+          expected_encoding = NULL;
+        }
+    }
+  uns utf16 = src->refill == xml_refill_utf16_le || src->refill == xml_refill_utf16_be;
+  if (utf16)
+    src->fb_encoding = (src->refill == xml_refill_utf16_be) ? "UTF-16BE" : "UTF-16LE";
+  if (!expected_encoding)
+    expected_encoding = src->fb_encoding;
+  if (bom > 0 && xml_peek_char(ctx) == 0xfeff)
+    xml_skip_char(ctx);
+  else if (utf16)
+    xml_error(ctx, "Missing or corrupted BOM");
+  TRACE(ctx, "Initial encoding=%s", src->fb_encoding ? : "?");
+
+  /* Look ahead for presence of XMLDecl or optional TextDecl */
+  if (!(ctx->flags & XML_SRC_EOF) && ctx->bstop != src->buf + ARRAY_SIZE(src->buf))
+    xml_refill(ctx);
+  u32 *bptr = ctx->bptr;
+  /* The buffer holds (character, category) pairs: even indices are characters, bptr[11] is the category of the 6th one */
+  uns have_decl = (12 <= ctx->bstop - ctx->bptr && (bptr[11] & XML_CHAR_WHITE) &&
+    bptr[0] == '<' && bptr[2] == '?' && (bptr[4] & 0xdf) == 'X' && (bptr[6] & 0xdf) == 'M' && (bptr[8] & 0xdf) == 'L');
+  if (!have_decl)
+    {
+      if (doc)
+        xml_fatal(ctx, "Missing or corrupted XML header");
+      /*
+       * Test the very pointer passed to strcasecmp(): the local expected_encoding
+       * may have been taken from fb_encoding while src->expected_encoding is NULL.
+       */
+      else if (src->expected_encoding && strcasecmp(src->expected_encoding, "UTF-8") && !utf16)
+        xml_error(ctx, "Missing or corrupted entity header");
+      goto exit;
+    }
+  ctx->bptr = bptr + 12;
+  xml_parse_white(ctx, 0);
+
+  /* Parse version string (mandatory in XMLDecl, optional in TextDecl) */
+  if (xml_peek_char(ctx) == 'v')
+    {
+      xml_parse_seq(ctx, "version");
+      xml_parse_eq(ctx);
+      char *version = xml_parse_pubid_literal(ctx, ctx->pool);
+      TRACE(ctx, "version=%s", version);
+      uns v = 0;
+      if (!strcmp(version, "1.1"))
+        v = XML_VERSION_1_1;
+      else if (strcmp(version, "1.0"))
+        {
+          xml_error(ctx, "Unknown XML version string '%s'", version);
+          version = "1.0";
+        }
+      if (doc)
+        {
+          ctx->version_str = version;
+          ctx->flags |= v;
+        }
+      else if (v > (ctx->flags & XML_VERSION_1_1))
+        xml_error(ctx, "XML 1.1 external entity included from XML 1.0 document");
+      if (!xml_parse_white(ctx, !doc))
+        goto end;
+    }
+  else if (doc)
+    {
+      xml_error(ctx, "Expected XML version");
+      ctx->version_str = "1.0";
+    }
+
+  /* Parse encoding string (optional in XMLDecl, mandatory in TextDecl) */
+  if (xml_peek_char(ctx) == 'e')
+    {
+      xml_parse_seq(ctx, "encoding");
+      xml_parse_eq(ctx);
+      src->decl_encoding = xml_parse_encoding_name(ctx);
+      TRACE(ctx, "encoding=%s", src->decl_encoding);
+      if (!xml_parse_white(ctx, 0))
+        goto end;
+    }
+  else if (!doc)
+    xml_error(ctx, "Expected XML encoding");
+
+  /* Parse whether the document is standalone (optional in XMLDecl) */
+  if (doc && xml_peek_char(ctx) == 's')
+    {
+      xml_parse_seq(ctx, "standalone");
+      xml_parse_eq(ctx);
+      uns c = xml_parse_quote(ctx);
+      if (ctx->standalone = (xml_peek_char(ctx) == 'y'))
+        xml_parse_seq(ctx, "yes");
+      else
+        xml_parse_seq(ctx, "no");
+      xml_parse_char(ctx, c);
+      TRACE(ctx, "standalone=%d", ctx->standalone);
+      xml_parse_white(ctx, 0);
+    }
+end:
+  xml_parse_seq(ctx, "?>");
+
+  /* Switch to the final encoding */
+  if (src->decl_encoding)
+    {
+      int cs = find_charset_by_name(src->decl_encoding);
+      if (cs < 0 && !expected_encoding)
+        xml_error(ctx, "Unknown encoding '%s'", src->decl_encoding);
+      else if (!src->fb_encoding && cs >= 0 && cs != CONV_CHARSET_UTF8)
+        {
+          xml_init_charconv(ctx, cs);
+          src->fb_encoding = src->decl_encoding;
+        }
+      else if (expected_encoding && strcasecmp(src->decl_encoding, expected_encoding) && (!utf16 ||
+        !(!strcasecmp(src->decl_encoding, "UTF-16") ||
+         (!strcasecmp(src->decl_encoding, "UTF-16BE") && strcasecmp(expected_encoding, "UTF-16LE")) ||
+         (!strcasecmp(src->decl_encoding, "UTF-16LE") && strcasecmp(expected_encoding, "UTF-16BE")))))
+        xml_error(ctx, "The header contains encoding '%s' instead of expected '%s'", src->decl_encoding, expected_encoding);
+    }
+  if (!src->fb_encoding)
+    src->fb_encoding = "UTF-8";
+  TRACE(ctx, "Final encoding=%s", src->fb_encoding);
+
+exit:
+  /* Update valid Unicode ranges */
+  if (doc)
+    xml_init_cats(ctx);
+  src->refill_cat1 = ctx->cat_unrestricted & ~ctx->cat_new_line;
+  src->refill_cat2 = ctx->cat_new_line;
+}
diff --git a/shxml/unicat.pl b/shxml/unicat.pl

new file mode 100755 (executable)

index 0000000..b86106f
--- /dev/null
+++ b/shxml/unicat.pl
@@ -0,0 +1,165 @@
+#!/usr/bin/perl
+#
+#      UCW Library -- Character map for the XML parser
+#
+#      (c) 2007 Pavel Charvat <pchar@ucw.cz>
+#
+#      This software may be freely distributed and used according to the terms
+#      of the GNU Lesser General Public License.
+#
+
+# @cat:  category bit mask of each code point below 0x10000
+# @lcat: category bit mask of each of the 0x11 planes of 0x10000 code points
+# %ids:  category name => bit number (filled by set())
+# %cls:  distinct bit mask => class number (filled by find_cls())
+my @cat = (0) x 0x10000;
+my @lcat = (0) x 0x11;
+my %ids = ();
+my %cls = ();
+
+my @white = (0x9, 0xA, 0xD, 0x20);
+my @base_char_1_0 = (
+  [0x0041,0x005A], [0x0061,0x007A], [0x00C0,0x00D6], [0x00D8,0x00F6], [0x00F8,0x00FF], [0x0100,0x0131],
+  [0x0134,0x013E], [0x0141,0x0148], [0x014A,0x017E], [0x0180,0x01C3], [0x01CD,0x01F0], [0x01F4,0x01F5],
+  [0x01FA,0x0217], [0x0250,0x02A8], [0x02BB,0x02C1], 0x0386, [0x0388,0x038A], 0x038C, [0x038E,0x03A1],
+  [0x03A3,0x03CE], [0x03D0,0x03D6], 0x03DA, 0x03DC, 0x03DE, 0x03E0, [0x03E2,0x03F3], [0x0401,0x040C],
+  [0x040E,0x044F], [0x0451,0x045C], [0x045E,0x0481], [0x0490,0x04C4], [0x04C7,0x04C8], [0x04CB,0x04CC],
+  [0x04D0,0x04EB], [0x04EE,0x04F5], [0x04F8,0x04F9], [0x0531,0x0556], 0x0559, [0x0561,0x0586], [0x05D0,0x05EA],
+  [0x05F0,0x05F2], [0x0621,0x063A], [0x0641,0x064A], [0x0671,0x06B7], [0x06BA,0x06BE], [0x06C0,0x06CE],
+  [0x06D0,0x06D3], 0x06D5, [0x06E5,0x06E6], [0x0905,0x0939], 0x093D, [0x0958,0x0961], [0x0985,0x098C],
+  [0x098F,0x0990], [0x0993,0x09A8], [0x09AA,0x09B0], 0x09B2, [0x09B6,0x09B9], [0x09DC,0x09DD], [0x09DF,0x09E1],
+  [0x09F0,0x09F1], [0x0A05,0x0A0A], [0x0A0F,0x0A10], [0x0A13,0x0A28], [0x0A2A,0x0A30], [0x0A32,0x0A33],
+  [0x0A35,0x0A36], [0x0A38,0x0A39], [0x0A59,0x0A5C], 0x0A5E, [0x0A72,0x0A74], [0x0A85,0x0A8B], 0x0A8D,
+  [0x0A8F,0x0A91], [0x0A93,0x0AA8], [0x0AAA,0x0AB0], [0x0AB2,0x0AB3], [0x0AB5,0x0AB9], 0x0ABD, 0x0AE0,
+  [0x0B05,0x0B0C], [0x0B0F,0x0B10], [0x0B13,0x0B28], [0x0B2A,0x0B30], [0x0B32,0x0B33], [0x0B36,0x0B39],
+  0x0B3D, [0x0B5C,0x0B5D], [0x0B5F,0x0B61], [0x0B85,0x0B8A], [0x0B8E,0x0B90], [0x0B92,0x0B95], [0x0B99,0x0B9A],
+  0x0B9C, [0x0B9E,0x0B9F], [0x0BA3,0x0BA4], [0x0BA8,0x0BAA], [0x0BAE,0x0BB5], [0x0BB7,0x0BB9], [0x0C05,0x0C0C],
+  [0x0C0E,0x0C10], [0x0C12,0x0C28], [0x0C2A,0x0C33], [0x0C35,0x0C39], [0x0C60,0x0C61], [0x0C85,0x0C8C],
+  [0x0C8E,0x0C90], [0x0C92,0x0CA8], [0x0CAA,0x0CB3], [0x0CB5,0x0CB9], 0x0CDE, [0x0CE0,0x0CE1], [0x0D05,0x0D0C],
+  [0x0D0E,0x0D10], [0x0D12,0x0D28], [0x0D2A,0x0D39], [0x0D60,0x0D61], [0x0E01,0x0E2E], 0x0E30, [0x0E32,0x0E33],
+  [0x0E40,0x0E45], [0x0E81,0x0E82], 0x0E84, [0x0E87,0x0E88], 0x0E8A, 0x0E8D, [0x0E94,0x0E97], [0x0E99,0x0E9F],
+  [0x0EA1,0x0EA3], 0x0EA5, 0x0EA7, [0x0EAA,0x0EAB], [0x0EAD,0x0EAE], 0x0EB0, [0x0EB2,0x0EB3], 0x0EBD,
+  [0x0EC0,0x0EC4], [0x0F40,0x0F47], [0x0F49,0x0F69], [0x10A0,0x10C5], [0x10D0,0x10F6], 0x1100, [0x1102,0x1103],
+  [0x1105,0x1107], 0x1109, [0x110B,0x110C], [0x110E,0x1112], 0x113C, 0x113E, 0x1140, 0x114C, 0x114E, 0x1150,
+  [0x1154,0x1155], 0x1159, [0x115F,0x1161], 0x1163, 0x1165, 0x1167, 0x1169, [0x116D,0x116E], [0x1172,0x1173],
+  0x1175, 0x119E, 0x11A8, 0x11AB, [0x11AE,0x11AF], [0x11B7,0x11B8], 0x11BA, [0x11BC,0x11C2], 0x11EB, 0x11F0,
+  0x11F9, [0x1E00,0x1E9B], [0x1EA0,0x1EF9], [0x1F00,0x1F15], [0x1F18,0x1F1D], [0x1F20,0x1F45], [0x1F48,0x1F4D],
+  [0x1F50,0x1F57], 0x1F59, 0x1F5B, 0x1F5D, [0x1F5F,0x1F7D], [0x1F80,0x1FB4], [0x1FB6,0x1FBC], 0x1FBE,
+  [0x1FC2,0x1FC4], [0x1FC6,0x1FCC], [0x1FD0,0x1FD3], [0x1FD6,0x1FDB], [0x1FE0,0x1FEC], [0x1FF2,0x1FF4],
+  [0x1FF6,0x1FFC], 0x2126, [0x212A,0x212B], 0x212E, [0x2180,0x2182], [0x3041,0x3094], [0x30A1,0x30FA],
+  [0x3105,0x312C], [0xAC00,0xD7A3]);
+my @ideographic_1_0 = ([0x4E00,0x9FA5], 0x3007, [0x3021,0x3029]);
+my @combining_char_1_0 = (
+  [0x0300,0x0345], [0x0360,0x0361], [0x0483,0x0486], [0x0591,0x05A1], [0x05A3,0x05B9], [0x05BB,0x05BD],
+  0x05BF, [0x05C1,0x05C2], 0x05C4, [0x064B,0x0652], 0x0670, [0x06D6,0x06DC], [0x06DD,0x06DF], [0x06E0,0x06E4],
+  [0x06E7,0x06E8], [0x06EA,0x06ED], [0x0901,0x0903], 0x093C, [0x093E,0x094C], 0x094D, [0x0951,0x0954],
+  [0x0962,0x0963], [0x0981,0x0983], 0x09BC, 0x09BE, 0x09BF, [0x09C0,0x09C4], [0x09C7,0x09C8], [0x09CB,0x09CD],
+  0x09D7, [0x09E2,0x09E3], 0x0A02, 0x0A3C, 0x0A3E, 0x0A3F, [0x0A40,0x0A42], [0x0A47,0x0A48], [0x0A4B,0x0A4D],
+  [0x0A70,0x0A71], [0x0A81,0x0A83], 0x0ABC, [0x0ABE,0x0AC5], [0x0AC7,0x0AC9], [0x0ACB,0x0ACD], [0x0B01,0x0B03],
+  0x0B3C, [0x0B3E,0x0B43], [0x0B47,0x0B48], [0x0B4B,0x0B4D], [0x0B56,0x0B57], [0x0B82,0x0B83], [0x0BBE,0x0BC2],
+  [0x0BC6,0x0BC8], [0x0BCA,0x0BCD], 0x0BD7, [0x0C01,0x0C03], [0x0C3E,0x0C44], [0x0C46,0x0C48], [0x0C4A,0x0C4D],
+  [0x0C55,0x0C56], [0x0C82,0x0C83], [0x0CBE,0x0CC4], [0x0CC6,0x0CC8], [0x0CCA,0x0CCD], [0x0CD5,0x0CD6],
+  [0x0D02,0x0D03], [0x0D3E,0x0D43], [0x0D46,0x0D48], [0x0D4A,0x0D4D], 0x0D57, 0x0E31, [0x0E34,0x0E3A],
+  [0x0E47,0x0E4E], 0x0EB1, [0x0EB4,0x0EB9], [0x0EBB,0x0EBC], [0x0EC8,0x0ECD], [0x0F18,0x0F19], 0x0F35,
+  0x0F37, 0x0F39, 0x0F3E, 0x0F3F, [0x0F71,0x0F84], [0x0F86,0x0F8B], [0x0F90,0x0F95], 0x0F97, [0x0F99,0x0FAD],
+  [0x0FB1,0x0FB7], 0x0FB9, [0x20D0,0x20DC], 0x20E1, [0x302A,0x302F], 0x3099, 0x309A);
+my @digit_1_0 = (
+  [0x0030,0x0039], [0x0660,0x0669], [0x06F0,0x06F9], [0x0966,0x096F], [0x09E6,0x09EF], [0x0A66,0x0A6F],
+  [0x0AE6,0x0AEF], [0x0B66,0x0B6F], [0x0BE7,0x0BEF], [0x0C66,0x0C6F], [0x0CE6,0x0CEF], [0x0D66,0x0D6F],
+  [0x0E50,0x0E59], [0x0ED0,0x0ED9], [0x0F20,0x0F29]);
+my @extender_1_0 = (
+  0x00B7, 0x02D0, 0x02D1, 0x0387, 0x0640, 0x0E46, 0x0EC6, 0x3005, [0x3031,0x3035], [0x309D,0x309E], [0x30FC,0x30FE]);
+my @sname_1_1 = (
+  "[:A-Z_a-z]", [0xC0,0xD6], [0xD8,0xF6], [0xF8,0x2FF], [0x370,0x37D], [0x37F,0x1FFF],
+  [0x200C,0x200D], [0x2070,0x218F], [0x2C00,0x2FEF], [0x3001,0xD7FF], [0xF900,0xFDCF], [0xFDF0,0xFFFD], [0x10000,0xEFFFF]);
+
+# Definitions of all character categories. Every set() call allocates one bit;
+# gen_enum() later emits one XML_CHAR_<name> constant per category.
+set("WHITE", @white);
+set("NEW_LINE_1_0", 0xA, 0xD);
+set("NEW_LINE_1_1", 0xA, 0xD, 0x85, 0x2028);
+set("DIGIT", "[0-9]");
+set("XDIGIT", "[0-9a-fA-F]");
+set("VALID_1_0", @white, [0x20,0xD7FF], [0xE000,0xFFFD], [0x10000,0x10FFFF]);
+set("VALID_1_1", [0x1,0xD7FF], [0xE000,0xFFFD], [0x10000,0x10FFFF]);
+set("UNRESTRICTED_1_1", @white, [0x20,0x7E], 0x85, [0xA0,0xD7FF], [0xE000,0xFFFD], [0x10000,0x10FFFF]);
+# PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
+# (the class used to list ':' twice and to miss ';')
+set("PUBID", 0x20, 0xD, 0xA, "[-a-zA-Z0-9'()+,./:=?;!*#@\$_%]");
+set("ENC_SNAME", "[a-zA-Z]");
+set("ENC_NAME", "[-a-zA-Z0-9._]");
+set("SNAME_1_0", "[_:]", @base_char_1_0, @ideographic_1_0);
+set("NAME_1_0", "[-_:.]", @base_char_1_0, @ideographic_1_0, @combining_char_1_0, @digit_1_0, @extender_1_0);
+set("SNAME_1_1", @sname_1_1);
+set("NAME_1_1", @sname_1_1, "[-.0-9]", 0xB7, [0x0300,0x036F], [0x203F,0x2040]);
+set("GT", "[>]");
+
+# Usage: unicat.pl <output.h> <output.c>
+if ($ARGV[0] eq "" || $ARGV[1] eq "") {
+  die("Invalid usage");
+}
+find_cls();
+open(H, ">", $ARGV[0]) || die("Cannot create $ARGV[0]");
+open(C, ">", $ARGV[1]) || die("Cannot create $ARGV[1]");
+gen_enum();
+gen_tabs();
+close(H);
+close(C);
+
+# set($id, @items): register category $id (the next free bit is allocated on
+# first use) and mark all listed items with it. An item is one of:
+#   [first, last]    inclusive range of code points,
+#   "[...]"          regex character class tried against codes 0..127,
+#   number           a single code point below 0x10000.
+sub set {
+  my ($id, @items) = @_;
+  $ids{$id} = scalar keys(%ids) if !defined($ids{$id});
+  my $bit = 1 << $ids{$id};
+  foreach my $item (@items) {
+    if (ref($item) eq "ARRAY") {
+      my ($code, $last) = ($item->[0], $item->[1]);
+      # Below 0x10000 every code point has its own slot ...
+      while ($code <= $last && $code < 0x10000) {
+        $cat[$code] |= $bit;
+        $code++;
+      }
+      # ... above that only whole planes of 0x10000 code points are tracked
+      while ($code <= $last) {
+        $lcat[$code >> 16] |= $bit;
+        $code += 0x10000;
+      }
+    }
+    elsif ($item =~ /^\[/) {
+      foreach my $code (0 .. 127) {
+        $cat[$code] |= $bit if chr($code) =~ /$item/;
+      }
+    }
+    else {
+      $cat[$item] |= $bit;
+    }
+  }
+}
+
+# Number the distinct category bit masks found in @cat and then in @lcat in the
+# order of their first occurrence; %cls maps a mask to its class number.
+sub find_cls {
+  foreach my $mask (@cat, @lcat) {
+    $cls{$mask} = scalar keys(%cls) if !defined($cls{$mask});
+  }
+}
+
+# Write enum xml_char_type to the header. The value of XML_CHAR_<id> has bit N
+# set iff class N contains category <id>; class 0 never contributes a bit.
+sub gen_enum {
+  print H "enum xml_char_type {\n";
+  foreach my $id (sort keys %ids) {
+    my $bit = 1 << $ids{$id};
+    my $mask = 0;
+    foreach my $cat_mask (keys %cls) {
+      next if !$cls{$cat_mask};
+      next if !($cat_mask & $bit);
+      $mask |= 1 << $cls{$cat_mask};
+    }
+    printf H "  XML_CHAR_%-20s = 0x%08x,\n", $id, $mask;
+  }
+  print H "};\n\n";
+}
+
+# Write the lookup tables:
+#   xml_char_tab2[256]  for each block of 256 code points below 0x10000 the
+#                       offset of its block inside xml_char_tab1,
+#   xml_char_tab1[]     class numbers of 256 code points per block; blocks with
+#                       identical contents are stored only once,
+#   xml_char_tab3[0x11] class number of each plane of 0x10000 code points.
+# The declarations go to the header (H), the definitions to the C file (C).
+sub gen_tabs {
+  my @tab = ();
+  my %hash = ();
+
+  print H "extern const byte xml_char_tab1[];\n";
+  print H "extern const uns xml_char_tab2[];\n";
+  print H "extern const byte xml_char_tab3[];\n";
+
+  print C "const uns xml_char_tab2[] = {\n  ";
+  for (my $t=0; $t<256; $t++) {
+    my $i = $t * 256;
+    my @x = ();
+    # Render the block as text (8 rows of 32 class numbers); the text is also the deduplication key
+    for (my $j=0; $j<256; $j += 32) {
+      push @x, join(",", map($cls{$_}, @cat[$i+$j..$i+$j+31]));
+    }
+    my $sub = "  " . join(",\n  ", @x);
+    if (!defined($hash{$sub})) {
+      $hash{$sub} = 256 * scalar @tab;
+      push @tab, $sub;
+    }
+    printf C "0x%x", $hash{$sub};
+    # 16 offsets per output line; the last one closes the array
+    print C ((~$t & 15) ? "," : ($t < 255) ? ",\n  " : "\n};\n\n");
+  }
+
+  print C "const byte xml_char_tab1[] = {\n";
+  print C join(",\n\n", @tab);
+  print C "\n};\n\n";
+
+  my @l = ();
+  for (my $i=0; $i<0x11; $i++) {
+    push @l, sprintf("%d", $cls{$lcat[$i]});
+  }
+  print C "const byte xml_char_tab3[] = {" . join(",", @l) . "};\n";
+}
diff --git a/shxml/xml-test.c b/shxml/xml-test.c

new file mode 100644 (file)

index 0000000..f6738c5
--- /dev/null
+++ b/shxml/xml-test.c
@@ -0,0 +1,365 @@
+/*
+ *     Sherlock Library -- A simple XML parser
+ *
+ *     (c) 2007--2008 Pavel Charvat <pchar@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "sherlock/sherlock.h"
+#include "sherlock/xml/xml.h"
+#include "sherlock/xml/dtd.h"
+#include "ucw/getopt.h"
+#include "ucw/fastbuf.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+
+enum {
+  WANT_FIRST = 0x100,
+  WANT_HIDE_ERRORS,
+  WANT_IGNORE_COMMENTS,
+  WANT_IGNORE_PIS,
+  WANT_REPORT_BLOCKS,
+  WANT_REPORT_IGNORABLE,
+  WANT_FILE_ENTITIES,
+};
+
+static char *shortopts = "spdt" CF_SHORT_OPTS;
+static struct option longopts[] = {
+  CF_LONG_OPTS
+  { "sax",             0, 0, 's' },
+  { "pull",            0, 0, 'p' },
+  { "dom",             0, 0, 't' },
+  { "dtd",             0, 0, 'd' },
+  { "hide-errors",     0, 0, WANT_HIDE_ERRORS },
+  { "ignore-comments", 0, 0, WANT_IGNORE_COMMENTS },
+  { "ignore-pis",      0, 0, WANT_IGNORE_PIS },
+  { "report-blocks",   0, 0, WANT_REPORT_BLOCKS },
+  { "report-ignorable",        0, 0, WANT_REPORT_IGNORABLE },
+  { "file-entities",   0, 0, WANT_FILE_ENTITIES },
+  { NULL,              0, 0, 0 }
+};
+
+static void NONRET	/* Print the option summary to stderr and exit with status 1 */
+usage(void)
+{
+  fputs("\
+Usage: xml-test [options] < input.xml\n\
+\n\
+Options:\n"
+CF_USAGE
+"\
+-p, --pull              Test PULL interface\n\
+-s, --sax               Test SAX interface\n\
+-t, --dom               Test DOM interface\n\
+-d, --dtd               Enable parsing of DTD\n\
+    --hide-errors       Hide warnings and error messages\n\
+    --ignore-comments   Ignore comments\n\
+    --ignore-pis        Ignore processing instructions\n\
+    --report-blocks     Report blocks of characters and CDATA sections\n\
+    --report-ignorable  Report ignorable whitespace\n\
+    --file-entities     Resolve file external entities (not fully normative)\n\
+\n", stderr);
+  exit(1);
+}
+
+static uns want_sax;
+static uns want_pull;
+static uns want_dom;
+static uns want_parse_dtd;
+static uns want_hide_errors;
+static uns want_ignore_comments;
+static uns want_ignore_pis;
+static uns want_report_blocks;
+static uns want_report_ignorable;
+static uns want_file_entities;
+
+static struct fastbuf *out;
+
+static char *
+node_type(struct xml_node *node)
+{
+  switch (node->type)
+    {
+      case XML_NODE_ELEM: return "element";
+      case XML_NODE_COMMENT: return "comment";
+      case XML_NODE_PI: return "pi";
+      case XML_NODE_CHARS: return "chars";
+      default: return "unknown";
+    }
+}
+
+static void
+show_node(struct xml_node *node)
+{
+  switch (node->type)
+    {
+      case XML_NODE_ELEM:
+       bprintf(out, " <%s>", node->name);
+        XML_ATTR_FOR_EACH(a, node)
+          bprintf(out, " %s='%s'", a->name, a->val);
+       bputc(out, '\n');
+       break;
+      case XML_NODE_COMMENT:
+       bprintf(out, " text='%s'\n", node->text);
+       break;
+      case XML_NODE_PI:
+       bprintf(out, " target=%s text='%s'\n", node->name, node->text);
+       break;
+      case XML_NODE_CHARS:
+       bprintf(out, " text='%s'\n", node->text);
+       break;
+      default:
+        bputc(out, '\n');
+    }
+}
+
+static void
+show_tree(struct xml_node *node, uns level)
+{
+  if (!node)
+    return;
+  bputs(out, "DOM:  ");
+  for (uns i = 0; i < level; i++)
+    bputs(out, "    ");
+  bputs(out, node_type(node));
+  show_node(node);
+  if (node->type == XML_NODE_ELEM)
+    XML_NODE_FOR_EACH(son, node)
+      show_tree(son, level + 1);
+}
+
+static void
+h_error(struct xml_context *ctx)
+{
+  bprintf(out, "SAX:  %s at %u: %s\n", (ctx->err_code < XML_ERR_ERROR) ? "warn" : "error", xml_row(ctx), ctx->err_msg);
+}
+
+static void
+h_document_start(struct xml_context *ctx UNUSED)
+{
+  bputs(out, "SAX:  document_start\n");
+}
+
+static void
+h_document_end(struct xml_context *ctx UNUSED)
+{
+  bputs(out, "SAX:  document_end\n");
+}
+
+static void
+h_xml_decl(struct xml_context *ctx)
+{
+  bprintf(out, "SAX:  xml_decl version=%s standalone=%d fb_encoding=%s\n", ctx->version_str, ctx->standalone, ctx->src->fb_encoding);
+}
+
+static void
+h_doctype_decl(struct xml_context *ctx)
+{
+  bprintf(out, "SAX:  doctype_decl type=%s public='%s' system='%s' extsub=%d intsub=%d\n",
+    ctx->doctype, ctx->public_id ? : "", ctx->system_id ? : "",
+    !!(ctx->flags & XML_HAS_EXTERNAL_SUBSET), !!(ctx->flags & XML_HAS_INTERNAL_SUBSET));
+}
+
+static void
+h_comment(struct xml_context *ctx)
+{
+  bputs(out, "SAX:  comment");
+  show_node(ctx->node);
+}
+
+static void
+h_pi(struct xml_context *ctx)
+{
+  bputs(out, "SAX:  pi");
+  show_node(ctx->node);
+}
+
+static void
+h_stag(struct xml_context *ctx)
+{
+  bputs(out, "SAX:  stag");
+  show_node(ctx->node);
+}
+
+static void
+h_etag(struct xml_context *ctx)
+{
+  bprintf(out, "SAX:  etag </%s>\n", ctx->node->name);
+}
+
+static void
+h_chars(struct xml_context *ctx)
+{
+  bputs(out, "SAX:  chars");
+  show_node(ctx->node);
+}
+
+static void
+h_block(struct xml_context *ctx UNUSED, char *text, uns len UNUSED)
+{
+  bprintf(out, "SAX:  block text='%s'\n", text);
+}
+
+static void
+h_cdata(struct xml_context *ctx UNUSED, char *text, uns len UNUSED)
+{
+  bprintf(out, "SAX:  cdata text='%s'\n", text);
+}
+
+static void
+h_ignorable(struct xml_context *ctx UNUSED, char *text, uns len UNUSED)
+{
+  bprintf(out, "SAX:  ignorable text='%s'\n", text);
+}
+
+static void
+h_dtd_start(struct xml_context *ctx UNUSED)
+{
+  bputs(out, "SAX:  dtd_start\n");
+}
+
+static void
+h_dtd_end(struct xml_context *ctx UNUSED)
+{
+  bputs(out, "SAX:  dtd_end\n");
+}
+
+static void
+h_resolve_entity(struct xml_context *ctx, struct xml_dtd_entity *e)
+{
+  xml_push_fastbuf(ctx, bopen(e->system_id, O_RDONLY, 4096));
+}
+
+int
+main(int argc, char **argv)
+{
+  int opt;
+  cf_def_file = NULL;
+  log_init(argv[0]);
+  while ((opt = cf_getopt(argc, argv, shortopts, longopts, NULL)) >= 0)
+    switch (opt)
+      {
+       case 's':
+         want_sax++;
+         break;
+       case 'p':
+         want_pull++;
+         break;
+       case 't':
+         want_dom++;
+         break;
+       case 'd':
+         want_parse_dtd++;
+         break;
+       case WANT_HIDE_ERRORS:
+         want_hide_errors++;
+         break;
+       case WANT_IGNORE_COMMENTS:
+         want_ignore_comments++;
+         break;
+       case WANT_IGNORE_PIS:
+         want_ignore_pis++;
+         break;
+       case WANT_REPORT_BLOCKS:
+         want_report_blocks++;
+         break;
+       case WANT_REPORT_IGNORABLE:
+         want_report_ignorable++;
+         break;
+       case WANT_FILE_ENTITIES:
+         want_file_entities++;
+         break;
+       default:
+         usage();
+      }
+  if (optind != argc)
+    usage();
+
+  out = bfdopen_shared(1, 4096);
+  struct xml_context ctx;
+  xml_init(&ctx);
+  if (!want_hide_errors)
+    ctx.h_warn = ctx.h_error = ctx.h_fatal = h_error;
+  if (want_sax)
+    {
+      ctx.h_document_start = h_document_start;
+      ctx.h_document_end = h_document_end;
+      ctx.h_xml_decl = h_xml_decl;
+      ctx.h_doctype_decl = h_doctype_decl;
+      ctx.h_comment = h_comment;
+      ctx.h_pi = h_pi;
+      ctx.h_stag = h_stag;
+      ctx.h_etag = h_etag;
+      ctx.h_chars = h_chars;
+      if (want_report_blocks)
+        {
+          ctx.h_block = h_block;
+          ctx.h_cdata = h_cdata;
+       }
+      if (want_report_ignorable)
+        ctx.h_ignorable = h_ignorable;
+      ctx.h_dtd_start = h_dtd_start;
+      ctx.h_dtd_end = h_dtd_end;
+    }
+  if (want_dom)
+    ctx.flags |= XML_ALLOC_ALL;
+  if (want_parse_dtd)
+    ctx.flags |= XML_PARSE_DTD;
+  if (want_ignore_comments)
+    ctx.flags &= ~(XML_REPORT_COMMENTS | XML_ALLOC_COMMENTS);
+  if (want_ignore_pis)
+    ctx.flags &= ~(XML_REPORT_PIS | XML_ALLOC_PIS);
+  if (want_file_entities)
+    ctx.h_resolve_entity = h_resolve_entity;
+  xml_push_fastbuf(&ctx, bfdopen_shared(0, 4096));
+  bputs(out, "PULL: start\n");
+  if (want_pull)
+    {
+      ctx.pull = XML_PULL_CHARS | XML_PULL_STAG | XML_PULL_ETAG | XML_PULL_COMMENT | XML_PULL_PI;
+      uns state;
+      while (state = xml_next(&ctx))
+       switch (state)
+         {
+           case XML_STATE_CHARS:
+             bputs(out, "PULL: chars");
+             show_node(ctx.node);
+             break;
+           case XML_STATE_STAG:
+             bputs(out, "PULL: stag");
+             show_node(ctx.node);
+             break;
+           case XML_STATE_ETAG:
+             bprintf(out, "PULL: etag </%s>\n", ctx.node->name);
+             break;
+           case XML_STATE_COMMENT:
+             bputs(out, "PULL: comment");
+             show_node(ctx.node);
+             break;
+           case XML_STATE_PI:
+             bputs(out, "PULL: pi");
+             show_node(ctx.node);
+             break;
+           default:
+             bputs(out, "PULL: unknown\n");
+             break;
+         }
+    }
+  else
+    xml_parse(&ctx);
+  if (ctx.err_code)
+    bprintf(out, "PULL: fatal error at %u: %s\n", xml_row(&ctx), ctx.err_msg);
+  else
+    {
+      bputs(out, "PULL: eof\n");
+      if (want_dom)
+       show_tree(ctx.dom, 0);
+    }
+
+  xml_cleanup(&ctx);
+  bclose(out);
+  return 0;
+}
diff --git a/shxml/xml-test.t b/shxml/xml-test.t

new file mode 100644 (file)

index 0000000..aad3d43
--- /dev/null
+++ b/shxml/xml-test.t
@@ -0,0 +1,58 @@
+# Tests for the XML parser
+# (c) 2008 Pavel Charvat <pchar@ucw.cz>
+
+Run:   ../obj/sherlock/xml/xml-test
+In:    <?xml version="1.0"?>
+       <html></html>
+Out:   PULL: start
+       PULL: eof
+
+Run:   ../obj/sherlock/xml/xml-test -s
+In:    <?xml version="1.0" encoding="ISO-8859-1"?>
+       <html><a a1="val1" a2="val2">text1&amp;amp;&lt;</a>text2</html>
+Out:   PULL: start
+       SAX:  document_start
+       SAX:  xml_decl version=1.0 standalone=0 fb_encoding=ISO-8859-1
+       SAX:  stag <html>
+       SAX:  stag <a> a1='val1' a2='val2'
+       SAX:  chars text='text1&amp;<'
+       SAX:  etag </a>
+       SAX:  chars text='text2'
+       SAX:  etag </html>
+       SAX:  document_end
+       PULL: eof
+
+Run:   ../obj/sherlock/xml/xml-test -sptd
+In:    <?xml version="1.0"?>
+       <!DOCTYPE root [
+       <!ELEMENT root (#PCDATA|a)*>
+       <!ENTITY % pe1 "<!ENTITY e1 'text'>">
+       %pe1;
+       <!ENTITY e2 '&lt;&e1;&gt;'>
+       <!ELEMENT a (#PCDATA)*>
+       ]>
+       <root>&e1;<a>&e2;</a></root>
+Out:   PULL: start
+       SAX:  document_start
+       SAX:  xml_decl version=1.0 standalone=0 fb_encoding=UTF-8
+       SAX:  doctype_decl type=root public='' system='' extsub=0 intsub=1
+       SAX:  dtd_start
+       SAX:  dtd_end
+       SAX:  stag <root>
+       PULL: stag <root>
+       SAX:  chars text='text'
+       PULL: chars text='text'
+       SAX:  stag <a>
+       PULL: stag <a>
+       SAX:  chars text='<text>'
+       PULL: chars text='<text>'
+       PULL: etag </a>
+       SAX:  etag </a>
+       PULL: etag </root>
+       SAX:  etag </root>
+       SAX:  document_end
+       PULL: eof
+       DOM:  element <root>
+       DOM:      chars text='text'
+       DOM:      element <a>
+       DOM:          chars text='<text>'
diff --git a/shxml/xml.h b/shxml/xml.h

new file mode 100644 (file)

index 0000000..e945888
--- /dev/null
+++ b/shxml/xml.h
@@ -0,0 +1,272 @@
+/*
+ *     Sherlock Library -- A simple XML parser
+ *
+ *     (c) 2007--2008 Pavel Charvat <pchar@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#ifndef _SHERLOCK_XML_XML_H
+#define _SHERLOCK_XML_XML_H
+
+#include "ucw/clists.h"
+#include "ucw/slists.h"
+#include "ucw/mempool.h"
+#include "ucw/fastbuf.h"
+
+struct xml_context;
+struct xml_dtd_entity;
+
+enum xml_error {
+  XML_ERR_OK = 0,
+  XML_ERR_WARN = 1000,                                 /* Warning */
+  XML_ERR_ERROR = 2000,                                        /* Recoverable error */
+  XML_ERR_FATAL = 3000,                                        /* Unrecoverable error */
+  XML_ERR_EOF,
+};
+
+enum xml_state {
+  XML_STATE_EOF,                                       /* EOF or a fatal error */
+  XML_STATE_START,                                     /* Initial state */
+  XML_STATE_XML_DECL,                                  /* XML_PULL_XML_DECL */
+  XML_STATE_DOCTYPE_DECL,                              /* XML_PULL_DOCTYPE_DECL */
+  XML_STATE_CHARS,                                     /* XML_PULL_CHARS */
+  XML_STATE_STAG,                                      /* XML_PULL_STAG */
+  XML_STATE_ETAG,                                      /* XML_PULL_ETAG */
+  XML_STATE_COMMENT,                                   /* XML_PULL_COMMENT */
+  XML_STATE_PI,                                                /* XML_PULL_PI */
+
+  /* Internal states */
+  XML_STATE_CHARS_BEFORE_STAG,
+  XML_STATE_CHARS_BEFORE_ETAG,
+  XML_STATE_CHARS_BEFORE_CDATA,
+  XML_STATE_CHARS_BEFORE_COMMENT,
+  XML_STATE_CHARS_BEFORE_PI,
+  XML_STATE_PROLOG_COMMENT,
+  XML_STATE_PROLOG_PI,
+  XML_STATE_EPILOG_COMMENT,
+  XML_STATE_EPILOG_PI,
+};
+
+enum xml_pull {
+  XML_PULL_XML_DECL =                  0x00000001,     /* Stop after the XML declaration */
+  XML_PULL_DOCTYPE_DECL =              0x00000002,     /* Stop in the doctype declaration (before optional internal subset) */
+  XML_PULL_CHARS =                     0x00000004,
+  XML_PULL_STAG =                      0x00000008,
+  XML_PULL_ETAG =                      0x00000010,
+  XML_PULL_COMMENT =                   0x00000020,
+  XML_PULL_PI =                                0x00000040,
+  XML_PULL_ALL =                       0xffffffff,
+};
+
+enum xml_flags {
+  /* Enable reporting of various events via SAX and/or PULL interface */
+  XML_REPORT_COMMENTS =                        0x00000001,     /* Report comments */
+  XML_REPORT_PIS =                     0x00000002,     /* Report processing instructions */
+  XML_REPORT_CHARS =                   0x00000004,     /* Report characters */
+  XML_REPORT_TAGS =                    0x00000008,     /* Report element starts/ends */
+  XML_REPORT_MISC = XML_REPORT_COMMENTS | XML_REPORT_PIS,
+  XML_REPORT_ALL = XML_REPORT_MISC | XML_REPORT_CHARS | XML_REPORT_TAGS,
+
+  /* Enable construction of DOM for these types */
+  XML_ALLOC_COMMENTS =                 0x00000010,     /* Create comment nodes */
+  XML_ALLOC_PIS =                      0x00000020,     /* Create processing instruction nodes */
+  XML_ALLOC_CHARS =                    0x00000040,     /* Create character nodes */
+  XML_ALLOC_TAGS =                     0x00000080,     /* Create element nodes */
+  XML_ALLOC_MISC = XML_ALLOC_COMMENTS | XML_ALLOC_PIS,
+  XML_ALLOC_ALL = XML_ALLOC_MISC | XML_ALLOC_CHARS | XML_ALLOC_TAGS,
+
+  /* Other parameters */
+  XML_VALIDATING =                     0x00000100,     /* Validate everything (not fully implemented!) */
+  XML_PARSE_DTD =                      0x00000200,     /* Enable parsing of DTD */
+  XML_NO_CHARS =                       0x00000400,     /* The current element must not contain character data (filled automatically if using DTD) */
+  XML_ALLOC_DEFAULT_ATTRS =            0x00000800,     /* Allocate default attribute values so they can be found by XML_ATTR_FOR_EACH */
+
+  /* Internals, do not change! */
+  XML_EMPTY_ELEM_TAG =                 0x00010000,     /* The current element matches EmptyElemTag */
+  XML_VERSION_1_1 =                    0x00020000,     /* XML version is 1.1, otherwise 1.0 */
+  XML_HAS_EXTERNAL_SUBSET =            0x00040000,     /* The document contains a reference to external DTD subset */
+  XML_HAS_INTERNAL_SUBSET =            0x00080000,     /* The document contains an internal subset */
+  XML_HAS_DTD =        XML_HAS_EXTERNAL_SUBSET | XML_HAS_INTERNAL_SUBSET,
+  XML_SRC_EOF =                                0x00100000,     /* EOF reached */
+  XML_SRC_EXPECTED_DECL =              0x00200000,     /* Just before optional or required XMLDecl/TextDecl */
+  XML_SRC_DOCUMENT =                   0x00400000,     /* The document entity */
+  XML_SRC_EXTERNAL =                   0x00800000,     /* An external entity */
+};
+
+enum xml_node_type {
+  XML_NODE_ELEM,
+  XML_NODE_COMMENT,
+  XML_NODE_CHARS,
+  XML_NODE_PI,
+};
+
+#define XML_NODE_FOR_EACH(var, node) CLIST_FOR_EACH(struct xml_node *, var, (node)->sons)
+#define XML_ATTR_FOR_EACH(var, node) SLIST_FOR_EACH(struct xml_attr *, var, (node)->attrs)
+
+struct xml_node {
+  cnode n;                                             /* Node for list of parent's sons */
+  uns type;                                            /* XML_NODE_x */
+  struct xml_node *parent;                             /* Parent node */
+  char *name;                                          /* Element name / PI target */
+  clist sons;                                          /* Children nodes */
+  union {
+    struct {
+      char *text;                                      /* PI text / Comment / CDATA */
+      uns len;                                         /* Text length in bytes */
+    };
+    struct {
+      struct xml_dtd_elem *dtd;                                /* Element DTD */
+      slist attrs;                                     /* Link list of element attributes */
+    };
+  };
+  void *user;                                          /* User-defined (initialized to NULL) */
+};
+
+struct xml_attr {
+  snode n;                                             /* Node for elem->attrs */
+  struct xml_node *elem;                               /* Parent element */
+  struct xml_dtd_attr *dtd;                            /* Attribute DTD */
+  char *name;                                          /* Attribute name */
+  char *val;                                           /* Attribute value */
+  void *user;                                          /* User-defined (initialized to NULL) */
+};
+
+#define XML_BUF_SIZE 32                                 /* At least 8 -- hardcoded */
+
+struct xml_source {
+  struct xml_source *next;                             /* Link list of pending fastbufs (xml_context.sources) */
+  struct fastbuf *fb;                                  /* Source fastbuf */
+  struct fastbuf *wrapped_fb;                          /* Original wrapped fastbuf (needed for cleanup) */
+  struct fastbuf wrap_fb;                              /* Fbmem wrapper */
+  u32 buf[2 * XML_BUF_SIZE];                           /* Read buffer with Unicode values and categories */
+  u32 *bptr, *bstop;                                   /* Current state of the buffer */
+  uns row;                                             /* File position */
+  char *expected_encoding;                             /* Initial encoding before any transformation has been made (expected in XMLDecl/TextDecl) */
+  char *fb_encoding;                                   /* Encoding of the source fastbuf */
+  char *decl_encoding;                                 /* Encoding read from the XMLDecl/TextDecl */
+  uns refill_cat1;                                     /* Character categories, which should be directly passed to the buffer */
+  uns refill_cat2;                                     /* Character categories, which should be processed as newlines (possibly in some built-in
+                                                          sequences) */
+  void (*refill)(struct xml_context *ctx);             /* Callback to decode source characters to the buffer */
+  unsigned short *refill_in_to_x;                      /* Libcharset input table */
+  uns saved_depth;                                     /* Saved ctx->depth */
+  uns pending_0xd;                                     /* The last read character is 0xD */
+};
+
+struct xml_context {
+  /* Error handling */
+  char *err_msg;                                       /* Last error message */
+  enum xml_error err_code;                             /* Last error code */
+  void *throw_buf;                                     /* Where to jump on error */
+  void (*h_warn)(struct xml_context *ctx);             /* Warning callback */
+  void (*h_error)(struct xml_context *ctx);            /* Recoverable error callback */
+  void (*h_fatal)(struct xml_context *ctx);            /* Unrecoverable error callback */
+
+  /* Memory management */
+  struct mempool *pool;                                        /* DOM pool */
+  struct mempool *stack;                               /* Stack pool (freed as soon as possible) */
+  struct xml_stack *stack_list;                                /* See xml_push(), xml_pop() */
+  uns flags;                                           /* XML_FLAG_x (restored on xml_pop()) */
+  uns depth;                                           /* Nesting level (for checking of valid source nesting -> valid pushes/pops on memory pools) */
+  struct fastbuf chars;                                        /* Character data / attribute value */
+  struct mempool_state chars_state;                    /* Mempool state before the current character block has started */
+  char *chars_trivial;                                 /* If not empty, it will be appended to chars */
+  void *tab_attrs;                                     /* Hash table of element attributes */
+
+  /* Input */
+  struct xml_source *src;                              /* Current source */
+  u32 *bptr, *bstop;                                   /* Buffer with preprocessed characters (validated UCS-4 + category flags) */
+  uns cat_chars;                                       /* Unicode range of supported characters (cdata, attribute values, ...) */
+  uns cat_unrestricted;                                        /* Unrestricted characters (may appear in document/external entities) */
+  uns cat_new_line;                                    /* New line characters */
+  uns cat_name;                                                /* Characters that may appear in names */
+  uns cat_sname;                                       /* Characters that may begin a name */
+
+  /* SAX-like interface */
+  void (*h_document_start)(struct xml_context *ctx);   /* Called before entering prolog */
+  void (*h_document_end)(struct xml_context *ctx);     /* Called after leaving epilog */
+  void (*h_xml_decl)(struct xml_context *ctx);         /* Called after the XML declaration */
+  void (*h_doctype_decl)(struct xml_context *ctx);     /* Called in the doctype declaration (before optional internal subset) */
+  void (*h_comment)(struct xml_context *ctx);          /* Called after a comment (only with XML_REPORT_COMMENTS) */
+  void (*h_pi)(struct xml_context *ctx);               /* Called after a processing instruction (only with XML_REPORT_PIS) */
+  void (*h_stag)(struct xml_context *ctx);             /* Called after STag or EmptyElemTag (only with XML_REPORT_TAGS) */
+  void (*h_etag)(struct xml_context *ctx);             /* Called before ETag or after EmptyElemTag (only with XML_REPORT_TAGS) */
+  void (*h_chars)(struct xml_context *ctx);            /* Called after some characters (only with XML_REPORT_CHARS) */
+  void (*h_block)(struct xml_context *ctx, char *text, uns len);       /* Called for each continuous block of characters not reported by h_cdata() (only with XML_REPORT_CHARS) */
+  void (*h_cdata)(struct xml_context *ctx, char *text, uns len);       /* Called for each CDATA section (only with XML_REPORT_CHARS) */
+  void (*h_ignorable)(struct xml_context *ctx, char *text, uns len);   /* Called for ignorable whitespace (content in tags without #PCDATA) */
+  void (*h_dtd_start)(struct xml_context *ctx);                /* Called just after the DTD structure is initialized */
+  void (*h_dtd_end)(struct xml_context *ctx);          /* Called after the DTD subsets */
+  struct xml_dtd_entity *(*h_find_entity)(struct xml_context *ctx, char *name);                /* Called when needed to resolve a general entity */
+  void (*h_resolve_entity)(struct xml_context *ctx, struct xml_dtd_entity *ent);       /* User should push source fastbuf for a parsed external entity (either general or parameter) */
+
+  /* DOM */
+  struct xml_node *dom;                                        /* DOM root */
+  struct xml_node *node;                               /* Current DOM node */
+
+  char *version_str;
+  uns standalone;
+  char *doctype;                                       /* The document type (or NULL if unknown) */
+  char *system_id;                                     /* DTD external id */
+  char *public_id;                                     /* DTD public id */
+  struct xml_dtd *dtd;                                 /* The DTD structure (or NULL) */
+  uns state;                                           /* Current state for the PULL interface (XML_STATE_x) */
+  uns pull;                                            /* Parameters for the PULL interface (XML_PULL_x) */
+};
+
+/* Initialize XML context */
+void xml_init(struct xml_context *ctx);
+
+/* Clean up all internal structures */
+void xml_cleanup(struct xml_context *ctx);
+
+/* Reuse XML context, equivalent to xml_cleanup() and xml_init() */
+void xml_reset(struct xml_context *ctx);
+
+/* Add XML source (fastbuf will be automatically closed) */
+struct xml_source *xml_push_fastbuf(struct xml_context *ctx, struct fastbuf *fb);
+
+/* Parse without the PULL interface, return XML_ERR_x code (zero on success) */
+uns xml_parse(struct xml_context *ctx);
+
+/* Parse with the PULL interface, return XML_STATE_x (zero on EOF or fatal error) */
+uns xml_next(struct xml_context *ctx);
+
+/* Equivalent to xml_next, but with temporarily changed ctx->pull value */
+uns xml_next_state(struct xml_context *ctx, uns pull);
+
+/* May be called on XML_STATE_STAG to skip its content; can return XML_STATE_ETAG or XML_STATE_EOF on fatal error */
+uns xml_skip_element(struct xml_context *ctx);
+
+/* Returns the current row number in the document entity */
+uns xml_row(struct xml_context *ctx);
+
+/* Finds a given attribute value in an XML_NODE_ELEM node */
+struct xml_attr *xml_attr_find(struct xml_context *ctx, struct xml_node *node, char *name);
+
+/* Similar to xml_attr_find, but it deals also with default values */
+char *xml_attr_value(struct xml_context *ctx, struct xml_node *node, char *name);
+
+/* The default value of h_find_entity(), knows &lt;, &gt;, &amp;, &apos; and &quot; */
+struct xml_dtd_entity *xml_def_find_entity(struct xml_context *ctx, char *name);
+
+/* The default value of h_resolve_entity(), throws an error */
+void xml_def_resolve_entity(struct xml_context *ctx, struct xml_dtd_entity *ent);
+
+/* Remove leading/trailing spaces and replace sequences of spaces with a single space character (non-CDATA attribute normalization) */
+uns xml_normalize_white(struct xml_context *ctx, char *value);
+
+/* Merge character contents of a given element to a single string (not recursive) */
+char *xml_merge_chars(struct xml_context *ctx, struct xml_node *node, struct mempool *pool);
+
+/* Merge character contents of a given subtree to a single string */
+char *xml_merge_dom_chars(struct xml_context *ctx, struct xml_node *node, struct mempool *pool);
+
+/* Public part of error handling */
+void xml_warn(struct xml_context *ctx, const char *format, ...);
+void xml_error(struct xml_context *ctx, const char *format, ...);
+void NONRET xml_fatal(struct xml_context *ctx, const char *format, ...);
+
+#endif
author	Martin Mares <mj@ucw.cz>
	Tue, 13 Jul 2010 11:45:30 +0000 (13:45 +0200)
committer	Martin Mares <mj@ucw.cz>
	Tue, 13 Jul 2010 11:45:30 +0000 (13:45 +0200)
sherlock/xml/Makefile	[deleted file]	patch \| blob \| history
sherlock/xml/TODO	[deleted file]	patch \| blob \| history
sherlock/xml/common.c	[deleted file]	patch \| blob \| history
sherlock/xml/dtd.c	[deleted file]	patch \| blob \| history
sherlock/xml/dtd.h	[deleted file]	patch \| blob \| history
sherlock/xml/internals.h	[deleted file]	patch \| blob \| history
sherlock/xml/libshxml.pc	[deleted file]	patch \| blob \| history
sherlock/xml/parse.c	[deleted file]	patch \| blob \| history
sherlock/xml/source.c	[deleted file]	patch \| blob \| history
sherlock/xml/unicat.pl	[deleted file]	patch \| blob \| history
sherlock/xml/xml-test.c	[deleted file]	patch \| blob \| history
sherlock/xml/xml-test.t	[deleted file]	patch \| blob \| history
sherlock/xml/xml.h	[deleted file]	patch \| blob \| history
shxml/Makefile	[new file with mode: 0644]	patch \| blob
shxml/TODO	[new file with mode: 0644]	patch \| blob
shxml/common.c	[new file with mode: 0644]	patch \| blob
shxml/dtd.c	[new file with mode: 0644]	patch \| blob
shxml/dtd.h	[new file with mode: 0644]	patch \| blob
shxml/internals.h	[new file with mode: 0644]	patch \| blob
shxml/libshxml.pc	[new file with mode: 0644]	patch \| blob
shxml/parse.c	[new file with mode: 0644]	patch \| blob
shxml/source.c	[new file with mode: 0644]	patch \| blob
shxml/unicat.pl	[new file with mode: 0755]	patch \| blob
shxml/xml-test.c	[new file with mode: 0644]	patch \| blob
shxml/xml-test.t	[new file with mode: 0644]	patch \| blob
shxml/xml.h	[new file with mode: 0644]	patch \| blob