From da05d6244dd826a76a5cacfa6dcd77dc9fd0c28e Mon Sep 17 00:00:00 2001 From: Pavel Charvat Date: Mon, 7 Jan 2008 13:21:09 +0100 Subject: [PATCH] XML & WML: WML parser now uses the new XML library. --- sherlock/xml/common.c | 2 ++ sherlock/xml/common.h | 1 + sherlock/xml/dtd.c | 25 +++++++++++++------------ sherlock/xml/dtd.h | 4 +++- sherlock/xml/parse.c | 14 ++++++++++---- sherlock/xml/xml-test.c | 2 +- sherlock/xml/xml.h | 11 ++++++++--- 7 files changed, 38 insertions(+), 21 deletions(-) diff --git a/sherlock/xml/common.c b/sherlock/xml/common.c index 0c516ea9..d3155ed8 100644 --- a/sherlock/xml/common.c +++ b/sherlock/xml/common.c @@ -161,5 +161,7 @@ xml_reset(struct xml_context *ctx) mp_flush(pool); mp_flush(stack); bzero(ctx, sizeof(*ctx)); + ctx->pool = pool; + ctx->stack = stack; xml_do_init(ctx); } diff --git a/sherlock/xml/common.h b/sherlock/xml/common.h index dd540e86..9ea8f74e 100644 --- a/sherlock/xml/common.h +++ b/sherlock/xml/common.h @@ -87,6 +87,7 @@ xml_push_dom(struct xml_context *ctx) struct xml_dom_stack *s = xml_do_push(ctx, sizeof(*s)); mp_save(ctx->pool, &s->state); struct xml_node *n = mp_alloc(ctx->pool, sizeof(*n)); + n->user = NULL; if (n->parent = ctx->node) clist_add_tail(&n->parent->sons, &n->n); return ctx->node = n; diff --git a/sherlock/xml/dtd.c b/sherlock/xml/dtd.c index a4ae9a02..fbe7a325 100644 --- a/sherlock/xml/dtd.c +++ b/sherlock/xml/dtd.c @@ -44,7 +44,7 @@ XML_HASH_GIVE_ALLOC #include "lib/hashtable.h" static struct xml_dtd_ent * -xml_dtd_declare_trivial_ent(struct xml_context *ctx, char *name, char *text) +xml_dtd_declare_trivial_ent(struct xml_context *ctx, char *name, uns uni) { struct xml_dtd *dtd = ctx->dtd; struct xml_dtd_ent *ent = xml_dtd_ents_lookup(dtd->tab_ents, name); @@ -54,35 +54,36 @@ xml_dtd_declare_trivial_ent(struct xml_context *ctx, char *name, char *text) return NULL; } slist_add_tail(&dtd->ents, &ent->n); - ent->flags = XML_DTD_ENT_DECLARED | XML_DTD_ENT_TRIVIAL; - ent->text = text; - ent->len = strlen(text); + ent->flags = XML_DTD_ENT_DECLARED | XML_DTD_ENT_TRIVIAL_UNI; + ent->uni = uni; return ent; } static void xml_dtd_declare_default_ents(struct xml_context *ctx) { - xml_dtd_declare_trivial_ent(ctx, "lt", "<"); - xml_dtd_declare_trivial_ent(ctx, "gt", ">"); - xml_dtd_declare_trivial_ent(ctx, "amp", "&"); - xml_dtd_declare_trivial_ent(ctx, "apos", "'"); - xml_dtd_declare_trivial_ent(ctx, "quot", "\""); + xml_dtd_declare_trivial_ent(ctx, "lt", 60); + xml_dtd_declare_trivial_ent(ctx, "gt", 62); + xml_dtd_declare_trivial_ent(ctx, "amp", 38); + xml_dtd_declare_trivial_ent(ctx, "apos", 39); + xml_dtd_declare_trivial_ent(ctx, "quot", 34); } struct xml_dtd_ent * xml_dtd_find_ent(struct xml_context *ctx, char *name) { struct xml_dtd *dtd = ctx->dtd; - if (dtd) + if (ctx->h_resolve_entity) + return ctx->h_resolve_entity(ctx, name); + else if (dtd) { struct xml_dtd_ent *ent = xml_dtd_ents_find(dtd->tab_ents, name); return !ent ? NULL : (ent->flags & XML_DTD_ENT_DECLARED) ? ent : NULL; } else { -#define ENT(n, t) ent_##n = { .name = #n, .text = t, .len = 1, .flags = XML_DTD_ENT_DECLARED | XML_DTD_ENT_TRIVIAL } - static struct xml_dtd_ent ENT(lt, "<"), ENT(gt, ">"), ENT(amp, "&"), ENT(apos, "'"), ENT(quot, "\""); +#define ENT(n, u) ent_##n = { .name = #n, .uni = u, .flags = XML_DTD_ENT_DECLARED | XML_DTD_ENT_TRIVIAL_UNI } + static struct xml_dtd_ent ENT(lt, 60), ENT(gt, 62), ENT(amp, 38), ENT(apos, 39), ENT(quot, 34); #undef ENT switch (name[0]) { diff --git a/sherlock/xml/dtd.h b/sherlock/xml/dtd.h index 549696e1..522274f2 100644 --- a/sherlock/xml/dtd.h +++ b/sherlock/xml/dtd.h @@ -54,7 +54,8 @@ enum xml_dtd_ent_flags { XML_DTD_ENT_PARAMETER = 0x4, /* Parameter entity, general otherwise */ XML_DTD_ENT_EXTERNAL = 0x8, /* External entity, internal otherwise */ XML_DTD_ENT_UNPARSED = 0x10, /* Unparsed entity, parsed otherwise */ - XML_DTD_ENT_TRIVIAL = 0x20, /* Replacement text is a sequence of characters and character references */ + XML_DTD_ENT_TRIVIAL_STR = 0x20, /* Replacement text is a sequence of characters and character references */ + XML_DTD_ENT_TRIVIAL_UNI = 0x40, /* Replacement text is a single Unicode character */ }; struct xml_dtd_ent { @@ -63,6 +64,7 @@ struct xml_dtd_ent { char *name; /* Entity name */ char *text; /* Replacement text / expanded replacement text (XML_DTD_ENT_TRIVIAL) */ uns len; /* Text length */ + uns uni; /* Unicode value */ struct xml_ext_id eid; /* External ID */ struct xml_dtd_notn *notn; /* Notation (XML_DTD_ENT_UNPARSED only) */ }; diff --git a/sherlock/xml/parse.c b/sherlock/xml/parse.c index 25ab84cb..cc0e59d2 100644 --- a/sherlock/xml/parse.c +++ b/sherlock/xml/parse.c @@ -451,7 +451,12 @@ xml_parse_ref(struct xml_context *ctx) bputs(out, name); bputc(out, ';'); } - else if (ent->flags & XML_DTD_ENT_TRIVIAL) + else if (ent->flags & XML_DTD_ENT_TRIVIAL_UNI) + { + TRACE(ctx, "Trivial entity &%s;", name); + bput_utf8_32(out, ent->uni); + } + else if (ent->flags & XML_DTD_ENT_TRIVIAL_STR) { TRACE(ctx, "Trivial entity &%s;", name); bwrite(out, ent->text, ent->len); @@ -529,6 +534,7 @@ xml_attrs_init_key(struct xml_attrs_table *t UNUSED, struct xml_attr *a, struct a->elem = e; a->name = name; a->val = NULL; + a->user = NULL; slist_add_tail(&e->attrs, &a->n); } @@ -602,7 +608,7 @@ xml_push_element(struct xml_context *ctx) slist_init(&e->attrs); if (!e->parent) { - ctx->root = e; + ctx->dom = e; if (ctx->doctype && strcmp(e->name, ctx->doctype)) xml_error(ctx, "The root element %s does not match the document type %s", e->name, ctx->doctype); } @@ -638,7 +644,7 @@ xml_pop_element(struct xml_context *ctx) if (free) { if (!e->parent) - ctx->root = NULL; + ctx->dom = NULL; /* Restore hash table of attributes */ SLIST_FOR_EACH(struct xml_attr *, a, e->attrs) xml_attrs_remove(ctx->tab_attrs, a); @@ -876,7 +882,7 @@ error: xml_dtd_init(ctx); if (ctx->h_dtd_start) ctx->h_dtd_start(ctx); - // FIXME: pu;; iface? + // FIXME: pull iface? xml_parse_internal_subset(ctx); // FIXME: external subset if (ctx->h_dtd_end) diff --git a/sherlock/xml/xml-test.c b/sherlock/xml/xml-test.c index ab492e19..db252c8d 100644 --- a/sherlock/xml/xml-test.c +++ b/sherlock/xml/xml-test.c @@ -322,7 +322,7 @@ main(int argc, char **argv) { bputs(out, "PULL: eof\n"); if (want_dom) - show_tree(ctx.root, 0); + show_tree(ctx.dom, 0); } xml_cleanup(&ctx); diff --git a/sherlock/xml/xml.h b/sherlock/xml/xml.h index a608bfb8..0e3cacea 100644 --- a/sherlock/xml/xml.h +++ b/sherlock/xml/xml.h @@ -15,6 +15,9 @@ #include "lib/mempool.h" #include "lib/fastbuf.h" +struct xml_context; +struct xml_dtd_ent; + enum xml_error { // FIXME XML_ERR_OK = 0, @@ -121,6 +124,7 @@ struct xml_node { slist attrs; /* Link list of element attributes */ }; }; + void *user; /* User-defined (initialized to NULL) */ }; struct xml_attr { @@ -128,6 +132,7 @@ struct xml_attr { struct xml_node *elem; /* Parent element */ char *name; /* Attribute name */ char *val; /* Attribute value */ + void *user; /* User-defined (initialized to NULL) */ }; struct xml_context { @@ -163,16 +168,17 @@ struct xml_context { void (*h_xml_decl)(struct xml_context *ctx); /* Called after the XML declaration */ void (*h_doctype_decl)(struct xml_context *ctx); /* Called in the doctype declaration (before optional internal subset) */ void (*h_comment)(struct xml_context *ctx); /* Called after a comment (only with XML_REPORT_COMMENTS) */ - void (*h_pi)(struct xml_context *ctx); /* Called after a processing instruction (only with XML_REPORT_PIS) */ + void (*h_pi)(struct xml_context *ctx); /* Called after a processing instruction (only with XML_REPORT_PIS) */ void (*h_stag)(struct xml_context *ctx); /* Called after STag or EmptyElemTag (only with XML_REPORT_TAGS) */ void (*h_etag)(struct xml_context *ctx); /* Called before ETag or after EmptyElemTag (only with XML_REPORT_TAGS) */ void (*h_chars)(struct xml_context *ctx); /* Called after some characters (only with XML_REPORT_CHARS) */ void (*h_cdata)(struct xml_context *ctx); /* Called after a CDATA section (only with XML_REPORT_CHARS and XML_UNFOLD_CDATA) */ void (*h_dtd_start)(struct xml_context *ctx); /* Called just after the DTD structure is initialized */ void (*h_dtd_end)(struct xml_context *ctx); /* Called after DTD subsets subsets */ + struct xml_dtd_ent *(*h_resolve_entity)(struct xml_context *ctx, char *name); /* DOM */ - struct xml_node *root; /* DOM root */ + struct xml_node *dom; /* DOM root */ struct xml_node *node; /* Current DOM node */ char *version_str; @@ -186,7 +192,6 @@ struct xml_context { void (*start_entity)(struct xml_context *ctx); void (*end_entity)(struct xml_context *ctx); - struct fastbuf *(*resolve_entity)(struct xml_context *ctx); void (*notation_decl)(struct xml_context *ctx); void (*unparsed_entity_decl)(struct xml_context *ctx); }; -- 2.39.2