X-Git-Url: http://mj.ucw.cz/gitweb/?a=blobdiff_plain;f=sherlock%2Fxml%2Fparse.c;h=27141b142287c973d7385a9a30c37d9df9bed16f;hb=89f86d97def2a365b1799f70e3f254264b9c7615;hp=6f5b192f0ab29e021f4e8e219e6bf6c5d9544cbb;hpb=306801fc6da02fcbed53db6ff98cd0277b763d9a;p=libucw.git diff --git a/sherlock/xml/parse.c b/sherlock/xml/parse.c index 6f5b192f..27141b14 100644 --- a/sherlock/xml/parse.c +++ b/sherlock/xml/parse.c @@ -13,11 +13,11 @@ #include "sherlock/xml/xml.h" #include "sherlock/xml/dtd.h" #include "sherlock/xml/internals.h" -#include "lib/fastbuf.h" -#include "lib/ff-unicode.h" -#include "lib/unicode.h" -#include "lib/chartype.h" -#include "lib/hashfunc.h" +#include "ucw/fastbuf.h" +#include "ucw/ff-unicode.h" +#include "ucw/unicode.h" +#include "ucw/chartype.h" +#include "ucw/hashfunc.h" #include @@ -622,7 +622,7 @@ xml_attrs_init_key(struct xml_attrs_table *t UNUSED, struct xml_attr *a, struct #define HASH_WANT_FIND #define HASH_GIVE_ALLOC XML_HASH_GIVE_ALLOC -#include "lib/hashtable.h" +#include "ucw/hashtable.h" static void xml_parse_attr(struct xml_context *ctx) @@ -749,6 +749,19 @@ xml_push_element(struct xml_context *ctx) xml_unget_char(ctx); xml_parse_attr(ctx); } + if (e->dtd) + SLIST_FOR_EACH(struct xml_dtd_attr *, a, e->dtd->attrs) + if (a->default_mode == XML_ATTR_REQUIRED) + { + if (!xml_attrs_find(ctx->tab_attrs, e, a->name)) + xml_error(ctx, "Missing required attribute %s in element <%s>", a->name, e->name); + } + else if (a->default_mode != XML_ATTR_IMPLIED && ctx->flags & XML_ALLOC_DEFAULT_ATTRS) + { + struct xml_attr *attr = xml_attrs_lookup(ctx->tab_attrs, e, a->name); + if (!attr->val) + attr->val = a->default_value; + } if ((ctx->flags & XML_REPORT_TAGS) && ctx->h_stag) ctx->h_stag(ctx); } @@ -845,7 +858,11 @@ xml_parse_doctype_decl(struct xml_context *ctx) ctx->flags |= XML_HAS_EXTERNAL_SUBSET; } if (xml_peek_char(ctx) == '[') - ctx->flags |= XML_HAS_INTERNAL_SUBSET; + { + ctx->flags |= XML_HAS_INTERNAL_SUBSET; + xml_skip_char(ctx); + xml_inc(ctx); + } if (ctx->h_doctype_decl) ctx->h_doctype_decl(ctx); } @@ -857,12 +874,19 @@ xml_parse_doctype_decl(struct xml_context *ctx) /* DTD: Internal subset */ static void -xml_parse_internal_subset(struct xml_context *ctx) +xml_parse_subset(struct xml_context *ctx, uns external) { - // FIXME: comments/pi have no parent + // FIXME: + // -- comments/pi have no parent + // -- conditional sections in external subset + // -- check corectness of parameter entities + /* '[' intSubset ']' * intSubset :== (markupdecl | DeclSep) - * Already parsed: ']' */ + * Already parsed: '[' + * + * extSubsetDecl ::= ( markupdecl | conditionalSect | DeclSep)* + */ while (1) { xml_parse_white(ctx, 0); @@ -910,16 +934,21 @@ xml_parse_internal_subset(struct xml_context *ctx) goto invalid_markup; else if (c == '%') xml_parse_pe_ref(ctx); - else if (c == ']') - break; + else if (c == ']' && !external) + { + break; + } + else if (c == '>' && external) + { + break; + } else goto invalid_markup; } xml_dec(ctx); - xml_dec(ctx); return; -invalid_markup: - xml_fatal(ctx, "Invalid markup in the internal subset"); +invalid_markup: ; + xml_fatal(ctx, "Invalid markup in the %s subset", external ? "external" : "internal"); } /*** The State Machine ***/ @@ -994,24 +1023,37 @@ error: xml_unget_char(ctx); xml_parse_doctype_decl(ctx); PULL(DOCTYPE_DECL); - if (xml_peek_char(ctx) == '[') - { - xml_skip_char(ctx); - xml_inc(ctx); - if (ctx->flags & XML_PARSE_DTD) - { - xml_dtd_init(ctx); - if (ctx->h_dtd_start) - ctx->h_dtd_start(ctx); - // FIXME: pull iface? - xml_parse_internal_subset(ctx); - // FIXME: external subset - if (ctx->h_dtd_end) - ctx->h_dtd_end(ctx); - } - else - xml_skip_internal_subset(ctx); - } + if (ctx->flags & XML_HAS_DTD) + if (ctx->flags & XML_PARSE_DTD) + { + xml_dtd_init(ctx); + if (ctx->h_dtd_start) + ctx->h_dtd_start(ctx); + if (ctx->flags & XML_HAS_INTERNAL_SUBSET) + { + xml_parse_subset(ctx, 0); + xml_dec(ctx); + } + if (ctx->flags & XML_HAS_EXTERNAL_SUBSET) + { + struct xml_dtd_entity ent = { + .system_id = ctx->system_id, + .public_id = ctx->public_id, + }; + xml_parse_white(ctx, 0); + xml_parse_char(ctx, '>'); + xml_unget_char(ctx); + ASSERT(ctx->h_resolve_entity); + ctx->h_resolve_entity(ctx, &ent); + ctx->flags |= XML_SRC_EXPECTED_DECL; + xml_parse_subset(ctx, 1); + xml_unget_char(ctx);; + } + if (ctx->h_dtd_end) + ctx->h_dtd_end(ctx); + } + else if (ctx->flags & XML_HAS_INTERNAL_SUBSET) + xml_skip_internal_subset(ctx); xml_parse_white(ctx, 0); xml_parse_char(ctx, '>'); xml_dec(ctx); @@ -1202,3 +1244,44 @@ xml_parse(struct xml_context *ctx) while (xml_next(ctx)); return ctx->err_code; } + +char * +xml_merge_chars(struct xml_context *ctx UNUSED, struct xml_node *node, struct mempool *pool) +{ + ASSERT(node->type == XML_NODE_ELEM); + char *p = mp_start_noalign(pool, 1); + XML_NODE_FOR_EACH(son, node) + if (son->type == XML_NODE_CHARS) + { + p = mp_spread(pool, p, son->len + 1); + memcpy(p, son->text, son->len); + p += son->len; + } + *p++ = 0; + return mp_end(pool, p); +} + +static char * +xml_append_dom_chars(char *p, struct mempool *pool, struct xml_node *node) +{ + XML_NODE_FOR_EACH(son, node) + if (son->type == XML_NODE_CHARS) + { + p = mp_spread(pool, p, son->len + 1); + memcpy(p, son->text, son->len); + p += son->len; + } + else if (son->type == XML_NODE_ELEM) + p = xml_append_dom_chars(p, pool, son); + return p; +} + +char * +xml_merge_dom_chars(struct xml_context *ctx UNUSED, struct xml_node *node, struct mempool *pool) +{ + ASSERT(node->type == XML_NODE_ELEM); + char *p = mp_start_noalign(pool, 1); + p = xml_append_dom_chars(p, pool, node); + *p++ = 0; + return mp_end(pool, p); +}