X-Git-Url: http://mj.ucw.cz/gitweb/?a=blobdiff_plain;ds=sidebyside;f=sherlock%2Fxml%2Fparse.c;h=6f5b192f0ab29e021f4e8e219e6bf6c5d9544cbb;hb=306801fc6da02fcbed53db6ff98cd0277b763d9a;hp=ef957bc6911632bb4553e63b8ca4f3cb4955255e;hpb=7ff16aadb50619ba67d2fe3fa0b7a958b86c3162;p=libucw.git diff --git a/sherlock/xml/parse.c b/sherlock/xml/parse.c index ef957bc6..6f5b192f 100644 --- a/sherlock/xml/parse.c +++ b/sherlock/xml/parse.c @@ -421,9 +421,16 @@ xml_flush_chars(struct xml_context *ctx) uns len = xml_end_chars(ctx, &text), rlen; if (len) { + if (ctx->flags & XML_NO_CHARS) + { + if ((ctx->flags & XML_REPORT_CHARS) && ctx->h_ignorable) + ctx->h_ignorable(ctx, text, len); + mp_restore(ctx->pool, &ctx->chars_state); + return 0; + } if ((ctx->flags & XML_REPORT_CHARS) && ctx->h_block && (rlen = xml_report_chars(ctx, &rtext))) ctx->h_block(ctx, rtext, rlen); - if (!(ctx->flags & XML_ALLOC_CHARS) && (!(ctx->flags & XML_REPORT_CHARS) || !ctx->h_chars)) + if (!(ctx->flags & XML_ALLOC_CHARS) && !(ctx->flags & XML_REPORT_CHARS)) { mp_restore(ctx->pool, &ctx->chars_state); return 0; @@ -450,25 +457,51 @@ xml_append_chars(struct xml_context *ctx) { TRACE(ctx, "append_chars"); struct fastbuf *out = &ctx->chars; - while (xml_get_char(ctx) != '<') - if (xml_last_char(ctx) == '&') - { - xml_inc(ctx); - xml_parse_ref(ctx); - } - else - bput_utf8_32(out, xml_last_char(ctx)); + if (ctx->flags & XML_NO_CHARS) + while (xml_get_char(ctx) != '<') + if (xml_last_cat(ctx) & XML_CHAR_WHITE) + bput_utf8_32(out, xml_last_char(ctx)); + else + { + xml_error(ctx, "This element must not contain character data"); + while (xml_get_char(ctx) != '<'); + break; + } + else + while (xml_get_char(ctx) != '<') + if (xml_last_char(ctx) == '&') + { + xml_inc(ctx); + xml_parse_ref(ctx); + } + else + bput_utf8_32(out, xml_last_char(ctx)); xml_unget_char(ctx); } /*** CDATA sections ***/ +static void +xml_skip_cdata(struct xml_context *ctx) +{ + TRACE(ctx, "skip_cdata"); + xml_parse_seq(ctx, "CDATA["); + while (xml_get_char(ctx) != ']' || xml_get_char(ctx) != ']' || xml_get_char(ctx) != '>'); + xml_dec(ctx); +} + static void xml_append_cdata(struct xml_context *ctx) { /* CDSect :== '' Char*)) ']]>' * Already parsed: 'flags & XML_NO_CHARS) + { + xml_error(ctx, "This element must not contain CDATA"); + xml_skip_cdata(ctx); + return; + } xml_parse_seq(ctx, "CDATA["); struct fastbuf *out = &ctx->chars; uns rlen; @@ -493,15 +526,6 @@ xml_append_cdata(struct xml_context *ctx) xml_dec(ctx); } -static void UNUSED -xml_skip_cdata(struct xml_context *ctx) -{ - TRACE(ctx, "skip_cdata"); - xml_parse_seq(ctx, "CDATA["); - while (xml_get_char(ctx) != ']' || xml_get_char(ctx) != ']' || xml_get_char(ctx) != '>'); - xml_dec(ctx); -} - /*** Attribute values ***/ char * @@ -630,6 +654,18 @@ xml_attr_find(struct xml_context *ctx, struct xml_node *node, char *name) return xml_attrs_find(ctx->tab_attrs, node, name); } +char * +xml_attr_value(struct xml_context *ctx, struct xml_node *node, char *name) +{ + struct xml_attr *attr = xml_attrs_find(ctx->tab_attrs, node, name); + if (attr) + return attr->val; + if (!node->dtd) + return NULL; + struct xml_dtd_attr *dtd = xml_dtd_find_attr(ctx, node->dtd, name); + return dtd ? dtd->default_value : NULL; +} + void xml_attrs_table_init(struct xml_context *ctx) { @@ -644,6 +680,18 @@ xml_attrs_table_cleanup(struct xml_context *ctx) /*** Elements ***/ +static uns +xml_validate_element(struct xml_dtd_elem_node *root, struct xml_dtd_elem *elem) +{ + if (root->elem) + return elem == root->elem; + else + SLIST_FOR_EACH(struct xml_dtd_elem_node *, son, root->sons) + if (xml_validate_element(son, elem)) + return 1; + return 0; +} + static void xml_push_element(struct xml_context *ctx) { @@ -669,7 +717,20 @@ xml_push_element(struct xml_context *ctx) xml_error(ctx, "Undefined element <%s>", e->name); else { - // FIXME: validate regular expressions + struct xml_dtd_elem *dtd = e->dtd, *parent_dtd = e->parent ? e->parent->dtd : NULL; + if (dtd->type == XML_DTD_ELEM_MIXED) + ctx->flags &= ~XML_NO_CHARS; + else + ctx->flags |= XML_NO_CHARS; + if (parent_dtd) + if (parent_dtd->type == XML_DTD_ELEM_EMPTY) + xml_error(ctx, "Empty element must not contain children"); + else if (parent_dtd->type != XML_DTD_ELEM_ANY) + { + // FIXME: validate regular expressions + if (!xml_validate_element(parent_dtd->node, dtd)) + xml_error(ctx, "Unexpected element <%s>", e->name); + } } while (1) { @@ -1108,6 +1169,28 @@ epilog: ASSERT(0); } +uns +xml_next_state(struct xml_context *ctx, uns pull) +{ + uns saved = ctx->pull; + ctx->pull = pull; + uns res = xml_next(ctx); + ctx->pull = saved; + return res; +} + +uns +xml_skip_element(struct xml_context *ctx) +{ + ASSERT(ctx->state == XML_STATE_STAG); + struct xml_node *node = ctx->node; + uns saved = ctx->pull, res; + ctx->pull = XML_PULL_ETAG; + while ((res = xml_next(ctx)) && ctx->node != node); + ctx->pull = saved; + return res; +} + uns xml_parse(struct xml_context *ctx) {