/*** Elements ***/
+static uns
+xml_validate_element(struct xml_dtd_elem_node *root, struct xml_dtd_elem *elem)
+{
+ if (root->elem)
+ return elem == root->elem;
+ else
+ SLIST_FOR_EACH(struct xml_dtd_elem_node *, son, root->sons)
+ if (xml_validate_element(son, elem))
+ return 1;
+ return 0;
+}
+
static void
xml_push_element(struct xml_context *ctx)
{
xml_error(ctx, "Undefined element <%s>", e->name);
else
{
- if (e->dtd->type == XML_DTD_ELEM_MIXED)
+ struct xml_dtd_elem *dtd = e->dtd, *parent_dtd = e->parent ? e->parent->dtd : NULL;
+ if (dtd->type == XML_DTD_ELEM_MIXED)
ctx->flags &= ~XML_NO_CHARS;
else
ctx->flags |= XML_NO_CHARS;
-
- // FIXME: validate regular expressions
+ if (parent_dtd)
+ if (parent_dtd->type == XML_DTD_ELEM_EMPTY)
+ xml_error(ctx, "Empty element must not contain children");
+ else if (parent_dtd->type != XML_DTD_ELEM_ANY)
+ {
+ // FIXME: validate regular expressions
+ if (!xml_validate_element(parent_dtd->node, dtd))
+ xml_error(ctx, "Unexpected element <%s>", e->name);
+ }
}
while (1)
{
ctx->flags |= XML_HAS_EXTERNAL_SUBSET;
}
if (xml_peek_char(ctx) == '[')
- ctx->flags |= XML_HAS_INTERNAL_SUBSET;
+ {
+ ctx->flags |= XML_HAS_INTERNAL_SUBSET;
+ xml_skip_char(ctx);
+ xml_inc(ctx);
+ }
if (ctx->h_doctype_decl)
ctx->h_doctype_decl(ctx);
}
/* DTD: Internal subset */
static void
-xml_parse_internal_subset(struct xml_context *ctx)
+xml_parse_subset(struct xml_context *ctx, uns external)
{
- // FIXME: comments/pi have no parent
+ // FIXME:
+ // -- comments/pi have no parent
+ // -- conditional sections in external subset
+ // -- check corectness of parameter entities
+
/* '[' intSubset ']'
* intSubset :== (markupdecl | DeclSep)
- * Already parsed: ']' */
+ * Already parsed: '['
+ *
+ * extSubsetDecl ::= ( markupdecl | conditionalSect | DeclSep)*
+ */
while (1)
{
xml_parse_white(ctx, 0);
goto invalid_markup;
else if (c == '%')
xml_parse_pe_ref(ctx);
- else if (c == ']')
- break;
+ else if (c == ']' && !external)
+ {
+ break;
+ }
+ else if (c == '>' && external)
+ {
+ break;
+ }
else
goto invalid_markup;
}
xml_dec(ctx);
- xml_dec(ctx);
return;
-invalid_markup:
- xml_fatal(ctx, "Invalid markup in the internal subset");
+invalid_markup: ;
+ xml_fatal(ctx, "Invalid markup in the %s subset", external ? "external" : "internal");
}
/*** The State Machine ***/
xml_unget_char(ctx);
xml_parse_doctype_decl(ctx);
PULL(DOCTYPE_DECL);
- if (xml_peek_char(ctx) == '[')
- {
- xml_skip_char(ctx);
- xml_inc(ctx);
- if (ctx->flags & XML_PARSE_DTD)
- {
- xml_dtd_init(ctx);
- if (ctx->h_dtd_start)
- ctx->h_dtd_start(ctx);
- // FIXME: pull iface?
- xml_parse_internal_subset(ctx);
- // FIXME: external subset
- if (ctx->h_dtd_end)
- ctx->h_dtd_end(ctx);
- }
- else
- xml_skip_internal_subset(ctx);
- }
+ if (ctx->flags & XML_HAS_DTD)
+ if (ctx->flags & XML_PARSE_DTD)
+ {
+ xml_dtd_init(ctx);
+ if (ctx->h_dtd_start)
+ ctx->h_dtd_start(ctx);
+ if (ctx->flags & XML_HAS_INTERNAL_SUBSET)
+ {
+ xml_parse_subset(ctx, 0);
+ xml_dec(ctx);
+ }
+ if (ctx->flags & XML_HAS_EXTERNAL_SUBSET)
+ {
+ struct xml_dtd_entity ent = {
+ .system_id = ctx->system_id,
+ .public_id = ctx->public_id,
+ };
+ xml_parse_white(ctx, 0);
+ xml_parse_char(ctx, '>');
+ xml_unget_char(ctx);
+ ASSERT(ctx->h_resolve_entity);
+ ctx->h_resolve_entity(ctx, &ent);
+ ctx->flags |= XML_SRC_EXPECTED_DECL;
+ xml_parse_subset(ctx, 1);
+ xml_unget_char(ctx);;
+ }
+ if (ctx->h_dtd_end)
+ ctx->h_dtd_end(ctx);
+ }
+ else if (ctx->flags & XML_HAS_INTERNAL_SUBSET)
+ xml_skip_internal_subset(ctx);
xml_parse_white(ctx, 0);
xml_parse_char(ctx, '>');
xml_dec(ctx);