uns len = xml_end_chars(ctx, &text), rlen;
if (len)
{
+ if (ctx->flags & XML_NO_CHARS)
+ {
+ if ((ctx->flags & XML_REPORT_CHARS) && ctx->h_ignorable)
+ ctx->h_ignorable(ctx, text, len);
+ mp_restore(ctx->pool, &ctx->chars_state);
+ return 0;
+ }
if ((ctx->flags & XML_REPORT_CHARS) && ctx->h_block && (rlen = xml_report_chars(ctx, &rtext)))
ctx->h_block(ctx, rtext, rlen);
- if (!(ctx->flags & XML_ALLOC_CHARS) && !(ctx->flags & XML_REPORT_CHARS) && !ctx->h_chars)
+ if (!(ctx->flags & XML_ALLOC_CHARS) && !(ctx->flags & XML_REPORT_CHARS))
{
mp_restore(ctx->pool, &ctx->chars_state);
return 0;
{
TRACE(ctx, "append_chars");
struct fastbuf *out = &ctx->chars;
- while (xml_get_char(ctx) != '<')
- if (xml_last_char(ctx) == '&')
- {
- xml_inc(ctx);
- xml_parse_ref(ctx);
- }
- else
- bput_utf8_32(out, xml_last_char(ctx));
+ if (ctx->flags & XML_NO_CHARS)
+ while (xml_get_char(ctx) != '<')
+ if (xml_last_cat(ctx) & XML_CHAR_WHITE)
+ bput_utf8_32(out, xml_last_char(ctx));
+ else
+ {
+ xml_error(ctx, "This element must not contain character data");
+ while (xml_get_char(ctx) != '<');
+ break;
+ }
+ else
+ while (xml_get_char(ctx) != '<')
+ if (xml_last_char(ctx) == '&')
+ {
+ xml_inc(ctx);
+ xml_parse_ref(ctx);
+ }
+ else
+ bput_utf8_32(out, xml_last_char(ctx));
xml_unget_char(ctx);
}
/*** CDATA sections ***/
+/*
+ * Skip a CDATA section without storing its contents.
+ * Expects "CDATA[" to be the next input (verified by xml_parse_seq()) and
+ * then consumes characters up to and including the closing "]]>".
+ */
+static void
+xml_skip_cdata(struct xml_context *ctx)
+{
+  TRACE(ctx, "skip_cdata");
+  xml_parse_seq(ctx, "CDATA[");
+  /* Count consecutive ']' characters so that runs such as "]]]>" still end
+   * the section; a chained three-character comparison would consume part of
+   * the run on a mismatch and then miss the real terminator. */
+  uns brackets = 0;
+  for (;;)
+    {
+      uns c = xml_get_char(ctx);
+      if (c == ']')
+        brackets++;
+      else if (c == '>' && brackets >= 2)
+        break;
+      else
+        brackets = 0;
+    }
+  xml_dec(ctx);
+}
+
static void
xml_append_cdata(struct xml_context *ctx)
{
/* CDSect :== '<![CDATA[' (Char* - (Char* ']]>' Char*)) ']]>'
* Already parsed: '<![' */
TRACE(ctx, "append_cdata");
+ if (ctx->flags & XML_NO_CHARS)
+ {
+ xml_error(ctx, "This element must not contain CDATA");
+ xml_skip_cdata(ctx);
+ return;
+ }
xml_parse_seq(ctx, "CDATA[");
struct fastbuf *out = &ctx->chars;
uns rlen;
xml_dec(ctx);
}
-static void UNUSED
-xml_skip_cdata(struct xml_context *ctx)
-{
- TRACE(ctx, "skip_cdata");
- xml_parse_seq(ctx, "CDATA[");
- while (xml_get_char(ctx) != ']' || xml_get_char(ctx) != ']' || xml_get_char(ctx) != '>');
- xml_dec(ctx);
-}
-
/*** Attribute values ***/
char *
xml_error(ctx, "Undefined element <%s>", e->name);
else
{
+ if (e->dtd->type == XML_DTD_ELEM_MIXED)
+ ctx->flags &= ~XML_NO_CHARS;
+ else
+ ctx->flags |= XML_NO_CHARS;
+
// FIXME: validate regular expressions
}
while (1)
WANT_IGNORE_COMMENTS,
WANT_IGNORE_PIS,
WANT_REPORT_BLOCKS,
+ WANT_REPORT_IGNORABLE,
WANT_FILE_ENTITIES,
};
{ "hide-errors", 0, 0, WANT_HIDE_ERRORS },
{ "ignore-comments", 0, 0, WANT_IGNORE_COMMENTS },
{ "ignore-pis", 0, 0, WANT_IGNORE_PIS },
- { "reports-blocks", 0, 0, WANT_REPORT_BLOCKS },
+ { "report-blocks", 0, 0, WANT_REPORT_BLOCKS },
+ { "report-ignorable", 0, 0, WANT_REPORT_IGNORABLE },
{ "file-entities", 0, 0, WANT_FILE_ENTITIES },
{ NULL, 0, 0, 0 }
};
--ignore-comments Ignore comments\n\
--ignore-pis Ignore processing instructions\n\
--report-blocks Report blocks or characters and CDATA sections\n\
+ --report-ignorable Report ignorable whitespace\n\
--file-entities Resolve file external entities (not fully normative)\n\
\n", stderr);
exit(1);
static uns want_ignore_comments;
static uns want_ignore_pis;
static uns want_report_blocks;
+static uns want_report_ignorable;
static uns want_file_entities;
static struct fastbuf *out;
bprintf(out, "SAX: cdata text='%s'\n", text);
}
+static void
+h_ignorable(struct xml_context *ctx UNUSED, char *text, uns len UNUSED)
+{ /* Handler installed as ctx.h_ignorable: prints the reported text to `out` with a "SAX: ignorable" prefix. */
+ bprintf(out, "SAX: ignorable text='%s'\n", text); /* NOTE(review): printed with %s and len is unused, so this presumably relies on text being NUL-terminated -- confirm */
+}
+
static void
h_dtd_start(struct xml_context *ctx UNUSED)
{
case WANT_REPORT_BLOCKS:
want_report_blocks++;
break;
+ case WANT_REPORT_IGNORABLE:
+ want_report_ignorable++;
+ break;
case WANT_FILE_ENTITIES:
want_file_entities++;
break;
ctx.h_block = h_block;
ctx.h_cdata = h_cdata;
}
+ if (want_report_ignorable)
+ ctx.h_ignorable = h_ignorable;
ctx.h_dtd_start = h_dtd_start;
ctx.h_dtd_end = h_dtd_end;
}
/* Other parameters */
XML_VALIDATING = 0x00000100, /* Validate everything (not fully implemented!) */
XML_PARSE_DTD = 0x00000200, /* Enable parsing of DTD */
+ XML_NO_CHARS = 0x00000400, /* The current element must not contain character data (filled automatically if using DTD) */
/* Internals, do not change! */
XML_EMPTY_ELEM_TAG = 0x00010000, /* The current element match EmptyElemTag */
void (*h_chars)(struct xml_context *ctx); /* Called after some characters (only with XML_REPORT_CHARS) */
void (*h_block)(struct xml_context *ctx, char *text, uns len); /* Called for each continuous block of characters not reported by h_cdata() (only with XML_REPORT_CHARS) */
void (*h_cdata)(struct xml_context *ctx, char *text, uns len); /* Called for each CDATA section (only with XML_REPORT_CHARS) */
+ void (*h_ignorable)(struct xml_context *ctx, char *text, uns len); /* Called for ignorable whitespace (content in tags without #PCDATA) */
void (*h_dtd_start)(struct xml_context *ctx); /* Called just after the DTD structure is initialized */
void (*h_dtd_end)(struct xml_context *ctx); /* Called after DTD subsets subsets */
struct xml_dtd_entity *(*h_find_entity)(struct xml_context *ctx, char *name); /* Called when needed to resolve a general entity */