2 * UCW Library -- A simple XML parser
4 * (c) 2007--2008 Pavel Charvat <pchar@ucw.cz>
6 * This software may be freely distributed and used according to the terms
7 * of the GNU Lesser General Public License.
11 #include <ucw-xml/xml.h>
12 #include <ucw-xml/dtd.h>
13 #include <ucw/getopt.h>
14 #include <ucw/fastbuf.h>
/* Option code for --report-ignorable: stored in the longopts table and matched by the option switch in main. */
27 WANT_REPORT_IGNORABLE,
/* Short option letters accepted by this tool (s, p, d, t, n) with CF_SHORT_OPTS appended. */
/* NOTE(review): CF_SHORT_OPTS is not defined in this file; presumably it comes from <ucw/getopt.h> -- confirm. */
32 static char *shortopts = "spdtn" CF_SHORT_OPTS;
/*
 * Long option table handed to cf_getopt() in main.
 * Entries whose last field is a letter share their code with the matching
 * short option; the other entries use WANT_* codes and have no short form.
 */
33 static struct option longopts[] = {
36 { "pull", 0, 0, 'p' },
39 { "namespaces", 0, 0, 'n' },
40 { "hide-errors", 0, 0, WANT_HIDE_ERRORS },
41 { "ignore-comments", 0, 0, WANT_IGNORE_COMMENTS },
42 { "ignore-pis", 0, 0, WANT_IGNORE_PIS },
43 { "report-blocks", 0, 0, WANT_REPORT_BLOCKS },
44 { "report-ignorable", 0, 0, WANT_REPORT_IGNORABLE },
45 { "file-entities", 0, 0, WANT_FILE_ENTITIES },
46 { "qnames", 0, 0, WANT_QNAMES },
54 Usage: xml-test [options] < input.xml\n\
59 -p, --pull Test PULL interface\n\
60 -s, --sax Test SAX interface\n\
61 -t, --dom Test DOM interface\n\
62 -d, --dtd Enable parsing of DTD\n\
63 -n, --namespaces Resolve namespaces\n\
64 --hide-errors Hide warnings and error messages\n\
65 --ignore-comments Ignore comments\n\
66 --ignore-pis Ignore processing instructions\n\
67 --report-blocks Report blocks of characters and CDATA sections\n\
68 --report-ignorable Report ignorable whitespace\n\
69 --file-entities Resolve file external entities (not fully normative)\n\
70 --qnames Display qualified names including namespace prefixes\n\
/*
 * Option state consulted by main and the printing helpers.
 * The option switch in main increments the matching counter for the cases
 * visible in this extract; zero means the option was not requested.
 */
76 static uint want_pull;
79 static uint want_parse_dtd;
80 static uint want_hide_errors;
81 static uint want_ignore_comments;
82 static uint want_ignore_pis;
83 static uint want_report_blocks;
84 static uint want_report_ignorable;
85 static uint want_file_entities;
86 static uint want_qnames;
/* Output fastbuf used by the helpers and handlers below; main creates it with bfdopen_shared(1, 4096). */
88 static struct fastbuf *out;
/*
 * Map a node to a short printable type name: "element", "comment", "pi"
 * or "chars"; anything else yields "unknown".
 * NOTE(review): the switch header is not visible in this extract;
 * presumably it switches on node->type -- confirm.
 */
91 node_type(struct xml_node *node)
95 case XML_NODE_ELEM: return "element";
96 case XML_NODE_COMMENT: return "comment";
97 case XML_NODE_PI: return "pi";
98 case XML_NODE_CHARS: return "chars";
99 default: return "unknown";
/*
 * Print the details of a single node to `out`.
 * Callers in this file print a label first (for example "SAX: stag") and
 * then call this function to append the node-specific part.
 */
104 show_node(struct xml_context *ctx, struct xml_node *node)
/* Element name with its namespace id; with want_qnames set the qualified name from xml_node_qname() is shown instead of node->name. */
110 bprintf(out, " (ns%u)<%s>", node->ns, (want_qnames ? xml_node_qname(ctx, node) : node->name));
/* Plain form without namespace information (the condition choosing between the two forms is not visible in this extract). */
112 bprintf(out, " <%s>", node->name);
/* Attributes follow, in the same two variants as the element name. */
113 XML_ATTR_FOR_EACH(a, node)
115 bprintf(out, " (ns%u)%s='%s'", a->ns, (want_qnames ? xml_attr_qname(ctx, a) : a->name), a->val);
117 bprintf(out, " %s='%s'", a->name, a->val);
120 case XML_NODE_COMMENT:
121 bprintf(out, " text='%s'\n", node->text);
/* Prints node->name as the target together with the text (the case label for this line is not visible). */
124 bprintf(out, " target=%s text='%s'\n", node->name, node->text);
127 bprintf(out, " text='%s'\n", node->text);
/*
 * Recursively print the subtree rooted at `node`.
 * `level` is the depth of `node`; main calls this with level 0.
 */
135 show_tree(struct xml_context *ctx, struct xml_node *node, uint level)
/* Runs `level` times; the loop body is not visible in this extract (presumably indentation -- confirm). */
140 for (uint i = 0; i < level; i++)
/* Type name first, then the node details. */
142 bputs(out, node_type(node));
143 show_node(ctx, node);
/* Only element nodes are descended into; each child is printed one level deeper. */
144 if (node->type == XML_NODE_ELEM)
145 XML_NODE_FOR_EACH(son, node)
146 show_tree(ctx, son, level + 1);
/*
 * Diagnostic handler; main installs it as h_warn, h_error and h_fatal.
 * Prints "warn" when ctx->err_code is below XML_ERR_ERROR and "error"
 * otherwise, followed by the value of xml_row(ctx) and ctx->err_msg.
 */
150 h_error(struct xml_context *ctx)
152 bprintf(out, "SAX: %s at %u: %s\n", (ctx->err_code < XML_ERR_ERROR) ? "warn" : "error", xml_row(ctx), ctx->err_msg);
/* Handler installed as ctx.h_document_start: writes a fixed "document_start" line to `out`; ctx is unused. */
156 h_document_start(struct xml_context *ctx UNUSED)
158 bputs(out, "SAX: document_start\n");
/* Handler installed as ctx.h_document_end: writes a fixed "document_end" line to `out`; ctx is unused. */
162 h_document_end(struct xml_context *ctx UNUSED)
164 bputs(out, "SAX: document_end\n");
/* Handler installed as ctx.h_xml_decl: prints ctx->version_str, ctx->standalone and ctx->src->fb_encoding. */
168 h_xml_decl(struct xml_context *ctx)
170 bprintf(out, "SAX: xml_decl version=%s standalone=%d fb_encoding=%s\n", ctx->version_str, ctx->standalone, ctx->src->fb_encoding);
/*
 * Handler installed as ctx.h_doctype_decl: prints the doctype name, the
 * public and system identifiers, and whether the external / internal
 * subset flags are set in ctx->flags.
 */
174 h_doctype_decl(struct xml_context *ctx)
176 bprintf(out, "SAX: doctype_decl type=%s public='%s' system='%s' extsub=%d intsub=%d\n",
/* `x ? : ""` is the GNU conditional with omitted middle operand: a NULL id is printed as an empty string. */
177 ctx->doctype, ctx->public_id ? : "", ctx->system_id ? : "",
/* `!!` reduces each flag test to 0 or 1 for the %d conversions. */
178 !!(ctx->flags & XML_HAS_EXTERNAL_SUBSET), !!(ctx->flags & XML_HAS_INTERNAL_SUBSET));
/* Handler installed as ctx.h_comment: prints the "SAX: comment" label, then the details of ctx->node via show_node(). */
182 h_comment(struct xml_context *ctx)
184 bputs(out, "SAX: comment");
185 show_node(ctx, ctx->node);
/* Prints the "SAX: pi" label, then the details of ctx->node via show_node(). */
189 h_pi(struct xml_context *ctx)
191 bputs(out, "SAX: pi");
192 show_node(ctx, ctx->node);
/* Prints the "SAX: stag" label, then the details of ctx->node via show_node(). */
196 h_stag(struct xml_context *ctx)
198 bputs(out, "SAX: stag");
199 show_node(ctx, ctx->node);
/* Prints an "etag" line containing only the name of ctx->node. */
203 h_etag(struct xml_context *ctx)
205 bprintf(out, "SAX: etag </%s>\n", ctx->node->name);
/* Handler installed as ctx.h_chars: prints the "SAX: chars" label, then the details of ctx->node via show_node(). */
209 h_chars(struct xml_context *ctx)
211 bputs(out, "SAX: chars");
212 show_node(ctx, ctx->node);
/*
 * Handler installed as ctx.h_block when want_report_blocks is set: prints `text`.
 * `ctx` and `len` are unused; the text is printed with %s.
 * NOTE(review): this assumes `text` is NUL-terminated -- confirm against the library contract.
 */
216 h_block(struct xml_context *ctx UNUSED, char *text, uint len UNUSED)
218 bprintf(out, "SAX: block text='%s'\n", text);
/*
 * Handler installed as ctx.h_cdata when want_report_blocks is set: prints `text`.
 * `ctx` and `len` are unused; the text is printed with %s.
 * NOTE(review): this assumes `text` is NUL-terminated -- confirm against the library contract.
 */
222 h_cdata(struct xml_context *ctx UNUSED, char *text, uint len UNUSED)
224 bprintf(out, "SAX: cdata text='%s'\n", text);
/*
 * Handler installed as ctx.h_ignorable when want_report_ignorable is set: prints `text`.
 * `ctx` and `len` are unused; the text is printed with %s.
 * NOTE(review): this assumes `text` is NUL-terminated -- confirm against the library contract.
 */
228 h_ignorable(struct xml_context *ctx UNUSED, char *text, uint len UNUSED)
230 bprintf(out, "SAX: ignorable text='%s'\n", text);
/* Handler installed as ctx.h_dtd_start: writes a fixed "dtd_start" line to `out`; ctx is unused. */
234 h_dtd_start(struct xml_context *ctx UNUSED)
236 bputs(out, "SAX: dtd_start\n");
/* Handler installed as ctx.h_dtd_end: writes a fixed "dtd_end" line to `out`; ctx is unused. */
240 h_dtd_end(struct xml_context *ctx UNUSED)
242 bputs(out, "SAX: dtd_end\n");
/*
 * Entity resolver installed as ctx.h_resolve_entity when want_file_entities is set.
 * Opens e->system_id with bopen() using O_RDONLY and pushes the resulting
 * fastbuf onto the parser input with xml_push_fastbuf().
 * NOTE(review): the result of bopen() is passed on unchecked; how bopen()
 * reports a failed open is not visible here -- confirm.
 */
246 h_resolve_entity(struct xml_context *ctx, struct xml_dtd_entity *e)
248 xml_push_fastbuf(ctx, bopen(e->system_id, O_RDONLY, 4096));
/*
 * Entry point: parses the command-line options, configures an xml_context
 * (handlers and flags), feeds it file descriptor 0 and prints what the
 * parser reports to `out`.
 * Many lines of this function are not visible in this extract; the comments
 * below describe only the visible statements.
 */
252 main(int argc, char **argv)
/* Option loop: runs until cf_getopt() returns a negative value. */
257 while ((opt = cf_getopt(argc, argv, shortopts, longopts, NULL)) >= 0)
/* Long-only options; the visible cases increment the matching want_* counter. */
275 case WANT_HIDE_ERRORS:
278 case WANT_IGNORE_COMMENTS:
279 want_ignore_comments++;
281 case WANT_IGNORE_PIS:
284 case WANT_REPORT_BLOCKS:
285 want_report_blocks++;
287 case WANT_REPORT_IGNORABLE:
288 want_report_ignorable++;
290 case WANT_FILE_ENTITIES:
291 want_file_entities++;
/* All output goes through a fastbuf on file descriptor 1. */
302 out = bfdopen_shared(1, 4096);
303 struct xml_context ctx;
/* Unless want_hide_errors is set, warnings, errors and fatal errors all go to h_error. */
305 if (!want_hide_errors)
306 ctx.h_warn = ctx.h_error = ctx.h_fatal = h_error;
/* Handler installation (any enclosing condition is not visible in this extract). */
309 ctx.h_document_start = h_document_start;
310 ctx.h_document_end = h_document_end;
311 ctx.h_xml_decl = h_xml_decl;
312 ctx.h_doctype_decl = h_doctype_decl;
313 ctx.h_comment = h_comment;
317 ctx.h_chars = h_chars;
/* Block and CDATA handlers are installed only when want_report_blocks is set. */
318 if (want_report_blocks)
320 ctx.h_block = h_block;
321 ctx.h_cdata = h_cdata;
323 if (want_report_ignorable)
324 ctx.h_ignorable = h_ignorable;
325 ctx.h_dtd_start = h_dtd_start;
326 ctx.h_dtd_end = h_dtd_end;
/* Parser flags; the conditions guarding the next two lines are not visible in this extract. */
329 ctx.flags |= XML_ALLOC_ALL;
331 ctx.flags |= XML_PARSE_DTD;
/* Ignoring comments clears both the report and the alloc flag for them. */
332 if (want_ignore_comments)
333 ctx.flags &= ~(XML_REPORT_COMMENTS | XML_ALLOC_COMMENTS);
/* Same for processing instructions (guarding condition not visible; presumably want_ignore_pis -- confirm). */
335 ctx.flags &= ~(XML_REPORT_PIS | XML_ALLOC_PIS);
336 if (want_file_entities)
337 ctx.h_resolve_entity = h_resolve_entity;
/* The document is read from file descriptor 0. */
340 xml_push_fastbuf(&ctx, bfdopen_shared(0, 4096));
341 bputs(out, "PULL: start\n");
/* Select which events are pulled: chars, start tags, end tags, comments and PIs. */
344 ctx.pull = XML_PULL_CHARS | XML_PULL_STAG | XML_PULL_ETAG | XML_PULL_COMMENT | XML_PULL_PI;
/* NOTE(review): assignment used as the loop condition; the loop ends when xml_next() returns zero. Extra parentheses would make the intent explicit. */
346 while (state = xml_next(&ctx))
349 case XML_STATE_CHARS:
350 bputs(out, "PULL: chars");
351 show_node(&ctx, ctx.node);
354 bputs(out, "PULL: stag");
355 show_node(&ctx, ctx.node);
358 bprintf(out, "PULL: etag </%s>\n", ctx.node->name);
360 case XML_STATE_COMMENT:
361 bputs(out, "PULL: comment");
362 show_node(&ctx, ctx.node);
365 bputs(out, "PULL: pi");
366 show_node(&ctx, ctx.node);
369 bputs(out, "PULL: unknown\n");
/* Reports a fatal error with the value of xml_row(&ctx) and ctx.err_msg (the guarding condition is not visible). */
376 bprintf(out, "PULL: fatal error at %u: %s\n", xml_row(&ctx), ctx.err_msg);
379 bputs(out, "PULL: eof\n");
/* Dump the tree starting at ctx.dom with level 0 (guarding condition not visible). */
381 show_tree(&ctx, ctx.dom, 0);
/* List every entry of ctx.ns_by_id together with its index. */
386 bputs(out, "Known namespaces:\n");
/* NOTE(review): `uns` is used here while the rest of the file uses `uint` -- presumably an older alias; confirm and unify. */
387 for (uns i=0; i < GARY_SIZE(ctx.ns_by_id); i++)
388 bprintf(out, "%u\t%s\n", i, ctx.ns_by_id[i]);