X-Git-Url: http://mj.ucw.cz/gitweb/?a=blobdiff_plain;f=sherlock%2Fxml%2Fxml-test.c;h=f6738c56db4164cd3851dfdd7c7351ebeac88812;hb=6c0e6f3b91ed669384f7fe894e4bb4891ffa6440;hp=cca5ad8ac77ce70e54c38eeefd819de74b7cc591;hpb=d998b1961061c93132531d6d9cd2772a0c51ea1a;p=libucw.git diff --git a/sherlock/xml/xml-test.c b/sherlock/xml/xml-test.c index cca5ad8a..f6738c56 100644 --- a/sherlock/xml/xml-test.c +++ b/sherlock/xml/xml-test.c @@ -1,7 +1,7 @@ /* * Sherlock Library -- A simple XML parser * - * (c) 2007 Pavel Charvat + * (c) 2007--2008 Pavel Charvat * * This software may be freely distributed and used according to the terms * of the GNU Lesser General Public License. @@ -9,33 +9,59 @@ #include "sherlock/sherlock.h" #include "sherlock/xml/xml.h" -#include "lib/getopt.h" -#include "lib/fastbuf.h" +#include "sherlock/xml/dtd.h" +#include "ucw/getopt.h" +#include "ucw/fastbuf.h" #include #include +#include -static char *shortopts = "sp" CF_SHORT_OPTS; +enum { + WANT_FIRST = 0x100, + WANT_HIDE_ERRORS, + WANT_IGNORE_COMMENTS, + WANT_IGNORE_PIS, + WANT_REPORT_BLOCKS, + WANT_REPORT_IGNORABLE, + WANT_FILE_ENTITIES, +}; + +static char *shortopts = "spdt" CF_SHORT_OPTS; static struct option longopts[] = { CF_LONG_OPTS - { "sax", 0, 0, 's' }, - { "pull", 0, 0, 'p' }, - { "dom", 0, 0, 'd' }, - { NULL, 0, 0, 0 } + { "sax", 0, 0, 's' }, + { "pull", 0, 0, 'p' }, + { "dom", 0, 0, 't' }, + { "dtd", 0, 0, 'd' }, + { "hide-errors", 0, 0, WANT_HIDE_ERRORS }, + { "ignore-comments", 0, 0, WANT_IGNORE_COMMENTS }, + { "ignore-pis", 0, 0, WANT_IGNORE_PIS }, + { "report-blocks", 0, 0, WANT_REPORT_BLOCKS }, + { "report-ignorable", 0, 0, WANT_REPORT_IGNORABLE }, + { "file-entities", 0, 0, WANT_FILE_ENTITIES }, + { NULL, 0, 0, 0 } }; static void NONRET usage(void) { fputs("\ -Usage: xml-test [options] < in.xml\n\ +Usage: xml-test [options] < input.xml\n\ \n\ Options:\n" CF_USAGE "\ --s, --pull Test PULL interface\n\ --s, --sax Test SAX interface\n\ --d, --dom Test DOM interface\n\ +-p, --pull Test PULL interface\n\ +-s, --sax Test SAX interface\n\ +-t, --dom Test DOM interface\n\ +-d, --dtd Enable parsing of DTD\n\ + --hide-errors Hide warnings and error messages\n\ + --ignore-comments Ignore comments\n\ + --ignore-pis Ignore processing instructions\n\ + --report-blocks Report blocks or characters and CDATA sections\n\ + --report-ignorable Report ignorable whitespace\n\ + --file-entities Resolve file external entities (not fully normative)\n\ \n", stderr); exit(1); } @@ -43,6 +69,14 @@ CF_USAGE static uns want_sax; static uns want_pull; static uns want_dom; +static uns want_parse_dtd; +static uns want_hide_errors; +static uns want_ignore_comments; +static uns want_ignore_pis; +static uns want_report_blocks; +static uns want_report_ignorable; +static uns want_file_entities; + static struct fastbuf *out; static char * @@ -53,7 +87,7 @@ node_type(struct xml_node *node) case XML_NODE_ELEM: return "element"; case XML_NODE_COMMENT: return "comment"; case XML_NODE_PI: return "pi"; - case XML_NODE_CDATA: return "chars"; + case XML_NODE_CHARS: return "chars"; default: return "unknown"; } } @@ -65,7 +99,7 @@ show_node(struct xml_node *node) { case XML_NODE_ELEM: bprintf(out, " <%s>", node->name); - SLIST_FOR_EACH(struct xml_attr *, a, node->attrs) + XML_ATTR_FOR_EACH(a, node) bprintf(out, " %s='%s'", a->name, a->val); bputc(out, '\n'); break; @@ -75,7 +109,7 @@ show_node(struct xml_node *node) case XML_NODE_PI: bprintf(out, " target=%s text='%s'\n", node->name, node->text); break; - case XML_NODE_CDATA: + case XML_NODE_CHARS: bprintf(out, " text='%s'\n", node->text); break; default: @@ -94,7 +128,7 @@ show_tree(struct xml_node *node, uns level) bputs(out, node_type(node)); show_node(node); if (node->type == XML_NODE_ELEM) - CLIST_FOR_EACH(struct xml_node *, son, node->sons) + XML_NODE_FOR_EACH(son, node) show_tree(son, level + 1); } @@ -119,15 +153,15 @@ h_document_end(struct xml_context *ctx UNUSED) static void h_xml_decl(struct xml_context *ctx) { - bprintf(out, "SAX: xml_decl version=%s standalone=%d\n", ctx->version_str, ctx->standalone); + bprintf(out, "SAX: xml_decl version=%s standalone=%d fb_encoding=%s\n", ctx->version_str, ctx->standalone, ctx->src->fb_encoding); } static void h_doctype_decl(struct xml_context *ctx) { bprintf(out, "SAX: doctype_decl type=%s public='%s' system='%s' extsub=%d intsub=%d\n", - ctx->document_type, ctx->eid.public_id ? : "", ctx->eid.system_id ? : "", - !!(ctx->flags & XML_FLAG_HAS_EXTERNAL_SUBSET), !!(ctx->flags & XML_FLAG_HAS_INTERNAL_SUBSET)); + ctx->doctype, ctx->public_id ? : "", ctx->system_id ? : "", + !!(ctx->flags & XML_HAS_EXTERNAL_SUBSET), !!(ctx->flags & XML_HAS_INTERNAL_SUBSET)); } static void @@ -140,35 +174,71 @@ h_comment(struct xml_context *ctx) static void h_pi(struct xml_context *ctx) { - bprintf(out, "SAX: pi"); + bputs(out, "SAX: pi"); show_node(ctx->node); } static void -h_element_start(struct xml_context *ctx) +h_stag(struct xml_context *ctx) { - bprintf(out, "SAX: element_start"); + bputs(out, "SAX: stag"); show_node(ctx->node); } static void -h_element_end(struct xml_context *ctx) +h_etag(struct xml_context *ctx) { - bprintf(out, "SAX: element_end \n", ctx->node->name); + bprintf(out, "SAX: etag \n", ctx->node->name); } static void h_chars(struct xml_context *ctx) { - bprintf(out, "SAX: chars"); + bputs(out, "SAX: chars"); show_node(ctx->node); } +static void +h_block(struct xml_context *ctx UNUSED, char *text, uns len UNUSED) +{ + bprintf(out, "SAX: block text='%s'\n", text); +} + +static void +h_cdata(struct xml_context *ctx UNUSED, char *text, uns len UNUSED) +{ + bprintf(out, "SAX: cdata text='%s'\n", text); +} + +static void +h_ignorable(struct xml_context *ctx UNUSED, char *text, uns len UNUSED) +{ + bprintf(out, "SAX: ignorable text='%s'\n", text); +} + +static void +h_dtd_start(struct xml_context *ctx UNUSED) +{ + bputs(out, "SAX: dtd_start\n"); +} + +static void +h_dtd_end(struct xml_context *ctx UNUSED) +{ + bputs(out, "SAX: dtd_end\n"); +} + +static void +h_resolve_entity(struct xml_context *ctx, struct xml_dtd_entity *e) +{ + xml_push_fastbuf(ctx, bopen(e->system_id, O_RDONLY, 4096)); +} + int main(int argc, char **argv) { int opt; - cf_def_file = NULL; // FIXME + cf_def_file = NULL; log_init(argv[0]); while ((opt = cf_getopt(argc, argv, shortopts, longopts, NULL)) >= 0) switch (opt) @@ -179,9 +249,30 @@ main(int argc, char **argv) case 'p': want_pull++; break; - case 'd': + case 't': want_dom++; break; + case 'd': + want_parse_dtd++; + break; + case WANT_HIDE_ERRORS: + want_hide_errors++; + break; + case WANT_IGNORE_COMMENTS: + want_ignore_comments++; + break; + case WANT_IGNORE_PIS: + want_ignore_pis++; + break; + case WANT_REPORT_BLOCKS: + want_report_blocks++; + break; + case WANT_REPORT_IGNORABLE: + want_report_ignorable++; + break; + case WANT_FILE_ENTITIES: + want_file_entities++; + break; default: usage(); } @@ -191,7 +282,8 @@ main(int argc, char **argv) out = bfdopen_shared(1, 4096); struct xml_context ctx; xml_init(&ctx); - ctx.h_warn = ctx.h_error = ctx.h_fatal = h_error; + if (!want_hide_errors) + ctx.h_warn = ctx.h_error = ctx.h_fatal = h_error; if (want_sax) { ctx.h_document_start = h_document_start; @@ -200,52 +292,72 @@ main(int argc, char **argv) ctx.h_doctype_decl = h_doctype_decl; ctx.h_comment = h_comment; ctx.h_pi = h_pi; - ctx.h_element_start = h_element_start; - ctx.h_element_end = h_element_end; + ctx.h_stag = h_stag; + ctx.h_etag = h_etag; ctx.h_chars = h_chars; + if (want_report_blocks) + { + ctx.h_block = h_block; + ctx.h_cdata = h_cdata; + } + if (want_report_ignorable) + ctx.h_ignorable = h_ignorable; + ctx.h_dtd_start = h_dtd_start; + ctx.h_dtd_end = h_dtd_end; } - if (want_pull) - ctx.want = XML_WANT_CHARS | XML_WANT_STAG | XML_WANT_ETAG | XML_WANT_COMMENT | XML_WANT_PI; if (want_dom) - ctx.flags &= ~XML_DOM_FREE; - xml_set_source(&ctx, bfdopen_shared(0, 4096)); - int state; - bprintf(out, "PULL: start\n"); - while ((state = xml_next(&ctx)) >= 0 && state != XML_STATE_EOF) - switch (state) - { - case XML_STATE_CHARS: - bprintf(out, "PULL: chars"); - show_node(ctx.node); - break; - case XML_STATE_STAG: - bprintf(out, "PULL: element_start"); - show_node(ctx.node); - break; - case XML_STATE_ETAG: - bprintf(out, "PULL: element_end \n", ctx.node->name); - break; - case XML_STATE_COMMENT: - bprintf(out, "PULL: comment"); - show_node(ctx.node); - break; - case XML_STATE_PI: - bprintf(out, "PULL: pi"); - show_node(ctx.node); - break; -#if 0 - case XML_STATE_CDATA: - bprintf(out, "PULL: cdata [%s]\n", ctx.node->text); - break; -#endif - } - if (state != XML_STATE_EOF) - bprintf(out, "PULL: fatal error\n"); + ctx.flags |= XML_ALLOC_ALL; + if (want_parse_dtd) + ctx.flags |= XML_PARSE_DTD; + if (want_ignore_comments) + ctx.flags &= ~(XML_REPORT_COMMENTS | XML_ALLOC_COMMENTS); + if (want_ignore_pis) + ctx.flags &= ~(XML_REPORT_PIS | XML_ALLOC_PIS); + if (want_file_entities) + ctx.h_resolve_entity = h_resolve_entity; + xml_push_fastbuf(&ctx, bfdopen_shared(0, 4096)); + bputs(out, "PULL: start\n"); + if (want_pull) + { + ctx.pull = XML_PULL_CHARS | XML_PULL_STAG | XML_PULL_ETAG | XML_PULL_COMMENT | XML_PULL_PI; + uns state; + while (state = xml_next(&ctx)) + switch (state) + { + case XML_STATE_CHARS: + bputs(out, "PULL: chars"); + show_node(ctx.node); + break; + case XML_STATE_STAG: + bputs(out, "PULL: stag"); + show_node(ctx.node); + break; + case XML_STATE_ETAG: + bprintf(out, "PULL: etag \n", ctx.node->name); + break; + case XML_STATE_COMMENT: + bputs(out, "PULL: comment"); + show_node(ctx.node); + break; + case XML_STATE_PI: + bputs(out, "PULL: pi"); + show_node(ctx.node); + break; + default: + bputs(out, "PULL: unknown\n"); + break; + } + } else - bprintf(out, "PULL: eof\n"); - - if (want_dom) - show_tree(ctx.root, 0); + xml_parse(&ctx); + if (ctx.err_code) + bprintf(out, "PULL: fatal error at %u: %s\n", xml_row(&ctx), ctx.err_msg); + else + { + bputs(out, "PULL: eof\n"); + if (want_dom) + show_tree(ctx.dom, 0); + } xml_cleanup(&ctx); bclose(out);