X-Git-Url: http://mj.ucw.cz/gitweb/?a=blobdiff_plain;f=sherlock%2Fxml%2Fxml.h;h=e94588836bf733470c2d008725d08b06fa140252;hb=d5fdccbecd2acde9a6e067b54fcd69b02f31a820;hp=8e416dbc8a775966ac2fe83c66a0227c7408aeb7;hpb=ccf64507b45774b007ab6200036827f1597022d8;p=libucw.git diff --git a/sherlock/xml/xml.h b/sherlock/xml/xml.h index 8e416dbc..e9458883 100644 --- a/sherlock/xml/xml.h +++ b/sherlock/xml/xml.h @@ -10,13 +10,12 @@ #ifndef _SHERLOCK_XML_XML_H #define _SHERLOCK_XML_XML_H -#include "lib/clists.h" -#include "lib/slists.h" -#include "lib/mempool.h" -#include "lib/fastbuf.h" +#include "ucw/clists.h" +#include "ucw/slists.h" +#include "ucw/mempool.h" +#include "ucw/fastbuf.h" struct xml_context; -struct xml_source; struct xml_dtd_entity; enum xml_error { @@ -81,18 +80,19 @@ enum xml_flags { /* Other parameters */ XML_VALIDATING = 0x00000100, /* Validate everything (not fully implemented!) */ XML_PARSE_DTD = 0x00000200, /* Enable parsing of DTD */ + XML_NO_CHARS = 0x00000400, /* The current element must not contain character data (filled automatically if using DTD) */ + XML_ALLOC_DEFAULT_ATTRS = 0x00000800, /* Allocate default attribute values so they can be found by XML_ATTR_FOR_EACH */ /* Internals, do not change! 
*/ XML_EMPTY_ELEM_TAG = 0x00010000, /* The current element match EmptyElemTag */ XML_VERSION_1_1 = 0x00020000, /* XML version is 1.1, otherwise 1.0 */ XML_HAS_EXTERNAL_SUBSET = 0x00040000, /* The document contains a reference to external DTD subset */ XML_HAS_INTERNAL_SUBSET = 0x00080000, /* The document contains an internal subset */ + XML_HAS_DTD = XML_HAS_EXTERNAL_SUBSET | XML_HAS_INTERNAL_SUBSET, XML_SRC_EOF = 0x00100000, /* EOF reached */ XML_SRC_EXPECTED_DECL = 0x00200000, /* Just before optional or required XMLDecl/TextDecl */ - XML_SRC_NEW_LINE = 0x00400000, /* The last read character is 0xD */ - XML_SRC_SURROUND = 0x00800000, /* Surround the text with 0x20 (references to parameter entities) */ - XML_SRC_DOCUMENT = 0x01000000, /* The document entity */ - XML_SRC_EXTERNAL = 0x02000000, /* An external entity */ + XML_SRC_DOCUMENT = 0x00400000, /* The document entity */ + XML_SRC_EXTERNAL = 0x00800000, /* An external entity */ }; enum xml_node_type { @@ -133,6 +133,28 @@ struct xml_attr { void *user; /* User-defined (initialized to NULL) */ }; +#define XML_BUF_SIZE 32 /* At least 8 -- hardcoded */ + +struct xml_source { + struct xml_source *next; /* Linked list of pending fastbufs (xml_context.sources) */ + struct fastbuf *fb; /* Source fastbuf */ + struct fastbuf *wrapped_fb; /* Original wrapped fastbuf (needed for cleanup) */ + struct fastbuf wrap_fb; /* Fbmem wrapper */ + u32 buf[2 * XML_BUF_SIZE]; /* Read buffer with Unicode values and categories */ + u32 *bptr, *bstop; /* Current state of the buffer */ + uns row; /* File position */ + char *expected_encoding; /* Initial encoding before any transformation has been made (expected in XMLDecl/TextDecl) */ + char *fb_encoding; /* Encoding of the source fastbuf */ + char *decl_encoding; /* Encoding read from the XMLDecl/TextDecl */ + uns refill_cat1; /* Character categories, which should be directly passed to the buffer */ + uns refill_cat2; /* Character categories, which should be processed as newlines (possibly 
in some built-in + sequences) */ + void (*refill)(struct xml_context *ctx); /* Callback to decode source characters to the buffer */ + unsigned short *refill_in_to_x; /* Libcharset input table */ + uns saved_depth; /* Saved ctx->depth */ + uns pending_0xd; /* The last read character is 0xD */ +}; + struct xml_context { /* Error handling */ char *err_msg; /* Last error message */ @@ -174,6 +196,7 @@ struct xml_context { void (*h_chars)(struct xml_context *ctx); /* Called after some characters (only with XML_REPORT_CHARS) */ void (*h_block)(struct xml_context *ctx, char *text, uns len); /* Called for each continuous block of characters not reported by h_cdata() (only with XML_REPORT_CHARS) */ void (*h_cdata)(struct xml_context *ctx, char *text, uns len); /* Called for each CDATA section (only with XML_REPORT_CHARS) */ + void (*h_ignorable)(struct xml_context *ctx, char *text, uns len); /* Called for ignorable whitespace (content in tags without #PCDATA) */ void (*h_dtd_start)(struct xml_context *ctx); /* Called just after the DTD structure is initialized */ void (*h_dtd_end)(struct xml_context *ctx); /* Called after DTD subsets subsets */ struct xml_dtd_entity *(*h_find_entity)(struct xml_context *ctx, char *name); /* Called when needed to resolve a general entity */ @@ -191,11 +214,6 @@ struct xml_context { struct xml_dtd *dtd; /* The DTD structure (or NULL) */ uns state; /* Current state for the PULL interface (XML_STATE_x) */ uns pull; /* Parameters for the PULL interface (XML_PULL_x) */ - - void (*start_entity)(struct xml_context *ctx); - void (*end_entity)(struct xml_context *ctx); - void (*notation_decl)(struct xml_context *ctx); - void (*unparsed_entity_decl)(struct xml_context *ctx); }; /* Initialize XML context */ @@ -204,7 +222,7 @@ void xml_init(struct xml_context *ctx); /* Clean up all internal structures */ void xml_cleanup(struct xml_context *ctx); -/* Reuse XML context */ +/* Reuse XML context, equivalent to xml_cleanup() and xml_init() */ void 
xml_reset(struct xml_context *ctx); /* Add XML source (fastbuf will be automatically closed) */ @@ -216,12 +234,21 @@ uns xml_parse(struct xml_context *ctx); /* Parse with the PUSH interface, return XML_STATE_x (zero on EOF or fatal error) */ uns xml_next(struct xml_context *ctx); +/* Equivalent to xml_next, but with a temporarily changed ctx->pull value */ +uns xml_next_state(struct xml_context *ctx, uns pull); + +/* May be called on XML_STATE_STAG to skip its content; can return XML_STATE_ETAG or XML_STATE_EOF on fatal error */ +uns xml_skip_element(struct xml_context *ctx); + /* Returns the current row number in the document entity */ uns xml_row(struct xml_context *ctx); /* Finds a given attribute value in a XML_NODE_ELEM node */ struct xml_attr *xml_attr_find(struct xml_context *ctx, struct xml_node *node, char *name); +/* Similar to xml_attr_find, but it also handles default values */ +char *xml_attr_value(struct xml_context *ctx, struct xml_node *node, char *name); + /* The default value of h_find_entity(), knows <, >, &, ' and " */ struct xml_dtd_entity *xml_def_find_entity(struct xml_context *ctx, char *name); @@ -231,4 +258,15 @@ void xml_def_resolve_entity(struct xml_context *ctx, struct xml_dtd_entity *ent) /* Remove leading/trailing spaces and replaces sequences of spaces to a single space character (non-CDATA attribute normalization) */ uns xml_normalize_white(struct xml_context *ctx, char *value); +/* Merge character contents of a given element to a single string (not recursive) */ +char *xml_merge_chars(struct xml_context *ctx, struct xml_node *node, struct mempool *pool); + +/* Merge character contents of a given subtree to a single string */ +char *xml_merge_dom_chars(struct xml_context *ctx, struct xml_node *node, struct mempool *pool); + +/* Public part of error handling */ +void xml_warn(struct xml_context *ctx, const char *format, ...); +void xml_error(struct xml_context *ctx, const char *format, ...); +void NONRET xml_fatal(struct 
xml_context *ctx, const char *format, ...); + #endif