X-Git-Url: http://mj.ucw.cz/gitweb/?a=blobdiff_plain;ds=sidebyside;f=sherlock%2Fxml%2Fxml.h;h=ac9ebefb70f8a88ab6f1056326dd876eb0e44002;hb=83400e61386e912475562889bfb2c39dc5ecf6d0;hp=8e416dbc8a775966ac2fe83c66a0227c7408aeb7;hpb=ccf64507b45774b007ab6200036827f1597022d8;p=libucw.git diff --git a/sherlock/xml/xml.h b/sherlock/xml/xml.h index 8e416dbc..ac9ebefb 100644 --- a/sherlock/xml/xml.h +++ b/sherlock/xml/xml.h @@ -16,7 +16,6 @@ #include "lib/fastbuf.h" struct xml_context; -struct xml_source; struct xml_dtd_entity; enum xml_error { @@ -89,10 +88,8 @@ enum xml_flags { XML_HAS_INTERNAL_SUBSET = 0x00080000, /* The document contains an internal subset */ XML_SRC_EOF = 0x00100000, /* EOF reached */ XML_SRC_EXPECTED_DECL = 0x00200000, /* Just before optional or required XMLDecl/TextDecl */ - XML_SRC_NEW_LINE = 0x00400000, /* The last read character is 0xD */ - XML_SRC_SURROUND = 0x00800000, /* Surround the text with 0x20 (references to parameter entities) */ - XML_SRC_DOCUMENT = 0x01000000, /* The document entity */ - XML_SRC_EXTERNAL = 0x02000000, /* An external entity */ + XML_SRC_DOCUMENT = 0x00400000, /* The document entity */ + XML_SRC_EXTERNAL = 0x00800000, /* An external entity */ }; enum xml_node_type { @@ -133,6 +130,28 @@ struct xml_attr { void *user; /* User-defined (initialized to NULL) */ }; +#define XML_BUF_SIZE 32 /* At least 8 -- hardcoded */ + +struct xml_source { + struct xml_source *next; /* Linked list of pending fastbufs (xml_context.sources) */ + struct fastbuf *fb; /* Source fastbuf */ + struct fastbuf *wrapped_fb; /* Original wrapped fastbuf (needed for cleanup) */ + struct fastbuf wrap_fb; /* Fbmem wrapper */ + u32 buf[2 * XML_BUF_SIZE]; /* Read buffer with Unicode values and categories */ + u32 *bptr, *bstop; /* Current state of the buffer */ + uns row; /* File position */ + char *expected_encoding; /* Initial encoding before any transformation has been made (expected in XMLDecl/TextDecl) */ + char *fb_encoding; /* Encoding of the source fastbuf */ + char *decl_encoding; /* Encoding read from the XMLDecl/TextDecl */ + uns refill_cat1; /* Character categories, which should be directly passed to the buffer */ + uns refill_cat2; /* Character categories, which should be processed as newlines (possibly in some built-in + sequences) */ + void (*refill)(struct xml_context *ctx); /* Callback to decode source characters to the buffer */ + unsigned short *refill_in_to_x; /* Libcharset input table */ + uns saved_depth; /* Saved ctx->depth */ + uns pending_0xd; /* The last read character is 0xD */ +}; + struct xml_context { /* Error handling */ char *err_msg; /* Last error message */ @@ -191,11 +210,6 @@ struct xml_context { struct xml_dtd *dtd; /* The DTD structure (or NULL) */ uns state; /* Current state for the PULL interface (XML_STATE_x) */ uns pull; /* Parameters for the PULL interface (XML_PULL_x) */ - - void (*start_entity)(struct xml_context *ctx); - void (*end_entity)(struct xml_context *ctx); - void (*notation_decl)(struct xml_context *ctx); - void (*unparsed_entity_decl)(struct xml_context *ctx); }; /* Initialize XML context */ @@ -204,7 +218,7 @@ void xml_init(struct xml_context *ctx); /* Clean up all internal structures */ void xml_cleanup(struct xml_context *ctx); -/* Reuse XML context */ +/* Reuse XML context, equivalent to xml_cleanup() and xml_init() */ void xml_reset(struct xml_context *ctx); /* Add XML source (fastbuf will be automatically closed) */ @@ -231,4 +245,9 @@ void xml_def_resolve_entity(struct xml_context *ctx, struct xml_dtd_entity *ent) /* Remove leading/trailing spaces and replaces sequences of spaces to a single space character (non-CDATA attribute normalization) */ uns xml_normalize_white(struct xml_context *ctx, char *value); +/* Public part of error handling */ +void xml_warn(struct xml_context *ctx, const char *format, ...); +void xml_error(struct xml_context *ctx, const char *format, ...); +void NONRET xml_fatal(struct xml_context *ctx, const char *format, ...); + #endif