2 * Sherlock Library -- A simple XML parser
4 * (c) 2007 Pavel Charvat <pchar@ucw.cz>
6 * This software may be freely distributed and used according to the terms
7 * of the GNU Lesser General Public License.
10 #ifndef _SHERLOCK_XML_XML_H
11 #define _SHERLOCK_XML_XML_H
13 #include "lib/clists.h"
14 #include "lib/slists.h"
15 #include "lib/mempool.h"
16 #include "lib/fastbuf.h"
20 XML_ERR_WARN = 1000, /* Warning */
21 XML_ERR_ERROR = 2000, /* Recoverable error */
22 XML_ERR_FATAL = 3000, /* Unrecoverable error */
29 XML_STATE_DOCUMENT_TYPE,
41 XML_STATE_CHARS_BEFORE_STAG,
42 XML_STATE_CHARS_BEFORE_ETAG,
43 XML_STATE_CHARS_BEFORE_CDATA,
44 XML_STATE_CHARS_BEFORE_PI,
45 XML_STATE_CHARS_BEFORE_COMMENT,
47 XML_STATE_PROLOG_COMMENT,
49 XML_STATE_EPILOG_COMMENT,
53 XML_WANT_DECL = 1 << XML_STATE_DECL,
54 XML_WANT_DOCUMENT_TYPE = 1 << XML_STATE_DOCUMENT_TYPE,
55 XML_WANT_CHARS = 1 << XML_STATE_CHARS,
56 XML_WANT_WHITE = 1 << XML_STATE_WHITE,
57 XML_WANT_CDATA = 1 << XML_STATE_CDATA,
58 XML_WANT_STAG = 1 << XML_STATE_STAG,
59 XML_WANT_ETAG = 1 << XML_STATE_ETAG,
60 XML_WANT_COMMENT = 1 << XML_STATE_COMMENT,
61 XML_WANT_PI = 1 << XML_STATE_PI,
62 XML_WANT_EOF = 1 << XML_STATE_EOF,
67 XML_FLAG_VALIDATING = 0x1,
68 XML_FLAG_VERSION_1_1 = 0x2, /* XML version 1.1, otherwise 1.0 */
69 XML_FLAG_HAS_EXTERNAL_SUBSET = 0x4, /* The document contains a reference to external DTD subset */
70 XML_FLAG_HAS_INTERNAL_SUBSET = 0x8, /* The document contains an internal subset */
72 XML_FLAG_SRC_EOF = 0x10, /* EOF reached */
73 XML_FLAG_SRC_EXPECTED_DECL = 0x20, /* Just before optional or required XMLDecl/TextDecl */
74 XML_FLAG_SRC_NEW_LINE = 0x40, /* The last read character is 0xD */
75 XML_FLAG_SRC_SURROUND = 0x80, /* Surround the text with 0x20 (references to parameter entities) */
76 XML_FLAG_SRC_DOCUMENT = 0x100, /* The document entity */
77 XML_FLAG_SRC_EXTERNAL = 0x200, /* An external entity */
79 XML_DOM_SKIP = 0x1000, /* Do not report DOM nodes */
80 XML_DOM_FREE = 0x2000, /* Free the subtree when leaving */
81 XML_DOM_IGNORE = XML_DOM_SKIP | XML_DOM_FREE, /* Completely ignore the subtree */
83 XML_FLAG_EMPTY_ELEM = 0x100000,
99 cnode n; /* Node for list of parent's sons */
100 uns type; /* XML_NODE_x */
101 struct xml_node *parent; /* Parent node */
102 char *name; /* Element name / PI target */
103 clist sons; /* Children nodes */
106 char *text; /* PI text / Comment / CDATA */
107 uns len; /* Text length in bytes */
110 struct xml_dtd_elem *dtd; /* Element DTD */
111 slist attrs; /* Link list of element attributes */
118 struct xml_node *elem;
125 #define XML_BUF_SIZE 32 /* At least 16 -- hardcoded; the u32 read buffer `buf` declared below is sized as 2 * XML_BUF_SIZE entries */
128 struct xml_source *next; /* Link list of pending fastbufs (xml_context.sources) */
129 struct fastbuf *fb; /* Source fastbuf */
130 struct fastbuf wrap_fb; /* Libcharset or fbmem wrapper */
131 u32 buf[2 * XML_BUF_SIZE]; /* Read buffer with Unicode values and categories */
132 u32 *bptr, *bstop; /* Current state of the buffer */
133 uns row; /* File position */
134 char *expected_encoding; /* Initial encoding before any transformation has been made (expected in XMLDecl/TextDecl) */
135 char *fb_encoding; /* Encoding of the source fastbuf */
136 char *decl_encoding; /* Encoding read from the XMLDecl/TextDecl */
137 uns refill_cat1; /* Character categories, which should be directly passed to the buffer */
138 uns refill_cat2; /* Character categories, which should be processed as newlines (possibly in some built-in sequences) */
139 void (*refill)(struct xml_context *ctx); /* Callback to decode source characters to the buffer */
140 unsigned short *refill_in_to_x; /* Libcharset input table */
141 uns saved_depth; /* Saved ctx->depth */
146 char *err_msg; /* Last error message */
147 enum xml_error err_code; /* Last error code */
148 void *throw_buf; /* Where to jump on error */
149 void (*h_warn)(struct xml_context *ctx); /* Warning callback */
150 void (*h_error)(struct xml_context *ctx); /* Recoverable error callback */
151 void (*h_fatal)(struct xml_context *ctx); /* Unrecoverable error callback */
153 /* Memory management */
154 struct mempool *pool; /* DOM pool */
155 struct mempool *stack; /* Stack pool (freed as soon as possible) */
156 struct xml_stack *stack_list; /* See xml_push(), xml_pop() */
157 uns flags; /* XML_FLAG_x (restored on xml_pop()) */
158 uns depth; /* Nesting level */
159 struct fastbuf chars; /* Character data / attribute value */
163 struct xml_source *src; /* Current source */
164 u32 *bptr, *bstop; /* Character buffer */
166 /* SAX-like interface */
167 void (*h_document_start)(struct xml_context *ctx); /* Called before entering prolog */
168 void (*h_document_end)(struct xml_context *ctx); /* Called after leaving epilog */
169 void (*h_xml_decl)(struct xml_context *ctx); /* Called after the XML declaration */
170 void (*h_doctype_decl)(struct xml_context *ctx); /* Called in the doctype declaration just before internal subset */
171 void (*h_comment)(struct xml_context *ctx); /* Called after a comment */
172 void (*h_pi)(struct xml_context *ctx); /* Called after a processing instruction */
173 void (*h_element_start)(struct xml_context *ctx); /* Called after STag or EmptyElemTag */
174 void (*h_element_end)(struct xml_context *ctx); /* Called before ETag or after EmptyElemTag */
175 void (*h_chars)(struct xml_context *ctx); /* Called after some characters */
176 void (*h_cdata)(struct xml_context *ctx); /* Called after a CDATA section */
179 struct xml_node *root; /* DOM root */
180 struct xml_node *node; /* Current DOM node */
186 struct xml_ext_id eid;
190 void (*start_dtd)(struct xml_context *ctx);
191 void (*end_dtd)(struct xml_context *ctx);
192 void (*start_entity)(struct xml_context *ctx);
193 void (*end_entity)(struct xml_context *ctx);
194 struct fastbuf *(*resolve_entity)(struct xml_context *ctx);
195 void (*notation_decl)(struct xml_context *ctx);
196 void (*unparsed_entity_decl)(struct xml_context *ctx);
/* Parser entry points declared by this header; each one takes the struct xml_context it operates on. */
199 void xml_init(struct xml_context *ctx); /* NOTE(review): presumably prepares ctx before first use -- confirm in the implementation */
200 void xml_cleanup(struct xml_context *ctx); /* NOTE(review): presumably releases what xml_init() set up -- confirm in the implementation */
201 void xml_set_source(struct xml_context *ctx, struct fastbuf *fb); /* NOTE(review): presumably makes fb the input of ctx; ownership of fb is not visible here -- confirm */
202 int xml_next(struct xml_context *ctx); /* NOTE(review): the int result is presumably an XML_STATE_x value, filtered by the XML_WANT_x bits -- confirm */
203 uns xml_row(struct xml_context *ctx); /* NOTE(review): presumably reports the `row` ("File position") of the current source -- confirm */