2 * UCW Library -- A simple XML parser
4 * (c) 2007 Pavel Charvat <pchar@ucw.cz>
6 * This software may be freely distributed and used according to the terms
7 * of the GNU Lesser General Public License.
13 #include "lib/clists.h"
14 #include "lib/slists.h"
18 XML_ERR_WARN = 1000, /* Warning */
19 XML_ERR_ERROR = 2000, /* Recoverable error */
20 XML_ERR_FATAL = 3000, /* Unrecoverable error */
27 XML_STATE_DOCUMENT_TYPE,
39 XML_STATE_CHARS_BEFORE_STAG,
40 XML_STATE_CHARS_BEFORE_ETAG,
41 XML_STATE_CHARS_BEFORE_CDATA,
42 XML_STATE_CHARS_BEFORE_PI,
43 XML_STATE_CHARS_BEFORE_COMMENT,
45 XML_STATE_PROLOG_COMMENT,
47 XML_STATE_EPILOG_COMMENT,
51 XML_WANT_DECL = 1 << XML_STATE_DECL,
52 XML_WANT_DOCUMENT_TYPE = 1 << XML_STATE_DOCUMENT_TYPE,
53 XML_WANT_CHARS = 1 << XML_STATE_CHARS,
54 XML_WANT_WHITE = 1 << XML_STATE_WHITE,
55 XML_WANT_CDATA = 1 << XML_STATE_CDATA,
56 XML_WANT_STAG = 1 << XML_STATE_STAG,
57 XML_WANT_ETAG = 1 << XML_STATE_ETAG,
58 XML_WANT_COMMENT = 1 << XML_STATE_COMMENT,
59 XML_WANT_PI = 1 << XML_STATE_PI,
60 XML_WANT_EOF = 1 << XML_STATE_EOF,
65 XML_FLAG_VALIDATING = 0x1,
66 XML_FLAG_VERSION_1_1 = 0x2,
81 #define XML_BUF_SIZE 32
84 struct xml_source *next; /* Link list of pending fastbufs (xml_context.sources) */
86 u32 buf[2 * XML_BUF_SIZE]; /* Read buffer with Unicode values and categories */
87 u32 *bptr, *bstop; /* Current state of the buffer */
92 enum xml_source_flags {
93 XML_SRC_DECL = 0x1, /* Expected document/text declaration */
94 XML_SRC_EOF = 0x2, /* Reached the end of the fastbuf */
95 XML_SRC_NEW_LINE = 0x4, /* The last read character is 0xD */
96 XML_SRC_SURROUND = 0x8, /* Surround the text with 0x20 (references to parameter entities) */
97 XML_SRC_DOCUMENT = 0x10, /* The document entity */
98 XML_SRC_EXTERNAL = 0x20, /* An external entity */
103 cnode n; /* Node for list of parent's sons */
104 uns type; /* XML_NODE_x */
105 struct xml_node *parent; /* Parent node */
109 struct xml_node node;
110 char *name; /* Element name */
111 clist sons; /* List of subnodes */
112 struct xml_dtd_elem *dtd; /* Element DTD */
113 slist attrs; /* Link list of attributes */
119 char *err_msg; /* Last error message */
120 enum xml_error err_code; /* Last error code */
121 void *throw_buf; /* Where to jump on error */
122 void (*h_warn)(struct xml_context *ctx); /* Warning callback */
123 void (*h_error)(struct xml_context *ctx); /* Recoverable error callback */
124 void (*h_fatal)(struct xml_context *ctx); /* Unrecoverable error callback */
126 /* Memory management */
127 struct mempool *pool; /* Most data */
128 struct fastbuf *chars; /* Character data */
129 struct fastbuf *value; /* Attribute value / comment / processing instruction data */
130 char *name; /* Attribute name, processing instruction target */
133 struct xml_source *sources; /* Stack of pending sources */
134 u32 *bptr, *bstop; /* Character buffer */
135 uns depth; /* Nesting level */
137 /* SAX-like interface */
138 void (*h_document_start)(struct xml_context *ctx); /* Called before entering prolog */
139 void (*h_document_end)(struct xml_context *ctx); /* Called after leaving epilog */
140 void (*h_xml_decl)(struct xml_context *ctx); /* Called after the XML declaration */
141 void (*h_doctype_decl)(struct xml_context *ctx); /* Called in the doctype declaration just before internal subset */
142 void (*h_pi)(struct xml_context *ctx); /* Called after a processing instruction */
143 void (*h_comment)(struct xml_context *ctx); /* Called after a comment */
146 struct xml_node *node; /* Current XML node */
147 uns flags; /* XML_FLAG_x */
148 struct xml_element *element; /* Current element */
149 void *attribute_table;
155 struct xml_ext_id eid;
159 void (*start_dtd)(struct xml_context *ctx);
160 void (*end_dtd)(struct xml_context *ctx);
161 void (*start_element)(struct xml_context *ctx);
162 void (*end_element)(struct xml_context *ctx);
163 void (*start_cdata)(struct xml_context *ctx);
164 void (*end_cdata)(struct xml_context *ctx);
165 void (*start_entity)(struct xml_context *ctx);
166 void (*end_entity)(struct xml_context *ctx);
167 void (*chacacters)(struct xml_context *ctx);
168 struct fastbuf *(*resolve_entity)(struct xml_context *ctx);
169 void (*notation_decl)(struct xml_context *ctx);
170 void (*unparsed_entity_decl)(struct xml_context *ctx);
173 struct xml_attribute {
176 struct xml_element *element;
177 struct xml_attribute *next;
178 struct xml_dtd_attribute *dtd;
183 struct xml_attribute *attrs;
184 struct xml_element *parent;
185 struct xml_dtd_element *dtd;
188 /*** Document Type Definition (DTD) ***/
191 slist gents; /* Link list of general entities */
192 slist pents; /* Link list of parameter entities */
193 slist notns; /* Link list of notations */
194 slist elems; /* Link list of elements */
195 void *tab_gents; /* Hash table of general entities */
196 void *tab_pents; /* Hash table of parameter entities */
197 void *tab_notns; /* Hash table of notations */
198 void *tab_elems; /* Hash table of elements */
199 void *tab_attrs; /* Hash table of element attributes */
200 void *tab_evals; /* Hash table of enumerated attribute values */
201 void *tab_enotns; /* Hash table of enumerated attribute notations */
206 enum xml_dtd_notn_flags {
207 XML_DTD_NOTN_DECLARED = 0x1, /* The notation has been declared (internal usage) */
210 struct xml_dtd_notn {
211 snode n; /* Node in xml_dtd.notns */
212 uns flags; /* XML_DTD_NOTN_x */
213 char *name; /* Notation name */
214 struct xml_ext_id eid; /* External id */
219 enum xml_dtd_ent_flags {
220 XML_DTD_ENT_DECLARED = 0x1, /* The entity has been declared (internal usage) */
221 XML_DTD_ENT_VISITED = 0x2, /* Cycle detection (internal usage) */
222 XML_DTD_ENT_PARAMETER = 0x4, /* Parameter entity, general otherwise */
223 XML_DTD_ENT_EXTERNAL = 0x8, /* External entity, internal otherwise */
224 XML_DTD_ENT_UNPARSED = 0x10, /* Unparsed entity, parsed otherwise */
225 XML_DTD_ENT_TRIVIAL = 0x20, /* Replacement text is a sequence of characters and character references */
229 snode n; /* Node in xml_dtd.[gp]ents */
230 uns flags; /* XML_DTD_ENT_x */
231 char *name; /* Entity name */
232 char *text; /* Replacement text / expanded replacement text (XML_DTD_ENT_TRIVIAL) */
233 uns len; /* Text length */
234 struct xml_ext_id eid; /* External ID */
235 struct xml_dtd_notn *notn; /* Notation (XML_DTD_ENT_UNPARSED only) */
240 enum xml_dtd_elem_flags {
241 XML_DTD_ELEM_DECLARED = 0x1, /* The element has been declared (internal usage) */
244 struct xml_dtd_elem {
248 struct xml_dtd_elem_node *node;
251 struct xml_dtd_elem_node {
253 struct xml_dtd_elem_node *parent;
259 enum xml_dtd_elem_node_type {
265 enum xml_dtd_elem_node_occur {
266 XML_DTD_ELEM_OCCUR_ONCE,
267 XML_DTD_ELEM_OCCUR_OPT,
268 XML_DTD_ELEM_OCCUR_MULT,
269 XML_DTD_ELEM_OCCUR_PLUS,
275 enum xml_dtd_attribute_default {
282 enum xml_dtd_attribute_type {
295 struct xml_dtd_attr {
297 struct xml_dtd_elem *elem;
298 enum xml_dtd_attribute_type type;
299 enum xml_dtd_attribute_default default_mode;
303 struct xml_dtd_eval {
304 struct xml_dtd_attr *attr;
308 struct xml_dtd_enotn {
309 struct xml_dtd_attr *attr;
310 struct xml_dtd_notn *notn;
313 void xml_init(struct xml_context *ctx); /* NOTE(review): presumably prepares ctx for use; definition not visible here -- confirm */
314 void xml_cleanup(struct xml_context *ctx); /* NOTE(review): presumably the counterpart of xml_init(); confirm what it releases */
315 void xml_set_source(struct xml_context *ctx, struct fastbuf *fb); /* NOTE(review): presumably makes fb an input source of ctx (cf. xml_context.sources) -- confirm */
316 int xml_next(struct xml_context *ctx); /* NOTE(review): meaning of the int result is not visible here (possibly an XML_STATE_x value) -- confirm */