2 * Sherlock Library -- A simple XML parser
4 * (c) 2007 Pavel Charvat <pchar@ucw.cz>
6 * This software may be freely distributed and used according to the terms
7 * of the GNU Lesser General Public License.
10 #ifndef _SHERLOCK_XML_XML_H
11 #define _SHERLOCK_XML_XML_H
13 #include "lib/clists.h"
14 #include "lib/slists.h"
15 #include "lib/mempool.h"
16 #include "lib/fastbuf.h"
/* Error severity codes (XML_ERR_x), in increasing order of severity; all are non-zero, zero being reported as success by xml_parse() */
/* NOTE(review): presumably the enumerators of enum xml_error -- the enum header is not visible in this excerpt */
24 XML_ERR_WARN = 1000, /* Warning */
25 XML_ERR_ERROR = 2000, /* Recoverable error */
26 XML_ERR_FATAL = 3000, /* Unrecoverable error */
/* Parser states (XML_STATE_x); where a trailing comment names an XML_PULL_x flag, that flag is the state's counterpart */
/* NOTE(review): XML_STATE_EOF is presumably the first enumerator (value zero), matching "zero on EOF" in the xml_next() comment -- confirm */
31 XML_STATE_EOF, /* EOF or a fatal error */
32 XML_STATE_START, /* Initial state */
33 XML_STATE_XML_DECL, /* XML_PULL_XML_DECL */
34 XML_STATE_DOCTYPE_DECL, /* XML_PULL_DOCTYPE_DECL */
35 XML_STATE_CHARS, /* XML_PULL_CHARS */
36 XML_STATE_CDATA, /* XML_PULL_CDATA */
37 XML_STATE_STAG, /* XML_PULL_STAG */
38 XML_STATE_ETAG, /* XML_PULL_ETAG */
39 XML_STATE_COMMENT, /* XML_PULL_COMMENT */
40 XML_STATE_PI, /* XML_PULL_PI */
/* NOTE(review): the states below have no XML_PULL_x counterpart; presumably internal to the parser -- confirm */
43 XML_STATE_CHARS_BEFORE_STAG,
44 XML_STATE_CHARS_BEFORE_ETAG,
45 XML_STATE_CHARS_BEFORE_CDATA,
46 XML_STATE_CHARS_BEFORE_COMMENT,
47 XML_STATE_CHARS_BEFORE_PI,
48 XML_STATE_PROLOG_COMMENT,
50 XML_STATE_EPILOG_COMMENT,
/* Bit flags for the PULL interface (XML_PULL_x); each single-bit flag shares its suffix with an XML_STATE_x value */
55 XML_PULL_XML_DECL = 0x00000001, /* Stop after the XML declaration */
56 XML_PULL_DOCTYPE_DECL = 0x00000002, /* Stop in the doctype declaration (before optional internal subset) */
57 XML_PULL_CHARS = 0x00000004,
58 XML_PULL_CDATA = 0x00000008,
59 XML_PULL_STAG = 0x00000010,
60 XML_PULL_ETAG = 0x00000020,
61 XML_PULL_COMMENT = 0x00000040,
62 XML_PULL_PI = 0x00000080,
63 XML_PULL_ALL = 0xffffffff, /* All bits set */
67 /* Enable reporting of various events via SAX and/or PULL interface */
68 XML_REPORT_COMMENTS = 0x00000001, /* Report comments */
69 XML_REPORT_PIS = 0x00000002, /* Report processing instructions */
70 XML_REPORT_CHARS = 0x00000004, /* Report characters */
71 XML_REPORT_TAGS = 0x00000008, /* Report element starts/ends */
72 XML_REPORT_MISC = XML_REPORT_COMMENTS | XML_REPORT_PIS, /* Report comments and processing instructions */
73 XML_REPORT_ALL = XML_REPORT_MISC | XML_REPORT_CHARS | XML_REPORT_TAGS, /* All XML_REPORT_x bits */
75 /* Enable construction of DOM for these types */
76 XML_ALLOC_COMMENTS = 0x00000010, /* Create comment nodes */
77 XML_ALLOC_PIS = 0x00000020, /* Create processing instruction nodes */
78 XML_ALLOC_CHARS = 0x00000040, /* Create character nodes */
79 XML_ALLOC_TAGS = 0x00000080, /* Create element nodes */
80 XML_ALLOC_MISC = XML_ALLOC_COMMENTS | XML_ALLOC_PIS, /* Create comment and processing instruction nodes */
81 XML_ALLOC_ALL = XML_ALLOC_MISC | XML_ALLOC_CHARS | XML_ALLOC_TAGS, /* All XML_ALLOC_x bits */
83 /* Other parameters */
84 XML_UNFOLD_CDATA = 0x00000100, /* Unfold CDATA sections */
85 XML_VALIDATING = 0x00000200, /* Validate everything (not fully implemented!) */
86 XML_PARSE_DTD = 0x00000400, /* Enable parsing of DTD */
88 /* Internals, do not change! */
89 XML_EMPTY_ELEM_TAG = 0x00010000, /* The current element matches EmptyElemTag */
90 XML_VERSION_1_1 = 0x00020000, /* XML version is 1.1, otherwise 1.0 */
91 XML_HAS_EXTERNAL_SUBSET = 0x00040000, /* The document contains a reference to external DTD subset */
92 XML_HAS_INTERNAL_SUBSET = 0x00080000, /* The document contains an internal subset */
93 XML_SRC_EOF = 0x00100000, /* EOF reached */
94 XML_SRC_EXPECTED_DECL = 0x00200000, /* Just before optional or required XMLDecl/TextDecl */
95 XML_SRC_NEW_LINE = 0x00400000, /* The last read character is 0xD */
96 XML_SRC_SURROUND = 0x00800000, /* Surround the text with 0x20 (references to parameter entities) */
97 XML_SRC_DOCUMENT = 0x01000000, /* The document entity */
98 XML_SRC_EXTERNAL = 0x02000000, /* An external entity */
/* Iterate over the children of `node`: expands to CLIST_FOR_EACH over (node)->sons with `var` of type struct xml_node * */
108 #define XML_NODE_FOR_EACH(var, node) CLIST_FOR_EACH(struct xml_node *, var, (node)->sons)
/* Iterate over the attributes of `node`: expands to SLIST_FOR_EACH over (node)->attrs with `var` of type struct xml_attr * */
109 #define XML_ATTR_FOR_EACH(var, node) SLIST_FOR_EACH(struct xml_attr *, var, (node)->attrs)
/* Members of a DOM node (presumably struct xml_node -- the struct header is not visible in this excerpt) */
112 cnode n; /* Node for list of parent's sons */
113 uns type; /* XML_NODE_x */
114 struct xml_node *parent; /* Parent node */
115 char *name; /* Element name / PI target */
116 clist sons; /* Children nodes */
119 char *text; /* PI text / Comment / CDATA */
120 uns len; /* Text length in bytes */
123 struct xml_dtd_elem *dtd; /* Element DTD */
124 slist attrs; /* Linked list of element attributes */
127 void *user; /* User-defined (initialized to NULL) */
/* Members of an element attribute (presumably struct xml_attr -- the struct header is not visible in this excerpt) */
131 snode n; /* Node for elem->attrs */
132 struct xml_node *elem; /* Parent element */
133 char *name; /* Attribute name */
134 char *val; /* Attribute value */
135 void *user; /* User-defined (initialized to NULL) */
/* Error handling: last reported error and one callback per XML_ERR_x severity */
140 char *err_msg; /* Last error message */
141 enum xml_error err_code; /* Last error code */
142 void *throw_buf; /* Where to jump on error */
143 void (*h_warn)(struct xml_context *ctx); /* Warning callback */
144 void (*h_error)(struct xml_context *ctx); /* Recoverable error callback */
145 void (*h_fatal)(struct xml_context *ctx); /* Unrecoverable error callback */
147 /* Memory management */
148 struct mempool *pool; /* DOM pool */
149 struct mempool *stack; /* Stack pool (free as soon as possible) */
150 struct xml_stack *stack_list; /* See xml_push(), xml_pop() */
/* NOTE(review): no XML_FLAG_x constants are visible in this header; presumably this holds the XML_REPORT_x / XML_ALLOC_x / other flag bits -- confirm */
151 uns flags; /* XML_FLAG_x (restored on xml_pop()) */
152 uns depth; /* Nesting level */
153 struct fastbuf chars; /* Character data / attribute value */
154 void *tab_attrs; /* Hash table of element attributes */
/* Input: current source, preprocessed character buffer and character category masks */
157 struct xml_source *src; /* Current source */
158 u32 *bptr, *bstop; /* Buffer with preprocessed characters (validated UCS-4 + category flags) */
159 uns cat_chars; /* Unicode range of supported characters (cdata, attribute values, ...) */
160 uns cat_unrestricted; /* Unrestricted characters (may appear in document/external entities) */
161 uns cat_new_line; /* New line characters */
162 uns cat_name; /* Characters that may appear in names */
163 uns cat_sname; /* Characters that may begin a name */
165 /* SAX-like interface */
166 void (*h_document_start)(struct xml_context *ctx); /* Called before entering prolog */
167 void (*h_document_end)(struct xml_context *ctx); /* Called after leaving epilog */
168 void (*h_xml_decl)(struct xml_context *ctx); /* Called after the XML declaration */
169 void (*h_doctype_decl)(struct xml_context *ctx); /* Called in the doctype declaration (before optional internal subset) */
170 void (*h_comment)(struct xml_context *ctx); /* Called after a comment (only with XML_REPORT_COMMENTS) */
171 void (*h_pi)(struct xml_context *ctx); /* Called after a processing instruction (only with XML_REPORT_PIS) */
172 void (*h_stag)(struct xml_context *ctx); /* Called after STag or EmptyElemTag (only with XML_REPORT_TAGS) */
173 void (*h_etag)(struct xml_context *ctx); /* Called before ETag or after EmptyElemTag (only with XML_REPORT_TAGS) */
174 void (*h_chars)(struct xml_context *ctx); /* Called after some characters (only with XML_REPORT_CHARS) */
175 void (*h_cdata)(struct xml_context *ctx); /* Called after a CDATA section (only with XML_REPORT_CHARS and XML_UNFOLD_CDATA) */
176 void (*h_dtd_start)(struct xml_context *ctx); /* Called just after the DTD structure is initialized */
177 void (*h_dtd_end)(struct xml_context *ctx); /* Called after DTD subsets */
178 struct xml_dtd_ent *(*h_resolve_entity)(struct xml_context *ctx, char *name); /* NOTE(review): undocumented; presumably resolves the entity called `name` -- confirm when it is called and what it may return */
/* DOM tree, document type information and PULL interface state */
181 struct xml_node *dom; /* DOM root */
182 struct xml_node *node; /* Current DOM node */
186 char *doctype; /* The document type (or NULL if unknown) */
187 char *system_id; /* DTD external id */
188 char *public_id; /* DTD public id */
189 struct xml_dtd *dtd; /* The DTD structure (or NULL) */
190 uns state; /* Current state for the PULL interface (XML_STATE_x) */
191 uns pull; /* Parameters for the PULL interface (XML_PULL_x) */
/* NOTE(review): undocumented hooks; the names suggest entity start/end and notation / unparsed-entity declaration events -- confirm when (and whether) they are invoked */
193 void (*start_entity)(struct xml_context *ctx);
194 void (*end_entity)(struct xml_context *ctx);
195 void (*notation_decl)(struct xml_context *ctx);
196 void (*unparsed_entity_decl)(struct xml_context *ctx);
199 /* Initialize the XML context `ctx` */
200 void xml_init(struct xml_context *ctx);
202 /* Clean up all internal structures of `ctx` */
203 void xml_cleanup(struct xml_context *ctx);
205 /* Prepare the XML context `ctx` for reuse */
206 void xml_reset(struct xml_context *ctx);
208 /* Set up the XML source `fb` (the fastbuf will be closed automatically) */
209 void xml_set_source(struct xml_context *ctx, struct fastbuf *fb);
211 /* Parse without the PULL interface, return XML_ERR_x code (zero on success) */
212 uns xml_parse(struct xml_context *ctx);
214 /* Parse with the PULL interface, return XML_STATE_x (zero on EOF or fatal error) */
215 uns xml_next(struct xml_context *ctx);
/* NOTE(review): undocumented; presumably returns the current row (line) number of the input -- confirm against the implementation */
217 uns xml_row(struct xml_context *ctx);
/* Find the attribute called `name` on the element `node`; NOTE(review): presumably returns NULL when there is no such attribute -- confirm */
218 struct xml_attr *xml_attr_find(struct xml_context *ctx, struct xml_node *node, char *name);