2 * Sherlock Library -- A simple XML parser
4 * (c) 2007 Pavel Charvat <pchar@ucw.cz>
6 * This software may be freely distributed and used according to the terms
7 * of the GNU Lesser General Public License.
10 #ifndef _SHERLOCK_XML_COMMON_H
11 #define _SHERLOCK_XML_COMMON_H
13 #include "sherlock/xml/xml.h"
14 #include "sherlock/xml/dtd.h"
/* Tracing variant: forwards the message to DBG(), prefixed with "XML <row>: "
 * where the row number is taken from xml_row(c). */
19 #define TRACE(c, f, p...) do { DBG("XML %u: " f, xml_row(c), ##p); } while(0)
/* No-op variant: expands to an empty statement.
 * NOTE(review): the preprocessor conditional that selects between the two
 * TRACE definitions is not visible in this excerpt. */
21 #define TRACE(c, f, p...) do {} while(0)
24 /*** Error handling ***/
/* Error-reporting entry points (declarations only; the definitions are not
 * part of this header). xml_throw() and xml_fatal() carry the NONRET marker
 * (presumably a no-return attribute -- confirm against its definition).
 * xml_warn(), xml_error() and xml_fatal() take a format string followed by
 * variadic arguments (presumably printf-style -- confirm). */
26 void NONRET xml_throw(struct xml_context *ctx);
27 void xml_warn(struct xml_context *ctx, const char *format, ...);
28 void xml_error(struct xml_context *ctx, const char *format, ...);
29 void NONRET xml_fatal(struct xml_context *ctx, const char *format, ...);
31 /*** Memory management ***/
/* Members of the stack record used by xml_do_push()/xml_do_pop().
 * NOTE(review): the enclosing struct declaration and any further members
 * (xml_do_push() also writes s->flags) are not visible in this excerpt. */
/* Previous top of the list: xml_do_push() stores ctx->stack_list here */
34 struct xml_stack *next;
/* Mempool state handed to mp_restore(ctx->stack, ...) by xml_do_pop() */
35 struct mempool_state state;
/* Allocate a stack record of `size` bytes from the ctx->stack mempool and
 * fill it with the current ctx->flags and the current head of
 * ctx->stack_list.
 * NOTE(review): the return type, the store of `state` into the record, the
 * update of ctx->stack_list and the return statement are not visible in
 * this excerpt; xml_push_dom() uses the result as a pointer to the record. */
40 xml_do_push(struct xml_context *ctx, uns size)
42 /* Saves ctx->stack and ctx->flags state */
43 struct mempool_state state;
/* The pool state is captured before the record itself is allocated;
 * presumably so that restoring it in xml_do_pop() also releases the record. */
44 mp_save(ctx->stack, &state);
45 struct xml_stack *s = mp_alloc(ctx->stack, size);
47 s->flags = ctx->flags;
48 s->next = ctx->stack_list;
/* Undo a xml_do_push(): unlink record `s` from ctx->stack_list, restore
 * ctx->flags from it and roll the ctx->stack mempool back to the state
 * stored in the record. */
54 xml_do_pop(struct xml_context *ctx, struct xml_stack *s)
56 /* Restore ctx->stack and ctx->flags state */
/* s->next and s->flags are read before mp_restore(): the record was allocated
 * from ctx->stack after the saved state was taken, so it presumably does not
 * survive the restore. */
57 ctx->stack_list = s->next;
58 ctx->flags = s->flags;
59 mp_restore(ctx->stack, &s->state);
/* Push a plain stack record: calls xml_do_push() with the size of
 * struct xml_stack, i.e. without any extra payload.
 * NOTE(review): some lines of this function are not visible in this excerpt. */
63 xml_push(struct xml_context *ctx)
66 xml_do_push(ctx, sizeof(struct xml_stack));
/* Pop the record currently at the head of ctx->stack_list.
 * Asserts that the list is non-empty before calling xml_do_pop().
 * NOTE(review): some lines of this function are not visible in this excerpt. */
70 xml_pop(struct xml_context *ctx)
73 ASSERT(ctx->stack_list);
74 xml_do_pop(ctx, ctx->stack_list);
/* Stack record used while a DOM node is open (see xml_push_dom() and
 * xml_pop_dom()). */
77 struct xml_dom_stack {
/* Generic record; it is the first member, and xml_pop_dom() relies on that
 * when it casts ctx->stack_list to struct xml_dom_stack pointer */
78 struct xml_stack stack;
/* State of ctx->pool: saved by xml_push_dom(), restored by xml_pop_dom() */
79 struct mempool_state state;
/* Open a new DOM node: push a struct xml_dom_stack record, remember the
 * current state of ctx->pool in it, allocate the node from ctx->pool and
 * attach it under the current node (ctx->node), if there is one.
 * NOTE(review): the remaining node initialisation, the update of ctx->node
 * and the return statement are not visible in this excerpt. */
82 static inline struct xml_node *
83 xml_push_dom(struct xml_context *ctx)
85 /* Create a new DOM node */
86 TRACE(ctx, "push_dom");
87 struct xml_dom_stack *s = xml_do_push(ctx, sizeof(*s));
/* Saved before the node is allocated, so the saved state predates the node */
88 mp_save(ctx->pool, &s->state);
89 struct xml_node *n = mp_alloc(ctx->pool, sizeof(*n));
/* Intentional assignment inside the condition: set the parent, then append
 * the node to the parent's list of sons only when a parent exists */
90 if (n->parent = ctx->node)
91 clist_add_tail(&n->parent->sons, &n->n);
/* Close the current DOM node and pop its struct xml_dom_stack record.
 * The visible statements unlink ctx->node from its sibling list, roll
 * ctx->pool back to the state saved by xml_push_dom() and finally pop the
 * generic stack record.
 * NOTE(review): several lines are missing from this excerpt, including the
 * conditions that guard the unlink/restore (presumably driven by `free`)
 * and the update of ctx->node from `p` -- confirm against the full source. */
96 xml_pop_dom(struct xml_context *ctx, uns free)
98 /* Leave DOM subtree */
99 TRACE(ctx, "pop_dom");
/* Parent is read first, while ctx->node is still intact */
101 struct xml_node *p = ctx->node->parent;
/* The top record is reinterpreted as struct xml_dom_stack; valid because
 * `stack` is that struct's first member */
102 struct xml_dom_stack *s = (void *)ctx->stack_list;
105 /* See xml_pop_element() for cleanup of attribute hash table */
107 clist_remove(&ctx->node->n);
108 mp_restore(ctx->pool, &s->state);
111 xml_do_pop(ctx, &s->stack);
/* Size of the header placed in front of a hash table: one pointer, rounded
 * up with ALIGN_TO() to CPU_STRUCT_ALIGN. */
114 #define XML_HASH_HDR_SIZE ALIGN_TO(sizeof(void *), CPU_STRUCT_ALIGN)
/* Allocator hooks for a hash table instantiation named via HASH_PREFIX().
 * alloc: reads a pointer stored XML_HASH_HDR_SIZE bytes before the table and
 *        passes it to mp_alloc() as the pool to allocate `size` bytes from.
 *        The address is computed with arithmetic on a void pointer, which is
 *        a GNU C extension.
 * free:  deliberately empty; both arguments are marked UNUSED. */
115 #define XML_HASH_GIVE_ALLOC struct HASH_PREFIX(table); \
116 static inline void *HASH_PREFIX(alloc)(struct HASH_PREFIX(table) *t, uns size) \
117 { return mp_alloc(*(void **)((void *)t - XML_HASH_HDR_SIZE), size); } \
118 static inline void HASH_PREFIX(free)(struct HASH_PREFIX(table) *t UNUSED, void *p UNUSED) {}
/* Declaration only. Presumably allocates a table of `size` bytes from `pool`
 * preceded by the header that the alloc hook above reads -- confirm against
 * the definition. */
120 void *xml_hash_new(struct mempool *pool, uns size);
/* Begin collecting character data in ctx->chars: start a buffer in ctx->pool
 * with mp_start_noalign(ctx->pool, 1), point the fastbuf's buffer, bptr and
 * bstop at its beginning and set bufend to the buffer start plus
 * mp_avail(ctx->pool).
 * NOTE(review): the return type and any further lines are not visible in
 * this excerpt. */
123 xml_start_chars(struct xml_context *ctx)
125 struct fastbuf *fb = &ctx->chars;
126 fb->bstop = fb->bptr = fb->buffer = mp_start_noalign(ctx->pool, 1);
127 fb->bufend = fb->buffer + mp_avail(ctx->pool);
/* Finish the buffer opened by xml_start_chars(): close the mempool buffer
 * with mp_end() and reset all four ctx->chars pointers to NULL.
 * NOTE(review): the return type, the use of `len`, the handling of `c` after
 * mp_end() and at least one statement between the expansion and mp_end()
 * are not visible in this excerpt. */
131 xml_end_chars(struct xml_context *ctx, uns *len)
133 struct fastbuf *fb = &ctx->chars;
/* Current capacity of the buffer */
134 uns l = fb->bufend - fb->buffer;
/* Buffer completely full: expand it and re-base bptr at offset l from the
 * pointer returned by mp_expand() */
135 if (fb->bptr == fb->bufend)
136 fb->bptr = mp_expand(ctx->pool) + l;
/* The end passed to mp_end() is one byte past bptr -- presumably room for a
 * terminator written on a line not shown here */
138 char *c = mp_end(ctx->pool, fb->bptr + 1);
139 fb->bptr = fb->bstop = fb->buffer = fb->bufend = NULL;
144 /*** Reading of document/external entities ***/
/* Sizing constant for the per-source read buffer, which is declared with
 * 2 * XML_BUF_SIZE u32 slots. */
146 #define XML_BUF_SIZE 32 /* At least 8 -- hardcoded */
/* Per-source reader state.
 * NOTE(review): the enclosing struct declaration is not visible in this
 * excerpt; the self-referential `next` pointer suggests struct xml_source. */
149 struct xml_source *next; /* Linked list of pending fastbufs (xml_context.sources) */
150 struct fastbuf *fb; /* Source fastbuf */
151 struct fastbuf *wrapped_fb; /* Original wrapped fastbuf (needed for cleanup) */
152 struct fastbuf wrap_fb; /* Fbmem wrapper */
153 u32 buf[2 * XML_BUF_SIZE]; /* Read buffer with Unicode values and categories */
154 u32 *bptr, *bstop; /* Current state of the buffer */
155 uns row; /* File position */
156 char *expected_encoding; /* Initial encoding before any transformation has been made (expected in XMLDecl/TextDecl) */
157 char *fb_encoding; /* Encoding of the source fastbuf */
158 char *decl_encoding; /* Encoding read from the XMLDecl/TextDecl */
159 uns refill_cat1; /* Character categories, which should be directly passed to the buffer */
160 uns refill_cat2; /* Character categories, which should be processed as newlines (possibly in some built-in sequences) */
161 void (*refill)(struct xml_context *ctx); /* Callback to decode source characters to the buffer */
162 unsigned short *refill_in_to_x; /* Libcharset input table */
163 uns saved_depth; /* Saved ctx->depth */
/* Declaration only; marked NONRET. Called by xml_dec() when ctx->depth is
 * already zero at the end of a block. */
166 void NONRET xml_fatal_nested(struct xml_context *ctx);
/* Counterpart of xml_dec().
 * NOTE(review): the return type and the body are not visible in this
 * excerpt; by symmetry with xml_dec() it presumably increments ctx->depth
 * -- confirm against the full source. */
169 xml_inc(struct xml_context *ctx)
171 /* Called after the first character of a block */
/* Decrement ctx->depth at the end of a block and report a nesting error if
 * the depth was already zero.
 * NOTE(review): the return type is not visible in this excerpt. */
177 xml_dec(struct xml_context *ctx)
179 /* Called after the last character of a block */
/* Post-decrement: the test sees the value before it is decremented, so the
 * fatal path is taken exactly when the depth was 0 on entry */
181 if (unlikely(!ctx->depth--))
182 xml_fatal_nested(ctx);
185 #include "obj/sherlock/xml/unicat.h"
/* NOTE(review): the header of this function, its first condition and its
 * final fallback are not visible in this excerpt. The visible branches map a
 * code point `c` to a one-bit mask (1U shifted by a table entry). */
/* Two-level lookup: xml_char_tab2[c >> 8] gives an offset into xml_char_tab1,
 * which is then indexed by the low byte of c */
191 return 1U << xml_char_tab1[(c & 0xff) + xml_char_tab2[c >> 8]];
/* Remaining values below 0x110000 are looked up by c >> 16 only */
192 else if (likely(c < 0x110000))
193 return 1U << xml_char_tab3[c >> 16];
/* NOTE(review): belongs to a separate function whose header is not visible
 * in this excerpt. It returns the raw xml_char_tab1 entry for `c` (no shift
 * into a mask); the valid range of `c` cannot be determined from here. */
201 return xml_char_tab1[c];
/* Input-source management (declarations only; definitions are elsewhere).
 * Judging by the names only: xml_push_source() adds a new source and returns
 * its record, xml_push_entity() starts reading the given DTD entity, and
 * xml_refill() refills the read buffer -- confirm against the definitions. */
204 struct xml_source *xml_push_source(struct xml_context *ctx, uns flags);
205 void xml_push_entity(struct xml_context *ctx, struct xml_dtd_ent *ent);
207 void xml_refill(struct xml_context *ctx);
/* Look at the next character without consuming it.
 * NOTE(review): only the buffer-exhausted test is visible; the action taken
 * when ctx->bptr has reached ctx->bstop (presumably a refill) and the return
 * statement are missing from this excerpt. */
210 xml_peek_char(struct xml_context *ctx)
212 if (ctx->bptr == ctx->bstop)
/* Look at the category of the next character without consuming it.
 * NOTE(review): only the buffer-exhausted test is visible; the action taken
 * when ctx->bptr has reached ctx->bstop (presumably a refill) and the return
 * statement are missing from this excerpt. */
218 xml_peek_cat(struct xml_context *ctx)
220 if (ctx->bptr == ctx->bstop)
/* Read the next character; obtains it through xml_peek_char().
 * NOTE(review): the lines that advance ctx->bptr and return the value are
 * not visible in this excerpt. */
226 xml_get_char(struct xml_context *ctx)
228 uns c = xml_peek_char(ctx);
/* Read the category of the next character; obtains it through
 * xml_peek_cat().
 * NOTE(review): the lines that advance ctx->bptr and return the value are
 * not visible in this excerpt. */
234 xml_get_cat(struct xml_context *ctx)
236 uns c = xml_peek_cat(ctx);
/* Return the buffer slot two positions before ctx->bptr. Together with
 * xml_last_cat() and xml_unget_char() this shows the buffer is consumed two
 * slots at a time, the first slot of each pair being the character.
 * Does not check that such a slot exists. */
242 xml_last_char(struct xml_context *ctx)
244 return ctx->bptr[-2];
/* Return the buffer slot immediately before ctx->bptr, i.e. the second slot
 * of the most recently consumed pair (its category, going by the name).
 * Does not check that such a slot exists. */
248 xml_last_cat(struct xml_context *ctx)
250 return ctx->bptr[-1];
/* Consume the character at ctx->bptr without a buffer-exhausted check in the
 * visible part.
 * NOTE(review): the lines that advance ctx->bptr and return are not visible
 * in this excerpt. */
254 xml_skip_char(struct xml_context *ctx)
256 uns c = ctx->bptr[0];
/* Step ctx->bptr back by one pair (two slots) and return the slot it now
 * points at. Does not check that ctx->bptr stays inside the buffer. */
262 xml_unget_char(struct xml_context *ctx)
264 return *(ctx->bptr -= 2);
/* Declaration only; presumably releases the sources held by the context --
 * confirm against the definition. */
267 void xml_sources_cleanup(struct xml_context *ctx);
/* Fatal "expected ..." reporters used by the inline parsing helpers in this
 * header (declarations only; all three are marked NONRET):
 *  - xml_fatal_expected():       called by xml_parse_char() with the character
 *                                that was required
 *  - xml_fatal_expected_white(): called by xml_parse_white() when mandatory
 *                                white space is missing
 *  - xml_fatal_expected_quot():  called by xml_parse_quote() when the next
 *                                character is neither ' nor " */
271 void NONRET xml_fatal_expected(struct xml_context *ctx, uns c);
272 void NONRET xml_fatal_expected_white(struct xml_context *ctx);
273 void NONRET xml_fatal_expected_quot(struct xml_context *ctx);
/* Consume a run of white-space characters; with `mandatory` set, a run of
 * zero characters is a fatal error.
 * NOTE(review): the return type, the declaration of `cnt`, the loop body and
 * the return statement are not visible in this excerpt. */
276 xml_parse_white(struct xml_context *ctx, uns mandatory)
278 /* mandatory=1 -> S ::= (#x20 | #x9 | #xD | #xA)+
279 * mandatory=0 -> S? */
/* Loops while the category of the next character has XML_CHAR_WHITE set */
281 while (xml_peek_cat(ctx) & XML_CHAR_WHITE)
286 if (unlikely(mandatory && !cnt))
287 xml_fatal_expected_white(ctx);
/* Require the next input character to be `c`: reads one character with
 * xml_get_char() and calls xml_fatal_expected() if it differs. The character
 * is read from the input in either case.
 * NOTE(review): the return type is not visible in this excerpt. */
292 xml_parse_char(struct xml_context *ctx, uns c)
294 /* Consumes a given Unicode character */
295 if (unlikely(c != xml_get_char(ctx)))
296 xml_fatal_expected(ctx, c);
/* Require the input to continue with the characters of `seq`, checking each
 * one through xml_parse_char().
 * NOTE(review): the return type and the loop header are not visible in this
 * excerpt; presumably the loop runs until the terminating NUL of `seq`. */
300 xml_parse_seq(struct xml_context *ctx, const char *seq)
302 /* Consumes a given sequence of ASCII characters */
304 xml_parse_char(ctx, *seq++);
/* Declaration only; presumably consumes an '=' with optional surrounding
 * white space (the XML Eq production) -- confirm against the definition. */
307 void xml_parse_eq(struct xml_context *ctx);
/* Read one character and require it to be a single or a double quote;
 * anything else ends in xml_fatal_expected_quot().
 * NOTE(review): the return type and the return statement are not visible in
 * this excerpt; presumably the quote character that was read is returned. */
310 xml_parse_quote(struct xml_context *ctx)
313 uns c = xml_get_char(ctx);
314 if (unlikely(c != '\'' && c != '\"'))
315 xml_fatal_expected_quot(ctx);
/* Parsers for names, literals, references and attribute values
 * (declarations only; definitions are elsewhere).
 * The variants taking a `pool` return a char pointer, presumably a string
 * allocated from that pool -- confirm against the definitions.
 * xml_parse_char_ref() returns an uns, presumably the referenced code point
 * -- confirm. */
319 char *xml_parse_name(struct xml_context *ctx, struct mempool *pool);
320 void xml_skip_name(struct xml_context *ctx);
321 char *xml_parse_nmtoken(struct xml_context *ctx, struct mempool *pool);
323 char *xml_parse_system_literal(struct xml_context *ctx, struct mempool *pool);
324 char *xml_parse_pubid_literal(struct xml_context *ctx, struct mempool *pool);
326 uns xml_parse_char_ref(struct xml_context *ctx);
327 void xml_parse_ref(struct xml_context *ctx);
328 void xml_parse_pe_ref(struct xml_context *ctx);
330 char *xml_parse_attr_value(struct xml_context *ctx, struct xml_dtd_attr *attr);
/* Parsers for DTD declarations (declarations only; definitions are
 * elsewhere). Judging by the names, one per declaration kind: notation,
 * entity, element and attribute list -- confirm against the definitions. */
332 void xml_parse_notation_decl(struct xml_context *ctx);
333 void xml_parse_entity_decl(struct xml_context *ctx);
334 void xml_parse_element_decl(struct xml_context *ctx);
335 void xml_parse_attr_list_decl(struct xml_context *ctx);
/* Comment and processing-instruction handling, each as a push/pop/skip
 * triple, followed by set-up and tear-down of the attribute table
 * (declarations only; definitions are elsewhere).
 * NOTE(review): the exact division of work between the push, pop and skip
 * variants cannot be determined from this header -- see the definitions. */
337 void xml_push_comment(struct xml_context *ctx);
338 void xml_pop_comment(struct xml_context *ctx);
339 void xml_skip_comment(struct xml_context *ctx);
341 void xml_push_pi(struct xml_context *ctx);
342 void xml_pop_pi(struct xml_context *ctx);
343 void xml_skip_pi(struct xml_context *ctx);
345 void xml_attrs_table_init(struct xml_context *ctx);
346 void xml_attrs_table_cleanup(struct xml_context *ctx);