X-Git-Url: http://mj.ucw.cz/gitweb/?a=blobdiff_plain;f=ucw-json%2Fparse.c;h=df99ea405196efad68538da24d28ad15073170cc;hb=HEAD;hp=181c6afb5bcdff274600080d6233bb358df4ca24;hpb=5b7aeae4b33b686e52304127d1648f7b71c9b405;p=libucw.git diff --git a/ucw-json/parse.c b/ucw-json/parse.c index 181c6afb..df99ea40 100644 --- a/ucw-json/parse.c +++ b/ucw-json/parse.c @@ -7,11 +7,10 @@ * of the GNU Lesser General Public License. */ -#undef LOCAL_DEBUG - #include -#include +#include #include +#include #include #include @@ -22,17 +21,15 @@ void json_set_input(struct json_context *js, struct fastbuf *in) { js->in_fb = in; js->in_line = 1; + js->in_column = 0; js->next_char = -1; js->next_token = NULL; js->in_eof = 0; - if (!js->trivial_token) - js->trivial_token = json_new_node(js, JSON_INVALID); } -// FIXME: Report column as well as line? static void NONRET json_parse_error(struct json_context *js, const char *msg) { - trans_throw("ucw.js.parse", js, "%s at line %u", msg, js->in_line); + trans_throw("ucw.json.parse", js, "%s at line %u:%u", msg, js->in_line, js->in_column); } static int json_get_char(struct json_context *js) @@ -43,9 +40,9 @@ static int json_get_char(struct json_context *js) if (c == -2) json_parse_error(js, "Malformed UTF-8 character"); js->in_eof = 1; - // FIXME: Reject alternative sequences return c; } + js->in_column++; return c; } @@ -96,6 +93,7 @@ static struct json_node *json_parse_number(struct json_context *js, int c) if (c == '.') { p = mp_append_char(js->pool, p, c); + c = json_get_char(js); if (!(c >= '0' && c <= '9')) json_parse_error(js, "Malformed number: no digits after decimal point"); while (c >= '0' && c <= '9') @@ -138,28 +136,30 @@ static struct json_node *json_parse_number(struct json_context *js, int c) static struct json_node *json_parse_name(struct json_context *js, int c) { - mp_push(js->pool); - char *p = mp_start_noalign(js->pool, 0); + char name[16]; + uint i = 0; while (c >= 'a' && c <= 'z') { - p = mp_append_char(js->pool, p, c); + if (i < sizeof(name) - 1) + name[i++] = c; c = json_get_char(js); } + if (i >= sizeof(name) - 1) + json_parse_error(js, "Invalid literal name"); + name[i] = 0; json_unget_char(js, c); - p = mp_end_string(js->pool, p); struct json_node *n; - if (!strcmp(p, "null")) + if (!strcmp(name, "null")) n = json_new_null(js); - else if (!strcmp(p, "false")) + else if (!strcmp(name, "false")) n = json_new_bool(js, 0); - else if (!strcmp(p, "true")) + else if (!strcmp(name, "true")) n = json_new_bool(js, 1); else json_parse_error(js, "Invalid literal name"); - mp_pop(js->pool); return n; } @@ -203,7 +203,7 @@ static struct json_node *json_parse_string(struct json_context *js, int c) else json_parse_error(js, "Invalid private-use character in string"); } - if (unlikely(c > 0xf0000)) + if (unlikely(c >= 0xf0000)) { if (c > 0x10ffff) json_parse_error(js, "Invalid non-Unicode character in string"); @@ -249,7 +249,7 @@ static struct json_node *json_parse_string(struct json_context *js, int c) y = json_parse_hex4(js); if (!(y >= 0xdc00 && y < 0xe000)) json_parse_error(js, "Escaped high surrogate codepoint must be followed by a low surrogate codepoint"); - c = 0x10000 | ((x & 0x03ff) << 10) | (y & 0x03ff); + c = 0x10000 + ((x & 0x03ff) << 10) | (y & 0x03ff); if (c > 0xf0000) json_parse_error(js, "Invalid escaped private-use character"); } @@ -261,6 +261,8 @@ static struct json_node *json_parse_string(struct json_context *js, int c) else json_parse_error(js, "Invalid escaped private-use character"); } + else + c = x; break; } default: @@ -275,7 +277,7 @@ static struct json_node *json_parse_string(struct json_context *js, int c) return json_new_string_ref(js, p); } -struct json_node *json_peek_token(struct json_context *js) +static struct json_node *json_read_token(struct json_context *js) { if (unlikely(js->in_eof)) return json_triv_token(js, JSON_EOF); @@ -289,7 +291,10 @@ struct json_node *json_peek_token(struct json_context *js) while (c == 0x20 || c == 0x09 || c == 0x0a || c == 0x0d) { if (c == 0x0a) - js->in_line++; + { + js->in_line++; + js->in_column = 0; + } c = json_get_char(js); } if (c < 0) @@ -318,21 +323,34 @@ struct json_node *json_peek_token(struct json_context *js) return json_triv_token(js, JSON_NAME_SEP); case ',': return json_triv_token(js, JSON_VALUE_SEP); + case '.': + json_parse_error(js, "Numbers must start with a digit"); + case 0xfeff: + json_parse_error(js, "Misplaced byte-order mark, complain in Redmond"); default: json_parse_error(js, "Invalid character"); } } -struct json_node *json_next_token(struct json_context *js) +struct json_node *json_peek_token(struct json_context *js) { if (!js->next_token) - json_peek_token(js); + js->next_token = json_read_token(js); + return js->next_token; +} + +struct json_node *json_next_token(struct json_context *js) +{ struct json_node *t = js->next_token; - js->next_token = NULL; - return t; + if (t) + { + js->next_token = NULL; + return t; + } + return json_read_token(js); } -struct json_node *json_next_object(struct json_context *js) +struct json_node *json_next_value(struct json_context *js) { struct json_node *t = json_next_token(js); @@ -356,7 +374,7 @@ struct json_node *json_next_object(struct json_context *js) json_next_token(js); else for (;;) { - struct json_node *v = json_next_object(js); + struct json_node *v = json_next_value(js); if (!v) json_parse_error(js, "Unterminated array"); json_array_append(a, v); @@ -365,7 +383,7 @@ struct json_node *json_next_object(struct json_context *js) if (t->type == JSON_END_ARRAY) break; if (t->type != JSON_VALUE_SEP) - json_parse_error(js, "Comma expected"); + json_parse_error(js, "Comma or right bracket expected"); } return a; } @@ -378,7 +396,7 @@ struct json_node *json_next_object(struct json_context *js) json_next_token(js); else for (;;) { - struct json_node *k = json_next_object(js); + struct json_node *k = json_next_value(js); if (!k) json_parse_error(js, "Unterminated object"); if (k->type != JSON_STRING) @@ -388,7 +406,7 @@ struct json_node *json_next_object(struct json_context *js) if (t->type != JSON_NAME_SEP) json_parse_error(js, "Colon expected"); - struct json_node *v = json_next_object(js); + struct json_node *v = json_next_value(js); if (!v) json_parse_error(js, "Unterminated object"); if (json_object_get(o, k->string)) // FIXME: Optimize @@ -422,7 +440,7 @@ struct json_node *json_parse(struct json_context *js, struct fastbuf *fb) { json_set_input(js, fb); - struct json_node *n = json_next_object(js); + struct json_node *n = json_next_value(js); if (!n) json_parse_error(js, "Empty input");