X-Git-Url: http://mj.ucw.cz/gitweb/?a=blobdiff_plain;f=ucw-json%2Fparse.c;h=df99ea405196efad68538da24d28ad15073170cc;hb=refs%2Ftags%2Fv6.5.7;hp=a4de28defa522f6ead699ee133ba38ab0e2908f2;hpb=52084f22fd5b700b749d7dfc6dedb6e01ef9ac22;p=libucw.git

diff --git a/ucw-json/parse.c b/ucw-json/parse.c
index a4de28de..df99ea40 100644
--- a/ucw-json/parse.c
+++ b/ucw-json/parse.c
@@ -21,17 +21,15 @@ void json_set_input(struct json_context *js, struct fastbuf *in)
 {
   js->in_fb = in;
   js->in_line = 1;
+  js->in_column = 0;
   js->next_char = -1;
   js->next_token = NULL;
   js->in_eof = 0;
-  if (!js->trivial_token)
-    js->trivial_token = json_new_node(js, JSON_INVALID);
 }
 
-// FIXME: Report column as well as line?
 static void NONRET json_parse_error(struct json_context *js, const char *msg)
 {
-  trans_throw("ucw.js.parse", js, "%s at line %u", msg, js->in_line);
+  trans_throw("ucw.json.parse", js, "%s at line %u:%u", msg, js->in_line, js->in_column);
 }
 
 static int json_get_char(struct json_context *js)
@@ -42,9 +40,9 @@ static int json_get_char(struct json_context *js)
       if (c == -2)
         json_parse_error(js, "Malformed UTF-8 character");
       js->in_eof = 1;
-      // FIXME: Reject alternative sequences
       return c;
     }
+  js->in_column++;
   return c;
 }
 
@@ -95,6 +93,7 @@ static struct json_node *json_parse_number(struct json_context *js, int c)
   if (c == '.')
     {
       p = mp_append_char(js->pool, p, c);
+      c = json_get_char(js);
       if (!(c >= '0' && c <= '9'))
         json_parse_error(js, "Malformed number: no digits after decimal point");
       while (c >= '0' && c <= '9')
@@ -137,28 +136,30 @@ static struct json_node *json_parse_number(struct json_context *js, int c)
 
 static struct json_node *json_parse_name(struct json_context *js, int c)
 {
-  mp_push(js->pool);
-  char *p = mp_start_noalign(js->pool, 0);
+  char name[16];
+  uint i = 0;
 
   while (c >= 'a' && c <= 'z')
     {
-      p = mp_append_char(js->pool, p, c);
+      if (i < sizeof(name) - 1)
+        name[i++] = c;
       c = json_get_char(js);
     }
+  if (i >= sizeof(name) - 1)
+    json_parse_error(js, "Invalid literal name");
+  name[i] = 0;
   json_unget_char(js, c);
-  p = mp_end_string(js->pool, p);
 
   struct json_node *n;
-  if (!strcmp(p, "null"))
+  if (!strcmp(name, "null"))
     n = json_new_null(js);
-  else if (!strcmp(p, "false"))
+  else if (!strcmp(name, "false"))
     n = json_new_bool(js, 0);
-  else if (!strcmp(p, "true"))
+  else if (!strcmp(name, "true"))
     n = json_new_bool(js, 1);
   else
     json_parse_error(js, "Invalid literal name");
 
-  mp_pop(js->pool);
   return n;
 }
 
@@ -202,7 +203,7 @@ static struct json_node *json_parse_string(struct json_context *js, int c)
           else
             json_parse_error(js, "Invalid private-use character in string");
         }
-      if (unlikely(c > 0xf0000))
+      if (unlikely(c >= 0xf0000))
         {
           if (c > 0x10ffff)
             json_parse_error(js, "Invalid non-Unicode character in string");
@@ -248,7 +249,7 @@ static struct json_node *json_parse_string(struct json_context *js, int c)
                           y = json_parse_hex4(js);
                         if (!(y >= 0xdc00 && y < 0xe000))
                           json_parse_error(js, "Escaped high surrogate codepoint must be followed by a low surrogate codepoint");
-                        c = 0x10000 | ((x & 0x03ff) << 10) | (y & 0x03ff);
+                        c = 0x10000 + ((x & 0x03ff) << 10) | (y & 0x03ff);
                         if (c > 0xf0000)
                           json_parse_error(js, "Invalid escaped private-use character");
                       }
@@ -260,6 +261,8 @@ static struct json_node *json_parse_string(struct json_context *js, int c)
                     else
                       json_parse_error(js, "Invalid escaped private-use character");
                   }
+                else
+                  c = x;
                 break;
               }
             default:
@@ -274,7 +277,7 @@ static struct json_node *json_parse_string(struct json_context *js, int c)
   return json_new_string_ref(js, p);
 }
 
-struct json_node *json_peek_token(struct json_context *js)
+static struct json_node *json_read_token(struct json_context *js)
 {
   if (unlikely(js->in_eof))
     return json_triv_token(js, JSON_EOF);
@@ -288,7 +291,10 @@ struct json_node *json_peek_token(struct json_context *js)
   while (c == 0x20 || c == 0x09 || c == 0x0a || c == 0x0d)
     {
       if (c == 0x0a)
-        js->in_line++;
+        {
+          js->in_line++;
+          js->in_column = 0;
+        }
       c = json_get_char(js);
     }
   if (c < 0)
@@ -317,18 +323,31 @@ struct json_node *json_peek_token(struct json_context *js)
       return json_triv_token(js, JSON_NAME_SEP);
     case ',':
       return json_triv_token(js, JSON_VALUE_SEP);
+    case '.':
+      json_parse_error(js, "Numbers must start with a digit");
+    case 0xfeff:
+      json_parse_error(js, "Misplaced byte-order mark, complain in Redmond");
     default:
       json_parse_error(js, "Invalid character");
     }
 }
 
-struct json_node *json_next_token(struct json_context *js)
+struct json_node *json_peek_token(struct json_context *js)
 {
   if (!js->next_token)
-    json_peek_token(js);
+    js->next_token = json_read_token(js);
+  return js->next_token;
+}
+
+struct json_node *json_next_token(struct json_context *js)
+{
   struct json_node *t = js->next_token;
-  js->next_token = NULL;
-  return t;
+  if (t)
+    {
+      js->next_token = NULL;
+      return t;
+    }
+  return json_read_token(js);
 }
 
 struct json_node *json_next_value(struct json_context *js)
@@ -364,7 +383,7 @@ struct json_node *json_next_value(struct json_context *js)
           if (t->type == JSON_END_ARRAY)
             break;
           if (t->type != JSON_VALUE_SEP)
-            json_parse_error(js, "Comma expected");
+            json_parse_error(js, "Comma or right bracket expected");
         }
       return a;
     }