From: Martin Mares Date: Wed, 8 Jul 2015 17:44:02 +0000 (+0200) Subject: JSON: Tests and bug fixes X-Git-Tag: v6.5~23 X-Git-Url: http://mj.ucw.cz/gitweb/?a=commitdiff_plain;h=04aa19bb1e150722ff1fc2b04a2a8adf7be71a92;p=libucw.git JSON: Tests and bug fixes --- diff --git a/ucw-json/format.c b/ucw-json/format.c index 1028285b..58526c84 100644 --- a/ucw-json/format.c +++ b/ucw-json/format.c @@ -69,7 +69,7 @@ static void write_string(struct json_context *js, const char *p) static void write_number(struct fastbuf *fb, double val) { ASSERT(isfinite(val)); - bprintf(fb, "%.*g", DBL_DIG, val); + bprintf(fb, "%.*g", DBL_DIG+1, val); } static bool want_indent_p(struct json_context *js) diff --git a/ucw-json/json-test.c b/ucw-json/json-test.c index e097fc96..0b6d7a06 100644 --- a/ucw-json/json-test.c +++ b/ucw-json/json-test.c @@ -9,6 +9,7 @@ #include #include +#include #include #include #include @@ -17,6 +18,8 @@ static int opt_read; static int opt_write; static int opt_escape; static int opt_indent; +static int opt_read_hex; +static int opt_write_hex; static struct opt_section options = { OPT_ITEMS { @@ -26,13 +29,31 @@ static struct opt_section options = { OPT_HELP("Options:"), OPT_HELP_OPTION, OPT_BOOL('r', "read", opt_read, 0, "\tRead JSON from standard input"), + OPT_BOOL('R', "read-hex", opt_read_hex, 0, "\tRead JSON, interpreting as hex escapes"), OPT_BOOL('w', "write", opt_write, 0, "\tWrite JSON to standard output"), + OPT_BOOL('W', "write-hex", opt_write_hex, 0, "\tWrite JSON, print non-ASCII as hex escapes"), OPT_BOOL('e', "escape", opt_escape, 0, "\tEscape non-ASCII characters in strings"), OPT_BOOL('i', "indent", opt_indent, 0, "\tIndent output"), OPT_END } }; +static struct json_node *do_parse(struct json_context *js, struct fastbuf *fb) +{ + struct json_node *n; + TRANS_TRY + { + n = json_parse(js, fb); + } + TRANS_CATCH(x) + { + fprintf(stderr, "ERROR: %s\n", x->msg); + exit(1); + } + TRANS_END; + return n; +} + int main(int argc UNUSED, char **argv) { 
opt_parse(&options, argv+1); @@ -45,30 +66,23 @@ int main(int argc UNUSED, char **argv) if (opt_indent) js->format_options |= JSON_FORMAT_INDENT; - if (opt_read) + if (opt_read || opt_read_hex) { struct fastbuf *fb = bfdopen_shared(0, 65536); - TRANS_TRY - { - n = json_parse(js, fb); - } - TRANS_CATCH(x) - { - fprintf(stderr, "ERROR: %s\n", x->msg); - exit(1); - } - TRANS_END; + if (opt_read_hex) + fb = fb_wrap_hex_in(fb); + n = do_parse(js, fb); bclose(fb); } if (!n) - { - n = json_new_number(js, 42); - } + n = json_new_number(js, 42); - if (opt_write) + if (opt_write || opt_write_hex) { struct fastbuf *fb = bfdopen_shared(1, 65536); + if (opt_write_hex) + fb = fb_wrap_hex_out(fb); json_write(js, fb, n); bclose(fb); } diff --git a/ucw-json/json-test.t b/ucw-json/json-test.t index 8dc9983f..20aef1a9 100644 --- a/ucw-json/json-test.t +++ b/ucw-json/json-test.t @@ -1,2 +1,277 @@ # Tests for the JSON library # (c) 2015 Martin Mares + +Name: Empty input +Run: ../obj/ucw-json/json-test -rw +Exit: 1 +Err: ERROR: Empty input at line 1:0 + +### Literals ### + +Name: Null +In: null +Out: null + +Name: True +In: true +Out: true + +Name: False +In: false +Out: false + +Name: Invalid literal 1 +In: lomikel +Exit: 1 +Err: ERROR: Invalid literal name at line 1:8 + +Name: Invalid literal 2 +In: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +Exit: 1 +Err: ERROR: Invalid literal name at line 1:101 + +### Numbers ### + +Name: Plain number +In: 42 +Out: 42 + +Name: Negative number +In: -42 +Out: -42 + +Name: Zero number +In: 0 +Out: 0 + +# The largest number guaranteed to be precise by RFC 7159 +Name: Large number +In: 9007199254740991 +Out: 9007199254740991 + +Name: Fractional number 1 +In: 12345.54321 +Out: 12345.54321 + +Name: Fractional number 2 +In: 0.333333333 +Out: 0.333333333 + +Name: Number in scientific notation 1 +In: 3.14159e20 +Out: 3.14159e+20 + +Name: Number in scientific notation 2 +In: 3.14159e+20 +Out: 
3.14159e+20 + +Name: Number in scientific notation 3 +In: 3.14159e-20 +Out: 3.14159e-20 + +Name: No leading zero allowed +In: 01234 +Exit: 1 +Err: ERROR: Malformed number: leading zero at line 1:2 + +Name: Bare sign is not a number +In: - +Exit: 1 +Err: ERROR: Malformed number: just minus at line 1:2 + +Name: No leading decimal point allowed +In: .1234 +Exit: 1 +Err: ERROR: Numbers must start with a digit at line 1:1 + +Name: Digits after decimal point required +In: 1234. +Exit: 1 +Err: ERROR: Malformed number: no digits after decimal point at line 1:6 + +Name: Exponent part must be non-empty 1 +In: 1e +Exit: 1 +Err: ERROR: Malformed number: empty exponent at line 1:3 + +Name: Exponent part must be non-empty 2 +In: 1e+ +Exit: 1 +Err: ERROR: Malformed number: empty exponent at line 1:4 + +Name: Number out of range +In: 1e3000000 +Exit: 1: +Err: ERROR: Number out of range at line 1:10 + +### Strings ### + +Name: Plain string +In: "foo" +Out: "foo" + +Name: Empty string +In: "" +Out: "" + +Name: UTF-8 string +In: "šelmička" +Out: "šelmička" + +Name: Unterminated string +In: "infinity +Exit: 1 +Err: ERROR: Unterminated string at line 1:10 + +Name: Multi-line string +In: "infi + nity" +Exit: 1 +Err: ERROR: Unterminated string at line 1:6 + +# Some characters are written as \uXXXX on output, which is correct +Name: Escaped characters +In: "\"\\\/\b\f\n\r\t" +Out: "\"\\/\u0008\u000c\n\r\t" + +Name: Improper escaped characters +In: "\a" +Exit: 1 +Err: ERROR: Invalid backslash sequence in string at line 1:3 + +Name: Unicode escapes +In: "\u0041\u010d\u010D\u0001" +Out: "Ačč\u0001" + +Name: Improper Unicode escapes 1 +In: "\u" +Exit: 1 +Err: ERROR: Invalid Unicode escape sequence at line 1:4 + +Name: Improper Unicode escapes 2 +In: "\u +Exit: 1 +Err: ERROR: Invalid Unicode escape sequence at line 1:4 + +Name: Improper Unicode escapes 3 +In: "\uABCZ" +Exit: 1 +Err: ERROR: Invalid Unicode escape sequence at line 1:7 + +### Unicode magic ### + +# TAB is forbidden +Name: Control 
characters 1 +Run: ../obj/ucw-json/json-test -RW +In: "<09>" +Exit: 1 +Err: ERROR: Invalid control character in string at line 1:2 + +# Surprisingly, DEL is not +Name: Control characters 2 +In: "<7f>" +Out: "<7f>"<0a> + +# U+31234 +Name: UTF-8 outside BMP +In: "<f0><b1><88><b4>" +Out: "<f0><b1><88><b4>"<0a> + +Name: Private use characters in BMP +In: "<80><80>" +Exit: 1 +Err: ERROR: Invalid private-use character in string at line 1:2 + +Name: UTF-8 outside BMP +In: "<88>" +Out: "<88>"<0a> + +Name: Private use characters outside BMP +In: "<80><80>" +Exit: 1 +Err: ERROR: Invalid private-use character in string at line 1:2 + +# U+FF0000 +Name: UTF-8 outside UCS +In: "<f8><bf><b0><80><80>" +Exit: 1 +Err: ERROR: Invalid non-Unicode character in string at line 1:2 + +# U+D800 +Name: UTF-8 surrogate 1 +In: "<ed><a0><80>" +Exit: 1 +Err: ERROR: Invalid surrogate character in string at line 1:2 + +# U+DC00 +Name: UTF-8 surrogate 2 +In: "<ed><b0><80>" +Exit: 1 +Err: ERROR: Invalid surrogate character in string at line 1:2 + +# U+FEFF +Name: UTF-8 BOM +In: <ef><bb><bf> +Exit: 1 +Err: ERROR: Misplaced byte-order mark, complain in Redmond at line 1:1 + +Name: Escaped NUL +In: "\u0000" +Exit: 1 +Err: ERROR: Zero bytes in strings are not supported at line 1:7 + +Name: Escaped surrogate +In: "\udaff\udcba" +Out: "<f3><8f><b2><ba>"<0a> + +Name: Escaped surrogate malformation 1 +In: "\udaff" +Exit: 1 +Err: ERROR: Escaped high surrogate codepoint must be followed by a low surrogate codepoint at line 1:8 + +Name: Escaped surrogate malformation 2 +In: "\udcff" +Exit: 1 +Err: ERROR: Invalid escaped surrogate codepoint at line 1:7 + +Name: Escaped low private-use character +In: "\uedac" +Exit: 1 +Err: ERROR: Invalid escaped private-use character at line 1:7 + +Name: Escaped high private-use character +In: "\udbff\udc00" +Exit: 1 +Err: ERROR: Invalid escaped private-use character at line 1:13 + +# Switch back to normal mode +Name: Invalid ASCII character +Run: ../obj/ucw-json/json-test -rw +In: @ +Exit: 1 +Err: ERROR: Invalid character at line 1:1 + +### Arrays ### 
+ +Name: Empty array +In: [] +Out: [] + +Name: One-element array +In: [1] +Out: [ 1 ] + +Name: Two-element array +In: [1,2] +Out: [ 1, 2 ] + +Name: Nested arrays +In: [[1,2],["a","b"]] +Out: [ [ 1, 2 ], [ "a", "b" ] ] + +Name: Multi-line array +In: [ + "a", null,false + ,false + ] +Out: [ "a", null, false, false ] diff --git a/ucw-json/json.c b/ucw-json/json.c index 6b53d2fb..3661a735 100644 --- a/ucw-json/json.c +++ b/ucw-json/json.c @@ -12,12 +12,17 @@ #include #include +static void json_init(struct json_context *js) +{ + mp_save(js->pool, &js->init_state); +} + struct json_context *json_new(void) { struct mempool *mp = mp_new(4096); struct json_context *js = mp_alloc_zero(mp, sizeof(*js)); js->pool = mp; - mp_save(mp, &js->init_state); + json_init(js); return js; } @@ -28,10 +33,13 @@ void json_delete(struct json_context *js) void json_reset(struct json_context *js) { - mp_restore(js->pool, &js->init_state); + struct mempool *mp = js->pool; + mp_restore(mp, &js->init_state); + bzero(js, sizeof(*js)); + js->pool = mp; + json_init(js); } - struct json_node *json_new_node(struct json_context *js, enum json_node_type type) { struct json_node *n = mp_alloc_fast(js->pool, sizeof(*n)); diff --git a/ucw-json/json.h b/ucw-json/json.h index 8e87d9cf..656ea5b1 100644 --- a/ucw-json/json.h +++ b/ucw-json/json.h @@ -26,7 +26,6 @@ struct json_context { struct mempool *pool; struct mempool_state init_state; - // FIXME: Size limit? 
struct fastbuf *in_fb; uint in_line; diff --git a/ucw-json/parse.c b/ucw-json/parse.c index c7389c93..765352b0 100644 --- a/ucw-json/parse.c +++ b/ucw-json/parse.c @@ -139,28 +139,30 @@ static struct json_node *json_parse_number(struct json_context *js, int c) static struct json_node *json_parse_name(struct json_context *js, int c) { - mp_push(js->pool); - char *p = mp_start_noalign(js->pool, 0); + char name[16]; + uint i = 0; while (c >= 'a' && c <= 'z') { - p = mp_append_char(js->pool, p, c); + if (i < sizeof(name) - 1) + name[i++] = c; c = json_get_char(js); } + if (i >= sizeof(name) - 1) + json_parse_error(js, "Invalid literal name"); + name[i] = 0; json_unget_char(js, c); - p = mp_end_string(js->pool, p); struct json_node *n; - if (!strcmp(p, "null")) + if (!strcmp(name, "null")) n = json_new_null(js); - else if (!strcmp(p, "false")) + else if (!strcmp(name, "false")) n = json_new_bool(js, 0); - else if (!strcmp(p, "true")) + else if (!strcmp(name, "true")) n = json_new_bool(js, 1); else json_parse_error(js, "Invalid literal name"); - mp_pop(js->pool); return n; } @@ -204,7 +206,7 @@ static struct json_node *json_parse_string(struct json_context *js, int c) else json_parse_error(js, "Invalid private-use character in string"); } - if (unlikely(c > 0xf0000)) + if (unlikely(c >= 0xf0000)) { if (c > 0x10ffff) json_parse_error(js, "Invalid non-Unicode character in string"); @@ -324,6 +326,10 @@ static struct json_node *json_read_token(struct json_context *js) return json_triv_token(js, JSON_NAME_SEP); case ',': return json_triv_token(js, JSON_VALUE_SEP); + case '.': + json_parse_error(js, "Numbers must start with a digit"); + case 0xfeff: + json_parse_error(js, "Misplaced byte-order mark, complain in Redmond"); default: json_parse_error(js, "Invalid character"); }