/* Format a number into the fastbuf using printf's %g conversion; the value must be finite. */
static void write_number(struct fastbuf *fb, double val)
{
ASSERT(isfinite(val));
- bprintf(fb, "%.*g", DBL_DIG, val);
+ bprintf(fb, "%.*g", DBL_DIG+1, val);  // NOTE(review): one digit beyond DBL_DIG, presumably so that 16-digit integers such as 9007199254740991 are printed exactly -- confirm
}
static bool want_indent_p(struct json_context *js)
#include <ucw/lib.h>
#include <ucw/fastbuf.h>
+#include <ucw/fw-hex.h>
#include <ucw/opt.h>
#include <ucw/trans.h>
#include <ucw-json/json.h>
/* Flags filled in from the command-line switches listed in `options` below. */
static int opt_write;
static int opt_escape;
static int opt_indent;
+static int opt_read_hex;
+static int opt_write_hex;
/*
 * Option table passed to opt_parse(). Each OPT_BOOL entry names a short
 * switch, a long switch, the flag variable it controls and a help text.
 */
static struct opt_section options = {
OPT_ITEMS {
OPT_HELP("Options:"),
OPT_HELP_OPTION,
OPT_BOOL('r', "read", opt_read, 0, "\tRead JSON from standard input"),
+ OPT_BOOL('R', "read-hex", opt_read_hex, 0, "\tRead JSON, interpreting <XY> as hex escapes"),
OPT_BOOL('w', "write", opt_write, 0, "\tWrite JSON to standard output"),
+ OPT_BOOL('W', "write-hex", opt_write_hex, 0, "\tWrite JSON, print non-ASCII as hex escapes"),
OPT_BOOL('e', "escape", opt_escape, 0, "\tEscape non-ASCII characters in strings"),
OPT_BOOL('i', "indent", opt_indent, 0, "\tIndent output"),
OPT_END
}
};
+static struct json_node *do_parse(struct json_context *js, struct fastbuf *fb)  // Parse JSON from fb and return the resulting node
+{
+ struct json_node *n;
+ TRANS_TRY
+ {
+ n = json_parse(js, fb);
+ }
+ TRANS_CATCH(x)
+ {
+ fprintf(stderr, "ERROR: %s\n", x->msg);  // report the caught exception's message on stderr ...
+ exit(1);  // ... and terminate with status 1, so n is never returned unset
+ }
+ TRANS_END;
+ return n;
+}
+
int main(int argc UNUSED, char **argv)
{
opt_parse(&options, argv+1);
if (opt_indent)
js->format_options |= JSON_FORMAT_INDENT;
- if (opt_read)
+ if (opt_read || opt_read_hex)
{
struct fastbuf *fb = bfdopen_shared(0, 65536);
- TRANS_TRY
- {
- n = json_parse(js, fb);
- }
- TRANS_CATCH(x)
- {
- fprintf(stderr, "ERROR: %s\n", x->msg);
- exit(1);
- }
- TRANS_END;
+ if (opt_read_hex)
+ fb = fb_wrap_hex_in(fb);
+ n = do_parse(js, fb);
bclose(fb);
}
if (!n)
- {
- n = json_new_number(js, 42);
- }
+ n = json_new_number(js, 42);
- if (opt_write)
+ if (opt_write || opt_write_hex)
{
struct fastbuf *fb = bfdopen_shared(1, 65536);
+ if (opt_write_hex)
+ fb = fb_wrap_hex_out(fb);
json_write(js, fb, n);
bclose(fb);
}
# Tests for the JSON library
# (c) 2015 Martin Mares <mj@ucw.cz>
+
+Name: Empty input
+Run: ../obj/ucw-json/json-test -rw
+Exit: 1
+Err: ERROR: Empty input at line 1:0
+
+### Literals ###
+
+Name: Null
+In: null
+Out: null
+
+Name: True
+In: true
+Out: true
+
+Name: False
+In: false
+Out: false
+
+Name: Invalid literal 1
+In: lomikel
+Exit: 1
+Err: ERROR: Invalid literal name at line 1:8
+
+Name: Invalid literal 2
+In: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+Exit: 1
+Err: ERROR: Invalid literal name at line 1:101
+
+### Numbers ###
+
+Name: Plain number
+In: 42
+Out: 42
+
+Name: Negative number
+In: -42
+Out: -42
+
+Name: Zero number
+In: 0
+Out: 0
+
+# The largest number guaranteed to be precise by RFC 7159
+Name: Large number
+In: 9007199254740991
+Out: 9007199254740991
+
+Name: Fractional number 1
+In: 12345.54321
+Out: 12345.54321
+
+Name: Fractional number 2
+In: 0.333333333
+Out: 0.333333333
+
+Name: Number in scientific notation 1
+In: 3.14159e20
+Out: 3.14159e+20
+
+Name: Number in scientific notation 2
+In: 3.14159e+20
+Out: 3.14159e+20
+
+Name: Number in scientific notation 3
+In: 3.14159e-20
+Out: 3.14159e-20
+
+Name: No leading zero allowed
+In: 01234
+Exit: 1
+Err: ERROR: Malformed number: leading zero at line 1:2
+
+Name: Bare sign is not a number
+In: -
+Exit: 1
+Err: ERROR: Malformed number: just minus at line 1:2
+
+Name: No leading decimal point allowed
+In: .1234
+Exit: 1
+Err: ERROR: Numbers must start with a digit at line 1:1
+
+Name: Digits after decimal point required
+In: 1234.
+Exit: 1
+Err: ERROR: Malformed number: no digits after decimal point at line 1:6
+
+Name: Exponent part must be non-empty 1
+In: 1e
+Exit: 1
+Err: ERROR: Malformed number: empty exponent at line 1:3
+
+Name: Exponent part must be non-empty 2
+In: 1e+
+Exit: 1
+Err: ERROR: Malformed number: empty exponent at line 1:4
+
+Name: Number out of range
+In: 1e3000000
+Exit: 1
+Err: ERROR: Number out of range at line 1:10
+
+### Strings ###
+
+Name: Plain string
+In: "foo"
+Out: "foo"
+
+Name: Empty string
+In: ""
+Out: ""
+
+Name: UTF-8 string
+In: "šelmička"
+Out: "šelmička"
+
+Name: Unterminated string
+In: "infinity
+Exit: 1
+Err: ERROR: Unterminated string at line 1:10
+
+Name: Multi-line string
+In: "infi
+ nity"
+Exit: 1
+Err: ERROR: Unterminated string at line 1:6
+
+# Some characters are written as \uXXXX on output, which is correct
+Name: Escaped characters
+In: "\"\\\/\b\f\n\r\t"
+Out: "\"\\/\u0008\u000c\n\r\t"
+
+Name: Improper escaped characters
+In: "\a"
+Exit: 1
+Err: ERROR: Invalid backslash sequence in string at line 1:3
+
+Name: Unicode escapes
+In: "\u0041\u010d\u010D\u0001"
+Out: "Ačč\u0001"
+
+Name: Improper Unicode escapes 1
+In: "\u"
+Exit: 1
+Err: ERROR: Invalid Unicode escape sequence at line 1:4
+
+Name: Improper Unicode escapes 2
+In: "\u
+Exit: 1
+Err: ERROR: Invalid Unicode escape sequence at line 1:4
+
+Name: Improper Unicode escapes 3
+In: "\uABCZ"
+Exit: 1
+Err: ERROR: Invalid Unicode escape sequence at line 1:7
+
+### Unicode magic ###
+
+# TAB is forbidden
+Name: Control characters 1
+Run: ../obj/ucw-json/json-test -RW
+In: "<09>"
+Exit: 1
+Err: ERROR: Invalid control character in string at line 1:2
+
+# Surprisingly, DEL is not
+Name: Control characters 2
+In: "<7f>"
+Out: "<7f>"<0a>
+
+# U+31234
+Name: UTF-8 outside BMP
+In: "<f0><b1><88><b4>"
+Out: "<f0><b1><88><b4>"<0a>
+
+Name: Private use characters in BMP
+In: "<ef><80><80>"
+Exit: 1
+Err: ERROR: Invalid private-use character in string at line 1:2
+
+Name: UTF-8 outside BMP (repeated)
+In: "<f0><b1><88><b4>"
+Out: "<f0><b1><88><b4>"<0a>
+
+Name: Private use characters outside BMP
+In: "<f3><b0><80><80>"
+Exit: 1
+Err: ERROR: Invalid private-use character in string at line 1:2
+
+# U+FF0000
+Name: UTF-8 outside UCS
+In: "<f8><bf><b0><80><80>"
+Exit: 1
+Err: ERROR: Invalid non-Unicode character in string at line 1:2
+
+# U+D800
+Name: UTF-8 surrogate 1
+In: "<ed><a0><80>"
+Exit: 1
+Err: ERROR: Invalid surrogate character in string at line 1:2
+
+# U+DC00
+Name: UTF-8 surrogate 2
+In: "<ed><b0><80>"
+Exit: 1
+Err: ERROR: Invalid surrogate character in string at line 1:2
+
+# U+FEFF
+Name: UTF-8 BOM
+In: <ef><bb><bf>
+Exit: 1
+Err: ERROR: Misplaced byte-order mark, complain in Redmond at line 1:1
+
+Name: Escaped NUL
+In: "\u0000"
+Exit: 1
+Err: ERROR: Zero bytes in strings are not supported at line 1:7
+
+Name: Escaped surrogate
+In: "\udaff\udcba"
+Out: "<f3><8f><b2><ba>"<0a>
+
+Name: Escaped surrogate malformation 1
+In: "\udaff"
+Exit: 1
+Err: ERROR: Escaped high surrogate codepoint must be followed by a low surrogate codepoint at line 1:8
+
+Name: Escaped surrogate malformation 2
+In: "\udcff"
+Exit: 1
+Err: ERROR: Invalid escaped surrogate codepoint at line 1:7
+
+Name: Escaped low private-use character
+In: "\uedac"
+Exit: 1
+Err: ERROR: Invalid escaped private-use character at line 1:7
+
+Name: Escaped high private-use character
+In: "\udbff\udc00"
+Exit: 1
+Err: ERROR: Invalid escaped private-use character at line 1:13
+
+# Switch back to normal mode
+Name: Invalid ASCII character
+Run: ../obj/ucw-json/json-test -rw
+In: @
+Exit: 1
+Err: ERROR: Invalid character at line 1:1
+
+### Arrays ###
+
+Name: Empty array
+In: []
+Out: []
+
+Name: One-element array
+In: [1]
+Out: [ 1 ]
+
+Name: Two-element array
+In: [1,2]
+Out: [ 1, 2 ]
+
+Name: Nested arrays
+In: [[1,2],["a","b"]]
+Out: [ [ 1, 2 ], [ "a", "b" ] ]
+
+Name: Multi-line array
+In: [
+ "a", null,false
+ ,false
+ ]
+Out: [ "a", null, false, false ]
#include <ucw/mempool.h>
#include <ucw-json/json.h>
+static void json_init(struct json_context *js)  // initialization shared by json_new() and json_reset()
+{
+ mp_save(js->pool, &js->init_state);  // remember the pool state; json_reset() passes it to mp_restore()
+}
+
/* Create a new JSON context together with its own memory pool and return it. */
struct json_context *json_new(void)
{
struct mempool *mp = mp_new(4096);
struct json_context *js = mp_alloc_zero(mp, sizeof(*js));  // the context itself is allocated (zeroed) from the new pool
js->pool = mp;
- mp_save(mp, &js->init_state);
+ json_init(js);
return js;
}
/*
 * Reset a context for reuse: mp_restore() the pool to the state saved at
 * initialization, clear all fields of the context and initialize it again.
 * The pool pointer is carried across the bzero().
 */
void json_reset(struct json_context *js)
{
- mp_restore(js->pool, &js->init_state);
+ struct mempool *mp = js->pool;
+ mp_restore(mp, &js->init_state);
+ bzero(js, sizeof(*js));
+ js->pool = mp;
+ json_init(js);
}
-
struct json_node *json_new_node(struct json_context *js, enum json_node_type type)
{
struct json_node *n = mp_alloc_fast(js->pool, sizeof(*n));
struct json_context {
struct mempool *pool;
struct mempool_state init_state;
- // FIXME: Size limit?
struct fastbuf *in_fb;
uint in_line;
/*
 * Parse a literal name whose first character is c. Lowercase ASCII letters
 * are read one by one and the first other character is pushed back.
 * Only "null", "false" and "true" are accepted and turned into a node;
 * anything else is reported as "Invalid literal name".
 */
static struct json_node *json_parse_name(struct json_context *js, int c)
{
- mp_push(js->pool);
- char *p = mp_start_noalign(js->pool, 0);
+ char name[16];
+ uint i = 0;
while (c >= 'a' && c <= 'z')
{
- p = mp_append_char(js->pool, p, c);
+ if (i < sizeof(name) - 1)  // store at most 15 letters, but keep reading so the whole word is consumed
+ name[i++] = c;
c = json_get_char(js);
}
+ if (i >= sizeof(name) - 1)  // buffer filled up: 15 or more letters can never be a valid literal
+ json_parse_error(js, "Invalid literal name");
+ name[i] = 0;
json_unget_char(js, c);
- p = mp_end_string(js->pool, p);
struct json_node *n;
- if (!strcmp(p, "null"))
+ if (!strcmp(name, "null"))
n = json_new_null(js);
- else if (!strcmp(p, "false"))
+ else if (!strcmp(name, "false"))
n = json_new_bool(js, 0);
- else if (!strcmp(p, "true"))
+ else if (!strcmp(name, "true"))
n = json_new_bool(js, 1);
else
json_parse_error(js, "Invalid literal name");  // NOTE(review): presumably does not return, since n is not set on this path -- confirm
- mp_pop(js->pool);
return n;
}
else
json_parse_error(js, "Invalid private-use character in string");
}
- if (unlikely(c > 0xf0000))
+ if (unlikely(c >= 0xf0000))
{
if (c > 0x10ffff)
json_parse_error(js, "Invalid non-Unicode character in string");
return json_triv_token(js, JSON_NAME_SEP);
case ',':
return json_triv_token(js, JSON_VALUE_SEP);
+ case '.':
+ json_parse_error(js, "Numbers must start with a digit");
+ case 0xfeff:
+ json_parse_error(js, "Misplaced byte-order mark, complain in Redmond");
default:
json_parse_error(js, "Invalid character");
}