From 31018a2bfb3bd26a2638f9f63bd5138816f8a52b Mon Sep 17 00:00:00 2001 From: Martin Mares Date: Wed, 8 Jul 2015 18:03:04 +0200 Subject: [PATCH] JSON: More parsing and formatting details --- ucw-json/format.c | 82 +++++++++++++++++++++++++++++++++++--------- ucw-json/json-test.c | 68 ++++++++++++++++++++++++++++++++---- ucw-json/json.h | 7 ++++ ucw-json/parse.c | 24 +++++++++---- 4 files changed, 153 insertions(+), 28 deletions(-) diff --git a/ucw-json/format.c b/ucw-json/format.c index 2c17ca59..1028285b 100644 --- a/ucw-json/format.c +++ b/ucw-json/format.c @@ -14,6 +14,8 @@ #include #include +#include +#include #include void json_set_output(struct json_context *js, struct fastbuf *fb) @@ -21,8 +23,10 @@ void json_set_output(struct json_context *js, struct fastbuf *fb) js->out_fb = fb; } -static void write_string(struct fastbuf *fb, const char *p) +static void write_string(struct json_context *js, const char *p) { + struct fastbuf *fb = js->out_fb; + bputc(fb, '"'); for (;;) { @@ -47,12 +51,46 @@ static void write_string(struct fastbuf *fb, const char *p) bprintf(fb, "\\u%04x", u); } } + else if (u >= 0x007f && (js->format_options & JSON_FORMAT_ESCAPE_NONASCII)) + { + if (u < 0x10000) + bprintf(fb, "\\u%04x", u); + else if (u < 0x110000) + bprintf(fb, "\\u%04x\\u%04x", 0xd800 + ((u - 0x10000) >> 10), 0xdc00 + (u & 0x3ff)); + else + ASSERT(0); + } else - bputc(fb, u); + bput_utf8_32(fb, u); } bputc(fb, '"'); } +static void write_number(struct fastbuf *fb, double val) +{ + ASSERT(isfinite(val)); + bprintf(fb, "%.*g", DBL_DIG, val); +} + +static bool want_indent_p(struct json_context *js) +{ + return (js->format_options & JSON_FORMAT_INDENT); +} + +static void write_space(struct json_context *js) +{ + struct fastbuf *fb = js->out_fb; + + if (want_indent_p(js)) + { + bputc(fb, '\n'); + for (uint i=0; i < js->out_indent; i++) + bputc(fb, '\t'); + } + else + bputc(fb, ' '); +} + void json_write_value(struct json_context *js, struct json_node *n) { struct fastbuf *fb = js->out_fb; @@ -66,38 +104,50 @@ void json_write_value(struct json_context *js, struct json_node *n) bputs(fb, (n->boolean ? "true" : "false")); break; case JSON_NUMBER: - // FIXME: Formatting of floats - bprintf(fb, "%f", n->number); + write_number(fb, n->number); break; case JSON_STRING: - write_string(fb, n->string); + write_string(js, n->string); break; case JSON_ARRAY: { - // FIXME: Indent - bputs(fb, "[ "); - for (size_t i=0; i < GARY_SIZE(n->elements); i++) + if (!GARY_SIZE(n->elements)) + bputs(fb, "[]"); + else { - if (i) - bputs(fb, ", "); - json_write_value(js, n->elements[i]); + bputc(fb, '['); + js->out_indent++; + for (size_t i=0; i < GARY_SIZE(n->elements); i++) + { + if (i) + bputc(fb, ','); + write_space(js); + json_write_value(js, n->elements[i]); + } + js->out_indent--; + write_space(js); + bputc(fb, ']'); } - bputc(fb, ']'); break; } case JSON_OBJECT: { - bputs(fb, "{ "); - // FIXME: Indent + if (!GARY_SIZE(n->pairs)) + bputs(fb, "{}"); + bputc(fb, '{'); + js->out_indent++; for (size_t i=0; i < GARY_SIZE(n->pairs); i++) { if (i) - bputs(fb, ", "); + bputc(fb, ','); + write_space(js); struct json_pair *p = &n->pairs[i]; - write_string(fb, p->key); + write_string(js, p->key); bputs(fb, ": "); json_write_value(js, p->value); } + js->out_indent--; + write_space(js); bputc(fb, '}'); break; } diff --git a/ucw-json/json-test.c b/ucw-json/json-test.c index 807ecf12..e097fc96 100644 --- a/ucw-json/json-test.c +++ b/ucw-json/json-test.c @@ -8,15 +8,71 @@ */ #include -#include #include +#include +#include +#include + +static int opt_read; +static int opt_write; +static int opt_escape; +static int opt_indent; -#include -#include -#include +static struct opt_section options = { + OPT_ITEMS { + OPT_HELP("Test program for UCW JSON library."), + OPT_HELP("Usage: json-test [options]"), + OPT_HELP(""), + OPT_HELP("Options:"), + OPT_HELP_OPTION, + OPT_BOOL('r', "read", opt_read, 0, "\tRead JSON from standard input"), + OPT_BOOL('w', "write", opt_write, 0, "\tWrite JSON to standard output"), + OPT_BOOL('e', "escape", opt_escape, 0, "\tEscape non-ASCII characters in strings"), + OPT_BOOL('i', "indent", opt_indent, 0, "\tIndent output"), + OPT_END + } +}; -int -main(int argc, char **argv) +int main(int argc UNUSED, char **argv) { + opt_parse(&options, argv+1); + + struct json_context *js = json_new(); + struct json_node *n = NULL; + + if (opt_escape) + js->format_options |= JSON_FORMAT_ESCAPE_NONASCII; + if (opt_indent) + js->format_options |= JSON_FORMAT_INDENT; + + if (opt_read) + { + struct fastbuf *fb = bfdopen_shared(0, 65536); + TRANS_TRY + { + n = json_parse(js, fb); + } + TRANS_CATCH(x) + { + fprintf(stderr, "ERROR: %s\n", x->msg); + exit(1); + } + TRANS_END; + bclose(fb); + } + + if (!n) + { + n = json_new_number(js, 42); + } + + if (opt_write) + { + struct fastbuf *fb = bfdopen_shared(1, 65536); + json_write(js, fb, n); + bclose(fb); + } + + json_delete(js); return 0; } diff --git a/ucw-json/json.h b/ucw-json/json.h index ac5570d9..503a44c0 100644 --- a/ucw-json/json.h +++ b/ucw-json/json.h @@ -36,6 +36,8 @@ struct json_context { int next_char; struct fastbuf *out_fb; + uint out_indent; + uint format_options; // Public }; struct json_context *json_new(void); @@ -130,4 +132,9 @@ void json_set_output(struct json_context *js, struct fastbuf *fb); void json_write_value(struct json_context *js, struct json_node *n); void json_write(struct json_context *js, struct fastbuf *fb, struct json_node *n); +enum json_format_option { + JSON_FORMAT_ESCAPE_NONASCII = 1, + JSON_FORMAT_INDENT = 2, +}; + #endif diff --git a/ucw-json/parse.c b/ucw-json/parse.c index a4de28de..f0010788 100644 --- a/ucw-json/parse.c +++ b/ucw-json/parse.c @@ -95,6 +95,7 @@ static struct json_node *json_parse_number(struct json_context *js, int c) if (c == '.') { p = mp_append_char(js->pool, p, c); + c = json_get_char(js); if (!(c >= '0' && c <= '9')) json_parse_error(js, "Malformed number: no digits after decimal point"); while (c >= '0' && c <= '9') @@ -248,7 +249,7 @@ static struct json_node *json_parse_string(struct json_context *js, int c) y = json_parse_hex4(js); if (!(y >= 0xdc00 && y < 0xe000)) json_parse_error(js, "Escaped high surrogate codepoint must be followed by a low surrogate codepoint"); - c = 0x10000 | ((x & 0x03ff) << 10) | (y & 0x03ff); + c = 0x10000 + ((x & 0x03ff) << 10) | (y & 0x03ff); if (c > 0xf0000) json_parse_error(js, "Invalid escaped private-use character"); } @@ -260,6 +261,8 @@ static struct json_node *json_parse_string(struct json_context *js, int c) else json_parse_error(js, "Invalid escaped private-use character"); } + else + c = x; break; } default: @@ -274,7 +277,7 @@ static struct json_node *json_parse_string(struct json_context *js, int c) return json_new_string_ref(js, p); } -struct json_node *json_peek_token(struct json_context *js) +static struct json_node *json_read_token(struct json_context *js) { if (unlikely(js->in_eof)) return json_triv_token(js, JSON_EOF); @@ -322,13 +325,22 @@ struct json_node *json_peek_token(struct json_context *js) } } -struct json_node *json_next_token(struct json_context *js) +struct json_node *json_peek_token(struct json_context *js) { if (!js->next_token) - json_peek_token(js); + js->next_token = json_read_token(js); + return js->next_token; +} + +struct json_node *json_next_token(struct json_context *js) +{ struct json_node *t = js->next_token; - js->next_token = NULL; - return t; + if (t) + { + js->next_token = NULL; + return t; + } + return json_read_token(js); } struct json_node *json_next_value(struct json_context *js) -- 2.39.2