--- /dev/null
+/*
+ * UCW JSON Library -- Data Representation
+ *
+ * (c) 2015 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#undef LOCAL_DEBUG
+
+#include <ucw/lib.h>
+#include <ucw/gary.h>
+#include <ucw/mempool.h>
+#include <ucw-json/json.h>
+
+/*
+ * Create a new JSON context. The context structure is allocated (zeroed)
+ * from a memory pool created here, and the pool state right after that
+ * allocation is remembered in init_state for json_reset().
+ */
+struct json_context *json_new(void)
+{
+  struct mempool *pool = mp_new(4096);
+  struct json_context *ctx = mp_alloc_zero(pool, sizeof(struct json_context));
+
+  ctx->pool = pool;
+  mp_save(pool, &ctx->init_state);
+  return ctx;
+}
+
+/*
+ * Delete the memory pool of the context. A context created by json_new()
+ * is itself allocated from that pool, so it must not be used afterwards.
+ */
+void json_delete(struct json_context *js)
+{
+  struct mempool *pool = js->pool;
+
+  mp_delete(pool);
+}
+
+/*
+ * Return the context to the state it had right after json_new():
+ * the pool is rolled back to init_state, which releases everything
+ * allocated from it since then.
+ */
+void json_reset(struct json_context *js)
+{
+  mp_restore(js->pool, &js->init_state);
+
+  // Both token pointers refer to nodes allocated from the pool after
+  // init_state was saved, so they are stale now. Clearing trivial_token
+  // makes json_set_input() allocate a fresh shared token instead of
+  // reusing released memory.
+  js->next_token = NULL;
+  js->trivial_token = NULL;
+}
+
+
+/*
+ * Allocate a node of the given type from the context's pool.
+ * Only the type is filled in here; the value part of the node
+ * is left for the caller to set.
+ */
+struct json_node *json_new_node(struct json_context *js, enum json_node_type type)
+{
+  struct json_node *node = mp_alloc_fast(js->pool, sizeof(struct json_node));
+
+  node->type = type;
+  return node;
+}
+
+/*
+ * Create an empty JSON_ARRAY node. Its growing array of elements is
+ * initialized with room for 4 items and uses the context pool's allocator.
+ */
+struct json_node *json_new_array(struct json_context *js)
+{
+  struct json_node *arr = json_new_node(js, JSON_ARRAY);
+
+  GARY_INIT_SPACE_ALLOC(arr->elements, 4, mp_get_allocator(js->pool));
+  return arr;
+}
+
+/* Append @elt at the end of @array, which must be a JSON_ARRAY node. */
+void json_array_append(struct json_node *array, struct json_node *elt)
+{
+  ASSERT(array->type == JSON_ARRAY);
+
+  struct json_node **slot = GARY_PUSH(array->elements);
+  *slot = elt;
+}
+
+/*
+ * Create an empty JSON_OBJECT node. Its growing array of key-value pairs
+ * is initialized with room for 4 items and uses the context pool's allocator.
+ */
+struct json_node *json_new_object(struct json_context *js)
+{
+  struct json_node *obj = json_new_node(js, JSON_OBJECT);
+
+  GARY_INIT_SPACE_ALLOC(obj->pairs, 4, mp_get_allocator(js->pool));
+  return obj;
+}
+
+/*
+ * Set @key of object @n to @value.
+ *
+ * If the key is already present, its value is replaced. A NULL @value
+ * removes the key (and does nothing if the key is absent). The key string
+ * is stored by reference, not copied, so it must stay valid as long as
+ * the object is in use.
+ *
+ * Lookup is a linear scan over all pairs.
+ */
+void json_object_set(struct json_node *n, const char *key, struct json_node *value)
+{
+  // Same sanity check as json_array_append(): the pairs member of the
+  // union is meaningful only for object nodes.
+  ASSERT(n->type == JSON_OBJECT);
+
+  for (size_t i=0; i < GARY_SIZE(n->pairs); i++)
+    if (!strcmp(n->pairs[i].key, key))
+      {
+        if (value)
+          n->pairs[i].value = value;
+        else
+          {
+            // Remove by moving the last pair into the hole,
+            // so the order of pairs is not preserved.
+            n->pairs[i] = n->pairs[GARY_SIZE(n->pairs) - 1];
+            GARY_POP(n->pairs);
+          }
+        return;
+      }
+
+  // Key not found: append a new pair unless this was a removal request
+  if (value)
+    {
+      struct json_pair *p = GARY_PUSH(n->pairs);
+      p->key = key;
+      p->value = value;
+    }
+}
+
+/*
+ * Look up @key in object @n by a linear scan over its pairs.
+ * Returns the stored value, or NULL if the key is not present.
+ */
+struct json_node *json_object_get(struct json_node *n, const char *key)
+{
+  // Same sanity check as json_array_append(): the pairs member of the
+  // union is meaningful only for object nodes.
+  ASSERT(n->type == JSON_OBJECT);
+
+  for (size_t i=0; i < GARY_SIZE(n->pairs); i++)
+    if (!strcmp(n->pairs[i].key, key))
+      return n->pairs[i].value;
+  return NULL;
+}
--- /dev/null
+/*
+ * UCW JSON Library
+ *
+ * (c) 2015 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_JSON_JSON_H
+#define _UCW_JSON_JSON_H
+
+#include <ucw/clists.h>
+#include <ucw/slists.h>
+#include <ucw/mempool.h>
+#include <ucw/fastbuf.h>
+
+#ifdef CONFIG_UCW_CLEAN_ABI
+// FIXME
+#endif
+
+/***
+ * === FIXME
+ ***/
+
+/**
+ * State of the JSON library: the memory pool from which nodes are
+ * allocated, plus the state of the parser reading from a fastbuf.
+ **/
+struct json_context {
+ // Memory pool used for the context itself (see json_new()) and for all nodes
+ struct mempool *pool;
+ // Pool state saved by json_new(); json_reset() restores the pool to it
+ struct mempool_state init_state;
+ // FIXME: Size limit?
+
+ // Parser input, set by json_set_input()
+ struct fastbuf *in_fb;
+ // Current input line, starting at 1; used in parse error messages
+ uint in_line;
+ // Set once reading a character reported end of input
+ bool in_eof;
+ // Token held for json_next_token(); reset to NULL by json_set_input()
+ struct json_node *next_token;
+ // Single node reused for all structural tokens (brackets, separators, EOF)
+ struct json_node *trivial_token;
+ // One character pushed back by the tokenizer, or -1 if there is none
+ int next_char;
+};
+
+/** Create a new JSON context together with its own memory pool. **/
+struct json_context *json_new(void);
+/** Delete the memory pool of the context @js. **/
+void json_delete(struct json_context *js);
+/** Restore the memory pool of @js to the state saved when the context was created. **/
+void json_reset(struct json_context *js);
+
+/**
+ * Type of a <<struct json_node,`struct json_node`>>. The first group are
+ * values which can occur in a JSON document, the second group are raw
+ * tokens produced only by the tokenizer.
+ **/
+enum json_node_type {
+ // Type given to the shared token node when json_set_input() creates it
+ JSON_INVALID,
+ JSON_NULL,
+ JSON_BOOLEAN,
+ JSON_NUMBER,
+ JSON_STRING,
+ JSON_ARRAY,
+ JSON_OBJECT,
+ // These are not real nodes, but raw tokens
+ JSON_BEGIN_ARRAY,
+ JSON_END_ARRAY,
+ JSON_BEGIN_OBJECT,
+ JSON_END_OBJECT,
+ JSON_NAME_SEP,
+ JSON_VALUE_SEP,
+ // Reported by the tokenizer at the end of input
+ JSON_EOF,
+};
+
+/**
+ * A single JSON value (or a raw token). Which member of the union is
+ * used is given by @type; JSON_NULL and the raw tokens use none of them.
+ **/
+struct json_node {
+ enum json_node_type type;
+ union {
+ // JSON_BOOLEAN
+ bool boolean;
+ // JSON_NUMBER
+ double number;
+ // JSON_STRING
+ const char *string;
+ // JSON_ARRAY
+ struct json_node **elements; // Growing array
+ // JSON_OBJECT
+ struct json_pair *pairs; // Growing array
+ };
+};
+
+/** One key-value pair of a JSON_OBJECT node. **/
+struct json_pair {
+ // Key string; json_object_set() stores the caller's pointer without copying
+ const char *key;
+ struct json_node *value;
+ // FIXME: Hash table
+};
+
+/** Allocate a node of the given @type from the pool of @js. Only the type is set; the value is left to the caller. **/
+struct json_node *json_new_node(struct json_context *js, enum json_node_type type);
+
+/** Create a new JSON_NULL node. **/
+static inline struct json_node *json_new_null(struct json_context *js)
+{
+  struct json_node *node = json_new_node(js, JSON_NULL);
+  return node;
+}
+
+/** Create a new JSON_BOOLEAN node holding @value. **/
+static inline struct json_node *json_new_bool(struct json_context *js, bool value)
+{
+  struct json_node *node = json_new_node(js, JSON_BOOLEAN);
+
+  node->boolean = value;
+  return node;
+}
+
+/** Create a new JSON_NUMBER node holding @value. **/
+static inline struct json_node *json_new_number(struct json_context *js, double value)
+{
+  struct json_node *node = json_new_node(js, JSON_NUMBER);
+
+  node->number = value;
+  return node;
+}
+
+/**
+ * Create a new JSON_STRING node which refers to @value directly.
+ * The string is not copied, so the caller has to keep it valid.
+ **/
+static inline struct json_node *json_new_string_ref(struct json_context *js, const char *value)
+{
+  struct json_node *node = json_new_node(js, JSON_STRING);
+
+  node->string = value;
+  return node;
+}
+
+/** Create a new JSON_STRING node holding a copy of @value made in the context's pool. **/
+static inline struct json_node *json_new_string(struct json_context *js, const char *value)
+{
+  char *copy = mp_strdup(js->pool, value);
+
+  return json_new_string_ref(js, copy);
+}
+
+/** Create a new, empty JSON_ARRAY node. **/
+struct json_node *json_new_array(struct json_context *js);
+/** Append @elt at the end of @array, which must be a JSON_ARRAY node. **/
+void json_array_append(struct json_node *array, struct json_node *elt);
+
+/** Create a new, empty JSON_OBJECT node. **/
+struct json_node *json_new_object(struct json_context *js);
+/**
+ * Set @key of object @n to @value, replacing a previous value of the same
+ * key. A NULL @value removes the key.
+ * FIXME: the key is stored by reference, so it must not be freed while
+ * the object is in use.
+ **/
+void json_object_set(struct json_node *n, const char *key, struct json_node *value);
+/** Return the value stored under @key in object @n, or NULL if there is no such key. **/
+struct json_node *json_object_get(struct json_node *n, const char *key);
+
+/** Start reading from the fastbuf @in: resets line counting and all look-ahead state of @js. **/
+void json_set_input(struct json_context *js, struct fastbuf *in);
+/**
+ * Return the upcoming token. Structural tokens are reported through
+ * a node shared by the whole context, so examine the result before
+ * asking for another token.
+ **/
+struct json_node *json_peek_token(struct json_context *js);
+/** Return the upcoming token and move past it. **/
+struct json_node *json_next_token(struct json_context *js);
+
+/**
+ * Read one complete value (a scalar, an array or an object) from the
+ * input. Returns NULL at the end of input. Syntax errors are thrown
+ * as exceptions of type `ucw.js.parse`.
+ **/
+struct json_node *json_next_object(struct json_context *js);
+
+/**
+ * Parse the whole of @fb, which must contain exactly one top-level
+ * value, and return that value. Syntax errors are thrown as exceptions
+ * of type `ucw.js.parse`.
+ **/
+struct json_node *json_parse(struct json_context *js, struct fastbuf *fb);
+
+#endif
--- /dev/null
+/*
+ * UCW JSON Library -- Parser
+ *
+ * (c) 2015 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#undef LOCAL_DEBUG
+
+#include <ucw/lib.h>
+#include <ucw/trans.h>
+#include <ucw/ff-unicode.h>
+#include <ucw/unicode.h>
+#include <ucw-json/json.h>
+
+#include <errno.h>
+#include <stdlib.h>
+
+/*
+ * Attach the parser to the input stream @in and reset all per-input state:
+ * line counter, pushed-back character, look-ahead token and the EOF flag.
+ */
+void json_set_input(struct json_context *js, struct fastbuf *in)
+{
+  // The node shared by all structural tokens is created lazily
+  if (!js->trivial_token)
+    js->trivial_token = json_new_node(js, JSON_INVALID);
+
+  js->in_fb = in;
+  js->in_eof = 0;
+  js->in_line = 1;
+  js->next_char = -1;
+  js->next_token = NULL;
+}
+
+/*
+ * Throw a "ucw.js.parse" exception carrying @msg and the current input
+ * line. This function does not return.
+ */
+// FIXME: Report column as well as line?
+static void NONRET json_parse_error(struct json_context *js, const char *msg)
+{
+  uint line = js->in_line;
+
+  trans_throw("ucw.js.parse", js, "%s at line %u", msg, line);
+}
+
+/*
+ * Read one UTF-8 encoded character from the input.
+ *
+ * The reader is asked to substitute -2 for malformed sequences, which
+ * is turned into a parse error here. Any other negative result is
+ * treated as the end of input: it is recorded in in_eof and passed on
+ * to the caller.
+ */
+static int json_get_char(struct json_context *js)
+{
+  int ch = bget_utf8_32_repl(js->in_fb, -2);
+
+  if (unlikely(ch < 0))
+    {
+      if (ch == -2)
+        json_parse_error(js, "Malformed UTF-8 character");
+      // FIXME: Reject alternative sequences
+      js->in_eof = 1;
+    }
+  return ch;
+}
+
+/*
+ * Push the character @c back, so that the tokenizer picks it up when it
+ * starts reading the next token. There is room for a single character
+ * only. A negative @c (end of input) leaves the slot effectively empty,
+ * because the tokenizer treats negative values as "nothing pushed back".
+ */
+static void json_unget_char(struct json_context *js, int c)
+{
+ js->next_char = c;
+}
+
+/*
+ * Return the context's shared token node with its type set to @type.
+ * The same node is handed out for every structural token, so each call
+ * overwrites what the previous one returned.
+ */
+static struct json_node *json_triv_token(struct json_context *js, enum json_node_type type)
+{
+  struct json_node *tok = js->trivial_token;
+
+  tok->type = type;
+  return tok;
+}
+
+/*
+ * Parse a number token. @c is its first character (a digit or a minus
+ * sign), which the caller has already read.
+ *
+ * The syntax is checked here (optional minus, integer part without
+ * leading zeroes, optional fraction, optional exponent) and the
+ * collected text is then converted by strtod(). The first character
+ * after the number is pushed back for the tokenizer.
+ *
+ * Returns a new JSON_NUMBER node; throws a parse error on malformed
+ * or out-of-range numbers.
+ */
+static struct json_node *json_parse_number(struct json_context *js, int c)
+{
+  // The text of the number is collected in a temporary buffer in the pool,
+  // which is released by mp_pop() before the node is allocated.
+  mp_push(js->pool);
+  char *p = mp_start_noalign(js->pool, 0);
+
+  // Optional minus
+  if (c == '-')
+    {
+      p = mp_append_char(js->pool, p, c);
+      c = json_get_char(js);
+      if (!(c >= '0' && c <= '9'))
+        json_parse_error(js, "Malformed number: just minus");
+    }
+
+  // Integer part
+  if (c == '0')
+    {
+      // Leading zeroes are forbidden by RFC 7159
+      p = mp_append_char(js->pool, p, c);
+      c = json_get_char(js);
+      if (c >= '0' && c <= '9')
+        json_parse_error(js, "Malformed number: leading zero");
+    }
+  else
+    {
+      while (c >= '0' && c <= '9')
+        {
+          p = mp_append_char(js->pool, p, c);
+          c = json_get_char(js);
+        }
+    }
+
+  // Fractional part
+  if (c == '.')
+    {
+      p = mp_append_char(js->pool, p, c);
+      // Move on to the first character after the decimal point; without
+      // this, the check below would still see the '.' and reject every
+      // number which has a fraction.
+      c = json_get_char(js);
+      if (!(c >= '0' && c <= '9'))
+        json_parse_error(js, "Malformed number: no digits after decimal point");
+      while (c >= '0' && c <= '9')
+        {
+          p = mp_append_char(js->pool, p, c);
+          c = json_get_char(js);
+        }
+    }
+
+  // Exponent
+  if (c == 'e' || c == 'E')
+    {
+      p = mp_append_char(js->pool, p, c);
+      c = json_get_char(js);
+      if (c == '+' || c == '-')
+        {
+          p = mp_append_char(js->pool, p, c);
+          c = json_get_char(js);
+        }
+      if (!(c >= '0' && c <= '9'))
+        json_parse_error(js, "Malformed number: empty exponent");
+      while (c >= '0' && c <= '9')
+        {
+          p = mp_append_char(js->pool, p, c);
+          c = json_get_char(js);
+        }
+    }
+
+  // The character which terminated the number belongs to the next token
+  json_unget_char(js, c);
+
+  p = mp_end_string(js->pool, p);
+  errno = 0;
+  double val = strtod(p, NULL);
+  if (errno == ERANGE)
+    json_parse_error(js, "Number out of range");
+  mp_pop(js->pool);
+
+  return json_new_number(js, val);
+}
+
+/*
+ * Parse a literal name (null, false or true). @c is its first character,
+ * a lowercase letter which the caller has already read. The first
+ * character after the run of lowercase letters is pushed back for the
+ * tokenizer.
+ *
+ * Returns a new JSON_NULL or JSON_BOOLEAN node; throws a parse error for
+ * any other name.
+ */
+static struct json_node *json_parse_name(struct json_context *js, int c)
+{
+  // The name is collected in a temporary buffer in the pool
+  mp_push(js->pool);
+  char *p = mp_start_noalign(js->pool, 0);
+
+  while (c >= 'a' && c <= 'z')
+    {
+      p = mp_append_char(js->pool, p, c);
+      c = json_get_char(js);
+    }
+  json_unget_char(js, c);
+
+  p = mp_end_string(js->pool, p);
+  int is_null = !strcmp(p, "null");
+  int is_false = !strcmp(p, "false");
+  int is_true = !strcmp(p, "true");
+  if (!is_null && !is_false && !is_true)
+    json_parse_error(js, "Invalid literal name");
+
+  // Release the temporary buffer first and allocate the node only
+  // afterwards (as json_parse_number() does). A node allocated between
+  // mp_push() and mp_pop() would be released by the pop together with
+  // the buffer.
+  mp_pop(js->pool);
+
+  if (is_null)
+    return json_new_null(js);
+  return json_new_bool(js, is_true);
+}
+
+/*
+ * Read exactly four hexadecimal digits (either case) from the input and
+ * return their value. Throws a parse error if any of the four characters
+ * is not a hexadecimal digit.
+ */
+static uint json_parse_hex4(struct json_context *js)
+{
+  uint value = 0;
+
+  for (int remaining = 4; remaining > 0; remaining--)
+    {
+      int ch = json_get_char(js);
+      uint digit = 0;
+
+      if (ch >= '0' && ch <= '9')
+        digit = ch - '0';
+      else if (ch >= 'a' && ch <= 'f')
+        digit = ch - 'a' + 10;
+      else if (ch >= 'A' && ch <= 'F')
+        digit = ch - 'A' + 10;
+      else
+        json_parse_error(js, "Invalid Unicode escape sequence");
+
+      value = (value << 4) + digit;
+    }
+  return value;
+}
+
+/*
+ * Parse a string token. The opening quote has already been read by the
+ * caller; the incoming value of @c is not used.
+ *
+ * Raw characters are checked first: control characters, surrogates,
+ * private-use characters and values beyond U+10FFFF are rejected.
+ * Backslash escapes are then decoded, including \uXXXX and surrogate
+ * pairs written as two consecutive \uXXXX escapes. \u0000 is rejected,
+ * because the result is a zero-terminated string.
+ *
+ * Returns a new JSON_STRING node whose UTF-8 text lives in the
+ * context's pool; throws a parse error on invalid input.
+ */
+static struct json_node *json_parse_string(struct json_context *js, int c)
+{
+  char *p = mp_start_noalign(js->pool, 0);
+
+  c = json_get_char(js);
+  while (c != '"')
+    {
+      if (unlikely(c < 0x20))
+        {
+          if (c < 0 || c == 0x0d || c == 0x0a)
+            json_parse_error(js, "Unterminated string");
+          else
+            json_parse_error(js, "Invalid control character in string");
+        }
+      if (unlikely(c >= 0xd800 && c < 0xf900))
+        {
+          if (c < 0xe000)
+            json_parse_error(js, "Invalid surrogate character in string");
+          else
+            json_parse_error(js, "Invalid private-use character in string");
+        }
+      // The private-use planes 15 and 16 start at U+F0000 inclusive
+      if (unlikely(c >= 0xf0000))
+        {
+          if (c > 0x10ffff)
+            json_parse_error(js, "Invalid non-Unicode character in string");
+          else
+            json_parse_error(js, "Invalid private-use character in string");
+        }
+      if (c == '\\')
+        {
+          c = json_get_char(js);
+          switch (c)
+            {
+            case '"':
+            case '\\':
+            case '/':
+              // These stand for themselves
+              break;
+            case 'b':
+              c = 0x08;
+              break;
+            case 'f':
+              c = 0x0c;
+              break;
+            case 'n':
+              c = 0x0a;
+              break;
+            case 'r':
+              c = 0x0d;
+              break;
+            case 't':
+              c = 0x09;
+              break;
+            case 'u':
+              {
+                uint x = json_parse_hex4(js);
+                if (!x)
+                  json_parse_error(js, "Zero bytes in strings are not supported");
+                if (x >= 0xd800 && x < 0xf900)
+                  {
+                    if (x < 0xdc00)
+                      {
+                        // High surrogate: low surrogate must follow
+                        uint y = 0;
+                        if (json_get_char(js) == '\\' && json_get_char(js) == 'u')
+                          y = json_parse_hex4(js);
+                        if (!(y >= 0xdc00 && y < 0xe000))
+                          json_parse_error(js, "Escaped high surrogate codepoint must be followed by a low surrogate codepoint");
+                        // The offset 0x10000 must be added, not or-ed in:
+                        // the shifted high half can have bit 16 set itself.
+                        c = 0x10000 + ((x & 0x03ff) << 10) + (y & 0x03ff);
+                        if (c >= 0xf0000)
+                          json_parse_error(js, "Invalid escaped private-use character");
+                      }
+                    else if (x < 0xe000)
+                      {
+                        // Low surrogate
+                        json_parse_error(js, "Invalid escaped surrogate codepoint");
+                      }
+                    else
+                      json_parse_error(js, "Invalid escaped private-use character");
+                  }
+                else
+                  {
+                    // Ordinary character of the Basic Multilingual Plane
+                    c = x;
+                  }
+                break;
+              }
+            default:
+              json_parse_error(js, "Invalid backslash sequence in string");
+            }
+        }
+      p = mp_append_utf8_32(js->pool, p, c);
+      c = json_get_char(js);
+    }
+
+  p = mp_end_string(js->pool, p);
+  return json_new_string_ref(js, p);
+}
+
+/*
+ * Read one token from the input, ignoring the look-ahead token.
+ *
+ * Whitespace is skipped (counting lines on the way). Numbers, literal
+ * names and strings produce newly allocated nodes; structural characters
+ * and the end of input are reported through the shared trivial token.
+ * Throws a parse error on a character which cannot start any token.
+ */
+static struct json_node *json_read_token(struct json_context *js)
+{
+  if (unlikely(js->in_eof))
+    return json_triv_token(js, JSON_EOF);
+
+  // Start with the character pushed back by the previous token, if any
+  int c = js->next_char;
+  if (c >= 0)
+    js->next_char = -1;
+  else
+    c = json_get_char(js);
+
+  while (c == 0x20 || c == 0x09 || c == 0x0a || c == 0x0d)
+    {
+      if (c == 0x0a)
+        js->in_line++;
+      c = json_get_char(js);
+    }
+  if (c < 0)
+    return json_triv_token(js, JSON_EOF);
+
+  if ((c >= '0' && c <= '9') || c == '-')
+    return json_parse_number(js, c);
+
+  if (c >= 'a' && c <= 'z')
+    return json_parse_name(js, c);
+
+  if (c == '"')
+    return json_parse_string(js, c);
+
+  switch (c)
+    {
+    case '[':
+      return json_triv_token(js, JSON_BEGIN_ARRAY);
+    case ']':
+      return json_triv_token(js, JSON_END_ARRAY);
+    case '{':
+      return json_triv_token(js, JSON_BEGIN_OBJECT);
+    case '}':
+      return json_triv_token(js, JSON_END_OBJECT);
+    case ':':
+      return json_triv_token(js, JSON_NAME_SEP);
+    case ',':
+      return json_triv_token(js, JSON_VALUE_SEP);
+    default:
+      json_parse_error(js, "Invalid character");
+    }
+}
+
+/*
+ * Return the upcoming token without consuming it. The token is read from
+ * the input on the first call and kept in js->next_token, so repeated
+ * peeks return the same token until json_next_token() takes it.
+ */
+struct json_node *json_peek_token(struct json_context *js)
+{
+  if (!js->next_token)
+    js->next_token = json_read_token(js);
+  return js->next_token;
+}
+
+/*
+ * Return the upcoming token and consume it. A token already held in
+ * js->next_token is used if there is one; otherwise a token is obtained
+ * from json_peek_token().
+ */
+struct json_node *json_next_token(struct json_context *js)
+{
+  struct json_node *t = js->next_token;
+
+  // Take the token from the return value of json_peek_token() rather
+  // than relying on it to fill in js->next_token as a side effect.
+  if (!t)
+    t = json_peek_token(js);
+  js->next_token = NULL;
+  return t;
+}
+
+// Parse the rest of an array; the opening bracket has been consumed already
+static struct json_node *json_collect_array(struct json_context *js)
+{
+  struct json_node *arr = json_new_array(js);
+
+  if (json_peek_token(js)->type == JSON_END_ARRAY)
+    {
+      // Empty array
+      json_next_token(js);
+      return arr;
+    }
+
+  for (;;)
+    {
+      struct json_node *elt = json_next_object(js);
+      if (!elt)
+        json_parse_error(js, "Unterminated array");
+      json_array_append(arr, elt);
+
+      struct json_node *sep = json_next_token(js);
+      if (sep->type == JSON_END_ARRAY)
+        return arr;
+      if (sep->type != JSON_VALUE_SEP)
+        json_parse_error(js, "Comma expected");
+    }
+}
+
+// Parse the rest of an object; the opening brace has been consumed already
+static struct json_node *json_collect_object(struct json_context *js)
+{
+  struct json_node *obj = json_new_object(js);
+
+  if (json_peek_token(js)->type == JSON_END_OBJECT)
+    {
+      // Empty object
+      json_next_token(js);
+      return obj;
+    }
+
+  for (;;)
+    {
+      struct json_node *key = json_next_object(js);
+      if (!key)
+        json_parse_error(js, "Unterminated object");
+      if (key->type != JSON_STRING)
+        json_parse_error(js, "Object key must be a string");
+
+      struct json_node *sep = json_next_token(js);
+      if (sep->type != JSON_NAME_SEP)
+        json_parse_error(js, "Colon expected");
+
+      struct json_node *val = json_next_object(js);
+      if (!val)
+        json_parse_error(js, "Unterminated object");
+      if (json_object_get(obj, key->string)) // FIXME: Optimize
+        json_parse_error(js, "Key already set");
+      json_object_set(obj, key->string, val);
+
+      sep = json_next_token(js);
+      if (sep->type == JSON_END_OBJECT)
+        return obj;
+      if (sep->type != JSON_VALUE_SEP)
+        json_parse_error(js, "Comma expected");
+    }
+}
+
+/*
+ * Read one complete JSON value from the input: a scalar token is
+ * returned as it is, arrays and objects are built recursively from
+ * their members. Duplicate keys within an object are rejected.
+ *
+ * Returns NULL at the end of input; throws a parse error on a syntax
+ * error.
+ */
+struct json_node *json_next_object(struct json_context *js)
+{
+  struct json_node *t = json_next_token(js);
+
+  switch (t->type)
+    {
+    case JSON_EOF:
+      return NULL;
+
+    // Scalars are complete values already
+    case JSON_NULL:
+    case JSON_BOOLEAN:
+    case JSON_NUMBER:
+    case JSON_STRING:
+      return t;
+
+    // Compound values
+    case JSON_BEGIN_ARRAY:
+      return json_collect_array(js);
+    case JSON_BEGIN_OBJECT:
+      return json_collect_object(js);
+
+    // Tokens which cannot start a value; json_parse_error() does not return
+    case JSON_END_ARRAY:
+      json_parse_error(js, "Misplaced end of array");
+    case JSON_END_OBJECT:
+      json_parse_error(js, "Misplaced end of object");
+    case JSON_NAME_SEP:
+      json_parse_error(js, "Misplaced colon");
+    case JSON_VALUE_SEP:
+      json_parse_error(js, "Misplaced comma");
+    default:
+      ASSERT(0);
+    }
+}
+
+/*
+ * Parse a whole JSON document from @fb. The input must contain exactly
+ * one top-level value followed by nothing but whitespace.
+ *
+ * Returns the parsed value; throws a parse error on empty input, on
+ * trailing tokens, or on any syntax error.
+ */
+struct json_node *json_parse(struct json_context *js, struct fastbuf *fb)
+{
+  json_set_input(js, fb);
+
+  struct json_node *root = json_next_object(js);
+  if (!root)
+    json_parse_error(js, "Empty input");
+
+  if (json_next_token(js)->type != JSON_EOF)
+    json_parse_error(js, "Only one top-level value allowed");
+
+  return root;
+}