From da5a2b1f5eb7b04f67473f69763fdf376adc259f Mon Sep 17 00:00:00 2001 From: Martin Mares Date: Thu, 15 May 2008 11:22:52 +0200 Subject: [PATCH] Added parts of libsh from Sherlock v3.12.2. Only the modules related to configuration and Sherlock objects have been retained. (This is probably a short-time solution only, I intend to replace all uses of Sherlock objects.) --- sherlock/Makefile | 15 ++ sherlock/attrset.c | 44 ++++ sherlock/attrset.h | 36 ++++ sherlock/buck2obj.c | 406 +++++++++++++++++++++++++++++++++++++ sherlock/conf-parse.c | 232 ++++++++++++++++++++++ sherlock/conf.h | 42 ++++ sherlock/libsh.pc | 11 + sherlock/obj-format.c | 45 +++++ sherlock/obj-linear.c | 65 ++++++ sherlock/obj2buck.c | 368 ++++++++++++++++++++++++++++++++++ sherlock/object.c | 430 ++++++++++++++++++++++++++++++++++++++++ sherlock/object.h | 169 ++++++++++++++++ sherlock/objread.h | 94 +++++++++ sherlock/perl/Makefile | 8 + sherlock/perl/Object.pm | 265 +++++++++++++++++++++++++ 15 files changed, 2230 insertions(+) create mode 100644 sherlock/Makefile create mode 100644 sherlock/attrset.c create mode 100644 sherlock/attrset.h create mode 100644 sherlock/buck2obj.c create mode 100644 sherlock/conf-parse.c create mode 100644 sherlock/conf.h create mode 100644 sherlock/libsh.pc create mode 100644 sherlock/obj-format.c create mode 100644 sherlock/obj-linear.c create mode 100644 sherlock/obj2buck.c create mode 100644 sherlock/object.c create mode 100644 sherlock/object.h create mode 100644 sherlock/objread.h create mode 100644 sherlock/perl/Makefile create mode 100644 sherlock/perl/Object.pm diff --git a/sherlock/Makefile b/sherlock/Makefile new file mode 100644 index 0000000..23a6f26 --- /dev/null +++ b/sherlock/Makefile @@ -0,0 +1,15 @@ +# Makefile for the Sherlock Library (c) 2004--2007 Martin Mares + +DIRS+=sherlock + +LIBSH_MODS= \ + object bucket buck2obj obj2buck obj-linear obj-format \ + attrset conf-parse + +LIBSH_MOD_PATHS=$(addprefix $(o)/sherlock/,$(LIBSH_MODS)) + 
+$(o)/sherlock/libsh.a: $(addsuffix .o,$(LIBSH_MOD_PATHS)) +$(o)/sherlock/libsh.so: $(addsuffix .oo,$(LIBSH_MOD_PATHS)) +$(o)/sherlock/libsh.pc: $(LIBUCW) + +include $(s)/sherlock/perl/Makefile diff --git a/sherlock/attrset.c b/sherlock/attrset.c new file mode 100644 index 0000000..a025739 --- /dev/null +++ b/sherlock/attrset.c @@ -0,0 +1,44 @@ +/* + * Sherlock Library -- Parsing Attribute Sets + * + * (c) 2006 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#include "sherlock/sherlock.h" +#include "sherlock/object.h" +#include "sherlock/attrset.h" +#include "lib/clists.h" +#include "sherlock/conf.h" + +struct attr_node { + cnode n; + uns attr; +}; + +struct cf_section attr_set_cf = { + CF_TYPE(struct attr_node), + CF_ITEMS { + CF_USER("Attr", PTR_TO(struct attr_node, attr), &cf_type_attr), + CF_END + } +}; + +struct cf_section attr_set_cf_sub = { + CF_TYPE(struct attr_node), + CF_ITEMS { + CF_USER("Attr", PTR_TO(struct attr_node, attr), &cf_type_attr_sub), + CF_END + } +}; + +void +attr_set_commit(struct attr_set *set, clist *l) +{ + CF_JOURNAL_VAR(*set); + bit_array_zero(set->a, ATTR_SET_SIZE); + CLIST_FOR_EACH(struct attr_node *, n, *l) + bit_array_set(set->a, n->attr); +} diff --git a/sherlock/attrset.h b/sherlock/attrset.h new file mode 100644 index 0000000..62e6162 --- /dev/null +++ b/sherlock/attrset.h @@ -0,0 +1,36 @@ +/* + * Sherlock Library -- Sets of object attributes + * + * (c) 2006 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */ + +#ifndef _SHERLOCK_ATTRSET_H +#define _SHERLOCK_ATTRSET_H + +#include "lib/bitarray.h" +#include "sherlock/object.h" + +COMPILE_ASSERT(son_value, OBJ_ATTR_SON == 256); +#define ATTR_SET_SIZE 512 + +struct attr_set { + BIT_ARRAY(a, ATTR_SET_SIZE); +}; + +static inline uns +attr_set_match(struct attr_set *set, struct oattr *attr) +{ + return bit_array_isset(set->a, attr->attr); +} + +/* Configuration helpers */ + +extern struct cf_section attr_set_cf, attr_set_cf_sub; + +struct clist; +void attr_set_commit(struct attr_set *set, struct clist *list); + +#endif diff --git a/sherlock/buck2obj.c b/sherlock/buck2obj.c new file mode 100644 index 0000000..6bee56b --- /dev/null +++ b/sherlock/buck2obj.c @@ -0,0 +1,406 @@ +/* + * Sherlock Library -- Generating Objects from Buckets + * + * (c) 2004, Robert Spalek + * (c) 2004--2006, Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#undef LOCAL_DEBUG + +#include "sherlock/sherlock.h" +#include "lib/unaligned.h" +#include "lib/mempool.h" +#include "lib/fastbuf.h" +#include "lib/unicode.h" +#include "sherlock/object.h" +#include "sherlock/objread.h" +#include "lib/lizard.h" +#include "lib/bbuf.h" +#include "lib/ff-unicode.h" + +#include <errno.h> +#include <string.h> + +#define RET_ERR(num) ({ errno = num; return -1; }) + +struct buck2obj_buf +{ + bb_t bb; + struct lizard_buffer *lizard; +}; + +static uns get_attr_type; + +void +get_attr_set_type(uns type) +{ + if (type < BUCKET_TYPE_PLAIN || type > BUCKET_TYPE_V33_LIZARD) + die("Unknown buckettype %x", type); + get_attr_type = type; +} + +int +get_attr(byte **pos, byte *end, struct parsed_attr *attr) +{ + byte *ptr = *pos; + if (ptr >= end) + return -1; + if (get_attr_type < BUCKET_TYPE_V33) + { + if (get_attr_type == BUCKET_TYPE_PLAIN) + { + while (ptr < end && *ptr == '\n') + ptr++; + *pos = ptr; + if (ptr >= end) + return -1; + } + else if (*ptr == '\n') + { + *pos = ++ptr; + attr->attr = 0; + 
return 0; + } + attr->attr = *ptr++; + attr->val = ptr; + while (ptr < end && *ptr != '\n') + ptr++; + attr->len = ptr++ - attr->val; + } + else + { + uns len; + GET_UTF8_32(ptr, len); + if (!len--) + { + *pos = ptr; + attr->attr = 0; + return 0; + } + attr->attr = ptr[len]; + attr->val = ptr; + attr->len = len; + ptr += len+1; + } + if (ptr > end) + die("Incomplete attribute %c", attr->attr); + *pos = ptr; + return attr->attr; +} + +int +bget_attr(struct fastbuf *b, struct parsed_attr *attr) +{ + static bb_t buf; + if (get_attr_type < BUCKET_TYPE_V33) + { + int c = bgetc(b); + if (c < 0) + return -1; + if (get_attr_type == BUCKET_TYPE_PLAIN) + { + while (c == '\n') + c = bgetc(b); + if (c < 0) + return -1; + } + else if (c == '\n') + { + attr->attr = 0; + return 0; + } + attr->attr = c; + + byte *ptr, *end; + uns len = bdirect_read_prepare(b, &ptr); + end = ptr + len; + attr->val = ptr; + while (ptr < end && *ptr != '\n') + ptr++; + if (ptr < end) + { + bdirect_read_commit(b, ptr+1); + attr->len = ptr - attr->val; + return attr->attr; + } + + len = 0; + c = bgetc(b); + while (c >= 0 && c != '\n') + { + bb_grow(&buf, len+1); + buf.ptr[len++] = c; + c = bgetc(b); + } + if (c < 0) + die("Incomplete attribute %c", attr->attr); + attr->val = buf.ptr; + attr->len = len; + } + else + { + int len = bget_utf8_32(b); + if (len < 0) + return -1; + if (!len) + { + attr->attr = 0; + return 0; + } + attr->len = len-1; + + byte *ptr; + int avail = bdirect_read_prepare(b, &ptr); + if (avail >= len) + { + attr->val = ptr; + attr->attr = ptr[len-1]; + bdirect_read_commit(b, ptr + len); + return attr->attr; + } + bb_grow(&buf, --len); + breadb(b, buf.ptr, len); + attr->val = buf.ptr; + attr->len = len; + attr->attr = bgetc(b); + if (attr->attr < 0) + die("Incomplete attribute %c", attr->attr); + } + return attr->attr; +} + +void +copy_parsed_attr(struct mempool *pool, struct parsed_attr *attr) +{ + byte *b = mp_alloc_fast_noalign(pool, attr->len+1); + memcpy(b, attr->val, 
attr->len); + b[attr->len] = 0; + attr->val = b; +} + +struct buck2obj_buf * +buck2obj_alloc(void) +{ + struct buck2obj_buf *buf = xmalloc(sizeof(struct buck2obj_buf)); + bb_init(&buf->bb); + buf->lizard = lizard_alloc(); + return buf; +} + +void +buck2obj_free(struct buck2obj_buf *buf) +{ + lizard_free(buf->lizard); + bb_done(&buf->bb); + xfree(buf); +} + +static inline byte * +decode_attributes(byte *ptr, byte *end, struct odes *o, uns can_overwrite) +{ + struct obj_read_state st; + obj_read_start(&st, o); + + if (can_overwrite >= 2) + while (ptr < end) + { + uns len; + GET_UTF8_32(ptr, len); + if (!len--) + break; + byte type = ptr[len]; + + ptr[len] = 0; + obj_read_attr_ref(&st, type, ptr); + + ptr += len + 1; + } + else + while (ptr < end) + { + uns len; + GET_UTF8_32(ptr, len); + if (!len--) + break; + byte type = ptr[len]; + + byte *dup = mp_alloc_fast_noalign(o->pool, len+1); + memcpy(dup, ptr, len); + dup[len] = 0; + obj_read_attr_ref(&st, type, dup); + + ptr += len + 1; + } + obj_read_end(&st); + return ptr; +} + +int +buck2obj_parse(struct buck2obj_buf *buf, uns buck_type, uns buck_len, struct fastbuf *body, + struct odes *o_hdr, uns *body_start, struct odes *o_body, + uns allow_zero_copy) +{ + struct obj_read_state st; + if (buck_type <= BUCKET_TYPE_PLAIN) + { + if (body_start) // there is no header part + *body_start = 0; + obj_read_start(&st, o_hdr); + byte *b; + // ignore empty lines and read until the end of the bucket + sh_off_t end = btell(body) + buck_len; + while (btell(body) < end && bgets_bb(body, &buf->bb, ~0U)) + if ((b = buf->bb.ptr)[0]) + obj_read_attr(&st, b[0], b+1); + ASSERT(btell(body) == end); + obj_read_end(&st); + } + else if (buck_type == BUCKET_TYPE_V30) + { + sh_off_t start = btell(body); + sh_off_t end = start + buck_len; + byte *b; + struct obj_read_state st; + obj_read_start(&st, o_hdr); + while (btell(body) < end && bgets_bb(body, &buf->bb, ~0U) && (b = buf->bb.ptr)[0]) + obj_read_attr(&st, b[0], b+1); + obj_read_end(&st); + 
if (body_start) + *body_start = btell(body) - start; + else + { + obj_read_start(&st, o_body); + while (btell(body) < end && bgets_bb(body, &buf->bb, ~0U)) + if ((b = buf->bb.ptr)[0]) + obj_read_attr(&st, b[0], b+1); + ASSERT(btell(body) == end); + obj_read_end(&st); + } + } + else if (buck_type == BUCKET_TYPE_V33 || buck_type == BUCKET_TYPE_V33_LIZARD) + { + /* Avoid reading the whole bucket if only its header is needed. */ + if (body_start) + { + sh_off_t start = btell(body); + sh_off_t end = start + buck_len; + obj_read_start(&st, o_hdr); + while (btell(body) < end) + { + uns len = bget_utf8_32(body); + if (!len) + break; + byte *buf = mp_alloc_fast_noalign(o_hdr->pool, len); + bread(body, buf, len); + uns type = buf[--len]; + buf[len] = 0; + obj_read_attr_ref(&st, type, buf); + } + obj_read_end(&st); + *body_start = btell(body) - start; + return 0; + } + + /* Read all the bucket into 1 buffer, 0-copy if possible. */ + byte *ptr, *end; + uns len = bdirect_read_prepare(body, &ptr); + uns copied = 0; + if (len < buck_len || + ((body->can_overwrite_buffer < 2 || !allow_zero_copy) && buck_type == BUCKET_TYPE_V33)) + { + /* Copy if the original buffer is too small. + * If it is write-protected, copy it also if it is uncompressed. 
*/ + DBG("NO ZC: %d < %d, %d %08x", len, buck_len, body->can_overwrite_buffer, buck_type); + bb_grow(&buf->bb, buck_len); + len = bread(body, buf->bb.ptr, buck_len); + ptr = buf->bb.ptr; + copied = 1; + } + else + DBG("ZC (%d >= %d, %d %08x)", len, buck_len, body->can_overwrite_buffer, buck_type); + end = ptr + buck_len; + + ptr = decode_attributes(ptr, end, o_hdr, 0); // header + if (buck_type == BUCKET_TYPE_V33_LIZARD) // decompression + { + if (ptr + 8 > end) + { + if (ptr == end) // truncated bucket + goto commit; + RET_ERR(EINVAL); + } + len = GET_U32(ptr); + ptr += 4; + uns adler = GET_U32(ptr); + ptr += 4; + byte *new_ptr = lizard_decompress_safe(ptr, buf->lizard, len); + if (!new_ptr) + return -1; + if (adler32(new_ptr, len) != adler) + RET_ERR(EINVAL); + if (!copied) + bdirect_read_commit(body, end); + ptr = new_ptr; + end = ptr + len; + copied = 1; + } + ptr = decode_attributes(ptr, end, o_body, 2); // body + if (ptr != end) + RET_ERR(EINVAL); + commit: + if (!copied) + bdirect_read_commit_modified(body, ptr); + } + else + { + bskip(body, buck_len); + RET_ERR(EINVAL); + } + return 0; +} + +struct odes * +obj_read_bucket(struct buck2obj_buf *buf, struct mempool *pool, uns buck_type, uns buck_len, struct fastbuf *body, + uns *body_start, uns allow_zero_copy) +{ + struct odes *o = obj_new(pool); + if (buck2obj_parse(buf, buck_type, buck_len, body, o, body_start, o, allow_zero_copy) < 0) + return NULL; + else + return o; +} + +static int +obj_read_line(struct fastbuf *f, struct obj_read_state *st) +{ + byte *buf = bgets_stk(f); + if (buf) + { + if (!buf[0]) + return 1; + obj_read_attr(st, buf[0], buf+1); + return -1; + } + else + return 0; +} + +int +obj_read(struct fastbuf *f, struct odes *o) +{ + struct obj_read_state st; + int rc = 0; + obj_read_start(&st, o); + while ((rc = obj_read_line(f, &st)) < 0); + obj_read_end(&st); + return rc; +} + +void +default_obj_read_error(struct obj_read_state *st UNUSED, char *err) +{ + msg(L_ERROR, "%s", err); +} diff 
--git a/sherlock/conf-parse.c b/sherlock/conf-parse.c new file mode 100644 index 0000000..bf66d39 --- /dev/null +++ b/sherlock/conf-parse.c @@ -0,0 +1,232 @@ +/* + * Sherlock Library -- Configuration Parsing Helpers + * + * (c) 2006 Martin Mares + * (c) 2006 Robert Spalek + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#include "sherlock/sherlock.h" +#include "sherlock/object.h" +#include "lib/chartype.h" +#include "lib/fastbuf.h" +#include "lib/ff-unicode.h" +#include "lib/unicode.h" +#include "sherlock/conf.h" + +/*** Attribute names ***/ + +static byte * +attr_sub_parser(byte *c, uns *ptr) +{ + if (c[0] && !c[1]) + *ptr = c[0]; + else if (c[0] == '(' && c[1] && c[1] != ')' && c[2] == ')' && !c[3]) + *ptr = OBJ_ATTR_SON + c[1]; + else + return "Invalid attribute name"; + return NULL; +} + +static byte * +attr_parser(byte *c, uns *ptr) +{ + byte *err; + if (err = attr_sub_parser(c, ptr)) + return err; + if (*ptr >= OBJ_ATTR_SON) + return "Names of sub-objects are not allowed here"; + return NULL; +} + +static void +attr_sub_dumper(struct fastbuf *b, uns *ptr) +{ + if (!*ptr) + bprintf(b, " "); + else if (*ptr < OBJ_ATTR_SON) + bprintf(b, "%c ", *ptr); + else + bprintf(b, "(%c) ", *ptr - OBJ_ATTR_SON); +} + +struct cf_user_type cf_type_attr = { + .size = sizeof(uns), + .name = "attr", + .parser = (cf_parser1 *) attr_parser, + .dumper = (cf_dumper1 *) attr_sub_dumper +}; + +struct cf_user_type cf_type_attr_sub = { + .size = sizeof(uns), + .name = "attr_sub", + .parser = (cf_parser1 *) attr_sub_parser, + .dumper = (cf_dumper1 *) attr_sub_dumper +}; + +/*** Unicode characters ***/ + +static uns +uni_parser(byte *c, u16 *up) +{ + uns u; + byte *cc = (byte*)utf8_get(c, &u); + if (*cc || u == UNI_REPLACEMENT) + { + for (uns i=0; i<4; i++) + if (!Cxdigit(c[i])) + return 1; + else + u = (u << 4) | Cxvalue(c[i]); + if (c[4]) + return 1; + } + *up = u; + return 0; +} + +static byte * 
+unichar_parser(byte *c, uns *up) +{ + u16 u; + if (uni_parser(c, &u)) + return "Expecting one UTF-8 character or its code"; + *up = u; + return 0; +} + +static void +unichar_dumper(struct fastbuf *b, uns *up) +{ + bput_utf8(b, *up); + bputc(b, ' '); +} + +struct cf_user_type cf_type_unichar = { + .size = sizeof(uns), + .name = "unichar", + .parser = (cf_parser1 *) unichar_parser, + .dumper = (cf_dumper1 *) unichar_dumper +}; + +/*** Unicode ranges ***/ + +static byte * +unirange_parser(byte *s, struct unirange *ur) +{ + byte *c; + if ((c = strchr(s, '-')) && c > s) + { + *c++ = 0; + if (uni_parser(s, &ur->min) || uni_parser(c, &ur->max)) + goto err; + } + else + { + if (uni_parser(s, &ur->min)) + goto err; + ur->max = ur->min; + } + if (ur->min > ur->max) + return "Invalid code range (min>max)"; + return NULL; + + err: + return "Incorrect syntax of a code range"; +} + +static void +unirange_dumper(struct fastbuf *b, struct unirange *ur) +{ + bprintf(b, (ur->min == ur->max ? "%04x " : "%04x-%04x "), ur->min, ur->max); +} + +struct cf_user_type cf_type_unirange = { + .size = sizeof(struct unirange), + .name = "unirange", + .parser = (cf_parser1 *) unirange_parser, + .dumper = (cf_dumper1 *) unirange_dumper +}; + +/*** Unsigned integer ranges ***/ + +static byte * +unsrange_parser(byte *s, struct unsrange *r) +{ + byte *c, *msg; + if ((c = strchr(s, '-')) && c > s) + { + *c++ = 0; + if (*c == '-') + return "Incorrect syntax of an unsigned range"; + if ((msg = cf_parse_int(s, &r->min)) || (msg = cf_parse_int(c, &r->max))) + return msg; + } + else + { + if (msg = cf_parse_int(s, &r->min)) + return msg; + r->max = r->min; + } + if (r->min > r->max) + return "Invalid unsigned range (min>max)"; + return NULL; +} + +static void +unsrange_dumper(struct fastbuf *b, struct unsrange *r) +{ + bprintf(b, (r->min == r->max ? 
"%u " : "%u-%u "), r->min, r->max); +} + +struct cf_user_type cf_type_unsrange = { + .size = sizeof(struct unsrange), + .name = "gerr_range", + .parser = (cf_parser1 *) unsrange_parser, + .dumper = (cf_dumper1 *) unsrange_dumper +}; + +/* Configuration sections for (word|meta|string)-types */ + +static byte * +parse_u8(byte *s, uns *w) +{ + CF_JOURNAL_VAR(*w); + byte *msg = cf_parse_int(s, (int *)w); + if (msg) + return msg; + if (*w > 255) + return "Weights are limited to 0..255"; + return NULL; +} + +static void +dump_u8(struct fastbuf *fb, uns *ptr) +{ + bprintf(fb, "%d ", *ptr); +} + +static struct cf_user_type weight_type = { + .size = sizeof(uns), + .name = "weight", + .parser = (cf_parser1*) parse_u8, + .dumper = (cf_dumper1*) dump_u8 +}; + +void +cf_generate_word_type_config(struct cf_section *sec, byte **names, uns multiple, uns just_u8) +{ + uns number = 0; + while (names[number]) + number++; + struct cf_item *items = sec->cfg = xmalloc((number + 1) * sizeof(struct cf_item)); + for (uns i = 0; i < number; i++) { + if (just_u8) + items[i] = (struct cf_item) CF_USER_ARY(names[i], ((uns*) NULL) + i*multiple, &weight_type, multiple); + else + items[i] = (struct cf_item) CF_UNS_ARY(names[i], ((uns*) NULL) + i*multiple, multiple); + } + items[number] = (struct cf_item) CF_END; +} diff --git a/sherlock/conf.h b/sherlock/conf.h new file mode 100644 index 0000000..4883c21 --- /dev/null +++ b/sherlock/conf.h @@ -0,0 +1,42 @@ +/* + * Sherlock Library -- Configuration Parsing Helpers + * + * (c) 2006 Martin Mares + * (c) 2006 Robert Spalek + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */ + +#ifndef _SHERLOCK_CONF_H +#define _SHERLOCK_CONF_H + +#include "lib/conf.h" + +/* All of the following objects are defined in conf-parse.c + * + * Object names */ + +extern struct cf_user_type cf_type_attr, cf_type_attr_sub; + +/* Unicode character and ranges */ + +struct unirange { + u16 min, max; +}; + +extern struct cf_user_type cf_type_unirange; +extern struct cf_user_type cf_type_unichar; + +/* Unsigned integer ranges */ + +struct unsrange { + uns min, max; +}; + +extern struct cf_user_type cf_type_unsrange; + +/* Sections for (word|meta|string)-types */ +void cf_generate_word_type_config(struct cf_section *sec, byte **names, uns multiple, uns just_u8); + +#endif diff --git a/sherlock/libsh.pc b/sherlock/libsh.pc new file mode 100644 index 0000000..89932d7 --- /dev/null +++ b/sherlock/libsh.pc @@ -0,0 +1,11 @@ +# pkg-config metadata for libsh + +libdir=@LIBDIR@ +incdir=. + +Name: libsh +Description: Functions common to the whole Sherlock project +Version: @SHERLOCK_VERSION@ +Cflags: -I${incdir} +Libs: -L${libdir} -lsh +Requires: @DEPS@ diff --git a/sherlock/obj-format.c b/sherlock/obj-format.c new file mode 100644 index 0000000..96aeb1d --- /dev/null +++ b/sherlock/obj-format.c @@ -0,0 +1,45 @@ +/* + * Sherlock Library -- Adding Formatted Attributes + * + * (c) 2005 Martin Mares + * (c) 2007 Pavel Charvat + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#include "sherlock/sherlock.h" +#include "sherlock/object.h" +#include "lib/stkstring.h" + +#include <stdarg.h> + +struct oattr * +obj_add_attr_vformat(struct odes *o, uns x, char *fmt, va_list args) +{ + return obj_add_attr(o, x, stk_vprintf(fmt, args)); +} + +struct oattr *obj_add_attr_format(struct odes *o, uns x, char *fmt, ...) 
+{ + va_list va; + va_start(va, fmt); + struct oattr *a = obj_add_attr_vformat(o, x, fmt, va); + va_end(va); + return a; +} + +struct oattr * +obj_set_attr_vformat(struct odes *o, uns x, char *fmt, va_list args) +{ + return obj_set_attr(o, x, stk_vprintf(fmt, args)); +} + +struct oattr *obj_set_attr_format(struct odes *o, uns x, char *fmt, ...) +{ + va_list va; + va_start(va, fmt); + struct oattr *a = obj_set_attr_vformat(o, x, fmt, va); + va_end(va); + return a; +} diff --git a/sherlock/obj-linear.c b/sherlock/obj-linear.c new file mode 100644 index 0000000..cfde3ad --- /dev/null +++ b/sherlock/obj-linear.c @@ -0,0 +1,65 @@ +/* + * Sherlock Library -- Linear Representation of Objects + * + * (c) 2005 Martin Mares + * (c) 2005 Robert Spalek + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#include "sherlock/sherlock.h" +#include "lib/fastbuf.h" +#include "lib/unaligned.h" +#include "lib/lizard.h" +#include "sherlock/object.h" +#include "sherlock/lizard-fb.h" + +byte * +obj_linearize(struct odes *d, uns min_compress, uns *plen) +{ + // Create uncompressed linearization + put_attr_set_type(BUCKET_TYPE_V33); + uns size = size_object(d); + byte *out = xmalloc(size+LIZARD_COMPRESS_HEADER + LIZARD_NEEDS_CHARS) + LIZARD_COMPRESS_HEADER; + byte *t = put_object(out, d); + ASSERT(t == out+size); + + struct lizard_block_req req = { + .type = BUCKET_TYPE_V33_LIZARD, + .ratio = min_compress / 100., + .in_ptr = out, + .in_len = size, + .out_ptr = NULL, + .out_len = 0, + }; + // Allocate a buffer for compressed data + int res = lizard_compress_req(&req); + ASSERT(res <= 0); + byte *buf = res<0 ? 
req.out_ptr=xmalloc(req.out_len+=LIZARD_COMPRESS_HEADER) : NULL; + res = lizard_compress_req_header(&req, 1); + ASSERT(res > 0); + if (req.out_ptr != out-LIZARD_COMPRESS_HEADER) + xfree(out-LIZARD_COMPRESS_HEADER); + else if (buf) + xfree(buf); + + *plen = req.out_len; + return req.out_ptr; +} + +struct odes * +obj_delinearize(struct buck2obj_buf *bbuf, struct mempool *mp, byte *buf, uns len, uns destructive) +{ + struct odes *o = obj_new(mp); + ASSERT(len >= LIZARD_COMPRESS_HEADER); + uns buck_type = buf[0] + BUCKET_TYPE_PLAIN; + + struct fastbuf fb; + uns sh = LIZARD_COMPRESS_HEADER - 1; + fbbuf_init_read(&fb, buf+sh, len-sh, destructive); + if (buck2obj_parse(bbuf, buck_type, len-sh, &fb, NULL, NULL, o, 1) < 0) + return NULL; + else + return o; +} diff --git a/sherlock/obj2buck.c b/sherlock/obj2buck.c new file mode 100644 index 0000000..3e1873a --- /dev/null +++ b/sherlock/obj2buck.c @@ -0,0 +1,368 @@ +/* + * Sherlock Library -- Generating Buckets from Objects + * + * (c) 2004, Robert Spalek + * (c) 2005, Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */ + +#include "sherlock/sherlock.h" +#include "lib/fastbuf.h" +#include "lib/ff-unicode.h" +#include "sherlock/object.h" + +#include <string.h> +#include <stdarg.h> +#include <stdio.h> + +static uns use_v33; +static int hdr_sep; + +void +put_attr_set_type(uns type) +{ + switch (type) + { + case BUCKET_TYPE_PLAIN: + use_v33 = 0; + hdr_sep = -1; + break; + case BUCKET_TYPE_V30: + use_v33 = 0; + hdr_sep = '\n'; + break; + case BUCKET_TYPE_V33: + case BUCKET_TYPE_V33_LIZARD: + use_v33 = 1; + hdr_sep = 0; + break; + default: + die("Don't know how to generate buckets of type %08x", type); + } +} + +uns +size_attr(uns len) +{ + if (use_v33) + { + len++; + return len + utf8_space(len); + } + else + return len + 2; +} + +uns +size_object(struct odes *d) +{ + uns sz = 0; + for (struct oattr *a=d->attrs; a; a=a->next) + for (struct oattr *b=a; b; b=b->same) + if (a->attr >= OBJ_ATTR_SON) + sz += 3 + size_object(b->son) + 2; + else + sz += size_attr(strlen(b->val)); + return sz; +} + +inline byte * +put_attr(byte *ptr, uns type, byte *val, uns len) +{ + if (use_v33) + { + PUT_UTF8_32(ptr, len+1); + memcpy(ptr, val, len); + ptr += len; + *ptr++ = type; + } + else + { + *ptr++ = type; + memcpy(ptr, val, len); + ptr += len; + *ptr++ = '\n'; + } + return ptr; +} + +byte * +put_attr_str(byte *ptr, uns type, byte *val) +{ + return put_attr(ptr, type, val, strlen(val)); +} + +inline byte * +put_attr_vformat(byte *ptr, uns type, byte *mask, va_list va) +{ + uns len; + if (use_v33) + { + len = vsprintf(ptr+1, mask, va); + if (len >= 127) + { + byte tmp[6], *tmp_end = tmp; + PUT_UTF8_32(tmp_end, len+1); + uns l = tmp_end - tmp; + memmove(ptr+l, ptr+1, len); + memcpy(ptr, tmp, l); + ptr += l + len; + } + else + { + *ptr = len+1; + ptr += len+1; + } + *ptr++ = type; + } + else + { + *ptr++ = type; + len = vsprintf(ptr, mask, va); + ptr += len; + *ptr++ = '\n'; + } + return ptr; +} + +byte * +put_attr_format(byte *ptr, uns type, char *mask, ...) 
+{ + va_list va; + va_start(va, mask); + byte *ret = put_attr_vformat(ptr, type, mask, va); + va_end(va); + return ret; +} + +byte * +put_attr_num(byte *ptr, uns type, uns val) +{ + if (use_v33) + { + uns len = sprintf(ptr+1, "%d", val) + 1; + *ptr = len; + ptr += len; + *ptr++ = type; + } + else + ptr += sprintf(ptr, "%c%d\n", type, val); + return ptr; +} + +byte * +put_attr_separator(byte *ptr) +{ + if (hdr_sep >= 0) + *ptr++ = hdr_sep; + return ptr; +} + +byte * +put_attr_push(byte *ptr, uns type) +{ + byte name = type; + return put_attr(ptr, '(', &name, 1); +} + +byte * +put_attr_pop(byte *ptr) +{ + return put_attr(ptr, ')', NULL, 0); +} + +byte * +put_object(byte *t, struct odes *d) +{ + for (struct oattr *a=d->attrs; a; a=a->next) + for (struct oattr *b=a; b; b=b->same) + if (a->attr >= OBJ_ATTR_SON) + { + t = put_attr_push(t, a->attr - OBJ_ATTR_SON); + t = put_object(t, b->son); + t = put_attr_pop(t); + } + else + t = put_attr_str(t, a->attr, b->val); + return t; +} + +inline void +bput_attr_large(struct fastbuf *b, uns type, byte *val, uns len) +{ + if (use_v33) + { + bput_utf8_32(b, len+1); + bwrite(b, val, len); + bputc(b, type); + } + else + { + bputc(b, type); + bwrite(b, val, len); + bputc(b, '\n'); + } +} + +inline void +bput_attr(struct fastbuf *b, uns type, byte *val, uns len) +{ + bput_attr_large(b, type, val, len); +} + +void +bput_attr_str(struct fastbuf *b, uns type, byte *val) +{ + bput_attr(b, type, val, strlen(val)); +} + +void +bput_attr_vformat(struct fastbuf *b, uns type, byte *mask, va_list va) +{ + int len; + if (use_v33) + { + va_list va2; + va_copy(va2, va); + len = vsnprintf(NULL, 0, mask, va2); + va_end(va2); + if (len < 0) + die("vsnprintf() does not support size=0"); + bput_utf8_32(b, len+1); + vbprintf(b, mask, va); + bputc(b, type); + } + else + { + bputc(b, type); + len = vbprintf(b, mask, va); + bputc(b, '\n'); + } +} + +void +bput_attr_format(struct fastbuf *b, uns type, char *mask, ...) 
+{ + va_list va; + va_start(va, mask); + bput_attr_vformat(b, type, mask, va); + va_end(va); +} + +void +bput_attr_num(struct fastbuf *b, uns type, uns val) +{ + if (use_v33) + { + byte tmp[12]; + uns len = sprintf(tmp, "%d", val); + bputc(b, len+1); + bwrite(b, tmp, len); + bputc(b, type); + } + else + bprintf(b, "%c%d\n", type, val); +} + +void +bput_attr_separator(struct fastbuf *b) +{ + if (hdr_sep >= 0) + bputc(b, hdr_sep); +} + +void +bput_attr_push(struct fastbuf *b, uns type) +{ + byte name = type; + bput_attr(b, '(', &name, 1); +} + +void +bput_attr_pop(struct fastbuf *b) +{ + bput_attr(b, ')', NULL, 0); +} + +static inline void +do_bput_oattr(struct fastbuf *f, struct oattr *a) +{ + for(struct oattr *b=a; b; b=b->same) + if (a->attr >= OBJ_ATTR_SON) + { + bput_attr_push(f, a->attr - OBJ_ATTR_SON); + bput_object(f, b->son); + bput_attr_pop(f); + } + else + { +#ifdef DEBUG_ASSERTS + byte *z; + for (z = b->val; *z; z++) + if (*z < ' ' && *z != '\t') + { + log(L_ERROR, "obj_write: Found non-ASCII character %02x in %c%s", *z, a->attr, b->val); + ASSERT(0); + } +#endif + bput_attr_str(f, a->attr, b->val); + } +} + +void +bput_oattr(struct fastbuf *f, struct oattr *a) +{ + if (a) + do_bput_oattr(f, a); +} + +void +bput_object(struct fastbuf *f, struct odes *d) +{ + for(struct oattr *a=d->attrs; a; a=a->next) + do_bput_oattr(f, a); +} + +static inline void +do_bput_oattr_nocheck(struct fastbuf *f, struct oattr *a) +{ + for(struct oattr *b=a; b; b=b->same) + if (a->attr >= OBJ_ATTR_SON) + { + bput_attr_push(f, a->attr - OBJ_ATTR_SON); + bput_object_nocheck(f, b->son); + bput_attr_pop(f); + } + else + bput_attr_large(f, a->attr, b->val, strlen(b->val)); +} + +void +bput_oattr_nocheck(struct fastbuf *f, struct oattr *a) +{ + if (a) + do_bput_oattr_nocheck(f, a); +} + +void +bput_object_nocheck(struct fastbuf *f, struct odes *d) +{ + for(struct oattr *a=d->attrs; a; a=a->next) + do_bput_oattr_nocheck(f, a); +} + +void +obj_write(struct fastbuf *b, struct odes *o, uns 
bucket_type) +{ + put_attr_set_type(bucket_type); + bput_object(b, o); +} + +void +obj_write_nocheck(struct fastbuf *b, struct odes *o, uns bucket_type) +{ + put_attr_set_type(bucket_type); + bput_object_nocheck(b, o); +} diff --git a/sherlock/object.c b/sherlock/object.c new file mode 100644 index 0000000..d12270b --- /dev/null +++ b/sherlock/object.c @@ -0,0 +1,430 @@ +/* + * Sherlock Library -- Object Functions + * + * (c) 1997--2006 Martin Mares + * (c) 2004 Robert Spalek + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#include "sherlock/sherlock.h" +#include "lib/mempool.h" +#include "lib/fastbuf.h" +#include "sherlock/object.h" + +#include <string.h> +#include <stdio.h> +#include <stdlib.h> + +void +obj_dump(struct odes *d) /* Print every attribute of d to stdout, one per line; a repeated value of the same attribute gets a blank instead of the attribute letter, and each sub-object is wrapped in "(X" and ")" lines. */ +{ + for(struct oattr *a=d->attrs; a; a=a->next) + for(struct oattr *b=a; b; b=b->same) + if (a->attr >= OBJ_ATTR_SON) + { + printf("(%c\n", a->attr - OBJ_ATTR_SON); + obj_dump(b->son); + printf(")\n"); + } + else + printf("%c%s\n", (a==b ? a->attr : ' '), b->val); +} + +void +obj_dump_indented(struct odes *d, uns indent) /* Same output as obj_dump(), but every attribute line starts with `indent` tab characters and sub-objects recurse with indent+1. */ +{ + for(struct oattr *a=d->attrs; a; a=a->next) + for(struct oattr *b=a; b; b=b->same) + { + for (uns i=0; i<indent; i++) + putchar('\t'); + if (a->attr >= OBJ_ATTR_SON) + { + printf("(%c\n", a->attr - OBJ_ATTR_SON); + obj_dump_indented(b->son, indent+1); + for (uns i=0; i<=indent; i++) /* the closing parenthesis is preceded by indent+1 tabs */ + putchar('\t'); + printf(")\n"); + } + else + printf("%c%s\n", (a==b ? 
a->attr : ' '), b->val); + } +} + +static struct oattr * +oa_new(struct odes *o, uns x, byte *v) +{ + uns l = strlen(v)+1; + struct oattr *a = mp_alloc(o->pool, sizeof(struct oattr) + l); + + a->next = a->same = NULL; + a->attr = x; + a->val = (byte*) (a+1); + memcpy(a->val, v, l); + return a; +} + +static struct oattr * +oa_new_ref(struct odes *o, uns x, byte *v) +{ + struct oattr *a = mp_alloc(o->pool, sizeof(struct oattr)); + + a->next = a->same = NULL; + a->attr = x; + a->val = v; + return a; +} + +static struct oattr * +oa_new_son(struct odes *o, uns x, struct odes *son) +{ + struct oattr *a = mp_alloc(o->pool, sizeof(struct oattr)); + + a->next = a->same = NULL; + a->attr = x; + a->son = son; + son->parent = o; + return a; +} + +struct odes * +obj_new(struct mempool *pool) +{ + struct odes *o = mp_alloc(pool, sizeof(struct odes)); + o->pool = pool; + o->attrs = NULL; + o->cached_attr = NULL; + o->parent = NULL; + return o; +} + +struct oattr * +obj_find_attr(struct odes *o, uns x) +{ + struct oattr *a; + for(a=o->attrs; a && a->attr != x; a=a->next) + ; + return a; +} + +struct oattr * +obj_find_attr_last(struct odes *o, uns x) +{ + struct oattr *a = obj_find_attr(o, x); + + if (a) + { + while (a->same) + a = a->same; + } + return a; +} + +uns +obj_del_attr(struct odes *o, struct oattr *a) +{ + struct oattr *x, **p; + uns aa = a->attr; + + o->cached_attr = NULL; + p = &o->attrs; + while (x = *p) + { + if (x->attr == aa) + { + if (x == a) + { + if (x->same) + { + x->same->next = x->next; + *p = x->same; + } + else + *p = x->next; + return 1; + } + p = &x->same; + while (x = *p) + { + if (x == a) + { + *p = x->same; + return 1; + } + p = &x->same; + } + return 0; + } + p = &x->next; + } + return 0; +} + +byte * +obj_find_aval(struct odes *o, uns x) +{ + struct oattr *a = obj_find_attr(o, x); + return a ? a->val : NULL; +} + +uns +obj_find_anum(struct odes *o, uns x, uns def) +{ + struct oattr *a = obj_find_attr(o, x); + return a ? 
(uns)atol(a->val) : def; +} + +u32 +obj_find_x32(struct odes *o, uns x, u32 def) +{ + struct oattr *a = obj_find_attr(o, x); + return a ? (u32)strtoul(a->val, NULL, 16) : def; +} + +u64 +obj_find_x64(struct odes *o, uns x, u64 def) +{ + struct oattr *a = obj_find_attr(o, x); + return a ? (u64)strtoull(a->val, NULL, 16) : def; +} + +struct oattr * +obj_set_attr(struct odes *o, uns x, byte *v) +{ + struct oattr *a, **z; + + z = &o->attrs; + while (a = *z) + { + if (a->attr == x) + { + *z = a->next; + goto set; + } + z = &a->next; + } + + set: + if (v) + { + a = oa_new(o, x, v); + a->next = o->attrs; + o->attrs = a; + } + else + a = NULL; + o->cached_attr = a; + return a; +} + +struct oattr * +obj_set_attr_num(struct odes *o, uns a, uns v) +{ + byte x[32]; + + sprintf(x, "%d", v); + return obj_set_attr(o, a, x); +} + +static inline struct oattr * +obj_link_attr(struct odes *o, struct oattr *b) +{ + struct oattr *a, **z; + + if (!(a = o->cached_attr) || a->attr != b->attr) + { + z = &o->attrs; + while ((a = *z) && a->attr != b->attr) + z = &a->next; + if (!a) + { + *z = b; + /* b->next is NULL */ + goto done; + } + } + while (a->same) + a = a->same; + a->same = b; + done: + o->cached_attr = b; + return b; +} + +struct oattr * +obj_add_attr(struct odes *o, uns x, byte *v) +{ + return obj_link_attr(o, oa_new(o, x, v)); +} + +struct oattr * +obj_add_attr_ref(struct odes *o, uns x, byte *v) +{ + return obj_link_attr(o, oa_new_ref(o, x, v)); +} + +struct oattr * +obj_add_attr_num(struct odes *o, uns a, uns v) +{ + byte x[32]; + + sprintf(x, "%d", v); + return obj_add_attr(o, a, x); +} + +struct oattr * +obj_add_attr_son(struct odes *o, uns x, struct odes *son) +{ + return obj_link_attr(o, oa_new_son(o, x, son)); +} + +struct oattr * +obj_prepend_attr(struct odes *o, uns x, byte *v) +{ + struct oattr *a, *b, **z; + + b = oa_new(o, x, v); + z = &o->attrs; + while (a = *z) + { + if (a->attr == x) + { + b->same = a; + b->next = a->next; + a->next = NULL; + *z = b; + return b; + 
} + z = &a->next; + } + b->next = o->attrs; + o->attrs = b; + return b; +} + +struct oattr * +obj_insert_attr(struct odes *o, struct oattr *first, struct oattr *after, byte *v) +{ + struct oattr *b = oa_new(o, first->attr, v); + b->same = after->same; + after->same = b; + return b; +} + +void +obj_move_attr_to_head(struct odes *o, uns x) +{ + struct oattr *a, **z; + + z = &o->attrs; + while (a = *z) + { + if (a->attr == x) + { + *z = a->next; + a->next = o->attrs; + o->attrs = a; + break; + } + z = &a->next; + } +} + +void +obj_move_attr_to_tail(struct odes *o, uns x) +{ + struct oattr *a, **z; + + z = &o->attrs; + while (a = *z) + { + if (a->attr == x) + { + *z = a->next; + while (*z) + z = &(*z)->next; + *z = a; + a->next = NULL; + break; + } + z = &a->next; + } +} + +struct odes * +obj_find_son(struct odes *o, uns x) +{ + ASSERT(x >= OBJ_ATTR_SON); + struct oattr *a = obj_find_attr(o, x); + return a ? a->son : NULL; +} + +struct oattr * +obj_add_son_ref(struct odes *o, uns x, struct odes *son) +{ + struct oattr *oa = oa_new_son(o, x, son); + obj_link_attr(o, oa); + return oa; +} + +struct odes * +obj_add_son(struct odes *o, uns x) +{ + struct odes *son = obj_new(o->pool); + obj_add_son_ref(o, x, son); + return son; +} + +static void obj_clone_attr_list(struct odes *dest, struct odes *src); + +static struct oattr * +obj_clone_attr(struct odes *dest, struct oattr *a) +{ + struct oattr *res = NULL, **rr = &res; + if (a->attr < OBJ_ATTR_SON) + { + for (; a; a=a->same) + { + *rr = oa_new(dest, a->attr, a->val); + rr = &(*rr)->same; + } + } + else + { + for (; a; a=a->same) + { + struct odes *dson = obj_new(dest->pool); + *rr = oa_new_son(dest, a->attr, dson); + rr = &(*rr)->same; + obj_clone_attr_list(dson, a->son); + } + } + return res; +} + +static void +obj_clone_attr_list(struct odes *dest, struct odes *src) +{ + struct oattr **p = &dest->attrs; + for (struct oattr *a = src->attrs; a; a=a->next) + { + *p = obj_clone_attr(dest, a); + p = &(*p)->next; + } +} + 
+void
+obj_add_attr_clone(struct odes *o, struct oattr *a)	/* Add a copy of a (including all its further values) to o */
+{
+  obj_link_attr(o, obj_clone_attr(o, a));
+}
+
+struct odes *
+obj_clone(struct mempool *pool, struct odes *src)	/* Create a deep copy of src allocated from pool */
+{
+  struct odes *dest = obj_new(pool);
+  obj_clone_attr_list(dest, src);
+  return dest;
+}
diff --git a/sherlock/object.h b/sherlock/object.h
new file mode 100644
index 0000000..c123044
--- /dev/null
+++ b/sherlock/object.h
@@ -0,0 +1,169 @@
+/*
+ * Sherlock Library -- Objects and operations on them
+ *
+ * (c) 1997--2006 Martin Mares
+ * (c) 2004--2005, Robert Spalek
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+/*
+ * This is the main data structure used by Sherlock for many different
+ * purposes, most notably storage of documents in various stages of processing.
+ *
+ * Each object consists of a sequence of attributes whose names are single
+ * characters and values are either strings or subobjects. The order of attributes
+ * is not maintained (except for a couple of very special cases), while the order
+ * of multiple values of a single attribute is.
+ *
+ * Objects exist either in the form of struct odes (an in-memory representation
+ * with very easy manipulation) or as a bucket (a linear stream of bytes in one of
+ * several possible formats, some of which are compressed, used for sending objects
+ * between processes and storing them in files [see sherlock/bucket.h for bucket files]).
+ *
+ * See doc/objects for a more detailed description of how objects are used to
+ * represent documents.
+ */
+
+#ifndef _SHERLOCK_OBJECT_H
+#define _SHERLOCK_OBJECT_H
+
+struct fastbuf;
+struct mempool;
+
+/* object.c: In-memory representation of objects */
+
+struct odes {				/* Object description */
+  struct oattr *attrs;			/* List of attributes linked by oattr->next */
+  struct mempool *pool;			/* Pool from which attributes of this object are allocated */
+  struct oattr *cached_attr;		/* Attribute linked or set most recently (speeds up repeated adding) */
+  struct odes *parent;			/* Enclosing object, NULL if this is not a sub-object */
+};
+
+struct oattr {				/* Object attribute */
+  struct oattr *next, *same;		/* Next attribute of the object / next value of the same attribute */
+  uns attr;				/* +OBJ_ATTR_SON if it's a sub-object */
+  union {
+    byte *val;				/* String value (attr < OBJ_ATTR_SON) */
+    struct odes *son;			/* Sub-object (attr >= OBJ_ATTR_SON) */
+  };
+};
+
+#define OBJ_ATTR_SON 256
+
+void obj_dump(struct odes *);
+void obj_dump_indented(struct odes *, uns);
+struct odes *obj_new(struct mempool *);
+struct oattr *obj_find_attr(struct odes *, uns);
+struct oattr *obj_find_attr_last(struct odes *, uns);
+uns obj_del_attr(struct odes *, struct oattr *);
+byte *obj_find_aval(struct odes *, uns);
+uns obj_find_anum(struct odes *, uns, uns);
+u32 obj_find_x32(struct odes *, uns, u32);
+u64 obj_find_x64(struct odes *, uns, u64);
+struct oattr *obj_set_attr(struct odes *, uns, byte *);
+struct oattr *obj_set_attr_num(struct odes *, uns, uns);
+struct oattr *obj_add_attr(struct odes *, uns, byte *);
+struct oattr *obj_add_attr_ref(struct odes *o, uns x, byte *v);	// v is stored by reference, the string is not copied
+struct oattr *obj_add_attr_num(struct odes *o, uns, uns);
+struct oattr *obj_add_attr_son(struct odes *, uns, struct odes *);
+struct oattr *obj_prepend_attr(struct odes *, uns, byte *);
+struct oattr *obj_insert_attr(struct odes *o, struct oattr *first, struct oattr *after, byte *v);
+void obj_move_attr_to_head(struct odes *o, uns);
+void obj_move_attr_to_tail(struct odes *o, uns);
+struct odes *obj_find_son(struct odes *, uns);
+struct odes *obj_add_son(struct odes *, uns);
+struct oattr *obj_add_son_ref(struct odes *o, uns x, struct odes *son);
+void obj_add_attr_clone(struct odes *o, struct oattr *a);
+struct odes *obj_clone(struct mempool *pool, struct odes *src);
+
+/* Supported bucket formats */
+
+enum bucket_type {
+  BUCKET_TYPE_COMPAT = 0x7fffffff,	/* and less -- buckets created by older versions of Sherlock */
+  BUCKET_TYPE_PLAIN = 0x80000000,	/* plain textual buckets */
+  BUCKET_TYPE_V30 = 0x80000001,		/* v3.0 uncompressed buckets */
+  BUCKET_TYPE_V33 = 0x80000002,		/* v3.3 uncompressed buckets */
+  BUCKET_TYPE_V33_LIZARD = 0x80000003	/* v3.3 buckets compressed by lizard */
+};
+
+/* buck2obj.c: Reading of objects from buckets */
+
+struct parsed_attr {
+  int attr;
+  byte *val;
+  uns len;
+};
+struct buck2obj_buf;
+
+/* Note: the get_attr routines are not thread-safe. */
+void get_attr_set_type(uns type);
+int get_attr(byte **pos, byte *end, struct parsed_attr *attr);
+int bget_attr(struct fastbuf *b, struct parsed_attr *attr);
+void copy_parsed_attr(struct mempool *pool, struct parsed_attr *attr);
+
+struct buck2obj_buf *buck2obj_alloc(void);
+void buck2obj_free(struct buck2obj_buf *buf);
+
+int buck2obj_parse(struct buck2obj_buf *buf, uns buck_type, uns buck_len, struct fastbuf *body,
+  struct odes *o_hdr, uns *body_start, struct odes *o_body,
+  uns allow_zero_copy);
+struct odes *obj_read_bucket(struct buck2obj_buf *buf, struct mempool *pool, uns buck_type, uns buck_len, struct fastbuf *body,
+  uns *body_start, uns allow_zero_copy);
+  /* If body_start != NULL, then only the header is parsed and *body_start is
+   * set to the position of the body. This function performs plenty of optimizations
+   * and if the body fastbuf is overwritable (body->can_overwrite_buffer), it can keep the
+   * attribute values stored at their original locations in the fastbuf's buffer.
+   * However, no such optimizations are performed when reading the header only.
+   */
+
+int obj_read(struct fastbuf *, struct odes *);
+
+/* obj2buck.c: Generating buckets from objects */
+
+void put_attr_set_type(uns type);
+
+uns size_attr(uns len);
+uns size_object(struct odes *d);
+
+byte *put_attr(byte *ptr, uns type, byte *val, uns len);
+byte *put_attr_str(byte *ptr, uns type, byte *val);
+byte *put_attr_vformat(byte *ptr, uns type, byte *mask, va_list va);
+byte *put_attr_format(byte *ptr, uns type, char *mask, ...) __attribute__((format(printf,3,4)));
+byte *put_attr_num(byte *ptr, uns type, uns val);
+byte *put_attr_separator(byte *ptr);
+byte *put_attr_push(byte *ptr, uns type);
+byte *put_attr_pop(byte *ptr);
+byte *put_object(byte *t, struct odes *d);
+
+void bput_attr(struct fastbuf *b, uns type, byte *val, uns len);
+void bput_attr_large(struct fastbuf *b, uns type, byte *val, uns len);
+void bput_attr_str(struct fastbuf *b, uns type, byte *val);
+void bput_attr_vformat(struct fastbuf *b, uns type, byte *mask, va_list va);
+void bput_attr_format(struct fastbuf *b, uns type, char *mask, ...) __attribute__((format(printf,3,4)));
+void bput_attr_num(struct fastbuf *b, uns type, uns val);
+void bput_attr_separator(struct fastbuf *b);
+void bput_attr_push(struct fastbuf *b, uns type);
+void bput_attr_pop(struct fastbuf *b);
+void bput_oattr(struct fastbuf *f, struct oattr *a);
+void bput_oattr_nocheck(struct fastbuf *f, struct oattr *a);
+void bput_object(struct fastbuf *b, struct odes *o);
+void bput_object_nocheck(struct fastbuf *b, struct odes *o);
+
+void obj_write(struct fastbuf *b, struct odes *o, uns bucket_type);
+void obj_write_nocheck(struct fastbuf *b, struct odes *o, uns bucket_type);
+
+/* obj-linear.c: Linear representation of objects by in-memory buckets */
+
+byte *obj_linearize(struct odes *d, uns min_compress, uns *plen);
+struct odes *obj_delinearize(struct buck2obj_buf *bbuf, struct mempool *mp, byte *buf, uns len, uns destructive);
+
+/* obj-format.c: Adding of formatted values */
+
+struct oattr *obj_add_attr_vformat(struct odes *o, uns x, char *fmt, va_list args);
+struct oattr *obj_add_attr_format(struct odes *o, uns x, char *fmt, ...) FORMAT_CHECK(printf,3,4);
+struct oattr *obj_set_attr_vformat(struct odes *o, uns x, char *fmt, va_list args);
+struct oattr *obj_set_attr_format(struct odes *o, uns x, char *fmt, ...) FORMAT_CHECK(printf,3,4);
+
+#endif
diff --git a/sherlock/objread.h b/sherlock/objread.h
new file mode 100644
index 0000000..1abe936
--- /dev/null
+++ b/sherlock/objread.h
@@ -0,0 +1,94 @@
+/*
+ * Sherlock Library -- Nested Object Reading Functions
+ *
+ * (c) 2005 Martin Mares
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _SHERLOCK_OBJREAD_H
+#define _SHERLOCK_OBJREAD_H
+
+#include "sherlock/object.h"
+
+struct obj_read_state {
+  struct odes *root, *obj;		/* Object being read and the (sub-)object currently open */
+  void (*error_callback)(struct obj_read_state *st, char *msg);
+  void *user;				/* Not touched by the functions below */
+  int errors;				/* Number of errors encountered so far */
+};
+
+void default_obj_read_error(struct obj_read_state *st, char *msg);
+
+static inline void
+obj_read_start(struct obj_read_state *st, struct odes *obj)	/* Start reading of attributes into obj */
+{
+  st->root = st->obj = obj;
+  st->errors = 0;
+  st->error_callback = default_obj_read_error;
+}
+
+static inline void
+obj_read_push(struct obj_read_state *st, byte *ptr)	/* Handle a `(' attribute: open a new sub-object */
+{
+  if (unlikely(!ptr[0] || ptr[1]))			/* the value must be exactly one character */
+    {
+      if (!st->errors++)				/* only the first error is reported */
+        st->error_callback(st, "Malformed object: bad `(' attribute");
+    }
+  else
+    st->obj = obj_add_son(st->obj, ptr[0] + OBJ_ATTR_SON);
+}
+
+static inline void
+obj_read_pop(struct obj_read_state *st, byte *ptr)	/* Handle a `)' attribute: return to the parent object */
+{
+  if (unlikely(ptr[0]))					/* the value must be empty */
+    {
+      if (!st->errors++)
+        st->error_callback(st, "Malformed object: bad ')' attribute");
+    }
+  else if (unlikely(!st->obj->parent))
+    {
+      if (!st->errors++)
+        st->error_callback(st, "Malformed object: improper nesting of `( ... )' blocks");
+    }
+  else
+    st->obj = st->obj->parent;
+}
+
+static inline void
+obj_read_attr(struct obj_read_state *st, uns type, byte *val)	/* Process one attribute, its value is copied */
+{
+  if (type == '(')
+    obj_read_push(st, val);
+  else if (type == ')')
+    obj_read_pop(st, val);
+  else
+    obj_add_attr(st->obj, type, val);
+}
+
+static inline void
+obj_read_attr_ref(struct obj_read_state *st, uns type, byte *val)	/* Process one attribute, its value is referenced */
+{
+  if (type == '(')
+    obj_read_push(st, val);
+  else if (type == ')')
+    obj_read_pop(st, val);
+  else
+    obj_add_attr_ref(st->obj, type, val);
+}
+
+static inline int
+obj_read_end(struct obj_read_state *st)			/* Finish reading; returns the number of errors */
+{
+  if (unlikely(st->obj != st->root))			/* some sub-object has not been closed */
+    {
+      if (!st->errors++)
+        st->error_callback(st, "Malformed object: truncated `( ... )' block");
+    }
+  return st->errors;
+}
+
+#endif
diff --git a/sherlock/perl/Makefile b/sherlock/perl/Makefile
new file mode 100644
index 0000000..af4cee5
--- /dev/null
+++ b/sherlock/perl/Makefile
@@ -0,0 +1,8 @@
+# Perl modules
+
+DIRS+=sherlock/perl
+EXTRA_RUNDIRS+=lib/perl5/Sherlock
+SHERLOCK_PERL_MODS=$(addprefix $(o)/sherlock/perl/,Object.pm)
+PROGS+=$(SHERLOCK_PERL_MODS)
+
+$(SHERLOCK_PERL_MODS): PERL_MODULE_DIR=Sherlock
diff --git a/sherlock/perl/Object.pm b/sherlock/perl/Object.pm
new file mode 100644
index 0000000..07f7899
--- /dev/null
+++ b/sherlock/perl/Object.pm
@@ -0,0 +1,265 @@
+# Perl module for manipulating Sherlock objects
+#
+# (c) 2007 Martin Mares
+#
+# This software may be freely distributed and used according to the terms
+# of the GNU Lesser General Public License.
+
+=head1 NAME
+
+Sherlock::Object -- Manipulation with Sherlock objects
+
+=head1 DESCRIPTION
+
+This module offers a simple interface to Sherlock objects. See F<doc/objects>
+for a description of how the object system works.
+
+=head1 METHODS
+
+=over
+
+=item B<new()>
+
+Creates a new empty object.
+
+=item B<< new(I<name> => I<value>, ...) >>
+
+Creates a new object with some attributes initialized.
+
+=item B<< set(I<name> => I<value>, ...) >>
+
+Sets given attributes to specified values. If any of the attributes already
+exists, the old value is replaced by the new one.
+
+The value can be:
+
+=over
+
+=item *
+
+a number
+
+=item *
+
+a string
+
+=item *
+
+a reference to an array of values, which creates a multi-valued attribute.
+
+=item *
+
+a reference to another object, which creates a nested object. In this case
+(and only in this case), the name of the attribute must start with C<(>.
+
+=back
+
+=item B<< unset(I<name>, ...) >>
+
+Removes given attributes.
+
+=item B<< add(I<name> => I<value>, ...) >>
+
+Sets given attributes to specified values. If any of the attributes already
+exists, the new value is added to the original one, creating a multi-valued
+attribute.
+
+The repertoire of values is the same as in the C<set> method except for references
+to arrays, which are not allowed.
+
+=item B<< get(I<name>) >>
+
+Gets the value of the given attribute or C<undef> if it does not exist.
+If the attribute is multi-valued, the first value is returned.
+
+=item B<< getarray(I<name>) >>
+
+Gets all values of the given attribute as an array or an empty array if the
+attribute does not exist.
+
+=item B<< get_attrs() >>
+
+Gets an array of names of all attributes present in the object.
+
+=item B<< read(I<handle>) >>
+
+Reads a textual representation of an object from the given handle and adds it
+to the object it is invoked on. The effect is the same as calling the C<add>
+method on all read attributes.
+
+Returns 1 if the object has been read successfully, 0 if the input
+stream ended before an object started or C<undef> if the input was
+malformed.
+
+=item B<< read(I<handle>, raw => 1) >>
+
+Reads an object as above, but adds a special attribute called C<< RAW >>, which will
+contain the raw form of the object.
+
+=item B<< write(I<handle>) >>
+
+Writes a textual representation of the object to the given handle.
+
+=item B<< write_indented(I<handle>, [I<indent>]) >>
+
+Writes an indented textual representation of the object to the given handle.
+The I<indent> will be prepended to all printed lines, each nested level
+will get one tab character more.
+
+This is intended for debugging dumps and the output cannot be read back by any
+of the Sherlock libraries.
+
+=back
+
+=head1 AUTHOR
+
+Martin Mares
+
+=cut
+
+package Sherlock::Object;
+
+use strict;
+use warnings;
+
+sub new($@) {
+	my $class = shift @_;
+	my $self = { };
+	bless $self, ref($class) || $class || __PACKAGE__;	# two-argument bless, so that subclasses get objects of their own class
+	if (@_) {
+		$self->set(@_);
+	}
+	return $self;
+}
+
+sub set($@) {
+	my $self = shift @_;
+	my $attr;
+	while (defined($attr = shift @_)) {
+		$self->{$attr} = shift @_;		# replaces any previous value(s)
+	}
+}
+
+sub unset($@) {
+	my $self = shift @_;
+	foreach my $attr (@_) {
+		delete $self->{$attr};
+	}
+}
+
+sub add($@) {
+	my $self = shift @_;
+	my $attr;
+	while (defined($attr = shift @_)) {
+		my $val = shift @_;
+		if (!exists $self->{$attr}) {
+			$self->{$attr} = $val;
+		} elsif (ref $self->{$attr} eq "ARRAY") {
+			push @{$self->{$attr}}, $val;
+		} else {
+			$self->{$attr} = [ $self->{$attr}, $val ];	# the second value turns the attribute into an array
+		}
+	}
+}
+
+sub get($$) {
+	my ($self, $attr) = @_;
+	if (!exists $self->{$attr}) {
+		return undef;
+	} elsif (ref $self->{$attr} eq "ARRAY") {
+		return $self->{$attr}->[0];
+	} else {
+		return $self->{$attr};
+	}
+}
+
+sub getarray($$) {
+	my ($self, $attr) = @_;
+	if (!exists $self->{$attr}) {
+		return ();
+	} elsif (ref $self->{$attr} eq "ARRAY") {
+		return @{$self->{$attr}};
+	} else {
+		return ( $self->{$attr} );
+	}
+}
+
+sub get_attrs($) {
+	my ($self) = @_;
+	return keys %$self;
+}
+
+sub read($$@) {
+	my $self = shift @_;
+	my $fh = shift @_;
+	my %opts = @_;
+	my @stack = ();				# objects enclosing the one currently being read
+	my $read_something = 0;
+	my $obj = $self;
+	my $raw;
+	if ($opts{raw}) {
+		$raw = $obj->{"RAW"} = [];
+	}
+	while (<$fh>) {
+		chomp;
+		/^$/ && last;			# an empty line terminates the object
+		my ($a, $v) = /^(.)(.*)$/ or return undef;
+		push @$raw, $_ if $raw;
+		if ($a eq "(") {
+			$a = "$a$v";		# nested objects are stored under "(" followed by their name
+			my $new = Sherlock::Object->new;
+			$obj->add($a, $new);
+			push @stack, $obj;
+			$obj = $new;
+		} elsif ($a eq ")") {
+			@stack or return undef;	# ")" without a matching "("
+			$obj = pop @stack;
+		} else {
+			$obj->add($a, $v);
+		}
+		$read_something = 1;
+	}
+	@stack and return undef;		# input ended inside a nested object
+	return $read_something;
+}
+
+sub write($$) {
+	my ($self, $fh) = @_;
+	foreach my $a (keys %$self) {
+		my $vals = $self->{$a};
+		ref $vals eq "ARRAY" or $vals = [$vals];
+		foreach my $v (@{$vals}) {
+			if (ref $v eq "") {
+				print $fh $a, $v, "\n";
+			} elsif (ref $v eq "Sherlock::Object") {
+				print $fh $a, "\n";
+				$v->write($fh);
+				print $fh ")\n";
+			} else {
+				die;		# neither a plain scalar nor a nested object
+			}
+		}
+	}
+}
+
+sub write_indented($$$) {
+	my ($self, $fh, $indent) = @_;
+	defined $indent or $indent = "";
+	foreach my $a (sort keys %$self) {
+		my $vals = $self->{$a};
+		ref $vals eq "ARRAY" or $vals = [$vals];
+		foreach my $v (@{$vals}) {
+			if (ref $v eq "") {
+				print $fh $indent, $a, $v, "\n";
+			} elsif (ref $v eq "Sherlock::Object") {
+				print $fh $indent, $a, "\n";
+				$v->write_indented($fh, $indent . "\t");
+				print $fh $indent, ")\n";
+			} else {
+				die;		# neither a plain scalar nor a nested object
+			}
+		}
+	}
+}
+
+1;  # OK
-- 
2.39.2