--- /dev/null
+# Makefile for the Sherlock Library (c) 2004--2007 Martin Mares <mj@ucw.cz>
+
+DIRS+=sherlock
+
+# Names of the modules that make up the library (without file extensions)
+LIBSH_MODS= \
+ object bucket buck2obj obj2buck obj-linear obj-format \
+ attrset conf-parse
+
+# The same module names prefixed with $(o)/sherlock/
+LIBSH_MOD_PATHS=$(addprefix $(o)/sherlock/,$(LIBSH_MODS))
+
+# libsh.a depends on the .o files of all modules, libsh.so on the .oo files
+$(o)/sherlock/libsh.a: $(addsuffix .o,$(LIBSH_MOD_PATHS))
+$(o)/sherlock/libsh.so: $(addsuffix .oo,$(LIBSH_MOD_PATHS))
+$(o)/sherlock/libsh.pc: $(LIBUCW)
+
+include $(s)/sherlock/perl/Makefile
--- /dev/null
+/*
+ * Sherlock Library -- Parsing Attribute Sets
+ *
+ * (c) 2006 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "sherlock/sherlock.h"
+#include "sherlock/object.h"
+#include "sherlock/attrset.h"
+#include "lib/clists.h"
+#include "sherlock/conf.h"
+
+/* One entry of a configured attribute list: a clist node carrying an attribute code. */
+struct attr_node {
+ cnode n;
+ uns attr;
+};
+
+/*
+ * Configuration section describing a struct attr_node whose "Attr" item
+ * is parsed by cf_type_attr.
+ */
+struct cf_section attr_set_cf = {
+ CF_TYPE(struct attr_node),
+ CF_ITEMS {
+ CF_USER("Attr", PTR_TO(struct attr_node, attr), &cf_type_attr),
+ CF_END
+ }
+};
+
+/*
+ * Same layout as attr_set_cf, but the "Attr" item is parsed by
+ * cf_type_attr_sub instead of cf_type_attr.
+ */
+struct cf_section attr_set_cf_sub = {
+ CF_TYPE(struct attr_node),
+ CF_ITEMS {
+ CF_USER("Attr", PTR_TO(struct attr_node, attr), &cf_type_attr_sub),
+ CF_END
+ }
+};
+
+/*
+ * Rebuild the bit array of `set' from the list `l' of struct attr_node:
+ * CF_JOURNAL_VAR() is invoked on the whole set, all bits are cleared and
+ * then one bit is set for the attribute code of every list node.
+ */
+void
+attr_set_commit(struct attr_set *set, clist *l)
+{
+  CF_JOURNAL_VAR(*set);
+  bit_array_zero(set->a, ATTR_SET_SIZE);
+  CLIST_FOR_EACH(struct attr_node *, node, *l)
+    {
+      bit_array_set(set->a, node->attr);
+    }
+}
--- /dev/null
+/*
+ * Sherlock Library -- Sets of object attributes
+ *
+ * (c) 2006 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _SHERLOCK_ATTRSET_H
+#define _SHERLOCK_ATTRSET_H
+
+#include "lib/bitarray.h"
+#include "sherlock/object.h"
+
+/*
+ * The set is a bit array of ATTR_SET_SIZE bits indexed by attribute code.
+ * The assertion pins OBJ_ATTR_SON to 256; presumably 512 was chosen so that
+ * both plain codes and OBJ_ATTR_SON + <byte> codes fit -- TODO confirm.
+ */
+COMPILE_ASSERT(son_value, OBJ_ATTR_SON == 256);
+#define ATTR_SET_SIZE 512
+
+struct attr_set {
+ BIT_ARRAY(a, ATTR_SET_SIZE);
+};
+
+/*
+ * Test whether the bit indexed by attr->attr is set in `set'.
+ * No range check against ATTR_SET_SIZE is performed here.
+ */
+static inline uns
+attr_set_match(struct attr_set *set, struct oattr *attr)
+{
+ return bit_array_isset(set->a, attr->attr);
+}
+
+/* Configuration helpers */
+
+extern struct cf_section attr_set_cf, attr_set_cf_sub;
+
+struct clist;
+void attr_set_commit(struct attr_set *set, struct clist *list);
+
+#endif
--- /dev/null
+/*
+ * Sherlock Library -- Generating Objects from Buckets
+ *
+ * (c) 2004, Robert Spalek <robert@ucw.cz>
+ * (c) 2004--2006, Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#undef LOCAL_DEBUG
+
+#include "sherlock/sherlock.h"
+#include "lib/unaligned.h"
+#include "lib/mempool.h"
+#include "lib/fastbuf.h"
+#include "lib/unicode.h"
+#include "sherlock/object.h"
+#include "sherlock/objread.h"
+#include "lib/lizard.h"
+#include "lib/bbuf.h"
+#include "lib/ff-unicode.h"
+
+#include <errno.h>
+#include <unistd.h>
+
+/* Set errno to `num' and return -1 from the enclosing function (statement expression). */
+#define RET_ERR(num) ({ errno = num; return -1; })
+
+/*
+ * Working state for bucket parsing: a growing byte buffer (used for lines
+ * and for copies of whole buckets) and a buffer for lizard decompression.
+ */
+struct buck2obj_buf
+{
+ bb_t bb;
+ struct lizard_buffer *lizard;
+};
+
+static uns get_attr_type;
+
+/*
+ * Select the bucket format used by get_attr() and bget_attr().
+ * Types outside BUCKET_TYPE_PLAIN .. BUCKET_TYPE_V33_LIZARD are passed to die().
+ */
+void
+get_attr_set_type(uns type)
+{
+  if (!(type >= BUCKET_TYPE_PLAIN && type <= BUCKET_TYPE_V33_LIZARD))
+    die("Unknown buckettype %x", type);
+  get_attr_type = type;
+}
+
+/*
+ * Parse one attribute from the memory range [*pos, end) in the format
+ * selected by get_attr_set_type() and advance *pos behind it.
+ *
+ * Returns the attribute type (also stored in attr->attr), 0 for a header
+ * separator (an empty line in non-PLAIN line-based buckets, a zero length
+ * in V33 buckets), or -1 when no more data is available.  attr->val points
+ * into the input range and is NOT zero-terminated; attr->len is its length.
+ * An attribute which does not fit in the range is reported by die().
+ */
+int
+get_attr(byte **pos, byte *end, struct parsed_attr *attr)
+{
+  byte *ptr = *pos;
+  if (ptr >= end)
+    return -1;
+  if (get_attr_type < BUCKET_TYPE_V33)
+    {
+      /* Line-based formats: <type><value>\n */
+      if (get_attr_type == BUCKET_TYPE_PLAIN)
+        {
+          /* Plain buckets have no header separator, empty lines are skipped */
+          while (ptr < end && *ptr == '\n')
+            ptr++;
+          *pos = ptr;
+          if (ptr >= end)
+            return -1;
+        }
+      else if (*ptr == '\n')
+        {
+          /* An empty line separates the header from the body */
+          *pos = ++ptr;
+          attr->attr = 0;
+          return 0;
+        }
+      attr->attr = *ptr++;
+      attr->val = ptr;
+      while (ptr < end && *ptr != '\n')
+        ptr++;
+      /* Step over the newline; if it was missing, ptr ends up behind `end' */
+      attr->len = ptr++ - attr->val;
+    }
+  else
+    {
+      /* V33: <utf8 length of value+1><value><type>, zero length = separator */
+      uns len;
+      GET_UTF8_32(ptr, len);
+      if (!len--)
+        {
+          *pos = ptr;
+          attr->attr = 0;
+          return 0;
+        }
+      /*
+       * The value (len bytes) and the type byte must both lie inside the
+       * range.  Check this BEFORE reading ptr[len], otherwise a truncated
+       * attribute makes us read (and report) a byte outside the buffer.
+       */
+      if (ptr > end || (size_t) len >= (size_t) (end - ptr))
+        die("Incomplete attribute of length %u", len);
+      attr->attr = ptr[len];
+      attr->val = ptr;
+      attr->len = len;
+      ptr += len+1;
+    }
+  if (ptr > end)
+    die("Incomplete attribute %c", attr->attr);
+  *pos = ptr;
+  return attr->attr;
+}
+
+/*
+ * Read one attribute from fastbuf `b' in the format selected by
+ * get_attr_set_type().
+ *
+ * Returns the attribute type (also stored in attr->attr), 0 for a header
+ * separator, or -1 when the input ends before an attribute starts.
+ * attr->val / attr->len describe the value, which is not zero-terminated.
+ * The value lives either directly in the fastbuf's buffer or in a
+ * function-static growing buffer, so the function is not reentrant and the
+ * value is presumably valid only until the next read -- TODO confirm
+ * against the fastbuf documentation.
+ */
+int
+bget_attr(struct fastbuf *b, struct parsed_attr *attr)
+{
+ static bb_t buf;
+ if (get_attr_type < BUCKET_TYPE_V33)
+ {
+ /* Line-based formats: <type><value>\n */
+ int c = bgetc(b);
+ if (c < 0)
+ return -1;
+ if (get_attr_type == BUCKET_TYPE_PLAIN)
+ {
+ /* Plain buckets: skip empty lines */
+ while (c == '\n')
+ c = bgetc(b);
+ if (c < 0)
+ return -1;
+ }
+ else if (c == '\n')
+ {
+ /* An empty line is the header separator */
+ attr->attr = 0;
+ return 0;
+ }
+ attr->attr = c;
+
+ /* Fast path: the rest of the line is already in the fastbuf's buffer */
+ byte *ptr, *end;
+ uns len = bdirect_read_prepare(b, &ptr);
+ end = ptr + len;
+ attr->val = ptr;
+ while (ptr < end && *ptr != '\n')
+ ptr++;
+ if (ptr < end)
+ {
+ bdirect_read_commit(b, ptr+1);
+ attr->len = ptr - attr->val;
+ return attr->attr;
+ }
+
+ /* Slow path: no newline in the buffered data, collect the line byte by byte */
+ len = 0;
+ c = bgetc(b);
+ while (c >= 0 && c != '\n')
+ {
+ bb_grow(&buf, len+1);
+ buf.ptr[len++] = c;
+ c = bgetc(b);
+ }
+ if (c < 0)
+ die("Incomplete attribute %c", attr->attr);
+ attr->val = buf.ptr;
+ attr->len = len;
+ }
+ else
+ {
+ /* V33: <utf8 length of value+1><value><type>, zero length = separator */
+ int len = bget_utf8_32(b);
+ if (len < 0)
+ return -1;
+ if (!len)
+ {
+ attr->attr = 0;
+ return 0;
+ }
+ attr->len = len-1;
+
+ /* Fast path: value and type byte are both available in the buffer */
+ byte *ptr;
+ int avail = bdirect_read_prepare(b, &ptr);
+ if (avail >= len)
+ {
+ attr->val = ptr;
+ attr->attr = ptr[len-1];
+ bdirect_read_commit(b, ptr + len);
+ return attr->attr;
+ }
+ /* Slow path: copy the value to the static buffer, then read the type byte */
+ bb_grow(&buf, --len);
+ breadb(b, buf.ptr, len);
+ attr->val = buf.ptr;
+ attr->len = len;
+ attr->attr = bgetc(b);
+ if (attr->attr < 0)
+ die("Incomplete attribute %c", attr->attr);
+ }
+ return attr->attr;
+}
+
+/*
+ * Replace attr->val by a zero-terminated copy of the value allocated
+ * from `pool'.  attr->len is left unchanged.
+ */
+void
+copy_parsed_attr(struct mempool *pool, struct parsed_attr *attr)
+{
+  byte *copy = mp_alloc_fast_noalign(pool, attr->len+1);
+
+  memcpy(copy, attr->val, attr->len);
+  copy[attr->len] = 0;
+  attr->val = copy;
+}
+
+/*
+ * Allocate a parsing context: the structure itself, an initialized
+ * growing buffer and a lizard decompression buffer.
+ * Release it with buck2obj_free().
+ */
+struct buck2obj_buf *
+buck2obj_alloc(void)
+{
+  struct buck2obj_buf *ctx = xmalloc(sizeof(*ctx));
+
+  bb_init(&ctx->bb);
+  ctx->lizard = lizard_alloc();
+  return ctx;
+}
+
+/*
+ * Release a context created by buck2obj_alloc(): the lizard buffer,
+ * the growing buffer and finally the structure itself.
+ */
+void
+buck2obj_free(struct buck2obj_buf *buf)
+{
+ lizard_free(buf->lizard);
+ bb_done(&buf->bb);
+ xfree(buf);
+}
+
+/*
+ * Decode V33-encoded attributes from [ptr, end) and add them to object `o'.
+ * Decoding stops at the end of the range or at a zero length (the
+ * separator); the position behind the last consumed byte is returned.
+ *
+ * If can_overwrite >= 2, each type byte in the input is overwritten by a
+ * zero terminator and the object refers to the values in place.  Otherwise
+ * each value is copied to o->pool and the input is left untouched.
+ *
+ * NOTE(review): the decoded length is not checked against `end' before
+ * ptr[len] is accessed, so the input is trusted to be well-formed.
+ */
+static inline byte *
+decode_attributes(byte *ptr, byte *end, struct odes *o, uns can_overwrite)
+{
+ struct obj_read_state st;
+ obj_read_start(&st, o);
+
+ if (can_overwrite >= 2)
+ while (ptr < end)
+ {
+ uns len;
+ GET_UTF8_32(ptr, len);
+ if (!len--)
+ break;
+ byte type = ptr[len];
+
+ /* The type byte doubles as the place for the terminating zero */
+ ptr[len] = 0;
+ obj_read_attr_ref(&st, type, ptr);
+
+ ptr += len + 1;
+ }
+ else
+ while (ptr < end)
+ {
+ uns len;
+ GET_UTF8_32(ptr, len);
+ if (!len--)
+ break;
+ byte type = ptr[len];
+
+ byte *dup = mp_alloc_fast_noalign(o->pool, len+1);
+ memcpy(dup, ptr, len);
+ dup[len] = 0;
+ obj_read_attr_ref(&st, type, dup);
+
+ ptr += len + 1;
+ }
+ obj_read_end(&st);
+ return ptr;
+}
+
+/*
+ * Parse a bucket of type `buck_type' and length `buck_len' read from `body'.
+ * Header attributes are added to `o_hdr', body attributes to `o_body'.
+ *
+ * If `body_start' is non-NULL, only the header is parsed for the V30 and
+ * V33 formats and *body_start receives the number of bytes consumed; plain
+ * buckets have no header part, so *body_start is set to 0 and all
+ * attributes go to `o_hdr'.
+ *
+ * `allow_zero_copy' permits parsing an uncompressed V33 bucket directly
+ * in the fastbuf's buffer when the whole bucket is available there and
+ * body->can_overwrite_buffer is at least 2.
+ *
+ * Returns 0 on success and -1 on failure.  Failures signalled by RET_ERR()
+ * set errno to EINVAL; for a failed lizard_decompress_safe() errno is
+ * whatever that function left -- TODO confirm.  Buckets of unknown type
+ * are skipped before the error is returned.
+ */
+int
+buck2obj_parse(struct buck2obj_buf *buf, uns buck_type, uns buck_len, struct fastbuf *body,
+ struct odes *o_hdr, uns *body_start, struct odes *o_body,
+ uns allow_zero_copy)
+{
+ struct obj_read_state st;
+ if (buck_type <= BUCKET_TYPE_PLAIN)
+ {
+ if (body_start) // there is no header part
+ *body_start = 0;
+ obj_read_start(&st, o_hdr);
+ byte *b;
+ // ignore empty lines and read until the end of the bucket
+ sh_off_t end = btell(body) + buck_len;
+ while (btell(body) < end && bgets_bb(body, &buf->bb, ~0U))
+ if ((b = buf->bb.ptr)[0])
+ obj_read_attr(&st, b[0], b+1);
+ ASSERT(btell(body) == end);
+ obj_read_end(&st);
+ }
+ else if (buck_type == BUCKET_TYPE_V30)
+ {
+ sh_off_t start = btell(body);
+ sh_off_t end = start + buck_len;
+ byte *b;
+ /* Note: this declaration shadows the function-level `st' */
+ struct obj_read_state st;
+ /* The header ends at the first empty line */
+ obj_read_start(&st, o_hdr);
+ while (btell(body) < end && bgets_bb(body, &buf->bb, ~0U) && (b = buf->bb.ptr)[0])
+ obj_read_attr(&st, b[0], b+1);
+ obj_read_end(&st);
+ if (body_start)
+ *body_start = btell(body) - start;
+ else
+ {
+ /* The body runs up to the end of the bucket, empty lines are ignored */
+ obj_read_start(&st, o_body);
+ while (btell(body) < end && bgets_bb(body, &buf->bb, ~0U))
+ if ((b = buf->bb.ptr)[0])
+ obj_read_attr(&st, b[0], b+1);
+ ASSERT(btell(body) == end);
+ obj_read_end(&st);
+ }
+ }
+ else if (buck_type == BUCKET_TYPE_V33 || buck_type == BUCKET_TYPE_V33_LIZARD)
+ {
+ /* Avoid reading the whole bucket if only its header is needed. */
+ if (body_start)
+ {
+ sh_off_t start = btell(body);
+ sh_off_t end = start + buck_len;
+ obj_read_start(&st, o_hdr);
+ while (btell(body) < end)
+ {
+ uns len = bget_utf8_32(body);
+ if (!len)
+ break;
+ /* Note: this local `buf' shadows the parameter of the same name */
+ byte *buf = mp_alloc_fast_noalign(o_hdr->pool, len);
+ bread(body, buf, len);
+ /* The last byte is the attribute type; replace it by the terminator */
+ uns type = buf[--len];
+ buf[len] = 0;
+ obj_read_attr_ref(&st, type, buf);
+ }
+ obj_read_end(&st);
+ *body_start = btell(body) - start;
+ return 0;
+ }
+
+ /* Read all the bucket into 1 buffer, 0-copy if possible. */
+ byte *ptr, *end;
+ uns len = bdirect_read_prepare(body, &ptr);
+ uns copied = 0;
+ if (len < buck_len ||
+ ((body->can_overwrite_buffer < 2 || !allow_zero_copy) && buck_type == BUCKET_TYPE_V33))
+ {
+ /* Copy if the original buffer is too small.
+ * If it is write-protected, copy it also if it is uncompressed. */
+ DBG("NO ZC: %d < %d, %d %08x", len, buck_len, body->can_overwrite_buffer, buck_type);
+ bb_grow(&buf->bb, buck_len);
+ len = bread(body, buf->bb.ptr, buck_len);
+ ptr = buf->bb.ptr;
+ copied = 1;
+ }
+ else
+ DBG("ZC (%d >= %d, %d %08x)", len, buck_len, body->can_overwrite_buffer, buck_type);
+ end = ptr + buck_len;
+
+ /* The header is always copied to the pool (can_overwrite == 0) */
+ ptr = decode_attributes(ptr, end, o_hdr, 0); // header
+ if (buck_type == BUCKET_TYPE_V33_LIZARD) // decompression
+ {
+ /* The compressed body starts with two 32-bit words: length and adler32 */
+ if (ptr + 8 > end)
+ {
+ if (ptr == end) // truncated bucket
+ goto commit;
+ RET_ERR(EINVAL);
+ }
+ len = GET_U32(ptr);
+ ptr += 4;
+ uns adler = GET_U32(ptr);
+ ptr += 4;
+ byte *new_ptr = lizard_decompress_safe(ptr, buf->lizard, len);
+ if (!new_ptr)
+ return -1;
+ if (adler32(new_ptr, len) != adler)
+ RET_ERR(EINVAL);
+ /* The compressed data are no longer needed, release the fastbuf's buffer */
+ if (!copied)
+ bdirect_read_commit(body, end);
+ /* From now on we parse the decompressed data, which we may overwrite */
+ ptr = new_ptr;
+ end = ptr + len;
+ copied = 1;
+ }
+ ptr = decode_attributes(ptr, end, o_body, 2); // body
+ if (ptr != end)
+ RET_ERR(EINVAL);
+ commit:
+ /* Zero-copy case: the fastbuf's buffer has been modified in place */
+ if (!copied)
+ bdirect_read_commit_modified(body, ptr);
+ }
+ else
+ {
+ bskip(body, buck_len);
+ RET_ERR(EINVAL);
+ }
+ return 0;
+}
+
+/*
+ * Convenience wrapper around buck2obj_parse(): create a new object in
+ * `pool' and parse both header and body attributes into it.
+ * Returns the object, or NULL if buck2obj_parse() has failed.
+ */
+struct odes *
+obj_read_bucket(struct buck2obj_buf *buf, struct mempool *pool, uns buck_type, uns buck_len, struct fastbuf *body,
+  uns *body_start, uns allow_zero_copy)
+{
+  struct odes *obj = obj_new(pool);
+  int rc = buck2obj_parse(buf, buck_type, buck_len, body, obj, body_start, obj, allow_zero_copy);
+
+  if (rc < 0)
+    return NULL;
+  return obj;
+}
+
+/*
+ * Read one line from `f' and feed it to the reader state `st'.
+ * Returns 0 if no line could be read, 1 for an empty line and -1 after
+ * an attribute has been processed (i.e., "continue reading").
+ */
+static int
+obj_read_line(struct fastbuf *f, struct obj_read_state *st)
+{
+  byte *line = bgets_stk(f);
+
+  if (!line)
+    return 0;
+  if (!line[0])
+    return 1;
+  obj_read_attr(st, line[0], line+1);
+  return -1;
+}
+
+/*
+ * Read attributes line by line from `f' into object `o' until an empty
+ * line or the end of input is reached.
+ * Returns 1 if reading stopped at an empty line, 0 if no further line
+ * could be read.
+ */
+int
+obj_read(struct fastbuf *f, struct odes *o)
+{
+  struct obj_read_state st;
+  int rc;
+
+  obj_read_start(&st, o);
+  do
+    rc = obj_read_line(f, &st);
+  while (rc < 0);
+  obj_read_end(&st);
+  return rc;
+}
+
+/* Error handler which just logs the message `err' at level L_ERROR; `st' is unused. */
+void
+default_obj_read_error(struct obj_read_state *st UNUSED, char *err)
+{
+ msg(L_ERROR, "%s", err);
+}
--- /dev/null
+/*
+ * Sherlock Library -- Configuration Parsing Helpers
+ *
+ * (c) 2006 Martin Mares <mj@ucw.cz>
+ * (c) 2006 Robert Spalek <robert@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "sherlock/sherlock.h"
+#include "sherlock/object.h"
+#include "lib/chartype.h"
+#include "lib/fastbuf.h"
+#include "lib/ff-unicode.h"
+#include "lib/unicode.h"
+#include "sherlock/conf.h"
+
+/*** Attribute names ***/
+
+/*
+ * Parse an attribute name which may also denote a sub-object.
+ * A single character X yields its code, "(X)" yields OBJ_ATTR_SON + X.
+ * Returns NULL on success or an error message.
+ */
+static byte *
+attr_sub_parser(byte *c, uns *ptr)
+{
+  /* Plain attribute: exactly one character */
+  if (c[0] && !c[1])
+    {
+      *ptr = c[0];
+      return NULL;
+    }
+  /* Sub-object: one character other than ')' enclosed in parentheses */
+  if (c[0] == '(' && c[1] && c[1] != ')' && c[2] == ')' && !c[3])
+    {
+      *ptr = OBJ_ATTR_SON + c[1];
+      return NULL;
+    }
+  return "Invalid attribute name";
+}
+
+/*
+ * Parse a plain attribute name: the same syntax as attr_sub_parser(),
+ * but names of sub-objects are rejected.
+ * Returns NULL on success or an error message.
+ */
+static byte *
+attr_parser(byte *c, uns *ptr)
+{
+  byte *err = attr_sub_parser(c, ptr);
+
+  if (err)
+    return err;
+  if (*ptr < OBJ_ATTR_SON)
+    return NULL;
+  return "Names of sub-objects are not allowed here";
+}
+
+/*
+ * Dump an attribute code: "<none>" for zero, the character itself for
+ * plain attributes and "(X)" for sub-objects, each followed by a space.
+ */
+static void
+attr_sub_dumper(struct fastbuf *b, uns *ptr)
+{
+  uns code = *ptr;
+
+  if (!code)
+    {
+      bprintf(b, "<none> ");
+      return;
+    }
+  if (code >= OBJ_ATTR_SON)
+    bprintf(b, "(%c) ", code - OBJ_ATTR_SON);
+  else
+    bprintf(b, "%c ", code);
+}
+
+/* Configuration type "attr": an attribute name stored as uns; sub-object names are rejected by attr_parser(). */
+struct cf_user_type cf_type_attr = {
+ .size = sizeof(uns),
+ .name = "attr",
+ .parser = (cf_parser1 *) attr_parser,
+ .dumper = (cf_dumper1 *) attr_sub_dumper
+};
+
+/* Configuration type "attr_sub": like cf_type_attr, but "(X)" names of sub-objects are accepted as well. */
+struct cf_user_type cf_type_attr_sub = {
+ .size = sizeof(uns),
+ .name = "attr_sub",
+ .parser = (cf_parser1 *) attr_sub_parser,
+ .dumper = (cf_dumper1 *) attr_sub_dumper
+};
+
+/*** Unicode characters ***/
+
+/*
+ * Parse a Unicode character given either as a single UTF-8 character
+ * or as exactly four hexadecimal digits.
+ * Stores the code to *up and returns 0 on success, 1 on a syntax error.
+ */
+static uns
+uni_parser(byte *c, u16 *up)
+{
+  uns u;
+  byte *cc = (byte*)utf8_get(c, &u);
+  if (*cc || u == UNI_REPLACEMENT)
+    {
+      /*
+       * Not a single UTF-8 character, so try the four-digit hex form.
+       * Start from zero: `u' still holds the character decoded above and
+       * the result used to be correct only thanks to truncation to u16.
+       */
+      u = 0;
+      for (uns i=0; i<4; i++)
+        {
+          if (!Cxdigit(c[i]))
+            return 1;
+          u = (u << 4) | Cxvalue(c[i]);
+        }
+      if (c[4])
+        return 1;
+    }
+  *up = u;
+  return 0;
+}
+
+/*
+ * Configuration parser for a single Unicode character stored as uns.
+ * Returns NULL on success or an error message.
+ */
+static byte *
+unichar_parser(byte *c, uns *up)
+{
+  u16 code;
+
+  if (uni_parser(c, &code))
+    return "Expecting one UTF-8 character or its code";
+  *up = code;
+  return NULL;
+}
+
+/* Dump the character *up encoded in UTF-8, followed by a space. */
+static void
+unichar_dumper(struct fastbuf *b, uns *up)
+{
+ bput_utf8(b, *up);
+ bputc(b, ' ');
+}
+
+/* Configuration type "unichar": a Unicode character stored as uns. */
+struct cf_user_type cf_type_unichar = {
+ .size = sizeof(uns),
+ .name = "unichar",
+ .parser = (cf_parser1 *) unichar_parser,
+ .dumper = (cf_dumper1 *) unichar_dumper
+};
+
+/*** Unicode ranges ***/
+
+/*
+ * Parse a range of Unicode characters: either a single character or
+ * "min-max", each bound in the syntax accepted by uni_parser().
+ * A '-' at the very start of the string is not taken as a separator.
+ * The input string is modified (the separator is replaced by a zero byte).
+ * Returns NULL on success or an error message.
+ */
+static byte *
+unirange_parser(byte *s, struct unirange *ur)
+{
+  byte *dash = strchr(s, '-');
+
+  if (dash && dash > s)
+    {
+      *dash++ = 0;
+      if (uni_parser(s, &ur->min) || uni_parser(dash, &ur->max))
+        return "Incorrect syntax of a code range";
+    }
+  else
+    {
+      if (uni_parser(s, &ur->min))
+        return "Incorrect syntax of a code range";
+      ur->max = ur->min;
+    }
+  if (ur->min > ur->max)
+    return "Invalid code range (min>max)";
+  return NULL;
+}
+
+/*
+ * Dump a range of Unicode characters as four-digit hexadecimal codes:
+ * a single code if both bounds are equal, "min-max" otherwise.
+ */
+static void
+unirange_dumper(struct fastbuf *b, struct unirange *ur)
+{
+  if (ur->min == ur->max)
+    bprintf(b, "%04x ", ur->min);
+  else
+    bprintf(b, "%04x-%04x ", ur->min, ur->max);
+}
+
+/* Configuration type "unirange": a range of Unicode characters stored as struct unirange. */
+struct cf_user_type cf_type_unirange = {
+ .size = sizeof(struct unirange),
+ .name = "unirange",
+ .parser = (cf_parser1 *) unirange_parser,
+ .dumper = (cf_dumper1 *) unirange_dumper
+};
+
+/*** Unsigned integer ranges ***/
+
+/*
+ * Parse a range of unsigned integers: either a single number or "min-max".
+ * A '-' at the very start of the string is not taken as a separator and
+ * a second '-' right after the separator is rejected.
+ * The input string is modified (the separator is replaced by a zero byte).
+ * Returns NULL on success or an error message.
+ */
+static byte *
+unsrange_parser(byte *s, struct unsrange *r)
+{
+  byte *err;
+  byte *dash = strchr(s, '-');
+
+  if (dash && dash > s)
+    {
+      *dash++ = 0;
+      if (*dash == '-')
+        return "Incorrect syntax of an unsigned range";
+      if ((err = cf_parse_int(s, &r->min)) || (err = cf_parse_int(dash, &r->max)))
+        return err;
+    }
+  else
+    {
+      if ((err = cf_parse_int(s, &r->min)))
+        return err;
+      r->max = r->min;
+    }
+  if (r->min > r->max)
+    return "Invalid unsigned range (min>max)";
+  return NULL;
+}
+
+/*
+ * Dump a range of unsigned integers: a single number if both bounds
+ * are equal, "min-max" otherwise.
+ */
+static void
+unsrange_dumper(struct fastbuf *b, struct unsrange *r)
+{
+  if (r->min == r->max)
+    bprintf(b, "%u ", r->min);
+  else
+    bprintf(b, "%u-%u ", r->min, r->max);
+}
+
+/* Configuration type for a range of unsigned integers stored as struct unsrange. */
+struct cf_user_type cf_type_unsrange = {
+ .size = sizeof(struct unsrange),
+ .name = "gerr_range",
+ .parser = (cf_parser1 *) unsrange_parser,
+ .dumper = (cf_dumper1 *) unsrange_dumper
+};
+
+/* Configuration sections for (word|meta|string)-types */
+
+/*
+ * Parse a weight: an integer which must not exceed 255.
+ * CF_JOURNAL_VAR() is invoked on the target before it is modified.
+ * Returns NULL on success or an error message.
+ */
+static byte *
+parse_u8(byte *s, uns *w)
+{
+  CF_JOURNAL_VAR(*w);
+  byte *err = cf_parse_int(s, (int *)w);
+  if (err)
+    return err;
+  if (*w <= 255)
+    return NULL;
+  return "Weights are limited to 0..255";
+}
+
+/*
+ * Dump a weight followed by a space.
+ * NOTE(review): the unsigned value is printed with "%d"; harmless for the
+ * 0..255 range enforced by parse_u8().
+ */
+static void
+dump_u8(struct fastbuf *fb, uns *ptr)
+{
+ bprintf(fb, "%d ", *ptr);
+}
+
+/* Configuration type "weight": an uns limited to 0..255 by parse_u8(). */
+static struct cf_user_type weight_type = {
+ .size = sizeof(uns),
+ .name = "weight",
+ .parser = (cf_parser1*) parse_u8,
+ .dumper = (cf_dumper1*) dump_u8
+};
+
+/*
+ * Fill sec->cfg with a freshly allocated item table containing one array
+ * item of `multiple' elements for each name in the NULL-terminated list
+ * `names'.  The i-th item is given the pointer ((uns*) NULL) + i*multiple,
+ * i.e. an offset expressed as a pointer.  If `just_u8' is set, the
+ * elements use the "weight" type, otherwise they are plain unsigned
+ * integers.  The table is not freed by this function.
+ */
+void
+cf_generate_word_type_config(struct cf_section *sec, byte **names, uns multiple, uns just_u8)
+{
+  uns count;
+  for (count = 0; names[count]; count++)
+    ;
+
+  struct cf_item *items = xmalloc((count + 1) * sizeof(struct cf_item));
+  sec->cfg = items;
+
+  for (uns i = 0; i < count; i++)
+    {
+      if (!just_u8)
+        items[i] = (struct cf_item) CF_UNS_ARY(names[i], ((uns*) NULL) + i*multiple, multiple);
+      else
+        items[i] = (struct cf_item) CF_USER_ARY(names[i], ((uns*) NULL) + i*multiple, &weight_type, multiple);
+    }
+  items[count] = (struct cf_item) CF_END;
+}
--- /dev/null
+/*
+ * Sherlock Library -- Configuration Parsing Helpers
+ *
+ * (c) 2006 Martin Mares <mj@ucw.cz>
+ * (c) 2006 Robert Spalek <robert@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _SHERLOCK_CONF_H
+#define _SHERLOCK_CONF_H
+
+#include "lib/conf.h"
+
+/* All of the following objects are defined in conf-parse.c
+ *
+ * Object names */
+
+extern struct cf_user_type cf_type_attr, cf_type_attr_sub;
+
+/* Unicode character and ranges */
+
+/* Range of 16-bit character codes with both bounds stored (parsed by cf_type_unirange) */
+struct unirange {
+ u16 min, max;
+};
+
+extern struct cf_user_type cf_type_unirange;
+extern struct cf_user_type cf_type_unichar;
+
+/* Unsigned integer ranges */
+
+/* Range of unsigned integers with both bounds stored (parsed by cf_type_unsrange) */
+struct unsrange {
+ uns min, max;
+};
+
+extern struct cf_user_type cf_type_unsrange;
+
+/* Sections for (word|meta|string)-types */
+void cf_generate_word_type_config(struct cf_section *sec, byte **names, uns multiple, uns just_u8);
+
+#endif
--- /dev/null
+# pkg-config metadata for libsh
+
+libdir=@LIBDIR@
+incdir=.
+
+Name: libsh
+Description: Functions common to the whole Sherlock project
+Version: @SHERLOCK_VERSION@
+Cflags: -I${incdir}
+Libs: -L${libdir} -lsh
+Requires: @DEPS@
--- /dev/null
+/*
+ * Sherlock Library -- Adding Formatted Attributes
+ *
+ * (c) 2005 Martin Mares <mj@ucw.cz>
+ * (c) 2007 Pavel Charvat <pchar@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "sherlock/sherlock.h"
+#include "sherlock/object.h"
+#include "lib/stkstring.h"
+
+#include <stdio.h>
+
+/*
+ * Format a string according to `fmt' and `args' and add it to object `o'
+ * as a new value of attribute `x' by obj_add_attr().
+ * NOTE(review): stk_vprintf() presumably formats into stack memory, so the
+ * call has to stay inside this function's frame -- TODO confirm.
+ */
+struct oattr *
+obj_add_attr_vformat(struct odes *o, uns x, char *fmt, va_list args)
+{
+ return obj_add_attr(o, x, stk_vprintf(fmt, args));
+}
+
+/* Variadic front-end of obj_add_attr_vformat(). */
+struct oattr *
+obj_add_attr_format(struct odes *o, uns x, char *fmt, ...)
+{
+  struct oattr *result;
+  va_list ap;
+
+  va_start(ap, fmt);
+  result = obj_add_attr_vformat(o, x, fmt, ap);
+  va_end(ap);
+  return result;
+}
+
+/*
+ * Format a string according to `fmt' and `args' and set it as the value
+ * of attribute `x' of object `o' by obj_set_attr().
+ * NOTE(review): stk_vprintf() presumably formats into stack memory, so the
+ * call has to stay inside this function's frame -- TODO confirm.
+ */
+struct oattr *
+obj_set_attr_vformat(struct odes *o, uns x, char *fmt, va_list args)
+{
+ return obj_set_attr(o, x, stk_vprintf(fmt, args));
+}
+
+/* Variadic front-end of obj_set_attr_vformat(). */
+struct oattr *
+obj_set_attr_format(struct odes *o, uns x, char *fmt, ...)
+{
+  struct oattr *result;
+  va_list ap;
+
+  va_start(ap, fmt);
+  result = obj_set_attr_vformat(o, x, fmt, ap);
+  va_end(ap);
+  return result;
+}
--- /dev/null
+/*
+ * Sherlock Library -- Linear Representation of Objects
+ *
+ * (c) 2005 Martin Mares <mj@ucw.cz>
+ * (c) 2005 Robert Spalek <robert@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "sherlock/sherlock.h"
+#include "lib/fastbuf.h"
+#include "lib/unaligned.h"
+#include "lib/lizard.h"
+#include "sherlock/object.h"
+#include "sherlock/lizard-fb.h"
+
+/*
+ * Convert object `d' to a linear representation in a single xmalloc'ed
+ * block.  The object is first serialized in the V33 format and then passed
+ * to the lizard compressor with `min_compress' / 100 as the requested ratio.
+ * The length of the result is stored to *plen and the block is returned;
+ * it is owned by the caller.
+ *
+ * Note that this changes the format selected by put_attr_set_type().
+ */
+byte *
+obj_linearize(struct odes *d, uns min_compress, uns *plen)
+{
+ // Create uncompressed linearization
+ put_attr_set_type(BUCKET_TYPE_V33);
+ uns size = size_object(d);
+ /* `out' points behind room reserved for the compression header */
+ byte *out = xmalloc(size+LIZARD_COMPRESS_HEADER + LIZARD_NEEDS_CHARS) + LIZARD_COMPRESS_HEADER;
+ byte *t = put_object(out, d);
+ ASSERT(t == out+size);
+
+ struct lizard_block_req req = {
+ .type = BUCKET_TYPE_V33_LIZARD,
+ .ratio = min_compress / 100.,
+ .in_ptr = out,
+ .in_len = size,
+ .out_ptr = NULL,
+ .out_len = 0,
+ };
+ // Allocate a buffer for compressed data
+ int res = lizard_compress_req(&req);
+ ASSERT(res <= 0);
+ /* A negative result makes us allocate an output buffer of the requested size plus the header */
+ byte *buf = res<0 ? req.out_ptr=xmalloc(req.out_len+=LIZARD_COMPRESS_HEADER) : NULL;
+ res = lizard_compress_req_header(&req, 1);
+ ASSERT(res > 0);
+ /* Keep whichever block holds the result and free the other one */
+ if (req.out_ptr != out-LIZARD_COMPRESS_HEADER)
+ xfree(out-LIZARD_COMPRESS_HEADER);
+ else if (buf)
+ xfree(buf);
+
+ *plen = req.out_len;
+ return req.out_ptr;
+}
+
+/*
+ * Rebuild an object from the linear representation in buf[0..len-1].
+ * The bucket type is derived from the first byte and the data starting
+ * LIZARD_COMPRESS_HEADER - 1 bytes into the buffer are parsed by
+ * buck2obj_parse() into a new object allocated from `mp'.  `destructive'
+ * is passed on to fbbuf_init_read().
+ * Returns the object, or NULL if parsing has failed.
+ */
+struct odes *
+obj_delinearize(struct buck2obj_buf *bbuf, struct mempool *mp, byte *buf, uns len, uns destructive)
+{
+  struct odes *obj = obj_new(mp);
+  ASSERT(len >= LIZARD_COMPRESS_HEADER);
+
+  uns type = buf[0] + BUCKET_TYPE_PLAIN;
+  uns skip = LIZARD_COMPRESS_HEADER - 1;
+  struct fastbuf fb;
+
+  fbbuf_init_read(&fb, buf+skip, len-skip, destructive);
+  int rc = buck2obj_parse(bbuf, type, len-skip, &fb, NULL, NULL, obj, 1);
+  if (rc < 0)
+    return NULL;
+  return obj;
+}
--- /dev/null
+/*
+ * Sherlock Library -- Generating Buckets from Objects
+ *
+ * (c) 2004, Robert Spalek <robert@ucw.cz>
+ * (c) 2005, Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "sherlock/sherlock.h"
+#include "lib/fastbuf.h"
+#include "lib/ff-unicode.h"
+#include "sherlock/object.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <stdarg.h>
+
+/*
+ * Output format selected by put_attr_set_type(): whether attributes use the
+ * V33 length-prefixed encoding, and the header separator byte (negative
+ * when the format has no separator).
+ */
+static uns use_v33;
+static int hdr_sep;
+
+/*
+ * Select the bucket format generated by all put_attr* and bput_attr*
+ * functions.  Unknown types are passed to die().
+ */
+void
+put_attr_set_type(uns type)
+{
+  if (type == BUCKET_TYPE_PLAIN)
+    {
+      /* Line-based format without any header separator */
+      use_v33 = 0;
+      hdr_sep = -1;
+    }
+  else if (type == BUCKET_TYPE_V30)
+    {
+      /* Line-based format, the header ends with an empty line */
+      use_v33 = 0;
+      hdr_sep = '\n';
+    }
+  else if (type == BUCKET_TYPE_V33 || type == BUCKET_TYPE_V33_LIZARD)
+    {
+      /* Length-prefixed format, the header ends with a zero byte */
+      use_v33 = 1;
+      hdr_sep = 0;
+    }
+  else
+    die("Don't know how to generate buckets of type %08x", type);
+}
+
+/*
+ * Return the number of bytes needed to store an attribute with a value
+ * of `len' bytes in the currently selected format.
+ */
+uns
+size_attr(uns len)
+{
+  /* Line-based formats: type byte + value + newline */
+  if (!use_v33)
+    return len + 2;
+
+  /* V33: UTF-8 encoded (len+1) + value + type byte */
+  uns payload = len + 1;
+  return payload + utf8_space(payload);
+}
+
+/*
+ * Return the number of bytes needed to store object `d' including all
+ * its sub-objects in the currently selected format.  Each sub-object
+ * costs 3 + 2 bytes on top of its own contents.
+ */
+uns
+size_object(struct odes *d)
+{
+  uns total = 0;
+
+  for (struct oattr *head = d->attrs; head; head = head->next)
+    for (struct oattr *cur = head; cur; cur = cur->same)
+      {
+        if (head->attr >= OBJ_ATTR_SON)
+          total += 3 + size_object(cur->son) + 2;
+        else
+          total += size_attr(strlen(cur->val));
+      }
+  return total;
+}
+
+/*
+ * Store an attribute of the given type with a value of `len' bytes at
+ * `ptr' in the currently selected format and return the position behind
+ * it.  The caller must provide enough space (see size_attr()).
+ *
+ * `val' may be NULL if `len' is zero (put_attr_pop() relies on that), so
+ * the copy is skipped for empty values: calling memcpy() with a null
+ * pointer is undefined even when the length is zero.
+ */
+inline byte *
+put_attr(byte *ptr, uns type, byte *val, uns len)
+{
+  if (use_v33)
+    {
+      /* V33: UTF-8 encoded (len+1), value, type byte */
+      PUT_UTF8_32(ptr, len+1);
+      if (len)
+        memcpy(ptr, val, len);
+      ptr += len;
+      *ptr++ = type;
+    }
+  else
+    {
+      /* Line-based formats: type byte, value, newline */
+      *ptr++ = type;
+      if (len)
+        memcpy(ptr, val, len);
+      ptr += len;
+      *ptr++ = '\n';
+    }
+  return ptr;
+}
+
+/* Store an attribute whose value is the zero-terminated string `val'; see put_attr(). */
+byte *
+put_attr_str(byte *ptr, uns type, byte *val)
+{
+  uns len = strlen(val);
+  return put_attr(ptr, type, val, len);
+}
+
+/*
+ * Store an attribute whose value is formatted according to `mask' and `va'
+ * and return the position behind it.
+ *
+ * NOTE(review): the value is produced by vsprintf() directly in the output
+ * buffer, so the caller is responsible for providing enough space.
+ */
+inline byte *
+put_attr_vformat(byte *ptr, uns type, byte *mask, va_list va)
+{
+ uns len;
+ if (use_v33)
+ {
+ /* Format the value leaving one byte for the length, which suffices for short values */
+ len = vsprintf(ptr+1, mask, va);
+ if (len >= 127)
+ {
+ /* The encoded length needs more than one byte, so shift the value to make room */
+ byte tmp[6], *tmp_end = tmp;
+ PUT_UTF8_32(tmp_end, len+1);
+ uns l = tmp_end - tmp;
+ memmove(ptr+l, ptr+1, len);
+ memcpy(ptr, tmp, l);
+ ptr += l + len;
+ }
+ else
+ {
+ *ptr = len+1;
+ ptr += len+1;
+ }
+ *ptr++ = type;
+ }
+ else
+ {
+ *ptr++ = type;
+ len = vsprintf(ptr, mask, va);
+ ptr += len;
+ *ptr++ = '\n';
+ }
+ return ptr;
+}
+
+/* Variadic front-end of put_attr_vformat(). */
+byte *
+put_attr_format(byte *ptr, uns type, char *mask, ...)
+{
+  byte *next;
+  va_list ap;
+
+  va_start(ap, mask);
+  next = put_attr_vformat(ptr, type, mask, ap);
+  va_end(ap);
+  return next;
+}
+
+/*
+ * Store an attribute whose value is the number `val' printed by "%d"
+ * and return the position behind it.
+ */
+byte *
+put_attr_num(byte *ptr, uns type, uns val)
+{
+  if (!use_v33)
+    return ptr + sprintf(ptr, "%c%d\n", type, val);
+
+  /* The number is short, so the length always fits in a single byte */
+  uns len = sprintf(ptr+1, "%d", val) + 1;
+  *ptr = len;
+  ptr += len;
+  *ptr++ = type;
+  return ptr;
+}
+
+/*
+ * Store the header separator of the currently selected format (nothing
+ * if the format has none) and return the position behind it.
+ */
+byte *
+put_attr_separator(byte *ptr)
+{
+  if (hdr_sep < 0)
+    return ptr;
+  *ptr = hdr_sep;
+  return ptr + 1;
+}
+
+/* Store the start of a sub-object: a '(' attribute whose value is the single byte `type'. */
+byte *
+put_attr_push(byte *ptr, uns type)
+{
+ byte name = type;
+ return put_attr(ptr, '(', &name, 1);
+}
+
+/* Store the end of a sub-object: a ')' attribute with an empty value. */
+byte *
+put_attr_pop(byte *ptr)
+{
+ return put_attr(ptr, ')', NULL, 0);
+}
+
+/*
+ * Store all attributes of object `d' including its sub-objects at `t'
+ * and return the position behind them.  The required space can be
+ * calculated by size_object().
+ */
+byte *
+put_object(byte *t, struct odes *d)
+{
+  for (struct oattr *head = d->attrs; head; head = head->next)
+    for (struct oattr *cur = head; cur; cur = cur->same)
+      {
+        if (head->attr < OBJ_ATTR_SON)
+          {
+            t = put_attr_str(t, head->attr, cur->val);
+            continue;
+          }
+        /* A sub-object is enclosed between a push and a pop attribute */
+        t = put_attr_push(t, head->attr - OBJ_ATTR_SON);
+        t = put_object(t, cur->son);
+        t = put_attr_pop(t);
+      }
+  return t;
+}
+
+/*
+ * Write an attribute of the given type with a value of `len' bytes to
+ * fastbuf `b' in the currently selected format.
+ */
+inline void
+bput_attr_large(struct fastbuf *b, uns type, byte *val, uns len)
+{
+  if (!use_v33)
+    {
+      /* Line-based formats: type byte, value, newline */
+      bputc(b, type);
+      bwrite(b, val, len);
+      bputc(b, '\n');
+      return;
+    }
+  /* V33: UTF-8 encoded (len+1), value, type byte */
+  bput_utf8_32(b, len+1);
+  bwrite(b, val, len);
+  bputc(b, type);
+}
+
+/* Write an attribute to fastbuf `b'; this simply forwards to bput_attr_large(). */
+inline void
+bput_attr(struct fastbuf *b, uns type, byte *val, uns len)
+{
+ bput_attr_large(b, type, val, len);
+}
+
+/* Write an attribute whose value is the zero-terminated string `val' to fastbuf `b'. */
+void
+bput_attr_str(struct fastbuf *b, uns type, byte *val)
+{
+  uns len = strlen(val);
+  bput_attr(b, type, val, len);
+}
+
+/*
+ * Write an attribute whose value is formatted according to `mask' and
+ * `va' to fastbuf `b' in the currently selected format.
+ */
+void
+bput_attr_vformat(struct fastbuf *b, uns type, byte *mask, va_list va)
+{
+  if (!use_v33)
+    {
+      bputc(b, type);
+      vbprintf(b, mask, va);
+      bputc(b, '\n');
+      return;
+    }
+
+  /* The length precedes the value, so measure it first on a copy of the arguments */
+  va_list measure;
+  va_copy(measure, va);
+  int len = vsnprintf(NULL, 0, mask, measure);
+  va_end(measure);
+  if (len < 0)
+    die("vsnprintf() does not support size=0");
+
+  bput_utf8_32(b, len+1);
+  vbprintf(b, mask, va);
+  bputc(b, type);
+}
+
+/* Variadic front-end of bput_attr_vformat(). */
+void
+bput_attr_format(struct fastbuf *b, uns type, char *mask, ...)
+{
+  va_list ap;
+
+  va_start(ap, mask);
+  bput_attr_vformat(b, type, mask, ap);
+  va_end(ap);
+}
+
+/*
+ * Write an attribute whose value is the number `val' printed by "%d"
+ * to fastbuf `b' in the currently selected format.
+ */
+void
+bput_attr_num(struct fastbuf *b, uns type, uns val)
+{
+  if (!use_v33)
+    {
+      bprintf(b, "%c%d\n", type, val);
+      return;
+    }
+
+  /* The number is short, so the length is written as a single byte */
+  byte digits[12];
+  uns len = sprintf(digits, "%d", val);
+  bputc(b, len+1);
+  bwrite(b, digits, len);
+  bputc(b, type);
+}
+
+/* Write the header separator of the currently selected format, if it has one. */
+void
+bput_attr_separator(struct fastbuf *b)
+{
+  if (hdr_sep < 0)
+    return;
+  bputc(b, hdr_sep);
+}
+
+/* Write the start of a sub-object: a '(' attribute whose value is the single byte `type'. */
+void
+bput_attr_push(struct fastbuf *b, uns type)
+{
+ byte name = type;
+ bput_attr(b, '(', &name, 1);
+}
+
+/* Write the end of a sub-object: a ')' attribute with an empty value. */
+void
+bput_attr_pop(struct fastbuf *b)
+{
+ bput_attr(b, ')', NULL, 0);
+}
+
+/*
+ * Write attribute `a' with all its values to fastbuf `f'; sub-objects are
+ * written recursively between a push and a pop attribute.
+ * With DEBUG_ASSERTS defined, values containing characters below ' '
+ * other than a tab are reported and trigger an assertion failure.
+ *
+ * NOTE(review): the debugging branch calls log() while other code in this
+ * library reports errors by msg() -- check that it still compiles with
+ * DEBUG_ASSERTS defined.
+ */
+static inline void
+do_bput_oattr(struct fastbuf *f, struct oattr *a)
+{
+ for(struct oattr *b=a; b; b=b->same)
+ if (a->attr >= OBJ_ATTR_SON)
+ {
+ bput_attr_push(f, a->attr - OBJ_ATTR_SON);
+ bput_object(f, b->son);
+ bput_attr_pop(f);
+ }
+ else
+ {
+#ifdef DEBUG_ASSERTS
+ byte *z;
+ for (z = b->val; *z; z++)
+ if (*z < ' ' && *z != '\t')
+ {
+ log(L_ERROR, "obj_write: Found non-ASCII character %02x in %c%s", *z, a->attr, b->val);
+ ASSERT(0);
+ }
+#endif
+ bput_attr_str(f, a->attr, b->val);
+ }
+}
+
+/* Write attribute `a' with all its values to fastbuf `f'; a NULL attribute is ignored. */
+void
+bput_oattr(struct fastbuf *f, struct oattr *a)
+{
+  if (!a)
+    return;
+  do_bput_oattr(f, a);
+}
+
+/* Write all attributes of object `d' to fastbuf `f' in the currently selected format. */
+void
+bput_object(struct fastbuf *f, struct odes *d)
+{
+  struct oattr *cur = d->attrs;
+
+  while (cur)
+    {
+      do_bput_oattr(f, cur);
+      cur = cur->next;
+    }
+}
+
+/*
+ * Write attribute `a' with all its values to fastbuf `f' without the
+ * debugging checks of do_bput_oattr(); sub-objects are written
+ * recursively between a push and a pop attribute.
+ */
+static inline void
+do_bput_oattr_nocheck(struct fastbuf *f, struct oattr *a)
+{
+  for (struct oattr *cur = a; cur; cur = cur->same)
+    {
+      if (a->attr < OBJ_ATTR_SON)
+        {
+          bput_attr_large(f, a->attr, cur->val, strlen(cur->val));
+          continue;
+        }
+      bput_attr_push(f, a->attr - OBJ_ATTR_SON);
+      bput_object_nocheck(f, cur->son);
+      bput_attr_pop(f);
+    }
+}
+
+/* Unchecked variant of bput_oattr(); a NULL attribute is ignored. */
+void
+bput_oattr_nocheck(struct fastbuf *f, struct oattr *a)
+{
+  if (!a)
+    return;
+  do_bput_oattr_nocheck(f, a);
+}
+
+/* Unchecked variant of bput_object(). */
+void
+bput_object_nocheck(struct fastbuf *f, struct odes *d)
+{
+  struct oattr *cur = d->attrs;
+
+  while (cur)
+    {
+      do_bput_oattr_nocheck(f, cur);
+      cur = cur->next;
+    }
+}
+
+/*
+ * Write object `o' to fastbuf `b' as a bucket of type `bucket_type'.
+ * This changes the format selected by put_attr_set_type().
+ */
+void
+obj_write(struct fastbuf *b, struct odes *o, uns bucket_type)
+{
+ put_attr_set_type(bucket_type);
+ bput_object(b, o);
+}
+
+/*
+ * Unchecked variant of obj_write(), using bput_object_nocheck().
+ * This changes the format selected by put_attr_set_type().
+ */
+void
+obj_write_nocheck(struct fastbuf *b, struct odes *o, uns bucket_type)
+{
+ put_attr_set_type(bucket_type);
+ bput_object_nocheck(b, o);
+}
--- /dev/null
+/*
+ * Sherlock Library -- Object Functions
+ *
+ * (c) 1997--2006 Martin Mares <mj@ucw.cz>
+ * (c) 2004 Robert Spalek <robert@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "sherlock/sherlock.h"
+#include "lib/mempool.h"
+#include "lib/fastbuf.h"
+#include "sherlock/object.h"
+
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+/*
+ * Print object `d' to the standard output, one value per line.
+ * The attribute name is printed only for the first value of each
+ * attribute, the remaining ones start with a space.  Sub-objects are
+ * printed recursively between "(X" and ")" lines.
+ */
+void
+obj_dump(struct odes *d)
+{
+  for (struct oattr *head = d->attrs; head; head = head->next)
+    for (struct oattr *cur = head; cur; cur = cur->same)
+      {
+        if (head->attr < OBJ_ATTR_SON)
+          {
+            printf("%c%s\n", (head==cur ? head->attr : ' '), cur->val);
+            continue;
+          }
+        printf("(%c\n", head->attr - OBJ_ATTR_SON);
+        obj_dump(cur->son);
+        printf(")\n");
+      }
+}
+
+/*
+ * Print object `d' to the standard output like obj_dump() does, but
+ * with every line preceded by `indent' tabs.  Sub-objects are printed
+ * one level deeper; the closing ")" is printed at the level of the
+ * sub-object's contents.
+ */
+void
+obj_dump_indented(struct odes *d, uns indent)
+{
+  for (struct oattr *head = d->attrs; head; head = head->next)
+    for (struct oattr *cur = head; cur; cur = cur->same)
+      {
+        for (uns i = 0; i < indent; i++)
+          putchar('\t');
+        if (head->attr < OBJ_ATTR_SON)
+          {
+            printf("%c%s\n", (head==cur ? head->attr : ' '), cur->val);
+            continue;
+          }
+        printf("(%c\n", head->attr - OBJ_ATTR_SON);
+        obj_dump_indented(cur->son, indent+1);
+        for (uns i = 0; i <= indent; i++)
+          putchar('\t');
+        printf(")\n");
+      }
+}
+
+/*
+ * Allocate an unlinked attribute node of type `x' from the pool of
+ * object `o'.  The value `v' is copied (including its terminating zero)
+ * right behind the node.
+ */
+static struct oattr *
+oa_new(struct odes *o, uns x, byte *v)
+{
+  uns size = strlen(v)+1;
+  struct oattr *node = mp_alloc(o->pool, sizeof(struct oattr) + size);
+
+  node->attr = x;
+  node->next = NULL;
+  node->same = NULL;
+  node->val = (byte*) (node+1);
+  memcpy(node->val, v, size);
+  return node;
+}
+
+/*
+ * Allocate an unlinked attribute node of type `x' from the pool of
+ * object `o'.  The value is not copied, the node just refers to `v'.
+ */
+static struct oattr *
+oa_new_ref(struct odes *o, uns x, byte *v)
+{
+  struct oattr *node = mp_alloc(o->pool, sizeof(struct oattr));
+
+  node->attr = x;
+  node->val = v;
+  node->next = NULL;
+  node->same = NULL;
+  return node;
+}
+
+/*
+ * Allocate an unlinked attribute node of type `x' from the pool of
+ * object `o' which refers to the sub-object `son', and make `o' the
+ * parent of `son'.
+ */
+static struct oattr *
+oa_new_son(struct odes *o, uns x, struct odes *son)
+{
+  struct oattr *node = mp_alloc(o->pool, sizeof(struct oattr));
+
+  node->attr = x;
+  node->son = son;
+  node->next = NULL;
+  node->same = NULL;
+  son->parent = o;
+  return node;
+}
+
+/*
+ * Create an empty object allocated from `pool'.  The same pool is
+ * remembered in the object for later allocations of its attributes.
+ */
+struct odes *
+obj_new(struct mempool *pool)
+{
+  struct odes *obj = mp_alloc(pool, sizeof(struct odes));
+
+  obj->parent = NULL;
+  obj->attrs = NULL;
+  obj->cached_attr = NULL;
+  obj->pool = pool;
+  return obj;
+}
+
+/*
+ * Find attribute `x' in object `o'.  Returns the first node of the
+ * attribute (further values are linked by ->same) or NULL if the object
+ * has no such attribute.
+ */
+struct oattr *
+obj_find_attr(struct odes *o, uns x)
+{
+  struct oattr *cur = o->attrs;
+
+  while (cur && cur->attr != x)
+    cur = cur->next;
+  return cur;
+}
+
+/*
+ * Find the node holding the last value of attribute `x' in object `o'.
+ * Returns NULL if the object has no such attribute.
+ */
+struct oattr *
+obj_find_attr_last(struct odes *o, uns x)
+{
+  struct oattr *cur = obj_find_attr(o, x);
+
+  if (!cur)
+    return NULL;
+  while (cur->same)
+    cur = cur->same;
+  return cur;
+}
+
+/*
+ * Remove the single attribute node `a' (one value) from object `o'.
+ * Returns 1 if the node has been found and unlinked, 0 otherwise.
+ * The attribute cache of the object is always invalidated.
+ */
+uns
+obj_del_attr(struct odes *o, struct oattr *a)
+{
+ struct oattr *x, **p;
+ uns aa = a->attr;
+
+ o->cached_attr = NULL;
+ p = &o->attrs;
+ while (x = *p)
+ {
+ if (x->attr == aa)
+ {
+ if (x == a)
+ {
+ /* Deleting the first value: the next value (if any) takes its place in the main list */
+ if (x->same)
+ {
+ x->same->next = x->next;
+ *p = x->same;
+ }
+ else
+ *p = x->next;
+ return 1;
+ }
+ /* Otherwise search the chain of further values of the same attribute */
+ p = &x->same;
+ while (x = *p)
+ {
+ if (x == a)
+ {
+ *p = x->same;
+ return 1;
+ }
+ p = &x->same;
+ }
+ return 0;
+ }
+ p = &x->next;
+ }
+ return 0;
+}
+
+/* Return the first value of attribute `x' of object `o', or NULL if there is none. */
+byte *
+obj_find_aval(struct odes *o, uns x)
+{
+  struct oattr *found = obj_find_attr(o, x);
+
+  if (!found)
+    return NULL;
+  return found->val;
+}
+
+/*
+ * Return the first value of attribute `x' of object `o' converted by
+ * atol(), or `def' if the object has no such attribute.
+ */
+uns
+obj_find_anum(struct odes *o, uns x, uns def)
+{
+  struct oattr *found = obj_find_attr(o, x);
+
+  if (!found)
+    return def;
+  return (uns)atol(found->val);
+}
+
+/*
+ * Return the first value of attribute `x' of object `o' parsed as a
+ * hexadecimal number by strtoul(), or `def' if there is no such attribute.
+ */
+u32
+obj_find_x32(struct odes *o, uns x, u32 def)
+{
+  struct oattr *found = obj_find_attr(o, x);
+
+  if (!found)
+    return def;
+  return (u32)strtoul(found->val, NULL, 16);
+}
+
+/*
+ * Return the first value of attribute `x' of object `o' parsed as a
+ * hexadecimal number by strtoull(), or `def' if there is no such attribute.
+ */
+u64
+obj_find_x64(struct odes *o, uns x, u64 def)
+{
+  struct oattr *found = obj_find_attr(o, x);
+
+  if (!found)
+    return def;
+  return (u64)strtoull(found->val, NULL, 16);
+}
+
+/*
+ * Set attribute `x' of object `o' to the single value `v': all existing
+ * values of the attribute are unlinked and, unless `v' is NULL, a copy
+ * of the new value is put at the head of the attribute list.
+ * Returns the new attribute node, or NULL if `v' is NULL.
+ */
+struct oattr *
+obj_set_attr(struct odes *o, uns x, byte *v)
+{
+  struct oattr **link = &o->attrs;
+  struct oattr *cur;
+
+  /* Unlink the old attribute together with the whole chain of its values */
+  for (; (cur = *link); link = &cur->next)
+    if (cur->attr == x)
+      {
+        *link = cur->next;
+        break;
+      }
+
+  struct oattr *fresh = NULL;
+  if (v)
+    {
+      fresh = oa_new(o, x, v);
+      fresh->next = o->attrs;
+      o->attrs = fresh;
+    }
+  o->cached_attr = fresh;
+  return fresh;
+}
+
+/* Set attribute `a' of object `o' to the number `v' printed by "%d"; see obj_set_attr(). */
+struct oattr *
+obj_set_attr_num(struct odes *o, uns a, uns v)
+{
+  byte text[32];
+
+  sprintf(text, "%d", v);
+  return obj_set_attr(o, a, text);
+}
+
+/*
+ * Link the prepared node `b' to object `o': if the object already has an
+ * attribute of the same type, `b' is appended to the chain of its values,
+ * otherwise it is appended to the end of the attribute list.
+ * The node is remembered in o->cached_attr, so that a following addition
+ * of the same attribute does not need to search the list.  Returns `b'.
+ */
+static inline struct oattr *
+obj_link_attr(struct odes *o, struct oattr *b)
+{
+ struct oattr *a, **z;
+
+ /* Search the list only if the cached node is of a different type */
+ if (!(a = o->cached_attr) || a->attr != b->attr)
+ {
+ z = &o->attrs;
+ while ((a = *z) && a->attr != b->attr)
+ z = &a->next;
+ if (!a)
+ {
+ *z = b;
+ /* b->next is NULL */
+ goto done;
+ }
+ }
+ /* Append to the end of the chain of values of the same attribute */
+ while (a->same)
+ a = a->same;
+ a->same = b;
+ done:
+ o->cached_attr = b;
+ return b;
+}
+
+/* Add a copy of `v' as a further value of attribute `x' of object `o'; returns the new node. */
+struct oattr *
+obj_add_attr(struct odes *o, uns x, byte *v)
+{
+ return obj_link_attr(o, oa_new(o, x, v));
+}
+
+/*
+ * Like obj_add_attr(), but the value is not copied: the new node refers
+ * to `v' directly, so the string must stay valid as long as the object
+ * is used.
+ */
+struct oattr *
+obj_add_attr_ref(struct odes *o, uns x, byte *v)
+{
+ return obj_link_attr(o, oa_new_ref(o, x, v));
+}
+
+/* Add the number `v' printed by "%d" as a further value of attribute `a'; see obj_add_attr(). */
+struct oattr *
+obj_add_attr_num(struct odes *o, uns a, uns v)
+{
+  byte text[32];
+
+  sprintf(text, "%d", v);
+  return obj_add_attr(o, a, text);
+}
+
+/* Add the existing object `son' to object `o' as a sub-object of type `x'; returns the new node. */
+struct oattr *
+obj_add_attr_son(struct odes *o, uns x, struct odes *son)
+{
+ return obj_link_attr(o, oa_new_son(o, x, son));
+}
+
+/*
+ * Add a copy of `v' as the FIRST value of attribute `x' of object `o'.
+ * If the attribute already exists, the new node takes over its place in
+ * the attribute list; otherwise the new attribute is put at the head of
+ * the list.  Returns the new node.
+ */
+struct oattr *
+obj_prepend_attr(struct odes *o, uns x, byte *v)
+{
+  struct oattr *fresh = oa_new(o, x, v);
+  struct oattr **link = &o->attrs;
+  struct oattr *cur;
+
+  while ((cur = *link) && cur->attr != x)
+    link = &cur->next;
+
+  if (cur)
+    {
+      /* The old first value becomes the second one */
+      fresh->same = cur;
+      fresh->next = cur->next;
+      cur->next = NULL;
+      *link = fresh;
+    }
+  else
+    {
+      fresh->next = o->attrs;
+      o->attrs = fresh;
+    }
+  return fresh;
+}
+
+/*
+ * Insert a copy of `v' as a new value right after the node `after' in
+ * the chain of values of an attribute.  The attribute type is taken from
+ * `first'.  Returns the new node.
+ */
+struct oattr *
+obj_insert_attr(struct odes *o, struct oattr *first, struct oattr *after, byte *v)
+{
+  struct oattr *fresh = oa_new(o, first->attr, v);
+
+  fresh->same = after->same;
+  after->same = fresh;
+  return fresh;
+}
+
+/*
+ * Move attribute `x' (with all its values) to the head of the attribute
+ * list of object `o'.  Nothing happens if there is no such attribute.
+ */
+void
+obj_move_attr_to_head(struct odes *o, uns x)
+{
+  struct oattr **link = &o->attrs;
+  struct oattr *cur;
+
+  while ((cur = *link) && cur->attr != x)
+    link = &cur->next;
+  if (!cur)
+    return;
+
+  *link = cur->next;
+  cur->next = o->attrs;
+  o->attrs = cur;
+}
+
+/*
+ * Move attribute `x' to the end of the attribute list of `o'.
+ * Only the `next' links are changed, so the attribute keeps all its
+ * values. Does nothing when the attribute is not present.
+ */
+void
+obj_move_attr_to_tail(struct odes *o, uns x)
+{
+  struct oattr **link = &o->attrs;
+  struct oattr *cur;
+
+  /* Locate the attribute together with the link pointing at it */
+  while ((cur = *link) && cur->attr != x)
+    link = &cur->next;
+  if (!cur)
+    return;
+
+  /* Unlink it, then continue from the same spot to the end of the list */
+  *link = cur->next;
+  while (*link)
+    link = &(*link)->next;
+  *link = cur;
+  cur->next = NULL;
+}
+
+/*
+ * Look up attribute `x', which must denote a sub-object (asserted to be
+ * at least OBJ_ATTR_SON), and return the sub-object stored in the node
+ * that obj_find_attr() yields, or NULL when it yields nothing.
+ */
+struct odes *
+obj_find_son(struct odes *o, uns x)
+{
+  struct oattr *hit;
+
+  ASSERT(x >= OBJ_ATTR_SON);
+  hit = obj_find_attr(o, x);
+  if (!hit)
+    return NULL;
+  return hit->son;
+}
+
+/*
+ * Attach the existing object `son' to `o' under attribute `x' and
+ * return the attribute node created for it.
+ */
+struct oattr *
+obj_add_son_ref(struct odes *o, uns x, struct odes *son)
+{
+  /* obj_link_attr() hands back the very node it was given */
+  return obj_link_attr(o, oa_new_son(o, x, son));
+}
+
+/*
+ * Create a new object with obj_new() in the pool of `o', attach it to
+ * `o' under attribute `x' and return the new object (not the attribute
+ * node).
+ */
+struct odes *
+obj_add_son(struct odes *o, uns x)
+{
+  struct odes *child = obj_new(o->pool);
+
+  obj_add_son_ref(o, x, child);
+  return child;
+}
+
+/* Declared ahead: obj_clone_attr() and obj_clone_attr_list() call each other */
+static void obj_clone_attr_list(struct odes *dest, struct odes *src);
+
+/*
+ * Copy one attribute, i.e. the whole `same' chain starting at `a',
+ * into nodes belonging to `dest'. The copy is returned but NOT linked
+ * into dest's attribute list; that is up to the caller.
+ *
+ * Whether the chain holds plain values or sub-objects is decided once,
+ * from the code of the first node. Sub-objects are cloned recursively
+ * into new objects created in dest->pool.
+ */
+static struct oattr *
+obj_clone_attr(struct odes *dest, struct oattr *a)
+{
+  struct oattr *head = NULL;
+  struct oattr **tail = &head;
+  int is_son = (a->attr >= OBJ_ATTR_SON);
+
+  while (a)
+    {
+      struct oattr *copy;
+
+      if (is_son)
+        {
+          struct odes *dson = obj_new(dest->pool);
+
+          copy = oa_new_son(dest, a->attr, dson);
+          *tail = copy;
+          tail = &copy->same;
+          obj_clone_attr_list(dson, a->son);
+        }
+      else
+        {
+          copy = oa_new(dest, a->attr, a->val);
+          *tail = copy;
+          tail = &copy->same;
+        }
+      a = a->same;
+    }
+  return head;
+}
+
+/*
+ * Clone every attribute of `src' with obj_clone_attr() and chain the
+ * copies, in the same order, into dest->attrs.
+ */
+static void
+obj_clone_attr_list(struct odes *dest, struct odes *src)
+{
+  struct oattr **tail = &dest->attrs;
+  struct oattr *a = src->attrs;
+
+  while (a)
+    {
+      struct oattr *copy = obj_clone_attr(dest, a);
+
+      *tail = copy;
+      tail = &copy->next;
+      a = a->next;
+    }
+}
+
+/*
+ * Clone attribute `a' with all its values and link the copy into
+ * object `o' using obj_link_attr().
+ */
+void
+obj_add_attr_clone(struct odes *o, struct oattr *a)
+{
+  struct oattr *copy = obj_clone_attr(o, a);
+
+  obj_link_attr(o, copy);
+}
+
+/*
+ * Create a new object in `pool' and fill it with a recursive copy of
+ * all attributes of `src'. Returns the new object.
+ */
+struct odes *
+obj_clone(struct mempool *pool, struct odes *src)
+{
+  struct odes *copy = obj_new(pool);
+
+  obj_clone_attr_list(copy, src);
+  return copy;
+}
--- /dev/null
+/*
+ * Sherlock Library -- Objects and operations on them
+ *
+ * (c) 1997--2006 Martin Mares <mj@ucw.cz>
+ * (c) 2004--2005, Robert Spalek <robert@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+/*
+ * This is the main data structure used by Sherlock for many different
+ * purposes, most notably storage of documents in various stages of processing.
+ *
+ * Each object consists of a sequence of attributes whose names are single
+ * characters and values are either strings or subobjects. The order of attributes
+ * is not maintained (except for a couple of very special cases), while the order
+ * of multiple values of a single attribute is.
+ *
+ * Objects exist either in the form of struct odes (an in-memory representation
+ * with very easy manipulation) or as a bucket (a linear stream of bytes in one of
+ * several possible formats, some of which are compressed, used for sending objects
+ * between processes and storing them in files [see sherlock/bucket.h for bucket files]).
+ *
+ * See doc/objects for a more detailed description of how objects are used to
+ * represent documents.
+ */
+
+#ifndef _SHERLOCK_OBJECT_H
+#define _SHERLOCK_OBJECT_H
+
+struct fastbuf;
+struct mempool;
+
+/* object.c: In-memory representation of objects */
+
+/*
+ * In-memory object: a list of attributes plus some bookkeeping used by
+ * the functions in object.c and the readers in objread.h.
+ */
+struct odes { /* Object description */
+ struct oattr *attrs; /* Head of the attribute list (chained through oattr->next) */
+ struct mempool *pool; /* Pool passed to obj_new() when sub-objects of this object are created */
+ struct oattr *cached_attr; /* Node most recently linked in; tried first when another value is added */
+ struct odes *parent; /* Enclosing object; obj_read_pop() treats NULL as "no enclosing object" */
+};
+
+/*
+ * One value of one attribute. Different attributes of an object are
+ * chained through `next'; further values of the same attribute hang off
+ * the first one through `same'.
+ */
+struct oattr { /* Object attribute */
+ struct oattr *next, *same; /* next: following attribute; same: following value of this attribute */
+ uns attr; /* +OBJ_ATTR_SON if it's a sub-object */
+ union {
+ byte *val; /* Used when attr < OBJ_ATTR_SON */
+ struct odes *son; /* Used when attr >= OBJ_ATTR_SON */
+ };
+};
+
+/* Attribute codes at or above this value denote sub-objects instead of plain values */
+#define OBJ_ATTR_SON 256
+
+/*
+ * Functions implemented in object.c. Short notes are given only where the
+ * behaviour follows directly from the implementation.
+ */
+void obj_dump(struct odes *);
+void obj_dump_indented(struct odes *, uns);
+struct odes *obj_new(struct mempool *);
+struct oattr *obj_find_attr(struct odes *, uns);
+struct oattr *obj_find_attr_last(struct odes *, uns);
+uns obj_del_attr(struct odes *, struct oattr *);
+byte *obj_find_aval(struct odes *, uns);
+uns obj_find_anum(struct odes *, uns, uns);
+u32 obj_find_x32(struct odes *, uns, u32);
+u64 obj_find_x64(struct odes *, uns, u64);
+struct oattr *obj_set_attr(struct odes *, uns, byte *);
+struct oattr *obj_set_attr_num(struct odes *, uns, uns); /* obj_set_attr() with the number printed in decimal */
+struct oattr *obj_add_attr(struct odes *, uns, byte *); /* new value goes after the existing values of the attribute */
+struct oattr *obj_add_attr_ref(struct odes *o, uns x, byte *v); // no strdup()
+struct oattr *obj_add_attr_num(struct odes *o, uns, uns); /* obj_add_attr() with the number printed in decimal */
+struct oattr *obj_add_attr_son(struct odes *, uns, struct odes *); /* attach an existing object as a sub-object */
+struct oattr *obj_prepend_attr(struct odes *, uns, byte *); /* new value goes before the existing values of the attribute */
+struct oattr *obj_insert_attr(struct odes *o, struct oattr *first, struct oattr *after, byte *v); /* new value goes right behind `after' */
+void obj_move_attr_to_head(struct odes *o, uns); /* no-op when the attribute is missing */
+void obj_move_attr_to_tail(struct odes *o, uns); /* no-op when the attribute is missing */
+struct odes *obj_find_son(struct odes *, uns); /* attribute code must be >= OBJ_ATTR_SON; NULL when not found */
+struct odes *obj_add_son(struct odes *, uns); /* creates the sub-object in the parent's pool and returns it */
+struct oattr *obj_add_son_ref(struct odes *o, uns x, struct odes *son); /* attach an existing object, return its attribute node */
+void obj_add_attr_clone(struct odes *o, struct oattr *a); /* add a copy of `a' with all its values */
+struct odes *obj_clone(struct mempool *pool, struct odes *src); /* recursive copy of `src' created in `pool' */
+
+/* Supported bucket formats */
+
+/*
+ * Type codes of the bucket formats.
+ * NOTE(review): the values from 0x80000000 up do not fit into a 32-bit
+ * int, so before C23 this enum relies on a compiler extension; code in
+ * this header passes bucket types around as `uns'.
+ */
+enum bucket_type {
+ BUCKET_TYPE_COMPAT = 0x7fffffff, /* and less -- buckets created by older versions of Sherlock */
+ BUCKET_TYPE_PLAIN = 0x80000000, /* plain textual buckets */
+ BUCKET_TYPE_V30 = 0x80000001, /* v3.0 uncompressed buckets */
+ BUCKET_TYPE_V33 = 0x80000002, /* v3.3 uncompressed buckets */
+ BUCKET_TYPE_V33_LIZARD = 0x80000003 /* v3.3 buckets compressed by lizard */
+};
+
+/* buck2obj.c: Reading of objects from buckets */
+
+/*
+ * One attribute as filled in by get_attr() / bget_attr().
+ * NOTE(review): the exact meaning of the fields (special values of `attr',
+ * whether `val' is zero-terminated) is defined in buck2obj.c, which is not
+ * part of this header -- confirm there before relying on it.
+ */
+struct parsed_attr {
+ int attr; /* attribute code */
+ byte *val; /* start of the value */
+ uns len; /* length of the value */
+};
+struct buck2obj_buf; /* opaque here; obtained from buck2obj_alloc(), released by buck2obj_free() */
+
+/* Note: the get_attr routines are not thread-safe. */
+void get_attr_set_type(uns type);
+int get_attr(byte **pos, byte *end, struct parsed_attr *attr);
+int bget_attr(struct fastbuf *b, struct parsed_attr *attr);
+void copy_parsed_attr(struct mempool *pool, struct parsed_attr *attr);
+
+/* Allocation and release of the buffer handed to the parsing functions below */
+struct buck2obj_buf *buck2obj_alloc(void);
+void buck2obj_free(struct buck2obj_buf *buf);
+
+int buck2obj_parse(struct buck2obj_buf *buf, uns buck_type, uns buck_len, struct fastbuf *body,
+ struct odes *o_hdr, uns *body_start, struct odes *o_body,
+ uns allow_zero_copy);
+struct odes *obj_read_bucket(struct buck2obj_buf *buf, struct mempool *pool, uns buck_type, uns buck_len, struct fastbuf *body,
+ uns *body_start, uns allow_zero_copy);
+ /* If body_start != NULL, then only the header is parsed and *body_start is
+ * set to the position of the body. This function performs a number of
+ * optimizations: if the body fastbuf is overwritable (body->can_overwrite_buffer),
+ * it can keep the attribute values stored at their original locations in the
+ * fastbuf's buffer. However, no such optimizations are performed when only the
+ * header is read.
+ */
+
+int obj_read(struct fastbuf *, struct odes *);
+
+/* obj2buck.c: Generating buckets from objects */
+
+void put_attr_set_type(uns type);
+
+uns size_attr(uns len);
+uns size_object(struct odes *d);
+
+/*
+ * Writers working on a memory buffer. Each takes the current write
+ * pointer and returns a byte pointer.
+ * NOTE(review): presumably the returned pointer is the position just
+ * behind the written data -- confirm in obj2buck.c.
+ *
+ * The printf-like variants are annotated with FORMAT_CHECK, the same
+ * macro this header uses for the obj_*_format() functions, instead of a
+ * spelled-out __attribute__.
+ */
+byte *put_attr(byte *ptr, uns type, byte *val, uns len);
+byte *put_attr_str(byte *ptr, uns type, byte *val);
+byte *put_attr_vformat(byte *ptr, uns type, byte *mask, va_list va);
+byte *put_attr_format(byte *ptr, uns type, char *mask, ...) FORMAT_CHECK(printf,3,4);
+byte *put_attr_num(byte *ptr, uns type, uns val);
+byte *put_attr_separator(byte *ptr);
+byte *put_attr_push(byte *ptr, uns type);
+byte *put_attr_pop(byte *ptr);
+byte *put_object(byte *t, struct odes *d);
+
+/* The same set of writers working on a fastbuf */
+void bput_attr(struct fastbuf *b, uns type, byte *val, uns len);
+void bput_attr_large(struct fastbuf *b, uns type, byte *val, uns len);
+void bput_attr_str(struct fastbuf *b, uns type, byte *val);
+void bput_attr_vformat(struct fastbuf *b, uns type, byte *mask, va_list va);
+void bput_attr_format(struct fastbuf *b, uns type, char *mask, ...) FORMAT_CHECK(printf,3,4);
+void bput_attr_num(struct fastbuf *b, uns type, uns val);
+void bput_attr_separator(struct fastbuf *b);
+void bput_attr_push(struct fastbuf *b, uns type);
+void bput_attr_pop(struct fastbuf *b);
+void bput_oattr(struct fastbuf *f, struct oattr *a);
+void bput_oattr_nocheck(struct fastbuf *f, struct oattr *a);
+void bput_object(struct fastbuf *b, struct odes *o);
+void bput_object_nocheck(struct fastbuf *b, struct odes *o);
+
+void obj_write(struct fastbuf *b, struct odes *o, uns bucket_type);
+void obj_write_nocheck(struct fastbuf *b, struct odes *o, uns bucket_type);
+
+/* obj-linear.c: Linear representation of objects by in-memory buckets */
+
+/*
+ * NOTE(review): the implementations are not part of this header; going by
+ * the names, obj_linearize() turns an object into an in-memory bucket
+ * (length returned through *plen) and obj_delinearize() does the reverse.
+ * Confirm ownership of the returned buffer in obj-linear.c.
+ */
+byte *obj_linearize(struct odes *d, uns min_compress, uns *plen);
+struct odes *obj_delinearize(struct buck2obj_buf *bbuf, struct mempool *mp, byte *buf, uns len, uns destructive);
+
+/* obj-format.c: Adding of formatted values */
+
+/* printf-style counterparts of obj_add_attr() and obj_set_attr(); the format string is checked by the compiler */
+struct oattr *obj_add_attr_vformat(struct odes *o, uns x, char *fmt, va_list args);
+struct oattr *obj_add_attr_format(struct odes *o, uns x, char *fmt, ...) FORMAT_CHECK(printf,3,4);
+struct oattr *obj_set_attr_vformat(struct odes *o, uns x, char *fmt, va_list args);
+struct oattr *obj_set_attr_format(struct odes *o, uns x, char *fmt, ...) FORMAT_CHECK(printf,3,4);
+
+#endif
--- /dev/null
+/*
+ * Sherlock Library -- Nested Object Reading Functions
+ *
+ * (c) 2005 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _SHERLOCK_OBJREAD_H
+#define _SHERLOCK_OBJREAD_H
+
+#include "sherlock/object.h"
+
+/*
+ * State of reading one (possibly nested) object attribute by attribute.
+ * Set up by obj_read_start(), fed by obj_read_attr() or obj_read_attr_ref()
+ * and finished by obj_read_end().
+ */
+struct obj_read_state {
+ struct odes *root, *obj; /* root: object passed to obj_read_start(); obj: object currently being filled */
+ void (*error_callback)(struct obj_read_state *st, char *msg); /* invoked for the first error only */
+ void *user; /* not touched by the functions in this header */
+ int errors; /* number of errors encountered so far */
+};
+
+/* Callback installed by obj_read_start() */
+void default_obj_read_error(struct obj_read_state *st, char *msg);
+
+/*
+ * Prepare `st' for reading attributes into `obj': clear the error
+ * counter, install the default error callback and make `obj' both the
+ * root and the current object. st->user is left as it was.
+ */
+static inline void
+obj_read_start(struct obj_read_state *st, struct odes *obj)
+{
+  st->error_callback = default_obj_read_error;
+  st->errors = 0;
+  st->root = obj;
+  st->obj = obj;
+}
+
+/*
+ * Handle a `(' attribute: its value `ptr' must be exactly one character,
+ * which names the sub-object. A new sub-object is added to the current
+ * object and becomes the current one. A malformed value is counted as an
+ * error, and only the first error of a read is reported via the callback.
+ */
+static inline void
+obj_read_push(struct obj_read_state *st, byte *ptr)
+{
+  if (unlikely(!ptr[0] || ptr[1]))
+    {
+      if (st->errors++ == 0)
+        st->error_callback(st, "Malformed object: bad `(' attribute");
+      return;
+    }
+
+  st->obj = obj_add_son(st->obj, ptr[0] + OBJ_ATTR_SON);
+}
+
+/*
+ * Handle a `)' attribute: its value `ptr' must be empty and the current
+ * object must have a parent, which then becomes the current object.
+ * Violations are counted as errors, and only the first error of a read is
+ * reported via the callback.
+ */
+static inline void
+obj_read_pop(struct obj_read_state *st, byte *ptr)
+{
+  char *problem = NULL;
+
+  if (unlikely(ptr[0]))
+    problem = "Malformed object: bad ')' attribute";
+  else if (unlikely(!st->obj->parent))
+    problem = "Malformed object: improper nesting of `( ... )' blocks";
+
+  if (problem)
+    {
+      if (st->errors++ == 0)
+        st->error_callback(st, problem);
+      return;
+    }
+
+  st->obj = st->obj->parent;
+}
+
+/*
+ * Feed one attribute to the reader. `(' opens a sub-object, `)' closes
+ * it, and everything else is added to the current object with
+ * obj_add_attr().
+ */
+static inline void
+obj_read_attr(struct obj_read_state *st, uns type, byte *val)
+{
+  switch (type)
+    {
+    case '(':
+      obj_read_push(st, val);
+      break;
+    case ')':
+      obj_read_pop(st, val);
+      break;
+    default:
+      obj_add_attr(st->obj, type, val);
+      break;
+    }
+}
+
+/*
+ * Variant of obj_read_attr() that stores ordinary attributes with
+ * obj_add_attr_ref() instead of obj_add_attr(). The `(' and `)'
+ * attributes are handled in the same way.
+ */
+static inline void
+obj_read_attr_ref(struct obj_read_state *st, uns type, byte *val)
+{
+  switch (type)
+    {
+    case '(':
+      obj_read_push(st, val);
+      break;
+    case ')':
+      obj_read_pop(st, val);
+      break;
+    default:
+      obj_add_attr_ref(st->obj, type, val);
+      break;
+    }
+}
+
+/*
+ * Finish reading. If some `(' block is still open, i.e. the current
+ * object is not the root, this counts as one more error, reported via
+ * the callback only when it is the first one. Returns the total number
+ * of errors, so zero means success.
+ */
+static inline int
+obj_read_end(struct obj_read_state *st)
+{
+  int unbalanced = (st->obj != st->root);
+
+  if (unlikely(unbalanced))
+    {
+      if (st->errors++ == 0)
+        st->error_callback(st, "Malformed object: truncated `( ... )' block");
+    }
+  return st->errors;
+}
+
+#endif
--- /dev/null
+# Perl modules
+#
+# Appends the Perl directory to DIRS and the generated Object.pm (under $(o))
+# to PROGS.
+
+DIRS+=sherlock/perl
+EXTRA_RUNDIRS+=lib/perl5/Sherlock
+SHERLOCK_PERL_MODS=$(addprefix $(o)/sherlock/perl/,Object.pm)
+PROGS+=$(SHERLOCK_PERL_MODS)
+
+# Target-specific variable: PERL_MODULE_DIR is "Sherlock" for these targets only.
+# NOTE(review): presumably the generic build rules use it to choose the
+# installation directory (cf. lib/perl5/Sherlock above) -- confirm there.
+$(SHERLOCK_PERL_MODS): PERL_MODULE_DIR=Sherlock
--- /dev/null
+# Perl module for manipulating Sherlock objects
+#
+# (c) 2007 Martin Mares <mj@ucw.cz>
+#
+# This software may be freely distributed and used according to the terms
+# of the GNU Lesser General Public License.
+
+=head1 NAME
+
+Sherlock::Object -- Manipulation of Sherlock objects
+
+=head1 DESCRIPTION
+
+This module offers a simple interface to Sherlock objects. See F<doc/objects>
+for a description of how the object system works.
+
+=head1 METHODS
+
+=over
+
+=item B<new()>
+
+Creates a new empty object.
+
+=item B<< new(I<name> => I<value>, ...) >>
+
+Creates a new object with some attributes initialized.
+
+=item B<< set(I<name> => I<value>, ...) >>
+
+Sets given attributes to specified values. If any of the attributes already
+exists, the old value is replaced by the new one.
+
+The value can be:
+
+=over
+
+=item *
+
+a number
+
+=item *
+
+a string
+
+=item *
+
+a reference to an array of values, which creates a multi-valued attribute.
+
+=item *
+
+a reference to another object, which creates a nested object. In this case
+(and only in this case), the name of the attribute must start with C<(>.
+
+=back
+
+=item B<< unset(I<name>, ...) >>
+
+Removes given attributes.
+
+=item B<< add(I<name> => I<value>, ...) >>
+
+Sets given attributes to specified values. If any of the attributes already
+exists, the new value is added to the original one, creating a multi-valued
+attribute.
+
+The repertoire of values is the same as in the C<set> method except for references
+to arrays, which are not allowed.
+
+=item B<< get(I<name>) >>
+
+Gets the value of the given attribute or C<undef> if it does not exist.
+If the attribute is multi-valued, the first value is returned.
+
+=item B<< getarray(I<name>) >>
+
+Gets all values of the given attribute as an array or an empty array if the
+attribute does not exist.
+
+=item B<< get_attrs() >>
+
+Gets an array of names of all attributes present in the object.
+
+=item B<< read(I<handle>) >>
+
+Reads a textual representation of an object from the given handle and adds it
+to the object it is invoked on. The effect is the same as calling the C<add>
+method on all read attributes.
+
+Returns 1 if the object has been read successfully, 0 if the input
+stream ended before an object started or C<undef> if the input was
+malformed.
+
+=item B<< read(I<handle>, raw => 1) >>
+
+Reads an object as above, but adds a special attribute called C<< RAW >>, which will
+contain the raw form of the object.
+
+=item B<< write(I<handle>) >>
+
+Writes a textual representation of the object to the given handle.
+
+=item B<< write_indented(I<handle>, [I<base_indent>]) >>
+
+Writes an indented textual representation of the object to the given handle.
+The I<base_indent> will be prepended to all printed lines, each nested level
+will get one tab character more.
+
+This is intended for debugging dumps and the output cannot be read back by any
+of the Sherlock libraries.
+
+=back
+
+=head1 AUTHOR
+
+Martin Mares <mj@ucw.cz>
+
+=cut
+
+package Sherlock::Object;
+
+use strict;
+use warnings;
+
+# Constructor. The invocant is accepted but ignored: the object is always
+# blessed into this package (one-argument bless). Any further arguments
+# are handed to set() as name => value pairs.
+sub new($@) {
+    my ($class, @init) = @_;
+    my $self = { };
+    bless $self;
+    $self->set(@init) if @init;
+    return $self;
+}
+
+# set(name => value, ...): store each value under its name, replacing
+# whatever was there. Processing stops at the first undefined name.
+sub set($@) {
+    my ($self, @pairs) = @_;
+    while (@pairs) {
+        my $attr = shift @pairs;
+        last unless defined $attr;
+        $self->{$attr} = shift @pairs;
+    }
+}
+
+# unset(name, ...): remove the listed attributes; missing ones are ignored.
+sub unset($@) {
+    my ($self, @names) = @_;
+    # Deleting a hash slice removes all listed keys at once
+    delete @{$self}{@names};
+}
+
+# add(name => value, ...): like set(), but a value given for an attribute
+# which already exists is appended, turning the attribute into an array
+# of values when it is not one yet. Processing stops at the first
+# undefined name.
+sub add($@) {
+    my ($self, @pairs) = @_;
+    while (defined(my $attr = shift @pairs)) {
+        my $val = shift @pairs;
+        if (!exists $self->{$attr}) {
+            # First value: stored as a plain scalar
+            $self->{$attr} = $val;
+            next;
+        }
+        my $cur = $self->{$attr};
+        if (ref($cur) eq "ARRAY") {
+            push @$cur, $val;
+        } else {
+            # Second value: promote the scalar to an array
+            $self->{$attr} = [ $cur, $val ];
+        }
+    }
+}
+
+# get(name): the value of the attribute, the first value if it has
+# several, or undef if the attribute does not exist.
+sub get($$) {
+    my ($self, $attr) = @_;
+    return undef unless exists $self->{$attr};
+    my $val = $self->{$attr};
+    return ref($val) eq "ARRAY" ? $val->[0] : $val;
+}
+
+# getarray(name): all values of the attribute as a list; the empty list
+# if the attribute does not exist.
+sub getarray($$) {
+    my ($self, $attr) = @_;
+    return () unless exists $self->{$attr};
+    my $val = $self->{$attr};
+    return ref($val) eq "ARRAY" ? @{$val} : ( $val );
+}
+
+# get_attrs(): names of all attributes present in the object, in hash order.
+sub get_attrs($) {
+    my $self = shift @_;
+    return keys %{$self};
+}
+
+# read(handle, [raw => 1]): read the textual form of one object from the
+# handle and add its attributes to $self.
+#
+# Every line is a one-character attribute name followed by the value. A
+# line starting with "(" opens a nested object, stored under the name
+# "(" plus the rest of the line. A line starting with ")" closes it. An
+# empty line ends the object.
+#
+# With raw => 1, all lines read are also collected in the RAW attribute
+# of $self.
+#
+# Returns 1 if an object has been read, 0 if the input ended (or an empty
+# line came) before any attribute, undef if the input is malformed.
+#
+# Lines are read into a lexical variable rather than the global $_, so
+# the caller's $_ (which may alias an element of a list being iterated)
+# is left intact. The attribute name is not kept in a lexical $a, which
+# would shadow the sort variable.
+sub read($$@) {
+    my $self = shift @_;
+    my $fh = shift @_;
+    my %opts = @_;
+    my @stack = ();             # enclosing objects of the one being filled
+    my $read_something = 0;
+    my $obj = $self;
+    my $raw;
+    if ($opts{raw}) {
+        $raw = $obj->{"RAW"} = [];
+    }
+    while (defined(my $line = <$fh>)) {
+        chomp $line;
+        $line =~ /^$/ and last;
+        my ($name, $v) = $line =~ /^(.)(.*)$/ or return undef;
+        push @$raw, $line if $raw;
+        if ($name eq "(") {
+            $name = "$name$v";
+            my $new = Sherlock::Object->new;
+            $obj->add($name, $new);
+            push @stack, $obj;
+            $obj = $new;
+        } elsif ($name eq ")") {
+            # A ")" without a matching "(" is malformed input
+            @stack or return undef;
+            $obj = pop @stack;
+        } else {
+            $obj->add($name, $v);
+        }
+        $read_something = 1;
+    }
+    # The input ended inside a nested object
+    @stack and return undef;
+    return $read_something;
+}
+
+# write(handle): print the textual form of the object. Plain values are
+# printed as name immediately followed by value. A nested object is
+# printed as its name on a line of its own, then its contents, then a
+# ")" line. Attributes come in hash order. Dies on a value which is
+# neither a plain scalar nor a Sherlock::Object.
+sub write($$) {
+    my ($self, $fh) = @_;
+    foreach my $name (keys %$self) {
+        my $stored = $self->{$name};
+        my @vals = ref($stored) eq "ARRAY" ? @{$stored} : ($stored);
+        foreach my $v (@vals) {
+            my $kind = ref $v;
+            if ($kind eq "") {
+                print $fh $name, $v, "\n";
+            } elsif ($kind eq "Sherlock::Object") {
+                print $fh $name, "\n";
+                $v->write($fh);
+                print $fh ")\n";
+            } else {
+                die;
+            }
+        }
+    }
+}
+
+# write_indented(handle, [base_indent]): debugging dump of the object.
+# Attributes are printed sorted by name, every line is prefixed with the
+# indent, and each nesting level adds one tab. Dies on a value which is
+# neither a plain scalar nor a Sherlock::Object.
+sub write_indented($$$) {
+    my ($self, $fh, $indent) = @_;
+    $indent = "" unless defined $indent;
+    foreach my $name (sort keys %$self) {
+        my $stored = $self->{$name};
+        my @vals = ref($stored) eq "ARRAY" ? @{$stored} : ($stored);
+        foreach my $v (@vals) {
+            my $kind = ref $v;
+            if ($kind eq "") {
+                print $fh $indent, $name, $v, "\n";
+            } elsif ($kind eq "Sherlock::Object") {
+                print $fh $indent, $name, "\n";
+                $v->write_indented($fh, $indent . "\t");
+                print $fh $indent, ")\n";
+            } else {
+                die;
+            }
+        }
+    }
+}
+
+1; # OK