2 * Sherlock Library -- Objects and operations on them
4 * (c) 1997--2006 Martin Mares <mj@ucw.cz>
5 * (c) 2004--2005, Robert Spalek <robert@ucw.cz>
7 * This software may be freely distributed and used according to the terms
8 * of the GNU Lesser General Public License.
12 * This is the main data structure used by Sherlock for many different
13 * purposes, most notably storage of documents in various stages of processing
15 * Each object consists of a sequence of attributes whose names are single
16 * characters and values are either strings or subobjects. The order of attributes
17 * is not maintained (except for a couple of very special cases), while the order
18 * of multiple values of a single attribute is.
20 * Objects exist either in the form of struct odes (an in-memory representation
21 * with very easy manipulation) or as a bucket (a linear stream of bytes in one of
22 * several possible formats, some of which are compressed, used for sending objects
23 * between processes and storing them in files [see sherlock/bucket.h for bucket files]).
25 * See doc/objects for a more detailed description of how objects are used to
26 * represent documents.
29 #ifndef _SHERLOCK_OBJECT_H
30 #define _SHERLOCK_OBJECT_H
35 /* object.c: In-memory representation of objects */
37 struct odes { /* Object description */
40 struct oattr *cached_attr;
44 struct oattr { /* Object attribute */
45 struct oattr *next, *same;
46 uns attr; /* +OBJ_ATTR_SON if it's a sub-object */
/*
 * Flag added to the `attr' field of a struct oattr when the attribute holds
 * a sub-object (see the comment on that field). Attribute names are single
 * characters, so 256 presumably cannot collide with a plain name -- confirm
 * against object.c.
 */
53 #define OBJ_ATTR_SON 256
/*
 * Dump the contents of an object (a debugging aid, judging by the name).
 * Where the output goes is not visible in this header.
 */
55 void obj_dump(struct odes *);
/*
 * As obj_dump(), with an extra unsigned argument -- presumably the
 * indentation depth (name-based guess; confirm in object.c).
 */
56 void obj_dump_indented(struct odes *, uns);
/*
 * Object creation, attribute lookup and removal.
 *
 * In the lookup functions the `uns' argument after the object is presumably
 * the attribute name (names are single characters and struct oattr keeps the
 * name in an `uns attr' field). What is returned when the attribute is
 * missing is not visible here -- check object.c before relying on it.
 */

/* Create a new object; presumably allocated from the given memory pool. */
57 struct odes *obj_new(struct mempool *);
/* Find an attribute of the object by name and return its struct oattr. */
58 struct oattr *obj_find_attr(struct odes *, uns);
/* As obj_find_attr(), but by the name it presumably returns the last value of a multi-valued attribute. */
59 struct oattr *obj_find_attr_last(struct odes *, uns);
/* Remove the given attribute from the object. NOTE(review): meaning of the uns result is not visible here. */
60 uns obj_del_attr(struct odes *, struct oattr *);
/* Find an attribute and return its value as a byte string (presumably the first value). */
61 byte *obj_find_aval(struct odes *, uns);
/* Find an attribute and return it as a number; the third argument is presumably the default used when it is absent -- TODO confirm. */
62 uns obj_find_anum(struct odes *, uns, uns);
/* 32-bit variant; NOTE(review): the "x" suggests a hexadecimal value, third argument presumably the default -- confirm. */
63 u32 obj_find_x32(struct odes *, uns, u32);
/* 64-bit counterpart of obj_find_x32(). */
64 u64 obj_find_x64(struct odes *, uns, u64);
/*
 * Setting, adding and reordering attribute values.
 *
 * All functions take the object first; the following `uns' is presumably the
 * attribute name. By their names, the obj_set_* functions presumably replace
 * the existing value(s) of the attribute, while obj_add_* / obj_prepend_*
 * add one more value (the order of multiple values of one attribute is
 * maintained, see the top of this file). Verify the exact semantics in
 * object.c. The functions returning struct oattr * presumably return the
 * affected attribute.
 */

/* Set an attribute to a string value. */
65 struct oattr *obj_set_attr(struct odes *, uns, byte *);
/* Set an attribute to a numeric value. */
66 struct oattr *obj_set_attr_num(struct odes *, uns, uns);
/* Add a string value; presumably the string is copied, in contrast to obj_add_attr_ref() below. */
67 struct oattr *obj_add_attr(struct odes *, uns, byte *);
/* Add a string value without duplicating it; presumably `v' must then stay valid as long as the object uses it. */
68 struct oattr *obj_add_attr_ref(struct odes *o, uns x, byte *v); // no strdup()
/* Add a numeric value. */
69 struct oattr *obj_add_attr_num(struct odes *o, uns, uns);
/* Add a sub-object as an attribute value. */
70 struct oattr *obj_add_attr_son(struct odes *, uns, struct odes *);
/* Add a string value at the front rather than at the end (judging by the name). */
71 struct oattr *obj_prepend_attr(struct odes *, uns, byte *);
/* Insert value `v' relative to existing values `first' and `after'; NOTE(review): exact placement rules are not visible here. */
72 struct oattr *obj_insert_attr(struct odes *o, struct oattr *first, struct oattr *after, byte *v);
/* Move the named attribute to the head of the object's attribute sequence (by name). */
73 void obj_move_attr_to_head(struct odes *o, uns);
/* Move the named attribute to the tail of the object's attribute sequence (by name). */
74 void obj_move_attr_to_tail(struct odes *o, uns);
/*
 * Sub-objects ("sons") and cloning. The `uns' argument is presumably the
 * attribute name under which the sub-object is stored -- confirm in object.c.
 */

/* Find a sub-object stored under the given attribute and return it. */
75 struct odes *obj_find_son(struct odes *, uns);
/* Add a sub-object under the given attribute and return it; presumably a newly created empty object. */
76 struct odes *obj_add_son(struct odes *, uns);
/* Attach the existing object `son' under attribute `x'; by the "_ref" suffix presumably without copying it. */
77 struct oattr *obj_add_son_ref(struct odes *o, uns x, struct odes *son);
/* Add a copy of attribute `a' to object `o' (judging by the name). */
78 void obj_add_attr_clone(struct odes *o, struct oattr *a);
/* Create a copy of `src'; the copy is presumably allocated from `pool'. */
79 struct odes *obj_clone(struct mempool *pool, struct odes *src);
81 /* Supported bucket formats */
84 BUCKET_TYPE_COMPAT = 0x7fffffff, /* and less -- buckets created by older versions of Sherlock */
85 BUCKET_TYPE_PLAIN = 0x80000000, /* plain textual buckets */
86 BUCKET_TYPE_V30 = 0x80000001, /* v3.0 uncompressed buckets */
87 BUCKET_TYPE_V33 = 0x80000002, /* v3.3 uncompressed buckets */
88 BUCKET_TYPE_V33_LIZARD = 0x80000003 /* v3.3 buckets compressed by lizard */
91 /* buck2obj.c: Reading of objects from buckets */
100 /* note: get_attr routines are not thread-safe */
/*
 * Low-level attribute parsing. struct parsed_attr is not visible in this
 * part of the header, so the notes below are hedged.
 */

/* Select the bucket type for subsequent get_attr()/bget_attr() calls; presumably one of the BUCKET_TYPE_* values. Global state would explain the thread-safety note. */
101 void get_attr_set_type(uns type);
/* Parse one attribute from memory between *pos and end into `attr'; *pos is presumably advanced. NOTE(review): return value convention not visible here. */
102 int get_attr(byte **pos, byte *end, struct parsed_attr *attr);
/* As get_attr(), but reading from a fastbuf. */
103 int bget_attr(struct fastbuf *b, struct parsed_attr *attr);
/* Presumably copies the data referenced by `attr' into `pool' so it outlives the input buffer -- TODO confirm. */
104 void copy_parsed_attr(struct mempool *pool, struct parsed_attr *attr);
/*
 * Allocate / release the opaque working buffer that is passed as the first
 * argument to buck2obj_parse(), obj_read_bucket() and obj_delinearize().
 */
106 struct buck2obj_buf *buck2obj_alloc(void);
107 void buck2obj_free(struct buck2obj_buf *buf);
/*
 * Parse a bucket of type `buck_type' and length `buck_len' read from `body',
 * using the working buffer `buf'. Judging by the parameter names, header
 * attributes go to `o_hdr' and body attributes to `o_body', and `body_start'
 * and `allow_zero_copy' presumably have the same meaning as in
 * obj_read_bucket(). NOTE(review): the meaning of the int result is not
 * visible in this header -- check buck2obj.c.
 */
109 int buck2obj_parse(struct buck2obj_buf *buf, uns buck_type, uns buck_len, struct fastbuf *body,
110 struct odes *o_hdr, uns *body_start, struct odes *o_body,
111 uns allow_zero_copy);
/*
 * Read a bucket of type `buck_type' and length `buck_len' from `body' and
 * return it as an object; the object is presumably allocated from `pool'.
 * `allow_zero_copy' presumably permits keeping attribute values inside the
 * fastbuf's buffer. NOTE(review): what is returned on a parse error is not
 * visible here.
 */
112 struct odes *obj_read_bucket(struct buck2obj_buf *buf, struct mempool *pool, uns buck_type, uns buck_len, struct fastbuf *body,
113 uns *body_start, uns allow_zero_copy);
114 /* If body_start != NULL, then only the header is parsed and *body_start is
115 * set to the position of the body. This function does plenty of optimizations
116 * and if the body fastbuf is overwritable (body->can_overwrite_buffer), it can keep the
117 * attribute values stored at their original locations in the fastbuf's buffer.
118 * However, no such optimizations are performed when reading the header only.
/*
 * Read an object from a fastbuf into the given struct odes. NOTE(review):
 * the expected input format and the meaning of the int result are not
 * visible in this header -- check buck2obj.c.
 */
121 int obj_read(struct fastbuf *, struct odes *);
123 /* obj2buck.c: Generating buckets from objects */
/*
 * Select the bucket type used by the put_attr*() / bput_attr*() writers;
 * presumably one of the BUCKET_TYPE_* values and presumably kept in global
 * state (mirrors get_attr_set_type()) -- confirm in obj2buck.c.
 */
125 void put_attr_set_type(uns type);
/* Size of an encoded attribute whose value is `len' bytes long; presumably an upper bound for sizing the buffer given to put_attr(). */
127 uns size_attr(uns len);
/* Size of the encoded form of the whole object `d'; presumably the space put_object() needs. */
128 uns size_object(struct odes *d);
/*
 * Writers producing attributes into a memory buffer. Each takes the current
 * output pointer and returns a byte pointer -- presumably the position just
 * past the written data, so calls can be chained. No buffer length is
 * passed, so the caller presumably must guarantee enough space (see
 * size_attr() / size_object()). `type' is presumably the attribute name.
 */

/* Write an attribute with a value of `len' bytes. */
130 byte *put_attr(byte *ptr, uns type, byte *val, uns len);
/* Write an attribute whose value is the string `val' (presumably zero-terminated). */
131 byte *put_attr_str(byte *ptr, uns type, byte *val);
/* printf-style formatted value, va_list variant. NOTE(review): `mask' is byte * here but char * in put_attr_format(). */
132 byte *put_attr_vformat(byte *ptr, uns type, byte *mask, va_list va);
/* printf-style formatted value; format string checked by the compiler. NOTE(review): uses a raw __attribute__ where obj_add_attr_format() uses FORMAT_CHECK. */
133 byte *put_attr_format(byte *ptr, uns type, char *mask, ...) __attribute__((format(printf,3,4)));
/* Write an attribute with a numeric value. */
134 byte *put_attr_num(byte *ptr, uns type, uns val);
/* Write a separator; NOTE(review): what it separates (e.g. header from body) is not visible here. */
135 byte *put_attr_separator(byte *ptr);
/* Presumably opens a nested sub-object of the given type ... */
136 byte *put_attr_push(byte *ptr, uns type);
/* ... and closes the most recently opened one -- TODO confirm. */
137 byte *put_attr_pop(byte *ptr);
/* Write the whole object `d' at `t'. */
138 byte *put_object(byte *t, struct odes *d);
/*
 * Fastbuf counterparts of the put_attr*() writers: the same arguments, but
 * the output goes to a fastbuf instead of a memory pointer. The difference
 * between the plain and the *_nocheck variants is not visible in this
 * header -- check obj2buck.c before choosing one.
 */

/* Write an attribute with a value of `len' bytes. */
140 void bput_attr(struct fastbuf *b, uns type, byte *val, uns len);
/* Variant of bput_attr() presumably meant for large values -- TODO confirm what differs. */
141 void bput_attr_large(struct fastbuf *b, uns type, byte *val, uns len);
/* Write an attribute whose value is the string `val' (presumably zero-terminated). */
142 void bput_attr_str(struct fastbuf *b, uns type, byte *val);
/* printf-style formatted value, va_list variant. NOTE(review): `mask' is byte * here but char * in bput_attr_format(). */
143 void bput_attr_vformat(struct fastbuf *b, uns type, byte *mask, va_list va);
/* printf-style formatted value; format string checked by the compiler. NOTE(review): uses a raw __attribute__ where obj_add_attr_format() uses FORMAT_CHECK. */
144 void bput_attr_format(struct fastbuf *b, uns type, char *mask, ...) __attribute__((format(printf,3,4)));
/* Write an attribute with a numeric value. */
145 void bput_attr_num(struct fastbuf *b, uns type, uns val);
/* Write a separator (see put_attr_separator()). */
146 void bput_attr_separator(struct fastbuf *b);
/* Presumably opens a nested sub-object of the given type ... */
147 void bput_attr_push(struct fastbuf *b, uns type);
/* ... and closes the most recently opened one -- TODO confirm. */
148 void bput_attr_pop(struct fastbuf *b);
/* Write the in-memory attribute `a' to the fastbuf. */
149 void bput_oattr(struct fastbuf *f, struct oattr *a);
150 void bput_oattr_nocheck(struct fastbuf *f, struct oattr *a);
/* Write the whole object `o' to the fastbuf. */
151 void bput_object(struct fastbuf *b, struct odes *o);
152 void bput_object_nocheck(struct fastbuf *b, struct odes *o);
/*
 * Write object `o' to fastbuf `b' in the format given by `bucket_type'
 * (presumably one of the BUCKET_TYPE_* values). NOTE(review): which check
 * the *_nocheck variant skips is not visible in this header.
 */
154 void obj_write(struct fastbuf *b, struct odes *o, uns bucket_type);
155 void obj_write_nocheck(struct fastbuf *b, struct odes *o, uns bucket_type);
157 /* obj-linear.c: Linear representation of objects by in-memory buckets */
/*
 * Convert object `d' to an in-memory bucket and return a pointer to it; the
 * length is presumably stored to *plen. `min_compress' presumably controls
 * when compression is applied. NOTE(review): ownership of the returned
 * buffer is not visible here -- check obj-linear.c.
 */
159 byte *obj_linearize(struct odes *d, uns min_compress, uns *plen);
/*
 * Inverse operation: build an object from the `len' bytes at `buf', using
 * the working buffer `bbuf'; the object is presumably allocated from `mp'.
 * `destructive' presumably allows the input buffer to be modified or reused
 * -- TODO confirm.
 */
160 struct odes *obj_delinearize(struct buck2obj_buf *bbuf, struct mempool *mp, byte *buf, uns len, uns destructive);
162 /* obj-format.c: Adding of formatted values */
/*
 * printf-style counterparts of obj_add_attr() and obj_set_attr(): the value
 * of attribute `x' of object `o' is presumably produced by formatting `fmt'
 * with the given arguments. The variadic variants are marked with
 * FORMAT_CHECK(printf,3,4) so that the compiler checks the arguments against
 * the format string; the v* variants take an already started va_list.
 */
164 struct oattr *obj_add_attr_vformat(struct odes *o, uns x, char *fmt, va_list args);
165 struct oattr *obj_add_attr_format(struct odes *o, uns x, char *fmt, ...) FORMAT_CHECK(printf,3,4);
166 struct oattr *obj_set_attr_vformat(struct odes *o, uns x, char *fmt, va_list args);
167 struct oattr *obj_set_attr_format(struct odes *o, uns x, char *fmt, ...) FORMAT_CHECK(printf,3,4);