2 * Sherlock Library -- Generating Objects from Buckets
4 * (c) 2004, Robert Spalek <robert@ucw.cz>
5 * (c) 2004--2006, Martin Mares <mj@ucw.cz>
7 * This software may be freely distributed and used according to the terms
8 * of the GNU Lesser General Public License.
13 #include "sherlock/sherlock.h"
14 #include "ucw/unaligned.h"
15 #include "ucw/mempool.h"
16 #include "ucw/fastbuf.h"
17 #include "ucw/unicode.h"
18 #include "sherlock/object.h"
19 #include "sherlock/objread.h"
20 #include "ucw/lizard.h"
22 #include "ucw/ff-unicode.h"
/*
 * RET_ERR(num): store num in errno and make the enclosing function
 * return -1. The ({ ... }) wrapper is a GNU statement expression.
 */
27 #define RET_ERR(num) ({ errno = num; return -1; })
/*
 * Pointer to a lizard_buffer.
 * NOTE(review): other code in this file accesses buf->lizard on a
 * struct buck2obj_buf, so this is presumably a member of that struct;
 * the enclosing declaration is not visible in this listing -- confirm.
 */
32 struct lizard_buffer *lizard;
/*
 * File-local bucket type that get_attr() and bget_attr() compare against
 * BUCKET_TYPE_V33 and BUCKET_TYPE_PLAIN to pick the attribute encoding.
 * NOTE(review): presumably assigned by get_attr_set_type(); the assignment
 * itself is not visible in this listing.
 */
35 static uns get_attr_type;
/*
 * get_attr_set_type: check a bucket type before it is used by the
 * attribute readers.
 *
 * type: bucket type; values below BUCKET_TYPE_PLAIN or above
 *       BUCKET_TYPE_V33_LIZARD are reported through die().
 *
 * NOTE(review): the listing skips lines here; presumably the accepted
 * type is then stored in get_attr_type -- confirm.
 */
38 get_attr_set_type(uns type)
/* Range check against the lowest and highest known bucket types. */
40 if (type < BUCKET_TYPE_PLAIN || type > BUCKET_TYPE_V33_LIZARD)
41 die("Unknown buckettype %x", type);
/*
 * get_attr: parse one attribute from the memory range delimited by
 * *pos and end into *attr.
 *
 * Two encodings are visible, selected by get_attr_type:
 *  - types below BUCKET_TYPE_V33: newline-terminated text lines;
 *  - otherwise: a length decoded by utf8_32_get() precedes the data.
 *
 * NOTE(review): several original lines (return type, braces, return
 * statements, the condition guarding die()) are missing from this
 * listing, so the return value contract cannot be stated from here.
 */
46 get_attr(byte **pos, byte *end, struct parsed_attr *attr)
/* Text-based bucket formats. */
51 if (get_attr_type < BUCKET_TYPE_V33)
53 if (get_attr_type == BUCKET_TYPE_PLAIN)
/* Plain buckets: step over consecutive newline bytes without passing end. */
55 while (ptr < end && *ptr == '\n')
/* Other pre-V33 types test for a newline at the current position (handling not shown). */
61 else if (*ptr == '\n')
/* Scan forward to the next newline or to end, whichever comes first. */
69 while (ptr < end && *ptr != '\n')
/* Value length is the distance from attr->val to the stop position; the post-increment then moves ptr one byte further. */
71 attr->len = ptr++ - attr->val;
/* Length-prefixed format: decode the length and advance ptr past it. */
76 ptr = utf8_32_get(ptr, &len);
/* The attribute type is the byte at offset len from the current position. */
83 attr->attr = ptr[len];
89 die("Incomplete attribute %c", attr->attr);
/*
 * bget_attr: read one attribute from fastbuf b into *attr, using the
 * encoding selected by get_attr_type (same split as get_attr():
 * below BUCKET_TYPE_V33 is line-based, otherwise length-prefixed).
 *
 * Both encodings show a direct-buffer path (bdirect_read_prepare /
 * bdirect_read_commit) and a path that goes through a growable buffer
 * named buf (bb_grow).
 *
 * NOTE(review): the conditions choosing between those paths, the
 * declarations of ptr, end, c and buf, and all return statements are
 * missing from this listing -- confirm against the full file.
 */
95 bget_attr(struct fastbuf *b, struct parsed_attr *attr)
/* Line-based bucket formats. */
98 if (get_attr_type < BUCKET_TYPE_V33)
103 if (get_attr_type == BUCKET_TYPE_PLAIN)
/* Ask the fastbuf for direct access to its buffer; len is the amount reported as available. */
118 uns len = bdirect_read_prepare(b, &ptr);
/* Look for the terminating newline inside the directly accessible data. */
121 while (ptr < end && *ptr != '\n')
/* Consume the data up to and including the byte at ptr. */
125 bdirect_read_commit(b, ptr+1);
/* Value length is the distance from attr->val to ptr. */
126 attr->len = ptr - attr->val;
/* Alternative path: loop while c is non-negative and not a newline, growing buf as needed. */
132 while (c >= 0 && c != '\n')
134 bb_grow(&buf, len+1);
139 die("Incomplete attribute %c", attr->attr);
/* Length-prefixed format: the length is read from the stream first. */
145 int len = bget_utf8_32(b);
156 int avail = bdirect_read_prepare(b, &ptr);
/* Direct path: the type is the last of the len bytes, and all len bytes are consumed. */
160 attr->attr = ptr[len-1];
161 bdirect_read_commit(b, ptr + len);
/* Buffered path: len is decremented first, len bytes are read into buf, then the type is read as the following byte. */
164 bb_grow(&buf, --len);
165 breadb(b, buf.ptr, len);
168 attr->attr = bgetc(b);
170 die("Incomplete attribute %c", attr->attr);
/*
 * copy_parsed_attr: duplicate the value of a parsed attribute into
 * memory taken from pool.
 *
 * pool: memory pool the copy is allocated from.
 * attr: attribute whose val/len describe the bytes to copy.
 *
 * NOTE(review): one byte more than attr->len is allocated; presumably a
 * terminator is written there and attr->val is redirected to the copy,
 * but those lines are not visible in this listing.
 */
176 copy_parsed_attr(struct mempool *pool, struct parsed_attr *attr)
/* Allocate len+1 bytes and copy the len value bytes into them. */
178 byte *b = mp_alloc_fast_noalign(pool, attr->len+1);
179 memcpy(b, attr->val, attr->len);
/*
 * Allocate a struct buck2obj_buf with xmalloc() and give it a lizard
 * buffer obtained from lizard_alloc().
 *
 * NOTE(review): the line carrying the function name is missing from this
 * listing (presumably the allocator paired with buck2obj_free), as are
 * any further field initialisations and the return statement.
 */
184 struct buck2obj_buf *
187 struct buck2obj_buf *buf = xmalloc(sizeof(struct buck2obj_buf));
189 buf->lizard = lizard_alloc();
/*
 * buck2obj_free: release the resources held by buf.
 * The visible line frees the lizard buffer stored in buf->lizard.
 * NOTE(review): presumably buf itself and its other members are freed on
 * lines not shown here -- confirm.
 */
194 buck2obj_free(struct buck2obj_buf *buf)
196 lizard_free(buf->lizard);
/*
 * decode_attributes: decode length-prefixed attributes found between
 * ptr and end and add them to object o.
 *
 * can_overwrite: when >= 2 the attributes are registered by reference to
 *                the input bytes themselves; the other visible variant
 *                first copies each value into o->pool.
 *
 * The callers in this file assign the result back to their ptr.
 *
 * NOTE(review): loop headers, terminator handling and the return
 * statement are missing from this listing.
 */
202 decode_attributes(byte *ptr, byte *end, struct odes *o, uns can_overwrite)
204 struct obj_read_state st;
205 obj_read_start(&st, o);
/* Reference variant: values are handed over without copying. */
207 if (can_overwrite >= 2)
/* Decode the length prefix and advance past it. */
211 ptr = utf8_32_get(ptr, &len);
/* The type byte sits at offset len from the current position. */
214 byte type = ptr[len];
217 obj_read_attr_ref(&st, type, ptr);
/* Copying variant: same decoding, but the value is duplicated first. */
225 ptr = utf8_32_get(ptr, &len);
228 byte type = ptr[len];
/* len+1 bytes are reserved in the object pool, len of them filled from the input. */
230 byte *dup = mp_alloc_fast_noalign(o->pool, len+1);
231 memcpy(dup, ptr, len);
233 obj_read_attr_ref(&st, type, dup);
/*
 * buck2obj_parse: parse a bucket of buck_len bytes read from fastbuf body
 * into the objects o_hdr (header attributes) and o_body (body attributes).
 *
 * buf:        scratch state; its bb buffer and lizard buffer are used.
 * buck_type:  selects one of three visible decoders:
 *               - buck_type <= BUCKET_TYPE_PLAIN: text lines, no header part;
 *               - BUCKET_TYPE_V30: text lines, header ended by an empty line;
 *               - BUCKET_TYPE_V33 / BUCKET_TYPE_V33_LIZARD: length-prefixed
 *                 attributes, the LIZARD variant with a compressed body.
 * body_start: when used, receives the offset of the body relative to the
 *             start of the bucket.
 *
 * The caller in this file treats a negative result as failure.
 *
 * NOTE(review): the end of the parameter list (which includes
 * allow_zero_copy, used below), braces, else branches and every return
 * statement are missing from this listing, so the exact conditions
 * around several statements cannot be confirmed from here.
 */
242 buck2obj_parse(struct buck2obj_buf *buf, uns buck_type, uns buck_len, struct fastbuf *body,
243 struct odes *o_hdr, uns *body_start, struct odes *o_body,
246 struct obj_read_state st;
/* Plain text bucket. */
247 if (buck_type <= BUCKET_TYPE_PLAIN)
249 if (body_start) // there is no header part
251 obj_read_start(&st, o_hdr);
253 // ignore empty lines and read until the end of the bucket
254 ucw_off_t end = btell(body) + buck_len;
/* Each non-empty line is one attribute: first byte is the type, the rest the value. */
255 while (btell(body) < end && bgets_bb(body, &buf->bb, ~0U))
256 if ((b = buf->bb.ptr)[0])
257 obj_read_attr(&st, b[0], b+1);
/* The reader must have stopped exactly at the end of the bucket. */
258 ASSERT(btell(body) == end);
/* V3.0 text bucket: header lines, then body lines. */
261 else if (buck_type == BUCKET_TYPE_V30)
263 ucw_off_t start = btell(body);
264 ucw_off_t end = start + buck_len;
266 struct obj_read_state st;
267 obj_read_start(&st, o_hdr);
/* Header: this loop also stops at the first empty line. */
268 while (btell(body) < end && bgets_bb(body, &buf->bb, ~0U) && (b = buf->bb.ptr)[0])
269 obj_read_attr(&st, b[0], b+1);
/* Body offset is measured from the start of the bucket. */
272 *body_start = btell(body) - start;
/* Body: empty lines are skipped rather than ending the loop. */
275 obj_read_start(&st, o_body);
276 while (btell(body) < end && bgets_bb(body, &buf->bb, ~0U))
277 if ((b = buf->bb.ptr)[0])
278 obj_read_attr(&st, b[0], b+1);
279 ASSERT(btell(body) == end);
/* V3.3 buckets, plain or lizard-compressed. */
283 else if (buck_type == BUCKET_TYPE_V33 || buck_type == BUCKET_TYPE_V33_LIZARD)
285 /* Avoid reading the whole bucket if only its header is needed. */
288 ucw_off_t start = btell(body);
289 ucw_off_t end = start + buck_len;
290 obj_read_start(&st, o_hdr);
291 while (btell(body) < end)
293 uns len = bget_utf8_32(body);
/* NOTE(review): this local named buf shadows the buck2obj_buf parameter buf inside this scope. */
296 byte *buf = mp_alloc_fast_noalign(o_hdr->pool, len);
297 bread(body, buf, len);
/* The last byte read is the attribute type; len is decremented to index it. */
298 uns type = buf[--len];
300 obj_read_attr_ref(&st, type, buf);
303 *body_start = btell(body) - start;
307 /* Read all the bucket into 1 buffer, 0-copy if possible. */
309 uns len = bdirect_read_prepare(body, &ptr);
311 if (len < buck_len ||
312 ((body->can_overwrite_buffer < 2 || !allow_zero_copy) && buck_type == BUCKET_TYPE_V33))
314 /* Copy if the original buffer is too small.
315 * If it is write-protected, copy it also if it is uncompressed. */
316 DBG("NO ZC: %d < %d, %d %08x", len, buck_len, body->can_overwrite_buffer, buck_type);
/* Copying path: make room in buf->bb and read the whole bucket into it. */
317 bb_grow(&buf->bb, buck_len);
318 len = bread(body, buf->bb.ptr, buck_len);
323 DBG("ZC (%d >= %d, %d %08x)", len, buck_len, body->can_overwrite_buffer, buck_type);
324 end = ptr + buck_len;
/* Header attributes are decoded with can_overwrite 0, i.e. values are copied into the object pool. */
326 ptr = decode_attributes(ptr, end, o_hdr, 0); // header
327 if (buck_type == BUCKET_TYPE_V33_LIZARD) // decompression
331 if (ptr == end) // truncated bucket
/* A 32-bit checksum is read at ptr and compared after decompression. */
337 uns adler = GET_U32(ptr);
339 byte *new_ptr = lizard_decompress_safe(ptr, buf->lizard, len);
342 if (adler32(new_ptr, len) != adler)
345 bdirect_read_commit(body, end);
/* Body attributes are decoded with can_overwrite 2, i.e. registered by reference to the buffer. */
350 ptr = decode_attributes(ptr, end, o_body, 2); // body
355 bdirect_read_commit_modified(body, ptr);
/* Skip buck_len bytes of the stream (guarding condition not shown). */
359 bskip(body, buck_len);
/*
 * obj_read_bucket: create a new object in pool and fill it from a bucket
 * by calling buck2obj_parse().
 *
 * The same object is passed as both header and body target, so all
 * attributes end up in one object. The remaining arguments are forwarded
 * to buck2obj_parse() unchanged.
 *
 * NOTE(review): what is returned on success and on a negative parse
 * result is on lines missing from this listing.
 */
366 obj_read_bucket(struct buck2obj_buf *buf, struct mempool *pool, uns buck_type, uns buck_len, struct fastbuf *body,
367 uns *body_start, uns allow_zero_copy)
369 struct odes *o = obj_new(pool);
/* A negative result from the parser is treated as failure. */
370 if (buck2obj_parse(buf, buck_type, buck_len, body, o, body_start, o, allow_zero_copy) < 0)
/*
 * obj_read_line: read one text line from f and feed it to the object
 * reader state st as an attribute.
 *
 * NOTE(review): the checks between reading the line and using it, and
 * the return values (obj_read() tests the result against 0), are on
 * lines missing from this listing.
 */
377 obj_read_line(struct fastbuf *f, struct obj_read_state *st)
379 byte *buf = bgets_stk(f);
/* First byte of the line is the attribute type, the remainder its value. */
384 obj_read_attr(st, buf[0], buf+1);
/*
 * obj_read: read attributes from f into object o, one line per call to
 * obj_read_line().
 *
 * NOTE(review): the declaration of rc and the statements after the loop
 * (including the return value) are missing from this listing.
 */
392 obj_read(struct fastbuf *f, struct odes *o)
394 struct obj_read_state st;
396 obj_read_start(&st, o);
/* Empty-bodied loop: keeps calling obj_read_line() for as long as it returns a negative value. */
397 while ((rc = obj_read_line(f, &st)) < 0);
/*
 * default_obj_read_error: report an object-reading error by logging the
 * message at L_ERROR level.
 *
 * st:  reader state; marked UNUSED and not referenced on the visible line.
 * err: message text, passed through a "%s" format rather than used as the
 *      format string itself.
 */
403 default_obj_read_error(struct obj_read_state *st UNUSED, char *err)
405 msg(L_ERROR, "%s", err);