/*
- * Bucket -> Object converter
+ * Generating Objects from Buckets
*
* (c) 2004, Robert Spalek <robert@ucw.cz>
+ * (c) 2004, Martin Mares <mj@ucw.cz>
*/
#include "lib/lib.h"
#include "lib/unaligned.h"
-#include "lib/pools.h"
+#include "lib/mempool.h"
#include "lib/fastbuf.h"
-#include "charset/unicode.h"
+#include "lib/unicode.h"
#include "lib/object.h"
#include "lib/bucket.h"
#include "lib/lizard.h"
-#include "lib/buck2obj.h"
+#include "lib/bbuf.h"
+#include "lib/ff-utf8.h"
#include <stdlib.h>
#include <errno.h>
#include <unistd.h>
-#define RET_ERR(num) ({ errno = num; return NULL; })
-
-#define GBUF_TYPE byte
-#define GBUF_PREFIX(x) bb_##x
-#include "lib/gbuf.h"
+#define RET_ERR(num) ({ errno = num; return -1; }) /* store the error code in errno and leave the calling function with -1 */
struct buck2obj_buf
{
bb_t bb;
struct lizard_buffer *lizard;
- struct mempool *mp;
};
struct buck2obj_buf *
-buck2obj_alloc(struct mempool *mp)
+buck2obj_alloc(void) /* Allocate a conversion context (xmalloc) with its byte buffer initialized and a lizard buffer attached; it no longer carries a mempool */
{
struct buck2obj_buf *buf = xmalloc(sizeof(struct buck2obj_buf));
bb_init(&buf->bb);
buf->lizard = lizard_alloc();
- buf->mp = mp;
return buf;
}
xfree(buf);
}
-void
-buck2obj_flush(struct buck2obj_buf *buf)
-{
- mp_flush(buf->mp);
-}
-
static inline byte *
decode_attributes(byte *ptr, byte *end, struct odes *o, uns can_overwrite)
{
ptr[len] = 0;
obj_add_attr_ref(o, type, ptr);
- ptr += len + 1;
- }
- else if (can_overwrite == 1)
- while (ptr < end)
- {
- uns len;
- GET_UTF8(ptr, len);
- if (!len--)
- break;
- byte type = ptr[len];
-
- ptr[len] = 0;
- obj_add_attr(o, type, ptr);
- ptr[len] = type;
-
ptr += len + 1;
}
else
return ptr;
}
-struct odes *
-obj_read_bucket(struct buck2obj_buf *buf, uns buck_type, uns buck_len, struct fastbuf *body, uns *body_start)
+int
+buck2obj_parse(struct buck2obj_buf *buf, uns buck_type, uns buck_len, struct fastbuf *body, struct odes *o_hdr, uns *body_start, struct odes *o_body) /* Parse one bucket of buck_len bytes from body into o_hdr/o_body; a non-NULL body_start requests header-only parsing (except for plain buckets, which are read whole) and receives the body offset. Returns 0 on success, -1 on failure (the RET_ERR paths set errno to EINVAL) */
{
- struct odes *o = obj_new(buf->mp);
-
- if (buck_type < BUCKET_TYPE_V33)
+ if (buck_type <= BUCKET_TYPE_PLAIN) /* plain text bucket: every attribute line goes to o_hdr */
+ {
+ if (body_start) // there is no header part
+ *body_start = 0;
+ // ignore empty lines and read until the end of the bucket
+ sh_off_t end = btell(body) + buck_len; /* stream offset just past this bucket */
+ byte buf[MAX_ATTR_SIZE]; /* line buffer; note that it shadows the buck2obj_buf parameter of the same name in this scope */
+ while (btell(body) < end && bgets(body, buf, sizeof(buf)))
+ if (buf[0])
+ obj_add_attr(o_hdr, buf[0], buf+1); /* first byte is the attribute type, the rest of the line its value */
+ ASSERT(btell(body) == end); /* reading must have stopped exactly at the bucket end */
+ }
+ else if (buck_type == BUCKET_TYPE_V30) /* text format: header lines, then an empty line, then body lines */
{
- if (!body_start) // header + body: ignore empty lines, read until EOF
+ sh_off_t start = btell(body);
+ sh_off_t end = start + buck_len;
+ byte buf[MAX_ATTR_SIZE]; /* line buffer; shadows the buck2obj_buf parameter here as well */
+ while (btell(body) < end && bgets(body, buf, sizeof(buf)) && buf[0]) /* the header ends at the first empty line or at the bucket end */
+ obj_add_attr(o_hdr, buf[0], buf+1);
+ if (body_start)
+ *body_start = btell(body) - start; /* offset, relative to the bucket start, at which header reading stopped */
+ else /* no body_start requested: parse the rest of the bucket into o_body */
{
- obj_read_multi(body, o);
- bgetc(body);
+ while (btell(body) < end && bgets(body, buf, sizeof(buf)))
+ if (buf[0])
+ obj_add_attr(o_body, buf[0], buf+1);
+ ASSERT(btell(body) == end);
}
- else // header only: end on EOF or the first empty line
+ }
+ else if (buck_type == BUCKET_TYPE_V33 || buck_type == BUCKET_TYPE_V33_LIZARD) /* binary formats: attributes are prefixed with a UTF-8 encoded length; the LIZARD variant carries a compressed body */
+ {
+ /* Avoid reading the whole bucket if only its header is needed. */
+ if (body_start)
{
sh_off_t start = btell(body);
- obj_read(body, o);
+ sh_off_t end = start + buck_len;
+ while (btell(body) < end)
+ {
+ uns len = bget_utf8(body); /* encoded length covers the value plus its trailing type byte */
+ if (!len) /* a zero length terminates the header */
+ break;
+ byte *buf = mp_alloc_fast_noalign(o_hdr->pool, len); /* allocated from the header object's pool; presumably the object keeps this pointer (obj_add_attr_ref) — confirm */
+ bread(body, buf, len); /* NOTE(review): the number of bytes actually read is not checked */
+ uns type = buf[--len]; /* the last byte is the attribute type */
+ buf[len] = 0; /* replace the type byte with a string terminator */
+ obj_add_attr_ref(o_hdr, type, buf);
+ }
*body_start = btell(body) - start;
+ return 0; /* header-only request is complete; the body is left unread */
}
- }
- else
- {
+
/* Read all the bucket into 1 buffer, 0-copy if possible. */
- int can_overwrite = bconfig(body, BCONFIG_CAN_OVERWRITE, -1);
- /* FIXME: This could be cached in buck2obj_buf */
- if (can_overwrite < 0)
- can_overwrite = 0;
- uns overwritten;
byte *ptr, *end;
uns len = bdirect_read_prepare(body, &ptr);
+ uns copied = 0; /* becomes 1 once ptr no longer points into body's own buffer */
if (len < buck_len
- || (can_overwrite < 2 && buck_type == BUCKET_TYPE_V33))
+ || (body->can_overwrite_buffer < 2 && buck_type == BUCKET_TYPE_V33)) /* NOTE(review): presumably a value below 2 means the fastbuf buffer must not be modified in place — confirm against fastbuf.h */
{
/* Copy if the original buffer is too small.
* If it is write-protected, copy it also if it is uncompressed. */
bb_grow(&buf->bb, buck_len);
len = bread(body, buf->bb.ptr, buck_len);
ptr = buf->bb.ptr;
- can_overwrite = 2;
- overwritten = 0;
+ copied = 1;
}
- else
- overwritten = can_overwrite > 1;
- end = ptr + len;
+ end = ptr + buck_len; /* NOTE(review): bounds come from buck_len, not from the byte count returned by bread() above */
- byte *start = ptr;
- ptr = decode_attributes(ptr, end, o, can_overwrite);// header
- if (body_start)
- {
- *body_start = ptr - start;
- return o;
- }
- if (buck_type == BUCKET_TYPE_V33)
- ;
- else if (buck_type == BUCKET_TYPE_V33_LIZARD) // decompression
+ ptr = decode_attributes(ptr, end, o_hdr, 0); // header
+ if (buck_type == BUCKET_TYPE_V33_LIZARD) // decompression
{
+ if (ptr + 8 > end) /* two 32-bit fields (length and checksum) must fit before the compressed data */
+ RET_ERR(EINVAL); /* NOTE(review): in the zero-copy case this exits without committing the direct read */
len = GET_U32(ptr);
ptr += 4;
- int res;
- byte *new_ptr;
- res = lizard_decompress_safe(ptr, buf->lizard, len, &new_ptr);
- if (res != (int) len)
- {
- if (res >= 0)
- errno = EINVAL;
- return NULL;
- }
+ uns adler = GET_U32(ptr); /* checksum stored in the bucket, compared with adler32() of the decompressed data below */
+ ptr += 4;
+ byte *new_ptr = lizard_decompress_safe(ptr, buf->lizard, len);
+ if (!new_ptr)
+ return -1; /* errno is not set here; presumably the decompressor sets it — confirm */
+ if (adler32(new_ptr, len) != adler) /* reject data whose checksum does not match the stored one */
+ RET_ERR(EINVAL);
+ if (!copied)
+ bdirect_read_commit(body, end); /* zero-copy input has been consumed up to the bucket end */
ptr = new_ptr;
end = ptr + len;
- can_overwrite = 2;
+ copied = 1; /* ptr now refers to the decompressed data rather than body's buffer */
}
- else // unknown bucket type
- RET_ERR(EINVAL);
- ASSERT(can_overwrite == 2); // because of the policy and decompression
- ptr = decode_attributes(ptr, end, o, 2); // body
-
+ ptr = decode_attributes(ptr, end, o_body, 2); // body
if (ptr != end)
RET_ERR(EINVAL);
- /* If (overwritten), bflush(body) might be needed. */
+ if (!copied) /* the body was parsed directly inside the fastbuf's buffer */
+ bdirect_read_commit_modified(body, ptr); /* presumably also tells the fastbuf that its buffer contents were changed — confirm */
}
- return o;
+ else /* unknown bucket type */
+ {
+ bskip(body, buck_len); /* skip the bucket's bytes before reporting the error */
+ RET_ERR(EINVAL);
+ }
+ return 0;
}
-byte *
-obj_attr_to_bucket(byte *buf, uns buck_type, uns attr, byte *val)
+struct odes *
+obj_read_bucket(struct buck2obj_buf *buf, struct mempool *pool, uns buck_type, uns buck_len, struct fastbuf *body, uns *body_start) /* Wrapper around buck2obj_parse(): creates a new object in pool, parses the bucket into it and returns it, or NULL when parsing fails */
{
- uns l;
-
- switch (buck_type)
- {
- case BUCKET_TYPE_PLAIN:
- case BUCKET_TYPE_V30:
- buf += sprintf(buf, "%c%s\n", attr, val);
- break;
- case BUCKET_TYPE_V33:
- case BUCKET_TYPE_V33_LIZARD:
- l = strlen(val) + 1;
- PUT_UTF8(buf, l);
- l--;
- memcpy(buf, val, l);
- buf += l;
- *buf++ = attr;
- break;
- default:
- die("obj_attr_to_bucket called for unknown type %08x", buck_type);
- }
- return buf;
+ struct odes *o = obj_new(pool);
+ if (buck2obj_parse(buf, buck_type, buck_len, body, o, body_start, o) < 0) /* the same object receives both header and body attributes */
+ return NULL; /* the object created above is not released here */
+ else
+ return o;
}
-byte *
-obj_attr_to_bucket_num(byte *buf, uns buck_type, uns attr, uns val)
+int
+obj_read(struct fastbuf *f, struct odes *o) /* Read text attribute lines from f into o; returns 1 when an empty line ends the object, 0 when bgets() stops delivering lines */
{
- byte vbuf[16];
- sprintf(vbuf, "%d", val);
- return obj_attr_to_bucket(buf, buck_type, attr, vbuf);
+ byte buf[MAX_ATTR_SIZE];
+
+ while (bgets(f, buf, sizeof(buf)))
+ {
+ if (!buf[0]) /* an empty line terminates the object */
+ return 1;
+ obj_add_attr(o, buf[0], buf+1); /* first byte is the attribute type, the rest of the line its value */
+ }
+ return 0; /* bgets() returned false — presumably end of input */
}