X-Git-Url: http://mj.ucw.cz/gitweb/?a=blobdiff_plain;f=lib%2Fbuck2obj.c;h=7d651dfe6b65f6941f375a4c32ef53f88033c06d;hb=a010e5f6e075097c0d6362d40fb49e812e99e185;hp=4270b7a36a7dd8f3028b82cb4115e40001175e7d;hpb=94fe89c4f19dca7d9c11a55fdb80209ee2ff59d4;p=libucw.git diff --git a/lib/buck2obj.c b/lib/buck2obj.c index 4270b7a3..7d651dfe 100644 --- a/lib/buck2obj.c +++ b/lib/buck2obj.c @@ -1,42 +1,38 @@ /* - * Bucket -> Object converter + * Generating Objects from Buckets * * (c) 2004, Robert Spalek + * (c) 2004, Martin Mares */ #include "lib/lib.h" #include "lib/unaligned.h" #include "lib/pools.h" #include "lib/fastbuf.h" -#include "charset/unicode.h" +#include "lib/unicode.h" #include "lib/object.h" #include "lib/bucket.h" #include "lib/lizard.h" -#include "lib/buck2obj.h" +#include "lib/bbuf.h" #include #include #include -#define MAX_HEADER_SIZE 1024 // extra space for the header not counted in MaxObjSize -#define RET_ERR(num) ({ errno = num; return NULL; }) +#define RET_ERR(num) ({ errno = num; return -1; }) struct buck2obj_buf { - byte *raw; - uns raw_len; + bb_t bb; struct lizard_buffer *lizard; - struct mempool *mp; }; struct buck2obj_buf * -buck2obj_alloc(uns max_len, struct mempool *mp) +buck2obj_alloc(void) { struct buck2obj_buf *buf = xmalloc(sizeof(struct buck2obj_buf)); - buf->raw_len = max_len * LIZARD_MAX_MULTIPLY + LIZARD_MAX_ADD + MAX_HEADER_SIZE; - buf->raw = xmalloc(buf->raw_len); - buf->lizard = lizard_alloc(max_len); - buf->mp = mp; + bb_init(&buf->bb); + buf->lizard = lizard_alloc(); return buf; } @@ -44,107 +40,150 @@ void buck2obj_free(struct buck2obj_buf *buf) { lizard_free(buf->lizard); - xfree(buf->raw); + bb_done(&buf->bb); xfree(buf); } static inline byte * decode_attributes(byte *ptr, byte *end, struct odes *o, uns can_overwrite) { - while (ptr < end) - { - uns len; - GET_UTF8(ptr, len); - if (!len--) - break; - byte type = ptr[len]; - if (can_overwrite == 2) + if (can_overwrite >= 2) + while (ptr < end) { + uns len; + GET_UTF8(ptr, len); + if (!len--) + break; + byte type = ptr[len]; + ptr[len] = 0; obj_add_attr_ref(o, type, ptr); + + ptr += len + 1; } - else if (can_overwrite == 1) - { - ptr[len] = 0; - obj_add_attr(o, type, ptr); - ptr[len] = type; - } - else + else + while (ptr < end) { - byte *dup = mp_alloc(o->pool, len+1); + uns len; + GET_UTF8(ptr, len); + if (!len--) + break; + byte type = ptr[len]; + + byte *dup = mp_alloc_fast_noalign(o->pool, len+1); memcpy(dup, ptr, len); dup[len] = 0; - obj_add_attr_ref(o, type, ptr); + obj_add_attr_ref(o, type, dup); + + ptr += len + 1; } - ptr += len + 1; - } return ptr; } -struct odes * -buck2obj_convert(struct buck2obj_buf *buf, uns buck_type, struct fastbuf *body) +int +buck2obj_parse(struct buck2obj_buf *buf, uns buck_type, uns buck_len, struct fastbuf *body, struct odes *o_hdr, uns *body_start, struct odes *o_body) { - mp_flush(buf->mp); - struct odes *o = obj_new(buf->mp); - - if (buck_type < BUCKET_TYPE_V33) - obj_read_multi(body, o); - else + if (buck_type == BUCKET_TYPE_PLAIN) + { + if (body_start) + *body_start = 0; + obj_read_multi(body, o_hdr); // ignore empty lines, read until EOF or NUL + } + else if (buck_type == BUCKET_TYPE_V30) + { + sh_off_t start = btell(body); + obj_read(body, o_hdr); // end on EOF or the first empty line + if (body_start) + *body_start = btell(body) - start; + else + { + obj_read(body, o_body); + bgetc(body); + } + } + else if (buck_type == BUCKET_TYPE_V33 || buck_type == BUCKET_TYPE_V33_LIZARD) { - /* Compute the length of the bucket. We cannot fetch this attribute - * directly due to remote indexing. */ - bseek(body, 0, SEEK_END); - sh_off_t buck_len = btell(body); - bsetpos(body, 0); - /* Read all the bucket into 1 buffer, 0-copy if possible. */ - int can_overwrite = MAX(bconfig(body, BCONFIG_CAN_OVERWRITE, 0), 0); - uns overwritten; byte *ptr, *end; uns len = bdirect_read_prepare(body, &ptr); + uns copied = 0; if (len < buck_len - || (can_overwrite < 2 && buck_type == BUCKET_TYPE_V33)) + || (body->can_overwrite_buffer < 2 && buck_type == BUCKET_TYPE_V33)) { /* Copy if the original buffer is too small. * If it is write-protected, copy it also if it is uncompressed. */ - if (buck_len > buf->raw_len) - RET_ERR(EFBIG); - len = bread(body, buf->raw, buck_len); - ptr = buf->raw; - can_overwrite = 2; - overwritten = 0; + bb_grow(&buf->bb, buck_len); + len = bread(body, buf->bb.ptr, buck_len); + ptr = buf->bb.ptr; + copied = 1; } - else - overwritten = can_overwrite > 1; end = ptr + len; - ptr = decode_attributes(ptr, end, o, can_overwrite);// header - if (buck_type == BUCKET_TYPE_V33) - ; - else if (buck_type == BUCKET_TYPE_V33_LIZARD) // decompression + byte *start = ptr; + ptr = decode_attributes(ptr, end, o_hdr, 0); // header + if (body_start) + { + *body_start = ptr - start; + if (!copied) + bdirect_read_commit(body, ptr); + return 0; + } + if (buck_type == BUCKET_TYPE_V33_LIZARD) // decompression { + if (ptr + 4 > end) + RET_ERR(EINVAL); len = GET_U32(ptr); ptr += 4; - int res = lizard_decompress_safe(ptr, buf->lizard, len); - if (res != (int) len) - { - if (res >= 0) - errno = EINVAL; - return NULL; - } - ptr = buf->lizard->ptr; + byte *new_ptr = lizard_decompress_safe(ptr, buf->lizard, len); + if (!new_ptr) + return -1; + if (!copied) + bdirect_read_commit(body, end); + ptr = new_ptr; end = ptr + len; - can_overwrite = 2; + copied = 1; } - else // unknown bucket type - RET_ERR(EINVAL); - ASSERT(can_overwrite == 2); // because of the policy and decompression - ptr = decode_attributes(ptr, end, o, 2); // body - + ptr = decode_attributes(ptr, end, o_body, 2); // body if (ptr != end) RET_ERR(EINVAL); - if (overwritten) - bflush(body); + if (!copied) + bdirect_read_commit_modified(body, ptr); } - return o; + else + RET_ERR(EINVAL); + return 0; +} + +struct odes * +obj_read_bucket(struct buck2obj_buf *buf, struct mempool *pool, uns buck_type, uns buck_len, struct fastbuf *body, uns *body_start) +{ + struct odes *o = obj_new(pool); + if (buck2obj_parse(buf, buck_type, buck_len, body, o, body_start, o) < 0) + return NULL; + else + return o; +} + +int +obj_read(struct fastbuf *f, struct odes *o) +{ + byte buf[MAX_ATTR_SIZE]; + + while (bgets(f, buf, sizeof(buf))) + { + if (!buf[0]) + return 1; + obj_add_attr(o, buf[0], buf+1); + } + return 0; +} + +void +obj_read_multi(struct fastbuf *f, struct odes *o) +{ + /* Read a multi-part object ending with either EOF or a NUL character */ + byte buf[MAX_ATTR_SIZE]; + while (bpeekc(f) > 0 && bgets(f, buf, sizeof(buf))) + if (buf[0]) + obj_add_attr(o, buf[0], buf+1); }