Parsing of PLAIN type buckets also ends after buck_len bytes.

[libucw.git] / lib / buck2obj.c
diff --git a/lib/buck2obj.c b/lib/buck2obj.c

index 4270b7a36a7dd8f3028b82cb4115e40001175e7d..e90ff875447eead2125b08e7d8624804df20a612 100644 (file)
--- a/lib/buck2obj.c
+++ b/lib/buck2obj.c
@@ -1,42 +1,38 @@
  /*
- *     Bucket -> Object converter
+ *     Generating Objects from Buckets
   *
   *     (c) 2004, Robert Spalek <robert@ucw.cz>
+ *     (c) 2004, Martin Mares <mj@ucw.cz>
   */
  
  #include "lib/lib.h"
  #include "lib/unaligned.h"
-#include "lib/pools.h"
+#include "lib/mempool.h"
  #include "lib/fastbuf.h"
-#include "charset/unicode.h"
+#include "lib/unicode.h"
  #include "lib/object.h"
  #include "lib/bucket.h"
  #include "lib/lizard.h"
-#include "lib/buck2obj.h"
+#include "lib/bbuf.h"
  
  #include <stdlib.h>
  #include <errno.h>
  #include <unistd.h>
  
-#define        MAX_HEADER_SIZE 1024            // extra space for the header not counted in MaxObjSize
-#define        RET_ERR(num)    ({ errno = num; return NULL; })
+#define        RET_ERR(num)    ({ errno = num; return -1; })   // set errno to num and return -1 from the enclosing function
  
  struct buck2obj_buf
  {
-  byte *raw;
-  uns raw_len;
+  bb_t bb;                             // buffer that buck2obj_parse() enlarges with bb_grow() when it has to copy the bucket
    struct lizard_buffer *lizard;
-  struct mempool *mp;
  };
  
  struct buck2obj_buf *
-buck2obj_alloc(uns max_len, struct mempool *mp)
+buck2obj_alloc(void)                   // no size limit or mempool argument any more; release the result with buck2obj_free()
  {
    struct buck2obj_buf *buf = xmalloc(sizeof(struct buck2obj_buf));
-  buf->raw_len = max_len * LIZARD_MAX_MULTIPLY + LIZARD_MAX_ADD + MAX_HEADER_SIZE;
-  buf->raw = xmalloc(buf->raw_len);
-  buf->lizard = lizard_alloc(max_len);
-  buf->mp = mp;
+  bb_init(&buf->bb);                   // sized later, by bb_grow() in buck2obj_parse()
+  buf->lizard = lizard_alloc();                // state handed to lizard_decompress_safe() in buck2obj_parse()
    return buf;
  }
  
@@ -44,107 +40,155 @@ void
  buck2obj_free(struct buck2obj_buf *buf)
  {
    lizard_free(buf->lizard);
-  xfree(buf->raw);
+  bb_done(&buf->bb);                   // counterpart of bb_init() in buck2obj_alloc()
    xfree(buf);
  }
  
  static inline byte *
  decode_attributes(byte *ptr, byte *end, struct odes *o, uns can_overwrite)
  {
-  while (ptr < end)
-  {
-    uns len;
-    GET_UTF8(ptr, len);
-    if (!len--)
-      break;
-    byte type = ptr[len];
-    if (can_overwrite == 2)
+  if (can_overwrite >= 2)              // in-place mode: attribute values are referenced directly inside [ptr, end)
+    while (ptr < end)
      {
+      uns len;
+      GET_UTF8(ptr, len);              // length prefix; GET_UTF8 presumably advances ptr past it
+      if (!len--)                      // a stored length of 0 ends the list; otherwise len becomes the value length
+       break;
+      byte type = ptr[len];            // type byte follows the value; NOTE(review): len is not checked against end here
+
        ptr[len] = 0;
        obj_add_attr_ref(o, type, ptr);
+
+      ptr += len + 1;                  // skip the value and its trailing type byte
      }
-    else if (can_overwrite == 1)
-    {
-      ptr[len] = 0;
-      obj_add_attr(o, type, ptr);
-      ptr[len] = type;
-    }
-    else
+  else                                 // copying mode: the source buffer is left unmodified
+    while (ptr < end)
      {
-      byte *dup = mp_alloc(o->pool, len+1);
+      uns len;
+      GET_UTF8(ptr, len);              // length prefix; GET_UTF8 presumably advances ptr past it
+      if (!len--)                      // a stored length of 0 ends the list; otherwise len becomes the value length
+       break;
+      byte type = ptr[len];            // type byte follows the value; NOTE(review): len is not checked against end here
+
+      byte *dup = mp_alloc_fast_noalign(o->pool, len+1);       // room for the value plus a terminating zero
        memcpy(dup, ptr, len);
        dup[len] = 0;
-      obj_add_attr_ref(o, type, ptr);
+      obj_add_attr_ref(o, type, dup);  // reference the pool copy, not the source buffer
+
+      ptr += len + 1;                  // skip the value and its trailing type byte
      }
-    ptr += len + 1;
-  }
    return ptr;
  }
  
-struct odes *
-buck2obj_convert(struct buck2obj_buf *buf, uns buck_type, struct fastbuf *body)
+int                                    // 0 on success, -1 on failure (the RET_ERR paths also set errno)
+buck2obj_parse(struct buck2obj_buf *buf, uns buck_type, uns buck_len, struct fastbuf *body, struct odes *o_hdr, uns *body_start, struct odes *o_body)
  {
-  mp_flush(buf->mp);
-  struct odes *o = obj_new(buf->mp);
-
-  if (buck_type < BUCKET_TYPE_V33)
-    obj_read_multi(body, o);
-  else
+  if (buck_type <= BUCKET_TYPE_PLAIN)  // line-oriented bucket: every non-empty line becomes an attribute of o_hdr
+  {
+    if (body_start)                    // there is no header part
+      *body_start = 0;
+    // ignore empty lines and read until the end of the bucket
+    sh_off_t end = btell(body) + buck_len;     // absolute position at which this bucket ends
+    byte buf[MAX_ATTR_SIZE];           // NOTE: shadows the buf parameter inside this branch
+    while (btell(body) < end && bgets(body, buf, sizeof(buf)))
+      if (buf[0])
+       obj_add_attr(o_hdr, buf[0], buf+1);     // first byte of the line is the attribute type, the rest its value
+    ASSERT(btell(body) == end);                // reading must stop exactly at the bucket boundary
+  }
+  else if (buck_type == BUCKET_TYPE_V30)       // line-oriented header, then an empty line, then a line-oriented body
+  {
+    sh_off_t start = btell(body);
+    sh_off_t end = start + buck_len;
+    byte buf[MAX_ATTR_SIZE];           // NOTE: shadows the buf parameter inside this branch
+    while (btell(body) < end && bgets(body, buf, sizeof(buf)) && buf[0])       // the header ends at the first empty line
+      obj_add_attr(o_hdr, buf[0], buf+1);
+    if (body_start)
+      *body_start = btell(body) - start;       // body offset relative to the bucket start; the body is not read here
+    else                               // no body_start requested: the body attributes are added to o_hdr as well
+    {
+      while (btell(body) < end && bgets(body, buf, sizeof(buf)))
+       if (buf[0])
+         obj_add_attr(o_hdr, buf[0], buf+1);
+      ASSERT(btell(body) == end);      // reading must stop exactly at the bucket boundary
+    }
+  }
+  else if (buck_type == BUCKET_TYPE_V33 || buck_type == BUCKET_TYPE_V33_LIZARD)
    {
-    /* Compute the length of the bucket.  We cannot fetch this attribute
-     * directly due to remote indexing.  */
-    bseek(body, 0, SEEK_END);
-    sh_off_t buck_len = btell(body);
-    bsetpos(body, 0);
-
      /* Read all the bucket into 1 buffer, 0-copy if possible.  */
-    int can_overwrite = MAX(bconfig(body, BCONFIG_CAN_OVERWRITE, 0), 0);
-    uns overwritten;
      byte *ptr, *end;
      uns len = bdirect_read_prepare(body, &ptr);
+    uns copied = 0;                    // becomes 1 once ptr no longer points into the buffer obtained from body
      if (len < buck_len
-    || (can_overwrite < 2 && buck_type == BUCKET_TYPE_V33))
+    || (body->can_overwrite_buffer < 2 && buck_type == BUCKET_TYPE_V33))
      {
        /* Copy if the original buffer is too small.
         * If it is write-protected, copy it also if it is uncompressed.  */
-      if (buck_len > buf->raw_len)
-       RET_ERR(EFBIG);
-      len = bread(body, buf->raw, buck_len);
-      ptr = buf->raw;
-      can_overwrite = 2;
-      overwritten = 0;
+      bb_grow(&buf->bb, buck_len);     // make room for the whole bucket in our own buffer
+      len = bread(body, buf->bb.ptr, buck_len);
+      ptr = buf->bb.ptr;
+      copied = 1;
      }
-    else
-      overwritten = can_overwrite > 1;
      end = ptr + len;
  
-    ptr = decode_attributes(ptr, end, o, can_overwrite);// header
-    if (buck_type == BUCKET_TYPE_V33)
-      ;
-    else if (buck_type == BUCKET_TYPE_V33_LIZARD)      // decompression
+    byte *start = ptr;                 // kept so that the header length can be computed below
+    ptr = decode_attributes(ptr, end, o_hdr, 0);               // header
+    if (body_start)                    // header-only request: report the body offset and stop
+    {
+      *body_start = ptr - start;
+      if (!copied)
+       bdirect_read_commit(body, ptr); // presumably marks the data up to ptr as consumed -- confirm against fastbuf docs
+      return 0;
+    }
+    if (buck_type == BUCKET_TYPE_V33_LIZARD)           // decompression
      {
+      if (ptr + 8 > end)               // two 32-bit fields (length, checksum) must be present
+       RET_ERR(EINVAL);
        len = GET_U32(ptr);
        ptr += 4;
-      int res = lizard_decompress_safe(ptr, buf->lizard, len);
-      if (res != (int) len)
-      {
-       if (res >= 0)
-         errno = EINVAL;
-       return NULL;
-      }
-      ptr = buf->lizard->ptr;
+      uns adler = GET_U32(ptr);                // value the adler32() of the decompressed data must match
+      ptr += 4;
+      byte *new_ptr = lizard_decompress_safe(ptr, buf->lizard, len);
+      if (!new_ptr)
+       return -1;                      // NOTE(review): errno is presumably set by lizard_decompress_safe() -- confirm
+      if (adler32(new_ptr, len) != adler)
+       RET_ERR(EINVAL);
+      if (!copied)
+       bdirect_read_commit(body, end); // the compressed input is no longer needed
+      ptr = new_ptr;
        end = ptr + len;
-      can_overwrite = 2;
+      copied = 1;                      // ptr now points at the decompressed data
      }
-    else                                               // unknown bucket type
-      RET_ERR(EINVAL);
-    ASSERT(can_overwrite == 2);                                // because of the policy and decompression
-    ptr = decode_attributes(ptr, end, o, 2);           // body
-
+    ptr = decode_attributes(ptr, end, o_body, 2);      // body
      if (ptr != end)
        RET_ERR(EINVAL);
-    if (overwritten)
-      bflush(body);
+    if (!copied)                       // decode_attributes(..., 2) wrote zero bytes into the buffer obtained from body
+      bdirect_read_commit_modified(body, ptr);
    }
-  return o;
+  else                                 // unknown bucket type
+    RET_ERR(EINVAL);
+  return 0;
+}
+
+struct odes *                          // object created with obj_new(pool), or NULL if buck2obj_parse() failed
+obj_read_bucket(struct buck2obj_buf *buf, struct mempool *pool, uns buck_type, uns buck_len, struct fastbuf *body, uns *body_start)
+{
+  struct odes *o = obj_new(pool);
+  if (buck2obj_parse(buf, buck_type, buck_len, body, o, body_start, o) < 0)    // the same object receives header and body attributes
+    return NULL;
+  else
+    return o;
+}
+
+int                                    // 1 if an empty line ended the object, 0 if bgets() stopped first
+obj_read(struct fastbuf *f, struct odes *o)
+{
+  byte buf[MAX_ATTR_SIZE];
+
+  while (bgets(f, buf, sizeof(buf)))
+    {
+      if (!buf[0])                     // empty line terminates the object
+       return 1;
+      obj_add_attr(o, buf[0], buf+1);  // first byte of the line is the attribute type, the rest its value
+    }
+  return 0;
+}