obj_add_attr_ref() with an on-stack buffer is not advisable; better to use obj_add_attr() for on-stack data and keep obj_add_attr_ref() for pool-allocated buffers

[libucw.git] / lib / buck2obj.c
diff --git a/lib/buck2obj.c b/lib/buck2obj.c

index 81d67856691ea9990aa60beb5d57f7387a4f74ad..d75b48ab19e212b2d65e8996981db3b6dda85325 100644 (file)
--- a/lib/buck2obj.c
+++ b/lib/buck2obj.c
@@ -14,6 +14,7 @@
  #include "lib/bucket.h"
  #include "lib/lizard.h"
  #include "lib/bbuf.h"
+#include "lib/ff-utf8.h"
  
  #include <stdlib.h>
  #include <errno.h>
@@ -83,26 +84,57 @@ decode_attributes(byte *ptr, byte *end, struct odes *o, uns can_overwrite)
  int
  buck2obj_parse(struct buck2obj_buf *buf, uns buck_type, uns buck_len, struct fastbuf *body, struct odes *o_hdr, uns *body_start, struct odes *o_body)
  {
-  if (buck_type == BUCKET_TYPE_PLAIN)
+  if (buck_type <= BUCKET_TYPE_PLAIN)
    {
-    if (body_start)
+    if (body_start)                    // there is no header part
        *body_start = 0;
-    obj_read_multi(body, o_hdr);       // ignore empty lines, read until EOF or NUL
+    // ignore empty lines and read until the end of the bucket
+    sh_off_t end = btell(body) + buck_len;
+    byte buf[MAX_ATTR_SIZE];
+    while (btell(body) < end && bgets(body, buf, sizeof(buf)))
+      if (buf[0])
+       obj_add_attr(o_hdr, buf[0], buf+1);
+    ASSERT(btell(body) == end);
    }
    else if (buck_type == BUCKET_TYPE_V30)
    {
      sh_off_t start = btell(body);
-    obj_read(body, o_hdr);             // end on EOF or the first empty line
+    sh_off_t end = start + buck_len;
+    byte buf[MAX_ATTR_SIZE];
+    while (btell(body) < end && bgets(body, buf, sizeof(buf)) && buf[0])
+      obj_add_attr(o_hdr, buf[0], buf+1);
      if (body_start)
        *body_start = btell(body) - start;
      else
      {
-      obj_read(body, o_body);
-      bgetc(body);
+      while (btell(body) < end && bgets(body, buf, sizeof(buf)))
+       if (buf[0])
+         obj_add_attr(o_body, buf[0], buf+1);
+      ASSERT(btell(body) == end);
      }
    }
    else if (buck_type == BUCKET_TYPE_V33 || buck_type == BUCKET_TYPE_V33_LIZARD)
    {
+    /* Avoid reading the whole bucket if only its header is needed.  */
+    if (body_start)
+    {
+      sh_off_t start = btell(body);
+      sh_off_t end = start + buck_len;
+      while (btell(body) < end)
+      {
+       uns len = bget_utf8(body);
+       if (!len)
+         break;
+       byte *buf = mp_alloc_fast_noalign(o_hdr->pool, len);
+       bread(body, buf, len);
+       uns type = buf[--len];
+       buf[len] = 0;
+       obj_add_attr_ref(o_hdr, type, buf);
+      }
+      *body_start = btell(body) - start;
+      return 0;
+    }
+
      /* Read all the bucket into 1 buffer, 0-copy if possible.  */
      byte *ptr, *end;
      uns len = bdirect_read_prepare(body, &ptr);
@@ -117,17 +149,9 @@ buck2obj_parse(struct buck2obj_buf *buf, uns buck_type, uns buck_len, struct fas
        ptr = buf->bb.ptr;
        copied = 1;
      }
-    end = ptr + len;
+    end = ptr + buck_len;
  
-    byte *start = ptr;
      ptr = decode_attributes(ptr, end, o_hdr, 0);               // header
-    if (body_start)
-    {
-      *body_start = ptr - start;
-      if (!copied)
-       bdirect_read_commit(body, ptr);
-      return 0;
-    }
      if (buck_type == BUCKET_TYPE_V33_LIZARD)           // decompression
      {
        if (ptr + 8 > end)
@@ -154,7 +178,10 @@ buck2obj_parse(struct buck2obj_buf *buf, uns buck_type, uns buck_len, struct fas
        bdirect_read_commit_modified(body, ptr);
    }
    else
-    RET_ERR(EINVAL);
+    {
+      bskip(body, buck_len);
+      RET_ERR(EINVAL);
+    }
    return 0;
  }
  
@@ -181,13 +208,3 @@ obj_read(struct fastbuf *f, struct odes *o)
      }
    return 0;
  }
-
-void
-obj_read_multi(struct fastbuf *f, struct odes *o)
-{
-  /* Read a multi-part object ending with either EOF or a NUL character */
-  byte buf[MAX_ATTR_SIZE];
-  while (bpeekc(f) > 0 && bgets(f, buf, sizeof(buf)))
-    if (buf[0])
-      obj_add_attr(o, buf[0], buf+1);
-}