sherlock/buck2obj.c

   1 /*
   2  *      Sherlock Library -- Generating Objects from Buckets
   3  *
   4  *      (c) 2004, Robert Spalek <robert@ucw.cz>
   5  *      (c) 2004--2006, Martin Mares <mj@ucw.cz>
   6  *
   7  *      This software may be freely distributed and used according to the terms
   8  *      of the GNU Lesser General Public License.
   9  */
  10
  11 #undef LOCAL_DEBUG
  12
  13 #include "sherlock/sherlock.h"
  14 #include "ucw/unaligned.h"
  15 #include "ucw/mempool.h"
  16 #include "ucw/fastbuf.h"
  17 #include "ucw/unicode.h"
  18 #include "sherlock/object.h"
  19 #include "sherlock/objread.h"
  20 #include "ucw/lizard.h"
  21 #include "ucw/bbuf.h"
  22 #include "ucw/ff-unicode.h"
  23
  24 #include <errno.h>
  25 #include <unistd.h>
  26
/*
 * Set errno to `num' and return -1 from the function the macro is expanded in.
 * Note that the `return' is hidden inside a GNU statement expression, so the
 * macro can only be used inside functions returning an integer type.
 */
#define RET_ERR(num)    ({ errno = num; return -1; })

/* Working storage handed to buck2obj_parse(); see buck2obj_alloc() and buck2obj_free().  */
struct buck2obj_buf
{
  bb_t bb;                              /* growing buffer: holds text lines and copies of whole buckets */
  struct lizard_buffer *lizard;         /* passed to lizard_decompress_safe() for compressed bodies */
};

/* Bucket format parsed by get_attr() and bget_attr(); changed by get_attr_set_type().  */
static uns get_attr_type;
  36
  37 void
  38 get_attr_set_type(uns type)
  39 {
  40   if (type < BUCKET_TYPE_PLAIN || type > BUCKET_TYPE_V33_LIZARD)
  41     die("Unknown buckettype %x", type);
  42   get_attr_type = type;
  43 }
  44
  45 int
  46 get_attr(byte **pos, byte *end, struct parsed_attr *attr)
  47 {
  48   byte *ptr = *pos;
  49   if (ptr >= end)
  50     return -1;
  51   if (get_attr_type < BUCKET_TYPE_V33)
  52   {
  53     if (get_attr_type == BUCKET_TYPE_PLAIN)
  54     {
  55       while (ptr < end && *ptr == '\n')
  56         ptr++;
  57       *pos = ptr;
  58       if (ptr >= end)
  59         return -1;
  60     }
  61     else if (*ptr == '\n')
  62     {
  63       *pos = ++ptr;
  64       attr->attr = 0;
  65       return 0;
  66     }
  67     attr->attr = *ptr++;
  68     attr->val = ptr;
  69     while (ptr < end && *ptr != '\n')
  70       ptr++;
  71     attr->len = ptr++ - attr->val;
  72   }
  73   else
  74   {
  75     uns len;
  76     ptr = utf8_32_get(ptr, &len);
  77     if (!len--)
  78     {
  79       *pos = ptr;
  80       attr->attr = 0;
  81       return 0;
  82     }
  83     attr->attr = ptr[len];
  84     attr->val = ptr;
  85     attr->len = len;
  86     ptr += len+1;
  87   }
  88   if (ptr > end)
  89     die("Incomplete attribute %c", attr->attr);
  90   *pos = ptr;
  91   return attr->attr;
  92 }
  93
  94 int
  95 bget_attr(struct fastbuf *b, struct parsed_attr *attr)
  96 {
  97   static bb_t buf;
  98   if (get_attr_type < BUCKET_TYPE_V33)
  99   {
 100     int c = bgetc(b);
 101     if (c < 0)
 102       return -1;
 103     if (get_attr_type == BUCKET_TYPE_PLAIN)
 104     {
 105       while (c == '\n')
 106         c = bgetc(b);
 107       if (c < 0)
 108         return -1;
 109     }
 110     else if (c == '\n')
 111     {
 112       attr->attr = 0;
 113       return 0;
 114     }
 115     attr->attr = c;
 116
 117     byte *ptr, *end;
 118     uns len = bdirect_read_prepare(b, &ptr);
 119     end = ptr + len;
 120     attr->val = ptr;
 121     while (ptr < end && *ptr != '\n')
 122       ptr++;
 123     if (ptr < end)
 124     {
 125       bdirect_read_commit(b, ptr+1);
 126       attr->len = ptr - attr->val;
 127       return attr->attr;
 128     }
 129
 130     len = 0;
 131     c = bgetc(b);
 132     while (c >= 0 && c != '\n')
 133     {
 134       bb_grow(&buf, len+1);
 135       buf.ptr[len++] = c;
 136       c = bgetc(b);
 137     }
 138     if (c < 0)
 139       die("Incomplete attribute %c", attr->attr);
 140     attr->val = buf.ptr;
 141     attr->len = len;
 142   }
 143   else
 144   {
 145     int len = bget_utf8_32(b);
 146     if (len < 0)
 147       return -1;
 148     if (!len)
 149     {
 150       attr->attr = 0;
 151       return 0;
 152     }
 153     attr->len = len-1;
 154
 155     byte *ptr;
 156     int avail = bdirect_read_prepare(b, &ptr);
 157     if (avail >= len)
 158     {
 159       attr->val = ptr;
 160       attr->attr = ptr[len-1];
 161       bdirect_read_commit(b, ptr + len);
 162       return attr->attr;
 163     }
 164     bb_grow(&buf, --len);
 165     breadb(b, buf.ptr, len);
 166     attr->val = buf.ptr;
 167     attr->len = len;
 168     attr->attr = bgetc(b);
 169     if (attr->attr < 0)
 170       die("Incomplete attribute %c", attr->attr);
 171   }
 172   return attr->attr;
 173 }
 174
 175 void
 176 copy_parsed_attr(struct mempool *pool, struct parsed_attr *attr)
 177 {
 178   byte *b = mp_alloc_fast_noalign(pool, attr->len+1);
 179   memcpy(b, attr->val, attr->len);
 180   b[attr->len] = 0;
 181   attr->val = b;
 182 }
 183
 184 struct buck2obj_buf *
 185 buck2obj_alloc(void)
 186 {
 187   struct buck2obj_buf *buf = xmalloc(sizeof(struct buck2obj_buf));
 188   bb_init(&buf->bb);
 189   buf->lizard = lizard_alloc();
 190   return buf;
 191 }
 192
 193 void
 194 buck2obj_free(struct buck2obj_buf *buf)
 195 {
 196   lizard_free(buf->lizard);
 197   bb_done(&buf->bb);
 198   xfree(buf);
 199 }
 200
 201 static inline byte *
 202 decode_attributes(byte *ptr, byte *end, struct odes *o, uns can_overwrite)
 203 {
 204   struct obj_read_state st;
 205   obj_read_start(&st, o);
 206
 207   if (can_overwrite >= 2)
 208     while (ptr < end)
 209     {
 210       uns len;
 211       ptr = utf8_32_get(ptr, &len);
 212       if (!len--)
 213         break;
 214       byte type = ptr[len];
 215
 216       ptr[len] = 0;
 217       obj_read_attr_ref(&st, type, ptr);
 218
 219       ptr += len + 1;
 220     }
 221   else
 222     while (ptr < end)
 223     {
 224       uns len;
 225       ptr = utf8_32_get(ptr, &len);
 226       if (!len--)
 227         break;
 228       byte type = ptr[len];
 229
 230       byte *dup = mp_alloc_fast_noalign(o->pool, len+1);
 231       memcpy(dup, ptr, len);
 232       dup[len] = 0;
 233       obj_read_attr_ref(&st, type, dup);
 234
 235       ptr += len + 1;
 236     }
 237   obj_read_end(&st);
 238   return ptr;
 239 }
 240
 241 int
 242 buck2obj_parse(struct buck2obj_buf *buf, uns buck_type, uns buck_len, struct fastbuf *body,
 243                struct odes *o_hdr, uns *body_start, struct odes *o_body,
 244                uns allow_zero_copy)
 245 {
 246   struct obj_read_state st;
 247   if (buck_type <= BUCKET_TYPE_PLAIN)
 248   {
 249     if (body_start)                     // there is no header part
 250       *body_start = 0;
 251     obj_read_start(&st, o_hdr);
 252     byte *b;
 253     // ignore empty lines and read until the end of the bucket
 254     ucw_off_t end = btell(body) + buck_len;
 255     while (btell(body) < end && bgets_bb(body, &buf->bb, ~0U))
 256       if ((b = buf->bb.ptr)[0])
 257         obj_read_attr(&st, b[0], b+1);
 258     ASSERT(btell(body) == end);
 259     obj_read_end(&st);
 260   }
 261   else if (buck_type == BUCKET_TYPE_V30)
 262   {
 263     ucw_off_t start = btell(body);
 264     ucw_off_t end = start + buck_len;
 265     byte *b;
 266     struct obj_read_state st;
 267     obj_read_start(&st, o_hdr);
 268     while (btell(body) < end && bgets_bb(body, &buf->bb, ~0U) && (b = buf->bb.ptr)[0])
 269       obj_read_attr(&st, b[0], b+1);
 270     obj_read_end(&st);
 271     if (body_start)
 272       *body_start = btell(body) - start;
 273     else
 274     {
 275       obj_read_start(&st, o_body);
 276       while (btell(body) < end && bgets_bb(body, &buf->bb, ~0U))
 277         if ((b = buf->bb.ptr)[0])
 278           obj_read_attr(&st, b[0], b+1);
 279       ASSERT(btell(body) == end);
 280       obj_read_end(&st);
 281     }
 282   }
 283   else if (buck_type == BUCKET_TYPE_V33 || buck_type == BUCKET_TYPE_V33_LIZARD)
 284   {
 285     /* Avoid reading the whole bucket if only its header is needed.  */
 286     if (body_start)
 287     {
 288       ucw_off_t start = btell(body);
 289       ucw_off_t end = start + buck_len;
 290       obj_read_start(&st, o_hdr);
 291       while (btell(body) < end)
 292       {
 293         uns len = bget_utf8_32(body);
 294         if (!len)
 295           break;
 296         byte *buf = mp_alloc_fast_noalign(o_hdr->pool, len);
 297         bread(body, buf, len);
 298         uns type = buf[--len];
 299         buf[len] = 0;
 300         obj_read_attr_ref(&st, type, buf);
 301       }
 302       obj_read_end(&st);
 303       *body_start = btell(body) - start;
 304       return 0;
 305     }
 306
 307     /* Read all the bucket into 1 buffer, 0-copy if possible.  */
 308     byte *ptr, *end;
 309     uns len = bdirect_read_prepare(body, &ptr);
 310     uns copied = 0;
 311     if (len < buck_len ||
 312         ((body->can_overwrite_buffer < 2 || !allow_zero_copy) && buck_type == BUCKET_TYPE_V33))
 313     {
 314       /* Copy if the original buffer is too small.
 315        * If it is write-protected, copy it also if it is uncompressed.  */
 316       DBG("NO ZC: %d < %d, %d %08x", len, buck_len, body->can_overwrite_buffer, buck_type);
 317       bb_grow(&buf->bb, buck_len);
 318       len = bread(body, buf->bb.ptr, buck_len);
 319       ptr = buf->bb.ptr;
 320       copied = 1;
 321     }
 322     else
 323       DBG("ZC (%d >= %d, %d %08x)", len, buck_len, body->can_overwrite_buffer, buck_type);
 324     end = ptr + buck_len;
 325
 326     ptr = decode_attributes(ptr, end, o_hdr, 0);                // header
 327     if (buck_type == BUCKET_TYPE_V33_LIZARD)            // decompression
 328     {
 329       if (ptr + 8 > end)
 330         {
 331           if (ptr == end)                               // truncated bucket
 332             goto commit;
 333           RET_ERR(EINVAL);
 334         }
 335       len = GET_U32(ptr);
 336       ptr += 4;
 337       uns adler = GET_U32(ptr);
 338       ptr += 4;
 339       byte *new_ptr = lizard_decompress_safe(ptr, buf->lizard, len);
 340       if (!new_ptr)
 341         return -1;
 342       if (adler32(new_ptr, len) != adler)
 343         RET_ERR(EINVAL);
 344       if (!copied)
 345         bdirect_read_commit(body, end);
 346       ptr = new_ptr;
 347       end = ptr + len;
 348       copied = 1;
 349     }
 350     ptr = decode_attributes(ptr, end, o_body, 2);       // body
 351     if (ptr != end)
 352       RET_ERR(EINVAL);
 353   commit:
 354     if (!copied)
 355       bdirect_read_commit_modified(body, ptr);
 356   }
 357   else
 358     {
 359       bskip(body, buck_len);
 360       RET_ERR(EINVAL);
 361     }
 362   return 0;
 363 }
 364
 365 struct odes *
 366 obj_read_bucket(struct buck2obj_buf *buf, struct mempool *pool, uns buck_type, uns buck_len, struct fastbuf *body,
 367                 uns *body_start, uns allow_zero_copy)
 368 {
 369   struct odes *o = obj_new(pool);
 370   if (buck2obj_parse(buf, buck_type, buck_len, body, o, body_start, o, allow_zero_copy) < 0)
 371     return NULL;
 372   else
 373     return o;
 374 }
 375
 376 static int
 377 obj_read_line(struct fastbuf *f, struct obj_read_state *st)
 378 {
 379   byte *buf = bgets_stk(f);
 380   if (buf)
 381     {
 382       if (!buf[0])
 383         return 1;
 384       obj_read_attr(st, buf[0], buf+1);
 385       return -1;
 386     }
 387   else
 388     return 0;
 389 }
 390
 391 int
 392 obj_read(struct fastbuf *f, struct odes *o)
 393 {
 394   struct obj_read_state st;
 395   int rc = 0;
 396   obj_read_start(&st, o);
 397   while ((rc = obj_read_line(f, &st)) < 0);
 398   obj_read_end(&st);
 399   return rc;
 400 }
 401
/*
 * Error callback of the object reader: log the message `err' at level
 * L_ERROR.  The reader state `st' is not used.
 */
void
default_obj_read_error(struct obj_read_state *st UNUSED, char *err)
{
  msg(L_ERROR, "%s", err);
}