2 * Sherlock Library -- Object Buckets
4 * (c) 2001--2003 Martin Mares <mj@ucw.cz>
6 * This software may be freely distributed and used according to the terms
7 * of the GNU Lesser General Public License.
11 #include "lib/bucket.h"
12 #include "lib/fastbuf.h"
/*
 * obuck_remains: payload bytes of the current bucket that obuck_fb_refill()
 * has not yet handed out; it is loaded from obuck_hdr.length and decreased
 * by each refill.
 * obuck_check_pad: alignment padding derived from the header size, the bucket
 * length and OBUCK_ALIGN; the last refill reads obuck_check_pad + 4 extra
 * bytes so that the trailer word can be checked.
 */
23 static unsigned int obuck_remains, obuck_check_pad;
/* The fastbuf allocated in obuck_init() and wired to obuck_fb_refill() / obuck_fb_spout(). */
24 static struct fastbuf *obuck_fb;
/* In-memory copy of the header of the bucket currently being read or created. */
25 static struct obuck_header obuck_hdr;
/*
 * bucket_start: file offset of the current bucket's header (also printed by
 * obuck_broken() as the error position).
 * bucket_current: file offset used for the next sh_pread()/sh_pwrite() on the
 * current bucket; obuck_slurp_pool() instead stores the aligned start of the
 * following bucket here.
 */
26 static sh_off_t bucket_start, bucket_current;
28 /*** Configuration ***/
/* Path of the bucket file opened by obuck_init(); registered as config item "BucketFile". */
30 byte *obuck_name = "not/configured";
/* Size of the obuck_fb buffer and upper bound on one refill's payload; config item "BufSize". */
31 static uns obuck_io_buflen = 65536;
/* Size of each of the read and write buffers allocated by obuck_shakedown(); config item "ShakeBufSize". */
32 static int obuck_shake_buflen = 1048576;
/* Buffer length passed to bopen() in obuck_slurp_pool(); config item "SlurpBufSize". */
33 static uns obuck_slurp_buflen = 65536;
35 static struct cfitem obuck_config[] = {
36 { "Buckets", CT_SECTION, NULL },
37 { "BucketFile", CT_STRING, &obuck_name },
38 { "BufSize", CT_INT, &obuck_io_buflen },
39 { "ShakeBufSize", CT_INT, &obuck_shake_buflen },
40 { "SlurpBufSize", CT_INT, &obuck_slurp_buflen },
41 { NULL, CT_STOP, NULL }
44 static void CONSTRUCTOR obuck_init_config(void)
46 cf_register(obuck_config);
49 /*** Internal operations ***/
52 obuck_broken(char *msg)
54 die("Object pool corrupted: %s (pos=%Lx)", msg, (long long) bucket_start);
58 * Unfortunately we cannot use flock() here since it happily permits
59 * locking a shared fd (e.g., after fork()) multiple times. The fcntl
60 * locks are very ugly and they don't support 64-bit offsets, but we
61 * can work around the problem by always locking the first header
66 obuck_do_lock(int type)
71 fl.l_whence = SEEK_SET;
73 fl.l_len = sizeof(struct obuck_header);
74 if (fcntl(obuck_fd, F_SETLKW, &fl) < 0)
75 die("fcntl lock: %m");
81 obuck_do_lock(F_RDLCK);
85 obuck_lock_write(void)
87 obuck_do_lock(F_WRLCK);
93 obuck_do_lock(F_UNLCK);
96 /*** FastIO emulation ***/
98 /* We need to use pread/pwrite since we work on fd's shared between processes */
101 obuck_fb_refill(struct fastbuf *f)
103 unsigned limit = (obuck_io_buflen < obuck_remains) ? obuck_io_buflen : obuck_remains;
104 unsigned size = (limit == obuck_remains) ? (limit+obuck_check_pad+4) : limit;
109 l = sh_pread(obuck_fd, f->buffer, size, bucket_current);
111 die("Error reading bucket: %m");
112 if ((unsigned) l != size)
113 obuck_broken("Short read");
115 f->bstop = f->buffer + limit;
116 bucket_current += limit;
117 f->pos = bucket_current - bucket_start - sizeof(obuck_hdr);
118 obuck_remains -= limit;
119 if (!obuck_remains) /* Should check the trailer */
121 if (GET_U32(f->buffer + size - 4) != OBUCK_TRAILER)
122 obuck_broken("Missing trailer");
128 obuck_fb_spout(struct fastbuf *f)
130 int l = f->bptr - f->buffer;
135 int z = sh_pwrite(obuck_fd, c, l, bucket_current);
137 die("Error writing bucket: %m");
143 f->pos = bucket_current - bucket_start - sizeof(obuck_hdr);
146 /*** Exported functions ***/
149 obuck_init(int writeable)
154 obuck_fd = sh_open(obuck_name, (writeable ? O_RDWR | O_CREAT : O_RDONLY), 0666);
156 die("Unable to open bucket file %s: %m", obuck_name);
157 obuck_fb = b = xmalloc_zero(sizeof(struct fastbuf) + obuck_io_buflen + OBUCK_ALIGN + 4);
158 b->buffer = (char *)(b+1);
159 b->bptr = b->bstop = b->buffer;
160 b->bufend = b->buffer + obuck_io_buflen;
162 b->refill = obuck_fb_refill;
163 b->spout = obuck_fb_spout;
165 size = sh_seek(obuck_fd, 0, SEEK_END);
168 /* If the bucket pool is not empty, check consistency of its end */
170 bucket_start = size - 4; /* for error reporting */
171 if (sh_pread(obuck_fd, &check, 4, size-4) != 4 ||
172 check != OBUCK_TRAILER)
173 obuck_broken("Missing trailer of last object");
196 struct fastbuf *b = obuck_fb;
198 bucket_start = obuck_get_pos(oid);
200 if (sh_pread(obuck_fd, &obuck_hdr, sizeof(obuck_hdr), bucket_start) != sizeof(obuck_hdr))
201 obuck_broken("Short header read");
202 bucket_current = bucket_start + sizeof(obuck_hdr);
203 if (obuck_hdr.magic != OBUCK_MAGIC)
204 obuck_broken("Missing magic number");
205 if (obuck_hdr.oid == OBUCK_OID_DELETED)
206 obuck_broken("Access to deleted bucket");
207 if (obuck_hdr.oid != oid)
208 obuck_broken("Invalid backlink");
212 obuck_find_by_oid(struct obuck_header *hdrp)
214 oid_t oid = hdrp->oid;
216 ASSERT(oid < OBUCK_OID_FIRST_SPECIAL);
220 memcpy(hdrp, &obuck_hdr, sizeof(obuck_hdr));
224 obuck_find_first(struct obuck_header *hdrp, int full)
228 return obuck_find_next(hdrp, full);
232 obuck_find_next(struct obuck_header *hdrp, int full)
235 struct fastbuf *b = obuck_fb;
240 bucket_start = (bucket_start + sizeof(obuck_hdr) + obuck_hdr.length +
241 4 + OBUCK_ALIGN - 1) & ~((sh_off_t)(OBUCK_ALIGN - 1));
244 c = sh_pread(obuck_fd, &obuck_hdr, sizeof(obuck_hdr), bucket_start);
248 if (c != sizeof(obuck_hdr))
249 obuck_broken("Short header read");
250 bucket_current = bucket_start + sizeof(obuck_hdr);
251 if (obuck_hdr.magic != OBUCK_MAGIC)
252 obuck_broken("Missing magic number");
253 if (obuck_hdr.oid != OBUCK_OID_DELETED || full)
255 memcpy(hdrp, &obuck_hdr, sizeof(obuck_hdr));
265 obuck_remains = obuck_hdr.length;
266 obuck_check_pad = (OBUCK_ALIGN - sizeof(obuck_hdr) - obuck_hdr.length - 4) & (OBUCK_ALIGN - 1);
271 obuck_fetch_end(struct fastbuf *b UNUSED)
276 obuck_predict_last_oid(void)
278 sh_off_t size = sh_seek(obuck_fd, 0, SEEK_END);
279 return size >> OBUCK_SHIFT;
287 bucket_start = sh_seek(obuck_fd, 0, SEEK_END);
288 if (bucket_start & (OBUCK_ALIGN - 1))
289 obuck_broken("Misaligned file");
290 obuck_hdr.magic = OBUCK_INCOMPLETE_MAGIC;
291 obuck_hdr.oid = bucket_start >> OBUCK_SHIFT;
292 obuck_hdr.length = obuck_hdr.orig_length = 0;
293 bucket_current = bucket_start;
294 bwrite(obuck_fb, &obuck_hdr, sizeof(obuck_hdr));
295 obuck_fb->pos = -sizeof(obuck_hdr);
300 obuck_create_end(struct fastbuf *b UNUSED, struct obuck_header *hdrp)
303 obuck_hdr.magic = OBUCK_MAGIC;
304 obuck_hdr.length = obuck_hdr.orig_length = btell(obuck_fb);
305 pad = (OBUCK_ALIGN - sizeof(obuck_hdr) - obuck_hdr.length - 4) & (OBUCK_ALIGN - 1);
308 bputl(obuck_fb, OBUCK_TRAILER);
310 ASSERT(!(bucket_current & (OBUCK_ALIGN - 1)));
311 sh_pwrite(obuck_fd, &obuck_hdr, sizeof(obuck_hdr), bucket_start);
313 memcpy(hdrp, &obuck_hdr, sizeof(obuck_hdr));
317 obuck_delete(oid_t oid)
321 obuck_hdr.oid = OBUCK_OID_DELETED;
322 sh_pwrite(obuck_fd, &obuck_hdr, sizeof(obuck_hdr), bucket_start);
326 /*** Fast reading of the whole pool ***/
/*
 * Read-only fastbuf over the bucket file, opened by obuck_slurp_pool() with
 * bopen(); obuck_slurp_refill() takes its data from it through
 * bdirect_read_prepare() / bdirect_read_commit().
 */
328 static struct fastbuf *obuck_rpf;
331 obuck_slurp_refill(struct fastbuf *f)
337 l = bdirect_read_prepare(obuck_rpf, &f->buffer);
339 obuck_broken("Incomplete object");
340 l = MIN(l, obuck_remains);
341 bdirect_read_commit(obuck_rpf, f->buffer + l);
344 f->bufend = f->bstop = f->buffer + l;
349 obuck_slurp_pool(struct obuck_header *hdrp)
351 static struct fastbuf limiter;
359 obuck_rpf = bopen(obuck_name, O_RDONLY, obuck_slurp_buflen);
363 bsetpos(obuck_rpf, bucket_current - 4);
364 if (bgetl(obuck_rpf) != OBUCK_TRAILER)
365 obuck_broken("Missing trailer");
367 bucket_start = btell(obuck_rpf);
368 l = bread(obuck_rpf, hdrp, sizeof(struct obuck_header));
376 if (l != sizeof(struct obuck_header))
377 obuck_broken("Short header read");
378 if (hdrp->magic != OBUCK_MAGIC)
379 obuck_broken("Missing magic number");
380 bucket_current = (bucket_start + sizeof(obuck_hdr) + hdrp->length +
381 4 + OBUCK_ALIGN - 1) & ~((sh_off_t)(OBUCK_ALIGN - 1));
383 while (hdrp->oid == OBUCK_OID_DELETED);
384 if (obuck_get_pos(hdrp->oid) != bucket_start)
385 obuck_broken("Invalid backlink");
386 obuck_remains = hdrp->length;
387 limiter.bptr = limiter.bstop = limiter.buffer = limiter.bufend = NULL;
388 limiter.name = "Bucket";
390 limiter.refill = obuck_slurp_refill;
397 obuck_shakedown(int (*kibitz)(struct obuck_header *old, oid_t new, byte *buck))
399 byte *rbuf, *wbuf, *msg;
400 sh_off_t rstart, wstart, w_bucket_start;
401 int roff, woff, rsize, l;
402 struct obuck_header *rhdr, *whdr;
404 rbuf = xmalloc(obuck_shake_buflen);
405 wbuf = xmalloc(obuck_shake_buflen);
407 roff = woff = rsize = 0;
409 /* We need to be the only accessor, all the object ID's are becoming invalid */
414 bucket_start = rstart + roff;
415 w_bucket_start = wstart + woff;
416 if (rsize - roff < OBUCK_ALIGN)
418 rhdr = (struct obuck_header *)(rbuf + roff);
419 if (rhdr->magic != OBUCK_MAGIC ||
420 rhdr->oid != OBUCK_OID_DELETED && rhdr->oid != (oid_t)(bucket_start >> OBUCK_SHIFT))
422 msg = "header mismatch";
425 l = (sizeof(struct obuck_header) + rhdr->length + 4 + OBUCK_ALIGN - 1) & ~(OBUCK_ALIGN-1);
426 if (l > obuck_shake_buflen)
428 if (rhdr->oid != OBUCK_OID_DELETED)
430 msg = "bucket longer than ShakeBufSize";
433 rstart = bucket_start + l;
438 if (rsize - roff < l)
440 if (GET_U32(rbuf + roff + l - 4) != OBUCK_TRAILER)
442 msg = "missing trailer";
445 if (rhdr->oid != OBUCK_OID_DELETED)
447 if (kibitz(rhdr, w_bucket_start >> OBUCK_SHIFT, (byte *)(rhdr+1)))
449 if (bucket_start == w_bucket_start)
451 /* No copying needed now nor ever in the past, hence woff==0 */
456 if (obuck_shake_buflen - woff < l)
458 if (sh_pwrite(obuck_fd, wbuf, woff, wstart) != woff)
459 die("obuck_shakedown write failed: %m");
463 whdr = (struct obuck_header *)(wbuf+woff);
464 memcpy(whdr, rhdr, l);
465 whdr->oid = w_bucket_start >> OBUCK_SHIFT;
471 kibitz(rhdr, OBUCK_OID_DELETED, NULL);
478 memmove(rbuf, rbuf+roff, rsize-roff);
483 l = sh_pread(obuck_fd, rbuf+rsize, obuck_shake_buflen-rsize, rstart+rsize);
485 die("obuck_shakedown read error: %m");
490 msg = "unexpected EOF";
497 if (sh_pwrite(obuck_fd, wbuf, woff, wstart) != woff)
498 die("obuck_shakedown write failed: %m");
501 sh_ftruncate(obuck_fd, wstart);
509 log(L_ERROR, "Error during object pool shakedown: %s (pos=%Ld, id=%x), gathering debris", msg, (long long) bucket_start, (uns)(bucket_start >> OBUCK_SHIFT));
512 sh_pwrite(obuck_fd, wbuf, woff, wstart);
515 while (wstart + OBUCK_ALIGN <= bucket_start)
517 u32 check = OBUCK_TRAILER;
518 obuck_hdr.magic = OBUCK_MAGIC;
519 obuck_hdr.oid = OBUCK_OID_DELETED;
520 if (bucket_start - wstart < 0x40000000)
521 obuck_hdr.length = bucket_start - wstart - sizeof(obuck_hdr) - 4;
523 obuck_hdr.length = 0x40000000 - sizeof(obuck_hdr) - 4;
524 obuck_hdr.orig_length = obuck_hdr.length;
525 sh_pwrite(obuck_fd, &obuck_hdr, sizeof(obuck_hdr), wstart);
526 wstart += sizeof(obuck_hdr) + obuck_hdr.length + 4;
527 sh_pwrite(obuck_fd, &check, 4, wstart-4);
529 die("Fatal error during object pool shakedown");
/*
 * Length of the i-th bucket in the self-test below: a deterministic value
 * reduced modulo MAXLEN. The test writes this many bytes per bucket and later
 * compares it with the orig_length read back.
 * NOTE(review): MAXLEN is defined outside this view -- confirm it is non-zero.
 */
539 #define LEN(i) ((259309*(i))%MAXLEN)
541 int main(int argc, char **argv)
544 unsigned int i, j, cnt;
545 struct obuck_header h;
549 if (cf_getopt(argc, argv, CF_SHORT_OPTS, CF_NO_LONG_OPTS, NULL) >= 0 ||
552 fputs("This program supports only the following command-line arguments:\n" CF_USAGE, stderr);
558 for(j=0; j<COUNT; j++)
561 for(i=0; i<LEN(j); i++)
562 bputc(b, (i+j) % 256);
563 obuck_create_end(b, &h);
564 printf("Writing %08x %d -> %d\n", h.oid, h.orig_length, h.length);
567 for(j=0; j<COUNT; j++)
568 if (j % 100 < KILLPERC)
570 printf("Deleting %08x\n", ids[j]);
571 obuck_delete(ids[j]);
574 for(j=0; j<COUNT; j++)
575 if (j % 100 >= KILLPERC)
579 obuck_find_by_oid(&h);
581 printf("Reading %08x %d -> %d\n", h.oid, h.orig_length, h.length);
582 if (h.orig_length != LEN(j))
583 die("Invalid length");
584 for(i=0; i<h.orig_length; i++)
585 if ((unsigned) bgetc(b) != (i+j) % 256)
586 die("Contents mismatch");
591 if (obuck_find_first(&h, 0))
594 printf("<<< %08x\t%d\n", h.oid, h.orig_length);
597 while (obuck_find_next(&h, 0));
599 die("Walk mismatch");