From: Martin Mares Date: Sat, 11 Dec 2004 11:30:41 +0000 (+0000) Subject: Split library functions to libsh and libucw. X-Git-Tag: holmes-import~856 X-Git-Url: http://mj.ucw.cz/gitweb/?a=commitdiff_plain;h=fc7fa4d600551c44261e8faf6fc4caaa5d459851;p=libucw.git Split library functions to libsh and libucw. --- diff --git a/lib/Makefile b/lib/Makefile index dcd7e6ff..3b41f8c1 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -1,9 +1,9 @@ -# Makefile for the Sherlock Library (c) 1997--2004 Martin Mares +# Makefile for the UCW Library (c) 1997--2004 Martin Mares DIRS+=lib -PROGS+=obj/lib/db-tool obj/lib/buckettool +PROGS+=obj/lib/db-tool -LIBSH_MODS= \ +LIBUCW_MODS= \ alloc alloc_str realloc mempool mempool-str \ mmap pagecache partmap hashfunc \ lists sorter bitsig \ @@ -14,10 +14,9 @@ LIBSH_MODS= \ fb-file carefulio fb-mem fb-temp fb-mmap fb-limfd fb-buffer \ str_ctype str_upper str_lower unicode-utf8 \ wildmatch wordsplit ctmatch patimatch patmatch regex \ - bucket object buck2obj obj2buck \ prime random timer log2 randomkey \ db \ - url urlkey finger \ + url \ mainloop exitstatus runcmd sighandler \ lizard lizard-safe lizard-fb adler32 \ md5 md5hex \ @@ -28,26 +27,26 @@ ifdef CONFIG_OWN_REGEX include lib/regex/Makefile endif -LIBSH_MOD_PATHS=$(addprefix obj/lib/,$(LIBSH_MODS)) $(CUSTOM_LIB_MODULES) +LIBUCW=obj/lib/libucw.$(LS) +LIBUCW_MOD_PATHS=$(addprefix obj/lib/,$(LIBUCW_MODS)) $(CUSTOM_LIB_MODULES) -obj/lib/libsh.a: $(addsuffix .o,$(LIBSH_MOD_PATHS)) -obj/lib/libsh.so: $(addsuffix .oo,$(LIBSH_MOD_PATHS)) +obj/lib/libucw.a: $(addsuffix .o,$(LIBUCW_MOD_PATHS)) +obj/lib/libucw.so: $(addsuffix .oo,$(LIBUCW_MOD_PATHS)) obj/lib/hashfunc.o obj/lib/hashfunc.oo: CFLAGS += -funroll-loops obj/lib/lizard.o: CFLAGS += -O6 -funroll-loops -obj/lib/db-test: obj/lib/db-test.o $(LIBSH) -obj/lib/db-tool: obj/lib/db-tool.o $(LIBSH) -obj/lib/buckettool: obj/lib/buckettool.o $(LIBSH) -obj/lib/conf-test: obj/lib/conf-test.o $(LIBSH) -obj/lib/sort-test: obj/lib/sort-test.o $(LIBSH) -obj/lib/lfs-test: obj/lib/lfs-test.o $(LIBSH) -obj/lib/hash-test: obj/lib/hash-test.o $(LIBSH) -obj/lib/str-test: obj/lib/str-test.o $(LIBSH) -obj/lib/asort-test: obj/lib/asort-test.o $(LIBSH) -obj/lib/redblack-test: obj/lib/redblack-test.o $(LIBSH) -obj/lib/binheap-test: obj/lib/binheap-test.o $(LIBSH) -obj/lib/lizard-test: obj/lib/lizard-test.o $(LIBSH) +obj/lib/db-test: obj/lib/db-test.o $(LIBUCW) +obj/lib/db-tool: obj/lib/db-tool.o $(LIBUCW) +obj/lib/conf-test: obj/lib/conf-test.o $(LIBUCW) +obj/lib/sort-test: obj/lib/sort-test.o $(LIBUCW) +obj/lib/lfs-test: obj/lib/lfs-test.o $(LIBUCW) +obj/lib/hash-test: obj/lib/hash-test.o $(LIBUCW) +obj/lib/str-test: obj/lib/str-test.o $(LIBUCW) +obj/lib/asort-test: obj/lib/asort-test.o $(LIBUCW) +obj/lib/redblack-test: obj/lib/redblack-test.o $(LIBUCW) +obj/lib/binheap-test: obj/lib/binheap-test.o $(LIBUCW) +obj/lib/lizard-test: obj/lib/lizard-test.o $(LIBUCW) TESTS+=$(addprefix obj/lib/,regex.test unicode-utf8.test hash-test.test) obj/lib/regex.test: obj/lib/regex-t diff --git a/lib/buck2obj.c b/lib/buck2obj.c deleted file mode 100644 index dacdeba4..00000000 --- a/lib/buck2obj.c +++ /dev/null @@ -1,360 +0,0 @@ -/* - * Generating Objects from Buckets - * - * (c) 2004, Robert Spalek - * (c) 2004, Martin Mares - */ - -#undef LOCAL_DEBUG - -#include "lib/lib.h" -#include "lib/unaligned.h" -#include "lib/mempool.h" -#include "lib/fastbuf.h" -#include "lib/unicode.h" -#include "lib/object.h" -#include "lib/bucket.h" -#include "lib/lizard.h" -#include "lib/bbuf.h" -#include "lib/ff-utf8.h" - -#include -#include -#include - -#define RET_ERR(num) ({ errno = num; return -1; }) - -struct buck2obj_buf -{ - bb_t bb; - struct lizard_buffer *lizard; -}; - -static uns get_attr_type; - -void -get_attr_set_type(uns type) -{ - if (type < BUCKET_TYPE_PLAIN || type > BUCKET_TYPE_V33_LIZARD) - die("Unknown buckettype %x", type); - get_attr_type = type; -} - -int -get_attr(byte **pos, byte *end, struct parsed_attr *attr) -{ - byte *ptr = *pos; - if (ptr >= end) - return -1; - if (get_attr_type < BUCKET_TYPE_V33) - { - if (get_attr_type == BUCKET_TYPE_PLAIN) - { - while (ptr < end && *ptr == '\n') - ptr++; - *pos = ptr; - if (ptr >= end) - return -1; - } - else if (*ptr == '\n') - { - *pos = ++ptr; - attr->attr = 0; - return 0; - } - attr->attr = *ptr++; - attr->val = ptr; - while (ptr < end && *ptr != '\n') - ptr++; - attr->len = ptr++ - attr->val; - } - else - { - uns len; - GET_UTF8_32(ptr, len); - if (!len--) - { - *pos = ptr; - attr->attr = 0; - return 0; - } - attr->attr = ptr[len]; - attr->val = ptr; - attr->len = len; - ptr += len+1; - } - if (ptr > end) - die("Incomplete attribute %c", attr->attr); - *pos = ptr; - return attr->attr; -} - -int -bget_attr(struct fastbuf *b, struct parsed_attr *attr) -{ - static bb_t buf; - if (get_attr_type < BUCKET_TYPE_V33) - { - int c = bgetc(b); - if (c < 0) - return -1; - if (get_attr_type == BUCKET_TYPE_PLAIN) - { - while (c == '\n') - c = bgetc(b); - if (c < 0) - return -1; - } - else if (c == '\n') - { - attr->attr = 0; - return 0; - } - attr->attr = c; - - byte *ptr, *end; - uns len = bdirect_read_prepare(b, &ptr); - end = ptr + len; - attr->val = ptr; - while (ptr < end && *ptr != '\n') - ptr++; - if (ptr < end) - { - bdirect_read_commit(b, ptr+1); - attr->len = ptr - attr->val; - return attr->attr; - } - - len = 0; - c = bgetc(b); - while (c >= 0 && c != '\n') - { - bb_grow(&buf, len+1); - buf.ptr[len++] = c; - c = bgetc(b); - } - if (c < 0) - die("Incomplete attribute %c", attr->attr); - attr->val = buf.ptr; - attr->len = len; - } - else - { - int len = bget_utf8_32(b); - if (len < 0) - return -1; - if (!len) - { - attr->attr = 0; - return 0; - } - attr->len = len-1; - - byte *ptr; - int avail = bdirect_read_prepare(b, &ptr); - if (avail >= len) - { - attr->val = ptr; - attr->attr = ptr[len-1]; - bdirect_read_commit(b, ptr + len); - return attr->attr; - } - bb_grow(&buf, --len); - breadb(b, buf.ptr, len); - attr->val = buf.ptr; - attr->len = len; - attr->attr = bgetc(b); - if (attr->attr < 0) - die("Incomplete attribute %c", attr->attr); - } - return attr->attr; -} - -struct buck2obj_buf * -buck2obj_alloc(void) -{ - struct buck2obj_buf *buf = xmalloc(sizeof(struct buck2obj_buf)); - bb_init(&buf->bb); - buf->lizard = lizard_alloc(); - return buf; -} - -void -buck2obj_free(struct buck2obj_buf *buf) -{ - lizard_free(buf->lizard); - bb_done(&buf->bb); - xfree(buf); -} - -static inline byte * -decode_attributes(byte *ptr, byte *end, struct odes *o, uns can_overwrite) -{ - if (can_overwrite >= 2) - while (ptr < end) - { - uns len; - GET_UTF8_32(ptr, len); - if (!len--) - break; - byte type = ptr[len]; - - ptr[len] = 0; - obj_add_attr_ref(o, type, ptr); - - ptr += len + 1; - } - else - while (ptr < end) - { - uns len; - GET_UTF8_32(ptr, len); - if (!len--) - break; - byte type = ptr[len]; - - byte *dup = mp_alloc_fast_noalign(o->pool, len+1); - memcpy(dup, ptr, len); - dup[len] = 0; - obj_add_attr_ref(o, type, dup); - - ptr += len + 1; - } - return ptr; -} - -int -buck2obj_parse(struct buck2obj_buf *buf, uns buck_type, uns buck_len, struct fastbuf *body, struct odes *o_hdr, uns *body_start, struct odes *o_body) -{ - if (buck_type <= BUCKET_TYPE_PLAIN) - { - if (body_start) // there is no header part - *body_start = 0; - // ignore empty lines and read until the end of the bucket - sh_off_t end = btell(body) + buck_len; - byte buf[MAX_ATTR_SIZE]; - while (btell(body) < end && bgets(body, buf, sizeof(buf))) - if (buf[0]) - obj_add_attr(o_hdr, buf[0], buf+1); - ASSERT(btell(body) == end); - } - else if (buck_type == BUCKET_TYPE_V30) - { - sh_off_t start = btell(body); - sh_off_t end = start + buck_len; - byte buf[MAX_ATTR_SIZE]; - while (btell(body) < end && bgets(body, buf, sizeof(buf)) && buf[0]) - obj_add_attr(o_hdr, buf[0], buf+1); - if (body_start) - *body_start = btell(body) - start; - else - { - while (btell(body) < end && bgets(body, buf, sizeof(buf))) - if (buf[0]) - obj_add_attr(o_body, buf[0], buf+1); - ASSERT(btell(body) == end); - } - } - else if (buck_type == BUCKET_TYPE_V33 || buck_type == BUCKET_TYPE_V33_LIZARD) - { - /* Avoid reading the whole bucket if only its header is needed. */ - if (body_start) - { - sh_off_t start = btell(body); - sh_off_t end = start + buck_len; - while (btell(body) < end) - { - uns len = bget_utf8_32(body); - if (!len) - break; - byte *buf = mp_alloc_fast_noalign(o_hdr->pool, len); - bread(body, buf, len); - uns type = buf[--len]; - buf[len] = 0; - obj_add_attr_ref(o_hdr, type, buf); - } - *body_start = btell(body) - start; - return 0; - } - - /* Read all the bucket into 1 buffer, 0-copy if possible. */ - byte *ptr, *end; - uns len = bdirect_read_prepare(body, &ptr); - uns copied = 0; - if (len < buck_len - || (body->can_overwrite_buffer < 2 && buck_type == BUCKET_TYPE_V33)) - { - /* Copy if the original buffer is too small. - * If it is write-protected, copy it also if it is uncompressed. */ - DBG("NO ZC: %d < %d, %d %08x", len, buck_len, body->can_overwrite_buffer, buck_type); - bb_grow(&buf->bb, buck_len); - len = bread(body, buf->bb.ptr, buck_len); - ptr = buf->bb.ptr; - copied = 1; - } - else - DBG("ZC (%d >= %d, %d %08x)", len, buck_len, body->can_overwrite_buffer, buck_type); - end = ptr + buck_len; - - ptr = decode_attributes(ptr, end, o_hdr, 0); // header - if (buck_type == BUCKET_TYPE_V33_LIZARD) // decompression - { - if (ptr + 8 > end) - { - if (ptr == end) // truncated bucket - goto commit; - RET_ERR(EINVAL); - } - len = GET_U32(ptr); - ptr += 4; - uns adler = GET_U32(ptr); - ptr += 4; - byte *new_ptr = lizard_decompress_safe(ptr, buf->lizard, len); - if (!new_ptr) - return -1; - if (adler32(new_ptr, len) != adler) - RET_ERR(EINVAL); - if (!copied) - bdirect_read_commit(body, end); - ptr = new_ptr; - end = ptr + len; - copied = 1; - } - ptr = decode_attributes(ptr, end, o_body, 2); // body - if (ptr != end) - RET_ERR(EINVAL); - commit: - if (!copied) - bdirect_read_commit_modified(body, ptr); - } - else - { - bskip(body, buck_len); - RET_ERR(EINVAL); - } - return 0; -} - -struct odes * -obj_read_bucket(struct buck2obj_buf *buf, struct mempool *pool, uns buck_type, uns buck_len, struct fastbuf *body, uns *body_start) -{ - struct odes *o = obj_new(pool); - if (buck2obj_parse(buf, buck_type, buck_len, body, o, body_start, o) < 0) - return NULL; - else - return o; -} - -int -obj_read(struct fastbuf *f, struct odes *o) -{ - byte buf[MAX_ATTR_SIZE]; - - while (bgets(f, buf, sizeof(buf))) - { - if (!buf[0]) - return 1; - obj_add_attr(o, buf[0], buf+1); - } - return 0; -} diff --git a/lib/bucket.c b/lib/bucket.c deleted file mode 100644 index 296c7fa8..00000000 --- a/lib/bucket.c +++ /dev/null @@ -1,857 +0,0 @@ -/* - * Sherlock Library -- Object Buckets - * - * (c) 2001--2004 Martin Mares - * (c) 2004 Robert Spalek - * - * This software may be freely distributed and used according to the terms - * of the GNU Lesser General Public License. - */ - -#undef LOCAL_DEBUG - -#include "lib/lib.h" -#include "lib/bucket.h" -#include "lib/fastbuf.h" -#include "lib/lfs.h" -#include "lib/conf.h" - -#include -#include -#include -#include -#include -#include - -static int obuck_fd; -static struct obuck_header obuck_hdr, obuck_create_hdr; -static sh_off_t bucket_find_pos; -static struct fastbuf *obuck_write_fb; - -/*** Configuration ***/ - -byte *obuck_name = "not/configured"; -static uns obuck_io_buflen = 65536; -static int obuck_shake_buflen = 1048576; -static uns obuck_shake_security; -static uns obuck_slurp_buflen = 65536; - -static struct cfitem obuck_config[] = { - { "Buckets", CT_SECTION, NULL }, - { "BucketFile", CT_STRING, &obuck_name }, - { "BufSize", CT_INT, &obuck_io_buflen }, - { "ShakeBufSize", CT_INT, &obuck_shake_buflen }, - { "ShakeSecurity", CT_INT, &obuck_shake_security }, - { "SlurpBufSize", CT_INT, &obuck_slurp_buflen }, - { NULL, CT_STOP, NULL } -}; - -static void CONSTRUCTOR obuck_init_config(void) -{ - cf_register(obuck_config); -} - -/*** Internal operations ***/ - -static void -obuck_broken(char *msg, sh_off_t pos) -{ - die("Object pool corrupted: %s (pos=%Lx)", msg, (long long) pos); -} - -/* - * We need several types of locks: - * - * Read lock reading parts of bucket file - * Write lock any write operations - * Append lock appending to the end of the file - * Scan lock reading parts which we are certain they exist - * - * Multiple read and scan locks can co-exist together. - * Scan locks can co-exist with an append lock. - * There can be at most one write/append lock at a time. - * - * These lock types map to a pair of normal read-write locks which - * we represent as fcntl() locks on the first and second byte of the - * bucket file. [We cannot use flock() since it happily permits - * locking a shared fd (e.g., after fork()) multiple times at it also - * doesn't offer multiple locks on a single file.] - * - * byte0 byte1 - * Read - * Write - * Append - - * Scan - - */ - -static inline void -obuck_do_lock(int type, int start, int len) -{ - struct flock fl; - - fl.l_type = type; - fl.l_whence = SEEK_SET; - fl.l_start = start; - fl.l_len = len; - if (fcntl(obuck_fd, F_SETLKW, &fl) < 0) - die("fcntl lock: %m"); -} - -inline void -obuck_lock_read(void) -{ - obuck_do_lock(F_RDLCK, 0, 2); -} - -inline void -obuck_lock_write(void) -{ - obuck_do_lock(F_WRLCK, 0, 2); -} - -static inline void -obuck_lock_append(void) -{ - obuck_do_lock(F_WRLCK, 0, 1); -} - -static inline void -obuck_lock_read_to_scan(void) -{ - obuck_do_lock(F_UNLCK, 0, 1); -} - -inline void -obuck_unlock(void) -{ - obuck_do_lock(F_UNLCK, 0, 2); -} - -/*** FastIO emulation ***/ - -struct fb_bucket { - struct fastbuf fb; - sh_off_t start_pos; - uns bucket_size; - byte buffer[0]; -}; -#define FB_BUCKET(f) ((struct fb_bucket *)(f)->is_fastbuf) - -static int obuck_fb_count; - -static void -obuck_fb_close(struct fastbuf *f) -{ - obuck_fb_count--; - xfree(f); -} - -/* We need to use pread/pwrite since we work on fd's shared between processes */ - -static int -obuck_fb_refill(struct fastbuf *f) -{ - uns remains, bufsize, size, datasize; - - remains = FB_BUCKET(f)->bucket_size - (uns)f->pos; - if (!remains) - return 0; - f->buffer = FB_BUCKET(f)->buffer; /* Could have been trimmed by bdirect_read_commit_modified() */ - bufsize = f->bufend - f->buffer; - sh_off_t start = FB_BUCKET(f)->start_pos; - sh_off_t pos = start + sizeof(struct obuck_header) + f->pos; - if (remains <= bufsize) - { - datasize = remains; - size = start + obuck_bucket_size(FB_BUCKET(f)->bucket_size) - pos; - } - else - size = datasize = bufsize; - int l = sh_pread(obuck_fd, f->buffer, size, pos); - if (l < 0) - die("Error reading bucket: %m"); - if ((unsigned) l != size) - obuck_broken("Short read", FB_BUCKET(f)->start_pos); - f->bptr = f->buffer; - f->bstop = f->buffer + datasize; - f->pos += datasize; - if (datasize < size) - { - if (GET_U32(f->buffer + size - 4) != OBUCK_TRAILER) - obuck_broken("Missing trailer", FB_BUCKET(f)->start_pos); - } - return datasize; -} - -static void -obuck_fb_seek(struct fastbuf *f, sh_off_t pos, int whence) -{ - ASSERT(whence == SEEK_SET || whence == SEEK_END); - if (whence == SEEK_END) - pos += FB_BUCKET(f)->bucket_size; - ASSERT(pos >= 0 && pos <= FB_BUCKET(f)->bucket_size); - f->pos = pos; -} - -static void -obuck_fb_spout(struct fastbuf *f) -{ - int l = f->bptr - f->buffer; - char *c = f->buffer; - - while (l) - { - int z = sh_pwrite(obuck_fd, c, l, FB_BUCKET(f)->start_pos + sizeof(struct obuck_header) + f->pos); - if (z <= 0) - die("Error writing bucket: %m"); - f->pos += z; - l -= z; - c += z; - } - f->bptr = f->buffer; -} - -/*** Exported functions ***/ - -void -obuck_init(int writeable) -{ - sh_off_t size; - - obuck_fd = sh_open(obuck_name, (writeable ? O_RDWR | O_CREAT : O_RDONLY), 0666); - if (obuck_fd < 0) - die("Unable to open bucket file %s: %m", obuck_name); - obuck_lock_read(); - size = sh_seek(obuck_fd, 0, SEEK_END); - if (size) - { - /* If the bucket pool is not empty, check consistency of its end */ - u32 check; - if (sh_pread(obuck_fd, &check, 4, size-4) != 4 || - check != OBUCK_TRAILER) - obuck_broken("Missing trailer of last object", size - 4); - } - obuck_unlock(); -} - -void -obuck_cleanup(void) -{ - close(obuck_fd); - if (obuck_fb_count) - log(L_ERROR, "Bug: Unbalanced bucket opens/closes: %d streams remain", obuck_fb_count); - if (obuck_write_fb) - log(L_ERROR, "Bug: Forgot to close bucket write stream"); -} - -void -obuck_sync(void) -{ - if (obuck_write_fb) - bflush(obuck_write_fb); - fsync(obuck_fd); -} - -static void -obuck_get(oid_t oid) -{ - bucket_find_pos = obuck_get_pos(oid); - if (sh_pread(obuck_fd, &obuck_hdr, sizeof(obuck_hdr), bucket_find_pos) != sizeof(obuck_hdr)) - obuck_broken("Short header read", bucket_find_pos); - if (obuck_hdr.magic != OBUCK_MAGIC) - obuck_broken("Missing magic number", bucket_find_pos); - if (obuck_hdr.oid == OBUCK_OID_DELETED) - obuck_broken("Access to deleted bucket", bucket_find_pos); - if (obuck_hdr.oid != oid) - obuck_broken("Invalid backlink", bucket_find_pos); -} - -void -obuck_find_by_oid(struct obuck_header *hdrp) -{ - oid_t oid = hdrp->oid; - - ASSERT(oid < OBUCK_OID_FIRST_SPECIAL); - obuck_lock_read(); - obuck_get(oid); - obuck_unlock(); - memcpy(hdrp, &obuck_hdr, sizeof(obuck_hdr)); -} - -int -obuck_find_first(struct obuck_header *hdrp, int full) -{ - bucket_find_pos = 0; - obuck_hdr.magic = 0; - return obuck_find_next(hdrp, full); -} - -int -obuck_find_next(struct obuck_header *hdrp, int full) -{ - int c; - - for(;;) - { - if (obuck_hdr.magic) - bucket_find_pos += obuck_bucket_size(obuck_hdr.length); - obuck_lock_read(); - c = sh_pread(obuck_fd, &obuck_hdr, sizeof(obuck_hdr), bucket_find_pos); - obuck_unlock(); - if (!c) - return 0; - if (c != sizeof(obuck_hdr)) - obuck_broken("Short header read", bucket_find_pos); - if (obuck_hdr.magic != OBUCK_MAGIC) - obuck_broken("Missing magic number", bucket_find_pos); - if (obuck_hdr.oid != OBUCK_OID_DELETED || full) - { - memcpy(hdrp, &obuck_hdr, sizeof(obuck_hdr)); - return 1; - } - } -} - -struct fastbuf * -obuck_fetch(void) -{ - struct fastbuf *b; - uns official_buflen = ALIGN(MIN(obuck_hdr.length, obuck_io_buflen), OBUCK_ALIGN); - uns real_buflen = official_buflen + OBUCK_ALIGN; - - b = xmalloc(sizeof(struct fb_bucket) + real_buflen); - b->buffer = b->bptr = b->bstop = FB_BUCKET(b)->buffer; - b->bufend = b->buffer + official_buflen; - b->name = "bucket-read"; - b->pos = 0; - b->refill = obuck_fb_refill; - b->spout = NULL; - b->seek = obuck_fb_seek; - b->close = obuck_fb_close; - b->config = NULL; - b->can_overwrite_buffer = 2; - FB_BUCKET(b)->start_pos = bucket_find_pos; - FB_BUCKET(b)->bucket_size = obuck_hdr.length; - obuck_fb_count++; - return b; -} - -oid_t -obuck_predict_last_oid(void) -{ - sh_off_t size = sh_seek(obuck_fd, 0, SEEK_END); - return (oid_t)(size >> OBUCK_SHIFT); -} - -struct fastbuf * -obuck_create(u32 type) -{ - ASSERT(!obuck_write_fb); - - obuck_lock_append(); - sh_off_t start = sh_seek(obuck_fd, 0, SEEK_END); - if (start & (OBUCK_ALIGN - 1)) - obuck_broken("Misaligned file", start); - obuck_create_hdr.magic = OBUCK_INCOMPLETE_MAGIC; - obuck_create_hdr.oid = start >> OBUCK_SHIFT; - obuck_create_hdr.length = 0; - obuck_create_hdr.type = type; - - struct fastbuf *b = xmalloc(sizeof(struct fb_bucket) + obuck_io_buflen); - obuck_write_fb = b; - b->buffer = FB_BUCKET(b)->buffer; - b->bptr = b->bstop = b->buffer; - b->bufend = b->buffer + obuck_io_buflen; - b->pos = -(int)sizeof(obuck_create_hdr); - b->name = "bucket-write"; - b->refill = NULL; - b->spout = obuck_fb_spout; - b->seek = NULL; - b->close = NULL; - b->config = NULL; - b->can_overwrite_buffer = 0; - FB_BUCKET(b)->start_pos = start; - FB_BUCKET(b)->bucket_size = 0; - bwrite(b, &obuck_create_hdr, sizeof(obuck_create_hdr)); - - return b; -} - -void -obuck_create_end(struct fastbuf *b, struct obuck_header *hdrp) -{ - ASSERT(b == obuck_write_fb); - obuck_write_fb = NULL; - - obuck_create_hdr.magic = OBUCK_MAGIC; - obuck_create_hdr.length = btell(b); - int pad = (OBUCK_ALIGN - sizeof(obuck_create_hdr) - obuck_create_hdr.length - 4) & (OBUCK_ALIGN - 1); - while (pad--) - bputc(b, 0); - bputl(b, OBUCK_TRAILER); - bflush(b); - ASSERT(!((FB_BUCKET(b)->start_pos + sizeof(obuck_create_hdr) + b->pos) & (OBUCK_ALIGN - 1))); - if (sh_pwrite(obuck_fd, &obuck_create_hdr, sizeof(obuck_create_hdr), FB_BUCKET(b)->start_pos) != sizeof(obuck_create_hdr)) - die("Bucket header update failed: %m"); - obuck_unlock(); - memcpy(hdrp, &obuck_create_hdr, sizeof(obuck_create_hdr)); - xfree(b); -} - -void -obuck_delete(oid_t oid) -{ - obuck_lock_write(); - obuck_get(oid); - obuck_hdr.oid = OBUCK_OID_DELETED; - sh_pwrite(obuck_fd, &obuck_hdr, sizeof(obuck_hdr), bucket_find_pos); - obuck_unlock(); -} - -/*** Fast reading of the whole pool ***/ - -static struct fastbuf *obuck_rpf; -static uns slurp_remains; -static sh_off_t slurp_start, slurp_current, slurp_end; - -static int -obuck_slurp_refill(struct fastbuf *f) -{ - if (!slurp_remains) - return 0; - uns l = bdirect_read_prepare(obuck_rpf, &f->buffer); - if (!l) - obuck_broken("Incomplete object", slurp_start); - l = MIN(l, slurp_remains); - /* XXX: This probably should be bdirect_read_commit_modified() in some cases, - * but it doesn't hurt since we aren't going to seek. - */ - bdirect_read_commit(obuck_rpf, f->buffer + l); - slurp_remains -= l; - f->bptr = f->buffer; - f->bufend = f->bstop = f->buffer + l; - f->pos += l; - return 1; -} - -void -obuck_slurp_end(void) -{ - if (obuck_rpf) - { - bclose(obuck_rpf); - obuck_rpf = NULL; - obuck_unlock(); - } -} - -struct fastbuf * -obuck_slurp_pool(struct obuck_header *hdrp, oid_t next_oid) -{ - static struct fastbuf limiter; - uns l; - - do - { - if (!obuck_rpf) - { - obuck_lock_read(); - obuck_rpf = bopen(obuck_name, O_RDONLY, obuck_slurp_buflen); - slurp_end = bfilesize(obuck_rpf); - obuck_lock_read_to_scan(); - } - else - { - bsetpos(obuck_rpf, slurp_current - 4); - if (bgetl(obuck_rpf) != OBUCK_TRAILER) - obuck_broken("Missing trailer", slurp_start); - } - if (next_oid == OBUCK_OID_ANY) - slurp_start = btell(obuck_rpf); - else - { - slurp_start = obuck_get_pos(next_oid); - bsetpos(obuck_rpf, slurp_start); - } - if (slurp_start < slurp_end) - l = bread(obuck_rpf, hdrp, sizeof(struct obuck_header)); - else - { - obuck_slurp_end(); - return NULL; - } - if (l != sizeof(struct obuck_header)) - obuck_broken("Short header read", slurp_start); - if (hdrp->magic != OBUCK_MAGIC) - obuck_broken("Missing magic number", slurp_start); - slurp_current = slurp_start + obuck_bucket_size(hdrp->length); - } - while (hdrp->oid == OBUCK_OID_DELETED); - if (obuck_get_pos(hdrp->oid) != slurp_start) - obuck_broken("Invalid backlink", slurp_start); - slurp_remains = hdrp->length; - limiter.bptr = limiter.bstop = limiter.buffer = limiter.bufend = NULL; - limiter.name = "Bucket"; - limiter.pos = 0; - limiter.refill = obuck_slurp_refill; - limiter.can_overwrite_buffer = obuck_rpf->can_overwrite_buffer; - return &limiter; -} - -/*** Shakedown ***/ - -static inline void -shake_write(void *addr, int len, sh_off_t pos) -{ - int l = sh_pwrite(obuck_fd, addr, len, pos); - if (l != len) - { - if (l < 0) - die("obuck_shakedown write error: %m"); - else - die("obuck_shakedown write error: disk full"); - } -} - -static inline void -shake_sync(void) -{ - if (obuck_shake_security > 1) - fdatasync(obuck_fd); -} - -static void -shake_write_backup(sh_off_t bpos, byte *norm_buf, int norm_size, byte *fragment, int frag_size, sh_off_t frag_pos, int more_size) -{ - struct obuck_header *bhdr; - int boff = 0; - int l; - oid_t old_oid; - - /* First of all, the "normal" part -- everything that will be written in this pass */ - DBG("Backing up first round of changes at position %Lx + %x", (long long) bpos, norm_size); - while (boff < norm_size) - { - /* This needn't be optimized for speed. */ - bhdr = (struct obuck_header *) (norm_buf + boff); - ASSERT(bhdr->magic == OBUCK_MAGIC); - l = obuck_bucket_size(bhdr->length); - old_oid = bhdr->oid; - bhdr->oid = bpos >> OBUCK_SHIFT; - shake_write(bhdr, l, bpos); - bhdr->oid = old_oid; - boff += l; - bpos += l; - } - - /* If we have an incomplete bucket at the end of the buffer, we must copy it as well. */ - if (more_size) - { - DBG("Backing up fragment of size %x and %x more", frag_size, more_size); - - /* First the part we already have in the buffer */ - bhdr = (struct obuck_header *) fragment; - ASSERT(bhdr->magic == OBUCK_MAGIC); - old_oid = bhdr->oid; - bhdr->oid = bpos >> OBUCK_SHIFT; - shake_write(bhdr, frag_size, bpos); - bhdr->oid = old_oid; - bpos += frag_size; - - /* And then the rest, using a small 64K buffer */ - byte *auxbuf = alloca(65536); - l = 0; - while (l < more_size) - { - int j = MIN(more_size-l, 65536); - if (sh_pread(obuck_fd, auxbuf, j, frag_pos + frag_size + l) != j) - die("obuck_shakedown read error: %m"); - shake_write(auxbuf, j, bpos); - bpos += j; - l += j; - } - } -} - -static void -shake_erase(sh_off_t start, sh_off_t end) -{ - if (start > end) - die("shake_erase called with negative length, that's a bug"); - ASSERT(!(start & (OBUCK_ALIGN-1)) && !(end & (OBUCK_ALIGN-1))); - while (start < end) - { - u32 check = OBUCK_TRAILER; - obuck_hdr.magic = OBUCK_MAGIC; - obuck_hdr.oid = OBUCK_OID_DELETED; - uns len = MIN(0x40000000, end-start); - obuck_hdr.length = len - sizeof(obuck_hdr) - 4; - DBG("Erasing %08x bytes at %Lx", len, (long long) start); - shake_write(&obuck_hdr, sizeof(obuck_hdr), start); - start += len; - shake_write(&check, 4, start-4); - } -} - -void -obuck_shakedown(int (*kibitz)(struct obuck_header *old, oid_t new, byte *buck)) -{ - byte *buf; /* Shakedown buffer and its size */ - int buflen = ALIGN(obuck_shake_buflen, OBUCK_ALIGN); - byte *msg; /* Error message we will print */ - sh_off_t rstart, wstart; /* Original and new position of buffer start */ - sh_off_t r_bucket_start, w_bucket_start; /* Original and new position of the current bucket */ - int roff, woff; /* Orig/new position of the current bucket relative to buffer start */ - int rsize; /* Number of original bytes in the buffer */ - int l; /* Raw size of the current bucket */ - int changed = 0; /* "Something has been altered" flag */ - int wrote_anything = 0; /* We already did a write to the bucket file */ - struct obuck_header *rhdr, *whdr; /* Original and new address of header of the current bucket */ - sh_off_t r_file_size; /* Original size of the bucket file */ - int more; /* How much does the last bucket overlap the buffer */ - - buf = xmalloc(buflen); - rstart = wstart = 0; - roff = woff = rsize = 0; - - /* We need to be the only accessor, all the object ID's are becoming invalid */ - obuck_lock_write(); - r_file_size = sh_seek(obuck_fd, 0, SEEK_END); - ASSERT(!(r_file_size & (OBUCK_ALIGN - 1))); - if (r_file_size >= (0x100000000 << OBUCK_SHIFT) - buflen) - die("Bucket file is too large for safe shakedown. Shaking down with Bucket.ShakeSecurity=0 will still work."); - - DBG("Starting shakedown. Buffer size is %d, original length %Lx", buflen, (long long) r_file_size); - - for(;;) - { - r_bucket_start = rstart + roff; - w_bucket_start = wstart + woff; - rhdr = (struct obuck_header *)(buf + roff); - whdr = (struct obuck_header *)(buf + woff); - if (roff == rsize) - { - more = 0; - goto next; - } - if (rhdr->magic != OBUCK_MAGIC || - rhdr->oid != OBUCK_OID_DELETED && rhdr->oid != (oid_t)(r_bucket_start >> OBUCK_SHIFT)) - { - msg = "header mismatch"; - goto broken; - } - l = obuck_bucket_size(rhdr->length); - if (l > buflen) - { - if (rhdr->oid != OBUCK_OID_DELETED) - { - msg = "bucket longer than ShakeBufSize"; - goto broken; - } - /* Empty buckets are allowed to be large, but we need to handle them extra */ - DBG("Tricking around an extra-large empty bucket at %Lx + %x", (long long)r_bucket_start, l); - rsize = roff + l; - } - else - { - if (rsize - roff < l) - { - more = l - (rsize - roff); - goto next; - } - if (GET_U32((byte *)rhdr + l - 4) != OBUCK_TRAILER) - { - msg = "missing trailer"; - goto broken; - } - } - if (rhdr->oid != OBUCK_OID_DELETED) - { - int status = kibitz(rhdr, w_bucket_start >> OBUCK_SHIFT, (byte *)(rhdr+1)); - if (status) - { - int lnew = l; - if (status > 1) - { - /* Changed! Reconstruct the trailer. */ - lnew = obuck_bucket_size(rhdr->length); - ASSERT(lnew <= l); - PUT_U32((byte *)rhdr + lnew - 4, OBUCK_TRAILER); - changed = 1; - } - whdr = (struct obuck_header *)(buf+woff); - if (rhdr != whdr) - memmove(whdr, rhdr, lnew); - whdr->oid = w_bucket_start >> OBUCK_SHIFT; - woff += lnew; - } - else - changed = 1; - } - else - { - kibitz(rhdr, OBUCK_OID_DELETED, NULL); - changed = 1; - } - roff += l; - continue; - - next: - if (changed) - { - /* Write the new contents of the bucket file */ - if (!wrote_anything) - { - if (obuck_shake_security) - { - /* But first write a backup at the end of the file to ensure nothing can be lost. */ - shake_write_backup(r_file_size, buf, woff, buf+roff, rsize-roff, rstart+roff, more); - shake_sync(); - } - wrote_anything = 1; - } - if (woff) - { - DBG("Write %Lx %x", wstart, woff); - shake_write(buf, woff, wstart); - shake_sync(); - } - } - else - ASSERT(wstart == rstart); - - /* In any case, update the write position */ - wstart += woff; - woff = 0; - - /* Skip what's been read and if there is any fragment at the end of the buffer, move it to the start */ - rstart += roff; - if (more) - { - memmove(buf, buf+roff, rsize-roff); - rsize = rsize-roff; - } - else - rsize = 0; - - /* And refill the buffer */ - r_bucket_start = rstart+rsize; /* Also needed for error messages */ - l = sh_pread(obuck_fd, buf+rsize, MIN(buflen-rsize, r_file_size - r_bucket_start), r_bucket_start); - DBG("Read %Lx %x (%x inherited)", (long long)r_bucket_start, l, rsize); - if (l < 0) - die("obuck_shakedown read error: %m"); - if (!l) - { - if (!more) - break; - msg = "unexpected EOF"; - goto broken; - } - if (l & (OBUCK_ALIGN-1)) - { - msg = "garbage at the end of file"; - goto broken; - } - rsize += l; - roff = 0; - } - - DBG("Finished at position %Lx", (long long) wstart); - sh_ftruncate(obuck_fd, wstart); - shake_sync(); - - obuck_unlock(); - xfree(buf); - return; - - broken: - log(L_ERROR, "Error during object pool shakedown: %s (pos=%Ld, id=%x), gathering debris", - msg, (long long) r_bucket_start, (uns)(r_bucket_start >> OBUCK_SHIFT)); - /* - * We can attempt to clean up the bucket file by erasing everything between the last - * byte written and the next byte to be read. If the secure mode is switched on, we can - * guarantee that no data are lost, only some might be duplicated. - */ - shake_erase(wstart, rstart); - die("Fatal error during object pool shakedown"); -} - -/*** Testing ***/ - -#ifdef TEST - -#define COUNT 5000 -#define MAXLEN 10000 -#define KILLPERC 13 -#define LEN(i) ((259309*(i))%MAXLEN) - -static int test_kibitz(struct obuck_header *h, oid_t new, byte *buck) -{ - return 1; -} - -int main(int argc, char **argv) -{ - int ids[COUNT]; - unsigned int i, j, cnt; - struct obuck_header h; - struct fastbuf *b; - - log_init(NULL); - if (cf_getopt(argc, argv, CF_SHORT_OPTS, CF_NO_LONG_OPTS, NULL) >= 0 || - optind < argc) - { - fputs("This program supports only the following command-line arguments:\n" CF_USAGE, stderr); - exit(1); - } - - unlink(obuck_name); - obuck_init(1); - for(j=0; j= KILLPERC) - { - cnt++; - h.oid = ids[j]; - obuck_find_by_oid(&h); - b = obuck_fetch(); - printf("Reading %08x %d\n", h.oid, h.length); - if (h.length != LEN(j)) - die("Invalid length"); - for(i=0; i - * - * This software may be freely distributed and used according to the terms - * of the GNU Lesser General Public License. - */ - -#ifndef _SHERLOCK_BUCKET_H -#define _SHERLOCK_BUCKET_H - -/* - * Format: The object pool is merely a sequence of object buckets. - * Each bucket starts with struct obuck_header and it's padded - * by zeros to a multiple of OBUCK_ALIGN bytes. - * - * Locking: Each operation on the pool is protected by a flock. - * - * The buckets emulate fastbuf streams. Read streams act as normal files, - * but there can be only one write stream which is non-seekable and you - * also shouldn't open new read streams when writing. - * - * fork()'ing if you don't have any bucket open is safe. - */ - -extern byte *obuck_name; /* Internal, for use by buckettool only! */ - -#define OBUCK_SHIFT 7 -#define OBUCK_ALIGN (1< - * (c) 2004 Robert Spalek - * - * This software may be freely distributed and used according to the terms - * of the GNU Lesser General Public License. - */ - -#include "lib/lib.h" -#include "lib/bucket.h" -#include "lib/fastbuf.h" -#include "lib/lfs.h" -#include "lib/conf.h" -#include "lib/mempool.h" -#include "lib/object.h" -#include "lib/lizard.h" -#include "lib/bbuf.h" -#include "lib/ff-utf8.h" - -#include -#include -#include -#include -#include - -static int verbose; -static struct mempool *pool; -static struct buck2obj_buf *buck_buf; - -static void -help(void) -{ - fprintf(stderr, "\ -Usage: buckettool [] \n\ -\n\ -Options:\n" -CF_USAGE -"\nCommands:\n\ --l\t\tlist all buckets\n\ --L\t\tlist all buckets including deleted ones\n\ --d \tdelete bucket\n\ --x \textract bucket\n\ --i[]\tinsert buckets separated by blank lines\n\ --c\t\tconcatenate and dump all buckets\n\ --f\t\taudit bucket file structure\n\ --F\t\taudit and fix bucket file structure\n\ --q\t\tquick check of bucket file consistency\n\ --r\t\tdo not parse V33 buckets, but print the raw content\n\ --s\t\tshake down bucket file (without updating other structures!!!)\n\ --v\t\tbe verbose\n\ -"); - exit(1); -} - -static oid_t -parse_id(char *c) -{ - char *e; - oid_t o = strtoul(c, &e, 16); - if (e && *e) - die("Invalid object ID: %s", c); - return o; -} - -static void -list(int full) -{ - struct obuck_header h; - - obuck_init(0); - if (obuck_find_first(&h, full)) - do - { - if (h.oid == OBUCK_OID_DELETED) - printf("DELETED %6d\n", h.length); - else - printf("%08x %6d %08x\n", h.oid, h.length, h.type); - } - while (obuck_find_next(&h, full)); - obuck_cleanup(); -} - -static void -delete(char *id) -{ - oid_t oid = parse_id(id); - obuck_init(1); - obuck_delete(oid); - obuck_cleanup(); -} - -static inline void -dump_oattrs(struct fastbuf *out, struct oattr *oa) -{ - for (; oa; oa = oa->next) - for (struct oattr *a=oa; a; a = a->same) - bprintf(out, "%c%s\n", a->attr, a->val); -} - -static void -dump_parsed_bucket(struct fastbuf *out, struct obuck_header *h, struct fastbuf *b) -{ - struct odes *o_hdr, *o_body; - mp_flush(pool); - o_hdr = obj_new(pool); - o_body = obj_new(pool); - if (buck2obj_parse(buck_buf, h->type, h->length, b, o_hdr, NULL, o_body) < 0) - bprintf(out, ".Cannot parse bucket %x of type %x and length %d: %m\n", h->oid, h->type, h->length); - else - { - dump_oattrs(out, o_hdr->attrs); - bputc(out, '\n'); - dump_oattrs(out, o_body->attrs); - } -} - -static void -extract(char *id) -{ - struct fastbuf *b, *out; - struct obuck_header h; - - h.oid = parse_id(id); - obuck_init(0); - obuck_find_by_oid(&h); - out = bfdopen_shared(1, 65536); - if (verbose) - bprintf(out, "### %08x %6d %08x\n", h.oid, h.length, h.type); - b = obuck_fetch(); - if (h.type < BUCKET_TYPE_V33 || !buck_buf) - bbcopy_slow(b, out, ~0U); - else - dump_parsed_bucket(out, &h, b); - bclose(b); - bclose(out); - obuck_cleanup(); -} - -static void -insert(byte *arg) -{ - struct fastbuf *b, *in; - byte buf[4096]; - struct obuck_header h; - byte *e; - u32 type; - bb_t lizard_buf, compressed_buf; - - bb_init(&lizard_buf); - bb_init(&compressed_buf); - if (!arg) - type = BUCKET_TYPE_PLAIN; - else if (sscanf(arg, "%x", &type) != 1) - die("Type `%s' is not a hexadecimal number"); - if (type < 10) - type += BUCKET_TYPE_PLAIN; - put_attr_set_type(type); - - in = bfdopen_shared(0, 4096); - obuck_init(1); - do - { - uns lizard_filled = 0; - uns in_body = 0; - b = NULL; - while ((e = bgets(in, buf, sizeof(buf)))) - { - if (!buf[0]) - { - if (in_body || type < BUCKET_TYPE_V30) - break; - in_body = 1; - } - if (!b) - b = obuck_create(type); - if (in_body == 1) - { - bputc(b, 0); - in_body = 2; - } - else if (type <= BUCKET_TYPE_V33 || !in_body) - { - bput_attr(b, buf[0], buf+1, e-buf-1); - } - else - { - ASSERT(BUCKET_TYPE_V33_LIZARD); - uns want_len = lizard_filled + (e-buf) + 6 + LIZARD_NEEDS_CHARS; // +6 is the maximum UTF-8 length - bb_grow(&lizard_buf, want_len); - byte *ptr = lizard_buf.ptr + lizard_filled; - ptr = put_attr(ptr, buf[0], buf+1, e-buf-1); - lizard_filled = ptr - lizard_buf.ptr; - } - } - if (in_body && type == BUCKET_TYPE_V33_LIZARD) - { - bputl(b, lizard_filled -#if 0 //TEST error resilience: write wrong length - +1 -#endif - ); - bputl(b, adler32(lizard_buf.ptr, lizard_filled) -#if 0 //TEST error resilience: write wrong checksum - +1 -#endif - ); - uns want_len = lizard_filled * LIZARD_MAX_MULTIPLY + LIZARD_MAX_ADD; - bb_grow(&compressed_buf, want_len); - want_len = lizard_compress(lizard_buf.ptr, lizard_filled, compressed_buf.ptr); -#if 0 //TEST error resilience: tamper the compressed data by removing EOF - compressed_buf[want_len-1] = 1; -#endif - bwrite(b, compressed_buf.ptr, want_len); - } - if (b) - { - obuck_create_end(b, &h); - printf("%08x %d %08x\n", h.oid, h.length, h.type); - } - } - while (e); - bb_done(&lizard_buf); - bb_done(&compressed_buf); - obuck_cleanup(); - bclose(in); -} - -static void -cat(void) -{ - struct obuck_header h; - struct fastbuf *b, *out; - byte buf[1024]; - - obuck_init(0); - out = bfdopen_shared(1, 65536); - while (b = obuck_slurp_pool(&h, OBUCK_OID_ANY)) - { - bprintf(out, "### %08x %6d %08x\n", h.oid, h.length, h.type); - if (h.type < BUCKET_TYPE_V33 || !buck_buf) - { - int lf = 1, l; - while ((l = bread(b, buf, sizeof(buf)))) - { - bwrite(out, buf, l); - lf = (buf[l-1] == '\n'); - } - if (!lf) - bprintf(out, "\n# \n"); - } - else - dump_parsed_bucket(out, &h, b); - bputc(out, '\n'); - } - bclose(out); - obuck_cleanup(); -} - -static void -fsck(int fix) -{ - int fd, i; - struct obuck_header h, nh; - sh_off_t pos = 0; - sh_off_t end; - oid_t oid; - u32 chk; - int errors = 0; - int fatal_errors = 0; - - fd = sh_open(obuck_name, O_RDWR); - if (fd < 0) - die("Unable to open the bucket file %s: %m", obuck_name); - for(;;) - { - oid = pos >> OBUCK_SHIFT; - i = sh_pread(fd, &h, sizeof(h), pos); - if (!i) - break; - if (i != sizeof(h)) - printf("%08x incomplete header\n", oid); - else if (h.magic == OBUCK_INCOMPLETE_MAGIC) - printf("%08x incomplete file\n", oid); - else if (h.magic != OBUCK_MAGIC) - printf("%08x invalid header magic\n", oid); - else if (h.oid != oid && h.oid != OBUCK_OID_DELETED) - printf("%08x invalid header backlink\n", oid); - else - { - end = (pos + sizeof(h) + h.length + 4 + OBUCK_ALIGN - 1) & ~(sh_off_t)(OBUCK_ALIGN - 1); - if (sh_pread(fd, &chk, 4, end-4) != 4) - printf("%08x missing trailer\n", oid); - else if (chk != OBUCK_TRAILER) - printf("%08x mismatched trailer\n", oid); - else - { - /* OK */ - pos = end; - continue; - } - } - errors++; - end = pos; - do - { - if (pos - end > 0x10000000) - { - printf("*** skipped for too long, giving up\n"); - fatal_errors++; - goto finish; - } - end += OBUCK_ALIGN; - if (sh_pread(fd, &nh, sizeof(nh), end) != sizeof(nh)) - { - printf("*** unable to find next header\n"); - if (fix) - { - printf("*** truncating file\n"); - sh_ftruncate(fd, pos); - } - else - printf("*** would truncate the file here\n"); - goto finish; - } - } - while (nh.magic != OBUCK_MAGIC || - (nh.oid != (oid_t)(end >> OBUCK_SHIFT) && nh.oid != OBUCK_OID_DELETED)); - printf("*** match at oid %08x\n", (uns)(end >> OBUCK_SHIFT)); - if (fix) - { - h.magic = OBUCK_MAGIC; - h.oid = OBUCK_OID_DELETED; - h.length = end - pos - sizeof(h) - 4; - sh_pwrite(fd, &h, sizeof(h), pos); - chk = OBUCK_TRAILER; - sh_pwrite(fd, &chk, 4, end-4); - printf("*** replaced the invalid chunk by a DELETED bucket of size %d\n", (uns)(end - pos)); - } - else - printf("*** would mark %d bytes as DELETED\n", (uns)(end - pos)); - pos = end; - } - finish: - close(fd); - if (!fix && errors || fatal_errors) - exit(1); -} - -static int -shake_kibitz(struct obuck_header *old, oid_t new, byte *buck UNUSED) -{ - if (verbose) - { - printf("%08x -> ", old->oid); - if (new == OBUCK_OID_DELETED) - puts("DELETED"); - else - printf("%08x\n", new); - } - return 1; -} - -static void -shake(void) -{ - obuck_init(1); - obuck_shakedown(shake_kibitz); - obuck_cleanup(); -} - -static void -quickcheck(void) -{ - obuck_init(1); - obuck_cleanup(); -} - -int -main(int argc, char **argv) -{ - int i, op; - char *arg = NULL; - uns raw = 0; - - log_init(NULL); - op = 0; - while ((i = cf_getopt(argc, argv, CF_SHORT_OPTS "lLd:x:i::cfFqrsv", CF_NO_LONG_OPTS, NULL)) != -1) - if (i == '?' || op) - help(); - else if (i == 'v') - verbose++; - else if (i == 'r') - raw++; - else - { - op = i; - arg = optarg; - } - if (optind < argc) - help(); - - if (!raw) - { - pool = mp_new(1<<14); - buck_buf = buck2obj_alloc(); - } - switch (op) - { - case 'l': - list(0); - break; - case 'L': - list(1); - break; - case 'd': - delete(arg); - break; - case 'x': - extract(arg); - break; - case 'i': - insert(arg); - break; - case 'c': - cat(); - break; - case 'f': - fsck(0); - break; - case 'F': - fsck(1); - break; - case 'q': - quickcheck(); - break; - case 's': - shake(); - break; - default: - help(); - } - if (buck_buf) - { - buck2obj_free(buck_buf); - mp_delete(pool); - } - - return 0; -} diff --git a/lib/config.h b/lib/config.h index f0ca9632..f83122f0 100644 --- a/lib/config.h +++ b/lib/config.h @@ -1,5 +1,5 @@ /* - * Sherlock -- Configuration-Dependent Definitions + * UCW Library -- Configuration-Dependent Definitions * * (c) 1997--2004 Martin Mares * @@ -7,21 +7,13 @@ * of the GNU Lesser General Public License. */ -#ifndef _SHERLOCK_CONFIG_H -#define _SHERLOCK_CONFIG_H +#ifndef _UCW_CONFIG_H +#define _UCW_CONFIG_H /* Configuration switches */ #include "lib/autoconf.h" -#ifdef CONFIG_MAX_CONTEXTS -#define CONFIG_CONTEXTS -#endif - -/* Version */ - -#define SHER_VER SHERLOCK_VERSION SHERLOCK_VERSION_SUFFIX - /* Types */ typedef unsigned char byte; /* exactly 8 bits, unsigned */ @@ -42,64 +34,7 @@ typedef unsigned int sh_time_t; /* Timestamp */ #define NULL (void *)0 #endif -typedef u32 oid_t; /* Object ID */ - -/* Data types and functions for accessing file positions */ - -#ifdef CONFIG_LARGE_DB -typedef s64 sh_off_t; -#define BYTES_PER_O 5 -#define BYTES_PER_P 8 -#define bgeto(f) bget5(f) -#define bputo(f,l) bput5(f,l) -#define bgetp(f) bgetq(f) -#define bputp(f,l) bputq(f,l) -#define GET_O(p) GET_U40(p) -#define GET_P(p) GET_U64(p) -#define PUT_O(p,x) PUT_U40(p,x) -#define PUT_P(p,x) PUT_U64(p,x) -#else -typedef s32 sh_off_t; -#define BYTES_PER_O 4 -#define BYTES_PER_P 4 -#define bgeto(f) bgetl(f) -#define bputo(f,l) bputl(f,l) -#define bgetp(f) bgetl(f) -#define bputp(f,l) bputl(f,l) -#define GET_O(p) GET_U32(p) -#define GET_P(p) GET_U32(p) -#define PUT_O(p,x) PUT_U32(p,x) -#define PUT_P(p,x) PUT_U32(p,x) -#endif - -/* Data type for area ID's */ - -#ifdef CONFIG_AREAS -typedef u32 area_t; -#define AREA_NONE 0 -#define AREA_ANY ~0U -#else -typedef struct { } area_t; -#define AREA_NONE (area_t){} -#define AREA_ANY (area_t){} -#endif - -/* Misc */ - -#ifdef __GNUC__ - -#undef inline -#define NONRET __attribute__((noreturn)) -#define UNUSED __attribute__((unused)) -#define CONSTRUCTOR __attribute__((constructor)) -#define PACKED __attribute__((packed)) -#define CONST __attribute__((const)) -#define PURE __attribute__((const)) -#define likely(x) __builtin_expect((x),1) -#define unlikely(x) __builtin_expect((x),0) - -#else -#error This program requires the GNU C compiler. -#endif +typedef s64 sh_off_t; /* FIXME */ +typedef u32 oid_t; /* Object ID */ /* FIXME */ #endif diff --git a/lib/finger.c b/lib/finger.c deleted file mode 100644 index b2e0460a..00000000 --- a/lib/finger.c +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Sherlock Library -- String Fingerprints - * - * (c) 2001--2003 Martin Mares - * - * This software may be freely distributed and used according to the terms - * of the GNU Lesser General Public License. - */ - -/* - * We use a hashing function to map all the URL's and other - * hairy strings we work with to a much simpler universe - * of constant length bit strings (currently 96-bit ones). - * With a random hashing function (which is equivalent to - * having a fixed function and random input), the probability - * of at least one collision happening is at most c*n^2/m - * where n is the number of strings we hash, m is the size - * of our bit string universe (2^96) and c is a small constant. - * We set m sufficiently large and expect no collisions - * to occur. On the other hand, the worst thing which could - * be caused by a collision is mixing up two strings or labels - * of two documents which is relatively harmless. - */ - -#include "lib/lib.h" -#include "lib/index.h" -#include "lib/md5.h" - -#include - -void -fingerprint(byte *string, struct fingerprint *fp) -{ - struct MD5Context c; - byte digest[16]; - - MD5Init(&c); - MD5Update(&c, string, strlen(string)); - MD5Final(digest, &c); - memcpy(fp->hash, digest, 12); -} diff --git a/lib/index.h b/lib/index.h deleted file mode 100644 index 6703d71d..00000000 --- a/lib/index.h +++ /dev/null @@ -1,228 +0,0 @@ -/* - * Sherlock: Data structures used in indices - * - * (c) 2001--2004 Martin Mares - */ - -#ifndef _SHERLOCK_INDEX_H -#define _SHERLOCK_INDEX_H - -#include "custom/lib/custom.h" - -/* - * Magic number which should help to avoid mixing incompatible indices. - * Syntax: - * Remember to increase with each change of index format. - */ - -#define INDEX_VERSION (0x34010000|((CUSTOM_INDEX_TYPE)<< 8)|(CUSTOM_INDEX_VERSION)) - -/* - * Words - * - * MAX_WORD_LEN is the maximum length (measured in UTF-8 characters, excluding - * the terminating zero byte if there's any) of any word which may appear in the - * indices or in the bucket file. Naturally, the same constant also bounds - * the number of UCS-2 characters in a word. - * - * Caveat: If you are upcasing/downcasing the word, the UTF-8 encoding can - * expand, although at most twice, so you need to reserve 2*MAX_WORD_LEN bytes. - */ - -#define MAX_WORD_LEN 64 /* a multiple of 4 */ - -/* Word and string types are defined in custom/lib/custom.h */ - -/* Types used for storing contexts */ - -#ifdef CONFIG_CONTEXTS -#if CONFIG_MAX_CONTEXTS == 32768 -typedef u16 context_t; -#define bget_context bgetw -#define bput_context bputw -#define GET_CONTEXT GET_U16 -#define PUT_CONTEXT PUT_U16 -#elif CONFIG_MAX_CONTEXTS == 256 -typedef byte context_t; -#define bget_context bgetc -#define bput_context bputc -#define GET_CONTEXT GET_U8 -#define PUT_CONTEXT PUT_U8 -#else -#error CONFIG_MAX_CONTEXTS set to an invalid value. -#endif -#else -struct fastbuf; -typedef struct { } context_t; -static inline uns bget_context(struct fastbuf *b UNUSED) { return 0; } -static inline void bput_context(struct fastbuf *b UNUSED, uns context UNUSED) { } -#define GET_CONTEXT(p) 0 -#define PUT_CONTEXT(p,x) do {} while(0) -#endif - -/* Index card attributes */ - -struct card_attr { - u32 card; /* Reference to card description (either oid or filepos) */ -#ifdef CONFIG_SITES - u32 site_id; -#endif - area_t area; - CUSTOM_CARD_ATTRS /* Include all custom attributes */ - byte weight; - byte flags; -#ifdef CONFIG_LASTMOD - byte age; /* Document age in pseudo-logarithmic units wrt. reference time */ -#endif -#ifdef CONFIG_FILETYPE - byte type_flags; /* File type flags (see below) */ -#endif -}; - -enum card_flag { - CARD_FLAG_EMPTY = 1, /* Empty document (redirect, robot file etc.) [scanner] */ - CARD_FLAG_ACCENTED = 2, /* Document contains accented characters [scanner] */ - CARD_FLAG_DUP = 4, /* Removed as a duplicate [merger] */ - CARD_FLAG_MERGED = 8, /* Destination of a merge [merger] */ - CARD_FLAG_IMAGE = 16, /* Is an image object [scanner] */ - CARD_FLAG_FRAMESET = 32, /* Contains a frameset to be ignored [scanner] */ - CARD_FLAG_OVERRIDEN = 64, /* Overriden by another index [sherlockd] */ -}; - -#ifndef CARD_POS_SHIFT /* (can be overriden in custom.h) */ -#define CARD_POS_SHIFT 5 /* Card positions are shifted this # of bits to the right */ -#endif - -/* - * We store document type and several other properties in card_attr->type_flags. - * Here we define only the basic structure, the details are defined in custom.h - * (the list of type names custom_file_type_names[] and also setting of the file - * types in custom_create_attrs()). - * - * bits 7--5 file type: (0-3: text types, 4-7: other types, defined by custom.h) - * bits 4--0 type-dependent information, for text types it's document language code - */ - -#ifdef CONFIG_FILETYPE -#define CA_GET_FILE_TYPE(a) ((a)->type_flags >> 4) -#define CA_GET_FILE_INFO(a) ((a)->type_flags & 0x0f) -#define CA_GET_FILE_LANG(a) ((a)->type_flags & 0x80 ? 0 : CA_GET_FILE_INFO(a)) -#define MAX_FILE_TYPES 16 -#define FILETYPE_IS_TEXT(f) ((f) < 8) -byte *ext_ft_parse(u32 *dest, byte *value, uns intval); -extern byte *custom_file_type_names[MAX_FILE_TYPES]; -#define FILETYPE_STAT_VARS uns matching_per_type[MAX_FILE_TYPES]; -#define FILETYPE_SHOW_STATS(q,f) ext_ft_show(q,f) -#define FILETYPE_INIT_STATS(q) bzero(q->matching_per_type, sizeof(q->matching_per_type)) -#ifdef CONFIG_COUNT_ALL_FILETYPES -#define FILETYPE_ATTRS LATE_SMALL_SET_ATTR(ftype, FILETYPE, CA_GET_FILE_TYPE, ext_ft_parse) -#define FILETYPE_EARLY_STATS(q,a) q->matching_per_type[CA_GET_FILE_TYPE(a)]++ -#define FILETYPE_LATE_STATS(q,a) -#else -#define FILETYPE_ATTRS SMALL_SET_ATTR(ftype, FILETYPE, CA_GET_FILE_TYPE, ext_ft_parse) -#define FILETYPE_EARLY_STATS(q,a) -#define FILETYPE_LATE_STATS(q,a) q->matching_per_type[CA_GET_FILE_TYPE(a)]++ -#endif -#else -#define FILETYPE_ATTRS -#define FILETYPE_STAT_VARS -#define FILETYPE_INIT_STATS(q) -#define FILETYPE_EARLY_STATS(q,a) -#define FILETYPE_LATE_STATS(q,a) -#define FILETYPE_SHOW_STATS(q,f) -#endif - -#ifdef CONFIG_LANG -/* You can use language matching without CONFIG_FILETYPE, but you have to define CA_GET_FILE_LANG yourself. */ -#define LANG_ATTRS SMALL_SET_ATTR(lang, LANG, CA_GET_FILE_LANG, ext_lang_parse) -byte *ext_lang_parse(u32 *dest, byte *value, uns intval); -#else -#define LANG_ATTRS -#endif - -#ifdef CONFIG_AREAS -#define CA_GET_AREA(a) ((a)->area) -#define SPLIT_ATTRS INT_ATTR(area, AREA, CA_GET_AREA, ext_area_parse) -byte *ext_area_parse(u32 *dest, byte *value, uns intval); -#else -#define SPLIT_ATTRS -#endif - -/* - * A list of all extended attributes: custom attributes and also some - * built-in attributes treated in the same way. - */ - -#define EXTENDED_ATTRS CUSTOM_ATTRS FILETYPE_ATTRS LANG_ATTRS SPLIT_ATTRS - -/* - * A list of all statistics collectors, also composed of custom parts - * and built-in parts. - */ - -#ifndef CUSTOM_STAT_VARS -#define CUSTOM_STAT_VARS -#define CUSTOM_INIT_STATS(q) -#define CUSTOM_EARLY_STATS(q,a) -#define CUSTOM_LATE_STATS(q,a) -#define CUSTOM_SHOW_STATS(q,f) -#endif - -#define EXTENDED_STAT_VARS CUSTOM_STAT_VARS FILETYPE_STAT_VARS -#define EXTENDED_INIT_STATS(q) CUSTOM_INIT_STATS(q) FILETYPE_INIT_STATS(q) -#define EXTENDED_EARLY_STATS(q,a) CUSTOM_EARLY_STATS(q,a) FILETYPE_EARLY_STATS(q,a) -#define EXTENDED_LATE_STATS(q,a) CUSTOM_LATE_STATS(q,a) FILETYPE_LATE_STATS(q,a) -#define EXTENDED_SHOW_STATS(q,f) CUSTOM_SHOW_STATS(q,f) FILETYPE_SHOW_STATS(q,f) - -/* String fingerprints */ - -struct fingerprint { - byte hash[12]; -}; - -void fingerprint(byte *string, struct fingerprint *fp); - -static inline u32 -fp_hash(struct fingerprint *fp) -{ - return (fp->hash[0] << 24) | (fp->hash[1] << 16) | (fp->hash[2] << 8) | fp->hash[3]; -} - -/* The card fingerprints */ - -struct card_print { - struct fingerprint fp; - u32 cardid; -}; - -/* URL keys */ - -#define URL_KEY_BUF_SIZE (3*MAX_URL_SIZE) -byte *url_key(byte *url, byte *buf); -void url_fingerprint(byte *url, struct fingerprint *fp); -void url_key_init(void); - -/* Conversion of document age from seconds to our internal units */ - -static inline int -convert_age(sh_time_t lastmod, sh_time_t reftime) -{ - sh_time_t age; - if (reftime < lastmod) /* past times */ - return -1; - age = (reftime - lastmod) / 3600; - if (age < 48) /* last 2 days: 1 hour resolution */ - return age; - age = (age-48) / 24; - if (age < 64) /* next 64 days: 1 day resolution */ - return 48 + age; - age = (age-64) / 7; - if (age < 135) /* next 135 weeks: 1 week resolution */ - return 112 + age; - age = (age-135) / 52; - if (age < 8) /* next 8 years: 1 year resolution */ - return 247 + age; - return 255; /* then just "infinite future" */ -} - -#endif diff --git a/lib/lib.h b/lib/lib.h index 5eb05441..80857c9c 100644 --- a/lib/lib.h +++ b/lib/lib.h @@ -1,5 +1,5 @@ /* - * Sherlock Library -- Miscellaneous Functions + * The UCW Library -- Miscellaneous Functions * * (c) 1997--2004 Martin Mares * @@ -7,14 +7,8 @@ * of the GNU Lesser General Public License. */ -/* - * This file should be included as the very first include in all - * source files, especially before all OS includes since it sets - * up libc feature macros. - */ - -#ifndef _SHERLOCK_LIB_H -#define _SHERLOCK_LIB_H +#ifndef _UCW_LIB_H +#define _UCW_LIB_H #include "lib/config.h" #include @@ -46,6 +40,24 @@ #define COMPARE_LT(x,y) do { if ((x)<(y)) return 1; if ((x)>(y)) return 0; } while(0) #define COMPARE_GT(x,y) COMPARE_LT(y,x) +/* GCC Extensions */ + +#ifdef __GNUC__ + +#undef inline +#define NONRET __attribute__((noreturn)) +#define UNUSED __attribute__((unused)) +#define CONSTRUCTOR __attribute__((constructor)) +#define PACKED __attribute__((packed)) +#define CONST __attribute__((const)) +#define PURE __attribute__((const)) +#define likely(x) __builtin_expect((x),1) +#define unlikely(x) __builtin_expect((x),0) + +#else +#error This program requires the GNU C compiler. +#endif + /* Logging */ #define L_DEBUG 'D' /* Debugging messages */ diff --git a/lib/lizard-fb.c b/lib/lizard-fb.c index 5a5c2054..d356d0cd 100644 --- a/lib/lizard-fb.c +++ b/lib/lizard-fb.c @@ -8,7 +8,7 @@ #include "lib/lizard.h" #include "lib/bbuf.h" #include "lib/fastbuf.h" -#include "lib/bucket.h" +#include "sherlock/bucket.h" /* FIXME */ #include diff --git a/lib/obj2buck.c b/lib/obj2buck.c deleted file mode 100644 index 96a29f73..00000000 --- a/lib/obj2buck.c +++ /dev/null @@ -1,246 +0,0 @@ -/* - * Generating Buckets from Objects - * - * (c) 2004, Robert Spalek - * - * This software may be freely distributed and used according to the terms - * of the GNU Lesser General Public License. - */ - -#include "lib/lib.h" -#include "lib/fastbuf.h" -#include "lib/ff-utf8.h" -#include "lib/bucket.h" -#include "lib/object.h" - -#include -#include - -static uns use_v33; -static int hdr_sep; - -void -put_attr_set_type(uns type) -{ - switch (type) - { - case BUCKET_TYPE_PLAIN: - use_v33 = 0; - hdr_sep = -1; - break; - case BUCKET_TYPE_V30: - use_v33 = 0; - hdr_sep = '\n'; - break; - case BUCKET_TYPE_V33: - case BUCKET_TYPE_V33_LIZARD: - use_v33 = 1; - hdr_sep = 0; - break; - default: - die("Don't know how to generate buckets of type %08x", type); - } -} - -uns -size_attr(uns len) -{ - ASSERT(len <= MAX_ATTR_SIZE); - if (use_v33) - { - len++; - return len + utf8_space(len); - } - else - return len + 2; -} - -inline byte * -put_attr(byte *ptr, uns type, byte *val, uns len) -{ - if (use_v33) - { - PUT_UTF8_32(ptr, len+1); - memcpy(ptr, val, len); - ptr += len; - *ptr++ = type; - } - else - { - *ptr++ = type; - memcpy(ptr, val, len); - ptr += len; - *ptr++ = '\n'; - } - return ptr; -} - -byte * -put_attr_str(byte *ptr, uns type, byte *val) -{ - return put_attr(ptr, type, val, strlen(val)); -} - -inline byte * -put_attr_vformat(byte *ptr, uns type, byte *mask, va_list va) -{ - if (use_v33) - { - uns len = vsprintf(ptr+1, mask, va); - if (len >= 127) - { - byte tmp[6], *tmp_end = tmp; - PUT_UTF8_32(tmp_end, len+1); - uns l = tmp_end - tmp; - memmove(ptr+l, ptr+1, len); - memcpy(ptr, tmp, l); - ptr += l + len; - } - else - { - *ptr = len+1; - ptr += len+1; - } - *ptr++ = type; - } - else - { - *ptr++ = type; - ptr += vsprintf(ptr, mask, va); - *ptr++ = '\n'; - } - return ptr; -} - -byte * -put_attr_format(byte *ptr, uns type, char *mask, ...) -{ - va_list va; - va_start(va, mask); - byte *ret = put_attr_vformat(ptr, type, mask, va); - va_end(va); - return ret; -} - -byte * -put_attr_num(byte *ptr, uns type, uns val) -{ - if (use_v33) - { - uns len = sprintf(ptr+1, "%d", val) + 1; - *ptr = len; - ptr += len; - *ptr++ = type; - } - else - ptr += sprintf(ptr, "%c%d\n", type, val); - return ptr; -} - -byte * -put_attr_separator(byte *ptr) -{ - if (hdr_sep >= 0) - *ptr++ = hdr_sep; - return ptr; -} - -inline void -bput_attr(struct fastbuf *b, uns type, byte *val, uns len) -{ - if (use_v33) - { - bput_utf8_32(b, len+1); - bwrite(b, val, len); - bputc(b, type); - } - else - { - bputc(b, type); - bwrite(b, val, len); - bputc(b, '\n'); - } -} - -void -bput_attr_str(struct fastbuf *b, uns type, byte *val) -{ - bput_attr(b, type, val, strlen(val)); -} - -inline void -bput_attr_vformat(struct fastbuf *b, uns type, byte *mask, va_list va) -{ - if (use_v33) - { - int len = vsnprintf(NULL, 0, mask, va); - if (len < 0) - die("vsnprintf() does not support size=0"); - bput_utf8_32(b, len+1); - vbprintf(b, mask, va); - bputc(b, type); - } - else - { - bputc(b, type); - vbprintf(b, mask, va); - bputc(b, '\n'); - } -} - -void -bput_attr_format(struct fastbuf *b, uns type, char *mask, ...) -{ - va_list va; - va_start(va, mask); - bput_attr_vformat(b, type, mask, va); - va_end(va); -} - -void -bput_attr_num(struct fastbuf *b, uns type, uns val) -{ - if (use_v33) - { - byte tmp[12]; - uns len = sprintf(tmp, "%d", val); - bputc(b, len+1); - bwrite(b, tmp, len); - bputc(b, type); - } - else - bprintf(b, "%c%d\n", type, val); -} - -void -bput_attr_separator(struct fastbuf *b) -{ - if (hdr_sep >= 0) - bputc(b, hdr_sep); -} - -void -obj_write(struct fastbuf *f, struct odes *d) -{ - for(struct oattr *a=d->attrs; a; a=a->next) - for(struct oattr *b=a; b; b=b->same) - { - byte *z; - for (z = b->val; *z; z++) - if (*z < ' ' && *z != '\t') - { - log(L_ERROR, "obj_dump: Found non-ASCII character %02x (URL might be %s) in %c%s", *z, obj_find_aval(d, 'U'), a->attr, b->val); - *z = '?'; - } - ASSERT(z - b->val <= MAX_ATTR_SIZE-2); - bput_attr_str(f, a->attr, b->val); - } -} - -void -obj_write_nocheck(struct fastbuf *f, struct odes *d) -{ - for(struct oattr *a=d->attrs; a; a=a->next) - for(struct oattr *b=a; b; b=b->same) - bput_attr_str(f, a->attr, b->val); -} diff --git a/lib/object.c b/lib/object.c deleted file mode 100644 index 6a2a90c5..00000000 --- a/lib/object.c +++ /dev/null @@ -1,274 +0,0 @@ -/* - * Sherlock Library -- Object Functions - * - * (c) 1997--2004 Martin Mares - * (c) 2004 Robert Spalek - * - * This software may be freely distributed and used according to the terms - * of the GNU Lesser General Public License. - */ - -#include "lib/lib.h" -#include "lib/mempool.h" -#include "lib/fastbuf.h" -#include "lib/object.h" - -#include -#include -#include - -void -obj_dump(struct odes *o) -{ - for(struct oattr *a=o->attrs; a; a=a->next) - for(struct oattr *b=a; b; b=b->same) - printf("%c%s\n", (a==b ? a->attr : ' '), b->val); -} - -static struct oattr * -oa_new(struct odes *o, uns x, byte *v) -{ - struct oattr *a = mp_alloc(o->pool, sizeof(struct oattr) + strlen(v)+1); - - a->next = a->same = NULL; - a->attr = x; - a->val = (byte*) (a+1); - strcpy(a->val, v); - return a; -} - -static struct oattr * -oa_new_ref(struct odes *o, uns x, byte *v) -{ - struct oattr *a = mp_alloc(o->pool, sizeof(struct oattr)); - - a->next = a->same = NULL; - a->attr = x; - a->val = v; - return a; -} - -struct odes * -obj_new(struct mempool *pool) -{ - struct odes *o = mp_alloc(pool, sizeof(struct odes)); - o->pool = pool; - o->attrs = NULL; - o->cached_attr = NULL; - return o; -} - -struct oattr * -obj_find_attr(struct odes *o, uns x) -{ - struct oattr *a; - for(a=o->attrs; a && a->attr != x; a=a->next) - ; - return a; -} - -struct oattr * -obj_find_attr_last(struct odes *o, uns x) -{ - struct oattr *a = obj_find_attr(o, x); - - if (a) - { - while (a->same) - a = a->same; - } - return a; -} - -uns -obj_del_attr(struct odes *o, struct oattr *a) -{ - struct oattr *x, **p, *y, *l; - byte aa = a->attr; - - o->cached_attr = NULL; - p = &o->attrs; - while (x = *p) - { - if (x->attr == aa) - { - y = x; - l = NULL; - while (x = *p) - { - if (x == a) - { - *p = x->same; - return 1; - } - p = &x->same; - l = x; - } - return 0; - } - p = &x->next; - } - return 0; -} - -byte * -obj_find_aval(struct odes *o, uns x) -{ - struct oattr *a = obj_find_attr(o, x); - return a ? a->val : NULL; -} - -uns -obj_find_anum(struct odes *o, uns x, uns def) -{ - struct oattr *a = obj_find_attr(o, x); - return a ? (uns)atol(a->val) : def; -} - -struct oattr * -obj_set_attr(struct odes *o, uns x, byte *v) -{ - struct oattr *a, **z; - - z = &o->attrs; - while (a = *z) - { - if (a->attr == x) - { - *z = a->next; - goto set; - } - z = &a->next; - } - - set: - if (v) - { - a = oa_new(o, x, v); - a->next = o->attrs; - o->attrs = a; - } - else - a = NULL; - o->cached_attr = a; - return a; -} - -struct oattr * -obj_set_attr_num(struct odes *o, uns a, uns v) -{ - byte x[32]; - - sprintf(x, "%d", v); - return obj_set_attr(o, a, x); -} - -static inline struct oattr * -obj_add_attr_internal(struct odes *o, struct oattr *b) -{ - struct oattr *a, **z; - - if (!(a = o->cached_attr) || a->attr != b->attr) - { - z = &o->attrs; - while ((a = *z) && a->attr != b->attr) - z = &a->next; - if (!a) - { - *z = b; - /* b->next is NULL */ - goto done; - } - } - while (a->same) - a = a->same; - a->same = b; - done: - o->cached_attr = b; - return b; -} - -struct oattr * -obj_add_attr(struct odes *o, uns x, byte *v) -{ - return obj_add_attr_internal(o, oa_new(o, x, v)); -} - -struct oattr * -obj_add_attr_ref(struct odes *o, uns x, byte *v) -{ - return obj_add_attr_internal(o, oa_new_ref(o, x, v)); -} - -struct oattr * -obj_prepend_attr(struct odes *o, uns x, byte *v) -{ - struct oattr *a, *b, **z; - - b = oa_new(o, x, v); - z = &o->attrs; - while (a = *z) - { - if (a->attr == x) - { - b->same = a; - b->next = a->next; - a->next = NULL; - *z = b; - return b; - } - z = &a->next; - } - b->next = o->attrs; - o->attrs = b; - return b; -} - -struct oattr * -obj_insert_attr(struct odes *o, struct oattr *first, struct oattr *after, byte *v) -{ - struct oattr *b = oa_new(o, first->attr, v); - b->same = after->same; - after->same = b; - return b; -} - -void -obj_move_attr_to_head(struct odes *o, uns x) -{ - struct oattr *a, **z; - - z = &o->attrs; - while (a = *z) - { - if (a->attr == x) - { - *z = a->next; - a->next = o->attrs; - o->attrs = a; - break; - } - z = &a->next; - } -} - -void -obj_move_attr_to_tail(struct odes *o, uns x) -{ - struct oattr *a, **z; - - z = &o->attrs; - while (a = *z) - { - if (a->attr == x) - { - *z = a->next; - while (*z) - z = &(*z)->next; - *z = a; - a->next = NULL; - break; - } - z = &a->next; - } -} diff --git a/lib/object.h b/lib/object.h deleted file mode 100644 index 4de7d1b2..00000000 --- a/lib/object.h +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Sherlock Library -- Object Functions - * - * (c) 1997--2004 Martin Mares - * (c) 2004, Robert Spalek - * - * This software may be freely distributed and used according to the terms - * of the GNU Lesser General Public License. - */ - -#ifndef _SHERLOCK_OBJECT_H -#define _SHERLOCK_OBJECT_H - -#define MAX_ATTR_SIZE 2500 - /* Maximum length an attribute can ever have (including name and trailing 0). - * It has to be long enough to contain 1 URL, 1 reftext, and a few numbers - * (see 'x' attribute in labels). */ - -struct fastbuf; -struct mempool; - -struct odes { /* Object description */ - struct oattr *attrs; - struct mempool *pool; - struct oattr *cached_attr; -}; - -struct oattr { /* Object attribute */ - struct oattr *next, *same; - byte attr; - byte *val; -}; - -void obj_dump(struct odes *); -struct odes *obj_new(struct mempool *); -struct oattr *obj_find_attr(struct odes *, uns); -struct oattr *obj_find_attr_last(struct odes *, uns); -uns obj_del_attr(struct odes *, struct oattr *); -byte *obj_find_aval(struct odes *, uns); -uns obj_find_anum(struct odes *, uns, uns); -struct oattr *obj_set_attr(struct odes *, uns, byte *); -struct oattr *obj_set_attr_num(struct odes *, uns, uns); -struct oattr *obj_add_attr(struct odes *, uns, byte *); -struct oattr *obj_add_attr_ref(struct odes *o, uns x, byte *v); // no strdup() -struct oattr *obj_prepend_attr(struct odes *, uns, byte *); -struct oattr *obj_insert_attr(struct odes *o, struct oattr *first, struct oattr *after, byte *v); -void obj_move_attr_to_head(struct odes *o, uns); -void obj_move_attr_to_tail(struct odes *o, uns); - -/* buck2obj.c: Reading of objects from buckets */ - -struct parsed_attr { - int attr; - byte *val; - uns len; -}; -struct buck2obj_buf; - -void get_attr_set_type(uns type); -int get_attr(byte **pos, byte *end, struct parsed_attr *attr); -int bget_attr(struct fastbuf *b, struct parsed_attr *attr); - -struct buck2obj_buf *buck2obj_alloc(void); -void buck2obj_free(struct buck2obj_buf *buf); - -int buck2obj_parse(struct buck2obj_buf *buf, uns buck_type, uns buck_len, struct fastbuf *body, struct odes *o_hdr, uns *body_start, struct odes *o_body); -struct odes *obj_read_bucket(struct buck2obj_buf *buf, struct mempool *pool, uns buck_type, uns buck_len, struct fastbuf *body, uns *body_start); - /* If body_start != NULL, then only the header is parsed and *body_start is - * set to the position of the body. This function does a plenty of optimizations - * and if the body fastbuf is overwritable (body->can_overwrite_buffer), it can keep the - * attribute values stored on their original locations in the fastbuf's buffer. - * However, no such things are performed when reading the header only. - */ - -int obj_read(struct fastbuf *, struct odes *); - -/* obj2buck.c: Generating buckets from objects */ - -void put_attr_set_type(uns type); - -uns size_attr(uns len); - -byte *put_attr(byte *ptr, uns type, byte *val, uns len); -byte *put_attr_str(byte *ptr, uns type, byte *val); -byte *put_attr_vformat(byte *ptr, uns type, byte *mask, va_list va); -byte *put_attr_format(byte *ptr, uns type, char *mask, ...) __attribute__((format(printf,3,4))); -byte *put_attr_num(byte *ptr, uns type, uns val); -byte *put_attr_separator(byte *ptr); - -void bput_attr(struct fastbuf *b, uns type, byte *val, uns len); -void bput_attr_str(struct fastbuf *b, uns type, byte *val); -void bput_attr_vformat(struct fastbuf *b, uns type, byte *mask, va_list va); -void bput_attr_format(struct fastbuf *b, uns type, char *mask, ...) __attribute__((format(printf,3,4))); -void bput_attr_num(struct fastbuf *b, uns type, uns val); -void bput_attr_separator(struct fastbuf *b); - -void obj_write(struct fastbuf *, struct odes *); -void obj_write_nocheck(struct fastbuf *, struct odes *); - -#endif diff --git a/lib/regex/Makefile b/lib/regex/Makefile index 28a41dc5..d688a775 100644 --- a/lib/regex/Makefile +++ b/lib/regex/Makefile @@ -2,6 +2,6 @@ DIRS+=lib/regex -LIBSH_MODS+=regex/regex +LIBUCW_MODS+=regex/regex obj/lib/regex/regex.o: CWARNS= diff --git a/lib/tagged-text.h b/lib/tagged-text.h deleted file mode 100644 index 270bc91d..00000000 --- a/lib/tagged-text.h +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Sherlock: Processing of tagged characters - * - * (c) 2001--2003 Martin Mares - */ - -#ifndef _SHERLOCK_TAGGED_TEXT_H -#define _SHERLOCK_TAGGED_TEXT_H - -#include "lib/fastbuf.h" -#include "lib/ff-utf8.h" - -/* Reading of tagged text (Unicode values, tags mapped to 0x80000000 and higher) */ - -#define GET_TAGGED_CHAR(p,u) do { \ - u = *p; \ - if (u >= 0xc0) \ - GET_UTF8(p,u); \ - else if (u >= 0x80) \ - { \ - p++; \ - if (u >= 0xb0) \ - { \ - ASSERT(u == 0xb0); \ - u += 0x80020000; \ - } \ - else if (u >= 0xa0) \ - { \ - ASSERT(*p >= 0x80 && *p <= 0xbf); \ - u = 0x80010000 + ((u & 0x0f) << 6) + (*p++ & 0x3f); \ - } \ - else \ - u += 0x80000000; \ - } \ - else \ - p++; \ -} while (0) - -#define SKIP_TAGGED_CHAR(p) do { \ - if (*p >= 0x80 && *p < 0xc0) \ - { \ - uns u = *p++; \ - if (u >= 0xa0 && u < 0xb0 && *p >= 0x80 && *p < 0xc0) \ - p++; \ - } \ - else \ - UTF8_SKIP(p); \ -} while (0) - -static inline uns -bget_tagged_char(struct fastbuf *f) -{ - uns u = bgetc(f); - if ((int)u < 0x80) - ; - else if (u < 0xc0) - { - if (u >= 0xb0) - { - ASSERT(u == 0xb0); - u += 0x80020000; - } - else if (u >= 0xa0) - { - uns v = bgetc(f); - ASSERT(v >= 0x80 && v <= 0xbf); - u = 0x80010000 + ((u & 0x0f) << 6) + (v & 0x3f); - } - else - u += 0x80000000; - } - else - { - bungetc(f); - u = bget_utf8(f); - } - return u; -} - -#endif diff --git a/lib/urlkey.c b/lib/urlkey.c deleted file mode 100644 index 34a6fc81..00000000 --- a/lib/urlkey.c +++ /dev/null @@ -1,293 +0,0 @@ -/* - * Sherlock Library -- URL Keys & URL Fingerprints - * - * (c) 2003 Martin Mares - * - * This software may be freely distributed and used according to the terms - * of the GNU Lesser General Public License. - */ - -#undef LOCAL_DEBUG - -#include "lib/lib.h" -#include "lib/conf.h" -#include "lib/index.h" -#include "lib/url.h" -#include "lib/fastbuf.h" -#include "lib/chartype.h" -#include "lib/hashfunc.h" - -#include -#include - -/*** Prefix recognition table ***/ - -struct pxtab_rhs { - struct pxtab_node *node; - uns len; - byte rhs[1]; -}; - -struct pxtab_node { - struct pxtab_node *parent; - struct pxtab_rhs *rhs; - uns len, total_len; - byte component[0]; -}; - -#define HASH_NODE struct pxtab_node -#define HASH_PREFIX(p) pxtab_##p -#define HASH_KEY_COMPLEX(x) x parent, x component, x len -#define HASH_KEY_DECL struct pxtab_node *parent UNUSED, byte *component UNUSED, uns len UNUSED -#define HASH_WANT_FIND -#define HASH_WANT_LOOKUP -#define HASH_GIVE_HASHFN -#define HASH_GIVE_EQ -#define HASH_GIVE_EXTRA_SIZE -#define HASH_GIVE_INIT_KEY -#define HASH_USE_POOL cfpool - -static inline uns -pxtab_hash(HASH_KEY_DECL) -{ - return ((uns)parent) ^ hash_block(component, len); -} - -static inline int -pxtab_eq(struct pxtab_node *p1, byte *c1, uns l1, struct pxtab_node *p2, byte *c2, uns l2) -{ - return p1 == p2 && l1 == l2 && !memcmp(c1, c2, l1); -} - -static inline int -pxtab_extra_size(HASH_KEY_DECL) -{ - return len; -} - -static inline void -pxtab_init_key(struct pxtab_node *node, HASH_KEY_DECL) -{ - node->parent = parent; - node->len = len; - memcpy(node->component, component, len); - node->rhs = NULL; -} - -#include "lib/hashtable.h" - -static inline byte * -pxtab_skip_first_comp(byte *x) -{ - while (*x && *x != ':') - x++; - byte *y = x; - while (*x != '/' || x[1] != '/') - { - if (!*x) - return y; - x++; - } - return x+2; -} - -static inline byte * -pxtab_skip_next_comp(byte *x) -{ - for(;;) - { - if (!*x) - return x; - if (*x == '/') - return x+1; - x++; - } -} - -static struct pxtab_node * -pxtab_find_rule(byte *lhs) -{ - byte *next; - struct pxtab_node *node, *parent = NULL; - - next = pxtab_skip_first_comp(lhs); - DBG("\tfirst: %.*s", next-lhs, lhs); - node = pxtab_find(NULL, lhs, next-lhs); - while (node && *next) - { - parent = node; - lhs = next; - next = pxtab_skip_next_comp(lhs); - DBG("\tnext: %.*s", next-lhs, lhs); - node = pxtab_find(parent, lhs, next-lhs); - } - return node ? : parent; -} - -static struct pxtab_node * -pxtab_add_rule(byte *lhs, struct pxtab_rhs *rhs) -{ - byte *lhs_start = lhs; - byte *next; - struct pxtab_node *node, *parent; - - next = pxtab_skip_first_comp(lhs); - DBG("\tfirst: %.*s", next-lhs, lhs); - node = pxtab_lookup(NULL, lhs, next-lhs); - for(;;) - { - if (node->rhs) - return NULL; - if (!*next) - break; - lhs = next; - next = pxtab_skip_next_comp(lhs); - parent = node; - DBG("\tnext: %.*s", next-lhs, lhs); - node = pxtab_lookup(parent, lhs, next-lhs); - } - DBG("\tsetting rhs, %d to eat", next-lhs_start); - node->rhs = rhs; - node->total_len = next - lhs_start; - return node; -} - -static struct pxtab_rhs * -pxtab_add_rhs(byte *rhs) -{ - uns len = strlen(rhs); - struct pxtab_rhs *r = cfg_malloc(sizeof(*r) + len); - r->len = len; - memcpy(r->rhs, rhs, len+1); - struct pxtab_node *node = pxtab_add_rule(rhs, r); - r->node = node; - return r; -} - -static void -pxtab_load(byte *name) -{ - struct fastbuf *f; - struct pxtab_rhs *rhs = NULL; - byte line[MAX_URL_SIZE], url[MAX_URL_SIZE], *c, *d; - int err; - int lino = 0; - - DBG("Loading prefix table %s", name); - f = bopen(name, O_RDONLY, 4096); - while (bgets(f, line, sizeof(line))) - { - lino++; - c = line; - while (Cblank(*c)) - c++; - if (!*c || *c == '#') - continue; - if (err = url_auto_canonicalize(c, url)) - die("%s, line %d: Invalid URL (%s)", name, lino, url_error(err)); - if (!(d = strrchr(c, '/')) || d[1]) - die("%s, line %d: Prefix rules must end with a slash", name, lino); - if (c == line) - { - DBG("Creating RHS <%s>", c); - if (!(rhs = pxtab_add_rhs(c))) - die("%s, line %d: Right-hand side already mapped", name, lino); - } - else if (!rhs) - die("%s, line %d: Syntax error", name, lino); - else - { - DBG("Adding LHS <%s>", c); - if (!pxtab_add_rule(c, rhs)) - die("%s, line %d: Duplicate rule", name, lino); - } - } - bclose(f); -} - -/*** Configuration ***/ - -static uns urlkey_www_hack; -static byte *urlkey_pxtab_path; - -static struct cfitem urlkey_config[] = { - { "URLKey", CT_SECTION, NULL }, - { "WWWHack", CT_INT, &urlkey_www_hack }, - { "PrefixTable", CT_STRING, &urlkey_pxtab_path }, - { NULL, CT_STOP, NULL } -}; - -static void CONSTRUCTOR urlkey_conf_init(void) -{ - cf_register(urlkey_config); -} - -void -url_key_init(void) -{ - pxtab_init(); - if (urlkey_pxtab_path) - pxtab_load(urlkey_pxtab_path); -} - -static inline byte * -url_key_remove_www(byte *url, byte **pbuf) -{ - if (urlkey_www_hack && !strncmp(url, "http://www.", 11)) - { - byte *buf = *pbuf; - strcpy(buf, "http://"); - strcpy(buf+7, url+11); - DBG("\tWWW hack: %s -> %s", url, buf); - url = buf; - *pbuf = buf + MAX_URL_SIZE; - } - return url; -} - -byte * -url_key(byte *url, byte *buf) -{ - DBG("Generating URL key for %s", url); - url = url_key_remove_www(url, &buf); - struct pxtab_node *rule = pxtab_find_rule(url); - if (rule && rule->rhs && rule->rhs->node != rule) - { - struct pxtab_rhs *rhs = rule->rhs; - DBG("\tApplying rule <%s>, remove %d, add %d", rhs->rhs, rule->total_len, rhs->len); - memcpy(buf, rhs->rhs, rhs->len); - strcpy(buf + rhs->len, url + rule->total_len); - url = buf; - buf += MAX_URL_SIZE; - } - DBG("\tOutput: %s", url); - return url; -} - -void -url_fingerprint(byte *url, struct fingerprint *fp) -{ - byte buf[URL_KEY_BUF_SIZE]; - fingerprint(url_key(url, buf), fp); -} - -#ifdef TEST - -int main(int argc, char **argv) -{ - cf_read(cfdeffile); - url_key_init(); - for (int i=1; i