-# Makefile for the Sherlock Library (c) 1997--2004 Martin Mares <mj@ucw.cz>
+# Makefile for the UCW Library (c) 1997--2004 Martin Mares <mj@ucw.cz>
DIRS+=lib
-PROGS+=obj/lib/db-tool obj/lib/buckettool
+PROGS+=obj/lib/db-tool
-LIBSH_MODS= \
+LIBUCW_MODS= \
alloc alloc_str realloc mempool mempool-str \
mmap pagecache partmap hashfunc \
lists sorter bitsig \
fb-file carefulio fb-mem fb-temp fb-mmap fb-limfd fb-buffer \
str_ctype str_upper str_lower unicode-utf8 \
wildmatch wordsplit ctmatch patimatch patmatch regex \
- bucket object buck2obj obj2buck \
prime random timer log2 randomkey \
db \
- url urlkey finger \
+ url \
mainloop exitstatus runcmd sighandler \
lizard lizard-safe lizard-fb adler32 \
md5 md5hex \
include lib/regex/Makefile
endif
-LIBSH_MOD_PATHS=$(addprefix obj/lib/,$(LIBSH_MODS)) $(CUSTOM_LIB_MODULES)
+LIBUCW=obj/lib/libucw.$(LS)
+LIBUCW_MOD_PATHS=$(addprefix obj/lib/,$(LIBUCW_MODS)) $(CUSTOM_LIB_MODULES)
-obj/lib/libsh.a: $(addsuffix .o,$(LIBSH_MOD_PATHS))
-obj/lib/libsh.so: $(addsuffix .oo,$(LIBSH_MOD_PATHS))
+obj/lib/libucw.a: $(addsuffix .o,$(LIBUCW_MOD_PATHS))
+obj/lib/libucw.so: $(addsuffix .oo,$(LIBUCW_MOD_PATHS))
obj/lib/hashfunc.o obj/lib/hashfunc.oo: CFLAGS += -funroll-loops
obj/lib/lizard.o: CFLAGS += -O6 -funroll-loops
-obj/lib/db-test: obj/lib/db-test.o $(LIBSH)
-obj/lib/db-tool: obj/lib/db-tool.o $(LIBSH)
-obj/lib/buckettool: obj/lib/buckettool.o $(LIBSH)
-obj/lib/conf-test: obj/lib/conf-test.o $(LIBSH)
-obj/lib/sort-test: obj/lib/sort-test.o $(LIBSH)
-obj/lib/lfs-test: obj/lib/lfs-test.o $(LIBSH)
-obj/lib/hash-test: obj/lib/hash-test.o $(LIBSH)
-obj/lib/str-test: obj/lib/str-test.o $(LIBSH)
-obj/lib/asort-test: obj/lib/asort-test.o $(LIBSH)
-obj/lib/redblack-test: obj/lib/redblack-test.o $(LIBSH)
-obj/lib/binheap-test: obj/lib/binheap-test.o $(LIBSH)
-obj/lib/lizard-test: obj/lib/lizard-test.o $(LIBSH)
+obj/lib/db-test: obj/lib/db-test.o $(LIBUCW)
+obj/lib/db-tool: obj/lib/db-tool.o $(LIBUCW)
+obj/lib/conf-test: obj/lib/conf-test.o $(LIBUCW)
+obj/lib/sort-test: obj/lib/sort-test.o $(LIBUCW)
+obj/lib/lfs-test: obj/lib/lfs-test.o $(LIBUCW)
+obj/lib/hash-test: obj/lib/hash-test.o $(LIBUCW)
+obj/lib/str-test: obj/lib/str-test.o $(LIBUCW)
+obj/lib/asort-test: obj/lib/asort-test.o $(LIBUCW)
+obj/lib/redblack-test: obj/lib/redblack-test.o $(LIBUCW)
+obj/lib/binheap-test: obj/lib/binheap-test.o $(LIBUCW)
+obj/lib/lizard-test: obj/lib/lizard-test.o $(LIBUCW)
TESTS+=$(addprefix obj/lib/,regex.test unicode-utf8.test hash-test.test)
obj/lib/regex.test: obj/lib/regex-t
+++ /dev/null
-/*
- * Generating Objects from Buckets
- *
- * (c) 2004, Robert Spalek <robert@ucw.cz>
- * (c) 2004, Martin Mares <mj@ucw.cz>
- */
-
-#undef LOCAL_DEBUG
-
-#include "lib/lib.h"
-#include "lib/unaligned.h"
-#include "lib/mempool.h"
-#include "lib/fastbuf.h"
-#include "lib/unicode.h"
-#include "lib/object.h"
-#include "lib/bucket.h"
-#include "lib/lizard.h"
-#include "lib/bbuf.h"
-#include "lib/ff-utf8.h"
-
-#include <stdlib.h>
-#include <errno.h>
-#include <unistd.h>
-
-#define RET_ERR(num) ({ errno = num; return -1; })
-
-struct buck2obj_buf
-{
- bb_t bb;
- struct lizard_buffer *lizard;
-};
-
-static uns get_attr_type;
-
-void
-get_attr_set_type(uns type)
-{
- if (type < BUCKET_TYPE_PLAIN || type > BUCKET_TYPE_V33_LIZARD)
- die("Unknown buckettype %x", type);
- get_attr_type = type;
-}
-
-int
-get_attr(byte **pos, byte *end, struct parsed_attr *attr)
-{
- byte *ptr = *pos;
- if (ptr >= end)
- return -1;
- if (get_attr_type < BUCKET_TYPE_V33)
- {
- if (get_attr_type == BUCKET_TYPE_PLAIN)
- {
- while (ptr < end && *ptr == '\n')
- ptr++;
- *pos = ptr;
- if (ptr >= end)
- return -1;
- }
- else if (*ptr == '\n')
- {
- *pos = ++ptr;
- attr->attr = 0;
- return 0;
- }
- attr->attr = *ptr++;
- attr->val = ptr;
- while (ptr < end && *ptr != '\n')
- ptr++;
- attr->len = ptr++ - attr->val;
- }
- else
- {
- uns len;
- GET_UTF8_32(ptr, len);
- if (!len--)
- {
- *pos = ptr;
- attr->attr = 0;
- return 0;
- }
- attr->attr = ptr[len];
- attr->val = ptr;
- attr->len = len;
- ptr += len+1;
- }
- if (ptr > end)
- die("Incomplete attribute %c", attr->attr);
- *pos = ptr;
- return attr->attr;
-}
-
-int
-bget_attr(struct fastbuf *b, struct parsed_attr *attr)
-{
- static bb_t buf;
- if (get_attr_type < BUCKET_TYPE_V33)
- {
- int c = bgetc(b);
- if (c < 0)
- return -1;
- if (get_attr_type == BUCKET_TYPE_PLAIN)
- {
- while (c == '\n')
- c = bgetc(b);
- if (c < 0)
- return -1;
- }
- else if (c == '\n')
- {
- attr->attr = 0;
- return 0;
- }
- attr->attr = c;
-
- byte *ptr, *end;
- uns len = bdirect_read_prepare(b, &ptr);
- end = ptr + len;
- attr->val = ptr;
- while (ptr < end && *ptr != '\n')
- ptr++;
- if (ptr < end)
- {
- bdirect_read_commit(b, ptr+1);
- attr->len = ptr - attr->val;
- return attr->attr;
- }
-
- len = 0;
- c = bgetc(b);
- while (c >= 0 && c != '\n')
- {
- bb_grow(&buf, len+1);
- buf.ptr[len++] = c;
- c = bgetc(b);
- }
- if (c < 0)
- die("Incomplete attribute %c", attr->attr);
- attr->val = buf.ptr;
- attr->len = len;
- }
- else
- {
- int len = bget_utf8_32(b);
- if (len < 0)
- return -1;
- if (!len)
- {
- attr->attr = 0;
- return 0;
- }
- attr->len = len-1;
-
- byte *ptr;
- int avail = bdirect_read_prepare(b, &ptr);
- if (avail >= len)
- {
- attr->val = ptr;
- attr->attr = ptr[len-1];
- bdirect_read_commit(b, ptr + len);
- return attr->attr;
- }
- bb_grow(&buf, --len);
- breadb(b, buf.ptr, len);
- attr->val = buf.ptr;
- attr->len = len;
- attr->attr = bgetc(b);
- if (attr->attr < 0)
- die("Incomplete attribute %c", attr->attr);
- }
- return attr->attr;
-}
-
-struct buck2obj_buf *
-buck2obj_alloc(void)
-{
- struct buck2obj_buf *buf = xmalloc(sizeof(struct buck2obj_buf));
- bb_init(&buf->bb);
- buf->lizard = lizard_alloc();
- return buf;
-}
-
-void
-buck2obj_free(struct buck2obj_buf *buf)
-{
- lizard_free(buf->lizard);
- bb_done(&buf->bb);
- xfree(buf);
-}
-
-static inline byte *
-decode_attributes(byte *ptr, byte *end, struct odes *o, uns can_overwrite)
-{
- if (can_overwrite >= 2)
- while (ptr < end)
- {
- uns len;
- GET_UTF8_32(ptr, len);
- if (!len--)
- break;
- byte type = ptr[len];
-
- ptr[len] = 0;
- obj_add_attr_ref(o, type, ptr);
-
- ptr += len + 1;
- }
- else
- while (ptr < end)
- {
- uns len;
- GET_UTF8_32(ptr, len);
- if (!len--)
- break;
- byte type = ptr[len];
-
- byte *dup = mp_alloc_fast_noalign(o->pool, len+1);
- memcpy(dup, ptr, len);
- dup[len] = 0;
- obj_add_attr_ref(o, type, dup);
-
- ptr += len + 1;
- }
- return ptr;
-}
-
-int
-buck2obj_parse(struct buck2obj_buf *buf, uns buck_type, uns buck_len, struct fastbuf *body, struct odes *o_hdr, uns *body_start, struct odes *o_body)
-{
- if (buck_type <= BUCKET_TYPE_PLAIN)
- {
- if (body_start) // there is no header part
- *body_start = 0;
- // ignore empty lines and read until the end of the bucket
- sh_off_t end = btell(body) + buck_len;
- byte buf[MAX_ATTR_SIZE];
- while (btell(body) < end && bgets(body, buf, sizeof(buf)))
- if (buf[0])
- obj_add_attr(o_hdr, buf[0], buf+1);
- ASSERT(btell(body) == end);
- }
- else if (buck_type == BUCKET_TYPE_V30)
- {
- sh_off_t start = btell(body);
- sh_off_t end = start + buck_len;
- byte buf[MAX_ATTR_SIZE];
- while (btell(body) < end && bgets(body, buf, sizeof(buf)) && buf[0])
- obj_add_attr(o_hdr, buf[0], buf+1);
- if (body_start)
- *body_start = btell(body) - start;
- else
- {
- while (btell(body) < end && bgets(body, buf, sizeof(buf)))
- if (buf[0])
- obj_add_attr(o_body, buf[0], buf+1);
- ASSERT(btell(body) == end);
- }
- }
- else if (buck_type == BUCKET_TYPE_V33 || buck_type == BUCKET_TYPE_V33_LIZARD)
- {
- /* Avoid reading the whole bucket if only its header is needed. */
- if (body_start)
- {
- sh_off_t start = btell(body);
- sh_off_t end = start + buck_len;
- while (btell(body) < end)
- {
- uns len = bget_utf8_32(body);
- if (!len)
- break;
- byte *buf = mp_alloc_fast_noalign(o_hdr->pool, len);
- bread(body, buf, len);
- uns type = buf[--len];
- buf[len] = 0;
- obj_add_attr_ref(o_hdr, type, buf);
- }
- *body_start = btell(body) - start;
- return 0;
- }
-
- /* Read all the bucket into 1 buffer, 0-copy if possible. */
- byte *ptr, *end;
- uns len = bdirect_read_prepare(body, &ptr);
- uns copied = 0;
- if (len < buck_len
- || (body->can_overwrite_buffer < 2 && buck_type == BUCKET_TYPE_V33))
- {
- /* Copy if the original buffer is too small.
- * If it is write-protected, copy it also if it is uncompressed. */
- DBG("NO ZC: %d < %d, %d %08x", len, buck_len, body->can_overwrite_buffer, buck_type);
- bb_grow(&buf->bb, buck_len);
- len = bread(body, buf->bb.ptr, buck_len);
- ptr = buf->bb.ptr;
- copied = 1;
- }
- else
- DBG("ZC (%d >= %d, %d %08x)", len, buck_len, body->can_overwrite_buffer, buck_type);
- end = ptr + buck_len;
-
- ptr = decode_attributes(ptr, end, o_hdr, 0); // header
- if (buck_type == BUCKET_TYPE_V33_LIZARD) // decompression
- {
- if (ptr + 8 > end)
- {
- if (ptr == end) // truncated bucket
- goto commit;
- RET_ERR(EINVAL);
- }
- len = GET_U32(ptr);
- ptr += 4;
- uns adler = GET_U32(ptr);
- ptr += 4;
- byte *new_ptr = lizard_decompress_safe(ptr, buf->lizard, len);
- if (!new_ptr)
- return -1;
- if (adler32(new_ptr, len) != adler)
- RET_ERR(EINVAL);
- if (!copied)
- bdirect_read_commit(body, end);
- ptr = new_ptr;
- end = ptr + len;
- copied = 1;
- }
- ptr = decode_attributes(ptr, end, o_body, 2); // body
- if (ptr != end)
- RET_ERR(EINVAL);
- commit:
- if (!copied)
- bdirect_read_commit_modified(body, ptr);
- }
- else
- {
- bskip(body, buck_len);
- RET_ERR(EINVAL);
- }
- return 0;
-}
-
-struct odes *
-obj_read_bucket(struct buck2obj_buf *buf, struct mempool *pool, uns buck_type, uns buck_len, struct fastbuf *body, uns *body_start)
-{
- struct odes *o = obj_new(pool);
- if (buck2obj_parse(buf, buck_type, buck_len, body, o, body_start, o) < 0)
- return NULL;
- else
- return o;
-}
-
-int
-obj_read(struct fastbuf *f, struct odes *o)
-{
- byte buf[MAX_ATTR_SIZE];
-
- while (bgets(f, buf, sizeof(buf)))
- {
- if (!buf[0])
- return 1;
- obj_add_attr(o, buf[0], buf+1);
- }
- return 0;
-}
+++ /dev/null
-/*
- * Sherlock Library -- Object Buckets
- *
- * (c) 2001--2004 Martin Mares <mj@ucw.cz>
- * (c) 2004 Robert Spalek <robert@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#undef LOCAL_DEBUG
-
-#include "lib/lib.h"
-#include "lib/bucket.h"
-#include "lib/fastbuf.h"
-#include "lib/lfs.h"
-#include "lib/conf.h"
-
-#include <string.h>
-#include <stdlib.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include <sys/file.h>
-#include <alloca.h>
-
-static int obuck_fd;
-static struct obuck_header obuck_hdr, obuck_create_hdr;
-static sh_off_t bucket_find_pos;
-static struct fastbuf *obuck_write_fb;
-
-/*** Configuration ***/
-
-byte *obuck_name = "not/configured";
-static uns obuck_io_buflen = 65536;
-static int obuck_shake_buflen = 1048576;
-static uns obuck_shake_security;
-static uns obuck_slurp_buflen = 65536;
-
-static struct cfitem obuck_config[] = {
- { "Buckets", CT_SECTION, NULL },
- { "BucketFile", CT_STRING, &obuck_name },
- { "BufSize", CT_INT, &obuck_io_buflen },
- { "ShakeBufSize", CT_INT, &obuck_shake_buflen },
- { "ShakeSecurity", CT_INT, &obuck_shake_security },
- { "SlurpBufSize", CT_INT, &obuck_slurp_buflen },
- { NULL, CT_STOP, NULL }
-};
-
-static void CONSTRUCTOR obuck_init_config(void)
-{
- cf_register(obuck_config);
-}
-
-/*** Internal operations ***/
-
-static void
-obuck_broken(char *msg, sh_off_t pos)
-{
- die("Object pool corrupted: %s (pos=%Lx)", msg, (long long) pos);
-}
-
-/*
- * We need several types of locks:
- *
- * Read lock reading parts of bucket file
- * Write lock any write operations
- * Append lock appending to the end of the file
- * Scan lock reading parts which we are certain they exist
- *
- * Multiple read and scan locks can co-exist together.
- * Scan locks can co-exist with an append lock.
- * There can be at most one write/append lock at a time.
- *
- * These lock types map to a pair of normal read-write locks which
- * we represent as fcntl() locks on the first and second byte of the
- * bucket file. [We cannot use flock() since it happily permits
- * locking a shared fd (e.g., after fork()) multiple times at it also
- * doesn't offer multiple locks on a single file.]
- *
- * byte0 byte1
- * Read <read> <read>
- * Write <write> <write>
- * Append <write> -
- * Scan - <read>
- */
-
-static inline void
-obuck_do_lock(int type, int start, int len)
-{
- struct flock fl;
-
- fl.l_type = type;
- fl.l_whence = SEEK_SET;
- fl.l_start = start;
- fl.l_len = len;
- if (fcntl(obuck_fd, F_SETLKW, &fl) < 0)
- die("fcntl lock: %m");
-}
-
-inline void
-obuck_lock_read(void)
-{
- obuck_do_lock(F_RDLCK, 0, 2);
-}
-
-inline void
-obuck_lock_write(void)
-{
- obuck_do_lock(F_WRLCK, 0, 2);
-}
-
-static inline void
-obuck_lock_append(void)
-{
- obuck_do_lock(F_WRLCK, 0, 1);
-}
-
-static inline void
-obuck_lock_read_to_scan(void)
-{
- obuck_do_lock(F_UNLCK, 0, 1);
-}
-
-inline void
-obuck_unlock(void)
-{
- obuck_do_lock(F_UNLCK, 0, 2);
-}
-
-/*** FastIO emulation ***/
-
-struct fb_bucket {
- struct fastbuf fb;
- sh_off_t start_pos;
- uns bucket_size;
- byte buffer[0];
-};
-#define FB_BUCKET(f) ((struct fb_bucket *)(f)->is_fastbuf)
-
-static int obuck_fb_count;
-
-static void
-obuck_fb_close(struct fastbuf *f)
-{
- obuck_fb_count--;
- xfree(f);
-}
-
-/* We need to use pread/pwrite since we work on fd's shared between processes */
-
-static int
-obuck_fb_refill(struct fastbuf *f)
-{
- uns remains, bufsize, size, datasize;
-
- remains = FB_BUCKET(f)->bucket_size - (uns)f->pos;
- if (!remains)
- return 0;
- f->buffer = FB_BUCKET(f)->buffer; /* Could have been trimmed by bdirect_read_commit_modified() */
- bufsize = f->bufend - f->buffer;
- sh_off_t start = FB_BUCKET(f)->start_pos;
- sh_off_t pos = start + sizeof(struct obuck_header) + f->pos;
- if (remains <= bufsize)
- {
- datasize = remains;
- size = start + obuck_bucket_size(FB_BUCKET(f)->bucket_size) - pos;
- }
- else
- size = datasize = bufsize;
- int l = sh_pread(obuck_fd, f->buffer, size, pos);
- if (l < 0)
- die("Error reading bucket: %m");
- if ((unsigned) l != size)
- obuck_broken("Short read", FB_BUCKET(f)->start_pos);
- f->bptr = f->buffer;
- f->bstop = f->buffer + datasize;
- f->pos += datasize;
- if (datasize < size)
- {
- if (GET_U32(f->buffer + size - 4) != OBUCK_TRAILER)
- obuck_broken("Missing trailer", FB_BUCKET(f)->start_pos);
- }
- return datasize;
-}
-
-static void
-obuck_fb_seek(struct fastbuf *f, sh_off_t pos, int whence)
-{
- ASSERT(whence == SEEK_SET || whence == SEEK_END);
- if (whence == SEEK_END)
- pos += FB_BUCKET(f)->bucket_size;
- ASSERT(pos >= 0 && pos <= FB_BUCKET(f)->bucket_size);
- f->pos = pos;
-}
-
-static void
-obuck_fb_spout(struct fastbuf *f)
-{
- int l = f->bptr - f->buffer;
- char *c = f->buffer;
-
- while (l)
- {
- int z = sh_pwrite(obuck_fd, c, l, FB_BUCKET(f)->start_pos + sizeof(struct obuck_header) + f->pos);
- if (z <= 0)
- die("Error writing bucket: %m");
- f->pos += z;
- l -= z;
- c += z;
- }
- f->bptr = f->buffer;
-}
-
-/*** Exported functions ***/
-
-void
-obuck_init(int writeable)
-{
- sh_off_t size;
-
- obuck_fd = sh_open(obuck_name, (writeable ? O_RDWR | O_CREAT : O_RDONLY), 0666);
- if (obuck_fd < 0)
- die("Unable to open bucket file %s: %m", obuck_name);
- obuck_lock_read();
- size = sh_seek(obuck_fd, 0, SEEK_END);
- if (size)
- {
- /* If the bucket pool is not empty, check consistency of its end */
- u32 check;
- if (sh_pread(obuck_fd, &check, 4, size-4) != 4 ||
- check != OBUCK_TRAILER)
- obuck_broken("Missing trailer of last object", size - 4);
- }
- obuck_unlock();
-}
-
-void
-obuck_cleanup(void)
-{
- close(obuck_fd);
- if (obuck_fb_count)
- log(L_ERROR, "Bug: Unbalanced bucket opens/closes: %d streams remain", obuck_fb_count);
- if (obuck_write_fb)
- log(L_ERROR, "Bug: Forgot to close bucket write stream");
-}
-
-void
-obuck_sync(void)
-{
- if (obuck_write_fb)
- bflush(obuck_write_fb);
- fsync(obuck_fd);
-}
-
-static void
-obuck_get(oid_t oid)
-{
- bucket_find_pos = obuck_get_pos(oid);
- if (sh_pread(obuck_fd, &obuck_hdr, sizeof(obuck_hdr), bucket_find_pos) != sizeof(obuck_hdr))
- obuck_broken("Short header read", bucket_find_pos);
- if (obuck_hdr.magic != OBUCK_MAGIC)
- obuck_broken("Missing magic number", bucket_find_pos);
- if (obuck_hdr.oid == OBUCK_OID_DELETED)
- obuck_broken("Access to deleted bucket", bucket_find_pos);
- if (obuck_hdr.oid != oid)
- obuck_broken("Invalid backlink", bucket_find_pos);
-}
-
-void
-obuck_find_by_oid(struct obuck_header *hdrp)
-{
- oid_t oid = hdrp->oid;
-
- ASSERT(oid < OBUCK_OID_FIRST_SPECIAL);
- obuck_lock_read();
- obuck_get(oid);
- obuck_unlock();
- memcpy(hdrp, &obuck_hdr, sizeof(obuck_hdr));
-}
-
-int
-obuck_find_first(struct obuck_header *hdrp, int full)
-{
- bucket_find_pos = 0;
- obuck_hdr.magic = 0;
- return obuck_find_next(hdrp, full);
-}
-
-int
-obuck_find_next(struct obuck_header *hdrp, int full)
-{
- int c;
-
- for(;;)
- {
- if (obuck_hdr.magic)
- bucket_find_pos += obuck_bucket_size(obuck_hdr.length);
- obuck_lock_read();
- c = sh_pread(obuck_fd, &obuck_hdr, sizeof(obuck_hdr), bucket_find_pos);
- obuck_unlock();
- if (!c)
- return 0;
- if (c != sizeof(obuck_hdr))
- obuck_broken("Short header read", bucket_find_pos);
- if (obuck_hdr.magic != OBUCK_MAGIC)
- obuck_broken("Missing magic number", bucket_find_pos);
- if (obuck_hdr.oid != OBUCK_OID_DELETED || full)
- {
- memcpy(hdrp, &obuck_hdr, sizeof(obuck_hdr));
- return 1;
- }
- }
-}
-
-struct fastbuf *
-obuck_fetch(void)
-{
- struct fastbuf *b;
- uns official_buflen = ALIGN(MIN(obuck_hdr.length, obuck_io_buflen), OBUCK_ALIGN);
- uns real_buflen = official_buflen + OBUCK_ALIGN;
-
- b = xmalloc(sizeof(struct fb_bucket) + real_buflen);
- b->buffer = b->bptr = b->bstop = FB_BUCKET(b)->buffer;
- b->bufend = b->buffer + official_buflen;
- b->name = "bucket-read";
- b->pos = 0;
- b->refill = obuck_fb_refill;
- b->spout = NULL;
- b->seek = obuck_fb_seek;
- b->close = obuck_fb_close;
- b->config = NULL;
- b->can_overwrite_buffer = 2;
- FB_BUCKET(b)->start_pos = bucket_find_pos;
- FB_BUCKET(b)->bucket_size = obuck_hdr.length;
- obuck_fb_count++;
- return b;
-}
-
-oid_t
-obuck_predict_last_oid(void)
-{
- sh_off_t size = sh_seek(obuck_fd, 0, SEEK_END);
- return (oid_t)(size >> OBUCK_SHIFT);
-}
-
-struct fastbuf *
-obuck_create(u32 type)
-{
- ASSERT(!obuck_write_fb);
-
- obuck_lock_append();
- sh_off_t start = sh_seek(obuck_fd, 0, SEEK_END);
- if (start & (OBUCK_ALIGN - 1))
- obuck_broken("Misaligned file", start);
- obuck_create_hdr.magic = OBUCK_INCOMPLETE_MAGIC;
- obuck_create_hdr.oid = start >> OBUCK_SHIFT;
- obuck_create_hdr.length = 0;
- obuck_create_hdr.type = type;
-
- struct fastbuf *b = xmalloc(sizeof(struct fb_bucket) + obuck_io_buflen);
- obuck_write_fb = b;
- b->buffer = FB_BUCKET(b)->buffer;
- b->bptr = b->bstop = b->buffer;
- b->bufend = b->buffer + obuck_io_buflen;
- b->pos = -(int)sizeof(obuck_create_hdr);
- b->name = "bucket-write";
- b->refill = NULL;
- b->spout = obuck_fb_spout;
- b->seek = NULL;
- b->close = NULL;
- b->config = NULL;
- b->can_overwrite_buffer = 0;
- FB_BUCKET(b)->start_pos = start;
- FB_BUCKET(b)->bucket_size = 0;
- bwrite(b, &obuck_create_hdr, sizeof(obuck_create_hdr));
-
- return b;
-}
-
-void
-obuck_create_end(struct fastbuf *b, struct obuck_header *hdrp)
-{
- ASSERT(b == obuck_write_fb);
- obuck_write_fb = NULL;
-
- obuck_create_hdr.magic = OBUCK_MAGIC;
- obuck_create_hdr.length = btell(b);
- int pad = (OBUCK_ALIGN - sizeof(obuck_create_hdr) - obuck_create_hdr.length - 4) & (OBUCK_ALIGN - 1);
- while (pad--)
- bputc(b, 0);
- bputl(b, OBUCK_TRAILER);
- bflush(b);
- ASSERT(!((FB_BUCKET(b)->start_pos + sizeof(obuck_create_hdr) + b->pos) & (OBUCK_ALIGN - 1)));
- if (sh_pwrite(obuck_fd, &obuck_create_hdr, sizeof(obuck_create_hdr), FB_BUCKET(b)->start_pos) != sizeof(obuck_create_hdr))
- die("Bucket header update failed: %m");
- obuck_unlock();
- memcpy(hdrp, &obuck_create_hdr, sizeof(obuck_create_hdr));
- xfree(b);
-}
-
-void
-obuck_delete(oid_t oid)
-{
- obuck_lock_write();
- obuck_get(oid);
- obuck_hdr.oid = OBUCK_OID_DELETED;
- sh_pwrite(obuck_fd, &obuck_hdr, sizeof(obuck_hdr), bucket_find_pos);
- obuck_unlock();
-}
-
-/*** Fast reading of the whole pool ***/
-
-static struct fastbuf *obuck_rpf;
-static uns slurp_remains;
-static sh_off_t slurp_start, slurp_current, slurp_end;
-
-static int
-obuck_slurp_refill(struct fastbuf *f)
-{
- if (!slurp_remains)
- return 0;
- uns l = bdirect_read_prepare(obuck_rpf, &f->buffer);
- if (!l)
- obuck_broken("Incomplete object", slurp_start);
- l = MIN(l, slurp_remains);
- /* XXX: This probably should be bdirect_read_commit_modified() in some cases,
- * but it doesn't hurt since we aren't going to seek.
- */
- bdirect_read_commit(obuck_rpf, f->buffer + l);
- slurp_remains -= l;
- f->bptr = f->buffer;
- f->bufend = f->bstop = f->buffer + l;
- f->pos += l;
- return 1;
-}
-
-void
-obuck_slurp_end(void)
-{
- if (obuck_rpf)
- {
- bclose(obuck_rpf);
- obuck_rpf = NULL;
- obuck_unlock();
- }
-}
-
-struct fastbuf *
-obuck_slurp_pool(struct obuck_header *hdrp, oid_t next_oid)
-{
- static struct fastbuf limiter;
- uns l;
-
- do
- {
- if (!obuck_rpf)
- {
- obuck_lock_read();
- obuck_rpf = bopen(obuck_name, O_RDONLY, obuck_slurp_buflen);
- slurp_end = bfilesize(obuck_rpf);
- obuck_lock_read_to_scan();
- }
- else
- {
- bsetpos(obuck_rpf, slurp_current - 4);
- if (bgetl(obuck_rpf) != OBUCK_TRAILER)
- obuck_broken("Missing trailer", slurp_start);
- }
- if (next_oid == OBUCK_OID_ANY)
- slurp_start = btell(obuck_rpf);
- else
- {
- slurp_start = obuck_get_pos(next_oid);
- bsetpos(obuck_rpf, slurp_start);
- }
- if (slurp_start < slurp_end)
- l = bread(obuck_rpf, hdrp, sizeof(struct obuck_header));
- else
- {
- obuck_slurp_end();
- return NULL;
- }
- if (l != sizeof(struct obuck_header))
- obuck_broken("Short header read", slurp_start);
- if (hdrp->magic != OBUCK_MAGIC)
- obuck_broken("Missing magic number", slurp_start);
- slurp_current = slurp_start + obuck_bucket_size(hdrp->length);
- }
- while (hdrp->oid == OBUCK_OID_DELETED);
- if (obuck_get_pos(hdrp->oid) != slurp_start)
- obuck_broken("Invalid backlink", slurp_start);
- slurp_remains = hdrp->length;
- limiter.bptr = limiter.bstop = limiter.buffer = limiter.bufend = NULL;
- limiter.name = "Bucket";
- limiter.pos = 0;
- limiter.refill = obuck_slurp_refill;
- limiter.can_overwrite_buffer = obuck_rpf->can_overwrite_buffer;
- return &limiter;
-}
-
-/*** Shakedown ***/
-
-static inline void
-shake_write(void *addr, int len, sh_off_t pos)
-{
- int l = sh_pwrite(obuck_fd, addr, len, pos);
- if (l != len)
- {
- if (l < 0)
- die("obuck_shakedown write error: %m");
- else
- die("obuck_shakedown write error: disk full");
- }
-}
-
-static inline void
-shake_sync(void)
-{
- if (obuck_shake_security > 1)
- fdatasync(obuck_fd);
-}
-
-static void
-shake_write_backup(sh_off_t bpos, byte *norm_buf, int norm_size, byte *fragment, int frag_size, sh_off_t frag_pos, int more_size)
-{
- struct obuck_header *bhdr;
- int boff = 0;
- int l;
- oid_t old_oid;
-
- /* First of all, the "normal" part -- everything that will be written in this pass */
- DBG("Backing up first round of changes at position %Lx + %x", (long long) bpos, norm_size);
- while (boff < norm_size)
- {
- /* This needn't be optimized for speed. */
- bhdr = (struct obuck_header *) (norm_buf + boff);
- ASSERT(bhdr->magic == OBUCK_MAGIC);
- l = obuck_bucket_size(bhdr->length);
- old_oid = bhdr->oid;
- bhdr->oid = bpos >> OBUCK_SHIFT;
- shake_write(bhdr, l, bpos);
- bhdr->oid = old_oid;
- boff += l;
- bpos += l;
- }
-
- /* If we have an incomplete bucket at the end of the buffer, we must copy it as well. */
- if (more_size)
- {
- DBG("Backing up fragment of size %x and %x more", frag_size, more_size);
-
- /* First the part we already have in the buffer */
- bhdr = (struct obuck_header *) fragment;
- ASSERT(bhdr->magic == OBUCK_MAGIC);
- old_oid = bhdr->oid;
- bhdr->oid = bpos >> OBUCK_SHIFT;
- shake_write(bhdr, frag_size, bpos);
- bhdr->oid = old_oid;
- bpos += frag_size;
-
- /* And then the rest, using a small 64K buffer */
- byte *auxbuf = alloca(65536);
- l = 0;
- while (l < more_size)
- {
- int j = MIN(more_size-l, 65536);
- if (sh_pread(obuck_fd, auxbuf, j, frag_pos + frag_size + l) != j)
- die("obuck_shakedown read error: %m");
- shake_write(auxbuf, j, bpos);
- bpos += j;
- l += j;
- }
- }
-}
-
-static void
-shake_erase(sh_off_t start, sh_off_t end)
-{
- if (start > end)
- die("shake_erase called with negative length, that's a bug");
- ASSERT(!(start & (OBUCK_ALIGN-1)) && !(end & (OBUCK_ALIGN-1)));
- while (start < end)
- {
- u32 check = OBUCK_TRAILER;
- obuck_hdr.magic = OBUCK_MAGIC;
- obuck_hdr.oid = OBUCK_OID_DELETED;
- uns len = MIN(0x40000000, end-start);
- obuck_hdr.length = len - sizeof(obuck_hdr) - 4;
- DBG("Erasing %08x bytes at %Lx", len, (long long) start);
- shake_write(&obuck_hdr, sizeof(obuck_hdr), start);
- start += len;
- shake_write(&check, 4, start-4);
- }
-}
-
-void
-obuck_shakedown(int (*kibitz)(struct obuck_header *old, oid_t new, byte *buck))
-{
- byte *buf; /* Shakedown buffer and its size */
- int buflen = ALIGN(obuck_shake_buflen, OBUCK_ALIGN);
- byte *msg; /* Error message we will print */
- sh_off_t rstart, wstart; /* Original and new position of buffer start */
- sh_off_t r_bucket_start, w_bucket_start; /* Original and new position of the current bucket */
- int roff, woff; /* Orig/new position of the current bucket relative to buffer start */
- int rsize; /* Number of original bytes in the buffer */
- int l; /* Raw size of the current bucket */
- int changed = 0; /* "Something has been altered" flag */
- int wrote_anything = 0; /* We already did a write to the bucket file */
- struct obuck_header *rhdr, *whdr; /* Original and new address of header of the current bucket */
- sh_off_t r_file_size; /* Original size of the bucket file */
- int more; /* How much does the last bucket overlap the buffer */
-
- buf = xmalloc(buflen);
- rstart = wstart = 0;
- roff = woff = rsize = 0;
-
- /* We need to be the only accessor, all the object ID's are becoming invalid */
- obuck_lock_write();
- r_file_size = sh_seek(obuck_fd, 0, SEEK_END);
- ASSERT(!(r_file_size & (OBUCK_ALIGN - 1)));
- if (r_file_size >= (0x100000000 << OBUCK_SHIFT) - buflen)
- die("Bucket file is too large for safe shakedown. Shaking down with Bucket.ShakeSecurity=0 will still work.");
-
- DBG("Starting shakedown. Buffer size is %d, original length %Lx", buflen, (long long) r_file_size);
-
- for(;;)
- {
- r_bucket_start = rstart + roff;
- w_bucket_start = wstart + woff;
- rhdr = (struct obuck_header *)(buf + roff);
- whdr = (struct obuck_header *)(buf + woff);
- if (roff == rsize)
- {
- more = 0;
- goto next;
- }
- if (rhdr->magic != OBUCK_MAGIC ||
- rhdr->oid != OBUCK_OID_DELETED && rhdr->oid != (oid_t)(r_bucket_start >> OBUCK_SHIFT))
- {
- msg = "header mismatch";
- goto broken;
- }
- l = obuck_bucket_size(rhdr->length);
- if (l > buflen)
- {
- if (rhdr->oid != OBUCK_OID_DELETED)
- {
- msg = "bucket longer than ShakeBufSize";
- goto broken;
- }
- /* Empty buckets are allowed to be large, but we need to handle them extra */
- DBG("Tricking around an extra-large empty bucket at %Lx + %x", (long long)r_bucket_start, l);
- rsize = roff + l;
- }
- else
- {
- if (rsize - roff < l)
- {
- more = l - (rsize - roff);
- goto next;
- }
- if (GET_U32((byte *)rhdr + l - 4) != OBUCK_TRAILER)
- {
- msg = "missing trailer";
- goto broken;
- }
- }
- if (rhdr->oid != OBUCK_OID_DELETED)
- {
- int status = kibitz(rhdr, w_bucket_start >> OBUCK_SHIFT, (byte *)(rhdr+1));
- if (status)
- {
- int lnew = l;
- if (status > 1)
- {
- /* Changed! Reconstruct the trailer. */
- lnew = obuck_bucket_size(rhdr->length);
- ASSERT(lnew <= l);
- PUT_U32((byte *)rhdr + lnew - 4, OBUCK_TRAILER);
- changed = 1;
- }
- whdr = (struct obuck_header *)(buf+woff);
- if (rhdr != whdr)
- memmove(whdr, rhdr, lnew);
- whdr->oid = w_bucket_start >> OBUCK_SHIFT;
- woff += lnew;
- }
- else
- changed = 1;
- }
- else
- {
- kibitz(rhdr, OBUCK_OID_DELETED, NULL);
- changed = 1;
- }
- roff += l;
- continue;
-
- next:
- if (changed)
- {
- /* Write the new contents of the bucket file */
- if (!wrote_anything)
- {
- if (obuck_shake_security)
- {
- /* But first write a backup at the end of the file to ensure nothing can be lost. */
- shake_write_backup(r_file_size, buf, woff, buf+roff, rsize-roff, rstart+roff, more);
- shake_sync();
- }
- wrote_anything = 1;
- }
- if (woff)
- {
- DBG("Write %Lx %x", wstart, woff);
- shake_write(buf, woff, wstart);
- shake_sync();
- }
- }
- else
- ASSERT(wstart == rstart);
-
- /* In any case, update the write position */
- wstart += woff;
- woff = 0;
-
- /* Skip what's been read and if there is any fragment at the end of the buffer, move it to the start */
- rstart += roff;
- if (more)
- {
- memmove(buf, buf+roff, rsize-roff);
- rsize = rsize-roff;
- }
- else
- rsize = 0;
-
- /* And refill the buffer */
- r_bucket_start = rstart+rsize; /* Also needed for error messages */
- l = sh_pread(obuck_fd, buf+rsize, MIN(buflen-rsize, r_file_size - r_bucket_start), r_bucket_start);
- DBG("Read %Lx %x (%x inherited)", (long long)r_bucket_start, l, rsize);
- if (l < 0)
- die("obuck_shakedown read error: %m");
- if (!l)
- {
- if (!more)
- break;
- msg = "unexpected EOF";
- goto broken;
- }
- if (l & (OBUCK_ALIGN-1))
- {
- msg = "garbage at the end of file";
- goto broken;
- }
- rsize += l;
- roff = 0;
- }
-
- DBG("Finished at position %Lx", (long long) wstart);
- sh_ftruncate(obuck_fd, wstart);
- shake_sync();
-
- obuck_unlock();
- xfree(buf);
- return;
-
- broken:
- log(L_ERROR, "Error during object pool shakedown: %s (pos=%Ld, id=%x), gathering debris",
- msg, (long long) r_bucket_start, (uns)(r_bucket_start >> OBUCK_SHIFT));
- /*
- * We can attempt to clean up the bucket file by erasing everything between the last
- * byte written and the next byte to be read. If the secure mode is switched on, we can
- * guarantee that no data are lost, only some might be duplicated.
- */
- shake_erase(wstart, rstart);
- die("Fatal error during object pool shakedown");
-}
-
-/*** Testing ***/
-
-#ifdef TEST
-
-#define COUNT 5000
-#define MAXLEN 10000
-#define KILLPERC 13
-#define LEN(i) ((259309*(i))%MAXLEN)
-
-static int test_kibitz(struct obuck_header *h, oid_t new, byte *buck)
-{
- return 1;
-}
-
-int main(int argc, char **argv)
-{
- int ids[COUNT];
- unsigned int i, j, cnt;
- struct obuck_header h;
- struct fastbuf *b;
-
- log_init(NULL);
- if (cf_getopt(argc, argv, CF_SHORT_OPTS, CF_NO_LONG_OPTS, NULL) >= 0 ||
- optind < argc)
- {
- fputs("This program supports only the following command-line arguments:\n" CF_USAGE, stderr);
- exit(1);
- }
-
- unlink(obuck_name);
- obuck_init(1);
- for(j=0; j<COUNT; j++)
- {
- b = obuck_create(BUCKET_TYPE_PLAIN);
- for(i=0; i<LEN(j); i++)
- bputc(b, (i+j) % 256);
- obuck_create_end(b, &h);
- printf("Writing %08x %d\n", h.oid, h.length);
- ids[j] = h.oid;
- }
- for(j=0; j<COUNT; j++)
- if (j % 100 < KILLPERC)
- {
- printf("Deleting %08x\n", ids[j]);
- obuck_delete(ids[j]);
- }
- cnt = 0;
- for(j=0; j<COUNT; j++)
- if (j % 100 >= KILLPERC)
- {
- cnt++;
- h.oid = ids[j];
- obuck_find_by_oid(&h);
- b = obuck_fetch();
- printf("Reading %08x %d\n", h.oid, h.length);
- if (h.length != LEN(j))
- die("Invalid length");
- for(i=0; i<h.length; i++)
- if ((unsigned) bgetc(b) != (i+j) % 256)
- die("Contents mismatch");
- if (bgetc(b) != EOF)
- die("EOF mismatch");
- bclose(b);
- }
- obuck_shakedown(test_kibitz);
- if (obuck_find_first(&h, 0))
- do
- {
- printf("<<< %08x\t%d\n", h.oid, h.length);
- cnt--;
- }
- while (obuck_find_next(&h, 0));
- if (cnt)
- die("Walk mismatch");
- obuck_cleanup();
- return 0;
-}
-
-#endif
+++ /dev/null
-/*
- * Sherlock Library -- Object Buckets
- *
- * (c) 2001--2004 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#ifndef _SHERLOCK_BUCKET_H
-#define _SHERLOCK_BUCKET_H
-
-/*
- * Format: The object pool is merely a sequence of object buckets.
- * Each bucket starts with struct obuck_header and it's padded
- * by zeros to a multiple of OBUCK_ALIGN bytes.
- *
- * Locking: Each operation on the pool is protected by a flock.
- *
- * The buckets emulate fastbuf streams. Read streams act as normal files,
- * but there can be only one write stream which is non-seekable and you
- * also shouldn't open new read streams when writing.
- *
- * fork()'ing if you don't have any bucket open is safe.
- */
-
-extern byte *obuck_name; /* Internal, for use by buckettool only! */
-
-#define OBUCK_SHIFT 7
-#define OBUCK_ALIGN (1<<OBUCK_SHIFT)
-#define OBUCK_MAGIC 0xdeadf00d
-#define OBUCK_INCOMPLETE_MAGIC 0xdeadfeel
-#define OBUCK_TRAILER 0xfeedcafe
-#define OBUCK_OID_DELETED (~(oid_t)0)
-#define OBUCK_OID_ANY (~(oid_t)0)
-#define OBUCK_OID_FIRST_SPECIAL (~(oid_t)0xffff)
-
-struct obuck_header {
- u32 magic; /* OBUCK_MAGIC should dwell here */
- oid_t oid; /* ID of this object or OBUCK_OID_DELETED */
- u32 length; /* Length of data in the bucket */
- u32 type; /* Data type */
- /* Bucket data continue here */
-};
-
-enum bucket_type {
- BUCKET_TYPE_COMPAT = 0x7fffffff, /* and less -- buckets created by older versions of Sherlock */
- BUCKET_TYPE_PLAIN = 0x80000000, /* plain textual buckets */
- BUCKET_TYPE_V30 = 0x80000001, /* v3.0 uncompressed buckets */
- BUCKET_TYPE_V33 = 0x80000002, /* v3.3 uncompressed buckets */
- BUCKET_TYPE_V33_LIZARD = 0x80000003 /* v3.3 buckets compressed by lizard */
-};
-
-struct fastbuf;
-
-void obuck_init(int writeable); /* Initialize the bucket module */
-void obuck_cleanup(void); /* Clean up the bucket module */
-void obuck_sync(void); /* Flush all buffers to disk */
-void obuck_lock_read(void); /* Explicit locking to make sure other threads don't touch buckets now */
-void obuck_lock_write(void);
-void obuck_unlock(void);
-oid_t obuck_predict_last_oid(void); /* Get OID corresponding to the next to be created bucket (i.e., bucket file size estimate) */
-
-/* Searching for buckets */
-void obuck_find_by_oid(struct obuck_header *hdrp);
-int obuck_find_first(struct obuck_header *hdrp, int full);
-int obuck_find_next(struct obuck_header *hdrp, int full);
-
-/* Reading current bucket */
-struct fastbuf *obuck_fetch(void);
-
-/* Creating buckets */
-struct fastbuf *obuck_create(u32 type);
-void obuck_create_end(struct fastbuf *b, struct obuck_header *hdrp);
-
-/* Deleting buckets */
-void obuck_delete(oid_t oid);
-
-/* Fast reading of the whole pool */
-struct fastbuf *obuck_slurp_pool(struct obuck_header *hdrp, oid_t next_oid);
-void obuck_slurp_end(void);
-
-/* Convert bucket ID to file position (for size limitations etc.) */
-
-static inline sh_off_t obuck_get_pos(oid_t oid)
-{
- return ((sh_off_t) oid) << OBUCK_SHIFT;
-}
-
-/* Calculate size of bucket which contains given amount of data */
-
-static inline uns obuck_bucket_size(uns len)
-{
- return ALIGN(sizeof(struct obuck_header) + len + 4, OBUCK_ALIGN);
-}
-
-/* Shaking down bucket file */
-void obuck_shakedown(int (*kibitz)(struct obuck_header *old, oid_t new, byte *buck));
-
-#endif
+++ /dev/null
-/*
- * Sherlock Library -- Bucket Manipulation Tool
- *
- * (c) 2001--2004 Martin Mares <mj@ucw.cz>
- * (c) 2004 Robert Spalek <robert@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-#include "lib/bucket.h"
-#include "lib/fastbuf.h"
-#include "lib/lfs.h"
-#include "lib/conf.h"
-#include "lib/mempool.h"
-#include "lib/object.h"
-#include "lib/lizard.h"
-#include "lib/bbuf.h"
-#include "lib/ff-utf8.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <getopt.h>
-#include <fcntl.h>
-#include <unistd.h>
-
-static int verbose;
-static struct mempool *pool;
-static struct buck2obj_buf *buck_buf;
-
-static void
-help(void)
-{
- fprintf(stderr, "\
-Usage: buckettool [<options>] <command>\n\
-\n\
-Options:\n"
-CF_USAGE
-"\nCommands:\n\
--l\t\tlist all buckets\n\
--L\t\tlist all buckets including deleted ones\n\
--d <obj>\tdelete bucket\n\
--x <obj>\textract bucket\n\
--i[<type>]\tinsert buckets separated by blank lines\n\
--c\t\tconcatenate and dump all buckets\n\
--f\t\taudit bucket file structure\n\
--F\t\taudit and fix bucket file structure\n\
--q\t\tquick check of bucket file consistency\n\
--r\t\tdo not parse V33 buckets, but print the raw content\n\
--s\t\tshake down bucket file (without updating other structures!!!)\n\
--v\t\tbe verbose\n\
-");
- exit(1);
-}
-
-static oid_t
-parse_id(char *c)
-{
- char *e;
- oid_t o = strtoul(c, &e, 16);
- if (e && *e)
- die("Invalid object ID: %s", c);
- return o;
-}
-
-static void
-list(int full)
-{
- struct obuck_header h;
-
- obuck_init(0);
- if (obuck_find_first(&h, full))
- do
- {
- if (h.oid == OBUCK_OID_DELETED)
- printf("DELETED %6d\n", h.length);
- else
- printf("%08x %6d %08x\n", h.oid, h.length, h.type);
- }
- while (obuck_find_next(&h, full));
- obuck_cleanup();
-}
-
-static void
-delete(char *id)
-{
- oid_t oid = parse_id(id);
- obuck_init(1);
- obuck_delete(oid);
- obuck_cleanup();
-}
-
-static inline void
-dump_oattrs(struct fastbuf *out, struct oattr *oa)
-{
- for (; oa; oa = oa->next)
- for (struct oattr *a=oa; a; a = a->same)
- bprintf(out, "%c%s\n", a->attr, a->val);
-}
-
-static void
-dump_parsed_bucket(struct fastbuf *out, struct obuck_header *h, struct fastbuf *b)
-{
- struct odes *o_hdr, *o_body;
- mp_flush(pool);
- o_hdr = obj_new(pool);
- o_body = obj_new(pool);
- if (buck2obj_parse(buck_buf, h->type, h->length, b, o_hdr, NULL, o_body) < 0)
- bprintf(out, ".Cannot parse bucket %x of type %x and length %d: %m\n", h->oid, h->type, h->length);
- else
- {
- dump_oattrs(out, o_hdr->attrs);
- bputc(out, '\n');
- dump_oattrs(out, o_body->attrs);
- }
-}
-
-static void
-extract(char *id)
-{
- struct fastbuf *b, *out;
- struct obuck_header h;
-
- h.oid = parse_id(id);
- obuck_init(0);
- obuck_find_by_oid(&h);
- out = bfdopen_shared(1, 65536);
- if (verbose)
- bprintf(out, "### %08x %6d %08x\n", h.oid, h.length, h.type);
- b = obuck_fetch();
- if (h.type < BUCKET_TYPE_V33 || !buck_buf)
- bbcopy_slow(b, out, ~0U);
- else
- dump_parsed_bucket(out, &h, b);
- bclose(b);
- bclose(out);
- obuck_cleanup();
-}
-
-static void
-insert(byte *arg)
-{
- struct fastbuf *b, *in;
- byte buf[4096];
- struct obuck_header h;
- byte *e;
- u32 type;
- bb_t lizard_buf, compressed_buf;
-
- bb_init(&lizard_buf);
- bb_init(&compressed_buf);
- if (!arg)
- type = BUCKET_TYPE_PLAIN;
- else if (sscanf(arg, "%x", &type) != 1)
- die("Type `%s' is not a hexadecimal number");
- if (type < 10)
- type += BUCKET_TYPE_PLAIN;
- put_attr_set_type(type);
-
- in = bfdopen_shared(0, 4096);
- obuck_init(1);
- do
- {
- uns lizard_filled = 0;
- uns in_body = 0;
- b = NULL;
- while ((e = bgets(in, buf, sizeof(buf))))
- {
- if (!buf[0])
- {
- if (in_body || type < BUCKET_TYPE_V30)
- break;
- in_body = 1;
- }
- if (!b)
- b = obuck_create(type);
- if (in_body == 1)
- {
- bputc(b, 0);
- in_body = 2;
- }
- else if (type <= BUCKET_TYPE_V33 || !in_body)
- {
- bput_attr(b, buf[0], buf+1, e-buf-1);
- }
- else
- {
- ASSERT(BUCKET_TYPE_V33_LIZARD);
- uns want_len = lizard_filled + (e-buf) + 6 + LIZARD_NEEDS_CHARS; // +6 is the maximum UTF-8 length
- bb_grow(&lizard_buf, want_len);
- byte *ptr = lizard_buf.ptr + lizard_filled;
- ptr = put_attr(ptr, buf[0], buf+1, e-buf-1);
- lizard_filled = ptr - lizard_buf.ptr;
- }
- }
- if (in_body && type == BUCKET_TYPE_V33_LIZARD)
- {
- bputl(b, lizard_filled
-#if 0 //TEST error resilience: write wrong length
- +1
-#endif
- );
- bputl(b, adler32(lizard_buf.ptr, lizard_filled)
-#if 0 //TEST error resilience: write wrong checksum
- +1
-#endif
- );
- uns want_len = lizard_filled * LIZARD_MAX_MULTIPLY + LIZARD_MAX_ADD;
- bb_grow(&compressed_buf, want_len);
- want_len = lizard_compress(lizard_buf.ptr, lizard_filled, compressed_buf.ptr);
-#if 0 //TEST error resilience: tamper the compressed data by removing EOF
- compressed_buf[want_len-1] = 1;
-#endif
- bwrite(b, compressed_buf.ptr, want_len);
- }
- if (b)
- {
- obuck_create_end(b, &h);
- printf("%08x %d %08x\n", h.oid, h.length, h.type);
- }
- }
- while (e);
- bb_done(&lizard_buf);
- bb_done(&compressed_buf);
- obuck_cleanup();
- bclose(in);
-}
-
-static void
-cat(void)
-{
- struct obuck_header h;
- struct fastbuf *b, *out;
- byte buf[1024];
-
- obuck_init(0);
- out = bfdopen_shared(1, 65536);
- while (b = obuck_slurp_pool(&h, OBUCK_OID_ANY))
- {
- bprintf(out, "### %08x %6d %08x\n", h.oid, h.length, h.type);
- if (h.type < BUCKET_TYPE_V33 || !buck_buf)
- {
- int lf = 1, l;
- while ((l = bread(b, buf, sizeof(buf))))
- {
- bwrite(out, buf, l);
- lf = (buf[l-1] == '\n');
- }
- if (!lf)
- bprintf(out, "\n# <missing EOL>\n");
- }
- else
- dump_parsed_bucket(out, &h, b);
- bputc(out, '\n');
- }
- bclose(out);
- obuck_cleanup();
-}
-
-static void
-fsck(int fix)
-{
- int fd, i;
- struct obuck_header h, nh;
- sh_off_t pos = 0;
- sh_off_t end;
- oid_t oid;
- u32 chk;
- int errors = 0;
- int fatal_errors = 0;
-
- fd = sh_open(obuck_name, O_RDWR);
- if (fd < 0)
- die("Unable to open the bucket file %s: %m", obuck_name);
- for(;;)
- {
- oid = pos >> OBUCK_SHIFT;
- i = sh_pread(fd, &h, sizeof(h), pos);
- if (!i)
- break;
- if (i != sizeof(h))
- printf("%08x incomplete header\n", oid);
- else if (h.magic == OBUCK_INCOMPLETE_MAGIC)
- printf("%08x incomplete file\n", oid);
- else if (h.magic != OBUCK_MAGIC)
- printf("%08x invalid header magic\n", oid);
- else if (h.oid != oid && h.oid != OBUCK_OID_DELETED)
- printf("%08x invalid header backlink\n", oid);
- else
- {
- end = (pos + sizeof(h) + h.length + 4 + OBUCK_ALIGN - 1) & ~(sh_off_t)(OBUCK_ALIGN - 1);
- if (sh_pread(fd, &chk, 4, end-4) != 4)
- printf("%08x missing trailer\n", oid);
- else if (chk != OBUCK_TRAILER)
- printf("%08x mismatched trailer\n", oid);
- else
- {
- /* OK */
- pos = end;
- continue;
- }
- }
- errors++;
- end = pos;
- do
- {
- if (pos - end > 0x10000000)
- {
- printf("*** skipped for too long, giving up\n");
- fatal_errors++;
- goto finish;
- }
- end += OBUCK_ALIGN;
- if (sh_pread(fd, &nh, sizeof(nh), end) != sizeof(nh))
- {
- printf("*** unable to find next header\n");
- if (fix)
- {
- printf("*** truncating file\n");
- sh_ftruncate(fd, pos);
- }
- else
- printf("*** would truncate the file here\n");
- goto finish;
- }
- }
- while (nh.magic != OBUCK_MAGIC ||
- (nh.oid != (oid_t)(end >> OBUCK_SHIFT) && nh.oid != OBUCK_OID_DELETED));
- printf("*** match at oid %08x\n", (uns)(end >> OBUCK_SHIFT));
- if (fix)
- {
- h.magic = OBUCK_MAGIC;
- h.oid = OBUCK_OID_DELETED;
- h.length = end - pos - sizeof(h) - 4;
- sh_pwrite(fd, &h, sizeof(h), pos);
- chk = OBUCK_TRAILER;
- sh_pwrite(fd, &chk, 4, end-4);
- printf("*** replaced the invalid chunk by a DELETED bucket of size %d\n", (uns)(end - pos));
- }
- else
- printf("*** would mark %d bytes as DELETED\n", (uns)(end - pos));
- pos = end;
- }
- finish:
- close(fd);
- if (!fix && errors || fatal_errors)
- exit(1);
-}
-
-static int
-shake_kibitz(struct obuck_header *old, oid_t new, byte *buck UNUSED)
-{
- if (verbose)
- {
- printf("%08x -> ", old->oid);
- if (new == OBUCK_OID_DELETED)
- puts("DELETED");
- else
- printf("%08x\n", new);
- }
- return 1;
-}
-
-static void
-shake(void)
-{
- obuck_init(1);
- obuck_shakedown(shake_kibitz);
- obuck_cleanup();
-}
-
-static void
-quickcheck(void)
-{
- obuck_init(1);
- obuck_cleanup();
-}
-
-int
-main(int argc, char **argv)
-{
- int i, op;
- char *arg = NULL;
- uns raw = 0;
-
- log_init(NULL);
- op = 0;
- while ((i = cf_getopt(argc, argv, CF_SHORT_OPTS "lLd:x:i::cfFqrsv", CF_NO_LONG_OPTS, NULL)) != -1)
- if (i == '?' || op)
- help();
- else if (i == 'v')
- verbose++;
- else if (i == 'r')
- raw++;
- else
- {
- op = i;
- arg = optarg;
- }
- if (optind < argc)
- help();
-
- if (!raw)
- {
- pool = mp_new(1<<14);
- buck_buf = buck2obj_alloc();
- }
- switch (op)
- {
- case 'l':
- list(0);
- break;
- case 'L':
- list(1);
- break;
- case 'd':
- delete(arg);
- break;
- case 'x':
- extract(arg);
- break;
- case 'i':
- insert(arg);
- break;
- case 'c':
- cat();
- break;
- case 'f':
- fsck(0);
- break;
- case 'F':
- fsck(1);
- break;
- case 'q':
- quickcheck();
- break;
- case 's':
- shake();
- break;
- default:
- help();
- }
- if (buck_buf)
- {
- buck2obj_free(buck_buf);
- mp_delete(pool);
- }
-
- return 0;
-}
/*
- * Sherlock -- Configuration-Dependent Definitions
+ * UCW Library -- Configuration-Dependent Definitions
*
* (c) 1997--2004 Martin Mares <mj@ucw.cz>
*
* of the GNU Lesser General Public License.
*/
-#ifndef _SHERLOCK_CONFIG_H
-#define _SHERLOCK_CONFIG_H
+#ifndef _UCW_CONFIG_H
+#define _UCW_CONFIG_H
/* Configuration switches */
#include "lib/autoconf.h"
-#ifdef CONFIG_MAX_CONTEXTS
-#define CONFIG_CONTEXTS
-#endif
-
-/* Version */
-
-#define SHER_VER SHERLOCK_VERSION SHERLOCK_VERSION_SUFFIX
-
/* Types */
typedef unsigned char byte; /* exactly 8 bits, unsigned */
#define NULL (void *)0
#endif
-typedef u32 oid_t; /* Object ID */
-
-/* Data types and functions for accessing file positions */
-
-#ifdef CONFIG_LARGE_DB
-typedef s64 sh_off_t;
-#define BYTES_PER_O 5
-#define BYTES_PER_P 8
-#define bgeto(f) bget5(f)
-#define bputo(f,l) bput5(f,l)
-#define bgetp(f) bgetq(f)
-#define bputp(f,l) bputq(f,l)
-#define GET_O(p) GET_U40(p)
-#define GET_P(p) GET_U64(p)
-#define PUT_O(p,x) PUT_U40(p,x)
-#define PUT_P(p,x) PUT_U64(p,x)
-#else
-typedef s32 sh_off_t;
-#define BYTES_PER_O 4
-#define BYTES_PER_P 4
-#define bgeto(f) bgetl(f)
-#define bputo(f,l) bputl(f,l)
-#define bgetp(f) bgetl(f)
-#define bputp(f,l) bputl(f,l)
-#define GET_O(p) GET_U32(p)
-#define GET_P(p) GET_U32(p)
-#define PUT_O(p,x) PUT_U32(p,x)
-#define PUT_P(p,x) PUT_U32(p,x)
-#endif
-
-/* Data type for area ID's */
-
-#ifdef CONFIG_AREAS
-typedef u32 area_t;
-#define AREA_NONE 0
-#define AREA_ANY ~0U
-#else
-typedef struct { } area_t;
-#define AREA_NONE (area_t){}
-#define AREA_ANY (area_t){}
-#endif
-
-/* Misc */
-
-#ifdef __GNUC__
-
-#undef inline
-#define NONRET __attribute__((noreturn))
-#define UNUSED __attribute__((unused))
-#define CONSTRUCTOR __attribute__((constructor))
-#define PACKED __attribute__((packed))
-#define CONST __attribute__((const))
-#define PURE __attribute__((const))
-#define likely(x) __builtin_expect((x),1)
-#define unlikely(x) __builtin_expect((x),0)
-
-#else
-#error This program requires the GNU C compiler.
-#endif
+typedef s64 sh_off_t; /* FIXME */
+typedef u32 oid_t; /* Object ID */ /* FIXME */
#endif
+++ /dev/null
-/*
- * Sherlock Library -- String Fingerprints
- *
- * (c) 2001--2003 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-/*
- * We use a hashing function to map all the URL's and other
- * hairy strings we work with to a much simpler universe
- * of constant length bit strings (currently 96-bit ones).
- * With a random hashing function (which is equivalent to
- * having a fixed function and random input), the probability
- * of at least one collision happening is at most c*n^2/m
- * where n is the number of strings we hash, m is the size
- * of our bit string universe (2^96) and c is a small constant.
- * We set m sufficiently large and expect no collisions
- * to occur. On the other hand, the worst thing which could
- * be caused by a collision is mixing up two strings or labels
- * of two documents which is relatively harmless.
- */
-
-#include "lib/lib.h"
-#include "lib/index.h"
-#include "lib/md5.h"
-
-#include <string.h>
-
-void
-fingerprint(byte *string, struct fingerprint *fp)
-{
- struct MD5Context c;
- byte digest[16];
-
- MD5Init(&c);
- MD5Update(&c, string, strlen(string));
- MD5Final(digest, &c);
- memcpy(fp->hash, digest, 12);
-}
+++ /dev/null
-/*
- * Sherlock: Data structures used in indices
- *
- * (c) 2001--2004 Martin Mares <mj@ucw.cz>
- */
-
-#ifndef _SHERLOCK_INDEX_H
-#define _SHERLOCK_INDEX_H
-
-#include "custom/lib/custom.h"
-
-/*
- * Magic number which should help to avoid mixing incompatible indices.
- * Syntax: <version><revision><custom-type><custom-version>
- * Remember to increase with each change of index format.
- */
-
-#define INDEX_VERSION (0x34010000|((CUSTOM_INDEX_TYPE)<< 8)|(CUSTOM_INDEX_VERSION))
-
-/*
- * Words
- *
- * MAX_WORD_LEN is the maximum length (measured in UTF-8 characters, excluding
- * the terminating zero byte if there's any) of any word which may appear in the
- * indices or in the bucket file. Naturally, the same constant also bounds
- * the number of UCS-2 characters in a word.
- *
- * Caveat: If you are upcasing/downcasing the word, the UTF-8 encoding can
- * expand, although at most twice, so you need to reserve 2*MAX_WORD_LEN bytes.
- */
-
-#define MAX_WORD_LEN 64 /* a multiple of 4 */
-
-/* Word and string types are defined in custom/lib/custom.h */
-
-/* Types used for storing contexts */
-
-#ifdef CONFIG_CONTEXTS
-#if CONFIG_MAX_CONTEXTS == 32768
-typedef u16 context_t;
-#define bget_context bgetw
-#define bput_context bputw
-#define GET_CONTEXT GET_U16
-#define PUT_CONTEXT PUT_U16
-#elif CONFIG_MAX_CONTEXTS == 256
-typedef byte context_t;
-#define bget_context bgetc
-#define bput_context bputc
-#define GET_CONTEXT GET_U8
-#define PUT_CONTEXT PUT_U8
-#else
-#error CONFIG_MAX_CONTEXTS set to an invalid value.
-#endif
-#else
-struct fastbuf;
-typedef struct { } context_t;
-static inline uns bget_context(struct fastbuf *b UNUSED) { return 0; }
-static inline void bput_context(struct fastbuf *b UNUSED, uns context UNUSED) { }
-#define GET_CONTEXT(p) 0
-#define PUT_CONTEXT(p,x) do {} while(0)
-#endif
-
-/* Index card attributes */
-
-struct card_attr {
- u32 card; /* Reference to card description (either oid or filepos) */
-#ifdef CONFIG_SITES
- u32 site_id;
-#endif
- area_t area;
- CUSTOM_CARD_ATTRS /* Include all custom attributes */
- byte weight;
- byte flags;
-#ifdef CONFIG_LASTMOD
- byte age; /* Document age in pseudo-logarithmic units wrt. reference time */
-#endif
-#ifdef CONFIG_FILETYPE
- byte type_flags; /* File type flags (see below) */
-#endif
-};
-
-enum card_flag {
- CARD_FLAG_EMPTY = 1, /* Empty document (redirect, robot file etc.) [scanner] */
- CARD_FLAG_ACCENTED = 2, /* Document contains accented characters [scanner] */
- CARD_FLAG_DUP = 4, /* Removed as a duplicate [merger] */
- CARD_FLAG_MERGED = 8, /* Destination of a merge [merger] */
- CARD_FLAG_IMAGE = 16, /* Is an image object [scanner] */
- CARD_FLAG_FRAMESET = 32, /* Contains a frameset to be ignored [scanner] */
- CARD_FLAG_OVERRIDEN = 64, /* Overriden by another index [sherlockd] */
-};
-
-#ifndef CARD_POS_SHIFT /* (can be overriden in custom.h) */
-#define CARD_POS_SHIFT 5 /* Card positions are shifted this # of bits to the right */
-#endif
-
-/*
- * We store document type and several other properties in card_attr->type_flags.
- * Here we define only the basic structure, the details are defined in custom.h
- * (the list of type names custom_file_type_names[] and also setting of the file
- * types in custom_create_attrs()).
- *
- * bits 7--5 file type: (0-3: text types, 4-7: other types, defined by custom.h)
- * bits 4--0 type-dependent information, for text types it's document language code
- */
-
-#ifdef CONFIG_FILETYPE
-#define CA_GET_FILE_TYPE(a) ((a)->type_flags >> 4)
-#define CA_GET_FILE_INFO(a) ((a)->type_flags & 0x0f)
-#define CA_GET_FILE_LANG(a) ((a)->type_flags & 0x80 ? 0 : CA_GET_FILE_INFO(a))
-#define MAX_FILE_TYPES 16
-#define FILETYPE_IS_TEXT(f) ((f) < 8)
-byte *ext_ft_parse(u32 *dest, byte *value, uns intval);
-extern byte *custom_file_type_names[MAX_FILE_TYPES];
-#define FILETYPE_STAT_VARS uns matching_per_type[MAX_FILE_TYPES];
-#define FILETYPE_SHOW_STATS(q,f) ext_ft_show(q,f)
-#define FILETYPE_INIT_STATS(q) bzero(q->matching_per_type, sizeof(q->matching_per_type))
-#ifdef CONFIG_COUNT_ALL_FILETYPES
-#define FILETYPE_ATTRS LATE_SMALL_SET_ATTR(ftype, FILETYPE, CA_GET_FILE_TYPE, ext_ft_parse)
-#define FILETYPE_EARLY_STATS(q,a) q->matching_per_type[CA_GET_FILE_TYPE(a)]++
-#define FILETYPE_LATE_STATS(q,a)
-#else
-#define FILETYPE_ATTRS SMALL_SET_ATTR(ftype, FILETYPE, CA_GET_FILE_TYPE, ext_ft_parse)
-#define FILETYPE_EARLY_STATS(q,a)
-#define FILETYPE_LATE_STATS(q,a) q->matching_per_type[CA_GET_FILE_TYPE(a)]++
-#endif
-#else
-#define FILETYPE_ATTRS
-#define FILETYPE_STAT_VARS
-#define FILETYPE_INIT_STATS(q)
-#define FILETYPE_EARLY_STATS(q,a)
-#define FILETYPE_LATE_STATS(q,a)
-#define FILETYPE_SHOW_STATS(q,f)
-#endif
-
-#ifdef CONFIG_LANG
-/* You can use language matching without CONFIG_FILETYPE, but you have to define CA_GET_FILE_LANG yourself. */
-#define LANG_ATTRS SMALL_SET_ATTR(lang, LANG, CA_GET_FILE_LANG, ext_lang_parse)
-byte *ext_lang_parse(u32 *dest, byte *value, uns intval);
-#else
-#define LANG_ATTRS
-#endif
-
-#ifdef CONFIG_AREAS
-#define CA_GET_AREA(a) ((a)->area)
-#define SPLIT_ATTRS INT_ATTR(area, AREA, CA_GET_AREA, ext_area_parse)
-byte *ext_area_parse(u32 *dest, byte *value, uns intval);
-#else
-#define SPLIT_ATTRS
-#endif
-
-/*
- * A list of all extended attributes: custom attributes and also some
- * built-in attributes treated in the same way.
- */
-
-#define EXTENDED_ATTRS CUSTOM_ATTRS FILETYPE_ATTRS LANG_ATTRS SPLIT_ATTRS
-
-/*
- * A list of all statistics collectors, also composed of custom parts
- * and built-in parts.
- */
-
-#ifndef CUSTOM_STAT_VARS
-#define CUSTOM_STAT_VARS
-#define CUSTOM_INIT_STATS(q)
-#define CUSTOM_EARLY_STATS(q,a)
-#define CUSTOM_LATE_STATS(q,a)
-#define CUSTOM_SHOW_STATS(q,f)
-#endif
-
-#define EXTENDED_STAT_VARS CUSTOM_STAT_VARS FILETYPE_STAT_VARS
-#define EXTENDED_INIT_STATS(q) CUSTOM_INIT_STATS(q) FILETYPE_INIT_STATS(q)
-#define EXTENDED_EARLY_STATS(q,a) CUSTOM_EARLY_STATS(q,a) FILETYPE_EARLY_STATS(q,a)
-#define EXTENDED_LATE_STATS(q,a) CUSTOM_LATE_STATS(q,a) FILETYPE_LATE_STATS(q,a)
-#define EXTENDED_SHOW_STATS(q,f) CUSTOM_SHOW_STATS(q,f) FILETYPE_SHOW_STATS(q,f)
-
-/* String fingerprints */
-
-struct fingerprint {
- byte hash[12];
-};
-
-void fingerprint(byte *string, struct fingerprint *fp);
-
-static inline u32
-fp_hash(struct fingerprint *fp)
-{
- return (fp->hash[0] << 24) | (fp->hash[1] << 16) | (fp->hash[2] << 8) | fp->hash[3];
-}
-
-/* The card fingerprints */
-
-struct card_print {
- struct fingerprint fp;
- u32 cardid;
-};
-
-/* URL keys */
-
-#define URL_KEY_BUF_SIZE (3*MAX_URL_SIZE)
-byte *url_key(byte *url, byte *buf);
-void url_fingerprint(byte *url, struct fingerprint *fp);
-void url_key_init(void);
-
-/* Conversion of document age from seconds to our internal units */
-
-static inline int
-convert_age(sh_time_t lastmod, sh_time_t reftime)
-{
- sh_time_t age;
- if (reftime < lastmod) /* past times */
- return -1;
- age = (reftime - lastmod) / 3600;
- if (age < 48) /* last 2 days: 1 hour resolution */
- return age;
- age = (age-48) / 24;
- if (age < 64) /* next 64 days: 1 day resolution */
- return 48 + age;
- age = (age-64) / 7;
- if (age < 135) /* next 135 weeks: 1 week resolution */
- return 112 + age;
- age = (age-135) / 52;
- if (age < 8) /* next 8 years: 1 year resolution */
- return 247 + age;
- return 255; /* then just "infinite future" */
-}
-
-#endif
/*
- * Sherlock Library -- Miscellaneous Functions
+ * The UCW Library -- Miscellaneous Functions
*
* (c) 1997--2004 Martin Mares <mj@ucw.cz>
*
* of the GNU Lesser General Public License.
*/
-/*
- * This file should be included as the very first include in all
- * source files, especially before all OS includes since it sets
- * up libc feature macros.
- */
-
-#ifndef _SHERLOCK_LIB_H
-#define _SHERLOCK_LIB_H
+#ifndef _UCW_LIB_H
+#define _UCW_LIB_H
#include "lib/config.h"
#include <stdarg.h>
#define COMPARE_LT(x,y) do { if ((x)<(y)) return 1; if ((x)>(y)) return 0; } while(0)
#define COMPARE_GT(x,y) COMPARE_LT(y,x)
+/*
+ * GCC Extensions
+ *
+ * Short names for the GCC function/type attributes and branch-prediction
+ * builtins used by the library. A GNU-compatible compiler is mandatory;
+ * anything else is rejected at preprocessing time by the #error below.
+ */
+
+#ifdef __GNUC__
+
+#undef inline	/* presumably drops an `inline' macro left by the configuration headers -- confirm */
+#define NONRET __attribute__((noreturn))	/* function never returns to its caller */
+#define UNUSED __attribute__((unused))	/* silence "unused" warnings for this entity */
+#define CONSTRUCTOR __attribute__((constructor))	/* run automatically before main() */
+#define PACKED __attribute__((packed))	/* lay out without padding */
+#define CONST __attribute__((const))	/* result depends on argument values only; reads no memory */
+/*
+ * PURE must map to the `pure' attribute, not `const': a pure function has
+ * no side effects but MAY read global memory and data behind pointer
+ * arguments. Declaring such a function `const' lets the compiler merge or
+ * hoist calls across stores to that memory.
+ */
+#define PURE __attribute__((pure))
+#define likely(x) __builtin_expect((x),1)	/* hint: condition is usually true */
+#define unlikely(x) __builtin_expect((x),0)	/* hint: condition is usually false */
+
+#else
+#error This program requires the GNU C compiler.
+#endif
+
/* Logging */
#define L_DEBUG 'D' /* Debugging messages */
#include "lib/lizard.h"
#include "lib/bbuf.h"
#include "lib/fastbuf.h"
-#include "lib/bucket.h"
+#include "sherlock/bucket.h" /* FIXME */
#include <errno.h>
+++ /dev/null
-/*
- * Generating Buckets from Objects
- *
- * (c) 2004, Robert Spalek <robert@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-#include "lib/fastbuf.h"
-#include "lib/ff-utf8.h"
-#include "lib/bucket.h"
-#include "lib/object.h"
-
-#include <string.h>
-#include <stdarg.h>
-
-static uns use_v33;
-static int hdr_sep;
-
-void
-put_attr_set_type(uns type)
-{
- switch (type)
- {
- case BUCKET_TYPE_PLAIN:
- use_v33 = 0;
- hdr_sep = -1;
- break;
- case BUCKET_TYPE_V30:
- use_v33 = 0;
- hdr_sep = '\n';
- break;
- case BUCKET_TYPE_V33:
- case BUCKET_TYPE_V33_LIZARD:
- use_v33 = 1;
- hdr_sep = 0;
- break;
- default:
- die("Don't know how to generate buckets of type %08x", type);
- }
-}
-
-uns
-size_attr(uns len)
-{
- ASSERT(len <= MAX_ATTR_SIZE);
- if (use_v33)
- {
- len++;
- return len + utf8_space(len);
- }
- else
- return len + 2;
-}
-
-inline byte *
-put_attr(byte *ptr, uns type, byte *val, uns len)
-{
- if (use_v33)
- {
- PUT_UTF8_32(ptr, len+1);
- memcpy(ptr, val, len);
- ptr += len;
- *ptr++ = type;
- }
- else
- {
- *ptr++ = type;
- memcpy(ptr, val, len);
- ptr += len;
- *ptr++ = '\n';
- }
- return ptr;
-}
-
-byte *
-put_attr_str(byte *ptr, uns type, byte *val)
-{
- return put_attr(ptr, type, val, strlen(val));
-}
-
-inline byte *
-put_attr_vformat(byte *ptr, uns type, byte *mask, va_list va)
-{
- if (use_v33)
- {
- uns len = vsprintf(ptr+1, mask, va);
- if (len >= 127)
- {
- byte tmp[6], *tmp_end = tmp;
- PUT_UTF8_32(tmp_end, len+1);
- uns l = tmp_end - tmp;
- memmove(ptr+l, ptr+1, len);
- memcpy(ptr, tmp, l);
- ptr += l + len;
- }
- else
- {
- *ptr = len+1;
- ptr += len+1;
- }
- *ptr++ = type;
- }
- else
- {
- *ptr++ = type;
- ptr += vsprintf(ptr, mask, va);
- *ptr++ = '\n';
- }
- return ptr;
-}
-
-byte *
-put_attr_format(byte *ptr, uns type, char *mask, ...)
-{
- va_list va;
- va_start(va, mask);
- byte *ret = put_attr_vformat(ptr, type, mask, va);
- va_end(va);
- return ret;
-}
-
-byte *
-put_attr_num(byte *ptr, uns type, uns val)
-{
- if (use_v33)
- {
- uns len = sprintf(ptr+1, "%d", val) + 1;
- *ptr = len;
- ptr += len;
- *ptr++ = type;
- }
- else
- ptr += sprintf(ptr, "%c%d\n", type, val);
- return ptr;
-}
-
-byte *
-put_attr_separator(byte *ptr)
-{
- if (hdr_sep >= 0)
- *ptr++ = hdr_sep;
- return ptr;
-}
-
-inline void
-bput_attr(struct fastbuf *b, uns type, byte *val, uns len)
-{
- if (use_v33)
- {
- bput_utf8_32(b, len+1);
- bwrite(b, val, len);
- bputc(b, type);
- }
- else
- {
- bputc(b, type);
- bwrite(b, val, len);
- bputc(b, '\n');
- }
-}
-
-void
-bput_attr_str(struct fastbuf *b, uns type, byte *val)
-{
- bput_attr(b, type, val, strlen(val));
-}
-
-inline void
-bput_attr_vformat(struct fastbuf *b, uns type, byte *mask, va_list va)
-{
- if (use_v33)
- {
- int len = vsnprintf(NULL, 0, mask, va);
- if (len < 0)
- die("vsnprintf() does not support size=0");
- bput_utf8_32(b, len+1);
- vbprintf(b, mask, va);
- bputc(b, type);
- }
- else
- {
- bputc(b, type);
- vbprintf(b, mask, va);
- bputc(b, '\n');
- }
-}
-
-void
-bput_attr_format(struct fastbuf *b, uns type, char *mask, ...)
-{
- va_list va;
- va_start(va, mask);
- bput_attr_vformat(b, type, mask, va);
- va_end(va);
-}
-
-void
-bput_attr_num(struct fastbuf *b, uns type, uns val)
-{
- if (use_v33)
- {
- byte tmp[12];
- uns len = sprintf(tmp, "%d", val);
- bputc(b, len+1);
- bwrite(b, tmp, len);
- bputc(b, type);
- }
- else
- bprintf(b, "%c%d\n", type, val);
-}
-
-void
-bput_attr_separator(struct fastbuf *b)
-{
- if (hdr_sep >= 0)
- bputc(b, hdr_sep);
-}
-
-void
-obj_write(struct fastbuf *f, struct odes *d)
-{
- for(struct oattr *a=d->attrs; a; a=a->next)
- for(struct oattr *b=a; b; b=b->same)
- {
- byte *z;
- for (z = b->val; *z; z++)
- if (*z < ' ' && *z != '\t')
- {
- log(L_ERROR, "obj_dump: Found non-ASCII character %02x (URL might be %s) in %c%s", *z, obj_find_aval(d, 'U'), a->attr, b->val);
- *z = '?';
- }
- ASSERT(z - b->val <= MAX_ATTR_SIZE-2);
- bput_attr_str(f, a->attr, b->val);
- }
-}
-
-void
-obj_write_nocheck(struct fastbuf *f, struct odes *d)
-{
- for(struct oattr *a=d->attrs; a; a=a->next)
- for(struct oattr *b=a; b; b=b->same)
- bput_attr_str(f, a->attr, b->val);
-}
+++ /dev/null
-/*
- * Sherlock Library -- Object Functions
- *
- * (c) 1997--2004 Martin Mares <mj@ucw.cz>
- * (c) 2004 Robert Spalek <robert@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-#include "lib/mempool.h"
-#include "lib/fastbuf.h"
-#include "lib/object.h"
-
-#include <string.h>
-#include <stdio.h>
-#include <stdlib.h>
-
-void
-obj_dump(struct odes *o)
-{
- for(struct oattr *a=o->attrs; a; a=a->next)
- for(struct oattr *b=a; b; b=b->same)
- printf("%c%s\n", (a==b ? a->attr : ' '), b->val);
-}
-
-static struct oattr *
-oa_new(struct odes *o, uns x, byte *v)
-{
- struct oattr *a = mp_alloc(o->pool, sizeof(struct oattr) + strlen(v)+1);
-
- a->next = a->same = NULL;
- a->attr = x;
- a->val = (byte*) (a+1);
- strcpy(a->val, v);
- return a;
-}
-
-static struct oattr *
-oa_new_ref(struct odes *o, uns x, byte *v)
-{
- struct oattr *a = mp_alloc(o->pool, sizeof(struct oattr));
-
- a->next = a->same = NULL;
- a->attr = x;
- a->val = v;
- return a;
-}
-
-struct odes *
-obj_new(struct mempool *pool)
-{
- struct odes *o = mp_alloc(pool, sizeof(struct odes));
- o->pool = pool;
- o->attrs = NULL;
- o->cached_attr = NULL;
- return o;
-}
-
-struct oattr *
-obj_find_attr(struct odes *o, uns x)
-{
- struct oattr *a;
- for(a=o->attrs; a && a->attr != x; a=a->next)
- ;
- return a;
-}
-
-struct oattr *
-obj_find_attr_last(struct odes *o, uns x)
-{
- struct oattr *a = obj_find_attr(o, x);
-
- if (a)
- {
- while (a->same)
- a = a->same;
- }
- return a;
-}
-
-uns
-obj_del_attr(struct odes *o, struct oattr *a)
-{
- struct oattr *x, **p, *y, *l;
- byte aa = a->attr;
-
- o->cached_attr = NULL;
- p = &o->attrs;
- while (x = *p)
- {
- if (x->attr == aa)
- {
- y = x;
- l = NULL;
- while (x = *p)
- {
- if (x == a)
- {
- *p = x->same;
- return 1;
- }
- p = &x->same;
- l = x;
- }
- return 0;
- }
- p = &x->next;
- }
- return 0;
-}
-
-byte *
-obj_find_aval(struct odes *o, uns x)
-{
- struct oattr *a = obj_find_attr(o, x);
- return a ? a->val : NULL;
-}
-
-uns
-obj_find_anum(struct odes *o, uns x, uns def)
-{
- struct oattr *a = obj_find_attr(o, x);
- return a ? (uns)atol(a->val) : def;
-}
-
-struct oattr *
-obj_set_attr(struct odes *o, uns x, byte *v)
-{
- struct oattr *a, **z;
-
- z = &o->attrs;
- while (a = *z)
- {
- if (a->attr == x)
- {
- *z = a->next;
- goto set;
- }
- z = &a->next;
- }
-
- set:
- if (v)
- {
- a = oa_new(o, x, v);
- a->next = o->attrs;
- o->attrs = a;
- }
- else
- a = NULL;
- o->cached_attr = a;
- return a;
-}
-
-struct oattr *
-obj_set_attr_num(struct odes *o, uns a, uns v)
-{
- byte x[32];
-
- sprintf(x, "%d", v);
- return obj_set_attr(o, a, x);
-}
-
-static inline struct oattr *
-obj_add_attr_internal(struct odes *o, struct oattr *b)
-{
- struct oattr *a, **z;
-
- if (!(a = o->cached_attr) || a->attr != b->attr)
- {
- z = &o->attrs;
- while ((a = *z) && a->attr != b->attr)
- z = &a->next;
- if (!a)
- {
- *z = b;
- /* b->next is NULL */
- goto done;
- }
- }
- while (a->same)
- a = a->same;
- a->same = b;
- done:
- o->cached_attr = b;
- return b;
-}
-
-struct oattr *
-obj_add_attr(struct odes *o, uns x, byte *v)
-{
- return obj_add_attr_internal(o, oa_new(o, x, v));
-}
-
-struct oattr *
-obj_add_attr_ref(struct odes *o, uns x, byte *v)
-{
- return obj_add_attr_internal(o, oa_new_ref(o, x, v));
-}
-
-struct oattr *
-obj_prepend_attr(struct odes *o, uns x, byte *v)
-{
- struct oattr *a, *b, **z;
-
- b = oa_new(o, x, v);
- z = &o->attrs;
- while (a = *z)
- {
- if (a->attr == x)
- {
- b->same = a;
- b->next = a->next;
- a->next = NULL;
- *z = b;
- return b;
- }
- z = &a->next;
- }
- b->next = o->attrs;
- o->attrs = b;
- return b;
-}
-
-struct oattr *
-obj_insert_attr(struct odes *o, struct oattr *first, struct oattr *after, byte *v)
-{
- struct oattr *b = oa_new(o, first->attr, v);
- b->same = after->same;
- after->same = b;
- return b;
-}
-
-void
-obj_move_attr_to_head(struct odes *o, uns x)
-{
- struct oattr *a, **z;
-
- z = &o->attrs;
- while (a = *z)
- {
- if (a->attr == x)
- {
- *z = a->next;
- a->next = o->attrs;
- o->attrs = a;
- break;
- }
- z = &a->next;
- }
-}
-
-void
-obj_move_attr_to_tail(struct odes *o, uns x)
-{
- struct oattr *a, **z;
-
- z = &o->attrs;
- while (a = *z)
- {
- if (a->attr == x)
- {
- *z = a->next;
- while (*z)
- z = &(*z)->next;
- *z = a;
- a->next = NULL;
- break;
- }
- z = &a->next;
- }
-}
+++ /dev/null
-/*
- * Sherlock Library -- Object Functions
- *
- * (c) 1997--2004 Martin Mares <mj@ucw.cz>
- * (c) 2004, Robert Spalek <robert@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#ifndef _SHERLOCK_OBJECT_H
-#define _SHERLOCK_OBJECT_H
-
-#define MAX_ATTR_SIZE 2500
- /* Maximum length an attribute can ever have (including name and trailing 0).
- * It has to be long enough to contain 1 URL, 1 reftext, and a few numbers
- * (see 'x' attribute in labels). */
-
-struct fastbuf;
-struct mempool;
-
-struct odes { /* Object description */
- struct oattr *attrs;
- struct mempool *pool;
- struct oattr *cached_attr;
-};
-
-struct oattr { /* Object attribute */
- struct oattr *next, *same;
- byte attr;
- byte *val;
-};
-
-void obj_dump(struct odes *);
-struct odes *obj_new(struct mempool *);
-struct oattr *obj_find_attr(struct odes *, uns);
-struct oattr *obj_find_attr_last(struct odes *, uns);
-uns obj_del_attr(struct odes *, struct oattr *);
-byte *obj_find_aval(struct odes *, uns);
-uns obj_find_anum(struct odes *, uns, uns);
-struct oattr *obj_set_attr(struct odes *, uns, byte *);
-struct oattr *obj_set_attr_num(struct odes *, uns, uns);
-struct oattr *obj_add_attr(struct odes *, uns, byte *);
-struct oattr *obj_add_attr_ref(struct odes *o, uns x, byte *v); // no strdup()
-struct oattr *obj_prepend_attr(struct odes *, uns, byte *);
-struct oattr *obj_insert_attr(struct odes *o, struct oattr *first, struct oattr *after, byte *v);
-void obj_move_attr_to_head(struct odes *o, uns);
-void obj_move_attr_to_tail(struct odes *o, uns);
-
-/* buck2obj.c: Reading of objects from buckets */
-
-struct parsed_attr {
- int attr;
- byte *val;
- uns len;
-};
-struct buck2obj_buf;
-
-void get_attr_set_type(uns type);
-int get_attr(byte **pos, byte *end, struct parsed_attr *attr);
-int bget_attr(struct fastbuf *b, struct parsed_attr *attr);
-
-struct buck2obj_buf *buck2obj_alloc(void);
-void buck2obj_free(struct buck2obj_buf *buf);
-
-int buck2obj_parse(struct buck2obj_buf *buf, uns buck_type, uns buck_len, struct fastbuf *body, struct odes *o_hdr, uns *body_start, struct odes *o_body);
-struct odes *obj_read_bucket(struct buck2obj_buf *buf, struct mempool *pool, uns buck_type, uns buck_len, struct fastbuf *body, uns *body_start);
- /* If body_start != NULL, then only the header is parsed and *body_start is
- * set to the position of the body. This function does a plenty of optimizations
- * and if the body fastbuf is overwritable (body->can_overwrite_buffer), it can keep the
- * attribute values stored on their original locations in the fastbuf's buffer.
- * However, no such things are performed when reading the header only.
- */
-
-int obj_read(struct fastbuf *, struct odes *);
-
-/* obj2buck.c: Generating buckets from objects */
-
-void put_attr_set_type(uns type);
-
-uns size_attr(uns len);
-
-byte *put_attr(byte *ptr, uns type, byte *val, uns len);
-byte *put_attr_str(byte *ptr, uns type, byte *val);
-byte *put_attr_vformat(byte *ptr, uns type, byte *mask, va_list va);
-byte *put_attr_format(byte *ptr, uns type, char *mask, ...) __attribute__((format(printf,3,4)));
-byte *put_attr_num(byte *ptr, uns type, uns val);
-byte *put_attr_separator(byte *ptr);
-
-void bput_attr(struct fastbuf *b, uns type, byte *val, uns len);
-void bput_attr_str(struct fastbuf *b, uns type, byte *val);
-void bput_attr_vformat(struct fastbuf *b, uns type, byte *mask, va_list va);
-void bput_attr_format(struct fastbuf *b, uns type, char *mask, ...) __attribute__((format(printf,3,4)));
-void bput_attr_num(struct fastbuf *b, uns type, uns val);
-void bput_attr_separator(struct fastbuf *b);
-
-void obj_write(struct fastbuf *, struct odes *);
-void obj_write_nocheck(struct fastbuf *, struct odes *);
-
-#endif
DIRS+=lib/regex
-LIBSH_MODS+=regex/regex
+LIBUCW_MODS+=regex/regex
obj/lib/regex/regex.o: CWARNS=
+++ /dev/null
-/*
- * Sherlock: Processing of tagged characters
- *
- * (c) 2001--2003 Martin Mares <mj@ucw.cz>
- */
-
-#ifndef _SHERLOCK_TAGGED_TEXT_H
-#define _SHERLOCK_TAGGED_TEXT_H
-
-#include "lib/fastbuf.h"
-#include "lib/ff-utf8.h"
-
-/* Reading of tagged text (Unicode values, tags mapped to 0x80000000 and higher) */
-
-#define GET_TAGGED_CHAR(p,u) do { \
- u = *p; \
- if (u >= 0xc0) \
- GET_UTF8(p,u); \
- else if (u >= 0x80) \
- { \
- p++; \
- if (u >= 0xb0) \
- { \
- ASSERT(u == 0xb0); \
- u += 0x80020000; \
- } \
- else if (u >= 0xa0) \
- { \
- ASSERT(*p >= 0x80 && *p <= 0xbf); \
- u = 0x80010000 + ((u & 0x0f) << 6) + (*p++ & 0x3f); \
- } \
- else \
- u += 0x80000000; \
- } \
- else \
- p++; \
-} while (0)
-
-#define SKIP_TAGGED_CHAR(p) do { \
- if (*p >= 0x80 && *p < 0xc0) \
- { \
- uns u = *p++; \
- if (u >= 0xa0 && u < 0xb0 && *p >= 0x80 && *p < 0xc0) \
- p++; \
- } \
- else \
- UTF8_SKIP(p); \
-} while (0)
-
-static inline uns
-bget_tagged_char(struct fastbuf *f)
-{
- uns u = bgetc(f);
- if ((int)u < 0x80)
- ;
- else if (u < 0xc0)
- {
- if (u >= 0xb0)
- {
- ASSERT(u == 0xb0);
- u += 0x80020000;
- }
- else if (u >= 0xa0)
- {
- uns v = bgetc(f);
- ASSERT(v >= 0x80 && v <= 0xbf);
- u = 0x80010000 + ((u & 0x0f) << 6) + (v & 0x3f);
- }
- else
- u += 0x80000000;
- }
- else
- {
- bungetc(f);
- u = bget_utf8(f);
- }
- return u;
-}
-
-#endif
+++ /dev/null
-/*
- * Sherlock Library -- URL Keys & URL Fingerprints
- *
- * (c) 2003 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#undef LOCAL_DEBUG
-
-#include "lib/lib.h"
-#include "lib/conf.h"
-#include "lib/index.h"
-#include "lib/url.h"
-#include "lib/fastbuf.h"
-#include "lib/chartype.h"
-#include "lib/hashfunc.h"
-
-#include <string.h>
-#include <fcntl.h>
-
-/*** Prefix recognition table ***/
-
-struct pxtab_rhs {
- struct pxtab_node *node;
- uns len;
- byte rhs[1];
-};
-
-struct pxtab_node {
- struct pxtab_node *parent;
- struct pxtab_rhs *rhs;
- uns len, total_len;
- byte component[0];
-};
-
-#define HASH_NODE struct pxtab_node
-#define HASH_PREFIX(p) pxtab_##p
-#define HASH_KEY_COMPLEX(x) x parent, x component, x len
-#define HASH_KEY_DECL struct pxtab_node *parent UNUSED, byte *component UNUSED, uns len UNUSED
-#define HASH_WANT_FIND
-#define HASH_WANT_LOOKUP
-#define HASH_GIVE_HASHFN
-#define HASH_GIVE_EQ
-#define HASH_GIVE_EXTRA_SIZE
-#define HASH_GIVE_INIT_KEY
-#define HASH_USE_POOL cfpool
-
-static inline uns
-pxtab_hash(HASH_KEY_DECL)
-{
- return ((uns)parent) ^ hash_block(component, len);
-}
-
-static inline int
-pxtab_eq(struct pxtab_node *p1, byte *c1, uns l1, struct pxtab_node *p2, byte *c2, uns l2)
-{
- return p1 == p2 && l1 == l2 && !memcmp(c1, c2, l1);
-}
-
-static inline int
-pxtab_extra_size(HASH_KEY_DECL)
-{
- return len;
-}
-
-static inline void
-pxtab_init_key(struct pxtab_node *node, HASH_KEY_DECL)
-{
- node->parent = parent;
- node->len = len;
- memcpy(node->component, component, len);
- node->rhs = NULL;
-}
-
-#include "lib/hashtable.h"
-
-static inline byte *
-pxtab_skip_first_comp(byte *x)
-{
- while (*x && *x != ':')
- x++;
- byte *y = x;
- while (*x != '/' || x[1] != '/')
- {
- if (!*x)
- return y;
- x++;
- }
- return x+2;
-}
-
-static inline byte *
-pxtab_skip_next_comp(byte *x)
-{
- for(;;)
- {
- if (!*x)
- return x;
- if (*x == '/')
- return x+1;
- x++;
- }
-}
-
-static struct pxtab_node *
-pxtab_find_rule(byte *lhs)
-{
- byte *next;
- struct pxtab_node *node, *parent = NULL;
-
- next = pxtab_skip_first_comp(lhs);
- DBG("\tfirst: %.*s", next-lhs, lhs);
- node = pxtab_find(NULL, lhs, next-lhs);
- while (node && *next)
- {
- parent = node;
- lhs = next;
- next = pxtab_skip_next_comp(lhs);
- DBG("\tnext: %.*s", next-lhs, lhs);
- node = pxtab_find(parent, lhs, next-lhs);
- }
- return node ? : parent;
-}
-
-static struct pxtab_node *
-pxtab_add_rule(byte *lhs, struct pxtab_rhs *rhs)
-{
- byte *lhs_start = lhs;
- byte *next;
- struct pxtab_node *node, *parent;
-
- next = pxtab_skip_first_comp(lhs);
- DBG("\tfirst: %.*s", next-lhs, lhs);
- node = pxtab_lookup(NULL, lhs, next-lhs);
- for(;;)
- {
- if (node->rhs)
- return NULL;
- if (!*next)
- break;
- lhs = next;
- next = pxtab_skip_next_comp(lhs);
- parent = node;
- DBG("\tnext: %.*s", next-lhs, lhs);
- node = pxtab_lookup(parent, lhs, next-lhs);
- }
- DBG("\tsetting rhs, %d to eat", next-lhs_start);
- node->rhs = rhs;
- node->total_len = next - lhs_start;
- return node;
-}
-
-static struct pxtab_rhs *
-pxtab_add_rhs(byte *rhs)
-{
- uns len = strlen(rhs);
- struct pxtab_rhs *r = cfg_malloc(sizeof(*r) + len);
- r->len = len;
- memcpy(r->rhs, rhs, len+1);
- struct pxtab_node *node = pxtab_add_rule(rhs, r);
- r->node = node;
- return r;
-}
-
-static void
-pxtab_load(byte *name)
-{
- struct fastbuf *f;
- struct pxtab_rhs *rhs = NULL;
- byte line[MAX_URL_SIZE], url[MAX_URL_SIZE], *c, *d;
- int err;
- int lino = 0;
-
- DBG("Loading prefix table %s", name);
- f = bopen(name, O_RDONLY, 4096);
- while (bgets(f, line, sizeof(line)))
- {
- lino++;
- c = line;
- while (Cblank(*c))
- c++;
- if (!*c || *c == '#')
- continue;
- if (err = url_auto_canonicalize(c, url))
- die("%s, line %d: Invalid URL (%s)", name, lino, url_error(err));
- if (!(d = strrchr(c, '/')) || d[1])
- die("%s, line %d: Prefix rules must end with a slash", name, lino);
- if (c == line)
- {
- DBG("Creating RHS <%s>", c);
- if (!(rhs = pxtab_add_rhs(c)))
- die("%s, line %d: Right-hand side already mapped", name, lino);
- }
- else if (!rhs)
- die("%s, line %d: Syntax error", name, lino);
- else
- {
- DBG("Adding LHS <%s>", c);
- if (!pxtab_add_rule(c, rhs))
- die("%s, line %d: Duplicate rule", name, lino);
- }
- }
- bclose(f);
-}
-
-/*** Configuration ***/
-
-static uns urlkey_www_hack;
-static byte *urlkey_pxtab_path;
-
-static struct cfitem urlkey_config[] = {
- { "URLKey", CT_SECTION, NULL },
- { "WWWHack", CT_INT, &urlkey_www_hack },
- { "PrefixTable", CT_STRING, &urlkey_pxtab_path },
- { NULL, CT_STOP, NULL }
-};
-
-static void CONSTRUCTOR urlkey_conf_init(void)
-{
- cf_register(urlkey_config);
-}
-
-void
-url_key_init(void)
-{
- pxtab_init();
- if (urlkey_pxtab_path)
- pxtab_load(urlkey_pxtab_path);
-}
-
-static inline byte *
-url_key_remove_www(byte *url, byte **pbuf)
-{
- if (urlkey_www_hack && !strncmp(url, "http://www.", 11))
- {
- byte *buf = *pbuf;
- strcpy(buf, "http://");
- strcpy(buf+7, url+11);
- DBG("\tWWW hack: %s -> %s", url, buf);
- url = buf;
- *pbuf = buf + MAX_URL_SIZE;
- }
- return url;
-}
-
-byte *
-url_key(byte *url, byte *buf)
-{
- DBG("Generating URL key for %s", url);
- url = url_key_remove_www(url, &buf);
- struct pxtab_node *rule = pxtab_find_rule(url);
- if (rule && rule->rhs && rule->rhs->node != rule)
- {
- struct pxtab_rhs *rhs = rule->rhs;
- DBG("\tApplying rule <%s>, remove %d, add %d", rhs->rhs, rule->total_len, rhs->len);
- memcpy(buf, rhs->rhs, rhs->len);
- strcpy(buf + rhs->len, url + rule->total_len);
- url = buf;
- buf += MAX_URL_SIZE;
- }
- DBG("\tOutput: %s", url);
- return url;
-}
-
-void
-url_fingerprint(byte *url, struct fingerprint *fp)
-{
- byte buf[URL_KEY_BUF_SIZE];
- fingerprint(url_key(url, buf), fp);
-}
-
-#ifdef TEST
-
-int main(int argc, char **argv)
-{
- cf_read(cfdeffile);
- url_key_init();
- for (int i=1; i<argc; i++)
- {
- byte buf[URL_KEY_BUF_SIZE];
- struct fingerprint fp;
- byte *key = url_key(argv[i], buf);
- fingerprint(key, &fp);
- for (int j=0; j<12; j++)
- printf("%02x", fp.hash[j]);
- printf(" %s\n", key);
- }
- return 0;
-}
-
-#endif