No changes have been made except for removing SHERLOCK_VERSION.
--- /dev/null
+# Makefile for the UCW Library (c) 1997--2007 Martin Mares <mj@ucw.cz>
+
+# Hook this directory into the build (presumably DIRS and CONFIGS are
+# collected by the including top-level Makefile -- confirm there).
+DIRS+=lib
+CONFIGS+=library
+# Programs below list $(LIBUCW) as a prerequisite in order to use the library.
+LIBUCW=$(o)/lib/libucw.pc
+
+# The database tool is built only when it has been configured in.
+ifdef CONFIG_UCW_DBTOOL
+PROGS+=$(o)/lib/db-tool
+endif
+
+# Modules of the library, named without a suffix. The object lists of
+# libucw.a (.o) and libucw.so (.oo) are derived from this list.
+LIBUCW_MODS= \
+ threads \
+ alloc alloc_str realloc bigalloc mempool mempool-str mempool-fmt eltpool \
+ mmap pagecache partmap hashfunc \
+ lists slists simple-lists bitsig \
+ log log-file proctitle \
+ conf-alloc conf-dump conf-input conf-intr conf-journal conf-parse conf-section \
+ ipaccess \
+ profile \
+ fastbuf ff-binary ff-string ff-printf ff-unicode \
+ fb-file carefulio fb-mem fb-temp fb-mmap fb-limfd fb-buffer fb-grow fb-pool fb-atomic fb-param \
+ str_ctype str_upper str_lower unicode stkstring \
+ wildmatch wordsplit ctmatch patimatch patmatch regex \
+ prime primetable random timer randomkey \
+ bit-ffs bit-fls \
+ db \
+ url \
+ mainloop exitstatus runcmd sighandler \
+ lizard lizard-safe adler32 \
+ md5 md5hex \
+ base64 base224 \
+ sync \
+ qache \
+ string \
+ bbuf \
+ getopt
+
+# Public header files of the library. They are prerequisites of the
+# .include-stamp rule and are passed to build/install-includes, so every
+# header should be listed exactly once.
+LIBUCW_INCLUDES= \
+ lib.h config.h threads.h \
+ mempool.h pagecache.h \
+ arraysort.h \
+ lists.h clists.h slists.h simple-lists.h \
+ unaligned.h prefetch.h \
+ bbuf.h gbuf.h bitarray.h bitsig.h \
+ hashfunc.h hashtable.h \
+ heap.h binheap.h binheap-node.h \
+ redblack.h \
+ binsearch.h \
+ bitops.h \
+ conf.h getopt.h ipaccess.h \
+ profile.h \
+ fastbuf.h lfs.h ff-unicode.h ff-utf8.h ff-binary.h \
+ chartype.h unicode.h stkstring.h \
+ wildmatch.h patmatch.h \
+ db.h \
+ url.h \
+ mainloop.h \
+ lizard.h \
+ md5.h \
+ base64.h base224.h \
+ qache.h \
+ kmp.h kmp-search.h \
+ partmap.h
+
+ifdef CONFIG_UCW_THREADS
+# Some modules require threading
+LIBUCW_MODS+=threads-conf workqueue asio fb-direct
+LIBUCW_INCLUDES+=workqueue.h semaphore.h asio.h
+endif
+
+# Sub-makefiles of bundled regex/getopt code are included only when the
+# corresponding option is configured.
+ifdef CONFIG_OWN_REGEX
+include $(s)/lib/regex/Makefile
+endif
+
+ifdef CONFIG_OWN_GETOPT
+include $(s)/lib/getopt/Makefile
+endif
+
+# The sorter sub-makefile is included unconditionally.
+include $(s)/lib/sorter/Makefile
+
+# Module names prefixed with the object directory (still without a suffix).
+LIBUCW_MOD_PATHS=$(addprefix $(o)/lib/,$(LIBUCW_MODS))
+
+# The static library is made of .o objects, the shared one of .oo objects.
+# No recipe is given here (presumably generic rules elsewhere do the linking).
+$(o)/lib/libucw.a: $(addsuffix .o,$(LIBUCW_MOD_PATHS))
+$(o)/lib/libucw.so: $(addsuffix .oo,$(LIBUCW_MOD_PATHS))
+
+# Performance-critical modules get extra optimization flags. The flags are
+# applied to both the static (.o) and the shared (.oo) object of each module
+# so that libucw.a and libucw.so are built with the same optimizations.
+$(o)/lib/hashfunc.o $(o)/lib/hashfunc.oo: CFLAGS += -funroll-loops
+$(o)/lib/lizard.o $(o)/lib/lizard.oo: CFLAGS += $(COPT2) -funroll-loops
+
+# Stand-alone test and utility programs: each is built from its own object
+# and depends on the library.
+$(o)/lib/db-test: $(o)/lib/db-test.o $(LIBUCW)
+$(o)/lib/db-tool: $(o)/lib/db-tool.o $(LIBUCW)
+$(o)/lib/conf-test: $(o)/lib/conf-test.o $(LIBUCW)
+$(o)/lib/lfs-test: $(o)/lib/lfs-test.o $(LIBUCW)
+$(o)/lib/hash-test: $(o)/lib/hash-test.o $(LIBUCW)
+$(o)/lib/str-test: $(o)/lib/str-test.o $(LIBUCW)
+$(o)/lib/asort-test: $(o)/lib/asort-test.o $(LIBUCW)
+$(o)/lib/redblack-test: $(o)/lib/redblack-test.o $(LIBUCW)
+$(o)/lib/binheap-test: $(o)/lib/binheap-test.o $(LIBUCW)
+$(o)/lib/lizard-test: $(o)/lib/lizard-test.o $(LIBUCW)
+$(o)/lib/kmp-test: $(o)/lib/kmp-test.o $(LIBUCW) $(LIBCHARSET)
+$(o)/lib/ipaccess-test: $(o)/lib/ipaccess-test.o $(LIBUCW)
+
+# Test targets to run.
+# NOTE(review): bitops.test has a prerequisite rule below but is not listed
+# here -- confirm whether that omission is intentional.
+TESTS+=$(addprefix $(o)/lib/,regex.test unicode.test hash-test.test mempool.test stkstring.test \
+ slists.test kmp-test.test bbuf.test getopt.test fastbuf.test ff-unicode.test eltpool.test)
+
+# Each test depends on the test binaries it runs.
+$(o)/lib/regex.test: $(o)/lib/regex-t
+$(o)/lib/unicode.test: $(o)/lib/unicode-t
+$(o)/lib/hash-test.test: $(o)/lib/hash-test
+$(o)/lib/mempool.test: $(o)/lib/mempool-t $(o)/lib/mempool-fmt-t $(o)/lib/mempool-str-t
+$(o)/lib/stkstring.test: $(o)/lib/stkstring-t
+$(o)/lib/bitops.test: $(o)/lib/bit-ffs-t $(o)/lib/bit-fls-t
+$(o)/lib/slists.test: $(o)/lib/slists-t
+$(o)/lib/kmp-test.test: $(o)/lib/kmp-test
+$(o)/lib/bbuf.test: $(o)/lib/bbuf-t
+$(o)/lib/getopt.test: $(o)/lib/getopt-t
+$(o)/lib/fastbuf.test: $(o)/lib/fb-file-t $(o)/lib/fb-grow-t $(o)/lib/fb-pool-t
+$(o)/lib/ff-unicode.test: $(o)/lib/ff-unicode-t
+$(o)/lib/eltpool.test: $(o)/lib/eltpool-t
+
+# The asio module exists only in threaded builds, and so does its test.
+ifdef CONFIG_UCW_THREADS
+TESTS+=$(addprefix $(o)/lib/,asio.test)
+$(o)/lib/asio.test: $(o)/lib/asio-t
+endif
+
+API_LIBS+=libucw
+API_INCLUDES+=$(o)/lib/.include-stamp
+# Install the public headers and autoconf.h into run/include/lib and record
+# that in a stamp file. $(<D) is the directory of the first prerequisite,
+# i.e. the source directory of the headers.
+# NOTE(review): obj/ and run/ are hard-coded here while other paths use $(o)
+# -- confirm that they always coincide.
+$(o)/lib/.include-stamp: $(addprefix $(s)/lib/,$(LIBUCW_INCLUDES)) obj/autoconf.h
+ $(Q)$(s)/build/install-includes $(<D) run/include/lib $(LIBUCW_INCLUDES)
+ $(Q)$(s)/build/install-includes obj run/include/lib autoconf.h
+ $(Q)touch $@
+# No recipe here (presumably a generic rule installs the pkg-config file).
+run/lib/pkgconfig/libucw.pc: $(o)/lib/libucw.pc
+
+# Optional Perl modules and shell utilities have their own sub-makefiles.
+ifdef CONFIG_UCW_PERL
+include $(s)/lib/perl/Makefile
+endif
+
+ifdef CONFIG_UCW_SHELL_UTILS
+include $(s)/lib/shell/Makefile
+endif
--- /dev/null
+Generally, functions in the UCW library are reentrant as long as you call them
+on different data. Calling them on the same object is not safe unless stated
+otherwise; this also applies to functions acting on any kind of global state.
+
+There are some exceptions:
+
+- setproctitle() is not safe, it modifies global state
--- /dev/null
+/*
+ * adler32.c -- compute the Adler-32 checksum of a data stream
+ *
+ * Copyright (C) 1995--2003 Mark Adler
+ *
+ * Taken from zlib-1.2.1 and adjusted by Robert Spalek. For conditions of
+ * distribution and use, see copyright notice in zlib.h.
+ */
+
+#include "lib/lib.h"
+#include "lib/lizard.h"
+
+#define BASE 65521UL /* largest prime smaller than 65536 */
+#define NMAX 5552 /* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */
+
+/*
+ * Unrolled accumulation steps: DO1 adds one byte to s1 and then s1 to s2,
+ * DO2..DO16 repeat that for 2..16 consecutive bytes. The macros expand to
+ * several statements and operate on the caller's local variables s1 and s2.
+ */
+#define DO1(buf,i) {s1 += buf[i]; s2 += s1;}
+#define DO2(buf,i) DO1(buf,i); DO1(buf,i+1);
+#define DO4(buf,i) DO2(buf,i); DO2(buf,i+2);
+#define DO8(buf,i) DO4(buf,i); DO4(buf,i+4);
+#define DO16(buf) DO8(buf,0); DO8(buf,8);
+/* Reduce a running sum modulo BASE in place */
+#define MOD(a) a %= BASE
+
+/*
+ * Update a running Adler-32 checksum `adler' with `len' bytes starting
+ * at `buf' and return the new checksum. When `buf' is NULL, 1 is returned.
+ */
+uns
+update_adler32(uns adler, const byte *buf, uns len)
+{
+  uns s1 = adler & 0xffff;		/* low half: sum of bytes */
+  uns s2 = (adler >> 16) & 0xffff;	/* high half: sum of the s1 values */
+
+  if (!buf)
+    return 1L;
+
+  while (len > 0)
+    {
+      /* Process at most NMAX bytes between two modulo reductions */
+      int chunk = (len < NMAX) ? (int) len : NMAX;
+      len -= chunk;
+
+      /* Unrolled part, 16 bytes per step */
+      for (; chunk >= 16; chunk -= 16)
+        {
+          DO16(buf);
+          buf += 16;
+        }
+
+      /* Remaining 0..15 bytes one by one */
+      for (; chunk != 0; chunk--)
+        {
+          s1 += *buf++;
+          s2 += s1;
+        }
+
+      MOD(s1);
+      MOD(s2);
+    }
+  return (s2 << 16) | s1;
+}
--- /dev/null
+/*
+ * UCW Library -- Memory Allocation
+ *
+ * (c) 2000 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+#ifndef DEBUG_DMALLOC
+
+/*
+ * Allocate `size' bytes by malloc(). Never returns NULL: when malloc()
+ * fails, die() is called with an error message.
+ */
+void *
+xmalloc(uns size)
+{
+  void *x = malloc(size);
+  if (!x)
+    die("Cannot allocate %u bytes of memory", size);	/* size is unsigned, hence %u */
+  return x;
+}
+
+#endif
+
+/*
+ * Allocate `size' bytes by xmalloc() and fill them with zeroes.
+ * Returns the pointer obtained from xmalloc().
+ */
+void *
+xmalloc_zero(uns size)
+{
+  void *x = xmalloc(size);
+  /* memset() is standard C and declared in <string.h>; bzero() is a legacy BSD call */
+  memset(x, 0, size);
+  return x;
+}
+
+/*
+ * Release memory by free(). This is deliberately a real function and not
+ * a macro: it costs a little, but xmalloc() is not used for anything
+ * critical anyway, so simplicity is preferred.
+ */
+void
+xfree(void *ptr)
+{
+  free(ptr);
+}
--- /dev/null
+/*
+ * UCW Library -- String Allocation
+ *
+ * (c) 1997 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+
+#include <string.h>
+
+/*
+ * Return a copy of the string `s' (including its terminating zero byte)
+ * stored in memory obtained from xmalloc().
+ */
+char *
+xstrdup(const char *s)
+{
+  uns size = strlen(s) + 1;
+  char *copy = xmalloc(size);
+
+  memcpy(copy, s, size);
+  return copy;
+}
--- /dev/null
+/*
+ * UCW Library -- Universal Array Sorter
+ *
+ * (c) 2003 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+/*
+ * This is not a normal header file, it's a generator of sorting
+ * routines. Each time you include it with parameters set in the
+ * corresponding preprocessor macros, it generates an array sorter
+ * with the parameters given.
+ *
+ * You might wonder why the heck do we implement our own array sorter
+ * instead of using qsort(). The primary reason is that qsort handles
+ * only continuous arrays, but we need to sort array-like data structures
+ * where the only way to access elements is by using an indexing macro.
+ * Besides that, we are more than 2 times faster.
+ *
+ * So much for advocacy, here are the parameters (those marked with [*]
+ * are mandatory):
+ *
+ * ASORT_PREFIX(x) [*] add a name prefix (used on all global names
+ * defined by the sorter)
+ * ASORT_KEY_TYPE [*] data type of a single array entry key
+ * ASORT_ELT(i) [*] returns the key of i-th element
+ * ASORT_LT(x,y) x < y for ASORT_KEY_TYPE (default: "x<y")
+ * ASORT_SWAP(i,j) swap i-th and j-th element (default: assume _ELT
+ * is an l-value and swap just the keys)
+ * ASORT_THRESHOLD threshold for switching between quicksort and insertsort
+ * ASORT_EXTRA_ARGS extra arguments for the sort function (they are always
+ * visible in all the macros supplied above), starts with comma
+ *
+ * After including this file, a static function
+ * ASORT_PREFIX(sort)(uns array_size ASORT_EXTRA_ARGS) is defined and all
+ * parameter macros are automatically undef'd.
+ */
+
+/* Defaults for the optional parameters */
+
+/* Default comparison: the plain `<' operator on keys */
+#ifndef ASORT_LT
+#define ASORT_LT(x,y) ((x) < (y))
+#endif
+
+/* Default swap: exchange just the keys, which requires ASORT_ELT() to be an l-value */
+#ifndef ASORT_SWAP
+#define ASORT_SWAP(i,j) do { ASORT_KEY_TYPE tmp = ASORT_ELT(i); ASORT_ELT(i)=ASORT_ELT(j); ASORT_ELT(j)=tmp; } while (0)
+#endif
+
+#ifndef ASORT_THRESHOLD
+#define ASORT_THRESHOLD 8 /* Guesswork and experimentation */
+#endif
+
+/* By default, the sort function takes no extra arguments */
+#ifndef ASORT_EXTRA_ARGS
+#define ASORT_EXTRA_ARGS
+#endif
+
+/*
+ * Sort elements 0 .. array_size-1 accessed by ASORT_ELT() into non-decreasing
+ * order according to ASORT_LT(). Works in two phases: an iterative quicksort
+ * which leaves small partitions unsorted, followed by a single insertion sort
+ * pass over the whole array.
+ */
+static void ASORT_PREFIX(sort)(uns array_size ASORT_EXTRA_ARGS)
+{
+ /* Explicit stack of partitions which are still waiting to be sorted */
+ struct stk { int l, r; } stack[8*sizeof(uns)];
+ int l, r, left, right, m;
+ uns sp = 0;
+ ASORT_KEY_TYPE pivot;
+
+ if (array_size <= 1)
+ return;
+
+ /* QuickSort with optimizations a'la Sedgewick, but stop at ASORT_THRESHOLD */
+
+ left = 0;
+ right = array_size - 1;
+ for(;;)
+ {
+ l = left;
+ r = right;
+ m = (l+r)/2;
+ /* Order the elements at l, m and r, then use the middle one as the pivot */
+ if (ASORT_LT(ASORT_ELT(m), ASORT_ELT(l)))
+ ASORT_SWAP(l,m);
+ if (ASORT_LT(ASORT_ELT(r), ASORT_ELT(m)))
+ {
+ ASORT_SWAP(m,r);
+ if (ASORT_LT(ASORT_ELT(m), ASORT_ELT(l)))
+ ASORT_SWAP(l,m);
+ }
+ pivot = ASORT_ELT(m);
+ /* Partition: move l up and r down until they cross, swapping misplaced pairs */
+ do
+ {
+ while (ASORT_LT(ASORT_ELT(l), pivot))
+ l++;
+ while (ASORT_LT(pivot, ASORT_ELT(r)))
+ r--;
+ if (l < r)
+ {
+ ASORT_SWAP(l,r);
+ l++;
+ r--;
+ }
+ else if (l == r)
+ {
+ l++;
+ r--;
+ }
+ }
+ while (l <= r);
+ /* Now the partitions are [left,r] and [l,right]; decide which to process next */
+ if ((r - left) >= ASORT_THRESHOLD && (right - l) >= ASORT_THRESHOLD)
+ {
+ /* Both partitions ok => push the larger one */
+ if ((r - left) > (right - l))
+ {
+ stack[sp].l = left;
+ stack[sp].r = r;
+ left = l;
+ }
+ else
+ {
+ stack[sp].l = l;
+ stack[sp].r = right;
+ right = r;
+ }
+ sp++;
+ }
+ else if ((r - left) >= ASORT_THRESHOLD)
+ {
+ /* Left partition OK, right undersize */
+ right = r;
+ }
+ else if ((right - l) >= ASORT_THRESHOLD)
+ {
+ /* Right partition OK, left undersize */
+ left = l;
+ }
+ else
+ {
+ /* Both partitions undersize => pop */
+ if (!sp)
+ break;
+ sp--;
+ left = stack[sp].l;
+ right = stack[sp].r;
+ }
+ }
+
+ /*
+ * We have a partially sorted array, finish by insertsort. Inspired
+ * by qsort() in GNU libc.
+ */
+
+ /* Find minimal element which will serve as a barrier */
+ r = MIN(array_size, ASORT_THRESHOLD);
+ m = 0;
+ for (l=1; l<r; l++)
+ if (ASORT_LT(ASORT_ELT(l),ASORT_ELT(m)))
+ m = l;
+ ASORT_SWAP(0,m);
+
+ /* Insertion sort */
+ for (m=1; m<(int)array_size; m++)
+ {
+ l=m;
+ /* No lower bound check on l: the loop relies on the barrier at index 0 */
+ while (ASORT_LT(ASORT_ELT(m),ASORT_ELT(l-1)))
+ l--;
+ /* Move element m to position l by swapping it with l, l+1, ..., m-1 in turn */
+ while (l < m)
+ {
+ ASORT_SWAP(l,m);
+ l++;
+ }
+ }
+}
+
+#undef ASORT_PREFIX
+#undef ASORT_KEY_TYPE
+#undef ASORT_ELT
+#undef ASORT_LT
+#undef ASORT_SWAP
+#undef ASORT_THRESHOLD
+#undef ASORT_EXTRA_ARGS
--- /dev/null
+/*
+ * UCW Library -- Asynchronous I/O
+ *
+ * (c) 2006 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#undef LOCAL_DEBUG
+
+#include "lib/lib.h"
+#include "lib/asio.h"
+#include "lib/threads.h"
+
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+
+/* Number of initialized queues; the worker pool exists while it is non-zero */
+static uns asio_num_users;
+/* Worker pool shared by all ASIO queues */
+static struct worker_pool asio_wpool;
+
+/*
+ * Register one more user of the ASIO subsystem. The first user sets up
+ * the shared worker pool with a single thread. Called by asio_init_queue()
+ * between ucwlib_lock() and ucwlib_unlock().
+ */
+static void
+asio_init_unlocked(void)
+{
+  uns previous_users = asio_num_users++;
+
+  if (!previous_users)
+    {
+      DBG("ASIO: INIT");
+      asio_wpool.num_threads = 1;
+      worker_pool_init(&asio_wpool);
+    }
+}
+
+/*
+ * Unregister one user of the ASIO subsystem. When the last user is gone,
+ * the shared worker pool is cleaned up. Called by asio_cleanup_queue()
+ * between ucwlib_lock() and ucwlib_unlock().
+ */
+static void
+asio_cleanup_unlocked(void)
+{
+  asio_num_users--;
+  if (!asio_num_users)
+    {
+      DBG("ASIO: CLEANUP");
+      worker_pool_cleanup(&asio_wpool);
+    }
+}
+
+/*
+ * Initialize the queue `q'. The caller must have set q->buffer_size to
+ * a non-zero value beforehand (it is asserted below).
+ */
+void
+asio_init_queue(struct asio_queue *q)
+{
+ /* The shared worker pool is set up under the library-wide lock */
+ ucwlib_lock();
+ asio_init_unlocked();
+ ucwlib_unlock();
+
+ DBG("ASIO: New queue %p", q);
+ ASSERT(q->buffer_size);
+ q->allocated_requests = 0;
+ q->running_requests = 0;
+ q->running_writebacks = 0;
+ q->use_count = 0;
+ clist_init(&q->idle_list);
+ clist_init(&q->done_list);
+ work_queue_init(&asio_wpool, &q->queue);
+}
+
+/*
+ * Destroy the queue `q'. No requests may be running, allocated or waiting
+ * in the done list. All recycled requests are freed together with their
+ * buffers.
+ */
+void
+asio_cleanup_queue(struct asio_queue *q)
+{
+  struct asio_request *idle;
+
+  DBG("ASIO: Removing queue %p", q);
+  ASSERT(!q->running_requests);
+  ASSERT(!q->running_writebacks);
+  ASSERT(!q->allocated_requests);
+  ASSERT(clist_empty(&q->done_list));
+
+  /* Release everything which waits for recycling */
+  for (;;)
+    {
+      idle = clist_remove_head(&q->idle_list);
+      if (!idle)
+        break;
+      big_free(idle->buffer, q->buffer_size);
+      xfree(idle);
+    }
+
+  work_queue_cleanup(&q->queue);
+
+  /* Drop our reference to the shared worker pool */
+  ucwlib_lock();
+  asio_cleanup_unlocked();
+  ucwlib_unlock();
+}
+
+/*
+ * Get an empty request for the queue `q': either a recycled one from the
+ * idle list, or a newly allocated one with a buffer of q->buffer_size bytes.
+ * All per-operation fields of the request are reset.
+ */
+struct asio_request *
+asio_get(struct asio_queue *q)
+{
+  struct asio_request *req;
+
+  q->allocated_requests++;
+  req = clist_head(&q->idle_list);
+  if (req)
+    {
+      /* Recycle an idle request */
+      clist_remove(&req->work.n);
+      DBG("ASIO: Got %p", req);
+    }
+  else
+    {
+      /* Nothing to recycle, create a new request */
+      req = xmalloc_zero(sizeof(*req));
+      req->queue = q;
+      req->buffer = big_alloc(q->buffer_size);
+      DBG("ASIO: Got %p (new)", req);
+    }
+
+  req->op = ASIO_FREE;
+  req->fd = -1;
+  req->len = 0;
+  req->status = -1;
+  req->returned_errno = -1;
+  req->submitted = 0;
+  return req;
+}
+
+/*
+ * Wait for a single request of the queue `q' to finish.
+ *
+ * Returns 0 if work_wait() reports that there is nothing to wait for,
+ * 1 otherwise. A finished write-back is checked (failed or short writes
+ * are fatal) and recycled immediately; any other finished request is
+ * appended to the done list.
+ */
+static int
+asio_raw_wait(struct asio_queue *q)
+{
+  struct asio_request *r = (struct asio_request *) work_wait(&q->queue);
+  if (!r)
+    return 0;
+
+  r->submitted = 0;
+  q->running_requests--;
+  if (r->op != ASIO_WRITE_BACK)
+    {
+      clist_add_tail(&q->done_list, &r->work.n);
+      return 1;
+    }
+
+  DBG("ASIO: Finished writeback %p", r);
+  if (r->status < 0)
+    die("Asynchronous write to fd %d failed: %s", r->fd, strerror(r->returned_errno));
+  if (r->status != (int)r->len)
+    /* r->len is unsigned, so it is printed by %u */
+    die("Asynchronous write to fd %d wrote only %d bytes out of %u", r->fd, r->status, r->len);
+  q->running_writebacks--;
+  asio_put(r);
+  return 1;
+}
+
+/*
+ * Work handler (installed as work.go by asio_submit()): performs the
+ * read() or write() described by the request and stores the return value
+ * and errno in it. An unknown operation is fatal.
+ */
+static void
+asio_handler(struct worker_thread *t UNUSED, struct work *w)
+{
+ /* The work node is the first member of struct asio_request */
+ struct asio_request *r = (struct asio_request *) w;
+
+ DBG("ASIO: Servicing %p (%s on fd=%d, len=%d)", r,
+ (char*[]) { "?", "READ", "WRITE", "WRITEBACK" }[r->op], r->fd, r->len);
+ errno = 0;
+ switch (r->op)
+ {
+ case ASIO_READ:
+ r->status = read(r->fd, r->buffer, r->len);
+ break;
+ case ASIO_WRITE:
+ case ASIO_WRITE_BACK:
+ r->status = write(r->fd, r->buffer, r->len);
+ break;
+ default:
+ die("ASIO: Got unknown request type %d", r->op);
+ }
+ r->returned_errno = errno;
+ DBG("ASIO: Finished %p (status=%d, errno=%d)", r, r->status, r->returned_errno);
+}
+
+/*
+ * Submit the request `r' to its queue. The request must have its operation
+ * set and must not be already submitted. For write-back requests, this
+ * function first waits until fewer than q->max_writebacks of them are
+ * running.
+ */
+void
+asio_submit(struct asio_request *r)
+{
+ struct asio_queue *q = r->queue;
+ DBG("ASIO: Submitting %p on queue %p", r, q);
+ ASSERT(r->op != ASIO_FREE);
+ ASSERT(!r->submitted);
+ if (r->op == ASIO_WRITE_BACK)
+ {
+ while (q->running_writebacks >= q->max_writebacks)
+ {
+ DBG("ASIO: Waiting for free writebacks");
+ /* Write-backs are running, so there must be something to wait for */
+ if (!asio_raw_wait(q))
+ ASSERT(0);
+ }
+ q->running_writebacks++;
+ }
+ q->running_requests++;
+ r->submitted = 1;
+ r->work.go = asio_handler;
+ r->work.priority = 0;
+ work_submit(&q->queue, &r->work);
+}
+
+/*
+ * Return the first finished request of the queue `q' (removed from the
+ * done list), waiting for running requests if needed. Returns NULL when
+ * the done list is empty and there is nothing more to wait for.
+ */
+struct asio_request *
+asio_wait(struct asio_queue *q)
+{
+  struct asio_request *done;
+
+  for (;;)
+    {
+      done = clist_head(&q->done_list);
+      if (done)
+        break;
+      DBG("ASIO: Waiting on queue %p", q);
+      if (!asio_raw_wait(q))
+        return NULL;
+    }
+
+  clist_remove(&done->work.n);
+  DBG("ASIO: Done %p", done);
+  return done;
+}
+
+/*
+ * Return the request `r' to the idle list of its queue, so that it can be
+ * recycled by asio_get(). The request must not be submitted.
+ */
+void
+asio_put(struct asio_request *r)
+{
+ struct asio_queue *q = r->queue;
+ DBG("ASIO: Put %p", r);
+ ASSERT(!r->submitted);
+ ASSERT(q->allocated_requests);
+ clist_add_tail(&q->idle_list, &r->work.n);
+ q->allocated_requests--;
+}
+
+/*
+ * Wait until no requests of the queue `q' are running. Finished read and
+ * write requests stay in the done list.
+ */
+void
+asio_sync(struct asio_queue *q)
+{
+  DBG("ASIO: Syncing queue %p", q);
+  while (q->running_requests)
+    {
+      int finished = asio_raw_wait(q);
+      /* Requests are still running, so the wait must have succeeded */
+      if (!finished)
+        ASSERT(0);
+    }
+}
+
+#ifdef TEST
+
+/*
+ * Self-test: reads one byte from fd 0, then writes the letters A..J to fd 1
+ * as write-back requests and finally a newline as an ordinary write.
+ * The disabled variant copies fd 0 to fd 1 using write-backs.
+ */
+int main(void)
+{
+ struct asio_queue q;
+ struct asio_request *r;
+
+ /* The user-settable fields must be filled in before asio_init_queue() */
+ q.buffer_size = 4096;
+ q.max_writebacks = 2;
+ asio_init_queue(&q);
+
+#if 0
+
+ for (;;)
+ {
+ r = asio_get(&q);
+ r->op = ASIO_READ;
+ r->fd = 0;
+ r->len = q.buffer_size;
+ asio_submit(r);
+ r = asio_wait(&q);
+ ASSERT(r);
+ if (r->status <= 0)
+ {
+ asio_put(r);
+ break;
+ }
+ r->op = ASIO_WRITE_BACK;
+ r->fd = 1;
+ r->len = r->status;
+ asio_submit(r);
+ }
+ asio_sync(&q);
+
+#else
+
+ /* A single synchronous-style read: submit, wait, recycle */
+ r = asio_get(&q);
+ r->op = ASIO_READ;
+ r->fd = 0;
+ r->len = 1;
+ asio_submit(r);
+ r = asio_wait(&q);
+ ASSERT(r);
+ asio_put(r);
+
+ /* Ten write-backs; with max_writebacks == 2, asio_submit() has to wait for earlier ones */
+ for (uns i=0; i<10; i++)
+ {
+ r = asio_get(&q);
+ r->op = ASIO_WRITE_BACK;
+ r->fd = 1;
+ r->len = 1;
+ r->buffer[0] = 'A' + i;
+ asio_submit(r);
+ }
+ asio_sync(&q);
+
+ /* An ordinary write is returned by asio_wait() and has to be put back explicitly */
+ r = asio_get(&q);
+ r->op = ASIO_WRITE;
+ r->fd = 1;
+ r->len = 1;
+ r->buffer[0] = '\n';
+ asio_submit(r);
+ r = asio_wait(&q);
+ ASSERT(r);
+ asio_put(r);
+
+#endif
+
+ asio_cleanup_queue(&q);
+ return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Asynchronous I/O
+ *
+ * (c) 2006 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_ASIO_H
+#define _UCW_ASIO_H
+
+#include "lib/workqueue.h"
+#include "lib/clists.h"
+
+/*
+ * This module takes care of scheduling and executing asynchronous I/O requests
+ * on files opened with O_DIRECT. It is primarily used by the fb-direct fastbuf
+ * back-end, but you can use it explicitly, too.
+ *
+ * You can define several I/O queues, each for use by a single thread. Requests
+ * on a single queue are always processed in order of their submits, requests
+ * from different queues may be interleaved (although the current implementation
+ * does not do so). Normal read and write requests are returned to their queue
+ * when they are completed. Write-back requests are automatically freed when
+ * done, but the number of such requests in flight is limited in order to avoid
+ * consuming all memory, so a submit of a write-back request can block.
+ */
+
+// State of a single I/O queue; the [user-settable] fields are filled in by the caller before asio_init_queue()
+struct asio_queue {
+ uns buffer_size; // How large buffers do we use [user-settable]
+ uns max_writebacks; // Maximum number of writeback requests active [user-settable]
+ uns allocated_requests; // Requests handed out by asio_get() and not yet returned by asio_put()
+ uns running_requests; // Total number of running requests
+ uns running_writebacks; // How many of them are writebacks
+ clist idle_list; // Recycled requests waiting for get
+ clist done_list; // Finished requests
+ struct work_queue queue; // Underlying work queue the requests are submitted to
+ uns use_count; // For use by the caller
+};
+
+// Operation carried out by a request
+enum asio_op {
+ ASIO_FREE, // No operation set yet (state of a request returned by asio_get())
+ ASIO_READ, // read() from the file descriptor to the buffer
+ ASIO_WRITE, // write() of the buffer to the file descriptor
+ ASIO_WRITE_BACK, // Background write with no success notification
+};
+
+// A single I/O request; obtained by asio_get() and recycled by asio_put()
+struct asio_request {
+ struct work work; // asio_requests are internally just work nodes
+ struct asio_queue *queue; // Queue the request belongs to
+ byte *buffer; // Data buffer of queue->buffer_size bytes
+ int fd; // File descriptor to operate on
+ enum asio_op op; // Requested operation
+ uns len; // Number of bytes to read or write
+ int status; // Return value of read()/write(); -1 in a fresh request
+ int returned_errno; // Value of errno after the read()/write() call
+ int submitted; // Non-zero since asio_submit() until the request is collected
+ void *user_data; // For use by the caller
+};
+
+void asio_init_queue(struct asio_queue *q); // Initialize a new queue
+void asio_cleanup_queue(struct asio_queue *q); // Destroy a queue with no requests running, allocated or done
+struct asio_request *asio_get(struct asio_queue *q); // Get an empty request
+void asio_submit(struct asio_request *r); // Submit the request (can block if too many writebacks)
+struct asio_request *asio_wait(struct asio_queue *q); // Wait for the first finished request, NULL if no more
+void asio_put(struct asio_request *r); // Return a finished request for recycling
+void asio_sync(struct asio_queue *q); // Wait until all requests are finished
+
+#endif /* !_UCW_ASIO_H */
--- /dev/null
+# Tests for asynchronous I/O
+
+Run: echo y | ../obj/lib/asio-t
+Out: ABCDEFGHIJ
--- /dev/null
+/*
+ * UCW Library -- Universal Array Sorter Test and Benchmark
+ *
+ * (c) 2003 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#define N 4000037 /* a prime */
+
+/* Element of the sorted array: a key plus two payload words */
+struct elt {
+ u32 key;
+ u32 x, y;
+};
+
+static struct elt array[N];
+
+/* Parameters of the array sorter: compare the keys, but swap whole elements */
+#define ASORT_KEY_TYPE u32
+#define ASORT_ELT(i) array[i].key
+#define ASORT_SWAP(i,j) do { struct elt e=array[j]; array[j]=array[i]; array[i]=e; } while(0)
+
+/*
+ * Fill the keys of the array. The preprocessor conditions select the input:
+ * descending keys, ascending keys, or (the enabled variant) a sequence
+ * advancing by a fixed step modulo N.
+ */
+static void generate(void)
+{
+ uns i;
+ for (i=0; i<N; i++)
+#if 0
+ ASORT_ELT(i) = N-i-1;
+#elif 0
+ ASORT_ELT(i) = i;
+#else
+ ASORT_ELT(i) = (i ? ASORT_ELT(i-1)+1944833754 : 3141592) % N;
+#endif
+}
+
+/* Verify that every key equals its position and report all mismatches */
+static void check(void)
+{
+  for (uns pos = 0; pos < N; pos++)
+    {
+      if (ASORT_ELT(pos) == pos)
+        continue;
+      printf("error at pos %d: %08x != %08x\n", pos, ASORT_ELT(pos), pos);
+    }
+}
+
+/* Comparison callback for qsort(): returns -1, 0 or 1 according to the order of the keys */
+static int qs_comp(const struct elt *X, const struct elt *Y)
+{
+  int greater = (X->key > Y->key);
+  int less = (X->key < Y->key);
+
+  return greater - less;
+}
+
+#define ASORT_PREFIX(x) as_##x
+#include "lib/arraysort.h"
+
+/*
+ * Sort the same generated input first by qsort() and then by the generated
+ * array sorter, printing the time of each and checking the result.
+ */
+int main(void)
+{
+ timestamp_t timer;
+
+ generate();
+ init_timer(&timer);
+ qsort(array, N, sizeof(array[0]), (int (*)(const void *, const void *)) qs_comp);
+ printf("qsort: %d ms\n", get_timer(&timer));
+ check();
+ /* The array is sorted now, so regenerate the input for the second run */
+ generate();
+ init_timer(&timer);
+ as_sort(N);
+ printf("asort: %d ms\n", get_timer(&timer));
+ check();
+ return 0;
+}
--- /dev/null
+# Automatic configuration of the UCW Library
+# (c) 2005--2007 Martin Mares <mj@ucw.cz>
+# (c) 2006 Robert Spalek <robert@ucw.cz>
+
+### OS ###
+
+# The OS name is whatever `uname` prints; an error or empty output is fatal.
+Test("OS", "Checking on which OS we run", sub {
+ my $os = `uname`;
+ chomp $os;
+ Fail "Unable to determine OS type" if $? || $os eq "";
+ return $os;
+});
+
+# Only Linux and Darwin are supported; anything else aborts the configuration.
+if (Get("OS") eq "Linux") {
+ Set("CONFIG_LINUX");
+} elsif (Get("OS") eq "Darwin") {
+ Set("CONFIG_DARWIN");
+} else {
+ Fail "Don't know how to run on this operating system.";
+}
+
+### Compiler ###
+
+# Default compiler
+Test("CC", "Checking for C compiler", sub { return "gcc"; });
+
+# GCC version
+# The sed script keeps only the first line of `--version` output and
+# extracts a "major.minor" number from it.
+Test("GCCVER", "Checking for GCC version", sub {
+ my $gcc = Get("CC");
+ my $ver = `$gcc --version | sed '2,\$d; s/^\\(.* \\)*\\([0-9]*\\.[0-9]*\\).*/\\2/'`;
+ chomp $ver;
+ Fail "Unable to determine GCC version" if $? || $ver eq "";
+ return $ver;
+});
+# The version is encoded as 1000*major + minor, e.g. 3004 for GCC 3.4
+my ($gccmaj, $gccmin) = split(/\./, Get("GCCVER"));
+my $gccver = 1000*$gccmaj + $gccmin;
+$gccver >= 3000 or Fail "GCC older than 3.0 doesn't support C99 well enough.";
+
+### CPU ###
+
+# Map the output of `uname -m` to one of "i386", "amd64" or "unknown".
+Test("ARCH", "Checking for machine architecture", sub {
+ my $mach = `uname -m`;
+ chomp $mach;
+ Fail "Unable to determine machine type" if $? || $mach eq "";
+ if ($mach =~ /^i[0-9]86$/) {
+ return "i386";
+ } elsif ($mach =~ /^(x86[_-]|amd)64$/) {
+ return "amd64";
+ } else {
+ return "unknown";
+ }
+});
+
+# Read the first processor entry of /proc/cpuinfo (up to the first empty
+# line) and return the list (vendor_id, cpu family, model). Fields which
+# are not present are undef; if the file cannot be opened, an empty list
+# is returned.
+sub parse_cpuinfo_linux() {
+  # The low-precedence `or' is needed here: `||' would bind to the file
+  # name, which is always true, so a failed open would go unnoticed.
+  open X, "/proc/cpuinfo" or return;
+  my %pc = ();
+  while (<X>) {
+    chomp;
+    /^$/ && last;
+    # Lines look like "name<TAB>: value"
+    /^([^\t]+)\t+:\s*(.*)$/ and $pc{$1}=$2;
+  }
+  close X;
+  return ($pc{'vendor_id'},
+          $pc{'cpu family'},
+          $pc{'model'});
+}
+
+# Ask sysctl for the CPU vendor, family and model and return them as
+# a list with the trailing newlines removed.
+sub parse_cpuinfo_darwin() {
+  # A lexical variable, so that no global @cpu is created or overwritten
+  my @cpu = (`sysctl -n machdep.cpu.vendor`,
+             `sysctl -n machdep.cpu.family`,
+             `sysctl -n machdep.cpu.model`);
+  chomp @cpu;
+  return @cpu;
+}
+
+# Return the list (vendor, family, model) of the CPU using the parser
+# for the configured OS. Items which could not be determined are replaced
+# by "" (vendor) or 0 (family and model).
+sub parse_cpuinfo() {
+  my @cpu = IsSet("CONFIG_LINUX")  ? parse_cpuinfo_linux()
+          : IsSet("CONFIG_DARWIN") ? parse_cpuinfo_darwin()
+          : ();
+  my @fallback = ("", 0, 0);
+  for my $idx (0 .. 2) {
+    $cpu[$idx] = $fallback[$idx] unless defined $cpu[$idx];
+  }
+  return @cpu;
+}
+
+# Set the CPU_* properties according to ARCH and compute CPU_ARCH:
+#   "unknown"  for an unrecognized machine type,
+#   "default"  when CONFIG_EXACT_CPU is not set,
+#   otherwise a CPU type name chosen from the vendor, family and model
+#   (the name is later passed to -march); it can be empty when the CPU
+#   was not recognized.
+Test("CPU_ARCH", "Checking for CPU architecture", sub {
+ my $mach = Get("ARCH");
+ my $arch = "";
+ if ($mach eq "i386") {
+ Set("CPU_I386");
+ UnSet("CPU_64BIT_POINTERS");
+ Set("CPU_LITTLE_ENDIAN");
+ UnSet("CPU_BIG_ENDIAN");
+ Set("CPU_ALLOW_UNALIGNED");
+ Set("CPU_STRUCT_ALIGN" => 4);
+ if (IsSet("CONFIG_EXACT_CPU")) {
+ my ($vendor, $family, $model) = parse_cpuinfo();
+ # Try to understand CPU vendor, family and model [inspired by MPlayer's configure script]
+ # Some names are used only if the GCC version ($gccver) is new enough
+ if ($vendor eq "AuthenticAMD") {
+ if ($family >= 6) {
+ if ($model >= 31 && $gccver >= 3004) { $arch = "athlon64"; }
+ elsif ($model >= 6 && $gccver >= 3003) { $arch = "athlon-xp"; }
+ else { $arch = "athlon"; }
+ }
+ } elsif ($vendor eq "GenuineIntel") {
+ if ($family >= 15 && $gccver >= 3003) {
+ if ($model >= 4) { $arch = "nocona"; }
+ elsif ($model >= 3) { $arch = "prescott"; }
+ else { $arch = "pentium4"; }
+ } elsif ($family == 6 && $gccver >= 3003) {
+ if ($model == 15) { $arch = "prescott"; }
+ elsif (($model == 9 || $model == 13) && $gccver >= 3004) { $arch = "pentium-m"; }
+ elsif ($model >= 7) { $arch = "pentium3"; }
+ elsif ($model >= 3) { $arch = "pentium2"; }
+ }
+ }
+
+ # No match on vendor, try the family
+ if ($arch eq "") {
+ if ($family >= 6) {
+ $arch = "i686";
+ } elsif ($family >= 3) {
+ $arch = "i${family}86";
+ }
+ }
+ # The message mentions /proc/cpuinfo even when the data came from sysctl on Darwin
+ Log (($arch ne "") ? "(using /proc/cpuinfo) " : "(don't understand /proc/cpuinfo) ");
+ return $arch;
+ } else {
+ return "default";
+ }
+ } elsif ($mach eq "amd64") {
+ Set("CPU_AMD64");
+ Set("CPU_64BIT_POINTERS");
+ Set("CPU_LITTLE_ENDIAN");
+ UnSet("CPU_BIG_ENDIAN");
+ Set("CPU_ALLOW_UNALIGNED");
+ Set("CPU_STRUCT_ALIGN" => 8);
+ if (IsSet("CONFIG_EXACT_CPU")) {
+ # In x86-64 world, the detection is somewhat easier so far...
+ my ($vendor, $family, $model) = parse_cpuinfo();
+ if ($vendor eq "AuthenticAMD") {
+ $arch = "athlon64";
+ } elsif ($vendor eq "GenuineIntel") {
+ $arch = "nocona";
+ }
+ Log (($arch ne "") ? "(using /proc/cpuinfo) " : "(don't understand /proc/cpuinfo) ");
+ return $arch;
+ } else {
+ return "default";
+ }
+ } else {
+ return "unknown";
+ }
+});
+
+if (Get("CPU_ARCH") eq "unknown") {
+ Warn "CPU architecture not recognized, using defaults, keep fingers crossed.\n";
+}
+
+### Compiler and its Options ###
+
+# C flags: tell the compiler we're speaking C99, and disable common symbols
+Set("CLANG" => "-std=gnu99 -fno-common");
+
+# C optimizations
+Set("COPT" => '-O2');
+if (Get("CPU_ARCH") ne "unknown" && Get("CPU_ARCH") ne "default") {
+ Append("COPT", '-march=$(CPU_ARCH)');
+}
+
+# C optimizations for highly exposed code
+Set("COPT2" => '-O3');
+
+# Warnings
+Set("CWARNS" => '-Wall -W -Wno-parentheses -Wstrict-prototypes -Wmissing-prototypes -Winline');
+Set("CWARNS_OFF" => '');
+
+# Linker flags
+Set("LOPT" => "");
+
+# Extra libraries
+Set("LIBS" => "");
+
+# Extra flags for compiling and linking shared libraries
+Set("CSHARED" => '-fPIC');
+if (IsSet("CONFIG_DARWIN")) {
+ Set("LSHARED" => '-dynamiclib -install_name lib/$(@F) -undefined dynamic_lookup');
+} else {
+ Set("LSHARED" => '-shared -Wl,-soname,lib/$(@F)');
+}
+
+# Extra switches depending on GCC version:
+# The versions are matched exactly (3.0, 3.3, 3.4, 4.0, 4.1 and 4.2), so any
+# other version, including newer ones, gets only the defaults and a warning.
+if ($gccver == 3000) {
+ Append("COPT" => "-fstrict-aliasing");
+} elsif ($gccver == 3003) {
+ Append("CWARNS" => "-Wundef -Wredundant-decls");
+ Append("COPT" => "-finline-limit=20000 --param max-inline-insns-auto=1000");
+} elsif ($gccver == 3004) {
+ Append("CWARNS" => "-Wundef -Wredundant-decls");
+ Append("COPT" => "-finline-limit=2000 --param large-function-insns=5000 --param inline-unit-growth=200 --param large-function-growth=400");
+} elsif ($gccver == 4000 || $gccver == 4001) {
+ Append("CWARNS" => "-Wundef -Wredundant-decls -Wno-pointer-sign -Wdisabled-optimization -Wno-missing-field-initializers");
+ Append("CWARNS_OFF" => "-Wno-pointer-sign");
+ Append("COPT" => "-finline-limit=5000 --param large-function-insns=5000 --param inline-unit-growth=200 --param large-function-growth=400");
+} elsif ($gccver == 4002) {
+ # Same as for 4.0 and 4.1, plus -fgnu89-inline
+ Append("CWARNS" => "-Wundef -Wredundant-decls -Wno-pointer-sign -Wdisabled-optimization -Wno-missing-field-initializers");
+ Append("CWARNS_OFF" => "-Wno-pointer-sign");
+ Append("COPT" => "-finline-limit=5000 --param large-function-insns=5000 --param inline-unit-growth=200 --param large-function-growth=400 -fgnu89-inline");
+} else {
+ Warn "Don't know anything about this GCC version, using default switches.\n";
+}
+
+# Debugging builds enable assertions and debug info; release builds omit
+# the frame pointer and strip the binaries instead.
+if (IsSet("CONFIG_DEBUG")) {
+ # If debugging:
+ Set("DEBUG_ASSERTS");
+ # Debug levels above 1 additionally set DEBUG_DIE_BY_ABORT
+ Set("DEBUG_DIE_BY_ABORT") if Get("CONFIG_DEBUG") > 1;
+ Set("CDEBUG" => "-ggdb");
+} else {
+ # If building a release version:
+ Append("COPT" => "-fomit-frame-pointer");
+ Append("LOPT" => "-s");
+}
+
+if (IsSet("CONFIG_DARWIN")) {
+ # gcc-4.0 on Darwin doesn't set this in the gnu99 mode
+ Append("CLANG" => "-fnested-functions");
+ # Directory hierarchy of the fink project
+ Append("LIBS" => "-L/sw/lib");
+ Append("COPT" => "-I/sw/include");
+ # Fill in some constants not found in the system header files
+ Set("SOL_TCP" => 6); # missing in /usr/include/netinet/tcp.h
+}
+
+# Determine page size
+# Ask the system for its page size. As in the other tests, a failed
+# command or empty output is treated as an error instead of being
+# passed on as an empty page size.
+Test("CPU_PAGE_SIZE", "Determining page size", sub {
+  my $p;
+  if (IsSet("CONFIG_DARWIN")) {
+    $p = `sysctl -n hw.pagesize`;
+    Fail "sysctl hw.pagesize failed" if $? || !defined $p;
+  } elsif (IsSet("CONFIG_LINUX")) {
+    $p = `getconf PAGE_SIZE`;
+    Fail "getconf PAGE_SIZE failed" if $? || !defined $p;
+  }
+  # $p stays undefined when neither OS is configured
+  Fail "Unable to determine page size" if !defined $p;
+  chomp $p;
+  Fail "Unable to determine page size" if $p eq "";
+  return $p;
+});
+
+# Large file support is switched on only on Linux.
+if (IsSet("CONFIG_LARGE_FILES") && IsSet("CONFIG_LINUX")) {
+ # Use 64-bit versions of file functions
+ Set("CONFIG_LFS");
+}
+
+# Decide how will lib/partmap.c work
+Set("PARTMAP_IS_MMAP") if IsSet("CPU_64BIT_POINTERS");
+
+# Option for lib/mempool.c
+Set("POOL_IS_MMAP");
+
+# Guess optimal bit width of the radix-sorter
+# (10 bits when no exact CPU type was detected or for i386 to i586, 12 bits otherwise)
+if (Get("CPU_ARCH") eq "default" || Get("CPU_ARCH") =~ /^i[345]86$/) {
+ # This should be safe everywhere
+ Set("CONFIG_UCW_RADIX_SORTER_BITS" => 10);
+} else {
+ # Use this on modern CPU's
+ Set("CONFIG_UCW_RADIX_SORTER_BITS" => 12);
+}
+
+# If debugging memory allocations:
+#LIBS+=-lefence
+#CDEBUG+=-DDEBUG_DMALLOC
+#LIBS+=-ldmalloc
+
+# Return success
+1;
--- /dev/null
+/*
+ * UCW Library -- Base 224 Encoding & Decoding
+ *
+ * (c) 2002 Martin Mares <mj@ucw.cz>
+ *
+ * The `base-224' encoding transforms general sequences of bytes
+ * to sequences of non-control 8-bit characters (0x20-0xff). Since
+ * 224 and 256 are incompatible bases (there is no k,l: 224^k=256^l)
+ * and we want to avoid lengthy calculations, we cheat a bit:
+ *
+ * Each base-224 digit can be represented as a (base-7 digit, base-32 digit)
+ * pair, so we pass the lower 5 bits directly and use a base-7 encoder
+ * for the upper part. We process blocks of 39 bits and encode them
+ * to 5 base-224 digits: we take 5x5 bits as the lower halves and convert
+ * the remaining 14 bits in base-7 (2^14 = 16384 < 16807 = 7^5) to get
+ * the 7 upper parts we need (with a little redundancy). Little endian
+ * ordering is used to make handling of partial blocks easy.
+ *
+ * We transform 39 source bits to 40 destination bits, stretching the data
+ * by 1/39 = approx. 2.56%.
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#undef LOCAL_DEBUG
+
+#include "lib/lib.h"
+#include "lib/base224.h"
+
+/*
+ * Encode one block of source bits, passed in as `lo' (lower 32 bits) and
+ * `hi' (the bits above them), to 5 base-224 characters stored at w[0..4].
+ */
+static void
+encode_block(byte *w, u32 hi, u32 lo)
+{
+ uns x, y;
+
+ /*
+ * Splitting of the 39-bit block: [a-e][0-4] are the base-32 digits, *'s are used for base-7.
+ * +----------------+----------------+----------------+----------------+----------------+
+ * +00******e4e3e2e1|e0******d4d3d2d1|d0******c4c3c2c1|c0******b4b3b2b1|b0****a4a3a2a1a0|
+ * +----------------+----------------+----------------+----------------+----------------+
+ */
+
+ /* The five 5-bit groups become the lower parts of the output characters */
+ w[0] = lo & 0x1f;
+ w[1] = (lo >> 7) & 0x1f;
+ w[2] = (lo >> 15) & 0x1f;
+ w[3] = (lo >> 23) & 0x1f;
+ w[4] = (lo >> 31) | ((hi << 1) & 0x1e);
+ /* Gather the remaining 14 bits to a single number */
+ x = (lo >> 5) & 0x0003
+ | (lo >> 10) & 0x001c
+ | (lo >> 15) & 0x00e0
+ | (lo >> 20) & 0x0700
+ | (hi << 7) & 0x3800;
+ DBG("<<< h=%08x l=%08x x=%d", hi, lo, x);
+ /* Each character gets one base-7 digit of x in its top 3 bits; adding 0x20 skips the control characters */
+ for (y=0; y<5; y++)
+ {
+ w[y] += 0x20 + ((x % 7) << 5);
+ x /= 7;
+ }
+}
+
+/*
+ * Encode `len' bytes at `src' to base-224 characters stored at `dest'.
+ * Returns the number of characters written. The caller has to provide
+ * a sufficiently large destination buffer.
+ */
+uns
+base224_encode(byte *dest, const byte *src, uns len)
+{
+ u32 lo=0, hi=0; /* 64-bit buffer accumulating input bits */
+ uns i=0; /* How many source bits do we have buffered */
+ u32 x;
+ byte *w=dest;
+
+ while (len--)
+ {
+ /* Append the next byte above the i bits already buffered */
+ x = *src++;
+ if (i < 32)
+ {
+ lo |= x << i;
+ if (i > 24)
+ hi |= x >> (32-i);
+ }
+ else
+ hi |= x << (i-32);
+ i += 8;
+ if (i >= 39)
+ {
+ /* A complete block: emit 5 characters and keep the bits above the 39th */
+ encode_block(w, hi, lo);
+ w += 5;
+ lo = hi >> 7;
+ hi = 0;
+ i -= 39;
+ }
+ }
+ if (i) /* Partial block */
+ {
+ encode_block(w, hi, lo);
+ w += (i+8)/8; /* Just check logarithms if you want to understand */
+ }
+ return w - dest;
+}
+
+/*
+ * Decode `len' base-224 characters at `src' to bytes stored at `dest'.
+ * Returns the number of bytes written. All input characters must be
+ * at least 0x20 (this is asserted).
+ *
+ * The input is processed in blocks of 5 characters; a block of len0
+ * characters contributes len0*8 - 1 bits to the output. Decoding stops
+ * after the first block which is shorter than 5 characters.
+ */
+uns
+base224_decode(byte *dest, const byte *src, uns len)
+{
+  u32 hi=0, lo=0;		/* 64-bit buffer accumulating output bits */
+  uns i=0;			/* How many bits do we have accumulated */
+  u32 h, l;			/* Decoding of the current block */
+  uns x;			/* base-7 part of the current block */
+  uns len0;
+  byte *start = dest;
+
+  do
+    {
+      if (!len)
+        break;
+      len0 = len;
+
+      ASSERT(*src >= 0x20);	/* byte 0 */
+      h = 0;
+      l = *src & 0x1f;
+      x = (*src++ >> 5) - 1;
+      if (!--len)
+        goto blockend;
+
+      ASSERT(*src >= 0x20);	/* byte 1 */
+      l |= (*src & 0x1f) << 7;
+      x += ((*src++ >> 5) - 1) * 7;
+      if (!--len)
+        goto blockend;
+
+      ASSERT(*src >= 0x20);	/* byte 2 */
+      l |= (*src & 0x1f) << 15;
+      x += ((*src++ >> 5) - 1) * 7*7;
+      if (!--len)
+        goto blockend;
+
+      ASSERT(*src >= 0x20);	/* byte 3 */
+      l |= (*src & 0x1f) << 23;
+      x += ((*src++ >> 5) - 1) * 7*7*7;
+      if (!--len)
+        goto blockend;
+
+      ASSERT(*src >= 0x20);	/* byte 4 */
+      /*
+       * Only bit 0 of the character belongs to bit 31 of l. The shift is
+       * done on an unsigned 32-bit value: shifting the promoted (signed)
+       * int would push bits out of its range, which is undefined in C.
+       */
+      l |= (u32) *src << 31;
+      h = (*src & 0x1f) >> 1;
+      x += ((*src++ >> 5) - 1) * 7*7*7*7;
+      --len;
+
+    blockend:
+      len0 -= len;		/* Number of characters in this block */
+      l |= ((x & 0x0003) << 5)	/* Decode base-7 */
+        | ((x & 0x001c) << 10)
+        | ((x & 0x00e0) << 15)
+        | ((x & 0x0700) << 20);
+      h |= (x & 0x3800) >> 7;
+
+      DBG("<<< i=%d h=%08x l=%08x x=%d len0=%d", i, h, l, x, len0);
+      /* Append the block above the i bits left over from the previous one */
+      lo |= l << i;
+      hi |= h << i;
+      if (i)
+        hi |= l >> (32-i);
+      i += len0*8 - 1;
+
+      /* Flush all complete bytes */
+      while (i >= 8)
+        {
+          *dest++ = lo;
+          lo = (lo >> 8U) | (hi << 24);
+          hi >>= 8;
+          i -= 8;
+        }
+    }
+  while (len0 == 5);
+  return dest-start;
+}
+
+#ifdef TEST
+
+#include <stdio.h>
+
+/*
+ * Test filter: without arguments it encodes stdin to stdout,
+ * with any argument it decodes stdin to stdout.
+ */
+int main(int argc, char **argv)
+{
+#if 0
+  byte i[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39 };
+  byte o[256], w[256];
+  uns l;
+  l = base224_encode(o, i, sizeof(i));
+  fwrite(o, 1, l, stdout);
+  fputc(0xaa, stdout);
+  l = base224_decode(w, o, l);
+  fwrite(w, 1, l, stdout);
+#else
+  uns l;
+
+  if (argc > 1)
+    {
+      /* Decode: read whole multiples of the output chunk size */
+      byte in[BASE224_OUT_CHUNK*17], out[BASE224_IN_CHUNK*17];
+      while ((l = fread(in, 1, sizeof(in), stdin)) != 0)
+        {
+          l = base224_decode(out, in, l);
+          fwrite(out, 1, l, stdout);
+        }
+    }
+  else
+    {
+      /* Encode: read whole multiples of the input chunk size */
+      byte in[BASE224_IN_CHUNK*23], out[BASE224_OUT_CHUNK*23];
+      while ((l = fread(in, 1, sizeof(in), stdin)) != 0)
+        {
+          l = base224_encode(out, in, l);
+          fwrite(out, 1, l, stdout);
+        }
+    }
+#endif
+
+  return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Base 224 Encoding & Decoding
+ *
+ * (c) 2002 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+/* Encode len bytes from src to dest; returns the number of bytes produced. */
+uns base224_encode(byte *dest, const byte *src, uns len);
+/* Decode len bytes from src to dest; returns the number of bytes produced. */
+uns base224_decode(byte *dest, const byte *src, uns len);
+
+/*
+ * Warning: when encoding, at least 4 bytes of extra space are needed,
+ * because the encoder always stores a whole 5-byte group even if only
+ * a part of it is counted in the returned length.
+ * Better use this macro to calculate buffer size: it rounds the input
+ * up to whole 39-bit blocks and reserves 5 bytes for each of them.
+ */
+#define BASE224_ENC_LENGTH(x) (((x)*8+38)/39*5)
+
+/*
+ * When called for BASE224_IN_CHUNK-byte chunks, the result will be
+ * always BASE224_OUT_CHUNK bytes long. If a longer block is split
+ * to such chunks, the result will be identical.
+ */
+#define BASE224_IN_CHUNK 39
+#define BASE224_OUT_CHUNK 40
--- /dev/null
+/*
+ * UCW Library -- Base 64 Encoding & Decoding
+ *
+ * (c) 2002, Robert Spalek <robert@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#undef LOCAL_DEBUG
+
+#include "lib/lib.h"
+#include "lib/base64.h"
+
+#include <string.h>
+
+/*
+ * The base64 alphabet indexed by digit value. The array has 65 entries:
+ * the 64 digits followed by a terminating zero byte.
+ */
+static const byte base64_table[] =
+  "ABCDEFGHIJKLM"
+  "NOPQRSTUVWXYZ"
+  "abcdefghijklm"
+  "nopqrstuvwxyz"
+  "0123456789+/";
+
+/* Character used to pad the output to a multiple of 4 characters */
+static const byte base64_pad = '=';
+
+/*
+ * Encode `len' bytes from `src' to `dest' and return the number of
+ * characters written. The output is padded with '=' to a multiple of 4
+ * and it is not zero-terminated.
+ */
+uns
+base64_encode(byte *dest, const byte *src, uns len)
+{
+  byte *out = dest;
+
+  /* Whole groups: 3 octets become 4 digits */
+  for (; len > 2; src += 3, len -= 3)
+    {
+      *out++ = base64_table[src[0] >> 2];
+      *out++ = base64_table[((src[0] & 0x03) << 4) + (src[1] >> 4)];
+      *out++ = base64_table[((src[1] & 0x0f) << 2) + (src[2] >> 6)];
+      *out++ = base64_table[src[2] & 0x3f];
+    }
+
+  /* The tail: 1 or 2 remaining octets, padded */
+  switch (len)
+    {
+    case 2:
+      *out++ = base64_table[src[0] >> 2];
+      *out++ = base64_table[((src[0] & 0x03) << 4) + (src[1] >> 4)];
+      *out++ = base64_table[(src[1] & 0x0f) << 2];
+      *out++ = base64_pad;
+      break;
+    case 1:
+      *out++ = base64_table[src[0] >> 2];
+      *out++ = base64_table[(src[0] & 0x03) << 4];
+      *out++ = base64_pad;
+      *out++ = base64_pad;
+      break;
+    }
+  return out - dest;
+}
+
+/*
+ * Decode `len' characters from `src' to `dest' and return the number of
+ * complete bytes decoded. Decoding stops at the first padding character;
+ * characters outside the base64 alphabet are skipped, and a space is
+ * treated as '+'.
+ *
+ * Beware that the byte at dest[return value] may be overwritten with the
+ * bits of an unfinished group, so the buffer needs one byte of slack.
+ *
+ * The reverse table is built lazily on the first call, without any locking.
+ */
+uns
+base64_decode(byte *dest, const byte *src, uns len)
+{
+  const byte *current = src;
+  uns ch;
+  uns i = 0, j = 0;
+  static byte reverse_table[256];
+  static uns table_built = 0;
+
+  if (table_built == 0)
+    {
+      /*
+       * Map each of the 64 digits back to its value and everything else
+       * to 0xff. We walk the alphabet instead of calling strchr() for
+       * every byte value, because strchr(table, 0) matches the string
+       * terminator and a zero byte would be taken for a digit.
+       */
+      memset(reverse_table, 0xff, sizeof(reverse_table));
+      for (ch = 0; ch < 64; ch++)
+        reverse_table[base64_table[ch]] = ch;
+      table_built = 1;
+    }
+
+  /* run through the whole string, converting as we go */
+  ch = 0;
+  while (len > 0)
+    {
+      len--;
+      ch = *current++;
+      if (ch == base64_pad)
+        break;
+
+      /* When Base64 gets POSTed, all pluses are interpreted as spaces.
+         This line changes them back. It's not exactly the Base64 spec,
+         but it is completely compatible with it (the spec says that
+         spaces are invalid). This will also save many people considerable
+         headache. - Turadg Aleahmad <turadg@wise.berkeley.edu>
+      */
+
+      if (ch == ' ')
+        ch = '+';
+
+      ch = reverse_table[ch];
+      if (ch == 0xff)		/* Not a base64 digit => ignore */
+        continue;
+
+      /* i counts the valid digits, each group of 4 digits yields 3 bytes */
+      switch (i % 4)
+        {
+        case 0:
+          dest[j] = ch << 2;
+          break;
+        case 1:
+          dest[j++] |= ch >> 4;
+          dest[j] = (ch & 0x0f) << 4;
+          break;
+        case 2:
+          dest[j++] |= ch >> 2;
+          dest[j] = (ch & 0x03) << 6;
+          break;
+        case 3:
+          dest[j++] |= ch;
+          break;
+        }
+      i++;
+    }
+  return j;
+}
--- /dev/null
+/*
+ * UCW Library -- Base 64 Encoding & Decoding
+ *
+ * (c) 2002, Robert Spalek <robert@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+/* Encode len bytes from src to dest; returns the number of characters written. */
+uns base64_encode(byte *dest, const byte *src, uns len);
+/* Decode len characters from src to dest; returns the number of bytes decoded. */
+uns base64_decode(byte *dest, const byte *src, uns len);
+
+/*
+ * Use this macro to calculate buffer size needed for encoding x bytes
+ * (every started group of 3 input bytes produces 4 output characters).
+ */
+#define BASE64_ENC_LENGTH(x) (((x)+2)/3 *4)
+
+/*
+ * When called for BASE64_IN_CHUNK-byte chunks, the result will be
+ * always BASE64_OUT_CHUNK bytes long. If a longer block is split
+ * to such chunks, the result will be identical.
+ */
+#define BASE64_IN_CHUNK 3
+#define BASE64_OUT_CHUNK 4
+
--- /dev/null
+/*
+ * UCW Library -- A simple growing buffers for byte-sized items
+ *
+ * (c) 2006 Pavel Charvat <pchar@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/bbuf.h"
+
+#include <stdio.h>
+
+/*
+ * Format a string into the growing buffer, starting at offset `ofs',
+ * and enlarge the buffer as needed to hold the whole result including
+ * the terminating zero. Returns a pointer to the formatted string, which
+ * points inside the buffer. `args' itself is left untouched (every attempt
+ * works on a copy).
+ */
+char *
+bb_vprintf_at(bb_t *bb, uns ofs, const char *fmt, va_list args)
+{
+  va_list args2;
+  int cnt;
+
+  /* First attempt with whatever space we have (at least 1 byte) */
+  bb_grow(bb, ofs + 1);
+  va_copy(args2, args);
+  cnt = vsnprintf(bb->ptr + ofs, bb->len - ofs, fmt, args2);
+  va_end(args2);
+
+  if (cnt >= 0)
+    {
+      /* C99 vsnprintf tells us the length needed, so one retry is enough */
+      if ((uns)cnt >= bb->len - ofs)
+        {
+          bb_do_grow(bb, ofs + cnt + 1);
+          va_copy(args2, args);
+          int cnt2 = vsnprintf(bb->ptr + ofs, bb->len - ofs, fmt, args2);
+          va_end(args2);
+          ASSERT(cnt2 == cnt);
+        }
+      return bb->ptr + ofs;
+    }
+
+  /* Our C library doesn't support C99 return value of vsnprintf, so we need to iterate */
+  while (cnt < 0)
+    {
+      bb_do_grow(bb, bb->len + 1);
+      va_copy(args2, args);
+      cnt = vsnprintf(bb->ptr + ofs, bb->len - ofs, fmt, args2);
+      va_end(args2);
+    }
+  return bb->ptr + ofs;
+}
+
+/* Variadic front-end of bb_vprintf_at(): format at offset `ofs' of the buffer. */
+char *
+bb_printf_at(bb_t *bb, uns ofs, const char *fmt, ...)
+{
+  va_list ap;
+  char *str;
+
+  va_start(ap, fmt);
+  str = bb_vprintf_at(bb, ofs, fmt, ap);
+  va_end(ap);
+  return str;
+}
+
+/* Format to the very beginning of the buffer; see bb_vprintf_at(). */
+char *
+bb_vprintf(bb_t *bb, const char *fmt, va_list args)
+{
+  char *str = bb_vprintf_at(bb, 0, fmt, args);
+  return str;
+}
+
+/* Variadic front-end of bb_vprintf(): format to the very beginning of the buffer. */
+char *
+bb_printf(bb_t *bb, const char *fmt, ...)
+{
+  va_list ap;
+  char *str;
+
+  va_start(ap, fmt);
+  str = bb_vprintf(bb, fmt, ap);
+  va_end(ap);
+  return str;
+}
+
+#ifdef TEST
+
+/* Print one string formatted at the start of the buffer and one formatted at offset 5. */
+int main(void)
+{
+  bb_t bb;
+  char *x;
+
+  bb_init(&bb);
+
+  x = bb_printf(&bb, "<Hello, %s!>", "World");
+  fputs(x, stdout);
+
+  /* A long result forces the buffer to grow */
+  x = bb_printf_at(&bb, 5, "<Hello, %50s!>\n", "World");
+  fputs(x, stdout);
+
+  bb_done(&bb);
+  return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- A simple growing buffer for byte-sized items.
+ *
+ * (c) 2004 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_BBUF_H
+#define _UCW_BBUF_H
+
+/* Instantiate the generic growing buffer of lib/gbuf.h for bytes, with all names prefixed by bb_ */
+#define GBUF_TYPE byte
+#define GBUF_PREFIX(x) bb_##x
+#include "lib/gbuf.h"
+
+/*
+ * printf-like functions writing to a growing buffer. The buffer is enlarged
+ * as needed and a pointer to the resulting string inside the buffer is returned.
+ * The _at variants start writing at offset ofs, the others at offset 0.
+ */
+char *bb_vprintf(bb_t *bb, const char *fmt, va_list args);
+char *bb_printf(bb_t *bb, const char *fmt, ...);
+char *bb_vprintf_at(bb_t *bb, uns ofs, const char *fmt, va_list args);
+char *bb_printf_at(bb_t *bb, uns ofs, const char *fmt, ...);
+
+#endif
--- /dev/null
+# Tests for growing buffers
+
+Run: ../obj/lib/bbuf-t
+Out: <Hello, World!><Hello, World!>
--- /dev/null
+/*
+ * UCW Library -- Allocation of Large Aligned Buffers
+ *
+ * (c) 2006--2007 Martin Mares <mj@ucw.cz>
+ * (c) 2007 Pavel Charvat <char@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+
+#include <sys/mman.h>
+#include <string.h>
+#include <limits.h>
+
+/*
+ * Allocate `len' bytes of anonymous private memory by mmap().
+ * `len' must be a multiple of CPU_PAGE_SIZE (checked by ASSERT).
+ * Never returns NULL: the program dies when the size does not fit
+ * in size_t or when the mapping fails.
+ */
+void *
+page_alloc(u64 len)
+{
+  /* %llu expects an unsigned argument, so cast to unsigned long long */
+  if (len > SIZE_MAX)
+    die("page_alloc: Size %llu is too large for the current architecture", (unsigned long long) len);
+  ASSERT(!(len & (CPU_PAGE_SIZE-1)));
+  byte *p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+  if (p == (byte*) MAP_FAILED)
+    die("Cannot mmap %llu bytes of memory: %m", (unsigned long long) len);
+  return p;
+}
+
+/* Like page_alloc(), but the returned memory is explicitly cleared. */
+void *
+page_alloc_zero(u64 len)
+{
+  void *mem = page_alloc(len);
+
+  memset(mem, 0, len);
+  return mem;
+}
+
+/*
+ * Unmap `len' bytes starting at `start'. Both the address and the length
+ * must be multiples of CPU_PAGE_SIZE (checked by ASSERT). The result of
+ * munmap() is not examined.
+ */
+void
+page_free(void *start, u64 len)
+{
+ ASSERT(!(len & (CPU_PAGE_SIZE-1)));
+ ASSERT(!((uintptr_t) start & (CPU_PAGE_SIZE-1)));
+ munmap(start, len);
+}
+
+/*
+ * Resize a page-aligned block by allocating a new one, copying the common
+ * prefix of the old and new size and freeing the old block. The returned
+ * address always differs from `start'.
+ */
+void *
+page_realloc(void *start, u64 old_len, u64 new_len)
+{
+  void *fresh = page_alloc(new_len);
+  u64 keep = MIN(old_len, new_len);
+
+  memcpy(fresh, start, keep);
+  page_free(start, old_len);
+  return fresh;
+}
+
+/* Round `len' up to a multiple of the page size. */
+static u64
+big_round(u64 len)
+{
+  const u64 page = CPU_PAGE_SIZE;
+
+  return ALIGN_TO(len, page);
+}
+
+/*
+ * Allocate a large buffer of at least `len' bytes (rounded up to whole
+ * pages). Dies when the size is too large or the allocation fails.
+ *
+ * With CONFIG_DEBUG, the buffer is surrounded by two inaccessible guard
+ * pages and the requested length is remembered in the first of them,
+ * so that big_free() can verify it.
+ */
+void *
+big_alloc(u64 len)
+{
+  u64 l = big_round(len);
+  /* l < len means that the rounding has wrapped around */
+  if (l < len || l > SIZE_MAX - 2*CPU_PAGE_SIZE)
+    die("big_alloc: Size %llu is too large for the current architecture", (unsigned long long) len);
+#ifdef CONFIG_DEBUG
+  l += 2*CPU_PAGE_SIZE;
+#endif
+  byte *p = page_alloc(l);
+#ifdef CONFIG_DEBUG
+  *(u64*)p = len;		/* Must be stored before the page gets protected */
+  mprotect(p, CPU_PAGE_SIZE, PROT_NONE);
+  mprotect(p+l-CPU_PAGE_SIZE, CPU_PAGE_SIZE, PROT_NONE);
+  p += CPU_PAGE_SIZE;
+#endif
+  return p;
+}
+
+/* Like big_alloc(), but the whole page-rounded buffer is explicitly cleared. */
+void *
+big_alloc_zero(u64 len)
+{
+  void *mem = big_alloc(len);
+  u64 rounded = big_round(len);
+
+  memset(mem, 0, rounded);
+  return mem;
+}
+
+/*
+ * Free a buffer obtained from big_alloc(). `len' must be the length
+ * passed to big_alloc(); with CONFIG_DEBUG this is verified against
+ * the value stored in the leading guard page.
+ */
+void
+big_free(void *start, u64 len)
+{
+ byte *p = start;
+ u64 l = big_round(len);
+#ifdef CONFIG_DEBUG
+ /* Step back to the leading guard page and make it readable again */
+ p -= CPU_PAGE_SIZE;
+ mprotect(p, CPU_PAGE_SIZE, PROT_READ);
+ ASSERT(*(u64*)p == len);
+ /* Both guard pages are part of the mapping */
+ l += 2*CPU_PAGE_SIZE;
+#endif
+ page_free(p, l);
+}
+
+#ifdef TEST
+
+/* Allocate and free a single buffer whose size is not a multiple of the page size. */
+int main(void)
+{
+  const u64 size = 123456;
+  byte *buf;
+
+  buf = big_alloc(size);
+  // buf[-1] = 1;		/* Uncomment to check the guard page in debug builds */
+  big_free(buf, size);
+  return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Binomial Heaps: Declarations
+ *
+ * (c) 2003 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+/* Node of a binomial heap, to be embedded in the user's structure. */
+struct bh_node {
+ struct bh_node *first_son; /* Head of the list of sons (linked by next_sibling) */
+ struct bh_node *last_son; /* Tail of the list of sons, new sons are appended here */
+ struct bh_node *next_sibling; /* Next node in the list this node is a member of */
+ byte order; /* Incremented whenever another tree is hung below this node */
+};
+
+/* The heap itself: a pseudo-node whose list of sons is the list of trees of the heap. */
+struct bh_heap {
+ struct bh_node root;
+};
--- /dev/null
+/*
+ * UCW Library -- Binomial Heaps: Testing
+ *
+ * (c) 2003 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+
+#include <stdio.h>
+#include <string.h>
+
+#define BH_PREFIX(x) bht_##x
+#define BH_WANT_INSERT
+#define BH_WANT_FINDMIN
+#define BH_WANT_DELETEMIN
+#include "lib/binheap-node.h"
+
+/* Heap element used by the test; the node comes first, so the pointers can be cast to each other. */
+struct item {
+ struct bh_node n;
+ uns key;
+};
+
+/* Get the key of the item the given heap node is embedded in. */
+static inline uns bht_key(struct bh_node *n)
+{
+  struct item *it = (struct item *) n;
+
+  return it->key;
+}
+
+/* Comparison callback of the heap: 1 if the key of a is smaller than the key of b, else 0. */
+static inline uns bht_less(struct bh_node *a, struct bh_node *b)
+{
+  uns ka = bht_key(a);
+  uns kb = bht_key(b);
+
+  return ka < kb;
+}
+
+/*
+ * Recursively print the subtree rooted at `a', indented by `offset' spaces.
+ * Each node is shown as [order](key), with " L" appended when the node is
+ * the one the parent considers its last son.
+ */
+static void
+bht_do_dump(struct bh_node *a, struct bh_node *expected_last, uns offset)
+{
+  struct bh_node *son;
+
+  if (!a)
+    return;
+
+  printf("%*s", offset, "");
+  printf("[%d](%d)%s\n", a->order, bht_key(a), a == expected_last ? " L" : "");
+
+  son = a->first_son;
+  while (son)
+    {
+      bht_do_dump(son, a->last_son, offset+1);
+      son = son->next_sibling;
+    }
+}
+
+/* Print all trees of the heap to stdout. */
+static void
+bht_dump(struct bh_heap *h)
+{
+  struct bh_node *tree = h->root.first_son;
+
+  printf("root\n");
+  while (tree)
+    {
+      bht_do_dump(tree, tree->last_son, 1);
+      tree = tree->next_sibling;
+    }
+}
+
+#include "lib/binheap.h"
+
+/*
+ * Insert N items with scrambled keys, check the minimum and the number
+ * of nodes seen by the iterator, then delete everything and check that
+ * the keys come out as 0, 1, ..., N-1.
+ */
+int main(void)
+{
+#define N 1048576
+#define K(i) ((259309*i+1009)%N)
+  struct bh_heap h;
+  uns i, cnt;
+
+  bht_init(&h);
+
+  i = 0;
+  while (i < N)
+    {
+      struct item *a = xmalloc_zero(sizeof(*a));
+      a->key = K(i);
+      // printf("Insert %d\n", a->key);
+      bht_insert(&h, &a->n);
+      // bht_dump(&h);
+      i++;
+    }
+  // bht_dump(&h);
+  ASSERT(bht_key(bht_findmin(&h)) == 0);
+
+  /* Count the nodes visited by the iterator */
+  cnt = 0;
+  BH_FOR_ALL(bht_, &h, a)
+    {
+      cnt++;
+    }
+  BH_END_FOR;
+  printf("cnt=%d\n", cnt);
+  ASSERT(cnt == N);
+
+  /* The items are never freed, the test exits anyway */
+  i = 0;
+  while (i < N)
+    {
+      struct item *a = (struct item *) bht_deletemin(&h);
+      // printf("\nDeleted %d:\n", a->key);
+      ASSERT(a->key == i);
+      // bht_dump(&h);
+      i++;
+    }
+  bht_dump(&h);
+
+  return 0;
+}
--- /dev/null
+/*
+ * UCW Library -- Binomial Heaps
+ *
+ * (c) 2003 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+/*
+ * This is a generic implementation of Binomial Heaps. Each time you include
+ * this file with parameters set in the corresponding preprocessor macros
+ * as described below, it generates functions for manipulating the particular
+ * version of the binomial heap.
+ *
+ * You need to specify:
+ *
+ * BH_PREFIX(x) macro to add a name prefix (used on all global names
+ * defined by the binomial heap generator). All further
+ * names mentioned here except for macro names will be
+ * implicitly prefixed.
+ *
+ * Then you continue by including "lib/binheap-node.h" which defines struct bh_node
+ * and struct bh_heap (both without prefix). The heap elements are always allocated by
+ * you and they must include struct bh_node which serves as a handle used for all
+ * the heap functions and it contains all information needed for heap-keeping.
+ * The heap itself is also allocated by you and it's represented by struct bh_heap.
+ *
+ * When you have the declaration of heap nodes, you continue with defining:
+ *
+ * less(p,q) returns 1 if the key corresponding to bh_node *p
+ * is less than the one corresponding to *q.
+ *
+ * Then specify what operations you request:
+ *
+ * <always defined> init(heap*) -- initialize the heap.
+ * BH_WANT_INSERT insert(heap*, node*) -- insert the node to the heap.
+ * BH_WANT_FINDMIN node *findmin(heap*) -- find node with minimum key.
+ * BH_WANT_DELETEMIN node *deletemin(heap*) -- findmin and delete the node.
+ *
+ * Then include "lib/binheap.h" and voila, you have a binomial heap
+ * suiting all your needs (at least those which you've revealed :) ).
+ *
+ * You also get an iterator macro at no extra charge:
+ *
+ * BH_FOR_ALL(bh_prefix, heap*, variable)
+ * {
+ * // node *variable gets declared automatically
+ * do_something_with_node(variable);
+ * // use BH_BREAK and BH_CONTINUE instead of break and continue
+ * // you must not alter contents of the heap here
+ * }
+ * BH_END_FOR;
+ *
+ * After including this file, all parameter macros are automatically
+ * undef'd.
+ */
+
+/* Shorthands for the node and heap types, undefined again at the end of this file */
+#define BH_NODE struct bh_node
+#define BH_HEAP struct bh_heap
+
+/*
+ * Merge the list of sons of b into the list of sons of a. Both lists are
+ * walked in parallel by increasing order; whenever two trees of the same
+ * order meet, they are joined to a single tree of the next order with the
+ * lesser root (according to less()) on top.
+ *
+ * Only b->first_son is read from b, and a->last_son is never touched,
+ * so a and b can be either heap roots or ordinary nodes.
+ */
+static void
+BH_PREFIX(merge)(BH_NODE *a, BH_NODE *b)
+{
+ BH_NODE **pp = &a->first_son;
+ BH_NODE *q = b->first_son;
+ BH_NODE *p, *r, *s;
+
+ while ((p = *pp) && q)
+ {
+ /* p,q are the next nodes of a,b; pp points to where p is linked */
+ if (p->order < q->order) /* p is smaller => skip it */
+ pp = &p->next_sibling;
+ else if (p->order > q->order) /* q is smaller => insert it before p */
+ {
+ r = q;
+ q = q->next_sibling;
+ r->next_sibling = p;
+ *pp = r;
+ pp = &r->next_sibling;
+ }
+ else /* p and q are of the same order => need to merge them */
+ {
+ if (BH_PREFIX(less)(p, q)) /* we'll hang r below s */
+ {
+ r = q;
+ s = p;
+ }
+ else
+ {
+ r = p;
+ s = q;
+ }
+ *pp = p->next_sibling; /* unlink p,q from their lists */
+ q = q->next_sibling;
+
+ if (s->last_son) /* merge r to s, increasing order */
+ s->last_son->next_sibling = r;
+ else
+ s->first_son = r;
+ s->last_son = r;
+ s->order++;
+ r->next_sibling = NULL;
+
+ if (!q || q->order > s->order) /* put the result into the b's list if possible */
+ {
+ s->next_sibling = q;
+ q = s;
+ }
+ else /* otherwise put the result to the a's list */
+ {
+ p = s->next_sibling = *pp;
+ *pp = s;
+ if (p && p->order == s->order) /* 3-collision */
+ pp = &s->next_sibling;
+ }
+ }
+ }
+ /* a's list is exhausted => append whatever remains of b's list */
+ if (!p)
+ *pp = q;
+}
+
+#ifdef BH_WANT_INSERT
+/*
+ * Insert node a to the heap. The node is turned into a single-node tree
+ * of order 0 (whatever its previous contents were) and merged with the
+ * trees of the heap.
+ */
+static void
+BH_PREFIX(insert)(BH_HEAP *heap, BH_NODE *a)
+{
+  BH_NODE sh;			/* Temporary list head, merge() reads only its first_son */
+
+  sh.first_son = a;
+  a->first_son = a->last_son = a->next_sibling = NULL;
+  a->order = 0;			/* A node without sons must have order 0, even if it is recycled */
+  BH_PREFIX(merge)(&heap->root, &sh);
+}
+#endif
+
+#ifdef BH_WANT_FINDMIN
+/*
+ * Find the node with the minimum key by scanning the roots of all trees.
+ * Returns NULL if the heap is empty. Among equal keys, the first one
+ * in the list of trees wins.
+ */
+static BH_NODE *
+BH_PREFIX(findmin)(BH_HEAP *heap)
+{
+  BH_NODE *min = NULL;
+  BH_NODE *t = heap->root.first_son;
+
+  while (t)
+    {
+      if (!min || BH_PREFIX(less)(t, min))
+        min = t;
+      t = t->next_sibling;
+    }
+  return min;
+}
+#endif
+
+#ifdef BH_WANT_DELETEMIN
+/*
+ * Find the node with the minimum key, unlink it from the list of trees
+ * and merge its sons back to the heap. Returns the removed node
+ * or NULL if the heap is empty.
+ */
+static BH_NODE *
+BH_PREFIX(deletemin)(BH_HEAP *heap)
+{
+  BH_NODE **link, **minlink = NULL;
+  BH_NODE *t;
+
+  /* Remember the link pointing to the minimum, so that we can unlink it */
+  link = &heap->root.first_son;
+  while ((t = *link) != NULL)
+    {
+      if (!minlink || BH_PREFIX(less)(t, *minlink))
+        minlink = link;
+      link = &t->next_sibling;
+    }
+  if (!minlink)
+    return NULL;
+
+  t = *minlink;
+  *minlink = t->next_sibling;
+  BH_PREFIX(merge)(&heap->root, t);
+  return t;
+}
+#endif
+
+/* Initialize an empty heap by clearing the whole structure. */
+static inline void
+BH_PREFIX(init)(BH_HEAP *heap)
+{
+ bzero(heap, sizeof(*heap));
+}
+
+#ifndef BH_FOR_ALL
+
+/*
+ * Iterator over all nodes of a heap. It walks the trees using an explicit
+ * stack: each visited node pushes its next sibling and then its first son,
+ * so the sons are visited before the siblings. BH_FOR_ALL opens a do-block
+ * and a while-loop which BH_END_FOR closes again, so the two must always
+ * be used in pairs. The bh_px argument is not used by the expansion.
+ *
+ * NOTE(review): the stack has a fixed size of 32 entries and pushes are not
+ * bounds-checked -- presumably this suffices for any heap; confirm.
+ *
+ * The macros are defined only once, even if this file is included
+ * several times with different prefixes.
+ */
+#define BH_FOR_ALL(bh_px, bh_heap, bh_var) \
+do { \
+ struct bh_node *bh_stack[32]; \
+ uns bh_sp = 0; \
+ if (bh_stack[0] = (bh_heap)->root.first_son) \
+ bh_sp++; \
+ while (bh_sp) { \
+ struct bh_node *bh_var = bh_stack[--bh_sp]; \
+ if (bh_var->next_sibling) \
+ bh_stack[bh_sp++] = bh_var->next_sibling; \
+ if (bh_var->first_son) \
+ bh_stack[bh_sp++] = bh_var->first_son;
+#define BH_END_FOR \
+ } \
+} while (0)
+
+/* BH_BREAK empties the stack and leaves the loop, BH_CONTINUE proceeds to the next node */
+#define BH_BREAK { bh_sp=0; break; }
+#define BH_CONTINUE continue
+
+#endif
+
+#undef BH_PREFIX
+#undef BH_NODE
+#undef BH_HEAP
+#undef BH_WANT_INSERT
+#undef BH_WANT_FINDMIN
+#undef BH_WANT_DELETEMIN
--- /dev/null
+/*
+ * UCW Library -- Generic Binary Search
+ *
+ * (c) 2005 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+/*
+ * Binary search in an array of N items sorted with respect to the given
+ * comparison: evaluates to the lowest index i for which ary_lt_x(ary,i,x)
+ * is false, or to N if it is true for all items.
+ *
+ * This is a statement expression declaring locals l, r and m, so the
+ * arguments should not refer to variables of these names.
+ */
+#define BIN_SEARCH_FIRST_GE_CMP(ary,N,x,ary_lt_x) ({ \
+ uns l = 0, r = (N); \
+ while (l < r) \
+ { \
+ uns m = (l+r)/2; \
+ if (ary_lt_x(ary,m,x)) \
+ l = m+1; \
+ else \
+ r = m; \
+ } \
+ l; \
+})
+
+/* Default comparison for arrays of numbers; fully parenthesized, so it is safe inside any expression */
+#define ARY_LT_NUM(ary,i,x) ((ary)[i] < (x))
+
+/* Index of the first item of a sorted numeric array which is >= x, or N if there is none */
+#define BIN_SEARCH_FIRST_GE(ary,N,x) BIN_SEARCH_FIRST_GE_CMP(ary,N,x,ARY_LT_NUM)
+/* Index of the first item equal to x, or -1 if x is not present (the index is passed as an int) */
+#define BIN_SEARCH_EQ(ary,N,x) ({ int i = BIN_SEARCH_FIRST_GE(ary,N,x); if (i >= (N) || (ary)[i] != (x)) i=-1; i; })
--- /dev/null
+/*
+ * UCW Library -- Find Lowest Set Bit
+ *
+ * (c) 2005 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/bitops.h"
+
+/* Just a table, the rest is in bitops.h */
+
+/*
+ * ffs_table[v] is the index of the lowest bit set in the byte v.
+ * The entry for v=0 is 0, but it carries no meaning.
+ */
+const byte ffs_table[] = {
+ 0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
+};
+
+#ifdef TEST
+
+#include <stdio.h>
+
+/* Read hexadecimal numbers from stdin and print the index of the lowest bit set in each. */
+int main(void)
+{
+  uns num;
+
+  for (;;)
+    {
+      if (scanf("%x", &num) != 1)
+        break;
+      printf("%d\n", bit_ffs(num));
+    }
+  return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Find Highest Set Bit
+ *
+ * (c) 1997-2005 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/bitops.h"
+
+/*
+ * Find the index of the highest bit set in x (that is, the floor of the
+ * binary logarithm of x). Returns -1 for x=0.
+ */
+int
+bit_fls(u32 x)
+{
+  uns pos = 0;
+
+  if (!x)
+    return -1;
+
+  /* Binary search: whenever the upper half is non-empty, move to it */
+  if (x >> 16) { pos += 16; x >>= 16; }
+  if (x >> 8) { pos += 8; x >>= 8; }
+  if (x >> 4) { pos += 4; x >>= 4; }
+  if (x >> 2) { pos += 2; x >>= 2; }
+  if (x >> 1)
+    pos++;
+  return pos;
+}
+
+#ifdef TEST
+
+#include <stdio.h>
+
+/* Read hexadecimal numbers from stdin and print the index of the highest bit set in each. */
+int main(void)
+{
+  uns num;
+
+  for (;;)
+    {
+      if (scanf("%x", &num) != 1)
+        break;
+      printf("%d\n", bit_fls(num));
+    }
+  return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Bit Array Operations
+ *
+ * (c) 2003--2006 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_BITARRAY_H
+#define _UCW_BITARRAY_H
+
+#include <string.h>
+
+/* A bit array is an array of 32-bit words; bit i lives in word i/32 at position i%32 */
+typedef u32 *bitarray_t;
+/* Number of words and bytes needed for an array of n bits */
+#define BIT_ARRAY_WORDS(n) (((n)+31)/32)
+#define BIT_ARRAY_BYTES(n) (4*BIT_ARRAY_WORDS(n))
+/* Declare a bit array of the given size as an ordinary variable */
+#define BIT_ARRAY(name,size) u32 name[BIT_ARRAY_WORDS(size)]
+
+/* Allocate an array of n bits by xmalloc(); the contents are left as xmalloc() returned them. */
+static inline bitarray_t
+bit_array_xmalloc(uns n)
+{
+  bitarray_t a = xmalloc(BIT_ARRAY_BYTES(n));
+  return a;
+}
+
+/* Allocate an array of n bits by xmalloc_zero(). */
+static inline bitarray_t
+bit_array_xmalloc_zero(uns n)
+{
+  bitarray_t a = xmalloc_zero(BIT_ARRAY_BYTES(n));
+  return a;
+}
+
+/* Clear all bits of an array of n bits (whole words are cleared). */
+static inline void
+bit_array_zero(bitarray_t a, uns n)
+{
+  memset(a, 0, BIT_ARRAY_BYTES(n));
+}
+
+/* Set all bits of an array of n bits (whole words are set, including bits past n). */
+static inline void
+bit_array_set_all(bitarray_t a, uns n)
+{
+  memset(a, 0xff, BIT_ARRAY_BYTES(n));
+}
+
+/* Set bit i of the array. */
+static inline void
+bit_array_set(bitarray_t a, uns i)
+{
+  /* The mask must be unsigned: 1 << 31 would overflow a signed int */
+  a[i/32] |= (1U << (i%32));
+}
+
+/* Clear bit i of the array. */
+static inline void
+bit_array_clear(bitarray_t a, uns i)
+{
+  /* The mask must be unsigned: 1 << 31 would overflow a signed int */
+  a[i/32] &= ~(1U << (i%32));
+}
+
+/* Set bit i of the array if x is non-zero, clear it otherwise. */
+static inline void
+bit_array_assign(bitarray_t a, uns i, uns x)
+{
+  if (!x)
+    {
+      bit_array_clear(a, i);
+      return;
+    }
+  bit_array_set(a, i);
+}
+
+/*
+ * Test bit i of the array. Returns zero if the bit is clear and a non-zero
+ * value (the mask of the bit within its word, not necessarily 1) if it is set.
+ */
+static inline uns
+bit_array_isset(bitarray_t a, uns i)
+{
+  /* The mask must be unsigned: 1 << 31 would overflow a signed int */
+  return a[i/32] & (1U << (i%32));
+}
+
+/* Get the value of bit i, normalized to 0 or 1. */
+static inline uns
+bit_array_get(bitarray_t a, uns i)
+{
+  return bit_array_isset(a, i) ? 1 : 0;
+}
+
+/* Set bit i and return its previous state in the same form as bit_array_isset() does. */
+static inline uns
+bit_array_test_and_set(bitarray_t a, uns i)
+{
+  const uns was = bit_array_isset(a, i);
+
+  bit_array_set(a, i);
+  return was;
+}
+
+/* Clear bit i and return its previous state in the same form as bit_array_isset() does. */
+static inline uns
+bit_array_test_and_clear(bitarray_t a, uns i)
+{
+  const uns was = bit_array_isset(a, i);
+
+  bit_array_clear(a, i);
+  return was;
+}
+
+/*
+ * Iterate over all set bits, possibly destructively: every bit found is
+ * cleared before the body runs with its index in `var', and the scan of
+ * a word goes on until the word becomes zero. The body is written between
+ * BIT_ARRAY_FISH_BITS_BEGIN and BIT_ARRAY_FISH_BITS_END.
+ *
+ * The masks are unsigned, since 1 << 31 would overflow a signed int.
+ */
+#define BIT_ARRAY_FISH_BITS_BEGIN(var,ary,size) \
+  for (uns var##_hi=0; var##_hi < BIT_ARRAY_WORDS(size); var##_hi++) \
+    for (uns var##_lo=0; ary[var##_hi]; var##_lo++) \
+      if (ary[var##_hi] & (1U << var##_lo)) \
+        { \
+          uns var = 32*var##_hi + var##_lo; \
+          ary[var##_hi] &= ~(1U << var##_lo); \
+          do
+
+#define BIT_ARRAY_FISH_BITS_END \
+          while (0); \
+        }
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Bit Operations
+ *
+ * (c) 2005 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_BITOPS_H
+#define _UCW_BITOPS_H
+
+/* Find highest bit set (i.e., the floor of the binary logarithm) (bit-fls.c) */
+
+int bit_fls(u32 x); /* bit_fls(0)=-1 */
+
+/* Find lowest bit set, undefined for zero argument (bit-ffs.c) */
+
+extern const byte ffs_table[256];
+
+#ifdef __pentium4 /* On other ia32 machines, the C version is faster */
+
+/* Find the lowest bit set using the bsfl instruction, which scans the operand from bit 0 upwards */
+static inline uns bit_ffs(uns w)
+{
+ asm("bsfl %1,%0" :"=r" (w) :"rm" (w));
+ return w;
+}
+
+#else
+
+/* Find the lowest bit set: locate the lowest non-zero byte and look it up in ffs_table */
+static inline uns bit_ffs(uns w)
+{
+  uns shift = 0;
+
+  if (!(w & 0xffff))		/* Nothing in the lower half-word */
+    shift = 16;
+  if (!((w >> shift) & 0xff))	/* Nothing in the lower byte of that half-word */
+    shift += 8;
+  return shift + ffs_table[(w >> shift) & 0xff];
+}
+
+#endif
+
+#endif
--- /dev/null
+# Tests for bitops modules
+
+Run: ../obj/lib/bit-ffs-t
+In: 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 12345678
+ 23030300
+ 23030000
+ 23000000
+ 40000000
+ 80000000
+Out: 0
+ 1
+ 0
+ 2
+ 0
+ 1
+ 3
+ 8
+ 16
+ 24
+ 30
+ 31
+
+Run: ../obj/lib/bit-fls-t
+In: 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 12345678
+ 23030303
+ 03030303
+ 00030303
+ 00000303
+ 0fedcba9
+Out: 0
+ 1
+ 1
+ 2
+ 2
+ 2
+ 28
+ 29
+ 25
+ 17
+ 9
+ 27
--- /dev/null
+/*
+ * UCW Library -- Bit Array Signatures -- A Dubious Detector of Duplicates
+ *
+ * (c) 2002 Martin Mares <mj@ucw.cz>
+ *
+ * Greatly inspired by: Faloutsos, C. and Christodoulakis, S.: Signature files
+ * (An access method for documents and its analytical performance evaluation),
+ * ACM Trans. Office Inf. Syst., 2(4):267--288, Oct. 1984.
+ *
+ * This data structure provides a very compact representation
+ * of a set of strings with insertion and membership search,
+ * but with a certain low probability it cheats by incidentally
+ * reporting a non-member as a member. Generally the larger you
+ * create the structure, the lower this probability is.
+ *
+ * How does it work: the structure is just an array of M bits
+ * and each possible element is hashed to a set of (at most) L
+ * bit positions. For each element of the represented set, we
+ * set its L bits to ones and we report as present all elements
+ * whose all L bits are set.
+ *
+ * Analysis: Let's assume N items have already been stored and let A
+ * denote L/M (density of the hash function). The probability that
+ * a fixed bit of the array is set by any of the N items is
+ * 1 - (1-1/M)^(NL) = 1 - ((1-1/M)^M)^NA = approx. 1 - e^-NA.
+ * This is minimized by setting A=(ln 2)/N (try taking derivative).
+ * Given a non-present item, the probability that all of the bits
+ * corresponding to this item are set by the other items (that is,
+ * the structure gives a false answer) is (1-e^-NA)^L = 2^-L.
+ * Hence, if we want to give false answers with probability less
+ * than epsilon, we take L := -log_2 epsilon, M := 1.45*N*L.
+ *
+ * Example: For a set of 10^7 items with P[error] < 10^-6, we set
+ * L := 20 and M := 290*10^6 bits = cca 34.5 MB (29 bits per item).
+ *
+ * We leave L and an upper bound for N as parameters set during
+ * creation of the structure. Currently, the structure is limited
+ * to 4 Gb = 512 MB.
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/bitsig.h"
+#include "lib/md5.h"
+
+#include <string.h>
+
+struct bitsig {
+ /*
+ * l = number of bits per item, m = size of the array in bits,
+ * n = counter of newly inserted items, maxn = expected number of items,
+ * max_m_mult = the largest multiple of m which fits in 32 bits
+ * (hash values at or above it are rejected before taking them modulo m)
+ */
+ uns l, m, n, maxn, max_m_mult;
+ u32 hash[4]; /* MD5 of the current item, used as 4 generators of bit positions */
+ uns hindex; /* Which of the 4 words will be used next */
+ byte array[0]; /* The bit array itself, allocated together with the structure */
+};
+
+/*
+ * Create a bit signature array for at most maxn items, using perrlog
+ * bits per item (so that the probability of a false positive should be
+ * about 2^-perrlog). Both parameters must be non-zero. Dies if the
+ * parameters are invalid or the array would exceed 4 Gb.
+ */
+struct bitsig *
+bitsig_init(uns perrlog, uns maxn)
+{
+  struct bitsig *b;
+  u64 m;
+  uns mbytes;
+
+  /* m = ceil(1.45 * maxn * perrlog) bits */
+  m = ((u64) maxn * perrlog * 145 + 99) / 100;
+  if (!m)			/* Would divide by zero below */
+    die("bitsig_init: perrlog and maxn must be non-zero");
+  if (m >= (u64) 1 << 32)
+    die("bitsig_init: bitsig array too large (maximum is 4 Gb)");
+  mbytes = (m + 7) >> 3U;
+  b = xmalloc(sizeof(struct bitsig) + mbytes);
+  b->l = perrlog;
+  b->m = m;
+  b->n = 0;
+  b->maxn = maxn;
+  /* The largest multiple of m below 2^32, see bitsig_hash_bit() */
+  b->max_m_mult = (0xffffffff / m) * m;
+  bzero(b->array, mbytes);
+  msg(L_DEBUG, "Initialized bitsig array with l=%d, m=%u (%u KB), expecting %d items", b->l, b->m, (mbytes+1023)/1024, maxn);
+  return b;
+}
+
+/* Free the whole structure, which was allocated as a single block by bitsig_init(). */
+void
+bitsig_free(struct bitsig *b)
+{
+ xfree(b);
+}
+
+/*
+ * Start generating bit positions for the given zero-terminated item:
+ * store its MD5 to b->hash and rewind the word index.
+ */
+static void
+bitsig_hash_init(struct bitsig *b, byte *item)
+{
+  struct MD5Context ctx;
+  uns len = strlen(item);
+
+  MD5Init(&ctx);
+  MD5Update(&ctx, item, len);
+  MD5Final((byte *) b->hash, &ctx);
+  b->hindex = 0;
+}
+
+/*
+ * Produce the next bit position for the current item. The four hash words
+ * are used in turn, each of them being multiplied by a constant after use.
+ * Values not below max_m_mult are skipped before the value is reduced
+ * modulo m.
+ */
+static inline uns
+bitsig_hash_bit(struct bitsig *b)
+{
+  for (;;)
+    {
+      u32 h = b->hash[b->hindex];
+
+      b->hash[b->hindex] *= 3006477127U;
+      b->hindex = (b->hindex+1) % 4;
+      if (h < b->max_m_mult)
+        return h % b->m;
+    }
+}
+
+/*
+ * Check whether the item is (probably) present: returns 1 if all its
+ * bits are set, 0 as soon as a clear bit is found.
+ */
+int
+bitsig_member(struct bitsig *b, byte *item)
+{
+  uns left;
+
+  bitsig_hash_init(b, item);
+  for (left = b->l; left; left--)
+    {
+      uns bit = bitsig_hash_bit(b);
+      uns mask = 1 << (bit & 7);
+
+      if (b->array[bit >> 3] & mask)
+        continue;
+      return 0;
+    }
+  return 1;
+}
+
+/*
+ * Insert the item by setting all its bits. Returns 1 if all of them have
+ * been already set (the item looks like a duplicate), 0 otherwise.
+ * Only the items returning 0 are counted; an error is logged when the
+ * counter is found at maxn+1 before being incremented.
+ */
+int
+bitsig_insert(struct bitsig *b, byte *item)
+{
+  uns left, was = 1;
+
+  bitsig_hash_init(b, item);
+  for (left = b->l; left; left--)
+    {
+      uns bit = bitsig_hash_bit(b);
+      byte *cell = &b->array[bit >> 3];
+      uns mask = 1 << (bit & 7);
+
+      if (*cell & mask)
+        continue;
+      was = 0;
+      *cell |= mask;
+    }
+
+  if (!was)
+    {
+      uns before = b->n++;
+      if (before == b->maxn+1)
+        msg(L_ERROR, "bitsig: Too many items inserted, error rate will be higher than estimated!");
+    }
+  return was;
+}
+
+#ifdef TEST
+
+#include <stdio.h>
+#include <stdlib.h>
+
+/*
+ * Test filter: expects perrlog and maxn as arguments, inserts every line
+ * of stdin (including its newline) and prints the result of each insertion.
+ */
+int main(int argc, char **argv)
+{
+  /* Refuse to run without both parameters instead of passing NULL to atol() */
+  if (argc < 3)
+    {
+      fprintf(stderr, "Usage: %s <perrlog> <maxn>\n", argc ? argv[0] : "bitsig");
+      return 1;
+    }
+
+  struct bitsig *b = bitsig_init(atol(argv[1]), atol(argv[2]));
+  byte buf[1024];
+
+  while (fgets(buf, sizeof(buf), stdin))
+    printf("%d\n", bitsig_insert(b, buf));
+
+  return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Bit Array Signatures -- A Dubious Detector of Duplicates
+ *
+ * (c) 2002 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+/* The structure is opaque, it is defined in bitsig.c only */
+struct bitsig;
+
+/* Create a signature array for at most maxn items with perrlog bits per item */
+struct bitsig *bitsig_init(uns perrlog, uns maxn);
+/* Free an array created by bitsig_init() */
+void bitsig_free(struct bitsig *b);
+/* Returns 1 if all bits of the zero-terminated item are set, 0 otherwise */
+int bitsig_member(struct bitsig *b, byte *item);
+/* Sets all bits of the zero-terminated item; returns 1 if all of them were already set, else 0 */
+int bitsig_insert(struct bitsig *b, byte *item);
--- /dev/null
+/*
+ * UCW Library -- Careful Read/Write
+ *
+ * (c) 2004 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+
+#include <unistd.h>
+
+/*
+ * Reads and writes on sockets and pipes can return partial results,
+ * so we implement an iterated read/write call.
+ */
+
+/*
+ * Read exactly `len' bytes from `fd' to `buf', repeating read() as long
+ * as it returns partial results. Returns 1 if everything was read,
+ * 0 if end of file was hit first (data read so far are not reported)
+ * and -1 if read() failed.
+ */
+int
+careful_read(int fd, void *buf, int len)
+{
+  byte *p = buf;
+  int got;
+
+  for (; len; p += got, len -= got)
+    {
+      got = read(fd, p, len);
+      if (got < 0)
+        return -1;
+      if (got == 0)
+        return 0;
+    }
+  return 1;
+}
+
+/*
+ * Write exactly `len' bytes from `buf' to `fd', repeating write() as long
+ * as it accepts only a part of the data. Returns 1 if everything was
+ * written, 0 if write() wrote nothing and -1 if it failed.
+ */
+int
+careful_write(int fd, const void *buf, int len)
+{
+  const byte *p = buf;
+  int put;
+
+  for (; len; p += put, len -= put)
+    {
+      put = write(fd, p, len);
+      if (put < 0)
+        return -1;
+      if (put == 0)
+        return 0;
+    }
+  return 1;
+}
--- /dev/null
+/*
+ * UCW Library -- Character Code Map (UTF-8 Version)
+ *
+ * (c) 1998--2004 Martin Mares <mj@ucw.cz>
+ * (c) 2004 Robert Spalek <robert@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+/* Syntax: CHAR(code, uppercase, lowercase, category) */
+
+CHAR(0x00,0x00,0x00,_C_CTRL) // <control>
+CHAR(0x01,0x01,0x01,_C_CTRL) // <control>
+CHAR(0x02,0x02,0x02,_C_CTRL) // <control>
+CHAR(0x03,0x03,0x03,_C_CTRL) // <control>
+CHAR(0x04,0x04,0x04,_C_CTRL) // <control>
+CHAR(0x05,0x05,0x05,_C_CTRL) // <control>
+CHAR(0x06,0x06,0x06,_C_CTRL) // <control>
+CHAR(0x07,0x07,0x07,_C_CTRL) // <control>
+CHAR(0x08,0x08,0x08,_C_CTRL | _C_BLANK) // <control>
+CHAR(0x09,0x09,0x09,_C_CTRL | _C_BLANK | _C_PRINT) // <control>
+CHAR(0x0A,0x0A,0x0A,_C_CTRL | _C_BLANK) // <control>
+CHAR(0x0B,0x0B,0x0B,_C_CTRL) // <control>
+CHAR(0x0C,0x0C,0x0C,_C_CTRL | _C_BLANK) // <control>
+CHAR(0x0D,0x0D,0x0D,_C_CTRL | _C_BLANK) // <control>
+CHAR(0x0E,0x0E,0x0E,_C_CTRL) // <control>
+CHAR(0x0F,0x0F,0x0F,_C_CTRL) // <control>
+CHAR(0x10,0x10,0x10,_C_CTRL) // <control>
+CHAR(0x11,0x11,0x11,_C_CTRL) // <control>
+CHAR(0x12,0x12,0x12,_C_CTRL) // <control>
+CHAR(0x13,0x13,0x13,_C_CTRL) // <control>
+CHAR(0x14,0x14,0x14,_C_CTRL) // <control>
+CHAR(0x15,0x15,0x15,_C_CTRL) // <control>
+CHAR(0x16,0x16,0x16,_C_CTRL) // <control>
+CHAR(0x17,0x17,0x17,_C_CTRL) // <control>
+CHAR(0x18,0x18,0x18,_C_CTRL) // <control>
+CHAR(0x19,0x19,0x19,_C_CTRL) // <control>
+CHAR(0x1A,0x1A,0x1A,_C_CTRL) // <control>
+CHAR(0x1B,0x1B,0x1B,_C_CTRL) // <control>
+CHAR(0x1C,0x1C,0x1C,_C_CTRL) // <control>
+CHAR(0x1D,0x1D,0x1D,_C_CTRL) // <control>
+CHAR(0x1E,0x1E,0x1E,_C_CTRL) // <control>
+CHAR(0x1F,0x1F,0x1F,_C_CTRL) // <control>
+CHAR(0x20,0x20,0x20,_C_BLANK | _C_PRINT) // SPACE
+CHAR(0x21,0x21,0x21,_C_PRINT) // EXCLAMATION MARK
+CHAR(0x22,0x22,0x22,_C_PRINT) // QUOTATION MARK
+CHAR(0x23,0x23,0x23,_C_PRINT) // NUMBER SIGN
+CHAR(0x24,0x24,0x24,_C_PRINT) // DOLLAR SIGN
+CHAR(0x25,0x25,0x25,_C_PRINT) // PERCENT SIGN
+CHAR(0x26,0x26,0x26,_C_PRINT) // AMPERSAND
+CHAR(0x27,0x27,0x27,_C_PRINT) // APOSTROPHE
+CHAR(0x28,0x28,0x28,_C_PRINT) // LEFT PARENTHESIS
+CHAR(0x29,0x29,0x29,_C_PRINT) // RIGHT PARENTHESIS
+CHAR(0x2A,0x2A,0x2A,_C_PRINT) // ASTERISK
+CHAR(0x2B,0x2B,0x2B,_C_PRINT) // PLUS SIGN
+CHAR(0x2C,0x2C,0x2C,_C_PRINT) // COMMA
+CHAR(0x2D,0x2D,0x2D,_C_PRINT) // HYPHEN-MINUS
+CHAR(0x2E,0x2E,0x2E,_C_PRINT) // FULL STOP
+CHAR(0x2F,0x2F,0x2F,_C_PRINT) // SOLIDUS
+CHAR(0x30,0x30,0x30,_C_DIGIT | _C_XDIGIT | _C_PRINT) // DIGIT ZERO
+CHAR(0x31,0x31,0x31,_C_DIGIT | _C_XDIGIT | _C_PRINT) // DIGIT ONE
+CHAR(0x32,0x32,0x32,_C_DIGIT | _C_XDIGIT | _C_PRINT) // DIGIT TWO
+CHAR(0x33,0x33,0x33,_C_DIGIT | _C_XDIGIT | _C_PRINT) // DIGIT THREE
+CHAR(0x34,0x34,0x34,_C_DIGIT | _C_XDIGIT | _C_PRINT) // DIGIT FOUR
+CHAR(0x35,0x35,0x35,_C_DIGIT | _C_XDIGIT | _C_PRINT) // DIGIT FIVE
+CHAR(0x36,0x36,0x36,_C_DIGIT | _C_XDIGIT | _C_PRINT) // DIGIT SIX
+CHAR(0x37,0x37,0x37,_C_DIGIT | _C_XDIGIT | _C_PRINT) // DIGIT SEVEN
+CHAR(0x38,0x38,0x38,_C_DIGIT | _C_XDIGIT | _C_PRINT) // DIGIT EIGHT
+CHAR(0x39,0x39,0x39,_C_DIGIT | _C_XDIGIT | _C_PRINT) // DIGIT NINE
+CHAR(0x3A,0x3A,0x3A,_C_PRINT) // COLON
+CHAR(0x3B,0x3B,0x3B,_C_PRINT) // SEMICOLON
+CHAR(0x3C,0x3C,0x3C,_C_PRINT) // LESS-THAN SIGN
+CHAR(0x3D,0x3D,0x3D,_C_PRINT) // EQUALS SIGN
+CHAR(0x3E,0x3E,0x3E,_C_PRINT) // GREATER-THAN SIGN
+CHAR(0x3F,0x3F,0x3F,_C_PRINT) // QUESTION MARK
+CHAR(0x40,0x40,0x40,_C_PRINT) // COMMERCIAL AT
+CHAR(0x41,0x41,0x61,_C_UPPER | _C_XDIGIT | _C_PRINT) // LATIN CAPITAL LETTER A
+CHAR(0x42,0x42,0x62,_C_UPPER | _C_XDIGIT | _C_PRINT) // LATIN CAPITAL LETTER B
+CHAR(0x43,0x43,0x63,_C_UPPER | _C_XDIGIT | _C_PRINT) // LATIN CAPITAL LETTER C
+CHAR(0x44,0x44,0x64,_C_UPPER | _C_XDIGIT | _C_PRINT) // LATIN CAPITAL LETTER D
+CHAR(0x45,0x45,0x65,_C_UPPER | _C_XDIGIT | _C_PRINT) // LATIN CAPITAL LETTER E
+CHAR(0x46,0x46,0x66,_C_UPPER | _C_XDIGIT | _C_PRINT) // LATIN CAPITAL LETTER F
+CHAR(0x47,0x47,0x67,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER G
+CHAR(0x48,0x48,0x68,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER H
+CHAR(0x49,0x49,0x69,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER I
+CHAR(0x4A,0x4A,0x6A,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER J
+CHAR(0x4B,0x4B,0x6B,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER K
+CHAR(0x4C,0x4C,0x6C,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER L
+CHAR(0x4D,0x4D,0x6D,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER M
+CHAR(0x4E,0x4E,0x6E,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER N
+CHAR(0x4F,0x4F,0x6F,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER O
+CHAR(0x50,0x50,0x70,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER P
+CHAR(0x51,0x51,0x71,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER Q
+CHAR(0x52,0x52,0x72,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER R
+CHAR(0x53,0x53,0x73,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER S
+CHAR(0x54,0x54,0x74,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER T
+CHAR(0x55,0x55,0x75,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER U
+CHAR(0x56,0x56,0x76,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER V
+CHAR(0x57,0x57,0x77,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER W
+CHAR(0x58,0x58,0x78,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER X
+CHAR(0x59,0x59,0x79,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER Y
+CHAR(0x5A,0x5A,0x7A,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER Z
+CHAR(0x5B,0x5B,0x5B,_C_PRINT) // LEFT SQUARE BRACKET
+CHAR(0x5C,0x5C,0x5C,_C_PRINT) // REVERSE SOLIDUS
+CHAR(0x5D,0x5D,0x5D,_C_PRINT) // RIGHT SQUARE BRACKET
+CHAR(0x5E,0x5E,0x5E,_C_PRINT) // CIRCUMFLEX ACCENT
+CHAR(0x5F,0x5F,0x5F,_C_INNER | _C_PRINT) // LOW LINE
+CHAR(0x60,0x60,0x60,_C_PRINT) // GRAVE ACCENT
+CHAR(0x61,0x41,0x61,_C_LOWER | _C_XDIGIT | _C_PRINT) // LATIN SMALL LETTER A
+CHAR(0x62,0x42,0x62,_C_LOWER | _C_XDIGIT | _C_PRINT) // LATIN SMALL LETTER B
+CHAR(0x63,0x43,0x63,_C_LOWER | _C_XDIGIT | _C_PRINT) // LATIN SMALL LETTER C
+CHAR(0x64,0x44,0x64,_C_LOWER | _C_XDIGIT | _C_PRINT) // LATIN SMALL LETTER D
+CHAR(0x65,0x45,0x65,_C_LOWER | _C_XDIGIT | _C_PRINT) // LATIN SMALL LETTER E
+CHAR(0x66,0x46,0x66,_C_LOWER | _C_XDIGIT | _C_PRINT) // LATIN SMALL LETTER F
+CHAR(0x67,0x47,0x67,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER G
+CHAR(0x68,0x48,0x68,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER H
+CHAR(0x69,0x49,0x69,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER I
+CHAR(0x6A,0x4A,0x6A,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER J
+CHAR(0x6B,0x4B,0x6B,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER K
+CHAR(0x6C,0x4C,0x6C,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER L
+CHAR(0x6D,0x4D,0x6D,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER M
+CHAR(0x6E,0x4E,0x6E,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER N
+CHAR(0x6F,0x4F,0x6F,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER O
+CHAR(0x70,0x50,0x70,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER P
+CHAR(0x71,0x51,0x71,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER Q
+CHAR(0x72,0x52,0x72,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER R
+CHAR(0x73,0x53,0x73,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER S
+CHAR(0x74,0x54,0x74,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER T
+CHAR(0x75,0x55,0x75,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER U
+CHAR(0x76,0x56,0x76,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER V
+CHAR(0x77,0x57,0x77,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER W
+CHAR(0x78,0x58,0x78,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER X
+CHAR(0x79,0x59,0x79,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER Y
+CHAR(0x7A,0x5A,0x7A,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER Z
+CHAR(0x7B,0x7B,0x7B,_C_PRINT) // LEFT CURLY BRACKET
+CHAR(0x7C,0x7C,0x7C,_C_PRINT) // VERTICAL LINE
+CHAR(0x7D,0x7D,0x7D,_C_PRINT) // RIGHT CURLY BRACKET
+CHAR(0x7E,0x7E,0x7E,_C_PRINT) // TILDE
+CHAR(0x7F,0x7F,0x7F,_C_CTRL) // <control>
+CHAR(0x80,0x80,0x80,_C_PRINT) // UTF-8
+CHAR(0x81,0x81,0x81,_C_PRINT) // UTF-8
+CHAR(0x82,0x82,0x82,_C_PRINT) // UTF-8
+CHAR(0x83,0x83,0x83,_C_PRINT) // UTF-8
+CHAR(0x84,0x84,0x84,_C_PRINT) // UTF-8
+CHAR(0x85,0x85,0x85,_C_PRINT) // UTF-8
+CHAR(0x86,0x86,0x86,_C_PRINT) // UTF-8
+CHAR(0x87,0x87,0x87,_C_PRINT) // UTF-8
+CHAR(0x88,0x88,0x88,_C_PRINT) // UTF-8
+CHAR(0x89,0x89,0x89,_C_PRINT) // UTF-8
+CHAR(0x8A,0x8A,0x8A,_C_PRINT) // UTF-8
+CHAR(0x8B,0x8B,0x8B,_C_PRINT) // UTF-8
+CHAR(0x8C,0x8C,0x8C,_C_PRINT) // UTF-8
+CHAR(0x8D,0x8D,0x8D,_C_PRINT) // UTF-8
+CHAR(0x8E,0x8E,0x8E,_C_PRINT) // UTF-8
+CHAR(0x8F,0x8F,0x8F,_C_PRINT) // UTF-8
+CHAR(0x90,0x90,0x90,_C_PRINT) // UTF-8
+CHAR(0x91,0x91,0x91,_C_PRINT) // UTF-8
+CHAR(0x92,0x92,0x92,_C_PRINT) // UTF-8
+CHAR(0x93,0x93,0x93,_C_PRINT) // UTF-8
+CHAR(0x94,0x94,0x94,_C_PRINT) // UTF-8
+CHAR(0x95,0x95,0x95,_C_PRINT) // UTF-8
+CHAR(0x96,0x96,0x96,_C_PRINT) // UTF-8
+CHAR(0x97,0x97,0x97,_C_PRINT) // UTF-8
+CHAR(0x98,0x98,0x98,_C_PRINT) // UTF-8
+CHAR(0x99,0x99,0x99,_C_PRINT) // UTF-8
+CHAR(0x9A,0x9A,0x9A,_C_PRINT) // UTF-8
+CHAR(0x9B,0x9B,0x9B,_C_PRINT) // UTF-8
+CHAR(0x9C,0x9C,0x9C,_C_PRINT) // UTF-8
+CHAR(0x9D,0x9D,0x9D,_C_PRINT) // UTF-8
+CHAR(0x9E,0x9E,0x9E,_C_PRINT) // UTF-8
+CHAR(0x9F,0x9F,0x9F,_C_PRINT) // UTF-8
+CHAR(0xA0,0xA0,0xA0,_C_PRINT) // UTF-8
+CHAR(0xA1,0xA1,0xA1,_C_PRINT) // UTF-8
+CHAR(0xA2,0xA2,0xA2,_C_PRINT) // UTF-8
+CHAR(0xA3,0xA3,0xA3,_C_PRINT) // UTF-8
+CHAR(0xA4,0xA4,0xA4,_C_PRINT) // UTF-8
+CHAR(0xA5,0xA5,0xA5,_C_PRINT) // UTF-8
+CHAR(0xA6,0xA6,0xA6,_C_PRINT) // UTF-8
+CHAR(0xA7,0xA7,0xA7,_C_PRINT) // UTF-8
+CHAR(0xA8,0xA8,0xA8,_C_PRINT) // UTF-8
+CHAR(0xA9,0xA9,0xA9,_C_PRINT) // UTF-8
+CHAR(0xAA,0xAA,0xAA,_C_PRINT) // UTF-8
+CHAR(0xAB,0xAB,0xAB,_C_PRINT) // UTF-8
+CHAR(0xAC,0xAC,0xAC,_C_PRINT) // UTF-8
+CHAR(0xAD,0xAD,0xAD,_C_PRINT) // UTF-8
+CHAR(0xAE,0xAE,0xAE,_C_PRINT) // UTF-8
+CHAR(0xAF,0xAF,0xAF,_C_PRINT) // UTF-8
+CHAR(0xB0,0xB0,0xB0,_C_PRINT) // UTF-8
+CHAR(0xB1,0xB1,0xB1,_C_PRINT) // UTF-8
+CHAR(0xB2,0xB2,0xB2,_C_PRINT) // UTF-8
+CHAR(0xB3,0xB3,0xB3,_C_PRINT) // UTF-8
+CHAR(0xB4,0xB4,0xB4,_C_PRINT) // UTF-8
+CHAR(0xB5,0xB5,0xB5,_C_PRINT) // UTF-8
+CHAR(0xB6,0xB6,0xB6,_C_PRINT) // UTF-8
+CHAR(0xB7,0xB7,0xB7,_C_PRINT) // UTF-8
+CHAR(0xB8,0xB8,0xB8,_C_PRINT) // UTF-8
+CHAR(0xB9,0xB9,0xB9,_C_PRINT) // UTF-8
+CHAR(0xBA,0xBA,0xBA,_C_PRINT) // UTF-8
+CHAR(0xBB,0xBB,0xBB,_C_PRINT) // UTF-8
+CHAR(0xBC,0xBC,0xBC,_C_PRINT) // UTF-8
+CHAR(0xBD,0xBD,0xBD,_C_PRINT) // UTF-8
+CHAR(0xBE,0xBE,0xBE,_C_PRINT) // UTF-8
+CHAR(0xBF,0xBF,0xBF,_C_PRINT) // UTF-8
+CHAR(0xC0,0xC0,0xC0,_C_PRINT) // UTF-8
+CHAR(0xC1,0xC1,0xC1,_C_PRINT) // UTF-8
+CHAR(0xC2,0xC2,0xC2,_C_PRINT) // UTF-8
+CHAR(0xC3,0xC3,0xC3,_C_PRINT) // UTF-8
+CHAR(0xC4,0xC4,0xC4,_C_PRINT) // UTF-8
+CHAR(0xC5,0xC5,0xC5,_C_PRINT) // UTF-8
+CHAR(0xC6,0xC6,0xC6,_C_PRINT) // UTF-8
+CHAR(0xC7,0xC7,0xC7,_C_PRINT) // UTF-8
+CHAR(0xC8,0xC8,0xC8,_C_PRINT) // UTF-8
+CHAR(0xC9,0xC9,0xC9,_C_PRINT) // UTF-8
+CHAR(0xCA,0xCA,0xCA,_C_PRINT) // UTF-8
+CHAR(0xCB,0xCB,0xCB,_C_PRINT) // UTF-8
+CHAR(0xCC,0xCC,0xCC,_C_PRINT) // UTF-8
+CHAR(0xCD,0xCD,0xCD,_C_PRINT) // UTF-8
+CHAR(0xCE,0xCE,0xCE,_C_PRINT) // UTF-8
+CHAR(0xCF,0xCF,0xCF,_C_PRINT) // UTF-8
+CHAR(0xD0,0xD0,0xD0,_C_PRINT) // UTF-8
+CHAR(0xD1,0xD1,0xD1,_C_PRINT) // UTF-8
+CHAR(0xD2,0xD2,0xD2,_C_PRINT) // UTF-8
+CHAR(0xD3,0xD3,0xD3,_C_PRINT) // UTF-8
+CHAR(0xD4,0xD4,0xD4,_C_PRINT) // UTF-8
+CHAR(0xD5,0xD5,0xD5,_C_PRINT) // UTF-8
+CHAR(0xD6,0xD6,0xD6,_C_PRINT) // UTF-8
+CHAR(0xD7,0xD7,0xD7,_C_PRINT) // UTF-8
+CHAR(0xD8,0xD8,0xD8,_C_PRINT) // UTF-8
+CHAR(0xD9,0xD9,0xD9,_C_PRINT) // UTF-8
+CHAR(0xDA,0xDA,0xDA,_C_PRINT) // UTF-8
+CHAR(0xDB,0xDB,0xDB,_C_PRINT) // UTF-8
+CHAR(0xDC,0xDC,0xDC,_C_PRINT) // UTF-8
+CHAR(0xDD,0xDD,0xDD,_C_PRINT) // UTF-8
+CHAR(0xDE,0xDE,0xDE,_C_PRINT) // UTF-8
+CHAR(0xDF,0xDF,0xDF,_C_PRINT) // UTF-8
+CHAR(0xE0,0xE0,0xE0,_C_PRINT) // UTF-8
+CHAR(0xE1,0xE1,0xE1,_C_PRINT) // UTF-8
+CHAR(0xE2,0xE2,0xE2,_C_PRINT) // UTF-8
+CHAR(0xE3,0xE3,0xE3,_C_PRINT) // UTF-8
+CHAR(0xE4,0xE4,0xE4,_C_PRINT) // UTF-8
+CHAR(0xE5,0xE5,0xE5,_C_PRINT) // UTF-8
+CHAR(0xE6,0xE6,0xE6,_C_PRINT) // UTF-8
+CHAR(0xE7,0xE7,0xE7,_C_PRINT) // UTF-8
+CHAR(0xE8,0xE8,0xE8,_C_PRINT) // UTF-8
+CHAR(0xE9,0xE9,0xE9,_C_PRINT) // UTF-8
+CHAR(0xEA,0xEA,0xEA,_C_PRINT) // UTF-8
+CHAR(0xEB,0xEB,0xEB,_C_PRINT) // UTF-8
+CHAR(0xEC,0xEC,0xEC,_C_PRINT) // UTF-8
+CHAR(0xED,0xED,0xED,_C_PRINT) // UTF-8
+CHAR(0xEE,0xEE,0xEE,_C_PRINT) // UTF-8
+CHAR(0xEF,0xEF,0xEF,_C_PRINT) // UTF-8
+CHAR(0xF0,0xF0,0xF0,_C_PRINT) // UTF-8
+CHAR(0xF1,0xF1,0xF1,_C_PRINT) // UTF-8
+CHAR(0xF2,0xF2,0xF2,_C_PRINT) // UTF-8
+CHAR(0xF3,0xF3,0xF3,_C_PRINT) // UTF-8
+CHAR(0xF4,0xF4,0xF4,_C_PRINT) // UTF-8
+CHAR(0xF5,0xF5,0xF5,_C_PRINT) // UTF-8
+CHAR(0xF6,0xF6,0xF6,_C_PRINT) // UTF-8
+CHAR(0xF7,0xF7,0xF7,_C_PRINT) // UTF-8
+CHAR(0xF8,0xF8,0xF8,_C_PRINT) // UTF-8
+CHAR(0xF9,0xF9,0xF9,_C_PRINT) // UTF-8
+CHAR(0xFA,0xFA,0xFA,_C_PRINT) // UTF-8
+CHAR(0xFB,0xFB,0xFB,_C_PRINT) // UTF-8
+CHAR(0xFC,0xFC,0xFC,_C_PRINT) // UTF-8
+CHAR(0xFD,0xFD,0xFD,_C_PRINT) // UTF-8
+CHAR(0xFE,0xFE,0xFE,_C_PRINT) // UTF-8
+CHAR(0xFF,0xFF,0xFF,_C_PRINT) // UTF-8
--- /dev/null
+/*
+ * UCW Library -- Character Types
+ *
+ * (c) 1997--2004 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_CHARTYPE_H
+#define _UCW_CHARTYPE_H
+
+/* Category flags; the category of a character is an OR of these bits */
+#define _C_UPPER 1 /* Upper-case letters */
+#define _C_LOWER 2 /* Lower-case letters */
+#define _C_PRINT 4 /* Printable */
+#define _C_DIGIT 8 /* Digits */
+#define _C_CTRL 16 /* Control characters */
+#define _C_XDIGIT 32 /* Hexadecimal digits */
+#define _C_BLANK 64 /* White spaces (spaces, tabs, newlines) */
+#define _C_INNER 128 /* `inner punctuation' -- underscore etc. */
+
+/* Combined categories */
+#define _C_ALPHA (_C_UPPER | _C_LOWER)
+#define _C_ALNUM (_C_ALPHA | _C_DIGIT)
+#define _C_WORD (_C_ALNUM | _C_INNER)
+#define _C_WSTART (_C_ALPHA | _C_INNER)
+
+/* Tables indexed by (unsigned char): category flags, upper-case and lower-case mapping */
+extern const unsigned char _c_cat[256], _c_upper[256], _c_lower[256];
+
+/*
+ * Category(x) yields the flags of character x.
+ * Ccat(x,y) is non-zero iff x has at least one of the flags in y.
+ * y is parenthesized so that an argument such as `A | B' is masked as a whole.
+ */
+#define Category(x) (_c_cat[(unsigned char)(x)])
+#define Ccat(x,y) (Category(x) & (y))
+
+/* Predicates: non-zero iff x belongs to the given category */
+#define Cupper(x) Ccat(x, _C_UPPER)
+#define Clower(x) Ccat(x, _C_LOWER)
+#define Calpha(x) Ccat(x, _C_ALPHA)
+#define Calnum(x) Ccat(x, _C_ALNUM)
+#define Cprint(x) Ccat(x, _C_PRINT)
+#define Cdigit(x) Ccat(x, _C_DIGIT)
+#define Cxdigit(x) Ccat(x, _C_XDIGIT)
+#define Cword(x) Ccat(x, _C_WORD)
+#define Cblank(x) Ccat(x, _C_BLANK)
+#define Cctrl(x) Ccat(x, _C_CTRL)
+#define Cspace(x) Cblank(x)
+
+/* Case conversion by table lookup */
+#define Cupcase(x) _c_upper[(unsigned char)(x)]
+#define Clocase(x) _c_lower[(unsigned char)(x)]
+
+/*
+ * Value of a hexadecimal digit. No validation is done: anything below 'A'
+ * is treated as a decimal digit, anything else as a letter (case folded
+ * by masking with 0xdf). The argument is evaluated more than once.
+ */
+#define Cxvalue(x) (((x)<'A')?((x)-'0'):(((x)&0xdf)-'A'+10))
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Circular Linked Lists
+ *
+ * (c) 2003--2007 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_CLISTS_H
+#define _UCW_CLISTS_H
+
+/* A list node: links to the following and the preceding node of the circular list */
+typedef struct cnode {
+ struct cnode *next, *prev;
+} cnode;
+
+/*
+ * A circular list. The embedded head node closes the circle:
+ * head.next is the first element, head.prev the last one, and both
+ * point back to head itself when the list is empty.
+ */
+typedef struct clist {
+ struct cnode head;
+} clist;
+
+/* Return the first node of the list or NULL if the list is empty */
+static inline void *clist_head(clist *l)
+{
+  cnode *first = l->head.next;
+  if (first == &l->head)
+    return NULL;
+  return first;
+}
+
+/* Return the last node of the list or NULL if the list is empty */
+static inline void *clist_tail(clist *l)
+{
+  cnode *last = l->head.prev;
+  if (last == &l->head)
+    return NULL;
+  return last;
+}
+
+/* Return the node following n in list l or NULL if n is the last one */
+static inline void *clist_next(clist *l, cnode *n)
+{
+  cnode *succ = n->next;
+  if (succ == &l->head)
+    return NULL;
+  return succ;
+}
+
+/* Return the node preceding n in list l or NULL if n is the first one */
+static inline void *clist_prev(clist *l, cnode *n)
+{
+  cnode *pred = n->prev;
+  if (pred == &l->head)
+    return NULL;
+  return pred;
+}
+
+/* Return non-zero iff the list contains no nodes (the head points to itself) */
+static inline int clist_empty(clist *l)
+{
+ return (l->head.next == &l->head);
+}
+
+/*
+ * Iteration macros; each expands to a `for' header and `list' is a clist lvalue (not a pointer).
+ * CLIST_WALK -- walk forwards using an already declared variable n.
+ * CLIST_WALK_DELSAFE -- like CLIST_WALK, but the successor is saved in tmp before the body runs,
+ * so the body may remove the current node.
+ * CLIST_FOR_EACH, CLIST_FOR_EACH_DELSAFE -- the same, but n is declared inside the loop with the given type
+ * (tmp must still be declared by the caller).
+ */
+#define CLIST_WALK(n,list) for(n=(void*)(list).head.next; (cnode*)(n) != &(list).head; n=(void*)((cnode*)(n))->next)
+#define CLIST_WALK_DELSAFE(n,list,tmp) for(n=(void*)(list).head.next; tmp=(void*)((cnode*)(n))->next, (cnode*)(n) != &(list).head; n=(void*)tmp)
+#define CLIST_FOR_EACH(type,n,list) for(type n=(void*)(list).head.next; (cnode*)(n) != &(list).head; n=(void*)((cnode*)(n))->next)
+#define CLIST_FOR_EACH_DELSAFE(type,n,list,tmp) for(type n=(void*)(list).head.next; tmp=(void*)((cnode*)(n))->next, (cnode*)(n) != &(list).head; n=(void*)tmp)
+
+/* Walk the list from the last node to the first one; n is declared inside the loop */
+#define CLIST_FOR_EACH_BACKWARDS(type,n,list) for(type n=(void*)(list).head.prev; (cnode*)(n) != &(list).head; n=(void*)((cnode*)(n))->prev)
+
+/* Link node `what' into the list immediately after node `after' */
+static inline void clist_insert_after(cnode *what, cnode *after)
+{
+ cnode *before = after->next; // the node that will follow `what'
+ what->next = before;
+ what->prev = after;
+ before->prev = what;
+ after->next = what;
+}
+
+/* Link node `what' into the list immediately before node `before' */
+static inline void clist_insert_before(cnode *what, cnode *before)
+{
+ cnode *after = before->prev; // the node that will precede `what'
+ what->next = before;
+ what->prev = after;
+ before->prev = what;
+ after->next = what;
+}
+
+/* Append node n at the end of list l (i.e., just before the head node) */
+static inline void clist_add_tail(clist *l, cnode *n)
+{
+ clist_insert_before(n, &l->head);
+}
+
+/* Insert node n at the beginning of list l (i.e., just after the head node) */
+static inline void clist_add_head(clist *l, cnode *n)
+{
+ clist_insert_after(n, &l->head);
+}
+
+/*
+ * Unlink node n from its list by connecting its neighbours to each other.
+ * The next/prev pointers of n itself are left unchanged.
+ */
+static inline void clist_remove(cnode *n)
+{
+ cnode *before = n->prev;
+ cnode *after = n->next;
+ before->next = after;
+ after->prev = before;
+}
+
+/* Unlink the first node of the list and return it; return NULL if the list is empty */
+static inline void *clist_remove_head(clist *l)
+{
+  cnode *first = clist_head(l);
+  if (!first)
+    return NULL;
+  clist_remove(first);
+  return first;
+}
+
+/* Unlink the last node of the list and return it; return NULL if the list is empty */
+static inline void *clist_remove_tail(clist *l)
+{
+  cnode *last = clist_tail(l);
+  if (!last)
+    return NULL;
+  clist_remove(last);
+  return last;
+}
+
+/* Make the list empty: both links of the head node point to the head itself */
+static inline void clist_init(clist *l)
+{
+  l->head.prev = &l->head;
+  l->head.next = &l->head;
+}
+
+/*
+ * Move all nodes of list `what' into another list, right after node `after',
+ * preserving their order. `what' is left empty. Nothing happens if `what'
+ * is already empty.
+ */
+static inline void clist_insert_list_after(clist *what, cnode *after)
+{
+ if (!clist_empty(what))
+ {
+ cnode *w = &what->head;
+ // Connect the last moved node with the old successor of `after'
+ w->prev->next = after->next;
+ after->next->prev = w->prev;
+ // Connect `after' with the first moved node
+ w->next->prev = after;
+ after->next = w->next;
+ clist_init(what);
+ }
+}
+
+/* Count the nodes of the list by walking it from the first node to the last */
+static inline uns clist_size(clist *l)
+{
+  uns count = 0;
+  for (cnode *n = (void*)(*l).head.next; n != &(*l).head; n = (void*)n->next)
+    count++;
+  return count;
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Configuration files: memory allocation
+ *
+ * (c) 2001--2006 Robert Spalek <robert@ucw.cz>
+ * (c) 2003--2006 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/conf.h"
+#include "lib/mempool.h"
+
+struct mempool *cf_pool; // current pool for loading new configuration
+
+/* Allocate size bytes from the current configuration pool cf_pool (via mp_alloc) */
+void *
+cf_malloc(uns size)
+{
+ return mp_alloc(cf_pool, size);
+}
+
+/* Allocate size bytes from the current configuration pool cf_pool using mp_alloc_zero */
+void *
+cf_malloc_zero(uns size)
+{
+ return mp_alloc_zero(cf_pool, size);
+}
+
+/* Duplicate string s into the current configuration pool cf_pool (via mp_strdup) */
+char *
+cf_strdup(const char *s)
+{
+ return mp_strdup(cf_pool, s);
+}
+
+/*
+ * Format a string printf-style into the current configuration pool cf_pool
+ * (via mp_vprintf) and return the result.
+ */
+char *
+cf_printf(const char *fmt, ...)
+{
+ va_list args;
+ va_start(args, fmt);
+ char *res = mp_vprintf(cf_pool, fmt, args);
+ va_end(args);
+ return res;
+}
--- /dev/null
+/*
+ * UCW Library -- Configuration files: dumping
+ *
+ * (c) 2001--2006 Robert Spalek <robert@ucw.cz>
+ * (c) 2003--2006 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/conf.h"
+#include "lib/getopt.h"
+#include "lib/conf-internal.h"
+#include "lib/clists.h"
+#include "lib/fastbuf.h"
+
+/* Write nr space characters to fb (used for indenting the dump) */
+static void
+spaces(struct fastbuf *fb, uns nr)
+{
+  while (nr--)
+    bputs(fb, " ");
+}
+
+/*
+ * Print a single value of the given type to fb.
+ * ptr points to the stored value; u supplies the table of names for
+ * CT_LOOKUP and the user type (with its optional dumper) for CT_USER.
+ */
+static void
+dump_basic(struct fastbuf *fb, void *ptr, enum cf_type type, union cf_union *u)
+{
+  switch (type) {
+    // The argument types below are chosen to match the conversion specifiers exactly
+    case CT_INT: bprintf(fb, "%d ", *(int*)ptr); break;
+    case CT_U64: bprintf(fb, "%llu ", (unsigned long long) *(u64*)ptr); break;
+    case CT_DOUBLE: bprintf(fb, "%lg ", *(double*)ptr); break;
+    case CT_IP: bprintf(fb, "%08x ", *(uns*)ptr); break;
+    case CT_STRING:
+      if (*(char**)ptr)
+        bprintf(fb, "'%s' ", *(char**)ptr);
+      else
+        bprintf(fb, "NULL ");
+      break;
+    // A negative index (e.g. -1) has no name and is shown as ???
+    case CT_LOOKUP: bprintf(fb, "%s ", *(int*)ptr >= 0 ? u->lookup[ *(int*)ptr ] : "???"); break;
+    case CT_USER:
+      if (u->utype->dumper)
+        u->utype->dumper(fb, ptr);
+      else
+        bprintf(fb, "??? ");
+      break;
+  }
+}
+
+// dump_item() and dump_section() call each other, hence the forward declaration
+static void dump_section(struct fastbuf *fb, struct cf_section *sec, int level, void *ptr);
+
+// Printable names of item classes, indexed by item->cls in dump_item()
+static char *class_names[] = { "end", "static", "dynamic", "parser", "section", "list", "bitmap" };
+
+/*
+ * Dump one configuration item as a single line, followed by the nested
+ * contents for sections and lists.
+ *
+ * fb is the output stream, level the indentation depth and ptr the base
+ * address of the enclosing section; item->ptr is added to it as an offset
+ * to reach the data of the item.
+ */
+static void
+dump_item(struct fastbuf *fb, struct cf_item *item, int level, void *ptr)
+{
+  ptr += (uintptr_t) item->ptr;
+  enum cf_type type = item->type;
+  uns size = cf_type_size(item->type, item->u.utype);
+  int i;
+  spaces(fb, level);
+  bprintf(fb, "%s: C%s #", item->name, class_names[item->cls]);
+  if (item->number == CF_ANY_NUM)
+    bputs(fb, "any ");
+  else
+    bprintf(fb, "%d ", item->number);
+  if (item->cls == CC_STATIC || item->cls == CC_DYNAMIC || item->cls == CC_BITMAP) {
+    bprintf(fb, "T%s ", cf_type_names[type]);
+    if (item->type == CT_USER)
+      bprintf(fb, "U%s S%d ", item->u.utype->name, size);
+  }
+  if (item->cls == CC_STATIC) {
+    // A fixed array of item->number values stored in place
+    for (i=0; i<item->number; i++)
+      dump_basic(fb, ptr + i * size, type, &item->u);
+  } else if (item->cls == CC_DYNAMIC) {
+    // A pointer to a dynamic array; its length is obtained by DARY_LEN
+    ptr = * (void**) ptr;
+    if (ptr) {
+      int real_nr = DARY_LEN(ptr);
+      bprintf(fb, "N%d ", real_nr);
+      for (i=0; i<real_nr; i++)
+        dump_basic(fb, ptr + i * size, type, &item->u);
+    } else
+      bprintf(fb, "NULL ");
+  } else if (item->cls == CC_BITMAP) {
+    u32 mask = * (u32*) ptr;
+    for (i=0; i<32; i++) {
+      // For lookups, stop at the NULL terminator of the table of names
+      if (item->type == CT_LOOKUP && !item->u.lookup[i])
+        break;
+      // Unsigned constant: shifting a signed 1 by 31 bits would overflow int
+      if (mask & (1U<<i)) {
+        if (item->type == CT_INT)
+          bprintf(fb, "%d ", i);
+        else if (item->type == CT_LOOKUP)
+          bprintf(fb, "%s ", item->u.lookup[i]);
+      }
+    }
+  }
+  bputc(fb, '\n');
+  if (item->cls == CC_SECTION)
+    dump_section(fb, item->u.sec, level+1, ptr);
+  else if (item->cls == CC_LIST) {
+    // Every node of the list is dumped as one instance of the sub-section
+    uns idx = 0;
+    CLIST_FOR_EACH(cnode *, n, * (clist*) ptr) {
+      spaces(fb, level+1);
+      bprintf(fb, "item %d\n", ++idx);
+      dump_section(fb, item->u.sec, level+2, n);
+    }
+  }
+}
+
+/*
+ * Dump a section: a header line with its size and flags, followed by all
+ * its items up to the terminating item whose class is zero.
+ * ptr is the base address of the section data.
+ */
+static void
+dump_section(struct fastbuf *fb, struct cf_section *sec, int level, void *ptr)
+{
+  struct cf_item *cur = sec->cfg;
+
+  spaces(fb, level);
+  bprintf(fb, "S%d F%x:\n", sec->size, sec->flags);
+  while (cur->cls)
+    {
+      dump_item(fb, cur, level, ptr);
+      cur++;
+    }
+}
+
+/* Dump the whole configuration tree rooted at cf_sections to fb */
+void
+cf_dump_sections(struct fastbuf *fb)
+{
+ dump_section(fb, &cf_sections, 0, NULL);
+}
+
--- /dev/null
+/*
+ * UCW Library -- Configuration files: parsing input streams
+ *
+ * (c) 2001--2006 Robert Spalek <robert@ucw.cz>
+ * (c) 2003--2006 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/conf.h"
+#include "lib/getopt.h"
+#include "lib/conf-internal.h"
+#include "lib/mempool.h"
+#include "lib/fastbuf.h"
+#include "lib/chartype.h"
+#include "lib/stkstring.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+
+/* Text file parser */
+
+static const char *name_parse_fb; // name of the parsed input for messages (may be NULL)
+static struct fastbuf *parse_fb; // the stream get_line() reads from
+static uns line_num; // incremented by every get_line() call
+
+#define MAX_LINE 4096
+static char line_buf[MAX_LINE]; // the current input line
+static char *line = line_buf; // current parsing position inside line_buf
+
+#include "lib/bbuf.h"
+static bb_t copy_buf; // zero-terminated words of the current command, one after another
+static uns copied; // number of bytes used in copy_buf
+
+#define GBUF_TYPE uns
+#define GBUF_PREFIX(x) split_##x
+#include "lib/gbuf.h"
+static split_t word_buf; // offsets of the starts of the words inside copy_buf
+static uns words; // number of offsets stored in word_buf
+static uns ends_by_brace; // the line is ended by "{"
+
+/*
+ * Read the next line into line_buf, count it and set `line' to its first
+ * non-blank character.
+ *
+ * Returns 1 on success. Returns 0 when bgets_nodie() yields a non-positive
+ * result: *msg is then "Line too long" for a negative result and NULL for zero.
+ */
+static int
+get_line(char **msg)
+{
+  int got = bgets_nodie(parse_fb, line_buf, MAX_LINE);
+
+  line_num++;
+  if (got > 0)
+    {
+      for (line = line_buf; Cblank(*line); line++)
+        ;
+      return 1;
+    }
+  *msg = err_line_too_long(got);
+  return 0;
+}
+
+/*
+ * Append the bytes between start (inclusive) and end (exclusive) to
+ * copy_buf and terminate them by a zero byte. `copied' is advanced past
+ * the terminator, so the next call starts a new string unless the caller
+ * overwrites the terminator or moves `copied' back.
+ */
+static void
+append(char *start, char *end)
+{
+  uns len = end - start;
+  // Make room for the data and the terminating zero
+  bb_grow(&copy_buf, copied + len + 1);
+  memcpy(copy_buf.ptr + copied, start, len);
+  copied += len + 1;
+  copy_buf.ptr[copied-1] = 0;
+}
+
+/*
+ * Parse one word starting at `line' and append it to copy_buf.
+ *
+ * Three forms are recognized:
+ *  - '...'  copied verbatim, may span several lines (joined by newlines),
+ *  - "..."  may span several lines, escapes are expanded by stk_str_unesc(),
+ *           a backslash at the end of a line joins the lines without a newline,
+ *  - a bare word terminated by a blank, `{', `}', `;' or, when
+ *    is_command_name is set, by `=' (which is replaced by a space);
+ *    a terminator standing at the very start forms a one-character word.
+ *
+ * Trailing blanks are skipped. Returns NULL on success or an error message.
+ */
+static char *
+get_word(uns is_command_name)
+{
+  char *msg;
+  if (*line == '\'') {
+    line++;
+    while (1) {
+      char *start = line;
+      while (*line && *line != '\'')
+        line++;
+      append(start, line);
+      if (*line)
+        break;
+      // End of line inside the apostrophes: turn the terminator into a newline and go on
+      copy_buf.ptr[copied-1] = '\n';
+      if (!get_line(&msg))
+        return msg ? : "Unterminated apostrophe word at the end";
+    }
+    line++;
+
+  } else if (*line == '"') {
+    line++;
+    uns start_copy = copied;
+    while (1) {
+      char *start = line;
+      uns escape = 0; // set iff the previous character was an unescaped backslash
+      while (*line) {
+        if (*line == '"' && !escape)
+          break;
+        else if (*line == '\\')
+          escape ^= 1;
+        else
+          escape = 0;
+        line++;
+      }
+      append(start, line);
+      if (*line)
+        break;
+      if (!escape)
+        copy_buf.ptr[copied-1] = '\n';
+      else // merge two lines
+        copied -= 2; // drop the terminator and the trailing backslash
+      if (!get_line(&msg))
+        return msg ? : "Unterminated quoted word at the end";
+    }
+    line++;
+
+    // Replace the collected raw text by its unescaped form
+    char *tmp = stk_str_unesc(copy_buf.ptr + start_copy);
+    uns l = strlen(tmp);
+    bb_grow(&copy_buf, start_copy + l + 1);
+    strcpy(copy_buf.ptr + start_copy, tmp);
+    copied = start_copy + l + 1;
+
+  } else {
+    // promised that *line is non-null and non-blank
+    char *start = line;
+    while (*line && !Cblank(*line)
+        && *line != '{' && *line != '}' && *line != ';'
+        && (*line != '=' || !is_command_name))
+      line++;
+    if (*line == '=') { // nice for setting from a command-line
+      if (line == start)
+        return "Assignment without a variable";
+      *line = ' ';
+    }
+    if (line == start) // already the first char is control
+      line++;
+    append(start, line);
+  }
+  while (Cblank(*line))
+    line++;
+  return NULL;
+}
+
+/*
+ * Fetch the next word of the current command.
+ *
+ * is_command_name is non-zero when the first word of a command is wanted;
+ * in that case empty lines, comments and stray semicolons are skipped.
+ * Otherwise the end of a line, a comment or a semicolon ends the command.
+ *
+ * Returns a pointer to the word inside copy_buf (its offset is also
+ * recorded in word_buf) or NULL. When NULL is returned, *err is either
+ * an error message or NULL if there was simply nothing more to read.
+ */
+static char *
+get_token(uns is_command_name, char **err)
+{
+ *err = NULL;
+ while (1) {
+ if (!*line || *line == '#') {
+ // End of line or start of a comment
+ if (!is_command_name || !get_line(err))
+ return NULL;
+ } else if (*line == ';') {
+ // Consume the semicolon; it is not recorded in word_buf
+ *err = get_word(0);
+ if (!is_command_name || *err)
+ return NULL;
+ } else if (*line == '\\' && !line[1]) {
+ // A backslash as the last character: the command continues on the next line
+ if (!get_line(err)) {
+ if (!*err)
+ *err = "Last line ends by a backslash";
+ return NULL;
+ }
+ if (!*line || *line == '#')
+ msg(L_WARN, "The line %s:%d following a backslash is empty", name_parse_fb ? : "", line_num);
+ } else {
+ // A real word: remember where it starts in copy_buf and parse it
+ split_grow(&word_buf, words+1);
+ uns start = copied;
+ word_buf.ptr[words++] = copied;
+ *err = get_word(is_command_name);
+ return *err ? NULL : copy_buf.ptr + start;
+ }
+ }
+}
+
+/*
+ * Read one command and split it to words. The words are stored in
+ * copy_buf, their offsets in word_buf and their count in `words';
+ * ends_by_brace is set when the command is ended by an opening brace
+ * (the brace itself is not counted as a word).
+ *
+ * Returns NULL on success, including the case when no words could be
+ * read (`words' is then 0), or an error message.
+ */
+static char *
+split_command(void)
+{
+ words = copied = ends_by_brace = 0;
+ char *msg, *start_word;
+ if (!(start_word = get_token(1, &msg)))
+ return msg;
+ if (*start_word == '{') // only one opening brace
+ return "Unexpected opening brace";
+ while (*line != '}') // stays for the next time
+ {
+ if (!(start_word = get_token(0, &msg)))
+ return msg;
+ if (*start_word == '{') {
+ words--; // discard the brace
+ ends_by_brace = 1;
+ break;
+ }
+ }
+ return NULL;
+}
+
+/* Parsing multiple files */
+
+/*
+ * Parse a whole configuration stream and interpret its commands.
+ *
+ * name_fb is the name of the input used in messages (NULL when parsing
+ * a string given by the user), fb is the stream and depth the nesting
+ * level of "include" commands (more than 9 levels are refused).
+ *
+ * Returns NULL on success. On error, the message is logged together with
+ * the position and "included from here" is returned, so that the parser
+ * of an including file reports where the inclusion took place.
+ */
+static char *
+parse_fastbuf(const char *name_fb, struct fastbuf *fb, uns depth)
+{
+  char *err;
+  name_parse_fb = name_fb;
+  parse_fb = fb;
+  line_num = 0;
+  line = line_buf;
+  *line = 0;
+  while (1)
+    {
+      err = split_command();
+      if (err)
+        goto error;
+      if (!words)
+        return NULL;
+      char *name = copy_buf.ptr + word_buf.ptr[0];
+      char *pars[words-1];
+      for (uns i=1; i<words; i++)
+        pars[i-1] = copy_buf.ptr + word_buf.ptr[i];
+      if (!strcasecmp(name, "include"))
+        {
+          if (words != 2)
+            err = "Expecting one filename";
+          else if (depth > 8)
+            err = "Too many nested files";
+          else if (*line && *line != '#') // because the contents of line_buf is not re-entrant and will be cleared
+            err = "The input command must be the last one on a line";
+          if (err)
+            goto error;
+          struct fastbuf *new_fb = bopen_try(pars[0], O_RDONLY, 1<<14);
+          if (!new_fb) {
+            err = cf_printf("Cannot open file %s: %m", pars[0]);
+            goto error;
+          }
+          uns ll = line_num;
+          // The name is copied, because copy_buf is reused by the nested parser
+          err = parse_fastbuf(stk_strdup(pars[0]), new_fb, depth+1);
+          line_num = ll;
+          bclose(new_fb);
+          if (err)
+            goto error;
+          // The nested call has overwritten the global parser state, switch back to our input
+          parse_fb = fb;
+          name_parse_fb = name_fb;
+          continue;
+        }
+      enum cf_operation op;
+      char *c = strchr(name, ':');
+      if (!c)
+        op = strcmp(name, "}") ? OP_SET : OP_CLOSE;
+      else {
+        // "name:operation" -- guess the operation by its first letters, then verify the full name
+        *c++ = 0;
+        switch (Clocase(*c)) {
+          case 's': op = OP_SET; break;
+          case 'c': op = Clocase(c[1]) == 'l' ? OP_CLEAR: OP_COPY; break;
+          case 'a': switch (Clocase(c[1])) {
+              case 'p': op = OP_APPEND; break;
+              case 'f': op = OP_AFTER; break;
+              default: op = OP_ALL;
+            }; break;
+          case 'p': op = OP_PREPEND; break;
+          case 'r': op = OP_REMOVE; break;
+          case 'e': op = OP_EDIT; break;
+          case 'b': op = OP_BEFORE; break;
+          default: op = OP_SET; break;
+        }
+        if (strcasecmp(c, cf_op_names[op])) {
+          err = cf_printf("Unknown operation %s", c);
+          goto error;
+        }
+      }
+      if (ends_by_brace)
+        op |= OP_OPEN;
+      err = cf_interpret_line(name, op, words-1, pars);
+      if (err)
+        goto error;
+    }
+error:
+  if (name_fb)
+    msg(L_ERROR, "File %s, line %d: %s", name_fb, line_num, err);
+  else if (line_num == 1)
+    msg(L_ERROR, "Manual setting of configuration: %s", err);
+  else
+    msg(L_ERROR, "Manual setting of configuration, line %d: %s", line_num, err);
+  return "included from here";
+}
+
+// Name of the default configuration file loaded by load_default();
+// load_file() sets it to NULL after any file has been loaded successfully
+#ifndef DEFAULT_CONFIG
+#define DEFAULT_CONFIG NULL
+#endif
+char *cf_def_file = DEFAULT_CONFIG;
+
+// Name of an environment variable; if it is set, load_default() loads
+// the file it names instead of cf_def_file
+#ifndef ENV_VAR_CONFIG
+#define ENV_VAR_CONFIG NULL
+#endif
+char *cf_env_file = ENV_VAR_CONFIG;
+
+static uns postpone_commit; // only for cf_getopt()
+static uns everything_committed; // after the 1st load, this flag is set on
+
+/*
+ * Finish loading of a configuration: check the interpreter stack and
+ * commit the sections. Nothing is committed while postpone_commit is set;
+ * the first real commit uses CF_COMMIT_ALL, the following ones CF_COMMIT.
+ * Returns 0 on success, 1 on failure.
+ */
+static int
+done_stack(void)
+{
+  enum cf_commit_mode mode;
+
+  if (cf_check_stack())
+    return 1;
+  if (postpone_commit)
+    mode = CF_NO_COMMIT;
+  else if (everything_committed)
+    mode = CF_COMMIT;
+  else
+    mode = CF_COMMIT_ALL;
+  if (cf_commit_all(mode))
+    return 1;
+  if (!postpone_commit)
+    everything_committed = 1;
+  return 0;
+}
+
+/*
+ * Parse the given configuration file and finish by done_stack().
+ * After a successful load, cf_def_file is cleared, so that the default
+ * configuration is not loaded any more. Returns 0 on success, 1 on failure.
+ */
+static int
+load_file(const char *file)
+{
+  struct fastbuf *fb;
+  char *parse_err;
+
+  cf_init_stack();
+  fb = bopen_try(file, O_RDONLY, 1<<14);
+  if (!fb)
+    {
+      msg(L_ERROR, "Cannot open %s: %m", file);
+      return 1;
+    }
+  parse_err = parse_fastbuf(file, fb, 0);
+  bclose(fb);
+  if (parse_err || done_stack())
+    return 1;
+  cf_def_file = NULL;
+  return 0;
+}
+
+/*
+ * Parse configuration commands given as a string and finish by
+ * done_stack(). Returns 0 on success, 1 on failure.
+ */
+static int
+load_string(const char *string)
+{
+  struct fastbuf fb;
+
+  cf_init_stack();
+  fbbuf_init_read(&fb, (byte *)string, strlen(string), 0);
+  if (parse_fastbuf(NULL, &fb, 0))
+    return 1;
+  return done_stack() ? 1 : 0;
+}
+
+/* Safe loading and reloading */
+
+/*
+ * Reload the configuration from the given file inside a new journal
+ * transaction. If loading fails, the transaction is rolled back, the
+ * journals are swapped back and everything_committed is restored.
+ * Returns 0 on success, non-zero on failure.
+ */
+int
+cf_reload(const char *file)
+{
+  cf_journal_swap();
+  struct cf_journal_item *saved_journal = cf_journal_new_transaction(1);
+  uns saved_committed = everything_committed;
+  everything_committed = 0;
+
+  int err = load_file(file);
+  if (err)
+    {
+      everything_committed = saved_committed;
+      cf_journal_rollback_transaction(1, saved_journal);
+      cf_journal_swap();
+      return err;
+    }
+  cf_journal_delete();
+  cf_journal_commit_transaction(1, NULL);
+  return err;
+}
+
+/*
+ * Load a configuration file inside a journal transaction, which is
+ * committed on success and rolled back on failure.
+ * Returns 0 on success, non-zero on failure.
+ */
+int
+cf_load(const char *file)
+{
+  struct cf_journal_item *saved_journal = cf_journal_new_transaction(1);
+  int err = load_file(file);
+
+  if (err)
+    cf_journal_rollback_transaction(1, saved_journal);
+  else
+    cf_journal_commit_transaction(1, saved_journal);
+  return err;
+}
+
+/*
+ * Interpret configuration commands given as a string inside a journal
+ * transaction, which is committed on success and rolled back on failure.
+ * Returns 0 on success, non-zero on failure.
+ */
+int
+cf_set(const char *string)
+{
+  struct cf_journal_item *saved_journal = cf_journal_new_transaction(0);
+  int err = load_string(string);
+
+  if (err)
+    cf_journal_rollback_transaction(0, saved_journal);
+  else
+    cf_journal_commit_transaction(0, saved_journal);
+  return err;
+}
+
+/* Command-line parser */
+
+/*
+ * Load the default configuration if cf_def_file is still set: the file
+ * named by the environment variable cf_env_file has priority over
+ * cf_def_file itself. Dies if the file cannot be loaded. When there is
+ * no default file, only an empty transaction is committed.
+ */
+static void
+load_default(void)
+{
+  if (!cf_def_file)
+    {
+      // We need to create an empty pool
+      cf_journal_commit_transaction(1, cf_journal_new_transaction(1));
+      return;
+    }
+
+  char *env = NULL;
+  if (cf_env_file)
+    env = getenv(cf_env_file);
+  if (env)
+    {
+      if (cf_load(env))
+        die("Cannot load config file %s", env);
+    }
+  else if (cf_load(cf_def_file))
+    die("Cannot load default config %s", cf_def_file);
+}
+
+/*
+ * If committing has been postponed by cf_getopt(), stop postponing and
+ * commit now; die if that fails.
+ */
+static void
+final_commit(void)
+{
+  if (!postpone_commit)
+    return;
+  postpone_commit = 0;
+  if (done_stack())
+    die("Cannot commit after the initialization");
+}
+
+/*
+ * A wrapper around getopt_long() which processes the configuration
+ * options itself: -S sets configuration items from a string, -C loads
+ * a configuration file. These options must precede all other arguments.
+ *
+ * Any other result of getopt_long() (including the end of options) is
+ * returned to the caller after the default configuration has been loaded
+ * (unless the result is ':' or '?') and the postponed commit performed.
+ */
+int
+cf_getopt(int argc, char * const argv[], const char *short_opts, const struct option *long_opts, int *long_index)
+{
+ static int other_options = 0; // non-zero once a non-configuration option has been returned
+ while (1) {
+ int res = getopt_long (argc, argv, short_opts, long_opts, long_index);
+ // 0x64436667 is the value the --dumpconfig branch below reacts to
+ if (res == 'S' || res == 'C' || res == 0x64436667)
+ {
+ if (other_options)
+ die("The -S and -C options must precede all other arguments");
+ if (res == 'S') {
+ // Commits are postponed until final_commit()
+ postpone_commit = 1;
+ load_default();
+ if (cf_set(optarg))
+ die("Cannot set %s", optarg);
+ } else if (res == 'C') {
+ postpone_commit = 1;
+ if (cf_load(optarg))
+ die("Cannot load config file %s", optarg);
+ }
+#ifdef CONFIG_DEBUG
+ else { /* --dumpconfig */
+ // Dump the configuration to file descriptor 1 and exit
+ load_default();
+ final_commit();
+ struct fastbuf *b = bfdopen(1, 4096);
+ cf_dump_sections(b);
+ bclose(b);
+ exit(0);
+ }
+#endif
+ } else {
+ /* unhandled option or end of options */
+ if (res != ':' && res != '?')
+ load_default();
+ final_commit();
+ other_options++;
+ return res;
+ }
+ }
+}
+
--- /dev/null
+/*
+ * UCW Library -- Configuration files: only for internal use of conf-*.c
+ *
+ * (c) 2001--2006 Robert Spalek <robert@ucw.cz>
+ * (c) 2003--2006 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_CONF_INTERNAL_H
+#define _UCW_CONF_INTERNAL_H
+
+/* conf-intr.c */
+// Flags OR-ed to an enum cf_operation value
+#define OP_MASK 0xff // only get the operation
+#define OP_OPEN 0x100 // here we only get an opening brace instead of parameters
+#define OP_1ST 0x200 // in the 1st phase selectors are recorded into the mask
+#define OP_2ND 0x400 // in the 2nd phase real data are entered
+enum cf_operation;
+extern char *cf_op_names[]; // names of operations, indexed by enum cf_operation
+extern char *cf_type_names[]; // names of types, indexed by enum cf_type
+
+uns cf_type_size(enum cf_type type, struct cf_user_type *utype);
+char *cf_interpret_line(char *name, enum cf_operation op, int number, char **pars);
+void cf_init_stack(void);
+int cf_check_stack(void);
+
+/* conf-journal.c */
+void cf_journal_swap(void);
+void cf_journal_delete(void);
+
+/* conf-section.c */
+#define SEC_FLAG_DYNAMIC 0x80000000 // contains a dynamic attribute
+#define SEC_FLAG_UNKNOWN 0x40000000 // ignore unknown entries
+#define SEC_FLAG_CANT_COPY 0x20000000 // contains lists or parsers
+#define SEC_FLAG_NUMBER 0x0fffffff // number of entries
+enum cf_commit_mode { CF_NO_COMMIT, CF_COMMIT, CF_COMMIT_ALL };
+extern struct cf_section cf_sections; // root of the tree of sections
+
+struct cf_item *cf_find_subitem(struct cf_section *sec, const char *name);
+int cf_commit_all(enum cf_commit_mode cm);
+void cf_add_dirty(struct cf_section *sec, void *ptr);
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Configuration files: interpreter
+ *
+ * (c) 2001--2006 Robert Spalek <robert@ucw.cz>
+ * (c) 2003--2006 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/conf.h"
+#include "lib/getopt.h"
+#include "lib/conf-internal.h"
+#include "lib/clists.h"
+
+#include <string.h>
+#include <stdio.h>
+
+#define TRY(f) do { char *_msg = f; if (_msg) return _msg; } while (0)
+
+/* Register size of and parser for each basic type */
+
+/* Basic parser for string items: stores a cf_strdup()'ed copy of the value
+ * into *ptr. Never fails. */
+static char *
+cf_parse_string(char *str, char **ptr)
+{
+  char *copy = cf_strdup(str);
+  *ptr = copy;
+  return NULL;
+}
+
+/*
+ * Table of element sizes and parsers, indexed by enum cf_type.
+ * cf_type_size() reads .size for every type below CT_USER and cf_parse_ary()
+ * calls .parser (cast to cf_basic_parser) for every type below CT_LOOKUP.
+ */
+typedef char *cf_basic_parser(char *str, void *ptr);
+static struct {
+ uns size;
+ void *parser;
+} parsers[] = {
+ { sizeof(int), cf_parse_int },
+ { sizeof(u64), cf_parse_u64 },
+ { sizeof(double), cf_parse_double },
+ { sizeof(u32), cf_parse_ip },
+ { sizeof(char*), cf_parse_string },
+ { sizeof(int), NULL }, // lookups are parsed extra
+ { 0, NULL }, // user-defined types are parsed extra
+};
+
+/* Size in bytes of one element of the given type: taken from the parsers[]
+ * table for built-in types, from the user type descriptor otherwise. */
+inline uns
+cf_type_size(enum cf_type type, struct cf_user_type *utype)
+{
+  return (type < CT_USER) ? parsers[type].size : utype->size;
+}
+
+/*
+ * Translate a symbolic value into its index in the NULL-terminated name
+ * table t (case-insensitive comparison).
+ *
+ * On success stores the index into *ptr and returns NULL. On failure stores
+ * -1 and returns a cf_malloc()'ed message listing all accepted names.
+ */
+static char *
+cf_parse_lookup(char *str, int *ptr, char **t)
+{
+  uns names_len = 0;
+  for (char **cand = t; *cand; cand++) {
+    if (!strcasecmp(*cand, str)) {
+      *ptr = cand - t;
+      return NULL;
+    }
+    names_len += strlen(*cand) + 2;	// the name plus ", "
+  }
+
+  /* Nothing matched: build the list of possible values */
+  char *err = cf_malloc(names_len + strlen(str) + 60);
+  char *tail = err + sprintf(err, "Invalid value %s, possible values are: ", str);
+  for (char **cand = t; *cand; cand++)
+    tail += sprintf(tail, "%s, ", *cand);
+  if (*t)
+    tail[-2] = 0;			// cut off the trailing ", "
+  *ptr = -1;
+  return err;
+}
+
+/*
+ * Parse `number` textual values from pars[] into consecutive elements
+ * starting at ptr. Basic types go through the parsers[] table, lookups
+ * through cf_parse_lookup() and user types through their own parser.
+ * Returns NULL on success or the first error message; when more than one
+ * value is being parsed, the message is prefixed with the 1-based item index.
+ */
+static char *
+cf_parse_ary(uns number, char **pars, void *ptr, enum cf_type type, union cf_union *u)
+{
+ for (uns i=0; i<number; i++)
+ {
+ char *msg;
+ uns size = cf_type_size(type, u->utype);
+ if (type < CT_LOOKUP)
+ msg = ((cf_basic_parser*) parsers[type].parser) (pars[i], ptr + i * size);
+ else if (type == CT_LOOKUP)
+ msg = cf_parse_lookup(pars[i], ptr + i * size, u->lookup);
+ else if (type == CT_USER)
+ msg = u->utype->parser(pars[i], ptr + i * size);
+ else
+ ASSERT(0);
+ if (msg)
+ return number > 1 ? cf_printf("Item %d: %s", i+1, msg) : msg;
+ }
+ return NULL;
+}
+
+/* Interpreter */
+
+#define T(x) #x,
+char *cf_op_names[] = { CF_OPERATIONS };
+#undef T
+char *cf_type_names[] = { "int", "u64", "double", "ip", "string", "lookup", "user" };
+
+#define DARY_HDR_SIZE ALIGN_TO(sizeof(uns), CPU_STRUCT_ALIGN)
+
+/*
+ * SET on a dynamic array: journal the old array pointer, allocate a fresh
+ * array of `number` elements (length header in front) and parse the values
+ * into it.
+ */
+static char *
+interpret_set_dynamic(struct cf_item *item, int number, char **pars, void **ptr)
+{
+  // boundary checks done by the caller
+  cf_journal_block(ptr, sizeof(void*));
+  uns elt_size = cf_type_size(item->type, item->u.utype);
+  void *ary = cf_malloc(DARY_HDR_SIZE + number * elt_size) + DARY_HDR_SIZE;
+  *ptr = ary;
+  DARY_LEN(*ptr) = number;
+  return cf_parse_ary(number, pars, *ptr, item->type, &item->u);
+}
+
+/*
+ * APPEND or PREPEND values to a dynamic array.
+ *
+ * A new array holding the old elements plus as many new ones as the item's
+ * limit (ABS(item->number)) permits is allocated; the number of consumed
+ * parameters is stored in *processed. Any other operation is rejected
+ * before anything is journalled or modified, so a failed call no longer
+ * leaves *ptr pointing to an uninitialized array.
+ */
+static char *
+interpret_add_dynamic(struct cf_item *item, int number, char **pars, int *processed, void **ptr, enum cf_operation op)
+{
+  if (op != OP_APPEND && op != OP_PREPEND)
+    return cf_printf("Dynamic arrays do not support operation %s", cf_op_names[op]);
+
+  enum cf_type type = item->type;
+  void *old_p = *ptr;
+  uns size = cf_type_size(item->type, item->u.utype);
+  ASSERT(size >= sizeof(uns));
+  int old_nr = old_p ? DARY_LEN(old_p) : 0;
+  int taken = MIN(number, ABS(item->number)-old_nr);
+  if (taken < 0)		// the array already exceeds its limit; never shrink the allocation below old_nr
+    taken = 0;
+  *processed = taken;
+
+  // stretch the dynamic array
+  void *new_p = cf_malloc(DARY_HDR_SIZE + (old_nr + taken) * size) + DARY_HDR_SIZE;
+  DARY_LEN(new_p) = old_nr + taken;
+  cf_journal_block(ptr, sizeof(void*));
+  *ptr = new_p;
+
+  if (op == OP_APPEND) {
+    if (old_nr)			// old_p may be NULL when the array is empty
+      memcpy(new_p, old_p, old_nr * size);
+    return cf_parse_ary(taken, pars, new_p + old_nr * size, type, &item->u);
+  } else {
+    if (old_nr)
+      memcpy(new_p + taken * size, old_p, old_nr * size);
+    return cf_parse_ary(taken, pars, new_p, type, &item->u);
+  }
+}
+
+static char *interpret_set_item(struct cf_item *item, int number, char **pars, int *processed, void *ptr, uns allow_dynamic);
+
+/*
+ * Feed parameters to the items of a section in declaration order.
+ * The section is marked dirty first. Each item consumes some parameters via
+ * interpret_set_item(); dynamic constructs are permitted only for the last
+ * item of the section (and only if the caller allows them). The total number
+ * of consumed parameters is returned in *processed.
+ */
+static char *
+interpret_section(struct cf_section *sec, int number, char **pars, int *processed, void *ptr, uns allow_dynamic)
+{
+ cf_add_dirty(sec, ptr);
+ *processed = 0;
+ for (struct cf_item *ci=sec->cfg; ci->cls; ci++)
+ {
+ int taken;
+ char *msg = interpret_set_item(ci, number, pars, &taken, ptr + (uintptr_t) ci->ptr, allow_dynamic && !ci[1].cls);
+ if (msg)
+ return cf_printf("Item %s: %s", ci->name, msg);
+ *processed += taken;
+ number -= taken;
+ pars += taken;
+ if (!number) // stop parsing, because many parsers would otherwise complain that number==0
+ break;
+ }
+ return NULL;
+}
+
+/*
+ * Perform the list half of an operation: unlink `where` (OP_REMOVE), or link
+ * new_node after / before `where`. Every neighbour pointer that is about to
+ * change is journalled first. `where` is either a list node or the list head
+ * itself (a clist is passed as a cnode here).
+ */
+static void
+add_to_list(cnode *where, cnode *new_node, enum cf_operation op)
+{
+ switch (op)
+ {
+ case OP_EDIT: // edition has been done in-place
+ break;
+ case OP_REMOVE:
+ CF_JOURNAL_VAR(where->prev->next);
+ CF_JOURNAL_VAR(where->next->prev);
+ clist_remove(where);
+ break;
+ case OP_AFTER: // implementation dependent (prepend_head = after(list)), and where==list, see clists.h:74
+ case OP_PREPEND:
+ case OP_COPY:
+ CF_JOURNAL_VAR(where->next->prev);
+ CF_JOURNAL_VAR(where->next);
+ clist_insert_after(new_node, where);
+ break;
+ case OP_BEFORE: // implementation dependent (append_tail = before(list))
+ case OP_APPEND:
+ case OP_SET:
+ CF_JOURNAL_VAR(where->prev->next);
+ CF_JOURNAL_VAR(where->prev);
+ clist_insert_before(new_node, where);
+ break;
+ default:
+ ASSERT(0);
+ }
+}
+
+/*
+ * Add nodes to a list from parameters given on a single line.
+ * Operations from OP_REMOVE upwards need a { } block and are rejected here.
+ * A new node is allocated, initialized and linked in for each group of
+ * parameters consumed by interpret_section(); the loop repeats until all
+ * parameters are used up, unless the section is flagged SEC_FLAG_DYNAMIC,
+ * in which case exactly one node is created.
+ */
+static char *
+interpret_add_list(struct cf_item *item, int number, char **pars, int *processed, void *ptr, enum cf_operation op)
+{
+ if (op >= OP_REMOVE)
+ return cf_printf("You have to open a block for operation %s", cf_op_names[op]);
+ if (!number)
+ return "Nothing to add to the list";
+ struct cf_section *sec = item->u.sec;
+ *processed = 0;
+ uns index = 0;
+ while (number > 0)
+ {
+ void *node = cf_malloc(sec->size);
+ cf_init_section(item->name, sec, node, 1);
+ add_to_list(ptr, node, op);
+ int taken;
+ /* If the node contains any dynamic attribute at the end, we suppress
+ * auto-repetition here and pass the flag inside instead. */
+ index++;
+ char *msg = interpret_section(sec, number, pars, &taken, node, sec->flags & SEC_FLAG_DYNAMIC);
+ if (msg)
+ return sec->flags & SEC_FLAG_DYNAMIC ? msg : cf_printf("Node %d of list %s: %s", index, item->name, msg);
+ *processed += taken;
+ number -= taken;
+ pars += taken;
+ if (sec->flags & SEC_FLAG_DYNAMIC)
+ break;
+ }
+ return NULL;
+}
+
+/*
+ * Set (OP_SET) or clear (OP_REMOVE) bits of a 32-bit bitmap.
+ *
+ * Each parameter is either a bit number (CT_INT) or a symbolic name looked
+ * up in item->u.lookup (CT_LOOKUP). The old bitmap value is journalled before
+ * the first change. All parameters are consumed; *processed is set only on
+ * success.
+ */
+static char *
+interpret_add_bitmap(struct cf_item *item, int number, char **pars, int *processed, u32 *ptr, enum cf_operation op)
+{
+  if (op != OP_SET && op != OP_REMOVE)
+    return cf_printf("Cannot apply operation %s on a bitmap", cf_op_names[op]);
+  else if (item->type != CT_INT && item->type != CT_LOOKUP)
+    return cf_printf("Type %s cannot be used with bitmaps", cf_type_names[item->type]);
+  cf_journal_block(ptr, sizeof(u32));
+  for (int i=0; i<number; i++) {
+    int idx;			// both parsers store through an int *
+    if (item->type == CT_INT)
+      TRY( cf_parse_int(pars[i], &idx) );
+    else
+      TRY( cf_parse_lookup(pars[i], &idx, item->u.lookup) );
+    if (idx < 0 || idx >= 32)
+      return "Bitmaps only have 32 bits";
+    u32 bit = (u32) 1 << idx;	// unsigned shift: 1<<31 on a signed int is undefined
+    if (op == OP_SET)
+      *ptr |= bit;
+    else
+      *ptr &= ~bit;
+  }
+  *processed = number;
+  return NULL;
+}
+
+/*
+ * SET operation on a single item, dispatched by the item's class.
+ * Consumes up to `number` parameters from pars[] and reports the count in
+ * *processed. allow_dynamic tells whether constructs taking a variable
+ * number of parameters (dynamic arrays, variadic parsers, lists, bitmaps)
+ * may be used at this position. Returns NULL or an error message.
+ */
+static char *
+interpret_set_item(struct cf_item *item, int number, char **pars, int *processed, void *ptr, uns allow_dynamic)
+{
+ int taken;
+ switch (item->cls)
+ {
+ case CC_STATIC:
+ if (!number)
+ return "Missing value";
+ taken = MIN(number, item->number);
+ *processed = taken;
+ uns size = cf_type_size(item->type, item->u.utype);
+ cf_journal_block(ptr, taken * size);
+ return cf_parse_ary(taken, pars, ptr, item->type, &item->u);
+ case CC_DYNAMIC:
+ if (!allow_dynamic)
+ return "Dynamic array cannot be used here";
+ taken = MIN(number, ABS(item->number));
+ *processed = taken;
+ return interpret_set_dynamic(item, taken, pars, ptr);
+ case CC_PARSER:
+ if (item->number < 0 && !allow_dynamic)
+ return "Parsers with variable number of parameters cannot be used here";
+ if (item->number > 0 && number < item->number)
+ return "Not enough parameters available for the parser";
+ taken = MIN(number, ABS(item->number));
+ *processed = taken;
+ /* hand the parser private copies of the parameters */
+ for (int i=0; i<taken; i++)
+ pars[i] = cf_strdup(pars[i]);
+ return item->u.par(taken, pars, ptr);
+ case CC_SECTION:
+ return interpret_section(item->u.sec, number, pars, processed, ptr, allow_dynamic);
+ case CC_LIST:
+ if (!allow_dynamic)
+ return "Lists cannot be used here";
+ return interpret_add_list(item, number, pars, processed, ptr, OP_SET);
+ case CC_BITMAP:
+ if (!allow_dynamic)
+ return "Bitmaps cannot be used here";
+ return interpret_add_bitmap(item, number, pars, processed, ptr, OP_SET);
+ default:
+ ASSERT(0);
+ }
+}
+
+/*
+ * Operations CLEAR and ALL applied to a whole item.
+ *
+ * Bitmaps: CLEAR zeroes the bitmap; ALL sets all 32 bits for integer
+ * bitmaps, or one bit per name for lookup bitmaps.
+ * Other items accept only CLEAR: lists are re-initialized, dynamic arrays
+ * are replaced by a shared empty array and static strings are zeroed.
+ * The previous contents are journalled before being overwritten.
+ */
+static char *
+interpret_set_all(struct cf_item *item, void *ptr, enum cf_operation op)
+{
+  if (item->cls == CC_BITMAP) {
+    cf_journal_block(ptr, sizeof(u32));
+    u32 bits;
+    if (op == OP_CLEAR)
+      bits = 0;
+    else if (item->type == CT_INT)
+      bits = ~0u;
+    else {
+      uns nr = 0;
+      while (item->u.lookup[nr])
+	nr++;
+      /* Shifting a 32-bit value by 32 or by a negative count is undefined,
+       * so an empty table and a table with 32+ names are handled explicitly. */
+      if (!nr)
+	bits = 0;
+      else if (nr >= 32)
+	bits = ~0u;
+      else
+	bits = ~0u >> (32-nr);
+    }
+    * (u32*) ptr = bits;
+    return NULL;
+  } else if (op != OP_CLEAR)
+    return "The item is not a bitmap";
+
+  if (item->cls == CC_LIST) {
+    cf_journal_block(ptr, sizeof(clist));
+    clist_init(ptr);
+  } else if (item->cls == CC_DYNAMIC) {
+    cf_journal_block(ptr, sizeof(void *));
+    static uns zero = 0;		// length header of the shared empty array
+    * (void**) ptr = (&zero) + 1;
+  } else if (item->cls == CC_STATIC && item->type == CT_STRING) {
+    cf_journal_block(ptr, item->number * sizeof(char*));
+    bzero(ptr, item->number * sizeof(char*));
+  } else
+    return "The item is not a list, dynamic array, bitmap, or string";
+  return NULL;
+}
+
+/*
+ * Compare the same static attribute in two nodes. i1 and i2 are node base
+ * pointers; item->ptr is the attribute's offset inside a node. Strings are
+ * compared by strcmp(), everything else bytewise. Returns 0 when equal.
+ */
+static int
+cmp_items(void *i1, void *i2, struct cf_item *item)
+{
+  ASSERT(item->cls == CC_STATIC);
+  uintptr_t offset = (uintptr_t) item->ptr;
+  void *a = i1 + offset;
+  void *b = i2 + offset;
+  if (item->type != CT_STRING)	// all numeric types
+    return memcmp(a, b, cf_type_size(item->type, item->u.utype));
+  return strcmp(* (char**) a, * (char**) b);
+}
+
+/*
+ * Find the first node of the list whose attributes selected by `mask`
+ * (bit i selects sec->cfg[i]) all compare equal to those of the `query`
+ * node. Returns the node or NULL if none matches.
+ */
+static void *
+find_list_node(clist *list, void *query, struct cf_section *sec, u32 mask)
+{
+  CLIST_FOR_EACH(cnode *, n, *list)
+    {
+      uns found = 1;
+      for (uns i=0; i<32; i++)
+	// unsigned shift: 1<<31 on a signed int is undefined
+	if ((mask & ((u32) 1 << i)) && cmp_items(n, query, sec->cfg+i))
+	  {
+	    found = 0;
+	    break;
+	  }
+      if (found)
+	return n;
+    }
+  return NULL;
+}
+
+/*
+ * During the 1st phase of a list operation, remember that `item` was set
+ * in the query node, so that it takes part in the node search.
+ * Items that do not belong directly to `sec` are ignored. Only the first
+ * 32 items of a section can serve as selectors and they must be static.
+ */
+static char *
+record_selector(struct cf_item *item, struct cf_section *sec, u32 *mask)
+{
+  uns nr = sec->flags & SEC_FLAG_NUMBER;
+  if (item >= sec->cfg && item < sec->cfg + nr)	// setting an attribute relative to this section
+    {
+      uns i = item - sec->cfg;
+      if (i >= 32)
+	return "Cannot select list nodes by this attribute";
+      if (sec->cfg[i].cls != CC_STATIC)
+	return "Selection can only be done based on basic attributes";
+      *mask |= (u32) 1 << i;	// unsigned shift: 1<<31 on a signed int is undefined
+    }
+  return NULL;
+}
+
+/*
+ * Stack of currently open { } blocks. stack[0] is the top-level entry set up
+ * by cf_init_stack(); `level` is the index of the innermost open block.
+ */
+#define MAX_STACK_SIZE 10
+static struct item_stack {
+ struct cf_section *sec; // nested section
+ void *base_ptr; // because original pointers are often relative
+ enum cf_operation op; // it is performed when a closing brace is encountered
+ void *list; // list the operations should be done on
+ u32 mask; // bit array of selectors searching in a list
+ struct cf_item *item; // cf_item of the list
+} stack[MAX_STACK_SIZE];
+static uns level;
+
+/*
+ * Handle "item {": push a new entry on the block stack.
+ * An unknown item (item == NULL) gets a dummy entry so that nesting is
+ * still tracked. For a section the entry edits the section in place.
+ * For a list a fresh node is allocated; operations below OP_REMOVE link it
+ * in immediately (2nd phase), the others first collect selectors (1st phase)
+ * and are finished in closing_brace().
+ * NOTE(review): the stack entry stays pushed even when an error is returned.
+ */
+static char *
+opening_brace(struct cf_item *item, void *ptr, enum cf_operation op)
+{
+ if (level >= MAX_STACK_SIZE-1)
+ return "Too many nested sections";
+ enum cf_operation pure_op = op & OP_MASK;
+ stack[++level] = (struct item_stack) {
+ .sec = NULL,
+ .base_ptr = NULL,
+ .op = pure_op,
+ .list = NULL,
+ .mask = 0,
+ .item = NULL,
+ };
+ if (!item) // unknown is ignored; we just need to trace recursion
+ return NULL;
+ stack[level].sec = item->u.sec;
+ if (item->cls == CC_SECTION)
+ {
+ if (pure_op != OP_SET)
+ return "Only SET operation can be used with a section";
+ stack[level].base_ptr = ptr;
+ stack[level].op = OP_EDIT | OP_2ND; // this list operation does nothing
+ }
+ else if (item->cls == CC_LIST)
+ {
+ stack[level].base_ptr = cf_malloc(item->u.sec->size);
+ cf_init_section(item->name, item->u.sec, stack[level].base_ptr, 1);
+ stack[level].list = ptr;
+ stack[level].item = item;
+ if (pure_op == OP_ALL)
+ return "Operation ALL cannot be applied on lists";
+ else if (pure_op < OP_REMOVE) {
+ add_to_list(ptr, stack[level].base_ptr, pure_op);
+ stack[level].op |= OP_2ND;
+ } else
+ stack[level].op |= OP_1ST;
+ }
+ else
+ return "Opening brace can only be used on sections and lists";
+ return NULL;
+}
+
+/*
+ * Handle "}" for the stack entry st, optionally followed by parameters or
+ * by another "{" (op & OP_OPEN).
+ * If the block was in the 1st phase, the collected selectors are used to
+ * find the target node, and the pending operation (remove / edit / insert
+ * after or before / copy) is carried out. With OP_OPEN the entry stays on
+ * the stack and switches to the 2nd phase; otherwise trailing parameters are
+ * parsed into the node immediately and the entry is popped.
+ */
+static char *
+closing_brace(struct item_stack *st, enum cf_operation op, int number, char **pars)
+{
+ if (st->op == OP_CLOSE) // top-level
+ return "Unmatched } parenthesis";
+ if (!st->sec) { // dummy run on unknown section
+ if (!(op & OP_OPEN))
+ level--;
+ return NULL;
+ }
+ enum cf_operation pure_op = st->op & OP_MASK;
+ if (st->op & OP_1ST)
+ {
+ /* from now on st->list is the matching node, not the list head */
+ st->list = find_list_node(st->list, st->base_ptr, st->sec, st->mask);
+ if (!st->list)
+ return "Cannot find a node matching the query";
+ if (pure_op != OP_REMOVE)
+ {
+ if (pure_op == OP_EDIT)
+ st->base_ptr = st->list;
+ else if (pure_op == OP_AFTER || pure_op == OP_BEFORE)
+ cf_init_section(st->item->name, st->sec, st->base_ptr, 1);
+ else if (pure_op == OP_COPY) {
+ if (st->sec->flags & SEC_FLAG_CANT_COPY)
+ return cf_printf("Item %s cannot be copied", st->item->name);
+ memcpy(st->base_ptr, st->list, st->sec->size); // strings and dynamic arrays are shared
+ if (st->sec->copy)
+ TRY( st->sec->copy(st->base_ptr, st->list) );
+ } else
+ ASSERT(0);
+ if (op & OP_OPEN) { // stay at the same recursion level
+ st->op = (st->op | OP_2ND) & ~OP_1ST;
+ add_to_list(st->list, st->base_ptr, pure_op);
+ return NULL;
+ }
+ int taken; // parse parameters on 1 line immediately
+ TRY( interpret_section(st->sec, number, pars, &taken, st->base_ptr, 1) );
+ number -= taken;
+ pars += taken;
+ // and fall-thru to the 2nd phase
+ }
+ add_to_list(st->list, st->base_ptr, pure_op);
+ }
+ level--;
+ if (number)
+ return "No parameters expected after the }";
+ else if (op & OP_OPEN)
+ return "No { is expected";
+ else
+ return NULL;
+}
+
+/*
+ * Resolve a dotted item name relative to curr_sec; a leading '^' makes the
+ * name absolute (searched from the root section with *ptr reset to NULL).
+ * Every section visited on the way, except the root, is marked dirty and
+ * the offsets of the visited items are added to *ptr.
+ * Returns the item, or NULL with *msg set on error. NULL with *msg == NULL
+ * means the item is unknown but unknown items are tolerated here.
+ * NOTE(review): the '.' separators are overwritten with NUL, so the buffer
+ * behind `name` is modified even though it is declared const.
+ */
+static struct cf_item *
+find_item(struct cf_section *curr_sec, const char *name, char **msg, void **ptr)
+{
+ *msg = NULL;
+ if (name[0] == '^') // absolute name instead of relative
+ name++, curr_sec = &cf_sections, *ptr = NULL;
+ if (!curr_sec) // don't even search in an unknown section
+ return NULL;
+ while (1)
+ {
+ if (curr_sec != &cf_sections)
+ cf_add_dirty(curr_sec, *ptr);
+ char *c = strchr(name, '.');
+ if (c)
+ *c++ = 0;
+ struct cf_item *ci = cf_find_subitem(curr_sec, name);
+ if (!ci->cls)
+ {
+ if (!(curr_sec->flags & SEC_FLAG_UNKNOWN)) // ignore silently unknown top-level sections and unknown attributes in flagged sections
+ *msg = cf_printf("Unknown item %s", name);
+ return NULL;
+ }
+ *ptr += (uintptr_t) ci->ptr;
+ if (!c)
+ return ci;
+ if (ci->cls != CC_SECTION)
+ {
+ *msg = cf_printf("Item %s is not a section", name);
+ return NULL;
+ }
+ curr_sec = ci->u.sec;
+ name = c;
+ }
+}
+
+/*
+ * Interpret one logical configuration line: item `name`, operation `op`
+ * (possibly OR-ed with OP_OPEN) and `number` parameters in pars[].
+ * A closing brace is passed to closing_brace(). Otherwise the item is looked
+ * up relative to the innermost open block and either a new block is opened
+ * or the operation is applied directly. It is an error if not all
+ * parameters get consumed. Returns NULL on success or an error message.
+ */
+char *
+cf_interpret_line(char *name, enum cf_operation op, int number, char **pars)
+{
+ char *msg;
+ if ((op & OP_MASK) == OP_CLOSE)
+ return closing_brace(stack+level, op, number, pars);
+ void *ptr = stack[level].base_ptr;
+ struct cf_item *item = find_item(stack[level].sec, name, &msg, &ptr);
+ if (msg)
+ return msg;
+ if (stack[level].op & OP_1ST)
+ TRY( record_selector(item, stack[level].sec, &stack[level].mask) );
+ if (op & OP_OPEN) { // the operation will be performed after the closing brace
+ if (number)
+ return "Cannot open a block after a parameter has been passed on a line";
+ return opening_brace(item, ptr, op);
+ }
+ if (!item) // ignored item in an unknown section
+ return NULL;
+ op &= OP_MASK;
+
+ int taken = 0; // process as many parameters as possible
+ if (op == OP_CLEAR || op == OP_ALL)
+ msg = interpret_set_all(item, ptr, op);
+ else if (op == OP_SET)
+ msg = interpret_set_item(item, number, pars, &taken, ptr, 1);
+ else if (item->cls == CC_DYNAMIC)
+ msg = interpret_add_dynamic(item, number, pars, &taken, ptr, op);
+ else if (item->cls == CC_LIST)
+ msg = interpret_add_list(item, number, pars, &taken, ptr, op);
+ else if (item->cls == CC_BITMAP)
+ msg = interpret_add_bitmap(item, number, pars, &taken, ptr, op);
+ else
+ return cf_printf("Operation %s not supported on attribute %s", cf_op_names[op], name);
+ if (msg)
+ return msg;
+ if (taken < number)
+ return cf_printf("Too many parameters: %d>%d", number, taken);
+
+ return NULL;
+}
+
+/*
+ * Look up an item by its name relative to the root section and copy its
+ * description to *item, with item->ptr replaced by the resolved address.
+ * If the item is unknown but tolerated, *item is zeroed instead.
+ * Returns NULL on success or an error message.
+ */
+char *
+cf_find_item(const char *name, struct cf_item *item)
+{
+  char *err;
+  void *base = NULL;
+  struct cf_item *found = find_item(&cf_sections, name, &err, &base);
+  if (err)
+    return err;
+  if (!found) {
+    bzero(item, sizeof(struct cf_item));
+    return NULL;
+  }
+  *item = *found;
+  item->ptr = base;
+  return NULL;
+}
+
+/*
+ * Apply an operation with `number` parameters to an item whose ptr has
+ * already been resolved to an address. Supports SET, CLEAR and ALL on any
+ * suitable item, APPEND/PREPEND on dynamic arrays and lists, and REMOVE on
+ * bitmaps. Returns NULL on success or an error message.
+ */
+char *
+cf_write_item(struct cf_item *item, enum cf_operation op, int number, char **pars)
+{
+  char *err;
+  int taken = 0;
+
+  if (op == OP_SET)
+    err = interpret_set_item(item, number, pars, &taken, item->ptr, 1);
+  else if (op == OP_CLEAR || op == OP_ALL)
+    err = interpret_set_all(item, item->ptr, op);
+  else if (op == OP_APPEND || op == OP_PREPEND) {
+    if (item->cls == CC_DYNAMIC)
+      err = interpret_add_dynamic(item, number, pars, &taken, item->ptr, op);
+    else if (item->cls == CC_LIST)
+      err = interpret_add_list(item, number, pars, &taken, item->ptr, op);
+    else
+      return "The attribute does not support append/prepend";
+  } else if (op == OP_REMOVE) {
+    if (item->cls != CC_BITMAP)
+      return "Only applicable on bitmaps";
+    err = interpret_add_bitmap(item, number, pars, &taken, item->ptr, op);
+  } else
+    return "Unsupported operation";
+
+  if (err)
+    return err;
+  return (taken < number) ? "Too many parameters" : NULL;
+}
+
+/*
+ * Reset the block stack to a single top-level entry. On the very first call
+ * the root section is also finalized: unknown top-level sections become
+ * tolerated and the root section is initialized.
+ */
+void
+cf_init_stack(void)
+{
+  static uns initialized = 0;
+  if (!initialized) {
+    cf_sections.flags |= SEC_FLAG_UNKNOWN;
+    cf_sections.size = 0;			// size of allocated array used to be stored here
+    cf_init_section(NULL, &cf_sections, NULL, 0);
+  }
+  initialized++;
+
+  struct item_stack bottom = {
+    .sec = &cf_sections,
+    .base_ptr = NULL,
+    .op = OP_CLOSE,
+    .list = NULL,
+    .mask = 0,
+    .item = NULL
+  };
+  level = 0;
+  stack[0] = bottom;
+}
+
+int
+cf_check_stack(void)
+{
+ if (level > 0) {
+ msg(L_ERROR, "Unterminated block");
+ return 1;
+ }
+ return 0;
+}
+
--- /dev/null
+/*
+ * UCW Library -- Configuration files: journaling
+ *
+ * (c) 2001--2006 Robert Spalek <robert@ucw.cz>
+ * (c) 2003--2006 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/conf.h"
+#include "lib/getopt.h"
+#include "lib/conf-internal.h"
+#include "lib/mempool.h"
+
+#include <string.h>
+
+/* Pools of committed transactions, newest first; released by cf_journal_delete(). */
+static struct old_pools {
+ struct old_pools *prev;
+ struct mempool *pool;
+} *pools; // link-list of older cf_pool's
+
+uns cf_need_journal = 1; // some programs do not need journal
+/* One journal record: a saved copy of `len` bytes that lived at `ptr`.
+ * Records are linked from the newest to the oldest via `prev`. */
+static struct cf_journal_item {
+ struct cf_journal_item *prev;
+ byte *ptr;
+ uns len;
+ byte copy[0];
+} *journal;
+
+/*
+ * Record the current contents of len bytes at ptr in the journal, so that
+ * the change which is about to be made can be undone. Does nothing when
+ * journalling is switched off.
+ */
+void
+cf_journal_block(void *ptr, uns len)
+{
+  if (cf_need_journal) {
+    struct cf_journal_item *entry = cf_malloc(sizeof(struct cf_journal_item) + len);
+    memcpy(entry->copy, ptr, len);
+    entry->ptr = ptr;
+    entry->len = len;
+    entry->prev = journal;
+    journal = entry;
+  }
+}
+
+/*
+ * Swap the contents of the memory and the journal, and reverse the list.
+ * Records are processed from the newest to the oldest; calling the function
+ * twice restores the original state.
+ */
+void
+cf_journal_swap(void)
+{
+  struct cf_journal_item *reversed = NULL;
+  struct cf_journal_item *curr = journal;
+  while (curr)
+    {
+      struct cf_journal_item *older = curr->prev;
+      curr->prev = reversed;
+      for (uns i=0; i<curr->len; i++)
+	{
+	  byte saved = curr->copy[i];
+	  curr->copy[i] = curr->ptr[i];
+	  curr->ptr[i] = saved;
+	}
+      reversed = curr;
+      curr = older;
+    }
+  journal = reversed;
+}
+
+/*
+ * Start a transaction: optionally switch to a fresh memory pool and start
+ * an empty journal. Returns the previous journal, which must be handed to
+ * the matching commit or rollback call.
+ */
+struct cf_journal_item *
+cf_journal_new_transaction(uns new_pool)
+{
+  if (new_pool)
+    cf_pool = mp_new(1<<10);
+  struct cf_journal_item *saved = journal;
+  journal = NULL;
+  return saved;
+}
+
+/*
+ * Commit a transaction: remember the current pool in the list of old pools
+ * (if a new one was created for the transaction) and append the previous
+ * journal behind the oldest record of the current one.
+ */
+void
+cf_journal_commit_transaction(uns new_pool, struct cf_journal_item *oldj)
+{
+  if (new_pool)
+    {
+      struct old_pools *entry = cf_malloc(sizeof(struct old_pools));
+      entry->pool = cf_pool;
+      entry->prev = pools;
+      pools = entry;
+    }
+  if (!oldj)
+    return;
+  struct cf_journal_item **tail;
+  for (tail = &journal; *tail; tail = &(*tail)->prev)
+    ;
+  *tail = oldj;
+}
+
+/*
+ * Roll a transaction back: undo all journalled changes, reinstate the
+ * previous journal and, if the transaction had its own pool, delete it and
+ * return to the most recent committed pool (or none). Dies when journalling
+ * is disabled.
+ */
+void
+cf_journal_rollback_transaction(uns new_pool, struct cf_journal_item *oldj)
+{
+  if (!cf_need_journal)
+    die("Cannot rollback the configuration, because the journal is disabled.");
+  cf_journal_swap();
+  journal = oldj;
+  if (!new_pool)
+    return;
+  mp_delete(cf_pool);
+  if (pools)
+    cf_pool = pools->pool;
+  else
+    cf_pool = NULL;
+}
+
+/* Delete all remembered pools of committed transactions. The link to the
+ * next entry is read before its pool is deleted. */
+void
+cf_journal_delete(void)
+{
+  while (pools)
+    {
+      struct old_pools *entry = pools;
+      pools = entry->prev;
+      mp_delete(entry->pool);
+    }
+}
+
+/* TODO: more space efficient journal */
--- /dev/null
+/*
+ * UCW Library -- Configuration files: parsers for basic types
+ *
+ * (c) 2001--2006 Robert Spalek <robert@ucw.cz>
+ * (c) 2003--2006 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/conf.h"
+#include "lib/chartype.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <errno.h>
+
+/* A one-letter suffix which scales the preceding number by num/den. */
+struct unit {
+ uns name; // one-letter name of the unit
+ uns num, den; // fraction
+};
+
+/* Known unit suffixes, terminated by an entry with a zero name */
+static const struct unit units[] = {
+ { 'd', 86400, 1 },
+ { 'h', 3600, 1 },
+ { 'k', 1000, 1 },
+ { 'm', 1000000, 1 },
+ { 'g', 1000000000, 1 },
+ { 'K', 1024, 1 },
+ { 'M', 1048576, 1 },
+ { 'G', 1073741824, 1 },
+ { '%', 1, 100 },
+ { 0, 0, 0 }
+};
+
+/*
+ * Interpret whatever follows the numeric part of `value` (starting at
+ * `end`) as a unit suffix. Returns the unit, or NULL when there is no
+ * suffix or it is invalid; in the latter case *msg is set. The suffix must
+ * be a single non-digit character and the number must not be empty.
+ */
+static const struct unit *
+lookup_unit(const char *value, const char *end, char **msg)
+{
+  if (!end || !*end)
+    return NULL;
+  if (end == value || end[1] || (*end >= '0' && *end <= '9')) {
+    *msg = "Invalid number";
+    return NULL;
+  }
+  for (const struct unit *u=units; u->name; u++)
+    if ((char)u->name == *end)
+      return u;
+  *msg = "Invalid unit";
+  return NULL;
+}
+
+static char cf_rngerr[] = "Number out of range";
+
+/*
+ * Parse a 32-bit integer with an optional unit suffix.
+ *
+ * The number is read by strtoul() with automatic base detection, so both
+ * unsigned values up to 0xffffffff and negative values (which strtoul()
+ * returns negated in unsigned arithmetic) are accepted. Returns NULL on
+ * success or an error message; *ptr is not written when the value is out
+ * of range.
+ */
+char *
+cf_parse_int(const char *str, int *ptr)
+{
+  char *msg = NULL;
+  if (!*str)
+    msg = "Missing number";
+  else {
+    const struct unit *u;
+    char *end;
+    errno = 0;
+    unsigned long raw = strtoul(str, &end, 0);
+    uns x = raw;
+    /* Where long is wider than 32 bits, strtoul() does not report 32-bit
+     * overflow itself. Accept only [0, 2^32-1] and wrapped negatives down
+     * to -2^31; everything in between is out of range. */
+    if (errno == ERANGE || (raw > 0xffffffffUL && raw < -(unsigned long) 0x80000000UL))
+      msg = cf_rngerr;
+    else if ((u = lookup_unit(str, end, &msg))) {
+      u64 y = (u64)x * u->num;
+      if (y % u->den)
+	msg = "Number is not an integer";
+      else {
+	y /= u->den;
+	if (y > 0xffffffff)
+	  msg = cf_rngerr;
+	else
+	  *ptr = y;
+      }
+    } else
+      *ptr = x;
+  }
+  return msg;
+}
+
+/*
+ * Parse an unsigned 64-bit integer with an optional unit suffix.
+ * Returns NULL on success or an error message. As with an unrecognized
+ * suffix in the original flow, the plain number is still stored when
+ * lookup_unit() finds no unit, even if it reports an error.
+ */
+char *
+cf_parse_u64(const char *str, u64 *ptr)
+{
+  if (!*str)
+    return "Missing number";
+
+  char *msg = NULL;
+  char *end;
+  errno = 0;
+  u64 x = strtoull(str, &end, 0);
+  if (errno == ERANGE)
+    return cf_rngerr;
+
+  const struct unit *u = lookup_unit(str, end, &msg);
+  if (!u) {
+    *ptr = x;
+    return msg;
+  }
+  if (x > ~(u64)0 / u->num)
+    return "Number out of range";
+  x *= u->num;
+  if (x % u->den)
+    return "Number is not an integer";
+  *ptr = x / u->den;
+  return msg;
+}
+
+/*
+ * Parse a floating-point number with an optional unit suffix.
+ * Returns NULL on success or an error message.
+ */
+char *
+cf_parse_double(const char *str, double *ptr)
+{
+  char *msg = NULL;
+  if (!*str)
+    msg = "Missing number";
+  else {
+    const struct unit *u;
+    double x;
+    int read_chars;		// %n stores through an int *
+    if (sscanf(str, "%lf%n", &x, &read_chars) != 1)
+      msg = "Invalid number";
+    else if ((u = lookup_unit(str, str + read_chars, &msg)))
+      *ptr = x * u->num / u->den;
+    else
+      *ptr = x;
+  }
+  return msg;
+}
+
+/*
+ * Parse an IPv4 address given either as a hexadecimal number ("0x7f000001")
+ * or in dotted-quad notation ("127.0.0.1") and store it to *varp.
+ * Returns NULL on success or an error message.
+ *
+ * The results of strtoul() are kept in unsigned long until they have been
+ * range-checked; assigning them to a 32-bit variable first would let
+ * oversized values slip through where long is wider than 32 bits.
+ */
+char *
+cf_parse_ip(const char *p, u32 *varp)
+{
+  if (!*p)
+    return "Missing IP address";
+  uns x = 0;
+  char *p2;
+  if (*p == '0' && (p[1] | 32) == 'x' && Cxdigit(p[2])) {
+    errno = 0;
+    unsigned long v = strtoul(p, &p2, 16);
+    if (errno == ERANGE || v > 0xffffffffUL)
+      goto error;
+    x = v;
+    p = p2;
+  }
+  else
+    for (uns i = 0; i < 4; i++) {
+      if (i) {
+	if (*p++ != '.')
+	  goto error;
+      }
+      if (!Cdigit(*p))
+	goto error;
+      errno = 0;
+      unsigned long y = strtoul(p, &p2, 10);
+      if (errno == ERANGE || p2 == (char*) p || y > 255)
+	goto error;
+      p = p2;
+      x = (x << 8) + y;
+    }
+  *varp = x;
+  return *p ? "Trailing characters" : NULL;
+error:
+  return "Invalid IP address";
+}
+
--- /dev/null
+/*
+ * UCW Library -- Configuration files: sections
+ *
+ * (c) 2001--2006 Robert Spalek <robert@ucw.cz>
+ * (c) 2003--2006 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/conf.h"
+#include "lib/conf-internal.h"
+#include "lib/clists.h"
+#include "lib/binsearch.h"
+
+#include <string.h>
+
+/* Dirty sections */
+
+/* A (section, instance address) pair recorded by cf_add_dirty() and looked
+ * up by commit_section(). */
+struct dirty_section {
+ struct cf_section *sec;
+ void *ptr;
+};
+/* Growing buffer of dirty_section records; `dirties` counts the used ones */
+#define GBUF_TYPE struct dirty_section
+#define GBUF_PREFIX(x) dirtsec_##x
+#include "lib/gbuf.h"
+static dirtsec_t dirty;
+static uns dirties;
+
+/*
+ * Mark the instance of section `sec` located at `ptr` as modified.
+ * A record identical to the most recently added one is not stored again;
+ * other duplicates are removed later by sort_dirty().
+ */
+void
+cf_add_dirty(struct cf_section *sec, void *ptr)
+{
+  dirtsec_grow(&dirty, dirties+1);
+  struct dirty_section *slot = dirty.ptr + dirties;
+  int same_as_last = dirties && slot[-1].sec == sec && slot[-1].ptr == ptr;
+  if (!same_as_last) {
+    slot->sec = sec;
+    slot->ptr = ptr;
+    dirties++;
+  }
+}
+
+/*
+ * Instantiate the array sorter as dirtsec_sort(), ordering the dirty[]
+ * records by section and then by instance address. The comparison and its
+ * arguments are fully parenthesized so that the macro expands safely
+ * whatever expression the sorter template embeds it in.
+ */
+#define ASORT_PREFIX(x) dirtsec_##x
+#define ASORT_KEY_TYPE struct dirty_section
+#define ASORT_ELT(i) dirty.ptr[i]
+#define ASORT_LT(x,y) ((x).sec < (y).sec || ((x).sec == (y).sec && (x).ptr < (y).ptr))
+#include "lib/arraysort.h"
+
+/* Sort the dirty records and drop duplicates, so that commit_section() can
+ * find a record by binary search. */
+static void
+sort_dirty(void)
+{
+ if (dirties <= 1)
+ return;
+ dirtsec_sort(dirties);
+ // and compress the list
+ struct dirty_section *read = dirty.ptr + 1, *write = dirty.ptr + 1, *limit = dirty.ptr + dirties;
+ while (read < limit) {
+ /* keep a record only if it differs from the one right before it */
+ if (read->sec != read[-1].sec || read->ptr != read[-1].ptr) {
+ if (read != write)
+ *write = *read;
+ write++;
+ }
+ read++;
+ }
+ dirties = write - dirty.ptr;
+}
+
+/* Initialization */
+
+struct cf_section cf_sections; // root section
+
+/*
+ * Find an item of the section by name (case-insensitive). If there is no
+ * such item, the terminating entry of the item array (the one with
+ * cls == 0) is returned, so the result is never NULL.
+ */
+struct cf_item *
+cf_find_subitem(struct cf_section *sec, const char *name)
+{
+  struct cf_item *ci;
+  for (ci = sec->cfg; ci->cls && strcasecmp(ci->name, name); ci++)
+    ;
+  return ci;
+}
+
+/*
+ * Compute sec->flags, recursing into subsections and list node sections:
+ * SEC_FLAG_DYNAMIC if the section contains anything taking a variable
+ * number of parameters, SEC_FLAG_CANT_COPY if it contains lists or parsers
+ * and has no copy hook, plus the number of items in the low bits.
+ */
+static void
+inspect_section(struct cf_section *sec)
+{
+ sec->flags = 0;
+ struct cf_item *ci;
+ for (ci=sec->cfg; ci->cls; ci++)
+ if (ci->cls == CC_SECTION) {
+ inspect_section(ci->u.sec);
+ sec->flags |= ci->u.sec->flags & (SEC_FLAG_DYNAMIC | SEC_FLAG_CANT_COPY);
+ } else if (ci->cls == CC_LIST) {
+ inspect_section(ci->u.sec);
+ sec->flags |= SEC_FLAG_DYNAMIC | SEC_FLAG_CANT_COPY;
+ } else if (ci->cls == CC_DYNAMIC || ci->cls == CC_BITMAP)
+ sec->flags |= SEC_FLAG_DYNAMIC;
+ else if (ci->cls == CC_PARSER) {
+ sec->flags |= SEC_FLAG_CANT_COPY;
+ if (ci->number < 0)
+ sec->flags |= SEC_FLAG_DYNAMIC;
+ }
+ if (sec->copy)
+ sec->flags &= ~SEC_FLAG_CANT_COPY;
+ sec->flags |= ci - sec->cfg; // record the number of entries
+}
+
+/*
+ * Register a top-level section under the given name. Dies if the name is
+ * already taken. With allow_unknown set, unknown items inside the section
+ * are tolerated. The array of top-level sections is allocated on first use
+ * and doubled whenever the last free slot (which serves as the terminator)
+ * gets used.
+ */
+void
+cf_declare_section(const char *name, struct cf_section *sec, uns allow_unknown)
+{
+ if (!cf_sections.cfg)
+ {
+ cf_sections.size = 50;
+ cf_sections.cfg = xmalloc_zero(cf_sections.size * sizeof(struct cf_item));
+ }
+ /* on a miss this returns the first free (zeroed) slot */
+ struct cf_item *ci = cf_find_subitem(&cf_sections, name);
+ if (ci->cls)
+ die("Cannot register section %s twice", name);
+ ci->cls = CC_SECTION;
+ ci->name = name;
+ ci->number = 1;
+ ci->ptr = NULL;
+ ci->u.sec = sec;
+ inspect_section(sec);
+ if (allow_unknown)
+ sec->flags |= SEC_FLAG_UNKNOWN;
+ ci++;
+ if (ci - cf_sections.cfg >= (int) cf_sections.size)
+ {
+ cf_sections.cfg = xrealloc(cf_sections.cfg, 2*cf_sections.size * sizeof(struct cf_item));
+ bzero(cf_sections.cfg + cf_sections.size, cf_sections.size * sizeof(struct cf_item));
+ cf_sections.size *= 2;
+ }
+}
+
+/*
+ * Initialize an instance of a section located at ptr: optionally zero it,
+ * recursively initialize subsections, initialize list heads, replace NULL
+ * dynamic arrays by an empty array and finally call the section's init
+ * hook. Dies if the hook reports an error; `name` is used only in that
+ * message.
+ */
+void
+cf_init_section(const char *name, struct cf_section *sec, void *ptr, uns do_bzero)
+{
+ if (do_bzero) {
+ ASSERT(sec->size);
+ bzero(ptr, sec->size);
+ }
+ for (struct cf_item *ci=sec->cfg; ci->cls; ci++)
+ if (ci->cls == CC_SECTION)
+ cf_init_section(ci->name, ci->u.sec, ptr + (uintptr_t) ci->ptr, 0);
+ else if (ci->cls == CC_LIST)
+ clist_init(ptr + (uintptr_t) ci->ptr);
+ else if (ci->cls == CC_DYNAMIC) {
+ void **dyn = ptr + (uintptr_t) ci->ptr;
+ if (!*dyn) { // replace NULL by an empty array
+ static uns zero = 0;
+ *dyn = (&zero) + 1;
+ }
+ }
+ if (sec->init) {
+ char *msg = sec->init(ptr);
+ if (msg)
+ die("Cannot initialize section %s: %s", name, msg);
+ }
+}
+
+/*
+ * Run commit hooks for the section instance at ptr and everything nested
+ * in it: subsections and list nodes first, the section's own hook last.
+ * The own hook is called only if commit_all is set or the instance is found
+ * among the dirty records (which must have been sorted by sort_dirty()).
+ * Failures of nested commits are logged and reported by a generic message.
+ * Returns NULL on success or an error message.
+ */
+static char *
+commit_section(struct cf_section *sec, void *ptr, uns commit_all)
+{
+ char *err;
+ for (struct cf_item *ci=sec->cfg; ci->cls; ci++)
+ if (ci->cls == CC_SECTION) {
+ if ((err = commit_section(ci->u.sec, ptr + (uintptr_t) ci->ptr, commit_all))) {
+ msg(L_ERROR, "Cannot commit section %s: %s", ci->name, err);
+ return "commit of a subsection failed";
+ }
+ } else if (ci->cls == CC_LIST) {
+ uns idx = 0;
+ CLIST_FOR_EACH(cnode *, n, * (clist*) (ptr + (uintptr_t) ci->ptr))
+ if (idx++, err = commit_section(ci->u.sec, n, commit_all)) {
+ msg(L_ERROR, "Cannot commit node #%d of list %s: %s", idx, ci->name, err);
+ return "commit of a list failed";
+ }
+ }
+ if (sec->commit) {
+ /* We have to process the whole tree of sections even if just a few changes
+ * have been made, because there are dependencies between commit-hooks and
+ * hence we need to call them in a fixed order. */
+#define ARY_LT_X(ary,i,x) ary[i].sec < x.sec || ary[i].sec == x.sec && ary[i].ptr < x.ptr
+ struct dirty_section comp = { sec, ptr };
+ uns pos = BIN_SEARCH_FIRST_GE_CMP(dirty.ptr, dirties, comp, ARY_LT_X);
+
+ if (commit_all
+ || (pos < dirties && dirty.ptr[pos].sec == sec && dirty.ptr[pos].ptr == ptr))
+ return sec->commit(ptr);
+ }
+ return 0;
+}
+
+/*
+ * Sort the dirty records and, unless cm is CF_NO_COMMIT, run the commit
+ * hooks of the whole section tree (of all sections for CF_COMMIT_ALL, of
+ * the dirty ones otherwise). Returns 1 if a commit failed, 0 otherwise; the
+ * dirty records are forgotten only after a successful commit.
+ */
+int
+cf_commit_all(enum cf_commit_mode cm)
+{
+  sort_dirty();
+  if (cm != CF_NO_COMMIT) {
+    if (commit_section(&cf_sections, NULL, cm == CF_COMMIT_ALL))
+      return 1;
+    dirties = 0;
+  }
+  return 0;
+}
--- /dev/null
+/*
+ * Insane tester of reading configuration files
+ *
+ * (c) 2006 Robert Spalek <robert@ucw.cz>
+ */
+
+#include "lib/lib.h"
+#include "lib/conf.h"
+#include "lib/getopt.h"
+#include "lib/clists.h"
+#include "lib/fastbuf.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <time.h>
+
+static int verbose;
+
+/* Test section; used both as the static "master" section (sec1) and as the
+ * node type of the "slaves" list, hence the leading cnode. */
+struct sub_sect_1 {
+ cnode n;
+ char *name;
+ time_t t;
+ char *level;
+ int confidence[2];
+ double *list;
+};
+
+static struct sub_sect_1 sec1 = { {}, "Charlie", 0, "WBAFC", { 0, -1}, DARY_ALLOC(double, 3, 1e4, -1e-4, 8) };
+
+/* Init hook of cf_sec_1: fills in defaults for freshly created nodes.
+ * The statically initialized sec1 is left untouched. */
+static char *
+init_sec_1(struct sub_sect_1 *s)
+{
+  if (s != &sec1) {		// sec1 is a static variable; skip clearing
+    s->name = "unknown";
+    s->level = "default";
+    s->confidence[0] = 5;
+    s->confidence[1] = 6;
+    // leave s->list==NULL
+  }
+  return NULL;
+}
+
+/* Commit hook of cf_sec_1: confidence[0] must lie between 0 and 10. */
+static char *
+commit_sec_1(struct sub_sect_1 *s)
+{
+  int c = s->confidence[0];
+  return (c < 0 || c > 10) ? "Well, this can't be" : NULL;
+}
+
+/* Parser hook: with a parameter, store its numeric value; without one,
+ * store the current time. */
+static char *
+time_parser(uns number, char **pars, time_t *ptr)
+{
+  if (number)
+    *ptr = atoi(pars[0]);
+  else
+    *ptr = time(NULL);
+  return NULL;
+}
+
+/* Description of struct sub_sect_1; F(x) addresses a field relative to the
+ * start of the structure. */
+static struct cf_section cf_sec_1 = {
+ CF_TYPE(struct sub_sect_1),
+ CF_INIT(init_sec_1),
+ CF_COMMIT(commit_sec_1),
+#define F(x) PTR_TO(struct sub_sect_1, x)
+ CF_ITEMS {
+ CF_STRING("name", F(name)),
+ //CF_PARSER("t", F(t), time_parser, 0),
+ CF_STRING("level", F(level)),
+ CF_INT_ARY("confidence", F(confidence[0]), 2), // XXX: the [0] is needed for the sake of type checking
+ CF_DOUBLE_DYN("list", F(list), 100),
+ CF_END
+ }
+#undef F
+};
+
+static uns nr1 = 15;
+static int *nrs1 = DARY_ALLOC(int, 5, 5, 4, 3, 2, 1);
+static int nrs2[5];
+static char *str1 = "no worries";
+static char **str2 = DARY_ALLOC(char *, 2, "Alice", "Bob");
+static u64 u1 = 0xCafeBeefDeadC00ll;
+static double d1 = -1.1;
+static clist secs;
+static time_t t1, t2;
+static u32 ip;
+static int *look = DARY_ALLOC(int, 2, 2, 1);
+static u16 numbers[10] = { 2, 100, 1, 5 };
+static u32 bitmap1 = 0xff;
+static u32 bitmap2 = 3;
+
+/*
+ * Parser of the user-defined type u16: accepts what cf_parse_int() accepts,
+ * restricted to the range 0..65535.
+ */
+static char *
+parse_u16(char *string, u16 *ptr)
+{
+  int a;			// cf_parse_int() stores through an int *
+  char *msg = cf_parse_int(string, &a);
+  if (msg)
+    return msg;
+  /* Negative values and values above 0xffff are both rejected, as before
+   * when the value was held in an unsigned variable. */
+  if (a < 0 || a >= (1<<16))
+    return "Come on, man, this doesn't fit to 16 bits";
+  *ptr = a;
+  return NULL;
+}
+
+/* Dumper of the user-defined type u16_type: prints the value in decimal followed by a space. */
+static void
+dump_u16(struct fastbuf *fb, u16 *ptr)
+{
+  int value = *ptr;
+  bprintf(fb, "%d ", value);
+}
+
+/*
+ * Description of the user-defined configuration type "u16".
+ * The casts adapt the typed u16 callbacks to the generic cf_parser1 and
+ * cf_dumper1 signatures, which take a void pointer.
+ */
+static struct cf_user_type u16_type = {
+ .size = sizeof(u16),
+ .name = "u16",
+ .parser = (cf_parser1*) parse_u16,
+ .dumper = (cf_dumper1*) dump_u16
+};
+
+/*
+ * Init hook of cf_top: pre-populates the secs list with five default nodes
+ * whose confidence[1] is set to 0..4. Always returns NULL (no error).
+ */
+static char *
+init_top(void *ptr UNUSED)
+{
+  uns idx = 0;
+  while (idx < 5)
+    {
+      // XXX: cannot be cf_malloc(), because it's deleted when cf_reload()'ed
+      struct sub_sect_1 *node = xmalloc(sizeof(struct sub_sect_1));
+      cf_init_section("slaves", &cf_sec_1, node, 1);
+      node->confidence[1] = idx;
+      clist_add_tail(&secs, &node->n);
+      idx++;
+    }
+  return NULL;
+}
+
+/*
+ * Commit hook of cf_top: the configuration is accepted only while nr1 still
+ * holds 15. Returns an error message otherwise, NULL when valid.
+ */
+static char *
+commit_top(void *ptr UNUSED)
+{
+  return (nr1 == 15) ? NULL : "Don't touch my variable!";
+}
+
+/* NULL-terminated table of names accepted by the "look" lookup item. */
+static char *alphabet[] = { "alpha", "beta", "gamma", "delta", NULL };
+/*
+ * The top-level section of the test (declared as "top" in main()).
+ * It exercises every item class: plain variables, static and dynamic arrays,
+ * parser hooks, a sub-section, a list, lookups, a user-defined type and bitmaps.
+ */
+static struct cf_section cf_top = {
+ CF_INIT(init_top),
+ CF_COMMIT(commit_top),
+ CF_ITEMS {
+ CF_UNS("nr1", &nr1),
+ CF_INT_DYN("nrs1", &nrs1, 1000),
+ CF_INT_ARY("nrs2", nrs2, 5),
+ CF_STRING("str1", &str1),
+ CF_STRING_DYN("str2", &str2, 20),
+ CF_U64("u1", &u1),
+ CF_DOUBLE("d1", &d1),
+ CF_PARSER("FirstTime", &t1, time_parser, -1), // negative count: at most 1 parameter
+ CF_PARSER("SecondTime", &t2, time_parser, 1), // exactly 1 parameter
+ CF_SECTION("master", &sec1, &cf_sec_1),
+ CF_LIST("slaves", &secs, &cf_sec_1),
+ CF_IP("ip", &ip),
+ CF_LOOKUP_DYN("look", &look, alphabet, 1000),
+ CF_USER_ARY("numbers", numbers, &u16_type, 10),
+ CF_BITMAP_INT("bitmap1", &bitmap1),
+ CF_BITMAP_LOOKUP("bitmap2", &bitmap2, ((char*[]) {
+ "one", "two", "three", "four", "five", "six", "seven", "eight",
+ "nine", "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen", "seventeen",
+ "eighteen", "nineteen", "twenty", NULL // hidden joke here
+ })),
+ CF_END
+ }
+};
+
+/* Command-line options: the configuration library's own options (CF_SHORT_OPTS / CF_LONG_OPTS) plus -v / --verbose. */
+static byte short_opts[] = CF_SHORT_OPTS "v";
+static struct option long_opts[] = {
+ CF_LONG_OPTS
+ {"verbose", 0, 0, 'v'},
+ {NULL, 0, 0, 0}
+};
+
+/* Usage text printed by usage(); CF_USAGE is spliced in between the string literals. */
+static char *help = "\
+Usage: conf-test <options>\n\
+\n\
+Options:\n"
+CF_USAGE
+"-v\t\t\tBe verbose\n\
+";
+
+/*
+ * Prints an optional printf-style error message followed by the usage text
+ * to stderr and terminates the program with exit status 1.
+ *
+ * msg  format string of the message, or NULL to print the usage text only
+ * ...  arguments of the format string
+ */
+static void NONRET
+usage(char *msg, ...)
+{
+  if (msg)
+    {
+      va_list va;
+      va_start(va, msg);
+      vfprintf(stderr, msg, va);
+      va_end(va);  // every va_start() needs its va_end(), even right before exit()
+    }
+  fputs(help, stderr);
+  exit(1);
+}
+
+/*
+ * Test driver: declares the "top" section, parses the command line via
+ * cf_getopt() and dumps all configuration sections to standard output.
+ */
+int
+main(int argc, char *argv[])
+{
+  log_init(argv[0]);
+  cf_declare_section("top", &cf_top, 0);
+  cf_def_file = "lib/conf-test.cf";
+
+  for (;;)
+    {
+      int opt = cf_getopt(argc, argv, short_opts, long_opts, NULL);
+      if (opt < 0)
+        break;
+      if (opt == 'v')
+        verbose++;
+      else
+        usage("unknown option %c\n", opt);
+    }
+  if (argc > optind)
+    usage("too many parameters (%d more)\n", argc-optind);
+
+  /*
+  cf_load("non-existent file");
+  //cf_reload("non-existent file");
+  cf_load("non-existent file");
+  cf_set("top.d1 -1.1; top.master b");
+  */
+
+  // Dump everything through a 16 KB fastbuf attached to file descriptor 1
+  struct fastbuf *dump = bfdopen(1, 1<<14);
+  cf_dump_sections(dump);
+  bclose(dump);
+
+  return 0;
+}
--- /dev/null
+# test config file
+#include lib/conf-test.t ; top.xa=1
+#include 'non-existent file'; #top.xa=1
+Top { \
+
+ nr1=16 #!!!
+ nrs1 2 3 5 \
+ 7 11 13 \
+ \
+ 17M
+ nrs2 3 3k 3 3 3 ; \
+ str1 "hello,\t\x2bworld%%\n"
+ str2 'Hagenuk,
+ the best' "\
+ " qu'est-ce que c'est?
+ u1 0xbadcafebadbeefc0
+ str2:prepend prepended
+ str2:append appended
+ d1 7%
+ d1 -1.14e-25
+ firsttime ; secondtime 56
+ ^top.master:set alice HB8+
+ slaves:clear
+ ip 0xa
+ ip 195.113.31.123
+ look Alpha
+ look:prepend Beta GAMMA
+ numbers 11000 65535
+ bitmap1 31
+ bitmap1:remove 3 3
+ bitmap2:all
+ bitmap2:remove eleven twelve one
+};;;;;;
+
+unknown.ignored :-)
+
+top.slaves cairns gpua 7 7 -10% +10%
+top.slaves daintree rafc 4 5 -171%
+top.slaves coogee pum 9 8
+top.slaves:prepend {name=bondi; level=\
+ "PUG"; confidence 10 10}
+top.slaves:remove {name daintree}
+top.slaveS:edit {level PUG} Bondi PUG!
+top.slaveS:before {level pum}{
+ confidence 2
+ list 123 456 789
+}
+top.slaves:copy {name coogee} Coogee2 PUM
+
+topp.a=15
+top.nr1= ' 15'
+a { ;-D }
--- /dev/null
+/*
+ * UCW Library -- Configuration files
+ *
+ * (c) 2001--2006 Robert Spalek <robert@ucw.cz>
+ * (c) 2003--2006 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_CONF_H
+#define _UCW_CONF_H
+
+/* Class of a configuration item (stored in cf_item.cls): how the item is stored and parsed. */
+enum cf_class {
+ CC_END, // end of list
+ CC_STATIC, // single variable or static array
+ CC_DYNAMIC, // dynamically allocated array
+ CC_PARSER, // arbitrary parser function
+ CC_SECTION, // section appears exactly once
+ CC_LIST, // list with 0..many nodes
+ CC_BITMAP // of up to 32 items
+};
+
+/* Type of the elements of an item (stored in cf_item.type); set by the CF_STATIC, CF_DYNAMIC, lookup, user-type and bitmap macros. */
+enum cf_type {
+ CT_INT, CT_U64, CT_DOUBLE, // number types
+ CT_IP, // IP address
+ CT_STRING, // string type
+ CT_LOOKUP, // in a string table
+ CT_USER // user-defined type
+};
+
+struct fastbuf;
+typedef char *cf_parser(uns number, char **pars, void *ptr);
+ /* A parser function gets an array of (strdup'ed) strings and a pointer with
+ * the customized information (most likely the target address). It can store
+ * the parsed value anywhere in any way it likes, however it must first call
+ * cf_journal_block() on the overwritten memory block. It returns an error
+ * message or NULL if everything is all right. */
+typedef char *cf_parser1(char *string, void *ptr);
+ /* A parser function for user-defined types gets a string and a pointer to
+ * the destination variable. It must store the value within [ptr,ptr+size),
+ * where size is fixed for each type. It should not call cf_journal_block(). */
+typedef char *cf_hook(void *ptr);
+ /* An init- or commit-hook gets a pointer to the section or NULL if this
+ * is the global section. It returns an error message or NULL if everything
+ * is all right. The init-hook should fill in default values (needed for
+ * dynamically allocated nodes of linked lists or for filling global variables
+ * that are run-time dependent). The commit-hook should perform sanity
+ * checks and postprocess the parsed values. Commit-hooks must call
+ * cf_journal_block() too. Caveat! init-hooks for static sections must not
+ * use cf_malloc() but normal xmalloc(). */
+typedef void cf_dumper1(struct fastbuf *fb, void *ptr);
+ /* Dumps the contents of a variable of a user-defined type. */
+typedef char *cf_copier(void *dest, void *src);
+ /* Similar to init-hook, but it copies attributes from another list node
+ * instead of setting the attributes to default values. You have to provide
+ * it if your node contains parsed values and/or sub-lists. */
+
+/* Description of a user-defined item type (CT_USER); referenced from cf_item.u.utype. */
+struct cf_user_type {
+ uns size; // of the parsed attribute
+ char *name; // name of the type (for dumping)
+ cf_parser1 *parser; // how to parse it
+ cf_dumper1 *dumper; // how to dump the type
+};
+
+struct cf_section;
+/*
+ * One configuration item. Instances are normally created by the CF_* macros
+ * below; which member of the union is meaningful follows from cls and type.
+ */
+struct cf_item {
+ const char *name; // case insensitive
+ int number; // length of an array or #parameters of a parser (negative means at most)
+ void *ptr; // pointer to a global variable or an offset in a section
+ union cf_union {
+ struct cf_section *sec; // declaration of a section or a list
+ cf_parser *par; // parser function
+ char **lookup; // NULL-terminated sequence of allowed strings for lookups
+ struct cf_user_type *utype; // specification of the user-defined type
+ } u;
+ enum cf_class cls:16; // attribute class
+ enum cf_type type:16; // type of a static or dynamic attribute
+};
+
+/* Description of a configuration section; filled in with the CF_TYPE, CF_INIT, CF_COMMIT, CF_COPY and CF_ITEMS macros. */
+struct cf_section {
+ uns size; // 0 for a global block, sizeof(struct) for a section
+ cf_hook *init; // fills in default values (no need to bzero)
+ cf_hook *commit; // verifies parsed data (optional)
+ cf_copier *copy; // copies values from another instance (optional, no need to copy basic attributes)
+ struct cf_item *cfg; // CC_END-terminated array of items
+ uns flags; // for internal use only
+};
+
+/* Declaration of cf_section */
+/* These expand to designated initializers of struct cf_section; the hook macros cast typed functions to the generic hook types. */
+#define CF_TYPE(s) .size = sizeof(s)
+#define CF_INIT(f) .init = (cf_hook*) f
+#define CF_COMMIT(f) .commit = (cf_hook*) f
+#define CF_COPY(f) .copy = (cf_copier*) f
+#define CF_ITEMS .cfg = ( struct cf_item[] )
+#define CF_END { .cls = CC_END }
+/* Configuration items */
+/*
+ * Common arguments: n = item name, p = pointer to the variable, c = count
+ * (stored in .number), s = section description, f = parser function.
+ * In CF_STATIC and CF_DYNAMIC, T selects the CT_* constant and t is the C type
+ * checked against p; in the LOOKUP and USER macros, t is the lookup table or
+ * the user-type description.
+ */
+#define CF_STATIC(n,p,T,t,c) { .cls = CC_STATIC, .type = CT_##T, .name = n, .number = c, .ptr = CHECK_PTR_TYPE(p,t*) }
+#define CF_DYNAMIC(n,p,T,t,c) { .cls = CC_DYNAMIC, .type = CT_##T, .name = n, .number = c, .ptr = CHECK_PTR_TYPE(p,t**) }
+#define CF_PARSER(n,p,f,c) { .cls = CC_PARSER, .name = n, .number = c, .ptr = p, .u.par = (cf_parser*) f }
+#define CF_SECTION(n,p,s) { .cls = CC_SECTION, .name = n, .number = 1, .ptr = p, .u.sec = s }
+#define CF_LIST(n,p,s) { .cls = CC_LIST, .name = n, .number = 1, .ptr = CHECK_PTR_TYPE(p,clist*), .u.sec = s }
+#define CF_BITMAP_INT(n,p) { .cls = CC_BITMAP, .type = CT_INT, .name = n, .number = 1, .ptr = CHECK_PTR_TYPE(p,u32*) }
+#define CF_BITMAP_LOOKUP(n,p,t) { .cls = CC_BITMAP, .type = CT_LOOKUP, .name = n, .number = 1, .ptr = CHECK_PTR_TYPE(p,u32*), .u.lookup = t }
+/* Configuration items for basic types */
+/* Naming scheme: plain = single variable, _ARY = static array of c elements, _DYN = dynamic array with c passed as the count. */
+#define CF_INT(n,p) CF_STATIC(n,p,INT,int,1)
+#define CF_INT_ARY(n,p,c) CF_STATIC(n,p,INT,int,c)
+#define CF_INT_DYN(n,p,c) CF_DYNAMIC(n,p,INT,int,c)
+#define CF_UNS(n,p) CF_STATIC(n,p,INT,uns,1)
+#define CF_UNS_ARY(n,p,c) CF_STATIC(n,p,INT,uns,c)
+#define CF_UNS_DYN(n,p,c) CF_DYNAMIC(n,p,INT,uns,c)
+#define CF_U64(n,p) CF_STATIC(n,p,U64,u64,1)
+#define CF_U64_ARY(n,p,c) CF_STATIC(n,p,U64,u64,c)
+#define CF_U64_DYN(n,p,c) CF_DYNAMIC(n,p,U64,u64,c)
+#define CF_DOUBLE(n,p) CF_STATIC(n,p,DOUBLE,double,1)
+#define CF_DOUBLE_ARY(n,p,c) CF_STATIC(n,p,DOUBLE,double,c)
+#define CF_DOUBLE_DYN(n,p,c) CF_DYNAMIC(n,p,DOUBLE,double,c)
+#define CF_IP(n,p) CF_STATIC(n,p,IP,u32,1)
+#define CF_IP_ARY(n,p,c) CF_STATIC(n,p,IP,u32,c)
+#define CF_IP_DYN(n,p,c) CF_DYNAMIC(n,p,IP,u32,c)
+#define CF_STRING(n,p) CF_STATIC(n,p,STRING,char*,1)
+#define CF_STRING_ARY(n,p,c) CF_STATIC(n,p,STRING,char*,c)
+#define CF_STRING_DYN(n,p,c) CF_DYNAMIC(n,p,STRING,char*,c)
+#define CF_LOOKUP(n,p,t) { .cls = CC_STATIC, .type = CT_LOOKUP, .name = n, .number = 1, .ptr = CHECK_PTR_TYPE(p,int*), .u.lookup = t }
+#define CF_LOOKUP_ARY(n,p,t,c) { .cls = CC_STATIC, .type = CT_LOOKUP, .name = n, .number = c, .ptr = CHECK_PTR_TYPE(p,int*), .u.lookup = t }
+#define CF_LOOKUP_DYN(n,p,t,c) { .cls = CC_DYNAMIC, .type = CT_LOOKUP, .name = n, .number = c, .ptr = CHECK_PTR_TYPE(p,int**), .u.lookup = t }
+#define CF_USER(n,p,t) { .cls = CC_STATIC, .type = CT_USER, .name = n, .number = 1, .ptr = p, .u.utype = t }
+#define CF_USER_ARY(n,p,t,c) { .cls = CC_STATIC, .type = CT_USER, .name = n, .number = c, .ptr = p, .u.utype = t }
+#define CF_USER_DYN(n,p,t,c) { .cls = CC_DYNAMIC, .type = CT_USER, .name = n, .number = c, .ptr = p, .u.utype = t }
+
+/* If you aren't picky about the number of parameters */
+#define CF_ANY_NUM (-0x7fffffff)
+
+#define DARY_LEN(a) (((uns*)(a))[-1])
+ // length of a dynamic array: the uns stored immediately before element 0
+ // (fully parenthesized, so any argument expression works; still usable as an lvalue)
+#define DARY_ALLOC(type,len,val...) ((struct { char pad[__alignof__(type) > sizeof(uns) ? __alignof__(type) - sizeof(uns) : sizeof(uns)]; uns l; type a[len]; }) { .l = len, .a = { val } }).a
+ // creates a static instance of a dynamic array; the leading pad places `l'
+ // directly in front of `a' even when type is aligned more strictly than uns
+ // (double, u64, 64-bit pointers) -- without it the compiler would insert
+ // padding between the two and DARY_LEN() would read the padding
+
+/* Memory allocation: conf-alloc.c */
+/* NOTE(review): presumably all of these allocate from cf_pool -- confirm in conf-alloc.c. */
+struct mempool;
+extern struct mempool *cf_pool;
+void *cf_malloc(uns size);
+void *cf_malloc_zero(uns size);
+char *cf_strdup(const char *s);
+char *cf_printf(const char *fmt, ...) FORMAT_CHECK(printf,1,2);
+
+/* Undo journal for error recovery: conf-journal.c */
+extern uns cf_need_journal;
+void cf_journal_block(void *ptr, uns len);
+/* Journals a whole variable: passes its address and size to cf_journal_block(). */
+#define CF_JOURNAL_VAR(var) cf_journal_block(&(var), sizeof(var))
+
+/* Declaration: conf-section.c */
+void cf_declare_section(const char *name, struct cf_section *sec, uns allow_unknown);
+void cf_init_section(const char *name, struct cf_section *sec, void *ptr, uns do_bzero);
+
+/* Parsers for basic types: conf-parse.c */
+/* Each stores the parsed value through the pointer; the char * result is an error message (presumably NULL on success -- confirm in conf-parse.c). */
+char *cf_parse_int(const char *str, int *ptr);
+char *cf_parse_u64(const char *str, u64 *ptr);
+char *cf_parse_double(const char *str, double *ptr);
+char *cf_parse_ip(const char *p, u32 *varp);
+
+#endif
+
--- /dev/null
+/*
+ * UCW Library -- Configuration-Dependent Definitions
+ *
+ * (c) 1997--2007 Martin Mares <mj@ucw.cz>
+ * (c) 2006 Robert Spalek <robert@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_CONFIG_H
+#define _UCW_CONFIG_H
+
+/* Configuration switches */
+
+#include "autoconf.h"
+
+/* Tell libc we're going to use all extensions available */
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
+/* Types (based on standard C99 integers) */
+
+#include <stddef.h>
+#include <stdint.h>
+
+/* Fixed-width aliases of the <stdint.h> types */
+typedef uint8_t byte; /* exactly 8 bits, unsigned */
+typedef uint8_t u8; /* exactly 8 bits, unsigned */
+typedef int8_t s8; /* exactly 8 bits, signed */
+typedef uint16_t u16; /* exactly 16 bits, unsigned */
+typedef int16_t s16; /* exactly 16 bits, signed */
+typedef uint32_t u32; /* exactly 32 bits, unsigned */
+typedef int32_t s32; /* exactly 32 bits, signed */
+typedef uint64_t u64; /* exactly 64 bits, unsigned */
+typedef int64_t s64; /* exactly 64 bits, signed */
+
+typedef unsigned int uns; /* at least 32 bits */
+typedef u32 sh_time_t; /* seconds since UNIX epoch */
+typedef s64 timestamp_t; /* milliseconds since UNIX epoch */
+
+/* The width of file offsets is selected by the CONFIG_LARGE_FILES switch */
+#ifdef CONFIG_LARGE_FILES /* File positions */
+typedef s64 sh_off_t;
+#else
+typedef s32 sh_off_t;
+#endif
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Content-Type Pattern Matching
+ *
+ * (c) 1997 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/chartype.h"
+
+/*
+ * Matches a content type against a pattern of the form "left/right".
+ *
+ * p  the pattern; either side may be "*" and a lone "*" matches everything
+ * t  the content type; it ends at its terminating zero, a space or a ';'
+ *
+ * Both sides are compared case-insensitively (through Cupcase()).
+ * A pattern that contains no '/' (other than the lone "*") never matches;
+ * the left-hand loop checks for the end of the pattern, so it cannot walk
+ * past the terminating zeros of the two strings.
+ * Returns 1 on a match, 0 otherwise.
+ */
+int
+match_ct_patt(const char *p, const char *t)
+{
+  if (*p == '*' && !p[1])		/* "*" matches everything */
+    return 1;
+
+  if (*p == '*' && p[1] == '/')		/* "*" on the left-hand side */
+    {
+      while (*t && *t != ' ' && *t != ';' && *t != '/')
+        t++;
+      p += 2;
+    }
+  else					/* Normal left-hand side */
+    {
+      while (*p != '/')
+        {
+          if (!*p)			/* Pattern ended without a '/' */
+            return 0;
+          if (Cupcase(*p++) != Cupcase(*t++))
+            return 0;
+        }
+      p++;
+    }
+  if (*t++ != '/')
+    return 0;
+
+  if (*p == '*' && !p[1])		/* "*" on the right-hand side */
+    return 1;
+  while (*p)
+    if (Cupcase(*p++) != Cupcase(*t++))
+      return 0;
+  if (*t && *t != ' ' && *t != ';')
+    return 0;
+
+  return 1;
+}
--- /dev/null
+/*
+ * UCW Library -- SDBM emulator at top of GDBM
+ *
+ * (c) 1999 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/db.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <gdbm.h>
+
+/* Database handle of the emulator: wraps a GDBM file and the state of the key iteration. */
+struct sdbm {
+ GDBM_FILE db; // the underlying GDBM database
+ datum prevkey; // key returned by the last sdbm_get_next(); dptr is NULL when no iteration is in progress
+};
+
+/*
+ * Opens a database described by the options.
+ *
+ * o->name        file name
+ * o->page_order  when non-zero, 1 << page_order is passed to GDBM as the block size
+ * o->flags       SDBM_WRITE, SDBM_CREAT and SDBM_SYNC select the GDBM open mode
+ * o->cache_size  when non-zero, it is set as GDBM_CACHESIZE
+ *
+ * Returns a newly allocated handle, or NULL when gdbm_open() fails
+ * (previously a handle wrapping a NULL GDBM file was returned and then
+ * passed on to gdbm_setopt() and all later calls).
+ */
+struct sdbm *
+sdbm_open(struct sdbm_options *o)
+{
+  struct sdbm *d;
+  GDBM_FILE db;
+  int mode;
+
+  if (!(o->flags & SDBM_WRITE))
+    mode = GDBM_READER;
+  else if (o->flags & SDBM_CREAT)
+    mode = GDBM_WRCREAT;
+  else
+    mode = GDBM_WRITER;
+  if (o->flags & SDBM_SYNC)
+    mode |= GDBM_SYNC;
+
+  db = gdbm_open(o->name,
+                 (o->page_order ? (1 << o->page_order) : 0),
+                 mode,
+                 0666,
+                 NULL);
+  if (!db)
+    return NULL;
+
+  d = xmalloc(sizeof(struct sdbm));
+  d->db = db;
+  if (o->cache_size)
+    gdbm_setopt(d->db, GDBM_CACHESIZE, &o->cache_size, sizeof(o->cache_size));
+  d->prevkey.dptr = NULL;
+  return d;
+}
+
+/*
+ * Closes the database and frees the handle. The order matters: the key kept
+ * by the iteration is released first, then the GDBM file is closed and only
+ * then is the handle itself freed.
+ */
+void
+sdbm_close(struct sdbm *d)
+{
+ sdbm_rewind(d);
+ gdbm_close(d->db);
+ xfree(d);
+}
+
+/*
+ * Hands a datum over to the caller.
+ *
+ * D, Dl   the data and its length
+ * val     destination buffer, or NULL when the data itself is not wanted
+ * vallen  in: capacity of the buffer, out: real length; may be NULL
+ *
+ * Returns 1 when the data does not fit into *vallen (nothing is stored
+ * then), 0 otherwise.
+ */
+static int
+sdbm_put_user(byte *D, uns Dl, byte *val, uns *vallen)
+{
+  if (vallen && *vallen < Dl)
+    return 1;
+  if (vallen)
+    *vallen = Dl;
+  if (val)
+    memcpy(val, D, Dl);
+  return 0;
+}
+
+/*
+ * Inserts a new record (GDBM_INSERT mode).
+ * A negative result of gdbm_store() is passed through; otherwise 1 is
+ * returned when gdbm_store() returned 0 and 0 when it returned a positive value.
+ */
+int
+sdbm_store(struct sdbm *d, byte *key, uns keylen, byte *val, uns vallen)
+{
+  datum K = { .dptr = key, .dsize = keylen };
+  datum V = { .dptr = val, .dsize = vallen };
+  int rc = gdbm_store(d->db, K, V, GDBM_INSERT);
+
+  if (rc < 0)
+    return rc;
+  return rc == 0;
+}
+
+/*
+ * Stores a record, overwriting an existing one (GDBM_REPLACE mode).
+ * A NULL val turns the call into sdbm_delete() of the key.
+ * A negative result of gdbm_store() is passed through; otherwise 1 is
+ * returned when gdbm_store() returned 0 and 0 when it returned a positive value.
+ */
+int
+sdbm_replace(struct sdbm *d, byte *key, uns keylen, byte *val, uns vallen)
+{
+  if (!val)
+    return sdbm_delete(d, key, keylen);
+
+  datum K = { .dptr = key, .dsize = keylen };
+  datum V = { .dptr = val, .dsize = vallen };
+  int rc = gdbm_store(d->db, K, V, GDBM_REPLACE);
+
+  if (rc < 0)
+    return rc;
+  return rc == 0;
+}
+
+/* Deletes the record with the given key; returns 1 when gdbm_delete() returned 0, otherwise 0. */
+int
+sdbm_delete(struct sdbm *d, byte *key, uns keylen)
+{
+  datum K = { .dptr = key, .dsize = keylen };
+
+  return gdbm_delete(d->db, K) == 0;
+}
+
+/*
+ * Looks up a key.
+ *
+ * When both val and vallen are NULL, only the existence of the key is
+ * tested and the result of gdbm_exists() is returned. Otherwise the value
+ * is fetched and handed over via sdbm_put_user(); the copy allocated by
+ * GDBM is freed afterwards.
+ * Returns 0 when the key is not found, 1 on success and
+ * SDBM_ERROR_TOO_LARGE when the value does not fit into *vallen.
+ */
+int
+sdbm_fetch(struct sdbm *d, byte *key, uns keylen, byte *val, uns *vallen)
+{
+  datum K = { .dptr = key, .dsize = keylen };
+  datum V;
+  int too_large;
+
+  if (!(val || vallen))
+    return gdbm_exists(d->db, K);
+
+  V = gdbm_fetch(d->db, K);
+  if (!V.dptr)
+    return 0;
+  too_large = sdbm_put_user(V.dptr, V.dsize, val, vallen);
+  xfree(V.dptr);
+  if (too_large)
+    return SDBM_ERROR_TOO_LARGE;
+  return 1;
+}
+
+/* Restarts the iteration: releases the key remembered by sdbm_get_next(), if there is any. */
+void
+sdbm_rewind(struct sdbm *d)
+{
+  if (!d->prevkey.dptr)
+    return;
+  xfree(d->prevkey.dptr);
+  d->prevkey.dptr = NULL;
+}
+
+/*
+ * Advances the iteration over all records.
+ *
+ * key, keylen  receive the key (see sdbm_put_user()); either may be NULL
+ * val, vallen  receive the value; when both are NULL, the value is not fetched
+ *
+ * The key obtained from GDBM is remembered in d->prevkey and serves as the
+ * starting point of the next call. The value is looked up by GDBM's own
+ * copy of the key rather than by the caller's buffer, so the call is safe
+ * even when key or keylen is NULL.
+ * Returns 0 at the end of the database, SDBM_ERROR_TOO_LARGE when the key
+ * does not fit, the result of sdbm_fetch() when a value was requested and
+ * 1 otherwise.
+ */
+int
+sdbm_get_next(struct sdbm *d, byte *key, uns *keylen, byte *val, uns *vallen)
+{
+  datum K;
+
+  if (d->prevkey.dptr)
+    {
+      K = gdbm_nextkey(d->db, d->prevkey);
+      xfree(d->prevkey.dptr);
+    }
+  else
+    K = gdbm_firstkey(d->db);
+  d->prevkey = K;
+  if (!K.dptr)
+    return 0;
+  if (sdbm_put_user(K.dptr, K.dsize, key, keylen))
+    return SDBM_ERROR_TOO_LARGE;
+  if (val || vallen)
+    return sdbm_fetch(d, (byte *) K.dptr, K.dsize, val, vallen);
+  return 1;
+}
+
+/*
+ * Emulation of sdbm_sync(): a no-op, the handle is not touched.
+ * NOTE(review): presumably synchronization is left to the GDBM_SYNC mode
+ * selected in sdbm_open() -- confirm that no explicit flush is expected here.
+ */
+void
+sdbm_sync(struct sdbm *d)
+{
+}
--- /dev/null
+/*
+ * UCW Library -- Database Manager -- Tests and Benchmarks
+ *
+ * (c) 1999 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#if 1
+#include "lib/db.c"
+#define NAME "SDBM"
+#else
+#include "lib/db-emul.c"
+#define NAME "GDBM"
+#endif
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdarg.h>
+#include <sys/stat.h>
+
+/* Default database options of the benchmark; most of them can be overridden from the command line. */
+static struct sdbm_options opts = {
+  .flags = SDBM_CREAT | SDBM_WRITE,
+  .name = "db.test",
+  .page_order = 10,
+  .cache_size = 1024,
+  .key_size = -1,
+  .val_size = -1
+};
+
+/* State of the benchmark, mostly filled in from the command line in main() */
+static struct sdbm *d; /* The database under test */
+static int key_min, key_max; /* min<0 -> URL distribution */
+static int val_min, val_max; /* Range of value sizes (-d) */
+static int num_keys; /* Number of distinct keys */
+static int verbose; /* Verbosity level (-v), consulted by verb() and PROGRESS() */
+
+/*
+ * Prints the usage text to standard output and terminates the program with
+ * exit status 0. The text is a printf() format, hence the doubled '%'.
+ */
+static void
+help(void)
+{
+ printf("Usage: dbtest [<options>] <commands>\n\
+\n\
+Options:\n\
+-c<n> Use cache of <n> pages\n\
+-p<n> Use pages of order <n>\n\
+-k<n> Use key size <n>\n\
+-k<m>-<n> Use key size uniformly distributed between <m> and <n>\n\
+-kU Use keys with URL distribution\n\
+-n<n> Number of distinct keys\n\
+-d<m>[-<n>] Use specified value size (see -k<m>-<n>)\n\
+-t Perform the tests on an existing database file\n\
+-v Be verbose\n\
+-s Turn on synchronous mode\n\
+-S Turn on supersynchronous mode\n\
+-F Turn on fast mode\n\
+\n\
+Commands:\n\
+c Fill database\n\
+r Rewrite database\n\
+f[<p>%%][<n>] Find <n> records with probability of success <p>%% (default=100)\n\
+F[<p>%%][<n>] Find, but don't fetch values\n\
+d Delete records\n\
+w Walk database\n\
+W Walk, but don't fetch values\n\
+");
+ exit(0);
+}
+
+/* Derives a pseudo-random number from a key number by multiplying it with a fixed constant (unsigned arithmetic, wraps around). */
+static uns
+krand(uns kn)
+{
+  const uns multiplier = 2000000011;
+  return kn * multiplier;
+}
+
+/*
+ * Picks a key size for the "URL distribution" (used by keygen() when
+ * key_min < 0).
+ *
+ * rnd is reduced modulo utable[1024] and a binary search over indices
+ * 0..1023 of the non-decreasing table then selects an index, which is
+ * returned as the size.
+ * NOTE(review): presumably utable[] holds cumulative counts of URLs by
+ * length -- confirm. The code relies on the table having at least 1025
+ * entries.
+ */
+static uns
+gen_url_size(uns rnd)
+{
+ uns l, m, r;
+ static uns utable[] = {
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 22, 108, 245, 481, 979, 3992, 7648, 13110, 19946, 27256, 34993, 43222, 52859, 64563,
+80626, 117521, 147685, 188364, 233174, 290177, 347132, 407231, 465787, 540931, 628601, 710246, 808671, 922737, 1025691, 1138303,
+1238802, 1344390, 1443843, 1533207, 1636494, 1739082, 1826911, 1910725, 1993940, 2094365, 2188987, 2267827, 2350190, 2441980,
+2520713, 2593654, 2668632, 2736009, 2808356, 2889682, 2959300, 3017945, 3086488, 3146032, 3204818, 3251897, 3307001, 3349388,
+3392798, 3433429, 3476765, 3529107, 3556884, 3585120, 3633005, 3677697, 3699561, 3716660, 3739823, 3765154, 3795096, 3821184,
+3858117, 3908757, 3929095, 3943264, 3957033, 3969588, 3983441, 3994630, 4005413, 4028890, 4039678, 4058007, 4071906, 4087029,
+4094233, 4105259, 4111603, 4120338, 4127364, 4133983, 4140310, 4144843, 4150565, 4155974, 4165132, 4170648, 4176811, 4187118,
+4190866, 4199051, 4206686, 4216122, 4226109, 4233721, 4254123, 4261792, 4270396, 4276650, 4282932, 4291738, 4295932, 4299370,
+4304011, 4307098, 4311866, 4318168, 4325730, 4329774, 4332946, 4336305, 4339770, 4345237, 4349038, 4356129, 4362872, 4366542,
+4371077, 4374524, 4376733, 4378794, 4380652, 4382340, 4383552, 4385952, 4386914, 4393123, 4394106, 4395142, 4396593, 4399112,
+4399909, 4401015, 4401780, 4402616, 4403454, 4404481, 4405231, 4405947, 4406886, 4408364, 4409159, 4409982, 4410872, 4412010,
+4413341, 4414161, 4415673, 4417135, 4418032, 4419117, 4419952, 4420677, 4421387, 4421940, 4422469, 4423210, 4423696, 4424274,
+4424982, 4425665, 4426363, 4427018, 4427969, 4428992, 4429791, 4430804, 4432601, 4433440, 4434157, 4434967, 4436280, 4439784,
+4444255, 4445544, 4446416, 4447620, 4449638, 4453004, 4455470, 4456982, 4457956, 4458617, 4459538, 4460007, 4460377, 4460768,
+4461291, 4461520, 4461678, 4461911, 4462063, 4462239, 4462405, 4462607, 4462666, 4462801, 4462919, 4463108, 4463230, 4463438,
+4463530, 4463698, 4463779, 4463908, 4463991, 4464138, 4464188, 4464391, 4464580, 4464868, 4464980, 4465174, 4465255, 4465473,
+4465529, 4465681, 4465746, 4465916, 4465983, 4466171, 4466248, 4466430, 4466560, 4466751, 4466930, 4467807, 4468847, 4469940,
+4470344, 4470662, 4470716, 4471120, 4471389, 4471814, 4472141, 4472545, 4472687, 4473051, 4473253, 4473603, 4473757, 4474065,
+4474125, 4474354, 4474428, 4474655, 4474705, 4474841, 4474858, 4475133, 4475201, 4475327, 4475367, 4475482, 4475533, 4475576,
+4475586, 4475616, 4475637, 4475659, 4475696, 4475736, 4475775, 4475794, 4476156, 4476711, 4477004, 4477133, 4477189, 4477676,
+4477831, 4477900, 4477973, 4477994, 4478011, 4478040, 4478063, 4478085, 4478468, 4478715, 4479515, 4480034, 4481804, 4483259,
+4483866, 4484202, 4484932, 4485693, 4486184, 4486549, 4486869, 4487405, 4487639, 4487845, 4488086, 4488256, 4488505, 4488714,
+4492669, 4496233, 4497738, 4498122, 4498653, 4499862, 4501169, 4501627, 4501673, 4501811, 4502182, 4502475, 4502533, 4502542,
+4502548, 4502733, 4503389, 4504381, 4505070, 4505378, 4505814, 4506031, 4506336, 4506642, 4506845, 4506971, 4506986, 4507016,
+4507051, 4507098, 4507107, 4507114, 4507139, 4507478, 4507643, 4507674, 4507694, 4507814, 4507894, 4507904, 4507929, 4507989,
+4508023, 4508047, 4508053, 4508063, 4508075, 4508092, 4508104, 4508113, 4508239, 4508285, 4508324, 4508335, 4508340, 4508378,
+4508405, 4508419, 4508436, 4508449, 4508470, 4508488, 4508515, 4508541, 4508564, 4508570, 4508584, 4508594, 4508607, 4508634,
+4508652, 4508665, 4508673, 4508692, 4508704, 4508742, 4508755, 4508773, 4508788, 4508798, 4508832, 4508869, 4508885, 4508905,
+4508915, 4508947, 4508956, 4509061, 4509070, 4509357, 4509368, 4509380, 4509393, 4509401, 4509412, 4509426, 4509438, 4509451,
+4509461, 4509473, 4509489, 4509498, 4509512, 4509537, 4509568, 4509582, 4509621, 4509629, 4509747, 4509766, 4509776, 4509795,
+4509802, 4509813, 4509822, 4509829, 4509834, 4509844, 4509854, 4509863, 4509868, 4509875, 4509886, 4509898, 4509908, 4509920,
+4509932, 4509941, 4509949, 4509955, 4509967, 4509972, 4509979, 4509987, 4509999, 4510002, 4510010, 4510014, 4510018, 4510025,
+4510028, 4510049, 4510055, 4510061, 4510068, 4510079, 4510085, 4510091, 4510098, 4510102, 4510104, 4510110, 4510121, 4510128,
+4510132, 4510138, 4510144, 4510145, 4510153, 4510161, 4510174, 4510196, 4510199, 4510208, 4510209, 4510212, 4510216, 4510217,
+4510219, 4510222, 4510228, 4510231, 4510236, 4510241, 4510245, 4510248, 4510250, 4510254, 4510255, 4510261, 4510262, 4510266,
+4510266, 4510271, 4510285, 4510287, 4510291, 4510295, 4510303, 4510306, 4510308, 4510310, 4510314, 4510319, 4510320, 4510324,
+4510328, 4510333, 4510333, 4510336, 4510340, 4510342, 4510348, 4510353, 4510359, 4510362, 4510365, 4510371, 4510373, 4510375,
+4510378, 4510380, 4510385, 4510389, 4510391, 4510391, 4510394, 4510396, 4510397, 4510398, 4510400, 4510403, 4510406, 4510407,
+4510408, 4510409, 4510411, 4510413, 4510417, 4510417, 4510419, 4510422, 4510426, 4510427, 4510430, 4510435, 4510437, 4510439,
+4510440, 4510442, 4510442, 4510446, 4510447, 4510448, 4510450, 4510451, 4510451, 4510453, 4510454, 4510455, 4510457, 4510460,
+4510460, 4510460, 4510462, 4510463, 4510466, 4510468, 4510472, 4510475, 4510480, 4510482, 4510483, 4510486, 4510488, 4510492,
+4510494, 4510497, 4510497, 4510499, 4510503, 4510505, 4510506, 4510507, 4510509, 4510512, 4510514, 4510527, 4510551, 4510553,
+4510554, 4510555, 4510556, 4510558, 4510561, 4510562, 4510566, 4510567, 4510568, 4510570, 4510573, 4510574, 4510586, 4510603,
+4510605, 4510607, 4510610, 4510610, 4510613, 4510613, 4510614, 4510614, 4510615, 4510616, 4510616, 4510620, 4510622, 4510623,
+4510624, 4510627, 4510628, 4510630, 4510631, 4510632, 4510634, 4510634, 4510634, 4510636, 4510636, 4510639, 4510639, 4510640,
+4510643, 4510647, 4510649, 4510650, 4510653, 4510653, 4510653, 4510653, 4510656, 4510659, 4510661, 4510664, 4510665, 4510669,
+4510672, 4510673, 4510674, 4510675, 4510680, 4510683, 4510684, 4510686, 4510687, 4510690, 4510691, 4510693, 4510693, 4510697,
+4510699, 4510700, 4510703, 4510704, 4510709, 4510711, 4510713, 4510713, 4510720, 4510720, 4510722, 4510724, 4510727, 4510729,
+4510735, 4510735, 4510738, 4510740, 4510744, 4510745, 4510746, 4510748, 4510754, 4510756, 4510758, 4510761, 4510764, 4510766,
+4510768, 4510768, 4510770, 4510770, 4510772, 4510774, 4510775, 4510775, 4510775, 4510776, 4510777, 4510780, 4510782, 4510783,
+4510785, 4510786, 4510788, 4510789, 4510791, 4510793, 4510793, 4510793, 4510795, 4510795, 4510799, 4510803, 4510804, 4510804,
+4510804, 4510805, 4510807, 4510809, 4510811, 4510811, 4510813, 4510815, 4510815, 4510816, 4510819, 4510820, 4510824, 4510827,
+4510829, 4510829, 4510830, 4510833, 4510835, 4510837, 4510838, 4510838, 4510839, 4510840, 4510840, 4510842, 4510842, 4510843,
+4510845, 4510845, 4510845, 4510847, 4510848, 4510848, 4510848, 4510850, 4510853, 4510855, 4510857, 4510859, 4510861, 4510862,
+4510864, 4510865, 4510865, 4510865, 4510869, 4510869, 4510869, 4510869, 4510869, 4510870, 4510870, 4510872, 4510872, 4510873,
+4510874, 4510875, 4510875, 4510877, 4510879, 4510879, 4510879, 4510879, 4510880, 4510881, 4510882, 4510883, 4510884, 4510885,
+4510886, 4510887, 4510890, 4510890, 4510891, 4510892, 4510892, 4510893, 4510893, 4510895, 4510895, 4510896, 4510897, 4510899,
+4510901, 4510901, 4510901, 4510902, 4510903, 4510903, 4510903, 4510905, 4510905, 4510906, 4510906, 4510907, 4510907, 4510909,
+4510910, 4510911, 4510911, 4510911, 4510913, 4510913, 4510914, 4510914, 4510914, 4510915, 4510916, 4510918, 4510918, 4510919,
+4510919, 4510919, 4510920, 4510921, 4510922, 4510923, 4510924, 4510924, 4510924, 4510924, 4510926, 4510927, 4510928, 4510928,
+4510928, 4510928, 4510928, 4510930, 4510933, 4510935, 4510935, 4510935, 4510935, 4510935, 4510936, 4510938, 4510947, 4510966,
+4510967, 4510969, 4510973, 4510973, 4510974, 4510974, 4510974, 4510974, 4510974, 4510974, 4510975, 4510976, 4510976, 4510976,
+4510976, 4510976, 4510976, 4510976, 4510977, 4510979, 4510979, 4510979, 4510979, 4510979, 4510979, 4510980, 4510980, 4510980,
+4510980, 4510981, 4510981, 4510981, 4510982, 4510982, 4510982, 4510982, 4510982, 4510982, 4510982, 4510983, 4510983, 4510984,
+4510984, 4510984, 4510984, 4510984, 4510985, 4510985, 4510985, 4510985, 4510987, 4510987, 4510987, 4510988, 4510988, 4510989,
+4510989, 4510989, 4510989, 4510989, 4510990, 4510990, 4510990, 4510990, 4510990, 4510990, 4510990, 4510991, 4510991, 4510991,
+4510991, 4510991, 4510991, 4510991, 4510992, 4510992, 4510992, 4510992, 4510992, 4510992, 4510992, 4510993, 4510993, 4510993,
+4510994, 4510994, 4510994, 4510994, 4510995, 4510995, 4510996, 4510997, 4510998, 4510999, 4510999, 4511000, 4511000, 4511001,
+4511001, 4511002, 4511002, 4511002, 4511003, 4511004, 4511004, 4511004, 4511004, 4511005, 4511006, 4511008, 4511008, 4511008,
+4511009, 4511009, 4511009, 4511009, 4511010, 4511011, 4511011, 4511012, 4511012, 4511012, 4511012, 4511013, 4511013, 4511014,
+4511014, 4511014, 4511014, 4511015, 4511018, 4511018, 4511018, 4511018, 4511018, 4511018, 4511018, 4511020, 4511020, 4511020,
+4511020, 4511020, 4511020, 4511020, 4511021, 4511021, 4511021, 4511021, 4511021, 4511021, 4511021, 4511021, 4511021, 4511021,
+4511021
+ };
+
+ rnd %= utable[1024];
+ l = 0; r = 1023;
+ while (l < r)
+ {
+ m = (l+r)/2;
+ if (utable[m] == rnd)
+ return m;
+ if (utable[m] >= rnd)
+ r = m - 1;
+ else
+ l = m + 1;
+ }
+ return l;
+}
+
+/* Maps a random number to a size within min..max (inclusive); when both bounds are equal, that value is returned directly. */
+static uns
+gen_size(uns min, uns max, uns rnd)
+{
+  return (min == max) ? min : min + rnd % (max - min + 1);
+}
+
+/*
+ * Fills size bytes of buf with a pseudo-random sequence that depends only
+ * on the key number kn: each output byte is the top byte of a 32-bit state
+ * which is then advanced as state*257 + 17.
+ */
+static void
+gen_random(byte *buf, uns size, uns kn)
+{
+  uns state = (kn + 0x36221057) ^ (kn << 24) ^ (kn << 15);
+  byte *end = buf + size;
+
+  for (; buf != end; buf++)
+    {
+      *buf = state >> 24;
+      state = state*257 + 17;
+    }
+}
+
+/*
+ * Generates the key of record number kn into buf and returns its length.
+ * The first four bytes hold kn in big-endian order (decoded again by
+ * keydec()); the remaining bytes come from gen_random(). The length follows
+ * the URL distribution when key_min < 0, otherwise it lies within
+ * key_min..key_max; it is never less than 4.
+ */
+static int
+keygen(byte *buf, uns kn)
+{
+  uns rnd = krand(kn);
+  uns size = (key_min < 0) ? gen_url_size(rnd) : gen_size(key_min, key_max, rnd);
+
+  buf[0] = kn >> 24;
+  buf[1] = kn >> 16;
+  buf[2] = kn >> 8;
+  buf[3] = kn;
+  if (size < 4)
+    return 4;
+  gen_random(buf + 4, size-4, kn);
+  return size;
+}
+
+/* Generates the value of record number kn into buf and returns its length, which lies within val_min..val_max. */
+static int
+valgen(byte *buf, uns kn)
+{
+  uns rnd = krand(kn);
+  uns size = gen_size(val_min, val_max, rnd);
+
+  gen_random(buf, size, kn);
+  return size;
+}
+
+/*
+ * Decodes the record number stored big-endian in the first four bytes of a
+ * key (the inverse of what keygen() writes). Every byte is converted to uns
+ * before shifting, so that a top byte of 0x80 or more is not shifted into
+ * the sign bit of a promoted int.
+ */
+static uns
+keydec(byte *buf)
+{
+  return ((uns) buf[0] << 24) | ((uns) buf[1] << 16) | ((uns) buf[2] << 8) | (uns) buf[3];
+}
+
+/*
+ * Prints a printf-style message to stderr when the verbosity is high enough.
+ * A message starting with '^' and one more character requires a level of
+ * that character minus '0' (the two characters are not printed); all other
+ * messages require level 1.
+ */
+static void
+verb(char *msg, ...)
+{
+  char *fmt = msg;
+  int level = 1;
+  va_list args;
+
+  if (fmt[0] == '^' && fmt[1])
+    {
+      level = fmt[1] - '0';
+      fmt += 2;
+    }
+  if (verbose < level)
+    return;
+  va_start(args, msg);
+  vfprintf(stderr, fmt, args);
+  va_end(args);
+}
+
+/*
+ * Parses a size specification "<n>" or "<m>-<n>" into *min and *max.
+ * The string is modified in place: the first '-' is replaced by a zero byte.
+ */
+static void
+parse_size(int *min, int *max, char *c)
+{
+  char *dash = strchr(c, '-');
+
+  if (!dash)
+    {
+      *min = *max = atol(c);
+      return;
+    }
+  *dash = 0;
+  *min = atol(c);
+  *max = atol(dash + 1);
+}
+
+/*
+ * Progress indicator: prints the record number followed by a carriage
+ * return, for every record when verbose > 2 and for every 1024th record when
+ * verbose > 1. Wrapped in do { } while (0) and with the argument
+ * parenthesized, so that it behaves as a single statement (no dangling
+ * else) and accepts arbitrary expressions.
+ */
+#define PROGRESS(i) do { if ((verbose > 2) || (verbose > 1 && !((i) & 1023))) fprintf(stderr, "%d\r", (i)); } while (0)
+
+int main(int argc, char **argv)
+{
+ int c, i, j, k, l, m;
+ byte kb[2048], vb[2048], vb2[2048];
+ uns ks, vs, vs2, perc, cnt;
+ char *ch;
+ int dont_delete = 0;
+ timestamp_t timer;
+
+ log_init("dbtest");
+ setvbuf(stdout, NULL, _IONBF, 0);
+ setvbuf(stderr, NULL, _IONBF, 0);
+ while ((c = getopt(argc, argv, "c:p:k:n:d:vsStF")) >= 0)
+ switch (c)
+ {
+ case 'c':
+ opts.cache_size = atol(optarg);
+ break;
+ case 'p':
+ opts.page_order = atol(optarg);
+ break;
+ case 'k':
+ if (!strcmp(optarg, "U"))
+ key_min = key_max = -1;
+ else
+ parse_size(&key_min, &key_max, optarg);
+ break;
+ case 'n':
+ num_keys = atol(optarg);
+ break;
+ case 'd':
+ parse_size(&val_min, &val_max, optarg);
+ break;
+ case 'v':
+ verbose++;
+ break;
+ case 's':
+ opts.flags |= SDBM_SYNC;
+ break;
+ case 'S':
+ opts.flags |= SDBM_SYNC | SDBM_FSYNC;
+ break;
+ case 'F':
+ opts.flags |= SDBM_FAST;
+ break;
+ case 't':
+ dont_delete = 1;
+ break;
+ default:
+ help();
+ }
+
+ if (key_min >= 0 && key_min < 4)
+ key_min = key_max = 4;
+ if (key_min == key_max && key_min >= 0)
+ opts.key_size = key_min;
+ if (val_min == val_max)
+ opts.val_size = val_min;
+ if (!num_keys)
+ die("Number of keys not given");
+
+ printf(NAME " benchmark: %d records, keys ", num_keys);
+ if (key_min < 0)
+ printf("<URL>");
+ else
+ printf("%d-%d", key_min, key_max);
+ printf(", values %d-%d, page size %d, cache %d pages\n", val_min, val_max, 1 << opts.page_order, opts.cache_size);
+
+ verb("OPEN(%s, key=%d, val=%d, cache=%d, pgorder=%d)\n", opts.name, opts.key_size, opts.val_size,
+ opts.cache_size, opts.page_order);
+ if (!dont_delete)
+ unlink(opts.name);
+ d = sdbm_open(&opts);
+ if (!d)
+ die("open failed: %m");
+
+ while (optind < argc)
+ {
+ char *o = argv[optind++];
+ init_timer(&timer);
+ switch (*o)
+ {
+ case 'c':
+ printf("create %d: ", num_keys);
+ for(i=0; i<num_keys; i++)
+ {
+ PROGRESS(i);
+ ks = keygen(kb, i);
+ vs = valgen(vb, i);
+ if (sdbm_store(d, kb, ks, vb, vs) != 1) die("store failed");
+ }
+ break;
+ case 'r':
+ printf("rewrite %d: ", num_keys);
+ for(i=0; i<num_keys; i++)
+ {
+ PROGRESS(i);
+ ks = keygen(kb, i);
+ vs = valgen(vb, i);
+ if (sdbm_replace(d, kb, ks, vb, vs) != 1) die("replace failed");
+ }
+ break;
+ case 'f':
+ case 'F':
+ c = (*o++ == 'f');
+ if ((ch = strchr(o, '%')))
+ {
+ *ch++ = 0;
+ perc = atol(o);
+ }
+ else
+ {
+ ch = o;
+ perc = 100;
+ }
+ cnt = atol(ch);
+ if (!cnt)
+ {
+ cnt = num_keys;
+ m = (perc == 100);
+ }
+ else
+ m = 0;
+ printf("%s fetch %d (%d%% success, with%s values): ", (m ? "sequential" : "random"), cnt, perc, (c ? "" : "out"));
+ i = -1;
+ while (cnt--)
+ {
+ if (m)
+ i++;
+ else
+ i = random_max(num_keys) + ((random_max(100) < perc) ? 0 : num_keys);
+ PROGRESS(i);
+ ks = keygen(kb, i);
+ if (c)
+ {
+ vs2 = sizeof(vb2);
+ j = sdbm_fetch(d, kb, ks, vb2, &vs2);
+ }
+ else
+ j = sdbm_fetch(d, kb, ks, NULL, NULL);
+ if (j < 0)
+ die("fetch: error %d", j);
+ if ((i < num_keys) != j)
+ die("fetch mismatch at key %d, res %d", i, j);
+ if (c && j)
+ {
+ vs = valgen(vb, i);
+ if (vs != vs2 || memcmp(vb, vb2, vs))
+ die("fetch data mismatch at key %d: %d,%d", i, vs, vs2);
+ }
+ }
+ break;
+ case 'd':
+ printf("delete %d: ", num_keys);
+ for(i=0; i<num_keys; i++)
+ {
+ PROGRESS(i);
+ ks = keygen(kb, i);
+ if (sdbm_delete(d, kb, ks) != 1) die("delete failed");
+ }
+ break;
+ case 'w':
+ case 'W':
+ c = (*o == 'w');
+ i = k = l = m = 0;
+ printf("walk %d (with%s keys): ", num_keys, (c ? "" : "out"));
+ sdbm_rewind(d);
+ for(;;)
+ {
+ ks = sizeof(kb);
+ vs = sizeof(vb);
+ if (c)
+ j = sdbm_get_next(d, kb, &ks, vb, &vs);
+ else
+ j = sdbm_get_next(d, kb, &ks, NULL, NULL);
+ if (!j)
+ break;
+ if (ks < 4)
+ die("get_next: too short");
+ i = keydec(kb);
+ if (i < 0 || i >= num_keys)
+ die("get_next: %d out of range", i);
+ PROGRESS(i);
+ vs2 = keygen(vb2, i);
+ if (ks != vs2 || memcmp(kb, vb2, ks))
+ die("get_next: key mismatch at %d", i);
+ if (c)
+ {
+ vs2 = valgen(vb2, i);
+ if (vs != vs2 || memcmp(vb, vb2, vs))
+ die("get_next: data mismatch at %d", i);
+ }
+ l += k;
+ m += i;
+ k++;
+ }
+ if (k != num_keys)
+ die("fetch: wrong # of keys: %d != %d", k, num_keys);
+ if (l != m)
+ die("fetch: wrong checksum: %d != %d", l, m);
+ break;
+ default:
+ help();
+ }
+ sdbm_sync(d);
+ printf("%d ms\n", get_timer(&timer));
+ }
+
+ verb("CLOSE\n");
+ sdbm_close(d);
+
+ {
+ struct stat st;
+ if (stat(opts.name, &st)) die("stat: %m");
+ printf("file size: %d bytes\n", (int) st.st_size);
+ }
+ return 0;
+}
--- /dev/null
+/*
+ * SDBM Database Utility
+ *
+ * (c) 2000--2001 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/db.h"
+#include "lib/db_internal.h"
+#include "lib/fastbuf.h"
+#include "lib/ff-binary.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <errno.h>
+
+static int verbose=0;
+static int cache=1024;
+static int force_key=-2;
+static int force_val=-2;
+static int force_page=-1;
+
+#define SDBM_DUMP_MAGIC 0x321f120e
+#define SDBM_DUMP_VERSION 1
+
+/*
+ * Write every record of the database `db' to the dump file `dmp'.
+ *
+ * The dump starts with the dump magic, the dump version and the page order,
+ * key size and value size of the source database (all written by bputl()).
+ * Each record follows as key length, key bytes, value length and value
+ * bytes, both lengths being written by bputw().
+ */
+static void
+dump(char *db, char *dmp)
+{
+  struct sdbm_options op;
+  struct sdbm *src;
+  struct fastbuf *dest;
+  byte key[65536], val[65536];
+  int klen, vlen;
+  int e, c=0;
+
+  /* The source is opened with no flags set and a small cache */
+  bzero(&op, sizeof(op));
+  op.name = db;
+  op.cache_size = 16;
+  op.flags = 0;
+  if (!(src = sdbm_open(&op)))
+    die("Source open failed: %m");
+
+  /* Dump header */
+  dest = bopen(dmp, O_WRONLY | O_CREAT | O_TRUNC, 65536);
+  bputl(dest, SDBM_DUMP_MAGIC);
+  bputl(dest, SDBM_DUMP_VERSION);
+  bputl(dest, src->page_order);
+  bputl(dest, src->key_size);
+  bputl(dest, src->val_size);
+
+  fprintf(stderr, "Dumping database...\n");
+  sdbm_rewind(src);
+  for(;;)
+    {
+      /* Pass the capacity of both buffers in; the real lengths come back */
+      klen = sizeof(key);
+      vlen = sizeof(val);
+      e = sdbm_get_next(src, key, &klen, val, &vlen);
+      if (e == 0)
+        break;
+      if (e < 0)
+        fprintf(stderr, "sdbm_get_next: error %d\n", e);
+      /* Progress indicator, refreshed once per 1024 records */
+      if (c++ % 1024 == 0)
+        {
+          fprintf(stderr, "%d\r", c);
+          fflush(stderr);
+        }
+      bputw(dest, klen);
+      bwrite(dest, key, klen);
+      bputw(dest, vlen);
+      bwrite(dest, val, vlen);
+    }
+
+  sdbm_close(src);
+  bclose(dest);
+  fprintf(stderr, "Dumped %d records\n", c);
+}
+
+/*
+ * Re-create the database `db' from the dump file `dmp'.
+ *
+ * The dump must start with SDBM_DUMP_MAGIC and SDBM_DUMP_VERSION, followed
+ * by the page order, key size and value size of the dumped database. Each
+ * of the three can be overridden from the command line (force_page,
+ * force_key, force_val). Any existing file called `db' is removed first.
+ *
+ * force_key and force_val default to -2 ("not given"), so that -1 can be
+ * used to request variable-sized keys/values as the usage text promises
+ * and as rebuild() already does.
+ */
+static void
+restore(char *dmp, char *db)
+{
+  struct sdbm *dest;
+  struct fastbuf *src;
+  struct sdbm_options op;
+  int e, c=0;
+
+  src = bopen(dmp, O_RDONLY, 65536);
+  if (bgetl(src) != SDBM_DUMP_MAGIC ||
+      bgetl(src) != SDBM_DUMP_VERSION)
+    die("%s: not a sdbm dump", dmp);
+
+  bzero(&op, sizeof(op));
+  op.name = db;
+  e = unlink(op.name);
+  if (e < 0 && errno != ENOENT)
+    die("unlink: %m");
+  op.cache_size = cache;
+  op.flags = SDBM_CREAT | SDBM_WRITE | SDBM_FAST;
+  /* Every header field is read from the dump even when it is overridden */
+  op.page_order = bgetl(src);
+  if (force_page >= 0)
+    op.page_order = force_page;
+  op.key_size = bgetl(src);
+  if (force_key >= -1)			/* -1 is a valid request: variable size */
+    op.key_size = force_key;
+  op.val_size = bgetl(src);
+  if (force_val >= -1)			/* -1 is a valid request: variable size */
+    op.val_size = force_val;
+  dest = sdbm_open(&op);
+  if (!dest)
+    die("Destination open failed");
+
+  fprintf(stderr, "Restoring database...\n");
+  for(;;)
+    {
+      byte key[65536], val[65536];
+      int klen, vlen;
+      /* A negative key length is taken as the end of the dump */
+      klen = bgetw(src);
+      if (klen < 0)
+        break;
+      breadb(src, key, klen);
+      vlen = bgetw(src);
+      if (vlen < 0)
+        die("Corrupted dump file: value missing");
+      breadb(src, val, vlen);
+      /* Progress indicator, refreshed once per 1024 records */
+      if (!(c++ % 1024))
+        {
+          fprintf(stderr, "%d\r", c);
+          fflush(stderr);
+        }
+      if (sdbm_store(dest, key, klen, val, vlen) == 0)
+        fprintf(stderr, "sdbm_store: duplicate key\n");
+    }
+
+  bclose(src);
+  sdbm_close(dest);
+  fprintf(stderr, "Restored %d records\n", c);
+}
+
+/*
+ * Copy all records of the database `sdb' into a freshly created database
+ * `ddb'. The page order, key size and value size are inherited from the
+ * source unless overridden by force_page, force_key or force_val. Any
+ * existing file called `ddb' is removed first.
+ */
+static void
+rebuild(char *sdb, char *ddb)
+{
+  struct sdbm_options op;
+  struct sdbm *src, *dest;
+  byte key[65536], val[65536];
+  int klen, vlen;
+  int e, c=0;
+
+  /* Source: no flags set, small cache */
+  bzero(&op, sizeof(op));
+  op.name = sdb;
+  op.cache_size = 16;
+  op.flags = 0;
+  if (!(src = sdbm_open(&op)))
+    die("Source open failed: %m");
+
+  /* Destination: start from the source parameters, then apply overrides */
+  op.name = ddb;
+  e = unlink(op.name);
+  if (e < 0 && errno != ENOENT)
+    die("unlink: %m");
+  op.cache_size = cache;
+  op.flags = SDBM_CREAT | SDBM_WRITE | SDBM_FAST;
+  op.page_order = src->root->page_order;
+  if (force_page >= 0)
+    op.page_order = (u32) force_page;
+  op.key_size = src->root->key_size;
+  if (force_key >= -1)
+    op.key_size = force_key;
+  op.val_size = src->root->val_size;
+  if (force_val >= -1)
+    op.val_size = force_val;
+  if (!(dest = sdbm_open(&op)))
+    die("Destination open failed");
+
+  fprintf(stderr, "Rebuilding database...\n");
+  sdbm_rewind(src);
+  for(;;)
+    {
+      /* Pass the capacity of both buffers in; the real lengths come back */
+      klen = sizeof(key);
+      vlen = sizeof(val);
+      e = sdbm_get_next(src, key, &klen, val, &vlen);
+      if (e == 0)
+        break;
+      if (e < 0)
+        fprintf(stderr, "sdbm_get_next: error %d\n", e);
+      /* Progress indicator, refreshed once per 1024 records */
+      if (c++ % 1024 == 0)
+        {
+          fprintf(stderr, "%d\r", c);
+          fflush(stderr);
+        }
+      if (sdbm_store(dest, key, klen, val, vlen) == 0)
+        fprintf(stderr, "sdbm_store: duplicate key\n");
+    }
+
+  sdbm_close(src);
+  sdbm_close(dest);
+  fprintf(stderr, "Copied %d records\n", c);
+}
+
+/*
+ * Entry point of db-tool: parse the options, then run the single-letter
+ * command (b = rebuild, d = dump, r = restore) on its two file arguments.
+ * Returns 0 after running a command and 1 after printing the usage text.
+ */
+int
+main(int argc, char **argv)
+{
+ int o;
+
+ while ((o = getopt(argc, argv, "vc:k:d:p:")) >= 0)
+ switch (o)
+ {
+ case 'v':
+ verbose++;
+ break;
+ case 'c':
+ cache=atol(optarg);
+ break;
+ case 'k':
+ force_key=atol(optarg);
+ break;
+ case 'd':
+ force_val=atol(optarg);
+ break;
+ case 'p':
+ force_page=atol(optarg);
+ break;
+ default:
+ /* Also the target of the "goto bad" statements in the argument checks below */
+ bad:
+ fprintf(stderr, "Usage: db-tool [<options>] <command> <database>\n\
+\n\
+Options:\n\
+-v\t\tBe verbose\n\
+-c<n>\t\tUse cache of <n> pages\n\
+-d<n>\t\tSet data size to <n> (-1=variable) [restore,rebuild]\n\
+-k<n>\t\tSet key size to <n> (-1=variable) [restore,rebuild]\n\
+-p<n>\t\tSet page order to <n> [restore,rebuild]\n\
+\n\
+Commands:\n\
+b <db> <new>\tRebuild database\n\
+d <db> <dump>\tDump database\n\
+r <dump> <db>\tRestore database from dump\n\
+");
+ return 1;
+ }
+ /* Drop the parsed options; argv[0] is now the command letter */
+ argc -= optind;
+ argv += optind;
+ if (argc < 1 || strlen(argv[0]) != 1)
+ goto bad;
+
+ /* Every command takes exactly two file names after the command letter */
+ switch (argv[0][0])
+ {
+ case 'b':
+ if (argc != 3)
+ goto bad;
+ rebuild(argv[1], argv[2]);
+ break;
+ case 'd':
+ if (argc != 3)
+ goto bad;
+ dump(argv[1], argv[2]);
+ break;
+ case 'r':
+ if (argc != 3)
+ goto bad;
+ restore(argv[1], argv[2]);
+ break;
+ default:
+ goto bad;
+ }
+ return 0;
+}
--- /dev/null
+/*
+ * UCW Library -- Fast Database Management Routines
+ *
+ * (c) 1999--2001 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+/*
+ * This library uses the standard algorithm for external hashing (page directory
+ * mapping topmost K bits of hash value to page address, directory splits and
+ * so on). Peculiarities of this implementation (aka design decisions):
+ *
+ * o We allow both fixed and variable length keys and values (this includes
+ * zero size values for cases you want to represent only a set of keys).
+ * o We assume that key_size + val_size < page_size.
+ * o We never shrink the directory nor free empty pages. (The reason is that
+ * if the database was once large, it's likely it will again become large soon.)
+ * o The only pages which can be freed are those of the directory (during
+ * directory split), so we keep only a simple 32-entry free block list
+ * and we assume it's sorted.
+ * o All pointers are always given in pages from start of the file.
+ * This gives us page_size*2^32 limit for file size which should be enough.
+ */
+
+#include "lib/lib.h"
+#include "lib/lfs.h"
+#include "lib/pagecache.h"
+#include "lib/db.h"
+#include "lib/db_internal.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+/*
+ * Page cache access helpers. GET_PAGE, GET_ZERO_PAGE and READ_PAGE take a page
+ * number `x' and turn it into a byte offset by shifting it left by the page
+ * order. READ_DIR takes a byte offset `off' relative to the start of the page
+ * directory. Each expands to a call of pgc_get(), pgc_get_zero() or pgc_read()
+ * on the database's cache and file descriptor.
+ */
+#define GET_PAGE(d,x) pgc_get((d)->cache, (d)->fd, ((sh_off_t)(x)) << (d)->page_order)
+#define GET_ZERO_PAGE(d,x) pgc_get_zero((d)->cache, (d)->fd, ((sh_off_t)(x)) << (d)->page_order)
+#define READ_PAGE(d,x) pgc_read((d)->cache, (d)->fd, ((sh_off_t)(x)) << (d)->page_order)
+#define READ_DIR(d,off) pgc_read((d)->cache, (d)->fd, (((sh_off_t)(d)->root->dir_start) << (d)->page_order) + (off))
+
+/*
+ * Open the database described by `o' and return its handle, or NULL on failure.
+ *
+ * If the file can be opened, its root block is checked for the right magic
+ * and version and the page order stored in the file is used. Otherwise, if
+ * SDBM_CREAT is set, a new file is created with three pages: the root page
+ * (page 0), a one-entry page directory (page 1) and a single empty data
+ * page (page 2).
+ */
+struct sdbm *
+sdbm_open(struct sdbm_options *o)
+{
+ struct sdbm *d;
+ struct sdbm_root root, *r;
+ /* A cache size of 0 selects the default of 16 pages */
+ uns cache_size = o->cache_size ? o->cache_size : 16;
+
+ d = xmalloc_zero(sizeof(struct sdbm));
+ d->flags = o->flags;
+ d->fd = sh_open(o->name, ((d->flags & SDBM_WRITE) ? O_RDWR : O_RDONLY), 0666);
+ if (d->fd >= 0) /* Already exists, let's check it */
+ {
+ /* Read the root block directly: the cache cannot be set up before the page size is known */
+ if (read(d->fd, &root, sizeof(root)) != sizeof(root))
+ goto bad;
+ if (root.magic != SDBM_MAGIC || root.version != SDBM_VERSION)
+ goto bad;
+ d->file_size = sh_seek(d->fd, 0, SEEK_END) >> root.page_order;
+ d->page_order = root.page_order;
+ d->page_size = 1 << root.page_order;
+ d->cache = pgc_open(d->page_size, cache_size);
+ /* The root page stays held until sdbm_close() */
+ d->root_page = pgc_read(d->cache, d->fd, 0);
+ d->root = (void *) d->root_page->data;
+ }
+ else if ((d->flags & SDBM_CREAT) && (d->fd = sh_open(o->name, O_RDWR | O_CREAT, 0666)) >= 0)
+ {
+ struct page *q;
+ uns page_order = o->page_order;
+ /* Page orders below 10 (pages smaller than 1K) are raised to 10 */
+ if (page_order < 10)
+ page_order = 10;
+ d->page_size = 1 << page_order;
+ d->cache = pgc_open(d->page_size, cache_size);
+ d->root_page = GET_ZERO_PAGE(d, 0);
+ r = d->root = (void *) d->root_page->data; /* Build root page */
+ r->magic = SDBM_MAGIC;
+ r->version = SDBM_VERSION;
+ r->page_order = d->page_order = page_order;
+ r->key_size = o->key_size;
+ r->val_size = o->val_size;
+ r->dir_start = 1;
+ r->dir_order = 0;
+ d->file_size = 3;
+ q = GET_ZERO_PAGE(d, 1); /* Build page directory */
+ GET32(q->data, 0) = 2;
+ pgc_put(d->cache, q);
+ q = GET_ZERO_PAGE(d, 2); /* Build single data page */
+ pgc_put(d->cache, q);
+ }
+ else
+ goto bad;
+ /* Cache values derived from the root page in the handle */
+ d->dir_size = 1 << d->root->dir_order;
+ d->dir_shift = 32 - d->root->dir_order;
+ d->page_mask = d->page_size - 1;
+ d->key_size = d->root->key_size;
+ d->val_size = d->root->val_size;
+ return d;
+
+bad:
+ /* sdbm_close() copes with a partially initialized handle */
+ sdbm_close(d);
+ return NULL;
+}
+
+/*
+ * Release everything attached to the handle `d' and free the handle itself:
+ * the held root page, the page cache and the file descriptor. Each of them
+ * is released only if it has been set up.
+ */
+void
+sdbm_close(struct sdbm *d)
+{
+  struct page_cache *cache = d->cache;
+
+  if (d->root_page)
+    pgc_put(cache, d->root_page);
+  if (cache)
+    pgc_close(cache);
+  if (!(d->fd < 0))
+    close(d->fd);
+  xfree(d);
+}
+
+/*
+ * Allocate `number' consecutive pages by extending the file size kept in
+ * the handle. Returns the number of the first new page; dies if the page
+ * counter would wrap around.
+ */
+static uns
+sdbm_alloc_pages(struct sdbm *d, uns number)
+{
+  uns first = d->file_size;
+  uns end = first + number;
+
+  if (end < first)			/* Wrap around? */
+    die("SDB: Database file too large, giving up");
+  d->file_size = end;
+  return first;
+}
+
+/*
+ * Allocate a single page and return its number. The page is taken from the
+ * first free pool entry if that entry is non-empty; otherwise the file is
+ * extended by one page.
+ */
+static uns
+sdbm_alloc_page(struct sdbm *d)
+{
+ uns pos;
+
+ if (!d->root->free_pool[0].count)
+ return sdbm_alloc_pages(d, 1);
+ pos = d->root->free_pool[0].first;
+ d->root->free_pool[0].first++;
+ if (!--d->root->free_pool[0].count)
+ {
+ /* First entry exhausted: shift the remaining entries down and clear the last one */
+ memmove(d->root->free_pool, d->root->free_pool+1, (SDBM_NUM_FREE_PAGE_POOLS-1) * sizeof(d->root->free_pool[0]));
+ d->root->free_pool[SDBM_NUM_FREE_PAGE_POOLS-1].count = 0;
+ }
+ /* The free pool lives in the root page, which has just been modified */
+ pgc_mark_dirty(d->cache, d->root_page);
+ return pos;
+}
+
+/*
+ * Record `number' pages starting at page `start' as free by putting them
+ * into the first unused entry of the free pool in the root page.
+ *
+ * The scan for an unused entry is bounded by the size of the pool, and a
+ * full pool is reported with die() instead of ASSERT, so the table is never
+ * read or written out of range, even if assertions are compiled out.
+ */
+static void
+sdbm_free_pages(struct sdbm *d, uns start, uns number)
+{
+  uns i = 0;
+
+  while (i < SDBM_NUM_FREE_PAGE_POOLS && d->root->free_pool[i].count)
+    i++;
+  if (i >= SDBM_NUM_FREE_PAGE_POOLS)
+    die("SDB: Free page pool is full, giving up");
+  d->root->free_pool[i].first = start;
+  d->root->free_pool[i].count = number;
+  /* The free pool lives in the root page, which has just been modified */
+  pgc_mark_dirty(d->cache, d->root_page);
+}
+
+/*
+ * Compute the 32-bit hash of the `keylen' bytes at `key'.
+ */
+u32
+sdbm_hash(byte *key, uns keylen)
+{
+  /*
+   *  This used to be the same hash function as GDBM uses,
+   *  but it turned out that it tends to give the same results
+   *  on similar keys. Damn it.
+   */
+  u32 h = 0x238F13AF * keylen;		/* Seed depends on the key length */
+  uns i;
+
+  for (i = 0; i < keylen; i++)
+    h = 37*h + key[i];
+  /* Final scrambling step */
+  return 1103515243 * h + 12345;
+}
+
+/*
+ * Decode the entry stored at `pos' inside a bucket. Pointers to the key and
+ * value bytes (still inside the page) are stored to *key and *val, their
+ * lengths to *keylen and *vallen. Returns the total size of the entry in bytes.
+ *
+ * A fixed-size key or value (size >= 0 in the handle) has no length prefix;
+ * a variable-sized one is preceded by its length as two bytes, high byte first.
+ */
+static int
+sdbm_get_entry(struct sdbm *d, byte *pos, byte **key, uns *keylen, byte **val, uns *vallen)
+{
+ byte *p = pos;
+
+ if (d->key_size >= 0)
+ *keylen = d->key_size;
+ else
+ {
+ *keylen = (p[0] << 8) | p[1];
+ p += 2;
+ }
+ *key = p;
+ p += *keylen;
+ if (d->val_size >= 0)
+ *vallen = d->val_size;
+ else
+ {
+ *vallen = (p[0] << 8) | p[1];
+ p += 2;
+ }
+ *val = p;
+ p += *vallen;
+ return p - pos;
+}
+
+/*
+ * Return the number of bytes an entry with the given key and value lengths
+ * occupies in a bucket: the data itself plus a two-byte length prefix for
+ * each of key and value that is variable-sized in this database.
+ */
+static int
+sdbm_entry_len(struct sdbm *d, uns keylen, uns vallen)
+{
+  uns key_prefix = (d->key_size < 0) ? 2 : 0;
+  uns val_prefix = (d->val_size < 0) ? 2 : 0;
+
+  return keylen + vallen + key_prefix + val_prefix;
+}
+
+/*
+ * Encode an entry at `pos': an optional two-byte key length (high byte
+ * first, only for variable-sized keys), the key, an optional two-byte value
+ * length (only for variable-sized values) and the value.
+ *
+ * memmove() is used for the copies, so the source may overlap the destination.
+ */
+static void
+sdbm_store_entry(struct sdbm *d, byte *pos, byte *key, uns keylen, byte *val, uns vallen)
+{
+ if (d->key_size < 0)
+ {
+ *pos++ = keylen >> 8;
+ *pos++ = keylen;
+ }
+ memmove(pos, key, keylen);
+ pos += keylen;
+ if (d->val_size < 0)
+ {
+ *pos++ = vallen >> 8;
+ *pos++ = vallen;
+ }
+ memmove(pos, val, vallen);
+}
+
+/*
+ * Return the rank of the data page referenced by the directory entry at byte
+ * offset `dirpos', i.e. the number of adjacent directory entries holding the
+ * same page number. Directory entries are 4 bytes each.
+ */
+static uns
+sdbm_page_rank(struct sdbm *d, uns dirpos)
+{
+ struct page *b;
+ u32 pg, x;
+ uns l, r;
+ uns pm = d->page_mask;
+
+ b = READ_DIR(d, dirpos & ~pm);
+ pg = GET32(b->data, dirpos & pm);
+ /* Extend the run [l, r) to both sides within the current directory page */
+ l = dirpos;
+ while ((l & pm) && GET32(b->data, (l - 4) & pm) == pg)
+ l -= 4;
+ r = dirpos + 4;
+ /* We heavily depend on unused directory entries being zero */
+ while ((r & pm) && GET32(b->data, r & pm) == pg)
+ r += 4;
+ pgc_put(d->cache, b);
+
+ /* The run covers the whole page: keep extending it by whole directory pages */
+ if (!(l & pm) && !(r & pm))
+ {
+ /* Note that if it spans page boundary, it must contain an integer number of pages */
+ while (l)
+ {
+ /* Only the first entry of the preceding directory page is examined */
+ b = READ_DIR(d, (l - 4) & ~pm);
+ x = GET32(b->data, 0);
+ pgc_put(d->cache, b);
+ if (x != pg)
+ break;
+ l -= d->page_size;
+ }
+ while (r < 4*d->dir_size)
+ {
+ b = READ_DIR(d, r & ~pm);
+ x = GET32(b->data, 0);
+ pgc_put(d->cache, b);
+ if (x != pg)
+ break;
+ r += d->page_size;
+ }
+ }
+ /* Convert the byte length of the run to a number of entries */
+ return (r - l) >> 2;
+}
+
+/*
+ * Double the size of the page directory: every old entry i is duplicated
+ * into the new entries 2*i and 2*i+1. Dies when the directory order has
+ * already reached 31.
+ *
+ * While the doubled directory still fits in one page, it is expanded in
+ * place. Otherwise twice as many pages are allocated, the entries are
+ * copied there and the old pages are put on the free list.
+ */
+static void
+sdbm_expand_directory(struct sdbm *d)
+{
+ struct page *b, *c;
+ int i, ent;
+ u32 *dir, *t;
+
+ if (d->root->dir_order >= 31)
+ die("SDB: Database directory too large, giving up");
+
+ if (4*d->dir_size < d->page_size)
+ {
+ /* It still fits within single page */
+ b = READ_DIR(d, 0);
+ dir = (u32 *) b->data;
+ /* Go from the last entry down, so that no entry is overwritten before it is read */
+ for(i=d->dir_size-1; i>=0; i--)
+ dir[2*i] = dir[2*i+1] = dir[i];
+ pgc_mark_dirty(d->cache, b);
+ pgc_put(d->cache, b);
+ }
+ else
+ {
+ uns old_dir = d->root->dir_start;
+ /* 4 << dir_order bytes of directory divided by the page size */
+ uns old_dir_pages = 1 << (d->root->dir_order + 2 - d->page_order);
+ uns page, new_dir;
+ new_dir = d->root->dir_start = sdbm_alloc_pages(d, 2*old_dir_pages);
+ /* ent = half of the number of 4-byte entries in a page */
+ ent = 1 << (d->page_order - 3);
+ for(page=0; page < old_dir_pages; page++)
+ {
+ b = READ_PAGE(d, old_dir + page);
+ dir = (u32 *) b->data;
+ /* The first half of the old page fills one new page ... */
+ c = GET_PAGE(d, new_dir + 2*page);
+ t = (u32 *) c->data;
+ for(i=0; i<ent; i++)
+ t[2*i] = t[2*i+1] = dir[i];
+ pgc_put(d->cache, c);
+ /* ... and the second half fills the following one */
+ c = GET_PAGE(d, new_dir + 2*page + 1);
+ t = (u32 *) c->data;
+ for(i=0; i<ent; i++)
+ t[2*i] = t[2*i+1] = dir[ent+i];
+ pgc_put(d->cache, c);
+ pgc_put(d->cache, b);
+ }
+ if (!(d->flags & SDBM_FAST))
+ {
+ /*
+ * Unless in super-fast mode, fill old directory pages with zeroes.
+ * This slows us down a bit, but allows database reconstruction after
+ * the free list is lost.
+ */
+ for(page=0; page < old_dir_pages; page++)
+ {
+ b = GET_ZERO_PAGE(d, old_dir + page);
+ pgc_put(d->cache, b);
+ }
+ }
+ sdbm_free_pages(d, old_dir, old_dir_pages);
+ }
+
+ /* Publish the new directory order and refresh the values cached in the handle */
+ d->root->dir_order++;
+ d->dir_size = 1 << d->root->dir_order;
+ d->dir_shift = 32 - d->root->dir_order;
+ pgc_mark_dirty(d->cache, d->root_page);
+ if (!(d->flags & SDBM_FAST))
+ sdbm_sync(d);
+}
+
+/*
+ * Distribute the entries of bucket `s' between buckets `d0' and `d1':
+ * an entry goes to `d1' if bit `sigbit' of its key's hash is set and to
+ * `d0' otherwise. The `used' counters of both destinations are set.
+ *
+ * sdbm_split_page() passes the same bucket as both `s' and `d0'.
+ */
+static void
+sdbm_split_data(struct sdbm *d, struct sdbm_bucket *s, struct sdbm_bucket *d0, struct sdbm_bucket *d1, uns sigbit)
+{
+ byte *sp = s->data;
+ byte *dp[2] = { d0->data, d1->data };
+ byte *K, *D;
+ uns Kl, Dl, sz, i;
+
+ while (sp < s->data + s->used)
+ {
+ sz = sdbm_get_entry(d, sp, &K, &Kl, &D, &Dl);
+ sp += sz;
+ i = (sdbm_hash(K, Kl) & (1 << sigbit)) ? 1 : 0;
+ sdbm_store_entry(d, dp[i], K, Kl, D, Dl);
+ dp[i] += sz;
+ }
+ d0->used = dp[0] - d0->data;
+ d1->used = dp[1] - d1->data;
+}
+
+/*
+ * Set `count' consecutive directory entries, starting at byte offset
+ * `dirpos' in the directory, to the page number `pos'. The range may span
+ * several directory pages; each page touched is marked dirty.
+ */
+static void
+sdbm_split_dir(struct sdbm *d, uns dirpos, uns count, uns pos)
+{
+ struct page *b;
+ uns i;
+
+ /* From now on, count is in bytes (4 per entry) */
+ count *= 4;
+ while (count)
+ {
+ b = READ_DIR(d, dirpos & ~d->page_mask);
+ /* i = number of bytes to rewrite in this directory page */
+ i = d->page_size - (dirpos & d->page_mask);
+ if (i > count)
+ i = count;
+ count -= i;
+ while (i)
+ {
+ GET32(b->data, dirpos & d->page_mask) = pos;
+ dirpos += 4;
+ i -= 4;
+ }
+ pgc_mark_dirty(d->cache, b);
+ pgc_put(d->cache, b);
+ }
+}
+
+/*
+ * Map a hash value to the byte offset of its entry in the page directory:
+ * the top bits of the hash (everything above dir_shift) select the entry
+ * and each entry is 4 bytes long.
+ */
+static inline uns
+sdbm_dirpos(struct sdbm *d, uns hash)
+{
+  /* A shift by 32 bits must be avoided; such a directory has a single entry */
+  if (d->dir_shift == 32)
+    return 0;
+  return (hash >> d->dir_shift) << 2;
+}
+
+/*
+ * Split the data page `b', which is the page the given `hash' maps to.
+ * A new page is allocated, the entries are divided between the old and the
+ * new page according to one more bit of their hashes and the upper half of
+ * the directory entries that pointed to `b' is redirected to the new page.
+ *
+ * Returns the page which `hash' maps to after the split; the other one is
+ * released with pgc_put().
+ */
+static struct page *
+sdbm_split_page(struct sdbm *d, struct page *b, u32 hash)
+{
+ struct page *p[2];
+ uns i, rank, sigbit, rank_log, dirpos, newpg;
+
+ dirpos = sdbm_dirpos(d, hash);
+ rank = sdbm_page_rank(d, dirpos); /* rank = # of pointers to this page */
+ if (rank == 1)
+ {
+ /* Only one pointer: the directory must be doubled first, which also doubles all offsets */
+ sdbm_expand_directory(d);
+ rank = 2;
+ dirpos *= 2;
+ }
+ rank_log = 1; /* rank_log = log2(rank) */
+ while ((1U << rank_log) < rank)
+ rank_log++;
+ sigbit = d->dir_shift + rank_log - 1; /* sigbit = bit we split on */
+ p[0] = b;
+ newpg = sdbm_alloc_page(d);
+ p[1] = GET_PAGE(d, newpg);
+ /* The old page serves both as the source and as the first destination */
+ sdbm_split_data(d, (void *) b->data, (void *) p[0]->data, (void *) p[1]->data, sigbit);
+ /* Redirect the upper rank/2 entries of the aligned group of `rank' entries */
+ sdbm_split_dir(d, (dirpos & ~(4*rank - 1))+2*rank, rank/2, newpg);
+ pgc_mark_dirty(d->cache, p[0]);
+ i = (hash & (1 << sigbit)) ? 1 : 0;
+ pgc_put(d->cache, p[!i]);
+ return p[i];
+}
+
+/*
+ * Hand `Dl' bytes at `D' over to a caller-supplied buffer.
+ *
+ * If `vallen' is given, it holds the capacity of the buffer on entry and
+ * receives the real length on success; if the capacity is too small,
+ * 1 is returned and nothing is copied. If `val' is given, the data are
+ * copied there. Returns 0 on success.
+ */
+static int
+sdbm_put_user(byte *D, uns Dl, byte *val, uns *vallen)
+{
+  if (vallen && *vallen < Dl)
+    return 1;
+  if (vallen)
+    *vallen = Dl;
+  if (val)
+    memcpy(val, D, Dl);
+  return 0;
+}
+
+/*
+ * Common implementation of fetch, store, replace and delete.
+ *
+ * mode 0 = fetch: copy the value of `key' to val/vallen (either may be
+ *		NULL); returns 1 if found, 0 if not found.
+ * mode 1 = store: insert the pair; returns 0 if the key is already present.
+ * mode 2 = replace: remove the old entry (if any) and insert the new one;
+ *		with val == NULL the entry is only removed and 0 is returned
+ *		if the key was not present.
+ *
+ * Returns 1 on success or a negative SDBM_ERROR_xxx code.
+ *
+ * Modifying operations (mode != 0) are refused with SDBM_ERROR_READ_ONLY
+ * unless the database was opened with SDBM_WRITE; fetches are always
+ * allowed. The value size is validated only for modifying operations and
+ * `mode' is tested first, so a fetch never dereferences `vallen' here.
+ */
+static int
+sdbm_access(struct sdbm *d, byte *key, uns keylen, byte *val, uns *vallen, uns mode) /* 0=read, 1=store, 2=replace */
+{
+  struct page *p, *q;
+  u32 hash, h, pos, size;
+  struct sdbm_bucket *b;
+  byte *c, *e;
+  int rc;
+
+  if ((d->key_size >= 0 && keylen != (uns) d->key_size) || keylen > 65535)
+    return SDBM_ERROR_BAD_KEY_SIZE;
+  if (mode && val && ((d->val_size >= 0 && *vallen != (uns) d->val_size) || *vallen >= 65535))
+    return SDBM_ERROR_BAD_VAL_SIZE;
+  if (mode && !(d->flags & SDBM_WRITE))
+    return SDBM_ERROR_READ_ONLY;
+
+  /* Find the data page through the page directory */
+  hash = sdbm_hash(key, keylen);
+  h = sdbm_dirpos(d, hash);
+  p = READ_DIR(d, h & ~d->page_mask);
+  pos = GET32(p->data, h & d->page_mask);
+  pgc_put(d->cache, p);
+  q = READ_PAGE(d, pos);
+  b = (void *) q->data;
+
+  /* Scan the bucket for the key */
+  c = b->data;
+  e = c + b->used;
+  while (c < e)
+    {
+      byte *K, *D;
+      uns Kl, Dl, s;
+      s = sdbm_get_entry(d, c, &K, &Kl, &D, &Dl);
+      if (Kl == keylen && !memcmp(K, key, Kl))
+        {
+          /* Gotcha! */
+          switch (mode)
+            {
+            case 0:			/* fetch: found */
+              rc = sdbm_put_user(D, Dl, val, vallen);
+              pgc_put(d->cache, q);
+              return rc ? SDBM_ERROR_TOO_LARGE : 1;
+            case 1:			/* store: already present */
+              pgc_put(d->cache, q);
+              return 0;
+            default:			/* replace: delete the old one */
+              memmove(c, c+s, e-(c+s));
+              b->used -= s;
+              goto insert;
+            }
+        }
+      c += s;
+    }
+  if (!mode || !val)			/* fetch or delete: no success */
+    {
+      pgc_put(d->cache, q);
+      return 0;
+    }
+
+insert:
+  if (val)
+    {
+      size = sdbm_entry_len(d, keylen, *vallen);
+      while (b->used + size > d->page_size - sizeof(struct sdbm_bucket))
+        {
+          /* Page overflow, need to split */
+          if (size >= d->page_size - sizeof(struct sdbm_bucket))
+            {
+              /* The entry would not fit even in an empty page */
+              pgc_put(d->cache, q);
+              return SDBM_ERROR_GIANT;
+            }
+          q = sdbm_split_page(d, q, hash);
+          b = (void *) q->data;
+        }
+      sdbm_store_entry(d, b->data + b->used, key, keylen, val, *vallen);
+      b->used += size;
+    }
+  pgc_mark_dirty(d->cache, q);
+  pgc_put(d->cache, q);
+  if (d->flags & SDBM_SYNC)
+    sdbm_sync(d);
+  return 1;
+}
+
+/* Insert a new pair; thin wrapper around sdbm_access() with mode 1 (store). */
+int
+sdbm_store(struct sdbm *d, byte *key, uns keylen, byte *val, uns vallen)
+{
+ return sdbm_access(d, key, keylen, val, &vallen, 1);
+}
+
+/* Insert a pair, replacing any existing one; sdbm_access() with mode 2 (replace). */
+int
+sdbm_replace(struct sdbm *d, byte *key, uns keylen, byte *val, uns vallen)
+{
+ return sdbm_access(d, key, keylen, val, &vallen, 2);
+}
+
+/* Remove a key; implemented as a replace (mode 2) with no value. */
+int
+sdbm_delete(struct sdbm *d, byte *key, uns keylen)
+{
+ return sdbm_access(d, key, keylen, NULL, NULL, 2);
+}
+
+/* Look a key up; sdbm_access() with mode 0 (read). */
+int
+sdbm_fetch(struct sdbm *d, byte *key, uns keylen, byte *val, uns *vallen)
+{
+ return sdbm_access(d, key, keylen, val, vallen, 0);
+}
+
+/*
+ * Reset the position used by sdbm_get_next(): page 1, offset 0 and the
+ * first entry of the free pool.
+ */
+void
+sdbm_rewind(struct sdbm *d)
+{
+ d->find_page = 1;
+ d->find_pos = 0;
+ d->find_free_list = 0;
+}
+
+/*
+ * Return the next record of a sequential walk through the database and
+ * advance the position kept in the handle. The walk goes through the file
+ * page by page, skipping the page directory and the ranges listed in the
+ * free pool.
+ *
+ * keylen/vallen hold buffer capacities on entry and real lengths on return.
+ * Returns 1 if a record was found, 0 at the end of the database and
+ * SDBM_ERROR_TOO_LARGE if the key or the value does not fit the buffer
+ * (the position is advanced past that record even in this case).
+ */
+int
+sdbm_get_next(struct sdbm *d, byte *key, uns *keylen, byte *val, uns *vallen)
+{
+ uns page = d->find_page;
+ uns pos = d->find_pos;
+ byte *K, *V;
+ uns c, Kl, Vl;
+ struct page *p;
+ struct sdbm_bucket *b;
+
+ for(;;)
+ {
+ /* pos == 0 means that a new page is about to be entered */
+ if (!pos)
+ {
+ if (page >= d->file_size)
+ break;
+ if (page == d->root->dir_start)
+ /* Skip the page directory (its size in bytes rounded up to whole pages) */
+ page += (4*d->dir_size + d->page_size - 1) >> d->page_order;
+ else if (page == d->root->free_pool[d->find_free_list].first)
+ /* Skip a range of free pages and move on to the next free pool entry */
+ page += d->root->free_pool[d->find_free_list++].count;
+ else
+ /* A data page: entries start right after the 4-byte `used' field */
+ pos = 4;
+ continue;
+ }
+ p = READ_PAGE(d, page);
+ b = (void *) p->data;
+ if (pos - 4 >= b->used)
+ {
+ /* No more entries in this bucket */
+ pos = 0;
+ page++;
+ pgc_put(d->cache, p);
+ continue;
+ }
+ c = sdbm_get_entry(d, p->data + pos, &K, &Kl, &V, &Vl);
+ d->find_page = page;
+ d->find_pos = pos + c;
+ c = sdbm_put_user(K, Kl, key, keylen) ||
+ sdbm_put_user(V, Vl, val, vallen);
+ pgc_put(d->cache, p);
+ return c ? SDBM_ERROR_TOO_LARGE : 1;
+ }
+ d->find_page = page;
+ d->find_pos = pos;
+ return 0;
+}
+
+/*
+ * Flush the page cache of the database and, if SDBM_FSYNC is set,
+ * call fsync() on its file descriptor as well.
+ */
+void
+sdbm_sync(struct sdbm *d)
+{
+ pgc_flush(d->cache);
+ if (d->flags & SDBM_FSYNC)
+ fsync(d->fd);
+}
--- /dev/null
+/*
+ * UCW Library -- Fast Database Management Routines
+ *
+ * (c) 1999--2001 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_DB_H
+#define _UCW_DB_H
+
+struct sdbm;
+
+/*
+ * Parameters of sdbm_open(). The page order, key size and value size are
+ * used only when a new database is created; an existing file supplies its
+ * own. A cache size of 0 selects 16 pages and page orders below 10 are
+ * raised to 10.
+ */
+struct sdbm_options { /* Set to 0 for default */
+ char *name; /* File name */
+ uns flags; /* See SDBM_xxx below */
+ uns page_order; /* Binary logarithm of file page size */
+ uns cache_size; /* Number of cached pages */
+ int key_size; /* Key size, -1=variable */
+ int val_size; /* Value size, -1=variable */
+};
+
+/*
+ * Return values: sdbm_open() returns NULL on failure. The functions
+ * returning int give 1 on success, 0 when there is nothing to do (store:
+ * key already present; fetch and delete: key not found; get_next: end of
+ * the database) and a negative SDBM_ERROR_xxx code on error.
+ */
+struct sdbm *sdbm_open(struct sdbm_options *);
+void sdbm_close(struct sdbm *);
+int sdbm_store(struct sdbm *, byte *key, uns keylen, byte *val, uns vallen);
+int sdbm_replace(struct sdbm *, byte *key, uns keylen, byte *val, uns vallen); /* val == NULL -> delete */
+int sdbm_delete(struct sdbm *, byte *key, uns keylen);
+int sdbm_fetch(struct sdbm *, byte *key, uns keylen, byte *val, uns *vallen); /* val can be NULL */
+void sdbm_rewind(struct sdbm *);
+int sdbm_get_next(struct sdbm *, byte *key, uns *keylen, byte *val, uns *vallen); /* val can be NULL */
+void sdbm_sync(struct sdbm *);
+u32 sdbm_hash(byte *key, uns keylen);
+
+/* Flags for sdbm_options.flags (a bit mask) */
+#define SDBM_CREAT 1 /* Create the database if it doesn't exist */
+#define SDBM_WRITE 2 /* Open the database in read/write mode */
+#define SDBM_SYNC 4 /* Sync after each operation */
+#define SDBM_FAST 8 /* Don't sync on directory splits -- results in slightly faster
+ * operation, but reconstruction of database after program crash
+ * may be impossible.
+ */
+#define SDBM_FSYNC 16 /* When syncing, call fsync() */
+
+/* Error codes returned by the functions above (always negative) */
+#define SDBM_ERROR_BAD_KEY_SIZE -1 /* Fixed key size doesn't match */
+#define SDBM_ERROR_BAD_VAL_SIZE -2 /* Fixed value size doesn't match */
+#define SDBM_ERROR_TOO_LARGE -3 /* Key/value doesn't fit in buffer supplied */
+#define SDBM_ERROR_READ_ONLY -4 /* Database has been opened read only */
+#define SDBM_ERROR_GIANT -5 /* Key/value too large to fit in a page */
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Fast Database Management Routines -- Internal Declarations
+ *
+ * (c) 1999--2001 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+/* Number of entries in the free page pool kept in the root page */
+#define SDBM_NUM_FREE_PAGE_POOLS 32
+
+/* Contents of the root page, which is page 0 of the database file */
+struct sdbm_root { /* Must fit in 1K which is minimum page size */
+ u32 magic; /* SDBM_MAGIC */
+ u32 version; /* SDBM_VERSION */
+ u32 page_order; /* Binary logarithm of page size */
+ s32 key_size; /* Key/val size, -1=variable */
+ s32 val_size;
+ u32 dir_start; /* First page of the page directory */
+ u32 dir_order; /* Binary logarithm of directory size */
+ /*
+ * As we know the only thing which can be freed is the page directory
+ * and it can grow only a limited number of times, we can use a very
+ * simple-minded representation of the free page pool. We also assume
+ * these entries are sorted by start position.
+ */
+ struct {
+ u32 first; /* Number of the first free page of the range */
+ u32 count; /* Number of free pages in the range, 0 = unused entry */
+ } free_pool[SDBM_NUM_FREE_PAGE_POOLS];
+};
+
+/* Layout of a data page: a 4-byte fill counter followed by the packed entries */
+struct sdbm_bucket {
+ u32 used; /* Bytes used in this bucket */
+ byte data[0]; /* Entries, stored back to back */
+};
+
+/* In-memory handle of an open database */
+struct sdbm {
+ struct page_cache *cache;
+ int fd;
+ struct sdbm_root *root; /* Points into the data of root_page */
+ struct page *root_page; /* Held from sdbm_open() until sdbm_close() */
+ int key_size; /* Cached values from root page */
+ int val_size;
+ uns page_order;
+ uns page_size;
+ uns page_mask; /* page_size - 1 */
+ uns dir_size; /* Page directory size in entries */
+ uns dir_shift; /* Number of significant bits of hash function */
+ uns file_size; /* in pages */
+ uns flags;
+ uns find_page, find_pos; /* Current pointer for sdbm_get_next() */
+ uns find_free_list; /* First free list entry not skipped by sdbm_get_next() */
+};
+
+#define SDBM_MAGIC 0x5344424d	/* The bytes 'S', 'D', 'B', 'M' in ASCII */
+#define SDBM_VERSION 2
+
+/*
+ * Access the u32 located `o' bytes after the pointer `p'. The result is
+ * an lvalue, so it can be both read and assigned to. The whole expansion
+ * is parenthesized, so that it behaves as a single operand in any context.
+ */
+#define GET32(p,o) (*((u32 *)((p)+(o))))
--- /dev/null
+# Configuration variables of the UCW library and their default values
+# (c) 2005--2007 Martin Mares <mj@ucw.cz>
+#
+# Every variable is either given a value by Set() or left undefined by UnSet().
+# NOTE(review): presumably this file is evaluated as Perl code by the configure
+# script -- confirm against the build system.
+
+# Version of the whole package
+Set("SHERLOCK_VERSION" => "3.12.3");
+
+# Compile everything with debug information and ASSERT's
+UnSet("CONFIG_DEBUG");
+
+# Enable aggressive optimizations depending on exact CPU type (don't use for portable packages)
+UnSet("CONFIG_EXACT_CPU");
+
+# Support files >2GB
+Set("CONFIG_LARGE_FILES");
+
+# Use shared libraries
+UnSet("CONFIG_SHARED");
+
+# If your system doesn't contain GNU libc 2.3 or newer, it's recommended to let Sherlock
+# use its own regex library (a copy of the glibc one), because the default regex library
+# is likely to be of poor quality.
+Set("CONFIG_OWN_REGEX");
+
+# If your system can't reset getopt with 'optind = 0', you need to compile our internal copy
+# of GNU libc's getopt. This should not be necessary on GNU libc.
+UnSet("CONFIG_OWN_GETOPT");
+
+# Install libraries and their API includes
+UnSet("CONFIG_INSTALL_API");
+
+# Build with support for multi-threaded programs
+Set("CONFIG_UCW_THREADS" => 1);
+
+# Include Perl modules
+Set("CONFIG_UCW_PERL" => 1);
+
+# Include Perl modules written in C
+UnSet("CONFIG_UCW_PERL_MODULES");
+
+# Include support utilities for shell scripts
+Set("CONFIG_UCW_SHELL_UTILS" => 1);
+
+# Default configuration file
+UnSet("DEFAULT_CONFIG");
+
+# Environment variable with configuration file
+UnSet("ENV_VAR_CONFIG");
+
+# Return success (the file ends with a true value)
+1;
--- /dev/null
+/*
+ * UCW Library -- Fast Allocator for Fixed-Size Elements
+ *
+ * (c) 2007 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+/*
+ * This allocator is optimized for intensive allocation and freeing of small
+ * blocks of identical sizes. System memory is allocated by multiples of the
+ * page size and it is returned back only when the whole eltpool is deleted.
+ *
+ * In the future, we can add returning of memory to the system and also cache
+ * coloring like in the SLAB allocator used in the Linux kernel.
+ */
+
+#undef LOCAL_DEBUG
+
+#include "lib/lib.h"
+#include "lib/eltpool.h"
+
+/*
+ * Create a pool of elements of `elt_size' bytes each.
+ *
+ * The element size is raised to at least the size of a free list node and
+ * aligned to CPU_STRUCT_ALIGN. The chunk size starts at CPU_PAGE_SIZE and
+ * is doubled until the chunk header plus `elts_per_chunk' elements fit;
+ * the number of elements per chunk is then recomputed so that the chunk is
+ * used as fully as possible.
+ */
+struct eltpool *
+ep_new(uns elt_size, uns elts_per_chunk)
+{
+  uns esize = ALIGN_TO(MAX(elt_size, sizeof(struct eltpool_free)), CPU_STRUCT_ALIGN);
+  uns csize = CPU_PAGE_SIZE;
+  struct eltpool *pool;
+
+  while (esize * elts_per_chunk + sizeof(struct eltpool_chunk) > csize)
+    csize *= 2;
+
+  pool = xmalloc_zero(sizeof(*pool));
+  pool->elt_size = esize;
+  pool->chunk_size = csize;
+  pool->elts_per_chunk = (csize - sizeof(struct eltpool_chunk)) / esize;
+  DBG("ep_new(): got elt_size=%d, epc=%d; used chunk_size=%d, epc=%d", elt_size, elts_per_chunk, pool->chunk_size, pool->elts_per_chunk);
+  return pool;
+}
+
+/*
+ * Destroy the pool: hand every chunk back with page_free() and then free
+ * the pool structure itself.
+ */
+void
+ep_delete(struct eltpool *pool)
+{
+  for (;;)
+    {
+      struct eltpool_chunk *ch = pool->first_chunk;
+      if (!ch)
+        break;
+      /* Unlink the chunk before it is released */
+      pool->first_chunk = ch->next;
+      page_free(ch, pool->chunk_size);
+    }
+  xfree(pool);
+}
+
+/*
+ * Slow path of ep_alloc(): allocate a new chunk, put all of its elements
+ * except the last one on the free list and return the last one.
+ */
+void *
+ep_alloc_slow(struct eltpool *pool)
+{
+  struct eltpool_chunk *ch = page_alloc(pool->chunk_size);
+  char *elt = (char *)(ch+1);		/* Elements start right after the chunk header */
+  uns left;
+
+  for (left = pool->elts_per_chunk; left > 1; left--)
+    {
+      struct eltpool_free *f = (struct eltpool_free *) elt;
+      f->next = pool->first_free;
+      pool->first_free = f;
+      elt += pool->elt_size;
+    }
+  ch->next = pool->first_chunk;
+  pool->first_chunk = ch;
+  return elt;
+}
+
+#ifdef TEST
+
+#include <stdio.h>
+#include "lib/clists.h"
+
+/* Test element: a list node followed by a single byte, declared PACKED */
+struct argh {
+ cnode n;
+ byte x[1];
+} PACKED;
+
+/*
+ * Self-test: allocate 65536 elements from a pool, link them to a list
+ * (every third one at the head, the others at the tail) and after every
+ * fifth allocation remove the list head and return it to the pool.
+ * Prints "OK" at the end.
+ */
+int main(void)
+{
+ struct eltpool *ep = ep_new(sizeof(struct argh), 64);
+ clist l;
+ clist_init(&l);
+ for (uns i=0; i<65536; i++)
+ {
+ struct argh *a = ep_alloc(ep);
+ if (i % 3)
+ clist_add_tail(&l, &a->n);
+ else
+ clist_add_head(&l, &a->n);
+ if (!(i % 5))
+ {
+ /* The node is the first member of struct argh, so the head node is the element */
+ a = clist_head(&l);
+ clist_remove(&a->n);
+ ep_free(ep, a);
+ }
+ }
+ /* Elements still on the list are released together with the pool */
+ ep_delete(ep);
+ puts("OK");
+ return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Fast Allocator for Fixed-Size Elements
+ *
+ * (c) 2007 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_ELTPOOL_H
+#define _UCW_ELTPOOL_H
+
+/* A pool of fixed-size elements */
+struct eltpool {
+ struct eltpool_chunk *first_chunk; /* Linked list of all chunks allocated so far */
+ struct eltpool_free *first_free; /* Linked list of free elements */
+ uns elt_size; /* Size of an element after alignment */
+ uns chunk_size; /* Size of a chunk in bytes, including its header */
+ uns elts_per_chunk; /* Number of elements which fit in a chunk */
+ uns num_allocated; // Just for debugging
+};
+
+/* Header at the start of each chunk */
+struct eltpool_chunk {
+ struct eltpool_chunk *next;
+ /* Chunk data continue here */
+};
+
+/* A free element is reused as a node of the free list */
+struct eltpool_free {
+ struct eltpool_free *next;
+};
+
+struct eltpool *ep_new(uns elt_size, uns elts_per_chunk);
+void ep_delete(struct eltpool *pool);
+void *ep_alloc_slow(struct eltpool *pool);
+
+/*
+ * Allocate one element from the pool. The first element of the free list
+ * is taken if there is any; otherwise ep_alloc_slow() supplies one. With
+ * CONFIG_FAKE_ELTPOOL defined, the element comes from xmalloc() instead.
+ */
+static inline void *
+ep_alloc(struct eltpool *pool)
+{
+  pool->num_allocated++;
+#ifdef CONFIG_FAKE_ELTPOOL
+  return xmalloc(pool->elt_size);
+#else
+  struct eltpool_free *elt = pool->first_free;
+  if (!elt)
+    return ep_alloc_slow(pool);
+  pool->first_free = elt->next;
+  return elt;
+#endif
+}
+
+/*
+ * Return the element `p' to the pool by pushing it onto the head of the
+ * free list. With CONFIG_FAKE_ELTPOOL defined, it is passed to xfree() instead.
+ */
+static inline void
+ep_free(struct eltpool *pool, void *p)
+{
+ pool->num_allocated--;
+#ifdef CONFIG_FAKE_ELTPOOL
+ (void) pool;
+ xfree(p);
+#else
+ /* The memory of the freed element itself holds the list link */
+ struct eltpool_free *elt = p;
+ elt->next = pool->first_free;
+ pool->first_free = elt;
+#endif
+}
+
+#endif
--- /dev/null
+# Tests for eltpools
+# Runs the eltpool self-test program and expects it to print "OK".
+
+Run: ../obj/lib/eltpool-t
+Out: OK
--- /dev/null
+/*
+ * UCW Library -- Formatting of Process Exit Status
+ *
+ * (c) 2004 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+
+#include <stdio.h>
+#include <sys/wait.h>
+#include <errno.h>
+
+/*
+ * Format the process status `stat' as a human-readable message in `msg'.
+ *
+ * Returns 0 and sets `msg' to an empty string if the process exited with
+ * code 0. Otherwise returns 1 and `msg' describes the failure: a negative
+ * status (reported as a failed fork together with errno), a non-zero exit
+ * code, a terminating signal or any other status in hexadecimal.
+ */
+int
+format_exit_status(char *msg, int stat)
+{
+  if (stat < 0)
+    {
+      sprintf(msg, "failed to fork (err=%d)", errno);
+      return 1;
+    }
+  if (WIFEXITED(stat) && WEXITSTATUS(stat) < 256)
+    {
+      int code = WEXITSTATUS(stat);
+      if (!code)
+        {
+          /* Clean exit: nothing to report */
+          msg[0] = 0;
+          return 0;
+        }
+      sprintf(msg, "died with exit code %d", code);
+      return 1;
+    }
+  if (WIFSIGNALED(stat))
+    sprintf(msg, "died on signal %d", WTERMSIG(stat));
+  else
+    sprintf(msg, "died with status %x", stat);
+  return 1;
+}
--- /dev/null
+/*
+ * UCW Library -- Fast Buffered I/O
+ *
+ * (c) 1997--2007 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/fastbuf.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+/* Flush the stream and call its close hook (if any). A NULL fastbuf is ignored. */
+void bclose(struct fastbuf *f)
+{
+  if (!f)
+    return;
+  bflush(f);
+  if (f->close)
+    f->close(f);
+}
+
+/*
+ * Flush the buffer: if bptr is past bstop (written data), call the spout hook;
+ * otherwise, if bstop is past the buffer start (read data), drop the buffer
+ * contents by resetting both bptr and bstop to the start of the buffer.
+ */
+void bflush(struct fastbuf *f)
+{
+  if (f->bptr > f->bstop)
+    {
+      f->spout(f);
+      return;
+    }
+  if (f->bstop > f->buffer)
+    f->bptr = f->bstop = f->buffer;
+}
+
+/*
+ * Seek to an absolute position.
+ * If pos lies within the range covered by [buffer, bstop], only bptr is
+ * moved (this shortcut is meaningful only when reading). Otherwise the
+ * buffer is flushed and the seek hook is called; dies if there is no seek
+ * hook or if it reports failure.
+ */
+inline void bsetpos(struct fastbuf *f, sh_off_t pos)
+{
+  if (pos < f->pos - (f->bstop - f->buffer) || pos > f->pos)
+    {
+      /* Outside the buffered window, need a real seek */
+      bflush(f);
+      if (!f->seek || !f->seek(f, pos, SEEK_SET))
+        die("bsetpos: stream not seekable");
+      return;
+    }
+  f->bptr = f->bstop + (pos - f->pos);
+}
+
+/*
+ * Seek relative to the start (SEEK_SET), the current position (SEEK_CUR)
+ * or the end (SEEK_END) of the stream.
+ *
+ * SEEK_SET and SEEK_CUR are converted to absolute positions and passed to
+ * bsetpos(). SEEK_END flushes the buffer and calls the seek hook directly.
+ * Dies if the stream is not seekable or if whence is not one of the three
+ * values above.
+ */
+void bseek(struct fastbuf *f, sh_off_t pos, int whence)
+{
+  switch (whence)
+    {
+    case SEEK_SET:
+      /* bsetpos() returns void, so call it and leave the switch explicitly */
+      bsetpos(f, pos);
+      break;
+    case SEEK_CUR:
+      bsetpos(f, btell(f) + pos);
+      break;
+    case SEEK_END:
+      bflush(f);
+      if (!f->seek || !f->seek(f, pos, SEEK_END))
+        die("bseek: stream not seekable");
+      break;
+    default:
+      die("bseek: invalid whence=%d", whence);
+    }
+}
+
+/*
+ * Slow path of bgetc(): return the next byte, calling the refill hook
+ * when the buffer is exhausted. Returns -1 if refill reports no data.
+ */
+int bgetc_slow(struct fastbuf *f)
+{
+  if (f->bptr >= f->bstop && !f->refill(f))
+    return -1;
+  return *f->bptr++;
+}
+
+/*
+ * Slow path of bpeekc(): return the next byte without consuming it, calling
+ * the refill hook when the buffer is exhausted. Returns -1 if refill reports
+ * no data.
+ */
+int bpeekc_slow(struct fastbuf *f)
+{
+  if (f->bptr >= f->bstop && !f->refill(f))
+    return -1;
+  return *f->bptr;
+}
+
+/* Slow path of bputc(): call the spout hook if the buffer is full, then store the byte. */
+void bputc_slow(struct fastbuf *f, uns c)
+{
+  if (f->bptr >= f->bufend)
+    f->spout(f);
+  *f->bptr = c;
+  f->bptr++;
+}
+
+/*
+ * Slow path of bread() and breadb(): copy up to l bytes to b, refilling
+ * the buffer as needed, and return the number of bytes copied.
+ *
+ * If check is non-zero, a read which delivers some, but not all of the
+ * requested bytes is fatal. Reading nothing at all is never fatal.
+ */
+uns bread_slow(struct fastbuf *f, void *b, uns l, uns check)
+{
+  byte *dst = b;
+  uns copied = 0;
+
+  while (l)
+    {
+      uns avail = f->bstop - f->bptr;
+      if (!avail)
+        {
+          f->refill(f);
+          avail = f->bstop - f->bptr;
+          if (!avail)
+            break;
+        }
+      uns chunk = (avail > l) ? l : avail;
+      memcpy(dst, f->bptr, chunk);
+      f->bptr += chunk;
+      dst += chunk;
+      copied += chunk;
+      l -= chunk;
+    }
+  if (check && copied && l)
+    die("breadb: short read");
+  return copied;
+}
+
+/*
+ * Slow path of bwrite(): copy l bytes from b to the stream piece by piece,
+ * calling the spout hook whenever the buffer has no free space left.
+ */
+void bwrite_slow(struct fastbuf *f, const void *b, uns l)
+{
+  const byte *src = b;
+
+  while (l)
+    {
+      uns room = f->bufend - f->bptr;
+      if (!room)
+        {
+          f->spout(f);
+          room = f->bufend - f->bptr;
+        }
+      uns chunk = (room > l) ? l : room;
+      memcpy(f->bptr, src, chunk);
+      f->bptr += chunk;
+      src += chunk;
+      l -= chunk;
+    }
+}
+
+/*
+ * Slow path of bbcopy(): copy l bytes from f to t using the bdirect interface.
+ * l == ~0U means "copy until the source is exhausted"; for any other length,
+ * running out of source data is fatal.
+ */
+void
+bbcopy_slow(struct fastbuf *f, struct fastbuf *t, uns l)
+{
+  /* l never changes when it is ~0U and never becomes ~0U otherwise */
+  int unlimited = (l == ~0U);
+
+  while (l)
+    {
+      byte *src, *dst;
+      uns src_len = bdirect_read_prepare(f, &src);
+      if (!src_len)
+        {
+          if (unlimited)
+            return;
+          die("bbcopy: source exhausted");
+        }
+      uns dst_len = bdirect_write_prepare(t, &dst);
+      uns n = MIN(l, src_len);
+      n = MIN(n, dst_len);
+      memcpy(dst, src, n);
+      bdirect_read_commit(f, src + n);
+      bdirect_write_commit(t, dst + n);
+      if (!unlimited)
+        l -= n;
+    }
+}
+
+/*
+ * Pass a configuration request to the stream's config hook and return
+ * its result. Returns -1 if the stream has no config hook.
+ */
+int
+bconfig(struct fastbuf *f, uns item, int value)
+{
+  if (!f->config)
+    return -1;
+  return f->config(f, item, value);
+}
+
+/* Flush the stream and seek back to position 0. */
+void
+brewind(struct fastbuf *f)
+{
+  const sh_off_t start = 0;
+
+  bflush(f);
+  bsetpos(f, start);
+}
+
+/*
+ * Slow path of bskip(): skip len bytes of input using the bdirect interface.
+ * Returns 1 on success, 0 if the input ended before len bytes were skipped.
+ */
+int
+bskip_slow(struct fastbuf *f, uns len)
+{
+  for (; len; )
+    {
+      byte *ptr;
+      uns avail = bdirect_read_prepare(f, &ptr);
+      if (!avail)
+        return 0;
+      uns step = MIN(avail, len);
+      bdirect_read_commit(f, ptr + step);
+      len -= step;
+    }
+  return 1;
+}
+
+/*
+ * Return the size of the file behind the stream, or -1 if the stream is
+ * not seekable (it has no seek hook, or the hook refuses to seek to the end).
+ * A NULL fastbuf is reported as size 0.
+ *
+ * The size is found by seeking to the end of the file; the original
+ * position is restored afterwards. The buffer gets flushed on the way.
+ */
+sh_off_t
+bfilesize(struct fastbuf *f)
+{
+  if (!f)
+    return 0;
+  if (!f->seek)		/* Some back-ends leave the seek hook NULL */
+    return -1;
+  sh_off_t pos = btell(f);
+  bflush(f);
+  if (!f->seek(f, 0, SEEK_END))
+    return -1;
+  sh_off_t len = btell(f);
+  bsetpos(f, pos);
+  return len;
+}
--- /dev/null
+/*
+ * UCW Library -- Fast Buffered I/O
+ *
+ * (c) 1997--2007 Martin Mares <mj@ucw.cz>
+ * (c) 2004 Robert Spalek <robert@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_FASTBUF_H
+#define _UCW_FASTBUF_H
+
+#include <string.h>
+#include <alloca.h>
+
+/*
+ * Generic buffered I/O. You supply hooks to be called for low-level operations
+ * (swapping of buffers, seeking and closing), we do the rest.
+ *
+ * Buffer layout when reading:
+ *
+ * +----------------+---------------------------+
+ * | read data | free space |
+ * +----------------+---------------------------+
+ * ^ ^ ^ ^
+ * buffer bptr bstop bufend
+ *
+ * After the last character is read, bptr == bstop and buffer refill
+ * is deferred to the next read attempt. This gives us an easy way
+ * to implement bungetc().
+ *
+ * When writing:
+ *
+ * +--------+--------------+--------------------+
+ * | unused | written data | free space |
+ * +--------+--------------+--------------------+
+ * ^ ^ ^ ^
+ * buffer bstop bptr bufend
+ *
+ * Dirty tricks:
+ *
+ * - You can mix reads and writes on the same stream, but you must
+ * call bflush() in between and remember that the file position
+ * points after the flushed buffer which is not necessarily the same
+ * as after the data you've read.
+ * - The spout/refill hooks can change not only bptr and bstop, but also
+ * the location of the buffer; fb-mem.c takes advantage of it.
+ * - In some cases, the user of the bdirect interface can be allowed to modify
+ * the data in the buffer to avoid unnecessary copying. If the back-end
+ * allows such modifications, it can set can_overwrite_buffer accordingly:
+ * * 0 if no modification is allowed,
+ * * 1 if the user can modify the buffer on the condition that
+ * the modifications will be undone before calling the next
+ * fastbuf operation
+ * * 2 if the user is allowed to overwrite the data in the buffer
+ * if bdirect_read_commit_modified() is called afterwards.
+ * In this case, the back-end must be prepared for trimming
+ * of the buffer which is done by the commit function.
+ */
+
+/*
+ * The stream handle shared by all back-ends. The four pointers follow the
+ * buffer layouts described above; the hooks are supplied by the back-end
+ * and some of them may be NULL (callers check seek, close and config).
+ */
+struct fastbuf {
+ byte is_fastbuf[0]; /* Dummy field for checking of type casts */
+ byte *bptr, *bstop; /* Access pointers */
+ byte *buffer, *bufend; /* Start and end of the buffer */
+ char *name; /* File name for error messages */
+ sh_off_t pos; /* Position of bstop in the file */
+ int (*refill)(struct fastbuf *); /* Get a buffer with new data */
+ void (*spout)(struct fastbuf *); /* Write buffer data to the file */
+ int (*seek)(struct fastbuf *, sh_off_t, int); /* Slow path for bseek(), buffer already flushed; returns success */
+ void (*close)(struct fastbuf *); /* Close the stream */
+ int (*config)(struct fastbuf *, uns, int); /* Configure the stream */
+ int can_overwrite_buffer; /* Can the buffer be altered? (see discussion above) 0=never, 1=temporarily, 2=permanently */
+};
+
+/* FastIO on files with several configurable back-ends */
+
+/* Back-end selector stored in struct fb_params.type */
+enum fb_type { /* Which back-end you want to use */
+ FB_STD, /* Standard buffered I/O */
+ FB_DIRECT, /* Direct I/O bypassing system caches (see fb-direct.c for a description) */
+ FB_MMAP /* Memory mapped files */
+};
+
+/* Parameters accepted by bopen_file(), bopen_file_try(), bopen_tmp_file() and bopen_fd(). */
+struct fb_params {
+ enum fb_type type; /* Which back-end to use */
+ uns buffer_size; /* 0 for default size */
+ uns keep_back_buf; /* FB_STD: optimize for bi-directional access */
+ uns read_ahead; /* FB_DIRECT options */
+ uns write_back; /* presumably FB_DIRECT only as well, cf. fbdir_open_fd_internal() -- confirm */
+ struct asio_queue *asio; /* presumably the I/O queue for FB_DIRECT -- confirm */
+};
+
+struct cf_section;
+extern struct cf_section fbpar_cf;
+extern struct fb_params fbpar_def;
+
+struct fastbuf *bopen_file(const char *name, int mode, struct fb_params *params); /* Use params==NULL for defaults */
+struct fastbuf *bopen_file_try(const char *name, int mode, struct fb_params *params);
+struct fastbuf *bopen_tmp_file(struct fb_params *params);
+struct fastbuf *bopen_fd(int fd, struct fb_params *params);
+
+/* FastIO on standard files (shortcuts for FB_STD) */
+
+struct fastbuf *bopen(const char *name, uns mode, uns buflen);
+struct fastbuf *bopen_try(const char *name, uns mode, uns buflen);
+struct fastbuf *bopen_tmp(uns buflen);
+struct fastbuf *bfdopen(int fd, uns buflen);
+struct fastbuf *bfdopen_shared(int fd, uns buflen);
+void bfilesync(struct fastbuf *b);
+
+/* Temporary files */
+
+#define TEMP_FILE_NAME_LEN 256
+void temp_file_name(char *name);
+void bfix_tmp_file(struct fastbuf *fb, const char *name);
+
+/* Internal functions of some file back-ends */
+
+struct fastbuf *bfdopen_internal(int fd, const char *name, uns buflen);
+struct fastbuf *bfmmopen_internal(int fd, const char *name, uns mode);
+
+extern uns fbdir_cheat;
+struct asio_queue;
+struct fastbuf *fbdir_open_fd_internal(int fd, const char *name, struct asio_queue *io_queue, uns buffer_size, uns read_ahead, uns write_back);
+
+void bclose_file_helper(struct fastbuf *f, int fd, int is_temp_file);
+
+/* FastIO on in-memory streams */
+
+struct fastbuf *fbmem_create(uns blocksize); /* Create stream and return its writing fastbuf */
+struct fastbuf *fbmem_clone_read(struct fastbuf *); /* Create reading fastbuf */
+
+/* FastI on file descriptors with limit */
+
+struct fastbuf *bopen_limited_fd(int fd, uns bufsize, uns limit);
+
+/* FastIO on static buffers */
+
+void fbbuf_init_read(struct fastbuf *f, byte *buffer, uns size, uns can_overwrite);
+void fbbuf_init_write(struct fastbuf *f, byte *buffer, uns size);
+/* Number of bytes between bstop and bptr, i.e., written to the buffer so far. */
+static inline uns
+fbbuf_count_written(struct fastbuf *f)
+{
+  uns written = f->bptr - f->bstop;
+  return written;
+}
+
+/* FastIO on recyclable growing buffers */
+
+struct fastbuf *fbgrow_create(unsigned basic_size);
+void fbgrow_reset(struct fastbuf *b); /* Reset stream and prepare for writing */
+void fbgrow_rewind(struct fastbuf *b); /* Prepare for reading */
+
+/* FastO on memory pools */
+
+struct mempool;
+/* A fastbuf writing to a memory pool; set up by fbpool_init() and fbpool_start(). */
+struct fbpool {
+ struct fastbuf fb; /* The embedded fastbuf (first member) */
+ struct mempool *mp; /* presumably the pool passed to fbpool_start() -- confirm in fb-pool.c */
+};
+
+void fbpool_init(struct fbpool *fb); /* Initialize a new fastbuf */
+void fbpool_start(struct fbpool *fb, struct mempool *mp, uns init_size);
+ /* Start a new continuous block and prepare for writing (see mp_start()) */
+void *fbpool_end(struct fbpool *fb); /* Close the block and return its address (see mp_end()).
+ The length can be determined with mp_size(mp, ptr). */
+
+/* FastO with atomic writes for multi-threaded programs */
+
+/* Per-handle state of the atomic-write back-end (see fb-atomic.c). */
+struct fb_atomic {
+ struct fastbuf fb; /* The embedded fastbuf (first member) */
+ struct fb_atomic_file *af; /* File state shared by all handles opened with the same master */
+ byte *expected_max_bptr; /* fbatomic_commit() writes the buffer out once bptr reaches this point */
+ uns slack_size; /* Space kept between expected_max_bptr and bufend */
+};
+/* Convert a fastbuf pointer to the enclosing fb_atomic; is_fastbuf makes this work on fastbufs only */
+#define FB_ATOMIC(f) ((struct fb_atomic *)(f)->is_fastbuf)
+
+struct fastbuf *fbatomic_open(const char *name, struct fastbuf *master, uns bufsize, int record_len);
+void fbatomic_internal_write(struct fastbuf *b);
+
+/*
+ * Commit the data written so far: if bptr has reached expected_max_bptr,
+ * write the buffer out by fbatomic_internal_write(), otherwise do nothing.
+ */
+static inline void
+fbatomic_commit(struct fastbuf *b)
+{
+  struct fb_atomic *fa = (struct fb_atomic *) b;
+
+  if (b->bptr < fa->expected_max_bptr)
+    return;
+  fbatomic_internal_write(b);
+}
+
+/* Configuring stream parameters */
+
+/* Items understood by bconfig(); a back-end's config hook may support only some of them */
+enum bconfig_type {
+ BCONFIG_IS_TEMP_FILE, /* 0=normal file, 1=temporary file, 2=shared fd */
+ BCONFIG_KEEP_BACK_BUF, /* Optimize for bi-directional access */
+};
+
+int bconfig(struct fastbuf *f, uns type, int data);
+
+/* Universal functions working on all fastbuf's */
+
+void bclose(struct fastbuf *f);
+void bflush(struct fastbuf *f);
+void bseek(struct fastbuf *f, sh_off_t pos, int whence);
+void bsetpos(struct fastbuf *f, sh_off_t pos);
+void brewind(struct fastbuf *f);
+sh_off_t bfilesize(struct fastbuf *f); /* -1 if not seekable */
+
+/* Current position in the stream: the position of bstop corrected by the distance from bstop to bptr. */
+static inline sh_off_t btell(struct fastbuf *f)
+{
+  return (f->bptr - f->bstop) + f->pos;
+}
+
+int bgetc_slow(struct fastbuf *f);
+/* Read a single byte; falls back to bgetc_slow() when the buffer is exhausted. */
+static inline int bgetc(struct fastbuf *f)
+{
+  if (f->bptr < f->bstop)
+    return *f->bptr++;
+  return bgetc_slow(f);
+}
+
+int bpeekc_slow(struct fastbuf *f);
+/* Look at the next byte without consuming it; falls back to bpeekc_slow() when the buffer is exhausted. */
+static inline int bpeekc(struct fastbuf *f)
+{
+  if (f->bptr < f->bstop)
+    return *f->bptr;
+  return bpeekc_slow(f);
+}
+
+/* Step one byte back in the buffer; no bounds are checked here. */
+static inline void bungetc(struct fastbuf *f)
+{
+  --f->bptr;
+}
+
+void bputc_slow(struct fastbuf *f, uns c);
+/* Write a single byte; falls back to bputc_slow() when the buffer is full. */
+static inline void bputc(struct fastbuf *f, uns c)
+{
+  if (f->bptr >= f->bufend)
+    {
+      bputc_slow(f, c);
+      return;
+    }
+  *f->bptr++ = c;
+}
+
+/* Number of bytes which can be read from the buffer without refilling */
+static inline uns
+bavailr(struct fastbuf *f)
+{
+  uns avail = f->bstop - f->bptr;
+  return avail;
+}
+
+/* Number of bytes which can be written to the buffer without calling spout */
+static inline uns
+bavailw(struct fastbuf *f)
+{
+  uns room = f->bufend - f->bptr;
+  return room;
+}
+
+uns bread_slow(struct fastbuf *f, void *b, uns l, uns check);
+/*
+ * Read up to l bytes to b and return the number of bytes read.
+ * Requests not satisfiable from the buffer go to bread_slow() with check=0.
+ */
+static inline uns bread(struct fastbuf *f, void *b, uns l)
+{
+  if (bavailr(f) < l)
+    return bread_slow(f, b, l, 0);
+  memcpy(b, f->bptr, l);
+  f->bptr += l;
+  return l;
+}
+
+/*
+ * Read a block of l bytes to b and return the number of bytes read.
+ * Requests not satisfiable from the buffer go to bread_slow() with check=1,
+ * so a partially read block is fatal.
+ */
+static inline uns breadb(struct fastbuf *f, void *b, uns l)
+{
+  if (bavailr(f) < l)
+    return bread_slow(f, b, l, 1);
+  memcpy(b, f->bptr, l);
+  f->bptr += l;
+  return l;
+}
+
+void bwrite_slow(struct fastbuf *f, const void *b, uns l);
+/* Write l bytes from b; falls back to bwrite_slow() when they do not fit in the buffer. */
+static inline void bwrite(struct fastbuf *f, const void *b, uns l)
+{
+  if (bavailw(f) < l)
+    {
+      bwrite_slow(f, b, l);
+      return;
+    }
+  memcpy(f->bptr, b, l);
+  f->bptr += l;
+}
+
+/*
+ * Functions for reading of strings:
+ *
+ * bgets() reads a line, strips the trailing '\n' and returns a pointer
+ * to the terminating 0 or NULL on EOF. Dies if the line is too long.
+ * bgets0() does the same for 0-terminated strings.
+ * bgets_nodie() a variant of bgets() which returns either the length of the
+ * string (excluding the terminator) or -1 if the line does not fit
+ * in the buffer. In such cases, it returns after reading exactly `l'
+ * bytes of input.
+ * bgets_bb() a variant of bgets() which allocates the string in a growing buffer
+ * bgets_mp() the same, but in a mempool
+ * bgets_stk() the same, but on the stack by alloca()
+ */
+
+char *bgets(struct fastbuf *f, char *b, uns l);
+char *bgets0(struct fastbuf *f, char *b, uns l);
+int bgets_nodie(struct fastbuf *f, char *b, uns l);
+
+struct mempool;
+struct bb_t;
+uns bgets_bb(struct fastbuf *f, struct bb_t *b, uns limit);
+char *bgets_mp(struct fastbuf *f, struct mempool *mp);
+
+/* Internal state of the bgets_stk() macro, shared with bgets_stk_init() and bgets_stk_step() */
+struct bgets_stk_struct {
+ struct fastbuf *f;
+ byte *old_buf, *cur_buf, *src;
+ uns old_len, cur_len, src_len;
+};
+void bgets_stk_init(struct bgets_stk_struct *s);
+void bgets_stk_step(struct bgets_stk_struct *s);
+/*
+ * A statement expression: as long as cur_len is non-zero, alloca() a buffer of
+ * cur_len bytes in the caller's frame and call bgets_stk_step(); the value
+ * is the final cur_buf.
+ */
+#define bgets_stk(fb) ({ struct bgets_stk_struct _s; _s.f = (fb); for (bgets_stk_init(&_s); _s.cur_len; _s.cur_buf = alloca(_s.cur_len), bgets_stk_step(&_s)); _s.cur_buf; })
+
+/* Write a string without its terminating zero */
+static inline void
+bputs(struct fastbuf *f, const char *b)
+{
+  uns len = strlen(b);
+  bwrite(f, b, len);
+}
+
+/* Write a string including its terminating zero */
+static inline void
+bputs0(struct fastbuf *f, const char *b)
+{
+  uns len_with_zero = strlen(b)+1;
+  bwrite(f, b, len_with_zero);
+}
+
+/* Write a string (without the terminating zero) followed by a newline */
+static inline void
+bputsn(struct fastbuf *f, const char *b)
+{
+  bwrite(f, b, strlen(b));
+  bputc(f, '\n');
+}
+
+void bbcopy_slow(struct fastbuf *f, struct fastbuf *t, uns l);
+/*
+ * Copy l bytes from f to t. The copy is done directly between the buffers
+ * if both of them have l bytes available, otherwise by bbcopy_slow().
+ */
+static inline void
+bbcopy(struct fastbuf *f, struct fastbuf *t, uns l)
+{
+  if (bavailr(f) < l || bavailw(t) < l)
+    {
+      bbcopy_slow(f, t, l);
+      return;
+    }
+  memcpy(t->bptr, f->bptr, l);
+  f->bptr += l;
+  t->bptr += l;
+}
+
+int bskip_slow(struct fastbuf *f, uns len);
+/* Skip len bytes of input. Returns 1 on success, otherwise the result of bskip_slow(). */
+static inline int bskip(struct fastbuf *f, uns len)
+{
+  if (bavailr(f) < len)
+    return bskip_slow(f, len);
+  f->bptr += len;
+  return 1;
+}
+
+/* Direct I/O on buffers */
+
+/*
+ * Prepare for direct reading from the buffer: store a pointer to the unread
+ * data in *buf and return their length. The buffer is refilled first if it
+ * is exhausted; when the refill yields nothing, 0 is returned and *buf is NULL.
+ */
+static inline uns
+bdirect_read_prepare(struct fastbuf *f, byte **buf)
+{
+  if (f->bptr != f->bstop || f->refill(f))
+    {
+      *buf = f->bptr;
+      return bavailr(f);
+    }
+  *buf = NULL; // This is not needed, but it helps to get rid of spurious warnings
+  return 0;
+}
+
+/* Finish a direct read: pos becomes the new read pointer (the first byte not yet consumed). */
+static inline void
+bdirect_read_commit(struct fastbuf *f, byte *pos)
+{
+ f->bptr = pos;
+}
+
+/*
+ * Finish a direct read in which the caller has overwritten the data:
+ * besides advancing the read pointer to pos, the start of the buffer
+ * is moved there as well.
+ */
+static inline void
+bdirect_read_commit_modified(struct fastbuf *f, byte *pos)
+{
+  f->buffer = pos; /* Avoid seeking backwards in the buffer */
+  f->bptr = pos;
+}
+
+/*
+ * Prepare for direct writing to the buffer: store a pointer to the free
+ * space in *buf and return its length. If the buffer is full, the spout
+ * hook is called first.
+ */
+static inline uns
+bdirect_write_prepare(struct fastbuf *f, byte **buf)
+{
+  if (f->bptr == f->bufend)
+    f->spout(f);
+  *buf = f->bptr;
+  return f->bufend - f->bptr;
+}
+
+/* Finish a direct write: pos becomes the new write pointer (just after the last byte written). */
+static inline void
+bdirect_write_commit(struct fastbuf *f, byte *pos)
+{
+ f->bptr = pos;
+}
+
+/* Formatted output */
+
+int bprintf(struct fastbuf *b, const char *msg, ...) FORMAT_CHECK(printf,2,3);
+int vbprintf(struct fastbuf *b, const char *msg, va_list args);
+
+#endif
--- /dev/null
+# Tests for fastbufs
+
+Run: ../obj/lib/fb-file-t
+Out: 112
+ <hello><hello><hello><hello><hello><hello><hello><hello><hello><hello><hello><hello><hello><hello><hello><hello>
+ 112 116
+
+Run: ../obj/lib/fb-grow-t
+Out: <10><10><0>1234512345<10><9>5<10>
+ <10><10><0>1234512345<10><9>5<10>
+ <10><10><0>1234512345<10><9>5<10>
+ <10><10><0>1234512345<10><9>5<10>
+ <10><10><0>1234512345<10><9>5<10>
+
+Run: ../obj/lib/fb-pool-t
--- /dev/null
+/*
+ * UCW Library -- Atomic Buffered Write to Files
+ *
+ * (c) 2006 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+/*
+ * This fastbuf backend is intended for cases where several threads
+ * of a single program append records to a single file and while the
+ * record can mix in an arbitrary way, the bytes inside a single
+ * record must remain uninterrupted.
+ *
+ * In case of files with fixed record size, we just allocate the
+ * buffer to hold a whole number of records and take advantage
+ * of the atomicity of the write() system call.
+ *
+ * With variable-sized records, we need another solution: when
+ * writing a record, we keep the fastbuf in a locked state, which
+ * prevents buffer flushing (and if the buffer becomes full, we extend it),
+ * and we wait for an explicit commit operation which write()s the buffer
+ * if the free space in the buffer falls below the expected maximum record
+ * length.
+ *
+ * fbatomic_open() is called with the following parameters:
+ * name - name of the file to open
+ * master - fbatomic for the master thread or NULL if it's the first open
+ * bufsize - initial buffer size
+ * record_len - record length for fixed-size records;
+ * or -(expected maximum record length) for variable-sized ones.
+ */
+
+#define LOCAL_DEBUG
+
+#include "lib/lib.h"
+#include "lib/fastbuf.h"
+#include "lib/lfs.h"
+
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+/* State of one output file, shared by all fastbufs opened on it (see fbatomic_open()) */
+struct fb_atomic_file {
+ int fd; /* Descriptor of the output file */
+ int use_count; /* Number of fastbufs sharing this file; the file is closed when it drops to 0 */
+ int record_len; /* As passed to fbatomic_open() */
+ uns locked; /* Set for variable-sized records (record_len < 0): a full buffer is grown instead of written */
+ byte name[1]; /* File name; allocated together with the structure */
+};
+
+/*
+ * Write everything from the start of the buffer up to bptr by a single
+ * write() call and reset bptr to the start of the buffer. An empty buffer
+ * is left alone. Both write errors and partial writes are fatal.
+ */
+void
+fbatomic_internal_write(struct fastbuf *f)
+{
+  struct fb_atomic_file *af = FB_ATOMIC(f)->af;
+  int size = f->bptr - f->buffer;
+
+  if (!size)
+    return;
+  /* With fixed-size records, only whole records may be written */
+  ASSERT(af->record_len < 0 || !(size % af->record_len));
+  int res = write(af->fd, f->buffer, size);
+  if (res < 0)
+    die("Error writing %s: %m", f->name);
+  if (res != size)
+    die("Unexpected partial write to %s: written only %d bytes of %d", f->name, res, size);
+  f->bptr = f->buffer;
+}
+
+/*
+ * The spout hook. It acts only when the buffer is completely full, so
+ * explicit flushes are ignored.
+ *
+ * If the file is locked (variable-sized records), the buffer is enlarged
+ * by the current slack size and the slack is doubled; otherwise the buffer
+ * is written out by fbatomic_internal_write().
+ */
+static void
+fbatomic_spout(struct fastbuf *f)
+{
+  if (f->bptr < f->bufend)		/* Explicit flushes should be ignored */
+    return;
+
+  struct fb_atomic *F = FB_ATOMIC(f);
+  if (F->af->locked)
+    {
+      uns written = f->bptr - f->buffer;
+      uns size = f->bufend - f->buffer + F->slack_size;
+      F->slack_size *= 2;
+      DBG("Reallocating buffer for atomic file %s with slack %d", f->name, F->slack_size);
+      f->buffer = xrealloc(f->buffer, size);
+      /*
+       * The buffer could have moved, so all pointers into it must follow.
+       * This includes bstop, which this back-end keeps at the start of the
+       * buffer: a stale bstop would confuse the comparisons in bflush()
+       * and could make it throw the buffered data away.
+       */
+      f->bstop = f->buffer;
+      f->bufend = f->buffer + size;
+      f->bptr = f->buffer + written;
+      F->expected_max_bptr = f->bufend - F->slack_size;
+    }
+  else
+    fbatomic_internal_write(f);
+}
+
+/*
+ * The close hook: write out the remaining data, drop the reference to the
+ * shared file (closing it when this was the last user) and free the buffer
+ * and the fastbuf itself.
+ */
+static void
+fbatomic_close(struct fastbuf *f)
+{
+  struct fb_atomic_file *af = FB_ATOMIC(f)->af;
+  fbatomic_internal_write(f);	/* Need to flush explicitly, because the file can be locked */
+  if (!--af->use_count)
+    {
+      close(af->fd);
+      xfree(af);
+    }
+  xfree(f->buffer);		/* The buffer is allocated separately in fbatomic_open() */
+  xfree(f);
+}
+
+/*
+ * Open a fastbuf for atomic writes.
+ *
+ * Without a master, the file `name' is created (truncated, in append mode)
+ * and a new shared file state is set up. With a master, its file state is
+ * shared and record_len must be the same as the master's.
+ *
+ * For fixed-size records (record_len > 0), bufsize is rounded up to a
+ * multiple of the record length. For variable-sized records (record_len < 0),
+ * -record_len becomes the initial slack size, which must be smaller than
+ * bufsize. Dies if the file cannot be created.
+ */
+struct fastbuf *
+fbatomic_open(const char *name, struct fastbuf *master, uns bufsize, int record_len)
+{
+  struct fb_atomic *F = xmalloc_zero(sizeof(*F));
+  struct fastbuf *f = &F->fb;
+  struct fb_atomic_file *af;
+
+  if (!master)
+    {
+      /* name[1] in the structure already accounts for the terminating zero */
+      af = xmalloc_zero(sizeof(*af) + strlen(name));
+      af->fd = sh_open(name, O_WRONLY | O_CREAT | O_TRUNC | O_APPEND, 0666);
+      if (af->fd < 0)
+        die("Cannot create %s: %m", name);
+      af->use_count = 1;
+      af->record_len = record_len;
+      af->locked = (record_len < 0);
+      strcpy(af->name, name);
+    }
+  else
+    {
+      af = FB_ATOMIC(master)->af;
+      af->use_count++;
+      ASSERT(af->record_len == record_len);
+    }
+  F->af = af;
+
+  if (record_len > 0 && bufsize % record_len)
+    bufsize += record_len - (bufsize % record_len);
+  f->buffer = xmalloc(bufsize);
+  f->bufend = f->buffer + bufsize;
+  if (record_len < 0)
+    F->slack_size = -record_len;
+  else
+    F->slack_size = 0;
+  ASSERT(bufsize > F->slack_size);
+  F->expected_max_bptr = f->bufend - F->slack_size;
+  f->bptr = f->bstop = f->buffer;
+
+  f->name = af->name;
+  f->spout = fbatomic_spout;
+  f->close = fbatomic_close;
+  return f;
+}
+
+#ifdef TEST
+
+/*
+ * Test program (compiled only with TEST defined). It writes 17 four-byte
+ * records to the file "test" through a 16-byte buffer and then 100
+ * variable-sized records to "test2", alternating between two handles
+ * sharing the same file.
+ */
+int main(int argc UNUSED, char **argv UNUSED)
+{
+ struct fastbuf *f, *g;
+
+ log(L_INFO, "Testing block writes");
+ f = fbatomic_open("test", NULL, 16, 4);
+ for (u32 i=0; i<17; i++)
+ bwrite(f, &i, 4);
+ bclose(f);
+
+ log(L_INFO, "Testing interleaved var-size writes");
+ f = fbatomic_open("test2", NULL, 23, -5);
+ g = fbatomic_open("test2", f, 23, -5);
+ for (int i=0; i<100; i++)
+ {
+ struct fastbuf *x = (i%2) ? g : f;
+ bprintf(x, "%c<%d>\n", "fg"[i%2], ((259309*i) % 1000000) >> (i % 8));
+ fbatomic_commit(x);
+ }
+ bclose(f);
+ bclose(g);
+
+ return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Fast Buffered I/O on Static Buffers
+ *
+ * (c) 2003--2006 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/fastbuf.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+/* The refill hook of read buffers: there is never more data than the static buffer itself, so always report 0. */
+static int
+fbbuf_refill(struct fastbuf *f UNUSED)
+{
+ return 0;
+}
+
+/*
+ * The seek hook of read buffers. For SEEK_END, pos is taken relative to the
+ * end of the buffer; any other whence is treated as an absolute position.
+ * The resulting position must lie within the buffer (its end included).
+ * Always returns 1.
+ */
+static int
+fbbuf_seek(struct fastbuf *f, sh_off_t pos, int whence)
+{
+  /* Somebody might want to seek to the end of buffer, try to be nice to him. */
+  sh_off_t len = f->bufend - f->buffer;
+
+  pos += (whence == SEEK_END) ? len : 0;
+  ASSERT(pos >= 0 && pos <= len);
+  f->bstop = f->bufend;
+  f->bptr = f->buffer + pos;
+  f->pos = len;
+  return 1;
+}
+
+/*
+ * Initialize f for reading from a static buffer of the given size.
+ * The whole buffer is available for reading at once. The can_overwrite
+ * value is stored in can_overwrite_buffer. There are no spout, close and
+ * config hooks.
+ */
+void
+fbbuf_init_read(struct fastbuf *f, byte *buf, uns size, uns can_overwrite)
+{
+  *f = (struct fastbuf) {
+    .buffer = buf,
+    .bptr = buf,
+    .bstop = buf + size,
+    .bufend = buf + size,
+    .name = "fbbuf-read",
+    .pos = size,
+    .refill = fbbuf_refill,
+    .spout = NULL,
+    .seek = fbbuf_seek,
+    .close = NULL,
+    .config = NULL,
+    .can_overwrite_buffer = can_overwrite,
+  };
+}
+
+/* The spout hook of write buffers: a static buffer cannot be emptied, so running out of space is fatal. */
+static void
+fbbuf_spout(struct fastbuf *f UNUSED)
+{
+ die("fbbuf: buffer overflow on write");
+}
+
+/*
+ * Initialize f for writing to a static buffer of the given size.
+ * Writing beyond the end of the buffer is fatal (see the spout hook).
+ * The stream is neither readable nor seekable and has no close and
+ * config hooks.
+ */
+void
+fbbuf_init_write(struct fastbuf *f, byte *buf, uns size)
+{
+  f->buffer = f->bstop = f->bptr = buf;
+  f->bufend = buf + size;
+  f->name = "fbbuf-write";
+  /*
+   * pos is the position of bstop, which sits at the very start of the
+   * stream here, so it must be 0 for btell() to count the bytes written.
+   */
+  f->pos = 0;
+  f->refill = NULL;
+  f->spout = fbbuf_spout;
+  f->seek = NULL;
+  f->close = NULL;
+  f->config = NULL;
+  f->can_overwrite_buffer = 0;
+}
--- /dev/null
+/*
+ * UCW Library -- Fast Buffered I/O on O_DIRECT Files
+ *
+ * (c) 2006--2007 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+/*
+ * This is a fastbuf backend for fast streaming I/O using O_DIRECT and
+ * the asynchronous I/O module. It's designed for use on large files
+ * which don't fit in the disk cache.
+ *
+ * CAVEATS:
+ *
+ * - All operations with a single fbdirect handle must be done
+ * within a single thread, unless you provide a custom I/O queue
+ * and take care of locking.
+ *
+ * FIXME: what if the OS doesn't support O_DIRECT?
+ * FIXME: unaligned seeks and partial writes?
+ * FIXME: append to unaligned file
+ */
+
+#undef LOCAL_DEBUG
+
+#include "lib/lib.h"
+#include "lib/fastbuf.h"
+#include "lib/lfs.h"
+#include "lib/asio.h"
+#include "lib/conf.h"
+#include "lib/threads.h"
+
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdio.h>
+
+uns fbdir_cheat;
+
+/* Configuration section (registered as "FBDirect" by fbdir_global_init()) with a single item setting fbdir_cheat */
+static struct cf_section fbdir_cf = {
+ CF_ITEMS {
+ CF_UNS("Cheat", &fbdir_cheat),
+ CF_END
+ }
+};
+
+#define FBDIR_ALIGN 512
+
+/* M_NULL is the initial mode (the structure is zeroed on open) and also the mode entered before seeking and closing. */
+enum fbdir_mode { // Current operating mode
+ M_NULL,
+ M_READ,
+ M_WRITE
+};
+
+/* Per-stream state of the direct I/O back-end */
+struct fb_direct {
+ struct fastbuf fb;
+ int fd; // File descriptor
+ int is_temp_file; // Set by bconfig(BCONFIG_IS_TEMP_FILE), passed to bclose_file_helper() on close
+ struct asio_queue *io_queue; // I/O queue to use
+ struct asio_queue *user_queue; // If io_queue was supplied by the user
+ struct asio_request *pending_read; // Read-ahead request which has been submitted, but not finished yet
+ struct asio_request *done_read; // Finished read-ahead request waiting to be consumed by refill
+ struct asio_request *active_buffer; // Request whose buffer is currently used as the fastbuf buffer
+ enum fbdir_mode mode;
+ byte name[0]; // File name, allocated together with the structure
+};
+#define FB_DIRECT(f) ((struct fb_direct *)(f)->is_fastbuf)
+
+/* Register the configuration section under the name "FBDirect"; marked CONSTRUCTOR, so presumably run at program startup. */
+static void CONSTRUCTOR
+fbdir_global_init(void)
+{
+ cf_declare_section("FBDirect", &fbdir_cf, 0);
+}
+
+/*
+ * Wait until F has no pending read. Requests completed by the queue in
+ * the meantime can belong to other streams (they are identified by
+ * user_data); each of them is moved from pending_read to done_read
+ * of its owner.
+ */
+static void
+fbdir_read_sync(struct fb_direct *F)
+{
+  for (;;)
+    {
+      if (!F->pending_read)
+        break;
+      struct asio_request *r = asio_wait(F->io_queue);
+      ASSERT(r);
+      struct fb_direct *G = r->user_data;
+      ASSERT(G);
+      ASSERT(G->pending_read == r && !G->done_read);
+      G->done_read = r;
+      G->pending_read = NULL;
+    }
+}
+
+/*
+ * Switch the stream to another operating mode (no-op if already there).
+ * When leaving M_READ, read-ahead is waited for and its result is returned
+ * to the queue; when leaving M_WRITE, all requests in the queue are synced.
+ * The active buffer is released in all cases.
+ */
+static void
+fbdir_change_mode(struct fb_direct *F, enum fbdir_mode mode)
+{
+  if (F->mode == mode)
+    return;
+  DBG("FB-DIRECT: Switching mode to %d", mode);
+
+  if (F->mode == M_READ)
+    {
+      fbdir_read_sync(F);		// Wait for read-ahead requests to finish
+      if (F->done_read)			// Return read-ahead requests if any
+        {
+          asio_put(F->done_read);
+          F->done_read = NULL;
+        }
+    }
+  else if (F->mode == M_WRITE)
+    asio_sync(F->io_queue);		// Wait for pending writebacks
+
+  if (F->active_buffer)
+    {
+      asio_put(F->active_buffer);
+      F->active_buffer = NULL;
+    }
+  F->mode = mode;
+}
+
+/*
+ * Get a request from the queue, set it up as a read of one queue buffer
+ * from the stream's file, submit it and remember it as the pending read.
+ */
+static void
+fbdir_submit_read(struct fb_direct *F)
+{
+  struct asio_request *req = asio_get(F->io_queue);
+
+  req->op = ASIO_READ;
+  req->fd = F->fd;
+  req->len = F->io_queue->buffer_size;
+  req->user_data = F;		// Lets fbdir_read_sync() find the owner
+  asio_submit(req);
+  F->pending_read = req;
+}
+
+/*
+ * The refill hook. It takes a finished read request (submitting and
+ * waiting for one if there is none), makes its buffer the buffer of the
+ * fastbuf and submits a read-ahead of the next block.
+ * Returns the number of bytes read, 0 when the read returned no data.
+ * Read errors are fatal.
+ */
+static int
+fbdir_refill(struct fastbuf *f)
+{
+ struct fb_direct *F = FB_DIRECT(f);
+
+ DBG("FB-DIRECT: Refill");
+
+ if (!F->done_read)
+ {
+ if (!F->pending_read)
+ {
+ // No read-ahead in flight: (re)enter read mode and start one
+ fbdir_change_mode(F, M_READ);
+ fbdir_submit_read(F);
+ }
+ fbdir_read_sync(F);
+ ASSERT(F->done_read);
+ }
+
+ // The finished request replaces the previous active buffer
+ struct asio_request *r = F->done_read;
+ F->done_read = NULL;
+ if (F->active_buffer)
+ asio_put(F->active_buffer);
+ F->active_buffer = r;
+ if (!r->status)
+ return 0;
+ if (r->status < 0)
+ die("Error reading %s: %s", f->name, strerror(r->returned_errno));
+ f->bptr = f->buffer = r->buffer;
+ f->bstop = f->bufend = f->buffer + r->status;
+ f->pos += r->status;
+
+ fbdir_submit_read(F); // Read-ahead the next block
+
+ return r->status;
+}
+
+/*
+ * The spout hook. If the active buffer contains written data, it is
+ * submitted as a write-back request; then a fresh buffer is obtained from
+ * the queue and set up as an empty write buffer.
+ *
+ * Unless fbdir_cheat is set, a write whose length is not a multiple of
+ * FBDIR_ALIGN is rounded up, waited for, and the file is then truncated
+ * to the real length.
+ */
+static void
+fbdir_spout(struct fastbuf *f)
+{
+ struct fb_direct *F = FB_DIRECT(f);
+ struct asio_request *r;
+
+ DBG("FB-DIRECT: Spout");
+
+ fbdir_change_mode(F, M_WRITE);
+ r = F->active_buffer;
+ if (r && f->bptr > f->bstop)
+ {
+ r->op = ASIO_WRITE_BACK;
+ r->fd = F->fd;
+ r->len = f->bptr - f->bstop;
+ ASSERT(!(f->pos % FBDIR_ALIGN) || fbdir_cheat);
+ f->pos += r->len;
+ if (!fbdir_cheat && r->len % FBDIR_ALIGN) // Have to simulate incomplete writes
+ {
+ r->len = ALIGN_TO(r->len, FBDIR_ALIGN);
+ asio_submit(r);
+ asio_sync(F->io_queue);
+ DBG("FB-DIRECT: Truncating at %llu", (long long)f->pos);
+ if (sh_ftruncate(F->fd, f->pos) < 0)
+ die("Error truncating %s: %m", f->name);
+ }
+ else
+ asio_submit(r);
+ r = NULL; // The request has been handed over to the queue
+ }
+ if (!r)
+ r = asio_get(F->io_queue);
+ f->bstop = f->bptr = f->buffer = r->buffer;
+ f->bufend = f->buffer + F->io_queue->buffer_size;
+ F->active_buffer = r;
+}
+
+/*
+ * The seek hook. A SEEK_SET to the current position is a no-op; otherwise
+ * the stream is switched to M_NULL mode and the file descriptor is seeked.
+ * Returns 1 on success and 0 if the seek failed.
+ */
+static int
+fbdir_seek(struct fastbuf *f, sh_off_t pos, int whence)
+{
+  DBG("FB-DIRECT: Seek %llu %d", (long long)pos, whence);
+
+  if (whence == SEEK_SET && pos == f->pos)
+    return 1;
+
+  struct fb_direct *F = FB_DIRECT(f);
+  fbdir_change_mode(F, M_NULL);			// Wait for all async requests to finish
+  sh_off_t where = sh_seek(F->fd, pos, whence);
+  if (where >= 0)
+    {
+      f->pos = where;
+      return 1;
+    }
+  return 0;
+}
+
+/*
+ * Get a reference to the I/O queue kept in the thread's ucwlib context.
+ * The queue is created with the given parameters on first use; later
+ * callers get the existing queue regardless of the parameters they pass.
+ */
+static struct asio_queue *
+fbdir_get_io_queue(uns buffer_size, uns write_back)
+{
+  struct ucwlib_context *ctx = ucwlib_thread_context();
+
+  if (!ctx->io_queue)
+    {
+      struct asio_queue *fresh = xmalloc_zero(sizeof(*fresh));
+      fresh->buffer_size = buffer_size;
+      fresh->max_writebacks = write_back;
+      asio_init_queue(fresh);
+      ctx->io_queue = fresh;
+    }
+  struct asio_queue *q = ctx->io_queue;
+  q->use_count++;
+  DBG("FB-DIRECT: Got I/O queue, uc=%d", q->use_count);
+  return q;
+}
+
+/*
+ * Drop a reference to the I/O queue of the thread's ucwlib context;
+ * the queue is cleaned up and freed when the last reference is gone.
+ */
+static void
+fbdir_put_io_queue(void)
+{
+  struct ucwlib_context *ctx = ucwlib_thread_context();
+  struct asio_queue *q = ctx->io_queue;
+
+  ASSERT(q);
+  DBG("FB-DIRECT: Put I/O queue, uc=%d", q->use_count);
+  q->use_count--;
+  if (q->use_count)
+    return;
+  asio_cleanup_queue(q);
+  xfree(q);
+  ctx->io_queue = NULL;
+}
+
+/*
+ * The close hook: switch to M_NULL mode (waiting for outstanding requests
+ * and releasing buffers), drop the reference to the per-thread queue unless
+ * the queue was supplied by the user, let bclose_file_helper() deal with
+ * the descriptor, and free the structure.
+ */
+static void
+fbdir_close(struct fastbuf *f)
+{
+ struct fb_direct *F = FB_DIRECT(f);
+
+ DBG("FB-DIRECT: Close");
+
+ fbdir_change_mode(F, M_NULL);
+ if (!F->user_queue)
+ fbdir_put_io_queue();
+
+ bclose_file_helper(f, F->fd, F->is_temp_file);
+ xfree(f);
+}
+
+/*
+ * The config hook. Only BCONFIG_IS_TEMP_FILE is supported: the flag is
+ * set to value and its previous value is returned. Other items yield -1.
+ */
+static int
+fbdir_config(struct fastbuf *f, uns item, int value)
+{
+  if (item != BCONFIG_IS_TEMP_FILE)
+    return -1;
+
+  struct fb_direct *F = FB_DIRECT(f);
+  int previous = F->is_temp_file;
+  F->is_temp_file = value;
+  return previous;
+}
+
+/*
+ * Create a direct I/O fastbuf on an already open file descriptor.
+ * The name is copied into the structure. If q is given, it is used as the
+ * I/O queue (and remembered as user-supplied); otherwise the per-thread
+ * queue is used, being created with buffer_size and write_back if needed.
+ * The read_ahead parameter is currently unused.
+ */
+struct fastbuf *
+fbdir_open_fd_internal(int fd, const char *name, struct asio_queue *q, uns buffer_size, uns read_ahead UNUSED, uns write_back)
+{
+  int namelen = strlen(name) + 1;
+  struct fb_direct *F = xmalloc(sizeof(struct fb_direct) + namelen);
+  struct fastbuf *f = &F->fb;
+
+  DBG("FB-DIRECT: Open");
+  memset(F, 0, sizeof(*F));
+  f->name = F->name;
+  memcpy(f->name, name, namelen);
+  F->fd = fd;
+  if (!q)
+    F->io_queue = fbdir_get_io_queue(buffer_size, write_back);
+  else
+    F->io_queue = F->user_queue = q;
+
+  f->refill = fbdir_refill;
+  f->spout = fbdir_spout;
+  f->seek = fbdir_seek;
+  f->close = fbdir_close;
+  f->config = fbdir_config;
+  f->can_overwrite_buffer = 2;
+  return f;
+}
+
+#ifdef TEST
+
+#include "lib/getopt.h"
+
+/*
+ * Test program (compiled only with TEST defined): copy the first file
+ * given on the command line (or stdin) to the second one (or stdout)
+ * through direct I/O fastbufs and check the positions reported by btell().
+ */
+int main(int argc, char **argv)
+{
+ struct fb_params par = { .type = FB_DIRECT };
+ struct fastbuf *f, *t;
+
+ log_init(NULL);
+ if (cf_getopt(argc, argv, CF_SHORT_OPTS, CF_NO_LONG_OPTS, NULL) >= 0)
+ die("Hey, whaddya want?");
+ f = (optind < argc) ? bopen_file(argv[optind++], O_RDONLY, &par) : bopen_fd(0, &par);
+ t = (optind < argc) ? bopen_file(argv[optind++], O_RDWR | O_CREAT | O_TRUNC, &par) : bopen_fd(1, &par);
+
+ bbcopy(f, t, ~0U); // ~0U means "copy until the source is exhausted"
+ ASSERT(btell(f) == btell(t));
+
+#if 0 // This triggers unaligned write
+ bflush(t);
+ bputc(t, '\n');
+#endif
+
+ brewind(t);
+ bgetc(t);
+ ASSERT(btell(t) == 1);
+
+ bclose(f);
+ bclose(t);
+ return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Fast Buffered I/O on Files
+ *
+ * (c) 1997--2007 Martin Mares <mj@ucw.cz>
+ * (c) 2007 Pavel Charvat <pchar@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/fastbuf.h"
+#include "lib/lfs.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+/* Per-stream state of the standard file back-end; the I/O buffer follows the structure in memory (see FB_BUFFER) */
+struct fb_file {
+ struct fastbuf fb;
+ int fd; /* File descriptor */
+ int is_temp_file;
+ int keep_back_buf; /* Optimize for backwards reading */
+ sh_off_t wpos; /* Real file position */
+ uns wlen; /* Window size */
+};
+#define FB_FILE(f) ((struct fb_file *)(f)->is_fastbuf)
+/* Start of the I/O buffer: the first byte after struct fb_file */
+#define FB_BUFFER(f) (byte *)(FB_FILE(f) + 1)
+
+/*
+ * The refill hook. It reads a new window of the file into the buffer so
+ * that f->pos (the position requested by the fastbuf layer) is available
+ * at bptr. F->wpos is the real position of the file descriptor and F->wlen
+ * the length of the window currently in the buffer.
+ *
+ * Depending on the distance between wpos and pos, the function either
+ * lseek()s, skips data by reading them, or reuses a part of the previous
+ * window. If keep_back_buf is set, some data before the requested position
+ * (`back' bytes) are kept in the buffer, too.
+ *
+ * Returns the number of bytes available at bptr, 0 at the end of file.
+ * Read and seek errors are fatal.
+ */
+static int
+bfd_refill(struct fastbuf *f)
+{
+ struct fb_file *F = FB_FILE(f);
+ byte *read_ptr = (f->buffer = FB_BUFFER(f));
+ /* blen = buffer size, back = size of the back buffer (a quarter of the buffer or nothing), read_len = bytes still to be read */
+ uns blen = f->bufend - f->buffer, back = F->keep_back_buf ? blen >> 2 : 0, read_len = blen;
+ /* Forward or no seek */
+ if (F->wpos <= f->pos)
+ {
+ sh_off_t diff = f->pos - F->wpos;
+ /* Formula for long forward seeks (prefer lseek()) */
+ if (diff > ((sh_off_t)blen << 2))
+ {
+long_seek:
+ f->bptr = f->buffer + back;
+ f->bstop = f->buffer + blen;
+ goto seek;
+ }
+ /* Short forward seek (prefer read() to skip data) */
+ else if ((uns)diff >= back)
+ {
+ uns skip = diff - back;
+ F->wpos += skip;
+ while (skip)
+ {
+ int l = read(F->fd, f->buffer, MIN(skip, blen));
+ if (unlikely(l <= 0))
+ if (l < 0)
+ die("Error reading %s: %m", f->name);
+ else
+ {
+ /* EOF while skipping: take back the part of wpos which was not skipped */
+ F->wpos -= skip;
+ goto eof;
+ }
+ skip -= l;
+ }
+ }
+ /* Reuse part of the previous window and append new data (also F->wpos == f->pos) */
+ else
+ {
+ uns keep = back - (uns)diff;
+ if (keep >= F->wlen)
+ back = diff + (keep = F->wlen);
+ else
+ memmove(f->buffer, f->buffer + F->wlen - keep, keep);
+ read_len -= keep;
+ read_ptr += keep;
+ }
+ f->bptr = f->buffer + back;
+ f->bstop = f->buffer + blen;
+ }
+ /* Backwards seek */
+ else
+ {
+ sh_off_t diff = F->wpos - f->pos;
+ /* Formula for long backwards seeks (keep smaller backbuffer than for shorter seeks ) */
+ if (diff > ((sh_off_t)blen << 1))
+ {
+ if ((sh_off_t)back > f->pos)
+ back = f->pos;
+ goto long_seek;
+ }
+ /* Seek into previous window (do nothing... for example brewind) */
+ else if ((uns)diff <= F->wlen)
+ {
+ f->bstop = f->buffer + F->wlen;
+ f->bptr = f->bstop - diff;
+ f->pos = F->wpos;
+ return 1;
+ }
+ /* Short backwards seek: use a bigger back buffer, but never reach before the start of the file */
+ back *= 3;
+ if ((sh_off_t)back > f->pos)
+ back = f->pos;
+ f->bptr = f->buffer + back;
+ read_len = blen;
+ f->bstop = f->buffer + read_len;
+ /* Reuse part of previous window */
+ if (F->wlen && read_len <= back + diff && read_len > back + diff - F->wlen)
+ {
+ uns keep = read_len + F->wlen - back - diff;
+ memmove(f->buffer + read_len - keep, f->buffer, keep);
+ }
+seek:
+ /* Do lseek() */
+ F->wpos = f->pos + (f->buffer - f->bptr);
+ if (sh_seek(F->fd, F->wpos, SEEK_SET) < 0)
+ die("Error seeking %s: %m", f->name);
+ }
+ /* Read (part of) buffer */
+ do
+ {
+ int l = read(F->fd, read_ptr, read_len);
+ if (unlikely(l < 0))
+ die("Error reading %s: %m", f->name);
+ if (!l)
+ if (unlikely(read_ptr < f->bptr))
+ goto eof;
+ else
+ break; /* Incomplete read because of EOF */
+ read_ptr += l;
+ read_len -= l;
+ F->wpos += l;
+ }
+ while (read_ptr <= f->bptr);
+ /* If the buffer was not filled completely, the data end at read_ptr */
+ if (read_len)
+ f->bstop = read_ptr;
+ f->pos += f->bstop - f->bptr;
+ F->wlen = f->bstop - f->buffer;
+ return f->bstop - f->bptr;
+eof:
+ /* Seeked behind EOF */
+ f->bptr = f->bstop = f->buffer;
+ F->wlen = 0;
+ return 0;
+}
+
+/*
+ * Spout callback of file-backed fastbufs: writes everything between
+ * f->buffer and f->bptr to the file and empties the buffer.
+ * Seek and write errors are fatal.
+ */
+static void
+bfd_spout(struct fastbuf *f)
+{
+  struct fb_file *F = FB_FILE(f);
+
+  /* bfd_seek() only records the position, so the real lseek() happens here */
+  if (F->wpos != f->pos)
+    {
+      if (sh_seek(F->fd, f->pos, SEEK_SET) < 0)
+        die("Error seeking %s: %m", f->name);
+    }
+
+  byte *src = f->buffer;
+  int remains = f->bptr - f->buffer;
+
+  /* Advance both positions first and forget the read window */
+  f->pos += remains;
+  F->wpos = f->pos;
+  F->wlen = 0;
+
+  /* write() may accept only a part of the data, so loop until all is out */
+  for (; remains; )
+    {
+      int written = write(F->fd, src, remains);
+      if (written <= 0)
+        die("Error writing %s: %m", f->name);
+      src += written;
+      remains -= written;
+    }
+
+  f->buffer = FB_BUFFER(f);
+  f->bptr = f->buffer;
+}
+
+/*
+ * Seek callback of file-backed fastbufs. Returns 1 on success, 0 on failure.
+ *
+ * For SEEK_SET and SEEK_CUR only f->pos is changed; the real lseek() is left
+ * to the next refill() or spout() call. SEEK_END asks the kernel immediately.
+ */
+static int
+bfd_seek(struct fastbuf *f, sh_off_t pos, int whence)
+{
+  if (whence == SEEK_SET)
+    {
+      f->pos = pos;
+      return 1;
+    }
+  if (whence == SEEK_CUR)
+    {
+      sh_off_t target = f->pos + pos;
+      /* Refuse the seek if the result did not move in the direction given by pos */
+      if ((pos > 0) != (target > f->pos))
+        return 0;
+      f->pos = target;
+      return 1;
+    }
+  if (whence == SEEK_END)
+    {
+      struct fb_file *F = FB_FILE(f);
+      sh_off_t target = sh_seek(F->fd, pos, SEEK_END);
+      if (target < 0)
+        return 0;
+      f->pos = target;
+      F->wpos = target;
+      F->wlen = 0;
+      return 1;
+    }
+  ASSERT(0);
+}
+
+/* Close callback: lets bclose_file_helper() deal with the descriptor, then frees the fastbuf */
+static void
+bfd_close(struct fastbuf *f)
+{
+  struct fb_file *F = FB_FILE(f);
+
+  bclose_file_helper(f, F->fd, F->is_temp_file);
+  xfree(f);
+}
+
+/*
+ * Config callback: sets BCONFIG_IS_TEMP_FILE or BCONFIG_KEEP_BACK_BUF to
+ * the given value and returns the previous one; -1 for unknown items.
+ */
+static int
+bfd_config(struct fastbuf *f, uns item, int value)
+{
+  struct fb_file *F = FB_FILE(f);
+  int *slot;
+
+  if (item == BCONFIG_IS_TEMP_FILE)
+    slot = &F->is_temp_file;
+  else if (item == BCONFIG_KEEP_BACK_BUF)
+    slot = &F->keep_back_buf;
+  else
+    return -1;
+
+  int previous = *slot;
+  *slot = value;
+  return previous;
+}
+
+/*
+ * Create a fastbuf on top of an already opened file descriptor.
+ *
+ * fd     -- descriptor to read from / write to
+ * name   -- name used in error messages; a private copy is stored
+ * buflen -- size of the data buffer in bytes, must be non-zero
+ *
+ * The header, the buffer and the copy of the name live in a single
+ * allocation which is released by bfd_close().
+ */
+struct fastbuf *
+bfdopen_internal(int fd, const char *name, uns buflen)
+{
+  ASSERT(buflen);
+  int namelen = strlen(name) + 1;
+  /*
+   * Only the header has to be cleared: the buffer is filled before it is
+   * read and the name is copied below, so zeroing the whole block (and the
+   * header twice) is wasted work. Same scheme as the other constructors.
+   */
+  struct fb_file *F = xmalloc(sizeof(struct fb_file) + buflen + namelen);
+  struct fastbuf *f = &F->fb;
+
+  bzero(F, sizeof(*F));
+  f->buffer = (byte *)(F+1);
+  f->bptr = f->bstop = f->buffer;
+  f->bufend = f->buffer + buflen;
+  /* The name is stored right behind the buffer */
+  f->name = f->bufend;
+  memcpy(f->name, name, namelen);
+  F->fd = fd;
+  f->refill = bfd_refill;
+  f->spout = bfd_spout;
+  f->seek = bfd_seek;
+  f->close = bfd_close;
+  f->config = bfd_config;
+  f->can_overwrite_buffer = 2;
+  return f;
+}
+
+/*
+ * Flush the fastbuf and fsync() its file descriptor.
+ * A failed fsync() is only logged, it is not fatal.
+ */
+void
+bfilesync(struct fastbuf *b)
+{
+  bflush(b);
+
+  int fd = FB_FILE(b)->fd;
+  if (fsync(fd) >= 0)
+    return;
+  msg(L_ERROR, "fsync(%s) failed: %m", b->name);
+}
+
+#ifdef TEST
+
+/*
+ * Self-test: writes 16 copies of "<hello>" to a temporary file, rewinds it and
+ * copies it to a shared fastbuf on fd 1, printing the positions reported by
+ * btell() along the way.
+ */
+int main(void)
+{
+ struct fastbuf *f, *t;
+ f = bopen_tmp(16);
+ t = bfdopen_shared(1, 13);
+ for (uns i = 0; i < 16; i++)
+ bwrite(f, "<hello>", 7);
+ bprintf(t, "%d\n", (int)btell(f));
+ brewind(f);
+ bbcopy(f, t, ~0U);
+ bprintf(t, "\n%d %d\n", (int)btell(f), (int)btell(t));
+ bclose(f);
+ bclose(t);
+ return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Fast Buffered I/O on Growing Buffers
+ *
+ * (c) 2006 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/fastbuf.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+/* Private state of a fastbuf writing to a growing memory buffer */
+struct fb_gbuf {
+ struct fastbuf fb;
+ byte *last_written; /* End of written data; NULL while in write mode (see fbgrow_reset() and fbgrow_rewind()) */
+};
+/* Convert a struct fastbuf pointer to the enclosing struct fb_gbuf */
+#define FB_GBUF(f) ((struct fb_gbuf *)(f)->is_fastbuf)
+
+/*
+ * Refill callback: the whole contents are in memory, so there is something
+ * new to offer only if bstop does not point to the end of the written data
+ * yet. Returns 1 in that case and 0 at the end.
+ */
+static int
+fbgrow_refill(struct fastbuf *b)
+{
+  byte *end = FB_GBUF(b)->last_written;
+
+  /* Already at the end of the written data */
+  if (b->bstop == end)
+    return 0;
+
+  /* There was an intervening flush, expose everything up to the end */
+  b->bstop = end;
+  b->pos = end - b->buffer;
+  return 1;
+}
+
+/*
+ * Spout callback: when the buffer is completely full, double its size.
+ * Nothing is written anywhere, the data stay in memory.
+ */
+static void
+fbgrow_spout(struct fastbuf *b)
+{
+  /* Still some room left */
+  if (b->bptr < b->bufend)
+    return;
+
+  uns used = b->bufend - b->buffer;
+  uns grown = 2*used;
+  byte *mem = xrealloc(b->buffer, grown);
+
+  /* The block may have moved, so recompute all pointers */
+  b->buffer = mem;
+  b->bufend = mem + grown;
+  b->bstop = mem;
+  b->bptr = mem + used;
+}
+
+/*
+ * Seek callback. SEEK_END offsets are relative to the end of the written
+ * data, every other whence is treated as an absolute offset. The target
+ * must lie inside the written data. Always returns 1.
+ */
+static int
+fbgrow_seek(struct fastbuf *b, sh_off_t pos, int whence)
+{
+  ASSERT(FB_GBUF(b)->last_written); /* Seeks allowed only in read mode */
+
+  byte *end = FB_GBUF(b)->last_written;
+  sh_off_t len = end - b->buffer;
+
+  if (whence == SEEK_END)
+    pos += len;
+  ASSERT(pos >= 0 && pos <= len);
+
+  /* The whole contents are readable at once */
+  b->bstop = end;
+  b->bptr = b->buffer + pos;
+  b->pos = len;
+  return 1;
+}
+
+/* Close callback: frees the data buffer and then the fastbuf itself */
+static void
+fbgrow_close(struct fastbuf *b)
+{
+  byte *data = b->buffer;
+
+  xfree(data);
+  xfree(b);
+}
+
+/*
+ * Create a fastbuf backed by a growing memory buffer.
+ * basic_size is the initial size of the buffer in bytes.
+ */
+struct fastbuf *
+fbgrow_create(unsigned basic_size)
+{
+  struct fastbuf *b = xmalloc_zero(sizeof(struct fb_gbuf));
+  byte *mem = xmalloc(basic_size);
+
+  /* Empty buffer: all pointers at the start */
+  b->buffer = mem;
+  b->bptr = mem;
+  b->bstop = mem;
+  b->bufend = mem + basic_size;
+
+  b->name = "<fbgbuf>";
+  b->can_overwrite_buffer = 1;
+
+  b->refill = fbgrow_refill;
+  b->spout = fbgrow_spout;
+  b->seek = fbgrow_seek;
+  b->close = fbgrow_close;
+  return b;
+}
+
+/* Forget the contents and switch back to write mode (last_written == NULL) */
+void
+fbgrow_reset(struct fastbuf *b)
+{
+  FB_GBUF(b)->last_written = NULL;
+  b->pos = 0;
+  b->bstop = b->buffer;
+  b->bptr = b->buffer;
+}
+
+/*
+ * Prepare the buffer for reading from its beginning. On the first call
+ * after writing, the current write position is recorded as the end of data.
+ */
+void
+fbgrow_rewind(struct fastbuf *b)
+{
+  struct fb_gbuf *G = FB_GBUF(b);
+
+  /* Last operation was a write, so remember where the data end */
+  if (!G->last_written)
+    G->last_written = b->bptr;
+
+  b->bstop = G->last_written;
+  b->bptr = b->buffer;
+  b->pos = b->bstop - b->buffer;
+}
+
+#ifdef TEST
+
+/*
+ * Self-test: five times in a row resets the buffer (created with 3 bytes, so
+ * it has to grow), writes 10 characters, reads them back after a rewind and
+ * after a seek relative to the end, printing btell() between the steps.
+ */
+int main(void)
+{
+ struct fastbuf *f;
+ uns t;
+
+ f = fbgrow_create(3);
+ for (uns i=0; i<5; i++)
+ {
+ fbgrow_reset(f);
+ bwrite(f, "12345", 5);
+ bwrite(f, "12345", 5);
+ printf("<%d>", (int)btell(f));
+ bflush(f);
+ printf("<%d>", (int)btell(f));
+ fbgrow_rewind(f);
+ printf("<%d>", (int)btell(f));
+ while ((t = bgetc(f)) != ~0U)
+ putchar(t);
+ printf("<%d>", (int)btell(f));
+ fbgrow_rewind(f);
+ bseek(f, -1, SEEK_END);
+ printf("<%d>", (int)btell(f));
+ while ((t = bgetc(f)) != ~0U)
+ putchar(t);
+ printf("<%d>\n", (int)btell(f));
+ }
+ bclose(f);
+ return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Fast Buffered Input on Limited File Descriptors
+ *
+ * (c) 2003--2004 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/fastbuf.h"
+
+#include <unistd.h>
+
+/* Private state of a fastbuf reading at most a given number of bytes from a descriptor */
+struct fb_limfd {
+ struct fastbuf fb;
+ int fd; /* File descriptor */
+ int limit; /* Position at which bfl_refill() stops reading */
+};
+/* Convert a struct fastbuf pointer to the enclosing struct fb_limfd */
+#define FB_LIMFD(f) ((struct fb_limfd *)(f)->is_fastbuf)
+/* Start of the data buffer, located immediately after struct fb_limfd */
+#define FB_BUFFER(f) (byte *)(FB_LIMFD(f) + 1)
+
+/*
+ * Refill callback: reads the next part of the input, but never more than
+ * what remains up to the limit. Returns the number of bytes read (0 at EOF
+ * or when the limit has been reached); read errors are fatal.
+ */
+static int
+bfl_refill(struct fastbuf *f)
+{
+  struct fb_limfd *F = FB_LIMFD(f);
+
+  f->buffer = FB_BUFFER(f);
+  f->bptr = f->buffer;
+
+  /* Ask for a full buffer unless the limit is closer */
+  int max = MIN(F->limit - f->pos, f->bufend - f->buffer);
+  int got = read(F->fd, f->buffer, max);
+  if (got < 0)
+    die("Error reading %s: %m", f->name);
+
+  f->pos += got;
+  f->bstop = f->buffer + got;
+  return got;
+}
+
+/* Close callback: frees the fastbuf only, the file descriptor is not closed here */
+static void
+bfl_close(struct fastbuf *f)
+{
+ xfree(f);
+}
+
+/*
+ * Create a read-only fastbuf on a file descriptor which stops reading once
+ * the position reaches the limit.
+ *
+ * fd     -- descriptor to read from (left open by bclose())
+ * buflen -- size of the data buffer in bytes
+ * limit  -- position at which reading stops
+ */
+struct fastbuf *
+bopen_limited_fd(int fd, uns buflen, uns limit)
+{
+  struct fb_limfd *F = xmalloc(sizeof(struct fb_limfd) + buflen);
+  struct fastbuf *f = &F->fb;
+
+  bzero(F, sizeof(*F));
+  /* Buffer pointers are byte *, as in FB_BUFFER() -- avoid the mismatched char * cast */
+  f->buffer = (byte *)(F+1);
+  f->bptr = f->bstop = f->buffer;
+  f->bufend = f->buffer + buflen;
+  f->name = "limited-fd";
+  F->fd = fd;
+  F->limit = limit;
+  f->refill = bfl_refill;
+  f->close = bfl_close;
+  f->can_overwrite_buffer = 2;
+  return f;
+}
+
+#ifdef TEST
+
+/* Self-test: copies standard input to standard output through a fastbuf limited to 13 bytes */
+int main(int argc, char **argv)
+{
+ struct fastbuf *f = bopen_limited_fd(0, 3, 13);
+ struct fastbuf *o = bfdopen_shared(1, 16);
+ int c;
+ while ((c = bgetc(f)) >= 0)
+ bputc(o, c);
+ bclose(o);
+ bclose(f);
+ return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Fast Buffered I/O on Memory Streams
+ *
+ * (c) 1997--2002 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/fastbuf.h"
+
+#include <stdio.h>
+
+/* A memory stream: a linked list of data blocks shared by the writer and its read clones */
+struct memstream {
+ unsigned blocksize; /* Capacity of each block */
+ unsigned uc; /* Number of fastbufs using the stream; the stream is freed when it drops to 0 */
+ struct msblock *first; /* Head of the block list, NULL if nothing was written yet */
+};
+
+/* A single block of stream data */
+struct msblock {
+ struct msblock *next;
+ sh_off_t pos; /* Offset of the block within the stream */
+ unsigned size; /* Number of valid bytes in data[] */
+ byte data[0];
+};
+
+/* Private state of a fastbuf attached to a memory stream */
+struct fb_mem {
+ struct fastbuf fb;
+ struct memstream *stream;
+ struct msblock *block; /* Block the buffer pointers currently refer to, or NULL */
+};
+/* Convert a struct fastbuf pointer to the enclosing struct fb_mem */
+#define FB_MEM(f) ((struct fb_mem *)(f)->is_fastbuf)
+
+/*
+ * Refill callback of reading clones. Either exposes data which have been
+ * added to the current block since the last refill, or moves to the next
+ * block. Returns 1 if there is something to read and 0 otherwise.
+ */
+static int
+fbmem_refill(struct fastbuf *f)
+{
+  struct fb_mem *M = FB_MEM(f);
+  struct msblock *cur = M->block;
+  struct msblock *next;
+
+  if (cur)
+    {
+      /* The current block has grown since we attached to it */
+      if (f->buffer == cur->data && f->bstop < cur->data + cur->size)
+        {
+          f->bstop = cur->data + cur->size;
+          f->pos = cur->pos + cur->size;
+          return 1;
+        }
+      next = cur->next;
+    }
+  else
+    next = M->stream->first;
+
+  /* No further block, or one which does not contain any data yet */
+  if (!next || !next->size)
+    return 0;
+
+  f->buffer = f->bptr = next->data;
+  f->bufend = f->bstop = next->data + next->size;
+  f->pos = next->pos + next->size;
+  M->block = next;
+  return 1;
+}
+
+/*
+ * Spout callback of the writer. Records how much of the current block has
+ * been filled and, when the block is full (or there is none yet), appends a
+ * fresh block to the stream and points the buffer at it.
+ */
+static void
+fbmem_spout(struct fastbuf *f)
+{
+  struct fb_mem *M = FB_MEM(f);
+  struct memstream *s = M->stream;
+  struct msblock *tail = M->block;
+
+  if (tail)
+    {
+      tail->size = f->bptr - tail->data;
+      /* Plain flush: the block still has room, keep writing to it */
+      if (tail->size < s->blocksize)
+        return;
+    }
+
+  struct msblock *fresh = xmalloc(sizeof(struct msblock) + s->blocksize);
+  fresh->next = NULL;
+  fresh->size = 0;
+  if (!tail)
+    {
+      /* Very first block of the stream */
+      fresh->pos = 0;
+      s->first = fresh;
+    }
+  else
+    {
+      fresh->pos = tail->pos + tail->size;
+      tail->next = fresh;
+    }
+
+  f->buffer = f->bptr = f->bstop = fresh->data;
+  f->bufend = fresh->data + s->blocksize;
+  f->pos = fresh->pos;
+  M->block = fresh;
+}
+
+/*
+ * Seek callback of reading clones; only SEEK_SET and SEEK_END are supported.
+ * Returns 1 on success, an offset outside of the stream is fatal.
+ */
+static int
+fbmem_seek(struct fastbuf *f, sh_off_t pos, int whence)
+{
+  struct memstream *m = FB_MEM(f)->stream;
+  struct msblock *b;
+
+  ASSERT(whence == SEEK_SET || whence == SEEK_END);
+
+  /* Make SEEK_END offsets absolute by adding the total length of the stream */
+  if (whence == SEEK_END)
+    for (b = m->first; b; b = b->next)
+      pos += b->size;
+
+  /*
+   * Find the first block which ends at or behind the target. This is a linear
+   * search, but the number of blocks is expected to be small. The end of the
+   * block itself is accepted, so that seeking just after the last byte works.
+   */
+  b = m->first;
+  while (b && pos > b->pos + (sh_off_t)b->size)
+    b = b->next;
+
+  if (b)
+    {
+      f->buffer = b->data;
+      f->bptr = b->data + (pos - b->pos);
+      f->bufend = f->bstop = b->data + b->size;
+      f->pos = b->pos + b->size;
+      FB_MEM(f)->block = b;
+      return 1;
+    }
+
+  /* An empty stream has no blocks at all, but offset 0 is still valid there */
+  if (!m->first && !pos)
+    {
+      f->buffer = f->bptr = f->bufend = NULL;
+      f->pos = 0;
+      FB_MEM(f)->block = NULL;
+      return 1;
+    }
+  die("fbmem_seek to invalid offset");
+}
+
+/*
+ * Close callback: drops one reference to the stream. The last user frees all
+ * blocks and the stream itself. The fastbuf is freed in any case.
+ */
+static void
+fbmem_close(struct fastbuf *f)
+{
+  struct memstream *m = FB_MEM(f)->stream;
+
+  m->uc--;
+  if (!m->uc)
+    {
+      struct msblock *b = m->first;
+      while (b)
+        {
+          struct msblock *following = b->next;
+          xfree(b);
+          b = following;
+        }
+      xfree(m);
+    }
+  xfree(f);
+}
+
+/*
+ * Create a new memory stream and return the fastbuf for writing to it.
+ * blocksize is the capacity of each block of the stream.
+ */
+struct fastbuf *
+fbmem_create(unsigned blocksize)
+{
+  struct fastbuf *f = xmalloc_zero(sizeof(struct fb_mem));
+  struct memstream *s = xmalloc_zero(sizeof(struct memstream));
+
+  /* The writer is the first user of the stream */
+  s->uc = 1;
+  s->blocksize = blocksize;
+  FB_MEM(f)->stream = s;
+
+  f->name = "<fbmem-write>";
+  f->close = fbmem_close;
+  f->spout = fbmem_spout;
+  return f;
+}
+
+/*
+ * Create a fastbuf reading from the stream of fastbuf b. The stream is
+ * reference-counted, so the clone may be used after b has been closed.
+ */
+struct fastbuf *
+fbmem_clone_read(struct fastbuf *b)
+{
+  struct fastbuf *f = xmalloc_zero(sizeof(struct fb_mem));
+  struct memstream *s = FB_MEM(b)->stream;
+
+  /* Flush b first, then take a reference to its stream */
+  bflush(b);
+  s->uc++;
+  FB_MEM(f)->stream = s;
+
+  f->name = "<fbmem-read>";
+  f->can_overwrite_buffer = 1;
+  f->close = fbmem_close;
+  f->seek = fbmem_seek;
+  f->refill = fbmem_refill;
+  return f;
+}
+
+#ifdef TEST
+
+/*
+ * Self-test: writes to a memory stream with 7-byte blocks, reads the data
+ * through a clone while the writer is still open, and then again after the
+ * writer has been closed, from offsets 0 and 3.
+ */
+int main(void)
+{
+ struct fastbuf *w, *r;
+ int t;
+
+ w = fbmem_create(7);
+ r = fbmem_clone_read(w);
+ bwrite(w, "12345", 5);
+ bwrite(w, "12345", 5);
+ printf("<%d>", (int)btell(w));
+ bflush(w);
+ printf("<%d>", (int)btell(w));
+ printf("<%d>", (int)btell(r));
+ while ((t = bgetc(r)) >= 0)
+ putchar(t);
+ printf("<%d>", (int)btell(r));
+ bwrite(w, "12345", 5);
+ bwrite(w, "12345", 5);
+ printf("<%d>", (int)btell(w));
+ bclose(w);
+ bsetpos(r, 0);
+ printf("<!%d>", (int)btell(r));
+ while ((t = bgetc(r)) >= 0)
+ putchar(t);
+ bsetpos(r, 3);
+ printf("<!%d>", (int)btell(r));
+ while ((t = bgetc(r)) >= 0)
+ putchar(t);
+ fflush(stdout);
+ bclose(r);
+ return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Fast Buffered I/O on Memory-Mapped Files
+ *
+ * (c) 2002 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#undef LOCAL_DEBUG
+
+#include "lib/lib.h"
+#include "lib/fastbuf.h"
+#include "lib/lfs.h"
+#include "lib/conf.h"
+
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/mman.h>
+
+/* Upper bound on the size of the mapped window (see bfmm_map_window()) */
+static uns mmap_window_size = 16*CPU_PAGE_SIZE;
+/* Step by which bfmm_spout() extends the file when writing reaches its end */
+static uns mmap_extend_size = 4*CPU_PAGE_SIZE;
+
+/* Both values can be overridden in the FBMMap section of the configuration */
+static struct cf_section fbmm_config = {
+ CF_ITEMS {
+ CF_UNS("WindowSize", &mmap_window_size),
+ CF_UNS("ExtendSize", &mmap_extend_size),
+ CF_END
+ }
+};
+
+/* Declares the FBMMap configuration section */
+static void CONSTRUCTOR fbmm_init_config(void)
+{
+ cf_declare_section("FBMMap", &fbmm_config, 0);
+}
+
+/* Private state of a fastbuf working on a memory-mapped file */
+struct fb_mmap {
+ struct fastbuf fb;
+ int fd;
+ int is_temp_file; /* Handed to bclose_file_helper() on close; changed via BCONFIG_IS_TEMP_FILE */
+ sh_off_t file_size; /* Logical size of the file; the file is truncated to it on close */
+ sh_off_t file_extend; /* Size the file has been extended to by ftruncate() */
+ sh_off_t window_pos; /* File offset of the start of the mapped window */
+ uns window_size; /* Length of the current mapping, as passed to mmap()/munmap() */
+ int mode; /* Open flags; decide whether the mapping is writable */
+};
+/* Convert a struct fastbuf pointer to the enclosing struct fb_mmap */
+#define FB_MMAP(f) ((struct fb_mmap *)(f)->is_fastbuf)
+
+/*
+ * Map the window containing f->pos. The window starts at the page boundary
+ * at or below f->pos and spans at most mmap_window_size bytes, limited by
+ * the extended size of the file. Sets f->buffer, f->bufend, f->bptr and
+ * F->window_pos; f->bstop is left to the caller. A failed mmap() is fatal.
+ */
+static void
+bfmm_map_window(struct fastbuf *f)
+{
+ struct fb_mmap *F = FB_MMAP(f);
+ /* pos0 = f->pos rounded down to a page boundary */
+ sh_off_t pos0 = f->pos & ~(sh_off_t)(CPU_PAGE_SIZE-1);
+ /* l = usable length of the window, ll = the same rounded up to whole pages */
+ int l = MIN((sh_off_t)mmap_window_size, F->file_extend - pos0);
+ uns ll = ALIGN_TO(l, CPU_PAGE_SIZE);
+ int prot = ((F->mode & O_ACCMODE) == O_RDONLY) ? PROT_READ : (PROT_READ | PROT_WRITE);
+
+ DBG(" ... Mapping %x(%x)+%x(%x) len=%x extend=%x", (int)pos0, (int)f->pos, ll, l, (int)F->file_size, (int)F->file_extend);
+ /* A mapping of a different size cannot be replaced in place, so drop it */
+ if (ll != F->window_size && f->buffer)
+ {
+ munmap(f->buffer, F->window_size);
+ f->buffer = NULL;
+ }
+ F->window_size = ll;
+ if (!f->buffer)
+ f->buffer = sh_mmap(NULL, ll, prot, MAP_SHARED, F->fd, pos0);
+ else
+ /* Same size as before: map over the old window at the same address */
+ f->buffer = sh_mmap(f->buffer, ll, prot, MAP_SHARED | MAP_FIXED, F->fd, pos0);
+ if (f->buffer == (byte *) MAP_FAILED)
+ die("mmap(%s): %m", f->name);
+#ifdef MADV_SEQUENTIAL
+ if (ll > CPU_PAGE_SIZE)
+ madvise(f->buffer, ll, MADV_SEQUENTIAL);
+#endif
+ f->bufend = f->buffer + l;
+ f->bptr = f->buffer + (f->pos - pos0);
+ F->window_pos = pos0;
+}
+
+/*
+ * Refill callback: makes the data following f->pos readable, mapping a new
+ * window if the current one is exhausted. Returns 0 at the end of the file
+ * and 1 otherwise.
+ */
+static int
+bfmm_refill(struct fastbuf *f)
+{
+  struct fb_mmap *F = FB_MMAP(f);
+
+  DBG("Refill <- %p %p %p %p", f->buffer, f->bptr, f->bstop, f->bufend);
+  if (f->pos >= F->file_size)
+    return 0;
+  if (f->bstop >= f->bufend)
+    bfmm_map_window(f);
+
+  /* The window may reach behind the logical end of the file; never read that far */
+  sh_off_t window_end = F->window_pos + (f->bufend - f->buffer);
+  f->bstop = (window_end > F->file_size)
+    ? f->buffer + (F->file_size - F->window_pos)
+    : f->bufend;
+
+  f->pos = F->window_pos + (f->bstop - f->buffer);
+  DBG(" -> %p %p %p(%x) %p", f->buffer, f->bptr, f->bstop, (int)f->pos, f->bufend);
+  return 1;
+}
+
+/*
+ * Spout callback: records how far the file has been written and, when the
+ * window is full, extends the file if needed and maps the next window.
+ * A failed ftruncate() is fatal.
+ */
+static void
+bfmm_spout(struct fastbuf *f)
+{
+  struct fb_mmap *F = FB_MMAP(f);
+  sh_off_t tail = f->pos + (f->bptr - f->bstop);
+
+  DBG("Spout <- %p %p %p %p", f->buffer, f->bptr, f->bstop, f->bufend);
+  if (F->file_size < tail)
+    F->file_size = tail;
+
+  /* Nothing more to do unless the whole window has been used up */
+  if (f->bptr >= f->bufend)
+    {
+      f->pos = tail;
+      if (tail >= F->file_extend)
+        {
+          /* Writing reached the end of the file: grow it by one step, page-aligned */
+          F->file_extend = ALIGN_TO(F->file_extend + mmap_extend_size, (sh_off_t)CPU_PAGE_SIZE);
+          if (sh_ftruncate(F->fd, F->file_extend))
+            die("ftruncate(%s): %m", f->name);
+        }
+      bfmm_map_window(f);
+      f->bstop = f->bptr;
+      DBG(" -> %p %p %p(%x) %p", f->buffer, f->bptr, f->bstop, (int)f->pos, f->bufend);
+    }
+}
+
+/*
+ * Seek callback; supports SEEK_SET and SEEK_END. The target must lie within
+ * the file. Only the position is recorded, mapping of the right window is
+ * left to the next refill or spout. Always returns 1.
+ */
+static int
+bfmm_seek(struct fastbuf *f, sh_off_t pos, int whence)
+{
+  if (whence != SEEK_END)
+    {
+      ASSERT(whence == SEEK_SET);
+    }
+  else
+    pos += FB_MMAP(f)->file_size;
+  ASSERT(pos >= 0 && pos <= FB_MMAP(f)->file_size);
+
+  f->pos = pos;
+  /* Collapse the buffer to force a refill/spout call on the next access */
+  f->bufend = f->buffer;
+  f->bstop = f->buffer;
+  f->bptr = f->buffer;
+  DBG("Seek -> %p %p %p(%x) %p", f->buffer, f->bptr, f->bstop, (int)f->pos, f->bufend);
+  return 1;
+}
+
+/*
+ * Close callback: unmaps the window, cuts the file back to its logical size
+ * if it was extended beyond it, and hands the descriptor over to
+ * bclose_file_helper(). A failed ftruncate() is fatal.
+ */
+static void
+bfmm_close(struct fastbuf *f)
+{
+  struct fb_mmap *F = FB_MMAP(f);
+
+  if (f->buffer)
+    munmap(f->buffer, F->window_size);
+
+  int overextended = F->file_extend > F->file_size;
+  if (overextended && sh_ftruncate(F->fd, F->file_size))
+    die("ftruncate(%s): %m", f->name);
+
+  bclose_file_helper(f, F->fd, F->is_temp_file);
+  xfree(f);
+}
+
+/*
+ * Config callback: only BCONFIG_IS_TEMP_FILE is supported. Sets the new
+ * value and returns the previous one; -1 for any other item.
+ */
+static int
+bfmm_config(struct fastbuf *f, uns item, int value)
+{
+  if (item != BCONFIG_IS_TEMP_FILE)
+    return -1;
+
+  struct fb_mmap *F = FB_MMAP(f);
+  int previous = F->is_temp_file;
+  F->is_temp_file = value;
+  return previous;
+}
+
+/*
+ * Create a memory-mapped fastbuf on an already opened file descriptor.
+ *
+ * fd   -- descriptor of the file
+ * name -- name used in error messages; a private copy is stored
+ * mode -- open flags of the descriptor (access mode, O_APPEND)
+ *
+ * Nothing is mapped yet, the first window is mapped on the first refill or
+ * spout. Failure to determine the size of the file is fatal.
+ */
+struct fastbuf *
+bfmmopen_internal(int fd, const char *name, uns mode)
+{
+  int namelen = strlen(name) + 1;
+  struct fb_mmap *F = xmalloc(sizeof(struct fb_mmap) + namelen);
+  struct fastbuf *f = &F->fb;
+
+  bzero(F, sizeof(*F));
+
+  /* The copy of the name lives right behind the structure */
+  f->name = (byte *)(F+1);
+  memcpy(f->name, name, namelen);
+
+  F->fd = fd;
+  F->mode = mode;
+
+  /* Seeking to the end tells us how long the file is */
+  F->file_size = sh_seek(fd, 0, SEEK_END);
+  F->file_extend = F->file_size;
+  if (F->file_size < 0)
+    die("seek(%s): %m", name);
+  if (mode & O_APPEND)
+    f->pos = F->file_size;
+
+  f->config = bfmm_config;
+  f->close = bfmm_close;
+  f->seek = bfmm_seek;
+  f->spout = bfmm_spout;
+  f->refill = bfmm_refill;
+  return f;
+}
+
+#ifdef TEST
+
+/*
+ * Self-test: copies file argv[1] to file argv[2] using mmap fastbufs and then
+ * exercises seeking, writing, flushing and reading on the copy.
+ */
+int main(int argc, char **argv)
+{
+ struct fb_params par = { .type = FB_MMAP };
+ struct fastbuf *f = bopen_file(argv[1], O_RDONLY, &par);
+ struct fastbuf *g = bopen_file(argv[2], O_RDWR | O_CREAT | O_TRUNC, &par);
+ int c;
+
+ DBG("Copying");
+ while ((c = bgetc(f)) >= 0)
+ bputc(g, c);
+ bclose(f);
+ DBG("Seek inside last block");
+ bsetpos(g, btell(g)-1333);
+ bputc(g, 13);
+ DBG("Seek to the beginning & write");
+ bsetpos(g, 1333);
+ bputc(g, 13);
+ DBG("flush");
+ bflush(g);
+ bputc(g, 13);
+ bflush(g);
+ DBG("Seek nearby & read");
+ bsetpos(g, 133);
+ bgetc(g);
+ DBG("Seek far & read");
+ bsetpos(g, 133333);
+ bgetc(g);
+ DBG("Closing");
+ bclose(g);
+
+ return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- FastIO on files with run-time parametrization
+ *
+ * (c) 2007 Pavel Charvat <pchar@ucw.cz>
+ * (c) 2007 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/conf.h"
+#include "lib/lfs.h"
+#include "lib/fastbuf.h"
+
+#include <fcntl.h>
+#include <stdio.h>
+
+/*
+ * Default fastbuf parameters. Used when the caller passes no parameters at
+ * all, and to fill in values the caller left as zero (see bopen_fd_internal()).
+ */
+struct fb_params fbpar_def = {
+ .buffer_size = 65536,
+ .read_ahead = 1,
+ .write_back = 1,
+};
+
+/*
+ * Commit hook of the fb_params configuration section. Without
+ * CONFIG_UCW_THREADS it rejects the "direct" type; otherwise it accepts
+ * everything. Returns an error message, or NULL if the parameters are fine.
+ */
+static char *
+fbpar_cf_commit(struct fb_params *p UNUSED)
+{
+#ifndef CONFIG_UCW_THREADS
+ if (p->type == FB_DIRECT)
+ return "Direct I/O is supported only with CONFIG_UCW_THREADS";
+#endif
+ return NULL;
+}
+
+/*
+ * Configuration section describing struct fb_params. F(x) is a local
+ * shorthand for a pointer to member x of the structure.
+ */
+struct cf_section fbpar_cf = {
+# define F(x) PTR_TO(struct fb_params, x)
+ CF_TYPE(struct fb_params),
+ CF_COMMIT(fbpar_cf_commit),
+ CF_ITEMS {
+ CF_LOOKUP("Type", (int *)F(type), ((char *[]){"std", "direct", "mmap", NULL})),
+ CF_UNS("BufSize", F(buffer_size)),
+ CF_UNS("KeepBackBuf", F(keep_back_buf)),
+ CF_UNS("ReadAhead", F(read_ahead)),
+ CF_UNS("WriteBack", F(write_back)),
+ CF_END
+ }
+# undef F
+};
+
+/* Top-level FBParam section; its Defaults subsection configures fbpar_def */
+static struct cf_section fbpar_global_cf = {
+ CF_ITEMS {
+ CF_SECTION("Defaults", &fbpar_def, &fbpar_cf),
+ CF_END
+ }
+};
+
+/* Declares the FBParam configuration section */
+static void CONSTRUCTOR
+fbpar_global_init(void)
+{
+ cf_declare_section("FBParam", &fbpar_global_cf, 0);
+}
+
+/*
+ * Create a fastbuf of the type selected by params on top of descriptor fd.
+ *
+ * fd     -- an open file descriptor
+ * params -- parameters of the fastbuf (must not be NULL); zero values of
+ *           buffer_size, read_ahead and write_back are replaced by those
+ *           from fbpar_def
+ * mode   -- open flags of the descriptor, or ~0U if the caller does not
+ *           know them; in that case they are obtained by fcntl() if needed
+ * name   -- name for error messages, or NULL to use "fd<number>"
+ */
+static struct fastbuf *
+bopen_fd_internal(int fd, struct fb_params *params, uns mode, const char *name)
+{
+  char buf[32];
+  if (!name)
+    {
+      sprintf(buf, "fd%d", fd);
+      name = buf;
+    }
+  struct fastbuf *fb;
+  switch (params->type)
+    {
+#ifdef CONFIG_UCW_THREADS
+      case FB_DIRECT:
+        fb = fbdir_open_fd_internal(fd, name, params->asio,
+            params->buffer_size ? : fbpar_def.buffer_size,
+            params->read_ahead ? : fbpar_def.read_ahead,
+            params->write_back ? : fbpar_def.write_back);
+        /*
+         * The descriptor was opened by somebody else (mode unknown), so try to
+         * switch it to O_DIRECT. Each fcntl() result is compared with zero on
+         * its own; comparing the result of the whole || expression with zero
+         * is always false and would silence the warning forever.
+         */
+        if (!~mode && !fbdir_cheat &&
+            ((int)(mode = fcntl(fd, F_GETFL)) < 0 || fcntl(fd, F_SETFL, mode | O_DIRECT) < 0))
+          msg(L_WARN, "Cannot set O_DIRECT on fd %d: %m", fd);
+        return fb;
+#endif
+      case FB_STD:
+        fb = bfdopen_internal(fd, name,
+            params->buffer_size ? : fbpar_def.buffer_size);
+        if (params->keep_back_buf)
+          bconfig(fb, BCONFIG_KEEP_BACK_BUF, 1);
+        return fb;
+      case FB_MMAP:
+        /* The mmap back-end needs to know the access mode of the descriptor */
+        if (!~mode && (int)(mode = fcntl(fd, F_GETFL)) < 0)
+          die("Cannot get flags of fd %d: %m", fd);
+        return bfmmopen_internal(fd, name, mode);
+      default:
+        ASSERT(0);
+    }
+}
+
+/*
+ * Open file "name" with the given open() flags and create a fastbuf on it.
+ *
+ * params -- fastbuf parameters, NULL means fbpar_def
+ * try    -- if non-zero, failure to open the file returns NULL;
+ *           otherwise it is fatal
+ */
+static struct fastbuf *
+bopen_file_internal(const char *name, int mode, struct fb_params *params, int try)
+{
+  struct fb_params *par = params ? params : &fbpar_def;
+
+#ifdef CONFIG_UCW_THREADS
+  if (par->type == FB_DIRECT && !fbdir_cheat)
+    mode |= O_DIRECT;
+#endif
+
+  /* A write-only file is opened read-write for the mmap back-end */
+  int write_only = (mode & O_ACCMODE) == O_WRONLY;
+  if (par->type == FB_MMAP && write_only)
+    mode = (mode & ~O_ACCMODE) | O_RDWR;
+
+  int fd = sh_open(name, mode, 0666);
+  if (fd < 0)
+    {
+      if (try)
+        return NULL;
+      die("Unable to %s file %s: %m", (mode & O_CREAT) ? "create" : "open", name);
+    }
+
+  struct fastbuf *fb = bopen_fd_internal(fd, par, mode, name);
+  ASSERT(fb);
+  if (mode & O_APPEND)
+    bseek(fb, 0, SEEK_END);
+  return fb;
+}
+
+/* Open a file and create a fastbuf on it; failure to open the file is fatal. NULL params mean fbpar_def. */
+struct fastbuf *
+bopen_file(const char *name, int mode, struct fb_params *params)
+{
+ return bopen_file_internal(name, mode, params, 0);
+}
+
+/* Like bopen_file(), but returns NULL if the file cannot be opened */
+struct fastbuf *
+bopen_file_try(const char *name, int mode, struct fb_params *params)
+{
+ return bopen_file_internal(name, mode, params, 1);
+}
+
+/* Create a fastbuf on an already opened descriptor; it is named "fd<number>" and its open flags are passed as unknown (~0U) */
+struct fastbuf *
+bopen_fd(int fd, struct fb_params *params)
+{
+ return bopen_fd_internal(fd, params ? : &fbpar_def, ~0U, NULL);
+}
+
+/* Function for use by individual file back-ends */
+
+/*
+ * Common part of closing file-backed fastbufs, driven by is_temp_file:
+ *   1 -- unlink the file (a failure is only logged) and close the descriptor
+ *   0 -- close the descriptor (a failure is fatal)
+ *   any other value (bfdopen_shared() uses 2) -- leave the descriptor open
+ */
+void
+bclose_file_helper(struct fastbuf *f, int fd, int is_temp_file)
+{
+ switch (is_temp_file)
+ {
+ case 1:
+ if (unlink(f->name) < 0)
+ msg(L_ERROR, "unlink(%s): %m", f->name);
+ /* fall through: a temporary file is closed as well */
+ case 0:
+ if (close(fd))
+ die("close(%s): %m", f->name);
+ }
+}
+
+/* Compatibility wrappers */
+
+/* bopen_file_try() with the standard back-end and the given buffer size */
+struct fastbuf *
+bopen_try(const char *name, uns mode, uns buflen)
+{
+ return bopen_file_try(name, mode, &(struct fb_params){ .type = FB_STD, .buffer_size = buflen });
+}
+
+/* bopen_file() with the standard back-end and the given buffer size */
+struct fastbuf *
+bopen(const char *name, uns mode, uns buflen)
+{
+ return bopen_file(name, mode, &(struct fb_params){ .type = FB_STD, .buffer_size = buflen });
+}
+
+/* bopen_fd() with the standard back-end and the given buffer size */
+struct fastbuf *
+bfdopen(int fd, uns buflen)
+{
+ return bopen_fd(fd, &(struct fb_params){ .type = FB_STD, .buffer_size = buflen });
+}
+
+/*
+ * Like bfdopen(), but is_temp_file is set to 2, for which
+ * bclose_file_helper() neither unlinks nor closes the descriptor.
+ */
+struct fastbuf *
+bfdopen_shared(int fd, uns buflen)
+{
+ struct fastbuf *f = bfdopen(fd, buflen);
+ bconfig(f, BCONFIG_IS_TEMP_FILE, 2);
+ return f;
+}
--- /dev/null
+/*
+ * UCW Library -- Fast Buffered I/O on Memory Pools
+ *
+ * (c) 2007 Pavel Charvat <pchar@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/mempool.h"
+#include "lib/fastbuf.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+/* Convert a struct fastbuf pointer to the enclosing struct fbpool */
+#define FB_POOL(f) ((struct fbpool *)(f)->is_fastbuf)
+
+/*
+ * Spout callback: when the buffer is completely full, ask the memory pool to
+ * expand the growing object and continue writing behind the old contents.
+ */
+static void
+fbpool_spout(struct fastbuf *b)
+{
+  /* Still some room left */
+  if (b->bptr < b->bufend)
+    return;
+
+  struct mempool *pool = FB_POOL(b)->mp;
+  uns used = b->bufend - b->buffer;
+
+  /* Take the start address again after expansion and recompute all pointers from it */
+  b->buffer = mp_expand(pool);
+  b->bufend = b->buffer + mp_avail(pool);
+  b->bstop = b->buffer;
+  b->bptr = b->buffer + used;
+}
+
+/*
+ * Start writing a new object to memory pool mp.
+ * init_size is the size passed to mp_start() for the new object.
+ */
+void
+fbpool_start(struct fbpool *b, struct mempool *mp, uns init_size)
+{
+  struct fastbuf *fb = &b->fb;
+
+  b->mp = mp;
+  fb->bptr = mp_start(mp, init_size);
+  fb->bstop = fb->bptr;
+  fb->buffer = fb->bstop;
+  fb->bufend = fb->buffer + mp_avail(mp);
+}
+
+/* Finish the object being written: passes the current write position to mp_end() and returns its result */
+void *
+fbpool_end(struct fbpool *b)
+{
+ return mp_end(b->mp, b->fb.bptr);
+}
+
+/*
+ * Initialize a caller-provided struct fbpool. The structure is cleared and
+ * only the name, the spout callback and can_overwrite_buffer are set.
+ */
+void
+fbpool_init(struct fbpool *b)
+{
+  bzero(b, sizeof(*b));
+
+  struct fastbuf *fb = &b->fb;
+  fb->can_overwrite_buffer = 1;
+  fb->spout = fbpool_spout;
+  fb->name = "<fbpool>";
+}
+
+#ifdef TEST
+
+/*
+ * Self-test: prints "<hello>" 1024 times to a pool fastbuf which starts with
+ * 16 bytes and checks both the size and the contents of the resulting object.
+ */
+int main(void)
+{
+ struct mempool *mp;
+ struct fbpool fb;
+ byte *p;
+ uns l;
+
+ mp = mp_new(64);
+ fbpool_init(&fb);
+ fbpool_start(&fb, mp, 16);
+ for (uns i = 0; i < 1024; i++)
+ bprintf(&fb.fb, "<hello>");
+ p = fbpool_end(&fb);
+ l = mp_size(mp, p);
+ if (l != 1024 * 7)
+ ASSERT(0);
+ for (uns i = 0; i < 1024; i++)
+ if (memcmp(p + i * 7, "<hello>", 7))
+ ASSERT(0);
+ mp_delete(mp);
+
+ return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Temporary Fastbufs
+ *
+ * (c) 2002--2007 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/conf.h"
+#include "lib/fastbuf.h"
+#include "lib/threads.h"
+
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/fcntl.h>
+
+/* Prefix of names of temporary files; PID and counters are appended by temp_file_name() */
+static char *temp_prefix = "/tmp/temp";
+
+/* The prefix can be overridden in the Tempfiles section of the configuration */
+static struct cf_section temp_config = {
+ CF_ITEMS {
+ CF_STRING("Prefix", &temp_prefix),
+ CF_END
+ }
+};
+
+/* Declares the Tempfiles configuration section */
+static void CONSTRUCTOR temp_global_init(void)
+{
+ cf_declare_section("Tempfiles", &temp_config, 0);
+}
+
+/*
+ * Generate a name for a new temporary file.
+ *
+ * The name consists of the configured prefix, the PID, the thread ID (only
+ * when it differs from the PID) and a per-thread counter.
+ *
+ * buf -- where to store the name. It is expected to hold at least
+ *        TEMP_FILE_NAME_LEN bytes, as the buffer in bopen_tmp_file() does.
+ *
+ * The prefix comes from the configuration and can be arbitrarily long, so
+ * the name is formatted with an explicit bound. A name which does not fit
+ * is fatal; it used to overflow the caller's buffer silently.
+ */
+void
+temp_file_name(char *buf)
+{
+  struct ucwlib_context *ctx = ucwlib_thread_context();
+  int cnt = ++ctx->temp_counter;
+  int pid = getpid();
+  int len;
+
+  if (ctx->thread_id == pid)
+    len = snprintf(buf, TEMP_FILE_NAME_LEN, "%s%d-%d", temp_prefix, pid, cnt);
+  else
+    len = snprintf(buf, TEMP_FILE_NAME_LEN, "%s%d-%d-%d", temp_prefix, pid, ctx->thread_id, cnt);
+
+  /* snprintf() returns the length the complete name would have had */
+  if (len < 0 || len >= (int)TEMP_FILE_NAME_LEN)
+    die("Name of temporary file is too long (prefix %s)", temp_prefix);
+}
+
+/*
+ * Create a temporary file with a freshly generated name and open a fastbuf
+ * on it. The file is marked as temporary (is_temp_file == 1).
+ * NOTE(review): the file is opened without O_EXCL under a predictable
+ * name -- confirm that the prefix never points to a world-writable directory.
+ */
+struct fastbuf *
+bopen_tmp_file(struct fb_params *params)
+{
+  char name[TEMP_FILE_NAME_LEN];
+  struct fastbuf *fb;
+
+  temp_file_name(name);
+  fb = bopen_file(name, O_RDWR | O_CREAT | O_TRUNC, params);
+  bconfig(fb, BCONFIG_IS_TEMP_FILE, 1);
+  return fb;
+}
+
+/* bopen_tmp_file() with the standard back-end and the given buffer size */
+struct fastbuf *
+bopen_tmp(uns buflen)
+{
+ return bopen_tmp_file(&(struct fb_params){ .type = FB_STD, .buffer_size = buflen });
+}
+
+/*
+ * Turn a temporary file into a permanent one: clears its temporary flag,
+ * renames it to "name" and closes the fastbuf. The fastbuf must have been
+ * marked as temporary (is_temp_file == 1); a failed rename() is fatal.
+ */
+void bfix_tmp_file(struct fastbuf *fb, const char *name)
+{
+  int was_temp = bconfig(fb, BCONFIG_IS_TEMP_FILE, 0);
+  ASSERT(was_temp == 1);
+
+  int failed = rename(fb->name, name);
+  if (failed)
+    die("Cannot rename %s to %s: %m", fb->name, name);
+
+  bclose(fb);
+}
+
+#ifdef TEST
+
+#include "lib/getopt.h"
+
+/* Self-test: parses configuration options, then writes a line to a temporary fastbuf and closes it */
+int main(int argc, char **argv)
+{
+ log_init(NULL);
+ if (cf_getopt(argc, argv, CF_SHORT_OPTS, CF_NO_LONG_OPTS, NULL) >= 0)
+ die("Hey, whaddya want?");
+
+ struct fastbuf *f = bopen_tmp(65536);
+ bputsn(f, "Hello, world!");
+ bclose(f);
+ return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Fast Buffered I/O: Binary Numbers
+ *
+ * (c) 1997--2006 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/fastbuf.h"
+#include "lib/ff-binary.h"
+
+/*
+ * GEN(type, name, size, endian) defines the out-of-line slow paths
+ * bget<name>_<endian>_slow() and bput<name>_<endian>_slow() for values of
+ * <size> bits. They go through bread() and bwrite_slow(), so the value may
+ * straddle a buffer boundary. The getter returns a value with all bits set
+ * when fewer than size/8 bytes could be read.
+ */
+#define GEN(type, name, size, endian) \
+type bget##name##_##endian##_slow(struct fastbuf *f) \
+{ \
+ byte buf[size/8]; \
+ if (bread(f, buf, sizeof(buf)) != sizeof(buf)) \
+ return ~(type)0; \
+ return get_u##size##_##endian(buf); \
+} \
+void bput##name##_##endian##_##slow(struct fastbuf *f, type x) \
+{ \
+ byte buf[size/8]; \
+ put_u##size##_##endian(buf, x); \
+ bwrite_slow(f, buf, sizeof(buf)); \
+}
+
+/* Instantiate both the big-endian and the little-endian variant */
+#define FF_ALL(type, name, size) GEN(type,name,size,be) GEN(type,name,size,le)
+
+/* w = 16 bits, l = 32 bits, q = 64 bits, 5 = 40 bits (5 bytes) */
+FF_ALL(int, w, 16)
+FF_ALL(uns, l, 32)
+FF_ALL(u64, q, 64)
+FF_ALL(u64, 5, 40)
--- /dev/null
+/*
+ * UCW Library -- Fast Buffered I/O on Binary Values
+ *
+ * (c) 1997--2007 Martin Mares <mj@ucw.cz>
+ * (c) 2004 Robert Spalek <robert@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_FF_BINARY_H
+#define _UCW_FF_BINARY_H
+
+#include "lib/fastbuf.h"
+#include "lib/unaligned.h"
+
+/* Byte order of the functions without an explicit _be/_le suffix: the one of the CPU */
+#ifdef CPU_BIG_ENDIAN
+#define FF_ENDIAN be
+#else
+#define FF_ENDIAN le
+#endif
+
+/*
+ * GET_FUNC declares bget<name>_<endian>_slow() and defines the inline
+ * bget<name>_<endian>(): when at least bits/8 bytes are available for
+ * reading, the value is decoded directly from the buffer, otherwise the
+ * slow path is called.
+ */
+#define GET_FUNC(type, name, bits, endian) \
+ type bget##name##_##endian##_slow(struct fastbuf *f); \
+ static inline type bget##name##_##endian(struct fastbuf *f) \
+ { \
+ if (bavailr(f) >= bits/8) \
+ { \
+ type w = get_u##bits##_##endian(f->bptr); \
+ f->bptr += bits/8; \
+ return w; \
+ } \
+ else \
+ return bget##name##_##endian##_slow(f); \
+ }
+
+/*
+ * PUT_FUNC declares bput<name>_<endian>_slow() and defines the inline
+ * bput<name>_<endian>(): when at least bits/8 bytes are available for
+ * writing, the value is encoded directly into the buffer, otherwise the
+ * slow path is called. The slow path is called as a plain statement: a
+ * return statement with an expression is not allowed in a void function.
+ */
+#define PUT_FUNC(type, name, bits, endian) \
+  void bput##name##_##endian##_slow(struct fastbuf *f, type x); \
+  static inline void bput##name##_##endian(struct fastbuf *f, type x) \
+  { \
+    if (bavailw(f) >= bits/8) \
+      { \
+        put_u##bits##_##endian(f->bptr, x); \
+        f->bptr += bits/8; \
+      } \
+    else \
+      bput##name##_##endian##_slow(f, x); \
+  }
+
+/*
+ * FF_ALL_X generates getters and putters for both byte orders, plus
+ * bget<name>() and bput<name>() without a suffix, which use the default
+ * byte order.
+ */
+#define FF_ALL_X(type, name, bits, defendian) \
+ GET_FUNC(type, name, bits, be) \
+ GET_FUNC(type, name, bits, le) \
+ PUT_FUNC(type, name, bits, be) \
+ PUT_FUNC(type, name, bits, le) \
+ static inline type bget##name(struct fastbuf *f) { return bget##name##_##defendian(f); } \
+ static inline void bput##name(struct fastbuf *f, type x) { bput##name##_##defendian(f, x); }
+
+/* One more level of expansion, so that FF_ENDIAN is expanded before token pasting */
+#define FF_ALL(type, name, bits, defendian) FF_ALL_X(type, name, bits, defendian)
+
+/* w = 16 bits, l = 32 bits, q = 64 bits, 5 = 40 bits (5 bytes) */
+FF_ALL(int, w, 16, FF_ENDIAN)
+FF_ALL(uns, l, 32, FF_ENDIAN)
+FF_ALL(u64, q, 64, FF_ENDIAN)
+FF_ALL(u64, 5, 40, FF_ENDIAN)
+
+/* The helper macros are private to this header */
+#undef GET_FUNC
+#undef PUT_FUNC
+#undef FF_ENDIAN
+#undef FF_ALL_X
+#undef FF_ALL
+
+/* I/O on uintptr_t (native endianness only): 64-bit or 32-bit functions, depending on the pointer size */
+
+#ifdef CPU_64BIT_POINTERS
+#define bputa(x,p) bputq(x,p)
+#define bgeta(x) bgetq(x)
+#else
+#define bputa(x,p) bputl(x,p)
+#define bgeta(x) bgetl(x)
+#endif
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Printf on Fastbuf Streams
+ *
+ * (c) 2002--2005 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/fastbuf.h"
+
+#include <stdio.h>
+#include <alloca.h>
+
+/*
+ * Formatted output to a fastbuf, va_list version.
+ *
+ * When at least 16 bytes of the write buffer are available, the text is first
+ * formatted directly into the buffer. If it does not fit (or too little space
+ * is available), it is formatted into a temporary buffer on the stack, which
+ * is enlarged until the text fits, and then written by bwrite().
+ *
+ * Returns the number of characters written.
+ */
+int
+vbprintf(struct fastbuf *b, const char *msg, va_list args)
+{
+ byte *buf;
+ int len, r;
+ va_list args2;
+
+ len = bdirect_write_prepare(b, &buf);
+ if (len >= 16)
+ {
+ /* args may be needed again below, so always format from a copy */
+ va_copy(args2, args);
+ r = vsnprintf(buf, len, msg, args2);
+ va_end(args2);
+ if (r < 0)
+ len = 256;
+ else if (r < len)
+ {
+ bdirect_write_commit(b, buf+r);
+ return r;
+ }
+ else
+ /* The required size is known now: r characters and the terminating zero */
+ len = r+1;
+ }
+ else
+ len = 256;
+
+ while (1)
+ {
+ buf = alloca(len);
+ va_copy(args2, args);
+ r = vsnprintf(buf, len, msg, args2);
+ va_end(args2);
+ if (r < 0)
+ /* vsnprintf() did not report the required size, so try twice as much */
+ len += len;
+ else if (r < len)
+ {
+ bwrite(b, buf, r);
+ return r;
+ }
+ else
+ len = r+1;
+ }
+}
+
+/*
+ * Formatted output to a fastbuf, see vbprintf().
+ * Returns the number of characters written.
+ */
+int
+bprintf(struct fastbuf *b, const char *msg, ...)
+{
+  va_list ap;
+
+  va_start(ap, msg);
+  int written = vbprintf(b, msg, ap);
+  va_end(ap);
+
+  return written;
+}
+
+#ifdef TEST
+
+/* Self-test: prints 10000 formatted lines to a shared fastbuf on fd 1 */
+int main(void)
+{
+ struct fastbuf *b = bfdopen_shared(1, 65536);
+ for (int i=0; i<10000; i++)
+ bprintf(b, "13=%d str=<%s> msg=%m\n", 13, "str");
+ bclose(b);
+ return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Fast Buffered I/O: Strings
+ *
+ * (c) 1997--2006 Martin Mares <mj@ucw.cz>
+ * (c) 2006 Pavel Charvat <pchar@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/fastbuf.h"
+#include "lib/mempool.h"
+#include "lib/bbuf.h"
+
+/*
+ * Read a line from fastbuf f to buffer b of l bytes (l must be non-zero).
+ * The newline is consumed, but not stored; the result is zero-terminated.
+ *
+ * Returns a pointer to the terminating zero (not to the start of the line,
+ * hence non-standard), or NULL if there are no data left to read.
+ * A line which does not fit in the buffer is fatal.
+ */
+char * /* Non-standard */
+bgets(struct fastbuf *f, char *b, uns l)
+{
+ ASSERT(l);
+ byte *src;
+ uns src_len = bdirect_read_prepare(f, &src);
+ if (!src_len)
+ return NULL;
+ do
+ {
+ /* Copy as much as both the source chunk and the rest of the buffer allow */
+ uns cnt = MIN(l, src_len);
+ for (uns i = cnt; i--;)
+ {
+ byte v = *src++;
+ if (v == '\n')
+ {
+ bdirect_read_commit(f, src);
+ goto exit;
+ }
+ *b++ = v;
+ }
+ /* The buffer is full and no newline was seen, so there is no room for the terminating zero */
+ if (unlikely(cnt == l))
+ die("%s: Line too long", f->name);
+ l -= cnt;
+ bdirect_read_commit(f, src);
+ src_len = bdirect_read_prepare(f, &src);
+ }
+ while (src_len);
+exit:
+ *b = 0;
+ return b;
+}
+
+/*
+ * Like bgets(), but a line which is too long is reported to the caller.
+ *
+ * Returns 0 if there are no data left to read, -1 if the line does not fit
+ * in the l bytes of the buffer, and otherwise the length of the line
+ * including the terminating zero.
+ */
+int
+bgets_nodie(struct fastbuf *f, char *b, uns l)
+{
+ ASSERT(l);
+ byte *src, *start = b;
+ uns src_len = bdirect_read_prepare(f, &src);
+ if (!src_len)
+ return 0;
+ do
+ {
+ uns cnt = MIN(l, src_len);
+ for (uns i = cnt; i--;)
+ {
+ byte v = *src++;
+ if (v == '\n')
+ {
+ bdirect_read_commit(f, src);
+ goto exit;
+ }
+ *b++ = v;
+ }
+ /* Unlike in bgets(), the data are committed as consumed before the overflow is reported */
+ bdirect_read_commit(f, src);
+ if (cnt == l)
+ return -1;
+ l -= cnt;
+ src_len = bdirect_read_prepare(f, &src);
+ }
+ while (src_len);
+exit:
+ *b++ = 0;
+ return b - (char *)start;
+}
+
+/*
+ * Read one line from f into the growing buffer bb.
+ *
+ * The '\n' is consumed but not stored and the data in bb->ptr is
+ * zero-terminated.  Returns 0 if bdirect_read_prepare() offers no data
+ * at the start, otherwise the number of bytes stored including the
+ * terminating zero.  Calls die() once `limit' bytes have been stored
+ * without reaching the end of the line.
+ */
+uns
+bgets_bb(struct fastbuf *f, struct bb_t *bb, uns limit)
+{
+ ASSERT(limit);
+ byte *src;
+ uns src_len = bdirect_read_prepare(f, &src);
+ if (!src_len)
+ return 0;
+ bb_grow(bb, 1);
+ byte *buf = bb->ptr;
+ /* len = bytes stored so far; buf_len = room left, never beyond limit */
+ uns len = 0, buf_len = MIN(bb->len, limit);
+ do
+ {
+ uns cnt = MIN(src_len, buf_len);
+ for (uns i = cnt; i--;)
+ {
+ byte v = *src++;
+ if (v == '\n')
+ {
+ bdirect_read_commit(f, src);
+ goto exit;
+ }
+ *buf++ = v;
+ }
+ len += cnt;
+ /* Input window exhausted: commit it and ask for the next one. */
+ if (cnt == src_len)
+ {
+ bdirect_read_commit(f, src);
+ src_len = bdirect_read_prepare(f, &src);
+ }
+ else
+ src_len -= cnt;
+ /* Output buffer exhausted: grow it (bb->ptr may change, so buf is recomputed). */
+ if (cnt == buf_len)
+ {
+ if (unlikely(len == limit))
+ die("%s: Line too long", f->name);
+ bb_do_grow(bb, len + 1);
+ buf = bb->ptr + len;
+ buf_len = MIN(bb->len, limit) - len;
+ }
+ else
+ buf_len -= cnt;
+ }
+ while (src_len);
+exit:
+ *buf++ = 0;
+ return buf - bb->ptr;
+}
+
+/*
+ * Read one line of arbitrary length from f and return it as a
+ * zero-terminated string allocated from the memory pool mp.
+ *
+ * The line is first gathered in fixed-size blocks living on the stack
+ * (the first one is a local variable, further ones come from alloca())
+ * and only then copied into a single mp_alloc()ed area.  The '\n' is
+ * consumed but not stored.  Returns NULL if no input is available at
+ * the start.
+ */
+char *
+bgets_mp(struct fastbuf *f, struct mempool *mp)
+{
+ byte *src;
+ uns src_len = bdirect_read_prepare(f, &src);
+ if (!src_len)
+ return NULL;
+#define BLOCK_SIZE (4096 - sizeof(void *))
+ /* Completed blocks form a singly-linked list, newest first. */
+ struct block {
+ struct block *prev;
+ byte data[BLOCK_SIZE];
+ } *blocks = NULL;
+ /* sum = bytes held in completed blocks; buf_len = room left in the current block */
+ uns sum = 0, buf_len = BLOCK_SIZE, cnt;
+ struct block first_block, *new_block = &first_block;
+ byte *buf = new_block->data;
+ do
+ {
+ cnt = MIN(src_len, buf_len);
+ for (uns i = cnt; i--;)
+ {
+ byte v = *src++;
+ if (v == '\n')
+ {
+ bdirect_read_commit(f, src);
+ goto exit;
+ }
+ *buf++ = v;
+ }
+ if (cnt == src_len)
+ {
+ bdirect_read_commit(f, src);
+ src_len = bdirect_read_prepare(f, &src);
+ }
+ else
+ src_len -= cnt;
+ /* Current block is full: push it onto the list and start a fresh one. */
+ if (cnt == buf_len)
+ {
+ new_block->prev = blocks;
+ blocks = new_block;
+ sum += buf_len = BLOCK_SIZE;
+ new_block = alloca(sizeof(struct block));
+ buf = new_block->data;
+ }
+ else
+ buf_len -= cnt;
+ }
+ while (src_len);
+exit: ;
+ /* len = bytes in the last, partially filled block */
+ uns len = buf - new_block->data;
+ /* Start at the position of the last block and fill the result back to front. */
+ byte *result = mp_alloc(mp, sum + len + 1) + sum;
+ result[len] = 0;
+ memcpy(result, new_block->data, len);
+ while (blocks)
+ {
+ result -= BLOCK_SIZE;
+ memcpy(result, blocks->data, BLOCK_SIZE);
+ blocks = blocks->prev;
+ }
+ return result;
+#undef BLOCK_SIZE
+}
+
+/*
+ * Prepare the state in s for reading a line with bgets_stk_step().
+ *
+ * If the fastbuf s->f offers data, the first buffer size is set to 256
+ * and old_buf is cleared; otherwise cur_buf is set to NULL and cur_len
+ * to 0.
+ */
+void
+bgets_stk_init(struct bgets_stk_struct *s)
+{
+  s->src_len = bdirect_read_prepare(s->f, &s->src);
+  if (s->src_len)
+    {
+      s->old_buf = NULL;
+      s->cur_len = 256;
+      return;
+    }
+  s->cur_buf = NULL;
+  s->cur_len = 0;
+}
+
+/*
+ * One step of reading a line into s->cur_buf (s->cur_len bytes).
+ *
+ * If a previous step left data in s->old_buf, it is copied to the start
+ * of the current buffer first.  When the current buffer fills up, it is
+ * recorded as old_buf/old_len, cur_len is doubled and the function
+ * returns -- presumably the caller then provides a new cur_buf of
+ * cur_len bytes and calls again (TODO confirm against the caller).
+ * When the line or the input ends, the buffer is zero-terminated and
+ * cur_len is set to 0.
+ */
+void
+bgets_stk_step(struct bgets_stk_struct *s)
+{
+ byte *buf = s->cur_buf;
+ uns buf_len = s->cur_len;
+ /* Carry over what the previous, smaller buffer already holds. */
+ if (s->old_buf)
+ {
+ memcpy( s->cur_buf, s->old_buf, s->old_len);
+ buf += s->old_len;
+ buf_len -= s->old_len;
+ }
+ do
+ {
+ uns cnt = MIN(s->src_len, buf_len);
+ for (uns i = cnt; i--;)
+ {
+ byte v = *s->src++;
+ if (v == '\n')
+ {
+ bdirect_read_commit(s->f, s->src);
+ goto exit;
+ }
+ *buf++ = v;
+ }
+ if (cnt == s->src_len)
+ {
+ bdirect_read_commit(s->f, s->src);
+ s->src_len = bdirect_read_prepare(s->f, &s->src);
+ }
+ else
+ s->src_len -= cnt;
+ /* Buffer full: remember it and request one twice as large. */
+ if (cnt == buf_len)
+ {
+ s->old_len = s->cur_len;
+ s->old_buf = s->cur_buf;
+ s->cur_len *= 2;
+ return;
+ }
+ else
+ buf_len -= cnt;
+ }
+ while (s->src_len);
+exit:
+ *buf = 0;
+ /* cur_len == 0 marks the line as complete */
+ s->cur_len = 0;
+}
+
+/*
+ * Read a zero-terminated string from f into b, which has room for l
+ * bytes.
+ *
+ * Returns NULL if no input is available at the start, otherwise a
+ * pointer to the terminating zero inside b.  Calls die() when l bytes
+ * have been stored without finding the terminator.
+ */
+char *
+bgets0(struct fastbuf *f, char *b, uns l)
+{
+  ASSERT(l);
+  byte *in;
+  uns avail = bdirect_read_prepare(f, &in);
+  if (!avail)
+    return NULL;
+
+  while (avail)
+    {
+      uns chunk = MIN(l, avail);
+      byte *stop = in + chunk;
+      while (in < stop)
+	{
+	  /* The terminator itself is copied as well. */
+	  *b = *in++;
+	  if (!*b)
+	    {
+	      bdirect_read_commit(f, in);
+	      return b;
+	    }
+	  b++;
+	}
+      if (unlikely(chunk == l))
+	die("%s: Line too long", f->name);
+      l -= chunk;
+      bdirect_read_commit(f, in);
+      avail = bdirect_read_prepare(f, &in);
+    }
+
+  /* Input ended before a zero byte was seen. */
+  *b = 0;
+  return b;
+}
--- /dev/null
+/*
+ * UCW Library: Reading and writing of UTF-8 and UTF-16 on Fastbuf Streams
+ *
+ * (c) 2001--2004 Martin Mares <mj@ucw.cz>
+ * (c) 2004 Robert Spalek <robert@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/fastbuf.h"
+#include "lib/unicode.h"
+#include "lib/ff-unicode.h"
+#include "lib/ff-binary.h"
+
+/*** UTF-8 ***/
+
+/*
+ * Byte-by-byte decoder for UTF-8 sequences of up to 3 bytes.
+ *
+ * Returns the decoded code point, the (negative) result of bgetc() at
+ * end of input, or `repl' for a malformed or too long sequence.  When a
+ * sequence is cut short by a byte that is not a continuation byte, that
+ * byte is pushed back.
+ */
+int
+bget_utf8_slow(struct fastbuf *b, uns repl)
+{
+  int c = bgetc(b);
+
+  if (c < 0x80)				/* ASCII or EOF */
+    return c;
+  if (c < 0xc0)				/* Stray continuation byte */
+    return repl;
+
+  int code;
+  uns tail;				/* Continuation bytes still expected */
+  if (c >= 0xf0)			/* Longer than 3 bytes: skip the whole sequence */
+    {
+      do
+	c = bgetc(b);
+      while (c >= 0x80 && c < 0xc0);
+      goto wrong;
+    }
+  else if (c >= 0xe0)			/* 3-byte sequence */
+    {
+      code = c & 0x0f;
+      tail = 2;
+    }
+  else					/* 2-byte sequence */
+    {
+      code = c & 0x1f;
+      tail = 1;
+    }
+
+  while (tail--)
+    {
+      c = bgetc(b);
+      if (c < 0x80 || c >= 0xc0)
+	goto wrong;
+      code = (code << 6) | (c & 0x3f);
+    }
+  return code;
+
+ wrong:
+  if (c >= 0)
+    bungetc(b);
+  return repl;
+}
+
+/*
+ * Byte-by-byte decoder for UTF-8 sequences of up to 6 bytes.
+ *
+ * Returns the decoded code point, the (negative) result of bgetc() at
+ * end of input, or `repl' for a malformed sequence.  When a sequence is
+ * cut short by a byte that is not a continuation byte, that byte is
+ * pushed back.
+ */
+int
+bget_utf8_32_slow(struct fastbuf *b, uns repl)
+{
+  /* A lead byte below `below' carries `mask' payload bits and is followed by `tail' bytes. */
+  static const struct { int below, mask, tail; } forms[] = {
+    { 0xe0, 0x1f, 1 },
+    { 0xf0, 0x0f, 2 },
+    { 0xf8, 0x07, 3 },
+    { 0xfc, 0x03, 4 },
+    { 0xfe, 0x01, 5 },
+  };
+  int c = bgetc(b);
+
+  if (c < 0x80)				/* ASCII or EOF */
+    return c;
+  if (c < 0xc0)				/* Stray continuation byte */
+    return repl;
+
+  int code = 0, tail = 0;
+  for (uns i = 0; i < sizeof(forms) / sizeof(forms[0]); i++)
+    if (c < forms[i].below)
+      {
+	code = c & forms[i].mask;
+	tail = forms[i].tail;
+	break;
+      }
+
+  if (!tail)				/* 0xfe or 0xff: too large, skip it */
+    {
+      do
+	c = bgetc(b);
+      while (c >= 0x80 && c < 0xc0);
+      goto wrong;
+    }
+
+  for (; tail > 0; tail--)
+    {
+      c = bgetc(b);
+      if (c < 0x80 || c >= 0xc0)
+	goto wrong;
+      code = (code << 6) | (c & 0x3f);
+    }
+  return code;
+
+ wrong:
+  if (c >= 0)
+    bungetc(b);
+  return repl;
+}
+
+/*
+ * Write the code point u (which must be below 65536) to b as a UTF-8
+ * sequence of 1 to 3 bytes, one bputc() at a time.
+ */
+void
+bput_utf8_slow(struct fastbuf *b, uns u)
+{
+  ASSERT(u < 65536);
+
+  if (u < 0x80)
+    {
+      bputc(b, u);
+      return;
+    }
+
+  if (u >= 0x800)
+    {
+      bputc(b, 0xe0 | (u >> 12));
+      bputc(b, 0x80 | ((u >> 6) & 0x3f));
+    }
+  else
+    bputc(b, 0xc0 | (u >> 6));
+
+  /* Every multi-byte form ends with the lowest six bits. */
+  bputc(b, 0x80 | (u & 0x3f));
+}
+
+/*
+ * Write the code point u (which must be below 2^31) to b as a UTF-8
+ * sequence of 1 to 6 bytes, one bputc() at a time.
+ */
+void
+bput_utf8_32_slow(struct fastbuf *b, uns u)
+{
+  ASSERT(u < (1U<<31));
+
+  if (u < 0x80)
+    {
+      bputc(b, u);
+      return;
+    }
+
+  /* Pick the lead-byte marker and the position of the bits it carries. */
+  uns lead, shift;
+  if (u < 0x800)
+    lead = 0xc0, shift = 6;
+  else if (u < (1<<16))
+    lead = 0xe0, shift = 12;
+  else if (u < (1<<21))
+    lead = 0xf0, shift = 18;
+  else if (u < (1<<26))
+    lead = 0xf8, shift = 24;
+  else
+    lead = 0xfc, shift = 30;
+
+  bputc(b, lead | (u >> shift));
+
+  /* The remaining bits follow in groups of six, most significant first. */
+  while (shift)
+    {
+      shift -= 6;
+      bputc(b, 0x80 | ((u >> shift) & 0x3f));
+    }
+}
+
+/*** UTF-16 ***/
+
+/*
+ * Read one code point encoded as big-endian UTF-16.
+ *
+ * Returns -1 if no input is available, the decoded code point on
+ * success, and `repl' when the first word cannot be read, when a low
+ * surrogate comes first, or when a high surrogate is not followed by a
+ * low surrogate.
+ */
+int
+bget_utf16_be_slow(struct fastbuf *b, uns repl)
+{
+  if (bpeekc(b) < 0)
+    return -1;
+
+  uns first = bgetw_be(b);
+  if ((int)first < 0)
+    return repl;
+
+  /* Offset within the surrogate area 0xd800..0xdfff, if it lies there. */
+  uns hi = first - 0xd800;
+  if (hi >= 0x800)
+    return first;
+  if (hi >= 0x400)
+    return repl;
+
+  if (bpeekc(b) < 0)
+    return repl;
+  uns lo = bgetw_be(b) - 0xdc00;
+  if (lo >= 0x400)
+    return repl;
+
+  return 0x10000 + (hi << 10) + lo;
+}
+
+/*
+ * Read one code point encoded as little-endian UTF-16.
+ *
+ * Returns -1 if no input is available, the decoded code point on
+ * success, and `repl' when the first word cannot be read, when a low
+ * surrogate comes first, or when a high surrogate is not followed by a
+ * low surrogate.
+ */
+int
+bget_utf16_le_slow(struct fastbuf *b, uns repl)
+{
+  if (bpeekc(b) < 0)
+    return -1;
+
+  uns first = bgetw_le(b);
+  if ((int)first < 0)
+    return repl;
+
+  /* Offset within the surrogate area 0xd800..0xdfff, if it lies there. */
+  uns hi = first - 0xd800;
+  if (hi >= 0x800)
+    return first;
+  if (hi >= 0x400)
+    return repl;
+
+  if (bpeekc(b) < 0)
+    return repl;
+  uns lo = bgetw_le(b) - 0xdc00;
+  if (lo >= 0x400)
+    return repl;
+
+  return 0x10000 + (hi << 10) + lo;
+}
+
+/*
+ * Write the code point u to b as big-endian UTF-16, one bputc() at a
+ * time: a single word outside the surrogate area, a surrogate pair for
+ * 0x10000..0x10ffff.  Anything else trips ASSERT(0).
+ */
+void
+bput_utf16_be_slow(struct fastbuf *b, uns u)
+{
+  if (u < 0xd800 || (u >= 0xe000 && u < 0x10000))
+    {
+      bputc(b, u >> 8);
+      bputc(b, u & 0xff);
+      return;
+    }
+
+  /* 20-bit payload of the surrogate pair; wraps to a huge value for surrogates. */
+  uns v = u - 0x10000;
+  if (v < 0x100000)
+    {
+      bputc(b, 0xd8 | (v >> 18));
+      bputc(b, (v >> 10) & 0xff);
+      bputc(b, 0xdc | ((v >> 8) & 0x3));
+      bputc(b, v & 0xff);
+    }
+  else
+    ASSERT(0);
+}
+
+/*
+ * Write the code point u to b as little-endian UTF-16, one bputc() at
+ * a time: a single word outside the surrogate area, a surrogate pair
+ * for 0x10000..0x10ffff.  Anything else trips ASSERT(0).
+ */
+void
+bput_utf16_le_slow(struct fastbuf *b, uns u)
+{
+  if (u < 0xd800 || (u >= 0xe000 && u < 0x10000))
+    {
+      bputc(b, u & 0xff);
+      bputc(b, u >> 8);
+      return;
+    }
+
+  /* 20-bit payload of the surrogate pair; wraps to a huge value for surrogates. */
+  uns v = u - 0x10000;
+  if (v < 0x100000)
+    {
+      bputc(b, (v >> 10) & 0xff);
+      bputc(b, 0xd8 | (v >> 18));
+      bputc(b, v & 0xff);
+      bputc(b, 0xdc | ((v >> 8) & 0x3));
+    }
+  else
+    ASSERT(0);
+}
+
+#ifdef TEST
+
+#include <stdlib.h>
+#include <stdio.h>
+
+/*
+ * Test driver.  argv[1] names the function to exercise (matched
+ * case-insensitively against the FUNCS list below).  Hexadecimal numbers
+ * are read from standard input: for the BGET_* functions they are the
+ * input bytes and the decoded code points are printed, for the BPUT_*
+ * functions they are code points and the encoded bytes are printed.
+ */
+int main(int argc, char **argv)
+{
+ /* X-macro list: the BGET_* entries must precede BPUT_UTF8, see the dispatch below. */
+#define FUNCS \
+ F(BGET_UTF8) F(BGET_UTF8_32) F(BGET_UTF16_BE) F(BGET_UTF16_LE) \
+ F(BPUT_UTF8) F(BPUT_UTF8_32) F(BPUT_UTF16_BE) F(BPUT_UTF16_LE)
+
+ enum {
+#define F(x) FUNC_##x,
+ FUNCS
+#undef F
+ };
+ char *names[] = {
+#define F(x) [FUNC_##x] = #x,
+ FUNCS
+#undef F
+ };
+
+ /* ~0U means "no function selected" */
+ uns func = ~0U;
+ if (argc > 1)
+ for (uns i = 0; i < ARRAY_SIZE(names); i++)
+ if (!strcasecmp(names[i], argv[1]))
+ func = i;
+ if (!~func)
+ {
+ fprintf(stderr, "Invalid usage!\n");
+ return 1;
+ }
+
+ struct fastbuf *b = fbgrow_create(8);
+ if (func < FUNC_BPUT_UTF8)
+ {
+ /* Decoder test: load all input bytes first, then decode them one code point at a time. */
+ uns u;
+ while (scanf("%x", &u) == 1)
+ bputc(b, u);
+ fbgrow_rewind(b);
+ while (bpeekc(b) >= 0)
+ {
+ /* Separate the outputs by spaces (no space before the first one). */
+ if (btell(b))
+ putchar(' ');
+ switch (func)
+ {
+ case FUNC_BGET_UTF8:
+ u = bget_utf8_slow(b, UNI_REPLACEMENT);
+ break;
+ case FUNC_BGET_UTF8_32:
+ u = bget_utf8_32_slow(b, UNI_REPLACEMENT);
+ break;
+ case FUNC_BGET_UTF16_BE:
+ u = bget_utf16_be_slow(b, UNI_REPLACEMENT);
+ break;
+ case FUNC_BGET_UTF16_LE:
+ u = bget_utf16_le_slow(b, UNI_REPLACEMENT);
+ break;
+ default:
+ ASSERT(0);
+ }
+ printf("%04x", u);
+ }
+ putchar('\n');
+ }
+ else
+ {
+ /* Encoder test: encode each code point separately and dump the resulting bytes. */
+ uns u, i = 0;
+ while (scanf("%x", &u) == 1)
+ {
+ switch (func)
+ {
+ case FUNC_BPUT_UTF8:
+ bput_utf8_slow(b, u);
+ break;
+ case FUNC_BPUT_UTF8_32:
+ bput_utf8_32_slow(b, u);
+ break;
+ case FUNC_BPUT_UTF16_BE:
+ bput_utf16_be_slow(b, u);
+ break;
+ case FUNC_BPUT_UTF16_LE:
+ bput_utf16_le_slow(b, u);
+ break;
+ default:
+ ASSERT(0);
+ }
+ fbgrow_rewind(b);
+ u = 0;
+ while (bpeekc(b) >= 0)
+ {
+ /* i counts all bytes printed so far, so only the very first one has no leading space */
+ if (i++)
+ putchar(' ');
+ printf("%02x", bgetc(b));
+ }
+ fbgrow_reset(b);
+ }
+ putchar('\n');
+ }
+ bclose(b);
+
+ return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library: Reading and writing of UTF-8 and UTF-16 on Fastbuf Streams
+ *
+ * (c) 2001--2004 Martin Mares <mj@ucw.cz>
+ * (c) 2004 Robert Spalek <robert@ucw.cz>
+ * (c) 2007--2008 Pavel Charvat <pchar@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_FF_UNICODE_H
+#define _UCW_FF_UNICODE_H
+
+#include "lib/fastbuf.h"
+#include "lib/unicode.h"
+
+/*** UTF-8 ***/
+
+int bget_utf8_slow(struct fastbuf *b, uns repl);
+int bget_utf8_32_slow(struct fastbuf *b, uns repl);
+void bput_utf8_slow(struct fastbuf *b, uns u);
+void bput_utf8_32_slow(struct fastbuf *b, uns u);
+
+/*
+ * Read one UTF-8 character (up to 3 bytes), returning `repl' for
+ * malformed input.  Decodes straight from the buffer when at least 3
+ * bytes are available there, otherwise falls back to bget_utf8_slow().
+ */
+static inline int
+bget_utf8_repl(struct fastbuf *b, uns repl)
+{
+  if (bavailr(b) < 3)
+    return bget_utf8_slow(b, repl);
+
+  uns code;
+  b->bptr = utf8_get_repl(b->bptr, &code, repl);
+  return code;
+}
+
+/*
+ * Read one UTF-8 character (up to 6 bytes), returning `repl' for
+ * malformed input.  Decodes straight from the buffer when at least 6
+ * bytes are available there, otherwise falls back to
+ * bget_utf8_32_slow().
+ */
+static inline int
+bget_utf8_32_repl(struct fastbuf *b, uns repl)
+{
+  if (bavailr(b) < 6)
+    return bget_utf8_32_slow(b, repl);
+
+  uns code;
+  b->bptr = utf8_32_get_repl(b->bptr, &code, repl);
+  return code;
+}
+
+/* Same as bget_utf8_repl() with UNI_REPLACEMENT as the replacement value. */
+static inline int
+bget_utf8(struct fastbuf *b)
+{
+ return bget_utf8_repl(b, UNI_REPLACEMENT);
+}
+
+/* Same as bget_utf8_32_repl() with UNI_REPLACEMENT as the replacement value. */
+static inline int
+bget_utf8_32(struct fastbuf *b)
+{
+ return bget_utf8_32_repl(b, UNI_REPLACEMENT);
+}
+
+/*
+ * Write u as UTF-8.  Encodes straight into the buffer when at least 3
+ * bytes of room are available there, otherwise falls back to
+ * bput_utf8_slow().
+ */
+static inline void
+bput_utf8(struct fastbuf *b, uns u)
+{
+  if (bavailw(b) < 3)
+    {
+      bput_utf8_slow(b, u);
+      return;
+    }
+  b->bptr = utf8_put(b->bptr, u);
+}
+
+/*
+ * Write u as UTF-8 (up to 6 bytes).  Encodes straight into the buffer
+ * when at least 6 bytes of room are available there, otherwise falls
+ * back to bput_utf8_32_slow().
+ */
+static inline void
+bput_utf8_32(struct fastbuf *b, uns u)
+{
+  if (bavailw(b) < 6)
+    {
+      bput_utf8_32_slow(b, u);
+      return;
+    }
+  b->bptr = utf8_32_put(b->bptr, u);
+}
+
+/*** UTF-16 ***/
+
+int bget_utf16_be_slow(struct fastbuf *b, uns repl);
+int bget_utf16_le_slow(struct fastbuf *b, uns repl);
+void bput_utf16_be_slow(struct fastbuf *b, uns u);
+void bput_utf16_le_slow(struct fastbuf *b, uns u);
+
+/*
+ * Read one big-endian UTF-16 character, returning `repl' for malformed
+ * input.  Decodes straight from the buffer when at least 4 bytes (a
+ * full surrogate pair) are available there, otherwise falls back to
+ * bget_utf16_be_slow().
+ */
+static inline int
+bget_utf16_be_repl(struct fastbuf *b, uns repl)
+{
+  if (bavailr(b) < 4)
+    return bget_utf16_be_slow(b, repl);
+
+  uns code;
+  b->bptr = utf16_be_get_repl(b->bptr, &code, repl);
+  return code;
+}
+
+/*
+ * Read one little-endian UTF-16 character, returning `repl' for
+ * malformed input.  Decodes straight from the buffer when at least 4
+ * bytes (a full surrogate pair) are available there, otherwise falls
+ * back to bget_utf16_le_slow().
+ */
+static inline int
+bget_utf16_le_repl(struct fastbuf *b, uns repl)
+{
+  if (bavailr(b) < 4)
+    return bget_utf16_le_slow(b, repl);
+
+  uns code;
+  b->bptr = utf16_le_get_repl(b->bptr, &code, repl);
+  return code;
+}
+
+/* Same as bget_utf16_be_repl() with UNI_REPLACEMENT as the replacement value. */
+static inline int
+bget_utf16_be(struct fastbuf *b)
+{
+ return bget_utf16_be_repl(b, UNI_REPLACEMENT);
+}
+
+/* Same as bget_utf16_le_repl() with UNI_REPLACEMENT as the replacement value. */
+static inline int
+bget_utf16_le(struct fastbuf *b)
+{
+ return bget_utf16_le_repl(b, UNI_REPLACEMENT);
+}
+
+/*
+ * Write u as big-endian UTF-16.  Encodes straight into the buffer when
+ * at least 4 bytes of room (a full surrogate pair) are available there,
+ * otherwise falls back to bput_utf16_be_slow().
+ */
+static inline void
+bput_utf16_be(struct fastbuf *b, uns u)
+{
+  if (bavailw(b) < 4)
+    {
+      bput_utf16_be_slow(b, u);
+      return;
+    }
+  b->bptr = utf16_be_put(b->bptr, u);
+}
+
+/*
+ * Write u as little-endian UTF-16.  Encodes straight into the buffer
+ * when at least 4 bytes of room (a full surrogate pair) are available
+ * there, otherwise falls back to bput_utf16_le_slow().
+ */
+static inline void
+bput_utf16_le(struct fastbuf *b, uns u)
+{
+  if (bavailw(b) >= 4)
+    b->bptr = utf16_le_put(b->bptr, u);
+  else
+    bput_utf16_le_slow(b, u);
+}
+
+/*
+ * Misspelled name under which bput_utf16_le() was originally defined;
+ * kept so that existing callers continue to compile.  New code should
+ * call bput_utf16_le().
+ */
+static inline void
+bput_utf16_lbe(struct fastbuf *b, uns u)
+{
+  bput_utf16_le(b, u);
+}
+
+#endif
--- /dev/null
+# Tests for the Unicode module
+
+Name: bput_utf8
+Run: ../obj/lib/ff-unicode-t bput_utf8
+In: 0041 0048 004f 004a
+Out: 41 48 4f 4a
+
+Name: bget_utf8_32
+Run: ../obj/lib/ff-unicode-t bget_utf8_32
+In: fe 83 81
+Out: fffc
+
+Name: bput_utf16_be
+Run: ../obj/lib/ff-unicode-t bput_utf16_be
+In: 0041 004a 2a5f feff 0000 10ffff ffff 10000
+Out: 00 41 00 4a 2a 5f fe ff 00 00 db ff df ff ff ff d8 00 dc 00
+
+Name: bput_utf16_le
+Run: ../obj/lib/ff-unicode-t bput_utf16_le
+In: 0041 004a 2a5f feff 0000 10ffff ffff 10000
+Out: 41 00 4a 00 5f 2a ff fe 00 00 ff db ff df ff ff 00 d8 00 dc
+
+Name: bget_utf16_be (1)
+Run: ../obj/lib/ff-unicode-t bget_utf16_be
+In: 00 41 00 4a 2a 5f fe ff 00 00 db ff df ff ff ff d8 00 dc 00
+Out: 0041 004a 2a5f feff 0000 10ffff ffff 10000
+
+Name: bget_utf16_be (2)
+Run: ../obj/lib/ff-unicode-t bget_utf16_be
+In: dc 1a 2a 5f d8 01 d8 01 2a 5f d8 01
+Out: fffc 2a5f fffc 2a5f fffc
+
+Name: bget_utf16_le (1)
+Run: ../obj/lib/ff-unicode-t bget_utf16_le
+In: 41 00 4a 00 5f 2a ff fe 00 00 ff db ff df ff ff 00 d8 00 dc
+Out: 0041 004a 2a5f feff 0000 10ffff ffff 10000
+
+Name: bget_utf16_le (2)
+Run: ../obj/lib/ff-unicode-t bget_utf16_le
+In: 1a dc 5f 2a 01 d8 01 d8 5f 2a 01 d8
+Out: fffc 2a5f fffc 2a5f fffc
--- /dev/null
+/*
+ * UCW Library: An alias for lib/ff-unicode.h (for backwards compatibility)
+ *
+ * (c) 2008 Pavel Charvat <pchar@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_FF_UTF8_H
+#define _UCW_FF_UTF8_H
+
+#include "lib/ff-unicode.h"
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- A simple growing buffer
+ *
+ * (c) 2004, Robert Spalek <robert@ucw.cz>
+ * (c) 2005, Martin Mares <mj@ucw.cz>
+ *
+ * Define the following macros:
+ *
+ * GBUF_TYPE data type of records stored in the buffer
+ * GBUF_PREFIX(x) add a name prefix to all global symbols
+ * GBUF_TRACE(msg...) log growing of buffer [optional]
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#define BUF_T GBUF_PREFIX(t)
+
+/* The buffer itself: `ptr' points to storage for `len' items of GBUF_TYPE. */
+typedef struct BUF_T
+{
+  uns len;
+  GBUF_TYPE *ptr;
+}
+BUF_T;
+
+/* Set up an empty buffer; nothing is allocated yet. */
+static inline void
+GBUF_PREFIX(init)(BUF_T *b)
+{
+  b->len = 0;
+  b->ptr = NULL;
+}
+
+/* Release the storage (if any) and return the buffer to the empty state. */
+static void UNUSED
+GBUF_PREFIX(done)(BUF_T *b)
+{
+  if (b->ptr)
+    xfree(b->ptr);
+  b->len = 0;
+  b->ptr = NULL;
+}
+
+/* Resize the buffer to exactly `len' items, reporting it via GBUF_TRACE if defined. */
+static void UNUSED
+GBUF_PREFIX(set_size)(BUF_T *b, uns len)
+{
+  b->len = len;
+  b->ptr = xrealloc(b->ptr, len * sizeof(GBUF_TYPE));
+#ifdef GBUF_TRACE
+  GBUF_TRACE(STRINGIFY_EXPANDED(BUF_T) " growing to %u items", len);
+#endif
+}
+
+/*
+ * Enlarge the buffer to hold at least `len' items.  The size is at
+ * least doubled each time to ensure logarithmic cost.
+ */
+static void UNUSED
+GBUF_PREFIX(do_grow)(BUF_T *b, uns len)
+{
+  uns doubled = 2*b->len;
+  GBUF_PREFIX(set_size)(b, (len < doubled) ? doubled : len);
+}
+
+/*
+ * Make sure the buffer holds at least `len' items and return a pointer
+ * to its storage, which may have moved.
+ */
+static inline GBUF_TYPE *
+GBUF_PREFIX(grow)(BUF_T *b, uns len)
+{
+  if (unlikely(b->len < len))
+    GBUF_PREFIX(do_grow)(b, len);
+  return b->ptr;
+}
+
+/* Clean up the parameters so that the template can be instantiated again. */
+#undef GBUF_TYPE
+#undef GBUF_PREFIX
+#undef GBUF_TRACE
+#undef BUF_T
--- /dev/null
+#include "lib/lib.h"
+#include "lib/getopt.h"
+
+/*
+ * Restart option parsing so that getopt()/getopt_long() can scan an
+ * argument vector from the beginning again.  This is done by setting
+ * optind to 0; when CONFIG_OWN_GETOPT is defined, lib/getopt.h maps
+ * optind to the bundled implementation's sh_optind.
+ */
+void
+reset_getopt(void)
+{
+ // Should work on GNU libc
+ optind = 0;
+}
+
+#ifdef TEST
+#include <stdio.h>
+
+/*
+ * Run getopt_long() over the arguments and print one line for every
+ * option found, followed by the number of remaining non-option
+ * arguments (if there are any).
+ */
+static void
+parse(int argc, char **argv)
+{
+  static struct option longopts[] = {
+    { "longa", 0, 0, 'a' },
+    { "longb", 0, 0, 'b' },
+    { "longc", 1, 0, 'c' },
+    { "longd", 1, 0, 'd' },
+    { 0, 0, 0, 0 }
+  };
+
+  for (;;)
+    {
+      int opt = getopt_long(argc, argv, "abc:d:", longopts, NULL);
+      if (opt < 0)
+	break;
+
+      if (opt == 'a' || opt == 'b')
+	printf("option %c\n", opt);
+      else if (opt == 'c' || opt == 'd')
+	printf("option %c with value `%s'\n", opt, optarg);
+      else if (opt == '?')
+	printf("unknown option\n");
+      else
+	printf("getopt returned unexpected char 0x%02x\n", opt);
+    }
+
+  int leftover = argc - optind;
+  if (leftover)
+    printf("%d nonoption arguments\n", leftover);
+}
+
+/*
+ * Test driver: parse the command line, call reset_getopt() and parse
+ * the same command line once more.  Both passes print their findings,
+ * separated by a "reset" line.
+ */
+int
+main(int argc, char **argv)
+{
+ /* Silence getopt's own diagnostics; parse() prints "unknown option" itself. */
+ opterr = 0;
+ parse(argc, argv);
+ printf("reset\n");
+ reset_getopt();
+ parse(argc, argv);
+ return 0;
+}
+#endif
--- /dev/null
+/*
+ * UCW Library -- Parsing of configuration and command-line options
+ *
+ * (c) 2001--2006 Robert Spalek <robert@ucw.cz>
+ * (c) 2003--2006 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_GETOPT_H
+#define _UCW_GETOPT_H
+
+#ifdef CONFIG_OWN_GETOPT
+#include "lib/getopt/getopt-sh.h"
+#else
+#include <getopt.h>
+#endif
+
+void reset_getopt(void);
+
+/* Safe loading and reloading of configuration files: conf-input.c */
+
+extern char *cf_def_file; /* DEFAULT_CONFIG; NULL if already loaded */
+extern char *cf_env_file; /* ENV_VAR_CONFIG */
+int cf_reload(const char *file);
+int cf_load(const char *file);
+int cf_set(const char *string);
+
+/* Direct access to configuration items: conf-intr.c */
+
+#define CF_OPERATIONS T(CLOSE) T(SET) T(CLEAR) T(ALL) \
+ T(APPEND) T(PREPEND) T(REMOVE) T(EDIT) T(AFTER) T(BEFORE) T(COPY)
+ /* Closing brace finishes previous block.
+ * Basic attributes (static, dynamic, parsed) can be used with SET.
+ * Dynamic arrays can be used with SET, APPEND, PREPEND.
+ * Sections can be used with SET.
+ * Lists can be used with everything. */
+#define T(x) OP_##x,
+enum cf_operation { CF_OPERATIONS };
+#undef T
+
+struct cf_item;
+char *cf_find_item(const char *name, struct cf_item *item);
+char *cf_write_item(struct cf_item *item, enum cf_operation op, int number, char **pars);
+
+/* Debug dumping: conf-dump.c */
+
+struct fastbuf;
+void cf_dump_sections(struct fastbuf *fb);
+
+/* Journaling control: conf-journal.c */
+
+struct cf_journal_item;
+struct cf_journal_item *cf_journal_new_transaction(uns new_pool);
+void cf_journal_commit_transaction(uns new_pool, struct cf_journal_item *oldj);
+void cf_journal_rollback_transaction(uns new_pool, struct cf_journal_item *oldj);
+
+/*
+ * cf_getopt() takes care of parsing the command-line arguments, loading the
+ * default configuration file (cf_def_file) and processing configuration options.
+ * The calling convention is the same as with GNU getopt_long(), but you must prefix
+ * your own short/long options by the CF_(SHORT|LONG)_OPTS or pass CF_NO_LONG_OPTS
+ * if there are no long options.
+ *
+ * The default configuration file can be overridden by the --config option,
+ * which must come first. During parsing of all other options, the configuration
+ * is already available.
+ */
+
+#define CF_SHORT_OPTS "C:S:"
+#define CF_LONG_OPTS {"config", 1, 0, 'C'}, {"set", 1, 0, 'S'}, CF_LONG_OPTS_DEBUG
+#define CF_NO_LONG_OPTS (const struct option []) { CF_LONG_OPTS { NULL, 0, 0, 0 } }
+#ifndef CF_USAGE_TAB
+#define CF_USAGE_TAB ""
+#endif
+#define CF_USAGE \
+"-C, --config filename\t" CF_USAGE_TAB "Override the default configuration file\n\
+-S, --set sec.item=val\t" CF_USAGE_TAB "Manual setting of a configuration item\n" CF_USAGE_DEBUG
+
+#ifdef CONFIG_DEBUG
+#define CF_LONG_OPTS_DEBUG { "dumpconfig", 0, 0, 0x64436667 } ,
+#define CF_USAGE_DEBUG " --dumpconfig\t" CF_USAGE_TAB "Dump program configuration\n"
+#else
+#define CF_LONG_OPTS_DEBUG
+#define CF_USAGE_DEBUG
+#endif
+
+// conf-input.c
+int cf_getopt(int argc, char * const argv[], const char *short_opts, const struct option *long_opts, int *long_index);
+
+#endif
--- /dev/null
+# Tests for getopt
+
+Run: ../obj/lib/getopt-t -a -b --longc 2819 -d -a 1 2 3
+Out: option a
+ option b
+ option c with value `2819'
+ option d with value `-a'
+ 3 nonoption arguments
+ reset
+ option a
+ option b
+ option c with value `2819'
+ option d with value `-a'
+ 3 nonoption arguments
+
+Run: ../obj/lib/getopt-t -a -x
+Out: option a
+ unknown option
+ reset
+ option a
+ unknown option
--- /dev/null
+# Makefile for the UCW GetOpt Library (c) 2007 Pavel Charvat <pchar@ucw.cz>
+
+# Add this directory to the list of directories known to the build.
+DIRS+=lib/getopt
+
+# Add the bundled getopt implementation to the list of libucw modules.
+LIBUCW_MODS+=getopt/getopt-sh
--- /dev/null
+This directory contains getopt routines from the GNU libc 2.5.
+We need this as a fallback for our reset_getopt(), because there is
+no standardized interface for such an operation.
+
+They are distributed under the GNU LGPL.
+
+All files are exact copies of the original distribution with very
+few exceptions commented with `// SHERLOCK' prefix.
+I only provided my own getopt-sh.c, getopt-sh.h and Makefile.
+
+ Pavel Charvat, 2007
+
--- /dev/null
+#include "getopt-sh.h"
+#include "getopt_int.h"
+#include "getopt.c"
+#include "getopt1.c"
--- /dev/null
+#ifndef _UCW_GETOPT_GETOPT_SH_H
+#define _UCW_GETOPT_GETOPT_SH_H
+
+#define getopt sh_getopt
+#define getopt_long sh_getopt_long
+#define getopt_long_only sh_getopt_longonly
+#define optarg sh_optarg
+#define optind sh_optind
+#define opterr sh_opterr
+#define optopt sh_optopt
+
+#include "lib/getopt/getopt.h"
+
+#endif
--- /dev/null
+/* Getopt for GNU.
+ NOTE: getopt is now part of the C library, so if you don't know what
+ "Keep this file name-space clean" means, talk to drepper@gnu.org
+ before changing it!
+ Copyright (C) 1987,88,89,90,91,92,93,94,95,96,98,99,2000,2001,2002,2003,2004
+ Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+\f
+/* This tells Alpha OSF/1 not to define a getopt prototype in <stdio.h>.
+ Ditto for AIX 3.2 and <stdlib.h>. */
+#ifndef _NO_PROTO
+# define _NO_PROTO
+#endif
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <stdio.h>
+
+/* Comment out all this code if we are using the GNU C Library, and are not
+ actually compiling the library itself. This code is part of the GNU C
+ Library, but also included in many other GNU distributions. Compiling
+ and linking in this code is a waste when using the GNU C library
+ (especially if it is a shared library). Rather than having every GNU
+ program understand `configure --with-gnu-libc' and omit the object files,
+ it is simpler to just do this in the source for each such file. */
+
+#define GETOPT_INTERFACE_VERSION 2
+#if !defined _LIBC && defined __GLIBC__ && __GLIBC__ >= 2
+# include <gnu-versions.h>
+# if _GNU_GETOPT_INTERFACE_VERSION == GETOPT_INTERFACE_VERSION
+//# define ELIDE_CODE // SHERLOCK: disabled
+# endif
+#endif
+
+#ifndef ELIDE_CODE
+
+
+/* This needs to come after some library #include
+ to get __GNU_LIBRARY__ defined. */
+#ifdef __GNU_LIBRARY__
+/* Don't include stdlib.h for non-GNU C libraries because some of them
+ contain conflicting prototypes for getopt. */
+# include <stdlib.h>
+# include <unistd.h>
+#endif /* GNU C library. */
+
+#include <string.h>
+
+#ifdef VMS
+# include <unixlib.h>
+#endif
+
+#ifdef _LIBC
+# include <libintl.h>
+#else
+//# include "gettext.h" // SHERLOCK: replaced by <libintl.h>
+# include <libintl.h>
+# define _(msgid) gettext (msgid)
+#endif
+
+#if defined _LIBC && defined USE_IN_LIBIO
+# include <wchar.h>
+#endif
+
+#ifndef attribute_hidden
+# define attribute_hidden
+#endif
+
+/* This version of `getopt' appears to the caller like standard Unix `getopt'
+ but it behaves differently for the user, since it allows the user
+ to intersperse the options with the other arguments.
+
+ As `getopt' works, it permutes the elements of ARGV so that,
+ when it is done, all the options precede everything else. Thus
+ all application programs are extended to handle flexible argument order.
+
+ Setting the environment variable POSIXLY_CORRECT disables permutation.
+ Then the behavior is completely standard.
+
+ GNU application programs can use a third alternative mode in which
+ they can distinguish the relative order of options and other arguments. */
+
+#include "getopt.h"
+#include "getopt_int.h"
+
+/* For communication from `getopt' to the caller.
+ When `getopt' finds an option that takes an argument,
+ the argument value is returned here.
+ Also, when `ordering' is RETURN_IN_ORDER,
+ each non-option ARGV-element is returned here. */
+
+char *optarg;
+
+/* Index in ARGV of the next element to be scanned.
+ This is used for communication to and from the caller
+ and for communication between successive calls to `getopt'.
+
+ On entry to `getopt', zero means this is the first call; initialize.
+
+ When `getopt' returns -1, this is the index of the first of the
+ non-option elements that the caller should itself scan.
+
+ Otherwise, `optind' communicates from one call to the next
+ how much of ARGV has been scanned so far. */
+
+/* 1003.2 says this must be 1 before any call. */
+int optind = 1;
+
+/* Callers store zero here to inhibit the error message
+ for unrecognized options. */
+
+int opterr = 1;
+
+/* Set to an option character which was unrecognized.
+ This must be initialized on some systems to avoid linking in the
+ system's own getopt implementation. */
+
+int optopt = '?';
+
+/* Keep a global copy of all internal members of getopt_data. */
+
+static struct _getopt_data getopt_data;
+
+\f
+#ifndef __GNU_LIBRARY__
+
+/* Avoid depending on library functions or files
+ whose names are inconsistent. */
+
+#ifndef getenv
+extern char *getenv ();
+#endif
+
+#endif /* not __GNU_LIBRARY__ */
+\f
+#ifdef _LIBC
+/* Stored original parameters.
+ XXX This is no good solution. We should rather copy the args so
+ that we can compare them later. But we must not use malloc(3). */
+extern int __libc_argc;
+extern char **__libc_argv;
+
+/* Bash 2.0 gives us an environment variable containing flags
+ indicating ARGV elements that should not be considered arguments. */
+
+# ifdef USE_NONOPTION_FLAGS
+/* Defined in getopt_init.c */
+extern char *__getopt_nonoption_flags;
+# endif
+
+# ifdef USE_NONOPTION_FLAGS
+# define SWAP_FLAGS(ch1, ch2) \
+ if (d->__nonoption_flags_len > 0) \
+ { \
+ char __tmp = __getopt_nonoption_flags[ch1]; \
+ __getopt_nonoption_flags[ch1] = __getopt_nonoption_flags[ch2]; \
+ __getopt_nonoption_flags[ch2] = __tmp; \
+ }
+# else
+# define SWAP_FLAGS(ch1, ch2)
+# endif
+#else /* !_LIBC */
+# define SWAP_FLAGS(ch1, ch2)
+#endif /* _LIBC */
+
+/* Exchange two adjacent subsequences of ARGV.
+ One subsequence is elements [first_nonopt,last_nonopt)
+ which contains all the non-options that have been skipped so far.
+ The other is elements [last_nonopt,optind), which contains all
+ the options processed since those non-options were skipped.
+
+ `first_nonopt' and `last_nonopt' are relocated so that they describe
+ the new indices of the non-options in ARGV after they are moved. */
+
+static void
+exchange (char **argv, struct _getopt_data *d)
+{
+ /* argv[bottom..middle) are the skipped non-options,
+ argv[middle..top) the options processed after them. */
+ int bottom = d->__first_nonopt;
+ int middle = d->__last_nonopt;
+ int top = d->optind;
+ char *tem;
+
+ /* Exchange the shorter segment with the far end of the longer segment.
+ That puts the shorter segment into the right place.
+ It leaves the longer segment in the right place overall,
+ but it consists of two parts that need to be swapped next. */
+
+#if defined _LIBC && defined USE_NONOPTION_FLAGS
+ /* First make sure the handling of the `__getopt_nonoption_flags'
+ string can work normally. Our top argument must be in the range
+ of the string. */
+ if (d->__nonoption_flags_len > 0 && top >= d->__nonoption_flags_max_len)
+ {
+ /* We must extend the array. The user plays games with us and
+ presents new arguments. */
+ char *new_str = malloc (top + 1);
+ if (new_str == NULL)
+ d->__nonoption_flags_len = d->__nonoption_flags_max_len = 0;
+ else
+ {
+ memset (__mempcpy (new_str, __getopt_nonoption_flags,
+ d->__nonoption_flags_max_len),
+ '\0', top + 1 - d->__nonoption_flags_max_len);
+ d->__nonoption_flags_max_len = top + 1;
+ __getopt_nonoption_flags = new_str;
+ }
+ }
+#endif
+
+ /* Loop until one of the two segments has become empty. */
+ while (top > middle && middle > bottom)
+ {
+ if (top - middle > middle - bottom)
+ {
+ /* Bottom segment is the short one. */
+ int len = middle - bottom;
+ register int i;
+
+ /* Swap it with the top part of the top segment. */
+ for (i = 0; i < len; i++)
+ {
+ tem = argv[bottom + i];
+ argv[bottom + i] = argv[top - (middle - bottom) + i];
+ argv[top - (middle - bottom) + i] = tem;
+ SWAP_FLAGS (bottom + i, top - (middle - bottom) + i);
+ }
+ /* Exclude the moved bottom segment from further swapping. */
+ top -= len;
+ }
+ else
+ {
+ /* Top segment is the short one. */
+ int len = top - middle;
+ register int i;
+
+ /* Swap it with the bottom part of the bottom segment. */
+ for (i = 0; i < len; i++)
+ {
+ tem = argv[bottom + i];
+ argv[bottom + i] = argv[middle + i];
+ argv[middle + i] = tem;
+ SWAP_FLAGS (bottom + i, middle + i);
+ }
+ /* Exclude the moved top segment from further swapping. */
+ bottom += len;
+ }
+ }
+
+ /* Update records for the slots the non-options now occupy. */
+
+ d->__first_nonopt += (d->optind - d->__last_nonopt);
+ d->__last_nonopt = d->optind;
+}
+
+/* Initialize the internal data when the first call is made. */
+
+/* Returns OPTSTRING, advanced past a leading `-' or `+' if there is one. */
+static const char *
+_getopt_initialize (int argc, char *const *argv, const char *optstring,
+ struct _getopt_data *d)
+{
+ /* Start processing options with ARGV-element 1 (since ARGV-element 0
+ is the program name); the sequence of previously skipped
+ non-option ARGV-elements is empty. */
+
+ d->__first_nonopt = d->__last_nonopt = d->optind;
+
+ d->__nextchar = NULL;
+
+ d->__posixly_correct = !!getenv ("POSIXLY_CORRECT");
+
+ /* Determine how to handle the ordering of options and nonoptions. */
+
+ /* An explicit `-' or `+' prefix takes precedence over POSIXLY_CORRECT. */
+ if (optstring[0] == '-')
+ {
+ d->__ordering = RETURN_IN_ORDER;
+ ++optstring;
+ }
+ else if (optstring[0] == '+')
+ {
+ d->__ordering = REQUIRE_ORDER;
+ ++optstring;
+ }
+ else if (d->__posixly_correct)
+ d->__ordering = REQUIRE_ORDER;
+ else
+ d->__ordering = PERMUTE;
+
+#if defined _LIBC && defined USE_NONOPTION_FLAGS
+ /* The non-option flags are consulted only when the program's original
+ argument vector is being parsed. */
+ if (!d->__posixly_correct
+ && argc == __libc_argc && argv == __libc_argv)
+ {
+ if (d->__nonoption_flags_max_len == 0)
+ {
+ if (__getopt_nonoption_flags == NULL
+ || __getopt_nonoption_flags[0] == '\0')
+ d->__nonoption_flags_max_len = -1;
+ else
+ {
+ /* Make a private copy, zero-padded to at least ARGC bytes. */
+ const char *orig_str = __getopt_nonoption_flags;
+ int len = d->__nonoption_flags_max_len = strlen (orig_str);
+ if (d->__nonoption_flags_max_len < argc)
+ d->__nonoption_flags_max_len = argc;
+ __getopt_nonoption_flags =
+ (char *) malloc (d->__nonoption_flags_max_len);
+ if (__getopt_nonoption_flags == NULL)
+ d->__nonoption_flags_max_len = -1;
+ else
+ memset (__mempcpy (__getopt_nonoption_flags, orig_str, len),
+ '\0', d->__nonoption_flags_max_len - len);
+ }
+ }
+ d->__nonoption_flags_len = d->__nonoption_flags_max_len;
+ }
+ else
+ d->__nonoption_flags_len = 0;
+#endif
+
+ return optstring;
+}
+\f
+/* Scan elements of ARGV (whose length is ARGC) for option characters
+ given in OPTSTRING.
+
+ If an element of ARGV starts with '-', and is not exactly "-" or "--",
+ then it is an option element. The characters of this element
+ (aside from the initial '-') are option characters. If `getopt'
+ is called repeatedly, it returns successively each of the option characters
+ from each of the option elements.
+
+ If `getopt' finds another option character, it returns that character,
+ updating `optind' and `nextchar' so that the next call to `getopt' can
+ resume the scan with the following option character or ARGV-element.
+
+ If there are no more option characters, `getopt' returns -1.
+ Then `optind' is the index in ARGV of the first ARGV-element
+ that is not an option. (The ARGV-elements have been permuted
+ so that those that are not options now come last.)
+
+ OPTSTRING is a string containing the legitimate option characters.
+ If an option character is seen that is not listed in OPTSTRING,
+ return '?' after printing an error message. If you set `opterr' to
+ zero, the error message is suppressed but we still return '?'.
+
+ If a char in OPTSTRING is followed by a colon, that means it wants an arg,
+ so the following text in the same ARGV-element, or the text of the following
+ ARGV-element, is returned in `optarg'. Two colons mean an option that
+ wants an optional arg; if there is text in the current ARGV-element,
+ it is returned in `optarg', otherwise `optarg' is set to zero.
+
+ If OPTSTRING starts with `-' or `+', it requests different methods of
+ handling the non-option ARGV-elements.
+ See the comments about RETURN_IN_ORDER and REQUIRE_ORDER, above.
+
+ Long-named options begin with `--' instead of `-'.
+ Their names may be abbreviated as long as the abbreviation is unique
+ or is an exact match for some defined option. If they have an
+ argument, it follows the option name in the same ARGV-element, separated
+ from the option name by a `=', or else in the next ARGV-element.
+ When `getopt' finds a long-named option, it returns 0 if that option's
+ `flag' field is nonzero, the value of the option's `val' field
+ if the `flag' field is zero.
+
+ The elements of ARGV aren't really const, because we permute them.
+ But we pretend they're const in the prototype to be compatible
+ with other systems.
+
+ LONGOPTS is a vector of `struct option' terminated by an
+ element containing a name which is zero.
+
+ LONGIND returns the index in LONGOPT of the long-named option found.
+ It is only valid when a long-named option has been found by the most
+ recent call.
+
+ If LONG_ONLY is nonzero, '-' as well as '--' can introduce
+ long-named options. */
+
+/* Reentrant core shared by getopt, getopt_long and getopt_long_only.
+
+ ARGC, ARGV, OPTSTRING, LONGOPTS, LONGIND and LONG_ONLY have the meaning
+ given in the long comment preceding this function. All scanner state
+ lives in *D: D->optind and D->opterr are read on entry, and D->optind,
+ D->optarg and D->optopt carry the results back to the caller.
+
+ Return value:
+ -1 when scanning is finished (or ARGC < 1),
+ 1 for a non-option element when the ordering is RETURN_IN_ORDER
+ (the element is in D->optarg),
+ '?' for an unrecognized or ambiguous option, or an argument given to
+ an option that takes none,
+ ':' or '?' for a missing required argument (':' only if OPTSTRING
+ starts with ':'),
+ 0 for a long option whose `flag' is non-null (after storing `val'
+ through it), otherwise the long option's `val',
+ the option character itself for a recognized short option. */
+
+int
+_getopt_internal_r (int argc, char *const *argv, const char *optstring,
+ const struct option *longopts, int *longind,
+ int long_only, struct _getopt_data *d)
+{
+ /* A leading ':' in OPTSTRING silences diagnostics regardless of opterr. */
+ int print_errors = d->opterr;
+ if (optstring[0] == ':')
+ print_errors = 0;
+
+ if (argc < 1)
+ return -1;
+
+ d->optarg = NULL;
+
+ if (d->optind == 0 || !d->__initialized)
+ {
+ if (d->optind == 0)
+ d->optind = 1; /* Don't scan ARGV[0], the program name. */
+ optstring = _getopt_initialize (argc, argv, optstring, d);
+ d->__initialized = 1;
+ }
+
+ /* Test whether ARGV[optind] points to a non-option argument.
+ Either it does not have option syntax, or there is an environment flag
+ from the shell indicating it is not an option. The latter information
+ is only used when compiled as part of the GNU libc. */
+#if defined _LIBC && defined USE_NONOPTION_FLAGS
+# define NONOPTION_P (argv[d->optind][0] != '-' || argv[d->optind][1] == '\0' \
+ || (d->optind < d->__nonoption_flags_len \
+ && __getopt_nonoption_flags[d->optind] == '1'))
+#else
+# define NONOPTION_P (argv[d->optind][0] != '-' || argv[d->optind][1] == '\0')
+#endif
+
+ if (d->__nextchar == NULL || *d->__nextchar == '\0')
+ {
+ /* Advance to the next ARGV-element. */
+
+ /* Give FIRST_NONOPT & LAST_NONOPT rational values if OPTIND has been
+ moved back by the user (who may also have changed the arguments). */
+ if (d->__last_nonopt > d->optind)
+ d->__last_nonopt = d->optind;
+ if (d->__first_nonopt > d->optind)
+ d->__first_nonopt = d->optind;
+
+ if (d->__ordering == PERMUTE)
+ {
+ /* If we have just processed some options following some non-options,
+ exchange them so that the options come first. */
+
+ if (d->__first_nonopt != d->__last_nonopt
+ && d->__last_nonopt != d->optind)
+ exchange ((char **) argv, d);
+ else if (d->__last_nonopt != d->optind)
+ d->__first_nonopt = d->optind;
+
+ /* Skip any additional non-options
+ and extend the range of non-options previously skipped. */
+
+ while (d->optind < argc && NONOPTION_P)
+ d->optind++;
+ d->__last_nonopt = d->optind;
+ }
+
+ /* The special ARGV-element `--' means premature end of options.
+ Skip it like a null option,
+ then exchange with previous non-options as if it were an option,
+ then skip everything else like a non-option. */
+
+ if (d->optind != argc && !strcmp (argv[d->optind], "--"))
+ {
+ d->optind++;
+
+ if (d->__first_nonopt != d->__last_nonopt
+ && d->__last_nonopt != d->optind)
+ exchange ((char **) argv, d);
+ else if (d->__first_nonopt == d->__last_nonopt)
+ d->__first_nonopt = d->optind;
+ d->__last_nonopt = argc;
+
+ d->optind = argc;
+ }
+
+ /* If we have done all the ARGV-elements, stop the scan
+ and back over any non-options that we skipped and permuted. */
+
+ if (d->optind == argc)
+ {
+ /* Set the next-arg-index to point at the non-options
+ that we previously skipped, so the caller will digest them. */
+ if (d->__first_nonopt != d->__last_nonopt)
+ d->optind = d->__first_nonopt;
+ return -1;
+ }
+
+ /* If we have come to a non-option and did not permute it,
+ either stop the scan or describe it to the caller and pass it by. */
+
+ if (NONOPTION_P)
+ {
+ if (d->__ordering == REQUIRE_ORDER)
+ return -1;
+ d->optarg = argv[d->optind++];
+ return 1;
+ }
+
+ /* We have found another option-ARGV-element.
+ Skip the initial punctuation. */
+
+ d->__nextchar = (argv[d->optind] + 1
+ + (longopts != NULL && argv[d->optind][1] == '-'));
+ }
+
+ /* Decode the current option-ARGV-element. */
+
+ /* Check whether the ARGV-element is a long option.
+
+ If long_only and the ARGV-element has the form "-f", where f is
+ a valid short option, don't consider it an abbreviated form of
+ a long option that starts with f. Otherwise there would be no
+ way to give the -f short option.
+
+ On the other hand, if there's a long option "fubar" and
+ the ARGV-element is "-fu", do consider that an abbreviation of
+ the long option, just like "--fu", and not "-f" with arg "u".
+
+ This distinction seems to be the most useful approach. */
+
+ if (longopts != NULL
+ && (argv[d->optind][1] == '-'
+ || (long_only && (argv[d->optind][2]
+ || !strchr (optstring, argv[d->optind][1])))))
+ {
+ char *nameend;
+ const struct option *p;
+ const struct option *pfound = NULL;
+ int exact = 0;
+ int ambig = 0;
+ int indfound = -1;
+ int option_index;
+
+ /* The option name ends at `=' (start of an attached argument) or at
+ the end of the ARGV-element. */
+ for (nameend = d->__nextchar; *nameend && *nameend != '='; nameend++)
+ /* Do nothing. */ ;
+
+ /* Test all long options for either exact match
+ or abbreviated matches. */
+ for (p = longopts, option_index = 0; p->name; p++, option_index++)
+ if (!strncmp (p->name, d->__nextchar, nameend - d->__nextchar))
+ {
+ if ((unsigned int) (nameend - d->__nextchar)
+ == (unsigned int) strlen (p->name))
+ {
+ /* Exact match found. */
+ pfound = p;
+ indfound = option_index;
+ exact = 1;
+ break;
+ }
+ else if (pfound == NULL)
+ {
+ /* First nonexact match found. */
+ pfound = p;
+ indfound = option_index;
+ }
+ else if (long_only
+ || pfound->has_arg != p->has_arg
+ || pfound->flag != p->flag
+ || pfound->val != p->val)
+ /* Second or later nonexact match found. */
+ ambig = 1;
+ }
+
+ if (ambig && !exact)
+ {
+ if (print_errors)
+ {
+#if defined _LIBC && defined USE_IN_LIBIO
+ char *buf;
+
+ if (__asprintf (&buf, _("%s: option `%s' is ambiguous\n"),
+ argv[0], argv[d->optind]) >= 0)
+ {
+ _IO_flockfile (stderr);
+
+ int old_flags2 = ((_IO_FILE *) stderr)->_flags2;
+ ((_IO_FILE *) stderr)->_flags2 |= _IO_FLAGS2_NOTCANCEL;
+
+ __fxprintf (NULL, "%s", buf);
+
+ ((_IO_FILE *) stderr)->_flags2 = old_flags2;
+ _IO_funlockfile (stderr);
+
+ free (buf);
+ }
+#else
+ fprintf (stderr, _("%s: option `%s' is ambiguous\n"),
+ argv[0], argv[d->optind]);
+#endif
+ }
+ d->__nextchar += strlen (d->__nextchar);
+ d->optind++;
+ d->optopt = 0;
+ return '?';
+ }
+
+ if (pfound != NULL)
+ {
+ option_index = indfound;
+ d->optind++;
+ if (*nameend)
+ {
+ /* Don't test has_arg with >, because some C compilers don't
+ allow it to be used on enums. */
+ if (pfound->has_arg)
+ d->optarg = nameend + 1;
+ else
+ {
+ if (print_errors)
+ {
+#if defined _LIBC && defined USE_IN_LIBIO
+ char *buf;
+ int n;
+#endif
+
+ if (argv[d->optind - 1][1] == '-')
+ {
+ /* --option */
+#if defined _LIBC && defined USE_IN_LIBIO
+ n = __asprintf (&buf, _("\
+%s: option `--%s' doesn't allow an argument\n"),
+ argv[0], pfound->name);
+#else
+ fprintf (stderr, _("\
+%s: option `--%s' doesn't allow an argument\n"),
+ argv[0], pfound->name);
+#endif
+ }
+ else
+ {
+ /* +option or -option */
+#if defined _LIBC && defined USE_IN_LIBIO
+ n = __asprintf (&buf, _("\
+%s: option `%c%s' doesn't allow an argument\n"),
+ argv[0], argv[d->optind - 1][0],
+ pfound->name);
+#else
+ fprintf (stderr, _("\
+%s: option `%c%s' doesn't allow an argument\n"),
+ argv[0], argv[d->optind - 1][0],
+ pfound->name);
+#endif
+ }
+
+#if defined _LIBC && defined USE_IN_LIBIO
+ if (n >= 0)
+ {
+ _IO_flockfile (stderr);
+
+ int old_flags2 = ((_IO_FILE *) stderr)->_flags2;
+ ((_IO_FILE *) stderr)->_flags2
+ |= _IO_FLAGS2_NOTCANCEL;
+
+ __fxprintf (NULL, "%s", buf);
+
+ ((_IO_FILE *) stderr)->_flags2 = old_flags2;
+ _IO_funlockfile (stderr);
+
+ free (buf);
+ }
+#endif
+ }
+
+ d->__nextchar += strlen (d->__nextchar);
+
+ d->optopt = pfound->val;
+ return '?';
+ }
+ }
+ else if (pfound->has_arg == 1)
+ {
+ /* Required argument not attached with `=': take the next
+ ARGV-element if there is one. */
+ if (d->optind < argc)
+ d->optarg = argv[d->optind++];
+ else
+ {
+ if (print_errors)
+ {
+#if defined _LIBC && defined USE_IN_LIBIO
+ char *buf;
+
+ if (__asprintf (&buf, _("\
+%s: option `%s' requires an argument\n"),
+ argv[0], argv[d->optind - 1]) >= 0)
+ {
+ _IO_flockfile (stderr);
+
+ int old_flags2 = ((_IO_FILE *) stderr)->_flags2;
+ ((_IO_FILE *) stderr)->_flags2
+ |= _IO_FLAGS2_NOTCANCEL;
+
+ __fxprintf (NULL, "%s", buf);
+
+ ((_IO_FILE *) stderr)->_flags2 = old_flags2;
+ _IO_funlockfile (stderr);
+
+ free (buf);
+ }
+#else
+ fprintf (stderr,
+ _("%s: option `%s' requires an argument\n"),
+ argv[0], argv[d->optind - 1]);
+#endif
+ }
+ d->__nextchar += strlen (d->__nextchar);
+ d->optopt = pfound->val;
+ return optstring[0] == ':' ? ':' : '?';
+ }
+ }
+ d->__nextchar += strlen (d->__nextchar);
+ if (longind != NULL)
+ *longind = option_index;
+ if (pfound->flag)
+ {
+ *(pfound->flag) = pfound->val;
+ return 0;
+ }
+ return pfound->val;
+ }
+
+ /* Can't find it as a long option. If this is not getopt_long_only,
+ or the option starts with '--' or is not a valid short
+ option, then it's an error.
+ Otherwise interpret it as a short option. */
+ if (!long_only || argv[d->optind][1] == '-'
+ || strchr (optstring, *d->__nextchar) == NULL)
+ {
+ if (print_errors)
+ {
+#if defined _LIBC && defined USE_IN_LIBIO
+ char *buf;
+ int n;
+#endif
+
+ if (argv[d->optind][1] == '-')
+ {
+ /* --option */
+#if defined _LIBC && defined USE_IN_LIBIO
+ n = __asprintf (&buf, _("%s: unrecognized option `--%s'\n"),
+ argv[0], d->__nextchar);
+#else
+ fprintf (stderr, _("%s: unrecognized option `--%s'\n"),
+ argv[0], d->__nextchar);
+#endif
+ }
+ else
+ {
+ /* +option or -option */
+#if defined _LIBC && defined USE_IN_LIBIO
+ n = __asprintf (&buf, _("%s: unrecognized option `%c%s'\n"),
+ argv[0], argv[d->optind][0], d->__nextchar);
+#else
+ fprintf (stderr, _("%s: unrecognized option `%c%s'\n"),
+ argv[0], argv[d->optind][0], d->__nextchar);
+#endif
+ }
+
+#if defined _LIBC && defined USE_IN_LIBIO
+ if (n >= 0)
+ {
+ _IO_flockfile (stderr);
+
+ int old_flags2 = ((_IO_FILE *) stderr)->_flags2;
+ ((_IO_FILE *) stderr)->_flags2 |= _IO_FLAGS2_NOTCANCEL;
+
+ __fxprintf (NULL, "%s", buf);
+
+ ((_IO_FILE *) stderr)->_flags2 = old_flags2;
+ _IO_funlockfile (stderr);
+
+ free (buf);
+ }
+#endif
+ }
+ d->__nextchar = (char *) "";
+ d->optind++;
+ d->optopt = 0;
+ return '?';
+ }
+ }
+
+ /* Look at and handle the next short option-character. */
+
+ {
+ char c = *d->__nextchar++;
+ char *temp = strchr (optstring, c);
+
+ /* Increment `optind' when we start to process its last character. */
+ if (*d->__nextchar == '\0')
+ ++d->optind;
+
+ /* ':' and ';' are OPTSTRING punctuation (argument markers and the
+ "W;" marker), never option characters in their own right. */
+ if (temp == NULL || c == ':' || c == ';')
+ {
+ if (print_errors)
+ {
+#if defined _LIBC && defined USE_IN_LIBIO
+ char *buf;
+ int n;
+#endif
+
+ if (d->__posixly_correct)
+ {
+ /* 1003.2 specifies the format of this message. */
+#if defined _LIBC && defined USE_IN_LIBIO
+ n = __asprintf (&buf, _("%s: illegal option -- %c\n"),
+ argv[0], c);
+#else
+ fprintf (stderr, _("%s: illegal option -- %c\n"), argv[0], c);
+#endif
+ }
+ else
+ {
+#if defined _LIBC && defined USE_IN_LIBIO
+ n = __asprintf (&buf, _("%s: invalid option -- %c\n"),
+ argv[0], c);
+#else
+ fprintf (stderr, _("%s: invalid option -- %c\n"), argv[0], c);
+#endif
+ }
+
+#if defined _LIBC && defined USE_IN_LIBIO
+ if (n >= 0)
+ {
+ _IO_flockfile (stderr);
+
+ int old_flags2 = ((_IO_FILE *) stderr)->_flags2;
+ ((_IO_FILE *) stderr)->_flags2 |= _IO_FLAGS2_NOTCANCEL;
+
+ __fxprintf (NULL, "%s", buf);
+
+ ((_IO_FILE *) stderr)->_flags2 = old_flags2;
+ _IO_funlockfile (stderr);
+
+ free (buf);
+ }
+#endif
+ }
+ d->optopt = c;
+ return '?';
+ }
+ /* Convenience. Treat POSIX -W foo same as long option --foo.
+ Only do so when a long-option table was supplied: plain getopt()
+ passes LONGOPTS == NULL, and the table scan below would dereference
+ it. Without a table, `W' is returned like any ordinary option. */
+ if (temp[0] == 'W' && temp[1] == ';' && longopts != NULL)
+ {
+ char *nameend;
+ const struct option *p;
+ const struct option *pfound = NULL;
+ int exact = 0;
+ int ambig = 0;
+ int indfound = 0;
+ int option_index;
+
+ /* This is an option that requires an argument. */
+ if (*d->__nextchar != '\0')
+ {
+ d->optarg = d->__nextchar;
+ /* If we end this ARGV-element by taking the rest as an arg,
+ we must advance to the next element now. */
+ d->optind++;
+ }
+ else if (d->optind == argc)
+ {
+ if (print_errors)
+ {
+ /* 1003.2 specifies the format of this message. */
+#if defined _LIBC && defined USE_IN_LIBIO
+ char *buf;
+
+ if (__asprintf (&buf,
+ _("%s: option requires an argument -- %c\n"),
+ argv[0], c) >= 0)
+ {
+ _IO_flockfile (stderr);
+
+ int old_flags2 = ((_IO_FILE *) stderr)->_flags2;
+ ((_IO_FILE *) stderr)->_flags2 |= _IO_FLAGS2_NOTCANCEL;
+
+ __fxprintf (NULL, "%s", buf);
+
+ ((_IO_FILE *) stderr)->_flags2 = old_flags2;
+ _IO_funlockfile (stderr);
+
+ free (buf);
+ }
+#else
+ fprintf (stderr, _("%s: option requires an argument -- %c\n"),
+ argv[0], c);
+#endif
+ }
+ d->optopt = c;
+ if (optstring[0] == ':')
+ c = ':';
+ else
+ c = '?';
+ return c;
+ }
+ else
+ /* We already incremented `d->optind' once;
+ increment it again when taking next ARGV-elt as argument. */
+ d->optarg = argv[d->optind++];
+
+ /* optarg is now the argument, see if it's in the
+ table of longopts. */
+
+ for (d->__nextchar = nameend = d->optarg; *nameend && *nameend != '=';
+ nameend++)
+ /* Do nothing. */ ;
+
+ /* Test all long options for either exact match
+ or abbreviated matches. */
+ for (p = longopts, option_index = 0; p->name; p++, option_index++)
+ if (!strncmp (p->name, d->__nextchar, nameend - d->__nextchar))
+ {
+ if ((unsigned int) (nameend - d->__nextchar) == strlen (p->name))
+ {
+ /* Exact match found. */
+ pfound = p;
+ indfound = option_index;
+ exact = 1;
+ break;
+ }
+ else if (pfound == NULL)
+ {
+ /* First nonexact match found. */
+ pfound = p;
+ indfound = option_index;
+ }
+ else
+ /* Second or later nonexact match found. */
+ ambig = 1;
+ }
+ if (ambig && !exact)
+ {
+ /* Report the -W argument itself (D->optarg). OPTIND has already
+ been moved past it, so ARGV[OPTIND] would name the wrong
+ element or be the null pointer that terminates ARGV. */
+ if (print_errors)
+ {
+#if defined _LIBC && defined USE_IN_LIBIO
+ char *buf;
+
+ if (__asprintf (&buf, _("%s: option `-W %s' is ambiguous\n"),
+ argv[0], d->optarg) >= 0)
+ {
+ _IO_flockfile (stderr);
+
+ int old_flags2 = ((_IO_FILE *) stderr)->_flags2;
+ ((_IO_FILE *) stderr)->_flags2 |= _IO_FLAGS2_NOTCANCEL;
+
+ __fxprintf (NULL, "%s", buf);
+
+ ((_IO_FILE *) stderr)->_flags2 = old_flags2;
+ _IO_funlockfile (stderr);
+
+ free (buf);
+ }
+#else
+ fprintf (stderr, _("%s: option `-W %s' is ambiguous\n"),
+ argv[0], d->optarg);
+#endif
+ }
+ d->__nextchar += strlen (d->__nextchar);
+ /* OPTIND already points past everything -W consumed. Advancing
+ it again here could push it beyond ARGC and make the next call
+ read outside ARGV. */
+ return '?';
+ }
+ if (pfound != NULL)
+ {
+ option_index = indfound;
+ if (*nameend)
+ {
+ /* Don't test has_arg with >, because some C compilers don't
+ allow it to be used on enums. */
+ if (pfound->has_arg)
+ d->optarg = nameend + 1;
+ else
+ {
+ if (print_errors)
+ {
+#if defined _LIBC && defined USE_IN_LIBIO
+ char *buf;
+
+ if (__asprintf (&buf, _("\
+%s: option `-W %s' doesn't allow an argument\n"),
+ argv[0], pfound->name) >= 0)
+ {
+ _IO_flockfile (stderr);
+
+ int old_flags2 = ((_IO_FILE *) stderr)->_flags2;
+ ((_IO_FILE *) stderr)->_flags2
+ |= _IO_FLAGS2_NOTCANCEL;
+
+ __fxprintf (NULL, "%s", buf);
+
+ ((_IO_FILE *) stderr)->_flags2 = old_flags2;
+ _IO_funlockfile (stderr);
+
+ free (buf);
+ }
+#else
+ fprintf (stderr, _("\
+%s: option `-W %s' doesn't allow an argument\n"),
+ argv[0], pfound->name);
+#endif
+ }
+
+ d->__nextchar += strlen (d->__nextchar);
+ return '?';
+ }
+ }
+ else if (pfound->has_arg == 1)
+ {
+ if (d->optind < argc)
+ d->optarg = argv[d->optind++];
+ else
+ {
+ if (print_errors)
+ {
+#if defined _LIBC && defined USE_IN_LIBIO
+ char *buf;
+
+ if (__asprintf (&buf, _("\
+%s: option `%s' requires an argument\n"),
+ argv[0], argv[d->optind - 1]) >= 0)
+ {
+ _IO_flockfile (stderr);
+
+ int old_flags2 = ((_IO_FILE *) stderr)->_flags2;
+ ((_IO_FILE *) stderr)->_flags2
+ |= _IO_FLAGS2_NOTCANCEL;
+
+ __fxprintf (NULL, "%s", buf);
+
+ ((_IO_FILE *) stderr)->_flags2 = old_flags2;
+ _IO_funlockfile (stderr);
+
+ free (buf);
+ }
+#else
+ fprintf (stderr,
+ _("%s: option `%s' requires an argument\n"),
+ argv[0], argv[d->optind - 1]);
+#endif
+ }
+ d->__nextchar += strlen (d->__nextchar);
+ return optstring[0] == ':' ? ':' : '?';
+ }
+ }
+ d->__nextchar += strlen (d->__nextchar);
+ if (longind != NULL)
+ *longind = option_index;
+ if (pfound->flag)
+ {
+ *(pfound->flag) = pfound->val;
+ return 0;
+ }
+ return pfound->val;
+ }
+ d->__nextchar = NULL;
+ return 'W'; /* Let the application handle it. */
+ }
+ if (temp[1] == ':')
+ {
+ if (temp[2] == ':')
+ {
+ /* This is an option that accepts an argument optionally. */
+ if (*d->__nextchar != '\0')
+ {
+ d->optarg = d->__nextchar;
+ d->optind++;
+ }
+ else
+ d->optarg = NULL;
+ d->__nextchar = NULL;
+ }
+ else
+ {
+ /* This is an option that requires an argument. */
+ if (*d->__nextchar != '\0')
+ {
+ d->optarg = d->__nextchar;
+ /* If we end this ARGV-element by taking the rest as an arg,
+ we must advance to the next element now. */
+ d->optind++;
+ }
+ else if (d->optind == argc)
+ {
+ if (print_errors)
+ {
+ /* 1003.2 specifies the format of this message. */
+#if defined _LIBC && defined USE_IN_LIBIO
+ char *buf;
+
+ if (__asprintf (&buf, _("\
+%s: option requires an argument -- %c\n"),
+ argv[0], c) >= 0)
+ {
+ _IO_flockfile (stderr);
+
+ int old_flags2 = ((_IO_FILE *) stderr)->_flags2;
+ ((_IO_FILE *) stderr)->_flags2 |= _IO_FLAGS2_NOTCANCEL;
+
+ __fxprintf (NULL, "%s", buf);
+
+ ((_IO_FILE *) stderr)->_flags2 = old_flags2;
+ _IO_funlockfile (stderr);
+
+ free (buf);
+ }
+#else
+ fprintf (stderr,
+ _("%s: option requires an argument -- %c\n"),
+ argv[0], c);
+#endif
+ }
+ d->optopt = c;
+ if (optstring[0] == ':')
+ c = ':';
+ else
+ c = '?';
+ }
+ else
+ /* We already incremented `optind' once;
+ increment it again when taking next ARGV-elt as argument. */
+ d->optarg = argv[d->optind++];
+ d->__nextchar = NULL;
+ }
+ }
+ return c;
+ }
+}
+
+/* Non-reentrant front end to _getopt_internal_r.
+
+ Copies the public globals `optind' and `opterr' into the file-level
+ scanner state, runs one scanning step on it, then publishes the
+ resulting `optind', `optarg' and `optopt' back to the globals.
+ Returns whatever _getopt_internal_r returned. */
+
+int
+_getopt_internal (int argc, char *const *argv, const char *optstring,
+ const struct option *longopts, int *longind, int long_only)
+{
+ struct _getopt_data *const state = &getopt_data;
+ int rc;
+
+ /* Inputs: the caller may have changed these between calls. */
+ state->optind = optind;
+ state->opterr = opterr;
+
+ rc = _getopt_internal_r (argc, argv, optstring, longopts,
+ longind, long_only, state);
+
+ /* Outputs: make the results visible through the classic interface. */
+ optind = state->optind;
+ optarg = state->optarg;
+ optopt = state->optopt;
+
+ return rc;
+}
+
+/* Classic short-option entry point: scan ARGV for the option characters
+ listed in OPTSTRING. No long-option table is supplied, so only short
+ options are recognized. */
+
+int
+getopt (int argc, char *const *argv, const char *optstring)
+{
+ const struct option *const no_longopts = NULL;
+ int *const no_longind = NULL;
+
+ return _getopt_internal (argc, argv, optstring,
+ no_longopts, no_longind, 0);
+}
+
+#endif /* Not ELIDE_CODE. */
+\f
+#ifdef TEST
+
+/* Compile with -DTEST to make an executable for use in testing
+ the above definition of `getopt'. */
+
+/* Test driver: parse the command line with getopt using the option
+ string "abc:d:0123456789", print one line per option found, then list
+ any remaining non-option ARGV-elements. Always exits with status 0. */
+
+int
+main (int argc, char **argv)
+{
+ int c;
+ /* ARGV index at which a digit option was last seen; 0 means none yet. */
+ int digit_optind = 0;
+
+ while (1)
+ {
+ /* Index of the element about to be scanned; optind is 0 before the
+ first call, which getopt treats as 1. */
+ int this_option_optind = optind ? optind : 1;
+
+ c = getopt (argc, argv, "abc:d:0123456789");
+ if (c == -1)
+ break;
+
+ switch (c)
+ {
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ if (digit_optind != 0 && digit_optind != this_option_optind)
+ printf ("digits occur in two different argv-elements.\n");
+ digit_optind = this_option_optind;
+ printf ("option %c\n", c);
+ break;
+
+ case 'a':
+ printf ("option a\n");
+ break;
+
+ case 'b':
+ printf ("option b\n");
+ break;
+
+ case 'c':
+ printf ("option c with value `%s'\n", optarg);
+ break;
+
+ /* The option string declares "d:", so handle it here instead of
+ letting it fall into the "unexpected character code" default. */
+ case 'd':
+ printf ("option d with value `%s'\n", optarg);
+ break;
+
+ case '?':
+ /* getopt has already printed a diagnostic. */
+ break;
+
+ default:
+ printf ("?? getopt returned character code 0%o ??\n", c);
+ }
+ }
+
+ if (optind < argc)
+ {
+ printf ("non-option ARGV-elements: ");
+ while (optind < argc)
+ printf ("%s ", argv[optind++]);
+ printf ("\n");
+ }
+
+ exit (0);
+}
+
+#endif /* TEST */
--- /dev/null
+/* Declarations for getopt.
+ Copyright (C) 1989-1994,1996-1999,2001,2003,2004
+ Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#ifndef _GETOPT_H
+
+#ifndef __need_getopt
+# define _GETOPT_H 1
+#endif
+
+/* If __GNU_LIBRARY__ is not already defined, either we are being used
+ standalone, or this is the first header included in the source file.
+ If we are being used with glibc, we need to include <features.h>, but
+ that does not exist if we are standalone. So: if __GNU_LIBRARY__ is
+ not defined, include <ctype.h>, which will pull in <features.h> for us
+ if it's from glibc. (Why ctype.h? It's guaranteed to exist and it
+ doesn't flood the namespace with stuff the way some other headers do.) */
+#if !defined __GNU_LIBRARY__
+# include <ctype.h>
+#endif
+
+#ifndef __THROW
+# ifndef __GNUC_PREREQ
+# define __GNUC_PREREQ(maj, min) (0)
+# endif
+# if defined __cplusplus && __GNUC_PREREQ (2,8)
+# define __THROW throw ()
+# else
+# define __THROW
+# endif
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* For communication from `getopt' to the caller.
+ When `getopt' finds an option that takes an argument,
+ the argument value is returned here.
+ Also, when `ordering' is RETURN_IN_ORDER,
+ each non-option ARGV-element is returned here. */
+
+extern char *optarg;
+
+/* Index in ARGV of the next element to be scanned.
+ This is used for communication to and from the caller
+ and for communication between successive calls to `getopt'.
+
+ On entry to `getopt', zero means this is the first call; initialize.
+
+ When `getopt' returns -1, this is the index of the first of the
+ non-option elements that the caller should itself scan.
+
+ Otherwise, `optind' communicates from one call to the next
+ how much of ARGV has been scanned so far. */
+
+extern int optind;
+
+/* Callers store zero here to inhibit the error message `getopt' prints
+ for unrecognized options. */
+
+extern int opterr;
+
+/* Set to an option character which was unrecognized. */
+
+extern int optopt;
+
+#ifndef __need_getopt
+/* Describe the long-named options requested by the application.
+ The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector
+ of `struct option' terminated by an element containing a name which is
+ zero.
+
+ The field `has_arg' is:
+ no_argument (or 0) if the option does not take an argument,
+ required_argument (or 1) if the option requires an argument,
+ optional_argument (or 2) if the option takes an optional argument.
+
+ If the field `flag' is not NULL, it points to a variable that is set
+ to the value given in the field `val' when the option is found, but
+ left unchanged if the option is not found.
+
+ To have a long-named option do something other than set an `int' to
+ a compiled-in constant, such as set a value from `optarg', set the
+ option's `flag' field to zero and its `val' field to a nonzero
+ value (the equivalent single-letter option character, if there is
+ one). For long options that have a zero `flag' field, `getopt'
+ returns the contents of the `val' field. */
+
+/* One entry of the long-option table passed to getopt_long and
+ getopt_long_only. The table ends with an entry whose `name' is zero. */
+struct option
+{
+ /* Name of the long option, without the leading dashes. */
+ const char *name;
+ /* has_arg can't be an enum because some compilers complain about
+ type mismatches in all the code that assumes it is an int. */
+ /* One of no_argument, required_argument or optional_argument. */
+ int has_arg;
+ /* If not NULL, `val' is stored here when the option is found. */
+ int *flag;
+ /* Value stored through `flag', or returned when `flag' is zero. */
+ int val;
+};
+
+/* Names for the values of the `has_arg' field of `struct option'. */
+
+# define no_argument 0
+# define required_argument 1
+# define optional_argument 2
+#endif /* need getopt */
+
+
+/* Get definitions and prototypes for functions to process the
+ arguments in ARGV (ARGC of them, minus the program name) for
+ options given in OPTS.
+
+ Return the option character from OPTS just read. Return -1 when
+ there are no more options. For unrecognized options, or options
+ missing arguments, `optopt' is set to the option letter, and '?' is
+ returned.
+
+ The OPTS string is a list of characters which are recognized option
+ letters, optionally followed by colons, specifying that that letter
+ takes an argument, to be placed in `optarg'.
+
+ If a letter in OPTS is followed by two colons, its argument is
+ optional. This behavior is specific to the GNU `getopt'.
+
+ The argument `--' causes premature termination of argument
+ scanning, explicitly telling `getopt' that there are no more
+ options.
+
+ If OPTS begins with `-', then non-option arguments are treated as
+ arguments to the option '\1'. This behavior is specific to the GNU
+ `getopt'. */
+
+#ifdef __GNU_LIBRARY__
+/* Many other libraries have conflicting prototypes for getopt, with
+ differences in the consts, in stdlib.h. To avoid compilation
+ errors, only prototype getopt for the GNU C library. */
+extern int getopt (int ___argc, char *const *___argv, const char *__shortopts)
+ __THROW;
+#else /* not __GNU_LIBRARY__ */
+extern int getopt ();
+#endif /* __GNU_LIBRARY__ */
+
+#ifndef __need_getopt
+/* Like getopt, but also recognize the long-named options described by
+ the __LONGOPTS table. If __LONGIND is not NULL, the table index of the
+ long option found is stored through it. */
+extern int getopt_long (int ___argc, char *const *___argv,
+ const char *__shortopts,
+ const struct option *__longopts, int *__longind)
+ __THROW;
+/* Like getopt_long, but '-' as well as '--' can introduce a long
+ option. */
+extern int getopt_long_only (int ___argc, char *const *___argv,
+ const char *__shortopts,
+ const struct option *__longopts, int *__longind)
+ __THROW;
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+/* Make sure we later can get all the definitions and declarations. */
+#undef __need_getopt
+
+#endif /* getopt.h */
--- /dev/null
+/* getopt_long and getopt_long_only entry points for GNU getopt.
+ Copyright (C) 1987,88,89,90,91,92,93,94,96,97,98,2004
+ Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+\f
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#ifdef _LIBC
+# include <getopt.h>
+#else
+# include "getopt.h"
+#endif
+#include "getopt_int.h"
+
+#include <stdio.h>
+
+/* Comment out all this code if we are using the GNU C Library, and are not
+ actually compiling the library itself. This code is part of the GNU C
+ Library, but also included in many other GNU distributions. Compiling
+ and linking in this code is a waste when using the GNU C library
+ (especially if it is a shared library). Rather than having every GNU
+ program understand `configure --with-gnu-libc' and omit the object files,
+ it is simpler to just do this in the source for each such file. */
+
+#define GETOPT_INTERFACE_VERSION 2
+#if !defined _LIBC && defined __GLIBC__ && __GLIBC__ >= 2
+#include <gnu-versions.h>
+#if _GNU_GETOPT_INTERFACE_VERSION == GETOPT_INTERFACE_VERSION
+//#define ELIDE_CODE // SHERLOCK: disabled
+#endif
+#endif
+
+#ifndef ELIDE_CODE
+
+
+/* This needs to come after some library #include
+ to get __GNU_LIBRARY__ defined. */
+#ifdef __GNU_LIBRARY__
+#include <stdlib.h>
+#endif
+
+#ifndef NULL
+#define NULL 0
+#endif
+
+/* Scan ARGV for short options listed in OPTIONS and for long options
+ described by LONG_OPTIONS. Long options must be introduced by `--'.
+ Uses the global getopt state. */
+
+int
+getopt_long (int argc, char *const *argv, const char *options,
+ const struct option *long_options, int *opt_index)
+{
+ const int long_only = 0;
+
+ return _getopt_internal (argc, argv, options, long_options, opt_index,
+ long_only);
+}
+
+/* Reentrant variant of getopt_long: identical scanning rules, but all
+ state is kept in the caller-supplied *D instead of the globals. */
+
+int
+_getopt_long_r (int argc, char *const *argv, const char *options,
+ const struct option *long_options, int *opt_index,
+ struct _getopt_data *d)
+{
+ const int long_only = 0;
+
+ return _getopt_internal_r (argc, argv, options, long_options, opt_index,
+ long_only, d);
+}
+
+/* Variant of getopt_long in which a single '-' may introduce a long
+ option as well as '--'. An element starting with '-' (not '--') that
+ matches no long option but does match a short option is parsed as that
+ short option. Uses the global getopt state. */
+
+int
+getopt_long_only (int argc, char *const *argv, const char *options,
+ const struct option *long_options, int *opt_index)
+{
+ const int long_only = 1;
+
+ return _getopt_internal (argc, argv, options, long_options, opt_index,
+ long_only);
+}
+
+/* Reentrant variant of getopt_long_only: identical scanning rules, but
+ all state is kept in the caller-supplied *D instead of the globals. */
+
+int
+_getopt_long_only_r (int argc, char *const *argv, const char *options,
+ const struct option *long_options, int *opt_index,
+ struct _getopt_data *d)
+{
+ const int long_only = 1;
+
+ return _getopt_internal_r (argc, argv, options, long_options, opt_index,
+ long_only, d);
+}
+
+#endif /* Not ELIDE_CODE. */
+\f
+#ifdef TEST
+
+#include <stdio.h>
+
+/* Test driver for getopt_long: parse the command line with the short
+ options "abc:d:0123456789" plus a small table of long options, print
+ one line per option found, then list the remaining non-option
+ ARGV-elements. Always exits with status 0. */
+
+int
+main (int argc, char **argv)
+{
+ /* Long options known to the driver. All have a null `flag' and a zero
+ `val', so getopt_long returns 0 for each of them. */
+ static struct option long_options[] =
+ {
+ {"add", 1, 0, 0},
+ {"append", 0, 0, 0},
+ {"delete", 1, 0, 0},
+ {"verbose", 0, 0, 0},
+ {"create", 0, 0, 0},
+ {"file", 1, 0, 0},
+ {0, 0, 0, 0}
+ };
+ /* ARGV index at which a digit option was last seen; 0 means none yet. */
+ int digit_optind = 0;
+ int c;
+
+ for (;;)
+ {
+ int option_index = 0;
+ int this_option_optind = optind;
+
+ /* optind is 0 before the first call; scanning then starts at 1. */
+ if (this_option_optind == 0)
+ this_option_optind = 1;
+
+ c = getopt_long (argc, argv, "abc:d:0123456789",
+ long_options, &option_index);
+ if (c == -1)
+ break;
+
+ switch (c)
+ {
+ case 0:
+ /* A long option; option_index says which one. */
+ printf ("option %s", long_options[option_index].name);
+ if (optarg)
+ printf (" with arg %s", optarg);
+ printf ("\n");
+ break;
+
+ case 'a':
+ printf ("option a\n");
+ break;
+
+ case 'b':
+ printf ("option b\n");
+ break;
+
+ case 'c':
+ printf ("option c with value `%s'\n", optarg);
+ break;
+
+ case 'd':
+ printf ("option d with value `%s'\n", optarg);
+ break;
+
+ case '?':
+ /* getopt_long has already printed a diagnostic. */
+ break;
+
+ default:
+ if (c >= '0' && c <= '9')
+ {
+ if (digit_optind != 0 && digit_optind != this_option_optind)
+ printf ("digits occur in two different argv-elements.\n");
+ digit_optind = this_option_optind;
+ printf ("option %c\n", c);
+ }
+ else
+ printf ("?? getopt returned character code 0%o ??\n", c);
+ break;
+ }
+ }
+
+ if (optind < argc)
+ {
+ printf ("non-option ARGV-elements: ");
+ for (; optind < argc; optind++)
+ printf ("%s ", argv[optind]);
+ printf ("\n");
+ }
+
+ exit (0);
+}
+
+#endif /* TEST */
--- /dev/null
+/* Perform additional initialization for getopt functions in GNU libc.
+ Copyright (C) 1997, 1998, 2001 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#ifdef USE_NONOPTION_FLAGS
+/* Attention: this file is *not* necessary when the GNU getopt functions
+ are used outside the GNU libc. Some additional functionality of the
+ getopt functions in GNU libc require this additional work. */
+
+#include <getopt.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+
+#include <stdio-common/_itoa.h>
+
+/* Variable to synchronize work. */
+char *__getopt_nonoption_flags;
+
+
+/* Remove the environment variable "_<PID>_GNU_nonoption_argv_flags_" if
+ it is still available. If the getopt functions are also used in the
+ application it does not exist anymore since it was saved for the use
+ in getopt.
+
+ ENV is a NULL-terminated vector of "NAME=VALUE" strings; it is edited
+ in place. A pointer into the matching entry is stored in
+ __getopt_nonoption_flags. */
+void
+__getopt_clean_environment (char **env)
+{
+ /* Bash 2.0 puts a special variable in the environment for each
+ command it runs, specifying which ARGV elements are the results
+ of file name wildcard expansion and therefore should not be
+ considered as options. */
+ static const char envvar_tail[] = "_GNU_nonoption_argv_flags_=";
+ char var[50];
+ char *cp, **ep;
+ size_t len;
+
+ /* Construct the "_<PID>_GNU_nonoption_argv_flags_=" string. We must
+ not use `sprintf'. */
+ /* The tail (including its NUL) is copied to the very end of VAR; the
+ PID digits are then placed in front of it, CP being moved to the
+ start of the resulting text.
+ NOTE(review): assumes _itoa_word writes backwards from the given
+ pointer and returns the address of the first digit -- confirm. */
+ cp = memcpy (&var[sizeof (var) - sizeof (envvar_tail)], envvar_tail,
+ sizeof (envvar_tail));
+ cp = _itoa_word (__getpid (), cp, 10, 0);
+ /* Note: we omit adding the leading '_' since we explicitly test for
+ it before calling strncmp. */
+ /* LEN is the number of characters from CP up to, but not including,
+ the terminating NUL stored in the last byte of VAR. */
+ len = (var + sizeof (var) - 1) - cp;
+
+ for (ep = env; *ep != NULL; ++ep)
+ if ((*ep)[0] == '_'
+ && __builtin_expect (strncmp (*ep + 1, cp, len) == 0, 0))
+ {
+ /* Found it. Store this pointer and move later ones back. */
+ /* NOTE(review): the comparison covers (*ep)[1] .. (*ep)[len], so
+ &(*ep)[len] looks like the address of the trailing '=' rather
+ than of the first value character -- confirm against the code
+ that reads __getopt_nonoption_flags. */
+ char **dp = ep;
+ __getopt_nonoption_flags = &(*ep)[len];
+ /* Shift every following entry, including the terminating NULL,
+ one slot towards the front. */
+ do
+ dp[0] = dp[1];
+ while (*dp++);
+ /* Continue the loop in case the name appears again. */
+ /* NOTE(review): the `++ep' of the enclosing loop then steps over
+ the entry that has just been moved into this slot, so that entry
+ is not examined -- confirm this is acceptable. */
+ }
+}
+#endif /* USE_NONOPTION_FLAGS */
--- /dev/null
+/* Internal declarations for getopt.
+ Copyright (C) 1989-1994,1996-1999,2001,2003,2004
+ Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#ifndef _GETOPT_INT_H
+#define _GETOPT_INT_H 1
+
+extern int _getopt_internal (int ___argc, char *const *___argv,
+ const char *__shortopts,
+ const struct option *__longopts, int *__longind,
+ int __long_only);
+
+\f
+/* Reentrant versions which can handle parsing multiple argument
+ vectors at the same time. */
+
+/* Data type for reentrant functions.
+ One object of this type carries the complete parser state, so that the
+ _r functions declared below can work on several argument vectors
+ independently. Initialize it with _GETOPT_DATA_INITIALIZER. */
+struct _getopt_data
+{
+ /* These have exactly the same meaning as the corresponding global
+ variables, except that they are used for the reentrant
+ versions of getopt. */
+ int optind;
+ int opterr;
+ int optopt;
+ char *optarg;
+
+ /* Internal members. */
+
+ /* True if the internal members have been initialized. */
+ int __initialized;
+
+ /* The next char to be scanned in the option-element
+ in which the last option character we returned was found.
+ This allows us to pick up the scan where we left off.
+
+ If this is zero, or a null string, it means resume the scan
+ by advancing to the next ARGV-element. */
+ char *__nextchar;
+
+ /* Describe how to deal with options that follow non-option ARGV-elements.
+
+ If the caller did not specify anything,
+ the default is REQUIRE_ORDER if the environment variable
+ POSIXLY_CORRECT is defined, PERMUTE otherwise.
+
+ REQUIRE_ORDER means don't recognize them as options;
+ stop option processing when the first non-option is seen.
+ This is what Unix does.
+ This mode of operation is selected by either setting the environment
+ variable POSIXLY_CORRECT, or using `+' as the first character
+ of the list of option characters.
+
+ PERMUTE is the default. We permute the contents of ARGV as we
+ scan, so that eventually all the non-options are at the end.
+ This allows options to be given in any order, even with programs
+ that were not written to expect this.
+
+ RETURN_IN_ORDER is an option available to programs that were
+ written to expect options and other ARGV-elements in any order
+ and that care about the ordering of the two. We describe each
+ non-option ARGV-element as if it were the argument of an option
+ with character code 1. Using `-' as the first character of the
+ list of option characters selects this mode of operation.
+
+ The special argument `--' forces an end of option-scanning regardless
+ of the value of `ordering'. In the case of RETURN_IN_ORDER, only
+ `--' can cause `getopt' to return -1 with `optind' != ARGC. */
+
+ enum
+ {
+ REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER
+ } __ordering;
+
+ /* If the POSIXLY_CORRECT environment variable is set. */
+ int __posixly_correct;
+
+
+ /* Handle permutation of arguments. */
+
+ /* Describe the part of ARGV that contains non-options that have
+ been skipped. `first_nonopt' is the index in ARGV of the first
+ of them; `last_nonopt' is the index after the last of them. */
+
+ int __first_nonopt;
+ int __last_nonopt;
+
+ /* Only present when built inside glibc with USE_NONOPTION_FLAGS.
+ NOTE(review): presumably the lengths of the shell-provided
+ non-option flag string -- confirm against getopt.c. */
+#if defined _LIBC && defined USE_NONOPTION_FLAGS
+ int __nonoption_flags_max_len;
+ int __nonoption_flags_len;
+# endif
+};
+
+/* The initializer is necessary to set OPTIND and OPTERR to their
+ default values and to clear the initialization flag. */
+#define _GETOPT_DATA_INITIALIZER { 1, 1 }
+
+extern int _getopt_internal_r (int ___argc, char *const *___argv,
+ const char *__shortopts,
+ const struct option *__longopts, int *__longind,
+ int __long_only, struct _getopt_data *__data);
+
+extern int _getopt_long_r (int ___argc, char *const *___argv,
+ const char *__shortopts,
+ const struct option *__longopts, int *__longind,
+ struct _getopt_data *__data);
+
+extern int _getopt_long_only_r (int ___argc, char *const *___argv,
+ const char *__shortopts,
+ const struct option *__longopts,
+ int *__longind,
+ struct _getopt_data *__data);
+
+#endif /* getopt_int.h */
--- /dev/null
+/* Tests for hash table routines */
+
+#include "lib/lib.h"
+#include "lib/mempool.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+
+/* TEST 1: integers */
+
+/* Node stored in the table: an integer key plus one payload field. */
+struct node1 {
+ int key;
+ int data;
+};
+
+/* Generator parameters: a single static table whose operations are
+ * named test1_*, keyed by the atomic int field `key'; new buckets are
+ * zero-filled. */
+#define HASH_NODE struct node1
+#define HASH_PREFIX(x) test1_##x
+#define HASH_KEY_ATOMIC key
+#define HASH_ATOMIC_TYPE int
+#define HASH_ZERO_FILL
+
+/* Data initializer for newly created nodes; test1() relies on it when
+ * checking what test1_lookup() returns. */
+#define HASH_GIVE_INIT_DATA
+static inline void test1_init_data(struct node1 *n)
+{
+ n->data = n->key + 123;
+}
+
+/* Operations to generate: test1_find, test1_lookup, test1_remove. */
+#define HASH_WANT_FIND
+#define HASH_WANT_LOOKUP
+#define HASH_WANT_REMOVE
+
+#include "lib/hashtable.h"
+
+/* Exercise lookup/remove/find and the iterator on a table of integers. */
+static void test1(void)
+{
+  int k;
+  int sum = 0;
+
+  test1_init();
+
+  /* Create keys 0..1023 by lookup and verify the initialized payload. */
+  for (k = 0; k < 1024; k++)
+    {
+      struct node1 *created = test1_lookup(k);
+      ASSERT(created->data == k+123);
+    }
+
+  /* Drop every odd key. */
+  for (k = 1; k < 1024; k += 2)
+    test1_remove(test1_lookup(k));
+
+  /* Exactly the even keys must be left, with their payload intact. */
+  for (k = 0; k < 1024; k++)
+    {
+      struct node1 *found = test1_find(k);
+      int missing = !found;
+      if (missing != (k & 1) || (found && found->data != k+123))
+        die("Inconsistency at i=%d", k);
+    }
+
+  /* Sum of (1 + key) over the surviving even keys 0..1022 is 512*512. */
+  HASH_FOR_ALL(test1, n)
+    {
+      sum += 1 + n->key;
+    }
+  HASH_END_FOR;
+  ASSERT(sum == 262144);
+  puts("OK");
+}
+
+/* TEST 2: external strings */
+
+/* Node whose key is a pointer to a string allocated separately from the
+ * node. */
+struct node2 {
+ char *key;
+ int data;
+};
+
+/* Generator parameters: operations named test2_*, string key compared
+ * case-insensitively, nodes allocated from an automatically created
+ * mempool with block size 4096. */
+#define HASH_NODE struct node2
+#define HASH_PREFIX(x) test2_##x
+#define HASH_KEY_STRING key
+#define HASH_NOCASE
+#define HASH_AUTO_POOL 4096
+
+/* Operations to generate: test2_find and test2_new. */
+#define HASH_WANT_FIND
+#define HASH_WANT_NEW
+
+#include "lib/hashtable.h"
+
+/* Insert lower-case string keys and find them again in upper case. */
+static void test2(void)
+{
+  char name[32];
+  int k;
+
+  test2_init();
+
+  /* Only even numbers are inserted; the key string is duplicated because
+   * the node merely stores the pointer. */
+  for (k = 0; k < 1024; k += 2)
+    {
+      sprintf(name, "abc%d", k);
+      test2_new(xstrdup(name));
+    }
+
+  /* Every even number must be found despite the different case, every
+   * odd number must be missing. */
+  for (k = 0; k < 1024; k++)
+    {
+      struct node2 *found;
+      int missing;
+
+      sprintf(name, "ABC%d", k);
+      found = test2_find(name);
+      missing = !found;
+      if (missing != (k & 1))
+        die("Inconsistency at i=%d", k);
+    }
+  puts("OK");
+}
+
+/* TEST 3: internal strings + pools */
+
+/* Mempool all test3 nodes are allocated from; created in test3(). */
+static struct mempool *pool3;
+
+/* Node whose string key is stored at the end of the node itself. */
+struct node3 {
+ int data;
+ char key[1];
+};
+
+/* Generator parameters: operations named test3_*, key kept in the
+ * trailing `key' array. */
+#define HASH_NODE struct node3
+#define HASH_PREFIX(x) test3_##x
+#define HASH_KEY_ENDSTRING key
+
+/* Operations to generate: test3_find and test3_new. */
+#define HASH_WANT_FIND
+#define HASH_WANT_NEW
+
+/* Allocate nodes from the caller-provided pool. */
+#define HASH_USE_POOL pool3
+
+#include "lib/hashtable.h"
+
+/* Large table with end-allocated string keys living in a mempool. */
+static void test3(void)
+{
+  char name[32];
+  int k;
+
+  pool3 = mp_new(16384);
+  test3_init();
+
+  /* Insert the even numbers ... */
+  for (k = 0; k < 1048576; k += 2)
+    {
+      sprintf(name, "abc%d", k);
+      test3_new(name);
+    }
+
+  /* ... and check that exactly those can be found. */
+  for (k = 0; k < 1048576; k++)
+    {
+      struct node3 *found;
+      int missing;
+
+      sprintf(name, "abc%d", k);
+      found = test3_find(name);
+      missing = !found;
+      if (missing != (k & 1))
+        die("Inconsistency at i=%d", k);
+    }
+  puts("OK");
+}
+
+/* TEST 4: complex keys */
+
+#include "lib/hashfunc.h"
+
+/* Node keyed by the pair (host, port); the host string is stored at the
+ * end of the node. */
+struct node4 {
+ int port;
+ int data;
+ char host[1];
+};
+
+/* Generator parameters: operations named test4_*, two-component key.
+ * HASH_KEY_COMPLEX(x) lists the components prefixed by x, HASH_KEY_DECL
+ * is the matching parameter declaration. */
+#define HASH_NODE struct node4
+#define HASH_PREFIX(x) test4_##x
+#define HASH_KEY_COMPLEX(x) x host, x port
+#define HASH_KEY_DECL char *host, int port
+
+/* Operations to generate. */
+#define HASH_WANT_CLEANUP
+#define HASH_WANT_FIND
+#define HASH_WANT_NEW
+#define HASH_WANT_LOOKUP
+#define HASH_WANT_DELETE
+#define HASH_WANT_REMOVE
+
+/* Hash function: case-insensitive hash of the host combined with the
+ * hashed port. */
+#define HASH_GIVE_HASHFN
+static uns test4_hash(char *host, int port)
+{
+ return hash_string_nocase(host) ^ hash_u32(port);
+}
+
+/* Key equality: hosts compared case-insensitively, ports exactly. */
+#define HASH_GIVE_EQ
+static inline int test4_eq(char *host1, int port1, char *host2, int port2)
+{
+ return !strcasecmp(host1,host2) && port1 == port2;
+}
+
+/* Extra bytes to allocate behind the node: the host length (the
+ * terminating NUL fits into the host[1] member of the struct). */
+#define HASH_GIVE_EXTRA_SIZE
+static inline uns test4_extra_size(char *host, int port UNUSED)
+{
+ return strlen(host);
+}
+
+/* Copy both key components into a freshly allocated node. */
+#define HASH_GIVE_INIT_KEY
+static inline void test4_init_key(struct node4 *n, char *host, int port)
+{
+ strcpy(n->host, host);
+ n->port = port;
+}
+
+#include "lib/hashtable.h"
+
+/* Exercise new/lookup/find/remove/delete/cleanup with two-part keys. */
+static void test4(void)
+{
+  char name[32];
+  struct node4 *node;
+  int k;
+
+  test4_init();
+
+  /* Pre-create every third key explicitly. */
+  for (k = 0; k < 1024; k++)
+    {
+      if (k % 3)
+        continue;
+      sprintf(name, "abc%d", k);
+      node = test4_new(name, k%10);
+      node->data = k;
+    }
+
+  /* Lookup finds the existing nodes and creates the remaining ones. */
+  for (k = 0; k < 1024; k++)
+    {
+      sprintf(name, "abc%d", k);
+      node = test4_lookup(name, k%10);
+      node->data = k;
+    }
+
+  /* Get rid of the odd keys, alternating between find+remove and delete;
+   * the host is spelled in mixed case on purpose. */
+  for (k = 1; k < 1024; k += 2)
+    {
+      sprintf(name, "aBc%d", k);
+      if ((k % 7) >= 3)
+        test4_delete(name, k%10);
+      else
+        {
+          node = test4_find(name, k%10);
+          ASSERT(node);
+          test4_remove(node);
+        }
+    }
+
+  /* Only the even keys may remain, each with its own payload. */
+  for (k = 0; k < 1024; k++)
+    {
+      int missing;
+
+      sprintf(name, "ABC%d", k);
+      node = test4_find(name, k%10);
+      missing = !node;
+      if (missing != (k & 1) || (node && node->data != k))
+        die("Inconsistency at i=%d", k);
+    }
+  test4_cleanup();
+  puts("OK");
+}
+
+/* TEST 5: integers again, but this time dynamically */
+
+/* Node stored in the table: an integer key plus one payload field. */
+struct node5 {
+ int key;
+ int data;
+};
+
+/* Generator parameters: operations named test5_*, atomic int key, and a
+ * dynamic table, i.e. every operation takes a struct test5_table * as its
+ * first argument. */
+#define HASH_NODE struct node5
+#define HASH_PREFIX(x) test5_##x
+#define HASH_KEY_ATOMIC key
+#define HASH_ATOMIC_TYPE int
+#define HASH_TABLE_DYNAMIC
+
+/* Forward declaration; the struct itself is defined by the generator. */
+struct test5_table;
+
+/* Data initializer for newly created nodes; with a dynamic table it
+ * receives the table pointer as well, which is not needed here. */
+#define HASH_GIVE_INIT_DATA
+static inline void test5_init_data(struct test5_table *table UNUSED, struct node5 *n)
+{
+ n->data = n->key + 123;
+}
+
+/* Operations to generate: test5_find, test5_new, test5_delete. */
+#define HASH_WANT_FIND
+#define HASH_WANT_NEW
+#define HASH_WANT_DELETE
+
+#include "lib/hashtable.h"
+
+/* Same scenario as test1, but on a dynamically passed table. */
+static void test5(void)
+{
+  struct test5_table tab;
+  int sum = 0;
+  int k;
+
+  test5_init(&tab);
+
+  /* Create keys 0..1023 and verify the initialized payload. */
+  for (k = 0; k < 1024; k++)
+    {
+      struct node5 *created = test5_new(&tab, k);
+      ASSERT(created->data == k+123);
+    }
+
+  /* Delete every odd key. */
+  for (k = 1; k < 1024; k += 2)
+    test5_delete(&tab, k);
+
+  /* Exactly the even keys must be left, with their payload intact. */
+  for (k = 0; k < 1024; k++)
+    {
+      struct node5 *found = test5_find(&tab, k);
+      int missing = !found;
+      if (missing != (k & 1) || (found && found->data != k+123))
+        die("Inconsistency at i=%d", k);
+    }
+
+  /* Sum of (1 + key) over the surviving even keys 0..1022 is 512*512. */
+  HASH_FOR_ALL_DYNAMIC(test5, &tab, n)
+    {
+      sum += 1 + n->key;
+    }
+  HASH_END_FOR;
+  ASSERT(sum == 262144);
+  puts("OK");
+}
+
+/*
+ * Run the hash table tests.  Without arguments all tests are run;
+ * otherwise each argument is the number (1..5) of a test to run.
+ * Arguments that do not denote a usable bit number are ignored.
+ */
+int
+main(int argc, char **argv)
+{
+  uns m = ~0U;		/* bit N set <=> run test N */
+  if (argc > 1)
+    {
+      m = 0;
+      for (int i=1; i<argc; i++)
+	{
+	  long t = atol(argv[i]);
+	  /* Shifting by a negative count or by the full width of the type
+	   * is undefined, so accept only valid bit numbers, and shift an
+	   * unsigned one so that bit 31 is not a signed overflow. */
+	  if (t >= 0 && t < (long)(8 * sizeof(m)))
+	    m |= 1U << t;
+	}
+    }
+  if (m & (1U << 1))
+    test1();
+  if (m & (1U << 2))
+    test2();
+  if (m & (1U << 3))
+    test3();
+  if (m & (1U << 4))
+    test4();
+  if (m & (1U << 5))
+    test5();
+  return 0;
+}
--- /dev/null
+# Tests for the hash table modules
+# Each run selects one test of hash-test by its number; every test prints
+# "OK" on success.
+
+Run: ../obj/lib/hash-test 1
+Out: OK
+
+Run: ../obj/lib/hash-test 2
+Out: OK
+
+Run: ../obj/lib/hash-test 3
+Out: OK
+
+Run: ../obj/lib/hash-test 4
+Out: OK
+
+Run: ../obj/lib/hash-test 5
+Out: OK
--- /dev/null
+/*
+ * UCW Library -- Hyper-super-meta-alt-control-shift extra fast
+ * str_len() and hash_*() routines
+ *
+ * It is always at least as fast as the classical strlen() routine and for
+ * strings longer than 100 characters, it is substantially faster.
+ *
+ * (c) 2002, Robert Spalek <robert@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/hashfunc.h"
+#include "lib/chartype.h"
+
+/* The number of bits the hash in the function hash_*() is rotated by after
+ * every pass. It should be prime with the word size. */
+#define SHIFT_BITS 7
+
+/* A bit-mask which clears higher bytes than a given threshold. */
+static uns mask_higher_bits[sizeof(uns)];
+
+/*
+ * Fill in mask_higher_bits[]: entry k gets its first k bytes (in memory
+ * order) set to all ones and the remaining bytes cleared.
+ */
+static void CONSTRUCTOR
+hashfunc_init(void)
+{
+  uns keep, pos;
+
+  for (keep = 0; keep < sizeof(uns); keep++)
+    {
+      byte *raw = (byte *) &mask_higher_bits[keep];
+      for (pos = 0; pos < sizeof(uns); pos++)
+	raw[pos] = (pos < keep) ? -1 : 0;
+    }
+}
+
+/*
+ * Return the number of bytes of x (taken in memory order) preceding its
+ * first zero byte, or sizeof(uns) when x contains no zero byte.
+ */
+static inline uns CONST
+str_len_uns(uns x)
+{
+  const uns ones = ~0U / 0xff;		/* 0x01 repeated in every byte */
+  const uns tops = ones * 0x80;		/* 0x80 repeated in every byte */
+  const byte *raw = (const byte *) &x;
+  uns n = 0;
+
+  /*
+   * Quick rejection test.  With
+   *
+   *	lowered = x - 0x01010101;
+   *	flipped = ~x & lowered;
+   *
+   * the top bit of a byte of `flipped' can be set only if that bit went
+   * from 0 in x to 1 in `lowered'.  When no byte of x is zero this never
+   * happens, so a zero result proves that x has no zero byte.
+   */
+  if (!(~x & (x - ones) & tops))
+    return sizeof(uns);
+
+  /* Otherwise locate the first zero byte the plain way. */
+  while (n < sizeof(uns) && raw[n])
+    n++;
+  return n;
+}
+
+/*
+ * Length of a zero-terminated string, scanned one uns at a time.
+ * The header requires str to be aligned to uns.
+ */
+inline uns
+str_len_aligned(const char *str)
+{
+  const uns *word = (const uns *) str;
+  uns total = 0;
+  uns part;
+
+  /* A word contributing fewer than sizeof(uns) bytes holds the end. */
+  do
+    {
+      part = str_len_uns(*word++);
+      total += part;
+    }
+  while (part >= sizeof(uns));
+  return total;
+}
+
+/*
+ * Hash a zero-terminated string one uns at a time: the hash is rotated
+ * by SHIFT_BITS before each word and the word is XORed in; from the last
+ * word only the bytes before the terminator are used.
+ * The header requires str to be aligned to uns.
+ */
+inline uns
+hash_string_aligned(const char *str)
+{
+  const uns *word = (const uns *) str;
+  uns hash = 0;
+
+  for (;; word++)
+    {
+      uns used = str_len_uns(*word);
+      hash = ROL(hash, SHIFT_BITS);
+      if (used < sizeof(uns))
+	return hash ^ (*word & mask_higher_bits[used]);
+      hash ^= *word;
+    }
+}
+
+/*
+ * Hash a block of len bytes one uns at a time: the hash is rotated by
+ * SHIFT_BITS before each word and the word is XORed in; of the final
+ * partial word only the first (len % sizeof(uns)) bytes are used.
+ * The header requires str to be aligned to uns.
+ */
+inline uns
+hash_block_aligned(const byte *str, uns len)
+{
+  const uns *u = (const uns *) str;
+  uns hash = 0;
+  while (len >= sizeof(uns))
+    {
+      hash = ROL(hash, SHIFT_BITS) ^ *u++;
+      len -= sizeof(uns);
+    }
+  hash = ROL(hash, SHIFT_BITS);
+  /*
+   * Touch the trailing word only if it really holds some bytes of the
+   * block.  With len == 0 it lies completely behind the block and its
+   * contribution would be masked to zero anyway (mask_higher_bits[0]
+   * is 0), so skipping the read does not change the result.
+   */
+  if (len)
+    hash ^= *u & mask_higher_bits[len];
+  return hash;
+}
+
+#ifndef CPU_ALLOW_UNALIGNED
+/*
+ * Length of a zero-terminated string at any address: the bytes in front
+ * of the next uns boundary are checked one by one, the rest is left to
+ * str_len_aligned().
+ */
+uns
+str_len(const char *str)
+{
+  uns misalign = UNALIGNED_PART(str, uns);
+  uns head, k;
+
+  if (!misalign)
+    return str_len_aligned(str);
+
+  head = sizeof(uns) - misalign;
+  for (k = 0; k < head; k++)
+    if (!str[k])
+      return k;
+  return head + str_len_aligned(str + head);
+}
+
+/*
+ * Hash a zero-terminated string at any address.  Aligned strings are
+ * passed to hash_string_aligned(); otherwise the words are put together
+ * byte by byte: the hash is rotated by SHIFT_BITS at the start of each
+ * group of sizeof(uns) bytes and every byte is XORed in at the position
+ * selected by the CPU endianity.
+ */
+uns
+hash_string(const char *str)
+{
+  const byte *s = (const byte *) str;
+  uns hash = 0;
+  uns i;
+
+  if (!UNALIGNED_PART(s, uns))
+    return hash_string_aligned(str);
+
+  for (i=0; ; i++)
+    {
+      uns modulo = i % sizeof(uns);
+      uns pos;			/* byte position inside the word */
+#ifdef CPU_LITTLE_ENDIAN
+      pos = modulo;
+#else
+      pos = sizeof(uns) - 1 - modulo;
+#endif
+      if (!modulo)
+	hash = ROL(hash, SHIFT_BITS);
+      if (!s[i])
+	break;
+      /* Widen to uns first: shifting the int-promoted byte by 24 bits
+       * could overflow a signed int. */
+      hash ^= (uns) s[i] << (pos * 8);
+    }
+  return hash;
+}
+
+/*
+ * Hash a block of len bytes at any address.  Aligned blocks are passed
+ * to hash_block_aligned(); otherwise the words are put together byte by
+ * byte: the hash is rotated by SHIFT_BITS at the start of each group of
+ * sizeof(uns) bytes and every byte is XORed in at the position selected
+ * by the CPU endianity.
+ */
+uns
+hash_block(const byte *str, uns len)
+{
+  uns hash = 0;
+  uns i;
+
+  if (!UNALIGNED_PART(str, uns))
+    return hash_block_aligned(str, len);
+
+  for (i=0; ; i++)
+    {
+      uns modulo = i % sizeof(uns);
+      uns pos;			/* byte position inside the word */
+#ifdef CPU_LITTLE_ENDIAN
+      pos = modulo;
+#else
+      pos = sizeof(uns) - 1 - modulo;
+#endif
+      if (!modulo)
+	hash = ROL(hash, SHIFT_BITS);
+      if (i >= len)
+	break;
+      /* Widen to uns first: shifting the int-promoted byte by 24 bits
+       * could overflow a signed int. */
+      hash ^= (uns) str[i] << (pos * 8);
+    }
+  return hash;
+}
+#endif
+
+/*
+ * Hash a zero-terminated string, passing every character through
+ * Cupcase() first.  The string is processed byte by byte: the hash is
+ * rotated by SHIFT_BITS at the start of each group of sizeof(uns) bytes
+ * and every converted byte is XORed in at the position selected by the
+ * CPU endianity.
+ */
+uns
+hash_string_nocase(const char *str)
+{
+  const byte *s = (const byte *) str;
+  uns hash = 0;
+  uns i;
+
+  for (i=0; ; i++)
+    {
+      uns modulo = i % sizeof(uns);
+      uns pos;			/* byte position inside the word */
+#ifdef CPU_LITTLE_ENDIAN
+      pos = modulo;
+#else
+      pos = sizeof(uns) - 1 - modulo;
+#endif
+      if (!modulo)
+	hash = ROL(hash, SHIFT_BITS);
+      if (!s[i])
+	break;
+      /* Widen to uns first: shifting an int-promoted character by 24 bits
+       * could overflow a signed int.
+       * NOTE(review): assumes Cupcase() yields a non-negative character
+       * code -- confirm in lib/chartype.h. */
+      hash ^= (uns) (Cupcase(s[i])) << (pos * 8);
+    }
+  return hash;
+}
--- /dev/null
+/*
+ * UCW Library -- Hyper-super-meta-alt-control-shift extra fast
+ * str_len() and hash_*() routines
+ *
+ * (c) 2002, Robert Spalek <robert@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_HASHFUNC_H
+#define _UCW_HASHFUNC_H
+
+#include "lib/lib.h"
+
+/* The following functions need str to be aligned to uns. */
+uns str_len_aligned(const char *str) PURE;
+uns hash_string_aligned(const char *str) PURE;
+uns hash_block_aligned(const byte *str, uns len) PURE;
+
+#ifdef CPU_ALLOW_UNALIGNED
+#define str_len(str) str_len_aligned(str)
+#define hash_string(str) hash_string_aligned(str)
+#define hash_block(str, len) hash_block_aligned(str, len)
+#else
+uns str_len(const char *str) PURE;
+uns hash_string(const char *str) PURE;
+uns hash_block(const byte *str, uns len) PURE;
+#endif
+
+uns hash_string_nocase(const char *str) PURE;
+
+/*
+ * We hash integers by multiplying by a reasonably large prime with
+ * few ones in its binary form (to give the compiler the possibility
+ * of using shifts and adds on architectures where multiplication
+ * instructions are slow).
+ */
+/* Hash an uns by multiplying it by the constant 0x01008041. */
+static inline uns CONST hash_u32(uns x)
+{
+  return x * 0x01008041;
+}
+
+/* Hash a u64: XOR its upper 32 bits onto the lower ones, then hash_u32(). */
+static inline uns CONST hash_u64(u64 x)
+{
+  uns low = (uns) x;
+  uns high = (uns) (x >> 32);
+  return hash_u32(low ^ high);
+}
+
+/* Hash a pointer by its integer value; the variant depends on its size. */
+static inline uns CONST hash_pointer(void *x)
+{
+  uintptr_t bits = (uintptr_t) x;
+  if (sizeof(x) <= 4)
+    return hash_u32((uns) bits);
+  return hash_u64((u64) bits);
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Universal Hash Table
+ *
+ * (c) 2002--2004 Martin Mares <mj@ucw.cz>
+ * (c) 2002--2005 Robert Spalek <robert@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+/*
+ * This is not a normal header file, it's a generator of hash tables.
+ * Each time you include it with parameters set in the corresponding
+ * preprocessor macros, it generates a hash table with the parameters
+ * given.
+ *
+ * You need to specify:
+ *
+ * HASH_NODE data type where a node dwells (usually a struct).
+ * HASH_PREFIX(x) macro to add a name prefix (used on all global names
+ * defined by the hash table generator).
+ *
+ * Then decide on type of keys:
+ *
+ * HASH_KEY_ATOMIC=f use node->f as a key of an atomic type (i.e.,
+ * a type which can be compared using `==')
+ * HASH_ATOMIC_TYPE (defaults to int).
+ * | HASH_KEY_STRING=f use node->f as a string key, allocated
+ * separately from the rest of the node.
+ * | HASH_KEY_ENDSTRING=f use node->f as a string key, allocated
+ * automatically at the end of the node struct
+ * (to be declared as "char f[1]" at the end).
+ * | HASH_KEY_COMPLEX use a multi-component key; as the name suggests,
+ * the passing of parameters is a bit complex then.
+ * The HASH_KEY_COMPLEX(x) macro should expand to
+ * `x k1, x k2, ... x kn' and you should also define:
+ * HASH_KEY_DECL declaration of function parameters in which key
+ * should be passed to all hash table operations.
+ * That is, `type1 k1, type2 k2, ... typen kn'.
+ * With complex keys, HASH_GIVE_HASHFN and HASH_GIVE_EQ
+ * are mandatory.
+ * | HASH_KEY_MEMORY=f use node->f as a raw data key, compared using
+ * memcmp
+ * HASH_KEY_SIZE the length of the key block
+ *
+ * Then specify what operations you request (all names are automatically
+ * prefixed by calling HASH_PREFIX):
+ *
+ * <always defined> init() -- initialize the hash table.
+ * HASH_WANT_CLEANUP cleanup() -- deallocate the hash table.
+ * HASH_WANT_FIND node *find(key) -- find first node with the specified
+ * key, return NULL if no such node exists.
+ * HASH_WANT_FIND_NEXT node *find_next(node *start) -- find next node with the
+ * specified key, return NULL if no such node exists.
+ * HASH_WANT_NEW node *new(key) -- create new node with given key.
+ * Doesn't check whether it already exists.
+ * HASH_WANT_LOOKUP node *lookup(key) -- find node with given key,
+ * if it doesn't exist, create it. Defining
+ * HASH_GIVE_INIT_DATA is strongly recommended.
+ * HASH_WANT_DELETE int delete(key) -- delete and deallocate node
+ * with given key. Returns success.
+ * HASH_WANT_REMOVE remove(node *) -- delete and deallocate given node.
+ *
+ * You can also supply several functions:
+ *
+ * HASH_GIVE_HASHFN unsigned int hash(key) -- calculate hash value of key.
+ * We have sensible default hash functions for strings
+ * and integers.
+ * HASH_GIVE_EQ int eq(key1, key2) -- return whether keys are equal.
+ * By default, we use == for atomic types and either
+ * strcmp or strcasecmp for strings.
+ * HASH_GIVE_EXTRA_SIZE int extra_size(key) -- returns how many bytes after the
+ * node should be allocated for dynamic data. Default=0
+ * or length of the string with HASH_KEY_ENDSTRING.
+ * HASH_GIVE_INIT_KEY void init_key(node *,key) -- initialize key in a newly
+ * created node. Defaults: assignment for atomic keys
+ * and static strings, strcpy for end-allocated strings.
+ * HASH_GIVE_INIT_DATA void init_data(node *) -- initialize data fields in a
+ * newly created node. Very useful for lookup operations.
+ * HASH_GIVE_ALLOC void *alloc(unsigned int size) -- allocate space for
+ * a node. Default is xmalloc() or pooled allocation, depending
+ * on HASH_USE_POOL and HASH_AUTO_POOL switches.
+ * void free(void *) -- the converse.
+ *
+ * ... and a couple of extra parameters:
+ *
+ * HASH_NOCASE String comparisons should be case-insensitive.
+ * HASH_DEFAULT_SIZE=n Initially, use hash table of approx. `n' entries.
+ * HASH_CONSERVE_SPACE Use as little space as possible.
+ * HASH_FN_BITS=n The hash function gives only `n' significant bits.
+ * HASH_ATOMIC_TYPE=t Atomic values are of type `t' instead of int.
+ * HASH_USE_POOL=pool Allocate all nodes from given mempool. Note, however, that
+ * deallocation is not supported by mempools, so delete/remove
+ * will leak pool memory.
+ * HASH_AUTO_POOL=size Create a pool of the given block size automatically.
+ * HASH_ZERO_FILL New entries should be initialized to all zeroes.
+ * HASH_TABLE_ALLOC The hash table itself will be allocated and freed using
+ * the same allocation functions as the nodes instead of
+ * the default xmalloc().
+ * HASH_TABLE_DYNAMIC Support multiple hash tables; the first parameter of all
+ * hash table operations is struct HASH_PREFIX(table) *.
+ *
+ * You also get an iterator macro at no extra charge:
+ *
+ * HASH_FOR_ALL(hash_prefix, variable)
+ * {
+ * // node *variable gets declared automatically
+ * do_something_with_node(variable);
+ * // use HASH_BREAK and HASH_CONTINUE instead of break and continue
+ * // you must not alter contents of the hash table here
+ * }
+ * HASH_END_FOR;
+ *
+ * (For dynamic tables, use HASH_FOR_ALL_DYNAMIC(hash_prefix, hash_table, variable) instead.)
+ *
+ * Then include "lib/hashtable.h" and voila, you have a hash table
+ * suiting all your needs (at least those which you've revealed :) ).
+ *
+ * After including this file, all parameter macros are automatically
+ * undef'd.
+ */
+
+#ifndef _UCW_HASHFUNC_H
+#include "lib/hashfunc.h"
+#endif
+
+#include <string.h>
+
+/* Initial setup of parameters */
+
+#if !defined(HASH_NODE) || !defined(HASH_PREFIX)
+#error Some of the mandatory configuration macros are missing.
+#endif
+
+#if defined(HASH_KEY_ATOMIC) && !defined(HASH_CONSERVE_SPACE)
+#define HASH_CONSERVE_SPACE
+#endif
+
+#define P(x) HASH_PREFIX(x)
+
+/* Declare buckets and the hash table */
+
+/* The user's node type under the generated name <prefix>node. */
+typedef HASH_NODE P(node);
+
+/*
+ * A bucket wraps one node: `next' chains the buckets sharing a slot and,
+ * unless HASH_CONSERVE_SPACE is set, `hash' caches the full hash value
+ * of the key, so that it need not be recomputed (see P(bucket_hash)).
+ */
+typedef struct P(bucket) {
+ struct P(bucket) *next;
+#ifndef HASH_CONSERVE_SPACE
+ uns hash;
+#endif
+ P(node) n;
+} P(bucket);
+
+/*
+ * The table itself:
+ *   hash_size      number of slots in ht[]
+ *   hash_count     number of nodes currently stored
+ *   hash_max       node count above which P(new) grows the table
+ *   hash_min       lower bound computed together with hash_max
+ *                  (NOTE(review): presumably the shrink threshold used
+ *                  by delete/remove -- confirm)
+ *   hash_hard_max  limit beyond which hash_max is set to ~0U
+ *   ht             array of hash_size chain heads
+ *   pool           mempool of the nodes (HASH_AUTO_POOL only)
+ */
+struct P(table) {
+ uns hash_size;
+ uns hash_count, hash_max, hash_min, hash_hard_max;
+ P(bucket) **ht;
+#ifdef HASH_AUTO_POOL
+ struct mempool *pool;
+#endif
+};
+
+/*
+ * Helper macros hiding the difference between a dynamic table (passed to
+ * every operation as `struct P(table) *table') and the single static one:
+ *
+ *   T     the table as an lvalue
+ *   TA    parameter list of a function taking only the table
+ *   TAC   the table parameter followed by a comma (more parameters follow)
+ *   TAU   like TA, with the parameter marked UNUSED
+ *   TAUC  like TAC, with the parameter marked UNUSED
+ *   TT    the table as the only argument of a call
+ *   TTC   the table argument followed by a comma
+ *
+ * For a static table the parameter and argument forms expand to `void'
+ * or to nothing.
+ */
+#ifdef HASH_TABLE_DYNAMIC
+#define T (*table)
+#define TA struct P(table) *table
+#define TAC TA,
+#define TAU TA UNUSED
+#define TAUC TA UNUSED,
+#define TT table
+#define TTC table,
+#else
+/* The one and only table, defined right here. */
+struct P(table) P(table);
+#define T P(table)
+#define TA void
+#define TAC
+#define TAU void
+#define TAUC
+#define TT
+#define TTC
+#endif
+
+/* Preset parameters */
+
+#if defined(HASH_KEY_ATOMIC)
+
+/* Atomic keys: HASH_KEY(x) is x followed by the name of the key field, so
+ * HASH_KEY( ) is the name of the key parameter and HASH_KEY(n->) the key
+ * stored in node n. */
+#define HASH_KEY(x) x HASH_KEY_ATOMIC
+
+#ifndef HASH_ATOMIC_TYPE
+# define HASH_ATOMIC_TYPE int
+#endif
+#define HASH_KEY_DECL HASH_ATOMIC_TYPE HASH_KEY( )
+
+/* Default hash function: hash_u32() for keys of up to 4 bytes, hash_u64()
+ * otherwise.  It returns uns like the hash functions it calls and like
+ * every place where the result is stored. */
+#ifndef HASH_GIVE_HASHFN
+# define HASH_GIVE_HASHFN
+ static inline uns P(hash) (TAUC HASH_ATOMIC_TYPE x)
+ { return ((sizeof(x) <= 4) ? hash_u32(x) : hash_u64(x)); }
+#endif
+
+/* Default equality: plain `=='. */
+#ifndef HASH_GIVE_EQ
+# define HASH_GIVE_EQ
+ static inline int P(eq) (TAUC HASH_ATOMIC_TYPE x, HASH_ATOMIC_TYPE y)
+ { return x == y; }
+#endif
+
+/* Default key initializer: plain assignment to the key field. */
+#ifndef HASH_GIVE_INIT_KEY
+# define HASH_GIVE_INIT_KEY
+ static inline void P(init_key) (TAUC P(node) *n, HASH_ATOMIC_TYPE k)
+ { HASH_KEY(n->) = k; }
+#endif
+
+#elif defined(HASH_KEY_MEMORY)
+
+/* Raw memory keys of HASH_KEY_SIZE bytes: HASH_KEY(x) is x followed by the
+ * name of the key field. */
+#define HASH_KEY(x) x HASH_KEY_MEMORY
+
+#define HASH_KEY_DECL byte HASH_KEY( )[HASH_KEY_SIZE]
+
+/* Default hash function: hash_block() over the whole key.  It returns uns
+ * like hash_block() itself and like every place where the result is
+ * stored. */
+#ifndef HASH_GIVE_HASHFN
+# define HASH_GIVE_HASHFN
+ static inline uns P(hash) (TAUC byte *x)
+ { return hash_block(x, HASH_KEY_SIZE); }
+#endif
+
+/* Default equality: memcmp() over the whole key. */
+#ifndef HASH_GIVE_EQ
+# define HASH_GIVE_EQ
+ static inline int P(eq) (TAUC byte *x, byte *y)
+ { return !memcmp(x, y, HASH_KEY_SIZE); }
+#endif
+
+/* Default key initializer: copy the key bytes into the node. */
+#ifndef HASH_GIVE_INIT_KEY
+# define HASH_GIVE_INIT_KEY
+ static inline void P(init_key) (TAUC P(node) *n, byte *k)
+ { memcpy(HASH_KEY(n->), k, HASH_KEY_SIZE); }
+#endif
+
+#elif defined(HASH_KEY_STRING) || defined(HASH_KEY_ENDSTRING)
+
+/*
+ * String keys.  With HASH_KEY_STRING the node only stores the pointer it
+ * is given; with HASH_KEY_ENDSTRING the string is copied behind the node,
+ * which therefore needs strlen(key) extra bytes.
+ */
+#ifdef HASH_KEY_STRING
+# define HASH_KEY(x) x HASH_KEY_STRING
+# ifndef HASH_GIVE_INIT_KEY
+# define HASH_GIVE_INIT_KEY
+ /* Default key initializer: remember the pointer, no copy is made. */
+ static inline void P(init_key) (TAUC P(node) *n, char *k)
+ { HASH_KEY(n->) = k; }
+# endif
+#else
+# define HASH_KEY(x) x HASH_KEY_ENDSTRING
+# define HASH_GIVE_EXTRA_SIZE
+ /* Extra bytes to allocate behind the node for the key string. */
+ static inline int P(extra_size) (TAUC char *k)
+ { return strlen(k); }
+# ifndef HASH_GIVE_INIT_KEY
+# define HASH_GIVE_INIT_KEY
+ /* Default key initializer: copy the string into the node. */
+ static inline void P(init_key) (TAUC P(node) *n, char *k)
+ { strcpy(HASH_KEY(n->), k); }
+# endif
+#endif
+#define HASH_KEY_DECL char *HASH_KEY( )
+
+/* Default hash function: hash_string(), or hash_string_nocase() when
+ * HASH_NOCASE is set. */
+#ifndef HASH_GIVE_HASHFN
+#define HASH_GIVE_HASHFN
+ static inline uns P(hash) (TAUC char *k)
+ {
+# ifdef HASH_NOCASE
+ return hash_string_nocase(k);
+# else
+ return hash_string(k);
+# endif
+ }
+#endif
+
+/* Default equality: strcmp(), or strcasecmp() when HASH_NOCASE is set. */
+#ifndef HASH_GIVE_EQ
+# define HASH_GIVE_EQ
+ static inline int P(eq) (TAUC char *x, char *y)
+ {
+# ifdef HASH_NOCASE
+ return !strcasecmp(x,y);
+# else
+ return !strcmp(x,y);
+# endif
+ }
+#endif
+
+#elif defined(HASH_KEY_COMPLEX)
+
+#define HASH_KEY(x) HASH_KEY_COMPLEX(x)
+
+#else
+#error You forgot to set the hash key type.
+#endif
+
+/* Defaults for missing parameters */
+
+#ifndef HASH_GIVE_HASHFN
+#error Unable to determine which hash function to use.
+#endif
+
+#ifndef HASH_GIVE_EQ
+#error Unable to determine how to compare two keys.
+#endif
+
+#ifdef HASH_GIVE_EXTRA_SIZE
+/* This trickery is needed to avoid `unused parameter' warnings */
+#define HASH_EXTRA_SIZE(x) P(extra_size)(TTC x)
+#else
+/*
+ * Beware, C macros are expanded iteratively, not recursively,
+ * hence we get only a _single_ argument, although the expansion
+ * of HASH_KEY contains commas.
+ */
+#define HASH_EXTRA_SIZE(x) 0
+#endif
+
+#ifndef HASH_GIVE_INIT_KEY
+#error Unable to determine how to initialize keys.
+#endif
+
+/* Default data initializer: leaves the data fields of a new node alone. */
+#ifndef HASH_GIVE_INIT_DATA
+static inline void P(init_data) (TAUC P(node) *n UNUSED)
+{
+}
+#endif
+
+/*
+ * Select the node allocator.  Each variant provides P(init_alloc) and
+ * P(cleanup_alloc), called from P(init) and P(cleanup); all variants but
+ * the first one also define P(alloc) and P(free).
+ */
+#ifdef HASH_GIVE_ALLOC
+/* If the caller has requested to use his own allocation functions, do so */
+static inline void P(init_alloc) (TAU) { }
+static inline void P(cleanup_alloc) (TAU) { }
+
+#elif defined(HASH_USE_POOL)
+/* If the caller has requested to use his mempool, do so */
+/* Freeing a single node is a no-op in this variant. */
+#include "lib/mempool.h"
+static inline void * P(alloc) (TAUC unsigned int size) { return mp_alloc_fast(HASH_USE_POOL, size); }
+static inline void P(free) (TAUC void *x UNUSED) { }
+static inline void P(init_alloc) (TAU) { }
+static inline void P(cleanup_alloc) (TAU) { }
+
+#elif defined(HASH_AUTO_POOL)
+/* Use our own pools */
+/* The pool is created in init_alloc and deleted in cleanup_alloc; freeing
+ * a single node is a no-op.  HASH_USE_POOL gets defined as well, so that
+ * the code below treats both pool variants alike. */
+#include "lib/mempool.h"
+static inline void * P(alloc) (TAUC unsigned int size) { return mp_alloc_fast(T.pool, size); }
+static inline void P(free) (TAUC void *x UNUSED) { }
+static inline void P(init_alloc) (TAU) { T.pool = mp_new(HASH_AUTO_POOL); }
+static inline void P(cleanup_alloc) (TAU) { mp_delete(T.pool); }
+#define HASH_USE_POOL
+
+#else
+/* The default allocation method */
+static inline void * P(alloc) (TAUC unsigned int size) { return xmalloc(size); }
+static inline void P(free) (TAUC void *x) { xfree(x); }
+static inline void P(init_alloc) (TAU) { }
+static inline void P(cleanup_alloc) (TAU) { }
+
+#endif
+
+/* Allocation of the slot array: through the node allocator when
+ * HASH_TABLE_ALLOC is set, through xmalloc()/xfree() otherwise. */
+#ifdef HASH_TABLE_ALLOC
+static inline void * P(table_alloc) (TAUC unsigned int size) { return P(alloc)(TTC size); }
+static inline void P(table_free) (TAUC void *x) { P(free)(TTC x); }
+#else
+static inline void * P(table_alloc) (TAUC unsigned int size) { return xmalloc(size); }
+static inline void P(table_free) (TAUC void *x) { xfree(x); }
+#endif
+
+#ifndef HASH_DEFAULT_SIZE
+#define HASH_DEFAULT_SIZE 32
+#endif
+
+#ifndef HASH_FN_BITS
+#define HASH_FN_BITS 32
+#endif
+
+/* Allocate a bucket of the given size from the node allocator; it is
+ * cleared if HASH_ZERO_FILL has been requested. */
+static inline void * P(new_bucket)(TAUC uns size)
+{
+  void *fresh = P(alloc)(TTC size);
+#ifdef HASH_ZERO_FILL
+  bzero(fresh, size);
+#endif
+  return fresh;
+}
+
+/* Now the operations */
+
+/*
+ * Allocate an empty slot array.  T.hash_size holds the requested size on
+ * entry and is replaced by what next_table_prime() returns for it; the
+ * grow limit hash_max and the lower bound hash_min are derived from the
+ * final size.
+ */
+static void P(alloc_table) (TAU)
+{
+  size_t bytes;
+
+  T.hash_size = next_table_prime(T.hash_size);
+  bytes = sizeof(void *) * T.hash_size;
+  T.ht = P(table_alloc)(TTC bytes);
+  bzero(T.ht, bytes);
+
+  /* No more growing once doubling would reach hash_hard_max. */
+  T.hash_max = (2*T.hash_size < T.hash_hard_max) ? 2*T.hash_size : ~0U;
+  T.hash_min = (T.hash_size/2 > HASH_DEFAULT_SIZE) ? T.hash_size/4 : 0;
+}
+
+/* Initialize the hash table: set the limits, prepare the allocator and
+ * allocate the initial slot array. */
+static void P(init) (TA)
+{
+  /* The hard limit is 2^HASH_FN_BITS, but never more than 2^28. */
+#if HASH_FN_BITS < 28
+  T.hash_hard_max = 1 << HASH_FN_BITS;
+#else
+  T.hash_hard_max = 1 << 28;
+#endif
+  T.hash_size = HASH_DEFAULT_SIZE;
+  T.hash_count = 0;
+  P(init_alloc)(TT);
+  P(alloc_table)(TT);
+}
+
+#ifdef HASH_WANT_CLEANUP
+/* Deallocate the hash table: all buckets (unless they live in a mempool),
+ * the allocator state and finally the slot array. */
+static void P(cleanup) (TA)
+{
+#ifndef HASH_USE_POOL
+  uns slot;
+
+  for (slot = 0; slot < T.hash_size; slot++)
+    {
+      P(bucket) *cur = T.ht[slot];
+      while (cur)
+	{
+	  /* Fetch the successor before the bucket is freed. */
+	  P(bucket) *following = cur->next;
+	  P(free)(TTC cur);
+	  cur = following;
+	}
+    }
+#endif
+  P(cleanup_alloc)(TT);
+  P(table_free)(TTC T.ht);
+}
+#endif
+
+/*
+ * Return the full (unreduced) hash value of a bucket. With
+ * HASH_CONSERVE_SPACE the value is not stored in the bucket and is
+ * recomputed from the key; otherwise the cached b->hash is returned.
+ */
+static inline uns P(bucket_hash) (TAUC P(bucket) *b)
+{
+#ifdef HASH_CONSERVE_SPACE
+ return P(hash)(TTC HASH_KEY(b->n.));
+#else
+ return b->hash;
+#endif
+}
+
+/*
+ * Resize the table to (about) `size' slots: allocate a fresh slot array
+ * and relink every existing bucket into it. Buckets themselves are not
+ * reallocated, so node pointers held by callers stay valid.
+ */
+static void P(rehash) (TAC uns size)
+{
+  P(bucket) **old_table = T.ht;
+  uns old_size = T.hash_size;
+  uns slot;
+
+  DBG("Rehashing %d->%d at count %d", old_size, size, T.hash_count);
+  T.hash_size = size;
+  P(alloc_table)(TT);		/* replaces T.ht and may round T.hash_size */
+  for (slot = 0; slot < old_size; slot++)
+    {
+      P(bucket) *cur, *following;
+
+      for (cur = old_table[slot]; cur; cur = following)
+	{
+	  uns dest = P(bucket_hash)(TTC cur) % T.hash_size;
+
+	  following = cur->next;
+	  /* Push onto the head of the destination chain */
+	  cur->next = T.ht[dest];
+	  T.ht[dest] = cur;
+	}
+    }
+  P(table_free)(TTC old_table);
+}
+
+#ifdef HASH_WANT_FIND
+/*
+ * Look up a key. Returns the first node in the key's chain that
+ * compares equal, or NULL if there is none. The table is not modified.
+ */
+static P(node) * P(find) (TAC HASH_KEY_DECL)
+{
+  uns h0 = P(hash) (TTC HASH_KEY( ));
+  P(bucket) *cand = T.ht[h0 % T.hash_size];
+
+  while (cand)
+    {
+      int hit = 1;
+#ifndef HASH_CONSERVE_SPACE
+      /* Cheap rejection on the cached hash before the full comparison */
+      if (cand->hash != h0)
+	hit = 0;
+#endif
+      if (hit && P(eq)(TTC HASH_KEY( ), HASH_KEY(cand->n.)))
+	return &cand->n;
+      cand = cand->next;
+    }
+  return NULL;
+}
+#endif
+
+#ifdef HASH_WANT_FIND_NEXT
+/*
+ * Continue a search: return the next node after `start' in the same
+ * chain whose key equals the key of `start', or NULL if no further
+ * node matches.
+ */
+static P(node) * P(find_next) (TAC P(node) *start)
+{
+#ifndef HASH_CONSERVE_SPACE
+ uns h0 = P(hash) (TTC HASH_KEY(start->));
+#endif
+ /* Recover the bucket that embeds the node */
+ P(bucket) *b = SKIP_BACK(P(bucket), n, start);
+
+ for (b=b->next; b; b=b->next)
+ {
+ if (
+#ifndef HASH_CONSERVE_SPACE
+ b->hash == h0 &&
+#endif
+ P(eq)(TTC HASH_KEY(start->), HASH_KEY(b->n.)))
+ return &b->n;
+ }
+ return NULL;
+}
+#endif
+
+#ifdef HASH_WANT_NEW
+/*
+ * Insert a new node with the given key without checking whether the
+ * key is already present. The bucket is put at the head of its chain,
+ * the key and data are initialized through init_key()/init_data(), and
+ * the table is doubled when the element count (before this insertion)
+ * has reached hash_max. Returns the new node.
+ */
+static P(node) * P(new) (TAC HASH_KEY_DECL)
+{
+ uns h0, h;
+ P(bucket) *b;
+
+ h0 = P(hash) (TTC HASH_KEY( ));
+ h = h0 % T.hash_size;
+ b = P(new_bucket) (TTC sizeof(struct P(bucket)) + HASH_EXTRA_SIZE(HASH_KEY( )));
+ b->next = T.ht[h];
+ T.ht[h] = b;
+#ifndef HASH_CONSERVE_SPACE
+ b->hash = h0;
+#endif
+ P(init_key)(TTC &b->n, HASH_KEY( ));
+ P(init_data)(TTC &b->n);
+ /* Post-increment: the comparison sees the count before this insertion */
+ if (T.hash_count++ >= T.hash_max)
+ P(rehash)(TTC 2*T.hash_size);
+ return &b->n;
+}
+#endif
+
+#ifdef HASH_WANT_LOOKUP
+/*
+ * Find-or-create: return the node with the given key if it exists;
+ * otherwise insert a freshly initialized node (same steps as in new())
+ * and return that.
+ */
+static P(node) * P(lookup) (TAC HASH_KEY_DECL)
+{
+ uns h0 = P(hash) (TTC HASH_KEY( ));
+ uns h = h0 % T.hash_size;
+ P(bucket) *b;
+
+ /* First try to find an existing node */
+ for (b=T.ht[h]; b; b=b->next)
+ {
+ if (
+#ifndef HASH_CONSERVE_SPACE
+ b->hash == h0 &&
+#endif
+ P(eq)(TTC HASH_KEY( ), HASH_KEY(b->n.)))
+ return &b->n;
+ }
+
+ /* Not found: create a bucket at the head of the chain */
+ b = P(new_bucket) (TTC sizeof(struct P(bucket)) + HASH_EXTRA_SIZE(HASH_KEY( )));
+ b->next = T.ht[h];
+ T.ht[h] = b;
+#ifndef HASH_CONSERVE_SPACE
+ b->hash = h0;
+#endif
+ P(init_key)(TTC &b->n, HASH_KEY( ));
+ P(init_data)(TTC &b->n);
+ /* Post-increment: the comparison sees the count before this insertion */
+ if (T.hash_count++ >= T.hash_max)
+ P(rehash)(TTC 2*T.hash_size);
+ return &b->n;
+}
+#endif
+
+#ifdef HASH_WANT_DELETE
+/*
+ * Delete the first node whose key equals the given key. The bucket is
+ * unlinked and freed, and the table is halved when the element count
+ * drops below hash_min. Returns 1 if a node was deleted, 0 if the key
+ * was not found.
+ */
+static int P(delete) (TAC HASH_KEY_DECL)
+{
+  uns h0 = P(hash) (TTC HASH_KEY( ));
+  uns h = h0 % T.hash_size;
+  P(bucket) *b, **bb;
+
+  /*
+   * bb always points at the link leading to b, so unlinking is a single
+   * store. The assignment in the loop condition is parenthesized on
+   * purpose (same form as in remove(), and it keeps -Wparentheses quiet).
+   */
+  for (bb=&T.ht[h]; (b=*bb); bb=&b->next)
+    {
+      if (
+#ifndef HASH_CONSERVE_SPACE
+	  b->hash == h0 &&
+#endif
+	  P(eq)(TTC HASH_KEY( ), HASH_KEY(b->n.)))
+	{
+	  *bb = b->next;
+	  P(free)(TTC b);
+	  if (--T.hash_count < T.hash_min)
+	    P(rehash)(TTC T.hash_size/2);
+	  return 1;
+	}
+    }
+  return 0;
+}
+#endif
+
+#ifdef HASH_WANT_REMOVE
+/*
+ * Remove a specific node (identified by pointer, not by key) from the
+ * table and free its bucket. The node must be present in the table;
+ * this is checked by ASSERT. The table is halved when the element
+ * count drops below hash_min.
+ */
+static void P(remove) (TAC P(node) *n)
+{
+ /* Recover the bucket that embeds the node */
+ P(bucket) *x = SKIP_BACK(struct P(bucket), n, n);
+ uns h0 = P(bucket_hash)(TTC x);
+ uns h = h0 % T.hash_size;
+ P(bucket) *b, **bb;
+
+ /* Walk the chain until bb is the link that points at x */
+ for (bb=&T.ht[h]; (b=*bb) && b != x; bb=&b->next)
+ ;
+ ASSERT(b);
+ *bb = b->next;
+ P(free)(TTC b);
+ if (--T.hash_count < T.hash_min)
+ P(rehash)(TTC T.hash_size/2);
+}
+#endif
+
+/* And the iterator */
+
+#ifndef HASH_FOR_ALL
+
+/*
+ * Iteration over all nodes of a table. HASH_FOR_ALL_DYNAMIC opens a
+ * do { ... } block with two nested loops (slots, then the chain in each
+ * slot) and declares h_var as a pointer to the current node; the block
+ * must be closed by HASH_END_FOR. HASH_FOR_ALL is the variant for the
+ * table variable named <prefix>table.
+ *
+ * HASH_CONTINUE advances to the next bucket of the current chain.
+ * NOTE(review): HASH_BREAK expands to nothing, so it does not leave the
+ * iteration -- confirm whether that is intended.
+ */
+#define HASH_FOR_ALL_DYNAMIC(h_px, h_table, h_var) \
+do { \
+ uns h_slot; \
+ struct GLUE_(h_px,bucket) *h_buck; \
+ for (h_slot=0; h_slot < (h_table)->hash_size; h_slot++) \
+ for (h_buck = (h_table)->ht[h_slot]; h_buck; h_buck = h_buck->next) \
+ { \
+ GLUE_(h_px,node) *h_var = &h_buck->n;
+#define HASH_FOR_ALL(h_px, h_var) HASH_FOR_ALL_DYNAMIC(h_px, &GLUE_(h_px,table), h_var)
+#define HASH_END_FOR } } while(0)
+#define HASH_BREAK
+#define HASH_CONTINUE continue
+
+#endif
+
+/* Finally, undefine all the parameters */
+
+#undef P
+#undef T
+#undef TA
+#undef TAC
+#undef TAU
+#undef TAUC
+#undef TT
+#undef TTC
+
+#undef HASH_ATOMIC_TYPE
+#undef HASH_CONSERVE_SPACE
+#undef HASH_DEFAULT_SIZE
+#undef HASH_EXTRA_SIZE
+#undef HASH_FN_BITS
+#undef HASH_GIVE_ALLOC
+#undef HASH_GIVE_EQ
+#undef HASH_GIVE_EXTRA_SIZE
+#undef HASH_GIVE_HASHFN
+#undef HASH_GIVE_INIT_DATA
+#undef HASH_GIVE_INIT_KEY
+#undef HASH_KEY
+#undef HASH_KEY_ATOMIC
+#undef HASH_KEY_COMPLEX
+#undef HASH_KEY_DECL
+#undef HASH_KEY_ENDSTRING
+#undef HASH_KEY_STRING
+#undef HASH_KEY_MEMORY
+#undef HASH_KEY_SIZE
+#undef HASH_NOCASE
+#undef HASH_NODE
+#undef HASH_PREFIX
+#undef HASH_USE_POOL
+#undef HASH_AUTO_POOL
+#undef HASH_WANT_CLEANUP
+#undef HASH_WANT_DELETE
+#undef HASH_WANT_FIND
+#undef HASH_WANT_FIND_NEXT
+#undef HASH_WANT_LOOKUP
+#undef HASH_WANT_NEW
+#undef HASH_WANT_REMOVE
+#undef HASH_TABLE_ALLOC
+#undef HASH_TABLE_DYNAMIC
+#undef HASH_ZERO_FILL
--- /dev/null
+/*
+ * UCW Library -- Universal Heap Macros
+ *
+ * (c) 2001 Martin Mares <mj@ucw.cz>
+ * (c) 2005 Tomas Valla <tom@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+/*
+ * Sift the element at index _j down. The heap is stored in
+ * heap[1..num] with the children of node j at 2j and 2j+1. The loop
+ * stops when _j has no children or is `less' than all of them;
+ * otherwise _j is swapped with its smaller child and the walk continues.
+ *
+ * Helper for the macros below: it expects _j and _l (index variables)
+ * and x (scratch element passed to swap) to be declared by the caller.
+ */
+#define HEAP_BUBBLE_DOWN_J(heap,num,less,swap) \
+ for (;;) \
+ { \
+ _l = 2*_j; \
+ if (_l > num) \
+ break; \
+ if (less(heap[_j],heap[_l]) && (_l == num || less(heap[_j],heap[_l+1]))) \
+ break; \
+ if (_l != num && less(heap[_l+1],heap[_l])) \
+ _l++; \
+ swap(heap,_j,_l,x); \
+ _j = _l; \
+ }
+
+/*
+ * Sift the element at index _j up: while it is not the root (index 1)
+ * and its parent _j/2 is not `less' than it, swap the two and continue
+ * from the parent.
+ *
+ * Helper for the macros below: it expects _j and _u (index variables)
+ * and x (scratch element passed to swap) to be declared by the caller.
+ */
+#define HEAP_BUBBLE_UP_J(heap,num,less,swap) \
+ while (_j > 1) \
+ { \
+ _u = _j/2; \
+ if (less(heap[_u], heap[_j])) \
+ break; \
+ swap(heap,_u,_j,x); \
+ _j = _u; \
+ }
+
+/*
+ * Turn heap[1..num] (in arbitrary order) into a heap by sifting down
+ * every position from num to 1. `type' is the element type and is used
+ * for the scratch variable handed to swap.
+ */
+#define HEAP_INIT(type,heap,num,less,swap) \
+ do { \
+ uns _i = num; \
+ uns _j, _l; \
+ type x; \
+ while (_i >= 1) \
+ { \
+ _j = _i; \
+ HEAP_BUBBLE_DOWN_J(heap,num,less,swap) \
+ _i--; \
+ } \
+ } while(0)
+
+/*
+ * Remove the root (the minimum with respect to `less'): it is swapped
+ * with the last element, num is decremented (so num must be a
+ * modifiable lvalue) and the new root is sifted down. The removed
+ * element ends up in heap[num+1] with respect to the new value of num.
+ */
+#define HEAP_DELMIN(type,heap,num,less,swap) \
+ do { \
+ uns _j, _l; \
+ type x; \
+ swap(heap,1,num,x); \
+ num--; \
+ _j = 1; \
+ HEAP_BUBBLE_DOWN_J(heap,num,less,swap); \
+ } while(0)
+
+/*
+ * Restore heap order after an element has been appended: sifts
+ * heap[num] up. The macro itself neither stores the element nor changes
+ * num, so the caller has to place the new element at heap[num] first.
+ */
+#define HEAP_INSERT(type,heap,num,less,swap) \
+ do { \
+ uns _j, _u; \
+ type x; \
+ _j = num; \
+ HEAP_BUBBLE_UP_J(heap,num,less,swap); \
+ } while(0)
+
+/*
+ * Restore heap order after the element at index `pos' has been
+ * increased: sifts heap[pos] down.
+ */
+#define HEAP_INCREASE(type,heap,num,less,swap,pos) \
+ do { \
+ uns _j, _l; \
+ type x; \
+ _j = pos; \
+ HEAP_BUBBLE_DOWN_J(heap,num,less,swap); \
+ } while(0)
+
+/*
+ * Remove the element at index `pos': it is swapped with the last
+ * element and num is decremented (so num must be a modifiable lvalue).
+ * The element moved into `pos' is then compared with the removed one
+ * (now at heap[num+1]): if it is `less', it is sifted up, otherwise it
+ * is sifted down.
+ */
+#define HEAP_DELETE(type,heap,num,less,swap,pos) \
+ do { \
+ uns _j, _l, _u; \
+ type x; \
+ _j = pos; \
+ swap(heap,_j,num,x); \
+ num--; \
+ if (less(heap[_j], heap[num+1])) \
+ HEAP_BUBBLE_UP_J(heap,num,less,swap) \
+ else \
+ HEAP_BUBBLE_DOWN_J(heap,num,less,swap); \
+ } while(0)
+
+/* Default swapping macro */
+#define HEAP_SWAP(heap,a,b,t) (t=heap[a], heap[a]=heap[b], heap[b]=t)
--- /dev/null
+/*
+ * UCW Library -- IP address access lists
+ *
+ * (c) 1997--2007 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/clists.h"
+#include "lib/conf.h"
+#include "lib/getopt.h"
+#include "lib/fastbuf.h"
+#include "lib/ipaccess.h"
+
+#include <string.h>
+
+/* One rule of an access list, as read from the configuration */
+struct ipaccess_entry {
+ cnode n; /* list node linking the rule into a clist */
+ int allow; /* "Mode" item: index into { "deny", "allow" }, i.e. non-zero = allow */
+ struct ip_addrmask addr; /* "IP" item: address and mask the rule applies to */
+};
+
+/*
+ * Configuration parser for struct ip_addrmask. Accepted forms are
+ * "ADDRESS", "ADDRESS/PREFIXLEN" (0..32) and "ADDRESS/NETMASK"; a
+ * missing mask means a single host (all mask bits set).
+ *
+ * The input string is modified: the '/' separator is overwritten by a
+ * terminating zero. Returns NULL on success or an error message.
+ */
+static char *
+addrmask_parser(char *c, void *ptr)
+{
+ /*
+ * This is tricky: addrmasks will be compared by memcmp(), so we must ensure
+ * that even the padding between structure members is zeroed out.
+ */
+ struct ip_addrmask *am = ptr;
+ bzero(am, sizeof(*am));
+
+ char *p = strchr(c, '/');
+ if (p)
+ *p++ = 0;
+ char *err = cf_parse_ip(c, &am->addr);
+ if (err)
+ return err;
+ if (p)
+ {
+ uns len;
+ /* Prefix length: set the top `len' bits; len == 32 is special-cased to avoid a 32-bit shift */
+ if (!cf_parse_int(p, &len) && len <= 32)
+ am->mask = ~(len == 32 ? 0 : ~0U >> len);
+ /* Otherwise the part after '/' must be a netmask in address notation */
+ else if (cf_parse_ip(p, &am->mask))
+ return "Invalid prefix length or netmask";
+ }
+ else
+ am->mask = ~0U;
+ return NULL;
+}
+
+/* Configuration dumper for struct ip_addrmask: prints "ADDR/MASK " as two 8-digit hex numbers */
+static void
+addrmask_dumper(struct fastbuf *fb, void *ptr)
+{
+ struct ip_addrmask *am = ptr;
+ bprintf(fb, "%08x/%08x ", am->addr, am->mask);
+}
+
+/* User-defined configuration type describing struct ip_addrmask (parsed and dumped by the functions above) */
+struct cf_user_type ip_addrmask_type = {
+ .size = sizeof(struct ip_addrmask),
+ .name = "ip_addrmask",
+ .parser = addrmask_parser,
+ .dumper = addrmask_dumper
+};
+
+/*
+ * Configuration section describing one struct ipaccess_entry:
+ *   Mode  "deny" or "allow" (stored as 0 or 1 in `allow')
+ *   IP    address with optional mask (see addrmask_parser())
+ */
+struct cf_section ipaccess_cf = {
+ CF_TYPE(struct ipaccess_entry),
+ CF_ITEMS {
+ CF_LOOKUP("Mode", PTR_TO(struct ipaccess_entry, allow), ((char*[]) { "deny", "allow", NULL })),
+ CF_USER("IP", PTR_TO(struct ipaccess_entry, addr), &ip_addrmask_type),
+ CF_END
+ }
+};
+
+/*
+ * Check whether `ip' belongs to the network described by `am':
+ * returns 1 if the address agrees with am->addr in all bits selected
+ * by am->mask, 0 otherwise.
+ */
+int ip_addrmask_match(struct ip_addrmask *am, u32 ip)
+{
+  u32 differing = ip ^ am->addr;
+
+  return (differing & am->mask) == 0;
+}
+
+/*
+ * Evaluate an access list for the given address: the rules are scanned
+ * in list order and the `allow' value of the first matching rule is
+ * returned. If no rule matches, the result is 0 (deny).
+ */
+int
+ipaccess_check(clist *l, u32 ip)
+{
+ CLIST_FOR_EACH(struct ipaccess_entry *, a, *l)
+ if (ip_addrmask_match(&a->addr, ip))
+ return a->allow;
+ return 0;
+}
+
+#ifdef TEST
+
+#include <stdio.h>
+
+static clist t;
+
+static struct cf_section test_cf = {
+ CF_ITEMS {
+ CF_LIST("A", &t, &ipaccess_cf),
+ CF_END
+ }
+};
+
+/*
+ * Test driver: loads the access list "T.A" from the configuration and
+ * then reads one IP address per line from stdin, printing "Allowed",
+ * "Denied" or "Invalid IP address" for each of them.
+ */
+int main(int argc, char **argv)
+{
+  cf_declare_section("T", &test_cf, 0);
+  if (cf_getopt(argc, argv, CF_SHORT_OPTS, CF_NO_LONG_OPTS, NULL) != -1)
+    die("Invalid arguments");
+
+  /*
+   * The buffer is a plain char array (not byte): fgets() and strchr()
+   * operate on char *, and cf_parse_ip() is called with a char * in
+   * addrmask_parser() as well.
+   */
+  char buf[256];
+  while (fgets(buf, sizeof(buf), stdin))
+    {
+      /* Strip the trailing newline, if there is one */
+      char *c = strchr(buf, '\n');
+      if (c)
+	*c = 0;
+      u32 ip;
+      if (cf_parse_ip(buf, &ip))
+	puts("Invalid IP address");
+      else if (ipaccess_check(&t, ip))
+	puts("Allowed");
+      else
+	puts("Denied");
+    }
+  return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- IP address access lists
+ *
+ * (c) 1997--2007 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_IPACCESS_H
+#define _UCW_IPACCESS_H
+
+#include "lib/clists.h"
+
+extern struct cf_section ipaccess_cf;
+int ipaccess_check(clist *l, u32 ip);
+
+/* Low-level handling of addresses and masks */
+
+/* An address together with a mask selecting the bits that must match */
+struct ip_addrmask {
+ u32 addr; /* address to compare against */
+ u32 mask; /* bits of the address that are significant */
+};
+
+extern struct cf_user_type ip_addrmask_type;
+int ip_addrmask_match(struct ip_addrmask *am, u32 ip);
+
+#endif
--- /dev/null
+/*
+ * Knuth-Morris-Pratt's Substring Search for N given strings
+ *
+ * (c) 1999--2005, Robert Spalek <robert@ucw.cz>
+ * (c) 2006, Pavel Charvat <pchar@ucw.cz>
+ *
+ * (In fact, the algorithm is usually referred to as Aho-Corasick,
+ * but that's just an extension of KMP to multiple strings.)
+ */
+
+/*
+ * This is not a normal header file, it's a generator of KMP algorithm.
+ * Each time you include it with parameters set in the corresponding
+ * preprocessor macros, it generates KMP structures and functions
+ * with the parameters given. See lib/kmp.h before reading this description.
+ *
+ * This file defines:
+ *
+ * struct search structure with both the internal and the user-defined variables
+ * used during the search and accessible from all macros
+ *
+ * void search(kmp,search,src) executes the search; search structure is allocated by the caller (possible input/output)
+ *
+ * void run(kmp,src) the same, but automatically allocates search structure from the stack
+ *
+ *
+ * Parameters to the generator (these marked with [*] are mandatory):
+ *
+ * [*] KMPS_PREFIX(x) macro to add a name prefix (used on all global names
+ * defined by the KMP search generator)
+ * [*] KMPS_KMP_PREFIX(x) prefix used for lib/kmp.h
+ *
+ * KMPS_SOURCE user-defined text source (together with KMPS_GET_CHAR);
+ * if unset, the one from lib/kmp.h is taken
+ * KMPS_GET_CHAR(kmp,src,search) analogy to KMP_GET_CHAR, but it must store the next character to search->c
+ *
+ * KMPS_ADD_CONTROLS add control characters (see KMP_CONTROL_CHAR in kmp.h) at both ends of the input string
+ * KMPS_MERGE_CONTROLS merge adjacent control characters to a single one
+ *
+ * KMPS_VARS user-defined variables in struct search (in .u substructure to avoid collisions)
+ *
+ * KMPS_INIT(kmp,src,search) statement executed at the beginning of search()
+ * KMPS_EXIT(kmp,src,search) ... at the end
+ * KMPS_STEP(kmp,src,search) ... after each step (read of next character + current state update)
+ * of the algorithm, but before KMPS_FOUND[_CHAIN]
+ * KMPS_FOUND_CHAIN(kmp,src,search) ... for each state representing locally longest match
+ * (stored in search->out - NOT necessarily search->s!);
+ * all matches form a NULL-terminated link list (search->out, search->out->next, ...)
+ * in order of decreasing length
+ * KMPS_FOUND(kmp,src,search) ... called for every match (in search->out)
+ * KMPS_WANT_BEST algorithm computes globally longest match, which is available
+ * in search->best in KMPS_EXIT; if there is no match, it points to the null state
+ */
+
+#define P(x) KMPS_PREFIX(x)
+#define KP(x) KMPS_KMP_PREFIX(x)
+
+#ifdef KMPS_SOURCE
+typedef KMPS_SOURCE P(search_source_t);
+#else
+typedef KP(source_t) P(search_source_t);
+#endif
+
+#ifndef KMPS_GET_CHAR
+#define KMPS_GET_CHAR(kmp,src,s) (KP(get_char)(kmp, &src, &s->c))
+#endif
+
+/*
+ * State of one run of the search. It is allocated by the caller of
+ * search() and is passed to all KMPS_* hooks.
+ */
+struct P(search) {
+ struct KP(state) *s; /* current state */
+ struct KP(state) *out; /* output state */
+# ifdef KMPS_WANT_BEST
+ struct KP(state) *best; /* longest match */
+# endif
+ KP(char_t) c; /* last character */
+# ifdef KMPS_ADD_CONTROLS
+ uns eof; /* set once the terminating control character has been fed in */
+# endif
+# ifdef KMPS_VARS
+ struct {
+ KMPS_VARS
+ } u; /* user-defined */
+# endif
+};
+
+/*
+ * Run the automaton `kmp' over the text `src', keeping all state in *s.
+ *
+ * Each iteration of the main loop performs one step with the character
+ * in s->c (following back edges until a transition exists, falling back
+ * to the null state), calls the user hooks for the matches ending there
+ * and then reads the next character. Without KMPS_ADD_CONTROLS the
+ * first iteration is entered at start_read, so that a character is read
+ * before the first step; with KMPS_ADD_CONTROLS the first step consumes
+ * an artificial control character and another one is fed in after the
+ * end of the input.
+ */
+static void
+P(search) (struct KP(struct) *kmp, struct P(search) *s, P(search_source_t) src)
+{
+ s->s = &kmp->null;
+# ifdef KMPS_WANT_BEST
+ s->best = &kmp->null;
+# endif
+# ifdef KMPS_ADD_CONTROLS
+ s->c = KP(control)();
+ s->eof = 0;
+# else
+ s->c = 0;
+# endif
+# ifdef KMPS_INIT
+ { KMPS_INIT(kmp, src, s); }
+# endif
+# ifndef KMPS_ADD_CONTROLS
+ goto start_read;
+# endif
+ for (;;)
+ {
+ /* Find the nearest state along the back edges which has a transition on s->c */
+ for (struct KP(state) *t = s->s; t && !(s->s = KP(hash_find)(&kmp->hash, t, s->c)); t = t->back);
+ s->s = s->s ? : &kmp->null;
+
+# ifdef KMPS_STEP
+ { KMPS_STEP(kmp, src, s); }
+# endif
+
+# if defined(KMPS_FOUND) || defined(KMPS_FOUND_CHAIN) || defined(KMPS_WANT_BEST)
+ /* Head of the chain of matches ending here: the state itself if it is a key, else its `next' */
+ s->out = s->s->len ? s->s : s->s->next;
+ if (s->out)
+ {
+# ifdef KMPS_WANT_BEST
+ if (s->out->len > s->best->len)
+ s->best = s->out;
+# endif
+# ifdef KMPS_FOUND_CHAIN
+ { KMPS_FOUND_CHAIN(kmp, src, s); }
+# endif
+# ifdef KMPS_FOUND
+ /* Report every match in the chain; s->out is NULL afterwards */
+ do
+ { KMPS_FOUND(kmp, src, s); }
+ while (s->out = s->out->next);
+# endif
+ }
+# endif
+
+# ifdef KMPS_ADD_CONTROLS
+ if (s->eof)
+ break;
+# endif
+
+# ifndef KMPS_ADD_CONTROLS
+start_read: ;
+# endif
+# ifdef KMPS_MERGE_CONTROLS
+ KP(char_t) last_c = s->c;
+# endif
+
+ /* Read the next character; with KMPS_MERGE_CONTROLS keep reading while a control follows a control */
+ do
+ {
+ if (!KMPS_GET_CHAR(kmp, src, s))
+ {
+# ifdef KMPS_ADD_CONTROLS
+ /* End of input: process one more control character unless s->c already is one */
+ if (!KP(is_control)(kmp, s->c))
+ {
+ s->c = KP(control)();
+ s->eof = 1;
+ break;
+ }
+# endif
+ goto exit;
+ }
+ }
+ while (0
+# ifdef KMPS_MERGE_CONTROLS
+ || (KP(is_control)(kmp, last_c) && KP(is_control)(kmp, s->c))
+# endif
+ );
+ }
+exit: ;
+# ifdef KMPS_EXIT
+ { KMPS_EXIT(kmp, src, s); }
+# endif
+}
+
+/*
+ * Convenience wrapper around search(): uses a search structure on the
+ * stack, which is discarded afterwards, so results are available only
+ * through the KMPS_* hooks.
+ */
+static inline void
+P(run) (struct KP(struct) *kmp, P(search_source_t) src)
+{
+ struct P(search) search;
+ P(search)(kmp, &search, src);
+}
+
+/*
+ * Undefine the helper macros and all the parameters, so that the
+ * generator can be included again with a different set of parameters.
+ * KP is undefined together with P: both are defined at the top of this
+ * file and neither should leak into the including file.
+ */
+#undef P
+#undef KP
+#undef KMPS_PREFIX
+#undef KMPS_KMP_PREFIX
+#undef KMPS_SOURCE
+#undef KMPS_GET_CHAR
+#undef KMPS_ADD_CONTROLS
+#undef KMPS_MERGE_CONTROLS
+#undef KMPS_VARS
+#undef KMPS_INIT
+#undef KMPS_EXIT
+#undef KMPS_FOUND
+#undef KMPS_FOUND_CHAIN
+#undef KMPS_WANT_BEST
+#undef KMPS_STEP
--- /dev/null
+/*
+ * Test of KMP search
+ *
+ * (c) 2006, Pavel Charvat <pchar@ucw.cz>
+ */
+
+#include "lib/lib.h"
+#include "lib/mempool.h"
+#include <string.h>
+
+#if 0
+#define TRACE(x...) do{log(L_DEBUG, x);}while(0)
+#else
+#define TRACE(x...) do{}while(0)
+#endif
+
+/* TEST1 - multiple searches */
+
+#define KMP_PREFIX(x) kmp1_##x
+#define KMP_WANT_CLEANUP
+#include "lib/kmp.h"
+#define KMPS_PREFIX(x) kmp1s1_##x
+#define KMPS_KMP_PREFIX(x) kmp1_##x
+#define KMPS_WANT_BEST
+#define KMPS_EXIT(kmp,src,s) TRACE("Best match has %d characters", s->best->len)
+#include "lib/kmp-search.h"
+#define KMPS_PREFIX(x) kmp1s2_##x
+#define KMPS_KMP_PREFIX(x) kmp1_##x
+#define KMPS_VARS uns count;
+#define KMPS_INIT(kmp,src,s) s->u.count = 0
+#define KMPS_FOUND(kmp,src,s) s->u.count++
+#include "lib/kmp-search.h"
+
+/*
+ * Two different search routines (kmp1s1: longest match, kmp1s2: number
+ * of matches) generated for a single automaton with the keys "ahoj",
+ * "hoj" and "aho".
+ */
+static void
+test1(void)
+{
+ TRACE("Running test1");
+ struct kmp1_struct kmp;
+ kmp1_init(&kmp);
+ kmp1_add(&kmp, "ahoj");
+ kmp1_add(&kmp, "hoj");
+ kmp1_add(&kmp, "aho");
+ kmp1_build(&kmp);
+ /* The text contains "aho" and "hoj", but not "ahoj" */
+ struct kmp1s1_search s1;
+ kmp1s1_search(&kmp, &s1, "asjlahslhalahosjkjhojsas");
+ ASSERT(s1.best->len == 3);
+ /* "ahoj" yields three matches (aho, ahoj, hoj), the later "hoj" a fourth one */
+ struct kmp1s2_search s2;
+ kmp1s2_search(&kmp, &s2, "asjlahslhalahojsjkjhojsas");
+ ASSERT(s2.u.count == 4);
+ kmp1_cleanup(&kmp);
+}
+
+/* TEST2 - various tracing */
+
+#define KMP_PREFIX(x) kmp2_##x
+#define KMP_USE_UTF8
+#define KMP_TOLOWER
+#define KMP_ONLYALPHA
+#define KMP_STATE_VARS char *str; uns id;
+#define KMP_ADD_EXTRA_ARGS uns id
+#define KMP_VARS char *start;
+#define KMP_ADD_INIT(kmp,src) kmp->u.start = src
+#define KMP_ADD_NEW(kmp,src,s) do{ TRACE("Inserting string %s with id %d", kmp->u.start, id); \
+ s->u.str = kmp->u.start; s->u.id = id; }while(0)
+#define KMP_ADD_DUP(kmp,src,s) TRACE("String %s already inserted", kmp->u.start)
+#define KMP_WANT_CLEANUP
+#define KMP_WANT_SEARCH
+#define KMPS_ADD_CONTROLS
+#define KMPS_MERGE_CONTROLS
+#define KMPS_FOUND(kmp,src,s) TRACE("String %s with id %d found", s->out->u.str, s->out->u.id)
+#define KMPS_STEP(kmp,src,s) TRACE("Got to state %p after reading %d", s->s, s->c)
+#include "lib/kmp.h"
+
+/*
+ * Exercises the UTF-8 input path with lowercasing and alpha-only
+ * filtering, duplicate keys and the tracing hooks. Nothing is asserted
+ * here; the test only checks that the run completes.
+ */
+static void
+test2(void)
+{
+ TRACE("Running test2");
+ struct kmp2_struct kmp;
+ kmp2_init(&kmp);
+ kmp2_add(&kmp, "ahoj", 1);
+ kmp2_add(&kmp, "ahoj", 2);
+ kmp2_add(&kmp, "hoj", 3);
+ kmp2_add(&kmp, "aho", 4);
+ kmp2_add(&kmp, "aba", 5);
+ kmp2_add(&kmp, "aba", 5);
+ kmp2_add(&kmp, "pěl", 5);
+ kmp2_build(&kmp);
+ kmp2_run(&kmp, "Šíleně žluťoučký kůň úpěl ďábelské ódy labababaks sdahojdhsaladsjhla");
+ kmp2_cleanup(&kmp);
+}
+
+/* TEST3 - random tests */
+
+#define KMP_PREFIX(x) kmp3_##x
+#define KMP_STATE_VARS uns index;
+#define KMP_ADD_EXTRA_ARGS uns index
+#define KMP_VARS char *start;
+#define KMP_ADD_INIT(kmp,src) kmp->u.start = src
+#define KMP_ADD_NEW(kmp,src,s) s->u.index = index
+#define KMP_ADD_DUP(kmp,src,s) *(kmp->u.start) = 0
+#define KMP_WANT_CLEANUP
+#define KMP_WANT_SEARCH
+#define KMPS_VARS uns sum, *cnt;
+#define KMPS_FOUND(kmp,src,s) do{ ASSERT(s->u.cnt[s->out->u.index]); s->u.cnt[s->out->u.index]--; s->u.sum--; }while(0)
+#include "lib/kmp.h"
+
+/*
+ * Randomized comparison of the automaton with a brute-force search.
+ *
+ * In each of 100 rounds a random set of short keys over {a,b,c} is
+ * inserted and 10 random texts over {a,b,c,d} are searched. For every
+ * text the expected number of occurrences of each key is counted by
+ * strncmp(); KMPS_FOUND then decrements the counter of every key the
+ * automaton reports (asserting it is still positive), so the total must
+ * drop exactly to zero.
+ *
+ * Duplicate keys are neutralized by KMP_ADD_DUP, which overwrites the
+ * first character of the duplicate with zero; such strings (and empty
+ * ones) are skipped by the `*s[j]' test below.
+ */
+static void
+test3(void)
+{
+ TRACE("Running test3");
+ struct mempool *pool = mp_new(1024);
+ for (uns testn = 0; testn < 100; testn++)
+ {
+ mp_flush(pool);
+ uns n = random_max(100);
+ char *s[n];
+ struct kmp3_struct kmp;
+ kmp3_init(&kmp);
+ /* Generate and insert the keys */
+ for (uns i = 0; i < n; i++)
+ {
+ uns m = random_max(10);
+ s[i] = mp_alloc(pool, m + 1);
+ for (uns j = 0; j < m; j++)
+ s[i][j] = 'a' + random_max(3);
+ s[i][m] = 0;
+ kmp3_add(&kmp, s[i], i);
+ }
+ kmp3_build(&kmp);
+ for (uns i = 0; i < 10; i++)
+ {
+ /* Generate a text */
+ uns m = random_max(100);
+ byte b[m + 1];
+ for (uns j = 0; j < m; j++)
+ b[j] = 'a' + random_max(4);
+ b[m] = 0;
+ /* Count the expected matches by brute force */
+ uns cnt[n];
+ struct kmp3_search search;
+ search.u.sum = 0;
+ search.u.cnt = cnt;
+ for (uns j = 0; j < n; j++)
+ {
+ cnt[j] = 0;
+ if (*s[j])
+ for (uns k = 0; k < m; k++)
+ if (!strncmp(b + k, s[j], strlen(s[j])))
+ cnt[j]++, search.u.sum++;
+ }
+ /* The automaton must report exactly the same matches */
+ kmp3_search(&kmp, &search, b);
+ ASSERT(search.u.sum == 0);
+ }
+ kmp3_cleanup(&kmp);
+ }
+ mp_delete(pool);
+}
+
+/* TEST4 - user-defined character type */
+
+struct kmp4_struct;
+struct kmp4_state;
+
+/*
+ * Character comparison for test 4, where a "character" is a pointer to
+ * a byte and NULL serves as the control character: two characters are
+ * equal if they are the same pointer or if both are non-NULL and point
+ * to equal bytes.
+ */
+static inline int
+kmp4_eq(struct kmp4_struct *kmp UNUSED, byte *a, byte *b)
+{
+  if (a == b)
+    return 1;
+  if (!a || !b)
+    return 0;
+  return *a == *b;
+}
+
+/*
+ * Hash of a transition (state, character) for test 4. It is derived
+ * from the byte pointed to (0 for the NULL control character) and the
+ * address of the source state, so characters equal under kmp4_eq()
+ * hash equally.
+ */
+static inline uns
+kmp4_hash(struct kmp4_struct *kmp UNUSED, struct kmp4_state *s, byte *c)
+{
+ return (c ? (*c << 16) : 0) + (uns)(uintptr_t)s;
+}
+
+#define KMP_PREFIX(x) kmp4_##x
+#define KMP_CHAR byte *
+#define KMP_CONTROL_CHAR NULL
+#define KMP_GET_CHAR(kmp,src,c) ({ c = src++; !!*c; })
+#define KMP_GIVE_HASHFN
+#define KMP_GIVE_EQ
+#define KMP_WANT_CLEANUP
+#define KMP_WANT_SEARCH
+#define KMPS_FOUND(kmp,src,s) TRACE("found")
+#define KMPS_ADD_CONTROLS
+#define KMPS_MERGE_CONTROLS
+#include "lib/kmp.h"
+
+/*
+ * Exercises a user-defined character type (pointer to byte) with custom
+ * hash and comparison functions. Nothing is asserted here; the test
+ * only checks that the run completes.
+ */
+static void
+test4(void)
+{
+ TRACE("Running test4");
+ struct kmp4_struct kmp;
+ kmp4_init(&kmp);
+ kmp4_add(&kmp, "ahoj");
+ kmp4_build(&kmp);
+ kmp4_run(&kmp, "djdhaskjdahoahaahojojshdaksjahdahojskj");
+ kmp4_cleanup(&kmp);
+}
+
+int
+main(void)
+{
+ test1();
+ test2();
+ test3();
+ test4();
+ return 0;
+}
--- /dev/null
+# Tests for the kmp module
+
+Run: ../obj/lib/kmp-test
--- /dev/null
+/*
+ * Knuth-Morris-Pratt's Substring Search for N given strings
+ *
+ * (c) 1999--2005, Robert Spalek <robert@ucw.cz>
+ * (c) 2006, Pavel Charvat <pchar@ucw.cz>
+ *
+ * (In fact, the algorithm is usually referred to as Aho-Corasick,
+ * but that's just an extension of KMP to multiple strings.)
+ */
+
+/*
+ * This is not a normal header file, it's a generator of KMP algorithm.
+ * Each time you include it with parameters set in the corresponding
+ * preprocessor macros, it generates KMP structures and functions
+ * with the parameters given.
+ *
+ * This file contains only construction of the automaton. The search
+ * itself can be generated by inclusion of file lib/kmp-search.h.
+ * Separated headers allow the user to define multiple search
+ * routines for one common set of key strings.
+ *
+ * Example:
+ *
+ * #define KMP_PREFIX(x) kmp_##x
+ * #define KMP_WANT_CLEANUP
+ * #define KMP_WANT_SEARCH // includes lib/kmp-search.h automatically
+ * #define KMPS_FOUND(kmp,src,s) printf("found\n")
+ * #include "lib/kmp.h"
+ *
+ * [...]
+ *
+ * struct kmp_struct kmp; // a structure describing the whole automaton
+ * kmp_init(&kmp); // initialization (must be called before all other functions)
+ *
+ * // add key strings we want to search
+ * kmp_add(&kmp, "aaa");
+ * kmp_add(&kmp, "abc");
+ *
+ * // complete the automaton, no more strings can be added later
+ * kmp_build(&kmp);
+ *
+ * // example of search, should print single "found" to stdout
+ * kmp_run(&kmp, "aabaabca");
+ *
+ * // destroy all internal structures
+ * kmp_cleanup(&kmp);
+ *
+ *
+ * Brief description of all parameters:
+ *
+ * Basic parameters:
+ * KMP_PREFIX(x) macro to add a name prefix (used on all global names
+ * defined by the KMP generator); mandatory;
+ * we abbreviate this to P(x) below
+ *
+ * KMP_CHAR alphabet type, the default is u16
+ *
+ * KMP_SOURCE user-defined text source; KMP_GET_CHAR must
+ * KMP_GET_CHAR(kmp,src,c) return zero at the end or nonzero together with the next character in c otherwise;
+ * if not defined, zero-terminated array of bytes is used as the input
+ *
+ * KMP_VARS user-defined variables in 'struct P(struct)'
+ * -- a structure describing the whole automaton;
+ * these variables are stored in .u substructure to avoid collisions
+ * KMP_STATE_VARS user-defined variables in 'struct P(state)'
+ * -- created for each state of the automaton;
+ * these variables are stored in .u substructure to avoid collisions
+ *
+ * Parameters which select how the input is interpreted (if KMP_SOURCE is unset):
+ * KMP_USE_ASCII reads single bytes from the input (default)
+ * KMP_USE_UTF8 reads UTF-8 characters from the input (valid UTF-8 needed)
+ * KMP_TOLOWER converts all to lowercase
+ * KMP_UNACCENT removes accents
+ * KMP_ONLYALPHA converts non-alphas to KMP_CONTROL_CHAR (see below)
+ *
+ * Parameters controlling add(kmp, src):
+ * KMP_ADD_EXTRA_ARGS extra arguments, should be used carefully because of possible collisions
+ * KMP_ADD_INIT(kmp,src) called in the beginning of add(), src is the first
+ * KMP_INIT_STATE(kmp,s) initialization of a new state s (called before KMP_ADD_{NEW,DUP});
+ * null state is not included and should be handled after init() if necessary;
+ * all user-defined data are filled by zeros before call to KMP_INIT_STATE
+ * KMP_ADD_NEW(kmp,src,s) initialize last state of every new key string (called after KMP_INIT_STATE);
+ * the string must be parsed before so src is after the last string's character
+ * KMP_ADD_DUP(kmp,src,s) analogy of KMP_ADD_NEW called for duplicates
+ *
+ * Parameters to build():
+ * KMP_BUILD_STATE(kmp,s) called for all states (including null) in order of non-decreasing tree depth
+ *
+ * Other parameters:
+ * KMP_WANT_CLEANUP define cleanup()
+ * KMP_WANT_SEARCH includes lib/kmp-search.h with the same prefix;
+ * there can be multiple search variants for a single KMP automaton
+ * KMP_USE_POOL allocates in a given pool
+ * KMP_CONTROL_CHAR special control character (default is ':')
+ * KMP_GIVE_ALLOC if set, you must supply custom allocation functions:
+ * void *alloc(unsigned int size) -- allocate space for
+ * a state. Default is pooled allocation from a local pool or HASH_USE_POOL.
+ * void free(void *) -- the converse.
+ * KMP_GIVE_HASHFN if set, you must supply custom hash function:
+ * unsigned int hash(struct P(struct) *kmp, struct P(state) *state, KMP_CHAR c);
+ * default hash function works only for integer character types
+ * KMP_GIVE_EQ if set, you must supply custom compare function of two characters:
+ * int eq(struct P(struct) *kmp, KMP_CHAR a, KMP_CHAR b);
+ * default is 'a == b'
+ */
+
+#ifndef KMP_PREFIX
+#error Missing KMP_PREFIX
+#endif
+
+#include "lib/mempool.h"
+#include <alloca.h>
+#include <string.h>
+
+#define P(x) KMP_PREFIX(x)
+
+#ifdef KMP_CHAR
+typedef KMP_CHAR P(char_t);
+#else
+typedef u16 P(char_t);
+#endif
+
+typedef u32 P(len_t);
+
+#ifdef KMP_NODE
+typedef KMP_NODE P(node_t);
+#else
+typedef struct {} P(node_t);
+#endif
+
+struct P(struct);
+
+/*
+ * One state of the automaton, i.e. one node of the tree of key
+ * prefixes. Note that until build() is called, `back' and `next' are
+ * used differently: they hold the list of sons of each state (see
+ * hash_init_key()).
+ */
+struct P(state) {
+ struct P(state) *from; /* state with the previous character (forms a tree with null state in the root) */
+ struct P(state) *back; /* backwards edge to the longest shorter state with same suffix */
+ struct P(state) *next; /* the longest of shorter matches (or NULL) */
+ P(len_t) len; /* state depth if it represents a key string, zero otherwise */
+ P(char_t) c; /* last character of the represented string */
+ struct {
+# ifdef KMP_STATE_VARS
+ KMP_STATE_VARS
+# endif
+ } u; /* user-defined data*/
+};
+
+/* Control char */
+/* Return the control character: KMP_CONTROL_CHAR if it is defined, ':' otherwise */
+static inline P(char_t)
+P(control) (void)
+{
+# ifndef KMP_CONTROL_CHAR
+  return ':';
+# else
+  return KMP_CONTROL_CHAR;
+# endif
+}
+
+/* User-defined source */
+struct P(hash_table);
+
+/*
+ * Hash function of the transition table, keyed by (source state,
+ * character). With KMP_GIVE_HASHFN it forwards to the user's hash(),
+ * handing it the table pointer cast to the automaton structure (the
+ * table is the first member of struct P(struct)). The default combines
+ * the character shifted by 16 bits with the address of the state.
+ */
+#define HASH_GIVE_HASHFN
+#ifdef KMP_GIVE_HASHFN
+static inline uns
+P(hash_hash) (struct P(hash_table) *t, struct P(state) *f, P(char_t) c)
+{
+ return P(hash) ((struct P(struct) *) t, f, c);
+}
+#else
+static inline uns
+P(hash_hash) (struct P(hash_table) *t UNUSED, struct P(state) *f, P(char_t) c)
+{
+ return (((uns)c) << 16) + (uns)(uintptr_t)f;
+}
+#endif
+
+#ifndef KMP_GIVE_EQ
+/* Default comparison of two characters: plain `=='. Replaced by the user's eq() under KMP_GIVE_EQ. */
+static inline int
+P(eq) (struct P(struct) *kmp UNUSED, P(char_t) c1, P(char_t) c2)
+{
+ return c1 == c2;
+}
+#endif
+
+/* Test whether `c' is the control character, using eq() for the comparison */
+static inline int
+P(is_control) (struct P(struct) *kmp, P(char_t) c)
+{
+ return P(eq) (kmp, c, P(control)());
+}
+
+/*
+ * Key comparison of the transition table: two transitions are equal if
+ * they leave the same state (pointer identity) on characters equal
+ * according to eq().
+ */
+#define HASH_GIVE_EQ
+static inline int
+P(hash_eq) (struct P(hash_table) *t, struct P(state) *f1, P(char_t) c1, struct P(state) *f2, P(char_t) c2)
+{
+ return f1 == f2 && P(eq)((struct P(struct) *) t, c1, c2);
+}
+
+#ifdef KMP_GIVE_ALLOC
+/*
+ * With KMP_GIVE_ALLOC, states are allocated and freed by the
+ * user-supplied alloc()/free(); these wrappers only convert the table
+ * pointer to a pointer to the automaton structure.
+ */
+#define HASH_GIVE_ALLOC
+static inline void *
+P(hash_alloc) (struct P(hash_table) *t, uns size)
+{
+ return P(alloc) ((struct P(struct) *) t, size);
+}
+
+static inline void
+P(hash_free) (struct P(hash_table) *t, void *ptr)
+{
+ P(free) ((struct P(struct) *) t, ptr);
+}
+#endif
+
+/*
+ * Initialize a newly created state `s' reached from state `f' by
+ * character `c': the whole state is zeroed, KMP_INIT_STATE (if any) is
+ * run, and the state is prepended to the list of sons of `f'. That list
+ * is temporarily kept in the back/next pointers (f->back is the first
+ * son, s->next the next sibling) until build() replaces them.
+ */
+#define HASH_GIVE_INIT_KEY
+static inline void
+P(hash_init_key) (struct P(hash_table) *t UNUSED, struct P(state) *s, struct P(state) *f, P(char_t) c)
+{
+ bzero(s, sizeof(*s));
+# ifdef KMP_INIT_STATE
+ struct P(struct) *kmp = (struct P(struct) *)t;
+ { KMP_INIT_STATE(kmp, s); }
+# endif
+ s->from = f;
+ s->c = c;
+ s->next = f->back; /* the pointers hold the link-list of sons... changed in build() */
+ f->back = s;
+}
+
+/*
+ * Instantiate the hash table of transitions: nodes are the states
+ * themselves, keyed by the pair (from, c). P is undefined around the
+ * inclusion (and restored afterwards), so all names are spelled with
+ * KMP_PREFIX here. States are allocated from KMP_USE_POOL if given,
+ * otherwise from an automatically created pool (HASH_AUTO_POOL).
+ */
+#undef P
+#define HASH_PREFIX(x) KMP_PREFIX(hash_##x)
+#define HASH_NODE struct KMP_PREFIX(state)
+#define HASH_KEY_COMPLEX(x) x from, x c
+#define HASH_KEY_DECL struct KMP_PREFIX(state) *from, KMP_PREFIX(char_t) c
+#define HASH_WANT_NEW
+#define HASH_WANT_FIND
+#ifdef KMP_WANT_CLEANUP
+#define HASH_WANT_CLEANUP
+#endif
+#if defined(KMP_USE_POOL)
+#define HASH_USE_POOL KMP_USE_POOL
+#else
+#define HASH_AUTO_POOL 4096
+#endif
+#define HASH_CONSERVE_SPACE
+#define HASH_TABLE_DYNAMIC
+#include "lib/hashtable.h"
+#define P(x) KMP_PREFIX(x)
+
+/*
+ * The whole automaton. The hash table has to stay the first member:
+ * the hash callbacks above cast a pointer to the table to a pointer to
+ * this structure.
+ */
+struct P(struct) {
+ struct P(hash_table) hash; /* hash table of state transitions */
+ struct P(state) null; /* null state */
+ struct {
+# ifdef KMP_VARS
+ KMP_VARS
+# endif
+ } u; /* user-defined data */
+};
+
+#ifdef KMP_SOURCE
+typedef KMP_SOURCE P(source_t);
+#else
+typedef char *P(source_t);
+#endif
+
+/*
+ * get_char(kmp, src, c): read the next character from *src (advancing
+ * it) and store it to *c. Returns non-zero if a character was read and
+ * zero at the end of the input.
+ *
+ * With KMP_GET_CHAR the user-supplied macro is used. Otherwise the
+ * source is a zero-terminated string read either as UTF-8
+ * (KMP_USE_UTF8) or byte by byte (the default), optionally mapping
+ * non-alphabetic characters to the control character (KMP_ONLYALPHA),
+ * lowercasing (KMP_TOLOWER) and removing accents (KMP_UNACCENT, UTF-8
+ * only).
+ */
+#ifdef KMP_GET_CHAR
+static inline int
+P(get_char) (struct P(struct) *kmp UNUSED, P(source_t) *src UNUSED, P(char_t) *c UNUSED)
+{
+  return KMP_GET_CHAR(kmp, (*src), (*c));
+}
+#else
+#  if defined(KMP_USE_UTF8)
+#    include "lib/unicode.h"
+#    if defined(KMP_ONLYALPHA) || defined(KMP_TOLOWER) || defined(KMP_UNACCENT)
+#      include "charset/unicat.h"
+#    endif
+#  else
+     /* Byte-wise reading is the default, so it must not depend on KMP_USE_ASCII being defined */
+#    if defined(KMP_ONLYALPHA) || defined(KMP_TOLOWER)
+#      include "lib/chartype.h"
+#    endif
+#  endif
+static inline int
+P(get_char) (struct P(struct) *kmp UNUSED, P(source_t) *src, P(char_t) *c)
+{
+# ifdef KMP_USE_UTF8
+  uns cc;
+  *src = utf8_get(*src, &cc);
+#   ifdef KMP_ONLYALPHA
+  if (!cc) {}
+  else if (!Ualpha(cc))
+    cc = P(control)();
+  else
+#   endif
+    {
+#   ifdef KMP_TOLOWER
+      cc = Utolower(cc);
+#   endif
+#   ifdef KMP_UNACCENT
+      cc = Uunaccent(cc);
+#   endif
+    }
+# else
+  uns cc = *(*src)++;
+#   ifdef KMP_ONLYALPHA
+  if (!cc) {}
+  else if (!Calpha(cc))
+    cc = P(control)();
+  else
+#   endif
+    {
+      /*
+       * The braces are essential: without them and without KMP_TOLOWER,
+       * the `else' above would swallow the store to *c below.
+       */
+#   ifdef KMP_TOLOWER
+      cc = Clocase(cc);
+#   endif
+#   ifdef KMP_UNACCENT
+#   error Do not know how to unaccent ASCII characters
+#   endif
+    }
+# endif
+  *c = cc;
+  return !!cc;
+}
+#endif
+
+/*
+ * Add a key string to the automaton (before build() is called).
+ *
+ * Returns the state representing the whole string, or NULL if the
+ * string is empty. KMP_ADD_NEW is run for a string which was not
+ * present yet, KMP_ADD_DUP for a string added before; in both cases
+ * `src' already points after the end of the string.
+ */
+static struct P(state) *
+P(add) (struct P(struct) *kmp, P(source_t) src
+# ifdef KMP_ADD_EXTRA_ARGS
+ , KMP_ADD_EXTRA_ARGS
+# endif
+)
+{
+# ifdef KMP_ADD_INIT
+ { KMP_ADD_INIT(kmp, src); }
+# endif
+
+ P(char_t) c;
+ if (!P(get_char)(kmp, &src, &c))
+ return NULL;
+ struct P(state) *p = &kmp->null, *s;
+ uns len = 0;
+ /* Follow the existing states as long as possible */
+ do
+ {
+ s = P(hash_find)(&kmp->hash, p, c);
+ if (!s)
+ /* No such transition: create states for the rest of the string; the last one is certainly new */
+ for (;;)
+ {
+ s = P(hash_new)(&kmp->hash, p, c);
+ len++;
+ if (!(P(get_char)(kmp, &src, &c)))
+ goto enter_new;
+ p = s;
+ }
+ p = s;
+ len++;
+ }
+ while (P(get_char)(kmp, &src, &c));
+ /* The string ends in an existing state; a non-zero len marks it as an already known key */
+ if (s->len)
+ {
+# ifdef KMP_ADD_DUP
+ { KMP_ADD_DUP(kmp, src, s); }
+# endif
+ return s;
+ }
+enter_new:
+ s->len = len;
+# ifdef KMP_ADD_NEW
+ { KMP_ADD_NEW(kmp, src, s); }
+# endif
+ return s;
+}
+
+/* Initialize an empty automaton: clear the null state and set up the transition table */
+static void
+P(init) (struct P(struct) *kmp)
+{
+ bzero(&kmp->null, sizeof(struct P(state)));
+ P(hash_init)(&kmp->hash);
+}
+
+#ifdef KMP_WANT_CLEANUP
+/* Destroy the automaton by cleaning up the transition table, which owns all states except the null state */
+static inline void
+P(cleanup) (struct P(struct) *kmp)
+{
+ P(hash_cleanup)(&kmp->hash);
+}
+#endif
+
+/* Return non-zero if the automaton contains no states except the null state */
+static inline int
+P(empty) (struct P(struct) *kmp)
+{
+  return kmp->hash.hash_count == 0;
+}
+
+/*
+ * Return the head of the chain of matches ending in state `s': the
+ * state itself if it represents a key string, otherwise s->next (which
+ * may be NULL).
+ */
+static inline struct P(state) *
+P(chain_start) (struct P(state) *s)
+{
+  if (s->len)
+    return s;
+  return s->next;
+}
+
+/*
+ * Complete the automaton after all keys have been added: compute the
+ * `back' and `next' pointers of all states.
+ *
+ * The states are processed breadth-first using a queue with one slot
+ * per state (allocated on the stack). When a state is taken from the
+ * queue, its sons -- still linked through back/next by
+ * hash_init_key() -- are enqueued first, and only then are its own
+ * pointers overwritten by their final values. The parent has always
+ * been finished earlier, so its `back' edge can be followed.
+ */
+static void
+P(build) (struct P(struct) *kmp)
+{
+ if (P(empty)(kmp))
+ return;
+ uns read = 0, write = 0;
+ struct P(state) *fifo[kmp->hash.hash_count], *null = &kmp->null;
+ /* Start with the sons of the null state */
+ for (struct P(state) *s = null->back; s; s = s->next)
+ fifo[write++] = s;
+ null->back = NULL;
+# ifdef KMP_BUILD_STATE
+ { KMP_BUILD_STATE(kmp, null); }
+# endif
+ while (read != write)
+ {
+ struct P(state) *s = fifo[read++], *t;
+ /* Enqueue the sons before the list of sons gets destroyed */
+ for (t = s->back; t; t = t->next)
+ fifo[write++] = t;
+ /* Walk the back edges starting at the parent's one until a transition on s->c exists */
+ for (t = s->from->back; 1; t = t->back)
+ {
+ if (!t)
+ {
+ /* Ran past the null state: no proper suffix is a prefix of any key */
+ s->back = null;
+ s->next = NULL;
+ break;
+ }
+ s->back = P(hash_find)(&kmp->hash, t, s->c);
+ if (s->back)
+ {
+ s->next = s->back->len ? s->back : s->back->next;
+ break;
+ }
+ }
+# ifdef KMP_BUILD_STATE
+ { KMP_BUILD_STATE(kmp, s); }
+# endif
+ }
+}
+
+/*
+ * Undefine the parameters, so that the generator can be included again
+ * with a different set of them. KMP_PREFIX is kept until the end,
+ * because it is still needed for generating the search routine.
+ *
+ * NOTE(review): KMP_NODE and KMP_WANT_CLEANUP are tested above, but they
+ * are not undefined here, so they stay in effect for subsequent
+ * inclusions -- confirm whether that is intended.
+ */
+#undef P
+#undef KMP_CHAR
+#undef KMP_SOURCE
+#undef KMP_GET_CHAR
+#undef KMP_VARS
+#undef KMP_STATE_VARS
+#undef KMP_CONTEXT
+#undef KMP_USE_ASCII
+#undef KMP_USE_UTF8
+#undef KMP_TOLOWER
+#undef KMP_UNACCENT
+#undef KMP_ONLYALPHA
+#undef KMP_CONTROL_CHAR
+#undef KMP_ADD_EXTRA_ARGS
+#undef KMP_ADD_INIT
+#undef KMP_ADD_NEW
+#undef KMP_ADD_DUP
+#undef KMP_INIT_STATE
+#undef KMP_BUILD_STATE
+#undef KMP_USE_POOL
+#undef KMP_GIVE_ALLOC
+#undef KMP_GIVE_HASHFN
+#undef KMP_GIVE_EQ
+
+/* Optionally generate a search routine sharing the prefix of the automaton */
+#ifdef KMP_WANT_SEARCH
+# undef KMP_WANT_SEARCH
+# define KMPS_PREFIX(x) KMP_PREFIX(x)
+# define KMPS_KMP_PREFIX(x) KMP_PREFIX(x)
+# include "lib/kmp-search.h"
+#endif
+
+#undef KMP_PREFIX
--- /dev/null
+/* Test of large files */
+
+#include "lib/lib.h"
+#include "lib/fastbuf.h"
+
+#include <stdlib.h>
+#include <time.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#define BLOCK (1<<10)
+#define COUNT (5<<20)
+#define TESTS (1<<20)
+
+/*
+ * Large-file test: read TESTS randomly chosen BLOCK-sized blocks of
+ * /big/robert/large-file and check that byte 17 of each block equals the low
+ * 8 bits of the block index. The disabled (#if 0) branch creates the file by
+ * writing COUNT blocks, each filled with the low 8 bits of its index.
+ * Dies on the first mismatch, returns 0 otherwise.
+ */
+int main(void)
+{
+  struct fastbuf *b;
+  byte block[BLOCK];
+  uns i;
+
+  /* random() below draws from the generator seeded by srandom(); srand() only seeds rand() */
+  srandom(time(NULL));
+#if 0
+  b = bopen("/big/robert/large-file", O_CREAT | O_TRUNC | O_RDWR, 1<<20);
+  if (!b)
+    die("Cannot create large-file");
+
+  msg(L_DEBUG, "Writing %d blocks of size %d", COUNT, BLOCK);
+  for (i=0; i<COUNT; i++)
+    {
+      memset(block, i & 0xff, BLOCK);
+      bwrite(b, block, BLOCK);
+      if ( i%1024 == 0 )
+        {
+          printf("\r%10d", i);
+          fflush(stdout);
+        }
+    }
+#else
+  b = bopen("/big/robert/large-file", O_RDWR, 1<<20);
+  if (!b)
+    die("Cannot open large-file");
+#endif
+  msg(L_DEBUG, "Checking the file contents in %d tests", TESTS);
+  for (i=0; i<TESTS; i++)
+    {
+      uns idx = random()%COUNT;
+      /*
+       * Widen before multiplying: idx*BLOCK can reach 5 GB, which wraps around
+       * when the product is computed in the type of idx (presumably 32 bits)
+       * and only then converted to sh_off_t.
+       */
+      sh_off_t ofs = (sh_off_t) idx * BLOCK;
+      bseek(b, ofs, SEEK_SET);
+      bread(b, block, BLOCK);
+      if (block[17] != (idx & 0xff))
+        die("Invalid block %d in test %d: %x != %x", idx, i, block[17], idx & 0xff);
+      if ( i%16 == 0 )
+        {
+          printf("\r%10d", i);
+          fflush(stdout);
+        }
+    }
+  msg(L_DEBUG, "Done");
+
+  bclose(b);
+  return 0;
+}
--- /dev/null
+/*
+ * UCW Library -- Large File Support
+ *
+ * (c) 1999--2002 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_LFS_H
+#define _UCW_LFS_H
+
+#include <fcntl.h>
+#include <unistd.h>
+
+/*
+ * sh_* wrappers around the file API: with CONFIG_LFS they map to the explicit
+ * 64-bit variants, otherwise to the plain functions. Both branches define the
+ * same set of names, in the same order.
+ */
+#ifdef CONFIG_LFS
+
+#define sh_open open64
+#define sh_seek lseek64
+#define sh_ftruncate ftruncate64
+#define sh_mmap(a,l,p,f,d,o) mmap64(a,l,p,f,d,o)
+#define sh_pread pread64
+#define sh_pwrite pwrite64
+#define sh_stat stat64
+#define sh_fstat fstat64
+typedef struct stat64 sh_stat_t;
+
+#else /* !CONFIG_LFS */
+
+#define sh_open open
+#define sh_seek(f,o,w) lseek(f,o,w)
+#define sh_ftruncate(f,o) ftruncate(f,o)
+#define sh_mmap(a,l,p,f,d,o) mmap(a,l,p,f,d,o)
+#define sh_pread pread
+#define sh_pwrite pwrite
+#define sh_stat stat
+#define sh_fstat fstat
+typedef struct stat sh_stat_t;
+
+#endif /* !CONFIG_LFS */
+
+#if defined(_POSIX_SYNCHRONIZED_IO) && (_POSIX_SYNCHRONIZED_IO > 0)
+#define sh_fdatasync fdatasync
+#else
+#define sh_fdatasync fsync
+#endif
+
+#define HAVE_PREAD
+
+/*
+ * Return the size of the file called name, obtained by seeking to its end.
+ * Dies when the file cannot be opened; the result of the seek itself is
+ * returned unchecked.
+ */
+static inline sh_off_t
+sh_file_size(const char *name)
+{
+  int fd;
+  sh_off_t size;
+
+  if ((fd = sh_open(name, O_RDONLY)) < 0)
+    die("Cannot open %s: %m", name);
+  size = sh_seek(fd, 0, SEEK_END);
+  close(fd);
+  return size;
+}
+
+#endif /* !_UCW_LFS_H */
--- /dev/null
+/*
+ * The UCW Library -- Miscellaneous Functions
+ *
+ * (c) 1997--2007 Martin Mares <mj@ucw.cz>
+ * (c) 2005 Tomas Valla <tom@ucw.cz>
+ * (c) 2006 Robert Spalek <robert@ucw.cz>
+ * (c) 2007 Pavel Charvat <pchar@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_LIB_H
+#define _UCW_LIB_H
+
+#include "lib/config.h"
+#include <stdarg.h>
+
+/* Macros for handling structures, offsets and alignment */
+
+/*
+ * CHECK_PTR_TYPE(x, type)  evaluates to x; the arithmetic on (type)(x) is only there to involve the type
+ * PTR_TO(s, i)             address of member i in a structure of type s placed at address 0
+ * OFFSETOF(s, i)           that address converted to unsigned int
+ * SKIP_BACK(s, i, p)       pointer to the structure of type s whose member i is located at p
+ * ALIGN_TO(s, a)           s rounded up to a multiple of a (the mask trick requires a to be a power of two)
+ * ALIGN_PTR(p, s)          p rounded up to a multiple of s (any s), keeping the type of p
+ * UNALIGNED_PART(ptr, type) remainder of ptr modulo sizeof(type)
+ *
+ * The arguments p of SKIP_BACK and a of ALIGN_TO are parenthesized so that
+ * expression arguments such as 1<<2 expand correctly.
+ */
+#define CHECK_PTR_TYPE(x, type) ((x)-(type)(x) + (type)(x))
+#define PTR_TO(s, i) &((s*)0)->i
+#define OFFSETOF(s, i) ((unsigned int) PTR_TO(s, i))
+#define SKIP_BACK(s, i, p) ((s *)((char *)(p) - OFFSETOF(s, i)))
+#define ALIGN_TO(s, a) (((s)+(a)-1)&~((a)-1))
+#define ALIGN_PTR(p, s) ((uintptr_t)(p) % (s) ? (typeof(p))((uintptr_t)(p) + (s) - (uintptr_t)(p) % (s)) : (p))
+#define UNALIGNED_PART(ptr, type) (((uintptr_t) (ptr)) % sizeof(type))
+
+/* Some other macros */
+
+#define MIN(a,b) (((a)<(b))?(a):(b))
+#define MAX(a,b) (((a)>(b))?(a):(b))
+/* Clamp x (converted to int) into the range min..max; all arguments are parenthesized so that expression arguments expand correctly */
+#define CLAMP(x,min,max) ({ int _t=(x); (_t < (min)) ? (min) : (_t > (max)) ? (max) : _t; })
+#define ABS(x) ((x) < 0 ? -(x) : (x))
+#define ARRAY_SIZE(a) (sizeof(a)/sizeof(*(a)))
+#define STRINGIFY(x) #x
+#define STRINGIFY_EXPANDED(x) STRINGIFY(x)
+#define GLUE(x,y) x##y
+#define GLUE_(x,y) x##_##y
+
+#define COMPARE(x,y) do { if ((x)<(y)) return -1; if ((x)>(y)) return 1; } while(0)
+#define REV_COMPARE(x,y) COMPARE(y,x)
+#define COMPARE_LT(x,y) do { if ((x)<(y)) return 1; if ((x)>(y)) return 0; } while(0)
+#define COMPARE_GT(x,y) COMPARE_LT(y,x)
+
+#define ROL(x, bits) (((x) << (bits)) | ((x) >> (sizeof(uns)*8 - (bits)))) /* Bitwise rotation of an uns to the left */
+
+/* GCC Extensions */
+
+#ifdef __GNUC__
+
+#undef inline
+#define NONRET __attribute__((noreturn))
+#define UNUSED __attribute__((unused))
+#define CONSTRUCTOR __attribute__((constructor))
+#define PACKED __attribute__((packed))
+#define CONST __attribute__((const))
+#define PURE __attribute__((pure))
+#define FORMAT_CHECK(x,y,z) __attribute__((format(x,y,z)))
+#define likely(x) __builtin_expect((x),1)
+#define unlikely(x) __builtin_expect((x),0)
+
+/*
+ * Inlining control. The always_inline and noinline attributes are used from
+ * GCC 3.3 on; for older compilers ALWAYS_INLINE degrades to plain inline and
+ * NO_INLINE expands to nothing, so that both names are defined on every branch.
+ */
+#if __GNUC__ >= 4 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3)
+#define ALWAYS_INLINE inline __attribute__((always_inline))
+#define NO_INLINE __attribute__((noinline))
+#else
+#define ALWAYS_INLINE inline
+#define NO_INLINE
+#endif
+
+#if __GNUC__ >= 4
+#define LIKE_MALLOC __attribute__((malloc))
+#define SENTINEL_CHECK __attribute__((sentinel))
+#else
+#define LIKE_MALLOC
+#define SENTINEL_CHECK
+#endif
+
+#else
+#error This program requires the GNU C compiler.
+#endif
+
+/* Logging */
+
+#define L_DEBUG 'D' /* Debugging messages */
+#define L_INFO 'I' /* Informational msgs, warnings and errors */
+#define L_WARN 'W'
+#define L_ERROR 'E'
+#define L_INFO_R 'i' /* Errors caused by external events */
+#define L_WARN_R 'w'
+#define L_ERROR_R 'e'
+#define L_FATAL '!' /* die() */
+
+extern char *log_title; /* NULL - print no title, default is log_progname */
+extern char *log_filename; /* Expanded name of the current log file */
+extern volatile int log_switch_nest; /* log_switch() nesting counter, increment to disable automatic switches */
+extern int log_pid; /* 0 if shouldn't be logged */
+extern int log_precise_timings; /* Include microsecond timestamps in log messages */
+extern void (*log_die_hook)(void);
+struct tm;
+extern void (*log_switch_hook)(struct tm *tm);
+
+void msg(uns cat, const char *fmt, ...) FORMAT_CHECK(printf,2,3);
+void vmsg(uns cat, const char *fmt, va_list args);
+void die(const char *, ...) NONRET FORMAT_CHECK(printf,1,2);
+void log_init(const char *argv0);
+void log_file(const char *name);
+void log_fork(void);
+int log_switch(void);
+
+void assert_failed(const char *assertion, const char *file, int line) NONRET;
+void assert_failed_noinfo(void) NONRET;
+
+#ifdef DEBUG_ASSERTS
+#define ASSERT(x) ({ if (unlikely(!(x))) assert_failed(#x, __FILE__, __LINE__); 1; })
+#else
+#define ASSERT(x) ({ if (__builtin_constant_p(x) && !(x)) assert_failed_noinfo(); 1; })
+#endif
+
+#define COMPILE_ASSERT(name,x) typedef char _COMPILE_ASSERT_##name[!!(x)-1]
+
+#ifdef LOCAL_DEBUG
+#define DBG(x,y...) msg(L_DEBUG, x,##y)
+#else
+#define DBG(x,y...) do { } while(0)
+#endif
+
+/* Increment the log_switch() nesting counter; per its declaration, a nonzero counter disables automatic switches. */
+static inline void
+log_switch_disable(void)
+{
+  log_switch_nest++;
+}
+/* Undo one log_switch_disable(): assert that the nesting counter is nonzero and decrement it. */
+static inline void
+log_switch_enable(void)
+{
+  ASSERT(log_switch_nest);
+  log_switch_nest--;
+}
+
+/* Memory allocation */
+
+#define xmalloc sh_xmalloc
+#define xrealloc sh_xrealloc
+#define xfree sh_xfree
+
+#ifdef DEBUG_DMALLOC
+/*
+ * The standard dmalloc macros tend to produce lots of namespace
+ * conflicts and we use only xmalloc and xfree, so we can define
+ * the stubs ourselves.
+ */
+#define DMALLOC_DISABLE
+#include <dmalloc.h>
+#define sh_xmalloc(size) _xmalloc_leap(__FILE__, __LINE__, size)
+#define sh_xrealloc(ptr,size) _xrealloc_leap(__FILE__, __LINE__, ptr, size)
+#define sh_xfree(ptr) _xfree_leap(__FILE__, __LINE__, ptr)
+#else
+/*
+ * Unfortunately, several libraries we might want to link to define
+ * their own xmalloc and we don't want to interfere with them, hence
+ * the renaming.
+ */
+void *xmalloc(uns) LIKE_MALLOC;
+void *xrealloc(void *, uns);
+void xfree(void *);
+#endif
+
+void *xmalloc_zero(uns) LIKE_MALLOC;
+char *xstrdup(const char *) LIKE_MALLOC;
+
+/* Content-Type pattern matching and filters */
+
+int match_ct_patt(const char *, const char *);
+
+/* wordsplit.c */
+
+int sepsplit(char *str, uns sep, char **rec, uns max);
+int wordsplit(char *str, char **rec, uns max);
+
+/* pat(i)match.c: Matching of shell patterns */
+
+int match_pattern(const char *patt, const char *str);
+int match_pattern_nocase(const char *patt, const char *str);
+
+/* md5hex.c */
+
+void md5_to_hex(const byte *s, char *d);
+void hex_to_md5(const char *s, byte *d);
+
+#define MD5_SIZE 16
+#define MD5_HEX_SIZE 33
+
+/* prime.c */
+
+int isprime(uns x);
+uns nextprime(uns x);
+
+/* primetable.c */
+
+uns next_table_prime(uns x);
+uns prev_table_prime(uns x);
+
+/* timer.c */
+
+timestamp_t get_timestamp(void);
+
+void init_timer(timestamp_t *timer);
+uns get_timer(timestamp_t *timer);
+uns switch_timer(timestamp_t *old, timestamp_t *new);
+
+/* regex.c */
+
+typedef struct regex regex;
+
+regex *rx_compile(const char *r, int icase);
+void rx_free(regex *r);
+int rx_match(regex *r, const char *s);
+int rx_subst(regex *r, const char *by, const char *src, char *dest, uns destlen);
+
+/* random.c */
+
+uns random_u32(void);
+uns random_max(uns max);
+u64 random_u64(void);
+u64 random_max_u64(u64 max);
+
+/* mmap.c */
+
+void *mmap_file(const char *name, unsigned *len, int writeable);
+void munmap_file(void *start, unsigned len);
+
+/* proctitle.c */
+
+void setproctitle_init(int argc, char **argv);
+void setproctitle(const char *msg, ...) FORMAT_CHECK(printf,1,2);
+char *getproctitle(void);
+
+/* randomkey.c */
+
+void randomkey(byte *buf, uns size);
+
+/* exitstatus.c */
+
+#define EXIT_STATUS_MSG_SIZE 32
+int format_exit_status(char *msg, int stat);
+
+/* runcmd.c */
+
+int run_command(const char *cmd, ...);
+void NONRET exec_command(const char *cmd, ...);
+void echo_command(char *buf, int size, const char *cmd, ...);
+int run_command_v(const char *cmd, va_list args);
+void NONRET exec_command_v(const char *cmd, va_list args);
+void echo_command_v(char *buf, int size, const char *cmd, va_list args);
+
+/* carefulio.c */
+
+int careful_read(int fd, void *buf, int len);
+int careful_write(int fd, const void *buf, int len);
+
+/* sync.c */
+
+void sync_dir(const char *name);
+
+/* sighandler.c */
+
+typedef int (*sh_sighandler_t)(int); // gets signum, returns nonzero if abort() should be called
+
+void handle_signal(int signum);
+void unhandle_signal(int signum);
+sh_sighandler_t set_signal_handler(int signum, sh_sighandler_t new);
+
+/* string.c */
+
+char *str_unesc(char *dest, const char *src);
+char *str_format_flags(char *dest, const char *fmt, uns flags);
+
+/* bigalloc.c */
+
+void *page_alloc(u64 len) LIKE_MALLOC; // allocates a multiple of CPU_PAGE_SIZE bytes with mmap
+void *page_alloc_zero(u64 len) LIKE_MALLOC;
+void page_free(void *start, u64 len);
+void *page_realloc(void *start, u64 old_len, u64 new_len);
+
+void *big_alloc(u64 len) LIKE_MALLOC; // allocate a large memory block in the most efficient way available
+void *big_alloc_zero(u64 len) LIKE_MALLOC;
+void big_free(void *start, u64 len);
+
+#endif
--- /dev/null
+# pkg-config metadata for libucw
+
+libdir=@LIBDIR@
+incdir=.
+
+#ifdef CONFIG_UCW_THREADS
+threads=-lpthread
+#else
+threads=
+#endif
+
+Name: libucw
+Description: A library of utility functions and data structures
+Version: @SHERLOCK_VERSION@
+Cflags: -I${incdir}
+Libs: -L${libdir} -lucw ${threads}
--- /dev/null
+/*
+ * UCW Library -- Linked Lists
+ *
+ * (c) 1997--1999 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+
+#define _UCW_LISTS_C
+#include "lib/lists.h"
+
+/* Append node n to the end of list l. */
+LIST_INLINE void
+add_tail(list *l, node *n)
+{
+  node *last = l->tail;
+
+  n->prev = last;
+  n->next = (node *) &l->null;
+  last->next = n;
+  l->tail = n;
+}
+
+/* Insert node n at the beginning of list l. */
+LIST_INLINE void
+add_head(list *l, node *n)
+{
+  node *first = l->head;
+
+  n->prev = (node *) &l->head;
+  n->next = first;
+  first->prev = n;
+  l->head = n;
+}
+
+/* Link node n into a list immediately behind the node after. */
+LIST_INLINE void
+insert_node(node *n, node *after)
+{
+  node *succ = after->next;
+
+  n->prev = after;
+  n->next = succ;
+  after->next = n;
+  succ->prev = n;
+}
+
+/* Unlink node n from its list by joining its neighbours; the links stored in n itself are left untouched. */
+LIST_INLINE void
+rem_node(node *n)
+{
+  node *before = n->prev;
+  node *behind = n->next;
+
+  before->next = behind;
+  behind->prev = before;
+}
+
+/* Make l an empty list: head points at the null field, tail points at the head field. */
+LIST_INLINE void
+init_list(list *l)
+{
+  l->null = NULL;
+  l->head = (node *) &l->null;
+  l->tail = (node *) &l->head;
+}
+
+/*
+ * Append the nodes of list l behind the last node of list to.
+ * The list l is not re-initialized here. The order of the stores below is
+ * significant: l->tail is read only after the first node has been re-linked.
+ */
+LIST_INLINE void
+add_tail_list(list *to, list *l)
+{
+  node *dst_last = to->tail;
+  node *src = l->head;
+
+  dst_last->next = src;
+  src->prev = dst_last;
+  src = l->tail;
+  src->next = (node *) &to->null;
+  to->tail = src;
+}
--- /dev/null
+/*
+ * UCW Library -- Linked Lists
+ *
+ * (c) 1997--1999 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_LISTS_H
+#define _UCW_LISTS_H
+
+/*
+ * I admit the list structure is very tricky and also somewhat awkward,
+ * but it's both efficient and easy to manipulate once one understands the
+ * basic trick: The list head always contains two synthetic nodes which are
+ * always present in the list: the head and the tail. But as the `next'
+ * entry of the tail and the `prev' entry of the head are both NULL, the
+ * nodes can overlap each other:
+ *
+ * head head_node.next
+ * null head_node.prev tail_node.next
+ * tail tail_node.prev
+ */
+
+/* List node header holding the links to the following and preceding node. */
+typedef struct node {
+ struct node *next, *prev;
+} node;
+
+/*
+ * List head. (head, null) is used as the synthetic head node and
+ * (null, tail) as the synthetic tail node; null is the NULL link they share.
+ */
+typedef struct list { /* In fact two overlaid nodes */
+ struct node *head, *null, *tail;
+} list;
+
+#define NODE (node *)
+#define HEAD(list) ((void *)((list).head))
+#define TAIL(list) ((void *)((list).tail))
+#define WALK_LIST(n,list) for(n=HEAD(list);(NODE (n))->next; \
+ n=(void *)((NODE (n))->next))
+#define DO_FOR_ALL(n,list) WALK_LIST(n,list)
+#define WALK_LIST_DELSAFE(n,nxt,list) \
+ for(n=HEAD(list); nxt=(void *)((NODE (n))->next); n=(void *) nxt)
+#define WALK_LIST_BACKWARDS(n,list) for(n=TAIL(list);(NODE (n))->prev; \
+ n=(void *)((NODE (n))->prev))
+#define WALK_LIST_BACKWARDS_DELSAFE(n,prv,list) \
+ for(n=TAIL(list); prv=(void *)((NODE (n))->prev); n=(void *) prv)
+
+#define EMPTY_LIST(list) (!(list).head->next)
+
+void add_tail(list *, node *);
+void add_head(list *, node *);
+void rem_node(node *);
+void add_tail_list(list *, list *);
+void init_list(list *);
+void insert_node(node *, node *);
+
+#if !defined(_UCW_LISTS_C) && defined(__GNUC__)
+#define LIST_INLINE extern inline
+#include "lib/lists.c"
+#undef LIST_INLINE
+#else
+#define LIST_INLINE
+#endif
+
+#endif
--- /dev/null
+/*
+ * LiZaRd -- Fast compression method based on Lempel-Ziv 77
+ *
+ * (c) 2004, Robert Spalek <robert@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/threads.h"
+#include "lib/lizard.h"
+
+#include <sys/mman.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <setjmp.h>
+#include <errno.h>
+
+/* Output buffer for lizard_decompress_safe(): an anonymous mapping of len bytes followed by one inaccessible guard page. */
+struct lizard_buffer {
+ uns len; /* usable size of the mapping in bytes, not counting the guard page; 0 when nothing is mapped */
+ void *ptr; /* start of the mapping, NULL when nothing is mapped */
+};
+
+/* Allocate an empty buffer descriptor (no memory mapped yet) and call handle_signal() for SIGSEGV. */
+struct lizard_buffer *
+lizard_alloc(void)
+{
+  struct lizard_buffer *b = xmalloc(sizeof(*b));
+
+  b->ptr = NULL;
+  b->len = 0;
+  handle_signal(SIGSEGV);
+  return b;
+}
+
+/* Call unhandle_signal() for SIGSEGV, unmap the buffer including its guard page (if mapped) and free the descriptor. */
+void
+lizard_free(struct lizard_buffer *buf)
+{
+  unhandle_signal(SIGSEGV);
+  if (buf->ptr != NULL)
+    munmap(buf->ptr, buf->len + CPU_PAGE_SIZE);
+  xfree(buf);
+}
+
+/*
+ * Grow the buffer to at least max_len bytes (max_len needs to be aligned to
+ * CPU_PAGE_SIZE). The old mapping is dropped without copying its contents
+ * and a new one is created with one extra page at its end, which is made
+ * inaccessible. Nothing happens when the buffer is already large enough.
+ */
+static void
+lizard_realloc(struct lizard_buffer *buf, uns max_len)
+{
+  uns doubled = 2*buf->len;
+
+  if (max_len <= buf->len)
+    return;
+  /* Grow at least geometrically to ensure logarithmic cost */
+  if (max_len < doubled)
+    max_len = doubled;
+
+  if (buf->ptr)
+    munmap(buf->ptr, buf->len + CPU_PAGE_SIZE);
+  buf->len = max_len;
+  buf->ptr = mmap(NULL, buf->len + CPU_PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0);
+  if (buf->ptr == MAP_FAILED)
+    die("mmap(anonymous, %d bytes): %m", (uns)(buf->len + CPU_PAGE_SIZE));
+  /* Turn the extra page into a guard page */
+  if (mprotect(buf->ptr + buf->len, CPU_PAGE_SIZE, PROT_NONE) < 0)
+    die("mprotect: %m");
+}
+
+/* Jump target set up by lizard_decompress_safe() before it starts decompressing */
+static jmp_buf safe_decompress_jump;
+/* SIGSEGV handler installed during safe decompression: jumps back to the setjmp() point with value 1. */
+static int
+sigsegv_handler(int signal UNUSED)
+{
+ longjmp(safe_decompress_jump, 1);
+ return 1; /* not reached, longjmp() does not return */
+}
+
+byte *
+lizard_decompress_safe(const byte *in, struct lizard_buffer *buf, uns expected_length)
+ /* Decompresses in into buf and returns a pointer to the decompressed data
+ * inside buf. If an error has occurred, NULL is returned and errno is set:
+ * EINVAL when the decompressed length differs from expected_length, EFAULT
+ * when SIGSEGV was caught. The buffer buf is automatically reallocated.
+ * SIGSEGV is caught in case of buffer-overflow. The function is not
+ * re-entrant because of a static longjmp handler. */
+{
+ uns lock_offset = ALIGN_TO(expected_length + 3, CPU_PAGE_SIZE); // +3 due to the unaligned access
+ if (lock_offset > buf->len)
+ lizard_realloc(buf, lock_offset);
+ /* volatile, because the value is needed after a possible longjmp() */
+ volatile sh_sighandler_t old_handler = set_signal_handler(SIGSEGV, sigsegv_handler);
+ byte *ptr;
+ if (!setjmp(safe_decompress_jump))
+ {
+ /* The output area is the last lock_offset bytes in front of the guard page */
+ ptr = buf->ptr + buf->len - lock_offset;
+ int len = lizard_decompress(in, ptr);
+ if (len != (int) expected_length)
+ {
+ ptr = NULL;
+ errno = EINVAL;
+ }
+ }
+ else
+ {
+ /* We got here by longjmp() from sigsegv_handler() */
+ msg(L_ERROR, "SIGSEGV caught in lizard_decompress()");
+ ptr = NULL;
+ errno = EFAULT;
+ }
+ set_signal_handler(SIGSEGV, old_handler);
+ return ptr;
+}
--- /dev/null
+#include "lib/lib.h"
+#include "lib/getopt.h"
+#include "lib/fastbuf.h"
+#include "lib/ff-binary.h"
+#include "lib/lizard.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+static char *options = CF_SHORT_OPTS "cdtx";
+static char *help = "\
+Usage: lizard-test <options> input-file [output-file]\n\
+\n\
+Options:\n"
+CF_USAGE
+"-c\t\tCompress\n\
+-d\t\tDecompress\n\
+-t\t\tCompress, decompress, and compare (in memory only, default)\n\
+-x\t\tLet the test crash by shrinking the output buffer\n\
+";
+
+/* Print the help text to stderr and terminate with exit status 1. */
+static void NONRET
+usage(void)
+{
+ fputs(help, stderr);
+ exit(1);
+}
+
+/*
+ * lizard-test: compress (-c) or decompress (-d) the input file into the
+ * output file, or (-t, default) compress the input in memory, decompress it
+ * again with lizard_decompress_safe() and compare the result with the input.
+ * With -x the expected length passed to the safe decompressor is shrunk by
+ * one page to provoke a failure.
+ *
+ * A compressed file consists of the original length and the Adler32 checksum
+ * of the original data (one bputl() value each), followed by the compressed
+ * stream.
+ */
+int
+main(int argc, char **argv)
+{
+  int opt;
+  uns action = 't';
+  uns crash = 0;
+  log_init(argv[0]);
+  while ((opt = cf_getopt(argc, argv, options, CF_NO_LONG_OPTS, NULL)) >= 0)
+    switch (opt)
+      {
+      case 'c':
+      case 'd':
+      case 't':
+        action = opt;
+        break;
+      case 'x':
+        crash++;
+        break;
+      default:
+        usage();
+      }
+  /* -t takes just the input file, -c and -d need an output file as well */
+  if ((action == 't' && argc != optind+1)
+      || (action != 't' && argc != optind+2))
+    usage();
+
+  void *mi, *mo;
+  int li, lo;
+  uns adler = 0;
+
+  struct stat st;
+  /* Without this check, a missing file would leave st.st_size uninitialized */
+  if (stat(argv[optind], &st) < 0)
+    die("Cannot stat %s: %m", argv[optind]);
+  li = st.st_size;
+  struct fastbuf *fi = bopen(argv[optind], O_RDONLY, 1<<16);
+  if (action != 'd')
+    {
+      lo = li * LIZARD_MAX_MULTIPLY + LIZARD_MAX_ADD;
+      li += LIZARD_NEEDS_CHARS;
+    }
+  else
+    {
+      /* The 8-byte header must be present, otherwise li would go negative below */
+      if (li < 8)
+        die("%s is too short to be a compressed file", argv[optind]);
+      lo = bgetl(fi);
+      adler = bgetl(fi);
+      li -= 8;
+    }
+  mi = xmalloc(li);
+  mo = xmalloc(lo);
+  li = bread(fi, mi, li);
+  bclose(fi);
+
+  printf("%d ", li);
+  if (action == 'd')
+    printf("->expected %d (%08x) ", lo, adler);
+  fflush(stdout);
+  if (action != 'd')
+    lo = lizard_compress(mi, li, mo);
+  else
+    {
+      lo = lizard_decompress(mi, mo);
+      if (adler32(mo, lo) != adler)
+        printf("wrong Adler32 ");
+    }
+  printf("-> %d ", lo);
+  fflush(stdout);
+
+  if (action != 't')
+    {
+      struct fastbuf *fo = bopen(argv[optind+1], O_CREAT | O_TRUNC | O_WRONLY, 1<<16);
+      if (action == 'c')
+        {
+          bputl(fo, li);
+          bputl(fo, adler32(mi, li));
+        }
+      bwrite(fo, mo, lo);
+      bclose(fo);
+    }
+  else
+    {
+      int smaller_li;
+      if (li >= (int) CPU_PAGE_SIZE)
+        smaller_li = li - CPU_PAGE_SIZE;
+      else
+        smaller_li = 0;
+      struct lizard_buffer *buf = lizard_alloc();
+      byte *ptr = lizard_decompress_safe(mo, buf, crash ? smaller_li : li);
+      if (!ptr)
+        printf("err: %m");
+      else if (memcmp(mi, ptr, li))
+        printf("WRONG");
+      else
+        printf("OK");
+      lizard_free(buf);
+    }
+  printf("\n");
+
+  xfree(mi);
+  xfree(mo);
+  return 0;
+}
--- /dev/null
+/*
+ * LiZaRd -- Fast compression method based on Lempel-Ziv 77
+ *
+ * (c) 2004, Robert Spalek <robert@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ *
+ * The file format is based on LZO1X and
+ * the compression method is based on zlib.
+ */
+
+#include "lib/lib.h"
+#include "lib/lizard.h"
+
+#include <string.h>
+
+/* 16-bit index into the hash_rec array; 0 stands for "no record" */
+typedef u16 hash_ptr_t;
+/* One entry of a collision chain: links to the neighbouring records holding strings with the same hash */
+struct hash_record {
+ /* the position in the original text is implicit; it is computed by locate_string() */
+ hash_ptr_t next; // 0=end
+ hash_ptr_t prev; // high bit: 0=record in array, 1=head in hash-table (i.e. value of hashf)
+};
+
+#define HASH_SIZE (1<<14) // size of hash-table
+#define HASH_RECORDS (1<<15) // maximum number of records in hash-table, 0 is unused ==> subtract 1
+#define CHAIN_MAX_TESTS 8 // crop longer collision chains
+#define CHAIN_GOOD_MATCH 32 // we already have a good match => end
+
+/* Hash the first three bytes of string; the result lies in 0..HASH_SIZE-1. */
+static inline uns
+hashf(const byte *string)
+{
+  uns h = string[0];
+
+  h ^= string[1] << 3;
+  h ^= string[2] << 6;
+  return h;
+}
+
+/*
+ * Compute the address of the text recorded under record_id, given that the
+ * current position string corresponds to record number head. The strings are
+ * recorded into the hash-table regularly, hence there is no need to store
+ * the pointer there: the distance follows from the two indices, and records
+ * at or after head are shifted back by a further HASH_RECORDS-1 positions.
+ */
+static inline byte *
+locate_string(const byte *string, int record_id, int head)
+{
+  int offset = record_id - head;
+
+  if (record_id >= head)
+    offset -= HASH_RECORDS-1;
+  return (byte *)(string + offset);
+}
+
+static inline uns
+find_match(uns record_id, struct hash_record *hash_rec, const byte *string, const byte *string_end, byte **best_ptr, uns head)
+ /* hash_tab[hash] == record_id points to the head of the double-linked
+ * link-list of strings with the same hash. The records are statically
+ * stored in circular array hash_rec (with the 1st entry unused), and the
+ * pointers are just 16-bit indices. The strings in every collision chain
+ * are ordered by age.
+ *
+ * Returns the length of the best match found (0 if none) and stores the
+ * address of the matching text to *best_ptr; *best_ptr is left untouched
+ * when 0 is returned. */
+{
+ uns count = CHAIN_MAX_TESTS;
+ uns best_len = 0;
+ /* Walk the collision chain, testing at most CHAIN_MAX_TESTS candidates */
+ while (record_id && count-- > 0)
+ {
+ byte *record_string = locate_string(string, record_id, head);
+ byte *cmp = record_string;
+ if (cmp[0] == string[0] && cmp[2] == string[2])
+ /* implies cmp[1] == string[1] */
+ {
+ if (cmp[3] == string[3])
+ {
+ cmp += 4;
+ /* Bytes 4..7 are compared without a bounds check; the caller provides LIZARD_NEEDS_CHARS spare bytes behind the input */
+ if (*cmp++ == string[4] && *cmp++ == string[5]
+ && *cmp++ == string[6] && *cmp++ == string[7])
+ {
+ const byte *str = string + 8;
+ while (str <= string_end && *cmp++ == *str++);
+ }
+ }
+ else
+ cmp += 4;
+ uns len = cmp - record_string - 1; /* cmp points 2 characters after the last match */
+ if (len > best_len)
+ {
+ best_len = len;
+ *best_ptr = record_string;
+ if (best_len >= CHAIN_GOOD_MATCH) /* optimization */
+ break;
+ }
+ }
+ record_id = hash_rec[record_id].next;
+ }
+ return best_len;
+}
+
+static uns
+hash_string(hash_ptr_t *hash_tab, uns hash, struct hash_record *hash_rec, /*byte *string,*/ uns head, uns *to_delete)
+ /* We reuse hash-records stored in a circular array. First, delete the old
+ * one and then add the new one in front of the link-list.
+ *
+ * Record number head is used for the new string with hash value hash.
+ * Returns the record number to use for the next string. *to_delete is set
+ * to 1 once the array has wrapped around, i.e., once the records handed out
+ * are already in use and have to be unlinked first. */
+{
+ struct hash_record *rec = hash_rec + head;
+ if (*to_delete) /* unlink the original record */
+ {
+ uns prev_id = rec->prev & ((1<<15)-1);
+ if (rec->prev & (1<<15)) /* was a head */
+ hash_tab[prev_id] = 0;
+ else /* thanks to the ordering, this was a tail */
+ hash_rec[prev_id].next = 0;
+ }
+ rec->next = hash_tab[hash];
+ rec->prev = (1<<15) | hash;
+ /* When the chain was empty, rec->next is 0 and this store goes to the unused record 0 */
+ hash_rec[rec->next].prev = head;
+ hash_tab[hash] = head; /* add the new record before the link-list */
+
+ if (++head >= HASH_RECORDS) /* circular buffer, reuse old records, 0 is unused */
+ {
+ head = 1;
+ *to_delete = 1;
+ }
+ return head;
+}
+
+/*
+ * Store l in the extended-length encoding: one zero byte for every 255
+ * taken off while l exceeds 255, followed by a byte holding the remainder.
+ * Returns the position right after the last byte written.
+ */
+static inline byte *
+dump_unary_value(byte *out, uns l)
+{
+  for (; l > 255; l -= 255)
+    *out++ = 0;
+  *out++ = l;
+  return out;
+}
+
+/*
+ * Emit a command copying len literal bytes from start to the output.
+ * bof is nonzero if nothing has been written to the output yet.
+ * Returns the output position after the copied bytes.
+ */
+static byte *
+flush_copy_command(uns bof, byte *out, const byte *start, uns len)
+{
+ if (bof && len <= 238)
+ /* Short copy command at the beginning of file: a single byte len+17 */
+ *out++ = len + 17;
+ else if (len < 4)
+ {
+ /* cannot happen when !!bof */
+ /* 1..3 literals are announced in the copy bits of the previous command */
+ out[-2] |= len; /* invariant: lowest 2 bits 2 bytes back */
+#ifdef CPU_ALLOW_UNALIGNED
+ /* Always moves 4 bytes at once, only len of them are kept */
+ * (u32*) out = * (u32*) start;
+ return out + len;
+#else
+ while (len-- > 0)
+ *out++ = *start++;
+ return out;
+#endif
+ }
+ else
+ {
+ /* leave 2 least significant bits of out[-2] set to 0 */
+ if (len <= 18)
+ *out++ = len - 3;
+ else
+ {
+ /* Length does not fit in the command byte: a zero byte followed by the extended encoding of len-18 */
+ *out++ = 0;
+ out = dump_unary_value(out, len - 18);
+ }
+ }
+ memcpy(out, start, len);
+ return out + len;
+}
+
+int
+lizard_compress(const byte *in, uns in_len, byte *out)
+ /* Requires out being allocated for at least in_len * LIZARD_MAX_MULTIPLY +
+ * LIZARD_MAX_ADD. There must be at least LIZARD_NEEDS_CHARS characters
+ * allocated after in. Returns the actual compressed length. */
+{
+ /* Both tables live on the stack */
+ hash_ptr_t hash_tab[HASH_SIZE];
+ struct hash_record hash_rec[HASH_RECORDS];
+ const byte *in_end = in + in_len;
+ byte *out_start = out;
+ const byte *copy_start = in; /* start of the literals not yet written to the output */
+ uns head = 1; /* 0 is unused */
+ uns to_delete = 0, bof = 1;
+ bzero(hash_tab, sizeof(hash_tab)); /* init the hash-table */
+ while (in < in_end)
+ {
+ uns hash = hashf(in);
+ byte *best = NULL;
+ uns len = find_match(hash_tab[hash], hash_rec, in, in_end, &best, head);
+ if (len < 3)
+#if 0 // TODO: now, our routine does not detect matches of length 2
+ if (len == 2 && (in - best->string - 1) < (1<<10))
+ { /* pass-thru */ }
+ else
+#endif
+ {
+literal:
+ /* No usable match: record the position and leave the byte among the pending literals */
+ head = hash_string(hash_tab, hash, hash_rec, head, &to_delete);
+ in++; /* add a literal */
+ continue;
+ }
+
+ if (in + len > in_end) /* crop EOF */
+ {
+ len = in_end - in;
+ if (len < 3)
+ goto literal;
+ }
+ /* Record the match. */
+ uns copy_len = in - copy_start;
+ uns is_in_copy_mode = bof || copy_len >= 4;
+ uns shift = in - best - 1;
+ /* Try to use a 2-byte sequence. */
+#if 0
+ if (len == 2)
+ {
+ if (is_in_copy_mode || !copy_len) /* cannot use with 0 copied characters, because this bit pattern is reserved for copy mode */
+ goto literal;
+ else
+ goto dump_2sequence;
+ } else
+#endif
+ /* now, len >= 3 */
+ if (shift < (1<<11) && len <= 8)
+ {
+ shift |= (len-3 + 2)<<11;
+dump_2sequence:
+ if (copy_len)
+ out = flush_copy_command(bof, out, copy_start, copy_len);
+ *out++ = (shift>>6) & ~3; /* shift fits into 10 bits */
+ *out++ = shift & 0xff;
+ }
+ else if (len == 3 && is_in_copy_mode)
+ {
+ if (shift < (1<<11) + (1<<10)) /* optimisation for length-3 matches after a copy command */
+ {
+ shift -= 1<<11;
+ goto dump_2sequence; /* shift has 11 bits and contains also len */
+ }
+ else /* avoid 3-sequence compressed to 3 sequence if it can simply be appended */
+ goto literal;
+ }
+ /* We have to use a 3-byte sequence. */
+ else
+ {
+ if (copy_len)
+ out = flush_copy_command(bof, out, copy_start, copy_len);
+ if (shift < (1<<14))
+ {
+ if (len <= 33)
+ *out++ = (1<<5) | (len-2);
+ else
+ {
+ *out++ = 1<<5;
+ out = dump_unary_value(out, len - 33);
+ }
+ }
+ else /* shift < (1<<15)-1 because of HASH_RECORDS */
+ {
+ shift++; /* because shift==0 is reserved for EOF */
+ byte pos_bit = ((shift>>11) & (1<<3)) | (1<<4);
+ if (len <= 9)
+ *out++ = pos_bit | (len-2);
+ else
+ {
+ *out++ = pos_bit;
+ out = dump_unary_value(out, len - 9);
+ }
+ }
+ *out++ = (shift>>6) & ~3; /* rest of shift fits into 14 bits */
+ *out++ = shift & 0xff;
+ }
+ /* Update the hash-table. */
+ head = hash_string(hash_tab, hash, hash_rec, head, &to_delete);
+ for (uns i=1; i<len; i++)
+ head = hash_string(hash_tab, hashf(in+i), hash_rec, head, &to_delete);
+ in += len;
+ copy_start = in;
+ bof = 0;
+ }
+ /* Flush the literals left over behind the last match */
+ uns copy_len = in - copy_start;
+ if (copy_len)
+ out = flush_copy_command(bof, out, copy_start, copy_len);
+ *out++ = 17; /* add EOF */
+ *out++ = 0;
+ *out++ = 0;
+ return out - out_start;
+}
+
+/*
+ * Decode an extended length: every leading zero byte counts for 255, the
+ * first non-zero byte is added as it is. The sum is stored to *val and the
+ * position right after the non-zero byte is returned.
+ */
+static inline byte *
+read_unary_value(const byte *in, uns *val)
+{
+  uns total = 0;
+  byte b;
+
+  while ((b = *in++) == 0)
+    total += 255;
+  *val = total + b;
+  return (byte *)in;
+}
+
+int
+lizard_decompress(const byte *in, byte *out)
+ /* Requires out to be allocated large enough for the decompressed data, so
+ * the decompressed length must be known beforehand. It is desirable to lock
+ * the following memory page for read-only access to prevent buffer
+ * overflow. Returns the actual decompressed length.
+ * NOTE(review): the original comment also promised a negative return value
+ * on error, but the only return statement here yields out - out_start. */
+{
+ byte *out_start = out;
+ /* 1 = copy mode, 2 = just after a copy command, 0 = compressed mode */
+ uns expect_copy_command = 1;
+ uns len;
+ if (*in > 17) /* short copy command at BOF */
+ {
+ len = *in++ - 17;
+ goto perform_copy_command;
+ }
+ while (1)
+ {
+ uns c = *in++;
+ uns pos;
+ if (c < 0x10)
+ if (expect_copy_command == 1)
+ {
+ /* 0000LLLL: copy command */
+ if (!c)
+ {
+ in = read_unary_value(in, &len);
+ len += 18;
+ }
+ else
+ len = c + 3;
+ goto perform_copy_command;
+ }
+ else
+ {
+ /* 0000ppCC pppppppp: 2-byte sequence with a 10-bit position */
+ pos = ((c&0xc)<<6) | *in++;
+ if (expect_copy_command == 2)
+ {
+ pos += 1<<11;
+ len = 3;
+ }
+ else
+ len = 2;
+ pos++;
+ }
+ else if (c < 0x20)
+ {
+ /* 0001pLLL: 15-bit position, position 0 means EOF */
+ pos = (c&0x8)<<11;
+ len = c&0x7;
+ if (!len)
+ {
+ in = read_unary_value(in, &len);
+ len += 9;
+ }
+ else
+ len += 2;
+ pos |= (*in++ & 0xfc)<<6;
+ pos |= *in++;
+ if (!pos) /* EOF */
+ break;
+ /* do NOT pos++ */
+ }
+ else if (c < 0x40)
+ {
+ /* 001LLLLL: 14-bit position */
+ len = c&0x1f;
+ if (!len)
+ {
+ in = read_unary_value(in, &len);
+ len += 33;
+ }
+ else
+ len += 2;
+ pos = (*in++ & 0xfc)<<6;
+ pos |= *in++;
+ pos++;
+ }
+ else /* high bits encode the length */
+ {
+ len = ((c&0xe0)>>5) -2 +3;
+ pos = (c&0x1c)<<6;
+ pos |= *in++;
+ pos++;
+ }
+ /* take from the sliding window */
+ if (len <= pos)
+ {
+ memcpy(out, out-pos, len);
+ out += len;
+ }
+ else
+ { /* overlapping */
+ for (; len-- > 0; out++)
+ *out = *(out-pos);
+ /* It's tempting to use out[-pos] above, but unfortunately it's not the same */
+ }
+ /* extract the copy-bits */
+ len = in[-2] & 0x3;
+ if (len)
+ {
+ expect_copy_command = 0;
+#ifdef CPU_ALLOW_UNALIGNED
+ /* Always moves 4 bytes at once, only len of them are kept */
+ * (u32*) out = * (u32*) in;
+ out += len;
+ in += len;
+#else
+ while (len-- > 0)
+ *out++ = *in++;
+#endif
+ }
+ else
+ expect_copy_command = 1;
+ continue;
+
+perform_copy_command:
+ expect_copy_command = 2;
+ memcpy(out, in, len);
+ out += len;
+ in += len;
+ }
+
+ return out - out_start;
+}
+
+/*
+
+Description of the LZO1X format :
+=================================
+
+The meaning of the commands depends on the current mode. It can be either
+the compressed mode or the copy mode. In some cases, the compressed mode
+also distinguishes whether we just left the copy mode or not.
+
+Beginning of file:
+------------------
+
+Start in copy mode. If the first byte is 00010001, it means probably EOF (empty file),
+so switch to the compressed mode. If it is bigger, subtract 17 and copy this number of
+the following characters to the output and switch to the compressed mode.
+If it is smaller, interpret it as a regular copy mode command.
+
+Compressed mode:
+----------------
+
+Read the first byte of the sequence and determine the type of bit encoding by
+looking at the most significant bits. The sequence is always at least 2 bytes
+long. Decode sequences of these types until the EOF or END marker is read.
+
+ length L = length of the text taken from the sliding window
+
+ If L=0, then count the number Z of the following zero bytes and add Z*255
+ to the value of the following non-zero byte. This allows setting L
+ arbitrarily high.
+
+ position p = relative position of the beginning of the text
+
+ Exception: 00010001 00000000 00000000 means EOF
+
+ copying C = length 1..3 of copied characters or END=0
+
+ C following characters will be copied from the compressed text to the
+ output. The number CC is always stored in the 2 least significant bits of
+ the second last byte of the sequence.
+
+ If END is read, the algorithm switches to the copy mode.
+
+pattern length position
+
+0000ppCC pppppppp 2 10 bits [default interpretation]
+0000ppCC pppppppp 3 10 bits + 2048 [just after return from copy mode]
+0001pLLL L* ppppppCC pppppppp 3..9 + extend 15 bits [pos 0 interpreted as EOF]
+001LLLLL L* ppppppCC pppppppp 3..33 + extend 14 bits
+LLLpppCC pppppppp 3..8 11 bits [LLL >= 010]
+
+Copy mode:
+----------
+
+Read the first byte and, if the most significant bits are 0000, perform the
+following command, otherwise switch to the compressed mode (and evaluate the
+command there).
+
+pattern length position
+
+0000LLLL L* 4..18 + extend N/A
+
+ Copy L characters from the compressed text to the output. The overhead for
+ incompressible strings is only roughly 1/256 + epsilon.
+
+*/
--- /dev/null
+/*
+ * LiZaRd -- Fast compression method based on Lempel-Ziv 77
+ *
+ * (c) 2004, Robert Spalek <robert@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_LIZARD_H
+#define _UCW_LIZARD_H
+
+#define LIZARD_NEEDS_CHARS 8
+ /* The compression routine needs input buffer 8 characters longer, because it
+ * does not check the input bounds all the time. */
+#define LIZARD_MAX_MULTIPLY 23./22
+#define LIZARD_MAX_ADD 4
+ /* In the worst case, the compressed file will not be longer than its
+ * original length * 23/22 + 4.
+ *
+ * The additive constant is for EOF and the header of the file.
+ *
+ * The multiplicative constant comes from 19-byte incompressible string
+ * followed by a 3-sequence that can be compressed into 2-byte link. This
+ * breaks the copy-mode and it needs to be restarted with a new header. The
+ * total length is 2(header) + 19(string) + 2(link) = 23.
+ */
+
+/* lizard.c */
+int lizard_compress(const byte *in, uns in_len, byte *out);
+int lizard_decompress(const byte *in, byte *out);
+
+/* lizard-safe.c */
+struct lizard_buffer;
+
+struct lizard_buffer *lizard_alloc(void);
+void lizard_free(struct lizard_buffer *buf);
+byte *lizard_decompress_safe(const byte *in, struct lizard_buffer *buf, uns expected_length);
+
+/* adler32.c */
+uns update_adler32(uns adler, const byte *ptr, uns len);
+
+/* Checksum a whole buffer in one call: feeds it to update_adler32() with the starting value 1. */
+static inline uns
+adler32(const byte *buf, uns len)
+{
+  const uns initial = 1;
+
+  return update_adler32(initial, buf, len);
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Keeping of Log Files
+ *
+ * (c) 1997--2006 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/lfs.h"
+#include "lib/threads.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <time.h>
+
+static char *log_name_patt;
+static int log_params;
+static int log_filename_size;
+volatile int log_switch_nest;
+
+/*
+ * Expand the log file name pattern for the broken-down time `tm' and, if the
+ * result differs from the currently used name, open that file and redirect
+ * file descriptor 2 to it.
+ *
+ * Returns 1 when a new file was opened, 0 otherwise. Calls die() when the name
+ * cannot be formatted or the file cannot be opened.
+ */
+static int
+do_log_switch(struct tm *tm)
+{
+  int switched = 0;
+
+  /* No pattern set, or a fixed name (no % escapes) which is already in use */
+  if (!log_name_patt ||
+      log_filename[0] && !log_params)
+    return 0;
+
+  /*
+   * Declared only after the guard: log_filename_size stays 0 until log_file()
+   * has been called and a zero-length variable-length array is undefined.
+   */
+  char name[log_filename_size];
+  size_t l;
+  int fd;
+
+  ucwlib_lock();
+  log_switch_nest++;		/* Keeps internal_log_switch() from recursing while we log or die */
+  l = strftime(name, log_filename_size, log_name_patt, tm);
+  /* strftime() reports overflow by returning 0 (never a negative value) and leaves name[] undefined */
+  if ((!l && log_name_patt[0]) || l >= (size_t) log_filename_size)
+    die("Error formatting log file name: %m");
+  if (strcmp(name, log_filename))
+    {
+      strcpy(log_filename, name);
+      fd = sh_open(name, O_WRONLY | O_CREAT | O_APPEND, 0666);
+      if (fd < 0)
+	die("Unable to open log file %s: %m", name);
+      dup2(fd, 2);
+      close(fd);
+      switched = 1;
+    }
+  log_switch_nest--;
+  ucwlib_unlock();
+  return switched;
+}
+
+/*
+ * Switch the log file according to the current time.
+ * Returns the result of do_log_switch(): 1 if a new file was opened, 0 otherwise.
+ */
+int
+log_switch(void)
+{
+  time_t tim = time(NULL);
+  struct tm tm;
+
+  /*
+   * localtime() may return NULL and hands out a shared static buffer; use the
+   * re-entrant variant with an all-zero fallback, the same way vmsg() does.
+   */
+  if (!localtime_r(&tim, &tm))
+    memset(&tm, 0, sizeof(tm));
+  return do_log_switch(&tm);
+}
+
+/* Hook installed by log_file(): switches the log file unless a switch is already in progress. */
+static void
+internal_log_switch(struct tm *tm)
+{
+  if (log_switch_nest)
+    return;
+  do_log_switch(tm);
+}
+
+/*
+ * Start logging to the file described by `name', which is used as a strftime()
+ * pattern. A NULL name leaves everything untouched.
+ */
+void
+log_file(const char *name)
+{
+  if (!name)
+    return;
+
+  /* Drop whatever a previous call has set up */
+  if (log_name_patt)
+    xfree(log_name_patt);
+  if (log_filename)
+    {
+      xfree(log_filename);
+      log_filename = NULL;
+    }
+
+  log_name_patt = xstrdup(name);
+  log_params = (strchr(name, '%') != NULL);
+  /* The original author takes 63 bytes as an upper bound on the expansion of % escapes */
+  log_filename_size = strlen(name) + 64;
+  log_filename = xmalloc(log_filename_size);
+  *log_filename = 0;
+
+  log_switch();
+  log_switch_hook = internal_log_switch;
+}
+
+/* Record the current process ID in log_pid (vmsg() prints it in the message prefix when non-zero). */
+void
+log_fork(void)
+{
+ log_pid = getpid();
+}
+
+#ifdef TEST
+
+/* Test driver: logs every command-line argument to standard output. */
+int main(int argc, char **argv)
+{
+  log_init(argv[0]);
+  log_file("/proc/self/fd/1");
+  for (int i=1; i<argc; i++)
+    log(L_INFO, "%s", argv[i]);	/* Never pass user input as the format string */
+  return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Logging
+ *
+ * (c) 1997--2006 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include <time.h>
+#include <alloca.h>
+
+static char log_progname[32];
+char *log_filename;
+char *log_title;
+int log_pid;
+int log_precise_timings;
+void (*log_die_hook)(void);
+void (*log_switch_hook)(struct tm *tm);
+
+/*
+ * Format one log message and write it to file descriptor 2 as a single line.
+ *
+ * The line consists of the category byte `cat', a timestamp (with microseconds
+ * if log_precise_timings is set), an optional "[title (pid)]" prefix and the
+ * text produced from `fmt' and `args'. If log_switch_hook is set, it is called
+ * with the message time before anything is formatted.
+ */
+void
+vmsg(unsigned int cat, const char *fmt, va_list args)
+{
+ struct timeval tv;
+ struct tm tm;
+ byte *buf, *p;
+ int buflen = 256;
+ int l, l0, r;
+ va_list args2;
+
+ gettimeofday(&tv, NULL);
+ if (!localtime_r(&tv.tv_sec, &tm))
+ bzero(&tm, sizeof(tm));
+
+ if (log_switch_hook)
+ log_switch_hook(&tm);
+ /* Format into a stack buffer; when the text does not fit, retry with a bigger one */
+ while (1)
+ {
+ p = buf = alloca(buflen);
+ *p++ = cat;
+ /* We cannot use strftime() here, because it's not re-entrant */
+ p += sprintf(p, " %4d-%02d-%02d %02d:%02d:%02d", tm.tm_year+1900, tm.tm_mon+1, tm.tm_mday,
+ tm.tm_hour, tm.tm_min, tm.tm_sec);
+ if (log_precise_timings)
+ p += sprintf(p, ".%06d", (int)tv.tv_usec);
+ *p++ = ' ';
+ if (log_title)
+ {
+ if (log_pid)
+ p += sprintf(p, "[%s (%d)] ", log_title, log_pid);
+ else
+ p += sprintf(p, "[%s] ", log_title);
+ }
+ else
+ {
+ if (log_pid)
+ p += sprintf(p, "[%d] ", log_pid);
+ }
+ /* NOTE(review): the prefix is written with unbounded sprintf(); a very long log_title could exceed the initial 256 bytes -- confirm callers keep it short */
+ /* l0 = length of the prefix plus one byte reserved for the final newline, r = room left for the text */
+ l0 = p - buf + 1;
+ r = buflen - l0;
+ va_copy(args2, args);
+ l = vsnprintf(p, r, fmt, args2);
+ va_end(args2);
+ /* Negative result: required size unknown, so the next buffer is just one byte bigger */
+ if (l < 0)
+ l = r;
+ else if (l < r)
+ {
+ /* The text fits: replace control characters other than TAB by 0x7f */
+ while (*p)
+ {
+ if (*p < 0x20 && *p != '\t')
+ *p = 0x7f;
+ p++;
+ }
+ /* The terminating NUL becomes the newline; l + l0 covers prefix, text and newline */
+ *p = '\n';
+ write(2, buf, l + l0);
+ return;
+ }
+ /* Truncated: size the next buffer for prefix, text, newline and NUL */
+ buflen = l + l0 + 1;
+ }
+}
+
+/* Log a printf-style message of category `cat'; variadic front end of vmsg(). */
+void
+msg(unsigned int cat, const char *fmt, ...)
+{
+  va_list ap;
+
+  va_start(ap, fmt);
+  vmsg(cat, fmt, ap);
+  va_end(ap);
+}
+
+/*
+ * Log a fatal error (category L_FATAL), run log_die_hook if one is set and
+ * terminate the process: by exit(1), or by abort() when DEBUG_DIE_BY_ABORT
+ * is defined.
+ */
+void
+die(const char *fmt, ...)
+{
+  va_list ap;
+
+  va_start(ap, fmt);
+  vmsg(L_FATAL, fmt, ap);
+  va_end(ap);
+
+  if (log_die_hook)
+    log_die_hook();
+
+#ifndef DEBUG_DIE_BY_ABORT
+  exit(1);
+#else
+  abort();
+#endif
+}
+
+/* Report a failed assertion (its text and source location) as an L_FATAL message and abort(). */
+void
+assert_failed(const char *assertion, const char *file, int line)
+{
+ msg(L_FATAL, "Assertion `%s' failed at %s:%d", assertion, file, line);
+ abort();
+}
+
+/* Report a failed assertion without any details and terminate via die(). */
+void
+assert_failed_noinfo(void)
+{
+ die("Internal error: Assertion failed.");
+}
+
+/*
+ * Return a pointer to the part of `n' following its last '/',
+ * or `n' itself when it contains no slash.
+ */
+static const char *
+log_basename(const char *n)
+{
+  const char *base = n;
+  const char *c;
+
+  for (c = n; *c; c++)
+    if (*c == '/')
+      base = c + 1;
+  return base;
+}
+
+/*
+ * Set the log title to the base name of `argv0', truncated to fit
+ * log_progname. A NULL argv0 leaves the title unchanged.
+ */
+void
+log_init(const char *argv0)
+{
+  if (!argv0)
+    return;
+
+  const size_t limit = sizeof(log_progname) - 1;
+  strncpy(log_progname, log_basename(argv0), limit);
+  log_progname[limit] = 0;	/* strncpy() does not terminate a truncated copy */
+  log_title = log_progname;
+}
--- /dev/null
+/*
+ * UCW Library -- Main Loop
+ *
+ * (c) 2004--2006 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#undef LOCAL_DEBUG
+
+#include "lib/lib.h"
+#include "lib/mainloop.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <signal.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <time.h>
+#include <sys/poll.h>
+#include <sys/wait.h>
+#include <sys/time.h>
+
+timestamp_t main_now;
+sh_time_t main_now_seconds;
+uns main_shutdown;
+
+clist main_timer_list, main_file_list, main_hook_list, main_process_list;
+static uns main_file_cnt;
+static uns main_poll_table_obsolete, main_poll_table_size;
+static struct pollfd *main_poll_table;
+static uns main_sigchld_set_up;
+
+/* Refresh main_now (milliseconds) and main_now_seconds (seconds) from gettimeofday(). */
+void
+main_get_time(void)
+{
+  struct timeval now;
+
+  gettimeofday(&now, NULL);
+  timestamp_t msec = (timestamp_t) now.tv_sec * 1000 + now.tv_usec / 1000;
+  main_now_seconds = now.tv_sec;
+  main_now = msec;
+  // DBG("It's %lld o'clock", (long long) main_now);
+}
+
+/* Initialize the main loop: empty all four lists, mark the poll table as stale and read the clock. */
+void
+main_init(void)
+{
+  DBG("MAIN: Initializing");
+
+  clist_init(&main_timer_list);
+  clist_init(&main_file_list);
+  clist_init(&main_hook_list);
+  clist_init(&main_process_list);
+
+  main_poll_table_obsolete = 1;
+  main_file_cnt = 0;
+
+  main_get_time();
+}
+
+/*
+ * (Re)schedule timer `tm' to expire at time `expires'; an `expires' of zero
+ * only removes it. The timer list is kept sorted by expiration time, and a
+ * new timer goes after already queued timers with the same expiration.
+ */
+void
+timer_add(struct main_timer *tm, timestamp_t expires)
+{
+  if (!expires)
+    DBG("MAIN: Clearing timer %p", tm);
+  else
+    DBG("MAIN: Setting timer %p (expire at now+%lld)", tm, (long long)(expires-main_now));
+
+  /* A non-zero expiration means the timer is currently queued */
+  if (tm->expires)
+    clist_remove(&tm->n);
+  tm->expires = expires;
+  if (!expires)
+    return;
+
+  /* Find the first queued timer which does not expire before the new one */
+  cnode *pos;
+  for (pos = main_timer_list.head.next; pos != &main_timer_list.head; pos = pos->next)
+    if (((struct main_timer *) pos)->expires >= expires)
+      break;
+  clist_insert_before(&tm->n, pos);
+}
+
+/* Deactivate timer `tm'; implemented as timer_add() with a zero expiration time. */
+void
+timer_del(struct main_timer *tm)
+{
+ timer_add(tm, 0);
+}
+
+static void
+file_timer_expired(struct main_timer *tm)
+{
+ struct main_file *fi = tm->data;
+ timer_del(&fi->timer);
+ if (fi->error_handler)
+ fi->error_handler(fi, MFERR_TIMEOUT);
+}
+
+/*
+ * Start watching file `fi' in the main loop: link it to main_file_list, set up
+ * its timeout timer, mark the poll table as stale and switch the descriptor
+ * to non-blocking mode. The structure must not be on the list yet.
+ */
+void
+file_add(struct main_file *fi)
+{
+  DBG("MAIN: Adding file %p (fd=%d)", fi, fi->fd);
+  ASSERT(!fi->n.next);
+  clist_add_tail(&main_file_list, &fi->n);
+  fi->timer.handler = file_timer_expired;
+  fi->timer.data = fi;
+  main_file_cnt++;
+  main_poll_table_obsolete = 1;
+  /* F_SETFL replaces all status flags, so merge O_NONBLOCK into the current set instead of dropping e.g. O_APPEND */
+  int flags = fcntl(fi->fd, F_GETFL);
+  if (flags < 0 || fcntl(fi->fd, F_SETFL, flags | O_NONBLOCK) < 0)
+    msg(L_ERROR, "Error setting fd %d to non-blocking mode: %m. Keep fingers crossed.", fi->fd);
+}
+
+/*
+ * Recompute the poll event mask of `fi' from the handlers which are currently
+ * set. Does nothing while the file has no poll table entry.
+ */
+void
+file_chg(struct main_file *fi)
+{
+  struct pollfd *pfd = fi->pollfd;
+
+  if (!pfd)
+    return;
+
+  short wanted = 0;
+  if (fi->read_handler)
+    wanted |= POLLIN | POLLHUP | POLLERR;
+  if (fi->write_handler)
+    wanted |= POLLOUT | POLLERR;
+  pfd->events = wanted;
+}
+
+/* Stop watching file `fi': cancel its timeout, unlink it from main_file_list and mark the poll table as stale. */
+void
+file_del(struct main_file *fi)
+{
+  DBG("MAIN: Deleting file %p (fd=%d)", fi, fi->fd);
+  ASSERT(fi->n.next);
+  timer_del(&fi->timer);
+  clist_remove(&fi->n);
+  /* Cleared links mark the structure as "not on the list" for the ASSERTs */
+  fi->n.prev = NULL;
+  fi->n.next = NULL;
+  main_poll_table_obsolete = 1;
+  main_file_cnt--;
+}
+
+/*
+ * Read handler installed by file_read(): keeps reading until the buffer is
+ * full or read() returns 0, then clears itself and calls read_done.
+ *
+ * Returns 1 after read_done was called. Returns 0 when read() failed; the
+ * error handler (if any) is called with MFERR_READ unless errno is EINTR or
+ * EAGAIN.
+ */
+static int
+file_read_handler(struct main_file *fi)
+{
+  for (;;)
+    {
+      if (fi->rpos >= fi->rlen)
+	break;
+      int got = read(fi->fd, fi->rbuf + fi->rpos, fi->rlen - fi->rpos);
+      DBG("MAIN: FD %d: read %d", fi->fd, got);
+      if (!got)
+	break;
+      if (got < 0)
+	{
+	  if (errno != EINTR && errno != EAGAIN && fi->error_handler)
+	    fi->error_handler(fi, MFERR_READ);
+	  return 0;
+	}
+      fi->rpos += got;
+    }
+  DBG("MAIN: FD %d done read %d of %d", fi->fd, fi->rpos, fi->rlen);
+  fi->read_handler = NULL;
+  file_chg(fi);
+  fi->read_done(fi);
+  return 1;
+}
+
+/*
+ * Write handler installed by file_write(): keeps writing until the whole
+ * buffer is out, then clears itself and calls write_done.
+ *
+ * Returns 1 after write_done was called. Returns 0 when write() failed; the
+ * error handler (if any) is called with MFERR_WRITE unless errno is EINTR or
+ * EAGAIN.
+ */
+static int
+file_write_handler(struct main_file *fi)
+{
+  for (;;)
+    {
+      if (fi->wpos >= fi->wlen)
+	break;
+      int sent = write(fi->fd, fi->wbuf + fi->wpos, fi->wlen - fi->wpos);
+      DBG("MAIN: FD %d: write %d", fi->fd, sent);
+      if (sent < 0)
+	{
+	  if (errno != EINTR && errno != EAGAIN && fi->error_handler)
+	    fi->error_handler(fi, MFERR_WRITE);
+	  return 0;
+	}
+      fi->wpos += sent;
+    }
+  DBG("MAIN: FD %d done write %d", fi->fd, fi->wpos);
+  fi->write_handler = NULL;
+  file_chg(fi);
+  fi->write_done(fi);
+  return 1;
+}
+
+/*
+ * Ask for `len' bytes to be read from `fi' into `buf'; read_done is called
+ * when finished. A zero `len' cancels reading.
+ */
+void
+file_read(struct main_file *fi, void *buf, uns len)
+{
+  ASSERT(fi->n.next);
+  /* With len == 0 the handler and the buffer are cleared and both counters become zero */
+  fi->read_handler = len ? file_read_handler : NULL;
+  fi->rbuf = len ? buf : NULL;
+  fi->rpos = 0;
+  fi->rlen = len;
+  file_chg(fi);
+}
+
+/*
+ * Ask for `len' bytes from `buf' to be written to `fi'; write_done is called
+ * when finished. A zero `len' cancels writing.
+ */
+void
+file_write(struct main_file *fi, void *buf, uns len)
+{
+  ASSERT(fi->n.next);
+  /* With len == 0 the handler and the buffer are cleared and both counters become zero */
+  fi->write_handler = len ? file_write_handler : NULL;
+  fi->wbuf = len ? buf : NULL;
+  fi->wpos = 0;
+  fi->wlen = len;
+  file_chg(fi);
+}
+
+/* Set the timeout timer of a watched file to `expires' (passed straight to timer_add(), so zero clears it). */
+void
+file_set_timeout(struct main_file *fi, timestamp_t expires)
+{
+ ASSERT(fi->n.next);
+ timer_add(&fi->timer, expires);
+}
+
+/* Close the descriptors of all files on main_file_list; the list itself is left untouched. */
+void
+file_close_all(void)
+{
+ CLIST_FOR_EACH(struct main_file *, f, main_file_list)
+ close(f->fd);
+}
+
+/* Append hook `ho' to main_hook_list; it must not be on the list yet. */
+void
+hook_add(struct main_hook *ho)
+{
+ DBG("MAIN: Adding hook %p", ho);
+ ASSERT(!ho->n.next);
+ clist_add_tail(&main_hook_list, &ho->n);
+}
+
+/* Remove hook `ho' from main_hook_list and clear its links, so that it can be added again. */
+void
+hook_del(struct main_hook *ho)
+{
+  DBG("MAIN: Deleting hook %p", ho);
+  ASSERT(ho->n.next);
+  clist_remove(&ho->n);
+  ho->n.prev = NULL;
+  ho->n.next = NULL;
+}
+
+/* SIGCHLD handler installed by process_add(); it only emits a debug message, children are reaped in main_loop(). */
+static void
+main_sigchld_handler(int x UNUSED)
+{
+ DBG("SIGCHLD received");
+}
+
+/*
+ * Start watching process `mp' (its handler must be set). The first call also
+ * installs the SIGCHLD handler.
+ */
+void
+process_add(struct main_process *mp)
+{
+  DBG("MAIN: Adding process %p (pid=%d)", mp, mp->pid);
+  ASSERT(!mp->n.next);
+  ASSERT(mp->handler);
+  clist_add_tail(&main_process_list, &mp->n);
+  if (main_sigchld_set_up)
+    return;
+
+  struct sigaction act;
+  bzero(&act, sizeof(act));
+  act.sa_flags = SA_NOCLDSTOP | SA_RESTART;
+  act.sa_handler = main_sigchld_handler;
+  sigaction(SIGCHLD, &act, NULL);
+  main_sigchld_set_up = 1;
+}
+
+/* Stop watching process `mp': unlink it from main_process_list and clear its `next' link, which the ASSERTs use as the "on the list" mark. */
+void
+process_del(struct main_process *mp)
+{
+ DBG("MAIN: Deleting process %p (pid=%d)", mp, mp->pid);
+ ASSERT(mp->n.next);
+ clist_remove(&mp->n);
+ mp->n.next = NULL;
+}
+
+/*
+ * Fork a child process and watch it as `mp'.
+ *
+ * Returns 0 in the child and 1 in the parent. If fork() fails, the status is
+ * set to -1 and the handler is called immediately; the result is 1 as well.
+ */
+int
+process_fork(struct main_process *mp)
+{
+  pid_t child = fork();
+
+  if (!child)
+    return 0;
+
+  if (child < 0)
+    {
+      DBG("MAIN: Fork failed");
+      mp->status = -1;
+      format_exit_status(mp->status_msg, -1);
+      mp->handler(mp);
+    }
+  else
+    {
+      DBG("MAIN: Forked process %d", (int) child);
+      mp->pid = child;
+      process_add(mp);
+    }
+  return 1;
+}
+
+/*
+ * Dump the state of the main loop (timers, files, hooks and processes) as
+ * L_DEBUG messages. Compiled to an empty function unless CONFIG_DEBUG is
+ * defined.
+ */
+void
+main_debug(void)
+{
+#ifdef CONFIG_DEBUG
+ msg(L_DEBUG, "### Main loop status on %lld", (long long)main_now);
+ msg(L_DEBUG, "\tActive timers:");
+ struct main_timer *tm;
+ /* Expiration times are printed relative to main_now; 999999 stands for "no expiration set" */
+ CLIST_WALK(tm, main_timer_list)
+ msg(L_DEBUG, "\t\t%p (expires %lld, data %p)", tm, (long long)(tm->expires ? tm->expires-main_now : 999999), tm->data);
+ struct main_file *fi;
+ msg(L_DEBUG, "\tActive files:");
+ CLIST_WALK(fi, main_file_list)
+ msg(L_DEBUG, "\t\t%p (fd %d, rh %p, wh %p, eh %p, expires %lld, data %p)",
+ fi, fi->fd, fi->read_handler, fi->write_handler, fi->error_handler,
+ (long long)(fi->timer.expires ? fi->timer.expires-main_now : 999999), fi->data);
+ msg(L_DEBUG, "\tActive hooks:");
+ struct main_hook *ho;
+ CLIST_WALK(ho, main_hook_list)
+ msg(L_DEBUG, "\t\t%p (func %p, data %p)", ho, ho->handler, ho->data);
+ msg(L_DEBUG, "\tActive processes:");
+ struct main_process *pr;
+ CLIST_WALK(pr, main_process_list)
+ msg(L_DEBUG, "\t\t%p (pid %d, data %p)", pr, pr->pid, pr->data);
+#endif
+}
+
+/*
+ * Rebuild the poll table from main_file_list. The table is reallocated with
+ * its size doubled until all files fit; every file gets a pointer to its
+ * entry and its event mask is recomputed by file_chg().
+ */
+static void
+main_rebuild_poll_table(void)
+{
+  struct main_file *fi;
+
+  if (main_file_cnt > main_poll_table_size)
+    {
+      if (!main_poll_table)
+	main_poll_table_size = 1;
+      else
+	xfree(main_poll_table);
+      for (; main_poll_table_size < main_file_cnt; main_poll_table_size *= 2)
+	;
+      main_poll_table = xmalloc(sizeof(struct pollfd) * main_poll_table_size);
+    }
+
+  struct pollfd *slot = main_poll_table;
+  DBG("MAIN: Rebuilding poll table: %d of %d entries set", main_file_cnt, main_poll_table_size);
+  CLIST_WALK(fi, main_file_list)
+    {
+      slot->fd = fi->fd;
+      fi->pollfd = slot;
+      slot++;
+      file_chg(fi);
+    }
+  main_poll_table_obsolete = 0;
+}
+
+/*
+ * The main loop. Each iteration:
+ *
+ *   1. refreshes the clock and runs the handlers of all expired timers,
+ *   2. calls all hooks and returns if one of them returned HOOK_SHUTDOWN,
+ *      if all of them returned HOOK_DONE, or if main_shutdown is set,
+ *   3. rebuilds the poll table if it is stale,
+ *   4. reaps exited children and calls the handlers of watched processes,
+ *   5. polls the watched files (until the nearest timer expires at the
+ *      latest) and calls the read/write handlers of the ready ones.
+ *
+ * main_init() must have been called before.
+ */
+void
+main_loop(void)
+{
+  DBG("MAIN: Entering main_loop");
+  ASSERT(main_timer_list.head.next);
+
+  struct main_file *fi;
+  struct main_hook *ho;
+  struct main_timer *tm;
+  struct main_process *pr;
+  cnode *tmp;
+
+  for (;;)
+    {
+      main_get_time();
+      timestamp_t wake = main_now + 1000000000;
+      /* Timer handlers are expected to re-add or delete their timer, otherwise this loop would not end */
+      while ((tm = clist_head(&main_timer_list)) && tm->expires <= main_now)
+	{
+	  DBG("MAIN: Timer %p expired at now-%lld", tm, (long long)(main_now - tm->expires));
+	  tm->handler(tm);
+	}
+      /* Start with the extreme values, so that an empty hook list satisfies none of the tests below */
+      int hook_min = HOOK_RETRY;
+      int hook_max = HOOK_SHUTDOWN;
+      CLIST_WALK_DELSAFE(ho, main_hook_list, tmp)
+	{
+	  DBG("MAIN: Hook %p", ho);
+	  int ret = ho->handler(ho);
+	  hook_min = MIN(hook_min, ret);
+	  hook_max = MAX(hook_max, ret);
+	}
+      if (hook_min == HOOK_SHUTDOWN ||
+	  hook_min == HOOK_DONE && hook_max == HOOK_DONE ||
+	  main_shutdown)
+	{
+	  DBG("MAIN: Shut down by %s", main_shutdown ? "main_shutdown" : "a hook");
+	  return;
+	}
+      /* A hook wants to be called again as soon as possible => do not sleep in poll() */
+      if (hook_max == HOOK_RETRY)
+	wake = 0;
+      if (main_poll_table_obsolete)
+	main_rebuild_poll_table();
+      if (!clist_empty(&main_process_list))
+	{
+	  int stat;
+	  pid_t pid;
+	  /* Sleep for at most 10 seconds while children are watched */
+	  wake = MIN(wake, main_now + 10000);
+	  while ((pid = waitpid(-1, &stat, WNOHANG)) > 0)
+	    {
+	      DBG("MAIN: Child %d exited with status %x", pid, stat);
+	      CLIST_WALK(pr, main_process_list)
+		if (pr->pid == pid)
+		  {
+		    pr->status = stat;
+		    process_del(pr);
+		    format_exit_status(pr->status_msg, pr->status);
+		    DBG("MAIN: Calling process exit handler");
+		    pr->handler(pr);
+		    break;
+		  }
+	      wake = 0;
+	    }
+	}
+      /* FIXME: Here is a small race window where SIGCHLD can come unnoticed. */
+      if ((tm = clist_head(&main_timer_list)) && tm->expires < wake)
+	wake = tm->expires;
+      int timeout = (wake ? wake - main_now : 0);
+      DBG("MAIN: Poll for %d fds and timeout %d ms", main_file_cnt, timeout);
+      /*
+       * Only a positive result means that some revents are valid. When poll()
+       * fails (for example with EINTR when SIGCHLD arrives), the revents fields
+       * are stale or uninitialized and must not be dispatched.
+       */
+      if (poll(main_poll_table, main_file_cnt, timeout) > 0)
+	{
+	  struct pollfd *p = main_poll_table;
+	  main_get_time();
+	  CLIST_WALK(fi, main_file_list)
+	    {
+	      if (p->revents & (POLLIN | POLLHUP | POLLERR))
+		{
+		  do
+		    DBG("MAIN: Read event on fd %d", p->fd);
+		  while (fi->read_handler && fi->read_handler(fi) && !main_poll_table_obsolete);
+		  if (main_poll_table_obsolete)	/* File entries have been inserted or deleted => better not risk continuing to nowhere */
+		    break;
+		}
+	      if (p->revents & (POLLOUT | POLLERR))
+		{
+		  do
+		    DBG("MAIN: Write event on fd %d", p->fd);
+		  while (fi->write_handler && fi->write_handler(fi) && !main_poll_table_obsolete);
+		  if (main_poll_table_obsolete)
+		    break;
+		}
+	      p++;
+	    }
+	}
+    }
+}
+
+#ifdef TEST
+
+static struct main_process mp;
+static struct main_file fin, fout;
+static struct main_hook hook;
+static struct main_timer tm;
+
+static byte rb[16];
+
+/* Test read_done callback: requests another read after a full buffer, removes the file on a short read (EOF). */
+static void dread(struct main_file *fi)
+{
+  if (fi->rpos >= fi->rlen)
+    {
+      log(L_INFO, "Read done");
+      file_read(fi, rb, sizeof(rb));
+      return;
+    }
+  log(L_INFO, "Read EOF");
+  file_del(fi);
+}
+
+/* Test error handler: logs the error together with its MFERR_xxx cause and stops watching the file. */
+static void derror(struct main_file *fi, int cause)
+{
+ log(L_INFO, "Error: %m !!! (cause %d)", cause);
+ file_del(fi);
+}
+
+/* Test write_done callback: just logs that the write has finished. */
+static void dwrite(struct main_file *fi UNUSED)
+{
+ log(L_INFO, "Write done");
+}
+
+/* Test hook: logs every call and asks to be called again when the loop is idle. */
+static int dhook(struct main_hook *ho UNUSED)
+{
+  log(L_INFO, "Hook called");
+  return HOOK_IDLE;
+}
+
+/* Test timer handler: logs a tick and re-arms the timer 10000 ms (main_now is in milliseconds) ahead. */
+static void dtimer(struct main_timer *tm)
+{
+ log(L_INFO, "Timer tick");
+ timer_add(tm, main_now + 10000);
+}
+
+/* Body of the test child process: logs, sleeps for 2 seconds, logs again and exits with status 0. */
+static void dentry(void)
+{
+ log(L_INFO, "*** SUBPROCESS START ***");
+ sleep(2);
+ log(L_INFO, "*** SUBPROCESS FINISH ***");
+ exit(0);
+}
+
+/* Test process handler: logs the PID and the raw exit status of the finished child. */
+static void dexit(struct main_process *pr)
+{
+ log(L_INFO, "Subprocess %d exited with status %x", pr->pid, pr->status);
+}
+
+/*
+ * Test driver: watches stdin for reading and stdout for writing, registers
+ * a hook and a timer, forks a child process and runs the main loop.
+ */
+int
+main(void)
+{
+ log_init(NULL);
+ main_init();
+
+ /* Read stdin in chunks of sizeof(rb) bytes */
+ fin.fd = 0;
+ fin.read_done = dread;
+ fin.error_handler = derror;
+ file_add(&fin);
+ file_read(&fin, rb, sizeof(rb));
+
+ /* Write a greeting to stdout */
+ fout.fd = 1;
+ fout.write_done = dwrite;
+ fout.error_handler = derror;
+ file_add(&fout);
+ file_write(&fout, "Hello, world!\n", 14);
+
+ hook.handler = dhook;
+ hook_add(&hook);
+
+ /* First tick after 1000 ms; dtimer() re-arms the timer itself */
+ tm.handler = dtimer;
+ timer_add(&tm, main_now + 1000);
+
+ /* process_fork() returns 0 in the child, which then runs dentry() and exits */
+ mp.handler = dexit;
+ if (!process_fork(&mp))
+ dentry();
+
+ main_debug();
+
+ main_loop();
+ log(L_INFO, "Finished.");
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Main Loop
+ *
+ * (c) 2004--2005 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_MAINLOOP_H
+#define _UCW_MAINLOOP_H
+
+#include "lib/clists.h"
+
+extern timestamp_t main_now; /* Current time in milliseconds since UNIX epoch */
+extern sh_time_t main_now_seconds; /* Current time in seconds since the epoch */
+extern uns main_shutdown;
+extern clist main_timer_list, main_file_list, main_hook_list, main_process_list;
+
+/* User-defined fields are marked with [*], all other fields must be initialized to zero. */
+
+/* Timers */
+
+/* A timer of the main loop; active timers are linked to main_timer_list. */
+struct main_timer {
+ cnode n; /* List node */
+ timestamp_t expires; /* Expiration time as passed to timer_add(); zero when the timer is not active */
+ void (*handler)(struct main_timer *tm); /* [*] Function to be called when the timer expires. Must re-add/del the timer.*/
+ void *data; /* [*] Data for use by the handler */
+};
+
+void timer_add(struct main_timer *tm, timestamp_t expires); /* Can modify a running timer, too */
+void timer_del(struct main_timer *tm);
+
+void main_get_time(void); /* Refresh main_now */
+
+/* Files to poll */
+
+/* A file descriptor watched by the main loop; linked to main_file_list by file_add(). */
+struct main_file {
+ cnode n; /* List node */
+ int fd; /* [*] File descriptor */
+ int (*read_handler)(struct main_file *fi); /* [*] To be called when ready for reading/writing; must call file_chg() afterwards */
+ int (*write_handler)(struct main_file *fi);
+ void (*error_handler)(struct main_file *fi, int cause); /* [*] Handler to call on errors */
+ void *data; /* [*] Data for use by the handlers */
+ byte *rbuf; /* Read/write pointers for use by file_read/write */
+ uns rpos, rlen; /* Bytes read so far / bytes requested */
+ byte *wbuf;
+ uns wpos, wlen; /* Bytes written so far / bytes requested */
+ void (*read_done)(struct main_file *fi); /* [*] Called when file_read is finished; rpos < rlen if EOF */
+ void (*write_done)(struct main_file *fi); /* [*] Called when file_write is finished */
+ struct main_timer timer; /* Timeout timer, see file_set_timeout() */
+ struct pollfd *pollfd; /* Entry of this file in the poll table */
+};
+
+/* Causes passed to the error_handler of a main_file */
+enum main_file_err_cause {
+ MFERR_READ, /* Reading from the file failed */
+ MFERR_WRITE, /* Writing to the file failed */
+ MFERR_TIMEOUT /* The timeout timer of the file has expired */
+};
+
+void file_add(struct main_file *fi);
+void file_chg(struct main_file *fi);
+void file_del(struct main_file *fi);
+void file_read(struct main_file *fi, void *buf, uns len);
+void file_write(struct main_file *fi, void *buf, uns len);
+void file_set_timeout(struct main_file *fi, timestamp_t expires);
+void file_close_all(void); /* Close all known main_file's; frequently used before fork() */
+
+/* Hooks to be called in each iteration of the main loop */
+
+/* A hook called in each iteration of the main loop; linked to main_hook_list by hook_add(). */
+struct main_hook {
+ cnode n; /* List node */
+ int (*handler)(struct main_hook *ho); /* [*] Hook function; returns HOOK_xxx */
+ void *data; /* [*] For use by the handler */
+};
+
+/* Return values of hook handlers. The main loop compares them numerically (minimum and maximum over all hooks), so the ordering matters. */
+enum main_hook_return {
+ HOOK_IDLE, /* Call again when the main loop becomes idle again */
+ HOOK_RETRY, /* Call again as soon as possible */
+ HOOK_DONE = -1, /* Shut down the main loop if all hooks return this value */
+ HOOK_SHUTDOWN = -2 /* Shut down the main loop immediately */
+};
+
+void hook_add(struct main_hook *ho);
+void hook_del(struct main_hook *ho);
+
+/* Processes to watch */
+
+/* A child process watched by the main loop; linked to main_process_list by process_add(). */
+struct main_process {
+ cnode n; /* List node */
+ int pid; /* Process id (0=not running) */
+ int status; /* Exit status (-1=fork failed) */
+ char status_msg[EXIT_STATUS_MSG_SIZE]; /* Filled in by format_exit_status() before the handler is called */
+ void (*handler)(struct main_process *mp); /* [*] Called when the process exits; process_del done automatically */
+ void *data; /* [*] For use by the handler */
+};
+
+void process_add(struct main_process *mp);
+void process_del(struct main_process *mp);
+int process_fork(struct main_process *mp);
+
+/* The main loop */
+
+void main_init(void);
+void main_loop(void);
+void main_debug(void);
+
+#endif
--- /dev/null
+/*
+ * This code implements the MD5 message-digest algorithm.
+ * The algorithm is due to Ron Rivest. This code was
+ * written by Colin Plumb in 1993, no copyright is claimed.
+ * This code is in the public domain; do with it what you wish.
+ *
+ * Equivalent code is available from RSA Data Security, Inc.
+ * This code has been tested against that, and is equivalent,
+ * except that you don't need to include two pages of legalese
+ * with every copy.
+ *
+ * To compute the message digest of a chunk of bytes, declare an
+ * MD5Context structure, pass it to MD5Init, call MD5Update as
+ * needed on buffers full of bytes, and then call MD5Final, which
+ * will fill a supplied 16-byte array with the digest.
+ */
+
+#include "lib/lib.h"
+#include "lib/md5.h"
+
+#include <string.h> /* for memcpy() */
+
+#ifdef CPU_LITTLE_ENDIAN
+#define byteReverse(buf, len) /* Nothing */
+#else
+void byteReverse(unsigned char *buf, unsigned longs);
+
+/*
+ * Rewrite `longs' consecutive 32-bit words at `buf' so that each word holds
+ * the value of its four bytes read in little-endian order.
+ * `longs' must be at least 1.
+ *
+ * Note: this code is harmless on little-endian machines.
+ */
+void byteReverse(unsigned char *buf, unsigned longs)
+{
+    do {
+	uint32 hi = (unsigned) buf[3] << 8 | buf[2];
+	uint32 lo = (unsigned) buf[1] << 8 | buf[0];
+
+	*(uint32 *) buf = hi << 16 | lo;
+	buf += 4;
+    } while (--longs);
+}
+#endif
+
+/*
+ * Start MD5 accumulation. Set bit count to 0 and buffer to mysterious
+ * initialization constants.
+ */
+void MD5Init(struct MD5Context *ctx)
+{
+ /* Initial values of the four 32-bit state words */
+ ctx->buf[0] = 0x67452301;
+ ctx->buf[1] = 0xefcdab89;
+ ctx->buf[2] = 0x98badcfe;
+ ctx->buf[3] = 0x10325476;
+
+ /* 64-bit count of processed bits: bits[0] is the low word, bits[1] the high one */
+ ctx->bits[0] = 0;
+ ctx->bits[1] = 0;
+}
+
+/*
+ * Update context to reflect the concatenation of another buffer full
+ * of bytes. Complete 64-byte blocks are passed to MD5Transform; the
+ * remainder is kept in ctx->in for the next call.
+ */
+void MD5Update(struct MD5Context *ctx, unsigned char const *buf, unsigned len)
+{
+ uint32 t;
+
+ /* Update bitcount */
+
+ t = ctx->bits[0];
+ if ((ctx->bits[0] = t + ((uint32) len << 3)) < t)
+ ctx->bits[1]++; /* Carry from low to high */
+ ctx->bits[1] += len >> 29;
+
+ t = (t >> 3) & 0x3f; /* Bytes already in ctx->in */
+
+ /* Handle any leading odd-sized chunks */
+
+ if (t) {
+ unsigned char *p = (unsigned char *) ctx->in + t;
+
+ t = 64 - t; /* Free space left in ctx->in */
+ if (len < t) {
+ /* Still not a complete block: just store the data */
+ memcpy(p, buf, len);
+ return;
+ }
+ memcpy(p, buf, t);
+ byteReverse(ctx->in, 16);
+ MD5Transform(ctx->buf, (uint32 *) ctx->in);
+ buf += t;
+ len -= t;
+ }
+ /* Process data in 64-byte chunks */
+
+ while (len >= 64) {
+ memcpy(ctx->in, buf, 64);
+ byteReverse(ctx->in, 16);
+ MD5Transform(ctx->buf, (uint32 *) ctx->in);
+ buf += 64;
+ len -= 64;
+ }
+
+ /* Handle any remaining bytes of data. */
+
+ memcpy(ctx->in, buf, len);
+}
+
+/*
+ * Final wrapup - pad to 64-byte boundary with the bit pattern
+ * 1 0* (64-bit count of bits processed, MSB-first)
+ *
+ * Stores the 16-byte digest to `digest' and wipes the whole context.
+ */
+void MD5Final(unsigned char digest[16], struct MD5Context *ctx)
+{
+    unsigned count;
+    unsigned char *p;
+
+    /* Compute number of bytes mod 64 */
+    count = (ctx->bits[0] >> 3) & 0x3F;
+
+    /* Set the first char of padding to 0x80.  This is safe since there is
+       always at least one byte free */
+    p = ctx->in + count;
+    *p++ = 0x80;
+
+    /* Bytes of padding needed to make 64 bytes */
+    count = 64 - 1 - count;
+
+    /* Pad out to 56 mod 64 */
+    if (count < 8) {
+	/* Two lots of padding:  Pad the first block to 64 bytes */
+	memset(p, 0, count);
+	byteReverse(ctx->in, 16);
+	MD5Transform(ctx->buf, (uint32 *) ctx->in);
+
+	/* Now fill the next block with 56 bytes */
+	memset(ctx->in, 0, 56);
+    } else {
+	/* Pad block to 56 bytes */
+	memset(p, 0, count - 8);
+    }
+    byteReverse(ctx->in, 14);
+
+    /* Append length in bits and transform */
+    ((uint32 *) ctx->in)[14] = ctx->bits[0];
+    ((uint32 *) ctx->in)[15] = ctx->bits[1];
+
+    MD5Transform(ctx->buf, (uint32 *) ctx->in);
+    byteReverse((unsigned char *) ctx->buf, 4);
+    memcpy(digest, ctx->buf, 16);
+    /* Wipe the whole structure: sizeof(ctx) would be just the size of the pointer */
+    memset(ctx, 0, sizeof(*ctx));	/* In case it's sensitive */
+}
+
+/* The four core functions - F1 is optimized somewhat */
+
+/* #define F1(x, y, z) (x & y | ~x & z) */
+#define F1(x, y, z) (z ^ (x & (y ^ z)))
+#define F2(x, y, z) F1(z, x, y)
+#define F3(x, y, z) (x ^ y ^ z)
+#define F4(x, y, z) (y ^ (x | ~z))
+
+/* This is the central step in the MD5 algorithm. */
+#define MD5STEP(f, w, x, y, z, data, s) \
+ ( w += f(x, y, z) + data, w = w<<s | w>>(32-s), w += x )
+
+/*
+ * The core of the MD5 algorithm, this alters an existing MD5 hash to
+ * reflect the addition of 16 longwords of new data. MD5Update blocks
+ * the data and converts bytes into longwords for this routine.
+ *
+ * buf - the four state words, updated in place
+ * in  - one block of input as 16 32-bit words
+ */
+void MD5Transform(uint32 buf[4], uint32 const in[16])
+{
+ uint32 a, b, c, d;
+
+ a = buf[0];
+ b = buf[1];
+ c = buf[2];
+ d = buf[3];
+
+ /* Round 1: function F1, input words taken in order */
+ MD5STEP(F1, a, b, c, d, in[0] + 0xd76aa478, 7);
+ MD5STEP(F1, d, a, b, c, in[1] + 0xe8c7b756, 12);
+ MD5STEP(F1, c, d, a, b, in[2] + 0x242070db, 17);
+ MD5STEP(F1, b, c, d, a, in[3] + 0xc1bdceee, 22);
+ MD5STEP(F1, a, b, c, d, in[4] + 0xf57c0faf, 7);
+ MD5STEP(F1, d, a, b, c, in[5] + 0x4787c62a, 12);
+ MD5STEP(F1, c, d, a, b, in[6] + 0xa8304613, 17);
+ MD5STEP(F1, b, c, d, a, in[7] + 0xfd469501, 22);
+ MD5STEP(F1, a, b, c, d, in[8] + 0x698098d8, 7);
+ MD5STEP(F1, d, a, b, c, in[9] + 0x8b44f7af, 12);
+ MD5STEP(F1, c, d, a, b, in[10] + 0xffff5bb1, 17);
+ MD5STEP(F1, b, c, d, a, in[11] + 0x895cd7be, 22);
+ MD5STEP(F1, a, b, c, d, in[12] + 0x6b901122, 7);
+ MD5STEP(F1, d, a, b, c, in[13] + 0xfd987193, 12);
+ MD5STEP(F1, c, d, a, b, in[14] + 0xa679438e, 17);
+ MD5STEP(F1, b, c, d, a, in[15] + 0x49b40821, 22);
+
+ /* Round 2: function F2 */
+ MD5STEP(F2, a, b, c, d, in[1] + 0xf61e2562, 5);
+ MD5STEP(F2, d, a, b, c, in[6] + 0xc040b340, 9);
+ MD5STEP(F2, c, d, a, b, in[11] + 0x265e5a51, 14);
+ MD5STEP(F2, b, c, d, a, in[0] + 0xe9b6c7aa, 20);
+ MD5STEP(F2, a, b, c, d, in[5] + 0xd62f105d, 5);
+ MD5STEP(F2, d, a, b, c, in[10] + 0x02441453, 9);
+ MD5STEP(F2, c, d, a, b, in[15] + 0xd8a1e681, 14);
+ MD5STEP(F2, b, c, d, a, in[4] + 0xe7d3fbc8, 20);
+ MD5STEP(F2, a, b, c, d, in[9] + 0x21e1cde6, 5);
+ MD5STEP(F2, d, a, b, c, in[14] + 0xc33707d6, 9);
+ MD5STEP(F2, c, d, a, b, in[3] + 0xf4d50d87, 14);
+ MD5STEP(F2, b, c, d, a, in[8] + 0x455a14ed, 20);
+ MD5STEP(F2, a, b, c, d, in[13] + 0xa9e3e905, 5);
+ MD5STEP(F2, d, a, b, c, in[2] + 0xfcefa3f8, 9);
+ MD5STEP(F2, c, d, a, b, in[7] + 0x676f02d9, 14);
+ MD5STEP(F2, b, c, d, a, in[12] + 0x8d2a4c8a, 20);
+
+ /* Round 3: function F3 */
+ MD5STEP(F3, a, b, c, d, in[5] + 0xfffa3942, 4);
+ MD5STEP(F3, d, a, b, c, in[8] + 0x8771f681, 11);
+ MD5STEP(F3, c, d, a, b, in[11] + 0x6d9d6122, 16);
+ MD5STEP(F3, b, c, d, a, in[14] + 0xfde5380c, 23);
+ MD5STEP(F3, a, b, c, d, in[1] + 0xa4beea44, 4);
+ MD5STEP(F3, d, a, b, c, in[4] + 0x4bdecfa9, 11);
+ MD5STEP(F3, c, d, a, b, in[7] + 0xf6bb4b60, 16);
+ MD5STEP(F3, b, c, d, a, in[10] + 0xbebfbc70, 23);
+ MD5STEP(F3, a, b, c, d, in[13] + 0x289b7ec6, 4);
+ MD5STEP(F3, d, a, b, c, in[0] + 0xeaa127fa, 11);
+ MD5STEP(F3, c, d, a, b, in[3] + 0xd4ef3085, 16);
+ MD5STEP(F3, b, c, d, a, in[6] + 0x04881d05, 23);
+ MD5STEP(F3, a, b, c, d, in[9] + 0xd9d4d039, 4);
+ MD5STEP(F3, d, a, b, c, in[12] + 0xe6db99e5, 11);
+ MD5STEP(F3, c, d, a, b, in[15] + 0x1fa27cf8, 16);
+ MD5STEP(F3, b, c, d, a, in[2] + 0xc4ac5665, 23);
+
+ /* Round 4: function F4 */
+ MD5STEP(F4, a, b, c, d, in[0] + 0xf4292244, 6);
+ MD5STEP(F4, d, a, b, c, in[7] + 0x432aff97, 10);
+ MD5STEP(F4, c, d, a, b, in[14] + 0xab9423a7, 15);
+ MD5STEP(F4, b, c, d, a, in[5] + 0xfc93a039, 21);
+ MD5STEP(F4, a, b, c, d, in[12] + 0x655b59c3, 6);
+ MD5STEP(F4, d, a, b, c, in[3] + 0x8f0ccc92, 10);
+ MD5STEP(F4, c, d, a, b, in[10] + 0xffeff47d, 15);
+ MD5STEP(F4, b, c, d, a, in[1] + 0x85845dd1, 21);
+ MD5STEP(F4, a, b, c, d, in[8] + 0x6fa87e4f, 6);
+ MD5STEP(F4, d, a, b, c, in[15] + 0xfe2ce6e0, 10);
+ MD5STEP(F4, c, d, a, b, in[6] + 0xa3014314, 15);
+ MD5STEP(F4, b, c, d, a, in[13] + 0x4e0811a1, 21);
+ MD5STEP(F4, a, b, c, d, in[4] + 0xf7537e82, 6);
+ MD5STEP(F4, d, a, b, c, in[11] + 0xbd3af235, 10);
+ MD5STEP(F4, c, d, a, b, in[2] + 0x2ad7d2bb, 15);
+ MD5STEP(F4, b, c, d, a, in[9] + 0xeb86d391, 21);
+
+ /* Add the result of this block to the running state */
+ buf[0] += a;
+ buf[1] += b;
+ buf[2] += c;
+ buf[3] += d;
+}
--- /dev/null
+/*
+ * UCW Library -- MD5 Message Digest
+ *
+ * This file is in public domain (see lib/md5.c).
+ */
+
+#ifndef _UCW_MD5_H
+#define _UCW_MD5_H
+
+typedef u32 uint32;
+
+/* State of an MD5 computation; set up by MD5Init, fed by MD5Update, finished and wiped by MD5Final. */
+struct MD5Context {
+ uint32 buf[4]; /* The four 32-bit words of the running hash state */
+ uint32 bits[2]; /* 64-bit count of processed bits: [0] low word, [1] high word */
+ unsigned char in[64]; /* Input bytes waiting for a complete 64-byte block */
+};
+
+void MD5Init(struct MD5Context *context);
+void MD5Update(struct MD5Context *context, unsigned char const *buf,
+ unsigned len);
+void MD5Final(unsigned char digest[16], struct MD5Context *context);
+void MD5Transform(uint32 buf[4], uint32 const in[16]);
+
+#endif /* !_UCW_MD5_H */
--- /dev/null
+/*
+ * UCW Library -- MD5 Binary <-> Hex Conversions
+ *
+ * (c) 1997 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/chartype.h"
+
+#include <stdio.h>
+
+/*
+ * Convert MD5_SIZE bytes at `s' to upper-case hexadecimal digits. `d' must
+ * have room for 2*MD5_SIZE characters and the terminating zero.
+ */
+void
+md5_to_hex(const byte *s, char *d)
+{
+  const byte *end = s + MD5_SIZE;
+
+  while (s < end)
+    {
+      sprintf(d, "%02X", *s++);
+      d += 2;			/* Each byte always yields exactly two digits */
+    }
+}
+
+/*
+ * Convert 2*MD5_SIZE hexadecimal digits at `s' to MD5_SIZE bytes at `d'.
+ * Calls die() on the first character which is not a hexadecimal digit.
+ */
+void
+hex_to_md5(const char *s, byte *d)
+{
+  uns i;
+
+  for (i = 0; i < MD5_SIZE; i++, s += 2)
+    {
+      if (!(Cxdigit(s[0]) && Cxdigit(s[1])))
+	die("hex_to_md5: syntax error");
+      uns hi = Cxvalue(s[0]);
+      uns lo = Cxvalue(s[1]);
+      d[i] = (hi << 4) | lo;
+    }
+}
--- /dev/null
+/*
+ * UCW Library -- Memory Pools (Formatting)
+ *
+ * (c) 2005 Martin Mares <mj@ucw.cz>
+ * (c) 2007 Pavel Charvat <pchar@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/mempool.h"
+
+#include <alloca.h>
+#include <stdio.h>
+#include <string.h>
+
+/*
+ * Format a string into the growing buffer of pool `mp', starting `ofs' bytes
+ * from the beginning of the buffer, and close the buffer right after the
+ * terminating zero. Returns the start of the buffer, i.e. the formatted text
+ * preceded by the `ofs' bytes which were there before.
+ */
+static char *
+mp_vprintf_at(struct mempool *mp, uns ofs, const char *fmt, va_list args)
+{
+  /* Make sure there is room for at least the terminating zero behind the existing prefix */
+  char *ret = mp_grow(mp, ofs + 1) + ofs;
+  va_list args2;
+  va_copy(args2, args);
+  int cnt = vsnprintf(ret, mp_avail(mp) - ofs, fmt, args2);
+  va_end(args2);
+  if (cnt < 0)
+    {
+      /* Our C library doesn't support C99 return value of vsnprintf, so we need to iterate */
+      do
+	{
+	  ret = mp_expand(mp) + ofs;
+	  va_copy(args2, args);
+	  cnt = vsnprintf(ret, mp_avail(mp) - ofs, fmt, args2);
+	  va_end(args2);
+	}
+      while (cnt < 0);
+    }
+  else if ((uns)cnt >= mp_avail(mp) - ofs)
+    {
+      /*
+       * The output was truncated. The buffer has to hold the prefix as well:
+       * asking for only cnt + 1 bytes (as this code used to do) can leave the
+       * buffer too small when ofs is non-zero.
+       */
+      ret = mp_grow(mp, ofs + cnt + 1) + ofs;
+      va_copy(args2, args);
+      int cnt2 = vsnprintf(ret, cnt + 1, fmt, args2);
+      va_end(args2);
+      ASSERT(cnt2 == cnt);
+    }
+  mp_end(mp, ret + cnt + 1);
+  return ret - ofs;
+}
+
+/* Format <fmt> with <args> to a new zero-terminated string allocated from <mp>. */
+char *
+mp_vprintf(struct mempool *mp, const char *fmt, va_list args)
+{
+  char *formatted;
+
+  /* Open a fresh growing buffer; mp_vprintf_at() enlarges and closes it */
+  mp_start(mp, 1);
+  formatted = mp_vprintf_at(mp, 0, fmt, args);
+  return formatted;
+}
+
+/* printf-like front end of mp_vprintf(): returns the formatted string allocated from <p>. */
+char *
+mp_printf(struct mempool *p, const char *fmt, ...)
+{
+  va_list ap;
+  char *formatted;
+
+  va_start(ap, fmt);
+  formatted = mp_vprintf(p, fmt, ap);
+  va_end(ap);
+  return formatted;
+}
+
+/*
+ * Reopen the block <ptr> of <mp> and append the formatted text to it.
+ * The last byte of the old block (the terminating zero written by the
+ * previous formatting call) is overwritten by the new text.
+ * Returns the possibly moved start of the string.
+ */
+char *
+mp_vprintf_append(struct mempool *mp, char *ptr, const char *fmt, va_list args)
+{
+  uns ofs = mp_open(mp, ptr);
+  /* An empty block has no terminating zero to overwrite */
+  ASSERT(ofs);
+  uns text_len = ofs - 1;
+  return mp_vprintf_at(mp, text_len, fmt, args);
+}
+
+/* printf-like front end of mp_vprintf_append(). */
+char *
+mp_printf_append(struct mempool *mp, char *ptr, const char *fmt, ...)
+{
+  va_list ap;
+  char *extended;
+
+  va_start(ap, fmt);
+  extended = mp_vprintf_append(mp, ptr, fmt, ap);
+  va_end(ap);
+  return extended;
+}
+
+#ifdef TEST
+
+/* Self-test: format a string, append to it and format one longer than the pool's 64-byte chunk request. */
+int main(void)
+{
+  struct mempool *pool = mp_new(64);
+  char *text;
+
+  text = mp_printf(pool, "<Hello, %s!>", "World");
+  fputs(text, stdout);
+
+  text = mp_printf_append(pool, text, "<Appended>");
+  fputs(text, stdout);
+
+  text = mp_printf(pool, "<Hello, %50s!>\n", "World");
+  fputs(text, stdout);
+
+  return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Memory Pools (String Operations)
+ *
+ * (c) 2004 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/mempool.h"
+
+#include <alloca.h>
+#include <string.h>
+
+/* Return a copy of the zero-terminated string <s> allocated (unaligned) from the pool <p>. */
+char *
+mp_strdup(struct mempool *p, const char *s)
+{
+  /* The terminating zero is copied together with the characters */
+  uns bytes = strlen(s) + 1;
+  char *copy = mp_alloc_fast_noalign(p, bytes);
+  return memcpy(copy, s, bytes);
+}
+
+/* Return an aligned copy of <len> bytes starting at <s>, allocated from the pool <p>. */
+void *
+mp_memdup(struct mempool *p, const void *s, uns len)
+{
+  void *copy = mp_alloc_fast(p, len);
+  return memcpy(copy, s, len);
+}
+
+/*
+ * Concatenate all strings passed as variable arguments (the list is
+ * terminated by a NULL pointer) to a single zero-terminated string
+ * allocated from the pool <p>.
+ *
+ * The argument list is walked three times: to count the strings,
+ * to measure them and finally to copy them.
+ */
+char *
+mp_multicat(struct mempool *p, ...)
+{
+ va_list args, a;
+ va_start(args, p);
+ char *x, *y;
+ uns cnt = 0;
+ /* Pass 1: count the arguments, so that we know how many lengths to remember */
+ va_copy(a, args);
+ while (x = va_arg(a, char *))
+ cnt++;
+ /* The lengths are kept on the stack to avoid calling strlen() twice per string */
+ uns *sizes = alloca(cnt * sizeof(uns));
+ uns len = 1; /* one byte for the terminating zero */
+ cnt = 0;
+ va_end(a);
+ /* Pass 2: measure the strings and sum up the total length */
+ va_copy(a, args);
+ while (x = va_arg(a, char *))
+ len += sizes[cnt++] = strlen(x);
+ char *buf = mp_alloc_fast_noalign(p, len);
+ y = buf;
+ va_end(a);
+ cnt = 0;
+ /* Pass 3: copy the strings one after another (this pass consumes <args> itself) */
+ while (x = va_arg(args, char *))
+ {
+ memcpy(y, x, sizes[cnt]);
+ y += sizes[cnt++];
+ }
+ *y = 0;
+ va_end(args);
+ return buf;
+}
+
+/*
+ * Join the <n> strings of the array <a> to a single zero-terminated string
+ * allocated from the pool <p>. If <sep> is non-zero, it is stored as
+ * a single character between each two consecutive strings.
+ */
+char *
+mp_strjoin(struct mempool *p, char **a, uns n, uns sep)
+{
+  uns lens[n];
+  uns total = 1;		/* the terminating zero */
+  uns i;
+
+  for (i = 0; i < n; i++)
+    {
+      lens[i] = strlen(a[i]);
+      total += lens[i];
+    }
+  /* Separators go between the strings only, hence n-1 of them */
+  if (n && sep)
+    total += n - 1;
+
+  char *joined = mp_alloc_fast_noalign(p, total);
+  char *w = joined;
+  for (i = 0; i < n; i++)
+    {
+      if (i && sep)
+        *w++ = sep;
+      memcpy(w, a[i], lens[i]);
+      w += lens[i];
+    }
+  *w = 0;
+  return joined;
+}
+
+#ifdef TEST
+
+#include <stdio.h>
+
+/* Self-test of mp_strdup(), mp_multicat() and mp_strjoin() (with and without a separator). */
+int main(void)
+{
+  struct mempool *pool = mp_new(64);
+  char *words[] = { "bugs", "gnats", "insects" };
+
+  char *digits = mp_strdup(pool, "12345");
+  char *wrapped = mp_multicat(pool, "<<", digits, ">>", NULL);
+  puts(wrapped);
+
+  char *dotted = mp_strjoin(pool, words, 3, '.');
+  puts(dotted);
+  char *plain = mp_strjoin(pool, words, 3, 0);
+  puts(plain);
+  return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Memory Pools (One-Time Allocation)
+ *
+ * (c) 1997--2001 Martin Mares <mj@ucw.cz>
+ * (c) 2007 Pavel Charvat <pchar@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#undef LOCAL_DEBUG
+
+#include "lib/lib.h"
+#include "lib/mempool.h"
+
+#include <string.h>
+
+/* Size of the struct mempool_chunk stored behind the data of every chunk, rounded up to CPU_STRUCT_ALIGN */
+#define MP_CHUNK_TAIL ALIGN_TO(sizeof(struct mempool_chunk), CPU_STRUCT_ALIGN)
+/* Largest size accepted by mp_alloc_internal() and mp_grow_internal(); leaves room for the tail and one page below ~0U */
+#define MP_SIZE_MAX (~0U - MP_CHUNK_TAIL - CPU_PAGE_SIZE)
+
+/*
+ * Chunk descriptor. It is stored at the END of the chunk: the data occupy
+ * the <size> bytes immediately preceding this structure
+ * (see mp_new_big_chunk() and mp_free_big_chunk()).
+ */
+struct mempool_chunk {
+ struct mempool_chunk *next; /* next chunk in the same chain (small, big or unused) */
+ uns size; /* number of data bytes preceding this structure */
+};
+
+/*
+ * Round the requested chunk data size up: with POOL_IS_MMAP so that data
+ * plus the chunk tail fill whole pages, otherwise to a multiple of
+ * CPU_STRUCT_ALIGN.
+ */
+static uns
+mp_align_size(uns size)
+{
+  uns rounded;
+
+#ifdef POOL_IS_MMAP
+  rounded = ALIGN_TO(size + MP_CHUNK_TAIL, CPU_PAGE_SIZE) - MP_CHUNK_TAIL;
+#else
+  rounded = ALIGN_TO(size, CPU_STRUCT_ALIGN);
+#endif
+  return rounded;
+}
+
+/*
+ * Initialize a caller-provided mempool structure. No chunk is allocated
+ * here; all remaining fields start zeroed.
+ */
+void
+mp_init(struct mempool *pool, uns chunk_size)
+{
+  uns aligned = mp_align_size(MAX(sizeof(struct mempool), chunk_size));
+  struct mempool fresh = {
+    .chunk_size = aligned,
+    .threshold = aligned >> 1,		/* larger requests go to separate big chunks */
+    .last_big = &pool->last_big,	/* sentinel which never equals an allocated block */
+  };
+
+  *pool = fresh;
+}
+
+/*
+ * Allocate a chunk with <size> bytes of data by xmalloc() and return
+ * a pointer to its descriptor located just behind the data.
+ * The <next> field of the descriptor is left uninitialized.
+ */
+static void *
+mp_new_big_chunk(uns size)
+{
+  void *data = xmalloc(size + MP_CHUNK_TAIL);
+  struct mempool_chunk *tail = data + size;
+
+  tail->size = size;
+  return tail;
+}
+
+/* Free a chunk obtained from mp_new_big_chunk(); the allocation starts <size> bytes before the descriptor. */
+static void
+mp_free_big_chunk(struct mempool_chunk *chunk)
+{
+  void *data = (void *)chunk - chunk->size;
+
+  xfree(data);
+}
+
+/*
+ * Allocate a regular (small) chunk with <size> bytes of data: by
+ * page_alloc() when POOL_IS_MMAP is defined, otherwise the same way
+ * as big chunks. Returns the descriptor located behind the data.
+ */
+static void *
+mp_new_chunk(uns size)
+{
+#ifdef POOL_IS_MMAP
+  void *data = page_alloc(size + MP_CHUNK_TAIL);
+  struct mempool_chunk *tail = data + size;
+
+  tail->size = size;
+  return tail;
+#else
+  void *tail = mp_new_big_chunk(size);
+  return tail;
+#endif
+}
+
+/* Release a chunk obtained from mp_new_chunk() (the counterpart of that function in both configurations). */
+static void
+mp_free_chunk(struct mempool_chunk *chunk)
+{
+#ifdef POOL_IS_MMAP
+  uns data_size = chunk->size;
+  void *data = (void *)chunk - data_size;
+
+  page_free(data, data_size + MP_CHUNK_TAIL);
+#else
+  mp_free_big_chunk(chunk);
+#endif
+}
+
+/*
+ * Allocate and initialize a new memory pool. The mempool structure itself
+ * is placed at the beginning of the pool's first chunk, so that the pool
+ * needs no separate allocation; the rest of that chunk is available
+ * for data.
+ */
+struct mempool *
+mp_new(uns chunk_size)
+{
+ /* The chunk must be large enough to hold at least the mempool structure */
+ chunk_size = mp_align_size(MAX(sizeof(struct mempool), chunk_size));
+ struct mempool_chunk *chunk = mp_new_chunk(chunk_size);
+ /* The descriptor lies behind the data, so the data (and the pool) start chunk_size bytes before it */
+ struct mempool *pool = (void *)chunk - chunk_size;
+ DBG("Creating mempool %p with %u bytes long chunks", pool, chunk_size);
+ chunk->next = NULL;
+ *pool = (struct mempool) {
+ /* Free space of the first chunk is what remains behind the pool structure */
+ .state = { .free = { chunk_size - sizeof(*pool) }, .last = { chunk } },
+ .chunk_size = chunk_size,
+ .threshold = chunk_size >> 1,
+ .last_big = &pool->last_big };
+ return pool;
+}
+
+/* Release every chunk of a chain of regular chunks linked by <next>. */
+static void
+mp_free_chain(struct mempool_chunk *chunk)
+{
+  struct mempool_chunk *following;
+
+  for (; chunk; chunk = following)
+    {
+      /* Remember the successor first, the descriptor dies with the chunk */
+      following = chunk->next;
+      mp_free_chunk(chunk);
+    }
+}
+
+/* Release every chunk of a chain of big chunks linked by <next>. */
+static void
+mp_free_big_chain(struct mempool_chunk *chunk)
+{
+  struct mempool_chunk *following;
+
+  for (; chunk; chunk = following)
+    {
+      /* Remember the successor first, the descriptor dies with the chunk */
+      following = chunk->next;
+      mp_free_big_chunk(chunk);
+    }
+}
+
+/*
+ * Free all memory of the pool: the big chunks, the cached unused chunks
+ * and finally the small chunks. The small chain is freed last, because
+ * <pool> itself may live inside one of its chunks (see mp_new()) and is
+ * still being read by the preceding two calls.
+ */
+void
+mp_delete(struct mempool *pool)
+{
+ DBG("Deleting mempool %p", pool);
+ mp_free_big_chain(pool->state.last[1]);
+ mp_free_chain(pool->unused);
+ mp_free_chain(pool->state.last[0]); // can contain the mempool structure
+}
+
+/*
+ * Discard all data allocated from the pool. Big chunks are freed, small
+ * chunks are moved to the list of unused chunks for later reuse. The chunk
+ * holding the mempool structure itself (if any) is kept as the only
+ * active small chunk.
+ */
+void
+mp_flush(struct mempool *pool)
+{
+ mp_free_big_chain(pool->state.last[1]);
+ struct mempool_chunk *chunk, *next;
+ /* Stop at the chunk whose data start at <pool>, i.e., the one mp_new() put the pool into */
+ for (chunk = pool->state.last[0]; chunk && (void *)chunk - chunk->size != pool; chunk = next)
+ {
+ next = chunk->next;
+ chunk->next = pool->unused;
+ pool->unused = chunk;
+ }
+ /* <chunk> is now either the pool's own chunk or NULL (pool set up by mp_init()) */
+ pool->state.last[0] = chunk;
+ pool->state.free[0] = chunk ? chunk->size - sizeof(*pool) : 0;
+ pool->state.last[1] = NULL;
+ pool->state.free[1] = 0;
+ pool->state.next = NULL;
+ pool->last_big = &pool->last_big;
+}
+
+/*
+ * Add the chunks of one chain to the statistics slot <idx> and then add
+ * the slot's accumulated size to the grand total.
+ */
+static void
+mp_stats_chain(struct mempool_chunk *chunk, struct mempool_stats *stats, uns idx)
+{
+  for (; chunk; chunk = chunk->next)
+    {
+      stats->chain_count[idx]++;
+      stats->chain_size[idx] += chunk->size + sizeof(*chunk);
+    }
+  stats->total_size += stats->chain_size[idx];
+}
+
+/* Fill <stats> with chunk counts and sizes of the small (0), big (1) and unused (2) chains of <pool>. */
+void
+mp_stats(struct mempool *pool, struct mempool_stats *stats)
+{
+  struct mempool_chunk *heads[3] = {
+    pool->state.last[0],
+    pool->state.last[1],
+    pool->unused,
+  };
+
+  bzero(stats, sizeof(*stats));
+  for (uns idx = 0; idx < 3; idx++)
+    mp_stats_chain(heads[idx], stats, idx);
+}
+
+/*
+ * Slow path of the allocation functions, called when the request does not
+ * fit into the current small chunk. Requests up to the pool's threshold
+ * get a new (or recycled) small chunk; larger ones get a dedicated big
+ * chunk. Sets pool->idx to the chain used (0 = small, 1 = big), returns
+ * the start of the block and dies if <size> exceeds MP_SIZE_MAX.
+ */
+void *
+mp_alloc_internal(struct mempool *pool, uns size)
+{
+ struct mempool_chunk *chunk;
+ if (size <= pool->threshold)
+ {
+ pool->idx = 0;
+ /* Prefer a chunk recycled by mp_flush() or mp_restore() */
+ if (pool->unused)
+ {
+ chunk = pool->unused;
+ pool->unused = chunk->next;
+ }
+ else
+ chunk = mp_new_chunk(pool->chunk_size);
+ chunk->next = pool->state.last[0];
+ pool->state.last[0] = chunk;
+ /* The block is placed at the very beginning of the chunk's data */
+ pool->state.free[0] = pool->chunk_size - size;
+ return (void *)chunk - pool->chunk_size;
+ }
+ else if (likely(size <= MP_SIZE_MAX))
+ {
+ pool->idx = 1;
+ uns aligned = ALIGN_TO(size, CPU_STRUCT_ALIGN);
+ chunk = mp_new_big_chunk(aligned);
+ chunk->next = pool->state.last[1];
+ pool->state.last[1] = chunk;
+ pool->state.free[1] = aligned - size;
+ /* Remember the block, so that mp_idx() can recognize it as a big one */
+ return pool->last_big = (void *)chunk - aligned;
+ }
+ else
+ die("Cannot allocate %u bytes from a mempool", size);
+}
+
+/* Out-of-line version of mp_alloc_fast(). */
+void *
+mp_alloc(struct mempool *pool, uns size)
+{
+  void *block = mp_alloc_fast(pool, size);
+
+  return block;
+}
+
+/* Out-of-line version of mp_alloc_fast_noalign(). */
+void *
+mp_alloc_noalign(struct mempool *pool, uns size)
+{
+  void *block = mp_alloc_fast_noalign(pool, size);
+
+  return block;
+}
+
+/* Allocate <size> aligned bytes from the pool and fill them with zeroes. */
+void *
+mp_alloc_zero(struct mempool *pool, uns size)
+{
+  void *block = mp_alloc_fast(pool, size);
+
+  /* memset() hands back its first argument, i.e., the block itself */
+  return memset(block, 0, size);
+}
+
+/*
+ * Slow path of mp_start*(): allocate a block of <size> bytes and then
+ * return those bytes to the free counter, so that the block becomes
+ * an open growing buffer.
+ */
+void *
+mp_start_internal(struct mempool *pool, uns size)
+{
+  void *start = mp_alloc_internal(pool, size);
+  /* mp_alloc_internal() has just set pool->idx to the chain it used */
+  uns which = pool->idx;
+
+  pool->state.free[which] = pool->state.free[which] + size;
+  return start;
+}
+
+/* Out-of-line version of mp_start_fast(). */
+void *
+mp_start(struct mempool *pool, uns size)
+{
+  void *start = mp_start_fast(pool, size);
+
+  return start;
+}
+
+/* Out-of-line version of mp_start_fast_noalign(). */
+void *
+mp_start_noalign(struct mempool *pool, uns size)
+{
+  void *start = mp_start_fast_noalign(pool, size);
+
+  return start;
+}
+
+/*
+ * Slow path of mp_grow(): make the open growing buffer at least <size>
+ * bytes long. A buffer in a big chunk is enlarged in place by xrealloc();
+ * a buffer in a small chunk is moved to a newly started buffer.
+ * Returns the (possibly changed) start of the buffer and dies if <size>
+ * exceeds MP_SIZE_MAX.
+ */
+void *
+mp_grow_internal(struct mempool *pool, uns size)
+{
+ if (unlikely(size > MP_SIZE_MAX))
+ die("Cannot allocate %u bytes of memory", size);
+ uns avail = mp_avail(pool);
+ void *ptr = mp_ptr(pool);
+ if (pool->idx)
+ {
+ /* At least double the buffer (capped at MP_SIZE_MAX) to keep repeated growing amortized */
+ uns amortized = likely(avail <= MP_SIZE_MAX / 2) ? avail * 2 : MP_SIZE_MAX;
+ amortized = MAX(amortized, size);
+ amortized = ALIGN_TO(amortized, CPU_STRUCT_ALIGN);
+ /* Save the link before xrealloc(), the old descriptor may be moved or overwritten */
+ struct mempool_chunk *chunk = pool->state.last[1], *next = chunk->next;
+ ptr = xrealloc(ptr, amortized + MP_CHUNK_TAIL);
+ /* Rebuild the descriptor at the new end of the data */
+ chunk = ptr + amortized;
+ chunk->next = next;
+ chunk->size = amortized;
+ pool->state.last[1] = chunk;
+ pool->state.free[1] = amortized;
+ pool->last_big = ptr;
+ return ptr;
+ }
+ else
+ {
+ /* Start a new buffer (possibly in a big chunk) and copy the old contents there */
+ void *p = mp_start_internal(pool, size);
+ memcpy(p, ptr, avail);
+ return p;
+ }
+}
+
+/* Out-of-line version of mp_open_fast(). */
+uns
+mp_open(struct mempool *pool, void *ptr)
+{
+  uns size = mp_open_fast(pool, ptr);
+
+  return size;
+}
+
+/* Out-of-line version of mp_realloc_fast(). */
+void *
+mp_realloc(struct mempool *pool, void *ptr, uns size)
+{
+  void *moved = mp_realloc_fast(pool, ptr, size);
+
+  return moved;
+}
+
+/*
+ * Resize the last allocated block <ptr> to <size> bytes and fill the
+ * bytes added behind its old size (if any) with zeroes.
+ * Returns the possibly moved block, which is closed again.
+ */
+void *
+mp_realloc_zero(struct mempool *pool, void *ptr, uns size)
+{
+  uns before = mp_open_fast(pool, ptr);
+  void *grown = mp_grow(pool, size);
+
+  if (before < size)
+    bzero(grown + before, size - before);
+  mp_end(pool, grown + size);
+  return grown;
+}
+
+/*
+ * Slow path of mp_spread(): grow the open buffer so that <size> bytes are
+ * available behind the position <p> inside it, and return <p> translated
+ * to the possibly moved buffer.
+ */
+void *
+mp_spread_internal(struct mempool *pool, void *p, uns size)
+{
+ void *old = mp_ptr(pool);
+ /* p-old is the offset of <p> from the start of the buffer */
+ void *new = mp_grow_internal(pool, p-old+size);
+ return p-old+new;
+}
+
+/*
+ * Return the pool to the saved <state>: small chunks added since then are
+ * moved to the list of unused chunks, big chunks added since then are freed.
+ */
+void
+mp_restore(struct mempool *pool, struct mempool_state *state)
+{
+ struct mempool_chunk *chunk, *next;
+ /* Work on a copy: <state> may itself be stored in pool memory which is being released */
+ struct mempool_state s = *state;
+ for (chunk = pool->state.last[0]; chunk != s.last[0]; chunk = next)
+ {
+ next = chunk->next;
+ chunk->next = pool->unused;
+ pool->unused = chunk;
+ }
+ for (chunk = pool->state.last[1]; chunk != s.last[1]; chunk = next)
+ {
+ next = chunk->next;
+ mp_free_big_chunk(chunk);
+ }
+ pool->state = s;
+ /* No block can be treated as the last big one any more */
+ pool->last_big = &pool->last_big;
+}
+
+/*
+ * Save the current state of the pool to a mempool_state structure
+ * allocated from the pool itself, link it as pool->state.next and
+ * return it.
+ */
+struct mempool_state *
+mp_push(struct mempool *pool)
+{
+ /* Snapshot taken BEFORE the allocation below, so restoring it releases the structure as well */
+ struct mempool_state state = pool->state;
+ struct mempool_state *p = mp_alloc_fast(pool, sizeof(*p));
+ *p = state;
+ pool->state.next = p;
+ return p;
+}
+
+/*
+ * Restore the state saved by the most recent mp_push() (or mp_save()) and
+ * release everything allocated since then. The pool must have a saved
+ * state, otherwise the assertion fails.
+ */
+void
+mp_pop(struct mempool *pool)
+{
+  ASSERT(pool->state.next);
+  /*
+   * Restore the SAVED state linked from the current one. Restoring a copy
+   * of the current state would release nothing at all. mp_restore() makes
+   * its own copy of the structure first, so it is safe that the saved state
+   * lives inside the pool memory being released.
+   */
+  mp_restore(pool, pool->state.next);
+}
+
+#ifdef TEST
+
+#include "lib/getopt.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+
+/* Test helper: fill <len> bytes at <ptr> with the sequence magic, magic+1, ... (each taken modulo 256). */
+static void
+fill(byte *ptr, uns len, uns magic)
+{
+  for (uns i = 0; i < len; i++)
+    ptr[i] = (magic + i) & 255;
+}
+
+/*
+ * Test helper: verify that <ptr> is aligned to <align> (a power of two)
+ * and that its first <len> bytes hold the pattern written by fill()
+ * with the same <magic>.
+ */
+static void
+check(byte *ptr, uns len, uns magic, uns align)
+{
+  ASSERT(!((uintptr_t)ptr & (align - 1)));
+  for (uns i = 0; i < len; i++)
+    {
+      if (ptr[i] != ((magic + i) & 255))
+        ASSERT(0);
+    }
+}
+
+/*
+ * Randomized self-test of the memory pool. In each of 5000 rounds it first
+ * verifies the contents of all live blocks and then performs one randomly
+ * chosen operation (flush, delete & new, alloc, start/grow, realloc, push,
+ * pop, restore or a size check).
+ *
+ * ptr[]/len[]/align[] describe the <n> live blocks; state[]/num[] hold the
+ * <m> pushed states together with the number of blocks live at push time.
+ * <can_realloc> tells whether ptr[n-1] is still the last block of the pool.
+ */
+int main(int argc, char **argv)
+{
+ srand(time(NULL));
+ log_init(argv[0]);
+ cf_def_file = NULL;
+ if (cf_getopt(argc, argv, CF_SHORT_OPTS, CF_NO_LONG_OPTS, NULL) >= 0 || argc != optind)
+ die("Invalid usage");
+
+ uns max = 1000, n = 0, m = 0, can_realloc = 0;
+ void *ptr[max];
+ struct mempool_state *state[max];
+ uns len[max], num[max], align[max];
+ struct mempool *mp = mp_new(128), mp_static;
+
+ for (uns i = 0; i < 5000; i++)
+ {
+ /* Every live block must still contain the pattern derived from its index */
+ for (uns j = 0; j < n; j++)
+ check(ptr[j], len[j], j, align[j]);
+#if 0
+ DBG("free_small=%u free_big=%u idx=%u chunk_size=%u last_big=%p", mp->state.free[0], mp->state.free[1], mp->idx, mp->chunk_size, mp->last_big);
+ for (struct mempool_chunk *ch = mp->state.last[0]; ch; ch = ch->next)
+ DBG("small %p %p %p %d", (byte *)ch - ch->size, ch, ch + 1, ch->size);
+ for (struct mempool_chunk *ch = mp->state.last[1]; ch; ch = ch->next)
+ DBG("big %p %p %p %d", (byte *)ch - ch->size, ch, ch + 1, ch->size);
+#endif
+ /* <r> is decremented by the weight of each operation; the first one driving it below zero is run */
+ int r = random_max(100);
+ if ((r -= 1) < 0)
+ {
+ DBG("flush");
+ mp_flush(mp);
+ n = m = 0;
+ }
+ else if ((r -= 1) < 0)
+ {
+ DBG("delete & new");
+ mp_delete(mp);
+ /* Exercise both a pool living in its own chunk and a statically placed one */
+ if (random_max(2))
+ mp = mp_new(random_max(0x1000) + 1);
+ else
+ mp = &mp_static, mp_init(mp, random_max(512) + 1);
+ n = m = 0;
+ }
+ else if (n < max && (r -= 30) < 0)
+ {
+ len[n] = random_max(0x2000);
+ DBG("alloc(%u)", len[n]);
+ align[n] = random_max(2) ? CPU_STRUCT_ALIGN : 1;
+ ptr[n] = (align[n] == 1) ? mp_alloc_fast_noalign(mp, len[n]) : mp_alloc_fast(mp, len[n]);
+ DBG(" -> (%p)", ptr[n]);
+ fill(ptr[n], len[n], n);
+ n++;
+ can_realloc = 1;
+ }
+ else if (n < max && (r -= 20) < 0)
+ {
+ len[n] = random_max(0x2000);
+ DBG("start(%u)", len[n]);
+ align[n] = random_max(2) ? CPU_STRUCT_ALIGN : 1;
+ ptr[n] = (align[n] == 1) ? mp_start_fast_noalign(mp, len[n]) : mp_start_fast(mp, len[n]);
+ DBG(" -> (%p)", ptr[n]);
+ fill(ptr[n], len[n], n);
+ n++;
+ can_realloc = 1;
+ /* The buffer is open now; share the growing code with the reopen case below */
+ goto grow;
+ }
+ else if (can_realloc && n && (r -= 10) < 0)
+ {
+ /* Reopen the last block; the reported size must match what we remember */
+ if (mp_open(mp, ptr[n - 1]) != len[n - 1])
+ ASSERT(0);
+grow:
+ {
+ uns k = n - 1;
+ /* Resize the open buffer up to three times, checking that the common prefix survives */
+ for (uns i = random_max(4); i--; )
+ {
+ uns l = len[k];
+ len[k] = random_max(0x2000);
+ DBG("grow(%u)", len[k]);
+ ptr[k] = mp_grow(mp, len[k]);
+ DBG(" -> (%p)", ptr[k]);
+ check(ptr[k], MIN(l, len[k]), k, align[k]);
+ fill(ptr[k], len[k], k);
+ }
+ mp_end(mp, ptr[k] + len[k]);
+ }
+ }
+ else if (can_realloc && n && (r -= 20) < 0)
+ {
+ uns i = n - 1, l = len[i];
+ DBG("realloc(%p, %u)", ptr[i], len[i]);
+ ptr[i] = mp_realloc(mp, ptr[i], len[i] = random_max(0x2000));
+ DBG(" -> (%p, %u)", ptr[i], len[i]);
+ check(ptr[i], MIN(len[i], l), i, align[i]);
+ fill(ptr[i], len[i], i);
+ }
+ else if (m < max && (r -= 5) < 0)
+ {
+ DBG("push(%u)", m);
+ num[m] = n;
+ state[m++] = mp_push(mp);
+ /* mp_push() allocates from the pool, so ptr[n-1] is no longer the last block */
+ can_realloc = 0;
+ }
+ else if (m && (r -= 2) < 0)
+ {
+ m--;
+ DBG("pop(%u)", m);
+ mp_pop(mp);
+ n = num[m];
+ can_realloc = 0;
+ }
+ else if (m && (r -= 1) < 0)
+ {
+ uns i = random_max(m);
+ DBG("restore(%u)", i);
+ mp_restore(mp, state[i]);
+ n = num[m = i];
+ can_realloc = 0;
+ }
+ else if (can_realloc && n && (r -= 5) < 0)
+ ASSERT(mp_size(mp, ptr[n - 1]) == len[n - 1]);
+ }
+
+ mp_delete(mp);
+ return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Memory Pools
+ *
+ * (c) 1997--2005 Martin Mares <mj@ucw.cz>
+ * (c) 2007 Pavel Charvat <pchar@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_POOLS_H
+#define _UCW_POOLS_H
+
+/* Memory pool state (see mp_push(), ...) */
+struct mempool_state {
+ uns free[2]; /* free bytes at the end of the last small [0] and big [1] chunk */
+ void *last[2]; /* descriptor of the last small [0] and big [1] chunk (NULL if none) */
+ struct mempool_state *next; /* previously saved state (see mp_save()), NULL if none */
+};
+
+/* Memory pool */
+struct mempool {
+ struct mempool_state state; /* current allocation state */
+ /* unused: chain of spare small chunks kept for reuse;
+  * last_big: start of the block in the most recent big chunk, or &last_big when there is none */
+ void *unused, *last_big;
+ /* chunk_size: data size of small chunks; threshold: larger requests get a big chunk of their own;
+  * idx: chain (0 = small, 1 = big) used by the last slow-path allocation or opened buffer */
+ uns chunk_size, threshold, idx;
+};
+
+/* Statistics (see mp_stats()) */
+/* The chain arrays are indexed 0 = small chunks, 1 = big chunks, 2 = unused chunks. */
+struct mempool_stats {
+ uns total_size; /* Real allocated size in bytes */
+ uns chain_count[3]; /* Number of allocated chunks in small/big/unused chains */
+ uns chain_size[3]; /* Size of allocated chunks in small/big/unused chains */
+};
+
+/* Initialize a given mempool structure. Chunk size must be in the interval [1, UINT_MAX / 2] */
+void mp_init(struct mempool *pool, uns chunk_size);
+
+/* Allocate and initialize a new memory pool. See mp_init for chunk size limitations. */
+struct mempool *mp_new(uns chunk_size);
+
+/* Cleanup mempool initialized by mp_init or mp_new */
+void mp_delete(struct mempool *pool);
+
+/* Free all data on a memory pool (saves some empty chunks for later allocations) */
+void mp_flush(struct mempool *pool);
+
+/* Compute some statistics for debug purposes. See the definition of the mempool_stats structure. */
+void mp_stats(struct mempool *pool, struct mempool_stats *stats);
+
+
+/*** Allocation routines ***/
+
+/* For internal use only, do not call directly */
+void *mp_alloc_internal(struct mempool *pool, uns size) LIKE_MALLOC;
+
+/* The function allocates new <size> bytes on a given memory pool.
+ * If the <size> is zero, the resulting pointer is undefined,
+ * but it may be safely reallocated or used as the parameter
+ * to other functions below.
+ *
+ * The resulting pointer is always aligned to a multiple of
+ * CPU_STRUCT_ALIGN bytes and this condition remains true also
+ * after future reallocations.
+ */
+void *mp_alloc(struct mempool *pool, uns size);
+
+/* The same as mp_alloc, but the result may not be aligned */
+void *mp_alloc_noalign(struct mempool *pool, uns size);
+
+/* The same as mp_alloc, but fills the newly allocated data with zeroes */
+void *mp_alloc_zero(struct mempool *pool, uns size);
+
+/* Inlined version of mp_alloc() */
+static inline void *
+mp_alloc_fast(struct mempool *pool, uns size)
+{
+  /* Only a multiple of CPU_STRUCT_ALIGN of the free space is considered, the remainder is skipped */
+  uns usable = pool->state.free[0] & ~(CPU_STRUCT_ALIGN - 1);
+
+  if (size > usable)
+    return mp_alloc_internal(pool, size);
+
+  /* Free space is measured backwards from the chunk descriptor */
+  void *block = pool->state.last[0] - usable;
+  pool->state.free[0] = usable - size;
+  return block;
+}
+
+/* Inlined version of mp_alloc_noalign() */
+static inline void *
+mp_alloc_fast_noalign(struct mempool *pool, uns size)
+{
+  uns room = pool->state.free[0];
+
+  if (size > room)
+    return mp_alloc_internal(pool, size);
+
+  /* The block starts where the free space of the current small chunk begins */
+  pool->state.free[0] = room - size;
+  return pool->state.last[0] - room;
+}
+
+
+/*** Usage as a growing buffer ***/
+
+/* For internal use only, do not call directly */
+void *mp_start_internal(struct mempool *pool, uns size) LIKE_MALLOC;
+void *mp_grow_internal(struct mempool *pool, uns size);
+void *mp_spread_internal(struct mempool *pool, void *p, uns size);
+
+/* Return the chain index of the block <ptr>: 1 if it is the pool's last big block, 0 otherwise. */
+static inline uns
+mp_idx(struct mempool *pool, void *ptr)
+{
+  return (pool->last_big == ptr) ? 1 : 0;
+}
+
+/* Open a new growing buffer (at least <size> bytes long).
+ * If the <size> is zero, the resulting pointer is undefined,
+ * but it may be safely reallocated or used as the parameter
+ * to other functions below.
+ *
+ * The resulting pointer is always aligned to a multiple of
+ * CPU_STRUCT_ALIGN bytes and this condition remains true also
+ * after future reallocations. There is an unaligned version as well.
+ *
+ * Keep in mind that you can't make any other <pool> allocations
+ * before you "close" the growing buffer with mp_end().
+ */
+void *mp_start(struct mempool *pool, uns size);
+void *mp_start_noalign(struct mempool *pool, uns size);
+
+/* Inlined version of mp_start() */
+static inline void *
+mp_start_fast(struct mempool *pool, uns size)
+{
+  /* Only a multiple of CPU_STRUCT_ALIGN of the free space is considered, the remainder is skipped */
+  uns usable = pool->state.free[0] & ~(CPU_STRUCT_ALIGN - 1);
+
+  if (size > usable)
+    return mp_start_internal(pool, size);
+
+  /* The buffer stays open: all of the usable space still counts as free */
+  pool->idx = 0;
+  pool->state.free[0] = usable;
+  return pool->state.last[0] - usable;
+}
+
+/* Inlined version of mp_start_noalign() */
+static inline void *
+mp_start_fast_noalign(struct mempool *pool, uns size)
+{
+  uns room = pool->state.free[0];
+
+  if (size > room)
+    return mp_start_internal(pool, size);
+
+  /* The buffer opens right at the beginning of the free space; nothing is consumed yet */
+  pool->idx = 0;
+  return pool->state.last[0] - room;
+}
+
+/* Return start pointer of the growing buffer allocated by mp_start() or a similar function */
+static inline void *
+mp_ptr(struct mempool *pool)
+{
+  uns which = pool->idx;
+
+  /* Free space is measured backwards from the chunk descriptor */
+  return pool->state.last[which] - pool->state.free[which];
+}
+
+/* Return the number of bytes available for extending the growing buffer */
+static inline uns
+mp_avail(struct mempool *pool)
+{
+  uns which = pool->idx;
+
+  return pool->state.free[which];
+}
+
+/* Grow the buffer allocated by mp_start() to be at least <size> bytes long
+ * (<size> may be less than mp_avail(), even zero). Reallocated buffer may
+ * change its starting position. The content will be unchanged to the minimum
+ * of the old and new sizes; newly allocated memory will be uninitialized.
+ * Multiple calls to mp_grow have amortized linear cost wrt. the maximum value of <size>. */
+static inline void *
+mp_grow(struct mempool *pool, uns size)
+{
+  /* Fast path: the buffer is already large enough and stays where it is */
+  if (size <= mp_avail(pool))
+    return mp_ptr(pool);
+  return mp_grow_internal(pool, size);
+}
+
+/* Grow the buffer by at least one byte -- equivalent to mp_grow(pool, mp_avail(pool) + 1) */
+static inline void *
+mp_expand(struct mempool *pool)
+{
+  uns wanted = mp_avail(pool) + 1;
+
+  return mp_grow_internal(pool, wanted);
+}
+
+/* Ensure that there are at least <size> bytes free after <p>; if not, reallocate and adjust <p>. */
+static inline void *
+mp_spread(struct mempool *pool, void *p, uns size)
+{
+  /* Number of bytes between <p> and the end of the chunk's data */
+  uns room = (uns)(pool->state.last[pool->idx] - p);
+
+  if (room >= size)
+    return p;
+  return mp_spread_internal(pool, p, size);
+}
+
+/* Close the growing buffer. The <end> must point just behind the data you want to keep
+ * allocated (so it can be anywhere in the interval [mp_ptr(pool), mp_ptr(pool) + mp_avail(pool)]).
+ * Returns a pointer to the beginning of the just closed block. */
+static inline void *
+mp_end(struct mempool *pool, void *end)
+{
+  uns which = pool->idx;
+  /* The start must be computed before the free counter is rewritten */
+  void *start = pool->state.last[which] - pool->state.free[which];
+
+  pool->state.free[which] = pool->state.last[which] - end;
+  return start;
+}
+
+/* Return size in bytes of the last allocated memory block (with mp_alloc*() or mp_end()). */
+static inline uns
+mp_size(struct mempool *pool, void *ptr)
+{
+  /* Chain 1 if <ptr> is the last big block, chain 0 otherwise (the same test as mp_idx()) */
+  uns which = (ptr == pool->last_big);
+  void *chunk_end = pool->state.last[which];
+
+  return chunk_end - ptr - pool->state.free[which];
+}
+
+/* Open the last memory block (allocated with mp_alloc*() or mp_end())
+ * for growing and return its size in bytes. The contents and the start pointer
+ * remain unchanged. Do not forget to call mp_end() to close it. */
+uns mp_open(struct mempool *pool, void *ptr);
+
+/* Inlined version of mp_open() */
+static inline uns
+mp_open_fast(struct mempool *pool, void *ptr)
+{
+  uns which = mp_idx(pool, ptr);
+
+  pool->idx = which;
+  /* Size of the block = distance to the end of the chunk data minus the free space behind it */
+  uns size = pool->state.last[which] - ptr - pool->state.free[which];
+  /* Returning the block's bytes to the free counter turns it into an open buffer */
+  pool->state.free[which] += size;
+  return size;
+}
+
+/* Reallocate the last memory block (allocated with mp_alloc*() or mp_end())
+ * to the new <size>. Behavior is similar to mp_grow(), but the resulting
+ * block is closed. */
+void *mp_realloc(struct mempool *pool, void *ptr, uns size);
+
+/* The same as mp_realloc(), but fills the additional bytes (if any) with zeroes */
+void *mp_realloc_zero(struct mempool *pool, void *ptr, uns size);
+
+/* Inlined version of mp_realloc() */
+static inline void *
+mp_realloc_fast(struct mempool *pool, void *ptr, uns size)
+{
+  /* Reopen the block, resize it and close it again at exactly <size> bytes */
+  mp_open_fast(pool, ptr);
+  void *moved = mp_grow(pool, size);
+  mp_end(pool, moved + size);
+  return moved;
+}
+
+
+/*** Usage as a stack ***/
+
+/* Save the current state of a memory pool.
+ * Do not call this function with an opened growing buffer. */
+static inline void
+mp_save(struct mempool *pool, struct mempool_state *state)
+{
+  struct mempool_state *current = &pool->state;
+
+  /* The snapshot keeps the old <next> link, the pool then links to the snapshot */
+  *state = *current;
+  current->next = state;
+}
+
+/* Save the current state to a newly allocated mempool_state structure.
+ * Do not call this function with an opened growing buffer. */
+struct mempool_state *mp_push(struct mempool *pool);
+
+/* Restore the state saved by mp_save() or mp_push() and free all
+ * data allocated after that point (including the state structure itself).
+ * You can't reallocate the last memory block from the saved state. */
+void mp_restore(struct mempool *pool, struct mempool_state *state);
+
+/* Restore the state saved by the last call to mp_push().
+ * mp_pop() and mp_push() work as a stack, so you can push more states safely. */
+void mp_pop(struct mempool *pool);
+
+
+/*** mempool-str.c ***/
+
+char *mp_strdup(struct mempool *, const char *) LIKE_MALLOC;
+void *mp_memdup(struct mempool *, const void *, uns) LIKE_MALLOC;
+char *mp_multicat(struct mempool *, ...) LIKE_MALLOC SENTINEL_CHECK;
+/* Concatenate two strings to a new string allocated from <mp> (mp_multicat() with two arguments). */
+static inline char * LIKE_MALLOC
+mp_strcat(struct mempool *mp, const char *x, const char *y)
+{
+  char *joined = mp_multicat(mp, x, y, NULL);
+
+  return joined;
+}
+char *mp_strjoin(struct mempool *p, char **a, uns n, uns sep) LIKE_MALLOC;
+
+
+/*** mempool-fmt.c ***/
+
+char *mp_printf(struct mempool *mp, const char *fmt, ...) FORMAT_CHECK(printf,2,3) LIKE_MALLOC;
+char *mp_vprintf(struct mempool *mp, const char *fmt, va_list args) LIKE_MALLOC;
+char *mp_printf_append(struct mempool *mp, char *ptr, const char *fmt, ...) FORMAT_CHECK(printf,3,4);
+char *mp_vprintf_append(struct mempool *mp, char *ptr, const char *fmt, va_list args);
+
+#endif
--- /dev/null
+# Tests for mempool modules
+
+Run: ../obj/lib/mempool-t
+
+Run: ../obj/lib/mempool-fmt-t
+Out: <Hello, World!><Hello, World!><Appended><Hello, World!>
+
+Run: ../obj/lib/mempool-str-t
+Out: <<12345>>
+ bugs.gnats.insects
+ bugsgnatsinsects
--- /dev/null
+/*
+ * UCW Library -- Mapping of Files
+ *
+ * (c) 1999--2002 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+
+#include <stdio.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+
+/*
+ * Map the whole file <name> to memory (shared mapping), read-write if
+ * <writeable> is non-zero and read-only otherwise, and return the address
+ * of the mapping. If <len> is not NULL, the file size is stored there.
+ * Any failure of open(), fstat() or mmap() terminates the program by die().
+ *
+ * For an empty file, nothing is mapped and the address of a static empty
+ * string is returned instead.
+ *
+ * NOTE(review): st.st_size is stored to an unsigned, so sizes which do not
+ * fit are silently truncated in *len -- confirm callers never map such files.
+ */
+void *
+mmap_file(const char *name, unsigned *len, int writeable)
+{
+ int fd = open(name, writeable ? O_RDWR : O_RDONLY);
+ struct stat st;
+ void *x;
+
+ if (fd < 0)
+ die("open(%s): %m", name);
+ if (fstat(fd, &st) < 0)
+ die("fstat(%s): %m", name);
+ if (len)
+ *len = st.st_size;
+ if (st.st_size)
+ {
+ x = mmap(NULL, st.st_size, writeable ? (PROT_READ | PROT_WRITE) : PROT_READ, MAP_SHARED, fd, 0);
+ if (x == MAP_FAILED)
+ die("mmap(%s): %m", name);
+ }
+ else /* For empty file, we can return any non-zero address */
+ x = "";
+ /* The descriptor is not needed once the mapping exists */
+ close(fd);
+ return x;
+}
+
+/*
+ * Unmap <len> bytes at <start>, normally a mapping returned by mmap_file().
+ * The result of munmap() is ignored.
+ */
+void
+munmap_file(void *start, unsigned len)
+{
+ munmap(start, len);
+}
--- /dev/null
+/*
+ * UCW Library -- File Page Cache
+ *
+ * (c) 1999--2002 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/pagecache.h"
+#include "lib/lfs.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <alloca.h>
+
+/*
+ * State of one page cache. Every cached page is linked in exactly one of
+ * the three page lists (through its <n> node) and in one hash bucket
+ * (through its <hn> node).
+ */
+struct page_cache {
+ list free_pages; /* LRU queue of free non-dirty pages */
+ list locked_pages; /* List of locked pages (starts with dirty ones) */
+ list dirty_pages; /* List of free dirty pages */
+ uns page_size; /* Bytes per page (must be a power of two) */
+ uns free_count; /* Number of free / dirty pages */
+ uns total_count; /* Total number of pages */
+ uns max_pages; /* Maximum number of free pages */
+ uns hash_size; /* Hash table size */
+ uns stat_hit; /* Number of cache hits */
+ uns stat_miss; /* Number of cache misses */
+ uns stat_write; /* Number of writes */
+ list *hash_table; /* List heads corresponding to hash buckets */
+#ifndef HAVE_PREAD
+ /* Without pread()/pwrite() we track the file position to avoid needless seeks */
+ sh_off_t pos; /* Current position in the file */
+ int pos_fd; /* FD the position corresponds to */
+#endif
+};
+
+/* Both macros expect a `struct page_cache *c' in scope and rely on page_size being a power of two. */
+/* File position of the start of the page containing <pos> */
+#define PAGE_NUMBER(pos) ((pos) & ~(sh_off_t)(c->page_size - 1))
+/* Offset of <pos> inside its page */
+#define PAGE_OFFSET(pos) ((pos) & (c->page_size - 1))
+
+/*
+ * Create an empty page cache with pages of <page_size> bytes, keeping at
+ * most <max_pages> free pages. The hash table gets nextprime(max_pages)
+ * buckets.
+ */
+struct page_cache *
+pgc_open(uns page_size, uns max_pages)
+{
+  struct page_cache *cache = xmalloc_zero(sizeof(struct page_cache));
+
+  cache->page_size = page_size;
+  cache->max_pages = max_pages;
+  init_list(&cache->free_pages);
+  init_list(&cache->locked_pages);
+  init_list(&cache->dirty_pages);
+
+  cache->hash_size = nextprime(cache->max_pages);
+  cache->hash_table = xmalloc(sizeof(list) * cache->hash_size);
+  for (uns bucket = 0; bucket < cache->hash_size; bucket++)
+    init_list(&cache->hash_table[bucket]);
+#ifndef HAVE_PREAD
+  /* No file position is known yet */
+  cache->pos_fd = -1;
+#endif
+  return cache;
+}
+
+/*
+ * Flush and destroy the page cache. All pages must have been unlocked
+ * by the caller; a page remaining in any list triggers an assertion.
+ */
+void
+pgc_close(struct page_cache *c)
+{
+ /* Writes out dirty pages and frees all free ones */
+ pgc_cleanup(c);
+ ASSERT(EMPTY_LIST(c->locked_pages));
+ ASSERT(EMPTY_LIST(c->dirty_pages));
+ ASSERT(EMPTY_LIST(c->free_pages));
+ xfree(c->hash_table);
+ xfree(c);
+}
+
+/* Print one line describing the page <p>: position (cast to uns), file descriptor, flags and lock count. */
+static void
+pgc_debug_page(struct page *p)
+{
+ printf("\tp=%08x d=%d f=%x c=%d\n", (uns) p->pos, p->fd, p->flags, p->lock_count);
+}
+
+/*
+ * Print a summary of the cache and its statistics to stdout. If <mode> is
+ * non-zero, the pages of the LRU (free), locked and dirty lists are
+ * listed as well.
+ */
+void
+pgc_debug(struct page_cache *c, int mode)
+{
+ struct page *p;
+
+ printf(">> Page cache dump: pgsize=%d, pages=%d, freepages=%d of %d, hash=%d\n", c->page_size, c->total_count, c->free_count, c->max_pages, c->hash_size);
+ printf(">> stats: %d hits, %d misses, %d writes\n", c->stat_hit, c->stat_miss, c->stat_write);
+ if (mode)
+ {
+ puts("LRU list:");
+ WALK_LIST(p, c->free_pages)
+ pgc_debug_page(p);
+ puts("Locked list:");
+ WALK_LIST(p, c->locked_pages)
+ pgc_debug_page(p);
+ puts("Dirty list:");
+ WALK_LIST(p, c->dirty_pages)
+ pgc_debug_page(p);
+ }
+}
+
+/*
+ * Write the dirty page <p> to its file at its position and clear its
+ * dirty flag. The page is not moved between lists. Dies on a write error
+ * or on an incomplete write.
+ */
+static void
+flush_page(struct page_cache *c, struct page *p)
+{
+ int s;
+
+ ASSERT(p->flags & PG_FLAG_DIRTY);
+#ifdef HAVE_PREAD
+ s = sh_pwrite(p->fd, p->data, c->page_size, p->pos);
+#else
+ /* Seek only if the remembered file position does not already match */
+ if (c->pos != p->pos || c->pos_fd != (int) p->fd)
+ sh_seek(p->fd, p->pos, SEEK_SET);
+ s = write(p->fd, p->data, c->page_size);
+ c->pos = p->pos + s;
+ c->pos_fd = p->fd;
+#endif
+ if (s < 0)
+ die("pgc_write(%d): %m", p->fd);
+ if (s != (int) c->page_size)
+ die("pgc_write(%d): incomplete page (only %d of %d)", p->fd, s, c->page_size);
+ p->flags &= ~PG_FLAG_DIRTY;
+ c->stat_write++;
+}
+
+/*
+ * qsort() comparison callback for arrays of page pointers: order the
+ * pages by file descriptor first and by position in the file second.
+ */
+static int
+flush_cmp(const void *X, const void *Y)
+{
+  struct page *a = *(struct page **) X;
+  struct page *b = *(struct page **) Y;
+
+  if (a->fd != b->fd)
+    return (a->fd < b->fd) ? -1 : 1;
+  if (a->pos != b->pos)
+    return (a->pos < b->pos) ? -1 : 1;
+  return 0;
+}
+
+/*
+ * Write out free dirty pages and move them to the list of free clean
+ * pages. With <force> set, all of them are processed; otherwise at most
+ * about half of free_count (but at least one, if any exists). The pages
+ * are sorted by (fd, pos) before writing.
+ */
+static void
+flush_pages(struct page_cache *c, uns force)
+{
+ uns cnt = 0;
+ uns max = force ? ~0U : c->free_count / 2;
+ uns i;
+ struct page *p, *q, **req, **rr;
+
+ /* Count how many pages from the head of the dirty list are going to be flushed */
+ WALK_LIST(p, c->dirty_pages)
+ {
+ cnt++;
+ if (cnt >= max)
+ break;
+ }
+ req = rr = alloca(cnt * sizeof(struct page *));
+ i = cnt;
+ p = HEAD(c->dirty_pages);
+ /* Move the first <cnt> dirty pages to the free list and collect them in req[] */
+ while ((q = (struct page *) p->n.next) && i--)
+ {
+ rem_node(&p->n);
+ add_tail(&c->free_pages, &p->n);
+ *rr++ = p;
+ p = q;
+ }
+ qsort(req, cnt, sizeof(struct page *), flush_cmp);
+ for(i=0; i<cnt; i++)
+ flush_page(c, req[i]);
+}
+
+/* Compute the hash bucket for the page of file <fd> at position <pos>. */
+static inline uns
+hash_page(struct page_cache *c, sh_off_t pos, uns fd)
+{
+  uns bucket = (pos + fd) % c->hash_size;
+
+  return bucket;
+}
+
+/*
+ * Find or create the page for position <pos> of file <fd>.
+ *
+ * The returned page is linked in its hash bucket, but in NONE of the page
+ * lists -- the caller is expected to put it there (see
+ * get_and_lock_page()). A page found in the cache keeps its flags and lock
+ * count; a new or recycled one has both zeroed, so its data are not valid.
+ */
+static struct page *
+get_page(struct page_cache *c, sh_off_t pos, uns fd)
+{
+ node *n;
+ struct page *p;
+ uns hash = hash_page(c, pos, fd);
+
+ /*
+ * Return locked buffer for given page.
+ */
+
+ WALK_LIST(n, c->hash_table[hash])
+ {
+ p = SKIP_BACK(struct page, hn, n);
+ if (p->pos == pos && p->fd == fd)
+ {
+ /* Found in the cache */
+ rem_node(&p->n);
+ /* An unlocked page was counted as free; it is not free any longer */
+ if (!p->lock_count)
+ c->free_count--;
+ return p;
+ }
+ }
+ if (c->total_count < c->max_pages || !c->free_count)
+ {
+ /* Enough free space, expand the cache */
+ /* (also taken when the cache is full, but there is no unlocked page to recycle) */
+ p = xmalloc(sizeof(struct page) + c->page_size);
+ c->total_count++;
+ }
+ else
+ {
+ /* Discard the oldest unlocked page */
+ p = HEAD(c->free_pages);
+ if (!p->n.next)
+ {
+ /* There are only dirty pages here */
+ flush_pages(c, 0);
+ p = HEAD(c->free_pages);
+ ASSERT(p->n.next);
+ }
+ ASSERT(!p->lock_count);
+ rem_node(&p->n);
+ rem_node(&p->hn);
+ c->free_count--;
+ }
+ p->pos = pos;
+ p->fd = fd;
+ p->flags = 0;
+ p->lock_count = 0;
+ add_tail(&c->hash_table[hash], &p->hn);
+ return p;
+}
+
+/*
+ * Write out all free dirty pages and then the dirty pages at the head of
+ * the locked list.
+ *
+ * NOTE(review): the walk stops at the first clean locked page, relying on
+ * dirty pages being at the head of the list. pgc_mark_dirty() keeps that
+ * order, but pgc_get()/pgc_get_zero() mark pages dirty after appending
+ * them to the tail -- confirm such pages are not expected to be written here.
+ */
+void
+pgc_flush(struct page_cache *c)
+{
+ struct page *p;
+
+ flush_pages(c, 1);
+ WALK_LIST(p, c->locked_pages)
+ if (p->flags & PG_FLAG_DIRTY)
+ flush_page(c, p);
+ else
+ break;
+}
+
+/*
+ * Flush the cache and free all unlocked pages. Locked pages stay in
+ * the cache.
+ */
+void
+pgc_cleanup(struct page_cache *c)
+{
+ struct page *p;
+ node *n;
+
+ /* After the flush, all unlocked pages are clean and sit in the free list */
+ pgc_flush(c);
+ WALK_LIST_DELSAFE(p, n, c->free_pages)
+ {
+ ASSERT(!(p->flags & PG_FLAG_DIRTY) && !p->lock_count);
+ rem_node(&p->n);
+ rem_node(&p->hn);
+ c->free_count--;
+ c->total_count--;
+ xfree(p);
+ }
+ ASSERT(!c->free_count);
+}
+
+/* Get the page for (<fd>, <pos>), append it to the list of locked pages and increase its lock count. */
+static inline struct page *
+get_and_lock_page(struct page_cache *c, sh_off_t pos, uns fd)
+{
+  struct page *pg = get_page(c, pos, fd);
+
+  /* get_page() returns the page outside of all page lists */
+  pg->lock_count++;
+  add_tail(&c->locked_pages, &pg->n);
+  return pg;
+}
+
+/*
+ * Return a locked page holding the contents of file <fd> at the
+ * page-aligned position <pos>. The data are read from the file unless
+ * the page is already valid in the cache. Dies on a read error or on
+ * an incomplete page. The caller releases the page by pgc_put().
+ */
+struct page *
+pgc_read(struct page_cache *c, int fd, sh_off_t pos)
+{
+ struct page *p;
+ int s;
+
+ ASSERT(!PAGE_OFFSET(pos));
+ p = get_and_lock_page(c, pos, fd);
+ if (p->flags & PG_FLAG_VALID)
+ c->stat_hit++;
+ else
+ {
+ c->stat_miss++;
+#ifdef HAVE_PREAD
+ s = sh_pread(fd, p->data, c->page_size, pos);
+#else
+ /* Seek only if the remembered file position does not already match */
+ if (c->pos != pos || c->pos_fd != (int)fd)
+ sh_seek(fd, pos, SEEK_SET);
+ s = read(fd, p->data, c->page_size);
+ c->pos = pos + s;
+ c->pos_fd = fd;
+#endif
+ if (s < 0)
+ die("pgc_read(%d): %m", fd);
+ if (s != (int) c->page_size)
+ die("pgc_read(%d): incomplete page (only %d of %d)", p->fd, s, c->page_size);
+ p->flags |= PG_FLAG_VALID;
+ }
+ return p;
+}
+
+/*
+ * Get page for writing: lock the page for (fd, pos) without reading anything
+ * from the file and mark it both valid and dirty.
+ */
+struct page *
+pgc_get(struct page_cache *c, int fd, sh_off_t pos)
+{
+  struct page *pg;
+
+  ASSERT(!PAGE_OFFSET(pos));
+  pg = get_and_lock_page(c, pos, fd);
+  pg->flags = pg->flags | PG_FLAG_VALID | PG_FLAG_DIRTY;
+  return pg;
+}
+
+/* Like pgc_get(), but the page data are cleared to zeros as well. */
+struct page *
+pgc_get_zero(struct page_cache *c, int fd, sh_off_t pos)
+{
+  struct page *pg;
+
+  ASSERT(!PAGE_OFFSET(pos));
+  pg = get_and_lock_page(c, pos, fd);
+  pg->flags = pg->flags | PG_FLAG_VALID | PG_FLAG_DIRTY;
+  bzero(pg->data, c->page_size);
+  return pg;
+}
+
+/*
+ * Release page: drop one lock. When the last lock goes away, the page is
+ * moved to dirty_pages (if dirty) or to free_pages; a clean page is freed
+ * outright when free_count has already reached max_pages.
+ */
+void
+pgc_put(struct page_cache *c, struct page *p)
+{
+  ASSERT(p->lock_count);
+  p->lock_count--;
+  if (p->lock_count)
+    return;
+
+  /* Last lock released, take the page off the list of locked pages */
+  rem_node(&p->n);
+  if (!(p->flags & PG_FLAG_DIRTY) && c->free_count >= c->max_pages)
+    {
+      /* Too many unlocked pages around, get rid of this one */
+      rem_node(&p->hn);
+      xfree(p);
+      c->total_count--;
+      return;
+    }
+  if (p->flags & PG_FLAG_DIRTY)
+    add_tail(&c->dirty_pages, &p->n);
+  else
+    add_tail(&c->free_pages, &p->n);
+  c->free_count++;
+}
+
+/*
+ * Mark locked page as dirty. A page which was clean so far is also moved
+ * to the head of the locked_pages list.
+ */
+void
+pgc_mark_dirty(struct page_cache *c, struct page *p)
+{
+  ASSERT(p->lock_count);
+  if (p->flags & PG_FLAG_DIRTY)
+    return;
+
+  rem_node(&p->n);
+  add_head(&c->locked_pages, &p->n);
+  p->flags |= PG_FLAG_DIRTY;
+}
+
+/*
+ * Partial reading: read the page containing pos and return a pointer to
+ * the byte at pos inside the page data. *len is set to the number of bytes
+ * from there to the end of the page.
+ *
+ * NOTE(review): the page is released by pgc_put() before the pointer is
+ * returned, so the data are presumably guaranteed to stay in place only
+ * until the next operation on the cache -- confirm with the callers.
+ */
+byte *
+pgc_read_data(struct page_cache *c, int fd, sh_off_t pos, uns *len)
+{
+ struct page *p;
+ sh_off_t page = PAGE_NUMBER(pos);
+ uns offset = PAGE_OFFSET(pos);
+
+ p = pgc_read(c, fd, page);
+ pgc_put(c, p);
+ *len = c->page_size - offset;
+ return p->data + offset;
+}
+
+#ifdef TEST
+
+/*
+ * Self-test (compiled only with TEST defined): exercises a cache of 1024-byte
+ * pages with max_pages set to 2 on a scratch file called "test", dumping the
+ * cache state by pgc_debug() after every step.
+ */
+int main(int argc, char **argv)
+{
+ struct page_cache *c = pgc_open(1024, 2);
+ struct page *p, *q, *r;
+ int fd = open("test", O_RDWR | O_CREAT | O_TRUNC, 0666);
+ if (fd < 0)
+ die("open: %m");
+ pgc_debug(c, 1);
+ /* Create three pages, one at a time */
+ p = pgc_get(c, fd, 0);
+ pgc_debug(c, 1);
+ strcpy(p->data, "one");
+ pgc_put(c, p);
+ pgc_debug(c, 1);
+ p = pgc_get(c, fd, 1024);
+ pgc_debug(c, 1);
+ strcpy(p->data, "two");
+ pgc_put(c, p);
+ pgc_debug(c, 1);
+ p = pgc_get(c, fd, 2048);
+ pgc_debug(c, 1);
+ strcpy(p->data, "three");
+ pgc_put(c, p);
+ pgc_debug(c, 1);
+ pgc_flush(c);
+ pgc_debug(c, 1);
+ /* Read the first page back, modify it while locked and flush again */
+ p = pgc_read(c, fd, 0);
+ pgc_debug(c, 1);
+ strcpy(p->data, "odin");
+ pgc_mark_dirty(c, p);
+ pgc_debug(c, 1);
+ pgc_flush(c);
+ pgc_debug(c, 1);
+ /* Hold three pages locked at once, i.e., more than max_pages */
+ q = pgc_read(c, fd, 1024);
+ pgc_debug(c, 1);
+ r = pgc_read(c, fd, 2048);
+ pgc_debug(c, 1);
+ pgc_put(c, p);
+ pgc_put(c, q);
+ pgc_put(c, r);
+ pgc_debug(c, 1);
+ p = pgc_get(c, fd, 3072);
+ pgc_debug(c, 1);
+ strcpy(p->data, "four");
+ pgc_put(c, p);
+ pgc_debug(c, 1);
+ pgc_cleanup(c);
+ pgc_debug(c, 1);
+ pgc_close(c);
+ return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- File Page Cache
+ *
+ * (c) 1999--2002 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_PAGECACHE_H
+#define _UCW_PAGECACHE_H
+
+#include "lib/lists.h"
+
+struct page_cache;
+
+struct page {
+ node n; /* Node in page list */
+ node hn; /* Node in hash table */
+ sh_off_t pos; /* Position of the page in the file */
+ uns fd; /* File descriptor the page belongs to */
+ uns flags; /* PG_FLAG_xxx */
+ uns lock_count; /* Number of locks currently held on the page */
+ byte data[0]; /* Page contents, allocated together with the structure */
+};
+
+#define PG_FLAG_DIRTY 1
+#define PG_FLAG_VALID 2
+
+struct page_cache *pgc_open(uns page_size, uns max_pages);
+void pgc_close(struct page_cache *);
+void pgc_debug(struct page_cache *, int mode);
+void pgc_flush(struct page_cache *); /* Write all unwritten pages */
+void pgc_cleanup(struct page_cache *); /* Deallocate all unused buffers */
+struct page *pgc_read(struct page_cache *, int fd, sh_off_t); /* Read page and lock it */
+struct page *pgc_get(struct page_cache *, int fd, sh_off_t); /* Get page for writing */
+struct page *pgc_get_zero(struct page_cache *, int fd, sh_off_t); /* ... and clear it */
+void pgc_put(struct page_cache *, struct page *); /* Release page */
+void pgc_mark_dirty(struct page_cache *, struct page *); /* Mark locked page as dirty */
+byte *pgc_read_data(struct page_cache *, int fd, sh_off_t, uns *); /* Partial reading */
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Mapping of File Parts
+ *
+ * (c) 2003--2006 Martin Mares <mj@ucw.cz>
+ * (c) 2003--2005 Robert Spalek <robert@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/lfs.h"
+#include "lib/partmap.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+
+/*
+ * PARTMAP_WINDOW is the initial length of the mapped window used by partmap_load():
+ * the largest size_t value when the partmap is configured as a plain mmap,
+ * a tiny 4096-byte window for the test program and 16777216 bytes otherwise.
+ */
+#ifdef CONFIG_PARTMAP_IS_MMAP
+#define PARTMAP_WINDOW ~(size_t)0
+#else
+#ifdef TEST
+#define PARTMAP_WINDOW 4096
+#else
+#define PARTMAP_WINDOW 16777216
+#endif
+#endif
+
+/*
+ * Open the file `name' (read-write if `writeable' is non-zero, read-only
+ * otherwise) and return a new partmap for it. Dies when the file cannot be
+ * opened or its size cannot be determined. With CONFIG_PARTMAP_IS_MMAP,
+ * the mapping is loaded immediately.
+ */
+struct partmap *
+partmap_open(char *name, int writeable)
+{
+  struct partmap *pm = xmalloc_zero(sizeof(struct partmap));
+  int mode = writeable ? O_RDWR : O_RDONLY;
+
+  pm->writeable = writeable;
+  pm->fd = sh_open(name, mode);
+  if (pm->fd < 0)
+    die("open(%s): %m", name);
+  /* Seeking to the end of the file tells us its size */
+  pm->file_size = sh_seek(pm->fd, 0, SEEK_END);
+  if (pm->file_size < 0)
+    die("lseek(%s): %m", name);
+#ifdef CONFIG_PARTMAP_IS_MMAP
+  partmap_load(pm, 0, pm->file_size);
+#endif
+  return pm;
+}
+
+/* Return the size of the underlying file as determined by partmap_open(). */
+sh_off_t
+partmap_size(struct partmap *p)
+{
+ return p->file_size;
+}
+
+/* Unmap the current window (if there is one), close the file and free the partmap. */
+void
+partmap_close(struct partmap *p)
+{
+ if (p->start_map)
+ munmap(p->start_map, p->end_off - p->start_off);
+ close(p->fd);
+ xfree(p);
+}
+
+/*
+ * Replace the current window by a new one which covers `size' bytes of the
+ * file starting at offset `start'. The window begins at `start' rounded down
+ * to a multiple of CPU_PAGE_SIZE and is PARTMAP_WINDOW bytes long, unless
+ * the file ends sooner. Dies if the window cannot hold the requested range
+ * or if the mmap fails.
+ */
+void
+partmap_load(struct partmap *p, sh_off_t start, uns size)
+{
+  if (p->start_map)
+    munmap(p->start_map, p->end_off - p->start_off);
+  sh_off_t end = start + size;
+  sh_off_t win_start = start/CPU_PAGE_SIZE * CPU_PAGE_SIZE;
+  sh_off_t avail = p->file_size - win_start;
+  size_t win_len = PARTMAP_WINDOW;
+  /*
+   * Clamp the window to the rest of the file. The lengths are compared
+   * directly instead of computing win_start+win_len, because that sum wraps
+   * around when PARTMAP_WINDOW is ~(size_t)0 and size_t is as wide as
+   * sh_off_t, which left the window unclamped and made the check below fail.
+   */
+  if ((unsigned long long) win_len > (unsigned long long) avail)
+    win_len = ALIGN_TO(avail, CPU_PAGE_SIZE);
+  if ((sh_off_t) (win_start+win_len) < end)
+    die("partmap_map: Window is too small for mapping %u bytes", size);
+  p->start_map = sh_mmap(NULL, win_len, p->writeable ? (PROT_READ | PROT_WRITE) : PROT_READ, MAP_SHARED, p->fd, win_start);
+  if (p->start_map == MAP_FAILED)
+    die("mmap failed at position %lld: %m", (long long)win_start);
+  p->start_off = win_start;
+  p->end_off = win_start+win_len;
+  /* Only a hint to the kernel, so the result is not checked */
+  madvise(p->start_map, win_len, MADV_SEQUENTIAL);
+}
+
+#ifdef TEST
+/* Self-test: copy the file given as the first argument to stdout, one mapped byte at a time. */
+int main(int argc, char **argv)
+{
+ struct partmap *p = partmap_open(argv[1], 0);
+ /* NOTE(review): the size is stored in an uns, so a file larger than its range would be cut short */
+ uns l = partmap_size(p);
+ uns i;
+ for (i=0; i<l; i++)
+ putchar(*(char *)partmap_map(p, i, 1));
+ partmap_close(p);
+ return 0;
+}
+#endif
--- /dev/null
+/*
+ * UCW Library -- Mapping of File Parts
+ *
+ * (c) 2003--2006 Martin Mares <mj@ucw.cz>
+ * (c) 2003--2005 Robert Spalek <robert@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_PARTMAP_H
+#define _UCW_PARTMAP_H
+
+struct partmap {
+ int fd; /* Descriptor of the mapped file */
+ sh_off_t file_size; /* Size of the file at the time of partmap_open() */
+ sh_off_t start_off, end_off; /* File offsets of the start and end of the current window */
+ byte *start_map; /* Address of the current window, NULL if nothing is mapped yet */
+ int writeable; /* Non-zero if the file is opened and mapped for writing */
+};
+
+struct partmap *partmap_open(char *name, int writeable);
+void partmap_close(struct partmap *p);
+sh_off_t partmap_size(struct partmap *p);
+void partmap_load(struct partmap *p, sh_off_t start, uns size);
+
+/*
+ * Return a pointer to the mapped byte at file offset `start'. Unless
+ * CONFIG_PARTMAP_IS_MMAP is defined, the window is reloaded first when
+ * nothing is mapped yet or when [start, start+size) does not lie inside it.
+ */
+static inline void *
+partmap_map(struct partmap *p, sh_off_t start, uns size UNUSED)
+{
+#ifndef CONFIG_PARTMAP_IS_MMAP
+ if (unlikely(!p->start_map || start < p->start_off || (sh_off_t) (start+size) > p->end_off))
+ partmap_load(p, start, size);
+#endif
+ return p->start_map + (start - p->start_off);
+}
+
+/*
+ * A variant of partmap_map() which only checks the end of the window.
+ * NOTE(review): it tests neither for a missing mapping nor for `start'
+ * lying before the window, so presumably it is meant for callers that
+ * only move forward through the file -- confirm.
+ */
+static inline void *
+partmap_map_forward(struct partmap *p, sh_off_t start, uns size UNUSED)
+{
+#ifndef CONFIG_PARTMAP_IS_MMAP
+ if (unlikely((sh_off_t) (start+size) > p->end_off))
+ partmap_load(p, start, size);
+#endif
+ return p->start_map + (start - p->start_off);
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Shell-Like Case-Insensitive Pattern Matching (currently only '?' and '*')
+ *
+ * (c) 1997 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/chartype.h"
+
+#define Convert(x) Cupcase(x)
+#define MATCH_FUNC_NAME match_pattern_nocase
+
+#include "lib/patmatch.h"
--- /dev/null
+/*
+ * UCW Library -- Shell-Like Pattern Matching (currently only '?' and '*')
+ *
+ * (c) 1997 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+
+#define Convert(x) (x)
+#define MATCH_FUNC_NAME match_pattern
+
+#include "lib/patmatch.h"
--- /dev/null
+/*
+ * UCW Library -- Generic Shell-Like Pattern Matching (currently only '?' and '*')
+ *
+ * (c) 1997 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+/*
+ * Match the string `s' against the shell-like pattern `p'. In the pattern,
+ * '?' matches any single character, '*' matches any (possibly empty)
+ * sequence of characters and a backslash makes the following character
+ * literal. All other characters are compared after being passed through
+ * Convert(). Returns 1 if the whole string matches, 0 otherwise.
+ *
+ * The including file has to define MATCH_FUNC_NAME and Convert().
+ */
+int
+MATCH_FUNC_NAME(const char *p, const char *s)
+{
+  while (*p)
+    {
+      if (*p == '?' && *s)
+	p++, s++;
+      else if (*p == '*')
+	{
+	  int z = p[1];
+	  /*
+	   * When the '*' is followed by another wildcard, there is no literal
+	   * character to search for, so every position has to be tried.
+	   * (Searching for a literal '?' or '*' in the subject would make
+	   * patterns like "*?" or "**" fail on ordinary strings.)
+	   */
+	  int wild = (z == '?' || z == '*');
+
+	  if (!z)
+	    return 1;
+	  if (z == '\\' && p[2])
+	    z = p[2];
+	  z = Convert(z);
+	  for(;;)
+	    {
+	      /* Quickly skip positions which cannot match the next literal */
+	      if (!wild)
+		while (*s && Convert(*s) != z)
+		  s++;
+	      /* A wildcard may match even at the very end of the subject */
+	      if ((wild || *s) && MATCH_FUNC_NAME(p+1, s))
+		return 1;
+	      if (!*s)
+		return 0;
+	      s++;
+	    }
+	}
+      else
+	{
+	  if (*p == '\\' && p[1])
+	    p++;
+	  if (Convert(*p++) != Convert(*s++))
+	    return 0;
+	}
+    }
+  return !*s;
+}
--- /dev/null
+# Poor Man's CGI Module for Perl
+#
+# (c) 2002--2007 Martin Mares <mj@ucw.cz>
+# Slightly modified by Tomas Valla <tom@ucw.cz>
+#
+# This software may be freely distributed and used according to the terms
+# of the GNU Lesser General Public License.
+
+# FIXME:
+# - respond with proper HTTP error codes
+# - if we get invalid parameters, generate HTTP error or redirect
+
+package UCW::CGI;
+
+# First of all, set up error handling, so that even errors during parsing
+# will be reported properly.
+
+# Variables to be set by the calling module:
+# $UCW::CGI::error_mail mail address of the script admin (optional)
+# (this one has to be set in the BEGIN block!)
+# $UCW::CGI::error_hook function to be called for reporting errors
+
+my $error_reported;
+my $exit_code;
+my $debug = 0;
+
+# Report an internal error and die. Only the first error is reported:
+# the message goes to STDERR and then either to $UCW::CGI::error_hook
+# (if it is defined) or to the client as a plain-text response.
+sub report_bug($)
+{
+ if (!defined $error_reported) {
+ $error_reported = 1;
+ print STDERR $_[0];
+ if (defined($UCW::CGI::error_hook)) {
+ &$UCW::CGI::error_hook($_[0]);
+ } else {
+ print "Content-type: text/plain\n\n";
+ print "Internal bug:\n";
+ print $_[0], "\n";
+ print "Please notify $UCW::CGI::error_mail\n" if defined $UCW::CGI::error_mail;
+ }
+ }
+ # This die re-enters the __DIE__ handler, but $error_reported is already set by then
+ die;
+}
+
+# Install the handlers at compile time, so that both fatal errors and
+# warnings raised while the rest of the script is being compiled are
+# routed to report_bug().
+BEGIN {
+ $SIG{__DIE__} = sub { report_bug($_[0]); };
+ $SIG{__WARN__} = sub { report_bug("WARNING: " . $_[0]); };
+ $exit_code = 0;
+}
+
+# Force the process exit status to $exit_code, whatever happened before
+END {
+ $? = $exit_code;
+}
+
+use strict;
+use warnings;
+
+require Exporter;
+our $VERSION = 1.0;
+our @ISA = qw(Exporter);
+our @EXPORT = qw(&html_escape &url_escape &url_param_escape &self_ref &self_form &http_get);
+our @EXPORT_OK = qw();
+
+### Escaping ###
+
+# Return the argument with every character outside the set allowed by
+# the regex below replaced by a %xx escape
+sub url_escape($) {
+    my ($str) = @_;
+    $str =~ s/([^-\$_.!*'(),0-9A-Za-z\x80-\xff])/"%".unpack('H2',$1)/ge;
+    return $str;
+}
+
+# Like url_escape(), but spaces are written as '+' instead of %20
+sub url_param_escape($) {
+    my ($str) = @_;
+    my $escaped = url_escape($str);
+    $escaped =~ s/%20/+/g;
+    return $escaped;
+}
+
+# Escape the characters which are special in HTML text and in double-quoted
+# attribute values (&, <, > and ") by the corresponding character entities.
+# Returns the escaped copy of the argument.
+sub html_escape($) {
+    my $x = shift @_;
+    # The ampersand has to go first, otherwise the entities produced
+    # by the following substitutions would be escaped once more
+    $x =~ s/&/&amp;/g;
+    $x =~ s/</&lt;/g;
+    $x =~ s/>/&gt;/g;
+    $x =~ s/"/&quot;/g;
+    return $x;
+}
+
+### Analysing RFC 822 Style Headers ###
+
+# Normalize the value of an RFC 822 style header: backslash escapes and
+# quoted strings are turned to %xx escapes, comments in parentheses are
+# dropped and superfluous white space is removed. The %xx escapes can be
+# turned back to characters by rfc822_deescape().
+sub rfc822_prepare($) {
+ my $x = shift @_;
+ # Convert all %'s and backslash escapes to %xx escapes
+ $x =~ s/%/%25/g;
+ $x =~ s/\\(.)/"%".unpack("H2",$1)/ge;
+ # Remove all comments, beware, they can be nested (unterminated comments are closed at EOL automatically)
+ while ($x =~ s/^(("[^"]*"|[^"(])*(\([^)]*)*)(\([^()]*(\)|$))/$1 /) { }
+ # Remove quotes and escape dangerous characters inside (again closing at the end automatically)
+ $x =~ s{"([^"]*)("|$)}{my $z=$1; $z =~ s/([^0-9a-zA-Z%_-])/"%".unpack("H2",$1)/ge; $z;}ge;
+ # All control characters are properly escaped, tokens are clearly visible.
+ # Finally remove all unnecessary spaces.
+ $x =~ s/\s+/ /g;
+ $x =~ s/(^ | $)//g;
+ $x =~ s{\s*([()<>@,;:\\"/\[\]?=])\s*}{$1}g;
+ return $x;
+}
+
+# Turn the %xx escapes in the argument back to characters
+sub rfc822_deescape($) {
+    my ($str) = @_;
+    $str =~ s/%(..)/pack("H2",$1)/ge;
+    return $str;
+}
+
+### Reading of HTTP headers ###
+
+# Look up a HTTP request header in the CGI environment: the name is
+# upper-cased with dashes turned to underscores and tried first with
+# the HTTP_ prefix and then without it
+sub http_get($) {
+    my ($name) = @_;
+    (my $key = $name) =~ tr/a-z-/A-Z_/;
+    return $ENV{"HTTP_$key"} || $ENV{$key};
+}
+
+### Parsing of Arguments ###
+
+my $arg_table;
+
+# Parse a string of name=value pairs separated by '&' or ':' and store
+# the values of all arguments known to $arg_table. Unknown names and items
+# without '=' are silently skipped.
+sub parse_arg_string($) {
+ my ($s) = @_;
+ # NOTE(review): without /g this removes only the first run of white space -- confirm this is intended
+ $s =~ s/\s+//;
+ foreach $_ (split /[&:]/,$s) {
+ (/^([^=]+)=(.*)$/) or next;
+ my $arg = $arg_table->{$1} or next;
+ $_ = $2;
+ # Decode the URL encoding and unify line endings to \n
+ s/\+/ /g;
+ s/%(..)/pack("H2",$1)/eg;
+ s/\r\n/\n/g;
+ s/\r/\n/g;
+ # Unless the argument is declared multi-line, newlines and tabs become spaces
+ $arg->{'multiline'} || s/(\n|\t)/ /g;
+ s/^\s+//;
+ s/\s+$//;
+ # Values not matching the 'check' regex as a whole are replaced by the default
+ if (my $rx = $arg->{'check'}) {
+ if (!/^$rx$/) { $_ = $arg->{'default'}; }
+ }
+
+ # Scalar variables are overwritten, array variables collect all values
+ my $r = ref($arg->{'var'});
+ if ($r eq 'SCALAR') {
+ ${$arg->{'var'}} = $_;
+ } elsif ($r eq 'ARRAY') {
+ push @{$arg->{'var'}}, $_;
+ }
+ }
+}
+
+sub parse_multipart_form_data();
+
+# Parse the arguments of the CGI request according to the given table
+# (a hash reference mapping argument names to their descriptions). All
+# variables mentioned in the table are first reset to their defaults,
+# then the query string is parsed and for POST requests also the body.
+sub parse_args($) {
+ $arg_table = shift @_;
+ if (!defined $ENV{"GATEWAY_INTERFACE"}) {
+ print STDERR "Must be called as a CGI script.\n";
+ $exit_code = 1;
+ exit;
+ }
+ # Reset all variables: scalars to the default (or ""), arrays to empty lists
+ foreach my $a (values %$arg_table) {
+ my $r = ref($a->{'var'});
+ defined($a->{'default'}) or $a->{'default'}="";
+ if ($r eq 'SCALAR') {
+ ${$a->{'var'}} = $a->{'default'};
+ } elsif ($r eq 'ARRAY') {
+ @{$a->{'var'}} = ();
+ }
+ }
+ my $method = $ENV{"REQUEST_METHOD"};
+ my $qs = $ENV{"QUERY_STRING"};
+ # The query string is parsed for all request methods
+ parse_arg_string($qs) if defined($qs);
+ if ($method eq "GET") {
+ } elsif ($method eq "POST") {
+ if ($ENV{"CONTENT_TYPE"} =~ /^application\/x-www-form-urlencoded\b/i) {
+ while (<STDIN>) {
+ chomp;
+ parse_arg_string($_);
+ }
+ } elsif ($ENV{"CONTENT_TYPE"} =~ /^multipart\/form-data\b/i) {
+ parse_multipart_form_data();
+ } else {
+ die "Unknown content type for POST data";
+ }
+ } else {
+ die "Unknown request method";
+ }
+}
+
+### Parsing Multipart Form Data ###
+
+my $boundary;
+my $boundary_len;
+my $mp_buffer;
+my $mp_buffer_i;
+my $mp_buffer_boundary;
+my $mp_eof;
+
+# Make sure that data of the current part are available in $mp_buffer and
+# return how many bytes starting at $mp_buffer_i can be consumed. The result
+# never reaches past a known boundary and it is zero when $mp_buffer_i stands
+# right at the boundary. $more is the amount of data the caller asks for;
+# the value returned can be both smaller and larger.
+sub refill_mp_data($) {
+ my ($more) = @_;
+ if ($mp_buffer_boundary >= $mp_buffer_i) {
+ # We already know where the part ends
+ return $mp_buffer_boundary - $mp_buffer_i;
+ } elsif ($mp_buffer_i + $more <= length($mp_buffer) - $boundary_len) {
+ # Enough data buffered; the last $boundary_len bytes are held back,
+ # presumably because they could be the start of a boundary not read in full yet
+ return $more;
+ } else {
+ # Throw away the consumed part of the buffer and read more input
+ if ($mp_buffer_i) {
+ $mp_buffer = substr($mp_buffer, $mp_buffer_i);
+ $mp_buffer_i = 0;
+ }
+ while ($mp_buffer_i + $more > length($mp_buffer) - $boundary_len) {
+ last if $mp_eof;
+ my $data;
+ my $n = read(STDIN, $data, 2048);
+ if ($n > 0) {
+ $mp_buffer .= $data;
+ } else {
+ $mp_eof = 1;
+ }
+ }
+ # $mp_buffer_i is zero here, so positions in the buffer are also byte counts
+ $mp_buffer_boundary = index($mp_buffer, $boundary, $mp_buffer_i);
+ if ($mp_buffer_boundary >= 0) {
+ return $mp_buffer_boundary;
+ } elsif ($mp_eof) {
+ return length($mp_buffer);
+ } else {
+ return length($mp_buffer) - $boundary_len;
+ }
+ }
+}
+
+# Read one CRLF-terminated line of the current part and return it without
+# the terminator. If no complete line is available: with $allow_empty set,
+# the remaining data of the part are returned as an incomplete line, or
+# undef if there are none; otherwise the function dies.
+sub get_mp_line($) {
+ my ($allow_empty) = @_;
+ my $n = refill_mp_data(1024);
+ my $i = index($mp_buffer, "\r\n", $mp_buffer_i);
+ # The whole CRLF must lie within the $n bytes available
+ if ($i >= $mp_buffer_i && $i < $mp_buffer_i + $n - 1) {
+ my $s = substr($mp_buffer, $mp_buffer_i, $i - $mp_buffer_i);
+ $mp_buffer_i = $i + 2;
+ return $s;
+ } elsif ($allow_empty) {
+ if ($n) { # An incomplete line
+ my $s = substr($mp_buffer, $mp_buffer_i, $n);
+ $mp_buffer_i += $n;
+ return $s;
+ } else { # No more lines
+ return undef;
+ }
+ } else {
+ die "Premature end of multipart POST data";
+ }
+}
+
+# Skip the boundary line the buffer pointer stands at and locate the next
+# boundary. Returns 0 if the line skipped was the closing boundary (i.e.,
+# the boundary string followed by "--") and 1 if another part follows.
+# Dies when the pointer is not at a boundary.
+sub skip_mp_boundary() {
+    if ($mp_buffer_boundary != $mp_buffer_i) {
+        die "Premature end of multipart POST data";
+    }
+    $mp_buffer_boundary = -1;
+    # Step over the CRLF the boundary string starts with and read the rest as a line
+    $mp_buffer_i += 2;
+    my $b = get_mp_line(0);
+    print STDERR "SEP $b\n" if $debug;
+    $mp_buffer_boundary = index($mp_buffer, $boundary, $mp_buffer_i);
+    # The boundary is a literal string supplied by the client and it may
+    # contain regex metacharacters (+, ?, parentheses, ...), so it has
+    # to be quoted when it is interpolated to the pattern.
+    if ("\r\n$b" =~ /^\Q$boundary\E--/) {
+        return 0;
+    } else {
+        return 1;
+    }
+}
+
+# Read the header lines of a part up to the first empty line. Returns
+# a reference to a hash mapping lower-cased header names to their values
+# processed by rfc822_prepare(), or undef if no headers were found.
+sub parse_mp_header() {
+ my $h = {};
+ my $last;
+ while ((my $l = get_mp_line(0)) ne "") {
+ print STDERR "HH $l\n" if $debug;
+ if (my ($name, $value) = ($l =~ /([A-Za-z0-9-]+)\s*:\s*(.*)/)) {
+ $name =~ tr/A-Z/a-z/;
+ $h->{$name} = $value;
+ $last = $name;
+ } elsif ($l =~ /^\s+/ && $last) {
+ # A line starting with white space continues the previous header
+ $h->{$last} .= $l;
+ } else {
+ $last = undef;
+ }
+ }
+ foreach my $n (keys %$h) {
+ $h->{$n} = rfc822_prepare($h->{$n});
+ print STDERR "H $n: $h->{$n}\n" if $debug;
+ }
+ return (keys %$h) ? $h : undef;
+}
+
+# Parse a multipart/form-data request body from STDIN. Parts whose field
+# name is found in $arg_table are stored either to the variable of the
+# argument (item "var") or to a temporary file (item "file"); all other
+# parts are skipped. Dies on malformed or oversized input.
+sub parse_multipart_form_data() {
+ # First of all, find the boundary string
+ my $ct = rfc822_prepare($ENV{"CONTENT_TYPE"});
+ if (!(($boundary) = ($ct =~ /^.*;boundary=([^; ]+)/))) {
+ die "Multipart content with no boundary string received";
+ }
+ $boundary = rfc822_deescape($boundary);
+ print STDERR "BOUNDARY IS $boundary\n" if $debug;
+
+ # BUG: IE 3.01 on Macintosh forgets to add the "--" at the start of the boundary string
+ # as the MIME specs preach. Workaround borrowed from CGI.pm in Perl distribution.
+ my $agent = http_get("User-agent") || "";
+ $boundary = "--$boundary" unless $agent =~ /MSIE\s+3\.0[12];\s*Mac/;
+ $boundary = "\r\n$boundary";
+ $boundary_len = length($boundary) + 2;
+
+ # Check upload size in advance
+ # (the limit is the sum of "maxsize" of all arguments, counting 65536 for those which set none)
+ if (my $size = http_get("Content-Length")) {
+ my $max_allowed = 0;
+ foreach my $a (values %$arg_table) {
+ $max_allowed += $a->{"maxsize"} || 65536;
+ }
+ if ($size > $max_allowed) {
+ die "Maximum form data length exceeded";
+ }
+ }
+
+ # Initialize our buffering mechanism and part splitter
+ # (the buffer starts with a CRLF, so that a boundary at the very start of the input is found, too)
+ $mp_buffer = "\r\n";
+ $mp_buffer_i = 0;
+ $mp_buffer_boundary = -1;
+ $mp_eof = 0;
+
+ # Skip garbage before the 1st part
+ while (my $i = refill_mp_data(256)) { $mp_buffer_i += $i; }
+ skip_mp_boundary() || return;
+
+ # Process individual parts
+ do { PART: {
+ print STDERR "NEXT PART\n" if $debug;
+ my $h = parse_mp_header();
+ my ($field, $cdisp, $a);
+ if ($h &&
+ ($cdisp = $h->{"content-disposition"}) &&
+ $cdisp =~ /^form-data/ &&
+ (($field) = ($cdisp =~ /;name=([^;]+)/)) &&
+ ($a = $arg_table->{"$field"})) {
+ print STDERR "FIELD $field\n" if $debug;
+ if (defined $h->{"content-transfer-encoding"}) { die "Unexpected Content-Transfer-Encoding"; }
+ if (defined $a->{"var"}) {
+ # An ordinary argument: every line of the part is handed over as a separate value
+ while (defined (my $l = get_mp_line(1))) {
+ print STDERR "VALUE $l\n" if $debug;
+ parse_arg_string("$field=$l");
+ }
+ next PART;
+ } elsif (defined $a->{"file"}) {
+ # A file upload: copy the part to a temporary file
+ require File::Temp;
+ require IO::Handle;
+ my $max_size = $a->{"maxsize"} || 1048576;
+ my @tmpargs = (undef, UNLINK => 1);
+ push @tmpargs, DIR => $a->{"tmpdir"} if defined $a->{"tmpdir"};
+ my ($fh, $fn) = File::Temp::tempfile(@tmpargs);
+ print STDERR "FILE UPLOAD to $fn\n" if $debug;
+ ${$a->{"file"}} = $fn;
+ ${$a->{"fh"}} = $fh if defined $a->{"fh"};
+ my $total_size = 0;
+ while (my $i = refill_mp_data(4096)) {
+ print $fh substr($mp_buffer, $mp_buffer_i, $i);
+ $mp_buffer_i += $i;
+ $total_size += $i;
+ if ($total_size > $max_size) { die "Uploaded file too long"; }
+ }
+ $fh->flush(); # Don't close the handle, the file would disappear otherwise
+ next PART;
+ }
+ }
+ # Not a part we are interested in, skip its data up to the next boundary
+ print STDERR "SKIPPING\n" if $debug;
+ while (my $i = refill_mp_data(256)) { $mp_buffer_i += $i; }
+ } } while (skip_mp_boundary());
+}
+
+### Generating Self-ref URL's ###
+
+# Collect the arguments to be passed in a self-referencing URL or form.
+# $overrides is a hash reference of values replacing the current ones;
+# an override which exists, but is undefined, selects the default value.
+# Returns a hash reference with the arguments which differ from their defaults.
+sub make_out_args($) {
+ my ($overrides) = @_;
+ my $out = {};
+ foreach my $name (keys %$arg_table) {
+ my $arg = $arg_table->{$name};
+ defined($arg->{'var'}) || next;
+ # Arguments with 'pass' set to a false value are passed only when overridden
+ defined($arg->{'pass'}) && !$arg->{'pass'} && !exists $overrides->{$name} && next;
+ my $value;
+ if (!defined($value = $overrides->{$name})) {
+ if (exists $overrides->{$name}) {
+ $value = $arg->{'default'};
+ } else {
+ $value = ${$arg->{'var'}};
+ }
+ }
+ if ($value ne $arg->{'default'}) {
+ $out->{$name} = $value;
+ }
+ }
+ return $out;
+}
+
+# Return a relative URL ("?name=value:name=value...") referring to this
+# script with the current arguments modified by the given overrides
+sub self_ref(@) {
+    my %overrides = @_;
+    my $args = make_out_args(\%overrides);
+    my @pairs;
+    foreach my $name (sort keys %$args) {
+        push @pairs, "$name=" . url_param_escape($args->{$name});
+    }
+    return "?" . join(':', @pairs);
+}
+
+# Return a sequence of hidden <input> elements which pass the current
+# arguments (modified by the given overrides) through a HTML form
+sub self_form(@) {
+    my %h = @_;
+    my $out = make_out_args(\%h);
+    return join('', map {
+        my $v = html_escape($out->{$_});
+        # The value is put to a single-quoted attribute, so apostrophes must
+        # be escaped as well, otherwise the value could terminate the attribute
+        $v =~ s/'/&#39;/g;
+        "<input type=hidden name=$_ value='$v'>\n"
+    } sort keys %$out);
+}
+
+### Cookies
+
+# Return the argument in a form usable as a cookie value: strings made
+# only of letters, digits and '%' are kept as they are, anything else is
+# enclosed in double quotes with quotes and backslashes escaped
+sub cookie_esc($) {
+    my ($val) = @_;
+    return $val if $val =~ /^[a-zA-Z0-9%]+$/;
+    $val =~ s/([\\\"])/\\$1/g;
+    return "\"$val\"";
+}
+
+# Print a Set-Cookie header for the given key and value. Further arguments
+# are name => value pairs of cookie attributes; "version" defaults to 1.
+sub set_cookie($$@) {
+    my ($key, $value, %other) = @_;
+    if (!defined $other{'version'}) {
+        $other{'version'} = 1;
+    }
+    print "Set-Cookie: $key=", cookie_esc($value);
+    for my $attr (keys %other) {
+        print ";$attr=", cookie_esc($other{$attr});
+    }
+    print "\n";
+}
+
+# Parse the Cookie header of the request. Returns a flat list of
+# (name, value, name, value, ...), which is empty if there is no such header.
+sub parse_cookies() {
+ my $h = http_get("Cookie") or return ();
+ my @cook = ();
+ # Each iteration takes one name=value pair off the start of $h and continues with the rest
+ while (my ($padding,$name,$val,$xx,$rest) = ($h =~ /\s*([,;]\s*)*([^ =]+)=([^ =,;\"]*|\"([^\"\\]|\\.)*\")(\s.*|;.*|$)/)) {
+ # Quoted values are stripped of the quotes and of backslash escapes
+ if ($val =~ /^\"/) {
+ $val =~ s/^\"//;
+ $val =~ s/\"$//;
+ $val =~ s/\\(.)/$1/g;
+ }
+ push @cook, $name, $val;
+ $h = $rest;
+ }
+ return @cook;
+}
+
+1; # OK
--- /dev/null
+# Perl module for parsing Sherlock configuration files (using the config utility)
+#
+# (c) 2002--2005 Martin Mares <mj@ucw.cz>
+#
+# This software may be freely distributed and used according to the terms
+# of the GNU Lesser General Public License.
+
+package UCW::Config;
+
+use strict;
+use warnings;
+use Getopt::Long;
+
+our %Sections = ();
+
+our $DefaultConfigFile = "";
+our $Usage = "-C, --config filename Override the default configuration file
+-S, --set sec.item=val Manual setting of a configuration item";
+
+
+# Parse the command line and load the configuration.
+#
+# The arguments are option specifications for Getopt::Long, to which the
+# options -C/--config and -S/--set are added. Then for every section
+# registered in %Sections, bin/config is run and the values it prints are
+# stored to the variables referenced from the section description (scalar
+# references are assigned to, array references are appended to).
+#
+# Returns 1 on success and 0 if the command line is invalid. Exits with
+# status 1 if bin/config fails and dies if its output cannot be parsed.
+sub Parse(@) {
+    my @options = @_;
+    my $defargs = "";
+    my $override_config = 0;
+    push @options, "config|C=s" => sub { my ($o,$a)=@_; $defargs .= " -C'$a'"; $override_config=1; };
+    push @options, "set|S=s" => sub { my ($o,$a)=@_; $defargs .= " -S'$a'"; };
+    Getopt::Long::Configure("bundling");
+    Getopt::Long::GetOptions(@options) or return 0;
+    if (!$override_config && $DefaultConfigFile) {
+        $defargs = "-C'$DefaultConfigFile' $defargs";
+    }
+    foreach my $section (keys %Sections) {
+        my $opts = $Sections{$section};
+        my $optlist = join(";", keys %$opts);
+        # The output of bin/config is matched by option names without the trailing '#' and '$' characters
+        my %filtered_opts = map { my $t=$_; $t=~s/[#\$]+$//; $t => $$opts{$_} } keys %$opts;
+        my @l = `bin/config $defargs "$section\{$optlist\}"`;
+        $? && exit 1;
+        foreach my $o (@l) {
+            # Report the offending line itself: the loop variable is $o, $_ is not set here
+            $o =~ /^CF_.*_([^=]+)='(.*)'\n$/ or die "Cannot parse bin/config output: $o";
+            my $var = $filtered_opts{$1};
+            my $val = $2;
+            if (ref $var eq "SCALAR") {
+                $$var = $val;
+            } elsif (ref $var eq "ARRAY") {
+                push @$var, $val;
+            } elsif (ref $var) {
+                die ("UCW::Config::Parse: don't know how to set $o");
+            }
+        }
+    }
+    1;
+}
+
+1; # OK
--- /dev/null
+# Perl module for UCW Configure Scripts
+#
+# (c) 2005 Martin Mares <mj@ucw.cz>
+#
+# This software may be freely distributed and used according to the terms
+# of the GNU Lesser General Public License.
+
+package UCW::Configure;
+
+use strict;
+use warnings;
+
+BEGIN {
+ # The somewhat hairy Perl export mechanism
+ use Exporter();
+ our ($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);
+ $VERSION = 1.0;
+ @ISA = qw(Exporter);
+ @EXPORT = qw(&Init &Log &Notice &Warn &Fail &IsSet &Set &UnSet &Append &Override &Get &Test &Include &Finish &FindFile &TryFindFile);
+ @EXPORT_OK = qw();
+ %EXPORT_TAGS = ();
+}
+
+our %vars = ();
+our %overriden = ();
+
+# Print a message to the standard output
+sub Log($) {
+ print @_;
+}
+
+# Print a message, but only if the VERBOSE variable is set to a true value
+sub Notice($) {
+ print @_ if $vars{"VERBOSE"};
+}
+
+# Print a message prefixed by "WARNING: "
+sub Warn($) {
+ print "WARNING: ", @_;
+}
+
+# Log an error message (a newline is appended) and exit with status 1
+sub Fail($) {
+    my ($msg) = @_;
+    Log("ERROR: $msg\n");
+    exit 1;
+}
+
+# Check whether the given configuration variable exists
+sub IsSet($) {
+    my $name = shift;
+    return exists $vars{$name};
+}
+
+# Return the value of the given configuration variable (undef if it does not exist)
+sub Get($) {
+    my $name = shift;
+    return $vars{$name};
+}
+
+# Set a configuration variable to the given value (1 if no value is given).
+# Variables fixed by Override() are left unchanged.
+sub Set($;$) {
+ my ($x,$y) = @_;
+ $y=1 unless defined $y;
+ $vars{$x}=$y unless $overriden{$x};
+}
+
+# Remove a configuration variable, unless it has been fixed by Override()
+sub UnSet($) {
+ my ($x) = @_;
+ delete $vars{$x} unless $overriden{$x};
+}
+
+# Append a word to a configuration variable, separated by a space from
+# its previous value; a variable which does not exist is simply set
+sub Append($$) {
+    my ($x,$y) = @_;
+    my $value = $y;
+    $value = Get($x) . " $y" if IsSet($x);
+    Set($x, $value);
+}
+
+# Set a configuration variable (to 1 if no value is given) and mark it
+# as overridden, so that later calls to Set() and UnSet() do not change it
+sub Override($;$) {
+ my ($x,$y) = @_;
+ $y=1 unless defined $y;
+ $vars{$x}=$y;
+ $overriden{$x} = 1;
+}
+
+# Run a configuration test: log "$msg ... ", set the variable $var to the
+# result of calling $sub unless the variable exists already, and log the
+# resulting value
+sub Test($$$) {
+ my ($var,$msg,$sub) = @_;
+ Log "$msg ... ";
+ if (!IsSet($var)) {
+ Set $var, &$sub();
+ }
+ Log Get($var) . "\n";
+}
+
+# Look for a file: first under the name given and then, if the name is
+# relative, in the directory stored in SRCDIR. Returns the path found
+# or undef.
+sub TryFindFile($) {
+    my ($f) = @_;
+    return $f if -f $f;
+    if ($f !~ /^\//) {
+        my $in_srcdir = Get("SRCDIR") . "/$f";
+        return $in_srcdir if -f $in_srcdir;
+    }
+    return undef;
+}
+
+# Like TryFindFile(), but fail with an error message if the file does not exist
+sub FindFile($) {
+    my ($f) = @_;
+    my $found = TryFindFile($f);
+    Fail "Cannot find file $f" unless defined $found;
+    return $found;
+}
+
+# Initialize the configure script: $srcdir is the source directory,
+# $defconfig the name of the default configuration (may be undef). Parses
+# @ARGV, sets CONFIG and SRCDIR and records the options given on the command
+# line as overrides. Finally CONFIG is resolved to an existing file.
+sub Init($$) {
+ my ($srcdir,$defconfig) = @_;
+ if ((!defined $defconfig && !@ARGV) || @ARGV && $ARGV[0] eq "--help") {
+ print STDERR "Usage: [<srcdir>/]configure " . (defined $defconfig ? "[" : "") . "<config-name>" . (defined $defconfig ? "]" : "") .
+ " [<option>[=<value>] | -<option>] ...\n";
+ exit 1;
+ }
+ # A first argument without '=' is taken as the name of the configuration
+ if (@ARGV && $ARGV[0] !~ /=/) {
+ Set('CONFIG' => shift @ARGV);
+ } else {
+ Set('CONFIG' => $defconfig);
+ }
+ Set("SRCDIR", $srcdir);
+
+ # The remaining arguments: VAR=value sets a variable, -VAR removes it, a bare VAR sets it to 1
+ foreach my $x (@ARGV) {
+ if ($x =~ /^(\w+)=(.*)/) {
+ Override($1 => $2);
+ } elsif ($x =~ /^-(\w+)$/) {
+ Override($1 => 0);
+ delete $vars{$1};
+ } elsif ($x =~ /^(\w+)$/) {
+ Override($1 => 1);
+ } else {
+ print STDERR "Invalid option $x\n";
+ exit 1;
+ }
+ }
+
+ # CONFIG may also name a directory which contains a file called "config"
+ if (!TryFindFile(Get("CONFIG"))) {
+ TryFindFile(Get("CONFIG")."/config") or Fail "Cannot find configuration " . Get("CONFIG");
+ Override("CONFIG" => Get("CONFIG")."/config");
+ }
+}
+
+# Locate a configuration file by FindFile() and load it using require
+sub Include($) {
+ my ($f) = @_;
+ $f = FindFile($f);
+ Notice "Loading configuration $f\n";
+ require $f;
+}
+
+# Finish the configuration: prepare the obj/ directory (and, when building
+# outside the source directory, the src and Makefile symlinks) and write
+# the configuration variables to obj/autoconf.h and obj/config.mk
+sub Finish() {
+ print "\n";
+
+ if (Get("SRCDIR") ne ".") {
+ Log "Preparing for compilation from directory " . Get("SRCDIR") . " to obj/ ... ";
+ -l "src" and unlink "src";
+ symlink Get("SRCDIR"), "src" or Fail "Cannot link source directory to src: $!";
+ Override("SRCDIR" => "src");
+ -l "Makefile" and unlink "Makefile";
+ -f "Makefile" and Fail "Makefile already exists";
+ symlink "src/Makefile", "Makefile" or Fail "Cannot link Makefile: $!";
+ } else {
+ Log "Preparing for compilation from current directory to obj/ ... ";
+ }
+ # Always start with a fresh obj directory
+ `rm -rf obj` if -d "obj"; Fail "Cannot delete old obj directory" if $?;
+ -d "obj" or mkdir("obj", 0777) or Fail "Cannot create obj directory: $!";
+ -d "obj/lib" or mkdir("obj/lib", 0777) or Fail "Cannot create obj/lib directory: $!";
+ Log "done\n";
+
+ Log "Generating autoconf.h ... ";
+ open X, ">obj/autoconf.h" or Fail $!;
+ print X "/* Generated automatically by $0, please don't touch manually. */\n";
+ foreach my $x (sort keys %vars) {
+ # Don't export variables which contain no underscores
+ next unless $x =~ /_/;
+ my $v = $vars{$x};
+ # Try to add quotes if necessary
+ # (values which start with a quote or consist only of digits are written as they are)
+ $v = '"' . $v . '"' unless ($v =~ /^"/ || $v =~ /^\d*$/);
+ print X "#define $x $v\n";
+ }
+ close X;
+ Log "done\n";
+
+ Log "Generating config.mk ... ";
+ open X, ">obj/config.mk" or Fail $!;
+ print X "# Generated automatically by $0, please don't touch manually.\n";
+ # Unlike autoconf.h, config.mk gets all variables
+ foreach my $x (sort keys %vars) {
+ print X "$x=$vars{$x}\n";
+ }
+ print X "s=\${SRCDIR}\n";
+ print X "o=obj\n";
+ close X;
+ Log "done\n";
+}
+
+1; # OK
--- /dev/null
+# Perl module for managing file locks
+#
+# (c) 2007 Pavel Charvat <pchar@ucw.cz>
+#
+# This software may be freely distributed and used according to the terms
+# of the GNU Lesser General Public License.
+#
+#
+#
+# Interface:
+# UCW::Filelock::fcntl_lock($fd, $cmd, $type, $whence, $start, $len)
+#
+
+package UCW::Filelock;
+
+use 5.006;
+use strict;
+use warnings;
+
+require DynaLoader;
+
+our @ISA = qw(DynaLoader);
+unshift @DynaLoader::dl_library_path, "lib";
+
+our $VERSION = '0.01';
+
+bootstrap UCW::Filelock $VERSION;
+
+# Preloaded methods go here.
+
+1;
+__END__
--- /dev/null
+/*
+ * PerlXS module for managing file locks
+ *
+ * (c) 2007 Pavel Charvat <pchar@ucw.cz>
+ */
+
+#include "EXTERN.h"
+#include "perl.h"
+#include "XSUB.h"
+
+#include <unistd.h>
+#include <fcntl.h>
+
+
+MODULE = UCW::Filelock PACKAGE = UCW::Filelock
+
+PROTOTYPES: ENABLED
+
+int
+fcntl_lock(IN int fd, IN int cmd, IN int type, IN int whence, IN int start, IN int len)
+CODE:
+ /*
+ * Fill a struct flock by the arguments and pass it to fcntl(fd, cmd, ...).
+ * The value returned by fcntl() is returned to the caller.
+ * Only l_type, l_whence, l_start and l_len are set.
+ */
+ struct flock fl;
+ fl.l_type = type;
+ fl.l_whence = whence;
+ fl.l_start = start;
+ fl.l_len = len;
+
+ RETVAL = fcntl(fd, cmd, &fl);
+OUTPUT:
+ RETVAL
--- /dev/null
+Makefile
+Makefile.PL
+MANIFEST
+Filelock.pm
+Filelock.xs
+lib/Sherlock/.exists
--- /dev/null
+# Makefile for the Filelock Perl module (c) 2007 Pavel Charvat <pchar@ucw.cz>
+
+DIRS+=lib/perl/Filelock/arch/auto/UCW/Filelock
+FILELOCK_DIR=lib/perl/Filelock
+
+PROGS+=$(o)/lib/perl/Filelock/Filelock.pm
+
+# The shared object of the extension is built as a part of the "extras" target
+extras:: $(o)/lib/perl/Filelock/arch/auto/UCW/Filelock/Filelock.$(SOEXT)
+
+# Build the extension inside the object tree by the Makefile generated
+# by MakeMaker and copy the result to run/$(DATADIR)/
+$(o)/lib/perl/Filelock/arch/auto/UCW/Filelock/Filelock.$(SOEXT): $(o)/$(FILELOCK_DIR)/Filelock.xs $(o)/$(FILELOCK_DIR)/Filelock.pm $(o)/$(FILELOCK_DIR)/Makefile
+ $(M)MAKE $@
+ $(Q)cd $(o)/$(FILELOCK_DIR) && $(MAKE) -f Makefile $(MAKESILENT)
+ $(Q)touch $@
+ $(Q)cp $@ run/$(DATADIR)/
+
+# Let MakeMaker generate the Makefile of the extension
+$(o)/$(FILELOCK_DIR)/Makefile: $(o)/$(FILELOCK_DIR)/Makefile.PL
+ $(M)PREPARE $@
+ $(Q)cd $(o)/$(FILELOCK_DIR) && perl Makefile.PL
+
+# The sources are copied to the object tree, since the extension is built there
+$(o)/$(FILELOCK_DIR)/Filelock.xs: $(s)/$(FILELOCK_DIR)/Filelock.xs
+ $(Q)cp $^ $@
+
+$(o)/$(FILELOCK_DIR)/Makefile.PL: $(s)/$(FILELOCK_DIR)/Makefile.PL
+ $(Q)cp $^ $@
--- /dev/null
+# Makefile for Perl MakeMaker (c) 2007 Pavel Charvat <pchar@ucw.cz>
+
+use ExtUtils::MakeMaker;
+# The module version is taken from Filelock.pm; the results of the build
+# are put to the lib and arch directories given below
+WriteMakefile(
+ 'NAME' => 'UCW::Filelock',
+ 'VERSION_FROM' => 'Filelock.pm',
+ 'INST_LIB' => 'lib',
+ 'INST_ARCHLIB' => 'arch',
+);
--- /dev/null
+#
+# Perl module for Logging
+#
+# (c) 2007 Pavel Charvat <pchar@ucw.cz>
+#
+
+package UCW::Log;
+
+use lib 'lib/perl5';
+use strict;
+use warnings;
+use POSIX;
+use Exporter;
+
+our $version = 1.0;
+our @ISA = qw(Exporter);
+our @EXPORT = ();
+our %EXPORT_TAGS = ( all => [qw(&Log &Die)]);
+our @EXPORT_OK = (@{$EXPORT_TAGS{'all'}});
+
+my $Prog = (reverse split(/\//, $0))[0];
+
+# Write a log message to STDERR. The first argument is the log level,
+# the remaining ones are joined by spaces to form the text. The line
+# written consists of the level, the current local time, the program
+# name in brackets and the text.
+sub Log {
+    my ($level, @words) = @_;
+    my $stamp = strftime(" %Y-%m-%d %H:%M:%S ", localtime());
+    print STDERR $level, $stamp, "[$Prog] ", join(' ', @words), "\n";
+}
+
+# Log the arguments as a message of level '!' and exit with status 1
+sub Die {
+ Log('!', @_);
+ exit 1;
+}
+
+1;
--- /dev/null
+# Perl modules
+
+DIRS+=lib/perl
+EXTRA_RUNDIRS+=lib/perl5/UCW
+PROGS+=$(addprefix $(o)/lib/perl/,Config.pm Log.pm CGI.pm)
+
+# The XS modules (Ulimit, Filelock) are built only when CONFIG_UCW_PERL_MODULES is set
+ifdef CONFIG_UCW_PERL_MODULES
+include $(s)/lib/perl/Ulimit/Makefile
+include $(s)/lib/perl/Filelock/Makefile
+endif
--- /dev/null
+Makefile
+Makefile.PL
+MANIFEST
+Ulimit.pm
+Ulimit.xs
+lib/Sherlock/.exists
--- /dev/null
+# Makefile for the Ulimit Perl module (c) 2003 Tomas Valla <tom@ucw.cz>
+
+DIRS+=lib/perl/Ulimit/arch/auto/UCW/Ulimit
+ULIMIT_DIR=lib/perl/Ulimit
+
+PROGS+=$(o)/lib/perl/Ulimit/Ulimit.pm
+
+# The shared object of the extension is built as a part of the "extras" target
+extras:: $(o)/lib/perl/Ulimit/arch/auto/UCW/Ulimit/Ulimit.$(SOEXT)
+
+# Build the extension inside the object tree by the Makefile generated
+# by MakeMaker and copy the result to run/$(DATADIR)/
+$(o)/lib/perl/Ulimit/arch/auto/UCW/Ulimit/Ulimit.$(SOEXT): $(o)/$(ULIMIT_DIR)/Ulimit.xs $(o)/$(ULIMIT_DIR)/Ulimit.pm $(o)/$(ULIMIT_DIR)/Makefile
+ $(M)MAKE $@
+ $(Q)cd $(o)/$(ULIMIT_DIR) && $(MAKE) -f Makefile $(MAKESILENT)
+ $(Q)touch $@
+ $(Q)cp $@ run/$(DATADIR)/
+
+# Let MakeMaker generate the Makefile of the extension
+$(o)/$(ULIMIT_DIR)/Makefile: $(o)/$(ULIMIT_DIR)/Makefile.PL
+ $(M)PREPARE $@
+ $(Q)cd $(o)/$(ULIMIT_DIR) && perl Makefile.PL
+
+# The sources are copied to the object tree, since the extension is built there
+$(o)/$(ULIMIT_DIR)/Ulimit.xs: $(s)/$(ULIMIT_DIR)/Ulimit.xs
+ $(Q)cp $^ $@
+
+$(o)/$(ULIMIT_DIR)/Makefile.PL: $(s)/$(ULIMIT_DIR)/Makefile.PL
+ $(Q)cp $^ $@
--- /dev/null
+# Makefile for Perl MakeMaker (c) 2003 Tomas Valla <tom@ucw.cz>
+
+use ExtUtils::MakeMaker;
+# The module version is taken from Ulimit.pm; the results of the build
+# are put to the lib and arch directories given below
+WriteMakefile(
+ 'NAME' => 'UCW::Ulimit',
+ 'VERSION_FROM' => 'Ulimit.pm',
+ 'INST_LIB' => 'lib',
+ 'INST_ARCHLIB' => 'arch',
+);
--- /dev/null
+# Perl module for setting process limits
+#
+# (c) 2003 Tomas Valla <tom@ucw.cz>
+#
+# This software may be freely distributed and used according to the terms
+# of the GNU Lesser General Public License.
+#
+#
+#
+# Interface:
+# UCW::Ulimit::setlimit( $resource, $softlimit, $hardlimit)
+# UCW::Ulimit::getlimit( $resource, $softlimit, $hardlimit)
+#
+# setlimit sets limit to values supplied in softlimit and hardlimit
+# getlimit reads limits into softlimit and hardlimit
+# $resource constants are defined below
+#
+
+package UCW::Ulimit;
+
+use 5.006;
+use strict;
+use warnings;
+
+require DynaLoader;
+
+our @ISA = ('DynaLoader');
+
+# Make DynaLoader search the local "lib" directory first.
+unshift(@DynaLoader::dl_library_path, 'lib');
+
+# Resource selectors for setlimit()/getlimit(); the XS code translates
+# these numbers to the corresponding RLIMIT_* constants.
+our ($CPU, $FSIZE, $DATA, $STACK, $CORE, $RSS, $NPROC, $NOFILE, $MEMLOCK, $AS) = (0 .. 9);
+
+our $VERSION = '0.01';
+
+# Load the compiled part of the module.
+UCW::Ulimit->bootstrap($VERSION);
+
+1;
+__END__
--- /dev/null
+/*
+ * PerlXS module for managing process limits
+ *
+ * (c) 2003 Tomas Valla <tom@ucw.cz>
+ */
+
+#include "EXTERN.h"
+#include "perl.h"
+#include "XSUB.h"
+
+#include <sys/resource.h>
+#include <unistd.h>
+
+
+MODULE = UCW::Ulimit PACKAGE = UCW::Ulimit
+
+PROTOTYPES: ENABLED
+
+int
+setlimit(IN int resource, IN int soft, IN int hard)
+CODE:
+	/*
+	 * Set the soft and hard limit of the given resource.
+	 * `resource' is one of the selectors 0..9 defined in Ulimit.pm.
+	 * Returns the result of setrlimit().
+	 */
+	struct rlimit rl;
+	int r;
+
+	switch(resource) {
+	case 0:
+		r = RLIMIT_CPU; break;
+	case 1:
+		r = RLIMIT_FSIZE; break;
+	case 2:
+		r = RLIMIT_DATA; break;
+	case 3:
+		r = RLIMIT_STACK; break;
+	case 4:
+		r = RLIMIT_CORE; break;
+	case 5:
+		r = RLIMIT_RSS; break;
+	case 6:
+		r = RLIMIT_NPROC; break;
+	case 7:
+		r = RLIMIT_NOFILE; break;
+	case 8:
+		r = RLIMIT_MEMLOCK; break;
+	case 9:
+		r = RLIMIT_AS; break;
+	default:
+		/*
+		 * Unknown selector: never pass an uninitialized value down.
+		 * -1 is not expected to be a valid RLIMIT_* constant, so
+		 * setrlimit() should reject it with EINVAL.
+		 */
+		r = -1; break;
+	}
+	rl.rlim_cur = soft;
+	rl.rlim_max = hard;
+	RETVAL = setrlimit(r, &rl);
+OUTPUT:
+	RETVAL
+
+
+int
+getlimit(IN int resource, OUT int soft, OUT int hard)
+CODE:
+	/*
+	 * Read the soft and hard limit of the given resource into `soft'
+	 * and `hard'. `resource' is one of the selectors 0..9 defined in
+	 * Ulimit.pm. Returns the result of getrlimit(); when the call fails,
+	 * both outputs are set to 0 instead of stack garbage.
+	 */
+	struct rlimit rl;
+	int r;
+
+	rl.rlim_cur = 0;
+	rl.rlim_max = 0;
+	switch(resource) {
+	case 0:
+		r = RLIMIT_CPU; break;
+	case 1:
+		r = RLIMIT_FSIZE; break;
+	case 2:
+		r = RLIMIT_DATA; break;
+	case 3:
+		r = RLIMIT_STACK; break;
+	case 4:
+		r = RLIMIT_CORE; break;
+	case 5:
+		r = RLIMIT_RSS; break;
+	case 6:
+		r = RLIMIT_NPROC; break;
+	case 7:
+		r = RLIMIT_NOFILE; break;
+	case 8:
+		r = RLIMIT_MEMLOCK; break;
+	case 9:
+		r = RLIMIT_AS; break;
+	default:
+		/*
+		 * Unknown selector: never pass an uninitialized value down.
+		 * -1 is not expected to be a valid RLIMIT_* constant, so
+		 * getrlimit() should reject it with EINVAL.
+		 */
+		r = -1; break;
+	}
+
+	RETVAL = getrlimit(r, &rl);
+	soft = rl.rlim_cur;
+	hard = rl.rlim_max;
+OUTPUT:
+	RETVAL
--- /dev/null
+/*
+ * UCW Library -- Prefetch
+ *
+ * (c) 1997--2006 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_PREFETCH_H
+#define _UCW_PREFETCH_H
+
+#if defined(__k6)
+ /* K6 doesn't have prefetches, so only the empty fallback below gets compiled */
+
+#elif defined(__athlon) || defined(__k8) || \
+ defined(__i686) || \
+ defined(__pentium4) || defined(__prescott) || defined(__nocona)
+
+#define HAVE_PREFETCH /* tells the fallback below that a real prefetch() exists */
+static inline void prefetch(void *addr) /* issue a prefetcht0 instruction for the byte at addr */
+{
+ asm volatile ("prefetcht0 %0" : : "m" (*(byte*)addr));
+}
+
+#else
+#warning "Don't know how to prefetch on your CPU. Please fix lib/prefetch.h."
+#endif
+
+#ifndef HAVE_PREFETCH
+static inline void prefetch(void *addr UNUSED) /* fallback: prefetch() is a no-op */
+{
+}
+#endif
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Prime Number Tests
+ *
+ * (c) 1997 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+
+/*
+ * Trial division by candidates of the form 6k-1 and 6k+1, starting at 5.
+ * The caller guarantees that x is divisible by neither 2 nor 3.
+ * Returns 1 if no divisor was found, 0 otherwise.
+ */
+static int
+__isprime(uns x)
+{
+  uns divisor = 5;
+  uns step = 2;			/* Alternates 2, 4, 2, 4, ... */
+
+  if (x == 5)
+    return 1;
+  for(;;)
+    {
+      if (x % divisor == 0)
+	return 0;
+      if (x / divisor <= divisor)	/* Passed the square root */
+	return 1;
+      divisor += step;
+      step = 6 - step;
+    }
+}
+
+/* Returns 1 if x is a prime number, 0 otherwise. */
+int
+isprime(uns x)
+{
+  if (x < 5)
+    return (x == 2 || x == 3);
+
+  /* Only numbers of the form 6k+1 and 6k+5 can be prime here */
+  uns rem = x % 6;
+  if (rem != 1 && rem != 5)
+    return 0;
+  return __isprime(x);
+}
+
+/*
+ * Returns some prime greater than x (not necessarily the nearest one:
+ * candidates start at the first 6k+1 above x rounded up to 6k-1).
+ */
+uns
+nextprime(uns x)
+{
+  uns cand = x + (5 - (x % 6));	/* cand is 6k-1 */
+  uns step = 2;			/* Alternates 2 (to 6k+1) and 4 (to 6k-1) */
+
+  for(;;)
+    {
+      cand += step;
+      if (__isprime(cand))
+	return cand;
+      step = 6 - step;
+    }
+}
+
+#ifdef TEST
+
+#include <stdio.h>
+#include <stdlib.h>
+
+/* Test driver: prints whether the number given as argv[1] is prime and a prime following it. */
+int
+main(int argc, char **argv)
+{
+  if (argc != 2)
+    {
+      fprintf(stderr, "Usage: prime-test <number>\n");
+      return 1;
+    }
+  /* Parse as unsigned, so that the whole range of uns is accepted */
+  uns k = strtoul(argv[1], NULL, 10);
+  printf("%u is%s prime\n", k, isprime(k) ? "" : "n't");
+  printf("Next prime is %u\n", nextprime(k));
+  return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Prime Number Table
+ *
+ * (c) 2005 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/binsearch.h"
+
+/* A table of odd primes, each is about 1.2 times the previous one */
+static uns prime_table[] = {
+ 3,
+ 7,
+ 13,
+ 19,
+ 29,
+ 37,
+ 53,
+ 67,
+ 89,
+ 109,
+ 137,
+ 173,
+ 211,
+ 263,
+ 331,
+ 409,
+ 499,
+ 601,
+ 727,
+ 877,
+ 1061,
+ 1279,
+ 1543,
+ 1861,
+ 2239,
+ 2689,
+ 3229,
+ 3877,
+ 4657,
+ 5623,
+ 6761,
+ 8123,
+ 9767,
+ 11731,
+ 14083,
+ 16903,
+ 20287,
+ 24359,
+ 29243,
+ 35099,
+ 42131,
+ 50581,
+ 60703,
+ 72859,
+ 87433,
+ 104933,
+ 125927,
+ 151121,
+ 181361,
+ 217643,
+ 261223,
+ 313471,
+ 376171,
+ 451411,
+ 541699,
+ 650059,
+ 780119,
+ 936151,
+ 1123391,
+ 1348111,
+ 1617739,
+ 1941293,
+ 2329559,
+ 2795477,
+ 3354581,
+ 4025507,
+ 4830619,
+ 5796797,
+ 6956203,
+ 8347483,
+ 10017011,
+ 12020431,
+ 14424539,
+ 17309471,
+ 20771371,
+ 24925661,
+ 29910821,
+ 35892991,
+ 43071601,
+ 51685939,
+ 62023139,
+ 74427803,
+ 89313379,
+ 107176057,
+ 128611313,
+ 154333591,
+ 185200339,
+ 222240413,
+ 266688509,
+ 320026249,
+ 384031507,
+ 460837813,
+ 553005391,
+ 663606499,
+ 796327811,
+ 955593439,
+ 1146712139,
+ 1376054569,
+ 1651265507,
+ 1981518631,
+ 2377822387,
+ 2853386881,
+ 3424064269,
+ 4108877153,
+ 4294967291
+};
+
+#define NPRIMES ARRAY_SIZE(prime_table)
+
+/* Returns the smallest table prime greater than x, or 0 if the table has none. */
+uns
+next_table_prime(uns x)
+{
+  if (x < prime_table[NPRIMES-1])
+    return prime_table[BIN_SEARCH_FIRST_GE(prime_table, NPRIMES, x+1)];
+  return 0;
+}
+
+/* Returns the largest table prime smaller than x, or 0 if the table has none. */
+uns
+prev_table_prime(uns x)
+{
+  int first_ge = BIN_SEARCH_FIRST_GE(prime_table, NPRIMES, x);
+  if (!first_ge)
+    return 0;
+  return prime_table[first_ge-1];
+}
+
+#ifdef TEST
+
+#include <stdio.h>
+
+/*
+ * Test driver: either regenerates the table (disabled by default) or prints
+ * next/previous table primes for small numbers and for the top of the range.
+ */
+int main(void)
+{
+#if 0		/* Generate the table */
+  uns x = 3, xx;
+  do
+    {
+      printf(" %u,\n", x);
+      xx = x;
+      x = nextprime(1.2*x);
+    }
+  while (x > xx);
+#else
+  /* The table functions return uns, so print their results as unsigned */
+  for (int i=1; i<=100; i++)
+    printf("%d\t%u\t%u\n", i, next_table_prime(i), prev_table_prime(i));
+  for (uns i=0xfffffff0; i; i++)
+    printf("%u\t%u\t%u\n", i, next_table_prime(i), prev_table_prime(i));
+#endif
+  return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Setting of Process Title
+ *
+ * (c) 2001--2006 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <stdarg.h>
+#include <unistd.h>
+
+static char **spt_argv;
+static char *spt_start, *spt_end;
+
+void /* Remember argv and find the contiguous argv+environment area which setproctitle() may overwrite.
+ * Without CONFIG_LINUX nothing is recorded, so setproctitle() then does nothing. */
+setproctitle_init(int argc, char **argv)
+{
+#ifdef CONFIG_LINUX
+ int i, len;
+ char **env, **oldenv, *t;
+
+ spt_argv = argv;
+
+ /* Create a backup copy of environment */
+ oldenv = __environ;
+ len = 0;
+ for (i=0; oldenv[i]; i++)
+ len += strlen(oldenv[i]) + 1;
+ __environ = env = xmalloc(sizeof(char *)*(i+1)); /* i pointers plus the terminating NULL */
+ t = xmalloc(len); /* a single block holding all copied strings back to back */
+ for (i=0; oldenv[i]; i++)
+ {
+ env[i] = t;
+ len = strlen(oldenv[i]) + 1;
+ memcpy(t, oldenv[i], len);
+ t += len;
+ }
+ env[i] = NULL;
+
+ /* Scan for consecutive free space */
+ spt_start = spt_end = argv[0];
+ for (i=0; i<argc; i++)
+ if (!i || spt_end+1 == argv[i]) /* extend only if this string directly follows the previous one */
+ spt_end = argv[i] + strlen(argv[i]);
+ for (i=0; oldenv[i]; i++)
+ if (spt_end+1 == oldenv[i]) /* the original environment strings may continue the same area */
+ spt_end = oldenv[i] + strlen(oldenv[i]);
+#endif
+}
+
+/*
+ * Format a new process title (printf-like) and store it over the area found
+ * by setproctitle_init(). Titles which do not fit the internal buffer are
+ * replaced by "<too-long>". Does nothing if no usable area is known.
+ */
+void
+setproctitle(const char *msg, ...)
+{
+  va_list args;
+  byte buf[256];
+
+  va_start(args, msg);
+  if (spt_start < spt_end)
+    {
+      int n = vsnprintf(buf, sizeof(buf), msg, args);
+      if (n < 0 || n >= (int) sizeof(buf))
+	sprintf(buf, "<too-long>");
+
+      /* Fill the whole area; strncpy() pads the rest with zeroes */
+      n = spt_end - spt_start;
+      strncpy(spt_start, buf, n);
+      spt_start[n] = 0;
+
+      /* The title is now the only argument */
+      spt_argv[0] = spt_start;
+      spt_argv[1] = NULL;
+    }
+  va_end(args);
+}
+
+/* Returns the start of the title area, or NULL if no such area is known. */
+char *
+getproctitle(void)
+{
+  if (spt_start < spt_end)
+    return spt_start;
+  return NULL;
+}
--- /dev/null
+/*
+ * UCW Library -- Poor Man's Profiler
+ *
+ * (c) 2001 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/profile.h"
+
+#include <stdio.h>
+
+/* PROFILE_TOD */
+
+#include <sys/time.h>
+
+/* Reset the accumulated time of a gettimeofday() counter. */
+void
+prof_tod_init(struct prof_tod *c)
+{
+  c->usec = 0;
+  c->sec = 0;
+}
+
+/*
+ * Stop counter o and start counter n at the same instant.
+ * Either of them may be NULL, meaning "no counter".
+ */
+void
+prof_tod_switch(struct prof_tod *o, struct prof_tod *n)
+{
+  struct timeval now;
+
+  gettimeofday(&now, NULL);
+  if (n)
+    {
+      n->start_sec = now.tv_sec;
+      n->start_usec = now.tv_usec;
+    }
+  if (!o)
+    return;
+
+  o->sec += now.tv_sec - o->start_sec;
+  o->usec += now.tv_usec - o->start_usec;
+
+  /* Normalize the microseconds to 0..999999 */
+  if (o->usec < 0)
+    {
+      o->usec += 1000000;
+      o->sec--;
+    }
+  else
+    {
+      while (o->usec >= 1000000)
+	{
+	  o->usec -= 1000000;
+	  o->sec++;
+	}
+    }
+}
+
+/* Print the accumulated time as seconds.microseconds to buf; returns the sprintf() result. */
+int
+prof_tod_format(char *buf, struct prof_tod *c)
+{
+  int written = sprintf(buf, "%d.%06d", c->sec, c->usec);
+  return written;
+}
+
+/* PROFILE_TSC */
+
+#ifdef CPU_I386
+
+/* Reset the accumulated tick count of a TSC counter. */
+void
+prof_tsc_init(struct prof_tsc *c)
+{
+  c->ticks = 0;
+}
+
+/* Print the accumulated tick count to buf; returns the sprintf() result. */
+int
+prof_tsc_format(char *buf, struct prof_tsc *c)
+{
+  /* ticks is an unsigned 64-bit value; cast it so that it matches the format on all platforms */
+  return sprintf(buf, "%llu", (unsigned long long) c->ticks);
+}
+
+#endif
+
+/* PROFILE_KTSC */
+
+#ifdef CONFIG_LINUX
+
+#include <fcntl.h>
+#include <unistd.h>
+static int self_prof_fd = -1;
+
+/*
+ * Reset a kernel TSC counter. The first call also opens /proc/self/profile,
+ * which is then shared by all counters; the program dies if that fails.
+ */
+void
+prof_ktsc_init(struct prof_ktsc *c)
+{
+  if (self_prof_fd < 0 && (self_prof_fd = open("/proc/self/profile", O_RDONLY, 0)) < 0)
+    die("Unable to open /proc/self/profile: %m");
+  c->ticks_sys = 0;
+  c->ticks_user = 0;
+}
+
+/*
+ * Stop counter o and start counter n using a single reading of
+ * /proc/self/profile, which is expected to contain two numbers:
+ * user ticks and system ticks. Either counter may be NULL.
+ */
+void
+prof_ktsc_switch(struct prof_ktsc *o, struct prof_ktsc *n)
+{
+  unsigned long long u, s;
+  char buf[256];
+
+  int l = pread(self_prof_fd, buf, sizeof(buf)-1, 0);
+  ASSERT(l > 0 && l < (int)sizeof(buf)-1);
+  buf[l] = 0;
+  /* The variables are unsigned, so scan them with an unsigned conversion */
+  l = sscanf(buf, "%llu%llu", &u, &s);
+  ASSERT(l == 2);
+
+  if (n)
+    {
+      n->start_user = u;
+      n->start_sys = s;
+    }
+  if (o)
+    {
+      u -= o->start_user;
+      o->ticks_user += u;
+      s -= o->start_sys;
+      o->ticks_sys += s;
+    }
+}
+
+/* Print the accumulated ticks as "user+sys" to buf; returns the sprintf() result. */
+int
+prof_ktsc_format(char *buf, struct prof_ktsc *c)
+{
+  long long user = (long long) c->ticks_user;
+  long long sys = (long long) c->ticks_sys;
+  return sprintf(buf, "%lld+%lld", user, sys);
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Poor Man's Profiler
+ *
+ * (c) 2001 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+/*
+ * Usage:
+ * #define PROFILE_xxx
+ * #include "lib/profile.h"
+ * prof_t cnt;
+ * prof_init(&cnt);
+ * ...
+ * prof_start(&cnt);
+ * ...
+ * prof_stop(&cnt);
+ * printf("%s\n", PROF_STR(cnt));
+ */
+
+/* PROFILE_TOD: gettimeofday() profiler */
+
+struct prof_tod {
+ u32 start_sec, start_usec; /* gettimeofday() reading stored when the counter was started */
+ s32 sec, usec; /* accumulated time; prof_tod_switch() normalizes usec after each update */
+};
+
+void prof_tod_init(struct prof_tod *);
+void prof_tod_switch(struct prof_tod *, struct prof_tod *);
+int prof_tod_format(char *, struct prof_tod *);
+
+/* PROFILE_TSC: i386 TSC profiler */
+
+#ifdef CPU_I386
+
+struct prof_tsc {
+ u64 start_tsc; /* TSC value stored when the counter was started */
+ u64 ticks; /* accumulated number of ticks */
+};
+
+void prof_tsc_init(struct prof_tsc *);
+int prof_tsc_format(char *, struct prof_tsc *);
+
+#endif
+
+/* PROFILE_KTSC: Linux kernel TSC profiler */
+
+#ifdef CONFIG_LINUX
+
+struct prof_ktsc {
+ u64 start_user, start_sys; /* values read from /proc/self/profile when the counter was started */
+ u64 ticks_user, ticks_sys; /* accumulated user and system ticks */
+};
+
+void prof_ktsc_init(struct prof_ktsc *);
+void prof_ktsc_switch(struct prof_ktsc *, struct prof_ktsc *);
+int prof_ktsc_format(char *, struct prof_ktsc *);
+
+#endif
+
+/* Select the right profiler */
+
+#if defined(PROFILE_TOD)
+
+#define PROFILER
+#define PROF_STR_SIZE 21
+typedef struct prof_tod prof_t;
+#define prof_init prof_tod_init
+#define prof_switch prof_tod_switch
+#define prof_format prof_tod_format
+
+#elif defined(PROFILE_TSC)
+
+#define PROFILER
+#define PROFILER_INLINE
+#define PROF_STR_SIZE 24
+
+typedef struct prof_tsc prof_t;
+#define prof_init prof_tsc_init
+#define prof_format prof_tsc_format
+
+#define rdtscll(val) __asm__ __volatile__("rdtsc" : "=A" (val))
+
+/* Start counter c: remember the current TSC value. */
+static inline void prof_start(prof_t *c)
+{
+  rdtscll(c->start_tsc);
+}
+
+/* Stop counter c: add the ticks elapsed since prof_start() to its total. */
+static inline void prof_stop(prof_t *c)
+{
+  u64 tsc;
+  rdtscll(tsc);
+  tsc -= c->start_tsc;
+  c->ticks += tsc;
+}
+
+/*
+ * Stop counter o and start counter n using a single TSC reading.
+ * Either of them may be NULL, as with prof_tod_switch() and prof_ktsc_switch().
+ */
+static inline void prof_switch(prof_t *o, prof_t *n)
+{
+  u64 tsc;
+  rdtscll(tsc);
+  if (n)
+    n->start_tsc = tsc;
+  if (o)
+    o->ticks += tsc - o->start_tsc;
+}
+
+#elif defined(PROFILE_KTSC)
+
+#define PROFILER
+#define PROF_STR_SIZE 50
+typedef struct prof_ktsc prof_t;
+#define prof_init prof_ktsc_init
+#define prof_switch prof_ktsc_switch
+#define prof_format prof_ktsc_format
+
+#endif
+
+#ifdef PROFILER
+
+/* Stuff common for all profilers */
+#ifndef PROFILER_INLINE
+static inline void prof_start(prof_t *c) { prof_switch(NULL, c); } /* start = switch from no counter to c */
+static inline void prof_stop(prof_t *c) { prof_switch(c, NULL); } /* stop = switch from c to no counter */
+#endif
+#define PROF_STR(C) ({ static char _x[PROF_STR_SIZE]; prof_format(_x, &(C)); _x; }) /* formats C to a static buffer, so the string is overwritten when the same expansion is evaluated again */
+
+#else
+
+/* Dummy profiler with no output: all operations are no-ops and every counter formats as "?" */
+typedef struct { } prof_t;
+static inline void prof_init(prof_t *c UNUSED) { }
+static inline void prof_start(prof_t *c UNUSED) { }
+static inline void prof_stop(prof_t *c UNUSED) { }
+static inline void prof_switch(prof_t *c UNUSED, prof_t *d UNUSED) { }
+/* Returns the number of characters written, like the formatters of the real profilers */
+static inline int prof_format(char *b, prof_t *c UNUSED) { b[0]='?'; b[1]=0; return 1; }
+#define PROF_STR_SIZE 2
+#define PROF_STR(C) "?"
+
+#endif
--- /dev/null
+/*
+ * Simple and Quick Shared Memory Cache
+ *
+ * (c) 2005 Martin Mares <mj@ucw.cz>
+ */
+
+#undef LOCAL_DEBUG
+
+#include "lib/lib.h"
+#include "lib/bitops.h"
+#include "lib/fastbuf.h"
+#include "lib/ff-binary.h"
+#include "lib/qache.h"
+
+#include <stdio.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+
+/*
+ * The cache lives in a mmapped file of the following format:
+ * qache_header
+ * qache_entry[max_entries] table of entries and their keys
+ * u32 qache_hash[hash_size] hash table pointing to keys
+ * u32 block_next[num_blocks] next block pointers
+ * padding to a multiple of block size
+ * blocks[] data blocks
+ */
+
+struct qache_header {
+ u32 magic; /* QACHE_MAGIC */
+ u32 block_size; /* Parameters as in qache_params */
+ u32 block_shift; /* block_size = 1 << block_shift */
+ u32 num_blocks; /* Number of blocks in the whole file, including those holding the tables */
+ u32 format_id; /* Caller-supplied ID; an existing file with a different ID is not reused */
+ u32 entry_table_start; /* Array of qache_entry's */
+ u32 max_entries;
+ u32 hash_table_start; /* Hash table containing all keys */
+ u32 hash_size;
+ u32 next_table_start; /* Array of next pointers */
+ u32 first_data_block; /* Number of the first block which does not hold the tables */
+};
+
+#define QACHE_MAGIC 0xb79f6d12
+
+struct qache_entry {
+ u32 lru_prev, lru_next; /* Entry #0: head of the cyclic LRU list */
+ u32 data_len; /* Entry #0: number of free blocks, Free entries: ~0U */
+ u32 first_data_block; /* Entry #0: first free block */
+ qache_key_t key; /* Key of the cached item */
+ u32 hash_next; /* Entry #0: first free entry, Free entries: next free */
+};
+
+struct qache {
+ struct qache_header *hdr; /* The header at the start of the mapping */
+ struct qache_entry *entry_table; /* The following three point inside the mapping, at offsets given by the header */
+ u32 *hash_table;
+ u32 *next_table;
+ int fd; /* Descriptor of the cache file */
+ byte *mmap_data; /* Start of the mapping of the whole file */
+ uns file_size; /* Size of the mapping */
+ char *file_name; /* Private copy of the file name */
+ uns locked; /* Non-zero while this process holds the fcntl lock */
+};
+
+#define first_free_entry entry_table[0].hash_next
+#define first_free_block entry_table[0].first_data_block
+#define num_free_blocks entry_table[0].data_len
+
+/*
+ * Convert a key to a string of hexadecimal digits for use in messages.
+ * The result lives in a static buffer overwritten by the next call.
+ */
+static inline char *
+format_key(qache_key_t *key)
+{
+  static char keybuf[2*sizeof(qache_key_t)+1];
+  char *out = keybuf;
+
+  for (uns i=0; i<sizeof(qache_key_t); i++, out += 2)
+    sprintf(out, "%02x", (*key)[i]);
+  return keybuf;
+}
+
+/*
+ * Schedule synchronization of the given byte range of the mapping.
+ * The range is widened to whole pages first. Compiled to nothing on Linux.
+ */
+static void
+qache_msync(struct qache *q UNUSED, uns start UNUSED, uns len UNUSED)
+{
+#ifndef CONFIG_LINUX
+  /* We don't need msyncing on Linux, since the mappings are guaranteed to be coherent */
+  uns page_off = start % CPU_PAGE_SIZE;
+  uns sync_start = start - page_off;
+  uns sync_len = ALIGN_TO(len + page_off, CPU_PAGE_SIZE);
+
+  if (msync(q->mmap_data + sync_start, sync_len, MS_ASYNC | MS_INVALIDATE) < 0)
+    msg(L_ERROR, "Cache %s: msync failed: %m", q->file_name);
+#endif
+}
+
+/* Synchronize a single block given by its number. */
+static void
+qache_msync_block(struct qache *q, uns blk)
+{
+  uns offset = blk << q->hdr->block_shift;
+
+  DBG("\tSyncing block %d", blk);
+  qache_msync(q, offset, q->hdr->block_size);
+}
+
+static void /* Take an exclusive fcntl() lock on the header bytes of the cache file, waiting for it if needed; dies on failure */
+qache_lock(struct qache *q)
+{
+ /* We cannot use flock() since it happily permits locking a shared fd (e.g., after fork()) multiple times */
+ ASSERT(!q->locked);
+ struct flock fl = { .l_type = F_WRLCK, .l_whence = SEEK_SET, .l_start = 0, .l_len = sizeof(struct qache_header) };
+ if (fcntl(q->fd, F_SETLKW, &fl) < 0)
+ die("fcntl lock on %s: %m", q->file_name);
+ q->locked = 1; /* checked by the ASSERTs of the functions which need the lock */
+ DBG("Locked cache %s", q->file_name);
+}
+
+static void /* Release the lock taken by qache_lock(); if dirty is non-zero, the tables are msynced first */
+qache_unlock(struct qache *q, uns dirty)
+{
+ ASSERT(q->locked);
+ if (dirty) /* Sync header, entry table and hash table */
+ qache_msync(q, 0, q->hdr->first_data_block << q->hdr->block_shift);
+ struct flock fl = { .l_type = F_UNLCK, .l_whence = SEEK_SET, .l_start = 0, .l_len = sizeof(struct qache_header) };
+ if (fcntl(q->fd, F_SETLKW, &fl) < 0)
+ die("fcntl unlock on %s: %m", q->file_name);
+ q->locked = 0;
+ DBG("Unlocked cache %s (dirty=%d)", q->file_name, dirty);
+}
+
+enum entry_audit_flags { /* Bits set in the entry map by audit_entries() */
+ ET_FREE_LIST = 1, /* entry was seen in the list of free entries */
+ ET_LRU = 2, /* entry was seen in the LRU list */
+ ET_HASH = 4 /* entry was seen in a hash chain */
+};
+
+static char * /* Check consistency of the free entry list, the hash chains and the LRU list.
+ * entrymap must hold max_entries zeroed bytes; it gets filled with ET_xxx flags.
+ * Returns NULL if no problem was found, otherwise a constant error message. */
+audit_entries(struct qache *q, byte *entrymap)
+{
+ uns i, j;
+
+ DBG("Auditing entries");
+
+ /* Check the free list */
+ i = q->first_free_entry;
+ while (i)
+ {
+ if (i >= q->hdr->max_entries || (entrymap[i] & ET_FREE_LIST) || q->entry_table[i].data_len != ~0U)
+ return "inconsistent free entry list";
+ entrymap[i] |= ET_FREE_LIST; /* seeing the flag again means the list is cyclic */
+ i = q->entry_table[i].hash_next;
+ }
+
+ /* Check the hash table */
+ for (i=0; i<q->hdr->hash_size; i++)
+ {
+ j = q->hash_table[i];
+ while (j)
+ {
+ if (j >= q->hdr->max_entries || (entrymap[j] & (ET_HASH | ET_FREE_LIST)))
+ return "inconsistent hash chains";
+ entrymap[j] |= ET_HASH;
+ j = q->entry_table[j].hash_next;
+ }
+ }
+
+ /* Check the LRU */
+ i = 0;
+ do
+ {
+ j = q->entry_table[i].lru_next;
+ if ((entrymap[i] & (ET_LRU | ET_FREE_LIST)) || j >= q->hdr->max_entries || q->entry_table[j].lru_prev != i)
+ return "inconsistent LRU list";
+ entrymap[i] |= ET_LRU;
+ i = j;
+ }
+ while (i); /* the list is cyclic, so the walk ends back at entry #0 */
+
+ /* Check if all non-free items are in all lists */
+ for (i=1; i<q->hdr->max_entries; i++)
+ {
+ if (entrymap[i] != ((q->entry_table[i].data_len == ~0U) ? ET_FREE_LIST : (ET_LRU | ET_HASH)))
+ return "inconsistent lists";
+ }
+ return NULL;
+}
+
+enum block_audit_flags { /* Bits set in the block map by audit_blocks() */
+ BT_FREE_LIST = 1, /* block was seen in the list of free blocks */
+ BT_ALLOC = 2 /* block was seen in the block list of some entry */
+};
+
+/*
+ * Check consistency of the free block list and of the block lists of all
+ * used entries. entrymap is the map filled by audit_entries(), blockmap
+ * must hold num_blocks zeroed bytes and gets filled with BT_xxx flags.
+ * Returns NULL if no problem was found, otherwise a constant error message.
+ */
+static char *
+audit_blocks(struct qache *q, byte *entrymap, byte *blockmap)
+{
+  uns i, j;
+
+  DBG("Auditing blocks");
+
+  /* Check the free list */
+  for (i=q->first_free_block; i; i=q->next_table[i])
+    {
+      if (i < q->hdr->first_data_block || i >= q->hdr->num_blocks || (blockmap[i] & BT_FREE_LIST))
+	return "inconsistent free block list";
+      blockmap[i] |= BT_FREE_LIST;
+    }
+
+  /* Check allocation lists of entries */
+  for (i=1; i<q->hdr->max_entries; i++)
+    if (!(entrymap[i] & ET_FREE_LIST))
+      {
+	uns blocks = 0;
+	for (j=q->entry_table[i].first_data_block; j; j=q->next_table[j])
+	  {
+	    /* The block number comes from the file, so check its range before indexing the maps with it */
+	    if (j < q->hdr->first_data_block || j >= q->hdr->num_blocks || blockmap[j])
+	      return "inconsistent entry block list";
+	    blockmap[j] |= BT_ALLOC;
+	    blocks++;
+	  }
+	if (((q->entry_table[i].data_len + q->hdr->block_size - 1) >> q->hdr->block_shift) != blocks)
+	  return "inconsistent entry data length";
+      }
+
+  /* Check if all blocks belong somewhere */
+  for (i=q->hdr->first_data_block; i < q->hdr->num_blocks; i++)
+    if (!blockmap[i])
+      {
+	DBG("Block %d unreferenced", i);
+	return "unreferenced blocks found";
+      }
+
+  return NULL;
+}
+
+/*
+ * Audit all structures of the cache.
+ * Returns NULL if they are consistent, otherwise a constant error message.
+ */
+static char *
+do_audit(struct qache *q)
+{
+  byte *entry_map = xmalloc_zero(q->hdr->max_entries);
+  byte *block_map = xmalloc_zero(q->hdr->num_blocks);
+  char *err = audit_entries(q, entry_map);
+  if (!err)
+    err = audit_blocks(q, entry_map, block_map);
+  xfree(block_map);
+  xfree(entry_map);
+  return err;
+}
+
+/* Set up pointers to the header and to the tables located inside the mapping. */
+static void
+qache_setup_pointers(struct qache *q)
+{
+  byte *base = q->mmap_data;
+  struct qache_header *hdr = (struct qache_header *) base;
+
+  q->hdr = hdr;
+  q->entry_table = (struct qache_entry *) (base + hdr->entry_table_start);
+  q->hash_table = (u32 *) (base + hdr->hash_table_start);
+  q->next_table = (u32 *) (base + hdr->next_table_start);
+}
+
+/*
+ * Try to open and map an existing cache file and check that it matches
+ * the requested parameters and that its contents are consistent.
+ * Returns 1 on success. Returns 0 if the file cannot be used; in that case
+ * the file is closed again and the reason is logged (unless open() failed).
+ */
+static int
+qache_open_existing(struct qache *q, struct qache_params *par)
+{
+  if ((q->fd = open(q->file_name, O_RDWR, 0)) < 0)
+    return 0;
+
+  struct stat st;
+  char *err = "stat failed";
+  if (fstat(q->fd, &st) < 0)
+    goto close_and_fail;
+
+  err = "invalid file size";
+  if (st.st_size < (int)sizeof(struct qache_header) || (st.st_size % par->block_size))
+    goto close_and_fail;
+  q->file_size = st.st_size;
+
+  err = "requested size change";
+  if (q->file_size != par->cache_size)
+    goto close_and_fail;
+
+  err = "cannot mmap";
+  if ((q->mmap_data = mmap(NULL, q->file_size, PROT_READ | PROT_WRITE, MAP_SHARED, q->fd, 0)) == MAP_FAILED)
+    goto close_and_fail;
+  struct qache_header *h = (struct qache_header *) q->mmap_data;
+
+  qache_setup_pointers(q);
+  qache_lock(q);
+
+  err = "incompatible format";
+  if (h->magic != QACHE_MAGIC ||
+      h->block_size != par->block_size ||
+      h->max_entries != par->max_entries ||
+      h->format_id != par->format_id)
+    goto unlock_and_fail;
+
+  err = "incomplete file";
+  if (h->num_blocks*h->block_size != q->file_size)
+    goto unlock_and_fail;
+
+  /*
+   * The audit indexes the tables by values found in the header, so make
+   * sure first that all tables lie inside the blocks reserved for them.
+   */
+  err = "corrupted header";
+  if (h->block_shift >= 32 || (1U << h->block_shift) != h->block_size ||
+      !h->max_entries || !h->hash_size ||
+      !h->first_data_block || h->first_data_block >= h->num_blocks)
+    goto unlock_and_fail;
+  u64 meta_size = (u64) h->first_data_block << h->block_shift;
+  if (h->entry_table_start < sizeof(*h) ||
+      h->entry_table_start + (u64) h->max_entries * sizeof(struct qache_entry) > meta_size ||
+      h->hash_table_start + (u64) h->hash_size * sizeof(u32) > meta_size ||
+      h->next_table_start + (u64) h->num_blocks * sizeof(u32) > meta_size)
+    goto unlock_and_fail;
+
+  err = do_audit(q);
+  if (err)
+    goto unlock_and_fail;
+
+  qache_unlock(q, 0);
+  msg(L_INFO, "Cache %s: using existing data", q->file_name);
+  return 1;
+
+ unlock_and_fail:
+  qache_unlock(q, 0);
+  munmap(q->mmap_data, q->file_size);
+ close_and_fail:
+  msg(L_INFO, "Cache %s: ignoring old contents (%s)", q->file_name, err);
+  close(q->fd);
+  return 0;
+}
+
+static void /* Create (or truncate) the cache file, write an empty cache of the requested geometry to it and map it.
+ * Dies if the file cannot be created or mapped or if cache_size is too small for the tables. */
+qache_create(struct qache *q, struct qache_params *par)
+{
+ q->fd = open(q->file_name, O_RDWR | O_CREAT | O_TRUNC, 0666);
+ if (q->fd < 0)
+ die("Cache %s: unable to create (%m)", q->file_name);
+ struct fastbuf *fb = bfdopen_shared(q->fd, 16384);
+
+ struct qache_header h;
+ bzero(&h, sizeof(h));
+ h.magic = QACHE_MAGIC;
+ h.block_size = par->block_size;
+ h.block_shift = bit_fls(h.block_size);
+ h.num_blocks = par->cache_size >> h.block_shift;
+ h.format_id = par->format_id;
+ h.entry_table_start = sizeof(h);
+ h.max_entries = par->max_entries;
+ h.hash_table_start = h.entry_table_start + h.max_entries * sizeof(struct qache_entry);
+ h.hash_size = 1;
+ while (h.hash_size < h.max_entries) /* the smallest power of two which is not below max_entries */
+ h.hash_size *= 2;
+ h.next_table_start = h.hash_table_start + h.hash_size * 4;
+ h.first_data_block = (h.next_table_start + 4*h.num_blocks + h.block_size - 1) >> h.block_shift; /* the tables rounded up to whole blocks */
+ if (h.first_data_block >= h.num_blocks)
+ die("Cache %s: Requested size is too small even to hold the maintenance structures", q->file_name);
+ bwrite(fb, &h, sizeof(h));
+
+ /* Entry #0: heads of all lists */
+ ASSERT(btell(fb) == (sh_off_t)h.entry_table_start);
+ struct qache_entry ent;
+ bzero(&ent, sizeof(ent));
+ ent.first_data_block = h.first_data_block; /* head of the free block list */
+ ent.data_len = h.num_blocks - h.first_data_block; /* number of free blocks */
+ ent.hash_next = 1; /* head of the free entry list */
+ bwrite(fb, &ent, sizeof(ent));
+
+ /* Other entries */
+ bzero(&ent, sizeof(ent));
+ ent.data_len = ~0U;
+ for (uns i=1; i<h.max_entries; i++)
+ {
+ ent.hash_next = (i == h.max_entries-1 ? 0 : i+1); /* chain all entries into the free list */
+ bwrite(fb, &ent, sizeof(ent));
+ }
+
+ /* The hash table */
+ ASSERT(btell(fb) == (sh_off_t)h.hash_table_start);
+ for (uns i=0; i<h.hash_size; i++)
+ bputl(fb, 0);
+
+ /* The next pointers */
+ ASSERT(btell(fb) == (sh_off_t)h.next_table_start);
+ for (uns i=0; i<h.num_blocks; i++)
+ bputl(fb, (i < h.first_data_block || i == h.num_blocks-1) ? 0 : i+1); /* chain all data blocks into the free list */
+
+ /* Padding */
+ ASSERT(btell(fb) <= (sh_off_t)(h.first_data_block << h.block_shift));
+ while (btell(fb) < (sh_off_t)(h.first_data_block << h.block_shift))
+ bputc(fb, 0);
+
+ /* Data blocks */
+ for (uns i=h.first_data_block; i<h.num_blocks; i++)
+ for (uns j=0; j<h.block_size; j+=4)
+ bputl(fb, 0);
+
+ ASSERT(btell(fb) == (sh_off_t)par->cache_size);
+ bclose(fb);
+ msg(L_INFO, "Cache %s: created (%d bytes, %d slots, %d buckets)", q->file_name, par->cache_size, h.max_entries, h.hash_size);
+
+ if ((q->mmap_data = mmap(NULL, par->cache_size, PROT_READ | PROT_WRITE, MAP_SHARED, q->fd, 0)) == MAP_FAILED)
+ die("Cache %s: mmap failed (%m)", par->file_name);
+ q->file_size = par->cache_size;
+ qache_setup_pointers(q);
+}
+
+/*
+ * Open a cache described by par. cache_size in par gets rounded up to
+ * a multiple of block_size. Unless force_reset is positive, an existing
+ * file is reused if it is usable. Otherwise a new cache is created, or
+ * the program dies if force_reset is negative.
+ */
+struct qache *
+qache_open(struct qache_params *par)
+{
+  struct qache *q = xmalloc_zero(sizeof(*q));
+  q->file_name = xstrdup(par->file_name);
+
+  ASSERT(par->block_size >= 8 && !(par->block_size & (par->block_size-1)));
+  par->cache_size = ALIGN_TO(par->cache_size, par->block_size);
+
+  if (par->force_reset <= 0 && qache_open_existing(q, par))
+    return q;
+  if (par->force_reset < 0)
+    die("Cache %s: read-only access requested, but no data available", q->file_name);
+  qache_create(q, par);
+  return q;
+}
+
+/*
+ * Unmap and close the cache and free its structure.
+ * The file itself is deleted unless retain_data is non-zero.
+ */
+void
+qache_close(struct qache *q, uns retain_data)
+{
+  munmap(q->mmap_data, q->file_size);
+  close(q->fd);
+  if (!retain_data)
+    {
+      if (unlink(q->file_name) < 0)
+	msg(L_ERROR, "Cache %s: unlink failed (%m)", q->file_name);
+    }
+  xfree(q->file_name);
+  xfree(q);
+}
+
+/* Map a key to its hash bucket: the first four bytes of the key modulo the table size. */
+static uns
+qache_hash(struct qache *q, qache_key_t *key)
+{
+  /* Convert to unsigned before shifting: a byte >= 0x80 shifted by 24 would overflow a signed int */
+  uns h = ((uns)(*key)[0] << 24) | ((uns)(*key)[1] << 16) | ((uns)(*key)[2] << 8) | (*key)[3];
+  return h % q->hdr->hash_size;
+}
+
+/*
+ * Find the entry with the given key. pos_hint is tried first if it is
+ * a valid number of a used entry. Returns the entry number or 0 if the
+ * key is not present. The cache must be locked.
+ */
+static uns
+qache_hash_find(struct qache *q, qache_key_t *key, uns pos_hint)
+{
+  ASSERT(q->locked);
+
+  if (pos_hint && pos_hint < q->hdr->max_entries)
+    {
+      struct qache_entry *hinted = &q->entry_table[pos_hint];
+      if (hinted->data_len != ~0U && !memcmp(hinted->key, key, sizeof(*key)))
+	return pos_hint;
+    }
+
+  /* Walk the chain until the key matches or the chain ends with 0 */
+  uns e = q->hash_table[qache_hash(q, key)];
+  while (e && memcmp(q->entry_table[e].key, key, sizeof(*key)))
+    e = q->entry_table[e].hash_next;
+  return e;
+}
+
+/* Put entry e at the head of the hash chain of its key. */
+static void
+qache_hash_insert(struct qache *q, uns e)
+{
+  struct qache_entry *entry = &q->entry_table[e];
+  u32 *bucket = &q->hash_table[qache_hash(q, &entry->key)];
+
+  entry->hash_next = *bucket;
+  *bucket = e;
+}
+
+/*
+ * Unlink the first entry with the key of entry e from its hash chain.
+ * Fails an assertion if the chain contains no such entry.
+ */
+static void
+qache_hash_remove(struct qache *q, uns e)
+{
+  struct qache_entry *entry = &q->entry_table[e];
+  uns *link = &q->hash_table[qache_hash(q, &entry->key)];
+
+  while (*link)
+    {
+      uns f = *link;
+      if (!memcmp(q->entry_table[f].key, entry->key, sizeof(qache_key_t)))
+	{
+	  *link = entry->hash_next;
+	  return;
+	}
+      link = &(q->entry_table[f].hash_next);
+    }
+  ASSERT(0);
+}
+
+static uns /* Take the first entry from the free list and mark it as used with no data; the list must not be empty */
+qache_alloc_entry(struct qache *q)
+{
+ uns e = q->first_free_entry;
+ ASSERT(q->locked && e);
+ struct qache_entry *entry = &q->entry_table[e];
+ ASSERT(entry->data_len == ~0U);
+ q->first_free_entry = entry->hash_next; /* free entries are chained by hash_next */
+ entry->data_len = 0;
+ return e;
+}
+
+static void /* Mark entry e as free and put it at the head of the free list */
+qache_free_entry(struct qache *q, uns e)
+{
+ struct qache_entry *entry = &q->entry_table[e];
+ ASSERT(q->locked && entry->data_len != ~0U);
+ entry->data_len = ~0U; /* ~0U marks a free entry */
+ entry->hash_next = q->first_free_entry;
+ q->first_free_entry = e;
+}
+
+static inline void * /* Return the address of the given block inside the mapping */
+get_block_start(struct qache *q, uns block)
+{
+ ASSERT(block && block < q->hdr->num_blocks);
+ return q->mmap_data + (block << q->hdr->block_shift);
+}
+
+static uns /* Take the first block from the free list and return its number; there must be a free block */
+qache_alloc_block(struct qache *q)
+{
+ ASSERT(q->locked && q->num_free_blocks);
+ uns blk = q->first_free_block;
+ q->first_free_block = q->next_table[blk]; /* free blocks are chained by next_table */
+ q->num_free_blocks--;
+ DBG("\tAllocated block %d", blk);
+ return blk;
+}
+
+static void /* Put block blk at the head of the free list */
+qache_free_block(struct qache *q, uns blk)
+{
+ ASSERT(q->locked);
+ q->next_table[blk] = q->first_free_block;
+ q->first_free_block = blk;
+ q->num_free_blocks++;
+ DBG("\tFreed block %d", blk);
+}
+
+static void /* Insert entry e right after the head (entry #0) of the cyclic LRU list; e must not be linked yet */
+qache_lru_insert(struct qache *q, uns e)
+{
+ struct qache_entry *head = &q->entry_table[0];
+ struct qache_entry *entry = &q->entry_table[e];
+ ASSERT(q->locked && !entry->lru_prev && !entry->lru_next);
+ uns succe = head->lru_next; /* the entry which followed the head until now */
+ struct qache_entry *succ = &q->entry_table[succe];
+ head->lru_next = e;
+ entry->lru_prev = 0;
+ entry->lru_next = succe;
+ succ->lru_prev = e;
+}
+
+/* Unlink entry e from the LRU list and clear its links. */
+static void
+qache_lru_remove(struct qache *q, uns e)
+{
+  ASSERT(q->locked);
+  struct qache_entry *entry = &q->entry_table[e];
+
+  struct qache_entry *pred = &q->entry_table[entry->lru_prev];
+  pred->lru_next = entry->lru_next;
+  struct qache_entry *succ = &q->entry_table[entry->lru_next];
+  succ->lru_prev = entry->lru_prev;
+
+  entry->lru_prev = entry->lru_next = 0;
+}
+
+/* Return the entry at the tail of the LRU list, i.e. the one preceding the head; 0 if the list is empty. */
+static uns
+qache_lru_get(struct qache *q)
+{
+  struct qache_entry *head = &q->entry_table[0];
+  return head->lru_prev;
+}
+
+/* Delete entry e: free its data blocks, unlink it from the LRU list and the hash table and free it. */
+static void
+qache_ll_delete(struct qache *q, uns e)
+{
+  struct qache_entry *entry = &q->entry_table[e];
+
+  /* Each pass frees one block and subtracts one block's worth of data */
+  for (uns blk = entry->first_data_block; entry->data_len; )
+    {
+      uns next = q->next_table[blk];
+      qache_free_block(q, blk);
+      blk = next;
+      entry->data_len = (entry->data_len >= q->hdr->block_size) ? entry->data_len - q->hdr->block_size : 0;
+    }
+
+  qache_lru_remove(q, e);
+  qache_hash_remove(q, e);
+  qache_free_entry(q, e);
+}
+
+/*
+ * Insert data of the given size under the given key, replacing an existing
+ * entry with the same key (pos_hint may give its number, see qache_hash_find).
+ * The least recently used entries are evicted until there is enough room.
+ * Returns the number of the new entry, or 0 if the data cannot fit in the
+ * cache at all; an old entry with the same key is deleted even in that case.
+ */
+uns
+qache_insert(struct qache *q, qache_key_t *key, uns pos_hint, void *data, uns size)
+{
+  qache_lock(q);
+
+  uns dirty = 0;
+  uns e = qache_hash_find(q, key, pos_hint);
+  if (e)
+    {
+      DBG("Insert <%s>: deleting old entry %d", format_key(key), e);
+      qache_ll_delete(q, e);
+      dirty = 1;
+    }
+
+  uns blocks = (size + q->hdr->block_size - 1) >> q->hdr->block_shift;
+  if (blocks > q->hdr->num_blocks - q->hdr->first_data_block)
+    {
+      /* The tables have been modified if we have deleted the old entry */
+      qache_unlock(q, dirty);
+      return 0;
+    }
+  while (q->num_free_blocks < blocks || !q->first_free_entry)
+    {
+      e = qache_lru_get(q);
+      DBG("Insert <%s>: evicting entry %d to make room for %d blocks", format_key(key), e, blocks);
+      ASSERT(e);
+      qache_ll_delete(q, e);
+    }
+  e = qache_alloc_entry(q);
+  struct qache_entry *entry = &q->entry_table[e];
+  entry->data_len = size;
+  memcpy(entry->key, key, sizeof(*key));
+  DBG("Insert <%s>: created entry %d with %d data blocks", format_key(key), e, blocks);
+
+  /* Store the data from the end, so that each new block becomes the head of the list */
+  entry->first_data_block = 0;
+  while (size)
+    {
+      /* Only the last block of the data may be incomplete */
+      uns chunk = (size & (q->hdr->block_size-1)) ? : q->hdr->block_size;
+      uns blk = qache_alloc_block(q);
+      q->next_table[blk] = entry->first_data_block;
+      memcpy(get_block_start(q, blk), (byte *) data + size - chunk, chunk);
+      qache_msync_block(q, blk);
+      entry->first_data_block = blk;
+      size -= chunk;
+    }
+
+  qache_lru_insert(q, e);
+  qache_hash_insert(q, e);
+  qache_unlock(q, 1);
+  return e;
+}
+
+/*
+ * Copy data of an entry to the caller.
+ *
+ * If sizep is NULL, nothing is done (datap must be NULL as well).
+ * Otherwise *sizep is the maximum number of bytes wanted and it is replaced
+ * by the number of bytes available from offset start to the end of the data.
+ * If datap is non-NULL, the smaller of the two sizes is copied to *datap;
+ * when *datap is NULL, a buffer is allocated by xmalloc() and stored there.
+ */
+static void
+copy_out(struct qache *q, struct qache_entry *entry, byte **datap, uns *sizep, uns start)
+{
+  if (sizep)
+    {
+      uns size = *sizep;
+      uns avail = (start > entry->data_len) ? 0 : entry->data_len - start;
+      uns xfer = MIN(size, avail);
+      *sizep = avail;
+      if (datap)
+	{
+	  if (!*datap)
+	    *datap = xmalloc(xfer);
+	  /* Skip the blocks which lie completely before start */
+	  uns blk = entry->first_data_block;
+	  while (start >= q->hdr->block_size)
+	    {
+	      blk = q->next_table[blk];
+	      start -= q->hdr->block_size;
+	    }
+	  byte *data = *datap;
+	  while (xfer)
+	    {
+	      uns len = MIN(xfer, q->hdr->block_size - start);
+	      /* start is now the offset inside the first block copied; it is zero for all further blocks */
+	      memcpy(data, (byte *) get_block_start(q, blk) + start, len);
+	      blk = q->next_table[blk];
+	      data += len;
+	      xfer -= len;
+	      start = 0;
+	    }
+	}
+    }
+  else
+    ASSERT(!datap);
+}
+
+/*
+ * Look up the given key (pos_hint may give the expected entry number).
+ * If it is found, the entry is moved to the front of the LRU list, its
+ * data are passed to copy_out() and the entry number is returned.
+ * Returns 0 if the key is not present.
+ */
+uns
+qache_lookup(struct qache *q, qache_key_t *key, uns pos_hint, byte **datap, uns *sizep, uns start)
+{
+  qache_lock(q);
+  uns e = qache_hash_find(q, key, pos_hint);
+  if (!e)
+    {
+      DBG("Lookup <%s>: not found", format_key(key));
+      qache_unlock(q, 0);
+      return e;
+    }
+
+  DBG("Lookup <%s>: found entry %d", format_key(key), e);
+  qache_lru_remove(q, e);
+  qache_lru_insert(q, e);
+  copy_out(q, &q->entry_table[e], datap, sizep, start);
+  qache_unlock(q, 1);		/* Yes, modified -- we update the LRU */
+  return e;
+}
+
+/*
+ * Examine the entry at position pos without touching the LRU list.
+ * Returns ~0U if pos is out of range, 0 if the entry is free and pos
+ * otherwise. For a used entry, its key is stored to *key (if key is
+ * non-NULL) and its data are passed to copy_out().
+ */
+uns
+qache_probe(struct qache *q, qache_key_t *key, uns pos, byte **datap, uns *sizep, uns start)
+{
+  if (!pos || pos >= q->hdr->max_entries)
+    {
+      DBG("Probe %d: Out of range", pos);
+      return ~0U;
+    }
+
+  qache_lock(q);
+  uns ret = 0;
+  struct qache_entry *entry = &q->entry_table[pos];
+  if (entry->data_len != ~0U)
+    {
+      /* Both format arguments must be passed; format_key() wants a pointer to the whole key */
+      DBG("Probe %d: Found key <%s>", pos, format_key(&entry->key));
+      if (key)
+	memcpy(key, entry->key, sizeof(qache_key_t));
+      copy_out(q, entry, datap, sizep, start);
+      ret = pos;
+    }
+  else
+    DBG("Probe %d: Empty", pos);
+  qache_unlock(q, 0);
+  return ret;
+}
+
+/*
+ * Delete the entry with the given key (pos_hint may give its number).
+ * Returns the number of the deleted entry or 0 if the key was not present.
+ */
+uns
+qache_delete(struct qache *q, qache_key_t *key, uns pos_hint)
+{
+  qache_lock(q);
+  uns e = qache_hash_find(q, key, pos_hint);
+  if (e)
+    {
+      DBG("Delete <%s>: deleting entry %d", format_key(key), e);
+      qache_ll_delete(q, e);
+    }
+  else
+    DBG("Delete <%s>: No match", format_key(key));
+  /* The tables are modified only if we have really deleted something */
+  qache_unlock(q, e != 0);
+  return e;
+}
+
+void /* Dump the parameters, the entry table, the hash table and the next pointers of data blocks to the log at L_DEBUG level.
+ * NOTE(review): the cache is not locked here, so the dump may be inconsistent if somebody else modifies it meanwhile. */
+qache_debug(struct qache *q)
+{
+ msg(L_DEBUG, "Cache %s: block_size=%d (%d data), num_blocks=%d (%d first data), %d slots, %d hash buckets",
+ q->file_name, q->hdr->block_size, q->hdr->block_size, q->hdr->num_blocks, q->hdr->first_data_block,
+ q->hdr->max_entries, q->hdr->hash_size);
+
+ msg(L_DEBUG, "Table of cache entries:");
+ msg(L_DEBUG, "\tEntry\tLruPrev\tLruNext\tDataLen\tDataBlk\tHashNxt\tKey");
+ for (uns e=0; e<q->hdr->max_entries; e++)
+ {
+ struct qache_entry *ent = &q->entry_table[e];
+ msg(L_DEBUG, "\t%d\t%d\t%d\t%d\t%d\t%d\t%s", e, ent->lru_prev, ent->lru_next, ent->data_len,
+ ent->first_data_block, ent->hash_next, format_key(&ent->key));
+ }
+
+ msg(L_DEBUG, "Hash table:");
+ for (uns h=0; h<q->hdr->hash_size; h++)
+ msg(L_DEBUG, "\t%04x\t%d", h, q->hash_table[h]);
+
+ msg(L_DEBUG, "Next pointers:");
+ for (uns blk=q->hdr->first_data_block; blk<q->hdr->num_blocks; blk++) /* only the data blocks are listed */
+ msg(L_DEBUG, "\t%d\t%d", blk, q->next_table[blk]);
+}
+
+/*
+ * Run do_audit() on the locked cache and die with the message it returns,
+ * if it returns one.
+ */
+void
+qache_audit(struct qache *q)
+{
+  qache_lock(q);
+  char *err = do_audit(q);
+  if (err)
+    die("Cache %s: %s", q->file_name, err);
+  qache_unlock(q, 0);
+}
+
+#ifdef TEST
+
+/*
+ * Self-test: insert N entries of growing size (11*i bytes for the i-th one),
+ * dump and audit the cache, then look every key up again and verify the size
+ * and the contents of each entry that is still present.
+ */
+int main(int argc UNUSED, char **argv UNUSED)
+{
+  struct qache_params par = {
+    .file_name = "tmp/test",
+    .block_size = 256,
+    .cache_size = 65536,
+    .max_entries = 123,
+    .force_reset = 0,
+    .format_id = 0xfeedcafe
+  };
+  struct qache *q = qache_open(&par);
+
+  qache_key_t key = { 0x12, 0x34, 0x56, 0x78, 0x9a, 0xbc, 0xde, 0xf0, 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef };
+#define N 100
+  uns i, j;
+  byte data[11*N];
+  for (i=0; i<N; i++)
+    {
+      /* Bytes 3 and 15 of the key encode the entry number */
+      key[3] = i / 16; key[15] = i % 16;
+      for (j=0; j<11*i; j++)
+        data[j] = 0x33 + i*j;
+      qache_insert(q, &key, 0, data, 11*i);
+    }
+  qache_debug(q);
+  qache_audit(q);
+
+  uns found = 0;
+  /* Iterate over the same N keys that were inserted (was a hard-coded 100) */
+  for (i=0; i<N; i++)
+    {
+      key[3] = i / 16; key[15] = i % 16;
+      byte *dptr = data;
+      uns sz = sizeof(data);
+      uns e = qache_lookup(q, &key, 0, &dptr, &sz, 0);
+      if (e)
+        {
+          ASSERT(sz == 11*i);
+          for (j=0; j<sz; j++)
+            ASSERT(data[j] == (byte)(0x33 + i*j));
+          found++;
+        }
+    }
+  msg(L_INFO, "Found %d of %d entries", found, N);
+
+  qache_close(q, 1);
+  return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * Simple and Quick Shared Memory Cache
+ *
+ * (c) 2005 Martin Mares <mj@ucw.cz>
+ */
+
+#ifndef _UCW_QACHE_H
+#define _UCW_QACHE_H
+
+/* Parameters describing the cache; a pointer to this structure is passed to qache_open() */
+struct qache_params {
+ char *file_name;
+ uns block_size; /* Cache block size (a power of two) */
+ uns cache_size; /* Size of the whole cache */
+ uns max_entries; /* Maximum number of cached entries */
+ int force_reset; /* Force creation of a new cache even if the old one seems usable, -1 if reset should never be done */
+ uns format_id; /* Data format ID (old cache not used if formats differ) */
+};
+
+typedef byte qache_key_t[16];
+
+struct qache;
+
+/* Create and destroy a cache */
+struct qache *qache_open(struct qache_params *p);
+void qache_close(struct qache *q, uns retain_data);
+
+/* Insert new item to the cache with a given key and data. If pos_hint is non-zero, it serves
+ * as a hint about the position of the entry (if it's known that an entry with the particular key
+ * was located there a moment ago). Returns position of the new entry.
+ */
+uns qache_insert(struct qache *q, qache_key_t *key, uns pos_hint, void *data, uns size);
+
+/* Look up data in the cache, given a key and a position hint (as above). If datap is non-NULL, data
+ * from the cache entry are copied to *datap (if *datap is NULL, new memory is allocated by
+ * calling xmalloc and *datap is set to point to that memory). The *sizep contains the maximum number
+ * of bytes to be copied (~0U if unlimited) and it is replaced by the number of bytes available (so it
+ * can be greater than the original value requested). The start indicates the starting offset inside the
+ * entry's data.
+ */
+uns qache_lookup(struct qache *q, qache_key_t *key, uns pos_hint, byte **datap, uns *sizep, uns start);
+
+/* Inspect data in the cache (but don't modify LRU nor anything else), given a position.
+ * If key is non-NULL, it's filled with the cache key. The rest works as in qache_lookup.
+ * Returns 0 if the entry is empty, ~0 for position out of range, entry number otherwise.
+ */
+uns qache_probe(struct qache *q, qache_key_t *key, uns pos, byte **datap, uns *sizep, uns start);
+
+/* Delete data from the cache, given a key and a position hint. */
+uns qache_delete(struct qache *q, qache_key_t *key, uns pos_hint);
+
+/* Debugging dump (beware, doesn't lock the cache!) */
+void qache_debug(struct qache *q);
+
+/* Check consistency of the cache structure */
+void qache_audit(struct qache *q);
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Unbiased Random Numbers
+ *
+ * (c) 1998--2006 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+
+#include <stdlib.h>
+
+/* We expect the random generator in libc to give at least 30 bits of randomness */
+COMPILE_ASSERT(RAND_MAX_RANGE_TEST, RAND_MAX >= (1 << 30)-1);
+
+/*
+ * Return a random number composed of two 16-bit halves, each taken from
+ * a separate call to random().
+ */
+uns
+random_u32(void)
+{
+  /*
+   * Convert each half to unsigned before shifting: shifting the signed long
+   * returned by random() left by 16 can overflow where long is 32 bits wide.
+   */
+  uns lo = random() & 0xffff;
+  uns hi = random() & 0xffff;
+  return lo | (hi << 16);
+}
+
+/*
+ * Return an unbiased random number in the range 0 .. max-1.
+ * max must be non-zero and at most 2^30.
+ */
+uns
+random_max(uns max)
+{
+  uns r, l;
+
+  /* max == 0 would lead to a modulo by zero below, so reject it here as well */
+  ASSERT(max && max <= (1 << 30));
+  /* l is the largest multiple of max not exceeding RAND_MAX+1; draws at or above it are rejected */
+  l = (RAND_MAX + 1U) - ((RAND_MAX + 1U) % max);
+  do
+    r = random();
+  while (r >= l);
+  return r % max;
+}
+
+/*
+ * Return a random 64-bit number assembled from three calls to random():
+ * 16 bits for the top part and 24 bits for each of the two lower parts.
+ */
+u64
+random_u64(void)
+{
+  u64 top = random() & 0xffff;
+  u64 mid = random() & 0xffffff;
+  u64 low = random() & 0xffffff;
+  return (top << 48) | (mid << 24) | low;
+}
+
+/*
+ * Return a random number in the range 0 .. max-1 for a 64-bit max.
+ * Values of max below 2^30 are delegated to random_max(); larger ones are
+ * handled by rejection sampling on top of random_u64().
+ */
+u64
+random_max_u64(u64 max)
+{
+  if (max < (1 << 30))
+    return random_max(max);
+
+  u64 m = 0xffffffffffffffff;
+  u64 l = m - (m % max);	/* Largest multiple of max not exceeding m */
+  for (;;)
+    {
+      u64 r = random_u64();
+      if (r < l)
+        return r % max;
+    }
+}
--- /dev/null
+/*
+ * UCW Library -- Cryptographically Safe Random Key Generator
+ *
+ * (c) 2002 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+
+#include <fcntl.h>
+#include <unistd.h>
+
+/*
+ * Fill buf with size random bytes read from /dev/urandom.
+ * Dies if the device cannot be opened or read.
+ */
+void
+randomkey(byte *buf, uns size)
+{
+  int fd = open("/dev/urandom", O_RDONLY, 0);
+  if (fd < 0)
+    die("Unable to open /dev/urandom: %m");
+  /* read() may legally return fewer bytes than requested, so keep reading until the buffer is full */
+  while (size)
+    {
+      ssize_t got = read(fd, buf, size);
+      if (got <= 0)
+        die("Error reading /dev/urandom: %m");
+      buf += got;
+      size -= got;
+    }
+  close(fd);
+}
--- /dev/null
+/*
+ * UCW Library -- Memory Re-allocation
+ *
+ * (c) 1997 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+
+#include <stdlib.h>
+
+#ifndef DEBUG_DMALLOC
+
+/*
+ * Resize a memory block with realloc() and die if the reallocation fails.
+ * Returns the pointer returned by realloc().
+ */
+void *
+xrealloc(void *old, uns size)
+{
+  /* We assume that realloc(NULL, x) works like malloc(x), which is true with the glibc. */
+  void *x = realloc(old, size);
+  if (!x)
+    die("Cannot reallocate %u bytes of memory", size);	/* size is unsigned, hence %u */
+  return x;
+}
+
+#endif
--- /dev/null
+/*
+ * Test of red-black trees
+ *
+ * (c) 2002, Robert Spalek <robert@ucw.cz>
+ */
+
+#include "lib/lib.h"
+#include "lib/getopt.h"
+#include "lib/fastbuf.h"
+#include <stdio.h>
+#include <stdlib.h>
+
+/* Node of the first test tree: an integer key plus one integer of payload */
+struct my1_node
+{
+ int key;
+ int x;
+};
+
+/* Write the key of a node to the fastbuf as "key=<number> " */
+static void my_dump_key(struct fastbuf *fb, struct my1_node *n)
+{
+  char tmp[20];
+  snprintf(tmp, sizeof(tmp), "key=%d ", n->key);
+  bputs(fb, tmp);
+}
+
+/* Write the payload of a node to the fastbuf as "x=<number> " */
+static void my_dump_data(struct fastbuf *fb, struct my1_node *n)
+{
+  char tmp[20];
+  snprintf(tmp, sizeof(tmp), "x=%d ", n->x);
+  bputs(fb, tmp);
+}
+
+/*
+ * Instantiate the first tree: nodes are struct my1_node, all generated names
+ * get the my_ prefix and the atomic field `key' serves as the key.
+ */
+#define TREE_NODE struct my1_node
+#define TREE_PREFIX(x) my_##x
+#define TREE_KEY_ATOMIC key
+#define TREE_WANT_CLEANUP
+#define TREE_WANT_LOOKUP
+#define TREE_WANT_DELETE
+#define TREE_WANT_ITERATOR
+#define TREE_WANT_DUMP
+#define TREE_CONSERVE_SPACE
+#include "redblack.h"
+
+/*
+ * Iterate over the whole tree and assert that the keys come in non-decreasing
+ * order. If fb is non-NULL, every node is also printed as "key -> x" and the
+ * fastbuf is flushed at the end.
+ */
+static void my_check_order(struct fastbuf *fb, struct my_tree *t)
+{
+ /* NOTE(review): 0x80000000 is presumably meant as the smallest int -- confirm */
+ int last_key = 0x80000000;
+ TREE_FOR_ALL(my, t, n)
+ {
+ ASSERT(n->key >= last_key);
+ last_key = n->key;
+ if (fb)
+ {
+ char tmp[30];
+ sprintf(tmp, "%d -> %d\n", n->key, n->x);
+ bputs(fb, tmp);
+ }
+ }
+ TREE_END_FOR;
+ if (fb)
+ bflush(fb);
+}
+
+/* Node of the second test tree: only a string key, used with TREE_KEY_ENDSTRING below */
+struct my2_node
+{
+ char key[1];
+};
+
+/* Write the string key of a node to the fastbuf as "key=<string> " */
+static void my2_dump_key(struct fastbuf *fb, struct my2_node *n)
+{
+ bputs(fb, "key=");
+ bputs(fb, n->key);
+ bputc(fb, ' ');
+}
+
+/* Nodes of the second tree carry no data besides the key, so there is nothing to dump */
+static void my2_dump_data(struct fastbuf *fb UNUSED, struct my2_node *n UNUSED)
+{
+}
+
+/*
+ * Instantiate the second tree: nodes are struct my2_node, all generated names
+ * get the my2_ prefix and the key is a case-insensitive string stored at the
+ * end of the node.
+ * NOTE(review): TREE_STATIC is not among the parameters described in redblack.h
+ * (which tests TREE_GLOBAL and makes functions static by default) -- confirm
+ * whether this define is still needed.
+ */
+#define TREE_NODE struct my2_node
+#define TREE_PREFIX(x) my2_##x
+#define TREE_KEY_ENDSTRING key
+#define TREE_NOCASE
+#define TREE_WANT_CLEANUP
+#define TREE_WANT_NEW
+#define TREE_WANT_SEARCH
+#define TREE_WANT_REMOVE
+#define TREE_WANT_FIND_NEXT
+#define TREE_WANT_ITERATOR
+#define TREE_WANT_DUMP
+#define TREE_STATIC
+#define TREE_CONSERVE_SPACE
+#include "redblack.h"
+
+/*
+ * Fill txt with a zero-terminated random string of length less than max_len,
+ * consisting of characters with codes 32 to 127.
+ */
+static void random_string(char *txt, uns max_len)
+{
+  uns len = random() % max_len;
+  char *end = txt + len;
+  while (txt < end)
+    *txt++ = random() % 96 + 32;
+  *end = 0;
+}
+
+/* Option string passed to cf_getopt() and the usage text printed by usage() */
+static char *options = CF_SHORT_OPTS "vn:a";
+
+static char *help = "\
+Usage: test1.bin <options>\n\
+Options:\n"
+CF_USAGE
+"-v\tSet verbose mode\n\
+-n num\tNumber of inserted nodes\n\
+-a\tProbe some ASSERTs\n\
+";
+
+/* Print the usage text to stderr and exit with status 1 */
+static void NONRET
+usage(void)
+{
+ fputs(help, stderr);
+ exit(1);
+}
+
+/*
+ * Test driver for the red-black tree generator. Runs four phases:
+ * a random load test, sequential insertion and deletion, a complete tree
+ * of 997 keys, and a string-keyed tree with duplicates.
+ * -v prints progress messages (given twice, it also dumps the trees),
+ * -n sets the number of inserted nodes, -a probes the assertions.
+ */
+int
+main(int argc, char **argv)
+{
+ int verbose = 0, number = 1000, asserts = 0;
+ int opt;
+ struct fastbuf *fb, *dump_fb;
+ struct my_tree t;
+ struct my2_tree t2;
+ int i;
+ cf_def_file = NULL;
+ log_init(argv[0]);
+ while ((opt = cf_getopt(argc, argv, options, CF_NO_LONG_OPTS, NULL)) >= 0)
+ switch (opt)
+ {
+ case 'v':
+ verbose++;
+ break;
+ case 'n':
+ number = atoi(optarg);
+ break;
+ case 'a':
+ asserts++;
+ break;
+ default:
+ usage();
+ break;
+ }
+ if (optind < argc)
+ usage();
+ /* Output goes to file descriptor 1; tree dumps are produced only when verbose > 1 */
+ fb = bfdopen(1, 4096);
+ if (verbose > 1)
+ dump_fb = fb;
+ else
+ dump_fb = NULL;
+
+ /* Phase 1: look up (and thereby create) `number' random keys, then check the ordering */
+ my_init(&t);
+ for (i=0; i<number; i++)
+ my_lookup(&t, random() % 1000000)->x = i;
+ my_dump(dump_fb, &t);
+ my_check_order(dump_fb, &t);
+ if (asserts)
+ {
+ /* The same key is inserted twice into a tree built without TREE_DUPLICATES;
+ * presumably this is meant to trip the ASSERT in new() -- confirm */
+ my_new(&t, 1);
+ my_new(&t, 1);
+ }
+ my_cleanup(&t);
+ if (verbose > 0)
+ bputs(fb, "Load test passed\n");
+
+ /* Phase 2: insert keys 0..99 in order, then delete all of them in a permuted order */
+ my_init(&t);
+ for (i=0; i<100; i++)
+ {
+ my_new(&t, i)->x = i;
+ my_dump(dump_fb, &t);
+ }
+ for (i=0; i<100; i++)
+ {
+ /* j walks every decade, rotating the order of the last digit by the decade number */
+ int a = i/10, b = i%10, j = a*10 + (b + a) % 10;
+ int res UNUSED = my_delete(&t, j);
+ ASSERT(res);
+ my_dump(dump_fb, &t);
+ }
+ my_cleanup(&t);
+ if (verbose > 0)
+ bputs(fb, "Sequential adding and deleting passed\n");
+
+ /* Phase 3: insert i*238 mod 997 for i = 0..996, check that iteration yields 0..996, then delete everything */
+ my_init(&t);
+ for (i=0; i<997; i++)
+ {
+ my_new(&t, i*238 % 997)->x = i;
+ my_dump(NULL, &t);
+ }
+ my_dump(dump_fb, &t);
+ i = 0;
+ TREE_FOR_ALL(my, &t, n)
+ {
+ ASSERT(n->key == i);
+ i++;
+ }
+ TREE_END_FOR;
+ ASSERT(i == 997);
+ for (i=0; i<997; i++)
+ {
+ int res UNUSED = my_delete(&t, i*111 % 997);
+ ASSERT(res);
+ my_dump(NULL, &t);
+ }
+ my_dump(dump_fb, &t);
+ my_cleanup(&t);
+ if (verbose > 0)
+ bputs(fb, "Complete tree passed\n");
+
+ /* Phase 4: string keys; insert `number' random strings */
+ my2_init(&t2);
+ for (i=0; i<number; i++)
+ {
+ char txt[30];
+ random_string(txt, 30);
+ my2_new(&t2, txt);
+ }
+ my2_dump(dump_fb, &t2);
+ /* For every node, count the nodes reachable from it by find_next() (including itself) */
+ TREE_FOR_ALL(my2, &t2, n)
+ {
+ my2_node *tmp;
+ int count = 0;
+ for (tmp=n; tmp; tmp = my2_find_next(tmp))
+ count++;
+ if (dump_fb)
+ {
+ char txt[20];
+ bputs(dump_fb, n->key);
+ sprintf(txt, ": %d\n", count);
+ bputs(dump_fb, txt);
+ }
+ }
+ TREE_END_FOR;
+ /* Empty the tree by removing whichever node search() returns for a random string */
+ while (t2.count > 0)
+ {
+ char txt[30];
+ my2_node *n;
+ random_string(txt, 30);
+ n = my2_search(&t2, txt);
+ ASSERT(n);
+ my2_remove(&t2, n);
+ }
+ my2_dump(dump_fb, &t2);
+ my2_cleanup(&t2);
+ if (verbose > 0)
+ bputs(fb, "String test passed\n");
+
+ bclose(fb);
+ return 0;
+}
--- /dev/null
+/*
+ * UCW Library -- Red-black trees
+ *
+ * (c) 2002--2005, Robert Spalek <robert@ucw.cz>
+ *
+ * Skeleton based on hash-tables by:
+ *
+ * (c) 2002, Martin Mares <mj@ucw.cz>
+ *
+ */
+
+/*
+ * Data structure description:
+ *
+ * A red-black tree is a binary search tree, where records are stored
+ * in nodes (may be also leaves). Every node has a colour. The
+ * following restrictions hold:
+ *
+ * - a parent of a red node is black
+ * - every path from the root to a node with less than 2 children
+ * contains the same number of black nodes
+ *
+ * A usual interpretation is, that leaves are intervals between records
+ * and contain no data. Every leaf is black. This is equivalent, but
+ * saves the space.
+ */
+
+/*
+ * This is not a normal header file, it's a generator of red-black trees.
+ * Each time you include it with parameters set in the corresponding
+ * preprocessor macros, it generates a tree structure with the parameters
+ * given.
+ *
+ * You need to specify:
+ *
+ * TREE_NODE data type where a node dwells (usually a struct).
+ * TREE_PREFIX(x) macro to add a name prefix (used on all global names
+ * defined by the tree generator).
+ *
+ * Then decide on type of keys:
+ *
+ * TREE_KEY_ATOMIC=f use node->f as a key of an atomic type (i.e.,
+ * a type which can be compared using '>', `==', and '<')
+ * & TREE_ATOMIC_TYPE (defaults to int).
+ * | TREE_KEY_STRING=f use node->f as a string key, allocated
+ * separately from the rest of the node.
+ * | TREE_KEY_ENDSTRING=f use node->f as a string key, allocated
+ * automatically at the end of the node struct
+ * (to be declared as "char f[1]" at the end).
+ * | TREE_KEY_COMPLEX use a multi-component key; as the name suggests,
+ * the passing of parameters is a bit complex then.
+ * The TREE_KEY_COMPLEX(x) macro should expand to
+ * `x k1, x k2, ... x kn' and you should also define:
+ * & TREE_KEY_DECL declaration of function parameters in which key
+ * should be passed to all tree operations.
+ * That is, `type1 k1, type2 k2, ... typen kn'.
+ * With complex keys, TREE_GIVE_CMP is mandatory.
+ *
+ * Then specify what operations you request (all names are automatically
+ * prefixed by calling TREE_PREFIX):
+ *
+ * <always defined> init() -- initialize the tree.
+ * TREE_WANT_CLEANUP cleanup() -- deallocate the tree.
+ * TREE_WANT_FIND node *find(key) -- find first node with the specified
+ * key, return NULL if no such node exists.
+ * TREE_WANT_FIND_NEXT node *find_next(node *start) -- find next node with the
+ * specified key, return NULL if no such node exists.
+ * Implies TREE_DUPLICATES.
+ * TREE_WANT_SEARCH node *search(key) -- find the node with the specified
+ * key or, if it does not exist, the nearest one.
+ * TREE_WANT_SEARCH_DOWN node *search_down(key) -- find either the node with
+ * specified value, or if it does not exist, the node
+ * with nearest smaller value.
+ * TREE_WANT_BOUNDARY node *boundary(uns direction) -- finds smallest
+ * (direction==0) or largest (direction==1) node.
+ * TREE_WANT_ADJACENT node *adjacent(node *, uns direction) -- finds next
+ * (direction==1) or previous (direction==0) node.
+ * TREE_WANT_NEW node *new(key) -- create new node with given key.
+ * If it already exists, it is created as the last one.
+ * TREE_WANT_LOOKUP node *lookup(key) -- find node with given key,
+ * if it doesn't exist, create it. Defining
+ * TREE_GIVE_INIT_DATA is strongly recommended.
+ * TREE_WANT_DELETE int delete(key) -- delete and deallocate node
+ * with a given key. Returns success.
+ * TREE_WANT_REMOVE remove(node *) -- delete and deallocate given node.
+ *
+ * TREE_WANT_DUMP dump() -- dumps the whole tree to stdout
+ *
+ * You can also supply several functions:
+ *
+ * TREE_GIVE_CMP int cmp(key1, key2) -- return -1, 0, and 1 according to
+ * the relation of keys. By default, we use <, ==, > for
+ * atomic types and either strcmp or strcasecmp for
+ * strings.
+ * TREE_GIVE_EXTRA_SIZE int extra_size(key) -- returns how many bytes after the
+ * node should be allocated for dynamic data. Default=0
+ * or length of the string with TREE_KEY_ENDSTRING.
+ * TREE_GIVE_INIT_KEY void init_key(node *,key) -- initialize key in a newly
+ * created node. Defaults: assignment for atomic keys
+ * and static strings, strcpy for end-allocated strings.
+ * TREE_GIVE_INIT_DATA void init_data(node *) -- initialize data fields in a
+ * newly created node. Very useful for lookup operations.
+ * TREE_GIVE_ALLOC void *alloc(unsigned int size) -- allocate space for
+ * a node. Default is either normal or pooled allocation
+ * depending on whether we want deletions.
+ * void free(void *) -- the converse.
+ *
+ * ... and a couple of extra parameters:
+ *
+ * TREE_NOCASE string comparisons should be case-insensitive.
+ * TREE_ATOMIC_TYPE=t Atomic values are of type `t' instead of int.
+ * TREE_USE_POOL=pool Allocate all nodes from given mempool.
+ * Collides with delete/remove functions.
+ * TREE_GLOBAL Functions are exported (i.e., not static).
+ * TREE_CONSERVE_SPACE Use as little space as possible at the price of a
+ * little slowdown.
+ * TREE_DUPLICATES Records with duplicate keys are allowed.
+ * TREE_MAX_DEPTH Maximal depth of a tree (for stack allocation).
+ *
+ * If you set TREE_WANT_ITERATOR, you also get a iterator macro at no
+ * extra charge:
+ *
+ * TREE_FOR_ALL(tree_prefix, tree_pointer, variable)
+ * {
+ * // node *variable gets declared automatically
+ * do_something_with_node(variable);
+ * // use TREE_BREAK and TREE_CONTINUE instead of break and continue
+ * // you must not alter contents of the tree here
+ * }
+ * TREE_END_FOR;
+ *
+ * Then include "lib/redblack.h" and voila, you have a tree suiting all your
+ * needs (at least those which you've revealed :) ).
+ *
+ * After including this file, all parameter macros are automatically
+ * undef'd.
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#if !defined(TREE_NODE) || !defined(TREE_PREFIX)
+#error Some of the mandatory configuration macros are missing.
+#endif
+
+#define P(x) TREE_PREFIX(x)
+
+/* Declare buckets and the tree. */
+
+typedef TREE_NODE P(node);
+
+#if defined(TREE_WANT_FIND_NEXT) || defined(TREE_WANT_ADJACENT) || defined(TREE_WANT_ITERATOR) || defined(TREE_WANT_REMOVE)
+# define TREE_STORE_PARENT
+#endif
+
+/*
+ * A tree bucket: the two son pointers, an optional parent pointer, an optional
+ * colour bit and the user's node. With TREE_CONSERVE_SPACE there is no red_flag
+ * field at all. Otherwise the flag is placed before the node when extra bytes
+ * may be allocated after the node (TREE_GIVE_EXTRA_SIZE or TREE_KEY_ENDSTRING)
+ * and after the node in all other cases.
+ */
+typedef struct P(bucket) {
+ struct P(bucket) *son[2];
+#ifdef TREE_STORE_PARENT
+ struct P(bucket) *parent;
+#endif
+#if !defined(TREE_CONSERVE_SPACE) && (defined(TREE_GIVE_EXTRA_SIZE) || defined(TREE_KEY_ENDSTRING))
+ uns red_flag:1;
+#endif
+ P(node) n;
+#if !defined(TREE_CONSERVE_SPACE) && !defined(TREE_GIVE_EXTRA_SIZE) && !defined(TREE_KEY_ENDSTRING)
+ uns red_flag:1;
+#endif
+} P(bucket);
+
+/* The tree itself: number of nodes, black height and a pointer to the root bucket */
+struct P(tree) {
+ uns count;
+ uns height; /* of black nodes */
+ P(bucket) *root;
+};
+
+/* One step of a path through the tree: a bucket and the index of the son followed from it */
+typedef struct P(stack_entry) {
+ P(bucket) *buck;
+ uns son;
+} P(stack_entry);
+
+#define T struct P(tree)
+
+/* Preset parameters */
+
+/*
+ * Depending on the selected key type, define TREE_KEY(x) (the key expression
+ * with prefix x), TREE_KEY_DECL (the key as a function parameter) and default
+ * implementations of cmp(), init_key() and extra_size() where the user has
+ * not supplied them.
+ */
+#if defined(TREE_KEY_ATOMIC)
+
+#define TREE_KEY(x) x TREE_KEY_ATOMIC
+
+#ifndef TREE_ATOMIC_TYPE
+# define TREE_ATOMIC_TYPE int
+#endif
+#define TREE_KEY_DECL TREE_ATOMIC_TYPE TREE_KEY()
+
+#ifndef TREE_GIVE_CMP
+# define TREE_GIVE_CMP
+ /* Default three-way comparison of atomic keys using < and > */
+ static inline int P(cmp) (TREE_ATOMIC_TYPE x, TREE_ATOMIC_TYPE y)
+ {
+ if (x < y)
+ return -1;
+ else if (x > y)
+ return 1;
+ else
+ return 0;
+ }
+#endif
+
+#ifndef TREE_GIVE_INIT_KEY
+# define TREE_GIVE_INIT_KEY
+ /* Default key initialization for atomic keys: plain assignment */
+ static inline void P(init_key) (P(node) *n, TREE_ATOMIC_TYPE k)
+ { TREE_KEY(n->) = k; }
+#endif
+
+#elif defined(TREE_KEY_STRING) || defined(TREE_KEY_ENDSTRING)
+
+#ifdef TREE_KEY_STRING
+# define TREE_KEY(x) x TREE_KEY_STRING
+# ifndef TREE_GIVE_INIT_KEY
+# define TREE_GIVE_INIT_KEY
+ /* Separately allocated string key: only the pointer is stored */
+ static inline void P(init_key) (P(node) *n, char *k)
+ { TREE_KEY(n->) = k; }
+# endif
+#else
+# define TREE_KEY(x) x TREE_KEY_ENDSTRING
+# define TREE_GIVE_EXTRA_SIZE
+ /* End-allocated string key: reserve strlen(k) extra bytes after the node */
+ static inline int P(extra_size) (char *k)
+ { return strlen(k); }
+# ifndef TREE_GIVE_INIT_KEY
+# define TREE_GIVE_INIT_KEY
+ /* End-allocated string key: copy the string into the node */
+ static inline void P(init_key) (P(node) *n, char *k)
+ { strcpy(TREE_KEY(n->), k); }
+# endif
+#endif
+#define TREE_KEY_DECL char *TREE_KEY()
+
+#ifndef TREE_GIVE_CMP
+# define TREE_GIVE_CMP
+ /* Default string comparison; case-insensitive when TREE_NOCASE is set */
+ static inline int P(cmp) (char *x, char *y)
+ {
+# ifdef TREE_NOCASE
+ return strcasecmp(x,y);
+# else
+ return strcmp(x,y);
+# endif
+ }
+#endif
+
+#elif defined(TREE_KEY_COMPLEX)
+
+#define TREE_KEY(x) TREE_KEY_COMPLEX(x)
+
+#else
+#error You forgot to set the tree key type.
+#endif
+
+/*
+ * Accessors for the colour of a bucket and for its son pointers.
+ * Without TREE_CONSERVE_SPACE they read and write the red_flag field and the
+ * son[] array directly. With TREE_CONSERVE_SPACE the colour is kept in the
+ * lowest bit of son[0]: tree_son() masks the lowest bit off and set_tree_son()
+ * preserves the lowest bit of the pointer it overwrites.
+ */
+#ifndef TREE_CONSERVE_SPACE
+ static inline uns P(red_flag) (P(bucket) *node)
+ { return node->red_flag; }
+ static inline void P(set_red_flag) (P(bucket) *node, uns flag)
+ { node->red_flag = flag; }
+ static inline P(bucket) * P(tree_son) (P(bucket) *node, uns id)
+ { return node->son[id]; }
+ static inline void P(set_tree_son) (P(bucket) *node, uns id, P(bucket) *son)
+ { node->son[id] = son; }
+#else
+ /* Pointers are aligned, hence we can use lower bits. */
+ static inline uns P(red_flag) (P(bucket) *node)
+ { return ((uintptr_t) node->son[0]) & 1L; }
+ static inline void P(set_red_flag) (P(bucket) *node, uns flag)
+ { node->son[0] = (void*) ( (((uintptr_t) node->son[0]) & ~1L) | (flag & 1L) ); }
+ static inline P(bucket) * P(tree_son) (P(bucket) *node, uns id)
+ { return (void *) (((uintptr_t) node->son[id]) & ~1L); }
+ static inline void P(set_tree_son) (P(bucket) *node, uns id, P(bucket) *son)
+ { node->son[id] = (void *) ((uintptr_t) son | (((uintptr_t) node->son[id]) & 1L) ); }
+#endif
+
+/* Defaults for missing parameters. */
+
+#ifndef TREE_GIVE_CMP
+#error Unable to determine how to compare two keys.
+#endif
+
+#ifdef TREE_GIVE_EXTRA_SIZE
+/* This trickery is needed to avoid `unused parameter' warnings */
+# define TREE_EXTRA_SIZE P(extra_size)
+#else
+/*
+ * Beware, C macros are expanded iteratively, not recursively,
+ * hence we get only a _single_ argument, although the expansion
+ * of TREE_KEY contains commas.
+ */
+# define TREE_EXTRA_SIZE(x) 0
+#endif
+
+#ifndef TREE_GIVE_INIT_KEY
+# error Unable to determine how to initialize keys.
+#endif
+
+/* Default init_data(): leaves the data fields of a new node untouched */
+#ifndef TREE_GIVE_INIT_DATA
+static inline void P(init_data) (P(node) *n UNUSED)
+{
+}
+#endif
+
+#include <stdlib.h>
+
+/*
+ * Default allocator. With TREE_USE_POOL nodes come from the given mempool,
+ * no free() is defined and TREE_SAFE_FREE expands to nothing; otherwise
+ * xmalloc() and xfree() are used. TREE_SAFE_FREE(x) calls free(x) in every
+ * configuration that has a free().
+ */
+#ifndef TREE_GIVE_ALLOC
+# ifdef TREE_USE_POOL
+ static inline void * P(alloc) (unsigned int size)
+ { return mp_alloc_fast(TREE_USE_POOL, size); }
+# define TREE_SAFE_FREE(x)
+# else
+ static inline void * P(alloc) (unsigned int size)
+ { return xmalloc(size); }
+
+ static inline void P(free) (void *x)
+ { xfree(x); }
+# endif
+#endif
+
+#ifndef TREE_SAFE_FREE
+# define TREE_SAFE_FREE(x) P(free) (x)
+#endif
+
+#ifdef TREE_GLOBAL
+# define STATIC
+#else
+# define STATIC static
+#endif
+
+#ifndef TREE_MAX_DEPTH
+# define TREE_MAX_DEPTH 64
+#endif
+
+#if defined(TREE_WANT_FIND_NEXT) && !defined(TREE_DUPLICATES)
+# define TREE_DUPLICATES
+#endif
+
+#ifdef TREE_WANT_LOOKUP
+#ifndef TREE_WANT_FIND
+# define TREE_WANT_FIND
+#endif
+#ifndef TREE_WANT_NEW
+# define TREE_WANT_NEW
+#endif
+#endif
+
+/* Now the operations */
+
+/* Initialize an empty tree: no root, zero nodes, zero black height */
+STATIC void P(init) (T *t)
+{
+  t->root = NULL;
+  t->height = 0;
+  t->count = 0;
+}
+
+#ifdef TREE_WANT_CLEANUP
+/* Recursively release the subtree rooted at node, decrementing t->count for every bucket */
+static void P(cleanup_subtree) (T *t, P(bucket) *node)
+{
+  if (!node)
+    return;
+  P(cleanup_subtree) (t, P(tree_son) (node, 0));
+  P(cleanup_subtree) (t, P(tree_son) (node, 1));
+  /* Use TREE_SAFE_FREE so that this also compiles with TREE_USE_POOL, where no free() exists */
+  TREE_SAFE_FREE(node);
+  t->count--;
+}
+
+/* Deallocate all nodes of the tree and leave it empty */
+STATIC void P(cleanup) (T *t)
+{
+  P(cleanup_subtree) (t, t->root);
+  ASSERT(!t->count);
+  t->height = 0;
+  /* Do not leave a pointer to a released bucket behind */
+  t->root = NULL;
+}
+#endif
+
+/*
+ * Descend from node towards the given key and record the path in stack[]:
+ * stack[i].buck is the i-th bucket on the path and stack[i].son the index of
+ * the son followed from it. The descent stops at a bucket with an equal key
+ * or at a NULL son; at most max_depth entries may be used (checked by ASSERT).
+ * With TREE_DUPLICATES, the search continues in the son_id subtree of a match
+ * to find the first or the last of the equal keys.
+ * Returns the index of the last entry, whose buck is either the bucket found
+ * or NULL and whose son is set to the marker value 10.
+ */
+static uns P(fill_stack) (P(stack_entry) *stack, uns max_depth, P(bucket) *node, TREE_KEY_DECL, uns son_id UNUSED)
+{
+ uns i;
+ stack[0].buck = node;
+ for (i=0; stack[i].buck; i++)
+ {
+ int cmp;
+ cmp = P(cmp) (TREE_KEY(), TREE_KEY(stack[i].buck->n.));
+ if (cmp == 0)
+ break;
+ else if (cmp < 0)
+ stack[i].son = 0;
+ else
+ stack[i].son = 1;
+ ASSERT(i+1 < max_depth);
+ stack[i+1].buck = P(tree_son) (stack[i].buck, stack[i].son);
+ }
+#ifdef TREE_DUPLICATES
+ if (stack[i].buck)
+ {
+ uns idx;
+ /* Find first/last of equal keys according to son_id. */
+ idx = P(fill_stack) (stack+i+1, max_depth-i-1,
+ P(tree_son) (stack[i].buck, son_id), TREE_KEY(), son_id);
+ /* Move to the deeper match only if the recursive search has found one */
+ if (stack[i+1+idx].buck)
+ {
+ stack[i].son = son_id;
+ i = i+1+idx;
+ }
+ }
+#endif
+ /* 10 is not a valid son index; it marks the end of the path */
+ stack[i].son = 10;
+ return i;
+}
+
+#ifdef TREE_WANT_FIND
+/* Find a node with the given key; returns NULL if there is no such node */
+STATIC P(node) * P(find) (T *t, TREE_KEY_DECL)
+{
+  P(stack_entry) stack[TREE_MAX_DEPTH];
+  uns depth = P(fill_stack) (stack, TREE_MAX_DEPTH, t->root, TREE_KEY(), 0);
+  if (!stack[depth].buck)
+    return NULL;
+  return &stack[depth].buck->n;
+}
+#endif
+
+#ifdef TREE_WANT_SEARCH_DOWN
+/*
+ * Find the node with the given key or, if there is none, the last node at
+ * which the descent turned right (i.e., whose key compared smaller).
+ * Returns NULL if the descent never turned right.
+ */
+STATIC P(node) * P(search_down) (T *t, TREE_KEY_DECL)
+{
+  P(node) *last_right = NULL;
+  P(bucket) *node;
+  for (node = t->root; node; )
+    {
+      int cmp = P(cmp) (TREE_KEY(), TREE_KEY(node->n.));
+      if (!cmp)
+        return &node->n;
+      if (cmp > 0)
+        last_right = &node->n;
+      /* Son 0 for a smaller key, son 1 for a bigger one */
+      node = P(tree_son) (node, cmp > 0);
+    }
+  return last_right;
+}
+#endif
+
+#ifdef TREE_WANT_BOUNDARY
+/*
+ * Follow son 0 (direction == 0) or son 1 (direction != 0) from the root as
+ * far as possible and return the node reached; NULL for an empty tree.
+ */
+STATIC P(node) * P(boundary) (T *t, uns direction)
+{
+  P(bucket) *n = t->root, *ns;
+  uns son = direction ? 1 : 0;
+  if (!n)
+    return NULL;
+  for (ns = P(tree_son) (n, son); ns; ns = P(tree_son) (n, son))
+    n = ns;
+  return &n->n;
+}
+#endif
+
+#ifdef TREE_STORE_PARENT
+/*
+ * Find the neighbour of start in the given direction (1 or 0), using the
+ * parent pointers. If start has a son in that direction, the result is
+ * reached by stepping to that son and then following sons in the opposite
+ * direction as far as possible. Otherwise we climb up while we keep arriving
+ * from the `direction' son; the first ancestor entered from the other side
+ * is the result. Returns NULL when the root is passed without finding one.
+ */
+STATIC P(node) * P(adjacent) (P(node) *start, uns direction)
+{
+ /* Recover the bucket which contains the node */
+ P(bucket) *node = SKIP_BACK(P(bucket), n, start);
+ P(bucket) *next = P(tree_son) (node, direction);
+ if (next)
+ {
+ while (1)
+ {
+ node = P(tree_son) (next, 1 - direction);
+ if (!node)
+ break;
+ next = node;
+ }
+ }
+ else
+ {
+ next = node->parent;
+ while (next && node == P(tree_son) (next, direction))
+ {
+ node = next;
+ next = node->parent;
+ }
+ if (!next)
+ return NULL;
+ ASSERT(node == P(tree_son) (next, 1 - direction));
+ }
+ return &next->n;
+}
+#endif
+
+#if defined(TREE_DUPLICATES) || defined(TREE_WANT_DELETE) || defined(TREE_WANT_REMOVE)
+/*
+ * Extend the path stored in stack[] beyond stack[0].buck: take one step to
+ * the son in the given direction and then keep following sons in the opposite
+ * direction until a NULL son is reached. The son indices along the way are
+ * recorded in the stack. Returns the index of the entry holding the NULL
+ * bucket, or 0 if stack[0].buck is NULL.
+ */
+static int P(find_next_node) (P(stack_entry) *stack, uns max_depth, uns direction)
+{
+ uns depth = 0;
+ if (stack[0].buck)
+ {
+ ASSERT(depth+1 < max_depth);
+ stack[depth].son = direction;
+ stack[depth+1].buck = P(tree_son) (stack[depth].buck, direction);
+ depth++;
+ while (stack[depth].buck)
+ {
+ ASSERT(depth+1 < max_depth);
+ stack[depth].son = 1 - direction;
+ stack[depth+1].buck = P(tree_son) (stack[depth].buck, 1 - direction);
+ depth++;
+ }
+ }
+ return depth;
+}
+#endif
+
+#ifdef TREE_WANT_FIND_NEXT
+/*
+ * Return the node adjacent to start in direction 1 if its key compares equal
+ * to the key of start; NULL otherwise.
+ */
+STATIC P(node) * P(find_next) (P(node) *start)
+{
+  P(node) *next = P(adjacent) (start, 1);
+  if (!next || P(cmp) (TREE_KEY(start->), TREE_KEY(next->)) != 0)
+    return NULL;
+  return next;
+}
+#endif
+
+#ifdef TREE_WANT_SEARCH
+/*
+ * Find the node with the given key. If there is none, return the last node
+ * visited during the descent; NULL for an empty tree.
+ */
+STATIC P(node) * P(search) (T *t, TREE_KEY_DECL)
+{
+  P(stack_entry) stack[TREE_MAX_DEPTH];
+  uns depth = P(fill_stack) (stack, TREE_MAX_DEPTH, t->root, TREE_KEY(), 0);
+  if (stack[depth].buck)
+    return &stack[depth].buck->n;
+  if (!depth)
+    return NULL;
+  return &stack[depth-1].buck->n;
+}
+#endif
+
+#if 0
+#define TREE_TRACE(txt...) do { printf(txt); fflush(stdout); } while (0)
+#else
+#define TREE_TRACE(txt...)
+#endif
+
+/*
+ * Rotate the subtree rooted at node so that its son number son_id becomes the
+ * new root of the subtree: node takes over the son's inner subtree (son
+ * 1-son_id) and becomes that son of the new root itself. Parent pointers are
+ * updated if they are stored. The son pointers are written directly, so the
+ * caller has to set the colours of both buckets again and to store the
+ * returned bucket in the parent (or in the tree root).
+ */
+static inline P(bucket) * P(rotation) (P(bucket) *node, uns son_id)
+{
+ /* Destroys red_flag's in node, son. Returns new root. */
+ P(bucket) *son = P(tree_son) (node, son_id);
+ TREE_TRACE("Rotation (node %d, son %d), direction %d\n", node->n.key, son->n.key, son_id);
+ node->son[son_id] = P(tree_son) (son, 1-son_id);
+ son->son[1-son_id] = node;
+#ifdef TREE_STORE_PARENT
+ if (node->son[son_id])
+ node->son[son_id]->parent = node;
+ son->parent = node->parent;
+ node->parent = son;
+#endif
+ return son;
+}
+
+/*
+ * Restore the rule that a red node has a black parent after the bucket at
+ * stack[depth] has become red. stack[0..depth] is the path from the root with
+ * the son indices followed. The function recolours and, if needed, rotates;
+ * when the parent and the uncle are both red, the colours are exchanged with
+ * the grandparent and the check is repeated two levels higher.
+ */
+static void P(rotate_after_insert) (T *t, P(stack_entry) *stack, uns depth)
+{
+ P(bucket) *node;
+ P(bucket) *parent, *grand, *uncle;
+ int s1, s2;
+try_it_again:
+ node = stack[depth].buck;
+ ASSERT(P(red_flag) (node));
+ /* At this moment, node became red. The paths sum have
+ * been preserved, but we have to check the parental
+ * condition. */
+ if (depth == 0)
+ {
+ /* A red root has no parent to conflict with */
+ ASSERT(t->root == node);
+ return;
+ }
+ parent = stack[depth-1].buck;
+ if (!P(red_flag) (parent))
+ return;
+ if (depth == 1)
+ {
+ /* The red parent is the root: paint it black, which increases the black height */
+ ASSERT(t->root == parent);
+ P(set_red_flag) (parent, 0);
+ t->height++;
+ return;
+ }
+ grand = stack[depth-2].buck;
+ ASSERT(!P(red_flag) (grand));
+ /* The parent is also red, the grandparent exists and it
+ * is black. */
+ s1 = stack[depth-1].son;
+ s2 = stack[depth-2].son;
+ uncle = P(tree_son) (grand, 1-s2);
+ if (uncle && P(red_flag) (uncle))
+ {
+ /* Red parent and uncle, black grandparent.
+ * Exchange and try another iteration. */
+ P(set_red_flag) (parent, 0);
+ P(set_red_flag) (uncle, 0);
+ P(set_red_flag) (grand, 1);
+ depth -= 2;
+ TREE_TRACE("Swapping colours (parent %d, uncle %d, grand %d), passing thru\n", parent->n.key, uncle->n.key, grand->n.key);
+ goto try_it_again;
+ }
+ /* Black uncle and grandparent, we need to rotate. Test
+ * the direction. */
+ if (s1 == s2)
+ {
+ /* Node, parent and grandparent lie in a line: a single rotation suffices */
+ node = P(rotation) (grand, s2);
+ P(set_red_flag) (parent, 0);
+ P(set_red_flag) (grand, 1);
+ }
+ else
+ {
+ /* Zig-zag: rotate the parent first, then the grandparent */
+ grand->son[s2] = P(rotation) (parent, s1);
+ node = P(rotation) (grand, s2);
+ P(set_red_flag) (grand, 1);
+ P(set_red_flag) (parent, 1);
+ P(set_red_flag) (node, 0);
+ }
+ /* Hook the new root of the rotated subtree to the rest of the tree */
+ if (depth >= 3)
+ P(set_tree_son) (stack[depth-3].buck, stack[depth-3].son, node);
+ else
+ t->root = node;
+}
+
+#ifdef TREE_WANT_NEW
+/*
+ * Create a new node with the given key, link it into the tree as a red leaf
+ * and rebalance the tree. With TREE_DUPLICATES, the new node is placed after
+ * the last node with an equal key; without it, the key must not be present
+ * yet (checked by ASSERT). Returns the new node with its key and data
+ * initialized by init_key() and init_data().
+ */
+STATIC P(node) * P(new) (T *t, TREE_KEY_DECL)
+{
+ P(stack_entry) stack[TREE_MAX_DEPTH];
+ P(bucket) *added;
+ uns depth;
+ depth = P(fill_stack) (stack, TREE_MAX_DEPTH, t->root, TREE_KEY(), 1);
+#ifdef TREE_DUPLICATES
+ /* It is the last found value, hence everything in the right subtree is
+ * strongly _bigger_. */
+ depth += P(find_next_node) (stack+depth, TREE_MAX_DEPTH-depth, 1);
+#endif
+ ASSERT(!stack[depth].buck);
+ /* We are in a leaf, hence we can easily append a new leaf to it. */
+ added = P(alloc) (sizeof(struct P(bucket)) + TREE_EXTRA_SIZE(TREE_KEY()) );
+ added->son[0] = added->son[1] = NULL;
+ stack[depth].buck = added;
+ if (depth > 0)
+ {
+#ifdef TREE_STORE_PARENT
+ added->parent = stack[depth-1].buck;
+#endif
+ P(set_tree_son) (stack[depth-1].buck, stack[depth-1].son, added);
+ }
+ else
+ {
+ /* The tree was empty, the new bucket becomes the root */
+#ifdef TREE_STORE_PARENT
+ added->parent = NULL;
+#endif
+ t->root = added;
+ }
+ P(set_red_flag) (added, 1); /* Set it red to not disturb the path sum. */
+ P(init_key) (&added->n, TREE_KEY());
+ P(init_data) (&added->n);
+ t->count++;
+ /* Let us reorganize the red_flag's and the structure of the tree. */
+ P(rotate_after_insert) (t, stack, depth);
+ return &added->n;
+}
+#endif
+
+#ifdef TREE_WANT_LOOKUP
+/* Return the node with the given key; create it by new() if find() does not find one */
+STATIC P(node) * P(lookup) (T *t, TREE_KEY_DECL)
+{
+  P(node) *node = P(find) (t, TREE_KEY());
+  return node ? node : P(new) (t, TREE_KEY());
+}
+#endif
+
+#if defined(TREE_WANT_REMOVE) || defined(TREE_WANT_DELETE)
+/*
+ * Rebalance the tree after a black node has been removed below
+ * stack[depth].buck in the direction stack[depth].son, so that the paths
+ * through that son are one black node short. Depending on the colours of the
+ * sibling and of its descendants, the function either recolours and proceeds
+ * one level up, or performs one to three rotations and finishes. When the
+ * shortage propagates above the root (depth < 0), the black height of the
+ * whole tree is decreased.
+ */
+static void P(rotate_after_delete) (T *t, P(stack_entry) *stack, int depth)
+{
+ uns iteration = 0;
+ P(bucket) *parent, *sibling, *instead;
+ uns parent_red, del_son, sibl_red;
+missing_black:
+ if (depth < 0)
+ {
+ t->height--;
+ return;
+ }
+ parent = stack[depth].buck;
+ parent_red = P(red_flag) (parent);
+ del_son = stack[depth].son;
+ /* For the 1st iteration: we have deleted parent->son[del_son], which
+ * was a black node with no son. Hence there is one missing black
+ * vertex in that path, which we are going to fix now.
+ *
+ * For other iterations: in that path, there is also missing a black
+ * node. */
+ if (!iteration)
+ ASSERT(!P(tree_son) (parent, del_son));
+ sibling = P(tree_son) (parent, 1-del_son);
+ ASSERT(sibling);
+ sibl_red = P(red_flag) (sibling);
+ /* instead will hold the new root of this subtree if any rotation takes place */
+ instead = NULL;
+ if (!sibl_red)
+ {
+ P(bucket) *son[2];
+ uns red[2];
+ son[0] = P(tree_son) (sibling, 0);
+ son[1] = P(tree_son) (sibling, 1);
+ red[0] = son[0] ? P(red_flag) (son[0]) : 0;
+ red[1] = son[1] ? P(red_flag) (son[1]) : 0;
+ if (!red[0] && !red[1])
+ {
+ /* Black sibling without red sons: paint the sibling red and the parent black.
+ * If the parent was red we are done, otherwise the shortage moves one level up. */
+ P(set_red_flag) (sibling, 1);
+ P(set_red_flag) (parent, 0);
+ if (parent_red)
+ return;
+ else
+ {
+ depth--;
+ iteration++;
+ TREE_TRACE("Swapping colours (parent %d, sibling %d), passing thru\n", parent->n.key, sibling->n.key);
+ goto missing_black;
+ }
+ } else if (!red[del_son])
+ {
+ /* Only the sibling's son on the far side (1-del_son) is red: a single rotation of the parent */
+ instead = P(rotation) (parent, 1-del_son);
+ P(set_red_flag) (instead, parent_red);
+ P(set_red_flag) (parent, 0);
+ P(set_red_flag) (son[1-del_son], 0);
+ } else /* red[del_son] */
+ {
+ /* The sibling's son on the near side (del_son) is red: rotate the sibling, then the parent */
+ parent->son[1-del_son] = P(rotation) (sibling, del_son);
+ instead = P(rotation) (parent, 1-del_son);
+ P(set_red_flag) (instead, parent_red);
+ P(set_red_flag) (parent, 0);
+ P(set_red_flag) (sibling, 0);
+ }
+ } else /* sibl_red */
+ {
+ P(bucket) *grand[2], *son;
+ uns red[2];
+ /* A red sibling implies a black parent */
+ ASSERT(!parent_red);
+ son = P(tree_son) (sibling, del_son);
+ ASSERT(son && !P(red_flag) (son));
+ grand[0] = P(tree_son) (son, 0);
+ grand[1] = P(tree_son) (son, 1);
+ red[0] = grand[0] ? P(red_flag) (grand[0]) : 0;
+ red[1] = grand[1] ? P(red_flag) (grand[1]) : 0;
+ if (!red[0] && !red[1])
+ {
+ /* The sibling's near son has no red sons: a single rotation of the parent */
+ instead = P(rotation) (parent, 1-del_son);
+ P(set_red_flag) (instead, 0);
+ P(set_red_flag) (parent, 0);
+ P(set_red_flag) (son, 1);
+ }
+ else if (!red[del_son])
+ {
+ /* Only the far grandson (1-del_son) is red: rotate the sibling, then the parent */
+ parent->son[1-del_son] = P(rotation) (sibling, del_son);
+ instead = P(rotation) (parent, 1-del_son);
+ P(set_red_flag) (instead, 0);
+ P(set_red_flag) (parent, 0);
+ P(set_red_flag) (sibling, 1);
+ P(set_red_flag) (grand[1-del_son], 0);
+ } else /* red[del_son] */
+ {
+ /* The near grandson (del_son) is red: three rotations are needed */
+ sibling->son[del_son] = P(rotation) (son, del_son);
+ parent->son[1-del_son] = P(rotation) (sibling, del_son);
+ instead = P(rotation) (parent, 1-del_son);
+ P(set_red_flag) (instead, 0);
+ P(set_red_flag) (parent, 0);
+ P(set_red_flag) (sibling, 1);
+ P(set_red_flag) (son, 0);
+ }
+ }
+ /* We have performed all desired rotations and need to store the new
+ * pointer to the subtree. */
+ ASSERT(instead);
+ if (depth > 0)
+ P(set_tree_son) (stack[depth-1].buck, stack[depth-1].son, instead);
+ else
+ t->root = instead;
+}
+
+/*
+ * Remove the bucket stack[depth].buck from the tree.
+ *
+ * stack[0..depth] holds the path from the root to that bucket: for each
+ * i < depth, stack[i].son is the index of the son of stack[i].buck which
+ * leads to stack[i+1].buck (asserted on entry).  The path may be extended
+ * downwards here, so the stack is treated as having TREE_MAX_DEPTH entries.
+ *
+ * The removed bucket is handed to TREE_SAFE_FREE whatever its colour is and
+ * t->count is decremented.  If a black bucket without a son was removed,
+ * the tree is rebalanced by rotate_after_delete().
+ */
+static void P(remove_by_stack) (T *t, P(stack_entry) *stack, uns depth)
+{
+	P(bucket) *node = stack[depth].buck;
+	P(bucket) *son;
+	uns i, node_red;
+	for (i=0; i<depth; i++)
+		ASSERT(P(tree_son) (stack[i].buck, stack[i].son) == stack[i+1].buck);
+	if (P(tree_son) (node, 0) && P(tree_son) (node, 1))
+	{
+		/* Two sons: exchange the node (position and colour) with the
+		 * bucket found by find_next_node(), which is asserted to have
+		 * no left son.  Afterwards the bucket to unlink has at most
+		 * one son. */
+		P(bucket) *xchg;
+		uns flag_node, flag_xchg;
+		uns d = P(find_next_node) (stack+depth, TREE_MAX_DEPTH-depth, 1);
+
+		ASSERT(d >= 2);
+		d--;
+		xchg = stack[depth+d].buck;
+		flag_node = P(red_flag) (node);
+		flag_xchg = P(red_flag) (xchg);
+		ASSERT(!P(tree_son) (xchg, 0));
+		son = P(tree_son) (xchg, 1);
+		stack[depth].buck = xchg;	/* Magic iff d == 1. */
+		stack[depth+d].buck = node;
+		xchg->son[0] = P(tree_son) (node, 0);
+		xchg->son[1] = P(tree_son) (node, 1);
+		if (depth > 0)
+			P(set_tree_son) (stack[depth-1].buck, stack[depth-1].son, xchg);
+		else
+			t->root = xchg;
+		node->son[0] = NULL;
+		node->son[1] = son;
+		P(set_tree_son) (stack[depth+d-1].buck, stack[depth+d-1].son, node);
+#ifdef TREE_STORE_PARENT
+		xchg->parent = depth > 0 ? stack[depth-1].buck : NULL;
+		xchg->son[0]->parent = xchg;
+		xchg->son[1]->parent = xchg;
+		node->parent = stack[depth+d-1].buck;
+		if (son)
+			son->parent = node;
+#endif
+		P(set_red_flag) (xchg, flag_node);
+		P(set_red_flag) (node, flag_xchg);
+		depth += d;
+	}
+	else if (P(tree_son) (node, 0))
+		son = P(tree_son) (node, 0);
+	else
+		son = P(tree_son) (node, 1);
+	/* At this moment, stack[depth].buck == node and it has at most one son
+	 * and it is stored in the variable son. */
+	t->count--;
+	if (depth > 0)
+	{
+		P(set_tree_son) (stack[depth-1].buck, stack[depth-1].son, son);
+#ifdef TREE_STORE_PARENT
+		if (son)
+			son->parent = stack[depth-1].buck;
+#endif
+	}
+	else
+	{
+		t->root = son;
+#ifdef TREE_STORE_PARENT
+		if (son)
+			son->parent = NULL;
+#endif
+	}
+	/* The bucket is unlinked now.  Remember its colour and release it on
+	 * every path, so that a removed red bucket is not leaked. */
+	node_red = P(red_flag) (node);
+	TREE_SAFE_FREE(node);
+	if (node_red)
+	{
+		/* A red bucket is asserted to have no son here, and removing
+		 * it leaves no black node missing, so no rebalancing. */
+		ASSERT(!son);
+		return;
+	}
+	/* We have deleted a black node. */
+	if (son)
+	{
+		/* Its only son must be red; recolouring it black restores
+		 * the black height. */
+		ASSERT(P(red_flag) (son));
+		P(set_red_flag) (son, 0);
+		return;
+	}
+	P(rotate_after_delete) (t, stack, (int) depth - 1);
+}
+#endif
+
+#ifdef TREE_WANT_REMOVE
+/*
+ * Remove the given node from the tree.  The root-to-node path needed by
+ * remove_by_stack() is rebuilt by following the parent pointers upwards
+ * and then reversing the collected entries.
+ */
+STATIC void P(remove) (T *t, P(node) *Node)
+{
+	P(stack_entry) path[TREE_MAX_DEPTH];
+	P(bucket) *cur = SKIP_BACK(P(bucket), n, Node);
+	uns top = 0, lo, hi;
+
+	/* Entry of the node itself; its son index is only a placeholder. */
+	path[0].buck = cur;
+	path[0].son = 10;
+	/* Climb to the root, noting on which side we came from. */
+	for (; cur->parent; cur = cur->parent)
+	{
+		top++;
+		ASSERT(top < TREE_MAX_DEPTH);
+		path[top].buck = cur->parent;
+		path[top].son = (P(tree_son) (cur->parent, 0) == cur) ? 0 : 1;
+	}
+	/* The path was gathered bottom-up; turn it into root-first order. */
+	for (lo = 0, hi = top; lo < hi; lo++, hi--)
+	{
+		P(stack_entry) saved = path[lo];
+		path[lo] = path[hi];
+		path[hi] = saved;
+	}
+	P(remove_by_stack) (t, path, top);
+}
+#endif
+
+#ifdef TREE_WANT_DELETE
+/*
+ * Look up the given key and remove the bucket found.
+ * Returns 1 if a bucket was removed, 0 if the key was not present.
+ */
+STATIC int P(delete) (T *t, TREE_KEY_DECL)
+{
+	P(stack_entry) path[TREE_MAX_DEPTH];
+	uns top = P(fill_stack) (path, TREE_MAX_DEPTH, t->root, TREE_KEY(), 1);
+
+	if (!path[top].buck)
+		return 0;
+	P(remove_by_stack) (t, path, top);
+	return 1;
+}
+#endif
+
+#ifdef TREE_WANT_DUMP
+/*
+ * Recursively check and (if fb is non-NULL) print the subtree rooted at node.
+ *
+ * parent is the father of node (NULL for the root), cmp_res is -1 for a left
+ * son and +1 for a right son, level is the depth of node and black is the
+ * number of black nodes seen on the way down.  The following is asserted:
+ * every NULL leaf is reached with black == t->height, no red node has a red
+ * parent, keys are ordered with respect to the parent and, when parents are
+ * stored, the parent pointer is correct.
+ */
+static void P(dump_subtree) (struct fastbuf *fb, T *t, P(bucket) *node, P(bucket) *parent, int cmp_res, int level, uns black)
+{
+	uns red, below;
+	int col;
+
+	if (!node)
+	{
+		ASSERT(black == t->height);
+		return;
+	}
+	red = P(red_flag) (node);
+#ifdef TREE_STORE_PARENT
+	ASSERT(node->parent == parent);
+#endif
+	if (parent)
+	{
+		ASSERT(!red || !P(red_flag) (parent));
+		/* The sign of the comparison must agree with the side we hang on. */
+		cmp_res *= P(cmp) (TREE_KEY(node->n.), TREE_KEY(parent->n.));
+#ifdef TREE_DUPLICATES
+		ASSERT(cmp_res >= 0);
+#else
+		ASSERT(cmp_res > 0);
+#endif
+	}
+	/* Black count handed down to both subtrees. */
+	below = black + (1-red);
+	P(dump_subtree) (fb, t, P(tree_son) (node, 0), node, -1, level+1, below);
+	if (fb)
+	{
+		char label[20];
+		for (col = 0; col < level; col++)
+			bputs(fb, " ");
+		sprintf(label, "L%d %c\t", level, red ? 'R' : 'B');
+		bputs(fb, label);
+		P(dump_key) (fb, &node->n);
+		P(dump_data) (fb, &node->n);
+		bputs(fb, "\n");
+	}
+	P(dump_subtree) (fb, t, P(tree_son) (node, 1), node, +1, level+1, below);
+}
+
+/*
+ * Check the consistency of the whole tree and, if fb is non-NULL, write
+ * a textual dump of it to fb and flush it.  With fb == NULL only the
+ * assertions in dump_subtree() are evaluated.
+ */
+STATIC void P(dump) (struct fastbuf *fb, T *t)
+{
+	if (fb)
+	{
+		char header[50];
+		sprintf(header, "Tree of %d nodes and height %d\n", t->count, t->height);
+		bputs(fb, header);
+	}
+	P(dump_subtree) (fb, t, t->root, NULL, 0, 0, 0);
+	if (!fb)
+		return;
+	bputs(fb, "\n");
+	bflush(fb);
+}
+#endif
+
+/* And the iterator */
+
+#ifdef TREE_WANT_ITERATOR
+/*
+ * Return the outermost node of the tree in the given direction (the node
+ * reached by repeatedly following son[direction] from the root), or NULL
+ * if the tree is empty.
+ */
+static P(node) * P(first_node) (T *t, uns direction)
+{
+	P(bucket) *cur = t->root, *next;
+
+	if (!cur)
+		return NULL;
+	while ((next = P(tree_son) (cur, direction)))
+		cur = next;
+	return &cur->n;
+}
+
+#ifndef TREE_FOR_ALL
+
+/*
+ * Iteration over all nodes of a tree:
+ *
+ *	TREE_FOR_ALL(prefix, tree_ptr, var)
+ *		... body using var ...
+ *	TREE_END_FOR;
+ *
+ * t_px is the prefix of the tree's functions, t_ptr the tree and t_var the
+ * name of the node pointer declared by the macro.  The walk starts at
+ * first_node(t_ptr, 0) and advances by adjacent(t_var, 1).  TREE_FOR_ALL
+ * opens a do { ... for (...) { which only TREE_END_FOR closes, so the two
+ * must always be used as a pair.  TREE_BREAK and TREE_CONTINUE expand to
+ * plain break and continue of the inner for loop.
+ */
+#define TREE_FOR_ALL(t_px, t_ptr, t_var) \
+do \
+{ \
+ GLUE_(t_px,node) *t_var = GLUE_(t_px,first_node)(t_ptr, 0); \
+ for (; t_var; t_var = GLUE_(t_px,adjacent)(t_var, 1)) \
+ {
+#define TREE_END_FOR } } while(0)
+#define TREE_BREAK break
+#define TREE_CONTINUE continue
+
+#endif
+#endif
+
+/* Finally, undefine all the parameters */
+
+#undef P
+#undef T
+
+#undef TREE_NODE
+#undef TREE_PREFIX
+#undef TREE_KEY_ATOMIC
+#undef TREE_KEY_STRING
+#undef TREE_KEY_ENDSTRING
+#undef TREE_KEY_COMPLEX
+#undef TREE_KEY_DECL
+#undef TREE_WANT_CLEANUP
+#undef TREE_WANT_FIND
+#undef TREE_WANT_FIND_NEXT
+#undef TREE_WANT_SEARCH
+#undef TREE_WANT_SEARCH_DOWN
+#undef TREE_WANT_BOUNDARY
+#undef TREE_WANT_ADJACENT
+#undef TREE_WANT_NEW
+#undef TREE_WANT_LOOKUP
+#undef TREE_WANT_DELETE
+#undef TREE_WANT_REMOVE
+#undef TREE_WANT_DUMP
+#undef TREE_WANT_ITERATOR
+#undef TREE_GIVE_CMP
+#undef TREE_GIVE_EXTRA_SIZE
+#undef TREE_GIVE_INIT_KEY
+#undef TREE_GIVE_INIT_DATA
+#undef TREE_GIVE_ALLOC
+#undef TREE_NOCASE
+#undef TREE_ATOMIC_TYPE
+#undef TREE_USE_POOL
+#undef TREE_STATIC
+#undef TREE_CONSERVE_SPACE
+#undef TREE_DUPLICATES
+#undef TREE_MAX_DEPTH
+#undef TREE_STORE_PARENT
+#undef TREE_KEY
+#undef TREE_EXTRA_SIZE
+#undef TREE_SAFE_FREE
+#undef TREE_TRACE
+#undef STATIC
--- /dev/null
+/*
+ * UCW Library -- Interface to Regular Expression Libraries
+ *
+ * (c) 1997--2004 Martin Mares <mj@ucw.cz>
+ * (c) 2001 Robert Spalek <robert@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/chartype.h"
+#include "lib/hashfunc.h"
+
+#include <stdio.h>
+#include <string.h>
+
+#if defined(CONFIG_OWN_REGEX) || defined(CONFIG_POSIX_REGEX)
+
+/* POSIX regular expression library */
+
+#ifdef CONFIG_OWN_REGEX
+#include "lib/regex/regex-sh.h"
+#else
+#include <regex.h>
+#endif
+
+/* Compiled regular expression together with the results of the last match. */
+struct regex {
+ regex_t rx; /* Pattern compiled by regcomp() */
+ regmatch_t matches[10]; /* Filled by regexec() in rx_match(): [0] is the whole match, [1..9] are the groups */
+};
+
+/*
+ * Compile the extended regular expression p, case-insensitively if icase
+ * is non-zero.  Returns a newly allocated regex; dies with an error message
+ * if the pattern cannot be compiled.
+ */
+regex *
+rx_compile(const char *p, int icase)
+{
+	regex *compiled = xmalloc_zero(sizeof(regex));
+	int cflags = REG_EXTENDED;
+	int err;
+
+	if (icase)
+		cflags |= REG_ICASE;
+	err = regcomp(&compiled->rx, p, cflags);
+	if (err)
+	{
+		char msg[256];
+		regerror(err, &compiled->rx, msg, sizeof(msg)-1);
+		/* regfree(&compiled->rx) not needed */
+		die("Error parsing regular expression `%s': %s", p, msg);
+	}
+	return compiled;
+}
+
+/* Release a regex returned by rx_compile(): first the compiled pattern, then the structure itself. */
+void
+rx_free(regex *r)
+{
+ regfree(&r->rx);
+ xfree(r);
+}
+
+/*
+ * Match the whole string s against r.  Returns 1 if the entire string
+ * matches, 0 otherwise; dies on any regexec() failure other than "no match".
+ * The positions of the match and of up to 9 groups are left in r->matches.
+ */
+int
+rx_match(regex *r, const char *s)
+{
+	const regmatch_t *whole = &r->matches[0];
+	int err = regexec(&r->rx, s, 10, r->matches, 0);
+
+	switch (err)
+	{
+	case 0:
+		/* regexec doesn't support anchored expressions, so we have to check ourselves that the full string is matched */
+		return whole->rm_so == 0 && s[whole->rm_eo] == 0;
+	case REG_NOMATCH:
+		return 0;
+	case REG_ESPACE:
+		die("Regex matching ran out of memory");
+		break;
+	default:
+		die("Regex matching failed with unknown error %d", err);
+	}
+}
+
+/*
+ * Match src against r and, on success, expand the template `by' into dest.
+ *
+ * In the template, \0 stands for the whole match and \1..\9 for the
+ * corresponding group; a reference to a group which does not exist or did
+ * not take part in the match expands to nothing.  A backslash followed by
+ * any other character yields that character; a backslash at the very end
+ * of the template is ignored.
+ *
+ * dest has room for destlen bytes including the terminating zero.
+ * Returns 1 on success, 0 if src does not match, -1 if the result does
+ * not fit in dest.
+ */
+int
+rx_subst(regex *r, const char *by, const char *src, char *dest, uns destlen)
+{
+	char *end;
+
+	if (!rx_match(r, src))
+		return 0;
+	if (!destlen)				/* No room even for the terminator */
+		return -1;
+	end = dest + destlen - 1;
+
+	while (*by)
+	{
+		if (*by == '\\')
+		{
+			by++;
+			if (!*by)			/* Trailing backslash: never step over the terminator */
+				break;
+			if (*by >= '0' && *by <= '9')	/* \0 gets replaced by entire pattern */
+			{
+				uns j = *by++ - '0';
+				if (j <= r->rx.re_nsub && r->matches[j].rm_so >= 0)
+				{
+					const char *s = src + r->matches[j].rm_so;
+					uns i = r->matches[j].rm_eo - r->matches[j].rm_so;
+					/* dest <= end always holds, so compare lengths rather than forming dest + i */
+					if (i >= (uns)(end - dest))
+						return -1;
+					memcpy(dest, s, i);
+					dest += i;
+				}
+				/* The digit is consumed either way; the next character
+				 * is interpreted from scratch at the top of the loop. */
+				continue;
+			}
+		}
+		if (dest < end)
+			*dest++ = *by++;
+		else
+			return -1;
+	}
+	*dest = 0;
+	return 1;
+}
+
+#elif defined(CONFIG_PCRE)
+
+/* PCRE library */
+
+#include <pcre.h>
+
+/* Compiled PCRE pattern together with the results of the last match. */
+struct regex {
+ pcre *rx; /* Pattern compiled by pcre_compile() */
+ pcre_extra *extra; /* Result of pcre_study() */
+ uns match_array_size; /* Number of ints in matches[], passed to pcre_exec() */
+ uns real_matches; /* Return value of the last successful pcre_exec() */
+ int matches[0]; /* (max_matches+1) pairs (start,end) of offsets plus some workspace */
+};
+
+/*
+ * Compile the pattern p as an anchored PCRE expression, caseless if icase
+ * is non-zero, and study it.  The returned regex carries an offset array
+ * of 3*(number of capturing groups + 1) ints.  Dies on any error.
+ */
+regex *
+rx_compile(const char *p, int icase)
+{
+	const char *err;
+	int errpos, captures, slots, eno;
+	int options = PCRE_ANCHORED | PCRE_EXTRA;
+
+	if (icase)
+		options |= PCRE_CASELESS;
+	pcre *rx = pcre_compile(p, options, &err, &errpos, NULL);
+	if (!rx)
+		die("Error parsing regular expression `%s': %s at position %d", p, err, errpos);
+	eno = pcre_fullinfo(rx, NULL, PCRE_INFO_CAPTURECOUNT, &captures);
+	if (eno)
+		die("Internal error: pcre_fullinfo() failed with error %d", eno);
+	slots = 3*(captures+1);
+	regex *r = xmalloc_zero(sizeof(regex) + slots * sizeof(int));
+	r->rx = rx;
+	r->match_array_size = slots;
+	r->extra = pcre_study(rx, 0, &err);
+	if (err)
+		die("Error studying regular expression `%s': %s", p, err);
+	return r;
+}
+
+/*
+ * Release a regex returned by rx_compile().  The compiled pattern and the
+ * study data were allocated by the PCRE library, so they are returned
+ * through pcre_free, the library's own deallocator; only the wrapper
+ * structure itself comes from xmalloc_zero() and goes to xfree().
+ */
+void
+rx_free(regex *r)
+{
+	pcre_free(r->rx);
+	if (r->extra)			/* pcre_study() may have produced no data */
+		pcre_free(r->extra);
+	xfree(r);
+}
+
+/*
+ * Match the whole string s against r.  Returns 1 if the entire string
+ * matches, 0 otherwise; dies on any pcre_exec() failure other than
+ * "no match".  The match offsets are left in r->matches and the value
+ * returned by pcre_exec() in r->real_matches.
+ */
+int
+rx_match(regex *r, const char *s)
+{
+	int len = str_len(s);
+	int res = pcre_exec(r->rx, r->extra, s, len, 0, 0, r->matches, r->match_array_size);
+
+	if (res == PCRE_ERROR_NOMATCH)
+		return 0;
+	else if (res == PCRE_ERROR_NOMEMORY)
+		die("Regex matching ran out of memory");
+	else if (res < 0)
+		die("Regex matching failed with unknown error %d", res);
+	else
+	{
+		r->real_matches = res;
+		/* need to check that the full string matches */
+		return r->matches[0] == 0 && s[r->matches[1]] == 0;
+	}
+}
+
+/*
+ * Match src against r and, on success, expand the template `by' into dest.
+ *
+ * In the template, \0 stands for the whole match and \1..\9 for the
+ * corresponding group; a reference to a group which was not reported by
+ * the last match expands to nothing.  A backslash followed by any other
+ * character yields that character; a backslash at the very end of the
+ * template is ignored.
+ *
+ * dest has room for destlen bytes including the terminating zero.
+ * Returns 1 on success, 0 if src does not match, -1 if the result does
+ * not fit in dest.
+ */
+int
+rx_subst(regex *r, const char *by, const char *src, char *dest, uns destlen)
+{
+	char *end;
+
+	if (!rx_match(r, src))
+		return 0;
+	if (!destlen)				/* No room even for the terminator */
+		return -1;
+	end = dest + destlen - 1;
+
+	while (*by)
+	{
+		if (*by == '\\')
+		{
+			by++;
+			if (!*by)			/* Trailing backslash: never step over the terminator */
+				break;
+			if (*by >= '0' && *by <= '9')	/* \0 gets replaced by entire pattern */
+			{
+				uns j = *by++ - '0';
+				if (j < r->real_matches && r->matches[2*j] >= 0)
+				{
+					const char *s = src + r->matches[2*j];
+					uns i = r->matches[2*j+1] - r->matches[2*j];
+					/* dest <= end always holds, so compare lengths rather than forming dest + i */
+					if (i >= (uns)(end - dest))
+						return -1;
+					memcpy(dest, s, i);
+					dest += i;
+				}
+				/* The digit is consumed either way; the next character
+				 * is interpreted from scratch at the top of the loop. */
+				continue;
+			}
+		}
+		if (dest < end)
+			*dest++ = *by++;
+		else
+			return -1;
+	}
+	*dest = 0;
+	return 1;
+}
+
+#else
+
+/* BSD regular expression library */
+
+#ifdef CONFIG_OWN_BSD_REGEX
+#include "lib/regex/regex-sh.h"
+#else
+#include <regex.h>
+#endif
+
+#define INITIAL_MEM 1024 /* Initial space allocated for each pattern */
+#define CHAR_SET_SIZE 256 /* How many characters in the character set. */
+
+/* Compiled BSD/GNU-style pattern together with the results of the last match. */
+struct regex {
+ struct re_pattern_buffer buf; /* Pattern compiled by re_compile_pattern() */
+ struct re_registers regs; /* Must not change between re_match() calls */
+ int len_cache; /* Length of the string passed to the last rx_match() */
+};
+
+/*
+ * Compile the pattern p with POSIX extended syntax.  If icase is non-zero,
+ * a translation table built from Cupcase() is installed so that matching
+ * ignores case.  Returns a newly allocated regex; dies if the pattern
+ * cannot be compiled.
+ */
+regex *
+rx_compile(const char *p, int icase)
+{
+	regex *r = xmalloc_zero(sizeof(regex));
+	const char *msg;
+
+	r->buf.buffer = xmalloc(INITIAL_MEM);
+	r->buf.allocated = INITIAL_MEM;
+	r->buf.translate = NULL;
+	if (icase)
+	{
+		unsigned c = 0;
+		r->buf.translate = xmalloc (CHAR_SET_SIZE);
+		/* Fold every character code through Cupcase(). */
+		while (c < CHAR_SET_SIZE)
+		{
+			r->buf.translate[c] = Cupcase(c);
+			c++;
+		}
+	}
+	re_set_syntax(RE_SYNTAX_POSIX_EXTENDED);
+	msg = re_compile_pattern(p, strlen(p), &r->buf);
+	if (!msg)
+		return r;
+	die("Error parsing pattern `%s': %s", p, msg);
+}
+
+/*
+ * Release a regex returned by rx_compile(): the pattern buffer, the
+ * translation table (present only for case-insensitive patterns) and the
+ * structure itself.
+ * NOTE(review): r->regs.start / r->regs.end are not released here; if the
+ * regex library allocates them during re_match(), they leak -- confirm.
+ */
+void
+rx_free(regex *r)
+{
+ xfree(r->buf.buffer);
+ if (r->buf.translate)
+ xfree(r->buf.translate);
+ xfree(r);
+}
+
+/*
+ * Match the whole string s against r.  Returns 1 if the entire string
+ * matches and 0 otherwise.  The length of s is remembered in r->len_cache
+ * and the registers of the match are left in r->regs.
+ */
+int
+rx_match(regex *r, const char *s)
+{
+	int len = strlen(s);
+
+	r->len_cache = len;
+	if (re_match(&r->buf, s, len, 0, &r->regs) < 0)
+		return 0;
+	/* XXX: Why regex doesn't enforce implicit "^...$" ? */
+	return r->regs.start[0] == 0 && r->regs.end[0] == len;
+}
+
+/*
+ * Match src against r and, on success, expand the template `by' into dest.
+ *
+ * In the template, \0 stands for the whole match and \1..\9 for the
+ * corresponding register; a reference to a register which does not exist
+ * or is unset expands to nothing.  A backslash followed by any other
+ * character yields that character; a backslash at the very end of the
+ * template is ignored.
+ *
+ * dest has room for destlen bytes including the terminating zero.
+ * Returns 1 on success, 0 if src does not match, -1 if the result does
+ * not fit in dest or a register points outside of src.
+ */
+int
+rx_subst(regex *r, const char *by, const char *src, char *dest, uns destlen)
+{
+	char *end;
+
+	if (!rx_match(r, src))
+		return 0;
+	if (!destlen)				/* No room even for the terminator */
+		return -1;
+	end = dest + destlen - 1;
+
+	while (*by)
+	{
+		if (*by == '\\')
+		{
+			by++;
+			if (!*by)			/* Trailing backslash: never step over the terminator */
+				break;
+			if (*by >= '0' && *by <= '9')	/* \0 gets replaced by entire pattern */
+			{
+				uns j = *by++ - '0';
+				if (j < r->regs.num_regs && r->regs.start[j] >= 0)
+				{
+					const char *s = src + r->regs.start[j];
+					uns i = r->regs.end[j] - r->regs.start[j];
+					/* Refuse registers pointing behind the matched string */
+					if (r->regs.start[j] > r->len_cache || r->regs.end[j] > r->len_cache)
+						return -1;
+					/* dest <= end always holds, so compare lengths rather than forming dest + i */
+					if (i >= (uns)(end - dest))
+						return -1;
+					memcpy(dest, s, i);
+					dest += i;
+				}
+				/* The digit is consumed either way; the next character
+				 * is interpreted from scratch at the top of the loop. */
+				continue;
+			}
+		}
+		if (dest < end)
+			*dest++ = *by++;
+		else
+			return -1;
+	}
+	*dest = 0;
+	return 1;
+}
+
+#endif
+
+#ifdef TEST
+
+/*
+ * Test driver: regex-t [-i] <regex> [<substitution>]
+ *
+ * Reads lines from stdin.  Without a substitution it prints MATCH or
+ * NO MATCH for each line; with one it prints the substituted line,
+ * NO MATCH, or OVERFLOW if the result does not fit.  -i makes the match
+ * case-insensitive.
+ */
+int main(int argc, char **argv)
+{
+	regex *r;
+	char buf1[4096], buf2[4096];
+	int opt_i = 0;
+
+	/* argv[1] must exist before it can be compared with "-i" */
+	if (argc > 1 && !strcmp(argv[1], "-i"))
+	{
+		opt_i = 1;
+		argv++;
+		argc--;
+	}
+	if (argc < 2)
+	{
+		fputs("Usage: regex-t [-i] <regex> [<substitution>]\n", stderr);
+		return 1;
+	}
+	r = rx_compile(argv[1], opt_i);
+	while (fgets(buf1, sizeof(buf1), stdin))
+	{
+		/* Strip the trailing newline, if any */
+		char *p = strchr(buf1, '\n');
+		if (p)
+			*p = 0;
+		if (argc == 2)
+		{
+			if (rx_match(r, buf1))
+				puts("MATCH");
+			else
+				puts("NO MATCH");
+		}
+		else
+		{
+			int i = rx_subst(r, argv[2], buf1, buf2, sizeof(buf2));
+			if (i < 0)
+				puts("OVERFLOW");
+			else if (!i)
+				puts("NO MATCH");
+			else
+				puts(buf2);
+		}
+	}
+	rx_free(r);
+	return 0;
+}
+
+#endif
--- /dev/null
+# Tests for the regex module
+
+Run: ../obj/lib/regex-t 'a.*b.*c'
+In: abc
+ ajkhkbbbbbc
+ Aabc
+Out: MATCH
+ MATCH
+ NO MATCH
+
+Run: ../obj/lib/regex-t -i 'a.*b.*c'
+In: aBc
+ ajkhkbBBBBC
+ Aabc
+Out: MATCH
+ MATCH
+ MATCH
+
+Run: ../obj/lib/regex-t -i '(ahoj|nebo)'
+In: Ahoj
+ nEBo
+ ahoja
+ (ahoj|nebo)
+Out: MATCH
+ MATCH
+ NO MATCH
+ NO MATCH
+
+Run: ../obj/lib/regex-t '\(ahoj\)'
+In: (ahoj)
+ ahoj
+Out: MATCH
+ NO MATCH
+
+Run: ../obj/lib/regex-t '(.*b)*'
+In: ababababab
+ ababababababababababababababababababababababababababababa
+Out: MATCH
+ NO MATCH
+
+Run: ../obj/lib/regex-t '(.*)((aabb)|cc)(b.*)' '\1<\3>\4'
+In: aaabbb
+ aabbccb
+ abcabc
+ aaccbb
+Out: a<aabb>b
+ aabb<>b
+ NO MATCH
+ aa<>bb
+
+Run: ../obj/lib/regex-t '.*\?(.*&)*([a-z_]*sess[a-z_]*|random|sid|S_ID|rnd|timestamp|referer)=.*'
+In: /nemecky/ubytovani/hotel.php?sort=&cislo=26&mena=EUR&typ=Hotel&luz1=ANO&luz2=ANO&luz3=&luz4=&luz5=&maxp1=99999&maxp2=99999&maxp3=99999&maxp4=99999&maxp5=99999&apart=&rada=8,9,10,11,19,22,26,27,28,29,3&cislo=26&mena=EUR&typ=Hotel&luz1=ANO&luz2=ANO&luz3=&luz4=&luz5=&maxp1=99999&maxp2=99999&maxp3=99999&maxp4=99999&maxp5=99999&apart=&rada=8,9,10,11,19,22,26,27,28,29,3&cislo=26&mena=EUR&typ=Hotel&luz1=ANO&luz2=ANO&luz3=&luz4=&luz5=&maxp1=99999&maxp2=99999&maxp3=99999&maxp4=99999&maxp5=99999&apart=&rada=8,9,10,11,19,22,26,27,28,29,3
+ /test...?f=1&s=3&sid=123&q=3&
+Out: NO MATCH
+ MATCH
+
+Run: ../obj/lib/regex-t '.*[0-9a-f]{8,16}.*'
+In: abcdabcdabcd
+ aaaaaaaaaaaaaaaaaaaaaaaaaaaa
+ asddajlkdkajlqwepoiequwiouio
+ 000001111p101010101010q12032
+Out: MATCH
+ MATCH
+ NO MATCH
+ MATCH
--- /dev/null
+# Makefile for the UCW Regex Library (c) 2004 Martin Mares <mj@ucw.cz>
+
+DIRS+=lib/regex
+
+LIBUCW_MODS+=regex/regex
+
+$(o)/lib/regex/regex.o $(o)/lib/regex/regex.oo: CWARNS=
--- /dev/null
+This directory contains regular expression routines from the GNU libc 2.3.2
+which are significantly faster than the default regex libraries on most systems.
+
+They are distributed under the GNU LGPL.
+
+All files are exact copies of the original distribution; the only files
+I have provided myself are regex.c, regex-sh.h and the Makefile.
+
+ Martin Mares, March 2004
--- /dev/null
+/* Extended regular expression matching and search library.
+ Copyright (C) 2002, 2003 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+static reg_errcode_t re_compile_internal (regex_t *preg, const char * pattern,
+ int length, reg_syntax_t syntax);
+static void re_compile_fastmap_iter (regex_t *bufp,
+ const re_dfastate_t *init_state,
+ char *fastmap);
+static reg_errcode_t init_dfa (re_dfa_t *dfa, int pat_len);
+static reg_errcode_t init_word_char (re_dfa_t *dfa);
+#ifdef RE_ENABLE_I18N
+static void free_charset (re_charset_t *cset);
+#endif /* RE_ENABLE_I18N */
+static void free_workarea_compile (regex_t *preg);
+static reg_errcode_t create_initial_state (re_dfa_t *dfa);
+static reg_errcode_t analyze (re_dfa_t *dfa);
+static reg_errcode_t analyze_tree (re_dfa_t *dfa, bin_tree_t *node);
+static void calc_first (re_dfa_t *dfa, bin_tree_t *node);
+static void calc_next (re_dfa_t *dfa, bin_tree_t *node);
+static void calc_epsdest (re_dfa_t *dfa, bin_tree_t *node);
+static reg_errcode_t duplicate_node_closure (re_dfa_t *dfa, int top_org_node,
+ int top_clone_node, int root_node,
+ unsigned int constraint);
+static reg_errcode_t duplicate_node (int *new_idx, re_dfa_t *dfa, int org_idx,
+ unsigned int constraint);
+static int search_duplicated_node (re_dfa_t *dfa, int org_node,
+ unsigned int constraint);
+static reg_errcode_t calc_eclosure (re_dfa_t *dfa);
+static reg_errcode_t calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa,
+ int node, int root);
+static void calc_inveclosure (re_dfa_t *dfa);
+static int fetch_number (re_string_t *input, re_token_t *token,
+ reg_syntax_t syntax);
+static re_token_t fetch_token (re_string_t *input, reg_syntax_t syntax);
+static int peek_token (re_token_t *token, re_string_t *input,
+ reg_syntax_t syntax);
+static int peek_token_bracket (re_token_t *token, re_string_t *input,
+ reg_syntax_t syntax);
+static bin_tree_t *parse (re_string_t *regexp, regex_t *preg,
+ reg_syntax_t syntax, reg_errcode_t *err);
+static bin_tree_t *parse_reg_exp (re_string_t *regexp, regex_t *preg,
+ re_token_t *token, reg_syntax_t syntax,
+ int nest, reg_errcode_t *err);
+static bin_tree_t *parse_branch (re_string_t *regexp, regex_t *preg,
+ re_token_t *token, reg_syntax_t syntax,
+ int nest, reg_errcode_t *err);
+static bin_tree_t *parse_expression (re_string_t *regexp, regex_t *preg,
+ re_token_t *token, reg_syntax_t syntax,
+ int nest, reg_errcode_t *err);
+static bin_tree_t *parse_sub_exp (re_string_t *regexp, regex_t *preg,
+ re_token_t *token, reg_syntax_t syntax,
+ int nest, reg_errcode_t *err);
+static bin_tree_t *parse_dup_op (bin_tree_t *dup_elem, re_string_t *regexp,
+ re_dfa_t *dfa, re_token_t *token,
+ reg_syntax_t syntax, reg_errcode_t *err);
+static bin_tree_t *parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa,
+ re_token_t *token, reg_syntax_t syntax,
+ reg_errcode_t *err);
+static reg_errcode_t parse_bracket_element (bracket_elem_t *elem,
+ re_string_t *regexp,
+ re_token_t *token, int token_len,
+ re_dfa_t *dfa,
+ reg_syntax_t syntax);
+static reg_errcode_t parse_bracket_symbol (bracket_elem_t *elem,
+ re_string_t *regexp,
+ re_token_t *token);
+#ifndef _LIBC
+# ifdef RE_ENABLE_I18N
+static reg_errcode_t build_range_exp (re_bitset_ptr_t sbcset,
+ re_charset_t *mbcset, int *range_alloc,
+ bracket_elem_t *start_elem,
+ bracket_elem_t *end_elem);
+static reg_errcode_t build_collating_symbol (re_bitset_ptr_t sbcset,
+ re_charset_t *mbcset,
+ int *coll_sym_alloc,
+ const unsigned char *name);
+# else /* not RE_ENABLE_I18N */
+static reg_errcode_t build_range_exp (re_bitset_ptr_t sbcset,
+ bracket_elem_t *start_elem,
+ bracket_elem_t *end_elem);
+static reg_errcode_t build_collating_symbol (re_bitset_ptr_t sbcset,
+ const unsigned char *name);
+# endif /* not RE_ENABLE_I18N */
+#endif /* not _LIBC */
+#ifdef RE_ENABLE_I18N
+static reg_errcode_t build_equiv_class (re_bitset_ptr_t sbcset,
+ re_charset_t *mbcset,
+ int *equiv_class_alloc,
+ const unsigned char *name);
+static reg_errcode_t build_charclass (re_bitset_ptr_t sbcset,
+ re_charset_t *mbcset,
+ int *char_class_alloc,
+ const unsigned char *class_name,
+ reg_syntax_t syntax);
+#else /* not RE_ENABLE_I18N */
+static reg_errcode_t build_equiv_class (re_bitset_ptr_t sbcset,
+ const unsigned char *name);
+static reg_errcode_t build_charclass (re_bitset_ptr_t sbcset,
+ const unsigned char *class_name,
+ reg_syntax_t syntax);
+#endif /* not RE_ENABLE_I18N */
+static bin_tree_t *build_word_op (re_dfa_t *dfa, int not, reg_errcode_t *err);
+static void free_bin_tree (bin_tree_t *tree);
+static bin_tree_t *create_tree (bin_tree_t *left, bin_tree_t *right,
+ re_token_type_t type, int index);
+static bin_tree_t *duplicate_tree (const bin_tree_t *src, re_dfa_t *dfa);
+\f
+/* This table gives an error message for each of the error codes listed
+ in regex.h. Obviously the order here has to be same as there.
+ POSIX doesn't require that we do anything for REG_NOERROR,
+ but why not be nice? */
+
+const char __re_error_msgid[] attribute_hidden =
+ {
+#define REG_NOERROR_IDX 0
+ gettext_noop ("Success") /* REG_NOERROR */
+ "\0"
+#define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success")
+ gettext_noop ("No match") /* REG_NOMATCH */
+ "\0"
+#define REG_BADPAT_IDX (REG_NOMATCH_IDX + sizeof "No match")
+ gettext_noop ("Invalid regular expression") /* REG_BADPAT */
+ "\0"
+#define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression")
+ gettext_noop ("Invalid collation character") /* REG_ECOLLATE */
+ "\0"
+#define REG_ECTYPE_IDX (REG_ECOLLATE_IDX + sizeof "Invalid collation character")
+ gettext_noop ("Invalid character class name") /* REG_ECTYPE */
+ "\0"
+#define REG_EESCAPE_IDX (REG_ECTYPE_IDX + sizeof "Invalid character class name")
+ gettext_noop ("Trailing backslash") /* REG_EESCAPE */
+ "\0"
+#define REG_ESUBREG_IDX (REG_EESCAPE_IDX + sizeof "Trailing backslash")
+ gettext_noop ("Invalid back reference") /* REG_ESUBREG */
+ "\0"
+#define REG_EBRACK_IDX (REG_ESUBREG_IDX + sizeof "Invalid back reference")
+ gettext_noop ("Unmatched [ or [^") /* REG_EBRACK */
+ "\0"
+#define REG_EPAREN_IDX (REG_EBRACK_IDX + sizeof "Unmatched [ or [^")
+ gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */
+ "\0"
+#define REG_EBRACE_IDX (REG_EPAREN_IDX + sizeof "Unmatched ( or \\(")
+ gettext_noop ("Unmatched \\{") /* REG_EBRACE */
+ "\0"
+#define REG_BADBR_IDX (REG_EBRACE_IDX + sizeof "Unmatched \\{")
+ gettext_noop ("Invalid content of \\{\\}") /* REG_BADBR */
+ "\0"
+#define REG_ERANGE_IDX (REG_BADBR_IDX + sizeof "Invalid content of \\{\\}")
+ gettext_noop ("Invalid range end") /* REG_ERANGE */
+ "\0"
+#define REG_ESPACE_IDX (REG_ERANGE_IDX + sizeof "Invalid range end")
+ gettext_noop ("Memory exhausted") /* REG_ESPACE */
+ "\0"
+#define REG_BADRPT_IDX (REG_ESPACE_IDX + sizeof "Memory exhausted")
+ gettext_noop ("Invalid preceding regular expression") /* REG_BADRPT */
+ "\0"
+#define REG_EEND_IDX (REG_BADRPT_IDX + sizeof "Invalid preceding regular expression")
+ gettext_noop ("Premature end of regular expression") /* REG_EEND */
+ "\0"
+#define REG_ESIZE_IDX (REG_EEND_IDX + sizeof "Premature end of regular expression")
+ gettext_noop ("Regular expression too big") /* REG_ESIZE */
+ "\0"
+#define REG_ERPAREN_IDX (REG_ESIZE_IDX + sizeof "Regular expression too big")
+ gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */
+ };
+
+/* Offset of each message within __re_error_msgid, indexed by the error
+   code.  The entries must stay in the same order as the messages.  */
+const size_t __re_error_msgid_idx[] attribute_hidden =
+ {
+ REG_NOERROR_IDX,
+ REG_NOMATCH_IDX,
+ REG_BADPAT_IDX,
+ REG_ECOLLATE_IDX,
+ REG_ECTYPE_IDX,
+ REG_EESCAPE_IDX,
+ REG_ESUBREG_IDX,
+ REG_EBRACK_IDX,
+ REG_EPAREN_IDX,
+ REG_EBRACE_IDX,
+ REG_BADBR_IDX,
+ REG_ERANGE_IDX,
+ REG_ESPACE_IDX,
+ REG_BADRPT_IDX,
+ REG_EEND_IDX,
+ REG_ESIZE_IDX,
+ REG_ERPAREN_IDX
+ };
+\f
+/* Entry points for GNU code. */
+
+/* re_compile_pattern is the GNU regular expression compiler: it
+ compiles PATTERN (of length LENGTH) and puts the result in BUFP.
+ Returns 0 if the pattern was valid, otherwise an error string.
+
+ Assumes the `allocated' (and perhaps `buffer') and `translate' fields
+ are set in BUFP on entry. */
+
+const char *
+re_compile_pattern (pattern, length, bufp)
+     const char *pattern;
+     size_t length;
+     struct re_pattern_buffer *bufp;
+{
+  reg_errcode_t err;
+
+  /* GNU callers ask for register information by passing a non-null REGS
+     to re_match and friends, never by setting no_sub, so clear it.  */
+  bufp->no_sub = 0;
+
+  /* Anchors match at newlines as well.  */
+  bufp->newline_anchor = 1;
+
+  err = re_compile_internal (bufp, pattern, length, re_syntax_options);
+
+  /* Success is reported as NULL, failure as the translated message.  */
+  return err
+	 ? gettext (__re_error_msgid + __re_error_msgid_idx[(int) err])
+	 : NULL;
+}
+#ifdef _LIBC
+weak_alias (__re_compile_pattern, re_compile_pattern)
+#endif
+
+/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can
+ also be assigned to arbitrarily: each pattern buffer stores its own
+ syntax, so it can be changed between regex compilations. */
+/* This has no initializer because initialized variables in Emacs
+ become read-only after dumping. */
+reg_syntax_t re_syntax_options;
+
+
+/* Specify the precise syntax of regexps for compilation. This provides
+ for compatibility for various utilities which historically have
+ different, incompatible syntaxes.
+
+ The argument SYNTAX is a bit mask comprised of the various bits
+ defined in regex.h. We return the old syntax. */
+
+reg_syntax_t
+re_set_syntax (syntax)
+     reg_syntax_t syntax;
+{
+  /* Swap in the new syntax bits and hand the previous ones back.  */
+  reg_syntax_t previous = re_syntax_options;
+
+  re_syntax_options = syntax;
+  return previous;
+}
+#ifdef _LIBC
+weak_alias (__re_set_syntax, re_set_syntax)
+#endif
+
+/* Fill BUFP->fastmap from all the initial states of the compiled pattern
+   and mark it as accurate.  Always returns 0.  */
+
+int
+re_compile_fastmap (bufp)
+     struct re_pattern_buffer *bufp;
+{
+  re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
+  char *map = bufp->fastmap;
+  const re_dfastate_t *contextual[3];
+  int k;
+
+  memset (map, '\0', sizeof (char) * SBC_MAX);
+  re_compile_fastmap_iter (bufp, dfa->init_state, map);
+
+  /* The context-dependent initial states need a pass of their own only
+     when they differ from the plain initial state.  */
+  contextual[0] = dfa->init_state_word;
+  contextual[1] = dfa->init_state_nl;
+  contextual[2] = dfa->init_state_begbuf;
+  for (k = 0; k < 3; ++k)
+    if (dfa->init_state != contextual[k])
+      re_compile_fastmap_iter (bufp, contextual[k], map);
+
+  bufp->fastmap_accurate = 1;
+  return 0;
+}
+#ifdef _LIBC
+weak_alias (__re_compile_fastmap, re_compile_fastmap)
+#endif
+
+/* Mark character CH in FASTMAP; if ICASE is nonzero, mark its lowercase
+   counterpart as well.  */
+
+static inline void
+re_set_fastmap (char *fastmap, int icase, int ch)
+{
+  fastmap[ch] = 1;
+  if (!icase)
+    return;
+  fastmap[tolower (ch)] = 1;
+}
+
+/* Helper function for re_compile_fastmap.
+ Compile fastmap for the initial_state INIT_STATE. */
+
+/* Add to FASTMAP every byte which can start a match from the initial
+   state INIT_STATE of the pattern in BUFP.  If the state contains the end
+   of the expression or a period, every byte is marked and the scan stops;
+   for the end of the expression BUFP->can_be_null is set as well.  */
+
+static void
+re_compile_fastmap_iter (bufp, init_state, fastmap)
+     regex_t *bufp;
+     const re_dfastate_t *init_state;
+     char *fastmap;
+{
+  re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
+  int node_cnt;
+  int icase = (MB_CUR_MAX == 1 && (bufp->syntax & RE_ICASE));
+  for (node_cnt = 0; node_cnt < init_state->nodes.nelem; ++node_cnt)
+    {
+      int node = init_state->nodes.elems[node_cnt];
+      re_token_type_t type = dfa->nodes[node].type;
+
+      if (type == CHARACTER)
+	re_set_fastmap (fastmap, icase, dfa->nodes[node].opr.c);
+      else if (type == SIMPLE_BRACKET)
+	{
+	  int i, j, ch;
+	  for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
+	    for (j = 0; j < UINT_BITS; ++j, ++ch)
+	      /* The mask must be unsigned: shifting a signed 1 into the
+		 top bit is undefined.  */
+	      if (dfa->nodes[node].opr.sbcset[i] & (1U << j))
+		re_set_fastmap (fastmap, icase, ch);
+	}
+#ifdef RE_ENABLE_I18N
+      else if (type == COMPLEX_BRACKET)
+	{
+	  int i;
+	  re_charset_t *cset = dfa->nodes[node].opr.mbcset;
+	  if (cset->non_match || cset->ncoll_syms || cset->nequiv_classes
+	      || cset->nranges || cset->nchar_classes)
+	    {
+# ifdef _LIBC
+	      if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0)
+		{
+		  /* In this case we want to catch the bytes which are
+		     the first byte of any collation elements.
+		     e.g. In da_DK, we want to catch 'a' since "aa"
+			  is a valid collation element, and don't catch
+			  'b' since 'b' is the only collation element
+			  which starts from 'b'.  */
+		  int j, ch;
+		  const int32_t *table = (const int32_t *)
+		    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
+		  for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
+		    for (j = 0; j < UINT_BITS; ++j, ++ch)
+		      if (table[ch] < 0)
+			re_set_fastmap (fastmap, icase, ch);
+		}
+# else
+	      if (MB_CUR_MAX > 1)
+		for (i = 0; i < SBC_MAX; ++i)
+		  if (__btowc (i) == WEOF)
+		    re_set_fastmap (fastmap, icase, i);
+# endif /* not _LIBC */
+	    }
+	  /* Only the first byte of each multibyte character matters.  */
+	  for (i = 0; i < cset->nmbchars; ++i)
+	    {
+	      char buf[256];
+	      mbstate_t state;
+	      memset (&state, '\0', sizeof (state));
+	      __wcrtomb (buf, cset->mbchars[i], &state);
+	      re_set_fastmap (fastmap, icase, *(unsigned char *) buf);
+	    }
+	}
+#endif /* RE_ENABLE_I18N */
+      else if (type == END_OF_RE || type == OP_PERIOD)
+	{
+	  memset (fastmap, '\1', sizeof (char) * SBC_MAX);
+	  if (type == END_OF_RE)
+	    bufp->can_be_null = 1;
+	  return;
+	}
+    }
+}
+\f
+/* Entry point for POSIX code. */
+/* regcomp takes a regular expression as a string and compiles it.
+
+ PREG is a regex_t *. We do not expect any fields to be initialized,
+ since POSIX says we shouldn't. Thus, we set
+
+ `buffer' to the compiled pattern;
+ `used' to the length of the compiled pattern;
+ `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
+ REG_EXTENDED bit in CFLAGS is set; otherwise, to
+ RE_SYNTAX_POSIX_BASIC;
+ `newline_anchor' to REG_NEWLINE being set in CFLAGS;
+ `fastmap' to an allocated space for the fastmap;
+ `fastmap_accurate' to zero;
+ `re_nsub' to the number of subexpressions in PATTERN.
+
+ PATTERN is the address of the pattern string.
+
+ CFLAGS is a series of bits which affect compilation.
+
+ If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
+ use POSIX basic syntax.
+
+ If REG_NEWLINE is set, then . and [^...] don't match newline.
+ Also, regexec will try a match beginning after every newline.
+
+ If REG_ICASE is set, then we consider upper- and lowercase
+ versions of letters to be equivalent when matching.
+
+ If REG_NOSUB is set, then when PREG is passed to regexec, that
+ routine will report only success or failure, and nothing about the
+ registers.
+
+ It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for
+ the return codes and their meanings.) */
+
+int
+regcomp (preg, pattern, cflags)
+ regex_t *__restrict preg;
+ const char *__restrict pattern;
+ int cflags;
+{
+ reg_errcode_t ret;
+ reg_syntax_t syntax = ((cflags & REG_EXTENDED) ? RE_SYNTAX_POSIX_EXTENDED
+ : RE_SYNTAX_POSIX_BASIC);
+
+ /* Start from an empty pattern buffer; nothing in PREG is trusted. */
+ preg->buffer = NULL;
+ preg->allocated = 0;
+ preg->used = 0;
+
+ /* Try to allocate space for the fastmap. */
+ preg->fastmap = re_malloc (char, SBC_MAX);
+ if (BE (preg->fastmap == NULL, 0))
+ return REG_ESPACE;
+
+ syntax |= (cflags & REG_ICASE) ? RE_ICASE : 0;
+
+ /* If REG_NEWLINE is set, newlines are treated differently. */
+ if (cflags & REG_NEWLINE)
+ { /* REG_NEWLINE implies neither . nor [^...] match newline. */
+ syntax &= ~RE_DOT_NEWLINE;
+ syntax |= RE_HAT_LISTS_NOT_NEWLINE;
+ /* It also changes the matching behavior. */
+ preg->newline_anchor = 1;
+ }
+ else
+ preg->newline_anchor = 0;
+ preg->no_sub = !!(cflags & REG_NOSUB);
+ preg->translate = NULL;
+
+ ret = re_compile_internal (preg, pattern, strlen (pattern), syntax);
+
+ /* POSIX doesn't distinguish between an unmatched open-group and an
+ unmatched close-group: both are REG_EPAREN. */
+ if (ret == REG_ERPAREN)
+ ret = REG_EPAREN;
+
+ /* We have already checked preg->fastmap != NULL. */
+ if (BE (ret == REG_NOERROR, 1))
+ /* Compute the fastmap now, since regexec cannot modify the pattern
+ buffer. This function never fails in this implementation. */
+ (void) re_compile_fastmap (preg);
+ else
+ {
+ /* Some error occurred while compiling the expression. */
+ re_free (preg->fastmap);
+ preg->fastmap = NULL;
+ }
+
+ return (int) ret;
+}
+#ifdef _LIBC
+weak_alias (__regcomp, regcomp)
+#endif
+
+/* Store in ERRBUF (of ERRBUF_SIZE bytes) the message for ERRCODE, as returned
+   by regcomp or regexec, and return the size needed for it.  PREG is unused.  */
+
+size_t
+regerror (errcode, preg, errbuf, errbuf_size)
+     int errcode;
+     const regex_t *preg;
+     char *errbuf;
+     size_t errbuf_size;
+{
+  const int known_codes = (int) (sizeof (__re_error_msgid_idx)
+                                 / sizeof (__re_error_msgid_idx[0]));
+  const char *text;
+  size_t needed, copied;
+
+  /* An out-of-range code can only come from a bug, either in the caller
+     or in the regex code itself; dump core so that it gets fixed.  */
+  if (BE (errcode < 0 || errcode >= known_codes, 0))
+    abort ();
+
+  text = gettext (__re_error_msgid + __re_error_msgid_idx[errcode]);
+  needed = strlen (text) + 1;  /* Counts the terminating null.  */
+
+  /* With no room at all, only the required size is reported.  */
+  if (BE (errbuf_size == 0, 0))
+    return needed;
+
+  if (BE (needed <= errbuf_size, 1))
+    memcpy (errbuf, text, needed);
+  else
+    {
+      /* Truncate, but always null-terminate.  */
+      copied = errbuf_size - 1;
+#if defined HAVE_MEMPCPY || defined _LIBC
+      *((char *) __mempcpy (errbuf, text, copied)) = '\0';
+#else
+      memcpy (errbuf, text, copied);
+      errbuf[copied] = 0;
+#endif
+    }
+
+  return needed;
+}
+#ifdef _LIBC
+weak_alias (__regerror, regerror)
+#endif
+
+/* Free everything owned by DFA, and finally the DFA structure itself. */
+static void
+free_dfa_content (re_dfa_t *dfa)
+{
+ int i, j;
+
+ re_free (dfa->subexps);
+
+ for (i = 0; i < dfa->nodes_len; ++i)
+ {
+ re_token_t *node = dfa->nodes + i; /* Bracket sets are freed only for non-duplicated nodes. */
+#ifdef RE_ENABLE_I18N
+ if (node->type == COMPLEX_BRACKET && node->duplicated == 0)
+ free_charset (node->opr.mbcset);
+ else
+#endif /* RE_ENABLE_I18N */
+ if (node->type == SIMPLE_BRACKET && node->duplicated == 0)
+ re_free (node->opr.sbcset);
+ }
+ re_free (dfa->nexts);
+ for (i = 0; i < dfa->nodes_len; ++i) /* Per-node sets; each array is skipped when it is NULL. */
+ {
+ if (dfa->eclosures != NULL)
+ re_node_set_free (dfa->eclosures + i);
+ if (dfa->inveclosures != NULL)
+ re_node_set_free (dfa->inveclosures + i);
+ if (dfa->edests != NULL)
+ re_node_set_free (dfa->edests + i);
+ }
+ re_free (dfa->edests);
+ re_free (dfa->eclosures);
+ re_free (dfa->inveclosures);
+ re_free (dfa->nodes);
+
+ for (i = 0; i <= dfa->state_hash_mask; ++i) /* Every bucket of the state table. */
+ {
+ struct re_state_table_entry *entry = dfa->state_table + i;
+ for (j = 0; j < entry->num; ++j)
+ {
+ re_dfastate_t *state = entry->array[j];
+ free_state (state);
+ }
+ re_free (entry->array);
+ }
+ re_free (dfa->state_table);
+
+ if (dfa->word_char != NULL)
+ re_free (dfa->word_char);
+#ifdef DEBUG
+ re_free (dfa->re_str); /* Copy of the pattern text, kept in DEBUG builds only. */
+#endif
+
+ re_free (dfa);
+}
+
+/* Free the space used by PREG; its pointers are reset, so freeing twice is safe.  */
+void
+regfree (preg)
+     regex_t *preg;
+{
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  if (BE (dfa != NULL, 1))
+    free_dfa_content (dfa);
+  preg->buffer = NULL;
+  preg->allocated = 0;
+  re_free (preg->fastmap);
+  preg->fastmap = NULL;
+}
+#ifdef _LIBC
+weak_alias (__regfree, regfree)
+#endif
+\f
+/* Entry points compatible with 4.2 BSD regex library. We don't define
+ them unless specifically requested. */
+
+#if defined _REGEX_RE_COMP || defined _LIBC
+
+/* BSD has one and only one pattern buffer. */
+static struct re_pattern_buffer re_comp_buf;
+/* Compile S into re_comp_buf. Returns NULL (or 0) on success, otherwise an error message. */
+char *
+# ifdef _LIBC
+/* Make these definitions weak in libc, so POSIX programs can redefine
+ these names if they don't use our functions, and still use
+ regcomp/regexec above without link errors. */
+weak_function
+# endif
+re_comp (s)
+ const char *s;
+{
+ reg_errcode_t ret;
+ char *fastmap;
+
+ if (!s) /* A null S keeps the previous pattern, which must exist. */
+ {
+ if (!re_comp_buf.buffer)
+ return gettext ("No previous regular expression");
+ return 0;
+ }
+
+ if (re_comp_buf.buffer)
+ {
+ fastmap = re_comp_buf.fastmap; /* Keep the fastmap across the regfree below. */
+ re_comp_buf.fastmap = NULL;
+ __regfree (&re_comp_buf);
+ memset (&re_comp_buf, '\0', sizeof (re_comp_buf));
+ re_comp_buf.fastmap = fastmap;
+ }
+
+ if (re_comp_buf.fastmap == NULL) /* No fastmap yet: allocate one. */
+ {
+ re_comp_buf.fastmap = (char *) malloc (SBC_MAX);
+ if (re_comp_buf.fastmap == NULL)
+ return (char *) gettext (__re_error_msgid
+ + __re_error_msgid_idx[(int) REG_ESPACE]);
+ }
+
+ /* Since `re_exec' always passes NULL for the `regs' argument, we
+ don't need to initialize the pattern buffer fields which affect it. */
+
+ /* Match anchors at newlines. */
+ re_comp_buf.newline_anchor = 1;
+
+ ret = re_compile_internal (&re_comp_buf, s, strlen (s), re_syntax_options);
+
+ if (!ret)
+ return NULL;
+
+ /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */
+ return (char *) gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]);
+}
+
+#ifdef _LIBC
+libc_freeres_fn (free_mem) /* Presumably registered with libc's resource-freeing hook -- confirm. */
+{
+ __regfree (&re_comp_buf); /* Release whatever the last re_comp left in the static buffer. */
+}
+#endif
+
+#endif /* _REGEX_RE_COMP */
+\f
+/* Internal entry point.  Compile the regular expression PATTERN, whose
+   length is LENGTH, into PREG.  SYNTAX selects the regular expression syntax.
+   On failure the partially built dfa is freed and PREG->buffer is reset.  */
+
+static reg_errcode_t
+re_compile_internal (preg, pattern, length, syntax)
+     regex_t *preg;
+     const char * pattern;
+     int length;
+     reg_syntax_t syntax;
+{
+  reg_errcode_t err = REG_NOERROR;
+  re_dfa_t *dfa;
+  re_string_t regexp;
+
+  /* Initialize the pattern buffer.  */
+  preg->fastmap_accurate = 0;
+  preg->syntax = syntax;
+  preg->not_bol = preg->not_eol = 0;
+  preg->used = 0;
+  preg->re_nsub = 0;
+  preg->can_be_null = 0;
+  preg->regs_allocated = REGS_UNALLOCATED;
+
+  /* Initialize the dfa.  */
+  dfa = (re_dfa_t *) preg->buffer;
+  if (preg->allocated < sizeof (re_dfa_t))
+    {
+      /* If zero allocated, but buffer is non-null, try to realloc
+         enough space.  This loses if buffer's address is bogus, but
+         that is the user's responsibility.  If ->buffer is NULL this
+         is a simple allocation.  */
+      dfa = re_realloc (preg->buffer, re_dfa_t, 1);
+      if (dfa == NULL)
+        return REG_ESPACE;
+      preg->allocated = sizeof (re_dfa_t);
+    }
+  preg->buffer = (unsigned char *) dfa;
+  preg->used = sizeof (re_dfa_t);
+
+  err = init_dfa (dfa, length);
+  if (BE (err != REG_NOERROR, 0))
+    {
+      re_free (dfa);
+      preg->buffer = NULL;
+      preg->allocated = 0;
+      return err;
+    }
+#ifdef DEBUG
+  dfa->re_str = re_malloc (char, length + 1);
+  strncpy (dfa->re_str, pattern, length + 1);
+#endif
+
+  err = re_string_construct (&regexp, pattern, length, preg->translate,
+                             syntax & RE_ICASE);
+  if (BE (err != REG_NOERROR, 0))
+    {
+      free_dfa_content (dfa);  /* Also frees what init_dfa allocated.  */
+      preg->buffer = NULL;
+      preg->allocated = 0;
+      return err;
+    }
+
+  /* Parse the regular expression, and build a structure tree.  */
+  preg->re_nsub = 0;
+  dfa->str_tree = parse (&regexp, preg, syntax, &err);
+  if (BE (dfa->str_tree == NULL, 0))
+    goto re_compile_internal_free_return;
+
+  /* Analyze the tree and collect information which is necessary to
+     create the dfa.  */
+  err = analyze (dfa);
+  if (BE (err != REG_NOERROR, 0))
+    goto re_compile_internal_free_return;
+
+  /* Then create the initial state of the dfa.  */
+  err = create_initial_state (dfa);
+
+ re_compile_internal_free_return:
+  /* Release work areas on every path; no tree exists if parsing failed.  */
+  if (dfa->str_tree != NULL)
+    free_workarea_compile (preg);
+  re_string_destruct (&regexp);
+  if (BE (err != REG_NOERROR, 0))
+    {
+      free_dfa_content (dfa);
+      preg->buffer = NULL;
+      preg->allocated = 0;
+    }
+
+  return err;
+}
+
+/* Initialize DFA.  We use the length of the regular expression PAT_LEN
+   as the initial length of some arrays.  On failure DFA owns nothing.  */
+
+static reg_errcode_t
+init_dfa (dfa, pat_len)
+     re_dfa_t *dfa;
+     int pat_len;
+{
+  int table_size;
+
+  memset (dfa, '\0', sizeof (re_dfa_t));
+
+  dfa->nodes_alloc = pat_len + 1;
+  dfa->nodes = re_malloc (re_token_t, dfa->nodes_alloc);
+
+  dfa->states_alloc = pat_len + 1;
+
+  /* table_size = the smallest power of two greater than pat_len.  */
+  for (table_size = 1; table_size > 0; table_size <<= 1)
+    if (table_size > pat_len)
+      break;
+
+  dfa->state_table = calloc (sizeof (struct re_state_table_entry), table_size);
+  dfa->state_hash_mask = table_size - 1;
+
+  dfa->subexps_alloc = 1;
+  dfa->subexps = re_malloc (re_subexp_t, dfa->subexps_alloc);
+  dfa->word_char = NULL;
+
+  if (BE (dfa->nodes == NULL || dfa->state_table == NULL
+          || dfa->subexps == NULL, 0))
+    {
+      /* Give back whichever allocations succeeded, then clear DFA.  */
+      re_free (dfa->subexps);
+      re_free (dfa->state_table);
+      re_free (dfa->nodes);
+      memset (dfa, '\0', sizeof (re_dfa_t));
+      return REG_ESPACE;
+    }
+  return REG_NOERROR;
+}
+
+/* Initialize the WORD_CHAR table, which indicates which characters are
+   "word" characters, i.e. the word-constituent characters (alphanumerics
+   and '_') used by operators like "\<", "\>", etc.  */
+
+static reg_errcode_t
+init_word_char (dfa)
+     re_dfa_t *dfa;
+{
+  int i, j, ch;
+  dfa->word_char = (re_bitset_ptr_t) calloc (sizeof (bitset), 1);
+  if (BE (dfa->word_char == NULL, 0))
+    return REG_ESPACE;
+  for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
+    for (j = 0; j < UINT_BITS; ++j, ++ch)
+      if (isalnum (ch) || ch == '_')
+        dfa->word_char[i] |= 1u << j;  /* 1u: a signed 1 overflows at the top bit.  */
+  return REG_NOERROR;
+}
+
+/* Release the scratch data that is needed only while PREG is compiled.  */
+
+static void
+free_workarea_compile (preg)
+     regex_t *preg;
+{
+  re_dfa_t *compiled = (re_dfa_t *) preg->buffer;
+  free_bin_tree (compiled->str_tree);   /* The parse tree...  */
+  compiled->str_tree = NULL;
+  re_free (compiled->org_indices);      /* ...and the original-index table.  */
+  compiled->org_indices = NULL;
+}
+
+/* Create initial states for all contexts.  The temporary node set used
+   for this is released on every return path once it has been built.  */
+
+static reg_errcode_t
+create_initial_state (dfa)
+     re_dfa_t *dfa;
+{
+  int first, i;
+  reg_errcode_t err;
+  re_node_set init_nodes;
+
+  /* Initial states have the epsilon closure of the node which is
+     the first node of the regular expression.  */
+  first = dfa->str_tree->first;
+  dfa->init_node = first;
+  err = re_node_set_init_copy (&init_nodes, dfa->eclosures + first);
+  if (BE (err != REG_NOERROR, 0))
+    return err;
+
+  /* The back-references which are in initial states can epsilon transit,
+     since in this case all of the subexpressions can be null.
+     Then we add epsilon closures of the nodes which are the next nodes of
+     the back-references.  */
+  if (dfa->nbackref > 0)
+    for (i = 0; i < init_nodes.nelem; ++i)
+      {
+        int node_idx = init_nodes.elems[i];
+        re_token_type_t type = dfa->nodes[node_idx].type;
+        int clexp_idx, dest_idx;
+
+        if (type != OP_BACK_REF)
+          continue;
+        for (clexp_idx = 0; clexp_idx < init_nodes.nelem; ++clexp_idx)
+          {
+            re_token_t *clexp_node;
+            clexp_node = dfa->nodes + init_nodes.elems[clexp_idx];
+            if (clexp_node->type == OP_CLOSE_SUBEXP
+                && clexp_node->opr.idx + 1 == dfa->nodes[node_idx].opr.idx)
+              break;
+          }
+        if (clexp_idx == init_nodes.nelem)
+          continue;
+
+        dest_idx = dfa->edests[node_idx].elems[0];
+        if (!re_node_set_contains (&init_nodes, dest_idx))
+          {
+            re_node_set_merge (&init_nodes, dfa->eclosures + dest_idx);
+            i = 0;
+          }
+      }
+
+  /* It must be the first time to invoke acquire_state.  */
+  dfa->init_state = re_acquire_state_context (&err, dfa, &init_nodes, 0);
+  /* We don't check ERR here, since the initial state must not be NULL.  */
+  if (BE (dfa->init_state == NULL, 0))
+    goto free_return;
+  if (dfa->init_state->has_constraint)
+    {
+      dfa->init_state_word = re_acquire_state_context (&err, dfa, &init_nodes,
+                                                       CONTEXT_WORD);
+      dfa->init_state_nl = re_acquire_state_context (&err, dfa, &init_nodes,
+                                                     CONTEXT_NEWLINE);
+      dfa->init_state_begbuf = re_acquire_state_context (&err, dfa,
+                                                         &init_nodes,
+                                                         CONTEXT_NEWLINE
+                                                         | CONTEXT_BEGBUF);
+      if (BE (dfa->init_state_word == NULL || dfa->init_state_nl == NULL
+              || dfa->init_state_begbuf == NULL, 0))
+        goto free_return;
+    }
+  else
+    dfa->init_state_word = dfa->init_state_nl
+      = dfa->init_state_begbuf = dfa->init_state;
+
+  err = REG_NOERROR;
+ free_return:
+  re_node_set_free (&init_nodes);
+  return err;
+}
+\f
+/* Analyze the structure tree, and calculate "first", "next", "edest",
+   "eclosure", and "inveclosure".  Returns REG_ESPACE when out of memory.  */
+
+static reg_errcode_t
+analyze (dfa)
+     re_dfa_t *dfa;
+{
+  int i;
+  reg_errcode_t ret;
+  dfa->nexts = re_malloc (int, dfa->nodes_alloc);
+  dfa->org_indices = re_malloc (int, dfa->nodes_alloc);
+  dfa->edests = re_malloc (re_node_set, dfa->nodes_alloc);
+  dfa->eclosures = re_malloc (re_node_set, dfa->nodes_alloc);
+  dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_alloc);
+  if (BE (dfa->nexts == NULL || dfa->org_indices == NULL || dfa->edests == NULL
+          || dfa->eclosures == NULL || dfa->inveclosures == NULL, 0))
+    { /* Entries are uninitialized: keep them away from free_dfa_content.  */
+      re_free (dfa->edests);
+      re_free (dfa->eclosures);
+      re_free (dfa->inveclosures);
+      dfa->edests = dfa->eclosures = dfa->inveclosures = NULL;
+      return REG_ESPACE;
+    }
+  for (i = 0; i < dfa->nodes_len; ++i)
+    {
+      dfa->nexts[i] = -1;
+      re_node_set_init_empty (dfa->edests + i);
+      re_node_set_init_empty (dfa->eclosures + i);
+      re_node_set_init_empty (dfa->inveclosures + i);
+    }
+  ret = analyze_tree (dfa, dfa->str_tree);
+  if (BE (ret == REG_NOERROR, 1))
+    ret = calc_eclosure (dfa);
+  if (ret == REG_NOERROR)
+    calc_inveclosure (dfa);
+  return ret;
+}
+
+/* Helper function for analyze.
+   Calculate "first", "next" and "edest" for NODE, then do the same,
+   recursively, for its left and right subtrees.
+   Return REG_NOERROR, or the first error that a subtree reported,
+   in which case the remaining subtrees are not visited.  */
+
+static reg_errcode_t
+analyze_tree (dfa, node)
+     re_dfa_t *dfa;
+     bin_tree_t *node;
+{
+  reg_errcode_t status = REG_NOERROR;
+
+  /* NODE itself comes first; each value is computed only while it is
+     still unset.  */
+  if (node->first == -1)
+    calc_first (dfa, node);
+  if (node->next == -1)
+    calc_next (dfa, node);
+  if (node->eclosure.nelem == 0)
+    calc_epsdest (dfa, node);
+
+  /* Descend into the left child...  */
+  if (node->left != NULL)
+    status = analyze_tree (dfa, node->left);
+
+  /* ...and into the right child, unless the left subtree failed.  */
+  if (status == REG_NOERROR && node->right != NULL)
+    status = analyze_tree (dfa, node->right);
+
+  return status;
+}
+
+/* Calculate "first" for the node NODE and store it in NODE->first. */
+static void
+calc_first (dfa, node)
+ re_dfa_t *dfa;
+ bin_tree_t *node;
+{
+ int idx, type;
+ idx = node->node_idx;
+ type = (node->type == 0) ? dfa->nodes[idx].type : node->type; /* Type 0: use the dfa node's type. */
+
+ switch (type)
+ {
+#ifdef DEBUG
+ case OP_OPEN_BRACKET:
+ case OP_CLOSE_BRACKET:
+ case OP_OPEN_DUP_NUM:
+ case OP_CLOSE_DUP_NUM:
+ case OP_NON_MATCH_LIST:
+ case OP_OPEN_COLL_ELEM:
+ case OP_CLOSE_COLL_ELEM:
+ case OP_OPEN_EQUIV_CLASS:
+ case OP_CLOSE_EQUIV_CLASS:
+ case OP_OPEN_CHAR_CLASS:
+ case OP_CLOSE_CHAR_CLASS:
+ /* These token types must not appear here. */
+ assert (0);
+#endif
+ case END_OF_RE:
+ case CHARACTER:
+ case OP_PERIOD:
+ case OP_DUP_ASTERISK:
+ case OP_DUP_QUESTION:
+#ifdef RE_ENABLE_I18N
+ case COMPLEX_BRACKET:
+#endif /* RE_ENABLE_I18N */
+ case SIMPLE_BRACKET:
+ case OP_BACK_REF:
+ case ANCHOR:
+ case OP_OPEN_SUBEXP:
+ case OP_CLOSE_SUBEXP:
+ node->first = idx; /* These nodes are their own first node. */
+ break;
+ case OP_DUP_PLUS:
+#ifdef DEBUG
+ assert (node->left != NULL);
+#endif
+ if (node->left->first == -1)
+ calc_first (dfa, node->left);
+ node->first = node->left->first; /* Inherited from the left child. */
+ break;
+ case OP_ALT:
+ node->first = idx;
+ break;
+ /* Nothing falls through here: OP_ALT always breaks above. */
+ default:
+#ifdef DEBUG
+ assert (node->left != NULL);
+#endif
+ if (node->left->first == -1)
+ calc_first (dfa, node->left);
+ node->first = node->left->first; /* Same rule as OP_DUP_PLUS. */
+ break;
+ }
+}
+
+/* Calculate "next" for the node NODE; it is derived from NODE's parent. */
+
+static void
+calc_next (dfa, node)
+ re_dfa_t *dfa;
+ bin_tree_t *node;
+{
+ int idx, type;
+ bin_tree_t *parent = node->parent;
+ if (parent == NULL) /* The root has no next node. */
+ {
+ node->next = -1;
+ idx = node->node_idx;
+ if (node->type == 0)
+ dfa->nexts[idx] = node->next;
+ return;
+ }
+
+ idx = parent->node_idx;
+ type = (parent->type == 0) ? dfa->nodes[idx].type : parent->type; /* Type 0: use the dfa node's type. */
+
+ switch (type)
+ {
+ case OP_DUP_ASTERISK:
+ case OP_DUP_PLUS:
+ node->next = idx; /* Go back to the repetition operator itself. */
+ break;
+ case CONCAT:
+ if (parent->left == node)
+ {
+ if (parent->right->first == -1)
+ calc_first (dfa, parent->right);
+ node->next = parent->right->first; /* Left operand: continue with the right operand. */
+ break;
+ }
+ /* else (right operand of CONCAT) fall through */
+ default:
+ if (parent->next == -1)
+ calc_next (dfa, parent);
+ node->next = parent->next; /* Otherwise inherit the parent's next. */
+ break;
+ }
+ idx = node->node_idx;
+ if (node->type == 0) /* Mirror the result into dfa->nexts for type-0 tree nodes. */
+ dfa->nexts[idx] = node->next;
+}
+
+/* Calculate "edest" for the node NODE; only tree nodes of type 0 get one. */
+
+static void
+calc_epsdest (dfa, node)
+ re_dfa_t *dfa;
+ bin_tree_t *node;
+{
+ int idx;
+ idx = node->node_idx;
+ if (node->type == 0)
+ {
+ if (dfa->nodes[idx].type == OP_DUP_ASTERISK
+ || dfa->nodes[idx].type == OP_DUP_PLUS
+ || dfa->nodes[idx].type == OP_DUP_QUESTION)
+ {
+ if (node->left->first == -1)
+ calc_first (dfa, node->left);
+ if (node->next == -1)
+ calc_next (dfa, node);
+ re_node_set_init_2 (dfa->edests + idx, node->left->first,
+ node->next); /* Either enter the operand or skip to next. */
+ }
+ else if (dfa->nodes[idx].type == OP_ALT)
+ {
+ int left, right; /* A missing branch is replaced by NODE's next. */
+ if (node->left != NULL)
+ {
+ if (node->left->first == -1)
+ calc_first (dfa, node->left);
+ left = node->left->first;
+ }
+ else
+ {
+ if (node->next == -1)
+ calc_next (dfa, node);
+ left = node->next;
+ }
+ if (node->right != NULL)
+ {
+ if (node->right->first == -1)
+ calc_first (dfa, node->right);
+ right = node->right->first;
+ }
+ else
+ {
+ if (node->next == -1)
+ calc_next (dfa, node);
+ right = node->next;
+ }
+ re_node_set_init_2 (dfa->edests + idx, left, right);
+ }
+ else if (dfa->nodes[idx].type == ANCHOR
+ || dfa->nodes[idx].type == OP_OPEN_SUBEXP
+ || dfa->nodes[idx].type == OP_CLOSE_SUBEXP
+ || dfa->nodes[idx].type == OP_BACK_REF)
+ re_node_set_init_1 (dfa->edests + idx, node->next); /* Single destination: next. */
+ }
+}
+
+/* Duplicate the nodes reached by epsilon transitions from TOP_ORG_NODE,
+ wiring the copies up starting at TOP_CLONE_NODE. Duplicated nodes carry
+ INIT_CONSTRAINT in addition to their own; ROOT_NODE is used to detect loops. */
+
+static reg_errcode_t
+duplicate_node_closure (dfa, top_org_node, top_clone_node, root_node,
+ init_constraint)
+ re_dfa_t *dfa;
+ int top_org_node, top_clone_node, root_node;
+ unsigned int init_constraint;
+{
+ reg_errcode_t err;
+ int org_node, clone_node, ret;
+ unsigned int constraint = init_constraint;
+ for (org_node = top_org_node, clone_node = top_clone_node;;) /* Left only by break or return. */
+ {
+ int org_dest, clone_dest;
+ if (dfa->nodes[org_node].type == OP_BACK_REF)
+ {
+ /* If the back reference epsilon-transit, its destination must
+ also have the constraint. Then duplicate the epsilon closure
+ of the destination of the back reference, and store it in
+ edests of the back reference. */
+ org_dest = dfa->nexts[org_node];
+ re_node_set_empty (dfa->edests + clone_node);
+ err = duplicate_node (&clone_dest, dfa, org_dest, constraint);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ dfa->nexts[clone_node] = dfa->nexts[org_node];
+ ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
+ if (BE (ret < 0, 0))
+ return REG_ESPACE;
+ }
+ else if (dfa->edests[org_node].nelem == 0)
+ {
+ /* In case of the node can't epsilon-transit, don't duplicate the
+ destination and store the original destination as the
+ destination of the node. */
+ dfa->nexts[clone_node] = dfa->nexts[org_node];
+ break;
+ }
+ else if (dfa->edests[org_node].nelem == 1)
+ {
+ /* In case of the node can epsilon-transit, and it has only one
+ destination. */
+ org_dest = dfa->edests[org_node].elems[0];
+ re_node_set_empty (dfa->edests + clone_node);
+ if (dfa->nodes[org_node].type == ANCHOR)
+ {
+ /* In case of the node has another constraint, append it. */
+ if (org_node == root_node && clone_node != org_node)
+ {
+ /* ...but if the node is root_node itself, it means the
+ epsilon closure have a loop, then tie it to the
+ destination of the root_node. */
+ ret = re_node_set_insert (dfa->edests + clone_node,
+ org_dest);
+ if (BE (ret < 0, 0))
+ return REG_ESPACE;
+ break;
+ }
+ constraint |= dfa->nodes[org_node].opr.ctx_type;
+ }
+ err = duplicate_node (&clone_dest, dfa, org_dest, constraint);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
+ if (BE (ret < 0, 0))
+ return REG_ESPACE;
+ }
+ else /* dfa->edests[org_node].nelem == 2 */
+ {
+ /* In case of the node can epsilon-transit, and it has two
+ destinations. E.g. '|', '*', '+', '?'. */
+ org_dest = dfa->edests[org_node].elems[0];
+ re_node_set_empty (dfa->edests + clone_node);
+ /* Search for a duplicated node which satisfies the constraint. */
+ clone_dest = search_duplicated_node (dfa, org_dest, constraint);
+ if (clone_dest == -1)
+ {
+ /* There is no such duplicated node, create a new one. */
+ err = duplicate_node (&clone_dest, dfa, org_dest, constraint);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
+ if (BE (ret < 0, 0))
+ return REG_ESPACE;
+ err = duplicate_node_closure (dfa, org_dest, clone_dest,
+ root_node, constraint); /* First destination: handled recursively. */
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ }
+ else
+ {
+ /* There is a duplicated node which satisfies the constraint,
+ use it to avoid infinite loop. */
+ ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
+ if (BE (ret < 0, 0))
+ return REG_ESPACE;
+ }
+
+ org_dest = dfa->edests[org_node].elems[1]; /* Second destination: followed by this loop. */
+ err = duplicate_node (&clone_dest, dfa, org_dest, constraint);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
+ if (BE (ret < 0, 0))
+ return REG_ESPACE;
+ }
+ org_node = org_dest; /* Step on to the destination and its copy. */
+ clone_node = clone_dest;
+ }
+ return REG_NOERROR;
+}
+
+/* Look backwards from the end of the node array for a duplicate made from
+   ORG_NODE with exactly the constraint CONSTRAINT; -1 if there is none.  */
+static int
+search_duplicated_node (dfa, org_node, constraint)
+     re_dfa_t *dfa;
+     int org_node;
+     unsigned int constraint;
+{
+  int cand = dfa->nodes_len - 1;
+  while (dfa->nodes[cand].duplicated && cand > 0)
+    {
+      if (dfa->org_indices[cand] == org_node
+          && dfa->nodes[cand].constraint == constraint)
+        return cand;
+      --cand;
+    }
+  return -1;
+}
+
+/* Append to DFA a copy of the node ORG_IDX that carries CONSTRAINT (plus
+   the node's own context type when it is an ANCHOR).  The copy's index is
+   stored in *NEW_IDX; returns REG_NOERROR, or REG_ESPACE on failure.  */
+
+static reg_errcode_t
+duplicate_node (new_idx, dfa, org_idx, constraint)
+     re_dfa_t *dfa;
+     int *new_idx, org_idx;
+     unsigned int constraint;
+{
+  re_token_t *clone;
+  int clone_idx = re_dfa_add_node (dfa, dfa->nodes[org_idx], 1);
+  if (BE (clone_idx == -1, 0))
+    return REG_ESPACE;
+
+  clone = dfa->nodes + clone_idx;
+  clone->constraint = constraint;
+  if (dfa->nodes[org_idx].type == ANCHOR)
+    clone->constraint |= dfa->nodes[org_idx].opr.ctx_type;
+  clone->duplicated = 1;
+  /* The copy starts out with empty node sets.  */
+  re_node_set_init_empty (dfa->edests + clone_idx);
+  re_node_set_init_empty (dfa->eclosures + clone_idx);
+  re_node_set_init_empty (dfa->inveclosures + clone_idx);
+
+  /* Remember which node the copy was made from.  */
+  dfa->org_indices[clone_idx] = org_idx;
+  *new_idx = clone_idx;
+  return REG_NOERROR;
+}
+
+/* Invert the epsilon closures: each node records the nodes that reach it.  */
+static void
+calc_inveclosure (dfa)
+     re_dfa_t *dfa;
+{
+  int from, k;
+  for (from = 0; from < dfa->nodes_len; ++from)
+    {
+      const re_node_set *closure = dfa->eclosures + from;
+      for (k = 0; k < closure->nelem; ++k)
+        /* The result of the insertion is not checked.  */
+        re_node_set_insert (dfa->inveclosures + closure->elems[k], from);
+    }
+}
+
+/* Calculate "eclosure" for all the nodes in DFA, rescanning until none is left incomplete. */
+
+static reg_errcode_t
+calc_eclosure (dfa)
+ re_dfa_t *dfa;
+{
+ int node_idx, incomplete;
+#ifdef DEBUG
+ assert (dfa->nodes_len > 0);
+#endif
+ incomplete = 0;
+ /* For each node, calculate its epsilon closure. */
+ for (node_idx = 0; ; ++node_idx)
+ {
+ reg_errcode_t err;
+ re_node_set eclosure_elem;
+ if (node_idx == dfa->nodes_len) /* End of a pass over the nodes. */
+ {
+ if (!incomplete)
+ break;
+ incomplete = 0; /* Something was left incomplete: start another pass. */
+ node_idx = 0;
+ }
+
+#ifdef DEBUG
+ assert (dfa->eclosures[node_idx].nelem != -1);
+#endif
+ /* If we have already calculated, skip it. */
+ if (dfa->eclosures[node_idx].nelem != 0)
+ continue;
+ /* Calculate epsilon closure of `node_idx'. */
+ err = calc_eclosure_iter (&eclosure_elem, dfa, node_idx, 1);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+
+ if (dfa->eclosures[node_idx].nelem == 0) /* Still empty: discard the result and retry later. */
+ {
+ incomplete = 1;
+ re_node_set_free (&eclosure_elem);
+ }
+ }
+ return REG_NOERROR;
+}
+
+/* Calculate epsilon closure of NODE and store it in *NEW_SET.  The
+   partially built set is released when an error is returned.  */
+
+static reg_errcode_t
+calc_eclosure_iter (new_set, dfa, node, root)
+     re_node_set *new_set;
+     re_dfa_t *dfa;
+     int node, root;
+{
+  reg_errcode_t err;
+  unsigned int constraint;
+  int i, incomplete = 0;
+  re_node_set eclosure;
+  err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1);
+  if (BE (err != REG_NOERROR, 0))
+    return err;
+
+  /* This indicates that we are calculating this node now.
+     We reference this value to avoid infinite loop.  */
+  dfa->eclosures[node].nelem = -1;
+  constraint = ((dfa->nodes[node].type == ANCHOR)
+                ? dfa->nodes[node].opr.ctx_type : 0);
+  /* If the current node has constraints, duplicate all nodes.
+     Since they must inherit the constraints.  */
+  if (constraint && !dfa->nodes[dfa->edests[node].elems[0]].duplicated)
+    {
+      err = duplicate_node_closure (dfa, node, node, node, constraint);
+      if (BE (err != REG_NOERROR, 0))
+        goto free_return;
+    }
+
+  /* Expand each epsilon destination nodes.  */
+  if (IS_EPSILON_NODE(dfa->nodes[node].type))
+    for (i = 0; i < dfa->edests[node].nelem; ++i)
+      {
+        re_node_set eclosure_elem;
+        int edest = dfa->edests[node].elems[i];
+        /* If calculating the epsilon closure of `edest' is in progress,
+           return intermediate result.  */
+        if (dfa->eclosures[edest].nelem == -1)
+          {
+            incomplete = 1;
+            continue;
+          }
+        /* If we haven't calculated the epsilon closure of `edest' yet,
+           calculate now. Otherwise use calculated epsilon closure.  */
+        if (dfa->eclosures[edest].nelem == 0)
+          {
+            err = calc_eclosure_iter (&eclosure_elem, dfa, edest, 0);
+            if (BE (err != REG_NOERROR, 0))
+              goto free_return;
+          }
+        else
+          eclosure_elem = dfa->eclosures[edest];
+        /* Merge the epsilon closure of `edest'.  */
+        re_node_set_merge (&eclosure, &eclosure_elem);
+        /* If the epsilon closure of `edest' is incomplete,
+           the epsilon closure of this node is also incomplete.  */
+        if (dfa->eclosures[edest].nelem == 0)
+          {
+            incomplete = 1;
+            re_node_set_free (&eclosure_elem);
+          }
+      }
+
+  /* Epsilon closures include itself.  */
+  re_node_set_insert (&eclosure, node);
+  if (incomplete && !root)
+    dfa->eclosures[node].nelem = 0;
+  else
+    dfa->eclosures[node] = eclosure;
+  *new_set = eclosure;
+  return REG_NOERROR;
+ free_return:
+  re_node_set_free (&eclosure);
+  return err;
+}
+\f
+/* Functions for token which are used in the parser. */
+
+/* Read the next token from INPUT and advance INPUT past it.
+   Not to be used inside bracket expressions.  */
+
+static re_token_t
+fetch_token (input, syntax)
+     re_string_t *input;
+     reg_syntax_t syntax;
+{
+  re_token_t result;
+  /* peek_token returns the number of bytes the token occupies.  */
+  int width = peek_token (&result, input, syntax);
+  re_string_skip_bytes (input, width);
+  return result;
+}
+
+/* Peek a token from INPUT into *TOKEN, and return the length of the token
+ (0, 1 or 2 bytes). We must not use this function inside bracket expressions. */
+
+static int
+peek_token (token, input, syntax)
+ re_token_t *token;
+ re_string_t *input;
+ reg_syntax_t syntax;
+{
+ unsigned char c;
+
+ if (re_string_eoi (input))
+ {
+ token->type = END_OF_RE;
+ return 0;
+ }
+
+ c = re_string_peek_byte (input, 0);
+ token->opr.c = c;
+
+#ifdef RE_ENABLE_I18N
+ token->mb_partial = 0;
+ if (MB_CUR_MAX > 1 &&
+ !re_string_first_byte (input, re_string_cur_idx (input)))
+ {
+ token->type = CHARACTER; /* Not the first byte of a character: plain CHARACTER. */
+ token->mb_partial = 1;
+ return 1;
+ }
+#endif
+ if (c == '\\')
+ {
+ unsigned char c2;
+ if (re_string_cur_idx (input) + 1 >= re_string_length (input))
+ {
+ token->type = BACK_SLASH; /* A backslash at the very end of the pattern. */
+ return 1;
+ }
+
+ c2 = re_string_peek_byte_case (input, 1);
+ token->opr.c = c2;
+ token->type = CHARACTER; /* Default; the cases below turn some escapes into operators. */
+ switch (c2)
+ {
+ case '|':
+ if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_NO_BK_VBAR))
+ token->type = OP_ALT;
+ break;
+ case '1': case '2': case '3': case '4': case '5':
+ case '6': case '7': case '8': case '9':
+ if (!(syntax & RE_NO_BK_REFS))
+ {
+ token->type = OP_BACK_REF;
+ token->opr.idx = c2 - '0';
+ }
+ break;
+ case '<':
+ if (!(syntax & RE_NO_GNU_OPS))
+ {
+ token->type = ANCHOR;
+ token->opr.idx = WORD_FIRST;
+ }
+ break;
+ case '>':
+ if (!(syntax & RE_NO_GNU_OPS))
+ {
+ token->type = ANCHOR;
+ token->opr.idx = WORD_LAST;
+ }
+ break;
+ case 'b':
+ if (!(syntax & RE_NO_GNU_OPS))
+ {
+ token->type = ANCHOR;
+ token->opr.idx = WORD_DELIM;
+ }
+ break;
+ case 'B':
+ if (!(syntax & RE_NO_GNU_OPS))
+ {
+ token->type = ANCHOR;
+ token->opr.idx = INSIDE_WORD;
+ }
+ break;
+ case 'w':
+ if (!(syntax & RE_NO_GNU_OPS))
+ token->type = OP_WORD;
+ break;
+ case 'W':
+ if (!(syntax & RE_NO_GNU_OPS))
+ token->type = OP_NOTWORD;
+ break;
+ case '`':
+ if (!(syntax & RE_NO_GNU_OPS))
+ {
+ token->type = ANCHOR;
+ token->opr.idx = BUF_FIRST;
+ }
+ break;
+ case '\'':
+ if (!(syntax & RE_NO_GNU_OPS))
+ {
+ token->type = ANCHOR;
+ token->opr.idx = BUF_LAST;
+ }
+ break;
+ case '(':
+ if (!(syntax & RE_NO_BK_PARENS))
+ token->type = OP_OPEN_SUBEXP;
+ break;
+ case ')':
+ if (!(syntax & RE_NO_BK_PARENS))
+ token->type = OP_CLOSE_SUBEXP;
+ break;
+ case '+':
+ if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM))
+ token->type = OP_DUP_PLUS;
+ break;
+ case '?':
+ if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM))
+ token->type = OP_DUP_QUESTION;
+ break;
+ case '{':
+ if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES)))
+ token->type = OP_OPEN_DUP_NUM;
+ break;
+ case '}':
+ if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES)))
+ token->type = OP_CLOSE_DUP_NUM;
+ break;
+ default:
+ break;
+ }
+ return 2; /* The backslash and the byte after it. */
+ }
+
+ token->type = CHARACTER; /* Default for an unescaped byte. */
+ switch (c)
+ {
+ case '\n':
+ if (syntax & RE_NEWLINE_ALT)
+ token->type = OP_ALT;
+ break;
+ case '|':
+ if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_NO_BK_VBAR))
+ token->type = OP_ALT;
+ break;
+ case '*':
+ token->type = OP_DUP_ASTERISK;
+ break;
+ case '+':
+ if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM))
+ token->type = OP_DUP_PLUS;
+ break;
+ case '?':
+ if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM))
+ token->type = OP_DUP_QUESTION;
+ break;
+ case '{':
+ if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
+ token->type = OP_OPEN_DUP_NUM;
+ break;
+ case '}':
+ if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
+ token->type = OP_CLOSE_DUP_NUM;
+ break;
+ case '(':
+ if (syntax & RE_NO_BK_PARENS)
+ token->type = OP_OPEN_SUBEXP;
+ break;
+ case ')':
+ if (syntax & RE_NO_BK_PARENS)
+ token->type = OP_CLOSE_SUBEXP;
+ break;
+ case '[':
+ token->type = OP_OPEN_BRACKET;
+ break;
+ case '.':
+ token->type = OP_PERIOD;
+ break;
+ case '^':
+ if (!(syntax & RE_CONTEXT_INDEP_ANCHORS) &&
+ re_string_cur_idx (input) != 0)
+ {
+ char prev = re_string_peek_byte (input, -1); /* Not at the start: the previous byte decides. */
+ if (prev != '|' && prev != '(' &&
+ (!(syntax & RE_NEWLINE_ALT) || prev != '\n'))
+ break;
+ }
+ token->type = ANCHOR;
+ token->opr.idx = LINE_FIRST;
+ break;
+ case '$':
+ if (!(syntax & RE_CONTEXT_INDEP_ANCHORS) &&
+ re_string_cur_idx (input) + 1 != re_string_length (input))
+ {
+ re_token_t next; /* Not at the end: look at the following token, then step back. */
+ re_string_skip_bytes (input, 1);
+ peek_token (&next, input, syntax);
+ re_string_skip_bytes (input, -1);
+ if (next.type != OP_ALT && next.type != OP_CLOSE_SUBEXP)
+ break;
+ }
+ token->type = ANCHOR;
+ token->opr.idx = LINE_LAST;
+ break;
+ default:
+ break;
+ }
+ return 1;
+}
+
+/* Peek a token from INPUT, and return the length of the token.
+   We must not use this function outside of bracket expressions.  */
+
+static int
+peek_token_bracket (token, input, syntax)
+     re_token_t *token;
+     re_string_t *input;
+     reg_syntax_t syntax;
+{
+  unsigned char c;
+  if (re_string_eoi (input))
+    {
+      token->type = END_OF_RE;
+      return 0;
+    }
+  c = re_string_peek_byte (input, 0);
+  token->opr.c = c;
+
+#ifdef RE_ENABLE_I18N
+  if (MB_CUR_MAX > 1 &&
+      !re_string_first_byte (input, re_string_cur_idx (input)))
+    {
+      token->type = CHARACTER;
+      return 1;
+    }
+#endif /* RE_ENABLE_I18N */
+
+  if (c == '\\' && (syntax & RE_BACKSLASH_ESCAPE_IN_LISTS)
+      && re_string_cur_idx (input) + 1 < re_string_length (input))
+    {
+      /* In this case, '\' escapes the character that follows it.  */
+      re_string_skip_bytes (input, 1);
+      token->opr.c = re_string_peek_byte (input, 0);
+      token->type = CHARACTER;
+      return 1;
+    }
+  if (c == '[') /* '[' is a special char in a bracket exps.  */
+    {
+      unsigned char c2;
+      int token_len = 2;
+      /* Never look past the end of the pattern for the second byte.  */
+      c2 = (re_string_cur_idx (input) + 1 < re_string_length (input)
+            ? re_string_peek_byte (input, 1) : 0);
+      token->opr.c = c2;
+      switch (c2)
+        {
+        case '.':
+          token->type = OP_OPEN_COLL_ELEM;
+          break;
+        case '=':
+          token->type = OP_OPEN_EQUIV_CLASS;
+          break;
+        case ':':
+          if (syntax & RE_CHAR_CLASSES)
+            {
+              token->type = OP_OPEN_CHAR_CLASS;
+              break;
+            }
+          /* else fall through.  */
+        default:
+          token->type = CHARACTER;
+          token->opr.c = c;
+          token_len = 1;
+          break;
+        }
+      return token_len;
+    }
+  switch (c)
+    {
+    case '-':
+      token->type = OP_CHARSET_RANGE;
+      break;
+    case ']':
+      token->type = OP_CLOSE_BRACKET;
+      break;
+    case '^':
+      token->type = OP_NON_MATCH_LIST;
+      break;
+    default:
+      token->type = CHARACTER;
+    }
+  return 1;
+}
+\f
+/* Functions for parser. */
+
+/* Entry point of the parser.
+   Parse the regular expression REGEXP and return the structure tree.
+   If an error occurs, *ERR is set to the error code and NULL is returned.
+   This function builds the following tree, from regular expression <reg_exp>:
+            CAT
+            / \
+           /   \
+   <reg_exp>  EOR
+
+   CAT means concatenation.
+   EOR means end of regular expression.
+   If parse_reg_exp returns NULL without reporting an error, the
+   returned tree consists of the EOR node alone.  */
+
+static bin_tree_t *
+parse (regexp, preg, syntax, err)
+     re_string_t *regexp;
+     regex_t *preg;
+     reg_syntax_t syntax;
+     reg_errcode_t *err;
+{
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  bin_tree_t *tree, *eor, *root;
+  re_token_t current_token;
+  int new_idx;
+  current_token = fetch_token (regexp, syntax);
+  /* parse_reg_exp advances the token in place, so pass its address.  */
+  tree = parse_reg_exp (regexp, preg, &current_token, syntax, 0, err);
+  if (BE (*err != REG_NOERROR && tree == NULL, 0))
+    return NULL;
+  /* The token left over after the expression becomes the EOR node.  */
+  new_idx = re_dfa_add_node (dfa, current_token, 0);
+  eor = create_tree (NULL, NULL, 0, new_idx);
+  if (tree != NULL)
+    root = create_tree (tree, eor, CONCAT, 0);
+  else
+    root = eor;
+  if (BE (new_idx == -1 || eor == NULL || root == NULL, 0))
+    {
+      *err = REG_ESPACE;
+      return NULL;
+    }
+  return root;
+}
+
+/* Parse an alternation <branch1>|<branch2> and build the tree:
+            ALT
+            / \
+           /   \
+   <branch1> <branch2>
+
+   ALT is the node for the operator `|'.  A branch that is empty (the
+   `|' is directly followed by another `|', by the end of the pattern,
+   or, when NEST is nonzero, by a closing parenthesis) is represented
+   by a NULL child.  On error *ERR is set and NULL is returned.  */
+
+static bin_tree_t *
+parse_reg_exp (regexp, preg, token, syntax, nest, err)
+     re_string_t *regexp;
+     regex_t *preg;
+     re_token_t *token;
+     reg_syntax_t syntax;
+     int nest;
+     reg_errcode_t *err;
+{
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  bin_tree_t *result;
+
+  result = parse_branch (regexp, preg, token, syntax, nest, err);
+  if (BE (*err != REG_NOERROR && result == NULL, 0))
+    return NULL;
+
+  while (token->type == OP_ALT)
+    {
+      bin_tree_t *rhs = NULL;
+      /* Register the `|' node before moving on to the next token.  */
+      int alt_idx = re_dfa_add_node (dfa, *token, 0);
+      int rhs_is_empty;
+
+      *token = fetch_token (regexp, syntax);
+      rhs_is_empty = (token->type == OP_ALT || token->type == END_OF_RE
+                      || (nest != 0 && token->type == OP_CLOSE_SUBEXP));
+      if (!rhs_is_empty)
+        {
+          rhs = parse_branch (regexp, preg, token, syntax, nest, err);
+          if (BE (*err != REG_NOERROR && rhs == NULL, 0))
+            {
+              free_bin_tree (result);
+              return NULL;
+            }
+        }
+
+      result = create_tree (result, rhs, 0, alt_idx);
+      if (BE (alt_idx == -1 || result == NULL, 0))
+        {
+          *err = REG_ESPACE;
+          return NULL;
+        }
+      dfa->has_plural_match = 1;
+    }
+  return result;
+}
+
+/* Parse a concatenation <exp1><exp2> and build the tree:
+          CAT
+          / \
+         /   \
+    <exp1> <exp2>
+
+   CAT means concatenation.  Expressions are consumed until an OP_ALT,
+   the end of the pattern, or (when NEST is nonzero) a closing
+   parenthesis is seen.  Expressions that yield no tree are skipped.
+   On error *ERR is set and NULL is returned.  */
+
+static bin_tree_t *
+parse_branch (regexp, preg, token, syntax, nest, err)
+     re_string_t *regexp;
+     regex_t *preg;
+     re_token_t *token;
+     reg_syntax_t syntax;
+     int nest;
+     reg_errcode_t *err;
+{
+  bin_tree_t *acc, *next;
+
+  acc = parse_expression (regexp, preg, token, syntax, nest, err);
+  if (BE (*err != REG_NOERROR && acc == NULL, 0))
+    return NULL;
+
+  for (;;)
+    {
+      /* Stop at anything that terminates a branch.  */
+      if (token->type == OP_ALT || token->type == END_OF_RE
+          || (nest != 0 && token->type == OP_CLOSE_SUBEXP))
+        break;
+
+      next = parse_expression (regexp, preg, token, syntax, nest, err);
+      if (BE (*err != REG_NOERROR && next == NULL, 0))
+        {
+          free_bin_tree (acc);
+          return NULL;
+        }
+
+      /* An empty expression contributes nothing to the branch.  */
+      if (next == NULL)
+        continue;
+
+      /* The first non-empty expression becomes the branch as is.  */
+      if (acc == NULL)
+        {
+          acc = next;
+          continue;
+        }
+
+      acc = create_tree (acc, next, CONCAT, 0);
+      if (acc == NULL)
+        {
+          *err = REG_ESPACE;
+          return NULL;
+        }
+    }
+  return acc;
+}
+
+/* This function builds the following tree, from regular expression a*:
+ *
+ |
+ a
+
+ It parses one atom, selected by the type of *TOKEN (a character, a
+ parenthesized subexpression, a bracket expression, a back reference,
+ an anchor, a period, or OP_WORD/OP_NOTWORD), followed by any number
+ of repetition operators, and leaves in *TOKEN the first token that is
+ not part of the expression.
+ NEST is the current parenthesis nesting depth; it is passed on
+ (incremented by one) when a subexpression is opened.
+ On error *ERR is set and NULL is returned.  A NULL return with *ERR
+ left untouched is not an error; it happens e.g. when the current
+ token is OP_ALT or END_OF_RE.
+*/
+
+static bin_tree_t *
+parse_expression (regexp, preg, token, syntax, nest, err)
+ re_string_t *regexp;
+ regex_t *preg;
+ re_token_t *token;
+ reg_syntax_t syntax;
+ int nest;
+ reg_errcode_t *err;
+{
+ re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+ bin_tree_t *tree;
+ int new_idx;
+ switch (token->type)
+ {
+ case CHARACTER: /* A literal: one DFA node and one leaf. */
+ new_idx = re_dfa_add_node (dfa, *token, 0);
+ tree = create_tree (NULL, NULL, 0, new_idx);
+ if (BE (new_idx == -1 || tree == NULL, 0))
+ {
+ *err = REG_ESPACE;
+ return NULL;
+ }
+#ifdef RE_ENABLE_I18N
+ if (MB_CUR_MAX > 1)
+ {
+ /* Keep consuming bytes that are not the first byte of a
+ character and chain them to the leaf with CONCAT nodes. */
+ while (!re_string_eoi (regexp)
+ && !re_string_first_byte (regexp, re_string_cur_idx (regexp)))
+ {
+ bin_tree_t *mbc_remain;
+ *token = fetch_token (regexp, syntax);
+ new_idx = re_dfa_add_node (dfa, *token, 0);
+ mbc_remain = create_tree (NULL, NULL, 0, new_idx);
+ tree = create_tree (tree, mbc_remain, CONCAT, 0);
+ if (BE (new_idx == -1 || mbc_remain == NULL || tree == NULL, 0))
+ {
+ *err = REG_ESPACE;
+ return NULL;
+ }
+ }
+ }
+#endif
+ break;
+ case OP_OPEN_SUBEXP:
+ tree = parse_sub_exp (regexp, preg, token, syntax, nest + 1, err);
+ if (BE (*err != REG_NOERROR && tree == NULL, 0))
+ return NULL;
+ break;
+ case OP_OPEN_BRACKET:
+ tree = parse_bracket_exp (regexp, dfa, token, syntax, err);
+ if (BE (*err != REG_NOERROR && tree == NULL, 0))
+ return NULL;
+ break;
+ case OP_BACK_REF:
+ /* Reject a reference to a group number larger than the number of
+ groups opened so far, or to a group whose `end' is still -1. */
+ if (BE (preg->re_nsub < token->opr.idx
+ || dfa->subexps[token->opr.idx - 1].end == -1, 0))
+ {
+ *err = REG_ESUBREG;
+ return NULL;
+ }
+ dfa->used_bkref_map |= 1 << (token->opr.idx - 1); /* Mark the group as referenced. */
+ new_idx = re_dfa_add_node (dfa, *token, 0);
+ tree = create_tree (NULL, NULL, 0, new_idx);
+ if (BE (new_idx == -1 || tree == NULL, 0))
+ {
+ *err = REG_ESPACE;
+ return NULL;
+ }
+ ++dfa->nbackref;
+ dfa->has_mb_node = 1;
+ break;
+ /* A repetition operator with nothing in front of it.  Depending on
+ SYNTAX it is an error, is skipped, or is taken literally. */
+ case OP_DUP_ASTERISK:
+ case OP_DUP_PLUS:
+ case OP_DUP_QUESTION:
+ case OP_OPEN_DUP_NUM:
+ if (syntax & RE_CONTEXT_INVALID_OPS)
+ {
+ *err = REG_BADRPT;
+ return NULL;
+ }
+ else if (syntax & RE_CONTEXT_INDEP_OPS)
+ {
+ /* Skip the operator and parse whatever follows it. */
+ *token = fetch_token (regexp, syntax);
+ return parse_expression (regexp, preg, token, syntax, nest, err);
+ }
+ /* else fall through */
+ case OP_CLOSE_SUBEXP:
+ /* The type is tested again because the repetition cases above fall
+ through to here. */
+ if ((token->type == OP_CLOSE_SUBEXP) &&
+ !(syntax & RE_UNMATCHED_RIGHT_PAREN_ORD))
+ {
+ *err = REG_ERPAREN;
+ return NULL;
+ }
+ /* else fall through */
+ case OP_CLOSE_DUP_NUM:
+ /* We treat it as a normal character. */
+
+ /* Then we can treat these characters as normal characters. */
+ token->type = CHARACTER;
+ new_idx = re_dfa_add_node (dfa, *token, 0);
+ tree = create_tree (NULL, NULL, 0, new_idx);
+ if (BE (new_idx == -1 || tree == NULL, 0))
+ {
+ *err = REG_ESPACE;
+ return NULL;
+ }
+ break;
+ case ANCHOR:
+ if (dfa->word_char == NULL) /* Initialize the word_char table on first use. */
+ {
+ *err = init_word_char (dfa);
+ if (BE (*err != REG_NOERROR, 0))
+ return NULL;
+ }
+ if (token->opr.ctx_type == WORD_DELIM)
+ {
+ /* Expand WORD_DELIM into an OP_ALT node whose children are a
+ WORD_FIRST anchor and a WORD_LAST anchor. */
+ bin_tree_t *tree_first, *tree_last;
+ int idx_first, idx_last;
+ token->opr.ctx_type = WORD_FIRST;
+ idx_first = re_dfa_add_node (dfa, *token, 0);
+ tree_first = create_tree (NULL, NULL, 0, idx_first);
+ token->opr.ctx_type = WORD_LAST;
+ idx_last = re_dfa_add_node (dfa, *token, 0);
+ tree_last = create_tree (NULL, NULL, 0, idx_last);
+ token->type = OP_ALT;
+ new_idx = re_dfa_add_node (dfa, *token, 0);
+ tree = create_tree (tree_first, tree_last, 0, new_idx);
+ if (BE (idx_first == -1 || idx_last == -1 || new_idx == -1
+ || tree_first == NULL || tree_last == NULL
+ || tree == NULL, 0))
+ {
+ *err = REG_ESPACE;
+ return NULL;
+ }
+ }
+ else
+ {
+ new_idx = re_dfa_add_node (dfa, *token, 0);
+ tree = create_tree (NULL, NULL, 0, new_idx);
+ if (BE (new_idx == -1 || tree == NULL, 0))
+ {
+ *err = REG_ESPACE;
+ return NULL;
+ }
+ }
+ /* We must return here, since ANCHORs can't be followed
+ by repetition operators.
+ eg. RE"^*" is invalid or "<ANCHOR(^)><CHAR(*)>",
+ it must not be "<ANCHOR(^)><REPEAT(*)>". */
+ *token = fetch_token (regexp, syntax);
+ return tree;
+ case OP_PERIOD:
+ new_idx = re_dfa_add_node (dfa, *token, 0);
+ tree = create_tree (NULL, NULL, 0, new_idx);
+ if (BE (new_idx == -1 || tree == NULL, 0))
+ {
+ *err = REG_ESPACE;
+ return NULL;
+ }
+ if (MB_CUR_MAX > 1)
+ dfa->has_mb_node = 1;
+ break;
+ case OP_WORD:
+ tree = build_word_op (dfa, 0, err);
+ if (BE (*err != REG_NOERROR && tree == NULL, 0))
+ return NULL;
+ break;
+ case OP_NOTWORD:
+ tree = build_word_op (dfa, 1, err);
+ if (BE (*err != REG_NOERROR && tree == NULL, 0))
+ return NULL;
+ break;
+ case OP_ALT:
+ case END_OF_RE:
+ return NULL; /* Empty expression; *err is not modified. */
+ case BACK_SLASH:
+ *err = REG_EESCAPE;
+ return NULL;
+ default:
+ /* Must not happen? */
+#ifdef DEBUG
+ assert (0);
+#endif
+ return NULL;
+ }
+ *token = fetch_token (regexp, syntax); /* Advance past the atom. */
+
+ /* Apply every repetition operator that directly follows the atom. */
+ while (token->type == OP_DUP_ASTERISK || token->type == OP_DUP_PLUS
+ || token->type == OP_DUP_QUESTION || token->type == OP_OPEN_DUP_NUM)
+ {
+ tree = parse_dup_op (tree, regexp, dfa, token, syntax, err);
+ if (BE (*err != REG_NOERROR && tree == NULL, 0))
+ return NULL;
+ dfa->has_plural_match = 1;
+ }
+
+ return tree;
+}
+
+/* This function builds the following tree, from regular expression
+   (<reg_exp>):
+          SUBEXP
+             |
+         <reg_exp>
+
+   As implemented here, the result is the concatenation of a node for
+   the opening parenthesis, the tree of <reg_exp> (omitted when the
+   group is empty), and a node for the closing parenthesis.  Both
+   parenthesis nodes carry the group index in opr.idx.
+   *TOKEN must be the opening-parenthesis token on entry; on success it
+   is left on the closing-parenthesis token.
+   The group's slot in dfa->subexps gets `start' set when the group is
+   opened; `end' stays -1 until the closing parenthesis is seen.
+   On error *ERR is set and NULL is returned.  */
+
+static bin_tree_t *
+parse_sub_exp (regexp, preg, token, syntax, nest, err)
+     re_string_t *regexp;
+     regex_t *preg;
+     re_token_t *token;
+     reg_syntax_t syntax;
+     int nest;
+     reg_errcode_t *err;
+{
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  bin_tree_t *tree, *left_par, *right_par;
+  size_t cur_nsub;
+  int new_idx;
+  cur_nsub = preg->re_nsub++;
+  /* Grow the subexpression table when it is full.  */
+  if (dfa->subexps_alloc < preg->re_nsub)
+    {
+      re_subexp_t *new_array;
+      dfa->subexps_alloc *= 2;
+      new_array = re_realloc (dfa->subexps, re_subexp_t, dfa->subexps_alloc);
+      if (BE (new_array == NULL, 0))
+        {
+          /* Undo the size change; the old array is still in place.  */
+          dfa->subexps_alloc /= 2;
+          *err = REG_ESPACE;
+          return NULL;
+        }
+      dfa->subexps = new_array;
+    }
+  dfa->subexps[cur_nsub].start = dfa->nodes_len;
+  dfa->subexps[cur_nsub].end = -1;
+
+  new_idx = re_dfa_add_node (dfa, *token, 0);
+  left_par = create_tree (NULL, NULL, 0, new_idx);
+  if (BE (new_idx == -1 || left_par == NULL, 0))
+    {
+      *err = REG_ESPACE;
+      return NULL;
+    }
+  dfa->nodes[new_idx].opr.idx = cur_nsub;
+  *token = fetch_token (regexp, syntax);
+
+  /* The subexpression may be a null string.  */
+  if (token->type == OP_CLOSE_SUBEXP)
+    tree = NULL;
+  else
+    {
+      tree = parse_reg_exp (regexp, preg, token, syntax, nest, err);
+      if (BE (*err != REG_NOERROR && tree == NULL, 0))
+        {
+          /* LEFT_PAR is not attached to any tree yet; release it.  */
+          free_bin_tree (left_par);
+          return NULL;
+        }
+    }
+  if (BE (token->type != OP_CLOSE_SUBEXP, 0))
+    {
+      /* Missing closing parenthesis.  Neither LEFT_PAR nor TREE has
+         been attached to anything, so both must be released here.  */
+      free_bin_tree (left_par);
+      free_bin_tree (tree);
+      *err = REG_BADPAT;
+      return NULL;
+    }
+  new_idx = re_dfa_add_node (dfa, *token, 0);
+  dfa->subexps[cur_nsub].end = dfa->nodes_len;
+  right_par = create_tree (NULL, NULL, 0, new_idx);
+  tree = ((tree == NULL) ? right_par
+          : create_tree (tree, right_par, CONCAT, 0));
+  tree = create_tree (left_par, tree, CONCAT, 0);
+  if (BE (new_idx == -1 || right_par == NULL || tree == NULL, 0))
+    {
+      *err = REG_ESPACE;
+      return NULL;
+    }
+  dfa->nodes[new_idx].opr.idx = cur_nsub;
+
+  return tree;
+}
+
+/* This function parses repetition operators like "*", "+", "{1,3}" etc.
+   DUP_ELEM is the tree of the expression being repeated and *TOKEN is
+   the repetition operator; on success *TOKEN is advanced past it.
+
+   Returns the tree for the repeated expression.  NULL is returned
+   - with *ERR set, on an invalid interval or allocation failure;
+   - with *ERR untouched, for "<re>{0}" / "<re>{0,0}", which match the
+     null string (DUP_ELEM is freed).
+   When an interval is malformed and SYNTAX has RE_INVALID_INTERVAL_ORD,
+   the input position is rolled back, the `{' is turned into an ordinary
+   CHARACTER token, and DUP_ELEM is returned unchanged.
+
+   As used below, fetch_number yields -1 when no number was present and
+   -2 when the sequence is invalid.  */
+
+static bin_tree_t *
+parse_dup_op (dup_elem, regexp, dfa, token, syntax, err)
+     bin_tree_t *dup_elem;
+     re_string_t *regexp;
+     re_dfa_t *dfa;
+     re_token_t *token;
+     reg_syntax_t syntax;
+     reg_errcode_t *err;
+{
+  re_token_t dup_token;
+  bin_tree_t *tree = dup_elem, *work_tree;
+  /* Remember where we are so an invalid interval can be rolled back.  */
+  int new_idx, start_idx = re_string_cur_idx (regexp);
+  re_token_t start_token = *token;
+  if (token->type == OP_OPEN_DUP_NUM)
+    {
+      int i;
+      int end = 0;
+      int start = fetch_number (regexp, token, syntax);
+      bin_tree_t *elem;
+      if (start == -1)
+        {
+          if (token->type == CHARACTER && token->opr.c == ',')
+            start = 0; /* We treat "{,m}" as "{0,m}".  */
+          else
+            {
+              *err = REG_BADBR; /* <re>{} is invalid.  */
+              return NULL;
+            }
+        }
+      if (BE (start != -2, 1))
+        {
+          /* We treat "{n}" as "{n,n}".  */
+          end = ((token->type == OP_CLOSE_DUP_NUM) ? start
+                 : ((token->type == CHARACTER && token->opr.c == ',')
+                    ? fetch_number (regexp, token, syntax) : -2));
+        }
+      if (BE (start == -2 || end == -2, 0))
+        {
+          /* Invalid sequence.  */
+          if (token->type == OP_CLOSE_DUP_NUM)
+            goto parse_dup_op_invalid_interval;
+          else
+            goto parse_dup_op_ebrace;
+        }
+      if (BE (end != -1 && start > end, 0))
+        {
+          /* First number greater than second, e.g. "<re>{3,1}".  This is
+             an invalid interval, not a request for "<re>{3}".  */
+          free_bin_tree (dup_elem);
+          *err = REG_BADBR;
+          return NULL;
+        }
+      if (BE (start == 0 && end == 0, 0))
+        {
+          /* We treat "<re>{0}" and "<re>{0,0}" as null string.  */
+          *token = fetch_token (regexp, syntax);
+          free_bin_tree (dup_elem);
+          return NULL;
+        }
+
+      /* Extract "<re>{n,m}" to "<re><re>...<re><re>{0,<m-n>}".  */
+      elem = tree;
+      for (i = 0; i < start; ++i)
+        if (i != 0)
+          {
+            work_tree = duplicate_tree (elem, dfa);
+            tree = create_tree (tree, work_tree, CONCAT, 0);
+            if (BE (work_tree == NULL || tree == NULL, 0))
+              goto parse_dup_op_espace;
+          }
+
+      if (end == -1)
+        {
+          /* We treat "<re>{0,}" as "<re>*".  */
+          dup_token.type = OP_DUP_ASTERISK;
+          if (start > 0)
+            {
+              elem = duplicate_tree (elem, dfa);
+              new_idx = re_dfa_add_node (dfa, dup_token, 0);
+              work_tree = create_tree (elem, NULL, 0, new_idx);
+              tree = create_tree (tree, work_tree, CONCAT, 0);
+              if (BE (elem == NULL || new_idx == -1 || work_tree == NULL
+                      || tree == NULL, 0))
+                goto parse_dup_op_espace;
+            }
+          else
+            {
+              new_idx = re_dfa_add_node (dfa, dup_token, 0);
+              tree = create_tree (elem, NULL, 0, new_idx);
+              if (BE (new_idx == -1 || tree == NULL, 0))
+                goto parse_dup_op_espace;
+            }
+        }
+      else if (end - start > 0)
+        {
+          /* Then extract "<re>{0,m}" to "<re>?<re>?...<re>?".  */
+          dup_token.type = OP_DUP_QUESTION;
+          if (start > 0)
+            {
+              elem = duplicate_tree (elem, dfa);
+              new_idx = re_dfa_add_node (dfa, dup_token, 0);
+              elem = create_tree (elem, NULL, 0, new_idx);
+              tree = create_tree (tree, elem, CONCAT, 0);
+              if (BE (elem == NULL || new_idx == -1 || tree == NULL, 0))
+                goto parse_dup_op_espace;
+            }
+          else
+            {
+              new_idx = re_dfa_add_node (dfa, dup_token, 0);
+              tree = elem = create_tree (elem, NULL, 0, new_idx);
+              if (BE (new_idx == -1 || tree == NULL, 0))
+                goto parse_dup_op_espace;
+            }
+          for (i = 1; i < end - start; ++i)
+            {
+              work_tree = duplicate_tree (elem, dfa);
+              tree = create_tree (tree, work_tree, CONCAT, 0);
+              /* Use the shared cleanup path, like every other allocation
+                 failure in this function, so TREE is not leaked.  */
+              if (BE (work_tree == NULL || tree == NULL, 0))
+                goto parse_dup_op_espace;
+            }
+        }
+    }
+  else
+    {
+      /* "*", "+" or "?": a single operator node above the element.  */
+      new_idx = re_dfa_add_node (dfa, *token, 0);
+      tree = create_tree (tree, NULL, 0, new_idx);
+      if (BE (new_idx == -1 || tree == NULL, 0))
+        {
+          *err = REG_ESPACE;
+          return NULL;
+        }
+    }
+  *token = fetch_token (regexp, syntax);
+  return tree;
+
+ parse_dup_op_espace:
+  free_bin_tree (tree);
+  *err = REG_ESPACE;
+  return NULL;
+
+ parse_dup_op_ebrace:
+  if (BE (!(syntax & RE_INVALID_INTERVAL_ORD), 0))
+    {
+      *err = REG_EBRACE;
+      return NULL;
+    }
+  goto parse_dup_op_rollback;
+ parse_dup_op_invalid_interval:
+  if (BE (!(syntax & RE_INVALID_INTERVAL_ORD), 0))
+    {
+      *err = REG_BADBR;
+      return NULL;
+    }
+ parse_dup_op_rollback:
+  /* Re-read the `{' as an ordinary character.  */
+  re_string_set_index (regexp, start_idx);
+  *token = start_token;
+  token->type = CHARACTER;
+  return dup_elem;
+}
+
+/* Size, in bytes, of the buffers that hold the name of a collating
+ symbol, equivalence class or character class while a bracket
+ expression is parsed (start_name_buf and end_name_buf in
+ parse_bracket_exp).  The value is a guess by the original author
+ ("I'm not sure, but maybe enough"). */
+#define BRACKET_NAME_BUF_SIZE 32
+
+#ifndef _LIBC
+  /* Local function for parse_bracket_exp only used in case of NOT _LIBC.
+     Build the range expression which starts from START_ELEM, and ends
+     at END_ELEM.  The results are written to MBCSET and SBCSET.
+     RANGE_ALLOC is the allocated size of mbcset->range_starts and
+     mbcset->range_ends; it is a pointer argument since we may
+     update it.
+
+     Returns REG_NOERROR on success, REG_ERANGE when an endpoint is an
+     equivalence class or character class or when the start sorts after
+     the end, REG_ECOLLATE when an endpoint is a collating symbol whose
+     name is longer than one byte, and REG_ESPACE when the range arrays
+     cannot be grown.  */
+
+static reg_errcode_t
+# ifdef RE_ENABLE_I18N
+build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem)
+     re_charset_t *mbcset;
+     int *range_alloc;
+# else /* not RE_ENABLE_I18N */
+build_range_exp (sbcset, start_elem, end_elem)
+# endif /* not RE_ENABLE_I18N */
+     re_bitset_ptr_t sbcset;
+     bracket_elem_t *start_elem, *end_elem;
+{
+  unsigned int start_ch, end_ch;
+  /* Equivalence Classes and Character Classes can't be a range start/end.  */
+  if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS
+          || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS,
+          0))
+    return REG_ERANGE;
+
+  /* We can handle no multi character collating elements without libc
+     support.  */
+  if (BE ((start_elem->type == COLL_SYM
+           && strlen ((char *) start_elem->opr.name) > 1)
+          || (end_elem->type == COLL_SYM
+              && strlen ((char *) end_elem->opr.name) > 1), 0))
+    return REG_ECOLLATE;
+
+# ifdef RE_ENABLE_I18N
+  {
+    wchar_t wc, start_wc, end_wc;
+    /* Three one-character wide strings packed into one buffer for
+       wcscoll: [0] = range start, [2] = candidate, [4] = range end;
+       the odd slots stay L'\0' and terminate each string.  */
+    wchar_t cmp_buf[6] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
+
+    start_ch = ((start_elem->type == SB_CHAR) ? start_elem->opr.ch
+                : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
+                   : 0));
+    end_ch = ((end_elem->type == SB_CHAR) ? end_elem->opr.ch
+              : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]
+                 : 0));
+    start_wc = ((start_elem->type == SB_CHAR || start_elem->type == COLL_SYM)
+                ? __btowc (start_ch) : start_elem->opr.wch);
+    end_wc = ((end_elem->type == SB_CHAR || end_elem->type == COLL_SYM)
+              ? __btowc (end_ch) : end_elem->opr.wch);
+    cmp_buf[0] = start_wc;
+    cmp_buf[4] = end_wc;
+    if (wcscoll (cmp_buf, cmp_buf + 4) > 0)
+      return REG_ERANGE;
+
+    /* Check the space of the arrays.  */
+    if (*range_alloc == mbcset->nranges)
+      {
+        /* There is not enough space, need realloc.  */
+        wchar_t *new_array_start, *new_array_end;
+        int new_nranges;
+
+        /* +1 in case of mbcset->nranges is 0.  */
+        new_nranges = 2 * mbcset->nranges + 1;
+        /* Use realloc since mbcset->range_starts and mbcset->range_ends
+           are NULL if *range_alloc == 0.
+           Each pointer is stored back as soon as its realloc succeeds:
+           a successful realloc may move the block, so if the second
+           call failed while the first result was only held in a local,
+           MBCSET would be left pointing at released memory.  */
+        new_array_start = re_realloc (mbcset->range_starts, wchar_t,
+                                      new_nranges);
+        if (BE (new_array_start == NULL, 0))
+          return REG_ESPACE;
+        mbcset->range_starts = new_array_start;
+
+        new_array_end = re_realloc (mbcset->range_ends, wchar_t,
+                                    new_nranges);
+        if (BE (new_array_end == NULL, 0))
+          return REG_ESPACE;
+        mbcset->range_ends = new_array_end;
+
+        *range_alloc = new_nranges;
+      }
+
+    mbcset->range_starts[mbcset->nranges] = start_wc;
+    mbcset->range_ends[mbcset->nranges++] = end_wc;
+
+    /* Build the table for single byte characters.  */
+    for (wc = 0; wc <= SBC_MAX; ++wc)
+      {
+        cmp_buf[2] = wc;
+        if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
+            && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
+          bitset_set (sbcset, wc);
+      }
+  }
+# else /* not RE_ENABLE_I18N */
+  {
+    unsigned int ch;
+    start_ch = ((start_elem->type == SB_CHAR ) ? start_elem->opr.ch
+                : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
+                   : 0));
+    end_ch = ((end_elem->type == SB_CHAR ) ? end_elem->opr.ch
+              : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]
+                 : 0));
+    if (start_ch > end_ch)
+      return REG_ERANGE;
+    /* Build the table for single byte characters.  */
+    for (ch = 0; ch <= SBC_MAX; ++ch)
+      if (start_ch <= ch && ch <= end_ch)
+        bitset_set (sbcset, ch);
+  }
+# endif /* not RE_ENABLE_I18N */
+  return REG_NOERROR;
+}
+#endif /* not _LIBC */
+
+#ifndef _LIBC
+/* Helper for parse_bracket_exp, compiled only when _LIBC is not defined.
+   Add the collating element called NAME to SBCSET.  This version
+   accepts only names that are exactly one byte long; any other name
+   yields REG_ECOLLATE.
+   MBCSET and COLL_SYM_ALLOC (the allocated size of mbcset->coll_sym,
+   passed by pointer so that it could be updated) are part of the
+   RE_ENABLE_I18N signature but are not touched here.  */
+
+static reg_errcode_t
+# ifdef RE_ENABLE_I18N
+build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name)
+     re_charset_t *mbcset;
+     int *coll_sym_alloc;
+# else /* not RE_ENABLE_I18N */
+build_collating_symbol (sbcset, name)
+# endif /* not RE_ENABLE_I18N */
+     re_bitset_ptr_t sbcset;
+     const unsigned char *name;
+{
+  /* Reject everything but single-byte names up front.  */
+  if (BE (strlen ((const char *) name) != 1, 0))
+    return REG_ECOLLATE;
+
+  bitset_set (sbcset, name[0]);
+  return REG_NOERROR;
+}
+#endif /* not _LIBC */
+
+/* This function parse bracket expression like "[abc]", "[a-c]",
+ "[[.a-a.]]" etc. */
+
+static bin_tree_t *
+parse_bracket_exp (regexp, dfa, token, syntax, err)
+ re_string_t *regexp;
+ re_dfa_t *dfa;
+ re_token_t *token;
+ reg_syntax_t syntax;
+ reg_errcode_t *err;
+{
+#ifdef _LIBC
+ const unsigned char *collseqmb;
+ const char *collseqwc;
+ uint32_t nrules;
+ int32_t table_size;
+ const int32_t *symb_table;
+ const unsigned char *extra;
+
+ /* Local function for parse_bracket_exp used in _LIBC environement.
+ Seek the collating symbol entry correspondings to NAME.
+ Return the index of the symbol in the SYMB_TABLE. */
+
+ static inline int32_t
+ seek_collating_symbol_entry (name, name_len)
+ const unsigned char *name;
+ size_t name_len;
+ {
+ int32_t hash = elem_hash ((const char *) name, name_len);
+ int32_t elem = hash % table_size;
+ int32_t second = hash % (table_size - 2);
+ while (symb_table[2 * elem] != 0)
+ {
+ /* First compare the hashing value. */
+ if (symb_table[2 * elem] == hash
+ /* Compare the length of the name. */
+ && name_len == extra[symb_table[2 * elem + 1]]
+ /* Compare the name. */
+ && memcmp (name, &extra[symb_table[2 * elem + 1] + 1],
+ name_len) == 0)
+ {
+ /* Yep, this is the entry. */
+ break;
+ }
+
+ /* Next entry. */
+ elem += second;
+ }
+ return elem;
+ }
+
+ /* Local function for parse_bracket_exp used in _LIBC environement.
+ Look up the collation sequence value of BR_ELEM.
+ Return the value if succeeded, UINT_MAX otherwise. */
+
+ static inline unsigned int
+ lookup_collation_sequence_value (br_elem)
+ bracket_elem_t *br_elem;
+ {
+ if (br_elem->type == SB_CHAR)
+ {
+ /*
+ if (MB_CUR_MAX == 1)
+ */
+ if (nrules == 0)
+ return collseqmb[br_elem->opr.ch];
+ else
+ {
+ wint_t wc = __btowc (br_elem->opr.ch);
+ return collseq_table_lookup (collseqwc, wc);
+ }
+ }
+ else if (br_elem->type == MB_CHAR)
+ {
+ return collseq_table_lookup (collseqwc, br_elem->opr.wch);
+ }
+ else if (br_elem->type == COLL_SYM)
+ {
+ size_t sym_name_len = strlen ((char *) br_elem->opr.name);
+ if (nrules != 0)
+ {
+ int32_t elem, idx;
+ elem = seek_collating_symbol_entry (br_elem->opr.name,
+ sym_name_len);
+ if (symb_table[2 * elem] != 0)
+ {
+ /* We found the entry. */
+ idx = symb_table[2 * elem + 1];
+ /* Skip the name of collating element name. */
+ idx += 1 + extra[idx];
+ /* Skip the byte sequence of the collating element. */
+ idx += 1 + extra[idx];
+ /* Adjust for the alignment. */
+ idx = (idx + 3) & ~3;
+ /* Skip the multibyte collation sequence value. */
+ idx += sizeof (unsigned int);
+ /* Skip the wide char sequence of the collating element. */
+ idx += sizeof (unsigned int) *
+ (1 + *(unsigned int *) (extra + idx));
+ /* Return the collation sequence value. */
+ return *(unsigned int *) (extra + idx);
+ }
+ else if (symb_table[2 * elem] == 0 && sym_name_len == 1)
+ {
+ /* No valid character. Match it as a single byte
+ character. */
+ return collseqmb[br_elem->opr.name[0]];
+ }
+ }
+ else if (sym_name_len == 1)
+ return collseqmb[br_elem->opr.name[0]];
+ }
+ return UINT_MAX;
+ }
+
+ /* Local function for parse_bracket_exp used in _LIBC environement.
+ Build the range expression which starts from START_ELEM, and ends
+ at END_ELEM. The result are written to MBCSET and SBCSET.
+ RANGE_ALLOC is the allocated size of mbcset->range_starts, and
+ mbcset->range_ends, is a pointer argument sinse we may
+ update it. */
+
+ static inline reg_errcode_t
+# ifdef RE_ENABLE_I18N
+ build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem)
+ re_charset_t *mbcset;
+ int *range_alloc;
+# else /* not RE_ENABLE_I18N */
+ build_range_exp (sbcset, start_elem, end_elem)
+# endif /* not RE_ENABLE_I18N */
+ re_bitset_ptr_t sbcset;
+ bracket_elem_t *start_elem, *end_elem;
+ {
+ unsigned int ch;
+ uint32_t start_collseq;
+ uint32_t end_collseq;
+
+# ifdef RE_ENABLE_I18N
+ /* Check the space of the arrays. */
+ if (*range_alloc == mbcset->nranges)
+ {
+ /* There are not enough space, need realloc. */
+ uint32_t *new_array_start;
+ uint32_t *new_array_end;
+ int new_nranges;
+
+ /* +1 in case of mbcset->nranges is 0. */
+ new_nranges = 2 * mbcset->nranges + 1;
+ /* Use realloc since mbcset->range_starts and mbcset->range_ends
+ are NULL if *range_alloc == 0. */
+ new_array_start = re_realloc (mbcset->range_starts, uint32_t,
+ new_nranges);
+ new_array_end = re_realloc (mbcset->range_ends, uint32_t,
+ new_nranges);
+
+ if (BE (new_array_start == NULL || new_array_end == NULL, 0))
+ return REG_ESPACE;
+
+ mbcset->range_starts = new_array_start;
+ mbcset->range_ends = new_array_end;
+ *range_alloc = new_nranges;
+ }
+# endif /* RE_ENABLE_I18N */
+
+ /* Equivalence Classes and Character Classes can't be a range
+ start/end. */
+ if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS
+ || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS,
+ 0))
+ return REG_ERANGE;
+
+ start_collseq = lookup_collation_sequence_value (start_elem);
+ end_collseq = lookup_collation_sequence_value (end_elem);
+ /* Check start/end collation sequence values. */
+ if (BE (start_collseq == UINT_MAX || end_collseq == UINT_MAX, 0))
+ return REG_ECOLLATE;
+ if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_collseq > end_collseq, 0))
+ return REG_ERANGE;
+
+# ifdef RE_ENABLE_I18N
+ /* Got valid collation sequence values, add them as a new entry. */
+ mbcset->range_starts[mbcset->nranges] = start_collseq;
+ mbcset->range_ends[mbcset->nranges++] = end_collseq;
+# endif /* RE_ENABLE_I18N */
+
+ /* Build the table for single byte characters. */
+ for (ch = 0; ch <= SBC_MAX; ch++)
+ {
+ uint32_t ch_collseq;
+ /*
+ if (MB_CUR_MAX == 1)
+ */
+ if (nrules == 0)
+ ch_collseq = collseqmb[ch];
+ else
+ ch_collseq = collseq_table_lookup (collseqwc, __btowc (ch));
+ if (start_collseq <= ch_collseq && ch_collseq <= end_collseq)
+ bitset_set (sbcset, ch);
+ }
+ return REG_NOERROR;
+ }
+
+ /* Local function for parse_bracket_exp used in _LIBC environement.
+ Build the collating element which is represented by NAME.
+ The result are written to MBCSET and SBCSET.
+ COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
+ pointer argument sinse we may update it. */
+
+ static inline reg_errcode_t
+# ifdef RE_ENABLE_I18N
+ build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name)
+ re_charset_t *mbcset;
+ int *coll_sym_alloc;
+# else /* not RE_ENABLE_I18N */
+ build_collating_symbol (sbcset, name)
+# endif /* not RE_ENABLE_I18N */
+ re_bitset_ptr_t sbcset;
+ const unsigned char *name;
+ {
+ int32_t elem, idx;
+ size_t name_len = strlen ((const char *) name);
+ if (nrules != 0)
+ {
+ elem = seek_collating_symbol_entry (name, name_len);
+ if (symb_table[2 * elem] != 0)
+ {
+ /* We found the entry. */
+ idx = symb_table[2 * elem + 1];
+ /* Skip the name of collating element name. */
+ idx += 1 + extra[idx];
+ }
+ else if (symb_table[2 * elem] == 0 && name_len == 1)
+ {
+ /* No valid character, treat it as a normal
+ character. */
+ bitset_set (sbcset, name[0]);
+ return REG_NOERROR;
+ }
+ else
+ return REG_ECOLLATE;
+
+# ifdef RE_ENABLE_I18N
+ /* Got valid collation sequence, add it as a new entry. */
+ /* Check the space of the arrays. */
+ if (*coll_sym_alloc == mbcset->ncoll_syms)
+ {
+ /* Not enough, realloc it. */
+ /* +1 in case of mbcset->ncoll_syms is 0. */
+ *coll_sym_alloc = 2 * mbcset->ncoll_syms + 1;
+ /* Use realloc since mbcset->coll_syms is NULL
+ if *alloc == 0. */
+ mbcset->coll_syms = re_realloc (mbcset->coll_syms, int32_t,
+ *coll_sym_alloc);
+ if (BE (mbcset->coll_syms == NULL, 0))
+ return REG_ESPACE;
+ }
+ mbcset->coll_syms[mbcset->ncoll_syms++] = idx;
+# endif /* RE_ENABLE_I18N */
+ return REG_NOERROR;
+ }
+ else
+ {
+ if (BE (name_len != 1, 0))
+ return REG_ECOLLATE;
+ else
+ {
+ bitset_set (sbcset, name[0]);
+ return REG_NOERROR;
+ }
+ }
+ }
+#endif
+
+ re_token_t br_token;
+ re_bitset_ptr_t sbcset;
+#ifdef RE_ENABLE_I18N
+ re_charset_t *mbcset;
+ int coll_sym_alloc = 0, range_alloc = 0, mbchar_alloc = 0;
+ int equiv_class_alloc = 0, char_class_alloc = 0;
+#else /* not RE_ENABLE_I18N */
+ int non_match = 0;
+#endif /* not RE_ENABLE_I18N */
+ bin_tree_t *work_tree;
+ int token_len, new_idx;
+#ifdef _LIBC
+ collseqmb = (const unsigned char *)
+ _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
+ nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
+ if (nrules)
+ {
+ /*
+ if (MB_CUR_MAX > 1)
+ */
+ collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
+ table_size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_SYMB_HASH_SIZEMB);
+ symb_table = (const int32_t *) _NL_CURRENT (LC_COLLATE,
+ _NL_COLLATE_SYMB_TABLEMB);
+ extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
+ _NL_COLLATE_SYMB_EXTRAMB);
+ }
+#endif
+ sbcset = (re_bitset_ptr_t) calloc (sizeof (unsigned int), BITSET_UINTS);
+#ifdef RE_ENABLE_I18N
+ mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
+#endif /* RE_ENABLE_I18N */
+#ifdef RE_ENABLE_I18N
+ if (BE (sbcset == NULL || mbcset == NULL, 0))
+#else
+ if (BE (sbcset == NULL, 0))
+#endif /* RE_ENABLE_I18N */
+ {
+ *err = REG_ESPACE;
+ return NULL;
+ }
+
+ token_len = peek_token_bracket (token, regexp, syntax);
+ if (BE (token->type == END_OF_RE, 0))
+ {
+ *err = REG_BADPAT;
+ goto parse_bracket_exp_free_return;
+ }
+ if (token->type == OP_NON_MATCH_LIST)
+ {
+#ifdef RE_ENABLE_I18N
+ int i;
+ mbcset->non_match = 1;
+#else /* not RE_ENABLE_I18N */
+ non_match = 1;
+#endif /* not RE_ENABLE_I18N */
+ if (syntax & RE_HAT_LISTS_NOT_NEWLINE)
+ bitset_set (sbcset, '\0');
+ re_string_skip_bytes (regexp, token_len); /* Skip a token. */
+ token_len = peek_token_bracket (token, regexp, syntax);
+ if (BE (token->type == END_OF_RE, 0))
+ {
+ *err = REG_BADPAT;
+ goto parse_bracket_exp_free_return;
+ }
+#ifdef RE_ENABLE_I18N
+ if (MB_CUR_MAX > 1)
+ for (i = 0; i < SBC_MAX; ++i)
+ if (__btowc (i) == WEOF)
+ bitset_set (sbcset, i);
+#endif /* RE_ENABLE_I18N */
+ }
+
+ /* We treat the first ']' as a normal character. */
+ if (token->type == OP_CLOSE_BRACKET)
+ token->type = CHARACTER;
+
+ while (1)
+ {
+ bracket_elem_t start_elem, end_elem;
+ unsigned char start_name_buf[BRACKET_NAME_BUF_SIZE];
+ unsigned char end_name_buf[BRACKET_NAME_BUF_SIZE];
+ reg_errcode_t ret;
+ int token_len2 = 0, is_range_exp = 0;
+ re_token_t token2;
+
+ start_elem.opr.name = start_name_buf;
+ ret = parse_bracket_element (&start_elem, regexp, token, token_len, dfa,
+ syntax);
+ if (BE (ret != REG_NOERROR, 0))
+ {
+ *err = ret;
+ goto parse_bracket_exp_free_return;
+ }
+
+ token_len = peek_token_bracket (token, regexp, syntax);
+ if (BE (token->type == END_OF_RE, 0))
+ {
+ *err = REG_BADPAT;
+ goto parse_bracket_exp_free_return;
+ }
+ if (token->type == OP_CHARSET_RANGE)
+ {
+ re_string_skip_bytes (regexp, token_len); /* Skip '-'. */
+ token_len2 = peek_token_bracket (&token2, regexp, syntax);
+ if (BE (token->type == END_OF_RE, 0))
+ {
+ *err = REG_BADPAT;
+ goto parse_bracket_exp_free_return;
+ }
+ if (token2.type == OP_CLOSE_BRACKET)
+ {
+ /* We treat the last '-' as a normal character. */
+ re_string_skip_bytes (regexp, -token_len);
+ token->type = CHARACTER;
+ }
+ else
+ is_range_exp = 1;
+ }
+
+ if (is_range_exp == 1)
+ {
+ end_elem.opr.name = end_name_buf;
+ ret = parse_bracket_element (&end_elem, regexp, &token2, token_len2,
+ dfa, syntax);
+ if (BE (ret != REG_NOERROR, 0))
+ {
+ *err = ret;
+ goto parse_bracket_exp_free_return;
+ }
+
+ token_len = peek_token_bracket (token, regexp, syntax);
+ if (BE (token->type == END_OF_RE, 0))
+ {
+ *err = REG_BADPAT;
+ goto parse_bracket_exp_free_return;
+ }
+ *err = build_range_exp (sbcset,
+#ifdef RE_ENABLE_I18N
+ mbcset, &range_alloc,
+#endif /* RE_ENABLE_I18N */
+ &start_elem, &end_elem);
+ if (BE (*err != REG_NOERROR, 0))
+ goto parse_bracket_exp_free_return;
+ }
+ else
+ {
+ switch (start_elem.type)
+ {
+ case SB_CHAR:
+ bitset_set (sbcset, start_elem.opr.ch);
+ break;
+#ifdef RE_ENABLE_I18N
+ case MB_CHAR:
+ /* Check whether the array has enough space. */
+ if (mbchar_alloc == mbcset->nmbchars)
+ {
+ /* Not enough, realloc it. */
+ /* +1 in case of mbcset->nmbchars is 0. */
+ mbchar_alloc = 2 * mbcset->nmbchars + 1;
+ /* Use realloc since array is NULL if *alloc == 0. */
+ mbcset->mbchars = re_realloc (mbcset->mbchars, wchar_t,
+ mbchar_alloc);
+ if (BE (mbcset->mbchars == NULL, 0))
+ goto parse_bracket_exp_espace;
+ }
+ mbcset->mbchars[mbcset->nmbchars++] = start_elem.opr.wch;
+ break;
+#endif /* RE_ENABLE_I18N */
+ case EQUIV_CLASS:
+ *err = build_equiv_class (sbcset,
+#ifdef RE_ENABLE_I18N
+ mbcset, &equiv_class_alloc,
+#endif /* RE_ENABLE_I18N */
+ start_elem.opr.name);
+ if (BE (*err != REG_NOERROR, 0))
+ goto parse_bracket_exp_free_return;
+ break;
+ case COLL_SYM:
+ *err = build_collating_symbol (sbcset,
+#ifdef RE_ENABLE_I18N
+ mbcset, &coll_sym_alloc,
+#endif /* RE_ENABLE_I18N */
+ start_elem.opr.name);
+ if (BE (*err != REG_NOERROR, 0))
+ goto parse_bracket_exp_free_return;
+ break;
+ case CHAR_CLASS:
+ *err = build_charclass (sbcset,
+#ifdef RE_ENABLE_I18N
+ mbcset, &char_class_alloc,
+#endif /* RE_ENABLE_I18N */
+ start_elem.opr.name, syntax);
+ if (BE (*err != REG_NOERROR, 0))
+ goto parse_bracket_exp_free_return;
+ break;
+ default:
+ assert (0);
+ break;
+ }
+ }
+ if (token->type == OP_CLOSE_BRACKET)
+ break;
+ }
+
+ re_string_skip_bytes (regexp, token_len); /* Skip a token. */
+
+ /* If it is non-matching list. */
+#ifdef RE_ENABLE_I18N
+ if (mbcset->non_match)
+#else /* not RE_ENABLE_I18N */
+ if (non_match)
+#endif /* not RE_ENABLE_I18N */
+ bitset_not (sbcset);
+
+ /* Build a tree for simple bracket. */
+ br_token.type = SIMPLE_BRACKET;
+ br_token.opr.sbcset = sbcset;
+ new_idx = re_dfa_add_node (dfa, br_token, 0);
+ work_tree = create_tree (NULL, NULL, 0, new_idx);
+ if (BE (new_idx == -1 || work_tree == NULL, 0))
+ goto parse_bracket_exp_espace;
+
+#ifdef RE_ENABLE_I18N
+ if (mbcset->nmbchars || mbcset->ncoll_syms || mbcset->nequiv_classes
+ || mbcset->nranges || (MB_CUR_MAX > 1 && (mbcset->nchar_classes
+ || mbcset->non_match)))
+ {
+ re_token_t alt_token;
+ bin_tree_t *mbc_tree;
+ /* Build a tree for complex bracket. */
+ br_token.type = COMPLEX_BRACKET;
+ br_token.opr.mbcset = mbcset;
+ dfa->has_mb_node = 1;
+ new_idx = re_dfa_add_node (dfa, br_token, 0);
+ mbc_tree = create_tree (NULL, NULL, 0, new_idx);
+ if (BE (new_idx == -1 || mbc_tree == NULL, 0))
+ goto parse_bracket_exp_espace;
+ /* Then join them by ALT node. */
+ dfa->has_plural_match = 1;
+ alt_token.type = OP_ALT;
+ new_idx = re_dfa_add_node (dfa, alt_token, 0);
+ work_tree = create_tree (work_tree, mbc_tree, 0, new_idx);
+ if (BE (new_idx != -1 && mbc_tree != NULL, 1))
+ return work_tree;
+ }
+ else
+ {
+ free_charset (mbcset);
+ return work_tree;
+ }
+#else /* not RE_ENABLE_I18N */
+ return work_tree;
+#endif /* not RE_ENABLE_I18N */
+
+ parse_bracket_exp_espace:
+ *err = REG_ESPACE;
+ parse_bracket_exp_free_return:
+ re_free (sbcset);
+#ifdef RE_ENABLE_I18N
+ free_charset (mbcset);
+#endif /* RE_ENABLE_I18N */
+ return NULL;
+}
+
+/* Parse one element of a bracket expression and describe it in ELEM.
+
+   With RE_ENABLE_I18N, a character wider than one byte at the current
+   position of REGEXP becomes an MB_CHAR element and exactly its bytes are
+   consumed; TOKEN and TOKEN_LEN are not consulted on that path.
+   Otherwise TOKEN_LEN bytes are skipped.  A token of type
+   OP_OPEN_COLL_ELEM, OP_OPEN_CHAR_CLASS or OP_OPEN_EQUIV_CLASS is handed
+   on to parse_bracket_symbol; any other token yields an SB_CHAR element
+   holding the token's character.
+
+   DFA and SYNTAX are accepted but not used by this body.
+   Returns REG_NOERROR, or whatever parse_bracket_symbol returns.  */
+
+static reg_errcode_t
+parse_bracket_element (elem, regexp, token, token_len, dfa, syntax)
+     bracket_elem_t *elem;
+     re_string_t *regexp;
+     re_token_t *token;
+     int token_len;
+     re_dfa_t *dfa;
+     reg_syntax_t syntax;
+{
+#ifdef RE_ENABLE_I18N
+  int mb_len = re_string_char_size_at (regexp, re_string_cur_idx (regexp));
+
+  if (mb_len > 1)
+    {
+      /* Multibyte character: record it as a wide character.  */
+      elem->type = MB_CHAR;
+      elem->opr.wch = re_string_wchar_at (regexp, re_string_cur_idx (regexp));
+      re_string_skip_bytes (regexp, mb_len);
+      return REG_NOERROR;
+    }
+#endif /* RE_ENABLE_I18N */
+
+  /* Consume the token the caller has already peeked at.  */
+  re_string_skip_bytes (regexp, token_len);
+
+  switch (token->type)
+    {
+    case OP_OPEN_COLL_ELEM:
+    case OP_OPEN_CHAR_CLASS:
+    case OP_OPEN_EQUIV_CLASS:
+      return parse_bracket_symbol (elem, regexp, token);
+    default:
+      break;
+    }
+
+  /* Plain single-byte character.  */
+  elem->type = SB_CHAR;
+  elem->opr.ch = token->opr.c;
+  return REG_NOERROR;
+}
+
+/* Parse a bracket symbol in a bracket expression, i.e. one of
+   [:<character_class>:], [.<collating_element>.] and
+   [=<equivalent_class>=].
+
+   TOKEN is the opening token; its opr.c is the delimiter character which,
+   when followed by ']', closes the symbol.  The bytes in between are
+   copied to ELEM->opr.name (NUL-terminated) and ELEM->type is set from
+   the token type.  Character-class names are read with
+   re_string_fetch_byte_case, the other two kinds with
+   re_string_fetch_byte.
+
+   Returns REG_EBRACK if the input ends before the closing delimiter pair
+   or if the name, terminator included, needs more than
+   BRACKET_NAME_BUF_SIZE bytes; REG_NOERROR otherwise.  */
+
+static reg_errcode_t
+parse_bracket_symbol (elem, regexp, token)
+     bracket_elem_t *elem;
+     re_string_t *regexp;
+     re_token_t *token;
+{
+  unsigned char ch, delim = token->opr.c;
+  int i = 0;
+  for (;; ++i)
+    {
+      if (re_string_eoi(regexp) || i >= BRACKET_NAME_BUF_SIZE)
+        return REG_EBRACK;
+      if (token->type == OP_OPEN_CHAR_CLASS)
+        ch = re_string_fetch_byte_case (regexp);
+      else
+        ch = re_string_fetch_byte (regexp);
+      /* The closing ']' must itself lie inside the input.  Without this
+         check the peek below would look at the byte just past the end of
+         the pattern and could accept a ']' that is not part of it.  */
+      if (re_string_eoi (regexp))
+        return REG_EBRACK;
+      if (ch == delim && re_string_peek_byte (regexp, 0) == ']')
+        break;
+      elem->opr.name[i] = ch;
+    }
+  re_string_skip_bytes (regexp, 1);	/* Skip the closing ']'.  */
+  elem->opr.name[i] = '\0';
+  switch (token->type)
+    {
+    case OP_OPEN_COLL_ELEM:
+      elem->type = COLL_SYM;
+      break;
+    case OP_OPEN_EQUIV_CLASS:
+      elem->type = EQUIV_CLASS;
+      break;
+    case OP_OPEN_CHAR_CLASS:
+      elem->type = CHAR_CLASS;
+      break;
+    default:
+      /* Any other token type leaves ELEM->type untouched.  */
+      break;
+    }
+  return REG_NOERROR;
+}
+
+ /* Helper function for parse_bracket_exp.
+    Build the equivalence class which is represented by NAME.
+    The results are written to MBCSET and SBCSET.
+
+    When built with _LIBC and RE_ENABLE_I18N and the locale reports
+    collation rules (nrules != 0), every single byte whose entry in the
+    weights table equals that of NAME is set in SBCSET, and NAME's index
+    is appended to MBCSET->equiv_classes.  Otherwise NAME must be exactly
+    one byte long and that byte is set in SBCSET.
+
+    EQUIV_CLASS_ALLOC is the allocated size of mbcset->equiv_classes; it
+    is a pointer argument since we may update it.
+
+    Returns REG_NOERROR on success, REG_ECOLLATE if NAME is not accepted
+    as a character, REG_ESPACE if the array cannot be grown (in which case
+    the existing array and *EQUIV_CLASS_ALLOC are left untouched).  */
+
+static reg_errcode_t
+#ifdef RE_ENABLE_I18N
+build_equiv_class (sbcset, mbcset, equiv_class_alloc, name)
+     re_charset_t *mbcset;
+     int *equiv_class_alloc;
+#else /* not RE_ENABLE_I18N */
+build_equiv_class (sbcset, name)
+#endif /* not RE_ENABLE_I18N */
+     re_bitset_ptr_t sbcset;
+     const unsigned char *name;
+{
+#if defined _LIBC && defined RE_ENABLE_I18N
+  uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
+  if (nrules != 0)
+    {
+      const int32_t *table, *indirect;
+      const unsigned char *weights, *extra, *cp;
+      unsigned char char_buf[2];
+      int32_t idx1, idx2;
+      unsigned int ch;
+      size_t len;
+      /* This #include defines a local function!  */
+# include <locale/weight.h>
+      /* Calculate the index for equivalence class.  */
+      cp = name;
+      table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
+      weights = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
+                                                     _NL_COLLATE_WEIGHTMB);
+      extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
+                                                   _NL_COLLATE_EXTRAMB);
+      indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
+                                                _NL_COLLATE_INDIRECTMB);
+      idx1 = findidx (&cp);
+      if (BE (idx1 == 0 || cp < name + strlen ((const char *) name), 0))
+        /* This isn't a valid character.  */
+        return REG_ECOLLATE;
+
+      /* Build single byte matching table for this equivalence class.  */
+      char_buf[1] = (unsigned char) '\0';
+      len = weights[idx1];
+      for (ch = 0; ch < SBC_MAX; ++ch)
+        {
+          char_buf[0] = ch;
+          cp = char_buf;
+          idx2 = findidx (&cp);
+/*
+          idx2 = table[ch];
+*/
+          if (idx2 == 0)
+            /* This isn't a valid character.  */
+            continue;
+          if (len == weights[idx2])
+            {
+              int cnt = 0;
+              while (cnt <= len &&
+                     weights[idx1 + 1 + cnt] == weights[idx2 + 1 + cnt])
+                ++cnt;
+
+              /* All LEN + 1 compared entries matched.  */
+              if (cnt > len)
+                bitset_set (sbcset, ch);
+            }
+        }
+      /* Check whether the array has enough space.  */
+      if (*equiv_class_alloc == mbcset->nequiv_classes)
+        {
+          /* Not enough, realloc it.  */
+          /* +1 in case of mbcset->nequiv_classes is 0.  */
+          int new_alloc = 2 * mbcset->nequiv_classes + 1;
+          /* Use realloc since the array is NULL if *alloc == 0.
+             Keep the old pointer until the call has succeeded so that a
+             failure neither leaks the old array nor leaves
+             *EQUIV_CLASS_ALLOC larger than what is really allocated.  */
+          int32_t *new_classes = re_realloc (mbcset->equiv_classes, int32_t,
+                                             new_alloc);
+          if (BE (new_classes == NULL, 0))
+            return REG_ESPACE;
+          mbcset->equiv_classes = new_classes;
+          *equiv_class_alloc = new_alloc;
+        }
+      mbcset->equiv_classes[mbcset->nequiv_classes++] = idx1;
+    }
+  else
+#endif /* _LIBC && RE_ENABLE_I18N */
+    {
+      /* No collation data: only a single-byte name is accepted, and it
+         stands for itself.  */
+      if (BE (strlen ((const char *) name) != 1, 0))
+        return REG_ECOLLATE;
+      bitset_set (sbcset, *name);
+    }
+  return REG_NOERROR;
+}
+
+ /* Helper function for parse_bracket_exp.
+    Build the character class which is represented by CLASS_NAME.
+    The results are written to MBCSET and SBCSET.
+
+    Every byte value below SBC_MAX for which the matching <ctype.h>
+    predicate holds is set in SBCSET.  With RE_ENABLE_I18N the value of
+    __wctype for the name is also appended to MBCSET->char_classes; this
+    happens before the name is validated, so an unknown name still adds an
+    entry before REG_ECTYPE is returned.  If SYNTAX has RE_ICASE set,
+    "upper" and "lower" are both treated as "alpha".
+
+    CHAR_CLASS_ALLOC is the allocated size of mbcset->char_classes; it is
+    a pointer argument since we may update it.
+
+    Returns REG_NOERROR on success, REG_ECTYPE for an unknown class name,
+    REG_ESPACE if the array cannot be grown (in which case the existing
+    array and *CHAR_CLASS_ALLOC are left untouched).  */
+
+static reg_errcode_t
+#ifdef RE_ENABLE_I18N
+build_charclass (sbcset, mbcset, char_class_alloc, class_name, syntax)
+     re_charset_t *mbcset;
+     int *char_class_alloc;
+#else /* not RE_ENABLE_I18N */
+build_charclass (sbcset, class_name, syntax)
+#endif /* not RE_ENABLE_I18N */
+     re_bitset_ptr_t sbcset;
+     const unsigned char *class_name;
+     reg_syntax_t syntax;
+{
+  int i;
+  const char *name = (const char *) class_name;
+
+  /* In case of REG_ICASE "upper" and "lower" match the both of
+     upper and lower cases.  */
+  if ((syntax & RE_ICASE)
+      && (strcmp (name, "upper") == 0 || strcmp (name, "lower") == 0))
+    name = "alpha";
+
+#ifdef RE_ENABLE_I18N
+  /* Check the space of the arrays.  */
+  if (*char_class_alloc == mbcset->nchar_classes)
+    {
+      /* Not enough, realloc it.  */
+      /* +1 in case of mbcset->nchar_classes is 0.  */
+      int new_alloc = 2 * mbcset->nchar_classes + 1;
+      /* Use realloc since array is NULL if *alloc == 0.
+         Keep the old pointer until the call has succeeded so that a
+         failure neither leaks the old array nor leaves *CHAR_CLASS_ALLOC
+         larger than what is really allocated.  */
+      wctype_t *new_classes = re_realloc (mbcset->char_classes, wctype_t,
+                                          new_alloc);
+      if (BE (new_classes == NULL, 0))
+        return REG_ESPACE;
+      mbcset->char_classes = new_classes;
+      *char_class_alloc = new_alloc;
+    }
+  mbcset->char_classes[mbcset->nchar_classes++] = __wctype (name);
+#endif /* RE_ENABLE_I18N */
+
+/* Set in SBCSET every byte value accepted by CTYPE_FUNC.  Expands to a
+   complete statement, so it is used without a trailing semicolon in the
+   if/else chain below.  */
+#define BUILD_CHARCLASS_LOOP(ctype_func)\
+    for (i = 0; i < SBC_MAX; ++i)	\
+      {					\
+        if (ctype_func (i))		\
+          bitset_set (sbcset, i);	\
+      }
+
+  if (strcmp (name, "alnum") == 0)
+    BUILD_CHARCLASS_LOOP (isalnum)
+  else if (strcmp (name, "cntrl") == 0)
+    BUILD_CHARCLASS_LOOP (iscntrl)
+  else if (strcmp (name, "lower") == 0)
+    BUILD_CHARCLASS_LOOP (islower)
+  else if (strcmp (name, "space") == 0)
+    BUILD_CHARCLASS_LOOP (isspace)
+  else if (strcmp (name, "alpha") == 0)
+    BUILD_CHARCLASS_LOOP (isalpha)
+  else if (strcmp (name, "digit") == 0)
+    BUILD_CHARCLASS_LOOP (isdigit)
+  else if (strcmp (name, "print") == 0)
+    BUILD_CHARCLASS_LOOP (isprint)
+  else if (strcmp (name, "upper") == 0)
+    BUILD_CHARCLASS_LOOP (isupper)
+  else if (strcmp (name, "blank") == 0)
+    BUILD_CHARCLASS_LOOP (isblank)
+  else if (strcmp (name, "graph") == 0)
+    BUILD_CHARCLASS_LOOP (isgraph)
+  else if (strcmp (name, "punct") == 0)
+    BUILD_CHARCLASS_LOOP (ispunct)
+  else if (strcmp (name, "xdigit") == 0)
+    BUILD_CHARCLASS_LOOP (isxdigit)
+  else
+    return REG_ECTYPE;
+
+  return REG_NOERROR;
+}
+
+/* Build the parse tree for the word-character operator.
+
+   The single-byte set contains the "alnum" class plus '_'.  If NOT is
+   non-zero the set is complemented.  With RE_ENABLE_I18N and
+   MB_CUR_MAX > 1 a COMPLEX_BRACKET node for the multibyte set is added as
+   well and joined to the simple bracket with an OP_ALT node; in the
+   negated case bytes for which __btowc returns WEOF are set before the
+   complement and so end up excluded.
+
+   Returns the new tree.  On failure NULL is returned and *ERR is set to
+   REG_ESPACE or to the error reported by build_charclass.  */
+
+static bin_tree_t *
+build_word_op (dfa, not, err)
+     re_dfa_t *dfa;
+     int not;
+     reg_errcode_t *err;
+{
+  re_bitset_ptr_t sbcset;
+#ifdef RE_ENABLE_I18N
+  re_charset_t *mbcset;
+  int alloc = 0;
+#else /* not RE_ENABLE_I18N */
+  int non_match = 0;
+#endif /* not RE_ENABLE_I18N */
+  reg_errcode_t ret;
+  re_token_t br_token;
+  bin_tree_t *tree;
+  int new_idx;
+
+  sbcset = (re_bitset_ptr_t) calloc (sizeof (unsigned int), BITSET_UINTS);
+#ifdef RE_ENABLE_I18N
+  mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
+#endif /* RE_ENABLE_I18N */
+
+#ifdef RE_ENABLE_I18N
+  if (BE (sbcset == NULL || mbcset == NULL, 0))
+#else /* not RE_ENABLE_I18N */
+  if (BE (sbcset == NULL, 0))
+#endif /* not RE_ENABLE_I18N */
+    {
+      /* Only one of the two allocations may have failed; release the
+         other one.  Plain re_free is used because free_charset would
+         dereference a NULL MBCSET.  */
+      re_free (sbcset);
+#ifdef RE_ENABLE_I18N
+      re_free (mbcset);
+#endif /* RE_ENABLE_I18N */
+      *err = REG_ESPACE;
+      return NULL;
+    }
+
+  if (not)
+    {
+#ifdef RE_ENABLE_I18N
+      int i;
+      /*
+      if (syntax & RE_HAT_LISTS_NOT_NEWLINE)
+	bitset_set(cset->sbcset, '\0');
+      */
+      mbcset->non_match = 1;
+      if (MB_CUR_MAX > 1)
+        for (i = 0; i < SBC_MAX; ++i)
+          if (__btowc (i) == WEOF)
+            bitset_set (sbcset, i);
+#else /* not RE_ENABLE_I18N */
+      non_match = 1;
+#endif /* not RE_ENABLE_I18N */
+    }
+
+  /* We don't care the syntax in this case.
+     Word characters are letters, digits and '_', hence "alnum" (the
+     former "alpha" left the digits out).  */
+  ret = build_charclass (sbcset,
+#ifdef RE_ENABLE_I18N
+                         mbcset, &alloc,
+#endif /* RE_ENABLE_I18N */
+                         (const unsigned char *) "alnum", 0);
+
+  if (BE (ret != REG_NOERROR, 0))
+    {
+      re_free (sbcset);
+#ifdef RE_ENABLE_I18N
+      free_charset (mbcset);
+#endif /* RE_ENABLE_I18N */
+      *err = ret;
+      return NULL;
+    }
+  /* \w match '_' also.  */
+  bitset_set (sbcset, '_');
+
+  /* If it is non-matching list.  */
+#ifdef RE_ENABLE_I18N
+  if (mbcset->non_match)
+#else /* not RE_ENABLE_I18N */
+  if (non_match)
+#endif /* not RE_ENABLE_I18N */
+    bitset_not (sbcset);
+
+  /* Build a tree for simple bracket.  */
+  br_token.type = SIMPLE_BRACKET;
+  br_token.opr.sbcset = sbcset;
+  new_idx = re_dfa_add_node (dfa, br_token, 0);
+  tree = create_tree (NULL, NULL, 0, new_idx);
+  if (BE (new_idx == -1 || tree == NULL, 0))
+    goto build_word_op_espace;
+
+#ifdef RE_ENABLE_I18N
+  if (MB_CUR_MAX > 1)
+    {
+      re_token_t alt_token;
+      bin_tree_t *mbc_tree;
+      /* Build a tree for complex bracket.  */
+      br_token.type = COMPLEX_BRACKET;
+      br_token.opr.mbcset = mbcset;
+      dfa->has_mb_node = 1;
+      new_idx = re_dfa_add_node (dfa, br_token, 0);
+      mbc_tree = create_tree (NULL, NULL, 0, new_idx);
+      if (BE (new_idx == -1 || mbc_tree == NULL, 0))
+        goto build_word_op_espace;
+      /* Then join them by ALT node.  */
+      alt_token.type = OP_ALT;
+      new_idx = re_dfa_add_node (dfa, alt_token, 0);
+      tree = create_tree (tree, mbc_tree, 0, new_idx);
+      /* NOTE(review): this tests mbc_tree, which was already verified to
+         be non-NULL above; a NULL result of the create_tree call just
+         above is returned without *ERR being set.  Confirm whether `tree'
+         was meant here.  */
+      if (BE (new_idx != -1 && mbc_tree != NULL, 1))
+        return tree;
+    }
+  else
+    {
+      free_charset (mbcset);
+      return tree;
+    }
+#else /* not RE_ENABLE_I18N */
+  return tree;
+#endif /* not RE_ENABLE_I18N */
+
+  /* NOTE(review): the sets freed below may already have been handed to
+     re_dfa_add_node inside a token; whether the DFA also releases them
+     later cannot be told from this function -- verify.  */
+ build_word_op_espace:
+  re_free (sbcset);
+#ifdef RE_ENABLE_I18N
+  free_charset (mbcset);
+#endif /* RE_ENABLE_I18N */
+  *err = REG_ESPACE;
+  return NULL;
+}
+
+/* This is intended for expressions like "a{1,3}".
+   Fetch tokens from INPUT until an OP_CLOSE_DUP_NUM token or a token
+   whose character is ',' turns up, and return the decimal number formed
+   by the tokens read before it.  *TOKEN is left holding the last token
+   fetched.
+   Return -1 if the number field is empty, as in "{,1}".
+   Return -2 if an error occurred: the end of the pattern was reached, a
+   token was not a digit character, or the value exceeded RE_DUP_MAX.  */
+
+static int
+fetch_number (input, token, syntax)
+     re_string_t *input;
+     re_token_t *token;
+     reg_syntax_t syntax;
+{
+  int value = -1;
+  unsigned char c;
+
+  for (;;)
+    {
+      *token = fetch_token (input, syntax);
+      c = token->opr.c;
+      if (BE (token->type == END_OF_RE, 0))
+        return -2;
+      if (token->type == OP_CLOSE_DUP_NUM || c == ',')
+        break;
+
+      if (token->type != CHARACTER || c < '0' || c > '9' || value == -2)
+        /* Not a digit, or an earlier error: the error stays until the
+           end of the field is found.  */
+        value = -2;
+      else if (value == -1)
+        /* First digit of the field.  */
+        value = c - '0';
+      else
+        value = value * 10 + c - '0';
+
+      if (value > RE_DUP_MAX)
+        value = -2;
+    }
+  return value;
+}
+\f
+#ifdef RE_ENABLE_I18N
+/* Release CSET together with the arrays it points to.  The collation
+   related arrays are released only when built with _LIBC.  CSET must not
+   be NULL, since its members are read unconditionally.  */
+static void
+free_charset (re_charset_t *cset)
+{
+# ifdef _LIBC
+  re_free (cset->range_ends);
+  re_free (cset->range_starts);
+  re_free (cset->equiv_classes);
+  re_free (cset->coll_syms);
+# endif
+  re_free (cset->char_classes);
+  re_free (cset->mbchars);
+  /* The container itself goes last.  */
+  re_free (cset);
+}
+#endif /* RE_ENABLE_I18N */
+\f
+/* Functions for binary tree operation. */
+
+/* Create a tree node of type TYPE with node index INDEX whose children
+   are LEFT and RIGHT (either may be NULL), and make it their parent.
+   The new node has no parent, `first' and `next' set to -1, and an empty
+   eclosure set.
+   Note: if the allocation fails, LEFT and RIGHT are freed here and NULL
+   is returned, so the caller must not free them again.  */
+
+static bin_tree_t *
+create_tree (left, right, type, index)
+     bin_tree_t *left;
+     bin_tree_t *right;
+     re_token_type_t type;
+     int index;
+{
+  bin_tree_t *node = re_malloc (bin_tree_t, 1);
+
+  if (BE (node == NULL, 0))
+    {
+      free_bin_tree (left);
+      free_bin_tree (right);
+      return NULL;
+    }
+
+  /* Payload.  */
+  node->type = type;
+  node->node_idx = index;
+  node->first = -1;
+  node->next = -1;
+  re_node_set_init_empty (&node->eclosure);
+
+  /* Links, in both directions.  */
+  node->parent = NULL;
+  node->left = left;
+  node->right = right;
+  if (left != NULL)
+    left->parent = node;
+  if (right != NULL)
+    right->parent = node;
+
+  return node;
+}
+
+/* Recursively free the subtree rooted at TREE; a NULL TREE is accepted
+   and ignored.  The eclosure set of the nodes is not released here.  */
+
+static void
+free_bin_tree (tree)
+     bin_tree_t *tree;
+{
+  if (tree != NULL)
+    {
+      /* Children first, then the node itself.  */
+      free_bin_tree (tree->left);
+      free_bin_tree (tree->right);
+      re_free (tree);
+    }
+}
+
+/* Duplicate the tree rooted at SRC and return the copy, or NULL if an
+   allocation fails (whatever part of the copy was built is freed first).
+
+   For a node of type NON_TYPE the DFA node it refers to is copied with
+   re_dfa_add_node and the copy is flagged as `duplicated'; for any other
+   type the new tree node's index is the type value itself.  */
+
+static bin_tree_t *
+duplicate_tree (src, dfa)
+     const bin_tree_t *src;
+     re_dfa_t *dfa;
+{
+  bin_tree_t *left = NULL, *right = NULL;
+  int new_node_idx;
+  /* Since node indices must be according to Post-order of the tree,
+     we must duplicate the left at first.  */
+  if (src->left != NULL)
+    {
+      left = duplicate_tree (src->left, dfa);
+      if (left == NULL)
+        return NULL;
+    }
+
+  /* Secondly, duplicate the right.  */
+  if (src->right != NULL)
+    {
+      right = duplicate_tree (src->right, dfa);
+      if (right == NULL)
+        {
+          free_bin_tree (left);
+          return NULL;
+        }
+    }
+
+  /* At last, duplicate itself.  */
+  if (src->type == NON_TYPE)
+    {
+      new_node_idx = re_dfa_add_node (dfa, dfa->nodes[src->node_idx], 0);
+      /* Check for failure before the index is used; marking the node
+         first would write to dfa->nodes[-1].  */
+      if (BE (new_node_idx == -1, 0))
+        {
+          free_bin_tree (left);
+          free_bin_tree (right);
+          return NULL;
+        }
+      dfa->nodes[new_node_idx].duplicated = 1;
+    }
+  else
+    new_node_idx = src->type;
+
+  /* create_tree frees LEFT and RIGHT itself when it fails, so they must
+     not be freed a second time here.  */
+  return create_tree (left, right, src->type, new_node_idx);
+}
--- /dev/null
+/*
+ * Regular Expression Functions from glibc 2.3.2
+ * (renamed to sh_* to avoid clashes with the system libraries)
+ */
+
+/* Include guard. */
+#ifndef _UCW_REGEX_H
+#define _UCW_REGEX_H
+
+/* Map the regex entry points onto sh_* names. The macros have to be
+ defined before the #include below so that the declarations in regex.h
+ are renamed as well. */
+#define regfree sh_regfree
+#define regexec sh_regexec
+#define regcomp sh_regcomp
+#define regerror sh_regerror
+#define re_set_registers sh_re_set_registers
+/* NOTE(review): the next line maps to sh_re_match2 (no underscore before
+ the 2), unlike re_search_2 -> sh_re_search_2 below. Confirm whether this
+ spelling is intentional before relying on the sh_ name directly. */
+#define re_match_2 sh_re_match2
+#define re_match sh_re_match
+#define re_search sh_re_search
+#define re_compile_pattern sh_re_compile_pattern
+#define re_set_syntax sh_re_set_syntax
+#define re_search_2 sh_re_search_2
+#define re_compile_fastmap sh_re_compile_fastmap
+
+#include "lib/regex/regex.h"
+
+#endif
--- /dev/null
+/*
+ * Regular Expression Functions from glibc 2.3.2
+ */
+
+#include <sys/types.h>
+#include "regex-sh.h"
+#include "regex_internal.h"
+#include "regex_internal.c"
+#include "regcomp.c"
+#include "regexec.c"
--- /dev/null
+/* Definitions for data structures and routines for the regular
+ expression library.
+ Copyright (C) 1985,1989-93,1995-98,2000,2001,2002
+ Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#ifndef _REGEX_H
+#define _REGEX_H 1
+
+/* Allow the use in C++ code. */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* POSIX says that <sys/types.h> must be included (by the caller) before
+ <regex.h>. */
+
+#if !defined _POSIX_C_SOURCE && !defined _POSIX_SOURCE && defined VMS
+/* VMS doesn't have `size_t' in <sys/types.h>, even though POSIX says it
+ should be there. */
+# include <stddef.h>
+#endif
+
+/* The following two types have to be signed and unsigned integer type
+ wide enough to hold a value of a pointer. For most ANSI compilers
+ ptrdiff_t and size_t should be likely OK. Still size of these two
+ types is 2 for Microsoft C. Ugh... */
+typedef long int s_reg_t;
+typedef unsigned long int active_reg_t;
+
+/* The following bits are used to determine the regexp syntax we
+ recognize. The set/not-set meanings are chosen so that Emacs syntax
+ remains the value 0. The bits are given in alphabetical order, and
+ the definitions shifted by one from the previous bit; thus, when we
+ add or remove a bit, only one other definition need change. */
+typedef unsigned long int reg_syntax_t;
+
+/* If this bit is not set, then \ inside a bracket expression is literal.
+ If set, then such a \ quotes the following character. */
+#define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1)
+
+/* If this bit is not set, then + and ? are operators, and \+ and \? are
+ literals.
+ If set, then \+ and \? are operators and + and ? are literals. */
+#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1)
+
+/* If this bit is set, then character classes are supported. They are:
+ [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:],
+ [:space:], [:print:], [:punct:], [:graph:], [:cntrl:], and [:blank:].
+ If not set, then character classes are not supported. */
+#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1)
+
+/* If this bit is set, then ^ and $ are always anchors (outside bracket
+ expressions, of course).
+ If this bit is not set, then it depends:
+ ^ is an anchor if it is at the beginning of a regular
+ expression or after an open-group or an alternation operator;
+ $ is an anchor if it is at the end of a regular expression, or
+ before a close-group or an alternation operator.
+
+ This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
+ POSIX draft 11.2 says that * etc. in leading positions is undefined.
+ We already implemented a previous draft which made those constructs
+ invalid, though, so we haven't changed the code back. */
+#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1)
+
+/* If this bit is set, then special characters are always special
+ regardless of where they are in the pattern.
+ If this bit is not set, then special characters are special only in
+ some contexts; otherwise they are ordinary. Specifically,
+ * + ? and intervals are only special when not after the beginning,
+ open-group, or alternation operator. */
+#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)
+
+/* If this bit is set, then *, +, ?, and { cannot be first in an re or
+ immediately after an alternation or begin-group operator. */
+#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1)
+
+/* If this bit is set, then . matches newline.
+ If not set, then it doesn't. */
+#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1)
+
+/* If this bit is set, then . doesn't match NUL.
+ If not set, then it does. */
+#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1)
+
+/* If this bit is set, nonmatching lists [^...] do not match newline.
+ If not set, they do. */
+#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1)
+
+/* If this bit is set, either \{...\} or {...} defines an
+ interval, depending on RE_NO_BK_BRACES.
+ If not set, \{, \}, {, and } are literals. */
+#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)
+
+/* If this bit is set, +, ? and | aren't recognized as operators.
+ If not set, they are. */
+#define RE_LIMITED_OPS (RE_INTERVALS << 1)
+
+/* If this bit is set, newline is an alternation operator.
+ If not set, newline is literal. */
+#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)
+
+/* If this bit is set, then `{...}' defines an interval, and \{ and \}
+ are literals.
+ If not set, then `\{...\}' defines an interval. */
+#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)
+
+/* If this bit is set, (...) defines a group, and \( and \) are literals.
+ If not set, \(...\) defines a group, and ( and ) are literals. */
+#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1)
+
+/* If this bit is set, then \<digit> matches <digit>.
+ If not set, then \<digit> is a back-reference. */
+#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)
+
+/* If this bit is set, then | is an alternation operator, and \| is literal.
+ If not set, then \| is an alternation operator, and | is literal. */
+#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1)
+
+/* If this bit is set, then an ending range point collating lower
+ than the starting range point, as in [z-a], is invalid.
+ If not set, then when ending range point collates lower than the
+ starting range point, the range is ignored. */
+#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)
+
+/* If this bit is set, then an unmatched ) is ordinary.
+ If not set, then an unmatched ) is invalid. */
+#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)
+
+/* If this bit is set, succeed as soon as we match the whole pattern,
+ without further backtracking. */
+#define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1)
+
+/* If this bit is set, do not process the GNU regex operators.
+ If not set, then the GNU regex operators are recognized. */
+#define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1)
+
+/* If this bit is set, turn on internal regex debugging.
+ If not set, and debugging was on, turn it off.
+ This only works if regex.c is compiled -DDEBUG.
+ We define this bit always, so that all that's needed to turn on
+ debugging is to recompile regex.c; the calling code can always have
+ this bit set, and it won't affect anything in the normal case. */
+#define RE_DEBUG (RE_NO_GNU_OPS << 1)
+
+/* If this bit is set, a syntactically invalid interval is treated as
+ a string of ordinary characters. For example, the ERE 'a{1' is
+ treated as 'a\{1'. */
+#define RE_INVALID_INTERVAL_ORD (RE_DEBUG << 1)
+
+/* If this bit is set, then ignore case when matching.
+ If not set, then case is significant. */
+#define RE_ICASE (RE_INVALID_INTERVAL_ORD << 1)
+
+/* This global variable defines the particular regexp syntax to use (for
+ some interfaces). When a regexp is compiled, the syntax used is
+ stored in the pattern buffer, so changing this does not affect
+ already-compiled regexps. */
+extern reg_syntax_t re_syntax_options;
+\f
+/* Define combinations of the above bits for the standard possibilities.
+ (The [[[ comments delimit what gets put into the Texinfo file, so
+ don't delete them!) */
+/* [[[begin syntaxes]]] */
+#define RE_SYNTAX_EMACS 0
+
+#define RE_SYNTAX_AWK \
+ (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \
+ | RE_NO_BK_PARENS | RE_NO_BK_REFS \
+ | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \
+ | RE_DOT_NEWLINE | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS)
+
+#define RE_SYNTAX_GNU_AWK \
+ ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DEBUG) \
+ & ~(RE_DOT_NOT_NULL | RE_INTERVALS | RE_CONTEXT_INDEP_OPS \
+ | RE_CONTEXT_INVALID_OPS ))
+
+#define RE_SYNTAX_POSIX_AWK \
+ (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS \
+ | RE_INTERVALS | RE_NO_GNU_OPS)
+
+#define RE_SYNTAX_GREP \
+ (RE_BK_PLUS_QM | RE_CHAR_CLASSES \
+ | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \
+ | RE_NEWLINE_ALT)
+
+#define RE_SYNTAX_EGREP \
+ (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \
+ | RE_NEWLINE_ALT | RE_NO_BK_PARENS \
+ | RE_NO_BK_VBAR)
+
+#define RE_SYNTAX_POSIX_EGREP \
+ (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES \
+ | RE_INVALID_INTERVAL_ORD)
+
+/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */
+#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC
+
+#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC
+
+/* Syntax bits common to both basic and extended POSIX regex syntax. */
+#define _RE_SYNTAX_POSIX_COMMON \
+ (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \
+ | RE_INTERVALS | RE_NO_EMPTY_RANGES)
+
+#define RE_SYNTAX_POSIX_BASIC \
+ (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM)
+
+/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes
+ RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. Actually, this
+ isn't minimal, since other operators, such as \`, aren't disabled. */
+#define RE_SYNTAX_POSIX_MINIMAL_BASIC \
+ (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)
+
+#define RE_SYNTAX_POSIX_EXTENDED \
+ (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \
+ | RE_NO_BK_PARENS | RE_NO_BK_VBAR \
+ | RE_CONTEXT_INVALID_OPS | RE_UNMATCHED_RIGHT_PAREN_ORD)
+
+/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INDEP_OPS is
+ removed and RE_NO_BK_REFS is added. */
+#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \
+ (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \
+ | RE_NO_BK_PARENS | RE_NO_BK_REFS \
+ | RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD)
+/* [[[end syntaxes]]] */
+\f
+/* Maximum number of duplicates an interval can allow. Some systems
+ (erroneously) define this in other header files, but we want our
+ value, so remove any previous define. */
+#ifdef RE_DUP_MAX
+# undef RE_DUP_MAX
+#endif
+/* If sizeof(int) == 2, then ((1 << 15) - 1) overflows. */
+#define RE_DUP_MAX (0x7fff)
+
+
+/* POSIX `cflags' bits (i.e., information for `regcomp'). */
+
+/* If this bit is set, then use extended regular expression syntax.
+ If not set, then use basic regular expression syntax. */
+#define REG_EXTENDED 1
+
+/* If this bit is set, then ignore case when matching.
+ If not set, then case is significant. */
+#define REG_ICASE (REG_EXTENDED << 1)
+
+/* If this bit is set, then anchors do not match at newline
+ characters in the string.
+ If not set, then anchors do match at newlines. */
+#define REG_NEWLINE (REG_ICASE << 1)
+
+/* If this bit is set, then report only success or fail in regexec.
+ If not set, then returns differ between not matching and errors. */
+#define REG_NOSUB (REG_NEWLINE << 1)
+
+
+/* POSIX `eflags' bits (i.e., information for regexec). */
+
+/* If this bit is set, then the beginning-of-line operator doesn't match
+ the beginning of the string (presumably because it's not the
+ beginning of a line).
+ If not set, then the beginning-of-line operator does match the
+ beginning of the string. */
+#define REG_NOTBOL 1
+
+/* Like REG_NOTBOL, except for the end-of-line. */
+#define REG_NOTEOL (1 << 1)
+
+
+/* Error codes returned by the regex routines.
+ If any error codes are removed, changed, or added, update the
+ `re_error_msg' table in regex.c. */
+typedef enum
+{
+#ifdef _XOPEN_SOURCE
+ REG_ENOSYS = -1, /* This will never happen for this implementation. */
+#endif
+
+ REG_NOERROR = 0, /* Success. */
+ REG_NOMATCH, /* Didn't find a match (for regexec). */
+
+ /* POSIX regcomp return error codes. (In the order listed in the
+ standard.) */
+ REG_BADPAT, /* Invalid pattern. */
+ REG_ECOLLATE, /* Invalid collation name (e.g. a bad [=...=] equivalence class). */
+ REG_ECTYPE, /* Invalid character class name. */
+ REG_EESCAPE, /* Trailing backslash. */
+ REG_ESUBREG, /* Invalid back reference. */
+ REG_EBRACK, /* Unmatched left bracket. */
+ REG_EPAREN, /* Parenthesis imbalance. */
+ REG_EBRACE, /* Unmatched \{. */
+ REG_BADBR, /* Invalid contents of \{\}. */
+ REG_ERANGE, /* Invalid range end. */
+ REG_ESPACE, /* Ran out of memory. */
+ REG_BADRPT, /* No preceding re for repetition op. */
+
+ /* Error codes we've added. */
+ REG_EEND, /* Premature end. */
+ REG_ESIZE, /* Compiled pattern bigger than 2^16 bytes. */
+ REG_ERPAREN /* Unmatched ) or \); not returned from regcomp. */
+} reg_errcode_t;
+\f
+/* This data structure represents a compiled pattern. Before calling
+ the pattern compiler, the fields `buffer', `allocated', `fastmap',
+ `translate', and `no_sub' can be set. After the pattern has been
+ compiled, the `re_nsub' field is available. All other fields are
+ private to the regex routines. */
+
+/* Type of the `translate' member below. It defaults to `char *' unless
+ the including file has already defined RE_TRANSLATE_TYPE. */
+#ifndef RE_TRANSLATE_TYPE
+# define RE_TRANSLATE_TYPE char *
+#endif
+
+struct re_pattern_buffer
+{
+/* [[[begin pattern_buffer]]] */
+ /* Space that holds the compiled pattern. It is declared as
+ `unsigned char *' because its elements are
+ sometimes used as array indexes. */
+ unsigned char *buffer;
+
+ /* Number of bytes to which `buffer' points. */
+ unsigned long int allocated;
+
+ /* Number of bytes actually used in `buffer'. */
+ unsigned long int used;
+
+ /* Syntax setting with which the pattern was compiled. */
+ reg_syntax_t syntax;
+
+ /* Pointer to a fastmap, if any, otherwise zero. re_search uses
+ the fastmap, if there is one, to skip over impossible
+ starting points for matches. */
+ char *fastmap;
+
+ /* Either a translate table to apply to all characters before
+ comparing them, or zero for no translation. The translation
+ is applied to a pattern when it is compiled and to a string
+ when it is matched. */
+ RE_TRANSLATE_TYPE translate;
+
+ /* Number of subexpressions found by the compiler. */
+ size_t re_nsub;
+
+ /* Zero if this pattern cannot match the empty string, one else.
+ Well, in truth it's used only in `re_search_2', to see
+ whether or not we should use the fastmap, so we don't set
+ this absolutely perfectly; see `re_compile_fastmap' (the
+ `duplicate' case). */
+ unsigned can_be_null : 1;
+
+ /* If REGS_UNALLOCATED, allocate space in the `regs' structure
+ for `max (RE_NREGS, re_nsub + 1)' groups.
+ If REGS_REALLOCATE, reallocate space if necessary.
+ If REGS_FIXED, use what's there.
+ The two-bit field holds one of the three REGS_* values defined
+ just below. */
+#define REGS_UNALLOCATED 0
+#define REGS_REALLOCATE 1
+#define REGS_FIXED 2
+ unsigned regs_allocated : 2;
+
+ /* Set to zero when `regex_compile' compiles a pattern; set to one
+ by `re_compile_fastmap' if it updates the fastmap. */
+ unsigned fastmap_accurate : 1;
+
+ /* If set, `re_match_2' does not return information about
+ subexpressions. */
+ unsigned no_sub : 1;
+
+ /* If set, a beginning-of-line anchor doesn't match at the
+ beginning of the string. */
+ unsigned not_bol : 1;
+
+ /* Similarly for an end-of-line anchor. */
+ unsigned not_eol : 1;
+
+ /* If true, an anchor at a newline matches. */
+ unsigned newline_anchor : 1;
+
+/* [[[end pattern_buffer]]] */
+};
+
+/* `regex_t' is simply another name for the pattern buffer above. */
+typedef struct re_pattern_buffer regex_t;
+\f
+/* Type for byte offsets within the string. POSIX mandates this. */
+typedef int regoff_t;
+
+
+/* This is the structure we store register match data in. See
+ regex.texinfo for a full description of what registers match.
+ Unlike the POSIX `regmatch_t' array, start and end offsets are
+ kept in two separate arrays. */
+struct re_registers
+{
+ unsigned num_regs; /* Number of registers the two arrays are set up to hold (see `re_set_registers'). */
+ regoff_t *start; /* Start offsets, one per register. */
+ regoff_t *end; /* End offsets, one per register. */
+};
+
+
+/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer,
+ `re_match_2' returns information about at least this many registers
+ the first time a `regs' structure is passed. */
+#ifndef RE_NREGS
+# define RE_NREGS 30
+#endif
+
+
+/* POSIX specification for registers. Aside from the different names than
+ `re_registers', POSIX uses an array of structures, instead of a
+ structure of arrays. This is the element type of the PMATCH
+ array that `regexec' takes. */
+typedef struct
+{
+ regoff_t rm_so; /* Byte offset from string's start to substring's start. */
+ regoff_t rm_eo; /* Byte offset from string's start to substring's end. */
+} regmatch_t;
+\f
+/* Declarations for routines. */
+
+/* To avoid duplicating every routine declaration -- once with a
+ prototype (if we are ANSI), and once without (if we aren't) -- we
+ use the following macro to declare argument types. This
+ unfortunately clutters up the declarations a bit, but I think it's
+ worth it. */
+
+#if __STDC__
+
+# define _RE_ARGS(args) args
+
+#else /* not __STDC__ */
+
+# define _RE_ARGS(args) ()
+
+#endif /* not __STDC__ */
+
+/* Sets the current default syntax to SYNTAX, and return the old syntax.
+ You can also simply assign to the `re_syntax_options' variable. */
+extern reg_syntax_t re_set_syntax _RE_ARGS ((reg_syntax_t syntax));
+
+/* Compile the regular expression PATTERN, with length LENGTH
+ and syntax given by the global `re_syntax_options', into the buffer
+ BUFFER. Return NULL if successful, and an error string if not. */
+extern const char *re_compile_pattern
+ _RE_ARGS ((const char *pattern, size_t length,
+ struct re_pattern_buffer *buffer));
+
+
+/* Compile a fastmap for the compiled pattern in BUFFER; used to
+ accelerate searches. Return 0 if successful and -2 if there was an
+ internal error. */
+extern int re_compile_fastmap _RE_ARGS ((struct re_pattern_buffer *buffer));
+
+
+/* Search in the string STRING (with length LENGTH) for the pattern
+ compiled into BUFFER. Start searching at position START, for RANGE
+ characters. Return the starting position of the match, -1 for no
+ match, or -2 for an internal error. Also return register
+ information in REGS (if REGS is nonzero and BUFFER->no_sub is zero). */
+extern int re_search
+ _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
+ int length, int start, int range, struct re_registers *regs));
+
+
+/* Like `re_search', but search in the concatenation of STRING1 and
+ STRING2. Also, stop searching at index START + STOP. */
+extern int re_search_2
+ _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
+ int length1, const char *string2, int length2,
+ int start, int range, struct re_registers *regs, int stop));
+
+
+/* Like `re_search', but return how many characters in STRING the regexp
+ in BUFFER matched, starting at position START. */
+extern int re_match
+ _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
+ int length, int start, struct re_registers *regs));
+
+
+/* Relates to `re_match' as `re_search_2' relates to `re_search'. */
+extern int re_match_2
+ _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
+ int length1, const char *string2, int length2,
+ int start, struct re_registers *regs, int stop));
+
+
+/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
+ ENDS. Subsequent matches using BUFFER and REGS will use this memory
+ for recording register information. STARTS and ENDS must be
+ allocated with malloc, and must each be at least `NUM_REGS * sizeof
+ (regoff_t)' bytes long.
+
+ If NUM_REGS == 0, then subsequent matches should allocate their own
+ register data.
+
+ Unless this function is called, the first search or match using
+ PATTERN_BUFFER will allocate its own register data, without
+ freeing the old data. */
+extern void re_set_registers
+ _RE_ARGS ((struct re_pattern_buffer *buffer, struct re_registers *regs,
+ unsigned num_regs, regoff_t *starts, regoff_t *ends));
+
+#if defined _REGEX_RE_COMP || defined _LIBC
+# ifndef _CRAY
+/* 4.2 bsd compatibility. */
+extern char *re_comp _RE_ARGS ((const char *));
+extern int re_exec _RE_ARGS ((const char *));
+# endif
+#endif
+
+/* GCC 2.95 and later have "__restrict"; C99 compilers have
+ "restrict", and "configure" may have defined "restrict". */
+#ifndef __restrict
+# if ! (2 < __GNUC__ || (2 == __GNUC__ && 95 <= __GNUC_MINOR__))
+# if defined restrict || 199901L <= __STDC_VERSION__
+# define __restrict restrict
+# else
+# define __restrict
+# endif
+# endif
+#endif
+/* gcc 3.1 and up support the [restrict] syntax. */
+#ifndef __restrict_arr
+# if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)
+# define __restrict_arr __restrict
+# else
+# define __restrict_arr
+# endif
+#endif
+
+/* POSIX compatibility. */
+extern int regcomp _RE_ARGS ((regex_t *__restrict __preg,
+ const char *__restrict __pattern,
+ int __cflags));
+
+extern int regexec _RE_ARGS ((const regex_t *__restrict __preg,
+ const char *__restrict __string, size_t __nmatch,
+ regmatch_t __pmatch[__restrict_arr],
+ int __eflags));
+
+extern size_t regerror _RE_ARGS ((int __errcode, const regex_t *__preg,
+ char *__errbuf, size_t __errbuf_size));
+
+extern void regfree _RE_ARGS ((regex_t *__preg));
+
+
+#ifdef __cplusplus
+}
+#endif /* C++ */
+
+#endif /* regex.h */
+\f
+/*
+Local variables:
+make-backup-files: t
+version-control: t
+trim-versions-without-asking: nil
+End:
+*/
--- /dev/null
+/* Extended regular expression matching and search library.
+ Copyright (C) 2002, 2003 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+static void re_string_construct_common (const char *str, int len,
+ re_string_t *pstr,
+ RE_TRANSLATE_TYPE trans, int icase);
+#ifdef RE_ENABLE_I18N
+static int re_string_skip_chars (re_string_t *pstr, int new_raw_idx,
+ wint_t *last_wc);
+#endif /* RE_ENABLE_I18N */
+static re_dfastate_t *create_newstate_common (re_dfa_t *dfa,
+ const re_node_set *nodes,
+ unsigned int hash);
+static reg_errcode_t register_state (re_dfa_t *dfa, re_dfastate_t *newstate,
+ unsigned int hash);
+static re_dfastate_t *create_ci_newstate (re_dfa_t *dfa,
+ const re_node_set *nodes,
+ unsigned int hash);
+static re_dfastate_t *create_cd_newstate (re_dfa_t *dfa,
+ const re_node_set *nodes,
+ unsigned int context,
+ unsigned int hash);
+static unsigned int inline calc_state_hash (const re_node_set *nodes,
+ unsigned int context);
+\f
+/* Functions for string operation. */
+
+/* Set up PSTR for the LEN-byte string STR and allocate its working
+   buffers.  The initial buffer size is INIT_LEN, or LEN + 1 when that
+   is smaller.  The buffers are only allocated here, not filled in, so
+   re_string_reconstruct must be called before the object is used.
+   Return REG_NOERROR, or the error from re_string_realloc_buffers.  */
+
+static reg_errcode_t
+re_string_allocate (pstr, str, len, init_len, trans, icase)
+     re_string_t *pstr;
+     const char *str;
+     int len, init_len, icase;
+     RE_TRANSLATE_TYPE trans;
+{
+  reg_errcode_t err;
+  int initial_size = init_len;
+
+  if (len + 1 < init_len)
+    initial_size = len + 1;
+
+  re_string_construct_common (str, len, pstr, trans, icase);
+  pstr->stop = pstr->len;
+
+  err = re_string_realloc_buffers (pstr, initial_size);
+  if (BE (err != REG_NOERROR, 0))
+    return err;
+
+  /* Buffers that were not allocated alias the caller's string.  */
+  if (!MBS_CASE_ALLOCATED (pstr))
+    pstr->mbs_case = (unsigned char *) str;
+  if (!MBS_ALLOCATED (pstr))
+    pstr->mbs = pstr->mbs_case;
+
+  /* With no private buffer and a single-byte locale there is nothing
+     left to build, so the whole input already counts as valid.  */
+  if (!(MBS_CASE_ALLOCATED (pstr) || MBS_ALLOCATED (pstr)
+        || MB_CUR_MAX > 1))
+    pstr->valid_len = len;
+  return REG_NOERROR;
+}
+
+/* Set up PSTR for the LEN-byte string STR, allocate its buffers and
+   initialize them.  Return REG_NOERROR, or the error reported by
+   re_string_realloc_buffers.  */
+
+static reg_errcode_t
+re_string_construct (pstr, str, len, trans, icase)
+     re_string_t *pstr;
+     const char *str;
+     int len, icase;
+     RE_TRANSLATE_TYPE trans;
+{
+  re_string_construct_common (str, len, pstr, trans, icase);
+  pstr->stop = pstr->len;
+  /* Start from 0 so that the builders below fill the whole buffers.  */
+  pstr->valid_len = 0;
+
+  if (len > 0)
+    {
+      reg_errcode_t err = re_string_realloc_buffers (pstr, len + 1);
+      if (BE (err != REG_NOERROR, 0))
+        return err;
+    }
+
+  /* Buffers that were not allocated alias the caller's string.  */
+  if (!MBS_CASE_ALLOCATED (pstr))
+    pstr->mbs_case = (unsigned char *) str;
+  if (!MBS_ALLOCATED (pstr))
+    pstr->mbs = pstr->mbs_case;
+
+#ifdef RE_ENABLE_I18N
+  if (MB_CUR_MAX > 1)
+    {
+      if (icase)
+        build_wcs_upper_buffer (pstr);
+      else
+        build_wcs_buffer (pstr);
+    }
+  else
+#endif /* RE_ENABLE_I18N */
+    {
+      if (icase)
+        build_upper_buffer (pstr);
+      else if (trans != NULL)
+        re_string_translate_buffer (pstr);
+      else
+        pstr->valid_len = len;
+    }
+
+  /* Whole buffers are initialized now, hence valid_len == bufs_len.  */
+  pstr->valid_len = pstr->bufs_len;
+  return REG_NOERROR;
+}
+
+/* Helper for re_string_allocate and re_string_construct: resize every
+   buffer PSTR owns to NEW_BUF_LEN elements and record the new size in
+   PSTR->bufs_len.  Return REG_ESPACE as soon as one reallocation
+   fails (bufs_len is then left unchanged), REG_NOERROR otherwise.  */
+
+static reg_errcode_t
+re_string_realloc_buffers (pstr, new_buf_len)
+     re_string_t *pstr;
+     int new_buf_len;
+{
+  unsigned char *grown;
+
+#ifdef RE_ENABLE_I18N
+  if (MB_CUR_MAX > 1)
+    {
+      wint_t *grown_wcs = re_realloc (pstr->wcs, wint_t, new_buf_len);
+      if (BE (grown_wcs == NULL, 0))
+        return REG_ESPACE;
+      pstr->wcs = grown_wcs;
+    }
+#endif /* RE_ENABLE_I18N */
+
+  if (MBS_ALLOCATED (pstr))
+    {
+      grown = re_realloc (pstr->mbs, unsigned char, new_buf_len);
+      if (BE (grown == NULL, 0))
+        return REG_ESPACE;
+      pstr->mbs = grown;
+    }
+
+  if (MBS_CASE_ALLOCATED (pstr))
+    {
+      grown = re_realloc (pstr->mbs_case, unsigned char, new_buf_len);
+      if (BE (grown == NULL, 0))
+        return REG_ESPACE;
+      pstr->mbs_case = grown;
+      /* Without a buffer of its own, mbs follows mbs_case.  */
+      if (!MBS_ALLOCATED (pstr))
+        pstr->mbs = pstr->mbs_case;
+    }
+
+  pstr->bufs_len = new_buf_len;
+  return REG_NOERROR;
+}
+
+
+/* Zero *PSTR, then record in it the raw input STR with its length LEN,
+   the translation table TRANS, and ICASE normalized to 0 or 1.  */
+static void
+re_string_construct_common (str, len, pstr, trans, icase)
+     const char *str;
+     int len;
+     re_string_t *pstr;
+     RE_TRANSLATE_TYPE trans;
+     int icase;
+{
+  memset (pstr, '\0', sizeof (re_string_t));
+  pstr->icase = (icase != 0);
+  pstr->trans = trans;
+  pstr->len = len;
+  pstr->raw_mbs = (const unsigned char *) str;
+}
+
+#ifdef RE_ENABLE_I18N
+
+/* Build wide character buffer PSTR->WCS.
+ If the byte sequence of the string are:
+ <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
+ Then wide character buffer will be:
+ <wc1> , WEOF , <wc2> , WEOF , <wc3>
+ We use WEOF for padding, they indicate that the position isn't
+ a first byte of a multibyte character.
+
+ Note that this function assumes PSTR->VALID_LEN elements are already
+ built and starts from PSTR->VALID_LEN.
+
+ Conversion stops at the smaller of PSTR->LEN and PSTR->BUFS_LEN, or
+ earlier when the remaining bytes form an incomplete multibyte
+ character. On return PSTR->VALID_LEN is the index at which
+ conversion stopped. */
+
+static void
+build_wcs_buffer (pstr)
+ re_string_t *pstr;
+{
+ mbstate_t prev_st;
+ int byte_idx, end_idx, mbclen, remain_len;
+ /* Build the buffers from pstr->valid_len to either pstr->len or
+ pstr->bufs_len. */
+ end_idx = (pstr->bufs_len > pstr->len)? pstr->len : pstr->bufs_len;
+ for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
+ {
+ wchar_t wc;
+ remain_len = end_idx - byte_idx;
+ prev_st = pstr->cur_state; /* Saved so a failed conversion can be undone. */
+ mbclen = mbrtowc (&wc, ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
+ + byte_idx), remain_len, &pstr->cur_state);
+ if (BE (mbclen == (size_t) -2, 0))
+ {
+ /* The buffer doesn't have enough space, finish to build. */
+ pstr->cur_state = prev_st;
+ break;
+ }
+ else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0))
+ {
+ /* We treat these cases as a singlebyte character. */
+ mbclen = 1;
+ wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
+ pstr->cur_state = prev_st;
+ }
+
+ /* Apply the translation if we need. Only single-byte
+ characters are translated. */
+ if (pstr->trans != NULL && mbclen == 1)
+ {
+ int ch = pstr->trans[pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]];
+ pstr->mbs_case[byte_idx] = ch;
+ }
+ /* Write wide character and padding. */
+ pstr->wcs[byte_idx++] = wc;
+ /* Write paddings. REMAIN_LEN is reused here as the index just
+ past the last byte of the current character. */
+ for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
+ pstr->wcs[byte_idx++] = WEOF;
+ }
+ pstr->valid_len = byte_idx;
+}
+
+/* Build wide character buffer PSTR->WCS like build_wcs_buffer,
+   but for REG_ICASE: the upper-cased wide characters go to PSTR->WCS
+   and the upper-cased bytes to PSTR->MBS.
+
+   Building starts at PSTR->VALID_LEN and stops at the smaller of
+   PSTR->LEN and PSTR->BUFS_LEN, or earlier when the remaining bytes
+   form an incomplete multibyte character.  On return PSTR->VALID_LEN
+   is the index at which building stopped.  */
+
+static void
+build_wcs_upper_buffer (pstr)
+     re_string_t *pstr;
+{
+  mbstate_t prev_st;
+  int byte_idx, end_idx, mbclen, remain_len;
+  /* Build the buffers from pstr->valid_len to either pstr->len or
+     pstr->bufs_len.  */
+  end_idx = (pstr->bufs_len > pstr->len)? pstr->len : pstr->bufs_len;
+  for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
+    {
+      wchar_t wc;
+      remain_len = end_idx - byte_idx;
+      /* Saved so a failed conversion can be undone.  */
+      prev_st = pstr->cur_state;
+      mbclen = mbrtowc (&wc, ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
+                              + byte_idx), remain_len, &pstr->cur_state);
+      if (BE (mbclen == (size_t) -2, 0))
+        {
+          /* The buffer doesn't have enough space, finish to build.  */
+          pstr->cur_state = prev_st;
+          break;
+        }
+      else if (mbclen == 1 || mbclen == (size_t) -1 || mbclen == 0)
+        {
+          /* In case of a singlebyte character.  */
+          int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
+          if (mbclen != 1)
+            /* mbrtowc gave no usable wide character (WC is not even
+               written for an invalid sequence), so take the raw byte
+               as build_wcs_buffer does.  */
+            wc = (wchar_t) ch;
+          else if (pstr->trans != NULL)
+            {
+              /* Apply the translation if we need.  */
+              ch = pstr->trans[ch];
+              pstr->mbs_case[byte_idx] = ch;
+            }
+          /* WC is a wide character: fold it with towupper, not the
+             narrow toupper, whose argument must fit unsigned char.  */
+          pstr->wcs[byte_idx] = iswlower (wc) ? towupper (wc) : wc;
+          pstr->mbs[byte_idx++] = islower (ch) ? toupper (ch) : ch;
+          if (BE (mbclen == (size_t) -1, 0))
+            pstr->cur_state = prev_st;
+        }
+      else /* mbclen > 1 */
+        {
+          /* NOTE(review): this assumes the upper-case form re-encodes
+             to the same number of bytes (MBCLEN) -- confirm.  */
+          if (iswlower (wc))
+            wcrtomb ((char *) pstr->mbs + byte_idx, towupper (wc), &prev_st);
+          else
+            memcpy (pstr->mbs + byte_idx,
+                    pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
+          pstr->wcs[byte_idx++] = iswlower (wc) ? towupper (wc) : wc;
+          /* Write paddings.  REMAIN_LEN is reused as the index just
+             past the last byte of the current character.  */
+          for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
+            pstr->wcs[byte_idx++] = WEOF;
+        }
+    }
+  pstr->valid_len = byte_idx;
+}
+
+/* Starting just past the already-built part of PSTR, step over whole
+   characters of the raw string until the raw index reaches or passes
+   NEW_RAW_IDX.  Return that raw index and store in *LAST_WC the last
+   wide character converted (0 if nothing was skipped).  */
+
+static int
+re_string_skip_chars (pstr, new_raw_idx, last_wc)
+     re_string_t *pstr;
+     int new_raw_idx;
+     wint_t *last_wc;
+{
+  mbstate_t saved_state;
+  wchar_t wc = 0;
+  int char_len;
+  int raw_pos = pstr->raw_mbs_idx + pstr->valid_len;
+
+  /* Skip the characters which are not necessary to check.  */
+  while (raw_pos < new_raw_idx)
+    {
+      int bytes_left = pstr->len - raw_pos;
+      saved_state = pstr->cur_state;
+      char_len = mbrtowc (&wc, (const char *) pstr->raw_mbs + raw_pos,
+                          bytes_left, &pstr->cur_state);
+      if (BE (char_len == (size_t) -2 || char_len == (size_t) -1
+              || char_len == 0, 0))
+        {
+          /* Incomplete, invalid or null: count it as one byte and
+             discard the state change.  */
+          char_len = 1;
+          pstr->cur_state = saved_state;
+        }
+      /* Then proceed to the next character.  */
+      raw_pos += char_len;
+    }
+
+  *last_wc = (wint_t) wc;
+  return raw_pos;
+}
+#endif /* RE_ENABLE_I18N */
+
+/* Fill PSTR->MBS with the upper-cased input, used for REG_ICASE.
+   When a translation table is set, each byte is translated first and
+   the translated byte is also stored in PSTR->MBS_CASE.  Building
+   starts at PSTR->VALID_LEN and ends at the smaller of PSTR->LEN and
+   PSTR->BUFS_LEN; PSTR->VALID_LEN is updated to the stopping index.  */
+
+static void
+build_upper_buffer (pstr)
+     re_string_t *pstr;
+{
+  int pos;
+  int limit = pstr->len;
+
+  if (pstr->bufs_len <= pstr->len)
+    limit = pstr->bufs_len;
+
+  pos = pstr->valid_len;
+  while (pos < limit)
+    {
+      int ch = pstr->raw_mbs[pstr->raw_mbs_idx + pos];
+      if (pstr->trans != NULL)
+        {
+          ch = pstr->trans[ch];
+          pstr->mbs_case[pos] = ch;
+        }
+      pstr->mbs[pos] = islower (ch) ? toupper (ch) : ch;
+      ++pos;
+    }
+  pstr->valid_len = pos;
+}
+
+/* Store the PSTR->TRANS translation of each raw input byte in
+   PSTR->MBS_CASE, from PSTR->VALID_LEN up to the smaller of PSTR->LEN
+   and PSTR->BUFS_LEN, then update PSTR->VALID_LEN to the stopping
+   index.  */
+
+static void
+re_string_translate_buffer (pstr)
+     re_string_t *pstr;
+{
+  int pos;
+  int limit = pstr->len;
+
+  if (pstr->bufs_len <= pstr->len)
+    limit = pstr->bufs_len;
+
+  pos = pstr->valid_len;
+  while (pos < limit)
+    {
+      int raw = pstr->raw_mbs[pstr->raw_mbs_idx + pos];
+      pstr->mbs_case[pos] = pstr->trans[raw];
+      ++pos;
+    }
+
+  pstr->valid_len = pos;
+}
+
+/* This function re-constructs the buffers so that they start at raw
+ index IDX of the input.
+ Concretely, convert to wide character in case of MB_CUR_MAX > 1,
+ convert to upper case in case of REG_ICASE, apply translation.
+
+ EFLAGS and NEWLINE are used to compute PSTR->TIP_CONTEXT, the
+ context of the position just before the new buffer start.
+ PSTR->LEN and PSTR->STOP are rebased to be relative to IDX, and
+ PSTR->CUR_IDX is reset to 0. Always returns REG_NOERROR. */
+
+static reg_errcode_t
+re_string_reconstruct (pstr, idx, eflags, newline)
+ re_string_t *pstr;
+ int idx, eflags, newline;
+{
+ int offset = idx - pstr->raw_mbs_idx; /* Distance to move the buffer start; negative means moving backwards. */
+ if (offset < 0)
+ {
+ /* Reset buffer. */
+#ifdef RE_ENABLE_I18N
+ if (MB_CUR_MAX > 1)
+ memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
+#endif /* RE_ENABLE_I18N */
+ pstr->len += pstr->raw_mbs_idx; /* Make len and stop relative to raw index 0 again. */
+ pstr->stop += pstr->raw_mbs_idx;
+ pstr->valid_len = pstr->raw_mbs_idx = 0;
+ pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
+ : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
+ if (!MBS_CASE_ALLOCATED (pstr))
+ pstr->mbs_case = (unsigned char *) pstr->raw_mbs;
+ if (!MBS_ALLOCATED (pstr) && !MBS_CASE_ALLOCATED (pstr))
+ pstr->mbs = (unsigned char *) pstr->raw_mbs;
+ offset = idx; /* The buffer now starts at 0, so IDX is the whole distance. */
+ }
+
+ if (offset != 0)
+ {
+ /* Do any of the already-built characters remain usable? */
+ if (offset < pstr->valid_len)
+ {
+ /* Yes, move them to the front of the buffer. */
+ pstr->tip_context = re_string_context_at (pstr, offset - 1, eflags,
+ newline);
+#ifdef RE_ENABLE_I18N
+ if (MB_CUR_MAX > 1)
+ memmove (pstr->wcs, pstr->wcs + offset,
+ (pstr->valid_len - offset) * sizeof (wint_t));
+#endif /* RE_ENABLE_I18N */
+ if (MBS_ALLOCATED (pstr))
+ memmove (pstr->mbs, pstr->mbs + offset,
+ pstr->valid_len - offset);
+ if (MBS_CASE_ALLOCATED (pstr))
+ memmove (pstr->mbs_case, pstr->mbs_case + offset,
+ pstr->valid_len - offset);
+ pstr->valid_len -= offset;
+#if DEBUG
+ assert (pstr->valid_len > 0);
+#endif
+ }
+ else
+ {
+ /* No, skip all characters until IDX. */
+ pstr->valid_len = 0;
+#ifdef RE_ENABLE_I18N
+ if (MB_CUR_MAX > 1)
+ {
+ int wcs_idx;
+ wint_t wc;
+ pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx; /* Bytes past IDX that belong to a character begun before it. */
+ for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
+ pstr->wcs[wcs_idx] = WEOF;
+ if (pstr->trans && wc <= 0xff)
+ wc = pstr->trans[wc];
+ pstr->tip_context = (IS_WIDE_WORD_CHAR (wc) ? CONTEXT_WORD
+ : ((newline && IS_WIDE_NEWLINE (wc))
+ ? CONTEXT_NEWLINE : 0));
+ }
+ else
+#endif /* RE_ENABLE_I18N */
+ {
+ int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1]; /* The byte just before the new buffer start. */
+ if (pstr->trans)
+ c = pstr->trans[c];
+ pstr->tip_context = (IS_WORD_CHAR (c) ? CONTEXT_WORD
+ : ((newline && IS_NEWLINE (c))
+ ? CONTEXT_NEWLINE : 0));
+ }
+ }
+ if (!MBS_CASE_ALLOCATED (pstr))
+ {
+ pstr->mbs_case += offset; /* Not a private buffer here, so just advance the pointer. */
+ /* In case of !MBS_ALLOCATED && !MBS_CASE_ALLOCATED. */
+ if (!MBS_ALLOCATED (pstr))
+ pstr->mbs += offset;
+ }
+ }
+ pstr->raw_mbs_idx = idx;
+ pstr->len -= offset;
+ pstr->stop -= offset;
+
+ /* Then build the buffers. */
+#ifdef RE_ENABLE_I18N
+ if (MB_CUR_MAX > 1)
+ {
+ if (pstr->icase)
+ build_wcs_upper_buffer (pstr);
+ else
+ build_wcs_buffer (pstr);
+ }
+ else
+#endif /* RE_ENABLE_I18N */
+ {
+ if (pstr->icase)
+ build_upper_buffer (pstr);
+ else if (pstr->trans != NULL)
+ re_string_translate_buffer (pstr);
+ }
+ pstr->cur_idx = 0;
+
+ return REG_NOERROR;
+}
+
+/* Release the buffers owned by PSTR: the wide character buffer (when
+   built with RE_ENABLE_I18N) and whichever of MBS and MBS_CASE were
+   allocated.  PSTR itself is not freed.  */
+static void
+re_string_destruct (pstr)
+     re_string_t *pstr;
+{
+#ifdef RE_ENABLE_I18N
+  re_free (pstr->wcs);
+#endif /* RE_ENABLE_I18N */
+  if (MBS_ALLOCATED (pstr))
+    {
+      re_free (pstr->mbs);
+    }
+  if (MBS_CASE_ALLOCATED (pstr))
+    {
+      re_free (pstr->mbs_case);
+    }
+}
+
+/* Return the context at IDX in INPUT.
+   A negative IDX yields INPUT->tip_context.  IDX == INPUT->len yields
+   the end-of-buffer context, which also counts as a newline unless
+   REG_NOTEOL is set in EFLAGS.  Otherwise the character at IDX
+   decides: CONTEXT_WORD for a word character, CONTEXT_NEWLINE for a
+   newline when NEWLINE_ANCHOR is nonzero, else 0.  */
+
+static unsigned int
+re_string_context_at (input, idx, eflags, newline_anchor)
+     const re_string_t *input;
+     int idx, eflags, newline_anchor;
+{
+  int c;
+
+  if (idx < 0)
+    /* The character at input->mbs[-1] is unknown here, so use the
+       value stored in input->tip_context.  */
+    return input->tip_context;
+
+  if (idx == input->len)
+    {
+      if (eflags & REG_NOTEOL)
+        return CONTEXT_ENDBUF;
+      return CONTEXT_NEWLINE | CONTEXT_ENDBUF;
+    }
+
+#ifdef RE_ENABLE_I18N
+  if (MB_CUR_MAX > 1)
+    {
+      wint_t wc;
+      int wc_idx;
+      /* Step back over WEOF padding to the first byte of the
+         character.  */
+      for (wc_idx = idx; input->wcs[wc_idx] == WEOF;)
+        {
+#ifdef DEBUG
+          /* It must not happen.  */
+          assert (wc_idx >= 0);
+#endif
+          if (--wc_idx < 0)
+            return input->tip_context;
+        }
+      wc = input->wcs[wc_idx];
+      if (IS_WIDE_WORD_CHAR (wc))
+        return CONTEXT_WORD;
+      if (newline_anchor && IS_WIDE_NEWLINE (wc))
+        return CONTEXT_NEWLINE;
+      return 0;
+    }
+#endif
+
+  c = re_string_byte_at (input, idx);
+  if (IS_WORD_CHAR (c))
+    return CONTEXT_WORD;
+  if (newline_anchor && IS_NEWLINE (c))
+    return CONTEXT_NEWLINE;
+  return 0;
+}
+\f
+/* Functions for set operation. */
+
+/* Make SET an empty set with room for SIZE elements.  Return
+   REG_ESPACE if the element array cannot be allocated, REG_NOERROR
+   otherwise.  */
+static reg_errcode_t
+re_node_set_alloc (set, size)
+     re_node_set *set;
+     int size;
+{
+  int *storage = re_malloc (int, size);
+
+  set->elems = storage;
+  set->nelem = 0;
+  set->alloc = size;
+  if (BE (storage == NULL, 0))
+    return REG_ESPACE;
+  return REG_NOERROR;
+}
+
+/* Initialize SET to hold the single element ELEM.  On allocation
+   failure SET is left with no storage and no elements, and REG_ESPACE
+   is returned.  */
+static reg_errcode_t
+re_node_set_init_1 (set, elem)
+     re_node_set *set;
+     int elem;
+{
+  set->elems = re_malloc (int, 1);
+  if (BE (set->elems == NULL, 0))
+    {
+      set->nelem = 0;
+      set->alloc = 0;
+      return REG_ESPACE;
+    }
+  set->elems[0] = elem;
+  set->nelem = 1;
+  set->alloc = 1;
+  return REG_NOERROR;
+}
+
+/* Initialize SET to hold ELEM1 and ELEM2, stored in ascending order;
+   if both are equal the set gets a single element.  On allocation
+   failure SET is left with no storage and no elements, as
+   re_node_set_init_1 and re_node_set_init_copy do, and REG_ESPACE is
+   returned.  */
+static reg_errcode_t
+re_node_set_init_2 (set, elem1, elem2)
+     re_node_set *set;
+     int elem1, elem2;
+{
+  set->alloc = 2;
+  set->elems = re_malloc (int, 2);
+  if (BE (set->elems == NULL, 0))
+    {
+      /* Do not advertise capacity we do not have, and do not leave
+         nelem uninitialized.  */
+      set->alloc = set->nelem = 0;
+      return REG_ESPACE;
+    }
+  if (elem1 == elem2)
+    {
+      set->nelem = 1;
+      set->elems[0] = elem1;
+    }
+  else
+    {
+      set->nelem = 2;
+      if (elem1 < elem2)
+        {
+          set->elems[0] = elem1;
+          set->elems[1] = elem2;
+        }
+      else
+        {
+          set->elems[0] = elem2;
+          set->elems[1] = elem1;
+        }
+    }
+  return REG_NOERROR;
+}
+
+/* Initialize DEST as a copy of SRC, allocating exactly SRC->nelem
+   slots.  An empty SRC yields an empty DEST with no allocation.  On
+   allocation failure DEST is left with no storage and no elements,
+   and REG_ESPACE is returned.  */
+static reg_errcode_t
+re_node_set_init_copy (dest, src)
+     re_node_set *dest;
+     const re_node_set *src;
+{
+  dest->nelem = src->nelem;
+  if (src->nelem <= 0)
+    {
+      re_node_set_init_empty (dest);
+      return REG_NOERROR;
+    }
+
+  dest->alloc = dest->nelem;
+  dest->elems = re_malloc (int, dest->alloc);
+  if (BE (dest->elems == NULL, 0))
+    {
+      dest->alloc = dest->nelem = 0;
+      return REG_ESPACE;
+    }
+  memcpy (dest->elems, src->elems, src->nelem * sizeof (int));
+  return REG_NOERROR;
+}
+
+/* Calculate the intersection of the sets SRC1 and SRC2, and merge it
+   into DEST, keeping DEST sorted and free of duplicates.
+   Return REG_NOERROR on success.  Return REG_ESPACE if DEST cannot be
+   grown; DEST is then left exactly as it was.
+   Note: We assume dest->elems is NULL, when dest->alloc is 0.  */
+
+static reg_errcode_t
+re_node_set_add_intersect (dest, src1, src2)
+     re_node_set *dest;
+     const re_node_set *src1, *src2;
+{
+  int i1, i2, id;
+  if (src1->nelem > 0 && src2->nelem > 0)
+    {
+      /* Upper bound on the number of elements DEST can end up with.  */
+      int needed = src1->nelem + src2->nelem + dest->nelem;
+      if (needed > dest->alloc)
+        {
+          /* Grow through a temporary so that a failed realloc neither
+             leaks the old array nor changes the recorded capacity.  */
+          int *new_elems = re_realloc (dest->elems, int, needed);
+          if (BE (new_elems == NULL, 0))
+            return REG_ESPACE;
+          dest->elems = new_elems;
+          dest->alloc = needed;
+        }
+    }
+  else
+    /* An empty operand means an empty intersection: nothing to add.  */
+    return REG_NOERROR;
+
+  for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
+    {
+      if (src1->elems[i1] > src2->elems[i2])
+        {
+          ++i2;
+          continue;
+        }
+      if (src1->elems[i1] == src2->elems[i2])
+        {
+          /* Common element: find its place in DEST.  */
+          while (id < dest->nelem && dest->elems[id] < src2->elems[i2])
+            ++id;
+          if (id < dest->nelem && dest->elems[id] == src2->elems[i2])
+            /* DEST already has it.  */
+            ++id;
+          else
+            {
+              /* Open a gap at ID and store the element there.  */
+              memmove (dest->elems + id + 1, dest->elems + id,
+                       sizeof (int) * (dest->nelem - id));
+              dest->elems[id++] = src2->elems[i2++];
+              ++dest->nelem;
+            }
+        }
+      ++i1;
+    }
+  return REG_NOERROR;
+}
+
+/* Initialize DEST as the union of the sorted sets SRC1 and SRC2.
+   Either source may be NULL or empty; if only one has elements DEST
+   becomes a copy of it, and if neither has DEST becomes empty.
+   Return REG_NOERROR on success, or an error code if allocation
+   fails.  */
+
+static reg_errcode_t
+re_node_set_init_union (dest, src1, src2)
+     re_node_set *dest;
+     const re_node_set *src1, *src2;
+{
+  int i1, i2, id;
+  int have1 = (src1 != NULL && src1->nelem > 0);
+  int have2 = (src2 != NULL && src2->nelem > 0);
+
+  if (!(have1 && have2))
+    {
+      /* At most one side contributes anything.  */
+      if (have1)
+        return re_node_set_init_copy (dest, src1);
+      if (have2)
+        return re_node_set_init_copy (dest, src2);
+      re_node_set_init_empty (dest);
+      return REG_NOERROR;
+    }
+
+  dest->alloc = src1->nelem + src2->nelem;
+  dest->elems = re_malloc (int, dest->alloc);
+  if (BE (dest->elems == NULL, 0))
+    return REG_ESPACE;
+
+  /* Merge the two sorted arrays, emitting common elements once.  */
+  i1 = i2 = id = 0;
+  while (i1 < src1->nelem && i2 < src2->nelem)
+    {
+      if (src1->elems[i1] > src2->elems[i2])
+        dest->elems[id++] = src2->elems[i2++];
+      else
+        {
+          if (src1->elems[i1] == src2->elems[i2])
+            ++i2;
+          dest->elems[id++] = src1->elems[i1++];
+        }
+    }
+
+  /* Append whatever is left of the longer side.  */
+  if (i1 < src1->nelem)
+    {
+      memcpy (dest->elems + id, src1->elems + i1,
+              (src1->nelem - i1) * sizeof (int));
+      id += src1->nelem - i1;
+    }
+  else if (i2 < src2->nelem)
+    {
+      memcpy (dest->elems + id, src2->elems + i2,
+              (src2->nelem - i2) * sizeof (int));
+      id += src2->nelem - i2;
+    }
+  dest->nelem = id;
+  return REG_NOERROR;
+}
+
+/* Calculate the union of the sorted sets DEST and SRC, and store it in
+   DEST.  A NULL or empty SRC leaves DEST untouched.
+   Return REG_NOERROR on success.  Return REG_ESPACE if DEST cannot be
+   grown; DEST, including its recorded capacity, is then left exactly
+   as it was.  */
+
+static reg_errcode_t
+re_node_set_merge (dest, src)
+     re_node_set *dest;
+     const re_node_set *src;
+{
+  int si, di;
+  if (src == NULL || src->nelem == 0)
+    return REG_NOERROR;
+  if (dest->alloc < src->nelem + dest->nelem)
+    {
+      /* Commit the new capacity only once realloc has succeeded;
+         otherwise DEST would claim room it does not have.  */
+      int new_alloc = 2 * (src->nelem + dest->alloc);
+      int *new_buffer = re_realloc (dest->elems, int, new_alloc);
+      if (BE (new_buffer == NULL, 0))
+        return REG_ESPACE;
+      dest->elems = new_buffer;
+      dest->alloc = new_alloc;
+    }
+
+  for (si = 0, di = 0 ; si < src->nelem && di < dest->nelem ;)
+    {
+      int cp_from, ncp, mid, right, src_elem = src->elems[si];
+      /* Binary search the spot we will add the new element.  */
+      right = dest->nelem;
+      while (di < right)
+        {
+          mid = (di + right) / 2;
+          if (dest->elems[mid] < src_elem)
+            di = mid + 1;
+          else
+            right = mid;
+        }
+      if (di >= dest->nelem)
+        /* Everything left in SRC sorts after DEST's last element.  */
+        break;
+
+      if (dest->elems[di] == src_elem)
+        {
+          /* Skip since, DEST already has the element.  */
+          ++di;
+          ++si;
+          continue;
+        }
+
+      /* Skip the src elements which are less than dest->elems[di].  */
+      cp_from = si;
+      while (si < src->nelem && src->elems[si] < dest->elems[di])
+        ++si;
+      /* Copy these src elements.  */
+      ncp = si - cp_from;
+      memmove (dest->elems + di + ncp, dest->elems + di,
+               sizeof (int) * (dest->nelem - di));
+      memcpy (dest->elems + di, src->elems + cp_from,
+              sizeof (int) * ncp);
+      /* Update counters.  */
+      di += ncp;
+      dest->nelem += ncp;
+    }
+
+  /* Copy remaining src elements.  */
+  if (si < src->nelem)
+    {
+      memcpy (dest->elems + di, src->elems + si,
+              sizeof (int) * (src->nelem - si));
+      dest->nelem += src->nelem - si;
+    }
+  return REG_NOERROR;
+}
+
+/* Insert the new element ELEM into the sorted re_node_set SET.
+   Return 1 on success and -1 if memory allocation fails; on failure
+   SET is left unchanged.
+   NOTE(review): an element equal to ELEM that is already in SET is
+   not detected, so 0 is never returned and a duplicate would be
+   stored.  Callers are presumably expected not to insert an element
+   twice -- confirm before relying on either behavior.  */
+
+static int
+re_node_set_insert (set, elem)
+     re_node_set *set;
+     int elem;
+{
+  int idx, right, mid;
+  /* In case of the set is empty.  */
+  if (set->elems == NULL || set->alloc == 0)
+    {
+      if (BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1))
+        return 1;
+      else
+        return -1;
+    }
+
+  /* Binary search the spot we will add the new element.  */
+  idx = 0;
+  right = set->nelem;
+  while (idx < right)
+    {
+      mid = (idx + right) / 2;
+      if (set->elems[mid] < elem)
+        idx = mid + 1;
+      else
+        right = mid;
+    }
+
+  /* Realloc if we need.  */
+  if (set->alloc < set->nelem + 1)
+    {
+      int *new_array;
+      /* Commit the doubled capacity only after the allocation has
+         succeeded; otherwise a later insert would trust room that
+         does not exist.  */
+      int new_alloc = set->alloc * 2;
+      new_array = re_malloc (int, new_alloc);
+      if (BE (new_array == NULL, 0))
+        return -1;
+      /* Copy the elements which precede the new element.  */
+      if (idx > 0)
+        memcpy (new_array, set->elems, sizeof (int) * (idx));
+      /* Copy the elements which follow the new element, leaving a
+         gap at IDX.  */
+      if (set->nelem - idx > 0)
+        memcpy (new_array + idx + 1, set->elems + idx,
+                sizeof (int) * (set->nelem - idx));
+      re_free (set->elems);
+      set->elems = new_array;
+      set->alloc = new_alloc;
+    }
+  else
+    {
+      /* Move the elements which follow the new element.  */
+      if (set->nelem - idx > 0)
+        memmove (set->elems + idx + 1, set->elems + idx,
+                 sizeof (int) * (set->nelem - idx));
+    }
+  /* Insert the new element.  */
+  set->elems[idx] = elem;
+  ++set->nelem;
+  return 1;
+}
+
+/* Compare two node sets SET1 and SET2.
+   Return 1 if both are non-NULL and hold the same elements in the
+   same order, return 0 otherwise.  */
+
+static int
+re_node_set_compare (set1, set2)
+     const re_node_set *set1, *set2;
+{
+  int pos;
+
+  if (set1 == NULL || set2 == NULL)
+    return 0;
+  if (set1->nelem != set2->nelem)
+    return 0;
+
+  pos = 0;
+  while (pos < set1->nelem)
+    {
+      if (set1->elems[pos] != set2->elems[pos])
+        return 0;
+      ++pos;
+    }
+  return 1;
+}
+
+/* Look ELEM up in the sorted set SET by binary search.  Return its
+   index plus one if SET contains it, and 0 otherwise (including when
+   SET is empty).  */
+
+static int
+re_node_set_contains (set, elem)
+     const re_node_set *set;
+     int elem;
+{
+  int lo, hi, mid;
+
+  if (set->nelem <= 0)
+    return 0;
+
+  /* Narrow [lo, hi] down to the first element that is not below
+     ELEM.  */
+  lo = 0;
+  hi = set->nelem - 1;
+  while (lo < hi)
+    {
+      mid = (lo + hi) / 2;
+      if (set->elems[mid] < elem)
+        lo = mid + 1;
+      else
+        hi = mid;
+    }
+
+  if (set->elems[lo] != elem)
+    return 0;
+  return lo + 1;
+}
+
+/* Remove the element at index IDX from SET, closing the gap.  An IDX
+   outside [0, SET->nelem) is ignored.  */
+static void
+re_node_set_remove_at (set, idx)
+     re_node_set *set;
+     int idx;
+{
+  int tail;
+
+  if (idx < 0 || idx >= set->nelem)
+    return;
+
+  /* Number of elements that follow the one being removed.  */
+  tail = set->nelem - idx - 1;
+  if (tail > 0)
+    memmove (set->elems + idx, set->elems + idx + 1,
+             sizeof (int) * tail);
+  set->nelem -= 1;
+}
+\f
+
+/* Add the token TOKEN to dfa->nodes, and return the index of the token.
+   Return -1 if an allocation fails.
+
+   When the node array is full its capacity is doubled.  If MODE is
+   nonzero the per-node arrays nexts, org_indices, edests, eclosures
+   and inveclosures are grown to the same capacity.  The new node's
+   `duplicated' and `constraint' fields are cleared.
+
+   On failure every array pointer in DFA still refers to live memory
+   and dfa->nodes_alloc keeps its old value.  */
+
+static int
+re_dfa_add_node (dfa, token, mode)
+     re_dfa_t *dfa;
+     re_token_t token;
+     int mode;
+{
+  if (dfa->nodes_len >= dfa->nodes_alloc)
+    {
+      /* NOTE(review): assumes nodes_alloc is an int, like the index
+         this function returns -- confirm against re_dfa_t.  */
+      int new_alloc = dfa->nodes_alloc * 2;
+      re_token_t *new_array;
+
+      new_array = re_realloc (dfa->nodes, re_token_t, new_alloc);
+      if (BE (new_array == NULL, 0))
+        return -1;
+      dfa->nodes = new_array;
+      if (mode)
+        {
+          int *new_nexts, *new_indices;
+          re_node_set *new_edests, *new_eclosures, *new_inveclosures;
+
+          /* Store each array back as soon as it has been moved: a
+             successful realloc may free the old block, so keeping
+             the old pointer after a later failure would leave DFA
+             pointing at freed memory.  */
+          new_nexts = re_realloc (dfa->nexts, int, new_alloc);
+          if (BE (new_nexts == NULL, 0))
+            return -1;
+          dfa->nexts = new_nexts;
+
+          new_indices = re_realloc (dfa->org_indices, int, new_alloc);
+          if (BE (new_indices == NULL, 0))
+            return -1;
+          dfa->org_indices = new_indices;
+
+          new_edests = re_realloc (dfa->edests, re_node_set, new_alloc);
+          if (BE (new_edests == NULL, 0))
+            return -1;
+          dfa->edests = new_edests;
+
+          new_eclosures = re_realloc (dfa->eclosures, re_node_set,
+                                      new_alloc);
+          if (BE (new_eclosures == NULL, 0))
+            return -1;
+          dfa->eclosures = new_eclosures;
+
+          new_inveclosures = re_realloc (dfa->inveclosures, re_node_set,
+                                         new_alloc);
+          if (BE (new_inveclosures == NULL, 0))
+            return -1;
+          dfa->inveclosures = new_inveclosures;
+        }
+      /* Everything that had to grow has grown; commit the capacity.  */
+      dfa->nodes_alloc = new_alloc;
+    }
+  dfa->nodes[dfa->nodes_len] = token;
+  dfa->nodes[dfa->nodes_len].duplicated = 0;
+  dfa->nodes[dfa->nodes_len].constraint = 0;
+  return dfa->nodes_len++;
+}
+
+/* Hash a node set together with a context value: the sum, in unsigned
+   arithmetic, of CONTEXT, the element count of NODES and every
+   element of NODES.  */
+static unsigned int inline
+calc_state_hash (nodes, context)
+     const re_node_set *nodes;
+     unsigned int context;
+{
+  unsigned int sum = context;
+  int remaining = nodes->nelem;
+
+  sum += nodes->nelem;
+  /* Unsigned addition wraps and commutes, so the visiting order does
+     not affect the result.  */
+  while (remaining-- > 0)
+    sum += nodes->elems[remaining];
+  return sum;
+}
+
+/* Search DFA's state table for the state whose node set is equivalent
+   to NODES and return it; if there is none, create one with
+   create_ci_newstate and return that.
+   An empty NODES yields NULL with *ERR set to REG_NOERROR, because
+   NULL stands for the invalid state.  If the new state cannot be
+   created, NULL is returned and *ERR is set to REG_ESPACE.  *ERR is
+   not written when a state is returned.
+   Note: - A NULL return value therefore does not by itself mean an
+	   error; check *ERR.
+	 - We never return non-NULL value in case of any errors, it is for
+	   optimization.  */
+
+static re_dfastate_t*
+re_acquire_state (err, dfa, nodes)
+     reg_errcode_t *err;
+     re_dfa_t *dfa;
+     const re_node_set *nodes;
+{
+  unsigned int hash;
+  re_dfastate_t *created;
+  struct re_state_table_entry *bucket;
+  int slot;
+
+  if (BE (nodes->nelem == 0, 0))
+    {
+      *err = REG_NOERROR;
+      return NULL;
+    }
+
+  hash = calc_state_hash (nodes, 0);
+  bucket = &dfa->state_table[hash & dfa->state_hash_mask];
+
+  for (slot = 0; slot < bucket->num; ++slot)
+    {
+      re_dfastate_t *candidate = bucket->array[slot];
+      /* Compare the cheap hash first, the node sets only on a hit.  */
+      if (candidate->hash == hash
+          && re_node_set_compare (&candidate->nodes, nodes))
+        return candidate;
+    }
+
+  /* There is no appropriate state in the dfa, create a new one.  */
+  created = create_ci_newstate (dfa, nodes, hash);
+  if (BE (created == NULL, 0))
+    *err = REG_ESPACE;
+  return created;
+}
+
+/* Look up the state whose entrance node set equals NODES and whose
+   context equals CONTEXT.  If DFA already holds such a state, return it;
+   otherwise create, register and return a new one.
+   An empty NODES yields NULL with *ERR set to REG_NOERROR, because NULL
+   doubles as the "invalid state".  If creating the state fails, NULL is
+   returned and *ERR is set to REG_ESPACE.  *ERR is left untouched when
+   a non-NULL state is returned.  */
+
+static re_dfastate_t*
+re_acquire_state_context (err, dfa, nodes, context)
+     reg_errcode_t *err;
+     re_dfa_t *dfa;
+     const re_node_set *nodes;
+     unsigned int context;
+{
+  unsigned int hash;
+  re_dfastate_t *created;
+  struct re_state_table_entry *bucket;
+  int idx;
+
+  if (nodes->nelem == 0)
+    {
+      *err = REG_NOERROR;
+      return NULL;
+    }
+
+  hash = calc_state_hash (nodes, context);
+  bucket = &dfa->state_table[hash & dfa->state_hash_mask];
+
+  for (idx = 0; idx < bucket->num; ++idx)
+    {
+      re_dfastate_t *candidate = bucket->array[idx];
+      if (candidate->hash != hash)
+        continue;
+      /* The key is the entrance node set, not the (possibly pruned)
+         node set stored in the state itself.  */
+      if (!re_node_set_compare (candidate->entrance_nodes, nodes))
+        continue;
+      if (candidate->context == context)
+        return candidate;
+    }
+
+  /* No matching state exists in `dfa' yet, so create a new one.  */
+  created = create_cd_newstate (dfa, nodes, context, hash);
+  if (BE (created == NULL, 0))
+    *err = REG_ESPACE;
+  return created;
+}
+
+/* Allocate a zero-filled DFA state, copy NODES into its node set and
+   record HASH in it.  Return the new state, or NULL if either the state
+   or the node-set copy cannot be allocated.  DFA is not used here.  */
+
+static re_dfastate_t *
+create_newstate_common (dfa, nodes, hash)
+     re_dfa_t *dfa;
+     const re_node_set *nodes;
+     unsigned int hash;
+{
+  re_dfastate_t *state = (re_dfastate_t *) calloc (1, sizeof (re_dfastate_t));
+  if (BE (state == NULL, 0))
+    return NULL;
+
+  if (BE (re_node_set_init_copy (&state->nodes, nodes) != REG_NOERROR, 0))
+    {
+      /* Nothing but the bare state exists yet, so plain free suffices.  */
+      re_free (state);
+      return NULL;
+    }
+
+  state->hash = hash;
+  state->trtable_search = NULL;
+  state->trtable = NULL;
+  return state;
+}
+
+/* Append NEWSTATE to the hash-table bucket selected by HASH, growing the
+   bucket's array when it is full.  Return REG_NOERROR on success, or
+   REG_ESPACE if the array cannot be grown; in that case the bucket is
+   left exactly as it was.  */
+
+static reg_errcode_t
+register_state (dfa, newstate, hash)
+     re_dfa_t *dfa;
+     re_dfastate_t *newstate;
+     unsigned int hash;
+{
+  struct re_state_table_entry *spot;
+  spot = dfa->state_table + (hash & dfa->state_hash_mask);
+
+  if (spot->alloc <= spot->num)
+    {
+      /* Commit the new capacity only after realloc has succeeded.
+         Recording it earlier would leave ALLOC larger than the real
+         array on failure, and a later call would then write past the
+         end of the old allocation.  */
+      int new_alloc = 2 * spot->num + 2;
+      re_dfastate_t **new_array = re_realloc (spot->array, re_dfastate_t *,
+                                              new_alloc);
+      if (BE (new_array == NULL, 0))
+        return REG_ESPACE;
+      spot->array = new_array;
+      spot->alloc = new_alloc;
+    }
+  spot->array[spot->num++] = newstate;
+  return REG_NOERROR;
+}
+
+/* Create a new state that is independent of contexts, classify it from
+   the node types in NODES, and register it in DFA under HASH.
+   Return the new state on success, otherwise NULL.  */
+
+static re_dfastate_t *
+create_ci_newstate (dfa, nodes, hash)
+     re_dfa_t *dfa;
+     const re_node_set *nodes;
+     unsigned int hash;
+{
+  int idx;
+  re_dfastate_t *state = create_newstate_common (dfa, nodes, hash);
+  if (BE (state == NULL, 0))
+    return NULL;
+  /* With no context dependence the entrance set is the node set itself.  */
+  state->entrance_nodes = &state->nodes;
+
+  for (idx = 0; idx < nodes->nelem; ++idx)
+    {
+      re_token_t *tok = &dfa->nodes[nodes->elems[idx]];
+      re_token_type_t type = tok->type;
+
+      /* Plain, unconstrained characters do not affect any flag.  */
+      if (type == CHARACTER && !tok->constraint)
+        continue;
+
+      /* If the state has the halt node, the state is a halt state.  */
+      if (type == END_OF_RE)
+        state->halt = 1;
+#ifdef RE_ENABLE_I18N
+      else if (type == COMPLEX_BRACKET
+               || (type == OP_PERIOD && MB_CUR_MAX > 1))
+        state->accept_mb = 1;
+#endif /* RE_ENABLE_I18N */
+      else if (type == OP_BACK_REF)
+        state->has_backref = 1;
+      else if (type == ANCHOR || tok->constraint)
+        state->has_constraint = 1;
+    }
+
+  if (BE (register_state (dfa, state, hash) != REG_NOERROR, 0))
+    {
+      free_state (state);
+      return NULL;
+    }
+  return state;
+}
+
+/* Create a new state that depends on the context CONTEXT, and register
+   it in DFA under HASH.  The state's own node set starts as a copy of
+   NODES; every node whose constraint is not satisfied by CONTEXT is then
+   removed from it, while `entrance_nodes' keeps the unpruned copy.
+   Return the new state on success.  Return NULL if any allocation or the
+   registration fails; everything allocated so far is released first.  */
+
+static re_dfastate_t *
+create_cd_newstate (dfa, nodes, context, hash)
+     re_dfa_t *dfa;
+     const re_node_set *nodes;
+     unsigned int context, hash;
+{
+  int i, nctx_nodes = 0;
+  reg_errcode_t err;
+  re_dfastate_t *newstate;
+
+  newstate = create_newstate_common (dfa, nodes, hash);
+  if (BE (newstate == NULL, 0))
+    return NULL;
+  newstate->context = context;
+  newstate->entrance_nodes = &newstate->nodes;
+
+  for (i = 0 ; i < nodes->nelem ; i++)
+    {
+      re_token_t *node = dfa->nodes + nodes->elems[i];
+      re_token_type_t type = node->type;
+      unsigned int constraint = node->constraint;
+
+      if (type == CHARACTER && !constraint)
+        continue;
+      /* If the state has the halt node, the state is a halt state.  */
+      else if (type == END_OF_RE)
+        newstate->halt = 1;
+#ifdef RE_ENABLE_I18N
+      else if (type == COMPLEX_BRACKET
+               || (type == OP_PERIOD && MB_CUR_MAX > 1))
+        newstate->accept_mb = 1;
+#endif /* RE_ENABLE_I18N */
+      else if (type == OP_BACK_REF)
+        newstate->has_backref = 1;
+      else if (type == ANCHOR)
+        constraint = node->opr.ctx_type;
+
+      if (constraint)
+        {
+          if (newstate->entrance_nodes == &newstate->nodes)
+            {
+              /* Build the separate entrance set in a local first and
+                 attach it only once it is complete.  On either failure
+                 below, NEWSTATE still aliases its own node set, so
+                 free_state never sees a NULL or half-built set.  */
+              re_node_set *entrance = re_malloc (re_node_set, 1);
+              if (BE (entrance == NULL, 0))
+                {
+                  free_state (newstate);
+                  return NULL;
+                }
+              err = re_node_set_init_copy (entrance, nodes);
+              if (BE (err != REG_NOERROR, 0))
+                {
+                  /* NOTE(review): assumes a failed copy owns no element
+                     storage, so only the container is released here;
+                     confirm against re_node_set_init_copy.  */
+                  re_free (entrance);
+                  free_state (newstate);
+                  return NULL;
+                }
+              newstate->entrance_nodes = entrance;
+              nctx_nodes = 0;
+              newstate->has_constraint = 1;
+            }
+
+          if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context))
+            {
+              /* I indexes NODES; subtract the removals made so far to
+                 get the position in the shrinking copy.  */
+              re_node_set_remove_at (&newstate->nodes, i - nctx_nodes);
+              ++nctx_nodes;
+            }
+        }
+    }
+  err = register_state (dfa, newstate, hash);
+  if (BE (err != REG_NOERROR, 0))
+    {
+      free_state (newstate);
+      newstate = NULL;
+    }
+  return newstate;
+}
+
+/* Release STATE together with its node set(s) and transition tables.  */
+
+static void
+free_state (state)
+     re_dfastate_t *state;
+{
+  re_node_set *entrance = state->entrance_nodes;
+
+  /* The entrance set is a separate allocation only when it does not
+     alias the node set embedded in STATE.  */
+  if (entrance != &state->nodes)
+    {
+      re_node_set_free (entrance);
+      re_free (entrance);
+    }
+  re_free (state->trtable_search);
+  re_free (state->trtable);
+  re_node_set_free (&state->nodes);
+  re_free (state);
+}
--- /dev/null
+/* Extended regular expression matching and search library.
+ Copyright (C) 2002, 2003 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#ifndef _REGEX_INTERNAL_H
+#define _REGEX_INTERNAL_H 1
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <assert.h>
+#include <ctype.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#if defined HAVE_LOCALE_H || defined _LIBC
+# include <locale.h>
+#endif
+#if defined HAVE_WCHAR_H || defined _LIBC
+# include <wchar.h>
+#endif /* HAVE_WCHAR_H || _LIBC */
+#if defined HAVE_WCTYPE_H || defined _LIBC
+# include <wctype.h>
+#endif /* HAVE_WCTYPE_H || _LIBC */
+
+/* In case that the system doesn't have isblank(). */
+#if !defined _LIBC && !defined HAVE_ISBLANK && !defined isblank
+# define isblank(ch) ((ch) == ' ' || (ch) == '\t')
+#endif
+
+#ifdef _LIBC
+# ifndef _RE_DEFINE_LOCALE_FUNCTIONS
+# define _RE_DEFINE_LOCALE_FUNCTIONS 1
+# include <locale/localeinfo.h>
+# include <locale/elem-hash.h>
+# include <locale/coll-lookup.h>
+# endif
+#endif
+
+/* This is for other GNU distributions with internationalized messages. */
+#if HAVE_LIBINTL_H || defined _LIBC
+# include <libintl.h>
+# ifdef _LIBC
+# undef gettext
+# define gettext(msgid) \
+ INTUSE(__dcgettext) (INTUSE(_libc_intl_domainname), msgid, LC_MESSAGES)
+# endif
+#else
+# define gettext(msgid) (msgid)
+#endif
+
+#ifndef gettext_noop
+/* This define is so xgettext can find the internationalizable
+ strings. */
+# define gettext_noop(String) String
+#endif
+
+#if (defined MB_CUR_MAX && HAVE_LOCALE_H && HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_WCRTOMB && HAVE_MBRTOWC && HAVE_WCSCOLL) || _LIBC
+# define RE_ENABLE_I18N
+#endif
+
+#if __GNUC__ >= 3
+# define BE(expr, val) __builtin_expect (expr, val)
+#else
+# define BE(expr, val) (expr)
+# define inline
+#endif
+
+/* Number of bits in a byte. */
+#define BYTE_BITS 8
+/* Number of distinct single-byte characters. */
+#define SBC_MAX 256
+
+#define COLL_ELEM_LEN_MAX 8
+
+/* The character which represents newline. */
+#define NEWLINE_CHAR '\n'
+#define WIDE_NEWLINE_CHAR L'\n'
+
+/* Map glibc-internal names to the standard API when building outside glibc. */
+#ifndef _LIBC
+# define __wctype wctype
+# define __iswctype iswctype
+# define __btowc btowc
+# define __mempcpy mempcpy
+# define __wcrtomb wcrtomb
+# define attribute_hidden
+#endif /* not _LIBC */
+
+extern const char __re_error_msgid[] attribute_hidden;
+extern const size_t __re_error_msgid_idx[] attribute_hidden;
+
+/* Number of bits in an unsigned int. */
+#define UINT_BITS (sizeof (unsigned int) * BYTE_BITS)
+/* Number of unsigned ints in a bitset. */
+#define BITSET_UINTS ((SBC_MAX + UINT_BITS - 1) / UINT_BITS)
+typedef unsigned int bitset[BITSET_UINTS];
+typedef unsigned int *re_bitset_ptr_t;
+
+/* Single-bit operations on a bitset.  SET and I are parenthesized so that
+   expression arguments keep their meaning, and the mask is built from an
+   unsigned 1 so that selecting the top bit of a word does not overflow a
+   signed int.  I is evaluated twice; do not pass an expression with side
+   effects.  */
+#define bitset_set(set,i) ((set)[(i) / UINT_BITS] |= 1u << ((i) % UINT_BITS))
+#define bitset_clear(set,i) ((set)[(i) / UINT_BITS] &= ~(1u << ((i) % UINT_BITS)))
+#define bitset_contain(set,i) ((set)[(i) / UINT_BITS] & (1u << ((i) % UINT_BITS)))
+/* Whole-set operations: clear all bits, set all bits, copy SRC to DEST. */
+#define bitset_empty(set) memset (set, 0, sizeof (unsigned int) * BITSET_UINTS)
+#define bitset_set_all(set) \
+  memset (set, 255, sizeof (unsigned int) * BITSET_UINTS)
+#define bitset_copy(dest,src) \
+  memcpy (dest, src, sizeof (unsigned int) * BITSET_UINTS)
+static inline void bitset_not (bitset set);
+static inline void bitset_merge (bitset dest, const bitset src);
+static inline void bitset_not_merge (bitset dest, const bitset src);
+
+#define PREV_WORD_CONSTRAINT 0x0001
+#define PREV_NOTWORD_CONSTRAINT 0x0002
+#define NEXT_WORD_CONSTRAINT 0x0004
+#define NEXT_NOTWORD_CONSTRAINT 0x0008
+#define PREV_NEWLINE_CONSTRAINT 0x0010
+#define NEXT_NEWLINE_CONSTRAINT 0x0020
+#define PREV_BEGBUF_CONSTRAINT 0x0040
+#define NEXT_ENDBUF_CONSTRAINT 0x0080
+#define DUMMY_CONSTRAINT 0x0100
+
+typedef enum
+{
+ INSIDE_WORD = PREV_WORD_CONSTRAINT | NEXT_WORD_CONSTRAINT, /* word context required on both sides */
+ WORD_FIRST = PREV_NOTWORD_CONSTRAINT | NEXT_WORD_CONSTRAINT, /* non-word before, word after */
+ WORD_LAST = PREV_WORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT, /* word before, non-word after */
+ LINE_FIRST = PREV_NEWLINE_CONSTRAINT, /* newline context required before */
+ LINE_LAST = NEXT_NEWLINE_CONSTRAINT, /* newline context required after */
+ BUF_FIRST = PREV_BEGBUF_CONSTRAINT, /* beginning-of-buffer context required before */
+ BUF_LAST = NEXT_ENDBUF_CONSTRAINT, /* end-of-buffer context required after */
+ WORD_DELIM = DUMMY_CONSTRAINT /* placeholder bit; not tested by the NOT_SATISFY_*_CONSTRAINT macros */
+} re_context_type; /* each value is a mask of the *_CONSTRAINT bits defined above */
+
+typedef struct
+{
+ int alloc; /* presumably the capacity of ELEMS, in ints -- TODO confirm against re_node_set_alloc */
+ int nelem; /* number of valid entries in ELEMS */
+ int *elems; /* the entries; state-creation code uses them as indices into dfa->nodes */
+} re_node_set; /* released with re_node_set_free, which frees ELEMS only */
+
+typedef enum
+{
+ NON_TYPE = 0,
+
+ /* Token type, these are used only by token. */
+ OP_OPEN_BRACKET,
+ OP_CLOSE_BRACKET,
+ OP_CHARSET_RANGE,
+ OP_OPEN_DUP_NUM,
+ OP_CLOSE_DUP_NUM,
+ OP_NON_MATCH_LIST,
+ OP_OPEN_COLL_ELEM,
+ OP_CLOSE_COLL_ELEM,
+ OP_OPEN_EQUIV_CLASS,
+ OP_CLOSE_EQUIV_CLASS,
+ OP_OPEN_CHAR_CLASS,
+ OP_CLOSE_CHAR_CLASS,
+ OP_WORD,
+ OP_NOTWORD,
+ BACK_SLASH,
+
+ /* Tree type, these are used only by tree. */
+ CONCAT,
+ ALT,
+ SUBEXP,
+ SIMPLE_BRACKET,
+#ifdef RE_ENABLE_I18N
+ COMPLEX_BRACKET,
+#endif /* RE_ENABLE_I18N */
+
+ /* Node type, These are used by token, node, tree. */
+ OP_OPEN_SUBEXP,
+ OP_CLOSE_SUBEXP,
+ OP_PERIOD,
+ CHARACTER,
+ END_OF_RE,
+ OP_ALT,
+ OP_DUP_ASTERISK,
+ OP_DUP_PLUS,
+ OP_DUP_QUESTION,
+ OP_BACK_REF,
+ ANCHOR,
+
+ /* Dummy marker. */
+ END_OF_RE_TOKEN_T
+} re_token_type_t;
+
+#ifdef RE_ENABLE_I18N
+typedef struct
+{
+ /* Multibyte characters. */
+ wchar_t *mbchars;
+
+ /* Collating symbols. */
+# ifdef _LIBC
+ int32_t *coll_syms;
+# endif
+
+ /* Equivalence classes. */
+# ifdef _LIBC
+ int32_t *equiv_classes;
+# endif
+
+ /* Range expressions. */
+# ifdef _LIBC
+ uint32_t *range_starts;
+ uint32_t *range_ends;
+# else /* not _LIBC */
+ wchar_t *range_starts;
+ wchar_t *range_ends;
+# endif /* not _LIBC */
+
+ /* Character classes. */
+ wctype_t *char_classes;
+
+ /* If this character set is the non-matching list. */
+ unsigned int non_match : 1;
+
+ /* # of multibyte characters. */
+ int nmbchars;
+
+ /* # of collating symbols. */
+ int ncoll_syms;
+
+ /* # of equivalence classes. */
+ int nequiv_classes;
+
+ /* # of range expressions. */
+ int nranges;
+
+ /* # of character classes. */
+ int nchar_classes;
+} re_charset_t;
+#endif /* RE_ENABLE_I18N */
+
+typedef struct
+{
+ union
+ {
+ unsigned char c; /* for CHARACTER */
+ re_bitset_ptr_t sbcset; /* for SIMPLE_BRACKET */
+#ifdef RE_ENABLE_I18N
+ re_charset_t *mbcset; /* for COMPLEX_BRACKET */
+#endif /* RE_ENABLE_I18N */
+ int idx; /* for BACK_REF */
+ re_context_type ctx_type; /* for ANCHOR */
+ } opr; /* operand; which member is valid depends on TYPE */
+#if __GNUC__ >= 2
+ re_token_type_t type : 8; /* packed into 8 bits when compiled with GCC 2 or later */
+#else
+ re_token_type_t type; /* full-width enum for other compilers */
+#endif
+ unsigned int constraint : 10; /* context constraint: a mask of *_CONSTRAINT bits; re_dfa_add_node stores 0 */
+ unsigned int duplicated : 1; /* re_dfa_add_node stores 0; presumably set for duplicated nodes -- TODO confirm */
+#ifdef RE_ENABLE_I18N
+ unsigned int mb_partial : 1; /* only present with RE_ENABLE_I18N; meaning not visible here */
+#endif
+} re_token_t;
+
+#define IS_EPSILON_NODE(type) \
+ ((type) == OP_ALT || (type) == OP_DUP_ASTERISK || (type) == OP_DUP_PLUS \
+ || (type) == OP_DUP_QUESTION || (type) == ANCHOR \
+ || (type) == OP_OPEN_SUBEXP || (type) == OP_CLOSE_SUBEXP)
+
+#define ACCEPT_MB_NODE(type) \
+ ((type) == COMPLEX_BRACKET || (type) == OP_PERIOD)
+
+struct re_string_t
+{
+ /* Indicate the raw buffer which is the original string passed as an
+ argument of regexec(), re_search(), etc.. */
+ const unsigned char *raw_mbs;
+ /* Store the multibyte string. In case of "case insensitive mode" like
+ REG_ICASE, upper cases of the string are stored, otherwise MBS points
+ the same address that RAW_MBS points. */
+ unsigned char *mbs;
+ /* Store the case sensitive multibyte string. In case of
+ "case insensitive mode", the original string are stored,
+ otherwise MBS_CASE points the same address that MBS points. */
+ unsigned char *mbs_case;
+#ifdef RE_ENABLE_I18N
+ /* Store the wide character string which is corresponding to MBS. */
+ wint_t *wcs;
+ mbstate_t cur_state;
+#endif
+ /* Index in RAW_MBS. Each character mbs[i] corresponds to
+ raw_mbs[raw_mbs_idx + i]. */
+ int raw_mbs_idx;
+ /* The length of the valid characters in the buffers. */
+ int valid_len;
+ /* The length of the buffers MBS, MBS_CASE, and WCS. */
+ int bufs_len;
+ /* The index in MBS, which is updated by re_string_fetch_byte. */
+ int cur_idx;
+ /* This is length_of_RAW_MBS - RAW_MBS_IDX. */
+ int len;
+ /* End of the buffer may be shorter than its length in the cases such
+ as re_match_2, re_search_2. Then, we use STOP for end of the buffer
+ instead of LEN. */
+ int stop;
+
+ /* The context of mbs[0]. We store the context independently, since
+ the context of mbs[0] may be different from raw_mbs[0], which is
+ the beginning of the input string. */
+ unsigned int tip_context;
+ /* The translation passed as a part of an argument of re_compile_pattern. */
+ RE_TRANSLATE_TYPE trans;
+ /* 1 if REG_ICASE. */
+ unsigned int icase : 1;
+};
+typedef struct re_string_t re_string_t;
+/* In case of REG_ICASE, we allocate the buffer dynamically for mbs.
+   PSTR is parenthesized so that an argument such as `&str' expands
+   correctly. */
+#define MBS_ALLOCATED(pstr) ((pstr)->icase)
+/* In case that we need translation, we allocate the buffer dynamically
+   for mbs_case. Note that mbs == mbs_case if not REG_ICASE. */
+#define MBS_CASE_ALLOCATED(pstr) ((pstr)->trans != NULL)
+
+
+static reg_errcode_t re_string_allocate (re_string_t *pstr, const char *str,
+ int len, int init_len,
+ RE_TRANSLATE_TYPE trans, int icase);
+static reg_errcode_t re_string_construct (re_string_t *pstr, const char *str,
+ int len, RE_TRANSLATE_TYPE trans,
+ int icase);
+static reg_errcode_t re_string_reconstruct (re_string_t *pstr, int idx,
+ int eflags, int newline);
+static reg_errcode_t re_string_realloc_buffers (re_string_t *pstr,
+ int new_buf_len);
+#ifdef RE_ENABLE_I18N
+static void build_wcs_buffer (re_string_t *pstr);
+static void build_wcs_upper_buffer (re_string_t *pstr);
+#endif /* RE_ENABLE_I18N */
+static void build_upper_buffer (re_string_t *pstr);
+static void re_string_translate_buffer (re_string_t *pstr);
+static void re_string_destruct (re_string_t *pstr);
+#ifdef RE_ENABLE_I18N
+static int re_string_elem_size_at (const re_string_t *pstr, int idx);
+static inline int re_string_char_size_at (const re_string_t *pstr, int idx);
+static inline wint_t re_string_wchar_at (const re_string_t *pstr, int idx);
+#endif /* RE_ENABLE_I18N */
+static unsigned int re_string_context_at (const re_string_t *input, int idx,
+ int eflags, int newline_anchor);
+/* Accessors for re_string_t.  None of them checks bounds.  The peek
+   macros read relative to the current index without moving it; the
+   fetch macros read at the current index and then advance it by one.
+   OFFSET is parenthesized so that expression arguments keep their
+   meaning.  re_string_first_byte and re_string_is_single_byte_char
+   read the `wcs' member, which exists only with RE_ENABLE_I18N.  */
+#define re_string_peek_byte(pstr, offset) \
+  ((pstr)->mbs[(pstr)->cur_idx + (offset)])
+#define re_string_peek_byte_case(pstr, offset) \
+  ((pstr)->mbs_case[(pstr)->cur_idx + (offset)])
+#define re_string_fetch_byte(pstr) \
+  ((pstr)->mbs[(pstr)->cur_idx++])
+#define re_string_fetch_byte_case(pstr) \
+  ((pstr)->mbs_case[(pstr)->cur_idx++])
+#define re_string_first_byte(pstr, idx) \
+  ((idx) == (pstr)->len || (pstr)->wcs[idx] != WEOF)
+#define re_string_is_single_byte_char(pstr, idx) \
+  ((pstr)->wcs[idx] != WEOF && ((pstr)->len == (idx) \
+                                || (pstr)->wcs[(idx) + 1] != WEOF))
+#define re_string_eoi(pstr) ((pstr)->stop <= (pstr)->cur_idx)
+#define re_string_cur_idx(pstr) ((pstr)->cur_idx)
+#define re_string_get_buffer(pstr) ((pstr)->mbs)
+#define re_string_length(pstr) ((pstr)->len)
+#define re_string_byte_at(pstr,idx) ((pstr)->mbs[idx])
+#define re_string_skip_bytes(pstr,idx) ((pstr)->cur_idx += (idx))
+#define re_string_set_index(pstr,idx) ((pstr)->cur_idx = (idx))
+
+#define re_malloc(t,n) ((t *) malloc ((n) * sizeof (t))) /* typed malloc of N objects of type T; the size product is not overflow-checked */
+#define re_realloc(p,t,n) ((t *) realloc (p, (n) * sizeof (t))) /* typed realloc of P to N objects of type T; on NULL the caller still owns P */
+#define re_free(p) free (p) /* plain free; free (NULL) is a no-op */
+
+struct bin_tree_t
+{
+ struct bin_tree_t *parent;
+ struct bin_tree_t *left;
+ struct bin_tree_t *right;
+
+ /* `node_idx' is the index in dfa->nodes, if `type' == 0.
+ Otherwise `type' indicate the type of this node. */
+ re_token_type_t type;
+ int node_idx;
+
+ int first;
+ int next;
+ re_node_set eclosure;
+};
+typedef struct bin_tree_t bin_tree_t;
+
+
+#define CONTEXT_WORD 1
+#define CONTEXT_NEWLINE (CONTEXT_WORD << 1)
+#define CONTEXT_BEGBUF (CONTEXT_NEWLINE << 1)
+#define CONTEXT_ENDBUF (CONTEXT_BEGBUF << 1)
+
+#define IS_WORD_CONTEXT(c) ((c) & CONTEXT_WORD)
+#define IS_NEWLINE_CONTEXT(c) ((c) & CONTEXT_NEWLINE)
+#define IS_BEGBUF_CONTEXT(c) ((c) & CONTEXT_BEGBUF)
+#define IS_ENDBUF_CONTEXT(c) ((c) & CONTEXT_ENDBUF)
+#define IS_ORDINARY_CONTEXT(c) ((c) == 0)
+
+#define IS_WORD_CHAR(ch) (isalnum (ch) || (ch) == '_')
+#define IS_NEWLINE(ch) ((ch) == NEWLINE_CHAR)
+#define IS_WIDE_WORD_CHAR(ch) (iswalnum (ch) || (ch) == L'_')
+#define IS_WIDE_NEWLINE(ch) ((ch) == WIDE_NEWLINE_CHAR)
+
+/* Nonzero if CONTEXT, the context preceding a position, violates any
+   PREV_* bit set in CONSTRAINT.  Every use of CONSTRAINT is parenthesized
+   so that an expression argument such as `a | b' is masked as a whole. */
+#define NOT_SATISFY_PREV_CONSTRAINT(constraint,context) \
+  ((((constraint) & PREV_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \
+   || (((constraint) & PREV_NOTWORD_CONSTRAINT) && IS_WORD_CONTEXT (context)) \
+   || (((constraint) & PREV_NEWLINE_CONSTRAINT) && !IS_NEWLINE_CONTEXT (context))\
+   || (((constraint) & PREV_BEGBUF_CONSTRAINT) && !IS_BEGBUF_CONTEXT (context)))
+
+#define NOT_SATISFY_NEXT_CONSTRAINT(constraint,context) \
+ ((((constraint) & NEXT_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \
+ || (((constraint) & NEXT_NOTWORD_CONSTRAINT) && IS_WORD_CONTEXT (context)) \
+ || (((constraint) & NEXT_NEWLINE_CONSTRAINT) && !IS_NEWLINE_CONTEXT (context)) \
+ || (((constraint) & NEXT_ENDBUF_CONSTRAINT) && !IS_ENDBUF_CONTEXT (context)))
+
+/* One state of the DFA.  */
+struct re_dfastate_t
+{
+  /* Hash under which the state is stored in the state table.  */
+  unsigned int hash;
+  /* Nodes of this state.  For a context-dependent state, the nodes whose
+     constraints the context does not satisfy have been removed.  */
+  re_node_set nodes;
+  /* The node set the state was created from.  Points at NODES unless a
+     separate, unpruned copy had to be allocated.  */
+  re_node_set *entrance_nodes;
+  struct re_dfastate_t **trtable;
+  struct re_dfastate_t **trtable_search;
+  /* Context of the state, a mask of the CONTEXT_* bits.  The field is
+     4 bits wide because CONTEXT_BEGBUF (4) and CONTEXT_ENDBUF (8) would
+     be truncated by a narrower one, and a stored context must compare
+     equal to the context it was created with.  */
+  unsigned int context : 4;
+  /* If this state is a special state.
+     A state is a special state if the state is the halt state, or
+     an anchor.  */
+  unsigned int halt : 1;
+  /* If this state can accept `multi byte'.
+     Note that we refer to multibyte characters, and multi character
+     collating elements as `multi byte'.  */
+  unsigned int accept_mb : 1;
+  /* If this state has backreference node(s).  */
+  unsigned int has_backref : 1;
+  /* If this state has an anchor or a node carrying a constraint.  */
+  unsigned int has_constraint : 1;
+};
+typedef struct re_dfastate_t re_dfastate_t;
+
+typedef struct
+{
+ /* start <= node < end */
+ int start;
+ int end;
+} re_subexp_t;
+
+struct re_state_table_entry
+{
+ int num; /* number of states currently stored in ARRAY */
+ int alloc; /* capacity of ARRAY; register_state grows it when ALLOC <= NUM */
+ re_dfastate_t **array; /* the states whose masked hash selects this entry */
+};
+
+/* Array type used in re_sub_match_last_t and re_sub_match_top_t. */
+
+typedef struct
+{
+ int next_idx;
+ int alloc;
+ re_dfastate_t **array;
+} state_array_t;
+
+/* Store information about the node NODE whose type is OP_CLOSE_SUBEXP. */
+
+typedef struct
+{
+ int node;
+ int str_idx; /* The position NODE match at. */
+ state_array_t path;
+} re_sub_match_last_t;
+
+/* Store information about the node NODE whose type is OP_OPEN_SUBEXP.
+ And information about the node, whose type is OP_CLOSE_SUBEXP,
+ corresponding to NODE is stored in LASTS. */
+
+typedef struct
+{
+ int str_idx;
+ int node;
+ int next_last_offset;
+ state_array_t *path;
+ int alasts; /* Allocation size of LASTS. */
+ int nlasts; /* The number of LASTS. */
+ re_sub_match_last_t **lasts;
+} re_sub_match_top_t;
+
+struct re_backref_cache_entry
+{
+ int node;
+ int str_idx;
+ int subexp_from;
+ int subexp_to;
+ int flag;
+};
+
+typedef struct
+{
+ /* EFLAGS of the argument of regexec. */
+ int eflags;
+ /* Where the matching ends. */
+ int match_last;
+ int last_node;
+ /* The string object corresponding to the input string. */
+ re_string_t *input;
+ /* The state log used by the matcher. */
+ re_dfastate_t **state_log;
+ int state_log_top;
+ /* Back reference cache. */
+ int nbkref_ents;
+ int abkref_ents;
+ struct re_backref_cache_entry *bkref_ents;
+ int max_mb_elem_len;
+ int nsub_tops;
+ int asub_tops;
+ re_sub_match_top_t **sub_tops;
+} re_match_context_t;
+
+typedef struct
+{
+ int cur_bkref;
+ int cls_subexp_idx;
+
+ re_dfastate_t **sifted_states;
+ re_dfastate_t **limited_states;
+
+ re_node_set limits;
+
+ int last_node;
+ int last_str_idx;
+ int check_subexp;
+} re_sift_context_t;
+
+struct re_fail_stack_ent_t
+{
+ int idx;
+ int node;
+ regmatch_t *regs;
+ re_node_set eps_via_nodes;
+};
+
+struct re_fail_stack_t
+{
+ int num;
+ int alloc;
+ struct re_fail_stack_ent_t *stack;
+};
+
+struct re_dfa_t
+{
+ re_bitset_ptr_t word_char;
+
+ /* number of subexpressions `re_nsub' is in regex_t. */
+ int subexps_alloc;
+ re_subexp_t *subexps;
+
+ re_token_t *nodes; /* node array; re_dfa_add_node appends to it */
+ int nodes_alloc; /* capacity of NODES; doubled by re_dfa_add_node when full */
+ int nodes_len; /* number of nodes in use; also the index handed to the next node */
+ bin_tree_t *str_tree;
+ int *nexts; /* per-node array, resized along with NODES when re_dfa_add_node's MODE is nonzero */
+ int *org_indices; /* per-node array, resized together with NEXTS */
+ re_node_set *edests; /* per-node array, resized together with NEXTS */
+ re_node_set *eclosures; /* per-node array, resized together with NEXTS */
+ re_node_set *inveclosures; /* per-node array, resized together with NEXTS */
+ struct re_state_table_entry *state_table; /* hash table of states, indexed by hash & STATE_HASH_MASK */
+ unsigned int state_hash_mask; /* mask applied to a state hash to select a STATE_TABLE entry */
+ re_dfastate_t *init_state;
+ re_dfastate_t *init_state_word;
+ re_dfastate_t *init_state_nl;
+ re_dfastate_t *init_state_begbuf;
+ int states_alloc;
+ int init_node;
+ int nbackref; /* The number of backreference in this dfa. */
+ /* Bitmap expressing which backreference is used. */
+ unsigned int used_bkref_map;
+#ifdef DEBUG
+ char* re_str;
+#endif
+ unsigned int has_plural_match : 1;
+ /* If this dfa has "multibyte node", which is a backreference or
+ a node which can accept multibyte character or multi character
+ collating element. */
+ unsigned int has_mb_node : 1;
+};
+typedef struct re_dfa_t re_dfa_t;
+
+static reg_errcode_t re_node_set_alloc (re_node_set *set, int size);
+static reg_errcode_t re_node_set_init_1 (re_node_set *set, int elem);
+static reg_errcode_t re_node_set_init_2 (re_node_set *set, int elem1,
+ int elem2);
+#define re_node_set_init_empty(set) memset (set, '\0', sizeof (re_node_set))
+static reg_errcode_t re_node_set_init_copy (re_node_set *dest,
+ const re_node_set *src);
+static reg_errcode_t re_node_set_add_intersect (re_node_set *dest,
+ const re_node_set *src1,
+ const re_node_set *src2);
+static reg_errcode_t re_node_set_init_union (re_node_set *dest,
+ const re_node_set *src1,
+ const re_node_set *src2);
+static reg_errcode_t re_node_set_merge (re_node_set *dest,
+ const re_node_set *src);
+static int re_node_set_insert (re_node_set *set, int elem);
+static int re_node_set_compare (const re_node_set *set1,
+ const re_node_set *set2);
+static int re_node_set_contains (const re_node_set *set, int elem);
+static void re_node_set_remove_at (re_node_set *set, int idx);
+#define re_node_set_remove(set,id) \
+ (re_node_set_remove_at (set, re_node_set_contains (set, id) - 1))
+#define re_node_set_empty(p) ((p)->nelem = 0)
+#define re_node_set_free(set) re_free ((set)->elems)
+static int re_dfa_add_node (re_dfa_t *dfa, re_token_t token, int mode);
+static re_dfastate_t *re_acquire_state (reg_errcode_t *err, re_dfa_t *dfa,
+ const re_node_set *nodes);
+static re_dfastate_t *re_acquire_state_context (reg_errcode_t *err,
+ re_dfa_t *dfa,
+ const re_node_set *nodes,
+ unsigned int context);
+static void free_state (re_dfastate_t *state);
+\f
+
+typedef enum
+{
+ SB_CHAR,
+ MB_CHAR,
+ EQUIV_CLASS,
+ COLL_SYM,
+ CHAR_CLASS
+} bracket_elem_type;
+
+typedef struct
+{
+ bracket_elem_type type;
+ union
+ {
+ unsigned char ch;
+ unsigned char *name;
+ wchar_t wch;
+ } opr;
+} bracket_elem_t;
+
+
+/* Inline functions for bitset operation. */
+
+/* Complement every word of SET in place. */
+static inline void
+bitset_not (set)
+     bitset set;
+{
+  unsigned int *word = set;
+  unsigned int *const end = set + BITSET_UINTS;
+  for (; word != end; ++word)
+    *word = ~*word;
+}
+
+/* OR every word of SRC into the corresponding word of DEST. */
+static inline void
+bitset_merge (dest, src)
+     bitset dest;
+     const bitset src;
+{
+  unsigned int *out = dest;
+  unsigned int *const end = dest + BITSET_UINTS;
+  const unsigned int *in = src;
+  while (out != end)
+    *out++ |= *in++;
+}
+
+/* OR the complement of every word of SRC into the corresponding word
+   of DEST. */
+static inline void
+bitset_not_merge (dest, src)
+     bitset dest;
+     const bitset src;
+{
+  unsigned int *out = dest;
+  unsigned int *const end = dest + BITSET_UINTS;
+  const unsigned int *in = src;
+  while (out != end)
+    *out++ |= ~*in++;
+}
+
+#ifdef RE_ENABLE_I18N
+/* Inline functions for re_string. */
+/* Return the number of bytes occupied by the character of PSTR that
+   starts at IDX.  The result is always 1 when MB_CUR_MAX is 1.
+   Otherwise it is 1 plus the number of immediately following positions,
+   below `len', whose `wcs' entry is WEOF. */
+static inline int
+re_string_char_size_at (pstr, idx)
+     const re_string_t *pstr;
+     int idx;
+{
+  int size = 1;
+  if (MB_CUR_MAX == 1)
+    return 1;
+  while (idx + size < pstr->len && pstr->wcs[idx + size] == WEOF)
+    ++size;
+  return size;
+}
+
+/* Return the character of PSTR at IDX as a wint_t.  It is read from the
+   byte buffer `mbs' when MB_CUR_MAX is 1, and from the wide buffer `wcs'
+   otherwise. */
+static inline wint_t
+re_string_wchar_at (pstr, idx)
+     const re_string_t *pstr;
+     int idx;
+{
+  return (MB_CUR_MAX == 1
+          ? (wint_t) pstr->mbs[idx]
+          : (wint_t) pstr->wcs[idx]);
+}
+
+static int
+re_string_elem_size_at (pstr, idx)
+ const re_string_t *pstr;
+ int idx;
+{ /* returns a byte count for the element of PSTR at IDX; always 1 outside glibc */
+#ifdef _LIBC
+ const unsigned char *p, *extra;
+ const int32_t *table, *indirect; /* presumably read by findidx from the included header -- TODO confirm */
+ int32_t tmp; /* receives findidx's result; not used afterwards */
+# include <locale/weight.h> /* included inside the function body; presumably supplies findidx -- TODO confirm */
+ uint_fast32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
+
+ if (nrules != 0)
+ {
+ table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
+ extra = (const unsigned char *)
+ _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
+ indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
+ _NL_COLLATE_INDIRECTMB);
+ p = pstr->mbs + idx;
+ tmp = findidx (&p); /* P is passed by address so that findidx can advance it */
+ return p - pstr->mbs - idx; /* number of bytes findidx moved P past the start position */
+ }
+ else
+#endif /* _LIBC */
+ return 1; /* no collation rules, or not built as part of glibc */
+}
+#endif /* RE_ENABLE_I18N */
+
+#endif /* _REGEX_INTERNAL_H */
--- /dev/null
+/* Extended regular expression matching and search library.
+ Copyright (C) 2002, 2003 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
+ re_string_t *input, int n);
+static void match_ctx_clean (re_match_context_t *mctx);
+static void match_ctx_free (re_match_context_t *cache);
+static void match_ctx_free_subtops (re_match_context_t *mctx);
+static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node,
+ int str_idx, int from, int to);
+static int search_cur_bkref_entry (re_match_context_t *mctx, int str_idx);
+static void match_ctx_clear_flag (re_match_context_t *mctx);
+static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, int node,
+ int str_idx);
+static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
+ int node, int str_idx);
+static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
+ re_dfastate_t **limited_sts, int last_node,
+ int last_str_idx, int check_subexp);
+static reg_errcode_t re_search_internal (const regex_t *preg,
+ const char *string, int length,
+ int start, int range, int stop,
+ size_t nmatch, regmatch_t pmatch[],
+ int eflags);
+static int re_search_2_stub (struct re_pattern_buffer *bufp,
+ const char *string1, int length1,
+ const char *string2, int length2,
+ int start, int range, struct re_registers *regs,
+ int stop, int ret_len);
+static int re_search_stub (struct re_pattern_buffer *bufp,
+ const char *string, int length, int start,
+ int range, int stop, struct re_registers *regs,
+ int ret_len);
+static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
+ int nregs, int regs_allocated);
+static inline re_dfastate_t *acquire_init_state_context (reg_errcode_t *err,
+ const regex_t *preg,
+ const re_match_context_t *mctx,
+ int idx);
+static reg_errcode_t prune_impossible_nodes (const regex_t *preg,
+ re_match_context_t *mctx);
+static int check_matching (const regex_t *preg, re_match_context_t *mctx,
+ int fl_search, int fl_longest_match);
+static int check_halt_node_context (const re_dfa_t *dfa, int node,
+ unsigned int context);
+static int check_halt_state_context (const regex_t *preg,
+ const re_dfastate_t *state,
+ const re_match_context_t *mctx, int idx);
+static void update_regs (re_dfa_t *dfa, regmatch_t *pmatch, int cur_node,
+ int cur_idx, int nmatch);
+static int proceed_next_node (const regex_t *preg, int nregs, regmatch_t *regs,
+ const re_match_context_t *mctx,
+ int *pidx, int node, re_node_set *eps_via_nodes,
+ struct re_fail_stack_t *fs);
+static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
+ int str_idx, int *dests, int nregs,
+ regmatch_t *regs,
+ re_node_set *eps_via_nodes);
+static int pop_fail_stack (struct re_fail_stack_t *fs, int *pidx, int nregs,
+ regmatch_t *regs, re_node_set *eps_via_nodes);
+static reg_errcode_t set_regs (const regex_t *preg,
+ const re_match_context_t *mctx,
+ size_t nmatch, regmatch_t *pmatch,
+ int fl_backtrack);
+static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs);
+
+#ifdef RE_ENABLE_I18N
+static int sift_states_iter_mb (const regex_t *preg,
+ const re_match_context_t *mctx,
+ re_sift_context_t *sctx,
+ int node_idx, int str_idx, int max_str_idx);
+#endif /* RE_ENABLE_I18N */
+static reg_errcode_t sift_states_backward (const regex_t *preg,
+ re_match_context_t *mctx,
+ re_sift_context_t *sctx);
+static reg_errcode_t update_cur_sifted_state (const regex_t *preg,
+ re_match_context_t *mctx,
+ re_sift_context_t *sctx,
+ int str_idx,
+ re_node_set *dest_nodes);
+static reg_errcode_t add_epsilon_src_nodes (re_dfa_t *dfa,
+ re_node_set *dest_nodes,
+ const re_node_set *candidates);
+static reg_errcode_t sub_epsilon_src_nodes (re_dfa_t *dfa, int node,
+ re_node_set *dest_nodes,
+ const re_node_set *and_nodes);
+static int check_dst_limits (re_dfa_t *dfa, re_node_set *limits,
+ re_match_context_t *mctx, int dst_node,
+ int dst_idx, int src_node, int src_idx);
+static int check_dst_limits_calc_pos (re_dfa_t *dfa, re_match_context_t *mctx,
+ int limit, re_node_set *eclosures,
+ int subexp_idx, int node, int str_idx);
+static reg_errcode_t check_subexp_limits (re_dfa_t *dfa,
+ re_node_set *dest_nodes,
+ const re_node_set *candidates,
+ re_node_set *limits,
+ struct re_backref_cache_entry *bkref_ents,
+ int str_idx);
+static reg_errcode_t sift_states_bkref (const regex_t *preg,
+ re_match_context_t *mctx,
+ re_sift_context_t *sctx,
+ int str_idx, re_node_set *dest_nodes);
+static reg_errcode_t clean_state_log_if_need (re_match_context_t *mctx,
+ int next_state_log_idx);
+static reg_errcode_t merge_state_array (re_dfa_t *dfa, re_dfastate_t **dst,
+ re_dfastate_t **src, int num);
+static re_dfastate_t *transit_state (reg_errcode_t *err, const regex_t *preg,
+ re_match_context_t *mctx,
+ re_dfastate_t *state, int fl_search);
+static reg_errcode_t check_subexp_matching_top (re_dfa_t *dfa,
+ re_match_context_t *mctx,
+ re_node_set *cur_nodes,
+ int str_idx);
+static re_dfastate_t *transit_state_sb (reg_errcode_t *err, const regex_t *preg,
+ re_dfastate_t *pstate,
+ int fl_search,
+ re_match_context_t *mctx);
+#ifdef RE_ENABLE_I18N
+static reg_errcode_t transit_state_mb (const regex_t *preg,
+ re_dfastate_t *pstate,
+ re_match_context_t *mctx);
+#endif /* RE_ENABLE_I18N */
+static reg_errcode_t transit_state_bkref (const regex_t *preg,
+ re_node_set *nodes,
+ re_match_context_t *mctx);
+static reg_errcode_t get_subexp (const regex_t *preg, re_match_context_t *mctx,
+ int bkref_node, int bkref_str_idx);
+static reg_errcode_t get_subexp_sub (const regex_t *preg,
+ re_match_context_t *mctx,
+ re_sub_match_top_t *sub_top,
+ re_sub_match_last_t *sub_last,
+ int bkref_node, int bkref_str);
+static int find_subexp_node (re_dfa_t *dfa, re_node_set *nodes,
+ int subexp_idx, int fl_open);
+static reg_errcode_t check_arrival (const regex_t *preg,
+ re_match_context_t *mctx,
+ state_array_t *path, int top_node,
+ int top_str, int last_node, int last_str,
+ int fl_open);
+static reg_errcode_t check_arrival_add_next_nodes (const regex_t *preg,
+ re_dfa_t *dfa,
+ re_match_context_t *mctx,
+ int str_idx,
+ re_node_set *cur_nodes,
+ re_node_set *next_nodes);
+static reg_errcode_t check_arrival_expand_ecl (re_dfa_t *dfa,
+ re_node_set *cur_nodes,
+ int ex_subexp, int fl_open);
+static reg_errcode_t check_arrival_expand_ecl_sub (re_dfa_t *dfa,
+ re_node_set *dst_nodes,
+ int target, int ex_subexp,
+ int fl_open);
+static reg_errcode_t expand_bkref_cache (const regex_t *preg,
+ re_match_context_t *mctx,
+ re_node_set *cur_nodes, int cur_str,
+ int last_str, int subexp_num,
+ int fl_open);
+static re_dfastate_t **build_trtable (const regex_t *dfa,
+ const re_dfastate_t *state,
+ int fl_search);
+#ifdef RE_ENABLE_I18N
+static int check_node_accept_bytes (const regex_t *preg, int node_idx,
+ const re_string_t *input, int idx);
+# ifdef _LIBC
+static unsigned int find_collation_sequence_value (const unsigned char *mbs,
+ size_t name_len);
+# endif /* _LIBC */
+#endif /* RE_ENABLE_I18N */
+static int group_nodes_into_DFAstates (const regex_t *dfa,
+ const re_dfastate_t *state,
+ re_node_set *states_node,
+ bitset *states_ch);
+static int check_node_accept (const regex_t *preg, const re_token_t *node,
+ const re_match_context_t *mctx, int idx);
+static reg_errcode_t extend_buffers (re_match_context_t *mctx);
+\f
+/* Entry point for POSIX code. */
+
+/* POSIX entry point: search STRING for the pattern compiled into PREG.
+
+   PMATCH is used only when NMATCH is nonzero and PREG was compiled
+   without REG_NOSUB; it must then have room for at least NMATCH
+   elements, which receive the offsets of the matched substrings.
+
+   EFLAGS carries the execution flags: REG_NOTBOL keeps ^ from matching
+   at the beginning of STRING, REG_NOTEOL keeps $ from matching at its
+   end.
+
+   The result is 0 if a match was found and nonzero otherwise.  */
+
+int
+regexec (preg, string, nmatch, pmatch, eflags)
+     const regex_t *__restrict preg;
+     const char *__restrict string;
+     size_t nmatch;
+     regmatch_t pmatch[];
+     int eflags;
+{
+  const int len = strlen (string);
+  /* A pattern compiled with REG_NOSUB never reports sub-matches, so the
+     caller's array is dropped before the search starts.  */
+  size_t wanted = preg->no_sub ? 0 : nmatch;
+  regmatch_t *slots = preg->no_sub ? NULL : pmatch;
+  reg_errcode_t status;
+
+  /* Search the whole string: start at 0, range and stop both at its end.  */
+  status = re_search_internal (preg, string, len, 0, len, len, wanted, slots,
+                               eflags);
+  /* Every failure, not only "no match", collapses to 1 here.  */
+  return status != REG_NOERROR;
+}
+#ifdef _LIBC
+weak_alias (__regexec, regexec)
+#endif
+
+/* Entry points for GNU code. */
+
+/* re_match, re_search, re_match_2, re_search_2
+
+ The former two functions operate on STRING with length LENGTH,
+ while the latter two operate on the concatenation of STRING1 and STRING2
+ with lengths LENGTH1 and LENGTH2, respectively.
+
+ re_match() matches the compiled pattern in BUFP against the string,
+ starting at index START.
+
+ re_search() first tries matching at index START, then it tries to match
+ starting from index START + 1, and so on. The last start position tried
+ is START + RANGE. (Thus RANGE = 0 forces re_search to operate the same
+ way as re_match().)
+
+ The parameter STOP of re_{match,search}_2 specifies that no match exceeding
+ the first STOP characters of the concatenation of the strings should be
+ concerned.
+
+ If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
+ and all groups are stored in REGS. (For the "_2" variants, the offsets are
+ computed relative to the concatenation, not relative to the individual
+ strings.)
+
+ On success, re_match* functions return the length of the match, re_search*
+ return the position of the start of the match. Return value -1 means no
+ match was found and -2 indicates an internal error. */
+
+int
+re_match (bufp, string, length, start, regs)
+     struct re_pattern_buffer *bufp;
+     const char *string;
+     int length, start;
+     struct re_registers *regs;
+{
+  /* A zero range pins the attempt to START; the trailing 1 asks the stub
+     for the match length rather than the match position.  */
+  const int range = 0;
+  const int want_length = 1;
+
+  return re_search_stub (bufp, string, length, start, range, length, regs,
+                         want_length);
+}
+#ifdef _LIBC
+weak_alias (__re_match, re_match)
+#endif
+
+int
+re_search (bufp, string, length, start, range, regs)
+     struct re_pattern_buffer *bufp;
+     const char *string;
+     int length, start, range;
+     struct re_registers *regs;
+{
+  /* The whole string may be consumed (stop == LENGTH); the trailing 0
+     asks the stub for the match position rather than its length.  */
+  const int stop = length;
+  const int want_length = 0;
+
+  return re_search_stub (bufp, string, length, start, range, stop, regs,
+                         want_length);
+}
+#ifdef _LIBC
+weak_alias (__re_search, re_search)
+#endif
+
+int
+re_match_2 (bufp, string1, length1, string2, length2, start, regs, stop)
+     struct re_pattern_buffer *bufp;
+     const char *string1, *string2;
+     int length1, length2, start, stop;
+     struct re_registers *regs;
+{
+  /* A zero range pins the attempt to START; the trailing 1 asks the stub
+     for the match length rather than the match position.  */
+  const int range = 0;
+  const int want_length = 1;
+
+  return re_search_2_stub (bufp, string1, length1, string2, length2,
+                           start, range, regs, stop, want_length);
+}
+#ifdef _LIBC
+weak_alias (__re_match_2, re_match_2)
+#endif
+
+int
+re_search_2 (bufp, string1, length1, string2, length2, start, range, regs, stop)
+     struct re_pattern_buffer *bufp;
+     const char *string1, *string2;
+     int length1, length2, start, range, stop;
+     struct re_registers *regs;
+{
+  /* The trailing 0 asks the stub for the match position rather than the
+     match length.  */
+  const int want_length = 0;
+
+  return re_search_2_stub (bufp, string1, length1, string2, length2,
+                           start, range, regs, stop, want_length);
+}
+#ifdef _LIBC
+weak_alias (__re_search_2, re_search_2)
+#endif
+
+/* Shared body of re_match_2 and re_search_2: present STRING1 followed by
+   STRING2 to re_search_stub as one string of LENGTH1 + LENGTH2 bytes.
+   A temporary concatenation is built only when both parts are non-empty;
+   otherwise the single non-empty part is searched in place.
+   Returns -2 if a length or STOP is negative or the temporary buffer
+   cannot be allocated; otherwise returns what re_search_stub returns.  */
+
+static int
+re_search_2_stub (bufp, string1, length1, string2, length2, start, range, regs,
+                  stop, ret_len)
+     struct re_pattern_buffer *bufp;
+     const char *string1, *string2;
+     int length1, length2, start, range, stop, ret_len;
+     struct re_registers *regs;
+{
+  char *joined = NULL;	/* Non-NULL only when we own a concatenation.  */
+  const char *subject;
+  int total = length1 + length2;
+  int result;
+
+  if (BE (length1 < 0 || length2 < 0 || stop < 0, 0))
+    return -2;
+
+  /* Pick the text to search, concatenating only if unavoidable.  */
+  if (length2 <= 0)
+    subject = string1;
+  else if (length1 <= 0)
+    subject = string2;
+  else
+    {
+      joined = re_malloc (char, total);
+      if (BE (joined == NULL, 0))
+        return -2;
+      memcpy (joined, string1, length1);
+      memcpy (joined + length1, string2, length2);
+      subject = joined;
+    }
+
+  result = re_search_stub (bufp, subject, total, start, range, stop, regs,
+                           ret_len);
+  if (joined != NULL)
+    re_free (joined);
+  return result;
+}
+
+/* Common back end of re_match and re_search (and, through
+   re_search_2_stub, of the "_2" variants).
+
+   BUFP, STRING, LENGTH, START, RANGE and REGS have the same meaning as
+   those of re_search; STOP is handed on to re_search_internal.
+   If RET_LEN is nonzero the length of the match is returned (re_match
+   style); otherwise the position of the match is returned.
+
+   Returns -1 if START is out of range or no match was found, and -2 on
+   an internal error: an allocation failure here or in re_copy_regs, or
+   any result of re_search_internal other than REG_NOERROR/REG_NOMATCH.  */
+
+static int
+re_search_stub (bufp, string, length, start, range, stop, regs, ret_len)
+     struct re_pattern_buffer *bufp;
+     const char *string;
+     int length, start, range, stop, ret_len;
+     struct re_registers *regs;
+{
+  reg_errcode_t result;
+  regmatch_t *pmatch;
+  int nregs, rval;
+  int eflags = 0;
+
+  /* Check for out-of-range.  */
+  if (BE (start < 0 || start > length, 0))
+    return -1;
+  /* Clamp RANGE so that START + RANGE stays within [0, LENGTH].  */
+  if (BE (start + range > length, 0))
+    range = length - start;
+  else if (BE (start + range < 0, 0))
+    range = -start;
+
+  eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
+  eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
+
+  /* Compile fastmap if we haven't yet.  */
+  if (range > 0 && bufp->fastmap != NULL && !bufp->fastmap_accurate)
+    re_compile_fastmap (bufp);
+
+  /* A no_sub pattern never hands register data back to the caller.  */
+  if (BE (bufp->no_sub, 0))
+    regs = NULL;
+
+  /* We need at least 1 register.  */
+  if (regs == NULL)
+    nregs = 1;
+  else if (BE (bufp->regs_allocated == REGS_FIXED &&
+               regs->num_regs < bufp->re_nsub + 1, 0))
+    {
+      /* Fixed-size caller arrays that are too small: fill what fits.  */
+      nregs = regs->num_regs;
+      if (BE (nregs < 1, 0))
+        {
+          /* Nothing can be copied to regs.  */
+          regs = NULL;
+          nregs = 1;
+        }
+    }
+  else
+    nregs = bufp->re_nsub + 1;
+  pmatch = re_malloc (regmatch_t, nregs);
+  if (BE (pmatch == NULL, 0))
+    return -2;
+
+  result = re_search_internal (bufp, string, length, start, range, stop,
+                               nregs, pmatch, eflags);
+
+  rval = 0;
+
+  /* I hope we needn't fill the regs with -1's when no match was found.  */
+  if (result != REG_NOERROR)
+    /* Keep "no match" (-1) distinct from a real failure (-2), as the
+       documented contract of re_match/re_search promises.  */
+    rval = (result == REG_NOMATCH) ? -1 : -2;
+  else if (regs != NULL)
+    {
+      /* If caller wants register contents data back, copy them.  */
+      bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
+                                           bufp->regs_allocated);
+      if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0))
+        rval = -2;
+    }
+
+  if (BE (rval == 0, 1))
+    {
+      if (ret_len)
+        {
+          /* RET_LEN callers pass a zero range, so the match must begin
+             exactly at START.  */
+          assert (pmatch[0].rm_so == start);
+          rval = pmatch[0].rm_eo - start;
+        }
+      else
+        rval = pmatch[0].rm_so;
+    }
+  re_free (pmatch);
+  return rval;
+}
+
+/* Copy the NREGS match offsets in PMATCH into REGS, allocating or growing
+   REGS->start / REGS->end as the allocation state REGS_ALLOCATED
+   (REGS_UNALLOCATED, REGS_REALLOCATE or REGS_FIXED) requires.  Every slot
+   from NREGS up to REGS->num_regs is set to -1.
+
+   Returns the allocation state the caller should record: REGS_REALLOCATE
+   or REGS_FIXED on success, REGS_UNALLOCATED if memory ran out.  On an
+   allocation failure no array owned by REGS is leaked and no stale
+   pointer is left in it.  */
+
+static unsigned
+re_copy_regs (regs, pmatch, nregs, regs_allocated)
+     struct re_registers *regs;
+     regmatch_t *pmatch;
+     int nregs, regs_allocated;
+{
+  int rval = REGS_REALLOCATE;
+  int i;
+  /* We need one extra element beyond `num_regs' for the `-1' marker GNU
+     code uses.  */
+  int need_regs = nregs + 1;
+
+  /* Have the register data arrays been allocated?  */
+  if (regs_allocated == REGS_UNALLOCATED)
+    { /* No.  So allocate them with malloc.  The old contents of REGS are
+         not trusted here, so nothing is freed except what we allocate.  */
+      regs->start = re_malloc (regoff_t, need_regs);
+      if (BE (regs->start == NULL, 0))
+        return REGS_UNALLOCATED;
+      regs->end = re_malloc (regoff_t, need_regs);
+      if (BE (regs->end == NULL, 0))
+        {
+          re_free (regs->start);
+          regs->start = NULL;	/* Do not leave a dangling pointer.  */
+          return REGS_UNALLOCATED;
+        }
+      regs->num_regs = need_regs;
+    }
+  else if (regs_allocated == REGS_REALLOCATE)
+    { /* Yes.  If we need more elements than were already
+         allocated, reallocate them.  If we need fewer, just
+         leave it alone.  */
+      if (need_regs > regs->num_regs)
+        {
+          regoff_t *new_start, *new_end;
+
+          /* realloc leaves the old block intact on failure, so the result
+             goes into a temporary; assigning it straight back would lose
+             (leak) the old block.  */
+          new_start = re_realloc (regs->start, regoff_t, need_regs);
+          if (BE (new_start != NULL, 1))
+            {
+              regs->start = new_start;
+              new_end = re_realloc (regs->end, regoff_t, need_regs);
+            }
+          else
+            new_end = NULL;
+          if (BE (new_end == NULL, 0))
+            {
+              /* Both pointers in REGS are valid blocks at this point;
+                 release them and reset REGS to the empty state that
+                 REGS_UNALLOCATED stands for.  */
+              re_free (regs->start);
+              re_free (regs->end);
+              regs->start = regs->end = NULL;
+              regs->num_regs = 0;
+              return REGS_UNALLOCATED;
+            }
+          regs->end = new_end;
+          regs->num_regs = need_regs;
+        }
+    }
+  else
+    {
+      assert (regs_allocated == REGS_FIXED);
+      /* This function may not be called with REGS_FIXED and nregs too big.  */
+      assert (regs->num_regs >= nregs);
+      rval = REGS_FIXED;
+    }
+
+  /* Copy the regs.  */
+  for (i = 0; i < nregs; ++i)
+    {
+      regs->start[i] = pmatch[i].rm_so;
+      regs->end[i] = pmatch[i].rm_eo;
+    }
+  /* Mark the remaining slots, including the extra one, as unused.  */
+  for ( ; i < regs->num_regs; ++i)
+    regs->start[i] = regs->end[i] = -1;
+
+  return rval;
+}
+
+/* Make REGS hold NUM_REGS registers whose data lives in STARTS and ENDS.
+   Later matches using BUFP and REGS record register information in that
+   memory.  STARTS and ENDS must have been obtained from the malloc
+   library routine and must each be at least
+   NUM_REGS * sizeof (regoff_t) bytes long.
+
+   With NUM_REGS == 0, REGS is emptied instead and later matches allocate
+   their own register data.
+
+   Unless this function is called, the first search or match using BUFP
+   allocates its own register data, without freeing the old data.  */
+
+void
+re_set_registers (bufp, regs, num_regs, starts, ends)
+     struct re_pattern_buffer *bufp;
+     struct re_registers *regs;
+     unsigned num_regs;
+     regoff_t *starts, *ends;
+{
+  if (num_regs == 0)
+    {
+      /* No caller storage: reset REGS and let the matcher allocate.  */
+      bufp->regs_allocated = REGS_UNALLOCATED;
+      regs->num_regs = 0;
+      regs->start = (regoff_t *) 0;
+      regs->end = (regoff_t *) 0;
+      return;
+    }
+
+  /* Adopt the caller's arrays; the matcher may realloc them later.  */
+  bufp->regs_allocated = REGS_REALLOCATE;
+  regs->num_regs = num_regs;
+  regs->start = starts;
+  regs->end = ends;
+}
+#ifdef _LIBC
+weak_alias (__re_set_registers, re_set_registers)
+#endif
+\f
+/* Entry point compatible with the 4.2 BSD regex library.  It is only
+   defined when specifically requested (_REGEX_RE_COMP) or inside libc.  */
+
+#if defined _REGEX_RE_COMP || defined _LIBC
+int
+# ifdef _LIBC
+weak_function
+# endif
+re_exec (s)
+     const char *s;
+{
+  /* Match S against the pattern held in re_comp_buf without asking for
+     sub-match data; regexec's "0 means matched" becomes 1 here.  */
+  int status = regexec (&re_comp_buf, s, 0, NULL, 0);
+
+  return status == 0;
+}
+#endif /* _REGEX_RE_COMP */
+\f
+static re_node_set empty_set;
+
+/* Internal entry point.  */
+
+/* Searches for a compiled pattern PREG in the string STRING, whose
+   length is LENGTH.  NMATCH, PMATCH, and EFLAGS have the same
+   meanings as for regexec.  START and RANGE have the same meanings
+   as for re_search; STOP is stored as the stop index of the input.
+   Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
+   otherwise return the error code.
+   On success with NMATCH > 0, PMATCH[0] receives the bounds of the whole
+   match and, if sub-matches are wanted, the other slots are filled by
+   set_regs; all offsets are relative to the start of STRING.
+   Note: We assume front end functions already check ranges.
+   (START + RANGE >= 0 && START + RANGE <= LENGTH)  */
+
+static reg_errcode_t
+re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
+                    eflags)
+     const regex_t *preg;
+     const char *string;
+     int length, start, range, stop, eflags;
+     size_t nmatch;
+     regmatch_t pmatch[];
+{
+  reg_errcode_t err;
+  re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
+  re_string_t input;
+  int left_lim, right_lim, incr;
+  int fl_longest_match, match_first, match_last = -1;
+  int fast_translate, sb;
+  re_match_context_t mctx;
+  /* The fastmap is only usable when it is accurate, we really search
+     (RANGE != 0), and the pattern cannot match the empty string.  */
+  char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate
+                    && range && !preg->can_be_null) ? preg->fastmap : NULL);
+
+  /* Check if the DFA hasn't been compiled.  */
+  if (BE (preg->used == 0 || dfa->init_state == NULL
+          || dfa->init_state_word == NULL || dfa->init_state_nl == NULL
+          || dfa->init_state_begbuf == NULL, 0))
+    return REG_NOMATCH;
+
+  re_node_set_init_empty (&empty_set);
+  memset (&mctx, '\0', sizeof (re_match_context_t));
+
+  /* We must check the longest matching, if nmatch > 0.  */
+  fl_longest_match = (nmatch != 0 || dfa->nbackref);
+
+  err = re_string_allocate (&input, string, length, dfa->nodes_len + 1,
+                            preg->translate, preg->syntax & RE_ICASE);
+  if (BE (err != REG_NOERROR, 0))
+    goto free_return;
+  input.stop = stop;
+
+  err = match_ctx_init (&mctx, eflags, &input, dfa->nbackref * 2);
+  if (BE (err != REG_NOERROR, 0))
+    goto free_return;
+
+  /* We will log all the DFA states through which the dfa pass,
+     if nmatch > 1, or this dfa has "multibyte node", which is a
+     back-reference or a node which can accept multibyte character or
+     multi character collating element.  */
+  if (nmatch > 1 || dfa->has_mb_node)
+    {
+      mctx.state_log = re_malloc (re_dfastate_t *, dfa->nodes_len + 1);
+      if (BE (mctx.state_log == NULL, 0))
+        {
+          err = REG_ESPACE;
+          goto free_return;
+        }
+    }
+  else
+    mctx.state_log = NULL;
+
+#ifdef DEBUG
+  /* We assume front-end functions already check them.  */
+  assert (start + range >= 0 && start + range <= length);
+#endif
+
+  match_first = start;
+  input.tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
+                       : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
+
+  /* Check incrementally whether or not the input string matches.  */
+  incr = (range < 0) ? -1 : 1;
+  left_lim = (range < 0) ? start + range : start;
+  right_lim = (range < 0) ? start : start + range;
+  sb = MB_CUR_MAX == 1;
+  fast_translate = sb || !(preg->syntax & RE_ICASE || preg->translate);
+
+  for (;;)
+    {
+      /* At first get the current byte from input string.  */
+      if (fastmap)
+        {
+          if (BE (fast_translate, 1))
+            {
+              unsigned RE_TRANSLATE_TYPE t
+                = (unsigned RE_TRANSLATE_TYPE) preg->translate;
+              if (BE (range >= 0, 1))
+                {
+                  /* Forward search: skip bytes that cannot start a match.  */
+                  if (BE (t != NULL, 0))
+                    {
+                      while (BE (match_first < right_lim, 1)
+                             && !fastmap[t[(unsigned char) string[match_first]]])
+                        ++match_first;
+                    }
+                  else
+                    {
+                      while (BE (match_first < right_lim, 1)
+                             && !fastmap[(unsigned char) string[match_first]])
+                        ++match_first;
+                    }
+                  if (BE (match_first == right_lim, 0))
+                    {
+                      /* Past the end of STRING the byte is taken as 0.  */
+                      int ch = match_first >= length
+                               ? 0 : (unsigned char) string[match_first];
+                      if (!fastmap[t ? t[ch] : ch])
+                        break;
+                    }
+                }
+              else
+                {
+                  /* Backward search.  */
+                  while (match_first >= left_lim)
+                    {
+                      int ch = match_first >= length
+                               ? 0 : (unsigned char) string[match_first];
+                      if (fastmap[t ? t[ch] : ch])
+                        break;
+                      --match_first;
+                    }
+                  if (match_first < left_lim)
+                    break;
+                }
+            }
+          else
+            {
+              int ch;
+
+              do
+                {
+                  /* In this case, we can't determine easily the current byte,
+                     since it might be a component byte of a multibyte
+                     character.  Then we use the constructed buffer
+                     instead.  */
+                  /* If MATCH_FIRST is out of the valid range, reconstruct the
+                     buffers.  */
+                  if (input.raw_mbs_idx + input.valid_len <= match_first
+                      || match_first < input.raw_mbs_idx)
+                    {
+                      err = re_string_reconstruct (&input, match_first, eflags,
+                                                   preg->newline_anchor);
+                      if (BE (err != REG_NOERROR, 0))
+                        goto free_return;
+                    }
+                  /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
+                     Note that MATCH_FIRST must not be smaller than 0.  */
+                  ch = ((match_first >= length) ? 0
+                        : re_string_byte_at (&input,
+                                             match_first - input.raw_mbs_idx));
+                  if (fastmap[ch])
+                    break;
+                  match_first += incr;
+                }
+              while (match_first >= left_lim && match_first <= right_lim);
+              if (! fastmap[ch])
+                break;
+            }
+        }
+
+      /* Reconstruct the buffers so that the matcher can assume that
+         the matching starts from the beginning of the buffer.  */
+      err = re_string_reconstruct (&input, match_first, eflags,
+                                   preg->newline_anchor);
+      if (BE (err != REG_NOERROR, 0))
+        goto free_return;
+#ifdef RE_ENABLE_I18N
+      /* Eliminate it when it is a component of a multibyte character
+         and isn't the head of a multibyte character.  */
+      if (sb || re_string_first_byte (&input, 0))
+#endif
+        {
+          /* It seems to be appropriate one, then use the matcher.  */
+          /* We assume that the matching starts from 0.  */
+          mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
+          match_last = check_matching (preg, &mctx, 0, fl_longest_match);
+          if (match_last != -1)
+            {
+              if (BE (match_last == -2, 0))
+                {
+                  err = REG_ESPACE;
+                  goto free_return;
+                }
+              else
+                {
+                  mctx.match_last = match_last;
+                  if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
+                    {
+                      re_dfastate_t *pstate = mctx.state_log[match_last];
+                      mctx.last_node = check_halt_state_context (preg, pstate,
+                                                                 &mctx, match_last);
+                    }
+                  if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
+                      || dfa->nbackref)
+                    {
+                      err = prune_impossible_nodes (preg, &mctx);
+                      if (err == REG_NOERROR)
+                        break;
+                      if (BE (err != REG_NOMATCH, 0))
+                        goto free_return;
+                      /* The candidate was rejected.  Forget it, otherwise
+                         a stale MATCH_LAST would be reported as a match if
+                         the search loop ends without another candidate.  */
+                      match_last = -1;
+                    }
+                  else
+                    break; /* We found a matching.  */
+                }
+            }
+          match_ctx_clean (&mctx);
+        }
+      /* Update counter.  */
+      match_first += incr;
+      if (match_first < left_lim || right_lim < match_first)
+        break;
+    }
+
+  /* Set pmatch[] if we need.  */
+  if (match_last != -1 && nmatch > 0)
+    {
+      int reg_idx;
+
+      /* Initialize registers.  */
+      for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
+        pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
+
+      /* Set the points where matching start/end.  */
+      pmatch[0].rm_so = 0;
+      pmatch[0].rm_eo = mctx.match_last;
+
+      if (!preg->no_sub && nmatch > 1)
+        {
+          err = set_regs (preg, &mctx, nmatch, pmatch,
+                          dfa->has_plural_match && dfa->nbackref > 0);
+          if (BE (err != REG_NOERROR, 0))
+            goto free_return;
+        }
+
+      /* At last, add the offset to each register, since we slid
+         the buffers so that we can assume that the matching starts
+         from 0.  */
+      for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
+        if (pmatch[reg_idx].rm_so != -1)
+          {
+            pmatch[reg_idx].rm_so += match_first;
+            pmatch[reg_idx].rm_eo += match_first;
+          }
+    }
+  err = (match_last == -1) ? REG_NOMATCH : REG_NOERROR;
+ free_return:
+  re_free (mctx.state_log);
+  if (dfa->nbackref)
+    match_ctx_free (&mctx);
+  re_string_destruct (&input);
+  return err;
+}
+
+/* Sift the state log of MCTX backward from the halt node, and on success
+   replace MCTX->state_log with the sifted states.
+
+   When the pattern has back references and the sifted result leaves no
+   state at index 0, the match end is moved back to the previous logged
+   halt state and the sifting is retried.  MCTX->last_node and
+   MCTX->match_last are updated to the end that was finally accepted.
+
+   Returns REG_NOERROR on success, REG_NOMATCH if no shorter match end is
+   left to try, REG_ESPACE if memory runs out, or the error reported by
+   sift_states_backward / merge_state_array.  */
+
+static reg_errcode_t
+prune_impossible_nodes (preg, mctx)
+     const regex_t *preg;
+     re_match_context_t *mctx;
+{
+  int halt_node, match_last;
+  reg_errcode_t ret;
+  re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
+  re_dfastate_t **sifted_states;
+  re_dfastate_t **lim_states = NULL;
+  re_sift_context_t sctx;
+#ifdef DEBUG
+  assert (mctx->state_log != NULL);
+#endif
+  match_last = mctx->match_last;
+  halt_node = mctx->last_node;
+  sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
+  if (BE (sifted_states == NULL, 0))
+    {
+      ret = REG_ESPACE;
+      goto free_return;
+    }
+  if (dfa->nbackref)
+    {
+      lim_states = re_malloc (re_dfastate_t *, match_last + 1);
+      if (BE (lim_states == NULL, 0))
+        {
+          ret = REG_ESPACE;
+          goto free_return;
+        }
+      while (1)
+        {
+          memset (lim_states, '\0',
+                  sizeof (re_dfastate_t *) * (match_last + 1));
+          match_ctx_clear_flag (mctx);
+          sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
+                         match_last, 0);
+          ret = sift_states_backward (preg, mctx, &sctx);
+          re_node_set_free (&sctx.limits);
+          if (BE (ret != REG_NOERROR, 0))
+            goto free_return;
+          if (sifted_states[0] != NULL || lim_states[0] != NULL)
+            break;
+          /* This end point does not work; step back to the previous
+             logged halt state.  Log entries may be NULL, so each one is
+             checked before it is dereferenced.  */
+          do
+            {
+              --match_last;
+              if (match_last < 0)
+                {
+                  ret = REG_NOMATCH;
+                  goto free_return;
+                }
+            } while (mctx->state_log[match_last] == NULL
+                     || !mctx->state_log[match_last]->halt);
+          halt_node = check_halt_state_context (preg,
+                                                mctx->state_log[match_last],
+                                                mctx, match_last);
+        }
+      ret = merge_state_array (dfa, sifted_states, lim_states,
+                               match_last + 1);
+      re_free (lim_states);
+      lim_states = NULL;
+      if (BE (ret != REG_NOERROR, 0))
+        goto free_return;
+    }
+  else
+    {
+      sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
+                     match_last, 0);
+      ret = sift_states_backward (preg, mctx, &sctx);
+      re_node_set_free (&sctx.limits);
+      if (BE (ret != REG_NOERROR, 0))
+        goto free_return;
+    }
+  /* Hand the sifted log over to MCTX; clearing the local pointer keeps
+     the cleanup below from freeing it.  */
+  re_free (mctx->state_log);
+  mctx->state_log = sifted_states;
+  sifted_states = NULL;
+  mctx->last_node = halt_node;
+  mctx->match_last = match_last;
+  ret = REG_NOERROR;
+ free_return:
+  re_free (sifted_states);
+  re_free (lim_states);
+  return ret;
+}
+
+/* Return the initial DFA state suited to the context just before index
+   IDX of the input.  The choice depends on the context because initial
+   states may carry constraints like "\<", "^", etc.
+   *ERR is set to REG_NOERROR first; only the on-demand path through
+   re_acquire_state_context is given ERR to report a failure.  */
+
+static inline re_dfastate_t *
+acquire_init_state_context (err, preg, mctx, idx)
+     reg_errcode_t *err;
+     const regex_t *preg;
+     const re_match_context_t *mctx;
+     int idx;
+{
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  unsigned int context;
+
+  *err = REG_NOERROR;
+  /* Without constraints a single initial state serves every context.  */
+  if (!dfa->init_state->has_constraint)
+    return dfa->init_state;
+
+  context = re_string_context_at (mctx->input, idx - 1, mctx->eflags,
+                                  preg->newline_anchor);
+  if (IS_WORD_CONTEXT (context))
+    return dfa->init_state_word;
+  if (IS_ORDINARY_CONTEXT (context))
+    return dfa->init_state;
+  if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
+    return dfa->init_state_begbuf;
+  if (IS_NEWLINE_CONTEXT (context))
+    return dfa->init_state_nl;
+  if (IS_BEGBUF_CONTEXT (context))
+    /* It is relatively rare case, then calculate on demand.  */
+    return re_acquire_state_context (err, dfa,
+                                     dfa->init_state->entrance_nodes,
+                                     context);
+  /* Must not happen?  */
+  return dfa->init_state;
+}
+
+/* Check whether the regular expression matches the input string of MCTX
+   or not, and return the index where the matching ends, return -1 if
+   there is no match, or return -2 in case of an error.
+   FL_SEARCH means we must search where the matching starts,
+   FL_LONGEST_MATCH means we want the POSIX longest matching.
+   Note that the matcher assumes that the matching starts from the
+   current index of the buffer.  */
+
+static int
+check_matching (preg, mctx, fl_search, fl_longest_match)
+     const regex_t *preg;
+     re_match_context_t *mctx;
+     int fl_search, fl_longest_match;
+{
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  reg_errcode_t err;
+  int match = 0;
+  int match_last = -1;
+  int cur_str_idx = re_string_cur_idx (mctx->input);
+  re_dfastate_t *cur_state;
+
+  cur_state = acquire_init_state_context (&err, preg, mctx, cur_str_idx);
+  /* An initial state must not be NULL(invalid state).  */
+  if (BE (cur_state == NULL, 0))
+    return -2;
+  if (mctx->state_log != NULL)
+    mctx->state_log[cur_str_idx] = cur_state;
+
+  /* Check OP_OPEN_SUBEXP in the initial state in case that we use them
+     later.  E.g. Processing back references.  */
+  if (dfa->nbackref)
+    {
+      err = check_subexp_matching_top (dfa, mctx, &cur_state->nodes, 0);
+      /* Report failures as -2: the raw (positive) error code would be
+         taken for a match index by the caller.  */
+      if (BE (err != REG_NOERROR, 0))
+        return -2;
+    }
+
+  if (cur_state->has_backref)
+    {
+      err = transit_state_bkref (preg, &cur_state->nodes, mctx);
+      if (BE (err != REG_NOERROR, 0))
+        return -2;
+    }
+
+  /* If the RE accepts NULL string.  */
+  if (cur_state->halt)
+    {
+      if (!cur_state->has_constraint
+          || check_halt_state_context (preg, cur_state, mctx, cur_str_idx))
+        {
+          if (!fl_longest_match)
+            return cur_str_idx;
+          else
+            {
+              match_last = cur_str_idx;
+              match = 1;
+            }
+        }
+    }
+
+  while (!re_string_eoi (mctx->input))
+    {
+      cur_state = transit_state (&err, preg, mctx, cur_state,
+                                 fl_search && !match);
+      if (cur_state == NULL) /* Reached at the invalid state or an error.  */
+        {
+          cur_str_idx = re_string_cur_idx (mctx->input);
+          if (BE (err != REG_NOERROR, 0))
+            return -2;
+          if (fl_search && !match)
+            {
+              /* Restart from initial state, since we are searching
+                 the point from where matching start.  */
+#ifdef RE_ENABLE_I18N
+              if (MB_CUR_MAX == 1
+                  || re_string_first_byte (mctx->input, cur_str_idx))
+#endif /* RE_ENABLE_I18N */
+                cur_state = acquire_init_state_context (&err, preg, mctx,
+                                                        cur_str_idx);
+              if (BE (cur_state == NULL && err != REG_NOERROR, 0))
+                return -2;
+              if (mctx->state_log != NULL)
+                mctx->state_log[cur_str_idx] = cur_state;
+            }
+          else if (!fl_longest_match && match)
+            break;
+          else /* (fl_longest_match && match) || (!fl_search && !match)  */
+            {
+              if (mctx->state_log == NULL)
+                break;
+              else
+                {
+                  /* Give up unless a later index already has a logged
+                     state.  */
+                  int max = mctx->state_log_top;
+                  for (; cur_str_idx <= max; ++cur_str_idx)
+                    if (mctx->state_log[cur_str_idx] != NULL)
+                      break;
+                  if (cur_str_idx > max)
+                    break;
+                }
+            }
+        }
+
+      if (cur_state != NULL && cur_state->halt)
+        {
+          /* Reached at a halt state.
+             Check the halt state can satisfy the current context.  */
+          if (!cur_state->has_constraint
+              || check_halt_state_context (preg, cur_state, mctx,
+                                           re_string_cur_idx (mctx->input)))
+            {
+              /* We found an appropriate halt state.  */
+              match_last = re_string_cur_idx (mctx->input);
+              match = 1;
+              if (!fl_longest_match)
+                break;
+            }
+        }
+    }
+  return match_last;
+}
+
+/* Tell whether NODE of DFA is a halt node acceptable in CONTEXT.
+   Returns 1 if NODE is an END_OF_RE node that either has no constraint
+   or whose constraint is satisfied by CONTEXT, and 0 otherwise.  */
+
+static int check_halt_node_context (dfa, node, context)
+     const re_dfa_t *dfa;
+     int node;
+     unsigned int context;
+{
+  unsigned int constraint;
+
+  /* Only the end-of-pattern node can halt the match.  */
+  if (dfa->nodes[node].type != END_OF_RE)
+    return 0;
+
+  constraint = dfa->nodes[node].constraint;
+  if (constraint != 0 && (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context)))
+    return 0;
+  return 1;
+}
+
+/* Check whether the halt state STATE matches the context at index IDX
+   of the input.  Returns the first node of STATE that is a halt node
+   acceptable in that context, or 0 if STATE has no such node.  */
+
+static int
+check_halt_state_context (preg, state, mctx, idx)
+     const regex_t *preg;
+     const re_dfastate_t *state;
+     const re_match_context_t *mctx;
+     int idx;
+{
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  const re_node_set *members = &state->nodes;
+  unsigned int context;
+  int pos;
+#ifdef DEBUG
+  assert (state->halt);
+#endif
+  context = re_string_context_at (mctx->input, idx, mctx->eflags,
+                                  preg->newline_anchor);
+  for (pos = 0; pos < members->nelem; ++pos)
+    {
+      int candidate = members->elems[pos];
+
+      if (check_halt_node_context (dfa, candidate, context))
+        return candidate;
+    }
+  return 0;
+}
+
+/* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
+   corresponding to the DFA), for the input position *PIDX.
+   Return the destination node, updating EPS_VIA_NODES and, when input is
+   consumed, *PIDX.  Return -1 if there is no valid transition and -2 if
+   memory ran out.
+   When FS is not NULL, the alternative destination of a two-way epsilon
+   branch is pushed on the fail stack FS for later backtracking.
+   NOTE(review): the caller (set_regs) is outside this chunk; confirm it
+   treats -2 as a fatal error rather than as a node to backtrack from.  */
+
+static int
+proceed_next_node (preg, nregs, regs, mctx, pidx, node, eps_via_nodes, fs)
+     const regex_t *preg;
+     regmatch_t *regs;
+     const re_match_context_t *mctx;
+     int nregs, *pidx, node;
+     re_node_set *eps_via_nodes;
+     struct re_fail_stack_t *fs;
+{
+  re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
+  int i, err, dest_node;
+  dest_node = -1;
+  if (IS_EPSILON_NODE (dfa->nodes[node].type))
+    {
+      re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
+      int ndest, dest_nodes[2];
+      err = re_node_set_insert (eps_via_nodes, node);
+      if (BE (err < 0, 0))
+        return -2;
+      /* Pick up valid destinations.  Only the first two are remembered,
+         but all of them are counted.  */
+      for (ndest = 0, i = 0; i < dfa->edests[node].nelem; ++i)
+        {
+          int candidate = dfa->edests[node].elems[i];
+          if (!re_node_set_contains (cur_nodes, candidate))
+            continue;
+          if (ndest < 2)
+            dest_nodes[ndest] = candidate;
+          ++ndest;
+        }
+      if (ndest <= 1)
+        return ndest == 0 ? -1 : dest_nodes[0];
+      /* In order to avoid infinite loop like "(a*)*".  */
+      if (re_node_set_contains (eps_via_nodes, dest_nodes[0]))
+        return dest_nodes[1];
+      /* Remember the second branch so the caller can backtrack to it; a
+         failed push must not be silently ignored.  */
+      if (fs != NULL
+          && push_fail_stack (fs, *pidx, dest_nodes, nregs, regs,
+                              eps_via_nodes) != REG_NOERROR)
+        return -2;
+      return dest_nodes[0];
+    }
+  else
+    {
+      int naccepted = 0;
+      re_token_type_t type = dfa->nodes[node].type;
+
+#ifdef RE_ENABLE_I18N
+      if (ACCEPT_MB_NODE (type))
+        naccepted = check_node_accept_bytes (preg, node, mctx->input, *pidx);
+      else
+#endif /* RE_ENABLE_I18N */
+      if (type == OP_BACK_REF)
+        {
+          /* A back reference accepts as many bytes as the referenced
+             sub-expression matched.  */
+          int subexp_idx = dfa->nodes[node].opr.idx;
+          naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
+          if (fs != NULL)
+            {
+              if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1)
+                return -1;
+              else if (naccepted)
+                {
+                  char *buf = (char *) re_string_get_buffer (mctx->input);
+                  if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
+                              naccepted) != 0)
+                    return -1;
+                }
+            }
+
+          if (naccepted == 0)
+            {
+              /* An empty back reference behaves like an epsilon move.  */
+              err = re_node_set_insert (eps_via_nodes, node);
+              if (BE (err < 0, 0))
+                return -2;
+              dest_node = dfa->edests[node].elems[0];
+              if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
+                                        dest_node))
+                return dest_node;
+            }
+        }
+
+      if (naccepted != 0
+          || check_node_accept (preg, dfa->nodes + node, mctx, *pidx))
+        {
+          dest_node = dfa->nexts[node];
+          *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
+          /* When backtracking is possible, reject a step that leaves the
+             logged states of the match.  */
+          if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
+                     || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
+                                               dest_node)))
+            return -1;
+          re_node_set_empty (eps_via_nodes);
+          return dest_node;
+        }
+    }
+  return -1;
+}
+
+/* Push a backtracking point on the fail stack FS: the string index
+   STR_IDX, the alternative node DESTS[1], a private copy of the NREGS
+   registers in REGS, and a copy of EPS_VIA_NODES.
+   Returns REG_NOERROR on success, REG_ESPACE if memory ran out, or the
+   error reported by re_node_set_init_copy.  FS is left unchanged when
+   the stack cannot be grown or the register copy cannot be allocated.  */
+
+static reg_errcode_t
+push_fail_stack (fs, str_idx, dests, nregs, regs, eps_via_nodes)
+     struct re_fail_stack_t *fs;
+     int str_idx, *dests, nregs;
+     regmatch_t *regs;
+     re_node_set *eps_via_nodes;
+{
+  reg_errcode_t err;
+  regmatch_t *saved_regs;
+  int num = fs->num;
+
+  /* Grow the array one entry before it is full.  The new size is only
+     committed once realloc has succeeded, so FS stays consistent on
+     failure.  */
+  if (num + 1 == fs->alloc)
+    {
+      struct re_fail_stack_ent_t *new_array;
+      int new_alloc = fs->alloc * 2;
+      new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t)
+                                       * new_alloc));
+      if (new_array == NULL)
+        return REG_ESPACE;
+      fs->stack = new_array;
+      fs->alloc = new_alloc;
+    }
+
+  /* Check the allocation before it is written to.  */
+  saved_regs = re_malloc (regmatch_t, nregs);
+  if (BE (saved_regs == NULL, 0))
+    return REG_ESPACE;
+  memcpy (saved_regs, regs, sizeof (regmatch_t) * nregs);
+
+  fs->stack[num].idx = str_idx;
+  fs->stack[num].node = dests[1];
+  fs->stack[num].regs = saved_regs;
+  fs->num = num + 1;
+  err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
+  return err;
+}
+
+/* Pop the most recent backtracking point from FS.
+
+   Restores the saved string index into *PIDX and the saved NREGS
+   registers into REGS, replaces *EPS_VIA_NODES with the saved node set
+   (the previous contents are freed first), and returns the saved
+   node.  The caller must ensure the stack is not empty.  */
+static int
+pop_fail_stack (fs, pidx, nregs, regs, eps_via_nodes)
+     struct re_fail_stack_t *fs;
+     int *pidx, nregs;
+     regmatch_t *regs;
+     re_node_set *eps_via_nodes;
+{
+  struct re_fail_stack_ent_t *top;
+  int top_idx;
+
+  fs->num -= 1;
+  top_idx = fs->num;
+  assert (top_idx >= 0);
+  top = &fs->stack[top_idx];
+
+  *pidx = top->idx;
+  memcpy (regs, top->regs, sizeof (regmatch_t) * nregs);
+  /* Drop the caller's current set before taking over the saved one.  */
+  re_node_set_free (eps_via_nodes);
+  re_free (top->regs);
+  *eps_via_nodes = top->eps_via_nodes;
+  return top->node;
+}
+
+/* Set the positions where the subexpressions start and end into the
+   registers PMATCH.
+   Note: We assume that pmatch[0] is already set, and
+   pmatch[i].rm_so == pmatch[i].rm_eo == -1 (i > 1).
+
+   The matched path is replayed node by node from DFA->init_node, from
+   pmatch[0].rm_so to pmatch[0].rm_eo, and update_regs records the
+   subexpression boundaries met on the way.  When FL_BACKTRACK is
+   nonzero a fail stack is used so that alternative epsilon branches
+   can be retried.
+
+   Returns REG_NOERROR on success, REG_NOMATCH when the path cannot be
+   followed and no backtracking is available, and REG_ESPACE on memory
+   exhaustion.  All local resources (the epsilon node set and the fail
+   stack) are released on every return path.  */
+
+static reg_errcode_t
+set_regs (preg, mctx, nmatch, pmatch, fl_backtrack)
+     const regex_t *preg;
+     const re_match_context_t *mctx;
+     size_t nmatch;
+     regmatch_t *pmatch;
+     int fl_backtrack;
+{
+  re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
+  int idx, cur_node, real_nmatch;
+  re_node_set eps_via_nodes;
+  struct re_fail_stack_t *fs;
+  struct re_fail_stack_t fs_body = {0, 2, NULL};
+#ifdef DEBUG
+  assert (nmatch > 1);
+  assert (mctx->state_log != NULL);
+#endif
+  if (fl_backtrack)
+    {
+      fs = &fs_body;
+      fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
+      /* Nothing else has been allocated yet, so a plain return is
+         leak-free here.  */
+      if (BE (fs->stack == NULL, 0))
+        return REG_ESPACE;
+    }
+  else
+    fs = NULL;
+  cur_node = dfa->init_node;
+  real_nmatch = (nmatch <= preg->re_nsub) ? nmatch : preg->re_nsub + 1;
+  re_node_set_init_empty (&eps_via_nodes);
+  for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
+    {
+      update_regs (dfa, pmatch, cur_node, idx, real_nmatch);
+      if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
+        {
+          int reg_idx;
+          if (fs)
+            {
+              /* Accept this path only if no register was opened
+                 without being closed; otherwise backtrack.  */
+              for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
+                if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
+                  break;
+              if (reg_idx == nmatch)
+                {
+                  re_node_set_free (&eps_via_nodes);
+                  return free_fail_stack_return (fs);
+                }
+              cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
+                                         &eps_via_nodes);
+            }
+          else
+            {
+              re_node_set_free (&eps_via_nodes);
+              return REG_NOERROR;
+            }
+        }
+
+      /* Proceed to next node.  */
+      cur_node = proceed_next_node (preg, nmatch, pmatch, mctx, &idx, cur_node,
+                                    &eps_via_nodes, fs);
+
+      if (BE (cur_node < 0, 0))
+        {
+          if (cur_node == -2)
+            {
+              /* Out of memory: release the node set and any pending
+                 fail-stack entries before reporting the error.
+                 free_fail_stack_return accepts a NULL stack.  */
+              re_node_set_free (&eps_via_nodes);
+              free_fail_stack_return (fs);
+              return REG_ESPACE;
+            }
+          if (fs)
+            cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
+                                       &eps_via_nodes);
+          else
+            {
+              re_node_set_free (&eps_via_nodes);
+              return REG_NOMATCH;
+            }
+        }
+    }
+  re_node_set_free (&eps_via_nodes);
+  return free_fail_stack_return (fs);
+}
+
+/* Release every entry still held by the fail stack FS, then the stack
+   array itself.  FS may be NULL, in which case nothing is done.
+   Always returns REG_NOERROR so callers can `return' the call
+   directly.  */
+static reg_errcode_t
+free_fail_stack_return (fs)
+     struct re_fail_stack_t *fs;
+{
+  int pos = 0;
+
+  if (fs == NULL)
+    return REG_NOERROR;
+
+  while (pos < fs->num)
+    {
+      struct re_fail_stack_ent_t *ent = &fs->stack[pos];
+      re_node_set_free (&ent->eps_via_nodes);
+      re_free (ent->regs);
+      ++pos;
+    }
+  re_free (fs->stack);
+  return REG_NOERROR;
+}
+
+/* If CUR_NODE opens or closes a subexpression, record CUR_IDX in the
+   matching register of PMATCH.  Register number is the node's
+   subexpression index plus one; registers at or beyond NMATCH are
+   ignored.  Nodes of any other type leave PMATCH untouched.  */
+static void
+update_regs (dfa, pmatch, cur_node, cur_idx, nmatch)
+     re_dfa_t *dfa;
+     regmatch_t *pmatch;
+     int cur_node, cur_idx, nmatch;
+{
+  int type = dfa->nodes[cur_node].type;
+  int opens = (type == OP_OPEN_SUBEXP);
+  int closes = (type == OP_CLOSE_SUBEXP);
+  int reg_num;
+  regmatch_t *reg;
+
+  if (!(opens || closes))
+    return;
+
+  reg_num = dfa->nodes[cur_node].opr.idx + 1;
+  if (reg_num >= nmatch)
+    return;
+  reg = pmatch + reg_num;
+
+  if (opens)
+    {
+      /* We are at the first node of this sub expression: start it and
+         mark its end as not yet known.  */
+      reg->rm_so = cur_idx;
+      reg->rm_eo = -1;
+    }
+  else
+    /* We are at the closing node of this sub expression.  */
+    reg->rm_eo = cur_idx;
+}
+
+#define NUMBER_OF_STATE 1
+
+/* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
+ and sifts the nodes in each state according to the following rules.
+ The updated state_log will be written to STATE_LOG.
+
+ Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if...
+ 1. When STR_IDX == MATCH_LAST(the last index in the state_log):
+ If `a' isn't the LAST_NODE and `a' can't epsilon transit to
+ the LAST_NODE, we throw away the node `a'.
+ 2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts
+ string `s' and transits to `b':
+ i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
+ away the node `a'.
+ ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
+ thrown away, we throw away the node `a'.
+ 3. When 0 <= STR_IDX < n and 'a' epsilon transits to 'b':
+ i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
+ node `a'.
+ ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
+ we throw away the node `a'. */
+
+/* True iff STATE is a non-NULL state whose node set contains NODE.
+   STATE is evaluated twice, so it must not have side effects. */
+#define STATE_NODE_CONTAINS(state,node) \
+ ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
+
+/* Sift the state log backward, from SCTX->last_str_idx down to index 0,
+   storing the sifted state of each index in SCTX->sifted_states (via
+   update_cur_sifted_state).
+   Returns REG_NOERROR on success, REG_ESPACE when a node cannot be
+   inserted into the working set, or the error reported by a helper. */
+static reg_errcode_t
+sift_states_backward (preg, mctx, sctx)
+ const regex_t *preg;
+ re_match_context_t *mctx;
+ re_sift_context_t *sctx;
+{
+ reg_errcode_t err;
+ re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
+ /* Number of consecutive indexes whose sifted state came out NULL. */
+ int null_cnt = 0;
+ int str_idx = sctx->last_str_idx;
+ /* Working node set for the index currently being sifted. */
+ re_node_set cur_dest;
+ re_node_set *cur_src; /* Points to state_log[str_idx]->nodes */
+
+#ifdef DEBUG
+ assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
+#endif
+ cur_src = &mctx->state_log[str_idx]->nodes;
+
+ /* Build sifted state_log[str_idx]. It has the nodes which can epsilon
+ transit to the last_node and the last_node itself. */
+ err = re_node_set_init_1 (&cur_dest, sctx->last_node);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ err = update_cur_sifted_state (preg, mctx, sctx, str_idx, &cur_dest);
+ if (BE (err != REG_NOERROR, 0))
+ goto free_return;
+
+ /* Then check each states in the state_log. */
+ while (str_idx > 0)
+ {
+ int i, ret;
+ /* Update counters. */
+ null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
+ /* Once the run of empty sifted states is longer than
+ mctx->max_mb_elem_len, stop early: clear the sifted states
+ below STR_IDX and report success. */
+ if (null_cnt > mctx->max_mb_elem_len)
+ {
+ memset (sctx->sifted_states, '\0',
+ sizeof (re_dfastate_t *) * str_idx);
+ re_node_set_free (&cur_dest);
+ return REG_NOERROR;
+ }
+ re_node_set_empty (&cur_dest);
+ --str_idx;
+ cur_src = ((mctx->state_log[str_idx] == NULL) ? &empty_set
+ : &mctx->state_log[str_idx]->nodes);
+
+ /* Then build the next sifted state.
+ We build the next sifted state on `cur_dest', and update
+ `sifted_states[str_idx]' with `cur_dest'.
+ Note:
+ `cur_dest' is the sifted state from `state_log[str_idx + 1]'.
+ `cur_src' points the node_set of the old `state_log[str_idx]'. */
+ for (i = 0; i < cur_src->nelem; i++)
+ {
+ int prev_node = cur_src->elems[i];
+ /* Number of input bytes PREV_NODE consumes; 0 means rejected. */
+ int naccepted = 0;
+ re_token_type_t type = dfa->nodes[prev_node].type;
+
+ /* Epsilon nodes are added later by update_cur_sifted_state. */
+ if (IS_EPSILON_NODE(type))
+ continue;
+#ifdef RE_ENABLE_I18N
+ /* If the node may accept `multi byte'. */
+ if (ACCEPT_MB_NODE (type))
+ naccepted = sift_states_iter_mb (preg, mctx, sctx, prev_node,
+ str_idx, sctx->last_str_idx);
+
+#endif /* RE_ENABLE_I18N */
+ /* We don't check backreferences here.
+ See update_cur_sifted_state(). */
+
+ /* Single-byte case: the node must accept the byte at STR_IDX and
+ its successor must have survived sifting at STR_IDX + 1. */
+ if (!naccepted
+ && check_node_accept (preg, dfa->nodes + prev_node, mctx,
+ str_idx)
+ && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
+ dfa->nexts[prev_node]))
+ naccepted = 1;
+
+ if (naccepted == 0)
+ continue;
+
+ /* With active limits, drop transitions that check_dst_limits
+ reports as violating them. */
+ if (sctx->limits.nelem)
+ {
+ int to_idx = str_idx + naccepted;
+ if (check_dst_limits (dfa, &sctx->limits, mctx,
+ dfa->nexts[prev_node], to_idx,
+ prev_node, str_idx))
+ continue;
+ }
+ ret = re_node_set_insert (&cur_dest, prev_node);
+ if (BE (ret == -1, 0))
+ {
+ err = REG_ESPACE;
+ goto free_return;
+ }
+ }
+
+ /* Add all the nodes which satisfy the following conditions:
+ - It can epsilon transit to a node in CUR_DEST.
+ - It is in CUR_SRC.
+ And update state_log. */
+ err = update_cur_sifted_state (preg, mctx, sctx, str_idx, &cur_dest);
+ if (BE (err != REG_NOERROR, 0))
+ goto free_return;
+ }
+ err = REG_NOERROR;
+ /* Common exit: the working set is always released. */
+ free_return:
+ re_node_set_free (&cur_dest);
+ return err;
+}
+
+/* Helper functions. */
+
+/* Prepare MCTX so that state_log[NEXT_STATE_LOG_IDX] can be used.
+
+   The input buffers are extended when the index lies beyond the
+   allocated buffers, or beyond the valid part while more input is
+   still available.  If the index is above the current top of the
+   state log, the entries between the old top (exclusive) and the index
+   (inclusive) are cleared and the top is raised.
+
+   Returns REG_NOERROR, or the error reported by extend_buffers.  */
+static inline reg_errcode_t
+clean_state_log_if_need (mctx, next_state_log_idx)
+     re_match_context_t *mctx;
+     int next_state_log_idx;
+{
+  int old_top = mctx->state_log_top;
+  int past_buffers = (next_state_log_idx >= mctx->input->bufs_len);
+  int past_valid = (next_state_log_idx >= mctx->input->valid_len
+                    && mctx->input->valid_len < mctx->input->len);
+
+  if (past_buffers || past_valid)
+    {
+      reg_errcode_t err = extend_buffers (mctx);
+      if (BE (err != REG_NOERROR, 0))
+        return err;
+    }
+
+  if (next_state_log_idx > old_top)
+    {
+      int fresh = next_state_log_idx - old_top;
+      memset (mctx->state_log + old_top + 1, '\0',
+              sizeof (re_dfastate_t *) * fresh);
+      mctx->state_log_top = next_state_log_idx;
+    }
+  return REG_NOERROR;
+}
+
+/* Merge the first NUM entries of the state array SRC into DST.
+
+   For each index: an empty SRC slot leaves DST alone; an empty DST
+   slot simply takes the SRC state; otherwise DST receives the state
+   acquired for the union of both node sets.
+
+   Returns REG_NOERROR, or the first error reported while building the
+   union or acquiring the merged state.  */
+static reg_errcode_t
+merge_state_array (dfa, dst, src, num)
+     re_dfa_t *dfa;
+     re_dfastate_t **dst;
+     re_dfastate_t **src;
+     int num;
+{
+  int pos;
+  reg_errcode_t err;
+
+  for (pos = 0; pos < num; ++pos)
+    {
+      re_node_set both;
+
+      if (src[pos] == NULL)
+        continue;
+      if (dst[pos] == NULL)
+        {
+          dst[pos] = src[pos];
+          continue;
+        }
+
+      err = re_node_set_init_union (&both, &dst[pos]->nodes,
+                                    &src[pos]->nodes);
+      if (BE (err != REG_NOERROR, 0))
+        return err;
+      dst[pos] = re_acquire_state (&err, dfa, &both);
+      /* The temporary union is released before the error check so it
+         cannot leak on failure.  */
+      re_node_set_free (&both);
+      if (BE (err != REG_NOERROR, 0))
+        return err;
+    }
+  return REG_NOERROR;
+}
+
+/* Finish the sifted node set DEST_NODES for string index STR_IDX and
+   record the resulting state in SCTX->sifted_states[STR_IDX].
+   Steps, in order: add epsilon-source nodes taken from
+   state_log[STR_IDX]; apply the subexpression limits of SCTX if any;
+   acquire the state for DEST_NODES; and, if state_log[STR_IDX] has
+   back references, sift them with sift_states_bkref.
+   Returns REG_NOERROR or the first error reported by a helper. */
+static reg_errcode_t
+update_cur_sifted_state (preg, mctx, sctx, str_idx, dest_nodes)
+ const regex_t *preg;
+ re_match_context_t *mctx;
+ re_sift_context_t *sctx;
+ int str_idx;
+ re_node_set *dest_nodes;
+{
+ reg_errcode_t err;
+ re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
+ /* Nodes of the unsifted state at STR_IDX (empty set if there is none). */
+ const re_node_set *candidates;
+ candidates = ((mctx->state_log[str_idx] == NULL) ? &empty_set
+ : &mctx->state_log[str_idx]->nodes);
+
+ /* At first, add the nodes which can epsilon transit to a node in
+ DEST_NODE. */
+ if (dest_nodes->nelem)
+ {
+ err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ }
+
+ /* Then, check the limitations in the current sift_context. */
+ if (dest_nodes->nelem && sctx->limits.nelem)
+ {
+ err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
+ mctx->bkref_ents, str_idx);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ }
+
+ /* Update state_log. */
+ sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
+ /* A NULL state is only treated as a failure when ERR is also set. */
+ if (BE (sctx->sifted_states[str_idx] == NULL && err != REG_NOERROR, 0))
+ return err;
+
+ if ((mctx->state_log[str_idx] != NULL
+ && mctx->state_log[str_idx]->has_backref))
+ {
+ err = sift_states_bkref (preg, mctx, sctx, str_idx, dest_nodes);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ }
+ return REG_NOERROR;
+}
+
+/* Extend DEST_NODES with epsilon sources: for every node originally in
+   DEST_NODES, add the intersection of CANDIDATES with that node's
+   inverse epsilon closure (DFA->inveclosures).
+
+   A snapshot of DEST_NODES is iterated, since DEST_NODES itself grows
+   during the loop.  Returns REG_NOERROR or the first error reported by
+   a node-set helper; the snapshot is released in either case.  */
+static reg_errcode_t
+add_epsilon_src_nodes (dfa, dest_nodes, candidates)
+     re_dfa_t *dfa;
+     re_node_set *dest_nodes;
+     const re_node_set *candidates;
+{
+  reg_errcode_t err;
+  int pos;
+  re_node_set snapshot;
+
+  err = re_node_set_init_copy (&snapshot, dest_nodes);
+  if (BE (err != REG_NOERROR, 0))
+    return err;
+
+  for (pos = 0; pos < snapshot.nelem; ++pos)
+    {
+      const re_node_set *inv = dfa->inveclosures + snapshot.elems[pos];
+      err = re_node_set_add_intersect (dest_nodes, candidates, inv);
+      if (BE (err != REG_NOERROR, 0))
+        break;
+    }
+
+  re_node_set_free (&snapshot);
+  return err;
+}
+
+/* Remove from DEST_NODES the nodes of NODE's inverse epsilon closure
+   (DFA->inveclosures + NODE), except those that are still justified by
+   another path: an epsilon node of that closure which has an epsilon
+   destination outside the closure but inside DEST_NODES keeps its own
+   inverse closure (restricted to CANDIDATES) alive.
+   Returns REG_NOERROR, or the error from re_node_set_add_intersect. */
+static reg_errcode_t
+sub_epsilon_src_nodes (dfa, node, dest_nodes, candidates)
+ re_dfa_t *dfa;
+ int node;
+ re_node_set *dest_nodes;
+ const re_node_set *candidates;
+{
+ int ecl_idx;
+ reg_errcode_t err;
+ re_node_set *inv_eclosure = dfa->inveclosures + node;
+ /* Nodes of INV_ECLOSURE that must NOT be removed. */
+ re_node_set except_nodes;
+ re_node_set_init_empty (&except_nodes);
+ /* Pass 1: collect the exceptions. */
+ for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
+ {
+ int cur_node = inv_eclosure->elems[ecl_idx];
+ if (cur_node == node)
+ continue;
+ if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
+ {
+ /* An epsilon node has one or two epsilon destinations; -1 marks
+ a missing second one. */
+ int edst1 = dfa->edests[cur_node].elems[0];
+ int edst2 = ((dfa->edests[cur_node].nelem > 1)
+ ? dfa->edests[cur_node].elems[1] : -1);
+ /* NOTE(review): `edst2 > 0' also rejects a second destination
+ whose node index is 0 -- confirm that index 0 can never occur
+ here, otherwise `>= 0' was intended. */
+ if ((!re_node_set_contains (inv_eclosure, edst1)
+ && re_node_set_contains (dest_nodes, edst1))
+ || (edst2 > 0
+ && !re_node_set_contains (inv_eclosure, edst2)
+ && re_node_set_contains (dest_nodes, edst2)))
+ {
+ err = re_node_set_add_intersect (&except_nodes, candidates,
+ dfa->inveclosures + cur_node);
+ if (BE (err != REG_NOERROR, 0))
+ {
+ re_node_set_free (&except_nodes);
+ return err;
+ }
+ }
+ }
+ }
+ /* Pass 2: remove every closure node that is not an exception. */
+ for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
+ {
+ int cur_node = inv_eclosure->elems[ecl_idx];
+ if (!re_node_set_contains (&except_nodes, cur_node))
+ {
+ /* The result of re_node_set_contains minus one is used as the
+ position to remove. NOTE(review): if CUR_NODE is not in
+ DEST_NODES this presumably yields -1; verify that
+ re_node_set_remove_at ignores an out-of-range index. */
+ int idx = re_node_set_contains (dest_nodes, cur_node) - 1;
+ re_node_set_remove_at (dest_nodes, idx);
+ }
+ }
+ re_node_set_free (&except_nodes);
+ return REG_NOERROR;
+}
+
+/* Check the transition from (SRC_NODE, SRC_IDX) to (DST_NODE, DST_IDX)
+   against every back-reference limitation listed in LIMITS (indexes
+   into MCTX->bkref_ents).
+
+   For each limitation the relative position of the destination and of
+   the source is computed with check_dst_limits_calc_pos.  Returns 1 as
+   soon as the two positions differ for some limitation, 0 if they
+   agree for all of them.  */
+static int
+check_dst_limits (dfa, limits, mctx, dst_node, dst_idx, src_node, src_idx)
+     re_dfa_t *dfa;
+     re_node_set *limits;
+     re_match_context_t *mctx;
+     int dst_node, dst_idx, src_node, src_idx;
+{
+  int lim_idx;
+
+  for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
+    {
+      int limit = limits->elems[lim_idx];
+      struct re_backref_cache_entry *ent = mctx->bkref_ents + limit;
+      int subexp_idx = dfa->nodes[ent->node].opr.idx - 1;
+      int dst_pos, src_pos;
+
+      dst_pos = check_dst_limits_calc_pos (dfa, mctx, limit,
+                                           dfa->eclosures + dst_node,
+                                           subexp_idx, dst_node, dst_idx);
+      src_pos = check_dst_limits_calc_pos (dfa, mctx, limit,
+                                           dfa->eclosures + src_node,
+                                           subexp_idx, src_node, src_idx);
+
+      /* Equal positions cover the cases
+           <src> <dst> ( <subexp> )
+           ( <subexp> ) <src> <dst>
+           ( <subexp1> <src> <subexp2> <dst> <subexp3> )
+         in which this limitation is unrelated to the transition.
+         Anything else violates it.  */
+      if (src_pos != dst_pos)
+        return 1;
+    }
+  return 0;
+}
+
+/* Compute where STR_IDX lies relative to the subexpression span
+   [subexp_from, subexp_to] of the back-reference cache entry LIMIT
+   (an index into MCTX->bkref_ents).
+   Returns -1 when before the span, 1 when after it, 0 when inside.
+   When STR_IDX sits exactly on a boundary of the span, the nodes in
+   ECLOSURES are inspected to decide on which side of the boundary the
+   position really is. */
+static int
+check_dst_limits_calc_pos (dfa, mctx, limit, eclosures, subexp_idx, node,
+ str_idx)
+ re_dfa_t *dfa;
+ re_match_context_t *mctx;
+ re_node_set *eclosures;
+ int limit, subexp_idx, node, str_idx;
+{
+ struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
+ /* First approximation from the string index alone. */
+ int pos = (str_idx < lim->subexp_from ? -1
+ : (lim->subexp_to < str_idx ? 1 : 0));
+ if (pos == 0
+ && (str_idx == lim->subexp_from || str_idx == lim->subexp_to))
+ {
+ int node_idx;
+ for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
+ {
+ /* Note: this local deliberately or accidentally shadows the NODE
+ parameter; the parameter is not read inside this loop. */
+ int node = eclosures->elems[node_idx];
+ re_token_type_t type= dfa->nodes[node].type;
+ if (type == OP_BACK_REF)
+ {
+ /* Walk the cache entries recorded for STR_IDX and follow
+ back references of this node whose subexpression span is
+ empty (subexp_from == subexp_to). */
+ int bi = search_cur_bkref_entry (mctx, str_idx);
+ for (; bi < mctx->nbkref_ents; ++bi)
+ {
+ struct re_backref_cache_entry *ent = mctx->bkref_ents + bi;
+ if (ent->str_idx > str_idx)
+ break;
+ if (ent->node == node && ent->subexp_from == ent->subexp_to)
+ {
+ int cpos, dst;
+ dst = dfa->edests[node].elems[0];
+ /* Recurse on the epsilon destination of the back
+ reference, at the same string index. */
+ cpos = check_dst_limits_calc_pos (dfa, mctx, limit,
+ dfa->eclosures + dst,
+ subexp_idx, dst,
+ str_idx);
+ if ((str_idx == lim->subexp_from && cpos == -1)
+ || (str_idx == lim->subexp_to && cpos == 0))
+ return cpos;
+ }
+ }
+ }
+ /* The opening node is still reachable at the start boundary:
+ we are before the subexpression. */
+ if (type == OP_OPEN_SUBEXP && subexp_idx == dfa->nodes[node].opr.idx
+ && str_idx == lim->subexp_from)
+ {
+ pos = -1;
+ break;
+ }
+ /* The closing node is still reachable at the end boundary:
+ we are inside the subexpression (POS stays 0). */
+ if (type == OP_CLOSE_SUBEXP && subexp_idx == dfa->nodes[node].opr.idx
+ && str_idx == lim->subexp_to)
+ break;
+ }
+ /* The loop ran to completion at the end boundary, i.e. no closing
+ node was found: we are after the subexpression. */
+ if (node_idx == eclosures->nelem && str_idx == lim->subexp_to)
+ pos = 1;
+ }
+ return pos;
+}
+
+/* Check the limitations of sub expressions LIMITS, and remove the nodes
+ which are against limitations from DEST_NODES.
+ LIMITS holds indexes into BKREF_ENTS; CANDIDATES is passed through to
+ sub_epsilon_src_nodes, which performs the actual removal.
+ Returns REG_NOERROR, or the error from sub_epsilon_src_nodes. */
+
+static reg_errcode_t
+check_subexp_limits (dfa, dest_nodes, candidates, limits, bkref_ents, str_idx)
+ re_dfa_t *dfa;
+ re_node_set *dest_nodes;
+ const re_node_set *candidates;
+ re_node_set *limits;
+ struct re_backref_cache_entry *bkref_ents;
+ int str_idx;
+{
+ reg_errcode_t err;
+ int node_idx, lim_idx;
+
+ for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
+ {
+ int subexp_idx;
+ struct re_backref_cache_entry *ent;
+ ent = bkref_ents + limits->elems[lim_idx];
+
+ /* Only indexes in (subexp_from, ent->str_idx] are constrained. */
+ if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
+ continue; /* This is unrelated limitation. */
+
+ subexp_idx = dfa->nodes[ent->node].opr.idx - 1;
+ if (ent->subexp_to == str_idx)
+ {
+ /* Locate the open/close nodes of this subexpression in
+ DEST_NODES; -1 means not present. */
+ int ops_node = -1;
+ int cls_node = -1;
+ for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
+ {
+ int node = dest_nodes->elems[node_idx];
+ re_token_type_t type= dfa->nodes[node].type;
+ if (type == OP_OPEN_SUBEXP
+ && subexp_idx == dfa->nodes[node].opr.idx)
+ ops_node = node;
+ else if (type == OP_CLOSE_SUBEXP
+ && subexp_idx == dfa->nodes[node].opr.idx)
+ cls_node = node;
+ }
+
+ /* Check the limitation of the open subexpression. */
+ /* Note that (ent->subexp_to = str_idx != ent->subexp_from). */
+ if (ops_node >= 0)
+ {
+ err = sub_epsilon_src_nodes(dfa, ops_node, dest_nodes,
+ candidates);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ }
+ /* Check the limitation of the close subexpression. */
+ for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
+ {
+ int node = dest_nodes->elems[node_idx];
+ if (!re_node_set_contains (dfa->inveclosures + node, cls_node)
+ && !re_node_set_contains (dfa->eclosures + node, cls_node))
+ {
+ /* It is against this limitation.
+ Remove it from the current sifted state. */
+ err = sub_epsilon_src_nodes(dfa, node, dest_nodes,
+ candidates);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ /* DEST_NODES shrank under us: revisit this position. */
+ --node_idx;
+ }
+ }
+ }
+ else /* (ent->subexp_to != str_idx) */
+ {
+ for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
+ {
+ int node = dest_nodes->elems[node_idx];
+ re_token_type_t type= dfa->nodes[node].type;
+ if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
+ {
+ if (subexp_idx != dfa->nodes[node].opr.idx)
+ continue;
+ /* In this branch ent->subexp_to != str_idx always holds, so
+ both open and close nodes of the subexpression are
+ removed. */
+ if ((type == OP_CLOSE_SUBEXP && ent->subexp_to != str_idx)
+ || (type == OP_OPEN_SUBEXP))
+ {
+ /* It is against this limitation.
+ Remove it from the current sifted state. */
+ err = sub_epsilon_src_nodes(dfa, node, dest_nodes,
+ candidates);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ }
+ }
+ }
+ }
+ }
+ return REG_NOERROR;
+}
+
+/* Sift the back references found in state_log[STR_IDX].
+   For every OP_BACK_REF node there, and for each cache entry recorded
+   for that node at STR_IDX whose destination survived sifting, run
+   sift_states_backward again with a local sift context that starts at
+   (node, STR_IDX) and has the entry added to its limits.  The results
+   are merged into SCTX->limited_states when that array is set.
+   Returns REG_NOERROR, REG_ESPACE, or the error from a helper. */
+static reg_errcode_t
+sift_states_bkref (preg, mctx, sctx, str_idx, dest_nodes)
+ const regex_t *preg;
+ re_match_context_t *mctx;
+ re_sift_context_t *sctx;
+ int str_idx;
+ re_node_set *dest_nodes;
+{
+ reg_errcode_t err;
+ re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
+ int node_idx, node;
+ re_sift_context_t local_sctx;
+ const re_node_set *candidates;
+ candidates = ((mctx->state_log[str_idx] == NULL) ? &empty_set
+ : &mctx->state_log[str_idx]->nodes);
+ local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized. */
+
+ for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
+ {
+ int cur_bkref_idx = re_string_cur_idx (mctx->input);
+ re_token_type_t type;
+ node = candidates->elems[node_idx];
+ type = dfa->nodes[node].type;
+ /* Skip the back reference that SCTX itself is being sifted for. */
+ if (node == sctx->cur_bkref && str_idx == cur_bkref_idx)
+ continue;
+ /* Avoid infinite loop for the REs like "()\1+". */
+ if (node == sctx->last_node && str_idx == sctx->last_str_idx)
+ continue;
+ if (type == OP_BACK_REF)
+ {
+ int enabled_idx = search_cur_bkref_entry (mctx, str_idx);
+ for (; enabled_idx < mctx->nbkref_ents; ++enabled_idx)
+ {
+ int disabled_idx, subexp_len, to_idx, dst_node;
+ struct re_backref_cache_entry *entry;
+ entry = mctx->bkref_ents + enabled_idx;
+ if (entry->str_idx > str_idx)
+ break;
+ if (entry->node != node)
+ continue;
+ subexp_len = entry->subexp_to - entry->subexp_from;
+ to_idx = str_idx + subexp_len;
+ /* An empty match proceeds by epsilon transition, a non-empty
+ one through the node's `next'. */
+ dst_node = (subexp_len ? dfa->nexts[node]
+ : dfa->edests[node].elems[0]);
+
+ /* Ignore entries whose destination is out of range, was sifted
+ away, or violates the current limits. */
+ if (to_idx > sctx->last_str_idx
+ || sctx->sifted_states[to_idx] == NULL
+ || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx],
+ dst_node)
+ || check_dst_limits (dfa, &sctx->limits, mctx, node,
+ str_idx, dst_node, to_idx))
+ continue;
+ {
+ re_dfastate_t *cur_state;
+ /* Clear the flag of the entry being processed and set the flag
+ of the later entries of the same node at STR_IDX. */
+ entry->flag = 0;
+ for (disabled_idx = enabled_idx + 1;
+ disabled_idx < mctx->nbkref_ents; ++disabled_idx)
+ {
+ struct re_backref_cache_entry *entry2;
+ entry2 = mctx->bkref_ents + disabled_idx;
+ if (entry2->str_idx > str_idx)
+ break;
+ entry2->flag = (entry2->node == node) ? 1 : entry2->flag;
+ }
+
+ /* Lazily set up the local context: a shallow copy of SCTX
+ with its own copy of the limits set. */
+ if (local_sctx.sifted_states == NULL)
+ {
+ local_sctx = *sctx;
+ err = re_node_set_init_copy (&local_sctx.limits,
+ &sctx->limits);
+ if (BE (err != REG_NOERROR, 0))
+ goto free_return;
+ }
+ local_sctx.last_node = node;
+ local_sctx.last_str_idx = str_idx;
+ err = re_node_set_insert (&local_sctx.limits, enabled_idx);
+ if (BE (err < 0, 0))
+ {
+ err = REG_ESPACE;
+ goto free_return;
+ }
+ /* Remember the state at STR_IDX so it can be restored after the
+ nested sift, which shares the sifted_states array. */
+ cur_state = local_sctx.sifted_states[str_idx];
+ err = sift_states_backward (preg, mctx, &local_sctx);
+ if (BE (err != REG_NOERROR, 0))
+ goto free_return;
+ if (sctx->limited_states != NULL)
+ {
+ err = merge_state_array (dfa, sctx->limited_states,
+ local_sctx.sifted_states,
+ str_idx + 1);
+ if (BE (err != REG_NOERROR, 0))
+ goto free_return;
+ }
+ local_sctx.sifted_states[str_idx] = cur_state;
+ re_node_set_remove (&local_sctx.limits, enabled_idx);
+ /* We must not use the variable entry here, since
+ mctx->bkref_ents might be realloced. */
+ mctx->bkref_ents[enabled_idx].flag = 1;
+ }
+ }
+ /* Finally clear the flag of every entry of this node at STR_IDX. */
+ enabled_idx = search_cur_bkref_entry (mctx, str_idx);
+ for (; enabled_idx < mctx->nbkref_ents; ++enabled_idx)
+ {
+ struct re_backref_cache_entry *entry;
+ entry = mctx->bkref_ents + enabled_idx;
+ if (entry->str_idx > str_idx)
+ break;
+ if (entry->node == node)
+ entry->flag = 0;
+ }
+ }
+ }
+ err = REG_NOERROR;
+ free_return:
+ /* Only the limits set is owned by the local context; everything else
+ in it is shared with SCTX. */
+ if (local_sctx.sifted_states != NULL)
+ {
+ re_node_set_free (&local_sctx.limits);
+ }
+
+ return err;
+}
+
+
+#ifdef RE_ENABLE_I18N
+/* Multibyte step of the backward sift for node NODE_IDX at STR_IDX.
+
+   Returns the byte count reported by check_node_accept_bytes, except
+   that 0 is returned when the node accepts some bytes, the destination
+   index does not exceed MAX_STR_IDX, and the node's successor is not
+   in the sifted state at the destination index.  */
+static int
+sift_states_iter_mb (preg, mctx, sctx, node_idx, str_idx, max_str_idx)
+     const regex_t *preg;
+     const re_match_context_t *mctx;
+     re_sift_context_t *sctx;
+     int node_idx, str_idx, max_str_idx;
+{
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  int dest_idx;
+  /* Check the node can accept `multi byte'.  */
+  int naccepted = check_node_accept_bytes (preg, node_idx, mctx->input,
+                                           str_idx);
+
+  if (naccepted <= 0)
+    return naccepted;
+
+  dest_idx = str_idx + naccepted;
+  if (dest_idx > max_str_idx)
+    return naccepted;
+
+  /* The destination survived sifting, so the node really can consume
+     `naccepted' bytes here.  */
+  if (STATE_NODE_CONTAINS (sctx->sifted_states[dest_idx],
+                           dfa->nexts[node_idx]))
+    return naccepted;
+
+  /* The destination was already thrown away, so the node cannot
+     accept the current multibyte input.  */
+  return 0;
+}
+#endif /* RE_ENABLE_I18N */
+
+\f
+/* Functions for state transition. */
+
+/* Return the next state to which the current state STATE will transit by
+   accepting the current input byte, and update STATE_LOG if necessary.
+   If STATE can accept a multibyte char/collating element/back reference
+   update the destination of STATE_LOG.
+
+   ERR receives REG_NOERROR on success or the error code on failure;
+   on failure NULL is returned.  NULL with *ERR == REG_NOERROR means
+   there is no next state.  FL_SEARCH selects the search variant of
+   the transition table.  */
+
+static re_dfastate_t *
+transit_state (err, preg, mctx, state, fl_search)
+     reg_errcode_t *err;
+     const regex_t *preg;
+     re_match_context_t *mctx;
+     re_dfastate_t *state;
+     int fl_search;
+{
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  re_dfastate_t **trtable, *next_state;
+  unsigned char ch;
+  int cur_idx;
+
+  /* Make sure the byte after the current one is available.  */
+  if (re_string_cur_idx (mctx->input) + 1 >= mctx->input->bufs_len
+      || (re_string_cur_idx (mctx->input) + 1 >= mctx->input->valid_len
+          && mctx->input->valid_len < mctx->input->len))
+    {
+      *err = extend_buffers (mctx);
+      if (BE (*err != REG_NOERROR, 0))
+        return NULL;
+    }
+
+  *err = REG_NOERROR;
+  if (state == NULL)
+    {
+      next_state = state;
+      re_string_skip_bytes (mctx->input, 1);
+    }
+  else
+    {
+#ifdef RE_ENABLE_I18N
+      /* If the current state can accept multibyte.  */
+      if (state->accept_mb)
+        {
+          *err = transit_state_mb (preg, state, mctx);
+          if (BE (*err != REG_NOERROR, 0))
+            return NULL;
+        }
+#endif /* RE_ENABLE_I18N */
+
+      /* Then decide the next state with the single byte.  */
+      if (1)
+        {
+          /* Use transition table  */
+          ch = re_string_fetch_byte (mctx->input);
+          trtable = fl_search ? state->trtable_search : state->trtable;
+          if (trtable == NULL)
+            {
+              trtable = build_trtable (preg, state, fl_search);
+              /* Never index a table that could not be built.
+                 NOTE(review): a NULL result is presumed to mean memory
+                 exhaustion -- confirm against build_trtable.  */
+              if (BE (trtable == NULL, 0))
+                {
+                  *err = REG_ESPACE;
+                  return NULL;
+                }
+              if (fl_search)
+                state->trtable_search = trtable;
+              else
+                state->trtable = trtable;
+            }
+          next_state = trtable[ch];
+        }
+      else
+        {
+          /* don't use transition table  */
+          next_state = transit_state_sb (err, preg, state, fl_search, mctx);
+          /* Test the error code itself, not the (never NULL) pointer.  */
+          if (BE (next_state == NULL && *err != REG_NOERROR, 0))
+            return NULL;
+        }
+    }
+
+  cur_idx = re_string_cur_idx (mctx->input);
+  /* Update the state_log if we need.  */
+  if (mctx->state_log != NULL)
+    {
+      if (cur_idx > mctx->state_log_top)
+        {
+          mctx->state_log[cur_idx] = next_state;
+          mctx->state_log_top = cur_idx;
+        }
+      else if (mctx->state_log[cur_idx] == 0)
+        {
+          mctx->state_log[cur_idx] = next_state;
+        }
+      else
+        {
+          re_dfastate_t *pstate;
+          unsigned int context;
+          re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
+          /* If (state_log[cur_idx] != 0), it implies that cur_idx is
+             the destination of a multibyte char/collating element/
+             back reference.  Then the next state is the union set of
+             these destinations and the results of the transition table.  */
+          pstate = mctx->state_log[cur_idx];
+          log_nodes = pstate->entrance_nodes;
+          if (next_state != NULL)
+            {
+              table_nodes = next_state->entrance_nodes;
+              *err = re_node_set_init_union (&next_nodes, table_nodes,
+                                             log_nodes);
+              if (BE (*err != REG_NOERROR, 0))
+                return NULL;
+            }
+          else
+            /* Shallow copy: NEXT_NODES borrows LOG_NODES' storage and
+               must not be freed below.  */
+            next_nodes = *log_nodes;
+          /* Note: We already add the nodes of the initial state,
+             then we don't need to add them here.  */
+
+          context = re_string_context_at (mctx->input,
+                                          re_string_cur_idx (mctx->input) - 1,
+                                          mctx->eflags, preg->newline_anchor);
+          next_state = mctx->state_log[cur_idx]
+            = re_acquire_state_context (err, dfa, &next_nodes, context);
+          /* We don't need to check errors here, since the return value of
+             this function is next_state and ERR is already set.  */
+
+          /* Only a set built by the union above is owned here.  */
+          if (table_nodes != NULL)
+            re_node_set_free (&next_nodes);
+        }
+    }
+
+  /* Check OP_OPEN_SUBEXP in the current state in case that we use them
+     later.  We must check them here, since the back references in the
+     next state might use them.  */
+  if (dfa->nbackref && next_state/* && fl_process_bkref */)
+    {
+      *err = check_subexp_matching_top (dfa, mctx, &next_state->nodes,
+                                        cur_idx);
+      if (BE (*err != REG_NOERROR, 0))
+        return NULL;
+    }
+
+  /* If the next state has back references.  */
+  if (next_state != NULL && next_state->has_backref)
+    {
+      *err = transit_state_bkref (preg, &next_state->nodes, mctx);
+      if (BE (*err != REG_NOERROR, 0))
+        return NULL;
+      /* The back-reference pass may have replaced the logged state.  */
+      next_state = mctx->state_log[cur_idx];
+    }
+  return next_state;
+}
+
+/* Helper functions for transit_state. */
+
+/* From the node set CUR_NODES, pick up the nodes whose types are
+   OP_OPEN_SUBEXP and which have corresponding back references in the regular
+   expression.  And register them to use them later for evaluating the
+   corresponding back references.
+
+   STR_IDX is the string index at which the nodes were reached.
+   Returns REG_NOERROR, or the error from match_ctx_add_subtop.  */
+
+static reg_errcode_t
+check_subexp_matching_top (dfa, mctx, cur_nodes, str_idx)
+     re_dfa_t *dfa;
+     re_match_context_t *mctx;
+     re_node_set *cur_nodes;
+     int str_idx;
+{
+  int node_idx;
+  reg_errcode_t err;
+
+  /* TODO: This isn't efficient.
+     Because there might be more than one nodes whose types are
+     OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
+     nodes.
+     E.g. RE: (a){2}  */
+  for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)
+    {
+      int node = cur_nodes->elems[node_idx];
+      if (dfa->nodes[node].type == OP_OPEN_SUBEXP
+          /* Shifting by the width of the map or more is undefined
+             behaviour; a subexpression with such an index cannot have
+             a bit in the map, so it is treated as unreferenced.  */
+          && dfa->nodes[node].opr.idx < (8 * sizeof (dfa->used_bkref_map))
+          && dfa->used_bkref_map & (1 << dfa->nodes[node].opr.idx))
+        {
+          err = match_ctx_add_subtop (mctx, node, str_idx);
+          if (BE (err != REG_NOERROR, 0))
+            return err;
+        }
+    }
+  return REG_NOERROR;
+}
+
+/* Return the next state to which the current state STATE will transit by
+ accepting the current input byte.
+ The state is computed directly from the node set, without a
+ transition table: the epsilon closures of the successors of every
+ node that accepts the current byte are merged, the entrance nodes of
+ the initial state are added when FL_SEARCH is set, and the state for
+ the result is acquired. The input position is advanced by one byte
+ on success. On failure NULL is returned with *ERR set. */
+
+static re_dfastate_t *
+transit_state_sb (err, preg, state, fl_search, mctx)
+ reg_errcode_t *err;
+ const regex_t *preg;
+ re_dfastate_t *state;
+ int fl_search;
+ re_match_context_t *mctx;
+{
+ re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+ re_node_set next_nodes;
+ re_dfastate_t *next_state;
+ int node_cnt, cur_str_idx = re_string_cur_idx (mctx->input);
+ unsigned int context;
+
+ *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
+ if (BE (*err != REG_NOERROR, 0))
+ return NULL;
+ /* Collect the destinations of all nodes accepting the current byte. */
+ for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
+ {
+ int cur_node = state->nodes.elems[node_cnt];
+ if (check_node_accept (preg, dfa->nodes + cur_node, mctx, cur_str_idx))
+ {
+ *err = re_node_set_merge (&next_nodes,
+ dfa->eclosures + dfa->nexts[cur_node]);
+ if (BE (*err != REG_NOERROR, 0))
+ {
+ re_node_set_free (&next_nodes);
+ return NULL;
+ }
+ }
+ }
+ if (fl_search)
+ {
+#ifdef RE_ENABLE_I18N
+ /* In a multibyte locale, the first CHARACTER node found decides:
+ if it is flagged mb_partial, the initial nodes are not added. */
+ int not_initial = 0;
+ if (MB_CUR_MAX > 1)
+ for (node_cnt = 0; node_cnt < next_nodes.nelem; ++node_cnt)
+ if (dfa->nodes[next_nodes.elems[node_cnt]].type == CHARACTER)
+ {
+ not_initial = dfa->nodes[next_nodes.elems[node_cnt]].mb_partial;
+ break;
+ }
+ /* This `if' guards the block after the #endif. */
+ if (!not_initial)
+#endif
+ {
+ *err = re_node_set_merge (&next_nodes,
+ dfa->init_state->entrance_nodes);
+ if (BE (*err != REG_NOERROR, 0))
+ {
+ re_node_set_free (&next_nodes);
+ return NULL;
+ }
+ }
+ }
+ context = re_string_context_at (mctx->input, cur_str_idx, mctx->eflags,
+ preg->newline_anchor);
+ next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
+ /* We don't need to check errors here, since the return value of
+ this function is next_state and ERR is already set. */
+
+ re_node_set_free (&next_nodes);
+ re_string_skip_bytes (mctx->input, 1);
+ return next_state;
+}
+
+#ifdef RE_ENABLE_I18N
+/* For every node of PSTATE that accepts a multibyte element at the
+   current input position, add the epsilon closure of the node's
+   successor to the state log entry at the position just after that
+   element. MCTX->max_mb_elem_len is raised to the longest element
+   seen. Returns REG_NOERROR or the first error from a helper. */
+static reg_errcode_t
+transit_state_mb (preg, pstate, mctx)
+ const regex_t *preg;
+ re_dfastate_t *pstate;
+ re_match_context_t *mctx;
+{
+ reg_errcode_t err;
+ re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+ int i;
+
+ for (i = 0; i < pstate->nodes.nelem; ++i)
+ {
+ re_node_set dest_nodes, *new_nodes;
+ int cur_node_idx = pstate->nodes.elems[i];
+ int naccepted = 0, dest_idx;
+ unsigned int context;
+ re_dfastate_t *dest_state;
+
+ /* Skip nodes whose constraint is not satisfied in this context. */
+ if (dfa->nodes[cur_node_idx].constraint)
+ {
+ context = re_string_context_at (mctx->input,
+ re_string_cur_idx (mctx->input),
+ mctx->eflags, preg->newline_anchor);
+ if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
+ context))
+ continue;
+ }
+
+ /* How many bytes can the node accept? */
+ if (ACCEPT_MB_NODE (dfa->nodes[cur_node_idx].type))
+ naccepted = check_node_accept_bytes (preg, cur_node_idx, mctx->input,
+ re_string_cur_idx (mctx->input));
+ if (naccepted == 0)
+ continue;
+
+ /* The node can accept `naccepted' bytes. */
+ dest_idx = re_string_cur_idx (mctx->input) + naccepted;
+ mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
+ : mctx->max_mb_elem_len);
+ err = clean_state_log_if_need (mctx, dest_idx);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+#ifdef DEBUG
+ assert (dfa->nexts[cur_node_idx] != -1);
+#endif
+ /* `cur_node_idx' may point the entity of the OP_CONTEXT_NODE,
+ then we use pstate->nodes.elems[i] instead. */
+ new_nodes = dfa->eclosures + dfa->nexts[pstate->nodes.elems[i]];
+
+ dest_state = mctx->state_log[dest_idx];
+ if (dest_state == NULL)
+ /* Shallow copy: DEST_NODES borrows NEW_NODES' storage and is
+ therefore not freed below. */
+ dest_nodes = *new_nodes;
+ else
+ {
+ err = re_node_set_init_union (&dest_nodes,
+ dest_state->entrance_nodes, new_nodes);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ }
+ context = re_string_context_at (mctx->input, dest_idx - 1, mctx->eflags,
+ preg->newline_anchor);
+ mctx->state_log[dest_idx]
+ = re_acquire_state_context (&err, dfa, &dest_nodes, context);
+ /* Only the union built above is owned by this function. */
+ if (dest_state != NULL)
+ re_node_set_free (&dest_nodes);
+ if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0))
+ return err;
+ }
+ return REG_NOERROR;
+}
+#endif /* RE_ENABLE_I18N */
+/* Follow each OP_BACK_REF node of NODES from the current input index, updating MCTX->state_log; return REG_NOERROR or the first error. */
+static reg_errcode_t
+transit_state_bkref (preg, nodes, mctx)
+ const regex_t *preg;
+ re_node_set *nodes;
+ re_match_context_t *mctx;
+{
+ reg_errcode_t err;
+ re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+ int i;
+ int cur_str_idx = re_string_cur_idx (mctx->input);
+
+ for (i = 0; i < nodes->nelem; ++i)
+ {
+ int dest_str_idx, prev_nelem, bkc_idx;
+ int node_idx = nodes->elems[i];
+ unsigned int context;
+ re_token_t *node = dfa->nodes + node_idx;
+ re_node_set *new_dest_nodes;
+
+ /* Check whether `node' is a backreference or not. */
+ if (node->type != OP_BACK_REF)
+ continue;
+
+ if (node->constraint)
+ {
+ context = re_string_context_at (mctx->input, cur_str_idx,
+ mctx->eflags, preg->newline_anchor);
+ if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
+ continue;
+ }
+
+ /* `node' is a backreference. Let get_subexp() register its candidate
+ matches; BKC_IDX remembers where the cache ended before that call. */
+ bkc_idx = mctx->nbkref_ents;
+ err = get_subexp (preg, mctx, node_idx, cur_str_idx);
+ if (BE (err != REG_NOERROR, 0))
+ goto free_return;
+
+ /* Then, for every cache entry from BKC_IDX on, add the epsilon closure
+ (`new_dest_nodes') of the backreference to the appropriate state_log. */
+#ifdef DEBUG
+ assert (dfa->nexts[node_idx] != -1);
+#endif
+ for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
+ {
+ int subexp_len;
+ re_dfastate_t *dest_state;
+ struct re_backref_cache_entry *bkref_ent;
+ bkref_ent = mctx->bkref_ents + bkc_idx;
+ if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
+ continue; /* Entry is for another node or another position. */
+ subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
+ new_dest_nodes = (subexp_len == 0
+ ? dfa->eclosures + dfa->edests[node_idx].elems[0]
+ : dfa->eclosures + dfa->nexts[node_idx]); /* Zero-length match uses edests, otherwise nexts. */
+ dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
+ - bkref_ent->subexp_from);
+ context = re_string_context_at (mctx->input, dest_str_idx - 1,
+ mctx->eflags, preg->newline_anchor);
+ dest_state = mctx->state_log[dest_str_idx];
+ prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
+ : mctx->state_log[cur_str_idx]->nodes.nelem);
+ /* Add `new_dest_nodes' to state_log. */
+ if (dest_state == NULL)
+ {
+ mctx->state_log[dest_str_idx]
+ = re_acquire_state_context (&err, dfa, new_dest_nodes,
+ context);
+ if (BE (mctx->state_log[dest_str_idx] == NULL
+ && err != REG_NOERROR, 0))
+ goto free_return;
+ }
+ else
+ {
+ re_node_set dest_nodes; /* Union of the existing entrance nodes and the new ones. */
+ err = re_node_set_init_union (&dest_nodes,
+ dest_state->entrance_nodes,
+ new_dest_nodes);
+ if (BE (err != REG_NOERROR, 0))
+ {
+ re_node_set_free (&dest_nodes);
+ goto free_return;
+ }
+ mctx->state_log[dest_str_idx]
+ = re_acquire_state_context (&err, dfa, &dest_nodes, context);
+ re_node_set_free (&dest_nodes);
+ if (BE (mctx->state_log[dest_str_idx] == NULL
+ && err != REG_NOERROR, 0))
+ goto free_return;
+ }
+ /* We need to check recursively if the backreference can epsilon
+ transit, i.e. it matched nothing and the current state grew. */
+ if (subexp_len == 0
+ && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
+ {
+ err = check_subexp_matching_top (dfa, mctx, new_dest_nodes,
+ cur_str_idx);
+ if (BE (err != REG_NOERROR, 0))
+ goto free_return;
+ err = transit_state_bkref (preg, new_dest_nodes, mctx);
+ if (BE (err != REG_NOERROR, 0))
+ goto free_return;
+ }
+ }
+ }
+ err = REG_NOERROR;
+ free_return: /* Common exit; this function owns nothing that needs releasing here. */
+ return err;
+}
+
+/* Enumerate all the candidates which the backreference BKREF_NODE can match
+   at BKREF_STR_IDX, and register them by match_ctx_add_entry().
+   Note that we might collect inappropriate candidates here; checking them
+   strictly is too costly, so that is left to prune_impossible_nodes().
+   Return REG_NOERROR, or the error code reported by a helper.  */
+
+static reg_errcode_t
+get_subexp (preg, mctx, bkref_node, bkref_str_idx)
+     const regex_t *preg;
+     re_match_context_t *mctx;
+     int bkref_node, bkref_str_idx;
+{
+  int subexp_num, sub_top_idx;
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  char *buf = (char *) re_string_get_buffer (mctx->input);
+  /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX. */
+  int cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
+  for (; cache_idx < mctx->nbkref_ents; ++cache_idx)
+    {
+      struct re_backref_cache_entry *entry = mctx->bkref_ents + cache_idx;
+      if (entry->str_idx > bkref_str_idx)
+        break;
+      if (entry->node == bkref_node)
+        return REG_NOERROR; /* We already checked it. */
+    }
+  subexp_num = dfa->nodes[bkref_node].opr.idx - 1;
+
+  /* For each sub expression */
+  for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
+    {
+      reg_errcode_t err;
+      re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
+      re_sub_match_last_t *sub_last;
+      int sub_last_idx, sl_str;
+      char *bkref_str;
+
+      if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
+        continue; /* It isn't related. */
+
+      sl_str = sub_top->str_idx;
+      bkref_str = buf + bkref_str_idx;
+      /* At first, check the last node of sub expressions we already
+         evaluated. */
+      for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
+        {
+          int sl_str_diff;
+          sub_last = sub_top->lasts[sub_last_idx];
+          sl_str_diff = sub_last->str_idx - sl_str;
+          /* The matched string by the sub expression match with the substring
+             at the back reference? */
+          if (sl_str_diff > 0
+              && memcmp (bkref_str, buf + sl_str, sl_str_diff) != 0)
+            break; /* We don't need to search this sub expression any more. */
+          bkref_str += sl_str_diff;
+          sl_str += sl_str_diff;
+          err = get_subexp_sub (preg, mctx, sub_top, sub_last, bkref_node,
+                                bkref_str_idx);
+          if (err == REG_NOMATCH)
+            continue;
+          if (BE (err != REG_NOERROR, 0))
+            return err;
+        }
+      if (sub_last_idx < sub_top->nlasts)
+        continue; /* The loop above stopped on a text mismatch. */
+      if (sub_last_idx > 0)
+        ++sl_str;
+      /* Then, search for the other last nodes of the sub expression. */
+      for (; sl_str <= bkref_str_idx; ++sl_str)
+        {
+          int cls_node, sl_str_off;
+          re_node_set *nodes;
+          sl_str_off = sl_str - sub_top->str_idx;
+          /* The matched string by the sub expression match with the substring
+             at the back reference? */
+          if (sl_str_off > 0
+              && memcmp (bkref_str++, buf + sl_str - 1, 1) != 0)
+            break; /* We don't need to search this sub expression any more. */
+          if (mctx->state_log[sl_str] == NULL)
+            continue;
+          /* Does this state have a ')' of the sub expression? */
+          nodes = &mctx->state_log[sl_str]->nodes;
+          cls_node = find_subexp_node (dfa, nodes, subexp_num, 0);
+          if (cls_node == -1)
+            continue; /* No. */
+          if (sub_top->path == NULL)
+            {
+              sub_top->path = calloc (sizeof (state_array_t),
+                                      sl_str - sub_top->str_idx + 1);
+              if (sub_top->path == NULL)
+                return REG_ESPACE;
+            }
+          /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
+             in the current context? */
+          err = check_arrival (preg, mctx, sub_top->path, sub_top->node,
+                               sub_top->str_idx, cls_node, sl_str, 0);
+          if (err == REG_NOMATCH)
+            continue;
+          if (BE (err != REG_NOERROR, 0))
+            return err;
+          sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
+          if (BE (sub_last == NULL, 0))
+            return REG_ESPACE;
+          err = get_subexp_sub (preg, mctx, sub_top, sub_last, bkref_node,
+                                bkref_str_idx);
+          if (BE (err != REG_NOERROR && err != REG_NOMATCH, 0))
+            return err; /* REG_NOMATCH only means: try the next end. */
+        }
+    }
+  return REG_NOERROR;
+}
+
+/* Helper functions for get_subexp(). */
+
+/* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.
+   If it can arrive, register the sub expression expressed with SUB_TOP
+   and SUB_LAST.  Return check_arrival's verdict, or any later error.  */
+
+static reg_errcode_t
+get_subexp_sub (preg, mctx, sub_top, sub_last, bkref_node, bkref_str)
+     const regex_t *preg;
+     re_match_context_t *mctx;
+     re_sub_match_top_t *sub_top;
+     re_sub_match_last_t *sub_last;
+     int bkref_node, bkref_str;
+{
+  reg_errcode_t err;
+  int to_idx;
+  /* Can the subexpression arrive the back reference? */
+  err = check_arrival (preg, mctx, &sub_last->path, sub_last->node,
+                       sub_last->str_idx, bkref_node, bkref_str, 1);
+  if (err != REG_NOERROR)
+    return err;
+  err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
+                             sub_last->str_idx);
+  if (BE (err != REG_NOERROR, 0))
+    return err;
+  to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
+  /* Report a failure of the state log clean-up instead of dropping it.  */
+  return clean_state_log_if_need (mctx, to_idx);
+}
+
+/* Scan NODES in order and return the first node that is a subexpression
+   boundary with index SUBEXP_IDX: an OP_OPEN_SUBEXP node when FL_OPEN is
+   nonzero, an OP_CLOSE_SUBEXP node otherwise.  Return -1 if NODES holds
+   no such node.
+   TODO: This function isn't efficient: several nodes may share the same
+   type and SUBEXP_IDX, so every member has to be inspected.
+   E.g. RE: (a){2} */
+
+static int
+find_subexp_node (dfa, nodes, subexp_idx, fl_open)
+     re_dfa_t *dfa;
+     re_node_set *nodes;
+     int subexp_idx, fl_open;
+{
+  /* The token type to look for depends only on FL_OPEN.  */
+  int wanted = fl_open ? OP_OPEN_SUBEXP : OP_CLOSE_SUBEXP;
+  int pos = 0;
+  while (pos < nodes->nelem)
+    {
+      int candidate = nodes->elems[pos++];
+      if (dfa->nodes[candidate].type == wanted
+          && dfa->nodes[candidate].opr.idx == subexp_idx)
+        return candidate;
+    }
+  return -1;
+}
+
+/* Check whether the node TOP_NODE at TOP_STR can arrive to the node
+   LAST_NODE at LAST_STR.  We record the path onto PATH since it will be
+   heavily reused.  Return REG_NOERROR if it can arrive, REG_NOMATCH if it
+   cannot, or another error code if a helper fails.  */
+
+static reg_errcode_t
+check_arrival (preg, mctx, path, top_node, top_str, last_node, last_str,
+               fl_open)
+     const regex_t *preg;
+     re_match_context_t *mctx;
+     state_array_t *path;
+     int top_node, top_str, last_node, last_str, fl_open;
+{
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  reg_errcode_t err;
+  int subexp_num, backup_cur_idx, str_idx, null_cnt;
+  re_dfastate_t *cur_state = NULL;
+  re_node_set *cur_nodes, next_nodes;
+  re_dfastate_t **backup_state_log;
+  unsigned int context;
+
+  subexp_num = dfa->nodes[top_node].opr.idx;
+  /* Extend the buffer if we need. */
+  if (path->alloc < last_str + mctx->max_mb_elem_len + 1)
+    {
+      re_dfastate_t **new_array;
+      int old_alloc = path->alloc;
+      int new_alloc = old_alloc + last_str + mctx->max_mb_elem_len + 1;
+      new_array = re_realloc (path->array, re_dfastate_t *, new_alloc);
+      if (new_array == NULL)
+        return REG_ESPACE;
+      path->array = new_array;
+      path->alloc = new_alloc; /* Record the size only once it is real. */
+      memset (new_array + old_alloc, '\0',
+              sizeof (re_dfastate_t *) * (new_alloc - old_alloc));
+    }
+
+  str_idx = path->next_idx == 0 ? top_str : path->next_idx;
+
+  /* Temporary modify MCTX. */
+  backup_state_log = mctx->state_log;
+  backup_cur_idx = mctx->input->cur_idx;
+  mctx->state_log = path->array;
+  mctx->input->cur_idx = str_idx;
+
+  /* Setup initial node set. */
+  context = re_string_context_at (mctx->input, str_idx - 1, mctx->eflags,
+                                  preg->newline_anchor);
+  if (str_idx == top_str)
+    {
+      err = re_node_set_init_1 (&next_nodes, top_node);
+      if (BE (err != REG_NOERROR, 0))
+        goto restore_return;
+      err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, fl_open);
+      if (BE (err != REG_NOERROR, 0))
+        {
+          re_node_set_free (&next_nodes);
+          goto restore_return;
+        }
+    }
+  else
+    {
+      cur_state = mctx->state_log[str_idx];
+      if (cur_state && cur_state->has_backref)
+        {
+          err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
+          if (BE ( err != REG_NOERROR, 0))
+            goto restore_return;
+        }
+      else
+        re_node_set_init_empty (&next_nodes);
+    }
+  if (str_idx == top_str || (cur_state && cur_state->has_backref))
+    {
+      if (next_nodes.nelem)
+        {
+          err = expand_bkref_cache (preg, mctx, &next_nodes, str_idx, last_str,
+                                    subexp_num, fl_open);
+          if (BE ( err != REG_NOERROR, 0))
+            {
+              re_node_set_free (&next_nodes);
+              goto restore_return;
+            }
+        }
+      cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
+      if (BE (cur_state == NULL && err != REG_NOERROR, 0))
+        {
+          re_node_set_free (&next_nodes);
+          goto restore_return;
+        }
+      mctx->state_log[str_idx] = cur_state;
+    }
+
+  for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
+    {
+      re_node_set_empty (&next_nodes);
+      if (mctx->state_log[str_idx + 1])
+        {
+          err = re_node_set_merge (&next_nodes,
+                                   &mctx->state_log[str_idx + 1]->nodes);
+          if (BE (err != REG_NOERROR, 0))
+            {
+              re_node_set_free (&next_nodes);
+              goto restore_return;
+            }
+        }
+      if (cur_state)
+        {
+          err = check_arrival_add_next_nodes(preg, dfa, mctx, str_idx,
+                                             &cur_state->nodes, &next_nodes);
+          if (BE (err != REG_NOERROR, 0))
+            {
+              re_node_set_free (&next_nodes);
+              goto restore_return;
+            }
+        }
+      ++str_idx;
+      if (next_nodes.nelem)
+        {
+          err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num,
+                                          fl_open);
+          if (BE (err != REG_NOERROR, 0))
+            {
+              re_node_set_free (&next_nodes);
+              goto restore_return;
+            }
+          err = expand_bkref_cache (preg, mctx, &next_nodes, str_idx, last_str,
+                                    subexp_num, fl_open);
+          if (BE ( err != REG_NOERROR, 0))
+            {
+              re_node_set_free (&next_nodes);
+              goto restore_return;
+            }
+        }
+      context = re_string_context_at (mctx->input, str_idx - 1, mctx->eflags,
+                                      preg->newline_anchor);
+      cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
+      if (BE (cur_state == NULL && err != REG_NOERROR, 0))
+        {
+          re_node_set_free (&next_nodes);
+          goto restore_return;
+        }
+      mctx->state_log[str_idx] = cur_state;
+      null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
+    }
+  re_node_set_free (&next_nodes);
+  cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
+               : &mctx->state_log[last_str]->nodes);
+  path->next_idx = str_idx;
+  /* Then check the current node set has the node LAST_NODE. */
+  err = (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node)
+         ? REG_NOERROR : REG_NOMATCH);
+
+ restore_return:
+  /* Fix MCTX.  Error paths jump here too, so the caller never sees the
+     temporary state log or input index.  */
+  mctx->state_log = backup_state_log;
+  mctx->input->cur_idx = backup_cur_idx;
+  return err;
+}
+
+/* Helper functions for check_arrival. */
+
+/* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them
+   to NEXT_NODES.  Transitions that consume more than one byte are recorded
+   directly in MCTX->state_log.  Return REG_NOERROR, or an error code.
+   TODO: This function is similar to the functions transit_state*(),
+   however this function has many additional works.  Can't we unify them? */
+
+static reg_errcode_t
+check_arrival_add_next_nodes (preg, dfa, mctx, str_idx, cur_nodes, next_nodes)
+     const regex_t *preg;
+     re_dfa_t *dfa;
+     re_match_context_t *mctx;
+     int str_idx;
+     re_node_set *cur_nodes, *next_nodes;
+{
+  int cur_idx;
+  int result; /* re_node_set_insert's int status; never kept in an enum. */
+  re_node_set union_set;
+  re_node_set_init_empty (&union_set);
+  for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
+    {
+      int naccepted = 0;
+      int cur_node = cur_nodes->elems[cur_idx];
+      re_token_type_t type = dfa->nodes[cur_node].type;
+      if (IS_EPSILON_NODE(type))
+        continue;
+#ifdef RE_ENABLE_I18N
+      /* If the node may accept `multi byte'. */
+      if (ACCEPT_MB_NODE (type))
+        {
+          naccepted = check_node_accept_bytes (preg, cur_node, mctx->input,
+                                               str_idx);
+          if (naccepted > 1)
+            {
+              int next_node = dfa->nexts[cur_node];
+              int next_idx = str_idx + naccepted;
+              re_dfastate_t *dest_state = mctx->state_log[next_idx];
+              reg_errcode_t err;
+              re_node_set_empty (&union_set);
+              if (dest_state)
+                {
+                  err = re_node_set_merge (&union_set, &dest_state->nodes);
+                  if (BE (err != REG_NOERROR, 0))
+                    {
+                      re_node_set_free (&union_set);
+                      return err;
+                    }
+                  result = re_node_set_insert (&union_set, next_node);
+                  if (BE (result < 0, 0))
+                    {
+                      re_node_set_free (&union_set);
+                      return REG_ESPACE;
+                    }
+                }
+              else
+                {
+                  result = re_node_set_insert (&union_set, next_node);
+                  if (BE (result < 0, 0))
+                    {
+                      re_node_set_free (&union_set);
+                      return REG_ESPACE;
+                    }
+                }
+              mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
+                                                            &union_set);
+              if (BE (mctx->state_log[next_idx] == NULL
+                      && err != REG_NOERROR, 0))
+                {
+                  re_node_set_free (&union_set);
+                  return err;
+                }
+            }
+        }
+#endif /* RE_ENABLE_I18N */
+      if (naccepted
+          || check_node_accept (preg, dfa->nodes + cur_node, mctx,
+                                str_idx))
+        {
+          result = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
+          if (BE (result < 0, 0))
+            {
+              re_node_set_free (&union_set);
+              return REG_ESPACE;
+            }
+        }
+    }
+  re_node_set_free (&union_set);
+  return REG_NOERROR;
+}
+
+/* Replace CUR_NODES by the union of the epsilon closures of its members,
+   leaving out the nodes which are:
+   - inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
+   - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
+   Return REG_NOERROR, or the error code of the failing helper.  */
+
+static reg_errcode_t
+check_arrival_expand_ecl (dfa, cur_nodes, ex_subexp, fl_open)
+     re_dfa_t *dfa;
+     re_node_set *cur_nodes;
+     int ex_subexp, fl_open;
+{
+  re_node_set expanded;
+  reg_errcode_t status;
+  int pos;
+#ifdef DEBUG
+  assert (cur_nodes->nelem);
+#endif
+  /* Allocate the result set, using CUR_NODES->nelem as its initial size.  */
+  status = re_node_set_alloc (&expanded, cur_nodes->nelem);
+  if (BE (status != REG_NOERROR, 0))
+    return status;
+
+  /* Accumulate into EXPANDED the epsilon closure of every member of
+     CUR_NODES.  */
+  pos = 0;
+  while (pos < cur_nodes->nelem)
+    {
+      int member = cur_nodes->elems[pos++];
+      re_node_set *closure = dfa->eclosures + member;
+
+      if (find_subexp_node (dfa, closure, ex_subexp, fl_open) == -1)
+        {
+          /* No boundary of EX_SUBEXP in the closure: take it whole.  */
+          status = re_node_set_merge (&expanded, closure);
+        }
+      else
+        {
+          /* The closure contains such a boundary: rebuild it node by node
+             through the helper instead of merging it wholesale.  */
+          status = check_arrival_expand_ecl_sub (dfa, &expanded, member,
+                                                 ex_subexp, fl_open);
+        }
+      if (BE (status != REG_NOERROR, 0))
+        {
+          re_node_set_free (&expanded);
+          return status;
+        }
+    }
+
+  /* Hand the expanded set back through CUR_NODES.  */
+  re_node_set_free (cur_nodes);
+  *cur_nodes = expanded;
+  return REG_NOERROR;
+}
+
+/* Helper function for check_arrival_expand_ecl.
+   Walk the epsilon transitions starting at TARGET and append the visited
+   nodes to DST_NODES, stopping at the '(' or ')' of EX_SUBEXP.  */
+
+static reg_errcode_t
+check_arrival_expand_ecl_sub (dfa, dst_nodes, target, ex_subexp, fl_open)
+     re_dfa_t *dfa;
+     int target, ex_subexp, fl_open;
+     re_node_set *dst_nodes;
+{
+  int node = target;
+
+  /* Follow the chain until it reaches a node DST_NODES already holds.  */
+  while (!re_node_set_contains (dst_nodes, node))
+    {
+      int rc;
+      int kind = dfa->nodes[node].type;
+      /* Nonzero if NODE is the '(' (FL_OPEN) or ')' (!FL_OPEN) of EX_SUBEXP.  */
+      int at_boundary = ((fl_open ? kind == OP_OPEN_SUBEXP
+                          : kind == OP_CLOSE_SUBEXP)
+                         && dfa->nodes[node].opr.idx == ex_subexp);
+
+      /* Every node is recorded except the excluded '(' itself.  */
+      if (!at_boundary || !fl_open)
+        {
+          rc = re_node_set_insert (dst_nodes, node);
+          if (BE (rc == -1, 0))
+            return REG_ESPACE;
+        }
+      /* The walk ends at the boundary, or where no epsilon edge leaves.  */
+      if (at_boundary || dfa->edests[node].nelem == 0)
+        break;
+      /* With two epsilon edges, recurse into the second, then follow the first.  */
+      if (dfa->edests[node].nelem == 2)
+        {
+          rc = check_arrival_expand_ecl_sub (dfa, dst_nodes,
+                                             dfa->edests[node].elems[1],
+                                             ex_subexp, fl_open);
+          if (BE (rc != REG_NOERROR, 0))
+            return rc;
+        }
+      node = dfa->edests[node].elems[0];
+    }
+  return REG_NOERROR;
+}
+
+
+/* For all the back references in the current state, calculate the
+ destination of the back references by the appropriate entry
+ in MCTX->BKREF_ENTS. Return REG_NOERROR, or an error code on failure. */
+
+static reg_errcode_t
+expand_bkref_cache (preg, mctx, cur_nodes, cur_str, last_str, subexp_num,
+ fl_open)
+ const regex_t *preg;
+ re_match_context_t *mctx;
+ int cur_str, last_str, subexp_num, fl_open;
+ re_node_set *cur_nodes;
+{
+ reg_errcode_t err;
+ re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+ int cache_idx, cache_idx_start;
+ /* Scan the cache from the index search_cur_bkref_entry gives for CUR_STR. */
+
+ cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
+ for (cache_idx = cache_idx_start; cache_idx < mctx->nbkref_ents; ++cache_idx)
+ {
+ int to_idx, next_node;
+ struct re_backref_cache_entry *ent = mctx->bkref_ents + cache_idx;
+ if (ent->str_idx > cur_str)
+ break;
+ /* Is the node of the entry ENT a member of CUR_NODES? */
+ if (!re_node_set_contains (cur_nodes, ent->node))
+ continue; /* No. */
+
+ to_idx = cur_str + ent->subexp_to - ent->subexp_from;
+ /* Calculate the destination of the back reference, and append it
+ to MCTX->STATE_LOG. */
+ if (to_idx == cur_str)
+ {
+ /* The backreference did epsilon transit, we must re-check all the
+ node in the current state. */
+ re_node_set new_dests;
+ reg_errcode_t err2, err3;
+ next_node = dfa->edests[ent->node].elems[0];
+ if (re_node_set_contains (cur_nodes, next_node))
+ continue;
+ err = re_node_set_init_1 (&new_dests, next_node);
+ err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num,
+ fl_open);
+ err3 = re_node_set_merge (cur_nodes, &new_dests);
+ re_node_set_free (&new_dests);
+ if (BE (err != REG_NOERROR || err2 != REG_NOERROR
+ || err3 != REG_NOERROR, 0))
+ {
+ err = (err != REG_NOERROR ? err
+ : (err2 != REG_NOERROR ? err2 : err3)); /* Report the earliest failure. */
+ return err;
+ }
+ /* TODO: It is still inefficient... */
+ cache_idx = cache_idx_start - 1; /* Restart the scan: CUR_NODES was just merged into. */
+ continue;
+ }
+ else
+ {
+ re_node_set union_set;
+ next_node = dfa->nexts[ent->node];
+ if (mctx->state_log[to_idx])
+ {
+ int ret;
+ if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
+ next_node))
+ continue; /* The destination state already holds NEXT_NODE. */
+ err = re_node_set_init_copy (&union_set,
+ &mctx->state_log[to_idx]->nodes);
+ ret = re_node_set_insert (&union_set, next_node);
+ if (BE (err != REG_NOERROR || ret < 0, 0))
+ {
+ re_node_set_free (&union_set);
+ err = err != REG_NOERROR ? err : REG_ESPACE;
+ return err;
+ }
+ }
+ else
+ {
+ err = re_node_set_init_1 (&union_set, next_node);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ }
+ mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
+ re_node_set_free (&union_set);
+ if (BE (mctx->state_log[to_idx] == NULL
+ && err != REG_NOERROR, 0))
+ return err;
+ }
+ }
+ return REG_NOERROR;
+}
+
+/* Build the SBC_MAX-entry transition table for STATE; a nonzero FL_SEARCH may also merge
+ the initial state's entrance nodes. Return the new table, or NULL on failure. */
+
+static re_dfastate_t **
+build_trtable (preg, state, fl_search)
+ const regex_t *preg;
+ const re_dfastate_t *state;
+ int fl_search;
+{
+ reg_errcode_t err;
+ re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+ int i, j, k, ch;
+ int dests_node_malloced = 0, dest_states_malloced = 0;
+ int ndests; /* Number of the destination states from `state'. */
+ re_dfastate_t **trtable;
+ re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
+ re_node_set follows, *dests_node;
+ bitset *dests_ch;
+ bitset acceptable;
+
+ /* We build DFA states which corresponds to the destination nodes
+ from `state'. `dests_node[i]' represents the nodes which i-th
+ destination state contains, and `dests_ch[i]' represents the
+ characters which i-th destination state accepts. */
+#ifdef _LIBC
+ if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX))
+ dests_node = (re_node_set *)
+ alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX);
+ else
+#endif
+ {
+ dests_node = (re_node_set *)
+ malloc ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX);
+ if (BE (dests_node == NULL, 0))
+ return NULL;
+ dests_node_malloced = 1;
+ }
+ dests_ch = (bitset *) (dests_node + SBC_MAX); /* The bitsets follow the SBC_MAX node sets in the same allocation. */
+
+ /* Initialize transition table. */
+ trtable = (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
+ if (BE (trtable == NULL, 0))
+ {
+ if (dests_node_malloced)
+ free (dests_node);
+ return NULL;
+ }
+
+ /* At first, group all nodes belonging to `state' into several
+ destinations. */
+ ndests = group_nodes_into_DFAstates (preg, state, dests_node, dests_ch);
+ if (BE (ndests <= 0, 0))
+ {
+ if (dests_node_malloced)
+ free (dests_node);
+ /* Return NULL in case of an error, trtable otherwise. */
+ if (ndests == 0)
+ return trtable;
+ free (trtable);
+ return NULL;
+ }
+
+ err = re_node_set_alloc (&follows, ndests + 1);
+ if (BE (err != REG_NOERROR, 0))
+ goto out_free;
+
+#ifdef _LIBC
+ if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX
+ + ndests * 3 * sizeof (re_dfastate_t *)))
+ dest_states = (re_dfastate_t **)
+ alloca (ndests * 3 * sizeof (re_dfastate_t *));
+ else
+#endif
+ {
+ dest_states = (re_dfastate_t **)
+ malloc (ndests * 3 * sizeof (re_dfastate_t *));
+ if (BE (dest_states == NULL, 0))
+ {
+out_free: /* Shared failure exit; other error paths in this function jump here too. */
+ if (dest_states_malloced)
+ free (dest_states);
+ re_node_set_free (&follows);
+ for (i = 0; i < ndests; ++i)
+ re_node_set_free (dests_node + i);
+ free (trtable);
+ if (dests_node_malloced)
+ free (dests_node);
+ return NULL;
+ }
+ dest_states_malloced = 1;
+ }
+ dest_states_word = dest_states + ndests; /* Second of three NDESTS-sized slices of dest_states. */
+ dest_states_nl = dest_states_word + ndests; /* Third slice. */
+ bitset_empty (acceptable);
+
+ /* Then build the states for all destinations. */
+ for (i = 0; i < ndests; ++i)
+ {
+ int next_node;
+ re_node_set_empty (&follows);
+ /* Merge the follows of this destination states. */
+ for (j = 0; j < dests_node[i].nelem; ++j)
+ {
+ next_node = dfa->nexts[dests_node[i].elems[j]];
+ if (next_node != -1)
+ {
+ err = re_node_set_merge (&follows, dfa->eclosures + next_node);
+ if (BE (err != REG_NOERROR, 0))
+ goto out_free;
+ }
+ }
+ /* If search flag is set, merge the initial state. */
+ if (fl_search)
+ {
+#ifdef RE_ENABLE_I18N
+ int not_initial = 0;
+ for (j = 0; j < follows.nelem; ++j)
+ if (dfa->nodes[follows.elems[j]].type == CHARACTER)
+ {
+ not_initial = dfa->nodes[follows.elems[j]].mb_partial;
+ break;
+ }
+ if (!not_initial)
+#endif
+ {
+ err = re_node_set_merge (&follows,
+ dfa->init_state->entrance_nodes);
+ if (BE (err != REG_NOERROR, 0))
+ goto out_free;
+ }
+ }
+ dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
+ if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0))
+ goto out_free;
+ /* If the new state has context constraint,
+ build appropriate states for these contexts. */
+ if (dest_states[i]->has_constraint)
+ {
+ dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
+ CONTEXT_WORD);
+ if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))
+ goto out_free;
+ dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
+ CONTEXT_NEWLINE);
+ if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0))
+ goto out_free;
+ }
+ else
+ {
+ dest_states_word[i] = dest_states[i];
+ dest_states_nl[i] = dest_states[i];
+ }
+ bitset_merge (acceptable, dests_ch[i]);
+ }
+
+ /* Update the transition table. */
+ /* For all characters ch...: */
+ for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
+ for (j = 0; j < UINT_BITS; ++j, ++ch)
+ if ((acceptable[i] >> j) & 1)
+ {
+ /* The current state accepts the character ch. */
+ if (IS_WORD_CHAR (ch))
+ {
+ for (k = 0; k < ndests; ++k)
+ if ((dests_ch[k][i] >> j) & 1)
+ {
+ /* k-th destination accepts the word character ch. */
+ trtable[ch] = dest_states_word[k];
+ /* There must be only one destination which accepts
+ character ch. See group_nodes_into_DFAstates. */
+ break;
+ }
+ }
+ else /* not WORD_CHAR */
+ {
+ for (k = 0; k < ndests; ++k)
+ if ((dests_ch[k][i] >> j) & 1)
+ {
+ /* k-th destination accepts the non-word character ch. */
+ trtable[ch] = dest_states[k];
+ /* There must be only one destination which accepts
+ character ch. See group_nodes_into_DFAstates. */
+ break;
+ }
+ }
+ }
+ /* new line: its entry is overwritten with the newline-context state. */
+ if (bitset_contain (acceptable, NEWLINE_CHAR))
+ {
+ /* The current state accepts newline character. */
+ for (k = 0; k < ndests; ++k)
+ if (bitset_contain (dests_ch[k], NEWLINE_CHAR))
+ {
+ /* k-th destination accepts newline character. */
+ trtable[NEWLINE_CHAR] = dest_states_nl[k];
+ /* There must be only one destination which accepts
+ newline. See group_nodes_into_DFAstates. */
+ break;
+ }
+ }
+
+ if (dest_states_malloced)
+ free (dest_states);
+
+ re_node_set_free (&follows);
+ for (i = 0; i < ndests; ++i)
+ re_node_set_free (dests_node + i);
+
+ if (dests_node_malloced)
+ free (dests_node);
+
+ return trtable;
+}
+
+/* Group all nodes belonging to STATE into several destinations.
+   Then for all destinations, set the nodes belonging to the destination
+   to DESTS_NODE[i] and set the characters accepted by the destination
+   to DESTS_CH[i].  Return the number of destinations, or -1 on error.  */
+
+static int
+group_nodes_into_DFAstates (preg, state, dests_node, dests_ch)
+     const regex_t *preg;
+     const re_dfastate_t *state;
+     re_node_set *dests_node;
+     bitset *dests_ch;
+{
+  reg_errcode_t err;
+  const re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  int i, j, k, result; /* RESULT: int status of re_node_set_insert. */
+  int ndests; /* Number of the destinations from `state'. */
+  bitset accepts; /* Characters a node can accept. */
+  const re_node_set *cur_nodes = &state->nodes;
+  bitset_empty (accepts);
+  ndests = 0;
+
+  /* For all the nodes belonging to `state', */
+  for (i = 0; i < cur_nodes->nelem; ++i)
+    {
+      re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
+      re_token_type_t type = node->type;
+      unsigned int constraint = node->constraint;
+
+      /* Enumerate all single byte character this node can accept. */
+      if (type == CHARACTER)
+        bitset_set (accepts, node->opr.c);
+      else if (type == SIMPLE_BRACKET)
+        {
+          bitset_merge (accepts, node->opr.sbcset);
+        }
+      else if (type == OP_PERIOD)
+        {
+          bitset_set_all (accepts);
+          if (!(preg->syntax & RE_DOT_NEWLINE))
+            bitset_clear (accepts, '\n');
+          if (preg->syntax & RE_DOT_NOT_NULL)
+            bitset_clear (accepts, '\0');
+        }
+      else
+        continue;
+
+      /* Check the `accepts' and sift the characters which are not
+         match it the context. */
+      if (constraint)
+        {
+          if (constraint & NEXT_WORD_CONSTRAINT)
+            for (j = 0; j < BITSET_UINTS; ++j)
+              accepts[j] &= dfa->word_char[j];
+          if (constraint & NEXT_NOTWORD_CONSTRAINT)
+            for (j = 0; j < BITSET_UINTS; ++j)
+              accepts[j] &= ~dfa->word_char[j];
+          if (constraint & NEXT_NEWLINE_CONSTRAINT)
+            {
+              int accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
+              bitset_empty (accepts);
+              if (accepts_newline)
+                bitset_set (accepts, NEWLINE_CHAR);
+              else
+                continue;
+            }
+        }
+
+      /* Then divide `accepts' into DFA states, or create a new
+         state. */
+      for (j = 0; j < ndests; ++j)
+        {
+          bitset intersec; /* Intersection sets, see below. */
+          bitset remains;
+          /* Flags, see below. */
+          int has_intersec, not_subset, not_consumed;
+
+          /* Optimization, skip if this state doesn't accept the character. */
+          if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
+            continue;
+
+          /* Enumerate the intersection set of this state and `accepts'. */
+          has_intersec = 0;
+          for (k = 0; k < BITSET_UINTS; ++k)
+            has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
+          /* And skip if the intersection set is empty. */
+          if (!has_intersec)
+            continue;
+
+          /* Then check if this state is a subset of `accepts'. */
+          not_subset = not_consumed = 0;
+          for (k = 0; k < BITSET_UINTS; ++k)
+            {
+              not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
+              not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
+            }
+
+          /* If this state isn't a subset of `accepts', create a
+             new group state, which has the `remains'. */
+          if (not_subset)
+            {
+              bitset_copy (dests_ch[ndests], remains);
+              bitset_copy (dests_ch[j], intersec);
+              err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
+              if (BE (err != REG_NOERROR, 0))
+                goto error_return;
+              ++ndests;
+            }
+
+          /* Put the position in the current group. */
+          result = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
+          if (BE (result < 0, 0))
+            goto error_return;
+
+          /* If all characters are consumed, go to next node. */
+          if (!not_consumed)
+            break;
+        }
+      /* Some characters remain, create a new group. */
+      if (j == ndests)
+        {
+          bitset_copy (dests_ch[ndests], accepts);
+          err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
+          if (BE (err != REG_NOERROR, 0))
+            goto error_return;
+          ++ndests;
+          bitset_empty (accepts);
+        }
+    }
+  return ndests;
+ error_return:
+  for (j = 0; j < ndests; ++j)
+    re_node_set_free (dests_node + j);
+  return -1;
+}
+
+#ifdef RE_ENABLE_I18N
+/* Check how many bytes the node `dfa->nodes[node_idx]' accepts at
+ position STR_IDX of INPUT, and return that number of bytes.
+ The result is 0 when the node does not accept the input there, and
+ also when both the collating element and the character at STR_IDX
+ are at most one byte long (elem_len <= 1 && char_len <= 1).
+
+ This function handles the nodes which can accept one multibyte
+ character or one collating element, such as '.' (OP_PERIOD) and
+ '[a-z]' (COMPLEX_BRACKET), as opposed to the other nodes, which can
+ only accept one byte. Any other node type yields 0. */
+
+static int
+check_node_accept_bytes (preg, node_idx, input, str_idx)
+ const regex_t *preg;
+ int node_idx, str_idx;
+ const re_string_t *input;
+{
+ const re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+ const re_token_t *node = dfa->nodes + node_idx;
+ int elem_len = re_string_elem_size_at (input, str_idx);
+ int char_len = re_string_char_size_at (input, str_idx);
+ int i;
+# ifdef _LIBC
+ int j;
+ uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
+# endif /* _LIBC */
+ if (elem_len <= 1 && char_len <= 1)
+ return 0;
+ if (node->type == OP_PERIOD)
+ {
+ /* '.' accepts any one character except the following two cases:
+ a newline when RE_DOT_NEWLINE is not set, and a NUL byte when
+ RE_DOT_NOT_NULL is set. */
+ if ((!(preg->syntax & RE_DOT_NEWLINE) &&
+ re_string_byte_at (input, str_idx) == '\n') ||
+ ((preg->syntax & RE_DOT_NOT_NULL) &&
+ re_string_byte_at (input, str_idx) == '\0'))
+ return 0;
+ return char_len;
+ }
+ else if (node->type == COMPLEX_BRACKET)
+ {
+ const re_charset_t *cset = node->opr.mbcset;
+# ifdef _LIBC
+ const unsigned char *pin = ((char *) re_string_get_buffer (input)
+ + str_idx);
+# endif /* _LIBC */
+ int match_len = 0;
+ /* The wide character is fetched only if some test below needs it. */
+ wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
+ ? re_string_wchar_at (input, str_idx) : 0);
+
+ /* match with multibyte character? */
+ for (i = 0; i < cset->nmbchars; ++i)
+ if (wc == cset->mbchars[i])
+ {
+ match_len = char_len;
+ goto check_node_accept_bytes_match;
+ }
+ /* match with character_class? */
+ for (i = 0; i < cset->nchar_classes; ++i)
+ {
+ wctype_t wt = cset->char_classes[i];
+ if (__iswctype (wc, wt))
+ {
+ match_len = char_len;
+ goto check_node_accept_bytes_match;
+ }
+ }
+
+# ifdef _LIBC
+ if (nrules != 0)
+ {
+ unsigned int in_collseq = 0;
+ const int32_t *table, *indirect;
+ const unsigned char *weights, *extra;
+ const char *collseqwc;
+ int32_t idx;
+ /* This #include defines a local function! */
+# include <locale/weight.h>
+
+ /* match with collating_symbol? */
+ if (cset->ncoll_syms)
+ extra = (const unsigned char *)
+ _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
+ for (i = 0; i < cset->ncoll_syms; ++i)
+ {
+ const unsigned char *coll_sym = extra + cset->coll_syms[i];
+ /* Compare the length of input collating element and
+ the length of current collating element. */
+ if (*coll_sym != elem_len)
+ continue;
+ /* Compare each bytes. */
+ for (j = 0; j < *coll_sym; j++)
+ if (pin[j] != coll_sym[1 + j])
+ break;
+ if (j == *coll_sym)
+ {
+ /* Match if every bytes is equal. */
+ match_len = j;
+ goto check_node_accept_bytes_match;
+ }
+ }
+
+ if (cset->nranges)
+ {
+ if (elem_len <= char_len)
+ {
+ collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
+ in_collseq = collseq_table_lookup (collseqwc, wc);
+ }
+ else
+ in_collseq = find_collation_sequence_value (pin, elem_len);
+ }
+ /* match with range expression? */
+ for (i = 0; i < cset->nranges; ++i)
+ if (cset->range_starts[i] <= in_collseq
+ && in_collseq <= cset->range_ends[i])
+ {
+ match_len = elem_len;
+ goto check_node_accept_bytes_match;
+ }
+
+ /* match with equivalence_class? */
+ if (cset->nequiv_classes)
+ {
+ const unsigned char *cp = pin;
+ table = (const int32_t *)
+ _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
+ weights = (const unsigned char *)
+ _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
+ extra = (const unsigned char *)
+ _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
+ indirect = (const int32_t *)
+ _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
+ idx = findidx (&cp);
+ if (idx > 0)
+ for (i = 0; i < cset->nequiv_classes; ++i)
+ {
+ int32_t equiv_class_idx = cset->equiv_classes[i];
+ size_t weight_len = weights[idx];
+ if (weight_len == weights[equiv_class_idx])
+ {
+ int cnt = 0;
+ while (cnt <= weight_len
+ && (weights[equiv_class_idx + 1 + cnt]
+ == weights[idx + 1 + cnt]))
+ ++cnt;
+ if (cnt > weight_len)
+ {
+ match_len = elem_len;
+ goto check_node_accept_bytes_match;
+ }
+ }
+ }
+ }
+ }
+ else
+# endif /* _LIBC */
+ {
+ /* match with range expression? The buffer holds three
+ one-character wide strings (range start, WC, range end) at
+ offsets 0, 2 and 4, which are compared with wcscoll. */
+#if __GNUC__ >= 2
+ wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'};
+#else
+ wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
+ cmp_buf[2] = wc;
+#endif
+ for (i = 0; i < cset->nranges; ++i)
+ {
+ cmp_buf[0] = cset->range_starts[i];
+ cmp_buf[4] = cset->range_ends[i];
+ if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
+ && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
+ {
+ match_len = char_len;
+ goto check_node_accept_bytes_match;
+ }
+ }
+ }
+ check_node_accept_bytes_match:
+ /* A negated set (cset->non_match) inverts the outcome: a match
+ yields 0, a non-match accepts the longer of the collating
+ element and the character. */
+ if (!cset->non_match)
+ return match_len;
+ else
+ {
+ if (match_len > 0)
+ return 0;
+ else
+ return (elem_len > char_len) ? elem_len : char_len;
+ }
+ }
+ return 0;
+}
+
+# ifdef _LIBC
+/* Return the collation sequence value of the MBS_LEN-byte sequence MBS,
+ looked up in the LC_COLLATE tables of the current locale.
+ Without collation rules (nrules == 0) a single byte is looked up in
+ the _NL_COLLATE_COLLSEQMB table and any other length yields UINT_MAX.
+ With rules, the _NL_COLLATE_SYMB_EXTRAMB table is scanned for an
+ entry whose byte sequence equals MBS.
+ NOTE(review): the scan loop has no exit other than a match, so it
+ appears to assume MBS is always present in the table -- confirm. */
+static unsigned int
+find_collation_sequence_value (mbs, mbs_len)
+ const unsigned char *mbs;
+ size_t mbs_len;
+{
+ uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
+ if (nrules == 0)
+ {
+ if (mbs_len == 1)
+ {
+ /* No valid character. Match it as a single byte character. */
+ const unsigned char *collseq = (const unsigned char *)
+ _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
+ return collseq[mbs[0]];
+ }
+ return UINT_MAX;
+ }
+ else
+ {
+ int32_t idx;
+ const unsigned char *extra = (const unsigned char *)
+ _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
+
+ for (idx = 0; ;)
+ {
+ int mbs_cnt, found = 0;
+ int32_t elem_mbs_len;
+ /* Skip the name of collating element name. */
+ idx = idx + extra[idx] + 1;
+ elem_mbs_len = extra[idx++];
+ if (mbs_len == elem_mbs_len)
+ {
+ for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
+ if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
+ break;
+ if (mbs_cnt == elem_mbs_len)
+ /* Found the entry. */
+ found = 1;
+ }
+ /* Skip the byte sequence of the collating element. */
+ idx += elem_mbs_len;
+ /* Adjust for the alignment. */
+ idx = (idx + 3) & ~3;
+ /* Skip the collation sequence value. */
+ idx += sizeof (uint32_t);
+ /* Skip the wide char sequence of the collating element. */
+ idx = idx + sizeof (uint32_t) * (extra[idx] + 1);
+ /* If we found the entry, return the sequence value. */
+ if (found)
+ return *(uint32_t *) (extra + idx);
+ /* Skip the collation sequence value. */
+ idx += sizeof (uint32_t);
+ }
+ }
+}
+# endif /* _LIBC */
+#endif /* RE_ENABLE_I18N */
+
+/* Decide whether NODE accepts the single byte found at position IDX of
+   the input string held in MCTX.  Returns nonzero if it does, 0 if not.
+   A node carrying a constraint is rejected outright when the context
+   at IDX does not satisfy that constraint.  */
+
+static int
+check_node_accept (preg, node, mctx, idx)
+     const regex_t *preg;
+     const re_token_t *node;
+     const re_match_context_t *mctx;
+     int idx;
+{
+  unsigned char ch;
+
+  if (node->constraint)
+    {
+      /* Compute the context at IDX and test it against the constraint.  */
+      unsigned int context = re_string_context_at (mctx->input, idx,
+						   mctx->eflags,
+						   preg->newline_anchor);
+      if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
+	return 0;
+    }
+
+  ch = re_string_byte_at (mctx->input, idx);
+
+  if (node->type == CHARACTER)
+    return node->opr.c == ch;
+  if (node->type == SIMPLE_BRACKET)
+    return bitset_contain (node->opr.sbcset, ch);
+  if (node->type != OP_PERIOD)
+    return 0;
+
+  /* '.': reject newline unless RE_DOT_NEWLINE is set, and reject NUL
+     when RE_DOT_NOT_NULL is set.  */
+  if (ch == '\n' && !(preg->syntax & RE_DOT_NEWLINE))
+    return 0;
+  if (ch == '\0' && (preg->syntax & RE_DOT_NOT_NULL))
+    return 0;
+  return 1;
+}
+
+/* Extend the buffers, if the buffers have run out.
+ Grows the input string buffers of MCTX (and its state log, when one
+ exists) and then rebuilds the string buffers.
+ Returns REG_NOERROR on success, the error reported by
+ re_string_realloc_buffers, or REG_ESPACE if the state log cannot be
+ enlarged. */
+
+static reg_errcode_t
+extend_buffers (mctx)
+ re_match_context_t *mctx;
+{
+ reg_errcode_t ret;
+ re_string_t *pstr = mctx->input;
+
+ /* Double the lengths of the buffers. */
+ ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
+ if (BE (ret != REG_NOERROR, 0))
+ return ret;
+
+ if (mctx->state_log != NULL)
+ {
+ /* And double the length of state_log.
+ NOTE(review): if re_string_realloc_buffers already updated
+ pstr->bufs_len, this requests twice the new length -- confirm. */
+ re_dfastate_t **new_array;
+ new_array = re_realloc (mctx->state_log, re_dfastate_t *,
+ pstr->bufs_len * 2);
+ if (BE (new_array == NULL, 0))
+ return REG_ESPACE;
+ mctx->state_log = new_array;
+ }
+
+ /* Then reconstruct the buffers. */
+ if (pstr->icase)
+ {
+#ifdef RE_ENABLE_I18N
+ if (MB_CUR_MAX > 1)
+ build_wcs_upper_buffer (pstr);
+ else
+#endif /* RE_ENABLE_I18N */
+ build_upper_buffer (pstr);
+ }
+ else
+ {
+#ifdef RE_ENABLE_I18N
+ if (MB_CUR_MAX > 1)
+ build_wcs_buffer (pstr);
+ else
+#endif /* RE_ENABLE_I18N */
+ {
+ if (pstr->trans != NULL)
+ re_string_translate_buffer (pstr);
+ else
+ pstr->valid_len = pstr->bufs_len;
+ }
+ }
+ return REG_NOERROR;
+}
+
+\f
+/* Functions for matching context. */
+
+/* Initialize MCTX for matching INPUT with execution flags EFLAGS,
+   reserving room for N backreference cache entries and N sub-match
+   tops.  Returns REG_NOERROR on success, REG_ESPACE when out of memory.
+
+   Every field receives a defined value before anything can fail, and a
+   failed allocation releases whatever was obtained.  match_ctx_free
+   reads sub_tops, bkref_ents and nsub_tops, so it may now be called on
+   MCTX whatever the outcome, including when N <= 0.  */
+
+static reg_errcode_t
+match_ctx_init (mctx, eflags, input, n)
+     re_match_context_t *mctx;
+     int eflags, n;
+     re_string_t *input;
+{
+  mctx->eflags = eflags;
+  mctx->input = input;
+  mctx->match_last = -1;
+  mctx->bkref_ents = NULL;
+  mctx->sub_tops = NULL;
+  mctx->nbkref_ents = 0;
+  mctx->abkref_ents = n;
+  mctx->max_mb_elem_len = 1;
+  mctx->nsub_tops = 0;
+  mctx->asub_tops = n;
+  if (n > 0)
+    {
+      mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
+      mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
+      if (BE (mctx->bkref_ents == NULL || mctx->sub_tops == NULL, 0))
+	{
+	  /* One of the two may have succeeded; drop it and leave the
+	     context empty but consistent.  */
+	  re_free (mctx->bkref_ents);
+	  re_free (mctx->sub_tops);
+	  mctx->bkref_ents = NULL;
+	  mctx->sub_tops = NULL;
+	  mctx->abkref_ents = 0;
+	  mctx->asub_tops = 0;
+	  return REG_ESPACE;
+	}
+    }
+  return REG_NOERROR;
+}
+
+/* Drop every entry of MCTX that depends on the current input: the
+   sub-match tops are freed and both entry counters are reset to zero.
+   The matcher has to call this whenever it moves the start index of
+   the input or switches to another input string.  */
+
+static void
+match_ctx_clean (mctx)
+     re_match_context_t *mctx;
+{
+  mctx->nbkref_ents = 0;
+  /* nsub_tops must stay valid until the tops have been released.  */
+  match_ctx_free_subtops (mctx);
+  mctx->nsub_tops = 0;
+}
+
+/* Release everything MCTX owns: the sub-match tops themselves, the
+   array that holds them, and the backreference cache.  */
+
+static void
+match_ctx_free (mctx)
+     re_match_context_t *mctx;
+{
+  /* The tops are reached through sub_tops, so free them first.  */
+  match_ctx_free_subtops (mctx);
+  re_free (mctx->bkref_ents);
+  re_free (mctx->sub_tops);
+}
+
+/* Release the first MCTX->NSUB_TOPS elements of MCTX->SUB_TOPS, each
+   with its list of sub-match lasts and its path.  The SUB_TOPS array
+   itself and the NSUB_TOPS counter are left untouched.  */
+
+static void
+match_ctx_free_subtops (mctx)
+     re_match_context_t *mctx;
+{
+  int t;
+  for (t = 0; t < mctx->nsub_tops; ++t)
+    {
+      re_sub_match_top_t *sub = mctx->sub_tops[t];
+      int k = 0;
+      while (k < sub->nlasts)
+	{
+	  re_sub_match_last_t *ent = sub->lasts[k++];
+	  re_free (ent->path.array);
+	  re_free (ent);
+	}
+      re_free (sub->lasts);
+      if (sub->path != NULL)
+	{
+	  re_free (sub->path->array);
+	  re_free (sub->path);
+	}
+      free (sub);
+    }
+}
+
+/* Add a new backreference entry to MCTX, recording that NODE matched
+   the sub-expression span FROM..TO at STR_IDX.
+   Note that we assume that the caller never calls this function with a
+   duplicate entry, and always calls it with a STR_IDX which isn't
+   smaller than that of any existing entry.
+
+   Returns REG_NOERROR on success or REG_ESPACE if the cache cannot be
+   enlarged.  On failure the existing array is left intact and still
+   owned by MCTX, so match_ctx_free releases it exactly once.  (Freeing
+   it here left a dangling pointer that match_ctx_free freed again.)  */
+
+static reg_errcode_t
+match_ctx_add_entry (mctx, node, str_idx, from, to)
+     re_match_context_t *mctx;
+     int node, str_idx, from, to;
+{
+  if (mctx->nbkref_ents >= mctx->abkref_ents)
+    {
+      struct re_backref_cache_entry* new_entry;
+      new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
+			      mctx->abkref_ents * 2);
+      if (BE (new_entry == NULL, 0))
+	return REG_ESPACE;
+      mctx->bkref_ents = new_entry;
+      /* Clear the newly gained upper half of the array.  */
+      memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
+	      sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
+      mctx->abkref_ents *= 2;
+    }
+  mctx->bkref_ents[mctx->nbkref_ents].node = node;
+  mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
+  mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
+  mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
+  mctx->bkref_ents[mctx->nbkref_ents++].flag = 0;
+  /* Track the longest span any backreference has matched so far.  */
+  if (mctx->max_mb_elem_len < to - from)
+    mctx->max_mb_elem_len = to - from;
+  return REG_NOERROR;
+}
+
+/* Binary-search MCTX->BKREF_ENTS for the first entry whose str_idx is
+   not smaller than STR_IDX and return its index; the result equals
+   MCTX->NBKREF_ENTS when there is no such entry.
+   Note that MCTX->BKREF_ENTS is already sorted by the str_idx field of
+   its entries.  */
+
+static int
+search_cur_bkref_entry (mctx, str_idx)
+     re_match_context_t *mctx;
+     int str_idx;
+{
+  int left, right, mid;
+  right = mctx->nbkref_ents;
+  for (left = 0; left < right;)
+    {
+      /* Written this way so the midpoint cannot overflow an int.  */
+      mid = left + (right - left) / 2;
+      if (mctx->bkref_ents[mid].str_idx < str_idx)
+	left = mid + 1;
+      else
+	right = mid;
+    }
+  return left;
+}
+
+/* Reset the flag field of every backreference cache entry in MCTX.  */
+static void
+match_ctx_clear_flag (mctx)
+     re_match_context_t *mctx;
+{
+  int k = mctx->nbkref_ents;
+  while (--k >= 0)
+    mctx->bkref_ents[k].flag = 0;
+}
+
+/* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which
+   matches at STR_IDX.  Returns REG_NOERROR on success or REG_ESPACE
+   when out of memory.
+
+   The recorded capacity (asub_tops) is updated only after the array
+   has actually been enlarged, so a failed realloc leaves MCTX
+   describing the array it really has.  */
+
+static reg_errcode_t
+match_ctx_add_subtop (mctx, node, str_idx)
+     re_match_context_t *mctx;
+     int node, str_idx;
+{
+#ifdef DEBUG
+  assert (mctx->sub_tops != NULL);
+  assert (mctx->asub_tops > 0);
+#endif
+  if (mctx->nsub_tops == mctx->asub_tops)
+    {
+      int new_asub_tops = mctx->asub_tops * 2;
+      re_sub_match_top_t **new_array;
+      new_array = re_realloc (mctx->sub_tops, re_sub_match_top_t *,
+			      new_asub_tops);
+      if (BE (new_array == NULL, 0))
+	return REG_ESPACE;
+      mctx->sub_tops = new_array;
+      mctx->asub_tops = new_asub_tops;
+    }
+  mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t));
+  if (mctx->sub_tops[mctx->nsub_tops] == NULL)
+    return REG_ESPACE;
+  mctx->sub_tops[mctx->nsub_tops]->node = node;
+  mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
+  return REG_NOERROR;
+}
+
+/* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which
+   matches at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUBTOP.
+   Returns the new zero-initialized entry, or NULL when out of memory.
+
+   The recorded capacity (alasts) is updated only after the array has
+   actually been enlarged, so a failed realloc leaves SUBTOP describing
+   the array it really has.  */
+
+static re_sub_match_last_t *
+match_ctx_add_sublast (subtop, node, str_idx)
+     re_sub_match_top_t *subtop;
+     int node, str_idx;
+{
+  re_sub_match_last_t *new_entry;
+  if (subtop->nlasts == subtop->alasts)
+    {
+      /* 2n + 1, so that growing also works from a capacity of zero.  */
+      int new_alasts = 2 * subtop->alasts + 1;
+      re_sub_match_last_t **new_array;
+      new_array = re_realloc (subtop->lasts, re_sub_match_last_t *,
+			      new_alasts);
+      if (BE (new_array == NULL, 0))
+	return NULL;
+      subtop->lasts = new_array;
+      subtop->alasts = new_alasts;
+    }
+  new_entry = calloc (1, sizeof (re_sub_match_last_t));
+  if (BE (new_entry == NULL, 0))
+    return NULL;
+  subtop->lasts[subtop->nlasts] = new_entry;
+  new_entry->node = node;
+  new_entry->str_idx = str_idx;
+  ++subtop->nlasts;
+  return new_entry;
+}
+
+/* Fill in the sift context SCTX from the given arguments.  The current
+   backreference and the closing sub-expression index both start out as
+   -1, and the limits node set starts out empty.  */
+static void
+sift_ctx_init (sctx, sifted_sts, limited_sts, last_node, last_str_idx,
+	       check_subexp)
+     re_sift_context_t *sctx;
+     re_dfastate_t **sifted_sts, **limited_sts;
+     int last_node, last_str_idx, check_subexp;
+{
+  /* Values handed in by the caller.  */
+  sctx->last_str_idx = last_str_idx;
+  sctx->last_node = last_node;
+  sctx->check_subexp = check_subexp;
+  sctx->limited_states = limited_sts;
+  sctx->sifted_states = sifted_sts;
+
+  /* Fixed initial state.  */
+  sctx->cls_subexp_idx = -1;
+  sctx->cur_bkref = -1;
+  re_node_set_init_empty (&sctx->limits);
+}
--- /dev/null
+/*
+ * UCW Library -- Running of Commands
+ *
+ * (c) 2004 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <alloca.h>
+#include <unistd.h>
+#include <sys/wait.h>
+
+/*
+ * Replace the current process image by CMD. ARGS is a NULL-terminated
+ * list of char * arguments which follow CMD itself in the argument
+ * vector. This function never returns: if execv() fails, an error is
+ * logged and the process exits with status 255.
+ */
+void NONRET
+exec_command_v(const char *cmd, va_list args)
+{
+  va_list scan;
+  char *a;
+
+  /* First pass: count the slots needed (argv[0] and the final NULL included) */
+  int slots = 2;
+  va_copy(scan, args);
+  for (a = va_arg(scan, char *); a; a = va_arg(scan, char *))
+    slots++;
+  va_end(scan);
+
+  /* Second pass: fill in the vector */
+  char **argv = alloca(sizeof(char *) * slots);
+  int i = 0;
+  argv[i++] = (char *)cmd;
+  va_copy(scan, args);
+  for (a = va_arg(scan, char *); a; a = va_arg(scan, char *))
+    argv[i++] = a;
+  va_end(scan);
+  argv[i] = NULL;
+
+  execv(cmd, argv);
+
+  /* We get here only if execv() has failed */
+  char echo[256];
+  echo_command_v(echo, sizeof(echo), cmd, args);
+  msg(L_ERROR, "Cannot execute %s: %m", echo);
+  exit(255);
+}
+
+/*
+ * Run CMD with the NULL-terminated argument list ARGS in a child
+ * process and wait for it to finish. Returns 1 if the command succeeded
+ * and 0 if fork() failed or format_exit_status() reported a failure
+ * (both cases are logged). Dies if waitpid() fails.
+ */
+int
+run_command_v(const char *cmd, va_list args)
+{
+  pid_t child = fork();
+  if (child < 0)
+    {
+      msg(L_ERROR, "fork() failed: %m");
+      return 0;
+    }
+  if (!child)
+    exec_command_v(cmd, args);		/* In the child; never returns */
+
+  /* In the parent: collect the exit status of the child */
+  int stat;
+  char status_msg[EXIT_STATUS_MSG_SIZE];
+  if (waitpid(child, &stat, 0) < 0)
+    die("waitpid() failed: %m");
+  if (!format_exit_status(status_msg, stat))
+    return 1;
+
+  char echo[256];
+  echo_command_v(echo, sizeof(echo), cmd, args);
+  msg(L_ERROR, "`%s' failed: %s", echo, status_msg);
+  return 0;
+}
+
+/*
+ * Format CMD and the NULL-terminated argument list ARGS as a single
+ * space-separated string in BUF, which is SIZE bytes long. If the text
+ * does not fit, it is cut and terminated by "...". The result is always
+ * zero-terminated when SIZE is at least 1.
+ *
+ * ARGS is consumed by this function. A buffer shorter than 4 bytes
+ * cannot hold even the "..." marker; it receives an empty string
+ * (previously `limit' pointed before BUF and memcpy() was called with a
+ * negative length).
+ */
+void
+echo_command_v(char *buf, int size, const char *cmd, va_list args)
+{
+  if (size < 4)
+    {
+      if (size > 0)
+	*buf = 0;
+      return;
+    }
+
+  /* Keep the last 4 bytes for "..." and the terminating zero */
+  char *limit = buf + size - 4;
+  char *p = buf;
+  const char *arg = cmd;
+  do
+    {
+      int l = strlen(arg);
+      if (p != buf && p < limit)
+	*p++ = ' ';
+      if (p+l > limit)
+	{
+	  /* Copy what still fits and mark the truncation */
+	  memcpy(p, arg, limit-p);
+	  strcpy(limit, "...");
+	  return;
+	}
+      memcpy(p, arg, l);
+      p += l;
+    }
+  while (arg = va_arg(args, char *));
+  *p = 0;
+}
+
+/*
+ * Variadic front-end of run_command_v(): the arguments following CMD
+ * are char * strings terminated by NULL. Returns what run_command_v()
+ * returns.
+ */
+int
+run_command(const char *cmd, ...)
+{
+  int result;
+  va_list ap;
+
+  va_start(ap, cmd);
+  result = run_command_v(cmd, ap);
+  va_end(ap);
+  return result;
+}
+
+/*
+ * Variadic front-end of exec_command_v(): the arguments following CMD
+ * are char * strings terminated by NULL. There is no va_end(), because
+ * exec_command_v() is declared NONRET.
+ */
+void NONRET
+exec_command(const char *cmd, ...)
+{
+  va_list ap;
+
+  va_start(ap, cmd);
+  exec_command_v(cmd, ap);
+}
+
+/*
+ * Variadic front-end of echo_command_v(): formats CMD and the following
+ * NULL-terminated char * arguments into BUF of LEN bytes.
+ */
+void
+echo_command(char *buf, int len, const char *cmd, ...)
+{
+  va_list ap;
+
+  va_start(ap, cmd);
+  echo_command_v(buf, len, cmd, ap);
+  va_end(ap);
+}
+
+#ifdef TEST
+
+/*
+ * Self-test: format a command line, log it and then really run it.
+ * The buffer is not called `msg', so that it does not hide the msg()
+ * logging function used by the rest of this file.
+ */
+int main(void)
+{
+  char cmdline[1024];
+  echo_command(cmdline, sizeof(cmdline), "/bin/echo", "datel", "strakapoud", NULL);
+  msg(L_INFO, "Running <%s>", cmdline);
+  run_command("/bin/echo", "datel", "strakapoud", NULL);
+  return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * The UCW Library -- POSIX semaphores wrapper
+ *
+ * (c) 2006 Robert Spalek <robert@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_SEMAPHORE_H
+#define _UCW_SEMAPHORE_H
+
+#include <semaphore.h>
+
+#ifdef CONFIG_DARWIN
+
+#include <unistd.h>
+#include <stdio.h>
+
+/* In Darwin, sem_init() is unfortunately not implemented and the guide
+ * recommends emulating it using sem_open(). */
+
+/*
+ * Create a new semaphore with initial value 0. Each call opens a named
+ * semaphore whose name is built from the process ID and a per-process
+ * counter. The name buffer is large enough for any PID and counter
+ * value and is filled by snprintf(), so it cannot overflow (the former
+ * 20-byte buffer could). The counter is unsigned, hence %u.
+ */
+static inline sem_t *
+sem_alloc(void)
+{
+  static uns cnt = 0;
+  char buf[64];
+  snprintf(buf, sizeof(buf), "tmp/sem-%d-%u", (int) getpid(), cnt++);
+  sem_t *sem = sem_open(buf, O_CREAT, 0777, 0);
+  ASSERT(sem != (sem_t*) SEM_FAILED);
+  return sem;
+}
+
+/*
+ * Release a semaphore obtained from sem_alloc().
+ * NOTE(review): the name is only closed, never passed to sem_unlink() --
+ * confirm whether the named semaphore is meant to persist.
+ */
+static inline void
+sem_free(sem_t *sem)
+{
+  sem_close(sem);
+}
+
+#else
+
+/*
+ * Create a new semaphore with initial value 0, not shared between
+ * processes (pshared == 0), in memory obtained from xmalloc().
+ */
+static inline sem_t *
+sem_alloc(void)
+{
+  sem_t *sem = xmalloc(sizeof(sem_t));
+  int res = sem_init(sem, 0, 0);
+  ASSERT(!res);
+  return sem;
+}
+
+/* Destroy a semaphore obtained from sem_alloc() and free its memory. */
+static inline void
+sem_free(sem_t *sem)
+{
+  sem_destroy(sem);
+  xfree(sem);
+}
+
+#endif
+
+#endif
--- /dev/null
+# Helper programs and the function library used by shell scripts
+
+DIRS+=lib/shell
+PROGS+=$(addprefix $(o)/lib/shell/,config logger)
+DATAFILES+=$(o)/lib/shell/libucw.sh
+
+# Both helpers are linked against the UCW library
+$(o)/lib/shell/config: $(o)/lib/shell/config.o $(LIBUCW)
+$(o)/lib/shell/logger: $(o)/lib/shell/logger.o $(LIBUCW)
+
+# The test of the config helper needs the built program
+TESTS+=$(o)/lib/shell/config.test
+
+$(o)/lib/shell/config.test: $(o)/lib/shell/config
--- /dev/null
+/*
+ * UCW Library -- Shell Interface to Configuration Files
+ *
+ * (c) 2002--2005 Martin Mares <mj@ucw.cz>
+ * (c) 2006 Robert Spalek <robert@ucw.cz>
+ * (c) 2006 Pavel Charvat <pchar@ucw.cz>
+ *
+ * Once we were using this beautiful Shell version, but it turned out
+ * that it doesn't work with nested config files:
+ *
+ * eval `sed <cf/sherlock '/^#/d;/^ *$/d;s/ \+$//;
+ * h;s@[^ ]*@@;x;s@[ ].*@@;y/abcdefghijklmnopqrstuvwxyz/ABCDEFGHIJKLMNOPQRSTUVWXYZ/;G;s/\n//;
+ * /^\[SECTION\]/,/^\[/ {; /^[A-Z]/ { s/^\([^ ]\+\)[ ]*\(.*\)$/SH_\1="\2"/; p; }; };
+ * d;'`
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/conf.h"
+#include "lib/getopt.h"
+#include "lib/conf-internal.h"
+#include "lib/clists.h"
+#include "lib/mempool.h"
+#include "lib/chartype.h"
+#include "lib/bbuf.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <alloca.h>
+
+/* Print the usage text (the grammar of the <sections> argument, the
+ * type prefixes and the modifiers) to stderr and exit with status 1. */
+static void
+help(void)
+{
+ fputs("\n\
+Usage: config [-C<configfile>] [-S<section>.<option>=<value>] <sections>\n\
+\n\
+<sections>\t<section>[;<sections>]\n\
+<section>\t[!]<name>{[<items>]}\n\
+<items>\t\t[-]<item>[;<items>]\n\
+<item>\t\t<static> | <array> | <list>\n\
+<static>\t<type><name>[=<value>]\n\
+<list>\t\t@<name>{[<items>]}\n\
+<array>\t\t<type><name><left-bracket>[<number>]<right-bracket>\n\
+<value>\t\t[a-zA-Z0-9.-/]* | 'string without single quotes'<value> | \"c-like string\"<value>\n\
+\n\
+Types:\n\
+<empty>\t\tString\n\
+#\t\t32-bit integer\n\
+##\t\t64-bit integer\n\
+$\t\tFloating point number\n\
+\n\
+Modifiers:\n\
+!\t\tReport unknown items as errors\n\
+-\t\tDo not dump item's value\n\
+", stderr);
+ exit(1);
+}
+
+/* Storage for the value of one item; which member is used depends on
+ * the type and class of the item. */
+union value {
+ void *v_ptr; /* CT_STRING values */
+ int v_int; /* CT_INT values */
+ u64 v_u64; /* CT_U64 values */
+ double v_double; /* CT_DOUBLE values */
+ clist list;
+};
+
+#define FLAG_HIDE 0x1 /* Item prefixed by '-': parsed, but not dumped */
+#define FLAG_NO_UNKNOWN 0x2 /* Section prefixed by '!' */
+
+/* One item declared on the command line */
+struct item {
+ cnode node; /* Link in the list of its section */
+ uns flags; /* FLAG_xxx */
+ struct cf_item cf; /* Description of the item for the config parser */
+ union value value; /* Value of items which do not live inside a list */
+ uns index; /* Number of array-style values dumped so far */
+};
+
+/* A section or a list: an item which contains further items */
+struct section {
+ struct item item;
+ clist list; /* Items of this section */
+ uns count; /* Number of items in the list */
+ uns size; /* Size of a list node; grows with every item of a list */
+};
+
+static struct mempool *pool; /* All parsed structures are allocated here */
+static clist sections; /* Top-level sections */
+static byte *pos; /* Current position in the parsed argument */
+
+/* Advance `pos' over any characters for which Cspace() holds. */
+static void
+parse_white(void)
+{
+  for (; Cspace(*pos); pos++)
+    ;
+}
+
+/* Consume one character at `pos' and die unless it is `c'. */
+static void
+parse_char(byte c)
+{
+  byte got = *pos++;
+  if (got != c)
+    die("Missing '%c'", c);
+}
+
+/*
+ * Read the longest run of Cword() characters at `pos' and return it as
+ * a zero-terminated copy allocated from the pool. Dies if the run is
+ * empty.
+ */
+static byte *
+parse_name(void)
+{
+  byte *start = pos;
+  for (; Cword(*pos); pos++)
+    ;
+  uns n = pos - start;
+  if (n == 0)
+    die("Expected item/section name");
+  byte *copy = mp_alloc(pool, n + 1);
+  memcpy(copy, start, n);
+  copy[n] = 0;
+  return copy;
+}
+
+/*
+ * Parse the items of one section body, starting at `pos' and stopping
+ * at the closing '}' or at the end of the string (neither is consumed).
+ * Every item is appended to section->list and counted in
+ * section->count. Items of a list get the offset of their value within
+ * the list node as cf.ptr, other items point to their own value.
+ *
+ * Fixes against the previous version:
+ *  - the item is added to &section->list (the `&sect' had been turned
+ *    into a section sign, which does not compile),
+ *  - scanning of an unquoted default value stops at the end of the
+ *    string instead of running past the terminating zero,
+ *  - parser error messages are printed via "%s" and never interpreted
+ *    as format strings.
+ */
+static void
+parse_section(struct section *section)
+{
+#define TRY(x) do{byte *_err=(x); if (_err) die("%s", _err); }while(0)
+  for (uns sep = 0; ; sep = 1)
+    {
+      parse_white();
+      if (!*pos || *pos == '}')
+	break;
+      if (sep)
+	parse_char(';');
+      parse_white();
+
+      struct item *item;
+
+      if (*pos == '@')
+	{
+	  /* A nested list: @<name>{<items>} */
+	  pos++;
+	  struct section *sec = mp_alloc_zero(pool, sizeof(*sec));
+	  sec->size = sizeof(cnode);
+	  clist_init(&sec->list);
+	  item = &sec->item;
+	  item->cf.name = parse_name();
+	  item->cf.cls = CC_LIST;
+	  item->cf.number = 1;
+	  parse_white();
+	  parse_char('{');
+	  parse_section(sec);
+	  parse_char('}');
+	}
+      else
+	{
+	  /* A static item or an array: [-]<type><name>[[<number>]][=<value>] */
+	  item = mp_alloc_zero(pool, sizeof(*item));
+	  if (*pos == '-')
+	    {
+	      item->flags |= FLAG_HIDE;
+	      pos++;
+	    }
+	  item->cf.cls = CC_STATIC;
+	  item->cf.number = 1;
+	  switch (*pos)
+	    {
+	    case '#':
+	      if (*++pos == '#')
+		{
+		  pos++;
+		  item->cf.type = CT_U64;
+		}
+	      else
+		item->cf.type = CT_INT;
+	      break;
+	    case '$':
+	      pos++;
+	      item->cf.type = CT_DOUBLE;
+	      break;
+	    default:
+	      if (!Cword(*pos))
+		die("Invalid type syntax");
+	      item->cf.type = CT_STRING;
+	      break;
+	    }
+	  parse_white();
+	  item->cf.name = parse_name();
+	  parse_white();
+	  if (*pos == '[')
+	    {
+	      /* Array length; empty brackets mean any number of elements */
+	      pos++;
+	      parse_white();
+	      item->cf.cls = CC_DYNAMIC;
+	      byte *num = pos;
+	      while (*pos && *pos != ']')
+		pos++;
+	      if (!*pos)
+		die("Missing ']'");
+	      *pos++ = 0;
+	      if (!*num)
+		item->cf.number = CF_ANY_NUM;
+	      else
+		{
+		  int inum;
+		  TRY(cf_parse_int(num, &inum));
+		  if (!inum)
+		    die("Invalid array length");
+		  item->cf.number = inum;
+		}
+	      parse_white();
+	    }
+	  if (*pos == '=')
+	    {
+	      pos++;
+	      parse_white();
+	      if (section->item.cf.cls == CC_LIST)
+		die("List items can not have default values");
+	      if (item->cf.cls == CC_DYNAMIC)
+		die("Arrays can not have default values");
+	      /* The value is unquoted in place: `d' never overtakes `pos' */
+	      byte *def = pos, *d = def;
+	      while (*pos && *pos != ';' && *pos != '}' && !Cspace(*pos))
+		{
+		  if (*pos == '\'')
+		    {
+		      /* Single quotes: copied verbatim */
+		      pos++;
+		      while (*pos != '\'')
+			{
+			  if (!*pos)
+			    die("Unterminated string");
+			  *d++ = *pos++;
+			}
+		      pos++;
+		    }
+		  else if (*pos == '"')
+		    {
+		      /* Double quotes: copied up to the first unescaped quote, then unescaped */
+		      pos++;
+		      byte *start = d;
+		      uns esc = 0;
+		      while (*pos != '"' || esc)
+			{
+			  if (!*pos)
+			    die("Unterminated string");
+			  if (*pos == '\\')
+			    esc ^= 1;
+			  else
+			    esc = 0;
+			  *d++ = *pos++;
+			}
+		      pos++;
+		      *d = 0;
+		      d = str_unesc(start, start);
+		    }
+		  else
+		    *d++ = *pos++;
+		}
+	      uns len = d - def;
+	      byte *buf = mp_alloc(pool, len + 1);
+	      memcpy(buf, def, len);
+	      buf[len] = 0;
+	      switch (item->cf.type)
+		{
+		case CT_STRING:
+		  item->value.v_ptr = buf;
+		  break;
+		case CT_INT:
+		  TRY(cf_parse_int(buf, &item->value.v_int));
+		  break;
+		case CT_U64:
+		  TRY(cf_parse_u64(buf, &item->value.v_u64));
+		  break;
+		case CT_DOUBLE:
+		  TRY(cf_parse_double(buf, &item->value.v_double));
+		  break;
+		default:
+		  ASSERT(0);
+		}
+	    }
+	}
+      if (section->item.cf.cls == CC_LIST)
+	{
+	  /* Inside a list, cf.ptr is the offset of the value in the list node */
+	  item->cf.ptr = (void *)(uintptr_t)section->size;
+	  section->size += sizeof(union value);
+	}
+      else
+	item->cf.ptr = &item->value;
+      clist_add_tail(&section->list, &item->node);
+      section->count++;
+    }
+#undef TRY
+}
+
+/*
+ * Parse the whole argument at `pos': a list of [!]<name>{<items>}
+ * sections separated by semicolons. Every section is appended to the
+ * global list `sections'.
+ *
+ * The section is added to &sections; the `&sect' had been turned into
+ * a section sign, which does not compile.
+ */
+static void
+parse_outer(void)
+{
+  for (uns sep = 0; ; sep = 1)
+    {
+      parse_white();
+      if (!*pos)
+	break;
+      if (sep)
+	parse_char(';');
+      parse_white();
+      struct section *sec = mp_alloc_zero(pool, sizeof(*sec));
+      if (*pos == '!')
+	{
+	  pos++;
+	  sec->item.flags |= FLAG_NO_UNKNOWN;
+	}
+      sec->item.cf.name = parse_name();
+      parse_white();
+      parse_char('{');
+      clist_add_tail(&sections, &sec->item.node);
+      clist_init(&sec->list);
+      parse_section(sec);
+      parse_char('}');
+    }
+}
+
+/*
+ * Build a struct cf_section describing the parsed SECTION: one cf_item
+ * per item, terminated by a CC_END entry. Lists are converted
+ * recursively; for a list, the size of its node is filled in as well.
+ * Everything is allocated from the pool.
+ */
+static struct cf_section *
+generate_section(struct section *section)
+{
+  struct cf_section *out = mp_alloc_zero(pool, sizeof(*out));
+  struct cf_item *slot = mp_alloc_zero(pool, sizeof(struct cf_item) * (section->count + 1));
+
+  out->cfg = slot;
+  if (section->item.cf.cls == CC_LIST)
+    out->size = section->size;
+  CLIST_FOR_EACH(struct item *, item, section->list)
+    {
+      *slot = item->cf;
+      if (slot->cls == CC_LIST)
+	slot->u.sec = generate_section((struct section *)item);
+      slot++;
+    }
+  slot->cls = CC_END;
+  return out;
+}
+
+static bb_t path;
+
+/*
+ * Print one value as a shell assignment CF_<path>_<name>='<value>', or
+ * CF_<path>_<name>[<index>]='<value>' if ARRAY is non-zero (the index
+ * is the per-item counter, incremented first). V points to the value,
+ * whose type is given by item->cf.type. A NULL string is printed as an
+ * empty one and single quotes inside the value are escaped for the
+ * shell.
+ *
+ * The 64-bit value is cast to unsigned long long to match %llu.
+ */
+static void
+dump_value(uns array, struct item *item, void *v)
+{
+  byte buf[128], *value = buf;
+  if (!array)
+    printf("CF_%s_%s='", path.ptr, item->cf.name);
+  else
+    printf("CF_%s_%s[%u]='", path.ptr, item->cf.name, ++item->index);
+  switch (item->cf.type)
+    {
+    case CT_INT:
+      sprintf(buf, "%d", *(int *)v);
+      break;
+    case CT_U64:
+      sprintf(buf, "%llu", (unsigned long long) *(u64 *)v);
+      break;
+    case CT_DOUBLE:
+      sprintf(buf, "%g", *(double *)v);
+      break;
+    case CT_STRING:
+      if (*(byte **)v)
+	value = *(byte **)v;
+      else
+	*value = 0;
+      break;
+    default:
+      ASSERT(0);
+    }
+  while (*value) {
+    /* Close the quotes, emit an escaped quote and open them again */
+    if (*value == '\'')
+      printf("'\\''");
+    else
+      putchar(*value);
+    value++;
+  }
+  printf("'\n");
+}
+
+/*
+ * Dump ITEM and, for lists, everything below it. PTR is the base to
+ * which item->cf.ptr is added to find the value: NULL for items at the
+ * top level of a section, a list node otherwise. PATH_LEN is the length
+ * of the prefix in the global `path' buffer which belongs to the
+ * enclosing section. Items with FLAG_HIDE are skipped.
+ */
+static void
+dump_item(struct item *item, void *ptr, uns path_len)
+{
+ if (item->flags & FLAG_HIDE)
+ return;
+ byte *val = (byte *)((uintptr_t)ptr + (uintptr_t)item->cf.ptr);
+ if (item->cf.cls == CC_LIST)
+ {
+ /* Extend the path by "_<name>" and dump every item for every node of the list */
+ uns len = strlen(item->cf.name);
+ bb_grow(&path, path_len + len + 1);
+ path.ptr[path_len] = '_';
+ memcpy(path.ptr + path_len + 1, item->cf.name, len);
+ CLIST_FOR_EACH(cnode *, ptr2, *(clist *)val)
+ CLIST_FOR_EACH(struct item *, item2, ((struct section *)item)->list)
+ dump_item(item2, ptr2, path_len + len + 1);
+ }
+ else
+ {
+ /* Terminate the path here; deeper levels may have left a longer one behind */
+ bb_grow(&path, path_len + 1)[path_len] = 0;
+ if (item->cf.cls == CC_STATIC)
+ /* Items inside a list (ptr != NULL) are printed with an index */
+ dump_value(!!ptr, item, val);
+ else
+ {
+ /* Otherwise val holds a pointer to an array of DARY_LEN() elements */
+ val = *(void **)val;
+ uns len = DARY_LEN(val);
+ uns size = cf_type_size(item->cf.type, NULL);
+ for (uns i = 0; i < len; i++, val += size)
+ dump_value(1, item, val);
+ }
+ }
+}
+
+/*
+ * The last argument describes the sections and items to extract; it is
+ * parsed and declared to the configuration system. The remaining
+ * arguments are the usual configuration options processed by
+ * cf_getopt(). Finally, all items not marked as hidden are printed as
+ * shell assignments.
+ *
+ * The list of sections is referred to as &sections; the `&sect' had
+ * been turned into a section sign, which does not compile.
+ */
+int main(int argc, char **argv)
+{
+  log_init("config");
+  if (argc < 2)
+    help();
+  /* Cut the section description off the argument vector */
+  pos = argv[argc - 1];
+  argv[argc - 1] = NULL;
+
+  pool = mp_new(0x1000);
+  clist_init(&sections);
+  parse_outer();
+  CLIST_FOR_EACH(struct section *, sec, sections)
+    cf_declare_section(sec->item.cf.name, generate_section(sec), !(sec->item.flags & FLAG_NO_UNKNOWN));
+
+  /* Only configuration options are accepted, anything else is an error */
+  if (cf_getopt(argc - 1, argv, CF_SHORT_OPTS, CF_NO_LONG_OPTS, NULL) != -1)
+    help();
+
+  bb_init(&path);
+  CLIST_FOR_EACH(struct section *, section, sections)
+    {
+      /* The path starts with the name of the section */
+      uns len = strlen(section->item.cf.name);
+      memcpy(bb_grow(&path, len), section->item.cf.name, len);
+      CLIST_FOR_EACH(struct item *, item, section->list)
+	dump_item(item, NULL, len);
+    }
+  bb_done(&path);
+
+  return 0;
+}
+
--- /dev/null
+# Tests for configuration parser
+
+Run: ../obj/lib/shell/config -C/dev/null -S 'sec1{int1=23; long1=1234567812345678; long2=4321; str1="s1"; str2="s2"}' 'sec1 {#int1; ##long1; -str1; str2; #int2=123; ##long2=1234; #int3=0x10; #int4; $dbl1=001.100; $dbl2}; sec2{str3}'
+Out: CF_sec1_int1='23'
+ CF_sec1_long1='1234567812345678'
+ CF_sec1_str2='s2'
+ CF_sec1_int2='123'
+ CF_sec1_long2='4321'
+ CF_sec1_int3='16'
+ CF_sec1_int4='0'
+ CF_sec1_dbl1='1.1'
+ CF_sec1_dbl2='0'
+ CF_sec2_str3=''
+
+Run: ../obj/lib/shell/config -C/dev/null -S 'sec1{list1 1 a1 b1; list1:clear; list1 2 a2 b2 3 a3 b3}' 'sec1 {@list1 {#int1; str1; -str2}}'
+Out: CF_sec1_list1_int1[1]='2'
+ CF_sec1_list1_str1[1]='a2'
+ CF_sec1_list1_int1[2]='3'
+ CF_sec1_list1_str1[2]='a3'
+
+Run: ../obj/lib/shell/config -C/dev/null -S 'sec1{ar1 a b c d; ar1 a b c; ar2 1 2; ar3 1.1}' 'sec1 {ar1[]; #ar2[2]; $ar3[-2]}'
+Out: CF_sec1_ar1[1]='a'
+ CF_sec1_ar1[2]='b'
+ CF_sec1_ar1[3]='c'
+ CF_sec1_ar2[1]='1'
+ CF_sec1_ar2[2]='2'
+ CF_sec1_ar3[1]='1.1'
+
+Run: ../obj/lib/shell/config -C/dev/null -S 'sec1{list1 {str1=1; list2=a b c}; list1 {str1=2; list2=d e}}' 'sec1 {@list1 {str1; @list2{str2}}}'
+Out: CF_sec1_list1_str1[1]='1'
+ CF_sec1_list1_list2_str2[1]='a'
+ CF_sec1_list1_list2_str2[2]='b'
+ CF_sec1_list1_list2_str2[3]='c'
+ CF_sec1_list1_str1[2]='2'
+ CF_sec1_list1_list2_str2[4]='d'
+ CF_sec1_list1_list2_str2[5]='e'
+
+Run: ../obj/lib/shell/config -C/dev/null 'sec{str=a'\''b"c'\''d"\\e'\''f"g}'
+Out: CF_sec_str='ab"cd\e'\''fg'
--- /dev/null
+# The UCW Library -- Shell Functions
+# (c) 2005 Martin Mares <mj@ucw.cz>
+#
+# This software may be freely distributed and used according to the terms
+# of the GNU Lesser General Public License.
+
+# Collect leading -C and -S options of the calling script in UCW_CF and
+# remove them from the positional parameters. An option consisting of
+# just the two characters takes its value from the next parameter,
+# otherwise the value is attached to the option itself.
+UCW_CF=
+while [ "${1:0:2}" = "-C" -o "${1:0:2}" = "-S" ] ; do
+ if [ -z "${1:2:1}" ] ; then
+ UCW_CF="$UCW_CF $1 $2"
+ shift 2
+ else
+ UCW_CF="$UCW_CF $1"
+ shift 1
+ fi
+done
+
+function log # msg
+{
+ bin/logger $UCW_PROGNAME I "$1"
+}
+
+function errlog # msg
+{
+ bin/logger $UCW_PROGNAME E "$1"
+}
+
+function warnlog # msg
+{
+ bin/logger $UCW_PROGNAME E "$1"
+}
+
+function die # msg
+{
+ bin/logger $UCW_PROGNAME ! "$1"
+ exit 1
+}
+
+function parse-config # section vars...
+{
+ eval `bin/config$UCW_CF "$@"`
+}
--- /dev/null
+/*
+ * UCW Library Utilities -- A Simple Logger for use in shell scripts
+ *
+ * (c) 2001 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+
+#include <stdio.h>
+#include <string.h>
+
+/*
+ * Usage: logger [<logname>:]<progname> <level> [<text>]
+ *
+ * Logs <text> at the one-letter <level> under the name <progname>. If
+ * no text is given, every line of the standard input is logged as a
+ * separate message. If <logname> is present, the log goes to that file.
+ *
+ * The text is always passed to msg() as an argument of "%s", never as
+ * the format string itself, so percent signs in it are logged verbatim.
+ */
+int
+main(int argc, char **argv)
+{
+  byte buf[1024], *c;
+
+  log_init("logger");
+  if (argc < 3 || argc > 4 || strlen(argv[2]) != 1)
+    die("Usage: logger [<logname>:]<progname> <level> [<text>]");
+  if (c = strchr(argv[1], ':'))
+    {
+      /* Split "<logname>:<progname>" in place */
+      *c++ = 0;
+      log_init(c);
+      log_file(argv[1]);
+    }
+  else
+    log_init(argv[1]);
+  if (argc > 3)
+    msg(argv[2][0], "%s", argv[3]);
+  else
+    while (fgets(buf, sizeof(buf), stdin))
+      {
+	/* Strip the trailing newline, if there is one */
+	c = strchr(buf, '\n');
+	if (c)
+	  *c = 0;
+	msg(argv[2][0], "%s", buf);
+      }
+  return 0;
+}
--- /dev/null
+/*
+ * UCW Library -- Catching of signals and calling callback functions
+ *
+ * (c) 2004, Robert Spalek <robert@ucw.cz>
+ * (c) 2006 Martin Mares <mj@ucw.cz>
+ */
+
+#include "lib/lib.h"
+#include "lib/threads.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <signal.h>
+
+static int sig_handler_nest[NSIG];
+static struct sigaction sig_handler_old[NSIG];
+
+/*
+ * The handler installed by handle_signal(). It calls the handler
+ * registered for SIG in the context of the current thread and aborts
+ * the program if there is no handler table, no handler for this signal,
+ * or if the handler returns a non-zero value.
+ */
+static void
+signal_handler_internal(int sig)
+{
+  struct ucwlib_context *ctx = ucwlib_thread_context();
+  if (!ctx->signal_handlers)
+    abort();
+  if (!ctx->signal_handlers[sig])
+    abort();
+  if (ctx->signal_handlers[sig](sig))
+    abort();
+}
+
+/*
+ * Start catching signal SIGNUM. The calls are counted: only the first
+ * one installs signal_handler_internal() (with SA_NODEFER) and saves
+ * the previous action, the others merely increase the counter. Dies if
+ * sigaction() fails.
+ */
+void
+handle_signal(int signum)
+{
+  ucwlib_lock();
+  int nested = sig_handler_nest[signum]++;
+  if (!nested)
+    {
+      struct sigaction sa;
+      bzero(&sa, sizeof(sa));
+      sa.sa_flags = SA_NODEFER;
+      sa.sa_handler = signal_handler_internal;
+      if (sigaction(signum, &sa, &sig_handler_old[signum]) < 0)
+	die("sigaction: %m");
+    }
+  ucwlib_unlock();
+}
+
+/*
+ * Undo one call of handle_signal() for SIGNUM. When the counter drops
+ * to zero, the action saved by handle_signal() is restored. Dies if
+ * sigaction() fails.
+ */
+void
+unhandle_signal(int signum)
+{
+  ucwlib_lock();
+  ASSERT(sig_handler_nest[signum]);
+  sig_handler_nest[signum]--;
+  if (sig_handler_nest[signum] == 0
+      && sigaction(signum, &sig_handler_old[signum], NULL) < 0)
+    die("sigaction: %m");
+  ucwlib_unlock();
+}
+
+/*
+ * Register NEW as the handler of signal SIGNUM in the context of the
+ * current thread and return the handler registered before. The table
+ * of handlers is allocated (zeroed) on first use.
+ */
+sh_sighandler_t
+set_signal_handler(int signum, sh_sighandler_t new)
+{
+  struct ucwlib_context *ctx = ucwlib_thread_context();
+  if (!ctx->signal_handlers)
+    ctx->signal_handlers = xmalloc_zero(NSIG * sizeof(sh_sighandler_t));
+  sh_sighandler_t *slot = &ctx->signal_handlers[signum];
+  sh_sighandler_t previous = *slot;
+  *slot = new;
+  return previous;
+}
--- /dev/null
+/*
+ * UCW Library -- Linked Lists of Simple Items
+ *
+ * (c) 2006 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/mempool.h"
+#include "lib/conf.h"
+#include "lib/simple-lists.h"
+
+/*
+ * Allocate a new simp_node from the mempool MP, append it to the end of
+ * the list L and return it. The payload of the node is left for the
+ * caller to fill in.
+ */
+simp_node *
+simp_append(struct mempool *mp, clist *l)
+{
+  simp_node *node = mp_alloc_fast(mp, sizeof(simp_node));
+  clist_add_tail(l, &node->n);
+  return node;
+}
+
+/*
+ * Allocate a new simp2_node from the mempool MP, append it to the end
+ * of the list L and return it. The payload of the node is left for the
+ * caller to fill in.
+ */
+simp2_node *
+simp2_append(struct mempool *mp, clist *l)
+{
+  simp2_node *node = mp_alloc_fast(mp, sizeof(simp2_node));
+  clist_add_tail(l, &node->n);
+  return node;
+}
+
+/* Configuration sections for common lists */
+
+/* A list of simp_node's; every node has a single item "String" stored in `s' */
+struct cf_section cf_string_list_config = {
+ CF_TYPE(simp_node),
+ CF_ITEMS {
+ CF_STRING("String", PTR_TO(simp_node, s)),
+ CF_END
+ }
+};
+
+/* A list of simp2_node's; every node has items "Src" and "Dest" stored in `s1' and `s2' */
+struct cf_section cf_2string_list_config = {
+ CF_TYPE(simp2_node),
+ CF_ITEMS {
+ CF_STRING("Src", PTR_TO(simp2_node, s1)),
+ CF_STRING("Dest", PTR_TO(simp2_node, s2)),
+ CF_END
+ }
+};
--- /dev/null
+/*
+ * UCW Library -- Linked Lists of Simple Items
+ *
+ * (c) 2006 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_SIMPLE_LISTS_H
+#define _UCW_SIMPLE_LISTS_H
+
+#include "lib/clists.h"
+
+/* A list node carrying one simple value; the members of the union share the same storage */
+typedef struct simp_node {
+ cnode n;
+ union {
+ char *s;
+ void *p;
+ int i;
+ uns u;
+ };
+} simp_node;
+
+/* A list node carrying two simple values, each in a union of its own */
+typedef struct simp2_node {
+ cnode n;
+ union {
+ char *s1;
+ void *p1;
+ int i1;
+ uns u1;
+ };
+ union {
+ char *s2;
+ void *p2;
+ int i2;
+ uns u2;
+ };
+} simp2_node;
+
+struct mempool;
+/* Allocate a node from the mempool and append it to the end of the list */
+simp_node *simp_append(struct mempool *mp, clist *l);
+simp2_node *simp2_append(struct mempool *mp, clist *l);
+
+/* Configuration sections */
+extern struct cf_section cf_string_list_config;
+extern struct cf_section cf_2string_list_config;
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Single-Linked Lists
+ *
+ * (c) 2005 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/slists.h"
+
+static inline snode *
+slist_raw_prev(slist *l, snode *n)
+{
+  /*
+   * Linear search for the node whose successor is `n', starting at the
+   * list head sentinel (so the head itself is returned for the first node).
+   * Running off the end of the list trips the assertion.
+   */
+  for (snode *cur = &l->head; cur; cur = cur->next)
+    if (cur->next == n)
+      return cur;
+  ASSERT(0);
+}
+
+void *
+slist_prev(slist *l, snode *n)
+{
+  /* Predecessor of `n', or NULL when `n' is the first node (its raw predecessor is the head sentinel). */
+  snode *pred = slist_raw_prev(l, n);
+  if (pred == &l->head)
+    return NULL;
+  return pred;
+}
+
+void
+slist_insert_before(slist *l, snode *what, snode *before)
+{
+  /* Link `what' in front of `before'; the predecessor of `before' is found by a linear search. */
+  what->next = before;
+  snode *pred = slist_raw_prev(l, before);
+  pred->next = what;
+}
+
+void
+slist_remove(slist *l, snode *n)
+{
+  /* Unlink `n' by locating its predecessor and removing the node after it. */
+  slist_remove_after(l, slist_raw_prev(l, n));
+}
+
+#ifdef TEST
+
+#include <stdio.h>
+#include <alloca.h>
+
+// Self-test (compiled only with TEST defined): builds a list of 10 stack-allocated
+// nodes, removes two of them and prints the remaining values separated by '/'.
+int main(void)
+{
+ slist l;
+
+ struct x {
+ snode n;
+ int val;
+ };
+
+ slist_init(&l);
+ // Odd values are pushed to the head, even values are appended to the tail
+ for (int i=1; i<=10; i++)
+ {
+ struct x *x = alloca(sizeof(*x));
+ x->val = i;
+ if (i % 2)
+ slist_add_head(&l, &x->n);
+ else
+ slist_add_tail(&l, &x->n);
+ }
+
+ // Remove 5 through its predecessor and 6 through slist_remove(), so both removal paths are exercised
+ struct x *x, *prev;
+ SLIST_WALK_DELSAFE(x, l, prev)
+ if (x->val == 5)
+ slist_remove_after(&l, &prev->n);
+ else if (x->val == 6)
+ slist_remove(&l, &x->n);
+ SLIST_FOR_EACH(struct x *, x, l)
+ printf("%d/", x->val);
+ putchar('\n');
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Single-Linked Lists
+ *
+ * (c) 2005 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_SLISTS_H
+#define _UCW_SLISTS_H
+
+/* Node of a single-linked list: just a pointer to the following node (NULL at the end). */
+typedef struct snode {
+ struct snode *next;
+} snode;
+
+/* Single-linked list: `head' is a sentinel whose `next' is the first node, `last' points to the final node (NULL when empty). */
+typedef struct slist {
+ struct snode head, *last;
+} slist;
+
+static inline void *slist_head(slist *l)
+{
+  /* First node of the list, or NULL if the list is empty. */
+  snode *first = l->head.next;
+  return first;
+}
+
+static inline void *slist_tail(slist *l)
+{
+  /* Last node of the list as tracked in l->last. */
+  snode *final = l->last;
+  return final;
+}
+
+static inline void *slist_next(snode *n)
+{
+  /* Node following `n', or NULL if `n' is the last one. */
+  snode *follower = n->next;
+  return follower;
+}
+
+static inline int slist_empty(slist *l)
+{
+  /* Non-zero iff the head sentinel has no successor. */
+  return l->head.next == NULL;
+}
+
+/* Walk all nodes of `list'; `n' is a caller-declared pointer used as the iteration variable. */
+#define SLIST_WALK(n,list) for((n)=(void*)(list).head.next; (n); (n)=(void*)((snode*)(n))->next)
+/*
+ * Walk `list' in a way which tolerates removal of the current node: `prev' follows
+ * the predecessor and only advances when `n' is still linked right after it.
+ * All macro arguments are parenthesized, and the assignment used as the loop
+ * condition is wrapped in its own parentheses to make the intent explicit.
+ */
+#define SLIST_WALK_DELSAFE(n,list,prev) for((prev)=(void*)&(list).head; ((n)=(void*)((snode*)(prev))->next); (prev)=(((snode*)(prev))->next==(snode*)(n) ? (void*)(n) : (void*)(prev)))
+/* Walk all nodes of `list', declaring the iteration variable `n' of the given `type' inside the loop. */
+#define SLIST_FOR_EACH(type,n,list) for(type n=(void*)(list).head.next; n; n=(void*)((snode*)(n))->next)
+
+static inline void slist_insert_after(slist *l, snode *what, snode *after)
+{
+  /* Splice `what' in right behind `after'; if nothing follows it, it becomes the new tail. */
+  what->next = after->next;
+  after->next = what;
+  if (what->next == NULL)
+    l->last = what;
+}
+
+static inline void slist_add_head(slist *l, snode *n)
+{
+  /* Push `n' to the front; when no tail is recorded yet, `n' is also the tail. */
+  snode *old_first = l->head.next;
+  n->next = old_first;
+  l->head.next = n;
+  if (l->last == NULL)
+    l->last = n;
+}
+
+static inline void slist_add_tail(slist *l, snode *n)
+{
+  /* Append `n': hook it behind the current tail, or behind the head sentinel if there is no tail. */
+  snode **link = l->last ? &l->last->next : &l->head.next;
+  *link = n;
+  n->next = NULL;
+  l->last = n;
+}
+
+static inline void slist_init(slist *l)
+{
+  /* Make the list empty: no tail and no successor of the head sentinel. */
+  l->last = NULL;
+  l->head.next = NULL;
+}
+
+static inline void slist_remove_after(slist *l, snode *after)
+{
+  /*
+   * Unlink the node following `after'. If it was the tail, the tail moves
+   * back to `after', or to NULL when `after' is the head sentinel
+   * (i.e., the list has become empty).
+   */
+  snode *victim = after->next;
+  after->next = victim->next;
+  if (l->last != victim)
+    return;
+  if (after == &l->head)
+    l->last = NULL;
+  else
+    l->last = after;
+}
+
+/* Non-trivial functions */
+
+void *slist_prev(slist *l, snode *n);
+void slist_insert_before(slist *l, snode *what, snode *before);
+void slist_remove(slist *l, snode *n);
+
+#endif
--- /dev/null
+# Test for slists module
+
+Run: ../obj/lib/slists-t
+Out: 9/7/3/1/2/4/8/10/
--- /dev/null
+# Makefile for the UCW Sorter (c) 2007 Martin Mares <mj@ucw.cz>
+
+DIRS+=lib/sorter
+
+# Sorter modules and headers are appended to the libucw lists with a sorter/ prefix
+LIBUCW_MODS+=$(addprefix sorter/, config govern sbuck array)
+LIBUCW_INCLUDES+=$(addprefix sorter/, array.h common.h s-fixint.h \
+ s-internal.h s-multiway.h s-radix.h s-twoway.h sorter.h)
+
+# The sort-test program is added to PROGS only when CONFIG_DEBUG_TOOLS is defined
+ifdef CONFIG_DEBUG_TOOLS
+PROGS+=$(o)/lib/sorter/sort-test
+endif
+
+$(o)/lib/sorter/sort-test: $(o)/lib/sorter/sort-test.o $(LIBUCW)
--- /dev/null
+Cleanups:
+o Log messages should show both the original and the new size of the data. The speed
+ should probably be calculated from the former.
+o Buffer sizing in shep-export.
+
+Improvements:
+o When quicksorting a large input (especially in the threaded case), invest more
+ time in picking a good pivot.
+o Overlap presorter I/O with internal sorting.
+
+Users of lib/sorter/array.h which might use radix-sorting:
+indexer/chewer.c
+indexer/lexfreq.c
+indexer/mkgraph.c
+indexer/reftexts.c
--- /dev/null
+/*
+ * UCW Library -- Optimized Array Sorter
+ *
+ * (c) 2003--2007 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#undef LOCAL_DEBUG
+
+#include "lib/lib.h"
+#include "lib/sorter/common.h"
+
+#include <string.h>
+#include <alloca.h>
+
+// When fewer than this many hash bits remain after a radix pass, buckets are quicksorted instead of being split further
+#define ASORT_MIN_SHIFT 2
+
+// Debug messages emitted when sorter_trace_array is at least 1 (ASORT_TRACE) or at least `level' (ASORT_XTRACE)
+#define ASORT_TRACE(x...) ASORT_XTRACE(1, x)
+#define ASORT_XTRACE(level, x...) do { if (sorter_trace_array >= level) msg(L_DEBUG, x); } while(0)
+
+/*
+ * Sequential recursive radix-sort of `num_elts' elements of `array', using
+ * `buffer' as scratch space of the same size. One pass distributes the
+ * elements to 2^radix_bits buckets according to the top radix_bits of the
+ * remaining `hash_bits'; each bucket is then either quicksorted or sorted
+ * recursively with the roles of array and buffer exchanged.
+ */
+static void
+asort_radix(struct asort_context *ctx, void *array, void *buffer, uns num_elts, uns hash_bits, uns swapped_output)
+{
+ // swapped_output == 0 if result should be returned in `array', otherwise in `buffer'
+ uns buckets = (1 << ctx->radix_bits);
+ uns shift = (hash_bits > ctx->radix_bits) ? (hash_bits - ctx->radix_bits) : 0;
+ uns cnt[buckets];
+
+#if 0
+ static int reported[64];
+ if (!reported[hash_bits]++)
+#endif
+ DBG(">>> n=%u h=%d s=%d sw=%d", num_elts, hash_bits, shift, swapped_output);
+
+ // Count the elements falling to each bucket
+ bzero(cnt, sizeof(cnt));
+ ctx->radix_count(array, num_elts, cnt, shift);
+
+ // Turn the counts to starting positions of the buckets (exclusive prefix sums)
+ uns pos = 0;
+ for (uns i=0; i<buckets; i++)
+ {
+ uns j = cnt[i];
+ cnt[i] = pos;
+ pos += j;
+ }
+ ASSERT(pos == num_elts);
+
+ // Distribute the elements to `buffer'; radix_split advances cnt[i] to the end of bucket i
+ ctx->radix_split(array, buffer, num_elts, cnt, shift);
+ pos = 0;
+ for (uns i=0; i<buckets; i++)
+ {
+ uns n = cnt[i] - pos;
+ if (n < ctx->radix_threshold || shift < ASORT_MIN_SHIFT)
+ {
+ // Small bucket or too few bits left: sort in place in the buffer and copy back if the result is wanted in `array'
+ ctx->quicksort(buffer, n);
+ if (!swapped_output)
+ memcpy(array, buffer, n * ctx->elt_size);
+ }
+ else
+ asort_radix(ctx, buffer, array, n, shift, !swapped_output);
+ // Advance both pointers to the start of the next bucket
+ array += n * ctx->elt_size;
+ buffer += n * ctx->elt_size;
+ pos = cnt[i];
+ }
+}
+
+#ifdef CONFIG_UCW_THREADS
+
+#include "lib/threads.h"
+#include "lib/workqueue.h"
+#include "lib/eltpool.h"
+
+static uns asort_threads_use_count;
+static uns asort_threads_ready;
+static struct worker_pool asort_thread_pool;
+
+static uns
+rs_estimate_stack(void)
+{
+  // Stack space needed by the recursive radix-sorter: one array of
+  // 2^CONFIG_UCW_RADIX_SORTER_BITS counters per recursion level,
+  // with (64 / CONFIG_UCW_RADIX_SORTER_BITS) + 1 levels accounted for.
+  uns levels = (64 / CONFIG_UCW_RADIX_SORTER_BITS) + 1;
+  uns bytes_per_level = sizeof(uns) * (1 << CONFIG_UCW_RADIX_SORTER_BITS);
+  return bytes_per_level * levels;
+}
+
+/*
+ * Take a reference on the shared worker pool of the array sorter.
+ * The use count is always incremented; the pool itself is initialized
+ * only when `run' is non-zero and the pool is not ready yet. The whole
+ * operation is done under ucwlib_lock().
+ */
+void
+asort_start_threads(uns run)
+{
+ ucwlib_lock();
+ asort_threads_use_count++;
+ if (run && !asort_threads_ready)
+ {
+ // XXX: If somebody overrides the radix-sorter parameters to insane values,
+ // he also should override the stack size to insane values.
+ asort_thread_pool.stack_size = default_thread_stack_size + rs_estimate_stack();
+ asort_thread_pool.num_threads = sorter_threads;
+ ASORT_TRACE("Initializing thread pool (%d threads, %dK stack)", sorter_threads, asort_thread_pool.stack_size >> 10);
+ worker_pool_init(&asort_thread_pool);
+ asort_threads_ready = 1;
+ }
+ ucwlib_unlock();
+}
+
+void
+asort_stop_threads(void)
+{
+  // Drop one reference to the shared worker pool; the last user shuts
+  // the pool down if it has been started. Done under ucwlib_lock().
+  ucwlib_lock();
+  asort_threads_use_count--;
+  if (asort_threads_use_count == 0 && asort_threads_ready)
+    {
+      ASORT_TRACE("Shutting down thread pool");
+      worker_pool_cleanup(&asort_thread_pool);
+      asort_threads_ready = 0;
+    }
+  ucwlib_unlock();
+}
+
+/*
+ * Work item of the threaded quicksort: a sub-array to process. On return,
+ * left/right hold the partition boundaries produced by quicksplit, or
+ * LR_UNDEF if the sub-array has been sorted completely by the worker.
+ */
+struct qs_work {
+ struct work w;
+ struct asort_context *ctx;
+ void *array;
+ uns num_elts;
+ int left, right;
+#define LR_UNDEF -100
+};
+
+/*
+ * Worker callback of the threaded quicksort. Sub-arrays smaller than
+ * ctx->thread_threshold are sorted completely and marked with LR_UNDEF;
+ * larger ones are only partitioned by quicksplit, and the boundaries are
+ * returned in w->left / w->right for the submitter to schedule both halves.
+ */
+static void
+qs_handle_work(struct worker_thread *thr UNUSED, struct work *ww)
+{
+  struct qs_work *w = (struct qs_work *) ww;
+  struct asort_context *ctx = w->ctx;
+
+  DBG("Thread %d: got %u elts", thr->id, w->num_elts);
+  if (w->num_elts < ctx->thread_threshold)
+    {
+      ctx->quicksort(w->array, w->num_elts);
+      w->left = w->right = LR_UNDEF;
+    }
+  else
+    ctx->quicksplit(w->array, w->num_elts, &w->left, &w->right);
+  // left/right are signed ints (possibly LR_UNDEF == -100), so they are printed with %d
+  DBG("Thread %d: returning l=%d r=%d", thr->id, w->left, w->right);
+}
+
+static struct qs_work *
+qs_alloc_work(struct asort_context *ctx)
+{
+  // Fresh work item from the context's element pool, bound to the quicksort
+  // handler at priority 0; array and num_elts are left for the caller to set.
+  struct qs_work *item = ep_alloc(ctx->eltpool);
+  item->ctx = ctx;
+  item->w.go = qs_handle_work;
+  item->w.priority = 0;
+  return item;
+}
+
+/*
+ * Parallel quicksort of ctx->array. The whole array is submitted as a single
+ * work item; whenever a worker returns a partitioned sub-array, both parts
+ * are submitted as new work items (with increased priority value), until all
+ * parts have been sorted directly by the workers.
+ */
+static void
+threaded_quicksort(struct asort_context *ctx)
+{
+ struct work_queue q;
+ struct qs_work *v, *w;
+
+ asort_start_threads(1);
+ work_queue_init(&asort_thread_pool, &q);
+ ctx->eltpool = ep_new(sizeof(struct qs_work), 1000);
+
+ // Initial work item covering the whole array
+ w = qs_alloc_work(ctx);
+ w->array = ctx->array;
+ w->num_elts = ctx->num_elts;
+ work_submit(&q, &w->w);
+
+ while (v = (struct qs_work *) work_wait(&q))
+ {
+ // LR_UNDEF means that the worker has sorted the block completely
+ if (v->left != LR_UNDEF)
+ {
+ // Left part: elements 0..right (skipped if it has at most one element)
+ if (v->right > 0)
+ {
+ w = qs_alloc_work(ctx);
+ w->array = v->array;
+ w->num_elts = v->right + 1;
+ w->w.priority = v->w.priority + 1;
+ work_submit(&q, &w->w);
+ }
+ // Right part: elements left..num_elts-1 (skipped if it has at most one element)
+ if (v->left < (int)v->num_elts - 1)
+ {
+ w = qs_alloc_work(ctx);
+ w->array = v->array + v->left * ctx->elt_size;
+ w->num_elts = v->num_elts - v->left;
+ w->w.priority = v->w.priority + 1;
+ work_submit(&q, &w->w);
+ }
+ }
+ ep_free(ctx->eltpool, v);
+ }
+
+ ep_delete(ctx->eltpool);
+ work_queue_cleanup(&q);
+ asort_stop_threads();
+}
+
+/*
+ * Work item of the threaded radix-sort. The trailing `cnt' array is used by
+ * the counting and splitting items only; those are allocated with room for
+ * 2^radix_bits counters, while items from the element pool have none.
+ */
+struct rs_work {
+ struct work w;
+ struct asort_context *ctx;
+ void *array, *buffer; // Like asort_radix().
+ uns num_elts;
+ uns shift;
+ uns swap_output;
+ uns cnt[0];
+};
+
+static void
+rs_count(struct worker_thread *thr UNUSED, struct work *ww)
+{
+  // Worker callback: count the elements of this item's block into its private counters
+  struct rs_work *job = (struct rs_work *) ww;
+  struct asort_context *ctx = job->ctx;
+
+  DBG("Thread %d: Counting %u items, shift=%d", thr->id, job->num_elts, job->shift);
+  ctx->radix_count(job->array, job->num_elts, job->cnt, job->shift);
+  DBG("Thread %d: Counting done", thr->id);
+}
+
+static void
+rs_split(struct worker_thread *thr UNUSED, struct work *ww)
+{
+  // Worker callback: distribute this item's block to the buffer, using its counters as output positions
+  struct rs_work *job = (struct rs_work *) ww;
+  struct asort_context *ctx = job->ctx;
+
+  DBG("Thread %d: Splitting %u items, shift=%d", thr->id, job->num_elts, job->shift);
+  ctx->radix_split(job->array, job->buffer, job->num_elts, job->cnt, job->shift);
+  DBG("Thread %d: Splitting done", thr->id);
+}
+
+/*
+ * Sort one bucket completely: by quicksort if it is small or too few hash
+ * bits remain, otherwise by the sequential radix-sort. Used both as a worker
+ * callback and called directly with thr == NULL (hence the checks of `thr'
+ * around the debug messages).
+ */
+static void
+rs_finish(struct worker_thread *thr UNUSED, struct work *ww)
+{
+ struct rs_work *w = (struct rs_work *) ww;
+
+ if (thr)
+ DBG("Thread %d: Finishing %u items, shift=%d", thr->id, w->num_elts, w->shift);
+ if (w->shift < ASORT_MIN_SHIFT || w->num_elts < w->ctx->radix_threshold)
+ {
+ // Quicksort works in place in w->array; copy if the result is wanted in w->buffer
+ w->ctx->quicksort(w->array, w->num_elts);
+ if (w->swap_output)
+ memcpy(w->buffer, w->array, w->num_elts * w->ctx->elt_size);
+ }
+ else
+ asort_radix(w->ctx, w->array, w->buffer, w->num_elts, w->shift, w->swap_output);
+ if (thr)
+ DBG("Thread %d: Finishing done", thr->id);
+}
+
+static void
+rs_wait_small(struct asort_context *ctx)
+{
+  // Reap work items from the radix-sort queue until work_wait() returns NULL,
+  // giving each of them back to the element pool.
+  for (;;)
+    {
+      struct rs_work *done = (struct rs_work *) work_wait(ctx->rs_work_queue);
+      if (!done)
+        break;
+      DBG("Reaping small chunk of %u items", done->num_elts);
+      ep_free(ctx->eltpool, done);
+    }
+}
+
+/*
+ * One level of the parallel radix-sort. The input is cut to sorter_threads
+ * blocks, which are counted and then split to `buffer' in parallel. The
+ * resulting buckets are either recursed upon (large ones), scheduled to the
+ * work queue for finishing on background, or finished inline (tiny ones).
+ * Arguments have the same meaning as in asort_radix().
+ */
+static void
+rs_radix(struct asort_context *ctx, void *array, void *buffer, uns num_elts, uns hash_bits, uns swapped_output)
+{
+ uns buckets = (1 << ctx->radix_bits);
+ uns shift = (hash_bits > ctx->radix_bits) ? (hash_bits - ctx->radix_bits) : 0;
+ uns cnt[buckets];
+ uns blksize = num_elts / sorter_threads;
+ DBG(">>> n=%u h=%d s=%d blk=%u sw=%d", num_elts, hash_bits, shift, blksize, swapped_output);
+
+ // If there are any small chunks in progress, wait for them to finish
+ rs_wait_small(ctx);
+
+ // Start parallel counting
+ void *iptr = array;
+ for (uns i=0; i<sorter_threads; i++)
+ {
+ struct rs_work *w = ctx->rs_works[i];
+ w->w.priority = 0;
+ w->w.go = rs_count;
+ w->ctx = ctx;
+ w->array = iptr;
+ w->buffer = buffer;
+ w->num_elts = blksize;
+ // The last block also takes the remainder of the division
+ if (i == sorter_threads-1)
+ w->num_elts += num_elts % sorter_threads;
+ w->shift = shift;
+ iptr += w->num_elts * ctx->elt_size;
+ bzero(w->cnt, sizeof(uns) * buckets);
+ work_submit(ctx->rs_work_queue, &w->w);
+ }
+
+ // Get bucket sizes from the counts
+ bzero(cnt, sizeof(cnt));
+ for (uns i=0; i<sorter_threads; i++)
+ {
+ struct rs_work *w = (struct rs_work *) work_wait(ctx->rs_work_queue);
+ ASSERT(w);
+ for (uns j=0; j<buckets; j++)
+ cnt[j] += w->cnt[j];
+ }
+
+ // Calculate bucket starts
+ uns pos = 0;
+ for (uns i=0; i<buckets; i++)
+ {
+ uns j = cnt[i];
+ cnt[i] = pos;
+ pos += j;
+ }
+ ASSERT(pos == num_elts);
+
+ // Start parallel splitting
+ for (uns i=0; i<sorter_threads; i++)
+ {
+ struct rs_work *w = ctx->rs_works[i];
+ w->w.go = rs_split;
+ // Each block gets its own output range inside every bucket: replace its
+ // counts by starting positions and advance the shared position by the count.
+ for (uns j=0; j<buckets; j++)
+ {
+ uns k = w->cnt[j];
+ w->cnt[j] = cnt[j];
+ cnt[j] += k;
+ }
+ work_submit(ctx->rs_work_queue, &w->w);
+ }
+ // After the loop, cnt[j] is the end position of bucket j
+ ASSERT(cnt[buckets-1] == num_elts);
+
+ // Wait for splits to finish
+ while (work_wait(ctx->rs_work_queue))
+ ;
+
+ // Recurse on buckets
+ pos = 0;
+ for (uns i=0; i<buckets; i++)
+ {
+ uns n = cnt[i] - pos;
+ if (!n)
+ continue;
+ if (n < ctx->thread_threshold || shift < ASORT_MIN_SHIFT)
+ {
+ // The data now live in `buffer', so the roles of array and buffer are exchanged for the finisher
+ struct rs_work *w = ep_alloc(ctx->eltpool);
+ w->w.priority = 0;
+ w->w.go = rs_finish;
+ w->ctx = ctx;
+ w->array = buffer;
+ w->buffer = array;
+ w->num_elts = n;
+ w->shift = shift;
+ w->swap_output = !swapped_output;
+ if (n < ctx->thread_chunk)
+ {
+ DBG("Sorting block %u+%u inline", pos, n);
+ rs_finish(NULL, &w->w);
+ ep_free(ctx->eltpool, w);
+ }
+ else
+ {
+ DBG("Scheduling block %u+%u", pos, n);
+ work_submit(ctx->rs_work_queue, &w->w);
+ }
+ }
+ else
+ rs_radix(ctx, buffer, array, n, shift, !swapped_output);
+ pos = cnt[i];
+ array += n * ctx->elt_size;
+ buffer += n * ctx->elt_size;
+ }
+}
+
+/*
+ * Parallel radix-sort of ctx->array with ctx->buffer as the auxiliary buffer.
+ * `swap' is passed to rs_radix() as swapped_output, i.e., non-zero if the
+ * result should end up in the buffer.
+ */
+static void
+threaded_radixsort(struct asort_context *ctx, uns swap)
+{
+ struct work_queue q;
+
+ asort_start_threads(1);
+ work_queue_init(&asort_thread_pool, &q);
+
+ // Prepare work structures for counting and splitting.
+ // We use big_alloc(), because we want to avoid cacheline aliasing between threads.
+ ctx->rs_work_queue = &q;
+ ctx->rs_works = alloca(sizeof(struct rs_work *) * sorter_threads);
+ for (uns i=0; i<sorter_threads; i++)
+ ctx->rs_works[i] = big_alloc(sizeof(struct rs_work) + sizeof(uns) * (1 << ctx->radix_bits));
+
+ // Prepare a pool for all remaining small bits which will be sorted on background.
+ ctx->eltpool = ep_new(sizeof(struct rs_work), 1000);
+
+ // Do the big splitting
+ rs_radix(ctx, ctx->array, ctx->buffer, ctx->num_elts, ctx->hash_bits, swap);
+ // The counting/splitting items are needed by rs_radix() only
+ for (uns i=0; i<sorter_threads; i++)
+ big_free(ctx->rs_works[i], sizeof(struct rs_work) + sizeof(uns) * (1 << ctx->radix_bits));
+
+ // Finish the small blocks
+ rs_wait_small(ctx);
+
+ ASSERT(!ctx->eltpool->num_allocated);
+ ep_delete(ctx->eltpool);
+ work_queue_cleanup(&q);
+ asort_stop_threads();
+}
+
+#else
+
+// Without CONFIG_UCW_THREADS, starting and stopping the sorter threads does nothing
+void asort_start_threads(uns run UNUSED) { }
+void asort_stop_threads(void) { }
+
+#endif
+
+/*
+ * Guess whether the sorted data should end up in the buffer (1) or in the
+ * original array (0), by simulating radix passes: every predicted pass flips
+ * the answer. The radix-sorters place the output wherever they are told to
+ * (copying if needed), so the guess is passed to them as swapped_output.
+ *
+ * NOTE(review): the simulation starts with ctx->radix_bits, so it never
+ * predicts more than a single pass; ctx->hash_bits may have been intended
+ * -- confirm with the author.
+ */
+static uns
+predict_swap(struct asort_context *ctx)
+{
+ uns bits = ctx->radix_bits;
+ uns elts = ctx->num_elts;
+ uns swap = 0;
+
+ while (elts >= ctx->radix_threshold && bits >= ASORT_MIN_SHIFT)
+ {
+ DBG("Predicting pass: %u elts, %d bits", elts, bits);
+ swap = !swap;
+ elts >>= ctx->radix_bits;
+ bits = MAX(bits, ctx->radix_bits) - ctx->radix_bits;
+ }
+ return swap;
+}
+
+/*
+ * Entry point of the array sorter: sort ctx->array according to the callbacks
+ * and parameters in `ctx'. Chooses between quicksort and radix-sort and between
+ * the sequential and the parallel variant. When radix-sorting leaves the
+ * result in the auxiliary buffer, ctx->array is redirected to ctx->buffer.
+ */
+void
+asort_run(struct asort_context *ctx)
+{
+ // Translate the configured limits from bytes to elements, clamped to the range of uns
+ ctx->thread_threshold = MIN(sorter_thread_threshold / ctx->elt_size, ~0U);
+ ctx->thread_chunk = MIN(sorter_thread_chunk / ctx->elt_size, ~0U);
+ ctx->radix_threshold = MIN(sorter_radix_threshold / ctx->elt_size, ~0U);
+
+ ASORT_TRACE("Array-sorting %u items per %u bytes, hash_bits=%d", ctx->num_elts, ctx->elt_size, ctx->hash_bits);
+ ASORT_XTRACE(2, "Limits: thread_threshold=%u, thread_chunk=%u, radix_threshold=%u",
+ ctx->thread_threshold, ctx->thread_chunk, ctx->radix_threshold);
+ // Threads are used only if there are several of them, the input is big enough and they are not disabled by the debug flags
+ uns allow_threads UNUSED = (sorter_threads > 1 &&
+ ctx->num_elts >= ctx->thread_threshold &&
+ !(sorter_debug & SORT_DEBUG_ASORT_NO_THREADS));
+
+ // Quicksort is used for small inputs, for too few hash bits, when no radix splitter is available, or when radix-sorting is disabled
+ if (ctx->num_elts < ctx->radix_threshold ||
+ ctx->hash_bits <= ASORT_MIN_SHIFT ||
+ !ctx->radix_split ||
+ (sorter_debug & SORT_DEBUG_ASORT_NO_RADIX))
+ {
+#ifdef CONFIG_UCW_THREADS
+ if (allow_threads)
+ {
+ ASORT_XTRACE(2, "Decided to use parallel quicksort");
+ threaded_quicksort(ctx);
+ }
+ else
+#endif
+ {
+ ASORT_XTRACE(2, "Decided to use sequential quicksort");
+ ctx->quicksort(ctx->array, ctx->num_elts);
+ }
+ }
+ else
+ {
+ uns swap = predict_swap(ctx);
+#ifdef CONFIG_UCW_THREADS
+ if (allow_threads)
+ {
+ ASORT_XTRACE(2, "Decided to use parallel radix-sort (swap=%d)", swap);
+ threaded_radixsort(ctx, swap);
+ }
+ else
+#endif
+ {
+ ASORT_XTRACE(2, "Decided to use sequential radix-sort (swap=%d)", swap);
+ asort_radix(ctx, ctx->array, ctx->buffer, ctx->num_elts, ctx->hash_bits, swap);
+ }
+ // With swap set, the sorted data have been placed to the buffer
+ if (swap)
+ ctx->array = ctx->buffer;
+ }
+
+ ASORT_XTRACE(2, "Array-sort finished");
+}
--- /dev/null
+/*
+ * UCW Library -- Optimized Array Sorter
+ *
+ * (c) 2003--2007 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+/*
+ * This is a generator of routines for sorting huge arrays, similar to the one
+ * in lib/arraysort.h. It cannot handle discontiguous arrays, but it is able
+ * to employ radix-sorting if a monotone hash function is available and also
+ * use several threads in parallel on SMP systems (this assumes that all
+ * callbacks you provide are thread-safe).
+ *
+ * It is usually called internally by the generic sorter machinery, but
+ * you are free to use it explicitly if you need.
+ *
+ * So much for advocacy, here are the parameters (those marked with [*]
+ * are mandatory):
+ *
+ * ASORT_PREFIX(x) [*] add a name prefix (used on all global names
+ * defined by the sorter)
+ * ASORT_KEY_TYPE [*] data type of a single array entry key
+ * ASORT_LT(x,y) x < y for ASORT_KEY_TYPE (default: "x<y")
+ * ASORT_HASH(x) a monotone hash function (satisfying hash(x) < hash(y) => x<y)
+ * ASORT_LONG_HASH hashes are 64-bit numbers (default is 32 bits)
+ *
+ * Fine-tuning parameters: (if you really insist)
+ *
+ * ASORT_THRESHOLD threshold for switching between quicksort and insertsort
+ * ASORT_RADIX_BITS how many bits of the hash functions are to be used at once for
+ * radix-sorting.
+ *
+ * After including this file, a function
+ * ASORT_KEY_TYPE *ASORT_PREFIX(sort)(ASORT_KEY_TYPE *array, uns num_elts [, ASORT_KEY_TYPE *buf, uns hash_bits])
+ * is declared and all parameter macros are automatically undef'd. Here `buf' is an
+ * auxiliary buffer of the same size as the input array, required whenever radix
+ * sorting should be used, and `hash_bits' is the number of significant bits returned
+ * by the hash function. If the buffer is specified, the sorting function returns either
+ * a pointer to the input array or to the buffer, depending on where the result is stored.
+ * If you do not use hashing, these parameters should be omitted.
+ */
+
+#include "lib/sorter/common.h"
+
+/* Q(x) builds a name with the user-supplied prefix; Q(key) is the element type */
+#define Q(x) ASORT_PREFIX(x)
+
+typedef ASORT_KEY_TYPE Q(key);
+
+/* Defaults for the optional parameters */
+
+#ifndef ASORT_LT
+#define ASORT_LT(x,y) ((x) < (y))
+#endif
+
+/* Swaps two elements of the local variable `array' by their indices */
+#ifndef ASORT_SWAP
+#define ASORT_SWAP(i,j) do { Q(key) tmp = array[i]; array[i]=array[j]; array[j]=tmp; } while (0)
+#endif
+
+#ifndef ASORT_THRESHOLD
+#define ASORT_THRESHOLD 8 /* Guesswork and experimentation */
+#endif
+
+#ifndef ASORT_RADIX_BITS
+#define ASORT_RADIX_BITS CONFIG_UCW_RADIX_SORTER_BITS
+#endif
+/* Mask selecting the bucket number from a shifted hash value */
+#define ASORT_RADIX_MASK ((1 << (ASORT_RADIX_BITS)) - 1)
+
+/* QuickSort with optimizations a'la Sedgewick, inspired by qsort() from GNU libc. */
+
+/*
+ * Sort `num_elts' elements of type Q(key) starting at `array_ptr' in place.
+ * The first phase is a non-recursive quicksort with median-of-three pivots and
+ * an explicit stack, which leaves partitions shorter than ASORT_THRESHOLD
+ * unsorted; the second phase finishes the job by a single insertion sort.
+ */
+static void Q(quicksort)(void *array_ptr, uns num_elts)
+{
+ Q(key) *array = array_ptr;
+ struct stk { int l, r; } stack[8*sizeof(uns)];
+ int l, r, left, right, m;
+ uns sp = 0;
+ Q(key) pivot;
+
+ if (num_elts <= 1)
+ return;
+
+ left = 0;
+ right = num_elts - 1;
+ for(;;)
+ {
+ l = left;
+ r = right;
+ m = (l+r)/2;
+ /* Order array[l], array[m] and array[r], so that the median becomes the pivot */
+ if (ASORT_LT(array[m], array[l]))
+ ASORT_SWAP(l,m);
+ if (ASORT_LT(array[r], array[m]))
+ {
+ ASORT_SWAP(m,r);
+ if (ASORT_LT(array[m], array[l]))
+ ASORT_SWAP(l,m);
+ }
+ pivot = array[m];
+ /* Partition: move l and r towards each other, swapping misplaced pairs */
+ do
+ {
+ while (ASORT_LT(array[l], pivot))
+ l++;
+ while (ASORT_LT(pivot, array[r]))
+ r--;
+ if (l < r)
+ {
+ ASORT_SWAP(l,r);
+ l++;
+ r--;
+ }
+ else if (l == r)
+ {
+ l++;
+ r--;
+ }
+ }
+ while (l <= r);
+ /* Now the partitions are left..r and l..right */
+ if ((r - left) >= ASORT_THRESHOLD && (right - l) >= ASORT_THRESHOLD)
+ {
+ /* Both partitions ok => push the larger one */
+ if ((r - left) > (right - l))
+ {
+ stack[sp].l = left;
+ stack[sp].r = r;
+ left = l;
+ }
+ else
+ {
+ stack[sp].l = l;
+ stack[sp].r = right;
+ right = r;
+ }
+ sp++;
+ }
+ else if ((r - left) >= ASORT_THRESHOLD)
+ {
+ /* Left partition OK, right undersize */
+ right = r;
+ }
+ else if ((right - l) >= ASORT_THRESHOLD)
+ {
+ /* Right partition OK, left undersize */
+ left = l;
+ }
+ else
+ {
+ /* Both partitions undersize => pop */
+ if (!sp)
+ break;
+ sp--;
+ left = stack[sp].l;
+ right = stack[sp].r;
+ }
+ }
+
+ /*
+ * We have a partially sorted array, finish by insertsort. Inspired
+ * by qsort() in GNU libc.
+ */
+
+ /* Find minimal element which will serve as a barrier */
+ r = MIN(num_elts, ASORT_THRESHOLD);
+ m = 0;
+ for (l=1; l<r; l++)
+ if (ASORT_LT(array[l], array[m]))
+ m = l;
+ ASORT_SWAP(0,m);
+
+ /* Insertion sort */
+ for (m=1; m<(int)num_elts; m++)
+ {
+ l=m;
+ /* The barrier at array[0] stops this loop, so no bounds check on l is needed */
+ while (ASORT_LT(array[m], array[l-1]))
+ l--;
+ /* Move array[m] to position l by a sequence of swaps */
+ while (l < m)
+ {
+ ASORT_SWAP(l,m);
+ l++;
+ }
+ }
+}
+
+/* Just the splitting part of QuickSort */
+
+/*
+ * Perform a single quicksort partitioning step (median-of-three pivot) on
+ * the whole array. On return, *leftp and *rightp hold the final values of
+ * the two scanning indices: the partitions are 0..*rightp and
+ * *leftp..num_elts-1.
+ */
+static void Q(quicksplit)(void *array_ptr, uns num_elts, int *leftp, int *rightp)
+{
+ Q(key) *array = array_ptr;
+ int l, r, m;
+ Q(key) pivot;
+
+ l = 0;
+ r = num_elts - 1;
+ m = (l+r)/2;
+ /* Order array[l], array[m] and array[r], so that the median becomes the pivot */
+ if (ASORT_LT(array[m], array[l]))
+ ASORT_SWAP(l,m);
+ if (ASORT_LT(array[r], array[m]))
+ {
+ ASORT_SWAP(m,r);
+ if (ASORT_LT(array[m], array[l]))
+ ASORT_SWAP(l,m);
+ }
+ pivot = array[m];
+ do
+ {
+ while (ASORT_LT(array[l], pivot))
+ l++;
+ while (ASORT_LT(pivot, array[r]))
+ r--;
+ if (l < r)
+ {
+ ASORT_SWAP(l,r);
+ l++;
+ r--;
+ }
+ else if (l == r)
+ {
+ l++;
+ r--;
+ }
+ }
+ while (l <= r);
+ *leftp = l;
+ *rightp = r;
+}
+
+#ifdef ASORT_HASH
+
+/*
+ * Radix-sort counting pass: for each of the `num_elts' elements, increment
+ * the counter of the bucket given by bits of its hash starting at `shift'.
+ * The counters are not cleared here. The switch provides a separate loop
+ * with a compile-time constant shift for every supported shift value.
+ */
+static void Q(radix_count)(void *src_ptr, uns num_elts, uns *cnt, uns shift)
+{
+ Q(key) *src = src_ptr;
+ uns i;
+
+ switch (shift)
+ {
+#define RC(s) \
+ case s: \
+ for (i=0; i<num_elts; i++) \
+ cnt[ (ASORT_HASH(src[i]) >> s) & ASORT_RADIX_MASK ] ++; \
+ break; \
+
+#ifdef ASORT_LONG_HASH
+ RC(63); RC(62); RC(61); RC(60); RC(59); RC(58); RC(57); RC(56);
+ RC(55); RC(54); RC(53); RC(52); RC(51); RC(50); RC(49); RC(48);
+ RC(47); RC(46); RC(45); RC(44); RC(43); RC(42); RC(41); RC(40);
+ RC(39); RC(38); RC(37); RC(36); RC(35); RC(34); RC(33); RC(32);
+#endif
+ RC(31); RC(30); RC(29); RC(28); RC(27); RC(26); RC(25); RC(24);
+ RC(23); RC(22); RC(21); RC(20); RC(19); RC(18); RC(17); RC(16);
+ RC(15); RC(14); RC(13); RC(12); RC(11); RC(10); RC(9); RC(8);
+ RC(7); RC(6); RC(5); RC(4); RC(3); RC(2); RC(1); RC(0);
+ default:
+ ASSERT(0);
+ }
+#undef RC
+}
+
+/*
+ * Radix-sort distribution pass: copy each of the `num_elts' source elements
+ * to `dest' at the position given by ptrs[bucket], where the bucket is taken
+ * from bits of the element's hash starting at `shift'; the position is
+ * advanced after every store. As in the counting pass, the switch provides
+ * a separate loop with a constant shift for every supported shift value.
+ */
+static void Q(radix_split)(void *src_ptr, void *dest_ptr, uns num_elts, uns *ptrs, uns shift)
+{
+ Q(key) *src = src_ptr, *dest = dest_ptr;
+ uns i;
+
+ switch (shift)
+ {
+#define RS(s) \
+ case s: \
+ for (i=0; i<num_elts; i++) \
+ dest[ ptrs[ (ASORT_HASH(src[i]) >> s) & ASORT_RADIX_MASK ]++ ] = src[i]; \
+ break;
+
+#ifdef ASORT_LONG_HASH
+ RS(63); RS(62); RS(61); RS(60); RS(59); RS(58); RS(57); RS(56);
+ RS(55); RS(54); RS(53); RS(52); RS(51); RS(50); RS(49); RS(48);
+ RS(47); RS(46); RS(45); RS(44); RS(43); RS(42); RS(41); RS(40);
+ RS(39); RS(38); RS(37); RS(36); RS(35); RS(34); RS(33); RS(32);
+#endif
+ RS(31); RS(30); RS(29); RS(28); RS(27); RS(26); RS(25); RS(24);
+ RS(23); RS(22); RS(21); RS(20); RS(19); RS(18); RS(17); RS(16);
+ RS(15); RS(14); RS(13); RS(12); RS(11); RS(10); RS(9); RS(8);
+ RS(7); RS(6); RS(5); RS(4); RS(3); RS(2); RS(1); RS(0);
+ default:
+ ASSERT(0);
+ }
+#undef RS
+}
+
+#endif
+
+/*
+ * The generated entry point: fill in an asort_context with the array, the
+ * element size and the generated callbacks and run the generic array sorter.
+ * Returns ctx.array after sorting, which asort_run() may have redirected to
+ * `buffer'. The buffer, hash_bits and radix callbacks are passed only when
+ * ASORT_HASH is defined.
+ */
+static Q(key) *Q(sort)(Q(key) *array, uns num_elts
+#ifdef ASORT_HASH
+ , Q(key) *buffer, uns hash_bits
+#endif
+ )
+{
+ struct asort_context ctx = {
+ .array = array,
+ .num_elts = num_elts,
+ .elt_size = sizeof(Q(key)),
+ .quicksort = Q(quicksort),
+ .quicksplit = Q(quicksplit),
+#ifdef ASORT_HASH
+ .buffer = buffer,
+ .hash_bits = hash_bits,
+ .radix_count = Q(radix_count),
+ .radix_split = Q(radix_split),
+ .radix_bits = ASORT_RADIX_BITS,
+#endif
+ };
+ asort_run(&ctx);
+ return ctx.array;
+}
+
+#undef ASORT_HASH
+#undef ASORT_KEY_TYPE
+#undef ASORT_LONG_HASH
+#undef ASORT_LT
+#undef ASORT_PAGE_ALIGNED
+#undef ASORT_PREFIX
+#undef ASORT_RADIX_BITS
+#undef ASORT_RADIX_MASK
+#undef ASORT_SWAP
+#undef ASORT_THRESHOLD
+#undef Q
--- /dev/null
+/*
+ * UCW Library -- Universal Sorter: Common Declarations
+ *
+ * (c) 2007 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_SORTER_COMMON_H
+#define _UCW_SORTER_COMMON_H
+
+#include "lib/clists.h"
+
+/* Configuration variables */
+extern uns sorter_trace, sorter_trace_array, sorter_stream_bufsize;
+extern uns sorter_debug, sorter_min_radix_bits, sorter_max_radix_bits, sorter_add_radix_bits;
+extern uns sorter_min_multiway_bits, sorter_max_multiway_bits;
+extern uns sorter_threads;
+extern u64 sorter_bufsize, sorter_small_input;
+extern u64 sorter_thread_threshold, sorter_thread_chunk, sorter_radix_threshold;
+extern struct fb_params sorter_fb_params, sorter_small_fb_params;
+
+#define SORT_TRACE(x...) do { if (sorter_trace) msg(L_DEBUG, x); } while(0)
+#define SORT_XTRACE(level, x...) do { if (sorter_trace >= level) msg(L_DEBUG, x); } while(0)
+
+/* Bit flags tested against the sorter_debug configuration variable */
+enum sort_debug {
+ SORT_DEBUG_NO_PRESORT = 1,
+ SORT_DEBUG_NO_JOIN = 2,
+ SORT_DEBUG_KEEP_BUCKETS = 4,
+ SORT_DEBUG_NO_RADIX = 8,
+ SORT_DEBUG_NO_MULTIWAY = 16,
+ SORT_DEBUG_ASORT_NO_RADIX = 32,
+ SORT_DEBUG_ASORT_NO_THREADS = 64
+};
+
+struct sort_bucket;
+
+/*
+ * Context of a single run of the universal sorter, as passed to sorter_run():
+ * the input and output streams, the working buffer, the list of buckets and
+ * the callbacks implementing the individual sorting strategies.
+ */
+struct sort_context {
+ struct fastbuf *in_fb;
+ struct fastbuf *out_fb;
+ uns hash_bits;
+ u64 in_size;
+ struct fb_params *fb_params;
+
+ struct mempool *pool;
+ clist bucket_list;
+ void *big_buf;
+ size_t big_buf_size;
+
+ int (*custom_presort)(struct fastbuf *dest, void *buf, size_t bufsize);
+
+ // Take as much as possible from the source bucket, sort it in memory and dump to destination bucket.
+ // Return 1 if there is more data available in the source bucket.
+ int (*internal_sort)(struct sort_context *ctx, struct sort_bucket *in, struct sort_bucket *out, struct sort_bucket *out_only);
+
+ // Estimate how much input data from `b' will fit in the internal sorting buffer.
+ u64 (*internal_estimate)(struct sort_context *ctx, struct sort_bucket *b);
+
+ // Two-way split/merge: merge up to 2 source buckets to up to 2 destination buckets.
+ // Bucket arrays are NULL-terminated.
+ void (*twoway_merge)(struct sort_context *ctx, struct sort_bucket **ins, struct sort_bucket **outs);
+
+ // Multi-way merge: merge an arbitrary number of source buckets to a single destination bucket.
+ void (*multiway_merge)(struct sort_context *ctx, struct sort_bucket **ins, struct sort_bucket *out);
+
+ // Radix split according to hash function
+ void (*radix_split)(struct sort_context *ctx, struct sort_bucket *in, struct sort_bucket **outs, uns bitpos, uns numbits);
+
+ // State variables of internal_sort
+ void *key_buf;
+ int more_keys;
+
+ // Timing
+ timestamp_t start_time;
+ uns last_pass_time;
+ uns total_int_time, total_pre_time, total_ext_time;
+};
+
+void sorter_run(struct sort_context *ctx);
+
+/* Buffers */
+
+void *sorter_alloc(struct sort_context *ctx, uns size);
+void sorter_prepare_buf(struct sort_context *ctx);
+void sorter_alloc_buf(struct sort_context *ctx);
+void sorter_free_buf(struct sort_context *ctx);
+
+/* Buckets */
+
+/* A bucket of data handled by the sorter; buckets are nodes of a clist (see the leading cnode) */
+struct sort_bucket {
+ cnode n;
+ struct sort_context *ctx;
+ uns flags;
+ struct fastbuf *fb;
+ byte *filename;
+ u64 size; // Size in bytes (not valid when writing)
+ uns runs; // Number of runs, 0 if not sorted
+ uns hash_bits; // Remaining bits of the hash function
+ byte *ident; // Identifier used in debug messages
+};
+
+/* Bit flags stored in sort_bucket.flags */
+enum sort_bucket_flags {
+ SBF_FINAL = 1, // This bucket corresponds to the final output file (always 1 run)
+ SBF_SOURCE = 2, // Contains the source file (always 0 runs)
+ SBF_CUSTOM_PRESORT = 4, // Contains source to read via custom presorter
+ SBF_OPEN_WRITE = 256, // We are currently writing to the fastbuf
+ SBF_OPEN_READ = 512, // We are reading from the fastbuf
+ SBF_DESTROYED = 1024, // Already done with, no further references allowed
+ SBF_SWAPPED_OUT = 2048, // Swapped out to a named file
+};
+
+struct sort_bucket *sbuck_new(struct sort_context *ctx);
+void sbuck_drop(struct sort_bucket *b);
+int sbuck_have(struct sort_bucket *b);
+int sbuck_has_file(struct sort_bucket *b);
+sh_off_t sbuck_size(struct sort_bucket *b);
+struct fastbuf *sbuck_read(struct sort_bucket *b);
+struct fastbuf *sbuck_write(struct sort_bucket *b);
+void sbuck_swap_out(struct sort_bucket *b);
+
+/* Contexts and helper functions for the array sorter */
+
+/*
+ * Context of a single array-sorter run, processed by asort_run().
+ * The radix_count/radix_split callbacks may be left NULL, in which case
+ * asort_run() falls back to quicksort.
+ */
+struct asort_context {
+ // Interface between generic code in array.c and functions generated by array.h
+ void *array; // Array to sort
+ void *buffer; // Auxiliary buffer (required when radix-sorting)
+ uns num_elts; // Number of elements in the array
+ uns elt_size; // Bytes per element
+ uns hash_bits; // Remaining bits of the hash function
+ uns radix_bits; // How many bits to process in a single radix-sort pass
+ void (*quicksort)(void *array_ptr, uns num_elts);
+ void (*quicksplit)(void *array_ptr, uns num_elts, int *leftp, int *rightp);
+ void (*radix_count)(void *src_ptr, uns num_elts, uns *cnt, uns shift);
+ void (*radix_split)(void *src_ptr, void *dest_ptr, uns num_elts, uns *ptrs, uns shift);
+
+ // Used internally by array.c
+ struct rs_work **rs_works;
+ struct work_queue *rs_work_queue;
+ struct eltpool *eltpool;
+
+ // Configured limits translated from bytes to elements
+ uns thread_threshold;
+ uns thread_chunk;
+ uns radix_threshold;
+};
+
+void asort_run(struct asort_context *ctx);
+void asort_start_threads(uns run);
+void asort_stop_threads(void);
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Universal Sorter: Configuration
+ *
+ * (c) 2007 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/conf.h"
+#include "lib/fastbuf.h"
+#include "lib/sorter/common.h"
+
+/* Sorter configuration variables with their built-in defaults (unset ones default to zero) */
+uns sorter_trace;
+uns sorter_trace_array;
+u64 sorter_bufsize = 65536;
+uns sorter_debug;
+uns sorter_min_radix_bits;
+uns sorter_max_radix_bits;
+uns sorter_add_radix_bits;
+uns sorter_min_multiway_bits;
+uns sorter_max_multiway_bits;
+uns sorter_threads;
+u64 sorter_thread_threshold = 1048576;
+u64 sorter_thread_chunk = 4096;
+u64 sorter_radix_threshold = 4096;
+struct fb_params sorter_fb_params;
+struct fb_params sorter_small_fb_params;
+u64 sorter_small_input;
+
+/*
+ * Description of the sorter's configuration section: maps the item names
+ * to the sorter_* configuration variables. "FileAccess" and
+ * "SmallFileAccess" are sub-sections filling the two separate fb_params
+ * structures (sorter_fb_params and sorter_small_fb_params).
+ */
+static struct cf_section sorter_config = {
+  CF_ITEMS {
+    CF_UNS("Trace", &sorter_trace),
+    CF_UNS("TraceArray", &sorter_trace_array),
+    CF_SECTION("FileAccess", &sorter_fb_params, &fbpar_cf),
+    // Must point to the small-file parameters, otherwise this section would overwrite FileAccess
+    CF_SECTION("SmallFileAccess", &sorter_small_fb_params, &fbpar_cf),
+    CF_U64("SmallInput", &sorter_small_input),
+    CF_U64("SortBuffer", &sorter_bufsize),
+    CF_UNS("Debug", &sorter_debug),
+    CF_UNS("MinRadixBits", &sorter_min_radix_bits),
+    CF_UNS("MaxRadixBits", &sorter_max_radix_bits),
+    CF_UNS("AddRadixBits", &sorter_add_radix_bits),
+    CF_UNS("MinMultiwayBits", &sorter_min_multiway_bits),
+    CF_UNS("MaxMultiwayBits", &sorter_max_multiway_bits),
+    CF_UNS("Threads", &sorter_threads),
+    CF_U64("ThreadThreshold", &sorter_thread_threshold),
+    CF_U64("ThreadChunk", &sorter_thread_chunk),
+    CF_U64("RadixThreshold", &sorter_radix_threshold),
+    CF_END
+  }
+};
+
+/* Constructor: declares the configuration section under the name "Sorter" */
+static void CONSTRUCTOR sorter_init_config(void)
+{
+ cf_declare_section("Sorter", &sorter_config, 0);
+}
--- /dev/null
+/*
+ * UCW Library -- Universal Sorter: Governing Routines
+ *
+ * (c) 2007 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/fastbuf.h"
+#include "lib/mempool.h"
+#include "lib/stkstring.h"
+#include "lib/sorter/common.h"
+
+#include <string.h>
+#include <sys/time.h>
+#include <time.h>
+
+// Size of bucket b as reported by sbuck_size(), formatted by stk_fsize() for trace messages.
+#define F_BSIZE(b) stk_fsize(sbuck_size(b))
+
+// Start timing a pass: (re)initialize the timer kept in ctx->start_time.
+static void
+sorter_start_timer(struct sort_context *ctx)
+{
+ init_timer(&ctx->start_time);
+}
+
+// Finish timing a pass: remember the elapsed time in ctx->last_pass_time
+// (read later by sorter_speed()) and add it to the counter *account_to.
+// NOTE(review): the totals are printed divided by 1000 as seconds, so the
+// unit is presumably milliseconds -- confirm against get_timer().
+static void
+sorter_stop_timer(struct sort_context *ctx, uns *account_to)
+{
+ ctx->last_pass_time = get_timer(&ctx->start_time);
+ *account_to += ctx->last_pass_time;
+}
+
+// Speed of the last timed pass for `size` bytes of data, as printed in the
+// "%dMB/s" trace messages. Yields 0 when either the size or the measured
+// time is zero, which also avoids dividing by zero.
+static uns
+sorter_speed(struct sort_context *ctx, u64 size)
+{
+  if (!size || !ctx->last_pass_time)
+    return 0;
+
+  double megabytes = (double)size / (1<<20);
+  return (uns)(megabytes * 1000 / ctx->last_pass_time);
+}
+
+// Produce one presorted run from bucket `in`. Returns whatever the selected
+// presorter returns; the callers treat non-zero as "call me again".
+//
+// Without SBF_CUSTOM_PRESORT the work is delegated to ctx->internal_sort(),
+// which gets both `out` and `out_only`. With the flag set, the user-supplied
+// ctx->custom_presort() writes straight into `out`, using the big buffer.
+static int
+sorter_presort(struct sort_context *ctx, struct sort_bucket *in, struct sort_bucket *out, struct sort_bucket *out_only)
+{
+  sorter_alloc_buf(ctx);
+
+  if (!(in->flags & SBF_CUSTOM_PRESORT))
+    return ctx->internal_sort(ctx, in, out, out_only);
+
+  /*
+   *  The trick with automatic joining, which we use for the normal presorter,
+   *  is not necessary with the custom presorter, because the custom presorter
+   *  is never called in the middle of the sorted data.
+   */
+  struct fastbuf *dest = sbuck_write(out);
+  out->runs++;
+  return ctx->custom_presort(dest, ctx->big_buf, ctx->big_buf_size);
+}
+
+// Find out whether the output of sorting `b` may be appended directly to the
+// bucket preceding it in the list. Returns that bucket and stores its current
+// size to *sizep if it carries SBF_FINAL; returns NULL (leaving *sizep
+// untouched) otherwise, or when joining is disabled by SORT_DEBUG_NO_JOIN.
+static struct sort_bucket *
+sbuck_join_to(struct sort_bucket *b, sh_off_t *sizep)
+{
+  struct sort_bucket *prev;
+
+  if (sorter_debug & SORT_DEBUG_NO_JOIN)
+    return NULL;
+
+  // A preceding bucket is guaranteed to exist
+  prev = (struct sort_bucket *) b->n.prev;
+  if (prev->flags & SBF_FINAL)
+    {
+      ASSERT(prev->runs == 1);
+      *sizep = sbuck_size(prev);
+      return prev;
+    }
+  return NULL;
+}
+
+// Finish a sorting step whose result went either to a fresh bucket `b` or
+// was appended to the bucket `join`. Returns the number of bytes produced.
+//
+//  - If `join` has gained a second run, the data were appended to it: `b`
+//    (if any) is dropped, the run count goes back to one and the result is
+//    how much `join` grew over join_size.
+//  - Otherwise `b` (if any) is linked into the bucket list after list_pos.
+//  - With neither, nothing was produced and 0 is returned.
+static sh_off_t
+sbuck_ins_or_join(struct sort_bucket *b, cnode *list_pos, struct sort_bucket *join, sh_off_t join_size)
+{
+  if (join && join->runs >= 2)
+    {
+      if (b)
+	sbuck_drop(b);
+      ASSERT(join->runs == 2);
+      join->runs--;
+      return sbuck_size(join) - join_size;
+    }
+
+  if (!b)
+    return 0;
+
+  clist_insert_after(&b->n, list_pos);
+  return sbuck_size(b);
+}
+
+// Attach the single-run bucket `b` to the final bucket, which must be its
+// predecessor in the bucket list. If the final bucket already has a file,
+// the contents of `b` are copied to its end; if not, `b` simply takes over
+// the role of the final bucket.
+static void
+sorter_join(struct sort_bucket *b)
+{
+  struct sort_bucket *final = (struct sort_bucket *) b->n.prev;
+  ASSERT(final->flags & SBF_FINAL);
+  ASSERT(b->runs == 1);
+
+  if (sbuck_has_file(final))
+    {
+      SORT_TRACE("Copying to output file: %s", F_BSIZE(b));
+      struct fastbuf *src = sbuck_read(b);
+      struct fastbuf *dest = sbuck_write(final);
+      bbcopy(src, dest, ~0U);
+      sbuck_drop(b);
+      return;
+    }
+
+  // The final bucket doesn't have any file associated yet, so the new
+  // bucket replaces it.
+  SORT_XTRACE(3, "Replaced final bucket");
+  b->flags |= SBF_FINAL;
+  sbuck_drop(final);
+}
+
+/*
+ * Sort bucket `b` by the two-way merge strategy.
+ *
+ * Unless presorting is disabled, the input is first presorted into runs,
+ * which are written alternately to two buckets ins[0] and ins[1]. If the
+ * very first presort call consumes the whole input, the result is final
+ * and we return immediately. Then pairs of buckets are merged repeatedly
+ * until the second one ends up empty; the survivor is inserted to the
+ * bucket list at the place where `b` used to be.
+ *
+ * Whenever a step is known to produce a single run and the preceding
+ * bucket is the final one (`join`), the output is appended directly to it.
+ *
+ * When presorting is disabled by SORT_DEBUG_NO_PRESORT, ins[1] is NULL in
+ * the first pass and ins[0] is the raw input, so the joining shortcut must
+ * not be taken there (it used to dereference the NULL pointer).
+ */
+static void
+sorter_twoway(struct sort_context *ctx, struct sort_bucket *b)
+{
+  struct sort_bucket *ins[3] = { NULL }, *outs[3] = { NULL };
+  cnode *list_pos = b->n.prev;
+  sh_off_t join_size;
+  struct sort_bucket *join = sbuck_join_to(b, &join_size);
+
+  if (!(sorter_debug & SORT_DEBUG_NO_PRESORT) || (b->flags & SBF_CUSTOM_PRESORT))
+    {
+      SORT_XTRACE(3, "%s", ((b->flags & SBF_CUSTOM_PRESORT) ? "Custom presorting" : "Presorting"));
+      sorter_start_timer(ctx);
+      ins[0] = sbuck_new(ctx);
+      if (!sorter_presort(ctx, b, ins[0], join ? : ins[0]))
+	{
+	  // Everything fitted into a single run, no merging is needed
+	  sorter_stop_timer(ctx, &ctx->total_pre_time);
+	  sh_off_t size = sbuck_ins_or_join(ins[0], list_pos, join, join_size);
+	  SORT_XTRACE(((b->flags & SBF_SOURCE) ? 1 : 3), "Sorted in memory (%s, %dMB/s)", stk_fsize(size), sorter_speed(ctx, size));
+	  sbuck_drop(b);
+	  return;
+	}
+
+      // Distribute the remaining runs alternately, starting with ins[1]
+      ins[1] = sbuck_new(ctx);
+      int i = 1;
+      while (sorter_presort(ctx, b, ins[i], ins[i]))
+	i = 1-i;
+      sbuck_drop(b);
+      sorter_stop_timer(ctx, &ctx->total_pre_time);
+      SORT_TRACE("Presorting pass (%d+%d runs, %s+%s, %dMB/s)",
+		 ins[0]->runs, ins[1]->runs,
+		 F_BSIZE(ins[0]), F_BSIZE(ins[1]),
+		 sorter_speed(ctx, sbuck_size(ins[0]) + sbuck_size(ins[1])));
+    }
+  else
+    {
+      SORT_XTRACE(2, "Presorting disabled");
+      ins[0] = b;
+    }
+
+  SORT_XTRACE(3, "Main sorting");
+  uns pass = 0;
+  do {
+    ++pass;
+    sorter_start_timer(ctx);
+    if (ins[1] && ins[0]->runs <= 1 && ins[1]->runs <= 1 && join)
+      {
+	// This is guaranteed to produce a single run, so join if possible
+	outs[0] = join;
+	outs[1] = NULL;
+	ctx->twoway_merge(ctx, ins, outs);
+	sh_off_t size = sbuck_ins_or_join(NULL, NULL, join, join_size);
+	sorter_stop_timer(ctx, &ctx->total_ext_time);
+	SORT_TRACE("Mergesort pass %d (final run, %s, %dMB/s)", pass, stk_fsize(size), sorter_speed(ctx, size));
+	sbuck_drop(ins[0]);
+	sbuck_drop(ins[1]);
+	return;
+      }
+    outs[0] = sbuck_new(ctx);
+    outs[1] = sbuck_new(ctx);
+    outs[2] = NULL;
+    ctx->twoway_merge(ctx, ins, outs);
+    sorter_stop_timer(ctx, &ctx->total_ext_time);
+    SORT_TRACE("Mergesort pass %d (%d+%d runs, %s+%s, %dMB/s)", pass,
+	       outs[0]->runs, outs[1]->runs,
+	       F_BSIZE(outs[0]), F_BSIZE(outs[1]),
+	       sorter_speed(ctx, sbuck_size(outs[0]) + sbuck_size(outs[1])));
+    sbuck_drop(ins[0]);
+    sbuck_drop(ins[1]);
+    // The outputs of this pass become the inputs of the next one
+    memcpy(ins, outs, 3*sizeof(struct sort_bucket *));
+  } while (sbuck_have(ins[1]));
+
+  sbuck_drop(ins[1]);
+  clist_insert_after(&ins[0]->n, list_pos);
+}
+
+/*
+ * Sort bucket `b` by the multi-way merge strategy.
+ *
+ * The input is presorted into parts, each kept in its own bucket on the
+ * local list `parts` and swapped out. If at most one part results, it is
+ * the sorted output. Otherwise up to 2^sorter_max_multiway_bits parts are
+ * repeatedly taken from the head of the list and merged by
+ * ctx->multiway_merge(); the result goes back to the tail of the list until
+ * the list becomes empty after a merge, which is then the final one.
+ *
+ * If the preceding bucket is the final one (`join`), the first presort call
+ * and the last merge write directly to it when possible.
+ */
+static void
+sorter_multiway(struct sort_context *ctx, struct sort_bucket *b)
+{
+ clist parts;
+ cnode *list_pos = b->n.prev;
+ sh_off_t join_size;
+ struct sort_bucket *join = sbuck_join_to(b, &join_size);
+ uns trace_level = (b->flags & SBF_SOURCE) ? 1 : 3;
+
+ clist_init(&parts);
+ ASSERT(!(sorter_debug & SORT_DEBUG_NO_PRESORT));
+ SORT_XTRACE(3, "%s", ((b->flags & SBF_CUSTOM_PRESORT) ? "Custom presorting" : "Presorting"));
+ uns cont;
+ uns part_cnt = 0;
+ u64 total_size = 0;
+ sorter_start_timer(ctx);
+ do
+ {
+ struct sort_bucket *p = sbuck_new(ctx);
+ // Only the very first part may be offered the join bucket as its "out_only" target
+ cont = sorter_presort(ctx, b, p, (!part_cnt && join) ? join : p);
+ if (sbuck_have(p))
+ {
+ part_cnt++;
+ clist_add_tail(&parts, &p->n);
+ total_size += sbuck_size(p);
+ sbuck_swap_out(p);
+ }
+ else
+ sbuck_drop(p);
+ }
+ while (cont);
+ sorter_stop_timer(ctx, &ctx->total_pre_time);
+ sorter_free_buf(ctx);
+ sbuck_drop(b);
+
+ if (part_cnt <= 1)
+ {
+ // Either a single part exists (insert it), or there is none and the data may have gone to join
+ sh_off_t size = sbuck_ins_or_join(clist_head(&parts), list_pos, (part_cnt ? NULL : join), join_size);
+ SORT_XTRACE(trace_level, "Sorted in memory (%s, %dMB/s)", stk_fsize(size), sorter_speed(ctx, size));
+ return;
+ }
+
+ SORT_TRACE("Multi-way presorting pass (%d parts, %s, %dMB/s)", part_cnt, stk_fsize(total_size), sorter_speed(ctx, total_size));
+
+ uns max_ways = 1 << sorter_max_multiway_bits;
+ // One extra slot for the NULL terminator expected by multiway_merge
+ struct sort_bucket *ways[max_ways+1];
+ SORT_XTRACE(3, "Starting up to %d-way merge", max_ways);
+ for (;;)
+ {
+ // Take up to max_ways parts from the head of the list
+ uns n = 0;
+ struct sort_bucket *p;
+ while (n < max_ways && (p = clist_head(&parts)))
+ {
+ clist_remove(&p->n);
+ ways[n++] = p;
+ }
+ ways[n] = NULL;
+ ASSERT(n > 1);
+
+ // If nothing is left on the list, this is the last merge and it may write to join
+ struct sort_bucket *out;
+ if (clist_empty(&parts) && join)
+ out = join;
+ else
+ out = sbuck_new(ctx);
+ sorter_start_timer(ctx);
+ ctx->multiway_merge(ctx, ways, out);
+ sorter_stop_timer(ctx, &ctx->total_ext_time);
+
+ for (uns i=0; i<n; i++)
+ sbuck_drop(ways[i]);
+
+ if (clist_empty(&parts))
+ {
+ sh_off_t size = sbuck_ins_or_join((join ? NULL : out), list_pos, join, join_size);
+ SORT_TRACE("Multi-way merge completed (%d ways, %s, %dMB/s)", n, stk_fsize(size), sorter_speed(ctx, size));
+ return;
+ }
+ else
+ {
+ // Intermediate result: queue it for a later merge
+ sbuck_swap_out(out);
+ clist_add_tail(&parts, &out->n);
+ SORT_TRACE("Multi-way merge pass (%d ways, %s, %dMB/s)", n, F_BSIZE(out), sorter_speed(ctx, sbuck_size(out)));
+ }
+ }
+}
+
+/*
+ * Split bucket `b` into 2^bits new buckets according to the next `bits`
+ * bits of the hash, using ctx->radix_split(). The new buckets are inserted
+ * to the bucket list right after `b` in increasing index order, each with
+ * hash_bits reduced by the number of bits consumed, and `b` is dropped.
+ *
+ * The requested number of bits is increased by sorter_add_radix_bits, but
+ * the result is limited both by sorter_max_radix_bits and by the number of
+ * hash bits the bucket still has.
+ */
+static void
+sorter_radix(struct sort_context *ctx, struct sort_bucket *b, uns bits)
+{
+  // Add more bits if requested and allowed.
+  bits = MIN(bits + sorter_add_radix_bits, sorter_max_radix_bits);
+  // Never use more bits than remain, otherwise b->hash_bits - bits wraps around.
+  bits = MIN(bits, b->hash_bits);
+
+  uns nbuck = 1 << bits;
+  SORT_XTRACE(3, "Running radix split on %s with hash %d bits of %d (expecting %s buckets)",
+	      F_BSIZE(b), bits, b->hash_bits, stk_fsize(sbuck_size(b) / nbuck));
+  sorter_free_buf(ctx);
+  sorter_start_timer(ctx);
+
+  struct sort_bucket **outs = alloca(nbuck * sizeof(struct sort_bucket *));
+  // Going backwards and always inserting just after b leaves outs[0] first in the list
+  for (uns i=nbuck; i--; )
+    {
+      outs[i] = sbuck_new(ctx);
+      outs[i]->hash_bits = b->hash_bits - bits;
+      clist_insert_after(&outs[i]->n, &b->n);
+    }
+
+  ctx->radix_split(ctx, b, outs, b->hash_bits - bits, bits);
+
+  // Gather statistics for the trace message
+  u64 min = ~(u64)0, max = 0, sum = 0;
+  for (uns i=0; i<nbuck; i++)
+    {
+      u64 s = sbuck_size(outs[i]);
+      min = MIN(min, s);
+      max = MAX(max, s);
+      sum += s;
+      if (nbuck > 4)
+	sbuck_swap_out(outs[i]);
+    }
+
+  sorter_stop_timer(ctx, &ctx->total_ext_time);
+  SORT_TRACE("Radix split (%d buckets, %s min, %s max, %s avg, %dMB/s)", nbuck,
+	     stk_fsize(min), stk_fsize(max), stk_fsize(sum / nbuck), sorter_speed(ctx, sum));
+  sbuck_drop(b);
+}
+
+/*
+ * Choose and run a sorting strategy for bucket `b`.
+ *
+ * `bits` is the number of times the bucket size must be halved to fit into
+ * 80% of the internal sorter's estimate. From it we derive how many bits
+ * a radix split (radix_bits) and a multi-way merge (multiway_bits) could
+ * handle; either is 0 if that strategy is unavailable, disabled by a debug
+ * flag, or falls below its configured minimum. The choice itself is the
+ * cascade at the end of the function.
+ */
+static void
+sorter_decide(struct sort_context *ctx, struct sort_bucket *b)
+{
+ // Drop empty buckets
+ if (!sbuck_have(b))
+ {
+ SORT_XTRACE(4, "Dropping empty bucket");
+ sbuck_drop(b);
+ return;
+ }
+
+ // How many bits of bucket size we have to reduce before it fits in the RAM?
+ // (this is insanely large if the input size is unknown, but it serves our purpose)
+ u64 insize = sbuck_size(b);
+ u64 mem = ctx->internal_estimate(ctx, b) * 0.8; // Magical factor accounting for various non-uniformities
+ uns bits = 0;
+ while ((insize >> bits) > mem)
+ bits++;
+
+ // Calculate the possibilities of radix splits
+ uns radix_bits;
+ if (!ctx->radix_split ||
+ (b->flags & SBF_CUSTOM_PRESORT) ||
+ (sorter_debug & SORT_DEBUG_NO_RADIX))
+ radix_bits = 0;
+ else
+ {
+ radix_bits = MIN(bits, b->hash_bits);
+ radix_bits = MIN(radix_bits, sorter_max_radix_bits);
+ if (radix_bits < sorter_min_radix_bits)
+ radix_bits = 0;
+ }
+
+ // The same for multi-way merges
+ uns multiway_bits;
+ if (!ctx->multiway_merge ||
+ (sorter_debug & SORT_DEBUG_NO_MULTIWAY) ||
+ (sorter_debug & SORT_DEBUG_NO_PRESORT))
+ multiway_bits = 0;
+ else
+ {
+ multiway_bits = MIN(bits, sorter_max_multiway_bits);
+ if (multiway_bits < sorter_min_multiway_bits)
+ multiway_bits = 0;
+ }
+
+ SORT_XTRACE(3, "Decisions: size=%s max=%s runs=%d bits=%d hash=%d -> radix=%d multi=%d",
+ stk_fsize(insize), stk_fsize(mem), b->runs, bits, b->hash_bits,
+ radix_bits, multiway_bits);
+
+ // If the input already consists of a single run, just join it
+ // (sorter_join() asserts that b->runs is exactly 1)
+ if (b->runs)
+ return sorter_join(b);
+
+ // If everything fits in memory, the 2-way strategy will sort it in memory
+ if (!bits)
+ return sorter_twoway(ctx, b);
+
+ // If we can reduce everything in one pass, do so and prefer radix splits
+ if (radix_bits == bits)
+ return sorter_radix(ctx, b, radix_bits);
+ if (multiway_bits == bits)
+ return sorter_multiway(ctx, b);
+
+ // Otherwise, reduce as much as possible and again prefer radix splits
+ if (radix_bits)
+ return sorter_radix(ctx, b, radix_bits);
+ if (multiway_bits)
+ return sorter_multiway(ctx, b);
+
+ // Fall back to 2-way strategy if nothing else applies
+ return sorter_twoway(ctx, b);
+}
+
+/*
+ * Entry point of the sorter: sort the input described by `ctx`.
+ *
+ * The bucket list starts as [output bucket, input bucket]. As long as any
+ * bucket follows the first one in the list, sorter_decide() is called on
+ * it; the strategies replace it by smaller or sorted buckets, or join its
+ * data to the first bucket. When only that bucket remains, it is opened
+ * for reading and returned in ctx->out_fb.
+ *
+ * The memory pool created here holds all bucket structures and is deleted
+ * before returning.
+ */
+void
+sorter_run(struct sort_context *ctx)
+{
+ ctx->pool = mp_new(4096);
+ clist_init(&ctx->bucket_list);
+ sorter_prepare_buf(ctx);
+ asort_start_threads(0);
+
+ // Create bucket containing the source
+ struct sort_bucket *bin = sbuck_new(ctx);
+ bin->flags = SBF_SOURCE | SBF_OPEN_READ;
+ // With a custom presorter the input fastbuf is not attached to the bucket
+ if (ctx->custom_presort)
+ bin->flags |= SBF_CUSTOM_PRESORT;
+ else
+ bin->fb = ctx->in_fb;
+ bin->ident = "in";
+ bin->size = ctx->in_size;
+ bin->hash_bits = ctx->hash_bits;
+ clist_add_tail(&ctx->bucket_list, &bin->n);
+ SORT_XTRACE(2, "Input size: %s, %d hash bits", F_BSIZE(bin), bin->hash_bits);
+ // Inputs below the SmallInput limit use the alternative file access parameters
+ ctx->fb_params = (bin->size < sorter_small_input) ? &sorter_small_fb_params : &sorter_fb_params;
+
+ // Create bucket for the output
+ struct sort_bucket *bout = sbuck_new(ctx);
+ bout->flags = SBF_FINAL;
+ // A caller-supplied output fastbuf (if any) is written to directly
+ if (bout->fb = ctx->out_fb)
+ bout->flags |= SBF_OPEN_WRITE;
+ bout->ident = "out";
+ bout->runs = 1;
+ clist_add_head(&ctx->bucket_list, &bout->n);
+
+ // Repeatedly sort buckets
+ // (bout is re-read each time, because sorter_join() may replace the final bucket)
+ struct sort_bucket *b;
+ while (bout = clist_head(&ctx->bucket_list), b = clist_next(&ctx->bucket_list, &bout->n))
+ sorter_decide(ctx, b);
+
+ asort_stop_threads();
+ sorter_free_buf(ctx);
+ sbuck_write(bout); // Force empty bucket to a file
+ SORT_XTRACE(2, "Final size: %s", F_BSIZE(bout));
+ SORT_XTRACE(2, "Final timings: %.3fs external sorting, %.3fs presorting, %.3fs internal sorting",
+ ctx->total_ext_time/1000., ctx->total_pre_time/1000., ctx->total_int_time/1000.);
+ ctx->out_fb = sbuck_read(bout);
+ mp_delete(ctx->pool);
+}
--- /dev/null
+/*
+ * UCW Library -- Universal Sorter: Fixed-Size Internal Sorting Module
+ *
+ * (c) 2007 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/stkstring.h"
+
+// Parameters for the array sorter included below: it sorts the keys
+// themselves, compares them by P(compare) and, when internal radix-sorting
+// is enabled, takes hashes from P(hash).
+#define ASORT_PREFIX(x) SORT_PREFIX(array_##x)
+#define ASORT_KEY_TYPE P(key)
+#define ASORT_LT(x,y) (P(compare)(&(x), &(y)) < 0)
+#ifdef SORT_INTERNAL_RADIX
+# define ASORT_HASH(x) P(hash)(&(x))
+# ifdef SORT_LONG_HASH
+# define ASORT_LONG_HASH
+# endif
+#endif
+#include "lib/sorter/array.h"
+
+/*
+ * This is a more efficient implementation of the internal sorter,
+ * which runs under the following assumptions:
+ *
+ * - the keys have fixed (and small) size
+ * - no data are present after the key
+ * - unification does not require any workspace
+ */
+
+// Workspace needed per key, in bytes: one key pointer when unification is
+// enabled (the pointer array passed to write_merged), or a whole second key
+// when internal radix-sorting is enabled, whichever is larger. Zero if
+// neither feature is compiled in.
+static size_t P(internal_workspace)(void)
+{
+ size_t workspace = 0;
+#ifdef SORT_UNIFY
+ workspace = sizeof(P(key) *);
+#endif
+#ifdef SORT_INTERNAL_RADIX
+ workspace = MAX(workspace, sizeof(P(key)));
+#endif
+ return workspace;
+}
+
+// How many keys fit in the sorting buffer together with their workspace.
+// When a workspace is needed at all, one page is set aside, because the
+// workspace starts at a page boundary behind the keys.
+static uns P(internal_num_keys)(struct sort_context *ctx)
+{
+  size_t per_key_ws = P(internal_workspace)();
+  size_t usable = ctx->big_buf_size;
+
+  if (per_key_ws)
+    usable -= CPU_PAGE_SIZE;
+
+  u64 fit = usable / (sizeof(P(key)) + per_key_ws);
+  // The number of records must fit in uns
+  return MIN(fit, ~0U);
+}
+
+/*
+ * Internal sorter for fixed-size keys: read as many keys from `bin` as fit
+ * in the sorting buffer, sort them and write them out as one run.
+ *
+ * The run is written to `bout` if the buffer got completely full, and to
+ * `bout_only` if the input ended first. Returns non-zero iff the buffer got
+ * full, i.e., there may be more input to process; returns 0 without
+ * writing anything if no key could be read.
+ *
+ * With SORT_UNIFY, sequences of equal keys are passed to write_merged()
+ * instead of being written one by one.
+ */
+static int P(internal)(struct sort_context *ctx, struct sort_bucket *bin, struct sort_bucket *bout, struct sort_bucket *bout_only)
+{
+  sorter_alloc_buf(ctx);
+  struct fastbuf *in = sbuck_read(bin);
+  P(key) *buf = ctx->big_buf;
+  uns maxkeys = P(internal_num_keys)(ctx);
+
+  SORT_XTRACE(5, "s-fixint: Reading (maxkeys=%u, hash_bits=%d)", maxkeys, bin->hash_bits);
+  uns n = 0;
+  while (n < maxkeys && P(read_key)(in, &buf[n]))
+    n++;
+  if (!n)
+    return 0;
+  // The workspace follows the keys, starting at the next page boundary
+  void *workspace UNUSED = ALIGN_PTR(&buf[n], CPU_PAGE_SIZE);
+
+  SORT_XTRACE(4, "s-fixint: Sorting %u items (%s items, %s workspace)",
+	n,
+	stk_fsize(n * sizeof(P(key))),
+	stk_fsize(n * P(internal_workspace)()));
+  timestamp_t timer;
+  init_timer(&timer);
+  buf = P(array_sort)(buf, n
+#ifdef SORT_INTERNAL_RADIX
+    , workspace, bin->hash_bits
+#endif
+    );
+  // If the sorted array ended up elsewhere, the start of the buffer is free to serve as workspace
+  if ((void *)buf != ctx->big_buf)
+    workspace = ctx->big_buf;
+  ctx->total_int_time += get_timer(&timer);
+
+  SORT_XTRACE(5, "s-fixint: Writing");
+  if (n < maxkeys)
+    bout = bout_only;
+  struct fastbuf *out = sbuck_write(bout);
+  bout->runs++;
+  uns merged UNUSED = 0;
+  for (uns i=0; i<n; i++)
+    {
+#ifdef SORT_UNIFY
+      if (i < n-1 && !P(compare)(&buf[i], &buf[i+1]))
+	{
+	  // Collect pointers to all keys equal to buf[i]. The scan must stop
+	  // at the last record read, so the counter has its own name instead
+	  // of shadowing n.
+	  P(key) **keys = workspace;
+	  uns cnt = 2;
+	  keys[0] = &buf[i];
+	  keys[1] = &buf[i+1];
+	  while (i + cnt < n && !P(compare)(&buf[i], &buf[i+cnt]))
+	    {
+	      keys[cnt] = &buf[i+cnt];
+	      cnt++;
+	    }
+	  P(write_merged)(out, keys, NULL, cnt, NULL);
+	  merged += cnt - 1;
+	  i += cnt - 1;
+	  continue;
+	}
+#endif
+#ifdef SORT_ASSERT_UNIQUE
+      ASSERT(i == n-1 || P(compare)(&buf[i], &buf[i+1]) < 0);
+#endif
+      P(write_key)(out, &buf[i]);
+    }
+#ifdef SORT_UNIFY
+  SORT_XTRACE(4, "Merging reduced %u records", merged);
+#endif
+
+  return (n == maxkeys);
+}
+
+// Estimate of how many bytes of input a single call of the internal sorter
+// can process: the total size of the keys which fit in the buffer, minus one.
+static u64
+P(internal_estimate)(struct sort_context *ctx, struct sort_bucket *b UNUSED)
+{
+ return P(internal_num_keys)(ctx) * sizeof(P(key)) - 1; // -1 since if the buffer is full, we don't recognize EOF
+}
--- /dev/null
+/*
+ * UCW Library -- Universal Sorter: Internal Sorting Module
+ *
+ * (c) 2007 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/stkstring.h"
+
+#ifdef SORT_INTERNAL_RADIX
+/* Keep copies of the items' hashes to save cache misses */
+#define SORT_COPY_HASH
+#endif
+
+// One element of the array being sorted: a pointer to the key stored
+// elsewhere in the big buffer, optionally accompanied by a copy of its hash.
+typedef struct {
+ P(key) *key;
+#ifdef SORT_COPY_HASH
+ P(hash_t) hash;
+#endif
+} P(internal_item_t);
+
+// Parameters for the array sorter included below: it sorts internal_item_t
+// structures. With copied hashes the items are ordered by hash first and
+// (except in SORT_INT mode) by P(compare) among equal hashes; without them
+// by P(compare) only.
+#define ASORT_PREFIX(x) SORT_PREFIX(array_##x)
+#define ASORT_KEY_TYPE P(internal_item_t)
+#ifdef SORT_COPY_HASH
+# ifdef SORT_INT
+# define ASORT_LT(x,y) ((x).hash < (y).hash) // In this mode, the hash is the value
+# else
+# define ASORT_LT(x,y) ((x).hash < (y).hash || (x).hash == (y).hash && P(compare)((x).key, (y).key) < 0)
+# endif
+#else
+# define ASORT_LT(x,y) (P(compare)((x).key, (y).key) < 0)
+#endif
+#ifdef SORT_INTERNAL_RADIX
+# ifdef SORT_COPY_HASH
+# define ASORT_HASH(x) (x).hash
+# else
+# define ASORT_HASH(x) P(hash)((x).key)
+# endif
+# ifdef SORT_LONG_HASH
+# define ASORT_LONG_HASH
+# endif
+#endif
+#include "lib/sorter/array.h"
+
+/*
+ * The big_buf has the following layout:
+ *
+ * +-------------------------------------------------------------------------------+
+ * | array of internal_item's |
+ * +-------------------------------------------------------------------------------+
+ * | padding to make the following part page-aligned |
+ * +--------------------------------+----------------------------------------------+
+ * | shadow copy of item array | array of pointers to data for write_merged() |
+ * | used if radix-sorting +----------------------------------------------+
+ * | | workspace for write_merged() |
+ * +--------------------------------+----------------------------------------------+
+ * | +---------+ |
+ * | | key | |
+ * | +---------+ |
+ * | sequence of | padding | |
+ * | items +---------+ |
+ * | | data | |
+ * | +---------+ |
+ * | | padding | |
+ * | +---------+ |
+ * +-------------------------------------------------------------------------------+
+ *
+ * (the data which are in different columns are never accessed simultaneously,
+ * so we use a single buffer for both)
+ */
+
+// Returns the address just behind the key, where P(internal) stores the
+// data of the record. With SORT_UNIFY the key size is first rounded up to
+// CPU_STRUCT_ALIGN, matching the layout used when the record was stored.
+static inline void *P(internal_get_data)(P(key) *key)
+{
+ uns ksize = SORT_KEY_SIZE(*key);
+#ifdef SORT_UNIFY
+ ksize = ALIGN_TO(ksize, CPU_STRUCT_ALIGN);
+#endif
+ return (byte *) key + ksize;
+}
+
+// Workspace to reserve for one record, in bytes: a data pointer for
+// write_merged() plus the record's unification workspace (when those
+// features are enabled), but at least one internal_item_t if the items can
+// be radix-sorted.
+static inline size_t P(internal_workspace)(P(key) *key UNUSED)
+{
+ size_t ws = 0;
+#ifdef SORT_UNIFY
+ ws += sizeof(void *);
+#endif
+#ifdef SORT_UNIFY_WORKSPACE
+ ws += SORT_UNIFY_WORKSPACE(*key);
+#endif
+#ifdef SORT_INTERNAL_RADIX
+ ws = MAX(ws, sizeof(P(internal_item_t)));
+#endif
+ return ws;
+}
+
+/*
+ * General internal sorter: read records from `bin` until the sorting
+ * buffer is full, sort them and write them out as one run.
+ *
+ * The run goes to `bout` if more input remains and to `bout_only` if the
+ * input has been exhausted. Returns non-zero if the function should be
+ * called again for the rest of the input, 0 if there was nothing to read
+ * or everything has been processed.
+ *
+ * A key which has been read but did not fit is kept in ctx->key_buf with
+ * ctx->more_keys set, and is picked up by the next call.
+ */
+static int P(internal)(struct sort_context *ctx, struct sort_bucket *bin, struct sort_bucket *bout, struct sort_bucket *bout_only)
+{
+ sorter_alloc_buf(ctx);
+ struct fastbuf *in = sbuck_read(bin);
+
+ // Get the first key: either the one left over from the previous call, or a fresh one
+ P(key) key, *keybuf = ctx->key_buf;
+ if (!keybuf)
+ keybuf = ctx->key_buf = sorter_alloc(ctx, sizeof(key));
+ if (ctx->more_keys)
+ {
+ key = *keybuf;
+ ctx->more_keys = 0;
+ }
+ else if (!P(read_key)(in, &key))
+ return 0;
+
+ size_t bufsize = ctx->big_buf_size;
+#ifdef SORT_VAR_DATA
+ // A record too large for the buffer is copied to the output as a run of its own
+ if (sizeof(key) + 2*CPU_PAGE_SIZE + SORT_DATA_SIZE(key) + P(internal_workspace)(&key) > bufsize)
+ {
+ SORT_XTRACE(4, "s-internal: Generating a giant run");
+ struct fastbuf *out = sbuck_write(bout);
+ P(copy_data)(&key, in, out);
+ bout->runs++;
+ return 1; // We don't know, but 1 is always safe
+ }
+#endif
+
+ SORT_XTRACE(5, "s-internal: Reading");
+ // Item structures grow upwards from the start of the buffer, records grow
+ // downwards from its end; `remains` is the space not yet accounted for
+ // (one page is kept aside for the alignment padding).
+ P(internal_item_t) *item_array = ctx->big_buf, *item = item_array, *last_item;
+ byte *end = (byte *) ctx->big_buf + bufsize;
+ size_t remains = bufsize - CPU_PAGE_SIZE;
+ do
+ {
+ uns ksize = SORT_KEY_SIZE(key);
+#ifdef SORT_UNIFY
+ uns ksize_aligned = ALIGN_TO(ksize, CPU_STRUCT_ALIGN);
+#else
+ uns ksize_aligned = ksize;
+#endif
+ uns dsize = SORT_DATA_SIZE(key);
+ uns recsize = ALIGN_TO(ksize_aligned + dsize, CPU_STRUCT_ALIGN);
+ size_t totalsize = recsize + sizeof(P(internal_item_t)) + P(internal_workspace)(&key);
+ if (unlikely(totalsize > remains
+#ifdef CPU_64BIT_POINTERS
+ || item >= item_array + ~0U // The number of items must fit in an uns
+#endif
+ ))
+ {
+ // Buffer full: save the key for the next call
+ ctx->more_keys = 1;
+ *keybuf = key;
+ break;
+ }
+ remains -= totalsize;
+ end -= recsize;
+ memcpy(end, &key, ksize);
+#ifdef SORT_VAR_DATA
+ breadb(in, end + ksize_aligned, dsize);
+#endif
+ item->key = (P(key)*) end;
+#ifdef SORT_COPY_HASH
+ item->hash = P(hash)(item->key);
+#endif
+ item++;
+ }
+ while (P(read_key)(in, &key));
+ last_item = item;
+
+ uns count = last_item - item_array;
+ // The workspace starts at the first page boundary behind the item array
+ void *workspace UNUSED = ALIGN_PTR(last_item, CPU_PAGE_SIZE);
+ SORT_XTRACE(4, "s-internal: Read %u items (%s items, %s workspace, %s data)",
+ count,
+ stk_fsize((byte*)last_item - (byte*)item_array),
+ stk_fsize(end - (byte*)last_item - remains),
+ stk_fsize((byte*)ctx->big_buf + bufsize - end));
+ timestamp_t timer;
+ init_timer(&timer);
+ item_array = P(array_sort)(item_array, count
+#ifdef SORT_INTERNAL_RADIX
+ , workspace, bin->hash_bits
+#endif
+ );
+ // If the sorted items ended up elsewhere, the start of the buffer becomes the workspace
+ if ((void *)item_array != ctx->big_buf)
+ workspace = ctx->big_buf;
+ last_item = item_array + count;
+ ctx->total_int_time += get_timer(&timer);
+
+ SORT_XTRACE(5, "s-internal: Writing");
+ if (!ctx->more_keys)
+ bout = bout_only;
+ struct fastbuf *out = sbuck_write(bout);
+ bout->runs++;
+ uns merged UNUSED = 0;
+ for (item = item_array; item < last_item; item++)
+ {
+#ifdef SORT_UNIFY
+ if (item < last_item - 1 && !P(compare)(item->key, item[1].key))
+ {
+ // Rewrite the item structures with just pointers to keys and place
+ // pointers to data in the workspace.
+ P(key) **key_array = (void *) item;
+ void **data_array = workspace;
+ key_array[0] = item[0].key;
+ data_array[0] = P(internal_get_data)(key_array[0]);
+ uns cnt;
+ for (cnt=1; item+cnt < last_item && !P(compare)(key_array[0], item[cnt].key); cnt++)
+ {
+ key_array[cnt] = item[cnt].key;
+ data_array[cnt] = P(internal_get_data)(key_array[cnt]);
+ }
+ // The space behind the data pointers is handed over as the unification workspace
+ P(write_merged)(out, key_array, data_array, cnt, data_array+cnt);
+ item += cnt - 1;
+ merged += cnt - 1;
+ continue;
+ }
+#endif
+#ifdef SORT_ASSERT_UNIQUE
+ ASSERT(item == last_item-1 || P(compare)(item->key, item[1].key) < 0);
+#endif
+ P(write_key)(out, item->key);
+#ifdef SORT_VAR_DATA
+ bwrite(out, P(internal_get_data)(item->key), SORT_DATA_SIZE(*item->key));
+#endif
+ }
+#ifdef SORT_UNIFY
+ SORT_XTRACE(4, "Merging reduced %u records", merged);
+#endif
+
+ return ctx->more_keys;
+}
+
+// Rough estimate of how many bytes of input fit in the sorting buffer.
+// It assumes an average key size (a quarter of the key structure for
+// variable-size keys, the whole structure otherwise), adds the per-record
+// overhead of the item structure and workspace, and returns the number of
+// such records fitting in the buffer multiplied by the average key size.
+static u64
+P(internal_estimate)(struct sort_context *ctx, struct sort_bucket *b UNUSED)
+{
+ // Most of this is just wild guesses
+#ifdef SORT_VAR_KEY
+ uns avg = ALIGN_TO(sizeof(P(key))/4, CPU_STRUCT_ALIGN);
+#else
+ uns avg = ALIGN_TO(sizeof(P(key)), CPU_STRUCT_ALIGN);
+#endif
+ uns ws = 0;
+#ifdef SORT_UNIFY
+ ws += sizeof(void *);
+#endif
+#ifdef SORT_UNIFY_WORKSPACE
+ ws += avg;
+#endif
+#ifdef SORT_INTERNAL_RADIX
+ ws = MAX(ws, sizeof(P(internal_item_t)));
+#endif
+ // We ignore the data part of records, it probably won't make the estimate much worse
+ return (ctx->big_buf_size / (avg + ws + sizeof(P(internal_item_t))) * avg);
+}
+
+#undef SORT_COPY_HASH
--- /dev/null
+/*
+ * UCW Library -- Universal Sorter: Multi-Way Merge Module
+ *
+ * (c) 2007 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+/*
+ * We use a binary tree to keep track of the current minimum. The tree is
+ * represented by an array (in the same way as binary heaps usually are),
+ * leaves correspond to input streams and each internal vertex remembers
+ * the leaf in its subtree, which has the lowest key.
+ */
+
+// A vertex of the tournament tree. The value i is an index of an input
+// stream; a negative value marks a vertex with no stream (an unused or
+// exhausted leaf, or a subtree consisting only of such leaves).
+typedef struct P(mwt) {
+ int i; // Minimum of the subtree
+#ifdef SORT_UNIFY
+ int eq; // Did we encounter equality anywhere in the subtree?
+#endif
+} P(mwt);
+
+// Recalculate all ancestors of tree vertex i (a position in the tree array,
+// not a stream index) after the leaf or its key has changed. Each ancestor
+// takes over the child whose current key is smaller; a child with negative
+// stream index always loses. On ties the left child wins and, with
+// SORT_UNIFY, the vertex is marked as containing equal keys.
+static inline void P(update_tree)(P(key) *keys, P(mwt) *tree, uns i)
+{
+ while (i /= 2)
+ {
+ if (tree[2*i].i < 0)
+ tree[i] = tree[2*i+1];
+ else if (tree[2*i+1].i < 0)
+ tree[i] = tree[2*i];
+ else
+ {
+ int cmp = P(compare)(&keys[tree[2*i].i], &keys[tree[2*i+1].i]);
+ tree[i] = (cmp <= 0) ? tree[2*i] : tree[2*i+1];
+#ifdef SORT_UNIFY
+ if (!cmp)
+ tree[i].eq = 1;
+#endif
+ }
+ /*
+ * It is very tempting to stop as soon as the current node does not
+ * change, but it is wrong, because even if the stream index stored in
+ * the tree is the same, the actual key value can differ.
+ */
+ }
+ /*
+ * This function sometimes triggers optimizer bugs in GCC versions up to 4.2.1,
+ * leading to an assumption that tree[1] does not change during this function.
+ * We add an explicit memory barrier as a work-around. Ugh. See GCC Bug #33262.
+ */
+ asm volatile ("" : : : "memory");
+}
+
+// Store stream index val (or -1 for "no stream") to tree vertex i and
+// propagate the change to all its ancestors.
+static inline void P(set_tree)(P(key) *keys, P(mwt) *tree, uns i, int val)
+{
+ tree[i].i = val;
+ P(update_tree)(keys, tree, i);
+}
+
+/*
+ * Merge all buckets of the NULL-terminated array `ins` to bucket `out`,
+ * which gains one run.
+ *
+ * The current key of each input is kept in keys[]; the tournament tree has
+ * n2 leaves (the number of inputs rounded up to a power of two), leaf n2+i
+ * belonging to input i, and tree[1] always identifies the input with the
+ * smallest current key. A negative index in tree[1] means that all inputs
+ * have been exhausted.
+ */
+static void P(multiway_merge)(struct sort_context *ctx UNUSED, struct sort_bucket **ins, struct sort_bucket *out)
+{
+ uns num_ins = 0;
+ while (ins[num_ins])
+ num_ins++;
+
+ uns n2 = 1;
+ while (n2 < num_ins)
+ n2 *= 2;
+
+ struct fastbuf *fout = sbuck_write(out);
+ struct fastbuf *fins[num_ins];
+ P(key) keys[num_ins];
+ P(mwt) tree[2*n2];
+ // Start with a tree of empty vertices (index 0 is not used)
+ for (uns i=1; i<2*n2; i++)
+ tree[i] = (P(mwt)) { .i = -1 };
+
+ // Open all inputs and enter those having at least one key into the tree
+ for (uns i=0; i<num_ins; i++)
+ {
+ fins[i] = sbuck_read(ins[i]);
+ if (P(read_key)(fins[i], &keys[i]))
+ P(set_tree)(keys, tree, n2+i, i);
+ }
+
+#ifdef SORT_UNIFY
+
+ uns hits[num_ins];
+ P(key) *mkeys[num_ins], *key;
+ struct fastbuf *mfb[num_ins];
+
+ while (likely(tree[1].i >= 0))
+ {
+ int i = tree[1].i;
+ if (!tree[1].eq)
+ {
+ /* The key is unique, so let's go through the fast path */
+ P(copy_data)(&keys[i], fins[i], fout);
+ if (unlikely(!P(read_key)(fins[i], &keys[i])))
+ tree[n2+i].i = -1;
+ P(update_tree)(keys, tree, n2+i);
+ continue;
+ }
+
+ // Slow path: pull out every input whose current key equals the minimum,
+ // temporarily removing it from the tree
+ uns m = 0;
+ key = &keys[i];
+ do
+ {
+ hits[m] = i;
+ mkeys[m] = &keys[i];
+ mfb[m] = fins[i];
+ m++;
+ P(set_tree)(keys, tree, n2+i, -1);
+ i = tree[1].i;
+ if (unlikely(i < 0))
+ break;
+ }
+ while (!P(compare)(key, &keys[i]));
+
+ P(copy_merged)(mkeys, mfb, m, fout);
+
+ // Read the next key of each merged input and put it back to the tree
+ for (uns j=0; j<m; j++)
+ {
+ i = hits[j];
+ if (likely(P(read_key)(fins[i], &keys[i])))
+ P(set_tree)(keys, tree, n2+i, i);
+ }
+ }
+
+#else
+
+ /* Simplified version which does not support any unification */
+ while (likely(tree[1].i >= 0))
+ {
+ uns i = tree[1].i;
+ // Copy of the key just written, used only by the uniqueness assertion below
+ P(key) UNUSED key = keys[i];
+ P(copy_data)(&keys[i], fins[i], fout);
+ if (unlikely(!P(read_key)(fins[i], &keys[i])))
+ tree[n2+i].i = -1;
+ P(update_tree)(keys, tree, n2+i);
+#ifdef SORT_ASSERT_UNIQUE
+ ASSERT(tree[1].i < 0 || P(compare)(&key, &keys[tree[1].i]) < 0);
+#endif
+ }
+
+#endif
+
+ out->runs++;
+}
--- /dev/null
+/*
+ * UCW Library -- Universal Sorter: Radix-Split Module
+ *
+ * (c) 2007 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include <string.h>
+
+/*
+ * Distribute the records of bucket `bin` to the 2^numbits buckets in
+ * `bouts`: a record goes to the bucket whose index equals the numbits bits
+ * of the key's hash starting at bit position bitpos. An output bucket is
+ * opened for writing only when its first record arrives, so buckets which
+ * receive nothing are never opened here.
+ */
+static void P(radix_split)(struct sort_context *ctx UNUSED, struct sort_bucket *bin, struct sort_bucket **bouts, uns bitpos, uns numbits)
+{
+  uns nbucks = 1 << numbits;
+  uns mask = nbucks - 1;
+  struct fastbuf *in = sbuck_read(bin);
+  struct fastbuf *outs[nbucks];
+  P(key) k;
+
+  // No output is open yet
+  memset(outs, 0, sizeof(outs));
+
+  for (;;)
+    {
+      if (!P(read_key)(in, &k))
+	break;
+
+      P(hash_t) h = P(hash)(&k);
+      uns i = (h >> bitpos) & mask;
+      struct fastbuf *dest = outs[i];
+      if (unlikely(!dest))
+	dest = outs[i] = sbuck_write(bouts[i]);
+      P(copy_data)(&k, in, dest);
+    }
+}
--- /dev/null
+/*
+ * UCW Library -- Universal Sorter: Two-Way Merge Module
+ *
+ * (c) 2007 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+/*
+ * Merge runs from the buckets ins[0] and ins[1] (the latter may be missing
+ * or empty) and distribute the merged runs alternately to outs[0] and
+ * outs[1]. If outs[1] is NULL, everything is written to outs[0].
+ *
+ * State kept during the merge:
+ *   kin1, kin2		current (not yet written) key of each input
+ *   kprev1, kprev2	previous key of each input, used to detect run ends
+ *   kout		the last key written
+ *   next1, next2	non-zero while the input has a current key
+ *   run1, run2		non-zero while the current key of the input still
+ *			belongs to the run being merged
+ *   fout1		output currently written to; fout2 is the other one
+ *
+ * NOTE(review): LESS is defined outside this file; presumably it is `<` or
+ * `<=` depending on whether equal keys may continue a run -- confirm.
+ */
+static void P(twoway_merge)(struct sort_context *ctx UNUSED, struct sort_bucket **ins, struct sort_bucket **outs)
+{
+ struct fastbuf *fin1, *fin2, *fout1, *fout2, *ftmp;
+ P(key) kbuf1, kbuf2, kbuf3, kbuf4;
+ P(key) *kin1 = &kbuf1, *kprev1 = &kbuf2, *kin2 = &kbuf3, *kprev2 = &kbuf4;
+ P(key) *kout = NULL, *ktmp;
+ int next1, next2, run1, run2;
+ int comp;
+ uns run_count = 0;
+
+ fin1 = sbuck_read(ins[0]);
+ next1 = P(read_key)(fin1, kin1);
+ if (sbuck_have(ins[1]))
+ {
+ fin2 = sbuck_read(ins[1]);
+ next2 = P(read_key)(fin2, kin2);
+ }
+ else
+ {
+ fin2 = NULL;
+ next2 = 0;
+ }
+ fout1 = fout2 = NULL;
+
+ run1 = next1, run2 = next2;
+ while (next1 || next2)
+ {
+ // Choose the input to take from; an input whose run has ended must wait
+ if (!run1)
+ comp = 1;
+ else if (!run2)
+ comp = -1;
+ else
+ comp = P(compare)(kin1, kin2);
+ ktmp = (comp <= 0) ? kin1 : kin2;
+ // If the chosen key cannot continue the output run, switch to the other
+ // output, opening it on first use
+ if (!kout || !(P(compare)(kout, ktmp) LESS 0))
+ {
+ SWAP(fout1, fout2, ftmp);
+ if (unlikely(!fout1))
+ {
+ if (!fout2)
+ fout1 = sbuck_write(outs[0]);
+ else if (outs[1])
+ fout1 = sbuck_write(outs[1]);
+ else
+ fout1 = fout2;
+ }
+ run_count++;
+ }
+#ifdef SORT_ASSERT_UNIQUE
+ ASSERT(comp != 0);
+#endif
+ if (comp LESS 0)
+ {
+ P(copy_data)(kin1, fin1, fout1);
+ SWAP(kin1, kprev1, ktmp);
+ next1 = P(read_key)(fin1, kin1);
+ run1 = next1 && (P(compare)(kprev1, kin1) LESS 0);
+ kout = kprev1;
+ }
+#ifdef SORT_UNIFY
+ else if (comp == 0)
+ {
+ // Equal keys in both inputs: unify them and advance both inputs
+ P(key) *mkeys[] = { kin1, kin2 };
+ struct fastbuf *mfb[] = { fin1, fin2 };
+ P(copy_merged)(mkeys, mfb, 2, fout1);
+ SWAP(kin1, kprev1, ktmp);
+ next1 = P(read_key)(fin1, kin1);
+ run1 = next1 && (P(compare)(kprev1, kin1) LESS 0);
+ SWAP(kin2, kprev2, ktmp);
+ next2 = P(read_key)(fin2, kin2);
+ run2 = next2 && (P(compare)(kprev2, kin2) LESS 0);
+ kout = kprev2;
+ }
+#endif
+ else
+ {
+ P(copy_data)(kin2, fin2, fout1);
+ SWAP(kin2, kprev2, ktmp);
+ next2 = P(read_key)(fin2, kin2);
+ run2 = next2 && (P(compare)(kprev2, kin2) LESS 0);
+ kout = kprev2;
+ }
+ // Both input runs have ended: start merging the next pair of runs
+ if (!run1 && !run2)
+ {
+ run1 = next1;
+ run2 = next2;
+ }
+ }
+
+ // Account the runs written: they alternate between the outputs, starting with outs[0]
+ if (fout2 && fout2 != fout1)
+ outs[1]->runs += run_count / 2;
+ if (fout1)
+ outs[0]->runs += (run_count+1) / 2;
+}
--- /dev/null
+/*
+ * UCW Library -- Universal Sorter: Operations on Contexts, Buffers and Buckets
+ *
+ * (c) 2007 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/fastbuf.h"
+#include "lib/mempool.h"
+#include "lib/stkstring.h"
+#include "lib/sorter/common.h"
+
+#include <fcntl.h>
+
+// Allocate `size` bytes from the memory pool of the sorter context, using
+// mp_alloc_zero(). There is no matching free function here; sorter_run()
+// deletes the whole pool at its end.
+void *
+sorter_alloc(struct sort_context *ctx, uns size)
+{
+ return mp_alloc_zero(ctx->pool, size);
+}
+
+// Create a new bucket belonging to context `ctx`. The structure comes from
+// sorter_alloc(); only the ctx back-pointer is set here, and the bucket is
+// not linked to any list.
+struct sort_bucket *
+sbuck_new(struct sort_context *ctx)
+{
+ struct sort_bucket *b = sorter_alloc(ctx, sizeof(struct sort_bucket));
+ b->ctx = ctx;
+ return b;
+}
+
+// Destroy bucket `b`: unlink it from the list it is on (if any), close its
+// fastbuf and wipe the structure, leaving only the SBF_DESTROYED flag, so
+// that a second drop of the same bucket trips the assertion. Dropping a
+// NULL bucket does nothing.
+void
+sbuck_drop(struct sort_bucket *b)
+{
+  if (!b)
+    return;
+
+  ASSERT(!(b->flags & SBF_DESTROYED));
+  if (b->n.prev)
+    clist_remove(&b->n);
+  bclose(b->fb);
+  bzero(b, sizeof(*b));
+  b->flags = SBF_DESTROYED;
+}
+
+// Current size of the bucket. A bucket which is open for writing and not
+// swapped out reports the position in its fastbuf; in all other cases the
+// recorded b->size is returned.
+sh_off_t
+sbuck_size(struct sort_bucket *b)
+{
+  if (!(b->flags & SBF_OPEN_WRITE) || (b->flags & SBF_SWAPPED_OUT))
+    return b->size;
+  return btell(b->fb);
+}
+
+// Non-zero iff the bucket exists (b may be NULL) and its size is non-zero.
+int
+sbuck_have(struct sort_bucket *b)
+{
+ return b && sbuck_size(b);
+}
+
+// Non-zero iff the bucket has a fastbuf attached or is currently swapped out.
+int
+sbuck_has_file(struct sort_bucket *b)
+{
+ return (b->fb || (b->flags & SBF_SWAPPED_OUT));
+}
+
+// Undo sbuck_swap_out(): reopen the file of a swapped-out bucket. A bucket
+// which was being written is positioned at the end of the file, and unless
+// SORT_DEBUG_KEEP_BUCKETS is set, the file is marked as temporary again.
+// Does nothing for buckets which are not swapped out.
+static void
+sbuck_swap_in(struct sort_bucket *b)
+{
+  if (!(b->flags & SBF_SWAPPED_OUT))
+    return;
+
+  b->fb = bopen_file(b->filename, O_RDWR, b->ctx->fb_params);
+  if (b->flags & SBF_OPEN_WRITE)
+    bseek(b->fb, 0, SEEK_END);
+  if (!(sorter_debug & SORT_DEBUG_KEEP_BUCKETS))
+    bconfig(b->fb, BCONFIG_IS_TEMP_FILE, 1);
+  b->flags &= ~SBF_SWAPPED_OUT;
+  SORT_XTRACE(3, "Swapped in %s", b->filename);
+}
+
+// Get the fastbuf of the bucket for reading, swapping the bucket in first
+// if needed. A bucket already open for reading returns its fastbuf as it
+// is; a bucket open for writing has its size recorded, is switched to
+// reading and rewound. A bucket in neither state is a bug.
+struct fastbuf *
+sbuck_read(struct sort_bucket *b)
+{
+ sbuck_swap_in(b);
+ if (b->flags & SBF_OPEN_READ)
+ return b->fb;
+ else if (b->flags & SBF_OPEN_WRITE)
+ {
+ // Remember the final size, since sbuck_size() no longer asks the fastbuf after the switch
+ b->size = btell(b->fb);
+ b->flags = (b->flags & ~SBF_OPEN_WRITE) | SBF_OPEN_READ;
+ brewind(b->fb);
+ return b->fb;
+ }
+ else
+ ASSERT(0);
+}
+
+// Get the fastbuf of the bucket for writing, swapping the bucket in first
+// if needed. A bucket not yet open for writing gets a new temporary file
+// (kept on disk afterwards if SORT_DEBUG_KEEP_BUCKETS is set) and remembers
+// its name; it must not be open for reading nor destroyed.
+struct fastbuf *
+sbuck_write(struct sort_bucket *b)
+{
+ sbuck_swap_in(b);
+ if (b->flags & SBF_OPEN_WRITE)
+ ASSERT(b->fb);
+ else
+ {
+ ASSERT(!(b->flags & (SBF_OPEN_READ | SBF_DESTROYED)));
+ b->fb = bopen_tmp_file(b->ctx->fb_params);
+ if (sorter_debug & SORT_DEBUG_KEEP_BUCKETS)
+ bconfig(b->fb, BCONFIG_IS_TEMP_FILE, 0);
+ b->flags |= SBF_OPEN_WRITE;
+ // The name is needed by sbuck_swap_in() to reopen the file
+ b->filename = mp_strdup(b->ctx->pool, b->fb->name);
+ }
+ return b->fb;
+}
+
+/*
+ * Swap a bucket out: close its fastbuf while keeping the file, so that
+ * sbuck_swap_in() can re-open it by name later. Only buckets which are
+ * open (for reading or writing), have a fastbuf and are not marked
+ * SBF_SOURCE are affected. For writers, the current size is recorded first.
+ */
+void
+sbuck_swap_out(struct sort_bucket *b)
+{
+  if (!(b->flags & (SBF_OPEN_READ | SBF_OPEN_WRITE)) || !b->fb || (b->flags & SBF_SOURCE))
+    return;
+
+  if (b->flags & SBF_OPEN_WRITE)
+    b->size = btell(b->fb);
+  /* Clear the temp-file flag before closing */
+  bconfig(b->fb, BCONFIG_IS_TEMP_FILE, 0);
+  bclose(b->fb);
+  b->fb = NULL;
+  b->flags |= SBF_SWAPPED_OUT;
+  SORT_XTRACE(3, "Swapped out %s", b->filename);
+}
+
+/*
+ * Decide on the size of the big sorting buffer: sorter_bufsize aligned
+ * by ALIGN_TO() to CPU_PAGE_SIZE, but never less than two pages.
+ * Only the size is stored, nothing is allocated here.
+ */
+void
+sorter_prepare_buf(struct sort_context *ctx)
+{
+  const u64 page = (u64)CPU_PAGE_SIZE;
+  u64 wanted = sorter_bufsize;
+
+  wanted = ALIGN_TO(wanted, page);
+  if (wanted < 2*page)
+    wanted = 2*page;
+  ctx->big_buf_size = wanted;
+}
+
+/* Allocate the big sorting buffer of ctx->big_buf_size bytes, unless it already exists. */
+void
+sorter_alloc_buf(struct sort_context *ctx)
+{
+  if (!ctx->big_buf)
+    {
+      ctx->big_buf = big_alloc(ctx->big_buf_size);
+      SORT_XTRACE(3, "Allocated sorting buffer (%s)", stk_fsize(ctx->big_buf_size));
+    }
+}
+
+/* Release the big sorting buffer if there is one and forget the pointer. */
+void
+sorter_free_buf(struct sort_context *ctx)
+{
+  if (ctx->big_buf)
+    {
+      big_free(ctx->big_buf, ctx->big_buf_size);
+      ctx->big_buf = NULL;
+      SORT_XTRACE(3, "Freed sorting buffer");
+    }
+}
--- /dev/null
+/*
+ * UCW Library -- Testing the Sorter
+ *
+ * (c) 2007 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/getopt.h"
+#include "lib/conf.h"
+#include "lib/fastbuf.h"
+#include "lib/ff-binary.h"
+#include "lib/hashfunc.h"
+#include "lib/md5.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+/*** A hack for overriding radix-sorter configuration ***/
+
+#ifdef FORCE_RADIX_BITS
+#undef CONFIG_UCW_RADIX_SORTER_BITS
+#define CONFIG_UCW_RADIX_SORTER_BITS FORCE_RADIX_BITS
+#endif
+
+/*** Time measurement ***/
+
+static timestamp_t timer;
+static uns test_id;
+
+/* Begin a timed section: call sync() and then initialize the global test timer. */
+static void
+start(void)
+{
+ sync();
+ init_timer(&timer);
+}
+
+/* End a timed section: call sync() and log the timer reading (divided by 1000) for the current test. */
+static void
+stop(void)
+{
+  sync();
+  double seconds = get_timer(&timer) / 1000.;
+  msg(L_INFO, "Test %d took %.3fs", test_id, seconds);
+}
+
+/*** Simple 4-byte integer keys ***/
+
+/* Key for the s1 sorter: a single 32-bit integer, no data attached. */
+struct key1 {
+ u32 x;
+};
+
+/*
+ * Instantiate the s1 sorter: regular keys (same layout in memory and on
+ * disk), fastbuf input and output, all keys distinct, sorted by the
+ * integer x. SORT_DELETE_INPUT is an expression evaluated by the sorter;
+ * here it is constant 0.
+ */
+#define SORT_KEY_REGULAR struct key1
+#define SORT_PREFIX(x) s1_##x
+#define SORT_INPUT_FB
+#define SORT_OUTPUT_FB
+#define SORT_UNIQUE
+#define SORT_INT(k) (k).x
+#define SORT_DELETE_INPUT 0
+
+#include "lib/sorter/sorter.h"
+
+/*
+ * Sort N distinct 32-bit integers with the s1 sorter and check that the
+ * output is exactly 0, 1, ..., N-1.
+ *
+ * mode: 0 = increasing input, 1 = decreasing input, 2 = "random" input
+ *       generated as (i*K + 17) mod N
+ * size: requested input size in bytes (4 bytes per item); 0 means no items
+ */
+static void
+test_int(int mode, u64 size)
+{
+  uns N = 0;
+  if (size)
+    N = nextprime(MIN(size/4, 0xffff0000));
+  uns K = N/4*3;
+  msg(L_INFO, ">>> Integers (%s, N=%u)", ((char *[]) { "increasing", "decreasing", "random" })[mode], N);
+
+  /* Generate the input */
+  struct fastbuf *f = bopen_tmp(65536);
+  for (uns i=0; i<N; i++)
+    {
+      u32 val;
+      switch (mode)
+        {
+        case 0:
+          val = i;
+          break;
+        case 1:
+          val = N-1-i;
+          break;
+        default:
+          val = ((u64)i * K + 17) % N;
+          break;
+        }
+      bputl(f, val);
+    }
+  brewind(f);
+
+  /* Sort it (timed); the last argument is the range of the keys */
+  start();
+  f = s1_sort(f, NULL, N-1);
+  stop();
+
+  /* The i-th item of the output must be i */
+  SORT_XTRACE(2, "Verifying");
+  uns expected = 0;
+  while (expected < N)
+    {
+      uns got = bgetl(f);
+      if (expected != got)
+        die("Discrepancy: %u instead of %u", got, expected);
+      expected++;
+    }
+  bclose(f);
+}
+
+/*** Integers with merging, but no data ***/
+
+/* Key for the s2 sorter: an integer with an occurrence counter. */
+struct key2 {
+ u32 x;
+ u32 cnt;
+};
+
+/*
+ * Unification callback of the s2 sorter: add the counters of all n keys
+ * into the first key and write that key out. There are no data and no
+ * workspace, so `d' and `buf' are unused.
+ */
+static inline void s2_write_merged(struct fastbuf *f, struct key2 **k, void **d UNUSED, uns n, void *buf UNUSED)
+{
+  struct key2 *first = k[0];
+  uns idx = 1;
+  while (idx < n)
+    first->cnt += k[idx++]->cnt;
+  bwrite(f, first, sizeof(*first));
+}
+
+/*
+ * Instantiate the s2 sorter: regular keys sorted by the integer x,
+ * fastbuf input and output, items with equal keys are unified by
+ * s2_write_merged().
+ */
+#define SORT_KEY_REGULAR struct key2
+#define SORT_PREFIX(x) s2_##x
+#define SORT_INPUT_FB
+#define SORT_OUTPUT_FB
+#define SORT_UNIFY
+#define SORT_INT(k) (k).x
+
+#include "lib/sorter/sorter.h"
+
+/*
+ * Test unification: every one of the N keys is written 2*mult times with
+ * count 1. After sorting with the s2 sorter, the output must contain each
+ * key 0..N-1 exactly once, with count 2*mult.
+ *
+ * mode: 0 = increasing, 1 = decreasing, 2 = "random" order of keys
+ * size: requested input size in bytes; 0 means no items
+ */
+static void
+test_counted(int mode, u64 size)
+{
+  u64 items = size / sizeof(struct key2);
+
+  /* Raise the multiplicity until the number of distinct keys fits the limit */
+  uns mult = 2;
+  while (items/(2*mult) > 0xffff0000)
+    mult++;
+
+  uns N = 0;
+  if (items)
+    N = nextprime(items/(2*mult));
+  uns K = N/4*3;
+  msg(L_INFO, ">>> Counted integers (%s, N=%u, mult=%u)", ((char *[]) { "increasing", "decreasing", "random" })[mode], N, mult);
+
+  /* Generate the input: mult passes over all keys, each key twice per pass */
+  struct fastbuf *f = bopen_tmp(65536);
+  for (uns pass=0; pass<mult; pass++)
+    for (uns i=0; i<N; i++)
+      {
+        u32 key;
+        if (mode == 0)
+          key = i%N;
+        else if (mode == 1)
+          key = N-1-(i%N);
+        else
+          key = ((u64)i * K + 17) % N;
+
+        bputl(f, key);
+        bputl(f, 1);
+        bputl(f, key);
+        bputl(f, 1);
+      }
+  brewind(f);
+
+  start();
+  f = s2_sort(f, NULL, N-1);
+  stop();
+
+  SORT_XTRACE(2, "Verifying");
+  const uns want_cnt = 2*mult;
+  for (uns want=0; want<N; want++)
+    {
+      uns key = bgetl(f);
+      if (want != key)
+        die("Discrepancy: %u instead of %u", key, want);
+      uns cnt = bgetl(f);
+      if (cnt != want_cnt)
+        die("Discrepancy: %u has count %u instead of %u", key, cnt, want_cnt);
+    }
+  bclose(f);
+}
+
+/*** Longer records with hashes (similar to Shepherd's index records) ***/
+
+/*
+ * Key for the s3 sorter: records are ordered by the four hash words
+ * (see s3_compare()); i is the index the record was generated from and
+ * payload[] carries values derived from i (see gen_hash_key()).
+ */
+struct key3 {
+ u32 hash[4];
+ u32 i;
+ u32 payload[3];
+};
+
+/* Compare two keys by their hash words, hash[0] being the most significant. */
+static inline int s3_compare(struct key3 *x, struct key3 *y)
+{
+  for (uns w=0; w<4; w++)
+    {
+      COMPARE(x->hash[w], y->hash[w]);
+    }
+  return 0;
+}
+
+/*
+ * Hash function for the s3 sorter: the first hash word. It is also the
+ * first word examined by s3_compare().
+ */
+static inline uns s3_hash(struct key3 *x)
+{
+ return x->hash[0];
+}
+
+/*
+ * Instantiate the s3 sorter: regular keys compared by s3_compare(),
+ * fastbuf input and output, with a 32-bit hash supplied by s3_hash().
+ */
+#define SORT_KEY_REGULAR struct key3
+#define SORT_PREFIX(x) s3_##x
+#define SORT_INPUT_FB
+#define SORT_OUTPUT_FB
+#define SORT_HASH_BITS 32
+
+#include "lib/sorter/sorter.h"
+
+/*
+ * Fill in the key generated from index i. The payload is always derived
+ * from i. The hash depends on the mode:
+ *   0: hash[0] = i,  the remaining words copy the payload
+ *   1: hash[0] = ~i, the remaining words copy the payload
+ *   otherwise: the hash is the MD5 digest of the 4 bytes of k->i
+ */
+static void
+gen_hash_key(int mode, struct key3 *k, uns i)
+{
+  k->i = i;
+  k->payload[0] = 7*i + 13;
+  k->payload[1] = 13*i + 19;
+  k->payload[2] = 19*i + 7;
+
+  if (mode == 0 || mode == 1)
+    {
+      k->hash[0] = mode ? ~i : i;
+      k->hash[1] = k->payload[0];
+      k->hash[2] = k->payload[1];
+      k->hash[3] = k->payload[2];
+    }
+  else
+    {
+      struct MD5Context ctx;
+      MD5Init(&ctx);
+      MD5Update(&ctx, (byte*) &k->i, 4);
+      MD5Final((byte*) &k->hash, &ctx);
+    }
+}
+
+/*
+ * Sort N records keyed by 128-bit hashes with the s3 sorter and verify
+ * the result. `mode' is passed to gen_hash_key() (0 = increasing,
+ * 1 = decreasing, 2 = random hashes), `size' is the input size in bytes.
+ */
+static void
+test_hashes(int mode, u64 size)
+{
+ uns N = MIN(size / sizeof(struct key3), 0xffffffff);
+ msg(L_INFO, ">>> Hashes (%s, N=%u)", ((char *[]) { "increasing", "decreasing", "random" })[mode], N);
+ struct key3 k, lastk;
+
+ /* Generate the input and accumulate a checksum of hash[3] over all records */
+ struct fastbuf *f = bopen_tmp(65536);
+ uns hash_sum = 0;
+ for (uns i=0; i<N; i++)
+ {
+ gen_hash_key(mode, &k, i);
+ hash_sum += k.hash[3];
+ bwrite(f, &k, sizeof(k));
+ }
+ brewind(f);
+
+ start();
+ f = s3_sort(f, NULL);
+ stop();
+
+ SORT_XTRACE(2, "Verifying");
+ for (uns i=0; i<N; i++)
+ {
+ int ok = breadb(f, &k, sizeof(k));
+ ASSERT(ok);
+ /* Keys must be strictly increasing; lastk is not set yet when i == 0 */
+ if (i && s3_compare(&k, &lastk) <= 0)
+ ASSERT(0);
+ /* Regenerate the record from its index and check it arrived intact; lastk then serves as the previous key */
+ gen_hash_key(mode, &lastk, k.i);
+ if (memcmp(&k, &lastk, sizeof(k)))
+ ASSERT(0);
+ hash_sum -= k.hash[3];
+ }
+ /* The checksum returns to zero if the output has the same hash[3] sum as the input */
+ ASSERT(!hash_sum);
+ bclose(f);
+}
+
+/*** Variable-length records (strings) with and without var-length data ***/
+
+/* Capacity of the key4 string buffer; the read/write routines assert len < KEY4_MAX. */
+#define KEY4_MAX 256
+
+/* Variable-length string key: len bytes of s[] are valid. */
+struct key4 {
+ uns len;
+ byte s[KEY4_MAX];
+};
+
+/*
+ * Compare two string keys: by memcmp() on their common prefix first,
+ * ties are broken by length (the shorter key goes first).
+ */
+static inline int s4_compare(struct key4 *x, struct key4 *y)
+{
+  uns common = MIN(x->len, y->len);
+  int diff = memcmp(x->s, y->s, common);
+  if (!diff)
+    {
+      COMPARE(x->len, y->len);
+      return 0;
+    }
+  return diff;
+}
+
+/*
+ * Read a key in its on-disk form: a 32-bit length followed by the bytes
+ * of the string. Returns 1 on success; a length of 0xffffffff is treated
+ * as end of file and yields 0.
+ */
+static inline int s4_read_key(struct fastbuf *f, struct key4 *x)
+{
+ x->len = bgetl(f);
+ if (x->len == 0xffffffff)
+ return 0;
+ ASSERT(x->len < KEY4_MAX);
+ breadb(f, x->s, x->len);
+ return 1;
+}
+
+/* Write a key in its on-disk form: a 32-bit length followed by the bytes of the string. */
+static inline void s4_write_key(struct fastbuf *f, struct key4 *x)
+{
+ ASSERT(x->len < KEY4_MAX);
+ bputl(f, x->len);
+ bwrite(f, x->s, x->len);
+}
+
+/*
+ * Instantiate the s4 sorter: variable-size keys with custom read/write
+ * routines, no data. SORT_KEY_SIZE is the size of the structure without
+ * the unused tail of s[].
+ */
+#define SORT_KEY struct key4
+#define SORT_PREFIX(x) s4_##x
+#define SORT_KEY_SIZE(x) (sizeof(struct key4) - KEY4_MAX + (x).len)
+#define SORT_INPUT_FB
+#define SORT_OUTPUT_FB
+
+#include "lib/sorter/sorter.h"
+
+/* The s4b sorter shares the key callbacks with s4 */
+#define s4b_compare s4_compare
+#define s4b_read_key s4_read_key
+#define s4b_write_key s4_write_key
+
+/*
+ * Size of the data following a key in the s4b test: the first byte of the
+ * string XORed with 0xad, or 0 for an empty string.
+ */
+static inline uns s4_data_size(struct key4 *x)
+{
+ return x->len ? (x->s[0] ^ 0xad) : 0;
+}
+
+/* Instantiate the s4b sorter: like s4, but each key is followed by s4_data_size() bytes of data. */
+#define SORT_KEY struct key4
+#define SORT_PREFIX(x) s4b_##x
+#define SORT_KEY_SIZE(x) (sizeof(struct key4) - KEY4_MAX + (x).len)
+#define SORT_DATA_SIZE(x) s4_data_size(&(x))
+#define SORT_INPUT_FB
+#define SORT_OUTPUT_FB
+
+#include "lib/sorter/sorter.h"
+
+/* Generate a string key: length drawn by random_max(KEY4_MAX), each byte taken from random(). */
+static void
+gen_key4(struct key4 *k)
+{
+  uns len = random_max(KEY4_MAX);
+  k->len = len;
+  for (uns pos=0; pos<len; pos++)
+    k->s[pos] = random();
+}
+
+/*
+ * Fill buf with len bytes of data determined by the seed h: each byte is
+ * the top 8 bits of h, which is then advanced as h = h*259309 + 17.
+ */
+static void
+gen_data4(byte *buf, uns len, uns h)
+{
+  for (uns pos=0; pos<len; pos++)
+    {
+      buf[pos] = h >> 24;
+      h = h*259309 + 17;
+    }
+}
+
+/*
+ * Sort N random strings and verify the result. With mode == 0 the s4
+ * sorter is used (keys only), with mode != 0 the s4b sorter (each key is
+ * followed by data generated from the hash of the key). `size' is the
+ * approximate input size in bytes.
+ */
+static void
+test_strings(uns mode, u64 size)
+{
+ uns avg_item_size = KEY4_MAX/2 + 4 + (mode ? 128 : 0);
+ uns N = MIN(size / avg_item_size, 0xffffffff);
+ msg(L_INFO, ">>> Strings %s(N=%u)", (mode ? "with data " : ""), N);
+ /* NOTE(review): this seeds rand(), while gen_key4() draws from random() -- confirm the seeding has the intended effect */
+ srand(1);
+
+ struct key4 k, lastk;
+ byte buf[256], buf2[256];
+ uns sum = 0;
+
+ /* Generate the input and accumulate a checksum of the key hashes */
+ struct fastbuf *f = bopen_tmp(65536);
+ for (uns i=0; i<N; i++)
+ {
+ gen_key4(&k);
+ s4_write_key(f, &k);
+ uns h = hash_block(k.s, k.len);
+ sum += h;
+ if (mode)
+ {
+ gen_data4(buf, s4_data_size(&k), h);
+ bwrite(f, buf, s4_data_size(&k));
+ }
+ }
+ brewind(f);
+
+ start();
+ f = (mode ? s4b_sort : s4_sort)(f, NULL);
+ stop();
+
+ SORT_XTRACE(2, "Verifying");
+ for (uns i=0; i<N; i++)
+ {
+ int ok = s4_read_key(f, &k);
+ ASSERT(ok);
+ uns h = hash_block(k.s, k.len);
+ /* The data must be exactly what gen_data4() produces for this key */
+ if (mode && s4_data_size(&k))
+ {
+ ok = breadb(f, buf, s4_data_size(&k));
+ ASSERT(ok);
+ gen_data4(buf2, s4_data_size(&k), h);
+ ASSERT(!memcmp(buf, buf2, s4_data_size(&k)));
+ }
+ /* Keys must be non-decreasing (equal keys are allowed here) */
+ if (i && s4_compare(&k, &lastk) < 0)
+ ASSERT(0);
+ sum -= h;
+ lastk = k;
+ }
+ /* The checksum returns to zero if the output keys have the same hash sum as the input */
+ ASSERT(!sum);
+ bclose(f);
+}
+
+/*** Graph-like structure with custom presorting ***/
+
+/* Key for the s5 sorters: x is followed on disk by cnt 32-bit values (see SORT_DATA_SIZE below). */
+struct key5 {
+ u32 x;
+ u32 cnt;
+};
+
+/* State of the s5_gen() generator: parameters N, K, L and the current position (i, j) */
+static uns s5_N, s5_K, s5_L, s5_i, s5_j;
+
+/* A single pair produced by s5_gen() */
+struct s5_pair {
+ uns x, y;
+};
+
+/*
+ * Produce the next pair of the test graph. For i = 0..N-1 and j = 0..N-1
+ * (j running faster), the pair is x = j*K mod N, y = (i+j)*L mod N.
+ * Returns 1 if a pair was stored to *p, 0 when all pairs have been
+ * generated (or N is zero). The position is kept in s5_i and s5_j.
+ */
+static int s5_gen(struct s5_pair *p)
+{
+ if (s5_j >= s5_N)
+ {
+ /* End of a row: either advance to the next i, or stop after the last one */
+ if (!s5_N || s5_i >= s5_N-1)
+ return 0;
+ s5_j = 0;
+ s5_i++;
+ }
+ p->x = ((u64)s5_j * s5_K) % s5_N;
+ p->y = ((u64)(s5_i + s5_j) * s5_L) % s5_N;
+ s5_j++;
+ return 1;
+}
+
+/* Instantiate s5m_sort(), an array sorter for u32 values; it is used by s5_write_merged(). */
+#define ASORT_PREFIX(x) s5m_##x
+#define ASORT_KEY_TYPE u32
+#define ASORT_ELT(i) ary[i]
+#define ASORT_EXTRA_ARGS , u32 *ary
+#include "lib/arraysort.h"
+
+/*
+ * In-memory unification callback: concatenate the data of all n records
+ * into the workspace `buf', sort the values, and write a single record
+ * whose count is the total number of values. The first key is modified.
+ */
+static void s5_write_merged(struct fastbuf *f, struct key5 **keys, void **data, uns n, void *buf)
+{
+  u32 *merged = buf;
+  uns total = 0;
+
+  for (uns i=0; i<n; i++)
+    {
+      uns cnt = keys[i]->cnt;
+      memcpy(merged + total, data[i], 4*cnt);
+      total += cnt;
+    }
+  s5m_sort(total, merged);
+
+  keys[0]->cnt = total;
+  bwrite(f, keys[0], sizeof(struct key5));
+  bwrite(f, merged, 4*total);
+}
+
+/*
+ * Streaming unification callback: merge the data of n records, which are
+ * read from the fastbufs data[], into a single record written to `dest'.
+ * k[i] holds the value currently at the head of the i-th stream; each
+ * step emits the smallest head. The cnt fields of the keys are consumed
+ * (decremented) in the process.
+ */
+static void s5_copy_merged(struct key5 **keys, struct fastbuf **data, uns n, struct fastbuf *dest)
+{
+ u32 k[n];
+ uns m = 0;
+ /* Fetch the first value of every stream and sum up the counts */
+ /* NOTE(review): a value is read from every stream, so every record presumably has cnt >= 1 -- confirm */
+ for (uns i=0; i<n; i++)
+ {
+ k[i] = bgetl(data[i]);
+ m += keys[i]->cnt;
+ }
+ struct key5 key = { .x = keys[0]->x, .cnt = m };
+ bwrite(dest, &key, sizeof(key));
+ while (key.cnt--)
+ {
+ /* Find the stream with the smallest head value */
+ uns b = 0;
+ for (uns i=1; i<n; i++)
+ if (k[i] < k[b])
+ b = i;
+ bputl(dest, k[b]);
+ /* Refill from that stream; an exhausted stream gets the maximum value as its head */
+ if (--keys[b]->cnt)
+ k[b] = bgetl(data[b]);
+ else
+ k[b] = ~0U;
+ }
+}
+
+/* Ordering of pairs for the presorter: by x first, then by y (uses the COMPARE_LT macro). */
+static inline int s5p_lt(struct s5_pair x, struct s5_pair y)
+{
+ COMPARE_LT(x.x, y.x);
+ COMPARE_LT(x.y, y.y);
+ return 0;
+}
+
+/* Instantiate s5p_sort(), an array sorter for pairs ordered by s5p_lt(); it is used by s5_presort(). */
+#define ASORT_PREFIX(x) s5p_##x
+#define ASORT_KEY_TYPE struct s5_pair
+#define ASORT_LT(x,y) s5p_lt(x,y)
+#include "lib/sorter/array.h"
+
+/*
+ * Custom presorter of the s5 sorter: fill the buffer with as many pairs
+ * from s5_gen() as fit, sort them and write them to `dest' grouped by x,
+ * i.e., as a key5 header followed by the y values of the group.
+ * Returns 1 if a batch was written, 0 if the generator is exhausted.
+ */
+static int s5_presort(struct fastbuf *dest, void *buf, size_t bufsize)
+{
+ uns max = MIN(bufsize/sizeof(struct s5_pair), 0xffffffff);
+ struct s5_pair *a = buf;
+ uns n = 0;
+ while (n<max && s5_gen(&a[n]))
+ n++;
+ if (!n)
+ return 0;
+ s5p_sort(a, n);
+ uns i = 0;
+ while (i < n)
+ {
+ /* a[j..i-1] becomes the run of pairs sharing the same x */
+ uns j = i;
+ while (i < n && a[i].x == a[j].x)
+ i++;
+ struct key5 k = { .x = a[j].x, .cnt = i-j };
+ bwrite(dest, &k, sizeof(k));
+ while (j < i)
+ bputl(dest, a[j++].y);
+ }
+ return 1;
+}
+
+/*
+ * Instantiate the s5 sorter: input comes from the custom presorter
+ * s5_presort(), output is appended to a given fastbuf. Each key is
+ * followed by cnt 32-bit values; records with equal x are unified by
+ * s5_write_merged() and s5_copy_merged().
+ */
+#define SORT_KEY_REGULAR struct key5
+#define SORT_PREFIX(x) s5_##x
+#define SORT_DATA_SIZE(k) (4*(k).cnt)
+#define SORT_UNIFY
+#define SORT_UNIFY_WORKSPACE(k) SORT_DATA_SIZE(k)
+#define SORT_INPUT_PRESORT
+#define SORT_OUTPUT_THIS_FB
+#define SORT_INT(k) (k).x
+
+#include "lib/sorter/sorter.h"
+
+/* Instantiate the s5b sorter: the same as s5, but reading its input from a fastbuf; it shares the merge callbacks with s5. */
+#define SORT_KEY_REGULAR struct key5
+#define SORT_PREFIX(x) s5b_##x
+#define SORT_DATA_SIZE(k) (4*(k).cnt)
+#define SORT_UNIFY
+#define SORT_UNIFY_WORKSPACE(k) SORT_DATA_SIZE(k)
+#define SORT_INPUT_FB
+#define SORT_OUTPUT_THIS_FB
+#define SORT_INT(k) (k).x
+#define s5b_write_merged s5_write_merged
+#define s5b_copy_merged s5_copy_merged
+
+#include "lib/sorter/sorter.h"
+
+/*
+ * Sort the pairs produced by s5_gen() and unify them by x. With
+ * mode == 0, the pairs are fed in by the custom presorter (s5 sorter);
+ * with mode != 0 they are first written to a fastbuf as single-value
+ * records (s5b sorter). The output is expected to contain, for every
+ * x in 0..N-1, a record with cnt == N and the values 0..N-1 in order.
+ */
+static void
+test_graph(uns mode, u64 size)
+{
+ /* Pick N by stepping through nextprime() from 3 until N*(N+2)*4 reaches the requested size */
+ uns N = 3;
+ while ((u64)N*(N+2)*4 < size)
+ N = nextprime(N);
+ if (!size)
+ N = 0;
+ msg(L_INFO, ">>> Graph%s (N=%u)", (mode ? "" : " with custom presorting"), N);
+ s5_N = N;
+ s5_K = N/4*3;
+ s5_L = N/3*2;
+ s5_i = s5_j = 0;
+
+ struct fastbuf *in = NULL;
+ if (mode)
+ {
+ /* Materialize the generator output as records with a single value each */
+ struct s5_pair p;
+ in = bopen_tmp(65536);
+ while (s5_gen(&p))
+ {
+ struct key5 k = { .x = p.x, .cnt = 1 };
+ bwrite(in, &k, sizeof(k));
+ bputl(in, p.y);
+ }
+ brewind(in);
+ }
+
+ start();
+ /* The output fastbuf already contains a marker, which must be preserved by the sorter */
+ struct fastbuf *f = bopen_tmp(65536);
+ bputl(f, 0xfeedcafe);
+ struct fastbuf *g = (mode ? s5b_sort(in, f, s5_N-1) : s5_sort(NULL, f, s5_N-1));
+ ASSERT(f == g);
+ stop();
+
+ SORT_XTRACE(2, "Verifying");
+ uns c = bgetl(f);
+ ASSERT(c == 0xfeedcafe);
+ for (uns i=0; i<N; i++)
+ {
+ struct key5 k;
+ int ok = breadb(f, &k, sizeof(k));
+ ASSERT(ok);
+ ASSERT(k.x == i);
+ ASSERT(k.cnt == N);
+ for (uns j=0; j<N; j++)
+ {
+ uns y = bgetl(f);
+ ASSERT(y == j);
+ }
+ }
+ bclose(f);
+}
+
+/*** Simple 8-byte integer keys ***/
+
+/* Key for the s6 sorter: a single 64-bit integer, no data attached. */
+struct key6 {
+ u64 x;
+};
+
+/*
+ * Instantiate the s6 sorter: regular keys sorted by the 64-bit integer x,
+ * fastbuf input and output, all keys distinct.
+ */
+#define SORT_KEY_REGULAR struct key6
+#define SORT_PREFIX(x) s6_##x
+#define SORT_INPUT_FB
+#define SORT_OUTPUT_FB
+#define SORT_UNIQUE
+#define SORT_INT64(k) (k).x
+
+#include "lib/sorter/sorter.h"
+
+/*
+ * Sort N distinct 64-bit integers with the s6 sorter. The keys are the
+ * numbers 0..N-1 multiplied by 777777, so the i-th item of the output
+ * must be 777777*i.
+ *
+ * mode: 0 = increasing input, 1 = decreasing input, 2 = "random" input
+ * size: requested input size in bytes (8 bytes per item); 0 means no items
+ */
+static void
+test_int64(int mode, u64 size)
+{
+  u64 N = 0;
+  if (size)
+    N = nextprime(MIN(size/8, 0xffff0000));
+  u64 K = N/4*3;
+  msg(L_INFO, ">>> 64-bit integers (%s, N=%llu)", ((char *[]) { "increasing", "decreasing", "random" })[mode], (long long)N);
+
+  /* Generate the input */
+  struct fastbuf *f = bopen_tmp(65536);
+  for (u64 i=0; i<N; i++)
+    {
+      u64 rank;
+      if (mode == 0)
+        rank = i;
+      else if (mode == 1)
+        rank = N-1-i;
+      else
+        rank = ((u64)i * K + 17) % N;
+      bputq(f, 777777*rank);
+    }
+  brewind(f);
+
+  /* Sort it (timed); the last argument is the range of the keys */
+  start();
+  f = s6_sort(f, NULL, 777777*(N-1));
+  stop();
+
+  SORT_XTRACE(2, "Verifying");
+  u64 pos = 0;
+  while (pos < N)
+    {
+      u64 got = bgetq(f);
+      if (777777*pos != got)
+        die("Discrepancy: %llu instead of %llu", (long long)got, 777777*(long long)pos);
+      pos++;
+    }
+  bclose(f);
+}
+
+/*** Main ***/
+
+/*
+ * Run the test with the given number on `size' bytes of input. Tests are
+ * numbered in groups: 0-2 integers, 3-5 counted integers, 6-8 hashes,
+ * 9-10 strings, 11-12 graph, 13-15 64-bit integers; the offset within
+ * a group is the mode passed to the test. Numbers outside 0..15 do nothing.
+ * Also defines TMAX, the number of tests.
+ */
+static void
+run_test(uns i, u64 size)
+{
+  test_id = i;
+  if (i < 3)
+    {
+      test_int(i, size);
+    }
+  else if (i < 6)
+    {
+      test_counted(i - 3, size);
+    }
+  else if (i < 9)
+    {
+      test_hashes(i - 6, size);
+    }
+  else if (i < 11)
+    {
+      test_strings(i - 9, size);
+    }
+  else if (i < 13)
+    {
+      test_graph(i - 11, size);
+    }
+  else if (i < 16)
+    {
+      test_int64(i - 13, size);
+    }
+#define TMAX 16
+}
+
+/*
+ * Entry point of the sorter test.
+ *
+ * Options (in addition to the standard configuration options):
+ *   -d <debug>  set sorter_debug
+ *   -s <size>   size of the test data (default 10000000)
+ *   -t <tests>  comma-separated list of test numbers 0..TMAX-1 to run
+ *               (default: all tests)
+ *   -v          increase sorter_trace
+ *
+ * Any invalid option, test number or stray argument prints the usage
+ * message and exits with status 1.
+ */
+int
+main(int argc, char **argv)
+{
+  log_init(NULL);
+  int c;
+  u64 size = 10000000;
+  uns t = ~0;		/* Bit mask of tests to run */
+
+  while ((c = cf_getopt(argc, argv, CF_SHORT_OPTS "d:s:t:v", CF_NO_LONG_OPTS, NULL)) >= 0)
+    switch (c)
+      {
+      case 'd':
+        sorter_debug = atol(optarg);
+        break;
+      case 's':
+        if (cf_parse_u64(optarg, &size))
+          goto usage;
+        break;
+      case 't':
+        {
+          char *w[32];
+          int f = sepsplit(optarg, ',', w, ARRAY_SIZE(w));
+          if (f < 0)
+            goto usage;
+          t = 0;
+          for (int i=0; i<f; i++)
+            {
+              int j = atol(w[i]);
+              /* A negative number would make the shift below undefined */
+              if (j < 0 || j >= TMAX)
+                goto usage;
+              t |= 1U << j;
+            }
+        }
+        break;
+      case 'v':
+        sorter_trace++;
+        break;
+      default:
+      usage:
+        /* Also reached by goto from outside the switch */
+        fputs("Usage: sort-test [-v] [-d <debug>] [-s <size>] [-t <test>]\n", stderr);
+        exit(1);
+      }
+  if (optind != argc)
+    goto usage;
+
+  for (uns i=0; i<TMAX; i++)
+    if (t & (1U << i))
+      run_test(i, size);
+
+  return 0;
+}
--- /dev/null
+/*
+ * UCW Library -- Universal Sorter
+ *
+ * (c) 2001--2007 Martin Mares <mj@ucw.cz>
+ * (c) 2004 Robert Spalek <robert@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+/*
+ * This is not a normal header file, but a generator of sorting
+ * routines. Each time you include it with parameters set in the
+ * corresponding preprocessor macros, it generates a file sorter
+ * with the parameters given.
+ *
+ * The sorter operates on fastbufs containing sequences of items. Each item
+ * consists of a key, optionally followed by data. The keys are represented
+ * by fixed-size structures of type SORT_KEY internally, if this format differs
+ * from the on-disk format, explicit reading and writing routines can be provided.
+ * The data are always copied verbatim, unless the sorter is in the merging
+ * mode in which it calls callbacks for merging of items with equal keys.
+ *
+ * All callbacks must be thread-safe.
+ *
+ * Basic parameters and callbacks:
+ *
+ * SORT_PREFIX(x) add a name prefix (used on all global names defined by the sorter)
+ *
+ * SORT_KEY data type capable of holding a single key in memory (the on-disk
+ * representation can be different). Alternatively, you can use:
+ * SORT_KEY_REGULAR data type holding a single key both in memory and on disk;
+ * in this case, bread() and bwrite() are used to read/write keys
+ * and it's also assumed that the keys are not very long.
+ * int PREFIX_compare(SORT_KEY *a, SORT_KEY *b)
+ * compares two keys, returns result like strcmp(). Mandatory.
+ * int PREFIX_read_key(struct fastbuf *f, SORT_KEY *k)
+ * reads a key from a fastbuf, returns nonzero=ok, 0=EOF.
+ * Mandatory unless SORT_KEY_REGULAR is defined.
+ * void PREFIX_write_key(struct fastbuf *f, SORT_KEY *k)
+ * writes a key to a fastbuf. Mandatory unless SORT_KEY_REGULAR.
+ *
+ * SORT_KEY_SIZE(key) returns the real size of a key (a SORT_KEY type in memory
+ * can be truncated to this number of bytes without any harm;
+ * used to save memory when the keys have variable sizes).
+ * Default: always store the whole SORT_KEY.
+ * SORT_DATA_SIZE(key) gets a key and returns the amount of data following it.
+ * Default: records consist of keys only.
+ *
+ * Integer sorting:
+ *
+ * SORT_INT(key) we are sorting by an integer value returned by this macro.
+ * In this mode, PREFIX_compare is supplied automatically and the sorting
+ * function gets an extra parameter specifying the range of the integers.
+ * The better the range fits, the faster we sort.
+ * Sets up SORT_HASH_xxx automatically.
+ * SORT_INT64(key) the same for 64-bit integers.
+ *
+ * Hashing (optional, but it can speed sorting up):
+ *
+ * SORT_HASH_BITS signals that a monotone hashing function returning a given number of
+ * bits is available. A monotone hash is a function f from keys to integers
+ * such that f(x) < f(y) implies x < y, which is approximately uniformly
+ * distributed. It should be declared as:
+ * uns PREFIX_hash(SORT_KEY *a)
+ *
+ * Unification:
+ *
+ * SORT_UNIFY merge items with identical keys. It requires the following functions:
+ * void PREFIX_write_merged(struct fastbuf *f, SORT_KEY **keys, void **data, uns n, void *buf)
+ * takes n records in memory with keys which compare equal and writes
+ * a single record to the given fastbuf. `buf' points to a buffer which
+ * is guaranteed to hold the sum of workspace requirements (see below)
+ * over all given records. The function is allowed to modify all its inputs.
+ * void PREFIX_copy_merged(SORT_KEY **keys, struct fastbuf **data, uns n, struct fastbuf *dest)
+ * takes n records with keys in memory and data in fastbufs and writes
+ * a single record. Used only if SORT_DATA_SIZE or SORT_UNIFY_WORKSPACE
+ * is defined.
+ * SORT_UNIFY_WORKSPACE(key)
+ * gets a key and returns the amount of workspace required when merging
+ * the given record. Defaults to 0.
+ *
+ * Input (choose one of these):
+ *
+ * SORT_INPUT_FILE file of a given name
+ * SORT_INPUT_FB seekable fastbuf stream
+ * SORT_INPUT_PIPE non-seekable fastbuf stream
+ * SORT_INPUT_PRESORT custom presorter. Calls function
+ * int PREFIX_presort(struct fastbuf *dest, void *buf, size_t bufsize)
+ * to get successive batches of pre-sorted data.
+ * The function is passed a page-aligned presorting buffer.
+ * It returns 1 on success or 0 on EOF.
+ * SORT_DELETE_INPUT A C expression, if true, then the input files are deleted
+ * as soon as possible.
+ *
+ * Output (choose one of these):
+ *
+ * SORT_OUTPUT_FILE file of a given name
+ * SORT_OUTPUT_FB temporary fastbuf stream
+ * SORT_OUTPUT_THIS_FB a given fastbuf stream which can already contain some data
+ *
+ * Other switches:
+ *
+ * SORT_UNIQUE all items have distinct keys (checked in debug mode)
+ *
+ * The function generated:
+ *
+ * <outfb> PREFIX_sort(<in>, <out> [,<range>]), where:
+ * <in> = input file name/fastbuf or NULL
+ * <out> = output file name/fastbuf or NULL
+ * <range> = maximum integer value for the SORT_INT mode
+ *
+ * After including this file, all parameter macros are automatically
+ * undef'd.
+ */
+
+#include "lib/sorter/common.h"
+#include "lib/fastbuf.h"
+
+#include <fcntl.h>
+
+/* Shorthand for prefixing the names defined by this instance of the sorter */
+#define P(x) SORT_PREFIX(x)
+
+/*
+ * The key type. For SORT_KEY_REGULAR, keys are read and written as plain
+ * memory images, so the key I/O routines are generated here; for SORT_KEY,
+ * the user has to supply PREFIX_read_key() and PREFIX_write_key().
+ */
+#ifdef SORT_KEY_REGULAR
+typedef SORT_KEY_REGULAR P(key);
+static inline int P(read_key) (struct fastbuf *f, P(key) *k)
+{
+ return breadb(f, k, sizeof(P(key)));
+}
+static inline void P(write_key) (struct fastbuf *f, P(key) *k)
+{
+ bwrite(f, k, sizeof(P(key)));
+}
+#elif defined(SORT_KEY)
+typedef SORT_KEY P(key);
+#else
+#error Missing definition of sorting key.
+#endif
+
+/*
+ * The type of hash values: 64-bit in the SORT_INT64 mode (which is
+ * otherwise handled as SORT_INT with SORT_LONG_HASH set), `uns' otherwise.
+ */
+#ifdef SORT_INT64
+typedef u64 P(hash_t);
+#define SORT_INT SORT_INT64
+#define SORT_LONG_HASH
+#else
+typedef uns P(hash_t);
+#endif
+
+/* In the integer modes, the comparison function is generated automatically */
+#ifdef SORT_INT
+static inline int P(compare) (P(key) *x, P(key) *y)
+{
+ if (SORT_INT(*x) < SORT_INT(*y))
+ return -1;
+ if (SORT_INT(*x) > SORT_INT(*y))
+ return 1;
+ return 0;
+}
+
+/* Unless the user has requested hashing explicitly, the integer itself serves as the hash */
+#ifndef SORT_HASH_BITS
+static inline P(hash_t) P(hash) (P(key) *x)
+{
+ return SORT_INT((*x));
+}
+#endif
+#endif
+
+/*
+ * Helper macros for the included sub-generators (all of them are
+ * undefined again at the end of this file).
+ */
+
+/* LESS is the comparison operator to use: strict when unifying, non-strict otherwise */
+#ifdef SORT_UNIFY
+#define LESS <
+#else
+#define LESS <=
+#endif
+/* Swap x and y, using z as a temporary */
+#define SWAP(x,y,z) do { z=x; x=y; y=z; } while(0)
+
+/* Uniqueness of keys is checked only when assertions are compiled in */
+#if defined(SORT_UNIQUE) && defined(DEBUG_ASSERTS)
+#define SORT_ASSERT_UNIQUE
+#endif
+
+/* Default key size: the whole key structure; SORT_VAR_KEY marks a user-supplied size */
+#ifdef SORT_KEY_SIZE
+#define SORT_VAR_KEY
+#else
+#define SORT_KEY_SIZE(key) sizeof(key)
+#endif
+
+/* Default data size: no data; SORT_VAR_DATA marks records which carry data */
+#ifdef SORT_DATA_SIZE
+#define SORT_VAR_DATA
+#else
+#define SORT_DATA_SIZE(key) 0
+#endif
+
+/*
+ * Copy a single record to `out': the key is written from memory, the
+ * data (if the records have any) are copied from `in'.
+ */
+static inline void P(copy_data)(P(key) *key, struct fastbuf *in, struct fastbuf *out)
+{
+ P(write_key)(out, key);
+#ifdef SORT_VAR_DATA
+ bbcopy(in, out, SORT_DATA_SIZE(*key));
+#else
+ (void) in;
+#endif
+}
+
+/*
+ * When unifying records with neither data nor workspace, the streaming
+ * merge callback is generated automatically: it calls the in-memory one
+ * with no data and no buffer.
+ */
+#if defined(SORT_UNIFY) && !defined(SORT_VAR_DATA) && !defined(SORT_UNIFY_WORKSPACE)
+static inline void P(copy_merged)(P(key) **keys, struct fastbuf **data UNUSED, uns n, struct fastbuf *dest)
+{
+ P(write_merged)(dest, keys, NULL, n, NULL);
+}
+#endif
+
+/* Radix splitting is included only when a hash function is available (explicit or integer) */
+#if defined(SORT_HASH_BITS) || defined(SORT_INT)
+#define SORT_INTERNAL_RADIX
+#include "lib/sorter/s-radix.h"
+#endif
+
+/* The internal sorter: s-internal.h for variable-size keys, data or workspace, s-fixint.h otherwise */
+#if defined(SORT_VAR_KEY) || defined(SORT_VAR_DATA) || defined(SORT_UNIFY_WORKSPACE)
+#include "lib/sorter/s-internal.h"
+#else
+#include "lib/sorter/s-fixint.h"
+#endif
+
+/* The two-way and multi-way merge sub-generators are always included */
+#include "lib/sorter/s-twoway.h"
+#include "lib/sorter/s-multiway.h"
+
+/*
+ * The generated sorting function. The types of `in' and `out' depend on
+ * the input and output modes (file name or fastbuf); in the integer modes
+ * there is an extra parameter with the range of the keys. It fills in
+ * a sort_context, hands it over to sorter_run() and returns the output
+ * fastbuf (or NULL in the SORT_OUTPUT_FILE mode).
+ */
+static struct fastbuf *P(sort)(
+#ifdef SORT_INPUT_FILE
+ byte *in,
+#else
+ struct fastbuf *in,
+#endif
+#ifdef SORT_OUTPUT_FILE
+ byte *out
+#else
+ struct fastbuf *out
+#endif
+#ifdef SORT_INT
+ , u64 int_range
+#endif
+ )
+{
+ struct sort_context ctx;
+ bzero(&ctx, sizeof(ctx));
+
+ /* Set up the input; an all-ones in_size is used for pipes and presorters, where no size is queried */
+#ifdef SORT_INPUT_FILE
+ ctx.in_fb = bopen_file(in, O_RDONLY, &sorter_fb_params);
+ ctx.in_size = bfilesize(ctx.in_fb);
+#elif defined(SORT_INPUT_FB)
+ ctx.in_fb = in;
+ ctx.in_size = bfilesize(in);
+#elif defined(SORT_INPUT_PIPE)
+ ctx.in_fb = in;
+ ctx.in_size = ~(u64)0;
+#elif defined(SORT_INPUT_PRESORT)
+ ASSERT(!in);
+ ctx.custom_presort = P(presort);
+ ctx.in_size = ~(u64)0;
+#else
+#error No input given.
+#endif
+ /* NOTE(review): in the SORT_INPUT_PRESORT mode ctx.in_fb stays NULL, so SORT_DELETE_INPUT presumably must not be true there -- confirm */
+#ifdef SORT_DELETE_INPUT
+ if (SORT_DELETE_INPUT)
+ bconfig(ctx.in_fb, BCONFIG_IS_TEMP_FILE, 1);
+#endif
+
+ /* Set up the output */
+#ifdef SORT_OUTPUT_FB
+ ASSERT(!out);
+#elif defined(SORT_OUTPUT_THIS_FB)
+ ctx.out_fb = out;
+#elif defined(SORT_OUTPUT_FILE)
+ /* Just assume fastbuf output and rename the fastbuf later */
+#else
+#error No output given.
+#endif
+
+ /* Set up radix splitting; in the integer modes, hash_bits is the number of bits needed to represent int_range */
+#ifdef SORT_HASH_BITS
+ ctx.hash_bits = SORT_HASH_BITS;
+ ctx.radix_split = P(radix_split);
+#elif defined(SORT_INT)
+ ctx.hash_bits = 0;
+ while (ctx.hash_bits < 64 && (int_range >> ctx.hash_bits))
+ ctx.hash_bits++;
+ ctx.radix_split = P(radix_split);
+#endif
+
+ /* Callbacks generated by the included sub-generators */
+ ctx.internal_sort = P(internal);
+ ctx.internal_estimate = P(internal_estimate);
+ ctx.twoway_merge = P(twoway_merge);
+ ctx.multiway_merge = P(multiway_merge);
+
+ sorter_run(&ctx);
+
+ /* In the file output mode, the output is passed to bfix_tmp_file() with the requested name and no fastbuf is returned */
+#ifdef SORT_OUTPUT_FILE
+ bfix_tmp_file(ctx.out_fb, out);
+ ctx.out_fb = NULL;
+#endif
+ return ctx.out_fb;
+}
+
+#undef SORT_ASSERT_UNIQUE
+#undef SORT_DATA_SIZE
+#undef SORT_DELETE_INPUT
+#undef SORT_HASH_BITS
+#undef SORT_INPUT_FB
+#undef SORT_INPUT_FILE
+#undef SORT_INPUT_PIPE
+#undef SORT_INPUT_PRESORT
+#undef SORT_INT
+#undef SORT_INT64
+#undef SORT_INTERNAL_RADIX
+#undef SORT_KEY
+#undef SORT_KEY_REGULAR
+#undef SORT_KEY_SIZE
+#undef SORT_LONG_HASH
+#undef SORT_OUTPUT_FB
+#undef SORT_OUTPUT_FILE
+#undef SORT_OUTPUT_THIS_FB
+#undef SORT_PREFIX
+#undef SORT_UNIFY
+#undef SORT_UNIFY_WORKSPACE
+#undef SORT_UNIQUE
+#undef SORT_VAR_DATA
+#undef SORT_VAR_KEY
+#undef SWAP
+#undef LESS
+#undef P
--- /dev/null
+#include "lib/lib.h"
+#include "lib/stkstring.h"
+
+#include <stdio.h>
+
+/*
+ * Return the number of bytes needed to hold the concatenation of the
+ * first `cnt' strings of the array, including the terminating zero.
+ */
+uns
+stk_array_len(char **s, uns cnt)
+{
+  uns total = 1;
+  for (uns i=0; i<cnt; i++)
+    total += strlen(s[i]);
+  return total;
+}
+
+/*
+ * Concatenate the first `cnt' strings of the array to the buffer `x' and
+ * terminate the result by a zero byte. If `sep' is non-zero, it is put
+ * as a single character between adjacent strings. The caller is
+ * responsible for providing a buffer which is large enough.
+ */
+void
+stk_array_join(char *x, char **s, uns cnt, uns sep)
+{
+  for (uns i=0; i<cnt; i++)
+    {
+      uns len = strlen(s[i]);
+      memcpy(x, s[i], len);
+      x += len;
+      if (sep && i+1 < cnt)
+        *x++ = sep;
+    }
+  *x = 0;
+}
+
+/*
+ * Return the size of the buffer needed for the result of printf-like
+ * formatting (including the terminating zero). The string is formatted
+ * to a scratch buffer on the stack; if vsnprintf() reports a failure
+ * by a negative result, we retry with a buffer twice as big.
+ */
+uns
+stk_printf_internal(const char *fmt, ...)
+{
+  uns size = 256;
+  va_list args, args2;
+  va_start(args, fmt);
+  for (;;)
+    {
+      char *scratch = alloca(size);
+      va_copy(args2, args);
+      int res = vsnprintf(scratch, size, fmt, args2);
+      va_end(args2);
+      if (res >= 0)
+        {
+          va_end(args);
+          return res+1;
+        }
+      size *= 2;
+    }
+}
+
+/*
+ * Return the size of the buffer needed for the result of vprintf-like
+ * formatting (including the terminating zero). The string is formatted
+ * to a scratch buffer on the stack; if vsnprintf() reports a failure
+ * by a negative result, we retry with a buffer twice as big.
+ *
+ * The argument list is only copied, never consumed or ended here: it
+ * belongs to the caller, who is going to use it again (see the
+ * stk_vprintf() macro) and who is responsible for calling va_end() on it.
+ */
+uns
+stk_vprintf_internal(const char *fmt, va_list args)
+{
+  uns len = 256;
+  char *buf = alloca(len);
+  va_list args2;
+  for (;;)
+    {
+      /* Work on a private copy, so that `args' stays usable */
+      va_copy(args2, args);
+      int l = vsnprintf(buf, len, fmt, args2);
+      va_end(args2);
+      if (l >= 0)
+        return l+1;
+      len *= 2;
+      buf = alloca(len);
+    }
+}
+
+/*
+ * Write a hexadecimal dump of `n' bytes at `src' to `dst': two lowercase
+ * hex digits per byte, bytes separated by single spaces, terminated by
+ * a zero byte. For n == 0, the result is an empty string.
+ */
+void
+stk_hexdump_internal(char *dst, const byte *src, uns n)
+{
+  char *out = dst;
+  for (uns i=0; i<n; i++)
+    {
+      if (i > 0)
+        *out++ = ' ';
+      out += sprintf(out, "%02x", src[i]);
+    }
+  *out = 0;
+}
+
+/*
+ * Format a size in bytes in a human-readable form with a B/K/M/G suffix
+ * (units are powers of 1024). Values below 10 units are printed with one
+ * decimal place, larger ones as integers. The all-ones value is printed
+ * as "unknown".
+ *
+ * The longest possible result is 11 digits followed by "G", so it fits
+ * with the terminating zero in the 16-byte buffer allocated by the
+ * stk_fsize() macro.
+ */
+void
+stk_fsize_internal(char *buf, u64 x)
+{
+  if (x < 1<<10)
+    sprintf(buf, "%dB", (int)x);
+  else if (x < 10<<10)
+    sprintf(buf, "%.1fK", (double)x/(1<<10));
+  else if (x < 1<<20)
+    sprintf(buf, "%dK", (int)(x/(1<<10)));
+  else if (x < 10<<20)
+    sprintf(buf, "%.1fM", (double)x/(1<<20));
+  else if (x < 1<<30)
+    sprintf(buf, "%dM", (int)(x/(1<<20)));
+  else if (x < (u64)10<<30)
+    sprintf(buf, "%.1fG", (double)x/(1<<30));
+  else if (x != ~(u64)0)
+    /* The number of gigabytes can exceed the range of an int, so print it in full width */
+    sprintf(buf, "%lluG", (unsigned long long)(x >> 30));
+  else
+    strcpy(buf, "unknown");
+}
+
+#ifdef TEST
+
+/* Self-test: exercise the stk_* macros and print the results (compared with the expected output by the test suite). */
+int main(void)
+{
+  /* Build "Beware the Jabberwock, my son!" step by step */
+  char *head = stk_strndup("are!",3);
+  char *phrase = stk_strcat(head, " the ");
+  char *longer = stk_strmulticat(phrase, stk_strdup("Jabberwock, "), "my", NULL);
+  char *arr[] = { longer, " son" };
+  char *joined = stk_strarraycat(arr, 2);
+  char *full = stk_printf("Bew%s!", joined);
+  puts(full);
+
+  /* Hex dump of the first three characters */
+  char *hex = stk_hexdump(full, 3);
+  puts(hex);
+
+  /* Joining with a separator */
+  char *ary[] = { "The", "jaws", "that", "bite" };
+  char *sentence = stk_strjoin(ary, 4, ' ');
+  puts(sentence);
+
+  /* Formatting of sizes */
+  char *fsize = stk_fsize(1234567);
+  puts(fsize);
+  return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Strings Allocated on the Stack
+ *
+ * (c) 2005--2007 Martin Mares <mj@ucw.cz>
+ * (c) 2005 Tomas Valla <tom@ucw.cz>
+ * (c) 2008 Pavel Charvat <pchar@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_STKSTRING_H
+#define _UCW_STKSTRING_H
+
+#include <alloca.h>
+#include <string.h>
+#include <stdio.h>
+
+/*
+ * All the following macros allocate their result by alloca() in the
+ * stack frame of the calling function and evaluate to a pointer to it.
+ *
+ *   stk_strdup(s)		copy of a string
+ *   stk_strndup(s,n)		copy of at most n characters of a string
+ *   stk_strcat(s1,s2)		concatenation of two strings
+ *   stk_strmulticat(s...)	concatenation of a list of strings; the last
+ *				argument is not used (the callers pass NULL)
+ *   stk_strarraycat(s,n)	concatenation of an array of n strings
+ *   stk_strjoin(s,n,sep)	the same with a separator character between
+ *				the strings (one spare byte is allocated, so
+ *				that n == 0 yields an empty string safely)
+ *   stk_printf(f...)		printf-like formatting; beware that the
+ *				arguments are evaluated twice
+ *   stk_vprintf(f,args)	the same with a va_list
+ *   stk_hexdump(s,n)		hexadecimal dump of n bytes
+ *   stk_str_unesc(s)		result of str_unesc() on the string
+ *   stk_fsize(n)		human-readable form of a file size
+ */
+#define stk_strdup(s) ({ const char *_s=(s); uns _l=strlen(_s)+1; char *_x=alloca(_l); memcpy(_x, _s, _l); _x; })
+#define stk_strndup(s,n) ({ const char *_s=(s); uns _l=strnlen(_s,(n)); char *_x=alloca(_l+1); memcpy(_x, _s, _l); _x[_l]=0; _x; })
+#define stk_strcat(s1,s2) ({ const char *_s1=(s1); const char *_s2=(s2); uns _l1=strlen(_s1); uns _l2=strlen(_s2); char *_x=alloca(_l1+_l2+1); memcpy(_x,_s1,_l1); memcpy(_x+_l1,_s2,_l2+1); _x; })
+#define stk_strmulticat(s...) ({ char *_s[]={s}; char *_x=alloca(stk_array_len(_s, ARRAY_SIZE(_s)-1)); stk_array_join(_x, _s, ARRAY_SIZE(_s)-1, 0); _x; })
+#define stk_strarraycat(s,n) ({ char **_s=(s); int _n=(n); char *_x=alloca(stk_array_len(_s,_n)); stk_array_join(_x, _s, _n, 0); _x; })
+#define stk_strjoin(s,n,sep) ({ char **_s=(s); int _n=(n); char *_x=alloca(stk_array_len(_s,_n)+_n); stk_array_join(_x, _s, _n, (sep)); _x; })
+#define stk_printf(f...) ({ uns _l=stk_printf_internal(f); char *_x=alloca(_l); sprintf(_x, f); _x; })
+#define stk_vprintf(f, args) ({ uns _l=stk_vprintf_internal(f, args); char *_x=alloca(_l); vsprintf(_x, f, args); _x; })
+#define stk_hexdump(s,n) ({ uns _n=(n); char *_x=alloca(3*_n+1); stk_hexdump_internal(_x,(char*)(s),_n); _x; })
+#define stk_str_unesc(s) ({ const char *_s=(s); char *_d=alloca(strlen(_s)+1); str_unesc(_d, _s); _d; })
+#define stk_fsize(n) ({ char *_s=alloca(16); stk_fsize_internal(_s, n); _s; })
+
+/* Internal helpers of the stk_* macros; they are not meant to be called directly. */
+
+/* Size of the buffer needed for the concatenation of cnt strings, including the terminating zero */
+uns stk_array_len(char **s, uns cnt);
+/* Concatenate cnt strings to x, separated by the character sep unless it is zero */
+void stk_array_join(char *x, char **s, uns cnt, uns sep);
+/* Size of the buffer needed for the result of printf-like formatting */
+uns stk_printf_internal(const char *x, ...) FORMAT_CHECK(printf,1,2);
+/* The same with a va_list */
+uns stk_vprintf_internal(const char *x, va_list args);
+/* Write a hex dump of n bytes at src to dst */
+void stk_hexdump_internal(char *dst, const byte *src, uns n);
+/* Write a human-readable form of the size to dst */
+void stk_fsize_internal(char *dst, u64 size);
+
+#endif
--- /dev/null
+# Tests for stkstring modules
+
+Run: ../obj/lib/stkstring-t
+Out: Beware the Jabberwock, my son!
+ 42 65 77
+ The jaws that bite
+ 1.2M
--- /dev/null
+/*
+ * Checking the correctness of str_len() and hash_*() and proving that
+ * they are faster than the classical versions ;-)
+ */
+
+#include "lib/hashfunc.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/time.h>
+
+/* It will be divided by (10 + strlen()). */
+#define TEST_TIME 1000000
+
+/* The shift of the string according to the alignment. */
+static uns alignment = 0;
+
+/*
+ * Fill `str' with `len' random non-zero bytes (values 1..255) and
+ * terminate it by a zero byte; the buffer must hold len+1 bytes.
+ */
+static void
+random_string(byte *str, int len)
+{
+  byte *end = str + len;
+  while (str < end)
+    *str++ = random() % 255 + 1;
+  *end = 0;
+}
+
+/*
+ * Return the number of microseconds elapsed since the previous call of
+ * this function. The result of the very first call is meaningless,
+ * because there is nothing to measure from yet.
+ */
+static uns
+elapsed_time(void)
+{
+  static struct timeval prev;
+  struct timeval now;
+
+  gettimeofday(&now, NULL);
+  uns delta = (now.tv_sec - prev.tv_sec) * 1000000 + (now.tv_usec - prev.tv_usec);
+  prev = now;
+  return delta;
+}
+
+/*
+ * Test and benchmark of str_len() and the string hashing functions.
+ * An optional argument gives the offset (alignment shift) at which the
+ * benchmarked strings are placed in their buffer.
+ *
+ * First, str_len() is checked against strlen() and hash_string() against
+ * hash_block() on a set of fixed strings. Then random strings of various
+ * lengths are generated and the functions are timed on them.
+ */
+int
+main(int argc, char **argv)
+{
+ /* Fixed strings for the correctness checks, terminated by NULL */
+ byte *strings[] = {
+ "",
+ "a",
+ "aa",
+ "aaa",
+ "aaaa",
+ "aaaaa",
+ "aaaaaa",
+ "aaaaaaa",
+ "aaaaaaaa",
+ "aaaaaaaaa",
+ "aaaaaaaaaa",
+ "AHOJ",
+ "\200aaaa",
+ "\200",
+ "\200\200",
+ "\200\200\200",
+ "\200\200\200\200",
+ "\200\200\200\200\200",
+ "kelapS treboR",
+ "Robert Spalek",
+ "uuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuu",
+ "********************************",
+ "****************************************************************",
+ NULL
+ };
+ /* Lengths of the random strings for the benchmark, terminated by -1 */
+ int lengths[] = {
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
+ 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
+ 30, 40, 50, 60, 70, 80, 90, 100,
+ 200, 300, 400, 500, 600, 700, 800, 900, 1000,
+ 2000, 4000, 8000, 16000, 32000, 64000,
+ -1
+ };
+ int i;
+ if (argc > 1)
+ alignment = atoi(argv[1]);
+ printf("Alignment set to %d\n", alignment);
+ /* Check str_len() against strlen() */
+ for (i=0; strings[i]; i++)
+ if (strlen(strings[i]) != str_len(strings[i]))
+ die("Internal str_len() error on string %d", i);
+ printf("%d strings tested OK\n", i);
+ /* Check hash_string() against hash_block() and print the hashes */
+ for (i=0; strings[i]; i++)
+ {
+ uns h1, h2;
+ h1 = hash_string(strings[i]);
+ h2 = hash_string_nocase(strings[i]);
+ if (h1 != hash_block(strings[i], str_len(strings[i])))
+ die("Internal hash_string() error on string %d", i);
+ printf("hash %2d = %08x %08x", i, h1, h2);
+ if (h1 == h2)
+ printf(" upper case?");
+ printf("\n");
+ }
+ /* Benchmark: for each length, the number of passes is TEST_TIME / (length + 10) */
+ for (i=0; lengths[i] >= 0; i++)
+ {
+ byte str[lengths[i] + 1 + alignment];
+ uns count = TEST_TIME / (lengths[i] + 10);
+ uns el1 = 0, el2 = 0, elh = 0, elhn = 0;
+ uns tot1 = 0, tot2 = 0, hash = 0, hashn = 0;
+ uns j;
+ for (j=0; j<count; j++)
+ {
+ random_string(str + alignment, lengths[i]);
+ /* Reset the reference point of the timer; the result is discarded */
+ elapsed_time();
+ /* Avoid "optimizing" by gcc, since the functions are
+ * attributed PURE. */
+ tot1 += strlen(str + alignment);
+ el1 += elapsed_time();
+ tot2 += str_len(str + alignment);
+ el2 += elapsed_time();
+ hash ^= hash_string(str + alignment);
+ elh += elapsed_time();
+ hashn ^= hash_string_nocase(str + alignment);
+ elhn += elapsed_time();
+ }
+ /* Both length functions must have returned the same totals */
+ if (tot1 != tot2)
+ die("Internal error during test %d", i);
+ printf("Test %d: strlen = %d, passes = %d, classical = %d usec, speedup = %.4f\n",
+ i, lengths[i], count, el1, (el1 + 0.) / el2);
+ printf("\t\t total hash = %08x/%08x, hash time = %d/%d usec\n", hash, hashn, elh, elhn);
+ }
+/*
+ printf("test1: %d\n", hash_modify(10000000, 10000000, 99777555));
+ printf("test1: %d, %d\n", i, hash_modify(i, lengths[i-2], 99777333));
+ printf("test1: %d, %d\n", i, hash_modify(lengths[i-2], i, 99777333));
+ printf("test1: %d,%d,%d->%d\n", i, i*3-2, i*i, hash_modify(4587, i*3-2, i*i));
+ printf("test1: %d\n", hash_modify(lengths[5], 345, i));
+*/
+ return 0;
+}
--- /dev/null
+/*
+ * UCW Library -- Character Classes
+ *
+ * (c) 1998--2004 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/chartype.h"
+
+/* Character category table. CHAR() is defined so that every entry listed
+ * in lib/charmap.h contributes only its `cat' field, in file order
+ * (presumably one entry per character code 0..255 -- confirm against charmap.h). */
+const unsigned char _c_cat[256] = {
+#define CHAR(code,upper,lower,cat) cat,
+#include "lib/charmap.h"
+#undef CHAR
+};
--- /dev/null
+/*
+ * UCW Library -- Lowercase Map
+ *
+ * (c) 1997--2004 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/chartype.h"
+
+/* Lowercase mapping table. CHAR() is defined so that every entry listed
+ * in lib/charmap.h contributes only its `lower' field, in file order
+ * (presumably one entry per character code 0..255 -- confirm against charmap.h). */
+const unsigned char _c_lower[256] = {
+#define CHAR(code,upper,lower,cat) lower,
+#include "lib/charmap.h"
+#undef CHAR
+};
--- /dev/null
+/*
+ * UCW Library -- Uppercase Map
+ *
+ * (c) 1997--2004 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/chartype.h"
+
+/* Uppercase mapping table. CHAR() is defined so that every entry listed
+ * in lib/charmap.h contributes only its `upper' field, in file order
+ * (presumably one entry per character code 0..255 -- confirm against charmap.h). */
+const unsigned char _c_upper[256] = {
+#define CHAR(code,upper,lower,cat) upper,
+#include "lib/charmap.h"
+#undef CHAR
+};
--- /dev/null
+/*
+ * UCW Library -- String Routines
+ *
+ * (c) 2006 Pavel Charvat <pchar@ucw.cz>
+ * (c) 2007 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#undef LOCAL_DEBUG
+
+#include "lib/lib.h"
+#include "lib/chartype.h"
+#include <stdlib.h>
+
+/* Expands C99-like escape sequences found in the NUL-terminated string `s'
+ * and stores the result, NUL-terminated, in `d'.
+ * It is safe to use the same buffer for both input and output, because the
+ * output never gets longer than the input consumed so far.
+ *
+ * Recognized: \a \b \f \n \r \t \v \? \' \" \\, \x followed by hex digits
+ * and \ followed by one to three octal digits. Numeric escapes whose value
+ * exceeds 255 produce no output. A backslash followed by anything else is
+ * copied verbatim; "\x" without hex digits loses only its backslash.
+ *
+ * Returns a pointer to the terminating NUL of the output. */
+char *
+str_unesc(char *d, const char *s)
+{
+  while (*s)
+    {
+      if (*s == '\\')
+        switch (s[1])
+          {
+          case 'a': *d++ = '\a'; s += 2; break;
+          case 'b': *d++ = '\b'; s += 2; break;
+          case 'f': *d++ = '\f'; s += 2; break;
+          case 'n': *d++ = '\n'; s += 2; break;
+          case 'r': *d++ = '\r'; s += 2; break;
+          case 't': *d++ = '\t'; s += 2; break;
+          case 'v': *d++ = '\v'; s += 2; break;
+          case '\?': *d++ = '\?'; s += 2; break;
+          case '\'': *d++ = '\''; s += 2; break;
+          case '\"': *d++ = '\"'; s += 2; break;
+          case '\\': *d++ = '\\'; s += 2; break;
+          case 'x':
+            if (!Cxdigit(s[2]))
+              {
+                /* Drop the backslash; the 'x' is copied by the next iteration */
+                s++;
+                DBG("\\x used with no following hex digits");
+              }
+            else
+              {
+                char *p;
+                /* Keep the full width of strtoul()'s result, so that an
+                 * over-long value cannot wrap around into the 0..255 range */
+                unsigned long v = strtoul(s + 2, &p, 16);
+                if (v <= 255)
+                  *d++ = v;
+                else
+                  DBG("hex escape sequence out of range");
+                s = p;
+              }
+            break;
+          default:
+            if (s[1] >= '0' && s[1] <= '7')
+              {
+                /* One mandatory and up to two optional octal digits */
+                uns v = s[1] - '0';
+                s += 2;
+                for (uns i = 0; i < 2 && *s >= '0' && *s <= '7'; s++, i++)
+                  v = (v << 3) + *s - '0';
+                if (v <= 255)
+                  *d++ = v;
+                else
+                  DBG("octal escape sequence out of range");
+              }
+            else
+              /* Unknown escape: copy the backslash itself and let the main
+               * loop handle whatever follows. This must not run after an
+               * octal escape, otherwise the next character would be copied
+               * here and `s' could step over the terminating NUL. */
+              *d++ = *s++;
+            break;
+          }
+      else
+        *d++ = *s++;
+    }
+  *d = 0;
+  return d;
+}
+
+/* Renders a bit mask as a string of flag letters: for every character
+ * fmt[i] the output gets fmt[i] if bit i of `flags' is set and '-' otherwise.
+ * `dest' must have room for strlen(fmt)+1 bytes. Positions beyond the width
+ * of `flags' are always rendered as '-'.
+ * Returns `dest'. */
+char *
+str_format_flags(char *dest, const char *fmt, uns flags)
+{
+  char *start = dest;
+  const uns nbits = 8 * sizeof(flags);
+  for (uns i = 0; fmt[i]; i++)
+    {
+      /* Shift the unsigned mask, not the constant 1: `1 << i' is a signed
+       * shift and undefined once i reaches the sign bit or the type width */
+      uns is_set = (i < nbits) && ((flags >> i) & 1);
+      *dest++ = is_set ? fmt[i] : '-';
+    }
+  *dest = 0;
+  return start;
+}
--- /dev/null
+/*
+ * UCW Library -- Syncing Directories
+ *
+ * (c) 2004--2005 Martin Mares <mj@ucw.cz>
+ */
+
+#include "lib/lib.h"
+
+#include <fcntl.h>
+#include <unistd.h>
+
+/* Flush the directory `name' to disk by opening it read-only and calling
+ * fsync() on the descriptor. Any failure is only logged, never returned. */
+void
+sync_dir(const char *name)
+{
+  int flags = O_RDONLY;
+#ifdef CONFIG_LINUX
+  flags |= O_DIRECTORY;
+#endif
+  int fd = open(name, flags);
+  if (fd >= 0)
+    {
+      int res = fsync(fd);
+      close(fd);
+      if (res >= 0)
+        return;
+    }
+  msg(L_ERROR, "Unable to sync directory %s: %m", name);
+}
--- /dev/null
+/*
+ * The UCW Library -- Threading Helpers
+ *
+ * (c) 2006 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/threads.h"
+#include "lib/conf.h"
+
+/* Default stack size for threads, configurable as Threads.DefaultStackSize.
+ * NOTE(review): 65556 looks like it could be a typo for 65536 (64 KB) -- confirm. */
+uns default_thread_stack_size = 65556;
+
+/* Items of the "Threads" configuration section */
+static struct cf_section threads_config = {
+ CF_ITEMS {
+ CF_UNS("DefaultStackSize", &default_thread_stack_size),
+ CF_END
+ }
+};
+
+/* Registers the "Threads" section; marked CONSTRUCTOR, so presumably run
+ * automatically before main() -- confirm against the macro's definition. */
+static void CONSTRUCTOR
+ucwlib_threads_conf_init(void)
+{
+ cf_declare_section("Threads", &threads_config, 0);
+}
--- /dev/null
+/*
+ * The UCW Library -- Threading Helpers
+ *
+ * (c) 2006 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/threads.h"
+
+#ifdef CONFIG_UCW_THREADS
+
+#include <pthread.h>
+
+#ifdef CONFIG_LINUX
+#include <sys/types.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#ifdef __NR_gettid
+/* Thin wrapper around the gettid system call. It carries a library-private
+ * name, because newer C libraries declare their own gettid() in <unistd.h>
+ * and a static function of the same name would conflict with it. */
+static pid_t
+ucwlib_gettid(void)
+{
+  return syscall(__NR_gettid);
+}
+#define CONFIG_USE_GETTID
+#endif
+#endif
+
+/* Key of the per-thread struct ucwlib_context */
+static pthread_key_t ucwlib_context_key;
+/* Mutex behind ucwlib_lock() / ucwlib_unlock() */
+static pthread_mutex_t ucwlib_master_mutex;
+
+/* Destructor registered with the key: releases a thread's context */
+static void
+ucwlib_free_thread_context(void *p)
+{
+  xfree(p);
+}
+
+/* Creates the context key and the master mutex. */
+static void CONSTRUCTOR
+ucwlib_threads_init(void)
+{
+  /* pthread functions report failure by returning a positive error number;
+   * they never return a negative value and do not set errno */
+  int err = pthread_key_create(&ucwlib_context_key, ucwlib_free_thread_context);
+  if (err)
+    die("Cannot create pthread_key: error %d", err);
+  pthread_mutex_init(&ucwlib_master_mutex, NULL);
+}
+
+/* Returns an identifier for the calling thread: the kernel thread ID where
+ * the gettid syscall is available and succeeds, otherwise the next value of
+ * a process-wide counter protected by the master lock. */
+static int
+ucwlib_tid(void)
+{
+  static int tid_counter;
+  int tid;
+
+#ifdef CONFIG_USE_GETTID
+  tid = ucwlib_gettid();
+  if (tid > 0)
+    return tid;
+  /* The syscall might be unimplemented */
+#endif
+
+  ucwlib_lock();
+  tid = ++tid_counter;
+  ucwlib_unlock();
+  return tid;
+}
+
+/* Returns the calling thread's context. On the first call in a thread a
+ * zeroed context is allocated, given a thread ID and stored under
+ * ucwlib_context_key. */
+struct ucwlib_context *
+ucwlib_thread_context(void)
+{
+  struct ucwlib_context *ctx = pthread_getspecific(ucwlib_context_key);
+  if (ctx)
+    return ctx;
+
+  ctx = xmalloc_zero(sizeof(*ctx));
+  ctx->thread_id = ucwlib_tid();
+  pthread_setspecific(ucwlib_context_key, ctx);
+  return ctx;
+}
+
+/* Acquire the library-wide master mutex (the result of pthread_mutex_lock() is not checked) */
+void
+ucwlib_lock(void)
+{
+ pthread_mutex_lock(&ucwlib_master_mutex);
+}
+
+/* Release the library-wide master mutex (the result of pthread_mutex_unlock() is not checked) */
+void
+ucwlib_unlock(void)
+{
+ pthread_mutex_unlock(&ucwlib_master_mutex);
+}
+
+#else
+
+/* Variant used when CONFIG_UCW_THREADS is not defined: a single static context is always returned */
+struct ucwlib_context *
+ucwlib_thread_context(void)
+{
+ static struct ucwlib_context ucwlib_context;
+ return &ucwlib_context;
+}
+
+/* Without thread support the master lock is a no-op */
+void
+ucwlib_lock(void)
+{
+}
+
+/* Without thread support the master lock is a no-op */
+void
+ucwlib_unlock(void)
+{
+}
+
+#endif
+
+#ifdef TEST
+
+/* Smoke test: take and release the master lock, then report this thread's ID. */
+int main(void)
+{
+  ucwlib_lock();
+  ucwlib_unlock();
+  /* Use the library's msg() logger; a bare log() names the math function */
+  msg(L_INFO, "tid=%d", ucwlib_thread_context()->thread_id);
+  return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * The UCW Library -- Threading Helpers
+ *
+ * (c) 2006 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_THREAD_H
+#define _UCW_THREAD_H
+
+/* This structure holds per-thread data */
+
+struct ucwlib_context {
+ int thread_id; // Thread ID (either kernel tid or a counter)
+ int temp_counter; // Counter for fb-temp.c
+ struct asio_queue *io_queue; // Async I/O queue for fb-direct.c
+ sh_sighandler_t *signal_handlers; // Signal handlers for sighandler.c
+};
+
+/* Returns the context of the calling thread; never NULL in the implementations in threads.c */
+struct ucwlib_context *ucwlib_thread_context(void);
+
+/* Global lock used for initialization, cleanup and other not so frequently accessed global state */
+
+void ucwlib_lock(void);
+void ucwlib_unlock(void);
+
+#ifdef CONFIG_UCW_THREADS
+
+extern uns default_thread_stack_size;
+
+#endif
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- A Simple Millisecond Timer
+ *
+ * (c) 2007 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/time.h>
+
+/* Returns the current wall-clock time (gettimeofday) in milliseconds. */
+timestamp_t
+get_timestamp(void)
+{
+  struct timeval now;
+  gettimeofday(&now, NULL);
+  timestamp_t ms = (timestamp_t)now.tv_sec * 1000;
+  ms += now.tv_usec / 1000;
+  return ms;
+}
+
+/* Start a timer: store the current timestamp into *timer */
+void
+init_timer(timestamp_t *timer)
+{
+ *timer = get_timestamp();
+}
+
+/* Returns the milliseconds elapsed since *timer was last set and restarts
+ * the timer from now. The result is clamped to ~0U. */
+uns
+get_timer(timestamp_t *timer)
+{
+  timestamp_t before = *timer;
+  timestamp_t now = get_timestamp();
+  *timer = now;
+  return MIN(now - before, ~0U);
+}
+
+/* Start the timer *new at the current time and return the milliseconds
+ * elapsed on the timer *old (clamped to ~0U); *old itself is left untouched */
+uns
+switch_timer(timestamp_t *old, timestamp_t *new)
+{
+ *new = get_timestamp();
+ return MIN(*new-*old, ~0U);
+}
--- /dev/null
+/*
+ * UCW Library -- Fast Access to Unaligned Data
+ *
+ * (c) 1997--2007 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_UNALIGNED_H
+#define _UCW_UNALIGNED_H
+
+/* Big endian format */
+
+#if defined(CPU_ALLOW_UNALIGNED) && defined(CPU_BIG_ENDIAN)
+/* The CPU is big-endian and allows unaligned access: load and store directly */
+static inline uns get_u16_be(const void *p) { return *(u16 *)p; }
+static inline u32 get_u32_be(const void *p) { return *(u32 *)p; }
+static inline u64 get_u64_be(const void *p) { return *(u64 *)p; }
+static inline void put_u16_be(void *p, uns x) { *(u16 *)p = x; }
+static inline void put_u32_be(void *p, u32 x) { *(u32 *)p = x; }
+static inline void put_u64_be(void *p, u64 x) { *(u64 *)p = x; }
+#else
+/* Portable versions assembling the value byte by byte, most significant first */
+static inline uns get_u16_be(const void *p)
+{
+  const byte *c = p;
+  return (c[0] << 8) | c[1];
+}
+static inline u32 get_u32_be(const void *p)
+{
+  const byte *c = p;
+  /* Widen to u32 before shifting: a byte promotes to signed int, and moving
+   * a value >= 0x80 up by 24 bits would shift into the sign bit */
+  return ((u32)c[0] << 24) | ((u32)c[1] << 16) | ((u32)c[2] << 8) | c[3];
+}
+static inline u64 get_u64_be(const void *p)
+{
+  return ((u64) get_u32_be(p) << 32) | get_u32_be((const byte *)p+4);
+}
+static inline void put_u16_be(void *p, uns x)
+{
+  byte *c = p;
+  c[0] = x >> 8;
+  c[1] = x;
+}
+static inline void put_u32_be(void *p, u32 x)
+{
+  byte *c = p;
+  c[0] = x >> 24;
+  c[1] = x >> 16;
+  c[2] = x >> 8;
+  c[3] = x;
+}
+static inline void put_u64_be(void *p, u64 x)
+{
+  put_u32_be(p, x >> 32);
+  put_u32_be((byte *)p+4, x);
+}
+#endif
+
+/* Little-endian format */
+
+#if defined(CPU_ALLOW_UNALIGNED) && !defined(CPU_BIG_ENDIAN)
+/* The CPU is little-endian and allows unaligned access: load and store directly */
+static inline uns get_u16_le(const void *p) { return *(u16 *)p; }
+static inline u32 get_u32_le(const void *p) { return *(u32 *)p; }
+static inline u64 get_u64_le(const void *p) { return *(u64 *)p; }
+static inline void put_u16_le(void *p, uns x) { *(u16 *)p = x; }
+static inline void put_u32_le(void *p, u32 x) { *(u32 *)p = x; }
+static inline void put_u64_le(void *p, u64 x) { *(u64 *)p = x; }
+#else
+/* Portable versions assembling the value byte by byte, least significant first */
+static inline uns get_u16_le(const void *p)
+{
+  const byte *c = p;
+  return c[0] | (c[1] << 8);
+}
+static inline u32 get_u32_le(const void *p)
+{
+  const byte *c = p;
+  /* Widen to u32 before shifting: a byte promotes to signed int, and moving
+   * a value >= 0x80 up by 24 bits would shift into the sign bit */
+  return c[0] | ((u32)c[1] << 8) | ((u32)c[2] << 16) | ((u32)c[3] << 24);
+}
+static inline u64 get_u64_le(const void *p)
+{
+  return get_u32_le(p) | ((u64) get_u32_le((const byte *)p+4) << 32);
+}
+static inline void put_u16_le(void *p, uns x)
+{
+  byte *c = p;
+  c[0] = x;
+  c[1] = x >> 8;
+}
+static inline void put_u32_le(void *p, u32 x)
+{
+  byte *c = p;
+  c[0] = x;
+  c[1] = x >> 8;
+  c[2] = x >> 16;
+  c[3] = x >> 24;
+}
+static inline void put_u64_le(void *p, u64 x)
+{
+  put_u32_le(p, x);
+  put_u32_le((byte *)p+4, x >> 32);
+}
+#endif
+
+/* Read a 40-bit big-endian value: one high byte followed by a 32-bit big-endian word */
+static inline u64 get_u40_be(const void *p)
+{
+ const byte *c = p;
+ return ((u64)c[0] << 32) | get_u32_be(c+1);
+}
+
+/* Write the low 40 bits of x in big-endian order (5 bytes) */
+static inline void put_u40_be(void *p, u64 x)
+{
+ byte *c = p;
+ c[0] = x >> 32;
+ put_u32_be(c+1, x);
+}
+
+/* Read a 40-bit little-endian value: a 32-bit little-endian word followed by one high byte */
+static inline u64 get_u40_le(const void *p)
+{
+ const byte *c = p;
+ return get_u32_le(c) | ((u64) c[4] << 32);
+}
+
+/* Write the low 40 bits of x in little-endian order (5 bytes) */
+static inline void put_u40_le(void *p, u64 x)
+{
+ byte *c = p;
+ put_u32_le(c, x);
+ c[4] = x >> 32;
+}
+
+/* The native format: each accessor forwards to the variant matching the
+ * byte order of the CPU. The put_* functions return void, so they call the
+ * underlying function as a plain statement; `return <void expression>;'
+ * is not valid in a C function returning void. */
+
+#ifdef CPU_BIG_ENDIAN
+
+static inline uns get_u16(const void *p) { return get_u16_be(p); }
+static inline u32 get_u32(const void *p) { return get_u32_be(p); }
+static inline u64 get_u64(const void *p) { return get_u64_be(p); }
+static inline u64 get_u40(const void *p) { return get_u40_be(p); }
+static inline void put_u16(void *p, uns x) { put_u16_be(p, x); }
+static inline void put_u32(void *p, u32 x) { put_u32_be(p, x); }
+static inline void put_u64(void *p, u64 x) { put_u64_be(p, x); }
+static inline void put_u40(void *p, u64 x) { put_u40_be(p, x); }
+
+#else
+
+static inline uns get_u16(const void *p) { return get_u16_le(p); }
+static inline u32 get_u32(const void *p) { return get_u32_le(p); }
+static inline u64 get_u64(const void *p) { return get_u64_le(p); }
+static inline u64 get_u40(const void *p) { return get_u40_le(p); }
+static inline void put_u16(void *p, uns x) { put_u16_le(p, x); }
+static inline void put_u32(void *p, u32 x) { put_u32_le(p, x); }
+static inline void put_u64(void *p, u64 x) { put_u64_le(p, x); }
+static inline void put_u40(void *p, u64 x) { put_u40_le(p, x); }
+
+#endif
+
+/* Just for completeness: single-byte accessors with the same calling
+ * convention as the multi-byte ones (put_u8 stores x converted to a byte) */
+
+static inline uns get_u8(const void *p) { return *(const byte *)p; }
+static inline void put_u8(void *p, uns x) { *(byte *)p = x; }
+
+/* Backward compatibility macros: upper-case aliases of the native-order
+ * accessors. Each expands to a single expression without a trailing
+ * semicolon, so it can be used wherever a function call can, including
+ * as the body of an if that has an else branch. */
+
+#define GET_U8(p) get_u8(p)
+#define GET_U16(p) get_u16(p)
+#define GET_U32(p) get_u32(p)
+#define GET_U64(p) get_u64(p)
+#define GET_U40(p) get_u40(p)
+
+#define PUT_U8(p,x) put_u8(p,x)
+#define PUT_U16(p,x) put_u16(p,x)
+#define PUT_U32(p,x) put_u32(p,x)
+#define PUT_U64(p,x) put_u64(p,x)
+#define PUT_U40(p,x) put_u40(p,x)
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- UTF-8 Functions
+ *
+ * (c) 1997--2004 Martin Mares <mj@ucw.cz>
+ * (c) 2003 Robert Spalek <robert@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/unicode.h"
+
+/* Counts the UTF-8 encoded characters of a NUL-terminated string,
+ * advancing by one UTF8_SKIP() per character. */
+uns
+utf8_strlen(const byte *str)
+{
+  uns count;
+  for (count = 0; *str; count++)
+    UTF8_SKIP(str);
+  return count;
+}
+
+/* Counts the UTF-8 encoded characters that start within the first `n'
+ * bytes of `str', advancing by one UTF8_SKIP() per character. */
+uns
+utf8_strnlen(const byte *str, uns n)
+{
+  const byte *limit = str + n;
+  uns count = 0;
+  for (; str < limit; count++)
+    UTF8_SKIP(str);
+  return count;
+}
+
+#ifdef TEST
+
+#include <string.h>
+#include <stdio.h>
+
+/*
+ * Test driver: the first argument names one of the conversion functions
+ * listed in FUNCS (compared case-insensitively). The *_GET functions read
+ * hexadecimal byte values from stdin and print the decoded characters;
+ * the *_PUT functions read hexadecimal character codes and print the
+ * encoded bytes.
+ */
+int main(int argc, char **argv)
+{
+ byte buf[256];
+
+ /* X-macro list of the testable functions; all GET entries precede the PUT ones */
+#define FUNCS \
+ F(UTF8_GET) F(UTF8_32_GET) F(UTF16_BE_GET) F(UTF16_LE_GET) \
+ F(UTF8_PUT) F(UTF8_32_PUT) F(UTF16_BE_PUT) F(UTF16_LE_PUT)
+
+ enum {
+#define F(x) FUNC_##x,
+ FUNCS
+#undef F
+ };
+ char *names[] = {
+#define F(x) [FUNC_##x] = #x,
+ FUNCS
+#undef F
+ };
+
+ /* ~0U means "no function selected" */
+ uns func = ~0U;
+ if (argc > 1)
+ for (uns i = 0; i < ARRAY_SIZE(names); i++)
+ if (!strcasecmp(names[i], argv[1]))
+ func = i;
+ if (!~func)
+ {
+ fprintf(stderr, "Invalid usage!\n");
+ return 1;
+ }
+
+ if (func < FUNC_UTF8_PUT)
+ {
+ /* Decoding: collect all input bytes in buf, then decode them one character at a time */
+ byte *p = buf, *q = buf, *last;
+ uns u;
+ bzero(buf, sizeof(buf));
+ /* NOTE(review): the number of bytes read is not limited to sizeof(buf) */
+ while (scanf("%x", &u) == 1)
+ *q++ = u;
+ while (p < q)
+ {
+ last = p;
+ if (p != buf)
+ putchar(' ');
+ switch (func)
+ {
+ case FUNC_UTF8_GET:
+ p = utf8_get(p, &u);
+ break;
+ case FUNC_UTF8_32_GET:
+ p = utf8_32_get(p, &u);
+ break;
+ case FUNC_UTF16_BE_GET:
+ p = utf16_be_get(p, &u);
+ break;
+ case FUNC_UTF16_LE_GET:
+ p = utf16_le_get(p, &u);
+ break;
+ default:
+ ASSERT(0);
+ }
+ printf("%04x", u);
+ /* The decoder must make progress and must not run past the input */
+ ASSERT(last < p && p <= q);
+ }
+ putchar('\n');
+ }
+ else
+ {
+ /* Encoding: each character is encoded at the start of buf and dumped immediately;
+ * i counts the bytes printed so far and controls the separating spaces */
+ uns u, i=0;
+ while (scanf("%x", &u) == 1)
+ {
+ byte *p = buf, *q = buf;
+ switch (func)
+ {
+ case FUNC_UTF8_PUT:
+ p = utf8_put(p, u);
+ break;
+ case FUNC_UTF8_32_PUT:
+ p = utf8_32_put(p, u);
+ break;
+ case FUNC_UTF16_BE_PUT:
+ p = utf16_be_put(p, u);
+ break;
+ case FUNC_UTF16_LE_PUT:
+ p = utf16_le_put(p, u);
+ break;
+ default:
+ ASSERT(0);
+ }
+ while (q < p)
+ {
+ if (i++)
+ putchar(' ');
+ printf("%02x", *q++);
+ }
+ }
+ putchar('\n');
+ }
+ return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Unicode Characters
+ *
+ * (c) 1997--2004 Martin Mares <mj@ucw.cz>
+ * (c) 2004 Robert Spalek <robert@ucw.cz>
+ * (c) 2007 Pavel Charvat <pchar@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_UNICODE_H
+#define _UCW_UNICODE_H
+
+#include "lib/unaligned.h"
+
+/* Macros for handling UTF-8 */
+
+#define UNI_REPLACEMENT 0xfffc
+
+/* Store the UTF-8 encoding of `u', a character from the basic multilingual
+ * plane [0, 0xFFFF], at `p' and return the position just behind it.
+ * One to three bytes are written (RFC2279); larger values trip the ASSERT. */
+static inline byte *
+utf8_put(byte *p, uns u)
+{
+  if (u < 0x80)
+    {
+      /* Plain ASCII: a single byte */
+      *p++ = u;
+      return p;
+    }
+  if (u < 0x800)
+    *p++ = 0xc0 | (u >> 6);
+  else
+    {
+      ASSERT(u < 0x10000);
+      *p++ = 0xe0 | (u >> 12);
+      *p++ = 0x80 | ((u >> 6) & 0x3f);
+    }
+  /* Both multi-byte forms end with the low six bits */
+  *p++ = 0x80 | (u & 0x3f);
+  return p;
+}
+
+/* Encode a value from the range [0, 0x7FFFFFFF];
+ * (superset of Unicode 4.0) up to 6 bytes needed (RFC2279).
+ * Returns the position just behind the written bytes. Each branch stores
+ * the lead byte for its length and then jumps into the shared tail of
+ * continuation-byte stores (put4..put1), so that exactly the remaining
+ * 6-bit groups are emitted. Values >= 2^31 hit ASSERT(0) and, if that
+ * returns, nothing is written. */
+static inline byte *
+utf8_32_put(byte *p, uns u)
+{
+ if (u < 0x80)
+ *p++ = u;
+ else if (u < 0x800)
+ {
+ *p++ = 0xc0 | (u >> 6);
+ goto put1;
+ }
+ else if (u < (1<<16))
+ {
+ *p++ = 0xe0 | (u >> 12);
+ goto put2;
+ }
+ else if (u < (1<<21))
+ {
+ *p++ = 0xf0 | (u >> 18);
+ goto put3;
+ }
+ else if (u < (1<<26))
+ {
+ *p++ = 0xf8 | (u >> 24);
+ goto put4;
+ }
+ else if (u < (1U<<31))
+ {
+ *p++ = 0xfc | (u >> 30);
+ *p++ = 0x80 | ((u >> 24) & 0x3f);
+put4: *p++ = 0x80 | ((u >> 18) & 0x3f);
+put3: *p++ = 0x80 | ((u >> 12) & 0x3f);
+put2: *p++ = 0x80 | ((u >> 6) & 0x3f);
+put1: *p++ = 0x80 | (u & 0x3f);
+ }
+ else
+ ASSERT(0);
+ return p;
+}
+
+/* Consume one continuation byte (10xxxxxx) at `p' and append its six payload
+ * bits to `u'; jumps to the local label `bad' if the byte is not a continuation.
+ * Expands to two statements, so it must not be used as an unbraced if/else body. */
+#define UTF8_GET_NEXT if (unlikely((*p & 0xc0) != 0x80)) goto bad; u = (u << 6) | (*p++ & 0x3f)
+
+/* Decode a character from the basic multilingual plane [0, 0xFFFF]
+ * or return 'repl' if the encoding has been corrupted.
+ * The decoded value is stored to *uu and the position behind the consumed
+ * bytes is returned. Lead bytes >= 0xf0 (sequences of four or more bytes)
+ * are rejected as well. After a bad continuation byte, the bytes consumed
+ * up to that point stay consumed, the offending byte does not. */
+static inline byte *
+utf8_get_repl(const byte *p, uns *uu, uns repl)
+{
+ uns u = *p++;
+ if (u < 0x80)
+ ;
+ else if (unlikely(u < 0xc0))
+ {
+ /* Incorrect byte sequence */
+ bad:
+ u = repl;
+ }
+ else if (u < 0xe0)
+ {
+ /* Two-byte sequence: 5 payload bits in the lead byte */
+ u &= 0x1f;
+ UTF8_GET_NEXT;
+ }
+ else if (likely(u < 0xf0))
+ {
+ /* Three-byte sequence: 4 payload bits in the lead byte */
+ u &= 0x0f;
+ UTF8_GET_NEXT;
+ UTF8_GET_NEXT;
+ }
+ else
+ goto bad;
+ *uu = u;
+ return (byte *)p;
+}
+
+/* Decode a value from the range [0, 0x7FFFFFFF]
+ * or return 'repl' if the encoding has been corrupted.
+ * The decoded value is stored to *uu and the position behind the consumed
+ * bytes is returned. Each branch masks the payload bits of the lead byte
+ * and jumps into the shared chain of UTF8_GET_NEXT steps (get4..get1), so
+ * that exactly the expected number of continuation bytes is read. Lead
+ * bytes 0xfe and 0xff are rejected. */
+static inline byte *
+utf8_32_get_repl(const byte *p, uns *uu, uns repl)
+{
+ uns u = *p++;
+ if (u < 0x80)
+ ;
+ else if (unlikely(u < 0xc0))
+ {
+ /* Incorrect byte sequence */
+ bad:
+ u = repl;
+ }
+ else if (u < 0xe0)
+ {
+ u &= 0x1f;
+ goto get1;
+ }
+ else if (u < 0xf0)
+ {
+ u &= 0x0f;
+ goto get2;
+ }
+ else if (u < 0xf8)
+ {
+ u &= 0x07;
+ goto get3;
+ }
+ else if (u < 0xfc)
+ {
+ u &= 0x03;
+ goto get4;
+ }
+ else if (u < 0xfe)
+ {
+ u &= 0x01;
+ UTF8_GET_NEXT;
+get4: UTF8_GET_NEXT;
+get3: UTF8_GET_NEXT;
+get2: UTF8_GET_NEXT;
+get1: UTF8_GET_NEXT;
+ }
+ else
+ goto bad;
+ *uu = u;
+ return (byte *)p;
+}
+
+/* Decode a character from the basic multilingual plane [0, 0xFFFF]
+ * or return UNI_REPLACEMENT if the encoding has been corrupted.
+ * Stores the character to *uu and returns the position behind it. */
+static inline byte *
+utf8_get(const byte *p, uns *uu)
+{
+ return utf8_get_repl(p, uu, UNI_REPLACEMENT);
+}
+
+/* Decode a value from the range [0, 0x7FFFFFFF]
+ * or return UNI_REPLACEMENT if the encoding has been corrupted.
+ * Stores the value to *uu and returns the position behind it. */
+static inline byte *
+utf8_32_get(const byte *p, uns *uu)
+{
+ return utf8_32_get_repl(p, uu, UNI_REPLACEMENT);
+}
+
+/* Statement-style wrappers which update the pointer variable `p' in place;
+ * `u' must be an lvalue for the GET variants. */
+#define PUT_UTF8(p,u) p = utf8_put(p, u)
+#define GET_UTF8(p,u) p = (byte*)utf8_get(p, &(u))
+
+#define PUT_UTF8_32(p,u) p = utf8_32_put(p, u)
+#define GET_UTF8_32(p,u) p = (byte*)utf8_32_get(p, &(u))
+
+/* Advance `p' over one encoded character: consume the first byte and, if it
+ * is a lead byte (>= 0xc0), as many continuation bytes (0x80..0xbf) as the
+ * run of 1 bits below its top bit announces, stopping early at the first
+ * byte which is not a continuation. `p' is evaluated several times. */
+#define UTF8_SKIP(p) do { \
+ uns c = *p++; \
+ if (c >= 0xc0) \
+ while (c & 0x40 && *p >= 0x80 && *p < 0xc0) \
+ p++, c <<= 1; \
+ } while (0)
+
+/* Move `p' backwards to the nearest preceding byte which is not a
+ * continuation byte. Expands to a while loop header without a body, so the
+ * caller supplies the terminating semicolon. */
+#define UTF8_SKIP_BWD(p) while ((*--(p) & 0xc0) == 0x80)
+
+/* Returns the number of bytes (1 to 6) the UTF-8 encoding of the value
+ * `u' occupies. */
+static inline uns
+utf8_space(uns u)
+{
+  uns bytes = 1;
+  /* Every threshold crossed costs one more byte */
+  if (u >= 0x80)
+    bytes++;
+  if (u >= 0x800)
+    bytes++;
+  if (u >= (1<<16))
+    bytes++;
+  if (u >= (1<<21))
+    bytes++;
+  if (u >= (1<<26))
+    bytes++;
+  return bytes;
+}
+
+/* Returns the total length in bytes (1 to 6) of the UTF-8 sequence which
+ * starts with the byte `c'. `c' must be either ASCII or a valid lead byte
+ * (0xc0..0xfd); continuation bytes and 0xfe/0xff trip the ASSERT. */
+static inline uns
+utf8_encoding_len(uns c)
+{
+ if (c < 0x80)
+ return 1;
+ ASSERT(c >= 0xc0 && c < 0xfe);
+ if (c < 0xe0)
+ return 2;
+ if (c < 0xf0)
+ return 3;
+ if (c < 0xf8)
+ return 4;
+ if (c < 0xfc)
+ return 5;
+ return 6;
+}
+
+/* Encode a character from the range [0, 0xD7FF] or [0xE000, 0x10FFFF]
+ * as little-endian UTF-16 at `p'; up to 4 bytes needed.
+ * Returns the position just behind the written data. Values outside these
+ * ranges (surrogates and anything above 0x10FFFF) trip ASSERT(0); should
+ * the assertion return, nothing is written and `p' is returned unchanged. */
+static inline void *
+utf16_le_put(void *p, uns u)
+{
+  if (u < 0xd800 || (u < 0x10000 && u >= 0xe000))
+    {
+      /* Directly representable as a single 16-bit unit */
+      put_u16_le(p, u);
+      return p + 2;
+    }
+  else if ((u -= 0x10000) < 0x100000)
+    {
+      /* Surrogate pair: high ten bits first, then low ten bits.
+       * Surrogate values wrap around in the subtraction and fail the test. */
+      put_u16_le(p, 0xd800 | (u >> 10));
+      put_u16_le(p + 2, 0xdc00 | (u & 0x3ff));
+      return p + 4;
+    }
+  else
+    ASSERT(0);
+  /* Never fall off the end of a non-void function */
+  return p;
+}
+
+/* Encode a character from the range [0, 0xD7FF] or [0xE000, 0x10FFFF]
+ * as big-endian UTF-16 at `p'; up to 4 bytes needed.
+ * Returns the position just behind the written data. Values outside these
+ * ranges (surrogates and anything above 0x10FFFF) trip ASSERT(0); should
+ * the assertion return, nothing is written and `p' is returned unchanged. */
+static inline void *
+utf16_be_put(void *p, uns u)
+{
+  if (u < 0xd800 || (u < 0x10000 && u >= 0xe000))
+    {
+      /* Directly representable as a single 16-bit unit */
+      put_u16_be(p, u);
+      return p + 2;
+    }
+  else if ((u -= 0x10000) < 0x100000)
+    {
+      /* Surrogate pair: high ten bits first, then low ten bits.
+       * Surrogate values wrap around in the subtraction and fail the test. */
+      put_u16_be(p, 0xd800 | (u >> 10));
+      put_u16_be(p + 2, 0xdc00 | (u & 0x3ff));
+      return p + 4;
+    }
+  else
+    ASSERT(0);
+  /* Never fall off the end of a non-void function */
+  return p;
+}
+
+/* Decode one little-endian UTF-16 character from the range [0, 0xD7FF]
+ * or [0xE000, 0x10FFFF] at `p', store it to *uu and return the position
+ * behind it. A lone or misordered surrogate yields `repl' and consumes
+ * a single 16-bit unit. */
+static inline void *
+utf16_le_get_repl(const void *p, uns *uu, uns repl)
+{
+  const void *next = p + 2;
+  uns u = get_u16_le(p);
+  uns hi = u - 0xd800;
+  if (hi < 0x800)
+    {
+      /* Surrogate area: only a high surrogate followed by a low one is valid */
+      uns lo;
+      if (hi >= 0x400 || (lo = get_u16_le(next) - 0xdc00) >= 0x400)
+        u = repl;
+      else
+        {
+          u = 0x10000 + (hi << 10) + lo;
+          next += 2;
+        }
+    }
+  *uu = u;
+  return (void *)next;
+}
+
+/* Decode one big-endian UTF-16 character from the range [0, 0xD7FF]
+ * or [0xE000, 0x10FFFF] at `p', store it to *uu and return the position
+ * behind it. A lone or misordered surrogate yields `repl' and consumes
+ * a single 16-bit unit. */
+static inline void *
+utf16_be_get_repl(const void *p, uns *uu, uns repl)
+{
+  const void *next = p + 2;
+  uns u = get_u16_be(p);
+  uns hi = u - 0xd800;
+  if (hi < 0x800)
+    {
+      /* Surrogate area: only a high surrogate followed by a low one is valid */
+      uns lo;
+      if (hi >= 0x400 || (lo = get_u16_be(next) - 0xdc00) >= 0x400)
+        u = repl;
+      else
+        {
+          u = 0x10000 + (hi << 10) + lo;
+          next += 2;
+        }
+    }
+  *uu = u;
+  return (void *)next;
+}
+
+/* Decode a character from the range [0, 0xD7FF] or [0xE000, 0x10FFFF]
+ * or return UNI_REPLACEMENT if the encoding has been corrupted.
+ * The character is stored to *uu; the position behind it is returned. */
+static inline void *
+utf16_le_get(const void *p, uns *uu)
+{
+ return utf16_le_get_repl(p, uu, UNI_REPLACEMENT);
+}
+
+/* Big-endian counterpart of utf16_le_get() */
+static inline void *
+utf16_be_get(const void *p, uns *uu)
+{
+ return utf16_be_get_repl(p, uu, UNI_REPLACEMENT);
+}
+
+/* Returns `u' if it is an acceptable text character, UNI_REPLACEMENT otherwise.
+ * Note that the second test rejects the whole range 0xd800..0xf8ff, which is
+ * wider than the surrogate area 0xd800..0xdfff alone. */
+static inline uns
+unicode_sanitize_char(uns u)
+{
+ if (u >= 0x10000 || // We don't accept anything outside the basic plane
+ u >= 0xd800 && u < 0xf900 || // neither we do surrogates
+ u >= 0x80 && u < 0xa0 || // nor latin-1 control chars
+ u < 0x20 && u != '\t') // nor ASCII control chars other than TAB
+ return UNI_REPLACEMENT;
+ return u;
+}
+
+/* unicode-utf8.c */
+
+uns utf8_strlen(const byte *str);
+uns utf8_strnlen(const byte *str, uns n);
+
+#endif
--- /dev/null
+# Tests for the Unicode module
+
+Name: utf8_put (1)
+Run: ../obj/lib/unicode-t utf8_put
+In: 0041 0048 004f 004a
+Out: 41 48 4f 4a
+
+Name: utf8_put (2)
+Run: ../obj/lib/unicode-t utf8_put
+In: 00aa 01aa 02a5 05a5 0a5a 15a5 2a5a 5a5a a5a5
+Out: c2 aa c6 aa ca a5 d6 a5 e0 a9 9a e1 96 a5 e2 a9 9a e5 a9 9a ea 96 a5
+
+Name: utf8_get (1)
+Run: ../obj/lib/unicode-t utf8_get
+In: 41 48 4f 4a
+Out: 0041 0048 004f 004a
+
+Name: utf8_get (2)
+Run: ../obj/lib/unicode-t utf8_get
+In: c2 aa c6 aa ca a5 d6 a5 e0 a9 9a e1 96 a5 e2 a9 9a e5 a9 9a ea 96 a5
+Out: 00aa 01aa 02a5 05a5 0a5a 15a5 2a5a 5a5a a5a5
+
+Name: utf8_get (3)
+Run: ../obj/lib/unicode-t utf8_get
+In: 84 ff f9 f8 c2 aa 41
+Out: fffc fffc fffc fffc 00aa 0041
+
+Name: utf8_32_put
+Run: ../obj/lib/unicode-t utf8_32_put
+In: 15a5a 2a5a5 5a5a5 a5a5a 15a5a5 2a5a5a 5a5a5a a5a5a5 15a5a5a 2a5a5a5 5a5a5a5 a5a5a5a 15a5a5a5 2a5a5a5a 5a5a5a5a
+Out: f0 95 a9 9a f0 aa 96 a5 f1 9a 96 a5 f2 a5 a9 9a f5 9a 96 a5 f8 8a a5 a9 9a f8 96 a5 a9 9a f8 a9 9a 96 a5 f9 96 a5 a9 9a fa a9 9a 96 a5 fc 85 a9 9a 96 a5 fc 8a 96 a5 a9 9a fc 95 a9 9a 96 a5 fc aa 96 a5 a9 9a fd 9a 96 a5 a9 9a
+
+Name: utf8_32_get (1)
+Run: ../obj/lib/unicode-t utf8_32_get
+In: f0 95 a9 9a f0 aa 96 a5 f1 9a 96 a5 f2 a5 a9 9a f5 9a 96 a5 f8 8a a5 a9 9a f8 96 a5 a9 9a f8 a9 9a 96 a5 f9 96 a5 a9 9a fa a9 9a 96 a5 fc 85 a9 9a 96 a5 fc 8a 96 a5 a9 9a fc 95 a9 9a 96 a5 fc aa 96 a5 a9 9a fd 9a 96 a5 a9 9a
+Out: 15a5a 2a5a5 5a5a5 a5a5a 15a5a5 2a5a5a 5a5a5a a5a5a5 15a5a5a 2a5a5a5 5a5a5a5 a5a5a5a 15a5a5a5 2a5a5a5a 5a5a5a5a
+
+Name: utf8_32_get (2)
+Run: ../obj/lib/unicode-t utf8_32_get
+In: fe 83 81
+Out: fffc fffc fffc
+
+Name: utf16_be_put
+Run: ../obj/lib/unicode-t utf16_be_put
+In: 0041 004a 2a5f feff 0000 10ffff ffff 10000
+Out: 00 41 00 4a 2a 5f fe ff 00 00 db ff df ff ff ff d8 00 dc 00
+
+Name: utf16_le_put
+Run: ../obj/lib/unicode-t utf16_le_put
+In: 0041 004a 2a5f feff 0000 10ffff ffff 10000
+Out: 41 00 4a 00 5f 2a ff fe 00 00 ff db ff df ff ff 00 d8 00 dc
+
+Name: utf16_be_get (1)
+Run: ../obj/lib/unicode-t utf16_be_get
+In: 00 41 00 4a 2a 5f fe ff 00 00 db ff df ff ff ff d8 00 dc 00
+Out: 0041 004a 2a5f feff 0000 10ffff ffff 10000
+
+Name: utf16_be_get (2)
+Run: ../obj/lib/unicode-t utf16_be_get
+In: dc 1a 2a 5f d8 01 d8 01 2a 5f d8 01
+Out: fffc 2a5f fffc fffc 2a5f fffc
+
+Name: utf16_le_get (1)
+Run: ../obj/lib/unicode-t utf16_le_get
+In: 41 00 4a 00 5f 2a ff fe 00 00 ff db ff df ff ff 00 d8 00 dc
+Out: 0041 004a 2a5f feff 0000 10ffff ffff 10000
+
+Name: utf16_le_get (2)
+Run: ../obj/lib/unicode-t utf16_le_get
+In: 1a dc 5f 2a 01 d8 01 d8 5f 2a 01 d8
+Out: fffc 2a5f fffc fffc 2a5f fffc
--- /dev/null
+/*
+ * UCW Library -- URL Functions
+ *
+ * (c) 1997--2004 Martin Mares <mj@ucw.cz>
+ * (c) 2001--2005 Robert Spalek <robert@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ *
+ * The URL syntax corresponds to RFC 2396 with several exceptions:
+ *
+ * o Escaping of special characters still follows RFC 1738.
+ * o Interpretation of path parameters follows RFC 1808.
+ *
+ * XXX: The buffer handling in this module is really horrible, but it works.
+ */
+
+#include "lib/lib.h"
+#include "lib/url.h"
+#include "lib/chartype.h"
+#include "lib/conf.h"
+
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <alloca.h>
+
+/* Configuration (section "URL") */
+
+/* Non-zero: url_deescape() drops whitespace at the start and at the end of the URL */
+static uns url_ignore_spaces;
+/* Non-zero: relpath_merge() tolerates ".." climbing above the root instead of failing */
+static uns url_ignore_underflow;
+/* The remaining settings are not referenced in this part of the file;
+ * presumably they drive the repeated-component detection -- confirm at their users. */
+static char *url_component_separators = "";
+static uns url_min_repeat_count = 0x7fffffff;
+static uns url_max_repeat_length = 0;
+static uns url_max_occurences = ~0U;
+
+static struct cf_section url_config = {
+ CF_ITEMS {
+ CF_UNS("IgnoreSpaces", &url_ignore_spaces),
+ CF_UNS("IgnoreUnderflow", &url_ignore_underflow),
+ CF_STRING("ComponentSeparators", &url_component_separators),
+ CF_UNS("MinRepeatCount", &url_min_repeat_count),
+ CF_UNS("MaxRepeatLength", &url_max_repeat_length),
+ CF_UNS("MaxOccurences", &url_max_occurences),
+ CF_END
+ }
+};
+
+/* Registers the "URL" configuration section */
+static void CONSTRUCTOR url_init_config(void)
+{
+ cf_declare_section("URL", &url_config, 0);
+}
+
+/* Escaping and de-escaping */
+
+/* Converts a value 0..15 to its upper-case hexadecimal digit. */
+static uns
+enhex(uns x)
+{
+  if (x < 10)
+    return x + '0';
+  return x - 10 + 'A';
+}
+
+/*
+ * Decode %xx escapes of the URL `s' into the buffer `d'.
+ *
+ * Escaped reserved characters (; / ? : @ = & #) are not stored literally,
+ * but as the corresponding NCC_xxx codes, so that they stay distinguishable
+ * from the unescaped ones. Escapes of values below 0x20 are refused.
+ * Runs of whitespace are copied unchanged, unless url_ignore_spaces is set
+ * and the run is at the very beginning of the output or at the end of the
+ * input, in which case it is dropped. Any other byte <= 0x20 is an error.
+ *
+ * Returns 0 on success or one of the URL_ERR_xxx codes. The output is
+ * limited to MAX_URL_SIZE - 10 bytes.
+ */
+int
+url_deescape(const byte *s, byte *d)
+{
+ byte *dstart = d;
+ byte *end = d + MAX_URL_SIZE - 10;
+ while (*s)
+ {
+ if (d >= end)
+ return URL_ERR_TOO_LONG;
+ if (*s == '%')
+ {
+ unsigned int val;
+ if (!Cxdigit(s[1]) || !Cxdigit(s[2]))
+ return URL_ERR_INVALID_ESCAPE;
+ val = Cxvalue(s[1])*16 + Cxvalue(s[2]);
+ if (val < 0x20)
+ return URL_ERR_INVALID_ESCAPED_CHAR;
+ /* Escaped reserved characters get their non-control-character codes */
+ switch (val)
+ {
+ case ';':
+ val = NCC_SEMICOLON; break;
+ case '/':
+ val = NCC_SLASH; break;
+ case '?':
+ val = NCC_QUEST; break;
+ case ':':
+ val = NCC_COLON; break;
+ case '@':
+ val = NCC_AT; break;
+ case '=':
+ val = NCC_EQUAL; break;
+ case '&':
+ val = NCC_AND; break;
+ case '#':
+ val = NCC_HASH; break;
+ }
+ *d++ = val;
+ s += 3;
+ }
+ else if (*s > 0x20)
+ *d++ = *s++;
+ else if (Cspace(*s))
+ {
+ /* Find the end of the whitespace run first, then decide whether to keep it */
+ const byte *s0 = s;
+ while (Cspace(*s))
+ s++;
+ if (!url_ignore_spaces || !(!*s || d == dstart))
+ {
+ while (Cspace(*s0))
+ {
+ if (d >= end)
+ return URL_ERR_TOO_LONG;
+ *d++ = *s0++;
+ }
+ }
+ }
+ else
+ return URL_ERR_INVALID_CHAR;
+ }
+ *d = 0;
+ return 0;
+}
+
+/*
+ * Escape the URL `s' into the buffer `d': alphanumerics, the punctuation
+ * listed below and the reserved characters are copied, everything else is
+ * written as %XX. Bytes below NCC_MAX are first translated through
+ * NCC_CHARS[], i.e., the codes produced by url_deescape() for escaped
+ * reserved characters turn back into escapes of those characters.
+ *
+ * Returns 0 on success or URL_ERR_TOO_LONG.
+ */
+int
+url_enescape(const byte *s, byte *d)
+{
+ /* The limit is checked once per input character, while an escape emits
+ * three bytes; the 10 bytes of slack cover the difference */
+ byte *end = d + MAX_URL_SIZE - 10;
+ unsigned int c;
+
+ while (c = *s)
+ {
+ if (d >= end)
+ return URL_ERR_TOO_LONG;
+ if (Calnum(c) || /* RFC 1738(2.2): Only alphanumerics ... */
+ c == '$' || c == '-' || c == '_' || c == '.' || c == '+' || /* ... and several other exceptions ... */
+ c == '!' || c == '*' || c == '\'' || c == '(' || c == ')' ||
+ c == ',' ||
+ c == '/' || c == '?' || c == ':' || c == '@' || /* ... and reserved chars used for reserved purpose */
+ c == '=' || c == '&' || c == '#' || c == ';')
+ *d++ = *s++;
+ else
+ {
+ uns val = (*s < NCC_MAX) ? NCC_CHARS[*s] : *s;
+ *d++ = '%';
+ *d++ = enhex(val >> 4);
+ *d++ = enhex(val & 0x0f);
+ s++;
+ }
+ }
+ *d = 0;
+ return 0;
+}
+
+/*
+ * Convert the URL `src' to a human-readable form in `dest': bytes below
+ * NCC_MAX are replaced by the characters NCC_CHARS[] assigns to them,
+ * printable ASCII (0x20..0x7e) is copied and everything else is written
+ * as %XX.
+ *
+ * Returns 0 on success or URL_ERR_TOO_LONG.
+ */
+int
+url_enescape_friendly(const byte *src, byte *dest)
+{
+ byte *end = dest + MAX_URL_SIZE - 10;
+ while (*src)
+ {
+ if (dest >= end)
+ return URL_ERR_TOO_LONG;
+ if (*src < NCC_MAX)
+ *dest++ = NCC_CHARS[*src++];
+ else if (*src >= 0x20 && *src < 0x7f)
+ *dest++ = *src++;
+ else
+ {
+ *dest++ = '%';
+ *dest++ = enhex(*src >> 4);
+ *dest++ = enhex(*src++ & 0x0f);
+ }
+ }
+ *dest = 0;
+ return 0;
+}
+
+/* Split an URL (several parts may be copied to the destination buffer) */
+
+byte *url_proto_names[URL_PROTO_MAX] = URL_PNAMES;
+static int url_proto_path_flags[URL_PROTO_MAX] = URL_PATH_FLAGS;
+
+/* Looks up the protocol name `p' (case-insensitively) among
+ * url_proto_names[1..URL_PROTO_MAX-1] and returns its index,
+ * or URL_PROTO_UNKNOWN if there is no such protocol. */
+uns
+identify_protocol(const byte *p)
+{
+  uns id = 1;
+  while (id < URL_PROTO_MAX)
+    {
+      if (strcasecmp(p, url_proto_names[id]) == 0)
+        return id;
+      id++;
+    }
+  return URL_PROTO_UNKNOWN;
+}
+
+/*
+ * Split the URL `s' to its components and describe them in `u'.
+ *
+ * The protocol, user, password and host are copied to the buffer `d' as
+ * separate NUL-terminated strings; u->rest points into `s' at the first
+ * character which was not consumed (path, query, ...). u->buf is set to
+ * the first unused byte of `d' and u->bufend to its limit, for later use
+ * by the normalization functions. Components which are not present stay
+ * NULL and u->port stays ~0.
+ *
+ * Returns 0 on success or URL_ERR_INVALID_PORT.
+ * NOTE(review): the copying loops here do not check `d' against u->bufend;
+ * presumably the caller guarantees that `s' fits -- confirm.
+ */
+int
+url_split(byte *s, struct url *u, byte *d)
+{
+ bzero(u, sizeof(struct url));
+ u->port = ~0;
+ u->bufend = d + MAX_URL_SIZE - 10;
+
+ if (s[0] != '/') /* Seek for "protocol:" */
+ {
+ byte *p = s;
+ while (*p && Calnum(*p))
+ p++;
+ if (p != s && *p == ':')
+ {
+ u->protocol = d;
+ while (s < p)
+ *d++ = *s++;
+ *d++ = 0;
+ u->protoid = identify_protocol(u->protocol);
+ s++;
+ if (url_proto_path_flags[u->protoid] && (s[0] != '/' || s[1] != '/'))
+ {
+ /* The protocol requires complete host spec, but it's missing -> treat as a relative path instead */
+ /* len counts the name plus one more byte: the NUL in d, the ':' in s */
+ int len = d - u->protocol;
+ d -= len;
+ s -= len;
+ u->protocol = NULL;
+ u->protoid = 0;
+ }
+ }
+ }
+
+ if (s[0] == '/') /* Host spec or absolute path */
+ {
+ if (s[1] == '/') /* Host spec */
+ {
+ byte *q, *e;
+ byte *at = NULL;
+ char *ep;
+
+ s += 2;
+ q = d;
+ while (*s && *s != '/' && *s != '?') /* Copy user:passwd@host:port */
+ {
+ if (*s != '@')
+ *d++ = *s;
+ else if (!at)
+ {
+ /* The first '@' ends the user part; `at' marks the start of the host */
+ *d++ = 0;
+ at = d;
+ }
+ else /* This shouldn't happen with sane URL's, but we need to be sure */
+ *d++ = NCC_AT;
+ s++;
+ }
+ *d++ = 0;
+ if (at) /* user:passwd present */
+ {
+ u->user = q;
+ if (e = strchr(q, ':'))
+ {
+ *e++ = 0;
+ u->pass = e;
+ }
+ }
+ else
+ at = q;
+ e = strchr(at, ':');
+ if (e) /* host:port present */
+ {
+ uns p;
+ *e++ = 0;
+ p = strtoul(e, &ep, 10);
+ if (ep && *ep || p > 65535)
+ return URL_ERR_INVALID_PORT;
+ else if (p) /* Port 0 (e.g. in :/) is treated as default port */
+ u->port = p;
+ }
+ u->host = at;
+ }
+ }
+
+ u->rest = s;
+ u->buf = d;
+ return 0;
+}
+
+/* Normalization according to given base URL */
+
+static uns std_ports[] = URL_DEFPORTS; /* Default port numbers */
+
+/*
+ * Resolve the relative path u->rest against the path of the base URL `b'.
+ *
+ * An absolute path is left alone and an empty one inherits the whole
+ * b->rest. In all other cases the result is built in the free part of
+ * the buffer of `u' (u->buf .. u->bufend), u->rest is redirected to it
+ * and u->buf is advanced behind its terminating NUL.
+ *
+ * Returns 0 on success, URL_PATH_UNDERFLOW, URL_ERR_TOO_LONG
+ * or URL_ERR_REL_NOTHING.
+ */
+static int
+relpath_merge(struct url *u, struct url *b)
+{
+ byte *a = u->rest;
+ byte *o = b->rest;
+ byte *d = u->buf;
+ byte *e = u->bufend;
+ byte *p;
+
+ if (a[0] == '/') /* Absolute path => OK */
+ return 0;
+ if (o[0] != '/' && o[0] != '?')
+ return URL_PATH_UNDERFLOW;
+
+ if (!a[0]) /* Empty URL -> inherit everything */
+ {
+ u->rest = b->rest;
+ return 0;
+ }
+
+ u->rest = d; /* We know we'll need to copy the path somewhere else */
+
+ /* In the next three cases, p is set to the place where the base path gets
+ * cut; the `copy' code then joins the base up to p with the relative part */
+ if (a[0] == '#') /* Another fragment */
+ {
+ for(p=o; *p && *p != '#'; p++)
+ ;
+ goto copy;
+ }
+ if (a[0] == '?') /* New query */
+ {
+ for(p=o; *p && *p != '#' && *p != '?'; p++)
+ ;
+ goto copy;
+ }
+ if (a[0] == ';') /* Change parameters */
+ {
+ for(p=o; *p && *p != ';' && *p != '?' && *p != '#'; p++)
+ ;
+ goto copy;
+ }
+
+ p = NULL; /* Copy original path and find the last slash */
+ while (*o && *o != ';' && *o != '?' && *o != '#')
+ {
+ if (d >= e)
+ return URL_ERR_TOO_LONG;
+ if ((*d++ = *o++) == '/')
+ p = d;
+ }
+ if (!p)
+ return URL_ERR_REL_NOTHING;
+ /* Continue writing just behind the last slash of the base path */
+ d = p;
+
+ /* Append the relative path segment by segment, resolving "." and ".." */
+ while (*a)
+ {
+ if (a[0] == '.')
+ {
+ if (a[1] == '/' || !a[1]) /* Skip "./" and ".$" */
+ {
+ a++;
+ if (a[0])
+ a++;
+ continue;
+ }
+ else if (a[1] == '.' && (a[2] == '/' || !a[2])) /* "../" */
+ {
+ a += 2;
+ if (a[0])
+ a++;
+ /* Only the leading slash is left, so there is nothing to remove */
+ if (d <= u->buf + 1)
+ {
+ /*
+ * RFC 1808 says we should leave ".." as a path segment, but
+ * we intentionally break the rule and refuse the URL.
+ */
+ if (!url_ignore_underflow)
+ return URL_PATH_UNDERFLOW;
+ }
+ else
+ {
+ d--; /* Discard trailing slash */
+ while (d[-1] != '/')
+ d--;
+ }
+ continue;
+ }
+ }
+ /* An ordinary segment: copy it including the slash which ends it */
+ while (a[0] && a[0] != '/')
+ {
+ if (d >= e)
+ return URL_ERR_TOO_LONG;
+ *d++ = *a++;
+ }
+ if (a[0])
+ *d++ = *a++;
+ }
+
+okay:
+ *d++ = 0;
+ u->buf = d;
+ return 0;
+
+copy: /* Combine part of old URL with the new one */
+ while (o < p)
+ if (d < e)
+ *d++ = *o++;
+ else
+ return URL_ERR_TOO_LONG;
+ while (*a)
+ if (d < e)
+ *d++ = *a++;
+ else
+ return URL_ERR_TOO_LONG;
+ goto okay;
+}
+
+/*
+ * Normalize the split URL `u', resolving it against the base URL `b' when it
+ * is relative (`b' may be NULL if no base is known).
+ *
+ * A URL without a protocol inherits the protocol of the base; if it also lacks
+ * a host, it inherits host, user, password and port and its path is merged
+ * with the base path by relpath_merge(). A path consisting of a bare "?query"
+ * is rewritten to "/?query" and an unspecified port (~0U) is replaced by the
+ * default port of the protocol.
+ *
+ * Returns 0 on success or one of the URL_xxx error codes.
+ */
+int
+url_normalize(struct url *u, struct url *b)
+{
+  int err;
+
+  /* Basic checks */
+  if ((url_proto_path_flags[u->protoid] && (!u->host || !*u->host)) ||
+      (!u->host && u->user) ||
+      (!u->user && u->pass) ||
+      !u->rest)
+    return URL_SYNTAX_ERROR;
+
+  if (!u->protocol)
+    {
+      /* Now we know it's a relative URL. Do we have any base? */
+      if (!b || !url_proto_path_flags[b->protoid])
+        return URL_ERR_REL_NOTHING;
+      u->protocol = b->protocol;
+      u->protoid = b->protoid;
+
+      /* Reference to the same host */
+      if (!u->host)
+        {
+          u->host = b->host;
+          u->user = b->user;
+          u->pass = b->pass;
+          u->port = b->port;
+          err = relpath_merge(u, b);
+          if (err)
+            return err;
+        }
+    }
+
+  /* Change path "?" to "/?" because it's the true meaning */
+  if (u->rest[0] == '?')
+    {
+      int l = strlen(u->rest);
+      /* We store the slash, l characters and the terminating zero: l+2 bytes */
+      if (u->bufend - u->buf < l+2)
+        return URL_ERR_TOO_LONG;
+      u->buf[0] = '/';
+      memcpy(u->buf+1, u->rest, l+1);
+      u->rest = u->buf;
+      u->buf += l+2;
+    }
+
+  /* Fill in missing info */
+  if (u->port == ~0U)
+    u->port = std_ports[u->protoid];
+
+  return 0;
+}
+
+/* Name canonicalization */
+
+/* Convert ASCII capital letters of a zero-terminated string to lower case in place; NULL is ignored */
+static void
+lowercase(byte *b)
+{
+  byte *p;
+
+  if (!b)
+    return;
+  for (p = b; *p; p++)
+    if (*p >= 'A' && *p <= 'Z')
+      *p += 'a' - 'A';
+}
+
+/*
+ * Strip trailing dots from a zero-terminated string in place. The first
+ * character is never removed, so "." stays "."; NULL is ignored.
+ */
+static void
+kill_end_dot(byte *b)
+{
+  size_t len;
+
+  if (!b)
+    return;
+  /* Work with a length instead of a pointer, so that we never form b-1 for an empty string */
+  len = strlen(b);
+  while (len > 1 && b[len-1] == '.')
+    b[--len] = 0;
+}
+
+/*
+ * Canonicalize a split URL in place: lowercase the protocol and the host,
+ * drop trailing dots of the host name, substitute "/" for an empty path of
+ * protocols flagged in url_proto_path_flags and cut off the fragment ("#...").
+ * Always returns 0.
+ */
+int
+url_canonicalize(struct url *u)
+{
+  char *frag;
+
+  lowercase(u->protocol);
+  lowercase(u->host);
+  kill_end_dot(u->host);
+
+  if (!(u->rest && *u->rest) && url_proto_path_flags[u->protoid])
+    u->rest = "/";
+
+  if (u->rest)
+    {
+      frag = strchr(u->rest, '#');	/* Kill fragment reference */
+      if (frag)
+        *frag = 0;
+    }
+  return 0;
+}
+
+/* Pack a broken-down URL */
+
+/*
+ * Copy the string `s' (without its terminating zero) to `d', never writing at
+ * or beyond `e'. Returns the new end of the output, or NULL on overflow. A NULL
+ * `d' is passed through, so that calls can be chained and checked only once.
+ */
+static byte *
+append(byte *d, const byte *s, byte *e)
+{
+  if (!d)
+    return NULL;
+  for (; *s; s++)
+    {
+      if (d >= e)
+        return NULL;
+      *d++ = *s;
+    }
+  return d;
+}
+
+/*
+ * Pack a broken-down URL `u' to a zero-terminated string at `d'.
+ *
+ * At most MAX_URL_SIZE - 10 characters plus the terminating zero are written.
+ * The port is emitted only if it is specified (not ~0U) and differs from the
+ * default port of the protocol. If u->protocol is set, u->protoid is refreshed
+ * by identify_protocol() as a side effect.
+ *
+ * Returns 0 on success or URL_ERR_TOO_LONG if the result does not fit.
+ */
+int
+url_pack(struct url *u, byte *d)
+{
+  byte *e = d + MAX_URL_SIZE - 10;
+
+  if (u->protocol)
+    {
+      d = append(d, u->protocol, e);
+      d = append(d, ":", e);
+      u->protoid = identify_protocol(u->protocol);
+    }
+  if (u->host)
+    {
+      d = append(d, "//", e);
+      if (u->user)
+        {
+          d = append(d, u->user, e);
+          if (u->pass)
+            {
+              d = append(d, ":", e);
+              d = append(d, u->pass, e);
+            }
+          d = append(d, "@", e);
+        }
+      d = append(d, u->host, e);
+      if (u->port != std_ports[u->protoid] && u->port != ~0U)
+        {
+          /*
+           * The port is unsigned and nothing here guarantees it is below 65536,
+           * so format it as unsigned into a buffer large enough for any value.
+           */
+          char z[16];
+          snprintf(z, sizeof(z), "%u", u->port);
+          d = append(d, ":", e);
+          d = append(d, z, e);
+        }
+    }
+  if (u->rest)
+    d = append(d, u->rest, e);
+  if (!d)			/* Some of the append()'s has overflown */
+    return URL_ERR_TOO_LONG;
+  *d = 0;
+  return 0;
+}
+
+/* Error messages */
+
+/* Messages for the URL_xxx error codes; entry 0 serves as a fallback for unknown codes */
+static char *errmsg[] = {
+  [0] = "Something is wrong",
+  [URL_ERR_TOO_LONG] = "Too long",
+  [URL_ERR_INVALID_CHAR] = "Invalid character",
+  [URL_ERR_INVALID_ESCAPE] = "Invalid escape",
+  [URL_ERR_INVALID_ESCAPED_CHAR] = "Invalid escaped character",
+  [URL_ERR_INVALID_PORT] = "Invalid port number",
+  [URL_ERR_REL_NOTHING] = "Relative URL not allowed",
+  [URL_ERR_UNKNOWN_PROTOCOL] = "Unknown protocol",
+  [URL_SYNTAX_ERROR] = "Syntax error",
+  [URL_PATH_UNDERFLOW] = "Path underflow"
+};
+
+/* Translate a URL_xxx error code to a message; unknown codes yield the generic message */
+char *
+url_error(uns err)
+{
+  uns known = sizeof(errmsg) / sizeof(char *);
+
+  return errmsg[err < known ? err : 0];
+}
+
+/* Standard cookbook recipes */
+
+/*
+ * De-escape the URL `u' to buf1, split it to `url' (using buf2 as a work
+ * space), normalize it relative to `base' and canonicalize it. The first
+ * failing step determines the returned error code; 0 means success.
+ */
+int
+url_canon_split_rel(const byte *u, byte *buf1, byte *buf2, struct url *url, struct url *base)
+{
+  int err;
+
+  err = url_deescape(u, buf1);
+  if (!err)
+    err = url_split(buf1, url, buf2);
+  if (!err)
+    err = url_normalize(url, base);
+  if (!err)
+    err = url_canonicalize(url);
+  return err;
+}
+
+/*
+ * Canonicalize the URL `src' relative to `base' and store the re-escaped
+ * result to `dst'. All intermediate buffers live on the stack. Returns 0
+ * or the error code of the first step which has failed.
+ */
+int
+url_auto_canonicalize_rel(const byte *src, byte *dst, struct url *base)
+{
+  byte buf1[MAX_URL_SIZE], buf2[MAX_URL_SIZE], buf3[MAX_URL_SIZE];
+  struct url ur;
+  int err;
+
+  err = url_canon_split_rel(src, buf1, buf2, &ur, base);
+  if (err)
+    return err;
+  err = url_pack(&ur, buf3);
+  if (err)
+    return err;
+  return url_enescape(buf3, dst);
+}
+
+/* Testing */
+
+#ifdef TEST
+
+/* Print all parts of a split URL, prefixed by the name of the processing stage */
+static void
+test_show(const char *stage, struct url *x)
+{
+  printf("%s: @%s@%s@%s@%s@%d@%s\n", stage, x->protocol, x->user, x->pass, x->host, x->port, x->rest);
+}
+
+/* Report a failed stage; returns the exit code of the test program */
+static int
+test_fail(const char *stage, int err)
+{
+  printf("%s: error %d\n", stage, err);
+  return 1;
+}
+
+/*
+ * Test program: runs the URL given as the first argument through all stages
+ * (deescape, split, normalize, canonicalize, pack, enescape), printing the
+ * result of each. An optional second argument replaces the built-in base URL.
+ */
+int main(int argc, char **argv)
+{
+  char buf1[MAX_URL_SIZE], buf2[MAX_URL_SIZE], buf3[MAX_URL_SIZE], buf4[MAX_URL_SIZE];
+  int err;
+  struct url url, url0;
+  char *base = "http://mj@www.hell.org/123/sub_dir/index.html;param?query&zzz/subquery#fragment";
+
+  if (argc != 2 && argc != 3)
+    return 1;
+  if (argc == 3)
+    base = argv[2];
+
+  err = url_deescape(argv[1], buf1);
+  if (err)
+    return test_fail("deesc", err);
+  printf("deesc: %s\n", buf1);
+
+  err = url_split(buf1, &url, buf2);
+  if (err)
+    return test_fail("split", err);
+  test_show("split", &url);
+
+  err = url_split(base, &url0, buf3);
+  if (err)
+    return test_fail("split base", err);
+  err = url_normalize(&url0, NULL);
+  if (err)
+    return test_fail("normalize base", err);
+  test_show("base", &url0);
+
+  err = url_normalize(&url, &url0);
+  if (err)
+    return test_fail("normalize", err);
+  test_show("normalize", &url);
+
+  err = url_canonicalize(&url);
+  if (err)
+    return test_fail("canonicalize", err);
+  test_show("canonicalize", &url);
+
+  err = url_pack(&url, buf4);
+  if (err)
+    return test_fail("pack", err);
+  printf("pack: %s\n", buf4);
+
+  err = url_enescape(buf4, buf2);
+  if (err)
+    return test_fail("enesc", err);
+  printf("enesc: %s\n", buf2);
+  return 0;
+}
+
+#endif
+
+/* One component of a URL as delimited by url_component_separators (used by url_has_repeated_component()) */
+struct component {
+ const byte *start; /* First character of the component inside the URL (not zero-terminated) */
+ int length; /* Length of the component in bytes */
+ uns count; /* Number of occurrences seen so far; maintained only when occurrences are counted */
+ u32 hash; /* hashf() of the component */
+};
+
+/* Hash `length' bytes at `start': seed with the length, then rotate left by 8 bits and XOR in each byte */
+static inline u32
+hashf(const byte *start, int length)
+{
+  u32 h = length;
+  int i;
+
+  for (i = 0; i < length; i++)
+    h = ((h << 8) | (h >> 24)) ^ start[i];
+  return h;
+}
+
+/*
+ * Count how many times in a row the sequence of the first `len' components
+ * of comp[0..count-1] occurs at the start of the array (the sequence itself
+ * counts as one). The caller guarantees count >= len.
+ */
+static inline uns
+repeat_count(struct component *comp, uns count, uns len)
+{
+  struct component *first = comp;
+  struct component *cur = comp;
+  uns reps;
+
+  for (reps = 1; ; reps++)
+    {
+      uns i;
+      cur += len;
+      count -= len;
+      if (count < len)			/* Not enough components left for another copy */
+        return reps;
+      for (i = 0; i < len; i++)
+        {
+          struct component *x = &cur[i], *y = &first[i];
+          if (x->hash != y->hash
+              || x->length != y->length
+              || memcmp(x->start, y->start, x->length))
+            return reps;
+        }
+    }
+}
+
+/*
+ * Check whether a URL contains suspiciously repeated components.
+ *
+ * The URL is cut to components at characters from url_component_separators.
+ * Returns 1 if some component occurs more than url_max_occurences times,
+ * otherwise the length (in components, at most url_max_repeat_length) of
+ * a sequence of components repeated at least url_min_repeat_count times
+ * in a row, or 0 if nothing like that has been found. Note that a return
+ * value of 1 does not tell these two cases apart.
+ *
+ * All temporary arrays are allocated on the stack by alloca().
+ */
+int
+url_has_repeated_component(const byte *url)
+{
+ struct component *comp;
+ uns comps, comp_len, rep_prefix, hash_size, *hash, *next;
+ const byte *c;
+ uns i, j, k;
+
+ /* First pass: count the components */
+ for (comps=0, c=url; c; comps++)
+ {
+ c = strpbrk(c, url_component_separators);
+ if (c)
+ c++;
+ }
+ /* Too few components to trigger either of the two checks */
+ if (comps < url_min_repeat_count && comps <= url_max_occurences)
+ return 0;
+ /* Second pass: record start and length of each component */
+ comp = alloca(comps * sizeof(*comp));
+ for (i=0, c=url; c; i++)
+ {
+ comp[i].start = c;
+ c = strpbrk(c, url_component_separators);
+ if (c)
+ {
+ comp[i].length = c - comp[i].start;
+ c++;
+ }
+ else
+ comp[i].length = strlen(comp[i].start);
+ }
+ ASSERT(i == comps);
+ for (i=0; i<comps; i++)
+ comp[i].hash = hashf(comp[i].start, comp[i].length);
+ if (comps > url_max_occurences)
+ {
+ /*
+ * Count occurrences of each distinct component using a chained hash table:
+ * hash[] holds the index of the first component in each bucket, next[] links
+ * components of the same bucket and ~0U (all bits set by the memset) ends a chain.
+ */
+ hash_size = next_table_prime(comps);
+ hash = alloca(hash_size * sizeof(*hash));
+ next = alloca(comps * sizeof(*next));
+ memset(hash, 255, hash_size * sizeof(*hash));
+ for (i=0; i<comps; i++)
+ {
+ j = comp[i].hash % hash_size;
+ /* Walk the chain until an equal component is found or the chain ends (empty loop body) */
+ for (k = hash[j]; ~k && (comp[i].hash != comp[k].hash || comp[i].length != comp[k].length ||
+ memcmp(comp[k].start, comp[i].start, comp[i].length)); k = next[k]);
+ if (!~k)
+ {
+ /* First occurrence: insert at the head of the chain */
+ next[i] = hash[j];
+ hash[j] = i;
+ comp[i].count = 1;
+ }
+ else
+ {
+ if (comp[k].count++ >= url_max_occurences)
+ return 1;
+ }
+ }
+ }
+ /* Try all sequence lengths and all starting positions */
+ for (comp_len = 1; comp_len <= url_max_repeat_length && comp_len <= comps; comp_len++)
+ for (rep_prefix = 0; rep_prefix <= comps - comp_len; rep_prefix++)
+ if (repeat_count(comp + rep_prefix, comps - rep_prefix, comp_len) >= url_min_repeat_count)
+ return comp_len;
+ return 0;
+}
--- /dev/null
+/*
+ * UCW Library -- URL Functions
+ *
+ * (c) 1997--2004 Martin Mares <mj@ucw.cz>
+ * (c) 2001 Robert Spalek <robert@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_URL_H
+#define _UCW_URL_H
+
+/* Size of the buffers used for URLs (see the work buffers in url_auto_canonicalize_rel()) */
+#define MAX_URL_SIZE 1024
+
+/* Non-control meanings of control characters */
+
+#define NCC_SEMICOLON 1
+#define NCC_SLASH 2
+#define NCC_QUEST 3
+#define NCC_COLON 4
+#define NCC_AT 5
+#define NCC_EQUAL 6
+#define NCC_AND 7
+#define NCC_HASH 8
+#define NCC_MAX 9
+
+/* NCC_CHARS[code] is the character represented by the NCC_xxx code; position 0 is just a placeholder */
+#define NCC_CHARS " ;/?:@=&#"
+
+/* Remove/Introduce '%' escapes */
+
+int url_deescape(const byte *s, byte *d);
+int url_enescape(const byte *s, byte *d);
+int url_enescape_friendly(const byte *src, byte *dest); // for cards.c only
+
+/* URL splitting and normalization */
+
+/* A URL broken down to its parts; parts which are not present are NULL */
+struct url {
+ byte *protocol; /* NULL for relative URLs (see url_normalize()) */
+ uns protoid; /* URL_PROTO_xxx, indexes the per-protocol tables */
+ byte *user;
+ byte *pass;
+ byte *host;
+ uns port; /* ~0 if unspec */
+ byte *rest; /* The part following the host specification (path, query, fragment) */
+ byte *buf, *bufend; /* Unused part of the work buffer: next free byte and its limit */
+};
+
+/*
+ * All functions returning int return 0 on success or a URL_xxx error code,
+ * except for url_has_repeated_component() which returns a non-zero value
+ * when a repetition has been found.
+ */
+int url_split(byte *s, struct url *u, byte *d);
+int url_normalize(struct url *u, struct url *b);
+int url_canonicalize(struct url *u);
+int url_pack(struct url *u, byte *d);
+int url_canon_split_rel(const byte *url, byte *buf1, byte *buf2, struct url *u, struct url *base);
+int url_auto_canonicalize_rel(const byte *src, byte *dst, struct url *base);
+uns identify_protocol(const byte *p);
+int url_has_repeated_component(const byte *url);
+
+/* The same as url_canon_split_rel(), but with no base URL, so relative URLs are refused */
+static inline int
+url_canon_split(const byte *url, byte *buf1, byte *buf2, struct url *u)
+{
+  return url_canon_split_rel(url, buf1, buf2, u, NULL);
+}
+
+/* The same as url_auto_canonicalize_rel(), but with no base URL, so relative URLs are refused */
+static inline int
+url_auto_canonicalize(const byte *src, byte *dst)
+{
+  return url_auto_canonicalize_rel(src, dst, NULL);
+}
+
+/* Error codes */
+
+/* Returns a message for the given error code; unknown codes give a generic message */
+char *url_error(uns);
+
+/* Keep these in sync with the table of messages in url.c, which is indexed by them */
+#define URL_ERR_TOO_LONG 1
+#define URL_ERR_INVALID_CHAR 2
+#define URL_ERR_INVALID_ESCAPE 3
+#define URL_ERR_INVALID_ESCAPED_CHAR 4
+#define URL_ERR_INVALID_PORT 5
+#define URL_ERR_REL_NOTHING 6
+#define URL_ERR_UNKNOWN_PROTOCOL 7
+#define URL_SYNTAX_ERROR 8
+#define URL_PATH_UNDERFLOW 9
+
+/* Protocol ID's (struct url.protoid) */
+#define URL_PROTO_UNKNOWN 0
+#define URL_PROTO_HTTP 1
+#define URL_PROTO_FTP 2
+#define URL_PROTO_FILE 3
+#define URL_PROTO_MAX 4
+
+/* Initializers of per-protocol tables, each with one entry per protocol ID in the order above */
+#define URL_PNAMES { "unknown", "http", "ftp", "file" }
+#define URL_DEFPORTS { ~0, 80, 21, 0 }
+#define URL_PATH_FLAGS { 0, 1, 1, 1 }
+
+extern byte *url_proto_names[];
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Fast Pattern Matcher for Short Wildcard Patterns (only `?' and `*' supported)
+ *
+ * Traditional NFA -> DFA method with on-the-fly DFA construction.
+ *
+ * (c) 1999 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/mempool.h"
+#include "lib/wildmatch.h"
+
+#include <stdio.h>
+#include <string.h>
+
+#define MAX_STATES 32 /* Must be <= 32, state 0 is reserved, state 1 is initial */
+#define MAX_CACHED 256 /* Maximum number of cached DFA states */
+#define HASH_SIZE 512 /* Number of entries in DFA hash table (at least MAX_CACHED+MAX_STATES) */
+#define HASH_SKIP 137 /* Step used to probe the next slot of the DFA hash table on collision */
+
+/* A state of the NFA built from the pattern; sets of states are bitmasks with bit N standing for state N */
+struct nfa_state {
+ byte ch; /* 0 for non-matching state */
+ byte final; /* Accepting state */
+ u32 match_states; /* States to go to when input character == ch */
+ u32 default_states; /* States to go to whatever the input is */
+};
+
+/* A lazily constructed state of the DFA, representing a set of NFA states */
+struct dfa_state {
+ uintptr_t edge[256]; /* Outgoing DFA edges. Bit 0 is set for incomplete edges which
+ * contain just state set and clear for complete ones which point
+ * to other states. NULL means `no match'.
+ */
+ u32 nfa_set; /* A set of NFA states this DFA state represents */
+ int final; /* This is an accepting state */
+ struct dfa_state *next; /* Next in the chain of free states */
+};
+
+/* A compiled pattern: the NFA plus a cache of the DFA states constructed so far */
+struct wildpatt {
+ struct nfa_state nfa[MAX_STATES]; /* Indexed by state number, entry 0 is unused */
+ struct dfa_state *hash[HASH_SIZE]; /* Open-addressed hash table of DFA states keyed by nfa_set */
+ struct dfa_state *dfa_start; /* DFA state for the set containing just the initial NFA state */
+ uns nfa_states; /* The highest NFA state number in use */
+ uns dfa_cache_counter; /* Number of DFA states created since the last pruning */
+ struct mempool *pool; /* DFA states are allocated from this pool */
+ struct dfa_state *free_states; /* Recycled DFA states, linked by their next pointers */
+};
+
+/* Hash a set of NFA states to a slot of the DFA hash table by folding the upper bits to the lower ones */
+static inline unsigned
+wp_hash(u32 set)
+{
+  u32 folded = set ^ (set >> 16);
+
+  folded ^= folded >> 8;
+  return folded % HASH_SIZE;
+}
+
+/*
+ * Find the DFA state representing the given set of NFA states, creating it
+ * if it is not cached yet. A new state gets only incomplete edges (state sets
+ * tagged by bit 0); they are resolved to pointers lazily by wp_match().
+ */
+static struct dfa_state *
+wp_new_state(struct wildpatt *w, u32 set)
+{
+  unsigned h = wp_hash(set);
+  struct dfa_state *d;
+  unsigned bit;
+  u32 def_set;
+
+  /* Probe the hash table until we find the set or hit an empty slot */
+  while ((d = w->hash[h]))
+    {
+      if (d->nfa_set == set)
+        return d;
+      h = (h + HASH_SKIP) % HASH_SIZE;
+    }
+  /* Not cached: recycle a free state or allocate a new one */
+  if ((d = w->free_states))
+    w->free_states = d->next;
+  else
+    d = mp_alloc(w->pool, sizeof(*d));
+  w->hash[h] = d;
+  memset(d, 0, sizeof(*d));
+  d->nfa_set = set;
+  def_set = 0;
+  for (bit=1; bit <= w->nfa_states; bit++)
+    if (set & ((u32)1 << bit))		/* Unsigned shift, since bit can be as high as 31 */
+      {
+        struct nfa_state *n = &w->nfa[bit];
+        if (n->ch)
+          d->edge[n->ch] |= n->match_states | 1;
+        d->final |= n->final;
+        def_set |= n->default_states;
+      }
+  if (def_set)
+    {
+      /* Default transitions apply to every input character */
+      unsigned i;
+      def_set |= 1;
+      for (i=0; i<256; i++)
+        d->edge[i] |= def_set;
+    }
+  w->dfa_cache_counter++;
+  return d;
+}
+
+/*
+ * Compile the wildcard pattern `p' (`?' matches any single character, `*'
+ * any string) to a structure allocated from `pool'.
+ *
+ * Returns NULL if the pattern is too long or needs too many NFA states.
+ */
+struct wildpatt *
+wp_compile(const byte *p, struct mempool *pool)
+{
+  struct wildpatt *w;
+  const byte *c;
+  uns i, states;
+
+  if (strlen(p) >= MAX_STATES)		/* Too long */
+    return NULL;
+  /*
+   * State 1 is initial and every character except `*' adds one more state.
+   * The highest state number must stay below MAX_STATES, otherwise we would
+   * overflow both w->nfa[] and the 32-bit state sets.
+   */
+  states = 1;
+  for (c = p; *c; c++)
+    if (*c != '*')
+      states++;
+  if (states >= MAX_STATES)
+    return NULL;
+
+  w = mp_alloc_zero(pool, sizeof(*w));
+  w->pool = pool;
+  for (i=1; *p; p++)
+    {
+      struct nfa_state *n = w->nfa + i;
+      if (*p == '?')
+        n->default_states |= (u32)1 << (++i);	/* Default edge to a new state */
+      else if (*p == '*')
+        n->default_states |= (u32)1 << i;	/* Default edge to the same state */
+      else
+        {
+          n->ch = *p;				/* Edge to new state labelled with 'c' */
+          n->match_states = (u32)1 << (++i);
+        }
+    }
+  w->nfa[i].final = 1;
+  w->nfa_states = i;
+  w->dfa_start = wp_new_state(w, (u32)1 << 1);
+  return w;
+}
+
+/*
+ * Flush the cache of DFA states: all states are moved to the list of free
+ * states and the start state is constructed afresh.
+ */
+static void
+wp_prune_cache(struct wildpatt *w)
+{
+  /*
+   * I was unable to trigger cache overflow on my large set of
+   * test cases, so I decided to handle it in an extremely dumb
+   * way. --mj
+   *
+   * The start state must be rebuilt as well: its resolved edges point
+   * to states we are recycling, so keeping it would make wp_match()
+   * follow pointers to states which later represent different sets.
+   */
+  int i;
+  for (i=0; i<HASH_SIZE; i++)
+    if (w->hash[i])
+      {
+        struct dfa_state *d = w->hash[i];
+        w->hash[i] = NULL;
+        d->next = w->free_states;
+        w->free_states = d;
+      }
+  w->dfa_cache_counter = 0;
+  w->dfa_start = wp_new_state(w, (u32)1 << 1);	/* Sets the counter to 1 */
+}
+
+/*
+ * Match the string `s' against a compiled pattern. Walks the DFA, resolving
+ * incomplete edges on the fly, and returns non-zero iff the whole string
+ * matches. The cache of DFA states is pruned first if it has grown too much.
+ */
+int
+wp_match(struct wildpatt *w, const byte *s)
+{
+  struct dfa_state *cur;
+
+  if (w->dfa_cache_counter >= MAX_CACHED)
+    wp_prune_cache(w);
+  for (cur = w->dfa_start; *s; s++)
+    {
+      uintptr_t edge = cur->edge[*s];
+      if (!edge)			/* No transition -> no match */
+        return 0;
+      if (edge & 1)
+        {
+          /* Need to lookup/create the destination state */
+          struct dfa_state *dest = wp_new_state(w, edge & ~1);
+          cur->edge[*s] = (uintptr_t) dest;
+          cur = dest;
+        }
+      else
+        cur = (struct dfa_state *) edge;
+    }
+  return cur->final;
+}
+
+/* Minimum length of a string matching the pattern, i.e., the number of its characters other than `*' */
+int
+wp_min_size(const byte *p)
+{
+  int need = 0;
+
+  for (; *p; p++)
+    if (*p != '*')
+      need++;
+  return need;
+}
+
+#ifdef TEST
+
+/* Debugging dump of the NFA and of all cached DFA states to stdout */
+void
+wp_dump(struct wildpatt *w)
+{
+  int st, slot;
+
+  puts("NFA:");
+  for (st = 1; st <= w->nfa_states; st++)
+    {
+      struct nfa_state *n = &w->nfa[st];
+      printf("%2d: %d %02x %08x %08x\n", st, n->final, n->ch, n->match_states, n->default_states);
+    }
+
+  puts("DFA:");
+  for (slot = 0; slot < HASH_SIZE; slot++)
+    {
+      struct dfa_state *d = w->hash[slot];
+      if (d)
+        printf("%3d: %08x\n", slot, d->nfa_set);
+    }
+  printf("%d DFA states cached.\n", w->dfa_cache_counter);
+}
+
+/*
+ * Test program: compiles the pattern given as the only argument and prints
+ * those lines of the standard input which match it. The automaton is dumped
+ * before and after the run.
+ */
+int main(int argc, char **argv)
+{
+  struct wildpatt *w;
+  char buf[1024];
+  char *nl;
+
+  if (argc != 2)
+    return 1;
+  w = wp_compile(argv[1], mp_new(65536));
+  if (!w)
+    {
+      puts("Compile error");
+      return 1;
+    }
+  wp_dump(w);
+  while (fgets(buf, sizeof(buf)-1, stdin))
+    {
+      nl = strchr(buf, '\n');
+      if (!nl)				/* Unterminated line => stop reading */
+        break;
+      *nl = 0;
+#if 0
+      printf("%d\n", wp_match(w, buf));
+#else
+      if (wp_match(w, buf))
+        puts(buf);
+#endif
+    }
+  wp_dump(w);
+  return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Fast Wildcard Pattern Matcher (only `?' and `*' supported)
+ *
+ * (c) 1999 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_WILDMATCH_H
+#define _UCW_WILDMATCH_H
+
+struct wildpatt;
+struct mempool;
+
+/* Compile a pattern to a structure allocated from the given pool; returns NULL if the pattern is too long */
+struct wildpatt *wp_compile(const byte *, struct mempool *);
+
+/* Match a string against a compiled pattern; returns non-zero iff the whole string matches */
+int wp_match(struct wildpatt *, const byte *);
+
+/* Minimum length of a string matching the (uncompiled) pattern */
+int wp_min_size(const byte *);
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Word Splitting
+ *
+ * (c) 1997 Martin Mares <mj@ucw.cz>
+ * (c) 2004 Robert Spalek <robert@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/chartype.h"
+
+#include <string.h>
+
+/*
+ * Split the string `str' to fields separated by the character `sep'.
+ * The separators are overwritten by zeros and pointers to the fields are
+ * stored to rec[0..max-1]. An empty string gives a single empty field.
+ *
+ * Returns the number of fields or -1 if there are more than `max' of them.
+ */
+int
+sepsplit(char *str, uns sep, char **rec, uns max)
+{
+  uns cnt = 0;
+
+  if (!max)				/* Not even the first field fits in rec[] */
+    return -1;
+  while (1)
+    {
+      rec[cnt++] = str;
+      str = strchr(str, sep);
+      if (!str)
+        return cnt;
+      if (cnt >= max)
+        return -1;
+      *str++ = 0;
+    }
+}
+
+/*
+ * Split the string `src' to words separated by white space. A word starting
+ * with a double quote extends to the next double quote (or to the end of the
+ * string) and the quotes are dropped. The white space and the closing quotes
+ * are overwritten by zeros and pointers to the words are stored to dst[].
+ *
+ * Returns the number of words or -1 if there are more than `max' of them.
+ */
+int
+wordsplit(char *src, char **dst, uns max)
+{
+  uns words = 0;
+  char *p = src;
+
+  while (1)
+    {
+      /* Wipe out the white space in front of the word */
+      for (; Cspace(*p); p++)
+        *p = 0;
+      if (!*p)
+        return words;
+      if (words >= max)
+        return -1;
+      if (*p != '"')
+        {
+          /* A plain word is terminated when the following white space gets wiped */
+          dst[words++] = p;
+          while (*p && !Cspace(*p))
+            p++;
+          continue;
+        }
+      /* A quoted word */
+      dst[words++] = ++p;
+      while (*p && *p != '"')
+        p++;
+      if (*p)
+        *p++ = 0;
+    }
+}
--- /dev/null
+/*
+ * UCW Library -- Thread Pools and Work Queues
+ *
+ * (c) 2006 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/threads.h"
+#include "lib/workqueue.h"
+#include "lib/heap.h"
+
+/*
+ * Main function of a worker thread: runs the init_thread hook, tells
+ * worker_pool_init() that the thread is ready and then serves requests
+ * forever, passing each finished request to the queue it should reply to.
+ * The thread ends only when a request calls pthread_exit().
+ */
+static void *
+worker_thread_init(void *arg)
+{
+  struct worker_thread *self = arg;
+  struct worker_pool *pool = self->pool;
+  struct work *job;
+
+  if (pool->init_thread)
+    pool->init_thread(self);
+  sem_post(pool->init_cleanup_sem);
+
+  while (1)
+    {
+      job = raw_queue_get(&pool->requests);
+      job->go(self, job);
+      raw_queue_put(&job->reply_to->finished, job);
+    }
+
+  return NULL;
+}
+
+/*
+ * A request handler used for shutting threads down: runs the cleanup_thread
+ * hook, notifies worker_pool_cleanup() and terminates the calling thread.
+ */
+static void
+worker_thread_signal_finish(struct worker_thread *t, struct work *w UNUSED)
+{
+  struct worker_pool *pool = t->pool;
+
+  if (pool->cleanup_thread)
+    pool->cleanup_thread(t);
+  sem_post(pool->init_cleanup_sem);
+  pthread_exit(NULL);
+}
+
+/*
+ * Initialize a worker pool and start its threads. The caller fills in
+ * num_threads, stack_size and the hooks in `p' beforehand. The threads are
+ * started one by one and each of them finishes its init_thread hook before
+ * the next one is created. Dies if a thread cannot be created.
+ */
+void
+worker_pool_init(struct worker_pool *p)
+{
+  clist_init(&p->worker_threads);
+  raw_queue_init(&p->requests);
+  p->init_cleanup_sem = sem_alloc();
+
+  /* The pthread functions return a positive error number on failure, never a negative value */
+  pthread_attr_t attr;
+  if (pthread_attr_init(&attr) ||
+      pthread_attr_setstacksize(&attr, p->stack_size ? : default_thread_stack_size))
+    ASSERT(0);
+
+  for (uns i=0; i < p->num_threads; i++)
+    {
+      struct worker_thread *t = (p->new_thread ? p->new_thread() : xmalloc(sizeof(*t)));
+      t->pool = p;
+      t->id = i;
+      int err = pthread_create(&t->thread, &attr, worker_thread_init, t);
+      if (err)
+        /* pthread_create() does not set errno, so %m would print an unrelated message */
+        die("Unable to create thread: error %d", err);
+      clist_add_tail(&p->worker_threads, &t->n);
+      sem_wait(p->init_cleanup_sem);	/* Wait until the thread has initialized itself */
+    }
+
+  pthread_attr_destroy(&attr);
+}
+
+/*
+ * Shut down a worker pool: every thread is sent a request which makes it run
+ * its cleanup hook and exit, then all threads are joined and their structures
+ * freed, and finally the request queue and the semaphore are destroyed.
+ */
+void
+worker_pool_cleanup(struct worker_pool *p)
+{
+  for (uns left = p->num_threads; left; left--)
+    {
+      /* The request can live on our stack, because we wait until it is processed */
+      struct work quit = {
+        .go = worker_thread_signal_finish
+      };
+      raw_queue_put(&p->requests, &quit);
+      sem_wait(p->init_cleanup_sem);
+    }
+
+  struct worker_thread *tmp;
+  CLIST_FOR_EACH_DELSAFE(struct worker_thread *, t, p->worker_threads, tmp)
+    {
+      int err = pthread_join(t->thread, NULL);
+      ASSERT(!err);
+      if (!p->free_thread)
+        xfree(t);
+      else
+        p->free_thread(t);
+    }
+  raw_queue_cleanup(&p->requests);
+  sem_free(p->init_cleanup_sem);
+}
+
+/* Initialize an empty raw queue: no requests in the list, no heap allocated yet */
+void
+raw_queue_init(struct raw_queue *q)
+{
+  pthread_mutex_init(&q->queue_mutex, NULL);
+  clist_init(&q->pri0_queue);
+  q->queue_sem = sem_alloc();
+
+  /* The heap is allocated when the first request with non-zero priority arrives */
+  q->pri_heap = NULL;
+  q->heap_cnt = 0;
+  q->heap_max = 0;
+}
+
+/* Release all resources of a raw queue, which is required to be empty */
+void
+raw_queue_cleanup(struct raw_queue *q)
+{
+  /* No requests may be pending in either part of the queue */
+  ASSERT(clist_empty(&q->pri0_queue));
+  ASSERT(!q->heap_cnt);
+
+  xfree(q->pri_heap);
+  sem_free(q->queue_sem);
+  pthread_mutex_destroy(&q->queue_mutex);
+}
+
+/* Heap ordering: a request with a higher priority compares as "less", so it is presumably the one HEAP_DELMIN takes out first */
+#define PRI_LESS(x,y) ((x)->priority > (y)->priority)
+
+/*
+ * Put a request to a raw queue. Requests with priority 0 go to the tail of
+ * an ordinary list, the other ones to a heap, which is indexed from 1 and
+ * grows by doubling. Finally the semaphore counting queued requests is posted.
+ */
+void
+raw_queue_put(struct raw_queue *q, struct work *w)
+{
+  pthread_mutex_lock(&q->queue_mutex);
+  if (w->priority)
+    {
+      if (unlikely(q->heap_cnt >= q->heap_max))
+        {
+          uns new_max = q->heap_max ? 2*q->heap_max : 16;
+          q->heap_max = new_max;
+          q->pri_heap = xrealloc(q->pri_heap, (new_max + 1) * sizeof(struct work *));
+        }
+      struct work **heap = q->pri_heap;
+      heap[++q->heap_cnt] = w;
+      HEAP_INSERT(struct work *, heap, q->heap_cnt, PRI_LESS, HEAP_SWAP);
+    }
+  else
+    clist_add_tail(&q->pri0_queue, &w->n);
+  pthread_mutex_unlock(&q->queue_mutex);
+  sem_post(q->queue_sem);
+}
+
+/*
+ * Take a request out of a raw queue which is known to be non-empty (the caller
+ * has already decremented the semaphore). Requests in the heap take precedence
+ * over those with priority 0.
+ */
+static inline struct work *
+raw_queue_do_get(struct raw_queue *q)
+{
+  struct work *w;
+
+  pthread_mutex_lock(&q->queue_mutex);
+  if (q->heap_cnt)
+    {
+      struct work **heap = q->pri_heap;
+      w = heap[1];
+      HEAP_DELMIN(struct work *, heap, q->heap_cnt, PRI_LESS, HEAP_SWAP);
+    }
+  else
+    {
+      w = clist_head(&q->pri0_queue);
+      ASSERT(w);
+      clist_remove(&w->n);
+    }
+  pthread_mutex_unlock(&q->queue_mutex);
+  return w;
+}
+
+/* Get a request from a raw queue, sleeping until one is available */
+struct work *
+raw_queue_get(struct raw_queue *q)
+{
+  struct work *w;
+
+  sem_wait(q->queue_sem);
+  w = raw_queue_do_get(q);
+  return w;
+}
+
+/* Get a request from a raw queue without blocking; returns NULL if the semaphore cannot be taken */
+struct work *
+raw_queue_try_get(struct raw_queue *q)
+{
+  if (sem_trywait(q->queue_sem))
+    return NULL;
+  return raw_queue_do_get(q);
+}
+
+/* Initialize the work queue `q' and bind it to the worker pool `p' */
+void
+work_queue_init(struct worker_pool *p, struct work_queue *q)
+{
+  raw_queue_init(&q->finished);
+  q->nr_running = 0;
+  q->pool = p;
+}
+
+/* Destroy a work queue; all submitted requests must have been collected before */
+void
+work_queue_cleanup(struct work_queue *q)
+{
+  ASSERT(q->nr_running == 0);
+  raw_queue_cleanup(&q->finished);
+}
+
+/*
+ * Submit the request `w' to the pool the queue `q' is bound to. When the
+ * request is finished, it is queued in q->finished, where work_wait() and
+ * work_try_wait() collect it.
+ */
+void
+work_submit(struct work_queue *q, struct work *w)
+{
+  struct worker_pool *pool = q->pool;
+
+  ASSERT(w->go);
+  w->reply_to = q;
+  raw_queue_put(&pool->requests, w);
+  q->nr_running++;
+}
+
+/*
+ * Collect one finished request of the queue `q'. Returns NULL if no requests
+ * are running at all, or if `try' is set and none has finished yet; otherwise
+ * waits for a request to finish.
+ */
+static struct work *
+work_do_wait(struct work_queue *q, int try)
+{
+  struct work *done;
+
+  if (!q->nr_running)
+    return NULL;
+  if (try)
+    done = raw_queue_try_get(&q->finished);
+  else
+    done = raw_queue_get(&q->finished);
+  if (done)
+    q->nr_running--;
+  return done;
+}
+
+/* Wait for a request to finish and return it; returns NULL if no requests are running */
+struct work *
+work_wait(struct work_queue *q)
+{
+  struct work *done = work_do_wait(q, 0);
+
+  return done;
+}
+
+/* Return a finished request if there is one, NULL otherwise; never blocks */
+struct work *
+work_try_wait(struct work_queue *q)
+{
+  struct work *done = work_do_wait(q, 1);
+
+  return done;
+}
+
+#ifdef TEST
+
+#include <unistd.h>
+
+/* Test hook: log the start of a worker thread */
+static void
+wt_init(struct worker_thread *t)
+{
+  log(L_INFO, "INIT %d", t->id);
+}
+
+/* Test hook: log the end of a worker thread */
+static void
+wt_cleanup(struct worker_thread *t)
+{
+  log(L_INFO, "CLEANUP %d", t->id);
+}
+
+/* Test request: the generic request must come first, since pointers to struct work are cast back to struct w */
+struct w {
+ struct work w;
+ uns id; /* Sequence number of the request */
+};
+
+/* Test handler: log the request being served and sleep for a while */
+static void
+go(struct worker_thread *t, struct work *w)
+{
+  struct w *req = (struct w *) w;
+
+  log(L_INFO, "GO %d: request %d (pri %d)", t->id, req->id, w->priority);
+  usleep(1);
+}
+
+/*
+ * Test program: starts a pool of 10 threads, submits 500 requests (the first
+ * half of them with increasing priorities, the rest with priority 0), waits
+ * for all of them to finish and shuts everything down.
+ */
+int main(void)
+{
+  struct worker_pool pool = {
+    .num_threads = 10,
+    .stack_size = 65536,
+    .init_thread = wt_init,
+    .cleanup_thread = wt_cleanup
+  };
+  worker_pool_init(&pool);
+
+  struct work_queue q;
+  work_queue_init(&pool, &q);
+  for (uns i=0; i<500; i++)
+    {
+      struct w *w = xmalloc_zero(sizeof(*w));
+      w->w.go = go;
+      w->w.priority = (i < 250 ? i : 0);
+      w->id = i;
+      work_submit(&q, &w->w);
+      log(L_INFO, "Submitted request %d (pri %d)", w->id, w->w.priority);
+    }
+
+  struct w *w;
+  while ((w = (struct w *) work_wait(&q)))
+    {
+      log(L_INFO, "Finished request %d", w->id);
+      xfree(w);				/* The request is ours again, so release it */
+    }
+
+  work_queue_cleanup(&q);
+  worker_pool_cleanup(&pool);
+  return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Thread Pools and Work Queues
+ *
+ * (c) 2006 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_WORKQUEUE_H
+#define _UCW_WORKQUEUE_H
+
+/*
+ * A thread pool is a set of threads receiving work requests from a common queue;
+ * each work request contains a pointer to a function to be called inside the worker thread.
+ *
+ * A work queue is an interface for submitting work requests. It's bound to a single
+ * thread pool, it remembers running requests and gathers replies. A single work queue
+ * should not be used by multiple threads simultaneously.
+ *
+ * Requests can have priorities. Requests with the highest priority are served first.
+ * Requests of priority 0 are guaranteed to be served on a first-come-first-served
+ * basis, requests of higher priorities are unordered.
+ *
+ * When a thread pool is initialized, new_thread() is called for every thread first,
+ * allocating struct worker_thread (and user-defined thread context following it) for
+ * each thread. Then the threads are fired and each of them executes the init_thread()
+ * callback. These callbacks are serialized and worker_pool_init() function waits
+ * until all of them finish.
+ */
+
+#include "lib/semaphore.h"
+#include "lib/clists.h"
+
+#include <pthread.h>
+
+struct worker_thread { // One of threads serving requests
+ cnode n; // Node in worker_pool.worker_threads
+ pthread_t thread;
+ struct worker_pool *pool; // The pool this thread belongs to
+ int id; // Inside the pool
+ /* user-defined data can follow */
+};
+
+struct raw_queue { // Generic queue with locking
+ pthread_mutex_t queue_mutex; // Protects the list and the heap
+ clist pri0_queue; // Ordinary queue for requests with priority=0
+ struct work **pri_heap; // A heap for request with priority>0
+ uns heap_cnt, heap_max; // Number of requests in the heap and its allocated capacity
+ sem_t *queue_sem; // Number of requests queued
+};
+
+/* A pool of worker threads; num_threads, stack_size and the hooks are set by the user before worker_pool_init() */
+struct worker_pool {
+ struct raw_queue requests; // Requests waiting for a free thread
+ uns num_threads;
+ uns stack_size; // 0 for default
+ struct worker_thread *(*new_thread)(void); // default: xmalloc the struct
+ void (*free_thread)(struct worker_thread *t); // default: xfree
+ void (*init_thread)(struct worker_thread *t); // default: empty
+ void (*cleanup_thread)(struct worker_thread *t); // default: empty
+ clist worker_threads; // All threads of the pool
+ sem_t *init_cleanup_sem; // Posted by a thread when it finishes its initialization or cleanup
+};
+
+/* An interface for submitting requests to a pool and collecting the finished ones */
+struct work_queue {
+ struct worker_pool *pool;
+ uns nr_running; // Number of requests in service
+ struct raw_queue finished; // Finished requests queue up here
+};
+
+struct work { // A single request
+ cnode n; // Node in the list of requests with priority=0
+ uns priority; // 0 for first-come-first-served, higher values are served earlier
+ struct work_queue *reply_to; // Where to queue the request when it's finished
+ void (*go)(struct worker_thread *t, struct work *w); // Called inside the worker thread
+};
+
+/* Starting and stopping of thread pools */
+void worker_pool_init(struct worker_pool *p);
+void worker_pool_cleanup(struct worker_pool *p);
+
+/* Low-level queues of requests; the _get variant blocks, _try_get returns NULL instead of blocking */
+void raw_queue_init(struct raw_queue *q);
+void raw_queue_cleanup(struct raw_queue *q);
+void raw_queue_put(struct raw_queue *q, struct work *w);
+struct work *raw_queue_get(struct raw_queue *q);
+struct work *raw_queue_try_get(struct raw_queue *q);
+
+/* Work queues; work_wait() and work_try_wait() return NULL when no requests are running */
+void work_queue_init(struct worker_pool *p, struct work_queue *q);
+void work_queue_cleanup(struct work_queue *q);
+void work_submit(struct work_queue *q, struct work *w);
+struct work *work_wait(struct work_queue *q);
+struct work *work_try_wait(struct work_queue *q);
+
+#endif /* !_UCW_WORKQUEUE_H */