Added libucw from Sherlock v3.12.2.

author Martin Mares <mj@ucw.cz>

Thu, 15 May 2008 09:21:15 +0000 (11:21 +0200)

committer Martin Mares <mj@ucw.cz>

Thu, 15 May 2008 09:21:15 +0000 (11:21 +0200)
author Martin Mares <mj@ucw.cz>
Thu, 15 May 2008 09:21:15 +0000 (11:21 +0200)
committer Martin Mares <mj@ucw.cz>
Thu, 15 May 2008 09:21:15 +0000 (11:21 +0200)
diff --git a/lib/Makefile b/lib/Makefile

new file mode 100644 (file)

index 0000000..7e520c2
--- /dev/null
+++ b/lib/Makefile
@@ -0,0 +1,138 @@
+# Makefile for the UCW Library (c) 1997--2007 Martin Mares <mj@ucw.cz>
+
+DIRS+=lib
+CONFIGS+=library
+LIBUCW=$(o)/lib/libucw.pc
+
+ifdef CONFIG_UCW_DBTOOL
+PROGS+=$(o)/lib/db-tool
+endif
+
+LIBUCW_MODS= \
+       threads \
+       alloc alloc_str realloc bigalloc mempool mempool-str mempool-fmt eltpool \
+       mmap pagecache partmap hashfunc \
+       lists slists simple-lists bitsig \
+       log log-file proctitle \
+       conf-alloc conf-dump conf-input conf-intr conf-journal conf-parse conf-section \
+       ipaccess \
+       profile \
+       fastbuf ff-binary ff-string ff-printf ff-unicode \
+       fb-file carefulio fb-mem fb-temp fb-mmap fb-limfd fb-buffer fb-grow fb-pool fb-atomic fb-param \
+       str_ctype str_upper str_lower unicode stkstring \
+       wildmatch wordsplit ctmatch patimatch patmatch regex \
+       prime primetable random timer randomkey \
+       bit-ffs bit-fls \
+       db \
+       url \
+       mainloop exitstatus runcmd sighandler \
+       lizard lizard-safe adler32 \
+       md5 md5hex \
+       base64 base224 \
+       sync \
+       qache \
+       string \
+       bbuf \
+       getopt
+
+# Public header files of the library (they get installed by the
+# .include-stamp rule). Each header is listed exactly once.
+LIBUCW_INCLUDES= \
+       lib.h config.h threads.h \
+       mempool.h pagecache.h \
+       arraysort.h \
+       lists.h clists.h slists.h simple-lists.h \
+       unaligned.h prefetch.h \
+       bbuf.h gbuf.h bitarray.h bitsig.h \
+       hashfunc.h hashtable.h \
+       heap.h binheap.h binheap-node.h \
+       redblack.h \
+       binsearch.h \
+       bitops.h \
+       conf.h getopt.h ipaccess.h \
+       profile.h \
+       fastbuf.h lfs.h ff-unicode.h ff-utf8.h ff-binary.h \
+       chartype.h unicode.h stkstring.h \
+       wildmatch.h patmatch.h \
+       db.h \
+       url.h \
+       mainloop.h \
+       lizard.h \
+       md5.h \
+       base64.h base224.h \
+       qache.h \
+       kmp.h kmp-search.h \
+       partmap.h
+
+ifdef CONFIG_UCW_THREADS
+# Some modules require threading
+LIBUCW_MODS+=threads-conf workqueue asio fb-direct
+LIBUCW_INCLUDES+=workqueue.h semaphore.h asio.h
+endif
+
+ifdef CONFIG_OWN_REGEX
+include $(s)/lib/regex/Makefile
+endif
+
+ifdef CONFIG_OWN_GETOPT
+include $(s)/lib/getopt/Makefile
+endif
+
+include $(s)/lib/sorter/Makefile
+
+# Paths of all library modules inside the object tree, without any suffix
+LIBUCW_MOD_PATHS=$(addprefix $(o)/lib/,$(LIBUCW_MODS))
+
+# The static library depends on the .o objects, the shared one on the .oo objects
+$(o)/lib/libucw.a: $(addsuffix .o,$(LIBUCW_MOD_PATHS))
+$(o)/lib/libucw.so: $(addsuffix .oo,$(LIBUCW_MOD_PATHS))
+
+# Extra optimization flags for performance-sensitive modules. They are applied
+# to both the static (.o) and the shared-library (.oo) variant of each object,
+# so that libucw.a and libucw.so are built with the same optimizations.
+$(o)/lib/hashfunc.o $(o)/lib/hashfunc.oo: CFLAGS += -funroll-loops
+$(o)/lib/lizard.o $(o)/lib/lizard.oo: CFLAGS += $(COPT2) -funroll-loops
+
+$(o)/lib/db-test: $(o)/lib/db-test.o $(LIBUCW)
+$(o)/lib/db-tool: $(o)/lib/db-tool.o $(LIBUCW)
+$(o)/lib/conf-test: $(o)/lib/conf-test.o $(LIBUCW)
+$(o)/lib/lfs-test: $(o)/lib/lfs-test.o $(LIBUCW)
+$(o)/lib/hash-test: $(o)/lib/hash-test.o $(LIBUCW)
+$(o)/lib/str-test: $(o)/lib/str-test.o $(LIBUCW)
+$(o)/lib/asort-test: $(o)/lib/asort-test.o $(LIBUCW)
+$(o)/lib/redblack-test: $(o)/lib/redblack-test.o $(LIBUCW)
+$(o)/lib/binheap-test: $(o)/lib/binheap-test.o $(LIBUCW)
+$(o)/lib/lizard-test: $(o)/lib/lizard-test.o $(LIBUCW)
+$(o)/lib/kmp-test: $(o)/lib/kmp-test.o $(LIBUCW) $(LIBCHARSET)
+$(o)/lib/ipaccess-test: $(o)/lib/ipaccess-test.o $(LIBUCW)
+
+# Test targets of the library; the prerequisites of each of them are listed below.
+# NOTE(review): a rule for $(o)/lib/bitops.test exists below, but bitops.test is
+# not listed here, so it is presumably never run -- confirm whether that is intended.
+TESTS+=$(addprefix $(o)/lib/,regex.test unicode.test hash-test.test mempool.test stkstring.test \
+    slists.test kmp-test.test bbuf.test getopt.test fastbuf.test ff-unicode.test eltpool.test)
+
+$(o)/lib/regex.test: $(o)/lib/regex-t
+$(o)/lib/unicode.test: $(o)/lib/unicode-t
+$(o)/lib/hash-test.test: $(o)/lib/hash-test
+$(o)/lib/mempool.test: $(o)/lib/mempool-t $(o)/lib/mempool-fmt-t $(o)/lib/mempool-str-t
+$(o)/lib/stkstring.test: $(o)/lib/stkstring-t
+$(o)/lib/bitops.test: $(o)/lib/bit-ffs-t $(o)/lib/bit-fls-t
+$(o)/lib/slists.test: $(o)/lib/slists-t
+$(o)/lib/kmp-test.test: $(o)/lib/kmp-test
+$(o)/lib/bbuf.test: $(o)/lib/bbuf-t
+$(o)/lib/getopt.test: $(o)/lib/getopt-t
+$(o)/lib/fastbuf.test: $(o)/lib/fb-file-t $(o)/lib/fb-grow-t $(o)/lib/fb-pool-t
+$(o)/lib/ff-unicode.test: $(o)/lib/ff-unicode-t
+$(o)/lib/eltpool.test: $(o)/lib/eltpool-t
+
+ifdef CONFIG_UCW_THREADS
+TESTS+=$(addprefix $(o)/lib/,asio.test)
+$(o)/lib/asio.test: $(o)/lib/asio-t
+endif
+
+API_LIBS+=libucw
+API_INCLUDES+=$(o)/lib/.include-stamp
+# The stamp file records when the headers were last installed: the recipe is
+# re-run whenever a header from LIBUCW_INCLUDES or obj/autoconf.h is newer than
+# the stamp. $(<D) is the directory part of the first prerequisite.
+$(o)/lib/.include-stamp: $(addprefix $(s)/lib/,$(LIBUCW_INCLUDES)) obj/autoconf.h
+       $(Q)$(s)/build/install-includes $(<D) run/include/lib $(LIBUCW_INCLUDES)
+       $(Q)$(s)/build/install-includes obj run/include/lib autoconf.h
+       $(Q)touch $@
+run/lib/pkgconfig/libucw.pc: $(o)/lib/libucw.pc
+
+ifdef CONFIG_UCW_PERL
+include $(s)/lib/perl/Makefile
+endif
+
+ifdef CONFIG_UCW_SHELL_UTILS
+include $(s)/lib/shell/Makefile
+endif
diff --git a/lib/THREADS b/lib/THREADS

new file mode 100644 (file)

index 0000000..ec58d49
--- /dev/null
+++ b/lib/THREADS
@@ -0,0 +1,7 @@
+Generally, functions in the UCW library are reentrant as long as you call them
+on different data. Calling them on the same object is not safe unless stated
+otherwise; the same applies to functions acting on any kind of global state.
+
+There are some exceptions:
+
+- setproctitle() is not safe, it modifies global state
diff --git a/lib/adler32.c b/lib/adler32.c

new file mode 100644 (file)

index 0000000..cf3a652
--- /dev/null
+++ b/lib/adler32.c
@@ -0,0 +1,48 @@
+/*
+ *     adler32.c -- compute the Adler-32 checksum of a data stream
+ *
+ *     Copyright (C) 1995--2003 Mark Adler
+ *
+ *     Taken from zlib-1.2.1 and adjusted by Robert Spalek.  For conditions of
+ *     distribution and use, see copyright notice in zlib.h.
+ */
+
+#include "lib/lib.h"
+#include "lib/lizard.h"
+
+#define BASE 65521UL   /* largest prime smaller than 65536 */
+#define NMAX 5552      /* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */
+
+#define DO1(buf,i)  {s1 += buf[i]; s2 += s1;}
+#define DO2(buf,i)  DO1(buf,i); DO1(buf,i+1);
+#define DO4(buf,i)  DO2(buf,i); DO2(buf,i+2);
+#define DO8(buf,i)  DO4(buf,i); DO4(buf,i+4);
+#define DO16(buf)   DO8(buf,0); DO8(buf,8);
+#define MOD(a) a %= BASE
+
+/*
+ * Update a running Adler-32 checksum `adler' with `len' bytes starting at `buf'
+ * and return the new checksum. When `buf' is NULL, 1 is returned.
+ */
+uns
+update_adler32(uns adler, const byte *buf, uns len)
+{
+  /* The DO1..DO16 macros operate on locals called s1 and s2 */
+  uns s1 = adler & 0xffff;
+  uns s2 = (adler >> 16) & 0xffff;
+
+  if (!buf)
+    return 1L;
+
+  while (len > 0)
+    {
+      /* Process at most NMAX bytes between two modular reductions */
+      int chunk = (len < NMAX) ? (int) len : NMAX;
+      len -= chunk;
+
+      /* Unrolled part: 16 bytes at a time */
+      for (; chunk >= 16; chunk -= 16, buf += 16)
+        {
+          DO16(buf);
+        }
+
+      /* The remaining 0 to 15 bytes */
+      for (; chunk > 0; chunk--)
+        {
+          s1 += *buf++;
+          s2 += s1;
+        }
+
+      MOD(s1);
+      MOD(s2);
+    }
+  return (s2 << 16) | s1;
+}
diff --git a/lib/alloc.c b/lib/alloc.c

new file mode 100644 (file)

index 0000000..678901a
--- /dev/null
+++ b/lib/alloc.c
@@ -0,0 +1,45 @@
+/*
+ *     UCW Library -- Memory Allocation
+ *
+ *     (c) 2000 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+#ifndef DEBUG_DMALLOC
+
+/*
+ * Allocate `size' bytes by malloc(). If the allocation fails, the failure is
+ * reported by die() instead of returning NULL to the caller.
+ */
+void *
+xmalloc(uns size)
+{
+  void *x = malloc(size);
+  if (!x)
+    die("Cannot allocate %u bytes of memory", size);	/* size is unsigned, hence %u */
+  return x;
+}
+
+#endif
+
+/*
+ * Allocate `size' bytes by xmalloc() and fill them with zeroes.
+ */
+void *
+xmalloc_zero(uns size)
+{
+  void *x = xmalloc(size);
+  /*
+   * memset() is the standard C function declared in <string.h>, which this
+   * file includes; the legacy bzero() lives in <strings.h>.
+   */
+  memset(x, 0, size);
+  return x;
+}
+
+/*
+ * Release a block of memory by passing it to free().
+ */
+void
+xfree(void *ptr)
+{
+  /*
+   * Maybe it is a little waste of resources to make this a function instead
+   * of a macro, but xmalloc() is not used for anything critical anyway,
+   * so let's prefer simplicity.
+   */
+  free(ptr);
+}
diff --git a/lib/alloc_str.c b/lib/alloc_str.c

new file mode 100644 (file)

index 0000000..05e803c
--- /dev/null
+++ b/lib/alloc_str.c
@@ -0,0 +1,19 @@
+/*
+ *     UCW Library -- String Allocation
+ *
+ *     (c) 1997 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+
+#include <string.h>
+
+/*
+ * Return a copy of the NUL-terminated string `s' stored in memory
+ * obtained from xmalloc().
+ */
+char *
+xstrdup(const char *s)
+{
+  uns size = strlen(s) + 1;	/* Including the terminating NUL */
+  char *copy = xmalloc(size);
+  return memcpy(copy, s, size);
+}
diff --git a/lib/arraysort.h b/lib/arraysort.h

new file mode 100644 (file)

index 0000000..d5a9c78
--- /dev/null
+++ b/lib/arraysort.h
@@ -0,0 +1,174 @@
+/*
+ *     UCW Library -- Universal Array Sorter
+ *
+ *     (c) 2003 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+/*
+ *  This is not a normal header file, it's a generator of sorting
+ *  routines.  Each time you include it with parameters set in the
+ *  corresponding preprocessor macros, it generates an array sorter
+ *  with the parameters given.
+ *
+ *  You might wonder why the heck we implement our own array sorter
+ *  instead of using qsort(). The primary reason is that qsort handles
+ *  only contiguous arrays, but we need to sort array-like data structures
+ *  where the only way to access elements is by using an indexing macro.
+ *  Besides that, we are more than 2 times faster.
+ *
+ *  So much for advocacy, here are the parameters (those marked with [*]
+ *  are mandatory):
+ *
+ *  ASORT_PREFIX(x) [*]        add a name prefix (used on all global names
+ *                     defined by the sorter)
+ *  ASORT_KEY_TYPE  [*]        data type of a single array entry key
+ *  ASORT_ELT(i)    [*]        returns the key of i-th element
+ *  ASORT_LT(x,y)      x < y for ASORT_KEY_TYPE (default: "x<y")
+ *  ASORT_SWAP(i,j)    swap i-th and j-th element (default: assume _ELT
+ *                     is an l-value and swap just the keys)
+ *  ASORT_THRESHOLD    threshold for switching between quicksort and insertsort
+ *  ASORT_EXTRA_ARGS   extra arguments for the sort function (they are always
+ *                     visible in all the macros supplied above), starts with comma
+ *
+ *  After including this file, a function ASORT_PREFIX(sort)(uns array_size)
+ *  is declared and all parameter macros are automatically undef'd.
+ */
+
+#ifndef ASORT_LT
+#define ASORT_LT(x,y) ((x) < (y))
+#endif
+
+#ifndef ASORT_SWAP
+#define ASORT_SWAP(i,j) do { ASORT_KEY_TYPE tmp = ASORT_ELT(i); ASORT_ELT(i)=ASORT_ELT(j); ASORT_ELT(j)=tmp; } while (0)
+#endif
+
+#ifndef ASORT_THRESHOLD
+#define ASORT_THRESHOLD 8              /* Guesswork and experimentation */
+#endif
+
+#ifndef ASORT_EXTRA_ARGS
+#define ASORT_EXTRA_ARGS
+#endif
+
+/*
+ *  Sort the elements 0 .. array_size-1 (as accessed by ASORT_ELT) to ascending
+ *  order with respect to ASORT_LT. The sorter works in two phases: a non-recursive
+ *  quicksort which does not descend into partitions smaller than ASORT_THRESHOLD,
+ *  followed by a single insertion sort pass over the whole array.
+ */
+static void ASORT_PREFIX(sort)(uns array_size ASORT_EXTRA_ARGS)
+{
+  /*
+   * Explicit stack of partitions waiting to be sorted. When both partitions
+   * need sorting, the larger one is pushed and the smaller one is processed first.
+   */
+  struct stk { int l, r; } stack[8*sizeof(uns)];
+  int l, r, left, right, m;
+  uns sp = 0;
+  ASORT_KEY_TYPE pivot;
+
+  if (array_size <= 1)
+    return;
+
+  /* QuickSort with optimizations a'la Sedgewick, but stop at ASORT_THRESHOLD */
+
+  left = 0;
+  right = array_size - 1;
+  for(;;)
+    {
+      l = left;
+      r = right;
+      m = (l+r)/2;
+      /* Order the leftmost, the middle and the rightmost element; the middle one becomes the pivot */
+      if (ASORT_LT(ASORT_ELT(m), ASORT_ELT(l)))
+       ASORT_SWAP(l,m);
+      if (ASORT_LT(ASORT_ELT(r), ASORT_ELT(m)))
+       {
+         ASORT_SWAP(m,r);
+         if (ASORT_LT(ASORT_ELT(m), ASORT_ELT(l)))
+           ASORT_SWAP(l,m);
+       }
+      pivot = ASORT_ELT(m);
+      /* Partition: move l and r towards each other, swapping elements which are on the wrong side */
+      do
+       {
+         while (ASORT_LT(ASORT_ELT(l), pivot))
+           l++;
+         while (ASORT_LT(pivot, ASORT_ELT(r)))
+           r--;
+         if (l < r)
+           {
+             ASORT_SWAP(l,r);
+             l++;
+             r--;
+           }
+         else if (l == r)
+           {
+             l++;
+             r--;
+           }
+       }
+      while (l <= r);
+      /* Now the partitions are [left,r] and [l,right] */
+      if ((r - left) >= ASORT_THRESHOLD && (right - l) >= ASORT_THRESHOLD)
+       {
+         /* Both partitions ok => push the larger one */
+         if ((r - left) > (right - l))
+           {
+             stack[sp].l = left;
+             stack[sp].r = r;
+             left = l;
+           }
+         else
+           {
+             stack[sp].l = l;
+             stack[sp].r = right;
+             right = r;
+           }
+         sp++;
+       }
+      else if ((r - left) >= ASORT_THRESHOLD)
+       {
+         /* Left partition OK, right undersize */
+         right = r;
+       }
+      else if ((right - l) >= ASORT_THRESHOLD)
+       {
+         /* Right partition OK, left undersize */
+         left = l;
+       }
+      else
+       {
+         /* Both partitions undersize => pop */
+         if (!sp)
+           break;
+         sp--;
+         left = stack[sp].l;
+         right = stack[sp].r;
+       }
+    }
+
+  /*
+   * We have a partially sorted array, finish by insertsort. Inspired
+   * by qsort() in GNU libc.
+   */
+
+  /* Find minimal element which will serve as a barrier */
+  r = MIN(array_size, ASORT_THRESHOLD);
+  m = 0;
+  for (l=1; l<r; l++)
+    if (ASORT_LT(ASORT_ELT(l),ASORT_ELT(m)))
+      m = l;
+  ASORT_SWAP(0,m);
+
+  /* Insertion sort */
+  for (m=1; m<(int)array_size; m++)
+    {
+      l=m;
+      /* Find the position for element m; the scan has no bounds check and relies on the barrier at index 0 */
+      while (ASORT_LT(ASORT_ELT(m),ASORT_ELT(l-1)))
+       l--;
+      /* Move element m to position l by a sequence of swaps, shifting the elements in between by one */
+      while (l < m)
+       {
+         ASORT_SWAP(l,m);
+         l++;
+       }
+    }
+}
+
+#undef ASORT_PREFIX
+#undef ASORT_KEY_TYPE
+#undef ASORT_ELT
+#undef ASORT_LT
+#undef ASORT_SWAP
+#undef ASORT_THRESHOLD
+#undef ASORT_EXTRA_ARGS
diff --git a/lib/asio.c b/lib/asio.c

new file mode 100644 (file)

index 0000000..e33e03e
--- /dev/null
+++ b/lib/asio.c
@@ -0,0 +1,289 @@
+/*
+ *     UCW Library -- Asynchronous I/O
+ *
+ *     (c) 2006 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#undef LOCAL_DEBUG
+
+#include "lib/lib.h"
+#include "lib/asio.h"
+#include "lib/threads.h"
+
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+
+static uns asio_num_users;
+static struct worker_pool asio_wpool;
+
+/*
+ * Register one more user of the ASIO module; the first user also starts
+ * the shared worker pool (with a single thread). No locking is done here,
+ * asio_init_queue() calls this function with the library lock held.
+ */
+static void
+asio_init_unlocked(void)
+{
+  uns users_before = asio_num_users++;
+
+  if (!users_before)
+    {
+      DBG("ASIO: INIT");
+      asio_wpool.num_threads = 1;
+      worker_pool_init(&asio_wpool);
+    }
+}
+
+/*
+ * Unregister one user of the ASIO module; when the last user is gone,
+ * the shared worker pool is cleaned up. No locking is done here,
+ * asio_cleanup_queue() calls this function with the library lock held.
+ */
+static void
+asio_cleanup_unlocked(void)
+{
+  asio_num_users--;
+
+  if (!asio_num_users)
+    {
+      DBG("ASIO: CLEANUP");
+      worker_pool_cleanup(&asio_wpool);
+    }
+}
+
+/*
+ * Initialize a queue. The caller has to fill in q->buffer_size beforehand
+ * (it is asserted to be non-zero). All counters are reset, both request lists
+ * become empty and the work queue is created on the shared ASIO worker pool.
+ */
+void
+asio_init_queue(struct asio_queue *q)
+{
+  /* The user count of the shared worker pool is modified under the library lock */
+  ucwlib_lock();
+  asio_init_unlocked();
+  ucwlib_unlock();
+
+  DBG("ASIO: New queue %p", q);
+  ASSERT(q->buffer_size);
+  q->allocated_requests = 0;
+  q->running_requests = 0;
+  q->running_writebacks = 0;
+  q->use_count = 0;
+  clist_init(&q->idle_list);
+  clist_init(&q->done_list);
+  work_queue_init(&asio_wpool, &q->queue);
+}
+
+/*
+ * Destroy a queue. The queue must be completely quiescent: no running
+ * requests or write-backs, no requests held by the caller and nothing left
+ * in the done list (all of this is asserted). Recycled requests are freed
+ * together with their buffers.
+ */
+void
+asio_cleanup_queue(struct asio_queue *q)
+{
+  struct asio_request *req;
+
+  DBG("ASIO: Removing queue %p", q);
+  ASSERT(!q->running_requests);
+  ASSERT(!q->running_writebacks);
+  ASSERT(!q->allocated_requests);
+  ASSERT(clist_empty(&q->done_list));
+
+  /* Release all idle requests */
+  for (;;)
+    {
+      req = clist_remove_head(&q->idle_list);
+      if (!req)
+        break;
+      big_free(req->buffer, q->buffer_size);
+      xfree(req);
+    }
+
+  work_queue_cleanup(&q->queue);
+
+  /* Drop our reference to the shared worker pool */
+  ucwlib_lock();
+  asio_cleanup_unlocked();
+  ucwlib_unlock();
+}
+
+/*
+ * Get an empty request for the given queue: either a recycled one from
+ * the idle list, or a newly allocated one with a buffer of q->buffer_size
+ * bytes. The request is returned reset to the ASIO_FREE state.
+ */
+struct asio_request *
+asio_get(struct asio_queue *q)
+{
+  struct asio_request *req;
+
+  q->allocated_requests++;
+  req = clist_head(&q->idle_list);
+  if (req)
+    {
+      /* Recycle an idle request */
+      clist_remove(&req->work.n);
+      DBG("ASIO: Got %p", req);
+    }
+  else
+    {
+      /* Nothing to recycle, create a new request */
+      req = xmalloc_zero(sizeof(*req));
+      req->queue = q;
+      req->buffer = big_alloc(q->buffer_size);
+      DBG("ASIO: Got %p (new)", req);
+    }
+
+  req->op = ASIO_FREE;
+  req->fd = -1;
+  req->len = 0;
+  req->status = -1;
+  req->returned_errno = -1;
+  req->submitted = 0;
+  return req;
+}
+
+/*
+ * Wait for one request of the queue to be finished and account for it.
+ * A finished write-back is checked (a failed or short write is reported
+ * by die()) and recycled immediately by asio_put(); any other request is
+ * appended to the done list of the queue.
+ *
+ * Returns 1 if a request has been processed, 0 if work_wait() has returned
+ * no request.
+ */
+static int
+asio_raw_wait(struct asio_queue *q)
+{
+  struct asio_request *r = (struct asio_request *) work_wait(&q->queue);
+  if (!r)
+    return 0;
+  r->submitted = 0;
+  q->running_requests--;
+  if (r->op == ASIO_WRITE_BACK)
+    {
+      DBG("ASIO: Finished writeback %p", r);
+      if (r->status < 0)
+        die("Asynchronous write to fd %d failed: %s", r->fd, strerror(r->returned_errno));
+      if (r->status != (int)r->len)
+        /* r->status is an int, but r->len is unsigned, so it is printed by %u */
+        die("Asynchronous write to fd %d wrote only %d bytes out of %u", r->fd, r->status, r->len);
+      q->running_writebacks--;
+      asio_put(r);
+    }
+  else
+    clist_add_tail(&q->done_list, &r->work.n);
+  return 1;
+}
+
+/*
+ * Work handler (installed as work.go by asio_submit()): carry out
+ * the read() or write() described by the request and store the result
+ * of the call in r->status and the value of errno in r->returned_errno.
+ */
+static void
+asio_handler(struct worker_thread *t UNUSED, struct work *w)
+{
+  /* The work node is the first member of struct asio_request, hence the cast */
+  struct asio_request *r = (struct asio_request *) w;
+
+  DBG("ASIO: Servicing %p (%s on fd=%d, len=%d)", r,
+      (char*[]) { "?", "READ", "WRITE", "WRITEBACK" }[r->op], r->fd, r->len);
+  /* Clear errno first, so that returned_errno is 0 unless the call sets it */
+  errno = 0;
+  switch (r->op)
+    {
+    case ASIO_READ:
+      r->status = read(r->fd, r->buffer, r->len);
+      break;
+    case ASIO_WRITE:
+    case ASIO_WRITE_BACK:
+      r->status = write(r->fd, r->buffer, r->len);
+      break;
+    default:
+      die("ASIO: Got unknown request type %d", r->op);
+    }
+  r->returned_errno = errno;
+  DBG("ASIO: Finished %p (status=%d, errno=%d)", r, r->status, r->returned_errno);
+}
+
+/*
+ * Submit a request to its queue. The request must have its operation set
+ * (anything but ASIO_FREE) and must not be submitted already. Submitting
+ * a write-back blocks as long as q->max_writebacks write-backs are running.
+ */
+void
+asio_submit(struct asio_request *r)
+{
+  struct asio_queue *q = r->queue;
+  DBG("ASIO: Submitting %p on queue %p", r, q);
+  ASSERT(r->op != ASIO_FREE);
+  ASSERT(!r->submitted);
+  if (r->op == ASIO_WRITE_BACK)
+    {
+      /* Throttle write-backs: wait for finished requests until one of the write-back slots is free */
+      while (q->running_writebacks >= q->max_writebacks)
+       {
+         DBG("ASIO: Waiting for free writebacks");
+         /* Write-backs are running, so running out of requests to wait for is a bug */
+         if (!asio_raw_wait(q))
+           ASSERT(0);
+       }
+      q->running_writebacks++;
+    }
+  q->running_requests++;
+  r->submitted = 1;
+  r->work.go = asio_handler;
+  r->work.priority = 0;
+  work_submit(&q->queue, &r->work);
+}
+
+/*
+ * Return the first finished request of the queue (removed from the done
+ * list), waiting for running requests if needed. Returns NULL when the done
+ * list is empty and asio_raw_wait() has nothing more to wait for.
+ */
+struct asio_request *
+asio_wait(struct asio_queue *q)
+{
+  struct asio_request *done;
+
+  for (;;)
+    {
+      done = clist_head(&q->done_list);
+      if (done)
+        break;
+      DBG("ASIO: Waiting on queue %p", q);
+      if (!asio_raw_wait(q))
+        return NULL;
+    }
+
+  clist_remove(&done->work.n);
+  DBG("ASIO: Done %p", done);
+  return done;
+}
+
+/*
+ * Return a request which is not submitted to the idle list of its queue,
+ * from where asio_get() can recycle it. The request and its buffer stay
+ * allocated until asio_cleanup_queue().
+ */
+void
+asio_put(struct asio_request *r)
+{
+  struct asio_queue *q = r->queue;
+  DBG("ASIO: Put %p", r);
+  ASSERT(!r->submitted);
+  ASSERT(q->allocated_requests);
+  clist_add_tail(&q->idle_list, &r->work.n);
+  q->allocated_requests--;
+}
+
+/*
+ * Wait until no requests are running on the queue. Finished read and
+ * write requests stay in the done list, finished write-backs get recycled.
+ */
+void
+asio_sync(struct asio_queue *q)
+{
+  DBG("ASIO: Syncing queue %p", q);
+  while (q->running_requests)
+    {
+      int processed = asio_raw_wait(q);
+      /* Requests are still running, so there must have been something to wait for */
+      if (!processed)
+        ASSERT(0);
+    }
+}
+
+#ifdef TEST
+
+/*
+ * Test program (compiled only when TEST is defined). The enabled variant
+ * reads a single byte from fd 0, writes the letters 'A' to 'J' to fd 1 by
+ * write-back requests and finishes with a newline sent by an ordinary write.
+ */
+int main(void)
+{
+  struct asio_queue q;
+  struct asio_request *r;
+
+  /* The user-settable fields have to be filled in before the queue is initialized */
+  q.buffer_size = 4096;
+  q.max_writebacks = 2;
+  asio_init_queue(&q);
+
+#if 0
+
+  /* Disabled variant: copy fd 0 to fd 1, reading synchronously and writing by write-backs */
+  for (;;)
+    {
+      r = asio_get(&q);
+      r->op = ASIO_READ;
+      r->fd = 0;
+      r->len = q.buffer_size;
+      asio_submit(r);
+      r = asio_wait(&q);
+      ASSERT(r);
+      if (r->status <= 0)
+       {
+         asio_put(r);
+         break;
+       }
+      /* Reuse the request (and the data in its buffer) for writing */
+      r->op = ASIO_WRITE_BACK;
+      r->fd = 1;
+      r->len = r->status;
+      asio_submit(r);
+    }
+  asio_sync(&q);
+
+#else
+
+  /* A single read of one byte, waited for explicitly */
+  r = asio_get(&q);
+  r->op = ASIO_READ;
+  r->fd = 0;
+  r->len = 1;
+  asio_submit(r);
+  r = asio_wait(&q);
+  ASSERT(r);
+  asio_put(r);
+
+  /* Ten write-backs (more than max_writebacks, so asio_submit() has to wait for free slots) */
+  for (uns i=0; i<10; i++)
+    {
+      r = asio_get(&q);
+      r->op = ASIO_WRITE_BACK;
+      r->fd = 1;
+      r->len = 1;
+      r->buffer[0] = 'A' + i;
+      asio_submit(r);
+    }
+  asio_sync(&q);
+
+  /* An ordinary write, which has to be collected by asio_wait() and returned by asio_put() */
+  r = asio_get(&q);
+  r->op = ASIO_WRITE;
+  r->fd = 1;
+  r->len = 1;
+  r->buffer[0] = '\n';
+  asio_submit(r);
+  r = asio_wait(&q);
+  ASSERT(r);
+  asio_put(r);
+
+#endif
+
+  asio_cleanup_queue(&q);
+  return 0;
+}
+
+#endif
diff --git a/lib/asio.h b/lib/asio.h

new file mode 100644 (file)

index 0000000..6773c81
--- /dev/null
+++ b/lib/asio.h
@@ -0,0 +1,70 @@
+/*
+ *     UCW Library -- Asynchronous I/O
+ *
+ *     (c) 2006 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_ASIO_H
+#define _UCW_ASIO_H
+
+#include "lib/workqueue.h"
+#include "lib/clists.h"
+
+/*
+ *  This module takes care of scheduling and executing asynchronous I/O requests
+ *  on files opened with O_DIRECT. It is primarily used by the fb-direct fastbuf
+ *  back-end, but you can use it explicitly, too.
+ *
+ *  You can define several I/O queues, each for use by a single thread. Requests
+ *  on a single queue are always processed in order of their submits, requests
+ *  from different queues may be interleaved (although the current implementation
+ *  does not do so). Normal read and write requests are returned to their queue
+ *  when they are completed. Write-back requests are automatically freed when
+ *  done, but the number of such requests in flight is limited in order to avoid
+ *  consuming all memory, so submitting a write-back request can block.
+ */
+
+/* A queue of asynchronous I/O requests; set the [user-settable] fields before calling asio_init_queue() */
+struct asio_queue {
+  uns buffer_size;                     // How large buffers do we use [user-settable]
+  uns max_writebacks;                  // Maximum number of writeback requests active [user-settable]
+  uns allocated_requests;              // Requests handed out by asio_get() and not yet returned by asio_put()
+  uns running_requests;                        // Total number of running requests
+  uns running_writebacks;              // How many of them are writebacks
+  clist idle_list;                     // Recycled requests waiting for get
+  clist done_list;                     // Finished requests
+  struct work_queue queue;             // Work queue used for executing the requests
+  uns use_count;                       // For use by the caller
+};
+
+/* Operation requested by an asio_request */
+enum asio_op {
+  ASIO_FREE,                           // No operation set (state of a request returned by asio_get())
+  ASIO_READ,                           // read() from the fd to the buffer
+  ASIO_WRITE,                          // write() the buffer to the fd
+  ASIO_WRITE_BACK,                     // Background write with no success notification
+};
+
+/* A single I/O request; obtained by asio_get(), filled in by the caller and passed to asio_submit() */
+struct asio_request {
+  struct work work;                    // asio_requests are internally just work nodes
+  struct asio_queue *queue;            // The queue this request belongs to
+  byte *buffer;                                // Data buffer of queue->buffer_size bytes
+  int fd;                              // File descriptor to operate on
+  enum asio_op op;                     // Requested operation
+  uns len;                             // Number of bytes to read or write
+  int status;                          // Return value of the read() or write() call
+  int returned_errno;                  // Value of errno after the call
+  int submitted;                       // Non-zero between asio_submit() and completion of the request
+  void *user_data;                     // For use by the caller
+};
+
+void asio_init_queue(struct asio_queue *q);                    // Initialize a new queue
+void asio_cleanup_queue(struct asio_queue *q);                 // Destroy a queue with no running or allocated requests
+struct asio_request *asio_get(struct asio_queue *q);           // Get an empty request
+void asio_submit(struct asio_request *r);                      // Submit the request (can block if too many writebacks)
+struct asio_request *asio_wait(struct asio_queue *q);          // Wait for the first finished request, NULL if no more
+void asio_put(struct asio_request *r);                         // Return a finished request for recycling
+void asio_sync(struct asio_queue *q);                          // Wait until all requests are finished
+
+#endif /* !_UCW_ASIO_H */
diff --git a/lib/asio.t b/lib/asio.t

new file mode 100644 (file)

index 0000000..b660657
--- /dev/null
+++ b/lib/asio.t
@@ -0,0 +1,4 @@
+# Tests for asynchronous I/O
+
+Run:   echo y | ../obj/lib/asio-t
+Out:   ABCDEFGHIJ
diff --git a/lib/asort-test.c b/lib/asort-test.c

new file mode 100644 (file)

index 0000000..9c6abd4
--- /dev/null
+++ b/lib/asort-test.c
@@ -0,0 +1,77 @@
+/*
+ *     UCW Library -- Universal Array Sorter Test and Benchmark
+ *
+ *     (c) 2003 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#define N 4000037                      /* a prime */
+
+struct elt {
+  u32 key;
+  u32 x, y;
+};
+
+static struct elt array[N];
+
+#define ASORT_KEY_TYPE u32
+#define ASORT_ELT(i) array[i].key
+#define ASORT_SWAP(i,j) do { struct elt e=array[j]; array[j]=array[i]; array[i]=e; } while(0)
+
+/*
+ * Fill the keys of the array with test data. The variant is selected by
+ * the preprocessor conditions below: descending keys, ascending keys, or
+ * (the enabled one) a sequence starting with 3141592 where each key is
+ * the previous one plus 1944833754 modulo N.
+ */
+static void generate(void)
+{
+  uns i;
+  for (i=0; i<N; i++)
+#if 0
+    /* Keys in descending order */
+    ASORT_ELT(i) = N-i-1;
+#elif 0
+    /* Keys already sorted */
+    ASORT_ELT(i) = i;
+#else
+    /* Scattered keys; check() expects them to form a permutation of 0..N-1 */
+    ASORT_ELT(i) = (i ? ASORT_ELT(i-1)+1944833754 : 3141592) % N;
+#endif
+}
+
+/*
+ * Verify the result of sorting: every key must be equal to its position
+ * in the array. Each mismatch is reported on standard output.
+ */
+static void check(void)
+{
+  uns i;
+  for (i=0; i<N; i++)
+    if (ASORT_ELT(i) != i)
+      /* The position is unsigned, so it is printed by %u */
+      printf("error at pos %u: %08x != %08x\n", i, ASORT_ELT(i), i);
+}
+
+/*
+ * Comparison callback for qsort(): orders the elements by ascending key.
+ * It has exactly the prototype qsort() expects, so it can be passed without
+ * a function pointer cast (calling a function through a pointer to an
+ * incompatible function type is undefined behavior in C).
+ */
+static int qs_comp(const void *x, const void *y)
+{
+  const struct elt *X = x, *Y = y;
+
+  if (X->key < Y->key)
+    return -1;
+  else if (X->key > Y->key)
+    return 1;
+  else
+    return 0;
+}
+
+#define ASORT_PREFIX(x) as_##x
+#include "lib/arraysort.h"
+
+/*
+ * Sort the same generated data first by qsort() and then by the array
+ * sorter, printing the time taken and checking the result of each.
+ */
+int main(void)
+{
+  timestamp_t timer;
+
+  generate();
+  init_timer(&timer);
+  qsort(array, N, sizeof(array[0]), qs_comp);
+  printf("qsort: %d ms\n", get_timer(&timer));
+  check();
+  generate();
+  init_timer(&timer);
+  as_sort(N);
+  printf("asort: %d ms\n", get_timer(&timer));
+  check();
+  return 0;
+}
diff --git a/lib/autoconf.cfg b/lib/autoconf.cfg

new file mode 100644 (file)

index 0000000..b0e3c53
--- /dev/null
+++ b/lib/autoconf.cfg
@@ -0,0 +1,277 @@
+# Automatic configuration of the UCW Library
+# (c) 2005--2007 Martin Mares <mj@ucw.cz>
+# (c) 2006 Robert Spalek <robert@ucw.cz>
+
+### OS ###
+
+Test("OS", "Checking on which OS we run", sub {
+       my $os = `uname`;
+       chomp $os;
+       Fail "Unable to determine OS type" if $? || $os eq "";
+       return $os;
+});
+
+if (Get("OS") eq "Linux") {
+       Set("CONFIG_LINUX");
+} elsif (Get("OS") eq "Darwin") {
+       Set("CONFIG_DARWIN");
+} else {
+       Fail "Don't know how to run on this operating system.";
+}
+
+### Compiler ###
+
+# Default compiler
+Test("CC", "Checking for C compiler", sub { return "gcc"; });
+
+# GCC version
+Test("GCCVER", "Checking for GCC version", sub {
+       my $gcc = Get("CC");
+       my $ver = `$gcc --version | sed '2,\$d; s/^\\(.* \\)*\\([0-9]*\\.[0-9]*\\).*/\\2/'`;
+       chomp $ver;
+       Fail "Unable to determine GCC version" if $? || $ver eq "";
+       return $ver;
+});
+my ($gccmaj, $gccmin) = split(/\./, Get("GCCVER"));
+my $gccver = 1000*$gccmaj + $gccmin;
+$gccver >= 3000 or Fail "GCC older than 3.0 doesn't support C99 well enough.";
+
+### CPU ###
+
+Test("ARCH", "Checking for machine architecture", sub {
+       my $mach = `uname -m`;
+       chomp $mach;
+       Fail "Unable to determine machine type" if $? || $mach eq "";
+       if ($mach =~ /^i[0-9]86$/) {
+               return "i386";
+       } elsif ($mach =~ /^(x86[_-]|amd)64$/) {
+               return "amd64";
+       } else {
+               return "unknown";
+       }
+});
+
+# Parse /proc/cpuinfo up to the first empty line (presumably the end of the
+# first processor's section) and return the list (vendor_id, cpu family, model).
+# Fields which were not found are undef. If the file cannot be opened,
+# an empty list is returned; parse_cpuinfo() supplies defaults in both cases.
+sub parse_cpuinfo_linux() {
+       # Beware of precedence: `open X, "file" || ...' applies the || to the file
+       # name, so the result of open() would never be checked. Use `or' instead.
+       open(X, "<", "/proc/cpuinfo") or return;
+       my %pc = ();
+       while (<X>) {
+               chomp;
+               /^$/ && last;
+               # Lines look like "key<TAB>: value"
+               /^([^\t]+)\t+:\s*(.*)$/ and $pc{$1}=$2;
+       }
+       close X;
+       return ($pc{'vendor_id'},
+               $pc{'cpu family'},
+               $pc{'model'});
+}
+
+# Ask sysctl for the CPU vendor, family and model and return them as a list
+# with trailing newlines removed.
+sub parse_cpuinfo_darwin() {
+       # Declared lexically, so that no global @cpu is created or overwritten
+       my @cpu = (`sysctl -n machdep.cpu.vendor`,
+               `sysctl -n machdep.cpu.family`,
+               `sysctl -n machdep.cpu.model`);
+       chomp @cpu;
+       return @cpu;
+}
+
+# Return (vendor, family, model) of the CPU as found by the OS-specific parser.
+# Missing items are replaced by defaults: "" for the vendor, 0 for the numbers.
+sub parse_cpuinfo() {
+       my @info = ();
+       if (IsSet("CONFIG_LINUX")) {
+               @info = parse_cpuinfo_linux();
+       } elsif (IsSet("CONFIG_DARWIN")) {
+               @info = parse_cpuinfo_darwin();
+       }
+       my @defaults = ("", 0, 0);
+       for my $i (0..2) {
+               $info[$i] = $defaults[$i] unless defined $info[$i];
+       }
+       return @info;
+}
+
+# Determine CPU_ARCH and set the CPU_* symbols describing the architecture
+# (pointer size, endianity, unaligned access, structure alignment).
+# The result is "default" unless CONFIG_EXACT_CPU is set, "unknown" for
+# an unrecognized machine type, otherwise a CPU name guessed from the vendor,
+# family and model (it is used for -march= further below).
+Test("CPU_ARCH", "Checking for CPU architecture", sub {
+       my $mach = Get("ARCH");
+       my $arch = "";
+       if ($mach eq "i386") {
+               Set("CPU_I386");
+               UnSet("CPU_64BIT_POINTERS");
+               Set("CPU_LITTLE_ENDIAN");
+               UnSet("CPU_BIG_ENDIAN");
+               Set("CPU_ALLOW_UNALIGNED");
+               Set("CPU_STRUCT_ALIGN" => 4);
+               if (IsSet("CONFIG_EXACT_CPU")) {
+                       my ($vendor, $family, $model) = parse_cpuinfo();
+                       # Try to understand CPU vendor, family and model [inspired by MPlayer's configure script]
+                       # ($gccver is 1000*major + minor, some CPU names need a new enough GCC)
+                       if ($vendor eq "AuthenticAMD") {
+                               if ($family >= 6) {
+                                       if ($model >= 31 && $gccver >= 3004) { $arch = "athlon64"; }
+                                       elsif ($model >= 6 && $gccver >= 3003) { $arch = "athlon-xp"; }
+                                       else { $arch = "athlon"; }
+                               }
+                       } elsif ($vendor eq "GenuineIntel") {
+                               if ($family >= 15 && $gccver >= 3003) {
+                                       if ($model >= 4) { $arch = "nocona"; }
+                                       elsif ($model >= 3) { $arch = "prescott"; }
+                                       else { $arch = "pentium4"; }
+                               } elsif ($family == 6 && $gccver >= 3003) {
+                                       if ($model == 15) { $arch = "prescott"; }
+                                       elsif (($model == 9 || $model == 13) && $gccver >= 3004) { $arch = "pentium-m"; }
+                                       elsif ($model >= 7) { $arch = "pentium3"; }
+                                       elsif ($model >= 3) { $arch = "pentium2"; }
+                               }
+                       }
+
+                       # No match on vendor, try the family
+                       if ($arch eq "") {
+                               if ($family >= 6) {
+                                       $arch = "i686";
+                               } elsif ($family >= 3) {
+                                       $arch = "i${family}86";
+                               }
+                       }
+                       # NOTE(review): the message mentions /proc/cpuinfo even if parse_cpuinfo()
+                       # has used sysctl (on Darwin).
+                       # NOTE(review): $arch can still be empty here (e.g., family < 3); only "unknown"
+                       # and "default" are excluded when -march is appended later -- confirm how
+                       # an empty result is handled.
+                       Log (($arch ne "") ? "(using /proc/cpuinfo) " : "(don't understand /proc/cpuinfo) ");
+                       return $arch;
+               } else {
+                       return "default";
+               }
+       } elsif ($mach eq "amd64") {
+               Set("CPU_AMD64");
+               Set("CPU_64BIT_POINTERS");
+               Set("CPU_LITTLE_ENDIAN");
+               UnSet("CPU_BIG_ENDIAN");
+               Set("CPU_ALLOW_UNALIGNED");
+               Set("CPU_STRUCT_ALIGN" => 8);
+               if (IsSet("CONFIG_EXACT_CPU")) {
+                       # In x86-64 world, the detection is somewhat easier so far...
+                       my ($vendor, $family, $model) = parse_cpuinfo();
+                       if ($vendor eq "AuthenticAMD") {
+                               $arch = "athlon64";
+                       } elsif ($vendor eq "GenuineIntel") {
+                               $arch = "nocona";
+                       }
+                       Log (($arch ne "") ? "(using /proc/cpuinfo) " : "(don't understand /proc/cpuinfo) ");
+                       return $arch;
+               } else {
+                       return "default";
+               }
+       } else {
+               return "unknown";
+       }
+});
+
+if (Get("CPU_ARCH") eq "unknown") {
+       Warn "CPU architecture not recognized, using defaults, keep fingers crossed.\n";
+}
+
+### Compiler and its Options ###
+
+# C flags: tell the compiler we're speaking C99, and disable common symbols
+Set("CLANG" => "-std=gnu99 -fno-common");
+
+# C optimizations
+Set("COPT" => '-O2');
+if (Get("CPU_ARCH") ne "unknown" && Get("CPU_ARCH") ne "default") {
+       Append("COPT", '-march=$(CPU_ARCH)');
+}
+
+# C optimizations for highly exposed code
+Set("COPT2" => '-O3');
+
+# Warnings
+Set("CWARNS" => '-Wall -W -Wno-parentheses -Wstrict-prototypes -Wmissing-prototypes -Winline');
+Set("CWARNS_OFF" => '');
+
+# Linker flags
+Set("LOPT" => "");
+
+# Extra libraries
+Set("LIBS" => "");
+
+# Extra flags for compiling and linking shared libraries
+Set("CSHARED" => '-fPIC');
+if (IsSet("CONFIG_DARWIN")) {
+       Set("LSHARED" => '-dynamiclib -install_name lib/$(@F) -undefined dynamic_lookup');
+} else {
+       Set("LSHARED" => '-shared -Wl,-soname,lib/$(@F)');
+}
+
+# Extra switches depending on GCC version:
+if ($gccver == 3000) {
+       Append("COPT" => "-fstrict-aliasing");
+} elsif ($gccver == 3003) {
+       Append("CWARNS" => "-Wundef -Wredundant-decls");
+       Append("COPT" => "-finline-limit=20000 --param max-inline-insns-auto=1000");
+} elsif ($gccver == 3004) {
+       Append("CWARNS" => "-Wundef -Wredundant-decls");
+       Append("COPT" => "-finline-limit=2000 --param large-function-insns=5000 --param inline-unit-growth=200 --param large-function-growth=400");
+} elsif ($gccver == 4000 || $gccver == 4001) {
+       Append("CWARNS" => "-Wundef -Wredundant-decls -Wno-pointer-sign -Wdisabled-optimization -Wno-missing-field-initializers");
+       Append("CWARNS_OFF" => "-Wno-pointer-sign");
+       Append("COPT" => "-finline-limit=5000 --param large-function-insns=5000 --param inline-unit-growth=200 --param large-function-growth=400");
+} elsif ($gccver == 4002) {
+       Append("CWARNS" => "-Wundef -Wredundant-decls -Wno-pointer-sign -Wdisabled-optimization -Wno-missing-field-initializers");
+       Append("CWARNS_OFF" => "-Wno-pointer-sign");
+       Append("COPT" => "-finline-limit=5000 --param large-function-insns=5000 --param inline-unit-growth=200 --param large-function-growth=400 -fgnu89-inline");
+} else {
+       Warn "Don't know anything about this GCC version, using default switches.\n";
+}
+
+if (IsSet("CONFIG_DEBUG")) {
+       # If debugging:
+       Set("DEBUG_ASSERTS");
+       Set("DEBUG_DIE_BY_ABORT") if Get("CONFIG_DEBUG") > 1;
+       Set("CDEBUG" => "-ggdb");
+} else {
+       # If building a release version:
+       Append("COPT" => "-fomit-frame-pointer");
+       Append("LOPT" => "-s");
+}
+
+if (IsSet("CONFIG_DARWIN")) {
+       # gcc-4.0 on Darwin doesn't set this in the gnu99 mode
+       Append("CLANG" => "-fnested-functions");
+       # Directory hierarchy of the fink project
+       Append("LIBS" => "-L/sw/lib");
+       Append("COPT" => "-I/sw/include");
+       # Fill in some constants not found in the system header files
+       Set("SOL_TCP" => 6);            # missing in /usr/include/netinet/tcp.h
+}
+
+# Determine page size
+# The page size is obtained from an OS-specific helper command. The test
+# fails unless the command succeeds and prints a plain number: backticks
+# return undef only if the command could not be started, so the exit status
+# in $? is checked as well (as the OS and ARCH tests above do).
+Test("CPU_PAGE_SIZE", "Determining page size", sub {
+       my $p;
+       if (IsSet("CONFIG_DARWIN")) {
+               $p = `sysctl -n hw.pagesize`;
+               Fail "sysctl hw.pagesize failed" if $? || !defined $p;
+       } elsif (IsSet("CONFIG_LINUX")) {
+               $p = `getconf PAGE_SIZE`;
+               Fail "getconf PAGE_SIZE failed" if $? || !defined $p;
+       }
+       Fail "Don't know how to determine page size on this operating system" if !defined $p;
+       chomp $p;
+       Fail "Unable to determine page size" if $p !~ /^\d+$/;
+       return $p;
+});
+
+if (IsSet("CONFIG_LARGE_FILES") && IsSet("CONFIG_LINUX")) {
+       # Use 64-bit versions of file functions
+       Set("CONFIG_LFS");
+}
+
+# Decide how will lib/partmap.c work
+Set("PARTMAP_IS_MMAP") if IsSet("CPU_64BIT_POINTERS");
+
+# Option for lib/mempool.c
+Set("POOL_IS_MMAP");
+
+# Guess optimal bit width of the radix-sorter
+if (Get("CPU_ARCH") eq "default" || Get("CPU_ARCH") =~ /^i[345]86$/) {
+       # This should be safe everywhere
+       Set("CONFIG_UCW_RADIX_SORTER_BITS" => 10);
+} else {
+       # Use this on modern CPU's
+       Set("CONFIG_UCW_RADIX_SORTER_BITS" => 12);
+}
+
+# If debugging memory allocations:
+#LIBS+=-lefence
+#CDEBUG+=-DDEBUG_DMALLOC
+#LIBS+=-ldmalloc
+
+# Return success
+1;
diff --git a/lib/base224.c b/lib/base224.c

new file mode 100644 (file)

index 0000000..3d62471
--- /dev/null
+++ b/lib/base224.c
@@ -0,0 +1,213 @@
+/*
+ *     UCW Library -- Base 224 Encoding & Decoding
+ *
+ *     (c) 2002 Martin Mares <mj@ucw.cz>
+ *
+ *     The `base-224' encoding transforms general sequences of bytes
+ *     to sequences of non-control 8-bit characters (0x20-0xff). Since
+ *     224 and 256 are incompatible bases (there is no k,l: 224^k=256^l)
+ *     and we want to avoid lengthy calculations, we cheat a bit:
+ *
+ *     Each base-224 digit can be represented as a (base-7 digit, base-32 digit)
+ *     pair, so we pass the lower 5 bits directly and use a base-7 encoder
+ *     for the upper part. We process blocks of 39 bits and encode them
+ *     to 5 base-224 digits: we take 5x5 bits as the lower halves and convert
+ *     the remaining 14 bits in base-7 (2^14 = 16384 < 16807 = 7^5) to get
+ *     the 5 upper parts we need (with a little redundancy). Little endian
+ *     ordering is used to make handling of partial blocks easy.
+ *
+ *     We transform 39 source bits to 40 destination bits, stretching the data
+ *     by 1/39 = approx. 2.56%.
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#undef LOCAL_DEBUG
+
+#include "lib/lib.h"
+#include "lib/base224.h"
+
+static void
+encode_block(byte *w, u32 hi, u32 lo)
+{
+  uns x, y;                            /* x: the 14 bits routed through base-7; y: index of the output byte */
+
+  /* Encode one 39-bit block (lo = bits 0-31, low 7 bits of hi = bits 32-38) as the five bytes w[0..4].
+   *   Splitting of the 39-bit block: [a-e][0-4] are the base-32 digits, *'s are used for base-7.
+   *   +----------------+----------------+----------------+----------------+----------------+
+   *   +00******e4e3e2e1|e0******d4d3d2d1|d0******c4c3c2c1|c0******b4b3b2b1|b0****a4a3a2a1a0|
+   *   +----------------+----------------+----------------+----------------+----------------+
+   */
+
+  w[0] = lo & 0x1f;                    /* a: block bits 0-4 */
+  w[1] = (lo >> 7) & 0x1f;             /* b: block bits 7-11 */
+  w[2] = (lo >> 15) & 0x1f;            /* c: block bits 15-19 */
+  w[3] = (lo >> 23) & 0x1f;            /* d: block bits 23-27 */
+  w[4] = (lo >> 31) | ((hi << 1) & 0x1e);      /* e: block bit 31 (top of lo) and bits 32-35 (hi bits 0-3) */
+  x = (lo >> 5)  & 0x0003              /* block bits 5-6   -> x bits 0-1 */
+    | (lo >> 10) & 0x001c              /* block bits 12-14 -> x bits 2-4 */
+    | (lo >> 15) & 0x00e0              /* block bits 20-22 -> x bits 5-7 */
+    | (lo >> 20) & 0x0700              /* block bits 28-30 -> x bits 8-10 */
+    | (hi << 7)  & 0x3800;             /* block bits 36-38 (hi bits 4-6) -> x bits 11-13 */
+  DBG("<<< h=%08x l=%08x x=%d", hi, lo, x);
+  for (y=0; y<5; y++)                  /* emit x as 5 base-7 digits, least significant first */
+    {
+      w[y] += 0x20 + ((x % 7) << 5);   /* digit d goes to the top 3 bits as d+1, so every byte ends up >= 0x20 */
+      x /= 7;
+    }
+}
+
+uns
+base224_encode(byte *dest, const byte *src, uns len)   /* Encode len bytes of src to dest; returns the number of bytes produced */
+{
+  u32 lo=0, hi=0;                      /* 64-bit buffer accumulating input bits */
+  uns i=0;                             /* How many source bits do we have buffered */
+  u32 x;                               /* The input byte being added to the buffer */
+  byte *w=dest;                                /* Output position */
+
+  while (len--)
+    {
+      x = *src++;
+      if (i < 32)
+       {
+         lo |= x << i;
+         if (i > 24)                   /* the byte straddles the lo/hi boundary */
+           hi |= x >> (32-i);
+       }
+      else
+       hi |= x << (i-32);
+      i += 8;
+      if (i >= 39)                     /* a complete block is buffered */
+       {
+         encode_block(w, hi, lo);
+         w += 5;
+         lo = hi >> 7;                 /* keep the bits beyond the 39 just encoded */
+         hi = 0;
+         i -= 39;
+       }
+    }
+  if (i)                               /* Partial block */
+    {
+      encode_block(w, hi, lo);         /* always stores 5 bytes, even though fewer are counted below */
+      w += (i+8)/8;                    /* Just check logarithms if you want to understand */
+    }
+  return w - dest;
+}
+
+uns
+base224_decode(byte *dest, const byte *src, uns len)   /* Decode len bytes of src to dest; returns the number of bytes produced */
+{
+  u32 hi=0, lo=0;                      /* 64-bit buffer accumulating output bits */
+  uns i=0;                             /* How many bits do we have accumulated */
+  u32 h, l;                            /* Decoding of the current block */
+  uns x;                               /* base-7 part of the current block */
+  uns len0;                            /* Input length before the block, later the number of bytes the block consumed */
+  byte *start = dest;
+
+  do
+    {
+      if (!len)
+       break;
+      len0 = len;
+
+      ASSERT(*src >= 0x20);            /* byte 0 */
+      h = 0;
+      l = *src & 0x1f;                 /* low 5 bits are passed through directly */
+      x = (*src++ >> 5) - 1;           /* top 3 bits hold a base-7 digit plus one */
+      if (!--len)
+       goto blockend;
+
+      ASSERT(*src >= 0x20);            /* byte 1 */
+      l |= (*src & 0x1f) << 7;
+      x += ((*src++ >> 5) - 1) * 7;
+      if (!--len)
+       goto blockend;
+
+      ASSERT(*src >= 0x20);            /* byte 2 */
+      l |= (*src & 0x1f) << 15;
+      x += ((*src++ >> 5) - 1) * 7*7;
+      if (!--len)
+       goto blockend;
+
+      ASSERT(*src >= 0x20);            /* byte 3 */
+      l |= (*src & 0x1f) << 23;
+      x += ((*src++ >> 5) - 1) * 7*7*7;
+      if (!--len)
+       goto blockend;
+
+      ASSERT(*src >= 0x20);            /* byte 4 */
+      l |= (u32) *src << 31;           /* only bit 0 survives; cast first, shifting a signed int past its sign bit is undefined */
+      h = (*src & 0x1f) >> 1;
+      x += ((*src++ >> 5) - 1) * 7*7*7*7;
+      --len;
+
+    blockend:
+      len0 -= len;                     /* number of input bytes consumed by this block (1 to 5) */
+      l |= ((x & 0x0003) << 5)         /* Decode base-7 */
+       |  ((x & 0x001c) << 10)
+       |  ((x & 0x00e0) << 15)
+       |  ((x & 0x0700) << 20);
+      h |=  (x & 0x3800) >> 7;
+
+      DBG("<<< i=%d h=%08x l=%08x x=%d len0=%d", i, h, l, x, len0);
+      lo |= l << i;                    /* append the block after the i bits still buffered */
+      hi |= h << i;
+      if (i)                           /* a shift by 32-i would be invalid for i == 0 */
+       hi |= l >> (32-i);
+      i += len0*8 - 1;                 /* a block of n input bytes counts as 8n-1 bits (39 for a full one) */
+
+      while (i >= 8)                   /* flush all complete bytes */
+       {
+         *dest++ = lo;
+         lo = (lo >> 8U) | (hi << 24);
+         hi >>= 8;
+         i -= 8;
+       }
+    }
+  while (len0 == 5);                   /* a shorter block can only be the last one */
+  return dest-start;
+}
+
+#ifdef TEST
+
+#include <stdio.h>
+
+int main(int argc, char **argv)                /* Test filter stdin -> stdout: encodes by default, decodes when any argument is given */
+{
+#if 0                                  /* disabled variant: round-trip of a fixed 39-byte pattern */
+  byte i[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39 };
+  byte o[256], w[256];
+  uns l;
+  l = base224_encode(o, i, sizeof(i));
+  fwrite(o, 1, l, stdout);
+  fputc(0xaa, stdout);
+  l = base224_decode(w, o, l);
+  fwrite(w, 1, l, stdout);
+#else
+  if (argc > 1)
+    {
+      byte i[BASE224_OUT_CHUNK*17], o[BASE224_IN_CHUNK*17];    /* decode: input is read in multiples of the encoded chunk size */
+      uns l;
+      while (l = fread(i, 1, sizeof(i), stdin))
+       {
+         l = base224_decode(o, i, l);
+         fwrite(o, 1, l, stdout);
+       }
+    }
+  else
+    {
+      byte i[BASE224_IN_CHUNK*23], o[BASE224_OUT_CHUNK*23];    /* encode: input is read in multiples of the plain chunk size */
+      uns l;
+      while (l = fread(i, 1, sizeof(i), stdin))
+       {
+         l = base224_encode(o, i, l);
+         fwrite(o, 1, l, stdout);
+       }
+    }
+#endif
+
+  return 0;
+}
+
+#endif
diff --git a/lib/base224.h b/lib/base224.h

new file mode 100644 (file)

index 0000000..7e815d8
--- /dev/null
+++ b/lib/base224.h
@@ -0,0 +1,25 @@
+/*
+ *     UCW Library -- Base 224 Encoding & Decoding
+ *
+ *     (c) 2002 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+uns base224_encode(byte *dest, const byte *src, uns len);
+uns base224_decode(byte *dest, const byte *src, uns len);
+
+/*
+ * Warning: when encoding, at least 4 bytes of extra space are needed.
+ * Better use this macro to calculate buffer size.
+ */
+#define BASE224_ENC_LENGTH(x) (((x)*8+38)/39*5)
+
+/*
+ * When called for BASE224_IN_CHUNK-byte chunks, the result will be
+ * always BASE224_OUT_CHUNK bytes long. If a longer block is split
+ * to such chunks, the result will be identical.
+ */
+#define BASE224_IN_CHUNK 39
+#define BASE224_OUT_CHUNK 40
diff --git a/lib/base64.c b/lib/base64.c

new file mode 100644 (file)

index 0000000..ef8faa4
--- /dev/null
+++ b/lib/base64.c
@@ -0,0 +1,120 @@
+/*
+ *     UCW Library -- Base 64 Encoding & Decoding
+ *
+ *     (c) 2002, Robert Spalek <robert@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#undef LOCAL_DEBUG
+
+#include "lib/lib.h"
+#include "lib/base64.h"
+
+#include <string.h>
+
+static const byte base64_table[] =     /* the 64 alphabet characters in value order, followed by a NUL */
+       { 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
+         'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
+         'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
+         'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
+         '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/', '\0'
+       };
+static const byte base64_pad = '=';    /* fills incomplete trailing groups when encoding; ends decoding */
+
+uns
+base64_encode(byte *dest, const byte *src, uns len)
+{
+       /*
+        * Encode len bytes from src as base64 text in dest; returns the number of characters written.
+        */
+       byte *out = dest;
+
+       /* Whole groups: 3 octets (24 bits) are emitted as four 6-bit symbols. */
+       for (; len >= 3; src += 3, len -= 3) {
+               *out++ = base64_table[src[0] >> 2];
+               *out++ = base64_table[((src[0] & 0x03) << 4) + (src[1] >> 4)];
+               *out++ = base64_table[((src[1] & 0x0f) << 2) + (src[2] >> 6)];
+               *out++ = base64_table[src[2] & 0x3f];
+       }
+
+       /* Tail: the 1 or 2 remaining octets are padded with '=' to a complete 4-character group. */
+       if (len == 2) {
+               *out++ = base64_table[src[0] >> 2];
+               *out++ = base64_table[((src[0] & 0x03) << 4) + (src[1] >> 4)];
+               *out++ = base64_table[(src[1] & 0x0f) << 2];
+               *out++ = base64_pad;
+       }
+       else if (len == 1) {
+               *out++ = base64_table[src[0] >> 2];
+               *out++ = base64_table[(src[0] & 0x03) << 4];
+               *out++ = base64_pad;
+               *out++ = base64_pad;
+       }
+
+       return out - dest;
+}
+
+/* Decode len characters of base64 text from src to dest; returns the number of complete bytes stored. */
+uns
+base64_decode(byte *dest, const byte *src, uns len)
+{
+       const byte *current = src;
+       uns ch;
+       uns i = 0, j = 0;               /* i counts accepted symbols, j complete output bytes */
+       static byte reverse_table[256]; /* byte -> 6-bit value, 0xff for bytes outside the alphabet */
+       static uns table_built = 0;
+
+       if (table_built == 0) {
+               /*
+                * Fill the table from the 64 alphabet entries instead of calling strchr()
+                * for every byte value: strchr(base64_table, 0) matches the terminating
+                * NUL of the table, so a NUL byte in the input used to be decoded as the
+                * bogus value 64 instead of being skipped like any other invalid byte.
+                */
+               memset(reverse_table, 0xff, sizeof(reverse_table));
+               for (ch = 0; ch < 64; ch++)
+                       reverse_table[base64_table[ch]] = ch;
+               table_built = 1;
+       }
+
+       /* run through the whole string, converting as we go */
+       ch = 0;
+       while (len > 0) {
+               len--;
+               ch = *current++;
+               if (ch == base64_pad) break;    /* padding ends the data */
+
+               /* When Base64 gets POSTed, all pluses are interpreted as spaces.
+                  This line changes them back.  It's not exactly the Base64 spec,
+                  but it is completely compatible with it (the spec says that
+                  spaces are invalid).  This will also save many people considerable
+                  headache.  - Turadg Aleahmad <turadg@wise.berkeley.edu>
+                */
+
+               if (ch == ' ') ch = '+';
+
+               ch = reverse_table[ch];
+               if (ch == 0xff) continue;       /* silently skip bytes outside the alphabet */
+
+               switch(i % 4) {                 /* position of the symbol within its group of four */
+               case 0:
+                       dest[j] = ch << 2;
+                       break;
+               case 1:
+                       dest[j++] |= ch >> 4;
+                       dest[j] = (ch & 0x0f) << 4;     /* starts the next byte: dest[j] is written even if it never completes */
+                       break;
+               case 2:
+                       dest[j++] |= ch >>2;
+                       dest[j] = (ch & 0x03) << 6;
+                       break;
+               case 3:
+                       dest[j++] |= ch;
+                       break;
+               }
+               i++;
+       }
+       return j;
+}
diff --git a/lib/base64.h b/lib/base64.h

new file mode 100644 (file)

index 0000000..7890966
--- /dev/null
+++ b/lib/base64.h
@@ -0,0 +1,25 @@
+/*
+ *     UCW Library -- Base 64 Encoding & Decoding
+ *
+ *     (c) 2002, Robert Spalek <robert@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+uns base64_encode(byte *dest, const byte *src, uns len);
+uns base64_decode(byte *dest, const byte *src, uns len);
+
+/*
+ * Use this macro to calculate buffer size.
+ */
+#define BASE64_ENC_LENGTH(x) (((x)+2)/3 *4)
+
+/*
+ * When called for BASE64_IN_CHUNK-byte chunks, the result will be
+ * always BASE64_OUT_CHUNK bytes long. If a longer block is split
+ * to such chunks, the result will be identical.
+ */
+#define BASE64_IN_CHUNK 3
+#define BASE64_OUT_CHUNK 4
+
diff --git a/lib/bbuf.c b/lib/bbuf.c

new file mode 100644 (file)

index 0000000..9d4af26
--- /dev/null
+++ b/lib/bbuf.c
@@ -0,0 +1,86 @@
+/*
+ *     UCW Library -- Simple growing buffers for byte-sized items
+ *
+ *     (c) 2006 Pavel Charvat <pchar@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/bbuf.h"
+
+#include <stdio.h>
+
+char *
+bb_vprintf_at(bb_t *bb, uns ofs, const char *fmt, va_list args)        /* Format into bb starting at byte offset ofs; returns a pointer to the formatted text */
+{
+  bb_grow(bb, ofs + 1);                        /* presumably makes the buffer at least ofs+1 bytes long (gbuf.h helper) -- TODO confirm */
+  va_list args2;                       /* args may be needed several times, so every attempt works on a copy */
+  va_copy(args2, args);
+  int cnt = vsnprintf(bb->ptr + ofs, bb->len - ofs, fmt, args2);
+  va_end(args2);
+  if (cnt < 0)
+    {
+      /* Our C library doesn't support C99 return value of vsnprintf, so we need to iterate */
+      do
+        {
+         bb_do_grow(bb, bb->len + 1);
+          va_copy(args2, args);
+          cnt = vsnprintf(bb->ptr + ofs, bb->len - ofs, fmt, args2);
+          va_end(args2);
+       }
+      while (cnt < 0);
+    }
+  else if ((uns)cnt >= bb->len - ofs)  /* C99 behaviour: the output was truncated and cnt is the length needed */
+    {
+      bb_do_grow(bb, ofs + cnt + 1);   /* room for cnt characters plus the terminating zero */
+      va_copy(args2, args);
+      int cnt2 = vsnprintf(bb->ptr + ofs, bb->len - ofs, fmt, args2);
+      va_end(args2);
+      ASSERT(cnt2 == cnt);
+    }
+  return bb->ptr + ofs;
+}
+
+char *
+bb_printf_at(bb_t *bb, uns ofs, const char *fmt, ...)  /* Variadic front-end of bb_vprintf_at() */
+{
+  va_list args;
+  va_start(args, fmt);
+  char *res = bb_vprintf_at(bb, ofs, fmt, args);
+  va_end(args);
+  return res;
+}
+
+char *
+bb_vprintf(bb_t *bb, const char *fmt, va_list args)    /* Same as bb_vprintf_at() with offset 0 */
+{
+  return bb_vprintf_at(bb, 0, fmt, args);
+}
+
+char *
+bb_printf(bb_t *bb, const char *fmt, ...)      /* Variadic front-end formatting from offset 0 */
+{
+  va_list args;
+  va_start(args, fmt);
+  char *res = bb_vprintf_at(bb, 0, fmt, args);
+  va_end(args);
+  return res;
+}
+
+#ifdef TEST
+
+int main(void)                         /* Self-test; the expected output is recorded in lib/bbuf.t */
+{
+  bb_t bb;
+  bb_init(&bb);
+  char *x = bb_printf(&bb, "<Hello, %s!>", "World");
+  fputs(x, stdout);
+  x = bb_printf_at(&bb, 5, "<Hello, %50s!>\n", "World");       /* non-zero offset and a longer output than the first call */
+  fputs(x, stdout);
+  bb_done(&bb);
+  return 0;
+}
+
+#endif
diff --git a/lib/bbuf.h b/lib/bbuf.h

new file mode 100644 (file)

index 0000000..22e62bb
--- /dev/null
+++ b/lib/bbuf.h
@@ -0,0 +1,22 @@
+/*
+ *     UCW Library -- A simple growing buffer for byte-sized items.
+ *
+ *     (c) 2004 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_BBUF_H
+#define _UCW_BBUF_H
+
+#define        GBUF_TYPE       byte
+#define        GBUF_PREFIX(x)  bb_##x
+#include "lib/gbuf.h"
+
+char *bb_vprintf(bb_t *bb, const char *fmt, va_list args);
+char *bb_printf(bb_t *bb, const char *fmt, ...);
+char *bb_vprintf_at(bb_t *bb, uns ofs, const char *fmt, va_list args);
+char *bb_printf_at(bb_t *bb, uns ofs, const char *fmt, ...);
+
+#endif
diff --git a/lib/bbuf.t b/lib/bbuf.t

new file mode 100644 (file)

index 0000000..ebf9ecc
--- /dev/null
+++ b/lib/bbuf.t
@@ -0,0 +1,4 @@
+# Tests for growing buffers
+
+Run:   ../obj/lib/bbuf-t
+Out:   <Hello, World!><Hello,                                              World!>
diff --git a/lib/bigalloc.c b/lib/bigalloc.c

new file mode 100644 (file)

index 0000000..9581188
--- /dev/null
+++ b/lib/bigalloc.c
@@ -0,0 +1,111 @@
+/*
+ *     UCW Library -- Allocation of Large Aligned Buffers
+ *
+ *     (c) 2006--2007 Martin Mares <mj@ucw.cz>
+ *     (c) 2007 Pavel Charvat <char@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+
+#include <sys/mman.h>
+#include <string.h>
+#include <limits.h>
+
+void *
+page_alloc(u64 len)                    /* Map len bytes (a multiple of CPU_PAGE_SIZE) of anonymous memory; dies on failure */
+{
+  if (len > SIZE_MAX)                  /* the length would not fit in mmap's size_t argument */
+    die("page_alloc: Size %llu is too large for the current architecture", (unsigned long long) len);
+  ASSERT(!(len & (CPU_PAGE_SIZE-1)));
+  byte *p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+  if (p == (byte*) MAP_FAILED)
+    die("Cannot mmap %llu bytes of memory: %m", (unsigned long long) len);     /* %llu requires an unsigned argument */
+  return p;
+}
+
+void *
+page_alloc_zero(u64 len)               /* page_alloc() followed by explicit zeroing of the whole block */
+{
+  void *p = page_alloc(len);
+  bzero(p, len);
+  return p;
+}
+
+void
+page_free(void *start, u64 len)                /* Unmap a block; both start and len must be multiples of CPU_PAGE_SIZE */
+{
+  ASSERT(!(len & (CPU_PAGE_SIZE-1)));
+  ASSERT(!((uintptr_t) start & (CPU_PAGE_SIZE-1)));
+  munmap(start, len);                  /* the return value is not checked */
+}
+
+void *
+page_realloc(void *start, u64 old_len, u64 new_len)    /* Resize by allocate + copy + free; always returns a new block */
+{
+  void *p = page_alloc(new_len);
+  memcpy(p, start, MIN(old_len, new_len));     /* keep as much of the old contents as fits */
+  page_free(start, old_len);
+  return p;
+}
+
+static u64
+big_round(u64 len)                     /* presumably rounds len up to a multiple of CPU_PAGE_SIZE (ALIGN_TO is defined elsewhere) -- TODO confirm */
+{
+  return ALIGN_TO(len, (u64)CPU_PAGE_SIZE);
+}
+
+void *
+big_alloc(u64 len)                     /* Allocate len bytes via page_alloc(); debug builds add a protected page on each side */
+{
+  u64 l = big_round(len);
+  if (l > SIZE_MAX - 2*CPU_PAGE_SIZE)  /* leaves room for the two extra pages of the debug build */
+    die("big_alloc: Size %llu is too large for the current architecture", (unsigned long long) len);   /* %llu requires an unsigned argument */
+#ifdef CONFIG_DEBUG
+  l += 2*CPU_PAGE_SIZE;
+#endif
+  byte *p = page_alloc(l);
+#ifdef CONFIG_DEBUG
+  *(u64*)p = len;                      /* remember the requested size in the first page; big_free() checks it */
+  mprotect(p, CPU_PAGE_SIZE, PROT_NONE);
+  mprotect(p+l-CPU_PAGE_SIZE, CPU_PAGE_SIZE, PROT_NONE);
+  p += CPU_PAGE_SIZE;                  /* the caller gets the area between the two protected pages */
+#endif
+  return p;
+}
+
+void *
+big_alloc_zero(u64 len)                        /* big_alloc() followed by zeroing of the whole rounded-up area */
+{
+  void *p = big_alloc(len);
+  bzero(p, big_round(len));
+  return p;
+}
+
+void
+big_free(void *start, u64 len)         /* Free a block from big_alloc(); len must be the length that was requested there */
+{
+  byte *p = start;
+  u64 l = big_round(len);
+#ifdef CONFIG_DEBUG
+  p -= CPU_PAGE_SIZE;                  /* step back to the leading protected page */
+  mprotect(p, CPU_PAGE_SIZE, PROT_READ);       /* make the size stored by big_alloc() readable again */
+  ASSERT(*(u64*)p == len);
+  l += 2*CPU_PAGE_SIZE;
+#endif
+  page_free(p, l);
+}
+
+#ifdef TEST
+
+int main(void)                         /* Smoke test: allocate and free one block */
+{
+  byte *p = big_alloc(123456);
+  // p[-1] = 1;                                /* uncomment to write just below the block */
+  big_free(p, 123456);
+  return 0;
+}
+
+#endif
diff --git a/lib/binheap-node.h b/lib/binheap-node.h

new file mode 100644 (file)

index 0000000..44be9f4
--- /dev/null
+++ b/lib/binheap-node.h
@@ -0,0 +1,19 @@
+/*
+ *     UCW Library -- Binomial Heaps: Declarations
+ *
+ *     (c) 2003 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+struct bh_node {
+  struct bh_node *first_son;           /* head of the list of sons */
+  struct bh_node *last_son;            /* tail of that list; merge in binheap.h appends new sons here */
+  struct bh_node *next_sibling;                /* next node in the list this node belongs to */
+  byte order;                          /* incremented by merge each time another tree is hung below this node */
+};
+
+struct bh_heap {
+  struct bh_node root;                 /* only a list head: its first_son is the start of the list of trees */
+};
diff --git a/lib/binheap-test.c b/lib/binheap-test.c

new file mode 100644 (file)

index 0000000..bfd28a0
--- /dev/null
+++ b/lib/binheap-test.c
@@ -0,0 +1,94 @@
+/*
+ *     UCW Library -- Binomial Heaps: Testing
+ *
+ *     (c) 2003 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+
+#include <stdio.h>
+#include <string.h>
+
+#define BH_PREFIX(x) bht_##x
+#define BH_WANT_INSERT
+#define BH_WANT_FINDMIN
+#define BH_WANT_DELETEMIN
+#include "lib/binheap-node.h"
+
+struct item {
+  struct bh_node n;                    /* heap handle; first member, so bht_key() can cast a bh_node pointer back to the item */
+  uns key;
+};
+
+static inline uns bht_key(struct bh_node *n)   /* Key of the item containing heap node n */
+{
+  return ((struct item *)n)->key;
+}
+
+static inline uns bht_less(struct bh_node *a, struct bh_node *b)       /* The less(p,q) callback required by binheap.h */
+{
+  return bht_key(a) < bht_key(b);
+}
+
+static void
+bht_do_dump(struct bh_node *a, struct bh_node *expected_last, uns offset)      /* Print the subtree of a, indented by offset columns */
+{
+  if (!a)
+    return;
+  printf("%*s", offset, "");
+  printf("[%d](%d)%s\n", a->order, bht_key(a), a == expected_last ? " L" : "");        /* [order](key), " L" marks the node passed as expected_last */
+  for (struct bh_node *b=a->first_son; b; b=b->next_sibling)
+    bht_do_dump(b, a->last_son, offset+1);
+}
+
+static void
+bht_dump(struct bh_heap *h)            /* Debugging aid: print every tree of the heap */
+{
+  printf("root\n");
+  for (struct bh_node *b=h->root.first_son; b; b=b->next_sibling)
+    bht_do_dump(b, b->last_son, 1);
+}
+
+#include "lib/binheap.h"
+
+int main(void)                         /* Insert N keys, check minimum and node count, then delete them all in order */
+{
+  uns i;
+  struct bh_heap h;
+#define N 1048576
+#define K(i) ((259309*i+1009)%N)       /* scrambled key order; the ASSERTs below rely on every key 0..N-1 occurring exactly once */
+
+  bht_init(&h);
+
+  for (i=0; i<N; i++)
+    {
+      struct item *a = xmalloc_zero(sizeof(*a));
+      a->key = K(i);
+      // printf("Insert %d\n", a->key);
+      bht_insert(&h, &a->n);
+      // bht_dump(&h);
+    }
+  // bht_dump(&h);
+  ASSERT(bht_key(bht_findmin(&h)) == 0);
+  uns cnt = 0;
+  BH_FOR_ALL(bht_, &h, a)              /* the iterator must visit each node exactly once */
+    {
+      cnt++;
+    }
+  BH_END_FOR;
+  printf("cnt=%d\n", cnt);
+  ASSERT(cnt == N);
+  for (i=0; i<N; i++)
+    {
+      struct item *a = (struct item *) bht_deletemin(&h);
+      // printf("\nDeleted %d:\n", a->key);
+      ASSERT(a->key == i);             /* keys must come out in increasing order */
+      // bht_dump(&h);
+    }
+  bht_dump(&h);                                /* the heap is empty now; the items themselves are never freed */
+
+  return 0;
+}
diff --git a/lib/binheap.h b/lib/binheap.h

new file mode 100644 (file)

index 0000000..0c6ee5a
--- /dev/null
+++ b/lib/binheap.h
@@ -0,0 +1,203 @@
+/*
+ *     UCW Library -- Binomial Heaps
+ *
+ *     (c) 2003 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+/*
+ *  This is a generic implementation of Binomial Heaps. Each time you include
+ *  this file with parameters set in the corresponding preprocessor macros
+ *  as described below, it generates functions for manipulating the particular
+ *  version of the binomial heap.
+ *
+ *  You need to specify:
+ *
+ *  BH_PREFIX(x)       macro to add a name prefix (used on all global names
+ *                     defined by the binomial heap generator). All further
+ *                     names mentioned here except for macro names will be
+ *                     implicitly prefixed.
+ *
+ *  Then you continue by including "lib/binheap-node.h" which defines struct bh_node
+ *  and struct bh_heap (both without prefix). The heap elements are always allocated by
+ *  you and they must include struct bh_node which serves as a handle used for all
+ *  the heap functions and it contains all information needed for heap-keeping.
+ *  The heap itself is also allocated by you and it's represented by struct bh_heap.
+ *
+ *  When you have the declaration of heap nodes, you continue with defining:
+ *
+ *  less(p,q)          returns 1 if the key corresponding to bh_node *p
+ *                     is less than the one corresponding to *q.
+ *
+ *  Then specify what operations you request:
+ *
+ *  <always defined>   init(heap*) -- initialize the heap.
+ *  BH_WANT_INSERT     insert(heap*, node*) -- insert the node to the heap.
+ *  BH_WANT_FINDMIN    node *findmin(heap*) -- find node with minimum key.
+ *  BH_WANT_DELETEMIN  node *deletemin(heap*) -- findmin and delete the node.
+ *
+ *  Then include "lib/binheap.h" and voila, you have a binomial heap
+ *  suiting all your needs (at least those which you've revealed :) ).
+ *
+ *  You also get an iterator macro at no extra charge:
+ *
+ *  BH_FOR_ALL(bh_prefix, heap*, variable)
+ *    {
+ *      // node *variable gets declared automatically
+ *      do_something_with_node(variable);
+ *      // use BH_BREAK and BH_CONTINUE instead of break and continue
+ *     // you must not alter contents of the heap here
+ *    }
+ *  BH_END_FOR;
+ *
+ *  After including this file, all parameter macros are automatically
+ *  undef'd.
+ */
+
+#define BH_NODE struct bh_node
+#define BH_HEAP struct bh_heap
+
+static void
+BH_PREFIX(merge)(BH_NODE *a, BH_NODE *b)       /* Merge the list of trees under b into the list under a; only a's list is valid afterwards */
+{
+  BH_NODE **pp = &a->first_son;
+  BH_NODE *q = b->first_son;
+  BH_NODE *p, *r, *s;
+
+  while ((p = *pp) && q)
+    {
+      /* p,q are the next nodes of a,b; pp points to where p is linked */
+      if (p->order < q->order)         /* p is smaller => skip it */
+       pp = &p->next_sibling;
+      else if (p->order > q->order)    /* q is smaller => insert it before p */
+       {
+         r = q;
+         q = q->next_sibling;
+         r->next_sibling = p;
+         *pp = r;
+         pp = &r->next_sibling;
+       }
+      else                             /* p and q are of the same order => need to merge them */
+       {
+         if (BH_PREFIX(less)(p, q))    /* we'll hang r below s */
+           {
+             r = q;
+             s = p;
+           }
+         else
+           {
+             r = p;
+             s = q;
+           }
+         *pp = p->next_sibling;        /* unlink p,q from their lists */
+         q = q->next_sibling;
+
+         if (s->last_son)              /* merge r to s, increasing order */
+           s->last_son->next_sibling = r;
+         else
+           s->first_son = r;
+         s->last_son = r;
+         s->order++;
+         r->next_sibling = NULL;
+
+         if (!q || q->order > s->order) /* put the result into the b's list if possible */
+           {
+             s->next_sibling = q;
+             q = s;
+           }
+         else                          /* otherwise put the result to the a's list */
+           {
+             p = s->next_sibling = *pp;
+             *pp = s;
+             if (p && p->order == s->order) /* 3-collision */
+               pp = &s->next_sibling;
+           }
+       }
+    }
+  if (!p)                              /* a's list ran out first: append whatever is left of b's list */
+    *pp = q;
+}
+
+#ifdef BH_WANT_INSERT
+static void
+BH_PREFIX(insert)(BH_HEAP *heap, BH_NODE *a)   /* Insert node a, which must not currently be in any heap */
+{
+  BH_NODE sh;                          /* temporary list head whose only tree is a */
+  a->order = 0;                                /* a lone node is a tree of order 0; a node returned by deletemin still carries its old order */
+  sh.first_son = a;
+  a->first_son = a->last_son = a->next_sibling = NULL;
+  BH_PREFIX(merge)(&heap->root, &sh);  /* merge reads only sh.first_son */
+}
+#endif
+
+#ifdef BH_WANT_FINDMIN
+static BH_NODE *
+BH_PREFIX(findmin)(BH_HEAP *heap)      /* Return the node with the minimum key, or NULL if the heap is empty */
+{
+  BH_NODE *p, *best;
+
+  best = NULL;
+  for (p=heap->root.first_son; p; p=p->next_sibling)   /* only the tree roots are examined */
+    if (!best || BH_PREFIX(less)(p, best))
+      best = p;
+  return best;
+}
+#endif
+
+#ifdef BH_WANT_DELETEMIN
+static BH_NODE *
+BH_PREFIX(deletemin)(BH_HEAP *heap)    /* Unlink and return the node with the minimum key, or NULL if the heap is empty */
+{
+  BH_NODE *p, **pp, **bestp;
+
+  bestp = NULL;                                /* address of the link pointing to the best root found so far */
+  for (pp=&heap->root.first_son; p=*pp; pp=&p->next_sibling)
+    if (!bestp || BH_PREFIX(less)(p, *bestp))
+      bestp = pp;
+  if (!bestp)
+    return NULL;
+
+  p = *bestp;
+  *bestp = p->next_sibling;            /* unlink the minimum from the list of roots */
+  BH_PREFIX(merge)(&heap->root, p);    /* its sons go back to the heap; p's own fields are left as they were */
+  return p;
+}
+#endif
+
+static inline void
+BH_PREFIX(init)(BH_HEAP *heap)         /* Make the heap empty by zeroing the whole structure */
+{
+  bzero(heap, sizeof(*heap));
+}
+
+#ifndef BH_FOR_ALL
+
+#define BH_FOR_ALL(bh_px, bh_heap, bh_var)     /* bh_px is not used by the expansion */ \
+do {                                           \
+  struct bh_node *bh_stack[32];                        /* explicit traversal stack; no overflow check */ \
+  uns bh_sp = 0;                               \
+  if (bh_stack[0] = (bh_heap)->root.first_son)  \
+    bh_sp++;                                   \
+  while (bh_sp) {                              \
+    struct bh_node *bh_var = bh_stack[--bh_sp];        \
+    if (bh_var->next_sibling)                  \
+      bh_stack[bh_sp++] = bh_var->next_sibling; \
+    if (bh_var->first_son)                     /* pushed last, so the sons are visited before the sibling */ \
+      bh_stack[bh_sp++] = bh_var->first_son;
+#define BH_END_FOR                             \
+  }                                            \
+} while (0)
+
+#define BH_BREAK { bh_sp=0; break; }   /* emptying the stack also terminates the traversal loop itself */
+#define BH_CONTINUE continue
+
+#endif
+
+#undef BH_PREFIX
+#undef BH_NODE
+#undef BH_HEAP
+#undef BH_WANT_INSERT
+#undef BH_WANT_FINDMIN
+#undef BH_WANT_DELETEMIN
diff --git a/lib/binsearch.h b/lib/binsearch.h

new file mode 100644 (file)

index 0000000..6741956
--- /dev/null
+++ b/lib/binsearch.h
@@ -0,0 +1,26 @@
+/*
+ *     UCW Library -- Generic Binary Search
+ *
+ *     (c) 2005 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#define BIN_SEARCH_FIRST_GE_CMP(ary,N,x,ary_lt_x)  ({          \
+  uns l = 0, r = (N);                                          \
+  while (l < r)                                                        \
+    {                                                          \
+      uns m = (l+r)/2;                                         \
+      if (ary_lt_x(ary,m,x))                                   \
+       l = m+1;                                                \
+      else                                                     \
+       r = m;                                                  \
+    }                                                          \
+  l;                                                           \
+})
+
+#define ARY_LT_NUM(ary,i,x) (ary)[i] < (x)
+
+#define BIN_SEARCH_FIRST_GE(ary,N,x) BIN_SEARCH_FIRST_GE_CMP(ary,N,x,ARY_LT_NUM)
+#define BIN_SEARCH_EQ(ary,N,x) ({ int i = BIN_SEARCH_FIRST_GE(ary,N,x); if (i >= (N) || (ary)[i] != (x)) i=-1; i; })
diff --git a/lib/bit-ffs.c b/lib/bit-ffs.c

new file mode 100644 (file)

index 0000000..8a9198d
--- /dev/null
+++ b/lib/bit-ffs.c
@@ -0,0 +1,46 @@
+/*
+ *     UCW Library -- Find Lowest Set Bit
+ *
+ *     (c) 2005 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/bitops.h"
+
+/* Just a table, the rest is in bitops.h */
+
+const byte ffs_table[] = {             /* ffs_table[b] = index of the lowest set bit of the byte b; the entry for 0 is 0 */
+  0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+  5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+  6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+  5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+  7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+  5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+  6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+  5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
+};
+
+#ifdef TEST
+
+#include <stdio.h>
+
+int main(void)                         /* Test filter: reads hexadecimal numbers from stdin, prints bit_ffs() of each in decimal */
+{
+  uns i;
+  while (scanf("%x", &i) == 1)
+    printf("%d\n", bit_ffs(i));
+  return 0;
+}
+
+#endif
diff --git a/lib/bit-fls.c b/lib/bit-fls.c

new file mode 100644 (file)

index 0000000..6a6227d
--- /dev/null
+++ b/lib/bit-fls.c
@@ -0,0 +1,42 @@
+/*
+ *     UCW Library -- Find Highest Set Bit
+ *
+ *     (c) 1997-2005 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/bitops.h"
+
+int
+bit_fls(u32 x)
+{
+  /* Index of the highest set bit, i.e. the floor of the binary logarithm; -1 when x is zero. */
+  uns pos = 0;
+  if (x == 0)
+    return -1;
+  /* Binary search: whenever the upper part of the remaining window is non-empty, continue inside it. */
+  for (uns width = 16; width; width >>= 1)
+    if (x >> width)
+      {
+       x >>= width;
+       pos += width;
+      }
+  return pos;
+}
+
+#ifdef TEST
+
+#include <stdio.h>
+
+int main(void)                         /* Test filter: reads hexadecimal numbers from stdin, prints bit_fls() of each in decimal */
+{
+  uns i;
+  while (scanf("%x", &i) == 1)
+    printf("%d\n", bit_fls(i));
+  return 0;
+}
+
+#endif
diff --git a/lib/bitarray.h b/lib/bitarray.h

new file mode 100644 (file)

index 0000000..7248041
--- /dev/null
+++ b/lib/bitarray.h
@@ -0,0 +1,107 @@
+/*
+ *     UCW Library -- Bit Array Operations
+ *
+ *     (c) 2003--2006 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_BITARRAY_H
+#define _UCW_BITARRAY_H
+
+#include <string.h>
+
+typedef u32 *bitarray_t;                               /* a bit array is a plain array of 32-bit words */
+#define BIT_ARRAY_WORDS(n) (((n)+31)/32)               /* words needed for n bits, rounded up */
+#define BIT_ARRAY_BYTES(n) (4*BIT_ARRAY_WORDS(n))      /* the same size in bytes */
+#define BIT_ARRAY(name,size) u32 name[BIT_ARRAY_WORDS(size)]   /* declares an array variable holding size bits */
+
+static inline bitarray_t
+bit_array_xmalloc(uns n)               /* Allocate storage for n bits (whole 32-bit words) with xmalloc() */
+{
+  return xmalloc(BIT_ARRAY_BYTES(n));
+}
+
+static inline bitarray_t
+bit_array_xmalloc_zero(uns n)          /* Allocate storage for n bits (whole 32-bit words) with xmalloc_zero() */
+{
+  return xmalloc_zero(BIT_ARRAY_BYTES(n));
+}
+
+static inline void
+bit_array_zero(bitarray_t a, uns n)    /* Clear all bits of an n-bit array, including the unused bits of its last word */
+{
+  bzero(a, BIT_ARRAY_BYTES(n));
+}
+
+static inline void
+bit_array_set_all(bitarray_t a, uns n) /* Set all bits of an n-bit array, including the unused bits of its last word */
+{
+  memset(a, 255, BIT_ARRAY_BYTES(n));
+}
+
+static inline void
+bit_array_set(bitarray_t a, uns i)     /* Set bit i */
+{
+  a[i/32] |= (1U << (i%32));           /* unsigned constant: shifting a signed 1 into bit 31 is undefined */
+}
+
+static inline void
+bit_array_clear(bitarray_t a, uns i)   /* Clear bit i */
+{
+  a[i/32] &= ~(1U << (i%32));          /* unsigned constant: shifting a signed 1 into bit 31 is undefined */
+}
+
+static inline void
+bit_array_assign(bitarray_t a, uns i, uns x)   /* Set bit i if x is non-zero, clear it otherwise */
+{
+  if (x)
+    bit_array_set(a, i);
+  else
+    bit_array_clear(a, i);
+}
+
+static inline uns
+bit_array_isset(bitarray_t a, uns i)   /* Non-zero iff bit i is set; the result is the masked word, not necessarily 1 */
+{
+  return a[i/32] & (1U << (i%32));     /* unsigned constant: shifting a signed 1 into bit 31 is undefined */
+}
+
+static inline uns
+bit_array_get(bitarray_t a, uns i)     /* Value of bit i normalized to 0 or 1 */
+{
+  return !! bit_array_isset(a, i);
+}
+
+static inline uns
+bit_array_test_and_set(bitarray_t a, uns i)    /* Set bit i; returns its previous state as bit_array_isset() reports it */
+{
+  uns t = bit_array_isset(a, i);
+  bit_array_set(a, i);
+  return t;
+}
+
+static inline uns
+bit_array_test_and_clear(bitarray_t a, uns i)  /* Clear bit i; returns its previous state as bit_array_isset() reports it */
+{
+  uns t = bit_array_isset(a, i);
+  bit_array_clear(a, i);
+  return t;
+}
+
+/* Iterate over all set bits, possibly destructively */
+#define BIT_ARRAY_FISH_BITS_BEGIN(var,ary,size)                                        \
+  for (uns var##_hi=0; var##_hi < BIT_ARRAY_WORDS(size); var##_hi++)           \
+    for (uns var##_lo=0; ary[var##_hi]; var##_lo++)                            \
+      if (ary[var##_hi] & (1 << var##_lo))                                     \
+        {                                                                      \
+         uns var = 32*var##_hi + var##_lo;                                     \
+         ary[var##_hi] &= ~(1 << var##_lo);                                    \
+         do
+
+#define BIT_ARRAY_FISH_BITS_END                                                        \
+         while (0);                                                            \
+       }
+
+#endif
diff --git a/lib/bitops.h b/lib/bitops.h

new file mode 100644 (file)

index 0000000..c1e6371
--- /dev/null
+++ b/lib/bitops.h
@@ -0,0 +1,40 @@
+/*
+ *     UCW Library -- Bit Operations
+ *
+ *     (c) 2005 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_BITOPS_H
+#define _UCW_BITOPS_H
+
+/* Find highest bit set (i.e., the floor of the binary logarithm) (bit-fls.c) */
+
+int bit_fls(u32 x);            /* bit_fls(0)=-1 */
+
+/* Find lowest bit set, undefined for zero argument (bit-ffs.c) */
+
+extern const byte ffs_table[256];
+
+#ifdef __pentium4              /* On other ia32 machines, the C version is faster */
+
+/*
+ * Variant using a single x86 `bsfl' (bit scan forward) instruction.
+ * Returns the index of the lowest set bit of w; the result is undefined
+ * for w == 0.
+ */
+static inline uns bit_ffs(uns w)
+{
+  asm("bsfl %1,%0" :"=r" (w) :"rm" (w));
+  return w;
+}
+
+#else
+
+/*
+ * Portable variant: locate the lowest non-zero byte of the low 32 bits of w
+ * in two halving steps and look up the lowest set bit of that byte in
+ * ffs_table.  The result is undefined for w == 0.
+ */
+static inline uns bit_ffs(uns w)
+{
+  uns shift = 0;
+
+  if (!(w & 0xffff))
+    shift = 16;
+  if (!((w >> shift) & 0xff))
+    shift += 8;
+  return shift + ffs_table[(w >> shift) & 0xff];
+}
+
+#endif
+
+#endif
diff --git a/lib/bitops.t b/lib/bitops.t

new file mode 100644 (file)

index 0000000..97b2b35
--- /dev/null
+++ b/lib/bitops.t
@@ -0,0 +1,53 @@
+# Tests for bitops modules
+
+Run:   ../obj/lib/bit-ffs-t
+In:    1
+       2
+       3
+       4
+       5
+       6
+       12345678
+       23030300
+       23030000
+       23000000
+       40000000
+       80000000
+Out:   0
+       1
+       0
+       2
+       0
+       1
+       3
+       8
+       16
+       24
+       30
+       31
+
+Run:   ../obj/lib/bit-fls-t
+In:    1
+       2
+       3
+       4
+       5
+       6
+       12345678
+       23030303
+       03030303
+       00030303
+       00000303
+       0fedcba9
+Out:   0
+       1
+       1
+       2
+       2
+       2
+       28
+       29
+       25
+       17
+       9
+       27
diff --git a/lib/bitsig.c b/lib/bitsig.c

new file mode 100644 (file)

index 0000000..8ffe8db
--- /dev/null
+++ b/lib/bitsig.c
@@ -0,0 +1,162 @@
+/*
+ *     UCW Library -- Bit Array Signatures -- A Dubious Detector of Duplicates
+ *
+ *     (c) 2002 Martin Mares <mj@ucw.cz>
+ *
+ *     Greatly inspired by: Faloutsos, C. and Christodoulakis, S.: Signature files
+ *     (An access method for documents and its analytical performance evaluation),
+ *     ACM Trans. Office Inf. Syst., 2(4):267--288, Oct. 1984.
+ *
+ *     This data structure provides a very compact representation
+ *     of a set of strings with insertion and membership search,
+ *     but with a certain low probability it cheats by incidentally
+ *     reporting a non-member as a member. Generally the larger you
+ *     create the structure, the lower this probability is.
+ *
+ *     How does it work: the structure is just an array of M bits
+ *     and each possible element is hashed to a set of (at most) L
+ *     bit positions. For each element of the represented set, we
+ *     set its L bits to ones and we report as present all elements
+ *     whose L bits are all set.
+ *
+ *     Analysis: Let's assume N items have already been stored and let A
+ *     denote L/M (density of the hash function). The probability that
+ *     a fixed bit of the array is set by any of the N items is
+ *     1 - (1-1/M)^(NL) = 1 - ((1-1/M)^M)^NA = approx. 1 - e^-NA.
+ *     This is minimized by setting A=(ln 2)/N (try taking derivative).
+ *     Given a non-present item, the probability that all of the bits
+ *     corresponding to this item are set by the other items (that is,
+ *     the structure gives a false answer) is (1-e^-NA)^L = 2^-L.
+ *     Hence, if we want to give false answers with probability less
+ *     than epsilon, we take L := -log_2 epsilon, M := 1.45*N*L.
+ *
+ *     Example: For a set of 10^7 items with P[error] < 10^-6, we set
+ *     L := 20 and M :=  290*10^6 bits = cca 34.5 MB (29 bits per item).
+ *
+ *     We leave L and an upper bound for N as parameters set during
+ *     creation of the structure. Currently, the structure is limited
+ *     to 4 Gb = 512 MB.
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/bitsig.h"
+#include "lib/md5.h"
+
+#include <string.h>
+
+/*
+ * State of one signature structure.  The header and the bit array are
+ * allocated as a single block by bitsig_init().
+ */
+struct bitsig {
+  /*
+   * l          = number of bits tested/set per item (the perrlog argument)
+   * m          = size of the array in bits
+   * n          = number of inserted items which set at least one new bit
+   * maxn       = expected number of items (used only for the overflow message)
+   * max_m_mult = largest multiple of m not exceeding 0xffffffff, the
+   *              rejection bound used by bitsig_hash_bit()
+   */
+  uns l, m, n, maxn, max_m_mult;
+  u32 hash[4];		/* MD5 digest of the current item; modified as bit positions are drawn */
+  uns hindex;		/* Index of the hash[] word to be used next */
+  byte array[0];	/* The bit array itself, (m+7)/8 bytes */
+};
+
+/*
+ * Create an empty signature structure.
+ *
+ * perrlog is the number of bits used per item (L in the analysis at the top
+ * of this file), maxn the expected upper bound on the number of items.  The
+ * array gets M = ceil(1.45 * maxn * perrlog) bits.
+ *
+ * Dies if M would be zero (the hash functions divide by M) or if it does not
+ * fit in 32 bits.  The result should be released by bitsig_free().
+ */
+struct bitsig *
+bitsig_init(uns perrlog, uns maxn)
+{
+  struct bitsig *b;
+  u64 item_bits, m;
+  uns mbytes;
+
+  item_bits = (u64) maxn * perrlog;
+  if (!item_bits)
+    die("bitsig_init: both the number of bits per item and the number of items must be non-zero");
+  /* Checking the product first keeps the multiplication by 145 below from overflowing 64 bits */
+  if (item_bits >= (u64) 1 << 32)
+    die("bitsig_init: bitsig array too large (maximum is 4 Gb)");
+  m = (item_bits * 145 + 99) / 100;
+  if (m >= (u64) 1 << 32)
+    die("bitsig_init: bitsig array too large (maximum is 4 Gb)");
+  mbytes = (m + 7) >> 3U;
+  b = xmalloc(sizeof(struct bitsig) + mbytes);
+  b->l = perrlog;
+  b->m = m;
+  b->n = 0;
+  b->maxn = maxn;
+  /* Largest multiple of m representable in 32 bits, see bitsig_hash_bit() */
+  b->max_m_mult = (0xffffffff / m) * m;
+  bzero(b->array, mbytes);
+  msg(L_DEBUG, "Initialized bitsig array with l=%u, m=%u (%u KB), expecting %u items", b->l, b->m, (mbytes+1023)/1024, maxn);
+  return b;
+}
+
+/* Release a structure created by bitsig_init(); header and bit array are a single block. */
+void
+bitsig_free(struct bitsig *b)
+{
+  xfree(b);
+}
+
+/*
+ * Start drawing bit positions for a new item: store the MD5 digest of the
+ * NUL-terminated string `item' in b->hash and rewind the word index.
+ */
+static void
+bitsig_hash_init(struct bitsig *b, byte *item)
+{
+  struct MD5Context ctx;
+
+  b->hindex = 0;
+  MD5Init(&ctx);
+  MD5Update(&ctx, item, strlen(item));
+  MD5Final((byte *) b->hash, &ctx);
+}
+
+/*
+ * Return the next bit position (0 .. m-1) for the item set up by
+ * bitsig_hash_init().
+ *
+ * The four hash words are used in a round-robin fashion and each word is
+ * multiplied by a constant after it has been read, so later rounds generally
+ * see new values.  Values >= max_m_mult are skipped: as max_m_mult is a
+ * multiple of m, every residue modulo m is then produced by the same number
+ * of accepted 32-bit values.
+ */
+static inline uns
+bitsig_hash_bit(struct bitsig *b)
+{
+  u32 h;
+  do
+    {
+      h = b->hash[b->hindex];
+      b->hash[b->hindex] *= 3006477127U;
+      b->hindex = (b->hindex+1) % 4;
+    }
+  while (h >= b->max_m_mult);
+  return h % b->m;
+}
+
+/*
+ * Membership test.  Returns 1 if all the bits of `item' are set (the item
+ * was inserted, or it is a false positive) and 0 as soon as a clear bit is
+ * found.  The scratch hash state inside *b is overwritten, so even a lookup
+ * modifies the structure.
+ */
+int
+bitsig_member(struct bitsig *b, byte *item)
+{
+  uns left;
+
+  bitsig_hash_init(b, item);
+  for (left = b->l; left; left--)
+    {
+      uns pos = bitsig_hash_bit(b);
+      byte mask = 1 << (pos & 7);
+
+      if ((b->array[pos >> 3] & mask) == 0)
+        return 0;
+    }
+  return 1;
+}
+
+/*
+ * Insert an item.  Returns 1 if all of its bits had already been set (the
+ * item was present, or falsely appears to have been) and 0 if at least one
+ * bit had to be set.
+ */
+int
+bitsig_insert(struct bitsig *b, byte *item)
+{
+  uns i, bit, was;
+
+  bitsig_hash_init(b, item);
+  was = 1;
+  for (i=0; i<b->l; i++)
+    {
+      bit = bitsig_hash_bit(b);
+      if (!(b->array[bit >> 3] & (1 << (bit & 7))))
+       {
+         was = 0;
+         b->array[bit >> 3] |= (1 << (bit & 7));
+       }
+    }
+  /*
+   * Only items which changed the array are counted.  The message is logged
+   * when n equals maxn+1 before the increment.
+   * NOTE(review): that is the (maxn+2)-th counted item -- confirm that this
+   * threshold is intended and not off by one.
+   */
+  if (!was && b->n++ == b->maxn+1)
+    msg(L_ERROR, "bitsig: Too many items inserted, error rate will be higher than estimated!");
+  return was;
+}
+
+#ifdef TEST
+
+#include <stdio.h>
+#include <stdlib.h>
+
+/*
+ * Test driver, called with two arguments: <perrlog> <maxn>.
+ * Every line of standard input (including its newline; lines longer than the
+ * buffer are processed in pieces) is inserted and the result of
+ * bitsig_insert() is printed: 1 = reported as already present, 0 = new.
+ */
+int main(int argc, char **argv)
+{
+  /* Both arguments are mandatory; without this check argv[1] or argv[2] could be NULL */
+  if (argc < 3)
+    {
+      fprintf(stderr, "Usage: %s <perrlog> <maxn>\n", (argc > 0) ? argv[0] : "bitsig");
+      return 1;
+    }
+
+  struct bitsig *b = bitsig_init(atol(argv[1]), atol(argv[2]));
+  byte buf[1024];
+
+  while (fgets(buf, sizeof(buf), stdin))
+    printf("%d\n", bitsig_insert(b, buf));
+
+  bitsig_free(b);
+  return 0;
+}
+
+#endif
diff --git a/lib/bitsig.h b/lib/bitsig.h

new file mode 100644 (file)

index 0000000..60a5b14
--- /dev/null
+++ b/lib/bitsig.h
@@ -0,0 +1,15 @@
+/*
+ *     UCW Library -- Bit Array Signatures -- A Dubious Detector of Duplicates
+ *
+ *     (c) 2002 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+struct bitsig;
+
+struct bitsig *bitsig_init(uns perrlog, uns maxn);
+void bitsig_free(struct bitsig *b);
+int bitsig_member(struct bitsig *b, byte *item);
+int bitsig_insert(struct bitsig *b, byte *item);
diff --git a/lib/carefulio.c b/lib/carefulio.c

new file mode 100644 (file)

index 0000000..b8d865d
--- /dev/null
+++ b/lib/carefulio.c
@@ -0,0 +1,51 @@
+/*
+ *     UCW Library -- Careful Read/Write
+ *
+ *     (c) 2004 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+
+#include <unistd.h>
+
+/*
+ *  Reads and writes on sockets and pipes can return partial results,
+ *  so we implement an iterated read/write call.
+ */
+
+/*
+ * Read exactly len bytes from fd to buf, repeating read() as long as it
+ * returns partial results.
+ * Returns 1 when everything has been read, 0 if read() reported end of file
+ * before that (even if some data had already been read) and -1 if read()
+ * failed.
+ */
+int
+careful_read(int fd, void *buf, int len)
+{
+  byte *dest = buf;
+
+  for (int got; len; dest += got, len -= got)
+    {
+      got = read(fd, dest, len);
+      if (got <= 0)
+        return got ? -1 : 0;
+    }
+  return 1;
+}
+
+/*
+ * Write exactly len bytes from buf to fd, repeating write() as long as it
+ * returns partial results.
+ * Returns 1 when everything has been written, 0 if write() wrote nothing
+ * and -1 if write() failed.
+ */
+int
+careful_write(int fd, const void *buf, int len)
+{
+  const byte *src = buf;
+
+  for (;;)
+    {
+      if (!len)
+        return 1;
+      int sent = write(fd, src, len);
+      if (sent <= 0)
+        return sent ? -1 : 0;
+      src += sent;
+      len -= sent;
+    }
+}
diff --git a/lib/charmap.h b/lib/charmap.h

new file mode 100644 (file)

index 0000000..3e348fc
--- /dev/null
+++ b/lib/charmap.h
@@ -0,0 +1,268 @@
+/*
+ *     UCW Library -- Character Code Map (UTF-8 Version)
+ *
+ *     (c) 1998--2004 Martin Mares <mj@ucw.cz>
+ *     (c) 2004 Robert Spalek <robert@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+/* Syntax: CHAR(code, uppercase, lowercase, category) */
+
+CHAR(0x00,0x00,0x00,_C_CTRL)                           //      <control>
+CHAR(0x01,0x01,0x01,_C_CTRL)                           //      <control>
+CHAR(0x02,0x02,0x02,_C_CTRL)                           //      <control>
+CHAR(0x03,0x03,0x03,_C_CTRL)                           //      <control>
+CHAR(0x04,0x04,0x04,_C_CTRL)                           //      <control>
+CHAR(0x05,0x05,0x05,_C_CTRL)                           //      <control>
+CHAR(0x06,0x06,0x06,_C_CTRL)                           //      <control>
+CHAR(0x07,0x07,0x07,_C_CTRL)                           //      <control>
+CHAR(0x08,0x08,0x08,_C_CTRL | _C_BLANK)                        //      <control>
+CHAR(0x09,0x09,0x09,_C_CTRL | _C_BLANK | _C_PRINT)     //      <control>
+CHAR(0x0A,0x0A,0x0A,_C_CTRL | _C_BLANK)                        //      <control>
+CHAR(0x0B,0x0B,0x0B,_C_CTRL)                           //      <control>
+CHAR(0x0C,0x0C,0x0C,_C_CTRL | _C_BLANK)                        //      <control>
+CHAR(0x0D,0x0D,0x0D,_C_CTRL | _C_BLANK)                        //      <control>
+CHAR(0x0E,0x0E,0x0E,_C_CTRL)                           //      <control>
+CHAR(0x0F,0x0F,0x0F,_C_CTRL)                           //      <control>
+CHAR(0x10,0x10,0x10,_C_CTRL)                           //      <control>
+CHAR(0x11,0x11,0x11,_C_CTRL)                           //      <control>
+CHAR(0x12,0x12,0x12,_C_CTRL)                           //      <control>
+CHAR(0x13,0x13,0x13,_C_CTRL)                           //      <control>
+CHAR(0x14,0x14,0x14,_C_CTRL)                           //      <control>
+CHAR(0x15,0x15,0x15,_C_CTRL)                           //      <control>
+CHAR(0x16,0x16,0x16,_C_CTRL)                           //      <control>
+CHAR(0x17,0x17,0x17,_C_CTRL)                           //      <control>
+CHAR(0x18,0x18,0x18,_C_CTRL)                           //      <control>
+CHAR(0x19,0x19,0x19,_C_CTRL)                           //      <control>
+CHAR(0x1A,0x1A,0x1A,_C_CTRL)                           //      <control>
+CHAR(0x1B,0x1B,0x1B,_C_CTRL)                           //      <control>
+CHAR(0x1C,0x1C,0x1C,_C_CTRL)                           //      <control>
+CHAR(0x1D,0x1D,0x1D,_C_CTRL)                           //      <control>
+CHAR(0x1E,0x1E,0x1E,_C_CTRL)                           //      <control>
+CHAR(0x1F,0x1F,0x1F,_C_CTRL)                           //      <control>
+CHAR(0x20,0x20,0x20,_C_BLANK | _C_PRINT)               //      SPACE
+CHAR(0x21,0x21,0x21,_C_PRINT)                          //      EXCLAMATION MARK
+CHAR(0x22,0x22,0x22,_C_PRINT)                          //      QUOTATION MARK
+CHAR(0x23,0x23,0x23,_C_PRINT)                          //      NUMBER SIGN
+CHAR(0x24,0x24,0x24,_C_PRINT)                          //      DOLLAR SIGN
+CHAR(0x25,0x25,0x25,_C_PRINT)                          //      PERCENT SIGN
+CHAR(0x26,0x26,0x26,_C_PRINT)                          //      AMPERSAND
+CHAR(0x27,0x27,0x27,_C_PRINT)                          //      APOSTROPHE
+CHAR(0x28,0x28,0x28,_C_PRINT)                          //      LEFT PARENTHESIS
+CHAR(0x29,0x29,0x29,_C_PRINT)                          //      RIGHT PARENTHESIS
+CHAR(0x2A,0x2A,0x2A,_C_PRINT)                          //      ASTERISK
+CHAR(0x2B,0x2B,0x2B,_C_PRINT)                          //      PLUS SIGN
+CHAR(0x2C,0x2C,0x2C,_C_PRINT)                          //      COMMA
+CHAR(0x2D,0x2D,0x2D,_C_PRINT)                          //      HYPHEN-MINUS
+CHAR(0x2E,0x2E,0x2E,_C_PRINT)                          //      FULL STOP
+CHAR(0x2F,0x2F,0x2F,_C_PRINT)                          //      SOLIDUS
+CHAR(0x30,0x30,0x30,_C_DIGIT | _C_XDIGIT | _C_PRINT)   //      DIGIT ZERO
+CHAR(0x31,0x31,0x31,_C_DIGIT | _C_XDIGIT | _C_PRINT)   //      DIGIT ONE
+CHAR(0x32,0x32,0x32,_C_DIGIT | _C_XDIGIT | _C_PRINT)   //      DIGIT TWO
+CHAR(0x33,0x33,0x33,_C_DIGIT | _C_XDIGIT | _C_PRINT)   //      DIGIT THREE
+CHAR(0x34,0x34,0x34,_C_DIGIT | _C_XDIGIT | _C_PRINT)   //      DIGIT FOUR
+CHAR(0x35,0x35,0x35,_C_DIGIT | _C_XDIGIT | _C_PRINT)   //      DIGIT FIVE
+CHAR(0x36,0x36,0x36,_C_DIGIT | _C_XDIGIT | _C_PRINT)   //      DIGIT SIX
+CHAR(0x37,0x37,0x37,_C_DIGIT | _C_XDIGIT | _C_PRINT)   //      DIGIT SEVEN
+CHAR(0x38,0x38,0x38,_C_DIGIT | _C_XDIGIT | _C_PRINT)   //      DIGIT EIGHT
+CHAR(0x39,0x39,0x39,_C_DIGIT | _C_XDIGIT | _C_PRINT)   //      DIGIT NINE
+CHAR(0x3A,0x3A,0x3A,_C_PRINT)                          //      COLON
+CHAR(0x3B,0x3B,0x3B,_C_PRINT)                          //      SEMICOLON
+CHAR(0x3C,0x3C,0x3C,_C_PRINT)                          //      LESS-THAN SIGN
+CHAR(0x3D,0x3D,0x3D,_C_PRINT)                          //      EQUALS SIGN
+CHAR(0x3E,0x3E,0x3E,_C_PRINT)                          //      GREATER-THAN SIGN
+CHAR(0x3F,0x3F,0x3F,_C_PRINT)                          //      QUESTION MARK
+CHAR(0x40,0x40,0x40,_C_PRINT)                          //      COMMERCIAL AT
+CHAR(0x41,0x41,0x61,_C_UPPER | _C_XDIGIT | _C_PRINT)   //      LATIN CAPITAL LETTER A
+CHAR(0x42,0x42,0x62,_C_UPPER | _C_XDIGIT | _C_PRINT)   //      LATIN CAPITAL LETTER B
+CHAR(0x43,0x43,0x63,_C_UPPER | _C_XDIGIT | _C_PRINT)   //      LATIN CAPITAL LETTER C
+CHAR(0x44,0x44,0x64,_C_UPPER | _C_XDIGIT | _C_PRINT)   //      LATIN CAPITAL LETTER D
+CHAR(0x45,0x45,0x65,_C_UPPER | _C_XDIGIT | _C_PRINT)   //      LATIN CAPITAL LETTER E
+CHAR(0x46,0x46,0x66,_C_UPPER | _C_XDIGIT | _C_PRINT)   //      LATIN CAPITAL LETTER F
+CHAR(0x47,0x47,0x67,_C_UPPER | _C_PRINT)               //      LATIN CAPITAL LETTER G
+CHAR(0x48,0x48,0x68,_C_UPPER | _C_PRINT)               //      LATIN CAPITAL LETTER H
+CHAR(0x49,0x49,0x69,_C_UPPER | _C_PRINT)               //      LATIN CAPITAL LETTER I
+CHAR(0x4A,0x4A,0x6A,_C_UPPER | _C_PRINT)               //      LATIN CAPITAL LETTER J
+CHAR(0x4B,0x4B,0x6B,_C_UPPER | _C_PRINT)               //      LATIN CAPITAL LETTER K
+CHAR(0x4C,0x4C,0x6C,_C_UPPER | _C_PRINT)               //      LATIN CAPITAL LETTER L
+CHAR(0x4D,0x4D,0x6D,_C_UPPER | _C_PRINT)               //      LATIN CAPITAL LETTER M
+CHAR(0x4E,0x4E,0x6E,_C_UPPER | _C_PRINT)               //      LATIN CAPITAL LETTER N
+CHAR(0x4F,0x4F,0x6F,_C_UPPER | _C_PRINT)               //      LATIN CAPITAL LETTER O
+CHAR(0x50,0x50,0x70,_C_UPPER | _C_PRINT)               //      LATIN CAPITAL LETTER P
+CHAR(0x51,0x51,0x71,_C_UPPER | _C_PRINT)               //      LATIN CAPITAL LETTER Q
+CHAR(0x52,0x52,0x72,_C_UPPER | _C_PRINT)               //      LATIN CAPITAL LETTER R
+CHAR(0x53,0x53,0x73,_C_UPPER | _C_PRINT)               //      LATIN CAPITAL LETTER S
+CHAR(0x54,0x54,0x74,_C_UPPER | _C_PRINT)               //      LATIN CAPITAL LETTER T
+CHAR(0x55,0x55,0x75,_C_UPPER | _C_PRINT)               //      LATIN CAPITAL LETTER U
+CHAR(0x56,0x56,0x76,_C_UPPER | _C_PRINT)               //      LATIN CAPITAL LETTER V
+CHAR(0x57,0x57,0x77,_C_UPPER | _C_PRINT)               //      LATIN CAPITAL LETTER W
+CHAR(0x58,0x58,0x78,_C_UPPER | _C_PRINT)               //      LATIN CAPITAL LETTER X
+CHAR(0x59,0x59,0x79,_C_UPPER | _C_PRINT)               //      LATIN CAPITAL LETTER Y
+CHAR(0x5A,0x5A,0x7A,_C_UPPER | _C_PRINT)               //      LATIN CAPITAL LETTER Z
+CHAR(0x5B,0x5B,0x5B,_C_PRINT)                          //      LEFT SQUARE BRACKET
+CHAR(0x5C,0x5C,0x5C,_C_PRINT)                          //      REVERSE SOLIDUS
+CHAR(0x5D,0x5D,0x5D,_C_PRINT)                          //      RIGHT SQUARE BRACKET
+CHAR(0x5E,0x5E,0x5E,_C_PRINT)                          //      CIRCUMFLEX ACCENT
+CHAR(0x5F,0x5F,0x5F,_C_INNER | _C_PRINT)               //      LOW LINE
+CHAR(0x60,0x60,0x60,_C_PRINT)                          //      GRAVE ACCENT
+CHAR(0x61,0x41,0x61,_C_LOWER | _C_XDIGIT | _C_PRINT)   //      LATIN SMALL LETTER A
+CHAR(0x62,0x42,0x62,_C_LOWER | _C_XDIGIT | _C_PRINT)   //      LATIN SMALL LETTER B
+CHAR(0x63,0x43,0x63,_C_LOWER | _C_XDIGIT | _C_PRINT)   //      LATIN SMALL LETTER C
+CHAR(0x64,0x44,0x64,_C_LOWER | _C_XDIGIT | _C_PRINT)   //      LATIN SMALL LETTER D
+CHAR(0x65,0x45,0x65,_C_LOWER | _C_XDIGIT | _C_PRINT)   //      LATIN SMALL LETTER E
+CHAR(0x66,0x46,0x66,_C_LOWER | _C_XDIGIT | _C_PRINT)   //      LATIN SMALL LETTER F
+CHAR(0x67,0x47,0x67,_C_LOWER | _C_PRINT)               //      LATIN SMALL LETTER G
+CHAR(0x68,0x48,0x68,_C_LOWER | _C_PRINT)               //      LATIN SMALL LETTER H
+CHAR(0x69,0x49,0x69,_C_LOWER | _C_PRINT)               //      LATIN SMALL LETTER I
+CHAR(0x6A,0x4A,0x6A,_C_LOWER | _C_PRINT)               //      LATIN SMALL LETTER J
+CHAR(0x6B,0x4B,0x6B,_C_LOWER | _C_PRINT)               //      LATIN SMALL LETTER K
+CHAR(0x6C,0x4C,0x6C,_C_LOWER | _C_PRINT)               //      LATIN SMALL LETTER L
+CHAR(0x6D,0x4D,0x6D,_C_LOWER | _C_PRINT)               //      LATIN SMALL LETTER M
+CHAR(0x6E,0x4E,0x6E,_C_LOWER | _C_PRINT)               //      LATIN SMALL LETTER N
+CHAR(0x6F,0x4F,0x6F,_C_LOWER | _C_PRINT)               //      LATIN SMALL LETTER O
+CHAR(0x70,0x50,0x70,_C_LOWER | _C_PRINT)               //      LATIN SMALL LETTER P
+CHAR(0x71,0x51,0x71,_C_LOWER | _C_PRINT)               //      LATIN SMALL LETTER Q
+CHAR(0x72,0x52,0x72,_C_LOWER | _C_PRINT)               //      LATIN SMALL LETTER R
+CHAR(0x73,0x53,0x73,_C_LOWER | _C_PRINT)               //      LATIN SMALL LETTER S
+CHAR(0x74,0x54,0x74,_C_LOWER | _C_PRINT)               //      LATIN SMALL LETTER T
+CHAR(0x75,0x55,0x75,_C_LOWER | _C_PRINT)               //      LATIN SMALL LETTER U
+CHAR(0x76,0x56,0x76,_C_LOWER | _C_PRINT)               //      LATIN SMALL LETTER V
+CHAR(0x77,0x57,0x77,_C_LOWER | _C_PRINT)               //      LATIN SMALL LETTER W
+CHAR(0x78,0x58,0x78,_C_LOWER | _C_PRINT)               //      LATIN SMALL LETTER X
+CHAR(0x79,0x59,0x79,_C_LOWER | _C_PRINT)               //      LATIN SMALL LETTER Y
+CHAR(0x7A,0x5A,0x7A,_C_LOWER | _C_PRINT)               //      LATIN SMALL LETTER Z
+CHAR(0x7B,0x7B,0x7B,_C_PRINT)                          //      LEFT CURLY BRACKET
+CHAR(0x7C,0x7C,0x7C,_C_PRINT)                          //      VERTICAL LINE
+CHAR(0x7D,0x7D,0x7D,_C_PRINT)                          //      RIGHT CURLY BRACKET
+CHAR(0x7E,0x7E,0x7E,_C_PRINT)                          //      TILDE
+CHAR(0x7F,0x7F,0x7F,_C_CTRL)                           //      <control>
+CHAR(0x80,0x80,0x80,_C_PRINT)                          //      UTF-8
+CHAR(0x81,0x81,0x81,_C_PRINT)                          //      UTF-8
+CHAR(0x82,0x82,0x82,_C_PRINT)                          //      UTF-8
+CHAR(0x83,0x83,0x83,_C_PRINT)                          //      UTF-8
+CHAR(0x84,0x84,0x84,_C_PRINT)                          //      UTF-8
+CHAR(0x85,0x85,0x85,_C_PRINT)                          //      UTF-8
+CHAR(0x86,0x86,0x86,_C_PRINT)                          //      UTF-8
+CHAR(0x87,0x87,0x87,_C_PRINT)                          //      UTF-8
+CHAR(0x88,0x88,0x88,_C_PRINT)                          //      UTF-8
+CHAR(0x89,0x89,0x89,_C_PRINT)                          //      UTF-8
+CHAR(0x8A,0x8A,0x8A,_C_PRINT)                          //      UTF-8
+CHAR(0x8B,0x8B,0x8B,_C_PRINT)                          //      UTF-8
+CHAR(0x8C,0x8C,0x8C,_C_PRINT)                          //      UTF-8
+CHAR(0x8D,0x8D,0x8D,_C_PRINT)                          //      UTF-8
+CHAR(0x8E,0x8E,0x8E,_C_PRINT)                          //      UTF-8
+CHAR(0x8F,0x8F,0x8F,_C_PRINT)                          //      UTF-8
+CHAR(0x90,0x90,0x90,_C_PRINT)                          //      UTF-8
+CHAR(0x91,0x91,0x91,_C_PRINT)                          //      UTF-8
+CHAR(0x92,0x92,0x92,_C_PRINT)                          //      UTF-8
+CHAR(0x93,0x93,0x93,_C_PRINT)                          //      UTF-8
+CHAR(0x94,0x94,0x94,_C_PRINT)                          //      UTF-8
+CHAR(0x95,0x95,0x95,_C_PRINT)                          //      UTF-8
+CHAR(0x96,0x96,0x96,_C_PRINT)                          //      UTF-8
+CHAR(0x97,0x97,0x97,_C_PRINT)                          //      UTF-8
+CHAR(0x98,0x98,0x98,_C_PRINT)                          //      UTF-8
+CHAR(0x99,0x99,0x99,_C_PRINT)                          //      UTF-8
+CHAR(0x9A,0x9A,0x9A,_C_PRINT)                          //      UTF-8
+CHAR(0x9B,0x9B,0x9B,_C_PRINT)                          //      UTF-8
+CHAR(0x9C,0x9C,0x9C,_C_PRINT)                          //      UTF-8
+CHAR(0x9D,0x9D,0x9D,_C_PRINT)                          //      UTF-8
+CHAR(0x9E,0x9E,0x9E,_C_PRINT)                          //      UTF-8
+CHAR(0x9F,0x9F,0x9F,_C_PRINT)                          //      UTF-8
+CHAR(0xA0,0xA0,0xA0,_C_PRINT)                          //      UTF-8
+CHAR(0xA1,0xA1,0xA1,_C_PRINT)                          //      UTF-8
+CHAR(0xA2,0xA2,0xA2,_C_PRINT)                          //      UTF-8
+CHAR(0xA3,0xA3,0xA3,_C_PRINT)                          //      UTF-8
+CHAR(0xA4,0xA4,0xA4,_C_PRINT)                          //      UTF-8
+CHAR(0xA5,0xA5,0xA5,_C_PRINT)                          //      UTF-8
+CHAR(0xA6,0xA6,0xA6,_C_PRINT)                          //      UTF-8
+CHAR(0xA7,0xA7,0xA7,_C_PRINT)                          //      UTF-8
+CHAR(0xA8,0xA8,0xA8,_C_PRINT)                          //      UTF-8
+CHAR(0xA9,0xA9,0xA9,_C_PRINT)                          //      UTF-8
+CHAR(0xAA,0xAA,0xAA,_C_PRINT)                          //      UTF-8
+CHAR(0xAB,0xAB,0xAB,_C_PRINT)                          //      UTF-8
+CHAR(0xAC,0xAC,0xAC,_C_PRINT)                          //      UTF-8
+CHAR(0xAD,0xAD,0xAD,_C_PRINT)                          //      UTF-8
+CHAR(0xAE,0xAE,0xAE,_C_PRINT)                          //      UTF-8
+CHAR(0xAF,0xAF,0xAF,_C_PRINT)                          //      UTF-8
+CHAR(0xB0,0xB0,0xB0,_C_PRINT)                          //      UTF-8
+CHAR(0xB1,0xB1,0xB1,_C_PRINT)                          //      UTF-8
+CHAR(0xB2,0xB2,0xB2,_C_PRINT)                          //      UTF-8
+CHAR(0xB3,0xB3,0xB3,_C_PRINT)                          //      UTF-8
+CHAR(0xB4,0xB4,0xB4,_C_PRINT)                          //      UTF-8
+CHAR(0xB5,0xB5,0xB5,_C_PRINT)                          //      UTF-8
+CHAR(0xB6,0xB6,0xB6,_C_PRINT)                          //      UTF-8
+CHAR(0xB7,0xB7,0xB7,_C_PRINT)                          //      UTF-8
+CHAR(0xB8,0xB8,0xB8,_C_PRINT)                          //      UTF-8
+CHAR(0xB9,0xB9,0xB9,_C_PRINT)                          //      UTF-8
+CHAR(0xBA,0xBA,0xBA,_C_PRINT)                          //      UTF-8
+CHAR(0xBB,0xBB,0xBB,_C_PRINT)                          //      UTF-8
+CHAR(0xBC,0xBC,0xBC,_C_PRINT)                          //      UTF-8
+CHAR(0xBD,0xBD,0xBD,_C_PRINT)                          //      UTF-8
+CHAR(0xBE,0xBE,0xBE,_C_PRINT)                          //      UTF-8
+CHAR(0xBF,0xBF,0xBF,_C_PRINT)                          //      UTF-8
+CHAR(0xC0,0xC0,0xC0,_C_PRINT)                          //      UTF-8
+CHAR(0xC1,0xC1,0xC1,_C_PRINT)                          //      UTF-8
+CHAR(0xC2,0xC2,0xC2,_C_PRINT)                          //      UTF-8
+CHAR(0xC3,0xC3,0xC3,_C_PRINT)                          //      UTF-8
+CHAR(0xC4,0xC4,0xC4,_C_PRINT)                          //      UTF-8
+CHAR(0xC5,0xC5,0xC5,_C_PRINT)                          //      UTF-8
+CHAR(0xC6,0xC6,0xC6,_C_PRINT)                          //      UTF-8
+CHAR(0xC7,0xC7,0xC7,_C_PRINT)                          //      UTF-8
+CHAR(0xC8,0xC8,0xC8,_C_PRINT)                          //      UTF-8
+CHAR(0xC9,0xC9,0xC9,_C_PRINT)                          //      UTF-8
+CHAR(0xCA,0xCA,0xCA,_C_PRINT)                          //      UTF-8
+CHAR(0xCB,0xCB,0xCB,_C_PRINT)                          //      UTF-8
+CHAR(0xCC,0xCC,0xCC,_C_PRINT)                          //      UTF-8
+CHAR(0xCD,0xCD,0xCD,_C_PRINT)                          //      UTF-8
+CHAR(0xCE,0xCE,0xCE,_C_PRINT)                          //      UTF-8
+CHAR(0xCF,0xCF,0xCF,_C_PRINT)                          //      UTF-8
+CHAR(0xD0,0xD0,0xD0,_C_PRINT)                          //      UTF-8
+CHAR(0xD1,0xD1,0xD1,_C_PRINT)                          //      UTF-8
+CHAR(0xD2,0xD2,0xD2,_C_PRINT)                          //      UTF-8
+CHAR(0xD3,0xD3,0xD3,_C_PRINT)                          //      UTF-8
+CHAR(0xD4,0xD4,0xD4,_C_PRINT)                          //      UTF-8
+CHAR(0xD5,0xD5,0xD5,_C_PRINT)                          //      UTF-8
+CHAR(0xD6,0xD6,0xD6,_C_PRINT)                          //      UTF-8
+CHAR(0xD7,0xD7,0xD7,_C_PRINT)                          //      UTF-8
+CHAR(0xD8,0xD8,0xD8,_C_PRINT)                          //      UTF-8
+CHAR(0xD9,0xD9,0xD9,_C_PRINT)                          //      UTF-8
+CHAR(0xDA,0xDA,0xDA,_C_PRINT)                          //      UTF-8
+CHAR(0xDB,0xDB,0xDB,_C_PRINT)                          //      UTF-8
+CHAR(0xDC,0xDC,0xDC,_C_PRINT)                          //      UTF-8
+CHAR(0xDD,0xDD,0xDD,_C_PRINT)                          //      UTF-8
+CHAR(0xDE,0xDE,0xDE,_C_PRINT)                          //      UTF-8
+CHAR(0xDF,0xDF,0xDF,_C_PRINT)                          //      UTF-8
+CHAR(0xE0,0xE0,0xE0,_C_PRINT)                          //      UTF-8
+CHAR(0xE1,0xE1,0xE1,_C_PRINT)                          //      UTF-8
+CHAR(0xE2,0xE2,0xE2,_C_PRINT)                          //      UTF-8
+CHAR(0xE3,0xE3,0xE3,_C_PRINT)                          //      UTF-8
+CHAR(0xE4,0xE4,0xE4,_C_PRINT)                          //      UTF-8
+CHAR(0xE5,0xE5,0xE5,_C_PRINT)                          //      UTF-8
+CHAR(0xE6,0xE6,0xE6,_C_PRINT)                          //      UTF-8
+CHAR(0xE7,0xE7,0xE7,_C_PRINT)                          //      UTF-8
+CHAR(0xE8,0xE8,0xE8,_C_PRINT)                          //      UTF-8
+CHAR(0xE9,0xE9,0xE9,_C_PRINT)                          //      UTF-8
+CHAR(0xEA,0xEA,0xEA,_C_PRINT)                          //      UTF-8
+CHAR(0xEB,0xEB,0xEB,_C_PRINT)                          //      UTF-8
+CHAR(0xEC,0xEC,0xEC,_C_PRINT)                          //      UTF-8
+CHAR(0xED,0xED,0xED,_C_PRINT)                          //      UTF-8
+CHAR(0xEE,0xEE,0xEE,_C_PRINT)                          //      UTF-8
+CHAR(0xEF,0xEF,0xEF,_C_PRINT)                          //      UTF-8
+CHAR(0xF0,0xF0,0xF0,_C_PRINT)                          //      UTF-8
+CHAR(0xF1,0xF1,0xF1,_C_PRINT)                          //      UTF-8
+CHAR(0xF2,0xF2,0xF2,_C_PRINT)                          //      UTF-8
+CHAR(0xF3,0xF3,0xF3,_C_PRINT)                          //      UTF-8
+CHAR(0xF4,0xF4,0xF4,_C_PRINT)                          //      UTF-8
+CHAR(0xF5,0xF5,0xF5,_C_PRINT)                          //      UTF-8
+CHAR(0xF6,0xF6,0xF6,_C_PRINT)                          //      UTF-8
+CHAR(0xF7,0xF7,0xF7,_C_PRINT)                          //      UTF-8
+CHAR(0xF8,0xF8,0xF8,_C_PRINT)                          //      UTF-8
+CHAR(0xF9,0xF9,0xF9,_C_PRINT)                          //      UTF-8
+CHAR(0xFA,0xFA,0xFA,_C_PRINT)                          //      UTF-8
+CHAR(0xFB,0xFB,0xFB,_C_PRINT)                          //      UTF-8
+CHAR(0xFC,0xFC,0xFC,_C_PRINT)                          //      UTF-8
+CHAR(0xFD,0xFD,0xFD,_C_PRINT)                          //      UTF-8
+CHAR(0xFE,0xFE,0xFE,_C_PRINT)                          //      UTF-8
+CHAR(0xFF,0xFF,0xFF,_C_PRINT)                          //      UTF-8
diff --git a/lib/chartype.h b/lib/chartype.h

new file mode 100644 (file)

index 0000000..09dc1ec
--- /dev/null
+++ b/lib/chartype.h
@@ -0,0 +1,49 @@
+/*
+ *     UCW Library -- Character Types
+ *
+ *     (c) 1997--2004 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_CHARTYPE_H
+#define _UCW_CHARTYPE_H
+
+#define _C_UPPER 1                     /* Upper-case letters */
+#define _C_LOWER 2                     /* Lower-case letters */
+#define _C_PRINT 4                     /* Printable */
+#define _C_DIGIT 8                     /* Digits */
+#define _C_CTRL 16                     /* Control characters */
+#define _C_XDIGIT 32                   /* Hexadecimal digits */
+#define _C_BLANK 64                    /* White spaces (spaces, tabs, newlines) */
+#define _C_INNER 128                   /* `inner punctuation' -- underscore etc. */
+
+/* Composite masks built from the basic category bits above */
+#define _C_ALPHA (_C_UPPER | _C_LOWER)	/* Letters of either case */
+#define _C_ALNUM (_C_ALPHA | _C_DIGIT)	/* Letters and digits */
+#define _C_WORD (_C_ALNUM | _C_INNER)	/* Letters, digits and inner punctuation */
+#define _C_WSTART (_C_ALPHA | _C_INNER)	/* Letters and inner punctuation, no digits */
+
+extern const unsigned char _c_cat[256], _c_upper[256], _c_lower[256];
+
+/* Category bit mask of character x; the cast lets negative `char' values index the table safely */
+#define Category(x) (_c_cat[(unsigned char)(x)])
+/*
+ * Bits of the mask y which character x has: non-zero (not necessarily 1) iff
+ * x belongs to at least one of the categories in y.  The mask is
+ * parenthesized, so that an expression like `_C_UPPER | _C_DIGIT' can be
+ * passed directly.
+ */
+#define Ccat(x,y) (Category(x) & (y))
+
+/*
+ * Character class predicates.  Each of them expands to Ccat() with the
+ * corresponding mask, so the result is a non-zero mask (not necessarily 1)
+ * for members of the class and 0 for the others.
+ */
+#define Cupper(x) Ccat(x, _C_UPPER)
+#define Clower(x) Ccat(x, _C_LOWER)
+#define Calpha(x) Ccat(x, _C_ALPHA)
+#define Calnum(x) Ccat(x, _C_ALNUM)
+#define Cprint(x) Ccat(x, _C_PRINT)
+#define Cdigit(x) Ccat(x, _C_DIGIT)
+#define Cxdigit(x) Ccat(x, _C_XDIGIT)
+#define Cword(x) Ccat(x, _C_WORD)
+#define Cblank(x) Ccat(x, _C_BLANK)
+#define Cctrl(x) Ccat(x, _C_CTRL)
+#define Cspace(x) Cblank(x)		/* Alias of Cblank() */
+
+/* Case conversion by lookup in the _c_upper / _c_lower tables */
+#define Cupcase(x) _c_upper[(unsigned char)(x)]
+#define Clocase(x) _c_lower[(unsigned char)(x)]
+
+/*
+ * Numeric value of the hexadecimal digit x.  No validation is done: anything
+ * below 'A' is treated as a decimal digit, anything else as a letter (`& 0xdf'
+ * clears bit 0x20, which maps lower-case ASCII letters to upper case), so the
+ * caller is expected to check the character first, e.g. by Cxdigit().
+ * The argument is evaluated more than once, so it must not have side effects.
+ */
+#define Cxvalue(x) (((x)<'A')?((x)-'0'):(((x)&0xdf)-'A'+10))
+
+#endif
diff --git a/lib/clists.h b/lib/clists.h

new file mode 100644 (file)

index 0000000..921b7dc
--- /dev/null
+++ b/lib/clists.h
@@ -0,0 +1,132 @@
+/*
+ *     UCW Library -- Circular Linked Lists
+ *
+ *     (c) 2003--2007 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_CLISTS_H
+#define _UCW_CLISTS_H
+
+/*
+ * Node of a circular doubly linked list.
+ * NOTE(review): the iteration macros below cast user node pointers to
+ * cnode *, so the cnode presumably has to be the first member of the
+ * structure it is embedded in -- confirm against callers.
+ */
+typedef struct cnode {
+  struct cnode *next, *prev;
+} cnode;
+
+/*
+ * List header.  `head' is a sentinel node closing the circle: head.next is
+ * the first node, head.prev the last one and in an empty list both point
+ * back to the head itself (see clist_init() and clist_empty()).
+ */
+typedef struct clist {
+  struct cnode head;
+} clist;
+
+/* Return the first node of the list or NULL if the list is empty. */
+static inline void *clist_head(clist *l)
+{
+  cnode *first = l->head.next;
+
+  if (first == &l->head)
+    return NULL;
+  return first;
+}
+
+/* Return the last node of the list or NULL if the list is empty. */
+static inline void *clist_tail(clist *l)
+{
+  cnode *last = l->head.prev;
+
+  if (last == &l->head)
+    return NULL;
+  return last;
+}
+
+/* Return the successor of node n in list l or NULL if n is the last node. */
+static inline void *clist_next(clist *l, cnode *n)
+{
+  cnode *succ = n->next;
+
+  if (succ == &l->head)
+    return NULL;
+  return succ;
+}
+
+/* Return the predecessor of node n in list l or NULL if n is the first node. */
+static inline void *clist_prev(clist *l, cnode *n)
+{
+  cnode *pred = n->prev;
+
+  if (pred == &l->head)
+    return NULL;
+  return pred;
+}
+
+/* Return non-zero iff the list contains no nodes, i.e. the head points to itself. */
+static inline int clist_empty(clist *l)
+{
+  cnode *h = &l->head;
+
+  return h->next == h;
+}
+
+/*
+ * Iteration macros.  `list' is a clist (not a pointer to one) and n visits
+ * all of its nodes.  The WALK variants use an already declared variable n,
+ * the FOR_EACH variants declare n of the given (pointer) type inside the for
+ * statement.  The DELSAFE variants load the successor of n to tmp before
+ * each run of the loop body, so the body may remove the current node.
+ * CLIST_FOR_EACH_BACKWARDS walks from the tail towards the head.
+ */
+#define CLIST_WALK(n,list) for(n=(void*)(list).head.next; (cnode*)(n) != &(list).head; n=(void*)((cnode*)(n))->next)
+#define CLIST_WALK_DELSAFE(n,list,tmp) for(n=(void*)(list).head.next; tmp=(void*)((cnode*)(n))->next, (cnode*)(n) != &(list).head; n=(void*)tmp)
+#define CLIST_FOR_EACH(type,n,list) for(type n=(void*)(list).head.next; (cnode*)(n) != &(list).head; n=(void*)((cnode*)(n))->next)
+#define CLIST_FOR_EACH_DELSAFE(type,n,list,tmp) for(type n=(void*)(list).head.next; tmp=(void*)((cnode*)(n))->next, (cnode*)(n) != &(list).head; n=(void*)tmp)
+
+#define CLIST_FOR_EACH_BACKWARDS(type,n,list) for(type n=(void*)(list).head.prev; (cnode*)(n) != &(list).head; n=(void*)((cnode*)(n))->prev)
+
+/* Link node `what' into a list right behind node `after' (which may be the list head). */
+static inline void clist_insert_after(cnode *what, cnode *after)
+{
+  what->next = after->next;
+  what->prev = after;
+  after->next->prev = what;
+  after->next = what;
+}
+
+/* Link node `what' into a list right in front of node `before' (which may be the list head). */
+static inline void clist_insert_before(cnode *what, cnode *before)
+{
+  what->next = before;
+  what->prev = before->prev;
+  before->prev->next = what;
+  before->prev = what;
+}
+
+/* Append node n to the end of list l (i.e. insert it in front of the sentinel head). */
+static inline void clist_add_tail(clist *l, cnode *n)
+{
+  clist_insert_before(n, &l->head);
+}
+
+/* Prepend node n to the beginning of list l (i.e. insert it right behind the sentinel head). */
+static inline void clist_add_head(clist *l, cnode *n)
+{
+  clist_insert_after(n, &l->head);
+}
+
+/*
+ * Unlink node n from the list it is in by connecting its neighbours to each
+ * other.  The pointers stored in n itself are left untouched.
+ */
+static inline void clist_remove(cnode *n)
+{
+  n->prev->next = n->next;
+  n->next->prev = n->prev;
+}
+
+/* Unlink the first node of the list and return it; returns NULL if the list is empty. */
+static inline void *clist_remove_head(clist *l)
+{
+  cnode *first = clist_head(l);
+
+  if (!first)
+    return NULL;
+  clist_remove(first);
+  return first;
+}
+
+/* Unlink the last node of the list and return it; returns NULL if the list is empty. */
+static inline void *clist_remove_tail(clist *l)
+{
+  cnode *last = clist_tail(l);
+
+  if (!last)
+    return NULL;
+  clist_remove(last);
+  return last;
+}
+
+/* Make the list empty: both links of the sentinel head point back to the head. */
+static inline void clist_init(clist *l)
+{
+  l->head.prev = &l->head;
+  l->head.next = &l->head;
+}
+
+/*
+ * Move all nodes of list `what' behind node `after' of another list, keeping
+ * their order, and leave `what' empty.  Does nothing if `what' is empty.
+ */
+static inline void clist_insert_list_after(clist *what, cnode *after)
+{
+  if (!clist_empty(what))
+    {
+      cnode *w = &what->head;
+      /* Connect the last moved node with the original successor of `after' ... */
+      w->prev->next = after->next;
+      after->next->prev = w->prev;
+      /* ... and the first moved node with `after' itself */
+      w->next->prev = after;
+      after->next = w->next;
+      clist_init(what);
+    }
+}
+
+/* Count the nodes of the list by walking it, so the time is linear in the list length. */
+static inline uns clist_size(clist *l)
+{
+  uns count = 0;
+
+  for (cnode *n = l->head.next; n != &l->head; n = n->next)
+    count++;
+  return count;
+}
+
+#endif
diff --git a/lib/conf-alloc.c b/lib/conf-alloc.c

new file mode 100644 (file)

index 0000000..9f02fd5
--- /dev/null
+++ b/lib/conf-alloc.c
@@ -0,0 +1,43 @@
+/*
+ *     UCW Library -- Configuration files: memory allocation
+ *
+ *     (c) 2001--2006 Robert Spalek <robert@ucw.cz>
+ *     (c) 2003--2006 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/conf.h"
+#include "lib/mempool.h"
+
+// All allocation helpers in this file (cf_malloc(), cf_malloc_zero(),
+// cf_strdup(), cf_printf()) take their memory from this pool.
+struct mempool *cf_pool;       // current pool for loading new configuration
+
+/* Allocate `size` bytes from the configuration pool cf_pool. */
+void *
+cf_malloc(uns size)
+{
+  void *block = mp_alloc(cf_pool, size);
+  return block;
+}
+
+/* Allocate `size` bytes from the configuration pool cf_pool using mp_alloc_zero(). */
+void *
+cf_malloc_zero(uns size)
+{
+  void *block = mp_alloc_zero(cf_pool, size);
+  return block;
+}
+
+/* Duplicate string `s` into the configuration pool cf_pool. */
+char *
+cf_strdup(const char *s)
+{
+  char *copy = mp_strdup(cf_pool, s);
+  return copy;
+}
+
+/*
+ * Format a printf-style message into memory taken from the configuration
+ * pool cf_pool and return the resulting string.
+ */
+char *
+cf_printf(const char *fmt, ...)
+{
+  char *text;
+  va_list ap;
+
+  va_start(ap, fmt);
+  text = mp_vprintf(cf_pool, fmt, ap);
+  va_end(ap);
+  return text;
+}
diff --git a/lib/conf-dump.c b/lib/conf-dump.c

new file mode 100644 (file)

index 0000000..0d40924
--- /dev/null
+++ b/lib/conf-dump.c
@@ -0,0 +1,123 @@
+/*
+ *     UCW Library -- Configuration files: dumping
+ *
+ *     (c) 2001--2006 Robert Spalek <robert@ucw.cz>
+ *     (c) 2003--2006 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/conf.h"
+#include "lib/getopt.h"
+#include "lib/conf-internal.h"
+#include "lib/clists.h"
+#include "lib/fastbuf.h"
+
+/* Write `nr` levels of indentation (two spaces each) to `fb`. */
+static void
+spaces(struct fastbuf *fb, uns nr)
+{
+  while (nr--)
+    bputs(fb, "  ");
+}
+
+/*
+ * Print a single value of a basic configuration type to `fb`.
+ *
+ * `ptr` points to the value, `type` selects its interpretation and `u` supplies
+ * the lookup table (CT_LOOKUP) or the user type description (CT_USER).
+ * Built-in types are printed followed by a single space; user types are
+ * printed by their own dumper, or as "??? " when they have none.
+ */
+static void
+dump_basic(struct fastbuf *fb, void *ptr, enum cf_type type, union cf_union *u)
+{
+  switch (type) {
+    // the argument types must match the conversion specifiers exactly
+    case CT_INT:       bprintf(fb, "%d ", *(int*)ptr); break;
+    case CT_U64:       bprintf(fb, "%llu ", (unsigned long long) *(u64*)ptr); break;
+    case CT_DOUBLE:    bprintf(fb, "%lg ", *(double*)ptr); break;
+    case CT_IP:                bprintf(fb, "%08x ", *(uns*)ptr); break;
+    case CT_STRING:
+      if (*(char**)ptr)
+       bprintf(fb, "'%s' ", *(char**)ptr);
+      else
+       bprintf(fb, "NULL ");
+      break;
+    // a negative index marks a value which is not in the lookup table
+    case CT_LOOKUP:    bprintf(fb, "%s ", *(int*)ptr >= 0 ? u->lookup[ *(int*)ptr ] : "???"); break;
+    case CT_USER:
+      if (u->utype->dumper)
+       u->utype->dumper(fb, ptr);
+      else
+       bprintf(fb, "??? ");
+      break;
+  }
+}
+
+// dump_item() and dump_section() call each other, hence the forward declaration
+static void dump_section(struct fastbuf *fb, struct cf_section *sec, int level, void *ptr);
+
+// Printable names of item classes; dump_item() indexes this array by item->cls
+static char *class_names[] = { "end", "static", "dynamic", "parser", "section", "list", "bitmap" };
+
+/*
+ * Dump one configuration item to `fb`.
+ *
+ * First a header line is printed at indentation `level`: item name, class,
+ * number of values and, for static/dynamic/bitmap items, the type followed by
+ * the current values.  Sections and lists are then dumped recursively on the
+ * following lines.  `ptr` is the base address to which item->ptr is added to
+ * obtain the address of the item's data.
+ */
+static void
+dump_item(struct fastbuf *fb, struct cf_item *item, int level, void *ptr)
+{
+  ptr += (uintptr_t) item->ptr;
+  enum cf_type type = item->type;
+  uns size = cf_type_size(item->type, item->u.utype);
+  int i;
+  spaces(fb, level);
+  bprintf(fb, "%s: C%s #", item->name, class_names[item->cls]);
+  if (item->number == CF_ANY_NUM)
+    bputs(fb, "any ");
+  else
+    bprintf(fb, "%d ", item->number);
+  if (item->cls == CC_STATIC || item->cls == CC_DYNAMIC || item->cls == CC_BITMAP) {
+    bprintf(fb, "T%s ", cf_type_names[type]);
+    if (item->type == CT_USER)
+      bprintf(fb, "U%s S%d ", item->u.utype->name, size);
+  }
+  if (item->cls == CC_STATIC) {
+    for (i=0; i<item->number; i++)
+      dump_basic(fb, ptr + i * size, type, &item->u);
+  } else if (item->cls == CC_DYNAMIC) {
+    // the item holds a pointer to the array; the array itself knows its length
+    ptr = * (void**) ptr;
+    if (ptr) {
+      int real_nr = DARY_LEN(ptr);
+      bprintf(fb, "N%d ", real_nr);
+      for (i=0; i<real_nr; i++)
+       dump_basic(fb, ptr + i * size, type, &item->u);
+    } else
+      bprintf(fb, "NULL ");
+  } else if (item->cls == CC_BITMAP) {
+    u32 mask = * (u32*) ptr;
+    for (i=0; i<32; i++) {
+      // a lookup table ends with a NULL entry; do not read past it
+      if (item->type == CT_LOOKUP && !item->u.lookup[i])
+       break;
+      // unsigned constant: shifting a signed 1 by 31 bits would overflow
+      if (mask & (1U<<i)) {
+       if (item->type == CT_INT)
+         bprintf(fb, "%d ", i);
+       else if (item->type == CT_LOOKUP)
+         bprintf(fb, "%s ", item->u.lookup[i]);
+      }
+    }
+  }
+  bputc(fb, '\n');
+  if (item->cls == CC_SECTION)
+    dump_section(fb, item->u.sec, level+1, ptr);
+  else if (item->cls == CC_LIST) {
+    uns idx = 0;
+    // every list node is dumped as one instance of the list's section
+    CLIST_FOR_EACH(cnode *, n, * (clist*) ptr) {
+      spaces(fb, level+1);
+      bprintf(fb, "item %d\n", ++idx);
+      dump_section(fb, item->u.sec, level+2, n);
+    }
+  }
+}
+
+/*
+ * Dump section `sec`: a header line with its size and flags, followed by all
+ * of its items up to the terminating item whose class is zero.  `ptr` is passed
+ * on to dump_item() as the base address of the section's data.
+ */
+static void
+dump_section(struct fastbuf *fb, struct cf_section *sec, int level, void *ptr)
+{
+  spaces(fb, level);
+  bprintf(fb, "S%d F%x:\n", sec->size, sec->flags);
+  struct cf_item *it = sec->cfg;
+  while (it->cls)
+    dump_item(fb, it++, level, ptr);
+}
+
+/* Dump the whole tree of registered sections, starting at cf_sections, to `fb`. */
+void
+cf_dump_sections(struct fastbuf *fb)
+{
+  struct cf_section *root = &cf_sections;
+  dump_section(fb, root, 0, NULL);
+}
+
diff --git a/lib/conf-input.c b/lib/conf-input.c

new file mode 100644 (file)

index 0000000..c5d2527
--- /dev/null
+++ b/lib/conf-input.c
@@ -0,0 +1,455 @@
+/*
+ *     UCW Library -- Configuration files: parsing input streams
+ *
+ *     (c) 2001--2006 Robert Spalek <robert@ucw.cz>
+ *     (c) 2003--2006 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/conf.h"
+#include "lib/getopt.h"
+#include "lib/conf-internal.h"
+#include "lib/mempool.h"
+#include "lib/fastbuf.h"
+#include "lib/chartype.h"
+#include "lib/stkstring.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+
+/* Text file parser */
+
+// Name of the stream being parsed, used in messages; NULL when parsing a string
+static const char *name_parse_fb;
+// Stream from which get_line() reads
+static struct fastbuf *parse_fb;
+// Number of the current line; incremented by every get_line() call
+static uns line_num;
+
+// The current input line and a cursor pointing to its not yet processed part
+#define MAX_LINE       4096
+static char line_buf[MAX_LINE];
+static char *line = line_buf;
+
+// Growing buffer of zero-terminated words of the current command;
+// `copied` is the number of bytes used so far
+#include "lib/bbuf.h"
+static bb_t copy_buf;
+static uns copied;
+
+// Growing array of offsets into copy_buf at which the words start;
+// `words` is the number of offsets recorded so far
+#define GBUF_TYPE      uns
+#define GBUF_PREFIX(x) split_##x
+#include "lib/gbuf.h"
+static split_t word_buf;
+static uns words;
+static uns ends_by_brace;              // the line is ended by "{"
+
+/*
+ * Read the next input line from parse_fb into line_buf, bump line_num and
+ * place the `line` cursor on the first non-blank character.
+ *
+ * Returns 1 when a line has been read.  Otherwise returns 0 and sets *msg to
+ * "Line too long" when bgets_nodie() reported a negative value, or to NULL
+ * when it reported zero (presumably the end of input).
+ */
+static int
+get_line(char **msg)
+{
+  int got = bgets_nodie(parse_fb, line_buf, MAX_LINE);
+  line_num++;
+  if (got < 0) {
+    *msg = "Line too long";
+    return 0;
+  }
+  if (!got) {
+    *msg = NULL;
+    return 0;
+  }
+  for (line = line_buf; Cblank(*line); line++)
+    ;
+  return 1;
+}
+
+/*
+ * Append the characters in [start, end) to copy_buf, followed by a
+ * terminating zero byte, and advance `copied` past both.
+ */
+static void
+append(char *start, char *end)
+{
+  uns len = end - start;
+  uns new_used = copied + len + 1;
+
+  bb_grow(&copy_buf, new_used);
+  memcpy(copy_buf.ptr + copied, start, len);
+  copy_buf.ptr[new_used - 1] = 0;
+  copied = new_used;
+}
+
+/*
+ * Copy the word starting at the `line` cursor to copy_buf and move the cursor
+ * behind the word and behind all blanks following it.  Three forms of words
+ * are recognized:
+ *
+ *   '...'   taken literally; can span several input lines, which are then
+ *           joined by newline characters
+ *   "..."   can span several lines as well; a line ending with a backslash
+ *           which is not itself escaped is merged with the next line and the
+ *           whole word is finally processed by stk_str_unesc()
+ *   other   a run of characters ended by a blank, '{', '}', ';' or the end of
+ *           the line; one of the three control characters at the very start
+ *           forms a word of its own.  If is_command_name is set, the word
+ *           also ends at '=', which is then replaced by a blank.
+ *
+ * Returns NULL on success or an error message.
+ */
+static char *
+get_word(uns is_command_name)
+{
+  char *msg;
+  if (*line == '\'') {
+    line++;
+    while (1) {
+      char *start = line;
+      while (*line && *line != '\'')
+       line++;
+      append(start, line);
+      if (*line)
+       break;
+      // the line ended inside the word: turn the terminator into a newline and read on
+      copy_buf.ptr[copied-1] = '\n';
+      if (!get_line(&msg))
+       return msg ? : "Unterminated apostrophe word at the end";
+    }
+    line++;
+
+  } else if (*line == '"') {
+    line++;
+    uns start_copy = copied;
+    while (1) {
+      char *start = line;
+      // set iff the previous character was a backslash which was not escaped itself
+      uns escape = 0;
+      while (*line) {
+       if (*line == '"' && !escape)
+         break;
+       else if (*line == '\\')
+         escape ^= 1;
+       else
+         escape = 0;
+       line++;
+      }
+      append(start, line);
+      if (*line)
+       break;
+      if (!escape)
+       copy_buf.ptr[copied-1] = '\n';
+      else // merge two lines
+       copied -= 2;
+      if (!get_line(&msg))
+       return msg ? : "Unterminated quoted word at the end";
+    }
+    line++;
+
+    // replace the raw text of the word by its unescaped form
+    char *tmp = stk_str_unesc(copy_buf.ptr + start_copy);
+    uns l = strlen(tmp);
+    bb_grow(&copy_buf, start_copy + l + 1);
+    strcpy(copy_buf.ptr + start_copy, tmp);
+    copied = start_copy + l + 1;
+
+  } else {
+    // promised that *line is non-null and non-blank
+    char *start = line;
+    while (*line && !Cblank(*line)
+       && *line != '{' && *line != '}' && *line != ';'
+       && (*line != '=' || !is_command_name))
+      line++;
+    if (*line == '=') {                                // nice for setting from a command-line
+      if (line == start)
+       return "Assignment without a variable";
+      *line = ' ';
+    }
+    if (line == start)                         // already the first char is control
+      line++;
+    append(start, line);
+  }
+  while (Cblank(*line))
+    line++;
+  return NULL;
+}
+
+/*
+ * Fetch the next word of the current command.
+ *
+ * On success, the offset of the word is recorded in word_buf and a pointer
+ * to its copy in copy_buf is returned.  NULL is returned with *err == NULL
+ * when there is no further word, and with *err set to a message on error.
+ *
+ * If is_command_name is set (the first word of a command is wanted), empty
+ * lines, comments and ';' separators are skipped and further lines are read
+ * as needed.  Otherwise the end of the line, a comment or a ';' ends the
+ * command.  A backslash at the very end of a line continues the command on
+ * the next line in both cases.
+ */
+static char *
+get_token(uns is_command_name, char **err)
+{
+  *err = NULL;
+  while (1) {
+    if (!*line || *line == '#') {
+      if (!is_command_name || !get_line(err))
+       return NULL;
+    } else if (*line == ';') {
+      // consume the separator; its copy in copy_buf is not recorded as a word
+      *err = get_word(0);
+      if (!is_command_name || *err)
+       return NULL;
+    } else if (*line == '\\' && !line[1]) {
+      if (!get_line(err)) {
+       if (!*err)
+         *err = "Last line ends by a backslash";
+       return NULL;
+      }
+      if (!*line || *line == '#')
+       msg(L_WARN, "The line %s:%d following a backslash is empty", name_parse_fb ? : "", line_num);
+    } else {
+      split_grow(&word_buf, words+1);
+      // remember the offset, because get_word() can reallocate copy_buf
+      uns start = copied;
+      word_buf.ptr[words++] = copied;
+      *err = get_word(is_command_name);
+      return *err ? NULL : copy_buf.ptr + start;
+    }
+  }
+}
+
+/*
+ * Read one command and split it to words: the words are stored in copy_buf,
+ * their offsets in word_buf and their number in `words`.  If the command is
+ * ended by an opening brace, the brace is not counted as a word and
+ * ends_by_brace is set instead.
+ *
+ * Returns NULL on success or an error message.  When the input is exhausted,
+ * NULL is returned with `words` equal to zero.
+ */
+static char *
+split_command(void)
+{
+  words = copied = ends_by_brace = 0;
+  char *msg, *start_word;
+  if (!(start_word = get_token(1, &msg)))
+    return msg;
+  if (*start_word == '{')                      // only one opening brace
+    return "Unexpected opening brace";
+  while (*line != '}')                         // stays for the next time
+  {
+    if (!(start_word = get_token(0, &msg)))
+      return msg;
+    if (*start_word == '{') {
+      words--;                                 // discard the brace
+      ends_by_brace = 1;
+      break;
+    }
+  }
+  return NULL;
+}
+
+/* Parsing multiple files */
+
+/*
+ * Parse configuration commands from stream `fb` and interpret them one by one.
+ *
+ * `name_fb` is the name of the stream used in error messages, or NULL when
+ * the commands do not come from a file.  `depth` is the current nesting level
+ * of `include' commands; an include is refused once it exceeds 8.
+ *
+ * Returns NULL on success.  On failure, the error is logged by msg() and the
+ * string "included from here" is returned, which an including file then
+ * reports as its own error.
+ */
+static char *
+parse_fastbuf(const char *name_fb, struct fastbuf *fb, uns depth)
+{
+  char *err;
+  name_parse_fb = name_fb;
+  parse_fb = fb;
+  line_num = 0;
+  line = line_buf;
+  *line = 0;
+  while (1)
+  {
+    err = split_command();
+    if (err)
+      goto error;
+    if (!words)
+      return NULL;
+    char *name = copy_buf.ptr + word_buf.ptr[0];
+    char *pars[words-1];
+    for (uns i=1; i<words; i++)
+      pars[i-1] = copy_buf.ptr + word_buf.ptr[i];
+    if (!strcasecmp(name, "include"))
+    {
+      if (words != 2)
+       err = "Expecting one filename";
+      else if (depth > 8)
+       err = "Too many nested files";
+      else if (*line && *line != '#')          // because the contents of line_buf is not re-entrant and will be cleared
+       err = "The input command must be the last one on a line";
+      if (err)
+       goto error;
+      struct fastbuf *new_fb = bopen_try(pars[0], O_RDONLY, 1<<14);
+      if (!new_fb) {
+       err = cf_printf("Cannot open file %s: %m", pars[0]);
+       goto error;
+      }
+      uns ll = line_num;
+      err = parse_fastbuf(stk_strdup(pars[0]), new_fb, depth+1);
+      line_num = ll;
+      bclose(new_fb);
+      if (err)
+       goto error;
+      // the nested call has overwritten the parser state, so restore it;
+      // without restoring the name, warnings would blame the included file
+      parse_fb = fb;
+      name_parse_fb = name_fb;
+      continue;
+    }
+    enum cf_operation op;
+    char *c = strchr(name, ':');
+    if (!c)
+      op = strcmp(name, "}") ? OP_SET : OP_CLOSE;
+    else {
+      // "name:operation" -- guess the operation by its first letters ...
+      *c++ = 0;
+      switch (Clocase(*c)) {
+       case 's': op = OP_SET; break;
+       case 'c': op = Clocase(c[1]) == 'l' ? OP_CLEAR: OP_COPY; break;
+       case 'a': switch (Clocase(c[1])) {
+                   case 'p': op = OP_APPEND; break;
+                   case 'f': op = OP_AFTER; break;
+                   default: op = OP_ALL;
+                 }; break;
+       case 'p': op = OP_PREPEND; break;
+       case 'r': op = OP_REMOVE; break;
+       case 'e': op = OP_EDIT; break;
+       case 'b': op = OP_BEFORE; break;
+       default: op = OP_SET; break;
+      }
+      // ... and check that the whole name matches the guess
+      if (strcasecmp(c, cf_op_names[op])) {
+       err = cf_printf("Unknown operation %s", c);
+       goto error;
+      }
+    }
+    if (ends_by_brace)
+      op |= OP_OPEN;
+    err = cf_interpret_line(name, op, words-1, pars);
+    if (err)
+      goto error;
+  }
+error:
+  if (name_fb)
+    msg(L_ERROR, "File %s, line %d: %s", name_fb, line_num, err);
+  else if (line_num == 1)
+    msg(L_ERROR, "Manual setting of configuration: %s", err);
+  else
+    msg(L_ERROR, "Manual setting of configuration, line %d: %s", line_num, err);
+  return "included from here";
+}
+
+// Name of the default configuration file loaded by load_default(); load_file()
+// resets it to NULL as soon as any file has been loaded successfully
+#ifndef DEFAULT_CONFIG
+#define DEFAULT_CONFIG NULL
+#endif
+char *cf_def_file = DEFAULT_CONFIG;
+
+// Name of an environment variable; if that variable is set, load_default()
+// loads the file it names instead of cf_def_file
+#ifndef ENV_VAR_CONFIG
+#define ENV_VAR_CONFIG NULL
+#endif
+char *cf_env_file = ENV_VAR_CONFIG;
+
+static uns postpone_commit;                    // only for cf_getopt()
+static uns everything_committed;               // after the 1st load, this flag is set on
+
+/*
+ * Finish a load: check the stack by cf_check_stack() and run cf_commit_all()
+ * in the mode selected by the postpone_commit and everything_committed flags.
+ * Unless commits are postponed, everything_committed gets set afterwards.
+ *
+ * Returns 0 on success and 1 on failure.
+ */
+static int
+done_stack(void)
+{
+  if (cf_check_stack())
+    return 1;
+
+  enum cf_commit_mode mode;
+  if (postpone_commit)
+    mode = CF_NO_COMMIT;
+  else if (everything_committed)
+    mode = CF_COMMIT;
+  else
+    mode = CF_COMMIT_ALL;
+  if (cf_commit_all(mode))
+    return 1;
+
+  if (!postpone_commit)
+    everything_committed = 1;
+  return 0;
+}
+
+/*
+ * Parse the configuration file `file` and finish the load by done_stack().
+ * After a successful load, cf_def_file is reset to NULL.
+ *
+ * Returns 0 on success and 1 on failure.
+ */
+static int
+load_file(const char *file)
+{
+  cf_init_stack();
+  struct fastbuf *input = bopen_try(file, O_RDONLY, 1<<14);
+  if (!input) {
+    msg(L_ERROR, "Cannot open %s: %m", file);
+    return 1;
+  }
+  char *parse_err = parse_fastbuf(file, input, 0);
+  bclose(input);
+
+  // done_stack() is run only if the parsing itself has succeeded
+  int failed = 1;
+  if (!parse_err)
+    failed = done_stack() ? 1 : 0;
+  if (failed)
+    return failed;
+  cf_def_file = NULL;
+  return 0;
+}
+
+/*
+ * Parse configuration commands stored in the string `string` and finish the
+ * load by done_stack().
+ *
+ * Returns 0 on success and 1 on failure.
+ */
+static int
+load_string(const char *string)
+{
+  cf_init_stack();
+  struct fastbuf src;
+  fbbuf_init_read(&src, (byte *)string, strlen(string), 0);
+  // done_stack() is run only if the parsing itself has succeeded
+  if (parse_fastbuf(NULL, &src, 0))
+    return 1;
+  return done_stack() ? 1 : 0;
+}
+
+/* Safe loading and reloading */
+
+/*
+ * Load the configuration file `file` inside a journal transaction which is
+ * started after cf_journal_swap().  On success, cf_journal_delete() is called
+ * and the transaction is committed.  On failure, the transaction is rolled
+ * back, the journal is swapped back and everything_committed is restored.
+ *
+ * Returns 0 on success and 1 on failure.
+ */
+int
+cf_reload(const char *file)
+{
+  cf_journal_swap();
+  struct cf_journal_item *oldj = cf_journal_new_transaction(1);
+  // clear the flag for the duration of the load, so that done_stack() uses CF_COMMIT_ALL
+  uns ec = everything_committed;
+  everything_committed = 0;
+  int err = load_file(file);
+  if (!err)
+  {
+    cf_journal_delete();
+    cf_journal_commit_transaction(1, NULL);
+  }
+  else
+  {
+    everything_committed = ec;
+    cf_journal_rollback_transaction(1, oldj);
+    cf_journal_swap();
+  }
+  return err;
+}
+
+/*
+ * Load the configuration file `file` inside a journal transaction, which is
+ * committed on success and rolled back on failure.
+ *
+ * Returns 0 on success and 1 on failure.
+ */
+int
+cf_load(const char *file)
+{
+  struct cf_journal_item *saved = cf_journal_new_transaction(1);
+  int err = load_file(file);
+  if (err)
+    cf_journal_rollback_transaction(1, saved);
+  else
+    cf_journal_commit_transaction(1, saved);
+  return err;
+}
+
+/*
+ * Interpret the configuration commands in `string` inside a journal
+ * transaction, which is committed on success and rolled back on failure.
+ *
+ * Returns 0 on success and 1 on failure.
+ */
+int
+cf_set(const char *string)
+{
+  struct cf_journal_item *saved = cf_journal_new_transaction(0);
+  int err = load_string(string);
+  if (err)
+    cf_journal_rollback_transaction(0, saved);
+  else
+    cf_journal_commit_transaction(0, saved);
+  return err;
+}
+
+/* Command-line parser */
+
+/*
+ * Load the default configuration if it is still pending, i.e. if cf_def_file
+ * is set.  The file named by the environment variable cf_env_file is
+ * preferred if that variable exists, otherwise cf_def_file is loaded.  A
+ * failed load is reported by die().  When no default is pending, only an
+ * empty journal transaction is committed.
+ */
+static void
+load_default(void)
+{
+  if (!cf_def_file)
+    {
+      // We need to create an empty pool
+      cf_journal_commit_transaction(1, cf_journal_new_transaction(1));
+      return;
+    }
+
+  char *env = NULL;
+  if (cf_env_file)
+    env = getenv(cf_env_file);
+  if (env)
+    {
+      if (cf_load(env))
+       die("Cannot load config file %s", env);
+    }
+  else if (cf_load(cf_def_file))
+    die("Cannot load default config %s", cf_def_file);
+}
+
+/*
+ * If commits have been postponed, stop postponing them and perform the
+ * commit now by done_stack(); a failure is reported by die().
+ */
+static void
+final_commit(void)
+{
+  if (!postpone_commit)
+    return;
+  postpone_commit = 0;
+  if (done_stack())
+    die("Cannot commit after the initialization");
+}
+
+/*
+ * A wrapper around getopt_long() which handles the configuration options
+ * itself and returns all other results to the caller.
+ *
+ *   -S   load the default configuration if still pending, then interpret the
+ *        argument as configuration commands
+ *   -C   load the configuration file given as the argument
+ *   the option with value 0x64436667 (--dumpconfig, only if compiled with
+ *        CONFIG_DEBUG): dump the configuration to file descriptor 1 and exit
+ *
+ * These options must precede all other ones.  Commits are postponed while
+ * they are processed; once another result is seen, the default configuration
+ * is loaded if still pending (not for the ':' and '?' error results), the
+ * final commit is done and the result is returned.
+ */
+int
+cf_getopt(int argc, char * const argv[], const char *short_opts, const struct option *long_opts, int *long_index)
+{
+  // number of results already passed to the caller
+  static int other_options = 0;
+  while (1) {
+    int res = getopt_long (argc, argv, short_opts, long_opts, long_index);
+    if (res == 'S' || res == 'C' || res == 0x64436667)
+    {
+      if (other_options)
+       die("The -S and -C options must precede all other arguments");
+      if (res == 'S') {
+       postpone_commit = 1;
+       load_default();
+       if (cf_set(optarg))
+         die("Cannot set %s", optarg);
+      } else if (res == 'C') {
+       postpone_commit = 1;
+       if (cf_load(optarg))
+         die("Cannot load config file %s", optarg);
+      }
+#ifdef CONFIG_DEBUG
+      else {   /* --dumpconfig */
+       load_default();
+       final_commit();
+       struct fastbuf *b = bfdopen(1, 4096);
+       cf_dump_sections(b);
+       bclose(b);
+       exit(0);
+      }
+#endif
+    } else {
+      /* unhandled option or end of options */
+      if (res != ':' && res != '?')
+       load_default();
+      final_commit();
+      other_options++;
+      return res;
+    }
+  }
+}
+
diff --git a/lib/conf-internal.h b/lib/conf-internal.h

new file mode 100644 (file)

index 0000000..377a9cb
--- /dev/null
+++ b/lib/conf-internal.h
@@ -0,0 +1,44 @@
+/*
+ *     UCW Library -- Configuration files: only for internal use of conf-*.c
+ *
+ *     (c) 2001--2006 Robert Spalek <robert@ucw.cz>
+ *     (c) 2003--2006 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#ifndef        _UCW_CONF_INTERNAL_H
+#define        _UCW_CONF_INTERNAL_H
+
+/* conf-intr.c */
+// Flags which are combined with the operation code (enum cf_operation)
+#define OP_MASK 0xff           // only get the operation
+#define OP_OPEN 0x100          // here we only get an opening brace instead of parameters
+#define OP_1ST 0x200           // in the 1st phase selectors are recorded into the mask
+#define OP_2ND 0x400           // in the 2nd phase real data are entered
+enum cf_operation;
+extern char *cf_op_names[];    // names of operations, indexed by the operation code
+extern char *cf_type_names[];  // names of basic types, indexed by enum cf_type
+
+uns cf_type_size(enum cf_type type, struct cf_user_type *utype);
+char *cf_interpret_line(char *name, enum cf_operation op, int number, char **pars);
+void cf_init_stack(void);
+int cf_check_stack(void);
+
+/* conf-journal.c */
+void cf_journal_swap(void);
+void cf_journal_delete(void);
+
+/* conf-section.c */
+// Bits of cf_section.flags
+#define SEC_FLAG_DYNAMIC       0x80000000      // contains a dynamic attribute
+#define SEC_FLAG_UNKNOWN       0x40000000      // ignore unknown entries
+#define SEC_FLAG_CANT_COPY     0x20000000      // contains lists or parsers
+#define SEC_FLAG_NUMBER                0x0fffffff      // number of entries
+enum cf_commit_mode { CF_NO_COMMIT, CF_COMMIT, CF_COMMIT_ALL };
+extern struct cf_section cf_sections;  // root of the tree of sections
+
+struct cf_item *cf_find_subitem(struct cf_section *sec, const char *name);
+int cf_commit_all(enum cf_commit_mode cm);
+void cf_add_dirty(struct cf_section *sec, void *ptr);
+
+#endif
diff --git a/lib/conf-intr.c b/lib/conf-intr.c

new file mode 100644 (file)

index 0000000..84f555a
--- /dev/null
+++ b/lib/conf-intr.c
@@ -0,0 +1,645 @@
+/*
+ *     UCW Library -- Configuration files: interpreter
+ *
+ *     (c) 2001--2006 Robert Spalek <robert@ucw.cz>
+ *     (c) 2003--2006 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/conf.h"
+#include "lib/getopt.h"
+#include "lib/conf-internal.h"
+#include "lib/clists.h"
+
+#include <string.h>
+#include <stdio.h>
+
+// Evaluate `f` once; if it yields a non-NULL error message, return that
+// message from the calling function immediately
+#define TRY(f) do { char *_msg = f; if (_msg) return _msg; } while (0)
+
+/* Register size of and parser for each basic type */
+
+/*
+ * Parser of string values: store a copy of `str` made by cf_strdup() to *ptr.
+ * Never fails, so it always returns NULL.
+ */
+static char *
+cf_parse_string(char *str, char **ptr)
+{
+  char *copy = cf_strdup(str);
+  *ptr = copy;
+  return NULL;
+}
+
+// Signature shared by the parsers of basic types: parse `str`, store the result
+// to `ptr` and return NULL or an error message
+typedef char *cf_basic_parser(char *str, void *ptr);
+
+// Size and parser of each type, indexed by enum cf_type
+// (the entries presumably follow the order of the enum -- TODO confirm against conf.h)
+static struct {
+  uns size;
+  void *parser;
+} parsers[] = {
+  { sizeof(int), cf_parse_int },
+  { sizeof(u64), cf_parse_u64 },
+  { sizeof(double), cf_parse_double },
+  { sizeof(u32), cf_parse_ip },
+  { sizeof(char*), cf_parse_string },
+  { sizeof(int), NULL },                       // lookups are parsed extra
+  { 0, NULL },                                 // user-defined types are parsed extra
+};
+
+/*
+ * Return the size of a single value of the given type: for built-in types it
+ * comes from the parsers[] table, for user-defined ones from `utype`.
+ */
+inline uns
+cf_type_size(enum cf_type type, struct cf_user_type *utype)
+{
+  if (type >= CT_USER)
+    return utype->size;
+  return parsers[type].size;
+}
+
+/*
+ * Parser of lookup values: find `str` in the NULL-terminated table `t`,
+ * ignoring case, and store its index to *ptr.
+ *
+ * Returns NULL on success.  If the string is not found, *ptr is set to -1 and
+ * an error message listing all possible values is returned.
+ */
+static char *
+cf_parse_lookup(char *str, int *ptr, char **t)
+{
+  char **n = t;
+  // space needed for listing the entries passed so far, including ", " after each
+  uns total_len = 0;
+  while (*n && strcasecmp(*n, str)) {
+    total_len += strlen(*n) + 2;
+    n++;
+  }
+  if (*n) {
+    *ptr = n - t;
+    return NULL;
+  }
+  // not found, so total_len covers the whole table; the 60 extra bytes are for the fixed text
+  char *err = cf_malloc(total_len + strlen(str) + 60), *c = err;
+  c += sprintf(err, "Invalid value %s, possible values are: ", str);
+  for (n=t; *n; n++)
+    c+= sprintf(c, "%s, ", *n);
+  // cut off the trailing ", "
+  if (*t)
+    c[-2] = 0;
+  *ptr = -1;
+  return err;
+}
+
+/*
+ * Parse `number` values of the given type from pars[] and store them to
+ * consecutive elements of the array starting at `ptr`.  `u` supplies the
+ * lookup table (CT_LOOKUP) or the user type description (CT_USER).
+ *
+ * Returns NULL on success or the message of the first failing parser; if more
+ * than one value was requested, the message is prefixed by the 1-based
+ * position of the value.
+ */
+static char *
+cf_parse_ary(uns number, char **pars, void *ptr, enum cf_type type, union cf_union *u)
+{
+  for (uns i=0; i<number; i++)
+  {
+    char *msg;
+    uns size = cf_type_size(type, u->utype);
+    if (type < CT_LOOKUP)
+      msg = ((cf_basic_parser*) parsers[type].parser) (pars[i], ptr + i * size);
+    else if (type == CT_LOOKUP)
+      msg = cf_parse_lookup(pars[i], ptr + i * size, u->lookup);
+    else if (type == CT_USER)
+      msg = u->utype->parser(pars[i], ptr + i * size);
+    else
+      ASSERT(0);
+    if (msg)
+      return number > 1 ? cf_printf("Item %d: %s", i+1, msg) : msg;
+  }
+  return NULL;
+}
+
+/* Interpreter */
+
+// CF_OPERATIONS is expanded with T() defined to stringify its argument, which
+// produces the table of names of all operations
+#define T(x) #x,
+char *cf_op_names[] = { CF_OPERATIONS };
+#undef T
+// Names of basic types, indexed by enum cf_type
+char *cf_type_names[] = { "int", "u64", "double", "ip", "string", "lookup", "user" };
+
+// Size of the header which precedes the data of each dynamic array
+#define DARY_HDR_SIZE ALIGN_TO(sizeof(uns), CPU_STRUCT_ALIGN)
+
+/*
+ * Replace the dynamic array referenced by *ptr with a newly allocated array
+ * of `number` elements parsed from pars[].  The old value of the pointer is
+ * journalled first.
+ *
+ * Returns NULL on success or an error message from the parser.
+ */
+static char *
+interpret_set_dynamic(struct cf_item *item, int number, char **pars, void **ptr)
+{
+  cf_journal_block(ptr, sizeof(void*));
+  // boundary checks done by the caller
+  uns elt_size = cf_type_size(item->type, item->u.utype);
+  // the data of the array start right behind its header
+  void *ary = cf_malloc(DARY_HDR_SIZE + number * elt_size) + DARY_HDR_SIZE;
+  *ptr = ary;
+  DARY_LEN(ary) = number;
+  return cf_parse_ary(number, pars, ary, item->type, &item->u);
+}
+
+/*
+ * Append (OP_APPEND) or prepend (OP_PREPEND) values parsed from pars[] to the
+ * dynamic array referenced by *ptr.  A new array is allocated, the old
+ * elements are copied to it and the old value of the pointer is journalled.
+ * At most ABS(item->number) elements in total are kept; the number of
+ * parameters really used is stored to *processed.
+ *
+ * Returns NULL on success or an error message.
+ */
+static char *
+interpret_add_dynamic(struct cf_item *item, int number, char **pars, int *processed, void **ptr, enum cf_operation op)
+{
+  enum cf_type type = item->type;
+  void *old_p = *ptr;
+  uns size = cf_type_size(item->type, item->u.utype);
+  ASSERT(size >= sizeof(uns));
+  int old_nr = old_p ? DARY_LEN(old_p) : 0;
+  // NOTE(review): `taken` looks like it becomes negative if the old array is
+  // already longer than ABS(item->number) -- confirm that this cannot happen
+  int taken = MIN(number, ABS(item->number)-old_nr);
+  *processed = taken;
+  // stretch the dynamic array
+  void *new_p = cf_malloc(DARY_HDR_SIZE + (old_nr + taken) * size) + DARY_HDR_SIZE;
+  DARY_LEN(new_p) = old_nr + taken;
+  cf_journal_block(ptr, sizeof(void*));
+  *ptr = new_p;
+  // NOTE(review): an unsupported operation is rejected only after *ptr has
+  // been replaced; old_p can be NULL in the memcpy() calls when old_nr is 0
+  if (op == OP_APPEND) {
+    memcpy(new_p, old_p, old_nr * size);
+    return cf_parse_ary(taken, pars, new_p + old_nr * size, type, &item->u);
+  } else if (op == OP_PREPEND) {
+    memcpy(new_p + taken * size, old_p, old_nr * size);
+    return cf_parse_ary(taken, pars, new_p, type, &item->u);
+  } else
+    return cf_printf("Dynamic arrays do not support operation %s", cf_op_names[op]);
+}
+
+static char *interpret_set_item(struct cf_item *item, int number, char **pars, int *processed, void *ptr, uns allow_dynamic);
+
+/*
+ * Distribute the parameters pars[0..number-1] among the items of section
+ * `sec`, whose data start at `ptr`: each item takes as many parameters as it
+ * wants and the next item continues with the rest.  Dynamic items are allowed
+ * only at the last position and only if allow_dynamic is set.  The section is
+ * registered by cf_add_dirty() and the total number of parameters used is
+ * stored to *processed.
+ *
+ * Returns NULL on success or an error message prefixed by the item's name.
+ */
+static char *
+interpret_section(struct cf_section *sec, int number, char **pars, int *processed, void *ptr, uns allow_dynamic)
+{
+  cf_add_dirty(sec, ptr);
+  *processed = 0;
+  struct cf_item *ci = sec->cfg;
+  while (ci->cls)
+  {
+    int used;
+    char *err = interpret_set_item(ci, number, pars, &used, ptr + (uintptr_t) ci->ptr, allow_dynamic && !ci[1].cls);
+    if (err)
+      return cf_printf("Item %s: %s", ci->name, err);
+    *processed += used;
+    pars += used;
+    number -= used;
+    if (!number)               // stop parsing, because many parsers would otherwise complain that number==0
+      break;
+    ci++;
+  }
+  return NULL;
+}
+
+/*
+ * Perform the list part of operation `op`: remove node `where` from its list,
+ * or insert `new_node` after or before `where`, depending on the operation.
+ * All links which are going to be modified are journalled first.
+ */
+static void
+add_to_list(cnode *where, cnode *new_node, enum cf_operation op)
+{
+  switch (op)
+  {
+    case OP_EDIT:              // edition has been done in-place
+      break;
+    case OP_REMOVE:
+      CF_JOURNAL_VAR(where->prev->next);
+      CF_JOURNAL_VAR(where->next->prev);
+      clist_remove(where);
+      break;
+    case OP_AFTER:             // implementation dependent (prepend_head = after(list)), and where==list, see clists.h:74
+    case OP_PREPEND:
+    case OP_COPY:
+      CF_JOURNAL_VAR(where->next->prev);
+      CF_JOURNAL_VAR(where->next);
+      clist_insert_after(new_node, where);
+      break;
+    case OP_BEFORE:            // implementation dependent (append_tail = before(list))
+    case OP_APPEND:
+    case OP_SET:
+      CF_JOURNAL_VAR(where->prev->next);
+      CF_JOURNAL_VAR(where->prev);
+      clist_insert_before(new_node, where);
+      break;
+    default:
+      ASSERT(0);
+  }
+}
+
+/*
+ * Add new nodes to the list at `ptr` and fill them in from pars[]: each node
+ * is allocated, initialized, linked in according to `op` and then filled in
+ * by interpret_section().  Nodes are created until all parameters are used
+ * up, unless the section has a dynamic attribute, in which case exactly one
+ * node is created.  The number of parameters used is stored to *processed.
+ *
+ * Returns NULL on success or an error message.
+ */
+static char *
+interpret_add_list(struct cf_item *item, int number, char **pars, int *processed, void *ptr, enum cf_operation op)
+{
+  if (op >= OP_REMOVE)
+    return cf_printf("You have to open a block for operation %s", cf_op_names[op]);
+  if (!number)
+    return "Nothing to add to the list";
+  struct cf_section *sec = item->u.sec;
+  *processed = 0;
+  uns index = 0;
+  while (number > 0)
+  {
+    void *node = cf_malloc(sec->size);
+    cf_init_section(item->name, sec, node, 1);
+    add_to_list(ptr, node, op);
+    int taken;
+    /* If the node contains any dynamic attribute at the end, we suppress
+     * auto-repetition here and pass the flag inside instead.  */
+    index++;
+    char *msg = interpret_section(sec, number, pars, &taken, node, sec->flags & SEC_FLAG_DYNAMIC);
+    if (msg)
+      return sec->flags & SEC_FLAG_DYNAMIC ? msg : cf_printf("Node %d of list %s: %s", index, item->name, msg);
+    *processed += taken;
+    number -= taken;
+    pars += taken;
+    if (sec->flags & SEC_FLAG_DYNAMIC)
+      break;
+  }
+  return NULL;
+}
+
+/*
+ * Set (OP_SET) or clear (OP_REMOVE) bits of the 32-bit bitmap at `ptr`.  Each
+ * of the `number` parameters in pars[] selects one bit, either by its number
+ * (CT_INT) or by its name in the item's lookup table (CT_LOOKUP).  The old
+ * value of the bitmap is journalled and *processed is set to `number`.
+ *
+ * Returns NULL on success or an error message; *processed is not set on error.
+ */
+static char *
+interpret_add_bitmap(struct cf_item *item, int number, char **pars, int *processed, u32 *ptr, enum cf_operation op)
+{
+  if (op != OP_SET && op != OP_REMOVE)
+    return cf_printf("Cannot apply operation %s on a bitmap", cf_op_names[op]);
+  else if (item->type != CT_INT && item->type != CT_LOOKUP)
+    return cf_printf("Type %s cannot be used with bitmaps", cf_type_names[item->type]);
+  cf_journal_block(ptr, sizeof(u32));
+  for (int i=0; i<number; i++) {
+    // both parsers store an int, so the index is signed and negative values are rejected explicitly
+    int idx;
+    if (item->type == CT_INT)
+      TRY( cf_parse_int(pars[i], &idx) );
+    else
+      TRY( cf_parse_lookup(pars[i], &idx, item->u.lookup) );
+    if (idx < 0 || idx >= 32)
+      return "Bitmaps only have 32 bits";
+    // unsigned constant: shifting a signed 1 by 31 bits would overflow
+    if (op == OP_SET)
+      *ptr |= 1U<<idx;
+    else
+      *ptr &= ~(1U<<idx);
+  }
+  *processed = number;
+  return NULL;
+}
+
+/*
+ * Set item `item`, whose data are at `ptr`, from the parameters
+ * pars[0..number-1].  The number of parameters used is stored to *processed.
+ * If allow_dynamic is zero, items taking a variable number of parameters
+ * (dynamic arrays, lists, bitmaps and parsers with a negative item->number)
+ * are refused.
+ *
+ * Returns NULL on success or an error message.
+ */
+static char *
+interpret_set_item(struct cf_item *item, int number, char **pars, int *processed, void *ptr, uns allow_dynamic)
+{
+  int taken;
+  switch (item->cls)
+  {
+    case CC_STATIC:
+      if (!number)
+       return "Missing value";
+      taken = MIN(number, item->number);
+      *processed = taken;
+      uns size = cf_type_size(item->type, item->u.utype);
+      cf_journal_block(ptr, taken * size);
+      return cf_parse_ary(taken, pars, ptr, item->type, &item->u);
+    case CC_DYNAMIC:
+      if (!allow_dynamic)
+       return "Dynamic array cannot be used here";
+      taken = MIN(number, ABS(item->number));
+      *processed = taken;
+      return interpret_set_dynamic(item, taken, pars, ptr);
+    case CC_PARSER:
+      if (item->number < 0 && !allow_dynamic)
+       return "Parsers with variable number of parameters cannot be used here";
+      if (item->number > 0 && number < item->number)
+       return "Not enough parameters available for the parser";
+      taken = MIN(number, ABS(item->number));
+      *processed = taken;
+      // the parser gets copies made by cf_strdup() instead of pointers to the caller's strings
+      for (int i=0; i<taken; i++)
+       pars[i] = cf_strdup(pars[i]);
+      return item->u.par(taken, pars, ptr);
+    case CC_SECTION:
+      return interpret_section(item->u.sec, number, pars, processed, ptr, allow_dynamic);
+    case CC_LIST:
+      if (!allow_dynamic)
+       return "Lists cannot be used here";
+      return interpret_add_list(item, number, pars, processed, ptr, OP_SET);
+    case CC_BITMAP:
+      if (!allow_dynamic)
+       return "Bitmaps cannot be used here";
+      return interpret_add_bitmap(item, number, pars, processed, ptr, OP_SET);
+    default:
+      ASSERT(0);
+  }
+}
+
+/*
+ * Handle the operations which work on the whole item at `ptr` at once.
+ *
+ * For a bitmap, OP_CLEAR clears all bits and any other operation sets all
+ * bits: all 32 of them for CT_INT bitmaps, one per entry of the lookup table
+ * otherwise.  For other items, only OP_CLEAR is possible: it empties a list,
+ * replaces a dynamic array by an empty one, or resets static strings to NULL.
+ * The old contents are journalled in all cases.
+ *
+ * Returns NULL on success or an error message.
+ */
+static char *
+interpret_set_all(struct cf_item *item, void *ptr, enum cf_operation op)
+{
+  if (item->cls == CC_BITMAP) {
+    cf_journal_block(ptr, sizeof(u32));
+    if (op == OP_CLEAR)
+      * (u32*) ptr = 0;
+    else
+      if (item->type == CT_INT)
+       * (u32*) ptr = ~0u;
+      else {
+       // count the entries of the NULL-terminated lookup table
+       uns nr = 0;
+       while (item->u.lookup[nr])
+         nr++;
+       // shifting a 32-bit value by 32 or more bits is undefined, so an
+       // empty table and a table with 32 or more entries are handled extra
+       if (!nr)
+         * (u32*) ptr = 0;
+       else if (nr >= 32)
+         * (u32*) ptr = ~0u;
+       else
+         * (u32*) ptr = ~0u >> (32-nr);
+      }
+    return NULL;
+  } else if (op != OP_CLEAR)
+    return "The item is not a bitmap";
+
+  if (item->cls == CC_LIST) {
+    cf_journal_block(ptr, sizeof(clist));
+    clist_init(ptr);
+  } else if (item->cls == CC_DYNAMIC) {
+    cf_journal_block(ptr, sizeof(void *));
+    // a shared empty array: the length stored just before its (empty) data is zero
+    static uns zero = 0;
+    * (void**) ptr = (&zero) + 1;
+  } else if (item->cls == CC_STATIC && item->type == CT_STRING) {
+    cf_journal_block(ptr, item->number * sizeof(char*));
+    bzero(ptr, item->number * sizeof(char*));
+  } else
+    return "The item is not a list, dynamic array, bitmap, or string";
+  return NULL;
+}
+
+/*
+ * Compare the values of the static item `item` in two instances of a section
+ * starting at `i1` and `i2`.  Strings are compared by strcmp(), values of all
+ * other types by memcmp() of their bytes.
+ *
+ * Returns zero iff the values are equal.
+ */
+static int
+cmp_items(void *i1, void *i2, struct cf_item *item)
+{
+  ASSERT(item->cls == CC_STATIC);
+  uintptr_t offset = (uintptr_t) item->ptr;
+  void *lhs = i1 + offset;
+  void *rhs = i2 + offset;
+  if (item->type != CT_STRING)                 // all numeric types
+    return memcmp(lhs, rhs, cf_type_size(item->type, item->u.utype));
+  return strcmp(* (char**) lhs, * (char**) rhs);
+}
+
+/*
+ * Find the first node of `list` which agrees with `query` in all items
+ * selected by `mask`: bit i of the mask selects the i-th item of section
+ * `sec`.  Both the nodes and the query are instances of that section.
+ *
+ * Returns the node found, or NULL if there is no such node.
+ */
+static void *
+find_list_node(clist *list, void *query, struct cf_section *sec, u32 mask)
+{
+  CLIST_FOR_EACH(cnode *, n, *list)
+  {
+    uns found = 1;
+    for (uns i=0; i<32; i++)
+      // unsigned constant: shifting a signed 1 by 31 bits would overflow
+      if ((mask & (1U<<i)) && cmp_items(n, query, sec->cfg+i))
+      {
+       found = 0;
+       break;
+      }
+    if (found)
+      return n;
+  }
+  return NULL;
+}
+
+/*
+ * If `item` is one of the items of section `sec`, record it in `mask` as a
+ * selector for searching in a list: the bit with the index of the item is
+ * set.  Only the first 32 items can be used and they must be static.  Items
+ * which do not belong to the section are silently ignored.
+ *
+ * Returns NULL on success or an error message.
+ */
+static char *
+record_selector(struct cf_item *item, struct cf_section *sec, u32 *mask)
+{
+  uns nr = sec->flags & SEC_FLAG_NUMBER;
+  if (item >= sec->cfg && item < sec->cfg + nr)        // setting an attribute relative to this section
+  {
+    uns i = item - sec->cfg;
+    if (i >= 32)
+      return "Cannot select list nodes by this attribute";
+    if (sec->cfg[i].cls != CC_STATIC)
+      return "Selection can only be done based on basic attributes";
+    // unsigned constant: shifting a signed 1 by 31 bits would overflow
+    *mask |= 1U << i;
+  }
+  return NULL;
+}
+
+// Stack of currently open blocks; `level` is the index of the innermost one.
+// opening_brace() pushes an entry, closing_brace() pops it.
+#define MAX_STACK_SIZE 10
+static struct item_stack {
+  struct cf_section *sec;      // nested section
+  void *base_ptr;              // because original pointers are often relative
+  enum cf_operation op;                // it is performed when a closing brace is encountered
+  void *list;                  // list the operations should be done on
+  u32 mask;                    // bit array of selectors searching in a list
+  struct cf_item *item;                // cf_item of the list
+} stack[MAX_STACK_SIZE];
+static uns level;
+
+/*
+ * Open a new block for item `item`, whose data are at `ptr`, and push it to
+ * the stack.  `item` is NULL for unknown items, whose blocks are only tracked.
+ *
+ * For a section, only OP_SET is allowed and the block works directly on the
+ * section's data.  For a list, a new node is allocated and initialized; for
+ * operations below OP_REMOVE it is linked to the list at once (2nd phase),
+ * for the other ones the block starts in the 1st phase, in which the node
+ * serves as a query to be matched when the block is closed.
+ *
+ * Returns NULL on success or an error message.
+ * NOTE(review): `level` stays incremented when an error is returned after the
+ * push -- presumably harmless, because callers give up on error; confirm.
+ */
+static char *
+opening_brace(struct cf_item *item, void *ptr, enum cf_operation op)
+{
+  if (level >= MAX_STACK_SIZE-1)
+    return "Too many nested sections";
+  enum cf_operation pure_op = op & OP_MASK;
+  stack[++level] = (struct item_stack) {
+    .sec = NULL,
+    .base_ptr = NULL,
+    .op = pure_op,
+    .list = NULL,
+    .mask = 0,
+    .item = NULL,
+  };
+  if (!item)                   // unknown is ignored; we just need to trace recursion
+    return NULL;
+  stack[level].sec = item->u.sec;
+  if (item->cls == CC_SECTION)
+  {
+    if (pure_op != OP_SET)
+      return "Only SET operation can be used with a section";
+    stack[level].base_ptr = ptr;
+    stack[level].op = OP_EDIT | OP_2ND;        // this list operation does nothing
+  }
+  else if (item->cls == CC_LIST)
+  {
+    stack[level].base_ptr = cf_malloc(item->u.sec->size);
+    cf_init_section(item->name, item->u.sec, stack[level].base_ptr, 1);
+    stack[level].list = ptr;
+    stack[level].item = item;
+    if (pure_op == OP_ALL)
+      return "Operation ALL cannot be applied on lists";
+    else if (pure_op < OP_REMOVE) {
+      add_to_list(ptr, stack[level].base_ptr, pure_op);
+      stack[level].op |= OP_2ND;
+    } else
+      stack[level].op |= OP_1ST;
+  }
+  else
+    return "Opening brace can only be used on sections and lists";
+  return NULL;
+}
+
+/*
+ * Handles a `}' for the stack entry st.
+ *
+ * op           -- operation flags of the line; OP_OPEN set means the `}' is
+ *                 immediately followed by another `{'
+ * number, pars -- parameters that follow the brace on the same line
+ *
+ * For an entry in the OP_1ST phase, the node collected so far is used as a
+ * query for find_list_node() (restricted by st->mask) and the requested list
+ * operation is then carried out relative to the node found.  Returns NULL on
+ * success or an error message.
+ */
+static char *
+closing_brace(struct item_stack *st, enum cf_operation op, int number, char **pars)
+{
+  if (st->op == OP_CLOSE)      // top-level
+    return "Unmatched } parenthesis";
+  if (!st->sec) {              // dummy run on unknown section
+    if (!(op & OP_OPEN))
+      level--;
+    return NULL;
+  }
+  enum cf_operation pure_op = st->op & OP_MASK;
+  if (st->op & OP_1ST)
+  {
+    // Replace the list head by the node matching the recorded selectors
+    st->list = find_list_node(st->list, st->base_ptr, st->sec, st->mask);
+    if (!st->list)
+      return "Cannot find a node matching the query";
+    if (pure_op != OP_REMOVE)
+    {
+      if (pure_op == OP_EDIT)
+       st->base_ptr = st->list;
+      else if (pure_op == OP_AFTER || pure_op == OP_BEFORE)
+       cf_init_section(st->item->name, st->sec, st->base_ptr, 1);
+      else if (pure_op == OP_COPY) {
+       if (st->sec->flags & SEC_FLAG_CANT_COPY)
+         return cf_printf("Item %s cannot be copied", st->item->name);
+       memcpy(st->base_ptr, st->list, st->sec->size);  // strings and dynamic arrays are shared
+       if (st->sec->copy)
+         TRY( st->sec->copy(st->base_ptr, st->list) );
+      } else
+       ASSERT(0);
+      if (op & OP_OPEN) {      // stay at the same recursion level
+       st->op = (st->op | OP_2ND) & ~OP_1ST;
+       add_to_list(st->list, st->base_ptr, pure_op);
+       return NULL;
+      }
+      int taken;               // parse parameters on 1 line immediately
+      TRY( interpret_section(st->sec, number, pars, &taken, st->base_ptr, 1) );
+      number -= taken;
+      pars += taken;
+      // and fall-thru to the 2nd phase
+    }
+    add_to_list(st->list, st->base_ptr, pure_op);
+  }
+  level--;                     // the block is popped before the remaining checks, so it is closed even if they fail
+  if (number)
+    return "No parameters expected after the }";
+  else if (op & OP_OPEN)
+    return "No { is expected";
+  else
+    return NULL;
+}
+
+/*
+ * Resolves a (possibly dotted) item name relative to curr_sec.
+ *
+ * curr_sec -- section to start in; NULL means an unknown section
+ * name     -- item name; a leading `^' restarts the search at the root
+ *             section cf_sections and resets *ptr to NULL
+ * msg      -- set to NULL, or to an error message on failure
+ * ptr      -- in: base address of curr_sec; out: advanced by the ptr fields
+ *             of all items on the path
+ *
+ * Returns the item found, or NULL when nothing was found.  NULL with *msg
+ * left at NULL means the item is unknown but should be ignored silently.
+ * Every section walked through except the root is reported to cf_add_dirty().
+ *
+ * NOTE(review): the dots in `name' are overwritten by zero bytes in place,
+ * although the parameter is const-qualified -- callers presumably have to
+ * pass a writable buffer; confirm.
+ */
+static struct cf_item *
+find_item(struct cf_section *curr_sec, const char *name, char **msg, void **ptr)
+{
+  *msg = NULL;
+  if (name[0] == '^')                          // absolute name instead of relative
+    name++, curr_sec = &cf_sections, *ptr = NULL;
+  if (!curr_sec)                               // don't even search in an unknown section
+    return NULL;
+  while (1)
+  {
+    if (curr_sec != &cf_sections)
+      cf_add_dirty(curr_sec, *ptr);
+    char *c = strchr(name, '.');
+    if (c)
+      *c++ = 0;                                        // cut off the first component; c points to the rest
+    struct cf_item *ci = cf_find_subitem(curr_sec, name);
+    if (!ci->cls)                              // the terminating entry was returned, i.e., no such item
+    {
+      if (!(curr_sec->flags & SEC_FLAG_UNKNOWN))       // ignore silently unknown top-level sections and unknown attributes in flagged sections
+       *msg = cf_printf("Unknown item %s", name);
+      return NULL;
+    }
+    *ptr += (uintptr_t) ci->ptr;
+    if (!c)
+      return ci;
+    if (ci->cls != CC_SECTION)
+    {
+      *msg = cf_printf("Item %s is not a section", name);
+      return NULL;
+    }
+    curr_sec = ci->u.sec;
+    name = c;
+  }
+}
+
+/*
+ * Interprets one configuration statement in the context of the block
+ * currently on top of the stack.
+ *
+ * name         -- name of the item (not used for a closing brace)
+ * op           -- operation, possibly combined with the OP_OPEN flag
+ * number, pars -- parameters of the statement
+ *
+ * Returns NULL on success or an error message.
+ */
+char *
+cf_interpret_line(char *name, enum cf_operation op, int number, char **pars)
+{
+  char *msg;
+  if ((op & OP_MASK) == OP_CLOSE)
+    return closing_brace(stack+level, op, number, pars);
+  void *ptr = stack[level].base_ptr;
+  struct cf_item *item = find_item(stack[level].sec, name, &msg, &ptr);
+  if (msg)
+    return msg;
+  if (stack[level].op & OP_1ST)                // the block is still in its 1st phase, so record this item in its selector mask
+    TRY( record_selector(item, stack[level].sec, &stack[level].mask) );
+  if (op & OP_OPEN) {          // the operation will be performed after the closing brace
+    if (number)
+      return "Cannot open a block after a parameter has been passed on a line";
+    return opening_brace(item, ptr, op);
+  }
+  if (!item)                   // ignored item in an unknown section
+    return NULL;
+  op &= OP_MASK;
+
+  int taken = 0;               // process as many parameters as possible
+  if (op == OP_CLEAR || op == OP_ALL)
+    msg = interpret_set_all(item, ptr, op);
+  else if (op == OP_SET)
+    msg = interpret_set_item(item, number, pars, &taken, ptr, 1);
+  else if (item->cls == CC_DYNAMIC)
+    msg = interpret_add_dynamic(item, number, pars, &taken, ptr, op);
+  else if (item->cls == CC_LIST)
+    msg = interpret_add_list(item, number, pars, &taken, ptr, op);
+  else if (item->cls == CC_BITMAP)
+    msg = interpret_add_bitmap(item, number, pars, &taken, ptr, op);
+  else
+    return cf_printf("Operation %s not supported on attribute %s", cf_op_names[op], name);
+  if (msg)
+    return msg;
+  if (taken < number)          // some parameters were left unconsumed
+    return cf_printf("Too many parameters: %d>%d", number, taken);
+
+  return NULL;
+}
+
+/*
+ * Looks up an item by its name, starting at the root section, and stores a
+ * copy of its description in *item with item->ptr replaced by the resolved
+ * address.  When nothing is found and no error is reported, *item is zeroed.
+ * Returns NULL or the error message produced by the lookup.
+ */
+char *
+cf_find_item(const char *name, struct cf_item *item)
+{
+  char *err;
+  void *addr = NULL;
+  struct cf_item *found = find_item(&cf_sections, name, &err, &addr);
+  if (err)
+    return err;
+  if (!found) {
+    bzero(item, sizeof(struct cf_item));
+    return NULL;
+  }
+  *item = *found;
+  item->ptr = addr;
+  return NULL;
+}
+
+/*
+ * Applies an operation with the given parameters to an item whose ptr field
+ * holds the address of its data.  Returns NULL on success or an error
+ * message (unsupported operation, wrong item class, parser error, or
+ * parameters left unconsumed).
+ */
+char *
+cf_write_item(struct cf_item *item, enum cf_operation op, int number, char **pars)
+{
+  char *err;
+  int used = 0;
+  void *dest = item->ptr;
+
+  if (op == OP_SET)
+    err = interpret_set_item(item, number, pars, &used, dest, 1);
+  else if (op == OP_CLEAR || op == OP_ALL)
+    err = interpret_set_all(item, dest, op);
+  else if (op == OP_APPEND || op == OP_PREPEND) {
+    if (item->cls == CC_DYNAMIC)
+      err = interpret_add_dynamic(item, number, pars, &used, dest, op);
+    else if (item->cls == CC_LIST)
+      err = interpret_add_list(item, number, pars, &used, dest, op);
+    else
+      return "The attribute does not support append/prepend";
+  } else if (op == OP_REMOVE) {
+    if (item->cls != CC_BITMAP)
+      return "Only applicable on bitmaps";
+    err = interpret_add_bitmap(item, number, pars, &used, dest, op);
+  } else
+    return "Unsupported operation";
+
+  if (err)
+    return err;
+  return (used < number) ? "Too many parameters" : NULL;
+}
+
+/*
+ * Resets the block stack to a single top-level entry referring to the root
+ * section.  On the very first call, the root section is also flagged with
+ * SEC_FLAG_UNKNOWN, its size is zeroed and it is initialized.
+ */
+void
+cf_init_stack(void)
+{
+  static uns initialized = 0;
+  uns first_call = !initialized++;
+  if (first_call) {
+    cf_sections.size = 0;                      // size of allocated array used to be stored here
+    cf_sections.flags |= SEC_FLAG_UNKNOWN;
+    cf_init_section(NULL, &cf_sections, NULL, 0);
+  }
+  struct item_stack *root = &stack[0];
+  root->sec = &cf_sections;
+  root->base_ptr = NULL;
+  root->op = OP_CLOSE;
+  root->list = NULL;
+  root->mask = 0;
+  root->item = NULL;
+  level = 0;
+}
+
+/*
+ * Checks that no block has been left open.  Returns 0 if the stack is at
+ * the top level, otherwise logs an error and returns 1.
+ */
+int
+cf_check_stack(void)
+{
+  if (!level)
+    return 0;
+  msg(L_ERROR, "Unterminated block");
+  return 1;
+}
+
diff --git a/lib/conf-journal.c b/lib/conf-journal.c

new file mode 100644 (file)

index 0000000..591aa8a
--- /dev/null
+++ b/lib/conf-journal.c
@@ -0,0 +1,117 @@
+/*
+ *     UCW Library -- Configuration files: journaling
+ *
+ *     (c) 2001--2006 Robert Spalek <robert@ucw.cz>
+ *     (c) 2003--2006 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/conf.h"
+#include "lib/getopt.h"
+#include "lib/conf-internal.h"
+#include "lib/mempool.h"
+
+#include <string.h>
+
+/* Memory pools retired by committed transactions; released by cf_journal_delete() */
+static struct old_pools {
+  struct old_pools *prev;
+  struct mempool *pool;
+} *pools;                      // linked list of older cf_pool's
+
+uns cf_need_journal = 1;       // some programs do not need journal
+/* One undo record: a saved copy of `len' bytes that lived at `ptr' */
+static struct cf_journal_item {
+  struct cf_journal_item *prev;        // previously recorded item
+  byte *ptr;                   // address of the journaled memory block
+  uns len;                     // its length in bytes
+  byte copy[0];                        // the saved contents follow the header
+} *journal;
+
+/*
+ * Records the current contents of the memory block [ptr, ptr+len) in the
+ * journal, unless journaling has been switched off by cf_need_journal.
+ */
+void
+cf_journal_block(void *ptr, uns len)
+{
+  if (cf_need_journal)
+  {
+    struct cf_journal_item *entry = cf_malloc(sizeof(struct cf_journal_item) + len);
+    memcpy(entry->copy, ptr, len);
+    entry->len = len;
+    entry->ptr = ptr;
+    entry->prev = journal;
+    journal = entry;
+  }
+}
+
+void
+cf_journal_swap(void)
+  // exchanges every journaled block with its saved copy and reverses the order of the journal
+{
+  struct cf_journal_item *reversed = NULL;
+  struct cf_journal_item *entry = journal;
+  while (entry)
+  {
+    struct cf_journal_item *older = entry->prev;
+    entry->prev = reversed;
+    byte *saved = entry->copy;
+    byte *live = entry->ptr;
+    for (uns left = entry->len; left; left--, saved++, live++)
+    {
+      byte tmp = *saved;
+      *saved = *live;
+      *live = tmp;
+    }
+    reversed = entry;
+    entry = older;
+  }
+  journal = reversed;
+}
+
+/*
+ * Starts a new transaction with an empty journal and returns the journal
+ * recorded so far.  If new_pool is non-zero, cf_pool is replaced by a newly
+ * created memory pool.
+ */
+struct cf_journal_item *
+cf_journal_new_transaction(uns new_pool)
+{
+  struct cf_journal_item *saved = journal;
+  journal = NULL;
+  if (new_pool)
+    cf_pool = mp_new(1<<10);
+  return saved;
+}
+
+/*
+ * Commits the current transaction: if new_pool is non-zero, the current
+ * cf_pool is remembered in the list of old pools; the journal oldj returned
+ * by cf_journal_new_transaction() is linked behind the current journal.
+ */
+void
+cf_journal_commit_transaction(uns new_pool, struct cf_journal_item *oldj)
+{
+  if (new_pool)
+  {
+    struct old_pools *rec = cf_malloc(sizeof(struct old_pools));
+    rec->pool = cf_pool;
+    rec->prev = pools;
+    pools = rec;
+  }
+  if (!oldj)
+    return;
+  if (!journal)
+  {
+    journal = oldj;
+    return;
+  }
+  // walk to the oldest item of the current journal and hang the old journal after it
+  struct cf_journal_item *tail = journal;
+  while (tail->prev)
+    tail = tail->prev;
+  tail->prev = oldj;
+}
+
+/*
+ * Rolls the current transaction back: the journaled memory is restored by
+ * cf_journal_swap() and the journal is reset to oldj.  If new_pool is
+ * non-zero, the current cf_pool is deleted and cf_pool falls back to the
+ * most recently remembered old pool (or NULL).  Dies when journaling is off.
+ */
+void
+cf_journal_rollback_transaction(uns new_pool, struct cf_journal_item *oldj)
+{
+  if (!cf_need_journal)
+    die("Cannot rollback the configuration, because the journal is disabled.");
+  cf_journal_swap();
+  journal = oldj;
+  if (!new_pool)
+    return;
+  mp_delete(cf_pool);
+  if (pools)
+    cf_pool = pools->pool;
+  else
+    cf_pool = NULL;
+}
+
+/* Deletes all memory pools remembered in the list of old pools. */
+void
+cf_journal_delete(void)
+{
+  while (pools)
+  {
+    struct old_pools *victim = pools;
+    pools = victim->prev;              // unlink first, then delete the pool
+    mp_delete(victim->pool);
+  }
+}
+
+/* TODO: more space efficient journal */
diff --git a/lib/conf-parse.c b/lib/conf-parse.c

new file mode 100644 (file)

index 0000000..c828462
--- /dev/null
+++ b/lib/conf-parse.c
@@ -0,0 +1,167 @@
+/*
+ *     UCW Library -- Configuration files: parsers for basic types
+ *
+ *     (c) 2001--2006 Robert Spalek <robert@ucw.cz>
+ *     (c) 2003--2006 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/conf.h"
+#include "lib/chartype.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <errno.h>
+
+/* A one-letter suffix which scales the preceding number by num/den */
+struct unit {
+  uns name;                    // one-letter name of the unit
+  uns num, den;                        // fraction
+};
+
+/* Table of known units, terminated by an entry with a zero name */
+static const struct unit units[] = {
+  { 'd', 86400, 1 },
+  { 'h', 3600, 1 },
+  { 'k', 1000, 1 },
+  { 'm', 1000000, 1 },
+  { 'g', 1000000000, 1 },
+  { 'K', 1024, 1 },
+  { 'M', 1048576, 1 },
+  { 'G', 1073741824, 1 },
+  { '%', 1, 100 },
+  { 0, 0, 0 }
+};
+
+/*
+ * Identifies the unit suffix following a parsed number.
+ *
+ * value -- start of the string being parsed
+ * end   -- first character not consumed by the number parser
+ * msg   -- receives an error message; left untouched otherwise
+ *
+ * Returns the matching unit, or NULL if there is no suffix or it is invalid
+ * (in which case *msg is set).
+ */
+static const struct unit *
+lookup_unit(const char *value, const char *end, char **msg)
+{
+  if (!end || !*end)
+    return NULL;
+  int is_digit = (*end >= '0' && *end <= '9');
+  if (end == value || end[1] || is_digit) {
+    *msg = "Invalid number";
+    return NULL;
+  }
+  const struct unit *u = units;
+  while (u->name) {
+    if ((char)u->name == *end)
+      return u;
+    u++;
+  }
+  *msg = "Invalid unit";
+  return NULL;
+}
+
+static char cf_rngerr[] = "Number out of range";
+
+/*
+ * Parses a 32-bit integer with an optional one-letter unit suffix.
+ *
+ * str -- the string to parse
+ * ptr -- where the value is stored; written only if parsing succeeds
+ *
+ * Returns NULL on success or an error message.
+ */
+char *
+cf_parse_int(const char *str, int *ptr)
+{
+  char *msg = NULL;
+  if (!*str)
+    msg = "Missing number";
+  else {
+    const struct unit *u;
+    char *end;
+    errno = 0;
+    unsigned long raw = strtoul(str, &end, 0);
+    /* Where long is wider than 32 bits, strtoul() accepts values which do not
+     * fit in uns without reporting ERANGE.  Negative input comes back negated
+     * in unsigned long, so a value is in range if either it or its negation
+     * fits in 32 bits.  */
+    if (errno == ERANGE || (raw > 0xffffffffUL && -raw > 0xffffffffUL))
+      msg = cf_rngerr;
+    else {
+      uns x = raw;
+      if (u = lookup_unit(str, end, &msg)) {
+        u64 y = (u64)x * u->num;
+        if (y % u->den)
+          msg = "Number is not an integer";
+        else {
+          y /= u->den;
+          if (y > 0xffffffff)
+            msg = cf_rngerr;
+          else
+            *ptr = y;
+        }
+      } else if (!msg)                 // lookup_unit() may have reported an invalid suffix
+        *ptr = x;
+    }
+  }
+  return msg;
+}
+
+/*
+ * Parses a 64-bit unsigned integer with an optional one-letter unit suffix.
+ *
+ * str -- the string to parse
+ * ptr -- where the value is stored; written only if parsing succeeds
+ *
+ * Returns NULL on success or an error message.
+ */
+char *
+cf_parse_u64(const char *str, u64 *ptr)
+{
+  char *msg = NULL;
+  if (!*str)
+    msg = "Missing number";
+  else {
+    const struct unit *u;
+    char *end;
+    errno = 0;
+    u64 x = strtoull(str, &end, 0);
+    if (errno == ERANGE)
+      msg = cf_rngerr;
+    else if (u = lookup_unit(str, end, &msg)) {
+      if (x > ~(u64)0 / u->num)                // the multiplication would overflow
+        msg = cf_rngerr;
+      else {
+        x *= u->num;
+        if (x % u->den)
+          msg = "Number is not an integer";
+        else
+          *ptr = x / u->den;
+      }
+    } else if (!msg)                   // lookup_unit() may have reported an invalid suffix
+      *ptr = x;
+  }
+  return msg;
+}
+
+/*
+ * Parses a floating-point number with an optional one-letter unit suffix.
+ *
+ * str -- the string to parse
+ * ptr -- where the value is stored; written only if parsing succeeds
+ *
+ * Returns NULL on success or an error message.
+ */
+char *
+cf_parse_double(const char *str, double *ptr)
+{
+  char *msg = NULL;
+  if (!*str)
+    msg = "Missing number";
+  else {
+    const struct unit *u;
+    double x;
+    int read_chars = 0;                        // %n stores through an int pointer
+    if (sscanf(str, "%lf%n", &x, &read_chars) != 1)
+      msg = "Invalid number";
+    else if (u = lookup_unit(str, str + read_chars, &msg))
+      *ptr = x * u->num / u->den;
+    else if (!msg)                     // lookup_unit() may have reported an invalid suffix
+      *ptr = x;
+  }
+  return msg;
+}
+
+/*
+ * Parses an IPv4 address written either as a hexadecimal number with a 0x
+ * prefix or as four decimal octets separated by dots.
+ *
+ * p    -- the string to parse
+ * varp -- where the address is stored as a 32-bit number
+ *
+ * Returns NULL on success or an error message.  When only trailing
+ * characters are wrong, the address parsed so far is still stored.
+ */
+char *
+cf_parse_ip(const char *p, u32 *varp)
+{
+  if (!*p)
+    return "Missing IP address";
+  uns x = 0;
+  char *p2;
+  if (*p == '0' && (p[1] | 32) == 'x' && Cxdigit(p[2])) {
+    errno = 0;
+    // keep the full width of strtoul()'s result, so that the range check works even if long has 64 bits
+    unsigned long raw = strtoul(p, &p2, 16);
+    if (errno == ERANGE || raw > 0xffffffffUL)
+      goto error;
+    x = raw;
+    p = p2;
+  }
+  else
+    for (uns i = 0; i < 4; i++) {
+      if (i) {
+        if (*p++ != '.')
+          goto error;
+      }
+      if (!Cdigit(*p))
+        goto error;
+      errno = 0;
+      unsigned long y = strtoul(p, &p2, 10);   // not truncated before the check against 255
+      if (errno == ERANGE || p2 == (char*) p || y > 255)
+        goto error;
+      p = p2;
+      x = (x << 8) + y;
+    }
+  *varp = x;
+  return *p ? "Trailing characters" : NULL;
+error:
+  return "Invalid IP address";
+}
+
diff --git a/lib/conf-section.c b/lib/conf-section.c

new file mode 100644 (file)

index 0000000..1a9ae17
--- /dev/null
+++ b/lib/conf-section.c
@@ -0,0 +1,203 @@
+/*
+ *     UCW Library -- Configuration files: sections
+ *
+ *     (c) 2001--2006 Robert Spalek <robert@ucw.cz>
+ *     (c) 2003--2006 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/conf.h"
+#include "lib/conf-internal.h"
+#include "lib/clists.h"
+#include "lib/binsearch.h"
+
+#include <string.h>
+
+/* Dirty sections */
+
+/* A section instance which has been touched: its declaration and the address of its data */
+struct dirty_section {
+  struct cf_section *sec;
+  void *ptr;
+};
+/* Growing buffer of dirty sections, instantiated from the gbuf.h template with the prefix dirtsec_ */
+#define GBUF_TYPE      struct dirty_section
+#define GBUF_PREFIX(x) dirtsec_##x
+#include "lib/gbuf.h"
+static dirtsec_t dirty;
+static uns dirties;            // number of entries used in `dirty'
+
+/*
+ * Appends the pair (sec, ptr) to the array of dirty sections, unless it is
+ * identical to the entry added most recently.
+ */
+void
+cf_add_dirty(struct cf_section *sec, void *ptr)
+{
+  dirtsec_grow(&dirty, dirties+1);
+  struct dirty_section *slot = &dirty.ptr[dirties];
+  int same_as_last = dirties && slot[-1].sec == sec && slot[-1].ptr == ptr;
+  if (!same_as_last) {
+    slot->sec = sec;
+    slot->ptr = ptr;
+    dirties++;
+  }
+}
+
+/* Parameters for the arraysort.h template -- presumably this is what defines
+ * the dirtsec_sort() called by sort_dirty().  Entries are ordered by the
+ * section pointer first and by the data pointer second.  */
+#define ASORT_PREFIX(x)        dirtsec_##x
+#define ASORT_KEY_TYPE struct dirty_section
+#define ASORT_ELT(i)   dirty.ptr[i]
+#define ASORT_LT(x,y)  x.sec < y.sec || x.sec == y.sec && x.ptr < y.ptr
+#include "lib/arraysort.h"
+
+/* Sorts the array of dirty sections and removes adjacent duplicate entries. */
+static void
+sort_dirty(void)
+{
+  if (dirties <= 1)
+    return;
+  dirtsec_sort(dirties);
+  // squeeze out entries equal to their predecessor
+  uns kept = 1;
+  for (uns i = 1; i < dirties; i++) {
+    struct dirty_section *cur = &dirty.ptr[i];
+    if (cur->sec == cur[-1].sec && cur->ptr == cur[-1].ptr)
+      continue;
+    if (i != kept)
+      dirty.ptr[kept] = *cur;
+    kept++;
+  }
+  dirties = kept;
+}
+
+/* Initialization */
+
+struct cf_section cf_sections; // root section
+
+/*
+ * Finds an item of the given name (compared case-insensitively) in a
+ * section.  If there is no such item, the terminating entry of the item
+ * array (the one with cls == 0) is returned instead.
+ */
+struct cf_item *
+cf_find_subitem(struct cf_section *sec, const char *name)
+{
+  struct cf_item *ci = sec->cfg;
+  while (ci->cls && strcasecmp(ci->name, name) != 0)
+    ci++;
+  return ci;
+}
+
+/*
+ * Recomputes sec->flags (recursively for all sub-sections and lists):
+ * SEC_FLAG_DYNAMIC and SEC_FLAG_CANT_COPY are derived from the classes of
+ * the items, CANT_COPY is dropped when the section has a copy hook, and the
+ * number of items is OR-ed in.
+ */
+static void
+inspect_section(struct cf_section *sec)
+{
+  struct cf_item *ci = sec->cfg;
+  sec->flags = 0;
+  while (ci->cls) {
+    switch (ci->cls) {
+      case CC_SECTION:
+        inspect_section(ci->u.sec);
+        sec->flags |= ci->u.sec->flags & (SEC_FLAG_DYNAMIC | SEC_FLAG_CANT_COPY);
+        break;
+      case CC_LIST:
+        inspect_section(ci->u.sec);
+        sec->flags |= SEC_FLAG_DYNAMIC | SEC_FLAG_CANT_COPY;
+        break;
+      case CC_DYNAMIC:
+      case CC_BITMAP:
+        sec->flags |= SEC_FLAG_DYNAMIC;
+        break;
+      case CC_PARSER:
+        sec->flags |= SEC_FLAG_CANT_COPY;
+        if (ci->number < 0)
+          sec->flags |= SEC_FLAG_DYNAMIC;
+        break;
+      default:
+        break;
+    }
+    ci++;
+  }
+  if (sec->copy)
+    sec->flags &= ~SEC_FLAG_CANT_COPY;
+  sec->flags |= ci - sec->cfg;         // record the number of entries
+}
+
+/*
+ * Registers a top-level section in the root section cf_sections.
+ *
+ * name          -- name of the section; the pointer is stored, not copied
+ * sec           -- declaration of the section
+ * allow_unknown -- if non-zero, the section is flagged with SEC_FLAG_UNKNOWN
+ *
+ * Dies if a section of the same name has already been registered.
+ */
+void
+cf_declare_section(const char *name, struct cf_section *sec, uns allow_unknown)
+{
+  if (!cf_sections.cfg)
+  {
+    // the first declaration allocates the zeroed item array; its capacity is kept in cf_sections.size
+    cf_sections.size = 50;
+    cf_sections.cfg = xmalloc_zero(cf_sections.size * sizeof(struct cf_item));
+  }
+  struct cf_item *ci = cf_find_subitem(&cf_sections, name);
+  if (ci->cls)
+    die("Cannot register section %s twice", name);
+  // ci is the terminating entry, so the new section is stored there
+  ci->cls = CC_SECTION;
+  ci->name = name;
+  ci->number = 1;
+  ci->ptr = NULL;
+  ci->u.sec = sec;
+  inspect_section(sec);
+  if (allow_unknown)
+    sec->flags |= SEC_FLAG_UNKNOWN;
+  ci++;
+  if (ci - cf_sections.cfg >= (int) cf_sections.size)
+  {
+    // no room left for the terminating entry: double the array and zero its new half
+    cf_sections.cfg = xrealloc(cf_sections.cfg, 2*cf_sections.size * sizeof(struct cf_item));
+    bzero(cf_sections.cfg + cf_sections.size, cf_sections.size * sizeof(struct cf_item));
+    cf_sections.size *= 2;
+  }
+}
+
+/*
+ * Initializes an instance of a section.
+ *
+ * name     -- name used in the error message only
+ * sec      -- declaration of the section
+ * ptr      -- address of the instance; item pointers are added to it as offsets
+ * do_bzero -- if non-zero, the instance is cleared first (sec->size must be non-zero)
+ *
+ * Sub-sections are initialized recursively, lists are set up as empty
+ * clists, NULL dynamic arrays are replaced by an empty array, and finally
+ * the init hook of the section is called.  Dies if the hook fails.
+ */
+void
+cf_init_section(const char *name, struct cf_section *sec, void *ptr, uns do_bzero)
+{
+  if (do_bzero) {
+    ASSERT(sec->size);
+    bzero(ptr, sec->size);
+  }
+  for (struct cf_item *ci=sec->cfg; ci->cls; ci++)
+    if (ci->cls == CC_SECTION)
+      cf_init_section(ci->name, ci->u.sec, ptr + (uintptr_t) ci->ptr, 0);
+    else if (ci->cls == CC_LIST)
+      clist_init(ptr + (uintptr_t) ci->ptr);
+    else if (ci->cls == CC_DYNAMIC) {
+      void **dyn = ptr + (uintptr_t) ci->ptr;
+      if (!*dyn) {                     // replace NULL by an empty array
+       static uns zero = 0;
+       *dyn = (&zero) + 1;             // the uns just before the array (see DARY_LEN) is the zero length
+      }
+    }
+  if (sec->init) {
+    char *msg = sec->init(ptr);
+    if (msg)
+      die("Cannot initialize section %s: %s", name, msg);
+  }
+}
+
+/*
+ * Calls the commit hooks of a section instance and of everything nested in
+ * it: sub-sections and list nodes are committed first, the hook of the
+ * section itself last.
+ *
+ * sec        -- declaration of the section
+ * ptr        -- address of the instance
+ * commit_all -- if zero, the hook of a section is called only when the
+ *               instance is present in the sorted array of dirty sections
+ *
+ * Returns NULL (0) on success or an error message; failures of nested
+ * sections are also logged.
+ */
+static char *
+commit_section(struct cf_section *sec, void *ptr, uns commit_all)
+{
+  char *err;
+  for (struct cf_item *ci=sec->cfg; ci->cls; ci++)
+    if (ci->cls == CC_SECTION) {
+      if ((err = commit_section(ci->u.sec, ptr + (uintptr_t) ci->ptr, commit_all))) {
+       msg(L_ERROR, "Cannot commit section %s: %s", ci->name, err);
+       return "commit of a subsection failed";
+      }
+    } else if (ci->cls == CC_LIST) {
+      uns idx = 0;                     // 1-based number of the node, for the error message
+      CLIST_FOR_EACH(cnode *, n, * (clist*) (ptr + (uintptr_t) ci->ptr))
+       if (idx++, err = commit_section(ci->u.sec, n, commit_all)) {
+         msg(L_ERROR, "Cannot commit node #%d of list %s: %s", idx, ci->name, err);
+         return "commit of a list failed";
+       }
+    }
+  if (sec->commit) {
+    /* We have to process the whole tree of sections even if just a few changes
+     * have been made, because there are dependencies between commit-hooks and
+     * hence we need to call them in a fixed order.  */
+#define ARY_LT_X(ary,i,x) ary[i].sec < x.sec || ary[i].sec == x.sec && ary[i].ptr < x.ptr
+    struct dirty_section comp = { sec, ptr };
+    uns pos = BIN_SEARCH_FIRST_GE_CMP(dirty.ptr, dirties, comp, ARY_LT_X);
+
+    if (commit_all
+       || (pos < dirties && dirty.ptr[pos].sec == sec && dirty.ptr[pos].ptr == ptr))
+      return sec->commit(ptr);
+  }
+  return 0;
+}
+
+/*
+ * Sorts the dirty sections and, unless cm is CF_NO_COMMIT, runs the commit
+ * hooks starting at the root section (all of them for CF_COMMIT_ALL).
+ * Returns 1 if a commit failed, 0 otherwise; the array of dirty sections is
+ * emptied only after a successful commit.
+ */
+int
+cf_commit_all(enum cf_commit_mode cm)
+{
+  sort_dirty();
+  if (cm == CF_NO_COMMIT)
+    return 0;
+  uns everything = (cm == CF_COMMIT_ALL);
+  char *err = commit_section(&cf_sections, NULL, everything);
+  if (err)
+    return 1;
+  dirties = 0;
+  return 0;
+}
diff --git a/lib/conf-test.c b/lib/conf-test.c

new file mode 100644 (file)

index 0000000..61ba4bc
--- /dev/null
+++ b/lib/conf-test.c
@@ -0,0 +1,219 @@
+/*
+ *     Insane tester of reading configuration files
+ *
+ *     (c) 2006 Robert Spalek <robert@ucw.cz>
+ */
+
+#include "lib/lib.h"
+#include "lib/conf.h"
+#include "lib/getopt.h"
+#include "lib/clists.h"
+#include "lib/fastbuf.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <time.h>
+
+static int verbose;
+
+/* Section used both as the static section "master" and as the node type of the list "slaves" */
+struct sub_sect_1 {
+  cnode n;                     // list linkage (used by clist_add_tail() in init_top())
+  char *name;
+  time_t t;
+  char *level;
+  int confidence[2];
+  double *list;                        // dynamic array
+};
+
+/* The statically initialized instance; init_sec_1() leaves it alone */
+static struct sub_sect_1 sec1 = { {}, "Charlie", 0, "WBAFC", { 0, -1}, DARY_ALLOC(double, 3, 1e4, -1e-4, 8) };
+
+/* Init hook of sub_sect_1: fills in defaults for every instance except the static sec1. */
+static char *
+init_sec_1(struct sub_sect_1 *s)
+{
+  if (s != &sec1) {                    // sec1 is a static variable with its own initializer
+    s->confidence[0] = 5;
+    s->confidence[1] = 6;
+    s->level = "default";
+    s->name = "unknown";
+    // s->list is intentionally left as it is (NULL)
+  }
+  return NULL;
+}
+
+/* Commit hook of sub_sect_1: accepts only confidence[0] within 0..10. */
+static char *
+commit_sec_1(struct sub_sect_1 *s)
+{
+  int c = s->confidence[0];
+  return (c >= 0 && c <= 10) ? NULL : "Well, this can't be";
+}
+
+/* Parser hook: stores the first parameter converted by atoi(), or the current time if there are no parameters. */
+static char *
+time_parser(uns number, char **pars, time_t *ptr)
+{
+  if (number)
+    *ptr = atoi(pars[0]);
+  else
+    *ptr = time(NULL);
+  return NULL;
+}
+
+/* Declaration of the sub_sect_1 section; F(x) refers to member x of the structure */
+static struct cf_section cf_sec_1 = {
+  CF_TYPE(struct sub_sect_1),
+  CF_INIT(init_sec_1),
+  CF_COMMIT(commit_sec_1),
+#define F(x)   PTR_TO(struct sub_sect_1, x)
+  CF_ITEMS {
+    CF_STRING("name", F(name)),
+    //CF_PARSER("t", F(t), time_parser, 0),
+    CF_STRING("level", F(level)),
+    CF_INT_ARY("confidence", F(confidence[0]), 2),             // XXX: the [0] is needed for the sake of type checking
+    CF_DOUBLE_DYN("list", F(list), 100),
+    CF_END
+  }
+#undef F
+};
+
+/* Variables configured by the items of the "top" section (see cf_top) */
+static uns nr1 = 15;
+static int *nrs1 = DARY_ALLOC(int, 5, 5, 4, 3, 2, 1);
+static int nrs2[5];
+static char *str1 = "no worries";
+static char **str2 = DARY_ALLOC(char *, 2, "Alice", "Bob");
+static u64 u1 = 0xCafeBeefDeadC00ll;
+static double d1 = -1.1;
+static clist secs;
+static time_t t1, t2;
+static u32 ip;
+static int *look = DARY_ALLOC(int, 2, 2, 1);
+static u16 numbers[10] = { 2, 100, 1, 5 };
+static u32 bitmap1 = 0xff;
+static u32 bitmap2 = 3;
+
+/*
+ * Parser of the user-defined type u16: parses an integer by cf_parse_int()
+ * and stores it if it fits in 16 bits.  Returns NULL or an error message.
+ */
+static char *
+parse_u16(char *string, u16 *ptr)
+{
+  int a;                               // cf_parse_int() stores through an int pointer
+  char *msg = cf_parse_int(string, &a);
+  if (msg)
+    return msg;
+  if ((uns) a >= (1<<16))              // compared as unsigned, so negative values are rejected, too
+    return "Come on, man, this doesn't fit to 16 bits";
+  *ptr = a;
+  return NULL;
+}
+
+/* Dumper of the user-defined type u16: prints the value in decimal followed by a space. */
+static void
+dump_u16(struct fastbuf *fb, u16 *ptr)
+{
+  int value = *ptr;
+  bprintf(fb, "%d ", value);
+}
+
+/* Description of the user-defined type u16, used by the "numbers" item */
+static struct cf_user_type u16_type = {
+  .size = sizeof(u16),
+  .name = "u16",
+  .parser = (cf_parser1*) parse_u16,
+  .dumper = (cf_dumper1*) dump_u16
+};
+
+/* Init hook of the top section: pre-fills the list `secs' with 5 initialized nodes. */
+static char *
+init_top(void *ptr UNUSED)
+{
+  for (uns i=0; i<5; i++)
+  {
+    struct sub_sect_1 *s = xmalloc(sizeof(struct sub_sect_1)); // XXX: cannot be cf_malloc(), because it's deleted when cf_reload()'ed
+    cf_init_section("slaves", &cf_sec_1, s, 1);
+    s->confidence[1] = i;
+    clist_add_tail(&secs, &s->n);
+  }
+  return NULL;
+}
+
+/* Commit hook of the top section: insists on nr1 keeping the value 15. */
+static char *
+commit_top(void *ptr UNUSED)
+{
+  return (nr1 == 15) ? NULL : "Don't touch my variable!";
+}
+
+/* Names accepted by the lookup item "look" */
+static char *alphabet[] = { "alpha", "beta", "gamma", "delta", NULL };
+/* Declaration of the section which main() registers as "top"; it uses items of every class */
+static struct cf_section cf_top = {
+  CF_INIT(init_top),
+  CF_COMMIT(commit_top),
+  CF_ITEMS {
+    CF_UNS("nr1", &nr1),
+    CF_INT_DYN("nrs1", &nrs1, 1000),
+    CF_INT_ARY("nrs2", nrs2, 5),
+    CF_STRING("str1", &str1),
+    CF_STRING_DYN("str2", &str2, 20),
+    CF_U64("u1", &u1),
+    CF_DOUBLE("d1", &d1),
+    CF_PARSER("FirstTime", &t1, time_parser, -1),
+    CF_PARSER("SecondTime", &t2, time_parser, 1),
+    CF_SECTION("master", &sec1, &cf_sec_1),
+    CF_LIST("slaves", &secs, &cf_sec_1),
+    CF_IP("ip", &ip),
+    CF_LOOKUP_DYN("look", &look, alphabet, 1000),
+    CF_USER_ARY("numbers", numbers, &u16_type, 10),
+    CF_BITMAP_INT("bitmap1", &bitmap1),
+    CF_BITMAP_LOOKUP("bitmap2", &bitmap2, ((char*[]) {
+         "one", "two", "three", "four", "five", "six", "seven", "eight", 
+         "nine", "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen", "seventeen", 
+         "eighteen", "nineteen", "twenty", NULL        // hidden joke here
+         })),
+    CF_END
+  }
+};
+
+/* Command-line options: the CF_SHORT_OPTS / CF_LONG_OPTS macros plus -v / --verbose */
+static byte short_opts[] = CF_SHORT_OPTS "v";
+static struct option long_opts[] = {
+       CF_LONG_OPTS
+       {"verbose",     0, 0, 'v'},
+       {NULL,          0, 0, 0}
+};
+
+static char *help = "\
+Usage: conf-test <options>\n\
+\n\
+Options:\n"
+CF_USAGE
+"-v\t\t\tBe verbose\n\
+";
+
+/*
+ * Prints an optional printf-like message followed by the help text to
+ * stderr and exits with status 1.
+ */
+static void NONRET
+usage(char *msg, ...)
+{
+  va_list va;
+  va_start(va, msg);
+  if (msg)
+    vfprintf(stderr, msg, va);
+  va_end(va);                          // every va_start() must be paired with va_end()
+  fputs(help, stderr);
+  exit(1);
+}
+
+/*
+ * Registers the section "top", processes the command line by cf_getopt()
+ * with lib/conf-test.cf as the default configuration file, and dumps all
+ * sections to the standard output.
+ */
+int
+main(int argc, char *argv[])
+{
+  log_init(argv[0]);
+  cf_declare_section("top", &cf_top, 0);
+  cf_def_file = "lib/conf-test.cf";
+
+  int opt;
+  while ((opt = cf_getopt(argc, argv, short_opts, long_opts, NULL)) >= 0)
+    switch (opt) {
+      case 'v': verbose++; break;
+      default: usage("unknown option %c\n", opt);
+    }
+  if (optind < argc)
+    usage("too many parameters (%d more)\n", argc-optind);
+
+  /*
+  cf_load("non-existent file");
+  //cf_reload("non-existent file");
+  cf_load("non-existent file");
+  cf_set("top.d1 -1.1; top.master b");
+  */
+
+  struct fastbuf *out = bfdopen(1, 1<<14);     // buffered output on file descriptor 1
+  cf_dump_sections(out);
+  bclose(out);
+
+  return 0;
+}
diff --git a/lib/conf-test.cf b/lib/conf-test.cf

new file mode 100644 (file)

index 0000000..91e6cc3
--- /dev/null
+++ b/lib/conf-test.cf
@@ -0,0 +1,52 @@
+# test config file
+#include lib/conf-test.t ; top.xa=1
+#include 'non-existent file'; #top.xa=1
+Top { \
+
+  nr1=16       #!!!
+  nrs1         2 3 5 \
+           7 11 13 \
+           \
+           17M
+  nrs2 3 3k 3 3 3 ; \
+  str1 "hello,\t\x2bworld%%\n"
+  str2 'Hagenuk,
+      the best' "\
+      " qu'est-ce que c'est?
+  u1   0xbadcafebadbeefc0
+  str2:prepend prepended
+  str2:append appended
+  d1 7%
+  d1   -1.14e-25
+  firsttime ; secondtime 56
+  ^top.master:set      alice HB8+
+  slaves:clear
+  ip 0xa
+  ip 195.113.31.123
+  look Alpha
+  look:prepend Beta GAMMA
+  numbers 11000 65535
+  bitmap1 31
+  bitmap1:remove 3 3
+  bitmap2:all
+  bitmap2:remove eleven twelve one
+};;;;;;
+
+unknown.ignored :-)
+
+top.slaves     cairns gpua 7 7 -10% +10%
+top.slaves     daintree rafc 4 5 -171%
+top.slaves     coogee pum 9 8
+top.slaves:prepend     {name=bondi; level=\
+  "PUG"; confidence    10 10}
+top.slaves:remove {name daintree}
+top.slaveS:edit {level PUG} Bondi PUG!
+top.slaveS:before {level pum}{
+  confidence 2
+  list 123 456 789
+}
+top.slaves:copy {name coogee} Coogee2 PUM
+
+topp.a=15
+top.nr1=   ' 15'
+a { ;-D }
diff --git a/lib/conf.h b/lib/conf.h

new file mode 100644 (file)

index 0000000..8085744
--- /dev/null
+++ b/lib/conf.h
@@ -0,0 +1,163 @@
+/*
+ *     UCW Library -- Configuration files
+ *
+ *     (c) 2001--2006 Robert Spalek <robert@ucw.cz>
+ *     (c) 2003--2006 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#ifndef        _UCW_CONF_H
+#define        _UCW_CONF_H
+
+/* Class of a configuration item (stored in cf_item.cls); CC_END terminates an array of items */
+enum cf_class {
+  CC_END,                              // end of list
+  CC_STATIC,                           // single variable or static array
+  CC_DYNAMIC,                          // dynamically allocated array
+  CC_PARSER,                           // arbitrary parser function
+  CC_SECTION,                          // section appears exactly once
+  CC_LIST,                             // list with 0..many nodes
+  CC_BITMAP                            // of up to 32 items
+};
+
+/* Type of the values of a static or dynamic attribute (stored in cf_item.type) */
+enum cf_type {
+  CT_INT, CT_U64, CT_DOUBLE,           // number types
+  CT_IP,                               // IP address
+  CT_STRING,                           // string type
+  CT_LOOKUP,                           // in a string table
+  CT_USER                              // user-defined type
+};
+
+struct fastbuf;
+typedef char *cf_parser(uns number, char **pars, void *ptr);
+  /* A parser function gets an array of (strdup'ed) strings and a pointer with
+   * the customized information (most likely the target address).  It can store
+   * the parsed value anywhere in any way it likes, however it must first call
+   * cf_journal_block() on the overwritten memory block.  It returns an error
+   * message or NULL if everything is all right.  */
+typedef char *cf_parser1(char *string, void *ptr);
+  /* A parser function for user-defined types gets a string and a pointer to
+   * the destination variable.  It must store the value within [ptr,ptr+size),
+   * where size is fixed for each type.  It should not call cf_journal_block().  */
+typedef char *cf_hook(void *ptr);
+  /* An init- or commit-hook gets a pointer to the section or NULL if this
+   * is the global section.  It returns an error message or NULL if everything
+   * is all right.  The init-hook should fill in default values (needed for
+   * dynamically allocated nodes of link lists or for filling global variables
+   * that are run-time dependent).  The commit-hook should perform sanity
+   * checks and postprocess the parsed values.  Commit-hooks must call
+   * cf_journal_block() too.  Caveat! init-hooks for static sections must not
+   * use cf_malloc() but normal xmalloc().  */
+typedef void cf_dumper1(struct fastbuf *fb, void *ptr);
+  /* Dumps the contents of a variable of a user-defined type.  */
+typedef char *cf_copier(void *dest, void *src);
+  /* Similar to init-hook, but it copies attributes from another list node
+   * instead of setting the attributes to default values.  You have to provide
+   * it if your node contains parsed values and/or sub-lists.  */
+
+/* Description of a user-defined type (CT_USER), referenced by cf_item.u.utype */
+struct cf_user_type {
+  uns size;                            // of the parsed attribute
+  char *name;                          // name of the type (for dumping)
+  cf_parser1 *parser;                  // how to parse it
+  cf_dumper1 *dumper;                  // how to dump the type
+};
+
+struct cf_section;
+/* Declaration of a single configuration item; usually created by the CF_xxx macros below */
+struct cf_item {
+  const char *name;                    // case insensitive
+  int number;                          // length of an array or #parameters of a parser (negative means at most)
+  void *ptr;                           // pointer to a global variable or an offset in a section
+  union cf_union {                     // which member is valid depends on cls and type
+    struct cf_section *sec;            // declaration of a section or a list
+    cf_parser *par;                    // parser function
+    char **lookup;                     // NULL-terminated sequence of allowed strings for lookups
+    struct cf_user_type *utype;                // specification of the user-defined type
+  } u;
+  enum cf_class cls:16;                        // attribute class
+  enum cf_type type:16;                        // type of a static or dynamic attribute
+};
+
+/* Declaration of a section: its hooks and its items; usually filled in by CF_TYPE, CF_INIT, CF_COMMIT, CF_COPY and CF_ITEMS */
+struct cf_section {
+  uns size;                            // 0 for a global block, sizeof(struct) for a section
+  cf_hook *init;                       // fills in default values (no need to bzero)
+  cf_hook *commit;                     // verifies parsed data (optional)
+  cf_copier *copy;                     // copies values from another instance (optional, no need to copy basic attributes)
+  struct cf_item *cfg;                 // CC_END-terminated array of items
+  uns flags;                           // for internal use only
+};
+
+/* Declaration of cf_section */
+#define CF_TYPE(s)     .size = sizeof(s)
+#define CF_INIT(f)     .init = (cf_hook*) f
+#define CF_COMMIT(f)   .commit = (cf_hook*) f
+#define CF_COPY(f)     .copy = (cf_copier*) f
+#define CF_ITEMS       .cfg = ( struct cf_item[] )
+#define CF_END         { .cls = CC_END }
+/* Configuration items */
+#define CF_STATIC(n,p,T,t,c)   { .cls = CC_STATIC, .type = CT_##T, .name = n, .number = c, .ptr = CHECK_PTR_TYPE(p,t*) }
+#define CF_DYNAMIC(n,p,T,t,c)  { .cls = CC_DYNAMIC, .type = CT_##T, .name = n, .number = c, .ptr = CHECK_PTR_TYPE(p,t**) }
+#define CF_PARSER(n,p,f,c)     { .cls = CC_PARSER, .name = n, .number = c, .ptr = p, .u.par = (cf_parser*) f }
+#define CF_SECTION(n,p,s)      { .cls = CC_SECTION, .name = n, .number = 1, .ptr = p, .u.sec = s }
+#define CF_LIST(n,p,s)         { .cls = CC_LIST, .name = n, .number = 1, .ptr = CHECK_PTR_TYPE(p,clist*), .u.sec = s }
+#define CF_BITMAP_INT(n,p)     { .cls = CC_BITMAP, .type = CT_INT, .name = n, .number = 1, .ptr = CHECK_PTR_TYPE(p,u32*) }
+#define CF_BITMAP_LOOKUP(n,p,t)        { .cls = CC_BITMAP, .type = CT_LOOKUP, .name = n, .number = 1, .ptr = CHECK_PTR_TYPE(p,u32*), .u.lookup = t }
+/* Configuration items for basic types */
+#define CF_INT(n,p)            CF_STATIC(n,p,INT,int,1)
+#define CF_INT_ARY(n,p,c)      CF_STATIC(n,p,INT,int,c)
+#define CF_INT_DYN(n,p,c)      CF_DYNAMIC(n,p,INT,int,c)
+#define CF_UNS(n,p)            CF_STATIC(n,p,INT,uns,1)
+#define CF_UNS_ARY(n,p,c)      CF_STATIC(n,p,INT,uns,c)
+#define CF_UNS_DYN(n,p,c)      CF_DYNAMIC(n,p,INT,uns,c)
+#define CF_U64(n,p)            CF_STATIC(n,p,U64,u64,1)
+#define CF_U64_ARY(n,p,c)      CF_STATIC(n,p,U64,u64,c)
+#define CF_U64_DYN(n,p,c)      CF_DYNAMIC(n,p,U64,u64,c)
+#define CF_DOUBLE(n,p)         CF_STATIC(n,p,DOUBLE,double,1)
+#define CF_DOUBLE_ARY(n,p,c)   CF_STATIC(n,p,DOUBLE,double,c)
+#define CF_DOUBLE_DYN(n,p,c)   CF_DYNAMIC(n,p,DOUBLE,double,c)
+#define CF_IP(n,p)             CF_STATIC(n,p,IP,u32,1)
+#define CF_IP_ARY(n,p,c)       CF_STATIC(n,p,IP,u32,c)
+#define CF_IP_DYN(n,p,c)       CF_DYNAMIC(n,p,IP,u32,c)
+#define CF_STRING(n,p)         CF_STATIC(n,p,STRING,char*,1)
+#define CF_STRING_ARY(n,p,c)   CF_STATIC(n,p,STRING,char*,c)
+#define CF_STRING_DYN(n,p,c)   CF_DYNAMIC(n,p,STRING,char*,c)
+#define CF_LOOKUP(n,p,t)       { .cls = CC_STATIC, .type = CT_LOOKUP, .name = n, .number = 1, .ptr = CHECK_PTR_TYPE(p,int*), .u.lookup = t }
+#define CF_LOOKUP_ARY(n,p,t,c) { .cls = CC_STATIC, .type = CT_LOOKUP, .name = n, .number = c, .ptr = CHECK_PTR_TYPE(p,int*), .u.lookup = t }
+#define CF_LOOKUP_DYN(n,p,t,c) { .cls = CC_DYNAMIC, .type = CT_LOOKUP, .name = n, .number = c, .ptr = CHECK_PTR_TYPE(p,int**), .u.lookup = t }
+#define CF_USER(n,p,t)         { .cls = CC_STATIC, .type = CT_USER, .name = n, .number = 1, .ptr = p, .u.utype = t }
+#define CF_USER_ARY(n,p,t,c)   { .cls = CC_STATIC, .type = CT_USER, .name = n, .number = c, .ptr = p, .u.utype = t }
+#define CF_USER_DYN(n,p,t,c)   { .cls = CC_DYNAMIC, .type = CT_USER, .name = n, .number = c, .ptr = p, .u.utype = t }
+
+/* If you aren't picky about the number of parameters */
+#define CF_ANY_NUM             -0x7fffffff
+
+#define DARY_LEN(a) (((uns*)(a))[-1])
+  // length of a dynamic array: it is stored in the uns just before the first
+  // element; the result is an lvalue.  The argument is parenthesized, so that
+  // an expression can be passed safely.
+#define DARY_ALLOC(type,len,val...) ((struct { uns l; type a[len]; }) { .l = len, .a = { val } }).a
+  // creates a static instance of a dynamic array
+
+/* Memory allocation: conf-alloc.c */
+struct mempool;
+extern struct mempool *cf_pool;
+void *cf_malloc(uns size);
+void *cf_malloc_zero(uns size);
+char *cf_strdup(const char *s);
+char *cf_printf(const char *fmt, ...) FORMAT_CHECK(printf,1,2);
+
+/* Undo journal for error recovery: conf-journal.c */
+extern uns cf_need_journal;
+void cf_journal_block(void *ptr, uns len);
+#define CF_JOURNAL_VAR(var) cf_journal_block(&(var), sizeof(var))
+
+/* Declaration: conf-section.c */
+void cf_declare_section(const char *name, struct cf_section *sec, uns allow_unknown);
+void cf_init_section(const char *name, struct cf_section *sec, void *ptr, uns do_bzero);
+
+/* Parsers for basic types: conf-parse.c */
+char *cf_parse_int(const char *str, int *ptr);
+char *cf_parse_u64(const char *str, u64 *ptr);
+char *cf_parse_double(const char *str, double *ptr);
+char *cf_parse_ip(const char *p, u32 *varp);
+
+#endif
+
diff --git a/lib/config.h b/lib/config.h

new file mode 100644 (file)

index 0000000..e4c3b33
--- /dev/null
+++ b/lib/config.h
@@ -0,0 +1,49 @@
+/*
+ *     UCW Library -- Configuration-Dependent Definitions
+ *
+ *     (c) 1997--2007 Martin Mares <mj@ucw.cz>
+ *     (c) 2006 Robert Spalek <robert@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_CONFIG_H
+#define _UCW_CONFIG_H
+
+/* Configuration switches */
+
+#include "autoconf.h"
+
+/* Tell libc we're going to use all extensions available */
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
+/* Types (based on standard C99 integers) */
+
+#include <stddef.h>
+#include <stdint.h>
+
+typedef uint8_t byte;                  /* exactly 8 bits, unsigned */
+typedef uint8_t u8;                    /* exactly 8 bits, unsigned */
+typedef int8_t s8;                     /* exactly 8 bits, signed */
+typedef uint16_t u16;                  /* exactly 16 bits, unsigned */
+typedef int16_t s16;                   /* exactly 16 bits, signed */
+typedef uint32_t u32;                  /* exactly 32 bits, unsigned */
+typedef int32_t s32;                   /* exactly 32 bits, signed */
+typedef uint64_t u64;                  /* exactly 64 bits, unsigned */
+typedef int64_t s64;                   /* exactly 64 bits, signed */
+
+typedef unsigned int uns;              /* at least 32 bits */
+typedef u32 sh_time_t;                 /* seconds since UNIX epoch */
+typedef s64 timestamp_t;               /* milliseconds since UNIX epoch */
+
+/* sh_off_t is a signed file position: s64 when CONFIG_LARGE_FILES is defined, s32 otherwise */
+#ifdef CONFIG_LARGE_FILES              /* File positions */
+typedef s64 sh_off_t;
+#else
+typedef s32 sh_off_t;
+#endif
+
+#endif
diff --git a/lib/ctmatch.c b/lib/ctmatch.c

new file mode 100644 (file)

index 0000000..7e80776
--- /dev/null
+++ b/lib/ctmatch.c
@@ -0,0 +1,44 @@
+/*
+ *     UCW Library -- Content-Type Pattern Matching
+ *
+ *     (c) 1997 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/chartype.h"
+
+/*
+ *  Match the content type `t' against the pattern `p'. Characters are
+ *  compared after passing both of them through Cupcase().
+ *
+ *  The pattern has the form "<type>/<subtype>", where either side may be
+ *  a lone "*" matching anything; the pattern "*" alone matches everything.
+ *  The content type may be followed by a space or a semicolon, whatever
+ *  comes after it is ignored.
+ *
+ *  Returns 1 on a match and 0 otherwise. A pattern without the '/'
+ *  separator (except for the lone "*") never matches.
+ */
+int
+match_ct_patt(const char *p, const char *t)
+{
+  if (*p == '*' && !p[1])              /* "*" matches everything */
+    return 1;
+
+  if (*p == '*' && p[1] == '/')                /* "*" on the left-hand side */
+    {
+      while (*t && *t != ' ' && *t != ';' && *t != '/')
+       t++;
+      p += 2;
+    }
+  else                                 /* Normal left-hand side */
+    {
+      while (*p != '/')
+       {
+         /* A slash-less pattern ends here; never walk past the terminators of both strings */
+         if (!*p || Cupcase(*p++) != Cupcase(*t++))
+           return 0;
+       }
+      p++;
+    }
+  if (*t++ != '/')
+    return 0;
+
+  if (*p == '*' && !p[1])              /* "*" on the right-hand side */
+    return 1;
+  while (*p)
+    if (Cupcase(*p++) != Cupcase(*t++))
+      return 0;
+  if (*t && *t != ' ' && *t != ';')    /* Only parameters may follow the subtype */
+    return 0;
+
+  return 1;
+}
diff --git a/lib/db-emul.c b/lib/db-emul.c

new file mode 100644 (file)

index 0000000..62540de
--- /dev/null
+++ b/lib/db-emul.c
@@ -0,0 +1,155 @@
+/*
+ *     UCW Library -- SDBM emulator on top of GDBM
+ *
+ *     (c) 1999 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/db.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <gdbm.h>
+
+/* Handle of an SDBM database emulated by a GDBM file */
+struct sdbm {
+  GDBM_FILE db;                        /* handle obtained from gdbm_open() */
+  datum prevkey;                       /* key produced by the last sdbm_get_next(); dptr is NULL when no walk is in progress */
+};
+
+/*
+ *  Open the GDBM file described by the SDBM options `o'.
+ *
+ *  The page order is translated to a GDBM block size (0 lets GDBM choose),
+ *  SDBM_WRITE/SDBM_CREAT/SDBM_SYNC select the GDBM open mode and a non-zero
+ *  cache_size is passed on by gdbm_setopt(GDBM_CACHESIZE).
+ *
+ *  Returns a newly allocated handle, or NULL if gdbm_open() fails.
+ */
+struct sdbm *
+sdbm_open(struct sdbm_options *o)
+{
+  struct sdbm *d = xmalloc(sizeof(struct sdbm));
+  d->db = gdbm_open(o->name,
+                   (o->page_order ? (1 << o->page_order) : 0),
+                   ((o->flags & SDBM_WRITE) ? ((o->flags & SDBM_CREAT) ? GDBM_WRCREAT : GDBM_WRITER) : GDBM_READER)
+                     | ((o->flags & SDBM_SYNC) ? GDBM_SYNC : 0),
+                   0666,
+                   NULL);
+  if (!d->db)
+    {
+      /* Callers test the returned handle for NULL, so do not hand out a handle without a database */
+      xfree(d);
+      return NULL;
+    }
+  if (o->cache_size)
+    gdbm_setopt(d->db, GDBM_CACHESIZE, &o->cache_size, sizeof(o->cache_size));
+  d->prevkey.dptr = NULL;              /* No walk in progress */
+  return d;
+}
+
+/* Forget the walk position, close the GDBM file and free the handle */
+void
+sdbm_close(struct sdbm *d)
+{
+  GDBM_FILE db = d->db;
+
+  sdbm_rewind(d);                      /* Releases the remembered key, if any */
+  gdbm_close(db);
+  xfree(d);
+}
+
+/*
+ *  Hand `Dl' bytes at `D' over to the caller: if `vallen' is given, it holds
+ *  the buffer capacity on input and receives the real length; if `val' is
+ *  given, the data are copied there. Returns 1 when the capacity is too
+ *  small (nothing is touched in that case), 0 otherwise.
+ */
+static int
+sdbm_put_user(byte *D, uns Dl, byte *val, uns *vallen)
+{
+  if (vallen && *vallen < Dl)
+    return 1;
+  if (vallen)
+    *vallen = Dl;
+  if (val)
+    memcpy(val, D, Dl);
+  return 0;
+}
+
+/*
+ *  Insert a new record with gdbm_store(GDBM_INSERT). A negative GDBM
+ *  result is passed through, a zero result is reported as 1 and any other
+ *  result as 0.
+ */
+int
+sdbm_store(struct sdbm *d, byte *key, uns keylen, byte *val, uns vallen)
+{
+  datum K = { .dptr = key, .dsize = keylen };
+  datum V = { .dptr = val, .dsize = vallen };
+  int status = gdbm_store(d->db, K, V, GDBM_INSERT);
+
+  if (status < 0)
+    return status;
+  return status == 0;
+}
+
+/*
+ *  Store a record with gdbm_store(GDBM_REPLACE); a NULL `val' deletes the
+ *  key instead. A negative GDBM result is passed through, a zero result is
+ *  reported as 1 and any other result as 0.
+ */
+int
+sdbm_replace(struct sdbm *d, byte *key, uns keylen, byte *val, uns vallen)
+{
+  if (!val)
+    return sdbm_delete(d, key, keylen);
+
+  datum K = { .dptr = key, .dsize = keylen };
+  datum V = { .dptr = val, .dsize = vallen };
+  int status = gdbm_store(d->db, K, V, GDBM_REPLACE);
+
+  if (status < 0)
+    return status;
+  return status == 0;
+}
+
+/* Delete a key; returns 1 when gdbm_delete() returns zero and 0 otherwise */
+int
+sdbm_delete(struct sdbm *d, byte *key, uns keylen)
+{
+  datum K = { .dptr = key, .dsize = keylen };
+
+  return gdbm_delete(d->db, K) == 0;
+}
+
+/*
+ *  Look a key up. With both `val' and `vallen' NULL only the result of
+ *  gdbm_exists() is returned. Otherwise the value is handed over through
+ *  sdbm_put_user(): returns 1 if found, 0 if not found and
+ *  SDBM_ERROR_TOO_LARGE if the value does not fit in *vallen bytes.
+ */
+int
+sdbm_fetch(struct sdbm *d, byte *key, uns keylen, byte *val, uns *vallen)
+{
+  datum K = { .dptr = key, .dsize = keylen };
+  datum V;
+  int too_large;
+
+  if (!(val || vallen))
+    return gdbm_exists(d->db, K);
+
+  V = gdbm_fetch(d->db, K);
+  if (!V.dptr)
+    return 0;
+  too_large = sdbm_put_user(V.dptr, V.dsize, val, vallen);
+  xfree(V.dptr);                       /* The datum returned by gdbm_fetch() is ours to free */
+  if (too_large)
+    return SDBM_ERROR_TOO_LARGE;
+  return 1;
+}
+
+/* Restart the walk: free the remembered key so that sdbm_get_next() begins with the first key again */
+void
+sdbm_rewind(struct sdbm *d)
+{
+  if (!d->prevkey.dptr)
+    return;
+  xfree(d->prevkey.dptr);
+  d->prevkey.dptr = NULL;
+}
+
+/*
+ *  Advance the walk over all keys and hand the next record to the caller.
+ *
+ *  The key goes to `key'/`keylen' and, if `val' or `vallen' is given, the
+ *  value goes to `val'/`vallen'; any of these pointers may be NULL (see
+ *  sdbm_put_user() for the meaning of the length arguments).
+ *
+ *  Returns 1 on success, 0 when there are no more keys (the following call
+ *  starts from the first key again), SDBM_ERROR_TOO_LARGE when the key or
+ *  the value does not fit, or the result of sdbm_fetch() for the value.
+ */
+int
+sdbm_get_next(struct sdbm *d, byte *key, uns *keylen, byte *val, uns *vallen)
+{
+  datum K;
+
+  if (d->prevkey.dptr)
+    {
+      K = gdbm_nextkey(d->db, d->prevkey);
+      xfree(d->prevkey.dptr);
+    }
+  else
+    K = gdbm_firstkey(d->db);
+  d->prevkey = K;                      /* Remembered (and owned) until the next step or rewind */
+  if (!K.dptr)
+    return 0;
+  if (sdbm_put_user((byte *) K.dptr, K.dsize, key, keylen))
+    return SDBM_ERROR_TOO_LARGE;
+  if (val || vallen)
+    /* Use GDBM's own copy of the key, so the lookup works even if the caller passed key or keylen as NULL */
+    return sdbm_fetch(d, (byte *) K.dptr, K.dsize, val, vallen);
+  return 1;
+}
+
+/*
+ *  Synchronization hook of the SDBM interface. In this emulation the body is
+ *  empty and `d' is unused.
+ *  NOTE(review): gdbm_sync() may be the matching GDBM call -- confirm whether
+ *  the no-op is intended.
+ */
+void
+sdbm_sync(struct sdbm *d)
+{
+}
diff --git a/lib/db-test.c b/lib/db-test.c

new file mode 100644 (file)

index 0000000..9a6f3bb
--- /dev/null
+++ b/lib/db-test.c
@@ -0,0 +1,475 @@
+/*
+ *     UCW Library -- Database Manager -- Tests and Benchmarks
+ *
+ *     (c) 1999 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#if 1
+#include "lib/db.c"
+#define NAME "SDBM"
+#else
+#include "lib/db-emul.c"
+#define NAME "GDBM"
+#endif
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdarg.h>
+#include <sys/stat.h>
+
+/*
+ *  Options passed to sdbm_open(). These are the defaults; main() adjusts
+ *  cache_size, page_order, flags, key_size and val_size according to the
+ *  command line. Standard C99 designated initializers are used instead of
+ *  the obsolete GNU "field:" form.
+ */
+static struct sdbm_options opts = {
+  .flags = SDBM_CREAT | SDBM_WRITE,
+  .name = "db.test",
+  .page_order = 10,
+  .cache_size = 1024,
+  .key_size = -1,
+  .val_size = -1
+};
+
+static struct sdbm *d;
+static int key_min, key_max;           /* min<0 -> URL distribution */
+static int val_min, val_max;
+static int num_keys;                   /* Number of distinct keys */
+static int verbose;
+
+/* Print the usage summary to stdout and terminate the process with exit status 0 */
+static void
+help(void)
+{
+  printf("Usage: dbtest [<options>] <commands>\n\
+\n\
+Options:\n\
+-c<n>          Use cache of <n> pages\n\
+-p<n>          Use pages of order <n>\n\
+-k<n>          Use key size <n>\n\
+-k<m>-<n>      Use key size uniformly distributed between <m> and <n>\n\
+-kU            Use keys with URL distribution\n\
+-n<n>          Number of distinct keys\n\
+-d<m>[-<n>]    Use specified value size (see -k<m>-<n>)\n\
+-t             Perform the tests on an existing database file\n\
+-v             Be verbose\n\
+-s             Turn on synchronous mode\n\
+-S             Turn on supersynchronous mode\n\
+-F             Turn on fast mode\n\
+\n\
+Commands:\n\
+c              Fill database\n\
+r              Rewrite database\n\
+f[<p>%%][<n>]  Find <n> records with probability of success <p>%% (default=100)\n\
+F[<p>%%][<n>]  Find, but don't fetch values\n\
+d              Delete records\n\
+w              Walk database\n\
+W              Walk, but don't fetch values\n\
+");
+  exit(0);
+}
+
+/* Scramble a key number by multiplying it with a fixed constant (unsigned arithmetic, wraps around) */
+static uns
+krand(uns kn)
+{
+  const uns mult = 2000000011;
+
+  return mult * kn;
+}
+
+/*
+ *  Pick a key size for the "URL distribution" from the random number `rnd'.
+ *
+ *  `rnd' is reduced modulo utable[1024] and then looked up in utable[] by
+ *  a binary search over the indices 0..1023. The index found is the result:
+ *  either an entry equal to the reduced number, or the position at which
+ *  the search interval collapsed.
+ *  NOTE(review): utable[] looks like a cumulative histogram of URL lengths
+ *  (its values never decrease) -- confirm its origin.
+ */
+static uns
+gen_url_size(uns rnd)
+{
+  uns l, m, r;
+  static uns utable[] = {
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 22, 108, 245, 481, 979, 3992, 7648, 13110, 19946, 27256, 34993, 43222, 52859, 64563,
+80626, 117521, 147685, 188364, 233174, 290177, 347132, 407231, 465787, 540931, 628601, 710246, 808671, 922737, 1025691, 1138303,
+1238802, 1344390, 1443843, 1533207, 1636494, 1739082, 1826911, 1910725, 1993940, 2094365, 2188987, 2267827, 2350190, 2441980,
+2520713, 2593654, 2668632, 2736009, 2808356, 2889682, 2959300, 3017945, 3086488, 3146032, 3204818, 3251897, 3307001, 3349388,
+3392798, 3433429, 3476765, 3529107, 3556884, 3585120, 3633005, 3677697, 3699561, 3716660, 3739823, 3765154, 3795096, 3821184,
+3858117, 3908757, 3929095, 3943264, 3957033, 3969588, 3983441, 3994630, 4005413, 4028890, 4039678, 4058007, 4071906, 4087029,
+4094233, 4105259, 4111603, 4120338, 4127364, 4133983, 4140310, 4144843, 4150565, 4155974, 4165132, 4170648, 4176811, 4187118,
+4190866, 4199051, 4206686, 4216122, 4226109, 4233721, 4254123, 4261792, 4270396, 4276650, 4282932, 4291738, 4295932, 4299370,
+4304011, 4307098, 4311866, 4318168, 4325730, 4329774, 4332946, 4336305, 4339770, 4345237, 4349038, 4356129, 4362872, 4366542,
+4371077, 4374524, 4376733, 4378794, 4380652, 4382340, 4383552, 4385952, 4386914, 4393123, 4394106, 4395142, 4396593, 4399112,
+4399909, 4401015, 4401780, 4402616, 4403454, 4404481, 4405231, 4405947, 4406886, 4408364, 4409159, 4409982, 4410872, 4412010,
+4413341, 4414161, 4415673, 4417135, 4418032, 4419117, 4419952, 4420677, 4421387, 4421940, 4422469, 4423210, 4423696, 4424274,
+4424982, 4425665, 4426363, 4427018, 4427969, 4428992, 4429791, 4430804, 4432601, 4433440, 4434157, 4434967, 4436280, 4439784,
+4444255, 4445544, 4446416, 4447620, 4449638, 4453004, 4455470, 4456982, 4457956, 4458617, 4459538, 4460007, 4460377, 4460768,
+4461291, 4461520, 4461678, 4461911, 4462063, 4462239, 4462405, 4462607, 4462666, 4462801, 4462919, 4463108, 4463230, 4463438,
+4463530, 4463698, 4463779, 4463908, 4463991, 4464138, 4464188, 4464391, 4464580, 4464868, 4464980, 4465174, 4465255, 4465473,
+4465529, 4465681, 4465746, 4465916, 4465983, 4466171, 4466248, 4466430, 4466560, 4466751, 4466930, 4467807, 4468847, 4469940,
+4470344, 4470662, 4470716, 4471120, 4471389, 4471814, 4472141, 4472545, 4472687, 4473051, 4473253, 4473603, 4473757, 4474065,
+4474125, 4474354, 4474428, 4474655, 4474705, 4474841, 4474858, 4475133, 4475201, 4475327, 4475367, 4475482, 4475533, 4475576,
+4475586, 4475616, 4475637, 4475659, 4475696, 4475736, 4475775, 4475794, 4476156, 4476711, 4477004, 4477133, 4477189, 4477676,
+4477831, 4477900, 4477973, 4477994, 4478011, 4478040, 4478063, 4478085, 4478468, 4478715, 4479515, 4480034, 4481804, 4483259,
+4483866, 4484202, 4484932, 4485693, 4486184, 4486549, 4486869, 4487405, 4487639, 4487845, 4488086, 4488256, 4488505, 4488714,
+4492669, 4496233, 4497738, 4498122, 4498653, 4499862, 4501169, 4501627, 4501673, 4501811, 4502182, 4502475, 4502533, 4502542,
+4502548, 4502733, 4503389, 4504381, 4505070, 4505378, 4505814, 4506031, 4506336, 4506642, 4506845, 4506971, 4506986, 4507016,
+4507051, 4507098, 4507107, 4507114, 4507139, 4507478, 4507643, 4507674, 4507694, 4507814, 4507894, 4507904, 4507929, 4507989,
+4508023, 4508047, 4508053, 4508063, 4508075, 4508092, 4508104, 4508113, 4508239, 4508285, 4508324, 4508335, 4508340, 4508378,
+4508405, 4508419, 4508436, 4508449, 4508470, 4508488, 4508515, 4508541, 4508564, 4508570, 4508584, 4508594, 4508607, 4508634,
+4508652, 4508665, 4508673, 4508692, 4508704, 4508742, 4508755, 4508773, 4508788, 4508798, 4508832, 4508869, 4508885, 4508905,
+4508915, 4508947, 4508956, 4509061, 4509070, 4509357, 4509368, 4509380, 4509393, 4509401, 4509412, 4509426, 4509438, 4509451,
+4509461, 4509473, 4509489, 4509498, 4509512, 4509537, 4509568, 4509582, 4509621, 4509629, 4509747, 4509766, 4509776, 4509795,
+4509802, 4509813, 4509822, 4509829, 4509834, 4509844, 4509854, 4509863, 4509868, 4509875, 4509886, 4509898, 4509908, 4509920,
+4509932, 4509941, 4509949, 4509955, 4509967, 4509972, 4509979, 4509987, 4509999, 4510002, 4510010, 4510014, 4510018, 4510025,
+4510028, 4510049, 4510055, 4510061, 4510068, 4510079, 4510085, 4510091, 4510098, 4510102, 4510104, 4510110, 4510121, 4510128,
+4510132, 4510138, 4510144, 4510145, 4510153, 4510161, 4510174, 4510196, 4510199, 4510208, 4510209, 4510212, 4510216, 4510217,
+4510219, 4510222, 4510228, 4510231, 4510236, 4510241, 4510245, 4510248, 4510250, 4510254, 4510255, 4510261, 4510262, 4510266,
+4510266, 4510271, 4510285, 4510287, 4510291, 4510295, 4510303, 4510306, 4510308, 4510310, 4510314, 4510319, 4510320, 4510324,
+4510328, 4510333, 4510333, 4510336, 4510340, 4510342, 4510348, 4510353, 4510359, 4510362, 4510365, 4510371, 4510373, 4510375,
+4510378, 4510380, 4510385, 4510389, 4510391, 4510391, 4510394, 4510396, 4510397, 4510398, 4510400, 4510403, 4510406, 4510407,
+4510408, 4510409, 4510411, 4510413, 4510417, 4510417, 4510419, 4510422, 4510426, 4510427, 4510430, 4510435, 4510437, 4510439,
+4510440, 4510442, 4510442, 4510446, 4510447, 4510448, 4510450, 4510451, 4510451, 4510453, 4510454, 4510455, 4510457, 4510460,
+4510460, 4510460, 4510462,  4510463, 4510466, 4510468, 4510472, 4510475, 4510480, 4510482, 4510483, 4510486, 4510488, 4510492,
+4510494, 4510497, 4510497, 4510499, 4510503, 4510505, 4510506, 4510507, 4510509, 4510512, 4510514, 4510527, 4510551, 4510553,
+4510554, 4510555, 4510556, 4510558, 4510561, 4510562, 4510566, 4510567, 4510568, 4510570, 4510573, 4510574, 4510586, 4510603,
+4510605, 4510607, 4510610, 4510610, 4510613, 4510613, 4510614, 4510614, 4510615, 4510616, 4510616, 4510620, 4510622, 4510623,
+4510624, 4510627, 4510628, 4510630, 4510631, 4510632, 4510634, 4510634, 4510634, 4510636, 4510636, 4510639, 4510639, 4510640,
+4510643, 4510647, 4510649, 4510650, 4510653, 4510653, 4510653, 4510653, 4510656, 4510659, 4510661, 4510664, 4510665, 4510669,
+4510672, 4510673, 4510674, 4510675, 4510680, 4510683, 4510684, 4510686, 4510687, 4510690, 4510691, 4510693, 4510693, 4510697,
+4510699, 4510700, 4510703, 4510704, 4510709, 4510711, 4510713, 4510713, 4510720, 4510720, 4510722, 4510724, 4510727, 4510729,
+4510735, 4510735, 4510738, 4510740, 4510744, 4510745, 4510746, 4510748, 4510754, 4510756, 4510758, 4510761, 4510764, 4510766,
+4510768, 4510768, 4510770, 4510770, 4510772, 4510774, 4510775, 4510775, 4510775, 4510776, 4510777, 4510780, 4510782, 4510783,
+4510785, 4510786, 4510788, 4510789, 4510791, 4510793, 4510793, 4510793, 4510795, 4510795, 4510799, 4510803, 4510804, 4510804,
+4510804, 4510805, 4510807, 4510809, 4510811, 4510811, 4510813, 4510815, 4510815, 4510816, 4510819, 4510820, 4510824, 4510827,
+4510829, 4510829, 4510830, 4510833, 4510835, 4510837, 4510838, 4510838, 4510839, 4510840, 4510840, 4510842, 4510842, 4510843,
+4510845, 4510845, 4510845, 4510847, 4510848, 4510848, 4510848, 4510850, 4510853, 4510855, 4510857, 4510859, 4510861, 4510862,
+4510864, 4510865, 4510865, 4510865, 4510869, 4510869, 4510869, 4510869, 4510869, 4510870, 4510870, 4510872, 4510872, 4510873,
+4510874, 4510875, 4510875, 4510877, 4510879, 4510879, 4510879, 4510879, 4510880, 4510881, 4510882, 4510883, 4510884, 4510885,
+4510886, 4510887, 4510890, 4510890, 4510891, 4510892, 4510892, 4510893, 4510893, 4510895, 4510895, 4510896, 4510897, 4510899,
+4510901, 4510901, 4510901, 4510902, 4510903, 4510903, 4510903, 4510905, 4510905, 4510906, 4510906, 4510907, 4510907, 4510909,
+4510910, 4510911, 4510911, 4510911, 4510913, 4510913, 4510914, 4510914, 4510914, 4510915, 4510916, 4510918, 4510918, 4510919,
+4510919, 4510919, 4510920, 4510921, 4510922, 4510923, 4510924, 4510924, 4510924, 4510924, 4510926, 4510927, 4510928, 4510928,
+4510928, 4510928, 4510928, 4510930, 4510933, 4510935, 4510935, 4510935, 4510935, 4510935, 4510936, 4510938, 4510947, 4510966,
+4510967, 4510969, 4510973, 4510973, 4510974, 4510974, 4510974, 4510974, 4510974, 4510974, 4510975, 4510976, 4510976, 4510976,
+4510976, 4510976, 4510976, 4510976, 4510977, 4510979, 4510979, 4510979, 4510979, 4510979, 4510979, 4510980, 4510980, 4510980,
+4510980, 4510981, 4510981, 4510981, 4510982, 4510982, 4510982, 4510982, 4510982, 4510982, 4510982, 4510983, 4510983, 4510984,
+4510984, 4510984, 4510984, 4510984, 4510985, 4510985, 4510985, 4510985, 4510987, 4510987, 4510987, 4510988, 4510988, 4510989,
+4510989, 4510989, 4510989, 4510989, 4510990, 4510990, 4510990, 4510990, 4510990, 4510990, 4510990, 4510991, 4510991, 4510991,
+4510991, 4510991, 4510991, 4510991, 4510992, 4510992, 4510992, 4510992, 4510992, 4510992, 4510992, 4510993, 4510993, 4510993,
+4510994, 4510994, 4510994, 4510994, 4510995, 4510995, 4510996, 4510997, 4510998, 4510999, 4510999, 4511000, 4511000, 4511001,
+4511001, 4511002, 4511002, 4511002, 4511003, 4511004, 4511004, 4511004, 4511004, 4511005, 4511006, 4511008, 4511008, 4511008,
+4511009, 4511009, 4511009, 4511009, 4511010, 4511011, 4511011, 4511012, 4511012, 4511012, 4511012, 4511013, 4511013, 4511014,
+4511014, 4511014, 4511014, 4511015, 4511018, 4511018, 4511018, 4511018, 4511018, 4511018, 4511018, 4511020, 4511020, 4511020,
+4511020, 4511020, 4511020, 4511020, 4511021, 4511021, 4511021, 4511021, 4511021, 4511021, 4511021, 4511021, 4511021, 4511021,
+4511021
+  };
+
+  rnd %= utable[1024];                 /* The last table entry serves as the modulus */
+  l = 0; r = 1023;
+  while (l < r)
+    {
+      m = (l+r)/2;
+      if (utable[m] == rnd)
+       return m;
+      if (utable[m] >= rnd)
+       r = m - 1;
+      else
+       l = m + 1;
+    }
+  return l;
+}
+
+/* Map the random number `rnd' to a size in the range min..max (inclusive) */
+static uns
+gen_size(uns min, uns max, uns rnd)
+{
+  return (min == max) ? min : min + rnd % (max - min + 1);
+}
+
+/* Fill `size' bytes at `buf' with a pseudo-random sequence determined solely by the key number `kn' */
+static void
+gen_random(byte *buf, uns size, uns kn)
+{
+  uns state = (kn + 0x36221057) ^ (kn << 24) ^ (kn << 15);
+  uns i;
+
+  for (i = 0; i < size; i++)
+    {
+      buf[i] = state >> 24;            /* Emit the top byte of the generator state */
+      state = state*257 + 17;
+    }
+}
+
+/*
+ *  Generate the key number `kn' into `buf' and return its length. The key
+ *  starts with the key number stored as 4 big-endian bytes, the rest is
+ *  filled by gen_random(). The length comes from the URL distribution when
+ *  key_min is negative, otherwise from the range key_min..key_max; it is
+ *  never less than 4.
+ */
+static int
+keygen(byte *buf, uns kn)
+{
+  uns rnd = krand(kn);
+  uns size = (key_min < 0) ? gen_url_size(rnd) : gen_size(key_min, key_max, rnd);
+
+  buf[0] = kn >> 24;
+  buf[1] = kn >> 16;
+  buf[2] = kn >> 8;
+  buf[3] = kn;
+  if (size < 4)
+    return 4;
+  gen_random(buf + 4, size-4, kn);
+  return size;
+}
+
+/* Generate the value belonging to the key number `kn' into `buf' and return its length (val_min..val_max) */
+static int
+valgen(byte *buf, uns kn)
+{
+  uns rnd = krand(kn);
+  uns len = gen_size(val_min, val_max, rnd);
+
+  gen_random(buf, len, kn);
+  return len;
+}
+
+/*
+ *  Decode the key number stored by keygen() in the first 4 bytes of a key
+ *  (big-endian). The bytes are widened to uns before shifting, so that
+ *  a leading byte >= 0x80 is not shifted into the sign bit of an int.
+ */
+static uns
+keydec(byte *buf)
+{
+  return ((uns) buf[0] << 24) | ((uns) buf[1] << 16) | ((uns) buf[2] << 8) | buf[3];
+}
+
+/*
+ *  Print a printf-like message to stderr if the verbosity level is high
+ *  enough. The required level is 1 unless the format starts with "^<digit>",
+ *  which sets the level to that digit and is stripped from the format.
+ */
+static void
+verb(char *msg, ...)
+{
+  int level = 1;
+  va_list ap;
+
+  if (msg[0] == '^' && msg[1])
+    {
+      level = msg[1] - '0';
+      msg += 2;
+    }
+  if (verbose < level)
+    return;
+  va_start(ap, msg);
+  vfprintf(stderr, msg, ap);
+  va_end(ap);
+}
+
+/*
+ *  Parse a size specification "<n>" or "<m>-<n>" into *min and *max.
+ *  A single number sets both of them. The string is modified: the first
+ *  dash is overwritten by a terminating zero.
+ */
+static void
+parse_size(int *min, int *max, char *c)
+{
+  char *dash = strchr(c, '-');
+
+  if (!dash)
+    {
+      *min = *max = atol(c);
+      return;
+    }
+  *dash = 0;
+  *min = atol(c);
+  *max = atol(dash + 1);
+}
+
+/*
+ *  Progress indicator: print the counter followed by a carriage return to
+ *  stderr, for every record if verbose > 2, or for every 1024th one if
+ *  verbose > 1. Wrapped in do { } while (0) so that it behaves as a single
+ *  statement (no dangling else); the argument is evaluated more than once.
+ */
+#define PROGRESS(i) do { if ((verbose > 2) || (verbose > 1 && !((i) & 1023))) fprintf(stderr, "%d\r", (i)); } while (0)
+
+/*
+ *  Benchmark driver: parse the options, open the database and run every
+ *  command given on the command line in turn, printing the time reported by
+ *  get_timer() for each of them. Finally the database is closed and the size
+ *  of its file is printed.
+ */
+int main(int argc, char **argv)
+{
+  int c, i, j, k, l, m;
+  byte kb[2048], vb[2048], vb2[2048];
+  uns ks, vs, vs2, perc, cnt;
+  char *ch;
+  int dont_delete = 0;
+  timestamp_t timer;
+
+  log_init("dbtest");
+  setvbuf(stdout, NULL, _IONBF, 0);
+  setvbuf(stderr, NULL, _IONBF, 0);
+  while ((c = getopt(argc, argv, "c:p:k:n:d:vsStF")) >= 0)
+    switch (c)
+      {
+      case 'c':
+       opts.cache_size = atol(optarg);
+       break;
+      case 'p':
+       opts.page_order = atol(optarg);
+       break;
+      case 'k':
+       if (!strcmp(optarg, "U"))
+         key_min = key_max = -1;
+       else
+         parse_size(&key_min, &key_max, optarg);
+       break;
+      case 'n':
+       num_keys = atol(optarg);
+       break;
+      case 'd':
+       parse_size(&val_min, &val_max, optarg);
+       break;
+      case 'v':
+       verbose++;
+       break;
+      case 's':
+       opts.flags |= SDBM_SYNC;
+       break;
+      case 'S':
+       opts.flags |= SDBM_SYNC | SDBM_FSYNC;
+       break;
+      case 'F':
+       opts.flags |= SDBM_FAST;
+       break;
+      case 't':
+       dont_delete = 1;
+       break;
+      default:
+       help();
+      }
+
+  /* Keys always carry the 4-byte key number, so shorter fixed sizes are raised to 4 */
+  if (key_min >= 0 && key_min < 4)
+    key_min = key_max = 4;
+  /* Fixed key or value sizes are announced to the database; otherwise the -1 defaults of opts stay */
+  if (key_min == key_max && key_min >= 0)
+    opts.key_size = key_min;
+  if (val_min == val_max)
+    opts.val_size = val_min;
+  if (!num_keys)
+    die("Number of keys not given");
+
+  printf(NAME " benchmark: %d records, keys ", num_keys);
+  if (key_min < 0)
+    printf("<URL>");
+  else
+    printf("%d-%d", key_min, key_max);
+  printf(", values %d-%d, page size %d, cache %d pages\n", val_min, val_max, 1 << opts.page_order, opts.cache_size);
+
+  verb("OPEN(%s, key=%d, val=%d, cache=%d, pgorder=%d)\n", opts.name, opts.key_size, opts.val_size,
+       opts.cache_size, opts.page_order);
+  /* Start from scratch unless -t asked for an existing database */
+  if (!dont_delete)
+    unlink(opts.name);
+  d = sdbm_open(&opts);
+  if (!d)
+    die("open failed: %m");
+
+  /* Each remaining argument is one command; its first character selects the test */
+  while (optind < argc)
+    {
+      char *o = argv[optind++];
+      init_timer(&timer);
+      switch (*o)
+       {
+       case 'c':
+         printf("create %d: ", num_keys);
+         for(i=0; i<num_keys; i++)
+           {
+             PROGRESS(i);
+             ks = keygen(kb, i);
+             vs = valgen(vb, i);
+             if (sdbm_store(d, kb, ks, vb, vs) != 1) die("store failed");
+           }
+         break;
+       case 'r':
+         printf("rewrite %d: ", num_keys);
+         for(i=0; i<num_keys; i++)
+           {
+             PROGRESS(i);
+             ks = keygen(kb, i);
+             vs = valgen(vb, i);
+             if (sdbm_replace(d, kb, ks, vb, vs) != 1) die("replace failed");
+           }
+         break;
+       case 'f':
+       case 'F':
+         /* c: fetch values as well ('f') or only test presence ('F') */
+         c = (*o++ == 'f');
+         /* Optional "<p>%" prefix gives the success percentage, the rest is the record count */
+         if ((ch = strchr(o, '%')))
+           {
+             *ch++ = 0;
+             perc = atol(o);
+           }
+         else
+           {
+             ch = o;
+             perc = 100;
+           }
+         cnt = atol(ch);
+         /* m: without a count and with 100% success walk the keys sequentially, otherwise pick them at random */
+         if (!cnt)
+           {
+             cnt = num_keys;
+             m = (perc == 100);
+           }
+         else
+           m = 0;
+         printf("%s fetch %d (%d%% success, with%s values): ", (m ? "sequential" : "random"), cnt, perc, (c ? "" : "out"));
+         i = -1;
+         while (cnt--)
+           {
+             if (m)
+               i++;
+             else
+               /* Key numbers >= num_keys were never stored, so they must not be found */
+               i = random_max(num_keys) + ((random_max(100) < perc) ? 0 : num_keys);
+             PROGRESS(i);
+             ks = keygen(kb, i);
+             if (c)
+               {
+                 vs2 = sizeof(vb2);
+                 j = sdbm_fetch(d, kb, ks, vb2, &vs2);
+               }
+             else
+               j = sdbm_fetch(d, kb, ks, NULL, NULL);
+             if (j < 0)
+               die("fetch: error %d", j);
+             if ((i < num_keys) != j)
+               die("fetch mismatch at key %d, res %d", i, j);
+             if (c && j)
+               {
+                 /* Regenerate the expected value and compare */
+                 vs = valgen(vb, i);
+                 if (vs != vs2 || memcmp(vb, vb2, vs))
+                   die("fetch data mismatch at key %d: %d,%d", i, vs, vs2);
+               }
+           }
+         break;
+       case 'd':
+         printf("delete %d: ", num_keys);
+         for(i=0; i<num_keys; i++)
+           {
+             PROGRESS(i);
+             ks = keygen(kb, i);
+             if (sdbm_delete(d, kb, ks) != 1) die("delete failed");
+           }
+         break;
+       case 'w':
+       case 'W':
+         /* c: fetch values during the walk ('w') or keys only ('W') */
+         c = (*o == 'w');
+         /* k counts the records, l sums 0..k-1 and m sums the key numbers seen */
+         i = k = l = m = 0;
+         printf("walk %d (with%s keys): ", num_keys, (c ? "" : "out"));
+         sdbm_rewind(d);
+         for(;;)
+           {
+             ks = sizeof(kb);
+             vs = sizeof(vb);
+             if (c)
+               j = sdbm_get_next(d, kb, &ks, vb, &vs);
+             else
+               j = sdbm_get_next(d, kb, &ks, NULL, NULL);
+             if (!j)
+               break;
+             if (ks < 4)
+               die("get_next: too short");
+             i = keydec(kb);
+             if (i < 0 || i >= num_keys)
+               die("get_next: %d out of range", i);
+             PROGRESS(i);
+             /* Regenerate the key (and the value) from the decoded key number and compare */
+             vs2 = keygen(vb2, i);
+             if (ks != vs2 || memcmp(kb, vb2, ks))
+               die("get_next: key mismatch at %d", i);
+             if (c)
+               {
+                 vs2 = valgen(vb2, i);
+                 if (vs != vs2 || memcmp(vb, vb2, vs))
+                   die("get_next: data mismatch at %d", i);
+               }
+             l += k;
+             m += i;
+             k++;
+           }
+         if (k != num_keys)
+           die("fetch: wrong # of keys: %d != %d", k, num_keys);
+         /* The sums agree when the key numbers seen add up to 0+1+...+(k-1) */
+         if (l != m)
+           die("fetch: wrong checksum: %d != %d", l, m);
+         break;
+       default:
+         help();
+       }
+      sdbm_sync(d);
+      printf("%d ms\n", get_timer(&timer));
+    }
+
+  verb("CLOSE\n");
+  sdbm_close(d);
+
+  {
+    struct stat st;
+    if (stat(opts.name, &st)) die("stat: %m");
+    printf("file size: %d bytes\n", (int) st.st_size);
+  }
+  return 0;
+}
diff --git a/lib/db-tool.c b/lib/db-tool.c

new file mode 100644 (file)

index 0000000..bbb419a
--- /dev/null
+++ b/lib/db-tool.c
@@ -0,0 +1,264 @@
+/*
+ *     SDBM Database Utility
+ *
+ *     (c) 2000--2001 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/db.h"
+#include "lib/db_internal.h"
+#include "lib/fastbuf.h"
+#include "lib/ff-binary.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <errno.h>
+
+static int verbose=0;
+static int cache=1024;
+static int force_key=-2;
+static int force_val=-2;
+static int force_page=-1;
+
+#define SDBM_DUMP_MAGIC 0x321f120e
+#define SDBM_DUMP_VERSION 1
+
+/*
+ *  Dump the database `db' to the file `dmp'.
+ *
+ *  The dump starts with five 32-bit words written by bputl(): the magic
+ *  number, the dump version and the page order, key size and value size of
+ *  the source database. Each record follows as a key length written by
+ *  bputw(), the key, a value length written by bputw() and the value.
+ *  Progress and the final record count are printed to stderr.
+ */
+static void
+dump(char *db, char *dmp)
+{
+  struct sdbm *src;
+  struct fastbuf *dest;
+  struct sdbm_options op;
+  int e, c=0;
+
+  /* Open the source with zero flags and a cache of 16 pages */
+  bzero(&op, sizeof(op));
+  op.name = db;
+  op.cache_size = 16;
+  op.flags = 0;
+  src = sdbm_open(&op);
+  if (!src)
+    die("Source open failed: %m");
+
+  dest = bopen(dmp, O_WRONLY | O_CREAT | O_TRUNC, 65536);
+  bputl(dest, SDBM_DUMP_MAGIC);
+  bputl(dest, SDBM_DUMP_VERSION);
+  bputl(dest, src->page_order);
+  bputl(dest, src->key_size);
+  bputl(dest, src->val_size);
+
+  fprintf(stderr, "Dumping database...\n");
+  sdbm_rewind(src);
+  for(;;)
+    {
+      byte key[65536], val[65536];
+      int klen = sizeof(key);
+      int vlen = sizeof(val);
+      e = sdbm_get_next(src, key, &klen, val, &vlen);
+      if (!e)
+       break;
+      /* NOTE(review): an error is only reported; the record is still written with klen/vlen as they stand -- confirm this is intended */
+      if (e < 0)
+       fprintf(stderr, "sdbm_get_next: error %d\n", e);
+      /* Show progress once per 1024 records */
+      if (!(c++ % 1024))
+       {
+         fprintf(stderr, "%d\r", c);
+         fflush(stderr);
+       }
+      bputw(dest, klen);
+      bwrite(dest, key, klen);
+      bputw(dest, vlen);
+      bwrite(dest, val, vlen);
+    }
+
+  sdbm_close(src);
+  bclose(dest);
+  fprintf(stderr, "Dumped %d records\n", c);
+}
+
+/*
+ *  Restore the database `db' from the dump file `dmp' written by dump().
+ *
+ *  An existing `db' is unlinked first. The page order, key size and value
+ *  size are taken from the dump header unless overridden on the command
+ *  line: force_page >= 0 replaces the page order, force_key and force_val
+ *  >= -1 replace the key and value size (-1 means variable size, exactly as
+ *  in rebuild(); the default of -2 means "not given").
+ *  Duplicate keys are reported to stderr, progress and the final record
+ *  count as well.
+ */
+static void
+restore(char *dmp, char *db)
+{
+  struct sdbm *dest;
+  struct fastbuf *src;
+  struct sdbm_options op;
+  int e, c=0;
+
+  src = bopen(dmp, O_RDONLY, 65536);
+  if (bgetl(src) != SDBM_DUMP_MAGIC ||
+      bgetl(src) != SDBM_DUMP_VERSION)
+    die("%s: not a sdbm dump", dmp);
+
+  bzero(&op, sizeof(op));
+  op.name = db;
+  e = unlink(op.name);
+  if (e < 0 && errno != ENOENT)
+    die("unlink: %m");
+  op.cache_size = cache;
+  op.flags = SDBM_CREAT | SDBM_WRITE | SDBM_FAST;
+  /* The header fields must be read in this order even if they are overridden */
+  op.page_order = bgetl(src);
+  if (force_page >= 0)
+    op.page_order = force_page;
+  op.key_size = bgetl(src);
+  if (force_key >= -1)                 /* -1 is a valid request for variable-size keys */
+    op.key_size = force_key;
+  op.val_size = bgetl(src);
+  if (force_val >= -1)                 /* -1 is a valid request for variable-size values */
+    op.val_size = force_val;
+  dest = sdbm_open(&op);
+  if (!dest)
+    die("Destination open failed");
+
+  fprintf(stderr, "Restoring database...\n");
+  for(;;)
+    {
+      byte key[65536], val[65536];
+      int klen, vlen;
+      klen = bgetw(src);
+      if (klen < 0)                    /* No more records in the dump */
+       break;
+      breadb(src, key, klen);
+      vlen = bgetw(src);
+      if (vlen < 0)
+       die("Corrupted dump file: value missing");
+      breadb(src, val, vlen);
+      /* Show progress once per 1024 records */
+      if (!(c++ % 1024))
+       {
+         fprintf(stderr, "%d\r", c);
+         fflush(stderr);
+       }
+      if (sdbm_store(dest, key, klen, val, vlen) == 0)
+       fprintf(stderr, "sdbm_store: duplicate key\n");
+    }
+
+  bclose(src);
+  sdbm_close(dest);
+  fprintf(stderr, "Restored %d records\n", c);
+}
+
+/*
+ *  Copy all records of the database `sdb' to a freshly created database
+ *  `ddb' (an existing `ddb' is unlinked first).
+ *
+ *  The page order, key size and value size are inherited from the root of
+ *  the source database unless overridden on the command line (force_page
+ *  >= 0, force_key and force_val >= -1). Duplicate keys and errors of
+ *  sdbm_get_next() are reported to stderr, progress and the final record
+ *  count as well.
+ */
+static void
+rebuild(char *sdb, char *ddb)
+{
+  struct sdbm *src, *dest;
+  struct sdbm_options op;
+  int e, c=0;
+
+  /* Open the source with zero flags and a cache of 16 pages */
+  bzero(&op, sizeof(op));
+  op.name = sdb;
+  op.cache_size = 16;
+  op.flags = 0;
+  src = sdbm_open(&op);
+  if (!src)
+    die("Source open failed: %m");
+
+  /* The same options structure is reused for the destination */
+  op.name = ddb;
+  e = unlink(op.name);
+  if (e < 0 && errno != ENOENT)
+    die("unlink: %m");
+  op.cache_size = cache;
+  op.flags = SDBM_CREAT | SDBM_WRITE | SDBM_FAST;
+  op.page_order = (force_page >= 0) ? (u32) force_page : src->root->page_order;
+  op.key_size = (force_key >= -1) ? force_key : src->root->key_size;
+  op.val_size = (force_val >= -1) ? force_val : src->root->val_size;
+  dest = sdbm_open(&op);
+  if (!dest)
+    die("Destination open failed");
+
+  fprintf(stderr, "Rebuilding database...\n");
+  sdbm_rewind(src);
+  for(;;)
+    {
+      byte key[65536], val[65536];
+      int klen = sizeof(key);
+      int vlen = sizeof(val);
+      e = sdbm_get_next(src, key, &klen, val, &vlen);
+      if (!e)
+       break;
+      /* NOTE(review): an error is only reported; the record is still stored with klen/vlen as they stand -- confirm this is intended */
+      if (e < 0)
+       fprintf(stderr, "sdbm_get_next: error %d\n", e);
+      /* Show progress once per 1024 records */
+      if (!(c++ % 1024))
+       {
+         fprintf(stderr, "%d\r", c);
+         fflush(stderr);
+       }
+      if (sdbm_store(dest, key, klen, val, vlen) == 0)
+       fprintf(stderr, "sdbm_store: duplicate key\n");
+    }
+
+  sdbm_close(src);
+  sdbm_close(dest);
+  fprintf(stderr, "Copied %d records\n", c);
+}
+
+/*
+ *  Parse the options and dispatch to rebuild(), dump() or restore()
+ *  according to the single-letter command. Returns 0 on success and 1 after
+ *  printing the usage text for an unknown option or malformed arguments.
+ */
+int
+main(int argc, char **argv)
+{
+  int o;
+
+  while ((o = getopt(argc, argv, "vc:k:d:p:")) >= 0)
+    switch (o)
+      {
+      case 'v':
+       verbose++;
+       break;
+      case 'c':
+       cache=atol(optarg);
+       break;
+      case 'k':
+       force_key=atol(optarg);
+       break;
+      case 'd':
+       force_val=atol(optarg);
+       break;
+      case 'p':
+       force_page=atol(optarg);
+       break;
+      default:
+      /* Also the target of the "goto bad" statements in the argument checks below */
+      bad:
+        fprintf(stderr, "Usage: db-tool [<options>] <command> <database>\n\
+\n\
+Options:\n\
+-v\t\tBe verbose\n\
+-c<n>\t\tUse cache of <n> pages\n\
+-d<n>\t\tSet data size to <n> (-1=variable) [restore,rebuild]\n\
+-k<n>\t\tSet key size to <n> (-1=variable) [restore,rebuild]\n\
+-p<n>\t\tSet page order to <n> [restore,rebuild]\n\
+\n\
+Commands:\n\
+b <db> <new>\tRebuild database\n\
+d <db> <dump>\tDump database\n\
+r <dump> <db>\tRestore database from dump\n\
+");
+       return 1;
+      }
+  /* Skip the options; argv[0] is now the command, which must be a single character */
+  argc -= optind;
+  argv += optind;
+  if (argc < 1 || strlen(argv[0]) != 1)
+    goto bad;
+
+  /* Every command takes exactly two file names */
+  switch (argv[0][0])
+    {
+    case 'b':
+      if (argc != 3)
+       goto bad;
+      rebuild(argv[1], argv[2]);
+      break;
+    case 'd':
+      if (argc != 3)
+       goto bad;
+      dump(argv[1], argv[2]);
+      break;
+    case 'r':
+      if (argc != 3)
+       goto bad;
+      restore(argv[1], argv[2]);
+      break;
+    default:
+      goto bad;
+    }
+  return 0;
+}
diff --git a/lib/db.c b/lib/db.c

new file mode 100644 (file)

index 0000000..d9c984b
--- /dev/null
+++ b/lib/db.c
@@ -0,0 +1,598 @@
+/*
+ *     UCW Library -- Fast Database Management Routines
+ *
+ *     (c) 1999--2001 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+/*
+ *  This library uses the standard algorithm for external hashing (page directory
+ *  mapping topmost K bits of hash value to page address, directory splits and
+ *  so on). Peculiarities of this implementation (aka design decisions):
+ *
+ *   o We allow both fixed and variable length keys and values (this includes
+ *     zero size values for cases you want to represent only a set of keys).
+ *   o We assume that key_size + val_size < page_size.
+ *   o We never shrink the directory nor free empty pages. (The reason is that
+ *     if the database was once large, it's likely it will again become large soon.)
+ *   o The only pages which can be freed are those of the directory (during
+ *     directory split), so we keep only a simple 32-entry free block list
+ *     and we assume it's sorted.
+ *   o  All pointers are always given in pages from start of the file.
+ *     This gives us page_size*2^32 limit for file size which should be enough.
+ */
+
+#include "lib/lib.h"
+#include "lib/lfs.h"
+#include "lib/pagecache.h"
+#include "lib/db.h"
+#include "lib/db_internal.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+/*
+ *  Page cache access helpers. GET_PAGE, GET_ZERO_PAGE and READ_PAGE take a page
+ *  number `x' and convert it to a byte offset in the file by shifting it left
+ *  by page_order. READ_DIR takes a byte offset `off' which is added to the byte
+ *  position of the first directory page (root->dir_start).
+ */
+#define GET_PAGE(d,x) pgc_get((d)->cache, (d)->fd, ((sh_off_t)(x)) << (d)->page_order)
+#define GET_ZERO_PAGE(d,x) pgc_get_zero((d)->cache, (d)->fd, ((sh_off_t)(x)) << (d)->page_order)
+#define READ_PAGE(d,x) pgc_read((d)->cache, (d)->fd, ((sh_off_t)(x)) << (d)->page_order)
+#define READ_DIR(d,off) pgc_read((d)->cache, (d)->fd, (((sh_off_t)(d)->root->dir_start) << (d)->page_order) + (off))
+
+/*
+ *  Open the database described by `o'. If the file cannot be opened and
+ *  SDBM_CREAT is set, a new database consisting of a root page, a one-entry
+ *  page directory and a single empty data page is created instead.
+ *  Returns the new handle, or NULL on failure (everything acquired until
+ *  then is released by sdbm_close()).
+ */
+struct sdbm *
+sdbm_open(struct sdbm_options *o)
+{
+  struct sdbm *d;
+  struct sdbm_root root, *r;
+  uns cache_size = o->cache_size ? o->cache_size : 16;
+
+  d = xmalloc_zero(sizeof(struct sdbm));
+  d->flags = o->flags;
+  d->fd = sh_open(o->name, ((d->flags & SDBM_WRITE) ? O_RDWR : O_RDONLY), 0666);
+  if (d->fd >= 0)                      /* Already exists, let's check it */
+    {
+      /* The root structure is read directly first, because the page size is not known yet */
+      if (read(d->fd, &root, sizeof(root)) != sizeof(root))
+       goto bad;
+      if (root.magic != SDBM_MAGIC || root.version != SDBM_VERSION)
+       goto bad;
+      d->file_size = sh_seek(d->fd, 0, SEEK_END) >> root.page_order;
+      d->page_order = root.page_order;
+      d->page_size = 1 << root.page_order;
+      d->cache = pgc_open(d->page_size, cache_size);
+      /* The root page is kept in the cache until sdbm_close() puts it back */
+      d->root_page = pgc_read(d->cache, d->fd, 0);
+      d->root = (void *) d->root_page->data;
+    }
+  else if ((d->flags & SDBM_CREAT) && (d->fd = sh_open(o->name, O_RDWR | O_CREAT, 0666)) >= 0)
+    {
+      struct page *q;
+      uns page_order = o->page_order;
+      if (page_order < 10)             /* Pages are never smaller than 1K */
+       page_order = 10;
+      d->page_size = 1 << page_order;
+      d->cache = pgc_open(d->page_size, cache_size);
+      d->root_page = GET_ZERO_PAGE(d, 0);
+      r = d->root = (void *) d->root_page->data;                /* Build root page */
+      r->magic = SDBM_MAGIC;
+      r->version = SDBM_VERSION;
+      r->page_order = d->page_order = page_order;
+      r->key_size = o->key_size;
+      r->val_size = o->val_size;
+      r->dir_start = 1;
+      r->dir_order = 0;
+      d->file_size = 3;
+      q = GET_ZERO_PAGE(d, 1);                                 /* Build page directory */
+      GET32(q->data, 0) = 2;
+      pgc_put(d->cache, q);
+      q = GET_ZERO_PAGE(d, 2);                                 /* Build single data page */
+      pgc_put(d->cache, q);
+    }
+  else
+    goto bad;
+  /* Cache values derived from the root page in the handle */
+  d->dir_size = 1 << d->root->dir_order;
+  d->dir_shift = 32 - d->root->dir_order;
+  d->page_mask = d->page_size - 1;
+  d->key_size = d->root->key_size;
+  d->val_size = d->root->val_size;
+  return d;
+
+bad:
+  sdbm_close(d);
+  return NULL;
+}
+
+/*
+ *  Release a database handle: put back the root page (if held), close the
+ *  page cache (if opened), close the file descriptor (if valid) and free
+ *  the handle itself. Also used to clean up a partially opened database.
+ */
+void
+sdbm_close(struct sdbm *d)
+{
+  struct page_cache *pc = d->cache;
+  int fd = d->fd;
+
+  if (d->root_page)
+    pgc_put(pc, d->root_page);
+  if (pc)
+    pgc_close(pc);
+  if (!(fd < 0))
+    close(fd);
+  xfree(d);
+}
+
+/*
+ *  Allocate `number' consecutive pages at the end of the file and return
+ *  the number of the first one. Dies when the page counter would overflow.
+ */
+static uns
+sdbm_alloc_pages(struct sdbm *d, uns number)
+{
+  uns first = d->file_size;
+  uns new_size = first + number;
+
+  if (new_size < first)                        /* Unsigned wrap-around */
+    die("SDB: Database file too large, giving up");
+  d->file_size = new_size;
+  return first;
+}
+
+/*
+ *  Allocate a single page. The first entry of the free page pool is used
+ *  if it is non-empty, otherwise the file is extended by one page.
+ *  Returns the number of the allocated page.
+ */
+static uns
+sdbm_alloc_page(struct sdbm *d)
+{
+  struct sdbm_root *r = d->root;
+  uns page;
+
+  if (r->free_pool[0].count == 0)
+    return sdbm_alloc_pages(d, 1);
+
+  /* Take the first page of the first free range */
+  page = r->free_pool[0].first++;
+  r->free_pool[0].count--;
+  if (r->free_pool[0].count == 0)
+    {
+      /* The range is exhausted: shift the remaining entries down by one */
+      memmove(&r->free_pool[0], &r->free_pool[1], (SDBM_NUM_FREE_PAGE_POOLS-1) * sizeof(r->free_pool[0]));
+      r->free_pool[SDBM_NUM_FREE_PAGE_POOLS-1].count = 0;
+    }
+  pgc_mark_dirty(d->cache, d->root_page);
+  return page;
+}
+
+/*
+ *  Record `number' pages starting at page `start' as free by storing them
+ *  in the first unused entry of the free page pool in the root page.
+ *  The scan is bounded by the size of the pool, so a completely full pool
+ *  is reported by die() instead of reading and writing past the array.
+ */
+static void
+sdbm_free_pages(struct sdbm *d, uns start, uns number)
+{
+  uns i = 0;
+
+  while (i < SDBM_NUM_FREE_PAGE_POOLS && d->root->free_pool[i].count)
+    i++;
+  if (i >= SDBM_NUM_FREE_PAGE_POOLS)
+    die("SDB: Free page pool overflow, giving up");
+  d->root->free_pool[i].first = start;
+  d->root->free_pool[i].count = number;
+  pgc_mark_dirty(d->cache, d->root_page);
+}
+
+/*
+ *  Hash function used for mapping keys to directory entries.
+ *
+ *  It replaced the hash function known from GDBM, which tended to give
+ *  the same results on similar keys.
+ */
+u32
+sdbm_hash(byte *key, uns keylen)
+{
+  u32 acc = 0x238F13AF * keylen;
+  uns i;
+
+  for (i = 0; i < keylen; i++)
+    acc = 37*acc + key[i];
+  return (1103515243 * acc + 12345);
+}
+
+/*
+ *  Decode the entry stored at `pos'. Fixed-size keys/values take their
+ *  length from the database parameters, variable-size ones are preceded
+ *  by a 16-bit big-endian length. Pointers to the key and value and their
+ *  lengths are stored to the output arguments; the total number of bytes
+ *  occupied by the entry is returned.
+ */
+static int
+sdbm_get_entry(struct sdbm *d, byte *pos, byte **key, uns *keylen, byte **val, uns *vallen)
+{
+  byte *cur = pos;
+  uns kl, vl;
+
+  if (d->key_size < 0)
+    {
+      kl = (cur[0] << 8) | cur[1];
+      cur += 2;
+    }
+  else
+    kl = d->key_size;
+  *keylen = kl;
+  *key = cur;
+  cur += kl;
+
+  if (d->val_size < 0)
+    {
+      vl = (cur[0] << 8) | cur[1];
+      cur += 2;
+    }
+  else
+    vl = d->val_size;
+  *vallen = vl;
+  *val = cur;
+  cur += vl;
+
+  return cur - pos;
+}
+
+/*
+ *  Compute the number of bytes an entry with the given key and value
+ *  lengths occupies in a bucket: the data itself plus a 2-byte length
+ *  prefix for each part which is of variable size.
+ */
+static int
+sdbm_entry_len(struct sdbm *d, uns keylen, uns vallen)
+{
+  uns prefixes = 0;
+
+  if (d->key_size < 0)
+    prefixes += 2;
+  if (d->val_size < 0)
+    prefixes += 2;
+  return keylen + vallen + prefixes;
+}
+
+/*
+ *  Encode an entry at `pos': the key and the value are copied one after
+ *  another, each preceded by a 16-bit big-endian length if the respective
+ *  size is variable. memmove() is used for the copies, so the source may
+ *  overlap the destination.
+ */
+static void
+sdbm_store_entry(struct sdbm *d, byte *pos, byte *key, uns keylen, byte *val, uns vallen)
+{
+  byte *out = pos;
+
+  if (d->key_size < 0)
+    {
+      out[0] = keylen >> 8;
+      out[1] = keylen;
+      out += 2;
+    }
+  memmove(out, key, keylen);
+  out += keylen;
+
+  if (d->val_size < 0)
+    {
+      out[0] = vallen >> 8;
+      out[1] = vallen;
+      out += 2;
+    }
+  memmove(out, val, vallen);
+}
+
+/*
+ *  Count the directory entries pointing to the same data page as the entry
+ *  at byte offset `dirpos' of the directory. The run of equal entries is
+ *  first searched for within the directory page containing `dirpos'; if it
+ *  reaches both edges of that page, the search continues in whole-page steps.
+ *  Returns the length of the run in entries.
+ */
+static uns
+sdbm_page_rank(struct sdbm *d, uns dirpos)
+{
+  struct page *b;
+  u32 pg, x;
+  uns l, r;
+  uns pm = d->page_mask;
+
+  b = READ_DIR(d, dirpos & ~pm);
+  pg = GET32(b->data, dirpos & pm);
+  /* Extend to the left until the page start or a different entry is hit */
+  l = dirpos;
+  while ((l & pm) && GET32(b->data, (l - 4) & pm) == pg)
+    l -= 4;
+  /* Extend to the right likewise; r is the offset just past the run */
+  r = dirpos + 4;
+  /* We heavily depend on unused directory entries being zero */
+  while ((r & pm) && GET32(b->data, r & pm) == pg)
+    r += 4;
+  pgc_put(d->cache, b);
+
+  if (!(l & pm) && !(r & pm))
+    {
+      /* Note that if it spans page boundary, it must contain an integer number of pages */
+      while (l)
+       {
+         /* Check the first entry of the preceding directory page */
+         b = READ_DIR(d, (l - 4) & ~pm);
+         x = GET32(b->data, 0);
+         pgc_put(d->cache, b);
+         if (x != pg)
+           break;
+         l -= d->page_size;
+       }
+      while (r < 4*d->dir_size)
+       {
+         /* Check the first entry of the following directory page */
+         b = READ_DIR(d, r & ~pm);
+         x = GET32(b->data, 0);
+         pgc_put(d->cache, b);
+         if (x != pg)
+           break;
+         r += d->page_size;
+       }
+    }
+  /* Convert the byte span to a number of 4-byte entries */
+  return (r - l) >> 2;
+}
+
+/*
+ *  Double the size of the page directory: every old entry is replaced by
+ *  two adjacent entries with the same contents. While the doubled directory
+ *  still fits in a single page, this is done in place; otherwise a new
+ *  directory with twice as many pages is allocated, filled from the old one
+ *  and the old pages are put to the free page pool. Dies when the directory
+ *  order has already reached 31.
+ */
+static void
+sdbm_expand_directory(struct sdbm *d)
+{
+  struct page *b, *c;
+  int i, ent;
+  u32 *dir, *t;
+
+  if (d->root->dir_order >= 31)
+    die("SDB: Database directory too large, giving up");
+
+  if (4*d->dir_size < d->page_size)
+    {
+      /* It still fits within single page */
+      b = READ_DIR(d, 0);
+      dir = (u32 *) b->data;
+      /* Go from the end, so that no entry is overwritten before it is copied */
+      for(i=d->dir_size-1; i>=0; i--)
+       dir[2*i] = dir[2*i+1] = dir[i];
+      pgc_mark_dirty(d->cache, b);
+      pgc_put(d->cache, b);
+    }
+  else
+    {
+      uns old_dir = d->root->dir_start;
+      uns old_dir_pages = 1 << (d->root->dir_order + 2 - d->page_order);
+      uns page, new_dir;
+      new_dir = d->root->dir_start = sdbm_alloc_pages(d, 2*old_dir_pages);
+      ent = 1 << (d->page_order - 3);  /* Half of the number of entries per page */
+      for(page=0; page < old_dir_pages; page++)
+       {
+         /* Each old page is expanded to two new pages */
+         b = READ_PAGE(d, old_dir + page);
+         dir = (u32 *) b->data;
+         c = GET_PAGE(d, new_dir + 2*page);
+         t = (u32 *) c->data;
+         for(i=0; i<ent; i++)
+           t[2*i] = t[2*i+1] = dir[i];
+         pgc_put(d->cache, c);
+         c = GET_PAGE(d, new_dir + 2*page + 1);
+         t = (u32 *) c->data;
+         for(i=0; i<ent; i++)
+           t[2*i] = t[2*i+1] = dir[ent+i];
+         pgc_put(d->cache, c);
+         pgc_put(d->cache, b);
+       }
+      if (!(d->flags & SDBM_FAST))
+       {
+         /*
+          *  Unless in super-fast mode, fill old directory pages with zeroes.
+          *  This slows us down a bit, but allows database reconstruction after
+          *  the free list is lost.
+          */
+         for(page=0; page < old_dir_pages; page++)
+           {
+             b = GET_ZERO_PAGE(d, old_dir + page);
+             pgc_put(d->cache, b);
+           }
+       }
+      sdbm_free_pages(d, old_dir, old_dir_pages);
+    }
+
+  /* Update the directory order and the values derived from it */
+  d->root->dir_order++;
+  d->dir_size = 1 << d->root->dir_order;
+  d->dir_shift = 32 - d->root->dir_order;
+  pgc_mark_dirty(d->cache, d->root_page);
+  if (!(d->flags & SDBM_FAST))
+    sdbm_sync(d);
+}
+
+/*
+ *  Distribute all entries of bucket `s' between buckets `d0' and `d1'
+ *  according to bit `sigbit' of the hash of their keys (0 goes to d0,
+ *  1 goes to d1) and set the `used' counters of both destinations.
+ *  The caller passes the source bucket as `d0' as well; this works because
+ *  the write position in d0 never gets ahead of the read position and
+ *  sdbm_store_entry() copies with memmove().
+ *
+ *  The mask is built from an unsigned constant, because `sigbit' can be
+ *  as large as 31 and shifting a signed 1 that far is undefined.
+ */
+static void
+sdbm_split_data(struct sdbm *d, struct sdbm_bucket *s, struct sdbm_bucket *d0, struct sdbm_bucket *d1, uns sigbit)
+{
+  byte *sp = s->data;
+  byte *dp[2] = { d0->data, d1->data };
+  byte *K, *D;
+  uns Kl, Dl, sz, i;
+
+  while (sp < s->data + s->used)
+    {
+      sz = sdbm_get_entry(d, sp, &K, &Kl, &D, &Dl);
+      sp += sz;
+      i = (sdbm_hash(K, Kl) & (1U << sigbit)) ? 1 : 0;
+      sdbm_store_entry(d, dp[i], K, Kl, D, Dl);
+      dp[i] += sz;
+    }
+  d0->used = dp[0] - d0->data;
+  d1->used = dp[1] - d1->data;
+}
+
+/*
+ *  Make `count' consecutive directory entries, starting at byte offset
+ *  `dirpos' of the directory, point to page `pos'. The range is processed
+ *  one directory page at a time.
+ */
+static void
+sdbm_split_dir(struct sdbm *d, uns dirpos, uns count, uns pos)
+{
+  struct page *b;
+  uns i;
+
+  count *= 4;                          /* From now on, count is in bytes */
+  while (count)
+    {
+      b = READ_DIR(d, dirpos & ~d->page_mask);
+      /* i = number of bytes to fill in this directory page */
+      i = d->page_size - (dirpos & d->page_mask);
+      if (i > count)
+       i = count;
+      count -= i;
+      while (i)
+       {
+         GET32(b->data, dirpos & d->page_mask) = pos;
+         dirpos += 4;
+         i -= 4;
+       }
+      pgc_mark_dirty(d->cache, b);
+      pgc_put(d->cache, b);
+    }
+}
+
+/*
+ *  Convert a hash value to the byte offset of the corresponding entry in
+ *  the page directory. A single-entry directory (dir_shift == 32) is handled
+ *  separately to avoid shifting by 32 bits.
+ */
+static inline uns
+sdbm_dirpos(struct sdbm *d, uns hash)
+{
+  uns shift = d->dir_shift;
+
+  return (shift == 32) ? 0 : ((hash >> shift) << 2);
+}
+
+/*
+ *  Split the data page `b', which is where the key with hash value `hash'
+ *  belongs, to two pages. If only a single directory entry points to the
+ *  page, the directory is expanded first. One half of the entries stays
+ *  in `b', the other half moves to a newly allocated page and the upper half
+ *  of the directory entries which pointed to `b' is redirected there.
+ *  Returns the page `hash' now belongs to; the other page is put back
+ *  to the cache.
+ *
+ *  The bit mask is built from an unsigned constant, because `sigbit' can be
+ *  as large as 31 and shifting a signed 1 that far is undefined.
+ */
+static struct page *
+sdbm_split_page(struct sdbm *d, struct page *b, u32 hash)
+{
+  struct page *p[2];
+  uns i, rank, sigbit, rank_log, dirpos, newpg;
+
+  dirpos = sdbm_dirpos(d, hash);
+  rank = sdbm_page_rank(d, dirpos);    /* rank = # of pointers to this page */
+  if (rank == 1)
+    {
+      sdbm_expand_directory(d);
+      rank = 2;
+      dirpos *= 2;                     /* Every entry has moved to twice its offset */
+    }
+  rank_log = 1;                                /* rank_log = log2(rank) */
+  while ((1U << rank_log) < rank)
+    rank_log++;
+  sigbit = d->dir_shift + rank_log - 1;        /* sigbit = bit we split on */
+  p[0] = b;
+  newpg = sdbm_alloc_page(d);
+  p[1] = GET_PAGE(d, newpg);
+  sdbm_split_data(d, (void *) b->data, (void *) p[0]->data, (void *) p[1]->data, sigbit);
+  /* Redirect the upper half of the run of entries to the new page */
+  sdbm_split_dir(d, (dirpos & ~(4*rank - 1))+2*rank, rank/2, newpg);
+  pgc_mark_dirty(d->cache, p[0]);
+  i = (hash & (1U << sigbit)) ? 1 : 0;
+  pgc_put(d->cache, p[!i]);
+  return p[i];
+}
+
+/*
+ *  Pass `Dl' bytes at `D' to the caller. If `vallen' is given, it holds the
+ *  size of the caller's buffer on entry and receives the real length; when
+ *  the buffer is too small, 1 is returned and nothing is modified.
+ *  If `val' is given, the data are copied there. Returns 0 on success.
+ */
+static int
+sdbm_put_user(byte *D, uns Dl, byte *val, uns *vallen)
+{
+  if (vallen && *vallen < Dl)
+    return 1;
+  if (vallen)
+    *vallen = Dl;
+  if (val)
+    memcpy(val, D, Dl);
+  return 0;
+}
+
+/*
+ *  Common worker of sdbm_fetch(), sdbm_store(), sdbm_replace() and sdbm_delete().
+ *
+ *  mode 0 (fetch):   returns 1 and passes the value to the caller if the key
+ *                    is found, 0 if it is not, SDBM_ERROR_TOO_LARGE if the
+ *                    value does not fit in the caller's buffer.
+ *  mode 1 (store):   returns 0 if the key is already present (nothing is
+ *                    changed), 1 after the new entry has been inserted.
+ *  mode 2 (replace): removes the old entry if there is one and inserts the
+ *                    new one; returns 1. With val == NULL it only deletes:
+ *                    returns 1 if an entry was removed, 0 if the key was
+ *                    not found.
+ *
+ *  Other errors: SDBM_ERROR_BAD_KEY_SIZE, SDBM_ERROR_BAD_VAL_SIZE,
+ *  SDBM_ERROR_GIANT (entry cannot fit in a page) and SDBM_ERROR_READ_ONLY
+ *  (modification of a database opened without SDBM_WRITE).
+ */
+static int
+sdbm_access(struct sdbm *d, byte *key, uns keylen, byte *val, uns *vallen, uns mode)   /* 0=read, 1=store, 2=replace */
+{
+  struct page *p, *q;
+  u32 hash, h, pos, size;
+  struct sdbm_bucket *b;
+  byte *c, *e;
+  int rc;
+
+  if ((d->key_size >= 0 && keylen != (uns) d->key_size) || keylen > 65535)
+    return SDBM_ERROR_BAD_KEY_SIZE;
+  /* The value size matters only when writing; test mode first, so that a fetch never touches *vallen here */
+  if (mode && val && ((d->val_size >= 0 && *vallen != (uns) d->val_size) || *vallen >= 65535))
+    return SDBM_ERROR_BAD_VAL_SIZE;
+  /* Only modifications need write access; fetching from a read-only database is fine */
+  if (mode && !(d->flags & SDBM_WRITE))
+    return SDBM_ERROR_READ_ONLY;
+
+  /* Find the data page through the page directory */
+  hash = sdbm_hash(key, keylen);
+  h = sdbm_dirpos(d, hash);
+  p = READ_DIR(d, h & ~d->page_mask);
+  pos = GET32(p->data, h & d->page_mask);
+  pgc_put(d->cache, p);
+  q = READ_PAGE(d, pos);
+  b = (void *) q->data;
+  c = b->data;
+  e = c + b->used;
+
+  /* Scan the bucket for the key */
+  while (c < e)
+    {
+      byte *K, *D;
+      uns Kl, Dl, s;
+      s = sdbm_get_entry(d, c, &K, &Kl, &D, &Dl);
+      if (Kl == keylen && !memcmp(K, key, Kl))
+        {
+          /* Gotcha! */
+          switch (mode)
+            {
+            case 0:                     /* fetch: found */
+              rc = sdbm_put_user(D, Dl, val, vallen);
+              pgc_put(d->cache, q);
+              return rc ? SDBM_ERROR_TOO_LARGE : 1;
+            case 1:                     /* store: already present */
+              pgc_put(d->cache, q);
+              return 0;
+            default:                    /* replace: delete the old one */
+              memmove(c, c+s, e-(c+s));
+              b->used -= s;
+              goto insert;
+            }
+        }
+      c += s;
+    }
+  if (!mode || !val)           /* fetch or delete: no success */
+    {
+      pgc_put(d->cache, q);
+      return 0;
+    }
+
+insert:
+  if (val)
+    {
+      size = sdbm_entry_len(d, keylen, *vallen);
+      while (b->used + size > d->page_size - sizeof(struct sdbm_bucket))
+        {
+          /* Page overflow, need to split */
+          if (size >= d->page_size - sizeof(struct sdbm_bucket))
+            {
+              pgc_put(d->cache, q);
+              return SDBM_ERROR_GIANT;
+            }
+          q = sdbm_split_page(d, q, hash);
+          b = (void *) q->data;
+        }
+      sdbm_store_entry(d, b->data + b->used, key, keylen, val, *vallen);
+      b->used += size;
+    }
+  pgc_mark_dirty(d->cache, q);
+  pgc_put(d->cache, q);
+  if (d->flags & SDBM_SYNC)
+    sdbm_sync(d);
+  return 1;
+}
+
+/*
+ *  Store a new record. Calls sdbm_access() in mode 1, which returns 1 when
+ *  the record has been inserted, 0 when the key is already present and
+ *  a negative SDBM_ERROR_xxx code on error.
+ */
+int
+sdbm_store(struct sdbm *d, byte *key, uns keylen, byte *val, uns vallen)
+{
+  return sdbm_access(d, key, keylen, val, &vallen, 1);
+}
+
+/*
+ *  Store a record, replacing the old one with the same key if there is one.
+ *  Calls sdbm_access() in mode 2; with val == NULL the record is only deleted.
+ */
+int
+sdbm_replace(struct sdbm *d, byte *key, uns keylen, byte *val, uns vallen)
+{
+  return sdbm_access(d, key, keylen, val, &vallen, 2);
+}
+
+/*
+ *  Delete the record with the given key. Implemented as a replace
+ *  (sdbm_access() in mode 2) with no new value; returns 1 if a record
+ *  has been removed and 0 if the key has not been found.
+ */
+int
+sdbm_delete(struct sdbm *d, byte *key, uns keylen)
+{
+  return sdbm_access(d, key, keylen, NULL, NULL, 2);
+}
+
+/*
+ *  Look up a key (sdbm_access() in mode 0). Returns 1 if found, 0 if not
+ *  and SDBM_ERROR_TOO_LARGE if *vallen, which holds the size of the buffer
+ *  `val' on entry, is smaller than the value. On success, *vallen receives
+ *  the real length of the value.
+ */
+int
+sdbm_fetch(struct sdbm *d, byte *key, uns keylen, byte *val, uns *vallen)
+{
+  return sdbm_access(d, key, keylen, val, vallen, 0);
+}
+
+/*
+ *  Reset the position used by sdbm_get_next(): the walk starts at page 1
+ *  (page 0 is the root page), before the first entry, with no entry
+ *  of the free page pool skipped yet.
+ */
+void
+sdbm_rewind(struct sdbm *d)
+{
+  d->find_free_list = 0;
+  d->find_pos = 0;
+  d->find_page = 1;
+}
+
+/*
+ *  Return the next record of a sequential walk through the database
+ *  started by sdbm_rewind(). The walk goes through all pages of the file,
+ *  skipping the page directory and the ranges recorded in the free page
+ *  pool (which is assumed to be sorted by start position).
+ *
+ *  On entry, *keylen and *vallen (when given) hold the sizes of the buffers
+ *  `key' and `val'; on success they receive the real lengths.
+ *  Returns 1 if a record has been returned, 0 at the end of the database
+ *  and SDBM_ERROR_TOO_LARGE if the key or the value does not fit.
+ */
+int
+sdbm_get_next(struct sdbm *d, byte *key, uns *keylen, byte *val, uns *vallen)
+{
+  uns page = d->find_page;
+  uns pos = d->find_pos;
+  byte *K, *V;
+  uns c, Kl, Vl;
+  struct page *p;
+  struct sdbm_bucket *b;
+
+  for(;;)
+    {
+      if (!pos)
+        {
+          /* We are about to enter a new page: check whether it is a data page at all */
+          if (page >= d->file_size)
+            break;
+          if (page == d->root->dir_start)
+            page += (4*d->dir_size + d->page_size - 1) >> d->page_order;
+          else if (d->find_free_list < SDBM_NUM_FREE_PAGE_POOLS &&     /* never index past the pool */
+                   page == d->root->free_pool[d->find_free_list].first)
+            page += d->root->free_pool[d->find_free_list++].count;
+          else
+            pos = 4;                    /* Entries start right after the 32-bit `used' counter */
+          continue;
+        }
+      p = READ_PAGE(d, page);
+      b = (void *) p->data;
+      if (pos - 4 >= b->used)
+        {
+          /* All entries of this page have been returned, try the next page */
+          pos = 0;
+          page++;
+          pgc_put(d->cache, p);
+          continue;
+        }
+      c = sdbm_get_entry(d, p->data + pos, &K, &Kl, &V, &Vl);
+      d->find_page = page;
+      d->find_pos = pos + c;
+      c = sdbm_put_user(K, Kl, key, keylen) ||
+          sdbm_put_user(V, Vl, val, vallen);
+      pgc_put(d->cache, p);
+      return c ? SDBM_ERROR_TOO_LARGE : 1;
+    }
+  d->find_page = page;
+  d->find_pos = pos;
+  return 0;
+}
+
+/*
+ *  Flush the page cache of the database; if SDBM_FSYNC is set, call
+ *  fsync() on the database file afterwards.
+ */
+void
+sdbm_sync(struct sdbm *d)
+{
+  pgc_flush(d->cache);
+  if (!(d->flags & SDBM_FSYNC))
+    return;
+  fsync(d->fd);
+}
diff --git a/lib/db.h b/lib/db.h

new file mode 100644 (file)

index 0000000..41b81aa
--- /dev/null
+++ b/lib/db.h
@@ -0,0 +1,50 @@
+/*
+ *     UCW Library -- Fast Database Management Routines
+ *
+ *     (c) 1999--2001 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_DB_H
+#define _UCW_DB_H
+
+struct sdbm;
+
+/*
+ *  Parameters of sdbm_open(). page_order, key_size and val_size are used
+ *  only when a new database is created; an existing database takes them
+ *  from its root page.
+ */
+struct sdbm_options {                  /* Set to 0 for default */
+  char *name;                          /* File name */
+  uns flags;                           /* See SDBM_xxx below */
+  uns page_order;                      /* Binary logarithm of file page size; values below 10 are raised to 10 */
+  uns cache_size;                      /* Number of cached pages; 0 means 16 */
+  int key_size;                                /* Key size, -1=variable */
+  int val_size;                                /* Value size, -1=variable */
+};
+
+/*
+ *  sdbm_open() returns NULL on failure. The functions returning int return
+ *  a negative SDBM_ERROR_xxx code on error, otherwise:
+ *    sdbm_store()     1 = stored, 0 = key already present (nothing changed)
+ *    sdbm_replace()   1 = stored; with val == NULL: 1 = deleted, 0 = not found
+ *    sdbm_delete()    1 = deleted, 0 = not found
+ *    sdbm_fetch()     1 = found, 0 = not found
+ *    sdbm_get_next()  1 = record returned, 0 = end of database
+ *  For sdbm_fetch() and sdbm_get_next(), *keylen and *vallen hold the sizes
+ *  of the supplied buffers on entry and receive the real lengths.
+ */
+struct sdbm *sdbm_open(struct sdbm_options *);
+void sdbm_close(struct sdbm *);
+int sdbm_store(struct sdbm *, byte *key, uns keylen, byte *val, uns vallen);
+int sdbm_replace(struct sdbm *, byte *key, uns keylen, byte *val, uns vallen); /* val == NULL -> delete */
+int sdbm_delete(struct sdbm *, byte *key, uns keylen);
+int sdbm_fetch(struct sdbm *, byte *key, uns keylen, byte *val, uns *vallen);          /* val can be NULL */
+void sdbm_rewind(struct sdbm *);
+int sdbm_get_next(struct sdbm *, byte *key, uns *keylen, byte *val, uns *vallen);      /* val can be NULL */
+void sdbm_sync(struct sdbm *);
+u32 sdbm_hash(byte *key, uns keylen);
+
+#define SDBM_CREAT             1       /* Create the database if it doesn't exist */
+#define SDBM_WRITE             2       /* Open the database in read/write mode */
+#define SDBM_SYNC              4       /* Sync after each operation */
+#define SDBM_FAST              8       /* Don't sync on directory splits -- results in slightly faster
+                                        * operation, but reconstruction of database after program crash
+                                        * may be impossible.
+                                        */
+#define SDBM_FSYNC             16      /* When syncing, call fsync() */
+
+#define SDBM_ERROR_BAD_KEY_SIZE -1     /* Fixed key size doesn't match */
+#define SDBM_ERROR_BAD_VAL_SIZE -2     /* Fixed value size doesn't match */
+#define SDBM_ERROR_TOO_LARGE   -3      /* Key/value doesn't fit in buffer supplied */
+#define SDBM_ERROR_READ_ONLY   -4      /* Database has been opened read only */
+#define SDBM_ERROR_GIANT       -5      /* Key/value too large to fit in a page */
+
+#endif
diff --git a/lib/db_internal.h b/lib/db_internal.h

new file mode 100644 (file)

index 0000000..b480a79
--- /dev/null
+++ b/lib/db_internal.h
@@ -0,0 +1,58 @@
+/*
+ *     UCW Library -- Fast Database Management Routines -- Internal Declarations
+ *
+ *     (c) 1999--2001 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#define SDBM_NUM_FREE_PAGE_POOLS 32
+
+/* Contents of the root page (page 0 of the database file) */
+struct sdbm_root {                     /* Must fit in 1K which is minimum page size */
+  u32 magic;                           /* SDBM_MAGIC */
+  u32 version;                         /* SDBM_VERSION */
+  u32 page_order;                      /* Binary logarithm of page size */
+  s32 key_size;                                /* Key/val size, -1=variable */
+  s32 val_size;
+  u32 dir_start;                       /* First page of the page directory */
+  u32 dir_order;                       /* Binary logarithm of directory size */
+  /*
+   *  As we know the only thing which can be freed is the page directory
+   *  and it can grow only a limited number of times, we can use a very
+   *  simple-minded representation of the free page pool. We also assume
+   *  these entries are sorted by start position.
+   */
+  struct {
+    u32 first;                         /* First page of the free range */
+    u32 count;                         /* Number of pages in the range, 0 for an unused entry */
+  } free_pool[SDBM_NUM_FREE_PAGE_POOLS];
+};
+
+/* Layout of a data page: a counter followed by the entries stored one after another */
+struct sdbm_bucket {
+  u32 used;                            /* Bytes used in this bucket */
+  byte data[0];                                /* Entries, see sdbm_get_entry() for their format */
+};
+
+/* In-memory handle of an open database */
+struct sdbm {
+  struct page_cache *cache;
+  int fd;
+  struct sdbm_root *root;              /* Points to the data of root_page */
+  struct page *root_page;              /* Held in the cache for the whole life of the handle */
+  int key_size;                                /* Cached values from root page */
+  int val_size;
+  uns page_order;
+  uns page_size;
+  uns page_mask;                       /* page_size - 1 */
+  uns dir_size;                                /* Page directory size in entries */
+  uns dir_shift;                       /* 32 - dir_order: hash values are shifted right by this to get a directory index */
+  uns file_size;                       /* in pages */
+  uns flags;
+  uns find_page, find_pos;             /* Current pointer for sdbm_get_next() */
+  uns find_free_list;                  /* First free list entry not skipped by sdbm_get_next() */
+};
+
+#define SDBM_MAGIC 0x5344424d
+#define SDBM_VERSION 2
+
+/*
+ *  Access the 32-bit word at byte offset `o' from pointer `p'. The expansion
+ *  is an lvalue, so it can be assigned to. It is fully parenthesized to be
+ *  safe inside any expression.
+ */
+#define GET32(p,o) (*((u32 *)((p)+(o))))
diff --git a/lib/default.cfg b/lib/default.cfg

new file mode 100644 (file)

index 0000000..ceb6d58
--- /dev/null
+++ b/lib/default.cfg
@@ -0,0 +1,50 @@
+# Configuration variables of the UCW library and their default values
+# (c) 2005--2007 Martin Mares <mj@ucw.cz>
+
+# Version of the whole package
+Set("SHERLOCK_VERSION" => "3.12.3");
+
+# Compile everything with debug information and ASSERT's
+UnSet("CONFIG_DEBUG");
+
+# Enable aggressive optimizations depending on exact CPU type (don't use for portable packages)
+UnSet("CONFIG_EXACT_CPU");
+
+# Support files >2GB
+Set("CONFIG_LARGE_FILES");
+
+# Use shared libraries
+UnSet("CONFIG_SHARED");
+
+# If your system doesn't contain GNU libc 2.3 or newer, it's recommended to let Sherlock
+# use its own regex library (a copy of the glibc one), because the default regex library
+# is likely to be crappy.
+Set("CONFIG_OWN_REGEX");
+
+# If your system can't reset getopt with 'optind = 0', you need to compile our internal copy
+# of GNU libc's getopt. This should not be necessary on GNU libc.
+UnSet("CONFIG_OWN_GETOPT");
+
+# Install libraries and their API includes
+UnSet("CONFIG_INSTALL_API");
+
+# Build with support for multi-threaded programs
+Set("CONFIG_UCW_THREADS" => 1);
+
+# Include Perl modules
+Set("CONFIG_UCW_PERL" => 1);
+
+# Include Perl modules written in C
+UnSet("CONFIG_UCW_PERL_MODULES");
+
+# Include support utilities for shell scripts
+Set("CONFIG_UCW_SHELL_UTILS" => 1);
+
+# Default configuration file
+UnSet("DEFAULT_CONFIG");
+
+# Environment variable with configuration file
+UnSet("ENV_VAR_CONFIG");
+
+# Return success
+1;
diff --git a/lib/eltpool.c b/lib/eltpool.c

new file mode 100644 (file)

index 0000000..f82de84
--- /dev/null
+++ b/lib/eltpool.c
@@ -0,0 +1,100 @@
+/*
+ *     UCW Library -- Fast Allocator for Fixed-Size Elements
+ *
+ *     (c) 2007 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+/*
+ *  This allocator is optimized for intensive allocation and freeing of small
+ *  blocks of identical sizes. System memory is allocated by multiples of the
+ *  page size and it is returned back only when the whole eltpool is deleted.
+ *
+ *  In the future, we can add returning of memory to the system and also cache
+ *  coloring like in the SLAB allocator used in the Linux kernel.
+ */
+
+#undef LOCAL_DEBUG
+
+#include "lib/lib.h"
+#include "lib/eltpool.h"
+
+/*
+ *  Create a new pool of elements of size `elt_size'. The element size is
+ *  raised to the size of struct eltpool_free if it is smaller and aligned
+ *  to CPU_STRUCT_ALIGN. The chunk size starts at CPU_PAGE_SIZE and is doubled
+ *  until a chunk can hold the chunk header and `elts_per_chunk' elements;
+ *  the real number of elements per chunk is then computed from the chunk size.
+ */
+struct eltpool *
+ep_new(uns elt_size, uns elts_per_chunk)
+{
+  struct eltpool *pool = xmalloc_zero(sizeof(*pool));
+  uns esize = ALIGN_TO(MAX(elt_size, sizeof(struct eltpool_free)), CPU_STRUCT_ALIGN);
+  uns csize = CPU_PAGE_SIZE;
+
+  while (esize * elts_per_chunk + sizeof(struct eltpool_chunk) > csize)
+    csize *= 2;
+
+  pool->elt_size = esize;
+  pool->chunk_size = csize;
+  pool->elts_per_chunk = (csize - sizeof(struct eltpool_chunk)) / esize;
+  DBG("ep_new(): got elt_size=%d, epc=%d; used chunk_size=%d, epc=%d", elt_size, elts_per_chunk, pool->chunk_size, pool->elts_per_chunk);
+  return pool;
+}
+
+/*
+ *  Destroy the pool: free all its chunks and then the pool structure itself.
+ */
+void
+ep_delete(struct eltpool *pool)
+{
+  struct eltpool_chunk *ch = pool->first_chunk;
+
+  while (ch)
+    {
+      struct eltpool_chunk *next = ch->next;
+      pool->first_chunk = next;
+      page_free(ch, pool->chunk_size);
+      ch = next;
+    }
+  xfree(pool);
+}
+
+/*
+ *  Slow path of ep_alloc(), called when the list of free elements is empty:
+ *  allocate a new chunk, link it to the list of chunks, put all its elements
+ *  except for the last one to the free list and return the last one.
+ */
+void *
+ep_alloc_slow(struct eltpool *pool)
+{
+  struct eltpool_chunk *ch = page_alloc(pool->chunk_size);
+  char *elt = (char *)(ch+1);          /* Elements follow the chunk header */
+
+  ch->next = pool->first_chunk;
+  pool->first_chunk = ch;
+
+  for (uns i=1; i<pool->elts_per_chunk; i++, elt += pool->elt_size)
+    {
+      struct eltpool_free *f = (struct eltpool_free *) elt;
+      f->next = pool->first_free;
+      pool->first_free = f;
+    }
+  return elt;
+}
+
+#ifdef TEST
+
+#include <stdio.h>
+#include "lib/clists.h"
+
+/* Test element: a circular list node followed by a single byte */
+struct argh {
+  cnode n;
+  byte x[1];
+} PACKED;
+
+/*
+ *  Self-test: allocate 65536 elements, adding each of them to the head or
+ *  to the tail of a list, and after every fifth allocation free the element
+ *  at the head of the list. Prints "OK" when it gets to the end.
+ */
+int main(void)
+{
+  struct eltpool *ep = ep_new(sizeof(struct argh), 64);
+  clist l;
+  clist_init(&l);
+  for (uns i=0; i<65536; i++)
+    {
+      struct argh *a = ep_alloc(ep);
+      if (i % 3)
+       clist_add_tail(&l, &a->n);
+      else
+       clist_add_head(&l, &a->n);
+      if (!(i % 5))
+       {
+         a = clist_head(&l);
+         clist_remove(&a->n);
+         ep_free(ep, a);
+       }
+    }
+  /* Elements still on the list are released together with the pool */
+  ep_delete(ep);
+  puts("OK");
+  return 0;
+}
+
+#endif
diff --git a/lib/eltpool.h b/lib/eltpool.h

new file mode 100644 (file)

index 0000000..7e295fb
--- /dev/null
+++ b/lib/eltpool.h
@@ -0,0 +1,65 @@
+/*
+ *     UCW Library -- Fast Allocator for Fixed-Size Elements
+ *
+ *     (c) 2007 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_ELTPOOL_H
+#define _UCW_ELTPOOL_H
+
+/* The pool itself */
+struct eltpool {
+  struct eltpool_chunk *first_chunk;   // List of all chunks allocated for this pool
+  struct eltpool_free *first_free;     // List of free elements
+  uns elt_size;                        // Size of a single element as computed by ep_new()
+  uns chunk_size;                      // Size of a single chunk in bytes
+  uns elts_per_chunk;                  // Number of elements in a chunk
+  uns num_allocated;           // Just for debugging
+};
+
+/* Header of a chunk */
+struct eltpool_chunk {
+  struct eltpool_chunk *next;
+  /* Chunk data continue here */
+};
+
+/* A free element is reused as a node of the free list */
+struct eltpool_free {
+  struct eltpool_free *next;
+};
+
+struct eltpool *ep_new(uns elt_size, uns elts_per_chunk);
+void ep_delete(struct eltpool *pool);
+void *ep_alloc_slow(struct eltpool *pool);
+
+/*
+ *  Allocate a single element from the pool: take the first element of
+ *  the free list, or call ep_alloc_slow() if the list is empty.
+ *  With CONFIG_FAKE_ELTPOOL, xmalloc() is called instead.
+ */
+static inline void *
+ep_alloc(struct eltpool *pool)
+{
+  pool->num_allocated++;
+#ifdef CONFIG_FAKE_ELTPOOL
+  return xmalloc(pool->elt_size);
+#else
+  struct eltpool_free *elt = pool->first_free;
+  if (!elt)
+    return ep_alloc_slow(pool);
+  pool->first_free = elt->next;
+  return elt;
+#endif
+}
+
+/*
+ *  Return the element `p' to the pool by putting it to the head of
+ *  the free list. With CONFIG_FAKE_ELTPOOL, xfree() is called instead.
+ */
+static inline void
+ep_free(struct eltpool *pool, void *p)
+{
+  pool->num_allocated--;
+#ifdef CONFIG_FAKE_ELTPOOL
+  (void) pool;
+  xfree(p);
+#else
+  ((struct eltpool_free *) p)->next = pool->first_free;
+  pool->first_free = p;
+#endif
+}
+
+#endif
diff --git a/lib/eltpool.test b/lib/eltpool.test

new file mode 100644 (file)

index 0000000..85bed69
--- /dev/null
+++ b/lib/eltpool.test
@@ -0,0 +1,4 @@
+# Tests for eltpools
+
+Run:   ../obj/lib/eltpool-t
+Out:   OK
diff --git a/lib/exitstatus.c b/lib/exitstatus.c

new file mode 100644 (file)

index 0000000..1095c7f
--- /dev/null
+++ b/lib/exitstatus.c
@@ -0,0 +1,36 @@
+/*
+ *     UCW Library -- Formatting of Process Exit Status
+ *
+ *     (c) 2004 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+
+#include <stdio.h>
+#include <sys/wait.h>
+#include <errno.h>
+
+/*
+ *  Format a process exit status `stat' (as returned by the wait() family,
+ *  or negative if the process could not be started) to a message stored
+ *  in `msg'. Returns 0 and an empty message if the process has exited
+ *  with code 0, otherwise returns 1.
+ */
+int
+format_exit_status(char *msg, int stat)
+{
+  if (stat < 0)
+    {
+      sprintf(msg, "failed to fork (err=%d)", errno);
+      return 1;
+    }
+  if (WIFEXITED(stat) && WEXITSTATUS(stat) < 256)
+    {
+      int code = WEXITSTATUS(stat);
+      if (!code)
+        {
+          msg[0] = 0;
+          return 0;
+        }
+      sprintf(msg, "died with exit code %d", code);
+      return 1;
+    }
+  if (WIFSIGNALED(stat))
+    sprintf(msg, "died on signal %d", WTERMSIG(stat));
+  else
+    sprintf(msg, "died with status %x", stat);
+  return 1;
+}
diff --git a/lib/fastbuf.c b/lib/fastbuf.c

new file mode 100644 (file)

index 0000000..be7e979
--- /dev/null
+++ b/lib/fastbuf.c
@@ -0,0 +1,204 @@
+/*
+ *     UCW Library -- Fast Buffered I/O
+ *
+ *     (c) 1997--2007 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/fastbuf.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+/* Flush the stream and call its close hook (if it has one). NULL is silently ignored. */
+void bclose(struct fastbuf *f)
+{
+  if (!f)
+    return;
+  bflush(f);
+  if (f->close)
+    f->close(f);
+}
+
+/*
+ *  Flush the buffer: if bptr is beyond bstop, the spout hook is called;
+ *  otherwise, if bstop is beyond the start of the buffer, the buffer
+ *  is emptied by resetting both bptr and bstop to its start.
+ */
+void bflush(struct fastbuf *f)
+{
+  if (f->bptr > f->bstop)
+    {
+      f->spout(f);
+      return;
+    }
+  if (f->bstop > f->buffer)
+    {
+      f->bstop = f->buffer;
+      f->bptr = f->buffer;
+    }
+}
+
+/*
+ *  Set the position in the stream to `pos'. If `pos' lies between
+ *  f->pos - (bstop - buffer) and f->pos, only bptr is moved within
+ *  the buffer; otherwise the stream is flushed and the seek hook is called.
+ *  Dies if the stream has no seek hook or the hook returns zero.
+ */
+inline void bsetpos(struct fastbuf *f, sh_off_t pos)
+{
+  /* We can optimize seeks only when reading */
+  if (pos >= f->pos - (f->bstop - f->buffer) && pos <= f->pos)
+    f->bptr = f->bstop + (pos - f->pos);
+  else
+    {
+      bflush(f);
+      if (!f->seek || !f->seek(f, pos, SEEK_SET))
+       die("bsetpos: stream not seekable");
+    }
+}
+
+/*
+ *  Seek in the stream, `whence' being SEEK_SET, SEEK_CUR or SEEK_END.
+ *  The first two are converted to an absolute position and passed to
+ *  bsetpos(); SEEK_END flushes the stream and calls the seek hook directly.
+ *  Dies if the stream is not seekable or `whence' is invalid.
+ */
+void bseek(struct fastbuf *f, sh_off_t pos, int whence)
+{
+  if (whence == SEEK_SET)
+    bsetpos(f, pos);
+  else if (whence == SEEK_CUR)
+    bsetpos(f, btell(f) + pos);
+  else if (whence == SEEK_END)
+    {
+      bflush(f);
+      if (!f->seek || !f->seek(f, pos, SEEK_END))
+        die("bseek: stream not seekable");
+    }
+  else
+    die("bseek: invalid whence=%d", whence);
+}
+
+/*
+ *  Read a single byte, calling the refill hook if the buffer is exhausted.
+ *  Returns the byte, or -1 if the refill hook returns zero.
+ */
+int bgetc_slow(struct fastbuf *f)
+{
+  if (f->bptr >= f->bstop && !f->refill(f))
+    return -1;
+  return *f->bptr++;
+}
+
+/*
+ *  Return the next byte without consuming it, calling the refill hook
+ *  if the buffer is exhausted. Returns -1 if the refill hook returns zero.
+ */
+int bpeekc_slow(struct fastbuf *f)
+{
+  if (f->bptr >= f->bstop && !f->refill(f))
+    return -1;
+  return *f->bptr;
+}
+
+/* Write a single byte, calling the spout hook first if the buffer is full. */
+void bputc_slow(struct fastbuf *f, uns c)
+{
+  if (!(f->bptr < f->bufend))
+    f->spout(f);
+  *f->bptr = c;
+  f->bptr++;
+}
+
+/*
+ *  Read up to `l' bytes to `b', refilling the buffer as needed. Reading
+ *  stops early when a refill yields no data. Returns the number of bytes
+ *  read. If `check' is set, a read which has returned some data, but less
+ *  than requested, is fatal.
+ */
+uns bread_slow(struct fastbuf *f, void *b, uns l, uns check)
+{
+  byte *dst = b;
+  uns done = 0;
+
+  while (done < l)
+    {
+      uns avail = f->bstop - f->bptr;
+      uns chunk;
+
+      if (!avail)
+        {
+          f->refill(f);
+          avail = f->bstop - f->bptr;
+          if (!avail)
+            break;
+        }
+      chunk = l - done;
+      if (chunk > avail)
+        chunk = avail;
+      memcpy(dst + done, f->bptr, chunk);
+      f->bptr += chunk;
+      done += chunk;
+    }
+  if (check && done && done < l)
+    die("breadb: short read");
+  return done;
+}
+
+/*
+ *  Write `l' bytes from `b', calling the spout hook whenever there is
+ *  no free space left in the buffer.
+ */
+void bwrite_slow(struct fastbuf *f, const void *b, uns l)
+{
+  const byte *src = b;
+
+  while (l)
+    {
+      uns room = f->bufend - f->bptr;
+      uns chunk;
+
+      if (!room)
+        {
+          f->spout(f);
+          room = f->bufend - f->bptr;
+        }
+      chunk = (room > l) ? l : room;
+      memcpy(f->bptr, src, chunk);
+      f->bptr += chunk;
+      src += chunk;
+      l -= chunk;
+    }
+}
+
+/*
+ *  Copy `l' bytes from stream `f' to stream `t' using the direct buffer
+ *  interface. If `l' is ~0U, everything up to the end of `f' is copied;
+ *  otherwise hitting the end of `f' before `l' bytes are copied is fatal.
+ */
+void
+bbcopy_slow(struct fastbuf *f, struct fastbuf *t, uns l)
+{
+  /* A finite count only decreases, so it can never turn into ~0U later */
+  const int unlimited = (l == ~0U);
+
+  while (l)
+    {
+      byte *src, *dst;
+      uns src_len, dst_len, n;
+
+      src_len = bdirect_read_prepare(f, &src);
+      if (!src_len)
+        {
+          if (unlimited)
+            return;
+          die("bbcopy: source exhausted");
+        }
+      dst_len = bdirect_write_prepare(t, &dst);
+      n = MIN(l, src_len);
+      n = MIN(n, dst_len);
+      memcpy(dst, src, n);
+      bdirect_read_commit(f, src + n);
+      bdirect_write_commit(t, dst + n);
+      if (!unlimited)
+        l -= n;
+    }
+}
+
+/*
+ *  Pass a configuration request to the config hook of the stream and
+ *  return its result; returns -1 if the stream has no such hook.
+ */
+int
+bconfig(struct fastbuf *f, uns item, int value)
+{
+  if (!f->config)
+    return -1;
+  return f->config(f, item, value);
+}
+
+/* Flush the stream and set its position to 0 (bsetpos() dies if that is not possible). */
+void
+brewind(struct fastbuf *f)
+{
+  bflush(f);
+  bsetpos(f, 0);
+}
+
+/*
+ *  Skip `len' bytes of input using the direct buffer interface.
+ *  Returns 1 on success and 0 if the input ends before `len' bytes
+ *  have been skipped.
+ */
+int
+bskip_slow(struct fastbuf *f, uns len)
+{
+  while (len)
+    {
+      byte *buf;
+      uns avail = bdirect_read_prepare(f, &buf);
+      uns step;
+
+      if (!avail)
+        return 0;
+      step = (avail < len) ? avail : len;
+      bdirect_read_commit(f, buf+step);
+      len -= step;
+    }
+  return 1;
+}
+
+/*
+ *  Return the size of the file behind the stream by seeking to its end
+ *  and back to the original position. Returns 0 for a NULL stream and
+ *  -1 if the stream is not seekable, i.e. it has no seek hook or the hook
+ *  returns zero. The missing hook is checked before anything else, so
+ *  a non-seekable stream is neither flushed nor dereferenced through
+ *  a NULL function pointer.
+ */
+sh_off_t
+bfilesize(struct fastbuf *f)
+{
+  if (!f)
+    return 0;
+  if (!f->seek)
+    return -1;
+  sh_off_t pos = btell(f);
+  bflush(f);
+  if (!f->seek(f, 0, SEEK_END))
+    return -1;
+  sh_off_t len = btell(f);
+  bsetpos(f, pos);
+  return len;
+}
diff --git a/lib/fastbuf.h b/lib/fastbuf.h

new file mode 100644 (file)

index 0000000..920d83e
--- /dev/null
+++ b/lib/fastbuf.h
@@ -0,0 +1,410 @@
+/*
+ *     UCW Library -- Fast Buffered I/O
+ *
+ *     (c) 1997--2007 Martin Mares <mj@ucw.cz>
+ *     (c) 2004 Robert Spalek <robert@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_FASTBUF_H
+#define _UCW_FASTBUF_H
+
+#include <string.h>
+#include <alloca.h>
+
+/*
+ *  Generic buffered I/O. You supply hooks to be called for low-level operations
+ *  (swapping of buffers, seeking and closing), we do the rest.
+ *
+ *  Buffer layout when reading:
+ *
+ *  +----------------+---------------------------+
+ *  | read data      | free space                |
+ *  +----------------+---------------------------+
+ *  ^        ^        ^                           ^
+ *  buffer   bptr     bstop                       bufend
+ *
+ *  After the last character is read, bptr == bstop and buffer refill
+ *  is deferred to the next read attempt. This gives us an easy way
+ *  to implement bungetc().
+ *
+ *  When writing:
+ *
+ *  +--------+--------------+--------------------+
+ *  | unused | written data | free space         |
+ *  +--------+--------------+--------------------+
+ *  ^         ^              ^                    ^
+ *  buffer    bstop          bptr                 bufend
+ *
+ *  Dirty tricks:
+ *
+ *    - You can mix reads and writes on the same stream, but you must
+ *     call bflush() in between and remember that the file position
+ *     points after the flushed buffer which is not necessarily the same
+ *     as after the data you've read.
+ *    - The spout/refill hooks can change not only bptr and bstop, but also
+ *     the location of the buffer; fb-mem.c takes advantage of it.
+ *    - In some cases, the user of the bdirect interface can be allowed to modify
+ *     the data in the buffer to avoid unnecessary copying. If the back-end
+ *     allows such modifications, it can set can_overwrite_buffer accordingly:
+ *             *  0 if no modification is allowed,
+ *             *  1 if the user can modify the buffer on the condition that
+ *                  the modifications will be undone before calling the next
+ *                  fastbuf operation
+ *             *  2 if the user is allowed to overwrite the data in the buffer
+ *                  if bdirect_read_commit_modified() is called afterwards.
+ *                  In this case, the back-end must be prepared for trimming
+ *                  of the buffer which is done by the commit function.
+ */
+
+struct fastbuf {
+  byte is_fastbuf[0];                  /* Dummy field for checking of type casts */
+  byte *bptr, *bstop;                  /* Access pointers */
+  byte *buffer, *bufend;               /* Start and end of the buffer */
+  char *name;                          /* File name for error messages */
+  sh_off_t pos;                                /* Position of bstop in the file */
+  int (*refill)(struct fastbuf *);     /* Get a buffer with new data */
+  void (*spout)(struct fastbuf *);     /* Write buffer data to the file */
+  int (*seek)(struct fastbuf *, sh_off_t, int);  /* Slow path for bseek(), buffer already flushed; returns success */
+  void (*close)(struct fastbuf *);     /* Close the stream */
+  int (*config)(struct fastbuf *, uns, int);   /* Configure the stream */
+  int can_overwrite_buffer;            /* Can the buffer be altered? (see discussion above) 0=never, 1=temporarily, 2=permanently */
+};
+
+/* FastIO on files with several configurable back-ends */
+
+enum fb_type {                         /* Which back-end you want to use */
+  FB_STD,                              /* Standard buffered I/O */
+  FB_DIRECT,                           /* Direct I/O bypassing system caches (see fb-direct.c for a description) */
+  FB_MMAP                              /* Memory mapped files */
+};
+
+struct fb_params {
+  enum fb_type type;
+  uns buffer_size;                     /* 0 for default size */
+  uns keep_back_buf;                   /* FB_STD: optimize for bi-directional access */
+  uns read_ahead;                      /* FB_DIRECT options */
+  uns write_back;
+  struct asio_queue *asio;
+};
+
+struct cf_section;
+extern struct cf_section fbpar_cf;
+extern struct fb_params fbpar_def;
+
+struct fastbuf *bopen_file(const char *name, int mode, struct fb_params *params);      /* Use params==NULL for defaults */
+struct fastbuf *bopen_file_try(const char *name, int mode, struct fb_params *params);
+struct fastbuf *bopen_tmp_file(struct fb_params *params);
+struct fastbuf *bopen_fd(int fd, struct fb_params *params);
+
+/* FastIO on standard files (shortcuts for FB_STD) */
+
+struct fastbuf *bopen(const char *name, uns mode, uns buflen);
+struct fastbuf *bopen_try(const char *name, uns mode, uns buflen);
+struct fastbuf *bopen_tmp(uns buflen);
+struct fastbuf *bfdopen(int fd, uns buflen);
+struct fastbuf *bfdopen_shared(int fd, uns buflen);
+void bfilesync(struct fastbuf *b);
+
+/* Temporary files */
+
+#define TEMP_FILE_NAME_LEN 256
+void temp_file_name(char *name);
+void bfix_tmp_file(struct fastbuf *fb, const char *name);
+
+/* Internal functions of some file back-ends */
+
+struct fastbuf *bfdopen_internal(int fd, const char *name, uns buflen);
+struct fastbuf *bfmmopen_internal(int fd, const char *name, uns mode);
+
+extern uns fbdir_cheat;
+struct asio_queue;
+struct fastbuf *fbdir_open_fd_internal(int fd, const char *name, struct asio_queue *io_queue, uns buffer_size, uns read_ahead, uns write_back);
+
+void bclose_file_helper(struct fastbuf *f, int fd, int is_temp_file);
+
+/* FastIO on in-memory streams */
+
+struct fastbuf *fbmem_create(uns blocksize);           /* Create stream and return its writing fastbuf */
+struct fastbuf *fbmem_clone_read(struct fastbuf *);    /* Create reading fastbuf */
+
+/* FastI on file descriptors with limit */
+
+struct fastbuf *bopen_limited_fd(int fd, uns bufsize, uns limit);
+
+/* FastIO on static buffers */
+
+void fbbuf_init_read(struct fastbuf *f, byte *buffer, uns size, uns can_overwrite);
+void fbbuf_init_write(struct fastbuf *f, byte *buffer, uns size);
+/* Number of bytes lying between bstop and bptr, i.e., written to the buffer so far */
+static inline uns
+fbbuf_count_written(struct fastbuf *f)
+{
+  uns written = f->bptr - f->bstop;
+  return written;
+}
+
+/* FastIO on recyclable growing buffers */
+
+struct fastbuf *fbgrow_create(unsigned basic_size);
+void fbgrow_reset(struct fastbuf *b);                  /* Reset stream and prepare for writing */
+void fbgrow_rewind(struct fastbuf *b);                 /* Prepare for reading */
+
+/* FastO on memory pools */
+
+struct mempool;
+struct fbpool {
+  struct fastbuf fb;
+  struct mempool *mp;
+};
+
+void fbpool_init(struct fbpool *fb);   /* Initialize a new fastbuf */
+void fbpool_start(struct fbpool *fb, struct mempool *mp, uns init_size);
+                                       /* Start a new continuous block and prepare for writing (see mp_start()) */
+void *fbpool_end(struct fbpool *fb);   /* Close the block and return its address (see mp_end()).
+                                          The length can be determined with mp_size(mp, ptr). */
+
+/* FastO with atomic writes for multi-threaded programs */
+
+struct fb_atomic {
+  struct fastbuf fb;
+  struct fb_atomic_file *af;
+  byte *expected_max_bptr;
+  uns slack_size;
+};
+#define FB_ATOMIC(f) ((struct fb_atomic *)(f)->is_fastbuf)
+
+struct fastbuf *fbatomic_open(const char *name, struct fastbuf *master, uns bufsize, int record_len);
+void fbatomic_internal_write(struct fastbuf *b);
+
+/*
+ *  Mark the end of a record: once bptr has reached expected_max_bptr, the
+ *  buffer is written out by fbatomic_internal_write(); otherwise nothing
+ *  happens.
+ */
+static inline void
+fbatomic_commit(struct fastbuf *b)
+{
+  struct fb_atomic *F = FB_ATOMIC(b);
+
+  if (b->bptr < F->expected_max_bptr)
+    return;
+  fbatomic_internal_write(b);
+}
+
+/* Configuring stream parameters */
+
+enum bconfig_type {
+  BCONFIG_IS_TEMP_FILE,                        /* 0=normal file, 1=temporary file, 2=shared fd */
+  BCONFIG_KEEP_BACK_BUF,               /* Optimize for bi-directional access */
+};
+
+int bconfig(struct fastbuf *f, uns type, int data);
+
+/* Universal functions working on all fastbuf's */
+
+void bclose(struct fastbuf *f);
+void bflush(struct fastbuf *f);
+void bseek(struct fastbuf *f, sh_off_t pos, int whence);
+void bsetpos(struct fastbuf *f, sh_off_t pos);
+void brewind(struct fastbuf *f);
+sh_off_t bfilesize(struct fastbuf *f);         /* -1 if not seekable */
+
+/* Current position in the stream: the file position of bstop corrected by the distance of bptr from it */
+static inline sh_off_t btell(struct fastbuf *f)
+{
+  sh_off_t delta = f->bptr - f->bstop;
+  return f->pos + delta;
+}
+
+int bgetc_slow(struct fastbuf *f);
+/* Read one byte and advance; bgetc_slow() takes over when the buffer holds no unread data */
+static inline int bgetc(struct fastbuf *f)
+{
+  if (f->bptr >= f->bstop)
+    return bgetc_slow(f);
+  return (int) *f->bptr++;
+}
+
+int bpeekc_slow(struct fastbuf *f);
+/* Look at the next byte without consuming it; bpeekc_slow() takes over when the buffer holds no unread data */
+static inline int bpeekc(struct fastbuf *f)
+{
+  if (f->bptr >= f->bstop)
+    return bpeekc_slow(f);
+  return (int) *f->bptr;
+}
+
+/* Step one byte back in the buffer; no bounds check is performed here */
+static inline void bungetc(struct fastbuf *f)
+{
+  f->bptr--;
+}
+
+void bputc_slow(struct fastbuf *f, uns c);
+/* Write one byte; bputc_slow() takes over when the buffer has no free space */
+static inline void bputc(struct fastbuf *f, uns c)
+{
+  if (f->bptr >= f->bufend)
+    {
+      bputc_slow(f, c);
+      return;
+    }
+  *f->bptr++ = c;
+}
+
+/* Number of bytes between bptr and bstop, i.e., data which can be read without a refill */
+static inline uns
+bavailr(struct fastbuf *f)
+{
+  return f->bstop - f->bptr;
+}
+
+/* Number of bytes between bptr and bufend, i.e., space which can be written without a spout */
+static inline uns
+bavailw(struct fastbuf *f)
+{
+  return f->bufend - f->bptr;
+}
+
+uns bread_slow(struct fastbuf *f, void *b, uns l, uns check);
+/*
+ *  Read `l' bytes to `b'. The request is served straight from the buffer
+ *  when it holds enough data; otherwise bread_slow() is called with
+ *  check=0. Returns the number of bytes read.
+ */
+static inline uns bread(struct fastbuf *f, void *b, uns l)
+{
+  if (bavailr(f) < l)
+    return bread_slow(f, b, l, 0);
+
+  memcpy(b, f->bptr, l);
+  f->bptr += l;
+  return l;
+}
+
+/*
+ *  Like bread(), but the slow path is called with check=1, which makes
+ *  bread_slow() die on a read that is non-empty, yet shorter than `l'.
+ */
+static inline uns breadb(struct fastbuf *f, void *b, uns l)
+{
+  if (bavailr(f) < l)
+    return bread_slow(f, b, l, 1);
+
+  memcpy(b, f->bptr, l);
+  f->bptr += l;
+  return l;
+}
+
+void bwrite_slow(struct fastbuf *f, const void *b, uns l);
+/*
+ *  Write `l' bytes from `b'. The data are copied straight to the buffer
+ *  when they fit in its free space; otherwise bwrite_slow() does the job.
+ */
+static inline void bwrite(struct fastbuf *f, const void *b, uns l)
+{
+  if (bavailw(f) < l)
+    {
+      bwrite_slow(f, b, l);
+      return;
+    }
+  memcpy(f->bptr, b, l);
+  f->bptr += l;
+}
+
+/*
+ *  Functions for reading of strings:
+ *
+ *     bgets()         reads a line, strips the trailing '\n' and returns a pointer
+ *                     to the terminating 0 or NULL on EOF. Dies if the line is too long.
+ *     bgets0()                does the same for 0-terminated strings.
+ *     bgets_nodie()   a variant of bgets() which returns either the length of the
+ *                             string (excluding the terminator) or -1 if the line does not fit
+ *                             in the buffer. In such cases, it returns after reading exactly `l'
+ *                             bytes of input.
+ *     bgets_bb()      a variant of bgets() which allocates the string in a growing buffer
+ *     bgets_mp()      the same, but in a mempool
+ *     bgets_stk()     the same, but on the stack by alloca()
+ */
+
+char *bgets(struct fastbuf *f, char *b, uns l);
+char *bgets0(struct fastbuf *f, char *b, uns l);
+int bgets_nodie(struct fastbuf *f, char *b, uns l);
+
+struct mempool;
+struct bb_t;
+uns bgets_bb(struct fastbuf *f, struct bb_t *b, uns limit);
+char *bgets_mp(struct fastbuf *f, struct mempool *mp);
+
+struct bgets_stk_struct {
+  struct fastbuf *f;
+  byte *old_buf, *cur_buf, *src;
+  uns old_len, cur_len, src_len;
+};
+void bgets_stk_init(struct bgets_stk_struct *s);
+void bgets_stk_step(struct bgets_stk_struct *s);
+#define bgets_stk(fb) ({ struct bgets_stk_struct _s; _s.f = (fb); for (bgets_stk_init(&_s); _s.cur_len; _s.cur_buf = alloca(_s.cur_len), bgets_stk_step(&_s)); _s.cur_buf; })
+
+/* Write a string without its terminating zero */
+static inline void
+bputs(struct fastbuf *f, const char *b)
+{
+  uns len = strlen(b);
+  bwrite(f, b, len);
+}
+
+/* Write a string including its terminating zero */
+static inline void
+bputs0(struct fastbuf *f, const char *b)
+{
+  uns len_with_zero = strlen(b)+1;
+  bwrite(f, b, len_with_zero);
+}
+
+/* Write a string followed by a newline character */
+static inline void
+bputsn(struct fastbuf *f, const char *b)
+{
+  bwrite(f, b, strlen(b));
+  bputc(f, '\n');
+}
+
+void bbcopy_slow(struct fastbuf *f, struct fastbuf *t, uns l);
+/*
+ *  Copy `l' bytes from `f' to `t'. The fast path applies when the data are
+ *  already buffered in `f' and fit in the free space of `t'; everything
+ *  else is left to bbcopy_slow().
+ */
+static inline void
+bbcopy(struct fastbuf *f, struct fastbuf *t, uns l)
+{
+  if (bavailr(f) < l || bavailw(t) < l)
+    {
+      bbcopy_slow(f, t, l);
+      return;
+    }
+  memcpy(t->bptr, f->bptr, l);
+  t->bptr += l;
+  f->bptr += l;
+}
+
+int bskip_slow(struct fastbuf *f, uns len);
+/*
+ *  Skip `len' bytes of input. Returns 1 when the data were skipped inside
+ *  the buffer, otherwise the result of bskip_slow().
+ */
+static inline int bskip(struct fastbuf *f, uns len)
+{
+  if (bavailr(f) < len)
+    return bskip_slow(f, len);
+
+  f->bptr += len;
+  return 1;
+}
+
+/* Direct I/O on buffers */
+
+/*
+ *  Start a direct read: set *buf to the unread data in the buffer and
+ *  return their length. The refill hook is called first if the buffer has
+ *  been read up to bstop; when it reports no more data, 0 is returned.
+ */
+static inline uns
+bdirect_read_prepare(struct fastbuf *f, byte **buf)
+{
+  int exhausted = (f->bptr == f->bstop) && !f->refill(f);
+
+  if (exhausted)
+    {
+      *buf = NULL;  // This is not needed, but it helps to get rid of spurious warnings
+      return 0;
+    }
+  *buf = f->bptr;
+  return bavailr(f);
+}
+
+/* Finish a direct read: `pos' becomes the new read pointer */
+static inline void
+bdirect_read_commit(struct fastbuf *f, byte *pos)
+{
+  f->bptr = pos;
+}
+
+/*
+ *  Finish a direct read after the user has overwritten data in the buffer
+ *  (see can_overwrite_buffer == 2 above): besides the read pointer, the
+ *  start of the buffer is moved to `pos' as well, which trims the buffer.
+ */
+static inline void
+bdirect_read_commit_modified(struct fastbuf *f, byte *pos)
+{
+  f->bptr = pos;
+  f->buffer = pos;     /* Avoid seeking backwards in the buffer */
+}
+
+/*
+ *  Start a direct write: set *buf to the free space in the buffer and
+ *  return its size. The spout hook is called first if the buffer is full.
+ */
+static inline uns
+bdirect_write_prepare(struct fastbuf *f, byte **buf)
+{
+  if (!bavailw(f))
+    f->spout(f);
+
+  uns room = bavailw(f);
+  *buf = f->bptr;
+  return room;
+}
+
+/* Finish a direct write: `pos' becomes the new write pointer */
+static inline void
+bdirect_write_commit(struct fastbuf *f, byte *pos)
+{
+  f->bptr = pos;
+}
+
+/* Formatted output */
+
+int bprintf(struct fastbuf *b, const char *msg, ...) FORMAT_CHECK(printf,2,3);
+int vbprintf(struct fastbuf *b, const char *msg, va_list args);
+
+#endif
diff --git a/lib/fastbuf.t b/lib/fastbuf.t

new file mode 100644 (file)

index 0000000..6f8681a
--- /dev/null
+++ b/lib/fastbuf.t
@@ -0,0 +1,15 @@
+# Tests for fastbufs
+
+Run:   ../obj/lib/fb-file-t
+Out:   112
+       <hello><hello><hello><hello><hello><hello><hello><hello><hello><hello><hello><hello><hello><hello><hello><hello>
+       112 116
+
+Run:   ../obj/lib/fb-grow-t
+Out:   <10><10><0>1234512345<10><9>5<10>
+       <10><10><0>1234512345<10><9>5<10>
+       <10><10><0>1234512345<10><9>5<10>
+       <10><10><0>1234512345<10><9>5<10>
+       <10><10><0>1234512345<10><9>5<10>
+
+Run:   ../obj/lib/fb-pool-t
diff --git a/lib/fb-atomic.c b/lib/fb-atomic.c

new file mode 100644 (file)

index 0000000..234d920
--- /dev/null
+++ b/lib/fb-atomic.c
@@ -0,0 +1,169 @@
+/*
+ *     UCW Library -- Atomic Buffered Write to Files
+ *
+ *     (c) 2006 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+/*
+ *     This fastbuf backend is intended for cases where several threads
+ *     of a single program append records to a single file and while the
+ *     record can mix in an arbitrary way, the bytes inside a single
+ *     record must remain uninterrupted.
+ *
+ *     In case of files with fixed record size, we just allocate the
+ *     buffer to hold a whole number of records and take advantage
+ *     of the atomicity of the write() system call.
+ *
+ *     With variable-sized records, we need another solution: when
+ *     writing a record, we keep the fastbuf in a locked state, which
+ *     prevents buffer flushing (and if the buffer becomes full, we extend it),
+ *     and we wait for an explicit commit operation which write()s the buffer
+ *     if the free space in the buffer falls below the expected maximum record
+ *     length.
+ *
+ *     fbatomic_open() is called with the following parameters:
+ *         name - name of the file to open
+ *         master - fbatomic for the master thread or NULL if it's the first open
+ *         bufsize - initial buffer size
+ *         record_len - record length for fixed-size records;
+ *             or -(expected maximum record length) for variable-sized ones.
+ */
+
+#define LOCAL_DEBUG
+
+#include "lib/lib.h"
+#include "lib/fastbuf.h"
+#include "lib/lfs.h"
+
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+struct fb_atomic_file {
+  int fd;                              /* Descriptor of the output file */
+  int use_count;                       /* Number of fastbufs sharing this structure */
+  int record_len;                      /* As passed to fbatomic_open() */
+  uns locked;                          /* Set when record_len < 0 (variable-sized records) */
+  byte name[1];                        /* File name; the structure is allocated with room for the whole string */
+};
+
+/*
+ *  Write everything accumulated in the buffer with a single write() call
+ *  and reset the buffer to empty. Dies on I/O errors and on partial writes.
+ */
+void
+fbatomic_internal_write(struct fastbuf *f)
+{
+  struct fb_atomic_file *af = FB_ATOMIC(f)->af;
+  int pending = f->bptr - f->buffer;
+
+  if (!pending)
+    return;
+
+  /* With fixed-size records, the buffer must hold a whole number of them */
+  ASSERT(af->record_len < 0 || !(pending % af->record_len));
+  int written = write(af->fd, f->buffer, pending);
+  if (written < 0)
+    die("Error writing %s: %m", f->name);
+  if (written != pending)
+    die("Unexpected partial write to %s: written only %d bytes of %d", f->name, written, pending);
+  f->bptr = f->buffer;
+}
+
+/*
+ *  Spout hook. It does nothing unless the buffer is completely full. Then,
+ *  for locked files (variable-sized records), the buffer is enlarged instead
+ *  of being written; for unlocked files, the buffer is written out.
+ */
+static void
+fbatomic_spout(struct fastbuf *f)
+{
+  if (f->bptr < f->bufend)             /* Explicit flushes should be ignored */
+    return;
+
+  struct fb_atomic *F = FB_ATOMIC(f);
+  if (F->af->locked)
+    {
+      uns written = f->bptr - f->buffer;
+      uns size = f->bufend - f->buffer + F->slack_size;        /* Grow by the current slack... */
+      F->slack_size *= 2;                                      /* ...and ask for twice as much slack from now on */
+      DBG("Reallocating buffer for atomic file %s with slack %d", f->name, F->slack_size);
+      f->buffer = xrealloc(f->buffer, size);
+      f->bufend = f->buffer + size;
+      f->bptr = f->buffer + written;                           /* The buffer could have moved */
+      F->expected_max_bptr = f->bufend - F->slack_size;
+    }
+  else
+    fbatomic_internal_write(f);
+}
+
+/*
+ *  Close hook: write out whatever is left in the buffer, drop our reference
+ *  to the shared file structure (closing the file when we were the last
+ *  user) and release both the buffer and the fastbuf itself.
+ */
+static void
+fbatomic_close(struct fastbuf *f)
+{
+  struct fb_atomic_file *af = FB_ATOMIC(f)->af;
+  fbatomic_internal_write(f);  /* Need to flush explicitly, because the file can be locked */
+  if (!--af->use_count)
+    {
+      close(af->fd);
+      xfree(af);
+    }
+  xfree(f->buffer);            /* Allocated separately in fbatomic_open(), possibly reallocated by the spout hook */
+  xfree(f);
+}
+
+/*
+ *  Open an atomic-write fastbuf (the parameters are described at the top of
+ *  this file). With a master, the new fastbuf shares the master's file
+ *  structure and `name' is not used; without one, the file is created
+ *  (truncated and opened in append mode). Dies if the file cannot be created.
+ */
+struct fastbuf *
+fbatomic_open(const char *name, struct fastbuf *master, uns bufsize, int record_len)
+{
+  struct fb_atomic *F = xmalloc_zero(sizeof(*F));
+  struct fastbuf *f = &F->fb;
+  struct fb_atomic_file *af;
+  if (master)
+    {
+      af = FB_ATOMIC(master)->af;
+      af->use_count++;
+      ASSERT(af->record_len == record_len);
+    }
+  else
+    {
+      af = xmalloc_zero(sizeof(*af) + strlen(name));   /* name[1] already provides the byte for the terminating zero */
+      if ((af->fd = sh_open(name, O_WRONLY | O_CREAT | O_TRUNC | O_APPEND, 0666)) < 0)
+       die("Cannot create %s: %m", name);
+      af->use_count = 1;
+      af->record_len = record_len;
+      af->locked = (record_len < 0);
+      strcpy(af->name, name);
+    }
+  F->af = af;
+  /* For fixed-size records, round the buffer size up to a whole number of records */
+  if (record_len > 0 && bufsize % record_len)
+    bufsize += record_len - (bufsize % record_len);
+  f->buffer = xmalloc(bufsize);
+  f->bufend = f->buffer + bufsize;
+  F->slack_size = (record_len < 0) ? -record_len : 0;
+  ASSERT(bufsize > F->slack_size);
+  F->expected_max_bptr = f->bufend - F->slack_size;
+  f->bptr = f->bstop = f->buffer;
+  f->name = af->name;
+  f->spout = fbatomic_spout;
+  f->close = fbatomic_close;
+  return f;
+}
+
+#ifdef TEST
+
+int main(int argc UNUSED, char **argv UNUSED)
+{
+  struct fastbuf *f, *g;
+
+  log(L_INFO, "Testing block writes");
+  f = fbatomic_open("test", NULL, 16, 4);
+  for (u32 i=0; i<17; i++)
+    bwrite(f, &i, 4);
+  bclose(f);
+
+  log(L_INFO, "Testing interleaved var-size writes");
+  f = fbatomic_open("test2", NULL, 23, -5);
+  g = fbatomic_open("test2", f, 23, -5);
+  for (int i=0; i<100; i++)
+    {
+      struct fastbuf *x = (i%2) ? g : f;
+      bprintf(x, "%c<%d>\n", "fg"[i%2], ((259309*i) % 1000000) >> (i % 8));
+      fbatomic_commit(x);
+    }
+  bclose(f);
+  bclose(g);
+
+  return 0;
+}
+
+#endif
diff --git a/lib/fb-buffer.c b/lib/fb-buffer.c

new file mode 100644 (file)

index 0000000..d8094eb
--- /dev/null
+++ b/lib/fb-buffer.c
@@ -0,0 +1,70 @@
+/*
+ *     UCW Library -- Fast Buffered I/O on Static Buffers
+ *
+ *     (c) 2003--2006 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/fastbuf.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+/* Refill hook of the reading variant: there is never anything more to read, so always return 0 */
+static int
+fbbuf_refill(struct fastbuf *f UNUSED)
+{
+  return 0;
+}
+
+/*
+ *  Seek hook of the reading variant. Positions are offsets within the
+ *  static buffer; SEEK_END is supported as well to be nice to callers
+ *  seeking relative to the end of the buffer. Always succeeds.
+ */
+static int
+fbbuf_seek(struct fastbuf *f, sh_off_t pos, int whence)
+{
+  sh_off_t size = f->bufend - f->buffer;
+
+  if (whence == SEEK_END)
+    pos = pos + size;
+  ASSERT(pos >= 0 && pos <= size);
+  f->bstop = f->bufend;
+  f->bptr = f->buffer + pos;
+  f->pos = size;
+  return 1;
+}
+
+/*
+ *  Set up `f' for reading of `size' bytes stored at `buf'. The whole buffer
+ *  is presented as already-read data, so `pos' (the position of bstop)
+ *  equals `size'. `can_overwrite' is stored as can_overwrite_buffer.
+ */
+void
+fbbuf_init_read(struct fastbuf *f, byte *buf, uns size, uns can_overwrite)
+{
+  f->buffer = buf;
+  f->bufend = buf + size;
+  f->bptr = f->buffer;
+  f->bstop = f->bufend;
+  f->pos = size;
+  f->name = "fbbuf-read";
+  f->can_overwrite_buffer = can_overwrite;
+
+  /* Hooks: only refilling and seeking are provided */
+  f->refill = fbbuf_refill;
+  f->seek = fbbuf_seek;
+  f->spout = NULL;
+  f->close = NULL;
+  f->config = NULL;
+}
+
+/* Spout hook of the writing variant: being called at all is treated as an overflow of the static buffer */
+static void
+fbbuf_spout(struct fastbuf *f UNUSED)
+{
+  die("fbbuf: buffer overflow on write");
+}
+
+/*
+ *  Set up `f' for writing to a static buffer of `size' bytes at `buf'.
+ *  Writing beyond the end of the buffer is fatal (see fbbuf_spout()).
+ *  The amount of data written can be obtained by fbbuf_count_written().
+ */
+void
+fbbuf_init_write(struct fastbuf *f, byte *buf, uns size)
+{
+  f->buffer = f->bstop = f->bptr = buf;
+  f->bufend = buf + size;
+  f->name = "fbbuf-write";
+  f->pos = 0;                  /* pos is the position of bstop, which sits at the very start here */
+  f->refill = NULL;
+  f->spout = fbbuf_spout;
+  f->seek = NULL;
+  f->close = NULL;
+  f->config = NULL;
+  f->can_overwrite_buffer = 0;
+}
diff --git a/lib/fb-direct.c b/lib/fb-direct.c

new file mode 100644 (file)

index 0000000..865bbb4
--- /dev/null
+++ b/lib/fb-direct.c
@@ -0,0 +1,344 @@
+/*
+ *     UCW Library -- Fast Buffered I/O on O_DIRECT Files
+ *
+ *     (c) 2006--2007 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+/*
+ *     This is a fastbuf backend for fast streaming I/O using O_DIRECT and
+ *     the asynchronous I/O module. It's designed for use on large files
+ *     which don't fit in the disk cache.
+ *
+ *     CAVEATS:
+ *
+ *       - All operations with a single fbdirect handle must be done
+ *         within a single thread, unless you provide a custom I/O queue
+ *         and take care of locking.
+ *
+ *     FIXME: what if the OS doesn't support O_DIRECT?
+ *     FIXME: unaligned seeks and partial writes?
+ *     FIXME: append to unaligned file
+ */
+
+#undef LOCAL_DEBUG
+
+#include "lib/lib.h"
+#include "lib/fastbuf.h"
+#include "lib/lfs.h"
+#include "lib/asio.h"
+#include "lib/conf.h"
+#include "lib/threads.h"
+
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdio.h>
+
+uns fbdir_cheat;
+
+static struct cf_section fbdir_cf = {
+  CF_ITEMS {
+    CF_UNS("Cheat", &fbdir_cheat),
+    CF_END
+  }
+};
+
+#define FBDIR_ALIGN 512
+
+enum fbdir_mode {                              // Current operating mode
+    M_NULL,
+    M_READ,
+    M_WRITE
+};
+
+struct fb_direct {
+  struct fastbuf fb;
+  int fd;                                      // File descriptor
+  int is_temp_file;
+  struct asio_queue *io_queue;                 // I/O queue to use
+  struct asio_queue *user_queue;               // If io_queue was supplied by the user
+  struct asio_request *pending_read;
+  struct asio_request *done_read;
+  struct asio_request *active_buffer;
+  enum fbdir_mode mode;
+  byte name[0];
+};
+#define FB_DIRECT(f) ((struct fb_direct *)(f)->is_fastbuf)
+
+static void CONSTRUCTOR
+fbdir_global_init(void)
+{
+  cf_declare_section("FBDirect", &fbdir_cf, 0);
+}
+
+/*
+ *  Wait until F has no pending read. Every request returned by asio_wait()
+ *  is filed as done_read of the stream recorded in its user_data, which
+ *  need not be F itself.
+ */
+static void
+fbdir_read_sync(struct fb_direct *F)
+{
+  while (F->pending_read)
+    {
+      struct asio_request *req = asio_wait(F->io_queue);
+      ASSERT(req);
+
+      struct fb_direct *owner = req->user_data;
+      ASSERT(owner);
+      ASSERT(owner->pending_read == req && !owner->done_read);
+      owner->done_read = req;
+      owner->pending_read = NULL;
+    }
+}
+
+/*
+ *  Switch the stream to another operating mode (no-op if it is already
+ *  there). Leaving M_READ waits for the pending read-ahead and gives its
+ *  buffer back; leaving M_WRITE waits for the queue to sync. In all cases,
+ *  the active buffer (if any) is returned to the queue.
+ */
+static void
+fbdir_change_mode(struct fb_direct *F, enum fbdir_mode mode)
+{
+  if (F->mode == mode)
+    return;
+  DBG("FB-DIRECT: Switching mode to %d", mode);
+  switch (F->mode)
+    {
+    case M_NULL:
+      break;
+    case M_READ:
+      fbdir_read_sync(F);                      // Wait for read-ahead requests to finish
+      if (F->done_read)                                // Return read-ahead requests if any
+       {
+         asio_put(F->done_read);
+         F->done_read = NULL;
+       }
+      break;
+    case M_WRITE:
+      asio_sync(F->io_queue);                  // Wait for pending writebacks
+      break;
+    }
+  if (F->active_buffer)
+    {
+      asio_put(F->active_buffer);
+      F->active_buffer = NULL;
+    }
+  F->mode = mode;
+}
+
+/*
+ *  Submit an asynchronous read of one queue buffer (buffer_size bytes) from
+ *  the file and remember the request as pending_read.
+ */
+static void
+fbdir_submit_read(struct fb_direct *F)
+{
+  struct asio_request *r = asio_get(F->io_queue);
+  r->fd = F->fd;
+  r->op = ASIO_READ;
+  r->len = F->io_queue->buffer_size;
+  r->user_data = F;                            // Lets fbdir_read_sync() find the owner of the request
+  asio_submit(r);
+  F->pending_read = r;
+}
+
+/*
+ *  Refill hook: take the next completed read (submitting one and waiting
+ *  for it if no read-ahead is available), make its buffer the fastbuf's
+ *  buffer and submit a read-ahead of the following block.
+ *  Returns the number of bytes obtained, 0 if the read returned no data.
+ *  Dies on a read error.
+ */
+static int
+fbdir_refill(struct fastbuf *f)
+{
+  struct fb_direct *F = FB_DIRECT(f);
+
+  DBG("FB-DIRECT: Refill");
+
+  if (!F->done_read)
+    {
+      if (!F->pending_read)
+       {
+         fbdir_change_mode(F, M_READ);
+         fbdir_submit_read(F);
+       }
+      fbdir_read_sync(F);
+      ASSERT(F->done_read);
+    }
+
+  struct asio_request *r = F->done_read;
+  F->done_read = NULL;
+  if (F->active_buffer)                                // The previous block is no longer needed
+    asio_put(F->active_buffer);
+  F->active_buffer = r;
+  if (!r->status)                              // Nothing read; no further read-ahead is submitted
+    return 0;
+  if (r->status < 0)
+    die("Error reading %s: %s", f->name, strerror(r->returned_errno));
+  f->bptr = f->buffer = r->buffer;
+  f->bstop = f->bufend = f->buffer + r->status;
+  f->pos += r->status;
+
+  fbdir_submit_read(F);                                // Read-ahead the next block
+
+  return r->status;
+}
+
+/*
+ *  Spout hook: submit the data between bstop and bptr for write-back and
+ *  set the fastbuf up with an empty buffer for further writing (a new one
+ *  from the queue if the active buffer has been submitted or there was none).
+ */
+static void
+fbdir_spout(struct fastbuf *f)
+{
+  struct fb_direct *F = FB_DIRECT(f);
+  struct asio_request *r;
+
+  DBG("FB-DIRECT: Spout");
+
+  fbdir_change_mode(F, M_WRITE);
+  r = F->active_buffer;
+  if (r && f->bptr > f->bstop)
+    {
+      r->op = ASIO_WRITE_BACK;
+      r->fd = F->fd;
+      r->len = f->bptr - f->bstop;
+      ASSERT(!(f->pos % FBDIR_ALIGN) || fbdir_cheat);
+      f->pos += r->len;
+      if (!fbdir_cheat && r->len % FBDIR_ALIGN)                        // Have to simulate incomplete writes
+       {
+         // Write a block padded to FBDIR_ALIGN, wait for it and cut the file back to the real length
+         r->len = ALIGN_TO(r->len, FBDIR_ALIGN);
+         asio_submit(r);
+         asio_sync(F->io_queue);
+         DBG("FB-DIRECT: Truncating at %llu", (long long)f->pos);
+         if (sh_ftruncate(F->fd, f->pos) < 0)
+           die("Error truncating %s: %m", f->name);
+       }
+      else
+       asio_submit(r);
+      r = NULL;                                        // The request has been submitted, we need a new one
+    }
+  if (!r)
+    r = asio_get(F->io_queue);
+  f->bstop = f->bptr = f->buffer = r->buffer;
+  f->bufend = f->buffer + F->io_queue->buffer_size;
+  F->active_buffer = r;
+}
+
+/*
+ *  Seek hook. Seeking to the current position by SEEK_SET is a no-op;
+ *  otherwise the stream is switched to M_NULL mode first and the file
+ *  descriptor is repositioned. Returns 1 on success, 0 on failure.
+ */
+static int
+fbdir_seek(struct fastbuf *f, sh_off_t pos, int whence)
+{
+  struct fb_direct *F = FB_DIRECT(f);
+
+  DBG("FB-DIRECT: Seek %llu %d", (long long)pos, whence);
+
+  if (whence == SEEK_SET && pos == f->pos)
+    return 1;
+
+  fbdir_change_mode(F, M_NULL);                        // Wait for all async requests to finish
+  sh_off_t new_pos = sh_seek(F->fd, pos, whence);
+  if (new_pos >= 0)
+    {
+      f->pos = new_pos;
+      return 1;
+    }
+  return 0;
+}
+
+/*
+ *  Get a reference to the I/O queue kept in the context returned by
+ *  ucwlib_thread_context(), creating the queue if there is none yet.
+ *  The parameters are used only when the queue is being created.
+ */
+static struct asio_queue *
+fbdir_get_io_queue(uns buffer_size, uns write_back)
+{
+  struct ucwlib_context *ctx = ucwlib_thread_context();
+
+  if (!ctx->io_queue)
+    {
+      struct asio_queue *fresh = xmalloc_zero(sizeof(struct asio_queue));
+      fresh->buffer_size = buffer_size;
+      fresh->max_writebacks = write_back;
+      asio_init_queue(fresh);
+      ctx->io_queue = fresh;
+    }
+
+  struct asio_queue *q = ctx->io_queue;
+  q->use_count++;
+  DBG("FB-DIRECT: Got I/O queue, uc=%d", q->use_count);
+  return q;
+}
+
+/*
+ *  Drop a reference obtained by fbdir_get_io_queue(); the queue is cleaned
+ *  up and freed when the last reference goes away.
+ */
+static void
+fbdir_put_io_queue(void)
+{
+  struct ucwlib_context *ctx = ucwlib_thread_context();
+  struct asio_queue *q = ctx->io_queue;
+
+  ASSERT(q);
+  DBG("FB-DIRECT: Put I/O queue, uc=%d", q->use_count);
+  if (--q->use_count)
+    return;
+
+  asio_cleanup_queue(q);
+  xfree(q);
+  ctx->io_queue = NULL;
+}
+
+/*
+ *  Close hook: switch to M_NULL mode, release the I/O queue reference
+ *  unless the queue was supplied by the user, and let bclose_file_helper()
+ *  deal with the file descriptor before the fastbuf is freed.
+ */
+static void
+fbdir_close(struct fastbuf *f)
+{
+  struct fb_direct *F = FB_DIRECT(f);
+
+  DBG("FB-DIRECT: Close");
+
+  fbdir_change_mode(F, M_NULL);
+  if (!F->user_queue)
+    fbdir_put_io_queue();
+
+  bclose_file_helper(f, F->fd, F->is_temp_file);
+  xfree(f);
+}
+
+/*
+ *  Config hook. Only BCONFIG_IS_TEMP_FILE is understood: the new value is
+ *  stored and the previous one returned. Other items yield -1.
+ */
+static int
+fbdir_config(struct fastbuf *f, uns item, int value)
+{
+  if (item != BCONFIG_IS_TEMP_FILE)
+    return -1;
+
+  struct fb_direct *F = FB_DIRECT(f);
+  int previous = F->is_temp_file;
+  F->is_temp_file = value;
+  return previous;
+}
+
+/*
+ *  Create a direct-I/O fastbuf on an already opened file descriptor.
+ *  If `q' is given, it is used as the I/O queue and remembered as a
+ *  user-supplied one; otherwise the queue from fbdir_get_io_queue() is
+ *  used with the given buffer_size and write_back. `read_ahead' is unused.
+ */
+struct fastbuf *
+fbdir_open_fd_internal(int fd, const char *name, struct asio_queue *q, uns buffer_size, uns read_ahead UNUSED, uns write_back)
+{
+  int namelen = strlen(name) + 1;
+  struct fb_direct *F = xmalloc(sizeof(struct fb_direct) + namelen);   // The name is stored right behind the structure
+  struct fastbuf *f = &F->fb;
+
+  DBG("FB-DIRECT: Open");
+  bzero(F, sizeof(*F));
+  f->name = F->name;
+  memcpy(f->name, name, namelen);
+  F->fd = fd;
+  if (q)
+    F->io_queue = F->user_queue = q;
+  else
+    F->io_queue = fbdir_get_io_queue(buffer_size, write_back);
+  f->refill = fbdir_refill;
+  f->spout = fbdir_spout;
+  f->seek = fbdir_seek;
+  f->close = fbdir_close;
+  f->config = fbdir_config;
+  f->can_overwrite_buffer = 2;
+  return f;
+}
+
+#ifdef TEST
+
+#include "lib/getopt.h"
+
+int main(int argc, char **argv)
+{
+  struct fb_params par = { .type = FB_DIRECT };
+  struct fastbuf *f, *t;
+
+  log_init(NULL);
+  if (cf_getopt(argc, argv, CF_SHORT_OPTS, CF_NO_LONG_OPTS, NULL) >= 0)
+    die("Hey, whaddya want?");
+  f = (optind < argc) ? bopen_file(argv[optind++], O_RDONLY, &par) : bopen_fd(0, &par);
+  t = (optind < argc) ? bopen_file(argv[optind++], O_RDWR | O_CREAT | O_TRUNC, &par) : bopen_fd(1, &par);
+
+  bbcopy(f, t, ~0U);
+  ASSERT(btell(f) == btell(t));
+
+#if 0          // This triggers unaligned write
+  bflush(t);
+  bputc(t, '\n');
+#endif
+
+  brewind(t);
+  bgetc(t);
+  ASSERT(btell(t) == 1);
+
+  bclose(f);
+  bclose(t);
+  return 0;
+}
+
+#endif
diff --git a/lib/fb-file.c b/lib/fb-file.c

new file mode 100644 (file)

index 0000000..30f8eac
--- /dev/null
+++ b/lib/fb-file.c
@@ -0,0 +1,276 @@
+/*
+ *     UCW Library -- Fast Buffered I/O on Files
+ *
+ *     (c) 1997--2007 Martin Mares <mj@ucw.cz>
+ *     (c) 2007 Pavel Charvat <pchar@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/fastbuf.h"
+#include "lib/lfs.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+/* Per-stream state of the plain file back-end; the data buffer follows the structure in memory (see FB_BUFFER) */
+struct fb_file {
+  struct fastbuf fb;
+  int fd;                              /* File descriptor */
+  int is_temp_file;                    /* Set via BCONFIG_IS_TEMP_FILE, passed to bclose_file_helper() on close */
+  int keep_back_buf;                   /* Optimize for backwards reading */
+  sh_off_t wpos;                       /* Real file position */
+  uns wlen;                            /* Window size */
+};
+#define FB_FILE(f) ((struct fb_file *)(f)->is_fastbuf)
+#define FB_BUFFER(f) (byte *)(FB_FILE(f) + 1)
+
+/*
+ * Refill callback of the file back-end: load the buffer with file data around
+ * the logical position f->pos. F->wpos is the real position of the descriptor
+ * and F->wlen the number of valid bytes left in the buffer by the previous
+ * call; together they decide whether data are skipped by read(), reused from
+ * the buffer, or whether a real lseek() is performed. When keep_back_buf is
+ * set, a part of the buffer (`back' bytes) is used for data preceding f->pos.
+ * On success f->pos is advanced to the position corresponding to f->bstop.
+ * Returns non-zero if data are available and 0 at end of file; dies on
+ * read or seek errors.
+ */
+static int
+bfd_refill(struct fastbuf *f)
+{
+  struct fb_file *F = FB_FILE(f);
+  byte *read_ptr = (f->buffer = FB_BUFFER(f));
+  uns blen = f->bufend - f->buffer, back = F->keep_back_buf ? blen >> 2 : 0, read_len = blen;
+  /* Forward or no seek */
+  if (F->wpos <= f->pos)
+    {
+      sh_off_t diff = f->pos - F->wpos;
+      /* Formula for long forward seeks (prefer lseek()) */
+      if (diff > ((sh_off_t)blen << 2))
+        {
+long_seek:
+         f->bptr = f->buffer + back;
+         f->bstop = f->buffer + blen;
+         goto seek;
+       }
+      /* Short forward seek (prefer read() to skip data )*/
+      else if ((uns)diff >= back)
+        {
+         uns skip = diff - back;
+         F->wpos += skip;
+         while (skip)
+           {
+             int l = read(F->fd, f->buffer, MIN(skip, blen));
+             if (unlikely(l <= 0))
+               if (l < 0)
+                 die("Error reading %s: %m", f->name);
+               else
+                 {
+                   /* EOF while skipping: undo the part of the advance which was not actually read */
+                   F->wpos -= skip;
+                   goto eof;
+                 }
+             skip -= l;
+           }
+       }
+      /* Reuse part of the previous window and append new data (also F->wpos == f->pos) */
+      else
+        {
+         uns keep = back - (uns)diff;
+         if (keep >= F->wlen)
+           back = diff + (keep = F->wlen);
+         else
+           memmove(f->buffer, f->buffer + F->wlen - keep, keep);
+         read_len -= keep;
+         read_ptr += keep;
+       }
+      f->bptr = f->buffer + back;
+      f->bstop = f->buffer + blen;
+    }
+  /* Backwards seek */
+  else
+    {
+      sh_off_t diff = F->wpos - f->pos;
+      /* Formula for long backwards seeks (keep smaller backbuffer than for shorter seeks ) */
+      if (diff > ((sh_off_t)blen << 1))
+        {
+         if ((sh_off_t)back > f->pos)
+           back = f->pos;
+         goto long_seek;
+       }
+      /* Seek into previous window (do nothing... for example brewind) */
+      else if ((uns)diff <= F->wlen) 
+        {
+         f->bstop = f->buffer + F->wlen;
+         f->bptr = f->bstop - diff;
+         f->pos = F->wpos;
+         return 1;
+       }
+      back *= 3;
+      if ((sh_off_t)back > f->pos)
+       back = f->pos;
+      f->bptr = f->buffer + back;
+      read_len = blen;
+      f->bstop = f->buffer + read_len;
+      /* Reuse part of previous window */
+      if (F->wlen && read_len <= back + diff && read_len > back + diff - F->wlen)
+        {
+         uns keep = read_len + F->wlen - back - diff;
+         memmove(f->buffer + read_len - keep, f->buffer, keep);
+       }
+seek:
+      /* Do lseek() */
+      F->wpos = f->pos + (f->buffer - f->bptr);
+      if (sh_seek(F->fd, F->wpos, SEEK_SET) < 0)
+       die("Error seeking %s: %m", f->name);
+    }
+  /* Read (part of) buffer */
+  do
+    {
+      int l = read(F->fd, read_ptr, read_len);
+      if (unlikely(l < 0))
+       die("Error reading %s: %m", f->name);
+      if (!l)
+       if (unlikely(read_ptr < f->bptr))
+         goto eof;
+       else
+         break; /* Incomplete read because of EOF */
+      read_ptr += l;
+      read_len -= l;
+      F->wpos += l;
+    }
+  while (read_ptr <= f->bptr);
+  /* A partially filled buffer ends where the last read() stopped */
+  if (read_len)
+    f->bstop = read_ptr;
+  f->pos += f->bstop - f->bptr;
+  F->wlen = f->bstop - f->buffer;
+  return f->bstop - f->bptr;
+eof:
+  /* Seeked behind EOF */
+  f->bptr = f->bstop = f->buffer;
+  F->wlen = 0;
+  return 0;
+}
+
+/*
+ * Spout callback of the file back-end: perform the seek postponed by
+ * bfd_seek() if the descriptor is not at f->pos, then write out everything
+ * between f->buffer and f->bptr and reset the buffer. Dies on errors.
+ */
+static void
+bfd_spout(struct fastbuf *f)
+{
+  struct fb_file *F = FB_FILE(f);
+
+  /* Delayed lseek(), only when the real position differs */
+  if (F->wpos != f->pos)
+    {
+      if (sh_seek(F->fd, f->pos, SEEK_SET) < 0)
+       die("Error seeking %s: %m", f->name);
+    }
+
+  byte *src = f->buffer;
+  int remains = f->bptr - f->buffer;
+
+  /* Account for the data first, then push them out piece by piece */
+  f->pos += remains;
+  F->wpos = f->pos;
+  F->wlen = 0;
+  for (; remains; )
+    {
+      int written = write(F->fd, src, remains);
+      if (written <= 0)
+       die("Error writing %s: %m", f->name);
+      src += written;
+      remains -= written;
+    }
+  f->bptr = f->buffer = FB_BUFFER(f);
+}
+
+/*
+ * Seek callback of the file back-end. SEEK_SET and SEEK_CUR only record the
+ * new logical position; the real lseek() is left to the next refill or spout.
+ * SEEK_END asks the kernel immediately, since the file size is needed.
+ * Returns 1 on success and 0 on failure (wrapped SEEK_CUR offset or a failed
+ * lseek()).
+ */
+static int
+bfd_seek(struct fastbuf *f, sh_off_t pos, int whence)
+{
+  if (whence == SEEK_SET)
+    {
+      f->pos = pos;
+      return 1;
+    }
+  if (whence == SEEK_CUR)
+    {
+      sh_off_t target = f->pos + pos;
+      /* The position must move in the direction given by the sign of the offset */
+      if ((pos > 0) ^ (target > f->pos))
+       return 0;
+      f->pos = target;
+      return 1;
+    }
+  if (whence == SEEK_END)
+    {
+      sh_off_t end = sh_seek(FB_FILE(f)->fd, pos, SEEK_END);
+      if (end < 0)
+       return 0;
+      FB_FILE(f)->wpos = f->pos = end;
+      FB_FILE(f)->wlen = 0;
+      return 1;
+    }
+  ASSERT(0);
+}
+
+/* Close callback: let bclose_file_helper() deal with the descriptor, then free the fastbuf */
+static void
+bfd_close(struct fastbuf *f)
+{
+  struct fb_file *F = FB_FILE(f);
+
+  bclose_file_helper(f, F->fd, F->is_temp_file);
+  xfree(f);
+}
+
+/*
+ * Config callback of the file back-end. Understands BCONFIG_IS_TEMP_FILE and
+ * BCONFIG_KEEP_BACK_BUF; stores @value and returns the previous setting.
+ * Unknown items yield -1.
+ */
+static int
+bfd_config(struct fastbuf *f, uns item, int value)
+{
+  struct fb_file *F = FB_FILE(f);
+  int *slot;
+
+  if (item == BCONFIG_IS_TEMP_FILE)
+    slot = &F->is_temp_file;
+  else if (item == BCONFIG_KEEP_BACK_BUF)
+    slot = &F->keep_back_buf;
+  else
+    return -1;
+
+  int previous = *slot;
+  *slot = value;
+  return previous;
+}
+
+/*
+ * Create a fastbuf on an already opened file descriptor. The structure, the
+ * buffer of @buflen bytes and a copy of @name are placed in a single
+ * allocation (in this order), so that bfd_close() can release everything by
+ * one xfree(). @buflen must be non-zero.
+ */
+struct fastbuf *
+bfdopen_internal(int fd, const char *name, uns buflen)
+{
+  ASSERT(buflen);
+  int namelen = strlen(name) + 1;
+  /* xmalloc_zero() clears the whole block, so no additional bzero() is needed */
+  struct fb_file *F = xmalloc_zero(sizeof(struct fb_file) + buflen + namelen);
+  struct fastbuf *f = &F->fb;
+
+  f->buffer = (byte *)(F+1);
+  f->bptr = f->bstop = f->buffer;
+  f->bufend = f->buffer + buflen;
+  /* The name lives right behind the buffer */
+  f->name = f->bufend;
+  memcpy(f->name, name, namelen);
+  F->fd = fd;
+  f->refill = bfd_refill;
+  f->spout = bfd_spout;
+  f->seek = bfd_seek;
+  f->close = bfd_close;
+  f->config = bfd_config;
+  f->can_overwrite_buffer = 2;
+  return f;
+}
+
+/* Flush the fastbuf and fsync() its descriptor; a failed fsync() is only logged */
+void
+bfilesync(struct fastbuf *b)
+{
+  bflush(b);
+
+  int fd = FB_FILE(b)->fd;
+  if (fsync(fd) < 0)
+    msg(L_ERROR, "fsync(%s) failed: %m", b->name);
+}
+
+#ifdef TEST
+
+int main(void)
+{
+  struct fastbuf *f, *t;
+  f = bopen_tmp(16);
+  t = bfdopen_shared(1, 13);
+  for (uns i = 0; i < 16; i++)
+    bwrite(f, "<hello>", 7);
+  bprintf(t, "%d\n", (int)btell(f));
+  brewind(f);
+  bbcopy(f, t, ~0U);
+  bprintf(t, "\n%d %d\n", (int)btell(f), (int)btell(t));
+  bclose(f);
+  bclose(t);
+  return 0;
+}
+
+#endif
diff --git a/lib/fb-grow.c b/lib/fb-grow.c

new file mode 100644 (file)

index 0000000..f9a654a
--- /dev/null
+++ b/lib/fb-grow.c
@@ -0,0 +1,139 @@
+/*
+ *     UCW Library -- Fast Buffered I/O on Growing Buffers
+ *
+ *     (c) 2006 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/fastbuf.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+struct fb_gbuf {
+  struct fastbuf fb;
+  byte *last_written;                  /* End of written data as recorded by fbgrow_rewind(); NULL after fbgrow_reset() */
+};
+#define FB_GBUF(f) ((struct fb_gbuf *)(f)->is_fastbuf)
+
+/*
+ * Refill callback: if f->bstop no longer points at the recorded end of data,
+ * restore it (and the position) and report success; otherwise there is
+ * nothing more to read and 0 is returned.
+ */
+static int
+fbgrow_refill(struct fastbuf *b)
+{
+  byte *end = FB_GBUF(b)->last_written;
+
+  /* We are at the end */
+  if (b->bstop == end)
+    return 0;
+
+  /* There was an intervening flush */
+  b->bstop = end;
+  b->pos = b->bstop - b->buffer;
+  return 1;
+}
+
+/*
+ * Spout callback: when the write pointer has reached the end of the buffer,
+ * double the buffer. A zero-sized buffer (fbgrow_create(0)) is grown to a
+ * small non-zero size instead, because doubling zero would never make room
+ * for any data. Nothing is done while there is still free space.
+ */
+static void
+fbgrow_spout(struct fastbuf *b)
+{
+  if (b->bptr >= b->bufend)
+    {
+      uns len = b->bufend - b->buffer;
+      uns new_len = len ? 2*len : 16;
+      b->buffer = xrealloc(b->buffer, new_len);
+      b->bufend = b->buffer + new_len;
+      b->bstop = b->buffer;
+      /* The old buffer was full, so writing continues right behind its contents */
+      b->bptr = b->buffer + len;
+    }
+}
+
+/*
+ * Seek callback: move the read pointer inside the data recorded by
+ * fbgrow_rewind(). SEEK_END offsets are taken relative to the end of the
+ * data, everything else is treated as an absolute offset. The target must
+ * lie within the data. Always returns 1.
+ */
+static int
+fbgrow_seek(struct fastbuf *b, sh_off_t pos, int whence)
+{
+  byte *end = FB_GBUF(b)->last_written;
+
+  ASSERT(end);                         /* Seeks allowed only in read mode */
+  sh_off_t total = end - b->buffer;
+  sh_off_t target = (whence == SEEK_END) ? pos + total : pos;
+  ASSERT(target >= 0 && target <= total);
+
+  b->bstop = end;
+  b->bptr = b->buffer + target;
+  b->pos = total;
+  return 1;
+}
+
+/* Close callback: the data buffer is allocated separately from the fastbuf, so both are freed */
+static void
+fbgrow_close(struct fastbuf *b)
+{
+  xfree(b->buffer);
+  xfree(b);
+}
+
+/*
+ * Create a fastbuf writing to a growing memory buffer whose initial size is
+ * @basic_size bytes. The stream starts empty; it is released by its close
+ * callback.
+ */
+struct fastbuf *
+fbgrow_create(unsigned basic_size)
+{
+  struct fastbuf *b = xmalloc_zero(sizeof(struct fb_gbuf));
+  b->buffer = xmalloc(basic_size);
+  b->bufend = b->buffer + basic_size;
+  b->bptr = b->bstop = b->buffer;
+  b->name = "<fbgbuf>";
+  b->refill = fbgrow_refill;
+  b->spout = fbgrow_spout;
+  b->seek = fbgrow_seek;
+  b->close = fbgrow_close;
+  b->can_overwrite_buffer = 1;
+  return b;
+}
+
+/* Forget all data and start writing from the beginning of the buffer again (the buffer itself is kept) */
+void
+fbgrow_reset(struct fastbuf *b)
+{
+  b->bptr = b->bstop = b->buffer;
+  b->pos = 0;
+  FB_GBUF(b)->last_written = NULL;
+}
+
+/*
+ * Prepare the stream for reading from its start. On the first call after
+ * writing, the current write pointer is recorded as the end of data; later
+ * calls reuse the recorded end.
+ */
+void
+fbgrow_rewind(struct fastbuf *b)
+{
+  struct fb_gbuf *G = FB_GBUF(b);
+
+  /* Last operation was a write, so remember the end position */
+  if (!G->last_written)
+    G->last_written = b->bptr;
+
+  b->bstop = G->last_written;
+  b->bptr = b->buffer;
+  b->pos = b->bstop - b->buffer;
+}
+
+#ifdef TEST
+
+int main(void)
+{
+  struct fastbuf *f;
+  uns t;
+
+  f = fbgrow_create(3);
+  for (uns i=0; i<5; i++)
+    {
+      fbgrow_reset(f);
+      bwrite(f, "12345", 5);
+      bwrite(f, "12345", 5);
+      printf("<%d>", (int)btell(f));
+      bflush(f);
+      printf("<%d>", (int)btell(f));
+      fbgrow_rewind(f);
+      printf("<%d>", (int)btell(f));
+      while ((t = bgetc(f)) != ~0U)
+       putchar(t);
+      printf("<%d>", (int)btell(f));
+      fbgrow_rewind(f);
+      bseek(f, -1, SEEK_END);
+      printf("<%d>", (int)btell(f));
+      while ((t = bgetc(f)) != ~0U)
+       putchar(t);
+      printf("<%d>\n", (int)btell(f));
+    }
+  bclose(f);
+  return 0;
+}
+
+#endif
diff --git a/lib/fb-limfd.c b/lib/fb-limfd.c

new file mode 100644 (file)

index 0000000..50bc521
--- /dev/null
+++ b/lib/fb-limfd.c
@@ -0,0 +1,75 @@
+/*
+ *     UCW Library -- Fast Buffered Input on Limited File Descriptors
+ *
+ *     (c) 2003--2004 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/fastbuf.h"
+
+#include <unistd.h>
+
+struct fb_limfd {
+  struct fastbuf fb;
+  int fd;                              /* File descriptor */
+  int limit;                           /* bfl_refill() never reads beyond this stream position */
+};
+#define FB_LIMFD(f) ((struct fb_limfd *)(f)->is_fastbuf)
+#define FB_BUFFER(f) (byte *)(FB_LIMFD(f) + 1)
+
+/*
+ * Refill callback: read at most one buffer of data, but never more than what
+ * remains up to the configured limit. Returns the number of bytes read
+ * (0 once the limit or the end of input is reached); dies on read errors.
+ */
+static int
+bfl_refill(struct fastbuf *f)
+{
+  struct fb_limfd *F = FB_LIMFD(f);
+
+  f->bptr = f->buffer = FB_BUFFER(f);
+  int room = MIN(F->limit - f->pos, f->bufend - f->buffer);
+  int got = read(F->fd, f->buffer, room);
+  if (got < 0)
+    die("Error reading %s: %m", f->name);
+  f->pos += got;
+  f->bstop = f->buffer + got;
+  return got;
+}
+
+/* Close callback: only the fastbuf is freed, the file descriptor is left open */
+static void
+bfl_close(struct fastbuf *f)
+{
+  xfree(f);
+}
+
+/*
+ * Create a read-only fastbuf on descriptor @fd with a buffer of @buflen bytes
+ * which stops reading once the stream position reaches @limit. No spout or
+ * seek callback is installed, and closing the fastbuf does not close @fd.
+ */
+struct fastbuf *
+bopen_limited_fd(int fd, uns buflen, uns limit)
+{
+  struct fb_limfd *F = xmalloc(sizeof(struct fb_limfd) + buflen);
+  struct fastbuf *f = &F->fb;
+
+  /* Clear only the structure; the buffer behind it is filled by bfl_refill() */
+  bzero(F, sizeof(*F));
+  f->buffer = (char *)(F+1);
+  f->bptr = f->bstop = f->buffer;
+  f->bufend = f->buffer + buflen;
+  f->name = "limited-fd";
+  F->fd = fd;
+  F->limit = limit;
+  f->refill = bfl_refill;
+  f->close = bfl_close;
+  f->can_overwrite_buffer = 2;
+  return f;
+}
+
+#ifdef TEST
+
+int main(int argc, char **argv)
+{
+  struct fastbuf *f = bopen_limited_fd(0, 3, 13);
+  struct fastbuf *o = bfdopen_shared(1, 16);
+  int c;
+  while ((c = bgetc(f)) >= 0)
+    bputc(o, c);
+  bclose(o);
+  bclose(f);
+  return 0;
+}
+
+#endif
diff --git a/lib/fb-mem.c b/lib/fb-mem.c

new file mode 100644 (file)

index 0000000..c304e16
--- /dev/null
+++ b/lib/fb-mem.c
@@ -0,0 +1,221 @@
+/*
+ *     UCW Library -- Fast Buffered I/O on Memory Streams
+ *
+ *     (c) 1997--2002 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/fastbuf.h"
+
+#include <stdio.h>
+
+/* A memory stream shared by the writer and its reading clones */
+struct memstream {
+  unsigned blocksize;                  /* Size of the data area of each newly allocated block */
+  unsigned uc;                         /* Use count; the stream is freed when it drops to zero */
+  struct msblock *first;               /* Head of the list of blocks, NULL if nothing was written yet */
+};
+
+/* One block of a memory stream */
+struct msblock {
+  struct msblock *next;
+  sh_off_t pos;                        /* Stream position of the first byte of the block */
+  unsigned size;                       /* Number of bytes used in data[], updated by fbmem_spout() */
+  byte data[0];
+};
+
+/* A fastbuf attached to a memory stream */
+struct fb_mem {
+  struct fastbuf fb;
+  struct memstream *stream;
+  struct msblock *block;               /* Block the buffer currently points to, NULL if none yet */
+};
+#define FB_MEM(f) ((struct fb_mem *)(f)->is_fastbuf)
+
+/*
+ * Refill callback of a reading clone. Depending on the state it either
+ * exposes bytes which were added to the current block since the last call,
+ * or moves on to the next block of the stream (to the first one if no block
+ * has been read yet). Returns 1 if some data were made available, 0 otherwise.
+ */
+static int
+fbmem_refill(struct fastbuf *f)
+{
+  struct memstream *s = FB_MEM(f)->stream;
+  struct msblock *b = FB_MEM(f)->block;
+
+  if (!b)
+    {
+      b = s->first;
+      if (!b)
+       return 0;
+    }
+  else if (f->buffer == b->data && f->bstop < b->data + b->size)
+    {
+      /* The current block has grown, just extend the readable part */
+      f->bstop = b->data + b->size;
+      f->pos = b->pos + b->size;
+      return 1;
+    }
+  else if (!b->next)
+    return 0;
+  else
+    b = b->next;
+  if (!b->size)
+    return 0;
+  f->buffer = f->bptr = b->data;
+  f->bufend = f->bstop = b->data + b->size;
+  f->pos = b->pos + b->size;
+  FB_MEM(f)->block = b;
+  return 1;
+}
+
+/*
+ * Spout callback of the writer. Records how many bytes of the current block
+ * are in use; if the block is full (or there is no block yet), a new block of
+ * s->blocksize bytes is appended to the stream and the buffer is pointed at it.
+ */
+static void
+fbmem_spout(struct fastbuf *f)
+{
+  struct memstream *s = FB_MEM(f)->stream;
+  struct msblock *b = FB_MEM(f)->block;
+  struct msblock *bb;
+
+  if (b)
+    {
+      b->size = f->bptr - b->data;
+      /* Still some room left in the current block */
+      if (b->size < s->blocksize)
+       return;
+    }
+  bb = xmalloc(sizeof(struct msblock) + s->blocksize);
+  if (b)
+    {
+      b->next = bb;
+      bb->pos = b->pos + b->size;
+    }
+  else
+    {
+      s->first = bb;
+      bb->pos = 0;
+    }
+  bb->next = NULL;
+  bb->size = 0;
+  f->buffer = f->bptr = f->bstop = bb->data;
+  f->bufend = bb->data + s->blocksize;
+  f->pos = bb->pos;
+  FB_MEM(f)->block = bb;
+}
+
+/*
+ * Seek callback of a reading clone. Only SEEK_SET and SEEK_END are supported;
+ * SEEK_END offsets are relative to the total number of bytes in the stream.
+ * The block containing the target offset becomes the current buffer.
+ * Returns 1 on success; dies if the offset lies outside the stream
+ * (negative, or behind the end of the written data).
+ */
+static int
+fbmem_seek(struct fastbuf *f, sh_off_t pos, int whence)
+{
+  struct memstream *m = FB_MEM(f)->stream;
+  struct msblock *b;
+
+  ASSERT(whence == SEEK_SET || whence == SEEK_END);
+  if (whence == SEEK_END)
+    {
+      for (b=m->first; b; b=b->next)
+       pos += b->size;
+    }
+  /* A negative offset would match the first block and put bptr in front of its data */
+  if (pos < 0)
+    die("fbmem_seek to invalid offset");
+  /* Yes, this is linear. But considering the average number of buckets, it doesn't matter. */
+  for (b=m->first; b; b=b->next)
+    {
+      if (pos <= b->pos + (sh_off_t)b->size) /* <=, because we need to be able to seek just after file end */
+       {
+         f->buffer = b->data;
+         f->bptr = b->data + (pos - b->pos);
+         f->bufend = f->bstop = b->data + b->size;
+         f->pos = b->pos + b->size;
+         FB_MEM(f)->block = b;
+         return 1;
+       }
+    }
+  if (!m->first && !pos)
+    {
+      /* Seeking to offset 0 in an empty file needs an exception */
+      f->buffer = f->bptr = f->bufend = NULL;
+      f->pos = 0;
+      FB_MEM(f)->block = NULL;
+      return 1;
+    }
+  die("fbmem_seek to invalid offset");
+}
+
+/*
+ * Close callback shared by the writer and the reading clones: drop one
+ * reference to the stream and free all its blocks together with the stream
+ * when it was the last one; the fastbuf itself is always freed.
+ */
+static void
+fbmem_close(struct fastbuf *f)
+{
+  struct memstream *m = FB_MEM(f)->stream;
+
+  m->uc--;
+  if (!m->uc)
+    {
+      struct msblock *b, *next;
+      for (b = m->first; b; b = next)
+       {
+         next = b->next;
+         xfree(b);
+       }
+      xfree(m);
+    }
+  xfree(f);
+}
+
+/*
+ * Create a new memory stream with blocks of @blocksize bytes and return the
+ * fastbuf for writing to it. The returned fastbuf has no refill or seek
+ * callback; readers are obtained by fbmem_clone_read().
+ */
+struct fastbuf *
+fbmem_create(unsigned blocksize)
+{
+  struct fastbuf *f = xmalloc_zero(sizeof(struct fb_mem));
+  struct memstream *s = xmalloc_zero(sizeof(struct memstream));
+
+  s->blocksize = blocksize;
+  s->uc = 1;
+
+  FB_MEM(f)->stream = s;
+  f->name = "<fbmem-write>";
+  f->spout = fbmem_spout;
+  f->close = fbmem_close;
+  return f;
+}
+
+/*
+ * Create a reading fastbuf attached to the memory stream of @b. The stream
+ * of @b is flushed first and its use count is incremented, so the data stay
+ * allocated until every fastbuf sharing the stream has been closed.
+ */
+struct fastbuf *
+fbmem_clone_read(struct fastbuf *b)
+{
+  struct fastbuf *f = xmalloc_zero(sizeof(struct fb_mem));
+  struct memstream *s = FB_MEM(b)->stream;
+
+  bflush(b);
+  s->uc++;
+
+  FB_MEM(f)->stream = s;
+  f->name = "<fbmem-read>";
+  f->refill = fbmem_refill;
+  f->seek = fbmem_seek;
+  f->close = fbmem_close;
+  f->can_overwrite_buffer = 1;
+  return f;
+}
+
+#ifdef TEST
+
+int main(void)
+{
+  struct fastbuf *w, *r;
+  int t;
+
+  w = fbmem_create(7);
+  r = fbmem_clone_read(w);
+  bwrite(w, "12345", 5);
+  bwrite(w, "12345", 5);
+  printf("<%d>", (int)btell(w));
+  bflush(w);
+  printf("<%d>", (int)btell(w));
+  printf("<%d>", (int)btell(r));
+  while ((t = bgetc(r)) >= 0)
+    putchar(t);
+  printf("<%d>", (int)btell(r));
+  bwrite(w, "12345", 5);
+  bwrite(w, "12345", 5);
+  printf("<%d>", (int)btell(w));
+  bclose(w);
+  bsetpos(r, 0);
+  printf("<!%d>", (int)btell(r));
+  while ((t = bgetc(r)) >= 0)
+    putchar(t);
+  bsetpos(r, 3);
+  printf("<!%d>", (int)btell(r));
+  while ((t = bgetc(r)) >= 0)
+    putchar(t);
+  fflush(stdout);
+  bclose(r);
+  return 0;
+}
+
+#endif
diff --git a/lib/fb-mmap.c b/lib/fb-mmap.c

new file mode 100644 (file)

index 0000000..a57d103
--- /dev/null
+++ b/lib/fb-mmap.c
@@ -0,0 +1,228 @@
+/*
+ *     UCW Library -- Fast Buffered I/O on Memory-Mapped Files
+ *
+ *     (c) 2002 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#undef LOCAL_DEBUG
+
+#include "lib/lib.h"
+#include "lib/fastbuf.h"
+#include "lib/lfs.h"
+#include "lib/conf.h"
+
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/mman.h>
+
+static uns mmap_window_size = 16*CPU_PAGE_SIZE;
+static uns mmap_extend_size = 4*CPU_PAGE_SIZE;
+
+static struct cf_section fbmm_config = {
+  CF_ITEMS {
+    CF_UNS("WindowSize", &mmap_window_size),
+    CF_UNS("ExtendSize", &mmap_extend_size),
+    CF_END
+  }
+};
+
+static void CONSTRUCTOR fbmm_init_config(void)
+{
+  cf_declare_section("FBMMap", &fbmm_config, 0);
+}
+
+struct fb_mmap {
+  struct fastbuf fb;
+  int fd;
+  int is_temp_file;                    /* Set via BCONFIG_IS_TEMP_FILE, passed to bclose_file_helper() on close */
+  sh_off_t file_size;                  /* Logical size of the file (end of data) */
+  sh_off_t file_extend;                        /* Size the file was extended to by ftruncate(); trimmed back to file_size on close */
+  sh_off_t window_pos;                 /* File offset of the start of the mapped window */
+  uns window_size;                     /* Length of the current mapping as passed to mmap()/munmap() */
+  int mode;                            /* Open flags; O_RDONLY selects a read-only mapping */
+};
+#define FB_MMAP(f) ((struct fb_mmap *)(f)->is_fastbuf)
+
+/*
+ * Map the window of the file which contains f->pos. The window starts at the
+ * page boundary at or below f->pos and spans at most mmap_window_size bytes,
+ * limited by F->file_extend. An existing mapping of the same page-aligned
+ * length is replaced in place (MAP_FIXED), otherwise it is unmapped first.
+ * Sets f->buffer, f->bufend, f->bptr and F->window_pos; dies if mmap() fails.
+ */
+static void
+bfmm_map_window(struct fastbuf *f)
+{
+  struct fb_mmap *F = FB_MMAP(f);
+  sh_off_t pos0 = f->pos & ~(sh_off_t)(CPU_PAGE_SIZE-1);
+  int l = MIN((sh_off_t)mmap_window_size, F->file_extend - pos0);
+  uns ll = ALIGN_TO(l, CPU_PAGE_SIZE);
+  int prot = ((F->mode & O_ACCMODE) == O_RDONLY) ? PROT_READ : (PROT_READ | PROT_WRITE);
+
+  DBG(" ... Mapping %x(%x)+%x(%x) len=%x extend=%x", (int)pos0, (int)f->pos, ll, l, (int)F->file_size, (int)F->file_extend);
+  if (ll != F->window_size && f->buffer)
+    {
+      munmap(f->buffer, F->window_size);
+      f->buffer = NULL;
+    }
+  F->window_size = ll;
+  if (!f->buffer)
+    f->buffer = sh_mmap(NULL, ll, prot, MAP_SHARED, F->fd, pos0);
+  else
+    f->buffer = sh_mmap(f->buffer, ll, prot, MAP_SHARED | MAP_FIXED, F->fd, pos0);
+  if (f->buffer == (byte *) MAP_FAILED)
+    die("mmap(%s): %m", f->name);
+#ifdef MADV_SEQUENTIAL
+  if (ll > CPU_PAGE_SIZE)
+    madvise(f->buffer, ll, MADV_SEQUENTIAL);
+#endif
+  /* The usable part of the window is l bytes, even though ll bytes are mapped */
+  f->bufend = f->buffer + l;
+  f->bptr = f->buffer + (f->pos - pos0);
+  F->window_pos = pos0;
+}
+
+/*
+ * Refill callback: returns 0 once f->pos has reached the file size. Otherwise
+ * a new window is mapped if the current one is exhausted, the readable part
+ * is limited to the real file size and f->pos is moved to the position
+ * corresponding to f->bstop. Returns 1 when data are available.
+ */
+static int
+bfmm_refill(struct fastbuf *f)
+{
+  struct fb_mmap *F = FB_MMAP(f);
+
+  DBG("Refill <- %p %p %p %p", f->buffer, f->bptr, f->bstop, f->bufend);
+  if (f->pos >= F->file_size)
+    return 0;
+  if (f->bstop >= f->bufend)
+    bfmm_map_window(f);
+  if (F->window_pos + (f->bufend - f->buffer) > F->file_size)
+    f->bstop = f->buffer + (F->file_size - F->window_pos);
+  else
+    f->bstop = f->bufend;
+  f->pos = F->window_pos + (f->bstop - f->buffer);
+  DBG(" -> %p %p %p(%x) %p", f->buffer, f->bptr, f->bstop, (int)f->pos, f->bufend);
+  return 1;
+}
+
+/*
+ * Spout callback: account written data in F->file_size and, once the current
+ * window is full, extend the underlying file by ftruncate() if needed and map
+ * the next window at the new position. Dies if ftruncate() fails.
+ */
+static void
+bfmm_spout(struct fastbuf *f)
+{
+  struct fb_mmap *F = FB_MMAP(f);
+  sh_off_t end = f->pos + (f->bptr - f->bstop);
+
+  DBG("Spout <- %p %p %p %p", f->buffer, f->bptr, f->bstop, f->bufend);
+  if (end > F->file_size)
+    F->file_size = end;
+  /* Still some room left in the window, nothing to remap */
+  if (f->bptr < f->bufend)
+    return;
+  f->pos = end;
+  if (f->pos >= F->file_extend)
+    {
+      F->file_extend = ALIGN_TO(F->file_extend + mmap_extend_size, (sh_off_t)CPU_PAGE_SIZE);
+      if (sh_ftruncate(F->fd, F->file_extend))
+       die("ftruncate(%s): %m", f->name);
+    }
+  bfmm_map_window(f);
+  f->bstop = f->bptr;
+  DBG(" -> %p %p %p(%x) %p", f->buffer, f->bptr, f->bstop, (int)f->pos, f->bufend);
+}
+
+/*
+ * Seek callback: only SEEK_SET and SEEK_END are accepted and the target must
+ * lie within the file. The buffer pointers are collapsed so that the next
+ * read or write has to go through refill/spout. Always returns 1.
+ */
+static int
+bfmm_seek(struct fastbuf *f, sh_off_t pos, int whence)
+{
+  sh_off_t target = pos;
+
+  if (whence != SEEK_END)
+    ASSERT(whence == SEEK_SET);
+  else
+    target += FB_MMAP(f)->file_size;
+  ASSERT(target >= 0 && target <= FB_MMAP(f)->file_size);
+
+  f->pos = target;
+  /* force refill/spout call */
+  f->bufend = f->buffer;
+  f->bstop = f->buffer;
+  f->bptr = f->buffer;
+  DBG("Seek -> %p %p %p(%x) %p", f->buffer, f->bptr, f->bstop, (int)f->pos, f->bufend);
+  return 1;
+}
+
+/*
+ * Close callback: drop the mapping, cut the file back to its logical size if
+ * it was extended beyond it, and hand the descriptor to bclose_file_helper().
+ */
+static void
+bfmm_close(struct fastbuf *f)
+{
+  struct fb_mmap *F = FB_MMAP(f);
+
+  if (f->buffer)
+    munmap(f->buffer, F->window_size);
+
+  if (F->file_extend > F->file_size)
+    {
+      if (sh_ftruncate(F->fd, F->file_size))
+       die("ftruncate(%s): %m", f->name);
+    }
+
+  bclose_file_helper(f, F->fd, F->is_temp_file);
+  xfree(f);
+}
+
+/*
+ * Config callback of the mmap back-end. The only item understood is
+ * BCONFIG_IS_TEMP_FILE: it stores @value and returns the previous flag.
+ * Any other item yields -1.
+ */
+static int
+bfmm_config(struct fastbuf *f, uns item, int value)
+{
+  if (item != BCONFIG_IS_TEMP_FILE)
+    return -1;
+
+  struct fb_mmap *F = FB_MMAP(f);
+  int previous = F->is_temp_file;
+  F->is_temp_file = value;
+  return previous;
+}
+
+/*
+ * Create a memory-mapped fastbuf on an already opened descriptor @fd.
+ * @mode holds the open flags: they select the protection of the mapping
+ * and, with O_APPEND, the initial position at the end of the file.
+ * The current file size is obtained by seeking to the end of the file;
+ * dies if that fails. No window is mapped here (f->buffer stays NULL).
+ */
+struct fastbuf *
+bfmmopen_internal(int fd, const char *name, uns mode)
+{
+  int namelen = strlen(name) + 1;
+  struct fb_mmap *F = xmalloc(sizeof(struct fb_mmap) + namelen);
+  struct fastbuf *f = &F->fb;
+
+  bzero(F, sizeof(*F));
+  /* The copy of the name is stored right behind the structure */
+  f->name = (byte *)(F+1);
+  memcpy(f->name, name, namelen);
+  F->fd = fd;
+  F->file_extend = F->file_size = sh_seek(fd, 0, SEEK_END);
+  if (F->file_size < 0)
+    die("seek(%s): %m", name);
+  if (mode & O_APPEND)
+    f->pos = F->file_size;
+  F->mode = mode;
+
+  f->refill = bfmm_refill;
+  f->spout = bfmm_spout;
+  f->seek = bfmm_seek;
+  f->close = bfmm_close;
+  f->config = bfmm_config;
+  return f;
+}
+
+#ifdef TEST
+
+/*
+ * Test driver: copy the file given as the first argument to the file given
+ * as the second one through mmap fastbufs and then exercise seeking,
+ * writing and reading on the copy.
+ */
+int main(int argc, char **argv)
+{
+  /* Both file names are mandatory; without this check argv[1] or argv[2] may be NULL */
+  if (argc != 3)
+    die("Expected two arguments: <input-file> <output-file>");
+
+  struct fb_params par = { .type = FB_MMAP };
+  struct fastbuf *f = bopen_file(argv[1], O_RDONLY, &par);
+  struct fastbuf *g = bopen_file(argv[2], O_RDWR | O_CREAT | O_TRUNC, &par);
+  int c;
+
+  DBG("Copying");
+  while ((c = bgetc(f)) >= 0)
+    bputc(g, c);
+  bclose(f);
+  DBG("Seek inside last block");
+  bsetpos(g, btell(g)-1333);
+  bputc(g, 13);
+  DBG("Seek to the beginning & write");
+  bsetpos(g, 1333);
+  bputc(g, 13);
+  DBG("flush");
+  bflush(g);
+  bputc(g, 13);
+  bflush(g);
+  DBG("Seek nearby & read");
+  bsetpos(g, 133);
+  bgetc(g);
+  DBG("Seek far & read");
+  bsetpos(g, 133333);
+  bgetc(g);
+  DBG("Closing");
+  bclose(g);
+
+  return 0;
+}
+
+#endif
diff --git a/lib/fb-param.c b/lib/fb-param.c

new file mode 100644 (file)

index 0000000..854ba78
--- /dev/null
+++ b/lib/fb-param.c
@@ -0,0 +1,184 @@
+/*
+ *     UCW Library -- FastIO on files with run-time parametrization
+ *
+ *     (c) 2007 Pavel Charvat <pchar@ucw.cz>
+ *     (c) 2007 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/conf.h"
+#include "lib/lfs.h"
+#include "lib/fastbuf.h"
+
+#include <fcntl.h>
+#include <stdio.h>
+
+struct fb_params fbpar_def = {
+  .buffer_size = 65536,
+  .read_ahead = 1,
+  .write_back = 1,
+};
+
+/*
+ * Commit hook of the fb_params configuration section. When the library is
+ * built without CONFIG_UCW_THREADS, the direct type is refused with an
+ * error message; otherwise NULL is returned.
+ */
+static char *
+fbpar_cf_commit(struct fb_params *p UNUSED)
+{
+#ifndef CONFIG_UCW_THREADS
+  if (p->type == FB_DIRECT)
+    return "Direct I/O is supported only with CONFIG_UCW_THREADS";
+#endif
+  return NULL;
+}
+
+struct cf_section fbpar_cf = {
+# define F(x) PTR_TO(struct fb_params, x)
+  CF_TYPE(struct fb_params),
+  CF_COMMIT(fbpar_cf_commit),
+  CF_ITEMS {
+    CF_LOOKUP("Type", (int *)F(type), ((char *[]){"std", "direct", "mmap", NULL})),
+    CF_UNS("BufSize", F(buffer_size)),
+    CF_UNS("KeepBackBuf", F(keep_back_buf)),
+    CF_UNS("ReadAhead", F(read_ahead)),
+    CF_UNS("WriteBack", F(write_back)),
+    CF_END
+  }
+# undef F
+};
+
+static struct cf_section fbpar_global_cf = {
+  CF_ITEMS {
+    CF_SECTION("Defaults", &fbpar_def, &fbpar_cf),
+    CF_END
+  }
+};
+
+static void CONSTRUCTOR
+fbpar_global_init(void)
+{
+  cf_declare_section("FBParam", &fbpar_global_cf, 0);
+}
+
+/*
+ * Create a fastbuf of the type selected by @params on the descriptor @fd.
+ * @mode holds the open flags of the descriptor, or ~0U if they are not known;
+ * in the latter case they are queried by fcntl() where the back-end needs
+ * them. @name may be NULL, in which case the stream is named "fd<number>".
+ * Zero buffer_size, read_ahead and write_back fall back to fbpar_def.
+ */
+static struct fastbuf *
+bopen_fd_internal(int fd, struct fb_params *params, uns mode, const char *name)
+{
+  char buf[32];
+  if (!name)
+    {
+      sprintf(buf, "fd%d", fd);
+      name = buf;
+    }
+  struct fastbuf *fb;
+  switch (params->type)
+    {
+#ifdef CONFIG_UCW_THREADS
+      case FB_DIRECT:
+       fb = fbdir_open_fd_internal(fd, name, params->asio,
+           params->buffer_size ? : fbpar_def.buffer_size,
+           params->read_ahead ? : fbpar_def.read_ahead,
+           params->write_back ? : fbpar_def.write_back);
+       /*
+        * For a descriptor we did not open ourselves, try to switch O_DIRECT on.
+        * Each fcntl() result is compared with zero on its own: comparing the
+        * result of the || operator (which is 0 or 1) would never report an error.
+        */
+       if (!~mode && !fbdir_cheat &&
+           ((int)(mode = fcntl(fd, F_GETFL)) < 0 || fcntl(fd, F_SETFL, mode | O_DIRECT) < 0))
+          msg(L_WARN, "Cannot set O_DIRECT on fd %d: %m", fd);
+       return fb;
+#endif
+      case FB_STD:
+       fb = bfdopen_internal(fd, name,
+           params->buffer_size ? : fbpar_def.buffer_size);
+       if (params->keep_back_buf)
+         bconfig(fb, BCONFIG_KEEP_BACK_BUF, 1);
+       return fb;
+      case FB_MMAP:
+       /* The mmap back-end needs the access mode to choose the protection of the mapping */
+       if (!~mode && (int)(mode = fcntl(fd, F_GETFL)) < 0)
+          die("Cannot get flags of fd %d: %m", fd);
+       return bfmmopen_internal(fd, name, mode);
+      default:
+       ASSERT(0);
+    }
+}
+
+/*
+ * Open the file @name with flags @mode and wrap it in a fastbuf described by
+ * @params (fbpar_def if NULL). Direct I/O adds O_DIRECT unless fbdir_cheat is
+ * set and the mmap back-end turns write-only access into read-write.
+ * If the file cannot be opened, NULL is returned when @try is non-zero,
+ * otherwise the function dies. With O_APPEND the stream is positioned at the
+ * end of the file.
+ */
+static struct fastbuf *
+bopen_file_internal(const char *name, int mode, struct fb_params *params, int try)
+{
+  struct fb_params *par = params ? params : &fbpar_def;
+
+#ifdef CONFIG_UCW_THREADS
+  if (par->type == FB_DIRECT && !fbdir_cheat)
+    mode |= O_DIRECT;
+#endif
+  if (par->type == FB_MMAP && (mode & O_ACCMODE) == O_WRONLY)
+    mode = (mode & ~O_ACCMODE) | O_RDWR;
+
+  int fd = sh_open(name, mode, 0666);
+  if (fd < 0)
+    {
+      if (!try)
+       die("Unable to %s file %s: %m", (mode & O_CREAT) ? "create" : "open", name);
+      return NULL;
+    }
+
+  struct fastbuf *fb = bopen_fd_internal(fd, par, mode, name);
+  ASSERT(fb);
+  if (mode & O_APPEND)
+    bseek(fb, 0, SEEK_END);
+  return fb;
+}
+
+/* Open a file as a fastbuf described by @params (NULL selects the defaults); dies if the file cannot be opened */
+struct fastbuf *
+bopen_file(const char *name, int mode, struct fb_params *params)
+{
+  return bopen_file_internal(name, mode, params, 0);
+}
+
+/* Like bopen_file(), but returns NULL instead of dying when the file cannot be opened */
+struct fastbuf *
+bopen_file_try(const char *name, int mode, struct fb_params *params)
+{
+  return bopen_file_internal(name, mode, params, 1);
+}
+
+/* Wrap an already opened descriptor in a fastbuf described by @params (NULL selects the defaults); the open flags are passed as unknown (~0U) */
+struct fastbuf *
+bopen_fd(int fd, struct fb_params *params)
+{
+  return bopen_fd_internal(fd, params ? : &fbpar_def, ~0U, NULL);
+}
+
+/* Function for use by individual file back-ends */
+
+/*
+ * Common part of the close callbacks of file-based back-ends, driven by the
+ * value of the BCONFIG_IS_TEMP_FILE setting:
+ *   0 -- close the descriptor,
+ *   1 -- unlink the file (a failure is only logged) and close the descriptor,
+ *   anything else (bfdopen_shared() uses 2) -- leave the descriptor alone.
+ * Dies if close() fails.
+ */
+void
+bclose_file_helper(struct fastbuf *f, int fd, int is_temp_file)
+{
+  if (is_temp_file != 0 && is_temp_file != 1)
+    return;
+
+  if (is_temp_file == 1 && unlink(f->name) < 0)
+    msg(L_ERROR, "unlink(%s): %m", f->name);
+
+  if (close(fd))
+    die("close(%s): %m", f->name);
+}
+
+/* Compatibility wrappers */
+
+/* Compatibility wrapper: bopen_file_try() with a standard (FB_STD) fastbuf and a buffer of @buflen bytes */
+struct fastbuf *
+bopen_try(const char *name, uns mode, uns buflen)
+{
+  return bopen_file_try(name, mode, &(struct fb_params){ .type = FB_STD, .buffer_size = buflen });
+}
+
+/* Compatibility wrapper: bopen_file() with a standard (FB_STD) fastbuf and a buffer of @buflen bytes */
+struct fastbuf *
+bopen(const char *name, uns mode, uns buflen)
+{
+  return bopen_file(name, mode, &(struct fb_params){ .type = FB_STD, .buffer_size = buflen });
+}
+
+/* Compatibility wrapper: bopen_fd() with a standard (FB_STD) fastbuf and a buffer of @buflen bytes */
+struct fastbuf *
+bfdopen(int fd, uns buflen)
+{
+  return bopen_fd(fd, &(struct fb_params){ .type = FB_STD, .buffer_size = buflen });
+}
+
+/*
+ * Like bfdopen(), but the temp-file setting is set to 2, for which
+ * bclose_file_helper() neither unlinks nor closes anything, so @fd stays
+ * open after the fastbuf is closed.
+ */
+struct fastbuf *
+bfdopen_shared(int fd, uns buflen)
+{
+  struct fastbuf *f = bfdopen(fd, buflen);
+  bconfig(f, BCONFIG_IS_TEMP_FILE, 2);
+  return f;
+}
diff --git a/lib/fb-pool.c b/lib/fb-pool.c

new file mode 100644 (file)

index 0000000..e847b7d
--- /dev/null
+++ b/lib/fb-pool.c
@@ -0,0 +1,81 @@
+/*
+ *     UCW Library -- Fast Buffered I/O on Memory Pools
+ *
+ *     (c) 2007 Pavel Charvat <pchar@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/mempool.h"
+#include "lib/fastbuf.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#define FB_POOL(f) ((struct fbpool *)(f)->is_fastbuf)
+
+/*
+ * Spout callback: when the buffer is full, ask the memory pool to expand the
+ * growing block and continue writing right behind the data written so far.
+ */
+static void
+fbpool_spout(struct fastbuf *b)
+{
+  if (b->bptr >= b->bufend)
+    {
+      uns len = b->bufend - b->buffer;
+      /* The expanded block may live at a different address, so all pointers are recomputed */
+      b->buffer = mp_expand(FB_POOL(b)->mp);
+      b->bufend = b->buffer + mp_avail(FB_POOL(b)->mp);
+      b->bstop = b->buffer;
+      b->bptr = b->buffer + len;
+    }
+}
+
+/* Start writing to a new growing block of the pool @mp, initially asking for @init_size bytes */
+void
+fbpool_start(struct fbpool *b, struct mempool *mp, uns init_size)
+{
+  b->mp = mp;
+  b->fb.buffer = b->fb.bstop = b->fb.bptr = mp_start(mp, init_size);
+  b->fb.bufend = b->fb.buffer + mp_avail(mp);
+}
+
+/* Finish the block started by fbpool_start() at the current write pointer and return what mp_end() returns */
+void *
+fbpool_end(struct fbpool *b)
+{
+  void *end_of_data = b->fb.bptr;
+  return mp_end(b->mp, end_of_data);
+}
+
+/* Initialize a caller-provided fbpool structure as a write-only fastbuf; fbpool_start() must attach a pool before writing */
+void
+fbpool_init(struct fbpool *b)
+{
+  bzero(b, sizeof(*b));
+  b->fb.name = "<fbpool>";
+  b->fb.spout = fbpool_spout;
+  b->fb.can_overwrite_buffer = 1;
+}
+
+#ifdef TEST
+
+int main(void)
+{
+  struct mempool *mp;
+  struct fbpool fb;
+  byte *p;
+  uns l;
+  
+  mp = mp_new(64);
+  fbpool_init(&fb);
+  fbpool_start(&fb, mp, 16);
+  for (uns i = 0; i < 1024; i++)
+    bprintf(&fb.fb, "<hello>");
+  p = fbpool_end(&fb);
+  l = mp_size(mp, p);
+  if (l != 1024 * 7)
+    ASSERT(0);
+  for (uns i = 0; i < 1024; i++)
+    if (memcmp(p + i * 7, "<hello>", 7))
+      ASSERT(0);
+  mp_delete(mp);
+  
+  return 0;
+}
+
+#endif
diff --git a/lib/fb-temp.c b/lib/fb-temp.c

new file mode 100644 (file)

index 0000000..1975c8c
--- /dev/null
+++ b/lib/fb-temp.c
@@ -0,0 +1,86 @@
+/*
+ *     UCW Library -- Temporary Fastbufs
+ *
+ *     (c) 2002--2007 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/conf.h"
+#include "lib/fastbuf.h"
+#include "lib/threads.h"
+
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/fcntl.h>
+
+static char *temp_prefix = "/tmp/temp";
+
+static struct cf_section temp_config = {
+  CF_ITEMS {
+    CF_STRING("Prefix", &temp_prefix),
+    CF_END
+  }
+};
+
+static void CONSTRUCTOR temp_global_init(void)
+{
+  cf_declare_section("Tempfiles", &temp_config, 0);
+}
+
+/*
+ * Generate a name for a new temporary file: the configured prefix followed
+ * by the process ID, the thread ID (only if it differs from the process ID)
+ * and a per-thread counter.
+ *
+ * @buf is expected to have room for TEMP_FILE_NAME_LEN bytes, which is what
+ * bopen_tmp_file() provides (NOTE(review): confirm for callers outside this
+ * file). The prefix comes from the configuration, so the output is bounded
+ * and a name which does not fit is a fatal error instead of a buffer overflow.
+ */
+void
+temp_file_name(char *buf)
+{
+  struct ucwlib_context *ctx = ucwlib_thread_context();
+  int cnt = ++ctx->temp_counter;
+  int pid = getpid();
+  int len;
+  if (ctx->thread_id == pid)
+    len = snprintf(buf, TEMP_FILE_NAME_LEN, "%s%d-%d", temp_prefix, pid, cnt);
+  else
+    len = snprintf(buf, TEMP_FILE_NAME_LEN, "%s%d-%d-%d", temp_prefix, pid, ctx->thread_id, cnt);
+  /* snprintf() returns the length the full name would have had */
+  if (len < 0 || len >= TEMP_FILE_NAME_LEN)
+    die("Temporary file name with prefix %s is too long", temp_prefix);
+}
+
+/*
+ * Create a temporary file with a generated name and open it as a fastbuf
+ * described by @params. The stream is marked as a temporary file (value 1
+ * of BCONFIG_IS_TEMP_FILE).
+ *
+ * NOTE(review): the file is opened with O_CREAT | O_TRUNC but without O_EXCL,
+ * so an existing file (or symlink) of the same predictable name would be
+ * reused -- consider whether this is safe for a shared directory.
+ */
+struct fastbuf *
+bopen_tmp_file(struct fb_params *params)
+{
+  char name[TEMP_FILE_NAME_LEN];
+  temp_file_name(name);
+  struct fastbuf *fb = bopen_file(name, O_RDWR | O_CREAT | O_TRUNC, params);
+  bconfig(fb, BCONFIG_IS_TEMP_FILE, 1);
+  return fb;
+}
+
+/* Compatibility wrapper: bopen_tmp_file() with a standard (FB_STD) fastbuf and a buffer of @buflen bytes */
+struct fastbuf *
+bopen_tmp(uns buflen)
+{
+  return bopen_tmp_file(&(struct fb_params){ .type = FB_STD, .buffer_size = buflen });
+}
+
+/*
+ * Turn a temporary fastbuf into a permanent file: clear its temp-file flag
+ * (it must have been 1), rename the file to @name and close the fastbuf.
+ * Dies if the rename fails.
+ */
+void bfix_tmp_file(struct fastbuf *fb, const char *name)
+{
+  /* Clearing the flag first makes sure that the following bclose() does not unlink the file */
+  int was_temp = bconfig(fb, BCONFIG_IS_TEMP_FILE, 0);
+  ASSERT(was_temp == 1);
+  if (rename(fb->name, name))
+    die("Cannot rename %s to %s: %m", fb->name, name);
+  bclose(fb);
+}
+
+#ifdef TEST
+
+#include "lib/getopt.h"
+
+int main(int argc, char **argv)
+{
+  log_init(NULL);
+  if (cf_getopt(argc, argv, CF_SHORT_OPTS, CF_NO_LONG_OPTS, NULL) >= 0)
+    die("Hey, whaddya want?");
+
+  struct fastbuf *f = bopen_tmp(65536);
+  bputsn(f, "Hello, world!");
+  bclose(f);
+  return 0;
+}
+
+#endif
diff --git a/lib/ff-binary.c b/lib/ff-binary.c

new file mode 100644 (file)

index 0000000..81d36df
--- /dev/null
+++ b/lib/ff-binary.c
@@ -0,0 +1,34 @@
+/*
+ *     UCW Library -- Fast Buffered I/O: Binary Numbers
+ *
+ *     (c) 1997--2006 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/fastbuf.h"
+#include "lib/ff-binary.h"
+
+/*
+ * GEN(type, name, size, endian) defines the out-of-line ("slow") accessors
+ * for a size-bit integer stored in the given byte order:
+ *
+ *   bget<name>_<endian>_slow()  reads size/8 bytes through bread() and decodes
+ *                               them; on a short read it returns ~(type)0
+ *                               (all bits set).
+ *   bput<name>_<endian>_slow()  encodes x into a local buffer and writes it
+ *                               with bwrite_slow().
+ */
+#define GEN(type, name, size, endian)                          \
+type bget##name##_##endian##_slow(struct fastbuf *f)           \
+{                                                              \
+  byte buf[size/8];                                            \
+  if (bread(f, buf, sizeof(buf)) != sizeof(buf))               \
+    return ~(type)0;                                           \
+  return get_u##size##_##endian(buf);                          \
+}                                                              \
+void bput##name##_##endian##_##slow(struct fastbuf *f, type x) \
+{                                                              \
+  byte buf[size/8];                                            \
+  put_u##size##_##endian(buf, x);                              \
+  bwrite_slow(f, buf, sizeof(buf));                            \
+}
+
+/* Instantiate both the big-endian and the little-endian variant. */
+#define FF_ALL(type, name, size) GEN(type,name,size,be) GEN(type,name,size,le)
+
+/* w = 16 bits (int, so the all-ones failure value is -1), l = 32 bits, q = 64 bits, 5 = 40 bits. */
+FF_ALL(int, w, 16)
+FF_ALL(uns, l, 32)
+FF_ALL(u64, q, 64)
+FF_ALL(u64, 5, 40)
diff --git a/lib/ff-binary.h b/lib/ff-binary.h

new file mode 100644 (file)

index 0000000..171ff81
--- /dev/null
+++ b/lib/ff-binary.h
@@ -0,0 +1,81 @@
+/*
+ *     UCW Library -- Fast Buffered I/O on Binary Values
+ *
+ *     (c) 1997--2007 Martin Mares <mj@ucw.cz>
+ *     (c) 2004 Robert Spalek <robert@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_FF_BINARY_H
+#define _UCW_FF_BINARY_H
+
+#include "lib/fastbuf.h"
+#include "lib/unaligned.h"
+
+/* FF_ENDIAN names the byte order used by the unsuffixed bget<name>()/bput<name>() accessors. */
+#ifdef CPU_BIG_ENDIAN
+#define FF_ENDIAN be
+#else
+#define FF_ENDIAN le
+#endif
+
+/*
+ * GET_FUNC declares bget<name>_<endian>_slow() and defines the inline
+ * bget<name>_<endian>(): when bavailr(f) reports at least bits/8 bytes,
+ * the value is decoded straight from f->bptr and the pointer is advanced;
+ * otherwise the call is forwarded to the slow variant.
+ */
+#define GET_FUNC(type, name, bits, endian)                     \
+  type bget##name##_##endian##_slow(struct fastbuf *f);                \
+  static inline type bget##name##_##endian(struct fastbuf *f)  \
+  {                                                            \
+    if (bavailr(f) >= bits/8)                                  \
+      {                                                                \
+       type w = get_u##bits##_##endian(f->bptr);               \
+       f->bptr += bits/8;                                      \
+       return w;                                               \
+      }                                                                \
+    else                                                       \
+      return bget##name##_##endian##_slow(f);                  \
+  }
+
+/*
+ * PUT_FUNC declares bput<name>_<endian>_slow() and defines the inline
+ * bput<name>_<endian>(): when bavailw(f) reports at least bits/8 bytes,
+ * the value is encoded straight at f->bptr and the pointer is advanced;
+ * otherwise the call is forwarded to the slow variant.
+ *
+ * The slow variant is called as a plain statement: ISO C does not allow
+ * `return <expression>;' in a function returning void.
+ */
+#define PUT_FUNC(type, name, bits, endian)                     \
+  void bput##name##_##endian##_slow(struct fastbuf *f, type x);        \
+  static inline void bput##name##_##endian(struct fastbuf *f, type x)  \
+  {                                                            \
+    if (bavailw(f) >= bits/8)                                  \
+      {                                                                \
+       put_u##bits##_##endian(f->bptr, x);                     \
+       f->bptr += bits/8;                                      \
+      }                                                                \
+    else                                                       \
+      bput##name##_##endian##_slow(f, x);                      \
+  }
+
+/*
+ * FF_ALL_X generates the big- and little-endian getters and putters for one
+ * integer width, plus unsuffixed bget<name>()/bput<name>() which forward to
+ * the `defendian' variants.
+ */
+#define FF_ALL_X(type, name, bits, defendian)                  \
+  GET_FUNC(type, name, bits, be)                               \
+  GET_FUNC(type, name, bits, le)                               \
+  PUT_FUNC(type, name, bits, be)                               \
+  PUT_FUNC(type, name, bits, le)                               \
+  static inline type bget##name(struct fastbuf *f) { return bget##name##_##defendian(f); }             \
+  static inline void bput##name(struct fastbuf *f, type x) { bput##name##_##defendian(f, x); }
+
+/* Extra level of indirection so that FF_ENDIAN is macro-expanded before token pasting. */
+#define FF_ALL(type, name, bits, defendian) FF_ALL_X(type, name, bits, defendian)
+
+/* w = 16 bits, l = 32 bits, q = 64 bits, 5 = 40 bits. */
+FF_ALL(int, w, 16, FF_ENDIAN)
+FF_ALL(uns, l, 32, FF_ENDIAN)
+FF_ALL(u64, q, 64, FF_ENDIAN)
+FF_ALL(u64, 5, 40, FF_ENDIAN)
+
+/* The generator macros are private to this header. */
+#undef GET_FUNC
+#undef PUT_FUNC
+#undef FF_ENDIAN
+#undef FF_ALL_X
+#undef FF_ALL
+
+/*
+ * I/O on uintptr_t (native endianness only): bputa()/bgeta() map to the
+ * 64-bit accessors when CPU_64BIT_POINTERS is defined and to the 32-bit
+ * ones otherwise.
+ */
+
+#ifdef CPU_64BIT_POINTERS
+#define bputa(x,p) bputq(x,p)
+#define bgeta(x) bgetq(x)
+#else
+#define bputa(x,p) bputl(x,p)
+#define bgeta(x) bgetl(x)
+#endif
+
+#endif
diff --git a/lib/ff-printf.c b/lib/ff-printf.c

new file mode 100644 (file)

index 0000000..0493092
--- /dev/null
+++ b/lib/ff-printf.c
@@ -0,0 +1,83 @@
+/*
+ *     UCW Library -- Printf on Fastbuf Streams
+ *
+ *     (c) 2002--2005 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/fastbuf.h"
+
+#include <stdio.h>
+#include <alloca.h>
+
+/*
+ * Formatted output to a fastbuf, va_list flavour.
+ *
+ * First tries to format directly into the space offered by
+ * bdirect_write_prepare() (only when at least 16 bytes are offered).
+ * If that space is too small or unusable, the text is formatted into a
+ * stack buffer obtained by alloca() and then copied out with bwrite().
+ * `args' is never consumed directly; every attempt works on a va_copy().
+ *
+ * Returns the value reported by vsnprintf() for the successful attempt.
+ */
+int
+vbprintf(struct fastbuf *b, const char *msg, va_list args)
+{
+  byte *buf;
+  int len, r;
+  va_list args2;
+
+  len = bdirect_write_prepare(b, &buf);
+  if (len >= 16)
+    {
+      va_copy(args2, args);
+      r = vsnprintf(buf, len, msg, args2);
+      va_end(args2);
+      if (r < 0)
+       /* vsnprintf() gave no size hint: fall back to a 256-byte stack buffer */
+       len = 256;
+      else if (r < len)
+       {
+         /* Everything (including the terminating NUL) fitted; commit only the r text bytes */
+         bdirect_write_commit(b, buf+r);
+         return r;
+       }
+      else
+       /* Output was truncated; r is the required length, add room for the NUL */
+       len = r+1;
+    }
+  else
+    len = 256;
+
+  /* Slow path: retry with growing stack buffers until the output fits */
+  while (1)
+    {
+      buf = alloca(len);
+      va_copy(args2, args);
+      r = vsnprintf(buf, len, msg, args2);
+      va_end(args2);
+      if (r < 0)
+       /* No size hint: double the buffer */
+       len += len;
+      else if (r < len)
+       {
+         bwrite(b, buf, r);
+         return r;
+       }
+      else
+       len = r+1;
+    }
+}
+
+/* printf()-style front end of vbprintf(); returns whatever vbprintf() returns. */
+int
+bprintf(struct fastbuf *b, const char *msg, ...)
+{
+  va_list ap;
+
+  va_start(ap, msg);
+  int written = vbprintf(b, msg, ap);
+  va_end(ap);
+
+  return written;
+}
+
+#ifdef TEST
+
+/* Self-test: print the same formatted line 10000 times to file descriptor 1. */
+int main(void)
+{
+  struct fastbuf *out = bfdopen_shared(1, 65536);
+  int remaining = 10000;
+
+  while (remaining-- > 0)
+    bprintf(out, "13=%d str=<%s> msg=%m\n", 13, "str");
+  bclose(out);
+  return 0;
+}
+
+#endif
diff --git a/lib/ff-string.c b/lib/ff-string.c

new file mode 100644 (file)

index 0000000..913c335
--- /dev/null
+++ b/lib/ff-string.c
@@ -0,0 +1,286 @@
+/*
+ *     UCW Library -- Fast Buffered I/O: Strings
+ *
+ *     (c) 1997--2006 Martin Mares <mj@ucw.cz>
+ *     (c) 2006 Pavel Charvat <pchar@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/fastbuf.h"
+#include "lib/mempool.h"
+#include "lib/bbuf.h"
+
+/*
+ * Read one line from f into the buffer b of size l (l must be non-zero).
+ * The terminating '\n' is consumed but not stored; the result is always
+ * NUL-terminated.
+ *
+ * Returns NULL when no input is available at the start of the call.
+ * Otherwise returns a pointer to the terminating NUL (the END of the string,
+ * not its start -- hence "non-standard"). Dies with "Line too long" when
+ * l bytes have been stored without reaching a newline.
+ */
+char *                                 /* Non-standard */
+bgets(struct fastbuf *f, char *b, uns l)
+{
+  ASSERT(l);
+  byte *src;
+  uns src_len = bdirect_read_prepare(f, &src);
+  if (!src_len)
+    return NULL;
+  do
+    {
+      /* Copy at most as much as fits into both the remaining output space and the current input chunk */
+      uns cnt = MIN(l, src_len);
+      for (uns i = cnt; i--;)
+        {
+         byte v = *src++;
+         if (v == '\n')
+           {
+              bdirect_read_commit(f, src);
+             goto exit;
+           }
+         *b++ = v;
+       }
+      /* Output space exhausted without a newline: no room is left for the NUL */
+      if (unlikely(cnt == l))
+        die("%s: Line too long", f->name);
+      l -= cnt;
+      bdirect_read_commit(f, src);
+      src_len = bdirect_read_prepare(f, &src);
+    }
+  while (src_len);
+exit:
+  *b = 0;
+  return b;
+}
+
+/*
+ * Variant of bgets() which reports an over-long line instead of dying.
+ *
+ * Returns 0 when no input is available at the start of the call, -1 when
+ * l bytes were stored without reaching a newline (the bytes read so far
+ * stay consumed and b is NOT NUL-terminated in that case), and otherwise
+ * the number of bytes stored in b including the terminating NUL.
+ */
+int
+bgets_nodie(struct fastbuf *f, char *b, uns l)
+{
+  ASSERT(l);
+  byte *src, *start = b;
+  uns src_len = bdirect_read_prepare(f, &src);
+  if (!src_len)
+    return 0;
+  do
+    {
+      uns cnt = MIN(l, src_len);
+      for (uns i = cnt; i--;)
+        {
+         byte v = *src++;
+         if (v == '\n')
+           {
+             bdirect_read_commit(f, src);
+             goto exit;
+           }
+         *b++ = v;
+       }
+      bdirect_read_commit(f, src);
+      /* Buffer is full and no newline was seen */
+      if (cnt == l)
+        return -1;
+      l -= cnt;
+      src_len = bdirect_read_prepare(f, &src);
+    }
+  while (src_len);
+exit:
+  *b++ = 0;
+  return b - (char *)start;
+}
+
+/*
+ * Read one line from f into the growing buffer bb, enlarging it as needed.
+ * The '\n' is consumed but not stored and the result is NUL-terminated.
+ *
+ * Returns 0 when no input is available at the start of the call, otherwise
+ * the number of bytes stored including the terminating NUL. Dies with
+ * "Line too long" once `limit' bytes have been stored without reaching a
+ * newline (limit must be non-zero).
+ */
+uns
+bgets_bb(struct fastbuf *f, struct bb_t *bb, uns limit)
+{
+  ASSERT(limit);
+  byte *src;
+  uns src_len = bdirect_read_prepare(f, &src);
+  if (!src_len)
+    return 0;
+  bb_grow(bb, 1);
+  byte *buf = bb->ptr;
+  /* len = bytes stored so far, buf_len = free space usable before growing or hitting the limit */
+  uns len = 0, buf_len = MIN(bb->len, limit);
+  do
+    {
+      uns cnt = MIN(src_len, buf_len);
+      for (uns i = cnt; i--;)
+        {
+         byte v = *src++;
+         if (v == '\n')
+           {
+              bdirect_read_commit(f, src);
+             goto exit;
+           }
+         *buf++ = v;
+       }
+      len += cnt;
+      /* Input chunk exhausted: fetch the next one */
+      if (cnt == src_len)
+        {
+         bdirect_read_commit(f, src);
+         src_len = bdirect_read_prepare(f, &src);
+       }
+      else
+       src_len -= cnt;
+      /* Output space exhausted: grow the buffer (or give up at the limit) */
+      if (cnt == buf_len)
+        {
+         if (unlikely(len == limit))
+            die("%s: Line too long", f->name);
+         bb_do_grow(bb, len + 1);
+         /* bb->ptr may have changed, so recompute the write position */
+         buf = bb->ptr + len;
+         buf_len = MIN(bb->len, limit) - len;
+       }
+      else
+       buf_len -= cnt;
+    }
+  while (src_len);
+exit:
+  *buf++ = 0;
+  return buf - bb->ptr;
+}
+
+/*
+ * Read one line from f and return it as a NUL-terminated string allocated
+ * from the memory pool mp. The '\n' is consumed but not stored.
+ * Returns NULL when no input is available at the start of the call.
+ *
+ * The line is first gathered in fixed-size blocks living on the stack
+ * (the first one is a local variable, further ones come from alloca()),
+ * chained backwards through `prev'; once the length is known, a single
+ * pool allocation is made and the blocks are copied into it from the
+ * last one to the first.
+ */
+char *
+bgets_mp(struct fastbuf *f, struct mempool *mp)
+{
+  byte *src;
+  uns src_len = bdirect_read_prepare(f, &src);
+  if (!src_len)
+    return NULL;
+#define BLOCK_SIZE (4096 - sizeof(void *))
+  struct block {
+    struct block *prev;
+    byte data[BLOCK_SIZE];
+  } *blocks = NULL;
+  /* sum = bytes held in the completely filled blocks chained in `blocks' */
+  uns sum = 0, buf_len = BLOCK_SIZE, cnt;
+  struct block first_block, *new_block = &first_block;
+  byte *buf = new_block->data;
+  do
+    {
+      cnt = MIN(src_len, buf_len);
+      for (uns i = cnt; i--;)
+        {
+         byte v = *src++;
+         if (v == '\n')
+           {
+              bdirect_read_commit(f, src);
+             goto exit;
+           }
+         *buf++ = v;
+       }
+      if (cnt == src_len)
+        {
+         bdirect_read_commit(f, src);
+         src_len = bdirect_read_prepare(f, &src);
+       }
+      else
+       src_len -= cnt;
+      /* Current block is full: push it onto the chain and start a new one */
+      if (cnt == buf_len)
+        {
+          new_block->prev = blocks;
+          blocks = new_block;
+          sum += buf_len = BLOCK_SIZE;
+         new_block = alloca(sizeof(struct block));
+         buf = new_block->data;
+       }
+      else
+       buf_len -= cnt;
+    }
+  while (src_len);
+exit: ;
+  /* len = bytes in the last, partially filled block */
+  uns len = buf - new_block->data;
+  /* result initially points at the place of the last block inside the allocation */
+  byte *result = mp_alloc(mp, sum + len + 1) + sum;
+  result[len] = 0;
+  memcpy(result, new_block->data, len);
+  /* Walk the chain backwards, filling the allocation towards its start */
+  while (blocks)
+    {
+      result -= BLOCK_SIZE;
+      memcpy(result, blocks->data, BLOCK_SIZE);
+      blocks = blocks->prev;
+    }
+  return result;
+#undef BLOCK_SIZE
+}
+
+/*
+ * Start reading a line for the stack-based bgets helper: fetch the first
+ * chunk of input from s->f. With no input, cur_buf is set to NULL and
+ * cur_len to 0; otherwise old_buf is cleared and an initial buffer size
+ * of 256 bytes is requested through cur_len.
+ */
+void
+bgets_stk_init(struct bgets_stk_struct *s)
+{
+  s->src_len = bdirect_read_prepare(s->f, &s->src);
+  if (s->src_len)
+    {
+      s->old_buf = NULL;
+      s->cur_len = 256;
+      return;
+    }
+  s->cur_buf = NULL;
+  s->cur_len = 0;
+}
+
+/*
+ * One step of the stack-based bgets helper. Continues copying the line
+ * into s->cur_buf (s->cur_len bytes), first carrying over the contents of
+ * the previous, smaller buffer if there is one.
+ *
+ * When the buffer fills up before a newline is found, the current buffer
+ * is remembered as old_buf/old_len, cur_len is doubled and the function
+ * returns. When the line is complete (or input ends), the string is
+ * NUL-terminated and cur_len is set to 0.
+ * NOTE(review): the caller presumably allocates a new cur_buf of cur_len
+ * bytes between steps and loops while cur_len is non-zero -- confirm
+ * against the bgets_stk() macro in lib/fastbuf.h.
+ */
+void
+bgets_stk_step(struct bgets_stk_struct *s)
+{
+  byte *buf = s->cur_buf;
+  uns buf_len = s->cur_len;
+  if (s->old_buf)
+    {
+      /* Carry over what was gathered in the previous buffer */
+      memcpy( s->cur_buf, s->old_buf, s->old_len);
+      buf += s->old_len;
+      buf_len -= s->old_len;
+    }
+  do
+    {
+      uns cnt = MIN(s->src_len, buf_len);
+      for (uns i = cnt; i--;)
+        {
+         byte v = *s->src++;
+         if (v == '\n')
+           {
+              bdirect_read_commit(s->f, s->src);
+             goto exit;
+           }
+         *buf++ = v;
+       }
+      if (cnt == s->src_len)
+        {
+         bdirect_read_commit(s->f, s->src);
+         s->src_len = bdirect_read_prepare(s->f, &s->src);
+       }
+      else
+       s->src_len -= cnt;
+      /* Buffer full: request a buffer twice as large and let the caller call us again */
+      if (cnt == buf_len)
+        {
+         s->old_len = s->cur_len;
+         s->old_buf = s->cur_buf;
+         s->cur_len *= 2;
+         return;
+       }
+      else
+       buf_len -= cnt;
+    }
+  while (s->src_len);
+exit:
+  *buf = 0;
+  s->cur_len = 0;
+}
+
+/*
+ * Like bgets(), but the record terminator is a NUL byte instead of '\n'.
+ * Reads into b (size l, non-zero) up to and including the first NUL.
+ *
+ * Returns NULL when no input is available at the start of the call,
+ * otherwise a pointer to the terminating NUL inside b. When the input ends
+ * without a NUL, the string is terminated anyway. Dies with
+ * "Line too long" when l bytes have been stored without reaching a NUL.
+ */
+char *
+bgets0(struct fastbuf *f, char *b, uns l)
+{
+  ASSERT(l);
+  byte *src;
+  uns src_len = bdirect_read_prepare(f, &src);
+  if (!src_len)
+    return NULL;
+  do
+    {
+      uns cnt = MIN(l, src_len);
+      for (uns i = cnt; i--;)
+        {
+         /* The byte is stored first, so the terminating NUL is copied as well */
+         *b = *src++;
+         if (!*b)
+           {
+              bdirect_read_commit(f, src);
+             return b;
+           }
+         b++;
+       }
+      if (unlikely(cnt == l))
+        die("%s: Line too long", f->name);
+      l -= cnt;
+      bdirect_read_commit(f, src);
+      src_len = bdirect_read_prepare(f, &src);
+    }
+  while (src_len);
+  *b = 0;
+  return b;
+}
diff --git a/lib/ff-unicode.c b/lib/ff-unicode.c

new file mode 100644 (file)

index 0000000..6057e24
--- /dev/null
+++ b/lib/ff-unicode.c
@@ -0,0 +1,346 @@
+/*
+ *     UCW Library: Reading and writing of UTF-8 on Fastbuf Streams
+ *
+ *     (c) 2001--2004 Martin Mares <mj@ucw.cz>
+ *     (c) 2004 Robert Spalek <robert@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/fastbuf.h"
+#include "lib/unicode.h"
+#include "lib/ff-unicode.h"
+#include "lib/ff-binary.h"
+
+/*** UTF-8 ***/
+
+/*
+ * Byte-by-byte decoder of one UTF-8 character limited to sequences of at
+ * most 3 bytes.
+ *
+ * Returns the decoded code point, the result of bgetc() when it is below
+ * 0x80 (an ASCII byte, or a negative value at end of input), or `repl'
+ * for a malformed sequence. Lead bytes >= 0xf0 (longer sequences) are
+ * skipped together with their continuation bytes and reported as `repl'.
+ * The byte which broke a sequence is pushed back with bungetc().
+ */
+int
+bget_utf8_slow(struct fastbuf *b, uns repl)
+{
+  int c = bgetc(b);
+  int code;
+
+  if (c < 0x80)                                /* Includes EOF */
+    return c;
+  if (c < 0xc0)                                /* Incorrect combination */
+    return repl;
+  if (c >= 0xf0)                       /* Too large, skip it */
+    {
+      while ((c = bgetc(b)) >= 0x80 && c < 0xc0)
+       ;
+      goto wrong;
+    }
+  if (c >= 0xe0)                       /* 3 bytes */
+    {
+      code = c & 0x0f;
+      if ((c = bgetc(b)) < 0x80 || c >= 0xc0)
+       goto wrong;
+      code = (code << 6) | (c & 0x3f);
+      if ((c = bgetc(b)) < 0x80 || c >= 0xc0)
+       goto wrong;
+      code = (code << 6) | (c & 0x3f);
+    }
+  else                                 /* 2 bytes */
+    {
+      code = c & 0x1f;
+      if ((c = bgetc(b)) < 0x80 || c >= 0xc0)
+       goto wrong;
+      code = (code << 6) | (c & 0x3f);
+    }
+  return code;
+
+ wrong:
+  /* c is the offending byte; give it back unless it was the end of input */
+  if (c >= 0)
+    bungetc(b);
+  return repl;
+}
+
+/*
+ * Byte-by-byte decoder of one UTF-8 character, accepting sequences of up
+ * to 6 bytes.
+ *
+ * Returns the decoded code point, the result of bgetc() when it is below
+ * 0x80 (an ASCII byte, or a negative value at end of input), or `repl'
+ * for a malformed sequence. Lead bytes 0xfe and 0xff are skipped together
+ * with their continuation bytes and reported as `repl'. The byte which
+ * broke a sequence is pushed back with bungetc().
+ */
+int
+bget_utf8_32_slow(struct fastbuf *b, uns repl)
+{
+  int c = bgetc(b);
+  int code;
+  int nr;              /* Number of continuation bytes still expected */
+
+  if (c < 0x80)                                /* Includes EOF */
+    return c;
+  if (c < 0xc0)                                /* Incorrect combination */
+    return repl;
+  if (c < 0xe0)
+    {
+      code = c & 0x1f;
+      nr = 1;
+    }
+  else if (c < 0xf0)
+    {
+      code = c & 0x0f;
+      nr = 2;
+    }
+  else if (c < 0xf8)
+    {
+      code = c & 0x07;
+      nr = 3;
+    }
+  else if (c < 0xfc)
+    {
+      code = c & 0x03;
+      nr = 4;
+    }
+  else if (c < 0xfe)
+    {
+      code = c & 0x01;
+      nr = 5;
+    }
+  else                                 /* Too large, skip it */
+    {
+      while ((c = bgetc(b)) >= 0x80 && c < 0xc0)
+       ;
+      goto wrong;
+    }
+  while (nr-- > 0)
+    {
+      if ((c = bgetc(b)) < 0x80 || c >= 0xc0)
+       goto wrong;
+      code = (code << 6) | (c & 0x3f);
+    }
+  return code;
+
+ wrong:
+  /* c is the offending byte; give it back unless it was the end of input */
+  if (c >= 0)
+    bungetc(b);
+  return repl;
+}
+
+/*
+ * Write the code point u (which must be below 65536) to b as UTF-8,
+ * one byte at a time: 1 byte below 0x80, 2 bytes below 0x800, 3 bytes otherwise.
+ */
+void
+bput_utf8_slow(struct fastbuf *b, uns u)
+{
+  ASSERT(u < 65536);
+
+  /* Plain ASCII is stored as is */
+  if (u < 0x80)
+    {
+      bputc(b, u);
+      return;
+    }
+
+  /* Lead byte (and the middle byte of a 3-byte sequence) */
+  if (u >= 0x800)
+    {
+      bputc(b, 0xe0 | (u >> 12));
+      bputc(b, 0x80 | ((u >> 6) & 0x3f));
+    }
+  else
+    bputc(b, 0xc0 | (u >> 6));
+
+  /* The last continuation byte is common to both forms */
+  bputc(b, 0x80 | (u & 0x3f));
+}
+
+/*
+ * Write the code point u (which must be below 2^31) to b as UTF-8,
+ * one byte at a time, using sequences of 1 to 6 bytes.
+ */
+void
+bput_utf8_32_slow(struct fastbuf *b, uns u)
+{
+  ASSERT(u < (1U<<31));
+
+  if (u < 0x80)
+    {
+      bputc(b, u);
+      return;
+    }
+
+  /* Pick the lead byte marker and the number of continuation bytes */
+  uns lead, tail;
+  if (u < 0x800)
+    {
+      lead = 0xc0;
+      tail = 1;
+    }
+  else if (u < (1<<16))
+    {
+      lead = 0xe0;
+      tail = 2;
+    }
+  else if (u < (1<<21))
+    {
+      lead = 0xf0;
+      tail = 3;
+    }
+  else if (u < (1<<26))
+    {
+      lead = 0xf8;
+      tail = 4;
+    }
+  else
+    {
+      lead = 0xfc;
+      tail = 5;
+    }
+
+  /* The lead byte carries the topmost bits, each continuation byte the next 6 bits */
+  bputc(b, lead | (u >> (6*tail)));
+  while (tail--)
+    bputc(b, 0x80 | ((u >> (6*tail)) & 0x3f));
+}
+
+/*** UTF-16 ***/
+
+/*
+ * Read one character encoded as big-endian UTF-16 from b.
+ * Returns -1 when bpeekc() reports no more input, `repl' for a truncated
+ * unit or an invalid surrogate combination, and the code point otherwise.
+ */
+int
+bget_utf16_be_slow(struct fastbuf *b, uns repl)
+{
+  if (bpeekc(b) < 0)
+    return -1;
+
+  uns unit = bgetw_be(b);
+  if ((int)unit < 0)
+    return repl;
+
+  /* Anything outside 0xd800..0xdfff is a complete character */
+  uns hi = unit - 0xd800;
+  if (hi >= 0x800)
+    return unit;
+
+  /* A low surrogate must not come first */
+  if (hi >= 0x400)
+    return repl;
+  if (bpeekc(b) < 0)
+    return repl;
+
+  /* The second unit has to be a low surrogate */
+  uns lo = bgetw_be(b) - 0xdc00;
+  if (lo >= 0x400)
+    return repl;
+
+  return 0x10000 + (hi << 10) + lo;
+}
+
+/*
+ * Read one character encoded as little-endian UTF-16 from b.
+ * Returns -1 when bpeekc() reports no more input, `repl' for a truncated
+ * unit or an invalid surrogate combination, and the code point otherwise.
+ */
+int
+bget_utf16_le_slow(struct fastbuf *b, uns repl)
+{
+  if (bpeekc(b) < 0)
+    return -1;
+
+  uns unit = bgetw_le(b);
+  if ((int)unit < 0)
+    return repl;
+
+  /* Anything outside 0xd800..0xdfff is a complete character */
+  uns hi = unit - 0xd800;
+  if (hi >= 0x800)
+    return unit;
+
+  /* A low surrogate must not come first */
+  if (hi >= 0x400)
+    return repl;
+  if (bpeekc(b) < 0)
+    return repl;
+
+  /* The second unit has to be a low surrogate */
+  uns lo = bgetw_le(b) - 0xdc00;
+  if (lo >= 0x400)
+    return repl;
+
+  return 0x10000 + (hi << 10) + lo;
+}
+
+/*
+ * Write the code point u to b as big-endian UTF-16, byte by byte:
+ * a single unit for non-surrogate values below 0x10000, a surrogate pair
+ * for 0x10000..0x10ffff. Any other value trips ASSERT(0).
+ */
+void
+bput_utf16_be_slow(struct fastbuf *b, uns u)
+{
+  if (u < 0xd800 || (u >= 0xe000 && u < 0x10000))
+    {
+      bputc(b, u >> 8);
+      bputc(b, u & 0xff);
+      return;
+    }
+
+  /* Offset into the supplementary planes; wraps to a huge value for surrogates */
+  uns v = u - 0x10000;
+  if (v < 0x100000)
+    {
+      bputc(b, 0xd8 | (v >> 18));
+      bputc(b, (v >> 10) & 0xff);
+      bputc(b, 0xdc | ((v >> 8) & 0x3));
+      bputc(b, v & 0xff);
+    }
+  else
+    ASSERT(0);
+}
+
+/*
+ * Write the code point u to b as little-endian UTF-16, byte by byte:
+ * a single unit for non-surrogate values below 0x10000, a surrogate pair
+ * for 0x10000..0x10ffff. Any other value trips ASSERT(0).
+ */
+void
+bput_utf16_le_slow(struct fastbuf *b, uns u)
+{
+  if (u < 0xd800 || (u >= 0xe000 && u < 0x10000))
+    {
+      bputc(b, u & 0xff);
+      bputc(b, u >> 8);
+      return;
+    }
+
+  /* Offset into the supplementary planes; wraps to a huge value for surrogates */
+  uns v = u - 0x10000;
+  if (v < 0x100000)
+    {
+      bputc(b, (v >> 10) & 0xff);
+      bputc(b, 0xd8 | (v >> 18));
+      bputc(b, v & 0xff);
+      bputc(b, 0xdc | ((v >> 8) & 0x3));
+    }
+  else
+    ASSERT(0);
+}
+
+#ifdef TEST
+
+#include <stdlib.h>
+#include <stdio.h>
+
+/*
+ * Test driver. The first argument selects one of the functions listed in
+ * FUNCS (matched case-insensitively); hexadecimal numbers are read from
+ * standard input.
+ *
+ * For the BGET_* functions the numbers are bytes of the encoded stream and
+ * the decoded code points are printed; for the BPUT_* functions the numbers
+ * are code points and the bytes of their encoding are printed.
+ */
+int main(int argc, char **argv)
+{
+#define FUNCS \
+  F(BGET_UTF8) F(BGET_UTF8_32) F(BGET_UTF16_BE) F(BGET_UTF16_LE) \
+  F(BPUT_UTF8) F(BPUT_UTF8_32) F(BPUT_UTF16_BE) F(BPUT_UTF16_LE)
+
+  /* FUNCS is expanded twice: once into enum constants, once into their names */
+  enum {
+#define F(x) FUNC_##x,
+    FUNCS
+#undef F
+  };
+  char *names[] = {
+#define F(x) [FUNC_##x] = #x,
+    FUNCS
+#undef F
+  };
+
+  /* ~0U means "no function selected" */
+  uns func = ~0U;
+  if (argc > 1)
+    for (uns i = 0; i < ARRAY_SIZE(names); i++)
+      if (!strcasecmp(names[i], argv[1]))
+       func = i;
+  if (!~func)
+    {
+      fprintf(stderr, "Invalid usage!\n");
+      return 1;
+    }
+
+  struct fastbuf *b = fbgrow_create(8);
+  /* All BGET_* constants precede FUNC_BPUT_UTF8 in the enum */
+  if (func < FUNC_BPUT_UTF8)
+    {
+      /* Decoding: store all input bytes first, then decode them one character at a time */
+      uns u;
+      while (scanf("%x", &u) == 1)
+       bputc(b, u);
+      fbgrow_rewind(b);
+      while (bpeekc(b) >= 0)
+        {
+         if (btell(b))
+           putchar(' ');
+         switch (func)
+           {
+             case FUNC_BGET_UTF8:
+               u = bget_utf8_slow(b, UNI_REPLACEMENT);
+               break;
+             case FUNC_BGET_UTF8_32:
+               u = bget_utf8_32_slow(b, UNI_REPLACEMENT);
+               break;
+             case FUNC_BGET_UTF16_BE:
+               u = bget_utf16_be_slow(b, UNI_REPLACEMENT);
+               break;
+             case FUNC_BGET_UTF16_LE:
+               u = bget_utf16_le_slow(b, UNI_REPLACEMENT);
+               break;
+             default:
+               ASSERT(0);
+           }
+         printf("%04x", u);
+       }
+      putchar('\n');
+    }
+  else
+    {
+      /* Encoding: encode each code point separately and dump the resulting bytes */
+      uns u, i = 0;
+      while (scanf("%x", &u) == 1)
+        {
+         switch (func)
+           {
+             case FUNC_BPUT_UTF8:
+               bput_utf8_slow(b, u);
+               break;
+             case FUNC_BPUT_UTF8_32:
+               bput_utf8_32_slow(b, u);
+               break;
+             case FUNC_BPUT_UTF16_BE:
+                bput_utf16_be_slow(b, u);
+               break;
+             case FUNC_BPUT_UTF16_LE:
+               bput_utf16_le_slow(b, u);
+               break;
+             default:
+               ASSERT(0);
+           }
+         fbgrow_rewind(b);
+         u = 0;
+         while (bpeekc(b) >= 0)
+           {
+             /* i counts the bytes printed so far, so a space goes before all but the first */
+             if (i++)
+               putchar(' ');
+             printf("%02x", bgetc(b));
+           }
+         fbgrow_reset(b);
+       }
+      putchar('\n');
+    }
+  bclose(b);
+
+  return 0;
+}
+
+#endif
diff --git a/lib/ff-unicode.h b/lib/ff-unicode.h

new file mode 100644 (file)

index 0000000..af39bbd
--- /dev/null
+++ b/lib/ff-unicode.h
@@ -0,0 +1,144 @@
+/*
+ *     UCW Library: Reading and writing of UTF-8 and UTF-16 on Fastbuf Streams
+ *
+ *     (c) 2001--2004 Martin Mares <mj@ucw.cz>
+ *     (c) 2004 Robert Spalek <robert@ucw.cz>
+ *     (c) 2007--2008 Pavel Charvat <pchar@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_FF_UNICODE_H
+#define _UCW_FF_UNICODE_H
+
+#include "lib/fastbuf.h"
+#include "lib/unicode.h"
+
+/*** UTF-8 ***/
+
+int bget_utf8_slow(struct fastbuf *b, uns repl);
+int bget_utf8_32_slow(struct fastbuf *b, uns repl);
+void bput_utf8_slow(struct fastbuf *b, uns u);
+void bput_utf8_32_slow(struct fastbuf *b, uns u);
+
+/*
+ * Read one UTF-8 character from b, yielding `repl' for malformed input.
+ * With at least 3 bytes reported by bavailr(), the character is decoded
+ * directly at b->bptr; otherwise the byte-wise bget_utf8_slow() is used.
+ */
+static inline int
+bget_utf8_repl(struct fastbuf *b, uns repl)
+{
+  if (bavailr(b) < 3)
+    return bget_utf8_slow(b, repl);
+
+  uns code;
+  b->bptr = utf8_get_repl(b->bptr, &code, repl);
+  return code;
+}
+
+/*
+ * Read one UTF-8 character (sequences of up to 6 bytes) from b, yielding
+ * `repl' for malformed input. With at least 6 bytes reported by bavailr(),
+ * the character is decoded directly at b->bptr; otherwise the byte-wise
+ * bget_utf8_32_slow() is used.
+ */
+static inline int
+bget_utf8_32_repl(struct fastbuf *b, uns repl)
+{
+  if (bavailr(b) < 6)
+    return bget_utf8_32_slow(b, repl);
+
+  uns code;
+  b->bptr = utf8_32_get_repl(b->bptr, &code, repl);
+  return code;
+}
+
+/* bget_utf8_repl() with UNI_REPLACEMENT as the replacement character. */
+static inline int
+bget_utf8(struct fastbuf *b)
+{
+  int code = bget_utf8_repl(b, UNI_REPLACEMENT);
+  return code;
+}
+
+/* bget_utf8_32_repl() with UNI_REPLACEMENT as the replacement character. */
+static inline int
+bget_utf8_32(struct fastbuf *b)
+{
+  int code = bget_utf8_32_repl(b, UNI_REPLACEMENT);
+  return code;
+}
+
+/*
+ * Write u to b as UTF-8. With at least 3 bytes reported by bavailw(),
+ * the character is encoded directly at b->bptr; otherwise the byte-wise
+ * bput_utf8_slow() is used.
+ */
+static inline void
+bput_utf8(struct fastbuf *b, uns u)
+{
+  if (bavailw(b) < 3)
+    {
+      bput_utf8_slow(b, u);
+      return;
+    }
+  b->bptr = utf8_put(b->bptr, u);
+}
+
+/*
+ * Write u to b as UTF-8 (sequences of up to 6 bytes). With at least
+ * 6 bytes reported by bavailw(), the character is encoded directly at
+ * b->bptr; otherwise the byte-wise bput_utf8_32_slow() is used.
+ */
+static inline void
+bput_utf8_32(struct fastbuf *b, uns u)
+{
+  if (bavailw(b) < 6)
+    {
+      bput_utf8_32_slow(b, u);
+      return;
+    }
+  b->bptr = utf8_32_put(b->bptr, u);
+}
+
+/*** UTF-16 ***/
+
+int bget_utf16_be_slow(struct fastbuf *b, uns repl);
+int bget_utf16_le_slow(struct fastbuf *b, uns repl);
+void bput_utf16_be_slow(struct fastbuf *b, uns u);
+void bput_utf16_le_slow(struct fastbuf *b, uns u);
+
+/*
+ * Read one big-endian UTF-16 character from b, yielding `repl' for
+ * malformed input. With at least 4 bytes reported by bavailr(), the
+ * character is decoded directly at b->bptr; otherwise
+ * bget_utf16_be_slow() is used.
+ */
+static inline int
+bget_utf16_be_repl(struct fastbuf *b, uns repl)
+{
+  if (bavailr(b) < 4)
+    return bget_utf16_be_slow(b, repl);
+
+  uns code;
+  b->bptr = utf16_be_get_repl(b->bptr, &code, repl);
+  return code;
+}
+
+/*
+ * Read one little-endian UTF-16 character from b, yielding `repl' for
+ * malformed input. With at least 4 bytes reported by bavailr(), the
+ * character is decoded directly at b->bptr; otherwise
+ * bget_utf16_le_slow() is used.
+ */
+static inline int
+bget_utf16_le_repl(struct fastbuf *b, uns repl)
+{
+  if (bavailr(b) < 4)
+    return bget_utf16_le_slow(b, repl);
+
+  uns code;
+  b->bptr = utf16_le_get_repl(b->bptr, &code, repl);
+  return code;
+}
+
+/* bget_utf16_be_repl() with UNI_REPLACEMENT as the replacement character. */
+static inline int
+bget_utf16_be(struct fastbuf *b)
+{
+  int code = bget_utf16_be_repl(b, UNI_REPLACEMENT);
+  return code;
+}
+
+/* bget_utf16_le_repl() with UNI_REPLACEMENT as the replacement character. */
+static inline int
+bget_utf16_le(struct fastbuf *b)
+{
+  int code = bget_utf16_le_repl(b, UNI_REPLACEMENT);
+  return code;
+}
+
+/*
+ * Write u to b as big-endian UTF-16. With at least 4 bytes reported by
+ * bavailw(), the character is encoded directly at b->bptr; otherwise
+ * bput_utf16_be_slow() is used.
+ */
+static inline void
+bput_utf16_be(struct fastbuf *b, uns u)
+{
+  if (bavailw(b) < 4)
+    {
+      bput_utf16_be_slow(b, u);
+      return;
+    }
+  b->bptr = utf16_be_put(b->bptr, u);
+}
+
+/*
+ * Write u to b as little-endian UTF-16. With at least 4 bytes reported by
+ * bavailw(), the character is encoded directly at b->bptr; otherwise
+ * bput_utf16_le_slow() is used.
+ */
+static inline void
+bput_utf16_le(struct fastbuf *b, uns u)
+{
+  if (bavailw(b) >= 4)
+    b->bptr = utf16_le_put(b->bptr, u);
+  else
+    bput_utf16_le_slow(b, u);
+}
+
+/*
+ * Historical misspelling of bput_utf16_le(), kept so that existing callers
+ * still compile. New code should call bput_utf16_le().
+ */
+static inline void
+bput_utf16_lbe(struct fastbuf *b, uns u)
+{
+  bput_utf16_le(b, u);
+}
+
+#endif
diff --git a/lib/ff-unicode.t b/lib/ff-unicode.t

new file mode 100644 (file)

index 0000000..2a8b3df
--- /dev/null
+++ b/lib/ff-unicode.t
@@ -0,0 +1,41 @@
+# Tests for the Unicode module
+
+Name:  bput_utf8
+Run:   ../obj/lib/ff-unicode-t bput_utf8
+In:    0041 0048 004f 004a
+Out:   41 48 4f 4a
+
+Name:   bget_utf8_32
+Run:    ../obj/lib/ff-unicode-t bget_utf8_32
+In:     fe 83 81
+Out:    fffc
+
+Name:   bput_utf16_be
+Run:    ../obj/lib/ff-unicode-t bput_utf16_be
+In:     0041 004a 2a5f feff 0000 10ffff ffff 10000
+Out:    00 41 00 4a 2a 5f fe ff 00 00 db ff df ff ff ff d8 00 dc 00
+
+Name:   bput_utf16_le
+Run:    ../obj/lib/ff-unicode-t bput_utf16_le
+In:     0041 004a 2a5f feff 0000 10ffff ffff 10000
+Out:    41 00 4a 00 5f 2a ff fe 00 00 ff db ff df ff ff 00 d8 00 dc
+
+Name:   bget_utf16_be (1)
+Run:    ../obj/lib/ff-unicode-t bget_utf16_be
+In:     00 41 00 4a 2a 5f fe ff 00 00 db ff df ff ff ff d8 00 dc 00
+Out:    0041 004a 2a5f feff 0000 10ffff ffff 10000
+
+Name:   bget_utf16_be (2)
+Run:    ../obj/lib/ff-unicode-t bget_utf16_be
+In:     dc 1a 2a 5f d8 01 d8 01 2a 5f d8 01
+Out:    fffc 2a5f fffc 2a5f fffc
+
+Name:   bget_utf16_le (1)
+Run:    ../obj/lib/ff-unicode-t bget_utf16_le
+In:     41 00 4a 00 5f 2a ff fe 00 00 ff db ff df ff ff 00 d8 00 dc
+Out:    0041 004a 2a5f feff 0000 10ffff ffff 10000
+
+Name:   bget_utf16_le (2)
+Run:    ../obj/lib/ff-unicode-t bget_utf16_le
+In:     1a dc 5f 2a 01 d8 01 d8 5f 2a 01 d8
+Out:    fffc 2a5f fffc 2a5f fffc
diff --git a/lib/ff-utf8.h b/lib/ff-utf8.h

new file mode 100644 (file)

index 0000000..af7543f
--- /dev/null
+++ b/lib/ff-utf8.h
@@ -0,0 +1,15 @@
+/*
+ *     UCW Library: An alias for lib/ff-unicode.h (for backwards compatibility)
+ *
+ *     (c) 2008 Pavel Charvat <pchar@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_FF_UTF8_H
+#define _UCW_FF_UTF8_H
+
+#include "lib/ff-unicode.h"
+
+#endif
diff --git a/lib/gbuf.h b/lib/gbuf.h

new file mode 100644 (file)

index 0000000..daf0bfb
--- /dev/null
+++ b/lib/gbuf.h
@@ -0,0 +1,71 @@
+/*
+ *     UCW Library -- A simple growing buffer
+ *
+ *     (c) 2004, Robert Spalek <robert@ucw.cz>
+ *     (c) 2005, Martin Mares <mj@ucw.cz>
+ *
+ *     Define the following macros:
+ *
+ *     GBUF_TYPE       data type of records stored in the buffer
+ *     GBUF_PREFIX(x)  add a name prefix to all global symbols
+ *     GBUF_TRACE(msg...) log growing of buffer [optional]
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+/* Name of the generated buffer type: the user's prefix applied to `t'. */
+#define        BUF_T   GBUF_PREFIX(t)
+
+/* The growing buffer itself. */
+typedef struct BUF_T
+{
+  uns len;                     /* Allocated size, counted in items of GBUF_TYPE */
+  GBUF_TYPE *ptr;              /* The items; NULL while nothing is allocated */
+}
+BUF_T;
+
+/* Put the buffer into the empty state: no storage, zero length. */
+static inline void
+GBUF_PREFIX(init)(BUF_T *b)
+{
+  b->len = 0;
+  b->ptr = NULL;
+}
+
+/* Release the storage (if any) and return the buffer to the empty state. */
+static void UNUSED
+GBUF_PREFIX(done)(BUF_T *b)
+{
+  if (b->ptr != NULL)
+    xfree(b->ptr);
+  b->len = 0;
+  b->ptr = NULL;
+}
+
+/*
+ * Resize the buffer to exactly `len' items by xrealloc()ing its storage.
+ * If GBUF_TRACE is defined, the new size is reported through it.
+ */
+static void UNUSED
+GBUF_PREFIX(set_size)(BUF_T *b, uns len)
+{
+  GBUF_TYPE *resized;
+
+  b->len = len;
+  resized = xrealloc(b->ptr, len * sizeof(GBUF_TYPE));
+  b->ptr = resized;
+#ifdef GBUF_TRACE
+  GBUF_TRACE(STRINGIFY_EXPANDED(BUF_T) " growing to %u items", len);
+#endif
+}
+
+/*
+ * Enlarge the buffer to hold at least `len' items. The size is at least
+ * doubled on every call, which keeps the total cost of repeated growing
+ * logarithmic in the number of reallocations.
+ */
+static void UNUSED
+GBUF_PREFIX(do_grow)(BUF_T *b, uns len)
+{
+  uns doubled = 2*b->len;
+  GBUF_PREFIX(set_size)(b, (len < doubled) ? doubled : len);
+}
+
+/*
+ * Make sure the buffer can hold at least `len' items and return a pointer
+ * to its storage. The pointer may differ from the one returned before if
+ * the buffer had to be enlarged.
+ */
+static inline GBUF_TYPE *
+GBUF_PREFIX(grow)(BUF_T *b, uns len)
+{
+  if (unlikely(b->len < len))
+    GBUF_PREFIX(do_grow)(b, len);
+
+  return b->ptr;
+}
+
+#undef GBUF_TYPE
+#undef GBUF_PREFIX
+#undef  GBUF_TRACE
+#undef BUF_T
diff --git a/lib/getopt.c b/lib/getopt.c

new file mode 100644 (file)

index 0000000..a15c935
--- /dev/null
+++ b/lib/getopt.c
@@ -0,0 +1,57 @@
+#include "lib/lib.h"
+#include "lib/getopt.h"
+
+/*
+ * Prepare getopt for parsing another argument vector from scratch.
+ * Setting optind to 0 is what GNU libc expects for a full re-initialization.
+ */
+void
+reset_getopt(void)
+{
+  optind = 0;
+}
+
+#ifdef TEST
+#include <stdio.h>
+
+/*
+ * Run getopt_long() over the arguments and print one line per option
+ * found, followed by the number of remaining non-option arguments (if any).
+ */
+static void
+parse(int argc, char **argv)
+{
+  static struct option longopts[] = {
+    { "longa", 0, 0, 'a' },
+    { "longb", 0, 0, 'b' },
+    { "longc", 1, 0, 'c' },
+    { "longd", 1, 0, 'd' },
+    { 0, 0, 0, 0 }
+  };
+
+  for (;;)
+    {
+      int c = getopt_long(argc, argv, "abc:d:", longopts, NULL);
+      if (c < 0)
+       break;
+
+      if (c == 'a' || c == 'b')
+       printf("option %c\n", c);
+      else if (c == 'c' || c == 'd')
+       printf("option %c with value `%s'\n", c, optarg);
+      else if (c == '?')
+       printf("unknown option\n");
+      else
+       printf("getopt returned unexpected char 0x%02x\n", c);
+    }
+
+  int rest = argc - optind;
+  if (rest != 0)
+    printf("%d nonoption arguments\n", rest);
+}
+
+/* Self-test: parse the command line twice, with reset_getopt() in between; both passes must print the same. */
+int
+main(int argc, char **argv)
+{
+  /* getopt's own error messages are suppressed; parse() reports unknown options itself */
+  opterr = 0;
+
+  parse(argc, argv);
+  printf("reset\n");
+
+  reset_getopt();
+  parse(argc, argv);
+
+  return 0;
+}
+#endif
diff --git a/lib/getopt.h b/lib/getopt.h

new file mode 100644 (file)

index 0000000..b4ff823
--- /dev/null
+++ b/lib/getopt.h
@@ -0,0 +1,92 @@
+/*
+ *     UCW Library -- Parsing of configuration and command-line options
+ *
+ *     (c) 2001--2006 Robert Spalek <robert@ucw.cz>
+ *     (c) 2003--2006 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#ifndef        _UCW_GETOPT_H
+#define        _UCW_GETOPT_H
+
+#ifdef CONFIG_OWN_GETOPT
+#include "lib/getopt/getopt-sh.h"
+#else
+#include <getopt.h>
+#endif
+
+void reset_getopt(void);
+
+/* Safe loading and reloading of configuration files: conf-input.c */
+
+extern char *cf_def_file;              /* DEFAULT_CONFIG; NULL if already loaded */
+extern char *cf_env_file;              /* ENV_VAR_CONFIG */
+int cf_reload(const char *file);
+int cf_load(const char *file);
+int cf_set(const char *string);
+
+/* Direct access to configuration items: conf-intr.c */
+
+#define CF_OPERATIONS T(CLOSE) T(SET) T(CLEAR) T(ALL) \
+  T(APPEND) T(PREPEND) T(REMOVE) T(EDIT) T(AFTER) T(BEFORE) T(COPY)
+  /* Closing brace finishes previous block.
+   * Basic attributes (static, dynamic, parsed) can be used with SET.
+   * Dynamic arrays can be used with SET, APPEND, PREPEND.
+   * Sections can be used with SET.
+   * Lists can be used with everything. */
+#define T(x) OP_##x,
+enum cf_operation { CF_OPERATIONS };
+#undef T
+
+struct cf_item;
+char *cf_find_item(const char *name, struct cf_item *item);
+char *cf_write_item(struct cf_item *item, enum cf_operation op, int number, char **pars);
+
+/* Debug dumping: conf-dump.c */
+
+struct fastbuf;
+void cf_dump_sections(struct fastbuf *fb);
+
+/* Journaling control: conf-journal.c */
+
+struct cf_journal_item;
+struct cf_journal_item *cf_journal_new_transaction(uns new_pool);
+void cf_journal_commit_transaction(uns new_pool, struct cf_journal_item *oldj);
+void cf_journal_rollback_transaction(uns new_pool, struct cf_journal_item *oldj);
+
+/*
+ * cf_getopt() takes care of parsing the command-line arguments, loading the
+ * default configuration file (cf_def_file) and processing configuration options.
+ * The calling convention is the same as with GNU getopt_long(), but you must prefix
+ * your own short/long options by the CF_(SHORT|LONG)_OPTS or pass CF_NO_LONG_OPTS
+ * if there are no long options.
+ *
+ * The default configuration file can be overridden by the --config option,
+ * which must come first. During parsing of all other options, the configuration
+ * is already available.
+ */
+
+#define        CF_SHORT_OPTS   "C:S:"
+#define        CF_LONG_OPTS    {"config",      1, 0, 'C'}, {"set",             1, 0, 'S'}, CF_LONG_OPTS_DEBUG
+#define CF_NO_LONG_OPTS (const struct option []) { CF_LONG_OPTS { NULL, 0, 0, 0 } }
+#ifndef CF_USAGE_TAB
+#define CF_USAGE_TAB ""
+#endif
+#define        CF_USAGE        \
+"-C, --config filename\t" CF_USAGE_TAB "Override the default configuration file\n\
+-S, --set sec.item=val\t" CF_USAGE_TAB "Manual setting of a configuration item\n" CF_USAGE_DEBUG
+
+#ifdef CONFIG_DEBUG
+#define CF_LONG_OPTS_DEBUG { "dumpconfig", 0, 0, 0x64436667 } ,
+#define CF_USAGE_DEBUG "    --dumpconfig\t" CF_USAGE_TAB "Dump program configuration\n"
+#else
+#define CF_LONG_OPTS_DEBUG
+#define CF_USAGE_DEBUG
+#endif
+
+// conf-input.c
+int cf_getopt(int argc, char * const argv[], const char *short_opts, const struct option *long_opts, int *long_index);
+
+#endif
diff --git a/lib/getopt.t b/lib/getopt.t

new file mode 100644 (file)

index 0000000..79e7fe2
--- /dev/null
+++ b/lib/getopt.t
@@ -0,0 +1,21 @@
+# Tests for getopt
+
+Run:   ../obj/lib/getopt-t -a -b --longc 2819 -d -a 1 2 3
+Out:   option a
+       option b
+       option c with value `2819'
+       option d with value `-a'
+       3 nonoption arguments
+       reset
+       option a
+       option b
+       option c with value `2819'
+       option d with value `-a'
+       3 nonoption arguments
+
+Run:   ../obj/lib/getopt-t -a -x
+Out:   option a
+       unknown option
+       reset
+       option a
+       unknown option
diff --git a/lib/getopt/Makefile b/lib/getopt/Makefile

new file mode 100644 (file)

index 0000000..97692a2
--- /dev/null
+++ b/lib/getopt/Makefile
@@ -0,0 +1,5 @@
+# Makefile for the UCW GetOpt Library (c) 2007 Pavel Charvat <pchar@ucw.cz>
+
+DIRS+=lib/getopt
+
+LIBUCW_MODS+=getopt/getopt-sh
diff --git a/lib/getopt/README b/lib/getopt/README

new file mode 100644 (file)

index 0000000..fe24015
--- /dev/null
+++ b/lib/getopt/README
@@ -0,0 +1,12 @@
+This directory contains getopt routines from the GNU libc 2.5.
+We need this as a fallback for our reset_getopt(), because there is
+no standardized interface for such an operation.
+
+They are distributed under the GNU LGPL.
+
+All files are exact copies of the original distribution with very
+few exceptions commented with `// SHERLOCK' prefix.
+I only provided my own getopt-sh.c, getopt-sh.h and Makefile.
+
+                                        Pavel Charvat, 2007
+
diff --git a/lib/getopt/getopt-sh.c b/lib/getopt/getopt-sh.c

new file mode 100644 (file)

index 0000000..0fe271b
--- /dev/null
+++ b/lib/getopt/getopt-sh.c
@@ -0,0 +1,4 @@
+#include "getopt-sh.h"  /* Must come first: its renaming macros have to be active while the sources below are compiled.  */
+#include "getopt_int.h"
+#include "getopt.c"  /* The implementation files are included textually, so everything builds as one translation unit.  */
+#include "getopt1.c"
diff --git a/lib/getopt/getopt-sh.h b/lib/getopt/getopt-sh.h

new file mode 100644 (file)

index 0000000..4597aca
--- /dev/null
+++ b/lib/getopt/getopt-sh.h
@@ -0,0 +1,14 @@
+#ifndef _UCW_GETOPT_GETOPT_SH_H
+#define _UCW_GETOPT_GETOPT_SH_H
+
+#define getopt sh_getopt  /* The public getopt names are mapped to sh_-prefixed ones; presumably so this bundled copy does not clash with the C library's symbols -- confirm.  */
+#define getopt_long sh_getopt_long
+#define getopt_long_only sh_getopt_longonly  /* NOTE: the target is spelled `longonly', without the second underscore.  */
+#define optarg sh_optarg
+#define optind sh_optind
+#define opterr sh_opterr
+#define optopt sh_optopt
+
+#include "lib/getopt/getopt.h"  /* Included after the macros above so that its declarations pick up the renamed identifiers.  */
+
+#endif
diff --git a/lib/getopt/getopt.c b/lib/getopt/getopt.c

new file mode 100644 (file)

index 0000000..1e13775
--- /dev/null
+++ b/lib/getopt/getopt.c
@@ -0,0 +1,1226 @@
+/* Getopt for GNU.
+   NOTE: getopt is now part of the C library, so if you don't know what
+   "Keep this file name-space clean" means, talk to drepper@gnu.org
+   before changing it!
+   Copyright (C) 1987,88,89,90,91,92,93,94,95,96,98,99,2000,2001,2002,2003,2004
+       Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+\f
+/* This tells Alpha OSF/1 not to define a getopt prototype in <stdio.h>.
+   Ditto for AIX 3.2 and <stdlib.h>.  */
+#ifndef _NO_PROTO
+# define _NO_PROTO
+#endif
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <stdio.h>
+
+/* Comment out all this code if we are using the GNU C Library, and are not
+   actually compiling the library itself.  This code is part of the GNU C
+   Library, but also included in many other GNU distributions.  Compiling
+   and linking in this code is a waste when using the GNU C library
+   (especially if it is a shared library).  Rather than having every GNU
+   program understand `configure --with-gnu-libc' and omit the object files,
+   it is simpler to just do this in the source for each such file.  */
+
+#define GETOPT_INTERFACE_VERSION 2
+#if !defined _LIBC && defined __GLIBC__ && __GLIBC__ >= 2
+# include <gnu-versions.h>
+# if _GNU_GETOPT_INTERFACE_VERSION == GETOPT_INTERFACE_VERSION
+//#  define ELIDE_CODE   // SHERLOCK: disabled
+# endif
+#endif
+
+#ifndef ELIDE_CODE
+
+
+/* This needs to come after some library #include
+   to get __GNU_LIBRARY__ defined.  */
+#ifdef __GNU_LIBRARY__
+/* Don't include stdlib.h for non-GNU C libraries because some of them
+   contain conflicting prototypes for getopt.  */
+# include <stdlib.h>
+# include <unistd.h>
+#endif /* GNU C library.  */
+
+#include <string.h>
+
+#ifdef VMS
+# include <unixlib.h>
+#endif
+
+#ifdef _LIBC
+# include <libintl.h>
+#else
+//# include "gettext.h"  // SHERLOCK: replaced by <libintl.h>
+# include <libintl.h>
+# define _(msgid) gettext (msgid)
+#endif
+
+#if defined _LIBC && defined USE_IN_LIBIO
+# include <wchar.h>
+#endif
+
+#ifndef attribute_hidden
+# define attribute_hidden
+#endif
+
+/* This version of `getopt' appears to the caller like standard Unix `getopt'
+   but it behaves differently for the user, since it allows the user
+   to intersperse the options with the other arguments.
+
+   As `getopt' works, it permutes the elements of ARGV so that,
+   when it is done, all the options precede everything else.  Thus
+   all application programs are extended to handle flexible argument order.
+
+   Setting the environment variable POSIXLY_CORRECT disables permutation.
+   Then the behavior is completely standard.
+
+   GNU application programs can use a third alternative mode in which
+   they can distinguish the relative order of options and other arguments.  */
+
+#include "getopt.h"
+#include "getopt_int.h"
+
+/* For communication from `getopt' to the caller.
+   When `getopt' finds an option that takes an argument,
+   the argument value is returned here.
+   Also, when `ordering' is RETURN_IN_ORDER,
+   each non-option ARGV-element is returned here.  */
+
+char *optarg;
+
+/* Index in ARGV of the next element to be scanned.
+   This is used for communication to and from the caller
+   and for communication between successive calls to `getopt'.
+
+   On entry to `getopt', zero means this is the first call; initialize.
+
+   When `getopt' returns -1, this is the index of the first of the
+   non-option elements that the caller should itself scan.
+
+   Otherwise, `optind' communicates from one call to the next
+   how much of ARGV has been scanned so far.  */
+
+/* 1003.2 says this must be 1 before any call.  */
+int optind = 1;
+
+/* Callers store zero here to inhibit the error message
+   for unrecognized options.  */
+
+int opterr = 1;
+
+/* Set to an option character which was unrecognized.
+   This must be initialized on some systems to avoid linking in the
+   system's own getopt implementation.  */
+
+int optopt = '?';
+
+/* Keep a global copy of all internal members of getopt_data.  */
+
+static struct _getopt_data getopt_data;
+
+\f
+#ifndef __GNU_LIBRARY__
+
+/* Avoid depending on library functions or files
+   whose names are inconsistent.  */
+
+#ifndef getenv
+extern char *getenv ();
+#endif
+
+#endif /* not __GNU_LIBRARY__ */
+\f
+#ifdef _LIBC
+/* Stored original parameters.
+   XXX This is no good solution.  We should rather copy the args so
+   that we can compare them later.  But we must not use malloc(3).  */
+extern int __libc_argc;
+extern char **__libc_argv;
+
+/* Bash 2.0 gives us an environment variable containing flags
+   indicating ARGV elements that should not be considered arguments.  */
+
+# ifdef USE_NONOPTION_FLAGS
+/* Defined in getopt_init.c  */
+extern char *__getopt_nonoption_flags;
+# endif
+
+# ifdef USE_NONOPTION_FLAGS
+#  define SWAP_FLAGS(ch1, ch2) \
+  if (d->__nonoption_flags_len > 0)                                          \
+    {                                                                        \
+      char __tmp = __getopt_nonoption_flags[ch1];                            \
+      __getopt_nonoption_flags[ch1] = __getopt_nonoption_flags[ch2];         \
+      __getopt_nonoption_flags[ch2] = __tmp;                                 \
+    }
+# else
+#  define SWAP_FLAGS(ch1, ch2)
+# endif
+#else  /* !_LIBC */
+# define SWAP_FLAGS(ch1, ch2)
+#endif /* _LIBC */
+
+/* Exchange two adjacent subsequences of ARGV.
+   One subsequence is elements [first_nonopt,last_nonopt)
+   which contains all the non-options that have been skipped so far.
+   The other is elements [last_nonopt,optind), which contains all
+   the options processed since those non-options were skipped.
+
+   `first_nonopt' and `last_nonopt' are relocated so that they describe
+   the new indices of the non-options in ARGV after they are moved.
+
+   ARGV is permuted in place.  D supplies the three indices through its
+   `__first_nonopt', `__last_nonopt' and `optind' members; the first two
+   are rewritten before returning, while `optind' is only read.  */
+
+static void
+exchange (char **argv, struct _getopt_data *d)
+{
+  int bottom = d->__first_nonopt;      /* First skipped non-option.  */
+  int middle = d->__last_nonopt;       /* First option after the non-options.  */
+  int top = d->optind;                 /* One past the last processed option.  */
+  char *tem;
+
+  /* Exchange the shorter segment with the far end of the longer segment.
+     That puts the shorter segment into the right place.
+     It leaves the longer segment in the right place overall,
+     but it consists of two parts that need to be swapped next.  */
+
+#if defined _LIBC && defined USE_NONOPTION_FLAGS
+  /* First make sure the handling of the `__getopt_nonoption_flags'
+     string can work normally.  Our top argument must be in the range
+     of the string.  */
+  if (d->__nonoption_flags_len > 0 && top >= d->__nonoption_flags_max_len)
+    {
+      /* We must extend the array.  The user plays games with us and
+        presents new arguments.  The new string has TOP + 1 bytes: the
+        old contents followed by zero bytes.  If the allocation fails,
+        both recorded lengths are set to zero instead.  */
+      char *new_str = malloc (top + 1);
+      if (new_str == NULL)
+       d->__nonoption_flags_len = d->__nonoption_flags_max_len = 0;
+      else
+       {
+         memset (__mempcpy (new_str, __getopt_nonoption_flags,
+                            d->__nonoption_flags_max_len),
+                 '\0', top + 1 - d->__nonoption_flags_max_len);
+         d->__nonoption_flags_max_len = top + 1;
+         __getopt_nonoption_flags = new_str;
+       }
+    }
+#endif
+
+  while (top > middle && middle > bottom)      /* Loop while both segments are non-empty.  */
+    {
+      if (top - middle > middle - bottom)
+       {
+         /* Bottom segment is the short one.  */
+         int len = middle - bottom;
+         register int i;
+
+         /* Swap it with the top part of the top segment.  */
+         for (i = 0; i < len; i++)
+           {
+             tem = argv[bottom + i];
+             argv[bottom + i] = argv[top - (middle - bottom) + i];
+             argv[top - (middle - bottom) + i] = tem;
+             SWAP_FLAGS (bottom + i, top - (middle - bottom) + i);
+           }
+         /* Exclude the moved bottom segment from further swapping.  */
+         top -= len;
+       }
+      else
+       {
+         /* Top segment is the short one.  */
+         int len = top - middle;
+         register int i;
+
+         /* Swap it with the bottom part of the bottom segment.  */
+         for (i = 0; i < len; i++)
+           {
+             tem = argv[bottom + i];
+             argv[bottom + i] = argv[middle + i];
+             argv[middle + i] = tem;
+             SWAP_FLAGS (bottom + i, middle + i);
+           }
+         /* Exclude the moved top segment from further swapping.  */
+         bottom += len;
+       }
+    }
+
+  /* Update records for the slots the non-options now occupy.
+     The start of the non-option range advances by the number of options
+     that were moved in front of it (optind - last_nonopt), and the range
+     now ends at optind.  */
+
+  d->__first_nonopt += (d->optind - d->__last_nonopt);
+  d->__last_nonopt = d->optind;
+}
+
+/* Initialize the internal data when the first call is made.
+
+   Makes the non-option range of D empty at `d->optind', clears
+   `__nextchar', records whether POSIXLY_CORRECT is present in the
+   environment, and selects the ordering mode: a leading `-' in
+   OPTSTRING selects RETURN_IN_ORDER and a leading `+' selects
+   REQUIRE_ORDER; otherwise REQUIRE_ORDER is used when POSIXLY_CORRECT
+   is set and PERMUTE when it is not.
+
+   Returns OPTSTRING with a leading `-' or `+' skipped.  ARGC and ARGV
+   are consulted only when both _LIBC and USE_NONOPTION_FLAGS are
+   defined.  */
+
+static const char *
+_getopt_initialize (int argc, char *const *argv, const char *optstring,
+                   struct _getopt_data *d)
+{
+  /* Start processing options with ARGV-element 1 (since ARGV-element 0
+     is the program name); the sequence of previously skipped
+     non-option ARGV-elements is empty.  */
+
+  d->__first_nonopt = d->__last_nonopt = d->optind;
+
+  d->__nextchar = NULL;
+
+  d->__posixly_correct = !!getenv ("POSIXLY_CORRECT");        /* `!!' reduces the pointer to 0 or 1.  */
+
+  /* Determine how to handle the ordering of options and nonoptions.  */
+
+  if (optstring[0] == '-')
+    {
+      d->__ordering = RETURN_IN_ORDER;
+      ++optstring;
+    }
+  else if (optstring[0] == '+')
+    {
+      d->__ordering = REQUIRE_ORDER;
+      ++optstring;
+    }
+  else if (d->__posixly_correct)
+    d->__ordering = REQUIRE_ORDER;
+  else
+    d->__ordering = PERMUTE;
+
+#if defined _LIBC && defined USE_NONOPTION_FLAGS
+  if (!d->__posixly_correct
+      && argc == __libc_argc && argv == __libc_argv)
+    {
+      if (d->__nonoption_flags_max_len == 0)   /* Zero means the flag string has not been set up yet.  */
+       {
+         if (__getopt_nonoption_flags == NULL
+             || __getopt_nonoption_flags[0] == '\0')
+           d->__nonoption_flags_max_len = -1; /* No flags given; -1 is nonzero, so this setup is not repeated.  */
+         else
+           {
+             const char *orig_str = __getopt_nonoption_flags;
+             int len = d->__nonoption_flags_max_len = strlen (orig_str);
+             if (d->__nonoption_flags_max_len < argc)
+               d->__nonoption_flags_max_len = argc;
+             __getopt_nonoption_flags =
+               (char *) malloc (d->__nonoption_flags_max_len);
+             if (__getopt_nonoption_flags == NULL)
+               d->__nonoption_flags_max_len = -1;
+             else
+               memset (__mempcpy (__getopt_nonoption_flags, orig_str, len),   /* Copy the flags, then zero-fill up to max_len.  */
+                       '\0', d->__nonoption_flags_max_len - len);
+           }
+       }
+      d->__nonoption_flags_len = d->__nonoption_flags_max_len;
+    }
+  else
+    d->__nonoption_flags_len = 0;
+#endif
+
+  return optstring;
+}
+\f
+/* Scan elements of ARGV (whose length is ARGC) for option characters
+   given in OPTSTRING.
+
+   If an element of ARGV starts with '-', and is not exactly "-" or "--",
+   then it is an option element.  The characters of this element
+   (aside from the initial '-') are option characters.  If `getopt'
+   is called repeatedly, it returns successively each of the option characters
+   from each of the option elements.
+
+   If `getopt' finds another option character, it returns that character,
+   updating `optind' and `nextchar' so that the next call to `getopt' can
+   resume the scan with the following option character or ARGV-element.
+
+   If there are no more option characters, `getopt' returns -1.
+   Then `optind' is the index in ARGV of the first ARGV-element
+   that is not an option.  (The ARGV-elements have been permuted
+   so that those that are not options now come last.)
+
+   OPTSTRING is a string containing the legitimate option characters.
+   If an option character is seen that is not listed in OPTSTRING,
+   return '?' after printing an error message.  If you set `opterr' to
+   zero, the error message is suppressed but we still return '?'.
+
+   If a char in OPTSTRING is followed by a colon, that means it wants an arg,
+   so the following text in the same ARGV-element, or the text of the following
+   ARGV-element, is returned in `optarg'.  Two colons mean an option that
+   wants an optional arg; if there is text in the current ARGV-element,
+   it is returned in `optarg', otherwise `optarg' is set to zero.
+
+   If OPTSTRING starts with `-' or `+', it requests different methods of
+   handling the non-option ARGV-elements.
+   See the comments about RETURN_IN_ORDER and REQUIRE_ORDER, above.
+
+   Long-named options begin with `--' instead of `-'.
+   Their names may be abbreviated as long as the abbreviation is unique
+   or is an exact match for some defined option.  If they have an
+   argument, it follows the option name in the same ARGV-element, separated
+   from the option name by a `=', or else in the next ARGV-element.
+   When `getopt' finds a long-named option, it returns 0 if that option's
+   `flag' field is nonzero, the value of the option's `val' field
+   if the `flag' field is zero.
+
+   The elements of ARGV aren't really const, because we permute them.
+   But we pretend they're const in the prototype to be compatible
+   with other systems.
+
+   LONGOPTS is a vector of `struct option' terminated by an
+   element containing a name which is zero.
+
+   LONGIND returns the index in LONGOPT of the long-named option found.
+   It is only valid when a long-named option has been found by the most
+   recent call.
+
+   If LONG_ONLY is nonzero, '-' as well as '--' can introduce
+   long-named options.  */
+
+int
+_getopt_internal_r (int argc, char *const *argv, const char *optstring,
+                   const struct option *longopts, int *longind,
+                   int long_only, struct _getopt_data *d)
+{
+  int print_errors = d->opterr;
+  if (optstring[0] == ':')
+    print_errors = 0;
+
+  if (argc < 1)
+    return -1;
+
+  d->optarg = NULL;
+
+  if (d->optind == 0 || !d->__initialized)
+    {
+      if (d->optind == 0)
+       d->optind = 1;  /* Don't scan ARGV[0], the program name.  */
+      optstring = _getopt_initialize (argc, argv, optstring, d);
+      d->__initialized = 1;
+    }
+
+  /* Test whether ARGV[optind] points to a non-option argument.
+     Either it does not have option syntax, or there is an environment flag
+     from the shell indicating it is not an option.  The latter information
+     is only used when this code is built as part of the GNU libc.  */
+#if defined _LIBC && defined USE_NONOPTION_FLAGS
+# define NONOPTION_P (argv[d->optind][0] != '-' || argv[d->optind][1] == '\0' \
+                     || (d->optind < d->__nonoption_flags_len                \
+                         && __getopt_nonoption_flags[d->optind] == '1'))
+#else
+# define NONOPTION_P (argv[d->optind][0] != '-' || argv[d->optind][1] == '\0')
+#endif
+
+  if (d->__nextchar == NULL || *d->__nextchar == '\0')
+    {
+      /* Advance to the next ARGV-element.  */
+
+      /* Give FIRST_NONOPT & LAST_NONOPT rational values if OPTIND has been
+        moved back by the user (who may also have changed the arguments).  */
+      if (d->__last_nonopt > d->optind)
+       d->__last_nonopt = d->optind;
+      if (d->__first_nonopt > d->optind)
+       d->__first_nonopt = d->optind;
+
+      if (d->__ordering == PERMUTE)
+       {
+         /* If we have just processed some options following some non-options,
+            exchange them so that the options come first.  */
+
+         if (d->__first_nonopt != d->__last_nonopt
+             && d->__last_nonopt != d->optind)
+           exchange ((char **) argv, d);
+         else if (d->__last_nonopt != d->optind)
+           d->__first_nonopt = d->optind;
+
+         /* Skip any additional non-options
+            and extend the range of non-options previously skipped.  */
+
+         while (d->optind < argc && NONOPTION_P)
+           d->optind++;
+         d->__last_nonopt = d->optind;
+       }
+
+      /* The special ARGV-element `--' means premature end of options.
+        Skip it like a null option,
+        then exchange with previous non-options as if it were an option,
+        then skip everything else like a non-option.  */
+
+      if (d->optind != argc && !strcmp (argv[d->optind], "--"))
+       {
+         d->optind++;
+
+         if (d->__first_nonopt != d->__last_nonopt
+             && d->__last_nonopt != d->optind)
+           exchange ((char **) argv, d);
+         else if (d->__first_nonopt == d->__last_nonopt)
+           d->__first_nonopt = d->optind;
+         d->__last_nonopt = argc;
+
+         d->optind = argc;
+       }
+
+      /* If we have done all the ARGV-elements, stop the scan
+        and back over any non-options that we skipped and permuted.  */
+
+      if (d->optind == argc)
+       {
+         /* Set the next-arg-index to point at the non-options
+            that we previously skipped, so the caller will digest them.  */
+         if (d->__first_nonopt != d->__last_nonopt)
+           d->optind = d->__first_nonopt;
+         return -1;
+       }
+
+      /* If we have come to a non-option and did not permute it,
+        either stop the scan or describe it to the caller and pass it by.  */
+
+      if (NONOPTION_P)
+       {
+         if (d->__ordering == REQUIRE_ORDER)
+           return -1;
+         d->optarg = argv[d->optind++];
+         return 1;
+       }
+
+      /* We have found another option-ARGV-element.
+        Skip the initial punctuation.  */
+
+      d->__nextchar = (argv[d->optind] + 1
+                 + (longopts != NULL && argv[d->optind][1] == '-'));
+    }
+
+  /* Decode the current option-ARGV-element.  */
+
+  /* Check whether the ARGV-element is a long option.
+
+     If long_only and the ARGV-element has the form "-f", where f is
+     a valid short option, don't consider it an abbreviated form of
+     a long option that starts with f.  Otherwise there would be no
+     way to give the -f short option.
+
+     On the other hand, if there's a long option "fubar" and
+     the ARGV-element is "-fu", do consider that an abbreviation of
+     the long option, just like "--fu", and not "-f" with arg "u".
+
+     This distinction seems to be the most useful approach.  */
+
+  if (longopts != NULL
+      && (argv[d->optind][1] == '-'
+         || (long_only && (argv[d->optind][2]
+                           || !strchr (optstring, argv[d->optind][1])))))
+    {
+      char *nameend;
+      const struct option *p;
+      const struct option *pfound = NULL;
+      int exact = 0;
+      int ambig = 0;
+      int indfound = -1;
+      int option_index;
+
+      for (nameend = d->__nextchar; *nameend && *nameend != '='; nameend++)
+       /* Do nothing.  */ ;
+
+      /* Test all long options for either exact match
+        or abbreviated matches.  */
+      for (p = longopts, option_index = 0; p->name; p++, option_index++)
+       if (!strncmp (p->name, d->__nextchar, nameend - d->__nextchar))
+         {
+           if ((unsigned int) (nameend - d->__nextchar)
+               == (unsigned int) strlen (p->name))
+             {
+               /* Exact match found.  */
+               pfound = p;
+               indfound = option_index;
+               exact = 1;
+               break;
+             }
+           else if (pfound == NULL)
+             {
+               /* First nonexact match found.  */
+               pfound = p;
+               indfound = option_index;
+             }
+           else if (long_only
+                    || pfound->has_arg != p->has_arg
+                    || pfound->flag != p->flag
+                    || pfound->val != p->val)
+             /* Second or later nonexact match found.  */
+             ambig = 1;
+         }
+
+      if (ambig && !exact)
+       {
+         if (print_errors)
+           {
+#if defined _LIBC && defined USE_IN_LIBIO
+             char *buf;
+
+             if (__asprintf (&buf, _("%s: option `%s' is ambiguous\n"),
+                             argv[0], argv[d->optind]) >= 0)
+               {
+                 _IO_flockfile (stderr);
+
+                 int old_flags2 = ((_IO_FILE *) stderr)->_flags2;
+                 ((_IO_FILE *) stderr)->_flags2 |= _IO_FLAGS2_NOTCANCEL;
+
+                 __fxprintf (NULL, "%s", buf);
+
+                 ((_IO_FILE *) stderr)->_flags2 = old_flags2;
+                 _IO_funlockfile (stderr);
+
+                 free (buf);
+               }
+#else
+             fprintf (stderr, _("%s: option `%s' is ambiguous\n"),
+                      argv[0], argv[d->optind]);
+#endif
+           }
+         d->__nextchar += strlen (d->__nextchar);
+         d->optind++;
+         d->optopt = 0;
+         return '?';
+       }
+
+      if (pfound != NULL)
+       {
+         option_index = indfound;
+         d->optind++;
+         if (*nameend)
+           {
+             /* Don't test has_arg with >, because some C compilers don't
+                allow it to be used on enums.  */
+             if (pfound->has_arg)
+               d->optarg = nameend + 1;
+             else
+               {
+                 if (print_errors)
+                   {
+#if defined _LIBC && defined USE_IN_LIBIO
+                     char *buf;
+                     int n;
+#endif
+
+                     if (argv[d->optind - 1][1] == '-')
+                       {
+                         /* --option */
+#if defined _LIBC && defined USE_IN_LIBIO
+                         n = __asprintf (&buf, _("\
+%s: option `--%s' doesn't allow an argument\n"),
+                                         argv[0], pfound->name);
+#else
+                         fprintf (stderr, _("\
+%s: option `--%s' doesn't allow an argument\n"),
+                                  argv[0], pfound->name);
+#endif
+                       }
+                     else
+                       {
+                         /* +option or -option */
+#if defined _LIBC && defined USE_IN_LIBIO
+                         n = __asprintf (&buf, _("\
+%s: option `%c%s' doesn't allow an argument\n"),
+                                         argv[0], argv[d->optind - 1][0],
+                                         pfound->name);
+#else
+                         fprintf (stderr, _("\
+%s: option `%c%s' doesn't allow an argument\n"),
+                                  argv[0], argv[d->optind - 1][0],
+                                  pfound->name);
+#endif
+                       }
+
+#if defined _LIBC && defined USE_IN_LIBIO
+                     if (n >= 0)
+                       {
+                         _IO_flockfile (stderr);
+
+                         int old_flags2 = ((_IO_FILE *) stderr)->_flags2;
+                         ((_IO_FILE *) stderr)->_flags2
+                           |= _IO_FLAGS2_NOTCANCEL;
+
+                         __fxprintf (NULL, "%s", buf);
+
+                         ((_IO_FILE *) stderr)->_flags2 = old_flags2;
+                         _IO_funlockfile (stderr);
+
+                         free (buf);
+                       }
+#endif
+                   }
+
+                 d->__nextchar += strlen (d->__nextchar);
+
+                 d->optopt = pfound->val;
+                 return '?';
+               }
+           }
+         else if (pfound->has_arg == 1)
+           {
+             if (d->optind < argc)
+               d->optarg = argv[d->optind++];
+             else
+               {
+                 if (print_errors)
+                   {
+#if defined _LIBC && defined USE_IN_LIBIO
+                     char *buf;
+
+                     if (__asprintf (&buf, _("\
+%s: option `%s' requires an argument\n"),
+                                     argv[0], argv[d->optind - 1]) >= 0)
+                       {
+                         _IO_flockfile (stderr);
+
+                         int old_flags2 = ((_IO_FILE *) stderr)->_flags2;
+                         ((_IO_FILE *) stderr)->_flags2
+                           |= _IO_FLAGS2_NOTCANCEL;
+
+                         __fxprintf (NULL, "%s", buf);
+
+                         ((_IO_FILE *) stderr)->_flags2 = old_flags2;
+                         _IO_funlockfile (stderr);
+
+                         free (buf);
+                       }
+#else
+                     fprintf (stderr,
+                              _("%s: option `%s' requires an argument\n"),
+                              argv[0], argv[d->optind - 1]);
+#endif
+                   }
+                 d->__nextchar += strlen (d->__nextchar);
+                 d->optopt = pfound->val;
+                 return optstring[0] == ':' ? ':' : '?';
+               }
+           }
+         d->__nextchar += strlen (d->__nextchar);
+         if (longind != NULL)
+           *longind = option_index;
+         if (pfound->flag)
+           {
+             *(pfound->flag) = pfound->val;
+             return 0;
+           }
+         return pfound->val;
+       }
+
+      /* Can't find it as a long option.  If this is not getopt_long_only,
+        or the option starts with '--' or is not a valid short
+        option, then it's an error.
+        Otherwise interpret it as a short option.  */
+      if (!long_only || argv[d->optind][1] == '-'
+         || strchr (optstring, *d->__nextchar) == NULL)
+       {
+         if (print_errors)
+           {
+#if defined _LIBC && defined USE_IN_LIBIO
+             char *buf;
+             int n;
+#endif
+
+             if (argv[d->optind][1] == '-')
+               {
+                 /* --option */
+#if defined _LIBC && defined USE_IN_LIBIO
+                 n = __asprintf (&buf, _("%s: unrecognized option `--%s'\n"),
+                                 argv[0], d->__nextchar);
+#else
+                 fprintf (stderr, _("%s: unrecognized option `--%s'\n"),
+                          argv[0], d->__nextchar);
+#endif
+               }
+             else
+               {
+                 /* +option or -option */
+#if defined _LIBC && defined USE_IN_LIBIO
+                 n = __asprintf (&buf, _("%s: unrecognized option `%c%s'\n"),
+                                 argv[0], argv[d->optind][0], d->__nextchar);
+#else
+                 fprintf (stderr, _("%s: unrecognized option `%c%s'\n"),
+                          argv[0], argv[d->optind][0], d->__nextchar);
+#endif
+               }
+
+#if defined _LIBC && defined USE_IN_LIBIO
+             if (n >= 0)
+               {
+                 _IO_flockfile (stderr);
+
+                 int old_flags2 = ((_IO_FILE *) stderr)->_flags2;
+                 ((_IO_FILE *) stderr)->_flags2 |= _IO_FLAGS2_NOTCANCEL;
+
+                 __fxprintf (NULL, "%s", buf);
+
+                 ((_IO_FILE *) stderr)->_flags2 = old_flags2;
+                 _IO_funlockfile (stderr);
+
+                 free (buf);
+               }
+#endif
+           }
+         d->__nextchar = (char *) "";
+         d->optind++;
+         d->optopt = 0;
+         return '?';
+       }
+    }
+
+  /* Look at and handle the next short option-character.  */
+
+  {
+    char c = *d->__nextchar++;
+    char *temp = strchr (optstring, c);
+
+    /* Increment `optind' when we start to process its last character.  */
+    if (*d->__nextchar == '\0')
+      ++d->optind;
+
+    if (temp == NULL || c == ':')
+      {
+       if (print_errors)
+         {
+#if defined _LIBC && defined USE_IN_LIBIO
+             char *buf;
+             int n;
+#endif
+
+           if (d->__posixly_correct)
+             {
+               /* 1003.2 specifies the format of this message.  */
+#if defined _LIBC && defined USE_IN_LIBIO
+               n = __asprintf (&buf, _("%s: illegal option -- %c\n"),
+                               argv[0], c);
+#else
+               fprintf (stderr, _("%s: illegal option -- %c\n"), argv[0], c);
+#endif
+             }
+           else
+             {
+#if defined _LIBC && defined USE_IN_LIBIO
+               n = __asprintf (&buf, _("%s: invalid option -- %c\n"),
+                               argv[0], c);
+#else
+               fprintf (stderr, _("%s: invalid option -- %c\n"), argv[0], c);
+#endif
+             }
+
+#if defined _LIBC && defined USE_IN_LIBIO
+           if (n >= 0)
+             {
+               _IO_flockfile (stderr);
+
+               int old_flags2 = ((_IO_FILE *) stderr)->_flags2;
+               ((_IO_FILE *) stderr)->_flags2 |= _IO_FLAGS2_NOTCANCEL;
+
+               __fxprintf (NULL, "%s", buf);
+
+               ((_IO_FILE *) stderr)->_flags2 = old_flags2;
+               _IO_funlockfile (stderr);
+
+               free (buf);
+             }
+#endif
+         }
+       d->optopt = c;
+       return '?';
+      }
+    /* Convenience. Treat POSIX -W foo same as long option --foo */
+    if (temp[0] == 'W' && temp[1] == ';')
+      {
+       char *nameend;
+       const struct option *p;
+       const struct option *pfound = NULL;
+       int exact = 0;
+       int ambig = 0;
+       int indfound = 0;
+       int option_index;
+
+       /* This is an option that requires an argument.  */
+       if (*d->__nextchar != '\0')
+         {
+           d->optarg = d->__nextchar;
+           /* If we end this ARGV-element by taking the rest as an arg,
+              we must advance to the next element now.  */
+           d->optind++;
+         }
+       else if (d->optind == argc)
+         {
+           if (print_errors)
+             {
+               /* 1003.2 specifies the format of this message.  */
+#if defined _LIBC && defined USE_IN_LIBIO
+               char *buf;
+
+               if (__asprintf (&buf,
+                               _("%s: option requires an argument -- %c\n"),
+                               argv[0], c) >= 0)
+                 {
+                   _IO_flockfile (stderr);
+
+                   int old_flags2 = ((_IO_FILE *) stderr)->_flags2;
+                   ((_IO_FILE *) stderr)->_flags2 |= _IO_FLAGS2_NOTCANCEL;
+
+                   __fxprintf (NULL, "%s", buf);
+
+                   ((_IO_FILE *) stderr)->_flags2 = old_flags2;
+                   _IO_funlockfile (stderr);
+
+                   free (buf);
+                 }
+#else
+               fprintf (stderr, _("%s: option requires an argument -- %c\n"),
+                        argv[0], c);
+#endif
+             }
+           d->optopt = c;
+           if (optstring[0] == ':')
+             c = ':';
+           else
+             c = '?';
+           return c;
+         }
+       else
+         /* We already incremented `d->optind' once;
+            increment it again when taking next ARGV-elt as argument.  */
+         d->optarg = argv[d->optind++];
+
+       /* optarg is now the argument, see if it's in the
+          table of longopts.  */
+
+       for (d->__nextchar = nameend = d->optarg; *nameend && *nameend != '=';
+            nameend++)
+         /* Do nothing.  */ ;
+
+       /* Test all long options for either exact match
+          or abbreviated matches.  */
+       for (p = longopts, option_index = 0; p->name; p++, option_index++)
+         if (!strncmp (p->name, d->__nextchar, nameend - d->__nextchar))
+           {
+             if ((unsigned int) (nameend - d->__nextchar) == strlen (p->name))
+               {
+                 /* Exact match found.  */
+                 pfound = p;
+                 indfound = option_index;
+                 exact = 1;
+                 break;
+               }
+             else if (pfound == NULL)
+               {
+                 /* First nonexact match found.  */
+                 pfound = p;
+                 indfound = option_index;
+               }
+             else
+               /* Second or later nonexact match found.  */
+               ambig = 1;
+           }
+       if (ambig && !exact)
+         {
+           if (print_errors)
+             {
+#if defined _LIBC && defined USE_IN_LIBIO
+               char *buf;
+
+               if (__asprintf (&buf, _("%s: option `-W %s' is ambiguous\n"),
+                               argv[0], argv[d->optind]) >= 0)
+                 {
+                   _IO_flockfile (stderr);
+
+                   int old_flags2 = ((_IO_FILE *) stderr)->_flags2;
+                   ((_IO_FILE *) stderr)->_flags2 |= _IO_FLAGS2_NOTCANCEL;
+
+                   __fxprintf (NULL, "%s", buf);
+
+                   ((_IO_FILE *) stderr)->_flags2 = old_flags2;
+                   _IO_funlockfile (stderr);
+
+                   free (buf);
+                 }
+#else
+               fprintf (stderr, _("%s: option `-W %s' is ambiguous\n"),
+                        argv[0], argv[d->optind]);
+#endif
+             }
+           d->__nextchar += strlen (d->__nextchar);
+           d->optind++;
+           return '?';
+         }
+       if (pfound != NULL)
+         {
+           option_index = indfound;
+           if (*nameend)
+             {
+               /* Don't test has_arg with >, because some C compilers don't
+                  allow it to be used on enums.  */
+               if (pfound->has_arg)
+                 d->optarg = nameend + 1;
+               else
+                 {
+                   if (print_errors)
+                     {
+#if defined _LIBC && defined USE_IN_LIBIO
+                       char *buf;
+
+                       if (__asprintf (&buf, _("\
+%s: option `-W %s' doesn't allow an argument\n"),
+                                       argv[0], pfound->name) >= 0)
+                         {
+                           _IO_flockfile (stderr);
+
+                           int old_flags2 = ((_IO_FILE *) stderr)->_flags2;
+                           ((_IO_FILE *) stderr)->_flags2
+                             |= _IO_FLAGS2_NOTCANCEL;
+
+                           __fxprintf (NULL, "%s", buf);
+
+                           ((_IO_FILE *) stderr)->_flags2 = old_flags2;
+                           _IO_funlockfile (stderr);
+
+                           free (buf);
+                         }
+#else
+                       fprintf (stderr, _("\
+%s: option `-W %s' doesn't allow an argument\n"),
+                                argv[0], pfound->name);
+#endif
+                     }
+
+                   d->__nextchar += strlen (d->__nextchar);
+                   return '?';
+                 }
+             }
+           else if (pfound->has_arg == 1)
+             {
+               if (d->optind < argc)
+                 d->optarg = argv[d->optind++];
+               else
+                 {
+                   if (print_errors)
+                     {
+#if defined _LIBC && defined USE_IN_LIBIO
+                       char *buf;
+
+                       if (__asprintf (&buf, _("\
+%s: option `%s' requires an argument\n"),
+                                       argv[0], argv[d->optind - 1]) >= 0)
+                         {
+                           _IO_flockfile (stderr);
+
+                           int old_flags2 = ((_IO_FILE *) stderr)->_flags2;
+                           ((_IO_FILE *) stderr)->_flags2
+                             |= _IO_FLAGS2_NOTCANCEL;
+
+                           __fxprintf (NULL, "%s", buf);
+
+                           ((_IO_FILE *) stderr)->_flags2 = old_flags2;
+                           _IO_funlockfile (stderr);
+
+                           free (buf);
+                         }
+#else
+                       fprintf (stderr,
+                                _("%s: option `%s' requires an argument\n"),
+                                argv[0], argv[d->optind - 1]);
+#endif
+                     }
+                   d->__nextchar += strlen (d->__nextchar);
+                   return optstring[0] == ':' ? ':' : '?';
+                 }
+             }
+           d->__nextchar += strlen (d->__nextchar);
+           if (longind != NULL)
+             *longind = option_index;
+           if (pfound->flag)
+             {
+               *(pfound->flag) = pfound->val;
+               return 0;
+             }
+           return pfound->val;
+         }
+         d->__nextchar = NULL;
+         return 'W';   /* Let the application handle it.   */
+      }
+    if (temp[1] == ':')
+      {
+       if (temp[2] == ':')
+         {
+           /* This is an option that accepts an argument optionally.  */
+           if (*d->__nextchar != '\0')
+             {
+               d->optarg = d->__nextchar;
+               d->optind++;
+             }
+           else
+             d->optarg = NULL;
+           d->__nextchar = NULL;
+         }
+       else
+         {
+           /* This is an option that requires an argument.  */
+           if (*d->__nextchar != '\0')
+             {
+               d->optarg = d->__nextchar;
+               /* If we end this ARGV-element by taking the rest as an arg,
+                  we must advance to the next element now.  */
+               d->optind++;
+             }
+           else if (d->optind == argc)
+             {
+               if (print_errors)
+                 {
+                   /* 1003.2 specifies the format of this message.  */
+#if defined _LIBC && defined USE_IN_LIBIO
+                   char *buf;
+
+                   if (__asprintf (&buf, _("\
+%s: option requires an argument -- %c\n"),
+                                   argv[0], c) >= 0)
+                     {
+                       _IO_flockfile (stderr);
+
+                       int old_flags2 = ((_IO_FILE *) stderr)->_flags2;
+                       ((_IO_FILE *) stderr)->_flags2 |= _IO_FLAGS2_NOTCANCEL;
+
+                       __fxprintf (NULL, "%s", buf);
+
+                       ((_IO_FILE *) stderr)->_flags2 = old_flags2;
+                       _IO_funlockfile (stderr);
+
+                       free (buf);
+                     }
+#else
+                   fprintf (stderr,
+                            _("%s: option requires an argument -- %c\n"),
+                            argv[0], c);
+#endif
+                 }
+               d->optopt = c;
+               if (optstring[0] == ':')
+                 c = ':';
+               else
+                 c = '?';
+             }
+           else
+             /* We already incremented `optind' once;
+                increment it again when taking next ARGV-elt as argument.  */
+             d->optarg = argv[d->optind++];
+           d->__nextchar = NULL;
+         }
+      }
+    return c;
+  }
+}
+
+int
+_getopt_internal (int argc, char *const *argv, const char *optstring,
+                 const struct option *longopts, int *longind, int long_only)
+{
+  /* Parser state that backs the classic interface built on the global
+     variables `optind', `opterr', `optarg' and `optopt'.  */
+  struct _getopt_data *const state = &getopt_data;
+  int rc;
+
+  /* Import the two globals the caller is allowed to modify between
+     calls, so that such changes reach the reentrant parser.  */
+  state->opterr = opterr;
+  state->optind = optind;
+
+  rc = _getopt_internal_r (argc, argv, optstring, longopts,
+                          longind, long_only, state);
+
+  /* Publish the outcome of this step through the globals.  */
+  optopt = state->optopt;
+  optarg = state->optarg;
+  optind = state->optind;
+
+  return rc;
+}
+
+int
+getopt (int argc, char *const *argv, const char *optstring)
+{
+  /* Plain getopt is the internal parser called without a long-option
+     table, without a place to store a long-option index, and with
+     long_only switched off.  */
+  const struct option *const no_longopts = NULL;
+  int *const no_longind = NULL;
+
+  return _getopt_internal (argc, argv, optstring,
+                          no_longopts, no_longind, 0);
+}
+
+#endif /* Not ELIDE_CODE.  */
+\f
+#ifdef TEST
+
+/* Compile with -DTEST to make an executable for use in testing
+   the above definition of `getopt'.  */
+
+/* Test driver: parse the command line with the option string
+   "abc:d:0123456789", print one line per recognized option and finally
+   list the remaining non-option ARGV-elements.  Always exits with
+   status 0.  */
+int
+main (int argc, char **argv)
+{
+  int c;
+  int digit_optind = 0;
+
+  while (1)
+    {
+      /* Index of the ARGV-element about to be scanned; a zero `optind'
+        is treated as 1 here.  */
+      int this_option_optind = optind ? optind : 1;
+
+      c = getopt (argc, argv, "abc:d:0123456789");
+      if (c == -1)
+       break;
+
+      switch (c)
+       {
+       case '0':
+       case '1':
+       case '2':
+       case '3':
+       case '4':
+       case '5':
+       case '6':
+       case '7':
+       case '8':
+       case '9':
+         /* Report digit options that do not all come from the same
+            ARGV-element.  */
+         if (digit_optind != 0 && digit_optind != this_option_optind)
+           printf ("digits occur in two different argv-elements.\n");
+         digit_optind = this_option_optind;
+         printf ("option %c\n", c);
+         break;
+
+       case 'a':
+         printf ("option a\n");
+         break;
+
+       case 'b':
+         printf ("option b\n");
+         break;
+
+       case 'c':
+         printf ("option c with value `%s'\n", optarg);
+         break;
+
+       /* The option string declares `d:', so handle it like the
+          getopt_long test driver does instead of falling through to
+          the "unknown character code" diagnostic below.  */
+       case 'd':
+         printf ("option d with value `%s'\n", optarg);
+         break;
+
+       case '?':
+         /* getopt has already reported the problem (if enabled).  */
+         break;
+
+       default:
+         printf ("?? getopt returned character code 0%o ??\n", c);
+       }
+    }
+
+  if (optind < argc)
+    {
+      printf ("non-option ARGV-elements: ");
+      while (optind < argc)
+       printf ("%s ", argv[optind++]);
+      printf ("\n");
+    }
+
+  exit (0);
+}
+
+#endif /* TEST */
diff --git a/lib/getopt/getopt.h b/lib/getopt/getopt.h

new file mode 100644 (file)

index 0000000..b7a026c
--- /dev/null
+++ b/lib/getopt/getopt.h
@@ -0,0 +1,177 @@
+/* Declarations for getopt.
+   Copyright (C) 1989-1994,1996-1999,2001,2003,2004
+   Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#ifndef _GETOPT_H
+
+#ifndef __need_getopt
+# define _GETOPT_H 1
+#endif
+
+/* If __GNU_LIBRARY__ is not already defined, either we are being used
+   standalone, or this is the first header included in the source file.
+   If we are being used with glibc, we need to include <features.h>, but
+   that does not exist if we are standalone.  So: if __GNU_LIBRARY__ is
+   not defined, include <ctype.h>, which will pull in <features.h> for us
+   if it's from glibc.  (Why ctype.h?  It's guaranteed to exist and it
+   doesn't flood the namespace with stuff the way some other headers do.)  */
+#if !defined __GNU_LIBRARY__
+# include <ctype.h>
+#endif
+
+#ifndef __THROW
+# ifndef __GNUC_PREREQ
+#  define __GNUC_PREREQ(maj, min) (0)
+# endif
+# if defined __cplusplus && __GNUC_PREREQ (2,8)
+#  define __THROW      throw ()
+# else
+#  define __THROW
+# endif
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* For communication from `getopt' to the caller.
+   When `getopt' finds an option that takes an argument,
+   the argument value is returned here.
+   Also, when `ordering' is RETURN_IN_ORDER,
+   each non-option ARGV-element is returned here.  */
+
+extern char *optarg;
+
+/* Index in ARGV of the next element to be scanned.
+   This is used for communication to and from the caller
+   and for communication between successive calls to `getopt'.
+
+   On entry to `getopt', zero means this is the first call; initialize.
+
+   When `getopt' returns -1, this is the index of the first of the
+   non-option elements that the caller should itself scan.
+
+   Otherwise, `optind' communicates from one call to the next
+   how much of ARGV has been scanned so far.  */
+
+extern int optind;
+
+/* Callers store zero here to inhibit the error message `getopt' prints
+   for unrecognized options.  */
+
+extern int opterr;
+
+/* Set to an option character which was unrecognized.  */
+
+extern int optopt;
+
+#ifndef __need_getopt
+/* Describe the long-named options requested by the application.
+   The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector
+   of `struct option' terminated by an element containing a name which is
+   zero.
+
+   The field `has_arg' is:
+   no_argument         (or 0) if the option does not take an argument,
+   required_argument   (or 1) if the option requires an argument,
+   optional_argument   (or 2) if the option takes an optional argument.
+
+   If the field `flag' is not NULL, it points to a variable that is set
+   to the value given in the field `val' when the option is found, but
+   left unchanged if the option is not found.
+
+   To have a long-named option do something other than set an `int' to
+   a compiled-in constant, such as set a value from `optarg', set the
+   option's `flag' field to zero and its `val' field to a nonzero
+   value (the equivalent single-letter option character, if there is
+   one).  For long options that have a zero `flag' field, `getopt'
+   returns the contents of the `val' field.  */
+
+struct option
+{
+  /* Name of the long option.  A zero name marks the terminating element
+     of the table.  */
+  const char *name;
+  /* has_arg can't be an enum because some compilers complain about
+     type mismatches in all the code that assumes it is an int.  */
+  int has_arg;
+  /* If not NULL, the variable pointed to is set to `val' when the option
+     is found (and left unchanged otherwise).  */
+  int *flag;
+  /* Value stored through `flag', or returned by `getopt' when `flag'
+     is zero.  */
+  int val;
+};
+
+/* Names for the values of the `has_arg' field of `struct option'.  */
+
+# define no_argument           0
+# define required_argument     1
+# define optional_argument     2
+#endif /* need getopt */
+
+
+/* Get definitions and prototypes for functions to process the
+   arguments in ARGV (ARGC of them, minus the program name) for
+   options given in OPTS.
+
+   Return the option character from OPTS just read.  Return -1 when
+   there are no more options.  For unrecognized options, or options
+   missing arguments, `optopt' is set to the option letter, and '?' is
+   returned.
+
+   The OPTS string is a list of characters which are recognized option
+   letters, optionally followed by colons, specifying that that letter
+   takes an argument, to be placed in `optarg'.
+
+   If a letter in OPTS is followed by two colons, its argument is
+   optional.  This behavior is specific to the GNU `getopt'.
+
+   The argument `--' causes premature termination of argument
+   scanning, explicitly telling `getopt' that there are no more
+   options.
+
+   If OPTS begins with `-', then non-option arguments are treated as
+   arguments to the option with character code 1.  This behavior is
+   specific to the GNU `getopt'.  */
+
+#ifdef __GNU_LIBRARY__
+/* Many other libraries have conflicting prototypes for getopt, with
+   differences in the consts, in stdlib.h.  To avoid compilation
+   errors, only prototype getopt for the GNU C library.  */
+extern int getopt (int ___argc, char *const *___argv, const char *__shortopts)
+       __THROW;
+#else /* not __GNU_LIBRARY__ */
+extern int getopt ();
+#endif /* __GNU_LIBRARY__ */
+
+#ifndef __need_getopt
+extern int getopt_long (int ___argc, char *const *___argv,
+                       const char *__shortopts,
+                       const struct option *__longopts, int *__longind)
+       __THROW;
+extern int getopt_long_only (int ___argc, char *const *___argv,
+                            const char *__shortopts,
+                            const struct option *__longopts, int *__longind)
+       __THROW;
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+/* Make sure we later can get all the definitions and declarations.  */
+#undef __need_getopt
+
+#endif /* getopt.h */
diff --git a/lib/getopt/getopt1.c b/lib/getopt/getopt1.c

new file mode 100644 (file)

index 0000000..0ea9536
--- /dev/null
+++ b/lib/getopt/getopt1.c
@@ -0,0 +1,192 @@
+/* getopt_long and getopt_long_only entry points for GNU getopt.
+   Copyright (C) 1987,88,89,90,91,92,93,94,96,97,98,2004
+     Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+\f
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#ifdef _LIBC
+# include <getopt.h>
+#else
+# include "getopt.h"
+#endif
+#include "getopt_int.h"
+
+#include <stdio.h>
+
+/* Comment out all this code if we are using the GNU C Library, and are not
+   actually compiling the library itself.  This code is part of the GNU C
+   Library, but also included in many other GNU distributions.  Compiling
+   and linking in this code is a waste when using the GNU C library
+   (especially if it is a shared library).  Rather than having every GNU
+   program understand `configure --with-gnu-libc' and omit the object files,
+   it is simpler to just do this in the source for each such file.  */
+
+#define GETOPT_INTERFACE_VERSION 2
+#if !defined _LIBC && defined __GLIBC__ && __GLIBC__ >= 2
+#include <gnu-versions.h>
+#if _GNU_GETOPT_INTERFACE_VERSION == GETOPT_INTERFACE_VERSION
+//#define ELIDE_CODE // SHERLOCK: disabled
+#endif
+#endif
+
+#ifndef ELIDE_CODE
+
+
+/* This needs to come after some library #include
+   to get __GNU_LIBRARY__ defined.  */
+#ifdef __GNU_LIBRARY__
+#include <stdlib.h>
+#endif
+
+#ifndef        NULL
+#define NULL 0
+#endif
+
+int
+getopt_long (int argc, char *const *argv, const char *options,
+            const struct option *long_options, int *opt_index)
+{
+  /* long_only is off: unlike getopt_long_only, a single '-' does not
+     introduce a long option.  */
+  const int long_only = 0;
+
+  return _getopt_internal (argc, argv, options, long_options, opt_index,
+                          long_only);
+}
+
+int
+_getopt_long_r (int argc, char *const *argv, const char *options,
+               const struct option *long_options, int *opt_index,
+               struct _getopt_data *d)
+{
+  /* Variant of getopt_long that keeps its parser state in the
+     caller-supplied D instead of the global variables; long_only is
+     off.  */
+  const int long_only = 0;
+
+  return _getopt_internal_r (argc, argv, options, long_options, opt_index,
+                            long_only, d);
+}
+
+/* Like getopt_long, but '-' as well as '--' can indicate a long option.
+   If an option that starts with '-' (not '--') doesn't match a long option,
+   but does match a short option, it is parsed as a short option
+   instead.  */
+
+int
+getopt_long_only (int argc, char *const *argv, const char *options,
+                 const struct option *long_options, int *opt_index)
+{
+  /* Same call as getopt_long, but with the long_only flag switched on.  */
+  const int long_only = 1;
+
+  return _getopt_internal (argc, argv, options, long_options, opt_index,
+                          long_only);
+}
+
+int
+_getopt_long_only_r (int argc, char *const *argv, const char *options,
+                    const struct option *long_options, int *opt_index,
+                    struct _getopt_data *d)
+{
+  /* Variant of getopt_long_only that keeps its parser state in the
+     caller-supplied D instead of the global variables; long_only is
+     on.  */
+  const int long_only = 1;
+
+  return _getopt_internal_r (argc, argv, options, long_options, opt_index,
+                            long_only, d);
+}
+
+#endif /* Not ELIDE_CODE.  */
+\f
+#ifdef TEST
+
+#include <stdio.h>
+
+/* Test driver for getopt_long: print one line per recognized option,
+   then list the non-option ARGV-elements that are left over.  */
+int
+main (int argc, char **argv)
+{
+  /* Long options understood by the driver.  Every entry has a zero
+     `flag' and a zero `val'; the switch below handles them under
+     `case 0' and identifies them through OPTION_INDEX.  */
+  static struct option long_options[] =
+  {
+    {"add", 1, 0, 0},
+    {"append", 0, 0, 0},
+    {"delete", 1, 0, 0},
+    {"verbose", 0, 0, 0},
+    {"create", 0, 0, 0},
+    {"file", 1, 0, 0},
+    {0, 0, 0, 0}
+  };
+  int digit_optind = 0;
+  int c;
+
+  for (;;)
+    {
+      int option_index = 0;
+      /* ARGV-element about to be scanned; a zero `optind' counts as 1.  */
+      int this_option_optind = (optind != 0) ? optind : 1;
+
+      c = getopt_long (argc, argv, "abc:d:0123456789",
+                      long_options, &option_index);
+      if (c == -1)
+        break;
+
+      /* Digit options: complain when they are spread over more than one
+         ARGV-element.  */
+      if (c >= '0' && c <= '9')
+        {
+          if (digit_optind != 0 && digit_optind != this_option_optind)
+            printf ("digits occur in two different argv-elements.\n");
+          digit_optind = this_option_optind;
+          printf ("option %c\n", c);
+          continue;
+        }
+
+      switch (c)
+        {
+        case 0:
+          /* A long option from the table above.  */
+          printf ("option %s", long_options[option_index].name);
+          if (optarg)
+            printf (" with arg %s", optarg);
+          printf ("\n");
+          break;
+
+        case 'a':
+          printf ("option a\n");
+          break;
+
+        case 'b':
+          printf ("option b\n");
+          break;
+
+        case 'c':
+          printf ("option c with value `%s'\n", optarg);
+          break;
+
+        case 'd':
+          printf ("option d with value `%s'\n", optarg);
+          break;
+
+        case '?':
+          break;
+
+        default:
+          printf ("?? getopt returned character code 0%o ??\n", c);
+          break;
+        }
+    }
+
+  if (optind < argc)
+    {
+      printf ("non-option ARGV-elements: ");
+      for (; optind < argc; optind++)
+        printf ("%s ", argv[optind]);
+      printf ("\n");
+    }
+
+  exit (0);
+}
+
+#endif /* TEST */
diff --git a/lib/getopt/getopt_init.c b/lib/getopt/getopt_init.c

new file mode 100644 (file)

index 0000000..d460098
--- /dev/null
+++ b/lib/getopt/getopt_init.c
@@ -0,0 +1,75 @@
+/* Perform additional initialization for getopt functions in GNU libc.
+   Copyright (C) 1997, 1998, 2001 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#ifdef USE_NONOPTION_FLAGS
+/* Attention: this file is *not* necessary when the GNU getopt functions
+   are used outside the GNU libc.  Some additional functionality of the
+   getopt functions in GNU libc require this additional work.  */
+
+#include <getopt.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+
+#include <stdio-common/_itoa.h>
+
+/* Variable to synchronize work.  */
+char *__getopt_nonoption_flags;
+
+
+/* Remove the environment variable "_<PID>_GNU_nonoption_argv_flags_" if
+   it is still available.  If the getopt functions are also used in the
+   application it does not exist anymore since it was saved for the use
+   in getopt.  */
+void
+__getopt_clean_environment (char **env)
+{
+  /* Bash 2.0 puts a special variable in the environment for each
+     command it runs, specifying which ARGV elements are the results
+     of file name wildcard expansion and therefore should not be
+     considered as options.  */
+  static const char envvar_tail[] = "_GNU_nonoption_argv_flags_=";
+  char var[50];
+  char *cp, **ep;
+  size_t len;
+
+  /* Construct the "_<PID>_GNU_nonoption_argv_flags_=" string.  We must
+     not use `sprintf'.  The tail (including its terminating NUL) is
+     placed at the very end of VAR; _itoa_word presumably writes the PID
+     digits immediately in front of it and returns the new start.  */
+  cp = memcpy (&var[sizeof (var) - sizeof (envvar_tail)], envvar_tail,
+              sizeof (envvar_tail));
+  cp = _itoa_word (__getpid (), cp, 10, 0);
+  /* Note: we omit adding the leading '_' since we explicitly test for
+     it before calling strncmp.  */
+  len = (var + sizeof (var) - 1) - cp;
+
+  ep = env;
+  while (*ep != NULL)
+    {
+      if ((*ep)[0] == '_'
+         && __builtin_expect (strncmp (*ep + 1, cp, len) == 0, 0))
+       {
+         /* Found it.  Store this pointer and move later ones back,
+            including the terminating NULL.  */
+         char **dp = ep;
+         /* NOTE(review): index LEN of the entry is the '=' that ends
+            the name (the leading '_' occupies index 0) -- confirm that
+            the consumer expects the pointer to start there.  */
+         __getopt_nonoption_flags = &(*ep)[len];
+         do
+           dp[0] = dp[1];
+         while (*dp++);
+         /* Do not advance EP: the slot now holds the entry that
+            followed the removed one (or the terminating NULL).
+            Re-examining it catches a repeated name and keeps us from
+            stepping past the end of the array.  */
+       }
+      else
+       ++ep;
+    }
+}
+#endif /* USE_NONOPTION_FLAGS */
diff --git a/lib/getopt/getopt_int.h b/lib/getopt/getopt_int.h

new file mode 100644 (file)

index 0000000..d982c72
--- /dev/null
+++ b/lib/getopt/getopt_int.h
@@ -0,0 +1,130 @@
+/* Internal declarations for getopt.
+   Copyright (C) 1989-1994,1996-1999,2001,2003,2004
+   Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#ifndef _GETOPT_INT_H
+#define _GETOPT_INT_H  1
+
+extern int _getopt_internal (int ___argc, char *const *___argv,
+                            const char *__shortopts,
+                            const struct option *__longopts, int *__longind,
+                            int __long_only);
+
+\f
+/* Reentrant versions which can handle parsing multiple argument
+   vectors at the same time.  */
+
+/* Data type for reentrant functions.  */
+struct _getopt_data
+{
+  /* These have exactly the same meaning as the corresponding global
+     variables, except that they are used for the reentrant
+     versions of getopt.
+
+     `optind' and `opterr' must remain the first two members, in this
+     order: _GETOPT_DATA_INITIALIZER sets them positionally.  */
+  int optind;
+  int opterr;
+  int optopt;
+  char *optarg;
+
+  /* Internal members.  */
+
+  /* True if the internal members have been initialized.  */
+  int __initialized;
+
+  /* The next char to be scanned in the option-element
+     in which the last option character we returned was found.
+     This allows us to pick up the scan where we left off.
+
+     If this is zero, or a null string, it means resume the scan
+     by advancing to the next ARGV-element.  */
+  char *__nextchar;
+
+  /* Describe how to deal with options that follow non-option ARGV-elements.
+
+     If the caller did not specify anything,
+     the default is REQUIRE_ORDER if the environment variable
+     POSIXLY_CORRECT is defined, PERMUTE otherwise.
+
+     REQUIRE_ORDER means don't recognize them as options;
+     stop option processing when the first non-option is seen.
+     This is what Unix does.
+     This mode of operation is selected by either setting the environment
+     variable POSIXLY_CORRECT, or using `+' as the first character
+     of the list of option characters.
+
+     PERMUTE is the default.  We permute the contents of ARGV as we
+     scan, so that eventually all the non-options are at the end.
+     This allows options to be given in any order, even with programs
+     that were not written to expect this.
+
+     RETURN_IN_ORDER is an option available to programs that were
+     written to expect options and other ARGV-elements in any order
+     and that care about the ordering of the two.  We describe each
+     non-option ARGV-element as if it were the argument of an option
+     with character code 1.  Using `-' as the first character of the
+     list of option characters selects this mode of operation.
+
+     The special argument `--' forces an end of option-scanning regardless
+     of the value of `ordering'.  In the case of RETURN_IN_ORDER, only
+     `--' can cause `getopt' to return -1 with `optind' != ARGC.  */
+
+  enum
+    {
+      REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER
+    } __ordering;
+
+  /* If the POSIXLY_CORRECT environment variable is set.  */
+  int __posixly_correct;
+
+
+  /* Handle permutation of arguments.  */
+
+  /* Describe the part of ARGV that contains non-options that have
+     been skipped.  `first_nonopt' is the index in ARGV of the first
+     of them; `last_nonopt' is the index after the last of them.  */
+
+  int __first_nonopt;
+  int __last_nonopt;
+
+  /* Only present when compiled as part of glibc with USE_NONOPTION_FLAGS.
+     NOTE(review): presumably these describe the flags string exported
+     as __getopt_nonoption_flags by getopt_init.c -- confirm in getopt.c.  */
+#if defined _LIBC && defined USE_NONOPTION_FLAGS
+  int __nonoption_flags_max_len;
+  int __nonoption_flags_len;
+# endif
+};
+
+/* The initializer is necessary to set OPTIND and OPTERR to their
+   default values and to clear the initialization flag.  */
+#define _GETOPT_DATA_INITIALIZER       { 1, 1 }
+
+extern int _getopt_internal_r (int ___argc, char *const *___argv,
+                              const char *__shortopts,
+                              const struct option *__longopts, int *__longind,
+                              int __long_only, struct _getopt_data *__data);
+
+extern int _getopt_long_r (int ___argc, char *const *___argv,
+                          const char *__shortopts,
+                          const struct option *__longopts, int *__longind,
+                          struct _getopt_data *__data);
+
+extern int _getopt_long_only_r (int ___argc, char *const *___argv,
+                               const char *__shortopts,
+                               const struct option *__longopts,
+                               int *__longind,
+                               struct _getopt_data *__data);
+
+#endif /* getopt_int.h */
diff --git a/lib/hash-test.c b/lib/hash-test.c

new file mode 100644 (file)

index 0000000..803a030
--- /dev/null
+++ b/lib/hash-test.c
@@ -0,0 +1,319 @@
+/* Tests for hash table routines */
+
+#include "lib/lib.h"
+#include "lib/mempool.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+
+/* TEST 1: integers */
+
+/* Node type for test 1: an int key with an int payload. */
+struct node1 {
+  int key;
+  int data;
+};
+
+/*
+ * Instantiate a static hash table with the test1_ prefix, keyed by the
+ * int field node1.key; new buckets are zero-filled (HASH_ZERO_FILL).
+ */
+#define HASH_NODE struct node1
+#define HASH_PREFIX(x) test1_##x
+#define HASH_KEY_ATOMIC key
+#define HASH_ATOMIC_TYPE int
+#define HASH_ZERO_FILL
+
+#define HASH_GIVE_INIT_DATA
+/* Data initializer for newly created nodes: derives the payload from the key so that test1() can verify it. */
+static inline void test1_init_data(struct node1 *n)
+{
+  n->data = n->key + 123;
+}
+
+/* Operations requested from the generator: find(), lookup() and remove(). */
+#define HASH_WANT_FIND
+#define HASH_WANT_LOOKUP
+#define HASH_WANT_REMOVE
+
+#include "lib/hashtable.h"
+
+/*
+ * Test 1: create keys 0..1023 through lookup(), remove the odd ones and
+ * check that exactly the even keys (with intact data) are left.
+ */
+static void test1(void)
+{
+  int k, sum;
+
+  test1_init();
+
+  /* lookup() creates each missing key; the data initializer must have run */
+  for (k = 0; k < 1024; k++)
+    {
+      struct node1 *created = test1_lookup(k);
+      ASSERT(created->data == k+123);
+    }
+
+  /* drop every odd key */
+  for (k = 1; k < 1024; k += 2)
+    test1_remove(test1_lookup(k));
+
+  for (k = 0; k < 1024; k++)
+    {
+      struct node1 *found = test1_find(k);
+      int missing = !found;
+      if (missing != (k&1) || (found && found->data != k+123))
+       die("Inconsistency at i=%d", k);
+    }
+
+  /* the surviving even keys give sum(key+1) = 512*512 */
+  sum = 0;
+  HASH_FOR_ALL(test1, cur)
+    {
+      sum += cur->key + 1;
+    }
+  HASH_END_FOR;
+  ASSERT(sum == 262144);
+  puts("OK");
+}
+
+/* TEST 2: external strings */
+
+/* Node type for test 2: the key string is stored outside the node. */
+struct node2 {
+  char *key;
+  int data;
+};
+
+/*
+ * Static table test2_*, keyed by the separately allocated string
+ * node2.key and compared case-insensitively (HASH_NOCASE); nodes are
+ * taken from an automatically created mempool (HASH_AUTO_POOL, block
+ * size 4096).
+ */
+#define HASH_NODE struct node2
+#define HASH_PREFIX(x) test2_##x
+#define HASH_KEY_STRING key
+#define HASH_NOCASE
+#define HASH_AUTO_POOL 4096
+
+/* Operations requested: find() and new(). */
+#define HASH_WANT_FIND
+#define HASH_WANT_NEW
+
+#include "lib/hashtable.h"
+
+/*
+ * Test 2: insert "abc<i>" for even i, then search for "ABC<i>" for all i.
+ * As the table is case-insensitive, exactly the even ones must be found.
+ */
+static void test2(void)
+{
+  char name[32];
+  int k;
+
+  test2_init();
+  for (k = 0; k < 1024; k += 2)
+    {
+      sprintf(name, "abc%d", k);
+      /* the node stores the pointer it is given, so each key gets its own copy */
+      test2_new(xstrdup(name));
+    }
+  for (k = 0; k < 1024; k++)
+    {
+      int present;
+      sprintf(name, "ABC%d", k);
+      present = (test2_find(name) != NULL);
+      if (!present != (k&1))
+       die("Inconsistency at i=%d", k);
+    }
+  puts("OK");
+}
+
+/* TEST 3: internal strings + pools */
+
+/* Mempool supplying all nodes of the test3 table; created in test3(). */
+static struct mempool *pool3;
+
+/* Node type for test 3: the key string is stored inline at the end of the node. */
+struct node3 {
+  int data;
+  char key[1];
+};
+
+/*
+ * Static table test3_*, keyed by the end-allocated string node3.key
+ * (HASH_KEY_ENDSTRING), case-sensitive.
+ */
+#define HASH_NODE struct node3
+#define HASH_PREFIX(x) test3_##x
+#define HASH_KEY_ENDSTRING key
+
+/* Operations requested: find() and new(). */
+#define HASH_WANT_FIND
+#define HASH_WANT_NEW
+
+/* Allocate nodes from pool3 instead of xmalloc(). */
+#define HASH_USE_POOL pool3
+
+#include "lib/hashtable.h"
+
+/*
+ * Test 3: a large table (2^20 candidate keys) with inline string keys and
+ * pooled nodes.  Even keys are inserted; afterwards every key is looked up
+ * and must be present iff it is even.
+ */
+static void test3(void)
+{
+  char name[32];
+  int k;
+
+  /* the pool has to exist before the first node is allocated */
+  pool3 = mp_new(16384);
+  test3_init();
+  for (k = 0; k < 1048576; k += 2)
+    {
+      sprintf(name, "abc%d", k);
+      test3_new(name);
+    }
+  for (k = 0; k < 1048576; k++)
+    {
+      int missing;
+      sprintf(name, "abc%d", k);
+      missing = !test3_find(name);
+      if (missing != (k&1))
+       die("Inconsistency at i=%d", k);
+    }
+  puts("OK");
+}
+
+/* TEST 4: complex keys */
+
+#include "lib/hashfunc.h"
+
+/* Node type for test 4: the key is the pair (host, port); host is stored inline at the end of the node. */
+struct node4 {
+  int port;
+  int data;
+  char host[1];
+};
+
+/*
+ * Static table test4_* with a two-component key.  HASH_KEY_COMPLEX(x)
+ * expands to the list of key components each prefixed by x, and
+ * HASH_KEY_DECL is the matching parameter declaration.
+ */
+#define HASH_NODE struct node4
+#define HASH_PREFIX(x) test4_##x
+#define HASH_KEY_COMPLEX(x) x host, x port
+#define HASH_KEY_DECL char *host, int port
+
+/* Request every operation except find_next(). */
+#define HASH_WANT_CLEANUP
+#define HASH_WANT_FIND
+#define HASH_WANT_NEW
+#define HASH_WANT_LOOKUP
+#define HASH_WANT_DELETE
+#define HASH_WANT_REMOVE
+
+#define HASH_GIVE_HASHFN
+/* Hash of the key: case-insensitive hash of the host XORed with the integer hash of the port. */
+static uns test4_hash(char *host, int port)
+{
+  return hash_string_nocase(host) ^ hash_u32(port);
+}
+
+#define HASH_GIVE_EQ
+/* Key equality: hosts compared without regard to case, ports compared exactly. */
+static inline int test4_eq(char *host1, int port1, char *host2, int port2)
+{
+  return !strcasecmp(host1,host2) && port1 == port2;
+}
+
+#define HASH_GIVE_EXTRA_SIZE
+/* Extra bytes to allocate behind the node: strlen(host); the terminating NUL fits in host[1]. */
+static inline uns test4_extra_size(char *host, int port UNUSED)
+{
+  return strlen(host);
+}
+
+#define HASH_GIVE_INIT_KEY
+/* Store the key in a freshly allocated node (relies on the space reserved by test4_extra_size()). */
+static inline void test4_init_key(struct node4 *n, char *host, int port)
+{
+  strcpy(n->host, host);
+  n->port = port;
+}
+
+#include "lib/hashtable.h"
+
+/*
+ * Test 4: complex (host, port) keys with case-insensitive hosts.
+ * Exercises new(), lookup(), find(), remove(), delete() and cleanup().
+ */
+static void test4(void)
+{
+  int i;
+  char x[32];
+  struct node4 *n;
+
+  test4_init();
+  /* Phase 1: pre-create every third key with new() */
+  for (i=0; i<1024; i++)
+    if ((i % 3) == 0)
+      {
+       sprintf(x, "abc%d", i);
+       n = test4_new(x, i%10);
+       n->data = i;
+      }
+  /* Phase 2: lookup() every key; it finds those from phase 1 and creates the rest */
+  for (i=0; i<1024; i++)
+    {
+      sprintf(x, "abc%d", i);
+      n = test4_lookup(x, i%10);
+      n->data = i;
+    }
+  /* Phase 3: get rid of the odd keys (spelled in mixed case), alternating between find()+remove() and delete() */
+  for (i=0; i<1024; i++)
+    if (i % 2)
+      {
+       sprintf(x, "aBc%d", i);
+       if ((i % 7) < 3)
+         {
+           n = test4_find(x, i%10);
+           ASSERT(n);
+           test4_remove(n);
+         }
+       else
+         test4_delete(x, i%10);
+      }
+  /* Phase 4: only the even keys may remain (searched in upper case), each with its data intact */
+  for (i=0; i<1024; i++)
+    {
+      sprintf(x, "ABC%d", i);
+      n = test4_find(x, i%10);
+      if (!n != (i&1) || (n && n->data != i))
+       die("Inconsistency at i=%d", i);
+    }
+  test4_cleanup();
+  puts("OK");
+}
+
+/* TEST 5: integers again, but this time dynamically */
+
+/* Node type for test 5: same layout as node1. */
+struct node5 {
+  int key;
+  int data;
+};
+
+/*
+ * Table test5_*, keyed by the int field node5.key.  HASH_TABLE_DYNAMIC
+ * makes every operation take a `struct test5_table *' as its first
+ * argument instead of using one static table.
+ */
+#define HASH_NODE struct node5
+#define HASH_PREFIX(x) test5_##x
+#define HASH_KEY_ATOMIC key
+#define HASH_ATOMIC_TYPE int
+#define HASH_TABLE_DYNAMIC
+
+/* Forward declaration: the callback below needs the type before hashtable.h defines it. */
+struct test5_table;
+
+#define HASH_GIVE_INIT_DATA
+/* Data initializer for newly created nodes; the table argument is not needed here. */
+static inline void test5_init_data(struct test5_table *table UNUSED, struct node5 *n)
+{
+  n->data = n->key + 123;
+}
+
+/* Operations requested: find(), new() and delete(). */
+#define HASH_WANT_FIND
+#define HASH_WANT_NEW
+#define HASH_WANT_DELETE
+
+#include "lib/hashtable.h"
+
+/*
+ * Test 5: the integer test once more, but on a dynamic table which lives
+ * on the stack and is passed explicitly to every operation.
+ */
+static void test5(void)
+{
+  struct test5_table tab;
+  int k, sum = 0;
+
+  test5_init(&tab);
+  for (k = 0; k < 1024; k++)
+    {
+      struct node5 *fresh = test5_new(&tab, k);
+      ASSERT(fresh->data == k+123);
+    }
+
+  /* delete the odd keys by value */
+  for (k = 1; k < 1024; k += 2)
+    test5_delete(&tab, k);
+
+  for (k = 0; k < 1024; k++)
+    {
+      struct node5 *found = test5_find(&tab, k);
+      int gone = !found;
+      if (gone != (k&1) || (found && found->data != k+123))
+       die("Inconsistency at i=%d", k);
+    }
+
+  /* the surviving even keys give sum(key+1) = 512*512 */
+  HASH_FOR_ALL_DYNAMIC(test5, &tab, cur)
+    sum += cur->key + 1;
+  HASH_END_FOR;
+  ASSERT(sum == 262144);
+  puts("OK");
+}
+
+/*
+ * Without arguments, run all tests.  Otherwise every argument is the
+ * number (1-5) of a test to run; the selected tests are executed in
+ * ascending order regardless of the order of the arguments.
+ */
+int
+main(int argc, char **argv)
+{
+  uns m = ~0U;
+  if (argc > 1)
+    {
+      m = 0;
+      for (int i=1; i<argc; i++)
+       {
+         long t = atol(argv[i]);
+         /* Reject unknown tests: an out-of-range shift count would be undefined
+          * and a mistyped number would otherwise "pass" without running anything. */
+         if (t < 1 || t > 5)
+           die("Unknown test number: %s", argv[i]);
+         m |= 1U << t;
+       }
+    }
+  if (m & (1U << 1))
+    test1();
+  if (m & (1U << 2))
+    test2();
+  if (m & (1U << 3))
+    test3();
+  if (m & (1U << 4))
+    test4();
+  if (m & (1U << 5))
+    test5();
+  return 0;
+}
diff --git a/lib/hash-test.t b/lib/hash-test.t

new file mode 100644 (file)

index 0000000..ba5a61e
--- /dev/null
+++ b/lib/hash-test.t
@@ -0,0 +1,13 @@
+# Tests for the hash table modules
+# Each case runs one numbered test of hash-test, which prints OK on success.
+
+Run:   ../obj/lib/hash-test 1
+Out:   OK
+
+Run:   ../obj/lib/hash-test 2
+Out:   OK
+
+Run:   ../obj/lib/hash-test 3
+Out:   OK
+
+Run:   ../obj/lib/hash-test 4
+Out:   OK
+
+Run:   ../obj/lib/hash-test 5
+Out:   OK
diff --git a/lib/hashfunc.c b/lib/hashfunc.c

new file mode 100644 (file)

index 0000000..8fca5f4
--- /dev/null
+++ b/lib/hashfunc.c
@@ -0,0 +1,213 @@
+/*
+ *     UCW Library -- Hyper-super-meta-alt-control-shift extra fast
+ *     str_len() and hash_*() routines
+ *
+ *     It is always at least as fast as the classical strlen() routine and for
+ *     strings longer than 100 characters, it is substantially faster.
+ *
+ *     (c) 2002, Robert Spalek <robert@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/hashfunc.h"
+#include "lib/chartype.h"
+
+/* The number of bits the hash in the function hash_*() is rotated by after
+ * every pass.  It should be relatively prime to the word size in bits.  */
+#define        SHIFT_BITS      7
+
+/* A bit-mask which clears higher bytes than a given threshold.  */
+static uns mask_higher_bits[sizeof(uns)];
+
+/*
+ * Fill in mask_higher_bits[]: entry number i, viewed as bytes in memory
+ * order, gets all bits set in its first i bytes and zeroes in the rest.
+ * Marked CONSTRUCTOR (presumably run automatically at startup -- see lib.h).
+ */
+static void CONSTRUCTOR
+hashfunc_init(void)
+{
+       uns keep, pos;
+       for (keep=0; keep<sizeof(uns); keep++)
+       {
+               byte *mask = (byte *) &mask_higher_bits[keep];
+               for (pos=0; pos<sizeof(uns); pos++)
+                       mask[pos] = (pos < keep) ? -1 : 0;
+       }
+}
+
+/*
+ * Return the number of bytes of x (in memory order) preceding its first
+ * zero byte, or sizeof(uns) if x contains no zero byte.  A word-wide bit
+ * trick filters out words without zero bytes; only when it reports a
+ * possible zero byte are the bytes scanned one by one.
+ */
+static inline uns CONST
+str_len_uns(uns x)
+{
+       const uns sub = ~0U / 0xff;
+       const uns and = sub * 0x80;
+       uns a, i;
+       byte *bytes;
+       a = ~x & (x - sub) & and;
+       /*
+        * x_2 = x - 0x01010101;
+        * x_3 = ~x & x_2;
+        * a = x_3 & 0x80808080;
+        *
+        * If all bytes of x are nonzero, then the highest bit of each byte of
+        * x_2 is lower or equal to the corresponding bit of x.  Hence x_3 has
+        * all these highest bits cleared (the target bit is set iff the source
+        * bit has changed from 0 to 1).  If a == 0, then we are sure there is
+        * no zero byte in x.
+        */
+       if (!a)
+               return sizeof(uns);
+       /* Slow path: locate the first zero byte (the filter may also give false positives, in which case i reaches sizeof(uns)) */
+       bytes = (byte *) &x;
+       for (i=0; i<sizeof(uns) && bytes[i]; i++);
+       return i;
+}
+
+/*
+ * Length of a zero-terminated string whose address is aligned to uns.
+ * The string is read a word at a time, so up to sizeof(uns)-1 bytes
+ * behind the terminator are read as well.
+ */
+inline uns
+str_len_aligned(const char *str)
+{
+       const uns *word = (const uns *) str;
+       uns total = 0;
+       uns chunk;
+       do
+       {
+               chunk = str_len_uns(*word++);
+               total += chunk;
+       }
+       while (chunk >= sizeof(uns));   /* a short chunk contains the terminator */
+       return total;
+}
+
+/*
+ * Hash a zero-terminated string whose address is aligned to uns.
+ * For every word of the string (including the last, possibly empty, one)
+ * the hash is rotated by SHIFT_BITS and XORed with the word; in the last
+ * word, the terminator and the bytes behind it are masked out.
+ */
+inline uns
+hash_string_aligned(const char *str)
+{
+       const uns *word = (const uns *) str;
+       uns hash = 0;
+       uns used;
+       for (;;)
+       {
+               used = str_len_uns(*word);
+               hash = ROL(hash, SHIFT_BITS);
+               if (used < sizeof(uns))
+                       break;          /* this word holds the terminator */
+               hash ^= *word++;
+       }
+       return hash ^ (*word & mask_higher_bits[used]);
+}
+
+/*
+ * Hash a block of len bytes whose address is aligned to uns.
+ * For every (possibly partial or empty) word the hash is rotated by
+ * SHIFT_BITS and XORed with the word; bytes of the last word which lie
+ * behind the end of the block are masked out.
+ */
+inline uns
+hash_block_aligned(const byte *str, uns len)
+{
+       const uns *u = (const uns *) str;
+       uns hash = 0;
+       while (len >= sizeof(uns))
+       {
+               hash = ROL(hash, SHIFT_BITS) ^ *u++;
+               len -= sizeof(uns);
+       }
+       hash = ROL(hash, SHIFT_BITS);
+       /*
+        * Touch the trailing word only if the block really extends into it.
+        * When len is 0 here, *u lies completely behind the block and reading
+        * it could run off the end of the mapping; the result would have been
+        * masked to zero anyway, so the hash value is unchanged.
+        */
+       if (len)
+               hash ^= *u & mask_higher_bits[len];
+       return hash;
+}
+
+#ifndef        CPU_ALLOW_UNALIGNED
+/*
+ * Length of a zero-terminated string at an arbitrary address: the bytes
+ * up to the next uns boundary are checked one by one, the rest is handed
+ * over to str_len_aligned().
+ * (UNALIGNED_PART is assumed to yield the offset of str within its uns
+ * word -- confirm in lib.h.)
+ */
+uns
+str_len(const char *str)
+{
+       uns misalign = UNALIGNED_PART(str, uns);
+       uns head, i;
+       if (!misalign)
+               return str_len_aligned(str);
+       head = sizeof(uns) - misalign;
+       for (i=0; i<head; i++)
+               if (!str[i])
+                       return i;
+       return head + str_len_aligned(str + head);
+}
+
+/*
+ * Hash a zero-terminated string at an arbitrary address.  Aligned strings
+ * go to hash_string_aligned(); for the others the same hash is assembled
+ * byte by byte: the hash is rotated by SHIFT_BITS at the start of every
+ * group of sizeof(uns) bytes and each byte is XORed to the position it
+ * would occupy in a word loaded from memory.
+ */
+uns
+hash_string(const char *str)
+{
+       const byte *s = (const byte *) str;
+       uns hash = 0;
+       uns i;
+       if (!UNALIGNED_PART(s, uns))
+               return hash_string_aligned(str);
+       for (i=0; ; i++)
+       {
+               uns modulo = i % sizeof(uns);
+               uns byte_pos;
+#ifdef CPU_LITTLE_ENDIAN
+               byte_pos = modulo;
+#else
+               byte_pos = sizeof(uns) - 1 - modulo;
+#endif
+               if (!modulo)
+                       hash = ROL(hash, SHIFT_BITS);
+               if (!s[i])
+                       break;
+               /* Shift as unsigned: moving a promoted int into the sign bit is undefined */
+               hash ^= (uns) s[i] << (byte_pos * 8);
+       }
+       return hash;
+}
+
+/*
+ * Hash a block of len bytes at an arbitrary address.  Aligned blocks go
+ * to hash_block_aligned(); for the others the same hash is assembled byte
+ * by byte: the hash is rotated by SHIFT_BITS at the start of every group
+ * of sizeof(uns) bytes and each byte is XORed to the position it would
+ * occupy in a word loaded from memory.
+ */
+uns
+hash_block(const byte *str, uns len)
+{
+       uns hash = 0;
+       uns i;
+       if (!UNALIGNED_PART(str, uns))
+               return hash_block_aligned(str, len);
+       for (i=0; ; i++)
+       {
+               uns modulo = i % sizeof(uns);
+               uns byte_pos;
+#ifdef CPU_LITTLE_ENDIAN
+               byte_pos = modulo;
+#else
+               byte_pos = sizeof(uns) - 1 - modulo;
+#endif
+               if (!modulo)
+                       hash = ROL(hash, SHIFT_BITS);
+               if (i >= len)
+                       break;
+               /* Shift as unsigned: moving a promoted int into the sign bit is undefined */
+               hash ^= (uns) str[i] << (byte_pos * 8);
+       }
+       return hash;
+}
+#endif
+
+/*
+ * Case-insensitive hash of a zero-terminated string: every byte is passed
+ * through Cupcase() before being mixed in.  The string is always processed
+ * byte by byte; the hash is rotated by SHIFT_BITS at the start of every
+ * group of sizeof(uns) bytes.
+ */
+uns
+hash_string_nocase(const char *str)
+{
+       const byte *s = (const byte *) str;
+       uns hash = 0;
+       uns i;
+       for (i=0; ; i++)
+       {
+               uns modulo = i % sizeof(uns);
+               uns byte_pos;
+#ifdef CPU_LITTLE_ENDIAN
+               byte_pos = modulo;
+#else
+               byte_pos = sizeof(uns) - 1 - modulo;
+#endif
+               if (!modulo)
+                       hash = ROL(hash, SHIFT_BITS);
+               if (!s[i])
+                       break;
+               /* Shift as unsigned: moving a promoted int into the sign bit is undefined */
+               hash ^= (uns) (Cupcase(s[i])) << (byte_pos * 8);
+       }
+       return hash;
+}
diff --git a/lib/hashfunc.h b/lib/hashfunc.h

new file mode 100644 (file)

index 0000000..a9fb1a3
--- /dev/null
+++ b/lib/hashfunc.h
@@ -0,0 +1,43 @@
+/*
+ *     UCW Library -- Hyper-super-meta-alt-control-shift extra fast
+ *     str_len() and hash_*() routines
+ *
+ *     (c) 2002, Robert Spalek <robert@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_HASHFUNC_H
+#define _UCW_HASHFUNC_H
+
+#include "lib/lib.h"
+
+/* The following functions need str to be aligned to uns.  */
+uns str_len_aligned(const char *str) PURE;
+uns hash_string_aligned(const char *str) PURE;
+uns hash_block_aligned(const byte *str, uns len) PURE;
+
+#ifdef CPU_ALLOW_UNALIGNED
+#define        str_len(str)            str_len_aligned(str)
+#define        hash_string(str)        hash_string_aligned(str)
+#define        hash_block(str, len)    hash_block_aligned(str, len)
+#else
+uns str_len(const char *str) PURE;
+uns hash_string(const char *str) PURE;
+uns hash_block(const byte *str, uns len) PURE;
+#endif
+
+uns hash_string_nocase(const char *str) PURE;
+
+/*
+ *  We hash integers by multiplying by a reasonably large prime with
+ *  few ones in its binary form (to give the compiler the possibility
+ *  of using shifts and adds on architectures where multiplication
+ *  instructions are slow).
+ */
+/* Hash of a 32-bit integer: multiplication by the constant 0x01008041. */
+static inline uns CONST hash_u32(uns x) { return 0x01008041*x; }
+/* Hash of a 64-bit integer: XOR the two 32-bit halves together and hash the result with hash_u32(). */
+static inline uns CONST hash_u64(u64 x) { return hash_u32((uns)x ^ (uns)(x >> 32)); }
+/* Hash of a pointer value: uses hash_u32() or hash_u64() depending on the size of a pointer. */
+static inline uns CONST hash_pointer(void *x) { return ((sizeof(x) <= 4) ? hash_u32((uns)(uintptr_t)x) : hash_u64((u64)(uintptr_t)x)); }
+
+#endif
diff --git a/lib/hashtable.h b/lib/hashtable.h

new file mode 100644 (file)

index 0000000..552e88e
--- /dev/null
+++ b/lib/hashtable.h
@@ -0,0 +1,663 @@
+/*
+ *     UCW Library -- Universal Hash Table
+ *
+ *     (c) 2002--2004 Martin Mares <mj@ucw.cz>
+ *     (c) 2002--2005 Robert Spalek <robert@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+/*
+ *  This is not a normal header file, it's a generator of hash tables.
+ *  Each time you include it with parameters set in the corresponding
+ *  preprocessor macros, it generates a hash table with the parameters
+ *  given.
+ *
+ *  You need to specify:
+ *
+ *  HASH_NODE          data type where a node dwells (usually a struct).
+ *  HASH_PREFIX(x)     macro to add a name prefix (used on all global names
+ *                     defined by the hash table generator).
+ *
+ *  Then decide on type of keys:
+ *
+ *  HASH_KEY_ATOMIC=f  use node->f as a key of the atomic type
+ *                     HASH_ATOMIC_TYPE (i.e., a type which can be
+ *                     compared using `=='; defaults to int).
+ *  | HASH_KEY_STRING=f        use node->f as a string key, allocated
+ *                     separately from the rest of the node.
+ *  | HASH_KEY_ENDSTRING=f use node->f as a string key, allocated
+ *                     automatically at the end of the node struct
+ *                     (to be declared as "char f[1]" at the end).
+ *  | HASH_KEY_COMPLEX use a multi-component key; as the name suggests,
+ *                     the passing of parameters is a bit complex then.
+ *                     The HASH_KEY_COMPLEX(x) macro should expand to
+ *                     `x k1, x k2, ... x kn' and you should also define:
+ *    HASH_KEY_DECL    declaration of function parameters in which key
+ *                     should be passed to all hash table operations.
+ *                     That is, `type1 k1, type2 k2, ... typen kn'.
+ *                     With complex keys, HASH_GIVE_HASHFN and HASH_GIVE_EQ
+ *                     are mandatory.
+ *  | HASH_KEY_MEMORY=f        use node->f as a raw data key, compared using
+ *                     memcmp
+ *    HASH_KEY_SIZE    the length of the key block
+ *
+ *  Then specify what operations you request (all names are automatically
+ *  prefixed by calling HASH_PREFIX):
+ *
+ *  <always defined>   init() -- initialize the hash table.
+ *  HASH_WANT_CLEANUP  cleanup() -- deallocate the hash table.
+ *  HASH_WANT_FIND     node *find(key) -- find first node with the specified
+ *                     key, return NULL if no such node exists.
+ *  HASH_WANT_FIND_NEXT        node *find_next(node *start) -- find next node with
+ *                     the same key as `start', return NULL if no such node exists.
+ *  HASH_WANT_NEW      node *new(key) -- create new node with given key.
+ *                     Doesn't check whether it already exists.
+ *  HASH_WANT_LOOKUP   node *lookup(key) -- find node with given key,
+ *                     if it doesn't exist, create it. Defining
+ *                     HASH_GIVE_INIT_DATA is strongly recommended.
+ *  HASH_WANT_DELETE   int delete(key) -- delete and deallocate node
+ *                     with given key. Returns success.
+ *  HASH_WANT_REMOVE   remove(node *) -- delete and deallocate given node.
+ *
+ *  You can also supply several functions:
+ *
+ *  HASH_GIVE_HASHFN   unsigned int hash(key) -- calculate hash value of key.
+ *                     We have sensible default hash functions for strings
+ *                     and integers.
+ *  HASH_GIVE_EQ       int eq(key1, key2) -- return whether keys are equal.
+ *                     By default, we use == for atomic types and either
+ *                     strcmp or strcasecmp for strings.
+ *  HASH_GIVE_EXTRA_SIZE int extra_size(key) -- returns how many bytes after the
+ *                     node should be allocated for dynamic data. Default=0
+ *                     or length of the string with HASH_KEY_ENDSTRING.
+ *  HASH_GIVE_INIT_KEY void init_key(node *,key) -- initialize key in a newly
+ *                     created node. Defaults: assignment for atomic keys
+ *                     and static strings, strcpy for end-allocated strings.
+ *  HASH_GIVE_INIT_DATA        void init_data(node *) -- initialize data fields in a
+ *                     newly created node. Very useful for lookup operations.
+ *  HASH_GIVE_ALLOC    void *alloc(unsigned int size) -- allocate space for
+ *                     a node. Default is xmalloc() or pooled allocation, depending
+ *                     on HASH_USE_POOL and HASH_AUTO_POOL switches.
+ *                     void free(void *) -- the converse.
+ *
+ *  ... and a couple of extra parameters:
+ *
+ *  HASH_NOCASE                String comparisons should be case-insensitive.
+ *  HASH_DEFAULT_SIZE=n        Initially, use hash table of approx. `n' entries.
+ *  HASH_CONSERVE_SPACE        Use as little space as possible.
+ *  HASH_FN_BITS=n     The hash function gives only `n' significant bits.
+ *  HASH_ATOMIC_TYPE=t Atomic values are of type `t' instead of int.
+ *  HASH_USE_POOL=pool Allocate all nodes from given mempool. Note, however, that
+ *                     deallocation is not supported by mempools, so delete/remove
+ *                     will leak pool memory.
+ *  HASH_AUTO_POOL=size        Create a pool of the given block size automatically.
+ *  HASH_ZERO_FILL     New entries should be initialized to all zeroes.
+ *  HASH_TABLE_ALLOC   The hash table itself will be allocated and freed using
+ *                     the same allocation functions as the nodes instead of
+ *                     the default xmalloc().
+ *  HASH_TABLE_DYNAMIC Support multiple hash tables; the first parameter of all
+ *                     hash table operations is struct HASH_PREFIX(table) *.
+ *
+ *  You also get an iterator macro at no extra charge:
+ *
+ *  HASH_FOR_ALL(hash_prefix, variable)
+ *    {
+ *      // node *variable gets declared automatically
+ *      do_something_with_node(variable);
+ *      // use HASH_BREAK and HASH_CONTINUE instead of break and continue
+ *     // you must not alter contents of the hash table here
+ *    }
+ *  HASH_END_FOR;
+ *
+ *  (For dynamic tables, use HASH_FOR_ALL_DYNAMIC(hash_prefix, hash_table, variable) instead.)
+ *
+ *  Then include "lib/hashtable.h" and voila, you have a hash table
+ *  suiting all your needs (at least those which you've revealed :) ).
+ *
+ *  After including this file, all parameter macros are automatically
+ *  undef'd.
+ */
+
+#ifndef _UCW_HASHFUNC_H
+#include "lib/hashfunc.h"
+#endif
+
+#include <string.h>
+
+/* Initial setup of parameters */
+
+#if !defined(HASH_NODE) || !defined(HASH_PREFIX)
+#error Some of the mandatory configuration macros are missing.
+#endif
+
+#if defined(HASH_KEY_ATOMIC) && !defined(HASH_CONSERVE_SPACE)
+#define HASH_CONSERVE_SPACE
+#endif
+
+#define P(x) HASH_PREFIX(x)
+
+/* Declare buckets and the hash table */
+
+/* The user's node type under a per-table name. */
+typedef HASH_NODE P(node);
+
+/*
+ * One element of a collision chain: the link to the next bucket in the
+ * same slot, the cached hash value (omitted with HASH_CONSERVE_SPACE)
+ * and the user's node itself.
+ */
+typedef struct P(bucket) {
+  struct P(bucket) *next;
+#ifndef HASH_CONSERVE_SPACE
+  uns hash;
+#endif
+  P(node) n;
+} P(bucket);
+
+/*
+ * The table header: number of slots, number of stored nodes, the node
+ * counts at which the table is grown (hash_max) or shrunk (hash_min),
+ * the limit for growing (hash_hard_max), the array of slots and, with
+ * HASH_AUTO_POOL, the pool the nodes are allocated from.
+ */
+struct P(table) {
+  uns hash_size;
+  uns hash_count, hash_max, hash_min, hash_hard_max;
+  P(bucket) **ht;
+#ifdef HASH_AUTO_POOL
+  struct mempool *pool;
+#endif
+};
+
+/*
+ * Helper macros hiding the difference between dynamic tables (passed to
+ * every function as `table') and the single static table P(table):
+ *
+ *   T          the table structure itself
+ *   TA, TAC    declaration of the table parameter (alone / followed by
+ *              further parameters)
+ *   TAU, TAUC  the same, but marked UNUSED
+ *   TT, TTC    the table as an argument in a call (alone / followed by
+ *              further arguments)
+ */
+#ifdef HASH_TABLE_DYNAMIC
+#define T (*table)
+#define TA struct P(table) *table
+#define TAC TA,
+#define TAU TA UNUSED
+#define TAUC TA UNUSED,
+#define TT table
+#define TTC table,
+#else
+/* The one and only table of this instance */
+struct P(table) P(table);
+#define T P(table)
+#define TA void
+#define TAC
+#define TAU void
+#define TAUC
+#define TT
+#define TTC
+#endif
+
+/* Preset parameters */
+
+#if defined(HASH_KEY_ATOMIC)
+
+/* Atomic keys: the key is the single field HASH_KEY_ATOMIC of type HASH_ATOMIC_TYPE */
+
+#define HASH_KEY(x) x HASH_KEY_ATOMIC
+
+#ifndef HASH_ATOMIC_TYPE
+#  define HASH_ATOMIC_TYPE int
+#endif
+#define HASH_KEY_DECL HASH_ATOMIC_TYPE HASH_KEY( )
+
+/*
+ * Default hash function: hash_u32() or hash_u64(), chosen by the size of
+ * the key type.  It returns uns, matching both hash functions, the string
+ * variant of P(hash) and all the callers (which store the value in uns).
+ */
+#ifndef HASH_GIVE_HASHFN
+#  define HASH_GIVE_HASHFN
+   static inline uns P(hash) (TAUC HASH_ATOMIC_TYPE x)
+   { return ((sizeof(x) <= 4) ? hash_u32(x) : hash_u64(x)); }
+#endif
+
+/* Default comparison: plain == */
+#ifndef HASH_GIVE_EQ
+#  define HASH_GIVE_EQ
+   static inline int P(eq) (TAUC HASH_ATOMIC_TYPE x, HASH_ATOMIC_TYPE y)
+   { return x == y; }
+#endif
+
+/* Default key initialization: plain assignment */
+#ifndef HASH_GIVE_INIT_KEY
+#  define HASH_GIVE_INIT_KEY
+   static inline void P(init_key) (TAUC P(node) *n, HASH_ATOMIC_TYPE k)
+   { HASH_KEY(n->) = k; }
+#endif
+
+#elif defined(HASH_KEY_MEMORY)
+
+/* Memory keys: the key is a block of HASH_KEY_SIZE bytes in the field HASH_KEY_MEMORY */
+
+#define HASH_KEY(x) x HASH_KEY_MEMORY
+
+#define HASH_KEY_DECL byte HASH_KEY( )[HASH_KEY_SIZE]
+
+/*
+ * Default hash function: hash_block() over the whole key.  It returns
+ * uns, matching hash_block(), the string variant of P(hash) and all the
+ * callers (which store the value in uns).
+ */
+#ifndef HASH_GIVE_HASHFN
+#  define HASH_GIVE_HASHFN
+   static inline uns P(hash) (TAUC byte *x)
+   { return hash_block(x, HASH_KEY_SIZE); }
+#endif
+
+/* Default comparison: memcmp() over the whole key */
+#ifndef HASH_GIVE_EQ
+#  define HASH_GIVE_EQ
+   static inline int P(eq) (TAUC byte *x, byte *y)
+   { return !memcmp(x, y, HASH_KEY_SIZE); }
+#endif
+
+/* Default key initialization: copy the key into the node */
+#ifndef HASH_GIVE_INIT_KEY
+#  define HASH_GIVE_INIT_KEY
+   static inline void P(init_key) (TAUC P(node) *n, byte *k)
+   { memcpy(HASH_KEY(n->), k, HASH_KEY_SIZE); }
+#endif
+
+#elif defined(HASH_KEY_STRING) || defined(HASH_KEY_ENDSTRING)
+
+/*
+ * String keys.  With HASH_KEY_STRING the node holds only a pointer to the
+ * key, so the default init_key() stores the pointer it is given.  With
+ * HASH_KEY_ENDSTRING the key lives at the end of the node: extra_size()
+ * reserves strlen(key) additional bytes and the default init_key() copies
+ * the string there.
+ */
+#ifdef HASH_KEY_STRING
+#  define HASH_KEY(x) x HASH_KEY_STRING
+#  ifndef HASH_GIVE_INIT_KEY
+#    define HASH_GIVE_INIT_KEY
+     static inline void P(init_key) (TAUC P(node) *n, char *k)
+     { HASH_KEY(n->) = k; }
+#  endif
+#else
+#  define HASH_KEY(x) x HASH_KEY_ENDSTRING
+#  define HASH_GIVE_EXTRA_SIZE
+   static inline int P(extra_size) (TAUC char *k)
+   { return strlen(k); }
+#  ifndef HASH_GIVE_INIT_KEY
+#    define HASH_GIVE_INIT_KEY
+     static inline void P(init_key) (TAUC P(node) *n, char *k)
+     { strcpy(HASH_KEY(n->), k); }
+#  endif
+#endif
+#define HASH_KEY_DECL char *HASH_KEY( )
+
+/* Default hash function: hash_string() or its case-insensitive variant, depending on HASH_NOCASE */
+#ifndef HASH_GIVE_HASHFN
+#define HASH_GIVE_HASHFN
+  static inline uns P(hash) (TAUC char *k)
+   {
+#    ifdef HASH_NOCASE
+       return hash_string_nocase(k);
+#    else
+       return hash_string(k);
+#    endif
+   }
+#endif
+
+/* Default comparison: strcmp() or strcasecmp(), depending on HASH_NOCASE */
+#ifndef HASH_GIVE_EQ
+#  define HASH_GIVE_EQ
+   static inline int P(eq) (TAUC char *x, char *y)
+   {
+#    ifdef HASH_NOCASE
+       return !strcasecmp(x,y);
+#    else
+       return !strcmp(x,y);
+#    endif
+   }
+#endif
+
+#elif defined(HASH_KEY_COMPLEX)
+
+#define HASH_KEY(x) HASH_KEY_COMPLEX(x)
+
+#else
+#error You forgot to set the hash key type.
+#endif
+
+/* Defaults for missing parameters */
+
+#ifndef HASH_GIVE_HASHFN
+#error Unable to determine which hash function to use.
+#endif
+
+#ifndef HASH_GIVE_EQ
+#error Unable to determine how to compare two keys.
+#endif
+
+#ifdef HASH_GIVE_EXTRA_SIZE
+/* This trickery is needed to avoid `unused parameter' warnings */
+#define HASH_EXTRA_SIZE(x) P(extra_size)(TTC x)
+#else
+/*
+ *  Beware, C macros are expanded iteratively, not recursively,
+ *  hence we get only a _single_ argument, although the expansion
+ *  of HASH_KEY contains commas.
+ */
+#define HASH_EXTRA_SIZE(x) 0
+#endif
+
+#ifndef HASH_GIVE_INIT_KEY
+#error Unable to determine how to initialize keys.
+#endif
+
+#ifndef HASH_GIVE_INIT_DATA
+static inline void P(init_data) (TAUC P(node) *n UNUSED)
+{
+}
+#endif
+
+/*
+ * Select the node allocator.  Every variant ends up with four functions:
+ * alloc() and free() for buckets, and init_alloc() / cleanup_alloc()
+ * called from init() and cleanup().
+ */
+#ifdef HASH_GIVE_ALLOC
+/* If the caller has requested to use his own allocation functions, do so */
+/* (alloc() and free() are supplied by the caller, only the hooks are defined here) */
+static inline void P(init_alloc) (TAU) { }
+static inline void P(cleanup_alloc) (TAU) { }
+
+#elif defined(HASH_USE_POOL)
+/* If the caller has requested to use his mempool, do so */
+/* (free() does nothing here; no mp_* release call appears in this variant) */
+#include "lib/mempool.h"
+static inline void * P(alloc) (TAUC unsigned int size) { return mp_alloc_fast(HASH_USE_POOL, size); }
+static inline void P(free) (TAUC void *x UNUSED) { }
+static inline void P(init_alloc) (TAU) { }
+static inline void P(cleanup_alloc) (TAU) { }
+
+#elif defined(HASH_AUTO_POOL)
+/* Use our own pools */
+/* (the pool is created by init_alloc() and deleted as a whole by cleanup_alloc()) */
+#include "lib/mempool.h"
+static inline void * P(alloc) (TAUC unsigned int size) { return mp_alloc_fast(T.pool, size); }
+static inline void P(free) (TAUC void *x UNUSED) { }
+static inline void P(init_alloc) (TAU) { T.pool = mp_new(HASH_AUTO_POOL); }
+static inline void P(cleanup_alloc) (TAU) { mp_delete(T.pool); }
+/* From now on, an automatic pool is treated like any other pool (see cleanup()) */
+#define HASH_USE_POOL
+
+#else
+/* The default allocation method */
+static inline void * P(alloc) (TAUC unsigned int size) { return xmalloc(size); }
+static inline void P(free) (TAUC void *x) { xfree(x); }
+static inline void P(init_alloc) (TAU) { }
+static inline void P(cleanup_alloc) (TAU) { }
+
+#endif
+
+/* Allocator of the slot array: either the node allocator (HASH_TABLE_ALLOC) or xmalloc()/xfree() */
+#ifdef HASH_TABLE_ALLOC
+static inline void * P(table_alloc) (TAUC unsigned int size) { return P(alloc)(TTC size); }
+static inline void P(table_free) (TAUC void *x) { P(free)(TTC x); }
+#else
+static inline void * P(table_alloc) (TAUC unsigned int size) { return xmalloc(size); }
+static inline void P(table_free) (TAUC void *x) { xfree(x); }
+#endif
+
+#ifndef HASH_DEFAULT_SIZE
+#define HASH_DEFAULT_SIZE 32
+#endif
+
+#ifndef HASH_FN_BITS
+#define HASH_FN_BITS 32
+#endif
+
+/* Allocate a bucket of the given size using the node allocator; with HASH_ZERO_FILL, clear it as well. */
+#ifdef HASH_ZERO_FILL
+static inline void * P(new_bucket)(TAUC uns size)
+{
+  byte *buck = P(alloc)(TTC size);
+  bzero(buck, size);
+  return buck;
+}
+#else
+static inline void * P(new_bucket)(TAUC uns size) { return P(alloc)(TTC size); }
+#endif
+
+/* Now the operations */
+
+/*
+ * Allocate an empty slot array.  On entry T.hash_size holds the wanted
+ * number of slots, which is first adjusted by next_table_prime().  Then
+ * the node counts triggering a rehash are derived from the final size:
+ * growing at twice the size (never, once that would reach hash_hard_max),
+ * shrinking at a quarter of the size (never for tables up to twice the
+ * default size).
+ */
+static void P(alloc_table) (TAU)
+{
+  size_t bytes;
+
+  T.hash_size = next_table_prime(T.hash_size);
+  bytes = sizeof(void *) * T.hash_size;
+  T.ht = P(table_alloc)(TTC bytes);
+  bzero(T.ht, bytes);
+  T.hash_max = (2*T.hash_size < T.hash_hard_max) ? 2*T.hash_size : ~0U;
+  T.hash_min = (T.hash_size/2 > HASH_DEFAULT_SIZE) ? T.hash_size/4 : 0;
+}
+
+/*
+ * Initialize an empty table of HASH_DEFAULT_SIZE slots (rounded by
+ * alloc_table()).  The limit for growing is 2^HASH_FN_BITS, but at
+ * most 2^28.
+ */
+static void P(init) (TA)
+{
+  T.hash_count = 0;
+  T.hash_size = HASH_DEFAULT_SIZE;
+#if HASH_FN_BITS < 28
+  T.hash_hard_max = 1 << HASH_FN_BITS;
+#else
+  T.hash_hard_max = 1 << 28;
+#endif
+  /* The allocator must be ready before the slot array is allocated (it may come from the same allocator) */
+  P(init_alloc)(TT);
+  P(alloc_table)(TT);
+}
+
+#ifdef HASH_WANT_CLEANUP
+/*
+ * Deallocate the table.  Buckets are freed one by one unless they come
+ * from a pool (HASH_USE_POOL, which is also set for HASH_AUTO_POOL); then
+ * the allocator's cleanup hook is called and the slot array is freed.
+ */
+static void P(cleanup) (TA)
+{
+#ifndef HASH_USE_POOL
+  uns i;
+  P(bucket) *b, *bb;
+
+  for (i=0; i<T.hash_size; i++)
+    for (b=T.ht[i]; b; b=bb)
+      {
+       /* remember the successor before the bucket is freed */
+       bb = b->next;
+       P(free)(TTC b);
+      }
+#endif
+  P(cleanup_alloc)(TT);
+  P(table_free)(TTC T.ht);
+}
+#endif
+
+/* Return the hash value of a bucket: recalculated from the key with HASH_CONSERVE_SPACE, otherwise the value cached in the bucket. */
+static inline uns P(bucket_hash) (TAUC P(bucket) *b)
+{
+#ifdef HASH_CONSERVE_SPACE
+  return P(hash)(TTC HASH_KEY(b->n.));
+#else
+  return b->hash;
+#endif
+}
+
+/*
+ * Resize the table to approximately `size' slots (alloc_table() adjusts
+ * the number).  All buckets are relinked to the new slot array -- they are
+ * not copied, so pointers to nodes stay valid -- and the old array is freed.
+ */
+static void P(rehash) (TAC uns size)
+{
+  P(bucket) *b, *nb;
+  P(bucket) **oldt = T.ht, **newt;
+  uns oldsize = T.hash_size;
+  uns i, h;
+
+  DBG("Rehashing %d->%d at count %d", oldsize, size, T.hash_count);
+  T.hash_size = size;
+  P(alloc_table)(TT);
+  newt = T.ht;
+  for (i=0; i<oldsize; i++)
+    {
+      b = oldt[i];
+      while (b)
+       {
+         /* save the old successor, then push the bucket to the front of its new chain */
+         nb = b->next;
+         h = P(bucket_hash)(TTC b) % T.hash_size;
+         b->next = newt[h];
+         newt[h] = b;
+         b = nb;
+       }
+    }
+  P(table_free)(TTC oldt);
+}
+
+#ifdef HASH_WANT_FIND
+/* Find the first node with the given key in its chain; returns NULL if there is none. */
+static P(node) * P(find) (TAC HASH_KEY_DECL)
+{
+  uns h0 = P(hash) (TTC HASH_KEY( ));
+  uns h = h0 % T.hash_size;
+  P(bucket) *b;
+
+  for (b=T.ht[h]; b; b=b->next)
+    {
+      /* if hash values are cached, compare them first and call eq() only when they match */
+      if (
+#ifndef HASH_CONSERVE_SPACE
+         b->hash == h0 &&
+#endif
+         P(eq)(TTC HASH_KEY( ), HASH_KEY(b->n.)))
+       return &b->n;
+    }
+  return NULL;
+}
+#endif
+
+#ifdef HASH_WANT_FIND_NEXT
+/*
+ * Find the next node with the same key as `start': continues along the
+ * chain behind `start' and returns NULL if no further node matches.
+ */
+static P(node) * P(find_next) (TAC P(node) *start)
+{
+#ifndef HASH_CONSERVE_SPACE
+  uns h0 = P(hash) (TTC HASH_KEY(start->));
+#endif
+  /* get from the node back to the bucket it is embedded in */
+  P(bucket) *b = SKIP_BACK(P(bucket), n, start);
+
+  for (b=b->next; b; b=b->next)
+    {
+      if (
+#ifndef HASH_CONSERVE_SPACE
+         b->hash == h0 &&
+#endif
+         P(eq)(TTC HASH_KEY(start->), HASH_KEY(b->n.)))
+       return &b->n;
+    }
+  return NULL;
+}
+#endif
+
+#ifdef HASH_WANT_NEW
+/*
+ * Create a new node with the given key and return it.  It is not checked
+ * whether a node with this key already exists.  The node is put to the
+ * front of its chain, its key and then its data are initialized and the
+ * table is grown if it has become too full.
+ */
+static P(node) * P(new) (TAC HASH_KEY_DECL)
+{
+  uns h0, h;
+  P(bucket) *b;
+
+  h0 = P(hash) (TTC HASH_KEY( ));
+  h = h0 % T.hash_size;
+  b = P(new_bucket) (TTC sizeof(struct P(bucket)) + HASH_EXTRA_SIZE(HASH_KEY( )));
+  b->next = T.ht[h];
+  T.ht[h] = b;
+#ifndef HASH_CONSERVE_SPACE
+  b->hash = h0;
+#endif
+  P(init_key)(TTC &b->n, HASH_KEY( ));
+  P(init_data)(TTC &b->n);
+  /* rehashing only relinks the buckets, so &b->n stays valid */
+  if (T.hash_count++ >= T.hash_max)
+    P(rehash)(TTC 2*T.hash_size);
+  return &b->n;
+}
+#endif
+
+#ifdef HASH_WANT_LOOKUP
+/*
+ * Find a node with the given key; if there is none, create it in the same
+ * way as new() does.  Always returns a node.
+ */
+static P(node) * P(lookup) (TAC HASH_KEY_DECL)
+{
+  uns h0 = P(hash) (TTC HASH_KEY( ));
+  uns h = h0 % T.hash_size;
+  P(bucket) *b;
+
+  for (b=T.ht[h]; b; b=b->next)
+    {
+      if (
+#ifndef HASH_CONSERVE_SPACE
+         b->hash == h0 &&
+#endif
+         P(eq)(TTC HASH_KEY( ), HASH_KEY(b->n.)))
+       return &b->n;
+    }
+
+  /* Not found: create the node at the front of the chain just searched */
+  b = P(new_bucket) (TTC sizeof(struct P(bucket)) + HASH_EXTRA_SIZE(HASH_KEY( )));
+  b->next = T.ht[h];
+  T.ht[h] = b;
+#ifndef HASH_CONSERVE_SPACE
+  b->hash = h0;
+#endif
+  P(init_key)(TTC &b->n, HASH_KEY( ));
+  P(init_data)(TTC &b->n);
+  /* rehashing only relinks the buckets, so &b->n stays valid */
+  if (T.hash_count++ >= T.hash_max)
+    P(rehash)(TTC 2*T.hash_size);
+  return &b->n;
+}
+#endif
+
+#ifdef HASH_WANT_DELETE
+/*
+ * Delete and deallocate the first node with the given key.  Returns 1 if
+ * a node has been deleted, 0 if no such node exists.  The table is shrunk
+ * when the number of nodes drops below hash_min.
+ */
+static int P(delete) (TAC HASH_KEY_DECL)
+{
+  uns h0 = P(hash) (TTC HASH_KEY( ));
+  uns h = h0 % T.hash_size;
+  P(bucket) *b, **bb;
+
+  /* bb points to the link leading to b, which makes unlinking trivial.
+   * The assignment in the condition is intentional (parenthesized as in remove()). */
+  for (bb=&T.ht[h]; (b=*bb); bb=&b->next)
+    {
+      if (
+#ifndef HASH_CONSERVE_SPACE
+         b->hash == h0 &&
+#endif
+         P(eq)(TTC HASH_KEY( ), HASH_KEY(b->n.)))
+       {
+         *bb = b->next;
+         P(free)(TTC b);
+         if (--T.hash_count < T.hash_min)
+           P(rehash)(TTC T.hash_size/2);
+         return 1;
+       }
+    }
+  return 0;
+}
+#endif
+
+#ifdef HASH_WANT_REMOVE
+/*
+ * Delete and deallocate the given node, which must be present in the
+ * table (this is ASSERTed).  The table is shrunk when the number of nodes
+ * drops below hash_min.
+ */
+static void P(remove) (TAC P(node) *n)
+{
+  /* get from the node back to the bucket it is embedded in */
+  P(bucket) *x = SKIP_BACK(struct P(bucket), n, n);
+  uns h0 = P(bucket_hash)(TTC x);
+  uns h = h0 % T.hash_size;
+  P(bucket) *b, **bb;
+
+  /* walk the chain to find the link pointing to our bucket */
+  for (bb=&T.ht[h]; (b=*bb) && b != x; bb=&b->next)
+    ;
+  ASSERT(b);
+  *bb = b->next;
+  P(free)(TTC b);
+  if (--T.hash_count < T.hash_min)
+    P(rehash)(TTC T.hash_size/2);
+}
+#endif
+
+/* And the iterator */
+
+/* The iterator macros are shared by all instances, so they are defined only on the first inclusion. */
+#ifndef HASH_FOR_ALL
+
+/*
+ * HASH_FOR_ALL_DYNAMIC(prefix, table, var) opens two nested loops (over
+ * all slots and over the chain of each slot) and a block in which `var'
+ * points to the current node; HASH_END_FOR closes them again.
+ * HASH_FOR_ALL does the same for the static table of the given prefix.
+ *
+ * NOTE(review): HASH_BREAK expands to nothing, so it does not leave the
+ * loops, although the documentation at the top of this file tells users
+ * to call it instead of `break'.  (A plain `break' would leave only the
+ * inner loop.)  Confirm the intended behavior.
+ */
+#define HASH_FOR_ALL_DYNAMIC(h_px, h_table, h_var)                                     \
+do {                                                                                   \
+  uns h_slot;                                                                          \
+  struct GLUE_(h_px,bucket) *h_buck;                                                   \
+  for (h_slot=0; h_slot < (h_table)->hash_size; h_slot++)                              \
+    for (h_buck = (h_table)->ht[h_slot]; h_buck; h_buck = h_buck->next)                        \
+      {                                                                                        \
+       GLUE_(h_px,node) *h_var = &h_buck->n;
+#define HASH_FOR_ALL(h_px, h_var) HASH_FOR_ALL_DYNAMIC(h_px, &GLUE_(h_px,table), h_var)
+#define HASH_END_FOR } } while(0)
+#define HASH_BREAK
+#define HASH_CONTINUE continue
+
+#endif
+
+/* Finally, undefine all the parameters */
+
+#undef P
+#undef T
+#undef TA
+#undef TAC
+#undef TAU
+#undef TAUC
+#undef TT
+#undef TTC
+
+#undef HASH_ATOMIC_TYPE
+#undef HASH_CONSERVE_SPACE
+#undef HASH_DEFAULT_SIZE
+#undef HASH_EXTRA_SIZE
+#undef HASH_FN_BITS
+#undef HASH_GIVE_ALLOC
+#undef HASH_GIVE_EQ
+#undef HASH_GIVE_EXTRA_SIZE
+#undef HASH_GIVE_HASHFN
+#undef HASH_GIVE_INIT_DATA
+#undef HASH_GIVE_INIT_KEY
+#undef HASH_KEY
+#undef HASH_KEY_ATOMIC
+#undef HASH_KEY_COMPLEX
+#undef HASH_KEY_DECL
+#undef HASH_KEY_ENDSTRING
+#undef HASH_KEY_STRING
+#undef HASH_KEY_MEMORY
+#undef HASH_KEY_SIZE
+#undef HASH_NOCASE
+#undef HASH_NODE
+#undef HASH_PREFIX
+#undef HASH_USE_POOL
+#undef HASH_AUTO_POOL
+#undef HASH_WANT_CLEANUP
+#undef HASH_WANT_DELETE
+#undef HASH_WANT_FIND
+#undef HASH_WANT_FIND_NEXT
+#undef HASH_WANT_LOOKUP
+#undef HASH_WANT_NEW
+#undef HASH_WANT_REMOVE
+#undef HASH_TABLE_ALLOC
+#undef HASH_TABLE_DYNAMIC
+#undef HASH_ZERO_FILL
diff --git a/lib/heap.h b/lib/heap.h

new file mode 100644 (file)

index 0000000..4f83776
--- /dev/null
+++ b/lib/heap.h
@@ -0,0 +1,88 @@
+/*
+ *     UCW Library -- Universal Heap Macros
+ *
+ *     (c) 2001 Martin Mares <mj@ucw.cz>
+ *     (c) 2005 Tomas Valla <tom@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+/*
+ * Sift the element at index _j down until neither of its children is
+ * smaller. The heap is 1-based: the children of _j live at 2*_j and
+ * 2*_j+1 and `num' is the index of the last element.
+ *
+ * This is a building block for the macros below: it expects the caller
+ * to have declared `_j' (start position), `_l' (scratch) and `x' (the
+ * temporary handed to `swap'). It expands to a bare `for' statement.
+ */
+#define HEAP_BUBBLE_DOWN_J(heap,num,less,swap)                                         \
+  for (;;)                                                                             \
+    {                                                                                  \
+      _l = 2*_j;                                                                       \
+      if (_l > num)                                                                    \
+       break;                                                                          \
+      if (less(heap[_j],heap[_l]) && (_l == num || less(heap[_j],heap[_l+1])))         \
+       break;                                                                          \
+      if (_l != num && less(heap[_l+1],heap[_l]))                                      \
+       _l++;                                                                           \
+      swap(heap,_j,_l,x);                                                              \
+      _j = _l;                                                                         \
+    }
+
+/*
+ * Sift the element at index _j up while it is not preceded by its
+ * parent (index _j/2) according to `less'. Stops at the root (index 1).
+ *
+ * Expects the caller to have declared `_j' (start position), `_u'
+ * (scratch) and `x' (the temporary handed to `swap'). It expands to
+ * a bare `while' statement; `num' is not used.
+ */
+#define HEAP_BUBBLE_UP_J(heap,num,less,swap)                                           \
+  while (_j > 1)                                                                       \
+    {                                                                                  \
+      _u = _j/2;                                                                       \
+      if (less(heap[_u], heap[_j]))                                                    \
+       break;                                                                          \
+      swap(heap,_u,_j,x);                                                              \
+      _j = _u;                                                                         \
+    }
+
+#define HEAP_INIT(type,heap,num,less,swap)                                             \
+  do {                                                                                 \
+    uns _i = num;                                                                      \
+    uns _j, _l;                                                                                \
+    type x;                                                                            \
+    while (_i >= 1)                                                                    \
+      {                                                                                        \
+       _j = _i;                                                                        \
+        HEAP_BUBBLE_DOWN_J(heap,num,less,swap)                                         \
+       _i--;                                                                           \
+      }                                                                                        \
+  } while(0)
+
+/*
+ * Remove the root of the heap: the root is swapped with the last
+ * element, `num' is decremented (so it must be a modifiable lvalue) and
+ * the new root is sifted down. After the call the removed element sits
+ * at heap[num+1], just behind the shrunken heap.
+ */
+#define HEAP_DELMIN(type,heap,num,less,swap)                                           \
+  do {                                                                                 \
+    uns _j, _l;                                                                                \
+    type x;                                                                            \
+    swap(heap,1,num,x);                                                                        \
+    num--;                                                                             \
+    _j = 1;                                                                            \
+    HEAP_BUBBLE_DOWN_J(heap,num,less,swap);                                            \
+  } while(0)
+
+/*
+ * Restore the heap order after a new element has been appended: the
+ * macro only sifts up from index `num', it neither stores the element
+ * nor changes `num', so the caller has to do both beforehand.
+ */
+#define HEAP_INSERT(type,heap,num,less,swap)                                           \
+  do {                                                                                 \
+    uns _j, _u;                                                                                \
+    type x;                                                                            \
+    _j = num;                                                                          \
+    HEAP_BUBBLE_UP_J(heap,num,less,swap);                                              \
+  } while(0)
+
+/*
+ * Restore the heap order by sifting the element at index `pos' down.
+ * Judging by the name, it is meant to be called after the key at `pos'
+ * has been increased; the macro itself does not modify the element.
+ */
+#define HEAP_INCREASE(type,heap,num,less,swap,pos)                                     \
+  do {                                                                                 \
+    uns _j, _l;                                                                                \
+    type x;                                                                            \
+    _j = pos;                                                                          \
+    HEAP_BUBBLE_DOWN_J(heap,num,less,swap);                                            \
+  } while(0)
+
+/*
+ * Remove the element at index `pos': it is swapped with the last
+ * element, `num' is decremented (so it must be a modifiable lvalue) and
+ * the element which took its place is sifted up or down, depending on
+ * how it compares with the removed one (now at heap[num+1]).
+ *
+ * There is intentionally no semicolon after HEAP_BUBBLE_UP_J: it expands
+ * to a complete `while' statement and a semicolon would detach the
+ * following `else' from its `if'.
+ */
+#define HEAP_DELETE(type,heap,num,less,swap,pos)                                       \
+  do {                                                                                 \
+    uns _j, _l, _u;                                                                    \
+    type x;                                                                            \
+    _j = pos;                                                                          \
+    swap(heap,_j,num,x);                                                               \
+    num--;                                                                             \
+    if (less(heap[_j], heap[num+1]))                                                   \
+      HEAP_BUBBLE_UP_J(heap,num,less,swap)                                             \
+    else                                                                               \
+      HEAP_BUBBLE_DOWN_J(heap,num,less,swap);                                          \
+  } while(0)
+
+/*
+ * Default swapping macro: exchanges heap[a] and heap[b] by plain
+ * assignment, using `t' as the temporary. Suitable as the `swap'
+ * argument of the macros above.
+ */
+#define HEAP_SWAP(heap,a,b,t) (t=heap[a], heap[a]=heap[b], heap[b]=t)
diff --git a/lib/ipaccess.c b/lib/ipaccess.c

new file mode 100644 (file)

index 0000000..5dd388c
--- /dev/null
+++ b/lib/ipaccess.c
@@ -0,0 +1,127 @@
+/*
+ *     UCW Library -- IP address access lists
+ *
+ *     (c) 1997--2007 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/clists.h"
+#include "lib/conf.h"
+#include "lib/getopt.h"
+#include "lib/fastbuf.h"
+#include "lib/ipaccess.h"
+
+#include <string.h>
+
+/* A single rule of an access list, as described by ipaccess_cf. */
+struct ipaccess_entry {
+  cnode n;                             /* linkage in the clist of rules */
+  int allow;                           /* "Mode" lookup index: 0 = deny, 1 = allow */
+  struct ip_addrmask addr;             /* address and mask the rule applies to */
+};
+
+/*
+ * Configuration parser of struct ip_addrmask. Accepts either a bare
+ * address (the mask is then all ones) or "address/suffix", where the
+ * suffix is a prefix length 0..32 or anything cf_parse_ip() accepts
+ * as a netmask.
+ *
+ * The input string is modified: the '/' is overwritten by a NUL.
+ * Returns NULL on success, an error message otherwise.
+ */
+static char *
+addrmask_parser(char *c, void *ptr)
+{
+  struct ip_addrmask *am = ptr;
+  char *slash, *err;
+  uns len;
+
+  /*
+   * Tricky: addrmasks are going to be compared by memcmp(), so even the
+   * padding between structure members has to be zeroed out.
+   */
+  bzero(am, sizeof(*am));
+
+  /* Split off the optional "/suffix" part */
+  slash = strchr(c, '/');
+  if (slash)
+    *slash++ = 0;
+
+  err = cf_parse_ip(c, &am->addr);
+  if (err)
+    return err;
+
+  if (!slash)
+    {
+      am->mask = ~0U;
+      return NULL;
+    }
+
+  /* Shifting a 32-bit value by 32 is undefined, hence the special case */
+  if (!cf_parse_int(slash, &len) && len <= 32)
+    am->mask = (len == 32) ? ~0U : ~(~0U >> len);
+  else if (cf_parse_ip(slash, &am->mask))
+    return "Invalid prefix length or netmask";
+  return NULL;
+}
+
+/*
+ * Configuration dumper of struct ip_addrmask: writes the address and the
+ * mask to `fb' as two 8-digit hexadecimal numbers separated by a slash
+ * and followed by a space.
+ */
+static void
+addrmask_dumper(struct fastbuf *fb, void *ptr)
+{
+  struct ip_addrmask *am = ptr;
+  bprintf(fb, "%08x/%08x ", am->addr, am->mask);
+}
+
+/* User-defined configuration type for items holding a struct ip_addrmask. */
+struct cf_user_type ip_addrmask_type = {
+  .size = sizeof(struct ip_addrmask),
+  .name = "ip_addrmask",
+  .parser = addrmask_parser,
+  .dumper = addrmask_dumper
+};
+
+/*
+ * Configuration section describing one struct ipaccess_entry:
+ *   Mode   "deny" or "allow", stored in `allow' as 0 or 1
+ *   IP     address with optional mask, parsed by ip_addrmask_type
+ */
+struct cf_section ipaccess_cf = {
+  CF_TYPE(struct ipaccess_entry),
+  CF_ITEMS {
+    CF_LOOKUP("Mode", PTR_TO(struct ipaccess_entry, allow), ((char*[]) { "deny", "allow", NULL })),
+    CF_USER("IP", PTR_TO(struct ipaccess_entry, addr), &ip_addrmask_type),
+    CF_END
+  }
+};
+
+/*
+ * Returns non-zero iff `ip' agrees with am->addr in all bits selected
+ * by am->mask.
+ */
+int ip_addrmask_match(struct ip_addrmask *am, u32 ip)
+{
+  u32 differing = ip ^ am->addr;
+
+  return (differing & am->mask) == 0;
+}
+
+/*
+ * Look `ip' up in the access list `l'. The first rule whose address/mask
+ * matches decides: its `allow' value is returned. When no rule matches,
+ * the result is 0 (deny).
+ */
+int
+ipaccess_check(clist *l, u32 ip)
+{
+  CLIST_FOR_EACH(struct ipaccess_entry *, rule, *l)
+    {
+      if (ip_addrmask_match(&rule->addr, ip))
+       return rule->allow;
+    }
+  return 0;
+}
+
+#ifdef TEST
+
+#include <stdio.h>
+
+/* The access list exercised by the test program */
+static clist t;
+
+/* Test configuration section: item "A" is a list of ipaccess_cf entries stored in `t' */
+static struct cf_section test_cf = {
+  CF_ITEMS {
+    CF_LIST("A", &t, &ipaccess_cf),
+    CF_END
+  }
+};
+
+/*
+ * Test driver: declares section "T", processes the configuration options
+ * and then reads IP addresses from stdin, one per line, printing the
+ * verdict of ipaccess_check() for each of them.
+ */
+int main(int argc, char **argv)
+{
+  byte line[256];
+
+  cf_declare_section("T", &test_cf, 0);
+  if (cf_getopt(argc, argv, CF_SHORT_OPTS, CF_NO_LONG_OPTS, NULL) != -1)
+    die("Invalid arguments");
+
+  while (fgets(line, sizeof(line), stdin))
+    {
+      char *nl = strchr(line, '\n');
+      const char *verdict;
+      u32 ip;
+
+      /* Strip the trailing newline, if there is one */
+      if (nl)
+       *nl = 0;
+
+      if (cf_parse_ip(line, &ip))
+       verdict = "Invalid IP address";
+      else
+       verdict = ipaccess_check(&t, ip) ? "Allowed" : "Denied";
+      puts(verdict);
+    }
+  return 0;
+}
+
+#endif
diff --git a/lib/ipaccess.h b/lib/ipaccess.h

new file mode 100644 (file)

index 0000000..b407783
--- /dev/null
+++ b/lib/ipaccess.h
@@ -0,0 +1,28 @@
+/*
+ *     UCW Library -- IP address access lists
+ *
+ *     (c) 1997--2007 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_IPACCESS_H
+#define _UCW_IPACCESS_H
+
+#include "lib/clists.h"
+
+extern struct cf_section ipaccess_cf;
+int ipaccess_check(clist *l, u32 ip);
+
+/* Low-level handling of addresses and masks */
+
+/*
+ * An address together with a mask; an IP matches when it agrees with
+ * `addr' in all bits set in `mask' (see ip_addrmask_match()).
+ */
+struct ip_addrmask {
+  u32 addr;
+  u32 mask;
+};
+
+extern struct cf_user_type ip_addrmask_type;
+int ip_addrmask_match(struct ip_addrmask *am, u32 ip);
+
+#endif
diff --git a/lib/kmp-search.h b/lib/kmp-search.h

new file mode 100644 (file)

index 0000000..b702acc
--- /dev/null
+++ b/lib/kmp-search.h
@@ -0,0 +1,191 @@
+/*
+ *      Knuth-Morris-Pratt's Substring Search for N given strings
+ *
+ *      (c) 1999--2005, Robert Spalek <robert@ucw.cz>
+ *      (c) 2006, Pavel Charvat <pchar@ucw.cz>
+ *
+ *      (In fact, the algorithm is usually referred to as Aho-Corasick,
+ *      but that's just an extension of KMP to multiple strings.)
+ */
+
+/*
+ *  This is not a normal header file, it's a generator of KMP algorithm.
+ *  Each time you include it with parameters set in the corresponding
+ *  preprocessor macros, it generates KMP structures and functions
+ *  with the parameters given. See lib/kmp.h before reading this description.
+ *
+ *  This file defines:
+ *
+ *     struct search           structure with both the internal and the user-defined variables
+ *                             used during the search and accessible from all macros
+ *
+ *     void search(kmp,search,src) executes the search; search structure is allocated by the caller (possible input/output)
+ *
+ *     void run(kmp,src)       the same, but automatically allocates search structure from the stack
+ *
+ *
+ *  Parameters to the generator (these marked with [*] are mandatory):
+ *
+ *  [*]        KMPS_PREFIX(x)          macro to add a name prefix (used on all global names
+ *                             defined by the KMP search generator)
+ *  [*]        KMPS_KMP_PREFIX(x)      prefix used for lib/kmp.h
+ *
+ *  KMPS_SOURCE                        user-defined text source (together with KMPS_GET_CHAR);
+ *                             if unset, the one from lib/kmp.h is taken
+ *  KMPS_GET_CHAR(kmp,src,search) analogy to KMP_GET_CHAR, but it must store the next character to search->c
+ *
+ *  KMPS_ADD_CONTROLS          add control characters (see KMP_CONTROL_CHAR in kmp.h) at both ends of the input string
+ *  KMPS_MERGE_CONTROLS        merge adjacent control characters to a single one
+ *
+ *  KMPS_VARS                  user-defined variables in struct search (in .u substructure to avoid collisions)
+ *
+ *  KMPS_INIT(kmp,src,search)  statement executed at the beginning of search()
+ *  KMPS_EXIT(kmp,src,search)  ... at the end
+ *  KMPS_STEP(kmp,src,search)  ... after each step (read of next character + current state update)
+ *                             of the algorithm, but before KMPS_FOUND[_CHAIN]
+ *  KMPS_FOUND_CHAIN(kmp,src,search) ... for each state representing locally longest match
+ *                             (stored in search->out - NOT necessarily search->s!);
+ *                             all matches form a NULL-terminated linked list (search->out, search->out->next, ...)
+ *                             in order of decreasing length
+ *  KMPS_FOUND(kmp,src,search)  ... called for every match (in search->out)
+ *  KMPS_WANT_BEST             algorithm computes globally longest match, which is available
+ *                             in search->best in KMPS_EXIT; if there is no match, it points to the null state
+ */
+
+/* P() prefixes names generated by this file, KP() refers to names generated by lib/kmp.h */
+#define P(x) KMPS_PREFIX(x)
+#define KP(x) KMPS_KMP_PREFIX(x)
+
+/* Type of the text source: user-defined, or inherited from the automaton */
+#ifdef KMPS_SOURCE
+typedef KMPS_SOURCE P(search_source_t);
+#else
+typedef KP(source_t) P(search_source_t);
+#endif
+
+/* Default reader: the automaton's get_char(), storing the character to s->c */
+#ifndef KMPS_GET_CHAR
+#define KMPS_GET_CHAR(kmp,src,s) (KP(get_char)(kmp, &src, &s->c))
+#endif
+
+/*
+ * State of a single search. It is filled in by P(search)() and it is
+ * accessible from all the KMPS_* hooks; which members exist depends on
+ * the generator parameters.
+ */
+struct P(search) {
+  struct KP(state) *s;         /* current state */
+  struct KP(state) *out;       /* output state */
+# ifdef KMPS_WANT_BEST
+  struct KP(state) *best;      /* longest match */
+# endif
+  KP(char_t) c;                        /* last character */
+# ifdef KMPS_ADD_CONTROLS
+  uns eof;                     /* set once the trailing control character has been generated */
+# endif
+# ifdef KMPS_VARS
+  struct {
+    KMPS_VARS
+  } u;                         /* user-defined */
+# endif
+};
+
+/*
+ * Run the automaton `kmp' over the text `src', keeping all the state
+ * in `*s' (allocated by the caller).
+ *
+ * Each iteration of the main loop performs one transition on s->c, calls
+ * the optional KMPS_STEP, KMPS_FOUND_CHAIN and KMPS_FOUND hooks and then
+ * reads the next character by KMPS_GET_CHAR. KMPS_INIT is executed
+ * before the loop and KMPS_EXIT after it.
+ */
+static void
+P(search) (struct KP(struct) *kmp, struct P(search) *s, P(search_source_t) src)
+{
+  s->s = &kmp->null;
+# ifdef KMPS_WANT_BEST
+  s->best = &kmp->null;
+# endif
+# ifdef KMPS_ADD_CONTROLS
+  /* The first character processed is an artificial control character */
+  s->c = KP(control)();
+  s->eof = 0;
+# else
+  s->c = 0;
+# endif
+# ifdef KMPS_INIT
+  { KMPS_INIT(kmp, src, s); }
+# endif
+# ifndef KMPS_ADD_CONTROLS
+  /* There is no character to process yet, so enter the loop at the reading part */
+  goto start_read;
+# endif
+  for (;;)
+  {
+    /* Follow the back edges until a transition on s->c is found; fall back to the null state */
+    for (struct KP(state) *t = s->s; t && !(s->s = KP(hash_find)(&kmp->hash, t, s->c)); t = t->back);
+    s->s = s->s ? : &kmp->null;
+
+#   ifdef KMPS_STEP
+    { KMPS_STEP(kmp, src, s); }
+#   endif
+
+#   if defined(KMPS_FOUND) || defined(KMPS_FOUND_CHAIN) || defined(KMPS_WANT_BEST)
+    /* Head of the chain of matches: the current state if it is a key string, else its longest shorter match */
+    s->out = s->s->len ? s->s : s->s->next;
+    if (s->out)
+      {
+#      ifdef KMPS_WANT_BEST
+       if (s->out->len > s->best->len)
+         s->best = s->out;
+#      endif
+#       ifdef KMPS_FOUND_CHAIN
+       { KMPS_FOUND_CHAIN(kmp, src, s); }
+#       endif
+#       ifdef KMPS_FOUND
+       /* Report every match on the chain; the assignment in the condition is intentional */
+       do
+          { KMPS_FOUND(kmp, src, s); }
+       while (s->out = s->out->next);
+#       endif
+      }
+#   endif
+
+#   ifdef KMPS_ADD_CONTROLS
+    /* The trailing control character has just been processed, we are done */
+    if (s->eof)
+      break;
+#   endif
+
+#   ifndef KMPS_ADD_CONTROLS
+start_read: ;
+#   endif
+#   ifdef KMPS_MERGE_CONTROLS
+    KP(char_t) last_c = s->c;
+#   endif
+
+    do
+      {
+       if (!KMPS_GET_CHAR(kmp, src, s))
+         {
+#           ifdef KMPS_ADD_CONTROLS
+           /* End of input: process one more control character, unless s->c already is one */
+           if (!KP(is_control)(kmp, s->c))
+             {
+                s->c = KP(control)();
+                s->eof = 1;
+               break;
+             }
+#           endif
+           goto exit;
+         }
+      }
+    /* When merging, keep reading as long as both the previous and the new character are controls */
+    while (0
+#     ifdef KMPS_MERGE_CONTROLS
+      || (KP(is_control)(kmp, last_c) && KP(is_control)(kmp, s->c))
+#     endif
+      );
+  }
+exit: ;
+# ifdef KMPS_EXIT
+  { KMPS_EXIT(kmp, src, s); }
+# endif
+}
+
+/*
+ * Convenience wrapper around P(search)() which keeps the search
+ * structure on the stack. The structure is discarded on return, so
+ * results are observable only through the KMPS_* hooks.
+ */
+static inline void
+P(run) (struct KP(struct) *kmp, P(search_source_t) src)
+{
+  struct P(search) search;
+  P(search)(kmp, &search, src);
+}
+
+/*
+ * Undefine the helper macros and all the parameters, so that the file
+ * can be included again with a different configuration. KP has to go as
+ * well, otherwise it would leak into the including file.
+ */
+#undef P
+#undef KP
+#undef KMPS_PREFIX
+#undef KMPS_KMP_PREFIX
+#undef KMPS_SOURCE
+#undef KMPS_GET_CHAR
+#undef KMPS_ADD_CONTROLS
+#undef KMPS_MERGE_CONTROLS
+#undef KMPS_VARS
+#undef KMPS_INIT
+#undef KMPS_EXIT
+#undef KMPS_FOUND
+#undef KMPS_FOUND_CHAIN
+#undef KMPS_WANT_BEST
+#undef KMPS_STEP
diff --git a/lib/kmp-test.c b/lib/kmp-test.c

new file mode 100644 (file)

index 0000000..c066e6e
--- /dev/null
+++ b/lib/kmp-test.c
@@ -0,0 +1,206 @@
+/*
+ *      Test of KMP search
+ *
+ *      (c) 2006, Pavel Charvat <pchar@ucw.cz>
+ */
+
+#include "lib/lib.h"
+#include "lib/mempool.h"
+#include <string.h>
+
+/* Debugging messages: change the condition to `#if 1' to have them logged at L_DEBUG */
+#if 0
+#define TRACE(x...) do{log(L_DEBUG, x);}while(0)
+#else
+#define TRACE(x...) do{}while(0)
+#endif
+
+/* TEST1 - multiple searches */
+
+/* One automaton (kmp1_*) with default settings ... */
+#define KMP_PREFIX(x) kmp1_##x
+#define KMP_WANT_CLEANUP
+#include "lib/kmp.h"
+/* ... searched by kmp1s1_*, which tracks the longest match ... */
+#define KMPS_PREFIX(x) kmp1s1_##x
+#define KMPS_KMP_PREFIX(x) kmp1_##x
+#define KMPS_WANT_BEST
+#define KMPS_EXIT(kmp,src,s) TRACE("Best match has %d characters", s->best->len)
+#include "lib/kmp-search.h"
+/* ... and by kmp1s2_*, which counts all matches in s->u.count */
+#define KMPS_PREFIX(x) kmp1s2_##x
+#define KMPS_KMP_PREFIX(x) kmp1_##x
+#define KMPS_VARS uns count;
+#define KMPS_INIT(kmp,src,s) s->u.count = 0
+#define KMPS_FOUND(kmp,src,s) s->u.count++
+#include "lib/kmp-search.h"
+
+/*
+ * Builds an automaton for "ahoj", "hoj" and "aho" and runs both search
+ * variants over fixed texts, checking the length of the best match and
+ * the total number of matches.
+ */
+static void
+test1(void)
+{
+  TRACE("Running test1");
+  struct kmp1_struct kmp;
+  kmp1_init(&kmp);
+  kmp1_add(&kmp, "ahoj");
+  kmp1_add(&kmp, "hoj");
+  kmp1_add(&kmp, "aho");
+  kmp1_build(&kmp);
+  struct kmp1s1_search s1;
+  kmp1s1_search(&kmp, &s1, "asjlahslhalahosjkjhojsas");
+  ASSERT(s1.best->len == 3);
+  struct kmp1s2_search s2;
+  kmp1s2_search(&kmp, &s2, "asjlahslhalahojsjkjhojsas");
+  ASSERT(s2.u.count == 4);
+  kmp1_cleanup(&kmp);
+}
+
+/* TEST2 - various tracing */
+
+/*
+ * Automaton kmp2_*: UTF-8 input, lowercased, non-letters replaced by the
+ * control character. Every key string carries its text and an id (passed
+ * to add() as an extra argument); the hooks only trace what happens.
+ */
+#define KMP_PREFIX(x) kmp2_##x
+#define KMP_USE_UTF8
+#define KMP_TOLOWER
+#define KMP_ONLYALPHA
+#define KMP_STATE_VARS char *str; uns id;
+#define KMP_ADD_EXTRA_ARGS uns id
+#define KMP_VARS char *start;
+/* Remember the start of the string, since src has been advanced by the time ADD_NEW/ADD_DUP run */
+#define KMP_ADD_INIT(kmp,src) kmp->u.start = src
+#define KMP_ADD_NEW(kmp,src,s) do{ TRACE("Inserting string %s with id %d", kmp->u.start, id); \
+  s->u.str = kmp->u.start; s->u.id = id; }while(0)
+#define KMP_ADD_DUP(kmp,src,s) TRACE("String %s already inserted", kmp->u.start)
+#define KMP_WANT_CLEANUP
+#define KMP_WANT_SEARCH
+#define KMPS_ADD_CONTROLS
+#define KMPS_MERGE_CONTROLS
+#define KMPS_FOUND(kmp,src,s) TRACE("String %s with id %d found", s->out->u.str, s->out->u.id)
+#define KMPS_STEP(kmp,src,s) TRACE("Got to state %p after reading %d", s->s, s->c)
+#include "lib/kmp.h"
+
+/*
+ * Adds several strings (including duplicates and a non-ASCII one) and
+ * runs the search over a UTF-8 text. Nothing is asserted here, the test
+ * only exercises the code paths and the tracing hooks.
+ */
+static void
+test2(void)
+{
+  TRACE("Running test2");
+  struct kmp2_struct kmp;
+  kmp2_init(&kmp);
+  kmp2_add(&kmp, "ahoj", 1);
+  kmp2_add(&kmp, "ahoj", 2);
+  kmp2_add(&kmp, "hoj", 3);
+  kmp2_add(&kmp, "aho", 4);
+  kmp2_add(&kmp, "aba", 5);
+  kmp2_add(&kmp, "aba", 5);
+  kmp2_add(&kmp, "pěl", 5);
+  kmp2_build(&kmp);
+  kmp2_run(&kmp, "Šíleně žluťoučký kůň úpěl ďábelské ódy labababaks sdahojdhsaladsjhla");
+  kmp2_cleanup(&kmp);
+}
+
+/* TEST3 - random tests */
+
+/*
+ * Automaton kmp3_*: every key string remembers its index in the array
+ * of strings. A duplicate string is blanked (its first character is
+ * overwritten by NUL), so that the brute-force counting in test3()
+ * skips it. The search decrements the expected counters on each match.
+ */
+#define KMP_PREFIX(x) kmp3_##x
+#define KMP_STATE_VARS uns index;
+#define KMP_ADD_EXTRA_ARGS uns index
+#define KMP_VARS char *start;
+#define KMP_ADD_INIT(kmp,src) kmp->u.start = src
+#define KMP_ADD_NEW(kmp,src,s) s->u.index = index
+#define KMP_ADD_DUP(kmp,src,s) *(kmp->u.start) = 0
+#define KMP_WANT_CLEANUP
+#define KMP_WANT_SEARCH
+#define KMPS_VARS uns sum, *cnt;
+#define KMPS_FOUND(kmp,src,s) do{ ASSERT(s->u.cnt[s->out->u.index]); s->u.cnt[s->out->u.index]--; s->u.sum--; }while(0)
+#include "lib/kmp.h"
+
+/*
+ * Randomized test: 100 rounds, each with a random set of short key
+ * strings and 10 random texts. For every text, the occurrences of each
+ * key string are first counted by brute force (strncmp() at every
+ * position); the automaton then has to report exactly the same
+ * occurrences, bringing all the counters back to zero.
+ */
+static void
+test3(void)
+{
+  TRACE("Running test3");
+  struct mempool *pool = mp_new(1024);
+  for (uns testn = 0; testn < 100; testn++)
+  {
+    mp_flush(pool);
+    uns n = random_max(100);
+    char *s[n];
+    struct kmp3_struct kmp;
+    kmp3_init(&kmp);
+    /* Generate the key strings */
+    for (uns i = 0; i < n; i++)
+      {
+        uns m = random_max(10);
+        s[i] = mp_alloc(pool, m + 1);
+        for (uns j = 0; j < m; j++)
+         s[i][j] = 'a' + random_max(3);
+        s[i][m] = 0;
+        kmp3_add(&kmp, s[i], i);
+      }
+    kmp3_build(&kmp);
+    for (uns i = 0; i < 10; i++)
+      {
+        /* Generate the text; its alphabet is one letter larger than that of the keys */
+        uns m = random_max(100);
+        byte b[m + 1];
+        for (uns j = 0; j < m; j++)
+         b[j] = 'a' + random_max(4);
+        b[m] = 0;
+        uns cnt[n];
+       struct kmp3_search search;
+       search.u.sum = 0;
+       search.u.cnt = cnt;
+        /* Expected numbers of occurrences; empty (or blanked duplicate) strings are skipped */
+        for (uns j = 0; j < n; j++)
+          {
+           cnt[j] = 0;
+           if (*s[j])
+             for (uns k = 0; k < m; k++)
+               if (!strncmp(b + k, s[j], strlen(s[j])))
+                 cnt[j]++, search.u.sum++;
+         }
+        kmp3_search(&kmp, &search, b);
+        ASSERT(search.u.sum == 0);
+      }
+    kmp3_cleanup(&kmp);
+  }
+  mp_delete(pool);
+}
+
+/* TEST4 - user-defined character type */
+
+/* Forward declarations needed by the user-supplied eq/hash functions below */
+struct kmp4_struct;
+struct kmp4_state;
+
+/*
+ * Characters of this automaton are pointers to bytes of the input.
+ * Two characters are equal if the pointers are identical (this covers
+ * two NULLs, i.e. control characters) or if both are non-NULL and point
+ * to equal bytes.
+ */
+static inline int
+kmp4_eq(struct kmp4_struct *kmp UNUSED, byte *a, byte *b)
+{
+  return (a == b) || (a && b && *a == *b);
+}
+
+/* Hash of a (state, character) pair, based on the byte pointed to (0 for NULL) and on the state address */
+static inline uns
+kmp4_hash(struct kmp4_struct *kmp UNUSED, struct kmp4_state *s, byte *c)
+{
+  return (c ? (*c << 16) : 0) + (uns)(uintptr_t)s;
+}
+
+/*
+ * Automaton kmp4_* with a user-defined character type (byte *), NULL as
+ * the control character and custom get_char, hash and eq.
+ */
+#define KMP_PREFIX(x) kmp4_##x
+#define KMP_CHAR byte *
+#define KMP_CONTROL_CHAR NULL
+/* The character is the address of the current byte; the input ends at the NUL byte */
+#define KMP_GET_CHAR(kmp,src,c) ({ c = src++; !!*c; })
+#define KMP_GIVE_HASHFN
+#define KMP_GIVE_EQ
+#define KMP_WANT_CLEANUP
+#define KMP_WANT_SEARCH
+#define KMPS_FOUND(kmp,src,s) TRACE("found")
+#define KMPS_ADD_CONTROLS
+#define KMPS_MERGE_CONTROLS
+#include "lib/kmp.h"
+
+/* Smoke test of the user-defined character type; nothing is asserted */
+static void
+test4(void)
+{
+  TRACE("Running test4");
+  struct kmp4_struct kmp;
+  kmp4_init(&kmp);
+  kmp4_add(&kmp, "ahoj");
+  kmp4_build(&kmp);
+  kmp4_run(&kmp, "djdhaskjdahoahaahojojshdaksjahdahojskj");
+  kmp4_cleanup(&kmp);
+}
+
+/* Runs all four tests in sequence; a failed ASSERT is the only way a test reports an error */
+int
+main(void)
+{
+  test1();
+  test2();
+  test3();
+  test4();
+  return 0;
+}
diff --git a/lib/kmp-test.t b/lib/kmp-test.t

new file mode 100644 (file)

index 0000000..bfcc127
--- /dev/null
+++ b/lib/kmp-test.t
@@ -0,0 +1,3 @@
+# Tests for the kmp module
+
+Run:   ../obj/lib/kmp-test
diff --git a/lib/kmp.h b/lib/kmp.h

new file mode 100644 (file)

index 0000000..8c8c525
--- /dev/null
+++ b/lib/kmp.h
@@ -0,0 +1,463 @@
+/*
+ *      Knuth-Morris-Pratt's Substring Search for N given strings
+ *
+ *      (c) 1999--2005, Robert Spalek <robert@ucw.cz>
+ *      (c) 2006, Pavel Charvat <pchar@ucw.cz>
+ *
+ *      (In fact, the algorithm is usually referred to as Aho-Corasick,
+ *      but that's just an extension of KMP to multiple strings.)
+ */
+
+/*
+ *  This is not a normal header file, it's a generator of KMP algorithm.
+ *  Each time you include it with parameters set in the corresponding
+ *  preprocessor macros, it generates KMP structures and functions
+ *  with the parameters given.
+ *
+ *  This file contains only construction of the automaton. The search
+ *  itself can be generated by inclusion of file lib/kmp-search.h.
+ *  Separate headers allow the user to define multiple search
+ *  routines for one common set of key strings.
+ *
+ *  Example:
+ *
+ *     #define KMP_PREFIX(x) kmp_##x
+ *     #define KMP_WANT_CLEANUP
+ *     #define KMP_WANT_SEARCH // includes lib/kmp-search.h automatically
+ *     #define KMPS_FOUND(kmp,src,s) printf("found\n")
+ *     #include "lib/kmp.h"
+ *
+ *    [...]
+ *
+ *     struct kmp_struct kmp;  // a structure describing the whole automaton
+ *     kmp_init(&kmp);         // initialization (must be called before all other functions)
+ *
+ *     // add key strings we want to search
+ *     kmp_add(&kmp, "aaa");
+ *     kmp_add(&kmp, "abc");
+ *
+ *     // complete the automaton, no more strings can be added later
+ *     kmp_build(&kmp);
+ *
+ *     // example of search, should print single "found" to stdout
+ *     kmp_run(&kmp, "aabaabca");
+ *
+ *     // destroy all internal structures
+ *     kmp_cleanup(&kmp);
+ *
+ *
+ *  Brief description of all parameters:
+ *
+ *    Basic parameters:
+ *             KMP_PREFIX(x)           macro to add a name prefix (used on all global names
+ *                             defined by the KMP generator); mandatory;
+ *                             we abbreviate this to P(x) below
+ *
+ *     KMP_CHAR                alphabet type, the default is u16
+ *
+ *     KMP_SOURCE              user-defined text source; KMP_GET_CHAR must
+ *     KMP_GET_CHAR(kmp,src,c) return zero at the end or nonzero together with the next character in c otherwise;
+ *                             if not defined, zero-terminated array of bytes is used as the input
+ *
+ *     KMP_VARS                user-defined variables in 'struct P(struct)'
+ *                             -- a structure describing the whole automaton;
+ *                             these variables are stored in .u substructure to avoid collisions
+ *     KMP_STATE_VARS          user-defined variables in 'struct P(state)'
+ *                             -- created for each state of the automaton;
+ *                             these variables are stored in .u substructure to avoid collisions
+ *
+ *    Parameters which select how the input is interpreted (if KMP_SOURCE is unset):
+ *     KMP_USE_ASCII           reads single bytes from the input (default)
+ *     KMP_USE_UTF8            reads UTF-8 characters from the input (valid UTF-8 needed)
+ *     KMP_TOLOWER             converts all to lowercase
+ *     KMP_UNACCENT            removes accents
+ *     KMP_ONLYALPHA           converts non-alphas to KMP_CONTROL_CHAR (see below)
+ *
+ *    Parameters controlling add(kmp, src):
+ *     KMP_ADD_EXTRA_ARGS      extra arguments, should be used carefully because of possible collisions
+ *     KMP_ADD_INIT(kmp,src)   called in the beginning of add(), before anything is read from src
+ *      KMP_INIT_STATE(kmp,s)   initialization of a new state s (called before KMP_ADD_{NEW,DUP});
+ *                             null state is not included and should be handled after init() if necessary;
+ *                             all user-defined data are filled by zeros before call to KMP_INIT_STATE
+ *     KMP_ADD_NEW(kmp,src,s)  initialize last state of every new key string (called after KMP_INIT_STATE);
+ *                             the string must be parsed before so src is after the last string's character
+ *     KMP_ADD_DUP(kmp,src,s)  analogy of KMP_ADD_NEW called for duplicates
+ *
+ *    Parameters to build():
+ *      KMP_BUILD_STATE(kmp,s) called for all states (including null) in order of non-decreasing tree depth
+ *
+ *    Other parameters:
+ *     KMP_WANT_CLEANUP        define cleanup()
+ *     KMP_WANT_SEARCH         includes lib/kmp-search.h with the same prefix;
+ *                             there can be multiple search variants for a single KMP automaton
+ *     KMP_USE_POOL            allocates in a given pool
+ *     KMP_CONTROL_CHAR        special control character (default is ':')
+ *     KMP_GIVE_ALLOC          if set, you must supply custom allocation functions:
+ *                             void *alloc(unsigned int size) -- allocate space for
+ *                             a state. Default is pooled allocation from a local pool or HASH_USE_POOL.
+ *                             void free(void *) -- the converse.
+ *     KMP_GIVE_HASHFN         if set, you must supply custom hash function:
+ *                             unsigned int hash(struct P(struct) *kmp, struct P(state) *state, KMP_CHAR c);
+ *                             default hash function works only for integer character types
+ *     KMP_GIVE_EQ             if set, you must supply custom compare function of two characters:
+ *                             int eq(struct P(struct) *kmp, KMP_CHAR a, KMP_CHAR b);
+ *                             default is 'a == b'
+ */
+
+/* The name prefix is the only mandatory parameter */
+#ifndef KMP_PREFIX
+#error Missing KMP_PREFIX
+#endif
+
+#include "lib/mempool.h"
+#include <alloca.h>
+#include <string.h>
+
+#define P(x) KMP_PREFIX(x)
+
+/* Type of a single character of the alphabet */
+#ifdef KMP_CHAR
+typedef KMP_CHAR P(char_t);
+#else
+typedef u16 P(char_t);
+#endif
+
+/* Type of string lengths (depths of states) */
+typedef u32 P(len_t);
+
+/* NOTE(review): node_t and KMP_NODE are not referenced anywhere else in this file -- possibly a leftover */
+#ifdef KMP_NODE
+typedef KMP_NODE P(node_t);
+#else
+typedef struct {} P(node_t);
+#endif
+
+struct P(struct);
+
+/*
+ * One state of the automaton. States are nodes of the hash table keyed
+ * by the pair (from, c). Between add() and build(), `back' and `next'
+ * are temporarily used to hold the list of sons (see P(hash_init_key)).
+ */
+struct P(state) {
+  struct P(state) *from;       /* state with the previous character (forms a tree with null state in the root) */
+  struct P(state) *back;       /* backwards edge to the longest shorter state with same suffix */
+  struct P(state) *next;       /* the longest of shorter matches (or NULL) */
+  P(len_t) len;                        /* state depth if it represents a key string, zero otherwise */
+  P(char_t) c;                 /* last character of the represented string */
+  struct {
+#   ifdef KMP_STATE_VARS
+    KMP_STATE_VARS
+#   endif
+  } u;                         /* user-defined data*/
+};
+
+/* Returns the control character: KMP_CONTROL_CHAR if defined, ':' otherwise */
+static inline P(char_t)
+P(control) (void)
+{
+# ifdef KMP_CONTROL_CHAR
+  return KMP_CONTROL_CHAR;
+# else
+  return ':';
+# endif
+}
+
+/* Callbacks for the hash table of state transitions (see the inclusion of lib/hashtable.h below) */
+struct P(hash_table);
+
+/*
+ * Hash function of the key (f, c). The user-supplied variant receives
+ * the table pointer cast to the automaton structure, which works because
+ * the table is the first member of struct P(struct). The default one
+ * combines the character (shifted left by 16 bits) with the address of
+ * the state.
+ */
+#define HASH_GIVE_HASHFN
+#ifdef KMP_GIVE_HASHFN
+static inline uns
+P(hash_hash) (struct P(hash_table) *t, struct P(state) *f, P(char_t) c)
+{
+  return P(hash) ((struct P(struct) *) t, f, c);
+}
+#else
+static inline uns
+P(hash_hash) (struct P(hash_table) *t UNUSED, struct P(state) *f, P(char_t) c)
+{
+  return (((uns)c) << 16) + (uns)(uintptr_t)f;
+}
+#endif
+
+/* Default comparison of two characters (used unless the user gives their own eq()) */
+#ifndef KMP_GIVE_EQ
+static inline int
+P(eq) (struct P(struct) *kmp UNUSED, P(char_t) c1, P(char_t) c2)
+{
+  return c1 == c2;
+}
+#endif
+
+/* Returns non-zero iff `c' equals the control character according to eq() */
+static inline int
+P(is_control) (struct P(struct) *kmp, P(char_t) c)
+{
+  return P(eq) (kmp, c, P(control)());
+}
+
+/* Equality of hash keys: the same source state and characters equal according to eq() */
+#define HASH_GIVE_EQ
+static inline int
+P(hash_eq) (struct P(hash_table) *t, struct P(state) *f1, P(char_t) c1, struct P(state) *f2, P(char_t) c2)
+{
+  return f1 == f2 && P(eq)((struct P(struct) *) t, c1, c2);
+}
+
+/* With KMP_GIVE_ALLOC, the hash table allocates its nodes by the user-supplied alloc() and free() */
+#ifdef KMP_GIVE_ALLOC
+#define HASH_GIVE_ALLOC
+static inline void *
+P(hash_alloc) (struct P(hash_table) *t, uns size)
+{
+  return P(alloc) ((struct P(struct) *) t, size);
+}
+
+static inline void
+P(hash_free) (struct P(hash_table) *t, void *ptr)
+{
+  P(free) ((struct P(struct) *) t, ptr);
+}
+#endif
+
+/*
+ * Initialization of a newly created state `s' reached from `f' by the
+ * character `c': the state is zeroed, KMP_INIT_STATE is called (if
+ * defined) and the state is prepended to the list of sons of `f'.
+ */
+#define HASH_GIVE_INIT_KEY
+static inline void
+P(hash_init_key) (struct P(hash_table) *t UNUSED, struct P(state) *s, struct P(state) *f, P(char_t) c)
+{
+  bzero(s, sizeof(*s));
+# ifdef KMP_INIT_STATE
+  struct P(struct) *kmp = (struct P(struct) *)t;
+  { KMP_INIT_STATE(kmp, s); }
+# endif
+  s->from = f;
+  s->c = c;
+  s->next = f->back; /* the pointers hold the link-list of sons... changed in build() */
+  f->back = s;
+}
+
+/*
+ * Instantiate the hash table of transitions: the nodes are the states
+ * themselves, keyed by the pair (from, c). Our P() is undefined for the
+ * duration of the inclusion and restored afterwards -- presumably
+ * because lib/hashtable.h uses a macro of the same name.
+ */
+#undef P
+#define HASH_PREFIX(x) KMP_PREFIX(hash_##x)
+#define HASH_NODE struct KMP_PREFIX(state)
+#define HASH_KEY_COMPLEX(x) x from, x c
+#define HASH_KEY_DECL struct KMP_PREFIX(state) *from, KMP_PREFIX(char_t) c
+#define HASH_WANT_NEW
+#define HASH_WANT_FIND
+#ifdef KMP_WANT_CLEANUP
+#define HASH_WANT_CLEANUP
+#endif
+/* Allocate either from the pool given by the user or from an automatic one */
+#if defined(KMP_USE_POOL)
+#define HASH_USE_POOL KMP_USE_POOL
+#else
+#define HASH_AUTO_POOL 4096
+#endif
+#define HASH_CONSERVE_SPACE
+#define HASH_TABLE_DYNAMIC
+#include "lib/hashtable.h"
+#define P(x) KMP_PREFIX(x)
+
+/*
+ * The whole automaton. The hash table must remain the first member:
+ * the hash callbacks cast the table pointer to a pointer to this
+ * structure.
+ */
+struct P(struct) {
+  struct P(hash_table) hash;           /* hash table of state transitions */
+  struct P(state) null;                        /* null state */
+  struct {
+#   ifdef KMP_VARS
+    KMP_VARS
+#   endif
+  } u;                                 /* user-defined data */
+};
+
+/* Type of the text source: user-defined, or a pointer to characters by default */
+#ifdef KMP_SOURCE
+typedef KMP_SOURCE P(source_t);
+#else
+typedef char *P(source_t);
+#endif
+
+#ifdef KMP_GET_CHAR
+static inline int
+P(get_char) (struct P(struct) *kmp UNUSED, P(source_t) *src UNUSED, P(char_t) *c UNUSED)
+{
+  return KMP_GET_CHAR(kmp, (*src), (*c));
+}
+#else
+#  if defined(KMP_USE_UTF8)
+#    include "lib/unicode.h"
+#    if defined(KMP_ONLYALPHA) || defined(KMP_TOLOWER) || defined(KMP_UNACCENT)
+#      include "charset/unicat.h"
+#    endif
+#  elif defined(KMP_USE_ASCII)
+#    if defined(KMP_ONLYALPHA) || defined(KMP_TOLOWER)
+#      include "lib/chartype.h"
+#    endif
+#  endif
+static inline int
+P(get_char) (struct P(struct) *kmp UNUSED, P(source_t) *src, P(char_t) *c)
+{
+# ifdef KMP_USE_UTF8
+  uns cc;
+  *src = utf8_get(*src, &cc);
+# ifdef KMP_ONLYALPHA
+  if (!cc) {}
+  else if (!Ualpha(cc))
+    cc = P(control)();
+  else
+# endif
+    {
+#     ifdef KMP_TOLOWER
+      cc = Utolower(cc);
+#     endif
+#     ifdef KMP_UNACCENT
+      cc = Uunaccent(cc);
+#     endif
+    }
+# else
+  uns cc = *(*src)++;
+# ifdef KMP_ONLYALPHA
+  if (!cc) {}
+  else if (!Calpha(cc))
+    cc = P(control)();
+  else
+# endif
+#   ifdef KMP_TOLOWER
+    cc = Clocase(cc);
+#   endif
+#   ifdef KMP_UNACCENT
+#   error Do not know how to unaccent ASCII characters
+#   endif
+# endif
+  *c = cc;
+  return !!cc;
+}
+#endif
+
+/*
+ * Add the key string read from `src' to the tree of states.
+ *
+ * Returns the state representing the whole string, or NULL if the
+ * string is empty (no character could be read). If the string is
+ * already present (its final state has non-zero len), KMP_ADD_DUP is
+ * invoked and the existing state is returned. Otherwise the length of
+ * the string is recorded in the final state and KMP_ADD_NEW is invoked.
+ */
+static struct P(state) *
+P(add) (struct P(struct) *kmp, P(source_t) src
+#   ifdef KMP_ADD_EXTRA_ARGS
+    , KMP_ADD_EXTRA_ARGS
+#   endif
+)
+{
+# ifdef KMP_ADD_INIT
+  { KMP_ADD_INIT(kmp, src); }
+# endif
+
+  P(char_t) c;
+  if (!P(get_char)(kmp, &src, &c))
+    return NULL;
+  struct P(state) *p = &kmp->null, *s;
+  uns len = 0;
+  do
+    {
+      /* Follow the existing path as long as possible */
+      s = P(hash_find)(&kmp->hash, p, c);
+      if (!s)
+       /* We have left the tree: from now on, every remaining character gets a new state */
+       for (;;)
+         {
+           s = P(hash_new)(&kmp->hash, p, c);
+           len++;
+           if (!(P(get_char)(kmp, &src, &c)))
+             goto enter_new;
+           p = s;
+         }
+      p = s;
+      len++;
+    }
+  while (P(get_char)(kmp, &src, &c));
+  /* The whole string lies on an existing path; it is a duplicate only if it ends in a key string */
+  if (s->len)
+    {
+#     ifdef KMP_ADD_DUP
+      { KMP_ADD_DUP(kmp, src, s); }
+#     endif
+      return s;
+    }
+enter_new:
+  s->len = len;
+# ifdef KMP_ADD_NEW
+  { KMP_ADD_NEW(kmp, src, s); }
+# endif
+  return s;
+}
+
+/*
+ * Initialize an empty automaton: zeroes the null state and initializes
+ * the hash table. The user-defined data in kmp->u are left untouched.
+ */
+static void
+P(init) (struct P(struct) *kmp)
+{
+  bzero(&kmp->null, sizeof(struct P(state)));
+  P(hash_init)(&kmp->hash);
+}
+
+#ifdef KMP_WANT_CLEANUP
+/* Destroy the automaton by cleaning up its hash table (generated only with KMP_WANT_CLEANUP) */
+static inline void
+P(cleanup) (struct P(struct) *kmp)
+{
+  P(hash_cleanup)(&kmp->hash);
+}
+#endif
+
+/* Returns non-zero iff the automaton contains no states except for the null state */
+static inline int
+P(empty) (struct P(struct) *kmp)
+{
+  return kmp->hash.hash_count == 0;
+}
+
+/*
+ * Returns the head of the chain of matches ending in the state `s':
+ * `s' itself if it represents a key string, otherwise the longest of
+ * the shorter matches (possibly NULL).
+ */
+static inline struct P(state) *
+P(chain_start) (struct P(state) *s)
+{
+  if (s->len)
+    return s;
+  return s->next;
+}
+
+/*
+ * Complete the automaton: walk the tree of states breadth-first and
+ * compute the `back' and `next' pointers of every state. Until now,
+ * these two pointers have been holding the lists of sons (built by
+ * P(hash_init_key)); each list is moved to the queue before the
+ * pointers of its owner are overwritten.
+ *
+ * KMP_BUILD_STATE is called for every state including the null one,
+ * in order of non-decreasing depth. An empty automaton is left as is.
+ */
+static void
+P(build) (struct P(struct) *kmp)
+{
+  if (P(empty)(kmp))
+    return;
+  uns read = 0, write = 0;
+  /* Every state enters the queue exactly once, so hash_count entries suffice */
+  struct P(state) *fifo[kmp->hash.hash_count], *null = &kmp->null;
+  for (struct P(state) *s = null->back; s; s = s->next)
+    fifo[write++] = s;
+  null->back = NULL;
+# ifdef KMP_BUILD_STATE
+  { KMP_BUILD_STATE(kmp, null); }
+# endif
+  while (read != write)
+    {
+      struct P(state) *s = fifo[read++], *t;
+      /* Enqueue the sons while s->back and s->next still hold their list */
+      for (t = s->back; t; t = t->next)
+       fifo[write++] = t;
+      /* The parent has been processed already, so its back edges are final */
+      for (t = s->from->back; 1; t = t->back)
+        {
+         if (!t)
+           {
+             s->back = null;
+             s->next = NULL;
+             break;
+           }
+         s->back = P(hash_find)(&kmp->hash, t, s->c);
+         if (s->back)
+           {
+             s->next = P(chain_start)(s->back);
+             break;
+           }
+       }
+#     ifdef KMP_BUILD_STATE
+      { KMP_BUILD_STATE(kmp, s); }
+#     endif
+    }
+}
+
+/*
+ * Undefine all the parameters, so that the file can be included again
+ * with a different configuration. This includes KMP_NODE and
+ * KMP_WANT_CLEANUP, which would otherwise leak into the next
+ * instantiation. KMP_PREFIX and KMP_WANT_SEARCH are handled below.
+ */
+#undef P
+#undef KMP_CHAR
+#undef KMP_NODE
+#undef KMP_SOURCE
+#undef KMP_GET_CHAR
+#undef KMP_VARS
+#undef KMP_STATE_VARS
+#undef KMP_CONTEXT
+#undef KMP_USE_ASCII
+#undef KMP_USE_UTF8
+#undef KMP_TOLOWER
+#undef KMP_UNACCENT
+#undef KMP_ONLYALPHA
+#undef KMP_CONTROL_CHAR
+#undef KMP_ADD_EXTRA_ARGS
+#undef KMP_ADD_INIT
+#undef KMP_ADD_NEW
+#undef KMP_ADD_DUP
+#undef KMP_INIT_STATE
+#undef KMP_BUILD_STATE
+#undef KMP_WANT_CLEANUP
+#undef KMP_USE_POOL
+#undef KMP_GIVE_ALLOC
+#undef KMP_GIVE_HASHFN
+#undef KMP_GIVE_EQ
+
+/*
+ * On request, generate also the search routines with the same prefix.
+ * KMP_PREFIX has to stay defined until here, because the search prefixes
+ * are expressed in terms of it.
+ */
+#ifdef KMP_WANT_SEARCH
+#  undef KMP_WANT_SEARCH
+#  define KMPS_PREFIX(x) KMP_PREFIX(x)
+#  define KMPS_KMP_PREFIX(x) KMP_PREFIX(x)
+#  include "lib/kmp-search.h"
+#endif
+
+#undef KMP_PREFIX
diff --git a/lib/lfs-test.c b/lib/lfs-test.c

new file mode 100644 (file)

index 0000000..c92dcd5
--- /dev/null
+++ b/lib/lfs-test.c
@@ -0,0 +1,63 @@
+/* Test of large files */
+
+#include "lib/lib.h"
+#include "lib/fastbuf.h"
+
+#include <stdlib.h>
+#include <time.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#define        BLOCK   (1<<10)
+#define        COUNT   (5<<20)
+#define        TESTS   (1<<20)
+
+/*
+ * Large-file test: reads TESTS randomly chosen blocks of a previously written
+ * file and checks that block number idx is filled with the byte (idx & 0xff).
+ * The writing phase is compiled out; enable the `#if 0' branch to (re)create
+ * the file.
+ */
+int main(void)
+{
+       struct fastbuf *b;
+       byte block[BLOCK];
+       uns i;
+
+       srandom(time(NULL));            /* random() below is seeded by srandom(), not by srand() */
+#if 0
+       b = bopen("/big/robert/large-file", O_CREAT | O_TRUNC | O_RDWR, 1<<20);
+       if (!b)
+               die("Cannot create large-file");
+
+       msg(L_DEBUG, "Writing %d blocks of size %d", COUNT, BLOCK);
+       for (i=0; i<COUNT; i++)
+       {
+               memset(block, i & 0xff, BLOCK);
+               bwrite(b, block, BLOCK);
+               if ( i%1024 == 0 )
+               {
+                       printf("\r%10d", i);
+                       fflush(stdout);
+               }
+       }
+#else
+       b = bopen("/big/robert/large-file", O_RDWR, 1<<20);
+       if (!b)
+               die("Cannot open large-file");
+#endif
+       msg(L_DEBUG, "Checking the file contents in %d tests", TESTS);
+       for (i=0; i<TESTS; i++)
+       {
+               uns idx = random()%COUNT;
+               /* Widen before multiplying: idx*BLOCK wraps around in 32 bits for offsets of 4 GB and more */
+               sh_off_t ofs = (sh_off_t) idx * BLOCK;
+               bseek(b, ofs, SEEK_SET);
+               if (bread(b, block, BLOCK) != BLOCK)
+                       die("Short read of block %u in test %u", idx, i);
+               if (block[17] != (idx & 0xff))
+                       die("Invalid block %d in test %d: %x != %x", idx, i, block[17], idx & 0xff);
+               if ( i%16 == 0 )
+               {
+                       printf("\r%10d", i);
+                       fflush(stdout);
+               }
+       }
+       msg(L_DEBUG, "Done");
+
+       bclose(b);
+       return 0;
+}
diff --git a/lib/lfs.h b/lib/lfs.h

new file mode 100644 (file)

index 0000000..ede4126
--- /dev/null
+++ b/lib/lfs.h
@@ -0,0 +1,63 @@
+/*
+ *     UCW Library -- Large File Support
+ *
+ *     (c) 1999--2002 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_LFS_H
+#define _UCW_LFS_H
+
+#include <fcntl.h>
+#include <unistd.h>
+
+#ifdef CONFIG_LFS
+
+/* Map the sh_* file interface to the explicit 64-bit variants */
+#define sh_open open64
+#define sh_seek lseek64
+#define sh_ftruncate ftruncate64
+#define sh_mmap(a,l,p,f,d,o) mmap64(a,l,p,f,d,o)
+#define sh_pread pread64
+#define sh_pwrite pwrite64
+#define sh_stat stat64
+#define sh_fstat fstat64
+typedef struct stat64 sh_stat_t;
+
+#else  /* !CONFIG_LFS */
+
+/* Map the sh_* file interface to the plain calls */
+#define sh_open open
+#define sh_seek(f,o,w) lseek(f,o,w)
+#define sh_ftruncate(f,o) ftruncate(f,o)
+#define sh_mmap(a,l,p,f,d,o) mmap(a,l,p,f,d,o)
+#define sh_pread pread
+#define sh_pwrite pwrite
+#define sh_stat stat
+#define sh_fstat fstat
+typedef struct stat sh_stat_t;
+
+#endif /* !CONFIG_LFS */
+
+#if defined(_POSIX_SYNCHRONIZED_IO) && (_POSIX_SYNCHRONIZED_IO > 0)
+#define sh_fdatasync fdatasync
+#else
+#define sh_fdatasync fsync
+#endif
+
+#define HAVE_PREAD
+
+/*
+ * Returns the size of the file called name, determined by seeking to its end.
+ * Dies if the file cannot be opened or the seek fails.
+ */
+static inline sh_off_t
+sh_file_size(const char *name)
+{
+  int fd = sh_open(name, O_RDONLY);
+  if (fd < 0)
+    die("Cannot open %s: %m", name);
+  sh_off_t len = sh_seek(fd, 0, SEEK_END);
+  if (len < 0)                         /* checked before close(), which could clobber errno */
+    die("Cannot seek %s: %m", name);
+  close(fd);
+  return len;
+}
+
+#endif /* !_UCW_LFS_H */
diff --git a/lib/lib.h b/lib/lib.h

new file mode 100644 (file)

index 0000000..f8659ed
--- /dev/null
+++ b/lib/lib.h
@@ -0,0 +1,279 @@
+/*
+ *     The UCW Library -- Miscellaneous Functions
+ *
+ *     (c) 1997--2007 Martin Mares <mj@ucw.cz>
+ *     (c) 2005 Tomas Valla <tom@ucw.cz>
+ *     (c) 2006 Robert Spalek <robert@ucw.cz>
+ *     (c) 2007 Pavel Charvat <pchar@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_LIB_H
+#define _UCW_LIB_H
+
+#include "lib/config.h"
+#include <stdarg.h>
+
+/* Macros for handling structures, offsets and alignment */
+
+/* Yields (type)(x); the pointer subtraction makes the compiler reject an x whose type is incompatible with `type' */
+#define CHECK_PTR_TYPE(x, type) ((x)-(type)(x) + (type)(x))
+/* Address of member i in a structure of type s placed at address 0 */
+#define PTR_TO(s, i) (&((s*)0)->i)
+/* Offset of member i in structure s; goes through unsigned long so that the pointer is not cast directly to a narrower integer */
+#define OFFSETOF(s, i) ((unsigned int)(unsigned long) PTR_TO(s, i))
+/* Given a pointer p to member i, returns a pointer to the enclosing structure of type s */
+#define SKIP_BACK(s, i, p) ((s *)((char *)(p) - OFFSETOF(s, i)))
+/* Rounds s up to a multiple of a; the mask arithmetic is only correct when a is a power of two */
+#define ALIGN_TO(s, a) (((s)+(a)-1)&~((a)-1))
+/* Rounds pointer p up to a multiple of s (any nonzero s) */
+#define ALIGN_PTR(p, s) ((uintptr_t)(p) % (s) ? (typeof(p))((uintptr_t)(p) + (s) - (uintptr_t)(p) % (s)) : (p))
+/* Number of bytes by which ptr exceeds the previous multiple of sizeof(type) */
+#define UNALIGNED_PART(ptr, type) (((uintptr_t) (ptr)) % sizeof(type))
+
+/* Some other macros */
+
+#define MIN(a,b) (((a)<(b))?(a):(b))
+#define MAX(a,b) (((a)>(b))?(a):(b))
+#define CLAMP(x,min,max) ({ int _t=x; (_t < min) ? min : (_t > max) ? max : _t; })
+#define ABS(x) ((x) < 0 ? -(x) : (x))
+#define ARRAY_SIZE(a) (sizeof(a)/sizeof(*(a)))
+#define STRINGIFY(x) #x
+#define STRINGIFY_EXPANDED(x) STRINGIFY(x)
+#define GLUE(x,y) x##y
+#define GLUE_(x,y) x##_##y
+
+#define COMPARE(x,y) do { if ((x)<(y)) return -1; if ((x)>(y)) return 1; } while(0)
+#define REV_COMPARE(x,y) COMPARE(y,x)
+#define COMPARE_LT(x,y) do { if ((x)<(y)) return 1; if ((x)>(y)) return 0; } while(0)
+#define COMPARE_GT(x,y) COMPARE_LT(y,x)
+
+#define        ROL(x, bits) (((x) << (bits)) | ((x) >> (sizeof(uns)*8 - (bits))))      /* Bitwise rotation of an uns to the left */
+
+/* GCC Extensions */
+
+#ifdef __GNUC__
+
+#undef inline
+#define NONRET __attribute__((noreturn))
+#define UNUSED __attribute__((unused))
+#define CONSTRUCTOR __attribute__((constructor))
+#define PACKED __attribute__((packed))
+#define CONST __attribute__((const))
+#define PURE __attribute__((pure))
+#define FORMAT_CHECK(x,y,z) __attribute__((format(x,y,z)))
+#define likely(x) __builtin_expect((x),1)
+#define unlikely(x) __builtin_expect((x),0)
+
+/* Inlining control; both macros are defined in either branch so that users need no version checks of their own */
+#if __GNUC__ >= 4 || __GNUC__ == 3 && __GNUC_MINOR__ >= 3
+#define ALWAYS_INLINE inline __attribute__((always_inline))
+#define NO_INLINE __attribute__((noinline))
+#else
+#define ALWAYS_INLINE inline
+#define NO_INLINE
+#endif
+
+#if __GNUC__ >= 4
+#define LIKE_MALLOC __attribute__((malloc))
+#define SENTINEL_CHECK __attribute__((sentinel))
+#else
+#define LIKE_MALLOC
+#define SENTINEL_CHECK
+#endif
+
+#else
+#error This program requires the GNU C compiler.
+#endif
+
+/* Logging */
+
+#define L_DEBUG                'D'             /* Debugging messages */
+#define L_INFO         'I'             /* Informational msgs, warnings and errors */
+#define L_WARN         'W'
+#define L_ERROR                'E'
+#define L_INFO_R       'i'             /* Errors caused by external events */
+#define L_WARN_R       'w'
+#define L_ERROR_R      'e'
+#define L_FATAL                '!'             /* die() */
+
+extern char *log_title;                        /* NULL - print no title, default is log_progname */
+extern char *log_filename;             /* Expanded name of the current log file */
+extern volatile int log_switch_nest;   /* log_switch() nesting counter, increment to disable automatic switches */
+extern int log_pid;                    /* 0 if shouldn't be logged */
+extern int log_precise_timings;                /* Include microsecond timestamps in log messages */
+extern void (*log_die_hook)(void);
+struct tm;
+extern void (*log_switch_hook)(struct tm *tm);
+
+void msg(uns cat, const char *fmt, ...) FORMAT_CHECK(printf,2,3);
+void vmsg(uns cat, const char *fmt, va_list args);
+void die(const char *, ...) NONRET FORMAT_CHECK(printf,1,2);
+void log_init(const char *argv0);
+void log_file(const char *name);
+void log_fork(void);
+int log_switch(void);
+
+void assert_failed(const char *assertion, const char *file, int line) NONRET;
+void assert_failed_noinfo(void) NONRET;
+
+#ifdef DEBUG_ASSERTS
+#define ASSERT(x) ({ if (unlikely(!(x))) assert_failed(#x, __FILE__, __LINE__); 1; })
+#else
+#define ASSERT(x) ({ if (__builtin_constant_p(x) && !(x)) assert_failed_noinfo(); 1; })
+#endif
+
+#define COMPILE_ASSERT(name,x) typedef char _COMPILE_ASSERT_##name[!!(x)-1]
+
+#ifdef LOCAL_DEBUG
+#define DBG(x,y...) msg(L_DEBUG, x,##y)
+#else
+#define DBG(x,y...) do { } while(0)
+#endif
+
+/* Increments the log_switch() nesting counter, which disables automatic switches. */
+static inline void
+log_switch_disable(void)
+{
+  log_switch_nest++;
+}
+
+/* Decrements the nesting counter again; it must be nonzero at this point. */
+static inline void
+log_switch_enable(void)
+{
+  ASSERT(log_switch_nest);
+  log_switch_nest--;
+}
+
+/* Memory allocation */
+
+#define xmalloc sh_xmalloc
+#define xrealloc sh_xrealloc
+#define xfree sh_xfree
+
+#ifdef DEBUG_DMALLOC
+/*
+ * The standard dmalloc macros tend to produce lots of namespace
+ * conflicts and we use only xmalloc and xfree, so we can define
+ * the stubs ourselves.
+ */
+#define DMALLOC_DISABLE
+#include <dmalloc.h>
+#define sh_xmalloc(size) _xmalloc_leap(__FILE__, __LINE__, size)
+#define sh_xrealloc(ptr,size) _xrealloc_leap(__FILE__, __LINE__, ptr, size)
+#define sh_xfree(ptr) _xfree_leap(__FILE__, __LINE__, ptr)
+#else
+/*
+ * Unfortunately, several libraries we might want to link to define
+ * their own xmalloc and we don't want to interfere with them, hence
+ * the renaming.
+ */
+void *xmalloc(uns) LIKE_MALLOC;
+void *xrealloc(void *, uns);
+void xfree(void *);
+#endif
+
+void *xmalloc_zero(uns) LIKE_MALLOC;
+char *xstrdup(const char *) LIKE_MALLOC;
+
+/* Content-Type pattern matching and filters */
+
+int match_ct_patt(const char *, const char *);
+
+/* wordsplit.c */
+
+int sepsplit(char *str, uns sep, char **rec, uns max);
+int wordsplit(char *str, char **rec, uns max);
+
+/* pat(i)match.c: Matching of shell patterns */
+
+int match_pattern(const char *patt, const char *str);
+int match_pattern_nocase(const char *patt, const char *str);
+
+/* md5hex.c */
+
+void md5_to_hex(const byte *s, char *d);
+void hex_to_md5(const char *s, byte *d);
+
+#define MD5_SIZE 16
+#define MD5_HEX_SIZE 33
+
+/* prime.c */
+
+int isprime(uns x);
+uns nextprime(uns x);
+
+/* primetable.c */
+
+uns next_table_prime(uns x);
+uns prev_table_prime(uns x);
+
+/* timer.c */
+
+timestamp_t get_timestamp(void);
+
+void init_timer(timestamp_t *timer);
+uns get_timer(timestamp_t *timer);
+uns switch_timer(timestamp_t *old, timestamp_t *new);
+
+/* regex.c */
+
+typedef struct regex regex;
+
+regex *rx_compile(const char *r, int icase);
+void rx_free(regex *r);
+int rx_match(regex *r, const char *s);
+int rx_subst(regex *r, const char *by, const char *src, char *dest, uns destlen);
+
+/* random.c */
+
+uns random_u32(void);
+uns random_max(uns max);
+u64 random_u64(void);
+u64 random_max_u64(u64 max);
+
+/* mmap.c */
+
+void *mmap_file(const char *name, unsigned *len, int writeable);
+void munmap_file(void *start, unsigned len);
+
+/* proctitle.c */
+
+void setproctitle_init(int argc, char **argv);
+void setproctitle(const char *msg, ...) FORMAT_CHECK(printf,1,2);
+char *getproctitle(void);
+
+/* randomkey.c */
+
+void randomkey(byte *buf, uns size);
+
+/* exitstatus.c */
+
+#define EXIT_STATUS_MSG_SIZE 32
+int format_exit_status(char *msg, int stat);
+
+/* runcmd.c */
+
+int run_command(const char *cmd, ...);
+void NONRET exec_command(const char *cmd, ...);
+void echo_command(char *buf, int size, const char *cmd, ...);
+int run_command_v(const char *cmd, va_list args);
+void NONRET exec_command_v(const char *cmd, va_list args);
+void echo_command_v(char *buf, int size, const char *cmd, va_list args);
+
+/* carefulio.c */
+
+int careful_read(int fd, void *buf, int len);
+int careful_write(int fd, const void *buf, int len);
+
+/* sync.c */
+
+void sync_dir(const char *name);
+
+/* sighandler.c */
+
+typedef int (*sh_sighandler_t)(int);   // gets signum, returns nonzero if abort() should be called
+
+void handle_signal(int signum);
+void unhandle_signal(int signum);
+sh_sighandler_t set_signal_handler(int signum, sh_sighandler_t new);
+
+/* string.c */
+
+char *str_unesc(char *dest, const char *src);
+char *str_format_flags(char *dest, const char *fmt, uns flags);
+
+/* bigalloc.c */
+
+void *page_alloc(u64 len) LIKE_MALLOC; // allocates a multiple of CPU_PAGE_SIZE bytes with mmap
+void *page_alloc_zero(u64 len) LIKE_MALLOC;
+void page_free(void *start, u64 len);
+void *page_realloc(void *start, u64 old_len, u64 new_len);
+
+void *big_alloc(u64 len) LIKE_MALLOC; // allocate a large memory block in the most efficient way available
+void *big_alloc_zero(u64 len) LIKE_MALLOC;
+void big_free(void *start, u64 len);
+
+#endif
diff --git a/lib/libucw.pc b/lib/libucw.pc

new file mode 100644 (file)

index 0000000..745f031
--- /dev/null
+++ b/lib/libucw.pc
@@ -0,0 +1,16 @@
+# pkg-config metadata for libucw
+
+libdir=@LIBDIR@
+incdir=.
+
+#ifdef CONFIG_UCW_THREADS
+threads=-lpthread
+#else
+threads=
+#endif
+
+Name: libucw
+Description: A library of utility functions and data structures
+Version: @SHERLOCK_VERSION@
+Cflags: -I${incdir}
+Libs: -L${libdir} -lucw ${threads}
diff --git a/lib/lists.c b/lib/lists.c

new file mode 100644 (file)

index 0000000..043c364
--- /dev/null
+++ b/lib/lists.c
@@ -0,0 +1,77 @@
+/*
+ *     UCW Library -- Linked Lists
+ *
+ *     (c) 1997--1999 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+
+#define _UCW_LISTS_C
+#include "lib/lists.h"
+
+/* Links node n in as the new last node of list l. */
+LIST_INLINE void
+add_tail(list *l, node *n)
+{
+  node *last = l->tail;
+
+  n->prev = last;
+  n->next = (node *) &l->null;
+  last->next = n;
+  l->tail = n;
+}
+
+/* Links node n in as the new first node of list l. */
+LIST_INLINE void
+add_head(list *l, node *n)
+{
+  node *first = l->head;
+
+  n->prev = (node *) &l->head;
+  n->next = first;
+  first->prev = n;
+  l->head = n;
+}
+
+/* Links node n in immediately behind the node `after'. */
+LIST_INLINE void
+insert_node(node *n, node *after)
+{
+  node *succ = after->next;
+
+  n->prev = after;
+  n->next = succ;
+  succ->prev = n;
+  after->next = n;
+}
+
+/* Unlinks node n by connecting its two neighbours; n's own pointers are left untouched. */
+LIST_INLINE void
+rem_node(node *n)
+{
+  node *before = n->prev;
+  node *behind = n->next;
+
+  behind->prev = before;
+  before->next = behind;
+}
+
+/* Sets up l as an empty list: head and tail point at the two overlaid synthetic nodes. */
+LIST_INLINE void
+init_list(list *l)
+{
+  l->null = NULL;
+  l->head = (node *) &l->null;
+  l->tail = (node *) &l->head;
+}
+
+/*
+ * Appends all nodes of list l behind the last node of list `to'.
+ * The fields of l itself are not reset here.
+ */
+LIST_INLINE void
+add_tail_list(list *to, list *l)
+{
+  node *p = to->tail;
+  node *q = l->head;
+
+  p->next = q;
+  q->prev = p;
+  /*
+   * l->tail is deliberately read only now: for an empty l, q is the synthetic
+   * node at &l->null whose prev field overlays l->tail, so the store above has
+   * just redirected l->tail to p and the rest leaves `to' unchanged.
+   */
+  q = l->tail;
+  q->next = (node *) &to->null;
+  to->tail = q;
+}
diff --git a/lib/lists.h b/lib/lists.h

new file mode 100644 (file)

index 0000000..3903691
--- /dev/null
+++ b/lib/lists.h
@@ -0,0 +1,64 @@
+/*
+ *     UCW Library -- Linked Lists
+ *
+ *     (c) 1997--1999 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_LISTS_H
+#define _UCW_LISTS_H
+
+/*
+ * I admit the list structure is very tricky and also somewhat awkward,
+ * but it's both efficient and easy to manipulate once one understands the
+ * basic trick: The list head always contains two synthetic nodes which are
+ * always present in the list: the head and the tail. But as the `next'
+ * entry of the tail and the `prev' entry of the head are both NULL, the
+ * nodes can overlap each other:
+ *
+ *     head    head_node.next
+ *     null    head_node.prev  tail_node.next
+ *     tail                    tail_node.prev
+ */
+
+typedef struct node {
+  struct node *next, *prev;
+} node;
+
+typedef struct list {                  /* In fact two overlayed nodes */
+  struct node *head, *null, *tail;
+} list;
+
+#define NODE (node *)
+#define HEAD(list) ((void *)((list).head))
+#define TAIL(list) ((void *)((list).tail))
+#define WALK_LIST(n,list) for(n=HEAD(list);(NODE (n))->next; \
+                               n=(void *)((NODE (n))->next))
+#define DO_FOR_ALL(n,list) WALK_LIST(n,list)
+#define WALK_LIST_DELSAFE(n,nxt,list) \
+     for(n=HEAD(list); nxt=(void *)((NODE (n))->next); n=(void *) nxt)
+#define WALK_LIST_BACKWARDS(n,list) for(n=TAIL(list);(NODE (n))->prev; \
+                               n=(void *)((NODE (n))->prev))
+#define WALK_LIST_BACKWARDS_DELSAFE(n,prv,list) \
+     for(n=TAIL(list); prv=(void *)((NODE (n))->prev); n=(void *) prv)
+
+#define EMPTY_LIST(list) (!(list).head->next)
+
+void add_tail(list *, node *);
+void add_head(list *, node *);
+void rem_node(node *);
+void add_tail_list(list *, list *);
+void init_list(list *);
+void insert_node(node *, node *);
+
+#if !defined(_UCW_LISTS_C) && defined(__GNUC__)
+#define LIST_INLINE extern inline
+#include "lib/lists.c"
+#undef LIST_INLINE
+#else
+#define LIST_INLINE
+#endif
+
+#endif
diff --git a/lib/lizard-safe.c b/lib/lizard-safe.c

new file mode 100644 (file)

index 0000000..e8e8f6f
--- /dev/null
+++ b/lib/lizard-safe.c
@@ -0,0 +1,102 @@
+/*
+ *     LiZaRd -- Fast compression method based on Lempel-Ziv 77
+ *
+ *     (c) 2004, Robert Spalek <robert@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/threads.h"
+#include "lib/lizard.h"
+
+#include <sys/mman.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <setjmp.h>
+#include <errno.h>
+
+/* Output buffer for lizard_decompress_safe(): an anonymous mapping followed by one inaccessible guard page */
+struct lizard_buffer {
+  uns len;                             // size of the accessible part in bytes; the mapping is CPU_PAGE_SIZE longer
+  void *ptr;                           // start of the mapping, NULL while nothing is mapped
+};
+
+/* Creates a buffer with nothing mapped yet and calls handle_signal(SIGSEGV). */
+struct lizard_buffer *
+lizard_alloc(void)
+{
+  struct lizard_buffer *buf = xmalloc(sizeof(*buf));
+  buf->ptr = NULL;
+  buf->len = 0;
+  handle_signal(SIGSEGV);
+  return buf;
+}
+
+/* Calls unhandle_signal(SIGSEGV), unmaps the buffer area including its guard page (if mapped) and frees buf. */
+void
+lizard_free(struct lizard_buffer *buf)
+{
+  unhandle_signal(SIGSEGV);
+  if (buf->ptr != NULL)
+    munmap(buf->ptr, buf->len + CPU_PAGE_SIZE);
+  xfree(buf);
+}
+
+static void
+lizard_realloc(struct lizard_buffer *buf, uns max_len)
+  /* Makes the buffer at least max_len bytes long; the old contents are not
+   * preserved.  max_len needs to be aligned to CPU_PAGE_SIZE.  */
+{
+  uns old_len = buf->len;
+  if (max_len <= old_len)
+    return;
+  /* Grow at least to the double of the old size to ensure logarithmic cost */
+  uns new_len = (max_len < 2*old_len) ? 2*old_len : max_len;
+
+  if (buf->ptr)
+    munmap(buf->ptr, old_len + CPU_PAGE_SIZE);
+  buf->len = new_len;
+  buf->ptr = mmap(NULL, new_len + CPU_PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0);
+  if (buf->ptr == MAP_FAILED)
+    die("mmap(anonymous, %d bytes): %m", (uns)(new_len + CPU_PAGE_SIZE));
+  /* Make the extra page at the end inaccessible, so that overruns fault */
+  if (mprotect(buf->ptr + new_len, CPU_PAGE_SIZE, PROT_NONE) < 0)
+    die("mprotect: %m");
+}
+
+/* Jump target set up by lizard_decompress_safe(); being a single static, it makes that function non-reentrant */
+static jmp_buf safe_decompress_jump;
+/* SIGSEGV handler installed during safe decompression: jumps back to the setjmp() point with value 1 */
+static int
+sigsegv_handler(int signal UNUSED)
+{
+  longjmp(safe_decompress_jump, 1);
+  return 1;                            // never reached, longjmp() does not return
+}
+
+byte *
+lizard_decompress_safe(const byte *in, struct lizard_buffer *buf, uns expected_length)
+  /* Decompresses in into buf and returns a pointer to the decompressed data
+   * inside the buffer.  If the decompressed length differs from
+   * expected_length, NULL is returned and errno is set to EINVAL; if SIGSEGV
+   * is caught during decompression (buffer overflow), NULL is returned and
+   * errno is set to EFAULT.  The buffer buf is automatically reallocated.
+   * The function is not re-entrant because of a static longjmp handler.  */
+{
+  uns lock_offset = ALIGN_TO(expected_length + 3, CPU_PAGE_SIZE);      // +3 due to the unaligned access
+  if (lock_offset > buf->len)
+    lizard_realloc(buf, lock_offset);
+  volatile sh_sighandler_t old_handler = set_signal_handler(SIGSEGV, sigsegv_handler);
+  byte *ptr;
+  if (!setjmp(safe_decompress_jump))
+  {
+    /* Place the output so that it ends lock_offset bytes after its start, exactly at the guard page */
+    ptr = buf->ptr + buf->len - lock_offset;
+    int len = lizard_decompress(in, ptr);
+    if (len != (int) expected_length)
+    {
+      ptr = NULL;
+      errno = EINVAL;
+    }
+  }
+  else
+  {
+    /* We got here by longjmp() from sigsegv_handler() */
+    msg(L_ERROR, "SIGSEGV caught in lizard_decompress()");
+    ptr = NULL;
+    errno = EFAULT;
+  }
+  set_signal_handler(SIGSEGV, old_handler);
+  return ptr;
+}
diff --git a/lib/lizard-test.c b/lib/lizard-test.c

new file mode 100644 (file)

index 0000000..137cdc7
--- /dev/null
+++ b/lib/lizard-test.c
@@ -0,0 +1,123 @@
+#include "lib/lib.h"
+#include "lib/getopt.h"
+#include "lib/fastbuf.h"
+#include "lib/ff-binary.h"
+#include "lib/lizard.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+static char *options = CF_SHORT_OPTS "cdtx";
+static char *help = "\
+Usage: lizard-test <options> input-file [output-file]\n\
+\n\
+Options:\n"
+CF_USAGE
+"-c\t\tCompress\n\
+-d\t\tDecompress\n\
+-t\t\tCompress, decompress, and compare (in memory only, default)\n\
+-x\t\tLet the test crash by shrinking the output buffer\n\
+";
+
+/* Prints the help text to stderr and exits with status 1. */
+static void NONRET
+usage(void)
+{
+  fprintf(stderr, "%s", help);
+  exit(1);
+}
+
+/*
+ * lizard-test: compresses (-c) or decompresses (-d) a file, or compresses it
+ * in memory, decompresses the result with lizard_decompress_safe() and
+ * compares it with the input (-t, default).  With -x, the expected length
+ * passed to the safe decompressor is shrunk to provoke a failure.
+ */
+int
+main(int argc, char **argv)
+{
+  int opt;
+  uns action = 't';
+  uns crash = 0;
+  log_init(argv[0]);
+  while ((opt = cf_getopt(argc, argv, options, CF_NO_LONG_OPTS, NULL)) >= 0)
+    switch (opt)
+    {
+      case 'c':
+      case 'd':
+      case 't':
+       action = opt;
+       break;
+      case 'x':
+       crash++;
+       break;
+      default:
+       usage();
+    }
+  if (action == 't' && argc != optind+1
+  || action != 't' && argc != optind+2)
+    usage();
+
+  void *mi, *mo;
+  int li, lo;
+  uns adler = 0;
+
+  struct stat st;
+  if (stat(argv[optind], &st) < 0)             /* otherwise st would be used uninitialized */
+    die("Cannot stat %s: %m", argv[optind]);
+  li = st.st_size;
+  struct fastbuf *fi = bopen(argv[optind], O_RDONLY, 1<<16);
+  if (action != 'd')
+  {
+    lo = li * LIZARD_MAX_MULTIPLY + LIZARD_MAX_ADD;
+    li += LIZARD_NEEDS_CHARS;
+  }
+  else
+  {
+    /* A compressed file starts with the original length and its Adler32, 4 bytes each */
+    if (li < 8)
+      die("File %s is too short to contain the length and checksum header", argv[optind]);
+    lo = bgetl(fi);
+    adler = bgetl(fi);
+    li -= 8;
+  }
+  mi = xmalloc(li);
+  mo = xmalloc(lo);
+  li = bread(fi, mi, li);
+  bclose(fi);
+
+  printf("%d ", li);
+  if (action == 'd')
+    printf("->expected %d (%08x) ", lo, adler);
+  fflush(stdout);
+  if (action != 'd')
+    lo = lizard_compress(mi, li, mo);
+  else
+  {
+    lo = lizard_decompress(mi, mo);
+    if (adler32(mo, lo) != adler)
+      printf("wrong Adler32 ");
+  }
+  printf("-> %d ", lo);
+  fflush(stdout);
+
+  if (action != 't')
+  {
+    struct fastbuf *fo = bopen(argv[optind+1], O_CREAT | O_TRUNC | O_WRONLY, 1<<16);
+    if (action == 'c')
+    {
+      bputl(fo, li);
+      bputl(fo, adler32(mi, li));
+    }
+    bwrite(fo, mo, lo);
+    bclose(fo);
+  }
+  else
+  {
+    int smaller_li;
+    if (li >= (int) CPU_PAGE_SIZE)
+      smaller_li = li - CPU_PAGE_SIZE;
+    else
+      smaller_li = 0;
+    struct lizard_buffer *buf = lizard_alloc();
+    byte *ptr = lizard_decompress_safe(mo, buf, crash ? smaller_li : li);
+    if (!ptr)
+      printf("err: %m");
+    else if (memcmp(mi, ptr, li))
+      printf("WRONG");
+    else
+      printf("OK");
+    lizard_free(buf);                          /* ptr points into buf and is invalid from now on */
+  }
+  printf("\n");
+  xfree(mi);
+  xfree(mo);
+  return 0;
+}
diff --git a/lib/lizard.c b/lib/lizard.c

new file mode 100644 (file)

index 0000000..1da67cc
--- /dev/null
+++ b/lib/lizard.c
@@ -0,0 +1,478 @@
+/*
+ *     LiZaRd -- Fast compression method based on Lempel-Ziv 77
+ *
+ *     (c) 2004, Robert Spalek <robert@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ *
+ *     The file format is based on LZO1X and
+ *     the compression method is based on zlib.
+ */
+
+#include "lib/lib.h"
+#include "lib/lizard.h"
+
+#include <string.h>
+
+typedef u16 hash_ptr_t;
+struct hash_record {
+  /* the position in the original text is implicit; it is computed by locate_string() */
+  hash_ptr_t next;                     // 0=end
+  hash_ptr_t prev;                     // high bit: 0=record in array, 1=head in hash-table (i.e. value of hashf)
+};
+
+#define        HASH_SIZE       (1<<14)         // size of hash-table
+#define        HASH_RECORDS    (1<<15)         // maximum number of records in hash-table, 0 is unused ==> subtract 1
+#define        CHAIN_MAX_TESTS         8       // crop longer collision chains
+#define        CHAIN_GOOD_MATCH        32      // we already have a good match => end
+
+static inline uns
+hashf(const byte *string)
+  /* Hashes the first 3 bytes of string to 0..HASH_SIZE-1 */
+{
+  uns h = string[0];
+  h ^= string[1] << 3;
+  h ^= string[2] << 6;
+  return h;
+}
+
+static inline byte *
+locate_string(const byte *string, int record_id, int head)
+  /* Records are entered into the circular array at regular intervals, so the
+   * position of a recorded string follows from the distance between its
+   * record and the current head; no pointer has to be stored.  */
+{
+  int offset = record_id - head;
+  if (record_id >= head)                       /* the record lies before the wrap-around */
+    offset -= HASH_RECORDS-1;
+  return (byte *)(string + offset);
+}
+
+static inline uns
+find_match(uns record_id, struct hash_record *hash_rec, const byte *string, const byte *string_end, byte **best_ptr, uns head)
+  /* hash_tab[hash] == record_id points to the head of the double-linked
+   * link-list of strings with the same hash.  The records are statically
+   * stored in circular array hash_rec (with the 1st entry unused), and the
+   * pointers are just 16-bit indices.  The strings in every collision chain
+   * are ordered by age.
+   *
+   * Examines at most CHAIN_MAX_TESTS records of the chain and returns the
+   * length of the longest match found (0 if there is none); the start of
+   * that match is stored to *best_ptr.  The comparison may run up to and
+   * including string_end, so the caller has to crop the returned length
+   * to the end of the input.  */
+{
+  uns count = CHAIN_MAX_TESTS;
+  uns best_len = 0;
+  while (record_id && count-- > 0)
+  {
+    byte *record_string = locate_string(string, record_id, head);
+    byte *cmp = record_string;
+    if (cmp[0] == string[0] && cmp[2] == string[2])
+    /* implies cmp[1] == string[1] */
+    {
+      /* In every branch below, cmp ends up one byte behind the first mismatching one */
+      if (cmp[3] == string[3])
+      {
+       cmp += 4;
+       if (*cmp++ == string[4] && *cmp++ == string[5]
+           && *cmp++ == string[6] && *cmp++ == string[7])
+       {
+         const byte *str = string + 8;
+         while (str <= string_end && *cmp++ == *str++);
+       }
+      }
+      else
+       cmp += 4;
+      uns len = cmp - record_string - 1;       /* cmp points 2 characters after the last match */
+      if (len > best_len)
+      {
+       best_len = len;
+       *best_ptr = record_string;
+       if (best_len >= CHAIN_GOOD_MATCH)       /* optimization */
+         break;
+      }
+    }
+    record_id = hash_rec[record_id].next;
+  }
+  return best_len;
+}
+
+static uns
+hash_string(hash_ptr_t *hash_tab, uns hash, struct hash_record *hash_rec, /*byte *string,*/ uns head, uns *to_delete)
+  /* We reuse hash-records stored in a circular array.  First, delete the old
+   * one and then add the new one in front of the link-list.
+   *
+   * The record at index head is (re)used for a string with the given hash.
+   * Returns the index of the next record to use; *to_delete is set to 1 once
+   * the array has wrapped around, i.e. when records start to be recycled.  */
+{
+  struct hash_record *rec = hash_rec + head;
+  if (*to_delete)                              /* unlink the original record */
+  {
+    uns prev_id = rec->prev & ((1<<15)-1);
+    if (rec->prev & (1<<15))                   /* was a head */
+      hash_tab[prev_id] = 0;
+    else                                       /* thanks to the ordering, this was a tail */
+      hash_rec[prev_id].next = 0;
+  }
+  rec->next = hash_tab[hash];
+  rec->prev = (1<<15) | hash;
+  /* If the chain was empty, rec->next is 0 and this store hits the unused record 0 */
+  hash_rec[rec->next].prev = head;
+  hash_tab[hash] = head;                       /* add the new record before the link-list */
+
+  if (++head >= HASH_RECORDS)                  /* circular buffer, reuse old records, 0 is unused */
+  {
+    head = 1;
+    *to_delete = 1;
+  }
+  return head;
+}
+
+/*
+ * Writes l as a run of zero bytes, each standing for 255, followed by one
+ * byte holding the remainder.  Returns the position behind the last byte
+ * written.
+ */
+static inline byte *
+dump_unary_value(byte *out, uns l)
+{
+  for (; l > 255; l -= 255)
+    *out++ = 0;
+  *out++ = l;
+  return out;
+}
+
+/*
+ * Emits a command copying len literal bytes from start to the output and
+ * returns the new output position.  bof is nonzero if nothing has been
+ * written to the output yet.
+ */
+static byte *
+flush_copy_command(uns bof, byte *out, const byte *start, uns len)
+{
+  /* Short copy at the beginning of the file: a single byte len+17, which fits for len <= 238 */
+  if (bof && len <= 238)
+    *out++ = len + 17;
+  else if (len < 4)
+  {
+    /* cannot happen when !!bof */
+    out[-2] |= len;                            /* invariant: lowest 2 bits 2 bytes back */
+#ifdef CPU_ALLOW_UNALIGNED
+    /* Copies 4 bytes at once, but only len of them count; the rest gets overwritten by the next output */
+    * (u32*) out = * (u32*) start;
+    return out + len;
+#else
+    while (len-- > 0)
+      *out++ = *start++;
+    return out;
+#endif
+  }
+  else
+  {
+    /* leave 2 least significant bits of out[-2] set to 0 */
+    if (len <= 18)
+      *out++ = len - 3;
+    else
+    {
+      *out++ = 0;
+      out = dump_unary_value(out, len - 18);
+    }
+  }
+  memcpy(out, start, len);
+  return out + len;
+}
+
+int
+lizard_compress(const byte *in, uns in_len, byte *out)
+  /* Compresses in_len bytes starting at in and writes the result to out.
+   * Requires out being allocated for at least in_len * LIZARD_MAX_MULTIPLY +
+   * LIZARD_MAX_ADD.  There must be at least LIZARD_NEEDS_CHARS characters
+   * allocated after in.  Returns the actual compressed length. */
+{
+  hash_ptr_t hash_tab[HASH_SIZE];
+  struct hash_record hash_rec[HASH_RECORDS];
+  const byte *in_end = in + in_len;
+  byte *out_start = out;
+  const byte *copy_start = in;                 /* start of the literals not written out yet */
+  uns head = 1;                                        /* 0 is unused */
+  uns to_delete = 0, bof = 1;
+  memset(hash_tab, 0, sizeof(hash_tab));       /* init the hash-table */
+  while (in < in_end)
+  {
+    uns hash = hashf(in);
+    byte *best = NULL;
+    uns len = find_match(hash_tab[hash], hash_rec, in, in_end, &best, head);
+    if (len < 3)
+#if 0                  // TODO: now, our routine does not detect matches of length 2
+      if (len == 2 && (in - best->string - 1) < (1<<10))
+      { /* pass-thru */ }
+      else
+#endif
+      {
+literal:
+       head = hash_string(hash_tab, hash, hash_rec, head, &to_delete);
+       in++;                                   /* add a literal */
+       continue;
+      }
+
+    if (in + len > in_end)                     /* crop EOF */
+    {
+      len = in_end - in;
+      if (len < 3)
+       goto literal;
+    }
+    /* Record the match.  */
+    uns copy_len = in - copy_start;
+    uns is_in_copy_mode = bof || copy_len >= 4;
+    uns shift = in - best - 1;
+    /* Try to use a 2-byte sequence.  */
+#if 0
+    if (len == 2)
+    {
+      if (is_in_copy_mode || !copy_len)                /* cannot use with 0 copied characters, because this bit pattern is reserved for copy mode */
+       goto literal;
+      else
+       goto dump_2sequence;
+    } else
+#endif
+    /* now, len >= 3 */
+    if (shift < (1<<11) && len <= 8)
+    {
+      shift |= (len-3 + 2)<<11;
+dump_2sequence:
+      if (copy_len)
+       out = flush_copy_command(bof, out, copy_start, copy_len);
+      *out++ = (shift>>6) & ~3;                        /* shift fits into 10 bits */
+      *out++ = shift & 0xff;
+    }
+    else if (len == 3 && is_in_copy_mode)
+    {
+      if (shift < (1<<11) + (1<<10))           /* optimisation for length-3 matches after a copy command */
+      {
+       shift -= 1<<11;
+       goto dump_2sequence;                    /* shift has 11 bits and contains also len */
+      }
+      else                                     /* avoid 3-sequence compressed to 3 sequence if it can simply be appended */
+       goto literal;
+    }
+    /* We have to use a 3-byte sequence.  */
+    else
+    {
+      if (copy_len)
+       out = flush_copy_command(bof, out, copy_start, copy_len);
+      if (shift < (1<<14))
+      {
+       if (len <= 33)
+         *out++ = (1<<5) | (len-2);
+       else
+       {
+         *out++ = 1<<5;
+         out = dump_unary_value(out, len - 33);
+       }
+      }
+      else /* shift < (1<<15)-1 because of HASH_RECORDS */
+      {
+       shift++;                                /* because shift==0 is reserved for EOF */
+       byte pos_bit = ((shift>>11) & (1<<3)) | (1<<4);
+       if (len <= 9)
+         *out++ = pos_bit | (len-2);
+       else
+       {
+         *out++ = pos_bit;
+         out = dump_unary_value(out, len - 9);
+       }
+      }
+      *out++ = (shift>>6) & ~3;                        /* rest of shift fits into 14 bits */
+      *out++ = shift & 0xff;
+    }
+    /* Update the hash-table.  */
+    head = hash_string(hash_tab, hash, hash_rec, head, &to_delete);
+    for (uns i=1; i<len; i++)
+      head = hash_string(hash_tab, hashf(in+i), hash_rec, head, &to_delete);
+    in += len;
+    copy_start = in;
+    bof = 0;
+  }
+  /* Flush the literals left over after the last match */
+  uns copy_len = in - copy_start;
+  if (copy_len)
+    out = flush_copy_command(bof, out, copy_start, copy_len);
+  *out++ = 17;                                 /* add EOF */
+  *out++ = 0;
+  *out++ = 0;
+  return out - out_start;
+}
+
+/*
+ * Reads a value stored as a run of zero bytes (each counting for 255)
+ * terminated by a non-zero byte, which is added as the last part.  The sum is
+ * stored to *val and the position behind the terminating byte is returned.
+ */
+static inline byte *
+read_unary_value(const byte *in, uns *val)
+{
+  uns total = 0;
+  byte b;
+  while ((b = *in++) == 0)
+    total += 255;
+  *val = total + b;
+  return (byte *)in;
+}
+
+int
+lizard_decompress(const byte *in, byte *out)
+  /* Decompresses the data at in to out.  Requires out to be allocated for
+   * the whole decompressed data, so the decompressed length must be known
+   * beforehand.  It is desirable to lock the following memory page for
+   * read-only access to prevent buffer overflow.  Returns the actual
+   * decompressed length.
+   * NOTE(review): the original comment also promised a negative return value
+   * on errors, but no path in this function returns one -- confirm with the
+   * callers.  */
+{
+  byte *out_start = out;
+  /* 1 = a copy command may follow, 2 = a copy command has just been performed, 0 = literals were appended to a match */
+  uns expect_copy_command = 1;
+  uns len;
+  if (*in > 17)                                        /* short copy command at BOF */
+  {
+    len = *in++ - 17;
+    goto perform_copy_command;
+  }
+  while (1)
+  {
+    uns c = *in++;
+    uns pos;
+    if (c < 0x10)
+      if (expect_copy_command == 1)
+      {
+       /* copy command: the length is c+3, or a unary value plus 18 if c is zero */
+       if (!c)
+       {
+         in = read_unary_value(in, &len);
+         len += 18;
+       }
+       else
+         len = c + 3;
+       goto perform_copy_command;
+      }
+      else
+      {
+       /* 2-byte match: position from 2 bits of c and the next byte */
+       pos = ((c&0xc)<<6) | *in++;
+       if (expect_copy_command == 2)
+       {
+         /* right after a copy command it means a match of length 3 with the position moved by 1<<11 */
+         pos += 1<<11;
+         len = 3;
+       }
+       else
+         len = 2;
+       pos++;
+      }
+    else if (c < 0x20)
+    {
+      /* 3-byte match with the top position bit stored in c; position 0 marks EOF */
+      pos = (c&0x8)<<11;
+      len = c&0x7;
+      if (!len)
+      {
+       in = read_unary_value(in, &len);
+       len += 9;
+      }
+      else
+       len += 2;
+      pos |= (*in++ & 0xfc)<<6;
+      pos |= *in++;
+      if (!pos)                                        /* EOF */
+       break;
+      /* do NOT pos++ */
+    }
+    else if (c < 0x40)
+    {
+      /* 3-byte match with the length in the low 5 bits of c */
+      len = c&0x1f;
+      if (!len)
+      {
+       in = read_unary_value(in, &len);
+       len += 33;
+      }
+      else
+       len += 2;
+      pos = (*in++ & 0xfc)<<6;
+      pos |= *in++;
+      pos++;
+    }
+    else /* high bits encode the length */
+    {
+      len = ((c&0xe0)>>5) -2 +3;
+      pos = (c&0x1c)<<6;
+      pos |= *in++;
+      pos++;
+    }
+    /* take from the sliding window */
+    if (len <= pos)
+    {
+      memcpy(out, out-pos, len);
+      out += len;
+    }
+    else
+    {                                          /* overlapping */
+      for (; len-- > 0; out++)
+       *out = *(out-pos);
+      /* It's tempting to use out[-pos] above, but unfortunately it's not the same */
+    }
+    /* extract the copy-bits */
+    len = in[-2] & 0x3;
+    if (len)
+    {
+      expect_copy_command = 0;
+#ifdef CPU_ALLOW_UNALIGNED
+      /* Copies 4 bytes at once, but advances only by len of them */
+      * (u32*) out = * (u32*) in;
+      out += len;
+      in += len;
+#else
+      while (len-- > 0)
+       *out++ = *in++;
+#endif
+    }
+    else
+      expect_copy_command = 1;
+    continue;
+
+perform_copy_command:
+    expect_copy_command = 2;
+    memcpy(out, in, len);
+    out += len;
+    in += len;
+  }
+
+  return out - out_start;
+}
+
+/*
+
+Description of the LZO1X format :
+=================================
+
+The meaning of the commands depends on the current mode. It can be either
+the compressed mode or the copy mode. In some cases, the compressed mode
+also distinguishes whether we just left the copy mode or not.
+
+Beginning of file:
+------------------
+
+Start in copy mode. If the first byte is 00010001, it most likely means EOF (empty file),
+so switch to the compressed mode.  If it is bigger, subtract 17 and copy this number of
+the following characters to the output and switch to the compressed mode.
+If it is smaller, interpret it as a regular copy mode command.
+
+Compressed mode:
+----------------
+
+Read the first byte of the sequence and determine the type of bit encoding by
+looking at the most significant bits.  The sequence is always at least 2 bytes
+long.  Decode sequences of these types until the EOF or END marker is read.
+
+  length L = length of the text taken from the sliding window
+
+    If L=0, then count the number Z of the following zero bytes and add Z*255
+    to the value of the following non-zero byte.  This allows setting L
+    arbitrarily high.
+
+  position p = relative position of the beginning of the text
+
+    Exception: 00010001 00000000 00000000 means EOF
+
+  copying C = length 1..3 of copied characters or END=0
+
+    C following characters will be copied from the compressed text to the
+    output.  The number C is always stored in the 2 least significant bits
+    (shown as CC in the patterns below) of the second last byte of the sequence.
+
+    If END is read, the algorithm switches to the copy mode.
+
+pattern                                        length          position
+
+0000ppCC                pppppppp       2               10 bits         [default interpretation]
+0000ppCC                pppppppp       3               10 bits + 2048  [just after return from copy mode]
+0001pLLL L*    ppppppCC pppppppp       3..9 + extend   15 bits         [pos 0 interpreted as EOF]
+001LLLLL L*    ppppppCC pppppppp       3..33 + extend  14 bits
+LLLpppCC                pppppppp       3..8            11 bits         [LLL >= 010]
+
+Copy mode:
+----------
+
+Read the first byte and, if the most significant bits are 0000, perform the
+following command, otherwise switch to the compressed mode (and evaluate the
+command there).
+
+pattern                                        length          position
+
+0000LLLL L*                            4..18 + extend  N/A
+
+  Copy L characters from the compressed text to the output.  The overhead for
+  incompressible strings is only roughly 1/256 + epsilon.
+
+*/
diff --git a/lib/lizard.h b/lib/lizard.h

new file mode 100644 (file)

index 0000000..616d17b
--- /dev/null
+++ b/lib/lizard.h
@@ -0,0 +1,49 @@
+/*
+ *     LiZaRd -- Fast compression method based on Lempel-Ziv 77
+ *
+ *     (c) 2004, Robert Spalek <robert@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_LIZARD_H
+#define _UCW_LIZARD_H
+
+#define        LIZARD_NEEDS_CHARS      8
+  /* The compression routine needs input buffer 8 characters longer, because it
+   * does not check the input bounds all the time.  */
+#define        LIZARD_MAX_MULTIPLY     23./22
+#define        LIZARD_MAX_ADD          4
+  /* In the worst case, the compressed file will not be longer than its
+   * original length * 23/22 + 4.
+   *
+   * The additive constant is for EOF and the header of the file.
+   *
+   * The multiplicative constant comes from 19-byte incompressible string
+   * followed by a 3-sequence that can be compressed into 2-byte link.  This
+   * breaks the copy-mode and it needs to be restarted with a new header.  The
+   * total length is 2(header) + 19(string) + 2(link) = 23.
+   *
+   * Note that LIZARD_MAX_MULTIPLY expands to the unparenthesized tokens
+   * 23./22, so it is only safe as the right operand of a multiplication,
+   * as in length * LIZARD_MAX_MULTIPLY + LIZARD_MAX_ADD.
+   */
+
+/* lizard.c */
+/* Compresses in_len bytes at in into out; the return value is computed as the
+ * distance the output pointer has advanced, i.e., the compressed length. */
+int lizard_compress(const byte *in, uns in_len, byte *out);
+/* Decompresses in into out, which must be large enough for the whole result;
+ * returns the decompressed length. */
+int lizard_decompress(const byte *in, byte *out);
+
+/* lizard-safe.c */
+struct lizard_buffer;
+
+struct lizard_buffer *lizard_alloc(void);
+void lizard_free(struct lizard_buffer *buf);
+byte *lizard_decompress_safe(const byte *in, struct lizard_buffer *buf, uns expected_length);
+
+/* adler32.c */
+/* Continues a checksum: adler is the value so far (adler32() below starts from 1) */
+uns update_adler32(uns adler, const byte *ptr, uns len);
+
+/* One-shot checksum of len bytes at buf: update_adler32() started from 1 */
+static inline uns
+adler32(const byte *buf, uns len)
+{
+  const uns seed = 1;
+  return update_adler32(seed, buf, len);
+}
+
+#endif
diff --git a/lib/log-file.c b/lib/log-file.c

new file mode 100644 (file)

index 0000000..401e797
--- /dev/null
+++ b/lib/log-file.c
@@ -0,0 +1,108 @@
+/*
+ *     UCW Library -- Keeping of Log Files
+ *
+ *     (c) 1997--2006 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/lfs.h"
+#include "lib/threads.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <time.h>
+
+/* Copy of the name passed to log_file(); used as the strftime() format */
+static char *log_name_patt;
+/* Non-zero iff the pattern contains a '%' character */
+static int log_params;
+/* Size of the log_filename buffer: length of the pattern plus 64 */
+static int log_filename_size;
+/* Non-zero while do_log_switch() is running; internal_log_switch() checks it */
+volatile int log_switch_nest;
+
+/*
+ * Redirect stderr (fd 2) to the log file whose name is obtained by expanding
+ * log_name_patt with strftime() for the time tm.  Does nothing when no pattern
+ * has been set, or when a file name is already set and the pattern contains
+ * no '%'.  Returns 1 if a new file has been opened, 0 otherwise.  Dies if the
+ * name cannot be formatted or the file cannot be opened.
+ */
+static int
+do_log_switch(struct tm *tm)
+{
+  int fd, l;
+  int switched = 0;
+
+  if (!log_name_patt ||
+      (log_filename[0] && !log_params))
+    return 0;
+
+  /* Declared only after the early return: log_filename_size is still zero
+   * when log_file() has not been called yet. */
+  char name[log_filename_size];
+
+  ucwlib_lock();
+  log_switch_nest++;           /* so that a die() below does not re-enter via the hook */
+  l = strftime(name, log_filename_size, log_name_patt, tm);
+  /*
+   * strftime() reports a result which does not fit by returning 0 and leaves
+   * the buffer contents undefined; it never returns a negative value nor one
+   * reaching the buffer size.  A zero result is accepted only for an empty
+   * pattern, in which case the name is made a well-defined empty string.
+   */
+  if (l <= 0 || l >= log_filename_size)
+    {
+      if (log_name_patt[0])
+       die("Error formatting log file name: %m");
+      name[0] = 0;
+    }
+  if (strcmp(name, log_filename))
+    {
+      strcpy(log_filename, name);
+      fd = sh_open(name, O_WRONLY | O_CREAT | O_APPEND, 0666);
+      if (fd < 0)
+       die("Unable to open log file %s: %m", name);
+      dup2(fd, 2);
+      close(fd);
+      switched = 1;
+    }
+  log_switch_nest--;
+  ucwlib_unlock();
+  return switched;
+}
+
+/* Switch the log file according to the current local time; see do_log_switch() */
+int
+log_switch(void)
+{
+  time_t now;
+
+  time(&now);
+  return do_log_switch(localtime(&now));
+}
+
+/* Installed as log_switch_hook: switches the log unless a switch is already in progress */
+static void
+internal_log_switch(struct tm *tm)
+{
+  if (log_switch_nest)
+    return;
+  do_log_switch(tm);
+}
+
+/*
+ * Start logging to the file given by name, which is used as a strftime()
+ * pattern.  Drops the previous pattern and file name, performs the first
+ * switch immediately and installs the hook for subsequent ones.  A NULL
+ * name is ignored.
+ */
+void
+log_file(const char *name)
+{
+  if (!name)
+    return;
+
+  if (log_name_patt)
+    xfree(log_name_patt);
+  if (log_filename)
+    {
+      xfree(log_filename);
+      log_filename = NULL;
+    }
+
+  log_name_patt = xstrdup(name);
+  log_params = (strchr(name, '%') != NULL);
+  log_filename_size = strlen(name) + 64;       /* 63 is an upper bound on expansion of % escapes */
+  log_filename = xmalloc(log_filename_size);
+  *log_filename = 0;
+
+  log_switch();
+  log_switch_hook = internal_log_switch;
+}
+
+/* Refresh log_pid with the PID of the calling process */
+void
+log_fork(void)
+{
+  log_pid = getpid();
+}
+
+#ifdef TEST
+
+/* Test driver: logs every command-line argument as a separate message */
+int main(int argc, char **argv)
+{
+  log_init(argv[0]);
+  log_file("/proc/self/fd/1");
+  /* The arguments are data, so they must never serve as the format string */
+  for (int i=1; i<argc; i++)
+    log(L_INFO, "%s", argv[i]);
+  return 0;
+}
+
+#endif
diff --git a/lib/log.c b/lib/log.c

new file mode 100644 (file)

index 0000000..0063f3e
--- /dev/null
+++ b/lib/log.c
@@ -0,0 +1,149 @@
+/*
+ *     UCW Library -- Logging
+ *
+ *     (c) 1997--2006 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include <time.h>
+#include <alloca.h>
+
+/* Basename of argv[0] as stored by log_init(), truncated to 31 characters */
+static char log_progname[32];
+/* Name of the current log file; maintained by log-file.c */
+char *log_filename;
+/* If set, printed in brackets in front of every message */
+char *log_title;
+/* If non-zero, printed along with (or instead of) the title */
+int log_pid;
+/* If non-zero, microseconds are appended to the time stamp */
+int log_precise_timings;
+/* If set, called by die() after the message has been logged */
+void (*log_die_hook)(void);
+/* If set, called by vmsg() with the current local time before a message is formatted */
+void (*log_switch_hook)(struct tm *tm);
+
+/*
+ * Format a single log line and write it to file descriptor 2.
+ *
+ * The line consists of the category character cat, the local time, an
+ * optional "[title (pid)]" prefix and the printf-like message; control
+ * characters in the message (except for tabs) are replaced by 0x7f.
+ * The line is built in an alloca()ed buffer; when the message does not fit,
+ * a bigger buffer is allocated and everything is formatted again.
+ */
+void
+vmsg(unsigned int cat, const char *fmt, va_list args)
+{
+  struct timeval tv;
+  struct tm tm;
+  byte *buf, *p;
+  int buflen = 256;
+  int l, l0, r;
+  va_list args2;
+
+  gettimeofday(&tv, NULL);
+  if (!localtime_r(&tv.tv_sec, &tm))
+    bzero(&tm, sizeof(tm));
+
+  if (log_switch_hook)
+    log_switch_hook(&tm);
+  while (1)
+    {
+      p = buf = alloca(buflen);
+      *p++ = cat;
+      /* We cannot use strftime() here, because it's not re-entrant */
+      p += sprintf(p, " %4d-%02d-%02d %02d:%02d:%02d", tm.tm_year+1900, tm.tm_mon+1, tm.tm_mday,
+                  tm.tm_hour, tm.tm_min, tm.tm_sec);
+      if (log_precise_timings)
+        p += sprintf(p, ".%06d", (int)tv.tv_usec);
+      *p++ = ' ';
+      if (log_title)
+       {
+         if (log_pid)
+           p += sprintf(p, "[%s (%d)] ", log_title, log_pid);
+         else
+           p += sprintf(p, "[%s] ", log_title);
+       }
+      else
+       {
+         if (log_pid)
+           p += sprintf(p, "[%d] ", log_pid);
+       }
+      /* l0 = length of the header plus one byte for the final newline;
+       * r = space left for the message including its terminating zero */
+      l0 = p - buf + 1;
+      r = buflen - l0;
+      /* The arguments may be needed again in the next round, so work on a copy */
+      va_copy(args2, args);
+      l = vsnprintf(p, r, fmt, args2);
+      va_end(args2);
+      if (l < 0)
+       l = r;
+      else if (l < r)
+       {
+         /* The message fits: sanitize it, turn the terminating zero into a newline and write it */
+         while (*p)
+           {
+             if (*p < 0x20 && *p != '\t')
+               *p = 0x7f;
+             p++;
+           }
+         *p = '\n';
+         write(2, buf, l + l0);
+         return;
+       }
+      /* Too small: retry with a buffer sized by what vsnprintf() asked for */
+      buflen = l + l0 + 1;
+    }
+}
+
+/* Log a printf-like message of category cat; variadic front-end of vmsg() */
+void
+msg(unsigned int cat, const char *fmt, ...)
+{
+  va_list ap;
+
+  va_start(ap, fmt);
+  vmsg(cat, fmt, ap);
+  va_end(ap);
+}
+
+/*
+ * Log a fatal message, call log_die_hook if one is set and terminate the
+ * process: by abort() when DEBUG_DIE_BY_ABORT is defined, by exit(1) otherwise.
+ */
+void
+die(const char *fmt, ...)
+{
+  va_list ap;
+  void (*hook)(void);
+
+  va_start(ap, fmt);
+  vmsg(L_FATAL, fmt, ap);
+  va_end(ap);
+  hook = log_die_hook;
+  if (hook)
+    hook();
+#ifdef DEBUG_DIE_BY_ABORT
+  abort();
+#else
+  exit(1);
+#endif
+}
+
+/* Log a fatal message describing the failed assertion and its location, then abort() */
+void
+assert_failed(const char *assertion, const char *file, int line)
+{
+  msg(L_FATAL, "Assertion `%s' failed at %s:%d", assertion, file, line);
+  abort();
+}
+
+/* Variant of assert_failed() carrying no details; terminates via die() */
+void
+assert_failed_noinfo(void)
+{
+  die("Internal error: Assertion failed.");
+}
+
+/* Return the part of n following its last '/', or the whole n if there is no slash */
+static const char *
+log_basename(const char *n)
+{
+  const char *slash = strrchr(n, '/');
+
+  if (slash)
+    return slash + 1;
+  return n;
+}
+
+/*
+ * Use the basename of argv0 (truncated to fit log_progname) as the log title.
+ * A NULL argv0 leaves everything untouched.
+ */
+void
+log_init(const char *argv0)
+{
+  if (!argv0)
+    return;
+
+  const char *base = log_basename(argv0);
+  strncpy(log_progname, base, sizeof(log_progname)-1);
+  log_progname[sizeof(log_progname)-1] = 0;
+  log_title = log_progname;
+}
diff --git a/lib/mainloop.c b/lib/mainloop.c

new file mode 100644 (file)

index 0000000..aeb79de
--- /dev/null
+++ b/lib/mainloop.c
@@ -0,0 +1,545 @@
+/*
+ *     UCW Library -- Main Loop
+ *
+ *     (c) 2004--2006 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#undef LOCAL_DEBUG
+
+#include "lib/lib.h"
+#include "lib/mainloop.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <signal.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <time.h>
+#include <sys/poll.h>
+#include <sys/wait.h>
+#include <sys/time.h>
+
+/* Current time in milliseconds and in seconds, as of the last main_get_time() */
+timestamp_t main_now;
+sh_time_t main_now_seconds;
+/* When non-zero, main_loop() returns after the hooks of the current iteration have run */
+uns main_shutdown;
+
+/* Lists of all registered timers (sorted by expiration), files, hooks and processes */
+clist main_timer_list, main_file_list, main_hook_list, main_process_list;
+/* Number of entries in main_file_list */
+static uns main_file_cnt;
+/* main_poll_table_obsolete is set whenever a file is added or deleted and
+ * makes main_loop() rebuild the table; main_poll_table_size is the number
+ * of allocated entries */
+static uns main_poll_table_obsolete, main_poll_table_size;
+static struct pollfd *main_poll_table;
+/* Non-zero once process_add() has installed the SIGCHLD handler */
+static uns main_sigchld_set_up;
+
+/* Refresh main_now (milliseconds) and main_now_seconds from gettimeofday() */
+void
+main_get_time(void)
+{
+  struct timeval now;
+
+  gettimeofday(&now, NULL);
+  main_now_seconds = now.tv_sec;
+  main_now = (timestamp_t)now.tv_sec * 1000 + now.tv_usec / 1000;
+  // DBG("It's %lld o'clock", (long long) main_now);
+}
+
+/* Initialize the main loop: empty all lists, mark the poll table as obsolete and read the clock */
+void
+main_init(void)
+{
+  clist *all_lists[] = { &main_timer_list, &main_file_list, &main_hook_list, &main_process_list };
+
+  DBG("MAIN: Initializing");
+  for (uns i=0; i < sizeof(all_lists) / sizeof(all_lists[0]); i++)
+    clist_init(all_lists[i]);
+  main_file_cnt = 0;
+  main_poll_table_obsolete = 1;
+  main_get_time();
+}
+
+/*
+ * (Re)schedule the timer tm to expire at the given time, or cancel it when
+ * expires is zero.  A timer with non-zero tm->expires is considered to be on
+ * the list and is removed first; the list is kept sorted by expiration time
+ * and a new timer is put behind all timers expiring earlier.
+ */
+void
+timer_add(struct main_timer *tm, timestamp_t expires)
+{
+  if (expires)
+    DBG("MAIN: Setting timer %p (expire at now+%lld)", tm, (long long)(expires-main_now));
+  else
+    DBG("MAIN: Clearing timer %p", tm);
+  if (tm->expires)
+    clist_remove(&tm->n);
+  tm->expires = expires;
+  if (!expires)
+    return;
+
+  /* Find the first timer which does not expire earlier than the new one */
+  cnode *succ;
+  for (succ = main_timer_list.head.next; succ != &main_timer_list.head; succ = succ->next)
+    if (((struct main_timer *) succ)->expires >= expires)
+      break;
+  clist_insert_before(&tm->n, succ);
+}
+
+/* Cancel the timer; the same as timer_add() with zero expiration time */
+void
+timer_del(struct main_timer *tm)
+{
+  timer_add(tm, 0);
+}
+
+/* Handler of per-file timers: cancels the timer and reports MFERR_TIMEOUT to the error handler, if any */
+static void
+file_timer_expired(struct main_timer *tm)
+{
+  struct main_file *file = tm->data;
+
+  timer_del(&file->timer);
+  if (!file->error_handler)
+    return;
+  file->error_handler(file, MFERR_TIMEOUT);
+}
+
+/*
+ * Register a file with the main loop: put it on the file list, set up its
+ * timeout timer, schedule a rebuild of the poll table and switch the
+ * descriptor to non-blocking mode.  The file must not be registered yet.
+ * A failure to set the non-blocking mode is logged, but not fatal.
+ */
+void
+file_add(struct main_file *fi)
+{
+  DBG("MAIN: Adding file %p (fd=%d)", fi, fi->fd);
+  ASSERT(!fi->n.next);
+  clist_add_tail(&main_file_list, &fi->n);
+  fi->timer.handler = file_timer_expired;
+  fi->timer.data = fi;
+  main_file_cnt++;
+  main_poll_table_obsolete = 1;
+  /* F_SETFL replaces all file status flags, so add O_NONBLOCK to the current
+   * ones instead of wiping e.g. O_APPEND set by whoever opened the file */
+  int flags = fcntl(fi->fd, F_GETFL);
+  if (flags < 0 || fcntl(fi->fd, F_SETFL, flags | O_NONBLOCK) < 0)
+    msg(L_ERROR, "Error setting fd %d to non-blocking mode: %m. Keep fingers crossed.", fi->fd);
+}
+
+/*
+ * Recalculate the set of polled events from the handlers currently set.
+ * Does nothing if the file has no poll table entry assigned.
+ */
+void
+file_chg(struct main_file *fi)
+{
+  struct pollfd *entry = fi->pollfd;
+  int wanted = 0;
+
+  if (!entry)
+    return;
+  if (fi->read_handler)
+    wanted |= POLLIN | POLLHUP | POLLERR;
+  if (fi->write_handler)
+    wanted |= POLLOUT | POLLERR;
+  entry->events = wanted;
+}
+
+/*
+ * Unregister a file: cancel its timer, take it off the file list and
+ * schedule a rebuild of the poll table.  The file must be registered.
+ */
+void
+file_del(struct main_file *fi)
+{
+  DBG("MAIN: Deleting file %p (fd=%d)", fi, fi->fd);
+  ASSERT(fi->n.next);
+  timer_del(&fi->timer);
+  clist_remove(&fi->n);
+  main_file_cnt--;
+  main_poll_table_obsolete = 1;
+  /* Cleared links mark the file as unregistered */
+  fi->n.prev = NULL;
+  fi->n.next = NULL;
+}
+
+/*
+ * Read handler installed by file_read(): keeps reading until the buffer is
+ * full or EOF is reached, then removes itself and calls read_done.  Returns 0
+ * if read() has failed (the error handler is called unless the error was
+ * EINTR or EAGAIN), 1 when the request has been completed.
+ */
+static int
+file_read_handler(struct main_file *fi)
+{
+  while (fi->rpos < fi->rlen)
+    {
+      int got = read(fi->fd, fi->rbuf + fi->rpos, fi->rlen - fi->rpos);
+      DBG("MAIN: FD %d: read %d", fi->fd, got);
+      if (!got)                                /* EOF */
+       break;
+      if (got < 0)
+       {
+         int transient = (errno == EINTR || errno == EAGAIN);
+         if (!transient && fi->error_handler)
+           fi->error_handler(fi, MFERR_READ);
+         return 0;
+       }
+      fi->rpos += got;
+    }
+  DBG("MAIN: FD %d done read %d of %d", fi->fd, fi->rpos, fi->rlen);
+  fi->read_handler = NULL;
+  file_chg(fi);
+  fi->read_done(fi);
+  return 1;
+}
+
+/*
+ * Write handler installed by file_write(): keeps writing until the whole
+ * buffer is out, then removes itself and calls write_done.  Returns 0 if
+ * write() has failed (the error handler is called unless the error was
+ * EINTR or EAGAIN), 1 when the request has been completed.
+ */
+static int
+file_write_handler(struct main_file *fi)
+{
+  while (fi->wpos < fi->wlen)
+    {
+      int sent = write(fi->fd, fi->wbuf + fi->wpos, fi->wlen - fi->wpos);
+      DBG("MAIN: FD %d: write %d", fi->fd, sent);
+      if (sent < 0)
+       {
+         int transient = (errno == EINTR || errno == EAGAIN);
+         if (!transient && fi->error_handler)
+           fi->error_handler(fi, MFERR_WRITE);
+         return 0;
+       }
+      fi->wpos += sent;
+    }
+  DBG("MAIN: FD %d done write %d", fi->fd, fi->wpos);
+  fi->write_handler = NULL;
+  file_chg(fi);
+  fi->write_done(fi);
+  return 1;
+}
+
+/*
+ * Ask for len bytes to be read to buf; read_done is called when the request
+ * is finished.  A zero len cancels reading.  The file must be registered.
+ */
+void
+file_read(struct main_file *fi, void *buf, uns len)
+{
+  ASSERT(fi->n.next);
+  fi->read_handler = len ? file_read_handler : NULL;
+  fi->rbuf = len ? buf : NULL;
+  fi->rpos = 0;
+  fi->rlen = len;
+  file_chg(fi);
+}
+
+/*
+ * Ask for len bytes from buf to be written; write_done is called when the
+ * request is finished.  A zero len cancels writing.  The file must be
+ * registered.
+ */
+void
+file_write(struct main_file *fi, void *buf, uns len)
+{
+  ASSERT(fi->n.next);
+  fi->write_handler = len ? file_write_handler : NULL;
+  fi->wbuf = len ? buf : NULL;
+  fi->wpos = 0;
+  fi->wlen = len;
+  file_chg(fi);
+}
+
+/* Set the time at which the file's timer expires (0 cancels it); the file must be registered */
+void
+file_set_timeout(struct main_file *fi, timestamp_t expires)
+{
+  ASSERT(fi->n.next);
+  timer_add(&fi->timer, expires);
+}
+
+/* Close the descriptors of all registered files; the files themselves stay on the list */
+void
+file_close_all(void)
+{
+  CLIST_FOR_EACH(struct main_file *, f, main_file_list)
+    close(f->fd);
+}
+
+/* Append a hook to the list of hooks; it must not be registered yet */
+void
+hook_add(struct main_hook *ho)
+{
+  DBG("MAIN: Adding hook %p", ho);
+  ASSERT(!ho->n.next);
+  clist_add_tail(&main_hook_list, &ho->n);
+}
+
+/* Take a registered hook off the list and mark it as unregistered */
+void
+hook_del(struct main_hook *ho)
+{
+  DBG("MAIN: Deleting hook %p", ho);
+  ASSERT(ho->n.next);
+  clist_remove(&ho->n);
+  ho->n.prev = NULL;
+  ho->n.next = NULL;
+}
+
+/* SIGCHLD handler; it does nothing except for an optional debugging message */
+static void
+main_sigchld_handler(int x UNUSED)
+{
+  DBG("SIGCHLD received");
+}
+
+/*
+ * Start watching a child process.  The structure must not be registered yet
+ * and must have a handler set.  The first call also installs the SIGCHLD
+ * handler.
+ */
+void
+process_add(struct main_process *mp)
+{
+  DBG("MAIN: Adding process %p (pid=%d)", mp, mp->pid);
+  ASSERT(!mp->n.next);
+  ASSERT(mp->handler);
+  clist_add_tail(&main_process_list, &mp->n);
+  if (main_sigchld_set_up)
+    return;
+
+  struct sigaction sa;
+  bzero(&sa, sizeof(sa));
+  sa.sa_handler = main_sigchld_handler;
+  sa.sa_flags = SA_NOCLDSTOP | SA_RESTART;
+  sigaction(SIGCHLD, &sa, NULL);
+  main_sigchld_set_up = 1;
+}
+
+/* Stop watching a registered process and mark the structure as unregistered */
+void
+process_del(struct main_process *mp)
+{
+  DBG("MAIN: Deleting process %p (pid=%d)", mp, mp->pid);
+  ASSERT(mp->n.next);
+  clist_remove(&mp->n);
+  /* Clear both links, the same way file_del() and hook_del() do */
+  mp->n.next = mp->n.prev = NULL;
+}
+
+/*
+ * Fork a child process and start watching it.  Returns 0 in the child and 1
+ * in the parent.  If fork() fails, status is set to -1, the status message is
+ * filled in and the handler is called immediately; 1 is returned then, too.
+ */
+int
+process_fork(struct main_process *mp)
+{
+  pid_t child = fork();
+
+  if (!child)
+    return 0;
+  if (child < 0)
+    {
+      DBG("MAIN: Fork failed");
+      mp->status = -1;
+      format_exit_status(mp->status_msg, -1);
+      mp->handler(mp);
+      return 1;
+    }
+  DBG("MAIN: Forked process %d", (int) child);
+  mp->pid = child;
+  process_add(mp);
+  return 1;
+}
+
+/*
+ * Log all active timers, files, hooks and processes at the L_DEBUG level.
+ * Compiled to an empty function unless CONFIG_DEBUG is defined.
+ */
+void
+main_debug(void)
+{
+#ifdef CONFIG_DEBUG
+  msg(L_DEBUG, "### Main loop status on %lld", (long long)main_now);
+  msg(L_DEBUG, "\tActive timers:");
+  struct main_timer *tm;
+  /* Expiration times are printed relative to main_now; 999999 stands for "no timeout" */
+  CLIST_WALK(tm, main_timer_list)
+    msg(L_DEBUG, "\t\t%p (expires %lld, data %p)", tm, (long long)(tm->expires ? tm->expires-main_now : 999999), tm->data);
+  struct main_file *fi;
+  msg(L_DEBUG, "\tActive files:");
+  CLIST_WALK(fi, main_file_list)
+    msg(L_DEBUG, "\t\t%p (fd %d, rh %p, wh %p, eh %p, expires %lld, data %p)",
+       fi, fi->fd, fi->read_handler, fi->write_handler, fi->error_handler,
+       (long long)(fi->timer.expires ? fi->timer.expires-main_now : 999999), fi->data);
+  msg(L_DEBUG, "\tActive hooks:");
+  struct main_hook *ho;
+  CLIST_WALK(ho, main_hook_list)
+    msg(L_DEBUG, "\t\t%p (func %p, data %p)", ho, ho->handler, ho->data);
+  msg(L_DEBUG, "\tActive processes:");
+  struct main_process *pr;
+  CLIST_WALK(pr, main_process_list)
+    msg(L_DEBUG, "\t\t%p (pid %d, data %p)", pr, pr->pid, pr->data);
+#endif
+}
+
+/*
+ * Rebuild the table passed to poll() from the list of files.  The table is
+ * reallocated to the next power of two when it is too small; every file gets
+ * its own entry and its event mask is recalculated.
+ */
+static void
+main_rebuild_poll_table(void)
+{
+  if (main_poll_table_size < main_file_cnt)
+    {
+      uns new_size = main_poll_table_size;
+      if (main_poll_table)
+       xfree(main_poll_table);
+      else
+       new_size = 1;
+      while (new_size < main_file_cnt)
+       new_size *= 2;
+      main_poll_table_size = new_size;
+      main_poll_table = xmalloc(sizeof(struct pollfd) * new_size);
+    }
+
+  struct pollfd *slot = main_poll_table;
+  struct main_file *fi;
+  DBG("MAIN: Rebuilding poll table: %d of %d entries set", main_file_cnt, main_poll_table_size);
+  CLIST_WALK(fi, main_file_list)
+    {
+      slot->fd = fi->fd;
+      fi->pollfd = slot;
+      slot++;
+      file_chg(fi);
+    }
+  main_poll_table_obsolete = 0;
+}
+
+/*
+ * The main loop.  Each iteration runs the handlers of all expired timers,
+ * calls all hooks, collects exited child processes and finally polls the
+ * registered files and calls their read/write handlers.  The function returns
+ * when a hook returns HOOK_SHUTDOWN, when all hooks return HOOK_DONE or when
+ * main_shutdown is set.  main_init() must have been called before.
+ */
+void
+main_loop(void)
+{
+  DBG("MAIN: Entering main_loop");
+  ASSERT(main_timer_list.head.next);
+
+  struct main_file *fi;
+  struct main_hook *ho;
+  struct main_timer *tm;
+  struct main_process *pr;
+  cnode *tmp;
+
+  for (;;)
+    {
+      main_get_time();
+      /* Time of the next wake-up; zero means "do not sleep at all" */
+      timestamp_t wake = main_now + 1000000000;
+      /* The handler is expected to re-add or delete the timer, otherwise this would loop forever */
+      while ((tm = clist_head(&main_timer_list)) && tm->expires <= main_now)
+       {
+         DBG("MAIN: Timer %p expired at now-%lld", tm, (long long)(main_now - tm->expires));
+         tm->handler(tm);
+       }
+      int hook_min = HOOK_RETRY;
+      int hook_max = HOOK_SHUTDOWN;
+      CLIST_WALK_DELSAFE(ho, main_hook_list, tmp)
+       {
+         DBG("MAIN: Hook %p", ho);
+         int ret = ho->handler(ho);
+         hook_min = MIN(hook_min, ret);
+         hook_max = MAX(hook_max, ret);
+       }
+      if (hook_min == HOOK_SHUTDOWN ||
+         hook_min == HOOK_DONE && hook_max == HOOK_DONE ||
+         main_shutdown)
+       {
+         DBG("MAIN: Shut down by %s", main_shutdown ? "main_shutdown" : "a hook");
+         return;
+       }
+      if (hook_max == HOOK_RETRY)
+       wake = 0;
+      if (main_poll_table_obsolete)
+       main_rebuild_poll_table();
+      if (!clist_empty(&main_process_list))
+       {
+         int stat;
+         pid_t pid;
+         /* With children around, wake up at least every 10 seconds */
+         wake = MIN(wake, main_now + 10000);
+         while ((pid = waitpid(-1, &stat, WNOHANG)) > 0)
+           {
+             DBG("MAIN: Child %d exited with status %x", pid, stat);
+             CLIST_WALK(pr, main_process_list)
+               if (pr->pid == pid)
+                 {
+                   pr->status = stat;
+                   process_del(pr);
+                   format_exit_status(pr->status_msg, pr->status);
+                   DBG("MAIN: Calling process exit handler");
+                   pr->handler(pr);
+                   break;
+                 }
+             wake = 0;
+           }
+       }
+      /* FIXME: Here is a small race window where SIGCHLD can come unnoticed. */
+      if ((tm = clist_head(&main_timer_list)) && tm->expires < wake)
+       wake = tm->expires;
+      int timeout = (wake ? wake - main_now : 0);
+      DBG("MAIN: Poll for %d fds and timeout %d ms", main_file_cnt, timeout);
+      /*
+       * Look at the table only if poll() reports some events.  When it fails
+       * (e.g., with EINTR because of SIGCHLD), it returns -1 and the revents
+       * fields are stale or, in a freshly allocated table, uninitialized.
+       */
+      if (poll(main_poll_table, main_file_cnt, timeout) > 0)
+       {
+         struct pollfd *p = main_poll_table;
+         main_get_time();
+         CLIST_WALK(fi, main_file_list)
+           {
+             if (p->revents & (POLLIN | POLLHUP | POLLERR))
+               {
+                 do
+                   DBG("MAIN: Read event on fd %d", p->fd);
+                 while (fi->read_handler && fi->read_handler(fi) && !main_poll_table_obsolete);
+                 if (main_poll_table_obsolete) /* File entries have been inserted or deleted => better not risk continuing to nowhere */
+                   break;
+               }
+             if (p->revents & (POLLOUT | POLLERR))
+               {
+                 do
+                   DBG("MAIN: Write event on fd %d", p->fd);
+                 while (fi->write_handler && fi->write_handler(fi) && !main_poll_table_obsolete);
+                 if (main_poll_table_obsolete)
+                   break;
+               }
+             p++;
+           }
+       }
+    }
+}
+
+#ifdef TEST
+
+/* Objects registered with the main loop by the test program below */
+static struct main_process mp;
+static struct main_file fin, fout;
+static struct main_hook hook;
+static struct main_timer tm;
+
+/* Buffer for data read from fin */
+static byte rb[16];
+
+/* read_done callback: ask for another buffer, or unregister the file if the last read was cut short by EOF */
+static void dread(struct main_file *fi)
+{
+  if (fi->rpos >= fi->rlen)
+    {
+      log(L_INFO, "Read done");
+      file_read(fi, rb, sizeof(rb));
+    }
+  else
+    {
+      log(L_INFO, "Read EOF");
+      file_del(fi);
+    }
+}
+
+/* Error callback: log the error together with its cause (MFERR_xxx) and unregister the file */
+static void derror(struct main_file *fi, int cause)
+{
+  log(L_INFO, "Error: %m !!! (cause %d)", cause);
+  file_del(fi);
+}
+
+/* write_done callback: just log that the write has been finished */
+static void dwrite(struct main_file *fi UNUSED)
+{
+  log(L_INFO, "Write done");
+}
+
+/* Hook callback: log the call and ask to be called again when the loop becomes idle */
+static int dhook(struct main_hook *ho UNUSED)
+{
+  log(L_INFO, "Hook called");
+  return HOOK_IDLE;
+}
+
+/* Timer callback: log a tick and re-arm the timer 10000 ms from now */
+static void dtimer(struct main_timer *tm)
+{
+  log(L_INFO, "Timer tick");
+  timer_add(tm, main_now + 10000);
+}
+
+/* Body of the forked child: sleep for 2 seconds and exit with status 0 */
+static void dentry(void)
+{
+  log(L_INFO, "*** SUBPROCESS START ***");
+  sleep(2);
+  log(L_INFO, "*** SUBPROCESS FINISH ***");
+  exit(0);
+}
+
+/* Process exit callback: log the PID and the raw exit status of the child */
+static void dexit(struct main_process *pr)
+{
+  log(L_INFO, "Subprocess %d exited with status %x", pr->pid, pr->status);
+}
+
+/*
+ * Test program: reads from fd 0, writes a greeting to fd 1 and registers
+ * a hook, a timer and a forked child, then enters the main loop.
+ */
+int
+main(void)
+{
+  log_init(NULL);
+  main_init();
+
+  /* Keep reading standard input in 16-byte chunks */
+  fin.fd = 0;
+  fin.read_done = dread;
+  fin.error_handler = derror;
+  file_add(&fin);
+  file_read(&fin, rb, sizeof(rb));
+
+  /* Write a single message to standard output */
+  fout.fd = 1;
+  fout.write_done = dwrite;
+  fout.error_handler = derror;
+  file_add(&fout);
+  file_write(&fout, "Hello, world!\n", 14);
+
+  hook.handler = dhook;
+  hook_add(&hook);
+
+  /* The first tick comes after 1000 ms; dtimer() re-arms the timer itself */
+  tm.handler = dtimer;
+  timer_add(&tm, main_now + 1000);
+
+  /* process_fork() returns 0 in the child, which runs dentry() and exits */
+  mp.handler = dexit;
+  if (!process_fork(&mp))
+    dentry();
+
+  main_debug();
+
+  main_loop();
+  log(L_INFO, "Finished.");
+}
+
+#endif
diff --git a/lib/mainloop.h b/lib/mainloop.h

new file mode 100644 (file)

index 0000000..5c3baf6
--- /dev/null
+++ b/lib/mainloop.h
@@ -0,0 +1,108 @@
+/*
+ *     UCW Library -- Main Loop
+ *
+ *     (c) 2004--2005 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_MAINLOOP_H
+#define _UCW_MAINLOOP_H
+
+#include "lib/clists.h"
+
+extern timestamp_t main_now;                   /* Current time in milliseconds since UNIX epoch */
+extern sh_time_t main_now_seconds;             /* Current time in seconds since the epoch */
+/* When set to non-zero, main_loop() returns after the hooks of the current iteration have been called */
+extern uns main_shutdown;
+/* Lists of all registered timers, files, hooks and processes */
+extern clist main_timer_list, main_file_list, main_hook_list, main_process_list;
+
+/* User-defined fields are marked with [*], all other fields must be initialized to zero. */
+
+/* Timers */
+
+/* A timer: its handler is called by main_loop() once main_now reaches expires */
+struct main_timer {
+  cnode n;                                     /* Node in main_timer_list, which timer_add() keeps sorted by expires */
+  timestamp_t expires;                         /* Expiration time in main_now units; 0 = the timer is not scheduled */
+  void (*handler)(struct main_timer *tm);      /* [*] Function to be called when the timer expires. Must re-add/del the timer.*/
+  void *data;                                  /* [*] Data for use by the handler */
+};
+
+/* Schedule the timer to the given absolute time; zero expires cancels it */
+void timer_add(struct main_timer *tm, timestamp_t expires);    /* Can modify a running timer, too */
+/* Cancel the timer; the same as timer_add(tm, 0) */
+void timer_del(struct main_timer *tm);
+
+void main_get_time(void);                      /* Refresh main_now */
+
+/* Files to poll */
+
+/* A file descriptor polled by the main loop */
+struct main_file {
+  cnode n;                                     /* Node in main_file_list; n.next is NULL when the file is not registered */
+  int fd;                                      /* [*] File descriptor */
+  int (*read_handler)(struct main_file *fi);   /* [*] To be called when ready for reading/writing; must call file_chg() afterwards */
+  int (*write_handler)(struct main_file *fi);
+  void (*error_handler)(struct main_file *fi, int cause);      /* [*] Handler to call on errors */
+  void *data;                                  /* [*] Data for use by the handlers */
+  byte *rbuf;                                  /* Read/write pointers for use by file_read/write */
+  uns rpos, rlen;
+  byte *wbuf;
+  uns wpos, wlen;
+  void (*read_done)(struct main_file *fi);     /* [*] Called when file_read is finished; rpos < rlen if EOF */
+  void (*write_done)(struct main_file *fi);    /* [*] Called when file_write is finished */
+  struct main_timer timer;                     /* Timeout timer, see file_set_timeout(); reports MFERR_TIMEOUT when it expires */
+  struct pollfd *pollfd;                       /* Entry in the poll table, assigned when the table is rebuilt */
+};
+
+/* Causes passed to main_file->error_handler */
+enum main_file_err_cause {
+  MFERR_READ,                                  /* read() has failed inside file_read() processing */
+  MFERR_WRITE,                                 /* write() has failed inside file_write() processing */
+  MFERR_TIMEOUT                                        /* The timer of the file has expired */
+};
+
+/* Register a file; also switches its descriptor to non-blocking mode */
+void file_add(struct main_file *fi);
+/* Recalculate polled events after read_handler or write_handler has been changed */
+void file_chg(struct main_file *fi);
+/* Unregister a file and cancel its timer */
+void file_del(struct main_file *fi);
+/* Read/write len bytes and call read_done/write_done afterwards; zero len cancels the request */
+void file_read(struct main_file *fi, void *buf, uns len);
+void file_write(struct main_file *fi, void *buf, uns len);
+/* Set the time when the file's timer expires; 0 cancels the timer */
+void file_set_timeout(struct main_file *fi, timestamp_t expires);
+void file_close_all(void);                     /* Close all known main_file's; frequently used before fork() */
+
+/* Hooks to be called in each iteration of the main loop */
+
+/* A hook: its handler is called once per iteration of the main loop */
+struct main_hook {
+  cnode n;                                     /* Node in main_hook_list; n.next is NULL when the hook is not registered */
+  int (*handler)(struct main_hook *ho);                /* [*] Hook function; returns HOOK_xxx */
+  void *data;                                  /* [*] For use by the handler */
+};
+
+/* Return values of hook handlers; main_loop() compares them numerically, so the order matters */
+enum main_hook_return {
+  HOOK_IDLE,                                   /* Call again when the main loop becomes idle again */
+  HOOK_RETRY,                                  /* Call again as soon as possible */
+  HOOK_DONE = -1,                              /* Shut down the main loop if all hooks return this value */
+  HOOK_SHUTDOWN = -2                           /* Shut down the main loop immediately */
+};
+
+/* Register / unregister a hook */
+void hook_add(struct main_hook *ho);
+void hook_del(struct main_hook *ho);
+
+/* Processes to watch */
+
+/* A child process watched by the main loop */
+struct main_process {
+  cnode n;                                     /* Node in main_process_list; n.next is NULL when the process is not registered */
+  int pid;                                     /* Process id (0=not running) */
+  int status;                                  /* Exit status (-1=fork failed) */
+  char status_msg[EXIT_STATUS_MSG_SIZE];       /* Filled in by format_exit_status() before the handler is called */
+  void (*handler)(struct main_process *mp);    /* [*] Called when the process exits; process_del done automatically */
+  void *data;                                  /* [*] For use by the handler */
+};
+
+/* Start / stop watching a process whose pid has been filled in */
+void process_add(struct main_process *mp);
+void process_del(struct main_process *mp);
+/* fork() and start watching the child; returns 0 in the child and 1 in the parent (also when fork() fails) */
+int process_fork(struct main_process *mp);
+
+/* The main loop */
+
+/* Initialize the lists and the clock; must be called before main_loop() */
+void main_init(void);
+/* Run until a hook asks for shutdown or main_shutdown is set */
+void main_loop(void);
+/* Log the state of the main loop; does nothing unless CONFIG_DEBUG is defined */
+void main_debug(void);
+
+#endif
diff --git a/lib/md5.c b/lib/md5.c

new file mode 100644 (file)

index 0000000..ba8ff56
--- /dev/null
+++ b/lib/md5.c
@@ -0,0 +1,249 @@
+/*
+ * This code implements the MD5 message-digest algorithm.
+ * The algorithm is due to Ron Rivest.  This code was
+ * written by Colin Plumb in 1993, no copyright is claimed.
+ * This code is in the public domain; do with it what you wish.
+ *
+ * Equivalent code is available from RSA Data Security, Inc.
+ * This code has been tested against that, and is equivalent,
+ * except that you don't need to include two pages of legalese
+ * with every copy.
+ *
+ * To compute the message digest of a chunk of bytes, declare an
+ * MD5Context structure, pass it to MD5Init, call MD5Update as
+ * needed on buffers full of bytes, and then call MD5Final, which
+ * will fill a supplied 16-byte array with the digest.
+ */
+
+#include "lib/lib.h"
+#include "lib/md5.h"
+
+#include <string.h>            /* for memcpy() */
+
+#ifdef CPU_LITTLE_ENDIAN
+#define byteReverse(buf, len)  /* Nothing */
+#else
+void byteReverse(unsigned char *buf, unsigned longs);
+
+/*
+ * Rewrite `longs' 32-bit words at buf in place, reading each as little-endian. Harmless on little-endian machines.
+ */
+void byteReverse(unsigned char *buf, unsigned longs)
+{
+    uint32 t;
+    do {                       /* do-while: longs must be at least 1 */
+       t = (uint32) ((unsigned) buf[3] << 8 | buf[2]) << 16 |
+           ((unsigned) buf[1] << 8 | buf[0]);  /* buf[0] is the least significant byte */
+       *(uint32 *) buf = t;    /* Stored back through a uint32 pointer, so buf must be aligned for that */
+       buf += 4;
+    } while (--longs);
+}
+#endif
+
+/*
+ * Prepare a context for a new digest: load the four state words with
+ * their fixed starting values and zero the 64-bit bit counter.
+ */
+void MD5Init(struct MD5Context *ctx)
+{
+    uint32 *state = ctx->buf;
+
+    state[0] = 0x67452301;
+    state[1] = 0xefcdab89;
+    state[2] = 0x98badcfe;
+    state[3] = 0x10325476;
+    ctx->bits[0] = ctx->bits[1] = 0;
+}
+
+/*
+ * Update context to reflect the concatenation of another buffer full
+ * of bytes: len bytes at buf are hashed; a partial block waits in ctx->in.
+ */
+void MD5Update(struct MD5Context *ctx, unsigned char const *buf, unsigned len)
+{
+    uint32 t;
+
+    /* Update bitcount */
+
+    t = ctx->bits[0];
+    if ((ctx->bits[0] = t + ((uint32) len << 3)) < t)
+       ctx->bits[1]++;         /* Carry from low to high */
+    ctx->bits[1] += len >> 29; /* Bits of len*8 that fall above the low 32-bit word */
+
+    t = (t >> 3) & 0x3f;       /* Bytes already in ctx->in */
+
+    /* Handle any leading odd-sized chunks */
+
+    if (t) {
+       unsigned char *p = (unsigned char *) ctx->in + t;
+
+       t = 64 - t;             /* Free space left in the partial block */
+       if (len < t) {
+           memcpy(p, buf, len);        /* Still not a full block; just buffer the data */
+           return;
+       }
+       memcpy(p, buf, t);
+       byteReverse(ctx->in, 16);
+       MD5Transform(ctx->buf, (uint32 *) ctx->in);
+       buf += t;
+       len -= t;
+    }
+    /* Process data in 64-byte chunks */
+
+    while (len >= 64) {
+       memcpy(ctx->in, buf, 64);
+       byteReverse(ctx->in, 16);
+       MD5Transform(ctx->buf, (uint32 *) ctx->in);
+       buf += 64;
+       len -= 64;
+    }
+
+    /* Handle any remaining bytes of data. */
+
+    memcpy(ctx->in, buf, len);
+}
+
+/*
+ * Final wrapup - pad to 64-byte boundary with the bit pattern 1 0*
+ * (64-bit count of bits processed, low word first), write digest, wipe *ctx.
+ */
+void MD5Final(unsigned char digest[16], struct MD5Context *ctx)
+{
+    unsigned count;
+    unsigned char *p;
+
+    /* Compute number of bytes mod 64 */
+    count = (ctx->bits[0] >> 3) & 0x3F;
+
+    /* Set the first char of padding to 0x80.  This is safe since there is
+       always at least one byte free */
+    p = ctx->in + count;
+    *p++ = 0x80;
+
+    /* Bytes of padding needed to make 64 bytes */
+    count = 64 - 1 - count;
+
+    /* Pad out to 56 mod 64 */
+    if (count < 8) {
+       /* Two lots of padding:  Pad the first block to 64 bytes */
+       memset(p, 0, count);
+       byteReverse(ctx->in, 16);
+       MD5Transform(ctx->buf, (uint32 *) ctx->in);
+
+       /* Now fill the next block with 56 bytes */
+       memset(ctx->in, 0, 56);
+    } else {
+       /* Pad block to 56 bytes */
+       memset(p, 0, count - 8);
+    }
+    byteReverse(ctx->in, 14);  /* Only the 14 data words; the length words below are stored natively */
+
+    /* Append length in bits and transform */
+    ((uint32 *) ctx->in)[14] = ctx->bits[0];
+    ((uint32 *) ctx->in)[15] = ctx->bits[1];
+
+    MD5Transform(ctx->buf, (uint32 *) ctx->in);
+    byteReverse((unsigned char *) ctx->buf, 4);
+    memcpy(digest, ctx->buf, 16);
+    memset((char *) ctx, 0, sizeof(*ctx));     /* Wipe the whole structure (not just pointer-size bytes) in case it's sensitive */
+}
+
+/* The four core functions - F1 is optimized somewhat */
+
+/* #define F1(x, y, z) (x & y | ~x & z) */
+#define F1(x, y, z) (z ^ (x & (y ^ z)))
+#define F2(x, y, z) F1(z, x, y)
+#define F3(x, y, z) (x ^ y ^ z)
+#define F4(x, y, z) (y ^ (x | ~z))
+
+/* This is the central step in the MD5 algorithm. */
+#define MD5STEP(f, w, x, y, z, data, s) \
+       ( w += f(x, y, z) + data,  w = w<<s | w>>(32-s),  w += x )
+
+/*
+ * The core of the MD5 algorithm, this alters an existing MD5 hash to
+ * reflect the addition of 16 longwords of new data.  MD5Update blocks
+ * the data and converts bytes into longwords for this routine.
+ */
+void MD5Transform(uint32 buf[4], uint32 const in[16])
+{
+    uint32 a, b, c, d;
+
+    a = buf[0];
+    b = buf[1];
+    c = buf[2];
+    d = buf[3];
+    /* Round 1: sixteen F1 steps, input words used in order 0..15 */
+    MD5STEP(F1, a, b, c, d, in[0] + 0xd76aa478, 7);
+    MD5STEP(F1, d, a, b, c, in[1] + 0xe8c7b756, 12);
+    MD5STEP(F1, c, d, a, b, in[2] + 0x242070db, 17);
+    MD5STEP(F1, b, c, d, a, in[3] + 0xc1bdceee, 22);
+    MD5STEP(F1, a, b, c, d, in[4] + 0xf57c0faf, 7);
+    MD5STEP(F1, d, a, b, c, in[5] + 0x4787c62a, 12);
+    MD5STEP(F1, c, d, a, b, in[6] + 0xa8304613, 17);
+    MD5STEP(F1, b, c, d, a, in[7] + 0xfd469501, 22);
+    MD5STEP(F1, a, b, c, d, in[8] + 0x698098d8, 7);
+    MD5STEP(F1, d, a, b, c, in[9] + 0x8b44f7af, 12);
+    MD5STEP(F1, c, d, a, b, in[10] + 0xffff5bb1, 17);
+    MD5STEP(F1, b, c, d, a, in[11] + 0x895cd7be, 22);
+    MD5STEP(F1, a, b, c, d, in[12] + 0x6b901122, 7);
+    MD5STEP(F1, d, a, b, c, in[13] + 0xfd987193, 12);
+    MD5STEP(F1, c, d, a, b, in[14] + 0xa679438e, 17);
+    MD5STEP(F1, b, c, d, a, in[15] + 0x49b40821, 22);
+    /* Round 2: sixteen F2 steps */
+    MD5STEP(F2, a, b, c, d, in[1] + 0xf61e2562, 5);
+    MD5STEP(F2, d, a, b, c, in[6] + 0xc040b340, 9);
+    MD5STEP(F2, c, d, a, b, in[11] + 0x265e5a51, 14);
+    MD5STEP(F2, b, c, d, a, in[0] + 0xe9b6c7aa, 20);
+    MD5STEP(F2, a, b, c, d, in[5] + 0xd62f105d, 5);
+    MD5STEP(F2, d, a, b, c, in[10] + 0x02441453, 9);
+    MD5STEP(F2, c, d, a, b, in[15] + 0xd8a1e681, 14);
+    MD5STEP(F2, b, c, d, a, in[4] + 0xe7d3fbc8, 20);
+    MD5STEP(F2, a, b, c, d, in[9] + 0x21e1cde6, 5);
+    MD5STEP(F2, d, a, b, c, in[14] + 0xc33707d6, 9);
+    MD5STEP(F2, c, d, a, b, in[3] + 0xf4d50d87, 14);
+    MD5STEP(F2, b, c, d, a, in[8] + 0x455a14ed, 20);
+    MD5STEP(F2, a, b, c, d, in[13] + 0xa9e3e905, 5);
+    MD5STEP(F2, d, a, b, c, in[2] + 0xfcefa3f8, 9);
+    MD5STEP(F2, c, d, a, b, in[7] + 0x676f02d9, 14);
+    MD5STEP(F2, b, c, d, a, in[12] + 0x8d2a4c8a, 20);
+    /* Round 3: sixteen F3 steps */
+    MD5STEP(F3, a, b, c, d, in[5] + 0xfffa3942, 4);
+    MD5STEP(F3, d, a, b, c, in[8] + 0x8771f681, 11);
+    MD5STEP(F3, c, d, a, b, in[11] + 0x6d9d6122, 16);
+    MD5STEP(F3, b, c, d, a, in[14] + 0xfde5380c, 23);
+    MD5STEP(F3, a, b, c, d, in[1] + 0xa4beea44, 4);
+    MD5STEP(F3, d, a, b, c, in[4] + 0x4bdecfa9, 11);
+    MD5STEP(F3, c, d, a, b, in[7] + 0xf6bb4b60, 16);
+    MD5STEP(F3, b, c, d, a, in[10] + 0xbebfbc70, 23);
+    MD5STEP(F3, a, b, c, d, in[13] + 0x289b7ec6, 4);
+    MD5STEP(F3, d, a, b, c, in[0] + 0xeaa127fa, 11);
+    MD5STEP(F3, c, d, a, b, in[3] + 0xd4ef3085, 16);
+    MD5STEP(F3, b, c, d, a, in[6] + 0x04881d05, 23);
+    MD5STEP(F3, a, b, c, d, in[9] + 0xd9d4d039, 4);
+    MD5STEP(F3, d, a, b, c, in[12] + 0xe6db99e5, 11);
+    MD5STEP(F3, c, d, a, b, in[15] + 0x1fa27cf8, 16);
+    MD5STEP(F3, b, c, d, a, in[2] + 0xc4ac5665, 23);
+    /* Round 4: sixteen F4 steps */
+    MD5STEP(F4, a, b, c, d, in[0] + 0xf4292244, 6);
+    MD5STEP(F4, d, a, b, c, in[7] + 0x432aff97, 10);
+    MD5STEP(F4, c, d, a, b, in[14] + 0xab9423a7, 15);
+    MD5STEP(F4, b, c, d, a, in[5] + 0xfc93a039, 21);
+    MD5STEP(F4, a, b, c, d, in[12] + 0x655b59c3, 6);
+    MD5STEP(F4, d, a, b, c, in[3] + 0x8f0ccc92, 10);
+    MD5STEP(F4, c, d, a, b, in[10] + 0xffeff47d, 15);
+    MD5STEP(F4, b, c, d, a, in[1] + 0x85845dd1, 21);
+    MD5STEP(F4, a, b, c, d, in[8] + 0x6fa87e4f, 6);
+    MD5STEP(F4, d, a, b, c, in[15] + 0xfe2ce6e0, 10);
+    MD5STEP(F4, c, d, a, b, in[6] + 0xa3014314, 15);
+    MD5STEP(F4, b, c, d, a, in[13] + 0x4e0811a1, 21);
+    MD5STEP(F4, a, b, c, d, in[4] + 0xf7537e82, 6);
+    MD5STEP(F4, d, a, b, c, in[11] + 0xbd3af235, 10);
+    MD5STEP(F4, c, d, a, b, in[2] + 0x2ad7d2bb, 15);
+    MD5STEP(F4, b, c, d, a, in[9] + 0xeb86d391, 21);
+    /* Fold the working copies back into the caller's hash state */
+    buf[0] += a;
+    buf[1] += b;
+    buf[2] += c;
+    buf[3] += d;
+}
diff --git a/lib/md5.h b/lib/md5.h

new file mode 100644 (file)

index 0000000..be51a52
--- /dev/null
+++ b/lib/md5.h
@@ -0,0 +1,24 @@
+/*
+ *     UCW Library -- MD5 Message Digest
+ *
+ *     This file is in public domain (see lib/md5.c).
+ */
+
+#ifndef _UCW_MD5_H
+#define _UCW_MD5_H
+
+typedef u32 uint32;
+
+struct MD5Context {
+       uint32 buf[4];          /* Running hash state: seeded by MD5Init, updated by MD5Transform */
+       uint32 bits[2];         /* 64-bit count of bits hashed so far, low word in bits[0] */
+       unsigned char in[64];   /* Input block being assembled; (bits[0] >> 3) & 0x3f bytes are in use */
+};
+
+void MD5Init(struct MD5Context *context);
+void MD5Update(struct MD5Context *context, unsigned char const *buf,
+              unsigned len);
+void MD5Final(unsigned char digest[16], struct MD5Context *context);
+void MD5Transform(uint32 buf[4], uint32 const in[16]);
+
+#endif /* !_UCW_MD5_H */
diff --git a/lib/md5hex.c b/lib/md5hex.c

new file mode 100644 (file)

index 0000000..93987b0
--- /dev/null
+++ b/lib/md5hex.c
@@ -0,0 +1,35 @@
+/*
+ *     UCW Library -- MD5 Binary <-> Hex Conversions
+ *
+ *     (c) 1997 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/chartype.h"
+
+#include <stdio.h>
+
+void
+md5_to_hex(const byte *s, char *d)
+{
+  /* Format each of the MD5_SIZE bytes at s as upper-case hex into d; sprintf() leaves d NUL-terminated */
+  for (const byte *end = s + MD5_SIZE; s < end; s++)
+    d += sprintf(d, "%02X", *s);
+}
+
+void
+hex_to_md5(const char *s, byte *d)
+{
+  /* Decode MD5_SIZE pairs of hex digits from s into bytes at d; die() on any non-hex character */
+  for (byte *end = d + MD5_SIZE; d < end; d++, s += 2)
+    {
+      if (!(Cxdigit(s[0]) && Cxdigit(s[1])))
+       die("hex_to_md5: syntax error");
+      uns hi = Cxvalue(s[0]);
+      uns lo = Cxvalue(s[1]);
+      *d = (hi << 4) | lo;
+    }
+}
diff --git a/lib/mempool-fmt.c b/lib/mempool-fmt.c

new file mode 100644 (file)

index 0000000..397a772
--- /dev/null
+++ b/lib/mempool-fmt.c
@@ -0,0 +1,99 @@
+/*
+ *     UCW Library -- Memory Pools (Formatting)
+ *
+ *     (c) 2005 Martin Mares <mj@ucw.cz>
+ *     (c) 2007 Pavel Charvat <pchar@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/mempool.h"
+
+#include <alloca.h>
+#include <stdio.h>
+#include <string.h>
+
+static char *
+mp_vprintf_at(struct mempool *mp, uns ofs, const char *fmt, va_list args)
+{
+  char *ret = mp_grow(mp, ofs + 1) + ofs;      /* Format into the growing buffer, starting ofs bytes in */
+  va_list args2;
+  va_copy(args2, args);                                /* args may be needed again for a retry, so print from a copy */
+  int cnt = vsnprintf(ret, mp_avail(mp) - ofs, fmt, args2);
+  va_end(args2);
+  if (cnt < 0)
+    {
+      /* Our C library doesn't support C99 return value of vsnprintf, so we need to iterate */
+      do
+       {
+         ret = mp_expand(mp) + ofs;
+         va_copy(args2, args);
+         cnt = vsnprintf(ret, mp_avail(mp) - ofs, fmt, args2);
+         va_end(args2);
+       }
+      while (cnt < 0);
+    }
+  else if ((uns)cnt >= mp_avail(mp) - ofs)
+    {
+      ret = mp_grow(mp, ofs + cnt + 1) + ofs;  /* Output was truncated: need the ofs-byte prefix + cnt chars + NUL */
+      va_copy(args2, args);
+      int cnt2 = vsnprintf(ret, cnt + 1, fmt, args2);
+      va_end(args2);
+      ASSERT(cnt2 == cnt);
+    }
+  mp_end(mp, ret + cnt + 1);                   /* Close the buffer just past the terminating NUL */
+  return ret - ofs;                            /* Start of the whole buffer, prefix included */
+}
+
+char *
+mp_vprintf(struct mempool *mp, const char *fmt, va_list args)
+{
+  mp_start(mp, 1);
+  return mp_vprintf_at(mp, 0, fmt, args);
+}
+
+char *
+mp_printf(struct mempool *p, const char *fmt, ...)
+{
+  va_list args;
+  va_start(args, fmt);
+  char *res = mp_vprintf(p, fmt, args);
+  va_end(args);
+  return res;
+}
+
+char *
+mp_vprintf_append(struct mempool *mp, char *ptr, const char *fmt, va_list args)
+{
+  uns ofs = mp_open(mp, ptr);  /* Reopen ptr for growing; presumably returns its size incl. the NUL -- TODO confirm in mempool.h */
+  ASSERT(ofs);                 /* ofs - 1 below would wrap around if the block were empty */
+  return mp_vprintf_at(mp, ofs - 1, fmt, args);        /* Start formatting at offset ofs - 1 of the reopened block */
+}
+
+char *
+mp_printf_append(struct mempool *mp, char *ptr, const char *fmt, ...)
+{
+  va_list args;
+  va_start(args, fmt);
+  char *res = mp_vprintf_append(mp, ptr, fmt, args);
+  va_end(args);
+  return res;
+}
+
+#ifdef TEST
+
+int main(void)
+{
+  struct mempool *mp = mp_new(64);
+  char *x = mp_printf(mp, "<Hello, %s!>", "World");
+  fputs(x, stdout);
+  x = mp_printf_append(mp, x, "<Appended>");
+  fputs(x, stdout);
+  x = mp_printf(mp, "<Hello, %50s!>\n", "World");
+  fputs(x, stdout);
+  return 0;
+}
+
+#endif
diff --git a/lib/mempool-str.c b/lib/mempool-str.c

new file mode 100644 (file)

index 0000000..176ff3e
--- /dev/null
+++ b/lib/mempool-str.c
@@ -0,0 +1,102 @@
+/*
+ *     UCW Library -- Memory Pools (String Operations)
+ *
+ *     (c) 2004 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/mempool.h"
+
+#include <alloca.h>
+#include <string.h>
+
+char *
+mp_strdup(struct mempool *p, const char *s)
+{
+  /* Copy s, terminating NUL included, into unaligned memory taken from pool p */
+  uns size = strlen(s) + 1;
+  char *copy = mp_alloc_fast_noalign(p, size);
+  return memcpy(copy, s, size);
+}
+
+void *
+mp_memdup(struct mempool *p, const void *s, uns len)
+{
+  /* Copy len bytes from s into a fresh mp_alloc_fast() block of pool p */
+  void *copy = mp_alloc_fast(p, len);
+  return memcpy(copy, s, len);
+}
+
+char *
+mp_multicat(struct mempool *p, ...)
+{
+  va_list args, a;
+  va_start(args, p);
+  char *x, *y;
+  uns cnt = 0;
+  va_copy(a, args);                            /* Pass 1 (on a copy): count strings up to the NULL terminator */
+  while (x = va_arg(a, char *))
+    cnt++;
+  uns *sizes = alloca(cnt * sizeof(uns));      /* Per-string lengths, kept on the stack */
+  uns len = 1;                                 /* Starts at 1 for the terminating NUL */
+  cnt = 0;
+  va_end(a);
+  va_copy(a, args);                            /* Pass 2 (fresh copy): measure every string */
+  while (x = va_arg(a, char *))
+    len += sizes[cnt++] = strlen(x);
+  char *buf = mp_alloc_fast_noalign(p, len);
+  y = buf;
+  va_end(a);
+  cnt = 0;
+  while (x = va_arg(args, char *))             /* Pass 3 (the original list): copy the pieces back to back */
+    {
+      memcpy(y, x, sizes[cnt]);
+      y += sizes[cnt++];
+    }
+  *y = 0;
+  va_end(args);
+  return buf;
+}
+
+char *
+mp_strjoin(struct mempool *p, char **a, uns n, uns sep)
+{
+  uns lens[n];                                 /* Cached strlen() of every input string */
+  uns total = 1;                               /* Starts at 1 for the terminating NUL */
+  for (uns k = 0; k < n; k++)
+    total += (lens[k] = strlen(a[k]));
+  if (n && sep)
+    total += n - 1;                            /* One separator between each adjacent pair */
+  char *joined = mp_alloc_fast_noalign(p, total);
+  char *out = joined;
+  for (uns k = 0; k < n; k++)
+    {
+      if (k && sep)
+       *out++ = sep;
+      memcpy(out, a[k], lens[k]);
+      out += lens[k];
+    }
+  *out = 0;
+  return joined;
+}
+
+#ifdef TEST
+
+#include <stdio.h>
+
+int main(void)
+{
+  struct mempool *p = mp_new(64);
+  char *s = mp_strdup(p, "12345");
+  char *c = mp_multicat(p, "<<", s, ">>", NULL);
+  puts(c);
+  char *a[] = { "bugs", "gnats", "insects" };
+  puts(mp_strjoin(p, a, 3, '.'));
+  puts(mp_strjoin(p, a, 3, 0));
+  return 0;
+}
+
+#endif
diff --git a/lib/mempool.c b/lib/mempool.c

new file mode 100644 (file)

index 0000000..658f538
--- /dev/null
+++ b/lib/mempool.c
@@ -0,0 +1,491 @@
+/*
+ *     UCW Library -- Memory Pools (One-Time Allocation)
+ *
+ *     (c) 1997--2001 Martin Mares <mj@ucw.cz>
+ *     (c) 2007 Pavel Charvat <pchar@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#undef LOCAL_DEBUG
+
+#include "lib/lib.h"
+#include "lib/mempool.h"
+
+#include <string.h>
+
+#define MP_CHUNK_TAIL ALIGN_TO(sizeof(struct mempool_chunk), CPU_STRUCT_ALIGN)
+#define MP_SIZE_MAX (~0U - MP_CHUNK_TAIL - CPU_PAGE_SIZE)
+
+struct mempool_chunk {
+  struct mempool_chunk *next;                  /* Next chunk in the same chain */
+  uns size;                                    /* Size of the data area, which lies just before this header */
+};
+
+static uns
+mp_align_size(uns size)
+{
+#ifdef POOL_IS_MMAP
+  return ALIGN_TO(size + MP_CHUNK_TAIL, CPU_PAGE_SIZE) - MP_CHUNK_TAIL;
+#else
+  return ALIGN_TO(size, CPU_STRUCT_ALIGN);
+#endif
+}
+
+void
+mp_init(struct mempool *pool, uns chunk_size)
+{
+  chunk_size = mp_align_size(MAX(sizeof(struct mempool), chunk_size));
+  *pool = (struct mempool) {
+    .chunk_size = chunk_size,
+    .threshold = chunk_size >> 1,
+    .last_big = &pool->last_big };
+}
+
+static void *
+mp_new_big_chunk(uns size)
+{
+  void *data = xmalloc(size + MP_CHUNK_TAIL);  /* Data area followed by room for the chunk header */
+  struct mempool_chunk *hdr = data + size;     /* The header sits right behind the data */
+  hdr->size = size;
+  return hdr;
+}
+
+static void
+mp_free_big_chunk(struct mempool_chunk *chunk)
+{
+  xfree((void *)chunk - chunk->size);
+}
+
+static void *
+mp_new_chunk(uns size)
+{
+#ifdef POOL_IS_MMAP
+  struct mempool_chunk *chunk;
+  chunk = page_alloc(size + MP_CHUNK_TAIL) + size;
+  chunk->size = size;
+  return chunk;
+#else
+  return mp_new_big_chunk(size);
+#endif
+}
+
+static void
+mp_free_chunk(struct mempool_chunk *chunk)
+{
+#ifdef POOL_IS_MMAP
+  page_free((void *)chunk - chunk->size, chunk->size + MP_CHUNK_TAIL);
+#else
+  mp_free_big_chunk(chunk);
+#endif
+}
+
+struct mempool *
+mp_new(uns chunk_size)
+{
+  chunk_size = mp_align_size(MAX(sizeof(struct mempool), chunk_size)); /* A chunk must be able to hold the pool structure */
+  struct mempool_chunk *chunk = mp_new_chunk(chunk_size);
+  struct mempool *pool = (void *)chunk - chunk_size;   /* The pool structure lives at the start of its first chunk */
+  DBG("Creating mempool %p with %u bytes long chunks", pool, chunk_size);
+  chunk->next = NULL;
+  *pool = (struct mempool) {
+    .state = { .free = { chunk_size - sizeof(*pool) }, .last = { chunk } },    /* Free space excludes the pool structure */
+    .chunk_size = chunk_size,
+    .threshold = chunk_size >> 1,              /* mp_alloc_internal() serves larger requests from big chunks */
+    .last_big = &pool->last_big };             /* Self-pointer: no big block allocated yet */
+  return pool;
+}
+
+static void
+mp_free_chain(struct mempool_chunk *chunk)
+{
+  struct mempool_chunk *rest;
+  for (; chunk; chunk = rest)
+    {
+      rest = chunk->next;                      /* Read the link before the chunk goes away */
+      mp_free_chunk(chunk);
+    }
+}
+
+static void
+mp_free_big_chain(struct mempool_chunk *chunk)
+{
+  struct mempool_chunk *rest;
+  for (; chunk; chunk = rest)
+    {
+      rest = chunk->next;                      /* Read the link before the chunk goes away */
+      mp_free_big_chunk(chunk);
+    }
+}
+
+void
+mp_delete(struct mempool *pool)
+{
+  DBG("Deleting mempool %p", pool);
+  mp_free_big_chain(pool->state.last[1]);      /* Big chunks */
+  mp_free_chain(pool->unused);                 /* Recycled regular chunks */
+  mp_free_chain(pool->state.last[0]); // must come last: this chain can contain the mempool structure itself
+}
+
+void
+mp_flush(struct mempool *pool)
+{
+  mp_free_big_chain(pool->state.last[1]);      /* Big chunks are freed outright, not recycled */
+  struct mempool_chunk *chunk, *next;
+  for (chunk = pool->state.last[0]; chunk && (void *)chunk - chunk->size != pool; chunk = next)
+    {                                          /* Stops at the chunk whose data area starts with the pool itself (see mp_new) */
+      next = chunk->next;
+      chunk->next = pool->unused;              /* Recycle the chunk through the unused list */
+      pool->unused = chunk;
+    }
+  pool->state.last[0] = chunk;                 /* Either the pool's own chunk or NULL */
+  pool->state.free[0] = chunk ? chunk->size - sizeof(*pool) : 0;       /* The pool structure keeps its place in that chunk */
+  pool->state.last[1] = NULL;
+  pool->state.free[1] = 0;
+  pool->state.next = NULL;                     /* Any states saved by mp_push() are gone as well */
+  pool->last_big = &pool->last_big;
+}
+
+static void
+mp_stats_chain(struct mempool_chunk *chunk, struct mempool_stats *stats, uns idx)
+{
+  while (chunk)
+    {
+      stats->chain_size[idx] += chunk->size + sizeof(*chunk);
+      stats->chain_count[idx]++;
+      chunk = chunk->next;
+    }
+  stats->total_size += stats->chain_size[idx];
+}
+
+void
+mp_stats(struct mempool *pool, struct mempool_stats *stats)
+{
+  bzero(stats, sizeof(*stats));
+  mp_stats_chain(pool->state.last[0], stats, 0);
+  mp_stats_chain(pool->state.last[1], stats, 1);
+  mp_stats_chain(pool->unused, stats, 2);
+}
+
+void *
+mp_alloc_internal(struct mempool *pool, uns size)
+{
+  struct mempool_chunk *chunk;
+  if (size <= pool->threshold)                 /* Small request: start a new regular chunk */
+    {
+      pool->idx = 0;
+      if (pool->unused)                                /* Prefer a recycled chunk over a fresh one */
+        {
+         chunk = pool->unused;
+         pool->unused = chunk->next;
+       }
+      else
+       chunk = mp_new_chunk(pool->chunk_size);
+      chunk->next = pool->state.last[0];
+      pool->state.last[0] = chunk;
+      pool->state.free[0] = pool->chunk_size - size;
+      return (void *)chunk - pool->chunk_size; /* Data area starts chunk_size bytes before the header */
+    }
+  else if (likely(size <= MP_SIZE_MAX))                /* Big request: gets a chunk of its own */
+    {
+      pool->idx = 1;
+      uns aligned = ALIGN_TO(size, CPU_STRUCT_ALIGN);
+      chunk = mp_new_big_chunk(aligned);
+      chunk->next = pool->state.last[1];
+      pool->state.last[1] = chunk;
+      pool->state.free[1] = aligned - size;
+      return pool->last_big = (void *)chunk - aligned; /* Remember where the newest big block starts */
+    }
+  else
+    die("Cannot allocate %u bytes from a mempool", size);
+}
+
+void *
+mp_alloc(struct mempool *pool, uns size)
+{
+  return mp_alloc_fast(pool, size);
+}
+
+void *
+mp_alloc_noalign(struct mempool *pool, uns size)
+{
+  return mp_alloc_fast_noalign(pool, size);
+}
+
+void *
+mp_alloc_zero(struct mempool *pool, uns size)
+{
+  void *ptr = mp_alloc_fast(pool, size);
+  bzero(ptr, size);
+  return ptr;
+}
+
+void *
+mp_start_internal(struct mempool *pool, uns size)
+{
+  void *ptr = mp_alloc_internal(pool, size);
+  pool->state.free[pool->idx] += size;
+  return ptr;
+}
+
+void *
+mp_start(struct mempool *pool, uns size)
+{
+  return mp_start_fast(pool, size);
+}
+
+void *
+mp_start_noalign(struct mempool *pool, uns size)
+{
+  return mp_start_fast_noalign(pool, size);
+}
+
+void *
+mp_grow_internal(struct mempool *pool, uns size)
+{
+  if (unlikely(size > MP_SIZE_MAX))
+    die("Cannot allocate %u bytes of memory", size);
+  uns avail = mp_avail(pool);
+  void *ptr = mp_ptr(pool);
+  if (pool->idx)                               /* The growing buffer is a big chunk: resize it via xrealloc() */
+    {
+      uns amortized = likely(avail <= MP_SIZE_MAX / 2) ? avail * 2 : MP_SIZE_MAX;      /* Double the size, capped at MP_SIZE_MAX */
+      amortized = MAX(amortized, size);
+      amortized = ALIGN_TO(amortized, CPU_STRUCT_ALIGN);
+      struct mempool_chunk *chunk = pool->state.last[1], *next = chunk->next;  /* Save the link; the old header moves or is overwritten */
+      ptr = xrealloc(ptr, amortized + MP_CHUNK_TAIL);
+      chunk = ptr + amortized;                 /* Rebuild the header behind the enlarged data area */
+      chunk->next = next;
+      chunk->size = amortized;
+      pool->state.last[1] = chunk;
+      pool->state.free[1] = amortized;
+      pool->last_big = ptr;
+      return ptr;
+    }
+  else                                         /* The buffer is in a regular chunk: start a new block and copy */
+    {
+      void *p = mp_start_internal(pool, size);
+      memcpy(p, ptr, avail);                   /* Copies all avail bytes of the old buffer */
+      return p;
+    }
+}
+
+uns
+mp_open(struct mempool *pool, void *ptr)
+{
+  return mp_open_fast(pool, ptr);
+}
+
+void *
+mp_realloc(struct mempool *pool, void *ptr, uns size)
+{
+  return mp_realloc_fast(pool, ptr, size);
+}
+
+void *
+mp_realloc_zero(struct mempool *pool, void *ptr, uns size)
+{
+  uns old_size = mp_open_fast(pool, ptr);
+  ptr = mp_grow(pool, size);
+  if (size > old_size)
+    bzero(ptr + old_size, size - old_size);
+  mp_end(pool, ptr + size);
+  return ptr;
+}
+
+void *
+mp_spread_internal(struct mempool *pool, void *p, uns size)
+{
+  void *old = mp_ptr(pool);
+  void *new = mp_grow_internal(pool, p-old+size);
+  return p-old+new;
+}
+
+void
+mp_restore(struct mempool *pool, struct mempool_state *state)
+{
+  struct mempool_chunk *chunk, *next;
+  struct mempool_state s = *state;             /* Copy first: *state may live in a chunk released below */
+  for (chunk = pool->state.last[0]; chunk != s.last[0]; chunk = next)
+    {                                          /* Regular chunks newer than the saved state go to the unused list */
+      next = chunk->next;
+      chunk->next = pool->unused;
+      pool->unused = chunk;
+    }
+  for (chunk = pool->state.last[1]; chunk != s.last[1]; chunk = next)
+    {                                          /* Big chunks newer than the saved state are freed */
+      next = chunk->next;
+      mp_free_big_chunk(chunk);
+    }
+  pool->state = s;
+  pool->last_big = &pool->last_big;            /* Reset to the self-pointer, as mp_init() and mp_flush() do */
+}
+
+struct mempool_state *
+mp_push(struct mempool *pool)
+{
+  struct mempool_state state = pool->state;    /* Snapshot taken before the record below is allocated */
+  struct mempool_state *p = mp_alloc_fast(pool, sizeof(*p));   /* The saved state is stored in the pool itself */
+  *p = state;
+  pool->state.next = p;                                /* Link the current state to its saved predecessor */
+  return p;
+}
+
+void
+mp_pop(struct mempool *pool)
+{
+  ASSERT(pool->state.next);                    /* There must be a state saved by mp_push() */
+  /* Rewind to the saved state (restoring the current state would be a no-op); mp_restore() copies it before releasing chunks */
+  mp_restore(pool, pool->state.next);
+}
+
+#ifdef TEST
+
+#include "lib/getopt.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+
+static void
+fill(byte *ptr, uns len, uns magic)
+{
+  while (len--)
+    *ptr++ = (magic++ & 255);
+}
+
+static void
+check(byte *ptr, uns len, uns magic, uns align)
+{
+  ASSERT(!((uintptr_t)ptr & (align - 1)));
+  while (len--)
+    if (*ptr++ != (magic++ & 255))
+      ASSERT(0);
+}
+
+int main(int argc, char **argv)
+{
+  srand(time(NULL));
+  log_init(argv[0]);
+  cf_def_file = NULL;
+  if (cf_getopt(argc, argv, CF_SHORT_OPTS, CF_NO_LONG_OPTS, NULL) >= 0 || argc != optind)
+    die("Invalid usage");
+
+  uns max = 1000, n = 0, m = 0, can_realloc = 0;
+  void *ptr[max];
+  struct mempool_state *state[max];
+  uns len[max], num[max], align[max];
+  struct mempool *mp = mp_new(128), mp_static;
+
+  for (uns i = 0; i < 5000; i++)
+    {
+      for (uns j = 0; j < n; j++)
+       check(ptr[j], len[j], j, align[j]);
+#if 0
+      DBG("free_small=%u free_big=%u idx=%u chunk_size=%u last_big=%p", mp->state.free[0], mp->state.free[1], mp->idx, mp->chunk_size, mp->last_big);
+      for (struct mempool_chunk *ch = mp->state.last[0]; ch; ch = ch->next)
+       DBG("small %p %p %p %d", (byte *)ch - ch->size, ch, ch + 1, ch->size);
+      for (struct mempool_chunk *ch = mp->state.last[1]; ch; ch = ch->next)
+       DBG("big %p %p %p %d", (byte *)ch - ch->size, ch, ch + 1, ch->size);
+#endif
+      int r = random_max(100);
+      if ((r -= 1) < 0)
+        {
+         DBG("flush");
+         mp_flush(mp);
+         n = m = 0;
+       }
+      else if ((r -= 1) < 0)
+        {
+         DBG("delete & new");
+         mp_delete(mp);
+         if (random_max(2))
+           mp = mp_new(random_max(0x1000) + 1);
+         else
+           mp = &mp_static, mp_init(mp, random_max(512) + 1);
+         n = m = 0;
+       }
+      else if (n < max && (r -= 30) < 0)
+        {
+         len[n] = random_max(0x2000);
+         DBG("alloc(%u)", len[n]);
+         align[n] = random_max(2) ? CPU_STRUCT_ALIGN : 1;
+         ptr[n] = (align[n] == 1) ? mp_alloc_fast_noalign(mp, len[n]) : mp_alloc_fast(mp, len[n]);
+         DBG(" -> (%p)", ptr[n]);
+         fill(ptr[n], len[n], n);
+         n++;
+         can_realloc = 1;
+       }
+      else if (n < max && (r -= 20) < 0)
+        {
+         len[n] = random_max(0x2000);
+         DBG("start(%u)", len[n]);
+         align[n] = random_max(2) ? CPU_STRUCT_ALIGN : 1;
+         ptr[n] = (align[n] == 1) ? mp_start_fast_noalign(mp, len[n]) : mp_start_fast(mp, len[n]);
+         DBG(" -> (%p)", ptr[n]);
+         fill(ptr[n], len[n], n);
+         n++;
+         can_realloc = 1;
+         goto grow;
+       }
+      else if (can_realloc && n && (r -= 10) < 0)
+        {
+         if (mp_open(mp, ptr[n - 1]) != len[n - 1])
+           ASSERT(0);
+grow:
+         {
+           uns k = n - 1;
+           for (uns i = random_max(4); i--; )
+             {
+               uns l = len[k];
+               len[k] = random_max(0x2000);
+               DBG("grow(%u)", len[k]);
+               ptr[k] = mp_grow(mp, len[k]);
+               DBG(" -> (%p)", ptr[k]);
+               check(ptr[k], MIN(l, len[k]), k, align[k]);
+               fill(ptr[k], len[k], k);
+             }
+           mp_end(mp, ptr[k] + len[k]);
+         }
+       }
+      else if (can_realloc && n && (r -= 20) < 0)
+        {
+         uns i = n - 1, l = len[i];
+         DBG("realloc(%p, %u)", ptr[i], len[i]);
+         ptr[i] = mp_realloc(mp, ptr[i], len[i] = random_max(0x2000));
+         DBG(" -> (%p, %u)", ptr[i], len[i]);
+         check(ptr[i],  MIN(len[i], l), i, align[i]);
+         fill(ptr[i], len[i], i);
+       }
+      else if (m < max && (r -= 5) < 0)
+        {
+         DBG("push(%u)", m);
+         num[m] = n;
+         state[m++] = mp_push(mp);
+         can_realloc = 0;
+       }
+      else if (m && (r -= 2) < 0)
+        {
+         m--;
+         DBG("pop(%u)", m);
+         mp_pop(mp);
+         n = num[m];
+         can_realloc = 0;
+       }
+      else if (m && (r -= 1) < 0)
+        {
+         uns i = random_max(m);
+         DBG("restore(%u)", i);
+         mp_restore(mp, state[i]);
+         n = num[m = i];
+         can_realloc = 0;
+       }
+      else if (can_realloc && n && (r -= 5) < 0)
+        ASSERT(mp_size(mp, ptr[n - 1]) == len[n - 1]);
+    }
+
+  mp_delete(mp);
+  return 0;
+}
+
+#endif
diff --git a/lib/mempool.h b/lib/mempool.h

new file mode 100644 (file)

index 0000000..c53423a
--- /dev/null
+++ b/lib/mempool.h
@@ -0,0 +1,295 @@
+/*
+ *     UCW Library -- Memory Pools
+ *
+ *     (c) 1997--2005 Martin Mares <mj@ucw.cz>
+ *     (c) 2007 Pavel Charvat <pchar@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_POOLS_H
+#define _UCW_POOLS_H
+
+/* Memory pool state (see mp_push(), ...).
+ * Both arrays are indexed by a slot number: slot 0 is the one used by the
+ * inlined mp_alloc_fast*() / mp_start_fast*() paths, slot 1 is chosen by
+ * mp_idx() for the block equal to mempool.last_big. */
+struct mempool_state {
+  uns free[2];                         /* Bytes still available in each slot; the free space spans [last[i] - free[i], last[i]) */
+  void *last[2];                       /* Pointer just behind the free space of each slot */
+  struct mempool_state *next;          /* Most recently saved state (set by mp_save()) */
+};
+
+/* Memory pool */
+struct mempool {
+  struct mempool_state state;          /* Current allocation state; mp_save() copies it out */
+  void *unused, *last_big;             /* last_big: the block mp_idx() maps to slot 1; unused: not referenced by the inline code in this header */
+  uns chunk_size, threshold, idx;      /* idx: slot of the currently open growing buffer (set by mp_start_fast*() and mp_open_fast()) */
+};
+
+/* Statistics (see mp_stats()) */
+struct mempool_stats {
+  uns total_size;                      /* Real allocated size in bytes */
+  uns chain_count[3];                  /* Number of allocated chunks in small/big/unused chains */
+  uns chain_size[3];                   /* Size of allocated chunks in small/big/unused chains */
+};
+
+/* Initialize a given mempool structure. Chunk size must be in the interval [1, UINT_MAX / 2] */
+void mp_init(struct mempool *pool, uns chunk_size);
+
+/* Allocate and initialize a new memory pool. See mp_init for chunk size limitations. */
+struct mempool *mp_new(uns chunk_size);
+
+/* Cleanup mempool initialized by mp_init or mp_new */
+void mp_delete(struct mempool *pool);
+
+/* Free all data on a memory pool (saves some empty chunks for later allocations) */
+void mp_flush(struct mempool *pool);
+
+/* Compute some statistics for debug purposes. See the definition of the mempool_stats structure. */
+void mp_stats(struct mempool *pool, struct mempool_stats *stats);
+
+
+/*** Allocation routines ***/
+
+/* For internal use only, do not call directly */
+void *mp_alloc_internal(struct mempool *pool, uns size) LIKE_MALLOC;
+
+/* The function allocates new <size> bytes on a given memory pool.
+ * If the <size> is zero, the resulting pointer is undefined,
+ * but it may be safely reallocated or used as the parameter
+ * to other functions below.
+ *
+ * The resulting pointer is always aligned to a multiple of
+ * CPU_STRUCT_ALIGN bytes and this condition remains true also
+ * after future reallocations.
+ */
+void *mp_alloc(struct mempool *pool, uns size);
+
+/* The same as mp_alloc, but the result may not be aligned */
+void *mp_alloc_noalign(struct mempool *pool, uns size);
+
+/* The same as mp_alloc, but fills the newly allocated data with zeroes */
+void *mp_alloc_zero(struct mempool *pool, uns size);
+
+/* Inlined version of mp_alloc(): serve the request from slot 0 when the
+ * CPU_STRUCT_ALIGN-rounded free space suffices, otherwise fall back
+ * to mp_alloc_internal(). */
+static inline void *
+mp_alloc_fast(struct mempool *pool, uns size)
+{
+  /* Only a multiple of CPU_STRUCT_ALIGN bytes of the free space is usable here */
+  uns room = pool->state.free[0] & ~(CPU_STRUCT_ALIGN - 1);
+
+  if (size > room)
+    return mp_alloc_internal(pool, size);
+  pool->state.free[0] = room - size;
+  return pool->state.last[0] - room;
+}
+
+/* Inlined version of mp_alloc_noalign(): take <size> bytes from the start
+ * of slot 0's free space, or fall back to mp_alloc_internal() if it does not fit. */
+static inline void *
+mp_alloc_fast_noalign(struct mempool *pool, uns size)
+{
+  uns room = pool->state.free[0];
+
+  if (size > room)
+    return mp_alloc_internal(pool, size);
+  pool->state.free[0] = room - size;
+  return pool->state.last[0] - room;
+}
+
+
+/*** Usage as a growing buffer ***/
+
+/* For internal use only, do not call directly */
+void *mp_start_internal(struct mempool *pool, uns size) LIKE_MALLOC;
+void *mp_grow_internal(struct mempool *pool, uns size);
+void *mp_spread_internal(struct mempool *pool, void *p, uns size);
+
+/* Select the state slot for <ptr>: 1 if it is the pool's last_big block, 0 otherwise. */
+static inline uns
+mp_idx(struct mempool *pool, void *ptr)
+{
+  return (pool->last_big == ptr) ? 1 : 0;
+}
+
+/* Open a new growing buffer (at least <size> bytes long).
+ * If the <size> is zero, the resulting pointer is undefined,
+ * but it may be safely reallocated or used as the parameter
+ * to other functions below.
+ *
+ * The resulting pointer is always aligned to a multiple of
+ * CPU_STRUCT_ALIGN bytes and this condition remains true also
+ * after future reallocations. There is an unaligned version as well.
+ *
+ * Keep in mind that you can't make any other <pool> allocations
+ * before you "close" the growing buffer with mp_end().
+ */
+void *mp_start(struct mempool *pool, uns size);
+void *mp_start_noalign(struct mempool *pool, uns size);
+
+/* Inlined version of mp_start(): open a growing buffer in slot 0 when the
+ * CPU_STRUCT_ALIGN-rounded free space holds <size> bytes, otherwise
+ * fall back to mp_start_internal(). */
+static inline void *
+mp_start_fast(struct mempool *pool, uns size)
+{
+  uns room = pool->state.free[0] & ~(CPU_STRUCT_ALIGN - 1);
+
+  if (size > room)
+    return mp_start_internal(pool, size);
+  pool->idx = 0;
+  /* Drop the unaligned remainder so that mp_ptr() yields the returned address */
+  pool->state.free[0] = room;
+  return pool->state.last[0] - room;
+}
+
+/* Inlined version of mp_start_noalign(): open a growing buffer at the start
+ * of slot 0's free space, or fall back to mp_start_internal() if <size> does not fit. */
+static inline void *
+mp_start_fast_noalign(struct mempool *pool, uns size)
+{
+  if (size > pool->state.free[0])
+    return mp_start_internal(pool, size);
+  pool->idx = 0;
+  return pool->state.last[0] - pool->state.free[0];
+}
+
+/* Return the start of the growing buffer opened by mp_start() or a similar function
+ * (i.e., the beginning of the free space of the currently selected slot). */
+static inline void *
+mp_ptr(struct mempool *pool)
+{
+  uns slot = pool->idx;
+
+  return pool->state.last[slot] - pool->state.free[slot];
+}
+
+/* Return how many bytes the growing buffer may occupy without reallocation
+ * (the free space of the currently selected slot). */
+static inline uns
+mp_avail(struct mempool *pool)
+{
+  uns slot = pool->idx;
+
+  return pool->state.free[slot];
+}
+
+/* Make the buffer opened by mp_start() at least <size> bytes long
+ * (<size> may be smaller than mp_avail(), even zero). The buffer may move;
+ * its contents are kept up to the minimum of the old and new sizes and
+ * the newly gained memory is uninitialized. Repeated calls have amortized
+ * linear cost wrt. the maximum value of <size>. */
+static inline void *
+mp_grow(struct mempool *pool, uns size)
+{
+  if (size > mp_avail(pool))
+    return mp_grow_internal(pool, size);
+  return mp_ptr(pool);
+}
+
+/* Enlarge the buffer by at least one byte -- same as mp_grow(pool, mp_avail(pool) + 1) */
+static inline void *
+mp_expand(struct mempool *pool)
+{
+  uns wanted = mp_avail(pool) + 1;
+
+  return mp_grow_internal(pool, wanted);
+}
+
+/* Make sure that at least <size> bytes are available after <p>; if they are not,
+ * reallocate the buffer and return the adjusted <p>. */
+static inline void *
+mp_spread(struct mempool *pool, void *p, uns size)
+{
+  uns room = (uns)(pool->state.last[pool->idx] - p);
+
+  if (room < size)
+    return mp_spread_internal(pool, p, size);
+  return p;
+}
+
+/* Close the growing buffer. <end> must point just behind the data which should stay
+ * allocated, i.e., it lies in the interval [mp_ptr(pool), mp_ptr(pool) + mp_avail(pool)].
+ * Returns a pointer to the beginning of the block which has just been closed. */
+static inline void *
+mp_end(struct mempool *pool, void *end)
+{
+  uns slot = pool->idx;
+  void *start = pool->state.last[slot] - pool->state.free[slot];
+
+  /* Everything from <end> up to the end of the slot becomes free again */
+  pool->state.free[slot] = pool->state.last[slot] - end;
+  return start;
+}
+
+/* Return the size in bytes of the last allocated memory block (from mp_alloc*() or mp_end()). */
+static inline uns
+mp_size(struct mempool *pool, void *ptr)
+{
+  uns slot = mp_idx(pool, ptr);
+  /* The block ends where the free space of its slot begins */
+  void *used_end = pool->state.last[slot] - pool->state.free[slot];
+
+  return used_end - ptr;
+}
+
+/* Open the last memory block (allocated with mp_alloc*() or mp_end())
+ * for growing and return its size in bytes. The contents and the start pointer
+ * remain unchanged. Do not forget to call mp_end() to close it. */
+uns mp_open(struct mempool *pool, void *ptr);
+
+/* Inlined version of mp_open(): turn the last block back into a growing buffer
+ * by returning its bytes to the free space of its slot; returns the block's size. */
+static inline uns
+mp_open_fast(struct mempool *pool, void *ptr)
+{
+  uns slot = mp_idx(pool, ptr);
+  uns size = pool->state.last[slot] - ptr - pool->state.free[slot];
+
+  pool->idx = slot;
+  pool->state.free[slot] += size;
+  return size;
+}
+
+/* Reallocate the last memory block (allocated with mp_alloc*() or mp_end())
+ * to the new <size>. Behavior is similar to mp_grow(), but the resulting
+ * block is closed. */
+void *mp_realloc(struct mempool *pool, void *ptr, uns size);
+
+/* The same as mp_realloc(), but fills the additional bytes (if any) with zeroes */
+void *mp_realloc_zero(struct mempool *pool, void *ptr, uns size);
+
+/* Inlined version of mp_realloc(): reopen the block, grow it to <size> and close it again. */
+static inline void *
+mp_realloc_fast(struct mempool *pool, void *ptr, uns size)
+{
+  void *moved;
+
+  mp_open_fast(pool, ptr);
+  moved = mp_grow(pool, size);
+  mp_end(pool, moved + size);
+  return moved;
+}
+
+
+/*** Usage as a stack ***/
+
+/* Store the current state of the pool to <state> and link it as the most recently saved one.
+ * Do not call this function with an opened growing buffer. */
+static inline void
+mp_save(struct mempool *pool, struct mempool_state *state)
+{
+  struct mempool_state *cur = &pool->state;
+
+  *state = *cur;
+  cur->next = state;
+}
+
+/* Save the current state to a newly allocated mempool_state structure.
+ * Do not call this function with an opened growing buffer. */
+struct mempool_state *mp_push(struct mempool *pool);
+
+/* Restore the state saved by mp_save() or mp_push() and free all
+ * data allocated after that point (including the state structure itself).
+ * You can't reallocate the last memory block from the saved state. */
+void mp_restore(struct mempool *pool, struct mempool_state *state);
+
+/* Restore the state saved by the last call to mp_push().
+ * mp_pop() and mp_push() work as a stack, so you can safely push more states. */
+void mp_pop(struct mempool *pool);
+
+
+/*** mempool-str.c ***/
+
+char *mp_strdup(struct mempool *, const char *) LIKE_MALLOC;
+void *mp_memdup(struct mempool *, const void *, uns) LIKE_MALLOC;
+char *mp_multicat(struct mempool *, ...) LIKE_MALLOC SENTINEL_CHECK;
+/* Concatenate <x> and <y>: a thin wrapper which passes both strings and a terminating NULL to mp_multicat(). */
+static inline char * LIKE_MALLOC
+mp_strcat(struct mempool *mp, const char *x, const char *y)
+{
+  return mp_multicat(mp, x, y, NULL);
+}
+char *mp_strjoin(struct mempool *p, char **a, uns n, uns sep) LIKE_MALLOC;
+
+
+/*** mempool-fmt.c ***/
+
+char *mp_printf(struct mempool *mp, const char *fmt, ...) FORMAT_CHECK(printf,2,3) LIKE_MALLOC;
+char *mp_vprintf(struct mempool *mp, const char *fmt, va_list args) LIKE_MALLOC;
+char *mp_printf_append(struct mempool *mp, char *ptr, const char *fmt, ...) FORMAT_CHECK(printf,3,4);
+char *mp_vprintf_append(struct mempool *mp, char *ptr, const char *fmt, va_list args);
+
+#endif
diff --git a/lib/mempool.t b/lib/mempool.t

new file mode 100644 (file)

index 0000000..10c396e
--- /dev/null
+++ b/lib/mempool.t
@@ -0,0 +1,11 @@
+# Tests for mempool modules
+
+Run:   ../obj/lib/mempool-t
+
+Run:   ../obj/lib/mempool-fmt-t
+Out:   <Hello, World!><Hello, World!><Appended><Hello,                                              World!>
+
+Run:   ../obj/lib/mempool-str-t
+Out:   <<12345>>
+       bugs.gnats.insects
+       bugsgnatsinsects
diff --git a/lib/mmap.c b/lib/mmap.c

new file mode 100644 (file)

index 0000000..928da4b
--- /dev/null
+++ b/lib/mmap.c
@@ -0,0 +1,47 @@
+/*
+ *     UCW Library -- Mapping of Files
+ *
+ *     (c) 1999--2002 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+
+#include <stdio.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+
+/*
+ *  Map the whole file <name> to memory (shared mapping, read-only unless
+ *  <writeable> is set) and return the address of the mapping. If <len> is
+ *  non-NULL, the size of the file is stored to *len. An empty file is not
+ *  mapped at all; a pointer to a static empty string is returned instead.
+ *  All failures are fatal (die()).
+ */
+void *
+mmap_file(const char *name, unsigned *len, int writeable)
+{
+  int fd = open(name, writeable ? O_RDWR : O_RDONLY);
+  struct stat st;
+  void *x;
+
+  if (fd < 0)
+    die("open(%s): %m", name);
+  if (fstat(fd, &st) < 0)
+    die("fstat(%s): %m", name);
+  /*
+   *  The length is reported to the caller (and later passed to munmap_file())
+   *  as an unsigned int. Refuse files whose size does not fit instead of
+   *  silently returning a truncated length.
+   */
+  if (st.st_size < 0 || (off_t)(unsigned) st.st_size != st.st_size)
+    die("mmap_file(%s): File too large", name);
+  if (len)
+    *len = st.st_size;
+  if (st.st_size)
+    {
+      x = mmap(NULL, st.st_size, writeable ? (PROT_READ | PROT_WRITE) : PROT_READ, MAP_SHARED, fd, 0);
+      if (x == MAP_FAILED)
+        die("mmap(%s): %m", name);
+    }
+  else /* For empty file, we can return any non-zero address */
+    x = "";
+  /* The mapping stays valid after the descriptor is closed */
+  close(fd);
+  return x;
+}
+
+/* Unmap <len> bytes at <start>; a plain wrapper around munmap() whose return value is ignored. */
+void
+munmap_file(void *start, unsigned len)
+{
+  munmap(start, len);
+}
diff --git a/lib/pagecache.c b/lib/pagecache.c

new file mode 100644 (file)

index 0000000..13ad366
--- /dev/null
+++ b/lib/pagecache.c
@@ -0,0 +1,429 @@
+/*
+ *     UCW Library -- File Page Cache
+ *
+ *     (c) 1999--2002 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/pagecache.h"
+#include "lib/lfs.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <alloca.h>
+
+struct page_cache {
+  list free_pages;                     /* LRU queue of free non-dirty pages */
+  list locked_pages;                   /* List of locked pages (starts with dirty ones) */
+  list dirty_pages;                    /* List of free dirty pages */
+  uns page_size;                       /* Bytes per page (must be a power of two) */
+  uns free_count;                      /* Number of free / dirty pages */
+  uns total_count;                     /* Total number of pages */
+  uns max_pages;                       /* Maximum number of free pages */
+  uns hash_size;                       /* Hash table size */
+  uns stat_hit;                                /* Number of cache hits */
+  uns stat_miss;                       /* Number of cache misses */
+  uns stat_write;                      /* Number of writes */
+  list *hash_table;                    /* List heads corresponding to hash buckets */
+#ifndef HAVE_PREAD
+  sh_off_t pos;                                /* Current position in the file */
+  int pos_fd;                          /* FD the position corresponds to */
+#endif
+};
+
+/*
+ *  Both macros expect a `struct page_cache *c' in the enclosing scope and
+ *  rely on c->page_size being a power of two.
+ *  PAGE_NUMBER yields the position rounded down to a page boundary
+ *  (a file offset, not an ordinal number); PAGE_OFFSET the offset within the page.
+ */
+#define PAGE_NUMBER(pos) ((pos) & ~(sh_off_t)(c->page_size - 1))
+#define PAGE_OFFSET(pos) ((pos) & (c->page_size - 1))
+
+/*
+ *  Create an empty page cache with pages of <page_size> bytes, keeping
+ *  at most <max_pages> free pages. The hash table gets nextprime(max_pages) buckets.
+ */
+struct page_cache *
+pgc_open(uns page_size, uns max_pages)
+{
+  struct page_cache *cache = xmalloc_zero(sizeof(*cache));
+  uns bucket;
+
+  init_list(&cache->free_pages);
+  init_list(&cache->locked_pages);
+  init_list(&cache->dirty_pages);
+  cache->page_size = page_size;
+  cache->max_pages = max_pages;
+
+  cache->hash_size = nextprime(cache->max_pages);
+  cache->hash_table = xmalloc(sizeof(list) * cache->hash_size);
+  for (bucket = 0; bucket < cache->hash_size; bucket++)
+    init_list(&cache->hash_table[bucket]);
+#ifndef HAVE_PREAD
+  /* No file position is known yet */
+  cache->pos_fd = -1;
+#endif
+  return cache;
+}
+
+/*
+ *  Destroy the cache: flush and free all unlocked pages via pgc_cleanup(),
+ *  then free the hash table and the cache structure itself. All three page
+ *  lists must be empty afterwards, so no page may still be locked by the caller.
+ */
+void
+pgc_close(struct page_cache *c)
+{
+  pgc_cleanup(c);
+  ASSERT(EMPTY_LIST(c->locked_pages));
+  ASSERT(EMPTY_LIST(c->dirty_pages));
+  ASSERT(EMPTY_LIST(c->free_pages));
+  xfree(c->hash_table);
+  xfree(c);
+}
+
+/* Print one line describing a page: position (truncated to uns), file descriptor, flags and lock count. */
+static void
+pgc_debug_page(struct page *p)
+{
+  printf("\tp=%08x d=%d f=%x c=%d\n", (uns) p->pos, p->fd, p->flags, p->lock_count);
+}
+
+/*
+ *  Dump the cache parameters and hit/miss/write statistics to stdout.
+ *  If <mode> is non-zero, also list the pages on the free (LRU), locked
+ *  and dirty lists.
+ */
+void
+pgc_debug(struct page_cache *c, int mode)
+{
+  struct page *p;
+
+  printf(">> Page cache dump: pgsize=%d, pages=%d, freepages=%d of %d, hash=%d\n", c->page_size, c->total_count, c->free_count, c->max_pages, c->hash_size);
+  printf(">> stats: %d hits, %d misses, %d writes\n", c->stat_hit, c->stat_miss, c->stat_write);
+  if (mode)
+    {
+      puts("LRU list:");
+      WALK_LIST(p, c->free_pages)
+       pgc_debug_page(p);
+      puts("Locked list:");
+      WALK_LIST(p, c->locked_pages)
+       pgc_debug_page(p);
+      puts("Dirty list:");
+      WALK_LIST(p, c->dirty_pages)
+       pgc_debug_page(p);
+    }
+}
+
+/*
+ *  Write a single dirty page back to its file, clear its dirty flag and
+ *  count the write. Dies on a write error or on a short write.
+ *  No page list is touched here; that is up to the caller.
+ */
+static void
+flush_page(struct page_cache *c, struct page *p)
+{
+  int s;
+
+  ASSERT(p->flags & PG_FLAG_DIRTY);
+#ifdef HAVE_PREAD
+  s = sh_pwrite(p->fd, p->data, c->page_size, p->pos);
+#else
+  /* Without pwrite(), seek only if the remembered file position does not match */
+  if (c->pos != p->pos || c->pos_fd != (int) p->fd)
+    sh_seek(p->fd, p->pos, SEEK_SET);
+  s = write(p->fd, p->data, c->page_size);
+  /* Remember where the descriptor points now (meaningless if s < 0, but we die below then) */
+  c->pos = p->pos + s;
+  c->pos_fd = p->fd;
+#endif
+  if (s < 0)
+    die("pgc_write(%d): %m", p->fd);
+  if (s != (int) c->page_size)
+    die("pgc_write(%d): incomplete page (only %d of %d)", p->fd, s, c->page_size);
+  p->flags &= ~PG_FLAG_DIRTY;
+  c->stat_write++;
+}
+
+/* qsort() comparator for arrays of page pointers: order by file descriptor, then by position. */
+static int
+flush_cmp(const void *X, const void *Y)
+{
+  struct page *a = *((struct page **)X);
+  struct page *b = *((struct page **)Y);
+
+  if (a->fd != b->fd)
+    return (a->fd < b->fd) ? -1 : 1;
+  if (a->pos != b->pos)
+    return (a->pos < b->pos) ? -1 : 1;
+  return 0;
+}
+
+/*
+ *  Write out unlocked dirty pages and move them to the tail of the free list.
+ *  With <force> set, the whole dirty list is processed; otherwise counting
+ *  stops as soon as the count reaches c->free_count / 2 (so at least one page
+ *  is taken whenever the list is non-empty). The selected pages are sorted
+ *  by (fd, pos) before being written.
+ */
+static void
+flush_pages(struct page_cache *c, uns force)
+{
+  uns cnt = 0;
+  uns max = force ? ~0U : c->free_count / 2;
+  uns i;
+  struct page *p, *q, **req, **rr;
+
+  /* First pass: count how many pages will be flushed */
+  WALK_LIST(p, c->dirty_pages)
+    {
+      cnt++;
+      if (cnt >= max)
+       break;
+    }
+  /* The request array lives on the stack, so it needs no explicit freeing */
+  req = rr = alloca(cnt * sizeof(struct page *));
+  i = cnt;
+  p = HEAD(c->dirty_pages);
+  /* Second pass: unlink the first <cnt> pages; <q> remembers the successor before <p> is moved */
+  while ((q = (struct page *) p->n.next) && i--)
+    {
+      rem_node(&p->n);
+      add_tail(&c->free_pages, &p->n);
+      *rr++ = p;
+      p = q;
+    }
+  qsort(req, cnt, sizeof(struct page *), flush_cmp);
+  for(i=0; i<cnt; i++)
+    flush_page(c, req[i]);
+}
+
+/* Hash bucket index for the page at position <pos> of file <fd>: (pos + fd) modulo the table size. */
+static inline uns
+hash_page(struct page_cache *c, sh_off_t pos, uns fd)
+{
+  return (pos + fd) % c->hash_size;
+}
+
+/*
+ *  Find the page for (<fd>, <pos>) in the hash table, or obtain a buffer
+ *  for it by allocating a new one or by recycling the oldest free page.
+ *  The returned page is linked in the hash table, but it is on none of the
+ *  three page lists -- the caller must put it on one (see get_and_lock_page()).
+ *  A page which was not found in the cache is returned with flags and
+ *  lock_count cleared, so its data are not valid yet.
+ */
+static struct page *
+get_page(struct page_cache *c, sh_off_t pos, uns fd)
+{
+  node *n;
+  struct page *p;
+  uns hash = hash_page(c, pos, fd);
+
+  /*
+   *  Return locked buffer for given page.
+   */
+
+  WALK_LIST(n, c->hash_table[hash])
+    {
+      p = SKIP_BACK(struct page, hn, n);
+      if (p->pos == pos && p->fd == fd)
+       {
+         /* Found in the cache */
+         rem_node(&p->n);
+         /* An unlocked page was counted as free (on the free or dirty list) */
+         if (!p->lock_count)
+           c->free_count--;
+         return p;
+       }
+    }
+  if (c->total_count < c->max_pages || !c->free_count)
+    {
+      /* Enough free space, expand the cache */
+      /* (also taken when the limit is reached but every page is locked, so nothing can be recycled) */
+      p = xmalloc(sizeof(struct page) + c->page_size);
+      c->total_count++;
+    }
+  else
+    {
+      /* Discard the oldest unlocked page */
+      p = HEAD(c->free_pages);
+      if (!p->n.next)
+       {
+         /* There are only dirty pages here */
+         flush_pages(c, 0);
+         p = HEAD(c->free_pages);
+         ASSERT(p->n.next);
+       }
+      ASSERT(!p->lock_count);
+      rem_node(&p->n);
+      rem_node(&p->hn);
+      c->free_count--;
+    }
+  p->pos = pos;
+  p->fd = fd;
+  p->flags = 0;
+  p->lock_count = 0;
+  add_tail(&c->hash_table[hash], &p->hn);
+  return p;
+}
+
+/*
+ *  Write all unwritten pages: first every unlocked dirty page (which thereby
+ *  moves to the free list), then the dirty pages at the beginning of the
+ *  locked list. The walk of the locked list stops at the first clean page,
+ *  i.e., it relies on dirty locked pages being kept at the head of that list.
+ */
+void
+pgc_flush(struct page_cache *c)
+{
+  struct page *p;
+
+  flush_pages(c, 1);
+  WALK_LIST(p, c->locked_pages)
+    if (p->flags & PG_FLAG_DIRTY)
+      flush_page(c, p);
+    else
+      break;
+}
+
+/*
+ *  Flush the cache and deallocate all unlocked pages. Locked pages are
+ *  written out by pgc_flush(), but they stay allocated.
+ */
+void
+pgc_cleanup(struct page_cache *c)
+{
+  struct page *p;
+  node *n;
+
+  pgc_flush(c);
+  /* After the flush, every unlocked page is clean and sits on the free list */
+  WALK_LIST_DELSAFE(p, n, c->free_pages)
+    {
+      ASSERT(!(p->flags & PG_FLAG_DIRTY) && !p->lock_count);
+      rem_node(&p->n);
+      rem_node(&p->hn);
+      c->free_count--;
+      c->total_count--;
+      xfree(p);
+    }
+  ASSERT(!c->free_count);
+}
+
+/*
+ *  Get the page for (<fd>, <pos>) by get_page(), put it on the list of
+ *  locked pages and increase its lock count.
+ *
+ *  The locked list must start with the dirty pages, because pgc_flush()
+ *  stops walking it at the first clean one. A page found in the cache can
+ *  already be dirty, so such a page is inserted at the head of the list;
+ *  clean pages go to the tail.
+ */
+static inline struct page *
+get_and_lock_page(struct page_cache *c, sh_off_t pos, uns fd)
+{
+  struct page *p = get_page(c, pos, fd);
+
+  if (p->flags & PG_FLAG_DIRTY)
+    add_head(&c->locked_pages, &p->n);
+  else
+    add_tail(&c->locked_pages, &p->n);
+  p->lock_count++;
+  return p;
+}
+
+/*
+ *  Return a locked page with the contents of file <fd> at the page-aligned
+ *  position <pos>. If the cached page is not valid, a full page is read
+ *  from the file; a read error or a short read is fatal.
+ *  Release the page by pgc_put().
+ */
+struct page *
+pgc_read(struct page_cache *c, int fd, sh_off_t pos)
+{
+  struct page *p;
+  int s;
+
+  ASSERT(!PAGE_OFFSET(pos));
+  p = get_and_lock_page(c, pos, fd);
+  if (p->flags & PG_FLAG_VALID)
+    c->stat_hit++;
+  else
+    {
+      c->stat_miss++;
+#ifdef HAVE_PREAD
+      s = sh_pread(fd, p->data, c->page_size, pos);
+#else
+      /* Without pread(), seek only if the remembered file position does not match */
+      if (c->pos != pos || c->pos_fd != (int)fd)
+       sh_seek(fd, pos, SEEK_SET);
+      s = read(fd, p->data, c->page_size);
+      c->pos = pos + s;
+      c->pos_fd = fd;
+#endif
+      if (s < 0)
+       die("pgc_read(%d): %m", fd);
+      if (s != (int) c->page_size)
+       die("pgc_read(%d): incomplete page (only %d of %d)", p->fd, s, c->page_size);
+      p->flags |= PG_FLAG_VALID;
+    }
+  return p;
+}
+
+/*
+ *  Get a locked page for writing at the page-aligned position <pos> of
+ *  file <fd>. Nothing is read from the file: the page is just declared
+ *  valid and dirty, so unless it was already cached, its contents are
+ *  undefined until the caller fills them in.
+ *
+ *  The dirty flag is set by pgc_mark_dirty(), which also moves a newly
+ *  dirtied page to the head of the locked list. pgc_flush() stops at the
+ *  first clean locked page, so a dirty page left behind a clean one
+ *  would be skipped by it.
+ */
+struct page *
+pgc_get(struct page_cache *c, int fd, sh_off_t pos)
+{
+  struct page *p;
+
+  ASSERT(!PAGE_OFFSET(pos));
+  p = get_and_lock_page(c, pos, fd);
+  p->flags |= PG_FLAG_VALID;
+  pgc_mark_dirty(c, p);
+  return p;
+}
+
+/*
+ *  Like pgc_get(), but the page is filled with zeroes first.
+ *
+ *  The dirty flag is set by pgc_mark_dirty(), which also moves a newly
+ *  dirtied page to the head of the locked list. pgc_flush() stops at the
+ *  first clean locked page, so a dirty page left behind a clean one
+ *  would be skipped by it.
+ */
+struct page *
+pgc_get_zero(struct page_cache *c, int fd, sh_off_t pos)
+{
+  struct page *p;
+
+  ASSERT(!PAGE_OFFSET(pos));
+  p = get_and_lock_page(c, pos, fd);
+  bzero(p->data, c->page_size);
+  p->flags |= PG_FLAG_VALID;
+  pgc_mark_dirty(c, p);
+  return p;
+}
+
+/*
+ *  Release one lock of the page. When the last lock is gone, the page moves
+ *  to the dirty list (if it is dirty) or to the free list; a clean page is
+ *  freed instead when the number of free pages has already reached max_pages.
+ */
+void
+pgc_put(struct page_cache *c, struct page *p)
+{
+  int dirty;
+
+  ASSERT(p->lock_count);
+  p->lock_count--;
+  if (p->lock_count)
+    return;
+
+  rem_node(&p->n);
+  dirty = !!(p->flags & PG_FLAG_DIRTY);
+  if (!dirty && c->free_count >= c->max_pages)
+    {
+      /* Too many free pages already, drop this one altogether */
+      rem_node(&p->hn);
+      xfree(p);
+      c->total_count--;
+      return;
+    }
+  add_tail(dirty ? &c->dirty_pages : &c->free_pages, &p->n);
+  c->free_count++;
+}
+
+/*
+ *  Mark a locked page as dirty. A page which was clean so far is moved
+ *  to the head of the locked list, where the dirty locked pages are kept.
+ */
+void
+pgc_mark_dirty(struct page_cache *c, struct page *p)
+{
+  ASSERT(p->lock_count);
+  if (p->flags & PG_FLAG_DIRTY)
+    return;
+  p->flags |= PG_FLAG_DIRTY;
+  rem_node(&p->n);
+  add_head(&c->locked_pages, &p->n);
+}
+
+/*
+ *  Partial reading: return a pointer to the cached data at an arbitrary
+ *  position <pos> of file <fd> and store the number of bytes available up
+ *  to the end of that page to *len.
+ *  NOTE(review): the page is unlocked again before returning, so the pointer
+ *  refers to an unlocked page -- presumably it is meant to be used only
+ *  until the next operation on the cache; confirm with the callers.
+ */
+byte *
+pgc_read_data(struct page_cache *c, int fd, sh_off_t pos, uns *len)
+{
+  struct page *p;
+  sh_off_t page = PAGE_NUMBER(pos);
+  uns offset = PAGE_OFFSET(pos);
+
+  p = pgc_read(c, fd, page);
+  pgc_put(c, p);
+  *len = c->page_size - offset;
+  return p->data + offset;
+}
+
+#ifdef TEST
+
+/*
+ *  Test driver (compiled only with TEST defined): exercises a cache of
+ *  1024-byte pages with max_pages=2 on a file called "test" in the current
+ *  directory, dumping the cache state by pgc_debug() after every step.
+ */
+int main(int argc, char **argv)
+{
+  struct page_cache *c = pgc_open(1024, 2);
+  struct page *p, *q, *r;
+  int fd = open("test", O_RDWR | O_CREAT | O_TRUNC, 0666);
+  if (fd < 0)
+    die("open: %m");
+  pgc_debug(c, 1);
+  /* Create three pages, one more than max_pages */
+  p = pgc_get(c, fd, 0);
+  pgc_debug(c, 1);
+  strcpy(p->data, "one");
+  pgc_put(c, p);
+  pgc_debug(c, 1);
+  p = pgc_get(c, fd, 1024);
+  pgc_debug(c, 1);
+  strcpy(p->data, "two");
+  pgc_put(c, p);
+  pgc_debug(c, 1);
+  p = pgc_get(c, fd, 2048);
+  pgc_debug(c, 1);
+  strcpy(p->data, "three");
+  pgc_put(c, p);
+  pgc_debug(c, 1);
+  pgc_flush(c);
+  pgc_debug(c, 1);
+  /* Modify a page which is read back and kept locked */
+  p = pgc_read(c, fd, 0);
+  pgc_debug(c, 1);
+  strcpy(p->data, "odin");
+  pgc_mark_dirty(c, p);
+  pgc_debug(c, 1);
+  pgc_flush(c);
+  pgc_debug(c, 1);
+  /* Hold three locked pages at once */
+  q = pgc_read(c, fd, 1024);
+  pgc_debug(c, 1);
+  r = pgc_read(c, fd, 2048);
+  pgc_debug(c, 1);
+  pgc_put(c, p);
+  pgc_put(c, q);
+  pgc_put(c, r);
+  pgc_debug(c, 1);
+  p = pgc_get(c, fd, 3072);
+  pgc_debug(c, 1);
+  strcpy(p->data, "four");
+  pgc_put(c, p);
+  pgc_debug(c, 1);
+  pgc_cleanup(c);
+  pgc_debug(c, 1);
+  pgc_close(c);
+  return 0;
+}
+
+#endif
diff --git a/lib/pagecache.h b/lib/pagecache.h

new file mode 100644 (file)

index 0000000..ef4bf5a
--- /dev/null
+++ b/lib/pagecache.h
@@ -0,0 +1,42 @@
+/*
+ *     UCW Library -- File Page Cache
+ *
+ *     (c) 1999--2002 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_PAGECACHE_H
+#define _UCW_PAGECACHE_H
+
+#include "lib/lists.h"
+
+struct page_cache;
+
+/* A single cached page; the page data follow the header in the same allocation. */
+struct page {
+  node n;                              /* Node in page list */
+  node hn;                             /* Node in hash table */
+  sh_off_t pos;                        /* Position of the page in the file */
+  uns fd;                              /* File descriptor the page belongs to */
+  uns flags;                           /* PG_FLAG_xxx */
+  uns lock_count;                      /* Number of locks held; decreased by pgc_put() */
+  byte data[0];                        /* Contents of the page */
+};
+
+/* Bits of page->flags */
+#define PG_FLAG_DIRTY          1       /* Page has been modified and not written back yet */
+#define PG_FLAG_VALID          2       /* Page data are valid, so pgc_read() need not read them from the file */
+
+struct page_cache *pgc_open(uns page_size, uns max_pages);
+void pgc_close(struct page_cache *);
+void pgc_debug(struct page_cache *, int mode);
+void pgc_flush(struct page_cache *);                           /* Write all unwritten pages */
+void pgc_cleanup(struct page_cache *);                         /* Deallocate all unused buffers */
+struct page *pgc_read(struct page_cache *, int fd, sh_off_t);  /* Read page and lock it */
+struct page *pgc_get(struct page_cache *, int fd, sh_off_t);   /* Get page for writing */
+struct page *pgc_get_zero(struct page_cache *, int fd, sh_off_t); /* ... and clear it */
+void pgc_put(struct page_cache *, struct page *);              /* Release page */
+void pgc_mark_dirty(struct page_cache *, struct page *);       /* Mark locked page as dirty */
+byte *pgc_read_data(struct page_cache *, int fd, sh_off_t, uns *);     /* Partial reading */
+
+#endif
diff --git a/lib/partmap.c b/lib/partmap.c

new file mode 100644 (file)

index 0000000..2b70f1d
--- /dev/null
+++ b/lib/partmap.c
@@ -0,0 +1,95 @@
+/*
+ *     UCW Library -- Mapping of File Parts
+ *
+ *     (c) 2003--2006 Martin Mares <mj@ucw.cz>
+ *     (c) 2003--2005 Robert Spalek <robert@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/lfs.h"
+#include "lib/partmap.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+
+/*
+ *  Upper limit on the size of the mapped window in bytes: effectively
+ *  unlimited when partmaps are configured to map whole files, a small
+ *  window for the test program, 16 MB otherwise.
+ */
+#ifdef CONFIG_PARTMAP_IS_MMAP
+#define PARTMAP_WINDOW ~(size_t)0
+#else
+#ifdef TEST
+#define PARTMAP_WINDOW 4096
+#else
+#define PARTMAP_WINDOW 16777216
+#endif
+#endif
+
+/*
+ *  Open the file <name> (read-write if <writeable> is set) for partial
+ *  mapping and find out its size. Failures are fatal. With
+ *  CONFIG_PARTMAP_IS_MMAP, the whole file is mapped immediately.
+ */
+struct partmap *
+partmap_open(char *name, int writeable)
+{
+  struct partmap *map = xmalloc_zero(sizeof(struct partmap));
+  int mode = writeable ? O_RDWR : O_RDONLY;
+
+  map->fd = sh_open(name, mode);
+  if (map->fd < 0)
+    die("open(%s): %m", name);
+  /* Seeking to the end of the file reveals its size */
+  map->file_size = sh_seek(map->fd, 0, SEEK_END);
+  if (map->file_size < 0)
+    die("lseek(%s): %m", name);
+  map->writeable = writeable;
+#ifdef CONFIG_PARTMAP_IS_MMAP
+  partmap_load(map, 0, map->file_size);
+#endif
+  return map;
+}
+
+/* Return the size of the file as determined by partmap_open(). */
+sh_off_t
+partmap_size(struct partmap *p)
+{
+  return p->file_size;
+}
+
+/* Unmap the current window (if there is any), close the file and free the structure. */
+void
+partmap_close(struct partmap *p)
+{
+  if (p->start_map)
+    munmap(p->start_map, p->end_off - p->start_off);
+  close(p->fd);
+  xfree(p);
+}
+
+/*
+ *  Replace the current window by a new one which covers <size> bytes
+ *  starting at the file position <start>. The window begins at <start>
+ *  rounded down to a multiple of CPU_PAGE_SIZE and is at most
+ *  PARTMAP_WINDOW bytes long, clamped to the page-aligned end of the file.
+ *  Dies if the requested range does not fit in the window or if mmap() fails.
+ */
+void
+partmap_load(struct partmap *p, sh_off_t start, uns size)
+{
+  if (p->start_map)
+    munmap(p->start_map, p->end_off - p->start_off);
+  sh_off_t end = start + size;
+  sh_off_t win_start = start/CPU_PAGE_SIZE * CPU_PAGE_SIZE;
+  sh_off_t avail = p->file_size - win_start;
+  size_t win_len = PARTMAP_WINDOW;
+  /*
+   *  Compare the lengths instead of computing win_start+win_len first:
+   *  with PARTMAP_WINDOW == ~(size_t)0 and a 64-bit size_t, the sum wraps
+   *  around, the window is never clamped and the check below dies.
+   *  A window length which does not fit in sh_off_t shows up as negative.
+   */
+  if ((sh_off_t) win_len < 0 || (sh_off_t) win_len > avail)
+    win_len = ALIGN_TO(avail, CPU_PAGE_SIZE);
+  if ((sh_off_t) (win_start+win_len) < end)
+    die("partmap_map: Window is too small for mapping %u bytes", size);
+  p->start_map = sh_mmap(NULL, win_len, p->writeable ? (PROT_READ | PROT_WRITE) : PROT_READ, MAP_SHARED, p->fd, win_start);
+  if (p->start_map == MAP_FAILED)
+    die("mmap failed at position %lld: %m", (long long)win_start);
+  p->start_off = win_start;
+  p->end_off = win_start+win_len;
+  madvise(p->start_map, win_len, MADV_SEQUENTIAL);
+}
+
+#ifdef TEST
+/* Test driver: copy the file given as the first argument to stdout, mapping it one byte at a time. */
+int main(int argc, char **argv)
+{
+  struct partmap *p = partmap_open(argv[1], 0);
+  uns l = partmap_size(p);
+  uns i;
+  for (i=0; i<l; i++)
+    putchar(*(char *)partmap_map(p, i, 1));
+  partmap_close(p);
+  return 0;
+}
+#endif
diff --git a/lib/partmap.h b/lib/partmap.h

new file mode 100644 (file)

index 0000000..985b362
--- /dev/null
+++ b/lib/partmap.h
@@ -0,0 +1,47 @@
+/*
+ *     UCW Library -- Mapping of File Parts
+ *
+ *     (c) 2003--2006 Martin Mares <mj@ucw.cz>
+ *     (c) 2003--2005 Robert Spalek <robert@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_PARTMAP_H
+#define _UCW_PARTMAP_H
+
+/* A file which is accessed through a movable memory-mapped window */
+struct partmap {
+  int fd;                              /* Descriptor of the open file */
+  sh_off_t file_size;                  /* Size of the file found by partmap_open() */
+  sh_off_t start_off, end_off;         /* File offsets of the start and end of the current window */
+  byte *start_map;                     /* Address of the window, NULL if nothing has been mapped yet */
+  int writeable;                       /* Non-zero if the file is open and mapped for writing */
+};
+
+struct partmap *partmap_open(char *name, int writeable);
+void partmap_close(struct partmap *p);
+sh_off_t partmap_size(struct partmap *p);
+void partmap_load(struct partmap *p, sh_off_t start, uns size);
+
+/*
+ *  Return the address of <size> bytes at the file position <start>.
+ *  Unless CONFIG_PARTMAP_IS_MMAP is defined, the window is reloaded when
+ *  nothing is mapped yet or when the requested range is not fully inside it,
+ *  so pointers returned by previous calls may become invalid.
+ */
+static inline void *
+partmap_map(struct partmap *p, sh_off_t start, uns size UNUSED)
+{
+#ifndef CONFIG_PARTMAP_IS_MMAP
+  if (unlikely(!p->start_map || start < p->start_off || (sh_off_t) (start+size) > p->end_off))
+    partmap_load(p, start, size);
+#endif
+  return p->start_map + (start - p->start_off);
+}
+
+/*
+ *  A variant of partmap_map() which checks only the end of the window,
+ *  i.e., it does not test whether <start> lies before the window's start.
+ *  NOTE(review): presumably meant for strictly forward-moving access; confirm with the callers.
+ */
+static inline void *
+partmap_map_forward(struct partmap *p, sh_off_t start, uns size UNUSED)
+{
+#ifndef CONFIG_PARTMAP_IS_MMAP
+  if (unlikely((sh_off_t) (start+size) > p->end_off))
+    partmap_load(p, start, size);
+#endif
+  return p->start_map + (start - p->start_off);
+}
+
+#endif
diff --git a/lib/patimatch.c b/lib/patimatch.c

new file mode 100644 (file)

index 0000000..a0e29af
--- /dev/null
+++ b/lib/patimatch.c
@@ -0,0 +1,16 @@
+/*
+ *     UCW Library -- Shell-Like Case-Insensitive Pattern Matching (currently only '?' and '*')
+ *
+ *     (c) 1997 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/chartype.h"
+
+/* Instantiate the generic matcher from lib/patmatch.h with characters converted by Cupcase() */
+#define Convert(x) Cupcase(x)
+#define MATCH_FUNC_NAME match_pattern_nocase
+
+#include "lib/patmatch.h"
diff --git a/lib/patmatch.c b/lib/patmatch.c

new file mode 100644 (file)

index 0000000..bfd8aa5
--- /dev/null
+++ b/lib/patmatch.c
@@ -0,0 +1,15 @@
+/*
+ *     UCW Library -- Shell-Like Pattern Matching (currently only '?' and '*')
+ *
+ *     (c) 1997 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+
+/* Instantiate the generic matcher from lib/patmatch.h with characters compared as they are */
+#define Convert(x) (x)
+#define MATCH_FUNC_NAME match_pattern
+
+#include "lib/patmatch.h"
diff --git a/lib/patmatch.h b/lib/patmatch.h

new file mode 100644 (file)

index 0000000..a47669f
--- /dev/null
+++ b/lib/patmatch.h
@@ -0,0 +1,46 @@
+/*
+ *     UCW Library -- Generic Shell-Like Pattern Matching (currently only '?' and '*')
+ *
+ *     (c) 1997 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+/*
+ *  Generic body of the pattern matcher. The including file defines
+ *  MATCH_FUNC_NAME (name of the resulting function) and Convert(x)
+ *  (normalization applied to each character before comparison).
+ *
+ *  Returns 1 if the whole string <s> matches the pattern <p>, 0 otherwise.
+ *  `?' matches any single character, `*' any (possibly empty) sequence of
+ *  characters and a backslash followed by a character matches that
+ *  character literally.
+ */
+int
+MATCH_FUNC_NAME(const char *p, const char *s)
+{
+  while (*p)
+    {
+      if (*p == '?' && *s)
+        p++, s++;
+      else if (*p == '*')
+        {
+          int z = p[1];
+
+          if (!z)
+            return 1;
+          if (z == '*')
+            {
+              /* Consecutive stars are equivalent to a single one */
+              p++;
+              continue;
+            }
+          if (z == '?')
+            {
+              /*
+               *  The rest of the pattern starts with a wildcard, so there is
+               *  no literal character to search for (looking for a literal `?'
+               *  would make "*?" fail on ordinary strings). Try every
+               *  position with at least one character left for the `?'.
+               */
+              for (; *s; s++)
+                if (MATCH_FUNC_NAME(p+1, s))
+                  return 1;
+              return 0;
+            }
+          if (z == '\\' && p[2])
+            z = p[2];
+          z = Convert(z);
+          for(;;)
+            {
+              /* Skip quickly to the next possible start of the rest of the pattern */
+              while (*s && Convert(*s) != z)
+                s++;
+              if (!*s)
+                return 0;
+              if (MATCH_FUNC_NAME(p+1, s))
+                return 1;
+              s++;
+            }
+        }
+      else
+        {
+          /* A literal character, possibly escaped by a backslash */
+          if (*p == '\\' && p[1])
+            p++;
+          if (Convert(*p++) != Convert(*s++))
+            return 0;
+        }
+    }
+  return !*s;
+}
diff --git a/lib/perl/CGI.pm b/lib/perl/CGI.pm

new file mode 100644 (file)

index 0000000..7d7cc45
--- /dev/null
+++ b/lib/perl/CGI.pm
@@ -0,0 +1,444 @@
+#      Poor Man's CGI Module for Perl
+#
+#      (c) 2002--2007 Martin Mares <mj@ucw.cz>
+#      Slightly modified by Tomas Valla <tom@ucw.cz>
+#
+#      This software may be freely distributed and used according to the terms
+#      of the GNU Lesser General Public License.
+
+# FIXME:
+# - respond with proper HTTP error codes
+# - if we get invalid parameters, generate HTTP error or redirect
+
+package UCW::CGI;
+
+# First of all, set up error handling, so that even errors during parsing
+# will be reported properly.
+
+# Variables to be set by the calling module:
+#      $UCW::CGI::error_mail           mail address of the script admin (optional)
+#                                      (this one has to be set in the BEGIN block!)
+#      $UCW::CGI::error_hook           function to be called for reporting errors
+
+my $error_reported;
+my $exit_code;
+my $debug = 0;
+
+# Report a fatal problem and die.
+#
+# The message is written to STDERR and then either passed to
+# $UCW::CGI::error_hook (if the caller has set one) or printed as a
+# plain-text response, optionally naming $UCW::CGI::error_mail.
+# Reporting happens only once: the final die re-enters the __DIE__ handler
+# installed below, which calls this function again, and the second call
+# must not produce another report.
+sub report_bug($)
+{
+       if (!defined $error_reported) {
+               $error_reported = 1;
+               print STDERR $_[0];
+               if (defined($UCW::CGI::error_hook)) {
+                       &$UCW::CGI::error_hook($_[0]);
+               } else {
+                       print "Content-type: text/plain\n\n";
+                       print "Internal bug:\n";
+                       print $_[0], "\n";
+                       print "Please notify $UCW::CGI::error_mail\n" if defined $UCW::CGI::error_mail;
+               }
+       }
+       die;
+}
+
+# Install the handlers at compile time, so that problems found while the rest
+# of the script is being compiled are reported as well. Warnings are treated
+# the same way as fatal errors.
+BEGIN {
+       $SIG{__DIE__} = sub { report_bug($_[0]); };
+       $SIG{__WARN__} = sub { report_bug("WARNING: " . $_[0]); };
+       $exit_code = 0;
+}
+
+# Force the process exit status to $exit_code (0 unless parse_args() set it).
+END {
+       $? = $exit_code;
+}
+
+use strict;
+use warnings;
+
+require Exporter;
+our $VERSION = 1.0;
+our @ISA = qw(Exporter);
+our @EXPORT = qw(&html_escape &url_escape &url_param_escape &self_ref &self_form &http_get);
+our @EXPORT_OK = qw();
+
+### Escaping ###
+
+# Percent-encode every character of the argument which is not allowed to
+# appear verbatim in a URL; bytes 0x80-0xff are passed through untouched.
+sub url_escape($) {
+       my ($text) = @_;
+       $text =~ s/([^-\$_.!*'(),0-9A-Za-z\x80-\xff])/"%".unpack('H2',$1)/ge;
+       $text;
+}
+
+# Like url_escape(), but spaces are written as "+" as usual in query strings.
+sub url_param_escape($) {
+       my $escaped = url_escape($_[0]);
+       $escaped =~ s/%20/+/g;
+       return $escaped;
+}
+
+# Replace the characters &, <, > and " by the corresponding HTML entities.
+# Single quotes are left alone.
+sub html_escape($) {
+       my ($text) = @_;
+       my %entity = ('&' => '&amp;', '<' => '&lt;', '>' => '&gt;', '"' => '&quot;');
+       # A single pass is enough: each special character is replaced independently
+       $text =~ s/([&<>"])/$entity{$1}/g;
+       return $text;
+}
+
+### Analysing RFC 822 Style Headers ###
+
+# Normalize the value of a RFC 822 style header for easy parsing by regexes:
+# backslash escapes and the contents of quoted strings are turned to %xx
+# escapes, comments in parentheses are dropped and redundant white space
+# is removed. Use rfc822_deescape() on the extracted tokens to get the
+# original characters back.
+sub rfc822_prepare($) {
+       my $x = shift @_;
+       # Convert all %'s and backslash escapes to %xx escapes
+       $x =~ s/%/%25/g;
+       $x =~ s/\\(.)/"%".unpack("H2",$1)/ge;
+       # Remove all comments, beware, they can be nested (unterminated comments are closed at EOL automatically)
+       while ($x =~ s/^(("[^"]*"|[^"(])*(\([^)]*)*)(\([^()]*(\)|$))/$1 /) { }
+       # Remove quotes and escape dangerous characters inside (again closing at the end automatically)
+       $x =~ s{"([^"]*)("|$)}{my $z=$1; $z =~ s/([^0-9a-zA-Z%_-])/"%".unpack("H2",$1)/ge; $z;}ge;
+       # All control characters are properly escaped, tokens are clearly visible.
+       # Finally remove all unnecessary spaces.
+       $x =~ s/\s+/ /g;
+       $x =~ s/(^ | $)//g;
+       # Drop the spaces around separator characters
+       $x =~ s{\s*([()<>@,;:\\"/\[\]?=])\s*}{$1}g;
+       return $x;
+}
+
+# Undo the %xx escaping introduced by rfc822_prepare().
+sub rfc822_deescape($) {
+       my ($token) = @_;
+       $token =~ s/%(..)/pack("H2",$1)/ge;
+       $token;
+}
+
+### Reading of HTTP headers ###
+
+# Return the value of the given HTTP request header as passed by the web
+# server in the environment: the name is upper-cased, dashes become
+# underscores and both HTTP_<name> and plain <name> are tried.
+sub http_get($) {
+       my ($header) = @_;
+       (my $key = $header) =~ tr/a-z-/A-Z_/;
+       my $value = $ENV{"HTTP_$key"};
+       return $value if $value;
+       return $ENV{"$key"};
+}
+
+### Parsing of Arguments ###
+
+my $arg_table;
+
+# Parse a string of URL-encoded "name=value" pairs separated by "&" or ":"
+# and store the values of all arguments known to $arg_table.
+#
+# Every value is URL-decoded, its line ends are normalized to "\n", newlines
+# and tabs are turned to spaces unless the argument is marked 'multiline',
+# and surrounding white space is stripped. If the argument has a 'check'
+# regex which does not match the whole value, the 'default' is used instead.
+# The result is assigned to the scalar or appended to the array referenced
+# by 'var'. Unknown names and items without "=" are silently ignored.
+sub parse_arg_string($) {
+       my ($s) = @_;
+       # NOTE(review): without /g this removes only the first run of white space -- confirm that is intended
+       $s =~ s/\s+//;
+       foreach $_ (split /[&:]/,$s) {
+               (/^([^=]+)=(.*)$/) or next;
+               # The name is looked up as received, it is not URL-decoded
+               my $arg = $arg_table->{$1} or next;
+               $_ = $2;
+               s/\+/ /g;
+               s/%(..)/pack("H2",$1)/eg;
+               s/\r\n/\n/g;
+               s/\r/\n/g;
+               $arg->{'multiline'} || s/(\n|\t)/ /g;
+               s/^\s+//;
+               s/\s+$//;
+               if (my $rx = $arg->{'check'}) {
+                       if (!/^$rx$/) { $_ = $arg->{'default'}; }
+               }
+
+               # Arguments whose 'var' is neither a scalar nor an array reference are dropped
+               my $r = ref($arg->{'var'});
+               if ($r eq 'SCALAR') {
+                       ${$arg->{'var'}} = $_;
+               } elsif ($r eq 'ARRAY') {
+                       push @{$arg->{'var'}}, $_;
+               }
+       }
+}
+
+sub parse_multipart_form_data();
+
+# Main entry point: parse all arguments of the current CGI request.
+#
+# The parameter is a reference to a hash mapping argument names to
+# descriptions (hashes with keys like 'var', 'default', 'check', ...);
+# it is remembered in $arg_table for use by the other functions.
+# All variables are first reset to their defaults (missing defaults become
+# ""), then the query string is parsed and for POST requests also the
+# request body read from STDIN. Dies on an unknown request method or an
+# unknown content type of the POST data.
+sub parse_args($) {
+       $arg_table = shift @_;
+       if (!defined $ENV{"GATEWAY_INTERFACE"}) {
+               print STDERR "Must be called as a CGI script.\n";
+               # The END block turns $exit_code into the exit status
+               $exit_code = 1;
+               exit;
+       }
+       foreach my $a (values %$arg_table) {
+               my $r = ref($a->{'var'});
+               defined($a->{'default'}) or $a->{'default'}="";
+               if ($r eq 'SCALAR') {
+                       ${$a->{'var'}} = $a->{'default'};
+               } elsif ($r eq 'ARRAY') {
+                       @{$a->{'var'}} = ();
+               }
+       }
+       my $method = $ENV{"REQUEST_METHOD"};
+       my $qs = $ENV{"QUERY_STRING"};
+       # The query string is parsed for all methods, POST data are added on top of it
+       parse_arg_string($qs) if defined($qs);
+       if ($method eq "GET") {
+       } elsif ($method eq "POST") {
+               if ($ENV{"CONTENT_TYPE"} =~ /^application\/x-www-form-urlencoded\b/i) {
+                       while (<STDIN>) {
+                               chomp;
+                               parse_arg_string($_);
+                       }
+               } elsif ($ENV{"CONTENT_TYPE"} =~ /^multipart\/form-data\b/i) {
+                       parse_multipart_form_data();
+               } else {
+                       die "Unknown content type for POST data";
+               }
+       } else {
+               die "Unknown request method";
+       }
+}
+
+### Parsing Multipart Form Data ###
+
+my $boundary;
+my $boundary_len;
+my $mp_buffer;
+my $mp_buffer_i;
+my $mp_buffer_boundary;
+my $mp_eof;
+
+# Make sure that data of the current part are available in $mp_buffer.
+#
+# $more is the number of bytes the caller would like to have. The return
+# value is the number of bytes starting at $mp_buffer_i which can be consumed
+# as part data; it is 0 when the read position has reached a boundary (or the
+# end of input). When new data have to be read, the already consumed prefix
+# of the buffer is thrown away, so $mp_buffer_i can be reset to 0.
+sub refill_mp_data($) {
+       my ($more) = @_;
+       if ($mp_buffer_boundary >= $mp_buffer_i) {
+               # Position of the next boundary is known: everything up to it is data
+               return $mp_buffer_boundary - $mp_buffer_i;
+       } elsif ($mp_buffer_i + $more <= length($mp_buffer) - $boundary_len) {
+               # Enough buffered data, not counting the last $boundary_len bytes
+               # (presumably kept back because they could be the start of a boundary
+               # which has not been completely read yet)
+               return $more;
+       } else {
+               if ($mp_buffer_i) {
+                       $mp_buffer = substr($mp_buffer, $mp_buffer_i);
+                       $mp_buffer_i = 0;
+               }
+               while ($mp_buffer_i + $more > length($mp_buffer) - $boundary_len) {
+                       last if $mp_eof;
+                       my $data;
+                       my $n = read(STDIN, $data, 2048);
+                       if ($n > 0) {
+                               $mp_buffer .= $data;
+                       } else {
+                               $mp_eof = 1;
+                       }
+               }
+               # $mp_buffer_i is 0 here, so buffer offsets are also byte counts
+               $mp_buffer_boundary = index($mp_buffer, $boundary, $mp_buffer_i);
+               if ($mp_buffer_boundary >= 0) {
+                       return $mp_buffer_boundary;
+               } elsif ($mp_eof) {
+                       return length($mp_buffer);
+               } else {
+                       return length($mp_buffer) - $boundary_len;
+               }
+       }
+}
+
+# Read one CRLF-terminated line of the current part and return it without
+# the line terminator.
+#
+# If no complete line is available before the next boundary, the behaviour
+# depends on $allow_empty: when true, the remaining data are returned as
+# an incomplete line, or undef if there are none; when false, we die.
+sub get_mp_line($) {
+       my ($allow_empty) = @_;
+       my $n = refill_mp_data(1024);
+       my $i = index($mp_buffer, "\r\n", $mp_buffer_i);
+       # Both bytes of the CRLF must lie inside the $n available bytes
+       if ($i >= $mp_buffer_i && $i < $mp_buffer_i + $n - 1) {
+               my $s = substr($mp_buffer, $mp_buffer_i, $i - $mp_buffer_i);
+               $mp_buffer_i = $i + 2;
+               return $s;
+       } elsif ($allow_empty) {
+               if ($n) {                                                       # An incomplete line
+                       my $s = substr($mp_buffer, $mp_buffer_i, $n);
+                       $mp_buffer_i += $n;
+                       return $s;
+               } else {                                                        # No more lines
+                       return undef;
+               }
+       } else {
+               die "Premature end of multipart POST data";
+       }
+}
+
+# Skip the boundary line at the current position of the buffer.
+#
+# The read position must be exactly at a boundary, otherwise we die.
+# Returns 1 if another part follows and 0 if this was the closing boundary
+# (the boundary string followed by "--").
+sub skip_mp_boundary() {
+       if ($mp_buffer_boundary != $mp_buffer_i) {
+               die "Premature end of multipart POST data";
+       }
+       # Forget the boundary, so that get_mp_line() can read the boundary line itself
+       $mp_buffer_boundary = -1;
+       # $boundary starts with CRLF, skip it to get to the start of the line
+       $mp_buffer_i += 2;
+       my $b = get_mp_line(0);
+       print STDERR "SEP $b\n" if $debug;
+       $mp_buffer_boundary = index($mp_buffer, $boundary, $mp_buffer_i);
+       # The boundary is an arbitrary string chosen by the client, so it must be
+       # quoted to be matched literally -- characters like "+", "?" or "(" are
+       # allowed in boundaries, but they are special in regular expressions.
+       if ("\r\n$b" =~ /^\Q$boundary\E--/) {
+               return 0;
+       } else {
+               return 1;
+       }
+}
+
+# Read the header lines of one part up to the empty line which ends them.
+#
+# Returns a reference to a hash mapping lower-cased header names to their
+# values preprocessed by rfc822_prepare(), or undef if no header was found.
+# Dies (inside get_mp_line()) when the data end before the empty line.
+sub parse_mp_header() {
+       my $h = {};
+       my $last;
+       while ((my $l = get_mp_line(0)) ne "") {
+               print STDERR "HH $l\n" if $debug;
+               if (my ($name, $value) = ($l =~ /([A-Za-z0-9-]+)\s*:\s*(.*)/)) {
+                       $name =~ tr/A-Z/a-z/;
+                       $h->{$name} = $value;
+                       $last = $name;
+               } elsif ($l =~ /^\s+/ && $last) {
+                       # A continuation line belongs to the previous header
+                       $h->{$last} .= $l;
+               } else {
+                       # Unparseable line: ignore it together with its continuations
+                       $last = undef;
+               }
+       }
+       foreach my $n (keys %$h) {
+               $h->{$n} = rfc822_prepare($h->{$n});
+               print STDERR "H $n: $h->{$n}\n" if $debug;
+       }
+       return (keys %$h) ? $h : undef;
+}
+
+# Parse a request body of type multipart/form-data read from STDIN.
+#
+# Parts whose name is known to $arg_table are processed: arguments with 'var'
+# are passed line by line to parse_arg_string(), arguments with 'file' are
+# stored to a temporary file whose name is assigned to the variable
+# referenced by 'file' (and its handle to the one referenced by 'fh', if
+# present). All other parts are skipped. Dies on malformed data, on a
+# Content-Length above the sum of the size limits, or on a file longer than
+# its 'maxsize'.
+sub parse_multipart_form_data() {
+       # First of all, find the boundary string
+       my $ct = rfc822_prepare($ENV{"CONTENT_TYPE"});
+       if (!(($boundary) = ($ct =~ /^.*;boundary=([^; ]+)/))) {
+               die "Multipart content with no boundary string received";
+       }
+       $boundary = rfc822_deescape($boundary);
+       print STDERR "BOUNDARY IS $boundary\n" if $debug;
+
+       # BUG: IE 3.01 on Macintosh forgets to add the "--" at the start of the boundary string
+       # as the MIME specs preach. Workaround borrowed from CGI.pm in Perl distribution.
+       my $agent = http_get("User-agent") || "";
+       $boundary = "--$boundary" unless $agent =~ /MSIE\s+3\.0[12];\s*Mac/;
+       # The boundary is always searched for including the preceding line end
+       $boundary = "\r\n$boundary";
+       $boundary_len = length($boundary) + 2;
+
+       # Check upload size in advance
+       # NOTE(review): the default limit used here (65536) differs from the default
+       # 'maxsize' of uploaded files below (1048576) -- confirm that this is intended
+       if (my $size = http_get("Content-Length")) {
+               my $max_allowed = 0;
+               foreach my $a (values %$arg_table) {
+                       $max_allowed += $a->{"maxsize"} || 65536;
+               }
+               if ($size > $max_allowed) {
+                       die "Maximum form data length exceeded";
+               }
+       }
+
+       # Initialize our buffering mechanism and part splitter
+       # (the buffer starts with a line end, so that a boundary at the very start of data is found)
+       $mp_buffer = "\r\n";
+       $mp_buffer_i = 0;
+       $mp_buffer_boundary = -1;
+       $mp_eof = 0;
+
+       # Skip garbage before the 1st part
+       while (my $i = refill_mp_data(256)) { $mp_buffer_i += $i; }
+       skip_mp_boundary() || return;
+
+       # Process individual parts
+       # ("next PART" leaves the inner block and continues by skipping the boundary)
+       do { PART: {
+               print STDERR "NEXT PART\n" if $debug;
+               my $h = parse_mp_header();
+               my ($field, $cdisp, $a);
+               if ($h &&
+                   ($cdisp = $h->{"content-disposition"}) &&
+                   $cdisp =~ /^form-data/ &&
+                   (($field) = ($cdisp =~ /;name=([^;]+)/)) &&
+                   ($a = $arg_table->{"$field"})) {
+                       print STDERR "FIELD $field\n" if $debug;
+                       if (defined $h->{"content-transfer-encoding"}) { die "Unexpected Content-Transfer-Encoding"; }
+                       if (defined $a->{"var"}) {
+                               while (defined (my $l = get_mp_line(1))) {
+                                       print STDERR "VALUE $l\n" if $debug;
+                                       parse_arg_string("$field=$l");
+                               }
+                               next PART;
+                       } elsif (defined $a->{"file"}) {
+                               require File::Temp;
+                               require IO::Handle;
+                               my $max_size = $a->{"maxsize"} || 1048576;
+                               my @tmpargs = (undef, UNLINK => 1);
+                               push @tmpargs, DIR => $a->{"tmpdir"} if defined $a->{"tmpdir"};
+                               my ($fh, $fn) = File::Temp::tempfile(@tmpargs);
+                               print STDERR "FILE UPLOAD to $fn\n" if $debug;
+                               ${$a->{"file"}} = $fn;
+                               ${$a->{"fh"}} = $fh if defined $a->{"fh"};
+                               my $total_size = 0;
+                               while (my $i = refill_mp_data(4096)) {
+                                       print $fh substr($mp_buffer, $mp_buffer_i, $i);
+                                       $mp_buffer_i += $i;
+                                       $total_size += $i;
+                                       if ($total_size > $max_size) { die "Uploaded file too long"; }
+                               }
+                               $fh->flush();   # Don't close the handle, the file would disappear otherwise
+                               next PART;
+                       }
+               }
+               # Unknown or unusable part: throw its data away
+               print STDERR "SKIPPING\n" if $debug;
+               while (my $i = refill_mp_data(256)) { $mp_buffer_i += $i; }
+       } } while (skip_mp_boundary());
+}
+
+### Generating Self-ref URL's ###
+
+# Compute the set of arguments a self-referencing URL or form should carry.
+#
+# $overrides maps argument names to new values; an existing key with an
+# undefined value means "use the default". Arguments without 'var' are
+# skipped, and so are those with a defined but false 'pass' unless they are
+# overridden. Only values different from the default are returned
+# (as a reference to a hash indexed by argument name).
+sub make_out_args($) {
+       my ($overrides) = @_;
+       my %result;
+       for my $key (keys %$arg_table) {
+               my $spec = $arg_table->{$key};
+               next unless defined($spec->{'var'});
+               my $overridden = exists $overrides->{$key};
+               next if defined($spec->{'pass'}) && !$spec->{'pass'} && !$overridden;
+               my $val = $overrides->{$key};
+               unless (defined $val) {
+                       $val = $overridden ? $spec->{'default'} : ${$spec->{'var'}};
+               }
+               $result{$key} = $val if $val ne $spec->{'default'};
+       }
+       return \%result;
+}
+
+# Return a relative URL ("?name=value:name=value...") referring to the
+# current script with the current arguments modified by the name => value
+# pairs given as parameters.
+sub self_ref(@) {
+       my $args = make_out_args({ @_ });
+       my @pairs;
+       foreach my $name (sort keys %$args) {
+               push @pairs, "$name=" . url_param_escape($args->{$name});
+       }
+       return "?" . join(':', @pairs);
+}
+
+# Return a sequence of hidden <input> elements carrying the current
+# arguments modified by the name => value pairs given as parameters.
+# It is meant to be inserted to a form referring back to the script.
+sub self_form(@) {
+       my %h = @_;
+       my $out = make_out_args(\%h);
+       # The value must be enclosed in double quotes: html_escape() escapes only
+       # double quotes, so a value containing an apostrophe would terminate
+       # a single-quoted attribute prematurely.
+       return join('', map { "<input type=hidden name=$_ value=\"" . html_escape($out->{$_}) . "\">\n" } sort keys %$out);
+}
+
+### Cookies
+
+# Format a cookie value: strings consisting only of letters, digits and "%"
+# are returned as they are, anything else (including the empty string)
+# becomes a quoted string with backslashes and quotes escaped.
+sub cookie_esc($) {
+       my ($value) = @_;
+       return $value if $value =~ /^[a-zA-Z0-9%]+$/;
+       $value =~ s/([\\\"])/\\$1/g;
+       return "\"$value\"";
+}
+
+# Print a Set-Cookie header for cookie $key with the given value.
+# Further name => value pairs are added as cookie attributes;
+# the attribute "version" defaults to 1.
+sub set_cookie($$@) {
+       my ($name, $val, %attrs) = @_;
+       $attrs{'version'} = 1 if !defined $attrs{'version'};
+       print "Set-Cookie: $name=", cookie_esc($val);
+       print ";$_=", cookie_esc($attrs{$_}) for keys %attrs;
+       print "\n";
+}
+
+# Parse the Cookie header of the request.
+#
+# Returns a flat list of (name, value) pairs in order of their appearance,
+# or an empty list if the header is missing. Quoted values are returned
+# without the quotes and with backslash escapes removed.
+sub parse_cookies() {
+       my $h = http_get("Cookie") or return ();
+       my @cook = ();
+       # Each iteration cuts one "name=value" off the start of $h;
+       # $padding and $xx are only by-products of the regex groups
+       while (my ($padding,$name,$val,$xx,$rest) = ($h =~ /\s*([,;]\s*)*([^ =]+)=([^ =,;\"]*|\"([^\"\\]|\\.)*\")(\s.*|;.*|$)/)) {
+               if ($val =~ /^\"/) {
+                       $val =~ s/^\"//;
+                       $val =~ s/\"$//;
+                       $val =~ s/\\(.)/$1/g;
+               }
+               push @cook, $name, $val;
+               $h = $rest;
+       }
+       return @cook;
+}
+
+1;  # OK
diff --git a/lib/perl/Config.pm b/lib/perl/Config.pm

new file mode 100644 (file)

index 0000000..552690c
--- /dev/null
+++ b/lib/perl/Config.pm
@@ -0,0 +1,54 @@
+#      Perl module for parsing Sherlock configuration files (using the config utility)
+#
+#      (c) 2002--2005 Martin Mares <mj@ucw.cz>
+#
+#      This software may be freely distributed and used according to the terms
+#      of the GNU Lesser General Public License.
+
+package UCW::Config;
+
+use strict;
+use warnings;
+use Getopt::Long;
+
+our %Sections = ();
+
+our $DefaultConfigFile = "";
+our $Usage = "-C, --config filename   Override the default configuration file
+-S, --set sec.item=val  Manual setting of a configuration item";
+
+
+# Parse command-line options and load configuration.
+#
+# The parameters are option specifications in the Getopt::Long format; the
+# standard options -C (--config) and -S (--set) are added to them. For each
+# section listed in %Sections, bin/config is run to obtain the values of the
+# requested items, which are then stored to the scalars or appended to the
+# arrays referenced by the section's hash (markers "#" and "$" at the end of
+# item names are ignored when the values are assigned).
+#
+# Returns 1 on success, 0 if the options could not be parsed. Exits with
+# status 1 when bin/config fails and dies when its output is not understood.
+sub Parse(@) {
+       my @options = @_;
+       my $defargs = "";
+       my $override_config = 0;
+       # Quote a string for the shell, so that file names and values
+       # containing apostrophes cannot break the command line built below
+       my $shq = sub { my ($s) = @_; $s =~ s/'/'\\''/g; return "'$s'"; };
+       push @options, "config|C=s" => sub { my ($o,$a)=@_; $defargs .= " -C" . $shq->($a); $override_config=1; };
+       push @options, "set|S=s" => sub { my ($o,$a)=@_; $defargs .= " -S" . $shq->($a); };
+       Getopt::Long::Configure("bundling");
+       Getopt::Long::GetOptions(@options) or return 0;
+       if (!$override_config && $DefaultConfigFile) {
+               $defargs = "-C" . $shq->($DefaultConfigFile) . " $defargs";
+       }
+       foreach my $section (keys %Sections) {
+               my $opts = $Sections{$section};
+               my $optlist = join(";", keys %$opts);
+               my %filtered_opts = map { my $t=$_; $t=~s/[#\$]+$//; $t => $$opts{$_} } keys %$opts;
+               my @l = `bin/config $defargs "$section\{$optlist\}"`;
+               $? && exit 1;
+               foreach my $o (@l) {
+                       # Report the offending line itself ($_ is not set in this loop)
+                       $o =~ /^CF_.*_([^=]+)='(.*)'\n$/ or die "Cannot parse bin/config output: $o";
+                       my $var = $filtered_opts{$1};
+                       my $val = $2;
+                       if (ref $var eq "SCALAR") {
+                               $$var = $val;
+                       } elsif (ref $var eq "ARRAY") {
+                               push @$var, $val;
+                       } elsif (ref $var) {
+                               die ("UCW::Config::Parse: don't know how to set $o");
+                       }
+               }
+       }
+       1;
+}
+
+1;  # OK
diff --git a/lib/perl/Configure.pm b/lib/perl/Configure.pm

new file mode 100644 (file)

index 0000000..bd1a7cc
--- /dev/null
+++ b/lib/perl/Configure.pm
@@ -0,0 +1,190 @@
+#      Perl module for UCW Configure Scripts
+#
+#      (c) 2005 Martin Mares <mj@ucw.cz>
+#
+#      This software may be freely distributed and used according to the terms
+#      of the GNU Lesser General Public License.
+
+package UCW::Configure;
+
+use strict;
+use warnings;
+
+# Export the whole configure-script vocabulary to the caller's name space
+# by default; nothing is exported on request only.
+BEGIN {
+       # The somewhat hairy Perl export mechanism
+       use Exporter();
+       our ($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);
+       $VERSION = 1.0;
+       @ISA = qw(Exporter);
+       @EXPORT = qw(&Init &Log &Notice &Warn &Fail &IsSet &Set &UnSet &Append &Override &Get &Test &Include &Finish &FindFile &TryFindFile);
+       @EXPORT_OK = qw();
+       %EXPORT_TAGS = ();
+}
+
+our %vars = ();
+our %overriden = ();
+
+# Print a progress message to the standard output as it is
+# (the caller supplies the newline).
+sub Log($) {
+       my ($text) = @_;
+       print $text;
+}
+
+# Print a message only if the configuration variable VERBOSE is set to true.
+sub Notice($) {
+       my ($text) = @_;
+       print $text if $vars{"VERBOSE"};
+}
+
+# Print a message prefixed by "WARNING: " to the standard output.
+sub Warn($) {
+       my ($text) = @_;
+       print "WARNING: ", $text;
+}
+
+# Log an error message (a newline is appended) and exit with status 1.
+sub Fail($) {
+       my ($reason) = @_;
+       Log("ERROR: " . $reason . "\n");
+       exit 1;
+}
+
+# Check whether the given configuration variable exists (whatever its value is).
+sub IsSet($) {
+       my ($name) = @_;
+       exists $vars{$name};
+}
+
+# Return the value of the given configuration variable (undef if it does not exist).
+sub Get($) {
+       my ($name) = @_;
+       $vars{$name};
+}
+
+# Set a configuration variable to the given value (1 if no value is given).
+# Variables fixed by Override() are left unchanged.
+sub Set($;$) {
+       my ($name, $value) = @_;
+       return if $overriden{$name};
+       $vars{$name} = defined($value) ? $value : 1;
+}
+
+# Remove a configuration variable, unless it has been fixed by Override().
+sub UnSet($) {
+       my ($name) = @_;
+       return if $overriden{$name};
+       delete $vars{$name};
+}
+
+# Append a word to a space-separated list kept in a configuration variable;
+# the variable is created if it does not exist yet.
+sub Append($$) {
+       my ($name, $word) = @_;
+       if (IsSet($name)) {
+               Set($name, Get($name) . " $word");
+       } else {
+               Set($name, $word);
+       }
+}
+
+# Set a configuration variable unconditionally (to 1 if no value is given)
+# and protect it against later changes by Set() and UnSet().
+sub Override($;$) {
+       my ($name, $value) = @_;
+       $vars{$name} = defined($value) ? $value : 1;
+       $overriden{$name} = 1;
+}
+
+# Run a configuration test: print the message, set variable $var to the
+# result of calling $sub unless the variable already exists, and print
+# the resulting value.
+sub Test($$$) {
+       my ($var, $msg, $probe) = @_;
+       Log("$msg ... ");
+       Set($var, $probe->()) unless IsSet($var);
+       Log(Get($var) . "\n");
+}
+
+# Look for a file first under the name given, then (for relative names)
+# in the directory named by the SRCDIR variable. Returns the path which
+# was found, or undef.
+sub TryFindFile($) {
+       my ($path) = @_;
+       return $path if -f $path;
+       if ($path !~ /^\//) {
+               my $in_srcdir = Get("SRCDIR")."/$path";
+               return $in_srcdir if -f $in_srcdir;
+       }
+       return undef;
+}
+
+# Like TryFindFile(), but a missing file is a fatal error.
+sub FindFile($) {
+       my ($f) = @_;
+       my $found = TryFindFile($f);
+       Fail "Cannot find file $f" unless defined $found;
+       return $found;
+}
+
+# Initialize the configure script.
+#
+# $srcdir is the directory with the sources, $defconfig the name of the
+# default configuration (undef if there is none and the user has to choose).
+# The command line in @ARGV is interpreted as
+#      [<config-name>] [<option>[=<value>] | -<option>] ...
+# Options given there are set by Override(), so they win over anything the
+# configuration files try to set later. Exits with status 1 on --help
+# or on invalid arguments.
+sub Init($$) {
+       my ($srcdir,$defconfig) = @_;
+       if ((!defined $defconfig && !@ARGV) || @ARGV && $ARGV[0] eq "--help") {
+               print STDERR "Usage: [<srcdir>/]configure " . (defined $defconfig ? "[" : "") . "<config-name>" . (defined $defconfig ? "]" : "") .
+                       " [<option>[=<value>] | -<option>] ...\n";
+               exit 1;
+       }
+       # The first argument is the name of the configuration, unless it looks like an option setting
+       if (@ARGV && $ARGV[0] !~ /=/) {
+               Set('CONFIG' => shift @ARGV);
+       } else {
+               # NOTE(review): if $defconfig is undefined here, Set() stores 1 as the configuration name
+               Set('CONFIG' => $defconfig);
+       }
+       Set("SRCDIR", $srcdir);
+
+       foreach my $x (@ARGV) {
+               if ($x =~ /^(\w+)=(.*)/) {
+                       Override($1 => $2);
+               } elsif ($x =~ /^-(\w+)$/) {
+                       # Keep the variable unset, but marked as overridden, so that nobody can set it
+                       Override($1 => 0);
+                       delete $vars{$1};
+               } elsif ($x =~ /^(\w+)$/) {
+                       Override($1 => 1);
+               } else {
+                       print STDERR "Invalid option $x\n";
+                       exit 1;
+               }
+       }
+
+       # The configuration can be either a file or a directory containing a file called "config"
+       if (!TryFindFile(Get("CONFIG"))) {
+               TryFindFile(Get("CONFIG")."/config") or Fail "Cannot find configuration " . Get("CONFIG");
+               Override("CONFIG" => Get("CONFIG")."/config");
+       }
+}
+
+# Load a configuration file (a piece of Perl code) found by FindFile().
+sub Include($) {
+       my $path = FindFile($_[0]);
+       Notice("Loading configuration $path\n");
+       require $path;
+}
+
+# Finish the configuration: prepare the build tree and write the results.
+#
+# When configuring outside the source directory, "src" and "Makefile" are
+# created as symlinks to the sources. Then obj/ is created from scratch
+# and all configuration variables are written to obj/autoconf.h (as
+# #define's, only the variables with an underscore in their name) and to
+# obj/config.mk (as make variables). Any failure is fatal.
+sub Finish() {
+       print "\n";
+
+       if (Get("SRCDIR") ne ".") {
+               Log "Preparing for compilation from directory " . Get("SRCDIR") . " to obj/ ... ";
+               -l "src" and unlink "src";
+               symlink Get("SRCDIR"), "src" or Fail "Cannot link source directory to src: $!";
+               Override("SRCDIR" => "src");
+               -l "Makefile" and unlink "Makefile";
+               -f "Makefile" and Fail "Makefile already exists";
+               symlink "src/Makefile", "Makefile" or Fail "Cannot link Makefile: $!";
+       } else {
+               Log "Preparing for compilation from current directory to obj/ ... ";
+       }
+       if (-d "obj") {
+               # $? is meaningful only if rm has really been run; otherwise it could
+               # still hold the status of a command run by one of the tests
+               `rm -rf obj`;
+               Fail "Cannot delete old obj directory" if $?;
+       }
+       -d "obj" or mkdir("obj", 0777) or Fail "Cannot create obj directory: $!";
+       -d "obj/lib" or mkdir("obj/lib", 0777) or Fail "Cannot create obj/lib directory: $!";
+       Log "done\n";
+
+       Log "Generating autoconf.h ... ";
+       open X, ">obj/autoconf.h" or Fail $!;
+       print X "/* Generated automatically by $0, please don't touch manually. */\n";
+       foreach my $x (sort keys %vars) {
+               # Don't export variables which contain no underscores
+               next unless $x =~ /_/;
+               my $v = $vars{$x};
+               # Try to add quotes if necessary
+               $v = '"' . $v . '"' unless ($v =~ /^"/ || $v =~ /^\d*$/);
+               print X "#define $x $v\n";
+       }
+       # Write errors (e.g., a full disk) can show up as late as in close
+       close X or Fail "Cannot write obj/autoconf.h: $!";
+       Log "done\n";
+
+       Log "Generating config.mk ... ";
+       open X, ">obj/config.mk" or Fail $!;
+       print X "# Generated automatically by $0, please don't touch manually.\n";
+       foreach my $x (sort keys %vars) {
+               print X "$x=$vars{$x}\n";
+       }
+       print X "s=\${SRCDIR}\n";
+       print X "o=obj\n";
+       close X or Fail "Cannot write obj/config.mk: $!";
+       Log "done\n";
+}
+
+1;  # OK
diff --git a/lib/perl/Filelock/Filelock.pm b/lib/perl/Filelock/Filelock.pm

new file mode 100644 (file)

index 0000000..a81865e
--- /dev/null
+++ b/lib/perl/Filelock/Filelock.pm
@@ -0,0 +1,32 @@
+# Perl module for managing file locks
+#
+# (c) 2007 Pavel Charvat <pchar@ucw.cz>
+#
+# This software may be freely distributed and used according to the terms
+# of the GNU Lesser General Public License.
+#
+#
+#
+# Interface:
+#   UCW::Filelock::fcntl_lock($fd, $cmd, $type, $whence, $start, $len)
+#
+
+package UCW::Filelock;
+
+use 5.006;
+use strict;
+use warnings;
+
+require DynaLoader;
+
+our @ISA = qw(DynaLoader);
+# Let DynaLoader search also the directory "lib" (relative to the current directory)
+unshift @DynaLoader::dl_library_path, "lib";
+
+our $VERSION = '0.01';
+
+# Load the compiled part of the module (see Filelock.xs), which defines fcntl_lock()
+bootstrap UCW::Filelock $VERSION;
+
+# Preloaded methods go here.
+
+1;
+__END__
diff --git a/lib/perl/Filelock/Filelock.xs b/lib/perl/Filelock/Filelock.xs

new file mode 100644 (file)

index 0000000..df92e03
--- /dev/null
+++ b/lib/perl/Filelock/Filelock.xs
@@ -0,0 +1,30 @@
+/*
+ * PerlXS module for managing file locks
+ *
+ * (c) 2007 Pavel Charvat <pchar@ucw.cz>
+ */
+
+#include "EXTERN.h"
+#include "perl.h"
+#include "XSUB.h"
+
+#include <unistd.h>
+#include <fcntl.h>
+
+
+MODULE = UCW::Filelock         PACKAGE = UCW::Filelock
+
+PROTOTYPES: ENABLED
+
+int
+fcntl_lock(IN int fd, IN int cmd, IN int type, IN int whence, IN int start, IN int len)
+CODE:
+       /*
+        * Thin wrapper around fcntl(fd, cmd, &fl) with the struct flock built
+        * from the remaining arguments; returns whatever fcntl() returns.
+        * Only the four fields below are filled in and the structure is not
+        * passed back to the caller.
+        * NOTE(review): start and len are plain ints here -- confirm that this
+        * range is sufficient for all callers.
+        */
+       struct flock fl;
+       fl.l_type = type;
+       fl.l_whence = whence;
+       fl.l_start = start;
+       fl.l_len = len;
+
+       RETVAL = fcntl(fd, cmd, &fl);
+OUTPUT:
+       RETVAL
diff --git a/lib/perl/Filelock/MANIFEST b/lib/perl/Filelock/MANIFEST

new file mode 100644 (file)

index 0000000..c70159f
--- /dev/null
+++ b/lib/perl/Filelock/MANIFEST
@@ -0,0 +1,6 @@
+Makefile
+Makefile.PL
+MANIFEST
+Filelock.pm
+Filelock.xs
+lib/Sherlock/.exists
diff --git a/lib/perl/Filelock/Makefile b/lib/perl/Filelock/Makefile

new file mode 100644 (file)

index 0000000..de94cc0
--- /dev/null
+++ b/lib/perl/Filelock/Makefile
@@ -0,0 +1,24 @@
+# Makefile for the Filelock Perl module  (c) 2007 Pavel Charvat <pchar@ucw.cz>
+
+DIRS+=lib/perl/Filelock/arch/auto/UCW/Filelock
+FILELOCK_DIR=lib/perl/Filelock
+
+PROGS+=$(o)/lib/perl/Filelock/Filelock.pm
+
+# The shared object with the compiled part of the module is built as a part of "extras"
+extras:: $(o)/lib/perl/Filelock/arch/auto/UCW/Filelock/Filelock.$(SOEXT)
+
+# Build the module inside the object tree by the Makefile generated by MakeMaker
+# and copy the resulting shared object to run/$(DATADIR)/.
+# (No rule for the copy of Filelock.pm is given here -- presumably the generic
+# rules for PROGS take care of it.)
+$(o)/lib/perl/Filelock/arch/auto/UCW/Filelock/Filelock.$(SOEXT): $(o)/$(FILELOCK_DIR)/Filelock.xs $(o)/$(FILELOCK_DIR)/Filelock.pm $(o)/$(FILELOCK_DIR)/Makefile
+       $(M)MAKE $@
+       $(Q)cd $(o)/$(FILELOCK_DIR) && $(MAKE) -f Makefile $(MAKESILENT)
+       $(Q)touch $@
+       $(Q)cp $@ run/$(DATADIR)/
+
+# Let MakeMaker generate the Makefile of the module
+$(o)/$(FILELOCK_DIR)/Makefile: $(o)/$(FILELOCK_DIR)/Makefile.PL
+       $(M)PREPARE $@
+       $(Q)cd $(o)/$(FILELOCK_DIR) && perl Makefile.PL
+
+# The module is built in the object tree, so its sources are copied there first
+$(o)/$(FILELOCK_DIR)/Filelock.xs: $(s)/$(FILELOCK_DIR)/Filelock.xs
+       $(Q)cp $^ $@
+
+$(o)/$(FILELOCK_DIR)/Makefile.PL: $(s)/$(FILELOCK_DIR)/Makefile.PL
+       $(Q)cp $^ $@
diff --git a/lib/perl/Filelock/Makefile.PL b/lib/perl/Filelock/Makefile.PL

new file mode 100644 (file)

index 0000000..edc23aa
--- /dev/null
+++ b/lib/perl/Filelock/Makefile.PL
@@ -0,0 +1,9 @@
+# Makefile for Perl MakeMaker  (c) 2007 Pavel Charvat <pchar@ucw.cz>
+
+use ExtUtils::MakeMaker;
+# The version is taken from Filelock.pm; the architecture-independent and
+# architecture-dependent build outputs go to the directories lib and arch.
+WriteMakefile(
+    'NAME'             => 'UCW::Filelock',
+    'VERSION_FROM'     => 'Filelock.pm',
+    'INST_LIB'         => 'lib',
+    'INST_ARCHLIB'     => 'arch',
+);
diff --git a/lib/perl/Log.pm b/lib/perl/Log.pm

new file mode 100644 (file)

index 0000000..6b1fa1f
--- /dev/null
+++ b/lib/perl/Log.pm
@@ -0,0 +1,34 @@
+#
+#      Perl module for Logging
+#
+#      (c) 2007 Pavel Charvat <pchar@ucw.cz>
+#
+
+package UCW::Log;
+
+use lib 'lib/perl5';
+use strict;
+use warnings;
+use POSIX;
+use Exporter;
+
+# NOTE(review): the sibling modules call this variable $VERSION -- confirm whether the lower case is intended
+our $version = 1.0;
+our @ISA = qw(Exporter);
+# Nothing is exported by default; Log and Die are available on request or by the tag :all
+our @EXPORT = ();
+our %EXPORT_TAGS = ( all => [qw(&Log &Die)]);
+our @EXPORT_OK = (@{$EXPORT_TAGS{'all'}});
+
+# Name of the running program: the last component of $0
+my $Prog = (reverse split(/\//, $0))[0];
+
+# Write a log message to STDERR in the form
+#      <level> <date> <time> [<program>] <text>
+# The first argument is the level, the remaining arguments are joined
+# by spaces to form the text.
+sub Log {
+  my ($level, @words) = @_;
+  my $stamp = strftime(" %Y-%m-%d %H:%M:%S ", localtime());
+  print STDERR $level, $stamp, "[$Prog] ", join(' ', @words), "\n";
+}
+
+# Log the arguments as a message of level "!" and exit with status 1.
+sub Die {
+  my @words = @_;
+  Log('!', @words);
+  exit(1);
+}
+
+1;
diff --git a/lib/perl/Makefile b/lib/perl/Makefile

new file mode 100644 (file)

index 0000000..197cb9c
--- /dev/null
+++ b/lib/perl/Makefile
@@ -0,0 +1,10 @@
+# Perl modules
+
+DIRS+=lib/perl
+EXTRA_RUNDIRS+=lib/perl5/UCW
+# Modules written in pure Perl
+PROGS+=$(addprefix $(o)/lib/perl/,Config.pm Log.pm CGI.pm)
+
+# Modules with a compiled (XS) part are built only on request
+ifdef CONFIG_UCW_PERL_MODULES
+include $(s)/lib/perl/Ulimit/Makefile
+include $(s)/lib/perl/Filelock/Makefile
+endif
diff --git a/lib/perl/Ulimit/MANIFEST b/lib/perl/Ulimit/MANIFEST

new file mode 100644 (file)

index 0000000..cfc357e
--- /dev/null
+++ b/lib/perl/Ulimit/MANIFEST
@@ -0,0 +1,6 @@
+Makefile
+Makefile.PL
+MANIFEST
+Ulimit.pm
+Ulimit.xs
+lib/Sherlock/.exists
diff --git a/lib/perl/Ulimit/Makefile b/lib/perl/Ulimit/Makefile

new file mode 100644 (file)

index 0000000..8fc355b
--- /dev/null
+++ b/lib/perl/Ulimit/Makefile
@@ -0,0 +1,24 @@
+# Makefile for the Ulimit Perl module  (c) 2003 Tomas Valla <tom@ucw.cz>
+
+DIRS+=lib/perl/Ulimit/arch/auto/UCW/Ulimit
+ULIMIT_DIR=lib/perl/Ulimit
+
+PROGS+=$(o)/lib/perl/Ulimit/Ulimit.pm
+
+# The shared object with the compiled part of the module is built as a part of "extras"
+extras:: $(o)/lib/perl/Ulimit/arch/auto/UCW/Ulimit/Ulimit.$(SOEXT)
+
+# Build the module inside the object tree by the Makefile generated by MakeMaker
+# and copy the resulting shared object to run/$(DATADIR)/.
+# (No rule for the copy of Ulimit.pm is given here -- presumably the generic
+# rules for PROGS take care of it.)
+$(o)/lib/perl/Ulimit/arch/auto/UCW/Ulimit/Ulimit.$(SOEXT): $(o)/$(ULIMIT_DIR)/Ulimit.xs $(o)/$(ULIMIT_DIR)/Ulimit.pm $(o)/$(ULIMIT_DIR)/Makefile
+       $(M)MAKE $@
+       $(Q)cd $(o)/$(ULIMIT_DIR) && $(MAKE) -f Makefile $(MAKESILENT)
+       $(Q)touch $@
+       $(Q)cp $@ run/$(DATADIR)/
+
+# Let MakeMaker generate the Makefile of the module
+$(o)/$(ULIMIT_DIR)/Makefile: $(o)/$(ULIMIT_DIR)/Makefile.PL
+       $(M)PREPARE $@
+       $(Q)cd $(o)/$(ULIMIT_DIR) && perl Makefile.PL
+
+# The module is built in the object tree, so its sources are copied there first
+$(o)/$(ULIMIT_DIR)/Ulimit.xs: $(s)/$(ULIMIT_DIR)/Ulimit.xs
+       $(Q)cp $^ $@
+
+$(o)/$(ULIMIT_DIR)/Makefile.PL: $(s)/$(ULIMIT_DIR)/Makefile.PL
+       $(Q)cp $^ $@
diff --git a/lib/perl/Ulimit/Makefile.PL b/lib/perl/Ulimit/Makefile.PL

new file mode 100644 (file)

index 0000000..1f5ae5a
--- /dev/null
+++ b/lib/perl/Ulimit/Makefile.PL
@@ -0,0 +1,9 @@
+# Makefile for Perl MakeMaker  (c) 2003 Tomas Valla <tom@ucw.cz>
+
+use ExtUtils::MakeMaker;
+# The version is taken from Ulimit.pm; the architecture-independent and
+# architecture-dependent build outputs go to the directories lib and arch.
+WriteMakefile(
+    'NAME'             => 'UCW::Ulimit',
+    'VERSION_FROM'     => 'Ulimit.pm',
+    'INST_LIB'         => 'lib',
+    'INST_ARCHLIB'     => 'arch',
+);
diff --git a/lib/perl/Ulimit/Ulimit.pm b/lib/perl/Ulimit/Ulimit.pm

new file mode 100644 (file)

index 0000000..8497dfa
--- /dev/null
+++ b/lib/perl/Ulimit/Ulimit.pm
@@ -0,0 +1,48 @@
+# Perl module for setting process limits
+#
+# (c) 2003 Tomas Valla <tom@ucw.cz>
+#
+# This software may be freely distributed and used according to the terms
+# of the GNU Lesser General Public License.
+#
+#
+#
+# Interface:
+#   UCW::Ulimit::setlimit( $resource, $softlimit, $hardlimit)
+#   UCW::Ulimit::getlimit( $resource, $softlimit, $hardlimit)
+#
+# setlimit sets limit to values supplied in softlimit and hardlimit
+# getlimit reads limits into softlimit and hardlimit
+# $resource constants are defined below
+#
+
+package UCW::Ulimit;
+
+use 5.006;
+use strict;
+use warnings;
+
+require DynaLoader;
+
+our @ISA = qw(DynaLoader);
+# Let DynaLoader search also the directory "lib" (relative to the current directory)
+unshift @DynaLoader::dl_library_path, "lib";
+
+# Resource numbers accepted by setlimit() and getlimit(). They are private to
+# this module and must be kept in sync with the switch statements in
+# Ulimit.xs, which translate them to RLIMIT_CPU ... RLIMIT_AS.
+our $CPU = 0;
+our $FSIZE = 1;
+our $DATA = 2;
+our $STACK = 3;
+our $CORE = 4;
+our $RSS = 5;
+our $NPROC = 6;
+our $NOFILE = 7;
+our $MEMLOCK = 8;
+our $AS = 9;
+
+our $VERSION = '0.01';
+
+# Load the compiled part of the module (see Ulimit.xs), which defines setlimit() and getlimit()
+bootstrap UCW::Ulimit $VERSION;
+
+# Preloaded methods go here.
+
+1;
+__END__
diff --git a/lib/perl/Ulimit/Ulimit.xs b/lib/perl/Ulimit/Ulimit.xs

new file mode 100644 (file)

index 0000000..5290cf1
--- /dev/null
+++ b/lib/perl/Ulimit/Ulimit.xs
@@ -0,0 +1,87 @@
+/*
+ * PerlXS module for managing process limits
+ *
+ * (c) 2003 Tomas Valla <tom@ucw.cz>
+ */
+
+#include "EXTERN.h"
+#include "perl.h"
+#include "XSUB.h"
+
+#include <errno.h>
+#include <sys/resource.h>
+#include <unistd.h>
+
+
+MODULE = UCW::Ulimit           PACKAGE = UCW::Ulimit
+
+PROTOTYPES: ENABLED
+
+int
+setlimit(IN int resource, IN int soft, IN int hard)
+CODE:
+       struct rlimit rl;
+       int r = 0;
+       int known = 1;
+
+       /*
+        * Set the soft and hard limit of the given resource.
+        * `resource' is one of the selector codes 0..9 defined in Ulimit.pm.
+        * Returns the result of setrlimit(); an unknown selector yields -1
+        * with errno set to EINVAL instead of passing an uninitialised
+        * resource number to the kernel.
+        */
+       switch(resource) {
+       case 0:
+               r = RLIMIT_CPU; break;
+       case 1:
+               r = RLIMIT_FSIZE; break;
+       case 2:
+               r = RLIMIT_DATA; break;
+       case 3:
+               r = RLIMIT_STACK; break;
+       case 4:
+               r = RLIMIT_CORE; break;
+       case 5:
+               r = RLIMIT_RSS; break;
+       case 6:
+               r = RLIMIT_NPROC; break;
+       case 7:
+               r = RLIMIT_NOFILE; break;
+       case 8:
+               r = RLIMIT_MEMLOCK; break;
+       case 9:
+               r = RLIMIT_AS; break;
+       default:
+               known = 0; break;
+       }
+       if (known) {
+               rl.rlim_cur = soft;
+               rl.rlim_max = hard;
+               RETVAL = setrlimit(r, &rl);
+       } else {
+               errno = EINVAL;
+               RETVAL = -1;
+       }
+OUTPUT:
+       RETVAL
+
+
+int
+getlimit(IN int resource, OUT int soft, OUT int hard)
+CODE:
+       struct rlimit rl;
+       int r = 0;
+       int known = 1;
+
+       /*
+        * Read the soft and hard limit of the given resource into the
+        * output parameters. `resource' is one of the selector codes 0..9
+        * defined in Ulimit.pm. Returns the result of getrlimit(); an unknown
+        * selector yields -1 with errno set to EINVAL. On any failure both
+        * outputs are 0 rather than uninitialised stack contents.
+        */
+       rl.rlim_cur = 0;
+       rl.rlim_max = 0;
+       switch(resource) {
+       case 0:
+               r = RLIMIT_CPU; break;
+       case 1:
+               r = RLIMIT_FSIZE; break;
+       case 2:
+               r = RLIMIT_DATA; break;
+       case 3:
+               r = RLIMIT_STACK; break;
+       case 4:
+               r = RLIMIT_CORE; break;
+       case 5:
+               r = RLIMIT_RSS; break;
+       case 6:
+               r = RLIMIT_NPROC; break;
+       case 7:
+               r = RLIMIT_NOFILE; break;
+       case 8:
+               r = RLIMIT_MEMLOCK; break;
+       case 9:
+               r = RLIMIT_AS; break;
+       default:
+               known = 0; break;
+       }
+
+       if (known)
+               RETVAL = getrlimit(r, &rl);
+       else {
+               errno = EINVAL;
+               RETVAL = -1;
+       }
+       soft = rl.rlim_cur;
+       hard = rl.rlim_max;
+OUTPUT:
+       RETVAL
diff --git a/lib/prefetch.h b/lib/prefetch.h

new file mode 100644 (file)

index 0000000..4c9c716
--- /dev/null
+++ b/lib/prefetch.h
@@ -0,0 +1,36 @@
+/*
+ *     UCW Library -- Prefetch
+ *
+ *     (c) 1997--2006 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_PREFETCH_H
+#define _UCW_PREFETCH_H
+
+/*
+ *  prefetch(addr) is a hint to fetch the data at addr ahead of use.
+ *  On the CPU types listed below it issues a `prefetcht0' instruction and
+ *  HAVE_PREFETCH gets defined; everywhere else it is an empty inline function.
+ */
+#if defined(__k6)
+  /* K6 doesn't have prefetches */
+
+#elif defined(__athlon) || defined(__k8) || \
+      defined(__i686) || \
+      defined(__pentium4) || defined(__prescott) || defined(__nocona)
+
+#define HAVE_PREFETCH
+static inline void prefetch(void *addr)
+{
+  /* The byte at addr is passed as a memory operand; it is never read by C code. */
+  asm volatile ("prefetcht0 %0" : : "m" (*(byte*)addr));
+}
+
+#else
+#warning "Don't know how to prefetch on your CPU. Please fix lib/prefetch.h."
+#endif
+
+#ifndef HAVE_PREFETCH
+/* Fallback: no prefetch instruction is emitted. */
+static inline void prefetch(void *addr UNUSED)
+{
+}
+#endif
+
+#endif
diff --git a/lib/prime.c b/lib/prime.c

new file mode 100644 (file)

index 0000000..eec6f5d
--- /dev/null
+++ b/lib/prime.c
@@ -0,0 +1,78 @@
+/*
+ *     UCW Library -- Prime Number Tests
+ *
+ *     (c) 1997 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+
+static int                             /* Trial division by 6k-1 and 6k+1 */
+__isprime(uns x)                       /* Caller guarantees x != 2 && x != 3 */
+{
+  uns divisor, step;
+
+  if (x == 5)
+    return 1;
+  /* Candidate divisors run 5, 7, 11, 13, ...: the step alternates between 2 and 4 */
+  for (divisor = 5, step = 2; ; divisor += step, step = 6 - step)
+    {
+      if (x % divisor == 0)
+       return 0;
+      if (x / divisor <= divisor)      /* divisor has reached sqrt(x) */
+       return 1;
+    }
+}
+
+/* Returns non-zero iff x is a prime number. */
+int
+isprime(uns x)
+{
+  uns rem;
+
+  if (x < 5)
+    return (x == 2 || x == 3);
+  /* Numbers not of the form 6k+1 or 6k+5 are divisible by 2 or 3 */
+  rem = x % 6;
+  if (rem != 1 && rem != 5)
+    return 0;
+  return __isprime(x);
+}
+
+uns
+nextprime(uns x)                       /* Returns some prime greater than x */
+{
+  uns cand = x + (5 - (x % 6));        /* Round up to the form 6k-1 */
+  uns step = 2;
+
+  for(;;)
+    {
+      cand += step;                    /* Alternately 6k+1 and 6k-1 */
+      if (__isprime(cand))
+       return cand;
+      step = 6 - step;
+    }
+}
+
+#ifdef TEST
+
+#include <stdio.h>
+#include <stdlib.h>
+
+/*
+ *  Test driver: takes one number on the command line, reports whether it is
+ *  a prime and prints the result of nextprime() for it.
+ */
+int
+main(int argc, char **argv)
+{
+  if (argc < 2)                        /* argv[1] would be NULL */
+    {
+      fputs("Usage: prime <number>\n", stderr);
+      return 1;
+    }
+  uns k = strtoul(argv[1], NULL, 10);
+  /* k is unsigned, so it must be printed with %u */
+  printf("%u is%s prime\n", k, isprime(k) ? "" : "n't");
+  printf("Next prime is %u\n", nextprime(k));
+  return 0;
+}
+
+#endif
diff --git a/lib/primetable.c b/lib/primetable.c

new file mode 100644 (file)

index 0000000..2e6fe5d
--- /dev/null
+++ b/lib/primetable.c
@@ -0,0 +1,164 @@
+/*
+ *     UCW Library -- Prime Number Table
+ *
+ *     (c) 2005 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/binsearch.h"
+
+/* A table of odd primes, each is about 1.2 times the previous one */
+static uns prime_table[] = {
+  3,
+  7,
+  13,
+  19,
+  29,
+  37,
+  53,
+  67,
+  89,
+  109,
+  137,
+  173,
+  211,
+  263,
+  331,
+  409,
+  499,
+  601,
+  727,
+  877,
+  1061,
+  1279,
+  1543,
+  1861,
+  2239,
+  2689,
+  3229,
+  3877,
+  4657,
+  5623,
+  6761,
+  8123,
+  9767,
+  11731,
+  14083,
+  16903,
+  20287,
+  24359,
+  29243,
+  35099,
+  42131,
+  50581,
+  60703,
+  72859,
+  87433,
+  104933,
+  125927,
+  151121,
+  181361,
+  217643,
+  261223,
+  313471,
+  376171,
+  451411,
+  541699,
+  650059,
+  780119,
+  936151,
+  1123391,
+  1348111,
+  1617739,
+  1941293,
+  2329559,
+  2795477,
+  3354581,
+  4025507,
+  4830619,
+  5796797,
+  6956203,
+  8347483,
+  10017011,
+  12020431,
+  14424539,
+  17309471,
+  20771371,
+  24925661,
+  29910821,
+  35892991,
+  43071601,
+  51685939,
+  62023139,
+  74427803,
+  89313379,
+  107176057,
+  128611313,
+  154333591,
+  185200339,
+  222240413,
+  266688509,
+  320026249,
+  384031507,
+  460837813,
+  553005391,
+  663606499,
+  796327811,
+  955593439,
+  1146712139,
+  1376054569,
+  1651265507,
+  1981518631,
+  2377822387,
+  2853386881,
+  3424064269,
+  4108877153,
+  4294967291
+};
+
+#define NPRIMES ARRAY_SIZE(prime_table)
+
+/* Returns the smallest table prime greater than x, or 0 if there is none. */
+uns
+next_table_prime(uns x)
+{
+  uns largest = prime_table[NPRIMES-1];
+
+  if (x < largest)
+    {
+      uns idx = BIN_SEARCH_FIRST_GE(prime_table, NPRIMES, x+1);
+      return prime_table[idx];
+    }
+  return 0;
+}
+
+/* Returns the largest table prime smaller than x, or 0 if there is none. */
+uns
+prev_table_prime(uns x)
+{
+  int pos = BIN_SEARCH_FIRST_GE(prime_table, NPRIMES, x);
+
+  if (!pos)
+    return 0;
+  return prime_table[pos-1];
+}
+
+#ifdef TEST
+
+#include <stdio.h>
+
+/*
+ *  Test driver: prints next_table_prime() and prev_table_prime() for small
+ *  arguments and for arguments near the top of the 32-bit range.
+ *  With the `#if 0' flipped it regenerates the table instead.
+ */
+int main(void)
+{
+#if 0          /* Generate the table */
+  uns x = 3, xx;
+  do
+    {
+      printf("  %u,\n", x);
+      xx = x;
+      x = nextprime(1.2*x);
+    }
+  while (x > xx);                      /* Stop once the value wraps around */
+#else
+  /* All printed values are unsigned, hence %u */
+  for (uns i=1; i<=100; i++)
+    printf("%u\t%u\t%u\n", i, next_table_prime(i), prev_table_prime(i));
+  for (uns i=0xfffffff0; i; i++)       /* Runs until i wraps to 0 */
+    printf("%u\t%u\t%u\n", i, next_table_prime(i), prev_table_prime(i));
+#endif
+  return 0;
+}
+
+#endif
diff --git a/lib/proctitle.c b/lib/proctitle.c

new file mode 100644 (file)

index 0000000..4e15418
--- /dev/null
+++ b/lib/proctitle.c
@@ -0,0 +1,82 @@
+/*
+ *     UCW Library -- Setting of Process Title
+ *
+ *     (c) 2001--2006 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <stdarg.h>
+#include <unistd.h>
+
+static char **spt_argv;
+static char *spt_start, *spt_end;
+
+/*
+ *  Prepare for later calls to setproctitle(): remember argv and find the
+ *  contiguous memory area occupied by the argument strings (and by the
+ *  environment strings directly following them) which may be overwritten
+ *  with the title. Does nothing unless CONFIG_LINUX is defined.
+ */
+void
+setproctitle_init(int argc, char **argv)
+{
+#ifdef CONFIG_LINUX
+  int i, len;
+  char **env, **oldenv, *t;
+
+  spt_argv = argv;
+
+  /* Create a backup copy of environment */
+  /* (the original strings may become part of the area overwritten by setproctitle()) */
+  oldenv = __environ;
+  len = 0;
+  for (i=0; oldenv[i]; i++)
+    len += strlen(oldenv[i]) + 1;
+  __environ = env = xmalloc(sizeof(char *)*(i+1));
+  t = xmalloc(len);
+  for (i=0; oldenv[i]; i++)
+    {
+      env[i] = t;
+      len = strlen(oldenv[i]) + 1;
+      memcpy(t, oldenv[i], len);
+      t += len;
+    }
+  env[i] = NULL;
+
+  /* Scan for consecutive free space */
+  /* spt_end always points at the terminating zero of the last string accepted so far */
+  spt_start = spt_end = argv[0];
+  for (i=0; i<argc; i++)
+    if (!i || spt_end+1 == argv[i])
+      spt_end = argv[i] + strlen(argv[i]);
+  for (i=0; oldenv[i]; i++)
+    if (spt_end+1 == oldenv[i])
+      spt_end = oldenv[i] + strlen(oldenv[i]);
+#endif
+}
+
+/*
+ *  Format a new process title (printf-like arguments) and store it in the
+ *  area found by setproctitle_init(). Does nothing when no area is available.
+ */
+void
+setproctitle(const char *msg, ...)
+{
+  byte buf[256];
+  va_list args;
+  int len, room;
+
+  if (spt_end <= spt_start)
+    return;
+
+  va_start(args, msg);
+  len = vsnprintf(buf, sizeof(buf), msg, args);
+  va_end(args);
+  if (len < 0 || len >= (int) sizeof(buf))
+    sprintf(buf, "<too-long>");
+
+  /* Fill the whole area: strncpy() truncates long titles and zero-pads short ones */
+  room = spt_end - spt_start;
+  strncpy(spt_start, buf, room);
+  spt_start[room] = 0;
+  spt_argv[0] = spt_start;
+  spt_argv[1] = NULL;
+}
+
+/* Returns the start of the title area, or NULL if no area is available. */
+char *
+getproctitle(void)
+{
+  if (spt_start < spt_end)
+    return spt_start;
+  return NULL;
+}
diff --git a/lib/profile.c b/lib/profile.c

new file mode 100644 (file)

index 0000000..83ff77c
--- /dev/null
+++ b/lib/profile.c
@@ -0,0 +1,129 @@
+/*
+ *     UCW Library -- Poor Man's Profiler
+ *
+ *     (c) 2001 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/profile.h"
+
+#include <stdio.h>
+
+/* PROFILE_TOD */
+
+#include <sys/time.h>
+
+/* Reset the accumulated time of the counter to zero. */
+void
+prof_tod_init(struct prof_tod *c)
+{
+  c->usec = 0;
+  c->sec = 0;
+}
+
+/*
+ *  Read the current time once, stop counter `o' and start counter `n'.
+ *  Either pointer may be NULL, in which case that half is skipped.
+ */
+void
+prof_tod_switch(struct prof_tod *o, struct prof_tod *n)
+{
+  struct timeval tv;
+  gettimeofday(&tv, NULL);
+  if (n)
+    {
+      n->start_sec = tv.tv_sec;
+      n->start_usec = tv.tv_usec;
+    }
+  if (o)
+    {
+      /* Add the elapsed time and bring usec back into 0..999999 */
+      o->sec += tv.tv_sec - o->start_sec;
+      o->usec += tv.tv_usec - o->start_usec;
+      if (o->usec < 0)
+       {
+         o->usec += 1000000;
+         o->sec--;
+       }
+      else while (o->usec >= 1000000)
+       {
+         o->usec -= 1000000;
+         o->sec++;
+       }
+    }
+}
+
+/* Print the accumulated time as "seconds.microseconds" to buf; returns the result of sprintf(). */
+int
+prof_tod_format(char *buf, struct prof_tod *c)
+{
+  return sprintf(buf, "%d.%06d", c->sec, c->usec);
+}
+
+/* PROFILE_TSC */
+
+#ifdef CPU_I386
+
+/* Reset the accumulated tick count to zero. */
+void
+prof_tsc_init(struct prof_tsc *c)
+{
+  c->ticks = 0;
+}
+
+/* Print the accumulated tick count to buf; returns the result of sprintf(). */
+int
+prof_tsc_format(char *buf, struct prof_tsc *c)
+{
+  /* ticks is u64, so cast it to the type %lld expects (as prof_ktsc_format() does) */
+  return sprintf(buf, "%lld", (long long) c->ticks);
+}
+
+#endif
+
+/* PROFILE_KTSC */
+
+#ifdef CONFIG_LINUX
+
+#include <fcntl.h>
+#include <unistd.h>
+static int self_prof_fd = -1;
+
+/*
+ *  Reset the counter. The first call also opens /proc/self/profile,
+ *  which is then shared by all counters; dies if it cannot be opened.
+ */
+void
+prof_ktsc_init(struct prof_ktsc *c)
+{
+  if (self_prof_fd < 0)
+    {
+      self_prof_fd = open("/proc/self/profile", O_RDONLY, 0);
+      if (self_prof_fd < 0)
+       die("Unable to open /proc/self/profile: %m");
+    }
+  c->ticks_user = 0;
+  c->ticks_sys = 0;
+}
+
+/*
+ *  Read the two tick counts from /proc/self/profile, stop counter `o' and
+ *  start counter `n'. Either pointer may be NULL, in which case that half
+ *  is skipped.
+ */
+void
+prof_ktsc_switch(struct prof_ktsc *o, struct prof_ktsc *n)
+{
+  unsigned long long u, s;
+  char buf[256];
+
+  int l = pread(self_prof_fd, buf, sizeof(buf)-1, 0);
+  ASSERT(l > 0 && l < (int)sizeof(buf)-1);
+  buf[l] = 0;
+  /* u and s are unsigned, so they must be scanned with %llu */
+  l = sscanf(buf, "%llu%llu", &u, &s);
+  ASSERT(l == 2);
+
+  if (n)
+    {
+      n->start_user = u;
+      n->start_sys = s;
+    }
+  if (o)
+    {
+      u -= o->start_user;
+      o->ticks_user += u;
+      s -= o->start_sys;
+      o->ticks_sys += s;
+    }
+}
+
+/* Print the accumulated ticks as "user+sys" to buf; returns the result of sprintf(). */
+int
+prof_ktsc_format(char *buf, struct prof_ktsc *c)
+{
+  return sprintf(buf, "%lld+%lld", (long long) c->ticks_user, (long long) c->ticks_sys);
+}
+
+#endif
diff --git a/lib/profile.h b/lib/profile.h

new file mode 100644 (file)

index 0000000..3704b44
--- /dev/null
+++ b/lib/profile.h
@@ -0,0 +1,140 @@
+/*
+ *     UCW Library -- Poor Man's Profiler
+ *
+ *     (c) 2001 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+/*
+ *  Usage:
+ *             #define PROFILE_xxx
+ *             #include "lib/profile.h"
+ *             prof_t cnt;
+ *             prof_init(&cnt);
+ *             ...
+ *             prof_start(&cnt);
+ *             ...
+ *             prof_stop(&cnt);
+ *             printf("%s\n", PROF_STR(cnt));
+ */
+
+/* PROFILE_TOD: gettimeofday() profiler */
+
+struct prof_tod {
+  u32 start_sec, start_usec;           /* Time of the last start */
+  s32 sec, usec;                       /* Accumulated time */
+};
+
+void prof_tod_init(struct prof_tod *);
+void prof_tod_switch(struct prof_tod *, struct prof_tod *);    /* (counter to stop, counter to start), either may be NULL */
+int prof_tod_format(char *, struct prof_tod *);
+
+/* PROFILE_TSC: i386 TSC profiler */
+
+#ifdef CPU_I386
+
+struct prof_tsc {
+  u64 start_tsc;                       /* TSC value at the last start */
+  u64 ticks;                           /* Accumulated ticks */
+};
+
+void prof_tsc_init(struct prof_tsc *);
+int prof_tsc_format(char *, struct prof_tsc *);
+
+#endif
+
+/* PROFILE_KTSC: Linux kernel TSC profiler */
+
+#ifdef CONFIG_LINUX
+
+struct prof_ktsc {
+  u64 start_user, start_sys;           /* Values read at the last start */
+  u64 ticks_user, ticks_sys;           /* Accumulated ticks */
+};
+
+void prof_ktsc_init(struct prof_ktsc *);
+void prof_ktsc_switch(struct prof_ktsc *, struct prof_ktsc *); /* (counter to stop, counter to start), either may be NULL */
+int prof_ktsc_format(char *, struct prof_ktsc *);
+
+#endif
+
+/* Select the right profiler */
+
+#if defined(PROFILE_TOD)
+
+#define PROFILER
+#define PROF_STR_SIZE 21
+typedef struct prof_tod prof_t;
+#define prof_init prof_tod_init
+#define prof_switch prof_tod_switch
+#define prof_format prof_tod_format
+
+#elif defined(PROFILE_TSC)
+
+#define PROFILER
+#define PROFILER_INLINE
+#define PROF_STR_SIZE 24
+
+typedef struct prof_tsc prof_t;
+#define prof_init prof_tsc_init
+#define prof_format prof_tsc_format
+
+#define rdtscll(val) __asm__ __volatile__("rdtsc" : "=A" (val))
+
+/* Start measuring: remember the current TSC value. */
+static inline void prof_start(prof_t *c)
+{
+  rdtscll(c->start_tsc);
+}
+
+/* Stop measuring: add the ticks elapsed since the last start. */
+static inline void prof_stop(prof_t *c)
+{
+  u64 now;
+  rdtscll(now);
+  c->ticks += now - c->start_tsc;
+}
+
+/* Stop counter o and start counter n with a single TSC read; both must be non-NULL. */
+static inline void prof_switch(prof_t *o, prof_t *n)
+{
+  u64 now;
+  rdtscll(now);
+  n->start_tsc = now;
+  o->ticks += now - o->start_tsc;
+}
+
+#elif defined(PROFILE_KTSC)
+
+#define PROFILER
+#define PROF_STR_SIZE 50
+typedef struct prof_ktsc prof_t;
+#define prof_init prof_ktsc_init
+#define prof_switch prof_ktsc_switch
+#define prof_format prof_ktsc_format
+
+#endif
+
+#ifdef PROFILER
+
+/* Stuff common for all profilers */
+#ifndef PROFILER_INLINE
+/* Starting and stopping are one-sided switches */
+static inline void prof_start(prof_t *c) { prof_switch(NULL, c); }
+static inline void prof_stop(prof_t *c) { prof_switch(c, NULL); }
+#endif
+/* Formats counter C into a static buffer (one per use of the macro), which is overwritten by each evaluation */
+#define PROF_STR(C) ({ static char _x[PROF_STR_SIZE]; prof_format(_x, &(C)); _x; })
+
+#else
+
+/* Dummy profiler with no output */
+/* All operations are empty; formatting always yields "?" */
+typedef struct { } prof_t;
+static inline void prof_init(prof_t *c UNUSED) { }
+static inline void prof_start(prof_t *c UNUSED) { }
+static inline void prof_stop(prof_t *c UNUSED) { }
+static inline void prof_switch(prof_t *c UNUSED, prof_t *d UNUSED) { }
+static inline void prof_format(char *b, prof_t *c UNUSED) { b[0]='?'; b[1]=0; }
+#define PROF_STR_SIZE 2
+#define PROF_STR(C) "?"
+
+#endif
diff --git a/lib/qache.c b/lib/qache.c

new file mode 100644 (file)

index 0000000..91536bf
--- /dev/null
+++ b/lib/qache.c
@@ -0,0 +1,786 @@
+/*
+ *      Simple and Quick Shared Memory Cache
+ *
+ *     (c) 2005 Martin Mares <mj@ucw.cz>
+ */
+
+#undef LOCAL_DEBUG
+
+#include "lib/lib.h"
+#include "lib/bitops.h"
+#include "lib/fastbuf.h"
+#include "lib/ff-binary.h"
+#include "lib/qache.h"
+
+#include <stdio.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+
+/*
+ *  The cache lives in a mmapped file of the following format:
+ *     qache_header
+ *     qache_entry[max_entries]        table of entries and their keys
+ *     u32 qache_hash[hash_size]       hash table pointing to keys
+ *     u32 block_next[num_blocks]      next block pointers
+ *     padding                         to a multiple of block size
+ *     blocks[]                        data blocks
+ */
+
+struct qache_header {
+  u32 magic;                           /* QACHE_MAGIC */
+  u32 block_size;                      /* Parameters as in qache_params */
+  u32 block_shift;                     /* block_size = 1 << block_shift */
+  u32 num_blocks;
+  u32 format_id;
+  u32 entry_table_start;               /* Array of qache_entry's */
+  u32 max_entries;
+  u32 hash_table_start;                        /* Hash table containing all keys */
+  u32 hash_size;
+  u32 next_table_start;                        /* Array of next pointers */
+  u32 first_data_block;
+};
+
+#define QACHE_MAGIC 0xb79f6d12
+
+/* One slot of the entry table; slot #0 is never a real entry, it holds the heads of all lists */
+struct qache_entry {
+  u32 lru_prev, lru_next;              /* Entry #0: head of the cyclic LRU list */
+  u32 data_len;                                /* Entry #0: number of free blocks, Free entries: ~0U */
+  u32 first_data_block;                        /* Entry #0: first free block */
+  qache_key_t key;
+  u32 hash_next;                       /* Entry #0: first free entry, Free entries: next free */
+};
+
+/* Per-process handle of an open cache */
+struct qache {
+  struct qache_header *hdr;            /* The following four point into mmap_data */
+  struct qache_entry *entry_table;
+  u32 *hash_table;
+  u32 *next_table;
+  int fd;
+  byte *mmap_data;                     /* The whole file mapped to memory */
+  uns file_size;
+  char *file_name;
+  uns locked;                          /* Set between qache_lock() and qache_unlock() */
+};
+
+#define first_free_entry entry_table[0].hash_next
+#define first_free_block entry_table[0].first_data_block
+#define num_free_blocks entry_table[0].data_len
+
+/* Formats the key as a hexadecimal string in a static buffer (overwritten by the next call). */
+static inline char *
+format_key(qache_key_t *key)
+{
+  static char keybuf[2*sizeof(qache_key_t)+1];
+  char *out = keybuf;
+
+  for (uns i=0; i<sizeof(qache_key_t); i++, out += 2)
+    sprintf(out, "%02x", (*key)[i]);
+  return keybuf;
+}
+
+/*
+ *  Synchronize len bytes of the mapping starting at byte offset start.
+ *  The range is widened to whole pages first. A failure is only logged.
+ *  Compiled to nothing when CONFIG_LINUX is defined.
+ */
+static void
+qache_msync(struct qache *q UNUSED, uns start UNUSED, uns len UNUSED)
+{
+#ifndef CONFIG_LINUX
+  /* We don't need msyncing on Linux, since the mappings are guaranteed to be coherent */
+  len += (start % CPU_PAGE_SIZE);
+  start -= start % CPU_PAGE_SIZE;
+  len = ALIGN_TO(len, CPU_PAGE_SIZE);
+  if (msync(q->mmap_data + start, len, MS_ASYNC | MS_INVALIDATE) < 0)
+    msg(L_ERROR, "Cache %s: msync failed: %m", q->file_name);
+#endif
+}
+
+/* Synchronize a single data block given by its number. */
+static void
+qache_msync_block(struct qache *q, uns blk)
+{
+  uns offset = blk << q->hdr->block_shift;
+
+  DBG("\tSyncing block %d", blk);
+  qache_msync(q, offset, q->hdr->block_size);
+}
+
+/*
+ *  Lock the cache: waits (F_SETLKW) for a write lock on the header
+ *  bytes of the file and dies if the lock cannot be obtained.
+ *  The handle must not be locked already.
+ */
+static void
+qache_lock(struct qache *q)
+{
+  /* We cannot use flock() since it happily permits locking a shared fd (e.g., after fork()) multiple times */
+  ASSERT(!q->locked);
+  struct flock fl = { .l_type = F_WRLCK, .l_whence = SEEK_SET, .l_start = 0, .l_len = sizeof(struct qache_header) };
+  if (fcntl(q->fd, F_SETLKW, &fl) < 0)
+    die("fcntl lock on %s: %m", q->file_name);
+  q->locked = 1;
+  DBG("Locked cache %s", q->file_name);
+}
+
+/*
+ *  Release the lock taken by qache_lock(). If dirty is non-zero, everything
+ *  in front of the first data block is synchronized first.
+ */
+static void
+qache_unlock(struct qache *q, uns dirty)
+{
+  ASSERT(q->locked);
+  if (dirty)                           /* Sync header, entry table and hash table */
+    qache_msync(q, 0, q->hdr->first_data_block << q->hdr->block_shift);
+  struct flock fl = { .l_type = F_UNLCK, .l_whence = SEEK_SET, .l_start = 0, .l_len = sizeof(struct qache_header) };
+  if (fcntl(q->fd, F_SETLKW, &fl) < 0)
+    die("fcntl unlock on %s: %m", q->file_name);
+  q->locked = 0;
+  DBG("Unlocked cache %s (dirty=%d)", q->file_name, dirty);
+}
+
+/* Bits recorded per entry by audit_entries(): which lists the entry was found in */
+enum entry_audit_flags {
+  ET_FREE_LIST = 1,
+  ET_LRU = 2,
+  ET_HASH = 4
+};
+
+/*
+ *  Check consistency of the entry table: walks the free list, all hash
+ *  chains and the LRU list, recording in entrymap[] (one zeroed byte per
+ *  entry, ET_xxx bits) where each entry was seen. Returns NULL if everything
+ *  is consistent, otherwise a static string describing the problem.
+ */
+static char *
+audit_entries(struct qache *q, byte *entrymap)
+{
+  uns i, j;
+
+  DBG("Auditing entries");
+
+  /* Check the free list */
+  i = q->first_free_entry;
+  while (i)
+    {
+      /* An already marked entry means the list contains a cycle */
+      if (i >= q->hdr->max_entries || (entrymap[i] & ET_FREE_LIST) || q->entry_table[i].data_len != ~0U)
+       return "inconsistent free entry list";
+      entrymap[i] |= ET_FREE_LIST;
+      i = q->entry_table[i].hash_next;
+    }
+
+  /* Check the hash table */
+  for (i=0; i<q->hdr->hash_size; i++)
+    {
+      j = q->hash_table[i];
+      while (j)
+       {
+         if (j >= q->hdr->max_entries || (entrymap[j] & (ET_HASH | ET_FREE_LIST)))
+           return "inconsistent hash chains";
+         entrymap[j] |= ET_HASH;
+         j = q->entry_table[j].hash_next;
+       }
+    }
+
+  /* Check the LRU */
+  /* The list is cyclic and starts at entry #0, so the walk ends when it returns there */
+  i = 0;
+  do
+    {
+      j = q->entry_table[i].lru_next;
+      if ((entrymap[i] & (ET_LRU | ET_FREE_LIST)) || j >= q->hdr->max_entries || q->entry_table[j].lru_prev != i)
+       return "inconsistent LRU list";
+      entrymap[i] |= ET_LRU;
+      i = j;
+    }
+  while (i);
+
+  /* Check if all non-free items are in all lists */
+  for (i=1; i<q->hdr->max_entries; i++)
+    {
+      if (entrymap[i] != ((q->entry_table[i].data_len == ~0U) ? ET_FREE_LIST : (ET_LRU | ET_HASH)))
+       return "inconsistent lists";
+    }
+  return NULL;
+}
+
+/* Bits recorded per block by audit_blocks(): free or owned by an entry */
+enum block_audit_flags {
+  BT_FREE_LIST = 1,
+  BT_ALLOC = 2
+};
+
+/*
+ *  Check consistency of block allocation: every data block must be either
+ *  on the free list or in the block chain of exactly one non-free entry,
+ *  and the length of each chain must match the data length of its entry.
+ *  entrymap[] is the map filled by audit_entries(), blockmap[] holds one
+ *  zeroed byte per block. Returns NULL if everything is consistent,
+ *  otherwise a static string describing the problem.
+ */
+static char *
+audit_blocks(struct qache *q, byte *entrymap, byte *blockmap)
+{
+  uns i, j;
+
+  DBG("Auditing blocks");
+
+  /* Check the free list */
+  for (i=q->first_free_block; i; i=q->next_table[i])
+    {
+      if (i < q->hdr->first_data_block || i >= q->hdr->num_blocks || (blockmap[i] & BT_FREE_LIST))
+       return "inconsistent free block list";
+      blockmap[i] |= BT_FREE_LIST;
+    }
+
+  /* Check allocation lists of entries */
+  for (i=1; i<q->hdr->max_entries; i++)
+    if (!(entrymap[i] & ET_FREE_LIST))
+      {
+       uns blocks = 0;
+       for (j=q->entry_table[i].first_data_block; j; j=q->next_table[j])
+         {
+           /*
+            *  The block number comes from a possibly corrupted file, so it has to be
+            *  range-checked before it is used as an index to blockmap[] and next_table[].
+            *  A block which is already marked is either free, shared or a part of a cycle.
+            */
+           if (j < q->hdr->first_data_block || j >= q->hdr->num_blocks || blockmap[j])
+             return "inconsistent entry block list";
+           blockmap[j] |= BT_ALLOC;
+           blocks++;
+         }
+       if (((q->entry_table[i].data_len + q->hdr->block_size - 1) >> q->hdr->block_shift) != blocks)
+         return "inconsistent entry data length";
+      }
+
+  /* Check if all blocks belong somewhere */
+  for (i=q->hdr->first_data_block; i < q->hdr->num_blocks; i++)
+    if (!blockmap[i])
+      {
+       DBG("Block %d unreferenced", i);
+       return "unreferenced blocks found";
+      }
+
+  return NULL;
+}
+
+/*
+ *  Run both audits on a mapped cache. Returns NULL if the cache is
+ *  consistent, otherwise a static string describing the first problem found.
+ */
+static char *
+do_audit(struct qache *q)
+{
+  byte *entry_map = xmalloc_zero(q->hdr->max_entries);
+  byte *block_map = xmalloc_zero(q->hdr->num_blocks);
+  char *err = audit_entries(q, entry_map);     /* Same type as the audits return */
+  if (!err)
+    err = audit_blocks(q, entry_map, block_map);
+  xfree(block_map);
+  xfree(entry_map);
+  return err;
+}
+
+/* Derive pointers to the header and to all tables from the offsets stored in the mapped header. */
+static void
+qache_setup_pointers(struct qache *q)
+{
+  byte *base = q->mmap_data;
+  struct qache_header *hdr = (struct qache_header *) base;
+
+  q->hdr = hdr;
+  q->entry_table = (struct qache_entry *) (base + hdr->entry_table_start);
+  q->hash_table = (u32 *) (base + hdr->hash_table_start);
+  q->next_table = (u32 *) (base + hdr->next_table_start);
+}
+
+/*
+ *  Try to open and map an existing cache file and verify that it matches
+ *  the requested parameters and that its contents are consistent.
+ *  Returns 1 on success. Returns 0 if the file cannot be opened or has to
+ *  be ignored (the reason is logged); the file is then neither mapped
+ *  nor open.
+ */
+static int
+qache_open_existing(struct qache *q, struct qache_params *par)
+{
+  if ((q->fd = open(q->file_name, O_RDWR, 0)) < 0)
+    return 0;
+
+  struct stat st;
+  char *err = "stat failed";
+  if (fstat(q->fd, &st) < 0)
+    goto close_and_fail;
+
+  err = "invalid file size";
+  if (st.st_size < (int)sizeof(struct qache_header) || (st.st_size % par->block_size))
+    goto close_and_fail;
+  q->file_size = st.st_size;
+
+  err = "requested size change";
+  if (q->file_size != par->cache_size)
+    goto close_and_fail;
+
+  err = "cannot mmap";
+  if ((q->mmap_data = mmap(NULL, q->file_size, PROT_READ | PROT_WRITE, MAP_SHARED, q->fd, 0)) == MAP_FAILED)
+    goto close_and_fail;
+  struct qache_header *h = (struct qache_header *) q->mmap_data;
+
+  qache_setup_pointers(q);
+  qache_lock(q);
+
+  err = "incompatible format";
+  if (h->magic != QACHE_MAGIC ||
+      h->block_size != par->block_size ||
+      h->max_entries != par->max_entries ||
+      h->format_id != par->format_id)
+    goto unlock_and_fail;
+
+  err = "incomplete file";
+  if (h->num_blocks*h->block_size != q->file_size)
+    goto unlock_and_fail;
+
+  /*
+   *  The audit follows the table offsets and sizes stored in the header,
+   *  so make sure that all tables lie inside the mapping before touching them.
+   *  The sums are calculated in 64 bits to avoid overflows.
+   */
+  err = "corrupted header";
+  u64 size = q->file_size;
+  if (h->block_shift >= 32 || (1U << h->block_shift) != h->block_size ||
+      !h->hash_size ||
+      h->first_data_block >= h->num_blocks ||
+      (u64)h->entry_table_start + (u64)h->max_entries * sizeof(struct qache_entry) > size ||
+      (u64)h->hash_table_start + (u64)h->hash_size * sizeof(u32) > size ||
+      (u64)h->next_table_start + (u64)h->num_blocks * sizeof(u32) > size)
+    goto unlock_and_fail;
+
+  if (err = do_audit(q))
+    goto unlock_and_fail;
+
+  qache_unlock(q, 0);
+  msg(L_INFO, "Cache %s: using existing data", q->file_name);
+  return 1;
+
+ unlock_and_fail:
+  qache_unlock(q, 0);
+  munmap(q->mmap_data, q->file_size);
+ close_and_fail:
+  msg(L_INFO, "Cache %s: ignoring old contents (%s)", q->file_name, err);
+  close(q->fd);
+  return 0;
+}
+
+/*
+ *  Create a new empty cache file (truncating an existing one), write the
+ *  header, all tables and zeroed data blocks in the layout described at
+ *  the top of this file, and map the result. Dies on failure.
+ */
+static void
+qache_create(struct qache *q, struct qache_params *par)
+{
+  q->fd = open(q->file_name, O_RDWR | O_CREAT | O_TRUNC, 0666);
+  if (q->fd < 0)
+    die("Cache %s: unable to create (%m)", q->file_name);
+  struct fastbuf *fb = bfdopen_shared(q->fd, 16384);
+
+  /* Calculate the layout: header, entries, hash table, next pointers, data blocks */
+  struct qache_header h;
+  bzero(&h, sizeof(h));
+  h.magic = QACHE_MAGIC;
+  h.block_size = par->block_size;
+  h.block_shift = bit_fls(h.block_size);
+  h.num_blocks = par->cache_size >> h.block_shift;
+  h.format_id = par->format_id;
+  h.entry_table_start = sizeof(h);
+  h.max_entries = par->max_entries;
+  h.hash_table_start = h.entry_table_start + h.max_entries * sizeof(struct qache_entry);
+  /* The hash table size is the smallest power of two not less than max_entries */
+  h.hash_size = 1;
+  while (h.hash_size < h.max_entries)
+    h.hash_size *= 2;
+  h.next_table_start = h.hash_table_start + h.hash_size * 4;
+  h.first_data_block = (h.next_table_start + 4*h.num_blocks + h.block_size - 1) >> h.block_shift;
+  if (h.first_data_block >= h.num_blocks)
+    die("Cache %s: Requested size is too small even to hold the maintenance structures", q->file_name);
+  bwrite(fb, &h, sizeof(h));
+
+  /* Entry #0: heads of all lists */
+  ASSERT(btell(fb) == (sh_off_t)h.entry_table_start);
+  struct qache_entry ent;
+  bzero(&ent, sizeof(ent));
+  ent.first_data_block = h.first_data_block;
+  ent.data_len = h.num_blocks - h.first_data_block;
+  ent.hash_next = 1;
+  bwrite(fb, &ent, sizeof(ent));
+
+  /* Other entries */
+  /* All of them are free and chained together by hash_next */
+  bzero(&ent, sizeof(ent));
+  ent.data_len = ~0U;
+  for (uns i=1; i<h.max_entries; i++)
+    {
+      ent.hash_next = (i == h.max_entries-1 ? 0 : i+1);
+      bwrite(fb, &ent, sizeof(ent));
+    }
+
+  /* The hash table */
+  ASSERT(btell(fb) == (sh_off_t)h.hash_table_start);
+  for (uns i=0; i<h.hash_size; i++)
+    bputl(fb, 0);
+
+  /* The next pointers */
+  /* All data blocks form a single free list, blocks in front of the first data block get 0 */
+  ASSERT(btell(fb) == (sh_off_t)h.next_table_start);
+  for (uns i=0; i<h.num_blocks; i++)
+    bputl(fb, (i < h.first_data_block || i == h.num_blocks-1) ? 0 : i+1);
+
+  /* Padding */
+  ASSERT(btell(fb) <= (sh_off_t)(h.first_data_block << h.block_shift));
+  while (btell(fb) < (sh_off_t)(h.first_data_block << h.block_shift))
+    bputc(fb, 0);
+
+  /* Data blocks */
+  for (uns i=h.first_data_block; i<h.num_blocks; i++)
+    for (uns j=0; j<h.block_size; j+=4)
+      bputl(fb, 0);
+
+  ASSERT(btell(fb) == (sh_off_t)par->cache_size);
+  bclose(fb);
+  msg(L_INFO, "Cache %s: created (%d bytes, %d slots, %d buckets)", q->file_name, par->cache_size, h.max_entries, h.hash_size);
+
+  if ((q->mmap_data = mmap(NULL, par->cache_size, PROT_READ | PROT_WRITE, MAP_SHARED, q->fd, 0)) == MAP_FAILED)
+    die("Cache %s: mmap failed (%m)", par->file_name);
+  q->file_size = par->cache_size;
+  qache_setup_pointers(q);
+}
+
+/*
+ *  Open a cache described by par. With force_reset <= 0 an existing file is
+ *  reused if it is usable; if it is not, force_reset < 0 is a fatal error,
+ *  otherwise a new cache is created. The cache size in par is rounded up
+ *  to a multiple of the block size.
+ */
+struct qache *
+qache_open(struct qache_params *par)
+{
+  struct qache *q = xmalloc_zero(sizeof(*q));
+  q->file_name = xstrdup(par->file_name);
+
+  /* The block size must be a power of two, at least 8 */
+  ASSERT(par->block_size >= 8 && !(par->block_size & (par->block_size-1)));
+  par->cache_size = ALIGN_TO(par->cache_size, par->block_size);
+
+  if (par->force_reset <= 0 && qache_open_existing(q, par))
+    return q;
+  if (par->force_reset < 0)
+    die("Cache %s: read-only access requested, but no data available", q->file_name);
+  else
+    qache_create(q, par);
+  return q;
+}
+
+/* Unmap and close the cache and free the handle; the file is removed unless retain_data is set. */
+void
+qache_close(struct qache *q, uns retain_data)
+{
+  munmap(q->mmap_data, q->file_size);
+  close(q->fd);
+  if (!retain_data)
+    {
+      if (unlink(q->file_name) < 0)
+       msg(L_ERROR, "Cache %s: unlink failed (%m)", q->file_name);
+    }
+  xfree(q->file_name);
+  xfree(q);
+}
+
+/* Returns the hash bucket of the key: its first four bytes taken as a big-endian number modulo the table size. */
+static uns
+qache_hash(struct qache *q, qache_key_t *key)
+{
+  /* Convert to unsigned before shifting: shifting a promoted int left by 24 overflows for bytes >= 0x80 */
+  uns h = ((uns)(*key)[0] << 24) | ((uns)(*key)[1] << 16) | ((uns)(*key)[2] << 8) | (uns)(*key)[3];
+  return h % q->hdr->hash_size;
+}
+
+/*
+ *  Find the entry with the given key. pos_hint may be an entry number where
+ *  the key is expected to be (0 for no hint); it is verified before use.
+ *  Returns the entry number or 0 if the key is not present.
+ */
+static uns
+qache_hash_find(struct qache *q, qache_key_t *key, uns pos_hint)
+{
+  ASSERT(q->locked);
+
+  /* Try the hint first */
+  if (pos_hint && pos_hint < q->hdr->max_entries)
+    {
+      struct qache_entry *hinted = &q->entry_table[pos_hint];
+      if (hinted->data_len != ~0U && !memcmp(hinted->key, key, sizeof(*key)))
+       return pos_hint;
+    }
+
+  /* Walk the hash chain */
+  uns e = q->hash_table[qache_hash(q, key)];
+  while (e)
+    {
+      if (!memcmp(q->entry_table[e].key, key, sizeof(*key)))
+       break;
+      e = q->entry_table[e].hash_next;
+    }
+  return e;
+}
+
+/* Link entry e at the head of the hash chain of its key. */
+static void
+qache_hash_insert(struct qache *q, uns e)
+{
+  struct qache_entry *entry = &q->entry_table[e];
+  u32 *bucket = &q->hash_table[qache_hash(q, &entry->key)];
+
+  entry->hash_next = *bucket;
+  *bucket = e;
+}
+
+/*
+ *  Unlink entry e from the hash chain of its key. The chain is searched by
+ *  comparing keys; it is a fatal error if no entry with that key is found.
+ */
+static void
+qache_hash_remove(struct qache *q, uns e)
+{
+  struct qache_entry *entry = &q->entry_table[e];
+  uns f, *hh;
+  /* hh points to the link leading to entry f, so that f can be unlinked by rewriting *hh */
+  for (hh=&q->hash_table[qache_hash(q, &entry->key)]; f=*hh; hh=&(q->entry_table[f].hash_next))
+    if (!memcmp(q->entry_table[f].key, entry->key, sizeof(qache_key_t)))
+      {
+       *hh = entry->hash_next;
+       return;
+      }
+  ASSERT(0);
+}
+
+/*
+ *  Take the first entry off the free list, mark it as used (data_len = 0)
+ *  and return its number. The free list must not be empty.
+ */
+static uns
+qache_alloc_entry(struct qache *q)
+{
+  uns e = q->first_free_entry;
+  ASSERT(q->locked && e);
+  struct qache_entry *entry = &q->entry_table[e];
+  ASSERT(entry->data_len == ~0U);
+  q->first_free_entry = entry->hash_next;
+  entry->data_len = 0;
+  return e;
+}
+
+/* Mark entry e as free (data_len = ~0U) and put it at the head of the free list. */
+static void
+qache_free_entry(struct qache *q, uns e)
+{
+  struct qache_entry *entry = &q->entry_table[e];
+  ASSERT(q->locked && entry->data_len != ~0U);
+  entry->data_len = ~0U;
+  entry->hash_next = q->first_free_entry;
+  q->first_free_entry = e;
+}
+
+/* Returns the address of the given block in the mapping; the block number must be valid and non-zero. */
+static inline void *
+get_block_start(struct qache *q, uns block)
+{
+  ASSERT(block && block < q->hdr->num_blocks);
+  uns offset = block << q->hdr->block_shift;
+  return q->mmap_data + offset;
+}
+
+/* Take the first block off the free list and return its number; there must be a free block. */
+static uns
+qache_alloc_block(struct qache *q)
+{
+  ASSERT(q->locked && q->num_free_blocks);
+  uns blk = q->first_free_block;
+  q->first_free_block = q->next_table[blk];
+  q->num_free_blocks--;
+  DBG("\tAllocated block %d", blk);
+  return blk;
+}
+
+/* Put block blk at the head of the free list. */
+static void
+qache_free_block(struct qache *q, uns blk)
+{
+  ASSERT(q->locked);
+  q->next_table[blk] = q->first_free_block;
+  q->first_free_block = blk;
+  q->num_free_blocks++;
+  DBG("\tFreed block %d", blk);
+}
+
+/* Insert entry e at the head of the LRU list (just after entry #0); e must not be linked anywhere. */
+static void
+qache_lru_insert(struct qache *q, uns e)
+{
+  struct qache_entry *entry = &q->entry_table[e];
+  ASSERT(q->locked && !entry->lru_prev && !entry->lru_next);
+
+  struct qache_entry *head = &q->entry_table[0];
+  uns old_first = head->lru_next;
+
+  entry->lru_prev = 0;
+  entry->lru_next = old_first;
+  q->entry_table[old_first].lru_prev = e;
+  head->lru_next = e;
+}
+
+/* Unlink entry e from the LRU list and clear its links. */
+static void
+qache_lru_remove(struct qache *q, uns e)
+{
+  ASSERT(q->locked);
+  struct qache_entry *entry = &q->entry_table[e];
+  uns prev = entry->lru_prev;
+  uns next = entry->lru_next;
+
+  q->entry_table[prev].lru_next = next;
+  q->entry_table[next].lru_prev = prev;
+  entry->lru_next = 0;
+  entry->lru_prev = 0;
+}
+
+/* Returns the entry at the tail of the LRU list (the predecessor of entry #0); 0 if the list is empty. */
+static uns
+qache_lru_get(struct qache *q)
+{
+  return q->entry_table[0].lru_prev;
+}
+
+/*
+ *  Delete entry e: free all its data blocks, unlink it from the LRU list
+ *  and from the hash table and return it to the free list.
+ */
+static void
+qache_ll_delete(struct qache *q, uns e)
+{
+  struct qache_entry *entry = &q->entry_table[e];
+  uns blk = entry->first_data_block;
+  /* data_len is counted down by one block per iteration, the last block may be partial */
+  while (entry->data_len)
+    {
+      uns next = q->next_table[blk];
+      qache_free_block(q, blk);
+      blk = next;
+      if (entry->data_len >= q->hdr->block_size)
+       entry->data_len -= q->hdr->block_size;
+      else
+       entry->data_len = 0;
+    }
+  qache_lru_remove(q, e);
+  qache_hash_remove(q, e);
+  qache_free_entry(q, e);
+}
+
+/*
+ *  Insert data of the given size under the given key, replacing an old
+ *  entry with the same key if there is one. pos_hint may be the expected
+ *  number of the old entry (0 for no hint). Entries at the tail of the LRU
+ *  list are evicted until there is enough room.
+ *  Returns the number of the new entry, or 0 if the data cannot fit in
+ *  the cache at all (the old entry is deleted even in this case).
+ */
+uns
+qache_insert(struct qache *q, qache_key_t *key, uns pos_hint, void *data, uns size)
+{
+  qache_lock(q);
+
+  uns dirty = 0;
+  uns e = qache_hash_find(q, key, pos_hint);
+  if (e)
+    {
+      qache_ll_delete(q ,e);
+      dirty = 1;
+      DBG("Insert <%s>: deleting old entry %d", format_key(key), e);
+    }
+
+  uns blocks = (size + q->hdr->block_size - 1) >> q->hdr->block_shift;
+  if (blocks > q->hdr->num_blocks - q->hdr->first_data_block)
+    {
+      /* The tables have been modified if the old entry was deleted, so they have to be synced */
+      qache_unlock(q, dirty);
+      return 0;
+    }
+  while (q->num_free_blocks < blocks || !q->first_free_entry)
+    {
+      e = qache_lru_get(q);
+      DBG("Insert <%s>: evicting entry %d to make room for %d blocks", format_key(key), e, blocks);
+      ASSERT(e);
+      qache_ll_delete(q, e);
+    }
+  e = qache_alloc_entry(q);
+  struct qache_entry *entry = &q->entry_table[e];
+  entry->data_len = size;
+  memcpy(entry->key, key, sizeof(*key));
+  DBG("Insert <%s>: created entry %d with %d data blocks", format_key(key), e, blocks);
+
+  /* The block chain is built from its end: each new block is put in front of the previous ones */
+  entry->first_data_block = 0;
+  while (size)
+    {
+      /* Only the last block of the data (copied first) may be partial */
+      uns chunk = (size & (q->hdr->block_size-1)) ? : q->hdr->block_size;
+      uns blk = qache_alloc_block(q);
+      q->next_table[blk] = entry->first_data_block;
+      memcpy(get_block_start(q, blk), (byte *)data+size-chunk, chunk);
+      qache_msync_block(q, blk);
+      entry->first_data_block = blk;
+      size -= chunk;
+    }
+
+  qache_lru_insert(q, e);
+  qache_hash_insert(q, e);
+  qache_unlock(q, 1);
+  return e;
+}
+
+/*
+ * Copy data of a cache entry out to the caller.
+ *
+ * If sizep is NULL, nothing is copied and datap must be NULL as well.
+ * Otherwise *sizep gives the maximum number of bytes wanted and is replaced
+ * by the number of bytes available from offset start to the end of the entry.
+ * If datap is non-NULL, the data are copied to *datap; when *datap is NULL,
+ * a buffer is allocated by xmalloc() and stored there.
+ */
+static void
+copy_out(struct qache *q, struct qache_entry *entry, byte **datap, uns *sizep, uns start)
+{
+  if (sizep)
+    {
+      uns size = *sizep;
+      uns avail = (start > entry->data_len) ? 0 : entry->data_len - start;
+      uns xfer = MIN(size, avail);
+      *sizep = avail;
+      if (datap)
+       {
+         if (!*datap)
+           *datap = xmalloc(xfer);
+         /* Nothing to copy: do not walk the block chain, start may lie beyond its end */
+         if (!xfer)
+           return;
+         /* Skip whole blocks preceding the starting offset */
+         uns blk = entry->first_data_block;
+         while (start >= q->hdr->block_size)
+           {
+             blk = q->next_table[blk];
+             start -= q->hdr->block_size;
+           }
+         byte *data = *datap;
+         while (xfer)
+           {
+             uns len = MIN(xfer, q->hdr->block_size - start);
+             /* Only the first copied block can have a non-zero offset; honour it */
+             memcpy(data, (byte *) get_block_start(q, blk) + start, len);
+             blk = q->next_table[blk];
+             data += len;
+             xfer -= len;
+             start = 0;
+           }
+       }
+    }
+  else
+    ASSERT(!datap);
+}
+
+uns
+qache_lookup(struct qache *q, qache_key_t *key, uns pos_hint, byte **datap, uns *sizep, uns start)
+{
+  qache_lock(q);
+  uns e = qache_hash_find(q, key, pos_hint);
+  if (!e)
+    {
+      /* Miss: nothing has been touched, so unlock as unmodified */
+      DBG("Lookup <%s>: not found", format_key(key));
+      qache_unlock(q, 0);
+      return 0;
+    }
+
+  DBG("Lookup <%s>: found entry %d", format_key(key), e);
+  /* Take the entry out of the LRU list and insert it again */
+  qache_lru_remove(q, e);
+  qache_lru_insert(q, e);
+  copy_out(q, &q->entry_table[e], datap, sizep, start);
+  qache_unlock(q, 1);     /* Yes, modified -- we update the LRU */
+  return e;
+}
+
+/*
+ * Inspect the entry at position pos without touching the LRU list.
+ * Returns ~0U if pos is out of range, 0 if the slot is empty (its data_len
+ * is ~0U) and pos otherwise. If key is non-NULL, the key of the entry is
+ * copied to it; datap, sizep and start are handed over to copy_out().
+ */
+uns
+qache_probe(struct qache *q, qache_key_t *key, uns pos, byte **datap, uns *sizep, uns start)
+{
+  if (!pos || pos >= q->hdr->max_entries)
+    {
+      DBG("Probe %d: Out of range", pos);
+      return ~0U;
+    }
+
+  qache_lock(q);
+  uns ret = 0;
+  struct qache_entry *entry = &q->entry_table[pos];
+  if (entry->data_len != ~0U)
+    {
+      /* The format has two conversions, so pos must be passed along with the key */
+      DBG("Probe %d: Found key <%s>", pos, format_key(&entry->key));
+      if (key)
+       memcpy(key, entry->key, sizeof(qache_key_t));
+      copy_out(q, entry, datap, sizep, start);
+      ret = pos;
+    }
+  else
+    DBG("Probe %d: Empty", pos);
+  qache_unlock(q, 0);
+  return ret;
+}
+
+/*
+ * Delete the entry with the given key (pos_hint is passed to the hash lookup).
+ * Returns the position of the deleted entry or 0 if no entry matched.
+ */
+uns
+qache_delete(struct qache *q, qache_key_t *key, uns pos_hint)
+{
+  qache_lock(q);
+  uns e = qache_hash_find(q, key, pos_hint);
+  if (e)
+    {
+      DBG("Delete <%s>: deleting entry %d", format_key(key), e);
+      qache_ll_delete(q, e);
+    }
+  else
+    DBG("Delete <%s>: No match", format_key(key));
+  /* NOTE(review): unlocked as modified even when nothing matched -- confirm this is intended */
+  qache_unlock(q, 1);
+  return e;
+}
+
+/*
+ * Dump the cache header, the table of entries, the hash table and the
+ * next pointers of all data blocks at the L_DEBUG level. No lock is taken here.
+ */
+void
+qache_debug(struct qache *q)
+{
+  /* block_size is printed twice: once as the block size and once as the "data" size */
+  msg(L_DEBUG, "Cache %s: block_size=%d (%d data), num_blocks=%d (%d first data), %d slots, %d hash buckets",
+      q->file_name, q->hdr->block_size, q->hdr->block_size, q->hdr->num_blocks, q->hdr->first_data_block,
+      q->hdr->max_entries, q->hdr->hash_size);
+
+  /* All entry slots, including slot 0 */
+  msg(L_DEBUG, "Table of cache entries:");
+  msg(L_DEBUG, "\tEntry\tLruPrev\tLruNext\tDataLen\tDataBlk\tHashNxt\tKey");
+  for (uns e=0; e<q->hdr->max_entries; e++)
+    {
+      struct qache_entry *ent = &q->entry_table[e];
+      msg(L_DEBUG, "\t%d\t%d\t%d\t%d\t%d\t%d\t%s", e, ent->lru_prev, ent->lru_next, ent->data_len,
+         ent->first_data_block, ent->hash_next, format_key(&ent->key));
+    }
+
+  msg(L_DEBUG, "Hash table:");
+  for (uns h=0; h<q->hdr->hash_size; h++)
+    msg(L_DEBUG, "\t%04x\t%d", h, q->hash_table[h]);
+
+  /* Only data blocks are listed, i.e., those from first_data_block on */
+  msg(L_DEBUG, "Next pointers:");
+  for (uns blk=q->hdr->first_data_block; blk<q->hdr->num_blocks; blk++)
+    msg(L_DEBUG, "\t%d\t%d", blk, q->next_table[blk]);
+}
+
+void
+qache_audit(struct qache *q)
+{
+  /* Run do_audit() under the lock and die with its message if it reports one */
+  qache_lock(q);
+  char *problem = do_audit(q);
+  if (problem)
+    die("Cache %s: %s", q->file_name, problem);
+  qache_unlock(q, 0);
+}
+
+#ifdef TEST
+
+int main(int argc UNUSED, char **argv UNUSED)
+{
+  struct qache_params par = {
+    .file_name = "tmp/test",
+    .block_size = 256,
+    .cache_size = 65536,
+    .max_entries = 123,
+    .force_reset = 0,
+    .format_id = 0xfeedcafe
+  };
+  struct qache *q = qache_open(&par);
+
+  qache_key_t key = { 0x12, 0x34, 0x56, 0x78, 0x9a, 0xbc, 0xde, 0xf0, 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef };
+#define N 100
+  uns i, j;
+  byte data[11*N];
+  for (i=0; i<N; i++)
+    {
+      key[3] = i / 16; key[15] = i % 16;
+      for (j=0; j<11*i; j++)
+       data[j] = 0x33 + i*j;
+      qache_insert(q, &key, 0, data, 11*i);
+    }
+  qache_debug(q);
+  qache_audit(q);
+
+  uns found = 0;
+  for (i=0; i<100; i++)
+    {
+      key[3] = i / 16; key[15] = i % 16;
+      byte *dptr = data;
+      uns sz = sizeof(data);
+      uns e = qache_lookup(q, &key, 0, &dptr, &sz, 0);
+      if (e)
+       {
+         ASSERT(sz == 11*i);
+         for (j=0; j<sz; j++)
+           ASSERT(data[j] == (byte)(0x33 + i*j));
+         found++;
+       }
+    }
+  msg(L_INFO, "Found %d of %d entries", found, N);
+
+  qache_close(q, 1);
+  return 0;
+}
+
+#endif
diff --git a/lib/qache.h b/lib/qache.h

new file mode 100644 (file)

index 0000000..5c242a4
--- /dev/null
+++ b/lib/qache.h
@@ -0,0 +1,57 @@
+/*
+ *      Simple and Quick Shared Memory Cache
+ *
+ *     (c) 2005 Martin Mares <mj@ucw.cz>
+ */
+
+#ifndef _UCW_QACHE_H
+#define _UCW_QACHE_H
+
+/* Parameters of a cache, passed to qache_open() */
+struct qache_params {
+  char *file_name;                     /* Name of the cache file (NOTE(review): presumably the file backing the cache -- confirm in qache_open) */
+  uns block_size;                      /* Cache block size (a power of two) */
+  uns cache_size;                      /* Size of the whole cache */
+  uns max_entries;                     /* Maximum number of cached entries */
+  int force_reset;                     /* Force creation of a new cache even if the old one seems usable, -1 if reset should never be done */
+  uns format_id;                       /* Data format ID (old cache not used if formats differ) */
+};
+
+typedef byte qache_key_t[16];
+
+struct qache;
+
+/* Create and destroy a cache */
+struct qache *qache_open(struct qache_params *p);
+void qache_close(struct qache *q, uns retain_data);
+
+/* Insert a new item to the cache with a given key and data. If pos_hint is non-zero, it serves
+ * as a hint about the position of the entry (if it's known that an entry with the particular key
+ * was located there a moment ago). An existing entry with the same key is replaced. Returns
+ * the position of the new entry, or 0 if the data are too large to fit in the cache.
+ */
+uns qache_insert(struct qache *q, qache_key_t *key, uns pos_hint, void *data, uns size);
+
+/* Look up data in the cache, given a key and a position hint (as above). If datap is non-NULL, data
+ * from the cache entry are copied to the buffer *datap points to (if *datap is NULL, new memory is
+ * allocated by calling xmalloc and *datap is set to point to that memory). The *sizep contains the
+ * maximum number of bytes to be copied (~0U if unlimited) and it is replaced by the number of bytes
+ * available (so it can be greater than the original value requested). The start indicates starting
+ * offset inside the entry's data. Returns the position of the entry, or 0 if the key was not found.
+ */
+uns qache_lookup(struct qache *q, qache_key_t *key, uns pos_hint, byte **datap, uns *sizep, uns start);
+
+/* Inspect data in the cache (but don't modify LRU nor anything else), given a position.
+ * If key is non-NULL, it's filled with the cache key. The rest works as in qache_lookup.
+ * Returns 0 if the entry is empty, ~0 for position out of range, entry number otherwise.
+ */
+uns qache_probe(struct qache *q, qache_key_t *key, uns pos, byte **datap, uns *sizep, uns start);
+
+/* Delete data from the cache, given a key and a position hint. */
+uns qache_delete(struct qache *q, qache_key_t *key, uns pos_hint);
+
+/* Debugging dump (beware, doesn't lock the cache!) */
+void qache_debug(struct qache *q);
+
+/* Check consistency of the cache structure */
+void qache_audit(struct qache *q);
+
+#endif
diff --git a/lib/random.c b/lib/random.c

new file mode 100644 (file)

index 0000000..a771c97
--- /dev/null
+++ b/lib/random.c
@@ -0,0 +1,58 @@
+/*
+ *     UCW Library -- Unbiased Random Numbers
+ *
+ *     (c) 1998--2006 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+
+#include <stdlib.h>
+
+/* We expect the random generator in libc to give at least 30 bits of randomness */
+COMPILE_ASSERT(RAND_MAX_RANGE_TEST, RAND_MAX >= (1 << 30)-1);
+
+/* Return a random 32-bit number composed of the low 16 bits of two random() calls. */
+uns
+random_u32(void)
+{
+  /*
+   * Combine the halves as unsigned values: shifting the signed result of
+   * random() left by 16 can overflow where long is 32 bits wide. Separate
+   * statements also fix the order of the two calls.
+   */
+  uns lo = random() & 0xffff;
+  uns hi = random() & 0xffff;
+  return lo | (hi << 16);
+}
+
+uns
+random_max(uns max)
+{
+  ASSERT(max <= (1 << 30));
+
+  /* Largest multiple of max not exceeding RAND_MAX+1; values at or above it are thrown away */
+  uns limit = (RAND_MAX + 1U) - ((RAND_MAX + 1U) % max);
+  for (;;)
+    {
+      uns candidate = random();
+      if (candidate < limit)
+       return candidate % max;
+    }
+}
+
+/*
+ * Return a random 64-bit number built from three random() calls:
+ * 16 bits go to positions 48--63, 24 bits to 24--47 and 24 bits to 0--23.
+ * The three calls are operands of a single expression, so C does not
+ * fix the order in which they are made.
+ */
+u64
+random_u64(void)
+{
+  return
+    ((u64)(random() & 0xffff) << 48) |
+    ((u64)(random() & 0xffffff) << 24) |
+    (random() & 0xffffff);
+}
+
+u64
+random_max_u64(u64 max)
+{
+  /* Small ranges are served by the 32-bit variant */
+  if (max < (1 << 30))
+    return random_max(max);
+
+  /* Reject values at or above the largest multiple of max below 2^64-1 */
+  u64 top = 0xffffffffffffffff;
+  u64 limit = top - (top % max);
+  for (;;)
+    {
+      u64 candidate = random_u64();
+      if (candidate < limit)
+       return candidate % max;
+    }
+}
diff --git a/lib/randomkey.c b/lib/randomkey.c

new file mode 100644 (file)

index 0000000..a799a1b
--- /dev/null
+++ b/lib/randomkey.c
@@ -0,0 +1,25 @@
+/*
+ *     UCW Library -- Cryptographically Safe Random Key Generator
+ *
+ *     (c) 2002 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+
+#include <fcntl.h>
+#include <unistd.h>
+
+/*
+ * Fill buf with size bytes read from /dev/urandom.
+ * Dies if the device cannot be opened or read.
+ */
+void
+randomkey(byte *buf, uns size)
+{
+  int fd;
+
+  if ((fd = open("/dev/urandom", O_RDONLY, 0)) < 0)
+    die("Unable to open /dev/urandom: %m");
+  /* read() may return fewer bytes than requested, so keep reading until the buffer is full */
+  while (size)
+    {
+      ssize_t got = read(fd, buf, size);
+      if (got < 0)
+       die("Error reading /dev/urandom: %m");
+      if (!got)
+       die("Error reading /dev/urandom: unexpected end of file");
+      buf += got;
+      size -= got;
+    }
+  close(fd);
+}
diff --git a/lib/realloc.c b/lib/realloc.c

new file mode 100644 (file)

index 0000000..6d6b0ba
--- /dev/null
+++ b/lib/realloc.c
@@ -0,0 +1,26 @@
+/*
+ *     UCW Library -- Memory Re-allocation
+ *
+ *     (c) 1997 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+
+#include <stdlib.h>
+
+#ifndef DEBUG_DMALLOC
+
+/*
+ * realloc() which dies instead of reporting failure.
+ * A NULL result for size 0 is passed on to the caller, not treated as failure.
+ */
+void *
+xrealloc(void *old, uns size)
+{
+  /* We assume that realloc(NULL, x) works like malloc(x), which is true with the glibc. */
+  void *x = realloc(old, size);
+  /* realloc(p, 0) may legitimately return NULL, so only a NULL for a non-zero size is an error */
+  if (!x && size)
+    die("Cannot reallocate %u bytes of memory", size);
+  return x;
+}
+
+#endif
diff --git a/lib/redblack-test.c b/lib/redblack-test.c

new file mode 100644 (file)

index 0000000..80a7a00
--- /dev/null
+++ b/lib/redblack-test.c
@@ -0,0 +1,252 @@
+/*
+ *     Test of red-black trees
+ *
+ *     (c) 2002, Robert Spalek <robert@ucw.cz>
+ */
+
+#include "lib/lib.h"
+#include "lib/getopt.h"
+#include "lib/fastbuf.h"
+#include <stdio.h>
+#include <stdlib.h>
+
+/* Node of the first test tree: an integer key (see TREE_KEY_ATOMIC below) and an integer payload */
+struct my1_node
+{
+       int key;
+       int x;
+};
+
+/* Write the key of a node to the fastbuf as "key=<number> " */
+static void my_dump_key(struct fastbuf *fb, struct my1_node *n)
+{
+       char text[20];
+       snprintf(text, sizeof(text), "key=%d ", n->key);
+       bputs(fb, text);
+}
+
+/* Write the payload of a node to the fastbuf as "x=<number> " */
+static void my_dump_data(struct fastbuf *fb, struct my1_node *n)
+{
+       char text[20];
+       snprintf(text, sizeof(text), "x=%d ", n->x);
+       bputs(fb, text);
+}
+
+/*
+ * First tree: nodes are struct my1_node keyed by the atomic field `key',
+ * all generated names get the my_ prefix.
+ */
+#define TREE_NODE struct my1_node
+#define TREE_PREFIX(x) my_##x
+#define TREE_KEY_ATOMIC key
+#define TREE_WANT_CLEANUP
+#define TREE_WANT_LOOKUP
+#define TREE_WANT_DELETE
+#define TREE_WANT_ITERATOR
+#define TREE_WANT_DUMP
+#define TREE_CONSERVE_SPACE
+#include "redblack.h"
+
+/*
+ * Iterate over the whole tree and assert that the keys never decrease.
+ * If fb is non-NULL, every node is also printed there as "key -> x"
+ * and the fastbuf is flushed at the end.
+ */
+static void my_check_order(struct fastbuf *fb, struct my_tree *t)
+{
+       /* Starting value for the comparison (presumably the smallest int, assuming 32-bit int) */
+       int last_key = 0x80000000;
+       TREE_FOR_ALL(my, t, n)
+       {
+               ASSERT(n->key >= last_key);
+               last_key = n->key;
+               if (fb)
+               {
+                       char tmp[30];
+                       sprintf(tmp, "%d -> %d\n", n->key, n->x);
+                       bputs(fb, tmp);
+               }
+       }
+       TREE_END_FOR;
+       if (fb)
+               bflush(fb);
+}
+
+/* Node of the second test tree: just a string key stored at the end of the node (see TREE_KEY_ENDSTRING below) */
+struct my2_node
+{
+       char key[1];
+};
+
+/* Write the key of a node to the fastbuf as "key=<string> " */
+static void my2_dump_key(struct fastbuf *fb, struct my2_node *n)
+{
+       bputs(fb, "key=");
+       bputs(fb, n->key);
+       bputc(fb, ' ');
+}
+
+/* Nodes of the second tree carry no data besides the key, so there is nothing to dump */
+static void my2_dump_data(struct fastbuf *fb UNUSED, struct my2_node *n UNUSED)
+{
+}
+
+/*
+ * Second tree: nodes are struct my2_node keyed by a case-insensitive string
+ * stored at the end of the node, all generated names get the my2_ prefix.
+ * TREE_WANT_FIND_NEXT makes the tree accept duplicate keys.
+ * NOTE(review): TREE_STATIC is not among the parameters listed in the header
+ * comment of redblack.h (only TREE_GLOBAL is) -- confirm whether it has any effect.
+ */
+#define TREE_NODE struct my2_node
+#define TREE_PREFIX(x) my2_##x
+#define TREE_KEY_ENDSTRING key
+#define TREE_NOCASE
+#define TREE_WANT_CLEANUP
+#define TREE_WANT_NEW
+#define TREE_WANT_SEARCH
+#define TREE_WANT_REMOVE
+#define TREE_WANT_FIND_NEXT
+#define TREE_WANT_ITERATOR
+#define TREE_WANT_DUMP
+#define TREE_STATIC
+#define TREE_CONSERVE_SPACE
+#include "redblack.h"
+
+/*
+ * Fill txt with a random zero-terminated string of less than max_len
+ * characters with codes between 32 and 127.
+ */
+static void random_string(char *txt, uns max_len)
+{
+       uns len = random() % max_len;
+       char *pos = txt;
+       while (pos < txt + len)
+               *pos++ = random() % 96 + 32;
+       *pos = 0;
+}
+
+/* Short options accepted by the test: the common CF_SHORT_OPTS plus -v, -n <num> and -a */
+static char *options = CF_SHORT_OPTS "vn:a";
+
+/* Usage message printed by usage() */
+static char *help = "\
+Usage: test1.bin <options>\n\
+Options:\n"
+CF_USAGE
+"-v\tSet verbose mode\n\
+-n num\tNumber of inserted nodes\n\
+-a\tProbe some ASSERTs\n\
+";
+
+/* Print the help text to stderr and exit with status 1 */
+static void NONRET
+usage(void)
+{
+       fputs(help, stderr);
+       exit(1);
+}
+
+/*
+ * Test driver. Runs four scenarios in turn: a load test with random integer
+ * keys, sequential adding and deleting, a complete tree of 997 keys, and
+ * a test with random string keys. With -v, a message is printed after each
+ * passed scenario; with -v given twice, the trees are also dumped to stdout.
+ */
+int
+main(int argc, char **argv)
+{
+       int verbose = 0, number = 1000, asserts = 0;
+       int opt;
+       struct fastbuf *fb, *dump_fb;
+       struct my_tree t;
+       struct my2_tree t2;
+       int i;
+       cf_def_file = NULL;
+       log_init(argv[0]);
+       while ((opt = cf_getopt(argc, argv, options, CF_NO_LONG_OPTS, NULL)) >= 0)
+               switch (opt)
+               {
+                       case 'v':
+                               verbose++;
+                               break;
+                       case 'n':
+                               number = atoi(optarg);
+                               break;
+                       case 'a':
+                               asserts++;
+                               break;
+                       default:
+                               usage();
+                               break;
+               }
+       /* No non-option arguments are accepted */
+       if (optind < argc)
+               usage();
+       /* Output goes to file descriptor 1; dumps are produced only when verbose > 1 */
+       fb = bfdopen(1, 4096);
+       if (verbose > 1)
+               dump_fb = fb;
+       else
+               dump_fb = NULL;
+
+       /* Scenario 1: look up (and thereby create) `number' random keys, then check the ordering */
+       my_init(&t);
+       for (i=0; i<number; i++)
+               my_lookup(&t, random() % 1000000)->x = i;
+       my_dump(dump_fb, &t);
+       my_check_order(dump_fb, &t);
+       if (asserts)
+       {
+               /* With -a, the same key is created twice on purpose ("Probe some ASSERTs") */
+               my_new(&t, 1);
+               my_new(&t, 1);
+       }
+       my_cleanup(&t);
+       if (verbose > 0)
+               bputs(fb, "Load test passed\n");
+
+       /* Scenario 2: add keys 0..99 in order, then delete all of them in a permuted order */
+       my_init(&t);
+       for (i=0; i<100; i++)
+       {
+               my_new(&t, i)->x = i;
+               my_dump(dump_fb, &t);
+       }
+       for (i=0; i<100; i++)
+       {
+               /* Within each group of ten, the last digit is rotated by the number of the group */
+               int a = i/10, b = i%10, j = a*10 + (b + a) % 10;
+               int res UNUSED = my_delete(&t, j);
+               ASSERT(res);
+               my_dump(dump_fb, &t);
+       }
+       my_cleanup(&t);
+       if (verbose > 0)
+               bputs(fb, "Sequential adding and deleting passed\n");
+
+       /* Scenario 3: insert keys i*238 mod 997, verify that the iterator yields 0..996, delete in another order */
+       my_init(&t);
+       for (i=0; i<997; i++)
+       {
+               my_new(&t, i*238 % 997)->x = i;
+               my_dump(NULL, &t);
+       }
+       my_dump(dump_fb, &t);
+       i = 0;
+       TREE_FOR_ALL(my, &t, n)
+       {
+               ASSERT(n->key == i);
+               i++;
+       }
+       TREE_END_FOR;
+       ASSERT(i == 997);
+       for (i=0; i<997; i++)
+       {
+               int res UNUSED = my_delete(&t, i*111 % 997);
+               ASSERT(res);
+               my_dump(NULL, &t);
+       }
+       my_dump(dump_fb, &t);
+       my_cleanup(&t);
+       if (verbose > 0)
+               bputs(fb, "Complete tree passed\n");
+
+       /* Scenario 4: a tree of `number' random strings */
+       my2_init(&t2);
+       for (i=0; i<number; i++)
+       {
+               char txt[30];
+               random_string(txt, 30);
+               my2_new(&t2, txt);
+       }
+       my2_dump(dump_fb, &t2);
+       TREE_FOR_ALL(my2, &t2, n)
+       {
+               /* Count the nodes reachable from n by my2_find_next(), n included */
+               my2_node *tmp;
+               int count = 0;
+               for (tmp=n; tmp; tmp = my2_find_next(tmp))
+                       count++;
+               if (dump_fb)
+               {
+                       char txt[20];
+                       bputs(dump_fb, n->key);
+                       sprintf(txt, ": %d\n", count);
+                       bputs(dump_fb, txt);
+               }
+       }
+       TREE_END_FOR;
+       /* Empty the tree by removing whatever node my2_search() returns for a random string */
+       while (t2.count > 0)
+       {
+               char txt[30];
+               my2_node *n;
+               random_string(txt, 30);
+               n = my2_search(&t2, txt);
+               ASSERT(n);
+               my2_remove(&t2, n);
+       }
+       my2_dump(dump_fb, &t2);
+       my2_cleanup(&t2);
+       if (verbose > 0)
+               bputs(fb, "String test passed\n");
+
+       bclose(fb);
+       return 0;
+}
diff --git a/lib/redblack.h b/lib/redblack.h

new file mode 100644 (file)

index 0000000..a89149b
--- /dev/null
+++ b/lib/redblack.h
@@ -0,0 +1,1040 @@
+/*
+ *     UCW Library -- Red-black trees
+ *
+ *     (c) 2002--2005, Robert Spalek <robert@ucw.cz>
+ *
+ *     Skeleton based on hash-tables by:
+ *
+ *     (c) 2002, Martin Mares <mj@ucw.cz>
+ *
+ */
+
+/*
+ * Data structure description:
+ *
+ * A red-black tree is a binary search tree, where records are stored
+ * in nodes (may be also leaves).  Every node has a colour.  The
+ * following restrictions hold:
+ *
+ * - a parent of a red node is black
+ * - every path from the root to a node with less than 2 children
+ *   contains the same number of black nodes
+ *
+ * A usual interpretation is, that leaves are intervals between records
+ * and contain no data.  Every leaf is black.  This is equivalent, but
+ * saves the space.
+ */
+
+/*
+ *  This is not a normal header file, it's a generator of red-black trees.
+ *  Each time you include it with parameters set in the corresponding
+ *  preprocessor macros, it generates a tree structure with the parameters
+ *  given.
+ *
+ *  You need to specify:
+ *
+ *  TREE_NODE          data type where a node dwells (usually a struct).
+ *  TREE_PREFIX(x)     macro to add a name prefix (used on all global names
+ *                     defined by the tree generator).
+ *
+ *  Then decide on type of keys:
+ *
+ *  TREE_KEY_ATOMIC=f  use node->f as a key of an atomic type (i.e.,
+ *                     a type which can be compared using '>', `==', and '<')
+ *                     & TREE_ATOMIC_TYPE (defaults to int).
+ *  | TREE_KEY_STRING=f        use node->f as a string key, allocated
+ *                     separately from the rest of the node.
+ *  | TREE_KEY_ENDSTRING=f use node->f as a string key, allocated
+ *                     automatically at the end of the node struct
+ *                     (to be declared as "char f[1]" at the end).
+ *  | TREE_KEY_COMPLEX use a multi-component key; as the name suggests,
+ *                     the passing of parameters is a bit complex then.
+ *                     The TREE_KEY_COMPLEX(x) macro should expand to
+ *                     `x k1, x k2, ... x kn' and you should also define:
+ *    & TREE_KEY_DECL  declaration of function parameters in which key
+ *                     should be passed to all tree operations.
+ *                     That is, `type1 k1, type2 k2, ... typen kn'.
+ *                     With complex keys, TREE_GIVE_CMP is mandatory.
+ *
+ *  Then specify what operations you request (all names are automatically
+ *  prefixed by calling TREE_PREFIX):
+ *
+ *  <always defined>   init() -- initialize the tree.
+ *  TREE_WANT_CLEANUP  cleanup() -- deallocate the tree.
+ *  TREE_WANT_FIND     node *find(key) -- find first node with the specified
+ *                     key, return NULL if no such node exists.
+ *  TREE_WANT_FIND_NEXT        node *find_next(node *start) -- find next node with the
+ *                     specified key, return NULL if no such node exists.
+ *                     Implies TREE_DUPLICATES.
+ *  TREE_WANT_SEARCH   node *search(key) -- find the node with the specified
+ *                     key or, if it does not exist, the nearest one.
+ *  TREE_WANT_SEARCH_DOWN node *search_down(key) -- find either the node with
+ *                      specified value, or if it does not exist, the node
+ *                      with nearest smaller value.
+ *  TREE_WANT_BOUNDARY node *boundary(uns direction) -- finds smallest
+ *                     (direction==0) or largest (direction==1) node.
+ *  TREE_WANT_ADJACENT node *adjacent(node *, uns direction) -- finds next
+ *                     (direction==1) or previous (direction==0) node.
+ *  TREE_WANT_NEW      node *new(key) -- create new node with given key.
+ *                     If it already exists, it is created as the last one.
+ *  TREE_WANT_LOOKUP   node *lookup(key) -- find node with given key,
+ *                     if it doesn't exist, create it. Defining
+ *                     TREE_GIVE_INIT_DATA is strongly recommended.
+ *  TREE_WANT_DELETE   int delete(key) -- delete and deallocate node
+ *                     with a given key. Returns success.
+ *  TREE_WANT_REMOVE   remove(node *) -- delete and deallocate given node.
+ *
+ *  TREE_WANT_DUMP     dump(fb, tree) -- dumps the whole tree to the given fastbuf
+ *
+ *  You can also supply several functions:
+ *
+ *  TREE_GIVE_CMP      int cmp(key1, key2) -- return -1, 0, and 1 according to
+ *                     the relation of keys.  By default, we use <, ==, > for
+ *                     atomic types and either strcmp or strcasecmp for
+ *                     strings.
+ *  TREE_GIVE_EXTRA_SIZE int extra_size(key) -- returns how many bytes after the
+ *                     node should be allocated for dynamic data. Default=0
+ *                     or length of the string with TREE_KEY_ENDSTRING.
+ *  TREE_GIVE_INIT_KEY void init_key(node *,key) -- initialize key in a newly
+ *                     created node. Defaults: assignment for atomic keys
+ *                     and static strings, strcpy for end-allocated strings.
+ *  TREE_GIVE_INIT_DATA        void init_data(node *) -- initialize data fields in a
+ *                     newly created node. Very useful for lookup operations.
+ *  TREE_GIVE_ALLOC    void *alloc(unsigned int size) -- allocate space for
+ *                     a node. Default is either normal or pooled allocation
+ *                     depending on whether we want deletions.
+ *                     void free(void *) -- the converse.
+ *
+ *  ... and a couple of extra parameters:
+ *
+ *  TREE_NOCASE                string comparisons should be case-insensitive.
+ *  TREE_ATOMIC_TYPE=t Atomic values are of type `t' instead of int.
+ *  TREE_USE_POOL=pool Allocate all nodes from given mempool.
+ *                     Collides with delete/remove functions.
+ *  TREE_GLOBAL                Functions are exported (i.e., not static).
+ *  TREE_CONSERVE_SPACE        Use as little space as possible at the price of a
+ *                     little slowdown.
+ *  TREE_DUPLICATES    Records with duplicate keys are allowed.
+ *  TREE_MAX_DEPTH     Maximal depth of a tree (for stack allocation).
+ *
+ *  If you set TREE_WANT_ITERATOR, you also get an iterator macro at no
+ *  extra charge:
+ *
+ *  TREE_FOR_ALL(tree_prefix, tree_pointer, variable)
+ *    {
+ *      // node *variable gets declared automatically
+ *      do_something_with_node(variable);
+ *      // use TREE_BREAK and TREE_CONTINUE instead of break and continue
+ *     // you must not alter contents of the tree here
+ *    }
+ *  TREE_END_FOR;
+ *
+ *  Then include "lib/redblack.h" and voila, you have a tree suiting all your
+ *  needs (at least those which you've revealed :) ).
+ *
+ *  After including this file, all parameter macros are automatically
+ *  undef'd.
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#if !defined(TREE_NODE) || !defined(TREE_PREFIX)
+#error Some of the mandatory configuration macros are missing.
+#endif
+
+#define P(x) TREE_PREFIX(x)
+
+/* Declare buckets and the tree.  */
+
+typedef TREE_NODE P(node);
+
+#if defined(TREE_WANT_FIND_NEXT) || defined(TREE_WANT_ADJACENT) || defined(TREE_WANT_ITERATOR) || defined(TREE_WANT_REMOVE)
+#      define TREE_STORE_PARENT
+#endif
+
+/*
+ * A node of the tree as stored in memory: pointers to both sons, optionally
+ * a pointer to the parent, the user's node and -- unless TREE_CONSERVE_SPACE
+ * is set -- a one-bit colour flag.
+ */
+typedef struct P(bucket) {
+       struct P(bucket) *son[2];
+#ifdef TREE_STORE_PARENT
+       struct P(bucket) *parent;
+#endif
+       /* When extra bytes may follow the node, the flag is declared before it (presumably to keep n the last member) */
+#if !defined(TREE_CONSERVE_SPACE) && (defined(TREE_GIVE_EXTRA_SIZE) || defined(TREE_KEY_ENDSTRING))
+       uns red_flag:1;
+#endif
+       P(node) n;
+#if !defined(TREE_CONSERVE_SPACE) && !defined(TREE_GIVE_EXTRA_SIZE) && !defined(TREE_KEY_ENDSTRING)
+       uns red_flag:1;
+#endif
+} P(bucket);
+
+/* The tree itself; init() sets both counters to zero and the root to NULL */
+struct P(tree) {
+       uns count;
+       uns height;                     /* of black nodes */
+       P(bucket) *root;
+};
+
+/* One step of a path through the tree as recorded by fill_stack(): a bucket and the son followed from it */
+typedef struct P(stack_entry) {
+       P(bucket) *buck;
+       uns son;
+} P(stack_entry);
+
+#define T struct P(tree)
+
+/* Preset parameters */
+
+/*
+ * For every key type, define TREE_KEY(x) (the key field name prefixed by x),
+ * TREE_KEY_DECL (declaration of the key as function parameters) and default
+ * versions of cmp(), init_key() and extra_size() unless supplied by the user.
+ */
+#if defined(TREE_KEY_ATOMIC)
+
+#define TREE_KEY(x) x TREE_KEY_ATOMIC
+
+#ifndef TREE_ATOMIC_TYPE
+#      define TREE_ATOMIC_TYPE int
+#endif
+#define TREE_KEY_DECL TREE_ATOMIC_TYPE TREE_KEY()
+
+#ifndef TREE_GIVE_CMP
+#      define TREE_GIVE_CMP
+       /* Default three-way comparison of atomic keys: -1, 1 or 0 */
+       static inline int P(cmp) (TREE_ATOMIC_TYPE x, TREE_ATOMIC_TYPE y)
+       {
+               if (x < y)
+                       return -1;
+               else if (x > y)
+                       return 1;
+               else
+                       return 0;
+       }
+#endif
+
+#ifndef TREE_GIVE_INIT_KEY
+#      define TREE_GIVE_INIT_KEY
+       /* Atomic keys are initialized by a plain assignment */
+       static inline void P(init_key) (P(node) *n, TREE_ATOMIC_TYPE k)
+       { TREE_KEY(n->) = k; }
+#endif
+
+#elif defined(TREE_KEY_STRING) || defined(TREE_KEY_ENDSTRING)
+
+#ifdef TREE_KEY_STRING
+#      define TREE_KEY(x) x TREE_KEY_STRING
+#      ifndef TREE_GIVE_INIT_KEY
+#              define TREE_GIVE_INIT_KEY
+               /* Separately allocated string keys: only the pointer is stored */
+               static inline void P(init_key) (P(node) *n, char *k)
+               { TREE_KEY(n->) = k; }
+#      endif
+#else
+#      define TREE_KEY(x) x TREE_KEY_ENDSTRING
+#      define TREE_GIVE_EXTRA_SIZE
+       /* End-allocated string keys need strlen(k) extra bytes after the node */
+       static inline int P(extra_size) (char *k)
+       { return strlen(k); }
+#      ifndef TREE_GIVE_INIT_KEY
+#              define TREE_GIVE_INIT_KEY
+               /* ... and the string is copied into the node */
+               static inline void P(init_key) (P(node) *n, char *k)
+               { strcpy(TREE_KEY(n->), k); }
+#      endif
+#endif
+#define TREE_KEY_DECL char *TREE_KEY()
+
+#ifndef TREE_GIVE_CMP
+#      define TREE_GIVE_CMP
+       /* Default comparison of string keys, case-insensitive if TREE_NOCASE is set */
+       static inline int P(cmp) (char *x, char *y)
+       {
+#              ifdef TREE_NOCASE
+                       return strcasecmp(x,y);
+#              else
+                       return strcmp(x,y);
+#              endif
+       }
+#endif
+
+#elif defined(TREE_KEY_COMPLEX)
+
+/* Complex keys: no defaults are provided in this section */
+#define TREE_KEY(x) TREE_KEY_COMPLEX(x)
+
+#else
+#error You forgot to set the tree key type.
+#endif
+
+/*
+ * Accessors for the colour flag and the son pointers of a bucket.
+ * All tree code is expected to go through them, because the representation
+ * depends on TREE_CONSERVE_SPACE.
+ */
+#ifndef TREE_CONSERVE_SPACE
+       /* Plain version: the flag is a bit-field and the sons are ordinary pointers */
+       static inline uns P(red_flag) (P(bucket) *node)
+       { return node->red_flag; }
+       static inline void P(set_red_flag) (P(bucket) *node, uns flag)
+       { node->red_flag = flag; }
+       static inline P(bucket) * P(tree_son) (P(bucket) *node, uns id)
+       { return node->son[id]; }
+       static inline void P(set_tree_son) (P(bucket) *node, uns id, P(bucket) *son)
+       { node->son[id] = son; }
+#else
+       /* Pointers are aligned, hence we can use lower bits.  */
+       /* The colour lives in the lowest bit of son[0] */
+       static inline uns P(red_flag) (P(bucket) *node)
+       { return ((uintptr_t) node->son[0]) & 1L; }
+       static inline void P(set_red_flag) (P(bucket) *node, uns flag)
+       { node->son[0] = (void*) ( (((uintptr_t) node->son[0]) & ~1L) | (flag & 1L) ); }
+       /* Reading a son masks the lowest bit out, writing a son keeps the bit stored there */
+       static inline P(bucket) * P(tree_son) (P(bucket) *node, uns id)
+       { return (void *) (((uintptr_t) node->son[id]) & ~1L); }
+       static inline void P(set_tree_son) (P(bucket) *node, uns id, P(bucket) *son)
+       { node->son[id] = (void *) ((uintptr_t) son | (((uintptr_t) node->son[id]) & 1L) ); }
+#endif
+
+/* Defaults for missing parameters.  */
+
+#ifndef TREE_GIVE_CMP
+#error Unable to determine how to compare two keys.
+#endif
+
+#ifdef TREE_GIVE_EXTRA_SIZE
+/* This trickery is needed to avoid `unused parameter' warnings */
+#      define TREE_EXTRA_SIZE P(extra_size)
+#else
+/*
+ *  Beware, C macros are expanded iteratively, not recursively,
+ *  hence we get only a _single_ argument, although the expansion
+ *  of TREE_KEY contains commas.
+ */
+#      define TREE_EXTRA_SIZE(x) 0
+#endif
+
+#ifndef TREE_GIVE_INIT_KEY
+#      error Unable to determine how to initialize keys.
+#endif
+
+#ifndef TREE_GIVE_INIT_DATA
+/* Default init_data(): does nothing, so the data fields of a new node are left as allocated */
+static inline void P(init_data) (P(node) *n UNUSED)
+{
+}
+#endif
+
+#include <stdlib.h>
+
+/*
+ * Default allocator, used unless the user supplies one by TREE_GIVE_ALLOC.
+ * With TREE_USE_POOL, nodes come from the given mempool, no free() function
+ * is defined and TREE_SAFE_FREE() expands to nothing. Otherwise xmalloc()
+ * and xfree() are used.
+ */
+#ifndef TREE_GIVE_ALLOC
+#      ifdef TREE_USE_POOL
+               static inline void * P(alloc) (unsigned int size)
+               { return mp_alloc_fast(TREE_USE_POOL, size); }
+#              define TREE_SAFE_FREE(x)
+#      else
+               static inline void * P(alloc) (unsigned int size)
+               { return xmalloc(size); }
+
+               static inline void P(free) (void *x)
+               { xfree(x); }
+#      endif
+#endif
+
+#ifndef        TREE_SAFE_FREE
+#      define TREE_SAFE_FREE(x)        P(free) (x)
+#endif
+
+#ifdef TREE_GLOBAL
+#      define STATIC
+#else
+#      define STATIC static
+#endif
+
+#ifndef TREE_MAX_DEPTH
+#      define TREE_MAX_DEPTH 64
+#endif
+
+#if defined(TREE_WANT_FIND_NEXT) && !defined(TREE_DUPLICATES)
+#      define TREE_DUPLICATES
+#endif
+
+#ifdef TREE_WANT_LOOKUP
+#ifndef TREE_WANT_FIND
+#      define TREE_WANT_FIND
+#endif
+#ifndef TREE_WANT_NEW
+#      define TREE_WANT_NEW
+#endif
+#endif
+
+/* Now the operations */
+
+/* Set up an empty tree: no root, no nodes, zero black height */
+STATIC void P(init) (T *t)
+{
+       t->root = NULL;
+       t->height = 0;
+       t->count = 0;
+}
+
+#ifdef TREE_WANT_CLEANUP
+/* Free all buckets of the subtree rooted at node (sons first) and decrease the node count accordingly */
+static void P(cleanup_subtree) (T *t, P(bucket) *node)
+{
+       if (!node)
+               return;
+       P(cleanup_subtree) (t, P(tree_son) (node, 0));
+       P(cleanup_subtree) (t, P(tree_son) (node, 1));
+       /* TREE_SAFE_FREE is P(free) except with TREE_USE_POOL, where no P(free) exists and it expands to nothing */
+       TREE_SAFE_FREE(node);
+       t->count--;
+}
+
+/* Deallocate the whole tree and leave it empty */
+STATIC void P(cleanup) (T *t)
+{
+       P(cleanup_subtree) (t, t->root);
+       ASSERT(!t->count);
+       /* Do not leave a pointer to the freed root behind */
+       t->root = NULL;
+       t->height = 0;
+}
+#endif
+
+/*
+ * Descend from node towards the given key and record the path in stack[]:
+ * every visited bucket together with the son followed from it. The descent
+ * stops at a bucket with an equal key or at a NULL son. Returns the index of
+ * the last stack item, whose buck is either the bucket found or NULL.
+ *
+ * With TREE_DUPLICATES, the search goes on in the son_id subtree of a bucket
+ * found, so that among equal keys the one furthest in the son_id direction
+ * is returned. Without it, son_id is unused.
+ *
+ * max_depth is the number of items available in stack[]; running out of them
+ * trips an ASSERT.
+ */
+static uns P(fill_stack) (P(stack_entry) *stack, uns max_depth, P(bucket) *node, TREE_KEY_DECL, uns son_id UNUSED)
+{
+       uns i;
+       stack[0].buck = node;
+       for (i=0; stack[i].buck; i++)
+       {
+               int cmp;
+               cmp = P(cmp) (TREE_KEY(), TREE_KEY(stack[i].buck->n.));
+               if (cmp == 0)
+                       break;
+               else if (cmp < 0)
+                       stack[i].son = 0;
+               else
+                       stack[i].son = 1;
+               ASSERT(i+1 < max_depth);
+               stack[i+1].buck = P(tree_son) (stack[i].buck, stack[i].son);
+       }
+#ifdef TREE_DUPLICATES
+       if (stack[i].buck)
+       {
+               uns idx;
+               /* Find first/last of equal keys according to son_id.  */
+               idx = P(fill_stack) (stack+i+1, max_depth-i-1,
+                       P(tree_son) (stack[i].buck, son_id), TREE_KEY(), son_id);
+               /* Use the deeper match only if the recursive search has found one */
+               if (stack[i+1+idx].buck)
+               {
+                       stack[i].son = son_id;
+                       i = i+1+idx;
+               }
+       }
+#endif
+       /* The last item gets a son index which is neither 0 nor 1 (presumably a marker of the end of the path) */
+       stack[i].son = 10;
+       return i;
+}
+
+#ifdef TREE_WANT_FIND
+/* Look up a key; returns the matching node or NULL if there is none.  */
+STATIC P(node) * P(find) (T *t, TREE_KEY_DECL)
+{
+       P(stack_entry) path[TREE_MAX_DEPTH];
+       uns top = P(fill_stack) (path, TREE_MAX_DEPTH, t->root, TREE_KEY(), 0);
+       if (!path[top].buck)
+               return NULL;
+       return &path[top].buck->n;
+}
+#endif
+
+#ifdef TREE_WANT_SEARCH_DOWN
+/*
+ * Find the node with the given key.  If there is no such node, return
+ * the last node on the search path from which the search went right
+ * (i.e., whose key compared smaller than the wanted one), or NULL if
+ * the search never went right.
+ */
+STATIC P(node) * P(search_down) (T *t, TREE_KEY_DECL)
+{
+       P(node) *candidate = NULL;
+       P(bucket) *cur;
+       for (cur = t->root; cur; )
+       {
+               int cmp = P(cmp) (TREE_KEY(), TREE_KEY(cur->n.));
+               if (!cmp)
+                       return &cur->n;
+               if (cmp > 0)
+               {
+                       /* Remember this node before going right.  */
+                       candidate = &cur->n;
+                       cur = P(tree_son) (cur, 1);
+               }
+               else
+                       cur = P(tree_son) (cur, 0);
+       }
+       return candidate;
+}
+#endif
+
+#ifdef TREE_WANT_BOUNDARY
+/*
+ * Return the outermost node of the tree: the leftmost one for
+ * direction == 0, the rightmost one for any non-zero direction.
+ * Returns NULL for an empty tree.
+ */
+STATIC P(node) * P(boundary) (T *t, uns direction)
+{
+       P(bucket) *cur = t->root, *next;
+       uns side = direction ? 1 : 0;
+       if (!cur)
+               return NULL;
+       for (next = P(tree_son) (cur, side); next; next = P(tree_son) (cur, side))
+               cur = next;
+       return &cur->n;
+}
+#endif
+
+#ifdef TREE_STORE_PARENT
+/*
+ * Return the in-order neighbour of `start' in the given direction
+ * (0 = previous, 1 = next), or NULL if `start' is the outermost node
+ * on that side.  Uses the parent pointers.
+ */
+STATIC P(node) * P(adjacent) (P(node) *start, uns direction)
+{
+       P(bucket) *cur = SKIP_BACK(P(bucket), n, start);
+       P(bucket) *res = P(tree_son) (cur, direction);
+       if (res)
+       {
+               /* Step once in `direction', then go as far as possible the other way.  */
+               while ((cur = P(tree_son) (res, 1 - direction)))
+                       res = cur;
+               return &res->n;
+       }
+       /* No son on that side: climb up until we arrive from the opposite side.  */
+       for (res = cur->parent; res && cur == P(tree_son) (res, direction); res = cur->parent)
+               cur = res;
+       if (!res)
+               return NULL;
+       ASSERT(cur == P(tree_son) (res, 1 - direction));
+       return &res->n;
+}
+#endif
+
+#if defined(TREE_DUPLICATES) || defined(TREE_WANT_DELETE) || defined(TREE_WANT_REMOVE)
+/*
+ * Extend the path in `stack' from stack[0].buck: one step in `direction'
+ * followed by as many steps as possible in the opposite direction.
+ * Returns the number of entries added; the entry at that index has a
+ * NULL buck.  Returns 0 and touches nothing if stack[0].buck is NULL.
+ */
+static int P(find_next_node) (P(stack_entry) *stack, uns max_depth, uns direction)
+{
+       uns level, turn;
+       if (!stack[0].buck)
+               return 0;
+       turn = direction;
+       for (level = 0; stack[level].buck; level++)
+       {
+               ASSERT(level+1 < max_depth);
+               stack[level].son = turn;
+               stack[level+1].buck = P(tree_son) (stack[level].buck, turn);
+               /* Only the very first step goes in `direction'.  */
+               turn = 1 - direction;
+       }
+       return level;
+}
+#endif
+
+#ifdef TREE_WANT_FIND_NEXT
+/*
+ * Return the in-order successor of `start' if it carries an equal key,
+ * NULL otherwise.
+ */
+STATIC P(node) * P(find_next) (P(node) *start)
+{
+       P(node) *succ = P(adjacent) (start, 1);
+       if (!succ)
+               return NULL;
+       return P(cmp) (TREE_KEY(start->), TREE_KEY(succ->)) ? NULL : succ;
+}
+#endif
+
+#ifdef TREE_WANT_SEARCH
+/*
+ * Find the node with the given key.  If it does not exist, return the
+ * last node visited on the search path instead; NULL is returned only
+ * for an empty tree.
+ */
+STATIC P(node) * P(search) (T *t, TREE_KEY_DECL)
+{
+       P(stack_entry) path[TREE_MAX_DEPTH];
+       uns top = P(fill_stack) (path, TREE_MAX_DEPTH, t->root, TREE_KEY(), 0);
+       if (path[top].buck)
+               return &path[top].buck->n;
+       if (!top)
+               return NULL;
+       return &path[top-1].buck->n;
+}
+#endif
+
+#if 0
+#define TREE_TRACE(txt...) do { printf(txt); fflush(stdout); } while (0)
+#else
+#define TREE_TRACE(txt...)
+#endif
+
+/*
+ * Rotate the subtree rooted at `node' so that its son number son_id
+ * becomes the new subtree root: the son's (1-son_id) subtree is handed
+ * over to `node' and `node' becomes the son's (1-son_id) son.
+ *
+ * The link from the original parent of `node' is NOT updated here; the
+ * caller has to store the returned root there.
+ */
+static inline P(bucket) * P(rotation) (P(bucket) *node, uns son_id)
+{
+       /* Destroys red_flag's in node, son.  Returns new root.  */
+       P(bucket) *son = P(tree_son) (node, son_id);
+       TREE_TRACE("Rotation (node %d, son %d), direction %d\n", node->n.key, son->n.key, son_id);
+       node->son[son_id] = P(tree_son) (son, 1-son_id);
+       son->son[1-son_id] = node;
+#ifdef TREE_STORE_PARENT
+       /* Fix the parent pointers of the three buckets whose parent has changed.  */
+       if (node->son[son_id])
+               node->son[son_id]->parent = node;
+       son->parent = node->parent;
+       node->parent = son;
+#endif
+       return son;
+}
+
+/*
+ * Restore the colouring rules after stack[depth].buck has become red.
+ * stack[0..depth] holds the path from the root to that bucket, with
+ * stack[k].son being the direction taken from stack[k].buck.
+ *
+ * Either the colours are just exchanged and the check is repeated two
+ * levels higher, or one or two rotations are performed and the new
+ * subtree root is linked to its parent (or stored as the tree root).
+ */
+static void P(rotate_after_insert) (T *t, P(stack_entry) *stack, uns depth)
+{
+       P(bucket) *node;
+       P(bucket) *parent, *grand, *uncle;
+       int s1, s2;
+try_it_again:
+       node = stack[depth].buck;
+       ASSERT(P(red_flag) (node));
+       /* At this moment, node became red.  The path sums have
+        * been preserved, but we have to check the parental
+        * condition (a red node must not have a red parent).  */
+       if (depth == 0)
+       {
+               ASSERT(t->root == node);
+               return;
+       }
+       parent = stack[depth-1].buck;
+       if (!P(red_flag) (parent))
+               return;
+       if (depth == 1)
+       {
+               /* The red parent is the root: blacken it, which makes
+                * the tree one black level higher.  */
+               ASSERT(t->root == parent);
+               P(set_red_flag) (parent, 0);
+               t->height++;
+               return;
+       }
+       grand = stack[depth-2].buck;
+       ASSERT(!P(red_flag) (grand));
+       /* The parent is also red, the grandparent exists and it
+        * is black.  */
+       s1 = stack[depth-1].son;
+       s2 = stack[depth-2].son;
+       uncle = P(tree_son) (grand, 1-s2);
+       if (uncle && P(red_flag) (uncle))
+       {
+               /* Red parent and uncle, black grandparent.
+                * Exchange and try another iteration. */
+               P(set_red_flag) (parent, 0);
+               P(set_red_flag) (uncle, 0);
+               P(set_red_flag) (grand, 1);
+               depth -= 2;
+               TREE_TRACE("Swapping colours (parent %d, uncle %d, grand %d), passing thru\n", parent->n.key, uncle->n.key, grand->n.key);
+               goto try_it_again;
+       }
+       /* Black uncle and grandparent, we need to rotate.  Test
+        * the direction.  */
+       if (s1 == s2)
+       {
+               /* Node, parent and grandparent lie in a line: a single rotation suffices.  */
+               node = P(rotation) (grand, s2);
+               P(set_red_flag) (parent, 0);
+               P(set_red_flag) (grand, 1);
+       }
+       else
+       {
+               /* Zig-zag: rotate the parent first, then the grandparent.  */
+               grand->son[s2] = P(rotation) (parent, s1);
+               node = P(rotation) (grand, s2);
+               P(set_red_flag) (grand, 1);
+               P(set_red_flag) (parent, 1);
+               P(set_red_flag) (node, 0);
+       }
+       /* Link the new subtree root to the rest of the tree.  */
+       if (depth >= 3)
+               P(set_tree_son) (stack[depth-3].buck, stack[depth-3].son, node);
+       else
+               t->root = node;
+}
+
+#ifdef TREE_WANT_NEW
+/*
+ * Insert a new node with the given key and return it.  The function
+ * asserts that the place found for the node is empty, so without
+ * TREE_DUPLICATES the key must not be present yet.  With
+ * TREE_DUPLICATES, the new node is placed after the last node with an
+ * equal key.
+ */
+STATIC P(node) * P(new) (T *t, TREE_KEY_DECL)
+{
+       P(stack_entry) stack[TREE_MAX_DEPTH];
+       P(bucket) *added;
+       uns depth;
+       depth = P(fill_stack) (stack, TREE_MAX_DEPTH, t->root, TREE_KEY(), 1);
+#ifdef TREE_DUPLICATES
+       /* It is the last found value, hence everything in the right subtree is
+        * strongly _bigger_.  */
+       depth += P(find_next_node) (stack+depth, TREE_MAX_DEPTH-depth, 1);
+#endif
+       ASSERT(!stack[depth].buck);
+       /* We are in a leaf, hence we can easily append a new leaf to it.  */
+       added = P(alloc) (sizeof(struct P(bucket)) + TREE_EXTRA_SIZE(TREE_KEY()) );
+       added->son[0] = added->son[1] = NULL;
+       stack[depth].buck = added;
+       if (depth > 0)
+       {
+#ifdef TREE_STORE_PARENT
+               added->parent = stack[depth-1].buck;
+#endif
+               P(set_tree_son) (stack[depth-1].buck, stack[depth-1].son, added);
+       }
+       else
+       {
+               /* The tree was empty, the new node becomes the root.  */
+#ifdef TREE_STORE_PARENT
+               added->parent = NULL;
+#endif
+               t->root = added;
+       }
+       P(set_red_flag) (added, 1);     /* Set it red to not disturb the path sum.  */
+       P(init_key) (&added->n, TREE_KEY());
+       P(init_data) (&added->n);
+       t->count++;
+       /* Let us reorganize the red_flag's and the structure of the tree.  */
+       P(rotate_after_insert) (t, stack, depth);
+       return &added->n;
+}
+#endif
+
+#ifdef TREE_WANT_LOOKUP
+/* Return the node with the given key, creating it if it does not exist yet.  */
+STATIC P(node) * P(lookup) (T *t, TREE_KEY_DECL)
+{
+       P(node) *hit = P(find) (t, TREE_KEY());
+       return hit ? hit : P(new) (t, TREE_KEY());
+}
+#endif
+
+#if defined(TREE_WANT_REMOVE) || defined(TREE_WANT_DELETE)
+/*
+ * Repair the tree after a black node without sons has been unlinked.
+ * stack[depth] describes the parent of the removed node and the side
+ * (son) it hung on; stack[0..depth] is the path from the root.
+ *
+ * Either the missing black node is compensated locally by recolouring
+ * and rotations, or the deficit is passed one level up.  When it gets
+ * above the root (depth < 0), the black height of the tree decreases.
+ */
+static void P(rotate_after_delete) (T *t, P(stack_entry) *stack, int depth)
+{
+       uns iteration = 0;
+       P(bucket) *parent, *sibling, *instead;
+       uns parent_red, del_son, sibl_red;
+missing_black:
+       if (depth < 0)
+       {
+               t->height--;
+               return;
+       }
+       parent = stack[depth].buck;
+       parent_red = P(red_flag) (parent);
+       del_son = stack[depth].son;
+       /* For the 1st iteration: we have deleted parent->son[del_son], which
+        * was a black node with no son.  Hence there is one missing black
+        * vertex in that path, which we are going to fix now.
+        *
+        * For other iterations: in that path, there is also missing a black
+        * node.  */
+       if (!iteration)
+               ASSERT(!P(tree_son) (parent, del_son));
+       sibling = P(tree_son) (parent, 1-del_son);
+       ASSERT(sibling);
+       sibl_red = P(red_flag) (sibling);
+       instead = NULL;
+       if (!sibl_red)
+       {
+               P(bucket) *son[2];
+               uns red[2];
+               son[0] = P(tree_son) (sibling, 0);
+               son[1] = P(tree_son) (sibling, 1);
+               red[0] = son[0] ? P(red_flag) (son[0]) : 0;
+               red[1] = son[1] ? P(red_flag) (son[1]) : 0;
+               if (!red[0] && !red[1])
+               {
+                       /* Black sibling with no red son: recolour only.  If the parent
+                        * was red, we are done, otherwise the deficit moves up.  */
+                       P(set_red_flag) (sibling, 1);
+                       P(set_red_flag) (parent, 0);
+                       if (parent_red)
+                               return;
+                       else
+                       {
+                               depth--;
+                               iteration++;
+                               TREE_TRACE("Swapping colours (parent %d, sibling %d), passing thru\n", parent->n.key, sibling->n.key);
+                               goto missing_black;
+                       }
+               } else if (!red[del_son])
+               {
+                       /* Only the outer son of the sibling is red: single rotation.  */
+                       instead = P(rotation) (parent, 1-del_son);
+                       P(set_red_flag) (instead, parent_red);
+                       P(set_red_flag) (parent, 0);
+                       P(set_red_flag) (son[1-del_son], 0);
+               } else /* red[del_son] */
+               {
+                       /* The inner son of the sibling is red: double rotation.  */
+                       parent->son[1-del_son] = P(rotation) (sibling, del_son);
+                       instead = P(rotation) (parent, 1-del_son);
+                       P(set_red_flag) (instead, parent_red);
+                       P(set_red_flag) (parent, 0);
+                       P(set_red_flag) (sibling, 0);
+               }
+       } else /* sibl_red */
+       {
+               P(bucket) *grand[2], *son;
+               uns red[2];
+               ASSERT(!parent_red);
+               son = P(tree_son) (sibling, del_son);
+               ASSERT(son && !P(red_flag) (son));
+               grand[0] = P(tree_son) (son, 0);
+               grand[1] = P(tree_son) (son, 1);
+               red[0] = grand[0] ? P(red_flag) (grand[0]) : 0;
+               red[1] = grand[1] ? P(red_flag) (grand[1]) : 0;
+               if (!red[0] && !red[1])
+               {
+                       instead = P(rotation) (parent, 1-del_son);
+                       P(set_red_flag) (instead, 0);
+                       P(set_red_flag) (parent, 0);
+                       P(set_red_flag) (son, 1);
+               }
+               else if (!red[del_son])
+               {
+                       parent->son[1-del_son] = P(rotation) (sibling, del_son);
+                       instead = P(rotation) (parent, 1-del_son);
+                       P(set_red_flag) (instead, 0);
+                       P(set_red_flag) (parent, 0);
+                       P(set_red_flag) (sibling, 1);
+                       P(set_red_flag) (grand[1-del_son], 0);
+               } else /* red[del_son] */
+               {
+                       sibling->son[del_son] = P(rotation) (son, del_son);
+                       parent->son[1-del_son] = P(rotation) (sibling, del_son);
+                       instead = P(rotation) (parent, 1-del_son);
+                       P(set_red_flag) (instead, 0);
+                       P(set_red_flag) (parent, 0);
+                       P(set_red_flag) (sibling, 1);
+                       P(set_red_flag) (son, 0);
+               }
+       }
+       /* We have performed all desired rotations and need to store the new
+        * pointer to the subtree.  */
+       ASSERT(instead);
+       if (depth > 0)
+               P(set_tree_son) (stack[depth-1].buck, stack[depth-1].son, instead);
+       else
+               t->root = instead;
+}
+
+static void P(remove_by_stack) (T *t, P(stack_entry) *stack, uns depth)
+{
+       P(bucket) *node = stack[depth].buck;
+       P(bucket) *son;
+       uns i;
+       for (i=0; i<depth; i++)
+               ASSERT(P(tree_son) (stack[i].buck, stack[i].son) == stack[i+1].buck);
+       if (P(tree_son) (node, 0) && P(tree_son) (node, 1))
+       {
+               P(bucket) *xchg;
+               uns flag_node, flag_xchg;
+               uns d = P(find_next_node) (stack+depth, TREE_MAX_DEPTH-depth, 1);
+
+               ASSERT(d >= 2);
+               d--;
+               xchg = stack[depth+d].buck;
+               flag_node = P(red_flag) (node);
+               flag_xchg = P(red_flag) (xchg);
+               ASSERT(!P(tree_son) (xchg, 0));
+               son = P(tree_son) (xchg, 1);
+               stack[depth].buck = xchg;       /* Magic iff d == 1.  */
+               stack[depth+d].buck = node;
+               xchg->son[0] = P(tree_son) (node, 0);
+               xchg->son[1] = P(tree_son) (node, 1);
+               if (depth > 0)
+                       P(set_tree_son) (stack[depth-1].buck, stack[depth-1].son, xchg);
+               else
+                       t->root = xchg;
+               node->son[0] = NULL;
+               node->son[1] = son;
+               P(set_tree_son) (stack[depth+d-1].buck, stack[depth+d-1].son, node);
+#ifdef TREE_STORE_PARENT
+               xchg->parent = depth > 0 ? stack[depth-1].buck : NULL;
+               xchg->son[0]->parent = xchg;
+               xchg->son[1]->parent = xchg;
+               node->parent = stack[depth+d-1].buck;
+               if (son)
+                       son->parent = node;
+#endif
+               P(set_red_flag) (xchg, flag_node);
+               P(set_red_flag) (node, flag_xchg);
+               depth += d;
+       }
+       else if (P(tree_son) (node, 0))
+               son = P(tree_son) (node, 0);
+       else
+               son = P(tree_son) (node, 1);
+       /* At this moment, stack[depth].buck == node and it has at most one son
+        * and it is stored in the variable son.  */
+       t->count--;
+       if (depth > 0)
+       {
+               P(set_tree_son) (stack[depth-1].buck, stack[depth-1].son, son);
+#ifdef TREE_STORE_PARENT
+               if (son)
+                       son->parent = stack[depth-1].buck;
+#endif
+       }
+       else
+       {
+               t->root = son;
+#ifdef TREE_STORE_PARENT
+               if (son)
+                       son->parent = NULL;
+#endif
+       }
+       if (P(red_flag) (node))
+       {
+               ASSERT(!son);
+               return;
+       }
+       TREE_SAFE_FREE(node);
+       /* We have deleted a black node.  */
+       if (son)
+       {
+               ASSERT(P(red_flag) (son));
+               P(set_red_flag) (son, 0);
+               return;
+       }
+       P(rotate_after_delete) (t, stack, (int) depth - 1);
+}
+#endif
+
+#ifdef TREE_WANT_REMOVE
+/*
+ * Remove a node given by its pointer.  The path to the node is
+ * reconstructed by following the parent pointers up to the root and
+ * then reversed, so that it starts at the root as P(remove_by_stack)
+ * expects.
+ */
+STATIC void P(remove) (T *t, P(node) *Node)
+{
+       P(stack_entry) stack[TREE_MAX_DEPTH];
+       P(bucket) *cur = SKIP_BACK(P(bucket), n, Node);
+       uns depth, lo, hi;
+       stack[0].buck = cur;
+       stack[0].son = 10;
+       for (depth = 0; cur->parent; cur = cur->parent)
+       {
+               depth++;
+               ASSERT(depth < TREE_MAX_DEPTH);
+               stack[depth].buck = cur->parent;
+               stack[depth].son = (P(tree_son) (cur->parent, 0) == cur) ? 0 : 1;
+       }
+       /* The path was collected bottom-up, turn it around.  */
+       for (lo = 0, hi = depth; lo < hi; lo++, hi--)
+       {
+               P(stack_entry) tmp = stack[lo];
+               stack[lo] = stack[hi];
+               stack[hi] = tmp;
+       }
+       P(remove_by_stack) (t, stack, depth);
+}
+#endif
+
+#ifdef TREE_WANT_DELETE
+/*
+ * Delete a node with the given key.  Returns 1 if a node has been
+ * deleted, 0 if the key was not found.
+ */
+STATIC int P(delete) (T *t, TREE_KEY_DECL)
+{
+       P(stack_entry) path[TREE_MAX_DEPTH];
+       uns top = P(fill_stack) (path, TREE_MAX_DEPTH, t->root, TREE_KEY(), 1);
+       if (!path[top].buck)
+               return 0;
+       P(remove_by_stack) (t, path, top);
+       return 1;
+}
+#endif
+
+#ifdef TREE_WANT_DUMP
+/*
+ * Recursively check and optionally print the subtree rooted at `node'
+ * in key order.
+ *
+ *   fb       output stream; if NULL, nothing is printed and only the
+ *            consistency checks are performed
+ *   parent   parent of `node' (NULL for the root)
+ *   cmp_res  -1 if `node' is the left son of `parent', +1 if it is the
+ *            right one; used for checking the ordering of keys
+ *   level    depth of `node', used for indentation
+ *   black    number of black nodes on the path above `node'
+ *
+ * Asserted invariants: every path to a NULL son contains t->height black
+ * nodes, a red node never has a red parent, keys are ordered properly
+ * and (with TREE_STORE_PARENT) the parent pointers are correct.
+ */
+static void P(dump_subtree) (struct fastbuf *fb, T *t, P(bucket) *node, P(bucket) *parent, int cmp_res, int level, uns black)
+{
+       uns flag;
+       int i;
+       if (!node)
+       {
+               ASSERT(black == t->height);
+               return;
+       }
+       flag = P(red_flag) (node);
+#ifdef TREE_STORE_PARENT
+       ASSERT(node->parent == parent);
+#endif
+       if (parent)
+       {
+               ASSERT(!flag || !P(red_flag) (parent));
+               /* The product is positive iff the node lies on the correct side of its parent.  */
+               cmp_res *= P(cmp) (TREE_KEY(node->n.), TREE_KEY(parent->n.));
+#ifdef TREE_DUPLICATES
+               ASSERT(cmp_res >= 0);
+#else
+               ASSERT(cmp_res > 0);
+#endif
+       }
+       P(dump_subtree) (fb, t, P(tree_son) (node, 0), node, -1, level+1, black + (1-flag));
+       if (fb)
+       {
+               char tmp[20];
+               for (i=0; i<level; i++)
+                       bputs(fb, "  ");
+               sprintf(tmp, "L%d %c\t", level, flag ? 'R' : 'B');
+               bputs(fb, tmp);
+               P(dump_key) (fb, &node->n);
+               P(dump_data) (fb, &node->n);
+               bputs(fb, "\n");
+       }
+       P(dump_subtree) (fb, t, P(tree_son) (node, 1), node, +1, level+1, black + (1-flag));
+}
+
+/*
+ * Check the consistency of the whole tree and, if `fb' is not NULL,
+ * print a header line followed by all nodes to it and flush it.
+ */
+STATIC void P(dump) (struct fastbuf *fb, T *t)
+{
+       char header[50];
+       if (!fb)
+       {
+               /* Checking only, no output.  */
+               P(dump_subtree) (fb, t, t->root, NULL, 0, 0, 0);
+               return;
+       }
+       sprintf(header, "Tree of %d nodes and height %d\n", t->count, t->height);
+       bputs(fb, header);
+       P(dump_subtree) (fb, t, t->root, NULL, 0, 0, 0);
+       bputs(fb, "\n");
+       bflush(fb);
+}
+#endif
+
+/* And the iterator */
+
+#ifdef TREE_WANT_ITERATOR
+/*
+ * Return the outermost node reached by repeatedly following son number
+ * `direction' from the root, or NULL for an empty tree.
+ */
+static P(node) * P(first_node) (T *t, uns direction)
+{
+       P(bucket) *cur = t->root, *next;
+       if (!cur)
+               return NULL;
+       while ((next = P(tree_son) (cur, direction)))
+               cur = next;
+       return &cur->n;
+}
+
+#ifndef TREE_FOR_ALL
+
+/*
+ * Iterate over all nodes of a tree in ascending order:
+ *
+ *     TREE_FOR_ALL(prefix, tree_ptr, var)
+ *             ... use var, which is a pointer to the current node ...
+ *     TREE_END_FOR;
+ *
+ * t_px is the prefix the tree has been instantiated with.  The macro
+ * expands to calls of <prefix>first_node and <prefix>adjacent.
+ * TREE_BREAK and TREE_CONTINUE may be used inside the body.
+ */
+#define TREE_FOR_ALL(t_px, t_ptr, t_var)                                               \
+do                                                                                     \
+{                                                                                      \
+       GLUE_(t_px,node) *t_var = GLUE_(t_px,first_node)(t_ptr, 0);                     \
+       for (; t_var; t_var = GLUE_(t_px,adjacent)(t_var, 1))                           \
+       {
+#define TREE_END_FOR } } while(0)
+#define TREE_BREAK break
+#define TREE_CONTINUE continue
+
+#endif
+#endif
+
+/* Finally, undefine all the parameters */
+
+#undef P
+#undef T
+
+#undef TREE_NODE
+#undef TREE_PREFIX
+#undef TREE_KEY_ATOMIC
+#undef TREE_KEY_STRING
+#undef TREE_KEY_ENDSTRING
+#undef TREE_KEY_COMPLEX
+#undef TREE_KEY_DECL
+#undef TREE_WANT_CLEANUP
+#undef TREE_WANT_FIND
+#undef TREE_WANT_FIND_NEXT
+#undef TREE_WANT_SEARCH
+#undef TREE_WANT_SEARCH_DOWN
+#undef TREE_WANT_BOUNDARY
+#undef TREE_WANT_ADJACENT
+#undef TREE_WANT_NEW
+#undef TREE_WANT_LOOKUP
+#undef TREE_WANT_DELETE
+#undef TREE_WANT_REMOVE
+#undef TREE_WANT_DUMP
+#undef TREE_WANT_ITERATOR
+#undef TREE_GIVE_CMP
+#undef TREE_GIVE_EXTRA_SIZE
+#undef TREE_GIVE_INIT_KEY
+#undef TREE_GIVE_INIT_DATA
+#undef TREE_GIVE_ALLOC
+#undef TREE_NOCASE
+#undef TREE_ATOMIC_TYPE
+#undef TREE_USE_POOL
+#undef TREE_STATIC
+#undef TREE_CONSERVE_SPACE
+#undef TREE_DUPLICATES
+#undef TREE_MAX_DEPTH
+#undef TREE_STORE_PARENT
+#undef TREE_KEY
+#undef TREE_EXTRA_SIZE
+#undef TREE_SAFE_FREE
+#undef TREE_TRACE
+#undef STATIC
diff --git a/lib/regex.c b/lib/regex.c

new file mode 100644 (file)

index 0000000..270fb59
--- /dev/null
+++ b/lib/regex.c
@@ -0,0 +1,358 @@
+/*
+ *     UCW Library -- Interface to Regular Expression Libraries
+ *
+ *     (c) 1997--2004 Martin Mares <mj@ucw.cz>
+ *     (c) 2001 Robert Spalek <robert@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/chartype.h"
+#include "lib/hashfunc.h"
+
+#include <stdio.h>
+#include <string.h>
+
+#if defined(CONFIG_OWN_REGEX) || defined(CONFIG_POSIX_REGEX)
+
+/* POSIX regular expression library */
+
+#ifdef CONFIG_OWN_REGEX
+#include "lib/regex/regex-sh.h"
+#else
+#include <regex.h>
+#endif
+
+/* A compiled regular expression together with the results of the last match */
+struct regex {
+  regex_t rx;                          /* Pattern compiled by regcomp() */
+  regmatch_t matches[10];              /* Filled in by regexec() in rx_match(): whole match and subexpressions */
+};
+
+/*
+ * Compile the extended regular expression `p', case-insensitively if
+ * `icase' is non-zero.  Dies with an error message if the expression
+ * cannot be compiled.
+ */
+regex *
+rx_compile(const char *p, int icase)
+{
+  int flags = REG_EXTENDED;
+  regex *r = xmalloc_zero(sizeof(regex));
+  int err;
+
+  if (icase)
+    flags |= REG_ICASE;
+  err = regcomp(&r->rx, p, flags);
+  if (err)
+    {
+      /* Turn the error code into a readable message and give up */
+      char msg[256];
+      regerror(err, &r->rx, msg, sizeof(msg)-1);
+      /* regfree(&r->rx) not needed */
+      die("Error parsing regular expression `%s': %s", p, msg);
+    }
+  return r;
+}
+
+/* Release a regex created by rx_compile(): first the compiled pattern, then the structure itself */
+void
+rx_free(regex *r)
+{
+  regfree(&r->rx);
+  xfree(r);
+}
+
+/*
+ * Match the whole string `s' against the regex.  Returns 1 if the match
+ * covers the string from its first to its last character, 0 otherwise.
+ * Dies on errors reported by regexec().  The positions of subexpressions
+ * are left in r->matches.
+ */
+int
+rx_match(regex *r, const char *s)
+{
+  int err = regexec(&r->rx, s, 10, r->matches, 0);
+
+  if (err == REG_NOMATCH)
+    return 0;
+  if (!err)
+    {
+      /* regexec doesn't support anchored expressions, so we have to check ourselves that the full string is matched */
+      return !r->matches[0].rm_so && !s[r->matches[0].rm_eo];
+    }
+  if (err == REG_ESPACE)
+    die("Regex matching ran out of memory");
+  else
+    die("Regex matching failed with unknown error %d", err);
+}
+
+/*
+ * Match `src' against the regex and, if it matches, build the string
+ * `by' with substitutions in `dest' (at most `destlen' bytes including
+ * the terminating zero).
+ *
+ * In `by', `\0' to `\9' are replaced by the corresponding matched
+ * subexpression (`\0' is the whole match); references to subexpressions
+ * which do not exist or did not take part in the match expand to
+ * nothing.  A backslash followed by any other character produces that
+ * character; a backslash at the very end of `by' is ignored.
+ *
+ * Returns 1 on success, 0 if `src' does not match, -1 if the result
+ * does not fit in `dest'.
+ */
+int
+rx_subst(regex *r, const char *by, const char *src, char *dest, uns destlen)
+{
+  char *end;
+
+  if (!rx_match(r, src))
+    return 0;
+  if (!destlen)                        /* No room even for the terminating zero */
+    return -1;
+  end = dest + destlen - 1;
+
+  while (*by)
+    {
+      if (*by == '\\')
+       {
+         by++;
+         if (*by >= '0' && *by <= '9') /* \0 gets replaced by entire pattern */
+           {
+             uns j = *by++ - '0';
+             if (j <= r->rx.re_nsub && r->matches[j].rm_so >= 0)
+               {
+                 const char *s = src + r->matches[j].rm_so;
+                 uns i = r->matches[j].rm_eo - r->matches[j].rm_so;
+                 if (i >= (uns)(end - dest))
+                   return -1;
+                 memcpy(dest, s, i);
+                 dest += i;
+               }
+             /* The reference has been consumed in any case.  Re-examine the
+              * next character, it can be the end of the string or another
+              * backslash.  */
+             continue;
+           }
+         if (!*by)                     /* Lone backslash at the end of the string */
+           break;
+       }
+      if (dest < end)
+       *dest++ = *by++;
+      else
+       return -1;
+    }
+  *dest = 0;
+  return 1;
+}
+
+#elif defined(CONFIG_PCRE)
+
+/* PCRE library */
+
+#include <pcre.h>
+
+/* A compiled regular expression together with the results of the last match */
+struct regex {
+  pcre *rx;                            /* Pattern compiled by pcre_compile() */
+  pcre_extra *extra;                   /* Result of pcre_study() */
+  uns match_array_size;                        /* Number of ints in matches[] */
+  uns real_matches;                    /* Return value of the last successful pcre_exec() */
+  int matches[0];                      /* (max_matches+1) pairs (start,end) of offsets plus some workspace */
+};
+
+/*
+ * Compile the regular expression `p' as an anchored PCRE pattern,
+ * case-insensitively if `icase' is non-zero, and study it.  Dies with
+ * an error message if any of the steps fails.
+ */
+regex *
+rx_compile(const char *p, int icase)
+{
+  const char *err;
+  int errpos, captures, eno;
+  int options = PCRE_ANCHORED | PCRE_EXTRA;
+
+  if (icase)
+    options |= PCRE_CASELESS;
+  pcre *rx = pcre_compile(p, options, &err, &errpos, NULL);
+  if (!rx)
+    die("Error parsing regular expression `%s': %s at position %d", p, err, errpos);
+
+  eno = pcre_fullinfo(rx, NULL, PCRE_INFO_CAPTURECOUNT, &captures);
+  if (eno)
+    die("Internal error: pcre_fullinfo() failed with error %d", eno);
+
+  /* Three ints per capture group, plus three for the whole match */
+  int slots = 3*(captures+1);
+  regex *r = xmalloc_zero(sizeof(regex) + slots * sizeof(int));
+  r->rx = rx;
+  r->match_array_size = slots;
+  r->extra = pcre_study(rx, 0, &err);
+  if (err)
+    die("Error studying regular expression `%s': %s", p, err);
+  return r;
+}
+
+/*
+ * Release a regex created by rx_compile().  The compiled pattern and
+ * the study data have been allocated by the PCRE library, so they are
+ * returned through its own pcre_free hook instead of xfree(); only the
+ * structure itself comes from xmalloc_zero().
+ */
+void
+rx_free(regex *r)
+{
+  pcre_free(r->rx);
+  if (r->extra)                                /* pcre_study() may have returned NULL */
+    pcre_free(r->extra);
+  xfree(r);
+}
+
+/*
+ * Match the whole string `s' against the regex.  Returns 1 if the match
+ * starts at offset 0 and ends at the end of the string, 0 otherwise.
+ * Dies on errors reported by pcre_exec().  The number of matched
+ * substrings is remembered in r->real_matches.
+ */
+int
+rx_match(regex *r, const char *s)
+{
+  int err = pcre_exec(r->rx, r->extra, s, str_len(s), 0, 0, r->matches, r->match_array_size);
+
+  if (err == PCRE_ERROR_NOMATCH)
+    return 0;
+  if (err >= 0)
+    {
+      r->real_matches = err;
+      /* need to check that the full string matches */
+      return !r->matches[0] && !s[r->matches[1]];
+    }
+  if (err == PCRE_ERROR_NOMEMORY)
+    die("Regex matching ran out of memory");
+  else
+    die("Regex matching failed with unknown error %d", err);
+}
+
+/*
+ * Match `src' against the regex and, if it matches, build the string
+ * `by' with substitutions in `dest' (at most `destlen' bytes including
+ * the terminating zero).
+ *
+ * In `by', `\0' to `\9' are replaced by the corresponding matched
+ * subexpression (`\0' is the whole match); references to subexpressions
+ * which do not exist or did not take part in the match expand to
+ * nothing.  A backslash followed by any other character produces that
+ * character; a backslash at the very end of `by' is ignored.
+ *
+ * Returns 1 on success, 0 if `src' does not match, -1 if the result
+ * does not fit in `dest'.
+ */
+int
+rx_subst(regex *r, const char *by, const char *src, char *dest, uns destlen)
+{
+  char *end;
+
+  if (!rx_match(r, src))
+    return 0;
+  if (!destlen)                        /* No room even for the terminating zero */
+    return -1;
+  end = dest + destlen - 1;
+
+  while (*by)
+    {
+      if (*by == '\\')
+       {
+         by++;
+         if (*by >= '0' && *by <= '9') /* \0 gets replaced by entire pattern */
+           {
+             uns j = *by++ - '0';
+             if (j < r->real_matches && r->matches[2*j] >= 0)
+               {
+                 const char *s = src + r->matches[2*j];
+                 uns i = r->matches[2*j+1] - r->matches[2*j];
+                 if (i >= (uns)(end - dest))
+                   return -1;
+                 memcpy(dest, s, i);
+                 dest += i;
+               }
+             /* The reference has been consumed in any case.  Re-examine the
+              * next character, it can be the end of the string or another
+              * backslash.  */
+             continue;
+           }
+         if (!*by)                     /* Lone backslash at the end of the string */
+           break;
+       }
+      if (dest < end)
+       *dest++ = *by++;
+      else
+       return -1;
+    }
+  *dest = 0;
+  return 1;
+}
+
+#else
+
+/* BSD regular expression library */
+
+#ifdef CONFIG_OWN_BSD_REGEX
+#include "lib/regex/regex-sh.h"
+#else
+#include <regex.h>
+#endif
+
+#define INITIAL_MEM 1024               /* Initial space allocated for each pattern */
+#define CHAR_SET_SIZE 256              /* How many characters in the character set.  */
+
+/* A compiled regular expression together with the results of the last match */
+struct regex {
+  struct re_pattern_buffer buf;                /* Pattern compiled by re_compile_pattern() */
+  struct re_registers regs;            /* Must not change between re_match() calls */
+  int len_cache;                       /* Length of the string last passed to rx_match(), checked by rx_subst() */
+};
+
+/*
+ * Compile the POSIX extended regular expression `p' using the BSD-style
+ * interface.  If `icase' is non-zero, a translation table is installed
+ * to make the matching case-insensitive.  Dies with an error message if
+ * the pattern cannot be compiled.
+ */
+regex *
+rx_compile(const char *p, int icase)
+{
+  regex *r = xmalloc_zero(sizeof(regex));
+  const char *msg;
+
+  r->buf.buffer = xmalloc(INITIAL_MEM);
+  r->buf.allocated = INITIAL_MEM;
+  r->buf.translate = NULL;
+  if (icase)
+    {
+      unsigned c;
+      r->buf.translate = xmalloc (CHAR_SET_SIZE);
+      /* Translate every character with Cupcase(), so that letters differing only in case become equal.  */
+      for (c = 0; c < CHAR_SET_SIZE; c++)
+        r->buf.translate[c] = Cupcase(c);
+    }
+  re_set_syntax(RE_SYNTAX_POSIX_EXTENDED);
+  msg = re_compile_pattern(p, strlen(p), &r->buf);
+  if (msg)
+    die("Error parsing pattern `%s': %s", p, msg);
+  return r;
+}
+
+/*
+ * Release a regex created by rx_compile(): the translation table (if
+ * any), the pattern buffer and the structure itself.
+ * NOTE(review): r->regs is not released here; if re_match() allocates
+ * the register arrays, they may need freeing as well -- confirm.
+ */
+void
+rx_free(regex *r)
+{
+  if (r->buf.translate)
+    xfree(r->buf.translate);
+  xfree(r->buf.buffer);
+  xfree(r);
+}
+
+/*
+ * Match the whole string `s' against the regex.  Returns 1 if the match
+ * starts at offset 0 and covers the entire string, 0 otherwise.  The
+ * length of `s' is remembered in r->len_cache.
+ */
+int
+rx_match(regex *r, const char *s)
+{
+  int len = strlen(s);
+
+  r->len_cache = len;
+  if (re_match(&r->buf, s, len, 0, &r->regs) < 0)
+    return 0;
+  /* XXX: Why regex doesn't enforce implicit "^...$" ? */
+  return !r->regs.start[0] && r->regs.end[0] == len;
+}
+
+/*
+ * Match `src' against the regex and, if it matches, build the string
+ * `by' with substitutions in `dest' (at most `destlen' bytes including
+ * the terminating zero).
+ *
+ * In `by', `\0' to `\9' are replaced by the contents of the
+ * corresponding match register (`\0' is the whole match); references to
+ * registers which do not exist expand to nothing.  A backslash followed
+ * by any other character produces that character; a backslash at the
+ * very end of `by' is ignored.
+ *
+ * Returns 1 on success, 0 if `src' does not match, -1 if the result
+ * does not fit in `dest' or a register points outside of `src'.
+ */
+int
+rx_subst(regex *r, const char *by, const char *src, char *dest, uns destlen)
+{
+  char *end;
+
+  if (!rx_match(r, src))
+    return 0;
+  if (!destlen)                        /* No room even for the terminating zero */
+    return -1;
+  end = dest + destlen - 1;
+
+  while (*by)
+    {
+      if (*by == '\\')
+       {
+         by++;
+         if (*by >= '0' && *by <= '9') /* \0 gets replaced by entire pattern */
+           {
+             uns j = *by++ - '0';
+             if (j < r->regs.num_regs)
+               {
+                 const char *s = src + r->regs.start[j];
+                 uns i = r->regs.end[j] - r->regs.start[j];
+                 if (r->regs.start[j] > r->len_cache || r->regs.end[j] > r->len_cache)
+                   return -1;
+                 if (i >= (uns)(end - dest))
+                   return -1;
+                 memcpy(dest, s, i);
+                 dest += i;
+               }
+             /* The reference has been consumed in any case.  Re-examine the
+              * next character, it can be the end of the string or another
+              * backslash.  */
+             continue;
+           }
+         if (!*by)                     /* Lone backslash at the end of the string */
+           break;
+       }
+      if (dest < end)
+       *dest++ = *by++;
+      else
+       return -1;
+    }
+  *dest = 0;
+  return 1;
+}
+
+#endif
+
+#ifdef TEST
+
+/*
+ * Test driver: regex-t [-i] <regex> [<replacement>]
+ *
+ * Reads lines from the standard input.  With a regex only, prints
+ * "MATCH" or "NO MATCH" for each line; with a replacement, prints the
+ * result of rx_subst(), "NO MATCH" or "OVERFLOW".  The option -i makes
+ * the matching case-insensitive.
+ */
+int main(int argc, char **argv)
+{
+  regex *r;
+  char buf1[4096], buf2[4096];
+  int opt_i = 0;
+
+  if (argc > 1 && !strcmp(argv[1], "-i"))
+    {
+      opt_i = 1;
+      argv++;
+      argc--;
+    }
+  if (argc < 2)
+    {
+      /* Without this check, argv[1] would be NULL and rx_compile() would crash */
+      fprintf(stderr, "Usage: regex-t [-i] <regex> [<replacement>]\n");
+      return 1;
+    }
+  r = rx_compile(argv[1], opt_i);
+  while (fgets(buf1, sizeof(buf1), stdin))
+    {
+      char *p = strchr(buf1, '\n');
+      if (p)
+       *p = 0;
+      if (argc == 2)
+       {
+         if (rx_match(r, buf1))
+           puts("MATCH");
+         else
+           puts("NO MATCH");
+       }
+      else
+       {
+         int i = rx_subst(r, argv[2], buf1, buf2, sizeof(buf2));
+         if (i < 0)
+           puts("OVERFLOW");
+         else if (!i)
+           puts("NO MATCH");
+         else
+           puts(buf2);
+       }
+    }
+  rx_free(r);
+  return 0;
+}
+
+#endif
diff --git a/lib/regex.t b/lib/regex.t

new file mode 100644 (file)

index 0000000..45b92a5
--- /dev/null
+++ b/lib/regex.t
@@ -0,0 +1,65 @@
+# Tests for the regex module
+
+Run:   ../obj/lib/regex-t 'a.*b.*c'
+In:    abc
+       ajkhkbbbbbc
+       Aabc
+Out:   MATCH
+       MATCH
+       NO MATCH
+
+Run:   ../obj/lib/regex-t -i 'a.*b.*c'
+In:    aBc
+       ajkhkbBBBBC
+       Aabc
+Out:   MATCH
+       MATCH
+       MATCH
+
+Run:   ../obj/lib/regex-t -i '(ahoj|nebo)'
+In:    Ahoj
+       nEBo
+       ahoja
+       (ahoj|nebo)
+Out:   MATCH
+       MATCH
+       NO MATCH
+       NO MATCH
+
+Run:   ../obj/lib/regex-t '\(ahoj\)'
+In:    (ahoj)
+       ahoj
+Out:   MATCH
+       NO MATCH
+
+Run:   ../obj/lib/regex-t '(.*b)*'
+In:    ababababab
+       ababababababababababababababababababababababababababababa
+Out:   MATCH
+       NO MATCH
+
+Run:   ../obj/lib/regex-t '(.*)((aabb)|cc)(b.*)' '\1<\3>\4'
+In:    aaabbb
+       aabbccb
+       abcabc
+       aaccbb
+Out:   a<aabb>b
+       aabb<>b
+       NO MATCH
+       aa<>bb
+
+Run:   ../obj/lib/regex-t '.*\?(.*&)*([a-z_]*sess[a-z_]*|random|sid|S_ID|rnd|timestamp|referer)=.*'
+In:    /nemecky/ubytovani/hotel.php?sort=&cislo=26&mena=EUR&typ=Hotel&luz1=ANO&luz2=ANO&luz3=&luz4=&luz5=&maxp1=99999&maxp2=99999&maxp3=99999&maxp4=99999&maxp5=99999&apart=&rada=8,9,10,11,19,22,26,27,28,29,3&cislo=26&mena=EUR&typ=Hotel&luz1=ANO&luz2=ANO&luz3=&luz4=&luz5=&maxp1=99999&maxp2=99999&maxp3=99999&maxp4=99999&maxp5=99999&apart=&rada=8,9,10,11,19,22,26,27,28,29,3&cislo=26&mena=EUR&typ=Hotel&luz1=ANO&luz2=ANO&luz3=&luz4=&luz5=&maxp1=99999&maxp2=99999&maxp3=99999&maxp4=99999&maxp5=99999&apart=&rada=8,9,10,11,19,22,26,27,28,29,3
+       /test...?f=1&s=3&sid=123&q=3&
+Out:   NO MATCH
+       MATCH
+
+Run:   ../obj/lib/regex-t '.*[0-9a-f]{8,16}.*'
+In:    abcdabcdabcd
+       aaaaaaaaaaaaaaaaaaaaaaaaaaaa
+       asddajlkdkajlqwepoiequwiouio
+       000001111p101010101010q12032
+Out:   MATCH
+       MATCH
+       NO MATCH
+       MATCH
diff --git a/lib/regex/Makefile b/lib/regex/Makefile

new file mode 100644 (file)

index 0000000..730b249
--- /dev/null
+++ b/lib/regex/Makefile
@@ -0,0 +1,7 @@
+# Makefile for the UCW Regex Library (c) 2004 Martin Mares <mj@ucw.cz>
+
+DIRS+=lib/regex
+
+LIBUCW_MODS+=regex/regex
+
+$(o)/lib/regex/regex.o $(o)/lib/regex/regex.oo: CWARNS=
diff --git a/lib/regex/README b/lib/regex/README

new file mode 100644 (file)

index 0000000..f8c2605
--- /dev/null
+++ b/lib/regex/README
@@ -0,0 +1,9 @@
+This directory contains regular expression routines from the GNU libc 2.3.2
+which are significantly faster than the default regex libraries on most systems.
+
+They are distributed under the GNU LGPL.
+
+All files are exact copies of the original distribution; I only provided my
+own regex.c, regex-sh.h and Makefile.
+
+                                       Martin Mares, March 2004
diff --git a/lib/regex/regcomp.c b/lib/regex/regcomp.c

new file mode 100644 (file)

index 0000000..f25ecae
--- /dev/null
+++ b/lib/regex/regcomp.c
@@ -0,0 +1,3544 @@
+/* Extended regular expression matching and search library.
+   Copyright (C) 2002, 2003 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+/* Forward declarations of the file-local helpers, so that the definitions
+   below may appear in any order.  */
+
+/* Compilation driver, fastmap construction and DFA set-up / tear-down.  */
+static reg_errcode_t re_compile_internal (regex_t *preg, const char * pattern,
+                                         int length, reg_syntax_t syntax);
+static void re_compile_fastmap_iter (regex_t *bufp,
+                                    const re_dfastate_t *init_state,
+                                    char *fastmap);
+static reg_errcode_t init_dfa (re_dfa_t *dfa, int pat_len);
+static reg_errcode_t init_word_char (re_dfa_t *dfa);
+#ifdef RE_ENABLE_I18N
+static void free_charset (re_charset_t *cset);
+#endif /* RE_ENABLE_I18N */
+static void free_workarea_compile (regex_t *preg);
+static reg_errcode_t create_initial_state (re_dfa_t *dfa);
+/* Analysis of the parsed tree.  */
+static reg_errcode_t analyze (re_dfa_t *dfa);
+static reg_errcode_t analyze_tree (re_dfa_t *dfa, bin_tree_t *node);
+static void calc_first (re_dfa_t *dfa, bin_tree_t *node);
+static void calc_next (re_dfa_t *dfa, bin_tree_t *node);
+static void calc_epsdest (re_dfa_t *dfa, bin_tree_t *node);
+static reg_errcode_t duplicate_node_closure (re_dfa_t *dfa, int top_org_node,
+                                            int top_clone_node, int root_node,
+                                            unsigned int constraint);
+static reg_errcode_t duplicate_node (int *new_idx, re_dfa_t *dfa, int org_idx,
+                                    unsigned int constraint);
+static int search_duplicated_node (re_dfa_t *dfa, int org_node,
+                                  unsigned int constraint);
+static reg_errcode_t calc_eclosure (re_dfa_t *dfa);
+static reg_errcode_t calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa,
+                                        int node, int root);
+static void calc_inveclosure (re_dfa_t *dfa);
+/* Tokenizer.  */
+static int fetch_number (re_string_t *input, re_token_t *token,
+                        reg_syntax_t syntax);
+static re_token_t fetch_token (re_string_t *input, reg_syntax_t syntax);
+static int peek_token (re_token_t *token, re_string_t *input,
+                       reg_syntax_t syntax);
+static int peek_token_bracket (re_token_t *token, re_string_t *input,
+                              reg_syntax_t syntax);
+/* Parser.  */
+static bin_tree_t *parse (re_string_t *regexp, regex_t *preg,
+                         reg_syntax_t syntax, reg_errcode_t *err);
+static bin_tree_t *parse_reg_exp (re_string_t *regexp, regex_t *preg,
+                                 re_token_t *token, reg_syntax_t syntax,
+                                 int nest, reg_errcode_t *err);
+static bin_tree_t *parse_branch (re_string_t *regexp, regex_t *preg,
+                                re_token_t *token, reg_syntax_t syntax,
+                                int nest, reg_errcode_t *err);
+static bin_tree_t *parse_expression (re_string_t *regexp, regex_t *preg,
+                                    re_token_t *token, reg_syntax_t syntax,
+                                    int nest, reg_errcode_t *err);
+static bin_tree_t *parse_sub_exp (re_string_t *regexp, regex_t *preg,
+                                 re_token_t *token, reg_syntax_t syntax,
+                                 int nest, reg_errcode_t *err);
+static bin_tree_t *parse_dup_op (bin_tree_t *dup_elem, re_string_t *regexp,
+                                re_dfa_t *dfa, re_token_t *token,
+                                reg_syntax_t syntax, reg_errcode_t *err);
+static bin_tree_t *parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa,
+                                     re_token_t *token, reg_syntax_t syntax,
+                                     reg_errcode_t *err);
+static reg_errcode_t parse_bracket_element (bracket_elem_t *elem,
+                                           re_string_t *regexp,
+                                           re_token_t *token, int token_len,
+                                           re_dfa_t *dfa,
+                                           reg_syntax_t syntax);
+static reg_errcode_t parse_bracket_symbol (bracket_elem_t *elem,
+                                         re_string_t *regexp,
+                                         re_token_t *token);
+/* Bracket-expression builders; the signatures depend on whether
+   RE_ENABLE_I18N is defined, and the first pair is declared only
+   outside of libc.  */
+#ifndef _LIBC
+# ifdef RE_ENABLE_I18N
+static reg_errcode_t build_range_exp (re_bitset_ptr_t sbcset,
+                                     re_charset_t *mbcset, int *range_alloc,
+                                     bracket_elem_t *start_elem,
+                                     bracket_elem_t *end_elem);
+static reg_errcode_t build_collating_symbol (re_bitset_ptr_t sbcset,
+                                            re_charset_t *mbcset,
+                                            int *coll_sym_alloc,
+                                            const unsigned char *name);
+# else /* not RE_ENABLE_I18N */
+static reg_errcode_t build_range_exp (re_bitset_ptr_t sbcset,
+                                     bracket_elem_t *start_elem,
+                                     bracket_elem_t *end_elem);
+static reg_errcode_t build_collating_symbol (re_bitset_ptr_t sbcset,
+                                            const unsigned char *name);
+# endif /* not RE_ENABLE_I18N */
+#endif /* not _LIBC */
+#ifdef RE_ENABLE_I18N
+static reg_errcode_t build_equiv_class (re_bitset_ptr_t sbcset,
+                                       re_charset_t *mbcset,
+                                       int *equiv_class_alloc,
+                                       const unsigned char *name);
+static reg_errcode_t build_charclass (re_bitset_ptr_t sbcset,
+                                     re_charset_t *mbcset,
+                                     int *char_class_alloc,
+                                     const unsigned char *class_name,
+                                     reg_syntax_t syntax);
+#else  /* not RE_ENABLE_I18N */
+static reg_errcode_t build_equiv_class (re_bitset_ptr_t sbcset,
+                                       const unsigned char *name);
+static reg_errcode_t build_charclass (re_bitset_ptr_t sbcset,
+                                     const unsigned char *class_name,
+                                     reg_syntax_t syntax);
+#endif /* not RE_ENABLE_I18N */
+/* Parse-tree helpers.  */
+static bin_tree_t *build_word_op (re_dfa_t *dfa, int not, reg_errcode_t *err);
+static void free_bin_tree (bin_tree_t *tree);
+static bin_tree_t *create_tree (bin_tree_t *left, bin_tree_t *right,
+                               re_token_type_t type, int index);
+static bin_tree_t *duplicate_tree (const bin_tree_t *src, re_dfa_t *dfa);
+\f
+/* This table gives an error message for each of the error codes listed
+   in regex.h.  Obviously the order here has to be same as there.
+   POSIX doesn't require that we do anything for REG_NOERROR,
+   but why not be nice?
+
+   All messages are stored back to back in a single char array, separated
+   by explicit "\0" literals.  Each REG_xxx_IDX macro is the byte offset of
+   its message inside the array: the previous offset plus the sizeof of the
+   previous message literal (sizeof counts that literal's terminating NUL,
+   which accounts for the "\0" separator).  The text passed to sizeof must
+   therefore stay identical to the message that precedes it.  */
+
+const char __re_error_msgid[] attribute_hidden =
+  {
+#define REG_NOERROR_IDX        0
+    gettext_noop ("Success")   /* REG_NOERROR */
+    "\0"
+#define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success")
+    gettext_noop ("No match")  /* REG_NOMATCH */
+    "\0"
+#define REG_BADPAT_IDX (REG_NOMATCH_IDX + sizeof "No match")
+    gettext_noop ("Invalid regular expression") /* REG_BADPAT */
+    "\0"
+#define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression")
+    gettext_noop ("Invalid collation character") /* REG_ECOLLATE */
+    "\0"
+#define REG_ECTYPE_IDX (REG_ECOLLATE_IDX + sizeof "Invalid collation character")
+    gettext_noop ("Invalid character class name") /* REG_ECTYPE */
+    "\0"
+#define REG_EESCAPE_IDX        (REG_ECTYPE_IDX + sizeof "Invalid character class name")
+    gettext_noop ("Trailing backslash") /* REG_EESCAPE */
+    "\0"
+#define REG_ESUBREG_IDX        (REG_EESCAPE_IDX + sizeof "Trailing backslash")
+    gettext_noop ("Invalid back reference") /* REG_ESUBREG */
+    "\0"
+#define REG_EBRACK_IDX (REG_ESUBREG_IDX + sizeof "Invalid back reference")
+    gettext_noop ("Unmatched [ or [^") /* REG_EBRACK */
+    "\0"
+#define REG_EPAREN_IDX (REG_EBRACK_IDX + sizeof "Unmatched [ or [^")
+    gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */
+    "\0"
+#define REG_EBRACE_IDX (REG_EPAREN_IDX + sizeof "Unmatched ( or \\(")
+    gettext_noop ("Unmatched \\{") /* REG_EBRACE */
+    "\0"
+#define REG_BADBR_IDX  (REG_EBRACE_IDX + sizeof "Unmatched \\{")
+    gettext_noop ("Invalid content of \\{\\}") /* REG_BADBR */
+    "\0"
+#define REG_ERANGE_IDX (REG_BADBR_IDX + sizeof "Invalid content of \\{\\}")
+    gettext_noop ("Invalid range end") /* REG_ERANGE */
+    "\0"
+#define REG_ESPACE_IDX (REG_ERANGE_IDX + sizeof "Invalid range end")
+    gettext_noop ("Memory exhausted") /* REG_ESPACE */
+    "\0"
+#define REG_BADRPT_IDX (REG_ESPACE_IDX + sizeof "Memory exhausted")
+    gettext_noop ("Invalid preceding regular expression") /* REG_BADRPT */
+    "\0"
+#define REG_EEND_IDX   (REG_BADRPT_IDX + sizeof "Invalid preceding regular expression")
+    gettext_noop ("Premature end of regular expression") /* REG_EEND */
+    "\0"
+#define REG_ESIZE_IDX  (REG_EEND_IDX + sizeof "Premature end of regular expression")
+    gettext_noop ("Regular expression too big") /* REG_ESIZE */
+    "\0"
+#define REG_ERPAREN_IDX        (REG_ESIZE_IDX + sizeof "Regular expression too big")
+    gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */
+  };
+
+/* Maps an error code (used as the array index) to the byte offset of its
+   message inside __re_error_msgid.  The entries are listed in the same
+   order as the messages above; regerror also uses the number of entries
+   here as the upper bound for valid error codes.  */
+const size_t __re_error_msgid_idx[] attribute_hidden =
+  {
+    REG_NOERROR_IDX,
+    REG_NOMATCH_IDX,
+    REG_BADPAT_IDX,
+    REG_ECOLLATE_IDX,
+    REG_ECTYPE_IDX,
+    REG_EESCAPE_IDX,
+    REG_ESUBREG_IDX,
+    REG_EBRACK_IDX,
+    REG_EPAREN_IDX,
+    REG_EBRACE_IDX,
+    REG_BADBR_IDX,
+    REG_ERANGE_IDX,
+    REG_ESPACE_IDX,
+    REG_BADRPT_IDX,
+    REG_EEND_IDX,
+    REG_ESIZE_IDX,
+    REG_ERPAREN_IDX
+  };
+\f
+/* Entry points for GNU code.  */
+
+/* GNU-style compiler entry point.  Compiles the LENGTH bytes at PATTERN
+   into BUFP, using the syntax currently held in `re_syntax_options'.
+   Returns NULL when the pattern compiled, otherwise the (possibly
+   translated) message describing the error.
+
+   The caller is expected to have set the `allocated' (and perhaps
+   `buffer') and `translate' fields of BUFP beforehand.  */
+
+const char *
+re_compile_pattern (pattern, length, bufp)
+    const char *pattern;
+    size_t length;
+    struct re_pattern_buffer *bufp;
+{
+  reg_errcode_t status;
+
+  /* Anchors match at newlines too.  */
+  bufp->newline_anchor = 1;
+
+  /* GNU callers select register reporting by handing a null REGS to
+     re_match and friends, never through no_sub, so keep it cleared.  */
+  bufp->no_sub = 0;
+
+  status = re_compile_internal (bufp, pattern, length, re_syntax_options);
+  if (status)
+    return gettext (__re_error_msgid + __re_error_msgid_idx[(int) status]);
+
+  return NULL;
+}
+#ifdef _LIBC
+weak_alias (__re_compile_pattern, re_compile_pattern)
+#endif
+
+/* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
+   also be assigned to arbitrarily: each pattern buffer stores its own
+   syntax, so it can be changed between regex compilations.
+   This is the syntax re_compile_pattern (and re_comp, when that is
+   compiled in) passes to re_compile_internal.  */
+/* This has no initializer because initialized variables in Emacs
+   become read-only after dumping.  */
+reg_syntax_t re_syntax_options;
+
+
+/* Select the regexp syntax used by subsequent GNU-style compilations.
+   Different utilities historically accept different, mutually
+   incompatible syntaxes; this lets a caller pick one.
+
+   SYNTAX is a bit mask built from the bits defined in regex.h.
+   The syntax that was in effect before the call is returned.  */
+
+reg_syntax_t
+re_set_syntax (syntax)
+    reg_syntax_t syntax;
+{
+  const reg_syntax_t previous = re_syntax_options;
+
+  re_syntax_options = syntax;
+
+  return previous;
+}
+#ifdef _LIBC
+weak_alias (__re_set_syntax, re_set_syntax)
+#endif
+
+/* Rebuild BUFP->fastmap from the initial states of the compiled pattern
+   and mark it accurate.  Always returns 0.  */
+int
+re_compile_fastmap (bufp)
+    struct re_pattern_buffer *bufp;
+{
+  re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
+  char *map = bufp->fastmap;
+  const re_dfastate_t *plain = dfa->init_state;
+  const re_dfastate_t *contextual[3];
+  int k;
+
+  /* Context-specific initial states, visited in this fixed order.  */
+  contextual[0] = dfa->init_state_word;
+  contextual[1] = dfa->init_state_nl;
+  contextual[2] = dfa->init_state_begbuf;
+
+  /* Start from an empty map and add the context-free initial state.  */
+  memset (map, '\0', sizeof (char) * SBC_MAX);
+  re_compile_fastmap_iter (bufp, plain, map);
+
+  /* Add each contextual state that is not the plain one itself.  */
+  for (k = 0; k < 3; ++k)
+    if (contextual[k] != plain)
+      re_compile_fastmap_iter (bufp, contextual[k], map);
+
+  bufp->fastmap_accurate = 1;
+  return 0;
+}
+#ifdef _LIBC
+weak_alias (__re_compile_fastmap, re_compile_fastmap)
+#endif
+
+/* Flag byte CH in FASTMAP; when ICASE is nonzero, also flag the
+   lowercase form of CH as returned by tolower.  */
+static inline void
+re_set_fastmap (char *fastmap, int icase, int ch)
+{
+  if (icase != 0)
+    {
+      const int folded = tolower (ch);
+      fastmap[folded] = 1;
+    }
+  fastmap[ch] = 1;
+}
+
+/* Helper function for re_compile_fastmap.
+   Compile fastmap for the initial_state INIT_STATE.  */
+
+static void
+re_compile_fastmap_iter (bufp, init_state, fastmap)
+     regex_t *bufp;
+     const re_dfastate_t *init_state;
+     char *fastmap;
+{
+  re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
+  int node_cnt;
+  int icase = (MB_CUR_MAX == 1 && (bufp->syntax & RE_ICASE));
+  for (node_cnt = 0; node_cnt < init_state->nodes.nelem; ++node_cnt)
+    {
+      int node = init_state->nodes.elems[node_cnt];
+      re_token_type_t type = dfa->nodes[node].type;
+
+      if (type == CHARACTER)
+       re_set_fastmap (fastmap, icase, dfa->nodes[node].opr.c);
+      else if (type == SIMPLE_BRACKET)
+       {
+         int i, j, ch;
+         for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
+           for (j = 0; j < UINT_BITS; ++j, ++ch)
+             if (dfa->nodes[node].opr.sbcset[i] & (1 << j))
+               re_set_fastmap (fastmap, icase, ch);
+       }
+#ifdef RE_ENABLE_I18N
+      else if (type == COMPLEX_BRACKET)
+       {
+         int i;
+         re_charset_t *cset = dfa->nodes[node].opr.mbcset;
+         if (cset->non_match || cset->ncoll_syms || cset->nequiv_classes
+             || cset->nranges || cset->nchar_classes)
+           {
+# ifdef _LIBC
+             if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0)
+               {
+                 /* In this case we want to catch the bytes which are
+                    the first byte of any collation elements.
+                    e.g. In da_DK, we want to catch 'a' since "aa"
+                         is a valid collation element, and don't catch
+                         'b' since 'b' is the only collation element
+                         which starts from 'b'.  */
+                 int j, ch;
+                 const int32_t *table = (const int32_t *)
+                   _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
+                 for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
+                   for (j = 0; j < UINT_BITS; ++j, ++ch)
+                     if (table[ch] < 0)
+                       re_set_fastmap (fastmap, icase, ch);
+               }
+# else
+             if (MB_CUR_MAX > 1)
+               for (i = 0; i < SBC_MAX; ++i)
+                 if (__btowc (i) == WEOF)
+                   re_set_fastmap (fastmap, icase, i);
+# endif /* not _LIBC */
+           }
+         for (i = 0; i < cset->nmbchars; ++i)
+           {
+             char buf[256];
+             mbstate_t state;
+             memset (&state, '\0', sizeof (state));
+             __wcrtomb (buf, cset->mbchars[i], &state);
+             re_set_fastmap (fastmap, icase, *(unsigned char *) buf);
+           }
+       }
+#endif /* RE_ENABLE_I18N */
+      else if (type == END_OF_RE || type == OP_PERIOD)
+       {
+         memset (fastmap, '\1', sizeof (char) * SBC_MAX);
+         if (type == END_OF_RE)
+           bufp->can_be_null = 1;
+         return;
+       }
+    }
+}
+\f
+/* Entry point for POSIX code.  */
+/* regcomp takes a regular expression as a string and compiles it.
+
+   PREG is a regex_t *.  We do not expect any fields to be initialized,
+   since POSIX says we shouldn't.  Thus, we set
+
+     `buffer' to the compiled pattern;
+     `used' to the length of the compiled pattern;
+     `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
+       REG_EXTENDED bit in CFLAGS is set; otherwise, to
+       RE_SYNTAX_POSIX_BASIC;
+     `newline_anchor' to REG_NEWLINE being set in CFLAGS;
+     `fastmap' to an allocated space for the fastmap;
+     `fastmap_accurate' to zero;
+     `re_nsub' to the number of subexpressions in PATTERN.
+
+   PATTERN is the address of the pattern string.
+
+   CFLAGS is a series of bits which affect compilation.
+
+     If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
+     use POSIX basic syntax.
+
+     If REG_NEWLINE is set, then . and [^...] don't match newline.
+     Also, regexec will try a match beginning after every newline.
+
+     If REG_ICASE is set, then we consider upper- and lowercase
+     versions of letters to be equivalent when matching.
+
+     If REG_NOSUB is set, then when PREG is passed to regexec, that
+     routine will report only success or failure, and nothing about the
+     registers.
+
+   It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for
+   the return codes and their meanings.)  */
+
+int
+regcomp (preg, pattern, cflags)
+    regex_t *__restrict preg;
+    const char *__restrict pattern;
+    int cflags;
+{
+  reg_errcode_t status;
+  reg_syntax_t syntax;
+
+  /* Base syntax: POSIX extended or POSIX basic.  */
+  if (cflags & REG_EXTENDED)
+    syntax = RE_SYNTAX_POSIX_EXTENDED;
+  else
+    syntax = RE_SYNTAX_POSIX_BASIC;
+
+  /* Nothing in PREG is assumed to be initialized by the caller.  */
+  preg->buffer = NULL;
+  preg->allocated = 0;
+  preg->used = 0;
+
+  /* The fastmap is allocated up front; give up if that fails.  */
+  preg->fastmap = re_malloc (char, SBC_MAX);
+  if (BE (preg->fastmap == NULL, 0))
+    return REG_ESPACE;
+
+  if (cflags & REG_ICASE)
+    syntax |= RE_ICASE;
+
+  if (cflags & REG_NEWLINE)
+    {
+      /* With REG_NEWLINE neither . nor [^...] matches a newline ...  */
+      syntax &= ~RE_DOT_NEWLINE;
+      syntax |= RE_HAT_LISTS_NOT_NEWLINE;
+      /* ... and anchors match at newlines.  */
+      preg->newline_anchor = 1;
+    }
+  else
+    preg->newline_anchor = 0;
+
+  preg->no_sub = (cflags & REG_NOSUB) != 0;
+  preg->translate = NULL;
+
+  status = re_compile_internal (preg, pattern, strlen (pattern), syntax);
+
+  /* POSIX has a single code, REG_EPAREN, for both an unmatched
+     open-group and an unmatched close-group.  */
+  if (status == REG_ERPAREN)
+    status = REG_EPAREN;
+
+  if (BE (status != REG_NOERROR, 0))
+    {
+      /* Compilation failed: drop the fastmap again.  */
+      re_free (preg->fastmap);
+      preg->fastmap = NULL;
+    }
+  else
+    /* preg->fastmap is known to be non-null here.  Build the fastmap
+       right away, because regexec cannot modify the pattern buffer.
+       The return value is ignored; re_compile_fastmap never fails in
+       this implementation.  */
+    (void) re_compile_fastmap (preg);
+
+  return (int) status;
+}
+#ifdef _LIBC
+weak_alias (__regcomp, regcomp)
+#endif
+
+/* Store the message for ERRCODE (as returned by regcomp or regexec) in
+   ERRBUF, truncated and NUL-terminated to fit ERRBUF_SIZE bytes.
+   Nothing is stored when ERRBUF_SIZE is zero.  Returns the size needed
+   for the complete message, including its terminating NUL.
+   PREG is not used.  Aborts when ERRCODE is not a known error code.  */
+
+size_t
+regerror (errcode, preg, errbuf, errbuf_size)
+    int errcode;
+    const regex_t *preg;
+    char *errbuf;
+    size_t errbuf_size;
+{
+  const size_t known_codes = (sizeof (__re_error_msgid_idx)
+                              / sizeof (__re_error_msgid_idx[0]));
+  const char *text;
+  size_t needed;
+
+  /* Any other value means a bug, either in the caller or in the regex
+     code that produced the value.  Dump core so it can be fixed.  */
+  if (BE (errcode < 0 || errcode >= (int) known_codes, 0))
+    abort ();
+
+  text = gettext (__re_error_msgid + __re_error_msgid_idx[errcode]);
+  needed = strlen (text) + 1;  /* Counts the terminating NUL.  */
+
+  if (BE (errbuf_size == 0, 0))
+    return needed;
+
+  if (BE (needed <= errbuf_size, 1))
+    memcpy (errbuf, text, needed);
+  else
+    {
+      /* Too long: copy what fits and terminate it by hand.  */
+      memcpy (errbuf, text, errbuf_size - 1);
+      errbuf[errbuf_size - 1] = '\0';
+    }
+
+  return needed;
+}
+#ifdef _LIBC
+weak_alias (__regerror, regerror)
+#endif
+
+
+/* Release DFA together with everything it owns: the sub-expression
+   array, the bracket sets of non-duplicated nodes, the per-node arrays
+   and node sets, the state table with all its states, and the word
+   character bitset.  */
+static void
+free_dfa_content (re_dfa_t *dfa)
+{
+  int idx, slot;
+
+  re_free (dfa->subexps);
+
+  /* Bracket sets belong to the original node only; duplicated nodes
+     are skipped.  */
+  for (idx = 0; idx < dfa->nodes_len; ++idx)
+    {
+      re_token_t *tok = dfa->nodes + idx;
+      if (tok->duplicated != 0)
+        continue;
+#ifdef RE_ENABLE_I18N
+      if (tok->type == COMPLEX_BRACKET)
+        {
+          free_charset (tok->opr.mbcset);
+          continue;
+        }
+#endif /* RE_ENABLE_I18N */
+      if (tok->type == SIMPLE_BRACKET)
+        re_free (tok->opr.sbcset);
+    }
+  re_free (dfa->nexts);
+
+  /* Per-node sets; each array may be missing altogether.  */
+  if (dfa->eclosures != NULL)
+    for (idx = 0; idx < dfa->nodes_len; ++idx)
+      re_node_set_free (dfa->eclosures + idx);
+  if (dfa->inveclosures != NULL)
+    for (idx = 0; idx < dfa->nodes_len; ++idx)
+      re_node_set_free (dfa->inveclosures + idx);
+  if (dfa->edests != NULL)
+    for (idx = 0; idx < dfa->nodes_len; ++idx)
+      re_node_set_free (dfa->edests + idx);
+  re_free (dfa->edests);
+  re_free (dfa->eclosures);
+  re_free (dfa->inveclosures);
+  re_free (dfa->nodes);
+
+  /* The hash table has state_hash_mask + 1 buckets.  */
+  for (idx = 0; idx <= dfa->state_hash_mask; ++idx)
+    {
+      struct re_state_table_entry *bucket = dfa->state_table + idx;
+      for (slot = 0; slot < bucket->num; ++slot)
+        free_state (bucket->array[slot]);
+      re_free (bucket->array);
+    }
+  re_free (dfa->state_table);
+
+  if (dfa->word_char != NULL)
+    re_free (dfa->word_char);
+#ifdef DEBUG
+  re_free (dfa->re_str);
+#endif
+
+  re_free (dfa);
+}
+
+
+/* Free dynamically allocated space used by PREG.
+
+   The freed pointers are cleared and `allocated' is reset, so that a
+   second regfree on the same PREG frees nothing twice, and a later
+   re_compile_pattern on it allocates a fresh buffer instead of reusing
+   the freed one.  `translate' is left untouched.  */
+
+void
+regfree (preg)
+    regex_t *preg;
+{
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  if (BE (dfa != NULL, 1))
+    free_dfa_content (dfa);
+  preg->buffer = NULL;
+  preg->allocated = 0;
+
+  re_free (preg->fastmap);
+  preg->fastmap = NULL;
+}
+#ifdef _LIBC
+weak_alias (__regfree, regfree)
+#endif
+\f
+/* Entry points compatible with 4.2 BSD regex library.  We don't define
+   them unless specifically requested.  */
+
+#if defined _REGEX_RE_COMP || defined _LIBC
+
+/* BSD has one and only one pattern buffer.  */
+static struct re_pattern_buffer re_comp_buf;
+
+/* BSD-style compile: compile S into the single static buffer
+   re_comp_buf, using the syntax in re_syntax_options.
+   With a null S nothing is compiled; the result is 0 if a pattern has
+   been compiled before and an error message otherwise.
+   Returns NULL on success, else the (possibly translated) message for
+   the error.  */
+char *
+# ifdef _LIBC
+/* Make these definitions weak in libc, so POSIX programs can redefine
+   these names if they don't use our functions, and still use
+   regcomp/regexec above without link errors.  */
+weak_function
+# endif
+re_comp (s)
+     const char *s;
+{
+  reg_errcode_t ret;
+  char *fastmap;
+
+  if (!s)
+    {
+      if (!re_comp_buf.buffer)
+       return gettext ("No previous regular expression");
+      return 0;
+    }
+
+  if (re_comp_buf.buffer)
+    {
+      /* Discard the previous pattern but keep its fastmap allocation:
+        detach it so that __regfree does not free it, then put it back
+        after the buffer has been zeroed.  */
+      fastmap = re_comp_buf.fastmap;
+      re_comp_buf.fastmap = NULL;
+      __regfree (&re_comp_buf);
+      memset (&re_comp_buf, '\0', sizeof (re_comp_buf));
+      re_comp_buf.fastmap = fastmap;
+    }
+
+  if (re_comp_buf.fastmap == NULL)
+    {
+      /* First use (or no fastmap retained): allocate one.  */
+      re_comp_buf.fastmap = (char *) malloc (SBC_MAX);
+      if (re_comp_buf.fastmap == NULL)
+       return (char *) gettext (__re_error_msgid
+                                + __re_error_msgid_idx[(int) REG_ESPACE]);
+    }
+
+  /* Since `re_exec' always passes NULL for the `regs' argument, we
+     don't need to initialize the pattern buffer fields which affect it.  */
+
+  /* Match anchors at newlines.  */
+  re_comp_buf.newline_anchor = 1;
+
+  ret = re_compile_internal (&re_comp_buf, s, strlen (s), re_syntax_options);
+
+  if (!ret)
+    return NULL;
+
+  /* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
+  return (char *) gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]);
+}
+
+#ifdef _LIBC
+/* Releases the static BSD pattern buffer.  Only built inside libc;
+   presumably libc_freeres_fn registers this to run when libc frees its
+   resources -- confirm against the libc internals.  */
+libc_freeres_fn (free_mem)
+{
+  __regfree (&re_comp_buf);
+}
+#endif
+
+#endif /* _REGEX_RE_COMP */
+\f
+/* Internal entry point.
+   Compile the regular expression PATTERN, whose length is LENGTH.
+   SYNTAX indicate regular expression's syntax.  */
+
+static reg_errcode_t
+re_compile_internal (preg, pattern, length, syntax)
+     regex_t *preg;
+     const char * pattern;
+     int length;
+     reg_syntax_t syntax;
+{
+  reg_errcode_t err = REG_NOERROR;
+  re_dfa_t *dfa;
+  re_string_t regexp;
+
+  /* Initialize the pattern buffer.  */
+  preg->fastmap_accurate = 0;
+  preg->syntax = syntax;
+  preg->not_bol = preg->not_eol = 0;
+  preg->used = 0;
+  preg->re_nsub = 0;
+  preg->can_be_null = 0;
+  preg->regs_allocated = REGS_UNALLOCATED;
+
+  /* Initialize the dfa.  */
+  dfa = (re_dfa_t *) preg->buffer;
+  if (preg->allocated < sizeof (re_dfa_t))
+    {
+      /* If zero allocated, but buffer is non-null, try to realloc
+        enough space.  This loses if buffer's address is bogus, but
+        that is the user's responsibility.  If ->buffer is NULL this
+        is a simple allocation.  */
+      dfa = re_realloc (preg->buffer, re_dfa_t, 1);
+      if (dfa == NULL)
+       return REG_ESPACE;
+      preg->allocated = sizeof (re_dfa_t);
+    }
+  preg->buffer = (unsigned char *) dfa;
+  preg->used = sizeof (re_dfa_t);
+
+  err = init_dfa (dfa, length);
+  if (BE (err != REG_NOERROR, 0))
+    {
+      re_free (dfa);
+      preg->buffer = NULL;
+      preg->allocated = 0;
+      return err;
+    }
+#ifdef DEBUG
+  dfa->re_str = re_malloc (char, length + 1);
+  strncpy (dfa->re_str, pattern, length + 1);
+#endif
+
+  err = re_string_construct (&regexp, pattern, length, preg->translate,
+                            syntax & RE_ICASE);
+  if (BE (err != REG_NOERROR, 0))
+    {
+      re_free (dfa);
+      preg->buffer = NULL;
+      preg->allocated = 0;
+      return err;
+    }
+
+  /* Parse the regular expression, and build a structure tree.  */
+  preg->re_nsub = 0;
+  dfa->str_tree = parse (&regexp, preg, syntax, &err);
+  if (BE (dfa->str_tree == NULL, 0))
+    goto re_compile_internal_free_return;
+
+  /* Analyze the tree and collect information which is necessary to
+     create the dfa.  */
+  err = analyze (dfa);
+  if (BE (err != REG_NOERROR, 0))
+    goto re_compile_internal_free_return;
+
+  /* Then create the initial state of the dfa.  */
+  err = create_initial_state (dfa);
+
+  /* Release work areas.  */
+  free_workarea_compile (preg);
+  re_string_destruct (&regexp);
+
+  if (BE (err != REG_NOERROR, 0))
+    {
+    re_compile_internal_free_return:
+      free_dfa_content (dfa);
+      preg->buffer = NULL;
+      preg->allocated = 0;
+    }
+
+  return err;
+}
+
+/* Initialize DFA.  We use the length of the regular expression PAT_LEN
+   as the initial length of some arrays.
+
+   Returns REG_NOERROR on success.  If any allocation fails, the
+   allocations that did succeed are released, the corresponding members
+   are set to NULL, and REG_ESPACE is returned.  */
+
+static reg_errcode_t
+init_dfa (dfa, pat_len)
+     re_dfa_t *dfa;
+     int pat_len;
+{
+  int table_size;
+
+  memset (dfa, '\0', sizeof (re_dfa_t));
+
+  dfa->nodes_alloc = pat_len + 1;
+  dfa->nodes = re_malloc (re_token_t, dfa->nodes_alloc);
+
+  dfa->states_alloc = pat_len + 1;
+
+  /*  table_size = 2 ^ ceil(log pat_len) */
+  for (table_size = 1; table_size > 0; table_size <<= 1)
+    if (table_size > pat_len)
+      break;
+
+  /* The table must start zeroed: empty buckets have num == 0 and a
+     null array.  */
+  dfa->state_table = calloc (table_size, sizeof (struct re_state_table_entry));
+  dfa->state_hash_mask = table_size - 1;
+
+  dfa->subexps_alloc = 1;
+  dfa->subexps = re_malloc (re_subexp_t, dfa->subexps_alloc);
+  dfa->word_char = NULL;
+
+  if (BE (dfa->nodes == NULL || dfa->state_table == NULL
+          || dfa->subexps == NULL, 0))
+    {
+      /* Release whatever was obtained (freeing a null pointer is
+         harmless), so that a failed compilation does not leak.  */
+      re_free (dfa->subexps);
+      re_free (dfa->state_table);
+      re_free (dfa->nodes);
+      dfa->subexps = NULL;
+      dfa->state_table = NULL;
+      dfa->nodes = NULL;
+      return REG_ESPACE;
+    }
+  return REG_NOERROR;
+}
+
+/* Initialize the WORD_CHAR table, which indicates which characters are
+   "word" characters.  Here "word" means a word constituent as used by
+   operators like "\<", "\>", etc.: alphanumerics (per isalnum) and '_'.
+   Returns REG_ESPACE if the table cannot be allocated, REG_NOERROR
+   otherwise.  */
+
+static reg_errcode_t
+init_word_char (dfa)
+     re_dfa_t *dfa;
+{
+  int i, j, ch;
+  dfa->word_char = (re_bitset_ptr_t) calloc (sizeof (bitset), 1);
+  if (BE (dfa->word_char == NULL, 0))
+    return REG_ESPACE;
+  /* CH runs over all character codes; (I, J) is its word/bit position.  */
+  for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
+    for (j = 0; j < UINT_BITS; ++j, ++ch)
+      if (isalnum (ch) || ch == '_')
+        /* Unsigned one: shifting a signed 1 into the sign bit is
+           undefined behaviour.  */
+        dfa->word_char[i] |= 1u << j;
+  return REG_NOERROR;
+}
+
+/* Release the data of PREG's dfa that is needed only during
+   compilation: the parse tree and the org_indices array.  Both members
+   are reset to NULL afterwards.  */
+
+static void
+free_workarea_compile (preg)
+     regex_t *preg;
+{
+  re_dfa_t *const work = (re_dfa_t *) preg->buffer;
+
+  re_free (work->org_indices);
+  work->org_indices = NULL;
+
+  free_bin_tree (work->str_tree);
+  work->str_tree = NULL;
+}
+
+/* Create initial states for all contexts.
+   Sets DFA->init_node and the four DFA->init_state* members.  Returns
+   REG_NOERROR on success, otherwise the error reported while copying
+   the node set or acquiring a state.  */
+
+static reg_errcode_t
+create_initial_state (dfa)
+     re_dfa_t *dfa;
+{
+  int first, i;
+  reg_errcode_t err;
+  re_node_set init_nodes;
+
+  /* Initial states have the epsilon closure of the node which is
+     the first node of the regular expression.  */
+  first = dfa->str_tree->first;
+  dfa->init_node = first;
+  err = re_node_set_init_copy (&init_nodes, dfa->eclosures + first);
+  if (BE (err != REG_NOERROR, 0))
+    return err;
+
+  /* The back-references which are in initial states can epsilon transit,
+     since in this case all of the subexpressions can be null.
+     Then we add epsilon closures of the nodes which are the next nodes of
+     the back-references.  */
+  if (dfa->nbackref > 0)
+    for (i = 0; i < init_nodes.nelem; ++i)
+      {
+        int node_idx = init_nodes.elems[i];
+        int clexp_idx, dest_idx;
+
+        if (dfa->nodes[node_idx].type != OP_BACK_REF)
+          continue;
+        /* Look for the OP_CLOSE_SUBEXP of the referenced group among
+           the initial nodes.  */
+        for (clexp_idx = 0; clexp_idx < init_nodes.nelem; ++clexp_idx)
+          {
+            re_token_t *clexp_node;
+            clexp_node = dfa->nodes + init_nodes.elems[clexp_idx];
+            if (clexp_node->type == OP_CLOSE_SUBEXP
+                && clexp_node->opr.idx + 1 == dfa->nodes[node_idx].opr.idx)
+              break;
+          }
+        if (clexp_idx == init_nodes.nelem)
+          continue;
+
+        dest_idx = dfa->edests[node_idx].elems[0];
+        if (!re_node_set_contains (&init_nodes, dest_idx))
+          {
+            /* NOTE(review): the result of re_node_set_merge is not
+               checked here -- confirm whether it can fail.  */
+            re_node_set_merge (&init_nodes, dfa->eclosures + dest_idx);
+            /* The set changed, restart the scan.  */
+            i = 0;
+          }
+      }
+
+  /* It must be the first time to invoke acquire_state.  */
+  dfa->init_state = re_acquire_state_context (&err, dfa, &init_nodes, 0);
+  /* We don't check ERR here, since the initial state must not be NULL.  */
+  if (BE (dfa->init_state == NULL, 0))
+    {
+      /* INIT_NODES is our private copy; do not leak it on failure.  */
+      re_node_set_free (&init_nodes);
+      return err;
+    }
+  if (dfa->init_state->has_constraint)
+    {
+      dfa->init_state_word = re_acquire_state_context (&err, dfa, &init_nodes,
+                                                       CONTEXT_WORD);
+      dfa->init_state_nl = re_acquire_state_context (&err, dfa, &init_nodes,
+                                                     CONTEXT_NEWLINE);
+      dfa->init_state_begbuf = re_acquire_state_context (&err, dfa,
+                                                         &init_nodes,
+                                                         CONTEXT_NEWLINE
+                                                         | CONTEXT_BEGBUF);
+      if (BE (dfa->init_state_word == NULL || dfa->init_state_nl == NULL
+              || dfa->init_state_begbuf == NULL, 0))
+        {
+          re_node_set_free (&init_nodes);
+          return err;
+        }
+    }
+  else
+    /* No context-dependent constraints: one state serves all contexts.  */
+    dfa->init_state_word = dfa->init_state_nl
+      = dfa->init_state_begbuf = dfa->init_state;
+
+  re_node_set_free (&init_nodes);
+  return REG_NOERROR;
+}
+\f
+/* Run the structural analysis of the parsed expression held in DFA.
+   Allocates the per-node tables (nexts, org_indices, edests, eclosures,
+   inveclosures), resets the entries of the nodes created so far, then
+   computes "first", "next", "edest", "eclosure" and "inveclosure".
+   Returns REG_ESPACE if a table cannot be allocated, otherwise the status
+   reported by the tree walk or by the epsilon closure calculation.  */
+
+static reg_errcode_t
+analyze (dfa)
+     re_dfa_t *dfa;
+{
+  int n;
+  reg_errcode_t status;
+
+  /* Every table gets one slot per allocated node.  */
+  dfa->nexts = re_malloc (int, dfa->nodes_alloc);
+  dfa->org_indices = re_malloc (int, dfa->nodes_alloc);
+  dfa->edests = re_malloc (re_node_set, dfa->nodes_alloc);
+  dfa->eclosures = re_malloc (re_node_set, dfa->nodes_alloc);
+  dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_alloc);
+  if (BE (!(dfa->nexts != NULL && dfa->org_indices != NULL
+            && dfa->edests != NULL && dfa->eclosures != NULL
+            && dfa->inveclosures != NULL), 0))
+    return REG_ESPACE;
+
+  /* Only the nodes that exist so far need a defined starting value.  */
+  for (n = 0; n < dfa->nodes_len; ++n)
+    {
+      dfa->nexts[n] = -1;
+      re_node_set_init_empty (dfa->edests + n);
+      re_node_set_init_empty (dfa->eclosures + n);
+      re_node_set_init_empty (dfa->inveclosures + n);
+    }
+
+  status = analyze_tree (dfa, dfa->str_tree);
+  if (BE (status != REG_NOERROR, 0))
+    return status;
+
+  /* The inverse closures are derived from the closures, so they are only
+     built once the closures were calculated successfully.  */
+  status = calc_eclosure (dfa);
+  if (status != REG_NOERROR)
+    return status;
+  calc_inveclosure (dfa);
+  return status;
+}
+
+/* Helper of analyze.
+   Fill in "first", "next" and "edest" for NODE (each only when it has not
+   been computed yet), then do the same for its left and right subtrees,
+   in that order.  Returns the first error reported by a subtree, or
+   REG_NOERROR.  */
+
+static reg_errcode_t
+analyze_tree (dfa, node)
+     re_dfa_t *dfa;
+     bin_tree_t *node;
+{
+  int side;
+
+  if (node->first == -1)
+    calc_first (dfa, node);
+  if (node->next == -1)
+    calc_next (dfa, node);
+  if (node->eclosure.nelem == 0)
+    calc_epsdest (dfa, node);
+
+  /* Descend into the left child first, then into the right one.  */
+  for (side = 0; side < 2; ++side)
+    {
+      bin_tree_t *child = (side == 0) ? node->left : node->right;
+      reg_errcode_t status;
+
+      if (child == NULL)
+        continue;
+      status = analyze_tree (dfa, child);
+      if (BE (status != REG_NOERROR, 0))
+        return status;
+    }
+  return REG_NOERROR;
+}
+
+/* Calculate "first" for the node NODE.
+   Leaf-like tokens and OP_ALT are their own first node; every other kind
+   inherits "first" from its left child, computing it on demand.  */
+static void
+calc_first (dfa, node)
+     re_dfa_t *dfa;
+     bin_tree_t *node;
+{
+  int own_idx = node->node_idx;
+  /* A tree node of type 0 takes its kind from the DFA node it refers to.  */
+  int kind = (node->type != 0) ? node->type : dfa->nodes[own_idx].type;
+
+  switch (kind)
+    {
+#ifdef DEBUG
+    case OP_OPEN_BRACKET:
+    case OP_CLOSE_BRACKET:
+    case OP_OPEN_DUP_NUM:
+    case OP_CLOSE_DUP_NUM:
+    case OP_NON_MATCH_LIST:
+    case OP_OPEN_COLL_ELEM:
+    case OP_CLOSE_COLL_ELEM:
+    case OP_OPEN_EQUIV_CLASS:
+    case OP_CLOSE_EQUIV_CLASS:
+    case OP_OPEN_CHAR_CLASS:
+    case OP_CLOSE_CHAR_CLASS:
+      /* These tokens must never show up in the tree.  */
+      assert (0);
+#endif
+    case END_OF_RE:
+    case CHARACTER:
+    case OP_PERIOD:
+    case OP_DUP_ASTERISK:
+    case OP_DUP_QUESTION:
+#ifdef RE_ENABLE_I18N
+    case COMPLEX_BRACKET:
+#endif /* RE_ENABLE_I18N */
+    case SIMPLE_BRACKET:
+    case OP_BACK_REF:
+    case ANCHOR:
+    case OP_OPEN_SUBEXP:
+    case OP_CLOSE_SUBEXP:
+    case OP_ALT:
+      node->first = own_idx;
+      break;
+    case OP_DUP_PLUS:
+    default:
+      /* Everything else starts where its left operand starts.  */
+#ifdef DEBUG
+      assert (node->left != NULL);
+#endif
+      if (node->left->first == -1)
+        calc_first (dfa, node->left);
+      node->first = node->left->first;
+      break;
+    }
+}
+
+/* Calculate "next" for the node NODE.
+   The value depends on the parent: the operand of '*' or '+' goes back to
+   the operator, the left operand of a concatenation continues at the
+   "first" of the right operand, and everything else inherits the parent's
+   "next".  A node without parent gets -1.  When NODE refers to a real DFA
+   node (type 0) the result is mirrored into dfa->nexts.  */
+
+static void
+calc_next (dfa, node)
+     re_dfa_t *dfa;
+     bin_tree_t *node;
+{
+  bin_tree_t *up = node->parent;
+
+  if (up == NULL)
+    node->next = -1;
+  else
+    {
+      int up_idx = up->node_idx;
+      int up_kind = (up->type == 0) ? dfa->nodes[up_idx].type : up->type;
+
+      if (up_kind == OP_DUP_ASTERISK || up_kind == OP_DUP_PLUS)
+        node->next = up_idx;
+      else if (up_kind == CONCAT && up->left == node)
+        {
+          if (up->right->first == -1)
+            calc_first (dfa, up->right);
+          node->next = up->right->first;
+        }
+      else
+        {
+          /* Right operand of a concatenation, or any other parent.  */
+          if (up->next == -1)
+            calc_next (dfa, up);
+          node->next = up->next;
+        }
+    }
+
+  if (node->type == 0)
+    dfa->nexts[node->node_idx] = node->next;
+}
+
+/* Calculate "edest" (the epsilon destinations) for the node NODE.
+   Only tree nodes of type 0, i.e. those referring to a DFA node, are
+   handled:
+     - '*', '+', '?'  -> { first of the operand, next of NODE }
+     - '|'            -> { first of each present branch, or next of NODE
+                           for a missing branch }
+     - anchor, subexpression open/close, back reference -> { next of NODE }
+   Any other token gets no epsilon destination.  */
+
+static void
+calc_epsdest (dfa, node)
+     re_dfa_t *dfa;
+     bin_tree_t *node;
+{
+  int self = node->node_idx;
+
+  if (node->type != 0)
+    return;
+
+  switch (dfa->nodes[self].type)
+    {
+    case OP_DUP_ASTERISK:
+    case OP_DUP_PLUS:
+    case OP_DUP_QUESTION:
+      if (node->left->first == -1)
+        calc_first (dfa, node->left);
+      if (node->next == -1)
+        calc_next (dfa, node);
+      re_node_set_init_2 (dfa->edests + self, node->left->first, node->next);
+      break;
+
+    case OP_ALT:
+      {
+        int dest_a, dest_b;
+
+        if (node->left == NULL)
+          {
+            /* Empty left branch: continue right after the alternation.  */
+            if (node->next == -1)
+              calc_next (dfa, node);
+            dest_a = node->next;
+          }
+        else
+          {
+            if (node->left->first == -1)
+              calc_first (dfa, node->left);
+            dest_a = node->left->first;
+          }
+
+        if (node->right == NULL)
+          {
+            /* Empty right branch, handled the same way.  */
+            if (node->next == -1)
+              calc_next (dfa, node);
+            dest_b = node->next;
+          }
+        else
+          {
+            if (node->right->first == -1)
+              calc_first (dfa, node->right);
+            dest_b = node->right->first;
+          }
+        re_node_set_init_2 (dfa->edests + self, dest_a, dest_b);
+      }
+      break;
+
+    case ANCHOR:
+    case OP_OPEN_SUBEXP:
+    case OP_CLOSE_SUBEXP:
+    case OP_BACK_REF:
+      re_node_set_init_1 (dfa->edests + self, node->next);
+      break;
+
+    default:
+      break;
+    }
+}
+
+/* Duplicate the epsilon closure of the node ROOT_NODE.
+   Note that duplicated nodes have constraint INIT_CONSTRAINT in addition
+   to their own constraint.
+
+   The walk starts at the pair (TOP_ORG_NODE, TOP_CLONE_NODE): ORG_NODE runs
+   over the original nodes and CLONE_NODE over the copy that mirrors it.
+   Each step clones the epsilon destination(s) of ORG_NODE and stores them
+   in the edests of CLONE_NODE.  The walk stops at the first node without
+   epsilon destinations, or when it gets back to the anchor ROOT_NODE.
+   For nodes with two destinations the first one is handled by a recursive
+   call and the loop carries on with the second one.
+
+   Returns REG_NOERROR on success, REG_ESPACE when a set insertion fails,
+   or the error reported by duplicate_node / the recursive call.  */
+
+static reg_errcode_t
+duplicate_node_closure (dfa, top_org_node, top_clone_node, root_node,
+                       init_constraint)
+     re_dfa_t *dfa;
+     int top_org_node, top_clone_node, root_node;
+     unsigned int init_constraint;
+{
+  reg_errcode_t err;
+  int org_node, clone_node, ret;
+  /* Constraint accumulated so far; anchors met on the way are OR-ed in.  */
+  unsigned int constraint = init_constraint;
+  for (org_node = top_org_node, clone_node = top_clone_node;;)
+    {
+      int org_dest, clone_dest;
+      if (dfa->nodes[org_node].type == OP_BACK_REF)
+       {
+         /* If the back reference epsilon-transit, its destination must
+            also have the constraint.  Then duplicate the epsilon closure
+            of the destination of the back reference, and store it in
+            edests of the back reference.  */
+         org_dest = dfa->nexts[org_node];
+         re_node_set_empty (dfa->edests + clone_node);
+         err = duplicate_node (&clone_dest, dfa, org_dest, constraint);
+         if (BE (err != REG_NOERROR, 0))
+           return err;
+         /* The clone keeps the "next" of the original back reference.  */
+         dfa->nexts[clone_node] = dfa->nexts[org_node];
+         ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
+         if (BE (ret < 0, 0))
+           return REG_ESPACE;
+       }
+      else if (dfa->edests[org_node].nelem == 0)
+       {
+         /* In case of the node can't epsilon-transit, don't duplicate the
+            destination and store the original destination as the
+            destination of the node.  */
+         dfa->nexts[clone_node] = dfa->nexts[org_node];
+         break;
+       }
+      else if (dfa->edests[org_node].nelem == 1)
+       {
+         /* In case of the node can epsilon-transit, and it has only one
+            destination.  */
+         org_dest = dfa->edests[org_node].elems[0];
+         re_node_set_empty (dfa->edests + clone_node);
+         if (dfa->nodes[org_node].type == ANCHOR)
+           {
+             /* In case of the node has another constraint, append it.  */
+             if (org_node == root_node && clone_node != org_node)
+               {
+                 /* ...but if the node is root_node itself, it means the
+                    epsilon closure have a loop, then tie it to the
+                    destination of the root_node.  */
+                 ret = re_node_set_insert (dfa->edests + clone_node,
+                                           org_dest);
+                 if (BE (ret < 0, 0))
+                   return REG_ESPACE;
+                 break;
+               }
+             constraint |= dfa->nodes[org_node].opr.ctx_type;
+           }
+         err = duplicate_node (&clone_dest, dfa, org_dest, constraint);
+         if (BE (err != REG_NOERROR, 0))
+           return err;
+         ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
+         if (BE (ret < 0, 0))
+           return REG_ESPACE;
+       }
+      else /* dfa->edests[org_node].nelem == 2 */
+       {
+         /* In case of the node can epsilon-transit, and it has two
+            destinations. E.g. '|', '*', '+', '?'.   */
+         org_dest = dfa->edests[org_node].elems[0];
+         re_node_set_empty (dfa->edests + clone_node);
+         /* Search for a duplicated node which satisfies the constraint.  */
+         clone_dest = search_duplicated_node (dfa, org_dest, constraint);
+         if (clone_dest == -1)
+           {
+             /* There are no such a duplicated node, create a new one.  */
+             err = duplicate_node (&clone_dest, dfa, org_dest, constraint);
+             if (BE (err != REG_NOERROR, 0))
+               return err;
+             ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
+             if (BE (ret < 0, 0))
+               return REG_ESPACE;
+             /* The first destination is followed recursively; the loop
+                below continues with the second destination.  */
+             err = duplicate_node_closure (dfa, org_dest, clone_dest,
+                                           root_node, constraint);
+             if (BE (err != REG_NOERROR, 0))
+               return err;
+           }
+         else
+           {
+             /* There are a duplicated node which satisfy the constraint,
+                use it to avoid infinite loop.  */
+             ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
+             if (BE (ret < 0, 0))
+               return REG_ESPACE;
+           }
+
+         /* The second destination is always cloned afresh.  */
+         org_dest = dfa->edests[org_node].elems[1];
+         err = duplicate_node (&clone_dest, dfa, org_dest, constraint);
+         if (BE (err != REG_NOERROR, 0))
+           return err;
+         ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
+         if (BE (ret < 0, 0))
+           return REG_ESPACE;
+       }
+      /* Step to the destination that was handled last and its clone.  */
+      org_node = org_dest;
+      clone_node = clone_dest;
+    }
+  return REG_NOERROR;
+}
+
+/* Search for a node which is duplicated from the node ORG_NODE, and
+   satisfies the constraint CONSTRAINT.
+
+   The scan runs backwards from the last node and stops at the first node
+   that is not a duplicate, or at index 0, which is never examined.
+   Returns the index of the matching duplicate, or -1 if there is none.  */
+
+static int
+search_duplicated_node (dfa, org_node, constraint)
+     re_dfa_t *dfa;
+     int org_node;
+     unsigned int constraint;
+{
+  int idx;
+  /* Check the index before dereferencing: with an empty node table the
+     start index is -1 and dfa->nodes[idx] must not be read.  */
+  for (idx = dfa->nodes_len - 1; idx > 0 && dfa->nodes[idx].duplicated; --idx)
+    {
+      if (org_node == dfa->org_indices[idx]
+          && constraint == dfa->nodes[idx].constraint)
+        return idx; /* Found.  */
+    }
+  return -1; /* Not found.  */
+}
+
+/* Append a copy of the node ORG_IDX to DFA and give it the constraint
+   CONSTRAINT; when the original is an anchor its own context type is added
+   to the constraint.  The copy is flagged as duplicated, starts with empty
+   edests / eclosures / inveclosures and remembers ORG_IDX in org_indices.
+   On success the index of the copy is stored in NEW_IDX and REG_NOERROR is
+   returned; REG_ESPACE is returned if the node cannot be added.  */
+
+static reg_errcode_t
+duplicate_node (new_idx, dfa, org_idx, constraint)
+     re_dfa_t *dfa;
+     int *new_idx, org_idx;
+     unsigned int constraint;
+{
+  re_token_t copy = dfa->nodes[org_idx];
+  unsigned int merged = constraint;
+  int copy_idx = re_dfa_add_node (dfa, copy, 1);
+
+  if (BE (copy_idx == -1, 0))
+    return REG_ESPACE;
+
+  /* An anchor contributes its own context requirement.  */
+  if (dfa->nodes[org_idx].type == ANCHOR)
+    merged |= dfa->nodes[org_idx].opr.ctx_type;
+  dfa->nodes[copy_idx].constraint = merged;
+  dfa->nodes[copy_idx].duplicated = 1;
+
+  /* Remember where the copy came from.  */
+  dfa->org_indices[copy_idx] = org_idx;
+
+  re_node_set_init_empty (dfa->edests + copy_idx);
+  re_node_set_init_empty (dfa->eclosures + copy_idx);
+  re_node_set_init_empty (dfa->inveclosures + copy_idx);
+
+  *new_idx = copy_idx;
+  return REG_NOERROR;
+}
+
+/* Build the inverse epsilon closures: for every node SRC and every node
+   DEST in the epsilon closure of SRC, add SRC to inveclosures[DEST].
+   The result of the insertion is not checked.  */
+static void
+calc_inveclosure (dfa)
+     re_dfa_t *dfa;
+{
+  int from;
+  for (from = 0; from < dfa->nodes_len; ++from)
+    {
+      int k;
+      for (k = 0; k < dfa->eclosures[from].nelem; ++k)
+        {
+          int to = dfa->eclosures[from].elems[k];
+          re_node_set_insert (dfa->inveclosures + to, from);
+        }
+    }
+}
+
+/* Calculate "eclosure" for all the nodes in DFA.
+   The node table is swept repeatedly: a node whose closure is still empty
+   after its calculation marks the sweep as incomplete, and another sweep
+   is started once the end of the table is reached.  dfa->nodes_len is
+   re-read on every step because it is compared afresh each time round.
+   Returns the first error reported by calc_eclosure_iter, or
+   REG_NOERROR.  */
+
+static reg_errcode_t
+calc_eclosure (dfa)
+     re_dfa_t *dfa;
+{
+  int cur = 0;
+  int pending = 0;
+#ifdef DEBUG
+  assert (dfa->nodes_len > 0);
+#endif
+  for (;;)
+    {
+      if (cur == dfa->nodes_len)
+        {
+          /* End of a sweep: finished unless something was left open.  */
+          if (!pending)
+            break;
+          pending = 0;
+          cur = 0;
+        }
+
+#ifdef DEBUG
+      assert (dfa->eclosures[cur].nelem != -1);
+#endif
+      /* Only nodes without a calculated closure need work.  */
+      if (dfa->eclosures[cur].nelem == 0)
+        {
+          re_node_set partial;
+          reg_errcode_t status = calc_eclosure_iter (&partial, dfa, cur, 1);
+          if (BE (status != REG_NOERROR, 0))
+            return status;
+
+          if (dfa->eclosures[cur].nelem == 0)
+            {
+              pending = 1;
+              re_node_set_free (&partial);
+            }
+        }
+      ++cur;
+    }
+  return REG_NOERROR;
+}
+
+/* Calculate the epsilon closure of NODE and hand it back through NEW_SET.
+
+   ROOT is non-zero for the outermost call made by calc_eclosure and zero
+   for the recursive calls.  While the calculation is running,
+   dfa->eclosures[NODE].nelem holds -1 so that a cycle can be detected.
+   A non-root call that ran into such a cycle leaves dfa->eclosures[NODE]
+   marked as not calculated (nelem == 0) and only returns its intermediate
+   result in NEW_SET, which the caller releases.
+
+   Returns REG_NOERROR on success.  On failure an error code is returned,
+   the partially built set is released and NEW_SET is not written.  */
+
+static reg_errcode_t
+calc_eclosure_iter (new_set, dfa, node, root)
+     re_node_set *new_set;
+     re_dfa_t *dfa;
+     int node, root;
+{
+  reg_errcode_t err;
+  unsigned int constraint;
+  int i, incomplete;
+  re_node_set eclosure;
+  incomplete = 0;
+  err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1);
+  if (BE (err != REG_NOERROR, 0))
+    return err;
+
+  /* This indicates that we are calculating this node now.
+     We reference this value to avoid infinite loop.  */
+  dfa->eclosures[node].nelem = -1;
+
+  constraint = ((dfa->nodes[node].type == ANCHOR)
+                ? dfa->nodes[node].opr.ctx_type : 0);
+  /* If the current node has constraints, duplicate all nodes.
+     Since they must inherit the constraints.
+     The first destination is only inspected when there is one, so that an
+     empty edests set is never indexed.  */
+  if (constraint
+      && dfa->edests[node].nelem > 0
+      && !dfa->nodes[dfa->edests[node].elems[0]].duplicated)
+    {
+      err = duplicate_node_closure (dfa, node, node, node, constraint);
+      if (BE (err != REG_NOERROR, 0))
+        {
+          re_node_set_free (&eclosure);
+          return err;
+        }
+    }
+
+  /* Expand each epsilon destination nodes.  */
+  if (IS_EPSILON_NODE(dfa->nodes[node].type))
+    for (i = 0; i < dfa->edests[node].nelem; ++i)
+      {
+        re_node_set eclosure_elem;
+        int edest = dfa->edests[node].elems[i];
+        /* If calculating the epsilon closure of `edest' is in progress,
+           return intermediate result.  */
+        if (dfa->eclosures[edest].nelem == -1)
+          {
+            incomplete = 1;
+            continue;
+          }
+        /* If we haven't calculated the epsilon closure of `edest' yet,
+           calculate now. Otherwise use calculated epsilon closure.  */
+        if (dfa->eclosures[edest].nelem == 0)
+          {
+            err = calc_eclosure_iter (&eclosure_elem, dfa, edest, 0);
+            if (BE (err != REG_NOERROR, 0))
+              {
+                /* Do not leak the set built so far.  */
+                re_node_set_free (&eclosure);
+                return err;
+              }
+          }
+        else
+          eclosure_elem = dfa->eclosures[edest];
+        /* Merge the epsilon closure of `edest'.
+           NOTE(review): the result of the merge is not checked here, as in
+           the other callers visible in this file -- confirm whether it can
+           report an allocation failure.  */
+        re_node_set_merge (&eclosure, &eclosure_elem);
+        /* If the epsilon closure of `edest' is incomplete,
+           the epsilon closure of this node is also incomplete.  */
+        if (dfa->eclosures[edest].nelem == 0)
+          {
+            incomplete = 1;
+            re_node_set_free (&eclosure_elem);
+          }
+      }
+
+  /* Epsilon closures include itself.  A failed insertion is reported
+     instead of silently producing a closure without NODE.  */
+  if (BE (re_node_set_insert (&eclosure, node) < 0, 0))
+    {
+      re_node_set_free (&eclosure);
+      return REG_ESPACE;
+    }
+  if (incomplete && !root)
+    dfa->eclosures[node].nelem = 0;
+  else
+    dfa->eclosures[node] = eclosure;
+  *new_set = eclosure;
+  return REG_NOERROR;
+}
+\f
+/* Functions for token which are used in the parser.  */
+
+/* Read the next token from INPUT and consume it: the token is recognized
+   with peek_token and INPUT is then advanced by the number of bytes
+   peek_token reported.
+   We must not use this function inside bracket expressions.  */
+
+static re_token_t
+fetch_token (input, syntax)
+     re_string_t *input;
+     reg_syntax_t syntax;
+{
+  re_token_t result;
+  int length = peek_token (&result, input, syntax);
+
+  re_string_skip_bytes (input, length);
+  return result;
+}
+
+/* Peek a token from INPUT, and return the length of the token.
+   We must not use this function inside bracket expressions.
+
+   TOKEN receives the type and, where applicable, the operand of the token
+   found at the current position of INPUT; SYNTAX selects which characters
+   and backslash sequences act as operators.  INPUT is left at its current
+   position on return.
+
+   The return value is the number of bytes the token occupies:
+     0  at the end of the input (TOKEN is END_OF_RE),
+     2  for a backslash followed by another byte,
+     1  in every other case.  */
+
+static int
+peek_token (token, input, syntax)
+     re_token_t *token;
+     re_string_t *input;
+     reg_syntax_t syntax;
+{
+  unsigned char c;
+
+  if (re_string_eoi (input))
+    {
+      token->type = END_OF_RE;
+      return 0;
+    }
+
+  c = re_string_peek_byte (input, 0);
+  token->opr.c = c;
+
+#ifdef RE_ENABLE_I18N
+  /* A byte that is not the first byte of a character is always an
+     ordinary character, flagged as the partial piece of a multibyte one.  */
+  token->mb_partial = 0;
+  if (MB_CUR_MAX > 1 &&
+      !re_string_first_byte (input, re_string_cur_idx (input)))
+    {
+      token->type = CHARACTER;
+      token->mb_partial = 1;
+      return 1;
+    }
+#endif
+  if (c == '\\')
+    {
+      unsigned char c2;
+      /* A backslash at the very end of the pattern stands alone.  */
+      if (re_string_cur_idx (input) + 1 >= re_string_length (input))
+       {
+         token->type = BACK_SLASH;
+         return 1;
+       }
+
+      /* Unless one of the cases below says otherwise, the escaped byte is
+         an ordinary character.  */
+      c2 = re_string_peek_byte_case (input, 1);
+      token->opr.c = c2;
+      token->type = CHARACTER;
+      switch (c2)
+       {
+       case '|':
+         if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_NO_BK_VBAR))
+           token->type = OP_ALT;
+         break;
+       case '1': case '2': case '3': case '4': case '5':
+       case '6': case '7': case '8': case '9':
+         if (!(syntax & RE_NO_BK_REFS))
+           {
+             token->type = OP_BACK_REF;
+             token->opr.idx = c2 - '0';
+           }
+         break;
+       case '<':
+         if (!(syntax & RE_NO_GNU_OPS))
+           {
+             token->type = ANCHOR;
+             token->opr.idx = WORD_FIRST;
+           }
+         break;
+       case '>':
+         if (!(syntax & RE_NO_GNU_OPS))
+           {
+             token->type = ANCHOR;
+             token->opr.idx = WORD_LAST;
+           }
+         break;
+       case 'b':
+         if (!(syntax & RE_NO_GNU_OPS))
+           {
+             token->type = ANCHOR;
+             token->opr.idx = WORD_DELIM;
+           }
+         break;
+       case 'B':
+         if (!(syntax & RE_NO_GNU_OPS))
+           {
+             token->type = ANCHOR;
+             token->opr.idx = INSIDE_WORD;
+           }
+         break;
+       case 'w':
+         if (!(syntax & RE_NO_GNU_OPS))
+           token->type = OP_WORD;
+         break;
+       case 'W':
+         if (!(syntax & RE_NO_GNU_OPS))
+           token->type = OP_NOTWORD;
+         break;
+       case '`':
+         if (!(syntax & RE_NO_GNU_OPS))
+           {
+             token->type = ANCHOR;
+             token->opr.idx = BUF_FIRST;
+           }
+         break;
+       case '\'':
+         if (!(syntax & RE_NO_GNU_OPS))
+           {
+             token->type = ANCHOR;
+             token->opr.idx = BUF_LAST;
+           }
+         break;
+       case '(':
+         if (!(syntax & RE_NO_BK_PARENS))
+           token->type = OP_OPEN_SUBEXP;
+         break;
+       case ')':
+         if (!(syntax & RE_NO_BK_PARENS))
+           token->type = OP_CLOSE_SUBEXP;
+         break;
+       case '+':
+         if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM))
+           token->type = OP_DUP_PLUS;
+         break;
+       case '?':
+         if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM))
+           token->type = OP_DUP_QUESTION;
+         break;
+       case '{':
+         if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES)))
+           token->type = OP_OPEN_DUP_NUM;
+         break;
+       case '}':
+         if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES)))
+           token->type = OP_CLOSE_DUP_NUM;
+         break;
+       default:
+         break;
+       }
+      return 2;
+    }
+
+  /* Unescaped byte: an ordinary character unless it is an operator under
+     the given syntax.  */
+  token->type = CHARACTER;
+  switch (c)
+    {
+    case '\n':
+      if (syntax & RE_NEWLINE_ALT)
+       token->type = OP_ALT;
+      break;
+    case '|':
+      if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_NO_BK_VBAR))
+       token->type = OP_ALT;
+      break;
+    case '*':
+      token->type = OP_DUP_ASTERISK;
+      break;
+    case '+':
+      if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM))
+       token->type = OP_DUP_PLUS;
+      break;
+    case '?':
+      if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM))
+       token->type = OP_DUP_QUESTION;
+      break;
+    case '{':
+      if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
+       token->type = OP_OPEN_DUP_NUM;
+      break;
+    case '}':
+      if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
+       token->type = OP_CLOSE_DUP_NUM;
+      break;
+    case '(':
+      if (syntax & RE_NO_BK_PARENS)
+       token->type = OP_OPEN_SUBEXP;
+      break;
+    case ')':
+      if (syntax & RE_NO_BK_PARENS)
+       token->type = OP_CLOSE_SUBEXP;
+      break;
+    case '[':
+      token->type = OP_OPEN_BRACKET;
+      break;
+    case '.':
+      token->type = OP_PERIOD;
+      break;
+    case '^':
+      /* Without RE_CONTEXT_INDEP_ANCHORS, '^' is an anchor only at the
+         start of the pattern or right after '|', '(' or (with
+         RE_NEWLINE_ALT) a newline; elsewhere it stays a character.  */
+      if (!(syntax & RE_CONTEXT_INDEP_ANCHORS) &&
+         re_string_cur_idx (input) != 0)
+       {
+         char prev = re_string_peek_byte (input, -1);
+         if (prev != '|' && prev != '(' &&
+             (!(syntax & RE_NEWLINE_ALT) || prev != '\n'))
+           break;
+       }
+      token->type = ANCHOR;
+      token->opr.idx = LINE_FIRST;
+      break;
+    case '$':
+      /* Without RE_CONTEXT_INDEP_ANCHORS, '$' is an anchor only as the
+         last byte of the pattern or when the following token is an
+         alternation or a closing parenthesis.  The input is advanced by
+         one byte to look at that token and moved back afterwards.  */
+      if (!(syntax & RE_CONTEXT_INDEP_ANCHORS) &&
+         re_string_cur_idx (input) + 1 != re_string_length (input))
+       {
+         re_token_t next;
+         re_string_skip_bytes (input, 1);
+         peek_token (&next, input, syntax);
+         re_string_skip_bytes (input, -1);
+         if (next.type != OP_ALT && next.type != OP_CLOSE_SUBEXP)
+           break;
+       }
+      token->type = ANCHOR;
+      token->opr.idx = LINE_LAST;
+      break;
+    default:
+      break;
+    }
+  return 1;
+}
+
+/* Peek a token from INPUT, and return the length of the token.
+   We must not use this function out of bracket expressions.
+
+   TOKEN receives the type and character of the token at the current
+   position of INPUT; SYNTAX decides whether a backslash escapes the next
+   character and whether "[:" opens a character class.
+
+   Returns 0 at the end of the input (TOKEN is END_OF_RE), 2 for the
+   two-byte openers "[.", "[=" and "[:", and 1 otherwise.  Note that for an
+   escaped character INPUT itself is advanced past the backslash and the
+   returned length covers the escaped byte only.
+
+   The byte after the current one is looked at only when it lies inside
+   the pattern: a trailing backslash or '[' is an ordinary character.  */
+
+static int
+peek_token_bracket (token, input, syntax)
+     re_token_t *token;
+     re_string_t *input;
+     reg_syntax_t syntax;
+{
+  unsigned char c;
+  if (re_string_eoi (input))
+    {
+      token->type = END_OF_RE;
+      return 0;
+    }
+  c = re_string_peek_byte (input, 0);
+  token->opr.c = c;
+
+#ifdef RE_ENABLE_I18N
+  if (MB_CUR_MAX > 1 &&
+      !re_string_first_byte (input, re_string_cur_idx (input)))
+    {
+      token->type = CHARACTER;
+      return 1;
+    }
+#endif /* RE_ENABLE_I18N */
+
+  if (c == '\\' && (syntax & RE_BACKSLASH_ESCAPE_IN_LISTS)
+      && re_string_cur_idx (input) + 1 < re_string_length (input))
+    {
+      /* In this case, '\' escape a character.  */
+      unsigned char c2;
+      re_string_skip_bytes (input, 1);
+      c2 = re_string_peek_byte (input, 0);
+      token->opr.c = c2;
+      token->type = CHARACTER;
+      return 1;
+    }
+  if (c == '[') /* '[' is a special char in a bracket exps.  */
+    {
+      unsigned char c2;
+      int token_len;
+      /* Do not read past the end of the pattern when '[' is its last
+         byte; 0 selects the ordinary-character case below.  */
+      if (re_string_cur_idx (input) + 1 < re_string_length (input))
+        c2 = re_string_peek_byte (input, 1);
+      else
+        c2 = 0;
+      token->opr.c = c2;
+      token_len = 2;
+      switch (c2)
+        {
+        case '.':
+          token->type = OP_OPEN_COLL_ELEM;
+          break;
+        case '=':
+          token->type = OP_OPEN_EQUIV_CLASS;
+          break;
+        case ':':
+          if (syntax & RE_CHAR_CLASSES)
+            {
+              token->type = OP_OPEN_CHAR_CLASS;
+              break;
+            }
+          /* else fall through.  */
+        default:
+          token->type = CHARACTER;
+          token->opr.c = c;
+          token_len = 1;
+          break;
+        }
+      return token_len;
+    }
+  switch (c)
+    {
+    case '-':
+      token->type = OP_CHARSET_RANGE;
+      break;
+    case ']':
+      token->type = OP_CLOSE_BRACKET;
+      break;
+    case '^':
+      token->type = OP_NON_MATCH_LIST;
+      break;
+    default:
+      token->type = CHARACTER;
+    }
+  return 1;
+}
+\f
+/* Functions for parser.  */
+
+/* Entry point of the parser.
+   Parse the regular expression REGEXP and return the structure tree.
+   If an error occurs, ERR is set to the error code and NULL is returned.
+   This function builds the following tree from regular expression
+   <reg_exp>:
+          CAT
+          / \
+         /   \
+   <reg_exp>  EOR
+
+   CAT means concatenation.
+   EOR means end of regular expression.
+   When <reg_exp> yields no tree at all, the EOR node alone is the root.  */
+
+static bin_tree_t *
+parse (regexp, preg, syntax, err)
+     re_string_t *regexp;
+     regex_t *preg;
+     reg_syntax_t syntax;
+     reg_errcode_t *err;
+{
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  bin_tree_t *body, *end_node, *top;
+  re_token_t lookahead;
+  int end_idx;
+
+  lookahead = fetch_token (regexp, syntax);
+  body = parse_reg_exp (regexp, preg, &lookahead, syntax, 0, err);
+  if (BE (body == NULL && *err != REG_NOERROR, 0))
+    return NULL;
+
+  /* The token left over after the expression becomes the end node.  */
+  end_idx = re_dfa_add_node (dfa, lookahead, 0);
+  end_node = create_tree (NULL, NULL, 0, end_idx);
+  top = (body == NULL) ? end_node : create_tree (body, end_node, CONCAT, 0);
+
+  if (BE (end_idx == -1 || end_node == NULL || top == NULL, 0))
+    {
+      *err = REG_ESPACE;
+      return NULL;
+    }
+  return top;
+}
+
+/* This function builds the following tree from regular expression
+   <branch1>|<branch2>:
+          ALT
+          / \
+         /   \
+   <branch1> <branch2>
+
+   ALT means alternative, which represents the operator `|'.
+   A branch may be empty, in which case the corresponding child is NULL.
+   NEST is non-zero inside a parenthesized subexpression, where a closing
+   parenthesis also ends the branch.  On failure NULL is returned with ERR
+   set to the error code.  */
+
+static bin_tree_t *
+parse_reg_exp (regexp, preg, token, syntax, nest, err)
+     re_string_t *regexp;
+     regex_t *preg;
+     re_token_t *token;
+     reg_syntax_t syntax;
+     int nest;
+     reg_errcode_t *err;
+{
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  bin_tree_t *result, *rhs = NULL;
+  int alt_idx;
+
+  result = parse_branch (regexp, preg, token, syntax, nest, err);
+  if (BE (result == NULL && *err != REG_NOERROR, 0))
+    return NULL;
+
+  while (token->type == OP_ALT)
+    {
+      re_token_t bar = *token;
+      alt_idx = re_dfa_add_node (dfa, bar, 0);
+      *token = fetch_token (regexp, syntax);
+
+      if (token->type == OP_ALT || token->type == END_OF_RE
+          || (nest != 0 && token->type == OP_CLOSE_SUBEXP))
+        /* Nothing follows the bar: the right branch is empty.  */
+        rhs = NULL;
+      else
+        {
+          rhs = parse_branch (regexp, preg, token, syntax, nest, err);
+          if (BE (rhs == NULL && *err != REG_NOERROR, 0))
+            {
+              free_bin_tree (result);
+              return NULL;
+            }
+        }
+
+      result = create_tree (result, rhs, 0, alt_idx);
+      if (BE (alt_idx == -1 || result == NULL, 0))
+        {
+          *err = REG_ESPACE;
+          return NULL;
+        }
+      dfa->has_plural_match = 1;
+    }
+  return result;
+}
+
+/* This function builds the following tree from regular expression
+   <exp1><exp2>:
+        CAT
+        / \
+       /   \
+   <exp1> <exp2>
+
+   CAT means concatenation.
+   Expressions are read until an alternation, the end of the pattern or,
+   when NEST is non-zero, a closing parenthesis.  Expressions that yield
+   no tree are skipped.  On failure NULL is returned with ERR set to the
+   error code.  */
+
+static bin_tree_t *
+parse_branch (regexp, preg, token, syntax, nest, err)
+     re_string_t *regexp;
+     regex_t *preg;
+     re_token_t *token;
+     reg_syntax_t syntax;
+     int nest;
+     reg_errcode_t *err;
+{
+  bin_tree_t *chain, *piece;
+
+  chain = parse_expression (regexp, preg, token, syntax, nest, err);
+  if (BE (chain == NULL && *err != REG_NOERROR, 0))
+    return NULL;
+
+  while (!(token->type == OP_ALT || token->type == END_OF_RE
+           || (nest != 0 && token->type == OP_CLOSE_SUBEXP)))
+    {
+      piece = parse_expression (regexp, preg, token, syntax, nest, err);
+      if (BE (piece == NULL && *err != REG_NOERROR, 0))
+        {
+          free_bin_tree (chain);
+          return NULL;
+        }
+
+      if (chain == NULL)
+        /* Nothing collected so far: the new piece starts the chain.  */
+        chain = piece;
+      else if (piece != NULL)
+        {
+          chain = create_tree (chain, piece, CONCAT, 0);
+          if (chain == NULL)
+            {
+              *err = REG_ESPACE;
+              return NULL;
+            }
+        }
+      /* Otherwise the piece is empty and the chain stays as it is.  */
+    }
+  return chain;
+}
+
+/* This function build the following tree, from regular expression a*:
+        *
+        |
+        a
+*/
+
+static bin_tree_t *
+parse_expression (regexp, preg, token, syntax, nest, err)
+     re_string_t *regexp;
+     regex_t *preg;
+     re_token_t *token;
+     reg_syntax_t syntax;
+     int nest;
+     reg_errcode_t *err;
+{
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  bin_tree_t *tree;
+  int new_idx;
+  switch (token->type)
+    {
+    case CHARACTER:
+      new_idx = re_dfa_add_node (dfa, *token, 0);
+      tree = create_tree (NULL, NULL, 0, new_idx);
+      if (BE (new_idx == -1 || tree == NULL, 0))
+       {
+         *err = REG_ESPACE;
+         return NULL;
+       }
+#ifdef RE_ENABLE_I18N
+      if (MB_CUR_MAX > 1)
+       {
+         while (!re_string_eoi (regexp)
+                && !re_string_first_byte (regexp, re_string_cur_idx (regexp)))
+           {
+             bin_tree_t *mbc_remain;
+             *token = fetch_token (regexp, syntax);
+             new_idx = re_dfa_add_node (dfa, *token, 0);
+             mbc_remain = create_tree (NULL, NULL, 0, new_idx);
+             tree = create_tree (tree, mbc_remain, CONCAT, 0);
+             if (BE (new_idx == -1 || mbc_remain == NULL || tree == NULL, 0))
+               {
+                 *err = REG_ESPACE;
+                 return NULL;
+               }
+           }
+       }
+#endif
+      break;
+    case OP_OPEN_SUBEXP:
+      tree = parse_sub_exp (regexp, preg, token, syntax, nest + 1, err);
+      if (BE (*err != REG_NOERROR && tree == NULL, 0))
+       return NULL;
+      break;
+    case OP_OPEN_BRACKET:
+      tree = parse_bracket_exp (regexp, dfa, token, syntax, err);
+      if (BE (*err != REG_NOERROR && tree == NULL, 0))
+       return NULL;
+      break;
+    case OP_BACK_REF:
+      if (BE (preg->re_nsub < token->opr.idx
+             || dfa->subexps[token->opr.idx - 1].end == -1, 0))
+       {
+         *err = REG_ESUBREG;
+         return NULL;
+       }
+      dfa->used_bkref_map |= 1 << (token->opr.idx - 1);
+      new_idx = re_dfa_add_node (dfa, *token, 0);
+      tree = create_tree (NULL, NULL, 0, new_idx);
+      if (BE (new_idx == -1 || tree == NULL, 0))
+       {
+         *err = REG_ESPACE;
+         return NULL;
+       }
+      ++dfa->nbackref;
+      dfa->has_mb_node = 1;
+      break;
+    case OP_DUP_ASTERISK:
+    case OP_DUP_PLUS:
+    case OP_DUP_QUESTION:
+    case OP_OPEN_DUP_NUM:
+      if (syntax & RE_CONTEXT_INVALID_OPS)
+       {
+         *err = REG_BADRPT;
+         return NULL;
+       }
+      else if (syntax & RE_CONTEXT_INDEP_OPS)
+       {
+         *token = fetch_token (regexp, syntax);
+         return parse_expression (regexp, preg, token, syntax, nest, err);
+       }
+      /* else fall through  */
+    case OP_CLOSE_SUBEXP:
+      if ((token->type == OP_CLOSE_SUBEXP) &&
+         !(syntax & RE_UNMATCHED_RIGHT_PAREN_ORD))
+       {
+         *err = REG_ERPAREN;
+         return NULL;
+       }
+      /* else fall through  */
+    case OP_CLOSE_DUP_NUM:
+      /* We treat it as a normal character.  */
+
+      /* Then we can treat these characters as normal characters.  */
+      token->type = CHARACTER;
+      new_idx = re_dfa_add_node (dfa, *token, 0);
+      tree = create_tree (NULL, NULL, 0, new_idx);
+      if (BE (new_idx == -1 || tree == NULL, 0))
+       {
+         *err = REG_ESPACE;
+         return NULL;
+       }
+      break;
+    case ANCHOR:
+      if (dfa->word_char == NULL)
+       {
+         *err = init_word_char (dfa);
+         if (BE (*err != REG_NOERROR, 0))
+           return NULL;
+       }
+      if (token->opr.ctx_type == WORD_DELIM)
+       {
+         bin_tree_t *tree_first, *tree_last;
+         int idx_first, idx_last;
+         token->opr.ctx_type = WORD_FIRST;
+         idx_first = re_dfa_add_node (dfa, *token, 0);
+         tree_first = create_tree (NULL, NULL, 0, idx_first);
+         token->opr.ctx_type = WORD_LAST;
+         idx_last = re_dfa_add_node (dfa, *token, 0);
+         tree_last = create_tree (NULL, NULL, 0, idx_last);
+         token->type = OP_ALT;
+         new_idx = re_dfa_add_node (dfa, *token, 0);
+         tree = create_tree (tree_first, tree_last, 0, new_idx);
+         if (BE (idx_first == -1 || idx_last == -1 || new_idx == -1
+                 || tree_first == NULL || tree_last == NULL
+                 || tree == NULL, 0))
+           {
+             *err = REG_ESPACE;
+             return NULL;
+           }
+       }
+      else
+       {
+         new_idx = re_dfa_add_node (dfa, *token, 0);
+         tree = create_tree (NULL, NULL, 0, new_idx);
+         if (BE (new_idx == -1 || tree == NULL, 0))
+           {
+             *err = REG_ESPACE;
+             return NULL;
+           }
+       }
+      /* We must return here, since ANCHORs can't be followed
+        by repetition operators.
+        eg. RE"^*" is invalid or "<ANCHOR(^)><CHAR(*)>",
+            it must not be "<ANCHOR(^)><REPEAT(*)>".  */
+      *token = fetch_token (regexp, syntax);
+      return tree;
+    case OP_PERIOD:
+      new_idx = re_dfa_add_node (dfa, *token, 0);
+      tree = create_tree (NULL, NULL, 0, new_idx);
+      if (BE (new_idx == -1 || tree == NULL, 0))
+       {
+         *err = REG_ESPACE;
+         return NULL;
+       }
+      if (MB_CUR_MAX > 1)
+       dfa->has_mb_node = 1;
+      break;
+    case OP_WORD:
+      tree = build_word_op (dfa, 0, err);
+      if (BE (*err != REG_NOERROR && tree == NULL, 0))
+       return NULL;
+      break;
+    case OP_NOTWORD:
+      tree = build_word_op (dfa, 1, err);
+      if (BE (*err != REG_NOERROR && tree == NULL, 0))
+       return NULL;
+      break;
+    case OP_ALT:
+    case END_OF_RE:
+      return NULL;
+    case BACK_SLASH:
+      *err = REG_EESCAPE;
+      return NULL;
+    default:
+      /* Must not happen?  */
+#ifdef DEBUG
+      assert (0);
+#endif
+      return NULL;
+    }
+  *token = fetch_token (regexp, syntax);
+
+  while (token->type == OP_DUP_ASTERISK || token->type == OP_DUP_PLUS
+        || token->type == OP_DUP_QUESTION || token->type == OP_OPEN_DUP_NUM)
+    {
+      tree = parse_dup_op (tree, regexp, dfa, token, syntax, err);
+      if (BE (*err != REG_NOERROR && tree == NULL, 0))
+       return NULL;
+      dfa->has_plural_match = 1;
+    }
+
+  return tree;
+}
+
+/* Parse a parenthesized subexpression.  On entry *TOKEN is the opening
+   parenthesis token; on successful return *TOKEN is still the closing
+   parenthesis token (this function does not fetch past it).
+
+   A new slot is reserved in DFA->subexps (growing the array if needed),
+   one DFA node is added for each parenthesis, and both nodes get
+   opr.idx set to the index of this subexpression.  The resulting tree is
+
+            CONCAT
+           /      \
+     <left par>   CONCAT
+                 /      \
+           <reg_exp>  <right par>
+
+   or simply CONCAT (<left par>, <right par>) when the parentheses are
+   empty.
+
+   Returns NULL with *ERR set on failure: REG_ESPACE when an allocation
+   fails, REG_BADPAT when the closing parenthesis is missing, or whatever
+   error parse_reg_exp reported for the body.  */
+
+static bin_tree_t *
+parse_sub_exp (regexp, preg, token, syntax, nest, err)
+     re_string_t *regexp;
+     regex_t *preg;
+     re_token_t *token;
+     reg_syntax_t syntax;
+     int nest;
+     reg_errcode_t *err;
+{
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  bin_tree_t *tree, *left_par, *right_par;
+  size_t cur_nsub;
+  int new_idx;
+
+  /* Reserve the slot for this subexpression, doubling the array when it
+     is full.  The doubled size is rolled back if the realloc fails so
+     that subexps_alloc keeps describing the real allocation.  */
+  cur_nsub = preg->re_nsub++;
+  if (dfa->subexps_alloc < preg->re_nsub)
+    {
+      re_subexp_t *new_array;
+      dfa->subexps_alloc *= 2;
+      new_array = re_realloc (dfa->subexps, re_subexp_t, dfa->subexps_alloc);
+      if (BE (new_array == NULL, 0))
+        {
+          dfa->subexps_alloc /= 2;
+          *err = REG_ESPACE;
+          return NULL;
+        }
+      dfa->subexps = new_array;
+    }
+  dfa->subexps[cur_nsub].start = dfa->nodes_len;
+  /* -1 marks the subexpression as still open.  */
+  dfa->subexps[cur_nsub].end = -1;
+
+  new_idx = re_dfa_add_node (dfa, *token, 0);
+  left_par = create_tree (NULL, NULL, 0, new_idx);
+  if (BE (new_idx == -1 || left_par == NULL, 0))
+    {
+      /* The leaf may have been created even though the node could not
+         be added; do not leak it.  free_bin_tree is already called on
+         possibly-NULL trees below, so NULL is expected to be accepted.  */
+      free_bin_tree (left_par);
+      *err = REG_ESPACE;
+      return NULL;
+    }
+  dfa->nodes[new_idx].opr.idx = cur_nsub;
+  *token = fetch_token (regexp, syntax);
+
+  /* The subexpression may be a null string.  */
+  if (token->type == OP_CLOSE_SUBEXP)
+    tree = NULL;
+  else
+    {
+      tree = parse_reg_exp (regexp, preg, token, syntax, nest, err);
+      if (BE (*err != REG_NOERROR && tree == NULL, 0))
+        {
+          /* LEFT_PAR has not been linked into any tree yet, so it is
+             still owned here and must be released.  */
+          free_bin_tree (left_par);
+          return NULL;
+        }
+    }
+  if (BE (token->type != OP_CLOSE_SUBEXP, 0))
+    {
+      /* Unterminated subexpression: drop both pieces built so far.  */
+      free_bin_tree (tree);
+      free_bin_tree (left_par);
+      *err = REG_BADPAT;
+      return NULL;
+    }
+
+  new_idx = re_dfa_add_node (dfa, *token, 0);
+  dfa->subexps[cur_nsub].end = dfa->nodes_len;
+  right_par = create_tree (NULL, NULL, 0, new_idx);
+  tree = ((tree == NULL) ? right_par
+          : create_tree (tree, right_par, CONCAT, 0));
+  tree = create_tree (left_par, tree, CONCAT, 0);
+  /* NOTE(review): nothing is freed on this path because ownership of the
+     operands after a failed create_tree is decided by create_tree, which
+     is not visible from here -- confirm before adding cleanup.  */
+  if (BE (new_idx == -1 || right_par == NULL || tree == NULL, 0))
+    {
+      *err = REG_ESPACE;
+      return NULL;
+    }
+  dfa->nodes[new_idx].opr.idx = cur_nsub;
+
+  return tree;
+}
+
+/* Parse a repetition operator ("*", "+", "?" or an interval such as
+   "{1,3}") whose first token is *TOKEN and apply it to DUP_ELEM.
+
+   For "*", "+" and "?" a single operator node is put on top of DUP_ELEM.
+   An interval "<re>{n,m}" is expanded by duplicating the operand:
+   N concatenated copies followed by either "<re>*" (for "{n,}") or
+   M-N copies of "<re>?".
+
+   Return values:
+     - the new tree, with *TOKEN advanced past the operator;
+     - NULL with *ERR untouched for "<re>{0}" / "<re>{0,0}", which match
+       the null string (DUP_ELEM is freed);
+     - NULL with *ERR set on failure: REG_ESPACE when an allocation
+       fails, REG_BADBR for "{}", for a malformed interval, or for an
+       interval whose minimum exceeds its maximum, REG_EBRACE when the
+       interval is not terminated.  The operand is released in all of
+       these cases, since the caller drops its reference on failure;
+     - DUP_ELEM itself, unchanged, when the interval is malformed but
+       SYNTAX has RE_INVALID_INTERVAL_ORD: the input position is rewound
+       and *TOKEN becomes the "{" re-typed as CHARACTER.
+
+   fetch_number appears to return -1 when no digits were read and -2 for
+   an invalid number (inferred from the checks below).  */
+
+static bin_tree_t *
+parse_dup_op (dup_elem, regexp, dfa, token, syntax, err)
+     bin_tree_t *dup_elem;
+     re_string_t *regexp;
+     re_dfa_t *dfa;
+     re_token_t *token;
+     reg_syntax_t syntax;
+     reg_errcode_t *err;
+{
+  re_token_t dup_token;
+  bin_tree_t *tree = dup_elem, *work_tree;
+  /* Remember where the operator started so that an invalid interval can
+     be re-read as ordinary characters.  */
+  int new_idx, start_idx = re_string_cur_idx (regexp);
+  re_token_t start_token = *token;
+
+  if (token->type == OP_OPEN_DUP_NUM)
+    {
+      int i;
+      int end = 0;
+      int start = fetch_number (regexp, token, syntax);
+      bin_tree_t *elem;
+      if (start == -1)
+        {
+          if (token->type == CHARACTER && token->opr.c == ',')
+            start = 0; /* We treat "{,m}" as "{0,m}".  */
+          else
+            {
+              free_bin_tree (dup_elem);
+              *err = REG_BADBR; /* <re>{} is invalid.  */
+              return NULL;
+            }
+        }
+      if (BE (start != -2, 1))
+        {
+          /* We treat "{n}" as "{n,n}".  */
+          end = ((token->type == OP_CLOSE_DUP_NUM) ? start
+                 : ((token->type == CHARACTER && token->opr.c == ',')
+                    ? fetch_number (regexp, token, syntax) : -2));
+        }
+      if (BE (start == -2 || end == -2, 0))
+        {
+          /* Invalid sequence.  */
+          if (token->type == OP_CLOSE_DUP_NUM)
+            goto parse_dup_op_invalid_interval;
+          else
+            goto parse_dup_op_ebrace;
+        }
+      if (BE (end != -1 && start > end, 0))
+        {
+          /* "<re>{n,m}" with n > m is invalid; END == -1 means the
+             upper bound was omitted ("{n,}") and is not an error.  */
+          free_bin_tree (dup_elem);
+          *err = REG_BADBR;
+          return NULL;
+        }
+      if (BE (start == 0 && end == 0, 0))
+        {
+          /* We treat "<re>{0}" and "<re>{0,0}" as null string.  */
+          *token = fetch_token (regexp, syntax);
+          free_bin_tree (dup_elem);
+          return NULL;
+        }
+
+      /* Extract "<re>{n,m}" to "<re><re>...<re><re>{0,<m-n>}".
+         DUP_ELEM itself is the first copy, so only START - 1 duplicates
+         are appended.  */
+      elem = tree;
+      for (i = 1; i < start; ++i)
+        {
+          work_tree = duplicate_tree (elem, dfa);
+          tree = create_tree (tree, work_tree, CONCAT, 0);
+          if (BE (work_tree == NULL || tree == NULL, 0))
+            goto parse_dup_op_espace;
+        }
+
+      if (end == -1)
+        {
+          /* We treat "<re>{0,}" as "<re>*".  */
+          dup_token.type = OP_DUP_ASTERISK;
+          if (start > 0)
+            {
+              /* "<re>{n,}": append a starred copy after the N copies.  */
+              elem = duplicate_tree (elem, dfa);
+              new_idx = re_dfa_add_node (dfa, dup_token, 0);
+              work_tree = create_tree (elem, NULL, 0, new_idx);
+              tree = create_tree (tree, work_tree, CONCAT, 0);
+              if (BE (elem == NULL || new_idx == -1 || work_tree == NULL
+                      || tree == NULL, 0))
+                goto parse_dup_op_espace;
+            }
+          else
+            {
+              new_idx = re_dfa_add_node (dfa, dup_token, 0);
+              tree = create_tree (elem, NULL, 0, new_idx);
+              if (BE (new_idx == -1 || tree == NULL, 0))
+                goto parse_dup_op_espace;
+            }
+        }
+      else if (end - start > 0)
+        {
+          /* Then extract "<re>{0,m}" to "<re>?<re>?...<re>?".  */
+          dup_token.type = OP_DUP_QUESTION;
+          if (start > 0)
+            {
+              elem = duplicate_tree (elem, dfa);
+              new_idx = re_dfa_add_node (dfa, dup_token, 0);
+              elem = create_tree (elem, NULL, 0, new_idx);
+              tree = create_tree (tree, elem, CONCAT, 0);
+              if (BE (elem == NULL || new_idx == -1 || tree == NULL, 0))
+                goto parse_dup_op_espace;
+            }
+          else
+            {
+              new_idx = re_dfa_add_node (dfa, dup_token, 0);
+              tree = elem = create_tree (elem, NULL, 0, new_idx);
+              if (BE (new_idx == -1 || tree == NULL, 0))
+                goto parse_dup_op_espace;
+            }
+          /* ELEM is now "<re>?"; one copy is already in TREE.  */
+          for (i = 1; i < end - start; ++i)
+            {
+              work_tree = duplicate_tree (elem, dfa);
+              tree = create_tree (tree, work_tree, CONCAT, 0);
+              if (BE (work_tree == NULL || tree == NULL, 0))
+                goto parse_dup_op_espace;
+            }
+        }
+    }
+  else
+    {
+      /* Plain "*", "+" or "?": wrap the operand in one operator node.  */
+      new_idx = re_dfa_add_node (dfa, *token, 0);
+      tree = create_tree (tree, NULL, 0, new_idx);
+      if (BE (new_idx == -1 || tree == NULL, 0))
+        goto parse_dup_op_espace;
+    }
+  *token = fetch_token (regexp, syntax);
+  return tree;
+
+ parse_dup_op_espace:
+  /* TREE may be NULL here (a failed create_tree); free_bin_tree is
+     relied upon to accept that, as the original code already did.  */
+  free_bin_tree (tree);
+  *err = REG_ESPACE;
+  return NULL;
+
+ parse_dup_op_ebrace:
+  if (BE (!(syntax & RE_INVALID_INTERVAL_ORD), 0))
+    {
+      free_bin_tree (dup_elem);
+      *err = REG_EBRACE;
+      return NULL;
+    }
+  goto parse_dup_op_rollback;
+ parse_dup_op_invalid_interval:
+  if (BE (!(syntax & RE_INVALID_INTERVAL_ORD), 0))
+    {
+      free_bin_tree (dup_elem);
+      *err = REG_BADBR;
+      return NULL;
+    }
+ parse_dup_op_rollback:
+  /* Re-read the "{" as an ordinary character; the operand is returned
+     untouched because no tree has been built on these paths.  */
+  re_string_set_index (regexp, start_idx);
+  *token = start_token;
+  token->type = CHARACTER;
+  return dup_elem;
+}
+
+/* Size of the names for collating symbol/equivalence_class/character_class.
+   I'm not sure, but maybe enough.  */
+#define BRACKET_NAME_BUF_SIZE 32
+
+#ifndef _LIBC
+  /* Local function for parse_bracket_exp, only compiled when _LIBC is
+     not defined.
+     Build the range expression which starts at START_ELEM and ends at
+     END_ELEM.  The result is written to SBCSET and, when RE_ENABLE_I18N
+     is defined, also to MBCSET.
+     RANGE_ALLOC is the allocated size of mbcset->range_starts and
+     mbcset->range_ends; it is a pointer argument since we may update it.
+
+     Returns REG_NOERROR on success, REG_ERANGE when an endpoint is an
+     equivalence or character class or the start sorts after the end,
+     REG_ECOLLATE when an endpoint is a collating symbol whose name is
+     longer than one byte, and REG_ESPACE when the range arrays cannot
+     be grown.  */
+
+static reg_errcode_t
+# ifdef RE_ENABLE_I18N
+build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem)
+     re_charset_t *mbcset;
+     int *range_alloc;
+# else /* not RE_ENABLE_I18N */
+build_range_exp (sbcset, start_elem, end_elem)
+# endif /* not RE_ENABLE_I18N */
+     re_bitset_ptr_t sbcset;
+     bracket_elem_t *start_elem, *end_elem;
+{
+  unsigned int start_ch, end_ch;
+  /* Equivalence Classes and Character Classes can't be a range start/end.  */
+  if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS
+          || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS,
+          0))
+    return REG_ERANGE;
+
+  /* We can handle no multi character collating elements without libc
+     support.  */
+  if (BE ((start_elem->type == COLL_SYM
+           && strlen ((char *) start_elem->opr.name) > 1)
+          || (end_elem->type == COLL_SYM
+              && strlen ((char *) end_elem->opr.name) > 1), 0))
+    return REG_ECOLLATE;
+
+  /* Single-byte value of each endpoint: the character itself, the first
+     byte of a collating symbol name, or 0 for any other element type.  */
+  start_ch = ((start_elem->type == SB_CHAR) ? start_elem->opr.ch
+              : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
+                 : 0));
+  end_ch = ((end_elem->type == SB_CHAR) ? end_elem->opr.ch
+            : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]
+               : 0));
+
+# ifdef RE_ENABLE_I18N
+  {
+    wchar_t wc, start_wc, end_wc;
+    /* Three one-character wide strings laid out back to back for
+       wcscoll: [0] = range start, [2] = candidate, [4] = range end;
+       the odd slots stay L'\0' as terminators.  */
+    wchar_t cmp_buf[6] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
+
+    start_wc = ((start_elem->type == SB_CHAR || start_elem->type == COLL_SYM)
+                ? __btowc (start_ch) : start_elem->opr.wch);
+    end_wc = ((end_elem->type == SB_CHAR || end_elem->type == COLL_SYM)
+              ? __btowc (end_ch) : end_elem->opr.wch);
+    cmp_buf[0] = start_wc;
+    cmp_buf[4] = end_wc;
+    if (wcscoll (cmp_buf, cmp_buf + 4) > 0)
+      return REG_ERANGE;
+
+    /* Check the space of the arrays.  */
+    if (*range_alloc == mbcset->nranges)
+      {
+        /* There is not enough space, need realloc.  */
+        wchar_t *new_array_start, *new_array_end;
+        /* +1 in case of mbcset->nranges is 0.  */
+        int new_nranges = 2 * mbcset->nranges + 1;
+
+        /* Use realloc since mbcset->range_starts and mbcset->range_ends
+           are NULL if *range_alloc == 0.
+           Each result is stored back as soon as it is known to be good:
+           assuming re_realloc has realloc semantics, a successful call
+           invalidates the old pointer, so bailing out on a later failure
+           without recording it would leave MBCSET holding a dangling
+           pointer.  *RANGE_ALLOC is only raised once both arrays have
+           been grown.  */
+        new_array_start = re_realloc (mbcset->range_starts, wchar_t,
+                                      new_nranges);
+        if (BE (new_array_start == NULL, 0))
+          return REG_ESPACE;
+        mbcset->range_starts = new_array_start;
+
+        new_array_end = re_realloc (mbcset->range_ends, wchar_t,
+                                    new_nranges);
+        if (BE (new_array_end == NULL, 0))
+          return REG_ESPACE;
+        mbcset->range_ends = new_array_end;
+
+        *range_alloc = new_nranges;
+      }
+
+    mbcset->range_starts[mbcset->nranges] = start_wc;
+    mbcset->range_ends[mbcset->nranges++] = end_wc;
+
+    /* Build the table for single byte characters.  */
+    for (wc = 0; wc <= SBC_MAX; ++wc)
+      {
+        cmp_buf[2] = wc;
+        if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
+            && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
+          bitset_set (sbcset, wc);
+      }
+  }
+# else /* not RE_ENABLE_I18N */
+  {
+    unsigned int ch;
+    if (start_ch > end_ch)
+      return REG_ERANGE;
+    /* Build the table for single byte characters.  */
+    for (ch = 0; ch <= SBC_MAX; ++ch)
+      if (start_ch <= ch && ch <= end_ch)
+        bitset_set (sbcset, ch);
+  }
+# endif /* not RE_ENABLE_I18N */
+  return REG_NOERROR;
+}
+#endif /* not _LIBC */
+
+#ifndef _LIBC
+/* Helper function for parse_bracket_exp, only compiled when _LIBC is
+   not defined.
+   Add the collating element named NAME to the bracket expression.
+   This variant only accepts names that are exactly one byte long: that
+   byte is set in SBCSET and REG_NOERROR is returned; any other length
+   yields REG_ECOLLATE.
+   MBCSET and COLL_SYM_ALLOC exist only in RE_ENABLE_I18N builds and are
+   not referenced by this implementation.  */
+
+static reg_errcode_t
+# ifdef RE_ENABLE_I18N
+build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name)
+     re_charset_t *mbcset;
+     int *coll_sym_alloc;
+# else /* not RE_ENABLE_I18N */
+build_collating_symbol (sbcset, name)
+# endif /* not RE_ENABLE_I18N */
+     re_bitset_ptr_t sbcset;
+     const unsigned char *name;
+{
+  /* Reject anything but a single-byte symbol name up front.  */
+  if (BE (strlen ((const char *) name) != 1, 0))
+    return REG_ECOLLATE;
+
+  bitset_set (sbcset, name[0]);
+  return REG_NOERROR;
+}
+#endif /* not _LIBC */
+
+/* This function parse bracket expression like "[abc]", "[a-c]",
+   "[[.a-a.]]" etc.  */
+
+static bin_tree_t *
+parse_bracket_exp (regexp, dfa, token, syntax, err)
+     re_string_t *regexp;
+     re_dfa_t *dfa;
+     re_token_t *token;
+     reg_syntax_t syntax;
+     reg_errcode_t *err;
+{
+#ifdef _LIBC
+  const unsigned char *collseqmb;
+  const char *collseqwc;
+  uint32_t nrules;
+  int32_t table_size;
+  const int32_t *symb_table;
+  const unsigned char *extra;
+
+  /* Local function for parse_bracket_exp used in _LIBC environment.
+     Seek the collating symbol entry corresponding to NAME.
+     Return the index of the symbol in the SYMB_TABLE.  */
+
+  static inline int32_t
+  seek_collating_symbol_entry (name, name_len)
+        const unsigned char *name;
+        size_t name_len;
+    {
+      int32_t hash = elem_hash ((const char *) name, name_len);
+      int32_t elem = hash % table_size;
+      int32_t second = hash % (table_size - 2);
+      while (symb_table[2 * elem] != 0)
+       {
+         /* First compare the hashing value.  */
+         if (symb_table[2 * elem] == hash
+             /* Compare the length of the name.  */
+             && name_len == extra[symb_table[2 * elem + 1]]
+             /* Compare the name.  */
+             && memcmp (name, &extra[symb_table[2 * elem + 1] + 1],
+                        name_len) == 0)
+           {
+             /* Yep, this is the entry.  */
+             break;
+           }
+
+         /* Next entry.  */
+         elem += second;
+       }
+      return elem;
+    }
+
+  /* Local function for parse_bracket_exp used in _LIBC environment.
+     Look up the collation sequence value of BR_ELEM.
+     Return the value on success, UINT_MAX otherwise.  */
+
+  static inline unsigned int
+  lookup_collation_sequence_value (br_elem)
+        bracket_elem_t *br_elem;
+    {
+      if (br_elem->type == SB_CHAR)
+       {
+         /*
+         if (MB_CUR_MAX == 1)
+         */
+         if (nrules == 0)
+           return collseqmb[br_elem->opr.ch];
+         else
+           {
+             wint_t wc = __btowc (br_elem->opr.ch);
+             return collseq_table_lookup (collseqwc, wc);
+           }
+       }
+      else if (br_elem->type == MB_CHAR)
+       {
+         return collseq_table_lookup (collseqwc, br_elem->opr.wch);
+       }
+      else if (br_elem->type == COLL_SYM)
+       {
+         size_t sym_name_len = strlen ((char *) br_elem->opr.name);
+         if (nrules != 0)
+           {
+             int32_t elem, idx;
+             elem = seek_collating_symbol_entry (br_elem->opr.name,
+                                                 sym_name_len);
+             if (symb_table[2 * elem] != 0)
+               {
+                 /* We found the entry.  */
+                 idx = symb_table[2 * elem + 1];
+                 /* Skip the name of collating element name.  */
+                 idx += 1 + extra[idx];
+                 /* Skip the byte sequence of the collating element.  */
+                 idx += 1 + extra[idx];
+                 /* Adjust for the alignment.  */
+                 idx = (idx + 3) & ~3;
+                 /* Skip the multibyte collation sequence value.  */
+                 idx += sizeof (unsigned int);
+                 /* Skip the wide char sequence of the collating element.  */
+                 idx += sizeof (unsigned int) *
+                   (1 + *(unsigned int *) (extra + idx));
+                 /* Return the collation sequence value.  */
+                 return *(unsigned int *) (extra + idx);
+               }
+             else if (symb_table[2 * elem] == 0 && sym_name_len == 1)
+               {
+                 /* No valid character.  Match it as a single byte
+                    character.  */
+                 return collseqmb[br_elem->opr.name[0]];
+               }
+           }
+         else if (sym_name_len == 1)
+           return collseqmb[br_elem->opr.name[0]];
+       }
+      return UINT_MAX;
+    }
+
+  /* Local function for parse_bracket_exp used in _LIBC environment.
+     Build the range expression which starts from START_ELEM, and ends
+     at END_ELEM.  The result is written to MBCSET and SBCSET.
+     RANGE_ALLOC is the allocated size of mbcset->range_starts and
+     mbcset->range_ends; it is a pointer argument since we may
+     update it.  */
+
+  static inline reg_errcode_t
+# ifdef RE_ENABLE_I18N
+  build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem)
+        re_charset_t *mbcset;
+        int *range_alloc;
+# else /* not RE_ENABLE_I18N */
+  build_range_exp (sbcset, start_elem, end_elem)
+# endif /* not RE_ENABLE_I18N */
+        re_bitset_ptr_t sbcset;
+        bracket_elem_t *start_elem, *end_elem;
+    {
+      unsigned int ch;
+      uint32_t start_collseq;
+      uint32_t end_collseq;
+
+# ifdef RE_ENABLE_I18N
+      /* Check the space of the arrays.  */
+      if (*range_alloc == mbcset->nranges)
+       {
+         /* There are not enough space, need realloc.  */
+         uint32_t *new_array_start;
+         uint32_t *new_array_end;
+         int new_nranges;
+
+         /* +1 in case of mbcset->nranges is 0.  */
+         new_nranges = 2 * mbcset->nranges + 1;
+         /* Use realloc since mbcset->range_starts and mbcset->range_ends
+            are NULL if *range_alloc == 0.  */
+         new_array_start = re_realloc (mbcset->range_starts, uint32_t,
+                                       new_nranges);
+         new_array_end = re_realloc (mbcset->range_ends, uint32_t,
+                                     new_nranges);
+
+         if (BE (new_array_start == NULL || new_array_end == NULL, 0))
+           return REG_ESPACE;
+
+         mbcset->range_starts = new_array_start;
+         mbcset->range_ends = new_array_end;
+         *range_alloc = new_nranges;
+       }
+# endif /* RE_ENABLE_I18N */
+
+      /* Equivalence Classes and Character Classes can't be a range
+        start/end.  */
+      if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS
+             || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS,
+             0))
+       return REG_ERANGE;
+
+      start_collseq = lookup_collation_sequence_value (start_elem);
+      end_collseq = lookup_collation_sequence_value (end_elem);
+      /* Check start/end collation sequence values.  */
+      if (BE (start_collseq == UINT_MAX || end_collseq == UINT_MAX, 0))
+       return REG_ECOLLATE;
+      if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_collseq > end_collseq, 0))
+       return REG_ERANGE;
+
+# ifdef RE_ENABLE_I18N
+      /* Got valid collation sequence values, add them as a new entry.  */
+      mbcset->range_starts[mbcset->nranges] = start_collseq;
+      mbcset->range_ends[mbcset->nranges++] = end_collseq;
+# endif /* RE_ENABLE_I18N */
+
+      /* Build the table for single byte characters.  */
+      for (ch = 0; ch <= SBC_MAX; ch++)
+       {
+         uint32_t ch_collseq;
+         /*
+         if (MB_CUR_MAX == 1)
+         */
+         if (nrules == 0)
+           ch_collseq = collseqmb[ch];
+         else
+           ch_collseq = collseq_table_lookup (collseqwc, __btowc (ch));
+         if (start_collseq <= ch_collseq && ch_collseq <= end_collseq)
+           bitset_set (sbcset, ch);
+       }
+      return REG_NOERROR;
+    }
+
+  /* Local function for parse_bracket_exp used in _LIBC environment.
+     Build the collating element which is represented by NAME.
+     The result is written to MBCSET and SBCSET.
+     COLL_SYM_ALLOC is the allocated size of mbcset->coll_syms; it is a
+     pointer argument since we may update it.  */
+
+  static inline reg_errcode_t
+# ifdef RE_ENABLE_I18N
+  build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name)
+        re_charset_t *mbcset;
+        int *coll_sym_alloc;
+# else /* not RE_ENABLE_I18N */
+  build_collating_symbol (sbcset, name)
+# endif /* not RE_ENABLE_I18N */
+        re_bitset_ptr_t sbcset;
+        const unsigned char *name;
+    {
+      int32_t elem, idx;
+      size_t name_len = strlen ((const char *) name);
+      if (nrules != 0)
+       {
+         elem = seek_collating_symbol_entry (name, name_len);
+         if (symb_table[2 * elem] != 0)
+           {
+             /* We found the entry.  */
+             idx = symb_table[2 * elem + 1];
+             /* Skip the name of collating element name.  */
+             idx += 1 + extra[idx];
+           }
+         else if (symb_table[2 * elem] == 0 && name_len == 1)
+           {
+             /* No valid character, treat it as a normal
+                character.  */
+             bitset_set (sbcset, name[0]);
+             return REG_NOERROR;
+           }
+         else
+           return REG_ECOLLATE;
+
+# ifdef RE_ENABLE_I18N
+         /* Got valid collation sequence, add it as a new entry.  */
+         /* Check the space of the arrays.  */
+         if (*coll_sym_alloc == mbcset->ncoll_syms)
+           {
+             /* Not enough, realloc it.  */
+             /* +1 in case of mbcset->ncoll_syms is 0.  */
+             *coll_sym_alloc = 2 * mbcset->ncoll_syms + 1;
+             /* Use realloc since mbcset->coll_syms is NULL
+                if *alloc == 0.  */
+             mbcset->coll_syms = re_realloc (mbcset->coll_syms, int32_t,
+                                             *coll_sym_alloc);
+             if (BE (mbcset->coll_syms == NULL, 0))
+               return REG_ESPACE;
+           }
+         mbcset->coll_syms[mbcset->ncoll_syms++] = idx;
+# endif /* RE_ENABLE_I18N */
+         return REG_NOERROR;
+       }
+      else
+       {
+         if (BE (name_len != 1, 0))
+           return REG_ECOLLATE;
+         else
+           {
+             bitset_set (sbcset, name[0]);
+             return REG_NOERROR;
+           }
+       }
+    }
+#endif
+
+  re_token_t br_token;
+  re_bitset_ptr_t sbcset;
+#ifdef RE_ENABLE_I18N
+  re_charset_t *mbcset;
+  int coll_sym_alloc = 0, range_alloc = 0, mbchar_alloc = 0;
+  int equiv_class_alloc = 0, char_class_alloc = 0;
+#else /* not RE_ENABLE_I18N */
+  int non_match = 0;
+#endif /* not RE_ENABLE_I18N */
+  bin_tree_t *work_tree;
+  int token_len, new_idx;
+#ifdef _LIBC
+  collseqmb = (const unsigned char *)
+    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
+  nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
+  if (nrules)
+    {
+      /*
+      if (MB_CUR_MAX > 1)
+      */
+       collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
+      table_size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_SYMB_HASH_SIZEMB);
+      symb_table = (const int32_t *) _NL_CURRENT (LC_COLLATE,
+                                                 _NL_COLLATE_SYMB_TABLEMB);
+      extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
+                                                  _NL_COLLATE_SYMB_EXTRAMB);
+    }
+#endif
+  sbcset = (re_bitset_ptr_t) calloc (sizeof (unsigned int), BITSET_UINTS);
+#ifdef RE_ENABLE_I18N
+  mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
+#endif /* RE_ENABLE_I18N */
+#ifdef RE_ENABLE_I18N
+  if (BE (sbcset == NULL || mbcset == NULL, 0))
+#else
+  if (BE (sbcset == NULL, 0))
+#endif /* RE_ENABLE_I18N */
+    {
+      *err = REG_ESPACE;
+      return NULL;
+    }
+
+  token_len = peek_token_bracket (token, regexp, syntax);
+  if (BE (token->type == END_OF_RE, 0))
+    {
+      *err = REG_BADPAT;
+      goto parse_bracket_exp_free_return;
+    }
+  if (token->type == OP_NON_MATCH_LIST)
+    {
+#ifdef RE_ENABLE_I18N
+      int i;
+      mbcset->non_match = 1;
+#else /* not RE_ENABLE_I18N */
+      non_match = 1;
+#endif /* not RE_ENABLE_I18N */
+      if (syntax & RE_HAT_LISTS_NOT_NEWLINE)
+       bitset_set (sbcset, '\0');
+      re_string_skip_bytes (regexp, token_len); /* Skip a token.  */
+      token_len = peek_token_bracket (token, regexp, syntax);
+      if (BE (token->type == END_OF_RE, 0))
+       {
+         *err = REG_BADPAT;
+         goto parse_bracket_exp_free_return;
+       }
+#ifdef RE_ENABLE_I18N
+      if (MB_CUR_MAX > 1)
+       for (i = 0; i < SBC_MAX; ++i)
+         if (__btowc (i) == WEOF)
+           bitset_set (sbcset, i);
+#endif /* RE_ENABLE_I18N */
+    }
+
+  /* We treat the first ']' as a normal character.  */
+  if (token->type == OP_CLOSE_BRACKET)
+    token->type = CHARACTER;
+
+  while (1)
+    {
+      bracket_elem_t start_elem, end_elem;
+      unsigned char start_name_buf[BRACKET_NAME_BUF_SIZE];
+      unsigned char end_name_buf[BRACKET_NAME_BUF_SIZE];
+      reg_errcode_t ret;
+      int token_len2 = 0, is_range_exp = 0;
+      re_token_t token2;
+
+      start_elem.opr.name = start_name_buf;
+      ret = parse_bracket_element (&start_elem, regexp, token, token_len, dfa,
+                                  syntax);
+      if (BE (ret != REG_NOERROR, 0))
+       {
+         *err = ret;
+         goto parse_bracket_exp_free_return;
+       }
+
+      token_len = peek_token_bracket (token, regexp, syntax);
+      if (BE (token->type == END_OF_RE, 0))
+       {
+         *err = REG_BADPAT;
+         goto parse_bracket_exp_free_return;
+       }
+      if (token->type == OP_CHARSET_RANGE)
+       {
+         re_string_skip_bytes (regexp, token_len); /* Skip '-'.  */
+         token_len2 = peek_token_bracket (&token2, regexp, syntax);
+         if (BE (token->type == END_OF_RE, 0))
+           {
+             *err = REG_BADPAT;
+             goto parse_bracket_exp_free_return;
+           }
+         if (token2.type == OP_CLOSE_BRACKET)
+           {
+             /* We treat the last '-' as a normal character.  */
+             re_string_skip_bytes (regexp, -token_len);
+             token->type = CHARACTER;
+           }
+         else
+           is_range_exp = 1;
+       }
+
+      if (is_range_exp == 1)
+       {
+         end_elem.opr.name = end_name_buf;
+         ret = parse_bracket_element (&end_elem, regexp, &token2, token_len2,
+                                      dfa, syntax);
+         if (BE (ret != REG_NOERROR, 0))
+           {
+             *err = ret;
+             goto parse_bracket_exp_free_return;
+           }
+
+         token_len = peek_token_bracket (token, regexp, syntax);
+         if (BE (token->type == END_OF_RE, 0))
+           {
+             *err = REG_BADPAT;
+             goto parse_bracket_exp_free_return;
+           }
+         *err = build_range_exp (sbcset,
+#ifdef RE_ENABLE_I18N
+                                 mbcset, &range_alloc,
+#endif /* RE_ENABLE_I18N */
+                                 &start_elem, &end_elem);
+         if (BE (*err != REG_NOERROR, 0))
+           goto parse_bracket_exp_free_return;
+       }
+      else
+       {
+         switch (start_elem.type)
+           {
+           case SB_CHAR:
+             bitset_set (sbcset, start_elem.opr.ch);
+             break;
+#ifdef RE_ENABLE_I18N
+           case MB_CHAR:
+             /* Check whether the array has enough space.  */
+             if (mbchar_alloc == mbcset->nmbchars)
+               {
+                 /* Not enough, realloc it.  */
+                 /* +1 in case of mbcset->nmbchars is 0.  */
+                 mbchar_alloc = 2 * mbcset->nmbchars + 1;
+                 /* Use realloc since array is NULL if *alloc == 0.  */
+                 mbcset->mbchars = re_realloc (mbcset->mbchars, wchar_t,
+                                               mbchar_alloc);
+                 if (BE (mbcset->mbchars == NULL, 0))
+                   goto parse_bracket_exp_espace;
+               }
+             mbcset->mbchars[mbcset->nmbchars++] = start_elem.opr.wch;
+             break;
+#endif /* RE_ENABLE_I18N */
+           case EQUIV_CLASS:
+             *err = build_equiv_class (sbcset,
+#ifdef RE_ENABLE_I18N
+                                       mbcset, &equiv_class_alloc,
+#endif /* RE_ENABLE_I18N */
+                                       start_elem.opr.name);
+             if (BE (*err != REG_NOERROR, 0))
+               goto parse_bracket_exp_free_return;
+             break;
+           case COLL_SYM:
+             *err = build_collating_symbol (sbcset,
+#ifdef RE_ENABLE_I18N
+                                            mbcset, &coll_sym_alloc,
+#endif /* RE_ENABLE_I18N */
+                                            start_elem.opr.name);
+             if (BE (*err != REG_NOERROR, 0))
+               goto parse_bracket_exp_free_return;
+             break;
+           case CHAR_CLASS:
+             *err = build_charclass (sbcset,
+#ifdef RE_ENABLE_I18N
+                                     mbcset, &char_class_alloc,
+#endif /* RE_ENABLE_I18N */
+                                     start_elem.opr.name, syntax);
+             if (BE (*err != REG_NOERROR, 0))
+              goto parse_bracket_exp_free_return;
+             break;
+           default:
+             assert (0);
+             break;
+           }
+       }
+      if (token->type == OP_CLOSE_BRACKET)
+       break;
+    }
+
+  re_string_skip_bytes (regexp, token_len); /* Skip a token.  */
+
+  /* If it is non-matching list.  */
+#ifdef RE_ENABLE_I18N
+  if (mbcset->non_match)
+#else /* not RE_ENABLE_I18N */
+  if (non_match)
+#endif /* not RE_ENABLE_I18N */
+    bitset_not (sbcset);
+
+  /* Build a tree for simple bracket.  */
+  br_token.type = SIMPLE_BRACKET;
+  br_token.opr.sbcset = sbcset;
+  new_idx = re_dfa_add_node (dfa, br_token, 0);
+  work_tree = create_tree (NULL, NULL, 0, new_idx);
+  if (BE (new_idx == -1 || work_tree == NULL, 0))
+    goto parse_bracket_exp_espace;
+
+#ifdef RE_ENABLE_I18N
+  if (mbcset->nmbchars || mbcset->ncoll_syms || mbcset->nequiv_classes
+      || mbcset->nranges || (MB_CUR_MAX > 1 && (mbcset->nchar_classes
+                                               || mbcset->non_match)))
+    {
+      re_token_t alt_token;
+      bin_tree_t *mbc_tree;
+      /* Build a tree for complex bracket.  */
+      br_token.type = COMPLEX_BRACKET;
+      br_token.opr.mbcset = mbcset;
+      dfa->has_mb_node = 1;
+      new_idx = re_dfa_add_node (dfa, br_token, 0);
+      mbc_tree = create_tree (NULL, NULL, 0, new_idx);
+      if (BE (new_idx == -1 || mbc_tree == NULL, 0))
+       goto parse_bracket_exp_espace;
+      /* Then join them by ALT node.  */
+      dfa->has_plural_match = 1;
+      alt_token.type = OP_ALT;
+      new_idx = re_dfa_add_node (dfa, alt_token, 0);
+      work_tree = create_tree (work_tree, mbc_tree, 0, new_idx);
+      if (BE (new_idx != -1 && mbc_tree != NULL, 1))
+       return work_tree;
+    }
+  else
+    {
+      free_charset (mbcset);
+      return work_tree;
+    }
+#else /* not RE_ENABLE_I18N */
+  return work_tree;
+#endif /* not RE_ENABLE_I18N */
+
+ parse_bracket_exp_espace:
+  *err = REG_ESPACE;
+ parse_bracket_exp_free_return:
+  re_free (sbcset);
+#ifdef RE_ENABLE_I18N
+  free_charset (mbcset);
+#endif /* RE_ENABLE_I18N */
+  return NULL;
+}
+
+/* Parse one element of a bracket expression and describe it in *ELEM.
+   TOKEN is the token already peeked at the current position of REGEXP and
+   TOKEN_LEN is the number of bytes it occupies.
+   With RE_ENABLE_I18N, a character wider than one byte at the current
+   position is consumed and stored as an MB_CHAR element.  Otherwise the
+   token is skipped; the openers of "[.", "[:" and "[=" are handed over to
+   parse_bracket_symbol, and anything else becomes an SB_CHAR element holding
+   the token's character.
+   DFA and SYNTAX are not used by this function.
+   Returns REG_NOERROR, or the error reported by parse_bracket_symbol.  */
+
+static reg_errcode_t
+parse_bracket_element (elem, regexp, token, token_len, dfa, syntax)
+     bracket_elem_t *elem;
+     re_string_t *regexp;
+     re_token_t *token;
+     int token_len;
+     re_dfa_t *dfa;
+     reg_syntax_t syntax;
+{
+#ifdef RE_ENABLE_I18N
+  int mb_len = re_string_char_size_at (regexp, re_string_cur_idx (regexp));
+
+  if (mb_len > 1)
+    {
+      /* A multibyte character: record it and step over all of its bytes.  */
+      elem->opr.wch = re_string_wchar_at (regexp, re_string_cur_idx (regexp));
+      elem->type = MB_CHAR;
+      re_string_skip_bytes (regexp, mb_len);
+      return REG_NOERROR;
+    }
+#endif /* RE_ENABLE_I18N */
+
+  /* Step over the token that the caller has peeked.  */
+  re_string_skip_bytes (regexp, token_len);
+
+  switch (token->type)
+    {
+    case OP_OPEN_COLL_ELEM:
+    case OP_OPEN_CHAR_CLASS:
+    case OP_OPEN_EQUIV_CLASS:
+      /* A named symbol follows; let the symbol parser read it.  */
+      return parse_bracket_symbol (elem, regexp, token);
+    default:
+      break;
+    }
+
+  /* An ordinary single-byte character.  */
+  elem->opr.ch = token->opr.c;
+  elem->type = SB_CHAR;
+  return REG_NOERROR;
+}
+
+/* Parse a bracket symbol inside a bracket expression, i.e. one of
+   [:<character_class>:], [.<collating_element>.] or [=<equivalent_class>=].
+   TOKEN is the opening token; its character is the delimiter that, followed
+   by ']', terminates the symbol.  The bytes in between are copied into
+   ELEM->opr.name and NUL-terminated, and ELEM->type is set according to the
+   kind of opener.  For a character class the bytes are read with
+   re_string_fetch_byte_case, otherwise with re_string_fetch_byte.
+   Returns REG_EBRACK if the input ends before the terminator or the name
+   does not fit into BRACKET_NAME_BUF_SIZE bytes, REG_NOERROR otherwise.  */
+
+static reg_errcode_t
+parse_bracket_symbol (elem, regexp, token)
+     bracket_elem_t *elem;
+     re_string_t *regexp;
+     re_token_t *token;
+{
+  unsigned char delim = token->opr.c;
+  unsigned char ch;
+  int len = 0;
+
+  while (1)
+    {
+      /* Unterminated symbol, or a name too long for the buffer.  */
+      if (re_string_eoi(regexp) || len >= BRACKET_NAME_BUF_SIZE)
+        return REG_EBRACK;
+
+      ch = (token->type == OP_OPEN_CHAR_CLASS
+            ? re_string_fetch_byte_case (regexp)
+            : re_string_fetch_byte (regexp));
+
+      /* The delimiter directly followed by ']' closes the symbol.  */
+      if (ch == delim && re_string_peek_byte (regexp, 0) == ']')
+        break;
+
+      elem->opr.name[len++] = ch;
+    }
+
+  elem->opr.name[len] = '\0';
+  /* Step over the closing ']'.  */
+  re_string_skip_bytes (regexp, 1);
+
+  /* Any other token type leaves ELEM->type untouched.  */
+  if (token->type == OP_OPEN_COLL_ELEM)
+    elem->type = COLL_SYM;
+  else if (token->type == OP_OPEN_EQUIV_CLASS)
+    elem->type = EQUIV_CLASS;
+  else if (token->type == OP_OPEN_CHAR_CLASS)
+    elem->type = CHAR_CLASS;
+
+  return REG_NOERROR;
+}
+
+/* Helper function for parse_bracket_exp.
+   Add the equivalence class represented by NAME to the bracket expression.
+   The results are written to MBCSET and SBCSET.
+   EQUIV_CLASS_ALLOC is the allocated size of mbcset->equiv_classes; it is a
+   pointer argument since we may update it.
+
+   When compiled for libc with RE_ENABLE_I18N and the locale has collation
+   rules, every single-byte value whose weight sequence equals that of NAME
+   is set in SBCSET and the collation index of NAME is appended to
+   mbcset->equiv_classes.  Otherwise NAME must be exactly one byte long and
+   that byte is set in SBCSET.
+
+   Returns REG_NOERROR on success, REG_ECOLLATE if NAME is not a valid
+   element, and REG_ESPACE if mbcset->equiv_classes cannot be grown.  In the
+   REG_ESPACE case the old array and *EQUIV_CLASS_ALLOC are left untouched,
+   so the caller can still release the array.  */
+
+static reg_errcode_t
+#ifdef RE_ENABLE_I18N
+build_equiv_class (sbcset, mbcset, equiv_class_alloc, name)
+     re_charset_t *mbcset;
+     int *equiv_class_alloc;
+#else /* not RE_ENABLE_I18N */
+build_equiv_class (sbcset, name)
+#endif /* not RE_ENABLE_I18N */
+     re_bitset_ptr_t sbcset;
+     const unsigned char *name;
+{
+#if defined _LIBC && defined RE_ENABLE_I18N
+  uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
+  if (nrules != 0)
+    {
+      const int32_t *table, *indirect;
+      const unsigned char *weights, *extra, *cp;
+      unsigned char char_buf[2];
+      int32_t idx1, idx2;
+      unsigned int ch;
+      size_t len;
+      /* This #include defines a local function!  */
+# include <locale/weight.h>
+      /* Calculate the index for equivalence class.  */
+      cp = name;
+      table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
+      weights = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
+                                              _NL_COLLATE_WEIGHTMB);
+      extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
+                                                  _NL_COLLATE_EXTRAMB);
+      indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
+                                               _NL_COLLATE_INDIRECTMB);
+      idx1 = findidx (&cp);
+      if (BE (idx1 == 0 || cp < name + strlen ((const char *) name), 0))
+       /* This isn't a valid character.  */
+       return REG_ECOLLATE;
+
+      /* Build single byte matching table for this equivalence class.  */
+      char_buf[1] = (unsigned char) '\0';
+      len = weights[idx1];
+      for (ch = 0; ch < SBC_MAX; ++ch)
+       {
+         char_buf[0] = ch;
+         cp = char_buf;
+         idx2 = findidx (&cp);
+/*
+         idx2 = table[ch];
+*/
+         if (idx2 == 0)
+           /* This isn't a valid character.  */
+           continue;
+         if (len == weights[idx2])
+           {
+             /* Same weight length: compare the weight bytes one by one.  */
+             int cnt = 0;
+             while (cnt <= len &&
+                    weights[idx1 + 1 + cnt] == weights[idx2 + 1 + cnt])
+               ++cnt;
+
+             if (cnt > len)
+               bitset_set (sbcset, ch);
+           }
+       }
+      /* Check whether the array has enough space.  */
+      if (*equiv_class_alloc == mbcset->nequiv_classes)
+       {
+         /* Not enough, realloc it.  */
+         /* +1 in case of mbcset->nequiv_classes is 0.  */
+         int new_equiv_class_alloc = 2 * mbcset->nequiv_classes + 1;
+         /* Use realloc since the array is NULL if *alloc == 0.
+            The result goes into a temporary: on failure realloc leaves the
+            old array allocated, and overwriting the only pointer to it
+            would leak it.  */
+         int32_t *new_equiv_classes = re_realloc (mbcset->equiv_classes,
+                                                  int32_t,
+                                                  new_equiv_class_alloc);
+         if (BE (new_equiv_classes == NULL, 0))
+           return REG_ESPACE;
+         mbcset->equiv_classes = new_equiv_classes;
+         *equiv_class_alloc = new_equiv_class_alloc;
+       }
+      mbcset->equiv_classes[mbcset->nequiv_classes++] = idx1;
+    }
+  else
+#endif /* _LIBC && RE_ENABLE_I18N */
+    {
+      /* No collation rules available: only a single byte is accepted.  */
+      if (BE (strlen ((const char *) name) != 1, 0))
+       return REG_ECOLLATE;
+      bitset_set (sbcset, *name);
+    }
+  return REG_NOERROR;
+}
+
+/* Helper function for parse_bracket_exp.
+   Add the character class represented by CLASS_NAME to the bracket
+   expression.  The results are written to MBCSET and SBCSET.
+   CHAR_CLASS_ALLOC is the allocated size of mbcset->char_classes; it is a
+   pointer argument since we may update it.
+
+   If RE_ICASE is set in SYNTAX, "upper" and "lower" are both treated as
+   "alpha".  With RE_ENABLE_I18N the wctype of the class is appended to
+   mbcset->char_classes (this happens before the name is validated below).
+   Then every value below SBC_MAX accepted by the matching <ctype.h>
+   predicate is set in SBCSET.
+
+   Returns REG_NOERROR on success, REG_ECTYPE if the name is not one of the
+   twelve classes handled here, and REG_ESPACE if mbcset->char_classes
+   cannot be grown.  In the REG_ESPACE case the old array and
+   *CHAR_CLASS_ALLOC are left untouched, so the caller can still release
+   the array.  */
+
+static reg_errcode_t
+#ifdef RE_ENABLE_I18N
+build_charclass (sbcset, mbcset, char_class_alloc, class_name, syntax)
+     re_charset_t *mbcset;
+     int *char_class_alloc;
+#else /* not RE_ENABLE_I18N */
+build_charclass (sbcset, class_name, syntax)
+#endif /* not RE_ENABLE_I18N */
+     re_bitset_ptr_t sbcset;
+     const unsigned char *class_name;
+     reg_syntax_t syntax;
+{
+  int i;
+  const char *name = (const char *) class_name;
+
+  /* In case of REG_ICASE "upper" and "lower" match the both of
+     upper and lower cases.  */
+  if ((syntax & RE_ICASE)
+      && (strcmp (name, "upper") == 0 || strcmp (name, "lower") == 0))
+    name = "alpha";
+
+#ifdef RE_ENABLE_I18N
+  /* Check the space of the arrays.  */
+  if (*char_class_alloc == mbcset->nchar_classes)
+    {
+      /* Not enough, realloc it.  */
+      /* +1 in case of mbcset->nchar_classes is 0.  */
+      int new_char_class_alloc = 2 * mbcset->nchar_classes + 1;
+      /* Use realloc since array is NULL if *alloc == 0.
+         The result goes into a temporary: on failure realloc leaves the
+         old array allocated, and overwriting the only pointer to it would
+         leak it.  */
+      wctype_t *new_char_classes = re_realloc (mbcset->char_classes, wctype_t,
+                                               new_char_class_alloc);
+      if (BE (new_char_classes == NULL, 0))
+       return REG_ESPACE;
+      mbcset->char_classes = new_char_classes;
+      *char_class_alloc = new_char_class_alloc;
+    }
+  mbcset->char_classes[mbcset->nchar_classes++] = __wctype (name);
+#endif /* RE_ENABLE_I18N */
+
+/* Set in SBCSET every single-byte value for which CTYPE_FUNC is true.  */
+#define BUILD_CHARCLASS_LOOP(ctype_func)\
+    for (i = 0; i < SBC_MAX; ++i)      \
+      {                                        \
+       if (ctype_func (i))             \
+         bitset_set (sbcset, i);       \
+      }
+
+  if (strcmp (name, "alnum") == 0)
+    BUILD_CHARCLASS_LOOP (isalnum)
+  else if (strcmp (name, "cntrl") == 0)
+    BUILD_CHARCLASS_LOOP (iscntrl)
+  else if (strcmp (name, "lower") == 0)
+    BUILD_CHARCLASS_LOOP (islower)
+  else if (strcmp (name, "space") == 0)
+    BUILD_CHARCLASS_LOOP (isspace)
+  else if (strcmp (name, "alpha") == 0)
+    BUILD_CHARCLASS_LOOP (isalpha)
+  else if (strcmp (name, "digit") == 0)
+    BUILD_CHARCLASS_LOOP (isdigit)
+  else if (strcmp (name, "print") == 0)
+    BUILD_CHARCLASS_LOOP (isprint)
+  else if (strcmp (name, "upper") == 0)
+    BUILD_CHARCLASS_LOOP (isupper)
+  else if (strcmp (name, "blank") == 0)
+    BUILD_CHARCLASS_LOOP (isblank)
+  else if (strcmp (name, "graph") == 0)
+    BUILD_CHARCLASS_LOOP (isgraph)
+  else if (strcmp (name, "punct") == 0)
+    BUILD_CHARCLASS_LOOP (ispunct)
+  else if (strcmp (name, "xdigit") == 0)
+    BUILD_CHARCLASS_LOOP (isxdigit)
+  else
+    return REG_ECTYPE;
+
+  return REG_NOERROR;
+}
+
+/* Build the parse tree for the word-character operator `\w', or for its
+   complement when NOT is non-zero.  The matched set is the "alnum"
+   character class plus '_'.
+   A SIMPLE_BRACKET node is always added to DFA; with RE_ENABLE_I18N and
+   MB_CUR_MAX > 1 a COMPLEX_BRACKET node is added as well and both are
+   joined by an OP_ALT node.
+   Returns the new tree on success.  On failure returns NULL and stores the
+   reason in *ERR (REG_ESPACE, or the error reported by build_charclass).  */
+
+static bin_tree_t *
+build_word_op (dfa, not, err)
+     re_dfa_t *dfa;
+     int not;
+     reg_errcode_t *err;
+{
+  re_bitset_ptr_t sbcset;
+#ifdef RE_ENABLE_I18N
+  re_charset_t *mbcset;
+  int alloc = 0;
+#else /* not RE_ENABLE_I18N */
+  int non_match = 0;
+#endif /* not RE_ENABLE_I18N */
+  reg_errcode_t ret;
+  re_token_t br_token;
+  bin_tree_t *tree;
+  int new_idx;
+
+  sbcset = (re_bitset_ptr_t) calloc (sizeof (unsigned int), BITSET_UINTS);
+#ifdef RE_ENABLE_I18N
+  mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
+#endif /* RE_ENABLE_I18N */
+
+#ifdef RE_ENABLE_I18N
+  if (BE (sbcset == NULL || mbcset == NULL, 0))
+#else /* not RE_ENABLE_I18N */
+  if (BE (sbcset == NULL, 0))
+#endif /* not RE_ENABLE_I18N */
+    {
+      /* One of the two allocations may have succeeded; release it so that
+         it does not leak.  MBCSET is still empty here, so a plain free is
+         sufficient (and free_charset could not cope with NULL).  */
+      re_free (sbcset);
+#ifdef RE_ENABLE_I18N
+      re_free (mbcset);
+#endif /* RE_ENABLE_I18N */
+      *err = REG_ESPACE;
+      return NULL;
+    }
+
+  if (not)
+    {
+#ifdef RE_ENABLE_I18N
+      int i;
+      /*
+      if (syntax & RE_HAT_LISTS_NOT_NEWLINE)
+       bitset_set(cset->sbcset, '\0');
+      */
+      mbcset->non_match = 1;
+      /* In a multibyte locale, pre-set the bytes that are not characters on
+         their own; the bitset_not below clears them again.  */
+      if (MB_CUR_MAX > 1)
+       for (i = 0; i < SBC_MAX; ++i)
+         if (__btowc (i) == WEOF)
+           bitset_set (sbcset, i);
+#else /* not RE_ENABLE_I18N */
+      non_match = 1;
+#endif /* not RE_ENABLE_I18N */
+    }
+
+  /* We don't care the syntax in this case.
+     Word constituents are letters, digits and '_', hence "alnum" rather
+     than "alpha".  */
+  ret = build_charclass (sbcset,
+#ifdef RE_ENABLE_I18N
+                        mbcset, &alloc,
+#endif /* RE_ENABLE_I18N */
+                        (const unsigned char *) "alnum", 0);
+
+  if (BE (ret != REG_NOERROR, 0))
+    {
+      re_free (sbcset);
+#ifdef RE_ENABLE_I18N
+      free_charset (mbcset);
+#endif /* RE_ENABLE_I18N */
+      *err = ret;
+      return NULL;
+    }
+  /* \w match '_' also.  */
+  bitset_set (sbcset, '_');
+
+  /* If it is non-matching list.  */
+#ifdef RE_ENABLE_I18N
+  if (mbcset->non_match)
+#else /* not RE_ENABLE_I18N */
+  if (non_match)
+#endif /* not RE_ENABLE_I18N */
+    bitset_not (sbcset);
+
+  /* Build a tree for simple bracket.  */
+  br_token.type = SIMPLE_BRACKET;
+  br_token.opr.sbcset = sbcset;
+  new_idx = re_dfa_add_node (dfa, br_token, 0);
+  tree = create_tree (NULL, NULL, 0, new_idx);
+  if (BE (new_idx == -1 || tree == NULL, 0))
+    goto build_word_op_espace;
+
+#ifdef RE_ENABLE_I18N
+  if (MB_CUR_MAX > 1)
+    {
+      re_token_t alt_token;
+      bin_tree_t *mbc_tree;
+      /* Build a tree for complex bracket.  */
+      br_token.type = COMPLEX_BRACKET;
+      br_token.opr.mbcset = mbcset;
+      dfa->has_mb_node = 1;
+      new_idx = re_dfa_add_node (dfa, br_token, 0);
+      mbc_tree = create_tree (NULL, NULL, 0, new_idx);
+      if (BE (new_idx == -1 || mbc_tree == NULL, 0))
+       goto build_word_op_espace;
+      /* Then join them by ALT node.  */
+      alt_token.type = OP_ALT;
+      new_idx = re_dfa_add_node (dfa, alt_token, 0);
+      tree = create_tree (tree, mbc_tree, 0, new_idx);
+      if (BE (tree == NULL, 0))
+       {
+         /* create_tree has already released both subtrees.  Report the
+            failure instead of returning NULL with *ERR untouched.
+            NOTE(review): SBCSET and MBCSET are deliberately not freed
+            here because both have been handed to re_dfa_add_node above;
+            presumably the DFA teardown releases them -- confirm.  */
+         *err = REG_ESPACE;
+         return NULL;
+       }
+      if (BE (new_idx != -1, 1))
+       return tree;
+    }
+  else
+    {
+      free_charset (mbcset);
+      return tree;
+    }
+#else /* not RE_ENABLE_I18N */
+  return tree;
+#endif /* not RE_ENABLE_I18N */
+
+ build_word_op_espace:
+  re_free (sbcset);
+#ifdef RE_ENABLE_I18N
+  free_charset (mbcset);
+#endif /* RE_ENABLE_I18N */
+  *err = REG_ESPACE;
+  return NULL;
+}
+
+/* Read one number of an interval expression such as "a{1,3}" from INPUT.
+   Tokens are fetched into *TOKEN until a ',' or an OP_CLOSE_DUP_NUM token
+   is seen; that terminating token is left in *TOKEN.
+   Returns the number read, -1 if the number field is empty like "{,1}",
+   and -2 if an error occurred: the input ended, a token other than a
+   decimal digit was seen, or the value exceeded RE_DUP_MAX.  */
+
+static int
+fetch_number (input, token, syntax)
+     re_string_t *input;
+     re_token_t *token;
+     reg_syntax_t syntax;
+{
+  int value = -1;
+
+  for (;;)
+    {
+      unsigned char c;
+
+      *token = fetch_token (input, syntax);
+      c = token->opr.c;
+      if (BE (token->type == END_OF_RE, 0))
+        return -2;
+      if (token->type == OP_CLOSE_DUP_NUM || c == ',')
+        return value;
+
+      /* Once an error has been recorded it sticks until the terminator.  */
+      if (value == -2 || token->type != CHARACTER || c < '0' || '9' < c)
+        value = -2;
+      else if (value == -1)
+        value = c - '0';
+      else
+        value = value * 10 + c - '0';
+
+      if (value > RE_DUP_MAX)
+        value = -2;
+    }
+}
+\f
+#ifdef RE_ENABLE_I18N
+/* Release the arrays held by the multibyte character set CSET and then
+   CSET itself.  The collating-symbol, equivalence-class and range arrays
+   are released only when compiled for libc.  */
+static void
+free_charset (re_charset_t *cset)
+{
+# ifdef _LIBC
+  re_free (cset->range_ends);
+  re_free (cset->range_starts);
+  re_free (cset->equiv_classes);
+  re_free (cset->coll_syms);
+# endif
+  re_free (cset->char_classes);
+  re_free (cset->mbchars);
+  /* The structure itself goes last.  */
+  re_free (cset);
+}
+#endif /* RE_ENABLE_I18N */
+\f
+/* Functions for binary tree operation.  */
+
+/* Allocate a tree node of type TYPE that refers to DFA node INDEX and has
+   LEFT and RIGHT as its children (either may be NULL).  The children's
+   parent links are pointed at the new node.
+   Returns the new node, or NULL if the allocation fails.
+   Note: on allocation failure LEFT and RIGHT are freed here, so the caller
+   must not free them again.  */
+
+static bin_tree_t *
+create_tree (left, right, type, index)
+     bin_tree_t *left;
+     bin_tree_t *right;
+     re_token_type_t type;
+     int index;
+{
+  bin_tree_t *node = re_malloc (bin_tree_t, 1);
+
+  if (BE (node != NULL, 1))
+    {
+      node->parent = NULL;
+      node->left = left;
+      node->right = right;
+      node->type = type;
+      node->node_idx = index;
+      node->first = -1;
+      node->next = -1;
+      re_node_set_init_empty (&node->eclosure);
+
+      /* Hook the children up to their new parent.  */
+      if (left != NULL)
+        left->parent = node;
+      if (right != NULL)
+        right->parent = node;
+      return node;
+    }
+
+  /* Out of memory: the subtrees are consumed on this path as well.  */
+  free_bin_tree (left);
+  free_bin_tree (right);
+  return NULL;
+}
+
+/* Recursively free the subtree rooted at TREE: first the left child, then
+   the right child, then the node itself.  A NULL TREE is a no-op.
+   The node's eclosure set is not released here.  */
+
+static void
+free_bin_tree (tree)
+     bin_tree_t *tree;
+{
+  if (tree != NULL)
+    {
+      /*re_node_set_free (&tree->eclosure);*/
+      free_bin_tree (tree->left);
+      free_bin_tree (tree->right);
+      re_free (tree);
+    }
+}
+
+/* Duplicate the subtree rooted at SRC and return the root of the copy.
+   For a node of type NON_TYPE the DFA node it refers to is copied with
+   re_dfa_add_node and the copy is flagged as duplicated; for any other
+   type the type value itself is used as the node index.
+   Returns NULL if a node cannot be added or allocated; everything that
+   was duplicated so far is freed in that case.  */
+
+static bin_tree_t *
+duplicate_tree (src, dfa)
+     const bin_tree_t *src;
+     re_dfa_t *dfa;
+{
+  bin_tree_t *left = NULL, *right = NULL;
+  int new_node_idx;
+  /* Since node indices must be according to Post-order of the tree,
+     we must duplicate the left at first.  */
+  if (src->left != NULL)
+    {
+      left = duplicate_tree (src->left, dfa);
+      if (left == NULL)
+       return NULL;
+    }
+
+  /* Secondly, duplicate the right.  */
+  if (src->right != NULL)
+    {
+      right = duplicate_tree (src->right, dfa);
+      if (right == NULL)
+       {
+         free_bin_tree (left);
+         return NULL;
+       }
+    }
+
+  /* At last, duplicate itself.  */
+  if (src->type == NON_TYPE)
+    {
+      new_node_idx = re_dfa_add_node (dfa, dfa->nodes[src->node_idx], 0);
+      /* Check for failure before touching the new node: indexing
+         dfa->nodes with -1 would write outside the array.  */
+      if (BE (new_node_idx == -1, 0))
+       {
+         free_bin_tree (left);
+         free_bin_tree (right);
+         return NULL;
+       }
+      dfa->nodes[new_node_idx].duplicated = 1;
+    }
+  else
+    new_node_idx = src->type;
+
+  /* create_tree frees LEFT and RIGHT itself when its allocation fails, so
+     they must not be freed here a second time.  */
+  return create_tree (left, right, src->type, new_node_idx);
+}
diff --git a/lib/regex/regex-sh.h b/lib/regex/regex-sh.h

new file mode 100644 (file)

index 0000000..723d2d8
--- /dev/null
+++ b/lib/regex/regex-sh.h
@@ -0,0 +1,24 @@
+/*
+ *  Regular Expression Functions from glibc 2.3.2
+ *  (renamed to sh_* to avoid clashes with the system libraries)
+ *
+ *  This wrapper defines the renaming macros and then includes the bundled
+ *  regex.h, so everything declared there gets the sh_ names.
+ */
+
+#ifndef _UCW_REGEX_H
+#define _UCW_REGEX_H
+
+/* Map each public regex entry point onto its sh_-prefixed counterpart.  */
+#define regfree sh_regfree
+#define regexec sh_regexec
+#define regcomp sh_regcomp
+#define regerror sh_regerror
+#define re_set_registers sh_re_set_registers
+/* NOTE(review): this maps to sh_re_match2, without the underscore used by
+   sh_re_search_2 below.  Looks like an inconsistency in naming only; left
+   unchanged because renaming would alter the exported symbol -- confirm.  */
+#define re_match_2 sh_re_match2
+#define re_match sh_re_match
+#define re_search sh_re_search
+#define re_compile_pattern sh_re_compile_pattern
+#define re_set_syntax sh_re_set_syntax
+#define re_search_2 sh_re_search_2
+#define re_compile_fastmap sh_re_compile_fastmap
+
+/* Must come after the macros above so that the declarations are renamed.  */
+#include "lib/regex/regex.h"
+
+#endif
diff --git a/lib/regex/regex.c b/lib/regex/regex.c

new file mode 100644 (file)

index 0000000..7cc5237
--- /dev/null
+++ b/lib/regex/regex.c
@@ -0,0 +1,10 @@
+/*
+ *  Regular Expression Functions from glibc 2.3.2
+ */
+
+#include <sys/types.h>
+#include "regex-sh.h"
+#include "regex_internal.h"
+#include "regex_internal.c"
+#include "regcomp.c"
+#include "regexec.c"
diff --git a/lib/regex/regex.h b/lib/regex/regex.h

new file mode 100644 (file)

index 0000000..fac441d
--- /dev/null
+++ b/lib/regex/regex.h
@@ -0,0 +1,574 @@
+/* Definitions for data structures and routines for the regular
+   expression library.
+   Copyright (C) 1985,1989-93,1995-98,2000,2001,2002
+   Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#ifndef _REGEX_H
+#define _REGEX_H 1
+
+/* Allow the use in C++ code.  */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* POSIX says that <sys/types.h> must be included (by the caller) before
+   <regex.h>.  */
+
+#if !defined _POSIX_C_SOURCE && !defined _POSIX_SOURCE && defined VMS
+/* VMS doesn't have `size_t' in <sys/types.h>, even though POSIX says it
+   should be there.  */
+# include <stddef.h>
+#endif
+
+/* The following two types have to be signed and unsigned integer type
+   wide enough to hold a value of a pointer.  For most ANSI compilers
+   ptrdiff_t and size_t should be likely OK.  Still size of these two
+   types is 2 for Microsoft C.  Ugh... */
+typedef long int s_reg_t;
+typedef unsigned long int active_reg_t;
+
+/* The following bits are used to determine the regexp syntax we
+   recognize.  The set/not-set meanings are chosen so that Emacs syntax
+   remains the value 0.  The bits are given in alphabetical order, and
+   the definitions shifted by one from the previous bit; thus, when we
+   add or remove a bit, only one other definition need change.  */
+typedef unsigned long int reg_syntax_t;
+
+/* If this bit is not set, then \ inside a bracket expression is literal.
+   If set, then such a \ quotes the following character.  */
+#define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1)
+
+/* If this bit is not set, then + and ? are operators, and \+ and \? are
+     literals.
+   If set, then \+ and \? are operators and + and ? are literals.  */
+#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1)
+
+/* If this bit is set, then character classes are supported.  They are:
+     [:alpha:], [:upper:], [:lower:],  [:digit:], [:alnum:], [:xdigit:],
+     [:space:], [:print:], [:punct:], [:graph:], [:cntrl:], and [:blank:].
+   If not set, then character classes are not supported.  */
+#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1)
+
+/* If this bit is set, then ^ and $ are always anchors (outside bracket
+     expressions, of course).
+   If this bit is not set, then it depends:
+        ^  is an anchor if it is at the beginning of a regular
+           expression or after an open-group or an alternation operator;
+        $  is an anchor if it is at the end of a regular expression, or
+           before a close-group or an alternation operator.
+
+   This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
+   POSIX draft 11.2 says that * etc. in leading positions is undefined.
+   We already implemented a previous draft which made those constructs
+   invalid, though, so we haven't changed the code back.  */
+#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1)
+
+/* If this bit is set, then special characters are always special
+     regardless of where they are in the pattern.
+   If this bit is not set, then special characters are special only in
+     some contexts; otherwise they are ordinary.  Specifically,
+     * + ? and intervals are only special when not after the beginning,
+     open-group, or alternation operator.  */
+#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)
+
+/* If this bit is set, then *, +, ?, and { cannot be first in an re or
+     immediately after an alternation or begin-group operator.  */
+#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1)
+
+/* If this bit is set, then . matches newline.
+   If not set, then it doesn't.  */
+#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1)
+
+/* If this bit is set, then . doesn't match NUL.
+   If not set, then it does.  */
+#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1)
+
+/* If this bit is set, nonmatching lists [^...] do not match newline.
+   If not set, they do.  */
+#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1)
+
+/* If this bit is set, either \{...\} or {...} defines an
+     interval, depending on RE_NO_BK_BRACES.
+   If not set, \{, \}, {, and } are literals.  */
+#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)
+
+/* If this bit is set, +, ? and | aren't recognized as operators.
+   If not set, they are.  */
+#define RE_LIMITED_OPS (RE_INTERVALS << 1)
+
+/* If this bit is set, newline is an alternation operator.
+   If not set, newline is literal.  */
+#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)
+
+/* If this bit is set, then `{...}' defines an interval, and \{ and \}
+     are literals.
+  If not set, then `\{...\}' defines an interval.  */
+#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)
+
+/* If this bit is set, (...) defines a group, and \( and \) are literals.
+   If not set, \(...\) defines a group, and ( and ) are literals.  */
+#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1)
+
+/* If this bit is set, then \<digit> matches <digit>.
+   If not set, then \<digit> is a back-reference.  */
+#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)
+
+/* If this bit is set, then | is an alternation operator, and \| is literal.
+   If not set, then \| is an alternation operator, and | is literal.  */
+#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1)
+
+/* If this bit is set, then a starting range point collating higher
+     than the ending range point, as in [z-a], is invalid.
+   If not set, then when the starting range point collates higher than
+     the ending range point, the range is ignored.  */
+#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)
+
+/* If this bit is set, then an unmatched ) is ordinary.
+   If not set, then an unmatched ) is invalid.  */
+#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)
+
+/* If this bit is set, succeed as soon as we match the whole pattern,
+   without further backtracking.  */
+#define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1)
+
+/* If this bit is set, do not process the GNU regex operators.
+   If not set, then the GNU regex operators are recognized. */
+#define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1)
+
+/* If this bit is set, turn on internal regex debugging.
+   If not set, and debugging was on, turn it off.
+   This only works if regex.c is compiled -DDEBUG.
+   We define this bit always, so that all that's needed to turn on
+   debugging is to recompile regex.c; the calling code can always have
+   this bit set, and it won't affect anything in the normal case. */
+#define RE_DEBUG (RE_NO_GNU_OPS << 1)
+
+/* If this bit is set, a syntactically invalid interval is treated as
+   a string of ordinary characters.  For example, the ERE 'a{1' is
+   treated as 'a\{1'.  */
+#define RE_INVALID_INTERVAL_ORD (RE_DEBUG << 1)
+
+/* If this bit is set, then ignore case when matching.
+   If not set, then case is significant.  */
+#define RE_ICASE (RE_INVALID_INTERVAL_ORD << 1)
+
+/* This global variable defines the particular regexp syntax to use (for
+   some interfaces).  When a regexp is compiled, the syntax used is
+   stored in the pattern buffer, so changing this does not affect
+   already-compiled regexps.  */
+extern reg_syntax_t re_syntax_options;
+\f
+/* Define combinations of the above bits for the standard possibilities.
+   (The [[[ comments delimit what gets put into the Texinfo file, so
+   don't delete them!)  */
+/* [[[begin syntaxes]]] */
+#define RE_SYNTAX_EMACS 0
+
+#define RE_SYNTAX_AWK                                                  \
+  (RE_BACKSLASH_ESCAPE_IN_LISTS   | RE_DOT_NOT_NULL                    \
+   | RE_NO_BK_PARENS              | RE_NO_BK_REFS                      \
+   | RE_NO_BK_VBAR                | RE_NO_EMPTY_RANGES                 \
+   | RE_DOT_NEWLINE              | RE_CONTEXT_INDEP_ANCHORS            \
+   | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS)
+
+#define RE_SYNTAX_GNU_AWK                                              \
+  ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DEBUG)        \
+   & ~(RE_DOT_NOT_NULL | RE_INTERVALS | RE_CONTEXT_INDEP_OPS           \
+       | RE_CONTEXT_INVALID_OPS ))
+
+#define RE_SYNTAX_POSIX_AWK                                            \
+  (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS             \
+   | RE_INTERVALS          | RE_NO_GNU_OPS)
+
+#define RE_SYNTAX_GREP                                                 \
+  (RE_BK_PLUS_QM              | RE_CHAR_CLASSES                                \
+   | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS                           \
+   | RE_NEWLINE_ALT)
+
+#define RE_SYNTAX_EGREP                                                        \
+  (RE_CHAR_CLASSES        | RE_CONTEXT_INDEP_ANCHORS                   \
+   | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE                   \
+   | RE_NEWLINE_ALT       | RE_NO_BK_PARENS                            \
+   | RE_NO_BK_VBAR)
+
+#define RE_SYNTAX_POSIX_EGREP                                          \
+  (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES                    \
+   | RE_INVALID_INTERVAL_ORD)
+
+/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff.  */
+#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC
+
+#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC
+
+/* Syntax bits common to both basic and extended POSIX regex syntax.  */
+#define _RE_SYNTAX_POSIX_COMMON                                                \
+  (RE_CHAR_CLASSES | RE_DOT_NEWLINE      | RE_DOT_NOT_NULL             \
+   | RE_INTERVALS  | RE_NO_EMPTY_RANGES)
+
+#define RE_SYNTAX_POSIX_BASIC                                          \
+  (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM)
+
+/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes
+   RE_LIMITED_OPS, i.e., \? \+ \| are not recognized.  Actually, this
+   isn't minimal, since other operators, such as \`, aren't disabled.  */
+#define RE_SYNTAX_POSIX_MINIMAL_BASIC                                  \
+  (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)
+
+#define RE_SYNTAX_POSIX_EXTENDED                                       \
+  (_RE_SYNTAX_POSIX_COMMON  | RE_CONTEXT_INDEP_ANCHORS                 \
+   | RE_CONTEXT_INDEP_OPS   | RE_NO_BK_BRACES                          \
+   | RE_NO_BK_PARENS        | RE_NO_BK_VBAR                            \
+   | RE_CONTEXT_INVALID_OPS | RE_UNMATCHED_RIGHT_PAREN_ORD)
+
+/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INDEP_OPS is
+   removed and RE_NO_BK_REFS is added.  */
+#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED                               \
+  (_RE_SYNTAX_POSIX_COMMON  | RE_CONTEXT_INDEP_ANCHORS                 \
+   | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES                          \
+   | RE_NO_BK_PARENS        | RE_NO_BK_REFS                            \
+   | RE_NO_BK_VBAR         | RE_UNMATCHED_RIGHT_PAREN_ORD)
+/* [[[end syntaxes]]] */
+\f
+/* Maximum number of duplicates an interval can allow.  Some systems
+   (erroneously) define this in other header files, but we want our
+   value, so remove any previous define.  */
+#ifdef RE_DUP_MAX
+# undef RE_DUP_MAX
+#endif
+/* If sizeof(int) == 2, then ((1 << 15) - 1) overflows.  */
+#define RE_DUP_MAX (0x7fff)
+
+
+/* POSIX `cflags' bits (i.e., information for `regcomp').  Each flag is
+   defined as the previous one shifted left by one, giving the values
+   1, 2, 4 and 8.  */
+
+/* If this bit is set, then use extended regular expression syntax.
+   If not set, then use basic regular expression syntax.  */
+#define REG_EXTENDED 1
+
+/* If this bit is set, then ignore case when matching.
+   If not set, then case is significant.  */
+#define REG_ICASE (REG_EXTENDED << 1)
+
+/* If this bit is set, then anchors do not match at newline
+     characters in the string.
+   If not set, then anchors do match at newlines.
+   NOTE(review): this wording looks inverted.  POSIX specifies that with
+   REG_NEWLINE set the anchors DO match next to a newline (and the
+   match-any-character operators do not match a newline) -- confirm
+   against the regcomp implementation before relying on this text.  */
+#define REG_NEWLINE (REG_ICASE << 1)
+
+/* If this bit is set, then report only success or fail in regexec.
+   If not set, then returns differ between not matching and errors.  */
+#define REG_NOSUB (REG_NEWLINE << 1)
+
+
+/* POSIX `eflags' bits (i.e., information for regexec).  */
+
+/* If this bit is set, then the beginning-of-line operator doesn't match
+     the beginning of the string (presumably because it's not the
+     beginning of a line).
+   If not set, then the beginning-of-line operator does match the
+     beginning of the string.  */
+#define REG_NOTBOL 1
+
+/* Like REG_NOTBOL, except for the end-of-line operator and the end of
+   the string.  */
+#define REG_NOTEOL (1 << 1)
+
+
+/* Error codes of the regex routines.  REG_NOERROR is zero; the codes
+   that follow it take consecutive positive values in declaration order.
+   If any error codes are removed, changed, or added, update the
+   `re_error_msg' table in regex.c.  */
+typedef enum
+{
+#ifdef _XOPEN_SOURCE
+  REG_ENOSYS = -1,     /* This will never happen for this implementation.  */
+#endif
+
+  REG_NOERROR = 0,     /* Success.  */
+  REG_NOMATCH,         /* Didn't find a match (for regexec).  */
+
+  /* POSIX regcomp return error codes.  (In the order listed in the
+     standard.)  */
+  REG_BADPAT,          /* Invalid pattern.  */
+  REG_ECOLLATE,                /* Not implemented.  */
+  REG_ECTYPE,          /* Invalid character class name.  */
+  REG_EESCAPE,         /* Trailing backslash.  */
+  REG_ESUBREG,         /* Invalid back reference.  */
+  REG_EBRACK,          /* Unmatched left bracket.  */
+  REG_EPAREN,          /* Parenthesis imbalance.  */
+  REG_EBRACE,          /* Unmatched \{.  */
+  REG_BADBR,           /* Invalid contents of \{\}.  */
+  REG_ERANGE,          /* Invalid range end.  */
+  REG_ESPACE,          /* Ran out of memory.  */
+  REG_BADRPT,          /* No preceding re for repetition op.  */
+
+  /* Error codes we've added (not part of the POSIX list above).  */
+  REG_EEND,            /* Premature end.  */
+  REG_ESIZE,           /* Compiled pattern bigger than 2^16 bytes.  */
+  REG_ERPAREN          /* Unmatched ) or \); not returned from regcomp.  */
+} reg_errcode_t;
+\f
+/* This data structure represents a compiled pattern.  Before calling
+   the pattern compiler, the fields `buffer', `allocated', `fastmap',
+   `translate', and `no_sub' can be set.  After the pattern has been
+   compiled, the `re_nsub' field is available.  All other fields are
+   private to the regex routines.  */
+
+/* Type of the `translate' table.  Defaults to `char *' unless the
+   includer has already defined RE_TRANSLATE_TYPE.  */
+#ifndef RE_TRANSLATE_TYPE
+# define RE_TRANSLATE_TYPE char *
+#endif
+
+struct re_pattern_buffer
+{
+/* [[[begin pattern_buffer]]] */
+       /* Space that holds the compiled pattern.  It is declared as
+          `unsigned char *' because its elements are
+           sometimes used as array indexes.  */
+  unsigned char *buffer;
+
+       /* Number of bytes to which `buffer' points.  */
+  unsigned long int allocated;
+
+       /* Number of bytes actually used in `buffer'.  */
+  unsigned long int used;
+
+        /* Syntax setting with which the pattern was compiled.  */
+  reg_syntax_t syntax;
+
+        /* Pointer to a fastmap, if any, otherwise zero.  re_search uses
+           the fastmap, if there is one, to skip over impossible
+           starting points for matches.  */
+  char *fastmap;
+
+        /* Either a translate table to apply to all characters before
+           comparing them, or zero for no translation.  The translation
+           is applied to a pattern when it is compiled and to a string
+           when it is matched.  */
+  RE_TRANSLATE_TYPE translate;
+
+       /* Number of subexpressions found by the compiler.  */
+  size_t re_nsub;
+
+        /* Zero if this pattern cannot match the empty string, one else.
+           Well, in truth it's used only in `re_search_2', to see
+           whether or not we should use the fastmap, so we don't set
+           this absolutely perfectly; see `re_compile_fastmap' (the
+           `duplicate' case).  */
+  unsigned can_be_null : 1;
+
+        /* If REGS_UNALLOCATED, allocate space in the `regs' structure
+             for `max (RE_NREGS, re_nsub + 1)' groups.
+           If REGS_REALLOCATE, reallocate space if necessary.
+           If REGS_FIXED, use what's there.
+           The three macros are preprocessor names, so they are visible
+           to the rest of the including file even though they are
+           written inside the struct body.  */
+#define REGS_UNALLOCATED 0
+#define REGS_REALLOCATE 1
+#define REGS_FIXED 2
+  unsigned regs_allocated : 2;
+
+        /* Set to zero when `regex_compile' compiles a pattern; set to one
+           by `re_compile_fastmap' if it updates the fastmap.  */
+  unsigned fastmap_accurate : 1;
+
+        /* If set, `re_match_2' does not return information about
+           subexpressions.  */
+  unsigned no_sub : 1;
+
+        /* If set, a beginning-of-line anchor doesn't match at the
+           beginning of the string.  */
+  unsigned not_bol : 1;
+
+        /* Similarly for an end-of-line anchor.  */
+  unsigned not_eol : 1;
+
+        /* If true, an anchor at a newline matches.  */
+  unsigned newline_anchor : 1;
+
+/* [[[end pattern_buffer]]] */
+};
+
+/* `regex_t' is another name for the compiled-pattern structure; it is
+   the type taken by regcomp, regexec, regerror and regfree below.  */
+typedef struct re_pattern_buffer regex_t;
+\f
+/* Type for byte offsets within the string.  POSIX mandates this.  */
+typedef int regoff_t;
+
+
+/* This is the structure we store register match data in.  See
+   regex.texinfo for a full description of what registers match.
+   It is a structure of arrays: `start' and `end' are parallel arrays
+   of `num_regs' offsets each (see `re_set_registers').  */
+struct re_registers
+{
+  unsigned num_regs;
+  regoff_t *start;
+  regoff_t *end;
+};
+
+
+/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer,
+   `re_match_2' returns information about at least this many registers
+   the first time a `regs' structure is passed.  The value can be
+   overridden by defining RE_NREGS before this point.  */
+#ifndef RE_NREGS
+# define RE_NREGS 30
+#endif
+
+
+/* POSIX specification for registers.  Aside from the different names than
+   `re_registers', POSIX uses an array of structures, instead of a
+   structure of arrays.  */
+typedef struct
+{
+  regoff_t rm_so;  /* Byte offset from string's start to substring's start.  */
+  regoff_t rm_eo;  /* Byte offset from string's start to substring's end.  */
+} regmatch_t;
+\f
+/* Declarations for routines.  */
+
+/* To avoid duplicating every routine declaration -- once with a
+   prototype (if we are ANSI), and once without (if we aren't) -- we
+   use the following macro to declare argument types.  This
+   unfortunately clutters up the declarations a bit, but I think it's
+   worth it.
+
+   Callers wrap the whole parameter list in an extra pair of
+   parentheses, e.g. `_RE_ARGS ((int a, int b))', so that the list is
+   passed as a single macro argument.  */
+
+#if __STDC__
+
+/* ANSI compiler: keep the parameter list, yielding a full prototype.  */
+# define _RE_ARGS(args) args
+
+#else /* not __STDC__ */
+
+/* Pre-ANSI compiler: drop the parameter list, yielding `()'.  */
+# define _RE_ARGS(args) ()
+
+#endif /* not __STDC__ */
+
+/* Set the current default syntax to SYNTAX, and return the old syntax.
+   You can also simply assign to the `re_syntax_options' variable.  */
+extern reg_syntax_t re_set_syntax _RE_ARGS ((reg_syntax_t syntax));
+
+/* Compile the regular expression PATTERN, with length LENGTH
+   and syntax given by the global `re_syntax_options', into the buffer
+   BUFFER.  Return NULL if successful, and an error string if not.  */
+extern const char *re_compile_pattern
+  _RE_ARGS ((const char *pattern, size_t length,
+             struct re_pattern_buffer *buffer));
+
+
+/* Compile a fastmap for the compiled pattern in BUFFER; used to
+   accelerate searches.  Return 0 if successful and -2 if there was an
+   internal error.  */
+extern int re_compile_fastmap _RE_ARGS ((struct re_pattern_buffer *buffer));
+
+
+/* Search in the string STRING (with length LENGTH) for the pattern
+   compiled into BUFFER.  Start searching at position START, for RANGE
+   characters.  Return the starting position of the match, -1 for no
+   match, or -2 for an internal error.  Also return register
+   information in REGS (if REGS is nonzero and BUFFER->no_sub is zero;
+   a set `no_sub' suppresses subexpression information, see the field
+   comment in `struct re_pattern_buffer').  */
+extern int re_search
+  _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
+            int length, int start, int range, struct re_registers *regs));
+
+
+/* Like `re_search', but search in the concatenation of STRING1 and
+   STRING2.  Also, stop searching at index START + STOP.  */
+extern int re_search_2
+  _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
+             int length1, const char *string2, int length2,
+             int start, int range, struct re_registers *regs, int stop));
+
+
+/* Like `re_search', but return how many characters in STRING the regexp
+   in BUFFER matched, starting at position START.  */
+extern int re_match
+  _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
+             int length, int start, struct re_registers *regs));
+
+
+/* Relates to `re_match' as `re_search_2' relates to `re_search'.  */
+extern int re_match_2
+  _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
+             int length1, const char *string2, int length2,
+             int start, struct re_registers *regs, int stop));
+
+
+/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
+   ENDS.  Subsequent matches using BUFFER and REGS will use this memory
+   for recording register information.  STARTS and ENDS must be
+   allocated with malloc, and must each be at least `NUM_REGS * sizeof
+   (regoff_t)' bytes long.
+
+   If NUM_REGS == 0, then subsequent matches should allocate their own
+   register data.
+
+   Unless this function is called, the first search or match using
+   BUFFER will allocate its own register data, without
+   freeing the old data.  */
+extern void re_set_registers
+  _RE_ARGS ((struct re_pattern_buffer *buffer, struct re_registers *regs,
+             unsigned num_regs, regoff_t *starts, regoff_t *ends));
+
+/* The BSD entry points are declared only when _REGEX_RE_COMP or _LIBC
+   is defined, and never on _CRAY.  */
+#if defined _REGEX_RE_COMP || defined _LIBC
+# ifndef _CRAY
+/* 4.2 bsd compatibility.  */
+extern char *re_comp _RE_ARGS ((const char *));
+extern int re_exec _RE_ARGS ((const char *));
+# endif
+#endif
+
+/* GCC 2.95 and later have "__restrict"; C99 compilers have
+   "restrict", and "configure" may have defined "restrict".
+   For anything older than GCC 2.95, map __restrict to `restrict' when
+   that keyword is available (defined as a macro, or C99), and to
+   nothing otherwise.  */
+#ifndef __restrict
+# if ! (2 < __GNUC__ || (2 == __GNUC__ && 95 <= __GNUC_MINOR__))
+#  if defined restrict || 199901L <= __STDC_VERSION__
+#   define __restrict restrict
+#  else
+#   define __restrict
+#  endif
+# endif
+#endif
+/* gcc 3.1 and up support the [restrict] syntax.  For every other
+   compiler __restrict_arr expands to nothing, so `t name[__restrict_arr]'
+   degrades to a plain `t name[]'.  */
+#ifndef __restrict_arr
+# if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)
+#  define __restrict_arr __restrict
+# else
+#  define __restrict_arr
+# endif
+#endif
+
+/* POSIX compatibility.  The descriptions below follow the POSIX
+   <regex.h> interface; the definitions are not part of this header.  */
+
+/* Compile __PATTERN into __PREG.  __CFLAGS is a combination of the
+   POSIX `cflags' bits (REG_EXTENDED, REG_ICASE, REG_NEWLINE,
+   REG_NOSUB).  */
+extern int regcomp _RE_ARGS ((regex_t *__restrict __preg,
+                             const char *__restrict __pattern,
+                             int __cflags));
+
+/* Match __STRING against the compiled pattern __PREG.  __PMATCH has
+   room for __NMATCH entries; __EFLAGS is a combination of the POSIX
+   `eflags' bits (REG_NOTBOL, REG_NOTEOL).  */
+extern int regexec _RE_ARGS ((const regex_t *__restrict __preg,
+                             const char *__restrict __string, size_t __nmatch,
+                             regmatch_t __pmatch[__restrict_arr],
+                             int __eflags));
+
+/* Produce the message for __ERRCODE in __ERRBUF, which is
+   __ERRBUF_SIZE bytes long.  */
+extern size_t regerror _RE_ARGS ((int __errcode, const regex_t *__preg,
+                                 char *__errbuf, size_t __errbuf_size));
+
+/* Release the storage associated with the compiled pattern __PREG.  */
+extern void regfree _RE_ARGS ((regex_t *__preg));
+
+
+#ifdef __cplusplus
+}
+#endif /* C++ */
+
+#endif /* regex.h */
+\f
+/*
+Local variables:
+make-backup-files: t
+version-control: t
+trim-versions-without-asking: nil
+End:
+*/
diff --git a/lib/regex/regex_internal.c b/lib/regex/regex_internal.c

new file mode 100644 (file)

index 0000000..f969c7c
--- /dev/null
+++ b/lib/regex/regex_internal.c
@@ -0,0 +1,1263 @@
+/* Extended regular expression matching and search library.
+   Copyright (C) 2002, 2003 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+/* Prototypes for the file-local helpers defined in this file.  The
+   definitions themselves are written in K&R style.  */
+
+/* String helpers.  */
+static void re_string_construct_common (const char *str, int len,
+                                       re_string_t *pstr,
+                                       RE_TRANSLATE_TYPE trans, int icase);
+#ifdef RE_ENABLE_I18N
+static int re_string_skip_chars (re_string_t *pstr, int new_raw_idx,
+                                wint_t *last_wc);
+#endif /* RE_ENABLE_I18N */
+/* DFA state helpers.  */
+static re_dfastate_t *create_newstate_common (re_dfa_t *dfa,
+                                             const re_node_set *nodes,
+                                             unsigned int hash);
+static reg_errcode_t register_state (re_dfa_t *dfa, re_dfastate_t *newstate,
+                                    unsigned int hash);
+static re_dfastate_t *create_ci_newstate (re_dfa_t *dfa,
+                                         const re_node_set *nodes,
+                                         unsigned int hash);
+static re_dfastate_t *create_cd_newstate (re_dfa_t *dfa,
+                                         const re_node_set *nodes,
+                                         unsigned int context,
+                                         unsigned int hash);
+/* NOTE(review): `inline' follows the type specifiers here; that is
+   accepted, but `static inline unsigned int' is the usual spelling.  */
+static unsigned int inline calc_state_hash (const re_node_set *nodes,
+                                           unsigned int context);
+\f
+/* Functions for string operation.  */
+
+/* Set up PSTR for the string STR of length LEN and allocate its
+   buffers, sized to the smaller of INIT_LEN and LEN + 1.  The buffers
+   are not filled here: re_string_reconstruct must be called before the
+   object is used.  Returns REG_NOERROR, or the error reported by
+   re_string_realloc_buffers.  */
+
+static reg_errcode_t
+re_string_allocate (pstr, str, len, init_len, trans, icase)
+     re_string_t *pstr;
+     const char *str;
+     int len, init_len, icase;
+     RE_TRANSLATE_TYPE trans;
+{
+  reg_errcode_t err;
+  /* Never ask for more than LEN + 1 elements up front.  */
+  int buf_len = init_len;
+  if (len + 1 < init_len)
+    buf_len = len + 1;
+
+  re_string_construct_common (str, len, pstr, trans, icase);
+  pstr->stop = pstr->len;
+
+  err = re_string_realloc_buffers (pstr, buf_len);
+  if (BE (err != REG_NOERROR, 0))
+    return err;
+
+  /* Buffers that were not allocated alias the caller's string.  */
+  if (!MBS_CASE_ALLOCATED (pstr))
+    pstr->mbs_case = (unsigned char *) str;
+  if (!MBS_ALLOCATED (pstr))
+    pstr->mbs = pstr->mbs_case;
+
+  /* With no private buffer and a single-byte locale the whole input is
+     usable as it stands.  */
+  if (!MBS_CASE_ALLOCATED (pstr) && !MBS_ALLOCATED (pstr)
+      && !(MB_CUR_MAX > 1))
+    pstr->valid_len = len;
+
+  return REG_NOERROR;
+}
+
+/* This function allocates the buffers and initializes them for the
+   whole string STR of length LEN.  TRANS and ICASE are recorded in
+   PSTR by re_string_construct_common.  Returns REG_NOERROR, or the
+   error reported by re_string_realloc_buffers.  */
+
+static reg_errcode_t
+re_string_construct (pstr, str, len, trans, icase)
+     re_string_t *pstr;
+     const char *str;
+     int len, icase;
+     RE_TRANSLATE_TYPE trans;
+{
+  reg_errcode_t ret;
+  re_string_construct_common (str, len, pstr, trans, icase);
+  pstr->stop = pstr->len;
+  /* Set 0 so that this function can initialize whole buffers.  */
+  pstr->valid_len = 0;
+
+  /* Buffers are sized LEN + 1; nothing is allocated for an empty
+     string, so bufs_len keeps the zero set by the common
+     constructor's memset.  */
+  if (len > 0)
+    {
+      ret = re_string_realloc_buffers (pstr, len + 1);
+      if (BE (ret != REG_NOERROR, 0))
+       return ret;
+    }
+  /* Buffers that were not allocated alias the caller's string.  */
+  pstr->mbs_case = (MBS_CASE_ALLOCATED (pstr) ? pstr->mbs_case
+                   : (unsigned char *) str);
+  pstr->mbs = MBS_ALLOCATED (pstr) ? pstr->mbs : pstr->mbs_case;
+
+  /* Fill the buffers with the builder that matches ICASE and, when
+     RE_ENABLE_I18N is defined, the locale's MB_CUR_MAX.  Note that the
+     `else' before each #endif binds to the statement after it.  */
+  if (icase)
+    {
+#ifdef RE_ENABLE_I18N
+      if (MB_CUR_MAX > 1)
+       build_wcs_upper_buffer (pstr);
+      else
+#endif /* RE_ENABLE_I18N  */
+       build_upper_buffer (pstr);
+    }
+  else
+    {
+#ifdef RE_ENABLE_I18N
+      if (MB_CUR_MAX > 1)
+       build_wcs_buffer (pstr);
+      else
+#endif /* RE_ENABLE_I18N  */
+       {
+         if (trans != NULL)
+           re_string_translate_buffer (pstr);
+         else
+           pstr->valid_len = len;
+       }
+    }
+
+  /* Initialized whole buffers, then valid_len == bufs_len.  This
+     overwrites whatever the builders above stored in valid_len.  */
+  pstr->valid_len = pstr->bufs_len;
+  return REG_NOERROR;
+}
+
+/* Helper for re_string_allocate and re_string_construct: resize every
+   buffer PSTR owns to NEW_BUF_LEN elements.  On an allocation failure
+   REG_ESPACE is returned, the buffer that failed keeps its old pointer,
+   and PSTR->BUFS_LEN is left unchanged.  */
+
+static reg_errcode_t
+re_string_realloc_buffers (pstr, new_buf_len)
+     re_string_t *pstr;
+     int new_buf_len;
+{
+  unsigned char *bytes;
+#ifdef RE_ENABLE_I18N
+  wint_t *wide;
+
+  /* The wide-character buffer is used only in multibyte locales.  */
+  if (MB_CUR_MAX > 1)
+    {
+      wide = re_realloc (pstr->wcs, wint_t, new_buf_len);
+      if (BE (wide == NULL, 0))
+       return REG_ESPACE;
+      pstr->wcs = wide;
+    }
+#endif /* RE_ENABLE_I18N  */
+
+  if (MBS_ALLOCATED (pstr))
+    {
+      bytes = re_realloc (pstr->mbs, unsigned char, new_buf_len);
+      if (BE (bytes == NULL, 0))
+       return REG_ESPACE;
+      pstr->mbs = bytes;
+    }
+
+  if (MBS_CASE_ALLOCATED (pstr))
+    {
+      bytes = re_realloc (pstr->mbs_case, unsigned char, new_buf_len);
+      if (BE (bytes == NULL, 0))
+       return REG_ESPACE;
+      pstr->mbs_case = bytes;
+      /* Without a buffer of its own, MBS shares the MBS_CASE storage
+        and must follow it when it moves.  */
+      if (!MBS_ALLOCATED (pstr))
+       pstr->mbs = pstr->mbs_case;
+    }
+
+  pstr->bufs_len = new_buf_len;
+  return REG_NOERROR;
+}
+
+
+/* Zero *PSTR, then record the raw string STR, its length LEN, the
+   translate table TRANS, and ICASE normalised to 0 or 1.  */
+static void
+re_string_construct_common (str, len, pstr, trans, icase)
+     const char *str;
+     int len;
+     re_string_t *pstr;
+     RE_TRANSLATE_TYPE trans;
+     int icase;
+{
+  /* Every field not assigned below starts out as zero.  */
+  memset (pstr, 0, sizeof *pstr);
+
+  pstr->icase = (icase != 0);
+  pstr->trans = trans;
+  pstr->len = len;
+  pstr->raw_mbs = (const unsigned char *) str;
+}
+
+#ifdef RE_ENABLE_I18N
+
+/* Build wide character buffer PSTR->WCS.
+   If the byte sequence of the string are:
+     <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
+   Then wide character buffer will be:
+     <wc1>   , WEOF    , <wc2>   , WEOF    , <wc3>
+   We use WEOF for padding, they indicate that the position isn't
+   a first byte of a multibyte character.
+
+   Note that this function assumes PSTR->VALID_LEN elements are already
+   built and starts from PSTR->VALID_LEN.  On return PSTR->VALID_LEN is
+   the index just past the last element written.  */
+
+static void
+build_wcs_buffer (pstr)
+     re_string_t *pstr;
+{
+  mbstate_t prev_st;
+  int byte_idx, end_idx, mbclen, remain_len;
+  /* Build the buffers from pstr->valid_len to either pstr->len or
+     pstr->bufs_len.  */
+  end_idx = (pstr->bufs_len > pstr->len)? pstr->len : pstr->bufs_len;
+  for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
+    {
+      wchar_t wc;
+      remain_len = end_idx - byte_idx;
+      /* Remember the conversion state so it can be rolled back when
+        mbrtowc does not yield a complete character.  */
+      prev_st = pstr->cur_state;
+      /* NOTE(review): the size_t result of mbrtowc is stored in an int
+        and then compared with (size_t) -1 / -2; this relies on the
+        integer conversions round-tripping those values.  */
+      mbclen = mbrtowc (&wc, ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
+                             + byte_idx), remain_len, &pstr->cur_state);
+      if (BE (mbclen == (size_t) -2, 0))
+       {
+         /* The buffer doesn't have enough space, finish to build.  */
+         pstr->cur_state = prev_st;
+         break;
+       }
+      else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0))
+       {
+         /* We treat these cases as a singlebyte character.  */
+         mbclen = 1;
+         wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
+         pstr->cur_state = prev_st;
+       }
+
+      /* Apply the translation if we need.  Only single-byte characters
+        are translated.  */
+      if (pstr->trans != NULL && mbclen == 1)
+       {
+         int ch = pstr->trans[pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]];
+         pstr->mbs_case[byte_idx] = ch;
+       }
+      /* Write wide character and padding.  */
+      pstr->wcs[byte_idx++] = wc;
+      /* Write paddings.  REMAIN_LEN is reused here as the index just
+        past the last byte of the current character.  */
+      for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
+       pstr->wcs[byte_idx++] = WEOF;
+    }
+  pstr->valid_len = byte_idx;
+}
+
+/* Build wide character buffer PSTR->WCS like build_wcs_buffer,
+   but for REG_ICASE: lower-case characters are stored upper-cased in
+   both PSTR->WCS and PSTR->MBS.  Starts from PSTR->VALID_LEN and
+   updates it to the index just past the last element written.  */
+
+static void
+build_wcs_upper_buffer (pstr)
+     re_string_t *pstr;
+{
+  mbstate_t prev_st;
+  int byte_idx, end_idx, mbclen, remain_len;
+  /* Build the buffers from pstr->valid_len to either pstr->len or
+     pstr->bufs_len.  */
+  end_idx = (pstr->bufs_len > pstr->len)? pstr->len : pstr->bufs_len;
+  for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
+    {
+      wchar_t wc;
+      remain_len = end_idx - byte_idx;
+      /* Remember the conversion state so it can be rolled back when
+        mbrtowc does not yield a complete character.  */
+      prev_st = pstr->cur_state;
+      mbclen = mbrtowc (&wc, ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
+                             + byte_idx), remain_len, &pstr->cur_state);
+      if (BE (mbclen == (size_t) -2, 0))
+       {
+         /* The buffer doesn't have enough space, finish to build.  */
+         pstr->cur_state = prev_st;
+         break;
+       }
+      else if (mbclen == 1 || mbclen == (size_t) -1 || mbclen == 0)
+       {
+         /* In case of a singlebyte character.  */
+         int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
+         /* mbrtowc stores nothing in WC when it fails, so fall back to
+            the raw byte, as build_wcs_buffer does, instead of reading
+            an indeterminate value.  */
+         if (BE (mbclen == (size_t) -1, 0))
+           wc = (wchar_t) ch;
+         /* Apply the translation if we need.  */
+         if (pstr->trans != NULL && mbclen == 1)
+           {
+             /* The translate table may have a signed element type;
+                keep CH within the unsigned char range required by
+                islower and toupper.  */
+             ch = (unsigned char) pstr->trans[ch];
+             pstr->mbs_case[byte_idx] = ch;
+           }
+         /* WC is a wide character, so it needs the wide conversion.  */
+         pstr->wcs[byte_idx] = iswlower (wc) ? towupper (wc) : wc;
+         pstr->mbs[byte_idx++] = islower (ch) ? toupper (ch) : ch;
+         if (BE (mbclen == (size_t) -1, 0))
+           pstr->cur_state = prev_st;
+       }
+      else /* mbclen > 1 */
+       {
+         if (iswlower (wc))
+           wcrtomb ((char *) pstr->mbs + byte_idx, towupper (wc), &prev_st);
+         else
+           memcpy (pstr->mbs + byte_idx,
+                   pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
+         pstr->wcs[byte_idx++] = iswlower (wc) ? towupper (wc) : wc;
+         /* Write paddings.  REMAIN_LEN is reused here as the index just
+            past the last byte of the current character.  */
+         for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
+           pstr->wcs[byte_idx++] = WEOF;
+       }
+    }
+  pstr->valid_len = byte_idx;
+}
+
+/* Decode, without storing, the raw characters from the first position
+   not yet built (PSTR->RAW_MBS_IDX + PSTR->VALID_LEN) until the raw
+   index reaches or passes NEW_RAW_IDX.  Return that index, and store
+   the last wide character decoded in *LAST_WC (0 if nothing was
+   skipped).  */
+
+static int
+re_string_skip_chars (pstr, new_raw_idx, last_wc)
+     re_string_t *pstr;
+     int new_raw_idx;
+     wint_t *last_wc;
+{
+  mbstate_t prev_st;
+  int rawbuf_idx, mbclen;
+  wchar_t wc = 0;
+  /* PSTR->LEN counts from PSTR->RAW_MBS_IDX (re_string_reconstruct adds
+     the two to get back to the start of the raw string), so the raw
+     input ends at their sum.  Subtracting the absolute RAWBUF_IDX from
+     PSTR->LEN alone would understate the bytes left to decode.  */
+  int raw_end = pstr->raw_mbs_idx + pstr->len;
+
+  /* Skip the characters which are not necessary to check.  */
+  for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_len;
+       rawbuf_idx < new_raw_idx;)
+    {
+      int remain_len;
+      remain_len = raw_end - rawbuf_idx;
+      prev_st = pstr->cur_state;
+      mbclen = mbrtowc (&wc, (const char *) pstr->raw_mbs + rawbuf_idx,
+                       remain_len, &pstr->cur_state);
+      if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0))
+       {
+         /* We treat these cases as a singlebyte character.  */
+         mbclen = 1;
+         pstr->cur_state = prev_st;
+       }
+      /* Then proceed the next character.  */
+      rawbuf_idx += mbclen;
+    }
+  *last_wc = (wint_t) wc;
+  return rawbuf_idx;
+}
+#endif /* RE_ENABLE_I18N  */
+
+/* Build the buffer PSTR->MBS, and apply the translation if we need.
+   This function is used in case of REG_ICASE: each byte is translated
+   through PSTR->TRANS when a table is present (the translated byte is
+   also stored in PSTR->MBS_CASE), then stored upper-cased in PSTR->MBS.
+   Starts from PSTR->VALID_LEN and updates it to the index just past
+   the last byte written.  */
+
+static void
+build_upper_buffer (pstr)
+     re_string_t *pstr;
+{
+  int char_idx, end_idx;
+  /* Stop at the end of the input or of the buffer, whichever is first.  */
+  end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
+
+  for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx)
+    {
+      int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
+      if (pstr->trans != NULL)
+       {
+         /* RE_TRANSLATE_TYPE defaults to `char *', whose elements may
+            be negative; islower and toupper require a value in the
+            unsigned char range.  */
+         ch = (unsigned char) pstr->trans[ch];
+         pstr->mbs_case[char_idx] = ch;
+       }
+      if (islower (ch))
+       pstr->mbs[char_idx] = toupper (ch);
+      else
+       pstr->mbs[char_idx] = ch;
+    }
+  pstr->valid_len = char_idx;
+}
+
+/* Run the raw bytes of PSTR through PSTR->TRANS into PSTR->MBS_CASE,
+   from PSTR->VALID_LEN up to the end of the input or of the buffer,
+   whichever comes first, and advance PSTR->VALID_LEN to that point.  */
+
+static void
+re_string_translate_buffer (pstr)
+     re_string_t *pstr;
+{
+  int pos = pstr->valid_len;
+  int limit = pstr->len;
+
+  if (!(pstr->bufs_len > pstr->len))
+    limit = pstr->bufs_len;
+
+  while (pos < limit)
+    {
+      int raw = pstr->raw_mbs[pstr->raw_mbs_idx + pos];
+      pstr->mbs_case[pos] = pstr->trans[raw];
+      ++pos;
+    }
+
+  pstr->valid_len = pos;
+}
+
+/* This function re-constructs the buffers so that they start at raw
+   index IDX.
+   Concretely, convert to wide character in case of MB_CUR_MAX > 1,
+   convert to upper case in case of REG_ICASE, apply translation.
+   EFLAGS and NEWLINE are used only to compute PSTR->TIP_CONTEXT, the
+   context of the character just before the new start.  Always returns
+   REG_NOERROR.  */
+
+static reg_errcode_t
+re_string_reconstruct (pstr, idx, eflags, newline)
+     re_string_t *pstr;
+     int idx, eflags, newline;
+{
+  /* Distance from the current buffer start to the requested one.  */
+  int offset = idx - pstr->raw_mbs_idx;
+  if (offset < 0)
+    {
+      /* Reset buffer.  Moving backwards: start over from the beginning
+        of the raw string.  */
+#ifdef RE_ENABLE_I18N
+      if (MB_CUR_MAX > 1)
+       memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
+#endif /* RE_ENABLE_I18N */
+      /* LEN and STOP are relative to RAW_MBS_IDX; make them relative to
+        the start of the raw string again.  */
+      pstr->len += pstr->raw_mbs_idx;
+      pstr->stop += pstr->raw_mbs_idx;
+      pstr->valid_len = pstr->raw_mbs_idx = 0;
+      pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
+                          : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
+      /* Buffers that are not allocated alias the raw string.  */
+      if (!MBS_CASE_ALLOCATED (pstr))
+       pstr->mbs_case = (unsigned char *) pstr->raw_mbs;
+      if (!MBS_ALLOCATED (pstr) && !MBS_CASE_ALLOCATED (pstr))
+       pstr->mbs = (unsigned char *) pstr->raw_mbs;
+      offset = idx;
+    }
+
+  if (offset != 0)
+    {
+      /* Are the characters which are already checked remain?  */
+      if (offset < pstr->valid_len)
+       {
+         /* Yes, move them to the front of the buffer.  The context must
+            be read before the data at OFFSET - 1 is overwritten.  */
+         pstr->tip_context = re_string_context_at (pstr, offset - 1, eflags,
+                                                   newline);
+#ifdef RE_ENABLE_I18N
+         if (MB_CUR_MAX > 1)
+           memmove (pstr->wcs, pstr->wcs + offset,
+                    (pstr->valid_len - offset) * sizeof (wint_t));
+#endif /* RE_ENABLE_I18N */
+         if (MBS_ALLOCATED (pstr))
+           memmove (pstr->mbs, pstr->mbs + offset,
+                    pstr->valid_len - offset);
+         if (MBS_CASE_ALLOCATED (pstr))
+           memmove (pstr->mbs_case, pstr->mbs_case + offset,
+                    pstr->valid_len - offset);
+         pstr->valid_len -= offset;
+         /* NOTE(review): this uses `#if DEBUG' while
+            re_string_context_at uses `#ifdef DEBUG'.  */
+#if DEBUG
+         assert (pstr->valid_len > 0);
+#endif
+       }
+      else
+       {
+         /* No, skip all characters until IDX.  */
+         pstr->valid_len = 0;
+#ifdef RE_ENABLE_I18N
+         if (MB_CUR_MAX > 1)
+           {
+             int wcs_idx;
+             wint_t wc;
+             /* The skip may end past IDX, inside a multibyte character;
+                the bytes between IDX and that point are marked as
+                padding.  */
+             pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
+             for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
+               pstr->wcs[wcs_idx] = WEOF;
+             /* Only characters that fit in one byte are translated.  */
+             if (pstr->trans && wc <= 0xff)
+               wc = pstr->trans[wc];
+             pstr->tip_context = (IS_WIDE_WORD_CHAR (wc) ? CONTEXT_WORD
+                                  : ((newline && IS_WIDE_NEWLINE (wc))
+                                     ? CONTEXT_NEWLINE : 0));
+           }
+         else
+#endif /* RE_ENABLE_I18N */
+           {
+             /* The context comes from the raw byte just before IDX.  */
+             int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1];
+             if (pstr->trans)
+               c = pstr->trans[c];
+             pstr->tip_context = (IS_WORD_CHAR (c) ? CONTEXT_WORD
+                                  : ((newline && IS_NEWLINE (c))
+                                     ? CONTEXT_NEWLINE : 0));
+           }
+       }
+      /* Buffers that alias the raw string are advanced instead of
+        being moved.  */
+      if (!MBS_CASE_ALLOCATED (pstr))
+       {
+         pstr->mbs_case += offset;
+         /* In case of !MBS_ALLOCATED && !MBS_CASE_ALLOCATED.  */
+         if (!MBS_ALLOCATED (pstr))
+           pstr->mbs += offset;
+       }
+    }
+  /* Record the new start; LEN and STOP stay relative to it.  */
+  pstr->raw_mbs_idx = idx;
+  pstr->len -= offset;
+  pstr->stop -= offset;
+
+  /* Then build the buffers.  */
+#ifdef RE_ENABLE_I18N
+  if (MB_CUR_MAX > 1)
+    {
+      if (pstr->icase)
+       build_wcs_upper_buffer (pstr);
+      else
+       build_wcs_buffer (pstr);
+    }
+  else
+#endif /* RE_ENABLE_I18N */
+    {
+      if (pstr->icase)
+       build_upper_buffer (pstr);
+      else if (pstr->trans != NULL)
+       re_string_translate_buffer (pstr);
+    }
+  pstr->cur_idx = 0;
+
+  return REG_NOERROR;
+}
+
+/* Free the buffers PSTR owns: the wide-character buffer (when
+   RE_ENABLE_I18N is defined), and MBS / MBS_CASE only when the
+   corresponding *_ALLOCATED macro says they were allocated.  PSTR
+   itself is not freed.  */
+static void
+re_string_destruct (pstr)
+     re_string_t *pstr;
+{
+#ifdef RE_ENABLE_I18N
+  re_free (pstr->wcs);
+#endif /* RE_ENABLE_I18N  */
+
+  if (MBS_ALLOCATED (pstr))
+    {
+      re_free (pstr->mbs);
+    }
+
+  if (MBS_CASE_ALLOCATED (pstr))
+    {
+      re_free (pstr->mbs_case);
+    }
+}
+
+/* Return the context at IDX in INPUT.  */
+
+static unsigned int
+re_string_context_at (input, idx, eflags, newline_anchor)
+     const re_string_t *input;
+     int idx, eflags, newline_anchor;
+{
+  int c;
+  if (idx < 0 || idx == input->len)
+    {
+      if (idx < 0)
+       /* In this case, we use the value stored in input->tip_context,
+          since we can't know the character in input->mbs[-1] here.  */
+       return input->tip_context;
+      else /* (idx == input->len) */
+       return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF
+               : CONTEXT_NEWLINE | CONTEXT_ENDBUF);
+    }
+#ifdef RE_ENABLE_I18N
+  if (MB_CUR_MAX > 1)
+    {
+      wint_t wc;
+      int wc_idx = idx;
+      while(input->wcs[wc_idx] == WEOF)
+       {
+#ifdef DEBUG
+         /* It must not happen.  */
+         assert (wc_idx >= 0);
+#endif
+         --wc_idx;
+         if (wc_idx < 0)
+           return input->tip_context;
+       }
+      wc = input->wcs[wc_idx];
+      if (IS_WIDE_WORD_CHAR (wc))
+       return CONTEXT_WORD;
+      return (newline_anchor && IS_WIDE_NEWLINE (wc)) ? CONTEXT_NEWLINE : 0;
+    }
+  else
+#endif
+    {
+      c = re_string_byte_at (input, idx);
+      if (IS_WORD_CHAR (c))
+       return CONTEXT_WORD;
+      return (newline_anchor && IS_NEWLINE (c)) ? CONTEXT_NEWLINE : 0;
+    }
+}
+\f
+/* Functions for set operation.  */
+
+/* Initialise SET as an empty set with room for SIZE elements.
+   Returns REG_ESPACE if the element array cannot be allocated; in that
+   case SET->ELEMS is NULL while SET->ALLOC still holds SIZE.  */
+static reg_errcode_t
+re_node_set_alloc (set, size)
+     re_node_set *set;
+     int size;
+{
+  int *storage = re_malloc (int, size);
+
+  set->elems = storage;
+  set->nelem = 0;
+  set->alloc = size;
+
+  if (BE (storage == NULL, 0))
+    return REG_ESPACE;
+  return REG_NOERROR;
+}
+
+/* Initialise SET as the one-element set { ELEM }.  On allocation
+   failure SET is left empty (ALLOC and NELEM zero, ELEMS NULL) and
+   REG_ESPACE is returned.  */
+static reg_errcode_t
+re_node_set_init_1 (set, elem)
+     re_node_set *set;
+     int elem;
+{
+  int *slot = re_malloc (int, 1);
+
+  set->elems = slot;
+  if (BE (slot == NULL, 0))
+    {
+      set->nelem = 0;
+      set->alloc = 0;
+      return REG_ESPACE;
+    }
+
+  slot[0] = elem;
+  set->nelem = 1;
+  set->alloc = 1;
+  return REG_NOERROR;
+}
+
+/* Initialise SET from ELEM1 and ELEM2.  The elements are stored in
+   ascending order and a duplicate is stored only once, so the result
+   has one or two elements.  On allocation failure SET is left empty
+   (ALLOC and NELEM zero, ELEMS NULL), matching re_node_set_init_1, and
+   REG_ESPACE is returned.  */
+static reg_errcode_t
+re_node_set_init_2 (set, elem1, elem2)
+     re_node_set *set;
+     int elem1, elem2;
+{
+  set->alloc = 2;
+  set->elems = re_malloc (int, 2);
+  if (BE (set->elems == NULL, 0))
+    {
+      /* Do not leave NELEM unset, or ALLOC claiming room that was
+        never obtained.  */
+      set->alloc = set->nelem = 0;
+      return REG_ESPACE;
+    }
+  if (elem1 == elem2)
+    {
+      set->nelem = 1;
+      set->elems[0] = elem1;
+    }
+  else
+    {
+      set->nelem = 2;
+      if (elem1 < elem2)
+       {
+         set->elems[0] = elem1;
+         set->elems[1] = elem2;
+       }
+      else
+       {
+         set->elems[0] = elem2;
+         set->elems[1] = elem1;
+       }
+    }
+  return REG_NOERROR;
+}
+
+/* Initialise DEST as a copy of SRC, with exactly as much room as SRC
+   has elements.  A SRC without elements yields an empty DEST.  On
+   allocation failure DEST->ALLOC and DEST->NELEM are zeroed and
+   REG_ESPACE is returned.  */
+static reg_errcode_t
+re_node_set_init_copy (dest, src)
+     re_node_set *dest;
+     const re_node_set *src;
+{
+  dest->nelem = src->nelem;
+
+  if (!(src->nelem > 0))
+    {
+      re_node_set_init_empty (dest);
+      return REG_NOERROR;
+    }
+
+  dest->alloc = dest->nelem;
+  dest->elems = re_malloc (int, dest->alloc);
+  if (BE (dest->elems == NULL, 0))
+    {
+      dest->alloc = dest->nelem = 0;
+      return REG_ESPACE;
+    }
+
+  memcpy (dest->elems, src->elems, src->nelem * sizeof (int));
+  return REG_NOERROR;
+}
+
+/* Calculate the intersection of the sets SRC1 and SRC2, and merge it
+   into DEST, keeping DEST sorted and free of duplicates.  Returns
+   REG_NOERROR on success.  If DEST cannot be enlarged, REG_ESPACE is
+   returned and DEST is left exactly as it was.
+   Note: We assume dest->elems is NULL, when dest->alloc is 0.  */
+
+static reg_errcode_t
+re_node_set_add_intersect (dest, src1, src2)
+     re_node_set *dest;
+     const re_node_set *src1, *src2;
+{
+  int i1, i2, id;
+  if (src1->nelem > 0 && src2->nelem > 0)
+    {
+      /* Generous upper bound on the size of the merged result.  */
+      int needed = src1->nelem + src2->nelem + dest->nelem;
+      if (needed > dest->alloc)
+       {
+         /* Keep the old array until the new one is known to exist, so
+            that a failed realloc neither leaks it nor leaves DEST with
+            a NULL array and stale counters.  */
+         int *grown = re_realloc (dest->elems, int, needed);
+         if (BE (grown == NULL, 0))
+           return REG_ESPACE;
+         dest->elems = grown;
+         dest->alloc = needed;
+       }
+    }
+  else
+    return REG_NOERROR;
+
+  /* Walk both sorted sources in step; ID tracks the insertion point in
+     DEST and only ever moves forward.  */
+  for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
+    {
+      if (src1->elems[i1] > src2->elems[i2])
+       {
+         ++i2;
+         continue;
+       }
+      if (src1->elems[i1] == src2->elems[i2])
+       {
+         /* Common element: find where it belongs in DEST.  */
+         while (id < dest->nelem && dest->elems[id] < src2->elems[i2])
+           ++id;
+         if (id < dest->nelem && dest->elems[id] == src2->elems[i2])
+           ++id;
+         else
+           {
+             /* Not present yet: open a gap and insert it.  */
+             memmove (dest->elems + id + 1, dest->elems + id,
+                      sizeof (int) * (dest->nelem - id));
+             dest->elems[id++] = src2->elems[i2++];
+             ++dest->nelem;
+           }
+       }
+      ++i1;
+    }
+  return REG_NOERROR;
+}
+
+/* Build in DEST the union of the sets SRC1 and SRC2; either of them may be
+   NULL or empty.  Return REG_ESPACE on allocation failure, REG_NOERROR
+   otherwise.  */
+
+static reg_errcode_t
+re_node_set_init_union (dest, src1, src2)
+     re_node_set *dest;
+     const re_node_set *src1, *src2;
+{
+  int i1, i2, id;
+  int have1 = (src1 != NULL && src1->nelem > 0);
+  int have2 = (src2 != NULL && src2->nelem > 0);
+
+  /* With at most one non-empty operand the union is a plain copy.  */
+  if (!have1 || !have2)
+    {
+      if (have1)
+       return re_node_set_init_copy (dest, src1);
+      if (have2)
+       return re_node_set_init_copy (dest, src2);
+      re_node_set_init_empty (dest);
+      return REG_NOERROR;
+    }
+
+  dest->alloc = src1->nelem + src2->nelem;
+  dest->elems = re_malloc (int, dest->alloc);
+  if (BE (dest->elems == NULL, 0))
+    return REG_ESPACE;
+
+  /* Two-way merge: always emit the smaller head, and emit an element
+     present in both inputs only once.  */
+  i1 = i2 = id = 0;
+  while (i1 < src1->nelem && i2 < src2->nelem)
+    {
+      if (src1->elems[i1] > src2->elems[i2])
+       dest->elems[id++] = src2->elems[i2++];
+      else
+       {
+         if (src1->elems[i1] == src2->elems[i2])
+           ++i2;
+         dest->elems[id++] = src1->elems[i1++];
+       }
+    }
+
+  /* At most one input still has a tail; append it wholesale.  */
+  if (i1 < src1->nelem)
+    {
+      int rest = src1->nelem - i1;
+      memcpy (dest->elems + id, src1->elems + i1, rest * sizeof (int));
+      id += rest;
+    }
+  else if (i2 < src2->nelem)
+    {
+      int rest = src2->nelem - i2;
+      memcpy (dest->elems + id, src2->elems + i2, rest * sizeof (int));
+      id += rest;
+    }
+  dest->nelem = id;
+  return REG_NOERROR;
+}
+
+/* Calculate the union set of the sets DEST and SRC, and store it to DEST.
+   A NULL or empty SRC is a no-op.  Return REG_ESPACE if DEST cannot be
+   grown (DEST is then left unchanged), REG_NOERROR otherwise.  */
+
+static reg_errcode_t
+re_node_set_merge (dest, src)
+     re_node_set *dest;
+     const re_node_set *src;
+{
+  int si, di;
+  if (src == NULL || src->nelem == 0)
+    return REG_NOERROR;
+  if (dest->alloc < src->nelem + dest->nelem)
+    {
+      /* Record the new capacity only once realloc has succeeded;
+        otherwise a failure would leave dest->alloc larger than the
+        buffer really is.  */
+      int new_alloc = 2 * (src->nelem + dest->alloc);
+      int *new_buffer = re_realloc (dest->elems, int, new_alloc);
+      if (BE (new_buffer == NULL, 0))
+       return REG_ESPACE;
+      dest->elems = new_buffer;
+      dest->alloc = new_alloc;
+    }
+
+  for (si = 0, di = 0 ; si < src->nelem && di < dest->nelem ;)
+    {
+      int cp_from, ncp, mid, right, src_elem = src->elems[si];
+      /* Binary search the spot we will add the new element.  */
+      right = dest->nelem;
+      while (di < right)
+       {
+         mid = (di + right) / 2;
+         if (dest->elems[mid] < src_elem)
+           di = mid + 1;
+         else
+           right = mid;
+       }
+      /* Everything left in SRC is larger than all of DEST.  */
+      if (di >= dest->nelem)
+       break;
+
+      if (dest->elems[di] == src_elem)
+       {
+         /* Skip since, DEST already has the element.  */
+         ++di;
+         ++si;
+         continue;
+       }
+
+      /* Skip the src elements which are less than dest->elems[di].  */
+      cp_from = si;
+      while (si < src->nelem && src->elems[si] < dest->elems[di])
+       ++si;
+      /* Copy these src elements.  */
+      ncp = si - cp_from;
+      memmove (dest->elems + di + ncp, dest->elems + di,
+              sizeof (int) * (dest->nelem - di));
+      memcpy (dest->elems + di, src->elems + cp_from,
+             sizeof (int) * ncp);
+      /* Update counters.  */
+      di += ncp;
+      dest->nelem += ncp;
+    }
+
+  /* Copy remaining src elements.  */
+  if (si < src->nelem)
+    {
+      memcpy (dest->elems + di, src->elems + si,
+             sizeof (int) * (src->nelem - si));
+      dest->nelem += src->nelem - si;
+    }
+  return REG_NOERROR;
+}
+
+/* Insert the new element ELEM to the re_node_set* SET at the position
+   found by binary search.  No duplicate check is made, so SET should not
+   already contain ELEM.
+   Return -1 if memory cannot be allocated, return 1 otherwise.  When
+   growing an existing buffer fails, SET is left unchanged.  */
+
+static int
+re_node_set_insert (set, elem)
+     re_node_set *set;
+     int elem;
+{
+  int idx, right, mid;
+  /* In case of the set is empty.  */
+  if (set->elems == NULL || set->alloc == 0)
+    {
+      if (BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1))
+       return 1;
+      else
+       return -1;
+    }
+
+  /* Binary search the spot we will add the new element.  */
+  idx = 0;
+  right = set->nelem;
+  while (idx < right)
+    {
+      mid = (idx + right) / 2;
+      if (set->elems[mid] < elem)
+       idx = mid + 1;
+      else
+       right = mid;
+    }
+
+  /* Realloc if we need.  */
+  if (set->alloc < set->nelem + 1)
+    {
+      /* Commit the doubled capacity only after the allocation succeeded,
+        so that a failure does not leave set->alloc overstating the size
+        of the old buffer.  */
+      int new_alloc = set->alloc * 2;
+      int *new_array = re_malloc (int, new_alloc);
+      if (BE (new_array == NULL, 0))
+       return -1;
+      /* Copy the elements which precede the new element.  */
+      if (idx > 0)
+       memcpy (new_array, set->elems, sizeof (int) * (idx));
+      /* Copy the elements which follow the new element.  */
+      if (set->nelem - idx > 0)
+       memcpy (new_array + idx + 1, set->elems + idx,
+               sizeof (int) * (set->nelem - idx));
+      re_free (set->elems);
+      set->elems = new_array;
+      set->alloc = new_alloc;
+    }
+  else
+    {
+      /* Move the elements which follow the new element.  */
+      if (set->nelem - idx > 0)
+       memmove (set->elems + idx + 1, set->elems + idx,
+                sizeof (int) * (set->nelem - idx));
+    }
+  /* Insert the new element.  */
+  set->elems[idx] = elem;
+  ++set->nelem;
+  return 1;
+}
+
+/* Compare two node sets SET1 and SET2.
+   Return 1 if both are non-NULL and hold the same elements position by
+   position, return 0 otherwise.  */
+
+static int
+re_node_set_compare (set1, set2)
+     const re_node_set *set1, *set2;
+{
+  int pos;
+  if (set1 == NULL || set2 == NULL)
+    return 0;
+  if (set1->nelem != set2->nelem)
+    return 0;
+  pos = 0;
+  while (pos < set1->nelem)
+    {
+      if (set1->elems[pos] != set2->elems[pos])
+       return 0;
+      ++pos;
+    }
+  return 1;
+}
+
+/* Look ELEM up in SET by binary search.  Return its index plus one when
+   it is found, so that 0 can mean "not present".  */
+
+static int
+re_node_set_contains (set, elem)
+     const re_node_set *set;
+     int elem;
+{
+  int lo, hi;
+  if (set->nelem <= 0)
+    return 0;
+
+  /* Narrow [LO, HI] down to the first element that is not below ELEM.  */
+  lo = 0;
+  hi = set->nelem - 1;
+  while (lo < hi)
+    {
+      int probe = (lo + hi) / 2;
+      if (set->elems[probe] < elem)
+       lo = probe + 1;
+      else
+       hi = probe;
+    }
+  if (set->elems[lo] == elem)
+    return lo + 1;
+  return 0;
+}
+
+/* Remove the element at index IDX from SET, shifting the elements after
+   it down by one.  An out-of-range IDX is ignored.  */
+
+static void
+re_node_set_remove_at (set, idx)
+     re_node_set *set;
+     int idx;
+{
+  int tail;
+  if (idx < 0 || idx >= set->nelem)
+    return;
+  /* Number of elements stored after IDX.  */
+  tail = set->nelem - idx - 1;
+  if (tail > 0)
+    memmove (set->elems + idx, set->elems + idx + 1, sizeof (int) * tail);
+  --set->nelem;
+}
+\f
+
+/* Add the token TOKEN to dfa->nodes, and return the index of the token.
+   Return -1 if memory cannot be allocated.  When MODE is nonzero the
+   per-node arrays nexts, org_indices, edests, eclosures and inveclosures
+   are grown together with dfa->nodes.  */
+
+static int
+re_dfa_add_node (dfa, token, mode)
+     re_dfa_t *dfa;
+     re_token_t token;
+     int mode;
+{
+  if (dfa->nodes_len >= dfa->nodes_alloc)
+    {
+      /* NOTE(review): assumes dfa->nodes_alloc fits in an int -- confirm
+        against the declaration of re_dfa_t.  */
+      int new_alloc = dfa->nodes_alloc * 2;
+      re_token_t *new_array = re_realloc (dfa->nodes, re_token_t, new_alloc);
+      if (BE (new_array == NULL, 0))
+       return -1;
+      dfa->nodes = new_array;
+      if (mode)
+       {
+         int *new_nexts, *new_indices;
+         re_node_set *new_edests, *new_eclosures, *new_inveclosures;
+
+         /* A successful realloc may move its block even if a later
+            request fails, so store every new pointer as soon as it is
+            known to be valid; the DFA then never keeps a stale one.  */
+         new_nexts = re_realloc (dfa->nexts, int, new_alloc);
+         if (new_nexts != NULL)
+           dfa->nexts = new_nexts;
+         new_indices = re_realloc (dfa->org_indices, int, new_alloc);
+         if (new_indices != NULL)
+           dfa->org_indices = new_indices;
+         new_edests = re_realloc (dfa->edests, re_node_set, new_alloc);
+         if (new_edests != NULL)
+           dfa->edests = new_edests;
+         new_eclosures = re_realloc (dfa->eclosures, re_node_set,
+                                     new_alloc);
+         if (new_eclosures != NULL)
+           dfa->eclosures = new_eclosures;
+         new_inveclosures = re_realloc (dfa->inveclosures, re_node_set,
+                                        new_alloc);
+         if (new_inveclosures != NULL)
+           dfa->inveclosures = new_inveclosures;
+         if (BE (new_nexts == NULL || new_indices == NULL
+                 || new_edests == NULL || new_eclosures == NULL
+                 || new_inveclosures == NULL, 0))
+           return -1;
+       }
+      /* Every array now has room for NEW_ALLOC entries.  On the failure
+        paths above nodes_alloc keeps its old value, which all arrays
+        still satisfy.  */
+      dfa->nodes_alloc = new_alloc;
+    }
+  dfa->nodes[dfa->nodes_len] = token;
+  dfa->nodes[dfa->nodes_len].duplicated = 0;
+  dfa->nodes[dfa->nodes_len].constraint = 0;
+  return dfa->nodes_len++;
+}
+
+/* Hash a node set together with CONTEXT: the element count, CONTEXT and
+   every element are simply added up.  */
+
+static unsigned int inline
+calc_state_hash (nodes, context)
+     const re_node_set *nodes;
+     unsigned int context;
+{
+  int k = 0;
+  unsigned int sum = nodes->nelem + context;
+  while (k < nodes->nelem)
+    sum += nodes->elems[k++];
+  return sum;
+}
+
+/* Return the context-independent DFA state whose node set equals NODES,
+   creating and registering it when the state table does not have one yet.
+   An empty NODES yields NULL with *ERR set to REG_NOERROR; a failure to
+   create the state yields NULL with *ERR set to REG_ESPACE.  *ERR is not
+   written when a state is returned.  */
+
+static re_dfastate_t*
+re_acquire_state (err, dfa, nodes)
+     reg_errcode_t *err;
+     re_dfa_t *dfa;
+     const re_node_set *nodes;
+{
+  unsigned int hash;
+  re_dfastate_t *created;
+  struct re_state_table_entry *bucket;
+  int k;
+  if (BE (nodes->nelem == 0, 0))
+    {
+      *err = REG_NOERROR;
+      return NULL;
+    }
+  hash = calc_state_hash (nodes, 0);
+  bucket = &dfa->state_table[hash & dfa->state_hash_mask];
+
+  /* Scan the bucket; the stored hash filters out most candidates before
+     the element-wise comparison.  */
+  for (k = 0 ; k < bucket->num ; k++)
+    {
+      re_dfastate_t *cand = bucket->array[k];
+      if (cand->hash == hash && re_node_set_compare (&cand->nodes, nodes))
+       return cand;
+    }
+
+  /* Not in the table: build a fresh state.  */
+  created = create_ci_newstate (dfa, nodes, hash);
+  if (BE (created == NULL, 0))
+    *err = REG_ESPACE;
+  return created;
+}
+
+/* Return the DFA state whose entrance node set equals NODES and whose
+   context equals CONTEXT, creating and registering it when the state table
+   does not have one yet.  An empty NODES yields NULL with *ERR set to
+   REG_NOERROR; a failure to create the state yields NULL with *ERR set to
+   REG_ESPACE.  *ERR is not written when a state is returned.  */
+
+static re_dfastate_t*
+re_acquire_state_context (err, dfa, nodes, context)
+     reg_errcode_t *err;
+     re_dfa_t *dfa;
+     const re_node_set *nodes;
+     unsigned int context;
+{
+  unsigned int hash;
+  re_dfastate_t *created;
+  struct re_state_table_entry *bucket;
+  int k;
+  if (nodes->nelem == 0)
+    {
+      *err = REG_NOERROR;
+      return NULL;
+    }
+  hash = calc_state_hash (nodes, context);
+  bucket = &dfa->state_table[hash & dfa->state_hash_mask];
+
+  /* Scan the bucket for a state with the same hash, nodes and context.  */
+  for (k = 0 ; k < bucket->num ; k++)
+    {
+      re_dfastate_t *cand = bucket->array[k];
+      if (cand->hash == hash
+         && re_node_set_compare (cand->entrance_nodes, nodes)
+         && cand->context == context)
+       return cand;
+    }
+
+  /* Not in the table: build a fresh context-dependent state.  */
+  created = create_cd_newstate (dfa, nodes, context, hash);
+  if (BE (created == NULL, 0))
+    *err = REG_ESPACE;
+  return created;
+}
+
+/* Allocate a zero-filled DFA state, give it a private copy of NODES and
+   record HASH in it.  Return the new state, or NULL when memory runs out.
+   DFA itself is not consulted here.  */
+
+static re_dfastate_t *
+create_newstate_common (dfa, nodes, hash)
+     re_dfa_t *dfa;
+     const re_node_set *nodes;
+     unsigned int hash;
+{
+  re_dfastate_t *state;
+  state = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
+  if (BE (state == NULL, 0))
+    return NULL;
+  if (BE (re_node_set_init_copy (&state->nodes, nodes) != REG_NOERROR, 0))
+    {
+      re_free (state);
+      return NULL;
+    }
+  state->hash = hash;
+  state->trtable = NULL;
+  state->trtable_search = NULL;
+  return state;
+}
+
+/* Store the new state NEWSTATE whose hash value is HASH in the matching
+   bucket of dfa->state_table.  Return REG_ESPACE if the bucket cannot be
+   grown (the bucket is then left unchanged), REG_NOERROR otherwise.  */
+
+static reg_errcode_t
+register_state (dfa, newstate, hash)
+     re_dfa_t *dfa;
+     re_dfastate_t *newstate;
+     unsigned int hash;
+{
+  struct re_state_table_entry *spot;
+  spot = dfa->state_table + (hash & dfa->state_hash_mask);
+
+  if (spot->alloc <= spot->num)
+    {
+      /* Commit the new capacity only after realloc succeeded; otherwise
+        a failed call would leave spot->alloc larger than the array and
+        a later registration would write past its end.  */
+      int new_alloc = 2 * spot->num + 2;
+      re_dfastate_t **new_array = re_realloc (spot->array, re_dfastate_t *,
+                                             new_alloc);
+      if (BE (new_array == NULL, 0))
+       return REG_ESPACE;
+      spot->array = new_array;
+      spot->alloc = new_alloc;
+    }
+  spot->array[spot->num++] = newstate;
+  return REG_NOERROR;
+}
+
+/* Create the new state which is independ of contexts.
+   Return the new state if succeeded, otherwise return NULL.  */
+
+static re_dfastate_t *
+create_ci_newstate (dfa, nodes, hash)
+     re_dfa_t *dfa;
+     const re_node_set *nodes;
+     unsigned int hash;
+{
+  int i;
+  reg_errcode_t err;
+  re_dfastate_t *newstate;
+  newstate = create_newstate_common (dfa, nodes, hash);
+  if (BE (newstate == NULL, 0))
+    return NULL;
+  newstate->entrance_nodes = &newstate->nodes;
+
+  for (i = 0 ; i < nodes->nelem ; i++)
+    {
+      re_token_t *node = dfa->nodes + nodes->elems[i];
+      re_token_type_t type = node->type;
+      if (type == CHARACTER && !node->constraint)
+       continue;
+
+      /* If the state has the halt node, the state is a halt state.  */
+      else if (type == END_OF_RE)
+       newstate->halt = 1;
+#ifdef RE_ENABLE_I18N
+      else if (type == COMPLEX_BRACKET
+              || (type == OP_PERIOD && MB_CUR_MAX > 1))
+       newstate->accept_mb = 1;
+#endif /* RE_ENABLE_I18N */
+      else if (type == OP_BACK_REF)
+       newstate->has_backref = 1;
+      else if (type == ANCHOR || node->constraint)
+       newstate->has_constraint = 1;
+    }
+  err = register_state (dfa, newstate, hash);
+  if (BE (err != REG_NOERROR, 0))
+    {
+      free_state (newstate);
+      newstate = NULL;
+    }
+  return newstate;
+}
+
+/* Create the new state which is depend on the context CONTEXT.
+   Return the new state if succeeded, otherwise return NULL.  */
+
+static re_dfastate_t *
+create_cd_newstate (dfa, nodes, context, hash)
+     re_dfa_t *dfa;
+     const re_node_set *nodes;
+     unsigned int context, hash;
+{
+  int i, nctx_nodes = 0;
+  reg_errcode_t err;
+  re_dfastate_t *newstate;
+
+  newstate = create_newstate_common (dfa, nodes, hash);
+  if (BE (newstate == NULL, 0))
+    return NULL;
+  newstate->context = context;
+  newstate->entrance_nodes = &newstate->nodes;
+
+  for (i = 0 ; i < nodes->nelem ; i++)
+    {
+      unsigned int constraint = 0;
+      re_token_t *node = dfa->nodes + nodes->elems[i];
+      re_token_type_t type = node->type;
+      if (node->constraint)
+       constraint = node->constraint;
+
+      if (type == CHARACTER && !constraint)
+       continue;
+      /* If the state has the halt node, the state is a halt state.  */
+      else if (type == END_OF_RE)
+       newstate->halt = 1;
+#ifdef RE_ENABLE_I18N
+      else if (type == COMPLEX_BRACKET
+              || (type == OP_PERIOD && MB_CUR_MAX > 1))
+       newstate->accept_mb = 1;
+#endif /* RE_ENABLE_I18N */
+      else if (type == OP_BACK_REF)
+       newstate->has_backref = 1;
+      else if (type == ANCHOR)
+       constraint = node->opr.ctx_type;
+
+      if (constraint)
+       {
+         if (newstate->entrance_nodes == &newstate->nodes)
+           {
+             newstate->entrance_nodes = re_malloc (re_node_set, 1);
+             if (BE (newstate->entrance_nodes == NULL, 0))
+               {
+                 free_state (newstate);
+                 return NULL;
+               }
+             re_node_set_init_copy (newstate->entrance_nodes, nodes);
+             nctx_nodes = 0;
+             newstate->has_constraint = 1;
+           }
+
+         if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context))
+           {
+             re_node_set_remove_at (&newstate->nodes, i - nctx_nodes);
+             ++nctx_nodes;
+           }
+       }
+    }
+  err = register_state (dfa, newstate, hash);
+  if (BE (err != REG_NOERROR, 0))
+    {
+      free_state (newstate);
+      newstate = NULL;
+    }
+  return  newstate;
+}
+
+/* Release STATE together with everything it owns: its node set, both
+   transition tables and, when it is a separate allocation, the entrance
+   node set.  */
+
+static void
+free_state (state)
+     re_dfastate_t *state;
+{
+  re_node_set *entrance = state->entrance_nodes;
+  /* ENTRANCE is owned separately only when it is not an alias of the
+     state's own node set.  */
+  if (entrance != &state->nodes)
+    {
+      re_node_set_free (entrance);
+      re_free (entrance);
+    }
+  re_node_set_free (&state->nodes);
+  re_free (state->trtable);
+  re_free (state->trtable_search);
+  re_free (state);
+}
diff --git a/lib/regex/regex_internal.h b/lib/regex/regex_internal.h

new file mode 100644 (file)

index 0000000..bf84ad6
--- /dev/null
+++ b/lib/regex/regex_internal.h
@@ -0,0 +1,742 @@
+/* Extended regular expression matching and search library.
+   Copyright (C) 2002, 2003 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#ifndef _REGEX_INTERNAL_H
+#define _REGEX_INTERNAL_H 1
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <assert.h>
+#include <ctype.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#if defined HAVE_LOCALE_H || defined _LIBC
+# include <locale.h>
+#endif
+#if defined HAVE_WCHAR_H || defined _LIBC
+# include <wchar.h>
+#endif /* HAVE_WCHAR_H || _LIBC */
+#if defined HAVE_WCTYPE_H || defined _LIBC
+# include <wctype.h>
+#endif /* HAVE_WCTYPE_H || _LIBC */
+
+/* In case that the system doesn't have isblank().  */
+#if !defined _LIBC && !defined HAVE_ISBLANK && !defined isblank
+# define isblank(ch) ((ch) == ' ' || (ch) == '\t')
+#endif
+
+#ifdef _LIBC
+# ifndef _RE_DEFINE_LOCALE_FUNCTIONS
+#  define _RE_DEFINE_LOCALE_FUNCTIONS 1
+#   include <locale/localeinfo.h>
+#   include <locale/elem-hash.h>
+#   include <locale/coll-lookup.h>
+# endif
+#endif
+
+/* This is for other GNU distributions with internationalized messages.  */
+#if HAVE_LIBINTL_H || defined _LIBC
+# include <libintl.h>
+# ifdef _LIBC
+#  undef gettext
+#  define gettext(msgid) \
+  INTUSE(__dcgettext) (INTUSE(_libc_intl_domainname), msgid, LC_MESSAGES)
+# endif
+#else
+# define gettext(msgid) (msgid)
+#endif
+
+#ifndef gettext_noop
+/* This define is so xgettext can find the internationalizable
+   strings.  */
+# define gettext_noop(String) String
+#endif
+
+#if (defined MB_CUR_MAX && HAVE_LOCALE_H && HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_WCRTOMB && HAVE_MBRTOWC && HAVE_WCSCOLL) || _LIBC
+# define RE_ENABLE_I18N
+#endif
+
+/* BE (expr, val) evaluates EXPR while telling the compiler that its value
+   is expected to be VAL.  Compilers other than GCC 3 or later get the bare
+   expression, and the `inline' keyword is defined away for them.  */
+#if __GNUC__ >= 3
+# define BE(expr, val) __builtin_expect (expr, val)
+#else
+# define BE(expr, val) (expr)
+# define inline
+#endif
+
+/* Number of bits in a byte.  */
+#define BYTE_BITS 8
+/* Number of single byte characters.  */
+#define SBC_MAX 256
+
+#define COLL_ELEM_LEN_MAX 8
+
+/* The character which represents newline.  */
+#define NEWLINE_CHAR '\n'
+#define WIDE_NEWLINE_CHAR L'\n'
+
+/* Rename to standard API for using out of glibc.  */
+#ifndef _LIBC
+# define __wctype wctype
+# define __iswctype iswctype
+# define __btowc btowc
+# define __mempcpy mempcpy
+# define __wcrtomb wcrtomb
+# define attribute_hidden
+#endif /* not _LIBC */
+
+extern const char __re_error_msgid[] attribute_hidden;
+extern const size_t __re_error_msgid_idx[] attribute_hidden;
+
+/* Number of bits in an unsigned int.  */
+#define UINT_BITS (sizeof (unsigned int) * BYTE_BITS)
+/* Number of unsigned ints in a bitset.  */
+#define BITSET_UINTS ((SBC_MAX + UINT_BITS - 1) / UINT_BITS)
+typedef unsigned int bitset[BITSET_UINTS];
+typedef unsigned int *re_bitset_ptr_t;
+
+/* Single-bit operations on a bitset.  The arguments are parenthesized so
+   that any expression may be passed, and the mask is built from an
+   unsigned one so that selecting the top bit of a word does not shift
+   into the sign bit of an int.  */
+#define bitset_set(set,i) \
+  ((set)[(i) / UINT_BITS] |= 1u << (i) % UINT_BITS)
+#define bitset_clear(set,i) \
+  ((set)[(i) / UINT_BITS] &= ~(1u << (i) % UINT_BITS))
+#define bitset_contain(set,i) \
+  ((set)[(i) / UINT_BITS] & (1u << (i) % UINT_BITS))
+/* Whole-set operations.  */
+#define bitset_empty(set) memset (set, 0, sizeof (unsigned int) * BITSET_UINTS)
+#define bitset_set_all(set) \
+  memset (set, 255, sizeof (unsigned int) * BITSET_UINTS)
+#define bitset_copy(dest,src) \
+  memcpy (dest, src, sizeof (unsigned int) * BITSET_UINTS)
+static inline void bitset_not (bitset set);
+static inline void bitset_merge (bitset dest, const bitset src);
+static inline void bitset_not_merge (bitset dest, const bitset src);
+
+/* Constraint bits.  The PREV_* bits are the ones tested by
+   NOT_SATISFY_PREV_CONSTRAINT and the NEXT_* bits the ones tested by
+   NOT_SATISFY_NEXT_CONSTRAINT, each against a CONTEXT_* value.  */
+#define PREV_WORD_CONSTRAINT 0x0001
+#define PREV_NOTWORD_CONSTRAINT 0x0002
+#define NEXT_WORD_CONSTRAINT 0x0004
+#define NEXT_NOTWORD_CONSTRAINT 0x0008
+#define PREV_NEWLINE_CONSTRAINT 0x0010
+#define NEXT_NEWLINE_CONSTRAINT 0x0020
+#define PREV_BEGBUF_CONSTRAINT 0x0040
+#define NEXT_ENDBUF_CONSTRAINT 0x0080
+#define DUMMY_CONSTRAINT 0x0100
+
+/* Named combinations of the constraint bits above; this is the type of
+   the `ctx_type' operand of an ANCHOR token.  */
+typedef enum
+{
+  INSIDE_WORD = PREV_WORD_CONSTRAINT | NEXT_WORD_CONSTRAINT,
+  WORD_FIRST = PREV_NOTWORD_CONSTRAINT | NEXT_WORD_CONSTRAINT,
+  WORD_LAST = PREV_WORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT,
+  LINE_FIRST = PREV_NEWLINE_CONSTRAINT,
+  LINE_LAST = NEXT_NEWLINE_CONSTRAINT,
+  BUF_FIRST = PREV_BEGBUF_CONSTRAINT,
+  BUF_LAST = NEXT_ENDBUF_CONSTRAINT,
+  WORD_DELIM = DUMMY_CONSTRAINT
+} re_context_type;
+
+/* A set of node indices stored in a growable int array.  The lookup and
+   insertion routines locate elements by binary search, so the array is
+   presumably kept in ascending order -- confirm against all writers.  */
+typedef struct
+{
+  /* Capacity of ELEMS, counted in elements.  */
+  int alloc;
+  /* Number of elements currently stored.  */
+  int nelem;
+  /* The elements themselves.  */
+  int *elems;
+} re_node_set;
+
+/* Kinds of tokens, parse-tree entries and DFA nodes.  The enumerators are
+   grouped by where they are used, as the comments inside say.  Note that
+   COMPLEX_BRACKET exists only when RE_ENABLE_I18N is defined, so the
+   numeric values of the enumerators after it depend on that setting.  */
+typedef enum
+{
+  NON_TYPE = 0,
+
+  /* Token type, these are used only by token.  */
+  OP_OPEN_BRACKET,
+  OP_CLOSE_BRACKET,
+  OP_CHARSET_RANGE,
+  OP_OPEN_DUP_NUM,
+  OP_CLOSE_DUP_NUM,
+  OP_NON_MATCH_LIST,
+  OP_OPEN_COLL_ELEM,
+  OP_CLOSE_COLL_ELEM,
+  OP_OPEN_EQUIV_CLASS,
+  OP_CLOSE_EQUIV_CLASS,
+  OP_OPEN_CHAR_CLASS,
+  OP_CLOSE_CHAR_CLASS,
+  OP_WORD,
+  OP_NOTWORD,
+  BACK_SLASH,
+
+  /* Tree type, these are used only by tree. */
+  CONCAT,
+  ALT,
+  SUBEXP,
+  SIMPLE_BRACKET,
+#ifdef RE_ENABLE_I18N
+  COMPLEX_BRACKET,
+#endif /* RE_ENABLE_I18N */
+
+  /* Node type, These are used by token, node, tree.  */
+  OP_OPEN_SUBEXP,
+  OP_CLOSE_SUBEXP,
+  OP_PERIOD,
+  CHARACTER,
+  END_OF_RE,
+  OP_ALT,
+  OP_DUP_ASTERISK,
+  OP_DUP_PLUS,
+  OP_DUP_QUESTION,
+  OP_BACK_REF,
+  ANCHOR,
+
+  /* Dummy marker.  */
+  END_OF_RE_TOKEN_T
+} re_token_type_t;
+
+#ifdef RE_ENABLE_I18N
+/* Description of a bracket expression for the multibyte case (the operand
+   of a COMPLEX_BRACKET token).  Each array below is paired with a count
+   further down in the structure; collating symbols and equivalence classes
+   are stored only when built as part of glibc (_LIBC).  */
+typedef struct
+{
+  /* Multibyte characters.  */
+  wchar_t *mbchars;
+
+  /* Collating symbols.  */
+# ifdef _LIBC
+  int32_t *coll_syms;
+# endif
+
+  /* Equivalence classes. */
+# ifdef _LIBC
+  int32_t *equiv_classes;
+# endif
+
+  /* Range expressions. */
+# ifdef _LIBC
+  uint32_t *range_starts;
+  uint32_t *range_ends;
+# else /* not _LIBC */
+  wchar_t *range_starts;
+  wchar_t *range_ends;
+# endif /* not _LIBC */
+
+  /* Character classes. */
+  wctype_t *char_classes;
+
+  /* If this character set is the non-matching list.  */
+  unsigned int non_match : 1;
+
+  /* # of multibyte characters.  */
+  int nmbchars;
+
+  /* # of collating symbols.  */
+  int ncoll_syms;
+
+  /* # of equivalence classes. */
+  int nequiv_classes;
+
+  /* # of range expressions. */
+  int nranges;
+
+  /* # of character classes. */
+  int nchar_classes;
+} re_charset_t;
+#endif /* RE_ENABLE_I18N */
+
+/* A token, which is also the element type of dfa->nodes.  TYPE selects
+   which member of the OPR union is meaningful, as noted beside each
+   member.  */
+typedef struct
+{
+  union
+  {
+    unsigned char c;           /* for CHARACTER */
+    re_bitset_ptr_t sbcset;    /* for SIMPLE_BRACKET */
+#ifdef RE_ENABLE_I18N
+    re_charset_t *mbcset;      /* for COMPLEX_BRACKET */
+#endif /* RE_ENABLE_I18N */
+    int idx;                   /* for BACK_REF */
+    re_context_type ctx_type;  /* for ANCHOR */
+  } opr;
+  /* With GCC the type is squeezed into 8 bits so that it shares a word
+     with the flags below.  */
+#if __GNUC__ >= 2
+  re_token_type_t type : 8;
+#else
+  re_token_type_t type;
+#endif
+  unsigned int constraint : 10;        /* context constraint */
+  /* Cleared, like CONSTRAINT, whenever re_dfa_add_node stores a token.  */
+  unsigned int duplicated : 1;
+#ifdef RE_ENABLE_I18N
+  unsigned int mb_partial : 1;
+#endif
+} re_token_t;
+
+/* Nonzero if TYPE is one of the operator, anchor or sub-expression
+   boundary node types listed below.  */
+#define IS_EPSILON_NODE(type) \
+  ((type) == OP_ALT || (type) == OP_DUP_ASTERISK || (type) == OP_DUP_PLUS \
+   || (type) == OP_DUP_QUESTION || (type) == ANCHOR \
+   || (type) == OP_OPEN_SUBEXP || (type) == OP_CLOSE_SUBEXP)
+
+/* Nonzero if TYPE is COMPLEX_BRACKET or OP_PERIOD.  COMPLEX_BRACKET is
+   declared only under RE_ENABLE_I18N, so this macro can be expanded only
+   in code compiled with that setting.  */
+#define ACCEPT_MB_NODE(type) \
+  ((type) == COMPLEX_BRACKET || (type) == OP_PERIOD)
+
+/* The string being scanned: the caller's raw buffer, the working copies
+   derived from it (case-folded, translated and, with RE_ENABLE_I18N, wide
+   character), and the lengths and position that describe them.  */
+struct re_string_t
+{
+  /* Indicate the raw buffer which is the original string passed as an
+     argument of regexec(), re_search(), etc..  */
+  const unsigned char *raw_mbs;
+  /* Store the multibyte string.  In case of "case insensitive mode" like
+     REG_ICASE, upper cases of the string are stored, otherwise MBS points
+     the same address that RAW_MBS points.  */
+  unsigned char *mbs;
+  /* Store the case sensitive multibyte string.  In case of
+     "case insensitive mode", the original string are stored,
+     otherwise MBS_CASE points the same address that MBS points.  */
+  unsigned char *mbs_case;
+#ifdef RE_ENABLE_I18N
+  /* Store the wide character string which is corresponding to MBS.  */
+  wint_t *wcs;
+  mbstate_t cur_state;
+#endif
+  /* Index in RAW_MBS.  Each character mbs[i] corresponds to
+     raw_mbs[raw_mbs_idx + i].  */
+  int raw_mbs_idx;
+  /* The length of the valid characters in the buffers.  */
+  int valid_len;
+  /* The length of the buffers MBS, MBS_CASE, and WCS.  */
+  int bufs_len;
+  /* The index in MBS, which is updated by re_string_fetch_byte.  */
+  int cur_idx;
+  /* This is length_of_RAW_MBS - RAW_MBS_IDX.  */
+  int len;
+  /* End of the buffer may be shorter than its length in the cases such
+     as re_match_2, re_search_2.  Then, we use STOP for end of the buffer
+     instead of LEN.  */
+  int stop;
+
+  /* The context of mbs[0].  We store the context independently, since
+     the context of mbs[0] may be different from raw_mbs[0], which is
+     the beginning of the input string.  */
+  unsigned int tip_context;
+  /* The translation passed as a part of an argument of re_compile_pattern.  */
+  RE_TRANSLATE_TYPE trans;
+  /* 1 if REG_ICASE.  */
+  unsigned int icase : 1;
+};
+typedef struct re_string_t re_string_t;
+/* In case of REG_ICASE, we allocate the buffer dynamically for mbs.
+   PSTR is parenthesized so that any pointer expression may be passed.  */
+#define MBS_ALLOCATED(pstr) ((pstr)->icase)
+/* In case that we need translation, we allocate the buffer dynamically
+   for mbs_case.  Note that mbs == mbs_case if not REG_ICASE.  */
+#define MBS_CASE_ALLOCATED(pstr) ((pstr)->trans != NULL)
+
+
+static reg_errcode_t re_string_allocate (re_string_t *pstr, const char *str,
+                                        int len, int init_len,
+                                        RE_TRANSLATE_TYPE trans, int icase);
+static reg_errcode_t re_string_construct (re_string_t *pstr, const char *str,
+                                         int len, RE_TRANSLATE_TYPE trans,
+                                         int icase);
+static reg_errcode_t re_string_reconstruct (re_string_t *pstr, int idx,
+                                           int eflags, int newline);
+static reg_errcode_t re_string_realloc_buffers (re_string_t *pstr,
+                                               int new_buf_len);
+#ifdef RE_ENABLE_I18N
+static void build_wcs_buffer (re_string_t *pstr);
+static void build_wcs_upper_buffer (re_string_t *pstr);
+#endif /* RE_ENABLE_I18N */
+static void build_upper_buffer (re_string_t *pstr);
+static void re_string_translate_buffer (re_string_t *pstr);
+static void re_string_destruct (re_string_t *pstr);
+#ifdef RE_ENABLE_I18N
+static int re_string_elem_size_at (const re_string_t *pstr, int idx);
+static inline int re_string_char_size_at (const re_string_t *pstr, int idx);
+static inline wint_t re_string_wchar_at (const re_string_t *pstr, int idx);
+#endif /* RE_ENABLE_I18N */
+static unsigned int re_string_context_at (const re_string_t *input, int idx,
+                                         int eflags, int newline_anchor);
+/* Accessors for re_string_t.  The peek macros read relative to the
+   current index without moving it, the fetch macros read the current byte
+   and advance.  Every macro argument is parenthesized so that arbitrary
+   expressions (e.g. `a - b' or a conditional) can be passed safely.  */
+#define re_string_peek_byte(pstr, offset) \
+  ((pstr)->mbs[(pstr)->cur_idx + (offset)])
+#define re_string_peek_byte_case(pstr, offset) \
+  ((pstr)->mbs_case[(pstr)->cur_idx + (offset)])
+#define re_string_fetch_byte(pstr) \
+  ((pstr)->mbs[(pstr)->cur_idx++])
+#define re_string_fetch_byte_case(pstr) \
+  ((pstr)->mbs_case[(pstr)->cur_idx++])
+/* The two macros below read the WCS buffer, which exists only under
+   RE_ENABLE_I18N.  */
+#define re_string_first_byte(pstr, idx) \
+  ((idx) == (pstr)->len || (pstr)->wcs[(idx)] != WEOF)
+#define re_string_is_single_byte_char(pstr, idx) \
+  ((pstr)->wcs[(idx)] != WEOF && ((pstr)->len == (idx) \
+                               || (pstr)->wcs[(idx) + 1] != WEOF))
+/* Nonzero once the current index has reached STOP.  */
+#define re_string_eoi(pstr) ((pstr)->stop <= (pstr)->cur_idx)
+#define re_string_cur_idx(pstr) ((pstr)->cur_idx)
+#define re_string_get_buffer(pstr) ((pstr)->mbs)
+#define re_string_length(pstr) ((pstr)->len)
+#define re_string_byte_at(pstr,idx) ((pstr)->mbs[(idx)])
+#define re_string_skip_bytes(pstr,idx) ((pstr)->cur_idx += (idx))
+#define re_string_set_index(pstr,idx) ((pstr)->cur_idx = (idx))
+
+/* Typed wrappers around malloc, realloc and free: T is the element type
+   and N the number of elements.  */
+#define re_malloc(t,n) ((t *) malloc ((n) * sizeof (t)))
+#define re_realloc(p,t,n) ((t *) realloc (p, (n) * sizeof (t)))
+#define re_free(p) free (p)
+
+/* A binary tree node with links to its parent and to both children.  */
+struct bin_tree_t
+{
+  struct bin_tree_t *parent;
+  struct bin_tree_t *left;
+  struct bin_tree_t *right;
+
+  /* `node_idx' is the index in dfa->nodes, if `type' == 0.
+     Otherwise `type' indicate the type of this node.  */
+  re_token_type_t type;
+  int node_idx;
+
+  /* NOTE(review): FIRST, NEXT and ECLOSURE are presumably filled in by
+     the compiler passes in regcomp.c -- their meaning is not visible in
+     this file.  */
+  int first;
+  int next;
+  re_node_set eclosure;
+};
+typedef struct bin_tree_t bin_tree_t;
+
+
+/* Context bits.  Four distinct bits are used, so a context value needs
+   four bits of storage.  */
+#define CONTEXT_WORD 1
+#define CONTEXT_NEWLINE (CONTEXT_WORD << 1)
+#define CONTEXT_BEGBUF (CONTEXT_NEWLINE << 1)
+#define CONTEXT_ENDBUF (CONTEXT_BEGBUF << 1)
+
+/* Tests for the individual context bits.  */
+#define IS_WORD_CONTEXT(c) ((c) & CONTEXT_WORD)
+#define IS_NEWLINE_CONTEXT(c) ((c) & CONTEXT_NEWLINE)
+#define IS_BEGBUF_CONTEXT(c) ((c) & CONTEXT_BEGBUF)
+#define IS_ENDBUF_CONTEXT(c) ((c) & CONTEXT_ENDBUF)
+#define IS_ORDINARY_CONTEXT(c) ((c) == 0)
+
+/* Character classification used to derive contexts.  */
+#define IS_WORD_CHAR(ch) (isalnum (ch) || (ch) == '_')
+#define IS_NEWLINE(ch) ((ch) == NEWLINE_CHAR)
+#define IS_WIDE_WORD_CHAR(ch) (iswalnum (ch) || (ch) == L'_')
+#define IS_WIDE_NEWLINE(ch) ((ch) == WIDE_NEWLINE_CHAR)
+
+/* Nonzero if CONTEXT violates one of the PREV_* bits set in CONSTRAINT.
+   CONSTRAINT is parenthesized in every clause so that an expression such
+   as `a | b' can be passed without `&' binding to only part of it.  */
+#define NOT_SATISFY_PREV_CONSTRAINT(constraint,context) \
+ ((((constraint) & PREV_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \
+  || (((constraint) & PREV_NOTWORD_CONSTRAINT) && IS_WORD_CONTEXT (context)) \
+  || (((constraint) & PREV_NEWLINE_CONSTRAINT) && !IS_NEWLINE_CONTEXT (context))\
+  || (((constraint) & PREV_BEGBUF_CONSTRAINT) && !IS_BEGBUF_CONTEXT (context)))
+
+/* Nonzero if CONTEXT violates one of the NEXT_* bits set in CONSTRAINT.  */
+#define NOT_SATISFY_NEXT_CONSTRAINT(constraint,context) \
+ ((((constraint) & NEXT_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \
+  || (((constraint) & NEXT_NOTWORD_CONSTRAINT) && IS_WORD_CONTEXT (context)) \
+  || (((constraint) & NEXT_NEWLINE_CONSTRAINT) && !IS_NEWLINE_CONTEXT (context)) \
+  || (((constraint) & NEXT_ENDBUF_CONSTRAINT) && !IS_ENDBUF_CONTEXT (context)))
+
+/* One state of the DFA: a set of nodes plus per-state flags.
+   TRTABLE and TRTABLE_SEARCH are tables of successor states.
+   NOTE(review): they are presumably the tables returned by
+   build_trtable for its two FL_SEARCH modes -- confirm.  */
+struct re_dfastate_t
+{
+  unsigned int hash;
+  re_node_set nodes;
+  re_node_set *entrance_nodes;
+  struct re_dfastate_t **trtable;
+  struct re_dfastate_t **trtable_search;
+  /* If this state is a special state.
+     A state is a special state if the state is the halt state, or
+     an anchor.  */
+  /* CONTEXT is meant to hold a mask of CONTEXT_* bits.  The largest
+     flag, CONTEXT_ENDBUF, is 1 << 3, so the field must be four bits
+     wide; a two-bit field silently drops CONTEXT_BEGBUF and
+     CONTEXT_ENDBUF.  All bit-fields still fit in one unsigned int.  */
+  unsigned int context : 4;
+  unsigned int halt : 1;
+  /* If this state can accept `multi byte'.
+     Note that we refer to multibyte characters, and multi character
+     collating elements as `multi byte'.  */
+  unsigned int accept_mb : 1;
+  /* If this state has backreference node(s).  */
+  unsigned int has_backref : 1;
+  unsigned int has_constraint : 1;
+};
+typedef struct re_dfastate_t re_dfastate_t;
+
+/* Half-open range of node indices, START <= node < END.  re_dfa_t
+   keeps an array of these in its `subexps' member.  */
+typedef struct
+{
+  /* start <= node < end  */
+  int start;
+  int end;
+} re_subexp_t;
+
+/* Array of DFA state pointers.  re_dfa_t holds an array of these as
+   `state_table', next to `state_hash_mask'.
+   NOTE(review): presumably one hash bucket, with NUM the number of
+   used slots and ALLOC the capacity of ARRAY -- confirm against the
+   code that fills it.  */
+struct re_state_table_entry
+{
+  int num;
+  int alloc;
+  re_dfastate_t **array;
+};
+
+/* Array type used in re_sub_match_last_t and re_sub_match_top_t.
+   NOTE(review): NEXT_IDX is presumably the next slot to fill and
+   ALLOC the capacity of ARRAY -- confirm.  */
+
+typedef struct
+{
+  int next_idx;
+  int alloc;
+  re_dfastate_t **array;
+} state_array_t;
+
+/* Store information about the node NODE whose type is OP_CLOSE_SUBEXP.
+   The state array PATH is embedded by value.  */
+
+typedef struct
+{
+  int node;
+  int str_idx; /* The position NODE matches at.  */
+  state_array_t path;
+} re_sub_match_last_t;
+
+/* Store information about the node NODE whose type is OP_OPEN_SUBEXP.
+   Information about the nodes of type OP_CLOSE_SUBEXP that correspond
+   to NODE is stored in LASTS, an array of pointers to
+   re_sub_match_last_t.  Unlike re_sub_match_last_t, PATH is held by
+   pointer here.  */
+
+typedef struct
+{
+  int str_idx;
+  int node;
+  int next_last_offset;
+  state_array_t *path;
+  int alasts; /* Allocation size of LASTS.  */
+  int nlasts; /* The number of LASTS.  */
+  re_sub_match_last_t **lasts;
+} re_sub_match_top_t;
+
+/* One entry of the back reference cache that re_match_context_t keeps
+   in `bkref_ents'.  The first four members mirror the NODE, STR_IDX,
+   FROM and TO arguments of match_ctx_add_entry.
+   NOTE(review): the meaning of FLAG is not visible here; see
+   match_ctx_clear_flag.  */
+struct re_backref_cache_entry
+{
+  int node;
+  int str_idx;
+  int subexp_from;
+  int subexp_to;
+  int flag;
+};
+
+/* State shared by the matching routines during one search; it is set
+   up by match_ctx_init and released by match_ctx_free.  */
+typedef struct
+{
+  /* EFLAGS of the argument of regexec.  */
+  int eflags;
+  /* Where the matching ends.  */
+  int match_last;
+  int last_node;
+  /* The string object corresponding to the input string.  */
+  re_string_t *input;
+  /* The state log used by the matcher.  */
+  re_dfastate_t **state_log;
+  int state_log_top;
+  /* Back reference cache.
+     NOTE(review): NBKREF_ENTS / ABKREF_ENTS are presumably the used
+     and allocated entry counts of BKREF_ENTS -- confirm.  */
+  int nbkref_ents;
+  int abkref_ents;
+  struct re_backref_cache_entry *bkref_ents;
+  int max_mb_elem_len;
+  /* NOTE(review): NSUB_TOPS / ASUB_TOPS are presumably the used and
+     allocated lengths of SUB_TOPS -- confirm.  */
+  int nsub_tops;
+  int asub_tops;
+  re_sub_match_top_t **sub_tops;
+} re_match_context_t;
+
+/* Working data of the state sifting routines (sift_states_backward and
+   friends).  SIFTED_STATES, LIMITED_STATES, LAST_NODE, LAST_STR_IDX
+   and CHECK_SUBEXP correspond to the arguments of sift_ctx_init.  */
+typedef struct
+{
+  int cur_bkref;
+  int cls_subexp_idx;
+
+  re_dfastate_t **sifted_states;
+  re_dfastate_t **limited_states;
+
+  re_node_set limits;
+
+  int last_node;
+  int last_str_idx;
+  int check_subexp;
+} re_sift_context_t;
+
+/* One entry of a struct re_fail_stack_t.
+   NOTE(review): presumably the string index, node, registers and
+   epsilon-visited node set saved by push_fail_stack and restored by
+   pop_fail_stack -- confirm against those functions.  */
+struct re_fail_stack_ent_t
+{
+  int idx;
+  int node;
+  regmatch_t *regs;
+  re_node_set eps_via_nodes;
+};
+
+/* Stack of re_fail_stack_ent_t entries, handled by push_fail_stack and
+   pop_fail_stack.
+   NOTE(review): NUM is presumably the current depth and ALLOC the
+   capacity of STACK -- confirm.  */
+struct re_fail_stack_t
+{
+  int num;
+  int alloc;
+  struct re_fail_stack_ent_t *stack;
+};
+
+/* The compiled automaton.  re_search_internal obtains it by casting
+   the `buffer' member of the regex_t it is given.  */
+struct re_dfa_t
+{
+  re_bitset_ptr_t word_char;
+
+  /* number of subexpressions `re_nsub' is in regex_t.  */
+  int subexps_alloc;
+  re_subexp_t *subexps;
+
+  /* Node array; bin_tree_t.node_idx indexes into it.
+     NOTE(review): NODES_ALLOC / NODES_LEN are presumably its capacity
+     and used length -- confirm.  */
+  re_token_t *nodes;
+  int nodes_alloc;
+  int nodes_len;
+  bin_tree_t *str_tree;
+  int *nexts;
+  int *org_indices;
+  re_node_set *edests;
+  re_node_set *eclosures;
+  re_node_set *inveclosures;
+  struct re_state_table_entry *state_table;
+  unsigned int state_hash_mask;
+  /* Initial states.  re_search_internal treats the DFA as not
+     compiled when any of these four pointers is NULL.  */
+  re_dfastate_t *init_state;
+  re_dfastate_t *init_state_word;
+  re_dfastate_t *init_state_nl;
+  re_dfastate_t *init_state_begbuf;
+  int states_alloc;
+  int init_node;
+  int nbackref; /* The number of backreference in this dfa.  */
+  /* Bitmap expressing which backreference is used.  */
+  unsigned int used_bkref_map;
+#ifdef DEBUG
+  char* re_str;
+#endif
+  unsigned int has_plural_match : 1;
+  /* If this dfa has "multibyte node", which is a backreference or
+     a node which can accept multibyte character or multi character
+     collating element.  */
+  unsigned int has_mb_node : 1;
+};
+typedef struct re_dfa_t re_dfa_t;
+
+static reg_errcode_t re_node_set_alloc (re_node_set *set, int size);
+static reg_errcode_t re_node_set_init_1 (re_node_set *set, int elem);
+static reg_errcode_t re_node_set_init_2 (re_node_set *set, int elem1,
+                                        int elem2);
+/* Initialize *SET by zero-filling the whole re_node_set structure.  */
+#define re_node_set_init_empty(set) memset (set, '\0', sizeof (re_node_set))
+static reg_errcode_t re_node_set_init_copy (re_node_set *dest,
+                                           const re_node_set *src);
+static reg_errcode_t re_node_set_add_intersect (re_node_set *dest,
+                                               const re_node_set *src1,
+                                               const re_node_set *src2);
+static reg_errcode_t re_node_set_init_union (re_node_set *dest,
+                                            const re_node_set *src1,
+                                            const re_node_set *src2);
+static reg_errcode_t re_node_set_merge (re_node_set *dest,
+                                       const re_node_set *src);
+static int re_node_set_insert (re_node_set *set, int elem);
+static int re_node_set_compare (const re_node_set *set1,
+                               const re_node_set *set2);
+static int re_node_set_contains (const re_node_set *set, int elem);
+static void re_node_set_remove_at (re_node_set *set, int idx);
+/* Remove ID from SET by position: the value of re_node_set_contains
+   minus one is passed to re_node_set_remove_at.
+   NOTE(review): this presumes re_node_set_contains yields a 1-based
+   position and that ID is actually in SET -- confirm.  */
+#define re_node_set_remove(set,id) \
+  (re_node_set_remove_at (set, re_node_set_contains (set, id) - 1))
+/* Forget all elements of P by resetting its element count; the
+   storage is left as it is.  */
+#define re_node_set_empty(p) ((p)->nelem = 0)
+/* Release the element array of SET (not the re_node_set itself).  */
+#define re_node_set_free(set) re_free ((set)->elems)
+static int re_dfa_add_node (re_dfa_t *dfa, re_token_t token, int mode);
+static re_dfastate_t *re_acquire_state (reg_errcode_t *err, re_dfa_t *dfa,
+                                       const re_node_set *nodes);
+static re_dfastate_t *re_acquire_state_context (reg_errcode_t *err,
+                                               re_dfa_t *dfa,
+                                               const re_node_set *nodes,
+                                               unsigned int context);
+static void free_state (re_dfastate_t *state);
+\f
+
+/* Discriminator for bracket_elem_t.
+   NOTE(review): judging by the names -- single-byte character,
+   multibyte character, equivalence class, collating symbol and
+   character class of a bracket expression; confirm with the parser.  */
+typedef enum
+{
+  SB_CHAR,
+  MB_CHAR,
+  EQUIV_CLASS,
+  COLL_SYM,
+  CHAR_CLASS
+} bracket_elem_type;
+
+/* A tagged value: TYPE tells which member of OPR is meaningful.
+   NOTE(review): presumably CH for SB_CHAR, WCH for MB_CHAR and NAME
+   for the named kinds -- confirm with the code that fills it.  */
+typedef struct
+{
+  bracket_elem_type type;
+  union
+  {
+    unsigned char ch;
+    unsigned char *name;
+    wchar_t wch;
+  } opr;
+} bracket_elem_t;
+
+
+/* Inline functions for bitset operation.  */
+/* Complement every element of SET in place.  */
+static inline void
+bitset_not (set)
+     bitset set;
+{
+  int pos = BITSET_UINTS;
+
+  /* The elements are independent, so the walk direction is free.  */
+  while (pos-- > 0)
+    set[pos] = ~set[pos];
+}
+
+/* OR every element of SRC into the matching element of DEST.  */
+static inline void
+bitset_merge (dest, src)
+     bitset dest;
+     const bitset src;
+{
+  int pos = 0;
+
+  while (pos < BITSET_UINTS)
+    {
+      dest[pos] = dest[pos] | src[pos];
+      ++pos;
+    }
+}
+
+/* OR the complement of every element of SRC into the matching
+   element of DEST.  */
+static inline void
+bitset_not_merge (dest, src)
+     bitset dest;
+     const bitset src;
+{
+  int pos = BITSET_UINTS;
+
+  while (pos-- > 0)
+    dest[pos] = dest[pos] | ~src[pos];
+}
+
+#ifdef RE_ENABLE_I18N
+/* Inline functions for re_string.  */
+/* Return the number of positions the character starting at IDX
+   occupies in PSTR.  This is always 1 when MB_CUR_MAX is 1; otherwise
+   it is 1 plus the length of the run of WEOF entries that follows IDX
+   in PSTR->wcs, bounded by PSTR->len.  */
+static inline int
+re_string_char_size_at (pstr, idx)
+     const re_string_t *pstr;
+     int idx;
+{
+  int size = 1;
+
+  if (MB_CUR_MAX != 1)
+    while (idx + size < pstr->len && pstr->wcs[idx + size] == WEOF)
+      ++size;
+  return size;
+}
+
+/* Return the character of PSTR at IDX as a wint_t: taken from the
+   byte buffer when MB_CUR_MAX is 1, from the wide buffer otherwise.  */
+static inline wint_t
+re_string_wchar_at (pstr, idx)
+     const re_string_t *pstr;
+     int idx;
+{
+  return (MB_CUR_MAX == 1
+          ? (wint_t) pstr->mbs[idx]
+          : (wint_t) pstr->wcs[idx]);
+}
+
+/* Return the size in bytes of the element of PSTR that starts at IDX.
+   Outside of glibc (_LIBC undefined), or when the collation locale
+   has no rules, the answer is always 1.  Otherwise findidx is asked
+   to advance a pointer over the element and the distance covered is
+   returned.  */
+static int
+re_string_elem_size_at (pstr, idx)
+     const re_string_t *pstr;
+     int idx;
+{
+#ifdef _LIBC
+  const unsigned char *p, *extra;
+  const int32_t *table, *indirect;
+  int32_t tmp;
+  /* NOTE(review): weight.h presumably supplies findidx, which reads
+     the locals TABLE, EXTRA and INDIRECT set up below -- confirm.  */
+# include <locale/weight.h>
+  uint_fast32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
+
+  if (nrules != 0)
+    {
+      table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
+      extra = (const unsigned char *)
+       _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
+      indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
+                                               _NL_COLLATE_INDIRECTMB);
+      p = pstr->mbs + idx;
+      /* Only the side effect on P matters; the value stored in TMP is
+         not used afterwards.  */
+      tmp = findidx (&p);
+      return p - pstr->mbs - idx;
+    }
+  else
+#endif /* _LIBC */
+    return 1;
+}
+#endif /* RE_ENABLE_I18N */
+
+#endif /*  _REGEX_INTERNAL_H */
diff --git a/lib/regex/regexec.c b/lib/regex/regexec.c

new file mode 100644 (file)

index 0000000..6ea14a6
--- /dev/null
+++ b/lib/regex/regexec.c
@@ -0,0 +1,3977 @@
+/* Extended regular expression matching and search library.
+   Copyright (C) 2002, 2003 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
+                                    re_string_t *input, int n);
+static void match_ctx_clean (re_match_context_t *mctx);
+static void match_ctx_free (re_match_context_t *cache);
+static void match_ctx_free_subtops (re_match_context_t *mctx);
+static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node,
+                                         int str_idx, int from, int to);
+static int search_cur_bkref_entry (re_match_context_t *mctx, int str_idx);
+static void match_ctx_clear_flag (re_match_context_t *mctx);
+static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, int node,
+                                          int str_idx);
+static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
+                                                  int node, int str_idx);
+static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
+                          re_dfastate_t **limited_sts, int last_node,
+                          int last_str_idx, int check_subexp);
+static reg_errcode_t re_search_internal (const regex_t *preg,
+                                        const char *string, int length,
+                                        int start, int range, int stop,
+                                        size_t nmatch, regmatch_t pmatch[],
+                                        int eflags);
+static int re_search_2_stub (struct re_pattern_buffer *bufp,
+                            const char *string1, int length1,
+                            const char *string2, int length2,
+                            int start, int range, struct re_registers *regs,
+                            int stop, int ret_len);
+static int re_search_stub (struct re_pattern_buffer *bufp,
+                          const char *string, int length, int start,
+                          int range, int stop, struct re_registers *regs,
+                          int ret_len);
+static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
+                             int nregs, int regs_allocated);
+static inline re_dfastate_t *acquire_init_state_context (reg_errcode_t *err,
+                                                        const regex_t *preg,
+                                                        const re_match_context_t *mctx,
+                                                        int idx);
+static reg_errcode_t prune_impossible_nodes (const regex_t *preg,
+                                            re_match_context_t *mctx);
+static int check_matching (const regex_t *preg, re_match_context_t *mctx,
+                          int fl_search, int fl_longest_match);
+static int check_halt_node_context (const re_dfa_t *dfa, int node,
+                                   unsigned int context);
+static int check_halt_state_context (const regex_t *preg,
+                                    const re_dfastate_t *state,
+                                    const re_match_context_t *mctx, int idx);
+static void update_regs (re_dfa_t *dfa, regmatch_t *pmatch, int cur_node,
+                        int cur_idx, int nmatch);
+static int proceed_next_node (const regex_t *preg, int nregs, regmatch_t *regs,
+                             const re_match_context_t *mctx,
+                             int *pidx, int node, re_node_set *eps_via_nodes,
+                             struct re_fail_stack_t *fs);
+static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
+                                     int str_idx, int *dests, int nregs,
+                                     regmatch_t *regs,
+                                     re_node_set *eps_via_nodes);
+static int pop_fail_stack (struct re_fail_stack_t *fs, int *pidx, int nregs,
+                          regmatch_t *regs, re_node_set *eps_via_nodes);
+static reg_errcode_t set_regs (const regex_t *preg,
+                              const re_match_context_t *mctx,
+                              size_t nmatch, regmatch_t *pmatch,
+                              int fl_backtrack);
+static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs);
+
+#ifdef RE_ENABLE_I18N
+static int sift_states_iter_mb (const regex_t *preg,
+                               const re_match_context_t *mctx,
+                               re_sift_context_t *sctx,
+                               int node_idx, int str_idx, int max_str_idx);
+#endif /* RE_ENABLE_I18N */
+static reg_errcode_t sift_states_backward (const regex_t *preg,
+                                          re_match_context_t *mctx,
+                                          re_sift_context_t *sctx);
+static reg_errcode_t update_cur_sifted_state (const regex_t *preg,
+                                             re_match_context_t *mctx,
+                                             re_sift_context_t *sctx,
+                                             int str_idx,
+                                             re_node_set *dest_nodes);
+static reg_errcode_t add_epsilon_src_nodes (re_dfa_t *dfa,
+                                           re_node_set *dest_nodes,
+                                           const re_node_set *candidates);
+static reg_errcode_t sub_epsilon_src_nodes (re_dfa_t *dfa, int node,
+                                           re_node_set *dest_nodes,
+                                           const re_node_set *and_nodes);
+static int check_dst_limits (re_dfa_t *dfa, re_node_set *limits,
+                            re_match_context_t *mctx, int dst_node,
+                            int dst_idx, int src_node, int src_idx);
+static int check_dst_limits_calc_pos (re_dfa_t *dfa, re_match_context_t *mctx,
+                                     int limit, re_node_set *eclosures,
+                                     int subexp_idx, int node, int str_idx);
+static reg_errcode_t check_subexp_limits (re_dfa_t *dfa,
+                                         re_node_set *dest_nodes,
+                                         const re_node_set *candidates,
+                                         re_node_set *limits,
+                                         struct re_backref_cache_entry *bkref_ents,
+                                         int str_idx);
+static reg_errcode_t sift_states_bkref (const regex_t *preg,
+                                       re_match_context_t *mctx,
+                                       re_sift_context_t *sctx,
+                                       int str_idx, re_node_set *dest_nodes);
+static reg_errcode_t clean_state_log_if_need (re_match_context_t *mctx,
+                                             int next_state_log_idx);
+static reg_errcode_t merge_state_array (re_dfa_t *dfa, re_dfastate_t **dst,
+                                       re_dfastate_t **src, int num);
+static re_dfastate_t *transit_state (reg_errcode_t *err, const regex_t *preg,
+                                    re_match_context_t *mctx,
+                                    re_dfastate_t *state, int fl_search);
+static reg_errcode_t check_subexp_matching_top (re_dfa_t *dfa,
+                                               re_match_context_t *mctx,
+                                               re_node_set *cur_nodes,
+                                               int str_idx);
+static re_dfastate_t *transit_state_sb (reg_errcode_t *err, const regex_t *preg,
+                                       re_dfastate_t *pstate,
+                                       int fl_search,
+                                       re_match_context_t *mctx);
+#ifdef RE_ENABLE_I18N
+static reg_errcode_t transit_state_mb (const regex_t *preg,
+                                      re_dfastate_t *pstate,
+                                      re_match_context_t *mctx);
+#endif /* RE_ENABLE_I18N */
+static reg_errcode_t transit_state_bkref (const regex_t *preg,
+                                         re_node_set *nodes,
+                                         re_match_context_t *mctx);
+static reg_errcode_t get_subexp (const regex_t *preg, re_match_context_t *mctx,
+                                int bkref_node, int bkref_str_idx);
+static reg_errcode_t get_subexp_sub (const regex_t *preg,
+                                    re_match_context_t *mctx,
+                                    re_sub_match_top_t *sub_top,
+                                    re_sub_match_last_t *sub_last,
+                                    int bkref_node, int bkref_str);
+static int find_subexp_node (re_dfa_t *dfa, re_node_set *nodes,
+                            int subexp_idx, int fl_open);
+static reg_errcode_t check_arrival (const regex_t *preg,
+                                   re_match_context_t *mctx,
+                                   state_array_t *path, int top_node,
+                                   int top_str, int last_node, int last_str,
+                                   int fl_open);
+static reg_errcode_t check_arrival_add_next_nodes (const regex_t *preg,
+                                                  re_dfa_t *dfa,
+                                                  re_match_context_t *mctx,
+                                                  int str_idx,
+                                                  re_node_set *cur_nodes,
+                                                  re_node_set *next_nodes);
+static reg_errcode_t check_arrival_expand_ecl (re_dfa_t *dfa,
+                                              re_node_set *cur_nodes,
+                                              int ex_subexp, int fl_open);
+static reg_errcode_t check_arrival_expand_ecl_sub (re_dfa_t *dfa,
+                                                  re_node_set *dst_nodes,
+                                                  int target, int ex_subexp,
+                                                  int fl_open);
+static reg_errcode_t expand_bkref_cache (const regex_t *preg,
+                                        re_match_context_t *mctx,
+                                        re_node_set *cur_nodes, int cur_str,
+                                        int last_str, int subexp_num,
+                                        int fl_open);
+static re_dfastate_t **build_trtable (const regex_t *dfa,
+                                     const re_dfastate_t *state,
+                                     int fl_search);
+#ifdef RE_ENABLE_I18N
+static int check_node_accept_bytes (const regex_t *preg, int node_idx,
+                                   const re_string_t *input, int idx);
+# ifdef _LIBC
+static unsigned int find_collation_sequence_value (const unsigned char *mbs,
+                                                  size_t name_len);
+# endif /* _LIBC */
+#endif /* RE_ENABLE_I18N */
+static int group_nodes_into_DFAstates (const regex_t *dfa,
+                                      const re_dfastate_t *state,
+                                      re_node_set *states_node,
+                                      bitset *states_ch);
+static int check_node_accept (const regex_t *preg, const re_token_t *node,
+                             const re_match_context_t *mctx, int idx);
+static reg_errcode_t extend_buffers (re_match_context_t *mctx);
+\f
+/* Entry point for POSIX code.  */
+
+/* regexec searches for a given pattern, specified by PREG, in the
+   string STRING.
+
+   If NMATCH is zero or REG_NOSUB was set in the cflags argument to
+   `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
+   least NMATCH elements, and we set them to the offsets of the
+   corresponding matched substrings.
+
+   EFLAGS specifies `execution flags' which affect matching: if
+   REG_NOTBOL is set, then ^ does not match at the beginning of the
+   string; if REG_NOTEOL is set, then $ does not match at the end.
+
+   We return 0 if we find a match and REG_NOMATCH if not.  */
+
+int
+regexec (preg, string, nmatch, pmatch, eflags)
+    const regex_t *__restrict preg;
+    const char *__restrict string;
+    size_t nmatch;
+    regmatch_t pmatch[];
+    int eflags;
+{
+  int length = strlen (string);
+  reg_errcode_t err;
+
+  /* A pattern compiled without sub-expression support never reports
+     sub-matches, whatever the caller passed in.  */
+  if (preg->no_sub)
+    {
+      nmatch = 0;
+      pmatch = NULL;
+    }
+
+  /* Search the whole string: start at 0, range and stop at LENGTH.  */
+  err = re_search_internal (preg, string, length, 0, length, length, nmatch,
+                            pmatch, eflags);
+
+  /* Collapse every failure to 1; success is 0.  */
+  return err == REG_NOERROR ? 0 : 1;
+}
+#ifdef _LIBC
+weak_alias (__regexec, regexec)
+#endif
+
+/* Entry points for GNU code.  */
+
+/* re_match, re_search, re_match_2, re_search_2
+
+   The former two functions operate on STRING with length LENGTH,
+   while the latter two operate on concatenation of STRING1 and STRING2
+   with lengths LENGTH1 and LENGTH2, respectively.
+
+   re_match() matches the compiled pattern in BUFP against the string,
+   starting at index START.
+
+   re_search() first tries matching at index START, then it tries to match
+   starting from index START + 1, and so on.  The last start position tried
+   is START + RANGE.  (Thus RANGE = 0 forces re_search to operate the same
+   way as re_match().)
+
+   The parameter STOP of re_{match,search}_2 specifies that no match exceeding
+   the first STOP characters of the concatenation of the strings should be
+   concerned.
+
+   If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
+   and all groups are stored in REGS.  (For the "_2" variants, the offsets are
+   computed relative to the concatenation, not relative to the individual
+   strings.)
+
+   On success, re_match* functions return the length of the match, re_search*
+   return the position of the start of the match.  Return value -1 means no
+   match was found and -2 indicates an internal error.  */
+
+/* Try BUFP's pattern against STRING at index START only.  */
+int
+re_match (bufp, string, length, start, regs)
+    struct re_pattern_buffer *bufp;
+    const char *string;
+    int length, start;
+    struct re_registers *regs;
+{
+  /* RANGE 0 pins the attempt to START, STOP is the full LENGTH, and a
+     non-zero RET_LEN asks the stub for the length of the match.  */
+  int match_len = re_search_stub (bufp, string, length, start, 0, length,
+                                  regs, 1);
+
+  return match_len;
+}
+#ifdef _LIBC
+weak_alias (__re_match, re_match)
+#endif
+
+/* Search STRING for BUFP's pattern, trying the start positions from
+   START up to START + RANGE.  */
+int
+re_search (bufp, string, length, start, range, regs)
+    struct re_pattern_buffer *bufp;
+    const char *string;
+    int length, start, range;
+    struct re_registers *regs;
+{
+  /* STOP is the full LENGTH; a zero RET_LEN asks the stub for the
+     position of the match rather than its length.  */
+  int match_pos = re_search_stub (bufp, string, length, start, range, length,
+                                  regs, 0);
+
+  return match_pos;
+}
+#ifdef _LIBC
+weak_alias (__re_search, re_search)
+#endif
+
+/* Like re_match, but on the concatenation of STRING1 and STRING2 and
+   with an explicit STOP limit.  */
+int
+re_match_2 (bufp, string1, length1, string2, length2, start, regs, stop)
+    struct re_pattern_buffer *bufp;
+    const char *string1, *string2;
+    int length1, length2, start, stop;
+    struct re_registers *regs;
+{
+  /* RANGE 0 pins the attempt to START; a non-zero RET_LEN asks for
+     the length of the match.  */
+  int match_len = re_search_2_stub (bufp, string1, length1, string2, length2,
+                                    start, 0, regs, stop, 1);
+
+  return match_len;
+}
+#ifdef _LIBC
+weak_alias (__re_match_2, re_match_2)
+#endif
+
+/* Like re_search, but on the concatenation of STRING1 and STRING2 and
+   with an explicit STOP limit.  */
+int
+re_search_2 (bufp, string1, length1, string2, length2, start, range, regs, stop)
+    struct re_pattern_buffer *bufp;
+    const char *string1, *string2;
+    int length1, length2, start, range, stop;
+    struct re_registers *regs;
+{
+  /* A zero RET_LEN asks for the position of the match.  */
+  int match_pos = re_search_2_stub (bufp, string1, length1, string2, length2,
+                                    start, range, regs, stop, 0);
+
+  return match_pos;
+}
+#ifdef _LIBC
+weak_alias (__re_search_2, re_search_2)
+#endif
+
+/* Common back end of re_match_2 and re_search_2.  Joins STRING1 and
+   STRING2 into one buffer when both are non-empty (otherwise the
+   non-empty one is used directly) and hands the result to
+   re_search_stub.  Returns what re_search_stub returns, or -2 when a
+   length or STOP is negative, when the combined length does not fit
+   in an int, or when the temporary buffer cannot be allocated.  */
+static int
+re_search_2_stub (bufp, string1, length1, string2, length2, start, range, regs,
+                 stop, ret_len)
+    struct re_pattern_buffer *bufp;
+    const char *string1, *string2;
+    int length1, length2, start, range, stop, ret_len;
+    struct re_registers *regs;
+{
+  const char *str;
+  char *joined = NULL;
+  unsigned int total;
+  int rval;
+  int len;
+
+  if (BE (length1 < 0 || length2 < 0 || stop < 0, 0))
+    return -2;
+
+  /* Both lengths are known to be non-negative here, so their unsigned
+     sum cannot wrap.  Reject a total that does not fit in a
+     non-negative int instead of overflowing a signed addition
+     (assumes the usual representation where UINT_MAX / 2 == INT_MAX).  */
+  total = (unsigned int) length1 + (unsigned int) length2;
+  if (BE (total > (unsigned int) -1 / 2, 0))
+    return -2;
+  len = (int) total;
+
+  /* Concatenate the strings.  */
+  if (length2 > 0)
+    {
+      if (length1 > 0)
+        {
+          joined = re_malloc (char, len);
+          if (BE (joined == NULL, 0))
+            return -2;
+          memcpy (joined, string1, length1);
+          memcpy (joined + length1, string2, length2);
+          str = joined;
+        }
+      else
+        str = string2;
+    }
+  else
+    str = string1;
+
+  rval = re_search_stub (bufp, str, len, start, range, stop, regs,
+                         ret_len);
+  /* JOINED is NULL unless a temporary buffer was made; freeing NULL
+     is a no-op.  */
+  re_free (joined);
+  return rval;
+}
+
+/* The parameters have the same meaning as those of re_search.
+   Additional parameters:
+   If RET_LEN is nonzero the length of the match is returned (re_match style);
+   otherwise the position of the match is returned.  */
+
+/* Returns the match length (RET_LEN non-zero) or the match position
+   (RET_LEN zero) on success, -1 when START is out of range or nothing
+   matches, and -2 on an internal error such as memory exhaustion.  */
+static int
+re_search_stub (bufp, string, length, start, range, stop, regs, ret_len)
+    struct re_pattern_buffer *bufp;
+    const char *string;
+    int length, start, range, stop, ret_len;
+    struct re_registers *regs;
+{
+  reg_errcode_t result;
+  regmatch_t *pmatch;
+  int nregs, rval;
+  int eflags = 0;
+
+  /* Check for out-of-range.  */
+  if (BE (start < 0 || start > length, 0))
+    return -1;
+  /* Clamp START + RANGE into [0, LENGTH].  START is within
+     [0, LENGTH] at this point, so LENGTH - START and -START cannot
+     overflow, whereas START + RANGE could.  */
+  if (BE (range > length - start, 0))
+    range = length - start;
+  else if (BE (range < -start, 0))
+    range = -start;
+
+  eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
+  eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
+
+  /* Compile fastmap if we haven't yet.  */
+  if (range > 0 && bufp->fastmap != NULL && !bufp->fastmap_accurate)
+    re_compile_fastmap (bufp);
+
+  if (BE (bufp->no_sub, 0))
+    regs = NULL;
+
+  /* We need at least 1 register.  */
+  if (regs == NULL)
+    nregs = 1;
+  else if (BE (bufp->regs_allocated == REGS_FIXED &&
+              regs->num_regs < bufp->re_nsub + 1, 0))
+    {
+      /* Fixed-size registers that are too small: fill only what fits.  */
+      nregs = regs->num_regs;
+      if (BE (nregs < 1, 0))
+       {
+         /* Nothing can be copied to regs.  */
+         regs = NULL;
+         nregs = 1;
+       }
+    }
+  else
+    nregs = bufp->re_nsub + 1;
+  pmatch = re_malloc (regmatch_t, nregs);
+  if (BE (pmatch == NULL, 0))
+    return -2;
+
+  result = re_search_internal (bufp, string, length, start, range, stop,
+                              nregs, pmatch, eflags);
+
+  rval = 0;
+
+  /* I hope we needn't fill the regs with -1's when no match was found.  */
+  if (result != REG_NOERROR)
+    /* Only a genuine "no match" is -1; any other error code is an
+       internal failure and is reported as -2.  */
+    rval = (result == REG_NOMATCH) ? -1 : -2;
+  else if (regs != NULL)
+    {
+      /* If caller wants register contents data back, copy them.  */
+      bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
+                                          bufp->regs_allocated);
+      if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0))
+       rval = -2;
+    }
+
+  if (BE (rval == 0, 1))
+    {
+      if (ret_len)
+       {
+         assert (pmatch[0].rm_so == start);
+         rval = pmatch[0].rm_eo - start;
+       }
+      else
+       rval = pmatch[0].rm_so;
+    }
+  re_free (pmatch);
+  return rval;
+}
+
+/* Copy the NREGS matches in PMATCH into REGS, (re)allocating the
+   register arrays as REGS_ALLOCATED dictates, and mark every
+   remaining register with -1.  Returns the new allocation state:
+   REGS_REALLOCATE or REGS_FIXED on success, REGS_UNALLOCATED when
+   memory runs out.  After a failed reallocation both arrays have been
+   released and REGS is reset to the empty state, so no stale pointer
+   is left behind.  */
+static unsigned
+re_copy_regs (regs, pmatch, nregs, regs_allocated)
+    struct re_registers *regs;
+    regmatch_t *pmatch;
+    int nregs, regs_allocated;
+{
+  int rval = REGS_REALLOCATE;
+  int i;
+  int need_regs = nregs + 1;
+  /* We need one extra element beyond `num_regs' for the `-1' marker GNU code
+     uses.  */
+
+  /* Have the register data arrays been allocated?  */
+  if (regs_allocated == REGS_UNALLOCATED)
+    { /* No.  So allocate them with malloc.  */
+      regs->start = re_malloc (regoff_t, need_regs);
+      if (BE (regs->start == NULL, 0))
+       return REGS_UNALLOCATED;
+      regs->end = re_malloc (regoff_t, need_regs);
+      if (BE (regs->end == NULL, 0))
+       {
+         re_free (regs->start);
+         /* Do not leave a pointer to freed memory in REGS.  */
+         regs->start = NULL;
+         return REGS_UNALLOCATED;
+       }
+      regs->num_regs = need_regs;
+    }
+  else if (regs_allocated == REGS_REALLOCATE)
+    { /* Yes.  If we need more elements than were already
+        allocated, reallocate them.  If we need fewer, just
+        leave it alone.  */
+      if (need_regs > regs->num_regs)
+       {
+         /* Grow through temporaries: assigning the result of realloc
+            straight to regs->start would lose (and leak) the old
+            block when realloc fails.  */
+         regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs);
+         regoff_t *new_end = NULL;
+
+         if (new_start != NULL)
+           {
+             regs->start = new_start;
+             new_end = re_realloc (regs->end, regoff_t, need_regs);
+           }
+         if (BE (new_end == NULL, 0))
+           {
+             /* Either step failed.  regs->start and regs->end still
+                point at live blocks (old or freshly grown); release
+                both and report the registers as unallocated.  */
+             re_free (regs->start);
+             re_free (regs->end);
+             regs->start = regs->end = NULL;
+             regs->num_regs = 0;
+             return REGS_UNALLOCATED;
+           }
+         regs->end = new_end;
+         regs->num_regs = need_regs;
+       }
+    }
+  else
+    {
+      assert (regs_allocated == REGS_FIXED);
+      /* This function may not be called with REGS_FIXED and nregs too big.  */
+      assert (regs->num_regs >= nregs);
+      rval = REGS_FIXED;
+    }
+
+  /* Copy the regs.  */
+  for (i = 0; i < nregs; ++i)
+    {
+      regs->start[i] = pmatch[i].rm_so;
+      regs->end[i] = pmatch[i].rm_eo;
+    }
+  /* Mark the unused tail, including the extra marker slot.  */
+  for ( ; i < regs->num_regs; ++i)
+    regs->start[i] = regs->end[i] = -1;
+
+  return rval;
+}
+
+/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
+   ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
+   this memory for recording register information.  STARTS and ENDS
+   must be allocated using the malloc library routine, and must each
+   be at least NUM_REGS * sizeof (regoff_t) bytes long.
+
+   If NUM_REGS == 0, then subsequent matches should allocate their own
+   register data.
+
+   Unless this function is called, the first search or match using
+   PATTERN_BUFFER will allocate its own register data, without
+   freeing the old data.  */
+
+void
+re_set_registers (bufp, regs, num_regs, starts, ends)
+    struct re_pattern_buffer *bufp;
+    struct re_registers *regs;
+    unsigned num_regs;
+    regoff_t *starts, *ends;
+{
+  if (num_regs == 0)
+    {
+      /* No caller-supplied storage: later matches allocate their own.  */
+      bufp->regs_allocated = REGS_UNALLOCATED;
+      regs->num_regs = 0;
+      regs->end = (regoff_t *) 0;
+      regs->start = regs->end;
+      return;
+    }
+
+  /* Adopt the caller's arrays; they may be reallocated later on.  */
+  bufp->regs_allocated = REGS_REALLOCATE;
+  regs->num_regs = num_regs;
+  regs->start = starts;
+  regs->end = ends;
+}
+#ifdef _LIBC
+weak_alias (__re_set_registers, re_set_registers)
+#endif
+\f
+/* Entry points compatible with 4.2 BSD regex library.  We don't define
+   them unless specifically requested.  */
+
+#if defined _REGEX_RE_COMP || defined _LIBC
+/* Match S against the pattern held in re_comp_buf.  Returns 1 when
+   regexec reports a match (status 0) and 0 otherwise.  */
+int
+# ifdef _LIBC
+weak_function
+# endif
+re_exec (s)
+     const char *s;
+{
+  int status = regexec (&re_comp_buf, s, 0, NULL, 0);
+
+  return status == 0;
+}
+#endif /* _REGEX_RE_COMP */
+\f
+static re_node_set empty_set;
+
+/* Internal entry point.  */
+
+/* Searches for a compiled pattern PREG in the string STRING, whose
+   length is LENGTH.  NMATCH, PMATCH, and EFLAGS have the same
+   meanings as in regexec.  START and RANGE have the same meanings
+   as in re_search.
+   Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
+   otherwise return the error code.
+   Note: We assume front end functions already check ranges.
+   (START + RANGE >= 0 && START + RANGE <= LENGTH)  */
+
+static reg_errcode_t
+re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
+                   eflags)
+    const regex_t *preg;
+    const char *string;
+    int length, start, range, stop, eflags;
+    size_t nmatch;
+    regmatch_t pmatch[];
+{
+  reg_errcode_t err;
+  re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
+  re_string_t input;
+  int left_lim, right_lim, incr;
+  int fl_longest_match, match_first, match_last = -1;
+  int fast_translate, sb;
+  re_match_context_t mctx;
+  char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate
+                   && range && !preg->can_be_null) ? preg->fastmap : NULL);
+
+  /* Check if the DFA haven't been compiled.  */
+  if (BE (preg->used == 0 || dfa->init_state == NULL
+         || dfa->init_state_word == NULL || dfa->init_state_nl == NULL
+         || dfa->init_state_begbuf == NULL, 0))
+    return REG_NOMATCH;
+
+  re_node_set_init_empty (&empty_set);
+  memset (&mctx, '\0', sizeof (re_match_context_t));
+
+  /* We must check the longest matching, if nmatch > 0.  */
+  fl_longest_match = (nmatch != 0 || dfa->nbackref);
+
+  err = re_string_allocate (&input, string, length, dfa->nodes_len + 1,
+                           preg->translate, preg->syntax & RE_ICASE);
+  if (BE (err != REG_NOERROR, 0))
+    goto free_return;
+  input.stop = stop;
+
+  err = match_ctx_init (&mctx, eflags, &input, dfa->nbackref * 2);
+  if (BE (err != REG_NOERROR, 0))
+    goto free_return;
+
+  /* We will log all the DFA states through which the dfa pass,
+     if nmatch > 1, or this dfa has "multibyte node", which is a
+     back-reference or a node which can accept multibyte character or
+     multi character collating element.  */
+  if (nmatch > 1 || dfa->has_mb_node)
+    {
+      mctx.state_log = re_malloc (re_dfastate_t *, dfa->nodes_len + 1);
+      if (BE (mctx.state_log == NULL, 0))
+       {
+         err = REG_ESPACE;
+         goto free_return;
+       }
+    }
+  else
+    mctx.state_log = NULL;
+
+#ifdef DEBUG
+  /* We assume front-end functions already check them.  */
+  assert (start + range >= 0 && start + range <= length);
+#endif
+
+  match_first = start;
+  input.tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
+                      : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
+
+  /* Check incrementally whether of not the input string match.  */
+  incr = (range < 0) ? -1 : 1;
+  left_lim = (range < 0) ? start + range : start;
+  right_lim = (range < 0) ? start : start + range;
+  sb = MB_CUR_MAX == 1;
+  fast_translate = sb || !(preg->syntax & RE_ICASE || preg->translate);
+
+  for (;;)
+    {
+      /* At first get the current byte from input string.  */
+      if (fastmap)
+       {
+         if (BE (fast_translate, 1))
+           {
+             unsigned RE_TRANSLATE_TYPE t
+               = (unsigned RE_TRANSLATE_TYPE) preg->translate;
+             if (BE (range >= 0, 1))
+               {
+                 if (BE (t != NULL, 0))
+                   {
+                     while (BE (match_first < right_lim, 1)
+                            && !fastmap[t[(unsigned char) string[match_first]]])
+                       ++match_first;
+                   }
+                 else
+                   {
+                     while (BE (match_first < right_lim, 1)
+                            && !fastmap[(unsigned char) string[match_first]])
+                       ++match_first;
+                   }
+                 if (BE (match_first == right_lim, 0))
+                   {
+                     int ch = match_first >= length
+                              ? 0 : (unsigned char) string[match_first];
+                     if (!fastmap[t ? t[ch] : ch])
+                       break;
+                   }
+               }
+             else
+               {
+                 while (match_first >= left_lim)
+                   {
+                     int ch = match_first >= length
+                              ? 0 : (unsigned char) string[match_first];
+                     if (fastmap[t ? t[ch] : ch])
+                       break;
+                     --match_first;
+                   }
+                 if (match_first < left_lim)
+                   break;
+               }
+           }
+         else
+           {
+             int ch;
+
+             do
+               {
+                 /* In this case, we can't determine easily the current byte,
+                    since it might be a component byte of a multibyte
+                    character.  Then we use the constructed buffer
+                    instead.  */
+                 /* If MATCH_FIRST is out of the valid range, reconstruct the
+                    buffers.  */
+                 if (input.raw_mbs_idx + input.valid_len <= match_first
+                     || match_first < input.raw_mbs_idx)
+                   {
+                     err = re_string_reconstruct (&input, match_first, eflags,
+                                                  preg->newline_anchor);
+                     if (BE (err != REG_NOERROR, 0))
+                       goto free_return;
+                   }
+                 /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
+                    Note that MATCH_FIRST must not be smaller than 0.  */
+                 ch = ((match_first >= length) ? 0
+                      : re_string_byte_at (&input,
+                                           match_first - input.raw_mbs_idx));
+                 if (fastmap[ch])
+                   break;
+                 match_first += incr;
+               }
+             while (match_first >= left_lim && match_first <= right_lim);
+             if (! fastmap[ch])
+               break;
+           }
+       }
+
+      /* Reconstruct the buffers so that the matcher can assume that
+        the matching starts from the begining of the buffer.  */
+      err = re_string_reconstruct (&input, match_first, eflags,
+                                  preg->newline_anchor);
+      if (BE (err != REG_NOERROR, 0))
+       goto free_return;
+#ifdef RE_ENABLE_I18N
+     /* Eliminate it when it is a component of a multibyte character
+        and isn't the head of a multibyte character.  */
+      if (sb || re_string_first_byte (&input, 0))
+#endif
+       {
+         /* It seems to be appropriate one, then use the matcher.  */
+         /* We assume that the matching starts from 0.  */
+         mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
+         match_last = check_matching (preg, &mctx, 0, fl_longest_match);
+         if (match_last != -1)
+           {
+             if (BE (match_last == -2, 0))
+               {
+                 err = REG_ESPACE;
+                 goto free_return;
+               }
+             else
+               {
+                 mctx.match_last = match_last;
+                 if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
+                   {
+                     re_dfastate_t *pstate = mctx.state_log[match_last];
+                     mctx.last_node = check_halt_state_context (preg, pstate,
+                                                                &mctx, match_last);
+                   }
+                 if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
+                     || dfa->nbackref)
+                   {
+                     err = prune_impossible_nodes (preg, &mctx);
+                     if (err == REG_NOERROR)
+                       break;
+                     if (BE (err != REG_NOMATCH, 0))
+                       goto free_return;
+                   }
+                 else
+                   break; /* We found a matching.  */
+               }
+           }
+         match_ctx_clean (&mctx);
+       }
+      /* Update counter.  */
+      match_first += incr;
+      if (match_first < left_lim || right_lim < match_first)
+       break;
+    }
+
+  /* Set pmatch[] if we need.  */
+  if (match_last != -1 && nmatch > 0)
+    {
+      int reg_idx;
+
+      /* Initialize registers.  */
+      for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
+       pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
+
+      /* Set the points where matching start/end.  */
+      pmatch[0].rm_so = 0;
+      pmatch[0].rm_eo = mctx.match_last;
+
+      if (!preg->no_sub && nmatch > 1)
+       {
+         err = set_regs (preg, &mctx, nmatch, pmatch,
+                         dfa->has_plural_match && dfa->nbackref > 0);
+         if (BE (err != REG_NOERROR, 0))
+           goto free_return;
+       }
+
+      /* At last, add the offset to the each registers, since we slided
+        the buffers so that We can assume that the matching starts from 0.  */
+      for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
+       if (pmatch[reg_idx].rm_so != -1)
+         {
+           pmatch[reg_idx].rm_so += match_first;
+           pmatch[reg_idx].rm_eo += match_first;
+         }
+    }
+  err = (match_last == -1) ? REG_NOMATCH : REG_NOERROR;
+ free_return:
+  re_free (mctx.state_log);
+  if (dfa->nbackref)
+    match_ctx_free (&mctx);
+  re_string_destruct (&input);
+  return err;
+}
+
+/* Sift the state log recorded in MCTX backward from MCTX->match_last,
+   starting at MCTX->last_node.  On success, MCTX->state_log is
+   replaced by the sifted log, and MCTX->last_node and
+   MCTX->match_last are updated.
+
+   When the pattern contains back references and the sifted log is
+   empty at index 0, the match end is moved back to the previous
+   logged halt state and the sifting is retried.
+
+   Return REG_NOERROR on success, REG_NOMATCH if no halt state
+   survives, or REG_ESPACE (or the error reported by a helper) on
+   failure.  */
+
+static reg_errcode_t
+prune_impossible_nodes (preg, mctx)
+     const regex_t *preg;
+     re_match_context_t *mctx;
+{
+  int halt_node, match_last;
+  reg_errcode_t ret;
+  re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
+  re_dfastate_t **sifted_states;
+  re_dfastate_t **lim_states = NULL;
+  re_sift_context_t sctx;
+#ifdef DEBUG
+  assert (mctx->state_log != NULL);
+#endif
+  match_last = mctx->match_last;
+  halt_node = mctx->last_node;
+  sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
+  if (BE (sifted_states == NULL, 0))
+    {
+      ret = REG_ESPACE;
+      goto free_return;
+    }
+  if (dfa->nbackref)
+    {
+      lim_states = re_malloc (re_dfastate_t *, match_last + 1);
+      if (BE (lim_states == NULL, 0))
+       {
+         ret = REG_ESPACE;
+         goto free_return;
+       }
+      while (1)
+       {
+         memset (lim_states, '\0',
+                 sizeof (re_dfastate_t *) * (match_last + 1));
+         match_ctx_clear_flag (mctx);
+         sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
+                        match_last, 0);
+         ret = sift_states_backward (preg, mctx, &sctx);
+         re_node_set_free (&sctx.limits);
+         if (BE (ret != REG_NOERROR, 0))
+             goto free_return;
+         if (sifted_states[0] != NULL || lim_states[0] != NULL)
+           break;
+         /* Nothing survived at the start of the log: step back to the
+            previous logged halt state.  Entries of the state log may be
+            NULL (gaps are zero-filled when the log is extended), so
+            such entries must be skipped rather than dereferenced.  */
+         do
+           {
+             --match_last;
+             if (match_last < 0)
+               {
+                 ret = REG_NOMATCH;
+                 goto free_return;
+               }
+           } while (mctx->state_log[match_last] == NULL
+                    || !mctx->state_log[match_last]->halt);
+         halt_node = check_halt_state_context (preg,
+                                               mctx->state_log[match_last],
+                                               mctx, match_last);
+       }
+      ret = merge_state_array (dfa, sifted_states, lim_states,
+                              match_last + 1);
+      re_free (lim_states);
+      lim_states = NULL;
+      if (BE (ret != REG_NOERROR, 0))
+       goto free_return;
+    }
+  else
+    {
+      sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
+                    match_last, 0);
+      ret = sift_states_backward (preg, mctx, &sctx);
+      re_node_set_free (&sctx.limits);
+      if (BE (ret != REG_NOERROR, 0))
+       goto free_return;
+    }
+  /* Hand the sifted log over to the match context; clearing the local
+     pointer keeps the cleanup below from freeing it.  */
+  re_free (mctx->state_log);
+  mctx->state_log = sifted_states;
+  sifted_states = NULL;
+  mctx->last_node = halt_node;
+  mctx->match_last = match_last;
+  ret = REG_NOERROR;
+ free_return:
+  re_free (sifted_states);
+  re_free (lim_states);
+  return ret;
+}
+
+/* Pick the initial DFA state appropriate for starting a match at IDX.
+   Initial states may carry constraints such as "\<" or "^", so the
+   choice depends on the context of the character preceding IDX.
+   *ERR is set to REG_NOERROR on entry; it may be changed by
+   re_acquire_state_context, which is called only for the
+   beginning-of-buffer-without-newline context.  */
+
+static inline re_dfastate_t *
+acquire_init_state_context (err, preg, mctx, idx)
+     reg_errcode_t *err;
+     const regex_t *preg;
+     const re_match_context_t *mctx;
+     int idx;
+{
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  unsigned int context;
+
+  *err = REG_NOERROR;
+
+  /* Without constraints there is only one initial state to choose.  */
+  if (!dfa->init_state->has_constraint)
+    return dfa->init_state;
+
+  context = re_string_context_at (mctx->input, idx - 1, mctx->eflags,
+                                 preg->newline_anchor);
+
+  if (IS_WORD_CONTEXT (context))
+    return dfa->init_state_word;
+
+  if (IS_ORDINARY_CONTEXT (context))
+    return dfa->init_state;
+
+  /* A newline context is either combined with beginning-of-buffer or
+     stands alone.  */
+  if (IS_NEWLINE_CONTEXT (context))
+    return (IS_BEGBUF_CONTEXT (context)
+           ? dfa->init_state_begbuf : dfa->init_state_nl);
+
+  /* Beginning of buffer without newline is relatively rare, so the
+     state is calculated on demand.  */
+  if (IS_BEGBUF_CONTEXT (context))
+    return re_acquire_state_context (err, dfa,
+                                    dfa->init_state->entrance_nodes,
+                                    context);
+
+  /* Must not happen?  */
+  return dfa->init_state;
+}
+
+/* Check whether the regular expression match input string INPUT or not,
+   and return the index where the matching end, return -1 if not match,
+   or return -2 in case of an error.
+   FL_SEARCH means we must search where the matching starts,
+   FL_LONGEST_MATCH means we want the POSIX longest matching.
+   Note that the matcher assume that the maching starts from the current
+   index of the buffer.  */
+
+static int
+check_matching (preg, mctx, fl_search, fl_longest_match)
+    const regex_t *preg;
+    re_match_context_t *mctx;
+    int fl_search, fl_longest_match;
+{
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  reg_errcode_t err;
+  int match = 0;
+  int match_last = -1;
+  int cur_str_idx = re_string_cur_idx (mctx->input);
+  re_dfastate_t *cur_state;
+
+  cur_state = acquire_init_state_context (&err, preg, mctx, cur_str_idx);
+  /* An initial state must not be NULL(invalid state).  */
+  if (BE (cur_state == NULL, 0))
+    return -2;
+  if (mctx->state_log != NULL)
+    mctx->state_log[cur_str_idx] = cur_state;
+
+  /* Check OP_OPEN_SUBEXP in the initial state in case that we use them
+     later.  E.g. Processing back references.  */
+  if (dfa->nbackref)
+    {
+      err = check_subexp_matching_top (dfa, mctx, &cur_state->nodes, 0);
+      if (BE (err != REG_NOERROR, 0))
+       return err;
+    }
+
+  if (cur_state->has_backref)
+    {
+      err = transit_state_bkref (preg, &cur_state->nodes, mctx);
+      if (BE (err != REG_NOERROR, 0))
+       return err;
+    }
+
+  /* If the RE accepts NULL string.  */
+  if (cur_state->halt)
+    {
+      if (!cur_state->has_constraint
+         || check_halt_state_context (preg, cur_state, mctx, cur_str_idx))
+       {
+         if (!fl_longest_match)
+           return cur_str_idx;
+         else
+           {
+             match_last = cur_str_idx;
+             match = 1;
+           }
+       }
+    }
+
+  while (!re_string_eoi (mctx->input))
+    {
+      cur_state = transit_state (&err, preg, mctx, cur_state,
+                                fl_search && !match);
+      if (cur_state == NULL) /* Reached at the invalid state or an error.  */
+       {
+         cur_str_idx = re_string_cur_idx (mctx->input);
+         if (BE (err != REG_NOERROR, 0))
+           return -2;
+         if (fl_search && !match)
+           {
+             /* Restart from initial state, since we are searching
+                the point from where matching start.  */
+#ifdef RE_ENABLE_I18N
+             if (MB_CUR_MAX == 1
+                 || re_string_first_byte (mctx->input, cur_str_idx))
+#endif /* RE_ENABLE_I18N */
+               cur_state = acquire_init_state_context (&err, preg, mctx,
+                                                       cur_str_idx);
+             if (BE (cur_state == NULL && err != REG_NOERROR, 0))
+               return -2;
+             if (mctx->state_log != NULL)
+               mctx->state_log[cur_str_idx] = cur_state;
+           }
+         else if (!fl_longest_match && match)
+           break;
+         else /* (fl_longest_match && match) || (!fl_search && !match)  */
+           {
+             if (mctx->state_log == NULL)
+               break;
+             else
+               {
+                 int max = mctx->state_log_top;
+                 for (; cur_str_idx <= max; ++cur_str_idx)
+                   if (mctx->state_log[cur_str_idx] != NULL)
+                     break;
+                 if (cur_str_idx > max)
+                   break;
+               }
+           }
+       }
+
+      if (cur_state != NULL && cur_state->halt)
+       {
+         /* Reached at a halt state.
+            Check the halt state can satisfy the current context.  */
+         if (!cur_state->has_constraint
+             || check_halt_state_context (preg, cur_state, mctx,
+                                          re_string_cur_idx (mctx->input)))
+           {
+             /* We found an appropriate halt state.  */
+             match_last = re_string_cur_idx (mctx->input);
+             match = 1;
+             if (!fl_longest_match)
+               break;
+           }
+       }
+   }
+  return match_last;
+}
+
+/* Return 1 if NODE is an END_OF_RE node whose constraint (if any) is
+   satisfied by CONTEXT, and 0 otherwise.  */
+
+static int check_halt_node_context (dfa, node, context)
+    const re_dfa_t *dfa;
+    int node;
+    unsigned int context;
+{
+  unsigned int constraint;
+
+  /* Only the end-of-RE node can halt the match.  */
+  if (dfa->nodes[node].type != END_OF_RE)
+    return 0;
+
+  /* An unconstrained halt node matches in every context.  */
+  constraint = dfa->nodes[node].constraint;
+  if (constraint == 0)
+    return 1;
+
+  return NOT_SATISFY_NEXT_CONSTRAINT (constraint, context) ? 0 : 1;
+}
+
+/* Check whether the halt state STATE matches the context at IDX.
+   Return the first node of STATE that is a halt node satisfying that
+   context, or 0 if there is none.  */
+
+static int
+check_halt_state_context (preg, state, mctx, idx)
+    const regex_t *preg;
+    const re_dfastate_t *state;
+    const re_match_context_t *mctx;
+    int idx;
+{
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  unsigned int context;
+  int pos = 0;
+#ifdef DEBUG
+  assert (state->halt);
+#endif
+  context = re_string_context_at (mctx->input, idx, mctx->eflags,
+                                 preg->newline_anchor);
+  /* Scan the nodes of the state in order and stop at the first hit.  */
+  while (pos < state->nodes.nelem)
+    {
+      int candidate = state->nodes.elems[pos];
+      if (check_halt_node_context (dfa, candidate, context))
+       return candidate;
+      ++pos;
+    }
+  return 0;
+}
+
+/* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
+   corresponding to the DFA).
+   Return the destination node, and update EPS_VIA_NODES, return -1 in case
+   of errors.  */
+
+static int
+proceed_next_node (preg, nregs, regs, mctx, pidx, node, eps_via_nodes, fs)
+    const regex_t *preg;
+    regmatch_t *regs;
+    const re_match_context_t *mctx;
+    int nregs, *pidx, node;
+    re_node_set *eps_via_nodes;
+    struct re_fail_stack_t *fs;
+{
+  re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
+  int i, err, dest_node;
+  dest_node = -1;
+  if (IS_EPSILON_NODE (dfa->nodes[node].type))
+    {
+      re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
+      int ndest, dest_nodes[2];
+      err = re_node_set_insert (eps_via_nodes, node);
+      if (BE (err < 0, 0))
+       return -1;
+      /* Pick up valid destinations.  */
+      for (ndest = 0, i = 0; i < dfa->edests[node].nelem; ++i)
+       {
+         int candidate = dfa->edests[node].elems[i];
+         if (!re_node_set_contains (cur_nodes, candidate))
+           continue;
+         dest_nodes[0] = (ndest == 0) ? candidate : dest_nodes[0];
+         dest_nodes[1] = (ndest == 1) ? candidate : dest_nodes[1];
+         ++ndest;
+       }
+      if (ndest <= 1)
+       return ndest == 0 ? -1 : (ndest == 1 ? dest_nodes[0] : 0);
+      /* In order to avoid infinite loop like "(a*)*".  */
+      if (re_node_set_contains (eps_via_nodes, dest_nodes[0]))
+       return dest_nodes[1];
+      if (fs != NULL)
+       push_fail_stack (fs, *pidx, dest_nodes, nregs, regs, eps_via_nodes);
+      return dest_nodes[0];
+    }
+  else
+    {
+      int naccepted = 0;
+      re_token_type_t type = dfa->nodes[node].type;
+
+#ifdef RE_ENABLE_I18N
+      if (ACCEPT_MB_NODE (type))
+       naccepted = check_node_accept_bytes (preg, node, mctx->input, *pidx);
+      else
+#endif /* RE_ENABLE_I18N */
+      if (type == OP_BACK_REF)
+       {
+         int subexp_idx = dfa->nodes[node].opr.idx;
+         naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
+         if (fs != NULL)
+           {
+             if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1)
+               return -1;
+             else if (naccepted)
+               {
+                 char *buf = (char *) re_string_get_buffer (mctx->input);
+                 if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
+                             naccepted) != 0)
+                   return -1;
+               }
+           }
+
+         if (naccepted == 0)
+           {
+             err = re_node_set_insert (eps_via_nodes, node);
+             if (BE (err < 0, 0))
+               return -2;
+             dest_node = dfa->edests[node].elems[0];
+             if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
+                                       dest_node))
+               return dest_node;
+           }
+       }
+
+      if (naccepted != 0
+         || check_node_accept (preg, dfa->nodes + node, mctx, *pidx))
+       {
+         dest_node = dfa->nexts[node];
+         *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
+         if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
+                    || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
+                                              dest_node)))
+           return -1;
+         re_node_set_empty (eps_via_nodes);
+         return dest_node;
+       }
+    }
+  return -1;
+}
+
+/* Push a backtracking point onto the fail stack FS: the string index
+   STR_IDX, the alternative destination DESTS[1], a copy of the NREGS
+   registers in REGS, and a copy of EPS_VIA_NODES.
+   Return REG_NOERROR on success.  On failure return REG_ESPACE (or the
+   error from re_node_set_init_copy) and leave FS holding exactly the
+   entries it held before, so that it can still be popped and freed
+   safely.  */
+
+static reg_errcode_t
+push_fail_stack (fs, str_idx, dests, nregs, regs, eps_via_nodes)
+     struct re_fail_stack_t *fs;
+     int str_idx, *dests, nregs;
+     regmatch_t *regs;
+     re_node_set *eps_via_nodes;
+{
+  reg_errcode_t err;
+  int num = fs->num;
+  struct re_fail_stack_ent_t *ent;
+
+  /* Grow the array when this push would fill it, so that there is
+     always a spare slot after a push.  The bookkeeping is updated only
+     after realloc succeeded; on failure the old array is still valid.  */
+  if (num + 1 == fs->alloc)
+    {
+      struct re_fail_stack_ent_t *new_array;
+      new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t)
+                                      * fs->alloc * 2));
+      if (BE (new_array == NULL, 0))
+       return REG_ESPACE;
+      fs->stack = new_array;
+      fs->alloc *= 2;
+    }
+
+  ent = fs->stack + num;
+  ent->regs = re_malloc (regmatch_t, nregs);
+  if (BE (ent->regs == NULL, 0))
+    return REG_ESPACE;
+  memcpy (ent->regs, regs, sizeof (regmatch_t) * nregs);
+  err = re_node_set_init_copy (&ent->eps_via_nodes, eps_via_nodes);
+  if (BE (err != REG_NOERROR, 0))
+    {
+      re_free (ent->regs);
+      return err;
+    }
+  ent->idx = str_idx;
+  ent->node = dests[1];
+
+  /* The entry is complete; only now does it become part of the stack.  */
+  fs->num = num + 1;
+  return REG_NOERROR;
+}
+
+/* Pop the most recent backtracking point from FS.  The saved string
+   index is stored in *PIDX, the saved NREGS registers are copied back
+   into REGS, and EPS_VIA_NODES is freed and replaced by the saved set
+   (whose ownership passes to the caller).  Return the saved node.  */
+
+static int
+pop_fail_stack (fs, pidx, nregs, regs, eps_via_nodes)
+     struct re_fail_stack_t *fs;
+     int *pidx, nregs;
+     regmatch_t *regs;
+     re_node_set *eps_via_nodes;
+{
+  struct re_fail_stack_ent_t *top;
+  int num = --fs->num;
+  assert (num >= 0);
+  top = &fs->stack[num];
+
+  /* Restore the registers before releasing the saved copy.  */
+  memcpy (regs, top->regs, sizeof (regmatch_t) * nregs);
+  re_free (top->regs);
+
+  /* Swap in the saved epsilon set.  */
+  re_node_set_free (eps_via_nodes);
+  *eps_via_nodes = top->eps_via_nodes;
+
+  *pidx = top->idx;
+  return top->node;
+}
+
+/* Set the positions where the subexpressions start/end to registers
+   PMATCH, by walking the NFA along the state log in MCTX from
+   PMATCH[0].rm_so to PMATCH[0].rm_eo.
+   If FL_BACKTRACK is nonzero, a fail stack is used so that the walk
+   can back up to an alternative path.
+   Return REG_NOERROR on success, REG_NOMATCH if no path is found, or
+   REG_ESPACE if memory is exhausted.
+   Note: We assume that pmatch[0] is already set, and
+   pmatch[i].rm_so == pmatch[i].rm_eo == -1 (i > 1).  */
+
+static reg_errcode_t
+set_regs (preg, mctx, nmatch, pmatch, fl_backtrack)
+     const regex_t *preg;
+     const re_match_context_t *mctx;
+     size_t nmatch;
+     regmatch_t *pmatch;
+     int fl_backtrack;
+{
+  re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
+  int idx, cur_node, real_nmatch;
+  re_node_set eps_via_nodes;
+  struct re_fail_stack_t *fs;
+  struct re_fail_stack_t fs_body = {0, 2, NULL};
+#ifdef DEBUG
+  assert (nmatch > 1);
+  assert (mctx->state_log != NULL);
+#endif
+  if (fl_backtrack)
+    {
+      fs = &fs_body;
+      fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
+      /* Nothing else has been allocated yet, so we can simply leave.  */
+      if (BE (fs->stack == NULL, 0))
+       return REG_ESPACE;
+    }
+  else
+    fs = NULL;
+  cur_node = dfa->init_node;
+  real_nmatch = (nmatch <= preg->re_nsub) ? nmatch : preg->re_nsub + 1;
+  re_node_set_init_empty (&eps_via_nodes);
+  for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
+    {
+      update_regs (dfa, pmatch, cur_node, idx, real_nmatch);
+      if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
+       {
+         int reg_idx;
+         if (fs)
+           {
+             /* We reached the end; accept this path only if no
+                register was opened without being closed.  */
+             for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
+               if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
+                 break;
+             if (reg_idx == nmatch)
+               {
+                 re_node_set_free (&eps_via_nodes);
+                 return free_fail_stack_return (fs);
+               }
+             cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
+                                        &eps_via_nodes);
+           }
+         else
+           {
+             re_node_set_free (&eps_via_nodes);
+             return REG_NOERROR;
+           }
+       }
+
+      /* Proceed to next node.  */
+      cur_node = proceed_next_node (preg, nmatch, pmatch, mctx, &idx, cur_node,
+                                   &eps_via_nodes, fs);
+
+      if (BE (cur_node < 0, 0))
+       {
+         if (cur_node == -2)
+           {
+             /* Out of memory: release what we hold before leaving.  */
+             re_node_set_free (&eps_via_nodes);
+             free_fail_stack_return (fs);
+             return REG_ESPACE;
+           }
+         if (fs)
+           cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
+                                      &eps_via_nodes);
+         else
+           {
+             re_node_set_free (&eps_via_nodes);
+             return REG_NOMATCH;
+           }
+       }
+    }
+  re_node_set_free (&eps_via_nodes);
+  return free_fail_stack_return (fs);
+}
+
+/* Release every entry still on the fail stack FS and the stack array
+   itself.  FS may be NULL, in which case nothing is done.  Always
+   return REG_NOERROR, so callers can write
+   `return free_fail_stack_return (fs);'.  */
+
+static reg_errcode_t
+free_fail_stack_return (fs)
+     struct re_fail_stack_t *fs;
+{
+  int pos = 0;
+
+  if (fs == NULL)
+    return REG_NOERROR;
+
+  while (pos < fs->num)
+    {
+      re_node_set_free (&fs->stack[pos].eps_via_nodes);
+      re_free (fs->stack[pos].regs);
+      ++pos;
+    }
+  re_free (fs->stack);
+  return REG_NOERROR;
+}
+
+/* If CUR_NODE opens or closes a subexpression whose register lies
+   below NMATCH, record CUR_IDX in PMATCH as the start or the end of
+   that subexpression.  Other nodes leave PMATCH untouched.  */
+
+static void
+update_regs (dfa, pmatch, cur_node, cur_idx, nmatch)
+     re_dfa_t *dfa;
+     regmatch_t *pmatch;
+     int cur_node, cur_idx, nmatch;
+{
+  int type = dfa->nodes[cur_node].type;
+  int opens = (type == OP_OPEN_SUBEXP);
+  int reg_num;
+
+  if (!opens && type != OP_CLOSE_SUBEXP)
+    return;
+
+  /* Register 0 holds the whole match, so subexpression N uses N + 1.  */
+  reg_num = dfa->nodes[cur_node].opr.idx + 1;
+  if (reg_num >= nmatch)
+    return;
+
+  if (opens)
+    {
+      /* We are at the first node of this sub expression: note the
+        start and mark the end as not yet known.  */
+      pmatch[reg_num].rm_so = cur_idx;
+      pmatch[reg_num].rm_eo = -1;
+    }
+  else
+    {
+      /* We are at the last node of this sub expression.  */
+      pmatch[reg_num].rm_eo = cur_idx;
+    }
+}
+
+#define NUMBER_OF_STATE 1
+
+/* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
+   and sift the nodes in each states according to the following rules.
+   Updated state_log will be written to STATE_LOG.
+
+   Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if...
+     1. When STR_IDX == MATCH_LAST(the last index in the state_log):
+       If `a' isn't the LAST_NODE and `a' can't epsilon transit to
+       the LAST_NODE, we throw away the node `a'.
+     2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts
+       string `s' and transit to `b':
+       i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
+          away the node `a'.
+       ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
+           thrown away, we throw away the node `a'.
+     3. When 0 <= STR_IDX < n and 'a' epsilon transit to 'b':
+       i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
+          node `a'.
+       ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
+           we throw away the node `a'.  */
+
+#define STATE_NODE_CONTAINS(state,node) \
+  ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
+
+/* Walk the state log of MCTX backward from SCTX->last_str_idx down to
+   index 0, building for each index the set of nodes that survive the
+   sifting, and pass each set to update_cur_sifted_state.
+   Return REG_NOERROR on success, REG_ESPACE if a node cannot be
+   inserted, or the error reported by a helper.  */
+
+static reg_errcode_t
+sift_states_backward (preg, mctx, sctx)
+     const regex_t *preg;
+     re_match_context_t *mctx;
+     re_sift_context_t *sctx;
+{
+  reg_errcode_t err;
+  re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
+  /* Number of consecutive indices whose sifted state is NULL.  */
+  int null_cnt = 0;
+  int str_idx = sctx->last_str_idx;
+  re_node_set cur_dest;
+  re_node_set *cur_src; /* Points the state_log[str_idx]->nodes  */
+
+#ifdef DEBUG
+  assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
+#endif
+  cur_src = &mctx->state_log[str_idx]->nodes;
+
+  /* Build sifted state_log[str_idx].  It has the nodes which can epsilon
+     transit to the last_node and the last_node itself.  */
+  err = re_node_set_init_1 (&cur_dest, sctx->last_node);
+  if (BE (err != REG_NOERROR, 0))
+    return err;
+  err = update_cur_sifted_state (preg, mctx, sctx, str_idx, &cur_dest);
+  if (BE (err != REG_NOERROR, 0))
+    goto free_return;
+
+  /* Then check each states in the state_log.  */
+  while (str_idx > 0)
+    {
+      int i, ret;
+      /* Update counters.  */
+      null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
+      /* Once the run of empty sifted states exceeds max_mb_elem_len,
+        the remaining entries below STR_IDX are cleared and we stop.  */
+      if (null_cnt > mctx->max_mb_elem_len)
+       {
+         memset (sctx->sifted_states, '\0',
+                 sizeof (re_dfastate_t *) * str_idx);
+         re_node_set_free (&cur_dest);
+         return REG_NOERROR;
+       }
+      re_node_set_empty (&cur_dest);
+      --str_idx;
+      /* A missing log entry is treated as an empty node set.  */
+      cur_src = ((mctx->state_log[str_idx] == NULL) ? &empty_set
+                : &mctx->state_log[str_idx]->nodes);
+
+      /* Then build the next sifted state.
+        We build the next sifted state on `cur_dest', and update
+        `sifted_states[str_idx]' with `cur_dest'.
+        Note:
+        `cur_dest' is the sifted state from `state_log[str_idx + 1]'.
+        `cur_src' points the node_set of the old `state_log[str_idx]'.  */
+      for (i = 0; i < cur_src->nelem; i++)
+       {
+         int prev_node = cur_src->elems[i];
+         int naccepted = 0;
+         re_token_type_t type = dfa->nodes[prev_node].type;
+
+         /* Epsilon nodes are added by update_cur_sifted_state below.  */
+         if (IS_EPSILON_NODE(type))
+           continue;
+#ifdef RE_ENABLE_I18N
+         /* If the node may accept `multi byte'.  */
+         if (ACCEPT_MB_NODE (type))
+           naccepted = sift_states_iter_mb (preg, mctx, sctx, prev_node,
+                                            str_idx, sctx->last_str_idx);
+
+#endif /* RE_ENABLE_I18N */
+         /* We don't check backreferences here.
+            See update_cur_sifted_state().  */
+
+         /* Single-character case: the node must accept the character
+            at STR_IDX and its successor must have survived in the
+            sifted state at STR_IDX + 1.  */
+         if (!naccepted
+             && check_node_accept (preg, dfa->nodes + prev_node, mctx,
+                                   str_idx)
+             && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
+                                     dfa->nexts[prev_node]))
+           naccepted = 1;
+
+         if (naccepted == 0)
+           continue;
+
+         /* When limits are in effect, skip the node if
+            check_dst_limits reports nonzero for this transition.  */
+         if (sctx->limits.nelem)
+           {
+             int to_idx = str_idx + naccepted;
+             if (check_dst_limits (dfa, &sctx->limits, mctx,
+                                   dfa->nexts[prev_node], to_idx,
+                                   prev_node, str_idx))
+               continue;
+           }
+         ret = re_node_set_insert (&cur_dest, prev_node);
+         if (BE (ret == -1, 0))
+           {
+             err = REG_ESPACE;
+             goto free_return;
+           }
+       }
+
+      /* Add all the nodes which satisfy the following conditions:
+        - It can epsilon transit to a node in CUR_DEST.
+        - It is in CUR_SRC.
+        And update state_log.  */
+      err = update_cur_sifted_state (preg, mctx, sctx, str_idx, &cur_dest);
+      if (BE (err != REG_NOERROR, 0))
+       goto free_return;
+    }
+  err = REG_NOERROR;
+ free_return:
+  re_node_set_free (&cur_dest);
+  return err;
+}
+
+/* Helper functions.  */
+
+/* Make sure the state log can be addressed up to NEXT_STATE_LOG_IDX.
+   The input buffers are extended first when that index lies at or beyond
+   the allocated buffer length, or at or beyond the valid part while more
+   input is still pending.  Every log slot between the previous top
+   (exclusive) and NEXT_STATE_LOG_IDX (inclusive) is then zeroed and the top
+   is raised.  Returns REG_NOERROR, or the error from extend_buffers.  */
+static inline reg_errcode_t
+clean_state_log_if_need (mctx, next_state_log_idx)
+    re_match_context_t *mctx;
+    int next_state_log_idx;
+{
+  /* Sampled before any buffer extension, exactly once.  */
+  int old_top = mctx->state_log_top;
+  int must_extend;
+
+  must_extend = (next_state_log_idx >= mctx->input->bufs_len);
+  if (!must_extend)
+    must_extend = (next_state_log_idx >= mctx->input->valid_len
+                   && mctx->input->valid_len < mctx->input->len);
+
+  if (must_extend)
+    {
+      reg_errcode_t err = extend_buffers (mctx);
+      if (BE (err != REG_NOERROR, 0))
+        return err;
+    }
+
+  /* The log already reaches far enough: nothing to clear.  */
+  if (old_top >= next_state_log_idx)
+    return REG_NOERROR;
+
+  memset (mctx->state_log + old_top + 1, '\0',
+          sizeof (re_dfastate_t *) * (next_state_log_idx - old_top));
+  mctx->state_log_top = next_state_log_idx;
+  return REG_NOERROR;
+}
+
+/* Merge the first NUM entries of the state array SRC into DST, slot by
+   slot.  An empty DST slot simply takes over the SRC entry; when both slots
+   are occupied, DST receives the state acquired for the union of the two
+   node sets.  Returns REG_NOERROR, or the first error reported while
+   building the union or acquiring the merged state.  */
+static reg_errcode_t
+merge_state_array (dfa, dst, src, num)
+     re_dfa_t *dfa;
+     re_dfastate_t **dst;
+     re_dfastate_t **src;
+     int num;
+{
+  int slot;
+
+  for (slot = 0; slot < num; slot++)
+    {
+      reg_errcode_t err;
+      re_node_set both;
+
+      if (dst[slot] == NULL)
+        {
+          /* Nothing on our side yet; adopt whatever SRC has (maybe NULL).  */
+          dst[slot] = src[slot];
+          continue;
+        }
+      if (src[slot] == NULL)
+        continue;
+
+      err = re_node_set_init_union (&both, &dst[slot]->nodes,
+                                    &src[slot]->nodes);
+      if (BE (err != REG_NOERROR, 0))
+        return err;
+
+      /* The temporary union is released whether or not the lookup
+         succeeded.  */
+      dst[slot] = re_acquire_state (&err, dfa, &both);
+      re_node_set_free (&both);
+      if (BE (err != REG_NOERROR, 0))
+        return err;
+    }
+  return REG_NOERROR;
+}
+
+/* Complete the sifted state for string position STR_IDX from DEST_NODES.
+   The candidates are the nodes of state_log[STR_IDX] (or the empty set).
+   A non-empty DEST_NODES is first enlarged with the candidates that can
+   epsilon-transit into it and then filtered by the sub-expression limits of
+   SCTX, if there are any.  The resulting state is stored in
+   sctx->sifted_states[STR_IDX], and back references are sifted when the
+   logged state has some.  Returns REG_NOERROR or the first error met.  */
+static reg_errcode_t
+update_cur_sifted_state (preg, mctx, sctx, str_idx, dest_nodes)
+     const regex_t *preg;
+     re_match_context_t *mctx;
+     re_sift_context_t *sctx;
+     int str_idx;
+     re_node_set *dest_nodes;
+{
+  re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
+  const re_node_set *candidates = &empty_set;
+  reg_errcode_t err;
+
+  if (mctx->state_log[str_idx] != NULL)
+    candidates = &mctx->state_log[str_idx]->nodes;
+
+  if (dest_nodes->nelem)
+    {
+      /* Step 1: add the nodes which can epsilon transit to a node in
+         DEST_NODES.  */
+      err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
+      if (BE (err != REG_NOERROR, 0))
+        return err;
+
+      /* Step 2: apply the limitations of the current sift context.  */
+      if (dest_nodes->nelem && sctx->limits.nelem)
+        {
+          err = check_subexp_limits (dfa, dest_nodes, candidates,
+                                     &sctx->limits, mctx->bkref_ents,
+                                     str_idx);
+          if (BE (err != REG_NOERROR, 0))
+            return err;
+        }
+    }
+
+  /* Step 3: record the sifted state.  */
+  sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
+  if (BE (sctx->sifted_states[str_idx] == NULL && err != REG_NOERROR, 0))
+    return err;
+
+  /* Step 4: only states carrying back references need further sifting.  */
+  if (mctx->state_log[str_idx] == NULL
+      || !mctx->state_log[str_idx]->has_backref)
+    return REG_NOERROR;
+
+  return sift_states_bkref (preg, mctx, sctx, str_idx, dest_nodes);
+}
+
+/* For every node that is in DEST_NODES on entry, add to DEST_NODES the
+   members of CANDIDATES that also belong to that node's inverse epsilon
+   closure.  The loop walks a private copy of the initial set, so the nodes
+   added along the way are not visited themselves.  Returns REG_NOERROR, or
+   the error from the copy or from re_node_set_add_intersect.  */
+static reg_errcode_t
+add_epsilon_src_nodes (dfa, dest_nodes, candidates)
+     re_dfa_t *dfa;
+     re_node_set *dest_nodes;
+     const re_node_set *candidates;
+{
+  re_node_set snapshot;
+  reg_errcode_t err;
+  int i;
+
+  err = re_node_set_init_copy (&snapshot, dest_nodes);
+  if (BE (err != REG_NOERROR, 0))
+    return err;
+
+  /* ERR is REG_NOERROR here and stays so unless an iteration fails.  */
+  for (i = 0; i < snapshot.nelem; i++)
+    {
+      err = re_node_set_add_intersect (dest_nodes, candidates,
+                                       dfa->inveclosures + snapshot.elems[i]);
+      if (BE (err != REG_NOERROR, 0))
+        break;
+    }
+
+  re_node_set_free (&snapshot);
+  return err;
+}
+
+/* Remove from DEST_NODES the members of NODE's inverse epsilon closure
+   (dfa->inveclosures + NODE), except those collected in a local exception
+   set.  The exception set is built from the epsilon nodes of that closure
+   (other than NODE) which have an epsilon destination lying outside the
+   closure but present in DEST_NODES: for each such node, the intersection
+   of CANDIDATES with the node's own inverse closure is exempted.
+   Returns REG_NOERROR, or the error from re_node_set_add_intersect.  */
+static reg_errcode_t
+sub_epsilon_src_nodes (dfa, node, dest_nodes, candidates)
+     re_dfa_t *dfa;
+     int node;
+     re_node_set *dest_nodes;
+     const re_node_set *candidates;
+{
+    int ecl_idx;
+    reg_errcode_t err;
+    re_node_set *inv_eclosure = dfa->inveclosures + node;
+    re_node_set except_nodes;
+    re_node_set_init_empty (&except_nodes);
+    /* Pass 1: collect the nodes that must survive the removal.  */
+    for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
+      {
+       int cur_node = inv_eclosure->elems[ecl_idx];
+       if (cur_node == node)
+         continue;
+       if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
+         {
+           /* First epsilon destination, and the second one or -1 when the
+              node has only one.  */
+           int edst1 = dfa->edests[cur_node].elems[0];
+           int edst2 = ((dfa->edests[cur_node].nelem > 1)
+                        ? dfa->edests[cur_node].elems[1] : -1);
+           /* NOTE(review): the second destination is tested with `> 0', so
+              a second destination equal to node 0 is treated like "none";
+              confirm that node 0 can never appear there.  */
+           if ((!re_node_set_contains (inv_eclosure, edst1)
+                && re_node_set_contains (dest_nodes, edst1))
+               || (edst2 > 0
+                   && !re_node_set_contains (inv_eclosure, edst2)
+                   && re_node_set_contains (dest_nodes, edst2)))
+             {
+               err = re_node_set_add_intersect (&except_nodes, candidates,
+                                                dfa->inveclosures + cur_node);
+               if (BE (err != REG_NOERROR, 0))
+                 {
+                   re_node_set_free (&except_nodes);
+                   return err;
+                 }
+             }
+         }
+      }
+    /* Pass 2: drop every member of the inverse closure that was not
+       exempted above.  */
+    for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
+      {
+       int cur_node = inv_eclosure->elems[ecl_idx];
+       if (!re_node_set_contains (&except_nodes, cur_node))
+         {
+           /* The lookup result minus one is used as the position inside
+              DEST_NODES (presumably re_node_set_contains returns index + 1
+              and 0 when absent, giving -1 here for a missing node --
+              confirm that re_node_set_remove_at tolerates that).  */
+           int idx = re_node_set_contains (dest_nodes, cur_node) - 1;
+           re_node_set_remove_at (dest_nodes, idx);
+         }
+      }
+    re_node_set_free (&except_nodes);
+    return REG_NOERROR;
+}
+
+/* Decide whether the transition from SRC_NODE at string index SRC_IDX to
+   DST_NODE at string index DST_IDX is forbidden by one of the back
+   reference limitations in LIMITS (indices into mctx->bkref_ents).
+   For each limitation both end points are classified with
+   check_dst_limits_calc_pos; the transition violates the limitation when
+   the two classifications differ.  Returns 1 on a violation, else 0.  */
+static int
+check_dst_limits (dfa, limits, mctx, dst_node, dst_idx, src_node, src_idx)
+     re_dfa_t *dfa;
+     re_node_set *limits;
+     re_match_context_t *mctx;
+     int dst_node, dst_idx, src_node, src_idx;
+{
+  int i;
+
+  for (i = 0; i < limits->nelem; i++)
+    {
+      int bkref = limits->elems[i];
+      int subexp = dfa->nodes[mctx->bkref_ents[bkref].node].opr.idx - 1;
+      int where_dst, where_src;
+
+      where_dst = check_dst_limits_calc_pos (dfa, mctx, bkref,
+                                             dfa->eclosures + dst_node,
+                                             subexp, dst_node, dst_idx);
+      where_src = check_dst_limits_calc_pos (dfa, mctx, bkref,
+                                             dfa->eclosures + src_node,
+                                             subexp, src_node, src_idx);
+
+      /* Equal classifications cover the unrelated cases:
+         <src> <dst> ( <subexp> )
+         ( <subexp> ) <src> <dst>
+         ( <subexp1> <src> <subexp2> <dst> <subexp3> )
+         Anything else crosses a boundary of the sub expression.  */
+      if (where_src != where_dst)
+        return 1;
+    }
+  return 0;
+}
+
+/* Classify string index STR_IDX relative to the sub expression match
+   recorded in back reference cache entry LIMIT (an index into
+   mctx->bkref_ents): -1 when it lies before the match, 1 when it lies
+   after it, 0 when it lies inside.  When STR_IDX coincides with one of the
+   two boundaries, the node set ECLOSURES is inspected to refine the answer
+   using the OP_OPEN_SUBEXP / OP_CLOSE_SUBEXP nodes of sub expression
+   SUBEXP_IDX and, recursively, the destinations of back references that
+   matched the empty string.  */
+static int
+check_dst_limits_calc_pos (dfa, mctx, limit, eclosures, subexp_idx, node,
+                          str_idx)
+     re_dfa_t *dfa;
+     re_match_context_t *mctx;
+     re_node_set *eclosures;
+     int limit, subexp_idx, node, str_idx;
+{
+  struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
+  /* Coarse classification from the recorded boundaries alone.  */
+  int pos = (str_idx < lim->subexp_from ? -1
+            : (lim->subexp_to < str_idx ? 1 : 0));
+  /* Exactly on a boundary: the nodes decide on which side we are.  */
+  if (pos == 0
+      && (str_idx == lim->subexp_from || str_idx == lim->subexp_to))
+    {
+      int node_idx;
+      for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
+       {
+         /* This local shadows the NODE parameter, which is not otherwise
+            read in this function.  */
+         int node = eclosures->elems[node_idx];
+         re_token_type_t type= dfa->nodes[node].type;
+         if (type == OP_BACK_REF)
+           {
+             /* Walk the cache entries from the position returned by
+                search_cur_bkref_entry until one has a larger str_idx.  */
+             int bi = search_cur_bkref_entry (mctx, str_idx);
+             for (; bi < mctx->nbkref_ents; ++bi)
+               {
+                 struct re_backref_cache_entry *ent = mctx->bkref_ents + bi;
+                 if (ent->str_idx > str_idx)
+                   break;
+                 /* Only an empty match of this very back reference keeps us
+                    at the same string index.  */
+                 if (ent->node == node && ent->subexp_from == ent->subexp_to)
+                   {
+                     int cpos, dst;
+                     /* Follow the back reference's first epsilon
+                        destination and classify from there.  */
+                     dst = dfa->edests[node].elems[0];
+                     cpos = check_dst_limits_calc_pos (dfa, mctx, limit,
+                                                       dfa->eclosures + dst,
+                                                       subexp_idx, dst,
+                                                       str_idx);
+                     if ((str_idx == lim->subexp_from && cpos == -1)
+                         || (str_idx == lim->subexp_to && cpos == 0))
+                       return cpos;
+                   }
+               }
+           }
+         /* The opening node at the start boundary: still before the
+            sub expression.  */
+         if (type == OP_OPEN_SUBEXP && subexp_idx == dfa->nodes[node].opr.idx
+             && str_idx == lim->subexp_from)
+           {
+             pos = -1;
+             break;
+           }
+         /* The closing node at the end boundary: POS stays 0.  */
+         if (type == OP_CLOSE_SUBEXP && subexp_idx == dfa->nodes[node].opr.idx
+             && str_idx == lim->subexp_to)
+           break;
+       }
+      /* The loop ran to completion (no break) at the end boundary.  */
+      if (node_idx == eclosures->nelem && str_idx == lim->subexp_to)
+       pos = 1;
+    }
+  return pos;
+}
+
+/* Check the limitations of sub expressions LIMITS (indices into
+   BKREF_ENTS) at string index STR_IDX, and remove the nodes which are
+   against the limitations from DEST_NODES.  Removal is delegated to
+   sub_epsilon_src_nodes, which receives CANDIDATES.  Returns REG_NOERROR,
+   or the error from sub_epsilon_src_nodes.  */
+
+static reg_errcode_t
+check_subexp_limits (dfa, dest_nodes, candidates, limits, bkref_ents, str_idx)
+     re_dfa_t *dfa;
+     re_node_set *dest_nodes;
+     const re_node_set *candidates;
+     re_node_set *limits;
+     struct re_backref_cache_entry *bkref_ents;
+     int str_idx;
+{
+  reg_errcode_t err;
+  int node_idx, lim_idx;
+
+  for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
+    {
+      int subexp_idx;
+      struct re_backref_cache_entry *ent;
+      ent = bkref_ents + limits->elems[lim_idx];
+
+      /* Only positions after the start of the sub expression and not past
+         the string index of the back reference entry are constrained.  */
+      if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
+       continue; /* This is unrelated limitation.  */
+
+      subexp_idx = dfa->nodes[ent->node].opr.idx - 1;
+      if (ent->subexp_to == str_idx)
+       {
+         /* Locate the open and close nodes of this sub expression in
+            DEST_NODES; -1 means "not found".  */
+         int ops_node = -1;
+         int cls_node = -1;
+         for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
+           {
+             int node = dest_nodes->elems[node_idx];
+             re_token_type_t type= dfa->nodes[node].type;
+             if (type == OP_OPEN_SUBEXP
+                 && subexp_idx == dfa->nodes[node].opr.idx)
+               ops_node = node;
+             else if (type == OP_CLOSE_SUBEXP
+                      && subexp_idx == dfa->nodes[node].opr.idx)
+               cls_node = node;
+           }
+
+         /* Check the limitation of the open subexpression.  */
+         /* Note that (ent->subexp_to = str_idx != ent->subexp_from).  */
+         if (ops_node >= 0)
+           {
+             err = sub_epsilon_src_nodes(dfa, ops_node, dest_nodes,
+                                         candidates);
+             if (BE (err != REG_NOERROR, 0))
+               return err;
+           }
+         /* Check the limitation of the close subexpression.  */
+         /* NOTE(review): if no matching close node was found above,
+            cls_node is still -1 here and is passed to re_node_set_contains
+            as-is; confirm that this is intended.  */
+         for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
+           {
+             int node = dest_nodes->elems[node_idx];
+             if (!re_node_set_contains (dfa->inveclosures + node, cls_node)
+                 && !re_node_set_contains (dfa->eclosures + node, cls_node))
+               {
+                 /* It is against this limitation.
+                    Remove it from the current sifted state.  */
+                 err = sub_epsilon_src_nodes(dfa, node, dest_nodes,
+                                             candidates);
+                 if (BE (err != REG_NOERROR, 0))
+                   return err;
+                 /* DEST_NODES was modified; step back so the element now
+                    at this position is examined too.  */
+                 --node_idx;
+               }
+           }
+       }
+      else /* (ent->subexp_to != str_idx)  */
+       {
+         for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
+           {
+             int node = dest_nodes->elems[node_idx];
+             re_token_type_t type= dfa->nodes[node].type;
+             if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
+               {
+                 if (subexp_idx != dfa->nodes[node].opr.idx)
+                   continue;
+                 /* In this branch ent->subexp_to != str_idx always holds,
+                    so both node types of this sub expression qualify.  */
+                 if ((type == OP_CLOSE_SUBEXP && ent->subexp_to != str_idx)
+                     || (type == OP_OPEN_SUBEXP))
+                   {
+                     /* It is against this limitation.
+                        Remove it from the current sifted state.  */
+                     err = sub_epsilon_src_nodes(dfa, node, dest_nodes,
+                                                 candidates);
+                     if (BE (err != REG_NOERROR, 0))
+                       return err;
+                   }
+               }
+           }
+       }
+    }
+  return REG_NOERROR;
+}
+
+/* Sift the back references among the nodes of state_log[STR_IDX].
+   For every cached match of such a back reference whose destination is
+   still alive in the sifted states of SCTX (and is not ruled out by the
+   current limits), the states are sifted backward again in a local copy of
+   the sift context that has the cache entry added as a limitation; when
+   sctx->limited_states is set, the outcome is merged into it.
+   The `flag' members of the cache entries for the node are adjusted while
+   an entry is being processed and cleared again afterwards.
+   DEST_NODES is not used by this function.
+   Returns REG_NOERROR, REG_ESPACE when the limit set cannot grow, or the
+   error reported by a callee.  */
+static reg_errcode_t
+sift_states_bkref (preg, mctx, sctx, str_idx, dest_nodes)
+     const regex_t *preg;
+     re_match_context_t *mctx;
+     re_sift_context_t *sctx;
+     int str_idx;
+     re_node_set *dest_nodes;
+{
+  reg_errcode_t err;
+  re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
+  int node_idx, node;
+  /* Plain int result of re_node_set_insert.  It is kept apart from ERR so
+     that the failure value -1 is never squeezed through the enumeration
+     type, whose signedness is up to the compiler.  */
+  int ret;
+  re_sift_context_t local_sctx;
+  const re_node_set *candidates;
+  candidates = ((mctx->state_log[str_idx] == NULL) ? &empty_set
+               : &mctx->state_log[str_idx]->nodes);
+  local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized.  */
+
+  for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
+    {
+      int cur_bkref_idx = re_string_cur_idx (mctx->input);
+      re_token_type_t type;
+      node = candidates->elems[node_idx];
+      type = dfa->nodes[node].type;
+      /* Skip the back reference this sift context is already working on.  */
+      if (node == sctx->cur_bkref && str_idx == cur_bkref_idx)
+       continue;
+      /* Avoid infinite loop for the REs like "()\1+".  */
+      if (node == sctx->last_node && str_idx == sctx->last_str_idx)
+       continue;
+      if (type == OP_BACK_REF)
+       {
+         int enabled_idx = search_cur_bkref_entry (mctx, str_idx);
+         for (; enabled_idx < mctx->nbkref_ents; ++enabled_idx)
+           {
+             int disabled_idx, subexp_len, to_idx, dst_node;
+             struct re_backref_cache_entry *entry;
+             entry = mctx->bkref_ents + enabled_idx;
+             if (entry->str_idx > str_idx)
+               break;
+             if (entry->node != node)
+                 continue;
+             subexp_len = entry->subexp_to - entry->subexp_from;
+             to_idx = str_idx + subexp_len;
+             /* An empty match is followed through the epsilon destination,
+                a non-empty one through the ordinary successor.  */
+             dst_node = (subexp_len ? dfa->nexts[node]
+                         : dfa->edests[node].elems[0]);
+
+             /* The entry is of no use unless its destination survived
+                sifting and the limits permit the jump.  */
+             if (to_idx > sctx->last_str_idx
+                 || sctx->sifted_states[to_idx] == NULL
+                 || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx],
+                                          dst_node)
+                 || check_dst_limits (dfa, &sctx->limits, mctx, node,
+                                      str_idx, dst_node, to_idx))
+               continue;
+               {
+                 re_dfastate_t *cur_state;
+                 /* Enable this entry and disable the later entries of the
+                    same node at this string index.  */
+                 entry->flag = 0;
+                 for (disabled_idx = enabled_idx + 1;
+                      disabled_idx < mctx->nbkref_ents; ++disabled_idx)
+                   {
+                     struct re_backref_cache_entry *entry2;
+                     entry2 = mctx->bkref_ents + disabled_idx;
+                     if (entry2->str_idx > str_idx)
+                       break;
+                     entry2->flag = (entry2->node == node) ? 1 : entry2->flag;
+                   }
+
+                 /* Set up the local context on first use; it shares
+                    everything with SCTX except a private copy of the
+                    limits.  */
+                 if (local_sctx.sifted_states == NULL)
+                   {
+                     local_sctx = *sctx;
+                     err = re_node_set_init_copy (&local_sctx.limits,
+                                                  &sctx->limits);
+                     if (BE (err != REG_NOERROR, 0))
+                       goto free_return;
+                   }
+                 local_sctx.last_node = node;
+                 local_sctx.last_str_idx = str_idx;
+                 ret = re_node_set_insert (&local_sctx.limits, enabled_idx);
+                 if (BE (ret == -1, 0))
+                   {
+                     err = REG_ESPACE;
+                     goto free_return;
+                   }
+                 /* Remember the state at STR_IDX; it is put back after the
+                    nested sift below.  */
+                 cur_state = local_sctx.sifted_states[str_idx];
+                 err = sift_states_backward (preg, mctx, &local_sctx);
+                 if (BE (err != REG_NOERROR, 0))
+                   goto free_return;
+                 if (sctx->limited_states != NULL)
+                   {
+                     err = merge_state_array (dfa, sctx->limited_states,
+                                              local_sctx.sifted_states,
+                                              str_idx + 1);
+                     if (BE (err != REG_NOERROR, 0))
+                       goto free_return;
+                   }
+                 local_sctx.sifted_states[str_idx] = cur_state;
+                 re_node_set_remove (&local_sctx.limits, enabled_idx);
+                 /* We must not use the variable entry here, since
+                    mctx->bkref_ents might be realloced.  */
+                 mctx->bkref_ents[enabled_idx].flag = 1;
+               }
+           }
+         /* Clear the flags of all entries of this node again.  */
+         enabled_idx = search_cur_bkref_entry (mctx, str_idx);
+         for (; enabled_idx < mctx->nbkref_ents; ++enabled_idx)
+           {
+             struct re_backref_cache_entry *entry;
+             entry = mctx->bkref_ents + enabled_idx;
+             if (entry->str_idx > str_idx)
+               break;
+             if (entry->node == node)
+               entry->flag = 0;
+           }
+       }
+    }
+  err = REG_NOERROR;
+ free_return:
+  /* The private limit set exists only if the local context was set up.  */
+  if (local_sctx.sifted_states != NULL)
+    {
+      re_node_set_free (&local_sctx.limits);
+    }
+
+  return err;
+}
+
+
+#ifdef RE_ENABLE_I18N
+/* Report how many bytes node NODE_IDX accepts at string index STR_IDX
+   while sifting, as computed by check_node_accept_bytes.  A positive length
+   that ends at or before MAX_STR_IDX is turned into 0 when the sifted state
+   at the end position does not contain the successor of the node, i.e.
+   when the destination has already been thrown away.  */
+static int
+sift_states_iter_mb (preg, mctx, sctx, node_idx, str_idx, max_str_idx)
+    const regex_t *preg;
+    const re_match_context_t *mctx;
+    re_sift_context_t *sctx;
+    int node_idx, str_idx, max_str_idx;
+{
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  int len;
+
+  /* Check whether the node can accept `multi byte' input here.  */
+  len = check_node_accept_bytes (preg, node_idx, mctx->input, str_idx);
+
+  /* Nothing accepted: hand the result back unchanged.  */
+  if (len <= 0)
+    return len;
+
+  /* The end position lies beyond the sifted range, so there is no
+     destination state to consult.  */
+  if (str_idx + len > max_str_idx)
+    return len;
+
+  /* The destination is still alive: the node accepts `len' bytes.  */
+  if (STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + len],
+                           dfa->nexts[node_idx]))
+    return len;
+
+  /* The destination was discarded, so the node cannot accept the
+     current `multi byte' input after all.  */
+  return 0;
+}
+#endif /* RE_ENABLE_I18N */
+
+\f
+/* Functions for state transition.  */
+
+/* Return the next state to which the current state STATE will transit by
+   accepting the current input byte, and update STATE_LOG if necessary.
+   If STATE can accept a multibyte char/collating element/back reference
+   update the destination of STATE_LOG.  */
+
+static re_dfastate_t *
+transit_state (err, preg, mctx, state, fl_search)
+     reg_errcode_t *err;
+     const regex_t *preg;
+     re_match_context_t *mctx;
+     re_dfastate_t *state;
+     int fl_search;
+{
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  re_dfastate_t **trtable, *next_state;
+  unsigned char ch;
+  int cur_idx;
+
+  if (re_string_cur_idx (mctx->input) + 1 >= mctx->input->bufs_len
+      || (re_string_cur_idx (mctx->input) + 1 >= mctx->input->valid_len
+         && mctx->input->valid_len < mctx->input->len))
+    {
+      *err = extend_buffers (mctx);
+      if (BE (*err != REG_NOERROR, 0))
+       return NULL;
+    }
+
+  *err = REG_NOERROR;
+  if (state == NULL)
+    {
+      next_state = state;
+      re_string_skip_bytes (mctx->input, 1);
+    }
+  else
+    {
+#ifdef RE_ENABLE_I18N
+      /* If the current state can accept multibyte.  */
+      if (state->accept_mb)
+       {
+         *err = transit_state_mb (preg, state, mctx);
+         if (BE (*err != REG_NOERROR, 0))
+           return NULL;
+       }
+#endif /* RE_ENABLE_I18N */
+
+      /* Then decide the next state with the single byte.  */
+      if (1)
+       {
+         /* Use transition table  */
+         ch = re_string_fetch_byte (mctx->input);
+         trtable = fl_search ? state->trtable_search : state->trtable;
+         if (trtable == NULL)
+           {
+             trtable = build_trtable (preg, state, fl_search);
+             if (fl_search)
+               state->trtable_search = trtable;
+             else
+               state->trtable = trtable;
+           }
+         next_state = trtable[ch];
+       }
+      else
+       {
+         /* don't use transition table  */
+         next_state = transit_state_sb (err, preg, state, fl_search, mctx);
+         if (BE (next_state == NULL && err != REG_NOERROR, 0))
+           return NULL;
+       }
+    }
+
+  cur_idx = re_string_cur_idx (mctx->input);
+  /* Update the state_log if we need.  */
+  if (mctx->state_log != NULL)
+    {
+      if (cur_idx > mctx->state_log_top)
+       {
+         mctx->state_log[cur_idx] = next_state;
+         mctx->state_log_top = cur_idx;
+       }
+      else if (mctx->state_log[cur_idx] == 0)
+       {
+         mctx->state_log[cur_idx] = next_state;
+       }
+      else
+       {
+         re_dfastate_t *pstate;
+         unsigned int context;
+         re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
+         /* If (state_log[cur_idx] != 0), it implies that cur_idx is
+            the destination of a multibyte char/collating element/
+            back reference.  Then the next state is the union set of
+            these destinations and the results of the transition table.  */
+         pstate = mctx->state_log[cur_idx];
+         log_nodes = pstate->entrance_nodes;
+         if (next_state != NULL)
+           {
+             table_nodes = next_state->entrance_nodes;
+             *err = re_node_set_init_union (&next_nodes, table_nodes,
+                                            log_nodes);
+             if (BE (*err != REG_NOERROR, 0))
+               return NULL;
+           }
+         else
+           next_nodes = *log_nodes;
+         /* Note: We already add the nodes of the initial state,
+                  then we don't need to add them here.  */
+
+         context = re_string_context_at (mctx->input,
+                                         re_string_cur_idx (mctx->input) - 1,
+                                         mctx->eflags, preg->newline_anchor);
+         next_state = mctx->state_log[cur_idx]
+           = re_acquire_state_context (err, dfa, &next_nodes, context);
+         /* We don't need to check errors here, since the return value of
+            this function is next_state and ERR is already set.  */
+
+         if (table_nodes != NULL)
+           re_node_set_free (&next_nodes);
+       }
+    }
+
+  /* Check OP_OPEN_SUBEXP in the current state in case that we use them
+     later.  We must check them here, since the back references in the
+     next state might use them.  */
+  if (dfa->nbackref && next_state/* && fl_process_bkref */)
+    {
+      *err = check_subexp_matching_top (dfa, mctx, &next_state->nodes,
+                                       cur_idx);
+      if (BE (*err != REG_NOERROR, 0))
+       return NULL;
+    }
+
+  /* If the next state has back references.  */
+  if (next_state != NULL && next_state->has_backref)
+    {
+      *err = transit_state_bkref (preg, &next_state->nodes, mctx);
+      if (BE (*err != REG_NOERROR, 0))
+       return NULL;
+      next_state = mctx->state_log[cur_idx];
+    }
+  return next_state;
+}
+
+/* Helper functions for transit_state.  */
+
+/* From the node set CUR_NODES, pick up the nodes whose types are
+   OP_OPEN_SUBEXP and which have corresponding back references in the regular
+   expression, and register them together with STR_IDX (through
+   match_ctx_add_subtop) to use them later for evaluating the
+   corresponding back references.
+   Returns REG_NOERROR, or the error from match_ctx_add_subtop.  */
+
+static reg_errcode_t
+check_subexp_matching_top (dfa, mctx, cur_nodes, str_idx)
+     re_dfa_t *dfa;
+     re_match_context_t *mctx;
+     re_node_set *cur_nodes;
+     int str_idx;
+{
+  int node_idx;
+  reg_errcode_t err;
+  /* Number of bits held by dfa->used_bkref_map (8 bits per byte).  */
+  unsigned int map_bits = (unsigned int) (8 * sizeof (dfa->used_bkref_map));
+
+  /* TODO: This isn't efficient.
+          Because there might be more than one nodes whose types are
+          OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
+          nodes.
+          E.g. RE: (a){2}  */
+  for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)
+    {
+      int node = cur_nodes->elems[node_idx];
+      unsigned int subexp;
+
+      if (dfa->nodes[node].type != OP_OPEN_SUBEXP)
+        continue;
+
+      /* A sub expression whose index does not fit in the bitmap cannot be
+         marked as used there, and shifting by the full width or more
+         would be undefined.  The unsigned conversion also rejects a
+         negative index.  */
+      subexp = (unsigned int) dfa->nodes[node].opr.idx;
+      if (subexp >= map_bits)
+        continue;
+      if (!(dfa->used_bkref_map & (1u << subexp)))
+        continue;
+
+      err = match_ctx_add_subtop (mctx, node, str_idx);
+      if (BE (err != REG_NOERROR, 0))
+        return err;
+    }
+  return REG_NOERROR;
+}
+
+/* Return the next state to which the current state STATE will transit by
+   accepting the current input byte.  The destination node set is computed
+   node by node instead of through a transition table; when FL_SEARCH is
+   set the entrance nodes of the initial state may be merged in as well.
+   One input byte is skipped before a successful return.  On failure NULL
+   is returned with *ERR set.  */
+
+static re_dfastate_t *
+transit_state_sb (err, preg, state, fl_search, mctx)
+     reg_errcode_t *err;
+     const regex_t *preg;
+     re_dfastate_t *state;
+     int fl_search;
+     re_match_context_t *mctx;
+{
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  re_node_set next_nodes;
+  re_dfastate_t *next_state;
+  int node_cnt, cur_str_idx = re_string_cur_idx (mctx->input);
+  unsigned int context;
+
+  *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
+  if (BE (*err != REG_NOERROR, 0))
+    return NULL;
+  /* Gather the epsilon closures of the successors of every node that
+     accepts the input at CUR_STR_IDX.  */
+  for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
+    {
+      int cur_node = state->nodes.elems[node_cnt];
+      if (check_node_accept (preg, dfa->nodes + cur_node, mctx, cur_str_idx))
+       {
+         *err = re_node_set_merge (&next_nodes,
+                                   dfa->eclosures + dfa->nexts[cur_node]);
+         if (BE (*err != REG_NOERROR, 0))
+           {
+             re_node_set_free (&next_nodes);
+             return NULL;
+           }
+       }
+    }
+  if (fl_search)
+    {
+#ifdef RE_ENABLE_I18N
+      /* In a multibyte locale, take the mb_partial flag of the first
+         CHARACTER node among the destinations; when it is set the initial
+         state is not merged in.  */
+      int not_initial = 0;
+      if (MB_CUR_MAX > 1)
+       for (node_cnt = 0; node_cnt < next_nodes.nelem; ++node_cnt)
+         if (dfa->nodes[next_nodes.elems[node_cnt]].type == CHARACTER)
+           {
+             not_initial = dfa->nodes[next_nodes.elems[node_cnt]].mb_partial;
+             break;
+           }
+      if (!not_initial)
+#endif
+       {
+         /* Searching: add the entrance nodes of the initial state.  */
+         *err = re_node_set_merge (&next_nodes,
+                                   dfa->init_state->entrance_nodes);
+         if (BE (*err != REG_NOERROR, 0))
+           {
+             re_node_set_free (&next_nodes);
+             return NULL;
+           }
+       }
+    }
+  context = re_string_context_at (mctx->input, cur_str_idx, mctx->eflags,
+                                 preg->newline_anchor);
+  next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
+  /* We don't need to check errors here, since the return value of
+     this function is next_state and ERR is already set.  */
+
+  re_node_set_free (&next_nodes);
+  re_string_skip_bytes (mctx->input, 1);
+  return next_state;
+}
+
+#ifdef RE_ENABLE_I18N
+/* For every node of PSTATE that can accept more than one byte at the
+   current input position, record the destination in the state log: the
+   epsilon closure of the node's successor is merged into the state logged
+   at the position just after the accepted bytes.  mctx->max_mb_elem_len is
+   raised to the longest length accepted.  Returns REG_NOERROR, or the
+   first error reported by a callee.  */
+static reg_errcode_t
+transit_state_mb (preg, pstate, mctx)
+    const regex_t *preg;
+    re_dfastate_t *pstate;
+    re_match_context_t *mctx;
+{
+  reg_errcode_t err;
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  int i;
+
+  for (i = 0; i < pstate->nodes.nelem; ++i)
+    {
+      re_node_set dest_nodes, *new_nodes;
+      int cur_node_idx = pstate->nodes.elems[i];
+      int naccepted = 0, dest_idx;
+      unsigned int context;
+      re_dfastate_t *dest_state;
+
+      /* A constrained node is skipped when the context at the current
+         position does not satisfy its constraint.  */
+      if (dfa->nodes[cur_node_idx].constraint)
+       {
+         context = re_string_context_at (mctx->input,
+                                         re_string_cur_idx (mctx->input),
+                                         mctx->eflags, preg->newline_anchor);
+         if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
+                                          context))
+           continue;
+       }
+
+      /* How many bytes the node can accepts?  */
+      if (ACCEPT_MB_NODE (dfa->nodes[cur_node_idx].type))
+       naccepted = check_node_accept_bytes (preg, cur_node_idx, mctx->input,
+                                            re_string_cur_idx (mctx->input));
+      if (naccepted == 0)
+       continue;
+
+      /* The node can accepts `naccepted' bytes.  */
+      dest_idx = re_string_cur_idx (mctx->input) + naccepted;
+      mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
+                              : mctx->max_mb_elem_len);
+      /* Make the state log addressable at DEST_IDX.  */
+      err = clean_state_log_if_need (mctx, dest_idx);
+      if (BE (err != REG_NOERROR, 0))
+       return err;
+#ifdef DEBUG
+      assert (dfa->nexts[cur_node_idx] != -1);
+#endif
+      /* `cur_node_idx' may point the entity of the OP_CONTEXT_NODE,
+        then we use pstate->nodes.elems[i] instead.
+        (In this function cur_node_idx was initialised from
+        pstate->nodes.elems[i] and is not modified, so both name the same
+        node here.)  */
+      new_nodes = dfa->eclosures + dfa->nexts[pstate->nodes.elems[i]];
+
+      dest_state = mctx->state_log[dest_idx];
+      if (dest_state == NULL)
+       /* Shallow copy: DEST_NODES borrows the elements of NEW_NODES and is
+          therefore not freed below.  */
+       dest_nodes = *new_nodes;
+      else
+       {
+         err = re_node_set_init_union (&dest_nodes,
+                                       dest_state->entrance_nodes, new_nodes);
+         if (BE (err != REG_NOERROR, 0))
+           return err;
+       }
+      context = re_string_context_at (mctx->input, dest_idx - 1, mctx->eflags,
+                                     preg->newline_anchor);
+      mctx->state_log[dest_idx]
+       = re_acquire_state_context (&err, dfa, &dest_nodes, context);
+      /* Only the union built above owns storage that must be released.  */
+      if (dest_state != NULL)
+       re_node_set_free (&dest_nodes);
+      if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0))
+       return err;
+    }
+  return REG_NOERROR;
+}
+#endif /* RE_ENABLE_I18N */
+
+static reg_errcode_t
+transit_state_bkref (preg, nodes, mctx)
+    const regex_t *preg;
+    re_node_set *nodes;
+    re_match_context_t *mctx;
+{
+  reg_errcode_t err;
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  int i;
+  int cur_str_idx = re_string_cur_idx (mctx->input);
+
+  for (i = 0; i < nodes->nelem; ++i)
+    {
+      int dest_str_idx, prev_nelem, bkc_idx;
+      int node_idx = nodes->elems[i];
+      unsigned int context;
+      re_token_t *node = dfa->nodes + node_idx;
+      re_node_set *new_dest_nodes;
+
+      /* Check whether `node' is a backreference or not.  */
+      if (node->type != OP_BACK_REF)
+       continue;
+
+      if (node->constraint)
+       {
+         context = re_string_context_at (mctx->input, cur_str_idx,
+                                         mctx->eflags, preg->newline_anchor);
+         if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
+           continue;
+       }
+
+      /* `node' is a backreference.
+        Check the substring which the substring matched.  */
+      bkc_idx = mctx->nbkref_ents;
+      err = get_subexp (preg, mctx, node_idx, cur_str_idx);
+      if (BE (err != REG_NOERROR, 0))
+       goto free_return;
+
+      /* And add the epsilon closures (which is `new_dest_nodes') of
+        the backreference to appropriate state_log.  */
+#ifdef DEBUG
+      assert (dfa->nexts[node_idx] != -1);
+#endif
+      for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
+       {
+         int subexp_len;
+         re_dfastate_t *dest_state;
+         struct re_backref_cache_entry *bkref_ent;
+         bkref_ent = mctx->bkref_ents + bkc_idx;
+         if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
+           continue;
+         subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
+         new_dest_nodes = (subexp_len == 0
+                           ? dfa->eclosures + dfa->edests[node_idx].elems[0]
+                           : dfa->eclosures + dfa->nexts[node_idx]);
+         dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
+                         - bkref_ent->subexp_from);
+         context = re_string_context_at (mctx->input, dest_str_idx - 1,
+                                         mctx->eflags, preg->newline_anchor);
+         dest_state = mctx->state_log[dest_str_idx];
+         prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
+                       : mctx->state_log[cur_str_idx]->nodes.nelem);
+         /* Add `new_dest_node' to state_log.  */
+         if (dest_state == NULL)
+           {
+             mctx->state_log[dest_str_idx]
+               = re_acquire_state_context (&err, dfa, new_dest_nodes,
+                                           context);
+             if (BE (mctx->state_log[dest_str_idx] == NULL
+                     && err != REG_NOERROR, 0))
+               goto free_return;
+           }
+         else
+           {
+             re_node_set dest_nodes;
+             err = re_node_set_init_union (&dest_nodes,
+                                           dest_state->entrance_nodes,
+                                           new_dest_nodes);
+             if (BE (err != REG_NOERROR, 0))
+               {
+                 re_node_set_free (&dest_nodes);
+                 goto free_return;
+               }
+             mctx->state_log[dest_str_idx]
+               = re_acquire_state_context (&err, dfa, &dest_nodes, context);
+             re_node_set_free (&dest_nodes);
+             if (BE (mctx->state_log[dest_str_idx] == NULL
+                     && err != REG_NOERROR, 0))
+               goto free_return;
+           }
+         /* We need to check recursively if the backreference can epsilon
+            transit.  */
+         if (subexp_len == 0
+             && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
+           {
+             err = check_subexp_matching_top (dfa, mctx, new_dest_nodes,
+                                              cur_str_idx);
+             if (BE (err != REG_NOERROR, 0))
+               goto free_return;
+             err = transit_state_bkref (preg, new_dest_nodes, mctx);
+             if (BE (err != REG_NOERROR, 0))
+               goto free_return;
+           }
+       }
+    }
+  err = REG_NOERROR;
+ free_return:
+  return err;
+}
+
+/* Enumerate all the candidates which the back reference BKREF_NODE can
+   match at BKREF_STR_IDX, and register them by match_ctx_add_entry()
+   (called through get_subexp_sub()).
+   Note that we might collect inappropriate candidates here.
+   However, the cost of checking them strictly here is too high, so we
+   delay that checking until prune_impossible_nodes().
+
+   Return REG_NOERROR on success (also when no candidate was found), or
+   the error code of a failing helper, e.g. REG_ESPACE.  */
+
+static reg_errcode_t
+get_subexp (preg, mctx, bkref_node, bkref_str_idx)
+     const regex_t *preg;
+     re_match_context_t *mctx;
+     int bkref_node, bkref_str_idx;
+{
+  int subexp_num, sub_top_idx;
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  char *buf = (char *) re_string_get_buffer (mctx->input);
+  /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX.  */
+  int cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
+  for (; cache_idx < mctx->nbkref_ents; ++cache_idx)
+    {
+      struct re_backref_cache_entry *entry = mctx->bkref_ents + cache_idx;
+      if (entry->str_idx > bkref_str_idx)
+       break;
+      if (entry->node == bkref_node)
+       return REG_NOERROR; /* We already checked it.  */
+    }
+  /* Index of the sub expression the back reference refers to, as compared
+     against the opr.idx of the sub expression nodes below.  */
+  subexp_num = dfa->nodes[bkref_node].opr.idx - 1;
+
+  /* For each recorded start of a sub expression  */
+  for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
+    {
+      reg_errcode_t err;
+      re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
+      re_sub_match_last_t *sub_last;
+      int sub_last_idx, sl_str;
+      char *bkref_str;
+
+      if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
+       continue; /* It isn't related.  */
+
+      /* SL_STR walks over the text matched by the sub expression while
+        BKREF_STR walks in step over the text at the back reference.  */
+      sl_str = sub_top->str_idx;
+      bkref_str = buf + bkref_str_idx;
+      /* At first, check the last node of sub expressions we already
+        evaluated.  */
+      for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
+       {
+         int sl_str_diff;
+         sub_last = sub_top->lasts[sub_last_idx];
+         sl_str_diff = sub_last->str_idx - sl_str;
+         /* The matched string by the sub expression match with the substring
+            at the back reference?  */
+         if (sl_str_diff > 0
+             && memcmp (bkref_str, buf + sl_str, sl_str_diff) != 0)
+           break; /* We don't need to search this sub expression any more.  */
+         bkref_str += sl_str_diff;
+         sl_str += sl_str_diff;
+         err = get_subexp_sub (preg, mctx, sub_top, sub_last, bkref_node,
+                               bkref_str_idx);
+         if (err == REG_NOMATCH)
+           continue;
+         if (BE (err != REG_NOERROR, 0))
+           return err;
+       }
+      /* The loop above stopped early on a text mismatch: longer matches of
+        this sub expression cannot agree with the back reference either.  */
+      if (sub_last_idx < sub_top->nlasts)
+       continue;
+      if (sub_last_idx > 0)
+       ++sl_str;
+      /* Then, search for the other last nodes of the sub expression.  */
+      for (; sl_str <= bkref_str_idx; ++sl_str)
+       {
+         int cls_node, sl_str_off;
+         re_node_set *nodes;
+         sl_str_off = sl_str - sub_top->str_idx;
+         /* The matched string by the sub expression match with the substring
+            at the back reference?  */
+         if (sl_str_off > 0
+             && memcmp (bkref_str++, buf + sl_str - 1, 1) != 0)
+           break; /* We don't need to search this sub expression any more.  */
+         if (mctx->state_log[sl_str] == NULL)
+           continue;
+         /* Does this state have a ')' of the sub expression?  */
+         nodes = &mctx->state_log[sl_str]->nodes;
+         cls_node = find_subexp_node (dfa, nodes, subexp_num, 0);
+         if (cls_node == -1)
+           continue; /* No.  */
+         if (sub_top->path == NULL)
+           {
+             sub_top->path = calloc (sl_str - sub_top->str_idx + 1,
+                                     sizeof (state_array_t));
+             if (sub_top->path == NULL)
+               return REG_ESPACE;
+           }
+         /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
+            in the current context?  */
+         err = check_arrival (preg, mctx, sub_top->path, sub_top->node,
+                              sub_top->str_idx, cls_node, sl_str, 0);
+         if (err == REG_NOMATCH)
+             continue;
+         if (BE (err != REG_NOERROR, 0))
+             return err;
+         sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
+         if (BE (sub_last == NULL, 0))
+           return REG_ESPACE;
+         err = get_subexp_sub (preg, mctx, sub_top, sub_last, bkref_node,
+                               bkref_str_idx);
+         if (err == REG_NOMATCH)
+           continue;
+         /* Any other failure (e.g. REG_ESPACE) must reach the caller, as
+            it does in the first loop above.  */
+         if (BE (err != REG_NOERROR, 0))
+           return err;
+       }
+    }
+  return REG_NOERROR;
+}
+
+/* Helper functions for get_subexp().  */
+
+/* Test whether the sub expression match delimited by SUB_TOP and SUB_LAST
+   can reach the back reference BKREF_NODE at string index BKREF_STR.
+   If it can, record the pair in the back reference cache of MCTX.
+   The status of the first failing step is returned unchanged; otherwise
+   the result is REG_NOERROR.  */
+
+static reg_errcode_t
+get_subexp_sub (preg, mctx, sub_top, sub_last, bkref_node, bkref_str)
+     const regex_t *preg;
+     re_match_context_t *mctx;
+     re_sub_match_top_t *sub_top;
+     re_sub_match_last_t *sub_last;
+     int bkref_node, bkref_str;
+{
+  reg_errcode_t status;
+
+  /* Is the back reference reachable from the end of the sub expression?  */
+  status = check_arrival (preg, mctx, &sub_last->path, sub_last->node,
+                          sub_last->str_idx, bkref_node, bkref_str, 1);
+  if (status == REG_NOERROR)
+    {
+      /* Yes: remember which text span the back reference stands for.  */
+      status = match_ctx_add_entry (mctx, bkref_node, bkref_str,
+                                    sub_top->str_idx, sub_last->str_idx);
+      if (BE (status == REG_NOERROR, 1))
+        {
+          /* The back reference ends this far into the input.  */
+          int end_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
+          clean_state_log_if_need (mctx, end_idx);
+        }
+    }
+  return status;
+}
+
+/* Scan NODES in order for the first sub expression delimiter whose index
+   is SUBEXP_IDX and return its node number, or -1 if there is none.
+   The delimiter looked for is '(' (OP_OPEN_SUBEXP) when FL_OPEN is
+   nonzero, and ')' (OP_CLOSE_SUBEXP) otherwise.
+   TODO: This is a linear search.  More than one node can carry the same
+        type and index (e.g. RE: (a){2}), hence every node is examined.  */
+
+static int
+find_subexp_node (dfa, nodes, subexp_idx, fl_open)
+     re_dfa_t *dfa;
+     re_node_set *nodes;
+     int subexp_idx, fl_open;
+{
+  /* The delimiter type to look for depends only on FL_OPEN.  */
+  int wanted_type = fl_open ? OP_OPEN_SUBEXP : OP_CLOSE_SUBEXP;
+  int pos;
+
+  for (pos = 0; pos < nodes->nelem; ++pos)
+    {
+      int candidate = nodes->elems[pos];
+      const re_token_t *token = &dfa->nodes[candidate];
+
+      if (token->type != wanted_type)
+        continue;
+      if (token->opr.idx == subexp_idx)
+        return candidate;
+    }
+  return -1;
+}
+
+/* Check whether the node TOP_NODE at TOP_STR can arrive to the node
+   LAST_NODE at LAST_STR.  We record the path onto PATH since it will be
+   heavily reused; PATH->next_idx remembers how far a previous call got.
+   While the path is computed, MCTX->state_log and the cursor of
+   MCTX->input are temporarily redirected to PATH; both are restored
+   before returning, on the error paths as well.
+   Return REG_NOERROR if it can arrive, REG_NOMATCH if it cannot, or the
+   error code of a failing helper (e.g. REG_ESPACE).  */
+
+static reg_errcode_t
+check_arrival (preg, mctx, path, top_node, top_str, last_node, last_str,
+              fl_open)
+     const regex_t *preg;
+     re_match_context_t *mctx;
+     state_array_t *path;
+     int top_node, top_str, last_node, last_str, fl_open;
+{
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  reg_errcode_t err;
+  int subexp_num, backup_cur_idx, str_idx, null_cnt;
+  re_dfastate_t *cur_state = NULL;
+  re_node_set *cur_nodes, next_nodes;
+  re_dfastate_t **backup_state_log;
+  unsigned int context;
+
+  subexp_num = dfa->nodes[top_node].opr.idx;
+  /* Extend the buffer if we need.  */
+  if (path->alloc < last_str + mctx->max_mb_elem_len + 1)
+    {
+      re_dfastate_t **new_array;
+      int old_alloc = path->alloc;
+      int new_alloc = old_alloc + last_str + mctx->max_mb_elem_len + 1;
+      new_array = re_realloc (path->array, re_dfastate_t *, new_alloc);
+      if (new_array == NULL)
+       return REG_ESPACE;
+      /* Publish the new capacity only once the array really has it, so
+        that a failed reallocation leaves PATH consistent.  */
+      path->array = new_array;
+      path->alloc = new_alloc;
+      memset (new_array + old_alloc, '\0',
+             sizeof (re_dfastate_t *) * (new_alloc - old_alloc));
+    }
+
+  /* Resume where the previous call on this PATH stopped, if any.  */
+  str_idx = path->next_idx == 0 ? top_str : path->next_idx;
+
+  /* Temporary modify MCTX.  */
+  backup_state_log = mctx->state_log;
+  backup_cur_idx = mctx->input->cur_idx;
+  mctx->state_log = path->array;
+  mctx->input->cur_idx = str_idx;
+
+  /* Setup initial node set.  */
+  context = re_string_context_at (mctx->input, str_idx - 1, mctx->eflags,
+                                 preg->newline_anchor);
+  if (str_idx == top_str)
+    {
+      err = re_node_set_init_1 (&next_nodes, top_node);
+      if (BE (err != REG_NOERROR, 0))
+       goto restore_return;
+      err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, fl_open);
+      if (BE (err != REG_NOERROR, 0))
+       goto free_return;
+    }
+  else
+    {
+      cur_state = mctx->state_log[str_idx];
+      if (cur_state && cur_state->has_backref)
+       {
+         err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
+         if (BE ( err != REG_NOERROR, 0))
+           goto restore_return;
+       }
+      else
+       re_node_set_init_empty (&next_nodes);
+    }
+  if (str_idx == top_str || (cur_state && cur_state->has_backref))
+    {
+      if (next_nodes.nelem)
+       {
+         err = expand_bkref_cache (preg, mctx, &next_nodes, str_idx, last_str,
+                                   subexp_num, fl_open);
+         if (BE ( err != REG_NOERROR, 0))
+           goto free_return;
+       }
+      cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
+      if (BE (cur_state == NULL && err != REG_NOERROR, 0))
+       goto free_return;
+      mctx->state_log[str_idx] = cur_state;
+    }
+
+  /* Advance one string position per iteration.  The walk is abandoned
+     once more than max_mb_elem_len consecutive positions have no state.  */
+  for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
+    {
+      re_node_set_empty (&next_nodes);
+      /* Start from the nodes already logged for the next position.  */
+      if (mctx->state_log[str_idx + 1])
+       {
+         err = re_node_set_merge (&next_nodes,
+                                  &mctx->state_log[str_idx + 1]->nodes);
+         if (BE (err != REG_NOERROR, 0))
+           goto free_return;
+       }
+      if (cur_state)
+       {
+         err = check_arrival_add_next_nodes(preg, dfa, mctx, str_idx,
+                                            &cur_state->nodes, &next_nodes);
+         if (BE (err != REG_NOERROR, 0))
+           goto free_return;
+       }
+      ++str_idx;
+      if (next_nodes.nelem)
+       {
+         err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num,
+                                         fl_open);
+         if (BE (err != REG_NOERROR, 0))
+           goto free_return;
+         err = expand_bkref_cache (preg, mctx, &next_nodes, str_idx, last_str,
+                                   subexp_num, fl_open);
+         if (BE ( err != REG_NOERROR, 0))
+           goto free_return;
+       }
+      context = re_string_context_at (mctx->input, str_idx - 1, mctx->eflags,
+                                     preg->newline_anchor);
+      cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
+      if (BE (cur_state == NULL && err != REG_NOERROR, 0))
+       goto free_return;
+      mctx->state_log[str_idx] = cur_state;
+      null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
+    }
+  re_node_set_free (&next_nodes);
+  /* Read the result while MCTX->state_log still points at PATH.  */
+  cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
+              : &mctx->state_log[last_str]->nodes);
+  path->next_idx = str_idx;
+
+  /* Fix MCTX.  */
+  mctx->state_log = backup_state_log;
+  mctx->input->cur_idx = backup_cur_idx;
+
+  if (cur_nodes == NULL)
+    return REG_NOMATCH;
+  /* Then check the current node set has the node LAST_NODE.  */
+  return (re_node_set_contains (cur_nodes, last_node) ? REG_NOERROR
+         : REG_NOMATCH);
+
+  /* Error exits: release the working set if it is initialized, then undo
+     the temporary modification of MCTX.  */
+ free_return:
+  re_node_set_free (&next_nodes);
+ restore_return:
+  mctx->state_log = backup_state_log;
+  mctx->input->cur_idx = backup_cur_idx;
+  return err;
+}
+
+/* Helper functions for check_arrival.  */
+
+/* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them
+   to NEXT_NODES.
+   TODO: This function is similar to the functions transit_state*(),
+        however this function has many additional works.
+        Can't we unify them?  */
+
+static reg_errcode_t
+check_arrival_add_next_nodes (preg, dfa, mctx, str_idx, cur_nodes, next_nodes)
+     const regex_t *preg;
+     re_dfa_t *dfa;
+     re_match_context_t *mctx;
+     int str_idx;
+     re_node_set *cur_nodes, *next_nodes;
+{
+  int cur_idx;
+  reg_errcode_t err;
+  re_node_set union_set;
+  re_node_set_init_empty (&union_set);
+  for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
+    {
+      int naccepted = 0;
+      int cur_node = cur_nodes->elems[cur_idx];
+      re_token_type_t type = dfa->nodes[cur_node].type;
+      if (IS_EPSILON_NODE(type))
+       continue;
+#ifdef RE_ENABLE_I18N
+      /* If the node may accept `multi byte'.  */
+      if (ACCEPT_MB_NODE (type))
+       {
+         naccepted = check_node_accept_bytes (preg, cur_node, mctx->input,
+                                              str_idx);
+         if (naccepted > 1)
+           {
+             re_dfastate_t *dest_state;
+             int next_node = dfa->nexts[cur_node];
+             int next_idx = str_idx + naccepted;
+             dest_state = mctx->state_log[next_idx];
+             re_node_set_empty (&union_set);
+             if (dest_state)
+               {
+                 err = re_node_set_merge (&union_set, &dest_state->nodes);
+                 if (BE (err != REG_NOERROR, 0))
+                   {
+                     re_node_set_free (&union_set);
+                     return err;
+                   }
+                 err = re_node_set_insert (&union_set, next_node);
+                 if (BE (err < 0, 0))
+                   {
+                     re_node_set_free (&union_set);
+                     return REG_ESPACE;
+                   }
+               }
+             else
+               {
+                 err = re_node_set_insert (&union_set, next_node);
+                 if (BE (err < 0, 0))
+                   {
+                     re_node_set_free (&union_set);
+                     return REG_ESPACE;
+                   }
+               }
+             mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
+                                                           &union_set);
+             if (BE (mctx->state_log[next_idx] == NULL
+                     && err != REG_NOERROR, 0))
+               {
+                 re_node_set_free (&union_set);
+                 return err;
+               }
+           }
+       }
+#endif /* RE_ENABLE_I18N */
+      if (naccepted
+         || check_node_accept (preg, dfa->nodes + cur_node, mctx,
+                               str_idx))
+       {
+         err = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
+         if (BE (err < 0, 0))
+           {
+             re_node_set_free (&union_set);
+             return REG_ESPACE;
+           }
+       }
+    }
+  re_node_set_free (&union_set);
+  return REG_NOERROR;
+}
+
+/* Replace CUR_NODES by the union of the epsilon closures of its members,
+   leaving out the nodes which are:
+    - inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
+    - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
+   On failure CUR_NODES is left untouched and the error code is returned;
+   on success the old contents of CUR_NODES are freed.  */
+
+static reg_errcode_t
+check_arrival_expand_ecl (dfa, cur_nodes, ex_subexp, fl_open)
+     re_dfa_t *dfa;
+     re_node_set *cur_nodes;
+     int ex_subexp, fl_open;
+{
+  re_node_set expanded;
+  reg_errcode_t status;
+  int pos;
+#ifdef DEBUG
+  assert (cur_nodes->nelem);
+#endif
+  status = re_node_set_alloc (&expanded, cur_nodes->nelem);
+  if (BE (status != REG_NOERROR, 0))
+    return status;
+
+  /* Collect into EXPANDED the epsilon closure of each member.  */
+  for (pos = 0; pos < cur_nodes->nelem; ++pos)
+    {
+      int member = cur_nodes->elems[pos];
+      re_node_set *closure = dfa->eclosures + member;
+
+      if (find_subexp_node (dfa, closure, ex_subexp, fl_open) != -1)
+        /* The closure holds a delimiter of EX_SUBEXP, so it has to be
+           walked node by node instead of being taken wholesale.  */
+        status = check_arrival_expand_ecl_sub (dfa, &expanded, member,
+                                               ex_subexp, fl_open);
+      else
+        /* Nothing to filter out: take the precomputed closure as is.  */
+        status = re_node_set_merge (&expanded, closure);
+
+      if (BE (status != REG_NOERROR, 0))
+        {
+          re_node_set_free (&expanded);
+          return status;
+        }
+    }
+
+  /* Hand the result over to the caller's set.  */
+  re_node_set_free (cur_nodes);
+  *cur_nodes = expanded;
+  return REG_NOERROR;
+}
+
+/* Helper function for check_arrival_expand_ecl.
+   Follow the epsilon edges starting at TARGET and append the nodes met to
+   DST_NODES.  The walk stops at a node already in DST_NODES, at a node
+   without epsilon destinations, and at the delimiter of the sub
+   expression EX_SUBEXP selected by FL_OPEN ('(' if FL_OPEN, ')'
+   otherwise); that delimiter itself is appended only when !FL_OPEN.
+   Return REG_NOERROR, or REG_ESPACE if DST_NODES cannot grow.  */
+
+static reg_errcode_t
+check_arrival_expand_ecl_sub (dfa, dst_nodes, target, ex_subexp, fl_open)
+     re_dfa_t *dfa;
+     int target, ex_subexp, fl_open;
+     re_node_set *dst_nodes;
+{
+  int node = target;
+
+  while (!re_node_set_contains (dst_nodes, node))
+    {
+      int type = dfa->nodes[node].type;
+      int at_boundary
+        = (type == (fl_open ? OP_OPEN_SUBEXP : OP_CLOSE_SUBEXP)
+           && dfa->nodes[node].opr.idx == ex_subexp);
+
+      /* Every visited node is recorded, except an opening delimiter we
+         were told to stop in front of.  */
+      if (!(at_boundary && fl_open))
+        {
+          int inserted = re_node_set_insert (dst_nodes, node);
+          if (BE (inserted == -1, 0))
+            return REG_ESPACE;
+        }
+      if (at_boundary)
+        break;
+
+      if (dfa->edests[node].nelem == 0)
+        break;
+      /* With two epsilon destinations, handle the second one recursively
+         and continue the loop with the first.  */
+      if (dfa->edests[node].nelem == 2)
+        {
+          reg_errcode_t sub_err
+            = check_arrival_expand_ecl_sub (dfa, dst_nodes,
+                                            dfa->edests[node].elems[1],
+                                            ex_subexp, fl_open);
+          if (BE (sub_err != REG_NOERROR, 0))
+            return sub_err;
+        }
+      node = dfa->edests[node].elems[0];
+    }
+  return REG_NOERROR;
+}
+
+
+/* For all the back references in the current state, calculate the
+   destination of the back references by the appropriate entry
+   in MCTX->BKREF_ENTS.
+   CUR_NODES is the node set at string index CUR_STR; it is extended in
+   place when a back reference matched the empty string.  A non-empty
+   match instead adds the successor of the back reference to the
+   MCTX->STATE_LOG slot at which the referenced text ends.
+   SUBEXP_NUM and FL_OPEN are passed on to check_arrival_expand_ecl ().
+   LAST_STR is not used by this function.
+   Return REG_NOERROR on success, otherwise the first error met.  */
+
+static reg_errcode_t
+expand_bkref_cache (preg, mctx, cur_nodes, cur_str, last_str, subexp_num,
+                   fl_open)
+     const regex_t *preg;
+     re_match_context_t *mctx;
+     int cur_str, last_str, subexp_num, fl_open;
+     re_node_set *cur_nodes;
+{
+  reg_errcode_t err;
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  int cache_idx, cache_idx_start;
+  /* Locate the cache entries for CUR_STR.  NOTE(review): the early `break'
+     below suggests the cache is ordered by str_idx -- confirm against
+     search_cur_bkref_entry ().  */
+
+  cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
+  for (cache_idx = cache_idx_start; cache_idx < mctx->nbkref_ents; ++cache_idx)
+    {
+      int to_idx, next_node;
+      struct re_backref_cache_entry *ent = mctx->bkref_ents + cache_idx;
+      /* Entries of later string positions end the scan.  */
+      if (ent->str_idx > cur_str)
+       break;
+      /* Is this entry ENT appropriate, i.e. does it belong to a back
+        reference node of the current state?  */
+      if (!re_node_set_contains (cur_nodes, ent->node))
+       continue; /* No.  */
+
+      /* String index at which the back reference ends.  */
+      to_idx = cur_str + ent->subexp_to - ent->subexp_from;
+      /* Calculate the destination of the back reference, and append it
+        to MCTX->STATE_LOG.  */
+      if (to_idx == cur_str)
+       {
+         /* The backreference did epsilon transit, we must re-check all the
+            node in the current state.  */
+         re_node_set new_dests;
+         reg_errcode_t err2, err3;
+         next_node = dfa->edests[ent->node].elems[0];
+         if (re_node_set_contains (cur_nodes, next_node))
+           continue;
+         /* The three steps run unconditionally; their results are
+            examined together once NEW_DESTS has been freed.  */
+         err = re_node_set_init_1 (&new_dests, next_node);
+         err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num,
+                                          fl_open);
+         err3 = re_node_set_merge (cur_nodes, &new_dests);
+         re_node_set_free (&new_dests);
+         if (BE (err != REG_NOERROR || err2 != REG_NOERROR
+                 || err3 != REG_NOERROR, 0))
+           {
+             /* Report the earliest failure.  */
+             err = (err != REG_NOERROR ? err
+                    : (err2 != REG_NOERROR ? err2 : err3));
+             return err;
+           }
+         /* TODO: It is still inefficient...  */
+         /* CUR_NODES grew, so restart the scan: the `++cache_idx' of the
+            loop brings the index back to CACHE_IDX_START.  */
+         cache_idx = cache_idx_start - 1;
+         continue;
+       }
+      else
+       {
+         /* The back reference consumed input: its successor belongs to
+            the state logged at TO_IDX.  */
+         re_node_set union_set;
+         next_node = dfa->nexts[ent->node];
+         if (mctx->state_log[to_idx])
+           {
+             int ret;
+             /* Nothing to do if the successor is already logged there.  */
+             if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
+                                       next_node))
+               continue;
+             err = re_node_set_init_copy (&union_set,
+                                          &mctx->state_log[to_idx]->nodes);
+             ret = re_node_set_insert (&union_set, next_node);
+             if (BE (err != REG_NOERROR || ret < 0, 0))
+               {
+                 re_node_set_free (&union_set);
+                 err = err != REG_NOERROR ? err : REG_ESPACE;
+                 return err;
+               }
+           }
+         else
+           {
+             err = re_node_set_init_1 (&union_set, next_node);
+             if (BE (err != REG_NOERROR, 0))
+               return err;
+           }
+         /* Replace the logged state by the one for the enlarged set.  */
+         mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
+         re_node_set_free (&union_set);
+         if (BE (mctx->state_log[to_idx] == NULL
+                 && err != REG_NOERROR, 0))
+           return err;
+       }
+    }
+  return REG_NOERROR;
+}
+
+/* Build transition table for the state.
+   Return the new table if succeeded, otherwise return NULL.
+   The table has SBC_MAX entries indexed by character; an entry stays NULL
+   for a character STATE does not accept.  If FL_SEARCH is nonzero, the
+   entrance nodes of the initial state are merged into every destination.
+   When STATE has no destination at all, the all-NULL table is returned.  */
+
+static re_dfastate_t **
+build_trtable (preg, state, fl_search)
+    const regex_t *preg;
+    const re_dfastate_t *state;
+    int fl_search;
+{
+  reg_errcode_t err;
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  int i, j, k, ch;
+  /* Flags telling whether the scratch buffers came from malloc (and must
+     be freed) rather than from alloca.  */
+  int dests_node_malloced = 0, dest_states_malloced = 0;
+  int ndests; /* Number of the destination states from `state'.  */
+  re_dfastate_t **trtable;
+  re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
+  re_node_set follows, *dests_node;
+  bitset *dests_ch;
+  bitset acceptable;
+
+  /* We build DFA states which corresponds to the destination nodes
+     from `state'.  `dests_node[i]' represents the nodes which i-th
+     destination state contains, and `dests_ch[i]' represents the
+     characters which i-th destination state accepts.  */
+#ifdef _LIBC
+  if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX))
+    dests_node = (re_node_set *)
+                alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX);
+  else
+#endif
+    {
+      dests_node = (re_node_set *)
+                  malloc ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX);
+      if (BE (dests_node == NULL, 0))
+       return NULL;
+      dests_node_malloced = 1;
+    }
+  /* The bitsets live in the same buffer, right behind the SBC_MAX node
+     sets.  */
+  dests_ch = (bitset *) (dests_node + SBC_MAX);
+
+  /* Initialize transition table.  */
+  trtable = (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
+  if (BE (trtable == NULL, 0))
+    {
+      if (dests_node_malloced)
+       free (dests_node);
+      return NULL;
+    }
+
+  /* At first, group all nodes belonging to `state' into several
+     destinations.  */
+  ndests = group_nodes_into_DFAstates (preg, state, dests_node, dests_ch);
+  if (BE (ndests <= 0, 0))
+    {
+      if (dests_node_malloced)
+       free (dests_node);
+      /* Return NULL in case of an error, trtable otherwise.  */
+      if (ndests == 0)
+       return trtable;
+      free (trtable);
+      return NULL;
+    }
+
+  /* Scratch set collecting the nodes that follow one destination.  */
+  err = re_node_set_alloc (&follows, ndests + 1);
+  if (BE (err != REG_NOERROR, 0))
+    goto out_free;
+
+  /* One buffer holds three arrays of NDESTS states each: the plain
+     destinations, then the word-context and the newline-context ones.  */
+#ifdef _LIBC
+  if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX
+                        + ndests * 3 * sizeof (re_dfastate_t *)))
+    dest_states = (re_dfastate_t **)
+                 alloca (ndests * 3 * sizeof (re_dfastate_t *));
+  else
+#endif
+    {
+      dest_states = (re_dfastate_t **)
+                   malloc (ndests * 3 * sizeof (re_dfastate_t *));
+      if (BE (dest_states == NULL, 0))
+       {
+         /* Common error exit, also reached by `goto' from outside this
+            block: release everything acquired so far and fail.  */
+out_free:
+         if (dest_states_malloced)
+           free (dest_states);
+         re_node_set_free (&follows);
+         for (i = 0; i < ndests; ++i)
+           re_node_set_free (dests_node + i);
+         free (trtable);
+         if (dests_node_malloced)
+           free (dests_node);
+         return NULL;
+       }
+      dest_states_malloced = 1;
+    }
+  dest_states_word = dest_states + ndests;
+  dest_states_nl = dest_states_word + ndests;
+  bitset_empty (acceptable);
+
+  /* Then build the states for all destinations.  */
+  for (i = 0; i < ndests; ++i)
+    {
+      int next_node;
+      re_node_set_empty (&follows);
+      /* Merge the follows of this destination states.  */
+      for (j = 0; j < dests_node[i].nelem; ++j)
+       {
+         next_node = dfa->nexts[dests_node[i].elems[j]];
+         if (next_node != -1)
+           {
+             err = re_node_set_merge (&follows, dfa->eclosures + next_node);
+             if (BE (err != REG_NOERROR, 0))
+               goto out_free;
+           }
+       }
+      /* If search flag is set, merge the initial state.  */
+      if (fl_search)
+       {
+#ifdef RE_ENABLE_I18N
+         /* The merge is skipped when the first CHARACTER node among the
+            follows has its mb_partial flag set.  */
+         int not_initial = 0;
+         for (j = 0; j < follows.nelem; ++j)
+           if (dfa->nodes[follows.elems[j]].type == CHARACTER)
+             {
+               not_initial = dfa->nodes[follows.elems[j]].mb_partial;
+               break;
+             }
+         if (!not_initial)
+#endif
+           {
+             err = re_node_set_merge (&follows,
+                                      dfa->init_state->entrance_nodes);
+             if (BE (err != REG_NOERROR, 0))
+               goto out_free;
+           }
+       }
+      dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
+      if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0))
+       goto out_free;
+      /* If the new state has context constraint,
+        build appropriate states for these contexts.  */
+      if (dest_states[i]->has_constraint)
+       {
+         dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
+                                                         CONTEXT_WORD);
+         if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))
+           goto out_free;
+         dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
+                                                       CONTEXT_NEWLINE);
+         if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0))
+           goto out_free;
+       }
+      else
+       {
+         /* Without constraints one state serves all contexts.  */
+         dest_states_word[i] = dest_states[i];
+         dest_states_nl[i] = dest_states[i];
+       }
+      /* Accumulate every character some destination accepts.  */
+      bitset_merge (acceptable, dests_ch[i]);
+    }
+
+  /* Update the transition table.  */
+  /* For all characters ch...:  */
+  for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
+    for (j = 0; j < UINT_BITS; ++j, ++ch)
+      if ((acceptable[i] >> j) & 1)
+       {
+         /* The current state accepts the character ch.  */
+         if (IS_WORD_CHAR (ch))
+           {
+             for (k = 0; k < ndests; ++k)
+               if ((dests_ch[k][i] >> j) & 1)
+                 {
+                   /* k-th destination accepts the word character ch.  */
+                   trtable[ch] = dest_states_word[k];
+                   /* There must be only one destination which accepts
+                      character ch.  See group_nodes_into_DFAstates.  */
+                   break;
+                 }
+           }
+         else /* not WORD_CHAR */
+           {
+             for (k = 0; k < ndests; ++k)
+               if ((dests_ch[k][i] >> j) & 1)
+                 {
+                   /* k-th destination accepts the non-word character ch.  */
+                   trtable[ch] = dest_states[k];
+                   /* There must be only one destination which accepts
+                      character ch.  See group_nodes_into_DFAstates.  */
+                   break;
+                 }
+           }
+       }
+  /* new line: this overrides the entry the loop above stored for
+     NEWLINE_CHAR with the newline-context state.  */
+  if (bitset_contain (acceptable, NEWLINE_CHAR))
+    {
+      /* The current state accepts newline character.  */
+      for (k = 0; k < ndests; ++k)
+       if (bitset_contain (dests_ch[k], NEWLINE_CHAR))
+         {
+           /* k-th destination accepts newline character.  */
+           trtable[NEWLINE_CHAR] = dest_states_nl[k];
+           /* There must be only one destination which accepts
+              newline.  See group_nodes_into_DFAstates.  */
+           break;
+         }
+    }
+
+  /* Success: release the scratch buffers, keep the table.  */
+  if (dest_states_malloced)
+    free (dest_states);
+
+  re_node_set_free (&follows);
+  for (i = 0; i < ndests; ++i)
+    re_node_set_free (dests_node + i);
+
+  if (dests_node_malloced)
+    free (dests_node);
+
+  return trtable;
+}
+
+/* Group all nodes belonging to STATE into several destinations.
+   Then, for each destination, store the nodes belonging to it in
+   DESTS_NODE[i] and the characters it accepts in DESTS_CH[i].
+   This function returns the number of destinations.  */
+
+static int
+group_nodes_into_DFAstates (preg, state, dests_node, dests_ch)
+    const regex_t *preg;
+    const re_dfastate_t *state;
+    re_node_set *dests_node;
+    bitset *dests_ch;
+{
+  /* Partition the single-byte characters accepted by the nodes of STATE
+     into pairwise disjoint sets.  DESTS_CH[k] receives the k-th character
+     set and DESTS_NODE[k] the nodes of STATE that accept it.  Both output
+     arrays are written at index `ndests' without any bounds check, so the
+     caller has to supply enough room.  Returns the number of groups, or
+     -1 after freeing the node sets built so far if an allocation fails.  */
+  reg_errcode_t err;
+  const re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  int i, j, k;
+  int ndests; /* Number of the destinations from `state'.  */
+  bitset accepts; /* Characters a node can accept.  */
+  const re_node_set *cur_nodes = &state->nodes;
+  bitset_empty (accepts);
+  ndests = 0;
+
+  /* For all the nodes belonging to `state',  */
+  for (i = 0; i < cur_nodes->nelem; ++i)
+    {
+      re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
+      re_token_type_t type = node->type;
+      unsigned int constraint = node->constraint;
+
+      /* `accepts' is empty at this point: every path through the previous
+        iteration either never filled it, cleared it explicitly, or
+        consumed all of its bits in the splitting loop below.  */
+
+      /* Enumerate all single byte character this node can accept.  */
+      if (type == CHARACTER)
+       bitset_set (accepts, node->opr.c);
+      else if (type == SIMPLE_BRACKET)
+       {
+         bitset_merge (accepts, node->opr.sbcset);
+       }
+      else if (type == OP_PERIOD)
+       {
+         /* '.' accepts everything, minus newline and/or NUL depending on
+            the syntax bits.  */
+         bitset_set_all (accepts);
+         if (!(preg->syntax & RE_DOT_NEWLINE))
+           bitset_clear (accepts, '\n');
+         if (preg->syntax & RE_DOT_NOT_NULL)
+           bitset_clear (accepts, '\0');
+       }
+      else
+       /* Any other node type contributes no characters here.  */
+       continue;
+
+      /* Check the `accepts' and sift out the characters which do not
+        match in the context required by the node's constraint.  */
+      if (constraint)
+       {
+         if (constraint & NEXT_WORD_CONSTRAINT)
+           for (j = 0; j < BITSET_UINTS; ++j)
+             accepts[j] &= dfa->word_char[j];
+         if (constraint & NEXT_NOTWORD_CONSTRAINT)
+           for (j = 0; j < BITSET_UINTS; ++j)
+             accepts[j] &= ~dfa->word_char[j];
+         if (constraint & NEXT_NEWLINE_CONSTRAINT)
+           {
+             /* Only the newline character may survive; if the node did
+                not accept it in the first place, the node is skipped.  */
+             int accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
+             bitset_empty (accepts);
+             if (accepts_newline)
+               bitset_set (accepts, NEWLINE_CHAR);
+             else
+               continue;
+           }
+       }
+
+      /* Then divide `accepts' into DFA states, or create a new
+        state.  Note that `ndests' may grow inside this loop, so groups
+        created by a split are visited as well.  */
+      for (j = 0; j < ndests; ++j)
+       {
+         bitset intersec; /* Intersection sets, see below.  */
+         bitset remains;
+         /* Flags, see below.  */
+         int has_intersec, not_subset, not_consumed;
+
+         /* Optimization, skip if this state doesn't accept the character.  */
+         if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
+           continue;
+
+         /* Enumerate the intersection set of this state and `accepts'.  */
+         has_intersec = 0;
+         for (k = 0; k < BITSET_UINTS; ++k)
+           has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
+         /* And skip if the intersection set is empty.  */
+         if (!has_intersec)
+           continue;
+
+         /* Then check if this state is a subset of `accepts'.
+            `remains' = characters of group j that this node does not
+            accept; `accepts' is reduced to the characters not yet
+            covered by group j.  */
+         not_subset = not_consumed = 0;
+         for (k = 0; k < BITSET_UINTS; ++k)
+           {
+             not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
+             not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
+           }
+
+         /* If this state isn't a subset of `accepts', create a
+            new group state, which has the `remains'.  Group j keeps only
+            the intersection; the new group inherits a copy of group j's
+            node set as it was before the current node is added.  */
+         if (not_subset)
+           {
+             bitset_copy (dests_ch[ndests], remains);
+             bitset_copy (dests_ch[j], intersec);
+             err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
+             if (BE (err != REG_NOERROR, 0))
+               goto error_return;
+             ++ndests;
+           }
+
+         /* Put the position in the current group. */
+         err = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
+         if (BE (err < 0, 0))
+           goto error_return;
+
+         /* If all characters are consumed, go to next node. */
+         if (!not_consumed)
+           break;
+       }
+      /* Some characters remain, create a new group.  (j == ndests means
+        the loop above ran to completion without consuming `accepts'.)  */
+      if (j == ndests)
+       {
+         bitset_copy (dests_ch[ndests], accepts);
+         err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
+         if (BE (err != REG_NOERROR, 0))
+           goto error_return;
+         ++ndests;
+         bitset_empty (accepts);
+       }
+    }
+  return ndests;
+ error_return:
+  /* Release every node set initialized so far and report failure.  */
+  for (j = 0; j < ndests; ++j)
+    re_node_set_free (dests_node + j);
+  return -1;
+}
+
+#ifdef RE_ENABLE_I18N
+/* Check how many bytes the node `dfa->nodes[node_idx]' accepts.
+   Return the number of bytes the node accepts.
+   STR_IDX is the current index of the input string.
+
+   This function handles the nodes which can accept one whole character
+   or one collating element, like '.' and '[a-z]', as opposed to the
+   other nodes, which can only accept a single byte.  */
+
+static int
+check_node_accept_bytes (preg, node_idx, input, str_idx)
+    const regex_t *preg;
+    int node_idx, str_idx;
+    const re_string_t *input;
+{
+  /* Returns the number of input bytes, starting at STR_IDX, that the node
+     accepts as one multi-byte character or collating element; 0 means
+     the node does not accept anything longer than a single byte here.  */
+  const re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  const re_token_t *node = dfa->nodes + node_idx;
+  int elem_len = re_string_elem_size_at (input, str_idx);
+  int char_len = re_string_char_size_at (input, str_idx);
+  int i;
+# ifdef _LIBC
+  int j;
+  uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
+# endif /* _LIBC */
+  /* Neither a multi-byte character nor a multi-byte collating element
+     starts at STR_IDX, so there is nothing for this function to do.  */
+  if (elem_len <= 1 && char_len <= 1)
+    return 0;
+  if (node->type == OP_PERIOD)
+    {
+      /* '.' accepts any one character except the following two cases.  */
+      if ((!(preg->syntax & RE_DOT_NEWLINE) &&
+          re_string_byte_at (input, str_idx) == '\n') ||
+         ((preg->syntax & RE_DOT_NOT_NULL) &&
+          re_string_byte_at (input, str_idx) == '\0'))
+       return 0;
+      return char_len;
+    }
+  else if (node->type == COMPLEX_BRACKET)
+    {
+      const re_charset_t *cset = node->opr.mbcset;
+# ifdef _LIBC
+      const unsigned char *pin = ((char *) re_string_get_buffer (input)
+                                 + str_idx);
+# endif /* _LIBC */
+      /* Length of the positive match found so far; stays 0 if none of
+        the tests below succeeds.  */
+      int match_len = 0;
+      /* The wide character is only fetched when one of the tests that
+        needs it can actually run.  */
+      wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
+                   ? re_string_wchar_at (input, str_idx) : 0);
+
+      /* match with multibyte character?  */
+      for (i = 0; i < cset->nmbchars; ++i)
+       if (wc == cset->mbchars[i])
+         {
+           match_len = char_len;
+           goto check_node_accept_bytes_match;
+         }
+      /* match with character_class?  */
+      for (i = 0; i < cset->nchar_classes; ++i)
+       {
+         wctype_t wt = cset->char_classes[i];
+         if (__iswctype (wc, wt))
+           {
+             match_len = char_len;
+             goto check_node_accept_bytes_match;
+           }
+       }
+
+# ifdef _LIBC
+      if (nrules != 0)
+       {
+         unsigned int in_collseq = 0;
+         const int32_t *table, *indirect;
+         const unsigned char *weights, *extra;
+         const char *collseqwc;
+         int32_t idx;
+         /* This #include defines a local function!  */
+#  include <locale/weight.h>
+
+         /* match with collating_symbol?  */
+         if (cset->ncoll_syms)
+           extra = (const unsigned char *)
+             _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
+         for (i = 0; i < cset->ncoll_syms; ++i)
+           {
+             /* The first byte of the entry is its length, followed by
+                the bytes of the collating element.  */
+             const unsigned char *coll_sym = extra + cset->coll_syms[i];
+             /* Compare the length of input collating element and
+                the length of current collating element.  */
+             if (*coll_sym != elem_len)
+               continue;
+             /* Compare each byte.  */
+             for (j = 0; j < *coll_sym; j++)
+               if (pin[j] != coll_sym[1 + j])
+                 break;
+             if (j == *coll_sym)
+               {
+                 /* Match if every byte is equal.  */
+                 match_len = j;
+                 goto check_node_accept_bytes_match;
+               }
+           }
+
+         /* Determine the collation sequence value of the input: via the
+            wide-character table for an ordinary character, or via the
+            collating-element table when the element is longer than the
+            character.  */
+         if (cset->nranges)
+           {
+             if (elem_len <= char_len)
+               {
+                 collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
+                 in_collseq = collseq_table_lookup (collseqwc, wc);
+               }
+             else
+               in_collseq = find_collation_sequence_value (pin, elem_len);
+           }
+         /* match with range expression?  */
+         for (i = 0; i < cset->nranges; ++i)
+           if (cset->range_starts[i] <= in_collseq
+               && in_collseq <= cset->range_ends[i])
+             {
+               match_len = elem_len;
+               goto check_node_accept_bytes_match;
+             }
+
+         /* match with equivalence_class?  */
+         if (cset->nequiv_classes)
+           {
+             const unsigned char *cp = pin;
+             /* These locals are presumably read by findidx(), the
+                function defined by the #include above -- its body is
+                not visible here.  */
+             table = (const int32_t *)
+               _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
+             weights = (const unsigned char *)
+               _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
+             extra = (const unsigned char *)
+               _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
+             indirect = (const int32_t *)
+               _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
+             idx = findidx (&cp);
+             if (idx > 0)
+               for (i = 0; i < cset->nequiv_classes; ++i)
+                 {
+                   /* Two elements are equivalent when their weight
+                      strings have the same length byte and identical
+                      contents.  */
+                   int32_t equiv_class_idx = cset->equiv_classes[i];
+                   size_t weight_len = weights[idx];
+                   if (weight_len == weights[equiv_class_idx])
+                     {
+                       int cnt = 0;
+                       while (cnt <= weight_len
+                              && (weights[equiv_class_idx + 1 + cnt]
+                                  == weights[idx + 1 + cnt]))
+                         ++cnt;
+                       if (cnt > weight_len)
+                         {
+                           match_len = elem_len;
+                           goto check_node_accept_bytes_match;
+                         }
+                     }
+                 }
+           }
+       }
+      else
+# endif /* _LIBC */
+       {
+         /* match with range expression?  */
+         /* cmp_buf holds three one-character wide strings, at offsets 0,
+            2 and 4: the range start, the input character and the range
+            end.  They are compared pairwise with wcscoll().  */
+#if __GNUC__ >= 2
+         wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'};
+#else
+         wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
+         cmp_buf[2] = wc;
+#endif
+         for (i = 0; i < cset->nranges; ++i)
+           {
+             cmp_buf[0] = cset->range_starts[i];
+             cmp_buf[4] = cset->range_ends[i];
+             if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
+                 && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
+               {
+                 match_len = char_len;
+                 goto check_node_accept_bytes_match;
+               }
+           }
+       }
+    check_node_accept_bytes_match:
+      /* A plain bracket reports the match length as is.  A negated
+        bracket ([^...]) inverts the verdict: a positive match becomes a
+        rejection, and no match accepts the longer of the character and
+        the collating element.  */
+      if (!cset->non_match)
+       return match_len;
+      else
+       {
+         if (match_len > 0)
+           return 0;
+         else
+           return (elem_len > char_len) ? elem_len : char_len;
+       }
+    }
+  /* Any other node type never accepts more than one byte.  */
+  return 0;
+}
+
+# ifdef _LIBC
+static unsigned int
+find_collation_sequence_value (mbs, mbs_len)
+    const unsigned char *mbs;
+    size_t mbs_len;
+{
+  /* Look up the collation sequence value of the MBS_LEN bytes at MBS.
+     Without collation rules, only a single byte can be looked up;
+     anything longer yields UINT_MAX.  With rules, the collating-element
+     table is scanned for an entry whose byte sequence equals MBS.  */
+  uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
+  if (nrules == 0)
+    {
+      if (mbs_len == 1)
+       {
+         /* No valid character.  Match it as a single byte character.  */
+         const unsigned char *collseq = (const unsigned char *)
+           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
+         return collseq[mbs[0]];
+       }
+      return UINT_MAX;
+    }
+  else
+    {
+      int32_t idx;
+      const unsigned char *extra = (const unsigned char *)
+       _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
+
+      /* NOTE(review): this loop has no end-of-table test; it only
+        terminates through the `return' below, so it appears to rely on
+        MBS always being present in the table -- confirm with callers.  */
+      for (idx = 0; ;)
+       {
+         int mbs_cnt, found = 0;
+         int32_t elem_mbs_len;
+         /* Skip the name of collating element name.  */
+         idx = idx + extra[idx] + 1;
+         /* Length byte of the element's multi-byte sequence.  */
+         elem_mbs_len = extra[idx++];
+         if (mbs_len == elem_mbs_len)
+           {
+             for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
+               if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
+                 break;
+             if (mbs_cnt == elem_mbs_len)
+               /* Found the entry.  */
+               found = 1;
+           }
+         /* Skip the byte sequence of the collating element.  */
+         idx += elem_mbs_len;
+         /* Adjust for the alignment.  */
+         idx = (idx + 3) & ~3;
+         /* Skip the collation sequence value.  */
+         idx += sizeof (uint32_t);
+         /* Skip the wide char sequence of the collating element.  */
+         idx = idx + sizeof (uint32_t) * (extra[idx] + 1);
+         /* If we found the entry, return the sequence value.  */
+         if (found)
+           return *(uint32_t *) (extra + idx);
+         /* Skip the collation sequence value.  */
+         idx += sizeof (uint32_t);
+       }
+    }
+}
+# endif /* _LIBC */
+#endif /* RE_ENABLE_I18N */
+
+/* Check whether the node accepts the byte which is IDX-th
+   byte of the INPUT.  */
+
+static int
+check_node_accept (preg, node, mctx, idx)
+    const regex_t *preg;
+    const re_token_t *node;
+    const re_match_context_t *mctx;
+    int idx;
+{
+  /* Decide whether NODE accepts the single byte at position IDX of the
+     input held by MCTX.  Returns non-zero if it does, 0 otherwise.  */
+  unsigned char byte;
+
+  /* A constrained node is rejected outright when the context at IDX
+     does not satisfy its constraints.  */
+  if (node->constraint)
+    {
+      unsigned int ctx = re_string_context_at (mctx->input, idx,
+                                              mctx->eflags,
+                                              preg->newline_anchor);
+      if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, ctx))
+       return 0;
+    }
+
+  byte = re_string_byte_at (mctx->input, idx);
+  switch (node->type)
+    {
+    case CHARACTER:
+      /* A literal matches exactly its own byte.  */
+      return node->opr.c == byte;
+
+    case SIMPLE_BRACKET:
+      /* A single-byte bracket matches the members of its bitset.  */
+      return bitset_contain (node->opr.sbcset, byte);
+
+    case OP_PERIOD:
+      /* '.' matches anything except newline (unless RE_DOT_NEWLINE is
+        set) and except NUL when RE_DOT_NOT_NULL is set.  */
+      if (byte == '\n' && !(preg->syntax & RE_DOT_NEWLINE))
+       return 0;
+      if (byte == '\0' && (preg->syntax & RE_DOT_NOT_NULL))
+       return 0;
+      return 1;
+
+    default:
+      /* No other node type consumes a byte here.  */
+      return 0;
+    }
+}
+
+/* Extend the buffers, if the buffers have run out.  */
+
+static reg_errcode_t
+extend_buffers (mctx)
+     re_match_context_t *mctx;
+{
+  /* Grow the input string buffers of MCTX (and its state log, if any)
+     and refill the buffers.  Returns REG_NOERROR on success, the error
+     from re_string_realloc_buffers, or REG_ESPACE if the state log
+     cannot be grown.  */
+  reg_errcode_t ret;
+  re_string_t *pstr = mctx->input;
+
+  /* Double the lengths of the buffers.  */
+  ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
+  if (BE (ret != REG_NOERROR, 0))
+    return ret;
+
+  if (mctx->state_log != NULL)
+    {
+      /* And double the length of state_log.  */
+      /* NOTE(review): bufs_len is read again after the call above; if
+        that call already stored the doubled length, this requests twice
+        the new buffer length -- harmless but larger than the comment
+        suggests.  Confirm against re_string_realloc_buffers.  */
+      re_dfastate_t **new_array;
+      new_array = re_realloc (mctx->state_log, re_dfastate_t *,
+                             pstr->bufs_len * 2);
+      if (BE (new_array == NULL, 0))
+       return REG_ESPACE;
+      mctx->state_log = new_array;
+    }
+
+  /* Then reconstruct the buffers.  The builder is chosen by whether the
+     match is case-insensitive and, with I18N support, whether the
+     locale is multi-byte.  */
+  if (pstr->icase)
+    {
+#ifdef RE_ENABLE_I18N
+      if (MB_CUR_MAX > 1)
+       build_wcs_upper_buffer (pstr);
+      else
+#endif /* RE_ENABLE_I18N  */
+       build_upper_buffer (pstr);
+    }
+  else
+    {
+#ifdef RE_ENABLE_I18N
+      if (MB_CUR_MAX > 1)
+       build_wcs_buffer (pstr);
+      else
+#endif /* RE_ENABLE_I18N  */
+       {
+         /* Single-byte, case-sensitive: either run the translation
+            table over the buffer or simply mark the whole buffer as
+            valid.  */
+         if (pstr->trans != NULL)
+           re_string_translate_buffer (pstr);
+         else
+           pstr->valid_len = pstr->bufs_len;
+       }
+    }
+  return REG_NOERROR;
+}
+
+\f
+/* Functions for matching context.  */
+
+/* Initialize MCTX.  */
+
+static reg_errcode_t
+match_ctx_init (mctx, eflags, input, n)
+    re_match_context_t *mctx;
+    int eflags, n;
+    re_string_t *input;
+{
+  /* Initialize MCTX for matching INPUT with the flags EFLAGS.  N is the
+     initial capacity of both the back-reference cache and the sub_tops
+     array; nothing is allocated when N <= 0.
+     Returns REG_NOERROR, or REG_ESPACE if an allocation fails.  */
+  mctx->eflags = eflags;
+  mctx->input = input;
+  mctx->match_last = -1;
+
+  /* Put every field that match_ctx_free() and match_ctx_clean() look at
+     into a defined state first, so that MCTX can safely be freed no
+     matter how this function exits.  */
+  mctx->bkref_ents = NULL;
+  mctx->sub_tops = NULL;
+  mctx->nbkref_ents = 0;
+  mctx->abkref_ents = 0;
+  mctx->max_mb_elem_len = 1;
+  mctx->nsub_tops = 0;
+  mctx->asub_tops = 0;
+
+  if (n > 0)
+    {
+      mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
+      mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
+      if (BE (mctx->bkref_ents == NULL || mctx->sub_tops == NULL, 0))
+       {
+         /* Do not leak the array that may have been allocated, and
+            leave no stale pointer behind for a later match_ctx_free.  */
+         re_free (mctx->bkref_ents);
+         re_free (mctx->sub_tops);
+         mctx->bkref_ents = NULL;
+         mctx->sub_tops = NULL;
+         return REG_ESPACE;
+       }
+    }
+  /* Record the capacities only once the arrays really exist.  */
+  mctx->abkref_ents = n;
+  mctx->asub_tops = n;
+  return REG_NOERROR;
+}
+
+/* Clean the entries which depend on the current input in MCTX.
+   This function must be invoked when the matcher changes the start index
+   of the input, or changes the input string.  */
+
+static void
+match_ctx_clean (mctx)
+    re_match_context_t *mctx;
+{
+  /* The back-reference cache is merely truncated; its storage is kept
+     for reuse.  */
+  mctx->nbkref_ents = 0;
+
+  /* The sub_tops entries own heap memory, so they have to be released
+     before their count is reset.  */
+  match_ctx_free_subtops (mctx);
+  mctx->nsub_tops = 0;
+}
+
+/* Free all the memory associated with MCTX.  */
+
+static void
+match_ctx_free (mctx)
+    re_match_context_t *mctx;
+{
+  /* Release the entries stored in sub_tops first; they are reached
+     through the array, which therefore has to go last.  */
+  match_ctx_free_subtops (mctx);
+
+  /* Then drop the two arrays themselves.  */
+  re_free (mctx->bkref_ents);
+  re_free (mctx->sub_tops);
+}
+
+/* Free all the memory associated with MCTX->SUB_TOPS.  */
+
+static void
+match_ctx_free_subtops (mctx)
+     re_match_context_t *mctx;
+{
+  /* Free each of the first nsub_tops entries of MCTX->SUB_TOPS together
+     with everything hanging off it.  The array itself and the counter
+     are left alone; that is up to the caller.  */
+  int top_no = 0;
+
+  while (top_no < mctx->nsub_tops)
+    {
+      re_sub_match_top_t *subtop = mctx->sub_tops[top_no];
+      int last_no = 0;
+
+      /* Every recorded "last" owns its path array.  */
+      while (last_no < subtop->nlasts)
+       {
+         re_sub_match_last_t *sublast = subtop->lasts[last_no];
+         re_free (sublast->path.array);
+         re_free (sublast);
+         ++last_no;
+       }
+      re_free (subtop->lasts);
+
+      /* The top's own path is optional.  */
+      if (subtop->path)
+       {
+         re_free (subtop->path->array);
+         re_free (subtop->path);
+       }
+      free (subtop);
+      ++top_no;
+    }
+}
+
+/* Add a new backreference entry to MCTX.
+   Note that we assume that the caller never calls this function with a
+   duplicate entry, and always calls it with a STR_IDX which isn't smaller
+   than that of any existing entry.
+*/
+
+static reg_errcode_t
+match_ctx_add_entry (mctx, node, str_idx, from, to)
+     re_match_context_t *mctx;
+     int node, str_idx, from, to;
+{
+  /* Append a back-reference cache entry (NODE, STR_IDX, FROM, TO) to
+     MCTX, doubling the cache when it is full.
+     Returns REG_NOERROR, or REG_ESPACE if the cache cannot be grown; in
+     that case MCTX is left unchanged and still owns its old array.  */
+  if (mctx->nbkref_ents >= mctx->abkref_ents)
+    {
+      struct re_backref_cache_entry* new_entry;
+      new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
+                             mctx->abkref_ents * 2);
+      if (BE (new_entry == NULL, 0))
+       /* A failed realloc leaves the old block valid.  It must not be
+          freed here: mctx->bkref_ents would keep pointing to it and
+          match_ctx_free() would release it a second time.  */
+       return REG_ESPACE;
+      mctx->bkref_ents = new_entry;
+      /* Zero the newly gained half of the array.  */
+      memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
+             sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
+      mctx->abkref_ents *= 2;
+    }
+  mctx->bkref_ents[mctx->nbkref_ents].node = node;
+  mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
+  mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
+  mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
+  mctx->bkref_ents[mctx->nbkref_ents++].flag = 0;
+  /* Keep track of the longest span any back-reference covers.  */
+  if (mctx->max_mb_elem_len < to - from)
+    mctx->max_mb_elem_len = to - from;
+  return REG_NOERROR;
+}
+
+/* Search for the first entry whose str_idx is not smaller than STR_IDX.
+   Note that MCTX->BKREF_ENTS is already sorted by the str_idx of its
+   entries.  */
+
+static int
+search_cur_bkref_entry (mctx, str_idx)
+     re_match_context_t *mctx;
+     int str_idx;
+{
+  /* Lower-bound binary search over the back-reference cache: returns
+     the index of the first entry whose str_idx is >= STR_IDX, or
+     nbkref_ents if there is none.  */
+  int lo = 0;
+  int hi = mctx->nbkref_ents;
+
+  while (lo < hi)
+    {
+      int probe = (lo + hi) / 2;
+      if (mctx->bkref_ents[probe].str_idx >= str_idx)
+       hi = probe;             /* The answer is at PROBE or before it.  */
+      else
+       lo = probe + 1;         /* The answer lies strictly after PROBE.  */
+    }
+  return lo;
+}
+
+static void
+match_ctx_clear_flag (mctx)
+     re_match_context_t *mctx;
+{
+  /* Reset the `flag' member of every back-reference cache entry that is
+     currently in use.  */
+  int ent_no = 0;
+
+  while (ent_no < mctx->nbkref_ents)
+    mctx->bkref_ents[ent_no++].flag = 0;
+}
+
+/* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches
+   at STR_IDX.  */
+
+static reg_errcode_t
+match_ctx_add_subtop (mctx, node, str_idx)
+     re_match_context_t *mctx;
+     int node, str_idx;
+{
+  /* Append a zero-initialized sub_tops entry for NODE at STR_IDX to
+     MCTX, doubling the array when it is full.
+     Returns REG_NOERROR, or REG_ESPACE on allocation failure, in which
+     case MCTX is left unchanged.  */
+#ifdef DEBUG
+  assert (mctx->sub_tops != NULL);
+  assert (mctx->asub_tops > 0);
+#endif
+  if (mctx->nsub_tops == mctx->asub_tops)
+    {
+      /* Commit the new capacity only after the reallocation succeeded;
+        otherwise asub_tops would claim room the array does not have.  */
+      int new_asub_tops = mctx->asub_tops * 2;
+      re_sub_match_top_t **new_array;
+      new_array = re_realloc (mctx->sub_tops, re_sub_match_top_t *,
+                             new_asub_tops);
+      if (BE (new_array == NULL, 0))
+       return REG_ESPACE;
+      mctx->sub_tops = new_array;
+      mctx->asub_tops = new_asub_tops;
+    }
+  mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t));
+  if (mctx->sub_tops[mctx->nsub_tops] == NULL)
+    return REG_ESPACE;
+  mctx->sub_tops[mctx->nsub_tops]->node = node;
+  /* The entry only counts as stored once it is fully set up.  */
+  mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
+  return REG_NOERROR;
+}
+
+/* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
+   at STR_IDX, and whose corresponding OP_OPEN_SUBEXP is SUBTOP.  */
+
+static re_sub_match_last_t *
+match_ctx_add_sublast (subtop, node, str_idx)
+     re_sub_match_top_t *subtop;
+     int node, str_idx;
+{
+  /* Append a zero-initialized "last" entry for NODE at STR_IDX to
+     SUBTOP, growing its array when it is full.
+     Returns the new entry, or NULL on allocation failure, in which case
+     SUBTOP's entry count and recorded capacity are left unchanged.  */
+  re_sub_match_last_t *new_entry;
+  if (subtop->nlasts == subtop->alasts)
+    {
+      /* Grow to 2n+1 so that an initial capacity of 0 works too.  The
+        new capacity is stored only after the reallocation succeeded;
+        otherwise alasts would overstate the size of the array.  */
+      int new_alasts = 2 * subtop->alasts + 1;
+      re_sub_match_last_t **new_array;
+      new_array = re_realloc (subtop->lasts, re_sub_match_last_t *,
+                             new_alasts);
+      if (BE (new_array == NULL, 0))
+       return NULL;
+      subtop->lasts = new_array;
+      subtop->alasts = new_alasts;
+    }
+  new_entry = calloc (1, sizeof (re_sub_match_last_t));
+  if (BE (new_entry == NULL, 0))
+    return NULL;
+  subtop->lasts[subtop->nlasts] = new_entry;
+  new_entry->node = node;
+  new_entry->str_idx = str_idx;
+  ++subtop->nlasts;
+  return new_entry;
+}
+
+static void
+sift_ctx_init (sctx, sifted_sts, limited_sts, last_node, last_str_idx,
+              check_subexp)
+    re_sift_context_t *sctx;
+    re_dfastate_t **sifted_sts, **limited_sts;
+    int last_node, last_str_idx, check_subexp;
+{
+  /* Fill in SCTX from the arguments and put the remaining members into
+     their "nothing selected yet" state.  */
+
+  /* Members without a corresponding argument.  */
+  re_node_set_init_empty (&sctx->limits);
+  sctx->cur_bkref = -1;
+  sctx->cls_subexp_idx = -1;
+
+  /* State arrays supplied by the caller.  */
+  sctx->sifted_states = sifted_sts;
+  sctx->limited_states = limited_sts;
+
+  /* End point of the sifting and the sub-expression check mode.  */
+  sctx->last_node = last_node;
+  sctx->last_str_idx = last_str_idx;
+  sctx->check_subexp = check_subexp;
+}
diff --git a/lib/runcmd.c b/lib/runcmd.c

new file mode 100644 (file)

index 0000000..0204e1d
--- /dev/null
+++ b/lib/runcmd.c
@@ -0,0 +1,134 @@
+/*
+ *     UCW Library -- Running of Commands
+ *
+ *     (c) 2004 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <alloca.h>
+#include <unistd.h>
+#include <sys/wait.h>
+
+void NONRET
+exec_command_v(const char *cmd, va_list args)
+{
+  /*
+   *  Replace the current process by CMD, passing it the NULL-terminated
+   *  list of string arguments in ARGS (argv[0] is CMD itself).
+   *  Never returns: if execv() fails, the error is logged and the
+   *  process exits with status 255.
+   */
+  va_list scan;
+  char *a;
+
+  /* Pass 1: count the slots -- one for cmd, one per argument, one for the terminating NULL */
+  int slots = 2;
+  va_copy(scan, args);
+  while ((a = va_arg(scan, char *)) != NULL)
+    slots++;
+  va_end(scan);
+
+  /* Pass 2: build argv[] on the stack */
+  char **argv = alloca(sizeof(char *) * slots);
+  int argc = 0;
+  argv[argc++] = (char *)cmd;
+  va_copy(scan, args);
+  while ((a = va_arg(scan, char *)) != NULL)
+    argv[argc++] = a;
+  va_end(scan);
+  argv[argc] = NULL;
+
+  execv(cmd, argv);
+
+  /* We get here only if execv() has failed */
+  char echo[256];
+  echo_command_v(echo, sizeof(echo), cmd, args);
+  msg(L_ERROR, "Cannot execute %s: %m", echo);
+  exit(255);
+}
+
+int
+run_command_v(const char *cmd, va_list args)
+{
+  /*
+   *  Run CMD with the NULL-terminated argument list ARGS in a child
+   *  process and wait for it.  Returns 1 if format_exit_status()
+   *  reports nothing, 0 if fork() fails or the child's status is
+   *  reported as a failure (both are logged).  Dies if waitpid() fails.
+   */
+  pid_t pid = fork();
+  if (pid < 0)
+    {
+      msg(L_ERROR, "fork() failed: %m");
+      return 0;
+    }
+  if (!pid)
+    exec_command_v(cmd, args);         /* Child: does not return */
+
+  /* Parent: collect the child's exit status */
+  int stat;
+  char status_msg[EXIT_STATUS_MSG_SIZE];
+  pid = waitpid(pid, &stat, 0);
+  if (pid < 0)
+    die("waitpid() failed: %m");
+  if (!format_exit_status(status_msg, stat))
+    return 1;
+
+  /* A non-zero result means status_msg describes a failure */
+  char echo[256];
+  echo_command_v(echo, sizeof(echo), cmd, args);
+  msg(L_ERROR, "`%s' failed: %s", echo, status_msg);
+  return 0;
+}
+
+void
+echo_command_v(char *buf, int size, const char *cmd, va_list args)
+{
+  /*
+   *  Format CMD and the NULL-terminated list of string arguments in
+   *  ARGS into BUF (of SIZE bytes) as a space-separated,
+   *  zero-terminated string.  Output which does not fit is cut off and
+   *  marked by a trailing "...".
+   */
+
+  /*
+   *  The truncation marker together with its terminator needs 4 bytes.
+   *  With a smaller buffer, `limit' below would point before `buf' and
+   *  the truncating memcpy() would get a negative length, so produce an
+   *  empty string instead (or nothing at all if there is no room).
+   */
+  if (size < 4)
+    {
+      if (size > 0)
+       *buf = 0;
+      return;
+    }
+
+  /* Text may be written up to `limit'; the 4 bytes from there on are reserved for "..." */
+  char *limit = buf + size - 4;
+  char *p = buf;
+  const char *arg = cmd;
+  do
+    {
+      int l = strlen(arg);
+      /* Separate words by a space, but only if there is still room for it */
+      if (p != buf && p < limit)
+       *p++ = ' ';
+      if (p+l > limit)
+       {
+         /* Copy what fits and append the truncation marker */
+         memcpy(p, arg, limit-p);
+         strcpy(limit, "...");
+         return;
+       }
+      memcpy(p, arg, l);
+      p += l;
+    }
+  while (arg = va_arg(args, char *));
+  *p = 0;
+}
+
+int
+run_command(const char *cmd, ...)
+{
+  /* Variadic front-end of run_command_v(); the arguments after CMD are passed on as its argument list */
+  va_list ap;
+  int result;
+
+  va_start(ap, cmd);
+  result = run_command_v(cmd, ap);
+  va_end(ap);
+  return result;
+}
+
+void NONRET
+exec_command(const char *cmd, ...)
+{
+  /* Variadic front-end of exec_command_v(), which is declared NONRET -- hence no va_end() */
+  va_list ap;
+
+  va_start(ap, cmd);
+  exec_command_v(cmd, ap);
+}
+
+void
+echo_command(char *buf, int len, const char *cmd, ...)
+{
+  /* Variadic front-end of echo_command_v(): formats CMD and the arguments following it into BUF of LEN bytes */
+  va_list ap;
+
+  va_start(ap, cmd);
+  echo_command_v(buf, len, cmd, ap);
+  va_end(ap);
+}
+
+#ifdef TEST
+
+int main(void)
+{
+  /*
+   *  Self-test: echo a command line to the log, then really run it.
+   *  The buffer must not be called `msg', because that would hide the
+   *  msg() logging function used throughout this file.
+   */
+  char cmdline[1024];
+  echo_command(cmdline, sizeof(cmdline), "/bin/echo", "datel", "strakapoud", NULL);
+  msg(L_INFO, "Running <%s>", cmdline);
+  run_command("/bin/echo", "datel", "strakapoud", NULL);
+  return 0;
+}
+
+#endif
diff --git a/lib/semaphore.h b/lib/semaphore.h

new file mode 100644 (file)

index 0000000..fa2e0ee
--- /dev/null
+++ b/lib/semaphore.h
@@ -0,0 +1,60 @@
+/*
+ *     The UCW Library -- POSIX semaphores wrapper
+ *
+ *     (c) 2006 Robert Spalek <robert@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_SEMAPHORE_H
+#define _UCW_SEMAPHORE_H
+
+#include <semaphore.h>
+
+#ifdef CONFIG_DARWIN
+
+#include <unistd.h>
+#include <stdio.h>
+
+/* In Darwin, sem_init() is unfortunately not implemented and the guide
+ * recommends emulating it using sem_open().  */
+
+static inline sem_t *
+sem_alloc(void)
+{
+  /*
+   *  Emulate an anonymous semaphore with initial value 0 by a named one.
+   *  The name is built from the process ID and a counter.
+   */
+  static uns cnt = 0;
+  char buf[48];                                /* Room for the prefix and two full-width integers */
+  snprintf(buf, sizeof(buf), "tmp/sem-%d-%u", (int) getpid(), (unsigned int) cnt++);
+  sem_t *sem = sem_open(buf, O_CREAT, 0777, 0);
+  ASSERT(sem != (sem_t*) SEM_FAILED);
+  /*
+   *  Remove the name immediately; the semaphore itself stays usable
+   *  until it is closed.  Otherwise the name would outlive the process
+   *  and could be reopened by accident: after PID reuse, or by another
+   *  translation unit, which has its own copy of the static counter.
+   */
+  sem_unlink(buf);
+  return sem;
+}
+
+static inline void
+sem_free(sem_t *sem)
+{
+  /* Counterpart of sem_alloc(): closes the semaphore obtained from sem_open(); the result is ignored */
+  (void) sem_close(sem);
+}
+
+#else
+
+static inline sem_t *
+sem_alloc(void)
+{
+  /* Allocate an unnamed semaphore which is not shared between processes and has initial value 0 */
+  sem_t *s = xmalloc(sizeof(*s));
+  int err = sem_init(s, 0, 0);
+  ASSERT(!err);
+  return s;
+}
+
+static inline void
+sem_free(sem_t *sem)
+{
+  /* Counterpart of sem_alloc(): destroy the semaphore first, then release its memory */
+  sem_destroy(sem);
+  xfree(sem);
+}
+
+#endif
+
+#endif
diff --git a/lib/shell/Makefile b/lib/shell/Makefile

new file mode 100644 (file)

index 0000000..5d6d65e
--- /dev/null
+++ b/lib/shell/Makefile
@@ -0,0 +1,12 @@
+# Support routines for shell scripts
+
+# Register this directory, its programs and its data file in the global
+# lists (presumably consumed by the top-level build rules, which are not
+# part of this file).
+DIRS+=lib/shell
+PROGS+=$(o)/lib/shell/config $(o)/lib/shell/logger
+DATAFILES+=$(o)/lib/shell/libucw.sh
+
+# Both programs are built from their own object file and the UCW library.
+# No recipe is given here, so linking presumably relies on a rule defined
+# elsewhere -- TODO confirm.
+$(o)/lib/shell/config: $(o)/lib/shell/config.o $(LIBUCW)
+$(o)/lib/shell/logger: $(o)/lib/shell/logger.o $(LIBUCW)
+
+# Only `config' has a test in this directory.
+TESTS+=$(addprefix $(o)/lib/shell/,config.test)
+
+# The test needs the program it exercises to be built first.
+$(o)/lib/shell/config.test: $(o)/lib/shell/config
diff --git a/lib/shell/config.c b/lib/shell/config.c

new file mode 100644 (file)

index 0000000..4208b86
--- /dev/null
+++ b/lib/shell/config.c
@@ -0,0 +1,434 @@
+/*
+ *     UCW Library -- Shell Interface to Configuration Files
+ *
+ *     (c) 2002--2005 Martin Mares <mj@ucw.cz>
+ *     (c) 2006 Robert Spalek <robert@ucw.cz>
+ *     (c) 2006 Pavel Charvat <pchar@ucw.cz>
+ *
+ *     Once we were using this beautiful Shell version, but it turned out
+ *     that it doesn't work with nested config files:
+ *
+ *             eval `sed <cf/sherlock '/^#/d;/^ *$/d;s/ \+$//;
+ *             h;s@[^  ]*@@;x;s@[      ].*@@;y/abcdefghijklmnopqrstuvwxyz/ABCDEFGHIJKLMNOPQRSTUVWXYZ/;G;s/\n//;
+ *             /^\[SECTION\]/,/^\[/ {; /^[A-Z]/ { s/^\([^      ]\+\)[  ]*\(.*\)$/SH_\1="\2"/; p; }; };
+ *             d;'`
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/conf.h"
+#include "lib/getopt.h"
+#include "lib/conf-internal.h"
+#include "lib/clists.h"
+#include "lib/mempool.h"
+#include "lib/chartype.h"
+#include "lib/bbuf.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <alloca.h>
+
+/*
+ * Print the usage message and the grammar of the item specification
+ * to stderr and terminate the program with exit code 1.
+ */
+static void
+help(void)
+{
+  fputs("\n\
+Usage: config [-C<configfile>] [-S<section>.<option>=<value>] <sections>\n\
+\n\
+<sections>\t<section>[;<sections>]\n\
+<section>\t[!]<name>{[<items>]}\n\
+<items>\t\t[-]<item>[;<items>]\n\
+<item>\t\t<static> | <array> | <list>\n\
+<static>\t<type><name>[=<value>]\n\
+<list>\t\t@<name>{[<items>]}\n\
+<array>\t\t<type><name><left-bracket>[<number>]<right-bracket>\n\
+<value>\t\t[a-zA-Z0-9.-/]* | 'string without single quotes'<value> | \"c-like string\"<value>\n\
+\n\
+Types:\n\
+<empty>\t\tString\n\
+#\t\t32-bit integer\n\
+##\t\t64-bit integer\n\
+$\t\tFloating point number\n\
+\n\
+Modifiers:\n\
+!\t\tReport unknown items as errors\n\
+-\t\tDo not dump item's value\n\
+", stderr);
+  exit(1);
+}
+
+/*
+ * Storage for one configuration value of any supported kind.
+ * Its size is also used as the per-item slot size inside list nodes
+ * (see the offset calculation in parse_section()).
+ */
+union value {
+  void *v_ptr;                 /* string (CT_STRING) */
+  int v_int;                   /* CT_INT */
+  u64 v_u64;                   /* CT_U64 */
+  double v_double;             /* CT_DOUBLE */
+  clist list;                  /* head of a nested list */
+};
+
+#define FLAG_HIDE              0x1
+#define FLAG_NO_UNKNOWN                0x2
+
+/* One item requested on the command line. */
+struct item {
+  cnode node;                  /* link in the list of its section */
+  uns flags;                   /* FLAG_xxx */
+  struct cf_item cf;           /* descriptor later copied into the generated cf_section */
+  union value value;           /* storage (and default value) of an item outside lists */
+  uns index;                   /* number of values dumped so far, used as array index by dump_value() */
+};
+
+/*
+ * A section or a nested list.  The embedded item must stay the first member:
+ * the code casts between `struct item *' and `struct section *'.
+ */
+struct section {
+  struct item item;
+  clist list;                  /* items of this section */
+  uns count;                   /* number of items in `list' */
+  uns size;                    /* for lists: size of one list node (cnode + one union value per item) */
+};
+
+static struct mempool *pool;
+static clist sections;
+static byte *pos;
+
+/* Move the global cursor `pos' past any run of whitespace characters. */
+static void
+parse_white(void)
+{
+  for (; Cspace(*pos); pos++)
+    ;
+}
+
+/* Consume one character at `pos' and die unless it is exactly `c'. */
+static void
+parse_char(byte c)
+{
+  byte got = *pos++;
+  if (got != c)
+    die("Missing '%c'", c);
+}
+
+/*
+ * Read an identifier (a non-empty run of Cword characters) at `pos'
+ * and return a zero-terminated copy allocated from the global pool.
+ * Dies if there is no identifier.
+ */
+static byte *
+parse_name(void)
+{
+  byte *start = pos;
+  for (; Cword(*pos); pos++)
+    ;
+  uns n = pos - start;
+  if (n == 0)
+    die("Expected item/section name");
+  byte *copy = mp_alloc(pool, n + 1);
+  memcpy(copy, start, n);
+  copy[n] = 0;
+  return copy;
+}
+
+/*
+ * Parse the items of one section (or of a nested list) starting at `pos'
+ * and append them to section->list.  Parsing stops at the end of the
+ * string or at '}', which is left for the caller to consume.
+ *
+ * Each item is either a list ("@name{...}", parsed recursively) or a
+ * static/array item with an optional '-' (hide) flag, a type prefix
+ * ('#' int, '##' u64, '$' double, none = string), a name, an optional
+ * "[count]" and an optional "=default".
+ *
+ * For items inside a list, cf.ptr holds the offset of the item within a
+ * list node; otherwise it points to the item's own value storage.
+ *
+ * Any syntax error terminates the program via die().
+ */
+static void
+parse_section(struct section *section)
+{
+  /* Report a parser error; the message is passed as data, never as a format string. */
+#define TRY(x) do{byte *_err=(x); if (_err) die("%s", _err); }while(0)
+  for (uns sep = 0; ; sep = 1)
+    {
+      parse_white();
+      if (!*pos || *pos == '}')
+       break;
+      if (sep)
+       parse_char(';');
+      parse_white();
+
+      struct item *item;
+
+      if (*pos == '@')
+        {
+         /* Nested list: its node starts with a cnode, item slots follow */
+         pos++;
+         struct section *sec = mp_alloc_zero(pool, sizeof(*sec));
+         sec->size = sizeof(cnode);
+         clist_init(&sec->list);
+         item = &sec->item;
+         item->cf.name = parse_name();
+         item->cf.cls = CC_LIST;
+         item->cf.number = 1;
+         parse_white();
+         parse_char('{');
+         parse_section(sec);
+         parse_char('}');
+       }
+      else
+        {
+         item = mp_alloc_zero(pool, sizeof(*item));
+         if (*pos == '-')
+           {
+             item->flags |= FLAG_HIDE;
+             pos++;
+           }
+         item->cf.cls = CC_STATIC;
+         item->cf.number = 1;
+         switch (*pos)
+           {
+             case '#':
+               if (*++pos == '#')
+                 {
+                   pos++;
+                   item->cf.type = CT_U64;
+                 }
+               else
+                 item->cf.type = CT_INT;
+               break;
+             case '$':
+               pos++;
+               item->cf.type = CT_DOUBLE;
+               break;
+             default:
+               if (!Cword(*pos))
+                 die("Invalid type syntax");
+               item->cf.type = CT_STRING;
+               break;
+           }
+         parse_white();
+         item->cf.name = parse_name();
+         parse_white();
+         if (*pos == '[')
+           {
+             /* Array: an empty count means any number of elements */
+             pos++;
+             parse_white();
+             item->cf.cls = CC_DYNAMIC;
+             byte *num = pos;
+             while (*pos && *pos != ']')
+               pos++;
+             if (!*pos)
+               die("Missing ']'");
+             *pos++ = 0;
+             if (!*num)
+               item->cf.number = CF_ANY_NUM;
+             else
+               {
+                 int inum;
+                 TRY(cf_parse_int(num, &inum));
+                 if (!inum)
+                   die("Invalid array length");
+                 item->cf.number = inum;
+               }
+             parse_white();
+           }
+         if (*pos == '=')
+           {
+             pos++;
+             parse_white();
+             if (section->item.cf.cls == CC_LIST)
+               die("List items can not have default values");
+             if (item->cf.cls == CC_DYNAMIC)
+               die("Arrays can not have default values");
+             /* The value is unquoted in place: `d' never overtakes `pos' */
+             byte *def = pos, *d = def;
+             /* Stop at the end of the string as well, so that an unterminated
+              * specification is reported by the caller's parse_char('}')
+              * instead of scanning past the end of the argument. */
+             while (*pos && *pos != ';' && *pos != '}' && !Cspace(*pos))
+               {
+                 if (*pos == '\'')
+                   {
+                     pos++;
+                     while (*pos != '\'')
+                       {
+                         if (!*pos)
+                           die("Unterminated string");
+                         *d++ = *pos++;
+                       }
+                     pos++;
+                   }
+                 else if (*pos == '"')
+                   {
+                     pos++;
+                     byte *start = d;
+                     uns esc = 0;
+                     /* `esc' is set iff the previous character was an unescaped backslash */
+                     while (*pos != '"' || esc)
+                       {
+                         if (!*pos)
+                           die("Unterminated string");
+                         if (*pos == '\\')
+                           esc ^= 1;
+                         else
+                           esc = 0;
+                         *d++ = *pos++;
+                       }
+                     pos++;
+                     *d = 0;
+                     d = str_unesc(start, start);
+                   }
+                 else
+                   *d++ = *pos++;
+               }
+             uns len = d - def;
+             byte *buf = mp_alloc(pool, len + 1);
+             memcpy(buf, def, len);
+             buf[len] = 0;
+             switch (item->cf.type)
+               {
+                 case CT_STRING:
+                   item->value.v_ptr = buf;
+                   break;
+                 case CT_INT:
+                   TRY(cf_parse_int(buf, &item->value.v_int));
+                   break;
+                 case CT_U64:
+                   TRY(cf_parse_u64(buf, &item->value.v_u64));
+                   break;
+                 case CT_DOUBLE:
+                   TRY(cf_parse_double(buf, &item->value.v_double));
+                   break;
+                 default:
+                   ASSERT(0);
+               }
+           }
+       }
+      if (section->item.cf.cls == CC_LIST)
+        {
+          /* Inside a list: store the offset of this item within the list node */
+          item->cf.ptr = (void *)(uintptr_t)section->size;
+          section->size += sizeof(union value);
+        }
+      else
+        item->cf.ptr = &item->value;
+      clist_add_tail(&section->list, &item->node);
+      section->count++;
+    }
+#undef TRY
+}
+
+/*
+ * Parse the whole specification at `pos': a sequence of
+ * "[!]name{items}" sections separated by semicolons.  Every section
+ * is appended to the global list `sections'.
+ */
+static void
+parse_outer(void)
+{
+  uns first = 1;
+  for (;;)
+    {
+      parse_white();
+      if (!*pos)
+        return;
+      if (!first)
+        parse_char(';');
+      first = 0;
+      parse_white();
+
+      struct section *s = mp_alloc_zero(pool, sizeof(*s));
+      if (*pos == '!')
+        {
+          s->item.flags |= FLAG_NO_UNKNOWN;
+          pos++;
+        }
+      s->item.cf.name = parse_name();
+      parse_white();
+      parse_char('{');
+      clist_add_tail(&sections, &s->item.node);
+      clist_init(&s->list);
+      parse_section(s);
+      parse_char('}');
+    }
+}
+
+/*
+ * Build a cf_section describing the parsed `section': the cf_item of every
+ * parsed item is copied to an array terminated by a CC_END entry, nested
+ * lists are converted recursively.  Everything is allocated from the pool.
+ */
+static struct cf_section *
+generate_section(struct section *section)
+{
+  struct cf_section *out = mp_alloc_zero(pool, sizeof(*out));
+  if (section->item.cf.cls == CC_LIST)
+    out->size = section->size;
+  out->cfg = mp_alloc_zero(pool, sizeof(struct cf_item) * (section->count + 1));
+
+  struct cf_item *dst = out->cfg;
+  CLIST_FOR_EACH(struct item *, item, section->list)
+    {
+      *dst = item->cf;
+      if (dst->cls == CC_LIST)
+        dst->u.sec = generate_section((struct section *)item);
+      dst++;
+    }
+  dst->cls = CC_END;
+  return out;
+}
+
+static bb_t path;
+
+/*
+ * Print one value as a shell assignment on stdout:
+ *   CF_<path>_<name>='<value>'          if `array' is zero,
+ *   CF_<path>_<name>[<index>]='<value>' otherwise,
+ * where <index> is a per-item counter starting at 1.  Single quotes inside
+ * the value are written as '\'' so that the result stays a single
+ * shell-quoted word.
+ *
+ * `v' points to the value, whose representation is given by item->cf.type.
+ */
+static void
+dump_value(uns array, struct item *item, void *v)
+{
+  byte buf[128], *value = buf;
+  if (!array)
+    printf("CF_%s_%s='", path.ptr, item->cf.name);
+  else
+    printf("CF_%s_%s[%u]='", path.ptr, item->cf.name, ++item->index);
+  switch (item->cf.type)
+    {
+      case CT_INT:
+        sprintf(buf, "%d", *(int *)v);
+        break;
+      case CT_U64:
+        /* %llu requires an unsigned argument */
+        sprintf(buf, "%llu", (unsigned long long) *(u64 *)v);
+        break;
+      case CT_DOUBLE:
+        sprintf(buf, "%g", *(double *)v);
+        break;
+      case CT_STRING:
+        /* An unset string is dumped as an empty one */
+        if (*(byte **)v)
+          value = *(byte **)v;
+        else
+          *value = 0;
+        break;
+      default:
+        ASSERT(0);
+    }
+  while (*value) {
+    if (*value == '\'')
+      printf("'\\''");
+    else
+      putchar(*value);
+    value++;
+  }
+  printf("'\n");
+}
+
+/*
+ * Dump one item (recursively for lists) unless it is marked FLAG_HIDE.
+ *
+ * `ptr' is NULL for items placed directly in a section; for items inside
+ * a list it points to the list node holding the values.  `path_len' is the
+ * length of the prefix currently stored in the global `path' buffer.
+ */
+static void
+dump_item(struct item *item, void *ptr, uns path_len)
+{
+  if (item->flags & FLAG_HIDE)
+    return;
+  /* cf.ptr is an absolute address when ptr is NULL and an offset within the list node otherwise */
+  byte *val = (byte *)((uintptr_t)ptr + (uintptr_t)item->cf.ptr);
+  if (item->cf.cls == CC_LIST)
+    {
+      /* Extend the path by "_<name>" and dump every item of every node of the list */
+      uns len = strlen(item->cf.name);
+      bb_grow(&path, path_len + len + 1);
+      path.ptr[path_len] = '_';
+      memcpy(path.ptr + path_len + 1, item->cf.name, len);
+      CLIST_FOR_EACH(cnode *, ptr2, *(clist *)val)
+        CLIST_FOR_EACH(struct item *, item2, ((struct section *)item)->list)
+          dump_item(item2, ptr2, path_len + len + 1);
+    }
+  else
+    {
+      /* Terminate the path so that dump_value() can print it as a string */
+      bb_grow(&path, path_len + 1)[path_len] = 0;
+      if (item->cf.cls == CC_STATIC)
+       /* Values living inside a list node are printed in the indexed form */
+       dump_value(!!ptr, item, val);
+      else
+        {
+         /* Dynamic array: `val' holds a pointer to the elements, DARY_LEN gives their count */
+         val = *(void **)val;
+         uns len = DARY_LEN(val);
+         uns size = cf_type_size(item->cf.type, NULL);
+         for (uns i = 0; i < len; i++, val += size)
+           dump_value(1, item, val);
+       }
+    }
+}
+
+/*
+ * Entry point: the last argument is the specification of sections and
+ * items, all preceding arguments are configuration options for cf_getopt().
+ * The requested values are printed as shell variable assignments.
+ */
+int main(int argc, char **argv)
+{
+  log_init("config");
+  if (argc < 2)
+    help();
+
+  /* Detach the specification from the option list */
+  int nopts = argc - 1;
+  pos = argv[nopts];
+  argv[nopts] = NULL;
+
+  pool = mp_new(0x1000);
+  clist_init(&sections);
+  parse_outer();
+  CLIST_FOR_EACH(struct section *, sec, sections)
+    {
+      uns allow_unknown = !(sec->item.flags & FLAG_NO_UNKNOWN);
+      cf_declare_section(sec->item.cf.name, generate_section(sec), allow_unknown);
+    }
+
+  /* Only configuration options are accepted, anything else prints the help */
+  if (cf_getopt(nopts, argv, CF_SHORT_OPTS, CF_NO_LONG_OPTS, NULL) != -1)
+    help();
+
+  bb_init(&path);
+  CLIST_FOR_EACH(struct section *, section, sections)
+    {
+      /* The path starts with the name of the section */
+      uns len = strlen(section->item.cf.name);
+      byte *dest = bb_grow(&path, len);
+      memcpy(dest, section->item.cf.name, len);
+      CLIST_FOR_EACH(struct item *, item, section->list)
+        dump_item(item, NULL, len);
+    }
+  bb_done(&path);
+
+  return 0;
+}
+
diff --git a/lib/shell/config.t b/lib/shell/config.t

new file mode 100644 (file)

index 0000000..a8aeb72
--- /dev/null
+++ b/lib/shell/config.t
@@ -0,0 +1,39 @@
+# Tests for configuration parser
+
+Run:   ../obj/lib/shell/config -C/dev/null -S 'sec1{int1=23; long1=1234567812345678; long2=4321; str1="s1"; str2="s2"}' 'sec1 {#int1; ##long1; -str1; str2; #int2=123; ##long2=1234; #int3=0x10; #int4; $dbl1=001.100; $dbl2}; sec2{str3}'
+Out:   CF_sec1_int1='23'
+       CF_sec1_long1='1234567812345678'
+       CF_sec1_str2='s2'
+       CF_sec1_int2='123'
+       CF_sec1_long2='4321'
+       CF_sec1_int3='16'
+       CF_sec1_int4='0'
+       CF_sec1_dbl1='1.1'
+       CF_sec1_dbl2='0'
+       CF_sec2_str3=''
+
+Run:   ../obj/lib/shell/config -C/dev/null -S 'sec1{list1 1 a1 b1; list1:clear; list1 2 a2 b2 3 a3 b3}' 'sec1 {@list1 {#int1; str1; -str2}}'
+Out:   CF_sec1_list1_int1[1]='2'
+       CF_sec1_list1_str1[1]='a2'
+       CF_sec1_list1_int1[2]='3'
+       CF_sec1_list1_str1[2]='a3'
+
+Run:   ../obj/lib/shell/config -C/dev/null -S 'sec1{ar1 a b c d; ar1 a b c; ar2 1 2; ar3 1.1}' 'sec1 {ar1[]; #ar2[2]; $ar3[-2]}'
+Out:   CF_sec1_ar1[1]='a'
+       CF_sec1_ar1[2]='b'
+       CF_sec1_ar1[3]='c'
+       CF_sec1_ar2[1]='1'
+       CF_sec1_ar2[2]='2'
+       CF_sec1_ar3[1]='1.1'
+
+Run:   ../obj/lib/shell/config -C/dev/null -S 'sec1{list1 {str1=1; list2=a b c}; list1 {str1=2; list2=d e}}' 'sec1 {@list1 {str1; @list2{str2}}}'
+Out:   CF_sec1_list1_str1[1]='1'
+       CF_sec1_list1_list2_str2[1]='a'
+       CF_sec1_list1_list2_str2[2]='b'
+       CF_sec1_list1_list2_str2[3]='c'
+       CF_sec1_list1_str1[2]='2'
+       CF_sec1_list1_list2_str2[4]='d'
+       CF_sec1_list1_list2_str2[5]='e'
+
+Run:   ../obj/lib/shell/config -C/dev/null 'sec{str=a'\''b"c'\''d"\\e'\''f"g}'
+Out:   CF_sec_str='ab"cd\e'\''fg'
diff --git a/lib/shell/libucw.sh b/lib/shell/libucw.sh

new file mode 100644 (file)

index 0000000..b02230c
--- /dev/null
+++ b/lib/shell/libucw.sh
@@ -0,0 +1,42 @@
+# The UCW Library -- Shell Functions
+# (c) 2005 Martin Mares <mj@ucw.cz>
+#
+# This software may be freely distributed and used according to the terms
+# of the GNU Lesser General Public License.
+
+UCW_CF=
+while [ "${1:0:2}" = "-C" -o "${1:0:2}" = "-S" ] ; do
+       if [ -z "${1:2:1}" ] ; then
+               UCW_CF="$UCW_CF $1 $2"
+               shift 2
+       else
+               UCW_CF="$UCW_CF $1"
+               shift 1
+       fi
+done
+
+function log # msg
+{
+       bin/logger $UCW_PROGNAME I "$1"
+}
+
+function errlog # msg
+{
+       bin/logger $UCW_PROGNAME E "$1"
+}
+
+function warnlog # msg
+{
+       bin/logger $UCW_PROGNAME E "$1"
+}
+
+function die # msg
+{
+       bin/logger $UCW_PROGNAME ! "$1"
+       exit 1
+}
+
+function parse-config # section vars...
+{
+       eval `bin/config$UCW_CF "$@"`
+}
diff --git a/lib/shell/logger.c b/lib/shell/logger.c

new file mode 100644 (file)

index 0000000..67315c9
--- /dev/null
+++ b/lib/shell/logger.c
@@ -0,0 +1,42 @@
+/*
+ *     UCW Library Utilities -- A Simple Logger for use in shell scripts
+ *
+ *     (c) 2001 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+
+#include <stdio.h>
+#include <string.h>
+
+/*
+ * Usage: logger [<logname>:]<progname> <level> [<text>]
+ *
+ * Logs <text> with the given single-character level under the program
+ * name <progname>.  When <logname> is given, the log goes to that file.
+ * Without <text>, every line of the standard input is logged as
+ * a separate message.
+ */
+int
+main(int argc, char **argv)
+{
+  byte buf[1024], *c;
+
+  log_init("logger");
+  if (argc < 3 || argc > 4 || strlen(argv[2]) != 1)
+    die("Usage: logger [<logname>:]<progname> <level> [<text>]");
+  if (c = strchr(argv[1], ':'))
+    {
+      /* Split "<logname>:<progname>" in place */
+      *c++ = 0;
+      log_init(c);
+      log_file(argv[1]);
+    }
+  else
+    log_init(argv[1]);
+  /* The text comes from the caller, so it must never be used as a format
+   * string: a '%' in the message would otherwise be interpreted by msg(). */
+  if (argc > 3)
+    msg(argv[2][0], "%s", argv[3]);
+  else
+    while (fgets(buf, sizeof(buf), stdin))
+      {
+        c = strchr(buf, '\n');
+        if (c)
+          *c = 0;
+        msg(argv[2][0], "%s", buf);
+      }
+  return 0;
+}
diff --git a/lib/sighandler.c b/lib/sighandler.c

new file mode 100644 (file)

index 0000000..2739ba1
--- /dev/null
+++ b/lib/sighandler.c
@@ -0,0 +1,64 @@
+/*
+ *     UCW Library -- Catching of signals and calling callback functions
+ *
+ *     (c) 2004, Robert Spalek <robert@ucw.cz>
+ *     (c) 2006 Martin Mares <mj@ucw.cz>
+ */
+
+#include "lib/lib.h"
+#include "lib/threads.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <signal.h>
+
+static int sig_handler_nest[NSIG];
+static struct sigaction sig_handler_old[NSIG];
+
+/*
+ * The handler installed by handle_signal().  It calls the callback
+ * registered by the current thread for the signal; the process is aborted
+ * if there is no such callback or if the callback returns non-zero.
+ */
+static void
+signal_handler_internal(int sig)
+{
+  struct ucwlib_context *ctx = ucwlib_thread_context();
+  if (ctx->signal_handlers && ctx->signal_handlers[sig] && !ctx->signal_handlers[sig](sig))
+    return;
+  abort();
+}
+
+/*
+ * Start catching `signum'.  Calls nest: the handler is installed (and the
+ * previous disposition remembered) only by the outermost call.
+ */
+void
+handle_signal(int signum)
+{
+  ucwlib_lock();
+  int nested = sig_handler_nest[signum]++;
+  if (!nested)
+    {
+      struct sigaction sa;
+      bzero(&sa, sizeof(sa));
+      sa.sa_flags = SA_NODEFER;
+      sa.sa_handler = signal_handler_internal;
+      if (sigaction(signum, &sa, &sig_handler_old[signum]) < 0)
+        die("sigaction: %m");
+    }
+  ucwlib_unlock();
+}
+
+/*
+ * Undo one handle_signal() call.  The remembered disposition of the signal
+ * is restored when the nesting count drops to zero.
+ */
+void
+unhandle_signal(int signum)
+{
+  ucwlib_lock();
+  ASSERT(sig_handler_nest[signum]);
+  sig_handler_nest[signum]--;
+  if (!sig_handler_nest[signum] &&
+      sigaction(signum, &sig_handler_old[signum], NULL) < 0)
+    die("sigaction: %m");
+  ucwlib_unlock();
+}
+
+/*
+ * Register `new' as the callback of the current thread for `signum' and
+ * return the callback registered before.  The per-thread table of
+ * callbacks is allocated (zeroed) on the first use.
+ */
+sh_sighandler_t
+set_signal_handler(int signum, sh_sighandler_t new)
+{
+  struct ucwlib_context *ctx = ucwlib_thread_context();
+  sh_sighandler_t previous;
+
+  if (!ctx->signal_handlers)
+    ctx->signal_handlers = xmalloc_zero(NSIG * sizeof(sh_sighandler_t));
+  previous = ctx->signal_handlers[signum];
+  ctx->signal_handlers[signum] = new;
+  return previous;
+}
diff --git a/lib/simple-lists.c b/lib/simple-lists.c

new file mode 100644 (file)

index 0000000..8f14a3f
--- /dev/null
+++ b/lib/simple-lists.c
@@ -0,0 +1,48 @@
+/*
+ *     UCW Library -- Linked Lists of Simple Items
+ *
+ *     (c) 2006 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/mempool.h"
+#include "lib/conf.h"
+#include "lib/simple-lists.h"
+
+/*
+ * Allocate a simp_node from the mempool `mp' and append it to the list `l'.
+ * The payload of the node is left for the caller to fill in.
+ */
+simp_node *
+simp_append(struct mempool *mp, clist *l)
+{
+  simp_node *node;
+
+  node = mp_alloc_fast(mp, sizeof(simp_node));
+  clist_add_tail(l, &node->n);
+  return node;
+}
+
+/*
+ * Allocate a simp2_node from the mempool `mp' and append it to the list `l'.
+ * Both payloads of the node are left for the caller to fill in.
+ */
+simp2_node *
+simp2_append(struct mempool *mp, clist *l)
+{
+  simp2_node *node;
+
+  node = mp_alloc_fast(mp, sizeof(simp2_node));
+  clist_add_tail(l, &node->n);
+  return node;
+}
+
+/* Configuration sections for common lists */
+
+/* Configuration section for lists of simp_node: one string item "String" stored in the member `s' */
+struct cf_section cf_string_list_config = {
+  CF_TYPE(simp_node),
+  CF_ITEMS {
+    CF_STRING("String", PTR_TO(simp_node, s)),
+    CF_END
+  }
+};
+
+/* Configuration section for lists of simp2_node: string items "Src" and "Dest" stored in `s1' and `s2' */
+struct cf_section cf_2string_list_config = {
+  CF_TYPE(simp2_node),
+  CF_ITEMS {
+    CF_STRING("Src", PTR_TO(simp2_node, s1)),
+    CF_STRING("Dest", PTR_TO(simp2_node, s2)),
+    CF_END
+  }
+};
diff --git a/lib/simple-lists.h b/lib/simple-lists.h

new file mode 100644 (file)

index 0000000..f553a6b
--- /dev/null
+++ b/lib/simple-lists.h
@@ -0,0 +1,49 @@
+/*
+ *     UCW Library -- Linked Lists of Simple Items
+ *
+ *     (c) 2006 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_SIMPLE_LISTS_H
+#define _UCW_SIMPLE_LISTS_H
+
+#include "lib/clists.h"
+
+/* A node of a circular list carrying a single simple value */
+typedef struct simp_node {
+  cnode n;                     /* list linkage */
+  union {                      /* the value; the members share the same storage */
+    char *s;
+    void *p;
+    int i;
+    uns u;
+  };
+} simp_node;
+
+/* A node of a circular list carrying a pair of simple values */
+typedef struct simp2_node {
+  cnode n;                     /* list linkage */
+  union {                      /* the first value */
+    char *s1;
+    void *p1;
+    int i1;
+    uns u1;
+  };
+  union {                      /* the second value */
+    char *s2;
+    void *p2;
+    int i2;
+    uns u2;
+  };
+} simp2_node;
+
+struct mempool;
+simp_node *simp_append(struct mempool *mp, clist *l);
+simp2_node *simp2_append(struct mempool *mp, clist *l);
+
+/* Configuration sections */
+extern struct cf_section cf_string_list_config;
+extern struct cf_section cf_2string_list_config;
+
+#endif
diff --git a/lib/slists.c b/lib/slists.c

new file mode 100644 (file)

index 0000000..fffc64e
--- /dev/null
+++ b/lib/slists.c
@@ -0,0 +1,83 @@
+/*
+ *     UCW Library -- Single-Linked Lists
+ *
+ *     (c) 2005 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/slists.h"
+
+/*
+ * Find the node whose successor is `n', searching from the head node of
+ * the list (which is returned when `n' is the first element).  Takes time
+ * linear in the length of the list; hits ASSERT(0) if no such node exists.
+ */
+static inline snode *
+slist_raw_prev(slist *l, snode *n)
+{
+  for (snode *m = &l->head; m; m = m->next)
+    if (m->next == n)
+      return m;
+  ASSERT(0);
+}
+
+/* Return the predecessor of `n' in the list, or NULL if `n' is the first element. */
+void *
+slist_prev(slist *l, snode *n)
+{
+  snode *p = slist_raw_prev(l, n);
+  if (p == &l->head)
+    return NULL;
+  return p;
+}
+
+/* Insert the node `what' immediately before the node `before' of the list `l'. */
+void
+slist_insert_before(slist *l, snode *what, snode *before)
+{
+  snode *pred;
+
+  what->next = before;
+  pred = slist_raw_prev(l, before);
+  pred->next = what;
+}
+
+/* Remove the node `n' from the list `l' (requires a search for its predecessor). */
+void
+slist_remove(slist *l, snode *n)
+{
+  slist_remove_after(l, slist_raw_prev(l, n));
+}
+
+#ifdef TEST
+
+#include <stdio.h>
+#include <alloca.h>
+
+/* Self-test, compiled only with TEST defined; the expected output is kept in slists.t */
+int main(void)
+{
+  slist l;
+
+  struct x {
+    snode n;
+    int val;
+  };
+
+  /* Odd values are added to the head, even values to the tail */
+  slist_init(&l);
+  for (int i=1; i<=10; i++)
+    {
+      struct x *x = alloca(sizeof(*x));
+      x->val = i;
+      if (i % 2)
+       slist_add_head(&l, &x->n);
+      else
+       slist_add_tail(&l, &x->n);
+    }
+
+  /* Remove 5 and 6 during the walk, each by a different removal function */
+  struct x *x, *prev;
+  SLIST_WALK_DELSAFE(x, l, prev)
+    if (x->val == 5)
+      slist_remove_after(&l, &prev->n);
+    else if (x->val == 6)
+      slist_remove(&l, &x->n);
+  SLIST_FOR_EACH(struct x *, x, l)
+    printf("%d/", x->val);
+  putchar('\n');
+}
+
+#endif
diff --git a/lib/slists.h b/lib/slists.h

new file mode 100644 (file)

index 0000000..b0e9f4e
--- /dev/null
+++ b/lib/slists.h
@@ -0,0 +1,90 @@
+/*
+ *     UCW Library -- Single-Linked Lists
+ *
+ *     (c) 2005 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_SLISTS_H
+#define _UCW_SLISTS_H
+
+/* Linkage of a single-linked list node; `next' is NULL in the last node */
+typedef struct snode {
+  struct snode *next;
+} snode;
+
+/*
+ * A single-linked list: `head' is a node placed before the first element
+ * (head.next is the first element or NULL), `last' points to the last
+ * element or is NULL when the list is empty.
+ */
+typedef struct slist {
+  struct snode head, *last;
+} slist;
+
+/* Return the first element of the list or NULL if the list is empty. */
+static inline void *slist_head(slist *l)
+{
+  return l->head.next;
+}
+
+/* Return the last element of the list or NULL if the list is empty. */
+static inline void *slist_tail(slist *l)
+{
+  return l->last;
+}
+
+/* Return the successor of the node `n' or NULL if `n' is the last one. */
+static inline void *slist_next(snode *n)
+{
+  return n->next;
+}
+
+/* Return non-zero iff the list contains no elements. */
+static inline int slist_empty(slist *l)
+{
+  return l->head.next == NULL;
+}
+
+#define SLIST_WALK(n,list) for(n=(void*)(list).head.next; (n); (n)=(void*)((snode*)(n))->next)
+#define SLIST_WALK_DELSAFE(n,list,prev) for((prev)=(void*)&(list).head; (n)=(void*)((snode*)prev)->next; (prev)=(((snode*)(prev))->next==(snode*)(n) ? (void*)(n) : (void*)(prev)))
+#define SLIST_FOR_EACH(type,n,list) for(type n=(void*)(list).head.next; n; n=(void*)((snode*)(n))->next)
+
+/*
+ * Insert the node `what' right after the node `after'; if it ends up
+ * at the end of the list, it is recorded as the last element.
+ */
+static inline void slist_insert_after(slist *l, snode *what, snode *after)
+{
+  what->next = after->next;
+  after->next = what;
+  if (what->next == NULL)
+    l->last = what;
+}
+
+/* Prepend the node `n'; in an empty list it also becomes the last element. */
+static inline void slist_add_head(slist *l, snode *n)
+{
+  snode *old_first = l->head.next;
+
+  n->next = old_first;
+  l->head.next = n;
+  if (l->last == NULL)
+    l->last = n;
+}
+
+/* Append the node `n' to the end of the list. */
+static inline void slist_add_tail(slist *l, snode *n)
+{
+  /* The link to update is either in the current last node or in the head */
+  snode **link = l->last ? &l->last->next : &l->head.next;
+
+  *link = n;
+  n->next = NULL;
+  l->last = n;
+}
+
+/* Initialize `l' to an empty list. */
+static inline void slist_init(slist *l)
+{
+  l->last = NULL;
+  l->head.next = NULL;
+}
+
+/*
+ * Remove the node following `after' (which may be the head node of the
+ * list).  If the removed node was the last one, `last' is moved to
+ * `after', or set to NULL when the list has become empty.
+ */
+static inline void slist_remove_after(slist *l, snode *after)
+{
+  snode *victim = after->next;
+
+  after->next = victim->next;
+  if (l->last != victim)
+    return;
+  l->last = (after == &l->head) ? NULL : after;
+}
+
+/* Non-trivial functions */
+
+void *slist_prev(slist *l, snode *n);
+void slist_insert_before(slist *l, snode *what, snode *before);
+void slist_remove(slist *l, snode *n);
+
+#endif
diff --git a/lib/slists.t b/lib/slists.t

new file mode 100644 (file)

index 0000000..fe642b1
--- /dev/null
+++ b/lib/slists.t
@@ -0,0 +1,4 @@
+# Test for slists module
+
+Run:   ../obj/lib/slists-t
+Out:   9/7/3/1/2/4/8/10/
diff --git a/lib/sorter/Makefile b/lib/sorter/Makefile

new file mode 100644 (file)

index 0000000..b54c05f
--- /dev/null
+++ b/lib/sorter/Makefile
@@ -0,0 +1,13 @@
+# Makefile for the UCW Sorter (c) 2007 Martin Mares <mj@ucw.cz>
+
+DIRS+=lib/sorter
+
+# The sorter modules and headers are added to the module and include lists of libucw
+LIBUCW_MODS+=$(addprefix sorter/, config govern sbuck array)
+LIBUCW_INCLUDES+=$(addprefix sorter/, array.h common.h s-fixint.h \
+       s-internal.h s-multiway.h s-radix.h s-twoway.h sorter.h)
+
+# The sort-test program is built only when CONFIG_DEBUG_TOOLS is set
+ifdef CONFIG_DEBUG_TOOLS
+PROGS+=$(o)/lib/sorter/sort-test
+endif
+
+$(o)/lib/sorter/sort-test: $(o)/lib/sorter/sort-test.o $(LIBUCW)
diff --git a/lib/sorter/TODO b/lib/sorter/TODO

new file mode 100644 (file)

index 0000000..bd399e2
--- /dev/null
+++ b/lib/sorter/TODO
@@ -0,0 +1,15 @@
+Cleanups:
+o  Log messages should show both the original and the new size of the data.
+   The speed should probably be calculated from the former.
+o  Buffer sizing in shep-export.
+
+Improvements:
+o  When quicksorting a large input (especially in the threaded case), invest
+   more time in picking a good pivot.
+o  Overlay presorter I/O with internal sorting.
+
+Users of lib/sorter/array.h which might use radix-sorting:
+indexer/chewer.c
+indexer/lexfreq.c
+indexer/mkgraph.c
+indexer/reftexts.c
diff --git a/lib/sorter/array.c b/lib/sorter/array.c

new file mode 100644 (file)

index 0000000..6d65560
--- /dev/null
+++ b/lib/sorter/array.c
@@ -0,0 +1,475 @@
+/*
+ *     UCW Library -- Optimized Array Sorter
+ *
+ *     (c) 2003--2007 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#undef LOCAL_DEBUG
+
+#include "lib/lib.h"
+#include "lib/sorter/common.h"
+
+#include <string.h>
+#include <alloca.h>
+
+#define ASORT_MIN_SHIFT 2
+
+#define ASORT_TRACE(x...) ASORT_XTRACE(1, x)
+#define ASORT_XTRACE(level, x...) do { if (sorter_trace_array >= level) msg(L_DEBUG, x); } while(0)
+
+/*
+ * Recursive radix-sort of `num_elts' elements of `array', using `buffer'
+ * as a work space of the same size.  The elements are split to buckets by
+ * ctx->radix_bits bits of their hash below the `hash_bits' bits already
+ * used; buckets which are small enough (or which have too few bits left)
+ * are finished by quicksort, the others are sorted recursively with the
+ * roles of `array' and `buffer' exchanged.
+ */
+static void
+asort_radix(struct asort_context *ctx, void *array, void *buffer, uns num_elts, uns hash_bits, uns swapped_output)
+{
+  // swapped_output == 0 if result should be returned in `array', otherwise in `buffer'
+  uns buckets = (1 << ctx->radix_bits);
+  uns shift = (hash_bits > ctx->radix_bits) ? (hash_bits - ctx->radix_bits) : 0;
+  uns cnt[buckets];
+
+#if 0
+  static int reported[64];
+  if (!reported[hash_bits]++)
+#endif
+  DBG(">>> n=%u h=%d s=%d sw=%d", num_elts, hash_bits, shift, swapped_output);
+
+  bzero(cnt, sizeof(cnt));
+  ctx->radix_count(array, num_elts, cnt, shift);
+
+  // Convert the bucket sizes to positions where the buckets start
+  uns pos = 0;
+  for (uns i=0; i<buckets; i++)
+    {
+      uns j = cnt[i];
+      cnt[i] = pos;
+      pos += j;
+    }
+  ASSERT(pos == num_elts);
+
+  // Presumably radix_split() advances each cnt[i] to the end of its bucket,
+  // which is what the size calculation below relies on -- TODO confirm
+  ctx->radix_split(array, buffer, num_elts, cnt, shift);
+  pos = 0;
+  for (uns i=0; i<buckets; i++)
+    {
+      uns n = cnt[i] - pos;
+      if (n < ctx->radix_threshold || shift < ASORT_MIN_SHIFT)
+       {
+         // The bucket is in `buffer' now; copy it back if the result is wanted in `array'
+         ctx->quicksort(buffer, n);
+         if (!swapped_output)
+           memcpy(array, buffer, n * ctx->elt_size);
+       }
+      else
+       asort_radix(ctx, buffer, array, n, shift, !swapped_output);
+      array += n * ctx->elt_size;
+      buffer += n * ctx->elt_size;
+      pos = cnt[i];
+    }
+}
+
+#ifdef CONFIG_UCW_THREADS
+
+#include "lib/threads.h"
+#include "lib/workqueue.h"
+#include "lib/eltpool.h"
+
+static uns asort_threads_use_count;
+static uns asort_threads_ready;
+static struct worker_pool asort_thread_pool;
+
+static uns
+rs_estimate_stack(void)
+{
+  // Stack space needed by the recursive radix-sorter:
+  // one array of counters per level of recursion
+  uns levels = (64 / CONFIG_UCW_RADIX_SORTER_BITS) + 1;
+  uns per_level = sizeof(uns) * (1 << CONFIG_UCW_RADIX_SORTER_BITS);
+  return per_level * levels;
+}
+
+// Register one more user of the sorting threads. The thread pool itself
+// is initialized only if `run' is non-zero and the pool is not ready yet.
+void
+asort_start_threads(uns run)
+{
+  ucwlib_lock();
+  asort_threads_use_count++;
+  uns need_init = run && !asort_threads_ready;
+  if (need_init)
+    {
+      // XXX: If somebody overrides the radix-sorter parameters to insane values,
+      // he also should override the stack size to insane values.
+      asort_thread_pool.num_threads = sorter_threads;
+      asort_thread_pool.stack_size = default_thread_stack_size + rs_estimate_stack();
+      ASORT_TRACE("Initializing thread pool (%d threads, %dK stack)", sorter_threads, asort_thread_pool.stack_size >> 10);
+      worker_pool_init(&asort_thread_pool);
+      asort_threads_ready = 1;
+    }
+  ucwlib_unlock();
+}
+
+// Unregister one user of the sorting threads; the last user
+// shuts the thread pool down if it has been initialized.
+void
+asort_stop_threads(void)
+{
+  ucwlib_lock();
+  asort_threads_use_count--;
+  if (asort_threads_use_count == 0 && asort_threads_ready)
+    {
+      ASORT_TRACE("Shutting down thread pool");
+      worker_pool_cleanup(&asort_thread_pool);
+      asort_threads_ready = 0;
+    }
+  ucwlib_unlock();
+}
+
+// One unit of work of the threaded quicksort
+struct qs_work {
+  struct work w;               // must be first: qs_handle_work() casts struct work * to struct qs_work *
+  struct asort_context *ctx;
+  void *array;                 // start of the part of the array to process
+  uns num_elts;                // number of elements in that part
+  int left, right;             // results of quicksplit(); both LR_UNDEF if the part was sorted completely
+#define LR_UNDEF -100
+};
+
+// Worker callback: parts below the thread threshold are sorted completely,
+// larger ones are only split and the split points are reported back.
+static void
+qs_handle_work(struct worker_thread *thr UNUSED, struct work *ww)
+{
+  struct qs_work *w = (struct qs_work *) ww;
+  struct asort_context *ctx = w->ctx;
+
+  DBG("Thread %d: got %u elts", thr->id, w->num_elts);
+  if (w->num_elts >= ctx->thread_threshold)
+    ctx->quicksplit(w->array, w->num_elts, &w->left, &w->right);
+  else
+    {
+      ctx->quicksort(w->array, w->num_elts);
+      w->left = LR_UNDEF;
+      w->right = LR_UNDEF;
+    }
+  DBG("Thread %d: returning l=%u r=%u", thr->id, w->left, w->right);
+}
+
+// Allocate a quicksort work unit from the element pool of the context.
+// The array, its size and the results are left for the caller to set.
+static struct qs_work *
+qs_alloc_work(struct asort_context *ctx)
+{
+  struct qs_work *work = ep_alloc(ctx->eltpool);
+  work->ctx = ctx;
+  work->w.go = qs_handle_work;
+  work->w.priority = 0;
+  return work;
+}
+
+// Sort ctx->array by quicksort distributed over the thread pool:
+// the whole array is submitted as a single work unit and every unit
+// which comes back split spawns work units for its two parts.
+static void
+threaded_quicksort(struct asort_context *ctx)
+{
+  struct work_queue q;
+  struct qs_work *v, *w;
+
+  asort_start_threads(1);
+  work_queue_init(&asort_thread_pool, &q);
+  ctx->eltpool = ep_new(sizeof(struct qs_work), 1000);
+
+  w = qs_alloc_work(ctx);
+  w->array = ctx->array;
+  w->num_elts = ctx->num_elts;
+  work_submit(&q, &w->w);
+
+  while (v = (struct qs_work *) work_wait(&q))
+    {
+      // LR_UNDEF means that the unit has been sorted completely
+      if (v->left != LR_UNDEF)
+       {
+         // Elements 0..right form one part, if it has at least two elements
+         if (v->right > 0)
+           {
+             w = qs_alloc_work(ctx);
+             w->array = v->array;
+             w->num_elts = v->right + 1;
+             w->w.priority = v->w.priority + 1;
+             work_submit(&q, &w->w);
+           }
+         // Elements left..num_elts-1 form the other one
+         if (v->left < (int)v->num_elts - 1)
+           {
+             w = qs_alloc_work(ctx);
+             w->array = v->array + v->left * ctx->elt_size;
+             w->num_elts = v->num_elts - v->left;
+             w->w.priority = v->w.priority + 1;
+             work_submit(&q, &w->w);
+           }
+       }
+      ep_free(ctx->eltpool, v);
+    }
+
+  ep_delete(ctx->eltpool);
+  work_queue_cleanup(&q);
+  asort_stop_threads();
+}
+
+// One unit of work of the threaded radix-sort
+struct rs_work {
+  struct work w;               // must be first: the callbacks cast struct work * to struct rs_work *
+  struct asort_context *ctx;
+  void *array, *buffer;                // Like asort_radix().
+  uns num_elts;
+  uns shift;
+  uns swap_output;
+  uns cnt[0];                  // counters of the buckets, allocated together with the structure
+};
+
+// Worker callback: count the elements of the block falling to each bucket
+static void
+rs_count(struct worker_thread *thr UNUSED, struct work *ww)
+{
+  struct rs_work *job = (struct rs_work *) ww;
+  struct asort_context *ctx = job->ctx;
+
+  DBG("Thread %d: Counting %u items, shift=%d", thr->id, job->num_elts, job->shift);
+  ctx->radix_count(job->array, job->num_elts, job->cnt, job->shift);
+  DBG("Thread %d: Counting done", thr->id);
+}
+
+// Worker callback: distribute the elements of the block to the buckets
+static void
+rs_split(struct worker_thread *thr UNUSED, struct work *ww)
+{
+  struct rs_work *job = (struct rs_work *) ww;
+  struct asort_context *ctx = job->ctx;
+
+  DBG("Thread %d: Splitting %u items, shift=%d", thr->id, job->num_elts, job->shift);
+  ctx->radix_split(job->array, job->buffer, job->num_elts, job->cnt, job->shift);
+  DBG("Thread %d: Splitting done", thr->id);
+}
+
+// Finish sorting of a single bucket: by the sequential radix-sort if the
+// bucket is large enough and there are enough bits left, by quicksort
+// otherwise. `thr' may be NULL; it is used for debugging messages only.
+static void
+rs_finish(struct worker_thread *thr UNUSED, struct work *ww)
+{
+  struct rs_work *w = (struct rs_work *) ww;
+
+  if (thr)
+    DBG("Thread %d: Finishing %u items, shift=%d", thr->id, w->num_elts, w->shift);
+  if (w->shift >= ASORT_MIN_SHIFT && w->num_elts >= w->ctx->radix_threshold)
+    asort_radix(w->ctx, w->array, w->buffer, w->num_elts, w->shift, w->swap_output);
+  else
+    {
+      w->ctx->quicksort(w->array, w->num_elts);
+      // Quicksort works in place, so move the result if it is wanted in the buffer
+      if (w->swap_output)
+        memcpy(w->buffer, w->array, w->num_elts * w->ctx->elt_size);
+    }
+  if (thr)
+    DBG("Thread %d: Finishing done", thr->id);
+}
+
+// Wait until the work queue of the context is drained
+// and return all finished work units to the element pool
+static void
+rs_wait_small(struct asort_context *ctx)
+{
+  for (;;)
+    {
+      struct rs_work *w = (struct rs_work *) work_wait(ctx->rs_work_queue);
+      if (!w)
+        break;
+      DBG("Reaping small chunk of %u items", w->num_elts);
+      ep_free(ctx->eltpool, w);
+    }
+}
+// One parallel radix pass: count, compute bucket offsets, split `array' into `buffer', then finish or recurse per bucket.
+static void
+rs_radix(struct asort_context *ctx, void *array, void *buffer, uns num_elts, uns hash_bits, uns swapped_output)
+{
+  uns buckets = (1 << ctx->radix_bits);
+  uns shift = (hash_bits > ctx->radix_bits) ? (hash_bits - ctx->radix_bits) : 0;      // Bits below `shift' are left for later passes
+  uns cnt[buckets];                    // Variable-length array on the stack
+  uns blksize = num_elts / sorter_threads;     // Elements per work item; the last one also takes the remainder
+  DBG(">>> n=%u h=%d s=%d blk=%u sw=%d", num_elts, hash_bits, shift, blksize, swapped_output);
+
+  // If there are any small chunks in progress, wait for them to finish
+  rs_wait_small(ctx);
+
+  // Start parallel counting
+  void *iptr = array;
+  for (uns i=0; i<sorter_threads; i++)
+    {
+      struct rs_work *w = ctx->rs_works[i];
+      w->w.priority = 0;
+      w->w.go = rs_count;
+      w->ctx = ctx;
+      w->array = iptr;
+      w->buffer = buffer;              // All work items share the same destination base
+      w->num_elts = blksize;
+      if (i == sorter_threads-1)
+       w->num_elts += num_elts % sorter_threads;
+      w->shift = shift;
+      iptr += w->num_elts * ctx->elt_size;     // Arithmetic on void * (GNU C extension)
+      bzero(w->cnt, sizeof(uns) * buckets);
+      work_submit(ctx->rs_work_queue, &w->w);
+    }
+
+  // Get bucket sizes from the counts
+  bzero(cnt, sizeof(cnt));
+  for (uns i=0; i<sorter_threads; i++)
+    {
+      struct rs_work *w = (struct rs_work *) work_wait(ctx->rs_work_queue);
+      ASSERT(w);
+      for (uns j=0; j<buckets; j++)
+       cnt[j] += w->cnt[j];
+    }
+
+  // Calculate bucket starts
+  uns pos = 0;
+  for (uns i=0; i<buckets; i++)
+    {
+      uns j = cnt[i];
+      cnt[i] = pos;
+      pos += j;
+    }
+  ASSERT(pos == num_elts);
+
+  // Start parallel splitting
+  for (uns i=0; i<sorter_threads; i++)
+    {
+      struct rs_work *w = ctx->rs_works[i];
+      w->w.go = rs_split;
+      for (uns j=0; j<buckets; j++)
+       {
+         uns k = w->cnt[j];            // This item's count for bucket j
+         w->cnt[j] = cnt[j];           // Becomes its private write position inside bucket j
+         cnt[j] += k;                  // After the last item, cnt[j] is the end of bucket j
+       }
+      work_submit(ctx->rs_work_queue, &w->w);
+    }
+  ASSERT(cnt[buckets-1] == num_elts);
+
+  // Wait for splits to finish
+  while (work_wait(ctx->rs_work_queue))
+    ;
+
+  // Recurse on buckets
+  pos = 0;
+  for (uns i=0; i<buckets; i++)
+    {
+      uns n = cnt[i] - pos;            // Size of bucket i (cnt[i] is its end, pos its start)
+      if (!n)
+       continue;
+      if (n < ctx->thread_threshold || shift < ASORT_MIN_SHIFT)
+       {
+         struct rs_work *w = ep_alloc(ctx->eltpool);   // Pooled item without cnt[] space; only rs_finish() runs on it
+         w->w.priority = 0;
+         w->w.go = rs_finish;
+         w->ctx = ctx;
+         w->array = buffer;            // The split moved the data to `buffer', so the roles are exchanged
+         w->buffer = array;
+         w->num_elts = n;
+         w->shift = shift;
+         w->swap_output = !swapped_output;
+         if (n < ctx->thread_chunk)
+           {
+             DBG("Sorting block %u+%u inline", pos, n);
+             rs_finish(NULL, &w->w);
+             ep_free(ctx->eltpool, w);
+           }
+         else
+           {
+             DBG("Scheduling block %u+%u", pos, n);
+             work_submit(ctx->rs_work_queue, &w->w);   // Freed later by rs_wait_small()
+           }
+       }
+      else
+       rs_radix(ctx, buffer, array, n, shift, !swapped_output);
+      pos = cnt[i];
+      array += n * ctx->elt_size;      // Advance both sides to the start of the next bucket
+      buffer += n * ctx->elt_size;
+    }
+}
+// Driver of the parallel radix-sort: set up the queue, per-thread work items and the pool, run rs_radix(), clean up.
+static void
+threaded_radixsort(struct asort_context *ctx, uns swap)
+{
+  struct work_queue q;
+
+  asort_start_threads(1);
+  work_queue_init(&asort_thread_pool, &q);
+
+  // Prepare work structures for counting and splitting.
+  // We use big_alloc(), because we want to avoid cacheline aliasing between threads.
+  ctx->rs_work_queue = &q;
+  ctx->rs_works = alloca(sizeof(struct rs_work *) * sorter_threads);
+  for (uns i=0; i<sorter_threads; i++)
+    ctx->rs_works[i] = big_alloc(sizeof(struct rs_work) + sizeof(uns) * (1 << ctx->radix_bits));      // Header plus one counter per bucket
+
+  // Prepare a pool for all remaining small bits which will be sorted on background.
+  ctx->eltpool = ep_new(sizeof(struct rs_work), 1000); // No room for cnt[]: pooled items only run rs_finish()
+
+  // Do the big splitting
+  rs_radix(ctx, ctx->array, ctx->buffer, ctx->num_elts, ctx->hash_bits, swap);
+  for (uns i=0; i<sorter_threads; i++)
+    big_free(ctx->rs_works[i], sizeof(struct rs_work) + sizeof(uns) * (1 << ctx->radix_bits));
+
+  // Finish the small blocks
+  rs_wait_small(ctx);
+
+  ASSERT(!ctx->eltpool->num_allocated);        // Every pooled work item must have been freed by now
+  ep_delete(ctx->eltpool);
+  work_queue_cleanup(&q);
+  asort_stop_threads();
+}
+
+#else
+
+void asort_start_threads(uns run UNUSED) { }  // Empty stub of the #else branch (presumably builds without CONFIG_UCW_THREADS)
+void asort_stop_threads(void) { }             // Empty stub of the #else branch (presumably builds without CONFIG_UCW_THREADS)
+
+#endif
+// Simulate the radix passes; asort_run() treats a nonzero result as "the sorted data end up in ctx->buffer".
+static uns
+predict_swap(struct asort_context *ctx)
+{
+  uns bits = ctx->radix_bits;          // NOTE(review): seeded from radix_bits rather than hash_bits -- confirm this is intended
+  uns elts = ctx->num_elts;
+  uns swap = 0;
+
+  while (elts >= ctx->radix_threshold && bits >= ASORT_MIN_SHIFT)
+    {
+      DBG("Predicting pass: %u elts, %d bits", elts, bits);
+      swap = !swap;                    // Each simulated pass flips the side holding the data
+      elts >>= ctx->radix_bits;                // Estimate: one pass spreads the data evenly over 2^radix_bits buckets
+      bits = MAX(bits, ctx->radix_bits) - ctx->radix_bits;     // Subtraction that cannot go below zero
+    }
+  return swap;
+}
+// Entry point of the array sorter: convert the byte limits to element counts, then choose and run a sorting strategy.
+void
+asort_run(struct asort_context *ctx)
+{
+  ctx->thread_threshold = MIN(sorter_thread_threshold / ctx->elt_size, ~0U);  // u64 byte limit -> element count, clamped to fit `uns'
+  ctx->thread_chunk = MIN(sorter_thread_chunk / ctx->elt_size, ~0U);
+  ctx->radix_threshold = MIN(sorter_radix_threshold / ctx->elt_size, ~0U);
+
+  ASORT_TRACE("Array-sorting %u items per %u bytes, hash_bits=%d", ctx->num_elts, ctx->elt_size, ctx->hash_bits);
+  ASORT_XTRACE(2, "Limits: thread_threshold=%u, thread_chunk=%u, radix_threshold=%u",
+       ctx->thread_threshold, ctx->thread_chunk, ctx->radix_threshold);
+  uns allow_threads UNUSED = (sorter_threads > 1 &&
+                             ctx->num_elts >= ctx->thread_threshold &&
+                             !(sorter_debug & SORT_DEBUG_ASORT_NO_THREADS));
+
+  if (ctx->num_elts < ctx->radix_threshold ||          // Quicksort path: too few elements, ...
+      ctx->hash_bits <= ASORT_MIN_SHIFT ||             // ... too few hash bits, ...
+      !ctx->radix_split ||                             // ... no radix callbacks supplied, ...
+      (sorter_debug & SORT_DEBUG_ASORT_NO_RADIX))      // ... or radix-sorting disabled by the debug flags
+    {
+#ifdef CONFIG_UCW_THREADS
+      if (allow_threads)
+       {
+         ASORT_XTRACE(2, "Decided to use parallel quicksort");
+         threaded_quicksort(ctx);
+       }
+      else
+#endif
+       {
+         ASORT_XTRACE(2, "Decided to use sequential quicksort");
+         ctx->quicksort(ctx->array, ctx->num_elts);
+       }
+    }
+  else
+    {
+      uns swap = predict_swap(ctx);
+#ifdef CONFIG_UCW_THREADS
+      if (allow_threads)
+       {
+         ASORT_XTRACE(2, "Decided to use parallel radix-sort (swap=%d)", swap);
+         threaded_radixsort(ctx, swap);
+       }
+      else
+#endif
+       {
+         ASORT_XTRACE(2, "Decided to use sequential radix-sort (swap=%d)", swap);
+         asort_radix(ctx, ctx->array, ctx->buffer, ctx->num_elts, ctx->hash_bits, swap);
+       }
+      if (swap)
+       ctx->array = ctx->buffer;       // Callers read the location of the result from ctx->array
+    }
+
+  ASORT_XTRACE(2, "Array-sort finished");
+}
diff --git a/lib/sorter/array.h b/lib/sorter/array.h

new file mode 100644 (file)

index 0000000..04a27d7
--- /dev/null
+++ b/lib/sorter/array.h
@@ -0,0 +1,321 @@
+/*
+ *     UCW Library -- Optimized Array Sorter
+ *
+ *     (c) 2003--2007 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+/*
+ *  This is a generator of routines for sorting huge arrays, similar to the one
+ *  in lib/arraysort.h. It cannot handle discontiguous arrays, but it is able
+ *  to employ radix-sorting if a monotone hash function is available and also
+ *  use several threads in parallel on SMP systems (this assumes that all
+ *  callbacks you provide are thread-safe).
+ *
+ *  It is usually called internally by the generic sorter machinery, but
+ *  you are free to use it explicitly if you need.
+ *
+ *  So much for advocacy, here are the parameters (those marked with [*]
+ *  are mandatory):
+ *
+ *  ASORT_PREFIX(x) [*]        add a name prefix (used on all global names
+ *                     defined by the sorter)
+ *  ASORT_KEY_TYPE  [*]        data type of a single array entry key
+ *  ASORT_LT(x,y)      x < y for ASORT_KEY_TYPE (default: "x<y")
+ *  ASORT_HASH(x)      a monotone hash function (satisfying hash(x) < hash(y) => x<y)
+ *  ASORT_LONG_HASH    hashes are 64-bit numbers (default is 32 bits)
+ *
+ *  Fine-tuning parameters: (if you really insist)
+ *
+ *  ASORT_THRESHOLD    threshold for switching between quicksort and insertsort
+ *  ASORT_RADIX_BITS   how many bits of the hash functions are to be used at once for
+ *                     radix-sorting.
+ *
+ *  After including this file, a function
+ *     ASORT_KEY_TYPE *ASORT_PREFIX(sort)(ASORT_KEY_TYPE *array, uns num_elts [, ASORT_KEY_TYPE *buf, uns hash_bits])
+ *  is declared and all parameter macros are automatically undef'd. Here `buf' is an
+ *  auxiliary buffer of the same size as the input array, required whenever radix
+ *  sorting should be used, and `hash_bits' is the number of significant bits returned
+ *  by the hash function. If the buffer is specified, the sorting function returns either
+ *  a pointer to the input array or to the buffer, depending on where the result is stored.
+ *  If you do not use hashing, these parameters should be omitted.
+ */
+
+#include "lib/sorter/common.h"
+
+#define Q(x) ASORT_PREFIX(x)           /* Shorthand for applying the user-supplied name prefix */
+
+typedef ASORT_KEY_TYPE Q(key);         /* Element type used by all generated functions */
+
+#ifndef ASORT_LT
+#define ASORT_LT(x,y) ((x) < (y))
+#endif
+
+#ifndef ASORT_SWAP
+#define ASORT_SWAP(i,j) do { Q(key) tmp = array[i]; array[i]=array[j]; array[j]=tmp; } while (0)       /* Expects a local `array' in scope */
+#endif
+
+#ifndef ASORT_THRESHOLD
+#define ASORT_THRESHOLD 8              /* Guesswork and experimentation */
+#endif
+
+#ifndef ASORT_RADIX_BITS
+#define ASORT_RADIX_BITS CONFIG_UCW_RADIX_SORTER_BITS
+#endif
+#define ASORT_RADIX_MASK ((1 << (ASORT_RADIX_BITS)) - 1)       /* Keeps the low ASORT_RADIX_BITS bits of a shifted hash */
+
+/* QuickSort with optimizations a'la Sedgewick, inspired by qsort() from GNU libc. */
+
+static void Q(quicksort)(void *array_ptr, uns num_elts)
+{
+  Q(key) *array = array_ptr;
+  struct stk { int l, r; } stack[8*sizeof(uns)];       /* Explicit stack of postponed partitions */
+  int l, r, left, right, m;
+  uns sp = 0;
+  Q(key) pivot;
+
+  if (num_elts <= 1)
+    return;
+
+  left = 0;
+  right = num_elts - 1;
+  for(;;)
+    {
+      l = left;
+      r = right;
+      m = (l+r)/2;
+      if (ASORT_LT(array[m], array[l]))                /* Put array[l], array[m], array[r] in order (median of three) */
+       ASORT_SWAP(l,m);
+      if (ASORT_LT(array[r], array[m]))
+       {
+         ASORT_SWAP(m,r);
+         if (ASORT_LT(array[m], array[l]))
+           ASORT_SWAP(l,m);
+       }
+      pivot = array[m];
+      do
+       {
+         while (ASORT_LT(array[l], pivot))
+           l++;
+         while (ASORT_LT(pivot, array[r]))
+           r--;
+         if (l < r)
+           {
+             ASORT_SWAP(l,r);
+             l++;
+             r--;
+           }
+         else if (l == r)
+           {
+             l++;
+             r--;
+           }
+       }
+      while (l <= r);
+      if ((r - left) >= ASORT_THRESHOLD && (right - l) >= ASORT_THRESHOLD)
+       {
+         /* Both partitions ok => push the larger one */
+         if ((r - left) > (right - l))
+           {
+             stack[sp].l = left;
+             stack[sp].r = r;
+             left = l;
+           }
+         else
+           {
+             stack[sp].l = l;
+             stack[sp].r = right;
+             right = r;
+           }
+         sp++;
+       }
+      else if ((r - left) >= ASORT_THRESHOLD)
+       {
+         /* Left partition OK, right undersize */
+         right = r;
+       }
+      else if ((right - l) >= ASORT_THRESHOLD)
+       {
+         /* Right partition OK, left undersize */
+         left = l;
+       }
+      else
+       {
+         /* Both partitions undersize => pop */
+         if (!sp)
+           break;
+         sp--;
+         left = stack[sp].l;
+         right = stack[sp].r;
+       }
+    }
+
+  /*
+   * We have a partially sorted array, finish by insertsort. Inspired
+   * by qsort() in GNU libc.
+   */
+
+  /* Find minimal element which will serve as a barrier */
+  r = MIN(num_elts, ASORT_THRESHOLD);
+  m = 0;
+  for (l=1; l<r; l++)
+    if (ASORT_LT(array[l], array[m]))
+      m = l;
+  ASORT_SWAP(0,m);
+
+  /* Insertion sort */
+  for (m=1; m<(int)num_elts; m++)
+    {
+      l=m;
+      while (ASORT_LT(array[m], array[l-1]))   /* No l > 0 check: relies on the barrier placed at array[0] */
+       l--;
+      while (l < m)                            /* Repeated swaps with array[m] shift l..m-1 up by one */
+       {
+         ASORT_SWAP(l,m);
+         l++;
+       }
+    }
+}
+
+/* Just the splitting part of QuickSort */
+
+static void Q(quicksplit)(void *array_ptr, uns num_elts, int *leftp, int *rightp)
+{
+  Q(key) *array = array_ptr;
+  int l, r, m;
+  Q(key) pivot;
+
+  l = 0;
+  r = num_elts - 1;                    /* NOTE(review): no guard for num_elts == 0; presumably callers never pass an empty range */
+  m = (l+r)/2;
+  if (ASORT_LT(array[m], array[l]))    /* Put array[l], array[m], array[r] in order (median of three) */
+    ASORT_SWAP(l,m);
+  if (ASORT_LT(array[r], array[m]))
+    {
+      ASORT_SWAP(m,r);
+      if (ASORT_LT(array[m], array[l]))
+       ASORT_SWAP(l,m);
+    }
+  pivot = array[m];
+  do
+    {
+      while (ASORT_LT(array[l], pivot))
+       l++;
+      while (ASORT_LT(pivot, array[r]))
+       r--;
+      if (l < r)
+       {
+         ASORT_SWAP(l,r);
+         l++;
+         r--;
+       }
+      else if (l == r)
+       {
+         l++;
+         r--;
+       }
+    }
+  while (l <= r);
+  *leftp = l;                          /* The loop exits with l > r; both scan positions are handed to the caller */
+  *rightp = r;
+}
+
+#ifdef ASORT_HASH
+/* Add to cnt[] the number of elements of src[] whose bucket, (hash >> shift) & ASORT_RADIX_MASK, has the given index. */
+static void Q(radix_count)(void *src_ptr, uns num_elts, uns *cnt, uns shift)
+{
+  Q(key) *src = src_ptr;
+  uns i;
+
+  switch (shift)                       /* One expanded loop per shift value; each shifts by a literal constant */
+    {
+#define RC(s) \
+    case s:                                                            \
+      for (i=0; i<num_elts; i++)                                       \
+       cnt[ (ASORT_HASH(src[i]) >> s) & ASORT_RADIX_MASK ] ++;         \
+      break;                                                           \
+
+#ifdef ASORT_LONG_HASH
+      RC(63); RC(62); RC(61); RC(60); RC(59); RC(58); RC(57); RC(56);
+      RC(55); RC(54); RC(53); RC(52); RC(51); RC(50); RC(49); RC(48);
+      RC(47); RC(46); RC(45); RC(44); RC(43); RC(42); RC(41); RC(40);
+      RC(39); RC(38); RC(37); RC(36); RC(35); RC(34); RC(33); RC(32);
+#endif
+      RC(31); RC(30); RC(29); RC(28); RC(27); RC(26); RC(25); RC(24);
+      RC(23); RC(22); RC(21); RC(20); RC(19); RC(18); RC(17); RC(16);
+      RC(15); RC(14); RC(13); RC(12); RC(11); RC(10); RC(9); RC(8);
+      RC(7); RC(6); RC(5); RC(4); RC(3); RC(2); RC(1); RC(0);
+    default:
+      ASSERT(0);                       /* Shift outside the range covered by the cases above */
+    }
+#undef RC
+}
+/* Copy every element of src[] to dest[] at the position stored in ptrs[] for its bucket, advancing that position. */
+static void Q(radix_split)(void *src_ptr, void *dest_ptr, uns num_elts, uns *ptrs, uns shift)
+{
+  Q(key) *src = src_ptr, *dest = dest_ptr;
+  uns i;
+
+  switch (shift)                       /* One expanded loop per shift value; each shifts by a literal constant */
+    {
+#define RS(s) \
+    case s:                                                                            \
+      for (i=0; i<num_elts; i++)                                                       \
+       dest[ ptrs[ (ASORT_HASH(src[i]) >> s) & ASORT_RADIX_MASK ]++ ] = src[i];        \
+      break;
+
+#ifdef ASORT_LONG_HASH
+      RS(63); RS(62); RS(61); RS(60); RS(59); RS(58); RS(57); RS(56);
+      RS(55); RS(54); RS(53); RS(52); RS(51); RS(50); RS(49); RS(48);
+      RS(47); RS(46); RS(45); RS(44); RS(43); RS(42); RS(41); RS(40);
+      RS(39); RS(38); RS(37); RS(36); RS(35); RS(34); RS(33); RS(32);
+#endif
+      RS(31); RS(30); RS(29); RS(28); RS(27); RS(26); RS(25); RS(24);
+      RS(23); RS(22); RS(21); RS(20); RS(19); RS(18); RS(17); RS(16);
+      RS(15); RS(14); RS(13); RS(12); RS(11); RS(10); RS(9); RS(8);
+      RS(7); RS(6); RS(5); RS(4); RS(3); RS(2); RS(1); RS(0);
+    default:
+      ASSERT(0);                       /* Shift outside the range covered by the cases above */
+    }
+#undef RS
+}
+
+#endif
+/* Generated entry point: describe the job in an asort_context, run asort_run() and return where the result is stored. */
+static Q(key) *Q(sort)(Q(key) *array, uns num_elts
+#ifdef ASORT_HASH
+  , Q(key) *buffer, uns hash_bits
+#endif
+  )
+{
+  struct asort_context ctx = {
+    .array = array,
+    .num_elts = num_elts,
+    .elt_size = sizeof(Q(key)),
+    .quicksort = Q(quicksort),
+    .quicksplit = Q(quicksplit),
+#ifdef ASORT_HASH
+    .buffer = buffer,
+    .hash_bits = hash_bits,
+    .radix_count = Q(radix_count),
+    .radix_split = Q(radix_split),
+    .radix_bits = ASORT_RADIX_BITS,
+#endif
+  };                                   /* Members not named here are zero-initialized, so radix_split is NULL without ASORT_HASH */
+  asort_run(&ctx);
+  return ctx.array;                    /* asort_run() redirects ctx.array to the buffer when the result ended up there */
+}
+/* Drop all parameter and helper macros, so that the header can be included again with different settings. */
+#undef ASORT_HASH
+#undef ASORT_KEY_TYPE
+#undef ASORT_LONG_HASH
+#undef ASORT_LT
+#undef ASORT_PAGE_ALIGNED              /* NOTE(review): not referenced elsewhere in this header */
+#undef ASORT_PREFIX
+#undef ASORT_RADIX_BITS
+#undef ASORT_RADIX_MASK
+#undef ASORT_SWAP
+#undef ASORT_THRESHOLD
+#undef Q
diff --git a/lib/sorter/common.h b/lib/sorter/common.h

new file mode 100644 (file)

index 0000000..ddd7ba6
--- /dev/null
+++ b/lib/sorter/common.h
@@ -0,0 +1,152 @@
+/*
+ *     UCW Library -- Universal Sorter: Common Declarations
+ *
+ *     (c) 2007 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_SORTER_COMMON_H
+#define _UCW_SORTER_COMMON_H
+
+#include "lib/clists.h"
+
+/* Configuration variables */
+extern uns sorter_trace, sorter_trace_array, sorter_stream_bufsize;
+extern uns sorter_debug, sorter_min_radix_bits, sorter_max_radix_bits, sorter_add_radix_bits;
+extern uns sorter_min_multiway_bits, sorter_max_multiway_bits;
+extern uns sorter_threads;
+extern u64 sorter_bufsize, sorter_small_input;
+extern u64 sorter_thread_threshold, sorter_thread_chunk, sorter_radix_threshold;
+extern struct fb_params sorter_fb_params, sorter_small_fb_params;
+
+#define SORT_TRACE(x...) do { if (sorter_trace) msg(L_DEBUG, x); } while(0)    /* Log at L_DEBUG whenever sorter_trace is nonzero */
+#define SORT_XTRACE(level, x...) do { if (sorter_trace >= level) msg(L_DEBUG, x); } while(0)   /* Log only if sorter_trace >= level */
+/* Bits tested in the sorter_debug configuration variable */
+enum sort_debug {
+  SORT_DEBUG_NO_PRESORT = 1,           // sorter_twoway() skips presorting (unless a custom presorter is in use)
+  SORT_DEBUG_NO_JOIN = 2,              // sbuck_join_to() never joins to the final bucket
+  SORT_DEBUG_KEEP_BUCKETS = 4,
+  SORT_DEBUG_NO_RADIX = 8,
+  SORT_DEBUG_NO_MULTIWAY = 16,
+  SORT_DEBUG_ASORT_NO_RADIX = 32,      // asort_run() always uses quicksort
+  SORT_DEBUG_ASORT_NO_THREADS = 64     // asort_run() never uses the threaded variants
+};
+
+struct sort_bucket;
+
+struct sort_context {
+  struct fastbuf *in_fb;
+  struct fastbuf *out_fb;
+  uns hash_bits;
+  u64 in_size;
+  struct fb_params *fb_params;
+
+  struct mempool *pool;
+  clist bucket_list;
+  void *big_buf;                       // Handed to custom_presort() together with big_buf_size
+  size_t big_buf_size;
+
+  int (*custom_presort)(struct fastbuf *dest, void *buf, size_t bufsize);      // Used for buckets flagged SBF_CUSTOM_PRESORT
+
+  // Take as much as possible from the source bucket, sort it in memory and dump to destination bucket.
+  // Return 1 if there is more data available in the source bucket.
+  int (*internal_sort)(struct sort_context *ctx, struct sort_bucket *in, struct sort_bucket *out, struct sort_bucket *out_only);
+
+  // Estimate how much input data from `b' will fit in the internal sorting buffer.
+  u64 (*internal_estimate)(struct sort_context *ctx, struct sort_bucket *b);
+
+  // Two-way split/merge: merge up to 2 source buckets to up to 2 destination buckets.
+  // Bucket arrays are NULL-terminated.
+  void (*twoway_merge)(struct sort_context *ctx, struct sort_bucket **ins, struct sort_bucket **outs);
+
+  // Multi-way merge: merge an arbitrary number of source buckets to a single destination bucket.
+  void (*multiway_merge)(struct sort_context *ctx, struct sort_bucket **ins, struct sort_bucket *out);
+
+  // Radix split according to hash function
+  void (*radix_split)(struct sort_context *ctx, struct sort_bucket *in, struct sort_bucket **outs, uns bitpos, uns numbits);
+
+  // State variables of internal_sort
+  void *key_buf;
+  int more_keys;
+
+  // Timing
+  timestamp_t start_time;              // Set by init_timer() when a timed pass starts
+  uns last_pass_time;                  // Result of get_timer() for the most recent pass
+  uns total_int_time, total_pre_time, total_ext_time;  // Accumulated pass times per category
+};
+
+void sorter_run(struct sort_context *ctx);
+
+/* Buffers */
+
+void *sorter_alloc(struct sort_context *ctx, uns size);
+void sorter_prepare_buf(struct sort_context *ctx);
+void sorter_alloc_buf(struct sort_context *ctx);
+void sorter_free_buf(struct sort_context *ctx);
+
+/* Buckets */
+
+struct sort_bucket {
+  cnode n;                             // Must stay first: list nodes are cast back to struct sort_bucket *
+  struct sort_context *ctx;
+  uns flags;                           // Combination of SBF_xxx bits
+  struct fastbuf *fb;
+  byte *filename;
+  u64 size;                            // Size in bytes (not valid when writing)
+  uns runs;                            // Number of runs, 0 if not sorted
+  uns hash_bits;                       // Remaining bits of the hash function
+  byte *ident;                         // Identifier used in debug messages
+};
+/* Bits stored in sort_bucket.flags */
+enum sort_bucket_flags {
+  SBF_FINAL = 1,                       // This bucket corresponds to the final output file (always 1 run)
+  SBF_SOURCE = 2,                      // Contains the source file (always 0 runs)
+  SBF_CUSTOM_PRESORT = 4,              // Contains source to read via custom presorter
+  SBF_OPEN_WRITE = 256,                        // We are currently writing to the fastbuf
+  SBF_OPEN_READ = 512,                 // We are reading from the fastbuf
+  SBF_DESTROYED = 1024,                        // Already done with, no further references allowed
+  SBF_SWAPPED_OUT = 2048,              // Swapped out to a named file
+};
+
+struct sort_bucket *sbuck_new(struct sort_context *ctx);
+void sbuck_drop(struct sort_bucket *b);
+int sbuck_have(struct sort_bucket *b);
+int sbuck_has_file(struct sort_bucket *b);
+sh_off_t sbuck_size(struct sort_bucket *b);
+struct fastbuf *sbuck_read(struct sort_bucket *b);
+struct fastbuf *sbuck_write(struct sort_bucket *b);
+void sbuck_swap_out(struct sort_bucket *b);
+
+/* Contexts and helper functions for the array sorter */
+
+struct asort_context {
+  // Interface between generic code in array.c and functions generated by array.h
+  void *array;                         // Array to sort
+  void *buffer;                                // Auxiliary buffer (required when radix-sorting)
+  uns num_elts;                                // Number of elements in the array
+  uns elt_size;                                // Bytes per element
+  uns hash_bits;                       // Remaining bits of the hash function
+  uns radix_bits;                      // How many bits to process in a single radix-sort pass
+  void (*quicksort)(void *array_ptr, uns num_elts);    // Sort the given array in place
+  void (*quicksplit)(void *array_ptr, uns num_elts, int *leftp, int *rightp);  // One partitioning step; stores both scan positions
+  void (*radix_count)(void *src_ptr, uns num_elts, uns *cnt, uns shift);       // Add per-bucket element counts to cnt[]
+  void (*radix_split)(void *src_ptr, void *dest_ptr, uns num_elts, uns *ptrs, uns shift);      // Scatter to dest at positions in ptrs[]; NULL disables radix-sorting
+
+  // Used internally by array.c
+  struct rs_work **rs_works;           // One work item per sorter thread
+  struct work_queue *rs_work_queue;
+  struct eltpool *eltpool;             // Pool of work items for the small blocks
+
+  // Configured limits translated from bytes to elements
+  uns thread_threshold;
+  uns thread_chunk;
+  uns radix_threshold;
+};
+
+void asort_run(struct asort_context *ctx);
+void asort_start_threads(uns run);
+void asort_stop_threads(void);
+
+#endif
diff --git a/lib/sorter/config.c b/lib/sorter/config.c

new file mode 100644 (file)

index 0000000..9ff646b
--- /dev/null
+++ b/lib/sorter/config.c
@@ -0,0 +1,57 @@
+/*
+ *     UCW Library -- Universal Sorter: Configuration
+ *
+ *     (c) 2007 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/conf.h"
+#include "lib/fastbuf.h"
+#include "lib/sorter/common.h"
+
+uns sorter_trace;                      // Config item "Trace"
+uns sorter_trace_array;                        // Config item "TraceArray"
+u64 sorter_bufsize = 65536;            // Config item "SortBuffer"
+uns sorter_debug;                      // Config item "Debug"; bits of enum sort_debug
+uns sorter_min_radix_bits;             // Config item "MinRadixBits"
+uns sorter_max_radix_bits;             // Config item "MaxRadixBits"
+uns sorter_add_radix_bits;             // Config item "AddRadixBits"
+uns sorter_min_multiway_bits;          // Config item "MinMultiwayBits"
+uns sorter_max_multiway_bits;          // Config item "MaxMultiwayBits"
+uns sorter_threads;                    // Config item "Threads"
+u64 sorter_thread_threshold = 1048576; // Config item "ThreadThreshold"
+u64 sorter_thread_chunk = 4096;                // Config item "ThreadChunk"
+u64 sorter_radix_threshold = 4096;     // Config item "RadixThreshold"
+struct fb_params sorter_fb_params;     // Config subsection "FileAccess"
+struct fb_params sorter_small_fb_params;
+u64 sorter_small_input;                        // Config item "SmallInput"
+/* Items of the "Sorter" configuration section, each bound to the global variable it sets. */
+static struct cf_section sorter_config = {
+  CF_ITEMS {
+    CF_UNS("Trace", &sorter_trace),
+    CF_UNS("TraceArray", &sorter_trace_array),
+    CF_SECTION("FileAccess", &sorter_fb_params, &fbpar_cf),
+    CF_SECTION("SmallFileAccess", &sorter_small_fb_params, &fbpar_cf), // Own variable, must not alias FileAccess
+    CF_U64("SmallInput", &sorter_small_input),
+    CF_U64("SortBuffer", &sorter_bufsize),
+    CF_UNS("Debug", &sorter_debug),
+    CF_UNS("MinRadixBits", &sorter_min_radix_bits),
+    CF_UNS("MaxRadixBits", &sorter_max_radix_bits),
+    CF_UNS("AddRadixBits", &sorter_add_radix_bits),
+    CF_UNS("MinMultiwayBits", &sorter_min_multiway_bits),
+    CF_UNS("MaxMultiwayBits", &sorter_max_multiway_bits),
+    CF_UNS("Threads", &sorter_threads),
+    CF_U64("ThreadThreshold", &sorter_thread_threshold),
+    CF_U64("ThreadChunk", &sorter_thread_chunk),
+    CF_U64("RadixThreshold", &sorter_radix_threshold),
+    CF_END
+  }
+};
+/* Declares sorter_config as the "Sorter" section; marked CONSTRUCTOR (presumably run automatically at startup). */
+static void CONSTRUCTOR sorter_init_config(void)
+{
+  cf_declare_section("Sorter", &sorter_config, 0);
+}
diff --git a/lib/sorter/govern.c b/lib/sorter/govern.c

new file mode 100644 (file)

index 0000000..dbdbd47
--- /dev/null
+++ b/lib/sorter/govern.c
@@ -0,0 +1,440 @@
+/*
+ *     UCW Library -- Universal Sorter: Governing Routines
+ *
+ *     (c) 2007 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/fastbuf.h"
+#include "lib/mempool.h"
+#include "lib/stkstring.h"
+#include "lib/sorter/common.h"
+
+#include <string.h>
+#include <sys/time.h>
+#include <time.h>
+
+#define F_BSIZE(b) stk_fsize(sbuck_size(b))    /* Size of bucket b formatted by stk_fsize(), used as a %s argument in traces */
+// Start timing of a pass: (re)initialize the timer kept in ctx->start_time.
+static void
+sorter_start_timer(struct sort_context *ctx)
+{
+  init_timer(&ctx->start_time);
+}
+// Stop timing of a pass: remember the elapsed time in ctx->last_pass_time and add it to the counter *account_to.
+static void
+sorter_stop_timer(struct sort_context *ctx, uns *account_to)
+{
+  ctx->last_pass_time = get_timer(&ctx->start_time);
+  *account_to += ctx->last_pass_time;
+}
+// Speed of the last timed pass over `size' bytes, printed by callers as MB/s; 0 if the size or the time is zero.
+static uns
+sorter_speed(struct sort_context *ctx, u64 size)
+{
+  if (!size)
+    return 0;
+  if (!ctx->last_pass_time)            // Also avoids division by zero below
+    return 0;
+  return (uns)((double)size / (1<<20) * 1000 / ctx->last_pass_time);   // Assumes last_pass_time is in milliseconds -- TODO confirm
+}
+// One presorting step from `in' to `out': custom presorter for flagged buckets, ctx->internal_sort otherwise.
+static int
+sorter_presort(struct sort_context *ctx, struct sort_bucket *in, struct sort_bucket *out, struct sort_bucket *out_only)
+{
+  sorter_alloc_buf(ctx);
+  if (in->flags & SBF_CUSTOM_PRESORT)
+    {
+      /*
+       *  The trick with automatic joining, which we use for the normal presorter,
+       *  is not necessary with the custom presorter, because the custom presorter
+       *  is never called in the middle of the sorted data.
+       */
+      struct fastbuf *f = sbuck_write(out);
+      out->runs++;                     // Each call is accounted as one more run in `out'
+      return ctx->custom_presort(f, ctx->big_buf, ctx->big_buf_size);  // `out_only' is not used on this path
+    }
+  return ctx->internal_sort(ctx, in, out, out_only);   // Nonzero means more input is waiting in `in'
+}
+// If the bucket preceding b in the list is the final one, return it and store its current size to *sizep; else NULL.
+static struct sort_bucket *
+sbuck_join_to(struct sort_bucket *b, sh_off_t *sizep)
+{
+  if (sorter_debug & SORT_DEBUG_NO_JOIN)
+    return NULL;                       // Joining disabled; *sizep is left untouched
+
+  struct sort_bucket *out = (struct sort_bucket *) b->n.prev;  // Such bucket is guaranteed to exist
+  if (!(out->flags & SBF_FINAL))
+    return NULL;
+  ASSERT(out->runs == 1);
+  *sizep = sbuck_size(out);
+  return out;
+}
+// Conclude a step: either account the data joined to `join', or insert b after list_pos. Returns the resulting size.
+static sh_off_t
+sbuck_ins_or_join(struct sort_bucket *b, cnode *list_pos, struct sort_bucket *join, sh_off_t join_size)
+{
+  if (join && join->runs >= 2)         // A second run has been added to the join bucket
+    {
+      if (b)
+       sbuck_drop(b);
+      ASSERT(join->runs == 2);
+      join->runs--;                    // Count the joined data as part of the single final run
+      return sbuck_size(join) - join_size;     // Growth of `join' since join_size was recorded
+    }
+  else if (b)
+    {
+      clist_insert_after(&b->n, list_pos);
+      return sbuck_size(b);
+    }
+  else
+    return 0;
+}
+// Attach the single-run bucket b to the final bucket preceding it: take over its role, or copy b's data into it.
+static void
+sorter_join(struct sort_bucket *b)
+{
+  struct sort_bucket *join = (struct sort_bucket *) b->n.prev;
+  ASSERT(join->flags & SBF_FINAL);
+  ASSERT(b->runs == 1);
+
+  if (!sbuck_has_file(join))
+    {
+      // The final bucket doesn't have any file associated yet, so replace
+      // it with the new bucket.
+      SORT_XTRACE(3, "Replaced final bucket");
+      b->flags |= SBF_FINAL;
+      sbuck_drop(join);
+    }
+  else
+    {
+      SORT_TRACE("Copying to output file: %s", F_BSIZE(b));
+      struct fastbuf *src = sbuck_read(b);
+      struct fastbuf *dest = sbuck_write(join);
+      bbcopy(src, dest, ~0U);          // ~0U presumably means "copy everything" -- TODO confirm
+      sbuck_drop(b);
+    }
+}
+// Sort bucket b by presorting it into runs in two buckets and repeating two-way merge passes until one bucket remains.
+static void
+sorter_twoway(struct sort_context *ctx, struct sort_bucket *b)
+{
+  struct sort_bucket *ins[3] = { NULL }, *outs[3] = { NULL };  // NULL-terminated, as twoway_merge expects
+  cnode *list_pos = b->n.prev;         // Position in the bucket list where the result belongs
+  sh_off_t join_size;
+  struct sort_bucket *join = sbuck_join_to(b, &join_size);
+
+  if (!(sorter_debug & SORT_DEBUG_NO_PRESORT) || (b->flags & SBF_CUSTOM_PRESORT))
+    {
+      SORT_XTRACE(3, "%s", ((b->flags & SBF_CUSTOM_PRESORT) ? "Custom presorting" : "Presorting"));
+      sorter_start_timer(ctx);
+      ins[0] = sbuck_new(ctx);
+      if (!sorter_presort(ctx, b, ins[0], join ? : ins[0]))    // Zero: the whole input was handled by one presort step
+       {
+         sorter_stop_timer(ctx, &ctx->total_pre_time);
+         sh_off_t size = sbuck_ins_or_join(ins[0], list_pos, join, join_size);
+         SORT_XTRACE(((b->flags & SBF_SOURCE) ? 1 : 3), "Sorted in memory (%s, %dMB/s)", stk_fsize(size), sorter_speed(ctx, size));
+         sbuck_drop(b);
+         return;
+       }
+
+      ins[1] = sbuck_new(ctx);
+      int i = 1;
+      while (sorter_presort(ctx, b, ins[i], ins[i]))   // Alternate the output between the two buckets
+       i = 1-i;
+      sbuck_drop(b);
+      sorter_stop_timer(ctx, &ctx->total_pre_time);
+      SORT_TRACE("Presorting pass (%d+%d runs, %s+%s, %dMB/s)",
+                ins[0]->runs, ins[1]->runs,
+                F_BSIZE(ins[0]), F_BSIZE(ins[1]),
+                sorter_speed(ctx, sbuck_size(ins[0]) + sbuck_size(ins[1])));
+    }
+  else
+    {
+      SORT_XTRACE(2, "Presorting disabled");
+      ins[0] = b;                      // ins[1] stays NULL on this path
+    }
+
+  SORT_XTRACE(3, "Main sorting");
+  uns pass = 0;
+  do {
+    ++pass;
+    sorter_start_timer(ctx);
+    if (ins[0]->runs <= 1 && ins[1]->runs <= 1 && join)        // NOTE(review): ins[1] is still NULL here on the first pass if presorting was disabled
+      {
+       // This is guaranteed to produce a single run, so join if possible
+       outs[0] = join;
+       outs[1] = NULL;
+       ctx->twoway_merge(ctx, ins, outs);
+       sh_off_t size = sbuck_ins_or_join(NULL, NULL, join, join_size);
+       sorter_stop_timer(ctx, &ctx->total_ext_time);
+       SORT_TRACE("Mergesort pass %d (final run, %s, %dMB/s)", pass, stk_fsize(size), sorter_speed(ctx, size));
+       sbuck_drop(ins[0]);
+       sbuck_drop(ins[1]);
+       return;
+      }
+    outs[0] = sbuck_new(ctx);
+    outs[1] = sbuck_new(ctx);
+    outs[2] = NULL;
+    ctx->twoway_merge(ctx, ins, outs);
+    sorter_stop_timer(ctx, &ctx->total_ext_time);
+    SORT_TRACE("Mergesort pass %d (%d+%d runs, %s+%s, %dMB/s)", pass,
+              outs[0]->runs, outs[1]->runs,
+              F_BSIZE(outs[0]), F_BSIZE(outs[1]),
+              sorter_speed(ctx, sbuck_size(outs[0]) + sbuck_size(outs[1])));
+    sbuck_drop(ins[0]);
+    sbuck_drop(ins[1]);
+    memcpy(ins, outs, 3*sizeof(struct sort_bucket *)); // Outputs of this pass are the inputs of the next one
+  } while (sbuck_have(ins[1]));                // Stop as soon as the second bucket received no data
+
+  sbuck_drop(ins[1]);
+  clist_insert_after(&ins[0]->n, list_pos);
+}
+// Sort bucket b by presorting it into separate parts and merging up to 2^sorter_max_multiway_bits of them at a time.
+static void
+sorter_multiway(struct sort_context *ctx, struct sort_bucket *b)
+{
+  clist parts;                         // Presorted parts waiting to be merged
+  cnode *list_pos = b->n.prev;         // Position in the bucket list where the result belongs
+  sh_off_t join_size;
+  struct sort_bucket *join = sbuck_join_to(b, &join_size);
+  uns trace_level = (b->flags & SBF_SOURCE) ? 1 : 3;
+
+  clist_init(&parts);
+  ASSERT(!(sorter_debug & SORT_DEBUG_NO_PRESORT));
+  SORT_XTRACE(3, "%s", ((b->flags & SBF_CUSTOM_PRESORT) ? "Custom presorting" : "Presorting"));
+  uns cont;
+  uns part_cnt = 0;
+  u64 total_size = 0;
+  sorter_start_timer(ctx);
+  do
+    {
+      struct sort_bucket *p = sbuck_new(ctx);
+      cont = sorter_presort(ctx, b, p, (!part_cnt && join) ? join : p);        // `join' is offered only while no part exists yet
+      if (sbuck_have(p))
+       {
+         part_cnt++;
+         clist_add_tail(&parts, &p->n);
+         total_size += sbuck_size(p);
+         sbuck_swap_out(p);
+       }
+      else
+       sbuck_drop(p);
+    }
+  while (cont);
+  sorter_stop_timer(ctx, &ctx->total_pre_time);
+  sorter_free_buf(ctx);
+  sbuck_drop(b);
+
+  if (part_cnt <= 1)                   // At most one part: nothing to merge
+    {
+      sh_off_t size = sbuck_ins_or_join(clist_head(&parts), list_pos, (part_cnt ? NULL : join), join_size);
+      SORT_XTRACE(trace_level, "Sorted in memory (%s, %dMB/s)", stk_fsize(size), sorter_speed(ctx, size));
+      return;
+    }
+
+  SORT_TRACE("Multi-way presorting pass (%d parts, %s, %dMB/s)", part_cnt, stk_fsize(total_size), sorter_speed(ctx, total_size));
+
+  uns max_ways = 1 << sorter_max_multiway_bits;
+  struct sort_bucket *ways[max_ways+1];        // Variable-length array; one extra slot for the NULL terminator
+  SORT_XTRACE(3, "Starting up to %d-way merge", max_ways);
+  for (;;)
+    {
+      uns n = 0;
+      struct sort_bucket *p;
+      while (n < max_ways && (p = clist_head(&parts))) // Take up to max_ways parts from the head of the list
+       {
+         clist_remove(&p->n);
+         ways[n++] = p;
+       }
+      ways[n] = NULL;
+      ASSERT(n > 1);
+
+      struct sort_bucket *out;
+      if (clist_empty(&parts) && join) // Last merge: write directly to the final bucket if possible
+       out = join;
+      else
+       out = sbuck_new(ctx);
+      sorter_start_timer(ctx);
+      ctx->multiway_merge(ctx, ways, out);
+      sorter_stop_timer(ctx, &ctx->total_ext_time);
+
+      for (uns i=0; i<n; i++)
+       sbuck_drop(ways[i]);
+
+      if (clist_empty(&parts))
+       {
+         sh_off_t size = sbuck_ins_or_join((join ? NULL : out), list_pos, join, join_size);
+         SORT_TRACE("Multi-way merge completed (%d ways, %s, %dMB/s)", n, stk_fsize(size), sorter_speed(ctx, size));
+         return;
+       }
+      else
+       {
+         sbuck_swap_out(out);
+         clist_add_tail(&parts, &out->n);      // The merged part queues up behind the remaining ones
+         SORT_TRACE("Multi-way merge pass (%d ways, %s, %dMB/s)", n, F_BSIZE(out), sorter_speed(ctx, sbuck_size(out)));
+       }
+    }
+}
+
+/*
+ * Radix-split pass: distribute the contents of bucket b into 2^bits new
+ * buckets by calling ctx->radix_split, then drop b.
+ *
+ * bits is the number of hash bits the caller wants to consume; it is raised
+ * by sorter_add_radix_bits and capped at sorter_max_radix_bits before use.
+ */
+static void
+sorter_radix(struct sort_context *ctx, struct sort_bucket *b, uns bits)
+{
+  // Add more bits if requested and allowed.
+  bits = MIN(bits + sorter_add_radix_bits, sorter_max_radix_bits);
+
+  uns nbuck = 1 << bits;
+  SORT_XTRACE(3, "Running radix split on %s with hash %d bits of %d (expecting %s buckets)",
+             F_BSIZE(b), bits, b->hash_bits, stk_fsize(sbuck_size(b) / nbuck));
+  sorter_free_buf(ctx);
+  sorter_start_timer(ctx);
+
+  // The array of output buckets lives on the stack (alloca), it is needed only during this call.
+  struct sort_bucket **outs = alloca(nbuck * sizeof(struct sort_bucket *));
+  // Walk downwards and always insert next to b, so that outs[0] presumably ends
+  // up closest to b and the new buckets follow it in increasing order.
+  for (uns i=nbuck; i--; )
+    {
+      outs[i] = sbuck_new(ctx);
+      outs[i]->hash_bits = b->hash_bits - bits;        // The split consumes `bits' of the remaining hash bits
+      clist_insert_after(&outs[i]->n, &b->n);
+    }
+
+  // Split on the `bits' hash bits starting at bit position b->hash_bits - bits.
+  ctx->radix_split(ctx, b, outs, b->hash_bits - bits, bits);
+
+  // Gather size statistics for the trace message below.
+  u64 min = ~(u64)0, max = 0, sum = 0;
+  for (uns i=0; i<nbuck; i++)
+    {
+      u64 s = sbuck_size(outs[i]);
+      min = MIN(min, s);
+      max = MAX(max, s);
+      sum += s;
+      // With more than 4 output buckets, swap each of them out.
+      if (nbuck > 4)
+       sbuck_swap_out(outs[i]);
+    }
+
+  sorter_stop_timer(ctx, &ctx->total_ext_time);
+  SORT_TRACE("Radix split (%d buckets, %s min, %s max, %s avg, %dMB/s)", nbuck,
+            stk_fsize(min), stk_fsize(max), stk_fsize(sum / nbuck), sorter_speed(ctx, sum));
+  sbuck_drop(b);
+}
+
+/*
+ * Choose and run one sorting step for bucket b.
+ *
+ * Empty buckets are dropped, buckets already marked as runs are joined, and
+ * everything else is handed to the radix-split, multi-way or two-way
+ * strategy, depending on how many bits of size have to be shaved off before
+ * the data fit into the estimated memory.
+ */
+static void
+sorter_decide(struct sort_context *ctx, struct sort_bucket *b)
+{
+  // Drop empty buckets
+  if (!sbuck_have(b))
+    {
+      SORT_XTRACE(4, "Dropping empty bucket");
+      sbuck_drop(b);
+      return;
+    }
+
+  // How many bits of bucket size we have to reduce before it fits in the RAM?
+  // (this is insanely large if the input size is unknown, but it serves our purpose)
+  u64 insize = sbuck_size(b);
+  u64 mem = ctx->internal_estimate(ctx, b) * 0.8;      // Magical factor accounting for various non-uniformities
+  uns bits = 0;
+  while ((insize >> bits) > mem)
+    bits++;
+
+  // Calculate the possibilities of radix splits
+  uns radix_bits;
+  if (!ctx->radix_split ||
+      (b->flags & SBF_CUSTOM_PRESORT) ||
+      (sorter_debug & SORT_DEBUG_NO_RADIX))
+    radix_bits = 0;
+  else
+    {
+      // Limited by the needed reduction, the hash bits still available and the configured maximum
+      radix_bits = MIN(bits, b->hash_bits);
+      radix_bits = MIN(radix_bits, sorter_max_radix_bits);
+      if (radix_bits < sorter_min_radix_bits)
+       radix_bits = 0;
+    }
+
+  // The same for multi-way merges
+  uns multiway_bits;
+  if (!ctx->multiway_merge ||
+      (sorter_debug & SORT_DEBUG_NO_MULTIWAY) ||
+      (sorter_debug & SORT_DEBUG_NO_PRESORT))
+    multiway_bits = 0;
+  else
+    {
+      multiway_bits = MIN(bits, sorter_max_multiway_bits);
+      if (multiway_bits < sorter_min_multiway_bits)
+       multiway_bits = 0;
+    }
+
+  SORT_XTRACE(3, "Decisions: size=%s max=%s runs=%d bits=%d hash=%d -> radix=%d multi=%d",
+       stk_fsize(insize), stk_fsize(mem), b->runs, bits, b->hash_bits,
+       radix_bits, multiway_bits);
+
+  // If the input already consists of a single run, just join it
+  if (b->runs)
+    return sorter_join(b);
+
+  // If everything fits in memory, the 2-way strategy will sort it in memory
+  if (!bits)
+    return sorter_twoway(ctx, b);
+
+  // If we can reduce everything in one pass, do so and prefer radix splits
+  if (radix_bits == bits)
+    return sorter_radix(ctx, b, radix_bits);
+  if (multiway_bits == bits)
+    return sorter_multiway(ctx, b);
+
+  // Otherwise, reduce as much as possible and again prefer radix splits
+  if (radix_bits)
+    return sorter_radix(ctx, b, radix_bits);
+  if (multiway_bits)
+    return sorter_multiway(ctx, b);
+
+  // Fall back to 2-way strategy if nothing else applies
+  return sorter_twoway(ctx, b);
+}
+
+/*
+ * Run the whole sorter described by ctx.
+ *
+ * Builds a bucket list consisting of the output bucket (always kept at the
+ * head) followed by the source bucket, then repeatedly lets sorter_decide()
+ * process the bucket following the output one until none is left.  On return,
+ * ctx->out_fb is the output bucket's fastbuf as returned by sbuck_read().
+ */
+void
+sorter_run(struct sort_context *ctx)
+{
+  ctx->pool = mp_new(4096);
+  clist_init(&ctx->bucket_list);
+  sorter_prepare_buf(ctx);
+  asort_start_threads(0);
+
+  // Create bucket containing the source
+  struct sort_bucket *bin = sbuck_new(ctx);
+  bin->flags = SBF_SOURCE | SBF_OPEN_READ;
+  // With a custom presorter the source bucket gets no fastbuf, only the flag
+  if (ctx->custom_presort)
+    bin->flags |= SBF_CUSTOM_PRESORT;
+  else
+    bin->fb = ctx->in_fb;
+  bin->ident = "in";
+  bin->size = ctx->in_size;
+  bin->hash_bits = ctx->hash_bits;
+  clist_add_tail(&ctx->bucket_list, &bin->n);
+  SORT_XTRACE(2, "Input size: %s, %d hash bits", F_BSIZE(bin), bin->hash_bits);
+  // Inputs smaller than sorter_small_input use a separate set of fastbuf parameters
+  ctx->fb_params = (bin->size < sorter_small_input) ? &sorter_small_fb_params : &sorter_fb_params;
+
+  // Create bucket for the output
+  struct sort_bucket *bout = sbuck_new(ctx);
+  bout->flags = SBF_FINAL;
+  // The assignment is intentional: a caller-supplied output fastbuf is adopted as already open for writing
+  if (bout->fb = ctx->out_fb)
+    bout->flags |= SBF_OPEN_WRITE;
+  bout->ident = "out";
+  bout->runs = 1;
+  clist_add_head(&ctx->bucket_list, &bout->n);
+
+  // Repeatedly sort buckets
+  // (bout is re-read as the list head in every iteration; we stop when nothing follows it)
+  struct sort_bucket *b;
+  while (bout = clist_head(&ctx->bucket_list), b = clist_next(&ctx->bucket_list, &bout->n))
+    sorter_decide(ctx, b);
+
+  asort_stop_threads();
+  sorter_free_buf(ctx);
+  sbuck_write(bout);           // Force empty bucket to a file
+  SORT_XTRACE(2, "Final size: %s", F_BSIZE(bout));
+  SORT_XTRACE(2, "Final timings: %.3fs external sorting, %.3fs presorting, %.3fs internal sorting",
+             ctx->total_ext_time/1000., ctx->total_pre_time/1000., ctx->total_int_time/1000.);
+  // Hand the output bucket's fastbuf, switched to reading, back to the caller
+  ctx->out_fb = sbuck_read(bout);
+  mp_delete(ctx->pool);
+}
diff --git a/lib/sorter/s-fixint.h b/lib/sorter/s-fixint.h

new file mode 100644 (file)

index 0000000..20cce74
--- /dev/null
+++ b/lib/sorter/s-fixint.h
@@ -0,0 +1,126 @@
+/*
+ *     UCW Library -- Universal Sorter: Fixed-Size Internal Sorting Module
+ *
+ *     (c) 2007 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/stkstring.h"
+
+/*
+ *  Parameters for lib/sorter/array.h, included below: the array elements are
+ *  the keys themselves and they are ordered by P(compare).  The P(array_sort)
+ *  function called by this module is presumably generated by that include.
+ */
+#define ASORT_PREFIX(x) SORT_PREFIX(array_##x)
+#define ASORT_KEY_TYPE P(key)
+#define ASORT_LT(x,y) (P(compare)(&(x), &(y)) < 0)
+#ifdef SORT_INTERNAL_RADIX
+/* Radix sorting inside the array sorter needs a hash of each element */
+#  define ASORT_HASH(x) P(hash)(&(x))
+#    ifdef SORT_LONG_HASH
+#      define ASORT_LONG_HASH
+#    endif
+#endif
+#include "lib/sorter/array.h"
+
+/*
+ *  This is a more efficient implementation of the internal sorter,
+ *  which runs under the following assumptions:
+ *
+ *     - the keys have fixed (and small) size
+ *     - no data are present after the key
+ *     - unification does not require any workspace
+ */
+
+/*
+ *  Number of bytes of workspace needed per key: a key pointer when unifying,
+ *  and at least one key-sized slot when SORT_INTERNAL_RADIX is defined.
+ */
+static size_t P(internal_workspace)(void)
+{
+#ifdef SORT_UNIFY
+  size_t per_key = sizeof(P(key) *);
+#else
+  size_t per_key = 0;
+#endif
+#ifdef SORT_INTERNAL_RADIX
+  if (per_key < sizeof(P(key)))
+    per_key = sizeof(P(key));
+#endif
+  return per_key;
+}
+
+/*
+ *  How many keys (together with their per-key workspace) fit into the
+ *  sorting buffer.  The result is clamped to the range of uns.
+ */
+static uns P(internal_num_keys)(struct sort_context *ctx)
+{
+  size_t per_key_ws = P(internal_workspace)();
+  size_t usable = ctx->big_buf_size;
+  if (per_key_ws)
+    usable -= CPU_PAGE_SIZE;           // P(internal) aligns the workspace to a page boundary
+  u64 fit = usable / (sizeof(P(key)) + per_key_ws);
+  if (fit > ~0U)                       // The number of records must fit in uns
+    return ~0U;
+  return fit;
+}
+
+static int P(internal)(struct sort_context *ctx, struct sort_bucket *bin, struct sort_bucket *bout, struct sort_bucket *bout_only)
+{
+  sorter_alloc_buf(ctx);
+  struct fastbuf *in = sbuck_read(bin);
+  P(key) *buf = ctx->big_buf;
+  uns maxkeys = P(internal_num_keys)(ctx);
+
+  SORT_XTRACE(5, "s-fixint: Reading (maxkeys=%u, hash_bits=%d)", maxkeys, bin->hash_bits);
+  uns n = 0;
+  while (n < maxkeys && P(read_key)(in, &buf[n]))
+    n++;
+  if (!n)
+    return 0;
+  void *workspace UNUSED = ALIGN_PTR(&buf[n], CPU_PAGE_SIZE);
+
+  SORT_XTRACE(4, "s-fixint: Sorting %u items (%s items, %s workspace)",
+       n,
+       stk_fsize(n * sizeof(P(key))),
+       stk_fsize(n * P(internal_workspace)()));
+  timestamp_t timer;
+  init_timer(&timer);
+  buf = P(array_sort)(buf, n
+#ifdef SORT_INTERNAL_RADIX
+    , workspace, bin->hash_bits
+#endif
+    );
+  if ((void *)buf != ctx->big_buf)
+    workspace = ctx->big_buf;
+  ctx->total_int_time += get_timer(&timer);
+
+  SORT_XTRACE(5, "s-fixint: Writing");
+  if (n < maxkeys)
+    bout = bout_only;
+  struct fastbuf *out = sbuck_write(bout);
+  bout->runs++;
+  uns merged UNUSED = 0;
+  for (uns i=0; i<n; i++)
+    {
+#ifdef SORT_UNIFY
+      if (i < n-1 && !P(compare)(&buf[i], &buf[i+1]))
+       {
+         P(key) **keys = workspace;
+         uns n = 2;
+         keys[0] = &buf[i];
+         keys[1] = &buf[i+1];
+         while (!P(compare)(&buf[i], &buf[i+n]))
+           {
+             keys[n] = &buf[i+n];
+             n++;
+           }
+         P(write_merged)(out, keys, NULL, n, NULL);
+         merged += n - 1;
+         i += n - 1;
+         continue;
+       }
+#endif
+#ifdef SORT_ASSERT_UNIQUE
+      ASSERT(i == n-1 || P(compare)(&buf[i], &buf[i+1]) < 0);
+#endif
+      P(write_key)(out, &buf[i]);
+    }
+#ifdef SORT_UNIFY
+  SORT_XTRACE(4, "Merging reduced %u records", merged);
+#endif
+
+  return (n == maxkeys);
+}
+
+/*
+ *  Estimate how many bytes of input a single internal sorting pass can
+ *  handle, derived from the number of keys which fit in the buffer.
+ */
+static u64
+P(internal_estimate)(struct sort_context *ctx, struct sort_bucket *b UNUSED)
+{
+  uns capacity = P(internal_num_keys)(ctx);
+  // One less than the full capacity: if the buffer is full, we don't recognize EOF
+  return capacity * sizeof(P(key)) - 1;
+}
diff --git a/lib/sorter/s-internal.h b/lib/sorter/s-internal.h

new file mode 100644 (file)

index 0000000..8cb869f
--- /dev/null
+++ b/lib/sorter/s-internal.h
@@ -0,0 +1,252 @@
+/*
+ *     UCW Library -- Universal Sorter: Internal Sorting Module
+ *
+ *     (c) 2007 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/stkstring.h"
+
+#ifdef SORT_INTERNAL_RADIX
+/* Keep copies of the items' hashes to save cache misses */
+#define SORT_COPY_HASH
+#endif
+
+/*
+ *  Element of the array which is actually sorted: a pointer to the key
+ *  stored elsewhere in the big buffer, optionally with a copy of its hash.
+ */
+typedef struct {
+  P(key) *key;
+#ifdef SORT_COPY_HASH
+  P(hash_t) hash;
+#endif
+} P(internal_item_t);
+
+/*
+ *  Parameters for lib/sorter/array.h, included below: the array sorter works
+ *  on internal_item_t's.  With copied hashes, items are ordered by hash first
+ *  and P(compare) is consulted only when the hashes are equal.
+ */
+#define ASORT_PREFIX(x) SORT_PREFIX(array_##x)
+#define ASORT_KEY_TYPE P(internal_item_t)
+#ifdef SORT_COPY_HASH
+#  ifdef SORT_INT
+#    define ASORT_LT(x,y) ((x).hash < (y).hash)                // In this mode, the hash is the value
+#  else
+#    define ASORT_LT(x,y) ((x).hash < (y).hash || (x).hash == (y).hash && P(compare)((x).key, (y).key) < 0)
+#  endif
+#else
+#  define ASORT_LT(x,y) (P(compare)((x).key, (y).key) < 0)
+#endif
+#ifdef SORT_INTERNAL_RADIX
+/* The hash handed to the array sorter: the stored copy if we have one, otherwise computed from the key */
+#    ifdef SORT_COPY_HASH
+#      define ASORT_HASH(x) (x).hash
+#    else
+#      define ASORT_HASH(x) P(hash)((x).key)
+#    endif
+#    ifdef SORT_LONG_HASH
+#      define ASORT_LONG_HASH
+#    endif
+#endif
+#include "lib/sorter/array.h"
+
+/*
+ *  The big_buf has the following layout:
+ *
+ *     +-------------------------------------------------------------------------------+
+ *     | array of internal_item's                                                      |
+ *     +-------------------------------------------------------------------------------+
+ *     | padding to make the following part page-aligned                               |
+ *     +--------------------------------+----------------------------------------------+
+ *     | shadow copy of item array      | array of pointers to data for write_merged() |
+ *     | used if radix-sorting          +----------------------------------------------+
+ *     |                                | workspace for write_merged()                 |
+ *     +--------------------------------+----------------------------------------------+
+ *     |              +---------+                                                      |
+ *     |              | key     |                                                      |
+ *     |              +---------+                                                      |
+ *     | sequence of  | padding |                                                      |
+ *     | items        +---------+                                                      |
+ *     |              | data    |                                                      |
+ *     |              +---------+                                                      |
+ *     |              | padding |                                                      |
+ *     |              +---------+                                                      |
+ *     +-------------------------------------------------------------------------------+
+ *
+ *  (the data which are in different columns are never accessed simultaneously,
+ *   so we use a single buffer for both)
+ */
+
+/*
+ *  Return the address of the data stored behind a key in the big buffer.
+ *  When unifying, the key size is rounded up to CPU_STRUCT_ALIGN first.
+ */
+static inline void *P(internal_get_data)(P(key) *key)
+{
+  uns offset = SORT_KEY_SIZE(*key);
+#ifdef SORT_UNIFY
+  offset = ALIGN_TO(offset, CPU_STRUCT_ALIGN);
+#endif
+  byte *base = (byte *) key;
+  return base + offset;
+}
+
+/*
+ *  Number of bytes of workspace to reserve for one record with the given key:
+ *  a data pointer and the unification workspace (when those are configured),
+ *  but at least one item structure when SORT_INTERNAL_RADIX is defined.
+ */
+static inline size_t P(internal_workspace)(P(key) *key UNUSED)
+{
+  size_t need = 0;
+#ifdef SORT_UNIFY
+  need = sizeof(void *);
+#endif
+#ifdef SORT_UNIFY_WORKSPACE
+  need = need + SORT_UNIFY_WORKSPACE(*key);
+#endif
+#ifdef SORT_INTERNAL_RADIX
+  if (need < sizeof(P(internal_item_t)))
+    need = sizeof(P(internal_item_t));
+#endif
+  return need;
+}
+
+/*
+ *  Generic internal sorting pass: read records from bin until the sorting
+ *  buffer is full or the input ends, sort them in memory and write them out
+ *  as a single run.
+ *
+ *  The run is written to bout_only if the whole remaining input fitted,
+ *  otherwise to bout.  A key which was read but did not fit is parked in
+ *  ctx->key_buf and ctx->more_keys is set, so that the next call starts with
+ *  it.  Returns non-zero if more input may follow, 0 if the input is exhausted.
+ */
+static int P(internal)(struct sort_context *ctx, struct sort_bucket *bin, struct sort_bucket *bout, struct sort_bucket *bout_only)
+{
+  sorter_alloc_buf(ctx);
+  struct fastbuf *in = sbuck_read(bin);
+
+  // Get the first key: either the one left over from the previous pass, or a fresh one from the input
+  P(key) key, *keybuf = ctx->key_buf;
+  if (!keybuf)
+    keybuf = ctx->key_buf = sorter_alloc(ctx, sizeof(key));
+  if (ctx->more_keys)
+    {
+      key = *keybuf;
+      ctx->more_keys = 0;
+    }
+  else if (!P(read_key)(in, &key))
+    return 0;
+
+  size_t bufsize = ctx->big_buf_size;
+#ifdef SORT_VAR_DATA
+  // A record which cannot fit in the buffer even alone is copied directly to the output
+  if (sizeof(key) + 2*CPU_PAGE_SIZE + SORT_DATA_SIZE(key) + P(internal_workspace)(&key) > bufsize)
+    {
+      SORT_XTRACE(4, "s-internal: Generating a giant run");
+      struct fastbuf *out = sbuck_write(bout);
+      P(copy_data)(&key, in, out);
+      bout->runs++;
+      return 1;                                // We don't know, but 1 is always safe
+    }
+#endif
+
+  SORT_XTRACE(5, "s-internal: Reading");
+  // Items grow upwards from the start of the buffer, records downwards from its end
+  P(internal_item_t) *item_array = ctx->big_buf, *item = item_array, *last_item;
+  byte *end = (byte *) ctx->big_buf + bufsize;
+  size_t remains = bufsize - CPU_PAGE_SIZE;    // One page is kept aside for aligning the workspace
+  do
+    {
+      uns ksize = SORT_KEY_SIZE(key);
+#ifdef SORT_UNIFY
+      uns ksize_aligned = ALIGN_TO(ksize, CPU_STRUCT_ALIGN);
+#else
+      uns ksize_aligned = ksize;
+#endif
+      uns dsize = SORT_DATA_SIZE(key);
+      uns recsize = ALIGN_TO(ksize_aligned + dsize, CPU_STRUCT_ALIGN);
+      // Space taken by this record: the record itself, its item and its share of the workspace
+      size_t totalsize = recsize + sizeof(P(internal_item_t)) + P(internal_workspace)(&key);
+      if (unlikely(totalsize > remains
+#ifdef CPU_64BIT_POINTERS
+                  || item >= item_array + ~0U          // The number of items must fit in an uns
+#endif
+        ))
+       {
+         // Buffer full: remember the key for the next pass
+         ctx->more_keys = 1;
+         *keybuf = key;
+         break;
+       }
+      remains -= totalsize;
+      end -= recsize;
+      memcpy(end, &key, ksize);
+#ifdef SORT_VAR_DATA
+      breadb(in, end + ksize_aligned, dsize);
+#endif
+      item->key = (P(key)*) end;
+#ifdef SORT_COPY_HASH
+      item->hash = P(hash)(item->key);
+#endif
+      item++;
+    }
+  while (P(read_key)(in, &key));
+  last_item = item;
+
+  uns count = last_item - item_array;
+  void *workspace UNUSED = ALIGN_PTR(last_item, CPU_PAGE_SIZE);
+  SORT_XTRACE(4, "s-internal: Read %u items (%s items, %s workspace, %s data)",
+       count,
+       stk_fsize((byte*)last_item - (byte*)item_array),
+       stk_fsize(end - (byte*)last_item - remains),
+       stk_fsize((byte*)ctx->big_buf + bufsize - end));
+  timestamp_t timer;
+  init_timer(&timer);
+  item_array = P(array_sort)(item_array, count
+#ifdef SORT_INTERNAL_RADIX
+    , workspace, bin->hash_bits
+#endif
+    );
+  // If the sorted items did not end up at the start of the big buffer,
+  // that part of the buffer is free and serves as the workspace from now on.
+  if ((void *)item_array != ctx->big_buf)
+    workspace = ctx->big_buf;
+  last_item = item_array + count;
+  ctx->total_int_time += get_timer(&timer);
+
+  SORT_XTRACE(5, "s-internal: Writing");
+  if (!ctx->more_keys)
+    bout = bout_only;
+  struct fastbuf *out = sbuck_write(bout);
+  bout->runs++;
+  uns merged UNUSED = 0;
+  for (item = item_array; item < last_item; item++)
+    {
+#ifdef SORT_UNIFY
+      if (item < last_item - 1 && !P(compare)(item->key, item[1].key))
+       {
+         // Rewrite the item structures with just pointers to keys and place
+         // pointers to data in the workspace.
+         // (key_array[cnt] never lies behind item[cnt], since an item is at least as large as a pointer)
+         P(key) **key_array = (void *) item;
+         void **data_array = workspace;
+         key_array[0] = item[0].key;
+         data_array[0] = P(internal_get_data)(key_array[0]);
+         uns cnt;
+         for (cnt=1; item+cnt < last_item && !P(compare)(key_array[0], item[cnt].key); cnt++)
+           {
+             key_array[cnt] = item[cnt].key;
+             data_array[cnt] = P(internal_get_data)(key_array[cnt]);
+           }
+         // The space behind the data pointers is passed to write_merged() as its workspace
+         P(write_merged)(out, key_array, data_array, cnt, data_array+cnt);
+         item += cnt - 1;
+         merged += cnt - 1;
+         continue;
+       }
+#endif
+#ifdef SORT_ASSERT_UNIQUE
+      ASSERT(item == last_item-1 || P(compare)(item->key, item[1].key) < 0);
+#endif
+      P(write_key)(out, item->key);
+#ifdef SORT_VAR_DATA
+      bwrite(out, P(internal_get_data)(item->key), SORT_DATA_SIZE(*item->key));
+#endif
+    }
+#ifdef SORT_UNIFY
+  SORT_XTRACE(4, "Merging reduced %u records", merged);
+#endif
+
+  return ctx->more_keys;
+}
+
+/*
+ *  Roughly estimate how many bytes of keys one internal sorting pass can
+ *  handle: the buffer size divided by the guessed footprint of a record
+ *  (key + workspace + item structure), multiplied by the guessed key size.
+ */
+static u64
+P(internal_estimate)(struct sort_context *ctx, struct sort_bucket *b UNUSED)
+{
+  // Most of this is just wild guesses
+#ifdef SORT_VAR_KEY
+  // Variable-size keys are guessed to take a quarter of the key structure on average
+  uns avg = ALIGN_TO(sizeof(P(key))/4, CPU_STRUCT_ALIGN);
+#else
+  uns avg = ALIGN_TO(sizeof(P(key)), CPU_STRUCT_ALIGN);
+#endif
+  // Per-record workspace, computed along the lines of P(internal_workspace)
+  uns ws = 0;
+#ifdef SORT_UNIFY
+  ws += sizeof(void *);
+#endif
+#ifdef SORT_UNIFY_WORKSPACE
+  ws += avg;
+#endif
+#ifdef SORT_INTERNAL_RADIX
+  ws = MAX(ws, sizeof(P(internal_item_t)));
+#endif
+  // We ignore the data part of records, it probably won't make the estimate much worse
+  return (ctx->big_buf_size / (avg + ws + sizeof(P(internal_item_t))) * avg);
+}
+
+#undef SORT_COPY_HASH          // Defined only for the use of this file, do not leak it further
diff --git a/lib/sorter/s-multiway.h b/lib/sorter/s-multiway.h

new file mode 100644 (file)

index 0000000..83e928f
--- /dev/null
+++ b/lib/sorter/s-multiway.h
@@ -0,0 +1,148 @@
+/*
+ *     UCW Library -- Universal Sorter: Multi-Way Merge Module
+ *
+ *     (c) 2007 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+/*
+ * We use a binary tree to keep track of the current minimum. The tree is
+ * represented by an array (in the same way as binary heaps usually are),
+ * leaves correspond to input streams and each internal vertex remembers
+ * the leaf in its subtree, which has the lowest key.
+ */
+
+/* A single vertex of the tree; a negative index i means there is no stream with a pending key here */
+typedef struct P(mwt) {
+  int i;               // Minimum of the subtree
+#ifdef SORT_UNIFY
+  int eq;              // Did we encounter equality anywhere in the subtree?
+#endif
+} P(mwt);
+
+/*
+ * Recalculate all vertices on the path from vertex i (exclusive) up to the
+ * root tree[1].  Each vertex takes over the child whose stream has the lower
+ * current key; a child with a negative index loses automatically.
+ */
+static inline void P(update_tree)(P(key) *keys, P(mwt) *tree, uns i)
+{
+  while (i /= 2)
+    {
+      if (tree[2*i].i < 0)
+       tree[i] = tree[2*i+1];
+      else if (tree[2*i+1].i < 0)
+       tree[i] = tree[2*i];
+      else
+       {
+         int cmp = P(compare)(&keys[tree[2*i].i], &keys[tree[2*i+1].i]);
+         // On a tie, the left child wins
+         tree[i] = (cmp <= 0) ? tree[2*i] : tree[2*i+1];
+#ifdef SORT_UNIFY
+         if (!cmp)
+           tree[i].eq = 1;
+#endif
+       }
+      /*
+       * It is very tempting to stop as soon as the current node does not
+       * change, but it is wrong, because even if the stream index stored in
+       * the tree is the same, the actual key value can differ.
+       */
+    }
+  /*
+   * This function sometimes triggers optimizer bugs in GCC versions up to 4.2.1,
+   * leading to an assumption that tree[1] does not change during this function.
+   * We add an explicit memory barrier as a work-around. Ugh. See GCC Bug #33262.
+   */
+  asm volatile ("" : : : "memory");
+}
+
+/* Store stream index val in vertex i and recalculate the path from it to the root */
+static inline void P(set_tree)(P(key) *keys, P(mwt) *tree, uns i, int val)
+{
+  P(mwt) *vertex = &tree[i];
+  vertex->i = val;
+  P(update_tree)(keys, tree, i);
+}
+
+/*
+ * Merge all input buckets in the NULL-terminated array ins into the bucket
+ * out and count the result as one more run of out.
+ *
+ * The current key of stream i is kept in keys[i] and the stream occupies the
+ * leaf tree[n2+i], where n2 is the number of streams rounded up to a power
+ * of two.  tree[1] always identifies the stream with the lowest key.
+ */
+static void P(multiway_merge)(struct sort_context *ctx UNUSED, struct sort_bucket **ins, struct sort_bucket *out)
+{
+  uns num_ins = 0;
+  while (ins[num_ins])
+    num_ins++;
+
+  uns n2 = 1;
+  while (n2 < num_ins)
+    n2 *= 2;
+
+  struct fastbuf *fout = sbuck_write(out);
+  struct fastbuf *fins[num_ins];
+  P(key) keys[num_ins];
+  P(mwt) tree[2*n2];
+  // Start with an empty tree (tree[0] is never used)
+  for (uns i=1; i<2*n2; i++)
+    tree[i] = (P(mwt)) { .i = -1 };
+
+  // Read the first key of every stream; streams which are empty stay out of the tree
+  for (uns i=0; i<num_ins; i++)
+    {
+      fins[i] = sbuck_read(ins[i]);
+      if (P(read_key)(fins[i], &keys[i]))
+       P(set_tree)(keys, tree, n2+i, i);
+    }
+
+#ifdef SORT_UNIFY
+
+  uns hits[num_ins];
+  P(key) *mkeys[num_ins], *key;
+  struct fastbuf *mfb[num_ins];
+
+  while (likely(tree[1].i >= 0))
+    {
+      int i = tree[1].i;
+      if (!tree[1].eq)
+       {
+         /* The key is unique, so let's go through the fast path */
+         P(copy_data)(&keys[i], fins[i], fout);
+         if (unlikely(!P(read_key)(fins[i], &keys[i])))
+           tree[n2+i].i = -1;
+         P(update_tree)(keys, tree, n2+i);
+         continue;
+       }
+
+      // Collect all streams whose current key equals the minimum, taking
+      // each of them temporarily out of the tree.
+      uns m = 0;
+      key = &keys[i];
+      do
+       {
+         hits[m] = i;
+         mkeys[m] = &keys[i];
+         mfb[m] = fins[i];
+         m++;
+         P(set_tree)(keys, tree, n2+i, -1);
+         i = tree[1].i;
+         if (unlikely(i < 0))
+           break;
+       }
+      while (!P(compare)(key, &keys[i]));
+
+      P(copy_merged)(mkeys, mfb, m, fout);
+
+      // Read the next key of each collected stream and put it back to the tree unless it has ended
+      for (uns j=0; j<m; j++)
+       {
+         i = hits[j];
+         if (likely(P(read_key)(fins[i], &keys[i])))
+           P(set_tree)(keys, tree, n2+i, i);
+       }
+    }
+
+#else
+
+  /* Simplified version which does not support any unification */
+  while (likely(tree[1].i >= 0))
+    {
+      uns i = tree[1].i;
+      P(key) UNUSED key = keys[i];     // Copy of the key just written, used only by the uniqueness check below
+      P(copy_data)(&keys[i], fins[i], fout);
+      if (unlikely(!P(read_key)(fins[i], &keys[i])))
+       tree[n2+i].i = -1;
+      P(update_tree)(keys, tree, n2+i);
+#ifdef SORT_ASSERT_UNIQUE
+      ASSERT(tree[1].i < 0 || P(compare)(&key, &keys[tree[1].i]) < 0);
+#endif
+    }
+
+#endif
+
+  out->runs++;
+}
diff --git a/lib/sorter/s-radix.h b/lib/sorter/s-radix.h

new file mode 100644 (file)

index 0000000..289f255
--- /dev/null
+++ b/lib/sorter/s-radix.h
@@ -0,0 +1,30 @@
+/*
+ *     UCW Library -- Universal Sorter: Radix-Split Module
+ *
+ *     (c) 2007 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include <string.h>
+
+/*
+ * Distribute all records of bin among the buckets in bouts[], choosing the
+ * bucket by numbits bits of the key's hash starting at bit bitpos.
+ * An output bucket is opened for writing only when its first record arrives.
+ */
+static void P(radix_split)(struct sort_context *ctx UNUSED, struct sort_bucket *bin, struct sort_bucket **bouts, uns bitpos, uns numbits)
+{
+  uns count = 1 << numbits;
+  uns mask = count - 1;
+  struct fastbuf *src = sbuck_read(bin);
+
+  /* Output streams, all of them unopened (NULL) at the beginning */
+  struct fastbuf *dst[count];
+  bzero(dst, sizeof(dst));
+
+  P(key) key;
+  while (P(read_key)(src, &key))
+    {
+      P(hash_t) hash = P(hash)(&key);
+      uns idx = (hash >> bitpos) & mask;
+      struct fastbuf *target = dst[idx];
+      if (unlikely(!target))
+       target = dst[idx] = sbuck_write(bouts[idx]);
+      P(copy_data)(&key, src, target);
+    }
+}
diff --git a/lib/sorter/s-twoway.h b/lib/sorter/s-twoway.h

new file mode 100644 (file)

index 0000000..ef23a2e
--- /dev/null
+++ b/lib/sorter/s-twoway.h
@@ -0,0 +1,103 @@
+/*
+ *     UCW Library -- Universal Sorter: Two-Way Merge Module
+ *
+ *     (c) 2007 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+/*
+ * One pass of two-way merging: runs read from ins[0] and ins[1] are merged
+ * pairwise and the merged runs are written alternately to outs[0] and
+ * outs[1].  If outs[1] is NULL, everything is written to outs[0].  The
+ * counts of runs written are added to the runs fields of the output buckets.
+ *
+ * LESS is a comparison operator defined outside this file section.
+ */
+static void P(twoway_merge)(struct sort_context *ctx UNUSED, struct sort_bucket **ins, struct sort_bucket **outs)
+{
+  struct fastbuf *fin1, *fin2, *fout1, *fout2, *ftmp;
+  // For each input, two key buffers take turns in holding the current and the previous key
+  P(key) kbuf1, kbuf2, kbuf3, kbuf4;
+  P(key) *kin1 = &kbuf1, *kprev1 = &kbuf2, *kin2 = &kbuf3, *kprev2 = &kbuf4;
+  P(key) *kout = NULL, *ktmp;          // kout is the last key written, NULL if nothing was written yet
+  int next1, next2, run1, run2;                // nextN: input N has a current key, runN: its current run continues
+  int comp;
+  uns run_count = 0;
+
+  fin1 = sbuck_read(ins[0]);
+  next1 = P(read_key)(fin1, kin1);
+  if (sbuck_have(ins[1]))
+    {
+      fin2 = sbuck_read(ins[1]);
+      next2 = P(read_key)(fin2, kin2);
+    }
+  else
+    {
+      fin2 = NULL;
+      next2 = 0;
+    }
+  fout1 = fout2 = NULL;
+
+  run1 = next1, run2 = next2;
+  while (next1 || next2)
+    {
+      // Decide which input supplies the next key; an input whose run has ended has to wait
+      if (!run1)
+       comp = 1;
+      else if (!run2)
+       comp = -1;
+      else
+       comp = P(compare)(kin1, kin2);
+      ktmp = (comp <= 0) ? kin1 : kin2;
+      // If the chosen key does not continue the run being written, start a new run in the other output
+      if (!kout || !(P(compare)(kout, ktmp) LESS 0))
+       {
+         SWAP(fout1, fout2, ftmp);
+         if (unlikely(!fout1))
+           {
+             // Outputs are opened lazily: outs[0] first, then outs[1] if there is one,
+             // otherwise both output streams become the same
+             if (!fout2)
+               fout1 = sbuck_write(outs[0]);
+             else if (outs[1])
+               fout1 = sbuck_write(outs[1]);
+             else
+               fout1 = fout2;
+           }
+         run_count++;
+       }
+#ifdef SORT_ASSERT_UNIQUE
+      ASSERT(comp != 0);
+#endif
+      if (comp LESS 0)
+       {
+         P(copy_data)(kin1, fin1, fout1);
+         SWAP(kin1, kprev1, ktmp);
+         next1 = P(read_key)(fin1, kin1);
+         run1 = next1 && (P(compare)(kprev1, kin1) LESS 0);
+         kout = kprev1;
+       }
+#ifdef SORT_UNIFY
+      else if (comp == 0)
+       {
+         // Equal keys on both inputs: write them merged and advance both inputs
+         P(key) *mkeys[] = { kin1, kin2 };
+         struct fastbuf *mfb[] = { fin1, fin2 };
+         P(copy_merged)(mkeys, mfb, 2, fout1);
+         SWAP(kin1, kprev1, ktmp);
+         next1 = P(read_key)(fin1, kin1);
+         run1 = next1 && (P(compare)(kprev1, kin1) LESS 0);
+         SWAP(kin2, kprev2, ktmp);
+         next2 = P(read_key)(fin2, kin2);
+         run2 = next2 && (P(compare)(kprev2, kin2) LESS 0);
+         kout = kprev2;
+       }
+#endif
+      else
+       {
+         P(copy_data)(kin2, fin2, fout1);
+         SWAP(kin2, kprev2, ktmp);
+         next2 = P(read_key)(fin2, kin2);
+         run2 = next2 && (P(compare)(kprev2, kin2) LESS 0);
+         kout = kprev2;
+       }
+      // Both runs have ended: continue with the next pair of runs
+      if (!run1 && !run2)
+       {
+         run1 = next1;
+         run2 = next2;
+       }
+    }
+
+  // Runs were written alternately, starting with outs[0]
+  if (fout2 && fout2 != fout1)
+    outs[1]->runs += run_count / 2;
+  if (fout1)
+    outs[0]->runs += (run_count+1) / 2;
+}
diff --git a/lib/sorter/sbuck.c b/lib/sorter/sbuck.c

new file mode 100644 (file)

index 0000000..c6ebdee
--- /dev/null
+++ b/lib/sorter/sbuck.c
@@ -0,0 +1,158 @@
+/*
+ *     UCW Library -- Universal Sorter: Operations on Contexts, Buffers and Buckets
+ *
+ *     (c) 2007 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/fastbuf.h"
+#include "lib/mempool.h"
+#include "lib/stkstring.h"
+#include "lib/sorter/common.h"
+
+#include <fcntl.h>
+
+/* Allocate size bytes of zeroed memory from the memory pool of the sorter context */
+void *
+sorter_alloc(struct sort_context *ctx, uns size)
+{
+  void *mem = mp_alloc_zero(ctx->pool, size);
+  return mem;
+}
+
+/* Create a new zero-initialized bucket belonging to ctx; it is not put on any list */
+struct sort_bucket *
+sbuck_new(struct sort_context *ctx)
+{
+  struct sort_bucket *bucket = sorter_alloc(ctx, sizeof(*bucket));
+  bucket->ctx = ctx;
+  return bucket;
+}
+
+/*
+ * Get rid of a bucket: unlink it from its list (if it is on one), close its
+ * fastbuf and wipe the structure, leaving only the SBF_DESTROYED mark.
+ * Passing NULL is allowed and does nothing.
+ */
+void
+sbuck_drop(struct sort_bucket *b)
+{
+  if (!b)
+    return;
+
+  ASSERT(!(b->flags & SBF_DESTROYED));
+  if (b->n.prev)
+    clist_remove(&b->n);
+  bclose(b->fb);
+  bzero(b, sizeof(*b));
+  b->flags = SBF_DESTROYED;
+}
+
+/*
+ * Return the size of a bucket: the current position in its fastbuf if the
+ * bucket is being written and is not swapped out, the recorded size otherwise.
+ */
+sh_off_t
+sbuck_size(struct sort_bucket *b)
+{
+  if (!(b->flags & SBF_OPEN_WRITE) || (b->flags & SBF_SWAPPED_OUT))
+    return b->size;
+  return btell(b->fb);
+}
+
+/* Does the bucket exist and contain any data? */
+int
+sbuck_have(struct sort_bucket *b)
+{
+  if (!b)
+    return 0;
+  return sbuck_size(b) != 0;
+}
+
+/* Is the bucket backed by a file, i.e., does it have a fastbuf or is it swapped out? */
+int
+sbuck_has_file(struct sort_bucket *b)
+{
+  if (b->fb)
+    return 1;
+  return (b->flags & SBF_SWAPPED_OUT) ? 1 : 0;
+}
+
+/*
+ * If the bucket is swapped out, reopen its file.  Buckets open for writing
+ * continue at the end of the file.  Unless SORT_DEBUG_KEEP_BUCKETS is set,
+ * the file is marked as temporary again.
+ */
+static void
+sbuck_swap_in(struct sort_bucket *b)
+{
+  if (!(b->flags & SBF_SWAPPED_OUT))
+    return;
+
+  struct fastbuf *fb = bopen_file(b->filename, O_RDWR, b->ctx->fb_params);
+  b->fb = fb;
+  if (b->flags & SBF_OPEN_WRITE)
+    bseek(fb, 0, SEEK_END);
+  if (!(sorter_debug & SORT_DEBUG_KEEP_BUCKETS))
+    bconfig(fb, BCONFIG_IS_TEMP_FILE, 1);
+  b->flags &= ~SBF_SWAPPED_OUT;
+  SORT_XTRACE(3, "Swapped in %s", b->filename);
+}
+
+/*
+ * Return the fastbuf of a bucket for reading, swapping the bucket in first
+ * if needed.  A bucket which was open for writing is switched to reading:
+ * its size is recorded and the fastbuf is rewound.  A bucket which is open
+ * neither for reading nor for writing triggers an assertion failure.
+ */
+struct fastbuf *
+sbuck_read(struct sort_bucket *b)
+{
+  sbuck_swap_in(b);
+  if (b->flags & SBF_OPEN_READ)
+    return b->fb;
+  else if (b->flags & SBF_OPEN_WRITE)
+    {
+      b->size = btell(b->fb);          // Remember the final size, it is no longer btell() after rewinding
+      b->flags = (b->flags & ~SBF_OPEN_WRITE) | SBF_OPEN_READ;
+      brewind(b->fb);
+      return b->fb;
+    }
+  else
+    ASSERT(0);
+}
+
+/*
+ * Return the fastbuf of a bucket for writing, swapping the bucket in first
+ * if needed.  A bucket which is not open for writing yet gets a fresh
+ * temporary file; such a bucket must be neither open for reading nor
+ * destroyed.
+ */
+struct fastbuf *
+sbuck_write(struct sort_bucket *b)
+{
+  sbuck_swap_in(b);
+  if (b->flags & SBF_OPEN_WRITE)
+    {
+      ASSERT(b->fb);
+      return b->fb;
+    }
+
+  ASSERT(!(b->flags & (SBF_OPEN_READ | SBF_DESTROYED)));
+  b->fb = bopen_tmp_file(b->ctx->fb_params);
+  if (sorter_debug & SORT_DEBUG_KEEP_BUCKETS)
+    bconfig(b->fb, BCONFIG_IS_TEMP_FILE, 0);
+  b->flags |= SBF_OPEN_WRITE;
+  /* Remember the file name, sbuck_swap_in() needs it for reopening the file */
+  b->filename = mp_strdup(b->ctx->pool, b->fb->name);
+  return b->fb;
+}
+
+/*
+ * Swap a bucket out: close its fastbuf and mark the bucket as swapped out,
+ * so that sbuck_swap_in() reopens the file later.  This applies only to
+ * open buckets which have a fastbuf and are not the source bucket;
+ * for all others, nothing happens.
+ */
+void
+sbuck_swap_out(struct sort_bucket *b)
+{
+  if (!(b->flags & (SBF_OPEN_READ | SBF_OPEN_WRITE)) || !b->fb || (b->flags & SBF_SOURCE))
+    return;
+
+  struct fastbuf *fb = b->fb;
+  if (b->flags & SBF_OPEN_WRITE)
+    b->size = btell(fb);
+  /* Clear the temporary-file flag before closing, presumably to keep the file on disk */
+  bconfig(fb, BCONFIG_IS_TEMP_FILE, 0);
+  bclose(fb);
+  b->fb = NULL;
+  b->flags |= SBF_SWAPPED_OUT;
+  SORT_XTRACE(3, "Swapped out %s", b->filename);
+}
+
+/*
+ * Calculate the size of the sorting buffer: sorter_bufsize aligned to
+ * the page size, but never less than two pages.
+ */
+void
+sorter_prepare_buf(struct sort_context *ctx)
+{
+  u64 page = CPU_PAGE_SIZE;
+  u64 size = sorter_bufsize;
+  size = ALIGN_TO(size, page);
+  if (size < 2*page)
+    size = 2*page;
+  ctx->big_buf_size = size;
+}
+
+/* Allocate the sorting buffer, unless it is already allocated */
+void
+sorter_alloc_buf(struct sort_context *ctx)
+{
+  if (!ctx->big_buf)
+    {
+      ctx->big_buf = big_alloc(ctx->big_buf_size);
+      SORT_XTRACE(3, "Allocated sorting buffer (%s)", stk_fsize(ctx->big_buf_size));
+    }
+}
+
+/* Free the sorting buffer if there is one */
+void
+sorter_free_buf(struct sort_context *ctx)
+{
+  if (ctx->big_buf)
+    {
+      big_free(ctx->big_buf, ctx->big_buf_size);
+      ctx->big_buf = NULL;
+      SORT_XTRACE(3, "Freed sorting buffer");
+    }
+}
diff --git a/lib/sorter/sort-test.c b/lib/sorter/sort-test.c

new file mode 100644 (file)

index 0000000..e7d5873
--- /dev/null
+++ b/lib/sorter/sort-test.c
@@ -0,0 +1,706 @@
+/*
+ *     UCW Library -- Testing the Sorter
+ *
+ *     (c) 2007 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/getopt.h"
+#include "lib/conf.h"
+#include "lib/fastbuf.h"
+#include "lib/ff-binary.h"
+#include "lib/hashfunc.h"
+#include "lib/md5.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+/*** A hack for overriding radix-sorter configuration ***/
+
+/*
+ *  If FORCE_RADIX_BITS is defined, it replaces the value of
+ *  CONFIG_UCW_RADIX_SORTER_BITS for the rest of this file.
+ */
+#ifdef FORCE_RADIX_BITS
+#undef CONFIG_UCW_RADIX_SORTER_BITS
+#define CONFIG_UCW_RADIX_SORTER_BITS FORCE_RADIX_BITS
+#endif
+
+/*** Time measurement ***/
+
+/* Timer shared by start() and stop() */
+static timestamp_t timer;
+/* Number of the test being run; set by run_test() and printed by stop() */
+static uns test_id;
+
+/*
+ *  Begin measuring a test: call sync() and then (re)initialize the timer.
+ *  (The sync() presumably keeps earlier pending writes out of the measurement.)
+ */
+static void
+start(void)
+{
+  sync();
+  init_timer(&timer);
+}
+
+/*
+ *  Finish measuring a test: call sync() and log the time elapsed
+ *  since start(), converted from the timer's units by dividing by 1000.
+ */
+static void
+stop(void)
+{
+  sync();
+  double seconds = get_timer(&timer) / 1000.;
+  msg(L_INFO, "Test %d took %.3fs", test_id, seconds);
+}
+
+/*** Simple 4-byte integer keys ***/
+
+/* A record consisting of a single 32-bit integer key and no data */
+struct key1 {
+  u32 x;
+};
+
+/*
+ *  Instantiate the sorter as s1_sort(in, out, range): regular keys read
+ *  from and written to fastbufs, sorted by the integer x, all keys
+ *  distinct. SORT_DELETE_INPUT is 0, so the input is not marked
+ *  for deletion.
+ */
+#define SORT_KEY_REGULAR struct key1
+#define SORT_PREFIX(x) s1_##x
+#define SORT_INPUT_FB
+#define SORT_OUTPUT_FB
+#define SORT_UNIQUE
+#define SORT_INT(k) (k).x
+#define SORT_DELETE_INPUT 0
+
+#include "lib/sorter/sorter.h"
+
+/*
+ *  Sort N distinct 32-bit integers 0..N-1 and check that they come out
+ *  in increasing order. N is a prime derived from `size' (0 for an empty
+ *  test); `mode' selects the input order: 0 = increasing, 1 = decreasing,
+ *  2 = scrambled by the map i -> (i*K + 17) mod N.
+ */
+static void
+test_int(int mode, u64 size)
+{
+  static char *mode_names[] = { "increasing", "decreasing", "random" };
+  uns N = 0;
+  if (size)
+    N = nextprime(MIN(size/4, 0xffff0000));
+  uns K = N/4*3;
+  msg(L_INFO, ">>> Integers (%s, N=%u)", mode_names[mode], N);
+
+  /* Generate the input */
+  struct fastbuf *f = bopen_tmp(65536);
+  for (uns i=0; i<N; i++)
+    {
+      uns val;
+      if (mode == 0)
+        val = i;
+      else if (mode == 1)
+        val = N-1-i;
+      else
+        val = ((u64)i * K + 17) % N;
+      bputl(f, val);
+    }
+  brewind(f);
+
+  /* Only the sorting itself is timed */
+  start();
+  f = s1_sort(f, NULL, N-1);
+  stop();
+
+  SORT_XTRACE(2, "Verifying");
+  for (uns expected=0; expected<N; expected++)
+    {
+      uns got = bgetl(f);
+      if (got != expected)
+        die("Discrepancy: %u instead of %u", got, expected);
+    }
+  bclose(f);
+}
+
+/*** Integers with merging, but no data ***/
+
+/* An integer key together with the number of its occurrences */
+struct key2 {
+  u32 x;
+  u32 cnt;
+};
+
+/*
+ *  Unification callback: add the counts of records 1..n-1 to the first
+ *  record and write that record as the merged result. There are no data
+ *  and no workspace in this test, hence `d' and `buf' are unused.
+ */
+static inline void s2_write_merged(struct fastbuf *f, struct key2 **k, void **d UNUSED, uns n, void *buf UNUSED)
+{
+  struct key2 *first = k[0];
+  for (struct key2 **other = k+1; other < k+n; other++)
+    first->cnt += (*other)->cnt;
+  bwrite(f, first, sizeof(struct key2));
+}
+
+/* Instantiate the sorter as s2_sort(in, out, range): integer keys with unification */
+#define SORT_KEY_REGULAR struct key2
+#define SORT_PREFIX(x) s2_##x
+#define SORT_INPUT_FB
+#define SORT_OUTPUT_FB
+#define SORT_UNIFY
+#define SORT_INT(k) (k).x
+
+#include "lib/sorter/sorter.h"
+
+/*
+ *  Sort counted integers with unification. Every value 0..N-1 is written
+ *  2*mult times with count 1, so after sorting each value must occur
+ *  exactly once and carry the count 2*mult. `mode' selects the order of
+ *  values in the input (increasing / decreasing / scrambled).
+ */
+static void
+test_counted(int mode, u64 size)
+{
+  static char *mode_names[] = { "increasing", "decreasing", "random" };
+  u64 items = size / sizeof(struct key2);
+
+  /* Raise the multiplicity until the number of distinct values fits the limit */
+  uns mult;
+  for (mult=2; items/(2*mult) > 0xffff0000; mult++)
+    ;
+  uns N = 0;
+  if (items)
+    N = nextprime(items/(2*mult));
+  uns K = N/4*3;
+  msg(L_INFO, ">>> Counted integers (%s, N=%u, mult=%u)", mode_names[mode], N, mult);
+
+  struct fastbuf *f = bopen_tmp(65536);
+  for (uns m=0; m<mult; m++)
+    for (uns i=0; i<N; i++)
+      {
+        uns val;
+        if (mode == 0)
+          val = i % N;
+        else if (mode == 1)
+          val = N-1-(i%N);
+        else
+          val = ((u64)i * K + 17) % N;
+        /* Each value is emitted twice in a row, each time with count 1 */
+        for (uns rep=0; rep<2; rep++)
+          {
+            bputl(f, val);
+            bputl(f, 1);
+          }
+      }
+  brewind(f);
+
+  start();
+  f = s2_sort(f, NULL, N-1);
+  stop();
+
+  SORT_XTRACE(2, "Verifying");
+  uns want_cnt = 2*mult;
+  for (uns i=0; i<N; i++)
+    {
+      uns x = bgetl(f);
+      if (x != i)
+        die("Discrepancy: %u instead of %u", x, i);
+      uns cnt = bgetl(f);
+      if (cnt != want_cnt)
+        die("Discrepancy: %u has count %u instead of %u", x, cnt, want_cnt);
+    }
+  bclose(f);
+}
+
+/*** Longer records with hashes (similar to Shepherd's index records) ***/
+
+/* A 128-bit hash used as the key, followed by the index and a payload */
+struct key3 {
+  u32 hash[4];
+  u32 i;
+  u32 payload[3];
+};
+
+/* Compare two keys by their hash words, starting with hash[0] */
+static inline int s3_compare(struct key3 *x, struct key3 *y)
+{
+  for (uns w=0; w<4; w++)
+    {
+      COMPARE(x->hash[w], y->hash[w]);
+    }
+  return 0;
+}
+
+/* The hash function for the sorter: the first word of the hash */
+static inline uns s3_hash(struct key3 *x)
+{
+  return x->hash[0];
+}
+
+/* Instantiate the sorter as s3_sort(in, out) with a 32-bit hash */
+#define SORT_KEY_REGULAR struct key3
+#define SORT_PREFIX(x) s3_##x
+#define SORT_INPUT_FB
+#define SORT_OUTPUT_FB
+#define SORT_HASH_BITS 32
+
+#include "lib/sorter/sorter.h"
+
+/*
+ *  Fill in the record number `i'. The payload is a fixed function of i.
+ *  The hash depends on the mode: in modes 0 and 1, hash[0] is i or ~i
+ *  respectively and the remaining words copy the payload; in any other
+ *  mode the hash is the MD5 of the 4 bytes of the stored index.
+ */
+static void
+gen_hash_key(int mode, struct key3 *k, uns i)
+{
+  k->i = i;
+  k->payload[0] = 7*i + 13;
+  k->payload[1] = 13*i + 19;
+  k->payload[2] = 19*i + 7;
+
+  if (mode == 0 || mode == 1)
+    {
+      k->hash[0] = mode ? ~i : i;
+      for (uns w=0; w<3; w++)
+        k->hash[w+1] = k->payload[w];
+    }
+  else
+    {
+      struct MD5Context ctx;
+      MD5Init(&ctx);
+      MD5Update(&ctx, (byte*) &k->i, 4);
+      MD5Final((byte*) &k->hash, &ctx);
+    }
+}
+
+/*
+ *  Sort N records keyed by hashes and verify the result: keys must be
+ *  strictly increasing, every record must be identical to the one
+ *  regenerated from its stored index, and the sum of hash[3] over the
+ *  output must match the sum over the input.
+ */
+static void
+test_hashes(int mode, u64 size)
+{
+  static char *mode_names[] = { "increasing", "decreasing", "random" };
+  uns N = MIN(size / sizeof(struct key3), 0xffffffff);
+  msg(L_INFO, ">>> Hashes (%s, N=%u)", mode_names[mode], N);
+  struct key3 cur, prev;
+
+  struct fastbuf *f = bopen_tmp(65536);
+  uns checksum = 0;
+  for (uns i=0; i<N; i++)
+    {
+      gen_hash_key(mode, &cur, i);
+      checksum += cur.hash[3];
+      bwrite(f, &cur, sizeof(cur));
+    }
+  brewind(f);
+
+  start();
+  f = s3_sort(f, NULL);
+  stop();
+
+  SORT_XTRACE(2, "Verifying");
+  for (uns i=0; i<N; i++)
+    {
+      int ok = breadb(f, &cur, sizeof(cur));
+      ASSERT(ok);
+      /* Except for the first record, the key must be greater than the previous one */
+      if (i > 0 && s3_compare(&cur, &prev) <= 0)
+        ASSERT(0);
+      /* Rebuild the record from its index; on success, prev is a copy of cur */
+      gen_hash_key(mode, &prev, cur.i);
+      if (memcmp(&cur, &prev, sizeof(cur)) != 0)
+        ASSERT(0);
+      checksum -= cur.hash[3];
+    }
+  ASSERT(!checksum);
+  bclose(f);
+}
+
+/*** Variable-length records (strings) with and without var-length data ***/
+
+/* Size of the string buffer in a key; string lengths must stay below it */
+#define KEY4_MAX 256
+
+/* A string of `len' bytes (not zero-terminated) */
+struct key4 {
+  uns len;
+  byte s[KEY4_MAX];
+};
+
+/* Compare the common prefix by memcmp(); if it is equal, compare the lengths */
+static inline int s4_compare(struct key4 *x, struct key4 *y)
+{
+  uns common = (x->len < y->len) ? x->len : y->len;
+  int diff = memcmp(x->s, y->s, common);
+  if (!diff)
+    {
+      COMPARE(x->len, y->len);
+    }
+  return diff;
+}
+
+/*
+ *  Read a key stored as a 32-bit length followed by the bytes of the string.
+ *  A length of 0xffffffff is treated as the end of input. Returns 1 when
+ *  a key has been read, 0 at the end.
+ */
+static inline int s4_read_key(struct fastbuf *f, struct key4 *x)
+{
+  uns len = bgetl(f);
+  x->len = len;
+  if (len == 0xffffffff)
+    return 0;
+  ASSERT(len < KEY4_MAX);
+  breadb(f, x->s, len);
+  return 1;
+}
+
+/* Write a key in the format understood by s4_read_key() */
+static inline void s4_write_key(struct fastbuf *f, struct key4 *x)
+{
+  uns len = x->len;
+  ASSERT(len < KEY4_MAX);
+  bputl(f, len);
+  bwrite(f, x->s, len);
+}
+
+/*
+ *  Instantiate the sorter as s4_sort(in, out): variable-length keys
+ *  without data. SORT_KEY_SIZE is the size of the length field plus
+ *  the bytes of the string actually used.
+ */
+#define SORT_KEY struct key4
+#define SORT_PREFIX(x) s4_##x
+#define SORT_KEY_SIZE(x) (sizeof(struct key4) - KEY4_MAX + (x).len)
+#define SORT_INPUT_FB
+#define SORT_OUTPUT_FB
+
+#include "lib/sorter/sorter.h"
+
+/* The second instance (prefix s4b_) shares the key callbacks with the first one */
+#define s4b_compare s4_compare
+#define s4b_read_key s4_read_key
+#define s4b_write_key s4_write_key
+
+/*
+ *  Size of the data attached to a key: derived from the first byte of
+ *  the string (xor 0xad), or 0 if the string is empty.
+ */
+static inline uns s4_data_size(struct key4 *x)
+{
+  return x->len ? (x->s[0] ^ 0xad) : 0;
+}
+
+/* Instantiate the sorter as s4b_sort(in, out): the same keys, followed by variable-length data */
+#define SORT_KEY struct key4
+#define SORT_PREFIX(x) s4b_##x
+#define SORT_KEY_SIZE(x) (sizeof(struct key4) - KEY4_MAX + (x).len)
+#define SORT_DATA_SIZE(x) s4_data_size(&(x))
+#define SORT_INPUT_FB
+#define SORT_OUTPUT_FB
+
+#include "lib/sorter/sorter.h"
+
+/* Generate a key: a length obtained from random_max(KEY4_MAX) and that many bytes taken from random() */
+static void
+gen_key4(struct key4 *k)
+{
+  uns n = random_max(KEY4_MAX);
+  k->len = n;
+  for (byte *p = k->s; p < k->s + n; p++)
+    *p = random();
+}
+
+/*
+ *  Fill `buf' with `len' bytes of deterministic data derived from the
+ *  seed `h': each byte is the top byte of the current state, which is
+ *  then advanced by h = h*259309 + 17.
+ */
+static void
+gen_data4(byte *buf, uns len, uns h)
+{
+  for (uns i=0; i<len; i++)
+    {
+      buf[i] = h >> 24;
+      h = h*259309 + 17;
+    }
+}
+
+/*
+ *  Sort N random strings, optionally (mode != 0) followed by data whose
+ *  size and contents are derived from the string. After sorting, the keys
+ *  must be non-decreasing, the data must match what gen_data4() generates
+ *  for the key's hash, and the sum of hashes of all keys must equal the
+ *  sum computed when generating the input.
+ */
+static void
+test_strings(uns mode, u64 size)
+{
+  uns avg_item_size = KEY4_MAX/2 + 4 + (mode ? 128 : 0);
+  uns N = MIN(size / avg_item_size, 0xffffffff);
+  msg(L_INFO, ">>> Strings %s(N=%u)", (mode ? "with data " : ""), N);
+  /*
+   *  gen_key4() draws from random(), which is seeded by srandom();
+   *  srand() is only guaranteed to seed rand(). Reseed here so that every
+   *  run of this test sees the same keys. (random_max() presumably uses
+   *  the same generator -- verify.)
+   */
+  srandom(1);
+
+  struct key4 k, lastk;
+  byte buf[256], buf2[256];
+  uns sum = 0;
+
+  /* Generate the input and accumulate the checksum of key hashes */
+  struct fastbuf *f = bopen_tmp(65536);
+  for (uns i=0; i<N; i++)
+    {
+      gen_key4(&k);
+      s4_write_key(f, &k);
+      uns h = hash_block(k.s, k.len);
+      sum += h;
+      if (mode)
+       {
+         gen_data4(buf, s4_data_size(&k), h);
+         bwrite(f, buf, s4_data_size(&k));
+       }
+    }
+  brewind(f);
+
+  start();
+  f = (mode ? s4b_sort : s4_sort)(f, NULL);
+  stop();
+
+  SORT_XTRACE(2, "Verifying");
+  for (uns i=0; i<N; i++)
+    {
+      int ok = s4_read_key(f, &k);
+      ASSERT(ok);
+      uns h = hash_block(k.s, k.len);
+      if (mode && s4_data_size(&k))
+       {
+         /* The data must be exactly what the generator produced for this key */
+         ok = breadb(f, buf, s4_data_size(&k));
+         ASSERT(ok);
+         gen_data4(buf2, s4_data_size(&k), h);
+         ASSERT(!memcmp(buf, buf2, s4_data_size(&k)));
+       }
+      /* Equal keys are allowed here, only a decrease is an error */
+      if (i && s4_compare(&k, &lastk) < 0)
+       ASSERT(0);
+      sum -= h;
+      lastk = k;
+    }
+  ASSERT(!sum);
+  bclose(f);
+}
+
+/*** Graph-like structure with custom presorting ***/
+
+/* A vertex `x' followed on disk by `cnt' 32-bit values */
+struct key5 {
+  u32 x;
+  u32 cnt;
+};
+
+/* State of the pair generator: parameters N, K, L and the current position (i, j) */
+static uns s5_N, s5_K, s5_L, s5_i, s5_j;
+
+struct s5_pair {
+  uns x, y;
+};
+
+/*
+ *  Produce the next pair of the test graph. The generator walks through
+ *  positions (i, j) with 0 <= i, j < N, advancing j first; the pair is
+ *  x = j*K mod N, y = (i+j)*L mod N. Returns 1 if a pair was stored
+ *  to `p', 0 when all positions have been used (or N is zero).
+ */
+static int s5_gen(struct s5_pair *p)
+{
+  if (s5_j >= s5_N)
+    {
+      /* The current row is finished, move on to the next one if there is any */
+      int more_rows = s5_N && s5_i < s5_N-1;
+      if (!more_rows)
+        return 0;
+      s5_i++;
+      s5_j = 0;
+    }
+  uns col = s5_j++;
+  p->x = ((u64)col * s5_K) % s5_N;
+  p->y = ((u64)(s5_i + col) * s5_L) % s5_N;
+  return 1;
+}
+
+/* Generate s5m_sort(n, ary): an array sorter for u32 values */
+#define ASORT_PREFIX(x) s5m_##x
+#define ASORT_KEY_TYPE u32
+#define ASORT_ELT(i) ary[i]
+#define ASORT_EXTRA_ARGS , u32 *ary
+#include "lib/arraysort.h"
+
+/*
+ *  Unification of records held in memory: gather the values of all n
+ *  records into the workspace `buf', sort them and write a single record
+ *  whose key is keys[0] with the count set to the total number of values.
+ */
+static void s5_write_merged(struct fastbuf *f, struct key5 **keys, void **data, uns n, void *buf)
+{
+  u32 *vals = buf;
+  u32 *fill = vals;
+  for (uns i=0; i<n; i++)
+    {
+      uns cnt = keys[i]->cnt;
+      memcpy(fill, data[i], 4*cnt);
+      fill += cnt;
+    }
+  uns total = fill - vals;
+  s5m_sort(total, vals);
+  keys[0]->cnt = total;
+  bwrite(f, keys[0], sizeof(struct key5));
+  bwrite(f, vals, 4*total);
+}
+
+/*
+ *  Unification of records whose data are read from fastbufs: merge the
+ *  value lists of n records with equal keys into a single record written
+ *  to `dest'. k[i] holds the next unwritten value of stream i. The merge
+ *  always takes the smallest head (the lowest stream index wins ties),
+ *  so the output is sorted provided that every input list is sorted.
+ *  The counts in keys[] are decremented along the way.
+ */
+static void s5_copy_merged(struct key5 **keys, struct fastbuf **data, uns n, struct fastbuf *dest)
+{
+  u32 k[n];
+  uns m = 0;
+  /* Fetch the first value of every stream and sum up the counts */
+  for (uns i=0; i<n; i++)
+    {
+      k[i] = bgetl(data[i]);
+      m += keys[i]->cnt;
+    }
+  /* The merged record: the key of the first record with the total count */
+  struct key5 key = { .x = keys[0]->x, .cnt = m };
+  bwrite(dest, &key, sizeof(key));
+  while (key.cnt--)
+    {
+      /* Find the stream with the smallest head */
+      uns b = 0;
+      for (uns i=1; i<n; i++)
+       if (k[i] < k[b])
+         b = i;
+      bputl(dest, k[b]);
+      /* Refill from the stream, or park an exhausted stream at the maximum value */
+      if (--keys[b]->cnt)
+       k[b] = bgetl(data[b]);
+      else
+       k[b] = ~0U;
+    }
+}
+
+/*
+ *  Ordering of pairs for the presorter: by x first, then by y.
+ *  Returns 0 when both fields are equal. (COMPARE_LT presumably
+ *  returns from the function as soon as the fields differ -- its
+ *  definition is not in this file.)
+ */
+static inline int s5p_lt(struct s5_pair x, struct s5_pair y)
+{
+  COMPARE_LT(x.x, y.x);
+  COMPARE_LT(x.y, y.y);
+  return 0;
+}
+
+/* Generate s5p_sort(): an array sorter for pairs, ordered by s5p_lt() */
+#define ASORT_PREFIX(x) s5p_##x
+#define ASORT_KEY_TYPE struct s5_pair
+#define ASORT_LT(x,y) s5p_lt(x,y)
+#include "lib/sorter/array.h"
+
+/*
+ *  Custom presorter: fill the buffer with as many generated pairs as fit,
+ *  sort them and write them to `dest' grouped by x -- for each run of
+ *  equal x a key5 header with the length of the run, followed by the
+ *  y values of the run. Returns 0 if the generator had no more pairs,
+ *  1 otherwise.
+ */
+static int s5_presort(struct fastbuf *dest, void *buf, size_t bufsize)
+{
+  struct s5_pair *pairs = buf;
+  uns capacity = MIN(bufsize/sizeof(struct s5_pair), 0xffffffff);
+
+  uns n;
+  for (n=0; n<capacity; n++)
+    if (!s5_gen(&pairs[n]))
+      break;
+  if (!n)
+    return 0;
+
+  s5p_sort(pairs, n);
+
+  for (uns first=0, next; first<n; first=next)
+    {
+      /* Find the end of the run of pairs sharing the same x */
+      next = first+1;
+      while (next < n && pairs[next].x == pairs[first].x)
+        next++;
+      struct key5 k = { .x = pairs[first].x, .cnt = next-first };
+      bwrite(dest, &k, sizeof(k));
+      for (uns t=first; t<next; t++)
+        bputl(dest, pairs[t].y);
+    }
+  return 1;
+}
+
+/*
+ *  Instantiate the sorter as s5_sort(NULL, out, range): the input comes
+ *  from the custom presorter s5_presort(), records with equal keys are
+ *  unified by s5_write_merged() / s5_copy_merged(), and the output is
+ *  appended to a fastbuf supplied by the caller.
+ */
+#define SORT_KEY_REGULAR struct key5
+#define SORT_PREFIX(x) s5_##x
+#define SORT_DATA_SIZE(k) (4*(k).cnt)
+#define SORT_UNIFY
+#define SORT_UNIFY_WORKSPACE(k) SORT_DATA_SIZE(k)
+#define SORT_INPUT_PRESORT
+#define SORT_OUTPUT_THIS_FB
+#define SORT_INT(k) (k).x
+
+#include "lib/sorter/sorter.h"
+
+/*
+ *  Instantiate the sorter as s5b_sort(in, out, range): the same as above,
+ *  but reading the records from a fastbuf. The unification callbacks
+ *  are shared with the s5_ instance.
+ */
+#define SORT_KEY_REGULAR struct key5
+#define SORT_PREFIX(x) s5b_##x
+#define SORT_DATA_SIZE(k) (4*(k).cnt)
+#define SORT_UNIFY
+#define SORT_UNIFY_WORKSPACE(k) SORT_DATA_SIZE(k)
+#define SORT_INPUT_FB
+#define SORT_OUTPUT_THIS_FB
+#define SORT_INT(k) (k).x
+#define s5b_write_merged s5_write_merged
+#define s5b_copy_merged s5_copy_merged
+
+#include "lib/sorter/sorter.h"
+
+/*
+ *  Sort the pairs produced by s5_gen(), unifying pairs with the same x.
+ *  In mode 0 the pairs are fed in by the custom presorter, otherwise they
+ *  are first written to a fastbuf as single-value records. The output
+ *  fastbuf already contains a marker word, which must be preserved.
+ *  The expected result is N records x = 0..N-1, each with values 0..N-1.
+ */
+static void
+test_graph(uns mode, u64 size)
+{
+  /* Take primes N, starting with 3, until N*(N+2)*4 reaches the requested size */
+  uns N = 0;
+  if (size)
+    for (N=3; (u64)N*(N+2)*4 < size; )
+      N = nextprime(N);
+  msg(L_INFO, ">>> Graph%s (N=%u)", (mode ? "" : " with custom presorting"), N);
+
+  /* Reset the generator */
+  s5_N = N;
+  s5_K = N/4*3;
+  s5_L = N/3*2;
+  s5_i = s5_j = 0;
+
+  struct fastbuf *in = NULL;
+  if (mode)
+    {
+      in = bopen_tmp(65536);
+      struct s5_pair p;
+      while (s5_gen(&p))
+        {
+          struct key5 k = { .x = p.x, .cnt = 1 };
+          bwrite(in, &k, sizeof(k));
+          bputl(in, p.y);
+        }
+      brewind(in);
+    }
+
+  start();
+  struct fastbuf *f = bopen_tmp(65536);
+  bputl(f, 0xfeedcafe);
+  struct fastbuf *g;
+  if (mode)
+    g = s5b_sort(in, f, s5_N-1);
+  else
+    g = s5_sort(NULL, f, s5_N-1);
+  ASSERT(f == g);
+  stop();
+
+  SORT_XTRACE(2, "Verifying");
+  uns marker = bgetl(f);
+  ASSERT(marker == 0xfeedcafe);
+  for (uns x=0; x<N; x++)
+    {
+      struct key5 k;
+      int ok = breadb(f, &k, sizeof(k));
+      ASSERT(ok);
+      ASSERT(k.x == x);
+      ASSERT(k.cnt == N);
+      for (uns want=0; want<N; want++)
+        {
+          uns y = bgetl(f);
+          ASSERT(y == want);
+        }
+    }
+  bclose(f);
+}
+
+/*** Simple 8-byte integer keys ***/
+
+/* A record consisting of a single 64-bit integer key and no data */
+struct key6 {
+  u64 x;
+};
+
+/* Instantiate the sorter as s6_sort(in, out, range): distinct keys sorted by a 64-bit integer */
+#define SORT_KEY_REGULAR struct key6
+#define SORT_PREFIX(x) s6_##x
+#define SORT_INPUT_FB
+#define SORT_OUTPUT_FB
+#define SORT_UNIQUE
+#define SORT_INT64(k) (k).x
+
+#include "lib/sorter/sorter.h"
+
+/*
+ *  The 64-bit counterpart of test_int(): sort the values 777777*v for
+ *  v = 0..N-1, presented in the order given by `mode', and check that
+ *  they come out in increasing order.
+ */
+static void
+test_int64(int mode, u64 size)
+{
+  static char *mode_names[] = { "increasing", "decreasing", "random" };
+  u64 N = 0;
+  if (size)
+    N = nextprime(MIN(size/8, 0xffff0000));
+  u64 K = N/4*3;
+  msg(L_INFO, ">>> 64-bit integers (%s, N=%llu)", mode_names[mode], (long long)N);
+
+  struct fastbuf *f = bopen_tmp(65536);
+  for (u64 i=0; i<N; i++)
+    {
+      u64 v;
+      if (mode == 0)
+        v = i;
+      else if (mode == 1)
+        v = N-1-i;
+      else
+        v = ((u64)i * K + 17) % N;
+      bputq(f, 777777*v);
+    }
+  brewind(f);
+
+  start();
+  f = s6_sort(f, NULL, 777777*(N-1));
+  stop();
+
+  SORT_XTRACE(2, "Verifying");
+  for (u64 i=0; i<N; i++)
+    {
+      u64 got = bgetq(f);
+      if (got != 777777*i)
+        die("Discrepancy: %llu instead of %llu", (long long)got, 777777*(long long)i);
+    }
+  bclose(f);
+}
+
+/*** Main ***/
+
+static void
+run_test(uns i, u64 size)
+{
+  test_id = i;
+  switch (i)
+    {
+    case 0:
+      test_int(0, size); break;
+    case 1:
+      test_int(1, size); break;
+    case 2:
+      test_int(2, size); break;
+    case 3:
+      test_counted(0, size); break;
+    case 4:
+      test_counted(1, size); break;
+    case 5:
+      test_counted(2, size); break;
+    case 6:
+      test_hashes(0, size); break;
+    case 7:
+      test_hashes(1, size); break;
+    case 8:
+      test_hashes(2, size); break;
+    case 9:
+      test_strings(0, size); break;
+    case 10:
+      test_strings(1, size); break;
+    case 11:
+      test_graph(0, size); break;
+    case 12:
+      test_graph(1, size); break;
+    case 13:
+      test_int64(0, size); break;
+    case 14:
+      test_int64(1, size); break;
+    case 15:
+      test_int64(2, size); break;
+#define TMAX 16
+    }
+}
+
+/*
+ *  Usage: sort-test [-v] [-d <debug>] [-s <size>] [-t <test>]
+ *
+ *  -d  sets sorter_debug
+ *  -s  sets the approximate size of test data (default 10000000)
+ *  -t  selects tests by a comma-separated list of test numbers
+ *      (default: all tests)
+ *  -v  increases sorter_trace
+ *
+ *  The selected tests are kept as a bit mask and run in increasing order
+ *  of their numbers. Any invalid option or argument prints the usage
+ *  and exits with status 1.
+ */
+int
+main(int argc, char **argv)
+{
+  log_init(NULL);
+  int c;
+  u64 size = 10000000;
+  uns t = ~0;
+
+  while ((c = cf_getopt(argc, argv, CF_SHORT_OPTS "d:s:t:v", CF_NO_LONG_OPTS, NULL)) >= 0)
+    switch (c)
+      {
+      case 'd':
+       sorter_debug = atol(optarg);
+       break;
+      case 's':
+       if (cf_parse_u64(optarg, &size))
+         goto usage;
+       break;
+      case 't':
+         {
+           char *w[32];
+           int f = sepsplit(optarg, ',', w, ARRAY_SIZE(w));
+           if (f < 0)
+             goto usage;
+           t = 0;
+           for (int i=0; i<f; i++)
+             {
+               int j = atol(w[i]);
+               /* Reject negative numbers as well: shifting by a negative count is undefined */
+               if (j < 0 || j >= TMAX)
+                 goto usage;
+               t |= 1U << j;
+             }
+         }
+       break;
+      case 'v':
+       sorter_trace++;
+       break;
+      default:
+      usage:
+       fputs("Usage: sort-test [-v] [-d <debug>] [-s <size>] [-t <test>]\n", stderr);
+       exit(1);
+      }
+  /* No non-option arguments are accepted */
+  if (optind != argc)
+    goto usage;
+
+  for (uns i=0; i<TMAX; i++)
+    if (t & (1U << i))
+      run_test(i, size);
+
+  return 0;
+}
diff --git a/lib/sorter/sorter.h b/lib/sorter/sorter.h

new file mode 100644 (file)

index 0000000..565a8de
--- /dev/null
+++ b/lib/sorter/sorter.h
@@ -0,0 +1,320 @@
+/*
+ *     UCW Library -- Universal Sorter
+ *
+ *     (c) 2001--2007 Martin Mares <mj@ucw.cz>
+ *     (c) 2004 Robert Spalek <robert@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+/*
+ *  This is not a normal header file, but a generator of sorting
+ *  routines.  Each time you include it with parameters set in the
+ *  corresponding preprocessor macros, it generates a file sorter
+ *  with the parameters given.
+ *
+ *  The sorter operates on fastbufs containing sequences of items. Each item
+ *  consists of a key, optionally followed by data. The keys are represented
+ *  by fixed-size structures of type SORT_KEY internally, if this format differs
+ *  from the on-disk format, explicit reading and writing routines can be provided.
+ *  The data are always copied verbatim, unless the sorter is in the merging
+ *  mode in which it calls callbacks for merging of items with equal keys.
+ *
+ *  All callbacks must be thread-safe.
+ *
+ *  Basic parameters and callbacks:
+ *
+ *  SORT_PREFIX(x)      add a name prefix (used on all global names defined by the sorter)
+ *
+ *  SORT_KEY           data type capable of holding a single key in memory (the on-disk
+ *                     representation can be different). Alternatively, you can use:
+ *  SORT_KEY_REGULAR   data type holding a single key both in memory and on disk;
+ *                     in this case, bread() and bwrite() are used to read/write keys
+ *                     and it's also assumed that the keys are not very long.
+ *  int PREFIX_compare(SORT_KEY *a, SORT_KEY *b)
+ *                     compares two keys, returns result like strcmp(). Mandatory.
+ *  int PREFIX_read_key(struct fastbuf *f, SORT_KEY *k)
+ *                     reads a key from a fastbuf, returns nonzero=ok, 0=EOF.
+ *                     Mandatory unless SORT_KEY_REGULAR is defined.
+ *  void PREFIX_write_key(struct fastbuf *f, SORT_KEY *k)
+ *                     writes a key to a fastbuf. Mandatory unless SORT_KEY_REGULAR.
+ *
+ *  SORT_KEY_SIZE(key) returns the real size of a key (a SORT_KEY type in memory
+ *                     can be truncated to this number of bytes without any harm;
+ *                     used to save memory when the keys have variable sizes).
+ *                     Default: always store the whole SORT_KEY.
+ *  SORT_DATA_SIZE(key)        gets a key and returns the amount of data following it.
+ *                     Default: records consist of keys only.
+ *
+ *  Integer sorting:
+ *
+ *  SORT_INT(key)      we are sorting by an integer value returned by this macro.
+ *                     In this mode, PREFIX_compare is supplied automatically and the sorting
+ *                     function gets an extra parameter specifying the range of the integers.
+ *                     The better the range fits, the faster we sort.
+ *                     Sets up SORT_HASH_xxx automatically.
+ *  SORT_INT64(key)    the same for 64-bit integers.
+ *
+ *  Hashing (optional, but it can speed sorting up):
+ *
+ *  SORT_HASH_BITS     signals that a monotone hashing function returning a given number of
+ *                     bits is available. A monotone hash is a function f from keys to integers
+ *                     such that f(x) < f(y) implies x < y, which is approximately uniformly
+ *                     distributed. It should be declared as:
+ *  uns PREFIX_hash(SORT_KEY *a)
+ *
+ *  Unification:
+ *
+ *  SORT_UNIFY         merge items with identical keys. It requires the following functions:
+ *  void PREFIX_write_merged(struct fastbuf *f, SORT_KEY **keys, void **data, uns n, void *buf)
+ *                     takes n records in memory with keys which compare equal and writes
+ *                     a single record to the given fastbuf. `buf' points to a buffer which
+ *                     is guaranteed to hold the sum of workspace requirements (see below)
+ *                     over all given records. The function is allowed to modify all its inputs.
+ *  void PREFIX_copy_merged(SORT_KEY **keys, struct fastbuf **data, uns n, struct fastbuf *dest)
+ *                     takes n records with keys in memory and data in fastbufs and writes
+ *                     a single record. Used only if SORT_DATA_SIZE or SORT_UNIFY_WORKSPACE
+ *                     is defined.
+ *  SORT_UNIFY_WORKSPACE(key)
+ *                     gets a key and returns the amount of workspace required when merging
+ *                     the given record. Defaults to 0.
+ *
+ *  Input (choose one of these):
+ *
+ *  SORT_INPUT_FILE    file of a given name
+ *  SORT_INPUT_FB      seekable fastbuf stream
+ *  SORT_INPUT_PIPE    non-seekable fastbuf stream
+ *  SORT_INPUT_PRESORT custom presorter. Calls function
+ *  int PREFIX_presort(struct fastbuf *dest, void *buf, size_t bufsize)
+ *                     to get successive batches of pre-sorted data.
+ *                     The function is passed a page-aligned presorting buffer.
+ *                     It returns 1 on success or 0 on EOF.
+ *  SORT_DELETE_INPUT  A C expression, if true, then the input files are deleted
+ *                     as soon as possible.
+ *
+ *  Output (choose one of these):
+ *
+ *  SORT_OUTPUT_FILE   file of a given name
+ *  SORT_OUTPUT_FB     temporary fastbuf stream
+ *  SORT_OUTPUT_THIS_FB        a given fastbuf stream which can already contain some data
+ *
+ *  Other switches:
+ *
+ *  SORT_UNIQUE                all items have distinct keys (checked in debug mode)
+ *
+ *  The function generated:
+ *
+ *  <outfb> PREFIX_sort(<in>, <out> [,<range>]), where:
+ *                     <in> = input file name/fastbuf or NULL
+ *                     <out> = output file name/fastbuf or NULL
+ *                     <range> = maximum integer value for the SORT_INT mode
+ *
+ *  After including this file, all parameter macros are automatically
+ *  undef'd.
+ */
+
+#include "lib/sorter/common.h"
+#include "lib/fastbuf.h"
+
+#include <fcntl.h>
+
+/* P(name) is the name with the user-supplied prefix applied */
+#define P(x) SORT_PREFIX(x)
+
+/*
+ *  The key type. For SORT_KEY_REGULAR, the on-disk format is the memory
+ *  image of the structure, so the reading and writing routines are
+ *  generated here. For SORT_KEY, nothing is generated here and the
+ *  P(read_key) and P(write_key) used below must come from the user.
+ */
+#ifdef SORT_KEY_REGULAR
+typedef SORT_KEY_REGULAR P(key);
+/* Read one key; returns the result of breadb() */
+static inline int P(read_key) (struct fastbuf *f, P(key) *k)
+{
+  return breadb(f, k, sizeof(P(key)));
+}
+/* Write one key */
+static inline void P(write_key) (struct fastbuf *f, P(key) *k)
+{
+  bwrite(f, k, sizeof(P(key)));
+}
+#elif defined(SORT_KEY)
+typedef SORT_KEY P(key);
+#else
+#error Missing definition of sorting key.
+#endif
+
+/*
+ *  The type of hash values: u64 for SORT_INT64 (which is then handled
+ *  as SORT_INT with SORT_LONG_HASH set), uns otherwise.
+ */
+#ifdef SORT_INT64
+typedef u64 P(hash_t);
+#define SORT_INT SORT_INT64
+#define SORT_LONG_HASH
+#else
+typedef uns P(hash_t);
+#endif
+
+#ifdef SORT_INT
+/* Comparison of integer keys, returns -1, 0 or 1 */
+static inline int P(compare) (P(key) *x, P(key) *y)
+{
+  if (SORT_INT(*x) < SORT_INT(*y))
+    return -1;
+  if (SORT_INT(*x) > SORT_INT(*y))
+    return 1;
+  return 0;
+}
+
+/* Unless the user has supplied a hash function, the integer itself serves as the hash */
+#ifndef SORT_HASH_BITS
+static inline P(hash_t) P(hash) (P(key) *x)
+{
+  return SORT_INT((*x));
+}
+#endif
+#endif
+
+/*
+ *  Helper macros. LESS and SWAP are not used in this file itself
+ *  (presumably they serve the s-*.h modules included below): LESS is
+ *  the strict `<' when unifying and `<=' otherwise, SWAP exchanges
+ *  x and y using z as a temporary.
+ */
+#ifdef SORT_UNIFY
+#define LESS <
+#else
+#define LESS <=
+#endif
+#define SWAP(x,y,z) do { z=x; x=y; y=z; } while(0)
+
+/* Uniqueness of keys is checked only when compiled with DEBUG_ASSERTS */
+#if defined(SORT_UNIQUE) && defined(DEBUG_ASSERTS)
+#define SORT_ASSERT_UNIQUE
+#endif
+
+/* A user-supplied SORT_KEY_SIZE marks the keys as variable-sized; the default is the whole key */
+#ifdef SORT_KEY_SIZE
+#define SORT_VAR_KEY
+#else
+#define SORT_KEY_SIZE(key) sizeof(key)
+#endif
+
+/* A user-supplied SORT_DATA_SIZE marks the records as carrying data; the default is no data */
+#ifdef SORT_DATA_SIZE
+#define SORT_VAR_DATA
+#else
+#define SORT_DATA_SIZE(key) 0
+#endif
+
+/*
+ *  Copy one record to `out': write the key (already in memory) and,
+ *  if the records carry data, copy SORT_DATA_SIZE(*key) bytes of data
+ *  from `in'. Without data, `in' is not touched.
+ */
+static inline void P(copy_data)(P(key) *key, struct fastbuf *in, struct fastbuf *out)
+{
+  P(write_key)(out, key);
+#ifdef SORT_VAR_DATA
+  bbcopy(in, out, SORT_DATA_SIZE(*key));
+#else
+  (void) in;
+#endif
+}
+
+/*
+ *  When unifying records with neither data nor workspace, the user need
+ *  not supply P(copy_merged): this default passes the keys on to
+ *  P(write_merged) with no data and no buffer.
+ */
+#if defined(SORT_UNIFY) && !defined(SORT_VAR_DATA) && !defined(SORT_UNIFY_WORKSPACE)
+static inline void P(copy_merged)(P(key) **keys, struct fastbuf **data UNUSED, uns n, struct fastbuf *dest)
+{
+  P(write_merged)(dest, keys, NULL, n, NULL);
+}
+#endif
+
+/* The radix module is included only if a hash function or integer keys are available */
+#if defined(SORT_HASH_BITS) || defined(SORT_INT)
+#define SORT_INTERNAL_RADIX
+#include "lib/sorter/s-radix.h"
+#endif
+
+/*
+ *  Select the internal sorter: s-internal.h for variable-sized keys,
+ *  records with data or unification with workspace, s-fixint.h otherwise.
+ */
+#if defined(SORT_VAR_KEY) || defined(SORT_VAR_DATA) || defined(SORT_UNIFY_WORKSPACE)
+#include "lib/sorter/s-internal.h"
+#else
+#include "lib/sorter/s-fixint.h"
+#endif
+
+/* The two-way and multi-way modules are always included */
+#include "lib/sorter/s-twoway.h"
+#include "lib/sorter/s-multiway.h"
+
+/*
+ *  The generated entry point. The types of `in' and `out' depend on the
+ *  selected input and output method (a file name or a fastbuf), and
+ *  `int_range' is present only when sorting by integers. The function
+ *  fills in a sort_context with the input, the output and the routines
+ *  generated above and passes it to sorter_run().
+ *
+ *  Returns the output fastbuf from the context, or NULL for
+ *  SORT_OUTPUT_FILE, where the output is handed to bfix_tmp_file()
+ *  together with the name `out' instead.
+ */
+static struct fastbuf *P(sort)(
+#ifdef SORT_INPUT_FILE
+                              byte *in,
+#else
+                              struct fastbuf *in,
+#endif
+#ifdef SORT_OUTPUT_FILE
+                              byte *out
+#else
+                              struct fastbuf *out
+#endif
+#ifdef SORT_INT
+                              , u64 int_range
+#endif
+                              )
+{
+  struct sort_context ctx;
+  bzero(&ctx, sizeof(ctx));
+
+  /* Set up the input; an input size of ~0 stands for an unknown size */
+#ifdef SORT_INPUT_FILE
+  ctx.in_fb = bopen_file(in, O_RDONLY, &sorter_fb_params);
+  ctx.in_size = bfilesize(ctx.in_fb);
+#elif defined(SORT_INPUT_FB)
+  ctx.in_fb = in;
+  ctx.in_size = bfilesize(in);
+#elif defined(SORT_INPUT_PIPE)
+  ctx.in_fb = in;
+  ctx.in_size = ~(u64)0;
+#elif defined(SORT_INPUT_PRESORT)
+  ASSERT(!in);
+  ctx.custom_presort = P(presort);
+  ctx.in_size = ~(u64)0;
+#else
+#error No input given.
+#endif
+  /* NOTE(review): with SORT_INPUT_PRESORT, ctx.in_fb is still NULL here -- confirm that SORT_DELETE_INPUT is never combined with it */
+#ifdef SORT_DELETE_INPUT
+  if (SORT_DELETE_INPUT)
+    bconfig(ctx.in_fb, BCONFIG_IS_TEMP_FILE, 1);
+#endif
+
+  /* Set up the output; only SORT_OUTPUT_THIS_FB supplies a fastbuf to the context */
+#ifdef SORT_OUTPUT_FB
+  ASSERT(!out);
+#elif defined(SORT_OUTPUT_THIS_FB)
+  ctx.out_fb = out;
+#elif defined(SORT_OUTPUT_FILE)
+  /* Just assume fastbuf output and rename the fastbuf later */
+#else
+#error No output given.
+#endif
+
+  /* Number of hash bits: given by the user, or the bit length of int_range */
+#ifdef SORT_HASH_BITS
+  ctx.hash_bits = SORT_HASH_BITS;
+  ctx.radix_split = P(radix_split);
+#elif defined(SORT_INT)
+  ctx.hash_bits = 0;
+  while (ctx.hash_bits < 64 && (int_range >> ctx.hash_bits))
+    ctx.hash_bits++;
+  ctx.radix_split = P(radix_split);
+#endif
+
+  /* Routines generated by the included s-*.h modules */
+  ctx.internal_sort = P(internal);
+  ctx.internal_estimate = P(internal_estimate);
+  ctx.twoway_merge = P(twoway_merge);
+  ctx.multiway_merge = P(multiway_merge);
+
+  sorter_run(&ctx);
+
+#ifdef SORT_OUTPUT_FILE
+  bfix_tmp_file(ctx.out_fb, out);
+  ctx.out_fb = NULL;
+#endif
+  return ctx.out_fb;
+}
+
+/*
+ *  Undefine all parameter macros and the helper macros defined above,
+ *  so that this file can be included again with different parameters.
+ */
+#undef SORT_ASSERT_UNIQUE
+#undef SORT_DATA_SIZE
+#undef SORT_DELETE_INPUT
+#undef SORT_HASH_BITS
+#undef SORT_INPUT_FB
+#undef SORT_INPUT_FILE
+#undef SORT_INPUT_PIPE
+#undef SORT_INPUT_PRESORT
+#undef SORT_INT
+#undef SORT_INT64
+#undef SORT_INTERNAL_RADIX
+#undef SORT_KEY
+#undef SORT_KEY_REGULAR
+#undef SORT_KEY_SIZE
+#undef SORT_LONG_HASH
+#undef SORT_OUTPUT_FB
+#undef SORT_OUTPUT_FILE
+#undef SORT_OUTPUT_THIS_FB
+#undef SORT_PREFIX
+#undef SORT_UNIFY
+#undef SORT_UNIFY_WORKSPACE
+#undef SORT_UNIQUE
+#undef SORT_VAR_DATA
+#undef SORT_VAR_KEY
+#undef SWAP
+#undef LESS
+#undef P
diff --git a/lib/stkstring.c b/lib/stkstring.c

new file mode 100644 (file)

index 0000000..13bc8e0
--- /dev/null
+++ b/lib/stkstring.c
@@ -0,0 +1,126 @@
+#include "lib/lib.h"
+#include "lib/stkstring.h"
+
+#include <stdio.h>
+
+/*
+ *  Return the number of bytes needed to hold the concatenation of `cnt'
+ *  strings from the array `s', including the terminating zero.
+ */
+uns
+stk_array_len(char **s, uns cnt)
+{
+  uns total = 1;
+  for (uns i=0; i<cnt; i++)
+    total += strlen(s[i]);
+  return total;
+}
+
+/*
+ *  Concatenate `cnt' strings from the array `s' to the buffer `x' and
+ *  terminate the result by a zero byte. If `sep' is non-zero, it is
+ *  inserted between every two consecutive strings. The buffer must be
+ *  large enough, no checks are done here.
+ */
+void
+stk_array_join(char *x, char **s, uns cnt, uns sep)
+{
+  for (uns i=0; i<cnt; i++)
+    {
+      if (i && sep)
+        *x++ = sep;
+      uns l = strlen(s[i]);
+      memcpy(x, s[i], l);
+      x += l;
+    }
+  *x = 0;
+}
+
+/*
+ *  Return the number of bytes (including the terminating zero) needed
+ *  for the output of the given printf-like format. The formatting is
+ *  tried in a scratch buffer on the stack; whenever vsnprintf() reports
+ *  an error by a negative result, it is retried with a buffer twice
+ *  as large.
+ */
+uns
+stk_printf_internal(const char *fmt, ...)
+{
+  uns size = 256;
+  char *buf = alloca(size);
+  va_list args, args2;
+  int l;
+
+  va_start(args, fmt);
+  for (;;)
+    {
+      /* Every attempt needs a fresh copy of the argument list */
+      va_copy(args2, args);
+      l = vsnprintf(buf, size, fmt, args2);
+      va_end(args2);
+      if (l >= 0)
+        break;
+      size *= 2;
+      buf = alloca(size);
+    }
+  va_end(args);
+  return l+1;
+}
+
+/*
+ *  A va_list variant of stk_printf_internal(): return the number of
+ *  bytes (including the terminating zero) needed for the formatted
+ *  output.
+ *
+ *  The caller's `args' are only copied, never consumed or ended here:
+ *  va_end() belongs to the function which called va_start(), and the
+ *  stk_vprintf() macro uses the same list again for the real formatting.
+ */
+uns
+stk_vprintf_internal(const char *fmt, va_list args)
+{
+  uns len = 256;
+  char *buf = alloca(len);
+  va_list args2;
+  for (;;)
+    {
+      va_copy(args2, args);
+      int l = vsnprintf(buf, len, fmt, args2);
+      va_end(args2);
+      if (l >= 0)
+       return l+1;
+      /* vsnprintf() has reported an error, retry with a larger buffer */
+      len *= 2;
+      buf = alloca(len);
+    }
+}
+
+/*
+ *  Write `n' bytes from `src' to `dst', each formatted by "%02x" and
+ *  separated by single spaces, and terminate the result by a zero byte.
+ */
+void
+stk_hexdump_internal(char *dst, const byte *src, uns n)
+{
+  char *out = dst;
+  for (uns i=0; i<n; i++)
+    {
+      if (i > 0)
+        *out++ = ' ';
+      out += sprintf(out, "%02x", src[i]);
+    }
+  *out = 0;
+}
+
+/*
+ *  Format the size `x' (in bytes) in a human-readable form to `buf':
+ *  a whole number with a unit (B, K, M, G), with one decimal place for
+ *  values below 10 units of K, M and G. The value ~0 is printed as
+ *  "unknown". The result takes at most 13 bytes including the
+ *  terminating zero.
+ */
+void
+stk_fsize_internal(char *buf, u64 x)
+{
+  if (x < 1<<10)
+    sprintf(buf, "%dB", (int)x);
+  else if (x < 10<<10)
+    sprintf(buf, "%.1fK", (double)x/(1<<10));
+  else if (x < 1<<20)
+    sprintf(buf, "%dK", (int)(x/(1<<10)));
+  else if (x < 10<<20)
+    sprintf(buf, "%.1fM", (double)x/(1<<20));
+  else if (x < 1<<30)
+    sprintf(buf, "%dM", (int)(x/(1<<20)));
+  else if (x < (u64)10<<30)
+    sprintf(buf, "%.1fG", (double)x/(1<<30));
+  else if (x != ~(u64)0)
+    /* The number of gigabytes can exceed the range of int, so print it in full width */
+    sprintf(buf, "%lluG", (unsigned long long)(x/(1<<30)));
+  else
+    strcpy(buf, "unknown");
+}
+
+#ifdef TEST
+
+/*
+ *  A self-test exercising the stk_* macros; its output is checked
+ *  against lib/stkstring.t.
+ */
+int main(void)
+{
+  /* Build the first line step by step from pieces */
+  char *a = stk_strndup("are!",3);
+  a = stk_strcat(a, " the ");
+  a = stk_strmulticat(a, stk_strdup("Jabberwock, "), "my", NULL);
+  char *arr[] = { a, " son" };
+  a = stk_strarraycat(arr, 2);
+  a = stk_printf("Bew%s!", a);
+  puts(a);
+  /* Hex dump of the first three bytes of that line */
+  puts(stk_hexdump(a, 3));
+  char *ary[] = { "The", "jaws", "that", "bite" };
+  puts(stk_strjoin(ary, 4, ' '));
+  puts(stk_fsize(1234567));
+  return 0;
+}
+
+#endif
diff --git a/lib/stkstring.h b/lib/stkstring.h

new file mode 100644 (file)

index 0000000..d0d6ad7
--- /dev/null
+++ b/lib/stkstring.h
@@ -0,0 +1,38 @@
+/*
+ *     UCW Library -- Strings Allocated on the Stack
+ *
+ *     (c) 2005--2007 Martin Mares <mj@ucw.cz>
+ *     (c) 2005 Tomas Valla <tom@ucw.cz>
+ *     (c) 2008 Pavel Charvat <pchar@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_STKSTRING_H
+#define _UCW_STKSTRING_H
+
+#include <alloca.h>
+#include <string.h>
+#include <stdio.h>
+
+/*
+ *  All of these macros evaluate to a pointer to a string allocated by
+ *  alloca() in the stack frame of the calling function, so the result is
+ *  valid only until that function returns. Beware that stk_printf() and
+ *  stk_vprintf() expand their arguments twice.
+ *
+ *  stk_strdup(s)              copy of a string
+ *  stk_strndup(s,n)           copy of at most n characters of a string
+ *  stk_strcat(s1,s2)          concatenation of two strings
+ *  stk_strmulticat(s...)      concatenation of a list of strings; the last
+ *                             argument is not used (the callers pass NULL)
+ *  stk_strarraycat(s,n)       concatenation of n strings from an array
+ *  stk_strjoin(s,n,sep)       like stk_strarraycat(), with the character sep
+ *                             between the strings; n can be zero
+ *  stk_printf(f...)           formatted string
+ *  stk_vprintf(f, args)       formatted string, va_list version
+ *  stk_hexdump(s,n)           hex dump of n bytes
+ *  stk_str_unesc(s)           result of str_unesc() on the string
+ *  stk_fsize(n)               human-readable size
+ */
+#define stk_strdup(s) ({ const char *_s=(s); uns _l=strlen(_s)+1; char *_x=alloca(_l); memcpy(_x, _s, _l); _x; })
+#define stk_strndup(s,n) ({ const char *_s=(s); uns _l=strnlen(_s,(n)); char *_x=alloca(_l+1); memcpy(_x, _s, _l); _x[_l]=0; _x; })
+#define stk_strcat(s1,s2) ({ const char *_s1=(s1); const char *_s2=(s2); uns _l1=strlen(_s1); uns _l2=strlen(_s2); char *_x=alloca(_l1+_l2+1); memcpy(_x,_s1,_l1); memcpy(_x+_l1,_s2,_l2+1); _x; })
+#define stk_strmulticat(s...) ({ char *_s[]={s}; char *_x=alloca(stk_array_len(_s, ARRAY_SIZE(_s)-1)); stk_array_join(_x, _s, ARRAY_SIZE(_s)-1, 0); _x; })
+#define stk_strarraycat(s,n) ({ char **_s=(s); int _n=(n); char *_x=alloca(stk_array_len(_s,_n)); stk_array_join(_x, _s, _n, 0); _x; })
+/* For n == 0 there are no separators; the buffer must still hold the terminating zero */
+#define stk_strjoin(s,n,sep) ({ char **_s=(s); int _n=(n); char *_x=alloca(stk_array_len(_s,_n)+(_n>0 ? _n-1 : 0)); stk_array_join(_x, _s, _n, (sep)); _x; })
+#define stk_printf(f...) ({ uns _l=stk_printf_internal(f); char *_x=alloca(_l); sprintf(_x, f); _x; })
+#define stk_vprintf(f, args) ({ uns _l=stk_vprintf_internal(f, args); char *_x=alloca(_l); vsprintf(_x, f, args); _x; })
+#define stk_hexdump(s,n) ({ uns _n=(n); char *_x=alloca(3*_n+1); stk_hexdump_internal(_x,(const byte*)(s),_n); _x; })
+#define stk_str_unesc(s) ({ const char *_s=(s); char *_d=alloca(strlen(_s)+1); str_unesc(_d, _s); _d; })
+#define stk_fsize(n) ({ char *_s=alloca(16); stk_fsize_internal(_s, n); _s; })
+
+/* Internal functions called by the macros above */
+uns stk_array_len(char **s, uns cnt);
+void stk_array_join(char *x, char **s, uns cnt, uns sep);
+uns stk_printf_internal(const char *x, ...) FORMAT_CHECK(printf,1,2);
+uns stk_vprintf_internal(const char *x, va_list args);
+void stk_hexdump_internal(char *dst, const byte *src, uns n);
+void stk_fsize_internal(char *dst, u64 size);
+
+#endif
diff --git a/lib/stkstring.t b/lib/stkstring.t

new file mode 100644 (file)

index 0000000..bbb3eda
--- /dev/null
+++ b/lib/stkstring.t
@@ -0,0 +1,7 @@
+# Tests for stkstring modules
+
+Run:   ../obj/lib/stkstring-t
+Out:   Beware the Jabberwock, my son!
+       42 65 77
+       The jaws that bite
+       1.2M
diff --git a/lib/str-test.c b/lib/str-test.c

new file mode 100644 (file)

index 0000000..e289258
--- /dev/null
+++ b/lib/str-test.c
@@ -0,0 +1,132 @@
+/*
+ *     Checking the correctness of str_len() and hash_*() and proving, that
+ *     it is faster than the classical version ;-)
+ */
+
+#include "lib/hashfunc.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/time.h>
+
+/* It will be divided by (10 + strlen()).  */
+#define TEST_TIME      1000000
+
+/* The shift of the string according to the alignment.  */
+static uns alignment = 0;
+
+/* Fill str[0..len-1] with random non-zero bytes and terminate the string
+ * with a zero byte, so the buffer must provide len+1 bytes. */
+static void
+random_string(byte *str, int len)
+{
+       byte *end = str + len;
+       while (str < end)
+               *str++ = random() % 255 + 1;
+       *end = 0;
+}
+
+/* Return the number of microseconds between the previous call and this one
+ * and remember the current time for the next call. The reference time is
+ * a static variable, so the very first call measures from a zeroed timeval. */
+static uns
+elapsed_time(void)
+{
+       static struct timeval last_tv, tv;
+       gettimeofday(&tv, NULL);
+       time_t d_sec = tv.tv_sec - last_tv.tv_sec;
+       suseconds_t d_usec = tv.tv_usec - last_tv.tv_usec;
+       last_tv = tv;
+       return d_sec * 1000000 + d_usec;
+}
+
+/* Test driver: checks str_len() against strlen() and hash_string() against
+ * hash_block() on a fixed set of strings, then times strlen(), str_len(),
+ * hash_string() and hash_string_nocase() on random strings of various lengths.
+ * An optional first argument sets the offset (alignment) at which the random
+ * strings start inside their buffer. */
+int
+main(int argc, char **argv)
+{
+       /* Fixed test strings, terminated by NULL */
+       byte *strings[] = {
+               "",
+               "a",
+               "aa",
+               "aaa",
+               "aaaa",
+               "aaaaa",
+               "aaaaaa",
+               "aaaaaaa",
+               "aaaaaaaa",
+               "aaaaaaaaa",
+               "aaaaaaaaaa",
+               "AHOJ",
+               "\200aaaa",
+               "\200",
+               "\200\200",
+               "\200\200\200",
+               "\200\200\200\200",
+               "\200\200\200\200\200",
+               "kelapS treboR",
+               "Robert Spalek",
+               "uuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuu",
+               "********************************",
+               "****************************************************************",
+               NULL
+       };
+       /* Lengths of the random strings used for timing, terminated by -1 */
+       int lengths[] = {
+               0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
+               11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
+               30, 40, 50, 60, 70, 80, 90, 100,
+               200, 300, 400, 500, 600, 700, 800, 900, 1000,
+               2000, 4000, 8000, 16000, 32000, 64000,
+               -1
+       };
+       int i;
+       if (argc > 1)
+               alignment = atoi(argv[1]);
+       printf("Alignment set to %d\n", alignment);
+       /* Pass 1: str_len() must agree with strlen() on every fixed string */
+       for (i=0; strings[i]; i++)
+               if (strlen(strings[i]) != str_len(strings[i]))
+                       die("Internal str_len() error on string %d", i);
+       printf("%d strings tested OK\n", i);
+       /* Pass 2: hash_string() must agree with hash_block() over the same bytes */
+       for (i=0; strings[i]; i++)
+       {
+               uns h1, h2;
+               h1 = hash_string(strings[i]);
+               h2 = hash_string_nocase(strings[i]);
+               if (h1 != hash_block(strings[i], str_len(strings[i])))
+                       die("Internal hash_string() error on string %d", i);
+               printf("hash %2d = %08x %08x", i, h1, h2);
+               if (h1 == h2)
+                       printf(" upper case?");
+               printf("\n");
+       }
+       /* Pass 3: timing on random strings; the number of passes shrinks with the length */
+       for (i=0; lengths[i] >= 0; i++)
+       {
+               byte str[lengths[i] + 1 + alignment];
+               uns count = TEST_TIME / (lengths[i] + 10);
+               uns el1 = 0, el2 = 0, elh = 0, elhn = 0;
+               uns tot1 = 0, tot2 = 0, hash = 0, hashn = 0;
+               uns j;
+               for (j=0; j<count; j++)
+               {
+                       random_string(str + alignment, lengths[i]);
+                       /* Reset the stopwatch; each following call returns the time of one measured operation */
+                       elapsed_time();
+                       /* Avoid "optimizing" by gcc, since the functions are
+                        * attributed PURE.  */
+                       tot1 += strlen(str + alignment);
+                       el1 += elapsed_time();
+                       tot2 += str_len(str + alignment);
+                       el2 += elapsed_time();
+                       hash ^= hash_string(str + alignment);
+                       elh += elapsed_time();
+                       hashn ^= hash_string_nocase(str + alignment);
+                       elhn += elapsed_time();
+               }
+               /* Both length functions have seen the same strings, so the sums must match */
+               if (tot1 != tot2)
+                       die("Internal error during test %d", i);
+               printf("Test %d: strlen = %d, passes = %d, classical = %d usec, speedup = %.4f\n",
+                       i, lengths[i], count, el1, (el1 + 0.) / el2);
+               printf("\t\t total hash = %08x/%08x, hash time = %d/%d usec\n", hash, hashn, elh, elhn);
+       }
+/*
+       printf("test1: %d\n", hash_modify(10000000, 10000000, 99777555));
+       printf("test1: %d, %d\n", i, hash_modify(i, lengths[i-2], 99777333));
+       printf("test1: %d, %d\n", i, hash_modify(lengths[i-2], i, 99777333));
+       printf("test1: %d,%d,%d->%d\n", i, i*3-2, i*i, hash_modify(4587, i*3-2, i*i));
+       printf("test1: %d\n", hash_modify(lengths[5], 345, i));
+*/
+       return 0;
+}
diff --git a/lib/str_ctype.c b/lib/str_ctype.c

new file mode 100644 (file)

index 0000000..2857d8e
--- /dev/null
+++ b/lib/str_ctype.c
@@ -0,0 +1,16 @@
+/*
+ *     UCW Library -- Character Classes
+ *
+ *     (c) 1998--2004 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/chartype.h"
+
+/* Character category table. Its initializer is generated by including
+ * lib/charmap.h with CHAR() defined to emit only its `cat' argument. */
+const unsigned char _c_cat[256] = {
+#define CHAR(code,upper,lower,cat) cat,
+#include "lib/charmap.h"
+#undef CHAR
+};
diff --git a/lib/str_lower.c b/lib/str_lower.c

new file mode 100644 (file)

index 0000000..f548a11
--- /dev/null
+++ b/lib/str_lower.c
@@ -0,0 +1,16 @@
+/*
+ *     UCW Library -- Lowercase Map
+ *
+ *     (c) 1997--2004 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/chartype.h"
+
+/* Lowercase mapping table. Its initializer is generated by including
+ * lib/charmap.h with CHAR() defined to emit only its `lower' argument. */
+const unsigned char _c_lower[256] = {
+#define CHAR(code,upper,lower,cat) lower,
+#include "lib/charmap.h"
+#undef CHAR
+};
diff --git a/lib/str_upper.c b/lib/str_upper.c

new file mode 100644 (file)

index 0000000..e527956
--- /dev/null
+++ b/lib/str_upper.c
@@ -0,0 +1,16 @@
+/*
+ *     UCW Library -- Uppercase Map
+ *
+ *     (c) 1997--2004 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/chartype.h"
+
+/* Uppercase mapping table. Its initializer is generated by including
+ * lib/charmap.h with CHAR() defined to emit only its `upper' argument. */
+const unsigned char _c_upper[256] = {
+#define CHAR(code,upper,lower,cat) upper,
+#include "lib/charmap.h"
+#undef CHAR
+};
diff --git a/lib/string.c b/lib/string.c

new file mode 100644 (file)

index 0000000..602a7d7
--- /dev/null
+++ b/lib/string.c
@@ -0,0 +1,90 @@
+/*
+ *     UCW Library -- String Routines
+ *
+ *     (c) 2006 Pavel Charvat <pchar@ucw.cz>
+ *     (c) 2007 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#undef LOCAL_DEBUG
+
+#include "lib/lib.h"
+#include "lib/chartype.h"
+#include <stdlib.h>
+
+/* Expands C99-like escape sequences found in `s' and stores the result,
+ * including a terminating zero byte, to `d'. Every escape sequence is at
+ * least as long as the byte it produces, so it is safe to use the same
+ * buffer for both input and output.
+ *
+ * Unknown escapes are copied verbatim (backslash included), a "\x" without
+ * hex digits loses just its backslash and out-of-range numeric escapes are
+ * dropped. Returns a pointer to the terminating zero byte written to `d'. */
+char *
+str_unesc(char *d, const char *s)
+{
+  while (*s)
+    {
+      if (*s == '\\')
+       switch (s[1])
+         {
+           case 'a': *d++ = '\a'; s += 2; break;
+           case 'b': *d++ = '\b'; s += 2; break;
+           case 'f': *d++ = '\f'; s += 2; break;
+           case 'n': *d++ = '\n'; s += 2; break;
+           case 'r': *d++ = '\r'; s += 2; break;
+           case 't': *d++ = '\t'; s += 2; break;
+           case 'v': *d++ = '\v'; s += 2; break;
+           case '\?': *d++ = '\?'; s += 2; break;
+           case '\'': *d++ = '\''; s += 2; break;
+           case '\"': *d++ = '\"'; s += 2; break;
+           case '\\': *d++ = '\\'; s += 2; break;
+           case 'x':
+             if (!Cxdigit(s[2]))
+               {
+                 /* Skip only the backslash, the 'x' gets copied by the next iteration */
+                 s++;
+                 DBG("\\x used with no following hex digits");
+               }
+             else
+               {
+                 char *p;
+                 /* Keep the full width of strtoul()'s result, so that the range
+                  * check cannot be fooled by truncation to a narrower type */
+                 unsigned long v = strtoul(s + 2, &p, 16);
+                 if (v <= 255)
+                   *d++ = v;
+                 else
+                   DBG("hex escape sequence out of range");
+                 s = (char *)p;
+               }
+             break;
+           default:
+             if (s[1] >= '0' && s[1] <= '7')
+               {
+                 /* Octal escape: one to three octal digits */
+                 uns v = s[1] - '0';
+                 s += 2;
+                 for (uns i = 0; i < 2 && *s >= '0' && *s <= '7'; s++, i++)
+                   v = (v << 3) + *s - '0';
+                 if (v <= 255)
+                   *d++ = v;
+                 else
+                   DBG("octal escape sequence out of range");
+               }
+             else
+               {
+                 /* Unknown escape (or a trailing backslash): copy the backslash
+                  * as is. This must not run after an octal escape, because `s'
+                  * may already point to the end of the string there. */
+                 *d++ = *s++;
+               }
+             break;
+         }
+      else
+       *d++ = *s++;
+    }
+  *d = 0;
+  return d;
+}
+
+/* Renders a bit mask as text: for every character fmt[i], the output gets
+ * fmt[i] if bit i of `flags' is set and '-' otherwise. `dest' must provide
+ * room for strlen(fmt)+1 bytes and `fmt' should not be longer than the number
+ * of bits in `flags'. Returns `dest'. */
+char *
+str_format_flags(char *dest, const char *fmt, uns flags)
+{
+  char *start = dest;
+  for (uns i=0; fmt[i]; i++)
+    {
+      /* Unsigned constant, so that testing the top bit does not overflow a signed int */
+      if (flags & (1U << i))
+       *dest++ = fmt[i];
+      else
+       *dest++ = '-';
+    }
+  *dest = 0;
+  return start;
+}
diff --git a/lib/sync.c b/lib/sync.c

new file mode 100644 (file)

index 0000000..6341415
--- /dev/null
+++ b/lib/sync.c
@@ -0,0 +1,28 @@
+/*
+ *     UCW Library -- Syncing Directories
+ *
+ *     (c) 2004--2005 Martin Mares <mj@ucw.cz>
+ */
+
+#include "lib/lib.h"
+
+#include <fcntl.h>
+#include <unistd.h>
+
+/* Opens the directory `name' read-only, calls fsync() on it and closes it
+ * again. A failure of open() or fsync() is reported by msg(L_ERROR, ...);
+ * nothing is returned to the caller. */
+void
+sync_dir(const char *name)
+{
+  int flags = O_RDONLY;
+#ifdef CONFIG_LINUX
+  flags |= O_DIRECTORY;
+#endif
+
+  int fd = open(name, flags);
+  if (fd >= 0)
+    {
+      int res = fsync(fd);
+      close(fd);
+      if (res >= 0)
+       return;
+    }
+  msg(L_ERROR, "Unable to sync directory %s: %m", name);
+}
diff --git a/lib/threads-conf.c b/lib/threads-conf.c

new file mode 100644 (file)

index 0000000..f3ac5a2
--- /dev/null
+++ b/lib/threads-conf.c
@@ -0,0 +1,27 @@
+/*
+ *     The UCW Library -- Threading Helpers
+ *
+ *     (c) 2006 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/threads.h"
+#include "lib/conf.h"
+
+/* Default thread stack size (presumably in bytes); can be overridden by the
+ * DefaultStackSize item of the Threads configuration section declared below.
+ * NOTE(review): 65556 looks like a typo for 65536 -- confirm before changing. */
+uns default_thread_stack_size = 65556;
+
+/* Items of the configuration section */
+static struct cf_section threads_config = {
+  CF_ITEMS {
+    CF_UNS("DefaultStackSize", &default_thread_stack_size),
+    CF_END
+  }
+};
+
+/* Registers the section under the name "Threads"; marked CONSTRUCTOR,
+ * so presumably run automatically before main() -- TODO confirm. */
+static void CONSTRUCTOR
+ucwlib_threads_conf_init(void)
+{
+  cf_declare_section("Threads", &threads_config, 0);
+}
diff --git a/lib/threads.c b/lib/threads.c

new file mode 100644 (file)

index 0000000..c7497f9
--- /dev/null
+++ b/lib/threads.c
@@ -0,0 +1,123 @@
+/*
+ *     The UCW Library -- Threading Helpers
+ *
+ *     (c) 2006 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/threads.h"
+
+#ifdef CONFIG_UCW_THREADS
+
+#include <pthread.h>
+
+#ifdef CONFIG_LINUX
+#include <sys/types.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#ifdef __NR_gettid
+/* Invokes the gettid system call directly through syscall().
+ * NOTE(review): a C library that declares its own gettid() in <unistd.h>
+ * would clash with this static definition -- confirm on the target libc. */
+static pid_t
+gettid(void)
+{
+  return syscall(__NR_gettid);
+}
+#define CONFIG_USE_GETTID
+#endif
+#endif
+
+static pthread_key_t ucwlib_context_key;
+static pthread_mutex_t ucwlib_master_mutex;
+
+/* Destructor passed to pthread_key_create(): frees the per-thread context `p' */
+static void
+ucwlib_free_thread_context(void *p)
+{
+  xfree(p);
+}
+
+/* Creates the thread-specific data key for the per-thread context and
+ * initializes the master mutex. Dies if the key cannot be created. */
+static void CONSTRUCTOR
+ucwlib_threads_init(void)
+{
+  /* pthread functions report errors by returning a positive error number;
+   * they never return a negative value and do not set errno, so we have to
+   * test for non-zero and print the code ourselves instead of using %m. */
+  int err = pthread_key_create(&ucwlib_context_key, ucwlib_free_thread_context);
+  if (err)
+    die("Cannot create pthread_key: error %d", err);
+  pthread_mutex_init(&ucwlib_master_mutex, NULL);
+}
+
+/* Returns an ID for the calling thread: the result of gettid() when it is
+ * available and positive, otherwise the next value of a counter protected
+ * by the master lock. */
+static int
+ucwlib_tid(void)
+{
+  static int tid_counter;
+
+#ifdef CONFIG_USE_GETTID
+  int kernel_tid = gettid();
+  if (kernel_tid > 0)
+    return kernel_tid;
+  /* The syscall might be unimplemented */
+#endif
+
+  ucwlib_lock();
+  int id = ++tid_counter;
+  ucwlib_unlock();
+  return id;
+}
+
+/* Returns the context of the calling thread. On the first call in a thread,
+ * a zeroed context is allocated, given a thread ID and stored under the
+ * thread-specific key. */
+struct ucwlib_context *
+ucwlib_thread_context(void)
+{
+  struct ucwlib_context *ctx = pthread_getspecific(ucwlib_context_key);
+  if (ctx)
+    return ctx;
+
+  ctx = xmalloc_zero(sizeof(*ctx));
+  ctx->thread_id = ucwlib_tid();
+  pthread_setspecific(ucwlib_context_key, ctx);
+  return ctx;
+}
+
+/* Locks the master mutex; the return value of pthread_mutex_lock() is ignored */
+void
+ucwlib_lock(void)
+{
+  pthread_mutex_lock(&ucwlib_master_mutex);
+}
+
+/* Unlocks the master mutex; the return value of pthread_mutex_unlock() is ignored */
+void
+ucwlib_unlock(void)
+{
+  pthread_mutex_unlock(&ucwlib_master_mutex);
+}
+
+#else
+
+/* Variant used when CONFIG_UCW_THREADS is not defined: there is a single
+ * statically allocated context shared by all callers. */
+struct ucwlib_context *
+ucwlib_thread_context(void)
+{
+  static struct ucwlib_context ucwlib_context;
+  return &ucwlib_context;
+}
+
+/* Without CONFIG_UCW_THREADS, locking is a no-operation */
+void
+ucwlib_lock(void)
+{
+}
+
+/* Without CONFIG_UCW_THREADS, unlocking is a no-operation */
+void
+ucwlib_unlock(void)
+{
+}
+
+#endif
+
+#ifdef TEST
+
+/* Self-test: takes and releases the master lock and logs the ID of the calling thread */
+int main(void)
+{
+  ucwlib_lock();
+  ucwlib_unlock();
+  log(L_INFO, "tid=%d", ucwlib_thread_context()->thread_id);
+  return 0;
+}
+
+#endif
diff --git a/lib/threads.h b/lib/threads.h

new file mode 100644 (file)

index 0000000..c70354f
--- /dev/null
+++ b/lib/threads.h
@@ -0,0 +1,35 @@
+/*
+ *     The UCW Library -- Threading Helpers
+ *
+ *     (c) 2006 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_THREAD_H
+#define _UCW_THREAD_H
+
+/* Per-thread data of the library; a pointer to the instance belonging to
+ * the calling thread is returned by ucwlib_thread_context(). */
+
+struct ucwlib_context {
+  int thread_id;                       // Thread ID (either kernel tid or a counter)
+  int temp_counter;                    // Counter for fb-temp.c
+  struct asio_queue *io_queue;         // Async I/O queue for fb-direct.c
+  sh_sighandler_t *signal_handlers;    // Signal handlers for sighandler.c
+};
+
+struct ucwlib_context *ucwlib_thread_context(void);
+
+/* Global lock used for initialization, cleanup and other not so frequently accessed global state */
+
+void ucwlib_lock(void);
+void ucwlib_unlock(void);
+
+#ifdef CONFIG_UCW_THREADS
+
+extern uns default_thread_stack_size;
+
+#endif
+
+#endif
diff --git a/lib/timer.c b/lib/timer.c

new file mode 100644 (file)

index 0000000..761ab30
--- /dev/null
+++ b/lib/timer.c
@@ -0,0 +1,43 @@
+/*
+ *     UCW Library -- A Simple Millisecond Timer
+ *
+ *     (c) 2007 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/time.h>
+
+/* Returns the time reported by gettimeofday(), converted to milliseconds */
+timestamp_t
+get_timestamp(void)
+{
+  struct timeval now;
+  gettimeofday(&now, NULL);
+  timestamp_t ms = (timestamp_t)now.tv_sec * 1000;
+  ms += now.tv_usec / 1000;
+  return ms;
+}
+
+/* Starts a timer by storing the current timestamp to `*timer' */
+void
+init_timer(timestamp_t *timer)
+{
+  *timer = get_timestamp();
+}
+
+/* Returns the number of milliseconds since `*timer' was last set (clamped
+ * to ~0U) and restarts the timer at the current timestamp. */
+uns
+get_timer(timestamp_t *timer)
+{
+  timestamp_t started = *timer;
+  timestamp_t now = get_timestamp();
+  *timer = now;
+  return MIN(now - started, ~0U);
+}
+
+/* Starts the timer `*new' at the current timestamp and returns how many
+ * milliseconds (clamped to ~0U) have passed since `*old' was set. */
+uns
+switch_timer(timestamp_t *old, timestamp_t *new)
+{
+  timestamp_t now = get_timestamp();
+  *new = now;
+  return MIN(now - *old, ~0U);
+}
diff --git a/lib/unaligned.h b/lib/unaligned.h

new file mode 100644 (file)

index 0000000..62a6643
--- /dev/null
+++ b/lib/unaligned.h
@@ -0,0 +1,174 @@
+/*
+ *     UCW Library -- Fast Access to Unaligned Data
+ *
+ *     (c) 1997--2007 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_UNALIGNED_H
+#define _UCW_UNALIGNED_H
+
+/* Big endian format */
+
+#if defined(CPU_ALLOW_UNALIGNED) && defined(CPU_BIG_ENDIAN)
+/* Unaligned accesses are allowed and the byte order matches: access the memory directly */
+static inline uns get_u16_be(const void *p) { return *(u16 *)p; }
+static inline u32 get_u32_be(const void *p) { return *(u32 *)p; }
+static inline u64 get_u64_be(const void *p) { return *(u64 *)p; }
+static inline void put_u16_be(void *p, uns x) { *(u16 *)p = x; }
+static inline void put_u32_be(void *p, u32 x) { *(u32 *)p = x; }
+static inline void put_u64_be(void *p, u64 x) { *(u64 *)p = x; }
+#else
+/* Generic variant: compose and decompose the values byte by byte, most significant byte first */
+static inline uns get_u16_be(const void *p)
+{
+  const byte *c = p;
+  return (c[0] << 8) | c[1];
+}
+static inline u32 get_u32_be(const void *p)
+{
+  const byte *c = p;
+  /* The top byte is widened to u32 first: as a promoted int, a value >= 0x80
+   * shifted by 24 bits would overflow into the sign bit. */
+  return ((u32)c[0] << 24) | (c[1] << 16) | (c[2] << 8) | c[3];
+}
+static inline u64 get_u64_be(const void *p)
+{
+  return ((u64) get_u32_be(p) << 32) | get_u32_be((const byte *)p+4);
+}
+static inline void put_u16_be(void *p, uns x)
+{
+  byte *c = p;
+  c[0] = x >> 8;
+  c[1] = x;
+}
+static inline void put_u32_be(void *p, u32 x)
+{
+  byte *c = p;
+  c[0] = x >> 24;
+  c[1] = x >> 16;
+  c[2] = x >> 8;
+  c[3] = x;
+}
+static inline void put_u64_be(void *p, u64 x)
+{
+  put_u32_be(p, x >> 32);
+  put_u32_be((byte *)p+4, x);
+}
+#endif
+
+/* Little-endian format */
+
+#if defined(CPU_ALLOW_UNALIGNED) && !defined(CPU_BIG_ENDIAN)
+/* Unaligned accesses are allowed and the byte order matches: access the memory directly */
+static inline uns get_u16_le(const void *p) { return *(u16 *)p; }
+static inline u32 get_u32_le(const void *p) { return *(u32 *)p; }
+static inline u64 get_u64_le(const void *p) { return *(u64 *)p; }
+static inline void put_u16_le(void *p, uns x) { *(u16 *)p = x; }
+static inline void put_u32_le(void *p, u32 x) { *(u32 *)p = x; }
+static inline void put_u64_le(void *p, u64 x) { *(u64 *)p = x; }
+#else
+/* Generic variant: compose and decompose the values byte by byte, least significant byte first */
+static inline uns get_u16_le(const void *p)
+{
+  const byte *c = p;
+  return c[0] | (c[1] << 8);
+}
+static inline u32 get_u32_le(const void *p)
+{
+  const byte *c = p;
+  /* The top byte is widened to u32 first: as a promoted int, a value >= 0x80
+   * shifted by 24 bits would overflow into the sign bit. */
+  return c[0] | (c[1] << 8) | (c[2] << 16) | ((u32)c[3] << 24);
+}
+static inline u64 get_u64_le(const void *p)
+{
+  return get_u32_le(p) | ((u64) get_u32_le((const byte *)p+4) << 32);
+}
+static inline void put_u16_le(void *p, uns x)
+{
+  byte *c = p;
+  c[0] = x;
+  c[1] = x >> 8;
+}
+static inline void put_u32_le(void *p, u32 x)
+{
+  byte *c = p;
+  c[0] = x;
+  c[1] = x >> 8;
+  c[2] = x >> 16;
+  c[3] = x >> 24;
+}
+static inline void put_u64_le(void *p, u64 x)
+{
+  put_u32_le(p, x);
+  put_u32_le((byte *)p+4, x >> 32);
+}
+#endif
+
+/* Reads a 40-bit big-endian value: one high byte followed by a 32-bit word */
+static inline u64 get_u40_be(const void *p)
+{
+  const byte *b = p;
+  u64 hi = b[0];
+  u64 lo = get_u32_be(b+1);
+  return (hi << 32) | lo;
+}
+
+/* Writes the low 40 bits of `x' in big-endian order */
+static inline void put_u40_be(void *p, u64 x)
+{
+  byte *b = p;
+  put_u32_be(b+1, x);
+  b[0] = x >> 32;
+}
+
+/* Reads a 40-bit little-endian value: a 32-bit word followed by one high byte */
+static inline u64 get_u40_le(const void *p)
+{
+  const byte *b = p;
+  u64 lo = get_u32_le(b);
+  u64 hi = b[4];
+  return lo | (hi << 32);
+}
+
+/* Writes the low 40 bits of `x' in little-endian order */
+static inline void put_u40_le(void *p, u64 x)
+{
+  byte *b = p;
+  b[4] = x >> 32;
+  put_u32_le(b, x);
+}
+
+/* The native format */
+
+#ifdef CPU_BIG_ENDIAN
+
+/* Native byte order is big-endian: forward to the _be variants.
+ * The put functions return void, so they call the helper without `return'
+ * (ISO C does not allow returning an expression from a void function). */
+static inline uns get_u16(const void *p) { return get_u16_be(p); }
+static inline u32 get_u32(const void *p) { return get_u32_be(p); }
+static inline u64 get_u64(const void *p) { return get_u64_be(p); }
+static inline u64 get_u40(const void *p) { return get_u40_be(p); }
+static inline void put_u16(void *p, uns x) { put_u16_be(p, x); }
+static inline void put_u32(void *p, u32 x) { put_u32_be(p, x); }
+static inline void put_u64(void *p, u64 x) { put_u64_be(p, x); }
+static inline void put_u40(void *p, u64 x) { put_u40_be(p, x); }
+
+#else
+
+/* Native byte order is little-endian: forward to the _le variants */
+static inline uns get_u16(const void *p) { return get_u16_le(p); }
+static inline u32 get_u32(const void *p) { return get_u32_le(p); }
+static inline u64 get_u64(const void *p) { return get_u64_le(p); }
+static inline u64 get_u40(const void *p) { return get_u40_le(p); }
+static inline void put_u16(void *p, uns x) { put_u16_le(p, x); }
+static inline void put_u32(void *p, u32 x) { put_u32_le(p, x); }
+static inline void put_u64(void *p, u64 x) { put_u64_le(p, x); }
+static inline void put_u40(void *p, u64 x) { put_u40_le(p, x); }
+
+#endif
+
+/* Just for completeness */
+
+/* Single-byte accessors with the same calling convention as the wider ones */
+static inline uns get_u8(const void *p) { return *(const byte *)p; }
+static inline void put_u8(void *p, uns x) { *(byte *)p = x; }
+
+/* Backward compatibility macros */
+
+/* Upper-case aliases of the native-order accessors. All of them expand to
+ * a plain expression without a trailing semicolon, so that they can be used
+ * anywhere a function call can, including `if (...) PUT_U8(p,x); else ...'. */
+#define GET_U8(p) get_u8(p)
+#define GET_U16(p) get_u16(p)
+#define GET_U32(p) get_u32(p)
+#define GET_U64(p) get_u64(p)
+#define GET_U40(p) get_u40(p)
+
+#define PUT_U8(p,x) put_u8(p,x)
+#define PUT_U16(p,x) put_u16(p,x)
+#define PUT_U32(p,x) put_u32(p,x)
+#define PUT_U64(p,x) put_u64(p,x)
+#define PUT_U40(p,x) put_u40(p,x)
+
+#endif
diff --git a/lib/unicode.c b/lib/unicode.c

new file mode 100644 (file)

index 0000000..c5ffb82
--- /dev/null
+++ b/lib/unicode.c
@@ -0,0 +1,143 @@
+/*
+ *     UCW Library -- UTF-8 Functions
+ *
+ *     (c) 1997--2004 Martin Mares <mj@ucw.cz>
+ *     (c) 2003 Robert Spalek <robert@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/unicode.h"
+
+/* Returns the number of characters of a zero-terminated UTF-8 string,
+ * stepping over the characters by UTF8_SKIP(). */
+uns
+utf8_strlen(const byte *str)
+{
+  uns count;
+  for (count = 0; *str; count++)
+    UTF8_SKIP(str);
+  return count;
+}
+
+/* Returns the number of UTF-8 characters stored in the `n' bytes starting
+ * at `str'. A character cut off by the end of the buffer is counted as one
+ * character. The function never reads bytes at or beyond str + n. */
+uns
+utf8_strnlen(const byte *str, uns n)
+{
+  uns len = 0;
+  const byte *end = str + n;
+  while (str < end)
+    {
+      /* Same stepping as UTF8_SKIP(), but bounded by `end': the macro peeks
+       * at the byte following the current one, which may lie past the buffer. */
+      uns c = *str++;
+      if (c >= 0xc0)
+       while (c & 0x40 && str < end && *str >= 0x80 && *str < 0xc0)
+         str++, c <<= 1;
+      len++;
+    }
+  return len;
+}
+
+#ifdef TEST
+
+#include <string.h>
+#include <stdio.h>
+
+/* Test driver. The first argument selects one of the functions listed in
+ * FUNCS (matched case-insensitively). For the *_GET functions, hexadecimal
+ * byte values are read from stdin and the decoded characters are printed;
+ * for the *_PUT functions, hexadecimal character codes are read and the
+ * bytes of their encoding are printed. Returns 1 on invalid usage. */
+int main(int argc, char **argv)
+{
+  byte buf[256];
+
+#define FUNCS \
+  F(UTF8_GET) F(UTF8_32_GET) F(UTF16_BE_GET) F(UTF16_LE_GET) \
+  F(UTF8_PUT) F(UTF8_32_PUT) F(UTF16_BE_PUT) F(UTF16_LE_PUT)
+
+  /* FUNC_xxx constants and the table of their names are generated from the same list */
+  enum {
+#define F(x) FUNC_##x,
+    FUNCS
+#undef F
+  };
+  char *names[] = {
+#define F(x) [FUNC_##x] = #x,
+    FUNCS
+#undef F
+  };
+
+  uns func = ~0U;
+  if (argc > 1)
+    for (uns i = 0; i < ARRAY_SIZE(names); i++)
+      if (!strcasecmp(names[i], argv[1]))
+       func = i;
+  if (!~func)
+    {
+      fprintf(stderr, "Invalid usage!\n");
+      return 1;
+    }
+
+  if (func < FUNC_UTF8_PUT)
+    {
+      byte *p = buf, *q = buf, *last;
+      uns u;
+      bzero(buf, sizeof(buf));
+      /* Never fill the last 4 bytes of the buffer: it keeps the input inside
+       * buf[] and leaves zero padding behind it, so that a decoder looking
+       * ahead for continuation bytes or a second surrogate stays in bounds.
+       * Input which does not fit is ignored. */
+      while (q < buf + sizeof(buf) - 4 && scanf("%x", &u) == 1)
+       *q++ = u;
+      while (p < q)
+       {
+         last = p;
+         if (p != buf)
+           putchar(' ');
+         switch (func)
+           {
+             case FUNC_UTF8_GET:
+               p = utf8_get(p, &u);
+               break;
+             case FUNC_UTF8_32_GET:
+               p = utf8_32_get(p, &u);
+               break;
+             case FUNC_UTF16_BE_GET:
+               p = utf16_be_get(p, &u);
+               break;
+             case FUNC_UTF16_LE_GET:
+               p = utf16_le_get(p, &u);
+               break;
+             default:
+               ASSERT(0);
+           }
+         printf("%04x", u);
+         /* Every step must consume something and must not run past the input */
+         ASSERT(last < p && p <= q);
+       }
+      putchar('\n');
+    }
+  else
+    {
+      /* `i' counts the bytes printed so far and controls the separating spaces */
+      uns u, i=0;
+      while (scanf("%x", &u) == 1)
+       {
+         byte *p = buf, *q = buf;
+         switch (func)
+           {
+             case FUNC_UTF8_PUT:
+               p = utf8_put(p, u);
+               break;
+             case FUNC_UTF8_32_PUT:
+               p = utf8_32_put(p, u);
+               break;
+             case FUNC_UTF16_BE_PUT:
+               p = utf16_be_put(p, u);
+               break;
+             case FUNC_UTF16_LE_PUT:
+               p = utf16_le_put(p, u);
+               break;
+             default:
+               ASSERT(0);
+           }
+         while (q < p)
+           {
+             if (i++)
+               putchar(' ');
+             printf("%02x", *q++);
+           }
+       }
+      putchar('\n');
+    }
+  return 0;
+}
+
+#endif
diff --git a/lib/unicode.h b/lib/unicode.h

new file mode 100644 (file)

index 0000000..9a3fe07
--- /dev/null
+++ b/lib/unicode.h
@@ -0,0 +1,334 @@
+/*
+ *     UCW Library -- Unicode Characters
+ *
+ *     (c) 1997--2004 Martin Mares <mj@ucw.cz>
+ *     (c) 2004 Robert Spalek <robert@ucw.cz>
+ *     (c) 2007 Pavel Charvat <pchar@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_UNICODE_H
+#define _UCW_UNICODE_H
+
+#include "lib/unaligned.h"
+
+/* Macros for handling UTF-8 */
+
+#define UNI_REPLACEMENT 0xfffc
+
+/* Store the UTF-8 encoding of a character from the basic multilingual plane
+ * [0, 0xFFFF] at `p' and return a pointer just behind it. One to three bytes
+ * are written (RFC2279); larger values trigger ASSERT. */
+static inline byte *
+utf8_put(byte *p, uns u)
+{
+  if (u < 0x80)
+    {
+      p[0] = u;
+      return p + 1;
+    }
+  if (u < 0x800)
+    {
+      p[0] = 0xc0 | (u >> 6);
+      p[1] = 0x80 | (u & 0x3f);
+      return p + 2;
+    }
+  ASSERT(u < 0x10000);
+  p[0] = 0xe0 | (u >> 12);
+  p[1] = 0x80 | ((u >> 6) & 0x3f);
+  p[2] = 0x80 | (u & 0x3f);
+  return p + 3;
+}
+
+/* Encode a value from the range [0, 0x7FFFFFFF];
+ * (superset of Unicode 4.0) up to 6 bytes needed (RFC2279).
+ * Returns a pointer just behind the written bytes; larger values
+ * trigger ASSERT. */
+static inline byte *
+utf8_32_put(byte *p, uns u)
+{
+  /* Each branch writes the leading byte for its length and then jumps into
+   * the chain of putN labels, which emits the remaining N continuation bytes. */
+  if (u < 0x80)
+    *p++ = u;
+  else if (u < 0x800)
+    {
+      *p++ = 0xc0 | (u >> 6);
+      goto put1;
+    }
+  else if (u < (1<<16))
+    {
+      *p++ = 0xe0 | (u >> 12);
+      goto put2;
+    }
+  else if (u < (1<<21))
+    {
+      *p++ = 0xf0 | (u >> 18);
+      goto put3;
+    }
+  else if (u < (1<<26))
+    {
+      *p++ = 0xf8 | (u >> 24);
+      goto put4;
+    }
+  else if (u < (1U<<31))
+    {
+      *p++ = 0xfc | (u >> 30);
+      /* Continuation bytes carry 6 bits each, from the most significant group down */
+      *p++ = 0x80 | ((u >> 24) & 0x3f);
+put4: *p++ = 0x80 | ((u >> 18) & 0x3f);
+put3: *p++ = 0x80 | ((u >> 12) & 0x3f);
+put2: *p++ = 0x80 | ((u >> 6) & 0x3f);
+put1: *p++ = 0x80 | (u & 0x3f);
+    }
+  else
+    ASSERT(0);
+  return p;
+}
+
+/* Helper for the decoders below: if *p is not a continuation byte (10xxxxxx),
+ * jump to the local label `bad'; otherwise append its 6 payload bits to `u'
+ * and advance `p'. Expands to two statements, so it must not be used as the
+ * body of an unbraced if/else or loop. */
+#define UTF8_GET_NEXT if (unlikely((*p & 0xc0) != 0x80)) goto bad; u = (u << 6) | (*p++ & 0x3f)
+
+/* Decode a character from the basic multilingual plane [0, 0xFFFF]
+ * or return 'repl' if the encoding has been corrupted.
+ * The character is stored to *uu; the function returns a pointer behind
+ * the bytes it has consumed (for a corrupted sequence, the pointer stays
+ * at the first byte which did not fit). */
+static inline byte *
+utf8_get_repl(const byte *p, uns *uu, uns repl)
+{
+  uns u = *p++;
+  if (u < 0x80)
+    ;
+  else if (unlikely(u < 0xc0))
+    {
+      /* Incorrect byte sequence */
+    bad:
+      u = repl;
+    }
+  else if (u < 0xe0)
+    {
+      /* Two-byte sequence */
+      u &= 0x1f;
+      UTF8_GET_NEXT;
+    }
+  else if (likely(u < 0xf0))
+    {
+      /* Three-byte sequence */
+      u &= 0x0f;
+      UTF8_GET_NEXT;
+      UTF8_GET_NEXT;
+    }
+  else
+    /* Leading bytes of longer sequences are treated as corrupted here */
+    goto bad;
+  *uu = u;
+  return (byte *)p;
+}
+
+/* Decode a value from the range [0, 0x7FFFFFFF]
+ * or return 'repl' if the encoding has been corrupted.
+ * The value is stored to *uu; the function returns a pointer behind
+ * the bytes it has consumed. */
+static inline byte *
+utf8_32_get_repl(const byte *p, uns *uu, uns repl)
+{
+  uns u = *p++;
+  if (u < 0x80)
+    ;
+  else if (unlikely(u < 0xc0))
+    {
+      /* Incorrect byte sequence */
+    bad:
+      u = repl;
+    }
+  /* Each branch below keeps the payload bits of the leading byte and jumps
+   * into the chain of getN labels, which reads N continuation bytes. */
+  else if (u < 0xe0)
+    {
+      u &= 0x1f;
+      goto get1;
+    }
+  else if (u < 0xf0)
+    {
+      u &= 0x0f;
+      goto get2;
+    }
+  else if (u < 0xf8)
+    {
+      u &= 0x07;
+      goto get3;
+    }
+  else if (u < 0xfc)
+    {
+      u &= 0x03;
+      goto get4;
+    }
+  else if (u < 0xfe)
+    {
+      u &= 0x01;
+      UTF8_GET_NEXT;
+get4: UTF8_GET_NEXT;
+get3: UTF8_GET_NEXT;
+get2: UTF8_GET_NEXT;
+get1: UTF8_GET_NEXT;
+    }
+  else
+    /* 0xfe and 0xff are never valid leading bytes */
+    goto bad;
+  *uu = u;
+  return (byte *)p;
+}
+
+/* Decode a character from the basic multilingual plane [0, 0xFFFF]
+ * or return UNI_REPLACEMENT if the encoding has been corrupted.
+ * Same as utf8_get_repl() with `repl' set to UNI_REPLACEMENT. */
+static inline byte *
+utf8_get(const byte *p, uns *uu)
+{
+  return utf8_get_repl(p, uu, UNI_REPLACEMENT);
+}
+
+/* Decode a value from the range [0, 0x7FFFFFFF]
+ * or return UNI_REPLACEMENT if the encoding has been corrupted.
+ * Same as utf8_32_get_repl() with `repl' set to UNI_REPLACEMENT. */
+static inline byte *
+utf8_32_get(const byte *p, uns *uu)
+{
+  return utf8_32_get_repl(p, uu, UNI_REPLACEMENT);
+}
+
+/* Macro forms of the encoders and decoders, which update the pointer
+ * variable `p' in place; GET_* also stores the character to the variable `u'. */
+#define PUT_UTF8(p,u) p = utf8_put(p, u)
+#define GET_UTF8(p,u) p = (byte*)utf8_get(p, &(u))
+
+#define PUT_UTF8_32(p,u) p = utf8_32_put(p, u)
+#define GET_UTF8_32(p,u) p = (byte*)utf8_32_get(p, &(u))
+
+/* Advance `p' over one UTF-8 character without decoding it: the leading byte
+ * plus the continuation bytes (0x80..0xbf) following it, at most as many as
+ * the leading byte announces. Note that the byte behind the current one is
+ * examined, so the data must be readable there (e.g., zero-terminated). */
+#define UTF8_SKIP(p) do {                              \
+    uns c = *p++;                                      \
+    if (c >= 0xc0)                                     \
+      while (c & 0x40 && *p >= 0x80 && *p < 0xc0)      \
+        p++, c <<= 1;                                  \
+  } while (0)
+
+/* Move `p' backwards until it points to a byte which is not a continuation
+ * byte. Expands to a loop header only: follow it by `;' or a loop body. */
+#define UTF8_SKIP_BWD(p) while ((*--(p) & 0xc0) == 0x80)
+
+/* Returns the number of bytes (1 to 6) of the UTF-8 encoding of the value `u' */
+static inline uns
+utf8_space(uns u)
+{
+  return (u < 0x80) ? 1 :
+        (u < 0x800) ? 2 :
+        (u < (1<<16)) ? 3 :
+        (u < (1<<21)) ? 4 :
+        (u < (1<<26)) ? 5 : 6;
+}
+
+/* Returns the length (1 to 6) of a UTF-8 sequence as announced by its leading
+ * byte `c'. Bytes which cannot start a sequence (0x80..0xbf, 0xfe, 0xff)
+ * trigger ASSERT. */
+static inline uns
+utf8_encoding_len(uns c)
+{
+  if (c < 0x80)
+    return 1;
+  ASSERT(c >= 0xc0 && c < 0xfe);
+  return (c < 0xe0) ? 2 :
+        (c < 0xf0) ? 3 :
+        (c < 0xf8) ? 4 :
+        (c < 0xfc) ? 5 : 6;
+}
+
+/* Encode a character from the range [0, 0xD7FF] or [0xE000, 0x10FFFF] as
+ * little-endian UTF-16; up to 4 bytes needed. Returns a pointer just behind
+ * the written bytes. Values outside these ranges trigger ASSERT. */
+static inline void *
+utf16_le_put(void *p, uns u)
+{
+  byte *b = p;                 // Byte pointer, because arithmetic on void * is a GNU extension
+  if (u < 0xd800 || (u < 0x10000 && u >= 0xe000))
+    {
+      put_u16_le(b, u);
+      return b + 2;
+    }
+  else if ((u -= 0x10000) < 0x100000)
+    {
+      /* Surrogate pair: high 10 bits first, then low 10 bits */
+      put_u16_le(b, 0xd800 | (u >> 10));
+      put_u16_le(b + 2, 0xdc00 | (u & 0x3ff));
+      return b + 4;
+    }
+  ASSERT(0);
+  return p;                    // Reached only if ASSERT does not abort: nothing has been written
+}
+
+/* Encode a character from the range [0, 0xD7FF] or [0xE000, 0x10FFFF] as
+ * big-endian UTF-16; up to 4 bytes needed. Returns a pointer just behind
+ * the written bytes. Values outside these ranges trigger ASSERT. */
+static inline void *
+utf16_be_put(void *p, uns u)
+{
+  byte *b = p;                 // Byte pointer, because arithmetic on void * is a GNU extension
+  if (u < 0xd800 || (u < 0x10000 && u >= 0xe000))
+    {
+      put_u16_be(b, u);
+      return b + 2;
+    }
+  else if ((u -= 0x10000) < 0x100000)
+    {
+      /* Surrogate pair: high 10 bits first, then low 10 bits */
+      put_u16_be(b, 0xd800 | (u >> 10));
+      put_u16_be(b + 2, 0xdc00 | (u & 0x3ff));
+      return b + 4;
+    }
+  ASSERT(0);
+  return p;                    // Reached only if ASSERT does not abort: nothing has been written
+}
+
+/* Decode a little-endian UTF-16 character from the range [0, 0xD7FF] or
+ * [0xE000, 0x10FFFF] or return `repl' if the encoding has been corrupted
+ * (a high surrogate not followed by a low one, or a lone low surrogate).
+ * The character is stored to *uu; the function returns a pointer behind
+ * the consumed bytes: 4 for a valid surrogate pair, 2 otherwise. */
+static inline void *
+utf16_le_get_repl(const void *p, uns *uu, uns repl)
+{
+  const byte *b = p;           // Byte pointer, because arithmetic on void * is a GNU extension
+  uns u = get_u16_le(b), x, y;
+  x = u - 0xd800;
+  if (x < 0x800)
+    {
+      /* A surrogate: only a high one followed by a low one is acceptable */
+      if (x < 0x400 && (y = get_u16_le(b + 2) - 0xdc00) < 0x400)
+       {
+         u = 0x10000 + (x << 10) + y;
+         b += 2;
+       }
+      else
+       u = repl;
+    }
+  *uu = u;
+  return (void *)(b + 2);
+}
+
+/* Decode a big-endian UTF-16 character from the range [0, 0xD7FF] or
+ * [0xE000, 0x10FFFF] or return `repl' if the encoding has been corrupted
+ * (a high surrogate not followed by a low one, or a lone low surrogate).
+ * The character is stored to *uu; the function returns a pointer behind
+ * the consumed bytes: 4 for a valid surrogate pair, 2 otherwise. */
+static inline void *
+utf16_be_get_repl(const void *p, uns *uu, uns repl)
+{
+  const byte *b = p;           // Byte pointer, because arithmetic on void * is a GNU extension
+  uns u = get_u16_be(b), x, y;
+  x = u - 0xd800;
+  if (x < 0x800)
+    {
+      /* A surrogate: only a high one followed by a low one is acceptable */
+      if (x < 0x400 && (y = get_u16_be(b + 2) - 0xdc00) < 0x400)
+       {
+         u = 0x10000 + (x << 10) + y;
+         b += 2;
+       }
+      else
+       u = repl;
+    }
+  *uu = u;
+  return (void *)(b + 2);
+}
+
+/* Decode a character from the range [0, 0xD7FF] or [0xE000, 0x10FFFF]
+ * or return UNI_REPLACEMENT if the encoding has been corrupted.
+ * Little-endian variant; same as utf16_le_get_repl() with `repl' set
+ * to UNI_REPLACEMENT. */
+static inline void *
+utf16_le_get(const void *p, uns *uu)
+{
+  return utf16_le_get_repl(p, uu, UNI_REPLACEMENT);
+}
+
+/* Big-endian variant; same as utf16_be_get_repl() with `repl' set
+ * to UNI_REPLACEMENT. */
+static inline void *
+utf16_be_get(const void *p, uns *uu)
+{
+  return utf16_be_get_repl(p, uu, UNI_REPLACEMENT);
+}
+
+/* Returns `u' if it is an acceptable character and UNI_REPLACEMENT otherwise */
+static inline uns
+unicode_sanitize_char(uns u)
+{
+  // We don't accept anything outside the basic plane
+  if (u >= 0x10000)
+    return UNI_REPLACEMENT;
+  // Neither surrogates, nor the rest of the range up to 0xf8ff (the private use area)
+  if (u >= 0xd800 && u < 0xf900)
+    return UNI_REPLACEMENT;
+  // Nor latin-1 control chars
+  if (u >= 0x80 && u < 0xa0)
+    return UNI_REPLACEMENT;
+  // Nor ASCII control chars, except for the tab
+  if (u < 0x20 && u != '\t')
+    return UNI_REPLACEMENT;
+  return u;
+}
+
+/* unicode.c */
+
+uns utf8_strlen(const byte *str);
+uns utf8_strnlen(const byte *str, uns n);
+
+#endif
diff --git a/lib/unicode.t b/lib/unicode.t

new file mode 100644 (file)

index 0000000..1b5549e
--- /dev/null
+++ b/lib/unicode.t
@@ -0,0 +1,71 @@
+# Tests for the Unicode module
+
+Name:  utf8_put (1)
+Run:   ../obj/lib/unicode-t utf8_put
+In:    0041 0048 004f 004a
+Out:   41 48 4f 4a
+
+Name:  utf8_put (2)
+Run:   ../obj/lib/unicode-t utf8_put
+In:    00aa 01aa 02a5 05a5 0a5a 15a5 2a5a 5a5a a5a5
+Out:   c2 aa c6 aa ca a5 d6 a5 e0 a9 9a e1 96 a5 e2 a9 9a e5 a9 9a ea 96 a5
+
+Name:  utf8_get (1)
+Run:   ../obj/lib/unicode-t utf8_get
+In:    41 48 4f 4a
+Out:   0041 0048 004f 004a
+
+Name:  utf8_get (2)
+Run:   ../obj/lib/unicode-t utf8_get
+In:    c2 aa c6 aa ca a5 d6 a5 e0 a9 9a e1 96 a5 e2 a9 9a e5 a9 9a ea 96 a5
+Out:   00aa 01aa 02a5 05a5 0a5a 15a5 2a5a 5a5a a5a5
+
+Name:  utf8_get (3)
+Run:   ../obj/lib/unicode-t utf8_get
+In:    84 ff f9 f8 c2 aa 41
+Out:   fffc fffc fffc fffc 00aa 0041
+
+Name:  utf8_32_put
+Run:   ../obj/lib/unicode-t utf8_32_put
+In:    15a5a 2a5a5 5a5a5 a5a5a 15a5a5 2a5a5a 5a5a5a a5a5a5 15a5a5a 2a5a5a5 5a5a5a5 a5a5a5a 15a5a5a5 2a5a5a5a 5a5a5a5a
+Out:   f0 95 a9 9a f0 aa 96 a5 f1 9a 96 a5 f2 a5 a9 9a f5 9a 96 a5 f8 8a a5 a9 9a f8 96 a5 a9 9a f8 a9 9a 96 a5 f9 96 a5 a9 9a fa a9 9a 96 a5 fc 85 a9 9a 96 a5 fc 8a 96 a5 a9 9a fc 95 a9 9a 96 a5 fc aa 96 a5 a9 9a fd 9a 96 a5 a9 9a
+
+Name:  utf8_32_get (1)
+Run:   ../obj/lib/unicode-t utf8_32_get
+In:    f0 95 a9 9a f0 aa 96 a5 f1 9a 96 a5 f2 a5 a9 9a f5 9a 96 a5 f8 8a a5 a9 9a f8 96 a5 a9 9a f8 a9 9a 96 a5 f9 96 a5 a9 9a fa a9 9a 96 a5 fc 85 a9 9a 96 a5 fc 8a 96 a5 a9 9a fc 95 a9 9a 96 a5 fc aa 96 a5 a9 9a fd 9a 96 a5 a9 9a
+Out:   15a5a 2a5a5 5a5a5 a5a5a 15a5a5 2a5a5a 5a5a5a a5a5a5 15a5a5a 2a5a5a5 5a5a5a5 a5a5a5a 15a5a5a5 2a5a5a5a 5a5a5a5a
+
+Name:  utf8_32_get (2)
+Run:   ../obj/lib/unicode-t utf8_32_get
+In:    fe 83 81
+Out:   fffc fffc fffc
+
+Name:  utf16_be_put
+Run:   ../obj/lib/unicode-t utf16_be_put
+In:    0041 004a 2a5f feff 0000 10ffff ffff 10000
+Out:   00 41 00 4a 2a 5f fe ff 00 00 db ff df ff ff ff d8 00 dc 00
+
+Name:  utf16_le_put
+Run:   ../obj/lib/unicode-t utf16_le_put
+In:    0041 004a 2a5f feff 0000 10ffff ffff 10000
+Out:   41 00 4a 00 5f 2a ff fe 00 00 ff db ff df ff ff 00 d8 00 dc
+
+Name:  utf16_be_get (1)
+Run:   ../obj/lib/unicode-t utf16_be_get
+In:    00 41 00 4a 2a 5f fe ff 00 00 db ff df ff ff ff d8 00 dc 00
+Out:   0041 004a 2a5f feff 0000 10ffff ffff 10000
+
+Name:  utf16_be_get (2)
+Run:   ../obj/lib/unicode-t utf16_be_get
+In:    dc 1a 2a 5f d8 01 d8 01 2a 5f d8 01
+Out:   fffc 2a5f fffc fffc 2a5f fffc
+
+Name:  utf16_le_get (1)
+Run:   ../obj/lib/unicode-t utf16_le_get
+In:    41 00 4a 00 5f 2a ff fe 00 00 ff db ff df ff ff 00 d8 00 dc
+Out:   0041 004a 2a5f feff 0000 10ffff ffff 10000
+
+Name:  utf16_le_get (2)
+Run:   ../obj/lib/unicode-t utf16_le_get
+In:    1a dc 5f 2a 01 d8 01 d8 5f 2a 01 d8
+Out:   fffc 2a5f fffc fffc 2a5f fffc
diff --git a/lib/url.c b/lib/url.c

new file mode 100644 (file)

index 0000000..15a731f
--- /dev/null
+++ b/lib/url.c
@@ -0,0 +1,767 @@
+/*
+ *     UCW Library -- URL Functions
+ *
+ *     (c) 1997--2004 Martin Mares <mj@ucw.cz>
+ *     (c) 2001--2005 Robert Spalek <robert@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ *
+ *     The URL syntax corresponds to RFC 2396 with several exceptions:
+ *
+ *        o  Escaping of special characters still follows RFC 1738.
+ *        o  Interpretation of path parameters follows RFC 1808.
+ *
+ *     XXX: The buffer handling in this module is really horrible, but it works.
+ */
+
+#include "lib/lib.h"
+#include "lib/url.h"
+#include "lib/chartype.h"
+#include "lib/conf.h"
+
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <alloca.h>
+
+/* Configuration */
+
+static uns url_ignore_spaces;                  /* Non-zero: url_deescape() drops leading and trailing whitespace */
+static uns url_ignore_underflow;               /* Non-zero: relpath_merge() ignores ".." climbing above the root instead of failing */
+static char *url_component_separators = "";    /* Characters splitting an URL to components for url_has_repeated_component() */
+static uns url_min_repeat_count = 0x7fffffff;  /* Number of consecutive repetitions of a component sequence which is reported */
+static uns url_max_repeat_length = 0;          /* Longest sequence (counted in components) tested for repetition */
+static uns url_max_occurences = ~0U;           /* A single component occurring more often than this is reported */
+
+/* Items of the "URL" configuration section, each bound to one of the variables above */
+static struct cf_section url_config = {
+  CF_ITEMS {
+    CF_UNS("IgnoreSpaces", &url_ignore_spaces),
+    CF_UNS("IgnoreUnderflow", &url_ignore_underflow),
+    CF_STRING("ComponentSeparators", &url_component_separators),
+    CF_UNS("MinRepeatCount", &url_min_repeat_count),
+    CF_UNS("MaxRepeatLength", &url_max_repeat_length),
+    CF_UNS("MaxOccurences", &url_max_occurences),
+    CF_END
+  }
+};
+
+/* Register the "URL" configuration section.
+ * NOTE(review): CONSTRUCTOR presumably makes this run automatically at program start-up -- confirm in lib.h. */
+static void CONSTRUCTOR url_init_config(void)
+{
+  cf_declare_section("URL", &url_config, 0);
+}
+
+/* Escaping and de-escaping */
+
+/* Convert a value in the range 0..15 to the corresponding upper-case hexadecimal digit */
+static uns
+enhex(uns x)
+{
+  if (x < 10)
+    return x + '0';
+  return x - 10 + 'A';
+}
+
+/*
+ * Remove '%' escapes from the URL `s' and store the result to `d'.
+ *
+ * Escaped reserved characters (";/?:@=&#") are not turned back to the plain
+ * characters, but to the corresponding NCC_xxx codes, so that they cannot be
+ * confused with unescaped ones. Returns 0 on success or one of the
+ * URL_ERR_xxx codes.
+ */
+int
+url_deescape(const byte *s, byte *d)
+{
+  byte *dstart = d;
+  byte *end = d + MAX_URL_SIZE - 10;   /* Keep 10 bytes of slack behind the limit */
+  while (*s)
+    {
+      if (d >= end)
+       return URL_ERR_TOO_LONG;
+      if (*s == '%')
+       {
+         unsigned int val;
+         if (!Cxdigit(s[1]) || !Cxdigit(s[2]))
+           return URL_ERR_INVALID_ESCAPE;
+         val = Cxvalue(s[1])*16 + Cxvalue(s[2]);
+         if (val < 0x20)               /* Escaped control characters are refused */
+           return URL_ERR_INVALID_ESCAPED_CHAR;
+         switch (val)                  /* Escaped reserved characters get their non-control codes */
+           {
+           case ';':
+             val = NCC_SEMICOLON; break;
+           case '/':
+             val = NCC_SLASH; break;
+           case '?':
+             val = NCC_QUEST; break;
+           case ':':
+             val = NCC_COLON; break;
+           case '@':
+             val = NCC_AT; break;
+           case '=':
+             val = NCC_EQUAL; break;
+           case '&':
+             val = NCC_AND; break;
+           case '#':
+             val = NCC_HASH; break;
+           }
+         *d++ = val;
+         s += 3;
+       }
+      else if (*s > 0x20)              /* Ordinary character */
+       *d++ = *s++;
+      else if (Cspace(*s))
+       {
+         /* A run of whitespace: find its end first, then decide whether to keep it */
+         const byte *s0 = s;
+         while (Cspace(*s))
+           s++;
+         /* With IgnoreSpaces set, runs at the very start or the very end are dropped */
+         if (!url_ignore_spaces || !(!*s || d == dstart))
+           {
+             while (Cspace(*s0))
+               {
+                 if (d >= end)
+                   return URL_ERR_TOO_LONG;
+                 *d++ = *s0++;
+               }
+           }
+       }
+      else                             /* Unescaped control character */
+       return URL_ERR_INVALID_CHAR;
+    }
+  *d = 0;
+  return 0;
+}
+
+/*
+ * Introduce '%' escapes: copy `s' to `d', escaping everything except for
+ * alphanumerics and the punctuation listed below. NCC_xxx codes are written
+ * as escapes of the characters they stand for. Returns 0 or URL_ERR_TOO_LONG.
+ */
+int
+url_enescape(const byte *s, byte *d)
+{
+  /* RFC 1738(2.2): several exceptions copied verbatim and reserved chars used for reserved purpose */
+  static const char verbatim[] = "$-_.+!*'(),/?:@=&#;";
+  byte *limit = d + MAX_URL_SIZE - 10;
+
+  for (; *s; s++)
+    {
+      unsigned int c = *s;
+
+      if (d >= limit)
+       return URL_ERR_TOO_LONG;
+      if (Calnum(c) || strchr(verbatim, c))
+       *d++ = c;
+      else
+       {
+         uns val = (c < NCC_MAX) ? NCC_CHARS[c] : c;
+         *d++ = '%';
+         *d++ = enhex(val >> 4);
+         *d++ = enhex(val & 0x0f);
+       }
+    }
+  *d = 0;
+  return 0;
+}
+
+/*
+ * A friendlier variant of url_enescape(): NCC_xxx codes are converted back
+ * to plain characters, printable ASCII is copied as is and only the remaining
+ * bytes are written as '%' escapes. Returns 0 or URL_ERR_TOO_LONG.
+ */
+int
+url_enescape_friendly(const byte *src, byte *dest)
+{
+  byte *limit = dest + MAX_URL_SIZE - 10;
+  uns c;
+
+  while ((c = *src) != 0)
+    {
+      if (dest >= limit)
+       return URL_ERR_TOO_LONG;
+      src++;
+      if (c < NCC_MAX)
+       *dest++ = NCC_CHARS[c];
+      else if (c >= 0x20 && c < 0x7f)
+       *dest++ = c;
+      else
+       {
+         *dest++ = '%';
+         *dest++ = enhex(c >> 4);
+         *dest++ = enhex(c & 0x0f);
+       }
+    }
+  *dest = 0;
+  return 0;
+}
+
+/* Split an URL (several parts may be copied to the destination buffer) */
+
+/* Both tables are indexed by URL_PROTO_xxx */
+byte *url_proto_names[URL_PROTO_MAX] = URL_PNAMES;                     /* Protocol names */
+static int url_proto_path_flags[URL_PROTO_MAX] = URL_PATH_FLAGS;       /* Non-zero if the protocol requires a host spec */
+
+/* Find a protocol by its name (case-insensitively) and return its URL_PROTO_xxx
+ * number or URL_PROTO_UNKNOWN if there is no such protocol. */
+uns
+identify_protocol(const byte *p)
+{
+  uns id = 1;                          /* Entry 0 is reserved for the unknown protocol */
+
+  while (id < URL_PROTO_MAX)
+    {
+      if (!strcasecmp(p, url_proto_names[id]))
+       return id;
+      id++;
+    }
+  return URL_PROTO_UNKNOWN;
+}
+
+/*
+ * Split the (already de-escaped) URL `s' to its components.
+ *
+ * Protocol, user, password and host are copied to the buffer `d' and the
+ * corresponding fields of `u' point there; u->rest points to the unparsed
+ * remainder inside `s'. u->buf and u->bufend delimit the rest of `d' which
+ * can be used by url_normalize(). Port is set to ~0 when not specified.
+ * Returns 0 or URL_ERR_INVALID_PORT.
+ */
+int
+url_split(byte *s, struct url *u, byte *d)
+{
+  bzero(u, sizeof(struct url));
+  u->port = ~0;
+  u->bufend = d + MAX_URL_SIZE - 10;
+
+  if (s[0] != '/')                     /* Seek for "protocol:" */
+    {
+      byte *p = s;
+      while (*p && Calnum(*p))
+       p++;
+      if (p != s && *p == ':')
+       {
+         u->protocol = d;
+         while (s < p)
+           *d++ = *s++;
+         *d++ = 0;
+         u->protoid = identify_protocol(u->protocol);
+         s++;
+         if (url_proto_path_flags[u->protoid] && (s[0] != '/' || s[1] != '/'))
+           {
+             /* The protocol requires complete host spec, but it's missing -> treat as a relative path instead */
+             int len = d - u->protocol;        /* Both pointers have advanced by the same distance */
+             d -= len;
+             s -= len;
+             u->protocol = NULL;
+             u->protoid = 0;
+           }
+       }
+    }
+
+  if (s[0] == '/')                     /* Host spec or absolute path */
+    {
+      if (s[1] == '/')                 /* Host spec */
+       {
+         byte *q, *e;
+         byte *at = NULL;
+         char *ep;
+
+         s += 2;
+         q = d;
+         while (*s && *s != '/' && *s != '?')  /* Copy user:passwd@host:port */
+           {
+             if (*s != '@')
+               *d++ = *s;
+             else if (!at)
+               {
+                 *d++ = 0;
+                 at = d;
+               }
+             else                      /* This shouldn't happen with sane URL's, but we need to be sure */
+               *d++ = NCC_AT;
+             s++;
+           }
+         *d++ = 0;
+         if (at)                       /* user:passwd present */
+           {
+             u->user = q;
+             if (e = strchr(q, ':'))
+               {
+                 *e++ = 0;
+                 u->pass = e;
+               }
+           }
+         else
+           at = q;
+         e = strchr(at, ':');
+         if (e)                        /* host:port present */
+           {
+             /*
+              * Keep the full width of the strtoul() result for the range check.
+              * Storing it to a 32-bit variable first would let values like
+              * 4294967376 wrap around to a valid port number.
+              */
+             unsigned long p;
+             *e++ = 0;
+             p = strtoul(e, &ep, 10);
+             if (ep && *ep || p > 65535)
+               return URL_ERR_INVALID_PORT;
+             else if (p)               /* Port 0 (e.g. in :/) is treated as default port */
+               u->port = p;
+           }
+         u->host = at;
+       }
+    }
+
+  u->rest = s;
+  u->buf = d;
+  return 0;
+}
+
+/* Normalization according to given base URL */
+
+static uns std_ports[] = URL_DEFPORTS; /* Default port numbers */
+
+/*
+ * Merge the relative path of `u' with the path of the base URL `b'.
+ *
+ * The result is built in the free part of u's buffer (u->buf .. u->bufend)
+ * and u->rest is redirected there. Absolute paths are left untouched, an empty
+ * path inherits the whole path of the base. Returns 0 or an error code.
+ */
+static int
+relpath_merge(struct url *u, struct url *b)
+{
+  byte *a = u->rest;                   /* The relative path */
+  byte *o = b->rest;                   /* Path of the base URL */
+  byte *d = u->buf;                    /* Where we build the result */
+  byte *e = u->bufend;
+  byte *p;
+
+  if (a[0] == '/')                     /* Absolute path => OK */
+    return 0;
+  if (o[0] != '/' && o[0] != '?')
+    return URL_PATH_UNDERFLOW;
+
+  if (!a[0])                           /* Empty URL -> inherit everything */
+    {
+      u->rest = b->rest;
+      return 0;
+    }
+
+  u->rest = d;                         /* We know we'll need to copy the path somewhere else */
+
+  if (a[0] == '#')                     /* Another fragment */
+    {
+      for(p=o; *p && *p != '#'; p++)
+       ;
+      goto copy;
+    }
+  if (a[0] == '?')                     /* New query */
+    {
+      for(p=o; *p && *p != '#' && *p != '?'; p++)
+       ;
+      goto copy;
+    }
+  if (a[0] == ';')                     /* Change parameters */
+    {
+      for(p=o; *p && *p != ';' && *p != '?' && *p != '#'; p++)
+       ;
+      goto copy;
+    }
+
+  p = NULL;                            /* Copy original path and find the last slash */
+  while (*o && *o != ';' && *o != '?' && *o != '#')
+    {
+      if (d >= e)
+       return URL_ERR_TOO_LONG;
+      if ((*d++ = *o++) == '/')
+       p = d;
+    }
+  if (!p)
+    return URL_ERR_REL_NOTHING;
+  d = p;                               /* Continue just after the last slash of the base path */
+
+  while (*a)                           /* Each iteration handles one segment of the relative path */
+    {
+      if (a[0] == '.')
+       {
+         if (a[1] == '/' || !a[1])     /* Skip "./" and ".$" */
+           {
+             a++;
+             if (a[0])
+               a++;
+             continue;
+           }
+         else if (a[1] == '.' && (a[2] == '/' || !a[2])) /* "../" */
+           {
+             a += 2;
+             if (a[0])
+               a++;
+             if (d <= u->buf + 1)
+               {
+                 /*
+                  * RFC 1808 says we should leave ".." as a path segment, but
+                  * we intentionally break the rule and refuse the URL.
+                  */
+                 if (!url_ignore_underflow)
+                   return URL_PATH_UNDERFLOW;
+               }
+             else
+               {
+                 d--;                  /* Discard trailing slash */
+                 while (d[-1] != '/')
+                   d--;
+               }
+             continue;
+           }
+       }
+      while (a[0] && a[0] != '/')
+       {
+         if (d >= e)
+           return URL_ERR_TOO_LONG;
+         *d++ = *a++;
+       }
+      if (a[0])
+       {
+         /*
+          * The separator needs a bounds check as well: a path consisting
+          * of a long run of slashes never enters the loop above.
+          */
+         if (d >= e)
+           return URL_ERR_TOO_LONG;
+         *d++ = *a++;
+       }
+    }
+
+okay:
+  *d++ = 0;                            /* Fits thanks to the slack kept behind bufend */
+  u->buf = d;
+  return 0;
+
+copy:                                  /* Combine part of old URL with the new one */
+  while (o < p)
+    if (d < e)
+      *d++ = *o++;
+    else
+      return URL_ERR_TOO_LONG;
+  while (*a)
+    if (d < e)
+      *d++ = *a++;
+    else
+      return URL_ERR_TOO_LONG;
+  goto okay;
+}
+
+/*
+ * Normalize a split URL `u' relatively to the base URL `b' (which can be NULL
+ * if relative URL's are not allowed): check basic syntax, fill in protocol,
+ * host, user, password, port and path inherited from the base and replace
+ * unspecified port by the default one. Returns 0 or an error code.
+ */
+int
+url_normalize(struct url *u, struct url *b)
+{
+  int err;
+
+  /* Basic checks */
+  if (url_proto_path_flags[u->protoid] && (!u->host || !*u->host) ||
+      !u->host && u->user ||
+      !u->user && u->pass ||
+      !u->rest)
+    return URL_SYNTAX_ERROR;
+
+  if (!u->protocol)
+    {
+      /* Now we know it's a relative URL. Do we have any base? */
+      if (!b || !url_proto_path_flags[b->protoid])
+       return URL_ERR_REL_NOTHING;
+      u->protocol = b->protocol;
+      u->protoid = b->protoid;
+
+      /* Reference to the same host */
+      if (!u->host)
+       {
+         u->host = b->host;
+         u->user = b->user;
+         u->pass = b->pass;
+         u->port = b->port;
+         if (err = relpath_merge(u, b))
+           return err;
+       }
+    }
+
+  /* Change path "?" to "/?" because it's the true meaning */
+  if (u->rest[0] == '?')
+    {
+      int l = strlen(u->rest);
+      /* We write the slash, l characters of the path and the terminating zero */
+      if (u->bufend - u->buf < l+2)
+       return URL_ERR_TOO_LONG;
+      u->buf[0] = '/';
+      memcpy(u->buf+1, u->rest, l+1);
+      u->rest = u->buf;
+      u->buf += l+2;
+    }
+
+  /* Fill in missing info */
+  if (u->port == ~0U)
+    u->port = std_ports[u->protoid];
+
+  return 0;
+}
+
+/* Name canonicalization */
+
+/* Convert ASCII upper-case letters of a string to lower case in place; NULL is accepted and ignored */
+static void
+lowercase(byte *b)
+{
+  if (!b)
+    return;
+  for (; *b; b++)
+    if (*b >= 'A' && *b <= 'Z')
+      *b += 'a' - 'A';
+}
+
+/*
+ * Remove trailing dots from a string in place. The first character is always
+ * kept, even if it is a dot. NULL is accepted and ignored.
+ */
+static void
+kill_end_dot(byte *b)
+{
+  uns len;
+
+  if (!b)
+    return;
+  /* Work with lengths instead of pointers, so that we never form b-1 for an empty string */
+  len = strlen(b);
+  while (len > 1 && b[len-1] == '.')
+    b[--len] = 0;
+}
+
+/*
+ * Canonicalize names in a split URL: lowercase protocol and host, strip
+ * trailing dots from the host, supply "/" for a missing path where the
+ * protocol uses host specs and cut off the fragment. Always returns 0.
+ */
+int
+url_canonicalize(struct url *u)
+{
+  char *frag;
+  int no_path;
+
+  lowercase(u->protocol);
+  lowercase(u->host);
+  kill_end_dot(u->host);
+
+  no_path = !u->rest || !*u->rest;
+  if (no_path && url_proto_path_flags[u->protoid])
+    u->rest = "/";
+
+  if (u->rest)
+    {
+      frag = strchr(u->rest, '#');     /* Kill fragment reference */
+      if (frag)
+       *frag = 0;
+    }
+  return 0;
+}
+
+/* Pack a broken-down URL */
+
+/*
+ * Append the string `s' at `d' without crossing the limit `e' and return the
+ * new end position. NULL is returned on overflow and also passed through when
+ * `d' is already NULL, so that a chain of calls needs a single check only.
+ */
+static byte *
+append(byte *d, const byte *s, byte *e)
+{
+  if (!d)
+    return NULL;
+  for (; *s; s++)
+    {
+      if (d >= e)
+       return NULL;
+      *d++ = *s;
+    }
+  return d;
+}
+
+/*
+ * Pack a broken-down URL `u' to a string stored in `d'.
+ *
+ * If a protocol is set, u->protoid is refreshed from its name. The port is
+ * written only when it is specified and differs from the default port of the
+ * protocol. Returns 0 or URL_ERR_TOO_LONG.
+ */
+int
+url_pack(struct url *u, byte *d)
+{
+  byte *e = d + MAX_URL_SIZE - 10;
+
+  /* append() passes NULL through, so overflows are checked once at the end */
+  if (u->protocol)
+    {
+      d = append(d, u->protocol, e);
+      d = append(d, ":", e);
+      u->protoid = identify_protocol(u->protocol);
+    }
+  if (u->host)
+    {
+      d = append(d, "//", e);
+      if (u->user)
+       {
+         d = append(d, u->user, e);
+         if (u->pass)
+           {
+             d = append(d, ":", e);
+             d = append(d, u->pass, e);
+           }
+         d = append(d, "@", e);
+       }
+      d = append(d, u->host, e);
+      if (u->port != std_ports[u->protoid] && u->port != ~0U)
+       {
+         /*
+          * The port is unsigned and the structure can be filled in by the
+          * caller, so be prepared for any 32-bit value: print it as unsigned
+          * to a buffer which is large enough for 10 digits.
+          */
+         char z[16];
+         snprintf(z, sizeof(z), "%u", u->port);
+         d = append(d, ":", e);
+         d = append(d, z, e);
+       }
+    }
+  if (u->rest)
+    d = append(d, u->rest, e);
+  if (!d)
+    return URL_ERR_TOO_LONG;
+  *d = 0;
+  return 0;
+}
+
+/* Error messages */
+
+/* Messages for url_error(), indexed by the error codes defined in url.h */
+static char *errmsg[] = {
+  "Something is wrong",                        /* 0: used for unknown codes */
+  "Too long",                          /* URL_ERR_TOO_LONG */
+  "Invalid character",                 /* URL_ERR_INVALID_CHAR */
+  "Invalid escape",                    /* URL_ERR_INVALID_ESCAPE */
+  "Invalid escaped character",         /* URL_ERR_INVALID_ESCAPED_CHAR */
+  "Invalid port number",               /* URL_ERR_INVALID_PORT */
+  "Relative URL not allowed",          /* URL_ERR_REL_NOTHING */
+  "Unknown protocol",                  /* URL_ERR_UNKNOWN_PROTOCOL */
+  "Syntax error",                      /* URL_SYNTAX_ERROR */
+  "Path underflow"                     /* URL_PATH_UNDERFLOW */
+};
+
+char *
+url_error(uns err)
+{
+  if (err >= sizeof(errmsg) / sizeof(char *))
+    err = 0;
+  return errmsg[err];
+}
+
+/* Standard cookbook recipes */
+
+/*
+ * De-escape the URL `u' to `buf1', split it to `url' using `buf2', normalize
+ * it relatively to `base' and canonicalize it. The first failing step
+ * determines the error code returned; 0 means success.
+ */
+int
+url_canon_split_rel(const byte *u, byte *buf1, byte *buf2, struct url *url, struct url *base)
+{
+  int err = url_deescape(u, buf1);
+
+  if (!err)
+    err = url_split(buf1, url, buf2);
+  if (!err)
+    err = url_normalize(url, base);
+  if (!err)
+    err = url_canonicalize(url);
+  return err;
+}
+
+/*
+ * The whole canonicalization in a single call: split and canonicalize `src'
+ * relatively to `base', pack it again and store the escaped result to `dst'.
+ * Returns 0 or the error code of the first step which has failed.
+ */
+int
+url_auto_canonicalize_rel(const byte *src, byte *dst, struct url *base)
+{
+  byte buf1[MAX_URL_SIZE], buf2[MAX_URL_SIZE], buf3[MAX_URL_SIZE];
+  struct url ur;
+  int err;
+
+  err = url_canon_split_rel(src, buf1, buf2, &ur, base);
+  if (err)
+    return err;
+  err = url_pack(&ur, buf3);
+  if (err)
+    return err;
+  return url_enescape(buf3, dst);
+}
+
+/* Testing */
+
+#ifdef TEST
+
+/*
+ * Test driver: argv[1] is the URL to process, the optional argv[2] replaces
+ * the built-in base URL. The result of every processing step is printed;
+ * the first failing step prints its error code and ends the program with
+ * exit code 1.
+ */
+int main(int argc, char **argv)
+{
+  char buf1[MAX_URL_SIZE], buf2[MAX_URL_SIZE], buf3[MAX_URL_SIZE], buf4[MAX_URL_SIZE];
+  int err;
+  struct url url, url0;
+  char *base = "http://mj@www.hell.org/123/sub_dir/index.html;param?query&zzz/subquery#fragment";
+
+  if (argc != 2 && argc != 3)
+    return 1;
+  if (argc == 3)
+    base = argv[2];
+  if (err = url_deescape(argv[1], buf1))
+    {
+      printf("deesc: error %d\n", err);
+      return 1;
+    }
+  printf("deesc: %s\n", buf1);
+  if (err = url_split(buf1, &url, buf2))
+    {
+      printf("split: error %d\n", err);
+      return 1;
+    }
+  printf("split: @%s@%s@%s@%s@%d@%s\n", url.protocol, url.user, url.pass, url.host, url.port, url.rest);
+  /* The base URL is split directly, without de-escaping */
+  if (err = url_split(base, &url0, buf3))
+    {
+      printf("split base: error %d\n", err);
+      return 1;
+    }
+  if (err = url_normalize(&url0, NULL))
+    {
+      printf("normalize base: error %d\n", err);
+      return 1;
+    }
+  printf("base: @%s@%s@%s@%s@%d@%s\n", url0.protocol, url0.user, url0.pass, url0.host, url0.port, url0.rest);
+  if (err = url_normalize(&url, &url0))
+    {
+      printf("normalize: error %d\n", err);
+      return 1;
+    }
+  printf("normalize: @%s@%s@%s@%s@%d@%s\n", url.protocol, url.user, url.pass, url.host, url.port, url.rest);
+  if (err = url_canonicalize(&url))
+    {
+      printf("canonicalize: error %d\n", err);
+      return 1;
+    }
+  printf("canonicalize: @%s@%s@%s@%s@%d@%s\n", url.protocol, url.user, url.pass, url.host, url.port, url.rest);
+  if (err = url_pack(&url, buf4))
+    {
+      printf("pack: error %d\n", err);
+      return 1;
+    }
+  printf("pack: %s\n", buf4);
+  /* buf2 is reused for the escaped result */
+  if (err = url_enescape(buf4, buf2))
+    {
+      printf("enesc: error %d\n", err);
+      return 1;
+    }
+  printf("enesc: %s\n", buf2);
+  return 0;
+}
+
+#endif
+
+/* A component of an URL as seen by url_has_repeated_component() */
+struct component {
+       const byte *start;      /* First character of the component inside the URL */
+       int length;             /* Length in bytes, the separator is not included */
+       uns count;              /* Number of occurrences, kept only for components entered to the hash table */
+       u32 hash;               /* hashf() of the component text */
+};
+
+/* Hash `length' bytes at `start': the accumulator starts with the length and
+ * it is rotated left by 8 bits before each byte gets XOR'ed in. */
+static inline u32
+hashf(const byte *start, int length)
+{
+       u32 acc = length;
+       int i;
+
+       for (i = 0; i < length; i++)
+               acc = ((acc << 8) | (acc >> 24)) ^ start[i];
+       return acc;
+}
+
+/*
+ * Count how many times the sequence of the first `len' components of `comp'
+ * is repeated consecutively. `count' is the number of components available.
+ * The first occurrence is counted as well, so the result is at least 1.
+ * The caller must guarantee count >= len, because the counting is unsigned.
+ */
+static inline uns
+repeat_count(struct component *comp, uns count, uns len)
+{
+       struct component *orig_comp = comp;
+       uns found = 0;
+       while (1)
+       {
+               uns i;
+               /* Move to the next window of `len' components */
+               comp += len;
+               count -= len;
+               found++;
+               if (count < len)        /* Not enough components left for another repetition */
+                       return found;
+               /* Compare cheap things first, the text comes last */
+               for (i=0; i<len; i++)
+                       if (comp[i].hash != orig_comp[i].hash
+                       || comp[i].length != orig_comp[i].length
+                       || memcmp(comp[i].start, orig_comp[i].start, comp[i].length))
+                               return found;
+       }
+}
+
+/*
+ * Check whether the URL looks like a product of an infinite loop of relative
+ * links. The URL is split to components at url_component_separators.
+ *
+ * Returns 1 if some component occurs more than url_max_occurences times,
+ * the length (in components) of a sequence of at most url_max_repeat_length
+ * components repeated consecutively at least url_min_repeat_count times,
+ * or 0 if none of this has been found.
+ */
+int
+url_has_repeated_component(const byte *url)
+{
+       struct component *comp;
+       uns comps, comp_len, rep_prefix, hash_size, *hash, *next;
+       const byte *c;
+       uns i, j, k;
+
+       /* First pass: count the components */
+       for (comps=0, c=url; c; comps++)
+       {
+               c = strpbrk(c, url_component_separators);
+               if (c)
+                       c++;
+       }
+       /* Too few components to trigger any of the limits */
+       if (comps < url_min_repeat_count && comps <= url_max_occurences)
+               return 0;
+       /* Second pass: record start and length of each component (on the stack) */
+       comp = alloca(comps * sizeof(*comp));
+       for (i=0, c=url; c; i++)
+       {
+               comp[i].start = c;
+               c = strpbrk(c, url_component_separators);
+               if (c)
+               {
+                       comp[i].length = c - comp[i].start;
+                       c++;
+               }
+               else
+                       comp[i].length = strlen(comp[i].start);
+       }
+       ASSERT(i == comps);
+       for (i=0; i<comps; i++)
+               comp[i].hash = hashf(comp[i].start, comp[i].length);
+       if (comps > url_max_occurences)
+       {
+               /*
+                * Count occurrences of identical components using a chained
+                * hash table: hash[] holds index of the first component in
+                * each bucket, next[] links the chain and ~0 terminates it.
+                */
+               hash_size = next_table_prime(comps);
+               hash = alloca(hash_size * sizeof(*hash));
+               next = alloca(comps * sizeof(*next));
+               memset(hash, 255, hash_size * sizeof(*hash));   /* Fill with ~0, i.e. empty buckets */
+               for (i=0; i<comps; i++)
+               {
+                       j = comp[i].hash % hash_size;
+                       /* Search the chain for a component with the same text */
+                       for (k = hash[j]; ~k && (comp[i].hash != comp[k].hash || comp[i].length != comp[k].length ||
+                           memcmp(comp[k].start, comp[i].start, comp[i].length)); k = next[k]);
+                       if (!~k)
+                       {
+                               /* Seen for the first time: insert at the head of the chain */
+                               next[i] = hash[j];
+                               hash[j] = i;
+                               comp[i].count = 1;
+                       }
+                       else
+                       {
+                               /* The count is kept in the first occurrence */
+                               if (comp[k].count++ >= url_max_occurences)
+                                       return 1;
+                       }
+               }
+       }
+       /* Try all sequence lengths and all starting positions */
+       for (comp_len = 1; comp_len <= url_max_repeat_length && comp_len <= comps; comp_len++)
+               for (rep_prefix = 0; rep_prefix <= comps - comp_len; rep_prefix++)
+                       if (repeat_count(comp + rep_prefix, comps - rep_prefix, comp_len) >= url_min_repeat_count)
+                               return comp_len;
+       return 0;
+}
diff --git a/lib/url.h b/lib/url.h

new file mode 100644 (file)

index 0000000..9390ae9
--- /dev/null
+++ b/lib/url.h
@@ -0,0 +1,90 @@
+/*
+ *     UCW Library -- URL Functions
+ *
+ *     (c) 1997--2004 Martin Mares <mj@ucw.cz>
+ *     (c) 2001 Robert Spalek <robert@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_URL_H
+#define _UCW_URL_H
+
+#define MAX_URL_SIZE 1024
+
+/*
+ * Non-control meanings of control characters
+ *
+ * url_deescape() replaces escaped reserved characters by these codes, so that
+ * they cannot be mistaken for the unescaped ones which have special meaning.
+ */
+
+#define NCC_SEMICOLON 1
+#define NCC_SLASH 2
+#define NCC_QUEST 3
+#define NCC_COLON 4
+#define NCC_AT 5
+#define NCC_EQUAL 6
+#define NCC_AND 7
+#define NCC_HASH 8
+#define NCC_MAX 9              /* All the codes are smaller than this */
+
+/* NCC_CHARS[code] is the character the code stands for; index 0 is just a placeholder */
+#define NCC_CHARS " ;/?:@=&#"
+
+/* Remove/Introduce '%' escapes */
+
+int url_deescape(const byte *s, byte *d);
+int url_enescape(const byte *s, byte *d);
+int url_enescape_friendly(const byte *src, byte *dest);        // for cards.c only
+
+/* URL splitting and normalization */
+
+/* A broken-down URL; components which are not present are NULL */
+struct url {
+  byte *protocol;                      /* Protocol name */
+  uns protoid;                         /* URL_PROTO_xxx number of the protocol */
+  byte *user;
+  byte *pass;
+  byte *host;
+  uns port;                            /* ~0 if unspec */
+  byte *rest;                          /* The rest of the URL: everything after the host spec */
+  byte *buf, *bufend;                  /* Unused part of the buffer passed to url_split() and its limit */
+};
+
+int url_split(byte *s, struct url *u, byte *d);
+int url_normalize(struct url *u, struct url *b);
+int url_canonicalize(struct url *u);
+int url_pack(struct url *u, byte *d);
+int url_canon_split_rel(const byte *url, byte *buf1, byte *buf2, struct url *u, struct url *base);
+int url_auto_canonicalize_rel(const byte *src, byte *dst, struct url *base);
+uns identify_protocol(const byte *p);
+int url_has_repeated_component(const byte *url);
+
+/* The same as url_canon_split_rel(), but without any base URL */
+static inline int
+url_canon_split(const byte *url, byte *buf1, byte *buf2, struct url *u)
+{
+  return url_canon_split_rel(url, buf1, buf2, u, NULL);
+}
+
+/* The same as url_auto_canonicalize_rel(), but without any base URL */
+static inline int
+url_auto_canonicalize(const byte *src, byte *dst)
+{
+  return url_auto_canonicalize_rel(src, dst, NULL);
+}
+
+/* Error codes */
+
+char *url_error(uns);
+
+#define URL_ERR_TOO_LONG 1
+#define URL_ERR_INVALID_CHAR 2
+#define URL_ERR_INVALID_ESCAPE 3
+#define URL_ERR_INVALID_ESCAPED_CHAR 4
+#define URL_ERR_INVALID_PORT 5
+#define URL_ERR_REL_NOTHING 6
+#define URL_ERR_UNKNOWN_PROTOCOL 7
+#define URL_SYNTAX_ERROR 8
+#define URL_PATH_UNDERFLOW 9
+
+#define URL_PROTO_UNKNOWN 0
+#define URL_PROTO_HTTP 1
+#define URL_PROTO_FTP 2
+#define URL_PROTO_FILE 3
+#define URL_PROTO_MAX 4
+
+#define URL_PNAMES { "unknown", "http", "ftp", "file" }
+#define URL_DEFPORTS { ~0, 80, 21, 0 }
+#define URL_PATH_FLAGS { 0, 1, 1, 1 }
+
+extern byte *url_proto_names[];
+
+#endif
diff --git a/lib/wildmatch.c b/lib/wildmatch.c

new file mode 100644 (file)

index 0000000..e0b5e2e
--- /dev/null
+++ b/lib/wildmatch.c
@@ -0,0 +1,237 @@
+/*
+ *     UCW Library -- Fast Pattern Matcher for Short Wildcard Patterns (only `?' and `*' supported)
+ *
+ *     Traditional NFA -> DFA method with on-the-fly DFA construction.
+ *
+ *     (c) 1999 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/mempool.h"
+#include "lib/wildmatch.h"
+
+#include <stdio.h>
+#include <string.h>
+
+#define MAX_STATES 32          /* Must be <= 32, state 0 is reserved, state 1 is initial */
+#define MAX_CACHED 256         /* Maximum number of cached DFA states */
+#define HASH_SIZE 512          /* Number of entries in DFA hash table (at least MAX_CACHED+MAX_STATES) */
+#define HASH_SKIP 137
+
+/* A state of the NFA built from the pattern. Sets of states are bit masks: bit i stands for state i. */
+struct nfa_state {
+  byte ch;                     /* 0 for non-matching state */
+  byte final;                  /* Accepting state */
+  u32 match_states;            /* States to go to when input character == ch */
+  u32 default_states;          /* States to go to whatever the input is */
+};
+
+/* A state of the DFA, constructed on the fly by wp_new_state() */
+struct dfa_state {
+  uintptr_t edge[256];         /* Outgoing DFA edges. Bit 0 is set for incomplete edges which
+                                * contain just state set and clear for complete ones which point
+                                * to other states. NULL means `no match'.
+                                */
+  u32 nfa_set;                 /* A set of NFA states this DFA state represents */
+  int final;                   /* This is an accepting state */
+  struct dfa_state *next;      /* Next in the chain of free states */
+};
+
+/* A compiled pattern: the NFA together with a cache of DFA states constructed so far */
+struct wildpatt {
+  struct nfa_state nfa[MAX_STATES];    /* NFA states indexed by their numbers */
+  struct dfa_state *hash[HASH_SIZE];   /* Open-addressing hash table of DFA states keyed by nfa_set */
+  struct dfa_state *dfa_start;         /* DFA state for the set containing just the initial NFA state */
+  uns nfa_states;                      /* Number of the last (accepting) NFA state */
+  uns dfa_cache_counter;               /* Number of DFA states created since the last pruning */
+  struct mempool *pool;                        /* Pool the DFA states are allocated from */
+  struct dfa_state *free_states;       /* Pruned DFA states ready for reuse */
+};
+
+/* Hash a set of NFA states to an index to the DFA hash table */
+static inline unsigned
+wp_hash(u32 set)
+{
+  u32 folded = set ^ (set >> 16);      /* Fold the upper half to the lower one ... */
+
+  folded ^= folded >> 8;               /* ... and once more by bytes */
+  return folded % HASH_SIZE;
+}
+
+/*
+ * Find the DFA state representing the given set of NFA states or create it
+ * if it isn't cached yet. Edges of a new state are initialized as incomplete,
+ * that is containing just the set of target NFA states with bit 0 set.
+ */
+static struct dfa_state *
+wp_new_state(struct wildpatt *w, u32 set)
+{
+  unsigned h = wp_hash(set);
+  struct dfa_state *d;
+  unsigned bit;
+  u32 def_set;
+
+  while (d = w->hash[h])               /* Probe the hash table until we find the set or a free slot */
+    {
+      if (d->nfa_set == set)
+       return d;
+      h = (h + HASH_SKIP) % HASH_SIZE;
+    }
+  if (d = w->free_states)              /* Recycle a pruned state if there is any */
+    w->free_states = d->next;
+  else
+    d = mp_alloc(w->pool, sizeof(*d));
+  w->hash[h] = d;
+  bzero(d, sizeof(*d));
+  d->nfa_set = set;
+  def_set = 0;
+  for(bit=1; bit <= w->nfa_states; bit++)
+    if (set & ((u32) 1 << bit))        /* Shift an unsigned value, state 31 doesn't fit in a signed int */
+      {
+       struct nfa_state *n = &w->nfa[bit];
+       if (n->ch)
+         d->edge[n->ch] |= n->match_states | 1;
+       d->final |= n->final;
+       def_set |= n->default_states;
+      }
+  if (def_set)                         /* Default edges apply to all input characters */
+    {
+      unsigned i;
+      def_set |= 1;
+      for(i=0; i<256; i++)
+       d->edge[i] |= def_set;
+    }
+  w->dfa_cache_counter++;
+  return d;
+}
+
+/*
+ * Compile a wildcard pattern to a NFA allocated from the given pool.
+ * Returns NULL if the pattern is too long.
+ *
+ * State 1 is the initial one. Each pattern character except `*' adds a new
+ * state; `*' only adds a default edge from the current state to itself.
+ */
+struct wildpatt *
+wp_compile(const byte *p, struct mempool *pool)
+{
+  struct wildpatt *w;
+  const byte *q;
+  uns i, last;
+
+  if (strlen(p) >= MAX_STATES)         /* Too long */
+    return NULL;
+  /*
+   * The number of the accepting state is 1 + number of non-`*' characters
+   * and it must fit both in nfa[] and in a 32-bit state set. The length
+   * check above alone would let a pattern of MAX_STATES-1 ordinary
+   * characters use state MAX_STATES.
+   */
+  last = 1;
+  for(q=p; *q; q++)
+    if (*q != '*')
+      last++;
+  if (last >= MAX_STATES)
+    return NULL;
+  w = mp_alloc_zero(pool, sizeof(*w));
+  w->pool = pool;
+  for(i=1; *p; p++)
+    {
+      struct nfa_state *n = w->nfa + i;
+      if (*p == '?')
+       n->default_states |= (u32) 1 << (++i);  /* Default edge to a new state */
+      else if (*p == '*')
+       n->default_states |= (u32) 1 << i;      /* Default edge to the same state */
+      else
+       {
+         n->ch = *p;                   /* Edge to new state labelled with 'c' */
+         n->match_states = (u32) 1 << (++i);
+       }
+    }
+  w->nfa[i].final = 1;
+  w->nfa_states = i;
+  w->dfa_start = wp_new_state(w, 1 << 1);
+  return w;
+}
+
+/* Throw away all cached DFA states and start again with a fresh initial state */
+static void
+wp_prune_cache(struct wildpatt *w)
+{
+  /*
+   *   I was unable to trigger cache overflow on my large set of
+   *   test cases, so I decided to handle it in an extremely dumb
+   *   way.   --mj
+   */
+  int i;
+  /*
+   *   The initial state must go as well: its resolved edges point to
+   *   the states we are going to recycle, so keeping it would make
+   *   the matcher follow pointers to states which represent completely
+   *   different sets after they get reused.
+   */
+  for(i=0; i<HASH_SIZE; i++)
+    if (w->hash[i])
+      {
+       struct dfa_state *d = w->hash[i];
+       w->hash[i] = NULL;
+       d->next = w->free_states;
+       w->free_states = d;
+      }
+  w->dfa_cache_counter = 0;
+  w->dfa_start = wp_new_state(w, 1 << 1);      /* Rebuilt from a recycled state, the counter becomes 1 */
+}
+
+/*
+ * Match the string `s' against a compiled pattern. Returns non-zero if the
+ * whole string matches. The DFA is constructed lazily during matching, so
+ * the pattern structure gets modified.
+ */
+int
+wp_match(struct wildpatt *w, const byte *s)
+{
+  struct dfa_state *d;
+
+  /* The cache is pruned only between matches, never in the middle of one */
+  if (w->dfa_cache_counter >= MAX_CACHED)
+    wp_prune_cache(w);
+  d = w->dfa_start;
+  while (*s)
+    {
+      uintptr_t next = d->edge[*s];
+      if (next & 1)
+       {
+         /* Need to lookup/create the destination state */
+         struct dfa_state *new = wp_new_state(w, next & ~1);
+         d->edge[*s] = (uintptr_t) new;        /* Make the edge complete for the next time */
+         d = new;
+       }
+      else if (!next)                  /* No edge for this character */
+       return 0;
+      else
+       d = (struct dfa_state *) next;
+      s++;
+    }
+  return d->final;
+}
+
+/* Count the pattern characters other than `*', i.e. the minimum length of a matching string */
+int
+wp_min_size(const byte *p)
+{
+  int needed = 0;
+
+  for (; *p; p++)
+    if (*p != '*')
+      needed++;
+  return needed;
+}
+
+#ifdef TEST
+
+/* Debugging dump: all NFA states, NFA sets of all cached DFA states and the cache counter */
+void
+wp_dump(struct wildpatt *w)
+{
+  int i;
+
+  puts("NFA:");
+  for(i=1; i<=w->nfa_states; i++)
+    {
+      struct nfa_state *n = w->nfa + i;
+      /* Columns: state number, final flag, character, match set, default set */
+      printf("%2d: %d %02x %08x %08x\n", i, n->final, n->ch, n->match_states, n->default_states);
+    }
+  puts("DFA:");
+  for(i=0; i<HASH_SIZE; i++)
+    if (w->hash[i])
+      printf("%3d: %08x\n", i, w->hash[i]->nfa_set);
+  printf("%d DFA states cached.\n", w->dfa_cache_counter);
+}
+
+/*
+ * Test driver: compile the pattern given as argv[1], then read lines from
+ * the standard input and print those which match. The automaton is dumped
+ * before and after the matching.
+ */
+int main(int argc, char **argv)
+{
+  struct wildpatt *w;
+  char buf[1024];
+
+  if (argc != 2) return 1;
+  w = wp_compile(argv[1], mp_new(65536));
+  if (!w)
+    {
+      puts("Compile error");
+      return 1;
+    }
+  wp_dump(w);
+  while (fgets(buf, sizeof(buf)-1, stdin))
+    {
+      char *c = strchr(buf, '\n');
+      if (!c) break;                   /* Stop at a line without a newline (too long or the last one unterminated) */
+      *c = 0;
+#if 0
+      printf("%d\n", wp_match(w, buf));
+#else
+      if (wp_match(w, buf))
+       puts(buf);
+#endif
+    }
+  wp_dump(w);
+  return 0;
+}
+
+#endif
diff --git a/lib/wildmatch.h b/lib/wildmatch.h

new file mode 100644 (file)

index 0000000..a429da1
--- /dev/null
+++ b/lib/wildmatch.h
@@ -0,0 +1,15 @@
+/*
+ *     UCW Library -- Fast Wildcard Pattern Matcher (only `?' and `*' supported)
+ *
+ *     (c) 1999 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+struct wildpatt;               /* A compiled pattern, opaque to the callers */
+struct mempool;
+
+/* Compile a pattern using memory from the given pool; returns NULL if the pattern is too long */
+struct wildpatt *wp_compile(const byte *, struct mempool *);
+/* Match a string against a compiled pattern; non-zero means that the whole string matches */
+int wp_match(struct wildpatt *, const byte *);
+/* Number of characters of the pattern other than `*' */
+int wp_min_size(const byte *);
diff --git a/lib/wordsplit.c b/lib/wordsplit.c

new file mode 100644 (file)

index 0000000..2e50edc
--- /dev/null
+++ b/lib/wordsplit.c
@@ -0,0 +1,62 @@
+/*
+ *     UCW Library -- Word Splitting
+ *
+ *     (c) 1997 Martin Mares <mj@ucw.cz>
+ *     (c) 2004 Robert Spalek <robert@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/chartype.h"
+
+#include <string.h>
+
+/*
+ *  Split @str in place at every occurrence of the character @sep: each
+ *  separator is overwritten by a zero byte and pointers to the resulting
+ *  fields are stored to @rec[0], @rec[1], ...
+ *
+ *  Returns the number of fields (an empty string counts as a single empty
+ *  field) or -1 if they do not fit in @max entries of @rec. When the limit
+ *  is hit, the first @max-1 fields have already been split off and
+ *  @rec[@max-1] points to the undivided rest of the string.
+ */
+int
+sepsplit(char *str, uns sep, char **rec, uns max)
+{
+  uns cnt = 0;
+
+  /* With no room at all, even storing the first field would overflow @rec */
+  if (!max)
+    return -1;
+  while (1)
+  {
+    rec[cnt++] = str;
+    str = strchr(str, sep);
+    if (!str)
+      return cnt;
+    if (cnt >= max)
+      return -1;
+    *str++ = 0;
+  }
+}
+
+/*
+ *  Split @src in place to words separated by whitespace (as classified by
+ *  Cspace()) and store pointers to the words to @dst. A word starting with
+ *  a double quote extends to the next double quote or to the end of the
+ *  string; the quotes themselves are not a part of the word.
+ *
+ *  Returns the number of words or -1 if there are more than @max of them.
+ */
+int
+wordsplit(char *src, char **dst, uns max)
+{
+  uns words = 0;
+
+  while (1)
+    {
+      int quoted;
+
+      /* Zero out the whitespace, which also terminates the previous word */
+      for (; Cspace(*src); src++)
+       *src = 0;
+      if (!*src)
+       return words;
+      if (words >= max)
+       return -1;
+
+      quoted = (*src == '"');
+      if (quoted)
+       src++;
+      dst[words++] = src;
+      if (quoted)
+       {
+         while (*src && *src != '"')
+           src++;
+         if (*src)
+           *src++ = 0;
+       }
+      else
+       while (*src && !Cspace(*src))
+         src++;
+    }
+}
diff --git a/lib/workqueue.c b/lib/workqueue.c

new file mode 100644 (file)

index 0000000..8833e46
--- /dev/null
+++ b/lib/workqueue.c
@@ -0,0 +1,282 @@
+/*
+ *     UCW Library -- Thread Pools and Work Queues
+ *
+ *     (c) 2006 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/threads.h"
+#include "lib/workqueue.h"
+#include "lib/heap.h"
+
+#include <errno.h>
+
+/*
+ *  Body of every worker thread: run the init_thread() hook of the pool
+ *  (if there is any), announce that by posting init_cleanup_sem and then
+ *  serve requests from the pool's queue forever. Each request is passed
+ *  to the `finished' queue of its reply_to work queue after go() returns.
+ */
+static void *
+worker_thread_init(void *arg)
+{
+  struct worker_thread *self = arg;
+  struct worker_pool *pool = self->pool;
+  struct work *req;
+
+  if (pool->init_thread)
+    pool->init_thread(self);
+  sem_post(pool->init_cleanup_sem);
+
+  /* The loop never ends by itself, only a go() callback can terminate the thread */
+  while (1)
+    {
+      req = raw_queue_get(&pool->requests);
+      req->go(self, req);
+      raw_queue_put(&req->reply_to->finished, req);
+    }
+
+  return NULL;
+}
+
+/*
+ *  A go() callback which shuts down the calling worker thread: it runs
+ *  the cleanup_thread() hook of the pool (if there is any), posts
+ *  init_cleanup_sem and exits the thread, so it never returns.
+ */
+static void
+worker_thread_signal_finish(struct worker_thread *t, struct work *w UNUSED)
+{
+  struct worker_pool *pool = t->pool;
+
+  if (pool->cleanup_thread)
+    pool->cleanup_thread(t);
+  sem_post(pool->init_cleanup_sem);
+  pthread_exit(NULL);
+}
+
+/*
+ *  Initialize the pool @p and start its threads. The function reads
+ *  num_threads, stack_size (0 selects default_thread_stack_size) and the
+ *  new_thread and init_thread callbacks, which may be NULL.
+ *
+ *  The threads are started one by one and the function waits for each
+ *  of them to pass its init_thread() hook before starting the next one.
+ *  Dies if a thread cannot be created.
+ */
+void
+worker_pool_init(struct worker_pool *p)
+{
+  clist_init(&p->worker_threads);
+  raw_queue_init(&p->requests);
+  p->init_cleanup_sem = sem_alloc();
+
+  /*
+   *  The pthread functions report failures by returning a positive error
+   *  number (they do not set errno), so every non-zero result is an error.
+   */
+  pthread_attr_t attr;
+  if (pthread_attr_init(&attr) ||
+      pthread_attr_setstacksize(&attr, p->stack_size ? : default_thread_stack_size))
+    ASSERT(0);
+
+  for (uns i=0; i < p->num_threads; i++)
+    {
+      struct worker_thread *t = (p->new_thread ? p->new_thread() : xmalloc(sizeof(*t)));
+      t->pool = p;
+      t->id = i;
+      int err = pthread_create(&t->thread, &attr, worker_thread_init, t);
+      if (err)
+       {
+         errno = err;          /* Make %m report the real reason */
+         die("Unable to create thread: %m");
+       }
+      clist_add_tail(&p->worker_threads, &t->n);
+      /* Wait until the new thread has finished its init_thread() hook */
+      sem_wait(p->init_cleanup_sem);
+    }
+
+  pthread_attr_destroy(&attr);
+}
+
+/*
+ *  Shut down all threads of the pool @p and release its resources.
+ *  Each thread is sent one termination request; the threads are then
+ *  joined and their structures freed by free_thread() or xfree().
+ */
+void
+worker_pool_cleanup(struct worker_pool *p)
+{
+  for (uns i=0; i < p->num_threads; i++)
+    {
+      /*
+       *  The request lives on our stack, which is safe: we continue only after
+       *  worker_thread_signal_finish() has posted the semaphore and by then
+       *  the worker has already removed the request from the queue.
+       *  The priority is implicitly 0. reply_to stays NULL, but it is never
+       *  used, because the go() callback exits the thread instead of returning.
+       */
+      struct work w = {
+       .go = worker_thread_signal_finish
+      };
+      raw_queue_put(&p->requests, &w);
+      sem_wait(p->init_cleanup_sem);
+    }
+
+  struct worker_thread *tmp;
+  CLIST_FOR_EACH_DELSAFE(struct worker_thread *, t, p->worker_threads, tmp)
+    {
+      int err = pthread_join(t->thread, NULL);
+      ASSERT(!err);
+      /* The nodes are freed without unlinking, the list is not used any longer */
+      if (p->free_thread)
+       p->free_thread(t);
+      else
+       xfree(t);
+    }
+  raw_queue_cleanup(&p->requests);
+  sem_free(p->init_cleanup_sem);
+}
+
+/*
+ *  Initialize @q as an empty queue: the list of priority-0 requests is
+ *  empty and the heap of prioritized requests is not allocated yet.
+ */
+void
+raw_queue_init(struct raw_queue *q)
+{
+  q->heap_max = 0;
+  q->heap_cnt = 0;
+  q->pri_heap = NULL;
+  clist_init(&q->pri0_queue);
+  q->queue_sem = sem_alloc();
+  pthread_mutex_init(&q->queue_mutex, NULL);
+}
+
+/*
+ *  Release all resources held by @q. The queue must be empty, i.e.,
+ *  neither the priority-0 list nor the heap may contain any request.
+ */
+void
+raw_queue_cleanup(struct raw_queue *q)
+{
+  ASSERT(clist_empty(&q->pri0_queue));
+  ASSERT(!q->heap_cnt);
+
+  pthread_mutex_destroy(&q->queue_mutex);
+  sem_free(q->queue_sem);
+  xfree(q->pri_heap);
+}
+
+/*
+ *  Heap ordering: a request counts as "less" when its priority is greater.
+ *  Presumably the HEAP_* macros keep the "least" element at the top, so the
+ *  top is the request with the highest priority -- confirm in lib/heap.h.
+ */
+#define PRI_LESS(x,y) ((x)->priority > (y)->priority)
+
+/*
+ *  Add the request @w to the queue @q and post the queue semaphore.
+ *  Requests with priority 0 are appended to the pri0_queue list,
+ *  the other ones are inserted to the heap.
+ */
+void
+raw_queue_put(struct raw_queue *q, struct work *w)
+{
+  pthread_mutex_lock(&q->queue_mutex);
+  if (!w->priority)
+    clist_add_tail(&q->pri0_queue, &w->n);
+  else
+    {
+      if (unlikely(q->heap_cnt >= q->heap_max))
+       {
+         /* Grow the heap: 16 items at the beginning, then double the size */
+         struct work **old_heap = q->pri_heap;
+         q->heap_max = (q->heap_max ? 2*q->heap_max : 16);
+         /* The heap is indexed from 1, hence one more slot is allocated */
+         q->pri_heap = xrealloc(old_heap, (q->heap_max + 1) * sizeof(struct work *));
+       }
+      struct work **heap = q->pri_heap;
+      heap[++q->heap_cnt] = w;
+      HEAP_INSERT(struct work *, heap, q->heap_cnt, PRI_LESS, HEAP_SWAP);
+    }
+  pthread_mutex_unlock(&q->queue_mutex);
+  /* The semaphore is posted after the mutex has been released */
+  sem_post(q->queue_sem);
+}
+
+/*
+ *  Remove a single request from @q and return it. Both callers in this file
+ *  first take the queue semaphore (sem_wait() or a successful sem_trywait()),
+ *  so at least one request should be present. Requests in the heap are always
+ *  taken before those in the priority-0 list.
+ */
+static inline struct work *
+raw_queue_do_get(struct raw_queue *q)
+{
+  pthread_mutex_lock(&q->queue_mutex);
+  struct work *w;
+  if (!q->heap_cnt)
+    {
+      /* No prioritized requests, take the oldest priority-0 one */
+      w = clist_head(&q->pri0_queue);
+      ASSERT(w);
+      clist_remove(&w->n);
+    }
+  else
+    {
+      /* The top of the heap is stored at index 1 */
+      struct work **heap = q->pri_heap;
+      w = heap[1];
+      HEAP_DELMIN(struct work *, heap, q->heap_cnt, PRI_LESS, HEAP_SWAP);
+    }
+  pthread_mutex_unlock(&q->queue_mutex);
+  return w;
+}
+
+/* Get a request from @q, waiting on the queue semaphore until there is one. */
+struct work *
+raw_queue_get(struct raw_queue *q)
+{
+  struct work *w;
+
+  sem_wait(q->queue_sem);
+  w = raw_queue_do_get(q);
+  return w;
+}
+
+/* Get a request from @q if sem_trywait() succeeds, otherwise return NULL. */
+struct work *
+raw_queue_try_get(struct raw_queue *q)
+{
+  if (sem_trywait(q->queue_sem))
+    return NULL;
+  return raw_queue_do_get(q);
+}
+
+/* Initialize the work queue @q and bind it to the worker pool @p. */
+void
+work_queue_init(struct worker_pool *p, struct work_queue *q)
+{
+  raw_queue_init(&q->finished);
+  q->nr_running = 0;
+  q->pool = p;
+}
+
+/* Destroy the work queue @q. There must be no running requests left. */
+void
+work_queue_cleanup(struct work_queue *q)
+{
+  ASSERT(!q->nr_running);
+  raw_queue_cleanup(&q->finished);
+}
+
+/*
+ *  Submit the request @w via the work queue @q: the request is put to
+ *  the queue of the pool and counted as running. Its go() callback must
+ *  be set; reply_to is filled in here.
+ */
+void
+work_submit(struct work_queue *q, struct work *w)
+{
+  struct worker_pool *pool = q->pool;
+
+  ASSERT(w->go);
+  w->reply_to = q;
+  raw_queue_put(&pool->requests, w);
+  q->nr_running++;
+}
+
+/*
+ *  Common code of work_wait() and work_try_wait(): fetch a finished
+ *  request from @q, either blocking (@try == 0) or not (@try != 0).
+ *  Returns NULL when no requests are running or, in the non-blocking
+ *  mode, when none has finished yet.
+ */
+static struct work *
+work_do_wait(struct work_queue *q, int try)
+{
+  struct work *done;
+
+  if (!q->nr_running)
+    return NULL;
+  if (try)
+    done = raw_queue_try_get(&q->finished);
+  else
+    done = raw_queue_get(&q->finished);
+  if (done)
+    q->nr_running--;
+  return done;
+}
+
+/* Wait for a finished request of @q and return it; NULL if no requests are running. */
+struct work *
+work_wait(struct work_queue *q)
+{
+  return work_do_wait(q, 0);
+}
+
+/* Non-blocking work_wait(): returns NULL also when no request has finished yet. */
+struct work *
+work_try_wait(struct work_queue *q)
+{
+  return work_do_wait(q, 1);
+}
+
+#ifdef TEST
+
+#include <unistd.h>
+
+/* Test hook: log the start of a worker thread. */
+static void
+wt_init(struct worker_thread *t)
+{
+  log(L_INFO, "INIT %d", t->id);
+}
+
+/* Test hook: log the shutdown of a worker thread. */
+static void
+wt_cleanup(struct worker_thread *t)
+{
+  log(L_INFO, "CLEANUP %d", t->id);
+}
+
+/* Test request: the generic struct work comes first, so pointers to the two can be cast to each other. */
+struct w {
+  struct work w;
+  uns id;
+};
+
+/* Test request handler: log the request and sleep for a moment. */
+static void
+go(struct worker_thread *t, struct work *w)
+{
+  struct w *req = (struct w *) w;
+
+  log(L_INFO, "GO %d: request %d (pri %d)", t->id, req->id, w->priority);
+  usleep(1);
+}
+
+/*
+ *  Test driver: start a pool of 10 threads, submit 500 requests (the first
+ *  250 of them with priority equal to their number, the rest with priority 0),
+ *  wait for all of them to finish and shut everything down.
+ */
+int main(void)
+{
+  struct worker_pool pool = {
+    .num_threads = 10,
+    .stack_size = 65536,
+    .init_thread = wt_init,
+    .cleanup_thread = wt_cleanup
+  };
+  worker_pool_init(&pool);
+
+  struct work_queue q;
+  work_queue_init(&pool, &q);
+  for (uns i=0; i<500; i++)
+    {
+      struct w *w = xmalloc_zero(sizeof(*w));
+      w->w.go = go;
+      w->w.priority = (i < 250 ? i : 0);
+      w->id = i;
+      work_submit(&q, &w->w);
+      log(L_INFO, "Submitted request %d (pri %d)", w->id, w->w.priority);
+    }
+
+  /* work_wait() returns NULL as soon as no requests are running */
+  struct w *w;
+  while ((w = (struct w *) work_wait(&q)))
+    {
+      log(L_INFO, "Finished request %d", w->id);
+      /* The request has left all queues, so it can be released */
+      xfree(w);
+    }
+
+  work_queue_cleanup(&q);
+  worker_pool_cleanup(&pool);
+  return 0;
+}
+
+#endif
diff --git a/lib/workqueue.h b/lib/workqueue.h

new file mode 100644 (file)

index 0000000..b16a994
--- /dev/null
+++ b/lib/workqueue.h
@@ -0,0 +1,93 @@
+/*
+ *     UCW Library -- Thread Pools and Work Queues
+ *
+ *     (c) 2006 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_WORKQUEUE_H
+#define _UCW_WORKQUEUE_H
+
+/*
+ *  A thread pool is a set of threads receiving work requests from a common queue.
+ *  Each work request contains a pointer to a function to be called inside the thread.
+ *
+ *  A work queue is an interface for submitting work requests. It's bound to a single
+ *  thread pool, it remembers running requests and gathers replies. A single work queue
+ *  should not be used by multiple threads simultaneously.
+ *
+ *  Requests can have priorities. Requests with the highest priority are served first.
+ *  Requests of priority 0 are guaranteed to be served on first-come-first-served
+ *  basis, requests of higher priorities are unordered.
+ *
+ *  When a thread pool is initialized, worker_pool_init() handles the threads one by one:
+ *  new_thread() is called to allocate struct worker_thread (and user-defined thread
+ *  context following it), then the thread is started and it executes the init_thread()
+ *  callback. The next thread is started only after the callback has finished, so the
+ *  callbacks are serialized and worker_pool_init() returns when all of them are done.
+ */
+
+#include "lib/semaphore.h"
+#include "lib/clists.h"
+
+#include <pthread.h>
+
+struct worker_thread {                         // One of threads serving requests
+  cnode n;                                     // Node in worker_pool.worker_threads
+  pthread_t thread;
+  struct worker_pool *pool;                    // The pool we belong to
+  int id;                                      // Inside the pool
+  /* user-defined data can follow */
+};
+
+struct raw_queue {                             // Generic queue with locking
+  pthread_mutex_t queue_mutex;                 // Protects the list and the heap
+  clist pri0_queue;                            // Ordinary queue for requests with priority=0
+  struct work **pri_heap;                      // A heap for requests with priority>0, indexed from 1
+  uns heap_cnt, heap_max;                      // Number of requests in the heap and its capacity
+  sem_t *queue_sem;                            // Number of requests queued
+};
+
+/* The fields from num_threads to cleanup_thread are read by worker_pool_init() or worker_pool_cleanup(), so the caller sets them beforehand. */
+struct worker_pool {
+  struct raw_queue requests;                   // Requests waiting for a free thread
+  uns num_threads;
+  uns stack_size;                              // 0 for default
+  struct worker_thread *(*new_thread)(void);   // default: xmalloc the struct
+  void (*free_thread)(struct worker_thread *t);        // default: xfree
+  void (*init_thread)(struct worker_thread *t);        // default: empty
+  void (*cleanup_thread)(struct worker_thread *t); // default: empty
+  clist worker_threads;                                // List of struct worker_thread
+  sem_t *init_cleanup_sem;                     // Posted by a thread after its init and before its exit
+};
+
+struct work_queue {
+  struct worker_pool *pool;                    // The pool serving our requests
+  uns nr_running;                              // Number of requests in service
+  struct raw_queue finished;                   // Finished requests queue up here
+};
+
+struct work {                                  // A single request
+  cnode n;                                     // Links requests of priority 0 in a raw_queue
+  uns priority;                                        // 0 is queued in FIFO order, higher values are served first
+  struct work_queue *reply_to;                 // Where to queue the request when it's finished
+  void (*go)(struct worker_thread *t, struct work *w);         // Called inside the worker thread
+};
+
+/* Thread pools: start the threads of a pool / stop them and free the pool's resources */
+void worker_pool_init(struct worker_pool *p);
+void worker_pool_cleanup(struct worker_pool *p);
+
+/* Raw queues: get() waits for a request, try_get() returns NULL if there is none */
+void raw_queue_init(struct raw_queue *q);
+void raw_queue_cleanup(struct raw_queue *q);
+void raw_queue_put(struct raw_queue *q, struct work *w);
+struct work *raw_queue_get(struct raw_queue *q);
+struct work *raw_queue_try_get(struct raw_queue *q);
+
+/* Work queues: work_wait() and work_try_wait() return a finished request or NULL */
+void work_queue_init(struct worker_pool *p, struct work_queue *q);
+void work_queue_cleanup(struct work_queue *q);
+void work_submit(struct work_queue *q, struct work *w);
+struct work *work_wait(struct work_queue *q);
+struct work *work_try_wait(struct work_queue *q);
+
+#endif /* !_UCW_WORKQUEUE_H */
author	Martin Mares <mj@ucw.cz>
	Thu, 15 May 2008 09:21:15 +0000 (11:21 +0200)
committer	Martin Mares <mj@ucw.cz>
	Thu, 15 May 2008 09:21:15 +0000 (11:21 +0200)
lib/Makefile	[new file with mode: 0644]	patch \| blob
lib/THREADS	[new file with mode: 0644]	patch \| blob
lib/adler32.c	[new file with mode: 0644]	patch \| blob
lib/alloc.c	[new file with mode: 0644]	patch \| blob
lib/alloc_str.c	[new file with mode: 0644]	patch \| blob
lib/arraysort.h	[new file with mode: 0644]	patch \| blob
lib/asio.c	[new file with mode: 0644]	patch \| blob
lib/asio.h	[new file with mode: 0644]	patch \| blob
lib/asio.t	[new file with mode: 0644]	patch \| blob
lib/asort-test.c	[new file with mode: 0644]	patch \| blob
lib/autoconf.cfg	[new file with mode: 0644]	patch \| blob
lib/base224.c	[new file with mode: 0644]	patch \| blob
lib/base224.h	[new file with mode: 0644]	patch \| blob
lib/base64.c	[new file with mode: 0644]	patch \| blob
lib/base64.h	[new file with mode: 0644]	patch \| blob
lib/bbuf.c	[new file with mode: 0644]	patch \| blob
lib/bbuf.h	[new file with mode: 0644]	patch \| blob
lib/bbuf.t	[new file with mode: 0644]	patch \| blob
lib/bigalloc.c	[new file with mode: 0644]	patch \| blob
lib/binheap-node.h	[new file with mode: 0644]	patch \| blob
lib/binheap-test.c	[new file with mode: 0644]	patch \| blob
lib/binheap.h	[new file with mode: 0644]	patch \| blob
lib/binsearch.h	[new file with mode: 0644]	patch \| blob
lib/bit-ffs.c	[new file with mode: 0644]	patch \| blob
lib/bit-fls.c	[new file with mode: 0644]	patch \| blob
lib/bitarray.h	[new file with mode: 0644]	patch \| blob
lib/bitops.h	[new file with mode: 0644]	patch \| blob
lib/bitops.t	[new file with mode: 0644]	patch \| blob
lib/bitsig.c	[new file with mode: 0644]	patch \| blob
lib/bitsig.h	[new file with mode: 0644]	patch \| blob
lib/carefulio.c	[new file with mode: 0644]	patch \| blob
lib/charmap.h	[new file with mode: 0644]	patch \| blob
lib/chartype.h	[new file with mode: 0644]	patch \| blob
lib/clists.h	[new file with mode: 0644]	patch \| blob
lib/conf-alloc.c	[new file with mode: 0644]	patch \| blob
lib/conf-dump.c	[new file with mode: 0644]	patch \| blob
lib/conf-input.c	[new file with mode: 0644]	patch \| blob
lib/conf-internal.h	[new file with mode: 0644]	patch \| blob
lib/conf-intr.c	[new file with mode: 0644]	patch \| blob
lib/conf-journal.c	[new file with mode: 0644]	patch \| blob
lib/conf-parse.c	[new file with mode: 0644]	patch \| blob
lib/conf-section.c	[new file with mode: 0644]	patch \| blob
lib/conf-test.c	[new file with mode: 0644]	patch \| blob
lib/conf-test.cf	[new file with mode: 0644]	patch \| blob
lib/conf.h	[new file with mode: 0644]	patch \| blob
lib/config.h	[new file with mode: 0644]	patch \| blob
lib/ctmatch.c	[new file with mode: 0644]	patch \| blob
lib/db-emul.c	[new file with mode: 0644]	patch \| blob
lib/db-test.c	[new file with mode: 0644]	patch \| blob
lib/db-tool.c	[new file with mode: 0644]	patch \| blob
lib/db.c	[new file with mode: 0644]	patch \| blob
lib/db.h	[new file with mode: 0644]	patch \| blob
lib/db_internal.h	[new file with mode: 0644]	patch \| blob
lib/default.cfg	[new file with mode: 0644]	patch \| blob
lib/eltpool.c	[new file with mode: 0644]	patch \| blob
lib/eltpool.h	[new file with mode: 0644]	patch \| blob
lib/eltpool.test	[new file with mode: 0644]	patch \| blob
lib/exitstatus.c	[new file with mode: 0644]	patch \| blob
lib/fastbuf.c	[new file with mode: 0644]	patch \| blob
lib/fastbuf.h	[new file with mode: 0644]	patch \| blob
lib/fastbuf.t	[new file with mode: 0644]	patch \| blob
lib/fb-atomic.c	[new file with mode: 0644]	patch \| blob
lib/fb-buffer.c	[new file with mode: 0644]	patch \| blob
lib/fb-direct.c	[new file with mode: 0644]	patch \| blob
lib/fb-file.c	[new file with mode: 0644]	patch \| blob
lib/fb-grow.c	[new file with mode: 0644]	patch \| blob
lib/fb-limfd.c	[new file with mode: 0644]	patch \| blob
lib/fb-mem.c	[new file with mode: 0644]	patch \| blob
lib/fb-mmap.c	[new file with mode: 0644]	patch \| blob
lib/fb-param.c	[new file with mode: 0644]	patch \| blob
lib/fb-pool.c	[new file with mode: 0644]	patch \| blob
lib/fb-temp.c	[new file with mode: 0644]	patch \| blob
lib/ff-binary.c	[new file with mode: 0644]	patch \| blob
lib/ff-binary.h	[new file with mode: 0644]	patch \| blob
lib/ff-printf.c	[new file with mode: 0644]	patch \| blob
lib/ff-string.c	[new file with mode: 0644]	patch \| blob
lib/ff-unicode.c	[new file with mode: 0644]	patch \| blob
lib/ff-unicode.h	[new file with mode: 0644]	patch \| blob
lib/ff-unicode.t	[new file with mode: 0644]	patch \| blob
lib/ff-utf8.h	[new file with mode: 0644]	patch \| blob
lib/gbuf.h	[new file with mode: 0644]	patch \| blob
lib/getopt.c	[new file with mode: 0644]	patch \| blob
lib/getopt.h	[new file with mode: 0644]	patch \| blob
lib/getopt.t	[new file with mode: 0644]	patch \| blob
lib/getopt/Makefile	[new file with mode: 0644]	patch \| blob
lib/getopt/README	[new file with mode: 0644]	patch \| blob
lib/getopt/getopt-sh.c	[new file with mode: 0644]	patch \| blob
lib/getopt/getopt-sh.h	[new file with mode: 0644]	patch \| blob
lib/getopt/getopt.c	[new file with mode: 0644]	patch \| blob
lib/getopt/getopt.h	[new file with mode: 0644]	patch \| blob
lib/getopt/getopt1.c	[new file with mode: 0644]	patch \| blob
lib/getopt/getopt_init.c	[new file with mode: 0644]	patch \| blob
lib/getopt/getopt_int.h	[new file with mode: 0644]	patch \| blob
lib/hash-test.c	[new file with mode: 0644]	patch \| blob
lib/hash-test.t	[new file with mode: 0644]	patch \| blob
lib/hashfunc.c	[new file with mode: 0644]	patch \| blob
lib/hashfunc.h	[new file with mode: 0644]	patch \| blob
lib/hashtable.h	[new file with mode: 0644]	patch \| blob
lib/heap.h	[new file with mode: 0644]	patch \| blob
lib/ipaccess.c	[new file with mode: 0644]	patch \| blob
lib/ipaccess.h	[new file with mode: 0644]	patch \| blob
lib/kmp-search.h	[new file with mode: 0644]	patch \| blob
lib/kmp-test.c	[new file with mode: 0644]	patch \| blob
lib/kmp-test.t	[new file with mode: 0644]	patch \| blob
lib/kmp.h	[new file with mode: 0644]	patch \| blob
lib/lfs-test.c	[new file with mode: 0644]	patch \| blob
lib/lfs.h	[new file with mode: 0644]	patch \| blob
lib/lib.h	[new file with mode: 0644]	patch \| blob
lib/libucw.pc	[new file with mode: 0644]	patch \| blob
lib/lists.c	[new file with mode: 0644]	patch \| blob
lib/lists.h	[new file with mode: 0644]	patch \| blob
lib/lizard-safe.c	[new file with mode: 0644]	patch \| blob
lib/lizard-test.c	[new file with mode: 0644]	patch \| blob
lib/lizard.c	[new file with mode: 0644]	patch \| blob
lib/lizard.h	[new file with mode: 0644]	patch \| blob
lib/log-file.c	[new file with mode: 0644]	patch \| blob
lib/log.c	[new file with mode: 0644]	patch \| blob
lib/mainloop.c	[new file with mode: 0644]	patch \| blob
lib/mainloop.h	[new file with mode: 0644]	patch \| blob
lib/md5.c	[new file with mode: 0644]	patch \| blob
lib/md5.h	[new file with mode: 0644]	patch \| blob
lib/md5hex.c	[new file with mode: 0644]	patch \| blob
lib/mempool-fmt.c	[new file with mode: 0644]	patch \| blob
lib/mempool-str.c	[new file with mode: 0644]	patch \| blob
lib/mempool.c	[new file with mode: 0644]	patch \| blob
lib/mempool.h	[new file with mode: 0644]	patch \| blob
lib/mempool.t	[new file with mode: 0644]	patch \| blob
lib/mmap.c	[new file with mode: 0644]	patch \| blob
lib/pagecache.c	[new file with mode: 0644]	patch \| blob
lib/pagecache.h	[new file with mode: 0644]	patch \| blob
lib/partmap.c	[new file with mode: 0644]	patch \| blob
lib/partmap.h	[new file with mode: 0644]	patch \| blob
lib/patimatch.c	[new file with mode: 0644]	patch \| blob
lib/patmatch.c	[new file with mode: 0644]	patch \| blob
lib/patmatch.h	[new file with mode: 0644]	patch \| blob
lib/perl/CGI.pm	[new file with mode: 0644]	patch \| blob
lib/perl/Config.pm	[new file with mode: 0644]	patch \| blob
lib/perl/Configure.pm	[new file with mode: 0644]	patch \| blob
lib/perl/Filelock/Filelock.pm	[new file with mode: 0644]	patch \| blob
lib/perl/Filelock/Filelock.xs	[new file with mode: 0644]	patch \| blob
lib/perl/Filelock/MANIFEST	[new file with mode: 0644]	patch \| blob
lib/perl/Filelock/Makefile	[new file with mode: 0644]	patch \| blob
lib/perl/Filelock/Makefile.PL	[new file with mode: 0644]	patch \| blob
lib/perl/Log.pm	[new file with mode: 0644]	patch \| blob
lib/perl/Makefile	[new file with mode: 0644]	patch \| blob
lib/perl/Ulimit/MANIFEST	[new file with mode: 0644]	patch \| blob
lib/perl/Ulimit/Makefile	[new file with mode: 0644]	patch \| blob
lib/perl/Ulimit/Makefile.PL	[new file with mode: 0644]	patch \| blob
lib/perl/Ulimit/Ulimit.pm	[new file with mode: 0644]	patch \| blob
lib/perl/Ulimit/Ulimit.xs	[new file with mode: 0644]	patch \| blob
lib/prefetch.h	[new file with mode: 0644]	patch \| blob
lib/prime.c	[new file with mode: 0644]	patch \| blob
lib/primetable.c	[new file with mode: 0644]	patch \| blob
lib/proctitle.c	[new file with mode: 0644]	patch \| blob
lib/profile.c	[new file with mode: 0644]	patch \| blob
lib/profile.h	[new file with mode: 0644]	patch \| blob
lib/qache.c	[new file with mode: 0644]	patch \| blob
lib/qache.h	[new file with mode: 0644]	patch \| blob
lib/random.c	[new file with mode: 0644]	patch \| blob
lib/randomkey.c	[new file with mode: 0644]	patch \| blob
lib/realloc.c	[new file with mode: 0644]	patch \| blob
lib/redblack-test.c	[new file with mode: 0644]	patch \| blob
lib/redblack.h	[new file with mode: 0644]	patch \| blob
lib/regex.c	[new file with mode: 0644]	patch \| blob
lib/regex.t	[new file with mode: 0644]	patch \| blob
lib/regex/Makefile	[new file with mode: 0644]	patch \| blob
lib/regex/README	[new file with mode: 0644]	patch \| blob
lib/regex/regcomp.c	[new file with mode: 0644]	patch \| blob
lib/regex/regex-sh.h	[new file with mode: 0644]	patch \| blob
lib/regex/regex.c	[new file with mode: 0644]	patch \| blob
lib/regex/regex.h	[new file with mode: 0644]	patch \| blob
lib/regex/regex_internal.c	[new file with mode: 0644]	patch \| blob
lib/regex/regex_internal.h	[new file with mode: 0644]	patch \| blob
lib/regex/regexec.c	[new file with mode: 0644]	patch \| blob
lib/runcmd.c	[new file with mode: 0644]	patch \| blob
lib/semaphore.h	[new file with mode: 0644]	patch \| blob
lib/shell/Makefile	[new file with mode: 0644]	patch \| blob
lib/shell/config.c	[new file with mode: 0644]	patch \| blob
lib/shell/config.t	[new file with mode: 0644]	patch \| blob
lib/shell/libucw.sh	[new file with mode: 0644]	patch \| blob
lib/shell/logger.c	[new file with mode: 0644]	patch \| blob
lib/sighandler.c	[new file with mode: 0644]	patch \| blob
lib/simple-lists.c	[new file with mode: 0644]	patch \| blob
lib/simple-lists.h	[new file with mode: 0644]	patch \| blob
lib/slists.c	[new file with mode: 0644]	patch \| blob
lib/slists.h	[new file with mode: 0644]	patch \| blob
lib/slists.t	[new file with mode: 0644]	patch \| blob
lib/sorter/Makefile	[new file with mode: 0644]	patch \| blob
lib/sorter/TODO	[new file with mode: 0644]	patch \| blob
lib/sorter/array.c	[new file with mode: 0644]	patch \| blob
lib/sorter/array.h	[new file with mode: 0644]	patch \| blob
lib/sorter/common.h	[new file with mode: 0644]	patch \| blob
lib/sorter/config.c	[new file with mode: 0644]	patch \| blob
lib/sorter/govern.c	[new file with mode: 0644]	patch \| blob
lib/sorter/s-fixint.h	[new file with mode: 0644]	patch \| blob
lib/sorter/s-internal.h	[new file with mode: 0644]	patch \| blob
lib/sorter/s-multiway.h	[new file with mode: 0644]	patch \| blob
lib/sorter/s-radix.h	[new file with mode: 0644]	patch \| blob
lib/sorter/s-twoway.h	[new file with mode: 0644]	patch \| blob
lib/sorter/sbuck.c	[new file with mode: 0644]	patch \| blob
lib/sorter/sort-test.c	[new file with mode: 0644]	patch \| blob
lib/sorter/sorter.h	[new file with mode: 0644]	patch \| blob
lib/stkstring.c	[new file with mode: 0644]	patch \| blob
lib/stkstring.h	[new file with mode: 0644]	patch \| blob
lib/stkstring.t	[new file with mode: 0644]	patch \| blob
lib/str-test.c	[new file with mode: 0644]	patch \| blob
lib/str_ctype.c	[new file with mode: 0644]	patch \| blob
lib/str_lower.c	[new file with mode: 0644]	patch \| blob
lib/str_upper.c	[new file with mode: 0644]	patch \| blob
lib/string.c	[new file with mode: 0644]	patch \| blob
lib/sync.c	[new file with mode: 0644]	patch \| blob
lib/threads-conf.c	[new file with mode: 0644]	patch \| blob
lib/threads.c	[new file with mode: 0644]	patch \| blob
lib/threads.h	[new file with mode: 0644]	patch \| blob
lib/timer.c	[new file with mode: 0644]	patch \| blob
lib/unaligned.h	[new file with mode: 0644]	patch \| blob
lib/unicode.c	[new file with mode: 0644]	patch \| blob
lib/unicode.h	[new file with mode: 0644]	patch \| blob
lib/unicode.t	[new file with mode: 0644]	patch \| blob
lib/url.c	[new file with mode: 0644]	patch \| blob
lib/url.h	[new file with mode: 0644]	patch \| blob
lib/wildmatch.c	[new file with mode: 0644]	patch \| blob
lib/wildmatch.h	[new file with mode: 0644]	patch \| blob
lib/wordsplit.c	[new file with mode: 0644]	patch \| blob
lib/workqueue.c	[new file with mode: 0644]	patch \| blob
lib/workqueue.h	[new file with mode: 0644]	patch \| blob