From 54d6995ab44a7122c74c195bd3fedf11c2d223da Mon Sep 17 00:00:00 2001 From: Martin Mares Date: Thu, 15 May 2008 11:21:15 +0200 Subject: [PATCH] Added libucw from Sherlock v3.12.2. No changes have been made except for removing SHERLOCK_VERSION. --- lib/Makefile | 138 ++ lib/THREADS | 7 + lib/adler32.c | 48 + lib/alloc.c | 45 + lib/alloc_str.c | 19 + lib/arraysort.h | 174 ++ lib/asio.c | 289 +++ lib/asio.h | 70 + lib/asio.t | 4 + lib/asort-test.c | 77 + lib/autoconf.cfg | 277 +++ lib/base224.c | 213 ++ lib/base224.h | 25 + lib/base64.c | 120 + lib/base64.h | 25 + lib/bbuf.c | 86 + lib/bbuf.h | 22 + lib/bbuf.t | 4 + lib/bigalloc.c | 111 + lib/binheap-node.h | 19 + lib/binheap-test.c | 94 + lib/binheap.h | 203 ++ lib/binsearch.h | 26 + lib/bit-ffs.c | 46 + lib/bit-fls.c | 42 + lib/bitarray.h | 107 + lib/bitops.h | 40 + lib/bitops.t | 53 + lib/bitsig.c | 162 ++ lib/bitsig.h | 15 + lib/carefulio.c | 51 + lib/charmap.h | 268 +++ lib/chartype.h | 49 + lib/clists.h | 132 ++ lib/conf-alloc.c | 43 + lib/conf-dump.c | 123 + lib/conf-input.c | 455 ++++ lib/conf-internal.h | 44 + lib/conf-intr.c | 645 ++++++ lib/conf-journal.c | 117 + lib/conf-parse.c | 167 ++ lib/conf-section.c | 203 ++ lib/conf-test.c | 219 ++ lib/conf-test.cf | 52 + lib/conf.h | 163 ++ lib/config.h | 49 + lib/ctmatch.c | 44 + lib/db-emul.c | 155 ++ lib/db-test.c | 475 ++++ lib/db-tool.c | 264 +++ lib/db.c | 598 +++++ lib/db.h | 50 + lib/db_internal.h | 58 + lib/default.cfg | 50 + lib/eltpool.c | 100 + lib/eltpool.h | 65 + lib/eltpool.test | 4 + lib/exitstatus.c | 36 + lib/fastbuf.c | 204 ++ lib/fastbuf.h | 410 ++++ lib/fastbuf.t | 15 + lib/fb-atomic.c | 169 ++ lib/fb-buffer.c | 70 + lib/fb-direct.c | 344 +++ lib/fb-file.c | 276 +++ lib/fb-grow.c | 139 ++ lib/fb-limfd.c | 75 + lib/fb-mem.c | 221 ++ lib/fb-mmap.c | 228 ++ lib/fb-param.c | 184 ++ lib/fb-pool.c | 81 + lib/fb-temp.c | 86 + lib/ff-binary.c | 34 + lib/ff-binary.h | 81 + lib/ff-printf.c | 83 + lib/ff-string.c | 286 +++ lib/ff-unicode.c | 346 +++ 
lib/ff-unicode.h | 144 ++ lib/ff-unicode.t | 41 + lib/ff-utf8.h | 15 + lib/gbuf.h | 71 + lib/getopt.c | 57 + lib/getopt.h | 92 + lib/getopt.t | 21 + lib/getopt/Makefile | 5 + lib/getopt/README | 12 + lib/getopt/getopt-sh.c | 4 + lib/getopt/getopt-sh.h | 14 + lib/getopt/getopt.c | 1226 ++++++++++ lib/getopt/getopt.h | 177 ++ lib/getopt/getopt1.c | 192 ++ lib/getopt/getopt_init.c | 75 + lib/getopt/getopt_int.h | 130 ++ lib/hash-test.c | 319 +++ lib/hash-test.t | 13 + lib/hashfunc.c | 213 ++ lib/hashfunc.h | 43 + lib/hashtable.h | 663 ++++++ lib/heap.h | 88 + lib/ipaccess.c | 127 ++ lib/ipaccess.h | 28 + lib/kmp-search.h | 191 ++ lib/kmp-test.c | 206 ++ lib/kmp-test.t | 3 + lib/kmp.h | 463 ++++ lib/lfs-test.c | 63 + lib/lfs.h | 63 + lib/lib.h | 279 +++ lib/libucw.pc | 16 + lib/lists.c | 77 + lib/lists.h | 64 + lib/lizard-safe.c | 102 + lib/lizard-test.c | 123 + lib/lizard.c | 478 ++++ lib/lizard.h | 49 + lib/log-file.c | 108 + lib/log.c | 149 ++ lib/mainloop.c | 545 +++++ lib/mainloop.h | 108 + lib/md5.c | 249 +++ lib/md5.h | 24 + lib/md5hex.c | 35 + lib/mempool-fmt.c | 99 + lib/mempool-str.c | 102 + lib/mempool.c | 491 ++++ lib/mempool.h | 295 +++ lib/mempool.t | 11 + lib/mmap.c | 47 + lib/pagecache.c | 429 ++++ lib/pagecache.h | 42 + lib/partmap.c | 95 + lib/partmap.h | 47 + lib/patimatch.c | 16 + lib/patmatch.c | 15 + lib/patmatch.h | 46 + lib/perl/CGI.pm | 444 ++++ lib/perl/Config.pm | 54 + lib/perl/Configure.pm | 190 ++ lib/perl/Filelock/Filelock.pm | 32 + lib/perl/Filelock/Filelock.xs | 30 + lib/perl/Filelock/MANIFEST | 6 + lib/perl/Filelock/Makefile | 24 + lib/perl/Filelock/Makefile.PL | 9 + lib/perl/Log.pm | 34 + lib/perl/Makefile | 10 + lib/perl/Ulimit/MANIFEST | 6 + lib/perl/Ulimit/Makefile | 24 + lib/perl/Ulimit/Makefile.PL | 9 + lib/perl/Ulimit/Ulimit.pm | 48 + lib/perl/Ulimit/Ulimit.xs | 87 + lib/prefetch.h | 36 + lib/prime.c | 78 + lib/primetable.c | 164 ++ lib/proctitle.c | 82 + lib/profile.c | 129 ++ lib/profile.h | 140 ++ lib/qache.c | 786 +++++++ 
lib/qache.h | 57 + lib/random.c | 58 + lib/randomkey.c | 25 + lib/realloc.c | 26 + lib/redblack-test.c | 252 +++ lib/redblack.h | 1040 +++++++++ lib/regex.c | 358 +++ lib/regex.t | 65 + lib/regex/Makefile | 7 + lib/regex/README | 9 + lib/regex/regcomp.c | 3544 +++++++++++++++++++++++++++++ lib/regex/regex-sh.h | 24 + lib/regex/regex.c | 10 + lib/regex/regex.h | 574 +++++ lib/regex/regex_internal.c | 1263 +++++++++++ lib/regex/regex_internal.h | 742 ++++++ lib/regex/regexec.c | 3977 +++++++++++++++++++++++++++++++++ lib/runcmd.c | 134 ++ lib/semaphore.h | 60 + lib/shell/Makefile | 12 + lib/shell/config.c | 434 ++++ lib/shell/config.t | 39 + lib/shell/libucw.sh | 42 + lib/shell/logger.c | 42 + lib/sighandler.c | 64 + lib/simple-lists.c | 48 + lib/simple-lists.h | 49 + lib/slists.c | 83 + lib/slists.h | 90 + lib/slists.t | 4 + lib/sorter/Makefile | 13 + lib/sorter/TODO | 15 + lib/sorter/array.c | 475 ++++ lib/sorter/array.h | 321 +++ lib/sorter/common.h | 152 ++ lib/sorter/config.c | 57 + lib/sorter/govern.c | 440 ++++ lib/sorter/s-fixint.h | 126 ++ lib/sorter/s-internal.h | 252 +++ lib/sorter/s-multiway.h | 148 ++ lib/sorter/s-radix.h | 30 + lib/sorter/s-twoway.h | 103 + lib/sorter/sbuck.c | 158 ++ lib/sorter/sort-test.c | 706 ++++++ lib/sorter/sorter.h | 320 +++ lib/stkstring.c | 126 ++ lib/stkstring.h | 38 + lib/stkstring.t | 7 + lib/str-test.c | 132 ++ lib/str_ctype.c | 16 + lib/str_lower.c | 16 + lib/str_upper.c | 16 + lib/string.c | 90 + lib/sync.c | 28 + lib/threads-conf.c | 27 + lib/threads.c | 123 + lib/threads.h | 35 + lib/timer.c | 43 + lib/unaligned.h | 174 ++ lib/unicode.c | 143 ++ lib/unicode.h | 334 +++ lib/unicode.t | 71 + lib/url.c | 767 +++++++ lib/url.h | 90 + lib/wildmatch.c | 237 ++ lib/wildmatch.h | 15 + lib/wordsplit.c | 62 + lib/workqueue.c | 282 +++ lib/workqueue.h | 93 + 226 files changed, 41307 insertions(+) create mode 100644 lib/Makefile create mode 100644 lib/THREADS create mode 100644 lib/adler32.c create mode 100644 lib/alloc.c create 
mode 100644 lib/alloc_str.c create mode 100644 lib/arraysort.h create mode 100644 lib/asio.c create mode 100644 lib/asio.h create mode 100644 lib/asio.t create mode 100644 lib/asort-test.c create mode 100644 lib/autoconf.cfg create mode 100644 lib/base224.c create mode 100644 lib/base224.h create mode 100644 lib/base64.c create mode 100644 lib/base64.h create mode 100644 lib/bbuf.c create mode 100644 lib/bbuf.h create mode 100644 lib/bbuf.t create mode 100644 lib/bigalloc.c create mode 100644 lib/binheap-node.h create mode 100644 lib/binheap-test.c create mode 100644 lib/binheap.h create mode 100644 lib/binsearch.h create mode 100644 lib/bit-ffs.c create mode 100644 lib/bit-fls.c create mode 100644 lib/bitarray.h create mode 100644 lib/bitops.h create mode 100644 lib/bitops.t create mode 100644 lib/bitsig.c create mode 100644 lib/bitsig.h create mode 100644 lib/carefulio.c create mode 100644 lib/charmap.h create mode 100644 lib/chartype.h create mode 100644 lib/clists.h create mode 100644 lib/conf-alloc.c create mode 100644 lib/conf-dump.c create mode 100644 lib/conf-input.c create mode 100644 lib/conf-internal.h create mode 100644 lib/conf-intr.c create mode 100644 lib/conf-journal.c create mode 100644 lib/conf-parse.c create mode 100644 lib/conf-section.c create mode 100644 lib/conf-test.c create mode 100644 lib/conf-test.cf create mode 100644 lib/conf.h create mode 100644 lib/config.h create mode 100644 lib/ctmatch.c create mode 100644 lib/db-emul.c create mode 100644 lib/db-test.c create mode 100644 lib/db-tool.c create mode 100644 lib/db.c create mode 100644 lib/db.h create mode 100644 lib/db_internal.h create mode 100644 lib/default.cfg create mode 100644 lib/eltpool.c create mode 100644 lib/eltpool.h create mode 100644 lib/eltpool.test create mode 100644 lib/exitstatus.c create mode 100644 lib/fastbuf.c create mode 100644 lib/fastbuf.h create mode 100644 lib/fastbuf.t create mode 100644 lib/fb-atomic.c create mode 100644 lib/fb-buffer.c create mode 100644 
lib/fb-direct.c create mode 100644 lib/fb-file.c create mode 100644 lib/fb-grow.c create mode 100644 lib/fb-limfd.c create mode 100644 lib/fb-mem.c create mode 100644 lib/fb-mmap.c create mode 100644 lib/fb-param.c create mode 100644 lib/fb-pool.c create mode 100644 lib/fb-temp.c create mode 100644 lib/ff-binary.c create mode 100644 lib/ff-binary.h create mode 100644 lib/ff-printf.c create mode 100644 lib/ff-string.c create mode 100644 lib/ff-unicode.c create mode 100644 lib/ff-unicode.h create mode 100644 lib/ff-unicode.t create mode 100644 lib/ff-utf8.h create mode 100644 lib/gbuf.h create mode 100644 lib/getopt.c create mode 100644 lib/getopt.h create mode 100644 lib/getopt.t create mode 100644 lib/getopt/Makefile create mode 100644 lib/getopt/README create mode 100644 lib/getopt/getopt-sh.c create mode 100644 lib/getopt/getopt-sh.h create mode 100644 lib/getopt/getopt.c create mode 100644 lib/getopt/getopt.h create mode 100644 lib/getopt/getopt1.c create mode 100644 lib/getopt/getopt_init.c create mode 100644 lib/getopt/getopt_int.h create mode 100644 lib/hash-test.c create mode 100644 lib/hash-test.t create mode 100644 lib/hashfunc.c create mode 100644 lib/hashfunc.h create mode 100644 lib/hashtable.h create mode 100644 lib/heap.h create mode 100644 lib/ipaccess.c create mode 100644 lib/ipaccess.h create mode 100644 lib/kmp-search.h create mode 100644 lib/kmp-test.c create mode 100644 lib/kmp-test.t create mode 100644 lib/kmp.h create mode 100644 lib/lfs-test.c create mode 100644 lib/lfs.h create mode 100644 lib/lib.h create mode 100644 lib/libucw.pc create mode 100644 lib/lists.c create mode 100644 lib/lists.h create mode 100644 lib/lizard-safe.c create mode 100644 lib/lizard-test.c create mode 100644 lib/lizard.c create mode 100644 lib/lizard.h create mode 100644 lib/log-file.c create mode 100644 lib/log.c create mode 100644 lib/mainloop.c create mode 100644 lib/mainloop.h create mode 100644 lib/md5.c create mode 100644 lib/md5.h create mode 100644 
lib/md5hex.c create mode 100644 lib/mempool-fmt.c create mode 100644 lib/mempool-str.c create mode 100644 lib/mempool.c create mode 100644 lib/mempool.h create mode 100644 lib/mempool.t create mode 100644 lib/mmap.c create mode 100644 lib/pagecache.c create mode 100644 lib/pagecache.h create mode 100644 lib/partmap.c create mode 100644 lib/partmap.h create mode 100644 lib/patimatch.c create mode 100644 lib/patmatch.c create mode 100644 lib/patmatch.h create mode 100644 lib/perl/CGI.pm create mode 100644 lib/perl/Config.pm create mode 100644 lib/perl/Configure.pm create mode 100644 lib/perl/Filelock/Filelock.pm create mode 100644 lib/perl/Filelock/Filelock.xs create mode 100644 lib/perl/Filelock/MANIFEST create mode 100644 lib/perl/Filelock/Makefile create mode 100644 lib/perl/Filelock/Makefile.PL create mode 100644 lib/perl/Log.pm create mode 100644 lib/perl/Makefile create mode 100644 lib/perl/Ulimit/MANIFEST create mode 100644 lib/perl/Ulimit/Makefile create mode 100644 lib/perl/Ulimit/Makefile.PL create mode 100644 lib/perl/Ulimit/Ulimit.pm create mode 100644 lib/perl/Ulimit/Ulimit.xs create mode 100644 lib/prefetch.h create mode 100644 lib/prime.c create mode 100644 lib/primetable.c create mode 100644 lib/proctitle.c create mode 100644 lib/profile.c create mode 100644 lib/profile.h create mode 100644 lib/qache.c create mode 100644 lib/qache.h create mode 100644 lib/random.c create mode 100644 lib/randomkey.c create mode 100644 lib/realloc.c create mode 100644 lib/redblack-test.c create mode 100644 lib/redblack.h create mode 100644 lib/regex.c create mode 100644 lib/regex.t create mode 100644 lib/regex/Makefile create mode 100644 lib/regex/README create mode 100644 lib/regex/regcomp.c create mode 100644 lib/regex/regex-sh.h create mode 100644 lib/regex/regex.c create mode 100644 lib/regex/regex.h create mode 100644 lib/regex/regex_internal.c create mode 100644 lib/regex/regex_internal.h create mode 100644 lib/regex/regexec.c create mode 100644 lib/runcmd.c 
create mode 100644 lib/semaphore.h create mode 100644 lib/shell/Makefile create mode 100644 lib/shell/config.c create mode 100644 lib/shell/config.t create mode 100644 lib/shell/libucw.sh create mode 100644 lib/shell/logger.c create mode 100644 lib/sighandler.c create mode 100644 lib/simple-lists.c create mode 100644 lib/simple-lists.h create mode 100644 lib/slists.c create mode 100644 lib/slists.h create mode 100644 lib/slists.t create mode 100644 lib/sorter/Makefile create mode 100644 lib/sorter/TODO create mode 100644 lib/sorter/array.c create mode 100644 lib/sorter/array.h create mode 100644 lib/sorter/common.h create mode 100644 lib/sorter/config.c create mode 100644 lib/sorter/govern.c create mode 100644 lib/sorter/s-fixint.h create mode 100644 lib/sorter/s-internal.h create mode 100644 lib/sorter/s-multiway.h create mode 100644 lib/sorter/s-radix.h create mode 100644 lib/sorter/s-twoway.h create mode 100644 lib/sorter/sbuck.c create mode 100644 lib/sorter/sort-test.c create mode 100644 lib/sorter/sorter.h create mode 100644 lib/stkstring.c create mode 100644 lib/stkstring.h create mode 100644 lib/stkstring.t create mode 100644 lib/str-test.c create mode 100644 lib/str_ctype.c create mode 100644 lib/str_lower.c create mode 100644 lib/str_upper.c create mode 100644 lib/string.c create mode 100644 lib/sync.c create mode 100644 lib/threads-conf.c create mode 100644 lib/threads.c create mode 100644 lib/threads.h create mode 100644 lib/timer.c create mode 100644 lib/unaligned.h create mode 100644 lib/unicode.c create mode 100644 lib/unicode.h create mode 100644 lib/unicode.t create mode 100644 lib/url.c create mode 100644 lib/url.h create mode 100644 lib/wildmatch.c create mode 100644 lib/wildmatch.h create mode 100644 lib/wordsplit.c create mode 100644 lib/workqueue.c create mode 100644 lib/workqueue.h diff --git a/lib/Makefile b/lib/Makefile new file mode 100644 index 0000000..7e520c2 --- /dev/null +++ b/lib/Makefile @@ -0,0 +1,138 @@ +# Makefile for the UCW 
Library (c) 1997--2007 Martin Mares + +DIRS+=lib +CONFIGS+=library +LIBUCW=$(o)/lib/libucw.pc + +ifdef CONFIG_UCW_DBTOOL +PROGS+=$(o)/lib/db-tool +endif + +LIBUCW_MODS= \ + threads \ + alloc alloc_str realloc bigalloc mempool mempool-str mempool-fmt eltpool \ + mmap pagecache partmap hashfunc \ + lists slists simple-lists bitsig \ + log log-file proctitle \ + conf-alloc conf-dump conf-input conf-intr conf-journal conf-parse conf-section \ + ipaccess \ + profile \ + fastbuf ff-binary ff-string ff-printf ff-unicode \ + fb-file carefulio fb-mem fb-temp fb-mmap fb-limfd fb-buffer fb-grow fb-pool fb-atomic fb-param \ + str_ctype str_upper str_lower unicode stkstring \ + wildmatch wordsplit ctmatch patimatch patmatch regex \ + prime primetable random timer randomkey \ + bit-ffs bit-fls \ + db \ + url \ + mainloop exitstatus runcmd sighandler \ + lizard lizard-safe adler32 \ + md5 md5hex \ + base64 base224 \ + sync \ + qache \ + string \ + bbuf \ + getopt + +LIBUCW_INCLUDES= \ + lib.h config.h threads.h \ + mempool.h pagecache.h \ + arraysort.h \ + lists.h clists.h slists.h simple-lists.h \ + unaligned.h prefetch.h \ + bbuf.h gbuf.h bitarray.h bitsig.h \ + hashfunc.h hashtable.h \ + heap.h binheap.h binheap-node.h \ + redblack.h \ + binsearch.h \ + bitops.h \ + conf.h getopt.h ipaccess.h \ + profile.h \ + fastbuf.h lfs.h ff-unicode.h ff-utf8.h ff-binary.h \ + chartype.h unicode.h stkstring.h \ + wildmatch.h patmatch.h \ + db.h \ + url.h \ + mainloop.h \ + lizard.h \ + md5.h \ + base64.h base224.h \ + qache.h \ + kmp.h kmp-search.h binsearch.h \ + partmap.h + +ifdef CONFIG_UCW_THREADS +# Some modules require threading +LIBUCW_MODS+=threads-conf workqueue asio fb-direct +LIBUCW_INCLUDES+=workqueue.h semaphore.h asio.h +endif + +ifdef CONFIG_OWN_REGEX +include $(s)/lib/regex/Makefile +endif + +ifdef CONFIG_OWN_GETOPT +include $(s)/lib/getopt/Makefile +endif + +include $(s)/lib/sorter/Makefile + +LIBUCW_MOD_PATHS=$(addprefix $(o)/lib/,$(LIBUCW_MODS)) + +$(o)/lib/libucw.a: 
$(addsuffix .o,$(LIBUCW_MOD_PATHS)) +$(o)/lib/libucw.so: $(addsuffix .oo,$(LIBUCW_MOD_PATHS)) + +$(o)/lib/hashfunc.o $(o)/lib/hashfunc.oo: CFLAGS += -funroll-loops +$(o)/lib/lizard.o: CFLAGS += $(COPT2) -funroll-loops + +$(o)/lib/db-test: $(o)/lib/db-test.o $(LIBUCW) +$(o)/lib/db-tool: $(o)/lib/db-tool.o $(LIBUCW) +$(o)/lib/conf-test: $(o)/lib/conf-test.o $(LIBUCW) +$(o)/lib/lfs-test: $(o)/lib/lfs-test.o $(LIBUCW) +$(o)/lib/hash-test: $(o)/lib/hash-test.o $(LIBUCW) +$(o)/lib/str-test: $(o)/lib/str-test.o $(LIBUCW) +$(o)/lib/asort-test: $(o)/lib/asort-test.o $(LIBUCW) +$(o)/lib/redblack-test: $(o)/lib/redblack-test.o $(LIBUCW) +$(o)/lib/binheap-test: $(o)/lib/binheap-test.o $(LIBUCW) +$(o)/lib/lizard-test: $(o)/lib/lizard-test.o $(LIBUCW) +$(o)/lib/kmp-test: $(o)/lib/kmp-test.o $(LIBUCW) $(LIBCHARSET) +$(o)/lib/ipaccess-test: $(o)/lib/ipaccess-test.o $(LIBUCW) + +TESTS+=$(addprefix $(o)/lib/,regex.test unicode.test hash-test.test mempool.test stkstring.test \ + slists.test kmp-test.test bbuf.test getopt.test fastbuf.test ff-unicode.test eltpool.test) + +$(o)/lib/regex.test: $(o)/lib/regex-t +$(o)/lib/unicode.test: $(o)/lib/unicode-t +$(o)/lib/hash-test.test: $(o)/lib/hash-test +$(o)/lib/mempool.test: $(o)/lib/mempool-t $(o)/lib/mempool-fmt-t $(o)/lib/mempool-str-t +$(o)/lib/stkstring.test: $(o)/lib/stkstring-t +$(o)/lib/bitops.test: $(o)/lib/bit-ffs-t $(o)/lib/bit-fls-t +$(o)/lib/slists.test: $(o)/lib/slists-t +$(o)/lib/kmp-test.test: $(o)/lib/kmp-test +$(o)/lib/bbuf.test: $(o)/lib/bbuf-t +$(o)/lib/getopt.test: $(o)/lib/getopt-t +$(o)/lib/fastbuf.test: $(o)/lib/fb-file-t $(o)/lib/fb-grow-t $(o)/lib/fb-pool-t +$(o)/lib/ff-unicode.test: $(o)/lib/ff-unicode-t +$(o)/lib/eltpool.test: $(o)/lib/eltpool-t + +ifdef CONFIG_UCW_THREADS +TESTS+=$(addprefix $(o)/lib/,asio.test) +$(o)/lib/asio.test: $(o)/lib/asio-t +endif + +API_LIBS+=libucw +API_INCLUDES+=$(o)/lib/.include-stamp +$(o)/lib/.include-stamp: $(addprefix $(s)/lib/,$(LIBUCW_INCLUDES)) obj/autoconf.h + 
$(Q)$(s)/build/install-includes $(> 16) & 0xffff; + int k; + + if (!buf) return 1L; + + while (len > 0) { + k = len < NMAX ? (int)len : NMAX; + len -= k; + while (k >= 16) { + DO16(buf); + buf += 16; + k -= 16; + } + if (k != 0) do { + s1 += *buf++; + s2 += s1; + } while (--k); + MOD(s1); + MOD(s2); + } + return (s2 << 16) | s1; +} diff --git a/lib/alloc.c b/lib/alloc.c new file mode 100644 index 0000000..678901a --- /dev/null +++ b/lib/alloc.c @@ -0,0 +1,45 @@ +/* + * UCW Library -- Memory Allocation + * + * (c) 2000 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#include "lib/lib.h" + +#include +#include + +#ifndef DEBUG_DMALLOC + +void * +xmalloc(uns size) +{ + void *x = malloc(size); + if (!x) + die("Cannot allocate %d bytes of memory", size); + return x; +} + +#endif + +void * +xmalloc_zero(uns size) +{ + void *x = xmalloc(size); + bzero(x, size); + return x; +} + +void +xfree(void *ptr) +{ + /* + * Maybe it is a little waste of resources to make this a function instead + * of a macro, but xmalloc() is not used for anything critical anyway, + * so let's prefer simplicity. + */ + free(ptr); +} diff --git a/lib/alloc_str.c b/lib/alloc_str.c new file mode 100644 index 0000000..05e803c --- /dev/null +++ b/lib/alloc_str.c @@ -0,0 +1,19 @@ +/* + * UCW Library -- String Allocation + * + * (c) 1997 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */ + +#include "lib/lib.h" + +#include + +char * +xstrdup(const char *s) +{ + uns l = strlen(s) + 1; + return memcpy(xmalloc(l), s, l); +} diff --git a/lib/arraysort.h b/lib/arraysort.h new file mode 100644 index 0000000..d5a9c78 --- /dev/null +++ b/lib/arraysort.h @@ -0,0 +1,174 @@ +/* + * UCW Library -- Universal Array Sorter + * + * (c) 2003 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +/* + * This is not a normal header file, it's a generator of sorting + * routines. Each time you include it with parameters set in the + * corresponding preprocessor macros, it generates an array sorter + * with the parameters given. + * + * You might wonder why the heck do we implement our own array sorter + * instead of using qsort(). The primary reason is that qsort handles + * only continuous arrays, but we need to sort array-like data structures + * where the only way to access elements is by using an indexing macro. + * Besides that, we are more than 2 times faster. 
+ * + * So much for advocacy, there are the parameters (those marked with [*] + * are mandatory): + * + * ASORT_PREFIX(x) [*] add a name prefix (used on all global names + * defined by the sorter) + * ASORT_KEY_TYPE [*] data type of a single array entry key + * ASORT_ELT(i) [*] returns the key of i-th element + * ASORT_LT(x,y) x < y for ASORT_TYPE (default: "x= ASORT_THRESHOLD && (right - l) >= ASORT_THRESHOLD) + { + /* Both partitions ok => push the larger one */ + if ((r - left) > (right - l)) + { + stack[sp].l = left; + stack[sp].r = r; + left = l; + } + else + { + stack[sp].l = l; + stack[sp].r = right; + right = r; + } + sp++; + } + else if ((r - left) >= ASORT_THRESHOLD) + { + /* Left partition OK, right undersize */ + right = r; + } + else if ((right - l) >= ASORT_THRESHOLD) + { + /* Right partition OK, left undersize */ + left = l; + } + else + { + /* Both partitions undersize => pop */ + if (!sp) + break; + sp--; + left = stack[sp].l; + right = stack[sp].r; + } + } + + /* + * We have a partially sorted array, finish by insertsort. Inspired + * by qsort() in GNU libc. + */ + + /* Find minimal element which will serve as a barrier */ + r = MIN(array_size, ASORT_THRESHOLD); + m = 0; + for (l=1; l + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */ + +#undef LOCAL_DEBUG + +#include "lib/lib.h" +#include "lib/asio.h" +#include "lib/threads.h" + +#include +#include +#include + +static uns asio_num_users; +static struct worker_pool asio_wpool; + +static void +asio_init_unlocked(void) +{ + if (asio_num_users++) + return; + + DBG("ASIO: INIT"); + asio_wpool.num_threads = 1; + worker_pool_init(&asio_wpool); +} + +static void +asio_cleanup_unlocked(void) +{ + if (--asio_num_users) + return; + + DBG("ASIO: CLEANUP"); + worker_pool_cleanup(&asio_wpool); +} + +void +asio_init_queue(struct asio_queue *q) +{ + ucwlib_lock(); + asio_init_unlocked(); + ucwlib_unlock(); + + DBG("ASIO: New queue %p", q); + ASSERT(q->buffer_size); + q->allocated_requests = 0; + q->running_requests = 0; + q->running_writebacks = 0; + q->use_count = 0; + clist_init(&q->idle_list); + clist_init(&q->done_list); + work_queue_init(&asio_wpool, &q->queue); +} + +void +asio_cleanup_queue(struct asio_queue *q) +{ + DBG("ASIO: Removing queue %p", q); + ASSERT(!q->running_requests); + ASSERT(!q->running_writebacks); + ASSERT(!q->allocated_requests); + ASSERT(clist_empty(&q->done_list)); + + struct asio_request *r; + while (r = clist_remove_head(&q->idle_list)) + { + big_free(r->buffer, q->buffer_size); + xfree(r); + } + + work_queue_cleanup(&q->queue); + + ucwlib_lock(); + asio_cleanup_unlocked(); + ucwlib_unlock(); +} + +struct asio_request * +asio_get(struct asio_queue *q) +{ + q->allocated_requests++; + struct asio_request *r = clist_head(&q->idle_list); + if (!r) + { + r = xmalloc_zero(sizeof(*r)); + r->queue = q; + r->buffer = big_alloc(q->buffer_size); + DBG("ASIO: Got %p (new)", r); + } + else + { + clist_remove(&r->work.n); + DBG("ASIO: Got %p", r); + } + r->op = ASIO_FREE; + r->fd = -1; + r->len = 0; + r->status = -1; + r->returned_errno = -1; + r->submitted = 0; + return r; +} + +static int +asio_raw_wait(struct asio_queue *q) +{ + struct asio_request *r = (struct asio_request *) work_wait(&q->queue); + if (!r) + return 0; + r->submitted 
= 0; + q->running_requests--; + if (r->op == ASIO_WRITE_BACK) + { + DBG("ASIO: Finished writeback %p", r); + if (r->status < 0) + die("Asynchronous write to fd %d failed: %s", r->fd, strerror(r->returned_errno)); + if (r->status != (int)r->len) + die("Asynchronous write to fd %d wrote only %d bytes out of %d", r->fd, r->status, r->len); + q->running_writebacks--; + asio_put(r); + } + else + clist_add_tail(&q->done_list, &r->work.n); + return 1; +} + +static void +asio_handler(struct worker_thread *t UNUSED, struct work *w) +{ + struct asio_request *r = (struct asio_request *) w; + + DBG("ASIO: Servicing %p (%s on fd=%d, len=%d)", r, + (char*[]) { "?", "READ", "WRITE", "WRITEBACK" }[r->op], r->fd, r->len); + errno = 0; + switch (r->op) + { + case ASIO_READ: + r->status = read(r->fd, r->buffer, r->len); + break; + case ASIO_WRITE: + case ASIO_WRITE_BACK: + r->status = write(r->fd, r->buffer, r->len); + break; + default: + die("ASIO: Got unknown request type %d", r->op); + } + r->returned_errno = errno; + DBG("ASIO: Finished %p (status=%d, errno=%d)", r, r->status, r->returned_errno); +} + +void +asio_submit(struct asio_request *r) +{ + struct asio_queue *q = r->queue; + DBG("ASIO: Submitting %p on queue %p", r, q); + ASSERT(r->op != ASIO_FREE); + ASSERT(!r->submitted); + if (r->op == ASIO_WRITE_BACK) + { + while (q->running_writebacks >= q->max_writebacks) + { + DBG("ASIO: Waiting for free writebacks"); + if (!asio_raw_wait(q)) + ASSERT(0); + } + q->running_writebacks++; + } + q->running_requests++; + r->submitted = 1; + r->work.go = asio_handler; + r->work.priority = 0; + work_submit(&q->queue, &r->work); +} + +struct asio_request * +asio_wait(struct asio_queue *q) +{ + struct asio_request *r; + while (!(r = clist_head(&q->done_list))) + { + DBG("ASIO: Waiting on queue %p", q); + if (!asio_raw_wait(q)) + return NULL; + } + clist_remove(&r->work.n); + DBG("ASIO: Done %p", r); + return r; +} + +void +asio_put(struct asio_request *r) +{ + struct asio_queue *q = 
r->queue; + DBG("ASIO: Put %p", r); + ASSERT(!r->submitted); + ASSERT(q->allocated_requests); + clist_add_tail(&q->idle_list, &r->work.n); + q->allocated_requests--; +} + +void +asio_sync(struct asio_queue *q) +{ + DBG("ASIO: Syncing queue %p", q); + while (q->running_requests) + if (!asio_raw_wait(q)) + ASSERT(0); +} + +#ifdef TEST + +int main(void) +{ + struct asio_queue q; + struct asio_request *r; + + q.buffer_size = 4096; + q.max_writebacks = 2; + asio_init_queue(&q); + +#if 0 + + for (;;) + { + r = asio_get(&q); + r->op = ASIO_READ; + r->fd = 0; + r->len = q.buffer_size; + asio_submit(r); + r = asio_wait(&q); + ASSERT(r); + if (r->status <= 0) + { + asio_put(r); + break; + } + r->op = ASIO_WRITE_BACK; + r->fd = 1; + r->len = r->status; + asio_submit(r); + } + asio_sync(&q); + +#else + + r = asio_get(&q); + r->op = ASIO_READ; + r->fd = 0; + r->len = 1; + asio_submit(r); + r = asio_wait(&q); + ASSERT(r); + asio_put(r); + + for (uns i=0; i<10; i++) + { + r = asio_get(&q); + r->op = ASIO_WRITE_BACK; + r->fd = 1; + r->len = 1; + r->buffer[0] = 'A' + i; + asio_submit(r); + } + asio_sync(&q); + + r = asio_get(&q); + r->op = ASIO_WRITE; + r->fd = 1; + r->len = 1; + r->buffer[0] = '\n'; + asio_submit(r); + r = asio_wait(&q); + ASSERT(r); + asio_put(r); + +#endif + + asio_cleanup_queue(&q); + return 0; +} + +#endif diff --git a/lib/asio.h b/lib/asio.h new file mode 100644 index 0000000..6773c81 --- /dev/null +++ b/lib/asio.h @@ -0,0 +1,70 @@ +/* + * UCW Library -- Asynchronous I/O + * + * (c) 2006 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#ifndef _UCW_ASIO_H +#define _UCW_ASIO_H + +#include "lib/workqueue.h" +#include "lib/clists.h" + +/* + * This module takes care of scheduling and executing asynchronous I/O requests + * on files opened with O_DIRECT. It is primarily used by the fb-direct fastbuf + * back-end, but you can use it explicitly, too. 
+ * + * You can define several I/O queues, each for use by a single thread. Requests + * on a single queue are always processed in order of their submits, requests + * from different queues may be interleaved (although the current implementation + * does not do so). Normal read and write requests are returned to their queue + * when they are completed. Write-back requests are automatically freed when + * done, but the number of such requests in fly is limited in order to avoid + * consuming all memory, so a submit of a write-back request can block. + */ + +struct asio_queue { + uns buffer_size; // How large buffers do we use [user-settable] + uns max_writebacks; // Maximum number of writeback requests active [user-settable] + uns allocated_requests; + uns running_requests; // Total number of running requests + uns running_writebacks; // How many of them are writebacks + clist idle_list; // Recycled requests waiting for get + clist done_list; // Finished requests + struct work_queue queue; + uns use_count; // For use by the caller +}; + +enum asio_op { + ASIO_FREE, + ASIO_READ, + ASIO_WRITE, + ASIO_WRITE_BACK, // Background write with no success notification +}; + +struct asio_request { + struct work work; // asio_requests are internally just work nodes + struct asio_queue *queue; + byte *buffer; + int fd; + enum asio_op op; + uns len; + int status; + int returned_errno; + int submitted; + void *user_data; // For use by the caller +}; + +void asio_init_queue(struct asio_queue *q); // Initialize a new queue +void asio_cleanup_queue(struct asio_queue *q); +struct asio_request *asio_get(struct asio_queue *q); // Get an empty request +void asio_submit(struct asio_request *r); // Submit the request (can block if too many writebacks) +struct asio_request *asio_wait(struct asio_queue *q); // Wait for the first finished request, NULL if no more +void asio_put(struct asio_request *r); // Return a finished request for recycling +void asio_sync(struct asio_queue *q); // Wait 
until all requests are finished + +#endif /* !_UCW_ASIO_H */ diff --git a/lib/asio.t b/lib/asio.t new file mode 100644 index 0000000..b660657 --- /dev/null +++ b/lib/asio.t @@ -0,0 +1,4 @@ +# Tests for asynchronous I/O + +Run: echo y | ../obj/lib/asio-t +Out: ABCDEFGHIJ diff --git a/lib/asort-test.c b/lib/asort-test.c new file mode 100644 index 0000000..9c6abd4 --- /dev/null +++ b/lib/asort-test.c @@ -0,0 +1,77 @@ +/* + * UCW Library -- Universal Array Sorter Test and Benchmark + * + * (c) 2003 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#include "lib/lib.h" + +#include +#include + +#define N 4000037 /* a prime */ + +struct elt { + u32 key; + u32 x, y; +}; + +static struct elt array[N]; + +#define ASORT_KEY_TYPE u32 +#define ASORT_ELT(i) array[i].key +#define ASORT_SWAP(i,j) do { struct elt e=array[j]; array[j]=array[i]; array[i]=e; } while(0) + +static void generate(void) +{ + uns i; + for (i=0; ikey < Y->key) + return -1; + else if (X->key > Y->key) + return 1; + else + return 0; +} + +#define ASORT_PREFIX(x) as_##x +#include "lib/arraysort.h" + +int main(void) +{ + timestamp_t timer; + + generate(); + init_timer(&timer); + qsort(array, N, sizeof(array[0]), (int (*)(const void *, const void *)) qs_comp); + printf("qsort: %d ms\n", get_timer(&timer)); + check(); + generate(); + init_timer(&timer); + as_sort(N); + printf("asort: %d ms\n", get_timer(&timer)); + check(); + return 0; +} diff --git a/lib/autoconf.cfg b/lib/autoconf.cfg new file mode 100644 index 0000000..b0e3c53 --- /dev/null +++ b/lib/autoconf.cfg @@ -0,0 +1,277 @@ +# Automatic configuration of the UCW Library +# (c) 2005--2007 Martin Mares +# (c) 2006 Robert Spalek + +### OS ### + +Test("OS", "Checking on which OS we run", sub { + my $os = `uname`; + chomp $os; + Fail "Unable to determine OS type" if $? 
|| $os eq ""; + return $os; +}); + +if (Get("OS") eq "Linux") { + Set("CONFIG_LINUX"); +} elsif (Get("OS") eq "Darwin") { + Set("CONFIG_DARWIN"); +} else { + Fail "Don't know how to run on this operating system."; +} + +### Compiler ### + +# Default compiler +Test("CC", "Checking for C compiler", sub { return "gcc"; }); + +# GCC version +Test("GCCVER", "Checking for GCC version", sub { + my $gcc = Get("CC"); + my $ver = `$gcc --version | sed '2,\$d; s/^\\(.* \\)*\\([0-9]*\\.[0-9]*\\).*/\\2/'`; + chomp $ver; + Fail "Unable to determine GCC version" if $? || $ver eq ""; + return $ver; +}); +my ($gccmaj, $gccmin) = split(/\./, Get("GCCVER")); +my $gccver = 1000*$gccmaj + $gccmin; +$gccver >= 3000 or Fail "GCC older than 3.0 doesn't support C99 well enough."; + +### CPU ### + +Test("ARCH", "Checking for machine architecture", sub { + my $mach = `uname -m`; + chomp $mach; + Fail "Unable to determine machine type" if $? || $mach eq ""; + if ($mach =~ /^i[0-9]86$/) { + return "i386"; + } elsif ($mach =~ /^(x86[_-]|amd)64$/) { + return "amd64"; + } else { + return "unknown"; + } +}); + +sub parse_cpuinfo_linux() { + open X, "/proc/cpuinfo" || undef; + my %pc = (); + while () { + chomp; + /^$/ && last; + /^([^\t]+)\t+:\s*(.*)$/ and $pc{$1}=$2; + } + close X; + return ($pc{'vendor_id'}, + $pc{'cpu family'}, + $pc{'model'}); +} + +sub parse_cpuinfo_darwin() { + @cpu = (`sysctl -n machdep.cpu.vendor`, + `sysctl -n machdep.cpu.family`, + `sysctl -n machdep.cpu.model`); + chomp @cpu; + return @cpu; +} + +sub parse_cpuinfo() { + my @cpu; + if (IsSet("CONFIG_LINUX")) { + @cpu = parse_cpuinfo_linux(); + } elsif (IsSet("CONFIG_DARWIN")) { + @cpu = parse_cpuinfo_darwin(); + } + $cpu[0] = "" if !defined $cpu[0]; + $cpu[1] = 0 if !defined $cpu[1]; + $cpu[2] = 0 if !defined $cpu[2]; + return @cpu; +} + +Test("CPU_ARCH", "Checking for CPU architecture", sub { + my $mach = Get("ARCH"); + my $arch = ""; + if ($mach eq "i386") { + Set("CPU_I386"); + UnSet("CPU_64BIT_POINTERS"); + 
Set("CPU_LITTLE_ENDIAN"); + UnSet("CPU_BIG_ENDIAN"); + Set("CPU_ALLOW_UNALIGNED"); + Set("CPU_STRUCT_ALIGN" => 4); + if (IsSet("CONFIG_EXACT_CPU")) { + my ($vendor, $family, $model) = parse_cpuinfo(); + # Try to understand CPU vendor, family and model [inspired by MPlayer's configure script] + if ($vendor eq "AuthenticAMD") { + if ($family >= 6) { + if ($model >= 31 && $gccver >= 3004) { $arch = "athlon64"; } + elsif ($model >= 6 && $gccver >= 3003) { $arch = "athlon-xp"; } + else { $arch = "athlon"; } + } + } elsif ($vendor eq "GenuineIntel") { + if ($family >= 15 && $gccver >= 3003) { + if ($model >= 4) { $arch = "nocona"; } + elsif ($model >= 3) { $arch = "prescott"; } + else { $arch = "pentium4"; } + } elsif ($family == 6 && $gccver >= 3003) { + if ($model == 15) { $arch = "prescott"; } + elsif (($model == 9 || $model == 13) && $gccver >= 3004) { $arch = "pentium-m"; } + elsif ($model >= 7) { $arch = "pentium3"; } + elsif ($model >= 3) { $arch = "pentium2"; } + } + } + + # No match on vendor, try the family + if ($arch eq "") { + if ($family >= 6) { + $arch = "i686"; + } elsif ($family >= 3) { + $arch = "i${family}86"; + } + } + Log (($arch ne "") ? "(using /proc/cpuinfo) " : "(don't understand /proc/cpuinfo) "); + return $arch; + } else { + return "default"; + } + } elsif ($mach eq "amd64") { + Set("CPU_AMD64"); + Set("CPU_64BIT_POINTERS"); + Set("CPU_LITTLE_ENDIAN"); + UnSet("CPU_BIG_ENDIAN"); + Set("CPU_ALLOW_UNALIGNED"); + Set("CPU_STRUCT_ALIGN" => 8); + if (IsSet("CONFIG_EXACT_CPU")) { + # In x86-64 world, the detection is somewhat easier so far... + my ($vendor, $family, $model) = parse_cpuinfo(); + if ($vendor eq "AuthenticAMD") { + $arch = "athlon64"; + } elsif ($vendor eq "GenuineIntel") { + $arch = "nocona"; + } + Log (($arch ne "") ? 
"(using /proc/cpuinfo) " : "(don't understand /proc/cpuinfo) "); + return $arch; + } else { + return "default"; + } + } else { + return "unknown"; + } +}); + +if (Get("CPU_ARCH") eq "unknown") { + Warn "CPU architecture not recognized, using defaults, keep fingers crossed.\n"; +} + +### Compiler and its Options ### + +# C flags: tell the compiler we're speaking C99, and disable common symbols +Set("CLANG" => "-std=gnu99 -fno-common"); + +# C optimizations +Set("COPT" => '-O2'); +if (Get("CPU_ARCH") ne "unknown" && Get("CPU_ARCH") ne "default") { + Append("COPT", '-march=$(CPU_ARCH)'); +} + +# C optimizations for highly exposed code +Set("COPT2" => '-O3'); + +# Warnings +Set("CWARNS" => '-Wall -W -Wno-parentheses -Wstrict-prototypes -Wmissing-prototypes -Winline'); +Set("CWARNS_OFF" => ''); + +# Linker flags +Set("LOPT" => ""); + +# Extra libraries +Set("LIBS" => ""); + +# Extra flags for compiling and linking shared libraries +Set("CSHARED" => '-fPIC'); +if (IsSet("CONFIG_DARWIN")) { + Set("LSHARED" => '-dynamiclib -install_name lib/$(@F) -undefined dynamic_lookup'); +} else { + Set("LSHARED" => '-shared -Wl,-soname,lib/$(@F)'); +} + +# Extra switches depending on GCC version: +if ($gccver == 3000) { + Append("COPT" => "-fstrict-aliasing"); +} elsif ($gccver == 3003) { + Append("CWARNS" => "-Wundef -Wredundant-decls"); + Append("COPT" => "-finline-limit=20000 --param max-inline-insns-auto=1000"); +} elsif ($gccver == 3004) { + Append("CWARNS" => "-Wundef -Wredundant-decls"); + Append("COPT" => "-finline-limit=2000 --param large-function-insns=5000 --param inline-unit-growth=200 --param large-function-growth=400"); +} elsif ($gccver == 4000 || $gccver == 4001) { + Append("CWARNS" => "-Wundef -Wredundant-decls -Wno-pointer-sign -Wdisabled-optimization -Wno-missing-field-initializers"); + Append("CWARNS_OFF" => "-Wno-pointer-sign"); + Append("COPT" => "-finline-limit=5000 --param large-function-insns=5000 --param inline-unit-growth=200 --param 
# Debug builds enable assertions and debug info; CONFIG_DEBUG values above 1
# additionally set DEBUG_DIE_BY_ABORT.  Release builds drop the frame pointer
# and pass -s to the linker.
if (IsSet("CONFIG_DEBUG")) {
    # If debugging:
    Set("DEBUG_ASSERTS");
    Set("DEBUG_DIE_BY_ABORT") if Get("CONFIG_DEBUG") > 1;
    Set("CDEBUG" => "-ggdb");
} else {
    # If building a release version:
    Append("COPT" => "-fomit-frame-pointer");
    Append("LOPT" => "-s");
}

if (IsSet("CONFIG_DARWIN")) {
    # gcc-4.0 on Darwin doesn't set this in the gnu99 mode
    Append("CLANG" => "-fnested-functions");
    # Directory hierarchy of the fink project
    Append("LIBS" => "-L/sw/lib");
    Append("COPT" => "-I/sw/include");
    # Fill in some constants not found in the system header files
    Set("SOL_TCP" => 6);    # missing in /usr/include/netinet/tcp.h
}

# Determine page size
Test("CPU_PAGE_SIZE", "Determining page size", sub {
    my $p;
    # Backticks return a defined (possibly empty) string even when the
    # command exits with an error, so also check $? and that the output is
    # a number, as the other probes in this file check $? and empty output.
    if (IsSet("CONFIG_DARWIN")) {
        $p = `sysctl -n hw.pagesize`;
        Fail "sysctl hw.pagesize failed" if !defined $p || $? || $p !~ /^\d+$/;
    } elsif (IsSet("CONFIG_LINUX")) {
        $p = `getconf PAGE_SIZE`;
        Fail "getconf PAGE_SIZE failed" if !defined $p || $? || $p !~ /^\d+$/;
    }
    # $p stays undefined when neither OS switch is set; do not chomp undef.
    chomp $p if defined $p;
    return $p;
});

if (IsSet("CONFIG_LARGE_FILES") && IsSet("CONFIG_LINUX")) {
    # Use 64-bit versions of file functions
    Set("CONFIG_LFS");
}

# Decide how will lib/partmap.c work
Set("PARTMAP_IS_MMAP") if IsSet("CPU_64BIT_POINTERS");

# Option for lib/mempool.c
Set("POOL_IS_MMAP");
+ Set("CONFIG_UCW_RADIX_SORTER_BITS" => 12); +} + +# If debugging memory allocations: +#LIBS+=-lefence +#CDEBUG+=-DDEBUG_DMALLOC +#LIBS+=-ldmalloc + +# Return success +1; diff --git a/lib/base224.c b/lib/base224.c new file mode 100644 index 0000000..3d62471 --- /dev/null +++ b/lib/base224.c @@ -0,0 +1,213 @@ +/* + * UCW Library -- Base 224 Encoding & Decoding + * + * (c) 2002 Martin Mares + * + * The `base-224' encoding transforms general sequences of bytes + * to sequences of non-control 8-bit characters (0x20-0xff). Since + * 224 and 256 are incompatible bases (there is no k,l: 224^k=256^l) + * and we want to avoid lengthy calculations, we cheat a bit: + * + * Each base-224 digit can be represented as a (base-7 digit, base-32 digit) + * pair, so we pass the lower 5 bits directly and use a base-7 encoder + * for the upper part. We process blocks of 39 bits and encode them + * to 5 base-224 digits: we take 5x5 bits as the lower halves and convert + * the remaining 14 bits in base-7 (2^14 = 16384 < 16807 = 7^5) to get + * the 7 upper parts we need (with a little redundancy). Little endian + * ordering is used to make handling of partial blocks easy. + * + * We transform 39 source bits to 40 destination bits, stretching the data + * by 1/39 = approx. 2.56%. + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#undef LOCAL_DEBUG + +#include "lib/lib.h" +#include "lib/base224.h" + +static void +encode_block(byte *w, u32 hi, u32 lo) +{ + uns x, y; + + /* + * Splitting of the 39-bit block: [a-e][0-5] are the base-32 digits, *'s are used for base-7. 
+ * +----------------+----------------+----------------+----------------+----------------+ + * +00******e4e3e2e1|e0******d4d3d2d1|d0******c4c3c2c1|c0******b4b3b2b1|b0****a4a3a2a1a0| + * +----------------+----------------+----------------+----------------+----------------+ + */ + + w[0] = lo & 0x1f; + w[1] = (lo >> 7) & 0x1f; + w[2] = (lo >> 15) & 0x1f; + w[3] = (lo >> 23) & 0x1f; + w[4] = (lo >> 31) | ((hi << 1) & 0x1e); + x = (lo >> 5) & 0x0003 + | (lo >> 10) & 0x001c + | (lo >> 15) & 0x00e0 + | (lo >> 20) & 0x0700 + | (hi << 7) & 0x3800; + DBG("<<< h=%08x l=%08x x=%d", hi, lo, x); + for (y=0; y<5; y++) + { + w[y] += 0x20 + ((x % 7) << 5); + x /= 7; + } +} + +uns +base224_encode(byte *dest, const byte *src, uns len) +{ + u32 lo=0, hi=0; /* 64-bit buffer accumulating input bits */ + uns i=0; /* How many source bits do we have buffered */ + u32 x; + byte *w=dest; + + while (len--) + { + x = *src++; + if (i < 32) + { + lo |= x << i; + if (i > 24) + hi |= x >> (32-i); + } + else + hi |= x << (i-32); + i += 8; + if (i >= 39) + { + encode_block(w, hi, lo); + w += 5; + lo = hi >> 7; + hi = 0; + i -= 39; + } + } + if (i) /* Partial block */ + { + encode_block(w, hi, lo); + w += (i+8)/8; /* Just check logarithms if you want to understand */ + } + return w - dest; +} + +uns +base224_decode(byte *dest, const byte *src, uns len) +{ + u32 hi=0, lo=0; /* 64-bit buffer accumulating output bits */ + uns i=0; /* How many bits do we have accumulated */ + u32 h, l; /* Decoding of the current block */ + uns x; /* base-7 part of the current block */ + uns len0; + byte *start = dest; + + do + { + if (!len) + break; + len0 = len; + + ASSERT(*src >= 0x20); /* byte 0 */ + h = 0; + l = *src & 0x1f; + x = (*src++ >> 5) - 1; + if (!--len) + goto blockend; + + ASSERT(*src >= 0x20); /* byte 1 */ + l |= (*src & 0x1f) << 7; + x += ((*src++ >> 5) - 1) * 7; + if (!--len) + goto blockend; + + ASSERT(*src >= 0x20); /* byte 2 */ + l |= (*src & 0x1f) << 15; + x += ((*src++ >> 5) - 1) * 7*7; + if 
(!--len) + goto blockend; + + ASSERT(*src >= 0x20); /* byte 3 */ + l |= (*src & 0x1f) << 23; + x += ((*src++ >> 5) - 1) * 7*7*7; + if (!--len) + goto blockend; + + ASSERT(*src >= 0x20); /* byte 4 */ + l |= *src << 31; + h = (*src & 0x1f) >> 1; + x += ((*src++ >> 5) - 1) * 7*7*7*7; + --len; + + blockend: + len0 -= len; + l |= ((x & 0x0003) << 5) /* Decode base-7 */ + | ((x & 0x001c) << 10) + | ((x & 0x00e0) << 15) + | ((x & 0x0700) << 20); + h |= (x & 0x3800) >> 7; + + DBG("<<< i=%d h=%08x l=%08x x=%d len0=%d", i, h, l, x, len0); + lo |= l << i; + hi |= h << i; + if (i) + hi |= l >> (32-i); + i += len0*8 - 1; + + while (i >= 8) + { + *dest++ = lo; + lo = (lo >> 8U) | (hi << 24); + hi >>= 8; + i -= 8; + } + } + while (len0 == 5); + return dest-start; +} + +#ifdef TEST + +#include + +int main(int argc, char **argv) +{ +#if 0 + byte i[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39 }; + byte o[256], w[256]; + uns l; + l = base224_encode(o, i, sizeof(i)); + fwrite(o, 1, l, stdout); + fputc(0xaa, stdout); + l = base224_decode(w, o, l); + fwrite(w, 1, l, stdout); +#else + if (argc > 1) + { + byte i[BASE224_OUT_CHUNK*17], o[BASE224_IN_CHUNK*17]; + uns l; + while (l = fread(i, 1, sizeof(i), stdin)) + { + l = base224_decode(o, i, l); + fwrite(o, 1, l, stdout); + } + } + else + { + byte i[BASE224_IN_CHUNK*23], o[BASE224_OUT_CHUNK*23]; + uns l; + while (l = fread(i, 1, sizeof(i), stdin)) + { + l = base224_encode(o, i, l); + fwrite(o, 1, l, stdout); + } + } +#endif + + return 0; +} + +#endif diff --git a/lib/base224.h b/lib/base224.h new file mode 100644 index 0000000..7e815d8 --- /dev/null +++ b/lib/base224.h @@ -0,0 +1,25 @@ +/* + * UCW Library -- Base 224 Encoding & Decoding + * + * (c) 2002 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */ + +uns base224_encode(byte *dest, const byte *src, uns len); +uns base224_decode(byte *dest, const byte *src, uns len); + +/* + * Warning: when encoding, at least 4 bytes of extra space are needed. + * Better use this macro to calculate buffer size. + */ +#define BASE224_ENC_LENGTH(x) (((x)*8+38)/39*5) + +/* + * When called for BASE224_IN_CHUNK-byte chunks, the result will be + * always BASE224_OUT_CHUNK bytes long. If a longer block is split + * to such chunks, the result will be identical. + */ +#define BASE224_IN_CHUNK 39 +#define BASE224_OUT_CHUNK 40 diff --git a/lib/base64.c b/lib/base64.c new file mode 100644 index 0000000..ef8faa4 --- /dev/null +++ b/lib/base64.c @@ -0,0 +1,120 @@ +/* + * UCW Library -- Base 64 Encoding & Decoding + * + * (c) 2002, Robert Spalek + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#undef LOCAL_DEBUG + +#include "lib/lib.h" +#include "lib/base64.h" + +#include + +static const byte base64_table[] = + { 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', + 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', + 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', + 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/', '\0' + }; +static const byte base64_pad = '='; + +uns +base64_encode(byte *dest, const byte *src, uns len) +{ + const byte *current = src; + uns i = 0; + + while (len > 2) { /* keep going until we have less than 24 bits */ + dest[i++] = base64_table[current[0] >> 2]; + dest[i++] = base64_table[((current[0] & 0x03) << 4) + (current[1] >> 4)]; + dest[i++] = base64_table[((current[1] & 0x0f) << 2) + (current[2] >> 6)]; + dest[i++] = base64_table[current[2] & 0x3f]; + + current += 3; + len -= 3; /* we just handle 3 octets of data */ + } + + /* now deal with the tail end of things */ + if (len != 0) { + dest[i++] = 
base64_table[current[0] >> 2]; + if (len > 1) { + dest[i++] = base64_table[((current[0] & 0x03) << 4) + (current[1] >> 4)]; + dest[i++] = base64_table[(current[1] & 0x0f) << 2]; + dest[i++] = base64_pad; + } + else { + dest[i++] = base64_table[(current[0] & 0x03) << 4]; + dest[i++] = base64_pad; + dest[i++] = base64_pad; + } + } + return i; +} + +/* as above, but backwards. :) */ +uns +base64_decode(byte *dest, const byte *src, uns len) +{ + const byte *current = src; + uns ch; + uns i = 0, j = 0; + static byte reverse_table[256]; + static uns table_built = 0; + + if (table_built == 0) { + byte *chp; + table_built = 1; + for(ch = 0; ch < 256; ch++) { + chp = strchr(base64_table, ch); + if(chp) { + reverse_table[ch] = chp - base64_table; + } else { + reverse_table[ch] = 0xff; + } + } + } + + /* run through the whole string, converting as we go */ + ch = 0; + while (len > 0) { + len--; + ch = *current++; + if (ch == base64_pad) break; + + /* When Base64 gets POSTed, all pluses are interpreted as spaces. + This line changes them back. It's not exactly the Base64 spec, + but it is completely compatible with it (the spec says that + spaces are invalid). This will also save many people considerable + headache. - Turadg Aleahmad + */ + + if (ch == ' ') ch = '+'; + + ch = reverse_table[ch]; + if (ch == 0xff) continue; + + switch(i % 4) { + case 0: + dest[j] = ch << 2; + break; + case 1: + dest[j++] |= ch >> 4; + dest[j] = (ch & 0x0f) << 4; + break; + case 2: + dest[j++] |= ch >>2; + dest[j] = (ch & 0x03) << 6; + break; + case 3: + dest[j++] |= ch; + break; + } + i++; + } + return j; +} diff --git a/lib/base64.h b/lib/base64.h new file mode 100644 index 0000000..7890966 --- /dev/null +++ b/lib/base64.h @@ -0,0 +1,25 @@ +/* + * UCW Library -- Base 64 Encoding & Decoding + * + * (c) 2002, Robert Spalek + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */ + +uns base64_encode(byte *dest, const byte *src, uns len); +uns base64_decode(byte *dest, const byte *src, uns len); + +/* + * Use this macro to calculate buffer size. + */ +#define BASE64_ENC_LENGTH(x) (((x)+2)/3 *4) + +/* + * When called for BASE64_IN_CHUNK-byte chunks, the result will be + * always BASE64_OUT_CHUNK bytes long. If a longer block is split + * to such chunks, the result will be identical. + */ +#define BASE64_IN_CHUNK 3 +#define BASE64_OUT_CHUNK 4 + diff --git a/lib/bbuf.c b/lib/bbuf.c new file mode 100644 index 0000000..9d4af26 --- /dev/null +++ b/lib/bbuf.c @@ -0,0 +1,86 @@ +/* + * UCW Library -- A simple growing buffers for byte-sized items + * + * (c) 2006 Pavel Charvat + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#include "lib/lib.h" +#include "lib/bbuf.h" + +#include + +char * +bb_vprintf_at(bb_t *bb, uns ofs, const char *fmt, va_list args) +{ + bb_grow(bb, ofs + 1); + va_list args2; + va_copy(args2, args); + int cnt = vsnprintf(bb->ptr + ofs, bb->len - ofs, fmt, args2); + va_end(args2); + if (cnt < 0) + { + /* Our C library doesn't support C99 return value of vsnprintf, so we need to iterate */ + do + { + bb_do_grow(bb, bb->len + 1); + va_copy(args2, args); + cnt = vsnprintf(bb->ptr + ofs, bb->len - ofs, fmt, args2); + va_end(args2); + } + while (cnt < 0); + } + else if ((uns)cnt >= bb->len - ofs) + { + bb_do_grow(bb, ofs + cnt + 1); + va_copy(args2, args); + int cnt2 = vsnprintf(bb->ptr + ofs, bb->len - ofs, fmt, args2); + va_end(args2); + ASSERT(cnt2 == cnt); + } + return bb->ptr + ofs; +} + +char * +bb_printf_at(bb_t *bb, uns ofs, const char *fmt, ...) +{ + va_list args; + va_start(args, fmt); + char *res = bb_vprintf_at(bb, ofs, fmt, args); + va_end(args); + return res; +} + +char * +bb_vprintf(bb_t *bb, const char *fmt, va_list args) +{ + return bb_vprintf_at(bb, 0, fmt, args); +} + +char * +bb_printf(bb_t *bb, const char *fmt, ...) 
+{ + va_list args; + va_start(args, fmt); + char *res = bb_vprintf_at(bb, 0, fmt, args); + va_end(args); + return res; +} + +#ifdef TEST + +int main(void) +{ + bb_t bb; + bb_init(&bb); + char *x = bb_printf(&bb, "", "World"); + fputs(x, stdout); + x = bb_printf_at(&bb, 5, "\n", "World"); + fputs(x, stdout); + bb_done(&bb); + return 0; +} + +#endif diff --git a/lib/bbuf.h b/lib/bbuf.h new file mode 100644 index 0000000..22e62bb --- /dev/null +++ b/lib/bbuf.h @@ -0,0 +1,22 @@ +/* + * UCW Library -- A simple growing buffer for byte-sized items. + * + * (c) 2004 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#ifndef _UCW_BBUF_H +#define _UCW_BBUF_H + +#define GBUF_TYPE byte +#define GBUF_PREFIX(x) bb_##x +#include "lib/gbuf.h" + +char *bb_vprintf(bb_t *bb, const char *fmt, va_list args); +char *bb_printf(bb_t *bb, const char *fmt, ...); +char *bb_vprintf_at(bb_t *bb, uns ofs, const char *fmt, va_list args); +char *bb_printf_at(bb_t *bb, uns ofs, const char *fmt, ...); + +#endif diff --git a/lib/bbuf.t b/lib/bbuf.t new file mode 100644 index 0000000..ebf9ecc --- /dev/null +++ b/lib/bbuf.t @@ -0,0 +1,4 @@ +# Tests for growing buffers + +Run: ../obj/lib/bbuf-t +Out: diff --git a/lib/bigalloc.c b/lib/bigalloc.c new file mode 100644 index 0000000..9581188 --- /dev/null +++ b/lib/bigalloc.c @@ -0,0 +1,111 @@ +/* + * UCW Library -- Allocation of Large Aligned Buffers + * + * (c) 2006--2007 Martin Mares + * (c) 2007 Pavel Charvat + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */ + +#include "lib/lib.h" + +#include +#include +#include + +void * +page_alloc(u64 len) +{ + if (len > SIZE_MAX) + die("page_alloc: Size %llu is too large for the current architecture", (long long) len); + ASSERT(!(len & (CPU_PAGE_SIZE-1))); + byte *p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); + if (p == (byte*) MAP_FAILED) + die("Cannot mmap %llu bytes of memory: %m", (long long)len); + return p; +} + +void * +page_alloc_zero(u64 len) +{ + void *p = page_alloc(len); + bzero(p, len); + return p; +} + +void +page_free(void *start, u64 len) +{ + ASSERT(!(len & (CPU_PAGE_SIZE-1))); + ASSERT(!((uintptr_t) start & (CPU_PAGE_SIZE-1))); + munmap(start, len); +} + +void * +page_realloc(void *start, u64 old_len, u64 new_len) +{ + void *p = page_alloc(new_len); + memcpy(p, start, MIN(old_len, new_len)); + page_free(start, old_len); + return p; +} + +static u64 +big_round(u64 len) +{ + return ALIGN_TO(len, (u64)CPU_PAGE_SIZE); +} + +void * +big_alloc(u64 len) +{ + u64 l = big_round(len); + if (l > SIZE_MAX - 2*CPU_PAGE_SIZE) + die("big_alloc: Size %llu is too large for the current architecture", (long long) len); +#ifdef CONFIG_DEBUG + l += 2*CPU_PAGE_SIZE; +#endif + byte *p = page_alloc(l); +#ifdef CONFIG_DEBUG + *(u64*)p = len; + mprotect(p, CPU_PAGE_SIZE, PROT_NONE); + mprotect(p+l-CPU_PAGE_SIZE, CPU_PAGE_SIZE, PROT_NONE); + p += CPU_PAGE_SIZE; +#endif + return p; +} + +void * +big_alloc_zero(u64 len) +{ + void *p = big_alloc(len); + bzero(p, big_round(len)); + return p; +} + +void +big_free(void *start, u64 len) +{ + byte *p = start; + u64 l = big_round(len); +#ifdef CONFIG_DEBUG + p -= CPU_PAGE_SIZE; + mprotect(p, CPU_PAGE_SIZE, PROT_READ); + ASSERT(*(u64*)p == len); + l += 2*CPU_PAGE_SIZE; +#endif + page_free(p, l); +} + +#ifdef TEST + +int main(void) +{ + byte *p = big_alloc(123456); + // p[-1] = 1; + big_free(p, 123456); + return 0; +} + +#endif diff --git a/lib/binheap-node.h b/lib/binheap-node.h new file mode 100644 index 
0000000..44be9f4 --- /dev/null +++ b/lib/binheap-node.h @@ -0,0 +1,19 @@ +/* + * UCW Library -- Binomial Heaps: Declarations + * + * (c) 2003 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +struct bh_node { + struct bh_node *first_son; + struct bh_node *last_son; + struct bh_node *next_sibling; + byte order; +}; + +struct bh_heap { + struct bh_node root; +}; diff --git a/lib/binheap-test.c b/lib/binheap-test.c new file mode 100644 index 0000000..bfd28a0 --- /dev/null +++ b/lib/binheap-test.c @@ -0,0 +1,94 @@ +/* + * UCW Library -- Binomial Heaps: Testing + * + * (c) 2003 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#include "lib/lib.h" + +#include +#include + +#define BH_PREFIX(x) bht_##x +#define BH_WANT_INSERT +#define BH_WANT_FINDMIN +#define BH_WANT_DELETEMIN +#include "lib/binheap-node.h" + +struct item { + struct bh_node n; + uns key; +}; + +static inline uns bht_key(struct bh_node *n) +{ + return ((struct item *)n)->key; +} + +static inline uns bht_less(struct bh_node *a, struct bh_node *b) +{ + return bht_key(a) < bht_key(b); +} + +static void +bht_do_dump(struct bh_node *a, struct bh_node *expected_last, uns offset) +{ + if (!a) + return; + printf("%*s", offset, ""); + printf("[%d](%d)%s\n", a->order, bht_key(a), a == expected_last ? 
" L" : ""); + for (struct bh_node *b=a->first_son; b; b=b->next_sibling) + bht_do_dump(b, a->last_son, offset+1); +} + +static void +bht_dump(struct bh_heap *h) +{ + printf("root\n"); + for (struct bh_node *b=h->root.first_son; b; b=b->next_sibling) + bht_do_dump(b, b->last_son, 1); +} + +#include "lib/binheap.h" + +int main(void) +{ + uns i; + struct bh_heap h; +#define N 1048576 +#define K(i) ((259309*i+1009)%N) + + bht_init(&h); + + for (i=0; ikey = K(i); + // printf("Insert %d\n", a->key); + bht_insert(&h, &a->n); + // bht_dump(&h); + } + // bht_dump(&h); + ASSERT(bht_key(bht_findmin(&h)) == 0); + uns cnt = 0; + BH_FOR_ALL(bht_, &h, a) + { + cnt++; + } + BH_END_FOR; + printf("cnt=%d\n", cnt); + ASSERT(cnt == N); + for (i=0; ikey); + ASSERT(a->key == i); + // bht_dump(&h); + } + bht_dump(&h); + + return 0; +} diff --git a/lib/binheap.h b/lib/binheap.h new file mode 100644 index 0000000..0c6ee5a --- /dev/null +++ b/lib/binheap.h @@ -0,0 +1,203 @@ +/* + * UCW Library -- Binomial Heaps + * + * (c) 2003 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +/* + * This is a generic implementation of Binomial Heaps. Each time you include + * this file with parameters set in the corresponding preprocessor macros + * as described below, it generates functions for manipulating the particular + * version of the binomial heap. + * + * You need to specify: + * + * BH_PREFIX(x) macro to add a name prefix (used on all global names + * defined by the hash table generator). All further + * names mentioned here except for macro names will be + * implicitly prefixed. + * + * Then you continue by including "lib/binheap-node.h" which defines struct bh_node + * and struct bh_root (both without prefix). 
The heap elements are always allocated by + * you and they must include struct bh_node which serves as a handle used for all + * the heap functions and it contains all information needed for heap-keeping. + * The heap itself is also allocated by you and it's represented by struct bh_heap. + * + * When you have the declaration of heap nodes, you continue with defining: + * + * less(p,q) returns 1 if the key corresponding to bh_node *p + * is less than the one corresponding to *q. + * + * Then specify what operations you request: + * + * init(heap*) -- initialize the heap. + * BH_WANT_INSERT insert(heap*, node*) -- insert the node to the heap. + * BH_WANT_FINDMIN node *findmin(heap*) -- find node with minimum key. + * BH_WANT_DELETEMIN node *deletemin(heap*) -- findmin and delete the node. + * + * Then include "lib/binheap.h" and voila, you have a binomial heap + * suiting all your needs (at least those which you've revealed :) ). + * + * You also get an iterator macro at no extra charge: + * + * BH_FOR_ALL(bh_prefix, heap*, variable) + * { + * // node *variable gets declared automatically + * do_something_with_node(variable); + * // use BH_BREAK and BH_CONTINUE instead of break and continue + * // you must not alter contents of the heap here + * } + * BH_END_FOR; + * + * After including this file, all parameter macros are automatically + * undef'd. 
+ */ + +#define BH_NODE struct bh_node +#define BH_HEAP struct bh_heap + +static void +BH_PREFIX(merge)(BH_NODE *a, BH_NODE *b) +{ + BH_NODE **pp = &a->first_son; + BH_NODE *q = b->first_son; + BH_NODE *p, *r, *s; + + while ((p = *pp) && q) + { + /* p,q are the next nodes of a,b; pp points to where p is linked */ + if (p->order < q->order) /* p is smaller => skip it */ + pp = &p->next_sibling; + else if (p->order > q->order) /* q is smaller => insert it before p */ + { + r = q; + q = q->next_sibling; + r->next_sibling = p; + *pp = r; + pp = &r->next_sibling; + } + else /* p and q are of the same order => need to merge them */ + { + if (BH_PREFIX(less)(p, q)) /* we'll hang r below s */ + { + r = q; + s = p; + } + else + { + r = p; + s = q; + } + *pp = p->next_sibling; /* unlink p,q from their lists */ + q = q->next_sibling; + + if (s->last_son) /* merge r to s, increasing order */ + s->last_son->next_sibling = r; + else + s->first_son = r; + s->last_son = r; + s->order++; + r->next_sibling = NULL; + + if (!q || q->order > s->order) /* put the result into the b's list if possible */ + { + s->next_sibling = q; + q = s; + } + else /* otherwise put the result to the a's list */ + { + p = s->next_sibling = *pp; + *pp = s; + if (p && p->order == s->order) /* 3-collision */ + pp = &s->next_sibling; + } + } + } + if (!p) + *pp = q; +} + +#ifdef BH_WANT_INSERT +static void +BH_PREFIX(insert)(BH_HEAP *heap, BH_NODE *a) +{ + BH_NODE sh; + + sh.first_son = a; + a->first_son = a->last_son = a->next_sibling = NULL; + BH_PREFIX(merge)(&heap->root, &sh); +} +#endif + +#ifdef BH_WANT_FINDMIN +static BH_NODE * +BH_PREFIX(findmin)(BH_HEAP *heap) +{ + BH_NODE *p, *best; + + best = NULL; + for (p=heap->root.first_son; p; p=p->next_sibling) + if (!best || BH_PREFIX(less)(p, best)) + best = p; + return best; +} +#endif + +#ifdef BH_WANT_DELETEMIN +static BH_NODE * +BH_PREFIX(deletemin)(BH_HEAP *heap) +{ + BH_NODE *p, **pp, **bestp; + + bestp = NULL; + for (pp=&heap->root.first_son; p=*pp; 
pp=&p->next_sibling) + if (!bestp || BH_PREFIX(less)(p, *bestp)) + bestp = pp; + if (!bestp) + return NULL; + + p = *bestp; + *bestp = p->next_sibling; + BH_PREFIX(merge)(&heap->root, p); + return p; +} +#endif + +static inline void +BH_PREFIX(init)(BH_HEAP *heap) +{ + bzero(heap, sizeof(*heap)); +} + +#ifndef BH_FOR_ALL + +#define BH_FOR_ALL(bh_px, bh_heap, bh_var) \ +do { \ + struct bh_node *bh_stack[32]; \ + uns bh_sp = 0; \ + if (bh_stack[0] = (bh_heap)->root.first_son) \ + bh_sp++; \ + while (bh_sp) { \ + struct bh_node *bh_var = bh_stack[--bh_sp]; \ + if (bh_var->next_sibling) \ + bh_stack[bh_sp++] = bh_var->next_sibling; \ + if (bh_var->first_son) \ + bh_stack[bh_sp++] = bh_var->first_son; +#define BH_END_FOR \ + } \ +} while (0) + +#define BH_BREAK { bh_sp=0; break; } +#define BH_CONTINUE continue + +#endif + +#undef BH_PREFIX +#undef BH_NODE +#undef BH_HEAP +#undef BH_WANT_INSERT +#undef BH_WANT_FINDMIN +#undef BH_WANT_DELETEMIN diff --git a/lib/binsearch.h b/lib/binsearch.h new file mode 100644 index 0000000..6741956 --- /dev/null +++ b/lib/binsearch.h @@ -0,0 +1,26 @@ +/* + * UCW Library -- Generic Binary Search + * + * (c) 2005 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
/*
 * BIN_SEARCH_FIRST_GE_CMP(ary, N, x, ary_lt_x) -- binary search in ary[0..N-1].
 * Evaluates to the smallest index i for which ary_lt_x(ary,i,x) is false,
 * or to N if there is none.  The predicate must be true for a prefix of the
 * array and false for the rest.  Uses a GNU statement expression.
 */
#define BIN_SEARCH_FIRST_GE_CMP(ary,N,x,ary_lt_x)  ({           \
  uns l = 0, r = (N);                                           \
  while (l < r)                                                 \
    {                                                           \
      /* l + (r-l)/2 cannot overflow, unlike (l+r)/2 */         \
      uns m = l + (r-l)/2;                                      \
      if (ary_lt_x(ary,m,x))                                    \
        l = m+1;                                                \
      else                                                      \
        r = m;                                                  \
    }                                                           \
  l;                                                            \
})

/* Default predicate: plain numeric comparison (parenthesized so that it
 * stays one operand wherever it is expanded). */
#define ARY_LT_NUM(ary,i,x) ((ary)[i] < (x))

/* Index of the first element >= x, or N if all elements are smaller. */
#define BIN_SEARCH_FIRST_GE(ary,N,x) BIN_SEARCH_FIRST_GE_CMP(ary,N,x,ARY_LT_NUM)
/* Index of an element equal to x, or -1 if x is not present. */
#define BIN_SEARCH_EQ(ary,N,x) ({ int i = BIN_SEARCH_FIRST_GE(ary,N,x); if (i >= (N) || (ary)[i] != (x)) i=-1; i; })
+1,42 @@ +/* + * UCW Library -- Find Highest Set Bit + * + * (c) 1997-2005 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#include "lib/lib.h" +#include "lib/bitops.h" + +int +bit_fls(u32 x) +{ + uns l; + + if (!x) + return -1; + + l = 0; + if (x & 0xffff0000) { l += 16; x &= 0xffff0000; } + if (x & 0xff00ff00) { l += 8; x &= 0xff00ff00; } + if (x & 0xf0f0f0f0) { l += 4; x &= 0xf0f0f0f0; } + if (x & 0xcccccccc) { l += 2; x &= 0xcccccccc; } + if (x & 0xaaaaaaaa) l++; + return l; +} + +#ifdef TEST + +#include + +int main(void) +{ + uns i; + while (scanf("%x", &i) == 1) + printf("%d\n", bit_fls(i)); + return 0; +} + +#endif diff --git a/lib/bitarray.h b/lib/bitarray.h new file mode 100644 index 0000000..7248041 --- /dev/null +++ b/lib/bitarray.h @@ -0,0 +1,107 @@ +/* + * UCW Library -- Bit Array Operations + * + * (c) 2003--2006 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */
+
+#ifndef _UCW_BITARRAY_H
+#define _UCW_BITARRAY_H
+
+#include <string.h>
+
+typedef u32 *bitarray_t;
+#define BIT_ARRAY_WORDS(n) (((n)+31)/32)	/* 32-bit words needed to hold n bits */
+#define BIT_ARRAY_BYTES(n) (4*BIT_ARRAY_WORDS(n))	/* the same size expressed in bytes */
+#define BIT_ARRAY(name,size) u32 name[BIT_ARRAY_WORDS(size)]	/* declares an array variable for `size' bits */
+
+static inline bitarray_t
+bit_array_xmalloc(uns n)	/* allocates an array for n bits; contents are whatever xmalloc() returns */
+{
+  return xmalloc(BIT_ARRAY_BYTES(n));
+}
+
+static inline bitarray_t
+bit_array_xmalloc_zero(uns n)	/* allocates an array for n bits via xmalloc_zero() */
+{
+  return xmalloc_zero(BIT_ARRAY_BYTES(n));
+}
+
+static inline void
+bit_array_zero(bitarray_t a, uns n)	/* clears all words covering n bits */
+{
+  bzero(a, BIT_ARRAY_BYTES(n));
+}
+
+static inline void
+bit_array_set_all(bitarray_t a, uns n)	/* sets all words covering n bits, including padding bits of the last word */
+{
+  memset(a, 255, BIT_ARRAY_BYTES(n));
+}
+
+static inline void
+bit_array_set(bitarray_t a, uns i)
+{
+  a[i/32] |= (1U << (i%32));	/* 1U: shifting a signed 1 into bit 31 is undefined behaviour */
+}
+
+static inline void
+bit_array_clear(bitarray_t a, uns i)
+{
+  a[i/32] &= ~(1U << (i%32));	/* unsigned mask, see bit_array_set() */
+}
+
+static inline void
+bit_array_assign(bitarray_t a, uns i, uns x)	/* sets bit i when x is non-zero, clears it otherwise */
+{
+  if (x)
+    bit_array_set(a, i);
+  else
+    bit_array_clear(a, i);
+}
+
+static inline uns
+bit_array_isset(bitarray_t a, uns i)	/* returns the raw masked bit: zero or non-zero, not normalised to 0/1 */
+{
+  return a[i/32] & (1U << (i%32));
+}
+
+static inline uns
+bit_array_get(bitarray_t a, uns i)	/* returns exactly 0 or 1 */
+{
+  return !! bit_array_isset(a, i);
+}
+
+static inline uns
+bit_array_test_and_set(bitarray_t a, uns i)	/* sets bit i, returns its previous state as in bit_array_isset() */
+{
+  uns t = bit_array_isset(a, i);
+  bit_array_set(a, i);
+  return t;
+}
+
+static inline uns
+bit_array_test_and_clear(bitarray_t a, uns i)	/* clears bit i, returns its previous state as in bit_array_isset() */
+{
+  uns t = bit_array_isset(a, i);
+  bit_array_clear(a, i);
+  return t;
+}
+
+/* Iterate over all set bits, possibly destructively */
+#define BIT_ARRAY_FISH_BITS_BEGIN(var,ary,size) \
+  for (uns var##_hi=0; var##_hi < BIT_ARRAY_WORDS(size); var##_hi++) \
+    for (uns var##_lo=0; ary[var##_hi]; var##_lo++) \
+      if (ary[var##_hi] & (1U << var##_lo)) \
+        { \
+          uns var = 32*var##_hi + var##_lo; \
+          ary[var##_hi] &= ~(1U << var##_lo); \
+          do
+
+#define BIT_ARRAY_FISH_BITS_END \
+          while (0); \
+        }
+
+#endif
diff --git a/lib/bitops.h b/lib/bitops.h
new file mode 100644
index 0000000..c1e6371
--- /dev/null
+++ b/lib/bitops.h
@@ -0,0 +1,40 @@
+/*
+ * UCW Library -- Bit Operations
+ *
+ * (c) 2005 Martin Mares
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_BITOPS_H
+#define _UCW_BITOPS_H
+
+/* Find highest bit set (i.e., the floor of the binary logarithm) (bit-fls.c) */
+
+int bit_fls(u32 x);		/* bit_fls(0)=-1 */
+
+/* Find lowest bit set, undefined for zero argument (bit-ffs.c) */
+
+extern const byte ffs_table[256];
+
+#ifdef __pentium4		/* On other ia32 machines, the C version is faster */
+
+static inline uns bit_ffs(uns w)
+{
+  asm("bsfl %1,%0" :"=r" (w) :"rm" (w));
+  return w;
+}
+
+#else
+
+static inline uns bit_ffs(uns w)
+{
+  uns b = (w & 0xffff) ? 0 : 16;	/* skip the low half-word when it is empty */
+  b += ((w >> b) & 0xff) ? 0 : 8;	/* then skip an empty low byte of what remains */
+  return b + ffs_table[(w >> b) & 0xff];	/* the table resolves the final byte */
+}
+
+#endif
+
+#endif
diff --git a/lib/bitops.t b/lib/bitops.t
new file mode 100644
index 0000000..97b2b35
--- /dev/null
+++ b/lib/bitops.t
@@ -0,0 +1,53 @@
+# Tests for bitops modules
+
+Run:	../obj/lib/bit-ffs-t
+In:	1
+	2
+	3
+	4
+	5
+	6
+	12345678
+	23030300
+	23030000
+	23000000
+	40000000
+	80000000
+Out:	0
+	1
+	0
+	2
+	0
+	1
+	3
+	8
+	16
+	24
+	30
+	31
+
+Run:	../obj/lib/bit-fls-t
+In:	1
+	2
+	3
+	4
+	5
+	6
+	12345678
+	23030303
+	03030303
+	00030303
+	00000303
+	0fedcba9
+Out:	0
+	1
+	1
+	2
+	2
+	2
+	28
+	29
+	25
+	17
+	9
+	27
diff --git a/lib/bitsig.c b/lib/bitsig.c
new file mode 100644
index 0000000..8ffe8db
--- /dev/null
+++ b/lib/bitsig.c
@@ -0,0 +1,162 @@
+/*
+ * UCW Library -- Bit Array Signatures -- A Dubious Detector of Duplicates
+ *
+ * (c) 2002 Martin Mares
+ *
+ * Greatly inspired by: Faloutsos, C. and Christodoulakis, S.: Signature files
+ * (An access method for documents and its analytical performance evaluation),
+ * ACM Trans. Office Inf. Syst., 2(4):267--288, Oct. 1984.
+ *
+ * This data structure provides a very compact representation
+ * of a set of strings with insertion and membership search,
+ * but with a certain low probability it cheats by incidentally
+ * reporting a non-member as a member. Generally the larger you
+ * create the structure, the lower this probability is.
+ *
+ * How does it work: the structure is just an array of M bits
+ * and each possible element is hashed to a set of (at most) L
+ * bit positions. For each element of the represented set, we
+ * set its L bits to ones and we report as present all elements
+ * whose all L bits are set.
+ *
+ * Analysis: Let's assume N items have already been stored and let A
+ * denote L/M (density of the hash function). The probability that
+ * a fixed bit of the array is set by any of the N items is
+ * 1 - (1-1/M)^(NL) = 1 - ((1-1/M)^M)^NA = approx. 1 - e^-NA.
+ * This is minimized by setting A=(ln 2)/N (try taking derivative).
+ * Given a non-present item, the probability that all of the bits
+ * corresponding to this item are set by the other items (that is,
+ * the structure gives a false answer) is (1-e^-NA)^L = 2^-L.
+ * Hence, if we want to give false answers with probability less
+ * than epsilon, we take L := -log_2 epsilon, M := 1.45*N*L.
+ *
+ * Example: For a set of 10^7 items with P[error] < 10^-6, we set
+ * L := 20 and M := 290*10^6 bits = cca 34.5 MB (29 bits per item).
+ *
+ * We leave L and an upper bound for N as parameters set during
+ * creation of the structure. Currently, the structure is limited
+ * to 4 Gb = 512 MB.
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/bitsig.h"
+#include "lib/md5.h"
+
+#include <string.h>
+
+struct bitsig {
+  uns l, m, n, maxn, max_m_mult;	/* bits per item, array size in bits, items stored, expected items, largest multiple of m below 2^32 */
+  u32 hash[4];				/* MD5 of the current item, stirred on every use */
+  uns hindex;				/* which word of hash[] is consumed next */
+  byte array[0];			/* the bit array itself, allocated behind the header */
+};
+
+struct bitsig *
+bitsig_init(uns perrlog, uns maxn)	/* perrlog = L (bits per item), maxn = expected number of items */
+{
+  struct bitsig *b;
+  u64 m;
+  uns mbytes;
+
+  m = ((u64) maxn * perrlog * 145 + 99) / 100;	/* M := 1.45*N*L, rounded up */
+  if (m >= (u64) 1 << 32)
+    die("bitsig_init: bitsig array too large (maximum is 4 Gb)");
+  mbytes = (m + 7) >> 3U;
+  b = xmalloc(sizeof(struct bitsig) + mbytes);
+  b->l = perrlog;
+  b->m = m;
+  b->n = 0;
+  b->maxn = maxn;
+  b->max_m_mult = (0xffffffff / m) * m;
+  bzero(b->array, mbytes);
+  msg(L_DEBUG, "Initialized bitsig array with l=%d, m=%u (%u KB), expecting %d items", b->l, b->m, (mbytes+1023)/1024, maxn);
+  return b;
+}
+
+void
+bitsig_free(struct bitsig *b)
+{
+  xfree(b);
+}
+
+static void
+bitsig_hash_init(struct bitsig *b, byte *item)	/* item is treated as a NUL-terminated string */
+{
+  struct MD5Context c;
+
+  MD5Init(&c);
+  MD5Update(&c, item, strlen(item));
+  MD5Final((byte *) b->hash, &c);
+  b->hindex = 0;
+}
+
+static inline uns
+bitsig_hash_bit(struct bitsig *b)	/* next bit position (0..m-1) for the item set up by bitsig_hash_init() */
+{
+  u32 h;
+  do
+    {
+      h = b->hash[b->hindex];
+      b->hash[b->hindex] *= 3006477127U;
+      b->hindex = (b->hindex+1) % 4;
+    }
+  while (h >= b->max_m_mult);		/* reject values past the last full multiple of m, so h % m is not biased */
+  return h % b->m;
+}
+
+int
+bitsig_member(struct bitsig *b, byte *item)	/* 1 if all L bits of the item are set, 0 otherwise */
+{
+  uns i, bit;
+
+  bitsig_hash_init(b, item);
+  for (i=0; i<b->l; i++)
+    {
+      bit = bitsig_hash_bit(b);
+      if (!(b->array[bit >> 3] & (1 << (bit & 7))))
+        return 0;
+    }
+  return 1;
+}
+
+int
+bitsig_insert(struct bitsig *b, byte *item)	/* sets the item's bits; returns 1 if they were all set already */
+{
+  uns i, bit, was;
+
+  bitsig_hash_init(b, item);
+  was = 1;
+  for (i=0; i<b->l; i++)
+    {
+      bit = bitsig_hash_bit(b);
+      if (!(b->array[bit >> 3] & (1 << (bit & 7))))
+        {
+          was = 0;
+          b->array[bit >> 3] |= (1 << (bit & 7));
+        }
+    }
+  if (!was && b->n++ == b->maxn)	/* warn once, on the first new item beyond the expected maxn */
+    msg(L_ERROR, "bitsig: Too many items inserted, error rate will be higher than estimated!");
+  return was;
+}
+
+#ifdef TEST
+
+#include <stdio.h>
+#include <stdlib.h>
+
+int main(int argc, char **argv)	/* usage: <perrlog> <maxn>; argv is not validated here */
+{
+  struct bitsig *b = bitsig_init(atol(argv[1]), atol(argv[2]));
+  byte buf[1024];
+
+  while (fgets(buf, 1024, stdin))
+    printf("%d\n", bitsig_insert(b, buf));
+
+  return 0;
+}
+
+#endif
diff --git a/lib/bitsig.h b/lib/bitsig.h
new file mode 100644
index 0000000..60a5b14
--- /dev/null
+++ b/lib/bitsig.h
@@ -0,0 +1,15 @@
+/*
+ * UCW Library -- Bit Array Signatures -- A Dubious Detector of Duplicates
+ *
+ * (c) 2002 Martin Mares
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+struct bitsig;
+
+struct bitsig *bitsig_init(uns perrlog, uns maxn);
+void bitsig_free(struct bitsig *b);
+int bitsig_member(struct bitsig *b, byte *item);
+int bitsig_insert(struct bitsig *b, byte *item);
diff --git a/lib/carefulio.c b/lib/carefulio.c
new file mode 100644
index 0000000..b8d865d
--- /dev/null
+++ b/lib/carefulio.c
@@ -0,0 +1,51 @@
+/*
+ * UCW Library -- Careful Read/Write
+ *
+ * (c) 2004 Martin Mares
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+
+#include <unistd.h>
+
+/*
+ * Reads and writes on sockets and pipes can return partial results,
+ * so we implement an iterated read/write call.
+ */
+
+int
+careful_read(int fd, void *buf, int len)	/* 1 = all len bytes read, 0 = read() hit end of file first, -1 = read() failed */
+{
+  byte *pos = buf;
+  while (len)
+    {
+      int l = read(fd, pos, len);
+      if (l < 0)
+        return -1;
+      if (!l)
+        return 0;
+      pos += l;		/* partial result: advance and ask for the rest */
+      len -= l;
+    }
+  return 1;
+}
+
+int
+careful_write(int fd, const void *buf, int len)	/* 1 = all len bytes written, 0 = write() returned 0, -1 = write() failed */
+{
+  const byte *pos = buf;
+  while (len)
+    {
+      int l = write(fd, pos, len);
+      if (l < 0)
+        return -1;
+      if (!l)
+        return 0;
+      pos += l;		/* partial result: advance and write the rest */
+      len -= l;
+    }
+  return 1;
+}
diff --git a/lib/charmap.h b/lib/charmap.h
new file mode 100644
index 0000000..3e348fc
--- /dev/null
+++ b/lib/charmap.h
@@ -0,0 +1,268 @@
+/*
+ * UCW Library -- Character Code Map (UTF-8 Version)
+ *
+ * (c) 1998--2004 Martin Mares
+ * (c) 2004 Robert Spalek
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+/* Syntax: CHAR(code, uppercase, lowercase, category) */
+
+CHAR(0x00,0x00,0x00,_C_CTRL) //
+CHAR(0x01,0x01,0x01,_C_CTRL) //
+CHAR(0x02,0x02,0x02,_C_CTRL) //
+CHAR(0x03,0x03,0x03,_C_CTRL) //
+CHAR(0x04,0x04,0x04,_C_CTRL) //
+CHAR(0x05,0x05,0x05,_C_CTRL) //
+CHAR(0x06,0x06,0x06,_C_CTRL) //
+CHAR(0x07,0x07,0x07,_C_CTRL) //
+CHAR(0x08,0x08,0x08,_C_CTRL | _C_BLANK) //
+CHAR(0x09,0x09,0x09,_C_CTRL | _C_BLANK | _C_PRINT) //
+CHAR(0x0A,0x0A,0x0A,_C_CTRL | _C_BLANK) //
+CHAR(0x0B,0x0B,0x0B,_C_CTRL) //
+CHAR(0x0C,0x0C,0x0C,_C_CTRL | _C_BLANK) //
+CHAR(0x0D,0x0D,0x0D,_C_CTRL | _C_BLANK) //
+CHAR(0x0E,0x0E,0x0E,_C_CTRL) //
+CHAR(0x0F,0x0F,0x0F,_C_CTRL) //
+CHAR(0x10,0x10,0x10,_C_CTRL) //
+CHAR(0x11,0x11,0x11,_C_CTRL) //
+CHAR(0x12,0x12,0x12,_C_CTRL) //
+CHAR(0x13,0x13,0x13,_C_CTRL) //
+CHAR(0x14,0x14,0x14,_C_CTRL) //
+CHAR(0x15,0x15,0x15,_C_CTRL) //
+CHAR(0x16,0x16,0x16,_C_CTRL) //
+CHAR(0x17,0x17,0x17,_C_CTRL) //
+CHAR(0x18,0x18,0x18,_C_CTRL) //
+CHAR(0x19,0x19,0x19,_C_CTRL) //
+CHAR(0x1A,0x1A,0x1A,_C_CTRL) // +CHAR(0x1B,0x1B,0x1B,_C_CTRL) // +CHAR(0x1C,0x1C,0x1C,_C_CTRL) // +CHAR(0x1D,0x1D,0x1D,_C_CTRL) // +CHAR(0x1E,0x1E,0x1E,_C_CTRL) // +CHAR(0x1F,0x1F,0x1F,_C_CTRL) // +CHAR(0x20,0x20,0x20,_C_BLANK | _C_PRINT) // SPACE +CHAR(0x21,0x21,0x21,_C_PRINT) // EXCLAMATION MARK +CHAR(0x22,0x22,0x22,_C_PRINT) // QUOTATION MARK +CHAR(0x23,0x23,0x23,_C_PRINT) // NUMBER SIGN +CHAR(0x24,0x24,0x24,_C_PRINT) // DOLLAR SIGN +CHAR(0x25,0x25,0x25,_C_PRINT) // PERCENT SIGN +CHAR(0x26,0x26,0x26,_C_PRINT) // AMPERSAND +CHAR(0x27,0x27,0x27,_C_PRINT) // APOSTROPHE +CHAR(0x28,0x28,0x28,_C_PRINT) // LEFT PARENTHESIS +CHAR(0x29,0x29,0x29,_C_PRINT) // RIGHT PARENTHESIS +CHAR(0x2A,0x2A,0x2A,_C_PRINT) // ASTERISK +CHAR(0x2B,0x2B,0x2B,_C_PRINT) // PLUS SIGN +CHAR(0x2C,0x2C,0x2C,_C_PRINT) // COMMA +CHAR(0x2D,0x2D,0x2D,_C_PRINT) // HYPHEN-MINUS +CHAR(0x2E,0x2E,0x2E,_C_PRINT) // FULL STOP +CHAR(0x2F,0x2F,0x2F,_C_PRINT) // SOLIDUS +CHAR(0x30,0x30,0x30,_C_DIGIT | _C_XDIGIT | _C_PRINT) // DIGIT ZERO +CHAR(0x31,0x31,0x31,_C_DIGIT | _C_XDIGIT | _C_PRINT) // DIGIT ONE +CHAR(0x32,0x32,0x32,_C_DIGIT | _C_XDIGIT | _C_PRINT) // DIGIT TWO +CHAR(0x33,0x33,0x33,_C_DIGIT | _C_XDIGIT | _C_PRINT) // DIGIT THREE +CHAR(0x34,0x34,0x34,_C_DIGIT | _C_XDIGIT | _C_PRINT) // DIGIT FOUR +CHAR(0x35,0x35,0x35,_C_DIGIT | _C_XDIGIT | _C_PRINT) // DIGIT FIVE +CHAR(0x36,0x36,0x36,_C_DIGIT | _C_XDIGIT | _C_PRINT) // DIGIT SIX +CHAR(0x37,0x37,0x37,_C_DIGIT | _C_XDIGIT | _C_PRINT) // DIGIT SEVEN +CHAR(0x38,0x38,0x38,_C_DIGIT | _C_XDIGIT | _C_PRINT) // DIGIT EIGHT +CHAR(0x39,0x39,0x39,_C_DIGIT | _C_XDIGIT | _C_PRINT) // DIGIT NINE +CHAR(0x3A,0x3A,0x3A,_C_PRINT) // COLON +CHAR(0x3B,0x3B,0x3B,_C_PRINT) // SEMICOLON +CHAR(0x3C,0x3C,0x3C,_C_PRINT) // LESS-THAN SIGN +CHAR(0x3D,0x3D,0x3D,_C_PRINT) // EQUALS SIGN +CHAR(0x3E,0x3E,0x3E,_C_PRINT) // GREATER-THAN SIGN +CHAR(0x3F,0x3F,0x3F,_C_PRINT) // QUESTION MARK +CHAR(0x40,0x40,0x40,_C_PRINT) // COMMERCIAL AT +CHAR(0x41,0x41,0x61,_C_UPPER | _C_XDIGIT | 
_C_PRINT) // LATIN CAPITAL LETTER A +CHAR(0x42,0x42,0x62,_C_UPPER | _C_XDIGIT | _C_PRINT) // LATIN CAPITAL LETTER B +CHAR(0x43,0x43,0x63,_C_UPPER | _C_XDIGIT | _C_PRINT) // LATIN CAPITAL LETTER C +CHAR(0x44,0x44,0x64,_C_UPPER | _C_XDIGIT | _C_PRINT) // LATIN CAPITAL LETTER D +CHAR(0x45,0x45,0x65,_C_UPPER | _C_XDIGIT | _C_PRINT) // LATIN CAPITAL LETTER E +CHAR(0x46,0x46,0x66,_C_UPPER | _C_XDIGIT | _C_PRINT) // LATIN CAPITAL LETTER F +CHAR(0x47,0x47,0x67,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER G +CHAR(0x48,0x48,0x68,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER H +CHAR(0x49,0x49,0x69,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER I +CHAR(0x4A,0x4A,0x6A,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER J +CHAR(0x4B,0x4B,0x6B,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER K +CHAR(0x4C,0x4C,0x6C,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER L +CHAR(0x4D,0x4D,0x6D,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER M +CHAR(0x4E,0x4E,0x6E,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER N +CHAR(0x4F,0x4F,0x6F,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER O +CHAR(0x50,0x50,0x70,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER P +CHAR(0x51,0x51,0x71,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER Q +CHAR(0x52,0x52,0x72,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER R +CHAR(0x53,0x53,0x73,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER S +CHAR(0x54,0x54,0x74,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER T +CHAR(0x55,0x55,0x75,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER U +CHAR(0x56,0x56,0x76,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER V +CHAR(0x57,0x57,0x77,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER W +CHAR(0x58,0x58,0x78,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER X +CHAR(0x59,0x59,0x79,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER Y +CHAR(0x5A,0x5A,0x7A,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER Z +CHAR(0x5B,0x5B,0x5B,_C_PRINT) // LEFT SQUARE BRACKET +CHAR(0x5C,0x5C,0x5C,_C_PRINT) // REVERSE SOLIDUS +CHAR(0x5D,0x5D,0x5D,_C_PRINT) // RIGHT SQUARE BRACKET +CHAR(0x5E,0x5E,0x5E,_C_PRINT) // CIRCUMFLEX 
ACCENT +CHAR(0x5F,0x5F,0x5F,_C_INNER | _C_PRINT) // LOW LINE +CHAR(0x60,0x60,0x60,_C_PRINT) // GRAVE ACCENT +CHAR(0x61,0x41,0x61,_C_LOWER | _C_XDIGIT | _C_PRINT) // LATIN SMALL LETTER A +CHAR(0x62,0x42,0x62,_C_LOWER | _C_XDIGIT | _C_PRINT) // LATIN SMALL LETTER B +CHAR(0x63,0x43,0x63,_C_LOWER | _C_XDIGIT | _C_PRINT) // LATIN SMALL LETTER C +CHAR(0x64,0x44,0x64,_C_LOWER | _C_XDIGIT | _C_PRINT) // LATIN SMALL LETTER D +CHAR(0x65,0x45,0x65,_C_LOWER | _C_XDIGIT | _C_PRINT) // LATIN SMALL LETTER E +CHAR(0x66,0x46,0x66,_C_LOWER | _C_XDIGIT | _C_PRINT) // LATIN SMALL LETTER F +CHAR(0x67,0x47,0x67,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER G +CHAR(0x68,0x48,0x68,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER H +CHAR(0x69,0x49,0x69,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER I +CHAR(0x6A,0x4A,0x6A,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER J +CHAR(0x6B,0x4B,0x6B,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER K +CHAR(0x6C,0x4C,0x6C,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER L +CHAR(0x6D,0x4D,0x6D,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER M +CHAR(0x6E,0x4E,0x6E,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER N +CHAR(0x6F,0x4F,0x6F,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER O +CHAR(0x70,0x50,0x70,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER P +CHAR(0x71,0x51,0x71,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER Q +CHAR(0x72,0x52,0x72,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER R +CHAR(0x73,0x53,0x73,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER S +CHAR(0x74,0x54,0x74,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER T +CHAR(0x75,0x55,0x75,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER U +CHAR(0x76,0x56,0x76,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER V +CHAR(0x77,0x57,0x77,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER W +CHAR(0x78,0x58,0x78,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER X +CHAR(0x79,0x59,0x79,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER Y +CHAR(0x7A,0x5A,0x7A,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER Z +CHAR(0x7B,0x7B,0x7B,_C_PRINT) // LEFT CURLY BRACKET +CHAR(0x7C,0x7C,0x7C,_C_PRINT) // VERTICAL LINE 
+CHAR(0x7D,0x7D,0x7D,_C_PRINT) // RIGHT CURLY BRACKET +CHAR(0x7E,0x7E,0x7E,_C_PRINT) // TILDE +CHAR(0x7F,0x7F,0x7F,_C_CTRL) // +CHAR(0x80,0x80,0x80,_C_PRINT) // UTF-8 +CHAR(0x81,0x81,0x81,_C_PRINT) // UTF-8 +CHAR(0x82,0x82,0x82,_C_PRINT) // UTF-8 +CHAR(0x83,0x83,0x83,_C_PRINT) // UTF-8 +CHAR(0x84,0x84,0x84,_C_PRINT) // UTF-8 +CHAR(0x85,0x85,0x85,_C_PRINT) // UTF-8 +CHAR(0x86,0x86,0x86,_C_PRINT) // UTF-8 +CHAR(0x87,0x87,0x87,_C_PRINT) // UTF-8 +CHAR(0x88,0x88,0x88,_C_PRINT) // UTF-8 +CHAR(0x89,0x89,0x89,_C_PRINT) // UTF-8 +CHAR(0x8A,0x8A,0x8A,_C_PRINT) // UTF-8 +CHAR(0x8B,0x8B,0x8B,_C_PRINT) // UTF-8 +CHAR(0x8C,0x8C,0x8C,_C_PRINT) // UTF-8 +CHAR(0x8D,0x8D,0x8D,_C_PRINT) // UTF-8 +CHAR(0x8E,0x8E,0x8E,_C_PRINT) // UTF-8 +CHAR(0x8F,0x8F,0x8F,_C_PRINT) // UTF-8 +CHAR(0x90,0x90,0x90,_C_PRINT) // UTF-8 +CHAR(0x91,0x91,0x91,_C_PRINT) // UTF-8 +CHAR(0x92,0x92,0x92,_C_PRINT) // UTF-8 +CHAR(0x93,0x93,0x93,_C_PRINT) // UTF-8 +CHAR(0x94,0x94,0x94,_C_PRINT) // UTF-8 +CHAR(0x95,0x95,0x95,_C_PRINT) // UTF-8 +CHAR(0x96,0x96,0x96,_C_PRINT) // UTF-8 +CHAR(0x97,0x97,0x97,_C_PRINT) // UTF-8 +CHAR(0x98,0x98,0x98,_C_PRINT) // UTF-8 +CHAR(0x99,0x99,0x99,_C_PRINT) // UTF-8 +CHAR(0x9A,0x9A,0x9A,_C_PRINT) // UTF-8 +CHAR(0x9B,0x9B,0x9B,_C_PRINT) // UTF-8 +CHAR(0x9C,0x9C,0x9C,_C_PRINT) // UTF-8 +CHAR(0x9D,0x9D,0x9D,_C_PRINT) // UTF-8 +CHAR(0x9E,0x9E,0x9E,_C_PRINT) // UTF-8 +CHAR(0x9F,0x9F,0x9F,_C_PRINT) // UTF-8 +CHAR(0xA0,0xA0,0xA0,_C_PRINT) // UTF-8 +CHAR(0xA1,0xA1,0xA1,_C_PRINT) // UTF-8 +CHAR(0xA2,0xA2,0xA2,_C_PRINT) // UTF-8 +CHAR(0xA3,0xA3,0xA3,_C_PRINT) // UTF-8 +CHAR(0xA4,0xA4,0xA4,_C_PRINT) // UTF-8 +CHAR(0xA5,0xA5,0xA5,_C_PRINT) // UTF-8 +CHAR(0xA6,0xA6,0xA6,_C_PRINT) // UTF-8 +CHAR(0xA7,0xA7,0xA7,_C_PRINT) // UTF-8 +CHAR(0xA8,0xA8,0xA8,_C_PRINT) // UTF-8 +CHAR(0xA9,0xA9,0xA9,_C_PRINT) // UTF-8 +CHAR(0xAA,0xAA,0xAA,_C_PRINT) // UTF-8 +CHAR(0xAB,0xAB,0xAB,_C_PRINT) // UTF-8 +CHAR(0xAC,0xAC,0xAC,_C_PRINT) // UTF-8 +CHAR(0xAD,0xAD,0xAD,_C_PRINT) // UTF-8 +CHAR(0xAE,0xAE,0xAE,_C_PRINT) 
// UTF-8 +CHAR(0xAF,0xAF,0xAF,_C_PRINT) // UTF-8 +CHAR(0xB0,0xB0,0xB0,_C_PRINT) // UTF-8 +CHAR(0xB1,0xB1,0xB1,_C_PRINT) // UTF-8 +CHAR(0xB2,0xB2,0xB2,_C_PRINT) // UTF-8 +CHAR(0xB3,0xB3,0xB3,_C_PRINT) // UTF-8 +CHAR(0xB4,0xB4,0xB4,_C_PRINT) // UTF-8 +CHAR(0xB5,0xB5,0xB5,_C_PRINT) // UTF-8 +CHAR(0xB6,0xB6,0xB6,_C_PRINT) // UTF-8 +CHAR(0xB7,0xB7,0xB7,_C_PRINT) // UTF-8 +CHAR(0xB8,0xB8,0xB8,_C_PRINT) // UTF-8 +CHAR(0xB9,0xB9,0xB9,_C_PRINT) // UTF-8 +CHAR(0xBA,0xBA,0xBA,_C_PRINT) // UTF-8 +CHAR(0xBB,0xBB,0xBB,_C_PRINT) // UTF-8 +CHAR(0xBC,0xBC,0xBC,_C_PRINT) // UTF-8 +CHAR(0xBD,0xBD,0xBD,_C_PRINT) // UTF-8 +CHAR(0xBE,0xBE,0xBE,_C_PRINT) // UTF-8 +CHAR(0xBF,0xBF,0xBF,_C_PRINT) // UTF-8 +CHAR(0xC0,0xC0,0xC0,_C_PRINT) // UTF-8 +CHAR(0xC1,0xC1,0xC1,_C_PRINT) // UTF-8 +CHAR(0xC2,0xC2,0xC2,_C_PRINT) // UTF-8 +CHAR(0xC3,0xC3,0xC3,_C_PRINT) // UTF-8 +CHAR(0xC4,0xC4,0xC4,_C_PRINT) // UTF-8 +CHAR(0xC5,0xC5,0xC5,_C_PRINT) // UTF-8 +CHAR(0xC6,0xC6,0xC6,_C_PRINT) // UTF-8 +CHAR(0xC7,0xC7,0xC7,_C_PRINT) // UTF-8 +CHAR(0xC8,0xC8,0xC8,_C_PRINT) // UTF-8 +CHAR(0xC9,0xC9,0xC9,_C_PRINT) // UTF-8 +CHAR(0xCA,0xCA,0xCA,_C_PRINT) // UTF-8 +CHAR(0xCB,0xCB,0xCB,_C_PRINT) // UTF-8 +CHAR(0xCC,0xCC,0xCC,_C_PRINT) // UTF-8 +CHAR(0xCD,0xCD,0xCD,_C_PRINT) // UTF-8 +CHAR(0xCE,0xCE,0xCE,_C_PRINT) // UTF-8 +CHAR(0xCF,0xCF,0xCF,_C_PRINT) // UTF-8 +CHAR(0xD0,0xD0,0xD0,_C_PRINT) // UTF-8 +CHAR(0xD1,0xD1,0xD1,_C_PRINT) // UTF-8 +CHAR(0xD2,0xD2,0xD2,_C_PRINT) // UTF-8 +CHAR(0xD3,0xD3,0xD3,_C_PRINT) // UTF-8 +CHAR(0xD4,0xD4,0xD4,_C_PRINT) // UTF-8 +CHAR(0xD5,0xD5,0xD5,_C_PRINT) // UTF-8 +CHAR(0xD6,0xD6,0xD6,_C_PRINT) // UTF-8 +CHAR(0xD7,0xD7,0xD7,_C_PRINT) // UTF-8 +CHAR(0xD8,0xD8,0xD8,_C_PRINT) // UTF-8 +CHAR(0xD9,0xD9,0xD9,_C_PRINT) // UTF-8 +CHAR(0xDA,0xDA,0xDA,_C_PRINT) // UTF-8 +CHAR(0xDB,0xDB,0xDB,_C_PRINT) // UTF-8 +CHAR(0xDC,0xDC,0xDC,_C_PRINT) // UTF-8 +CHAR(0xDD,0xDD,0xDD,_C_PRINT) // UTF-8 +CHAR(0xDE,0xDE,0xDE,_C_PRINT) // UTF-8 +CHAR(0xDF,0xDF,0xDF,_C_PRINT) // UTF-8 +CHAR(0xE0,0xE0,0xE0,_C_PRINT) 
// UTF-8 +CHAR(0xE1,0xE1,0xE1,_C_PRINT) // UTF-8 +CHAR(0xE2,0xE2,0xE2,_C_PRINT) // UTF-8 +CHAR(0xE3,0xE3,0xE3,_C_PRINT) // UTF-8 +CHAR(0xE4,0xE4,0xE4,_C_PRINT) // UTF-8 +CHAR(0xE5,0xE5,0xE5,_C_PRINT) // UTF-8 +CHAR(0xE6,0xE6,0xE6,_C_PRINT) // UTF-8 +CHAR(0xE7,0xE7,0xE7,_C_PRINT) // UTF-8 +CHAR(0xE8,0xE8,0xE8,_C_PRINT) // UTF-8 +CHAR(0xE9,0xE9,0xE9,_C_PRINT) // UTF-8 +CHAR(0xEA,0xEA,0xEA,_C_PRINT) // UTF-8 +CHAR(0xEB,0xEB,0xEB,_C_PRINT) // UTF-8 +CHAR(0xEC,0xEC,0xEC,_C_PRINT) // UTF-8 +CHAR(0xED,0xED,0xED,_C_PRINT) // UTF-8 +CHAR(0xEE,0xEE,0xEE,_C_PRINT) // UTF-8 +CHAR(0xEF,0xEF,0xEF,_C_PRINT) // UTF-8 +CHAR(0xF0,0xF0,0xF0,_C_PRINT) // UTF-8 +CHAR(0xF1,0xF1,0xF1,_C_PRINT) // UTF-8 +CHAR(0xF2,0xF2,0xF2,_C_PRINT) // UTF-8 +CHAR(0xF3,0xF3,0xF3,_C_PRINT) // UTF-8 +CHAR(0xF4,0xF4,0xF4,_C_PRINT) // UTF-8 +CHAR(0xF5,0xF5,0xF5,_C_PRINT) // UTF-8 +CHAR(0xF6,0xF6,0xF6,_C_PRINT) // UTF-8 +CHAR(0xF7,0xF7,0xF7,_C_PRINT) // UTF-8 +CHAR(0xF8,0xF8,0xF8,_C_PRINT) // UTF-8 +CHAR(0xF9,0xF9,0xF9,_C_PRINT) // UTF-8 +CHAR(0xFA,0xFA,0xFA,_C_PRINT) // UTF-8 +CHAR(0xFB,0xFB,0xFB,_C_PRINT) // UTF-8 +CHAR(0xFC,0xFC,0xFC,_C_PRINT) // UTF-8 +CHAR(0xFD,0xFD,0xFD,_C_PRINT) // UTF-8 +CHAR(0xFE,0xFE,0xFE,_C_PRINT) // UTF-8 +CHAR(0xFF,0xFF,0xFF,_C_PRINT) // UTF-8 diff --git a/lib/chartype.h b/lib/chartype.h new file mode 100644 index 0000000..09dc1ec --- /dev/null +++ b/lib/chartype.h @@ -0,0 +1,49 @@ +/* + * UCW Library -- Character Types + * + * (c) 1997--2004 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */
+
+#ifndef _UCW_CHARTYPE_H
+#define _UCW_CHARTYPE_H
+
+#define _C_UPPER 1		/* Upper-case letters */
+#define _C_LOWER 2		/* Lower-case letters */
+#define _C_PRINT 4		/* Printable */
+#define _C_DIGIT 8		/* Digits */
+#define _C_CTRL 16		/* Control characters */
+#define _C_XDIGIT 32		/* Hexadecimal digits */
+#define _C_BLANK 64		/* White spaces (spaces, tabs, newlines) */
+#define _C_INNER 128		/* `inner punctuation' -- underscore etc. */
+
+#define _C_ALPHA (_C_UPPER | _C_LOWER)
+#define _C_ALNUM (_C_ALPHA | _C_DIGIT)
+#define _C_WORD (_C_ALNUM | _C_INNER)
+#define _C_WSTART (_C_ALPHA | _C_INNER)
+
+extern const unsigned char _c_cat[256], _c_upper[256], _c_lower[256];
+
+#define Category(x) (_c_cat[(unsigned char)(x)])
+#define Ccat(x,y) (Category(x) & (y))	/* (y) is parenthesised so that a mask like A | B is applied as a whole */
+
+#define Cupper(x) Ccat(x, _C_UPPER)
+#define Clower(x) Ccat(x, _C_LOWER)
+#define Calpha(x) Ccat(x, _C_ALPHA)
+#define Calnum(x) Ccat(x, _C_ALNUM)
+#define Cprint(x) Ccat(x, _C_PRINT)
+#define Cdigit(x) Ccat(x, _C_DIGIT)
+#define Cxdigit(x) Ccat(x, _C_XDIGIT)
+#define Cword(x) Ccat(x, _C_WORD)
+#define Cblank(x) Ccat(x, _C_BLANK)
+#define Cctrl(x) Ccat(x, _C_CTRL)
+#define Cspace(x) Cblank(x)
+
+#define Cupcase(x) _c_upper[(unsigned char)(x)]
+#define Clocase(x) _c_lower[(unsigned char)(x)]
+
+#define Cxvalue(x) (((x)<'A')?((x)-'0'):(((x)&0xdf)-'A'+10))	/* only meaningful when x is a hex digit; evaluates x more than once */
+
+#endif
diff --git a/lib/clists.h b/lib/clists.h
new file mode 100644
index 0000000..921b7dc
--- /dev/null
+++ b/lib/clists.h
@@ -0,0 +1,132 @@
+/*
+ * UCW Library -- Circular Linked Lists
+ *
+ * (c) 2003--2007 Martin Mares
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_CLISTS_H
+#define _UCW_CLISTS_H
+
+typedef struct cnode {
+  struct cnode *next, *prev;
+} cnode;
+
+typedef struct clist {
+  struct cnode head;		/* sentinel: an empty list has head.next == head.prev == &head */
+} clist;
+
+static inline void *clist_head(clist *l)	/* first node, or NULL when the list is empty */
+{
+  return (l->head.next != &l->head) ? l->head.next : NULL;
+}
+
+static inline void *clist_tail(clist *l)	/* last node, or NULL when the list is empty */
+{
+  return (l->head.prev != &l->head) ? l->head.prev : NULL;
+}
+
+static inline void *clist_next(clist *l, cnode *n)	/* successor of n, or NULL when n is the last node */
+{
+  return (n->next != &l->head) ? (void *) n->next : NULL;
+}
+
+static inline void *clist_prev(clist *l, cnode *n)	/* predecessor of n, or NULL when n is the first node */
+{
+  return (n->prev != &l->head) ? (void *) n->prev : NULL;
+}
+
+static inline int clist_empty(clist *l)
+{
+  return (l->head.next == &l->head);
+}
+
+#define CLIST_WALK(n,list) for(n=(void*)(list).head.next; (cnode*)(n) != &(list).head; n=(void*)((cnode*)(n))->next)
+#define CLIST_WALK_DELSAFE(n,list,tmp) for(n=(void*)(list).head.next; tmp=(void*)((cnode*)(n))->next, (cnode*)(n) != &(list).head; n=(void*)tmp)
+#define CLIST_FOR_EACH(type,n,list) for(type n=(void*)(list).head.next; (cnode*)(n) != &(list).head; n=(void*)((cnode*)(n))->next)
+#define CLIST_FOR_EACH_DELSAFE(type,n,list,tmp) for(type n=(void*)(list).head.next; tmp=(void*)((cnode*)(n))->next, (cnode*)(n) != &(list).head; n=(void*)tmp)
+
+#define CLIST_FOR_EACH_BACKWARDS(type,n,list) for(type n=(void*)(list).head.prev; (cnode*)(n) != &(list).head; n=(void*)((cnode*)(n))->prev)
+
+static inline void clist_insert_after(cnode *what, cnode *after)
+{
+  cnode *before = after->next;
+  what->next = before;
+  what->prev = after;
+  before->prev = what;
+  after->next = what;
+}
+
+static inline void clist_insert_before(cnode *what, cnode *before)
+{
+  cnode *after = before->prev;
+  what->next = before;
+  what->prev = after;
+  before->prev = what;
+  after->next = what;
+}
+
+static inline void clist_add_tail(clist *l, cnode *n)
+{
+  clist_insert_before(n, &l->head);
+}
+
+static inline void clist_add_head(clist *l, cnode *n)
+{
+  clist_insert_after(n, &l->head);
+}
+
+static inline void clist_remove(cnode *n)	/* unlinks n; n's own next/prev pointers are left untouched */
+{
+  cnode *before = n->prev;
+  cnode *after = n->next;
+  before->next = after;
+  after->prev = before;
+}
+
+static inline void *clist_remove_head(clist *l)	/* unlinks and returns the first node, NULL when empty */
+{
+  cnode *n = clist_head(l);
+  if (n)
+    clist_remove(n);
+  return n;
+}
+
+static inline void *clist_remove_tail(clist *l)	/* unlinks and returns the last node, NULL when empty */
+{
+  cnode *n = clist_tail(l);
+  if (n)
+    clist_remove(n);
+  return n;
+}
+
+static inline void clist_init(clist *l)
+{
+  cnode *head = &l->head;
+  head->next = head->prev = head;
+}
+
+static inline void clist_insert_list_after(clist *what, cnode *after)	/* splices all nodes of `what' behind `after' and leaves `what' empty */
+{
+  if (!clist_empty(what))
+    {
+      cnode *w = &what->head;
+      w->prev->next = after->next;
+      after->next->prev = w->prev;
+      w->next->prev = after;
+      after->next = w->next;
+      clist_init(what);
+    }
+}
+
+static inline uns clist_size(clist *l)	/* counts nodes by walking the whole list */
+{
+  uns i = 0;
+  CLIST_FOR_EACH(cnode *, n, *l)
+    i++;
+  return i;
+}
+
+#endif
diff --git a/lib/conf-alloc.c b/lib/conf-alloc.c
new file mode 100644
index 0000000..9f02fd5
--- /dev/null
+++ b/lib/conf-alloc.c
@@ -0,0 +1,43 @@
+/*
+ * UCW Library -- Configuration files: memory allocation
+ *
+ * (c) 2001--2006 Robert Spalek
+ * (c) 2003--2006 Martin Mares
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/conf.h"
+#include "lib/mempool.h"
+
+struct mempool *cf_pool;	// current pool for loading new configuration
+
+void *
+cf_malloc(uns size)		/* allocates from cf_pool */
+{
+  return mp_alloc(cf_pool, size);
+}
+
+void *
+cf_malloc_zero(uns size)	/* allocates from cf_pool via mp_alloc_zero() */
+{
+  return mp_alloc_zero(cf_pool, size);
+}
+
+char *
+cf_strdup(const char *s)	/* copies s into cf_pool */
+{
+  return mp_strdup(cf_pool, s);
+}
+
+char *
+cf_printf(const char *fmt, ...)	/* formats into a string allocated from cf_pool */
+{
+  va_list args;
+  va_start(args, fmt);
+  char *res = mp_vprintf(cf_pool, fmt, args);
+  va_end(args);
+  return res;
+}
diff --git a/lib/conf-dump.c b/lib/conf-dump.c
new file mode 100644
index 0000000..0d40924
--- /dev/null
+++ b/lib/conf-dump.c
@@ -0,0 +1,123 @@
+/*
+ * UCW Library -- Configuration files: dumping
+ *
+ * (c) 2001--2006 Robert Spalek
+ * (c) 2003--2006 Martin Mares
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */ + +#include "lib/lib.h" +#include "lib/conf.h" +#include "lib/getopt.h" +#include "lib/conf-internal.h" +#include "lib/clists.h" +#include "lib/fastbuf.h" + +static void +spaces(struct fastbuf *fb, uns nr) +{ + for (uns i=0; i= 0 ? u->lookup[ *(int*)ptr ] : "???"); break; + case CT_USER: + if (u->utype->dumper) + u->utype->dumper(fb, ptr); + else + bprintf(fb, "??? "); + break; + } +} + +static void dump_section(struct fastbuf *fb, struct cf_section *sec, int level, void *ptr); + +static char *class_names[] = { "end", "static", "dynamic", "parser", "section", "list", "bitmap" }; + +static void +dump_item(struct fastbuf *fb, struct cf_item *item, int level, void *ptr) +{ + ptr += (uintptr_t) item->ptr; + enum cf_type type = item->type; + uns size = cf_type_size(item->type, item->u.utype); + int i; + spaces(fb, level); + bprintf(fb, "%s: C%s #", item->name, class_names[item->cls]); + if (item->number == CF_ANY_NUM) + bputs(fb, "any "); + else + bprintf(fb, "%d ", item->number); + if (item->cls == CC_STATIC || item->cls == CC_DYNAMIC || item->cls == CC_BITMAP) { + bprintf(fb, "T%s ", cf_type_names[type]); + if (item->type == CT_USER) + bprintf(fb, "U%s S%d ", item->u.utype->name, size); + } + if (item->cls == CC_STATIC) { + for (i=0; inumber; i++) + dump_basic(fb, ptr + i * size, type, &item->u); + } else if (item->cls == CC_DYNAMIC) { + ptr = * (void**) ptr; + if (ptr) { + int real_nr = DARY_LEN(ptr); + bprintf(fb, "N%d ", real_nr); + for (i=0; iu); + } else + bprintf(fb, "NULL "); + } else if (item->cls == CC_BITMAP) { + u32 mask = * (u32*) ptr; + for (i=0; i<32; i++) { + if (item->type == CT_LOOKUP && !item->u.lookup[i]) + break; + if (mask & (1<type == CT_INT) + bprintf(fb, "%d ", i); + else if (item->type == CT_LOOKUP) + bprintf(fb, "%s ", item->u.lookup[i]); + } + } + } + bputc(fb, '\n'); + if (item->cls == CC_SECTION) + dump_section(fb, item->u.sec, level+1, ptr); + else if (item->cls == CC_LIST) { + uns idx = 0; + CLIST_FOR_EACH(cnode *, n, * (clist*) 
ptr) { + spaces(fb, level+1); + bprintf(fb, "item %d\n", ++idx); + dump_section(fb, item->u.sec, level+2, n); + } + } +} + +static void +dump_section(struct fastbuf *fb, struct cf_section *sec, int level, void *ptr) +{ + spaces(fb, level); + bprintf(fb, "S%d F%x:\n", sec->size, sec->flags); + for (struct cf_item *item=sec->cfg; item->cls; item++) + dump_item(fb, item, level, ptr); +} + +void +cf_dump_sections(struct fastbuf *fb) +{ + dump_section(fb, &cf_sections, 0, NULL); +} + diff --git a/lib/conf-input.c b/lib/conf-input.c new file mode 100644 index 0000000..c5d2527 --- /dev/null +++ b/lib/conf-input.c @@ -0,0 +1,455 @@ +/* + * UCW Library -- Configuration files: parsing input streams + * + * (c) 2001--2006 Robert Spalek + * (c) 2003--2006 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#include "lib/lib.h" +#include "lib/conf.h" +#include "lib/getopt.h" +#include "lib/conf-internal.h" +#include "lib/mempool.h" +#include "lib/fastbuf.h" +#include "lib/chartype.h" +#include "lib/stkstring.h" + +#include +#include +#include + +/* Text file parser */ + +static const char *name_parse_fb; +static struct fastbuf *parse_fb; +static uns line_num; + +#define MAX_LINE 4096 +static char line_buf[MAX_LINE]; +static char *line = line_buf; + +#include "lib/bbuf.h" +static bb_t copy_buf; +static uns copied; + +#define GBUF_TYPE uns +#define GBUF_PREFIX(x) split_##x +#include "lib/gbuf.h" +static split_t word_buf; +static uns words; +static uns ends_by_brace; // the line is ended by "{" + +static int +get_line(char **msg) +{ + int err = bgets_nodie(parse_fb, line_buf, MAX_LINE); + line_num++; + if (err <= 0) { + *msg = err < 0 ? 
"Line too long" : NULL; + return 0; + } + line = line_buf; + while (Cblank(*line)) + line++; + return 1; +} + +static void +append(char *start, char *end) +{ + uns len = end - start; + bb_grow(©_buf, copied + len + 1); + memcpy(copy_buf.ptr + copied, start, len); + copied += len + 1; + copy_buf.ptr[copied-1] = 0; +} + +static char * +get_word(uns is_command_name) +{ + char *msg; + if (*line == '\'') { + line++; + while (1) { + char *start = line; + while (*line && *line != '\'') + line++; + append(start, line); + if (*line) + break; + copy_buf.ptr[copied-1] = '\n'; + if (!get_line(&msg)) + return msg ? : "Unterminated apostrophe word at the end"; + } + line++; + + } else if (*line == '"') { + line++; + uns start_copy = copied; + while (1) { + char *start = line; + uns escape = 0; + while (*line) { + if (*line == '"' && !escape) + break; + else if (*line == '\\') + escape ^= 1; + else + escape = 0; + line++; + } + append(start, line); + if (*line) + break; + if (!escape) + copy_buf.ptr[copied-1] = '\n'; + else // merge two lines + copied -= 2; + if (!get_line(&msg)) + return msg ? 
: "Unterminated quoted word at the end"; + } + line++; + + char *tmp = stk_str_unesc(copy_buf.ptr + start_copy); + uns l = strlen(tmp); + bb_grow(©_buf, start_copy + l + 1); + strcpy(copy_buf.ptr + start_copy, tmp); + copied = start_copy + l + 1; + + } else { + // promised that *line is non-null and non-blank + char *start = line; + while (*line && !Cblank(*line) + && *line != '{' && *line != '}' && *line != ';' + && (*line != '=' || !is_command_name)) + line++; + if (*line == '=') { // nice for setting from a command-line + if (line == start) + return "Assignment without a variable"; + *line = ' '; + } + if (line == start) // already the first char is control + line++; + append(start, line); + } + while (Cblank(*line)) + line++; + return NULL; +} + +static char * +get_token(uns is_command_name, char **err) +{ + *err = NULL; + while (1) { + if (!*line || *line == '#') { + if (!is_command_name || !get_line(err)) + return NULL; + } else if (*line == ';') { + *err = get_word(0); + if (!is_command_name || *err) + return NULL; + } else if (*line == '\\' && !line[1]) { + if (!get_line(err)) { + if (!*err) + *err = "Last line ends by a backslash"; + return NULL; + } + if (!*line || *line == '#') + msg(L_WARN, "The line %s:%d following a backslash is empty", name_parse_fb ? : "", line_num); + } else { + split_grow(&word_buf, words+1); + uns start = copied; + word_buf.ptr[words++] = copied; + *err = get_word(is_command_name); + return *err ? 
NULL : copy_buf.ptr + start; + } + } +} + +static char * +split_command(void) +{ + words = copied = ends_by_brace = 0; + char *msg, *start_word; + if (!(start_word = get_token(1, &msg))) + return msg; + if (*start_word == '{') // only one opening brace + return "Unexpected opening brace"; + while (*line != '}') // stays for the next time + { + if (!(start_word = get_token(0, &msg))) + return msg; + if (*start_word == '{') { + words--; // discard the brace + ends_by_brace = 1; + break; + } + } + return NULL; +} + +/* Parsing multiple files */ + +static char * +parse_fastbuf(const char *name_fb, struct fastbuf *fb, uns depth) +{ + char *err; + name_parse_fb = name_fb; + parse_fb = fb; + line_num = 0; + line = line_buf; + *line = 0; + while (1) + { + err = split_command(); + if (err) + goto error; + if (!words) + return NULL; + char *name = copy_buf.ptr + word_buf.ptr[0]; + char *pars[words-1]; + for (uns i=1; i 8) + err = "Too many nested files"; + else if (*line && *line != '#') // because the contents of line_buf is not re-entrant and will be cleared + err = "The input command must be the last one on a line"; + if (err) + goto error; + struct fastbuf *new_fb = bopen_try(pars[0], O_RDONLY, 1<<14); + if (!new_fb) { + err = cf_printf("Cannot open file %s: %m", pars[0]); + goto error; + } + uns ll = line_num; + err = parse_fastbuf(stk_strdup(pars[0]), new_fb, depth+1); + line_num = ll; + bclose(new_fb); + if (err) + goto error; + parse_fb = fb; + continue; + } + enum cf_operation op; + char *c = strchr(name, ':'); + if (!c) + op = strcmp(name, "}") ? OP_SET : OP_CLOSE; + else { + *c++ = 0; + switch (Clocase(*c)) { + case 's': op = OP_SET; break; + case 'c': op = Clocase(c[1]) == 'l' ? 
OP_CLEAR: OP_COPY; break; + case 'a': switch (Clocase(c[1])) { + case 'p': op = OP_APPEND; break; + case 'f': op = OP_AFTER; break; + default: op = OP_ALL; + }; break; + case 'p': op = OP_PREPEND; break; + case 'r': op = OP_REMOVE; break; + case 'e': op = OP_EDIT; break; + case 'b': op = OP_BEFORE; break; + default: op = OP_SET; break; + } + if (strcasecmp(c, cf_op_names[op])) { + err = cf_printf("Unknown operation %s", c); + goto error; + } + } + if (ends_by_brace) + op |= OP_OPEN; + err = cf_interpret_line(name, op, words-1, pars); + if (err) + goto error; + } +error: + if (name_fb) + msg(L_ERROR, "File %s, line %d: %s", name_fb, line_num, err); + else if (line_num == 1) + msg(L_ERROR, "Manual setting of configuration: %s", err); + else + msg(L_ERROR, "Manual setting of configuration, line %d: %s", line_num, err); + return "included from here"; +} + +#ifndef DEFAULT_CONFIG +#define DEFAULT_CONFIG NULL +#endif +char *cf_def_file = DEFAULT_CONFIG; + +#ifndef ENV_VAR_CONFIG +#define ENV_VAR_CONFIG NULL +#endif +char *cf_env_file = ENV_VAR_CONFIG; + +static uns postpone_commit; // only for cf_getopt() +static uns everything_committed; // after the 1st load, this flag is set on + +static int +done_stack(void) +{ + if (cf_check_stack()) + return 1; + if (cf_commit_all(postpone_commit ? CF_NO_COMMIT : everything_committed ? 
CF_COMMIT : CF_COMMIT_ALL)) + return 1; + if (!postpone_commit) + everything_committed = 1; + return 0; +} + +static int +load_file(const char *file) +{ + cf_init_stack(); + struct fastbuf *fb = bopen_try(file, O_RDONLY, 1<<14); + if (!fb) { + msg(L_ERROR, "Cannot open %s: %m", file); + return 1; + } + char *err_msg = parse_fastbuf(file, fb, 0); + bclose(fb); + int err = !!err_msg || done_stack(); + if (!err) + cf_def_file = NULL; + return err; +} + +static int +load_string(const char *string) +{ + cf_init_stack(); + struct fastbuf fb; + fbbuf_init_read(&fb, (byte *)string, strlen(string), 0); + char *msg = parse_fastbuf(NULL, &fb, 0); + return !!msg || done_stack(); +} + +/* Safe loading and reloading */ + +int +cf_reload(const char *file) +{ + cf_journal_swap(); + struct cf_journal_item *oldj = cf_journal_new_transaction(1); + uns ec = everything_committed; + everything_committed = 0; + int err = load_file(file); + if (!err) + { + cf_journal_delete(); + cf_journal_commit_transaction(1, NULL); + } + else + { + everything_committed = ec; + cf_journal_rollback_transaction(1, oldj); + cf_journal_swap(); + } + return err; +} + +int +cf_load(const char *file) +{ + struct cf_journal_item *oldj = cf_journal_new_transaction(1); + int err = load_file(file); + if (!err) + cf_journal_commit_transaction(1, oldj); + else + cf_journal_rollback_transaction(1, oldj); + return err; +} + +int +cf_set(const char *string) +{ + struct cf_journal_item *oldj = cf_journal_new_transaction(0); + int err = load_string(string); + if (!err) + cf_journal_commit_transaction(0, oldj); + else + cf_journal_rollback_transaction(0, oldj); + return err; +} + +/* Command-line parser */ + +static void +load_default(void) +{ + if (cf_def_file) + { + char *env; + if (cf_env_file && (env = getenv(cf_env_file))) + { + if (cf_load(env)) + die("Cannot load config file %s", env); + } + else if (cf_load(cf_def_file)) + die("Cannot load default config %s", cf_def_file); + } + else + { + // We need to create an 
empty pool + cf_journal_commit_transaction(1, cf_journal_new_transaction(1)); + } +} + +static void +final_commit(void) +{ + if (postpone_commit) { + postpone_commit = 0; + if (done_stack()) + die("Cannot commit after the initialization"); + } +} + +int +cf_getopt(int argc, char * const argv[], const char *short_opts, const struct option *long_opts, int *long_index) +{ + static int other_options = 0; + while (1) { + int res = getopt_long (argc, argv, short_opts, long_opts, long_index); + if (res == 'S' || res == 'C' || res == 0x64436667) + { + if (other_options) + die("The -S and -C options must precede all other arguments"); + if (res == 'S') { + postpone_commit = 1; + load_default(); + if (cf_set(optarg)) + die("Cannot set %s", optarg); + } else if (res == 'C') { + postpone_commit = 1; + if (cf_load(optarg)) + die("Cannot load config file %s", optarg); + } +#ifdef CONFIG_DEBUG + else { /* --dumpconfig */ + load_default(); + final_commit(); + struct fastbuf *b = bfdopen(1, 4096); + cf_dump_sections(b); + bclose(b); + exit(0); + } +#endif + } else { + /* unhandled option or end of options */ + if (res != ':' && res != '?') + load_default(); + final_commit(); + other_options++; + return res; + } + } +} + diff --git a/lib/conf-internal.h b/lib/conf-internal.h new file mode 100644 index 0000000..377a9cb --- /dev/null +++ b/lib/conf-internal.h @@ -0,0 +1,44 @@ +/* + * UCW Library -- Configuration files: only for internal use of conf-*.c + * + * (c) 2001--2006 Robert Spalek + * (c) 2003--2006 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */ + +#ifndef _UCW_CONF_INTERNAL_H +#define _UCW_CONF_INTERNAL_H + +/* conf-intr.c */ +#define OP_MASK 0xff // only get the operation +#define OP_OPEN 0x100 // here we only get an opening brace instead of parameters +#define OP_1ST 0x200 // in the 1st phase selectors are recorded into the mask +#define OP_2ND 0x400 // in the 2nd phase real data are entered +enum cf_operation; +extern char *cf_op_names[]; +extern char *cf_type_names[]; + +uns cf_type_size(enum cf_type type, struct cf_user_type *utype); +char *cf_interpret_line(char *name, enum cf_operation op, int number, char **pars); +void cf_init_stack(void); +int cf_check_stack(void); + +/* conf-journal.c */ +void cf_journal_swap(void); +void cf_journal_delete(void); + +/* conf-section.c */ +#define SEC_FLAG_DYNAMIC 0x80000000 // contains a dynamic attribute +#define SEC_FLAG_UNKNOWN 0x40000000 // ignore unknown entries +#define SEC_FLAG_CANT_COPY 0x20000000 // contains lists or parsers +#define SEC_FLAG_NUMBER 0x0fffffff // number of entries +enum cf_commit_mode { CF_NO_COMMIT, CF_COMMIT, CF_COMMIT_ALL }; +extern struct cf_section cf_sections; + +struct cf_item *cf_find_subitem(struct cf_section *sec, const char *name); +int cf_commit_all(enum cf_commit_mode cm); +void cf_add_dirty(struct cf_section *sec, void *ptr); + +#endif diff --git a/lib/conf-intr.c b/lib/conf-intr.c new file mode 100644 index 0000000..84f555a --- /dev/null +++ b/lib/conf-intr.c @@ -0,0 +1,645 @@ +/* + * UCW Library -- Configuration files: interpreter + * + * (c) 2001--2006 Robert Spalek + * (c) 2003--2006 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */ + +#include "lib/lib.h" +#include "lib/conf.h" +#include "lib/getopt.h" +#include "lib/conf-internal.h" +#include "lib/clists.h" + +#include +#include + +#define TRY(f) do { char *_msg = f; if (_msg) return _msg; } while (0) + +/* Register size of and parser for each basic type */ + +static char * +cf_parse_string(char *str, char **ptr) +{ + *ptr = cf_strdup(str); + return NULL; +} + +typedef char *cf_basic_parser(char *str, void *ptr); +static struct { + uns size; + void *parser; +} parsers[] = { + { sizeof(int), cf_parse_int }, + { sizeof(u64), cf_parse_u64 }, + { sizeof(double), cf_parse_double }, + { sizeof(u32), cf_parse_ip }, + { sizeof(char*), cf_parse_string }, + { sizeof(int), NULL }, // lookups are parsed extra + { 0, NULL }, // user-defined types are parsed extra +}; + +inline uns +cf_type_size(enum cf_type type, struct cf_user_type *utype) +{ + if (type < CT_USER) + return parsers[type].size; + else + return utype->size; +} + +static char * +cf_parse_lookup(char *str, int *ptr, char **t) +{ + char **n = t; + uns total_len = 0; + while (*n && strcasecmp(*n, str)) { + total_len += strlen(*n) + 2; + n++; + } + if (*n) { + *ptr = n - t; + return NULL; + } + char *err = cf_malloc(total_len + strlen(str) + 60), *c = err; + c += sprintf(err, "Invalid value %s, possible values are: ", str); + for (n=t; *n; n++) + c+= sprintf(c, "%s, ", *n); + if (*t) + c[-2] = 0; + *ptr = -1; + return err; +} + +static char * +cf_parse_ary(uns number, char **pars, void *ptr, enum cf_type type, union cf_union *u) +{ + for (uns i=0; iutype); + if (type < CT_LOOKUP) + msg = ((cf_basic_parser*) parsers[type].parser) (pars[i], ptr + i * size); + else if (type == CT_LOOKUP) + msg = cf_parse_lookup(pars[i], ptr + i * size, u->lookup); + else if (type == CT_USER) + msg = u->utype->parser(pars[i], ptr + i * size); + else + ASSERT(0); + if (msg) + return number > 1 ? 
cf_printf("Item %d: %s", i+1, msg) : msg; + } + return NULL; +} + +/* Interpreter */ + +#define T(x) #x, +char *cf_op_names[] = { CF_OPERATIONS }; +#undef T +char *cf_type_names[] = { "int", "u64", "double", "ip", "string", "lookup", "user" }; + +#define DARY_HDR_SIZE ALIGN_TO(sizeof(uns), CPU_STRUCT_ALIGN) + +static char * +interpret_set_dynamic(struct cf_item *item, int number, char **pars, void **ptr) +{ + enum cf_type type = item->type; + cf_journal_block(ptr, sizeof(void*)); + // boundary checks done by the caller + uns size = cf_type_size(item->type, item->u.utype); + *ptr = cf_malloc(DARY_HDR_SIZE + number * size) + DARY_HDR_SIZE; + DARY_LEN(*ptr) = number; + return cf_parse_ary(number, pars, *ptr, type, &item->u); +} + +static char * +interpret_add_dynamic(struct cf_item *item, int number, char **pars, int *processed, void **ptr, enum cf_operation op) +{ + enum cf_type type = item->type; + void *old_p = *ptr; + uns size = cf_type_size(item->type, item->u.utype); + ASSERT(size >= sizeof(uns)); + int old_nr = old_p ? 
DARY_LEN(old_p) : 0; + int taken = MIN(number, ABS(item->number)-old_nr); + *processed = taken; + // stretch the dynamic array + void *new_p = cf_malloc(DARY_HDR_SIZE + (old_nr + taken) * size) + DARY_HDR_SIZE; + DARY_LEN(new_p) = old_nr + taken; + cf_journal_block(ptr, sizeof(void*)); + *ptr = new_p; + if (op == OP_APPEND) { + memcpy(new_p, old_p, old_nr * size); + return cf_parse_ary(taken, pars, new_p + old_nr * size, type, &item->u); + } else if (op == OP_PREPEND) { + memcpy(new_p + taken * size, old_p, old_nr * size); + return cf_parse_ary(taken, pars, new_p, type, &item->u); + } else + return cf_printf("Dynamic arrays do not support operation %s", cf_op_names[op]); +} + +static char *interpret_set_item(struct cf_item *item, int number, char **pars, int *processed, void *ptr, uns allow_dynamic); + +static char * +interpret_section(struct cf_section *sec, int number, char **pars, int *processed, void *ptr, uns allow_dynamic) +{ + cf_add_dirty(sec, ptr); + *processed = 0; + for (struct cf_item *ci=sec->cfg; ci->cls; ci++) + { + int taken; + char *msg = interpret_set_item(ci, number, pars, &taken, ptr + (uintptr_t) ci->ptr, allow_dynamic && !ci[1].cls); + if (msg) + return cf_printf("Item %s: %s", ci->name, msg); + *processed += taken; + number -= taken; + pars += taken; + if (!number) // stop parsing, because many parsers would otherwise complain that number==0 + break; + } + return NULL; +} + +static void +add_to_list(cnode *where, cnode *new_node, enum cf_operation op) +{ + switch (op) + { + case OP_EDIT: // edition has been done in-place + break; + case OP_REMOVE: + CF_JOURNAL_VAR(where->prev->next); + CF_JOURNAL_VAR(where->next->prev); + clist_remove(where); + break; + case OP_AFTER: // implementation dependend (prepend_head = after(list)), and where==list, see clists.h:74 + case OP_PREPEND: + case OP_COPY: + CF_JOURNAL_VAR(where->next->prev); + CF_JOURNAL_VAR(where->next); + clist_insert_after(new_node, where); + break; + case OP_BEFORE: // implementation 
dependend (append_tail = before(list)) + case OP_APPEND: + case OP_SET: + CF_JOURNAL_VAR(where->prev->next); + CF_JOURNAL_VAR(where->prev); + clist_insert_before(new_node, where); + break; + default: + ASSERT(0); + } +} + +static char * +interpret_add_list(struct cf_item *item, int number, char **pars, int *processed, void *ptr, enum cf_operation op) +{ + if (op >= OP_REMOVE) + return cf_printf("You have to open a block for operation %s", cf_op_names[op]); + if (!number) + return "Nothing to add to the list"; + struct cf_section *sec = item->u.sec; + *processed = 0; + uns index = 0; + while (number > 0) + { + void *node = cf_malloc(sec->size); + cf_init_section(item->name, sec, node, 1); + add_to_list(ptr, node, op); + int taken; + /* If the node contains any dynamic attribute at the end, we suppress + * auto-repetition here and pass the flag inside instead. */ + index++; + char *msg = interpret_section(sec, number, pars, &taken, node, sec->flags & SEC_FLAG_DYNAMIC); + if (msg) + return sec->flags & SEC_FLAG_DYNAMIC ? 
msg : cf_printf("Node %d of list %s: %s", index, item->name, msg); + *processed += taken; + number -= taken; + pars += taken; + if (sec->flags & SEC_FLAG_DYNAMIC) + break; + } + return NULL; +} + +static char * +interpret_add_bitmap(struct cf_item *item, int number, char **pars, int *processed, u32 *ptr, enum cf_operation op) +{ + if (op != OP_SET && op != OP_REMOVE) + return cf_printf("Cannot apply operation %s on a bitmap", cf_op_names[op]); + else if (item->type != CT_INT && item->type != CT_LOOKUP) + return cf_printf("Type %s cannot be used with bitmaps", cf_type_names[item->type]); + cf_journal_block(ptr, sizeof(u32)); + for (int i=0; itype == CT_INT) + TRY( cf_parse_int(pars[i], &idx) ); + else + TRY( cf_parse_lookup(pars[i], &idx, item->u.lookup) ); + if (idx >= 32) + return "Bitmaps only have 32 bits"; + if (op == OP_SET) + *ptr |= 1<cls) + { + case CC_STATIC: + if (!number) + return "Missing value"; + taken = MIN(number, item->number); + *processed = taken; + uns size = cf_type_size(item->type, item->u.utype); + cf_journal_block(ptr, taken * size); + return cf_parse_ary(taken, pars, ptr, item->type, &item->u); + case CC_DYNAMIC: + if (!allow_dynamic) + return "Dynamic array cannot be used here"; + taken = MIN(number, ABS(item->number)); + *processed = taken; + return interpret_set_dynamic(item, taken, pars, ptr); + case CC_PARSER: + if (item->number < 0 && !allow_dynamic) + return "Parsers with variable number of parameters cannot be used here"; + if (item->number > 0 && number < item->number) + return "Not enough parameters available for the parser"; + taken = MIN(number, ABS(item->number)); + *processed = taken; + for (int i=0; iu.par(taken, pars, ptr); + case CC_SECTION: + return interpret_section(item->u.sec, number, pars, processed, ptr, allow_dynamic); + case CC_LIST: + if (!allow_dynamic) + return "Lists cannot be used here"; + return interpret_add_list(item, number, pars, processed, ptr, OP_SET); + case CC_BITMAP: + if (!allow_dynamic) + return 
"Bitmaps cannot be used here"; + return interpret_add_bitmap(item, number, pars, processed, ptr, OP_SET); + default: + ASSERT(0); + } +} + +static char * +interpret_set_all(struct cf_item *item, void *ptr, enum cf_operation op) +{ + if (item->cls == CC_BITMAP) { + cf_journal_block(ptr, sizeof(u32)); + if (op == OP_CLEAR) + * (u32*) ptr = 0; + else + if (item->type == CT_INT) + * (u32*) ptr = ~0u; + else { + uns nr = -1; + while (item->u.lookup[++nr]); + * (u32*) ptr = ~0u >> (32-nr); + } + return NULL; + } else if (op != OP_CLEAR) + return "The item is not a bitmap"; + + if (item->cls == CC_LIST) { + cf_journal_block(ptr, sizeof(clist)); + clist_init(ptr); + } else if (item->cls == CC_DYNAMIC) { + cf_journal_block(ptr, sizeof(void *)); + static uns zero = 0; + * (void**) ptr = (&zero) + 1; + } else if (item->cls == CC_STATIC && item->type == CT_STRING) { + cf_journal_block(ptr, item->number * sizeof(char*)); + bzero(ptr, item->number * sizeof(char*)); + } else + return "The item is not a list, dynamic array, bitmap, or string"; + return NULL; +} + +static int +cmp_items(void *i1, void *i2, struct cf_item *item) +{ + ASSERT(item->cls == CC_STATIC); + i1 += (uintptr_t) item->ptr; + i2 += (uintptr_t) item->ptr; + if (item->type == CT_STRING) + return strcmp(* (char**) i1, * (char**) i2); + else // all numeric types + return memcmp(i1, i2, cf_type_size(item->type, item->u.utype)); +} + +static void * +find_list_node(clist *list, void *query, struct cf_section *sec, u32 mask) +{ + CLIST_FOR_EACH(cnode *, n, *list) + { + uns found = 1; + for (uns i=0; i<32; i++) + if (mask & (1<cfg+i)) + { + found = 0; + break; + } + if (found) + return n; + } + return NULL; +} + +static char * +record_selector(struct cf_item *item, struct cf_section *sec, u32 *mask) +{ + uns nr = sec->flags & SEC_FLAG_NUMBER; + if (item >= sec->cfg && item < sec->cfg + nr) // setting an attribute relative to this section + { + uns i = item - sec->cfg; + if (i >= 32) + return "Cannot select list nodes by 
this attribute"; + if (sec->cfg[i].cls != CC_STATIC) + return "Selection can only be done based on basic attributes"; + *mask |= 1 << i; + } + return NULL; +} + +#define MAX_STACK_SIZE 10 +static struct item_stack { + struct cf_section *sec; // nested section + void *base_ptr; // because original pointers are often relative + enum cf_operation op; // it is performed when a closing brace is encountered + void *list; // list the operations should be done on + u32 mask; // bit array of selectors searching in a list + struct cf_item *item; // cf_item of the list +} stack[MAX_STACK_SIZE]; +static uns level; + +static char * +opening_brace(struct cf_item *item, void *ptr, enum cf_operation op) +{ + if (level >= MAX_STACK_SIZE-1) + return "Too many nested sections"; + enum cf_operation pure_op = op & OP_MASK; + stack[++level] = (struct item_stack) { + .sec = NULL, + .base_ptr = NULL, + .op = pure_op, + .list = NULL, + .mask = 0, + .item = NULL, + }; + if (!item) // unknown is ignored; we just need to trace recursion + return NULL; + stack[level].sec = item->u.sec; + if (item->cls == CC_SECTION) + { + if (pure_op != OP_SET) + return "Only SET operation can be used with a section"; + stack[level].base_ptr = ptr; + stack[level].op = OP_EDIT | OP_2ND; // this list operation does nothing + } + else if (item->cls == CC_LIST) + { + stack[level].base_ptr = cf_malloc(item->u.sec->size); + cf_init_section(item->name, item->u.sec, stack[level].base_ptr, 1); + stack[level].list = ptr; + stack[level].item = item; + if (pure_op == OP_ALL) + return "Operation ALL cannot be applied on lists"; + else if (pure_op < OP_REMOVE) { + add_to_list(ptr, stack[level].base_ptr, pure_op); + stack[level].op |= OP_2ND; + } else + stack[level].op |= OP_1ST; + } + else + return "Opening brace can only be used on sections and lists"; + return NULL; +} + +static char * +closing_brace(struct item_stack *st, enum cf_operation op, int number, char **pars) +{ + if (st->op == OP_CLOSE) // top-level + return 
"Unmatched } parenthesis"; + if (!st->sec) { // dummy run on unknown section + if (!(op & OP_OPEN)) + level--; + return NULL; + } + enum cf_operation pure_op = st->op & OP_MASK; + if (st->op & OP_1ST) + { + st->list = find_list_node(st->list, st->base_ptr, st->sec, st->mask); + if (!st->list) + return "Cannot find a node matching the query"; + if (pure_op != OP_REMOVE) + { + if (pure_op == OP_EDIT) + st->base_ptr = st->list; + else if (pure_op == OP_AFTER || pure_op == OP_BEFORE) + cf_init_section(st->item->name, st->sec, st->base_ptr, 1); + else if (pure_op == OP_COPY) { + if (st->sec->flags & SEC_FLAG_CANT_COPY) + return cf_printf("Item %s cannot be copied", st->item->name); + memcpy(st->base_ptr, st->list, st->sec->size); // strings and dynamic arrays are shared + if (st->sec->copy) + TRY( st->sec->copy(st->base_ptr, st->list) ); + } else + ASSERT(0); + if (op & OP_OPEN) { // stay at the same recursion level + st->op = (st->op | OP_2ND) & ~OP_1ST; + add_to_list(st->list, st->base_ptr, pure_op); + return NULL; + } + int taken; // parse parameters on 1 line immediately + TRY( interpret_section(st->sec, number, pars, &taken, st->base_ptr, 1) ); + number -= taken; + pars += taken; + // and fall-thru to the 2nd phase + } + add_to_list(st->list, st->base_ptr, pure_op); + } + level--; + if (number) + return "No parameters expected after the }"; + else if (op & OP_OPEN) + return "No { is expected"; + else + return NULL; +} + +static struct cf_item * +find_item(struct cf_section *curr_sec, const char *name, char **msg, void **ptr) +{ + *msg = NULL; + if (name[0] == '^') // absolute name instead of relative + name++, curr_sec = &cf_sections, *ptr = NULL; + if (!curr_sec) // don't even search in an unknown section + return NULL; + while (1) + { + if (curr_sec != &cf_sections) + cf_add_dirty(curr_sec, *ptr); + char *c = strchr(name, '.'); + if (c) + *c++ = 0; + struct cf_item *ci = cf_find_subitem(curr_sec, name); + if (!ci->cls) + { + if (!(curr_sec->flags & 
SEC_FLAG_UNKNOWN)) // ignore silently unknown top-level sections and unknown attributes in flagged sections + *msg = cf_printf("Unknown item %s", name); + return NULL; + } + *ptr += (uintptr_t) ci->ptr; + if (!c) + return ci; + if (ci->cls != CC_SECTION) + { + *msg = cf_printf("Item %s is not a section", name); + return NULL; + } + curr_sec = ci->u.sec; + name = c; + } +} + +char * +cf_interpret_line(char *name, enum cf_operation op, int number, char **pars) +{ + char *msg; + if ((op & OP_MASK) == OP_CLOSE) + return closing_brace(stack+level, op, number, pars); + void *ptr = stack[level].base_ptr; + struct cf_item *item = find_item(stack[level].sec, name, &msg, &ptr); + if (msg) + return msg; + if (stack[level].op & OP_1ST) + TRY( record_selector(item, stack[level].sec, &stack[level].mask) ); + if (op & OP_OPEN) { // the operation will be performed after the closing brace + if (number) + return "Cannot open a block after a parameter has been passed on a line"; + return opening_brace(item, ptr, op); + } + if (!item) // ignored item in an unknown section + return NULL; + op &= OP_MASK; + + int taken = 0; // process as many parameters as possible + if (op == OP_CLEAR || op == OP_ALL) + msg = interpret_set_all(item, ptr, op); + else if (op == OP_SET) + msg = interpret_set_item(item, number, pars, &taken, ptr, 1); + else if (item->cls == CC_DYNAMIC) + msg = interpret_add_dynamic(item, number, pars, &taken, ptr, op); + else if (item->cls == CC_LIST) + msg = interpret_add_list(item, number, pars, &taken, ptr, op); + else if (item->cls == CC_BITMAP) + msg = interpret_add_bitmap(item, number, pars, &taken, ptr, op); + else + return cf_printf("Operation %s not supported on attribute %s", cf_op_names[op], name); + if (msg) + return msg; + if (taken < number) + return cf_printf("Too many parameters: %d>%d", number, taken); + + return NULL; +} + +char * +cf_find_item(const char *name, struct cf_item *item) +{ + char *msg; + void *ptr = NULL; + struct cf_item *ci = 
find_item(&cf_sections, name, &msg, &ptr); + if (msg) + return msg; + if (ci) { + *item = *ci; + item->ptr = ptr; + } else + bzero(item, sizeof(struct cf_item)); + return NULL; +} + +char * +cf_write_item(struct cf_item *item, enum cf_operation op, int number, char **pars) +{ + char *msg; + int taken = 0; + switch (op) { + case OP_SET: + msg = interpret_set_item(item, number, pars, &taken, item->ptr, 1); + break; + case OP_CLEAR: + case OP_ALL: + msg = interpret_set_all(item, item->ptr, op); + break; + case OP_APPEND: + case OP_PREPEND: + if (item->cls == CC_DYNAMIC) + msg = interpret_add_dynamic(item, number, pars, &taken, item->ptr, op); + else if (item->cls == CC_LIST) + msg = interpret_add_list(item, number, pars, &taken, item->ptr, op); + else + return "The attribute does not support append/prepend"; + break; + case OP_REMOVE: + if (item->cls == CC_BITMAP) + msg = interpret_add_bitmap(item, number, pars, &taken, item->ptr, op); + else + return "Only applicable on bitmaps"; + break; + default: + return "Unsupported operation"; + } + if (msg) + return msg; + if (taken < number) + return "Too many parameters"; + return NULL; +} + +void +cf_init_stack(void) +{ + static uns initialized = 0; + if (!initialized++) { + cf_sections.flags |= SEC_FLAG_UNKNOWN; + cf_sections.size = 0; // size of allocated array used to be stored here + cf_init_section(NULL, &cf_sections, NULL, 0); + } + level = 0; + stack[0] = (struct item_stack) { + .sec = &cf_sections, + .base_ptr = NULL, + .op = OP_CLOSE, + .list = NULL, + .mask = 0, + .item = NULL + }; +} + +int +cf_check_stack(void) +{ + if (level > 0) { + msg(L_ERROR, "Unterminated block"); + return 1; + } + return 0; +} + diff --git a/lib/conf-journal.c b/lib/conf-journal.c new file mode 100644 index 0000000..591aa8a --- /dev/null +++ b/lib/conf-journal.c @@ -0,0 +1,117 @@ +/* + * UCW Library -- Configuration files: journaling + * + * (c) 2001--2006 Robert Spalek + * (c) 2003--2006 Martin Mares + * + * This software may be freely 
distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#include "lib/lib.h" +#include "lib/conf.h" +#include "lib/getopt.h" +#include "lib/conf-internal.h" +#include "lib/mempool.h" + +#include + +static struct old_pools { + struct old_pools *prev; + struct mempool *pool; +} *pools; // link-list of older cf_pool's + +uns cf_need_journal = 1; // some programs do not need journal +static struct cf_journal_item { + struct cf_journal_item *prev; + byte *ptr; + uns len; + byte copy[0]; +} *journal; + +void +cf_journal_block(void *ptr, uns len) +{ + if (!cf_need_journal) + return; + struct cf_journal_item *ji = cf_malloc(sizeof(struct cf_journal_item) + len); + ji->prev = journal; + ji->ptr = ptr; + ji->len = len; + memcpy(ji->copy, ptr, len); + journal = ji; +} + +void +cf_journal_swap(void) + // swaps the contents of the memory and the journal, and reverses the list +{ + struct cf_journal_item *curr, *prev, *next; + for (next=NULL, curr=journal; curr; next=curr, curr=prev) + { + prev = curr->prev; + curr->prev = next; + for (uns i=0; ilen; i++) + { + byte x = curr->copy[i]; + curr->copy[i] = curr->ptr[i]; + curr->ptr[i] = x; + } + } + journal = next; +} + +struct cf_journal_item * +cf_journal_new_transaction(uns new_pool) +{ + if (new_pool) + cf_pool = mp_new(1<<10); + struct cf_journal_item *oldj = journal; + journal = NULL; + return oldj; +} + +void +cf_journal_commit_transaction(uns new_pool, struct cf_journal_item *oldj) +{ + if (new_pool) + { + struct old_pools *p = cf_malloc(sizeof(struct old_pools)); + p->prev = pools; + p->pool = cf_pool; + pools = p; + } + if (oldj) + { + struct cf_journal_item **j = &journal; + while (*j) + j = &(*j)->prev; + *j = oldj; + } +} + +void +cf_journal_rollback_transaction(uns new_pool, struct cf_journal_item *oldj) +{ + if (!cf_need_journal) + die("Cannot rollback the configuration, because the journal is disabled."); + cf_journal_swap(); + journal = oldj; + if (new_pool) + { + 
mp_delete(cf_pool); + cf_pool = pools ? pools->pool : NULL; + } +} + +void +cf_journal_delete(void) +{ + for (struct old_pools *p=pools; p; p=pools) + { + pools = p->prev; + mp_delete(p->pool); + } +} + +/* TODO: more space efficient journal */ diff --git a/lib/conf-parse.c b/lib/conf-parse.c new file mode 100644 index 0000000..c828462 --- /dev/null +++ b/lib/conf-parse.c @@ -0,0 +1,167 @@ +/* + * UCW Library -- Configuration files: parsers for basic types + * + * (c) 2001--2006 Robert Spalek + * (c) 2003--2006 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#include "lib/lib.h" +#include "lib/conf.h" +#include "lib/chartype.h" + +#include +#include +#include + +struct unit { + uns name; // one-letter name of the unit + uns num, den; // fraction +}; + +static const struct unit units[] = { + { 'd', 86400, 1 }, + { 'h', 3600, 1 }, + { 'k', 1000, 1 }, + { 'm', 1000000, 1 }, + { 'g', 1000000000, 1 }, + { 'K', 1024, 1 }, + { 'M', 1048576, 1 }, + { 'G', 1073741824, 1 }, + { '%', 1, 100 }, + { 0, 0, 0 } +}; + +static const struct unit * +lookup_unit(const char *value, const char *end, char **msg) +{ + if (end && *end) { + if (end == value || end[1] || *end >= '0' && *end <= '9') + *msg = "Invalid number"; + else { + for (const struct unit *u=units; u->name; u++) + if ((char)u->name == *end) + return u; + *msg = "Invalid unit"; + } + } + return NULL; +} + +static char cf_rngerr[] = "Number out of range"; + +char * +cf_parse_int(const char *str, int *ptr) +{ + char *msg = NULL; + if (!*str) + msg = "Missing number"; + else { + const struct unit *u; + char *end; + errno = 0; + uns x = strtoul(str, &end, 0); + if (errno == ERANGE) + msg = cf_rngerr; + else if (u = lookup_unit(str, end, &msg)) { + u64 y = (u64)x * u->num; + if (y % u->den) + msg = "Number is not an integer"; + else { + y /= u->den; + if (y > 0xffffffff) + msg = cf_rngerr; + *ptr = y; + } + } else + *ptr = x; + } 
+ return msg; +} + +char * +cf_parse_u64(const char *str, u64 *ptr) +{ + char *msg = NULL; + if (!*str) + msg = "Missing number"; + else { + const struct unit *u; + char *end; + errno = 0; + u64 x = strtoull(str, &end, 0); + if (errno == ERANGE) + msg = cf_rngerr; + else if (u = lookup_unit(str, end, &msg)) { + if (x > ~(u64)0 / u->num) + msg = "Number out of range"; + else { + x *= u->num; + if (x % u->den) + msg = "Number is not an integer"; + else + *ptr = x / u->den; + } + } else + *ptr = x; + } + return msg; +} + +char * +cf_parse_double(const char *str, double *ptr) +{ + char *msg = NULL; + if (!*str) + msg = "Missing number"; + else { + const struct unit *u; + double x; + uns read_chars; + if (sscanf(str, "%lf%n", &x, &read_chars) != 1) + msg = "Invalid number"; + else if (u = lookup_unit(str, str + read_chars, &msg)) + *ptr = x * u->num / u->den; + else + *ptr = x; + } + return msg; +} + +char * +cf_parse_ip(const char *p, u32 *varp) +{ + if (!*p) + return "Missing IP address"; + uns x = 0; + char *p2; + if (*p == '0' && (p[1] | 32) == 'x' && Cxdigit(p[2])) { + errno = 0; + x = strtoul(p, &p2, 16); + if (errno == ERANGE || x > 0xffffffff) + goto error; + p = p2; + } + else + for (uns i = 0; i < 4; i++) { + if (i) { + if (*p++ != '.') + goto error; + } + if (!Cdigit(*p)) + goto error; + errno = 0; + uns y = strtoul(p, &p2, 10); + if (errno == ERANGE || p2 == (char*) p || y > 255) + goto error; + p = p2; + x = (x << 8) + y; + } + *varp = x; + return *p ? "Trailing characters" : NULL; +error: + return "Invalid IP address"; +} + diff --git a/lib/conf-section.c b/lib/conf-section.c new file mode 100644 index 0000000..1a9ae17 --- /dev/null +++ b/lib/conf-section.c @@ -0,0 +1,203 @@ +/* + * UCW Library -- Configuration files: sections + * + * (c) 2001--2006 Robert Spalek + * (c) 2003--2006 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */ + +#include "lib/lib.h" +#include "lib/conf.h" +#include "lib/conf-internal.h" +#include "lib/clists.h" +#include "lib/binsearch.h" + +#include + +/* Dirty sections */ + +struct dirty_section { + struct cf_section *sec; + void *ptr; +}; +#define GBUF_TYPE struct dirty_section +#define GBUF_PREFIX(x) dirtsec_##x +#include "lib/gbuf.h" +static dirtsec_t dirty; +static uns dirties; + +void +cf_add_dirty(struct cf_section *sec, void *ptr) +{ + dirtsec_grow(&dirty, dirties+1); + struct dirty_section *dest = dirty.ptr + dirties; + if (dirties && dest[-1].sec == sec && dest[-1].ptr == ptr) + return; + dest->sec = sec; + dest->ptr = ptr; + dirties++; +} + +#define ASORT_PREFIX(x) dirtsec_##x +#define ASORT_KEY_TYPE struct dirty_section +#define ASORT_ELT(i) dirty.ptr[i] +#define ASORT_LT(x,y) x.sec < y.sec || x.sec == y.sec && x.ptr < y.ptr +#include "lib/arraysort.h" + +static void +sort_dirty(void) +{ + if (dirties <= 1) + return; + dirtsec_sort(dirties); + // and compress the list + struct dirty_section *read = dirty.ptr + 1, *write = dirty.ptr + 1, *limit = dirty.ptr + dirties; + while (read < limit) { + if (read->sec != read[-1].sec || read->ptr != read[-1].ptr) { + if (read != write) + *write = *read; + write++; + } + read++; + } + dirties = write - dirty.ptr; +} + +/* Initialization */ + +struct cf_section cf_sections; // root section + +struct cf_item * +cf_find_subitem(struct cf_section *sec, const char *name) +{ + struct cf_item *ci = sec->cfg; + for (; ci->cls; ci++) + if (!strcasecmp(ci->name, name)) + return ci; + return ci; +} + +static void +inspect_section(struct cf_section *sec) +{ + sec->flags = 0; + struct cf_item *ci; + for (ci=sec->cfg; ci->cls; ci++) + if (ci->cls == CC_SECTION) { + inspect_section(ci->u.sec); + sec->flags |= ci->u.sec->flags & (SEC_FLAG_DYNAMIC | SEC_FLAG_CANT_COPY); + } else if (ci->cls == CC_LIST) { + inspect_section(ci->u.sec); + sec->flags |= SEC_FLAG_DYNAMIC | SEC_FLAG_CANT_COPY; + } else if (ci->cls == CC_DYNAMIC || 
ci->cls == CC_BITMAP) + sec->flags |= SEC_FLAG_DYNAMIC; + else if (ci->cls == CC_PARSER) { + sec->flags |= SEC_FLAG_CANT_COPY; + if (ci->number < 0) + sec->flags |= SEC_FLAG_DYNAMIC; + } + if (sec->copy) + sec->flags &= ~SEC_FLAG_CANT_COPY; + sec->flags |= ci - sec->cfg; // record the number of entries +} + +void +cf_declare_section(const char *name, struct cf_section *sec, uns allow_unknown) +{ + if (!cf_sections.cfg) + { + cf_sections.size = 50; + cf_sections.cfg = xmalloc_zero(cf_sections.size * sizeof(struct cf_item)); + } + struct cf_item *ci = cf_find_subitem(&cf_sections, name); + if (ci->cls) + die("Cannot register section %s twice", name); + ci->cls = CC_SECTION; + ci->name = name; + ci->number = 1; + ci->ptr = NULL; + ci->u.sec = sec; + inspect_section(sec); + if (allow_unknown) + sec->flags |= SEC_FLAG_UNKNOWN; + ci++; + if (ci - cf_sections.cfg >= (int) cf_sections.size) + { + cf_sections.cfg = xrealloc(cf_sections.cfg, 2*cf_sections.size * sizeof(struct cf_item)); + bzero(cf_sections.cfg + cf_sections.size, cf_sections.size * sizeof(struct cf_item)); + cf_sections.size *= 2; + } +} + +void +cf_init_section(const char *name, struct cf_section *sec, void *ptr, uns do_bzero) +{ + if (do_bzero) { + ASSERT(sec->size); + bzero(ptr, sec->size); + } + for (struct cf_item *ci=sec->cfg; ci->cls; ci++) + if (ci->cls == CC_SECTION) + cf_init_section(ci->name, ci->u.sec, ptr + (uintptr_t) ci->ptr, 0); + else if (ci->cls == CC_LIST) + clist_init(ptr + (uintptr_t) ci->ptr); + else if (ci->cls == CC_DYNAMIC) { + void **dyn = ptr + (uintptr_t) ci->ptr; + if (!*dyn) { // replace NULL by an empty array + static uns zero = 0; + *dyn = (&zero) + 1; + } + } + if (sec->init) { + char *msg = sec->init(ptr); + if (msg) + die("Cannot initialize section %s: %s", name, msg); + } +} + +static char * +commit_section(struct cf_section *sec, void *ptr, uns commit_all) +{ + char *err; + for (struct cf_item *ci=sec->cfg; ci->cls; ci++) + if (ci->cls == CC_SECTION) { + if ((err = 
commit_section(ci->u.sec, ptr + (uintptr_t) ci->ptr, commit_all))) { + msg(L_ERROR, "Cannot commit section %s: %s", ci->name, err); + return "commit of a subsection failed"; + } + } else if (ci->cls == CC_LIST) { + uns idx = 0; + CLIST_FOR_EACH(cnode *, n, * (clist*) (ptr + (uintptr_t) ci->ptr)) + if (idx++, err = commit_section(ci->u.sec, n, commit_all)) { + msg(L_ERROR, "Cannot commit node #%d of list %s: %s", idx, ci->name, err); + return "commit of a list failed"; + } + } + if (sec->commit) { + /* We have to process the whole tree of sections even if just a few changes + * have been made, because there are dependencies between commit-hooks and + * hence we need to call them in a fixed order. */ +#define ARY_LT_X(ary,i,x) ary[i].sec < x.sec || ary[i].sec == x.sec && ary[i].ptr < x.ptr + struct dirty_section comp = { sec, ptr }; + uns pos = BIN_SEARCH_FIRST_GE_CMP(dirty.ptr, dirties, comp, ARY_LT_X); + + if (commit_all + || (pos < dirties && dirty.ptr[pos].sec == sec && dirty.ptr[pos].ptr == ptr)) + return sec->commit(ptr); + } + return 0; +} + +int +cf_commit_all(enum cf_commit_mode cm) +{ + sort_dirty(); + if (cm == CF_NO_COMMIT) + return 0; + if (commit_section(&cf_sections, NULL, cm == CF_COMMIT_ALL)) + return 1; + dirties = 0; + return 0; +} diff --git a/lib/conf-test.c b/lib/conf-test.c new file mode 100644 index 0000000..61ba4bc --- /dev/null +++ b/lib/conf-test.c @@ -0,0 +1,219 @@ +/* + * Insane tester of reading configuration files + * + * (c) 2006 Robert Spalek + */ + +#include "lib/lib.h" +#include "lib/conf.h" +#include "lib/getopt.h" +#include "lib/clists.h" +#include "lib/fastbuf.h" + +#include +#include +#include + +static int verbose; + +struct sub_sect_1 { + cnode n; + char *name; + time_t t; + char *level; + int confidence[2]; + double *list; +}; + +static struct sub_sect_1 sec1 = { {}, "Charlie", 0, "WBAFC", { 0, -1}, DARY_ALLOC(double, 3, 1e4, -1e-4, 8) }; + +static char * +init_sec_1(struct sub_sect_1 *s) +{ + if (s == &sec1) // this is a 
static variable; skip clearing + return NULL; + s->name = "unknown"; + s->level = "default"; + s->confidence[0] = 5; + s->confidence[1] = 6; + // leave s->list==NULL + return NULL; +} + +static char * +commit_sec_1(struct sub_sect_1 *s) +{ + if (s->confidence[0] < 0 || s->confidence[0] > 10) + return "Well, this can't be"; + return NULL; +} + +static char * +time_parser(uns number, char **pars, time_t *ptr) +{ + *ptr = number ? atoi(pars[0]) : time(NULL); + return NULL; +} + +static struct cf_section cf_sec_1 = { + CF_TYPE(struct sub_sect_1), + CF_INIT(init_sec_1), + CF_COMMIT(commit_sec_1), +#define F(x) PTR_TO(struct sub_sect_1, x) + CF_ITEMS { + CF_STRING("name", F(name)), + //CF_PARSER("t", F(t), time_parser, 0), + CF_STRING("level", F(level)), + CF_INT_ARY("confidence", F(confidence[0]), 2), // XXX: the [0] is needed for the sake of type checking + CF_DOUBLE_DYN("list", F(list), 100), + CF_END + } +#undef F +}; + +static uns nr1 = 15; +static int *nrs1 = DARY_ALLOC(int, 5, 5, 4, 3, 2, 1); +static int nrs2[5]; +static char *str1 = "no worries"; +static char **str2 = DARY_ALLOC(char *, 2, "Alice", "Bob"); +static u64 u1 = 0xCafeBeefDeadC00ll; +static double d1 = -1.1; +static clist secs; +static time_t t1, t2; +static u32 ip; +static int *look = DARY_ALLOC(int, 2, 2, 1); +static u16 numbers[10] = { 2, 100, 1, 5 }; +static u32 bitmap1 = 0xff; +static u32 bitmap2 = 3; + +static char * +parse_u16(char *string, u16 *ptr) +{ + uns a; + char *msg = cf_parse_int(string, &a); + if (msg) + return msg; + if (a >= (1<<16)) + return "Come on, man, this doesn't fit to 16 bits"; + *ptr = a; + return NULL; +} + +static void +dump_u16(struct fastbuf *fb, u16 *ptr) +{ + bprintf(fb, "%d ", *ptr); +} + +static struct cf_user_type u16_type = { + .size = sizeof(u16), + .name = "u16", + .parser = (cf_parser1*) parse_u16, + .dumper = (cf_dumper1*) dump_u16 +}; + +static char * +init_top(void *ptr UNUSED) +{ + for (uns i=0; i<5; i++) + { + struct sub_sect_1 *s = xmalloc(sizeof(struct 
sub_sect_1)); // XXX: cannot by cf_malloc(), because it's deleted when cf_reload()'ed + cf_init_section("slaves", &cf_sec_1, s, 1); + s->confidence[1] = i; + clist_add_tail(&secs, &s->n); + } + return NULL; +} + +static char * +commit_top(void *ptr UNUSED) +{ + if (nr1 != 15) + return "Don't touch my variable!"; + return NULL; +} + +static char *alphabet[] = { "alpha", "beta", "gamma", "delta", NULL }; +static struct cf_section cf_top = { + CF_INIT(init_top), + CF_COMMIT(commit_top), + CF_ITEMS { + CF_UNS("nr1", &nr1), + CF_INT_DYN("nrs1", &nrs1, 1000), + CF_INT_ARY("nrs2", nrs2, 5), + CF_STRING("str1", &str1), + CF_STRING_DYN("str2", &str2, 20), + CF_U64("u1", &u1), + CF_DOUBLE("d1", &d1), + CF_PARSER("FirstTime", &t1, time_parser, -1), + CF_PARSER("SecondTime", &t2, time_parser, 1), + CF_SECTION("master", &sec1, &cf_sec_1), + CF_LIST("slaves", &secs, &cf_sec_1), + CF_IP("ip", &ip), + CF_LOOKUP_DYN("look", &look, alphabet, 1000), + CF_USER_ARY("numbers", numbers, &u16_type, 10), + CF_BITMAP_INT("bitmap1", &bitmap1), + CF_BITMAP_LOOKUP("bitmap2", &bitmap2, ((char*[]) { + "one", "two", "three", "four", "five", "six", "seven", "eight", + "nine", "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen", "seventeen", + "eighteen", "nineteen", "twenty", NULL // hidden joke here + })), + CF_END + } +}; + +static byte short_opts[] = CF_SHORT_OPTS "v"; +static struct option long_opts[] = { + CF_LONG_OPTS + {"verbose", 0, 0, 'v'}, + {NULL, 0, 0, 0} +}; + +static char *help = "\ +Usage: conf-test \n\ +\n\ +Options:\n" +CF_USAGE +"-v\t\t\tBe verbose\n\ +"; + +static void NONRET +usage(char *msg, ...) 
+{ + va_list va; + va_start(va, msg); + if (msg) + vfprintf(stderr, msg, va); + fputs(help, stderr); + exit(1); +} + +int +main(int argc, char *argv[]) +{ + log_init(argv[0]); + cf_declare_section("top", &cf_top, 0); + cf_def_file = "lib/conf-test.cf"; + + int opt; + while ((opt = cf_getopt(argc, argv, short_opts, long_opts, NULL)) >= 0) + switch (opt) { + case 'v': verbose++; break; + default: usage("unknown option %c\n", opt); + } + if (optind < argc) + usage("too many parameters (%d more)\n", argc-optind); + + /* + cf_load("non-existent file"); + //cf_reload("non-existent file"); + cf_load("non-existent file"); + cf_set("top.d1 -1.1; top.master b"); + */ + + struct fastbuf *out = bfdopen(1, 1<<14); + cf_dump_sections(out); + bclose(out); + + return 0; +} diff --git a/lib/conf-test.cf b/lib/conf-test.cf new file mode 100644 index 0000000..91e6cc3 --- /dev/null +++ b/lib/conf-test.cf @@ -0,0 +1,52 @@ +# test config file +#include lib/conf-test.t ; top.xa=1 +#include 'non-existent file'; #top.xa=1 +Top { \ + + nr1=16 #!!! + nrs1 2 3 5 \ + 7 11 13 \ + \ + 17M + nrs2 3 3k 3 3 3 ; \ + str1 "hello,\t\x2bworld%%\n" + str2 'Hagenuk, + the best' "\ + " qu'est-ce que c'est? + u1 0xbadcafebadbeefc0 + str2:prepend prepended + str2:append appended + d1 7% + d1 -1.14e-25 + firsttime ; secondtime 56 + ^top.master:set alice HB8+ + slaves:clear + ip 0xa + ip 195.113.31.123 + look Alpha + look:prepend Beta GAMMA + numbers 11000 65535 + bitmap1 31 + bitmap1:remove 3 3 + bitmap2:all + bitmap2:remove eleven twelve one +};;;;;; + +unknown.ignored :-) + +top.slaves cairns gpua 7 7 -10% +10% +top.slaves daintree rafc 4 5 -171% +top.slaves coogee pum 9 8 +top.slaves:prepend {name=bondi; level=\ + "PUG"; confidence 10 10} +top.slaves:remove {name daintree} +top.slaveS:edit {level PUG} Bondi PUG! 
+top.slaveS:before {level pum}{ + confidence 2 + list 123 456 789 +} +top.slaves:copy {name coogee} Coogee2 PUM + +topp.a=15 +top.nr1= ' 15' +a { ;-D } diff --git a/lib/conf.h b/lib/conf.h new file mode 100644 index 0000000..8085744 --- /dev/null +++ b/lib/conf.h @@ -0,0 +1,163 @@ +/* + * UCW Library -- Configuration files + * + * (c) 2001--2006 Robert Spalek + * (c) 2003--2006 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#ifndef _UCW_CONF_H +#define _UCW_CONF_H + +enum cf_class { + CC_END, // end of list + CC_STATIC, // single variable or static array + CC_DYNAMIC, // dynamically allocated array + CC_PARSER, // arbitrary parser function + CC_SECTION, // section appears exactly once + CC_LIST, // list with 0..many nodes + CC_BITMAP // of up to 32 items +}; + +enum cf_type { + CT_INT, CT_U64, CT_DOUBLE, // number types + CT_IP, // IP address + CT_STRING, // string type + CT_LOOKUP, // in a string table + CT_USER // user-defined type +}; + +struct fastbuf; +typedef char *cf_parser(uns number, char **pars, void *ptr); + /* A parser function gets an array of (strdup'ed) strings and a pointer with + * the customized information (most likely the target address). It can store + * the parsed value anywhere in any way it likes, however it must first call + * cf_journal_block() on the overwritten memory block. It returns an error + * message or NULL if everything is all right. */ +typedef char *cf_parser1(char *string, void *ptr); + /* A parser function for user-defined types gets a string and a pointer to + * the destination variable. It must store the value within [ptr,ptr+size), + * where size is fixed for each type. It should not call cf_journal_block(). */ +typedef char *cf_hook(void *ptr); + /* An init- or commit-hook gets a pointer to the section or NULL if this + * is the global section. It returns an error message or NULL if everything + * is all right. 
The init-hook should fill in default values (needed for + * dynamically allocated nodes of link lists or for filling global variables + * that are run-time dependent). The commit-hook should perform sanity + * checks and postprocess the parsed values. Commit-hooks must call + * cf_journal_block() too. Caveat! init-hooks for static sections must not + * use cf_malloc() but normal xmalloc(). */ +typedef void cf_dumper1(struct fastbuf *fb, void *ptr); + /* Dumps the contents of a variable of a user-defined type. */ +typedef char *cf_copier(void *dest, void *src); + /* Similar to init-hook, but it copies attributes from another list node + * instead of setting the attributes to default values. You have to provide + * it if your node contains parsed values and/or sub-lists. */ + +struct cf_user_type { + uns size; // of the parsed attribute + char *name; // name of the type (for dumping) + cf_parser1 *parser; // how to parse it + cf_dumper1 *dumper; // how to dump the type +}; + +struct cf_section; +struct cf_item { + const char *name; // case insensitive + int number; // length of an array or #parameters of a parser (negative means at most) + void *ptr; // pointer to a global variable or an offset in a section + union cf_union { + struct cf_section *sec; // declaration of a section or a list + cf_parser *par; // parser function + char **lookup; // NULL-terminated sequence of allowed strings for lookups + struct cf_user_type *utype; // specification of the user-defined type + } u; + enum cf_class cls:16; // attribute class + enum cf_type type:16; // type of a static or dynamic attribute +}; + +struct cf_section { + uns size; // 0 for a global block, sizeof(struct) for a section + cf_hook *init; // fills in default values (no need to bzero) + cf_hook *commit; // verifies parsed data (optional) + cf_copier *copy; // copies values from another instance (optional, no need to copy basic attributes) + struct cf_item *cfg; // CC_END-terminated array of items + uns flags; // for 
internal use only +}; + +/* Declaration of cf_section */ +#define CF_TYPE(s) .size = sizeof(s) +#define CF_INIT(f) .init = (cf_hook*) f +#define CF_COMMIT(f) .commit = (cf_hook*) f +#define CF_COPY(f) .copy = (cf_copier*) f +#define CF_ITEMS .cfg = ( struct cf_item[] ) +#define CF_END { .cls = CC_END } +/* Configuration items */ +#define CF_STATIC(n,p,T,t,c) { .cls = CC_STATIC, .type = CT_##T, .name = n, .number = c, .ptr = CHECK_PTR_TYPE(p,t*) } +#define CF_DYNAMIC(n,p,T,t,c) { .cls = CC_DYNAMIC, .type = CT_##T, .name = n, .number = c, .ptr = CHECK_PTR_TYPE(p,t**) } +#define CF_PARSER(n,p,f,c) { .cls = CC_PARSER, .name = n, .number = c, .ptr = p, .u.par = (cf_parser*) f } +#define CF_SECTION(n,p,s) { .cls = CC_SECTION, .name = n, .number = 1, .ptr = p, .u.sec = s } +#define CF_LIST(n,p,s) { .cls = CC_LIST, .name = n, .number = 1, .ptr = CHECK_PTR_TYPE(p,clist*), .u.sec = s } +#define CF_BITMAP_INT(n,p) { .cls = CC_BITMAP, .type = CT_INT, .name = n, .number = 1, .ptr = CHECK_PTR_TYPE(p,u32*) } +#define CF_BITMAP_LOOKUP(n,p,t) { .cls = CC_BITMAP, .type = CT_LOOKUP, .name = n, .number = 1, .ptr = CHECK_PTR_TYPE(p,u32*), .u.lookup = t } +/* Configuration items for basic types */ +#define CF_INT(n,p) CF_STATIC(n,p,INT,int,1) +#define CF_INT_ARY(n,p,c) CF_STATIC(n,p,INT,int,c) +#define CF_INT_DYN(n,p,c) CF_DYNAMIC(n,p,INT,int,c) +#define CF_UNS(n,p) CF_STATIC(n,p,INT,uns,1) +#define CF_UNS_ARY(n,p,c) CF_STATIC(n,p,INT,uns,c) +#define CF_UNS_DYN(n,p,c) CF_DYNAMIC(n,p,INT,uns,c) +#define CF_U64(n,p) CF_STATIC(n,p,U64,u64,1) +#define CF_U64_ARY(n,p,c) CF_STATIC(n,p,U64,u64,c) +#define CF_U64_DYN(n,p,c) CF_DYNAMIC(n,p,U64,u64,c) +#define CF_DOUBLE(n,p) CF_STATIC(n,p,DOUBLE,double,1) +#define CF_DOUBLE_ARY(n,p,c) CF_STATIC(n,p,DOUBLE,double,c) +#define CF_DOUBLE_DYN(n,p,c) CF_DYNAMIC(n,p,DOUBLE,double,c) +#define CF_IP(n,p) CF_STATIC(n,p,IP,u32,1) +#define CF_IP_ARY(n,p,c) CF_STATIC(n,p,IP,u32,c) +#define CF_IP_DYN(n,p,c) CF_DYNAMIC(n,p,IP,u32,c) +#define CF_STRING(n,p) 
CF_STATIC(n,p,STRING,char*,1) +#define CF_STRING_ARY(n,p,c) CF_STATIC(n,p,STRING,char*,c) +#define CF_STRING_DYN(n,p,c) CF_DYNAMIC(n,p,STRING,char*,c) +#define CF_LOOKUP(n,p,t) { .cls = CC_STATIC, .type = CT_LOOKUP, .name = n, .number = 1, .ptr = CHECK_PTR_TYPE(p,int*), .u.lookup = t } +#define CF_LOOKUP_ARY(n,p,t,c) { .cls = CC_STATIC, .type = CT_LOOKUP, .name = n, .number = c, .ptr = CHECK_PTR_TYPE(p,int*), .u.lookup = t } +#define CF_LOOKUP_DYN(n,p,t,c) { .cls = CC_DYNAMIC, .type = CT_LOOKUP, .name = n, .number = c, .ptr = CHECK_PTR_TYPE(p,int**), .u.lookup = t } +#define CF_USER(n,p,t) { .cls = CC_STATIC, .type = CT_USER, .name = n, .number = 1, .ptr = p, .u.utype = t } +#define CF_USER_ARY(n,p,t,c) { .cls = CC_STATIC, .type = CT_USER, .name = n, .number = c, .ptr = p, .u.utype = t } +#define CF_USER_DYN(n,p,t,c) { .cls = CC_DYNAMIC, .type = CT_USER, .name = n, .number = c, .ptr = p, .u.utype = t } + +/* If you aren't picky about the number of parameters */ +#define CF_ANY_NUM -0x7fffffff + +#define DARY_LEN(a) ((uns*)a)[-1] + // length of a dynamic array +#define DARY_ALLOC(type,len,val...) ((struct { uns l; type a[len]; }) { .l = len, .a = { val } }).a + // creates a static instance of a dynamic array + +/* Memory allocation: conf-alloc.c */ +struct mempool; +extern struct mempool *cf_pool; +void *cf_malloc(uns size); +void *cf_malloc_zero(uns size); +char *cf_strdup(const char *s); +char *cf_printf(const char *fmt, ...) 
FORMAT_CHECK(printf,1,2); + +/* Undo journal for error recovery: conf-journal.c */ +extern uns cf_need_journal; +void cf_journal_block(void *ptr, uns len); +#define CF_JOURNAL_VAR(var) cf_journal_block(&(var), sizeof(var)) + +/* Declaration: conf-section.c */ +void cf_declare_section(const char *name, struct cf_section *sec, uns allow_unknown); +void cf_init_section(const char *name, struct cf_section *sec, void *ptr, uns do_bzero); + +/* Parsers for basic types: conf-parse.c */ +char *cf_parse_int(const char *str, int *ptr); +char *cf_parse_u64(const char *str, u64 *ptr); +char *cf_parse_double(const char *str, double *ptr); +char *cf_parse_ip(const char *p, u32 *varp); + +#endif + diff --git a/lib/config.h b/lib/config.h new file mode 100644 index 0000000..e4c3b33 --- /dev/null +++ b/lib/config.h @@ -0,0 +1,49 @@ +/* + * UCW Library -- Configuration-Dependent Definitions + * + * (c) 1997--2007 Martin Mares + * (c) 2006 Robert Spalek + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */ + +#ifndef _UCW_CONFIG_H +#define _UCW_CONFIG_H + +/* Configuration switches */ + +#include "autoconf.h" + +/* Tell libc we're going to use all extensions available */ + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + +/* Types (based on standard C99 integers) */ + +#include +#include + +typedef uint8_t byte; /* exactly 8 bits, unsigned */ +typedef uint8_t u8; /* exactly 8 bits, unsigned */ +typedef int8_t s8; /* exactly 8 bits, signed */ +typedef uint16_t u16; /* exactly 16 bits, unsigned */ +typedef int16_t s16; /* exactly 16 bits, signed */ +typedef uint32_t u32; /* exactly 32 bits, unsigned */ +typedef int32_t s32; /* exactly 32 bits, signed */ +typedef uint64_t u64; /* exactly 64 bits, unsigned */ +typedef int64_t s64; /* exactly 64 bits, signed */ + +typedef unsigned int uns; /* at least 32 bits */ +typedef u32 sh_time_t; /* seconds since UNIX epoch */ +typedef s64 timestamp_t; /* milliseconds since UNIX epoch */ + +#ifdef CONFIG_LARGE_FILES /* File positions */ +typedef s64 sh_off_t; +#else +typedef s32 sh_off_t; +#endif + +#endif diff --git a/lib/ctmatch.c b/lib/ctmatch.c new file mode 100644 index 0000000..7e80776 --- /dev/null +++ b/lib/ctmatch.c @@ -0,0 +1,44 @@ +/* + * UCW Library -- Content-Type Pattern Matching + * + * (c) 1997 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */ + +#include "lib/lib.h" +#include "lib/chartype.h" + +int +match_ct_patt(const char *p, const char *t) +{ + if (*p == '*' && !p[1]) /* "*" matches everything */ + return 1; + + if (*p == '*' && p[1] == '/') /* "*" on the left-hand side */ + { + while (*t && *t != ' ' && *t != ';' && *t != '/') + t++; + p += 2; + } + else /* Normal left-hand side */ + { + while (*p != '/') + if (Cupcase(*p++) != Cupcase(*t++)) + return 0; + p++; + } + if (*t++ != '/') + return 0; + + if (*p == '*' && !p[1]) /* "*" on the right-hand side */ + return 1; + while (*p) + if (Cupcase(*p++) != Cupcase(*t++)) + return 0; + if (*t && *t != ' ' && *t != ';') + return 0; + + return 1; +} diff --git a/lib/db-emul.c b/lib/db-emul.c new file mode 100644 index 0000000..62540de --- /dev/null +++ b/lib/db-emul.c @@ -0,0 +1,155 @@ +/* + * UCW Library -- SDBM emulator at top of GDBM + * + * (c) 1999 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#include "lib/lib.h" +#include "lib/db.h" + +#include +#include +#include + +#include + +struct sdbm { + GDBM_FILE db; + datum prevkey; +}; + +struct sdbm * +sdbm_open(struct sdbm_options *o) +{ + struct sdbm *d = xmalloc(sizeof(struct sdbm)); + d->db = gdbm_open(o->name, + (o->page_order ? (1 << o->page_order) : 0), + ((o->flags & SDBM_WRITE) ? ((o->flags & SDBM_CREAT) ? GDBM_WRCREAT : GDBM_WRITER) : GDBM_READER) + | ((o->flags & SDBM_SYNC) ? 
GDBM_SYNC : 0), + 0666, + NULL); + if (o->cache_size) + gdbm_setopt(d->db, GDBM_CACHESIZE, &o->cache_size, sizeof(o->cache_size)); + d->prevkey.dptr = NULL; + return d; +} + +void +sdbm_close(struct sdbm *d) +{ + sdbm_rewind(d); + gdbm_close(d->db); + xfree(d); +} + +static int +sdbm_put_user(byte *D, uns Dl, byte *val, uns *vallen) +{ + if (vallen) + { + if (*vallen < Dl) + return 1; + *vallen = Dl; + } + if (val) + memcpy(val, D, Dl); + return 0; +} + +int +sdbm_store(struct sdbm *d, byte *key, uns keylen, byte *val, uns vallen) +{ + datum K, V; + int rc; + + K.dptr = key; + K.dsize = keylen; + V.dptr = val; + V.dsize = vallen; + rc = gdbm_store(d->db, K, V, GDBM_INSERT); + return (rc < 0) ? rc : !rc; +} + +int +sdbm_replace(struct sdbm *d, byte *key, uns keylen, byte *val, uns vallen) +{ + datum K, V; + int rc; + + if (!val) + return sdbm_delete(d, key, keylen); + K.dptr = key; + K.dsize = keylen; + V.dptr = val; + V.dsize = vallen; + rc = gdbm_store(d->db, K, V, GDBM_REPLACE); + return (rc < 0) ? rc : !rc; +} + +int +sdbm_delete(struct sdbm *d, byte *key, uns keylen) +{ + datum K; + + K.dptr = key; + K.dsize = keylen; + return !gdbm_delete(d->db, K); +} + +int +sdbm_fetch(struct sdbm *d, byte *key, uns keylen, byte *val, uns *vallen) +{ + datum K, V; + int rc; + + K.dptr = key; + K.dsize = keylen; + if (!val && !vallen) + return gdbm_exists(d->db, K); + V = gdbm_fetch(d->db, K); + if (!V.dptr) + return 0; + rc = sdbm_put_user(V.dptr, V.dsize, val, vallen); + xfree(V.dptr); + return rc ? 
SDBM_ERROR_TOO_LARGE : 1; +} + +void +sdbm_rewind(struct sdbm *d) +{ + if (d->prevkey.dptr) + { + xfree(d->prevkey.dptr); + d->prevkey.dptr = NULL; + } +} + +int +sdbm_get_next(struct sdbm *d, byte *key, uns *keylen, byte *val, uns *vallen) +{ + datum K; + + if (d->prevkey.dptr) + { + K = gdbm_nextkey(d->db, d->prevkey); + xfree(d->prevkey.dptr); + } + else + K = gdbm_firstkey(d->db); + d->prevkey = K; + if (!K.dptr) + return 0; + if (sdbm_put_user(K.dptr, K.dsize, key, keylen)) + return SDBM_ERROR_TOO_LARGE; + if (val || vallen) + return sdbm_fetch(d, key, *keylen, val, vallen); + return 1; +} + +void +sdbm_sync(struct sdbm *d) +{ +} diff --git a/lib/db-test.c b/lib/db-test.c new file mode 100644 index 0000000..9a6f3bb --- /dev/null +++ b/lib/db-test.c @@ -0,0 +1,475 @@ +/* + * UCW Library -- Database Manager -- Tests and Benchmarks + * + * (c) 1999 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */ + +#if 1 +#include "lib/db.c" +#define NAME "SDBM" +#else +#include "lib/db-emul.c" +#define NAME "GDBM" +#endif + +#include +#include +#include +#include + +static struct sdbm_options opts = { + flags: SDBM_CREAT | SDBM_WRITE, + name: "db.test", + page_order: 10, + cache_size: 1024, + key_size: -1, + val_size: -1 +}; + +static struct sdbm *d; +static int key_min, key_max; /* min<0 -> URL distribution */ +static int val_min, val_max; +static int num_keys; /* Number of distinct keys */ +static int verbose; + +static void +help(void) +{ + printf("Usage: dbtest [] \n\ +\n\ +Options:\n\ +-c Use cache of pages\n\ +-p Use pages of order \n\ +-k Use key size \n\ +-k- Use key size uniformly distributed between and \n\ +-kU Use keys with URL distribution\n\ +-n Number of distinct keys\n\ +-d[-] Use specified value size (see -k-)\n\ +-t Perform the tests on an existing database file\n\ +-v Be verbose\n\ +-s Turn on synchronous mode\n\ +-S Turn on supersynchronous mode\n\ +-F Turn on fast mode\n\ +\n\ +Commands:\n\ +c Fill database\n\ +r Rewrite database\n\ +f[

%%][] Find records with probability of success

%% (default=100)\n\ +F[

%%][] Find, but don't fetch values\n\ +d Delete records\n\ +w Walk database\n\ +W Walk, but don't fetch values\n\ +"); + exit(0); +} + +static uns +krand(uns kn) +{ + return kn * 2000000011; +} + +static uns +gen_url_size(uns rnd) +{ + uns l, m, r; + static uns utable[] = { +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 22, 108, 245, 481, 979, 3992, 7648, 13110, 19946, 27256, 34993, 43222, 52859, 64563, +80626, 117521, 147685, 188364, 233174, 290177, 347132, 407231, 465787, 540931, 628601, 710246, 808671, 922737, 1025691, 1138303, +1238802, 1344390, 1443843, 1533207, 1636494, 1739082, 1826911, 1910725, 1993940, 2094365, 2188987, 2267827, 2350190, 2441980, +2520713, 2593654, 2668632, 2736009, 2808356, 2889682, 2959300, 3017945, 3086488, 3146032, 3204818, 3251897, 3307001, 3349388, +3392798, 3433429, 3476765, 3529107, 3556884, 3585120, 3633005, 3677697, 3699561, 3716660, 3739823, 3765154, 3795096, 3821184, +3858117, 3908757, 3929095, 3943264, 3957033, 3969588, 3983441, 3994630, 4005413, 4028890, 4039678, 4058007, 4071906, 4087029, +4094233, 4105259, 4111603, 4120338, 4127364, 4133983, 4140310, 4144843, 4150565, 4155974, 4165132, 4170648, 4176811, 4187118, +4190866, 4199051, 4206686, 4216122, 4226109, 4233721, 4254123, 4261792, 4270396, 4276650, 4282932, 4291738, 4295932, 4299370, +4304011, 4307098, 4311866, 4318168, 4325730, 4329774, 4332946, 4336305, 4339770, 4345237, 4349038, 4356129, 4362872, 4366542, +4371077, 4374524, 4376733, 4378794, 4380652, 4382340, 4383552, 4385952, 4386914, 4393123, 4394106, 4395142, 4396593, 4399112, +4399909, 4401015, 4401780, 4402616, 4403454, 4404481, 4405231, 4405947, 4406886, 4408364, 4409159, 4409982, 4410872, 4412010, +4413341, 4414161, 4415673, 4417135, 4418032, 4419117, 4419952, 4420677, 4421387, 4421940, 4422469, 4423210, 4423696, 4424274, +4424982, 4425665, 4426363, 4427018, 4427969, 4428992, 4429791, 4430804, 4432601, 4433440, 4434157, 4434967, 4436280, 4439784, +4444255, 4445544, 4446416, 4447620, 4449638, 4453004, 4455470, 
4456982, 4457956, 4458617, 4459538, 4460007, 4460377, 4460768, +4461291, 4461520, 4461678, 4461911, 4462063, 4462239, 4462405, 4462607, 4462666, 4462801, 4462919, 4463108, 4463230, 4463438, +4463530, 4463698, 4463779, 4463908, 4463991, 4464138, 4464188, 4464391, 4464580, 4464868, 4464980, 4465174, 4465255, 4465473, +4465529, 4465681, 4465746, 4465916, 4465983, 4466171, 4466248, 4466430, 4466560, 4466751, 4466930, 4467807, 4468847, 4469940, +4470344, 4470662, 4470716, 4471120, 4471389, 4471814, 4472141, 4472545, 4472687, 4473051, 4473253, 4473603, 4473757, 4474065, +4474125, 4474354, 4474428, 4474655, 4474705, 4474841, 4474858, 4475133, 4475201, 4475327, 4475367, 4475482, 4475533, 4475576, +4475586, 4475616, 4475637, 4475659, 4475696, 4475736, 4475775, 4475794, 4476156, 4476711, 4477004, 4477133, 4477189, 4477676, +4477831, 4477900, 4477973, 4477994, 4478011, 4478040, 4478063, 4478085, 4478468, 4478715, 4479515, 4480034, 4481804, 4483259, +4483866, 4484202, 4484932, 4485693, 4486184, 4486549, 4486869, 4487405, 4487639, 4487845, 4488086, 4488256, 4488505, 4488714, +4492669, 4496233, 4497738, 4498122, 4498653, 4499862, 4501169, 4501627, 4501673, 4501811, 4502182, 4502475, 4502533, 4502542, +4502548, 4502733, 4503389, 4504381, 4505070, 4505378, 4505814, 4506031, 4506336, 4506642, 4506845, 4506971, 4506986, 4507016, +4507051, 4507098, 4507107, 4507114, 4507139, 4507478, 4507643, 4507674, 4507694, 4507814, 4507894, 4507904, 4507929, 4507989, +4508023, 4508047, 4508053, 4508063, 4508075, 4508092, 4508104, 4508113, 4508239, 4508285, 4508324, 4508335, 4508340, 4508378, +4508405, 4508419, 4508436, 4508449, 4508470, 4508488, 4508515, 4508541, 4508564, 4508570, 4508584, 4508594, 4508607, 4508634, +4508652, 4508665, 4508673, 4508692, 4508704, 4508742, 4508755, 4508773, 4508788, 4508798, 4508832, 4508869, 4508885, 4508905, +4508915, 4508947, 4508956, 4509061, 4509070, 4509357, 4509368, 4509380, 4509393, 4509401, 4509412, 4509426, 4509438, 4509451, +4509461, 4509473, 4509489, 
4509498, 4509512, 4509537, 4509568, 4509582, 4509621, 4509629, 4509747, 4509766, 4509776, 4509795, +4509802, 4509813, 4509822, 4509829, 4509834, 4509844, 4509854, 4509863, 4509868, 4509875, 4509886, 4509898, 4509908, 4509920, +4509932, 4509941, 4509949, 4509955, 4509967, 4509972, 4509979, 4509987, 4509999, 4510002, 4510010, 4510014, 4510018, 4510025, +4510028, 4510049, 4510055, 4510061, 4510068, 4510079, 4510085, 4510091, 4510098, 4510102, 4510104, 4510110, 4510121, 4510128, +4510132, 4510138, 4510144, 4510145, 4510153, 4510161, 4510174, 4510196, 4510199, 4510208, 4510209, 4510212, 4510216, 4510217, +4510219, 4510222, 4510228, 4510231, 4510236, 4510241, 4510245, 4510248, 4510250, 4510254, 4510255, 4510261, 4510262, 4510266, +4510266, 4510271, 4510285, 4510287, 4510291, 4510295, 4510303, 4510306, 4510308, 4510310, 4510314, 4510319, 4510320, 4510324, +4510328, 4510333, 4510333, 4510336, 4510340, 4510342, 4510348, 4510353, 4510359, 4510362, 4510365, 4510371, 4510373, 4510375, +4510378, 4510380, 4510385, 4510389, 4510391, 4510391, 4510394, 4510396, 4510397, 4510398, 4510400, 4510403, 4510406, 4510407, +4510408, 4510409, 4510411, 4510413, 4510417, 4510417, 4510419, 4510422, 4510426, 4510427, 4510430, 4510435, 4510437, 4510439, +4510440, 4510442, 4510442, 4510446, 4510447, 4510448, 4510450, 4510451, 4510451, 4510453, 4510454, 4510455, 4510457, 4510460, +4510460, 4510460, 4510462, 4510463, 4510466, 4510468, 4510472, 4510475, 4510480, 4510482, 4510483, 4510486, 4510488, 4510492, +4510494, 4510497, 4510497, 4510499, 4510503, 4510505, 4510506, 4510507, 4510509, 4510512, 4510514, 4510527, 4510551, 4510553, +4510554, 4510555, 4510556, 4510558, 4510561, 4510562, 4510566, 4510567, 4510568, 4510570, 4510573, 4510574, 4510586, 4510603, +4510605, 4510607, 4510610, 4510610, 4510613, 4510613, 4510614, 4510614, 4510615, 4510616, 4510616, 4510620, 4510622, 4510623, +4510624, 4510627, 4510628, 4510630, 4510631, 4510632, 4510634, 4510634, 4510634, 4510636, 4510636, 4510639, 4510639, 
4510640, +4510643, 4510647, 4510649, 4510650, 4510653, 4510653, 4510653, 4510653, 4510656, 4510659, 4510661, 4510664, 4510665, 4510669, +4510672, 4510673, 4510674, 4510675, 4510680, 4510683, 4510684, 4510686, 4510687, 4510690, 4510691, 4510693, 4510693, 4510697, +4510699, 4510700, 4510703, 4510704, 4510709, 4510711, 4510713, 4510713, 4510720, 4510720, 4510722, 4510724, 4510727, 4510729, +4510735, 4510735, 4510738, 4510740, 4510744, 4510745, 4510746, 4510748, 4510754, 4510756, 4510758, 4510761, 4510764, 4510766, +4510768, 4510768, 4510770, 4510770, 4510772, 4510774, 4510775, 4510775, 4510775, 4510776, 4510777, 4510780, 4510782, 4510783, +4510785, 4510786, 4510788, 4510789, 4510791, 4510793, 4510793, 4510793, 4510795, 4510795, 4510799, 4510803, 4510804, 4510804, +4510804, 4510805, 4510807, 4510809, 4510811, 4510811, 4510813, 4510815, 4510815, 4510816, 4510819, 4510820, 4510824, 4510827, +4510829, 4510829, 4510830, 4510833, 4510835, 4510837, 4510838, 4510838, 4510839, 4510840, 4510840, 4510842, 4510842, 4510843, +4510845, 4510845, 4510845, 4510847, 4510848, 4510848, 4510848, 4510850, 4510853, 4510855, 4510857, 4510859, 4510861, 4510862, +4510864, 4510865, 4510865, 4510865, 4510869, 4510869, 4510869, 4510869, 4510869, 4510870, 4510870, 4510872, 4510872, 4510873, +4510874, 4510875, 4510875, 4510877, 4510879, 4510879, 4510879, 4510879, 4510880, 4510881, 4510882, 4510883, 4510884, 4510885, +4510886, 4510887, 4510890, 4510890, 4510891, 4510892, 4510892, 4510893, 4510893, 4510895, 4510895, 4510896, 4510897, 4510899, +4510901, 4510901, 4510901, 4510902, 4510903, 4510903, 4510903, 4510905, 4510905, 4510906, 4510906, 4510907, 4510907, 4510909, +4510910, 4510911, 4510911, 4510911, 4510913, 4510913, 4510914, 4510914, 4510914, 4510915, 4510916, 4510918, 4510918, 4510919, +4510919, 4510919, 4510920, 4510921, 4510922, 4510923, 4510924, 4510924, 4510924, 4510924, 4510926, 4510927, 4510928, 4510928, +4510928, 4510928, 4510928, 4510930, 4510933, 4510935, 4510935, 4510935, 4510935, 
4510935, 4510936, 4510938, 4510947, 4510966, +4510967, 4510969, 4510973, 4510973, 4510974, 4510974, 4510974, 4510974, 4510974, 4510974, 4510975, 4510976, 4510976, 4510976, +4510976, 4510976, 4510976, 4510976, 4510977, 4510979, 4510979, 4510979, 4510979, 4510979, 4510979, 4510980, 4510980, 4510980, +4510980, 4510981, 4510981, 4510981, 4510982, 4510982, 4510982, 4510982, 4510982, 4510982, 4510982, 4510983, 4510983, 4510984, +4510984, 4510984, 4510984, 4510984, 4510985, 4510985, 4510985, 4510985, 4510987, 4510987, 4510987, 4510988, 4510988, 4510989, +4510989, 4510989, 4510989, 4510989, 4510990, 4510990, 4510990, 4510990, 4510990, 4510990, 4510990, 4510991, 4510991, 4510991, +4510991, 4510991, 4510991, 4510991, 4510992, 4510992, 4510992, 4510992, 4510992, 4510992, 4510992, 4510993, 4510993, 4510993, +4510994, 4510994, 4510994, 4510994, 4510995, 4510995, 4510996, 4510997, 4510998, 4510999, 4510999, 4511000, 4511000, 4511001, +4511001, 4511002, 4511002, 4511002, 4511003, 4511004, 4511004, 4511004, 4511004, 4511005, 4511006, 4511008, 4511008, 4511008, +4511009, 4511009, 4511009, 4511009, 4511010, 4511011, 4511011, 4511012, 4511012, 4511012, 4511012, 4511013, 4511013, 4511014, +4511014, 4511014, 4511014, 4511015, 4511018, 4511018, 4511018, 4511018, 4511018, 4511018, 4511018, 4511020, 4511020, 4511020, +4511020, 4511020, 4511020, 4511020, 4511021, 4511021, 4511021, 4511021, 4511021, 4511021, 4511021, 4511021, 4511021, 4511021, +4511021 + }; + + rnd %= utable[1024]; + l = 0; r = 1023; + while (l < r) + { + m = (l+r)/2; + if (utable[m] == rnd) + return m; + if (utable[m] >= rnd) + r = m - 1; + else + l = m + 1; + } + return l; +} + +static uns +gen_size(uns min, uns max, uns rnd) +{ + if (min == max) + return min; + else + return min + rnd % (max - min + 1); +} + +static void +gen_random(byte *buf, uns size, uns kn) +{ + kn = (kn + 0x36221057) ^ (kn << 24) ^ (kn << 15); + while (size--) + { + *buf++ = kn >> 24; + kn = kn*257 + 17; + } +} + +static int +keygen(byte *buf, uns 
kn) +{ + uns size, rnd; + + rnd = krand(kn); + if (key_min < 0) + size = gen_url_size(rnd); + else + size = gen_size(key_min, key_max, rnd); + *buf++ = kn >> 24; + *buf++ = kn >> 16; + *buf++ = kn >> 8; + *buf++ = kn; + if (size < 4) + return 4; + gen_random(buf, size-4, kn); + return size; +} + +static int +valgen(byte *buf, uns kn) +{ + uns size = gen_size(val_min, val_max, krand(kn)); + gen_random(buf, size, kn); + return size; +} + +static uns +keydec(byte *buf) +{ + return (buf[0] << 24) | (buf[1] << 16) | (buf[2] << 8) | buf[3]; +} + +static void +verb(char *msg, ...) +{ + int cat = 1; + va_list args; + + va_start(args, msg); + if (msg[0] == '^' && msg[1]) + { + cat = msg[1] - '0'; + msg += 2; + } + if (verbose >= cat) + vfprintf(stderr, msg, args); + va_end(args); +} + +static void +parse_size(int *min, int *max, char *c) +{ + char *d; + + if ((d = strchr(c, '-'))) + { + *d++ = 0; + *min = atol(c); + *max = atol(d); + } + else + *min = *max = atol(c); +} + +#define PROGRESS(i) if ((verbose > 2) || (verbose > 1 && !(i & 1023))) fprintf(stderr, "%d\r", i) + +int main(int argc, char **argv) +{ + int c, i, j, k, l, m; + byte kb[2048], vb[2048], vb2[2048]; + uns ks, vs, vs2, perc, cnt; + char *ch; + int dont_delete = 0; + timestamp_t timer; + + log_init("dbtest"); + setvbuf(stdout, NULL, _IONBF, 0); + setvbuf(stderr, NULL, _IONBF, 0); + while ((c = getopt(argc, argv, "c:p:k:n:d:vsStF")) >= 0) + switch (c) + { + case 'c': + opts.cache_size = atol(optarg); + break; + case 'p': + opts.page_order = atol(optarg); + break; + case 'k': + if (!strcmp(optarg, "U")) + key_min = key_max = -1; + else + parse_size(&key_min, &key_max, optarg); + break; + case 'n': + num_keys = atol(optarg); + break; + case 'd': + parse_size(&val_min, &val_max, optarg); + break; + case 'v': + verbose++; + break; + case 's': + opts.flags |= SDBM_SYNC; + break; + case 'S': + opts.flags |= SDBM_SYNC | SDBM_FSYNC; + break; + case 'F': + opts.flags |= SDBM_FAST; + break; + case 't': + dont_delete = 
1; + break; + default: + help(); + } + + if (key_min >= 0 && key_min < 4) + key_min = key_max = 4; + if (key_min == key_max && key_min >= 0) + opts.key_size = key_min; + if (val_min == val_max) + opts.val_size = val_min; + if (!num_keys) + die("Number of keys not given"); + + printf(NAME " benchmark: %d records, keys ", num_keys); + if (key_min < 0) + printf(""); + else + printf("%d-%d", key_min, key_max); + printf(", values %d-%d, page size %d, cache %d pages\n", val_min, val_max, 1 << opts.page_order, opts.cache_size); + + verb("OPEN(%s, key=%d, val=%d, cache=%d, pgorder=%d)\n", opts.name, opts.key_size, opts.val_size, + opts.cache_size, opts.page_order); + if (!dont_delete) + unlink(opts.name); + d = sdbm_open(&opts); + if (!d) + die("open failed: %m"); + + while (optind < argc) + { + char *o = argv[optind++]; + init_timer(&timer); + switch (*o) + { + case 'c': + printf("create %d: ", num_keys); + for(i=0; i= num_keys) + die("get_next: %d out of range", i); + PROGRESS(i); + vs2 = keygen(vb2, i); + if (ks != vs2 || memcmp(kb, vb2, ks)) + die("get_next: key mismatch at %d", i); + if (c) + { + vs2 = valgen(vb2, i); + if (vs != vs2 || memcmp(vb, vb2, vs)) + die("get_next: data mismatch at %d", i); + } + l += k; + m += i; + k++; + } + if (k != num_keys) + die("fetch: wrong # of keys: %d != %d", k, num_keys); + if (l != m) + die("fetch: wrong checksum: %d != %d", l, m); + break; + default: + help(); + } + sdbm_sync(d); + printf("%d ms\n", get_timer(&timer)); + } + + verb("CLOSE\n"); + sdbm_close(d); + + { + struct stat st; + if (stat(opts.name, &st)) die("stat: %m"); + printf("file size: %d bytes\n", (int) st.st_size); + } + return 0; +} diff --git a/lib/db-tool.c b/lib/db-tool.c new file mode 100644 index 0000000..bbb419a --- /dev/null +++ b/lib/db-tool.c @@ -0,0 +1,264 @@ +/* + * SDBM Database Utility + * + * (c) 2000--2001 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */ + +#include "lib/lib.h" +#include "lib/db.h" +#include "lib/db_internal.h" +#include "lib/fastbuf.h" +#include "lib/ff-binary.h" + +#include +#include +#include +#include +#include +#include + +static int verbose=0; +static int cache=1024; +static int force_key=-2; +static int force_val=-2; +static int force_page=-1; + +#define SDBM_DUMP_MAGIC 0x321f120e +#define SDBM_DUMP_VERSION 1 + +static void +dump(char *db, char *dmp) +{ + struct sdbm *src; + struct fastbuf *dest; + struct sdbm_options op; + int e, c=0; + + bzero(&op, sizeof(op)); + op.name = db; + op.cache_size = 16; + op.flags = 0; + src = sdbm_open(&op); + if (!src) + die("Source open failed: %m"); + + dest = bopen(dmp, O_WRONLY | O_CREAT | O_TRUNC, 65536); + bputl(dest, SDBM_DUMP_MAGIC); + bputl(dest, SDBM_DUMP_VERSION); + bputl(dest, src->page_order); + bputl(dest, src->key_size); + bputl(dest, src->val_size); + + fprintf(stderr, "Dumping database...\n"); + sdbm_rewind(src); + for(;;) + { + byte key[65536], val[65536]; + int klen = sizeof(key); + int vlen = sizeof(val); + e = sdbm_get_next(src, key, &klen, val, &vlen); + if (!e) + break; + if (e < 0) + fprintf(stderr, "sdbm_get_next: error %d\n", e); + if (!(c++ % 1024)) + { + fprintf(stderr, "%d\r", c); + fflush(stderr); + } + bputw(dest, klen); + bwrite(dest, key, klen); + bputw(dest, vlen); + bwrite(dest, val, vlen); + } + + sdbm_close(src); + bclose(dest); + fprintf(stderr, "Dumped %d records\n", c); +} + +static void +restore(char *dmp, char *db) +{ + struct sdbm *dest; + struct fastbuf *src; + struct sdbm_options op; + int e, c=0; + + src = bopen(dmp, O_RDONLY, 65536); + if (bgetl(src) != SDBM_DUMP_MAGIC || + bgetl(src) != SDBM_DUMP_VERSION) + die("%s: not a sdbm dump", dmp); + + bzero(&op, sizeof(op)); + op.name = db; + e = unlink(op.name); + if (e < 0 && errno != ENOENT) + die("unlink: %m"); + op.cache_size = cache; + op.flags = SDBM_CREAT | SDBM_WRITE | SDBM_FAST; + op.page_order = bgetl(src); + if (force_page >= 0) + op.page_order = 
force_page; + op.key_size = bgetl(src); + if (force_key >= 0) + op.key_size = force_key; + op.val_size = bgetl(src); + if (force_val >= 0) + op.val_size = force_val; + dest = sdbm_open(&op); + if (!dest) + die("Destination open failed"); + + fprintf(stderr, "Restoring database...\n"); + for(;;) + { + byte key[65536], val[65536]; + int klen, vlen; + klen = bgetw(src); + if (klen < 0) + break; + breadb(src, key, klen); + vlen = bgetw(src); + if (vlen < 0) + die("Corrupted dump file: value missing"); + breadb(src, val, vlen); + if (!(c++ % 1024)) + { + fprintf(stderr, "%d\r", c); + fflush(stderr); + } + if (sdbm_store(dest, key, klen, val, vlen) == 0) + fprintf(stderr, "sdbm_store: duplicate key\n"); + } + + bclose(src); + sdbm_close(dest); + fprintf(stderr, "Restored %d records\n", c); +} + +static void +rebuild(char *sdb, char *ddb) +{ + struct sdbm *src, *dest; + struct sdbm_options op; + int e, c=0; + + bzero(&op, sizeof(op)); + op.name = sdb; + op.cache_size = 16; + op.flags = 0; + src = sdbm_open(&op); + if (!src) + die("Source open failed: %m"); + + op.name = ddb; + e = unlink(op.name); + if (e < 0 && errno != ENOENT) + die("unlink: %m"); + op.cache_size = cache; + op.flags = SDBM_CREAT | SDBM_WRITE | SDBM_FAST; + op.page_order = (force_page >= 0) ? (u32) force_page : src->root->page_order; + op.key_size = (force_key >= -1) ? force_key : src->root->key_size; + op.val_size = (force_val >= -1) ? 
force_val : src->root->val_size; + dest = sdbm_open(&op); + if (!dest) + die("Destination open failed"); + + fprintf(stderr, "Rebuilding database...\n"); + sdbm_rewind(src); + for(;;) + { + byte key[65536], val[65536]; + int klen = sizeof(key); + int vlen = sizeof(val); + e = sdbm_get_next(src, key, &klen, val, &vlen); + if (!e) + break; + if (e < 0) + fprintf(stderr, "sdbm_get_next: error %d\n", e); + if (!(c++ % 1024)) + { + fprintf(stderr, "%d\r", c); + fflush(stderr); + } + if (sdbm_store(dest, key, klen, val, vlen) == 0) + fprintf(stderr, "sdbm_store: duplicate key\n"); + } + + sdbm_close(src); + sdbm_close(dest); + fprintf(stderr, "Copied %d records\n", c); +} + +int +main(int argc, char **argv) +{ + int o; + + while ((o = getopt(argc, argv, "vc:k:d:p:")) >= 0) + switch (o) + { + case 'v': + verbose++; + break; + case 'c': + cache=atol(optarg); + break; + case 'k': + force_key=atol(optarg); + break; + case 'd': + force_val=atol(optarg); + break; + case 'p': + force_page=atol(optarg); + break; + default: + bad: + fprintf(stderr, "Usage: db-tool [] \n\ +\n\ +Options:\n\ +-v\t\tBe verbose\n\ +-c\t\tUse cache of pages\n\ +-d\t\tSet data size to (-1=variable) [restore,rebuild]\n\ +-k\t\tSet key size to (-1=variable) [restore,rebuild]\n\ +-p\t\tSet page order to [restore,rebuild]\n\ +\n\ +Commands:\n\ +b \tRebuild database\n\ +d \tDump database\n\ +r \tRestore database from dump\n\ +"); + return 1; + } + argc -= optind; + argv += optind; + if (argc < 1 || strlen(argv[0]) != 1) + goto bad; + + switch (argv[0][0]) + { + case 'b': + if (argc != 3) + goto bad; + rebuild(argv[1], argv[2]); + break; + case 'd': + if (argc != 3) + goto bad; + dump(argv[1], argv[2]); + break; + case 'r': + if (argc != 3) + goto bad; + restore(argv[1], argv[2]); + break; + default: + goto bad; + } + return 0; +} diff --git a/lib/db.c b/lib/db.c new file mode 100644 index 0000000..d9c984b --- /dev/null +++ b/lib/db.c @@ -0,0 +1,598 @@ +/* + * UCW Library -- Fast Database Management Routines 
+ * + * (c) 1999--2001 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +/* + * This library uses the standard algorithm for external hashing (page directory + * mapping topmost K bits of hash value to page address, directory splits and + * so on). Peculiarities of this implementation (aka design decisions): + * + * o We allow both fixed and variable length keys and values (this includes + * zero size values for cases you want to represent only a set of keys). + * o We assume that key_size + val_size < page_size. + * o We never shrink the directory nor free empty pages. (The reason is that + * if the database was once large, it's likely it will again become large soon.) + * o The only pages which can be freed are those of the directory (during + * directory split), so we keep only a simple 32-entry free block list + * and we assume it's sorted. + * o All pointers are always given in pages from start of the file. + * This gives us page_size*2^32 limit for file size which should be enough. + */ + +#include "lib/lib.h" +#include "lib/lfs.h" +#include "lib/pagecache.h" +#include "lib/db.h" +#include "lib/db_internal.h" + +#include +#include +#include +#include + +#define GET_PAGE(d,x) pgc_get((d)->cache, (d)->fd, ((sh_off_t)(x)) << (d)->page_order) +#define GET_ZERO_PAGE(d,x) pgc_get_zero((d)->cache, (d)->fd, ((sh_off_t)(x)) << (d)->page_order) +#define READ_PAGE(d,x) pgc_read((d)->cache, (d)->fd, ((sh_off_t)(x)) << (d)->page_order) +#define READ_DIR(d,off) pgc_read((d)->cache, (d)->fd, (((sh_off_t)(d)->root->dir_start) << (d)->page_order) + (off)) + +struct sdbm * +sdbm_open(struct sdbm_options *o) +{ + struct sdbm *d; + struct sdbm_root root, *r; + uns cache_size = o->cache_size ? o->cache_size : 16; + + d = xmalloc_zero(sizeof(struct sdbm)); + d->flags = o->flags; + d->fd = sh_open(o->name, ((d->flags & SDBM_WRITE) ? 
O_RDWR : O_RDONLY), 0666); + if (d->fd >= 0) /* Already exists, let's check it */ + { + if (read(d->fd, &root, sizeof(root)) != sizeof(root)) + goto bad; + if (root.magic != SDBM_MAGIC || root.version != SDBM_VERSION) + goto bad; + d->file_size = sh_seek(d->fd, 0, SEEK_END) >> root.page_order; + d->page_order = root.page_order; + d->page_size = 1 << root.page_order; + d->cache = pgc_open(d->page_size, cache_size); + d->root_page = pgc_read(d->cache, d->fd, 0); + d->root = (void *) d->root_page->data; + } + else if ((d->flags & SDBM_CREAT) && (d->fd = sh_open(o->name, O_RDWR | O_CREAT, 0666)) >= 0) + { + struct page *q; + uns page_order = o->page_order; + if (page_order < 10) + page_order = 10; + d->page_size = 1 << page_order; + d->cache = pgc_open(d->page_size, cache_size); + d->root_page = GET_ZERO_PAGE(d, 0); + r = d->root = (void *) d->root_page->data; /* Build root page */ + r->magic = SDBM_MAGIC; + r->version = SDBM_VERSION; + r->page_order = d->page_order = page_order; + r->key_size = o->key_size; + r->val_size = o->val_size; + r->dir_start = 1; + r->dir_order = 0; + d->file_size = 3; + q = GET_ZERO_PAGE(d, 1); /* Build page directory */ + GET32(q->data, 0) = 2; + pgc_put(d->cache, q); + q = GET_ZERO_PAGE(d, 2); /* Build single data page */ + pgc_put(d->cache, q); + } + else + goto bad; + d->dir_size = 1 << d->root->dir_order; + d->dir_shift = 32 - d->root->dir_order; + d->page_mask = d->page_size - 1; + d->key_size = d->root->key_size; + d->val_size = d->root->val_size; + return d; + +bad: + sdbm_close(d); + return NULL; +} + +void +sdbm_close(struct sdbm *d) +{ + if (d->root_page) + pgc_put(d->cache, d->root_page); + if (d->cache) + pgc_close(d->cache); + if (d->fd >= 0) + close(d->fd); + xfree(d); +} + +static uns +sdbm_alloc_pages(struct sdbm *d, uns number) +{ + uns where = d->file_size; + if (where + number < where) /* Wrap around? 
*/ + die("SDB: Database file too large, giving up"); + d->file_size += number; + return where; +} + +static uns +sdbm_alloc_page(struct sdbm *d) +{ + uns pos; + + if (!d->root->free_pool[0].count) + return sdbm_alloc_pages(d, 1); + pos = d->root->free_pool[0].first; + d->root->free_pool[0].first++; + if (!--d->root->free_pool[0].count) + { + memmove(d->root->free_pool, d->root->free_pool+1, (SDBM_NUM_FREE_PAGE_POOLS-1) * sizeof(d->root->free_pool[0])); + d->root->free_pool[SDBM_NUM_FREE_PAGE_POOLS-1].count = 0; + } + pgc_mark_dirty(d->cache, d->root_page); + return pos; +} + +static void +sdbm_free_pages(struct sdbm *d, uns start, uns number) +{ + uns i = 0; + + while (d->root->free_pool[i].count) + i++; + ASSERT(i < SDBM_NUM_FREE_PAGE_POOLS); + d->root->free_pool[i].first = start; + d->root->free_pool[i].count = number; + pgc_mark_dirty(d->cache, d->root_page); +} + +u32 +sdbm_hash(byte *key, uns keylen) +{ + /* + * This used to be the same hash function as GDBM uses, + * but it turned out that it tends to give the same results + * on similar keys. Damn it. 
+ */ + u32 value = 0x238F13AF * keylen; + while (keylen--) + value = 37*value + *key++; + return (1103515243 * value + 12345); +} + +static int +sdbm_get_entry(struct sdbm *d, byte *pos, byte **key, uns *keylen, byte **val, uns *vallen) +{ + byte *p = pos; + + if (d->key_size >= 0) + *keylen = d->key_size; + else + { + *keylen = (p[0] << 8) | p[1]; + p += 2; + } + *key = p; + p += *keylen; + if (d->val_size >= 0) + *vallen = d->val_size; + else + { + *vallen = (p[0] << 8) | p[1]; + p += 2; + } + *val = p; + p += *vallen; + return p - pos; +} + +static int +sdbm_entry_len(struct sdbm *d, uns keylen, uns vallen) +{ + uns len = keylen + vallen; + if (d->key_size < 0) + len += 2; + if (d->val_size < 0) + len += 2; + return len; +} + +static void +sdbm_store_entry(struct sdbm *d, byte *pos, byte *key, uns keylen, byte *val, uns vallen) +{ + if (d->key_size < 0) + { + *pos++ = keylen >> 8; + *pos++ = keylen; + } + memmove(pos, key, keylen); + pos += keylen; + if (d->val_size < 0) + { + *pos++ = vallen >> 8; + *pos++ = vallen; + } + memmove(pos, val, vallen); +} + +static uns +sdbm_page_rank(struct sdbm *d, uns dirpos) +{ + struct page *b; + u32 pg, x; + uns l, r; + uns pm = d->page_mask; + + b = READ_DIR(d, dirpos & ~pm); + pg = GET32(b->data, dirpos & pm); + l = dirpos; + while ((l & pm) && GET32(b->data, (l - 4) & pm) == pg) + l -= 4; + r = dirpos + 4; + /* We heavily depend on unused directory entries being zero */ + while ((r & pm) && GET32(b->data, r & pm) == pg) + r += 4; + pgc_put(d->cache, b); + + if (!(l & pm) && !(r & pm)) + { + /* Note that if it spans page boundary, it must contain an integer number of pages */ + while (l) + { + b = READ_DIR(d, (l - 4) & ~pm); + x = GET32(b->data, 0); + pgc_put(d->cache, b); + if (x != pg) + break; + l -= d->page_size; + } + while (r < 4*d->dir_size) + { + b = READ_DIR(d, r & ~pm); + x = GET32(b->data, 0); + pgc_put(d->cache, b); + if (x != pg) + break; + r += d->page_size; + } + } + return (r - l) >> 2; +} + +static void 
+sdbm_expand_directory(struct sdbm *d) +{ + struct page *b, *c; + int i, ent; + u32 *dir, *t; + + if (d->root->dir_order >= 31) + die("SDB: Database directory too large, giving up"); + + if (4*d->dir_size < d->page_size) + { + /* It still fits within single page */ + b = READ_DIR(d, 0); + dir = (u32 *) b->data; + for(i=d->dir_size-1; i>=0; i--) + dir[2*i] = dir[2*i+1] = dir[i]; + pgc_mark_dirty(d->cache, b); + pgc_put(d->cache, b); + } + else + { + uns old_dir = d->root->dir_start; + uns old_dir_pages = 1 << (d->root->dir_order + 2 - d->page_order); + uns page, new_dir; + new_dir = d->root->dir_start = sdbm_alloc_pages(d, 2*old_dir_pages); + ent = 1 << (d->page_order - 3); + for(page=0; page < old_dir_pages; page++) + { + b = READ_PAGE(d, old_dir + page); + dir = (u32 *) b->data; + c = GET_PAGE(d, new_dir + 2*page); + t = (u32 *) c->data; + for(i=0; icache, c); + c = GET_PAGE(d, new_dir + 2*page + 1); + t = (u32 *) c->data; + for(i=0; icache, c); + pgc_put(d->cache, b); + } + if (!(d->flags & SDBM_FAST)) + { + /* + * Unless in super-fast mode, fill old directory pages with zeroes. + * This slows us down a bit, but allows database reconstruction after + * the free list is lost. + */ + for(page=0; page < old_dir_pages; page++) + { + b = GET_ZERO_PAGE(d, old_dir + page); + pgc_put(d->cache, b); + } + } + sdbm_free_pages(d, old_dir, old_dir_pages); + } + + d->root->dir_order++; + d->dir_size = 1 << d->root->dir_order; + d->dir_shift = 32 - d->root->dir_order; + pgc_mark_dirty(d->cache, d->root_page); + if (!(d->flags & SDBM_FAST)) + sdbm_sync(d); +} + +static void +sdbm_split_data(struct sdbm *d, struct sdbm_bucket *s, struct sdbm_bucket *d0, struct sdbm_bucket *d1, uns sigbit) +{ + byte *sp = s->data; + byte *dp[2] = { d0->data, d1->data }; + byte *K, *D; + uns Kl, Dl, sz, i; + + while (sp < s->data + s->used) + { + sz = sdbm_get_entry(d, sp, &K, &Kl, &D, &Dl); + sp += sz; + i = (sdbm_hash(K, Kl) & (1 << sigbit)) ? 
1 : 0; + sdbm_store_entry(d, dp[i], K, Kl, D, Dl); + dp[i] += sz; + } + d0->used = dp[0] - d0->data; + d1->used = dp[1] - d1->data; +} + +static void +sdbm_split_dir(struct sdbm *d, uns dirpos, uns count, uns pos) +{ + struct page *b; + uns i; + + count *= 4; + while (count) + { + b = READ_DIR(d, dirpos & ~d->page_mask); + i = d->page_size - (dirpos & d->page_mask); + if (i > count) + i = count; + count -= i; + while (i) + { + GET32(b->data, dirpos & d->page_mask) = pos; + dirpos += 4; + i -= 4; + } + pgc_mark_dirty(d->cache, b); + pgc_put(d->cache, b); + } +} + +static inline uns +sdbm_dirpos(struct sdbm *d, uns hash) +{ + if (d->dir_shift != 32) /* avoid shifting by 32 bits */ + return (hash >> d->dir_shift) << 2; /* offset in the directory */ + else + return 0; +} + +static struct page * +sdbm_split_page(struct sdbm *d, struct page *b, u32 hash) +{ + struct page *p[2]; + uns i, rank, sigbit, rank_log, dirpos, newpg; + + dirpos = sdbm_dirpos(d, hash); + rank = sdbm_page_rank(d, dirpos); /* rank = # of pointers to this page */ + if (rank == 1) + { + sdbm_expand_directory(d); + rank = 2; + dirpos *= 2; + } + rank_log = 1; /* rank_log = log2(rank) */ + while ((1U << rank_log) < rank) + rank_log++; + sigbit = d->dir_shift + rank_log - 1; /* sigbit = bit we split on */ + p[0] = b; + newpg = sdbm_alloc_page(d); + p[1] = GET_PAGE(d, newpg); + sdbm_split_data(d, (void *) b->data, (void *) p[0]->data, (void *) p[1]->data, sigbit); + sdbm_split_dir(d, (dirpos & ~(4*rank - 1))+2*rank, rank/2, newpg); + pgc_mark_dirty(d->cache, p[0]); + i = (hash & (1 << sigbit)) ? 
1 : 0; + pgc_put(d->cache, p[!i]); + return p[i]; +} + +static int +sdbm_put_user(byte *D, uns Dl, byte *val, uns *vallen) +{ + if (vallen) + { + if (*vallen < Dl) + return 1; + *vallen = Dl; + } + if (val) + memcpy(val, D, Dl); + return 0; +} + +static int +sdbm_access(struct sdbm *d, byte *key, uns keylen, byte *val, uns *vallen, uns mode) /* 0=read, 1=store, 2=replace */ +{ + struct page *p, *q; + u32 hash, h, pos, size; + struct sdbm_bucket *b; + byte *c, *e; + int rc; + + if ((d->key_size >= 0 && keylen != (uns) d->key_size) || keylen > 65535) + return SDBM_ERROR_BAD_KEY_SIZE; + if (val && ((d->val_size >= 0 && *vallen != (uns) d->val_size) || *vallen >= 65535) && mode) + return SDBM_ERROR_BAD_VAL_SIZE; + if (!mode && !(d->flags & SDBM_WRITE)) + return SDBM_ERROR_READ_ONLY; + hash = sdbm_hash(key, keylen); + h = sdbm_dirpos(d, hash); + p = READ_DIR(d, h & ~d->page_mask); + pos = GET32(p->data, h & d->page_mask); + pgc_put(d->cache, p); + q = READ_PAGE(d, pos); + b = (void *) q->data; + c = b->data; + e = c + b->used; + while (c < e) + { + byte *K, *D; + uns Kl, Dl, s; + s = sdbm_get_entry(d, c, &K, &Kl, &D, &Dl); + if (Kl == keylen && !memcmp(K, key, Kl)) + { + /* Gotcha! */ + switch (mode) + { + case 0: /* fetch: found */ + rc = sdbm_put_user(D, Dl, val, vallen); + pgc_put(d->cache, q); + return rc ? 
SDBM_ERROR_TOO_LARGE : 1; + case 1: /* store: already present */ + pgc_put(d->cache, q); + return 0; + default: /* replace: delete the old one */ + memmove(c, c+s, e-(c+s)); + b->used -= s; + goto insert; + } + } + c += s; + } + if (!mode || !val) /* fetch or delete: no success */ + { + pgc_put(d->cache, q); + return 0; + } + +insert: + if (val) + { + size = sdbm_entry_len(d, keylen, *vallen); + while (b->used + size > d->page_size - sizeof(struct sdbm_bucket)) + { + /* Page overflow, need to split */ + if (size >= d->page_size - sizeof(struct sdbm_bucket)) + { + pgc_put(d->cache, q); + return SDBM_ERROR_GIANT; + } + q = sdbm_split_page(d, q, hash); + b = (void *) q->data; + } + sdbm_store_entry(d, b->data + b->used, key, keylen, val, *vallen); + b->used += size; + } + pgc_mark_dirty(d->cache, q); + pgc_put(d->cache, q); + if (d->flags & SDBM_SYNC) + sdbm_sync(d); + return 1; +} + +int +sdbm_store(struct sdbm *d, byte *key, uns keylen, byte *val, uns vallen) +{ + return sdbm_access(d, key, keylen, val, &vallen, 1); +} + +int +sdbm_replace(struct sdbm *d, byte *key, uns keylen, byte *val, uns vallen) +{ + return sdbm_access(d, key, keylen, val, &vallen, 2); +} + +int +sdbm_delete(struct sdbm *d, byte *key, uns keylen) +{ + return sdbm_access(d, key, keylen, NULL, NULL, 2); +} + +int +sdbm_fetch(struct sdbm *d, byte *key, uns keylen, byte *val, uns *vallen) +{ + return sdbm_access(d, key, keylen, val, vallen, 0); +} + +void +sdbm_rewind(struct sdbm *d) +{ + d->find_page = 1; + d->find_pos = 0; + d->find_free_list = 0; +} + +int +sdbm_get_next(struct sdbm *d, byte *key, uns *keylen, byte *val, uns *vallen) +{ + uns page = d->find_page; + uns pos = d->find_pos; + byte *K, *V; + uns c, Kl, Vl; + struct page *p; + struct sdbm_bucket *b; + + for(;;) + { + if (!pos) + { + if (page >= d->file_size) + break; + if (page == d->root->dir_start) + page += (4*d->dir_size + d->page_size - 1) >> d->page_order; + else if (page == d->root->free_pool[d->find_free_list].first) + page 
+= d->root->free_pool[d->find_free_list++].count; + else + pos = 4; + continue; + } + p = READ_PAGE(d, page); + b = (void *) p->data; + if (pos - 4 >= b->used) + { + pos = 0; + page++; + pgc_put(d->cache, p); + continue; + } + c = sdbm_get_entry(d, p->data + pos, &K, &Kl, &V, &Vl); + d->find_page = page; + d->find_pos = pos + c; + c = sdbm_put_user(K, Kl, key, keylen) || + sdbm_put_user(V, Vl, val, vallen); + pgc_put(d->cache, p); + return c ? SDBM_ERROR_TOO_LARGE : 1; + } + d->find_page = page; + d->find_pos = pos; + return 0; +} + +void +sdbm_sync(struct sdbm *d) +{ + pgc_flush(d->cache); + if (d->flags & SDBM_FSYNC) + fsync(d->fd); +} diff --git a/lib/db.h b/lib/db.h new file mode 100644 index 0000000..41b81aa --- /dev/null +++ b/lib/db.h @@ -0,0 +1,50 @@ +/* + * UCW Library -- Fast Database Management Routines + * + * (c) 1999--2001 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#ifndef _UCW_DB_H +#define _UCW_DB_H + +struct sdbm; + +struct sdbm_options { /* Set to 0 for default */ + char *name; /* File name */ + uns flags; /* See SDBM_xxx below */ + uns page_order; /* Binary logarithm of file page size */ + uns cache_size; /* Number of cached pages */ + int key_size; /* Key size, -1=variable */ + int val_size; /* Value size, -1=variable */ +}; + +struct sdbm *sdbm_open(struct sdbm_options *); +void sdbm_close(struct sdbm *); +int sdbm_store(struct sdbm *, byte *key, uns keylen, byte *val, uns vallen); +int sdbm_replace(struct sdbm *, byte *key, uns keylen, byte *val, uns vallen); /* val == NULL -> delete */ +int sdbm_delete(struct sdbm *, byte *key, uns keylen); +int sdbm_fetch(struct sdbm *, byte *key, uns keylen, byte *val, uns *vallen); /* val can be NULL */ +void sdbm_rewind(struct sdbm *); +int sdbm_get_next(struct sdbm *, byte *key, uns *keylen, byte *val, uns *vallen); /* val can be NULL */ +void sdbm_sync(struct sdbm *); +u32 sdbm_hash(byte *key, uns 
keylen); + +#define SDBM_CREAT 1 /* Create the database if it doesn't exist */ +#define SDBM_WRITE 2 /* Open the database in read/write mode */ +#define SDBM_SYNC 4 /* Sync after each operation */ +#define SDBM_FAST 8 /* Don't sync on directory splits -- results in slightly faster + * operation, but reconstruction of database after program crash + * may be impossible. + */ +#define SDBM_FSYNC 16 /* When syncing, call fsync() */ + +#define SDBM_ERROR_BAD_KEY_SIZE -1 /* Fixed key size doesn't match */ +#define SDBM_ERROR_BAD_VAL_SIZE -2 /* Fixed value size doesn't match */ +#define SDBM_ERROR_TOO_LARGE -3 /* Key/value doesn't fit in buffer supplied */ +#define SDBM_ERROR_READ_ONLY -4 /* Database has been opened read only */ +#define SDBM_ERROR_GIANT -5 /* Key/value too large to fit in a page */ + +#endif diff --git a/lib/db_internal.h b/lib/db_internal.h new file mode 100644 index 0000000..b480a79 --- /dev/null +++ b/lib/db_internal.h @@ -0,0 +1,58 @@ +/* + * UCW Library -- Fast Database Management Routines -- Internal Declarations + * + * (c) 1999--2001 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#define SDBM_NUM_FREE_PAGE_POOLS 32 + +struct sdbm_root { /* Must fit in 1K which is minimum page size */ + u32 magic; + u32 version; + u32 page_order; /* Binary logarithm of page size */ + s32 key_size; /* Key/val size, -1=variable */ + s32 val_size; + u32 dir_start; /* First page of the page directory */ + u32 dir_order; /* Binary logarithm of directory size */ + /* + * As we know the only thing which can be freed is the page directory + * and it can grow only a limited number of times, we can use a very + * simple-minded representation of the free page pool. We also assume + * these entries are sorted by start position. 
+ */ + struct { + u32 first; + u32 count; + } free_pool[SDBM_NUM_FREE_PAGE_POOLS]; +}; + +struct sdbm_bucket { + u32 used; /* Bytes used in this bucket */ + byte data[0]; +}; + +struct sdbm { + struct page_cache *cache; + int fd; + struct sdbm_root *root; + struct page *root_page; + int key_size; /* Cached values from root page */ + int val_size; + uns page_order; + uns page_size; + uns page_mask; /* page_size - 1 */ + uns dir_size; /* Page directory size in entries */ + uns dir_shift; /* Number of significant bits of hash function */ + uns file_size; /* in pages */ + uns flags; + uns find_page, find_pos; /* Current pointer for sdbm_find_next() */ + uns find_free_list; /* First free list entry not skipped by sdbm_find_next() */ +}; + +#define SDBM_MAGIC 0x5344424d +#define SDBM_VERSION 2 + +#define GET32(p,o) *((u32 *)((p)+(o))) diff --git a/lib/default.cfg b/lib/default.cfg new file mode 100644 index 0000000..ceb6d58 --- /dev/null +++ b/lib/default.cfg @@ -0,0 +1,50 @@ +# Configuration variables of the UCW library and their default values +# (c) 2005--2007 Martin Mares + +# Version of the whole package +Set("SHERLOCK_VERSION" => "3.12.3"); + +# Compile everything with debug information and ASSERT's +UnSet("CONFIG_DEBUG"); + +# Enable aggressive optimizations depending on exact CPU type (don't use for portable packages) +UnSet("CONFIG_EXACT_CPU"); + +# Support files >2GB +Set("CONFIG_LARGE_FILES"); + +# Use shared libraries +UnSet("CONFIG_SHARED"); + +# If your system doesn't contain GNU libc 2.3 or newer, it's recommended to let Sherlock +# use its own regex library (a copy of the glibc one), because the default regex library +# is likely to be crappy. +Set("CONFIG_OWN_REGEX"); + +# If your system can't reset getopt with 'optind = 0', you need to compile our internal copy +# of GNU libc's getopt. This should not be necessary on GNU libc. 
+UnSet("CONFIG_OWN_GETOPT"); + +# Install libraries and their API includes +UnSet("CONFIG_INSTALL_API"); + +# Build with support for multi-threaded programs +Set("CONFIG_UCW_THREADS" => 1); + +# Include Perl modules +Set("CONFIG_UCW_PERL" => 1); + +# Include Perl modules written in C +UnSet("CONFIG_UCW_PERL_MODULES"); + +# Include support utilities for shell scripts +Set("CONFIG_UCW_SHELL_UTILS" => 1); + +# Default configuration file +UnSet("DEFAULT_CONFIG"); + +# Environment variable with configuration file +UnSet("ENV_VAR_CONFIG"); + +# Return success +1; diff --git a/lib/eltpool.c b/lib/eltpool.c new file mode 100644 index 0000000..f82de84 --- /dev/null +++ b/lib/eltpool.c @@ -0,0 +1,100 @@ +/* + * UCW Library -- Fast Allocator for Fixed-Size Elements + * + * (c) 2007 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +/* + * This allocator is optimized for intensive allocation and freeing of small + * blocks of identical sizes. System memory is allocated by multiples of the + * page size and it is returned back only when the whole eltpool is deleted. + * + * In the future, we can add returning of memory to the system and also cache + * coloring like in the SLAB allocator used in the Linux kernel. 
+ */ + +#undef LOCAL_DEBUG + +#include "lib/lib.h" +#include "lib/eltpool.h" + +struct eltpool * +ep_new(uns elt_size, uns elts_per_chunk) +{ + struct eltpool *pool = xmalloc_zero(sizeof(*pool)); + pool->elt_size = ALIGN_TO(MAX(elt_size, sizeof(struct eltpool_free)), CPU_STRUCT_ALIGN); + pool->chunk_size = CPU_PAGE_SIZE; + while (pool->elt_size * elts_per_chunk + sizeof(struct eltpool_chunk) > pool->chunk_size) + pool->chunk_size *= 2; + pool->elts_per_chunk = (pool->chunk_size - sizeof(struct eltpool_chunk)) / pool->elt_size; + DBG("ep_new(): got elt_size=%d, epc=%d; used chunk_size=%d, epc=%d", elt_size, elts_per_chunk, pool->chunk_size, pool->elts_per_chunk); + return pool; +} + +void +ep_delete(struct eltpool *pool) +{ + struct eltpool_chunk *ch; + while (ch = pool->first_chunk) + { + pool->first_chunk = ch->next; + page_free(ch, pool->chunk_size); + } + xfree(pool); +} + +void * +ep_alloc_slow(struct eltpool *pool) /* Slow path of ep_alloc(): the free list is empty, so allocate and carve up a new chunk */ +{ + struct eltpool_chunk *ch = page_alloc(pool->chunk_size); + void *p = (void *)(ch+1); /* Elements start right behind the chunk header */ + for (uns i=1; i<pool->elts_per_chunk; i++) /* Put all elements but the last one on the free list */ + { + struct eltpool_free *f = p; + f->next = pool->first_free; + pool->first_free = f; + p += pool->elt_size; + } + ch->next = pool->first_chunk; + pool->first_chunk = ch; + return p; /* The last element of the chunk goes directly to the caller */ +} + +#ifdef TEST + +#include <stdio.h> +#include "lib/clists.h" + +struct argh { + cnode n; + byte x[1]; +} PACKED; + +int main(void) +{ + struct eltpool *ep = ep_new(sizeof(struct argh), 64); + clist l; + clist_init(&l); + for (uns i=0; i<65536; i++) + { + struct argh *a = ep_alloc(ep); + if (i % 3) + clist_add_tail(&l, &a->n); + else + clist_add_head(&l, &a->n); + if (!(i % 5)) + { + a = clist_head(&l); + clist_remove(&a->n); + ep_free(ep, a); + } + } + ep_delete(ep); + puts("OK"); + return 0; +} + +#endif diff --git a/lib/eltpool.h b/lib/eltpool.h new file mode 100644 index 0000000..7e295fb --- /dev/null +++ b/lib/eltpool.h @@ -0,0 +1,65 @@ +/* + * UCW Library -- Fast Allocator for Fixed-Size Elements + * + * (c) 2007 Martin Mares + * + * This 
software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#ifndef _UCW_ELTPOOL_H +#define _UCW_ELTPOOL_H + +struct eltpool { + struct eltpool_chunk *first_chunk; + struct eltpool_free *first_free; + uns elt_size; + uns chunk_size; + uns elts_per_chunk; + uns num_allocated; // Just for debugging +}; + +struct eltpool_chunk { + struct eltpool_chunk *next; + /* Chunk data continue here */ +}; + +struct eltpool_free { + struct eltpool_free *next; +}; + +struct eltpool *ep_new(uns elt_size, uns elts_per_chunk); +void ep_delete(struct eltpool *pool); +void *ep_alloc_slow(struct eltpool *pool); + +static inline void * +ep_alloc(struct eltpool *pool) +{ + pool->num_allocated++; +#ifdef CONFIG_FAKE_ELTPOOL + return xmalloc(pool->elt_size); +#else + struct eltpool_free *elt; + if (elt = pool->first_free) + pool->first_free = elt->next; + else + elt = ep_alloc_slow(pool); + return elt; +#endif +} + +static inline void +ep_free(struct eltpool *pool, void *p) +{ + pool->num_allocated--; +#ifdef CONFIG_FAKE_ELTPOOL + (void) pool; + xfree(p); +#else + struct eltpool_free *elt = p; + elt->next = pool->first_free; + pool->first_free = elt; +#endif +} + +#endif diff --git a/lib/eltpool.test b/lib/eltpool.test new file mode 100644 index 0000000..85bed69 --- /dev/null +++ b/lib/eltpool.test @@ -0,0 +1,4 @@ +# Tests for eltpools + +Run: ../obj/lib/eltpool-t +Out: OK diff --git a/lib/exitstatus.c b/lib/exitstatus.c new file mode 100644 index 0000000..1095c7f --- /dev/null +++ b/lib/exitstatus.c @@ -0,0 +1,36 @@ +/* + * UCW Library -- Formatting of Process Exit Status + * + * (c) 2004 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */ + +#include "lib/lib.h" + +#include +#include +#include + +int +format_exit_status(char *msg, int stat) +{ + if (stat < 0) + sprintf(msg, "failed to fork (err=%d)", errno); + else if (WIFEXITED(stat) && WEXITSTATUS(stat) < 256) + { + if (WEXITSTATUS(stat)) + sprintf(msg, "died with exit code %d", WEXITSTATUS(stat)); + else + { + msg[0] = 0; + return 0; + } + } + else if (WIFSIGNALED(stat)) + sprintf(msg, "died on signal %d", WTERMSIG(stat)); + else + sprintf(msg, "died with status %x", stat); + return 1; +} diff --git a/lib/fastbuf.c b/lib/fastbuf.c new file mode 100644 index 0000000..be7e979 --- /dev/null +++ b/lib/fastbuf.c @@ -0,0 +1,204 @@ +/* + * UCW Library -- Fast Buffered I/O + * + * (c) 1997--2007 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#include "lib/lib.h" +#include "lib/fastbuf.h" + +#include +#include + +void bclose(struct fastbuf *f) +{ + if (f) + { + bflush(f); + if (f->close) + f->close(f); + } +} + +void bflush(struct fastbuf *f) +{ + if (f->bptr > f->bstop) + f->spout(f); + else if (f->bstop > f->buffer) + f->bptr = f->bstop = f->buffer; +} + +inline void bsetpos(struct fastbuf *f, sh_off_t pos) +{ + /* We can optimize seeks only when reading */ + if (pos >= f->pos - (f->bstop - f->buffer) && pos <= f->pos) + f->bptr = f->bstop + (pos - f->pos); + else + { + bflush(f); + if (!f->seek || !f->seek(f, pos, SEEK_SET)) + die("bsetpos: stream not seekable"); + } +} + +void bseek(struct fastbuf *f, sh_off_t pos, int whence) +{ + switch (whence) + { + case SEEK_SET: + return bsetpos(f, pos); + case SEEK_CUR: + return bsetpos(f, btell(f) + pos); + case SEEK_END: + bflush(f); + if (!f->seek || !f->seek(f, pos, SEEK_END)) + die("bseek: stream not seekable"); + break; + default: + die("bseek: invalid whence=%d", whence); + } +} + +int bgetc_slow(struct fastbuf *f) +{ + if (f->bptr < f->bstop) + return *f->bptr++; + if (!f->refill(f)) + return -1; + 
return *f->bptr++; +} + +int bpeekc_slow(struct fastbuf *f) +{ + if (f->bptr < f->bstop) + return *f->bptr; + if (!f->refill(f)) + return -1; + return *f->bptr; +} + +void bputc_slow(struct fastbuf *f, uns c) +{ + if (f->bptr >= f->bufend) + f->spout(f); + *f->bptr++ = c; +} + +uns bread_slow(struct fastbuf *f, void *b, uns l, uns check) +{ + uns total = 0; + while (l) + { + uns k = f->bstop - f->bptr; + + if (!k) + { + f->refill(f); + k = f->bstop - f->bptr; + if (!k) + break; + } + if (k > l) + k = l; + memcpy(b, f->bptr, k); + f->bptr += k; + b = (byte *)b + k; + l -= k; + total += k; + } + if (check && total && l) + die("breadb: short read"); + return total; +} + +void bwrite_slow(struct fastbuf *f, const void *b, uns l) +{ + while (l) + { + uns k = f->bufend - f->bptr; + + if (!k) + { + f->spout(f); + k = f->bufend - f->bptr; + } + if (k > l) + k = l; + memcpy(f->bptr, b, k); + f->bptr += k; + b = (byte *)b + k; + l -= k; + } +} + +void +bbcopy_slow(struct fastbuf *f, struct fastbuf *t, uns l) +{ + while (l) + { + byte *fptr, *tptr; + uns favail, tavail, n; + + favail = bdirect_read_prepare(f, &fptr); + if (!favail) + { + if (l == ~0U) + return; + die("bbcopy: source exhausted"); + } + tavail = bdirect_write_prepare(t, &tptr); + n = MIN(l, favail); + n = MIN(n, tavail); + memcpy(tptr, fptr, n); + bdirect_read_commit(f, fptr + n); + bdirect_write_commit(t, tptr + n); + if (l != ~0U) + l -= n; + } +} + +int +bconfig(struct fastbuf *f, uns item, int value) +{ + return f->config ? 
f->config(f, item, value) : -1; +} + +void +brewind(struct fastbuf *f) +{ + bflush(f); + bsetpos(f, 0); +} + +int +bskip_slow(struct fastbuf *f, uns len) +{ + while (len) + { + byte *buf; + uns l = bdirect_read_prepare(f, &buf); + if (!l) + return 0; + l = MIN(l, len); + bdirect_read_commit(f, buf+l); + len -= l; + } + return 1; +} + +sh_off_t +bfilesize(struct fastbuf *f) /* Returns the stream length, 0 for a NULL stream, -1 if the stream is not seekable; the position is preserved */ +{ + if (!f) + return 0; + sh_off_t pos = btell(f); + bflush(f); + if (!f->seek || !f->seek(f, 0, SEEK_END)) /* Back-ends without a seek hook are not seekable (same guard as in bsetpos()) */ + return -1; + sh_off_t len = btell(f); + bsetpos(f, pos); + return len; +} diff --git a/lib/fastbuf.h b/lib/fastbuf.h new file mode 100644 index 0000000..920d83e --- /dev/null +++ b/lib/fastbuf.h @@ -0,0 +1,410 @@ +/* + * UCW Library -- Fast Buffered I/O + * + * (c) 1997--2007 Martin Mares + * (c) 2004 Robert Spalek + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#ifndef _UCW_FASTBUF_H +#define _UCW_FASTBUF_H + +#include <string.h> +#include <alloca.h> + +/* + * Generic buffered I/O. You supply hooks to be called for low-level operations + * (swapping of buffers, seeking and closing), we do the rest. + * + * Buffer layout when reading: + * + * +----------------+---------------------------+ + * | read data | free space | + * +----------------+---------------------------+ + * ^ ^ ^ ^ + * buffer bptr bstop bufend + * + * After the last character is read, bptr == bstop and buffer refill + * is deferred to the next read attempt. This gives us an easy way + * how to implement bungetc(). 
+ * + * When writing: + * + * +--------+--------------+--------------------+ + * | unused | written data | free space | + * +--------+--------------+--------------------+ + * ^ ^ ^ ^ + * buffer bstop bptr bufend + * + * Dirty tricks: + * + * - You can mix reads and writes on the same stream, but you must + * call bflush() in between and remember that the file position + * points after the flushed buffer which is not necessarily the same + * as after the data you've read. + * - The spout/refill hooks can change not only bptr and bstop, but also + * the location of the buffer; fb-mem.c takes advantage of it. + * - In some cases, the user of the bdirect interface can be allowed to modify + * the data in the buffer to avoid unnecessary copying. If the back-end + * allows such modifications, it can set can_overwrite_buffer accordingly: + * * 0 if no modification is allowed, + * * 1 if the user can modify the buffer on the condition that + * the modifications will be undone before calling the next + * fastbuf operation + * * 2 if the user is allowed to overwrite the data in the buffer + * if bdirect_read_commit_modified() is called afterwards. + * In this case, the back-end must be prepared for trimming + * of the buffer which is done by the commit function. 
+ */ + +struct fastbuf { + byte is_fastbuf[0]; /* Dummy field for checking of type casts */ + byte *bptr, *bstop; /* Access pointers */ + byte *buffer, *bufend; /* Start and end of the buffer */ + char *name; /* File name for error messages */ + sh_off_t pos; /* Position of bstop in the file */ + int (*refill)(struct fastbuf *); /* Get a buffer with new data */ + void (*spout)(struct fastbuf *); /* Write buffer data to the file */ + int (*seek)(struct fastbuf *, sh_off_t, int); /* Slow path for bseek(), buffer already flushed; returns success */ + void (*close)(struct fastbuf *); /* Close the stream */ + int (*config)(struct fastbuf *, uns, int); /* Configure the stream */ + int can_overwrite_buffer; /* Can the buffer be altered? (see discussion above) 0=never, 1=temporarily, 2=permanently */ +}; + +/* FastIO on files with several configurable back-ends */ + +enum fb_type { /* Which back-end you want to use */ + FB_STD, /* Standard buffered I/O */ + FB_DIRECT, /* Direct I/O bypassing system caches (see fb-direct.c for a description) */ + FB_MMAP /* Memory mapped files */ +}; + +struct fb_params { + enum fb_type type; + uns buffer_size; /* 0 for default size */ + uns keep_back_buf; /* FB_STD: optimize for bi-directional access */ + uns read_ahead; /* FB_DIRECT options */ + uns write_back; + struct asio_queue *asio; +}; + +struct cf_section; +extern struct cf_section fbpar_cf; +extern struct fb_params fbpar_def; + +struct fastbuf *bopen_file(const char *name, int mode, struct fb_params *params); /* Use params==NULL for defaults */ +struct fastbuf *bopen_file_try(const char *name, int mode, struct fb_params *params); +struct fastbuf *bopen_tmp_file(struct fb_params *params); +struct fastbuf *bopen_fd(int fd, struct fb_params *params); + +/* FastIO on standard files (shortcuts for FB_STD) */ + +struct fastbuf *bopen(const char *name, uns mode, uns buflen); +struct fastbuf *bopen_try(const char *name, uns mode, uns buflen); +struct fastbuf *bopen_tmp(uns buflen); 
+struct fastbuf *bfdopen(int fd, uns buflen); +struct fastbuf *bfdopen_shared(int fd, uns buflen); +void bfilesync(struct fastbuf *b); + +/* Temporary files */ + +#define TEMP_FILE_NAME_LEN 256 +void temp_file_name(char *name); +void bfix_tmp_file(struct fastbuf *fb, const char *name); + +/* Internal functions of some file back-ends */ + +struct fastbuf *bfdopen_internal(int fd, const char *name, uns buflen); +struct fastbuf *bfmmopen_internal(int fd, const char *name, uns mode); + +extern uns fbdir_cheat; +struct asio_queue; +struct fastbuf *fbdir_open_fd_internal(int fd, const char *name, struct asio_queue *io_queue, uns buffer_size, uns read_ahead, uns write_back); + +void bclose_file_helper(struct fastbuf *f, int fd, int is_temp_file); + +/* FastIO on in-memory streams */ + +struct fastbuf *fbmem_create(uns blocksize); /* Create stream and return its writing fastbuf */ +struct fastbuf *fbmem_clone_read(struct fastbuf *); /* Create reading fastbuf */ + +/* FastI on file descriptors with limit */ + +struct fastbuf *bopen_limited_fd(int fd, uns bufsize, uns limit); + +/* FastIO on static buffers */ + +void fbbuf_init_read(struct fastbuf *f, byte *buffer, uns size, uns can_overwrite); +void fbbuf_init_write(struct fastbuf *f, byte *buffer, uns size); +static inline uns +fbbuf_count_written(struct fastbuf *f) +{ + return f->bptr - f->bstop; +} + +/* FastIO on recyclable growing buffers */ + +struct fastbuf *fbgrow_create(unsigned basic_size); +void fbgrow_reset(struct fastbuf *b); /* Reset stream and prepare for writing */ +void fbgrow_rewind(struct fastbuf *b); /* Prepare for reading */ + +/* FastO on memory pools */ + +struct mempool; +struct fbpool { + struct fastbuf fb; + struct mempool *mp; +}; + +void fbpool_init(struct fbpool *fb); /* Initialize a new fastbuf */ +void fbpool_start(struct fbpool *fb, struct mempool *mp, uns init_size); + /* Start a new continuous block and prepare for writing (see mp_start()) */ +void *fbpool_end(struct fbpool *fb); /* Close 
the block and return its address (see mp_end()). + The length can be determined with mp_size(mp, ptr). */ + +/* FastO with atomic writes for multi-threaded programs */ + +struct fb_atomic { + struct fastbuf fb; + struct fb_atomic_file *af; + byte *expected_max_bptr; + uns slack_size; +}; +#define FB_ATOMIC(f) ((struct fb_atomic *)(f)->is_fastbuf) + +struct fastbuf *fbatomic_open(const char *name, struct fastbuf *master, uns bufsize, int record_len); +void fbatomic_internal_write(struct fastbuf *b); + +static inline void +fbatomic_commit(struct fastbuf *b) +{ + if (b->bptr >= ((struct fb_atomic *)b)->expected_max_bptr) + fbatomic_internal_write(b); +} + +/* Configuring stream parameters */ + +enum bconfig_type { + BCONFIG_IS_TEMP_FILE, /* 0=normal file, 1=temporary file, 2=shared fd */ + BCONFIG_KEEP_BACK_BUF, /* Optimize for bi-directional access */ +}; + +int bconfig(struct fastbuf *f, uns type, int data); + +/* Universal functions working on all fastbuf's */ + +void bclose(struct fastbuf *f); +void bflush(struct fastbuf *f); +void bseek(struct fastbuf *f, sh_off_t pos, int whence); +void bsetpos(struct fastbuf *f, sh_off_t pos); +void brewind(struct fastbuf *f); +sh_off_t bfilesize(struct fastbuf *f); /* -1 if not seekable */ + +static inline sh_off_t btell(struct fastbuf *f) +{ + return f->pos + (f->bptr - f->bstop); +} + +int bgetc_slow(struct fastbuf *f); +static inline int bgetc(struct fastbuf *f) +{ + return (f->bptr < f->bstop) ? (int) *f->bptr++ : bgetc_slow(f); +} + +int bpeekc_slow(struct fastbuf *f); +static inline int bpeekc(struct fastbuf *f) +{ + return (f->bptr < f->bstop) ? 
(int) *f->bptr : bpeekc_slow(f); +} + +static inline void bungetc(struct fastbuf *f) +{ + f->bptr--; +} + +void bputc_slow(struct fastbuf *f, uns c); +static inline void bputc(struct fastbuf *f, uns c) +{ + if (f->bptr < f->bufend) + *f->bptr++ = c; + else + bputc_slow(f, c); +} + +static inline uns +bavailr(struct fastbuf *f) +{ + return f->bstop - f->bptr; +} + +static inline uns +bavailw(struct fastbuf *f) +{ + return f->bufend - f->bptr; +} + +uns bread_slow(struct fastbuf *f, void *b, uns l, uns check); +static inline uns bread(struct fastbuf *f, void *b, uns l) +{ + if (bavailr(f) >= l) + { + memcpy(b, f->bptr, l); + f->bptr += l; + return l; + } + else + return bread_slow(f, b, l, 0); +} + +static inline uns breadb(struct fastbuf *f, void *b, uns l) +{ + if (bavailr(f) >= l) + { + memcpy(b, f->bptr, l); + f->bptr += l; + return l; + } + else + return bread_slow(f, b, l, 1); +} + +void bwrite_slow(struct fastbuf *f, const void *b, uns l); +static inline void bwrite(struct fastbuf *f, const void *b, uns l) +{ + if (bavailw(f) >= l) + { + memcpy(f->bptr, b, l); + f->bptr += l; + } + else + bwrite_slow(f, b, l); +} + +/* + * Functions for reading of strings: + * + * bgets() reads a line, strip the trailing '\n' and return a pointer + * to the terminating 0 or NULL on EOF. Dies if the line is too long. + * bgets0() does the same for 0-terminated strings. + * bgets_nodie() a variant of bgets() which returns either the length of the + * string (excluding the terminator) or -1 if the line does not fit + * in the buffer. In such cases, it returns after reading exactly `l' + * bytes of input. 
+ * bgets_bb() a variant of bgets() which allocates the string in a growing buffer + * bgets_mp() the same, but in a mempool + * bgets_stk() the same, but on the stack by alloca() + */ + +char *bgets(struct fastbuf *f, char *b, uns l); +char *bgets0(struct fastbuf *f, char *b, uns l); +int bgets_nodie(struct fastbuf *f, char *b, uns l); + +struct mempool; +struct bb_t; +uns bgets_bb(struct fastbuf *f, struct bb_t *b, uns limit); +char *bgets_mp(struct fastbuf *f, struct mempool *mp); + +struct bgets_stk_struct { + struct fastbuf *f; + byte *old_buf, *cur_buf, *src; + uns old_len, cur_len, src_len; +}; +void bgets_stk_init(struct bgets_stk_struct *s); +void bgets_stk_step(struct bgets_stk_struct *s); +#define bgets_stk(fb) ({ struct bgets_stk_struct _s; _s.f = (fb); for (bgets_stk_init(&_s); _s.cur_len; _s.cur_buf = alloca(_s.cur_len), bgets_stk_step(&_s)); _s.cur_buf; }) + +static inline void +bputs(struct fastbuf *f, const char *b) +{ + bwrite(f, b, strlen(b)); +} + +static inline void +bputs0(struct fastbuf *f, const char *b) +{ + bwrite(f, b, strlen(b)+1); +} + +static inline void +bputsn(struct fastbuf *f, const char *b) +{ + bputs(f, b); + bputc(f, '\n'); +} + +void bbcopy_slow(struct fastbuf *f, struct fastbuf *t, uns l); +static inline void +bbcopy(struct fastbuf *f, struct fastbuf *t, uns l) +{ + if (bavailr(f) >= l && bavailw(t) >= l) + { + memcpy(t->bptr, f->bptr, l); + t->bptr += l; + f->bptr += l; + } + else + bbcopy_slow(f, t, l); +} + +int bskip_slow(struct fastbuf *f, uns len); +static inline int bskip(struct fastbuf *f, uns len) +{ + if (bavailr(f) >= len) + { + f->bptr += len; + return 1; + } + else + return bskip_slow(f, len); +} + +/* Direct I/O on buffers */ + +static inline uns +bdirect_read_prepare(struct fastbuf *f, byte **buf) +{ + if (f->bptr == f->bstop && !f->refill(f)) + { + *buf = NULL; // This is not needed, but it helps to get rid of spurious warnings + return 0; + } + *buf = f->bptr; + return bavailr(f); +} + +static inline void 
+bdirect_read_commit(struct fastbuf *f, byte *pos) +{ + f->bptr = pos; +} + +static inline void +bdirect_read_commit_modified(struct fastbuf *f, byte *pos) +{ + f->bptr = pos; + f->buffer = pos; /* Avoid seeking backwards in the buffer */ +} + +static inline uns +bdirect_write_prepare(struct fastbuf *f, byte **buf) +{ + if (f->bptr == f->bufend) + f->spout(f); + *buf = f->bptr; + return bavailw(f); +} + +static inline void +bdirect_write_commit(struct fastbuf *f, byte *pos) +{ + f->bptr = pos; +} + +/* Formatted output */ + +int bprintf(struct fastbuf *b, const char *msg, ...) FORMAT_CHECK(printf,2,3); +int vbprintf(struct fastbuf *b, const char *msg, va_list args); + +#endif diff --git a/lib/fastbuf.t b/lib/fastbuf.t new file mode 100644 index 0000000..6f8681a --- /dev/null +++ b/lib/fastbuf.t @@ -0,0 +1,15 @@ +# Tests for fastbufs + +Run: ../obj/lib/fb-file-t +Out: 112 + + 112 116 + +Run: ../obj/lib/fb-grow-t +Out: <10><10><0>1234512345<10><9>5<10> + <10><10><0>1234512345<10><9>5<10> + <10><10><0>1234512345<10><9>5<10> + <10><10><0>1234512345<10><9>5<10> + <10><10><0>1234512345<10><9>5<10> + +Run: ../obj/lib/fb-pool-t diff --git a/lib/fb-atomic.c b/lib/fb-atomic.c new file mode 100644 index 0000000..234d920 --- /dev/null +++ b/lib/fb-atomic.c @@ -0,0 +1,169 @@ +/* + * UCW Library -- Atomic Buffered Write to Files + * + * (c) 2006 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +/* + * This fastbuf backend is intended for cases where several threads + * of a single program append records to a single file and while the + * record can mix in an arbitrary way, the bytes inside a single + * record must remain uninterrupted. + * + * In case of files with fixed record size, we just allocate the + * buffer to hold a whole number of records and take advantage + * of the atomicity of the write() system call. 
+ * + * With variable-sized records, we need another solution: when + * writing a record, we keep the fastbuf in a locked state, which + * prevents buffer flushing (and if the buffer becomes full, we extend it), + * and we wait for an explicit commit operation which write()s the buffer + * if the free space in the buffer falls below the expected maximum record + * length. + * + * fbatomic_open() is called with the following parameters: + * name - name of the file to open + * master - fbatomic for the master thread or NULL if it's the first open + * bufsize - initial buffer size + * record_len - record length for fixed-size records; + * or -(expected maximum record length) for variable-sized ones. + */ + +#define LOCAL_DEBUG + +#include "lib/lib.h" +#include "lib/fastbuf.h" +#include "lib/lfs.h" + +#include +#include +#include + +struct fb_atomic_file { + int fd; + int use_count; + int record_len; + uns locked; + byte name[1]; +}; + +void +fbatomic_internal_write(struct fastbuf *f) +{ + struct fb_atomic_file *af = FB_ATOMIC(f)->af; + int size = f->bptr - f->buffer; + if (size) + { + ASSERT(af->record_len < 0 || !(size % af->record_len)); + int res = write(af->fd, f->buffer, size); + if (res < 0) + die("Error writing %s: %m", f->name); + if (res != size) + die("Unexpected partial write to %s: written only %d bytes of %d", f->name, res, size); + f->bptr = f->buffer; + } +} + +static void +fbatomic_spout(struct fastbuf *f) +{ + if (f->bptr < f->bufend) /* Explicit flushes should be ignored */ + return; + + struct fb_atomic *F = FB_ATOMIC(f); + if (F->af->locked) + { + uns written = f->bptr - f->buffer; + uns size = f->bufend - f->buffer + F->slack_size; + F->slack_size *= 2; + DBG("Reallocating buffer for atomic file %s with slack %d", f->name, F->slack_size); + f->buffer = xrealloc(f->buffer, size); + f->bufend = f->buffer + size; + f->bptr = f->buffer + written; + F->expected_max_bptr = f->bufend - F->slack_size; + } + else + fbatomic_internal_write(f); +} + +static 
void +fbatomic_close(struct fastbuf *f) +{ + struct fb_atomic_file *af = FB_ATOMIC(f)->af; + fbatomic_internal_write(f); /* Need to flush explicitly, because the file can be locked */ + if (!--af->use_count) + { + close(af->fd); + xfree(af); + } + xfree(f); +} + +struct fastbuf * +fbatomic_open(const char *name, struct fastbuf *master, uns bufsize, int record_len) +{ + struct fb_atomic *F = xmalloc_zero(sizeof(*F)); + struct fastbuf *f = &F->fb; + struct fb_atomic_file *af; + if (master) + { + af = FB_ATOMIC(master)->af; + af->use_count++; + ASSERT(af->record_len == record_len); + } + else + { + af = xmalloc_zero(sizeof(*af) + strlen(name)); + if ((af->fd = sh_open(name, O_WRONLY | O_CREAT | O_TRUNC | O_APPEND, 0666)) < 0) + die("Cannot create %s: %m", name); + af->use_count = 1; + af->record_len = record_len; + af->locked = (record_len < 0); + strcpy(af->name, name); + } + F->af = af; + if (record_len > 0 && bufsize % record_len) + bufsize += record_len - (bufsize % record_len); + f->buffer = xmalloc(bufsize); + f->bufend = f->buffer + bufsize; + F->slack_size = (record_len < 0) ? -record_len : 0; + ASSERT(bufsize > F->slack_size); + F->expected_max_bptr = f->bufend - F->slack_size; + f->bptr = f->bstop = f->buffer; + f->name = af->name; + f->spout = fbatomic_spout; + f->close = fbatomic_close; + return f; +} + +#ifdef TEST + +int main(int argc UNUSED, char **argv UNUSED) +{ + struct fastbuf *f, *g; + + log(L_INFO, "Testing block writes"); + f = fbatomic_open("test", NULL, 16, 4); + for (u32 i=0; i<17; i++) + bwrite(f, &i, 4); + bclose(f); + + log(L_INFO, "Testing interleaved var-size writes"); + f = fbatomic_open("test2", NULL, 23, -5); + g = fbatomic_open("test2", f, 23, -5); + for (int i=0; i<100; i++) + { + struct fastbuf *x = (i%2) ? 
g : f; + bprintf(x, "%c<%d>\n", "fg"[i%2], ((259309*i) % 1000000) >> (i % 8)); + fbatomic_commit(x); + } + bclose(f); + bclose(g); + + return 0; +} + +#endif diff --git a/lib/fb-buffer.c b/lib/fb-buffer.c new file mode 100644 index 0000000..d8094eb --- /dev/null +++ b/lib/fb-buffer.c @@ -0,0 +1,70 @@ +/* + * UCW Library -- Fast Buffered I/O on Static Buffers + * + * (c) 2003--2006 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#include "lib/lib.h" +#include "lib/fastbuf.h" + +#include +#include + +static int +fbbuf_refill(struct fastbuf *f UNUSED) +{ + return 0; +} + +static int +fbbuf_seek(struct fastbuf *f, sh_off_t pos, int whence) +{ + /* Somebody might want to seek to the end of buffer, try to be nice to him. */ + sh_off_t len = f->bufend - f->buffer; + if (whence == SEEK_END) + pos += len; + ASSERT(pos >= 0 && pos <= len); + f->bptr = f->buffer + pos; + f->bstop = f->bufend; + f->pos = len; + return 1; +} + +void +fbbuf_init_read(struct fastbuf *f, byte *buf, uns size, uns can_overwrite) +{ + f->buffer = f->bptr = buf; + f->bstop = f->bufend = buf + size; + f->name = "fbbuf-read"; + f->pos = size; + f->refill = fbbuf_refill; + f->spout = NULL; + f->seek = fbbuf_seek; + f->close = NULL; + f->config = NULL; + f->can_overwrite_buffer = can_overwrite; +} + +static void +fbbuf_spout(struct fastbuf *f UNUSED) +{ + die("fbbuf: buffer overflow on write"); +} + +void +fbbuf_init_write(struct fastbuf *f, byte *buf, uns size) +{ + f->buffer = f->bstop = f->bptr = buf; + f->bufend = buf + size; + f->name = "fbbuf-write"; + f->pos = size; + f->refill = NULL; + f->spout = fbbuf_spout; + f->seek = NULL; + f->close = NULL; + f->config = NULL; + f->can_overwrite_buffer = 0; +} diff --git a/lib/fb-direct.c b/lib/fb-direct.c new file mode 100644 index 0000000..865bbb4 --- /dev/null +++ b/lib/fb-direct.c @@ -0,0 +1,344 @@ +/* + * UCW Library -- Fast Buffered I/O on O_DIRECT Files 
+ * + * (c) 2006--2007 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +/* + * This is a fastbuf backend for fast streaming I/O using O_DIRECT and + * the asynchronous I/O module. It's designed for use on large files + * which don't fit in the disk cache. + * + * CAVEATS: + * + * - All operations with a single fbdirect handle must be done + * within a single thread, unless you provide a custom I/O queue + * and take care of locking. + * + * FIXME: what if the OS doesn't support O_DIRECT? + * FIXME: unaligned seeks and partial writes? + * FIXME: append to unaligned file + */ + +#undef LOCAL_DEBUG + +#include "lib/lib.h" +#include "lib/fastbuf.h" +#include "lib/lfs.h" +#include "lib/asio.h" +#include "lib/conf.h" +#include "lib/threads.h" + +#include +#include +#include +#include + +uns fbdir_cheat; + +static struct cf_section fbdir_cf = { + CF_ITEMS { + CF_UNS("Cheat", &fbdir_cheat), + CF_END + } +}; + +#define FBDIR_ALIGN 512 + +enum fbdir_mode { // Current operating mode + M_NULL, + M_READ, + M_WRITE +}; + +struct fb_direct { + struct fastbuf fb; + int fd; // File descriptor + int is_temp_file; + struct asio_queue *io_queue; // I/O queue to use + struct asio_queue *user_queue; // If io_queue was supplied by the user + struct asio_request *pending_read; + struct asio_request *done_read; + struct asio_request *active_buffer; + enum fbdir_mode mode; + byte name[0]; +}; +#define FB_DIRECT(f) ((struct fb_direct *)(f)->is_fastbuf) + +static void CONSTRUCTOR +fbdir_global_init(void) +{ + cf_declare_section("FBDirect", &fbdir_cf, 0); +} + +static void +fbdir_read_sync(struct fb_direct *F) +{ + while (F->pending_read) + { + struct asio_request *r = asio_wait(F->io_queue); + ASSERT(r); + struct fb_direct *G = r->user_data; + ASSERT(G); + ASSERT(G->pending_read == r && !G->done_read); + G->pending_read = NULL; + G->done_read = r; + } +} + +static void +fbdir_change_mode(struct 
fb_direct *F, enum fbdir_mode mode) +{ + if (F->mode == mode) + return; + DBG("FB-DIRECT: Switching mode to %d", mode); + switch (F->mode) + { + case M_NULL: + break; + case M_READ: + fbdir_read_sync(F); // Wait for read-ahead requests to finish + if (F->done_read) // Return read-ahead requests if any + { + asio_put(F->done_read); + F->done_read = NULL; + } + break; + case M_WRITE: + asio_sync(F->io_queue); // Wait for pending writebacks + break; + } + if (F->active_buffer) + { + asio_put(F->active_buffer); + F->active_buffer = NULL; + } + F->mode = mode; +} + +static void +fbdir_submit_read(struct fb_direct *F) +{ + struct asio_request *r = asio_get(F->io_queue); + r->fd = F->fd; + r->op = ASIO_READ; + r->len = F->io_queue->buffer_size; + r->user_data = F; + asio_submit(r); + F->pending_read = r; +} + +static int +fbdir_refill(struct fastbuf *f) +{ + struct fb_direct *F = FB_DIRECT(f); + + DBG("FB-DIRECT: Refill"); + + if (!F->done_read) + { + if (!F->pending_read) + { + fbdir_change_mode(F, M_READ); + fbdir_submit_read(F); + } + fbdir_read_sync(F); + ASSERT(F->done_read); + } + + struct asio_request *r = F->done_read; + F->done_read = NULL; + if (F->active_buffer) + asio_put(F->active_buffer); + F->active_buffer = r; + if (!r->status) + return 0; + if (r->status < 0) + die("Error reading %s: %s", f->name, strerror(r->returned_errno)); + f->bptr = f->buffer = r->buffer; + f->bstop = f->bufend = f->buffer + r->status; + f->pos += r->status; + + fbdir_submit_read(F); // Read-ahead the next block + + return r->status; +} + +static void +fbdir_spout(struct fastbuf *f) +{ + struct fb_direct *F = FB_DIRECT(f); + struct asio_request *r; + + DBG("FB-DIRECT: Spout"); + + fbdir_change_mode(F, M_WRITE); + r = F->active_buffer; + if (r && f->bptr > f->bstop) + { + r->op = ASIO_WRITE_BACK; + r->fd = F->fd; + r->len = f->bptr - f->bstop; + ASSERT(!(f->pos % FBDIR_ALIGN) || fbdir_cheat); + f->pos += r->len; + if (!fbdir_cheat && r->len % FBDIR_ALIGN) // Have to simulate 
incomplete writes + { + r->len = ALIGN_TO(r->len, FBDIR_ALIGN); + asio_submit(r); + asio_sync(F->io_queue); + DBG("FB-DIRECT: Truncating at %llu", (long long)f->pos); + if (sh_ftruncate(F->fd, f->pos) < 0) + die("Error truncating %s: %m", f->name); + } + else + asio_submit(r); + r = NULL; + } + if (!r) + r = asio_get(F->io_queue); + f->bstop = f->bptr = f->buffer = r->buffer; + f->bufend = f->buffer + F->io_queue->buffer_size; + F->active_buffer = r; +} + +static int +fbdir_seek(struct fastbuf *f, sh_off_t pos, int whence) +{ + DBG("FB-DIRECT: Seek %llu %d", (long long)pos, whence); + + if (whence == SEEK_SET && pos == f->pos) + return 1; + + fbdir_change_mode(FB_DIRECT(f), M_NULL); // Wait for all async requests to finish + sh_off_t l = sh_seek(FB_DIRECT(f)->fd, pos, whence); + if (l < 0) + return 0; + f->pos = l; + return 1; +} + +static struct asio_queue * +fbdir_get_io_queue(uns buffer_size, uns write_back) +{ + struct ucwlib_context *ctx = ucwlib_thread_context(); + struct asio_queue *q = ctx->io_queue; + if (!q) + { + q = xmalloc_zero(sizeof(struct asio_queue)); + q->buffer_size = buffer_size; + q->max_writebacks = write_back; + asio_init_queue(q); + ctx->io_queue = q; + } + q->use_count++; + DBG("FB-DIRECT: Got I/O queue, uc=%d", q->use_count); + return q; +} + +static void +fbdir_put_io_queue(void) +{ + struct ucwlib_context *ctx = ucwlib_thread_context(); + struct asio_queue *q = ctx->io_queue; + ASSERT(q); + DBG("FB-DIRECT: Put I/O queue, uc=%d", q->use_count); + if (!--q->use_count) + { + asio_cleanup_queue(q); + xfree(q); + ctx->io_queue = NULL; + } +} + +static void +fbdir_close(struct fastbuf *f) +{ + struct fb_direct *F = FB_DIRECT(f); + + DBG("FB-DIRECT: Close"); + + fbdir_change_mode(F, M_NULL); + if (!F->user_queue) + fbdir_put_io_queue(); + + bclose_file_helper(f, F->fd, F->is_temp_file); + xfree(f); +} + +static int +fbdir_config(struct fastbuf *f, uns item, int value) +{ + int orig; + + switch (item) + { + case BCONFIG_IS_TEMP_FILE: + orig = 
FB_DIRECT(f)->is_temp_file; + FB_DIRECT(f)->is_temp_file = value; + return orig; + default: + return -1; + } +} + +struct fastbuf * +fbdir_open_fd_internal(int fd, const char *name, struct asio_queue *q, uns buffer_size, uns read_ahead UNUSED, uns write_back) +{ + int namelen = strlen(name) + 1; + struct fb_direct *F = xmalloc(sizeof(struct fb_direct) + namelen); + struct fastbuf *f = &F->fb; + + DBG("FB-DIRECT: Open"); + bzero(F, sizeof(*F)); + f->name = F->name; + memcpy(f->name, name, namelen); + F->fd = fd; + if (q) + F->io_queue = F->user_queue = q; + else + F->io_queue = fbdir_get_io_queue(buffer_size, write_back); + f->refill = fbdir_refill; + f->spout = fbdir_spout; + f->seek = fbdir_seek; + f->close = fbdir_close; + f->config = fbdir_config; + f->can_overwrite_buffer = 2; + return f; +} + +#ifdef TEST + +#include "lib/getopt.h" + +int main(int argc, char **argv) +{ + struct fb_params par = { .type = FB_DIRECT }; + struct fastbuf *f, *t; + + log_init(NULL); + if (cf_getopt(argc, argv, CF_SHORT_OPTS, CF_NO_LONG_OPTS, NULL) >= 0) + die("Hey, whaddya want?"); + f = (optind < argc) ? bopen_file(argv[optind++], O_RDONLY, &par) : bopen_fd(0, &par); + t = (optind < argc) ? bopen_file(argv[optind++], O_RDWR | O_CREAT | O_TRUNC, &par) : bopen_fd(1, &par); + + bbcopy(f, t, ~0U); + ASSERT(btell(f) == btell(t)); + +#if 0 // This triggers unaligned write + bflush(t); + bputc(t, '\n'); +#endif + + brewind(t); + bgetc(t); + ASSERT(btell(t) == 1); + + bclose(f); + bclose(t); + return 0; +} + +#endif diff --git a/lib/fb-file.c b/lib/fb-file.c new file mode 100644 index 0000000..30f8eac --- /dev/null +++ b/lib/fb-file.c @@ -0,0 +1,276 @@ +/* + * UCW Library -- Fast Buffered I/O on Files + * + * (c) 1997--2007 Martin Mares + * (c) 2007 Pavel Charvat + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */
+
+#include "lib/lib.h"
+#include "lib/fastbuf.h"
+#include "lib/lfs.h"
+
+/*
+ * NOTE(review): header names were lost in extraction; reconstructed from
+ * usage (memmove/memcpy/strlen, read/write/fsync, SEEK_*). Verify upstream.
+ */
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+struct fb_file {
+  struct fastbuf fb;
+  int fd;                               /* File descriptor */
+  int is_temp_file;
+  int keep_back_buf;                    /* Optimize for backwards reading */
+  sh_off_t wpos;                        /* Real file position */
+  uns wlen;                             /* Window size */
+};
+#define FB_FILE(f) ((struct fb_file *)(f)->is_fastbuf)
+#define FB_BUFFER(f) (byte *)(FB_FILE(f) + 1)
+
+/*
+ * Refill callback. f->pos is the position the caller wants to read from,
+ * F->wpos the real position of the descriptor and F->wlen the amount of
+ * valid data left in the buffer from the previous window. Depending on
+ * their distance the function skips forward by reading, reuses a part of
+ * the old window, or issues a real lseek(). Returns the number of bytes
+ * available at f->bptr (0 at end of file).
+ */
+static int
+bfd_refill(struct fastbuf *f)
+{
+  struct fb_file *F = FB_FILE(f);
+  byte *read_ptr = (f->buffer = FB_BUFFER(f));
+  uns blen = f->bufend - f->buffer, back = F->keep_back_buf ? blen >> 2 : 0, read_len = blen;
+  /* Forward or no seek */
+  if (F->wpos <= f->pos)
+    {
+      sh_off_t diff = f->pos - F->wpos;
+      /* Formula for long forward seeks (prefer lseek()) */
+      if (diff > ((sh_off_t)blen << 2))
+        {
+long_seek:
+          f->bptr = f->buffer + back;
+          f->bstop = f->buffer + blen;
+          goto seek;
+        }
+      /* Short forward seek (prefer read() to skip data) */
+      else if ((uns)diff >= back)
+        {
+          uns skip = diff - back;
+          F->wpos += skip;
+          while (skip)
+            {
+              int l = read(F->fd, f->buffer, MIN(skip, blen));
+              if (unlikely(l <= 0))
+                if (l < 0)
+                  die("Error reading %s: %m", f->name);
+                else
+                  {
+                    F->wpos -= skip;
+                    goto eof;
+                  }
+              skip -= l;
+            }
+        }
+      /* Reuse part of the previous window and append new data (also F->wpos == f->pos) */
+      else
+        {
+          uns keep = back - (uns)diff;
+          if (keep >= F->wlen)
+            back = diff + (keep = F->wlen);
+          else
+            memmove(f->buffer, f->buffer + F->wlen - keep, keep);
+          read_len -= keep;
+          read_ptr += keep;
+        }
+      f->bptr = f->buffer + back;
+      f->bstop = f->buffer + blen;
+    }
+  /* Backwards seek */
+  else
+    {
+      sh_off_t diff = F->wpos - f->pos;
+      /* Formula for long backwards seeks (keep smaller backbuffer than for shorter seeks) */
+      if (diff > ((sh_off_t)blen << 1))
+        {
+          if ((sh_off_t)back > f->pos)
+            back = f->pos;
+          goto long_seek;
+        }
+      /* Seek into previous window (do nothing... for example brewind) */
+      else if ((uns)diff <= F->wlen)
+        {
+          f->bstop = f->buffer + F->wlen;
+          f->bptr = f->bstop - diff;
+          f->pos = F->wpos;
+          return 1;
+        }
+      back *= 3;
+      if ((sh_off_t)back > f->pos)
+        back = f->pos;
+      f->bptr = f->buffer + back;
+      read_len = blen;
+      f->bstop = f->buffer + read_len;
+      /* Reuse part of previous window */
+      if (F->wlen && read_len <= back + diff && read_len > back + diff - F->wlen)
+        {
+          uns keep = read_len + F->wlen - back - diff;
+          memmove(f->buffer + read_len - keep, f->buffer, keep);
+        }
+seek:
+      /* Do lseek() */
+      F->wpos = f->pos + (f->buffer - f->bptr);
+      if (sh_seek(F->fd, F->wpos, SEEK_SET) < 0)
+        die("Error seeking %s: %m", f->name);
+    }
+  /* Read (part of) buffer */
+  do
+    {
+      int l = read(F->fd, read_ptr, read_len);
+      if (unlikely(l < 0))
+        die("Error reading %s: %m", f->name);
+      if (!l)
+        if (unlikely(read_ptr < f->bptr))
+          goto eof;
+        else
+          break; /* Incomplete read because of EOF */
+      read_ptr += l;
+      read_len -= l;
+      F->wpos += l;
+    }
+  while (read_ptr <= f->bptr);
+  if (read_len)
+    f->bstop = read_ptr;
+  f->pos += f->bstop - f->bptr;
+  F->wlen = f->bstop - f->buffer;
+  return f->bstop - f->bptr;
+eof:
+  /* The requested position lies beyond EOF */
+  f->bptr = f->bstop = f->buffer;
+  F->wlen = 0;
+  return 0;
+}
+
+/*
+ * Spout callback: perform the seek delayed by bfd_seek() if the descriptor
+ * is not at f->pos, then write out the whole buffer, retrying on short
+ * writes. Dies on any error.
+ */
+static void
+bfd_spout(struct fastbuf *f)
+{
+  /* Do delayed lseek() if needed */
+  if (FB_FILE(f)->wpos != f->pos && sh_seek(FB_FILE(f)->fd, f->pos, SEEK_SET) < 0)
+    die("Error seeking %s: %m", f->name);
+
+  int l = f->bptr - f->buffer;
+  byte *c = f->buffer;
+
+  /* Write the buffer */
+  FB_FILE(f)->wpos = (f->pos += l);
+  FB_FILE(f)->wlen = 0;
+  while (l)
+    {
+      int z = write(FB_FILE(f)->fd, c, l);
+      if (z <= 0)
+        die("Error writing %s: %m", f->name);
+      l -= z;
+      c += z;
+    }
+  f->bptr = f->buffer = FB_BUFFER(f);
+}
+
+/*
+ * Seek callback. SEEK_SET and SEEK_CUR only record the new position;
+ * SEEK_END has to ask the kernel. Returns 1 on success, 0 on failure
+ * (SEEK_CUR overflow or a failed lseek).
+ */
+static int
+bfd_seek(struct fastbuf *f, sh_off_t pos, int whence)
+{
+  /* Delay the seek for the next refill() or spout() call (if whence != SEEK_END). */
+  sh_off_t l;
+  switch (whence)
+    {
+    case SEEK_SET:
+      f->pos = pos;
+      return 1;
+    case SEEK_CUR:
+      l = f->pos + pos;
+      if ((pos > 0) ^ (l > f->pos))     /* moved the wrong way => overflow */
+        return 0;
+      f->pos = l;
+      return 1;
+    case SEEK_END:
+      l = sh_seek(FB_FILE(f)->fd, pos, SEEK_END);
+      if (l < 0)
+        return 0;
+      FB_FILE(f)->wpos = f->pos = l;
+      FB_FILE(f)->wlen = 0;
+      return 1;
+    default:
+      ASSERT(0);
+    }
+}
+
+/* Close callback: close (and possibly unlink) the file, free the fastbuf. */
+static void
+bfd_close(struct fastbuf *f)
+{
+  bclose_file_helper(f, FB_FILE(f)->fd, FB_FILE(f)->is_temp_file);
+  xfree(f);
+}
+
+/* Config callback: sets BCONFIG_IS_TEMP_FILE or BCONFIG_KEEP_BACK_BUF and
+ * returns the previous value; -1 for an unknown item. */
+static int
+bfd_config(struct fastbuf *f, uns item, int value)
+{
+  int orig;
+
+  switch (item)
+    {
+    case BCONFIG_IS_TEMP_FILE:
+      orig = FB_FILE(f)->is_temp_file;
+      FB_FILE(f)->is_temp_file = value;
+      return orig;
+    case BCONFIG_KEEP_BACK_BUF:
+      orig = FB_FILE(f)->keep_back_buf;
+      FB_FILE(f)->keep_back_buf = value;
+      return orig;
+    default:
+      return -1;
+    }
+}
+
+/*
+ * Create a fastbuf on descriptor fd with a buffer of buflen bytes.
+ * Layout of the single allocation: struct fb_file, buffer, copy of name.
+ */
+struct fastbuf *
+bfdopen_internal(int fd, const char *name, uns buflen)
+{
+  ASSERT(buflen);
+  int namelen = strlen(name) + 1;
+  struct fb_file *F = xmalloc_zero(sizeof(struct fb_file) + buflen + namelen);
+  struct fastbuf *f = &F->fb;
+
+  bzero(F, sizeof(*F));
+  f->buffer = (byte *)(F+1);
+  f->bptr = f->bstop = f->buffer;
+  f->bufend = f->buffer + buflen;
+  f->name = f->bufend;
+  memcpy(f->name, name, namelen);
+  F->fd = fd;
+  f->refill = bfd_refill;
+  f->spout = bfd_spout;
+  f->seek = bfd_seek;
+  f->close = bfd_close;
+  f->config = bfd_config;
+  f->can_overwrite_buffer = 2;
+  return f;
+}
+
+/* Flush the buffer and fsync() the file; a failed fsync is only logged. */
+void
+bfilesync(struct fastbuf *b)
+{
+  bflush(b);
+  if (fsync(FB_FILE(b)->fd) < 0)
+    msg(L_ERROR, "fsync(%s) failed: %m", b->name);
+}
+
+#ifdef TEST
+
+int main(void)
+{
+  struct fastbuf *f, *t;
+  f = bopen_tmp(16);
+  t = bfdopen_shared(1, 13);
+  for (uns i = 0; i < 16; i++)
+    bwrite(f, "<hello>", 7);    /* NOTE(review): the literal was emptied by extraction; any 7-byte string works, "<hello>" is a reconstruction */
+  bprintf(t, "%d\n", (int)btell(f));
+  brewind(f);
+  bbcopy(f, t, ~0U);
+  bprintf(t, "\n%d %d\n", (int)btell(f), (int)btell(t));
+  bclose(f);
+  bclose(t);
+  return 0;
+}
+
+#endif
diff --git a/lib/fb-grow.c
b/lib/fb-grow.c new file mode 100644 index 0000000..f9a654a --- /dev/null +++ b/lib/fb-grow.c @@ -0,0 +1,139 @@ +/* + * UCW Library -- Fast Buffered I/O on Growing Buffers + * + * (c) 2006 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#include "lib/lib.h" +#include "lib/fastbuf.h" + +#include +#include + +struct fb_gbuf { + struct fastbuf fb; + byte *last_written; +}; +#define FB_GBUF(f) ((struct fb_gbuf *)(f)->is_fastbuf) + +static int +fbgrow_refill(struct fastbuf *b) +{ + if (b->bstop != FB_GBUF(b)->last_written) + { + /* There was an intervening flush */ + b->bstop = FB_GBUF(b)->last_written; + b->pos = b->bstop - b->buffer; + return 1; + } + /* We are at the end */ + return 0; +} + +static void +fbgrow_spout(struct fastbuf *b) +{ + if (b->bptr >= b->bufend) + { + uns len = b->bufend - b->buffer; + b->buffer = xrealloc(b->buffer, 2*len); + b->bufend = b->buffer + 2*len; + b->bstop = b->buffer; + b->bptr = b->buffer + len; + } +} + +static int +fbgrow_seek(struct fastbuf *b, sh_off_t pos, int whence) +{ + ASSERT(FB_GBUF(b)->last_written); /* Seeks allowed only in read mode */ + sh_off_t len = FB_GBUF(b)->last_written - b->buffer; + if (whence == SEEK_END) + pos += len; + ASSERT(pos >= 0 && pos <= len); + b->bptr = b->buffer + pos; + b->bstop = FB_GBUF(b)->last_written; + b->pos = len; + return 1; +} + +static void +fbgrow_close(struct fastbuf *b) +{ + xfree(b->buffer); + xfree(b); +} + +struct fastbuf * +fbgrow_create(unsigned basic_size) +{ + struct fastbuf *b = xmalloc_zero(sizeof(struct fb_gbuf)); + b->buffer = xmalloc(basic_size); + b->bufend = b->buffer + basic_size; + b->bptr = b->bstop = b->buffer; + b->name = ""; + b->refill = fbgrow_refill; + b->spout = fbgrow_spout; + b->seek = fbgrow_seek; + b->close = fbgrow_close; + b->can_overwrite_buffer = 1; + return b; +} + +void +fbgrow_reset(struct fastbuf *b) +{ + b->bptr = b->bstop = b->buffer; + b->pos = 0; 
+ FB_GBUF(b)->last_written = NULL; +} + +void +fbgrow_rewind(struct fastbuf *b) +{ + if (!FB_GBUF(b)->last_written) + { + /* Last operation was a write, so remember the end position */ + FB_GBUF(b)->last_written = b->bptr; + } + b->bptr = b->buffer; + b->bstop = FB_GBUF(b)->last_written; + b->pos = b->bstop - b->buffer; +} + +#ifdef TEST + +int main(void) +{ + struct fastbuf *f; + uns t; + + f = fbgrow_create(3); + for (uns i=0; i<5; i++) + { + fbgrow_reset(f); + bwrite(f, "12345", 5); + bwrite(f, "12345", 5); + printf("<%d>", (int)btell(f)); + bflush(f); + printf("<%d>", (int)btell(f)); + fbgrow_rewind(f); + printf("<%d>", (int)btell(f)); + while ((t = bgetc(f)) != ~0U) + putchar(t); + printf("<%d>", (int)btell(f)); + fbgrow_rewind(f); + bseek(f, -1, SEEK_END); + printf("<%d>", (int)btell(f)); + while ((t = bgetc(f)) != ~0U) + putchar(t); + printf("<%d>\n", (int)btell(f)); + } + bclose(f); + return 0; +} + +#endif diff --git a/lib/fb-limfd.c b/lib/fb-limfd.c new file mode 100644 index 0000000..50bc521 --- /dev/null +++ b/lib/fb-limfd.c @@ -0,0 +1,75 @@ +/* + * UCW Library -- Fast Buffered Input on Limited File Descriptors + * + * (c) 2003--2004 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */ + +#include "lib/lib.h" +#include "lib/fastbuf.h" + +#include + +struct fb_limfd { + struct fastbuf fb; + int fd; /* File descriptor */ + int limit; +}; +#define FB_LIMFD(f) ((struct fb_limfd *)(f)->is_fastbuf) +#define FB_BUFFER(f) (byte *)(FB_LIMFD(f) + 1) + +static int +bfl_refill(struct fastbuf *f) +{ + f->bptr = f->buffer = FB_BUFFER(f); + int max = MIN(FB_LIMFD(f)->limit - f->pos, f->bufend - f->buffer); + int l = read(FB_LIMFD(f)->fd, f->buffer, max); + if (l < 0) + die("Error reading %s: %m", f->name); + f->bstop = f->buffer + l; + f->pos += l; + return l; +} + +static void +bfl_close(struct fastbuf *f) +{ + xfree(f); +} + +struct fastbuf * +bopen_limited_fd(int fd, uns buflen, uns limit) +{ + struct fb_limfd *F = xmalloc(sizeof(struct fb_limfd) + buflen); + struct fastbuf *f = &F->fb; + + bzero(F, sizeof(*F)); + f->buffer = (char *)(F+1); + f->bptr = f->bstop = f->buffer; + f->bufend = f->buffer + buflen; + f->name = "limited-fd"; + F->fd = fd; + F->limit = limit; + f->refill = bfl_refill; + f->close = bfl_close; + f->can_overwrite_buffer = 2; + return f; +} + +#ifdef TEST + +int main(int argc, char **argv) +{ + struct fastbuf *f = bopen_limited_fd(0, 3, 13); + struct fastbuf *o = bfdopen_shared(1, 16); + int c; + while ((c = bgetc(f)) >= 0) + bputc(o, c); + bclose(o); + bclose(f); + return 0; +} + +#endif diff --git a/lib/fb-mem.c b/lib/fb-mem.c new file mode 100644 index 0000000..c304e16 --- /dev/null +++ b/lib/fb-mem.c @@ -0,0 +1,221 @@ +/* + * UCW Library -- Fast Buffered I/O on Memory Streams + * + * (c) 1997--2002 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */ + +#include "lib/lib.h" +#include "lib/fastbuf.h" + +#include + +struct memstream { + unsigned blocksize; + unsigned uc; + struct msblock *first; +}; + +struct msblock { + struct msblock *next; + sh_off_t pos; + unsigned size; + byte data[0]; +}; + +struct fb_mem { + struct fastbuf fb; + struct memstream *stream; + struct msblock *block; +}; +#define FB_MEM(f) ((struct fb_mem *)(f)->is_fastbuf) + +static int +fbmem_refill(struct fastbuf *f) +{ + struct memstream *s = FB_MEM(f)->stream; + struct msblock *b = FB_MEM(f)->block; + + if (!b) + { + b = s->first; + if (!b) + return 0; + } + else if (f->buffer == b->data && f->bstop < b->data + b->size) + { + f->bstop = b->data + b->size; + f->pos = b->pos + b->size; + return 1; + } + else if (!b->next) + return 0; + else + b = b->next; + if (!b->size) + return 0; + f->buffer = f->bptr = b->data; + f->bufend = f->bstop = b->data + b->size; + f->pos = b->pos + b->size; + FB_MEM(f)->block = b; + return 1; +} + +static void +fbmem_spout(struct fastbuf *f) +{ + struct memstream *s = FB_MEM(f)->stream; + struct msblock *b = FB_MEM(f)->block; + struct msblock *bb; + + if (b) + { + b->size = f->bptr - b->data; + if (b->size < s->blocksize) + return; + } + bb = xmalloc(sizeof(struct msblock) + s->blocksize); + if (b) + { + b->next = bb; + bb->pos = b->pos + b->size; + } + else + { + s->first = bb; + bb->pos = 0; + } + bb->next = NULL; + bb->size = 0; + f->buffer = f->bptr = f->bstop = bb->data; + f->bufend = bb->data + s->blocksize; + f->pos = bb->pos; + FB_MEM(f)->block = bb; +} + +static int +fbmem_seek(struct fastbuf *f, sh_off_t pos, int whence) +{ + struct memstream *m = FB_MEM(f)->stream; + struct msblock *b; + + ASSERT(whence == SEEK_SET || whence == SEEK_END); + if (whence == SEEK_END) + { + for (b=m->first; b; b=b->next) + pos += b->size; + } + /* Yes, this is linear. But considering the average number of buckets, it doesn't matter. 
*/ + for (b=m->first; b; b=b->next) + { + if (pos <= b->pos + (sh_off_t)b->size) /* <=, because we need to be able to seek just after file end */ + { + f->buffer = b->data; + f->bptr = b->data + (pos - b->pos); + f->bufend = f->bstop = b->data + b->size; + f->pos = b->pos + b->size; + FB_MEM(f)->block = b; + return 1; + } + } + if (!m->first && !pos) + { + /* Seeking to offset 0 in an empty file needs an exception */ + f->buffer = f->bptr = f->bufend = NULL; + f->pos = 0; + FB_MEM(f)->block = NULL; + return 1; + } + die("fbmem_seek to invalid offset"); +} + +static void +fbmem_close(struct fastbuf *f) +{ + struct memstream *m = FB_MEM(f)->stream; + struct msblock *b; + + if (!--m->uc) + { + while (b = m->first) + { + m->first = b->next; + xfree(b); + } + xfree(m); + } + xfree(f); +} + +struct fastbuf * +fbmem_create(unsigned blocksize) +{ + struct fastbuf *f = xmalloc_zero(sizeof(struct fb_mem)); + struct memstream *s = xmalloc_zero(sizeof(struct memstream)); + + s->blocksize = blocksize; + s->uc = 1; + + FB_MEM(f)->stream = s; + f->name = ""; + f->spout = fbmem_spout; + f->close = fbmem_close; + return f; +} + +struct fastbuf * +fbmem_clone_read(struct fastbuf *b) +{ + struct fastbuf *f = xmalloc_zero(sizeof(struct fb_mem)); + struct memstream *s = FB_MEM(b)->stream; + + bflush(b); + s->uc++; + + FB_MEM(f)->stream = s; + f->name = ""; + f->refill = fbmem_refill; + f->seek = fbmem_seek; + f->close = fbmem_close; + f->can_overwrite_buffer = 1; + return f; +} + +#ifdef TEST + +int main(void) +{ + struct fastbuf *w, *r; + int t; + + w = fbmem_create(7); + r = fbmem_clone_read(w); + bwrite(w, "12345", 5); + bwrite(w, "12345", 5); + printf("<%d>", (int)btell(w)); + bflush(w); + printf("<%d>", (int)btell(w)); + printf("<%d>", (int)btell(r)); + while ((t = bgetc(r)) >= 0) + putchar(t); + printf("<%d>", (int)btell(r)); + bwrite(w, "12345", 5); + bwrite(w, "12345", 5); + printf("<%d>", (int)btell(w)); + bclose(w); + bsetpos(r, 0); + printf("", (int)btell(r)); + while ((t = 
bgetc(r)) >= 0) + putchar(t); + bsetpos(r, 3); + printf("", (int)btell(r)); + while ((t = bgetc(r)) >= 0) + putchar(t); + fflush(stdout); + bclose(r); + return 0; +} + +#endif diff --git a/lib/fb-mmap.c b/lib/fb-mmap.c new file mode 100644 index 0000000..a57d103 --- /dev/null +++ b/lib/fb-mmap.c @@ -0,0 +1,228 @@ +/* + * UCW Library -- Fast Buffered I/O on Memory-Mapped Files + * + * (c) 2002 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#undef LOCAL_DEBUG + +#include "lib/lib.h" +#include "lib/fastbuf.h" +#include "lib/lfs.h" +#include "lib/conf.h" + +#include +#include +#include +#include + +static uns mmap_window_size = 16*CPU_PAGE_SIZE; +static uns mmap_extend_size = 4*CPU_PAGE_SIZE; + +static struct cf_section fbmm_config = { + CF_ITEMS { + CF_UNS("WindowSize", &mmap_window_size), + CF_UNS("ExtendSize", &mmap_extend_size), + CF_END + } +}; + +static void CONSTRUCTOR fbmm_init_config(void) +{ + cf_declare_section("FBMMap", &fbmm_config, 0); +} + +struct fb_mmap { + struct fastbuf fb; + int fd; + int is_temp_file; + sh_off_t file_size; + sh_off_t file_extend; + sh_off_t window_pos; + uns window_size; + int mode; +}; +#define FB_MMAP(f) ((struct fb_mmap *)(f)->is_fastbuf) + +static void +bfmm_map_window(struct fastbuf *f) +{ + struct fb_mmap *F = FB_MMAP(f); + sh_off_t pos0 = f->pos & ~(sh_off_t)(CPU_PAGE_SIZE-1); + int l = MIN((sh_off_t)mmap_window_size, F->file_extend - pos0); + uns ll = ALIGN_TO(l, CPU_PAGE_SIZE); + int prot = ((F->mode & O_ACCMODE) == O_RDONLY) ? PROT_READ : (PROT_READ | PROT_WRITE); + + DBG(" ... 
Mapping %x(%x)+%x(%x) len=%x extend=%x", (int)pos0, (int)f->pos, ll, l, (int)F->file_size, (int)F->file_extend);
+  if (ll != F->window_size && f->buffer)        /* mapping length changes: drop the old window first */
+    {
+      munmap(f->buffer, F->window_size);
+      f->buffer = NULL;
+    }
+  F->window_size = ll;
+  if (!f->buffer)
+    f->buffer = sh_mmap(NULL, ll, prot, MAP_SHARED, F->fd, pos0);
+  else                                          /* same length: map over the old window in place */
+    f->buffer = sh_mmap(f->buffer, ll, prot, MAP_SHARED | MAP_FIXED, F->fd, pos0);
+  if (f->buffer == (byte *) MAP_FAILED)
+    die("mmap(%s): %m", f->name);
+#ifdef MADV_SEQUENTIAL
+  if (ll > CPU_PAGE_SIZE)
+    madvise(f->buffer, ll, MADV_SEQUENTIAL);
+#endif
+  f->bufend = f->buffer + l;
+  f->bptr = f->buffer + (f->pos - pos0);
+  F->window_pos = pos0;
+}
+/*
+ * Refill callback: returns 0 when f->pos is at or behind the logical end
+ * of file. Otherwise it maps a new window if the current one is used up,
+ * limits bstop to the logical file size, sets f->pos to the file offset
+ * of bstop and returns 1.
+ */
+static int
+bfmm_refill(struct fastbuf *f)
+{
+  struct fb_mmap *F = FB_MMAP(f);
+
+  DBG("Refill <- %p %p %p %p", f->buffer, f->bptr, f->bstop, f->bufend);
+  if (f->pos >= F->file_size)
+    return 0;
+  if (f->bstop >= f->bufend)
+    bfmm_map_window(f);
+  if (F->window_pos + (f->bufend - f->buffer) > F->file_size)
+    f->bstop = f->buffer + (F->file_size - F->window_pos);
+  else
+    f->bstop = f->bufend;
+  f->pos = F->window_pos + (f->bstop - f->buffer);
+  DBG(" -> %p %p %p(%x) %p", f->buffer, f->bptr, f->bstop, (int)f->pos, f->bufend);
+  return 1;
+}
+/*
+ * Spout callback: data are written straight into the mapping, so this
+ * only updates the logical file size and, once the window is full,
+ * extends the file on disk if needed and maps the following window.
+ */
+static void
+bfmm_spout(struct fastbuf *f)
+{
+  struct fb_mmap *F = FB_MMAP(f);
+  sh_off_t end = f->pos + (f->bptr - f->bstop);         /* file offset of the write pointer */
+
+  DBG("Spout <- %p %p %p %p", f->buffer, f->bptr, f->bstop, f->bufend);
+  if (end > F->file_size)
+    F->file_size = end;
+  if (f->bptr < f->bufend)                              /* still room in the window */
+    return;
+  f->pos = end;
+  if (f->pos >= F->file_extend)
+    {
+      F->file_extend = ALIGN_TO(F->file_extend + mmap_extend_size, (sh_off_t)CPU_PAGE_SIZE);
+      if (sh_ftruncate(F->fd, F->file_extend))
+        die("ftruncate(%s): %m", f->name);
+    }
+  bfmm_map_window(f);
+  f->bstop = f->bptr;
+  DBG(" -> %p %p %p(%x) %p", f->buffer, f->bptr, f->bstop, (int)f->pos, f->bufend);
+}
+/*
+ * Seek callback (SEEK_SET and SEEK_END only; the target must lie within
+ * the logical file size). The buffer pointers are collapsed, so the next
+ * read or write maps the right window. Always returns 1.
+ */
+static int
+bfmm_seek(struct fastbuf *f, sh_off_t pos, int whence)
+{
+  if (whence == SEEK_END)
+    pos += FB_MMAP(f)->file_size;
+  else
+    ASSERT(whence == SEEK_SET);
+  ASSERT(pos >= 0 && pos <= FB_MMAP(f)->file_size);
+  f->pos = pos;
+  f->bptr = f->bstop = f->bufend = f->buffer;   /* force refill/spout call */
+  DBG("Seek -> %p %p %p(%x) %p", f->buffer, f->bptr, f->bstop, (int)f->pos, f->bufend);
+  return 1;
+}
+/*
+ * Close callback: unmap the window, cut the file back to its logical
+ * size if it was extended beyond it, close it and free the fastbuf.
+ */
+static void
+bfmm_close(struct fastbuf *f)
+{
+  struct fb_mmap *F = FB_MMAP(f);
+
+  if (f->buffer)
+    munmap(f->buffer, F->window_size);
+  if (F->file_extend > F->file_size &&
+      sh_ftruncate(F->fd, F->file_size))
+    die("ftruncate(%s): %m", f->name);
+  bclose_file_helper(f, F->fd, F->is_temp_file);
+  xfree(f);
+}
+/*
+ * Config callback: only BCONFIG_IS_TEMP_FILE is supported; returns the
+ * previous value, or -1 for an unknown item.
+ */
+static int
+bfmm_config(struct fastbuf *f, uns item, int value)
+{
+  int orig;
+
+  switch (item)
+    {
+    case BCONFIG_IS_TEMP_FILE:
+      orig = FB_MMAP(f)->is_temp_file;
+      FB_MMAP(f)->is_temp_file = value;
+      return orig;
+    default:
+      return -1;
+    }
+}
+/*
+ * Create a memory-mapped fastbuf on an open descriptor. `mode' holds the
+ * open() flags: the access mode selects the mapping protection and
+ * O_APPEND starts at the end of the file. No window is mapped here.
+ */
+struct fastbuf *
+bfmmopen_internal(int fd, const char *name, uns mode)
+{
+  int namelen = strlen(name) + 1;
+  struct fb_mmap *F = xmalloc(sizeof(struct fb_mmap) + namelen);
+  struct fastbuf *f = &F->fb;
+
+  bzero(F, sizeof(*F));
+  f->name = (byte *)(F+1);                      /* the name is stored right behind the structure */
+  memcpy(f->name, name, namelen);
+  F->fd = fd;
+  F->file_extend = F->file_size = sh_seek(fd, 0, SEEK_END);
+  if (F->file_size < 0)
+    die("seek(%s): %m", name);
+  if (mode & O_APPEND)
+    f->pos = F->file_size;
+  F->mode = mode;
+
+  f->refill = bfmm_refill;
+  f->spout = bfmm_spout;
+  f->seek = bfmm_seek;
+  f->close = bfmm_close;
+  f->config = bfmm_config;
+  return f;
+}
+
+#ifdef TEST
+/* Self-test: copy argv[1] to argv[2] byte by byte, then exercise seeks
+ * followed by writes and reads on the output file. */
+int main(int argc, char **argv)
+{
+  struct fb_params par = { .type = FB_MMAP };
+  struct fastbuf *f = bopen_file(argv[1], O_RDONLY, &par);
+  struct fastbuf *g = bopen_file(argv[2], O_RDWR | O_CREAT | O_TRUNC, &par);
+  int c;
+
+  DBG("Copying");
+  while ((c = bgetc(f)) >= 0)
+    bputc(g, c);
+  bclose(f);
+  DBG("Seek inside last block");
+  bsetpos(g, btell(g)-1333);
+  bputc(g, 13);
+  DBG("Seek to the beginning & write");
+  bsetpos(g, 1333);
+  bputc(g, 13);
+  DBG("flush");
+
bflush(g); + bputc(g, 13); + bflush(g); + DBG("Seek nearby & read"); + bsetpos(g, 133); + bgetc(g); + DBG("Seek far & read"); + bsetpos(g, 133333); + bgetc(g); + DBG("Closing"); + bclose(g); + + return 0; +} + +#endif diff --git a/lib/fb-param.c b/lib/fb-param.c new file mode 100644 index 0000000..854ba78 --- /dev/null +++ b/lib/fb-param.c @@ -0,0 +1,184 @@ +/* + * UCW Library -- FastIO on files with run-time parametrization + * + * (c) 2007 Pavel Charvat + * (c) 2007 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#include "lib/lib.h" +#include "lib/conf.h" +#include "lib/lfs.h" +#include "lib/fastbuf.h" + +#include +#include + +struct fb_params fbpar_def = { + .buffer_size = 65536, + .read_ahead = 1, + .write_back = 1, +}; + +static char * +fbpar_cf_commit(struct fb_params *p UNUSED) +{ +#ifndef CONFIG_UCW_THREADS + if (p->type == FB_DIRECT) + return "Direct I/O is supported only with CONFIG_UCW_THREADS"; +#endif + return NULL; +} + +struct cf_section fbpar_cf = { +# define F(x) PTR_TO(struct fb_params, x) + CF_TYPE(struct fb_params), + CF_COMMIT(fbpar_cf_commit), + CF_ITEMS { + CF_LOOKUP("Type", (int *)F(type), ((char *[]){"std", "direct", "mmap", NULL})), + CF_UNS("BufSize", F(buffer_size)), + CF_UNS("KeepBackBuf", F(keep_back_buf)), + CF_UNS("ReadAhead", F(read_ahead)), + CF_UNS("WriteBack", F(write_back)), + CF_END + } +# undef F +}; + +static struct cf_section fbpar_global_cf = { + CF_ITEMS { + CF_SECTION("Defaults", &fbpar_def, &fbpar_cf), + CF_END + } +}; + +static void CONSTRUCTOR +fbpar_global_init(void) +{ + cf_declare_section("FBParam", &fbpar_global_cf, 0); +} + +static struct fastbuf * +bopen_fd_internal(int fd, struct fb_params *params, uns mode, const char *name) +{ + char buf[32]; + if (!name) + { + sprintf(buf, "fd%d", fd); + name = buf; + } + struct fastbuf *fb; + switch (params->type) + { +#ifdef CONFIG_UCW_THREADS + case FB_DIRECT: + fb = 
fbdir_open_fd_internal(fd, name, params->asio, + params->buffer_size ? : fbpar_def.buffer_size, + params->read_ahead ? : fbpar_def.read_ahead, + params->write_back ? : fbpar_def.write_back); + if (!~mode && !fbdir_cheat && ((int)(mode = fcntl(fd, F_GETFL)) < 0 || fcntl(fd, F_SETFL, mode | O_DIRECT)) < 0) + msg(L_WARN, "Cannot set O_DIRECT on fd %d: %m", fd); + return fb; +#endif + case FB_STD: + fb = bfdopen_internal(fd, name, + params->buffer_size ? : fbpar_def.buffer_size); + if (params->keep_back_buf) + bconfig(fb, BCONFIG_KEEP_BACK_BUF, 1); + return fb; + case FB_MMAP: + if (!~mode && (int)(mode = fcntl(fd, F_GETFL)) < 0) + die("Cannot get flags of fd %d: %m", fd); + return bfmmopen_internal(fd, name, mode); + default: + ASSERT(0); + } +} + +static struct fastbuf * +bopen_file_internal(const char *name, int mode, struct fb_params *params, int try) +{ + if (!params) + params = &fbpar_def; +#ifdef CONFIG_UCW_THREADS + if (params->type == FB_DIRECT && !fbdir_cheat) + mode |= O_DIRECT; +#endif + if (params->type == FB_MMAP && (mode & O_ACCMODE) == O_WRONLY) + mode = (mode & ~O_ACCMODE) | O_RDWR; + int fd = sh_open(name, mode, 0666); + if (fd < 0) + if (try) + return NULL; + else + die("Unable to %s file %s: %m", (mode & O_CREAT) ? "create" : "open", name); + struct fastbuf *fb = bopen_fd_internal(fd, params, mode, name); + ASSERT(fb); + if (mode & O_APPEND) + bseek(fb, 0, SEEK_END); + return fb; +} + +struct fastbuf * +bopen_file(const char *name, int mode, struct fb_params *params) +{ + return bopen_file_internal(name, mode, params, 0); +} + +struct fastbuf * +bopen_file_try(const char *name, int mode, struct fb_params *params) +{ + return bopen_file_internal(name, mode, params, 1); +} + +struct fastbuf * +bopen_fd(int fd, struct fb_params *params) +{ + return bopen_fd_internal(fd, params ? 
: &fbpar_def, ~0U, NULL); +} + +/* Function for use by individual file back-ends */ + +void +bclose_file_helper(struct fastbuf *f, int fd, int is_temp_file) +{ + switch (is_temp_file) + { + case 1: + if (unlink(f->name) < 0) + msg(L_ERROR, "unlink(%s): %m", f->name); + case 0: + if (close(fd)) + die("close(%s): %m", f->name); + } +} + +/* Compatibility wrappers */ + +struct fastbuf * +bopen_try(const char *name, uns mode, uns buflen) +{ + return bopen_file_try(name, mode, &(struct fb_params){ .type = FB_STD, .buffer_size = buflen }); +} + +struct fastbuf * +bopen(const char *name, uns mode, uns buflen) +{ + return bopen_file(name, mode, &(struct fb_params){ .type = FB_STD, .buffer_size = buflen }); +} + +struct fastbuf * +bfdopen(int fd, uns buflen) +{ + return bopen_fd(fd, &(struct fb_params){ .type = FB_STD, .buffer_size = buflen }); +} + +struct fastbuf * +bfdopen_shared(int fd, uns buflen) +{ + struct fastbuf *f = bfdopen(fd, buflen); + bconfig(f, BCONFIG_IS_TEMP_FILE, 2); + return f; +} diff --git a/lib/fb-pool.c b/lib/fb-pool.c new file mode 100644 index 0000000..e847b7d --- /dev/null +++ b/lib/fb-pool.c @@ -0,0 +1,81 @@ +/* + * UCW Library -- Fast Buffered I/O on Memory Pools + * + * (c) 2007 Pavel Charvat + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */ + +#include "lib/lib.h" +#include "lib/mempool.h" +#include "lib/fastbuf.h" + +#include +#include + +#define FB_POOL(f) ((struct fbpool *)(f)->is_fastbuf) + +static void +fbpool_spout(struct fastbuf *b) +{ + if (b->bptr >= b->bufend) + { + uns len = b->bufend - b->buffer; + b->buffer = mp_expand(FB_POOL(b)->mp); + b->bufend = b->buffer + mp_avail(FB_POOL(b)->mp); + b->bstop = b->buffer; + b->bptr = b->buffer + len; + } +} + +void +fbpool_start(struct fbpool *b, struct mempool *mp, uns init_size) +{ + b->mp = mp; + b->fb.buffer = b->fb.bstop = b->fb.bptr = mp_start(mp, init_size); + b->fb.bufend = b->fb.buffer + mp_avail(mp); +} + +void * +fbpool_end(struct fbpool *b) +{ + return mp_end(b->mp, b->fb.bptr); +} + +void +fbpool_init(struct fbpool *b) +{ + bzero(b, sizeof(*b)); + b->fb.name = ""; + b->fb.spout = fbpool_spout; + b->fb.can_overwrite_buffer = 1; +} + +#ifdef TEST + +int main(void) +{ + struct mempool *mp; + struct fbpool fb; + byte *p; + uns l; + + mp = mp_new(64); + fbpool_init(&fb); + fbpool_start(&fb, mp, 16); + for (uns i = 0; i < 1024; i++) + bprintf(&fb.fb, ""); + p = fbpool_end(&fb); + l = mp_size(mp, p); + if (l != 1024 * 7) + ASSERT(0); + for (uns i = 0; i < 1024; i++) + if (memcmp(p + i * 7, "", 7)) + ASSERT(0); + mp_delete(mp); + + return 0; +} + +#endif diff --git a/lib/fb-temp.c b/lib/fb-temp.c new file mode 100644 index 0000000..1975c8c --- /dev/null +++ b/lib/fb-temp.c @@ -0,0 +1,86 @@ +/* + * UCW Library -- Temporary Fastbufs + * + * (c) 2002--2007 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */ + +#include "lib/lib.h" +#include "lib/conf.h" +#include "lib/fastbuf.h" +#include "lib/threads.h" + +#include +#include +#include + +static char *temp_prefix = "/tmp/temp"; + +static struct cf_section temp_config = { + CF_ITEMS { + CF_STRING("Prefix", &temp_prefix), + CF_END + } +}; + +static void CONSTRUCTOR temp_global_init(void) +{ + cf_declare_section("Tempfiles", &temp_config, 0); +} + +void +temp_file_name(char *buf) +{ + struct ucwlib_context *ctx = ucwlib_thread_context(); + int cnt = ++ctx->temp_counter; + int pid = getpid(); + if (ctx->thread_id == pid) + sprintf(buf, "%s%d-%d", temp_prefix, pid, cnt); + else + sprintf(buf, "%s%d-%d-%d", temp_prefix, pid, ctx->thread_id, cnt); +} + +struct fastbuf * +bopen_tmp_file(struct fb_params *params) +{ + char name[TEMP_FILE_NAME_LEN]; + temp_file_name(name); + struct fastbuf *fb = bopen_file(name, O_RDWR | O_CREAT | O_TRUNC, params); + bconfig(fb, BCONFIG_IS_TEMP_FILE, 1); + return fb; +} + +struct fastbuf * +bopen_tmp(uns buflen) +{ + return bopen_tmp_file(&(struct fb_params){ .type = FB_STD, .buffer_size = buflen }); +} + +void bfix_tmp_file(struct fastbuf *fb, const char *name) +{ + int was_temp = bconfig(fb, BCONFIG_IS_TEMP_FILE, 0); + ASSERT(was_temp == 1); + if (rename(fb->name, name)) + die("Cannot rename %s to %s: %m", fb->name, name); + bclose(fb); +} + +#ifdef TEST + +#include "lib/getopt.h" + +int main(int argc, char **argv) +{ + log_init(NULL); + if (cf_getopt(argc, argv, CF_SHORT_OPTS, CF_NO_LONG_OPTS, NULL) >= 0) + die("Hey, whaddya want?"); + + struct fastbuf *f = bopen_tmp(65536); + bputsn(f, "Hello, world!"); + bclose(f); + return 0; +} + +#endif diff --git a/lib/ff-binary.c b/lib/ff-binary.c new file mode 100644 index 0000000..81d36df --- /dev/null +++ b/lib/ff-binary.c @@ -0,0 +1,34 @@ +/* + * UCW Library -- Fast Buffered I/O: Binary Numbers + * + * (c) 1997--2006 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General 
Public License. + */ + +#include "lib/lib.h" +#include "lib/fastbuf.h" +#include "lib/ff-binary.h" + +#define GEN(type, name, size, endian) \ +type bget##name##_##endian##_slow(struct fastbuf *f) \ +{ \ + byte buf[size/8]; \ + if (bread(f, buf, sizeof(buf)) != sizeof(buf)) \ + return ~(type)0; \ + return get_u##size##_##endian(buf); \ +} \ +void bput##name##_##endian##_##slow(struct fastbuf *f, type x) \ +{ \ + byte buf[size/8]; \ + put_u##size##_##endian(buf, x); \ + bwrite_slow(f, buf, sizeof(buf)); \ +} + +#define FF_ALL(type, name, size) GEN(type,name,size,be) GEN(type,name,size,le) + +FF_ALL(int, w, 16) +FF_ALL(uns, l, 32) +FF_ALL(u64, q, 64) +FF_ALL(u64, 5, 40) diff --git a/lib/ff-binary.h b/lib/ff-binary.h new file mode 100644 index 0000000..171ff81 --- /dev/null +++ b/lib/ff-binary.h @@ -0,0 +1,81 @@ +/* + * UCW Library -- Fast Buffered I/O on Binary Values + * + * (c) 1997--2007 Martin Mares + * (c) 2004 Robert Spalek + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */ + +#ifndef _UCW_FF_BINARY_H +#define _UCW_FF_BINARY_H + +#include "lib/fastbuf.h" +#include "lib/unaligned.h" + +#ifdef CPU_BIG_ENDIAN +#define FF_ENDIAN be +#else +#define FF_ENDIAN le +#endif +/* GET_FUNC: declare bget<name>_<endian>_slow() and define an inline reader that decodes directly from the buffer when bits/8 bytes are readable. */ +#define GET_FUNC(type, name, bits, endian) \ + type bget##name##_##endian##_slow(struct fastbuf *f); \ + static inline type bget##name##_##endian(struct fastbuf *f) \ + { \ + if (bavailr(f) >= bits/8) \ + { \ + type w = get_u##bits##_##endian(f->bptr); \ + f->bptr += bits/8; \ + return w; \ + } \ + else \ + return bget##name##_##endian##_slow(f); \ + } +/* PUT_FUNC: the writing counterpart of GET_FUNC; the generated function is void, so the slow path is called without `return'. */ +#define PUT_FUNC(type, name, bits, endian) \ + void bput##name##_##endian##_slow(struct fastbuf *f, type x); \ + static inline void bput##name##_##endian(struct fastbuf *f, type x) \ + { \ + if (bavailw(f) >= bits/8) \ + { \ + put_u##bits##_##endian(f->bptr, x); \ + f->bptr += bits/8; \ + } \ + else \ + bput##name##_##endian##_slow(f, x); \ + } +/* FF_ALL_X: generate both endian variants plus bget<name>()/bput<name>() wrappers bound to defendian. */ +#define FF_ALL_X(type, name, bits, defendian) \ + GET_FUNC(type, name, bits, be) \ + GET_FUNC(type, name, bits, le) \ + PUT_FUNC(type, name, bits, be) \ + PUT_FUNC(type, name, bits, le) \ + static inline type bget##name(struct fastbuf *f) { return bget##name##_##defendian(f); } \ + static inline void bput##name(struct fastbuf *f, type x) { bput##name##_##defendian(f, x); } +/* FF_ALL: extra macro level so that FF_ENDIAN is expanded before token pasting. */ +#define FF_ALL(type, name, bits, defendian) FF_ALL_X(type, name, bits, defendian) + +FF_ALL(int, w, 16, FF_ENDIAN) +FF_ALL(uns, l, 32, FF_ENDIAN) +FF_ALL(u64, q, 64, FF_ENDIAN) +FF_ALL(u64, 5, 40, FF_ENDIAN) + +#undef GET_FUNC +#undef PUT_FUNC +#undef FF_ENDIAN +#undef FF_ALL_X +#undef FF_ALL + +/* I/O on uintptr_t (only native endianness) */ + +#ifdef CPU_64BIT_POINTERS +#define bputa(x,p) bputq(x,p) +#define bgeta(x) bgetq(x) +#else +#define bputa(x,p) bputl(x,p) +#define bgeta(x) bgetl(x) +#endif + +#endif diff --git a/lib/ff-printf.c b/lib/ff-printf.c new file mode 100644 index 0000000..0493092 --- /dev/null +++ b/lib/ff-printf.c @@ -0,0 +1,83 @@ +/* + * UCW Library -- Printf on Fastbuf
Streams + * + * (c) 2002--2005 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#include "lib/lib.h" +#include "lib/fastbuf.h" + +#include <stdio.h> +#include <alloca.h> +/* printf() to a fastbuf with a va_list: format directly into the write buffer when it offers at least 16 bytes and the output fits, otherwise retry in a stack buffer that is enlarged until the output fits. Returns the number of bytes written. */ +int +vbprintf(struct fastbuf *b, const char *msg, va_list args) +{ + byte *buf; + int len, r; + va_list args2; + + len = bdirect_write_prepare(b, &buf); + if (len >= 16) + { + va_copy(args2, args); + r = vsnprintf(buf, len, msg, args2); + va_end(args2); + if (r < 0) + len = 256; + else if (r < len) + { + bdirect_write_commit(b, buf+r); + return r; + } + else + len = r+1; + } + else + len = 256; + + while (1) + { + buf = alloca(len); + va_copy(args2, args); + r = vsnprintf(buf, len, msg, args2); + va_end(args2); + if (r < 0) + len += len; + else if (r < len) + { + bwrite(b, buf, r); + return r; + } + else + len = r+1; + } +} +/* Variadic front end of vbprintf(); returns its result. */ +int +bprintf(struct fastbuf *b, const char *msg, ...) +{ + va_list args; + int res; + + va_start(args, msg); + res = vbprintf(b, msg, args); + va_end(args); + return res; +} + +#ifdef TEST + +int main(void) +{ + struct fastbuf *b = bfdopen_shared(1, 65536); + for (int i=0; i<10000; i++) + bprintf(b, "13=%d str=<%s> msg=%m\n", 13, "str"); + bclose(b); + return 0; +} + +#endif diff --git a/lib/ff-string.c b/lib/ff-string.c new file mode 100644 index 0000000..913c335 --- /dev/null +++ b/lib/ff-string.c @@ -0,0 +1,286 @@ +/* + * UCW Library -- Fast Buffered I/O: Strings + * + * (c) 1997--2006 Martin Mares + * (c) 2006 Pavel Charvat + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License.
+ */ + +#include "lib/lib.h" +#include "lib/fastbuf.h" +#include "lib/mempool.h" +#include "lib/bbuf.h" + +char * /* Non-standard */ +bgets(struct fastbuf *f, char *b, uns l) +{ + ASSERT(l); + byte *src; + uns src_len = bdirect_read_prepare(f, &src); + if (!src_len) + return NULL; + do + { + uns cnt = MIN(l, src_len); + for (uns i = cnt; i--;) + { + byte v = *src++; + if (v == '\n') + { + bdirect_read_commit(f, src); + goto exit; + } + *b++ = v; + } + if (unlikely(cnt == l)) + die("%s: Line too long", f->name); + l -= cnt; + bdirect_read_commit(f, src); + src_len = bdirect_read_prepare(f, &src); + } + while (src_len); +exit: + *b = 0; + return b; +} + +int +bgets_nodie(struct fastbuf *f, char *b, uns l) +{ + ASSERT(l); + byte *src, *start = b; + uns src_len = bdirect_read_prepare(f, &src); + if (!src_len) + return 0; + do + { + uns cnt = MIN(l, src_len); + for (uns i = cnt; i--;) + { + byte v = *src++; + if (v == '\n') + { + bdirect_read_commit(f, src); + goto exit; + } + *b++ = v; + } + bdirect_read_commit(f, src); + if (cnt == l) + return -1; + l -= cnt; + src_len = bdirect_read_prepare(f, &src); + } + while (src_len); +exit: + *b++ = 0; + return b - (char *)start; +} + +uns +bgets_bb(struct fastbuf *f, struct bb_t *bb, uns limit) +{ + ASSERT(limit); + byte *src; + uns src_len = bdirect_read_prepare(f, &src); + if (!src_len) + return 0; + bb_grow(bb, 1); + byte *buf = bb->ptr; + uns len = 0, buf_len = MIN(bb->len, limit); + do + { + uns cnt = MIN(src_len, buf_len); + for (uns i = cnt; i--;) + { + byte v = *src++; + if (v == '\n') + { + bdirect_read_commit(f, src); + goto exit; + } + *buf++ = v; + } + len += cnt; + if (cnt == src_len) + { + bdirect_read_commit(f, src); + src_len = bdirect_read_prepare(f, &src); + } + else + src_len -= cnt; + if (cnt == buf_len) + { + if (unlikely(len == limit)) + die("%s: Line too long", f->name); + bb_do_grow(bb, len + 1); + buf = bb->ptr + len; + buf_len = MIN(bb->len, limit) - len; + } + else + buf_len -= cnt; + } + while 
(src_len); +exit: + *buf++ = 0; + return buf - bb->ptr; +} + +char * +bgets_mp(struct fastbuf *f, struct mempool *mp) +{ + byte *src; + uns src_len = bdirect_read_prepare(f, &src); + if (!src_len) + return NULL; +#define BLOCK_SIZE (4096 - sizeof(void *)) + struct block { + struct block *prev; + byte data[BLOCK_SIZE]; + } *blocks = NULL; + uns sum = 0, buf_len = BLOCK_SIZE, cnt; + struct block first_block, *new_block = &first_block; + byte *buf = new_block->data; + do + { + cnt = MIN(src_len, buf_len); + for (uns i = cnt; i--;) + { + byte v = *src++; + if (v == '\n') + { + bdirect_read_commit(f, src); + goto exit; + } + *buf++ = v; + } + if (cnt == src_len) + { + bdirect_read_commit(f, src); + src_len = bdirect_read_prepare(f, &src); + } + else + src_len -= cnt; + if (cnt == buf_len) + { + new_block->prev = blocks; + blocks = new_block; + sum += buf_len = BLOCK_SIZE; + new_block = alloca(sizeof(struct block)); + buf = new_block->data; + } + else + buf_len -= cnt; + } + while (src_len); +exit: ; + uns len = buf - new_block->data; + byte *result = mp_alloc(mp, sum + len + 1) + sum; + result[len] = 0; + memcpy(result, new_block->data, len); + while (blocks) + { + result -= BLOCK_SIZE; + memcpy(result, blocks->data, BLOCK_SIZE); + blocks = blocks->prev; + } + return result; +#undef BLOCK_SIZE +} + +void +bgets_stk_init(struct bgets_stk_struct *s) +{ + s->src_len = bdirect_read_prepare(s->f, &s->src); + if (!s->src_len) + { + s->cur_buf = NULL; + s->cur_len = 0; + } + else + { + s->old_buf = NULL; + s->cur_len = 256; + } +} + +void +bgets_stk_step(struct bgets_stk_struct *s) +{ + byte *buf = s->cur_buf; + uns buf_len = s->cur_len; + if (s->old_buf) + { + memcpy( s->cur_buf, s->old_buf, s->old_len); + buf += s->old_len; + buf_len -= s->old_len; + } + do + { + uns cnt = MIN(s->src_len, buf_len); + for (uns i = cnt; i--;) + { + byte v = *s->src++; + if (v == '\n') + { + bdirect_read_commit(s->f, s->src); + goto exit; + } + *buf++ = v; + } + if (cnt == s->src_len) + { + 
bdirect_read_commit(s->f, s->src); + s->src_len = bdirect_read_prepare(s->f, &s->src); + } + else + s->src_len -= cnt; + if (cnt == buf_len) + { + s->old_len = s->cur_len; + s->old_buf = s->cur_buf; + s->cur_len *= 2; + return; + } + else + buf_len -= cnt; + } + while (s->src_len); +exit: + *buf = 0; + s->cur_len = 0; +} + +char * +bgets0(struct fastbuf *f, char *b, uns l) +{ + ASSERT(l); + byte *src; + uns src_len = bdirect_read_prepare(f, &src); + if (!src_len) + return NULL; + do + { + uns cnt = MIN(l, src_len); + for (uns i = cnt; i--;) + { + *b = *src++; + if (!*b) + { + bdirect_read_commit(f, src); + return b; + } + b++; + } + if (unlikely(cnt == l)) + die("%s: Line too long", f->name); + l -= cnt; + bdirect_read_commit(f, src); + src_len = bdirect_read_prepare(f, &src); + } + while (src_len); + *b = 0; + return b; +} diff --git a/lib/ff-unicode.c b/lib/ff-unicode.c new file mode 100644 index 0000000..6057e24 --- /dev/null +++ b/lib/ff-unicode.c @@ -0,0 +1,346 @@ +/* + * UCW Library: Reading and writing of UTF-8 on Fastbuf Streams + * + * (c) 2001--2004 Martin Mares + * (c) 2004 Robert Spalek + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */ + +#include "lib/lib.h" +#include "lib/fastbuf.h" +#include "lib/unicode.h" +#include "lib/ff-unicode.h" +#include "lib/ff-binary.h" + +/*** UTF-8 ***/ + +int +bget_utf8_slow(struct fastbuf *b, uns repl) +{ + int c = bgetc(b); + int code; + + if (c < 0x80) /* Includes EOF */ + return c; + if (c < 0xc0) /* Incorrect combination */ + return repl; + if (c >= 0xf0) /* Too large, skip it */ + { + while ((c = bgetc(b)) >= 0x80 && c < 0xc0) + ; + goto wrong; + } + if (c >= 0xe0) /* 3 bytes */ + { + code = c & 0x0f; + if ((c = bgetc(b)) < 0x80 || c >= 0xc0) + goto wrong; + code = (code << 6) | (c & 0x3f); + if ((c = bgetc(b)) < 0x80 || c >= 0xc0) + goto wrong; + code = (code << 6) | (c & 0x3f); + } + else /* 2 bytes */ + { + code = c & 0x1f; + if ((c = bgetc(b)) < 0x80 || c >= 0xc0) + goto wrong; + code = (code << 6) | (c & 0x3f); + } + return code; + + wrong: + if (c >= 0) + bungetc(b); + return repl; +} + +int +bget_utf8_32_slow(struct fastbuf *b, uns repl) +{ + int c = bgetc(b); + int code; + int nr; + + if (c < 0x80) /* Includes EOF */ + return c; + if (c < 0xc0) /* Incorrect combination */ + return repl; + if (c < 0xe0) + { + code = c & 0x1f; + nr = 1; + } + else if (c < 0xf0) + { + code = c & 0x0f; + nr = 2; + } + else if (c < 0xf8) + { + code = c & 0x07; + nr = 3; + } + else if (c < 0xfc) + { + code = c & 0x03; + nr = 4; + } + else if (c < 0xfe) + { + code = c & 0x01; + nr = 5; + } + else /* Too large, skip it */ + { + while ((c = bgetc(b)) >= 0x80 && c < 0xc0) + ; + goto wrong; + } + while (nr-- > 0) + { + if ((c = bgetc(b)) < 0x80 || c >= 0xc0) + goto wrong; + code = (code << 6) | (c & 0x3f); + } + return code; + + wrong: + if (c >= 0) + bungetc(b); + return repl; +} + +void +bput_utf8_slow(struct fastbuf *b, uns u) +{ + ASSERT(u < 65536); + if (u < 0x80) + bputc(b, u); + else + { + if (u < 0x800) + bputc(b, 0xc0 | (u >> 6)); + else + { + bputc(b, 0xe0 | (u >> 12)); + bputc(b, 0x80 | ((u >> 6) & 0x3f)); + } + bputc(b, 0x80 | (u & 0x3f)); + } +} + +void 
+bput_utf8_32_slow(struct fastbuf *b, uns u) +{ + ASSERT(u < (1U<<31)); + if (u < 0x80) + bputc(b, u); + else + { + if (u < 0x800) + bputc(b, 0xc0 | (u >> 6)); + else + { + if (u < (1<<16)) + bputc(b, 0xe0 | (u >> 12)); + else + { + if (u < (1<<21)) + bputc(b, 0xf0 | (u >> 18)); + else + { + if (u < (1<<26)) + bputc(b, 0xf8 | (u >> 24)); + else + { + bputc(b, 0xfc | (u >> 30)); + bputc(b, 0x80 | ((u >> 24) & 0x3f)); + } + bputc(b, 0x80 | ((u >> 18) & 0x3f)); + } + bputc(b, 0x80 | ((u >> 12) & 0x3f)); + } + bputc(b, 0x80 | ((u >> 6) & 0x3f)); + } + bputc(b, 0x80 | (u & 0x3f)); + } +} + +/*** UTF-16 ***/ + +int +bget_utf16_be_slow(struct fastbuf *b, uns repl) +{ + if (bpeekc(b) < 0) + return -1; + uns u = bgetw_be(b), x, y; + if ((int)u < 0) + return repl; + if ((x = u - 0xd800) >= 0x800) + return u; + if (x >= 0x400 || bpeekc(b) < 0 || (y = bgetw_be(b) - 0xdc00) >= 0x400) + return repl; + return 0x10000 + (x << 10) + y; +} + +int +bget_utf16_le_slow(struct fastbuf *b, uns repl) +{ + if (bpeekc(b) < 0) + return -1; + uns u = bgetw_le(b), x, y; + if ((int)u < 0) + return repl; + if ((x = u - 0xd800) >= 0x800) + return u; + if (x >= 0x400 || bpeekc(b) < 0 || (y = bgetw_le(b) - 0xdc00) >= 0x400) + return repl; + return 0x10000 + (x << 10) + y; +} + +void +bput_utf16_be_slow(struct fastbuf *b, uns u) +{ + if (u < 0xd800 || (u < 0x10000 && u >= 0xe000)) + { + bputc(b, u >> 8); + bputc(b, u & 0xff); + } + else if ((u -= 0x10000) < 0x100000) + { + bputc(b, 0xd8 | (u >> 18)); + bputc(b, (u >> 10) & 0xff); + bputc(b, 0xdc | ((u >> 8) & 0x3)); + bputc(b, u & 0xff); + } + else + ASSERT(0); +} + +void +bput_utf16_le_slow(struct fastbuf *b, uns u) +{ + if (u < 0xd800 || (u < 0x10000 && u >= 0xe000)) + { + bputc(b, u & 0xff); + bputc(b, u >> 8); + } + else if ((u -= 0x10000) < 0x100000) + { + bputc(b, (u >> 10) & 0xff); + bputc(b, 0xd8 | (u >> 18)); + bputc(b, u & 0xff); + bputc(b, 0xdc | ((u >> 8) & 0x3)); + } + else + ASSERT(0); +} + +#ifdef TEST + +#include +#include + +int 
main(int argc, char **argv) +{ +#define FUNCS \ + F(BGET_UTF8) F(BGET_UTF8_32) F(BGET_UTF16_BE) F(BGET_UTF16_LE) \ + F(BPUT_UTF8) F(BPUT_UTF8_32) F(BPUT_UTF16_BE) F(BPUT_UTF16_LE) + + enum { +#define F(x) FUNC_##x, + FUNCS +#undef F + }; + char *names[] = { +#define F(x) [FUNC_##x] = #x, + FUNCS +#undef F + }; + + uns func = ~0U; + if (argc > 1) + for (uns i = 0; i < ARRAY_SIZE(names); i++) + if (!strcasecmp(names[i], argv[1])) + func = i; + if (!~func) + { + fprintf(stderr, "Invalid usage!\n"); + return 1; + } + + struct fastbuf *b = fbgrow_create(8); + if (func < FUNC_BPUT_UTF8) + { + uns u; + while (scanf("%x", &u) == 1) + bputc(b, u); + fbgrow_rewind(b); + while (bpeekc(b) >= 0) + { + if (btell(b)) + putchar(' '); + switch (func) + { + case FUNC_BGET_UTF8: + u = bget_utf8_slow(b, UNI_REPLACEMENT); + break; + case FUNC_BGET_UTF8_32: + u = bget_utf8_32_slow(b, UNI_REPLACEMENT); + break; + case FUNC_BGET_UTF16_BE: + u = bget_utf16_be_slow(b, UNI_REPLACEMENT); + break; + case FUNC_BGET_UTF16_LE: + u = bget_utf16_le_slow(b, UNI_REPLACEMENT); + break; + default: + ASSERT(0); + } + printf("%04x", u); + } + putchar('\n'); + } + else + { + uns u, i = 0; + while (scanf("%x", &u) == 1) + { + switch (func) + { + case FUNC_BPUT_UTF8: + bput_utf8_slow(b, u); + break; + case FUNC_BPUT_UTF8_32: + bput_utf8_32_slow(b, u); + break; + case FUNC_BPUT_UTF16_BE: + bput_utf16_be_slow(b, u); + break; + case FUNC_BPUT_UTF16_LE: + bput_utf16_le_slow(b, u); + break; + default: + ASSERT(0); + } + fbgrow_rewind(b); + u = 0; + while (bpeekc(b) >= 0) + { + if (i++) + putchar(' '); + printf("%02x", bgetc(b)); + } + fbgrow_reset(b); + } + putchar('\n'); + } + bclose(b); + + return 0; +} + +#endif diff --git a/lib/ff-unicode.h b/lib/ff-unicode.h new file mode 100644 index 0000000..af39bbd --- /dev/null +++ b/lib/ff-unicode.h @@ -0,0 +1,144 @@ +/* + * UCW Library: Reading and writing of UTF-8 and UTF-16 on Fastbuf Streams + * + * (c) 2001--2004 Martin Mares + * (c) 2004 Robert Spalek + * (c) 
2007--2008 Pavel Charvat + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#ifndef _UCW_FF_UNICODE_H +#define _UCW_FF_UNICODE_H + +#include "lib/fastbuf.h" +#include "lib/unicode.h" + +/*** UTF-8 ***/ + +int bget_utf8_slow(struct fastbuf *b, uns repl); +int bget_utf8_32_slow(struct fastbuf *b, uns repl); +void bput_utf8_slow(struct fastbuf *b, uns u); +void bput_utf8_32_slow(struct fastbuf *b, uns u); + +static inline int +bget_utf8_repl(struct fastbuf *b, uns repl) +{ + uns u; + if (bavailr(b) >= 3) + { + b->bptr = utf8_get_repl(b->bptr, &u, repl); + return u; + } + else + return bget_utf8_slow(b, repl); +} + +static inline int +bget_utf8_32_repl(struct fastbuf *b, uns repl) +{ + uns u; + if (bavailr(b) >= 6) + { + b->bptr = utf8_32_get_repl(b->bptr, &u, repl); + return u; + } + else + return bget_utf8_32_slow(b, repl); +} + +static inline int +bget_utf8(struct fastbuf *b) +{ + return bget_utf8_repl(b, UNI_REPLACEMENT); +} + +static inline int +bget_utf8_32(struct fastbuf *b) +{ + return bget_utf8_32_repl(b, UNI_REPLACEMENT); +} + +static inline void +bput_utf8(struct fastbuf *b, uns u) +{ + if (bavailw(b) >= 3) + b->bptr = utf8_put(b->bptr, u); + else + bput_utf8_slow(b, u); +} + +static inline void +bput_utf8_32(struct fastbuf *b, uns u) +{ + if (bavailw(b) >= 6) + b->bptr = utf8_32_put(b->bptr, u); + else + bput_utf8_32_slow(b, u); +} + +/*** UTF-16 ***/ + +int bget_utf16_be_slow(struct fastbuf *b, uns repl); +int bget_utf16_le_slow(struct fastbuf *b, uns repl); +void bput_utf16_be_slow(struct fastbuf *b, uns u); +void bput_utf16_le_slow(struct fastbuf *b, uns u); + +static inline int +bget_utf16_be_repl(struct fastbuf *b, uns repl) +{ + uns u; + if (bavailr(b) >= 4) + { + b->bptr = utf16_be_get_repl(b->bptr, &u, repl); + return u; + } + else + return bget_utf16_be_slow(b, repl); +} + +static inline int +bget_utf16_le_repl(struct fastbuf *b, uns repl) +{ + uns u; + if 
(bavailr(b) >= 4) + { + b->bptr = utf16_le_get_repl(b->bptr, &u, repl); + return u; + } + else + return bget_utf16_le_slow(b, repl); +} +/* Read one big-endian UTF-16 character, using UNI_REPLACEMENT as the replacement value. */ +static inline int +bget_utf16_be(struct fastbuf *b) +{ + return bget_utf16_be_repl(b, UNI_REPLACEMENT); +} +/* Read one little-endian UTF-16 character, using UNI_REPLACEMENT as the replacement value. */ +static inline int +bget_utf16_le(struct fastbuf *b) +{ + return bget_utf16_le_repl(b, UNI_REPLACEMENT); +} +/* Write u as big-endian UTF-16: directly into the buffer when at least 4 bytes are writable, otherwise via the slow path. */ +static inline void +bput_utf16_be(struct fastbuf *b, uns u) +{ + if (bavailw(b) >= 4) + b->bptr = utf16_be_put(b->bptr, u); + else + bput_utf16_be_slow(b, u); +} +/* Write u as little-endian UTF-16: directly into the buffer when at least 4 bytes are writable, otherwise via the slow path. */ +static inline void +bput_utf16_le(struct fastbuf *b, uns u) +{ + if (bavailw(b) >= 4) + b->bptr = utf16_le_put(b->bptr, u); + else + bput_utf16_le_slow(b, u); +} +#define bput_utf16_lbe bput_utf16_le /* compatibility alias for the former misspelled name */ +#endif diff --git a/lib/ff-unicode.t b/lib/ff-unicode.t new file mode 100644 index 0000000..2a8b3df --- /dev/null +++ b/lib/ff-unicode.t @@ -0,0 +1,41 @@ +# Tests for the Unicode module + +Name: bput_utf8 +Run: ../obj/lib/ff-unicode-t bput_utf8 +In: 0041 0048 004f 004a +Out: 41 48 4f 4a + +Name: bget_utf8_32 +Run: ../obj/lib/ff-unicode-t bget_utf8_32 +In: fe 83 81 +Out: fffc + +Name: bput_utf16_be +Run: ../obj/lib/ff-unicode-t bput_utf16_be +In: 0041 004a 2a5f feff 0000 10ffff ffff 10000 +Out: 00 41 00 4a 2a 5f fe ff 00 00 db ff df ff ff ff d8 00 dc 00 + +Name: bput_utf16_le +Run: ../obj/lib/ff-unicode-t bput_utf16_le +In: 0041 004a 2a5f feff 0000 10ffff ffff 10000 +Out: 41 00 4a 00 5f 2a ff fe 00 00 ff db ff df ff ff 00 d8 00 dc + +Name: bget_utf16_be (1) +Run: ../obj/lib/ff-unicode-t bget_utf16_be +In: 00 41 00 4a 2a 5f fe ff 00 00 db ff df ff ff ff d8 00 dc 00 +Out: 0041 004a 2a5f feff 0000 10ffff ffff 10000 + +Name: bget_utf16_be (2) +Run: ../obj/lib/ff-unicode-t bget_utf16_be +In: dc 1a 2a 5f d8 01 d8 01 2a 5f d8 01 +Out: fffc 2a5f fffc 2a5f fffc + +Name: bget_utf16_le (1) +Run: ../obj/lib/ff-unicode-t bget_utf16_le +In: 41 00 4a 00 5f 2a ff fe 00 00 ff db ff df ff ff 00 d8 00 dc +Out: 0041 004a 2a5f feff 0000 10ffff ffff 10000 + +Name: bget_utf16_le (2) +Run: ../obj/lib/ff-unicode-t
bget_utf16_le +In: 1a dc 5f 2a 01 d8 01 d8 5f 2a 01 d8 +Out: fffc 2a5f fffc 2a5f fffc diff --git a/lib/ff-utf8.h b/lib/ff-utf8.h new file mode 100644 index 0000000..af7543f --- /dev/null +++ b/lib/ff-utf8.h @@ -0,0 +1,15 @@ +/* + * UCW Library: An alias for lib/ff-unicode.h (for backwards compatibility) + * + * (c) 2008 Pavel Charvat + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#ifndef _UCW_FF_UTF8_H +#define _UCW_FF_UTF8_H + +#include "lib/ff-unicode.h" + +#endif diff --git a/lib/gbuf.h b/lib/gbuf.h new file mode 100644 index 0000000..daf0bfb --- /dev/null +++ b/lib/gbuf.h @@ -0,0 +1,71 @@ +/* + * UCW Library -- A simple growing buffer + * + * (c) 2004, Robert Spalek + * (c) 2005, Martin Mares + * + * Define the following macros: + * + * GBUF_TYPE data type of records stored in the buffer + * GBUF_PREFIX(x) add a name prefix to all global symbols + * GBUF_TRACE(msg...) log growing of buffer [optional] + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */ + +#define BUF_T GBUF_PREFIX(t) + +typedef struct BUF_T +{ + uns len; + GBUF_TYPE *ptr; +} +BUF_T; + +static inline void +GBUF_PREFIX(init)(BUF_T *b) +{ + b->ptr = NULL; + b->len = 0; +} + +static void UNUSED +GBUF_PREFIX(done)(BUF_T *b) +{ + if (b->ptr) + xfree(b->ptr); + b->ptr = NULL; + b->len = 0; +} + +static void UNUSED +GBUF_PREFIX(set_size)(BUF_T *b, uns len) +{ + b->len = len; + b->ptr = xrealloc(b->ptr, len * sizeof(GBUF_TYPE)); +#ifdef GBUF_TRACE + GBUF_TRACE(STRINGIFY_EXPANDED(BUF_T) " growing to %u items", len); +#endif +} + +static void UNUSED +GBUF_PREFIX(do_grow)(BUF_T *b, uns len) +{ + if (len < 2*b->len) // to ensure logarithmic cost + len = 2*b->len; + GBUF_PREFIX(set_size)(b, len); +} + +static inline GBUF_TYPE * +GBUF_PREFIX(grow)(BUF_T *b, uns len) +{ + if (unlikely(len > b->len)) + GBUF_PREFIX(do_grow)(b, len); + return b->ptr; +} + +#undef GBUF_TYPE +#undef GBUF_PREFIX +#undef GBUF_TRACE +#undef BUF_T diff --git a/lib/getopt.c b/lib/getopt.c new file mode 100644 index 0000000..a15c935 --- /dev/null +++ b/lib/getopt.c @@ -0,0 +1,57 @@ +#include "lib/lib.h" +#include "lib/getopt.h" + +void +reset_getopt(void) +{ + // Should work on GNU libc + optind = 0; +} + +#ifdef TEST +#include + +static void +parse(int argc, char **argv) +{ + static struct option longopts[] = { + { "longa", 0, 0, 'a' }, + { "longb", 0, 0, 'b' }, + { "longc", 1, 0, 'c' }, + { "longd", 1, 0, 'd' }, + { 0, 0, 0, 0 } + }; + int opt; + while ((opt = getopt_long(argc, argv, "abc:d:", longopts, NULL)) >= 0) + switch (opt) + { + case 'a': + case 'b': + printf("option %c\n", opt); + break; + case 'c': + case 'd': + printf("option %c with value `%s'\n", opt, optarg); + break; + case '?': + printf("unknown option\n"); + break; + default: + printf("getopt returned unexpected char 0x%02x\n", opt); + break; + } + if (optind != argc) + printf("%d nonoption arguments\n", argc - optind); +} + +int +main(int argc, char **argv) +{ + opterr = 0; + parse(argc, argv); + printf("reset\n"); 
+ reset_getopt(); + parse(argc, argv); + return 0; +} +#endif diff --git a/lib/getopt.h b/lib/getopt.h new file mode 100644 index 0000000..b4ff823 --- /dev/null +++ b/lib/getopt.h @@ -0,0 +1,92 @@ +/* + * UCW Library -- Parsing of configuration and command-line options + * + * (c) 2001--2006 Robert Spalek + * (c) 2003--2006 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#ifndef _UCW_GETOPT_H +#define _UCW_GETOPT_H + +#ifdef CONFIG_OWN_GETOPT +#include "lib/getopt/getopt-sh.h" +#else +#include <getopt.h> +#endif + +void reset_getopt(void); + +/* Safe loading and reloading of configuration files: conf-input.c */ + +extern char *cf_def_file; /* DEFAULT_CONFIG; NULL if already loaded */ +extern char *cf_env_file; /* ENV_VAR_CONFIG */ +int cf_reload(const char *file); +int cf_load(const char *file); +int cf_set(const char *string); + +/* Direct access to configuration items: conf-intr.c */ + +#define CF_OPERATIONS T(CLOSE) T(SET) T(CLEAR) T(ALL) \ + T(APPEND) T(PREPEND) T(REMOVE) T(EDIT) T(AFTER) T(BEFORE) T(COPY) + /* Closing brace finishes previous block. + * Basic attributes (static, dynamic, parsed) can be used with SET. + * Dynamic arrays can be used with SET, APPEND, PREPEND. + * Sections can be used with SET. + * Lists can be used with everything.
*/ +#define T(x) OP_##x, +enum cf_operation { CF_OPERATIONS }; +#undef T + +struct cf_item; +char *cf_find_item(const char *name, struct cf_item *item); +char *cf_write_item(struct cf_item *item, enum cf_operation op, int number, char **pars); + +/* Debug dumping: conf-dump.c */ + +struct fastbuf; +void cf_dump_sections(struct fastbuf *fb); + +/* Journaling control: conf-journal.c */ + +struct cf_journal_item; +struct cf_journal_item *cf_journal_new_transaction(uns new_pool); +void cf_journal_commit_transaction(uns new_pool, struct cf_journal_item *oldj); +void cf_journal_rollback_transaction(uns new_pool, struct cf_journal_item *oldj); + +/* + * cf_getopt() takes care of parsing the command-line arguments, loading the + * default configuration file (cf_def_file) and processing configuration options. + * The calling convention is the same as with GNU getopt_long(), but you must prefix + * your own short/long options by the CF_(SHORT|LONG)_OPTS or pass CF_NO_LONG_OPTS + * if there are no long options. + * + * The default configuration file can be overridden by the --config option, + * which must come first. During parsing of all other options, the configuration + * is already available.
+ */ + +#define CF_SHORT_OPTS "C:S:" +#define CF_LONG_OPTS {"config", 1, 0, 'C'}, {"set", 1, 0, 'S'}, CF_LONG_OPTS_DEBUG +#define CF_NO_LONG_OPTS (const struct option []) { CF_LONG_OPTS { NULL, 0, 0, 0 } } +#ifndef CF_USAGE_TAB +#define CF_USAGE_TAB "" +#endif +#define CF_USAGE \ +"-C, --config filename\t" CF_USAGE_TAB "Override the default configuration file\n\ +-S, --set sec.item=val\t" CF_USAGE_TAB "Manual setting of a configuration item\n" CF_USAGE_DEBUG + +#ifdef CONFIG_DEBUG +#define CF_LONG_OPTS_DEBUG { "dumpconfig", 0, 0, 0x64436667 } , +#define CF_USAGE_DEBUG " --dumpconfig\t" CF_USAGE_TAB "Dump program configuration\n" +#else +#define CF_LONG_OPTS_DEBUG +#define CF_USAGE_DEBUG +#endif + +// conf-input.c +int cf_getopt(int argc, char * const argv[], const char *short_opts, const struct option *long_opts, int *long_index); + +#endif diff --git a/lib/getopt.t b/lib/getopt.t new file mode 100644 index 0000000..79e7fe2 --- /dev/null +++ b/lib/getopt.t @@ -0,0 +1,21 @@ +# Tests for getopt + +Run: ../obj/lib/getopt-t -a -b --longc 2819 -d -a 1 2 3 +Out: option a + option b + option c with value `2819' + option d with value `-a' + 3 nonoption arguments + reset + option a + option b + option c with value `2819' + option d with value `-a' + 3 nonoption arguments + +Run: ../obj/lib/getopt-t -a -x +Out: option a + unknown option + reset + option a + unknown option diff --git a/lib/getopt/Makefile b/lib/getopt/Makefile new file mode 100644 index 0000000..97692a2 --- /dev/null +++ b/lib/getopt/Makefile @@ -0,0 +1,5 @@ +# Makefile for the UCW GetOpt Library (c) 2007 Pavel Charvat + +DIRS+=lib/getopt + +LIBUCW_MODS+=getopt/getopt-sh diff --git a/lib/getopt/README b/lib/getopt/README new file mode 100644 index 0000000..fe24015 --- /dev/null +++ b/lib/getopt/README @@ -0,0 +1,12 @@ +This directory contains getopt routines from the GNU libc 2.5. +We need this as a fallback for our reset_getopt(), because there is +no standardized interface for such instruction. 
+ +They are distributed under the GNU LGPL. + +All files are exact copies of the original distribution with very +few exceptions commented with `// SHERLOCK' prefix. +I only provided my own getopt-sh.c, getopt-sh.h and Makefile. + + Pavel Charvat, 2007 + diff --git a/lib/getopt/getopt-sh.c b/lib/getopt/getopt-sh.c new file mode 100644 index 0000000..0fe271b --- /dev/null +++ b/lib/getopt/getopt-sh.c @@ -0,0 +1,4 @@ +#include "getopt-sh.h" +#include "getopt_int.h" +#include "getopt.c" +#include "getopt1.c" diff --git a/lib/getopt/getopt-sh.h b/lib/getopt/getopt-sh.h new file mode 100644 index 0000000..4597aca --- /dev/null +++ b/lib/getopt/getopt-sh.h @@ -0,0 +1,14 @@ +#ifndef _UCW_GETOPT_GETOPT_SH_H +#define _UCW_GETOPT_GETOPT_SH_H + +#define getopt sh_getopt +#define getopt_long sh_getopt_long +#define getopt_long_only sh_getopt_longonly +#define optarg sh_optarg +#define optind sh_optind +#define opterr sh_opterr +#define optopt sh_optopt + +#include "lib/getopt/getopt.h" + +#endif diff --git a/lib/getopt/getopt.c b/lib/getopt/getopt.c new file mode 100644 index 0000000..1e13775 --- /dev/null +++ b/lib/getopt/getopt.c @@ -0,0 +1,1226 @@ +/* Getopt for GNU. + NOTE: getopt is now part of the C library, so if you don't know what + "Keep this file name-space clean" means, talk to drepper@gnu.org + before changing it! + Copyright (C) 1987,88,89,90,91,92,93,94,95,96,98,99,2000,2001,2002,2003,2004 + Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +/* This tells Alpha OSF/1 not to define a getopt prototype in . + Ditto for AIX 3.2 and . */ +#ifndef _NO_PROTO +# define _NO_PROTO +#endif + +#ifdef HAVE_CONFIG_H +# include +#endif + +#include + +/* Comment out all this code if we are using the GNU C Library, and are not + actually compiling the library itself. This code is part of the GNU C + Library, but also included in many other GNU distributions. Compiling + and linking in this code is a waste when using the GNU C library + (especially if it is a shared library). Rather than having every GNU + program understand `configure --with-gnu-libc' and omit the object files, + it is simpler to just do this in the source for each such file. */ + +#define GETOPT_INTERFACE_VERSION 2 +#if !defined _LIBC && defined __GLIBC__ && __GLIBC__ >= 2 +# include +# if _GNU_GETOPT_INTERFACE_VERSION == GETOPT_INTERFACE_VERSION +//# define ELIDE_CODE // SHERLOCK: disabled +# endif +#endif + +#ifndef ELIDE_CODE + + +/* This needs to come after some library #include + to get __GNU_LIBRARY__ defined. */ +#ifdef __GNU_LIBRARY__ +/* Don't include stdlib.h for non-GNU C libraries because some of them + contain conflicting prototypes for getopt. */ +# include +# include +#endif /* GNU C library. 
*/ + +#include + +#ifdef VMS +# include +#endif + +#ifdef _LIBC +# include +#else +//# include "gettext.h" // SHERLOCK: replaced by +# include +# define _(msgid) gettext (msgid) +#endif + +#if defined _LIBC && defined USE_IN_LIBIO +# include +#endif + +#ifndef attribute_hidden +# define attribute_hidden +#endif + +/* This version of `getopt' appears to the caller like standard Unix `getopt' + but it behaves differently for the user, since it allows the user + to intersperse the options with the other arguments. + + As `getopt' works, it permutes the elements of ARGV so that, + when it is done, all the options precede everything else. Thus + all application programs are extended to handle flexible argument order. + + Setting the environment variable POSIXLY_CORRECT disables permutation. + Then the behavior is completely standard. + + GNU application programs can use a third alternative mode in which + they can distinguish the relative order of options and other arguments. */ + +#include "getopt.h" +#include "getopt_int.h" + +/* For communication from `getopt' to the caller. + When `getopt' finds an option that takes an argument, + the argument value is returned here. + Also, when `ordering' is RETURN_IN_ORDER, + each non-option ARGV-element is returned here. */ + +char *optarg; + +/* Index in ARGV of the next element to be scanned. + This is used for communication to and from the caller + and for communication between successive calls to `getopt'. + + On entry to `getopt', zero means this is the first call; initialize. + + When `getopt' returns -1, this is the index of the first of the + non-option elements that the caller should itself scan. + + Otherwise, `optind' communicates from one call to the next + how much of ARGV has been scanned so far. */ + +/* 1003.2 says this must be 1 before any call. */ +int optind = 1; + +/* Callers store zero here to inhibit the error message + for unrecognized options. 
*/ + +int opterr = 1; + +/* Set to an option character which was unrecognized. + This must be initialized on some systems to avoid linking in the + system's own getopt implementation. */ + +int optopt = '?'; + +/* Keep a global copy of all internal members of getopt_data. */ + +static struct _getopt_data getopt_data; + + +#ifndef __GNU_LIBRARY__ + +/* Avoid depending on library functions or files + whose names are inconsistent. */ + +#ifndef getenv +extern char *getenv (); +#endif + +#endif /* not __GNU_LIBRARY__ */ + +#ifdef _LIBC +/* Stored original parameters. + XXX This is no good solution. We should rather copy the args so + that we can compare them later. But we must not use malloc(3). */ +extern int __libc_argc; +extern char **__libc_argv; + +/* Bash 2.0 gives us an environment variable containing flags + indicating ARGV elements that should not be considered arguments. */ + +# ifdef USE_NONOPTION_FLAGS +/* Defined in getopt_init.c */ +extern char *__getopt_nonoption_flags; +# endif + +# ifdef USE_NONOPTION_FLAGS +# define SWAP_FLAGS(ch1, ch2) \ + if (d->__nonoption_flags_len > 0) \ + { \ + char __tmp = __getopt_nonoption_flags[ch1]; \ + __getopt_nonoption_flags[ch1] = __getopt_nonoption_flags[ch2]; \ + __getopt_nonoption_flags[ch2] = __tmp; \ + } +# else +# define SWAP_FLAGS(ch1, ch2) +# endif +#else /* !_LIBC */ +# define SWAP_FLAGS(ch1, ch2) +#endif /* _LIBC */ + +/* Exchange two adjacent subsequences of ARGV. + One subsequence is elements [first_nonopt,last_nonopt) + which contains all the non-options that have been skipped so far. + The other is elements [last_nonopt,optind), which contains all + the options processed since those non-options were skipped. + + `first_nonopt' and `last_nonopt' are relocated so that they describe + the new indices of the non-options in ARGV after they are moved. 
*/ + +static void +exchange (char **argv, struct _getopt_data *d) +{ + int bottom = d->__first_nonopt; + int middle = d->__last_nonopt; + int top = d->optind; + char *tem; + + /* Exchange the shorter segment with the far end of the longer segment. + That puts the shorter segment into the right place. + It leaves the longer segment in the right place overall, + but it consists of two parts that need to be swapped next. */ + +#if defined _LIBC && defined USE_NONOPTION_FLAGS + /* First make sure the handling of the `__getopt_nonoption_flags' + string can work normally. Our top argument must be in the range + of the string. */ + if (d->__nonoption_flags_len > 0 && top >= d->__nonoption_flags_max_len) + { + /* We must extend the array. The user plays games with us and + presents new arguments. */ + char *new_str = malloc (top + 1); + if (new_str == NULL) + d->__nonoption_flags_len = d->__nonoption_flags_max_len = 0; + else + { + memset (__mempcpy (new_str, __getopt_nonoption_flags, + d->__nonoption_flags_max_len), + '\0', top + 1 - d->__nonoption_flags_max_len); + d->__nonoption_flags_max_len = top + 1; + __getopt_nonoption_flags = new_str; + } + } +#endif + + while (top > middle && middle > bottom) + { + if (top - middle > middle - bottom) + { + /* Bottom segment is the short one. */ + int len = middle - bottom; + register int i; + + /* Swap it with the top part of the top segment. */ + for (i = 0; i < len; i++) + { + tem = argv[bottom + i]; + argv[bottom + i] = argv[top - (middle - bottom) + i]; + argv[top - (middle - bottom) + i] = tem; + SWAP_FLAGS (bottom + i, top - (middle - bottom) + i); + } + /* Exclude the moved bottom segment from further swapping. */ + top -= len; + } + else + { + /* Top segment is the short one. */ + int len = top - middle; + register int i; + + /* Swap it with the bottom part of the bottom segment. 
*/ + for (i = 0; i < len; i++) + { + tem = argv[bottom + i]; + argv[bottom + i] = argv[middle + i]; + argv[middle + i] = tem; + SWAP_FLAGS (bottom + i, middle + i); + } + /* Exclude the moved top segment from further swapping. */ + bottom += len; + } + } + + /* Update records for the slots the non-options now occupy. */ + + d->__first_nonopt += (d->optind - d->__last_nonopt); + d->__last_nonopt = d->optind; +} + +/* Initialize the internal data when the first call is made. */ + +static const char * +_getopt_initialize (int argc, char *const *argv, const char *optstring, + struct _getopt_data *d) +{ + /* Start processing options with ARGV-element 1 (since ARGV-element 0 + is the program name); the sequence of previously skipped + non-option ARGV-elements is empty. */ + + d->__first_nonopt = d->__last_nonopt = d->optind; + + d->__nextchar = NULL; + + d->__posixly_correct = !!getenv ("POSIXLY_CORRECT"); + + /* Determine how to handle the ordering of options and nonoptions. */ + + if (optstring[0] == '-') + { + d->__ordering = RETURN_IN_ORDER; + ++optstring; + } + else if (optstring[0] == '+') + { + d->__ordering = REQUIRE_ORDER; + ++optstring; + } + else if (d->__posixly_correct) + d->__ordering = REQUIRE_ORDER; + else + d->__ordering = PERMUTE; + +#if defined _LIBC && defined USE_NONOPTION_FLAGS + if (!d->__posixly_correct + && argc == __libc_argc && argv == __libc_argv) + { + if (d->__nonoption_flags_max_len == 0) + { + if (__getopt_nonoption_flags == NULL + || __getopt_nonoption_flags[0] == '\0') + d->__nonoption_flags_max_len = -1; + else + { + const char *orig_str = __getopt_nonoption_flags; + int len = d->__nonoption_flags_max_len = strlen (orig_str); + if (d->__nonoption_flags_max_len < argc) + d->__nonoption_flags_max_len = argc; + __getopt_nonoption_flags = + (char *) malloc (d->__nonoption_flags_max_len); + if (__getopt_nonoption_flags == NULL) + d->__nonoption_flags_max_len = -1; + else + memset (__mempcpy (__getopt_nonoption_flags, orig_str, len), + '\0', 
d->__nonoption_flags_max_len - len); + } + } + d->__nonoption_flags_len = d->__nonoption_flags_max_len; + } + else + d->__nonoption_flags_len = 0; +#endif + + return optstring; +} + +/* Scan elements of ARGV (whose length is ARGC) for option characters + given in OPTSTRING. + + If an element of ARGV starts with '-', and is not exactly "-" or "--", + then it is an option element. The characters of this element + (aside from the initial '-') are option characters. If `getopt' + is called repeatedly, it returns successively each of the option characters + from each of the option elements. + + If `getopt' finds another option character, it returns that character, + updating `optind' and `nextchar' so that the next call to `getopt' can + resume the scan with the following option character or ARGV-element. + + If there are no more option characters, `getopt' returns -1. + Then `optind' is the index in ARGV of the first ARGV-element + that is not an option. (The ARGV-elements have been permuted + so that those that are not options now come last.) + + OPTSTRING is a string containing the legitimate option characters. + If an option character is seen that is not listed in OPTSTRING, + return '?' after printing an error message. If you set `opterr' to + zero, the error message is suppressed but we still return '?'. + + If a char in OPTSTRING is followed by a colon, that means it wants an arg, + so the following text in the same ARGV-element, or the text of the following + ARGV-element, is returned in `optarg'. Two colons mean an option that + wants an optional arg; if there is text in the current ARGV-element, + it is returned in `optarg', otherwise `optarg' is set to zero. + + If OPTSTRING starts with `-' or `+', it requests different methods of + handling the non-option ARGV-elements. + See the comments about RETURN_IN_ORDER and REQUIRE_ORDER, above. + + Long-named options begin with `--' instead of `-'. 
+ Their names may be abbreviated as long as the abbreviation is unique + or is an exact match for some defined option. If they have an + argument, it follows the option name in the same ARGV-element, separated + from the option name by a `=', or else in the next ARGV-element. + When `getopt' finds a long-named option, it returns 0 if that option's + `flag' field is nonzero, the value of the option's `val' field + if the `flag' field is zero. + + The elements of ARGV aren't really const, because we permute them. + But we pretend they're const in the prototype to be compatible + with other systems. + + LONGOPTS is a vector of `struct option' terminated by an + element containing a name which is zero. + + LONGIND returns the index in LONGOPTS of the long-named option found. + It is only valid when a long-named option has been found by the most + recent call. + + If LONG_ONLY is nonzero, '-' as well as '--' can introduce + long-named options. */ + +int +_getopt_internal_r (int argc, char *const *argv, const char *optstring, + const struct option *longopts, int *longind, + int long_only, struct _getopt_data *d) +{ + int print_errors = d->opterr; + if (optstring[0] == ':') + print_errors = 0; + + if (argc < 1) + return -1; + + d->optarg = NULL; + + if (d->optind == 0 || !d->__initialized) + { + if (d->optind == 0) + d->optind = 1; /* Don't scan ARGV[0], the program name. */ + optstring = _getopt_initialize (argc, argv, optstring, d); + d->__initialized = 1; + } + + /* Test whether ARGV[optind] points to a non-option argument. + Either it does not have option syntax, or there is an environment flag + from the shell indicating it is not an option. The latter information + is only used when this code is built as part of the GNU libc. 
*/ +#if defined _LIBC && defined USE_NONOPTION_FLAGS +# define NONOPTION_P (argv[d->optind][0] != '-' || argv[d->optind][1] == '\0' \ + || (d->optind < d->__nonoption_flags_len \ + && __getopt_nonoption_flags[d->optind] == '1')) +#else +# define NONOPTION_P (argv[d->optind][0] != '-' || argv[d->optind][1] == '\0') +#endif + + if (d->__nextchar == NULL || *d->__nextchar == '\0') + { + /* Advance to the next ARGV-element. */ + + /* Give FIRST_NONOPT & LAST_NONOPT rational values if OPTIND has been + moved back by the user (who may also have changed the arguments). */ + if (d->__last_nonopt > d->optind) + d->__last_nonopt = d->optind; + if (d->__first_nonopt > d->optind) + d->__first_nonopt = d->optind; + + if (d->__ordering == PERMUTE) + { + /* If we have just processed some options following some non-options, + exchange them so that the options come first. */ + + if (d->__first_nonopt != d->__last_nonopt + && d->__last_nonopt != d->optind) + exchange ((char **) argv, d); + else if (d->__last_nonopt != d->optind) + d->__first_nonopt = d->optind; + + /* Skip any additional non-options + and extend the range of non-options previously skipped. */ + + while (d->optind < argc && NONOPTION_P) + d->optind++; + d->__last_nonopt = d->optind; + } + + /* The special ARGV-element `--' means premature end of options. + Skip it like a null option, + then exchange with previous non-options as if it were an option, + then skip everything else like a non-option. */ + + if (d->optind != argc && !strcmp (argv[d->optind], "--")) + { + d->optind++; + + if (d->__first_nonopt != d->__last_nonopt + && d->__last_nonopt != d->optind) + exchange ((char **) argv, d); + else if (d->__first_nonopt == d->__last_nonopt) + d->__first_nonopt = d->optind; + d->__last_nonopt = argc; + + d->optind = argc; + } + + /* If we have done all the ARGV-elements, stop the scan + and back over any non-options that we skipped and permuted. 
*/ + + if (d->optind == argc) + { + /* Set the next-arg-index to point at the non-options + that we previously skipped, so the caller will digest them. */ + if (d->__first_nonopt != d->__last_nonopt) + d->optind = d->__first_nonopt; + return -1; + } + + /* If we have come to a non-option and did not permute it, + either stop the scan or describe it to the caller and pass it by. */ + + if (NONOPTION_P) + { + if (d->__ordering == REQUIRE_ORDER) + return -1; + d->optarg = argv[d->optind++]; + return 1; + } + + /* We have found another option-ARGV-element. + Skip the initial punctuation. */ + + d->__nextchar = (argv[d->optind] + 1 + + (longopts != NULL && argv[d->optind][1] == '-')); + } + + /* Decode the current option-ARGV-element. */ + + /* Check whether the ARGV-element is a long option. + + If long_only and the ARGV-element has the form "-f", where f is + a valid short option, don't consider it an abbreviated form of + a long option that starts with f. Otherwise there would be no + way to give the -f short option. + + On the other hand, if there's a long option "fubar" and + the ARGV-element is "-fu", do consider that an abbreviation of + the long option, just like "--fu", and not "-f" with arg "u". + + This distinction seems to be the most useful approach. */ + + if (longopts != NULL + && (argv[d->optind][1] == '-' + || (long_only && (argv[d->optind][2] + || !strchr (optstring, argv[d->optind][1]))))) + { + char *nameend; + const struct option *p; + const struct option *pfound = NULL; + int exact = 0; + int ambig = 0; + int indfound = -1; + int option_index; + + for (nameend = d->__nextchar; *nameend && *nameend != '='; nameend++) + /* Do nothing. */ ; + + /* Test all long options for either exact match + or abbreviated matches. 
*/ + for (p = longopts, option_index = 0; p->name; p++, option_index++) + if (!strncmp (p->name, d->__nextchar, nameend - d->__nextchar)) + { + if ((unsigned int) (nameend - d->__nextchar) + == (unsigned int) strlen (p->name)) + { + /* Exact match found. */ + pfound = p; + indfound = option_index; + exact = 1; + break; + } + else if (pfound == NULL) + { + /* First nonexact match found. */ + pfound = p; + indfound = option_index; + } + else if (long_only + || pfound->has_arg != p->has_arg + || pfound->flag != p->flag + || pfound->val != p->val) + /* Second or later nonexact match found. */ + ambig = 1; + } + + if (ambig && !exact) + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + if (__asprintf (&buf, _("%s: option `%s' is ambiguous\n"), + argv[0], argv[d->optind]) >= 0) + { + _IO_flockfile (stderr); + + int old_flags2 = ((_IO_FILE *) stderr)->_flags2; + ((_IO_FILE *) stderr)->_flags2 |= _IO_FLAGS2_NOTCANCEL; + + __fxprintf (NULL, "%s", buf); + + ((_IO_FILE *) stderr)->_flags2 = old_flags2; + _IO_funlockfile (stderr); + + free (buf); + } +#else + fprintf (stderr, _("%s: option `%s' is ambiguous\n"), + argv[0], argv[d->optind]); +#endif + } + d->__nextchar += strlen (d->__nextchar); + d->optind++; + d->optopt = 0; + return '?'; + } + + if (pfound != NULL) + { + option_index = indfound; + d->optind++; + if (*nameend) + { + /* Don't test has_arg with >, because some C compilers don't + allow it to be used on enums. 
*/ + if (pfound->has_arg) + d->optarg = nameend + 1; + else + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + int n; +#endif + + if (argv[d->optind - 1][1] == '-') + { + /* --option */ +#if defined _LIBC && defined USE_IN_LIBIO + n = __asprintf (&buf, _("\ +%s: option `--%s' doesn't allow an argument\n"), + argv[0], pfound->name); +#else + fprintf (stderr, _("\ +%s: option `--%s' doesn't allow an argument\n"), + argv[0], pfound->name); +#endif + } + else + { + /* +option or -option */ +#if defined _LIBC && defined USE_IN_LIBIO + n = __asprintf (&buf, _("\ +%s: option `%c%s' doesn't allow an argument\n"), + argv[0], argv[d->optind - 1][0], + pfound->name); +#else + fprintf (stderr, _("\ +%s: option `%c%s' doesn't allow an argument\n"), + argv[0], argv[d->optind - 1][0], + pfound->name); +#endif + } + +#if defined _LIBC && defined USE_IN_LIBIO + if (n >= 0) + { + _IO_flockfile (stderr); + + int old_flags2 = ((_IO_FILE *) stderr)->_flags2; + ((_IO_FILE *) stderr)->_flags2 + |= _IO_FLAGS2_NOTCANCEL; + + __fxprintf (NULL, "%s", buf); + + ((_IO_FILE *) stderr)->_flags2 = old_flags2; + _IO_funlockfile (stderr); + + free (buf); + } +#endif + } + + d->__nextchar += strlen (d->__nextchar); + + d->optopt = pfound->val; + return '?'; + } + } + else if (pfound->has_arg == 1) + { + if (d->optind < argc) + d->optarg = argv[d->optind++]; + else + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + if (__asprintf (&buf, _("\ +%s: option `%s' requires an argument\n"), + argv[0], argv[d->optind - 1]) >= 0) + { + _IO_flockfile (stderr); + + int old_flags2 = ((_IO_FILE *) stderr)->_flags2; + ((_IO_FILE *) stderr)->_flags2 + |= _IO_FLAGS2_NOTCANCEL; + + __fxprintf (NULL, "%s", buf); + + ((_IO_FILE *) stderr)->_flags2 = old_flags2; + _IO_funlockfile (stderr); + + free (buf); + } +#else + fprintf (stderr, + _("%s: option `%s' requires an argument\n"), + argv[0], argv[d->optind - 1]); +#endif + } + d->__nextchar += 
strlen (d->__nextchar); + d->optopt = pfound->val; + return optstring[0] == ':' ? ':' : '?'; + } + } + d->__nextchar += strlen (d->__nextchar); + if (longind != NULL) + *longind = option_index; + if (pfound->flag) + { + *(pfound->flag) = pfound->val; + return 0; + } + return pfound->val; + } + + /* Can't find it as a long option. If this is not getopt_long_only, + or the option starts with '--' or is not a valid short + option, then it's an error. + Otherwise interpret it as a short option. */ + if (!long_only || argv[d->optind][1] == '-' + || strchr (optstring, *d->__nextchar) == NULL) + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + int n; +#endif + + if (argv[d->optind][1] == '-') + { + /* --option */ +#if defined _LIBC && defined USE_IN_LIBIO + n = __asprintf (&buf, _("%s: unrecognized option `--%s'\n"), + argv[0], d->__nextchar); +#else + fprintf (stderr, _("%s: unrecognized option `--%s'\n"), + argv[0], d->__nextchar); +#endif + } + else + { + /* +option or -option */ +#if defined _LIBC && defined USE_IN_LIBIO + n = __asprintf (&buf, _("%s: unrecognized option `%c%s'\n"), + argv[0], argv[d->optind][0], d->__nextchar); +#else + fprintf (stderr, _("%s: unrecognized option `%c%s'\n"), + argv[0], argv[d->optind][0], d->__nextchar); +#endif + } + +#if defined _LIBC && defined USE_IN_LIBIO + if (n >= 0) + { + _IO_flockfile (stderr); + + int old_flags2 = ((_IO_FILE *) stderr)->_flags2; + ((_IO_FILE *) stderr)->_flags2 |= _IO_FLAGS2_NOTCANCEL; + + __fxprintf (NULL, "%s", buf); + + ((_IO_FILE *) stderr)->_flags2 = old_flags2; + _IO_funlockfile (stderr); + + free (buf); + } +#endif + } + d->__nextchar = (char *) ""; + d->optind++; + d->optopt = 0; + return '?'; + } + } + + /* Look at and handle the next short option-character. */ + + { + char c = *d->__nextchar++; + char *temp = strchr (optstring, c); + + /* Increment `optind' when we start to process its last character. 
*/ + if (*d->__nextchar == '\0') + ++d->optind; + + if (temp == NULL || c == ':') + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + int n; +#endif + + if (d->__posixly_correct) + { + /* 1003.2 specifies the format of this message. */ +#if defined _LIBC && defined USE_IN_LIBIO + n = __asprintf (&buf, _("%s: illegal option -- %c\n"), + argv[0], c); +#else + fprintf (stderr, _("%s: illegal option -- %c\n"), argv[0], c); +#endif + } + else + { +#if defined _LIBC && defined USE_IN_LIBIO + n = __asprintf (&buf, _("%s: invalid option -- %c\n"), + argv[0], c); +#else + fprintf (stderr, _("%s: invalid option -- %c\n"), argv[0], c); +#endif + } + +#if defined _LIBC && defined USE_IN_LIBIO + if (n >= 0) + { + _IO_flockfile (stderr); + + int old_flags2 = ((_IO_FILE *) stderr)->_flags2; + ((_IO_FILE *) stderr)->_flags2 |= _IO_FLAGS2_NOTCANCEL; + + __fxprintf (NULL, "%s", buf); + + ((_IO_FILE *) stderr)->_flags2 = old_flags2; + _IO_funlockfile (stderr); + + free (buf); + } +#endif + } + d->optopt = c; + return '?'; + } + /* Convenience. Treat POSIX -W foo same as long option --foo, but only when a long-option table was supplied: plain getopt() passes a null LONGOPTS, and the table scan below would dereference it. Without a table, W is returned like any other option character. */ + if (temp[0] == 'W' && temp[1] == ';' && longopts != NULL) + { + char *nameend; + const struct option *p; + const struct option *pfound = NULL; + int exact = 0; + int ambig = 0; + int indfound = 0; + int option_index; + + /* This is an option that requires an argument. */ + if (*d->__nextchar != '\0') + { + d->optarg = d->__nextchar; + /* If we end this ARGV-element by taking the rest as an arg, + we must advance to the next element now. */ + d->optind++; + } + else if (d->optind == argc) + { + if (print_errors) + { + /* 1003.2 specifies the format of this message. 
*/ +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + if (__asprintf (&buf, + _("%s: option requires an argument -- %c\n"), + argv[0], c) >= 0) + { + _IO_flockfile (stderr); + + int old_flags2 = ((_IO_FILE *) stderr)->_flags2; + ((_IO_FILE *) stderr)->_flags2 |= _IO_FLAGS2_NOTCANCEL; + + __fxprintf (NULL, "%s", buf); + + ((_IO_FILE *) stderr)->_flags2 = old_flags2; + _IO_funlockfile (stderr); + + free (buf); + } +#else + fprintf (stderr, _("%s: option requires an argument -- %c\n"), + argv[0], c); +#endif + } + d->optopt = c; + if (optstring[0] == ':') + c = ':'; + else + c = '?'; + return c; + } + else + /* We already incremented `d->optind' once; + increment it again when taking next ARGV-elt as argument. */ + d->optarg = argv[d->optind++]; + + /* optarg is now the argument, see if it's in the + table of longopts. */ + + for (d->__nextchar = nameend = d->optarg; *nameend && *nameend != '='; + nameend++) + /* Do nothing. */ ; + + /* Test all long options for either exact match + or abbreviated matches. */ + for (p = longopts, option_index = 0; p->name; p++, option_index++) + if (!strncmp (p->name, d->__nextchar, nameend - d->__nextchar)) + { + if ((unsigned int) (nameend - d->__nextchar) == strlen (p->name)) + { + /* Exact match found. */ + pfound = p; + indfound = option_index; + exact = 1; + break; + } + else if (pfound == NULL) + { + /* First nonexact match found. */ + pfound = p; + indfound = option_index; + } + else + /* Second or later nonexact match found. 
*/ + ambig = 1; + } + if (ambig && !exact) /* optind already points past the -W argument here, so the message reports d->optarg rather than argv[optind], which may be argv[argc]. */ + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + if (__asprintf (&buf, _("%s: option `-W %s' is ambiguous\n"), + argv[0], d->optarg) >= 0) + { + _IO_flockfile (stderr); + + int old_flags2 = ((_IO_FILE *) stderr)->_flags2; + ((_IO_FILE *) stderr)->_flags2 |= _IO_FLAGS2_NOTCANCEL; + + __fxprintf (NULL, "%s", buf); + + ((_IO_FILE *) stderr)->_flags2 = old_flags2; + _IO_funlockfile (stderr); + + free (buf); + } +#else + fprintf (stderr, _("%s: option `-W %s' is ambiguous\n"), + argv[0], d->optarg); +#endif + } + d->__nextchar += strlen (d->__nextchar); + d->optind++; /* NOTE(review): this steps over one more ARGV element after the -W argument; confirm that is intended. */ + return '?'; + } + if (pfound != NULL) + { + option_index = indfound; + if (*nameend) + { + /* Don't test has_arg with >, because some C compilers don't + allow it to be used on enums. */ + if (pfound->has_arg) + d->optarg = nameend + 1; + else + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + if (__asprintf (&buf, _("\ +%s: option `-W %s' doesn't allow an argument\n"), + argv[0], pfound->name) >= 0) + { + _IO_flockfile (stderr); + + int old_flags2 = ((_IO_FILE *) stderr)->_flags2; + ((_IO_FILE *) stderr)->_flags2 + |= _IO_FLAGS2_NOTCANCEL; + + __fxprintf (NULL, "%s", buf); + + ((_IO_FILE *) stderr)->_flags2 = old_flags2; + _IO_funlockfile (stderr); + + free (buf); + } +#else + fprintf (stderr, _("\ +%s: option `-W %s' doesn't allow an argument\n"), + argv[0], pfound->name); +#endif + } + + d->__nextchar += strlen (d->__nextchar); + return '?'; + } + } + else if (pfound->has_arg == 1) + { + if (d->optind < argc) + d->optarg = argv[d->optind++]; + else + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + if (__asprintf (&buf, _("\ +%s: option `%s' requires an argument\n"), + argv[0], argv[d->optind - 1]) >= 0) + { + _IO_flockfile (stderr); + + int old_flags2 = ((_IO_FILE *) stderr)->_flags2; + ((_IO_FILE *) stderr)->_flags2 + |= 
_IO_FLAGS2_NOTCANCEL; + + __fxprintf (NULL, "%s", buf); + + ((_IO_FILE *) stderr)->_flags2 = old_flags2; + _IO_funlockfile (stderr); + + free (buf); + } +#else + fprintf (stderr, + _("%s: option `%s' requires an argument\n"), + argv[0], argv[d->optind - 1]); +#endif + } + d->__nextchar += strlen (d->__nextchar); + return optstring[0] == ':' ? ':' : '?'; + } + } + d->__nextchar += strlen (d->__nextchar); + if (longind != NULL) + *longind = option_index; + if (pfound->flag) + { + *(pfound->flag) = pfound->val; + return 0; + } + return pfound->val; + } + d->__nextchar = NULL; + return 'W'; /* Let the application handle it. */ + } + if (temp[1] == ':') + { + if (temp[2] == ':') + { + /* This is an option that accepts an argument optionally. */ + if (*d->__nextchar != '\0') + { + d->optarg = d->__nextchar; + d->optind++; + } + else + d->optarg = NULL; + d->__nextchar = NULL; + } + else + { + /* This is an option that requires an argument. */ + if (*d->__nextchar != '\0') + { + d->optarg = d->__nextchar; + /* If we end this ARGV-element by taking the rest as an arg, + we must advance to the next element now. */ + d->optind++; + } + else if (d->optind == argc) + { + if (print_errors) + { + /* 1003.2 specifies the format of this message. */ +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + if (__asprintf (&buf, _("\ +%s: option requires an argument -- %c\n"), + argv[0], c) >= 0) + { + _IO_flockfile (stderr); + + int old_flags2 = ((_IO_FILE *) stderr)->_flags2; + ((_IO_FILE *) stderr)->_flags2 |= _IO_FLAGS2_NOTCANCEL; + + __fxprintf (NULL, "%s", buf); + + ((_IO_FILE *) stderr)->_flags2 = old_flags2; + _IO_funlockfile (stderr); + + free (buf); + } +#else + fprintf (stderr, + _("%s: option requires an argument -- %c\n"), + argv[0], c); +#endif + } + d->optopt = c; + if (optstring[0] == ':') + c = ':'; + else + c = '?'; + } + else + /* We already incremented `optind' once; + increment it again when taking next ARGV-elt as argument. 
*/ + d->optarg = argv[d->optind++]; + d->__nextchar = NULL; + } + } + return c; + } +} +/* Non-reentrant front end: loads the global `optind' and `opterr' into the static `getopt_data', runs _getopt_internal_r on it, then copies the resulting `optind', `optarg' and `optopt' back to the globals and returns that call's result. */ +int +_getopt_internal (int argc, char *const *argv, const char *optstring, + const struct option *longopts, int *longind, int long_only) +{ + int result; + + getopt_data.optind = optind; + getopt_data.opterr = opterr; + + result = _getopt_internal_r (argc, argv, optstring, longopts, + longind, long_only, &getopt_data); + + optind = getopt_data.optind; + optarg = getopt_data.optarg; + optopt = getopt_data.optopt; + + return result; +} +/* Short-option entry point: calls _getopt_internal with a null long-option table, a null LONGIND and LONG_ONLY set to 0. */ +int +getopt (int argc, char *const *argv, const char *optstring) +{ + return _getopt_internal (argc, argv, optstring, + (const struct option *) 0, + (int *) 0, + 0); +} + +#endif /* Not ELIDE_CODE. */ + +#ifdef TEST + +/* Compile with -DTEST to make an executable for use in testing + the above definition of `getopt'. */ + +int +main (int argc, char **argv) +{ + int c; + int digit_optind = 0; + + while (1) + { + int this_option_optind = optind ? optind : 1; + + c = getopt (argc, argv, "abc:d:0123456789"); + if (c == -1) + break; + + switch (c) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + if (digit_optind != 0 && digit_optind != this_option_optind) + printf ("digits occur in two different argv-elements.\n"); + digit_optind = this_option_optind; + printf ("option %c\n", c); + break; + + case 'a': + printf ("option a\n"); + break; + + case 'b': + printf ("option b\n"); + break; + + case 'c': + printf ("option c with value `%s'\n", optarg); + break; + + case '?': + break; + + default: + printf ("?? 
getopt returned character code 0%o ??\n", c); + } + } + + if (optind < argc) + { + printf ("non-option ARGV-elements: "); + while (optind < argc) + printf ("%s ", argv[optind++]); + printf ("\n"); + } + + exit (0); +} + +#endif /* TEST */ diff --git a/lib/getopt/getopt.h b/lib/getopt/getopt.h new file mode 100644 index 0000000..b7a026c --- /dev/null +++ b/lib/getopt/getopt.h @@ -0,0 +1,177 @@ +/* Declarations for getopt. + Copyright (C) 1989-1994,1996-1999,2001,2003,2004 + Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#ifndef _GETOPT_H + +#ifndef __need_getopt +# define _GETOPT_H 1 +#endif + +/* If __GNU_LIBRARY__ is not already defined, either we are being used + standalone, or this is the first header included in the source file. + If we are being used with glibc, we need to include , but + that does not exist if we are standalone. So: if __GNU_LIBRARY__ is + not defined, include , which will pull in for us + if it's from glibc. (Why ctype.h? It's guaranteed to exist and it + doesn't flood the namespace with stuff the way some other headers do.) 
*/ +#if !defined __GNU_LIBRARY__ +# include +#endif + +#ifndef __THROW +# ifndef __GNUC_PREREQ +# define __GNUC_PREREQ(maj, min) (0) +# endif +# if defined __cplusplus && __GNUC_PREREQ (2,8) +# define __THROW throw () +# else +# define __THROW +# endif +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* For communication from `getopt' to the caller. + When `getopt' finds an option that takes an argument, + the argument value is returned here. + Also, when `ordering' is RETURN_IN_ORDER, + each non-option ARGV-element is returned here. */ + +extern char *optarg; + +/* Index in ARGV of the next element to be scanned. + This is used for communication to and from the caller + and for communication between successive calls to `getopt'. + + On entry to `getopt', zero means this is the first call; initialize. + + When `getopt' returns -1, this is the index of the first of the + non-option elements that the caller should itself scan. + + Otherwise, `optind' communicates from one call to the next + how much of ARGV has been scanned so far. */ + +extern int optind; + +/* Callers store zero here to inhibit the error message `getopt' prints + for unrecognized options. */ + +extern int opterr; + +/* Set to an option character which was unrecognized. */ + +extern int optopt; + +#ifndef __need_getopt +/* Describe the long-named options requested by the application. + The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector + of `struct option' terminated by an element containing a name which is + zero. + + The field `has_arg' is: + no_argument (or 0) if the option does not take an argument, + required_argument (or 1) if the option requires an argument, + optional_argument (or 2) if the option takes an optional argument. + + If the field `flag' is not NULL, it points to a variable that is set + to the value given in the field `val' when the option is found, but + left unchanged if the option is not found. 
+ + To have a long-named option do something other than set an `int' to + a compiled-in constant, such as set a value from `optarg', set the + option's `flag' field to zero and its `val' field to a nonzero + value (the equivalent single-letter option character, if there is + one). For long options that have a zero `flag' field, `getopt' + returns the contents of the `val' field. */ + +struct option +{ + const char *name; + /* has_arg can't be an enum because some compilers complain about + type mismatches in all the code that assumes it is an int. */ + int has_arg; + int *flag; + int val; +}; + +/* Names for the values of the `has_arg' field of `struct option'. */ + +# define no_argument 0 +# define required_argument 1 +# define optional_argument 2 +#endif /* need getopt */ + + +/* Get definitions and prototypes for functions to process the + arguments in ARGV (ARGC of them, minus the program name) for + options given in OPTS. + + Return the option character from OPTS just read. Return -1 when + there are no more options. For unrecognized options, or options + missing arguments, `optopt' is set to the option letter, and '?' is + returned. + + The OPTS string is a list of characters which are recognized option + letters, optionally followed by colons, specifying that that letter + takes an argument, to be placed in `optarg'. + + If a letter in OPTS is followed by two colons, its argument is + optional. This behavior is specific to the GNU `getopt'. + + The argument `--' causes premature termination of argument + scanning, explicitly telling `getopt' that there are no more + options. + + If OPTS begins with `-', then non-option arguments are treated as + arguments to the option '\1'. This behavior is specific to the GNU + `getopt'. */ + +#ifdef __GNU_LIBRARY__ +/* Many other libraries have conflicting prototypes for getopt, with + differences in the consts, in stdlib.h. To avoid compilation + errors, only prototype getopt for the GNU C library. 
*/ +extern int getopt (int ___argc, char *const *___argv, const char *__shortopts) + __THROW; +#else /* not __GNU_LIBRARY__ */ +extern int getopt (); +#endif /* __GNU_LIBRARY__ */ + +#ifndef __need_getopt +extern int getopt_long (int ___argc, char *const *___argv, + const char *__shortopts, + const struct option *__longopts, int *__longind) + __THROW; +extern int getopt_long_only (int ___argc, char *const *___argv, + const char *__shortopts, + const struct option *__longopts, int *__longind) + __THROW; + +#endif + +#ifdef __cplusplus +} +#endif + +/* Make sure we later can get all the definitions and declarations. */ +#undef __need_getopt + +#endif /* getopt.h */ diff --git a/lib/getopt/getopt1.c b/lib/getopt/getopt1.c new file mode 100644 index 0000000..0ea9536 --- /dev/null +++ b/lib/getopt/getopt1.c @@ -0,0 +1,192 @@ +/* getopt_long and getopt_long_only entry points for GNU getopt. + Copyright (C) 1987,88,89,90,91,92,93,94,96,97,98,2004 + Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. 
*/ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#ifdef _LIBC +# include +#else +# include "getopt.h" +#endif +#include "getopt_int.h" + +#include + +/* Comment out all this code if we are using the GNU C Library, and are not + actually compiling the library itself. This code is part of the GNU C + Library, but also included in many other GNU distributions. Compiling + and linking in this code is a waste when using the GNU C library + (especially if it is a shared library). Rather than having every GNU + program understand `configure --with-gnu-libc' and omit the object files, + it is simpler to just do this in the source for each such file. */ + +#define GETOPT_INTERFACE_VERSION 2 +#if !defined _LIBC && defined __GLIBC__ && __GLIBC__ >= 2 +#include +#if _GNU_GETOPT_INTERFACE_VERSION == GETOPT_INTERFACE_VERSION +//#define ELIDE_CODE // SHERLOCK: disabled +#endif +#endif + +#ifndef ELIDE_CODE + + +/* This needs to come after some library #include + to get __GNU_LIBRARY__ defined. */ +#ifdef __GNU_LIBRARY__ +#include +#endif + +#ifndef NULL +#define NULL 0 +#endif + +int +getopt_long (int argc, char *const *argv, const char *options, + const struct option *long_options, int *opt_index) +{ + return _getopt_internal (argc, argv, options, long_options, opt_index, 0); +} + +int +_getopt_long_r (int argc, char *const *argv, const char *options, + const struct option *long_options, int *opt_index, + struct _getopt_data *d) +{ + return _getopt_internal_r (argc, argv, options, long_options, opt_index, + 0, d); +} + +/* Like getopt_long, but '-' as well as '--' can indicate a long option. + If an option that starts with '-' (not '--') doesn't match a long option, + but does match a short option, it is parsed as a short option + instead. 
*/ + +int +getopt_long_only (int argc, char *const *argv, const char *options, + const struct option *long_options, int *opt_index) +{ + return _getopt_internal (argc, argv, options, long_options, opt_index, 1); +} + +int +_getopt_long_only_r (int argc, char *const *argv, const char *options, + const struct option *long_options, int *opt_index, + struct _getopt_data *d) +{ + return _getopt_internal_r (argc, argv, options, long_options, opt_index, + 1, d); +} + +#endif /* Not ELIDE_CODE. */ + +#ifdef TEST + +#include + +int +main (int argc, char **argv) +{ + int c; + int digit_optind = 0; + + while (1) + { + int this_option_optind = optind ? optind : 1; + int option_index = 0; + static struct option long_options[] = + { + {"add", 1, 0, 0}, + {"append", 0, 0, 0}, + {"delete", 1, 0, 0}, + {"verbose", 0, 0, 0}, + {"create", 0, 0, 0}, + {"file", 1, 0, 0}, + {0, 0, 0, 0} + }; + + c = getopt_long (argc, argv, "abc:d:0123456789", + long_options, &option_index); + if (c == -1) + break; + + switch (c) + { + case 0: + printf ("option %s", long_options[option_index].name); + if (optarg) + printf (" with arg %s", optarg); + printf ("\n"); + break; + + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + if (digit_optind != 0 && digit_optind != this_option_optind) + printf ("digits occur in two different argv-elements.\n"); + digit_optind = this_option_optind; + printf ("option %c\n", c); + break; + + case 'a': + printf ("option a\n"); + break; + + case 'b': + printf ("option b\n"); + break; + + case 'c': + printf ("option c with value `%s'\n", optarg); + break; + + case 'd': + printf ("option d with value `%s'\n", optarg); + break; + + case '?': + break; + + default: + printf ("?? 
getopt returned character code 0%o ??\n", c); + } + } + + if (optind < argc) + { + printf ("non-option ARGV-elements: "); + while (optind < argc) + printf ("%s ", argv[optind++]); + printf ("\n"); + } + + exit (0); +} + +#endif /* TEST */ diff --git a/lib/getopt/getopt_init.c b/lib/getopt/getopt_init.c new file mode 100644 index 0000000..d460098 --- /dev/null +++ b/lib/getopt/getopt_init.c @@ -0,0 +1,75 @@ +/* Perform additional initialization for getopt functions in GNU libc. + Copyright (C) 1997, 1998, 2001 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper , 1997. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#ifdef USE_NONOPTION_FLAGS +/* Attention: this file is *not* necessary when the GNU getopt functions + are used outside the GNU libc. Some additional functionality of the + getopt functions in GNU libc require this additional work. */ + +#include +#include +#include +#include + +#include + +/* Variable to synchronize work. */ +char *__getopt_nonoption_flags; + + +/* Remove the environment variable "__GNU_nonoption_argv_flags_" if + it is still available. If the getopt functions are also used in the + application it does not exist anymore since it was saved for the use + in getopt. 
*/ +void +__getopt_clean_environment (char **env) +{ + /* Bash 2.0 puts a special variable in the environment for each + command it runs, specifying which ARGV elements are the results + of file name wildcard expansion and therefore should not be + considered as options. */ + static const char envvar_tail[] = "_GNU_nonoption_argv_flags_="; + char var[50]; + char *cp, **ep; + size_t len; + + /* Construct the "__GNU_nonoption_argv_flags_=" string. We must + not use `sprintf'. */ + cp = memcpy (&var[sizeof (var) - sizeof (envvar_tail)], envvar_tail, + sizeof (envvar_tail)); + cp = _itoa_word (__getpid (), cp, 10, 0); + /* Note: we omit adding the leading '_' since we explicitly test for + it before calling strncmp. */ + len = (var + sizeof (var) - 1) - cp; + + for (ep = env; *ep != NULL; ++ep) + if ((*ep)[0] == '_' + && __builtin_expect (strncmp (*ep + 1, cp, len) == 0, 0)) + { + /* Found it. Store this pointer and move later ones back. */ + char **dp = ep; + __getopt_nonoption_flags = &(*ep)[len]; + do + dp[0] = dp[1]; + while (*dp++); + /* Continue the loop in case the name appears again. */ + } +} +#endif /* USE_NONOPTION_FLAGS */ diff --git a/lib/getopt/getopt_int.h b/lib/getopt/getopt_int.h new file mode 100644 index 0000000..d982c72 --- /dev/null +++ b/lib/getopt/getopt_int.h @@ -0,0 +1,130 @@ +/* Internal declarations for getopt. + Copyright (C) 1989-1994,1996-1999,2001,2003,2004 + Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#ifndef _GETOPT_INT_H +#define _GETOPT_INT_H 1 + +extern int _getopt_internal (int ___argc, char *const *___argv, + const char *__shortopts, + const struct option *__longopts, int *__longind, + int __long_only); + + +/* Reentrant versions which can handle parsing multiple argument + vectors at the same time. */ + +/* Data type for reentrant functions. */ +struct _getopt_data +{ + /* These have exactly the same meaning as the corresponding global + variables, except that they are used for the reentrant + versions of getopt. */ + int optind; + int opterr; + int optopt; + char *optarg; + + /* Internal members. */ + + /* True if the internal members have been initialized. */ + int __initialized; + + /* The next char to be scanned in the option-element + in which the last option character we returned was found. + This allows us to pick up the scan where we left off. + + If this is zero, or a null string, it means resume the scan + by advancing to the next ARGV-element. */ + char *__nextchar; + + /* Describe how to deal with options that follow non-option ARGV-elements. + + If the caller did not specify anything, + the default is REQUIRE_ORDER if the environment variable + POSIXLY_CORRECT is defined, PERMUTE otherwise. + + REQUIRE_ORDER means don't recognize them as options; + stop option processing when the first non-option is seen. + This is what Unix does. + This mode of operation is selected by either setting the environment + variable POSIXLY_CORRECT, or using `+' as the first character + of the list of option characters. + + PERMUTE is the default. We permute the contents of ARGV as we + scan, so that eventually all the non-options are at the end. 
+ This allows options to be given in any order, even with programs + that were not written to expect this. + + RETURN_IN_ORDER is an option available to programs that were + written to expect options and other ARGV-elements in any order + and that care about the ordering of the two. We describe each + non-option ARGV-element as if it were the argument of an option + with character code 1. Using `-' as the first character of the + list of option characters selects this mode of operation. + + The special argument `--' forces an end of option-scanning regardless + of the value of `ordering'. In the case of RETURN_IN_ORDER, only + `--' can cause `getopt' to return -1 with `optind' != ARGC. */ + + enum + { + REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER + } __ordering; + + /* If the POSIXLY_CORRECT environment variable is set. */ + int __posixly_correct; + + + /* Handle permutation of arguments. */ + + /* Describe the part of ARGV that contains non-options that have + been skipped. `first_nonopt' is the index in ARGV of the first + of them; `last_nonopt' is the index after the last of them. */ + + int __first_nonopt; + int __last_nonopt; + +#if defined _LIBC && defined USE_NONOPTION_FLAGS + int __nonoption_flags_max_len; + int __nonoption_flags_len; +# endif +}; + +/* The initializer is necessary to set OPTIND and OPTERR to their + default values and to clear the initialization flag. 
*/ +#define _GETOPT_DATA_INITIALIZER { 1, 1 } + +extern int _getopt_internal_r (int ___argc, char *const *___argv, + const char *__shortopts, + const struct option *__longopts, int *__longind, + int __long_only, struct _getopt_data *__data); + +extern int _getopt_long_r (int ___argc, char *const *___argv, + const char *__shortopts, + const struct option *__longopts, int *__longind, + struct _getopt_data *__data); + +extern int _getopt_long_only_r (int ___argc, char *const *___argv, + const char *__shortopts, + const struct option *__longopts, + int *__longind, + struct _getopt_data *__data); + +#endif /* getopt_int.h */ diff --git a/lib/hash-test.c b/lib/hash-test.c new file mode 100644 index 0000000..803a030 --- /dev/null +++ b/lib/hash-test.c @@ -0,0 +1,319 @@ +/* Tests for hash table routines */ + +#include "lib/lib.h" +#include "lib/mempool.h" + +#include +#include +#include + +/* TEST 1: integers */ + +struct node1 { + int key; + int data; +}; + +#define HASH_NODE struct node1 +#define HASH_PREFIX(x) test1_##x +#define HASH_KEY_ATOMIC key +#define HASH_ATOMIC_TYPE int +#define HASH_ZERO_FILL + +#define HASH_GIVE_INIT_DATA +static inline void test1_init_data(struct node1 *n) +{ + n->data = n->key + 123; +} + +#define HASH_WANT_FIND +#define HASH_WANT_LOOKUP +#define HASH_WANT_REMOVE + +#include "lib/hashtable.h" + +static void test1(void) +{ + int i; + + test1_init(); + for (i=0; i<1024; i++) + { + struct node1 *n = test1_lookup(i); + ASSERT(n->data == i+123); + } + for (i=1; i<1024; i+=2) + { + struct node1 *n = test1_lookup(i); + test1_remove(n); + } + for (i=0; i<1024; i++) + { + struct node1 *n = test1_find(i); + if (!n != (i&1) || (n && n->data != i+123)) + die("Inconsistency at i=%d", i); + } + i=0; + HASH_FOR_ALL(test1, n) + { + i += 1 + n->key; + } + HASH_END_FOR; + ASSERT(i == 262144); + puts("OK"); +} + +/* TEST 2: external strings */ + +struct node2 { + char *key; + int data; +}; + +#define HASH_NODE struct node2 +#define HASH_PREFIX(x) test2_##x 
+#define HASH_KEY_STRING key +#define HASH_NOCASE +#define HASH_AUTO_POOL 4096 + +#define HASH_WANT_FIND +#define HASH_WANT_NEW + +#include "lib/hashtable.h" + +static void test2(void) +{ + int i; + + test2_init(); + for (i=0; i<1024; i+=2) + { + char x[32]; + sprintf(x, "abc%d", i); + test2_new(xstrdup(x)); + } + for (i=0; i<1024; i++) + { + char x[32]; + struct node2 *n; + sprintf(x, "ABC%d", i); + n = test2_find(x); + if (!n != (i&1)) + die("Inconsistency at i=%d", i); + } + puts("OK"); +} + +/* TEST 3: internal strings + pools */ + +static struct mempool *pool3; + +struct node3 { + int data; + char key[1]; +}; + +#define HASH_NODE struct node3 +#define HASH_PREFIX(x) test3_##x +#define HASH_KEY_ENDSTRING key + +#define HASH_WANT_FIND +#define HASH_WANT_NEW + +#define HASH_USE_POOL pool3 + +#include "lib/hashtable.h" + +static void test3(void) +{ + int i; + + pool3 = mp_new(16384); + test3_init(); + for (i=0; i<1048576; i+=2) + { + char x[32]; + sprintf(x, "abc%d", i); + test3_new(x); + } + for (i=0; i<1048576; i++) + { + char x[32]; + struct node3 *n; + sprintf(x, "abc%d", i); + n = test3_find(x); + if (!n != (i&1)) + die("Inconsistency at i=%d", i); + } + puts("OK"); +} + +/* TEST 4: complex keys */ + +#include "lib/hashfunc.h" + +struct node4 { + int port; + int data; + char host[1]; +}; + +#define HASH_NODE struct node4 +#define HASH_PREFIX(x) test4_##x +#define HASH_KEY_COMPLEX(x) x host, x port +#define HASH_KEY_DECL char *host, int port + +#define HASH_WANT_CLEANUP +#define HASH_WANT_FIND +#define HASH_WANT_NEW +#define HASH_WANT_LOOKUP +#define HASH_WANT_DELETE +#define HASH_WANT_REMOVE + +#define HASH_GIVE_HASHFN +static uns test4_hash(char *host, int port) +{ + return hash_string_nocase(host) ^ hash_u32(port); +} + +#define HASH_GIVE_EQ +static inline int test4_eq(char *host1, int port1, char *host2, int port2) +{ + return !strcasecmp(host1,host2) && port1 == port2; +} + +#define HASH_GIVE_EXTRA_SIZE +static inline uns test4_extra_size(char *host, int 
port UNUSED) +{ + return strlen(host); +} + +#define HASH_GIVE_INIT_KEY +static inline void test4_init_key(struct node4 *n, char *host, int port) +{ + strcpy(n->host, host); + n->port = port; +} + +#include "lib/hashtable.h" + +static void test4(void) +{ + int i; + char x[32]; + struct node4 *n; + + test4_init(); + for (i=0; i<1024; i++) + if ((i % 3) == 0) + { + sprintf(x, "abc%d", i); + n = test4_new(x, i%10); + n->data = i; + } + for (i=0; i<1024; i++) + { + sprintf(x, "abc%d", i); + n = test4_lookup(x, i%10); + n->data = i; + } + for (i=0; i<1024; i++) + if (i % 2) + { + sprintf(x, "aBc%d", i); + if ((i % 7) < 3) + { + n = test4_find(x, i%10); + ASSERT(n); + test4_remove(n); + } + else + test4_delete(x, i%10); + } + for (i=0; i<1024; i++) + { + sprintf(x, "ABC%d", i); + n = test4_find(x, i%10); + if (!n != (i&1) || (n && n->data != i)) + die("Inconsistency at i=%d", i); + } + test4_cleanup(); + puts("OK"); +} + +/* TEST 5: integers again, but this time dynamically */ + +struct node5 { + int key; + int data; +}; + +#define HASH_NODE struct node5 +#define HASH_PREFIX(x) test5_##x +#define HASH_KEY_ATOMIC key +#define HASH_ATOMIC_TYPE int +#define HASH_TABLE_DYNAMIC + +struct test5_table; + +#define HASH_GIVE_INIT_DATA +static inline void test5_init_data(struct test5_table *table UNUSED, struct node5 *n) +{ + n->data = n->key + 123; +} + +#define HASH_WANT_FIND +#define HASH_WANT_NEW +#define HASH_WANT_DELETE + +#include "lib/hashtable.h" + +static void test5(void) +{ + int i; + struct test5_table tab; + + test5_init(&tab); + for (i=0; i<1024; i++) + { + struct node5 *n = test5_new(&tab, i); + ASSERT(n->data == i+123); + } + for (i=1; i<1024; i+=2) + test5_delete(&tab, i); + for (i=0; i<1024; i++) + { + struct node5 *n = test5_find(&tab, i); + if (!n != (i&1) || (n && n->data != i+123)) + die("Inconsistency at i=%d", i); + } + i=0; + HASH_FOR_ALL_DYNAMIC(test5, &tab, n) + i += 1 + n->key; + HASH_END_FOR; + ASSERT(i == 262144); + puts("OK"); +} + +int +main(int 
argc, char **argv) +{ + uns m = ~0U; + if (argc > 1) + { + m = 0; + for (int i=1; i + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#include "lib/lib.h" +#include "lib/hashfunc.h" +#include "lib/chartype.h" + +/* The number of bits the hash in the function hash_*() is rotated by after + * every pass. It should be prime with the word size. */ +#define SHIFT_BITS 7 + +/* A bit-mask which clears higher bytes than a given threshold. */ +static uns mask_higher_bits[sizeof(uns)]; + +static void CONSTRUCTOR +hashfunc_init(void) +{ + uns i, j; + byte *str; + for (i=0; i= sizeof(uns)) + { + hash = ROL(hash, SHIFT_BITS) ^ *u++; + len -= sizeof(uns); + } + hash = ROL(hash, SHIFT_BITS) ^ (*u & mask_higher_bits[len]); + return hash; +} + +#ifndef CPU_ALLOW_UNALIGNED +uns +str_len(const char *str) +{ + uns shift = UNALIGNED_PART(str, uns); + if (!shift) + return str_len_aligned(str); + else + { + uns i; + shift = sizeof(uns) - shift; + for (i=0; i= len) + break; + hash ^= str[i] << (shift * 8); + } + return hash; + } +} +#endif + +uns +hash_string_nocase(const char *str) +{ + const byte *s = str; + uns hash = 0; + uns i; + for (i=0; ; i++) + { + uns modulo = i % sizeof(uns); + uns shift; +#ifdef CPU_LITTLE_ENDIAN + shift = modulo; +#else + shift = sizeof(uns) - 1 - modulo; +#endif + if (!modulo) + hash = ROL(hash, SHIFT_BITS); + if (!s[i]) + break; + hash ^= Cupcase(s[i]) << (shift * 8); + } + return hash; +} diff --git a/lib/hashfunc.h b/lib/hashfunc.h new file mode 100644 index 0000000..a9fb1a3 --- /dev/null +++ b/lib/hashfunc.h @@ -0,0 +1,43 @@ +/* + * UCW Library -- Hyper-super-meta-alt-control-shift extra fast + * str_len() and hash_*() routines + * + * (c) 2002, Robert Spalek + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */ + +#ifndef _UCW_HASHFUNC_H +#define _UCW_HASHFUNC_H + +#include "lib/lib.h" + +/* The following functions need str to be aligned to uns. */ +uns str_len_aligned(const char *str) PURE; +uns hash_string_aligned(const char *str) PURE; +uns hash_block_aligned(const byte *str, uns len) PURE; + +#ifdef CPU_ALLOW_UNALIGNED +#define str_len(str) str_len_aligned(str) +#define hash_string(str) hash_string_aligned(str) +#define hash_block(str, len) hash_block_aligned(str, len) +#else +uns str_len(const char *str) PURE; +uns hash_string(const char *str) PURE; +uns hash_block(const byte *str, uns len) PURE; +#endif + +uns hash_string_nocase(const char *str) PURE; + +/* + * We hash integers by multiplying by a reasonably large prime with + * few ones in its binary form (to give the compiler the possibility + * of using shifts and adds on architectures where multiplication + * instructions are slow). + */ +static inline uns CONST hash_u32(uns x) { return 0x01008041*x; } +static inline uns CONST hash_u64(u64 x) { return hash_u32((uns)x ^ (uns)(x >> 32)); } +static inline uns CONST hash_pointer(void *x) { return ((sizeof(x) <= 4) ? hash_u32((uns)(uintptr_t)x) : hash_u64((u64)(uintptr_t)x)); } + +#endif diff --git a/lib/hashtable.h b/lib/hashtable.h new file mode 100644 index 0000000..552e88e --- /dev/null +++ b/lib/hashtable.h @@ -0,0 +1,663 @@ +/* + * UCW Library -- Universal Hash Table + * + * (c) 2002--2004 Martin Mares + * (c) 2002--2005 Robert Spalek + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +/* + * This is not a normal header file, it's a generator of hash tables. + * Each time you include it with parameters set in the corresponding + * preprocessor macros, it generates a hash table with the parameters + * given. + * + * You need to specify: + * + * HASH_NODE data type where a node dwells (usually a struct).
+ * HASH_PREFIX(x) macro to add a name prefix (used on all global names + * defined by the hash table generator). + * + * Then decide on type of keys: + * + * HASH_KEY_ATOMIC=f use node->f as a key of an atomic type (i.e., + * a type which can be compared using `==') + * HASH_ATOMIC_TYPE (defaults to int). + * | HASH_KEY_STRING=f use node->f as a string key, allocated + * separately from the rest of the node. + * | HASH_KEY_ENDSTRING=f use node->f as a string key, allocated + * automatically at the end of the node struct + * (to be declared as "char f[1]" at the end). + * | HASH_KEY_COMPLEX use a multi-component key; as the name suggests, + * the passing of parameters is a bit complex then. + * The HASH_KEY_COMPLEX(x) macro should expand to + * `x k1, x k2, ... x kn' and you should also define: + * HASH_KEY_DECL declaration of function parameters in which key + * should be passed to all hash table operations. + * That is, `type1 k1, type2 k2, ... typen kn'. + * With complex keys, HASH_GIVE_HASHFN, HASH_GIVE_EQ + * and HASH_GIVE_INIT_KEY are mandatory. + * | HASH_KEY_MEMORY=f use node->f as a raw data key, compared using + * memcmp + * HASH_KEY_SIZE the length of the key block + * + * Then specify what operations you request (all names are automatically + * prefixed by calling HASH_PREFIX): + * + * init() -- initialize the hash table. + * HASH_WANT_CLEANUP cleanup() -- deallocate the hash table. + * HASH_WANT_FIND node *find(key) -- find first node with the specified + * key, return NULL if no such node exists. + * HASH_WANT_FIND_NEXT node *find_next(node *start) -- find next node with the + * specified key, return NULL if no such node exists. + * HASH_WANT_NEW node *new(key) -- create new node with given key. + * Doesn't check whether it already exists. + * HASH_WANT_LOOKUP node *lookup(key) -- find node with given key, + * if it doesn't exist, create it. Defining + * HASH_GIVE_INIT_DATA is strongly recommended.
+ * HASH_WANT_DELETE int delete(key) -- delete and deallocate node + * with given key. Returns success. + * HASH_WANT_REMOVE remove(node *) -- delete and deallocate given node. + * + * You can also supply several functions: + * + * HASH_GIVE_HASHFN unsigned int hash(key) -- calculate hash value of key. + * We have sensible default hash functions for strings + * and integers. + * HASH_GIVE_EQ int eq(key1, key2) -- return whether keys are equal. + * By default, we use == for atomic types and either + * strcmp or strcasecmp for strings. + * HASH_GIVE_EXTRA_SIZE int extra_size(key) -- returns how many bytes after the + * node should be allocated for dynamic data. Default=0 + * or length of the string with HASH_KEY_ENDSTRING. + * HASH_GIVE_INIT_KEY void init_key(node *,key) -- initialize key in a newly + * created node. Defaults: assignment for atomic keys + * and static strings, strcpy for end-allocated strings. + * HASH_GIVE_INIT_DATA void init_data(node *) -- initialize data fields in a + * newly created node. Very useful for lookup operations. + * HASH_GIVE_ALLOC void *alloc(unsigned int size) -- allocate space for + * a node. Default is xmalloc() or pooled allocation, depending + * on HASH_USE_POOL and HASH_AUTO_POOL switches. + * void free(void *) -- the converse. + * + * ... and a couple of extra parameters: + * + * HASH_NOCASE String comparisons should be case-insensitive. + * HASH_DEFAULT_SIZE=n Initially, use hash table of approx. `n' entries. + * HASH_CONSERVE_SPACE Use as little space as possible. + * HASH_FN_BITS=n The hash function gives only `n' significant bits. + * HASH_ATOMIC_TYPE=t Atomic values are of type `t' instead of int. + * HASH_USE_POOL=pool Allocate all nodes from given mempool. Note, however, that + * deallocation is not supported by mempools, so delete/remove + * will leak pool memory. + * HASH_AUTO_POOL=size Create a pool of the given block size automatically. + * HASH_ZERO_FILL New entries should be initialized to all zeroes. 
+ * HASH_TABLE_ALLOC The hash table itself will be allocated and freed using + * the same allocation functions as the nodes instead of + * the default xmalloc(). + * HASH_TABLE_DYNAMIC Support multiple hash tables; the first parameter of all + * hash table operations is struct HASH_PREFIX(table) *. + * + * You also get an iterator macro at no extra charge: + * + * HASH_FOR_ALL(hash_prefix, variable) + * { + * // node *variable gets declared automatically + * do_something_with_node(variable); + * // use HASH_BREAK and HASH_CONTINUE instead of break and continue + * // you must not alter contents of the hash table here + * } + * HASH_END_FOR; + * + * (For dynamic tables, use HASH_FOR_ALL_DYNAMIC(hash_prefix, hash_table, variable) instead.) + * + * Then include "lib/hashtable.h" and voila, you have a hash table + * suiting all your needs (at least those which you've revealed :) ). + * + * After including this file, all parameter macros are automatically + * undef'd. + */ + +#ifndef _UCW_HASHFUNC_H +#include "lib/hashfunc.h" +#endif + +#include + +/* Initial setup of parameters */ + +#if !defined(HASH_NODE) || !defined(HASH_PREFIX) +#error Some of the mandatory configuration macros are missing.
+#endif + +#if defined(HASH_KEY_ATOMIC) && !defined(HASH_CONSERVE_SPACE) +#define HASH_CONSERVE_SPACE +#endif + +#define P(x) HASH_PREFIX(x) + +/* Declare buckets and the hash table */ + +typedef HASH_NODE P(node); + +typedef struct P(bucket) { + struct P(bucket) *next; +#ifndef HASH_CONSERVE_SPACE + uns hash; +#endif + P(node) n; +} P(bucket); + +struct P(table) { + uns hash_size; + uns hash_count, hash_max, hash_min, hash_hard_max; + P(bucket) **ht; +#ifdef HASH_AUTO_POOL + struct mempool *pool; +#endif +}; + +#ifdef HASH_TABLE_DYNAMIC +#define T (*table) +#define TA struct P(table) *table +#define TAC TA, +#define TAU TA UNUSED +#define TAUC TA UNUSED, +#define TT table +#define TTC table, +#else +struct P(table) P(table); +#define T P(table) +#define TA void +#define TAC +#define TAU void +#define TAUC +#define TT +#define TTC +#endif + +/* Preset parameters */ + +#if defined(HASH_KEY_ATOMIC) + +#define HASH_KEY(x) x HASH_KEY_ATOMIC + +#ifndef HASH_ATOMIC_TYPE +# define HASH_ATOMIC_TYPE int +#endif +#define HASH_KEY_DECL HASH_ATOMIC_TYPE HASH_KEY( ) + +#ifndef HASH_GIVE_HASHFN +# define HASH_GIVE_HASHFN + static inline int P(hash) (TAUC HASH_ATOMIC_TYPE x) + { return ((sizeof(x) <= 4) ? 
hash_u32(x) : hash_u64(x)); } +#endif + +#ifndef HASH_GIVE_EQ +# define HASH_GIVE_EQ + static inline int P(eq) (TAUC HASH_ATOMIC_TYPE x, HASH_ATOMIC_TYPE y) + { return x == y; } +#endif + +#ifndef HASH_GIVE_INIT_KEY +# define HASH_GIVE_INIT_KEY + static inline void P(init_key) (TAUC P(node) *n, HASH_ATOMIC_TYPE k) + { HASH_KEY(n->) = k; } +#endif + +#elif defined(HASH_KEY_MEMORY) + +#define HASH_KEY(x) x HASH_KEY_MEMORY + +#define HASH_KEY_DECL byte HASH_KEY( )[HASH_KEY_SIZE] + +#ifndef HASH_GIVE_HASHFN +# define HASH_GIVE_HASHFN + static inline int P(hash) (TAUC byte *x) + { return hash_block(x, HASH_KEY_SIZE); } +#endif + +#ifndef HASH_GIVE_EQ +# define HASH_GIVE_EQ + static inline int P(eq) (TAUC byte *x, byte *y) + { return !memcmp(x, y, HASH_KEY_SIZE); } +#endif + +#ifndef HASH_GIVE_INIT_KEY +# define HASH_GIVE_INIT_KEY + static inline void P(init_key) (TAUC P(node) *n, byte *k) + { memcpy(HASH_KEY(n->), k, HASH_KEY_SIZE); } +#endif + +#elif defined(HASH_KEY_STRING) || defined(HASH_KEY_ENDSTRING) + +#ifdef HASH_KEY_STRING +# define HASH_KEY(x) x HASH_KEY_STRING +# ifndef HASH_GIVE_INIT_KEY +# define HASH_GIVE_INIT_KEY + static inline void P(init_key) (TAUC P(node) *n, char *k) + { HASH_KEY(n->) = k; } +# endif +#else +# define HASH_KEY(x) x HASH_KEY_ENDSTRING +# define HASH_GIVE_EXTRA_SIZE + static inline int P(extra_size) (TAUC char *k) + { return strlen(k); } +# ifndef HASH_GIVE_INIT_KEY +# define HASH_GIVE_INIT_KEY + static inline void P(init_key) (TAUC P(node) *n, char *k) + { strcpy(HASH_KEY(n->), k); } +# endif +#endif +#define HASH_KEY_DECL char *HASH_KEY( ) + +#ifndef HASH_GIVE_HASHFN +#define HASH_GIVE_HASHFN + static inline uns P(hash) (TAUC char *k) + { +# ifdef HASH_NOCASE + return hash_string_nocase(k); +# else + return hash_string(k); +# endif + } +#endif + +#ifndef HASH_GIVE_EQ +# define HASH_GIVE_EQ + static inline int P(eq) (TAUC char *x, char *y) + { +# ifdef HASH_NOCASE + return !strcasecmp(x,y); +# else + return !strcmp(x,y); +# endif + } 
+#endif + +#elif defined(HASH_KEY_COMPLEX) + +#define HASH_KEY(x) HASH_KEY_COMPLEX(x) + +#else +#error You forgot to set the hash key type. +#endif + +/* Defaults for missing parameters */ + +#ifndef HASH_GIVE_HASHFN +#error Unable to determine which hash function to use. +#endif + +#ifndef HASH_GIVE_EQ +#error Unable to determine how to compare two keys. +#endif + +#ifdef HASH_GIVE_EXTRA_SIZE +/* This trickery is needed to avoid `unused parameter' warnings */ +#define HASH_EXTRA_SIZE(x) P(extra_size)(TTC x) +#else +/* + * Beware, C macros are expanded iteratively, not recursively, + * hence we get only a _single_ argument, although the expansion + * of HASH_KEY contains commas. + */ +#define HASH_EXTRA_SIZE(x) 0 +#endif + +#ifndef HASH_GIVE_INIT_KEY +#error Unable to determine how to initialize keys. +#endif + +#ifndef HASH_GIVE_INIT_DATA +static inline void P(init_data) (TAUC P(node) *n UNUSED) +{ +} +#endif + +#ifdef HASH_GIVE_ALLOC +/* If the caller has requested to use his own allocation functions, do so */ +static inline void P(init_alloc) (TAU) { } +static inline void P(cleanup_alloc) (TAU) { } + +#elif defined(HASH_USE_POOL) +/* If the caller has requested to use his mempool, do so */ +#include "lib/mempool.h" +static inline void * P(alloc) (TAUC unsigned int size) { return mp_alloc_fast(HASH_USE_POOL, size); } +static inline void P(free) (TAUC void *x UNUSED) { } +static inline void P(init_alloc) (TAU) { } +static inline void P(cleanup_alloc) (TAU) { } + +#elif defined(HASH_AUTO_POOL) +/* Use our own pools */ +#include "lib/mempool.h" +static inline void * P(alloc) (TAUC unsigned int size) { return mp_alloc_fast(T.pool, size); } +static inline void P(free) (TAUC void *x UNUSED) { } +static inline void P(init_alloc) (TAU) { T.pool = mp_new(HASH_AUTO_POOL); } +static inline void P(cleanup_alloc) (TAU) { mp_delete(T.pool); } +#define HASH_USE_POOL + +#else +/* The default allocation method */ +static inline void * P(alloc) (TAUC unsigned int size) { return 
xmalloc(size); } +static inline void P(free) (TAUC void *x) { xfree(x); } +static inline void P(init_alloc) (TAU) { } +static inline void P(cleanup_alloc) (TAU) { } + +#endif + +#ifdef HASH_TABLE_ALLOC +static inline void * P(table_alloc) (TAUC unsigned int size) { return P(alloc)(TTC size); } +static inline void P(table_free) (TAUC void *x) { P(free)(TTC x); } +#else +static inline void * P(table_alloc) (TAUC unsigned int size) { return xmalloc(size); } +static inline void P(table_free) (TAUC void *x) { xfree(x); } +#endif + +#ifndef HASH_DEFAULT_SIZE +#define HASH_DEFAULT_SIZE 32 +#endif + +#ifndef HASH_FN_BITS +#define HASH_FN_BITS 32 +#endif + +#ifdef HASH_ZERO_FILL +static inline void * P(new_bucket)(TAUC uns size) +{ + byte *buck = P(alloc)(TTC size); + bzero(buck, size); + return buck; +} +#else +static inline void * P(new_bucket)(TAUC uns size) { return P(alloc)(TTC size); } +#endif + +/* Now the operations */ + +static void P(alloc_table) (TAU) +{ + T.hash_size = next_table_prime(T.hash_size); + T.ht = P(table_alloc)(TTC sizeof(void *) * T.hash_size); + bzero(T.ht, sizeof(void *) * T.hash_size); + if (2*T.hash_size < T.hash_hard_max) + T.hash_max = 2*T.hash_size; + else + T.hash_max = ~0U; + if (T.hash_size/2 > HASH_DEFAULT_SIZE) + T.hash_min = T.hash_size/4; + else + T.hash_min = 0; +} + +static void P(init) (TA) +{ + T.hash_count = 0; + T.hash_size = HASH_DEFAULT_SIZE; +#if HASH_FN_BITS < 28 + T.hash_hard_max = 1 << HASH_FN_BITS; +#else + T.hash_hard_max = 1 << 28; +#endif + P(init_alloc)(TT); + P(alloc_table)(TT); +} + +#ifdef HASH_WANT_CLEANUP +static void P(cleanup) (TA) +{ +#ifndef HASH_USE_POOL /* without a pool, every bucket is released individually */ + uns i; + P(bucket) *b, *bb; + + for (i=0; i<T.hash_size; i++) + for (b=T.ht[i]; b; b=bb) /* bb holds the successor, b itself is freed below */ + { + bb = b->next; + P(free)(TTC b); + } +#endif + P(cleanup_alloc)(TT); + P(table_free)(TTC T.ht); +} +#endif + +static inline uns P(bucket_hash) (TAUC P(bucket) *b) +{ +#ifdef HASH_CONSERVE_SPACE + return P(hash)(TTC HASH_KEY(b->n.)); +#else + return b->hash; +#endif +} + +static void P(rehash) (TAC uns size) +{ + P(bucket)
*b, *nb; + P(bucket) **oldt = T.ht, **newt; + uns oldsize = T.hash_size; + uns i, h; + + DBG("Rehashing %d->%d at count %d", oldsize, size, T.hash_count); + T.hash_size = size; + P(alloc_table)(TT); + newt = T.ht; + for (i=0; i<oldsize; i++) /* relink every chain of the old table into the new one */ + { + b = oldt[i]; + while (b) + { + nb = b->next; /* save the successor, b->next is overwritten below */ + h = P(bucket_hash)(TTC b) % T.hash_size; + b->next = newt[h]; + newt[h] = b; + b = nb; + } + } + P(table_free)(TTC oldt); +} + +#ifdef HASH_WANT_FIND +static P(node) * P(find) (TAC HASH_KEY_DECL) +{ + uns h0 = P(hash) (TTC HASH_KEY( )); + uns h = h0 % T.hash_size; + P(bucket) *b; + + for (b=T.ht[h]; b; b=b->next) + { + if ( +#ifndef HASH_CONSERVE_SPACE + b->hash == h0 && +#endif + P(eq)(TTC HASH_KEY( ), HASH_KEY(b->n.))) + return &b->n; + } + return NULL; +} +#endif + +#ifdef HASH_WANT_FIND_NEXT +static P(node) * P(find_next) (TAC P(node) *start) +{ +#ifndef HASH_CONSERVE_SPACE + uns h0 = P(hash) (TTC HASH_KEY(start->)); +#endif + P(bucket) *b = SKIP_BACK(P(bucket), n, start); + + for (b=b->next; b; b=b->next) + { + if ( +#ifndef HASH_CONSERVE_SPACE + b->hash == h0 && +#endif + P(eq)(TTC HASH_KEY(start->), HASH_KEY(b->n.))) + return &b->n; + } + return NULL; +} +#endif + +#ifdef HASH_WANT_NEW +static P(node) * P(new) (TAC HASH_KEY_DECL) +{ + uns h0, h; + P(bucket) *b; + + h0 = P(hash) (TTC HASH_KEY( )); + h = h0 % T.hash_size; + b = P(new_bucket) (TTC sizeof(struct P(bucket)) + HASH_EXTRA_SIZE(HASH_KEY( ))); + b->next = T.ht[h]; + T.ht[h] = b; +#ifndef HASH_CONSERVE_SPACE + b->hash = h0; +#endif + P(init_key)(TTC &b->n, HASH_KEY( )); + P(init_data)(TTC &b->n); + if (T.hash_count++ >= T.hash_max) + P(rehash)(TTC 2*T.hash_size); + return &b->n; +} +#endif + +#ifdef HASH_WANT_LOOKUP +static P(node) * P(lookup) (TAC HASH_KEY_DECL) +{ + uns h0 = P(hash) (TTC HASH_KEY( )); + uns h = h0 % T.hash_size; + P(bucket) *b; + + for (b=T.ht[h]; b; b=b->next) + { + if ( +#ifndef HASH_CONSERVE_SPACE + b->hash == h0 && +#endif + P(eq)(TTC HASH_KEY( ), HASH_KEY(b->n.))) + return &b->n; + } + + b = P(new_bucket) (TTC sizeof(struct P(bucket)) +
HASH_EXTRA_SIZE(HASH_KEY( ))); + b->next = T.ht[h]; + T.ht[h] = b; +#ifndef HASH_CONSERVE_SPACE + b->hash = h0; +#endif + P(init_key)(TTC &b->n, HASH_KEY( )); + P(init_data)(TTC &b->n); + if (T.hash_count++ >= T.hash_max) + P(rehash)(TTC 2*T.hash_size); + return &b->n; +} +#endif + +#ifdef HASH_WANT_DELETE +static int P(delete) (TAC HASH_KEY_DECL) +{ + uns h0 = P(hash) (TTC HASH_KEY( )); + uns h = h0 % T.hash_size; + P(bucket) *b, **bb; + + for (bb=&T.ht[h]; b=*bb; bb=&b->next) + { + if ( +#ifndef HASH_CONSERVE_SPACE + b->hash == h0 && +#endif + P(eq)(TTC HASH_KEY( ), HASH_KEY(b->n.))) + { + *bb = b->next; + P(free)(TTC b); + if (--T.hash_count < T.hash_min) + P(rehash)(TTC T.hash_size/2); + return 1; + } + } + return 0; +} +#endif + +#ifdef HASH_WANT_REMOVE +static void P(remove) (TAC P(node) *n) +{ + P(bucket) *x = SKIP_BACK(struct P(bucket), n, n); + uns h0 = P(bucket_hash)(TTC x); + uns h = h0 % T.hash_size; + P(bucket) *b, **bb; + + for (bb=&T.ht[h]; (b=*bb) && b != x; bb=&b->next) + ; + ASSERT(b); + *bb = b->next; + P(free)(TTC b); + if (--T.hash_count < T.hash_min) + P(rehash)(TTC T.hash_size/2); +} +#endif + +/* And the iterator */ + +#ifndef HASH_FOR_ALL + +#define HASH_FOR_ALL_DYNAMIC(h_px, h_table, h_var) \ +do { \ + uns h_slot; \ + struct GLUE_(h_px,bucket) *h_buck; \ + for (h_slot=0; h_slot < (h_table)->hash_size; h_slot++) \ + for (h_buck = (h_table)->ht[h_slot]; h_buck; h_buck = h_buck->next) \ + { \ + GLUE_(h_px,node) *h_var = &h_buck->n; +#define HASH_FOR_ALL(h_px, h_var) HASH_FOR_ALL_DYNAMIC(h_px, &GLUE_(h_px,table), h_var) +#define HASH_END_FOR } } while(0) +#define HASH_BREAK +#define HASH_CONTINUE continue + +#endif + +/* Finally, undefine all the parameters */ + +#undef P +#undef T +#undef TA +#undef TAC +#undef TAU +#undef TAUC +#undef TT +#undef TTC + +#undef HASH_ATOMIC_TYPE +#undef HASH_CONSERVE_SPACE +#undef HASH_DEFAULT_SIZE +#undef HASH_EXTRA_SIZE +#undef HASH_FN_BITS +#undef HASH_GIVE_ALLOC +#undef HASH_GIVE_EQ +#undef 
HASH_GIVE_EXTRA_SIZE +#undef HASH_GIVE_HASHFN +#undef HASH_GIVE_INIT_DATA +#undef HASH_GIVE_INIT_KEY +#undef HASH_KEY +#undef HASH_KEY_ATOMIC +#undef HASH_KEY_COMPLEX +#undef HASH_KEY_DECL +#undef HASH_KEY_ENDSTRING +#undef HASH_KEY_STRING +#undef HASH_KEY_MEMORY +#undef HASH_KEY_SIZE +#undef HASH_NOCASE +#undef HASH_NODE +#undef HASH_PREFIX +#undef HASH_USE_POOL +#undef HASH_AUTO_POOL +#undef HASH_WANT_CLEANUP +#undef HASH_WANT_DELETE +#undef HASH_WANT_FIND +#undef HASH_WANT_FIND_NEXT +#undef HASH_WANT_LOOKUP +#undef HASH_WANT_NEW +#undef HASH_WANT_REMOVE +#undef HASH_TABLE_ALLOC +#undef HASH_TABLE_DYNAMIC +#undef HASH_ZERO_FILL diff --git a/lib/heap.h b/lib/heap.h new file mode 100644 index 0000000..4f83776 --- /dev/null +++ b/lib/heap.h @@ -0,0 +1,88 @@ +/* + * UCW Library -- Universal Heap Macros + * + * (c) 2001 Martin Mares + * (c) 2005 Tomas Valla + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#define HEAP_BUBBLE_DOWN_J(heap,num,less,swap) \ + for (;;) \ + { \ + _l = 2*_j; \ + if (_l > num) \ + break; \ + if (less(heap[_j],heap[_l]) && (_l == num || less(heap[_j],heap[_l+1]))) \ + break; \ + if (_l != num && less(heap[_l+1],heap[_l])) \ + _l++; \ + swap(heap,_j,_l,x); \ + _j = _l; \ + } + +#define HEAP_BUBBLE_UP_J(heap,num,less,swap) \ + while (_j > 1) \ + { \ + _u = _j/2; \ + if (less(heap[_u], heap[_j])) \ + break; \ + swap(heap,_u,_j,x); \ + _j = _u; \ + } + +#define HEAP_INIT(type,heap,num,less,swap) \ + do { \ + uns _i = num; \ + uns _j, _l; \ + type x; \ + while (_i >= 1) \ + { \ + _j = _i; \ + HEAP_BUBBLE_DOWN_J(heap,num,less,swap) \ + _i--; \ + } \ + } while(0) + +#define HEAP_DELMIN(type,heap,num,less,swap) \ + do { \ + uns _j, _l; \ + type x; \ + swap(heap,1,num,x); \ + num--; \ + _j = 1; \ + HEAP_BUBBLE_DOWN_J(heap,num,less,swap); \ + } while(0) + +#define HEAP_INSERT(type,heap,num,less,swap) \ + do { \ + uns _j, _u; \ + type x; \ + _j = num; \ + 
HEAP_BUBBLE_UP_J(heap,num,less,swap); \ + } while(0) + +#define HEAP_INCREASE(type,heap,num,less,swap,pos) \ + do { \ + uns _j, _l; \ + type x; \ + _j = pos; \ + HEAP_BUBBLE_DOWN_J(heap,num,less,swap); \ + } while(0) + +#define HEAP_DELETE(type,heap,num,less,swap,pos) \ + do { \ + uns _j, _l, _u; \ + type x; \ + _j = pos; \ + swap(heap,_j,num,x); \ + num--; \ + if (less(heap[_j], heap[num+1])) \ + HEAP_BUBBLE_UP_J(heap,num,less,swap) \ + else \ + HEAP_BUBBLE_DOWN_J(heap,num,less,swap); \ + } while(0) + +/* Default swapping macro */ +#define HEAP_SWAP(heap,a,b,t) (t=heap[a], heap[a]=heap[b], heap[b]=t) diff --git a/lib/ipaccess.c b/lib/ipaccess.c new file mode 100644 index 0000000..5dd388c --- /dev/null +++ b/lib/ipaccess.c @@ -0,0 +1,127 @@ +/* + * UCW Library -- IP address access lists + * + * (c) 1997--2007 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#include "lib/lib.h" +#include "lib/clists.h" +#include "lib/conf.h" +#include "lib/getopt.h" +#include "lib/fastbuf.h" +#include "lib/ipaccess.h" + +#include + +struct ipaccess_entry { + cnode n; + int allow; + struct ip_addrmask addr; +}; + +static char * +addrmask_parser(char *c, void *ptr) +{ + /* + * This is tricky: addrmasks will be compared by memcmp(), so we must ensure + * that even the padding between structure members is zeroed out. + */ + struct ip_addrmask *am = ptr; + bzero(am, sizeof(*am)); + + char *p = strchr(c, '/'); + if (p) + *p++ = 0; + char *err = cf_parse_ip(c, &am->addr); + if (err) + return err; + if (p) + { + uns len; + if (!cf_parse_int(p, &len) && len <= 32) + am->mask = ~(len == 32 ? 
0 : ~0U >> len); + else if (cf_parse_ip(p, &am->mask)) + return "Invalid prefix length or netmask"; + } + else + am->mask = ~0U; + return NULL; +} + +static void +addrmask_dumper(struct fastbuf *fb, void *ptr) +{ + struct ip_addrmask *am = ptr; + bprintf(fb, "%08x/%08x ", am->addr, am->mask); +} + +struct cf_user_type ip_addrmask_type = { + .size = sizeof(struct ip_addrmask), + .name = "ip_addrmask", + .parser = addrmask_parser, + .dumper = addrmask_dumper +}; + +struct cf_section ipaccess_cf = { + CF_TYPE(struct ipaccess_entry), + CF_ITEMS { + CF_LOOKUP("Mode", PTR_TO(struct ipaccess_entry, allow), ((char*[]) { "deny", "allow", NULL })), + CF_USER("IP", PTR_TO(struct ipaccess_entry, addr), &ip_addrmask_type), + CF_END + } +}; + +int ip_addrmask_match(struct ip_addrmask *am, u32 ip) +{ + return !((ip ^ am->addr) & am->mask); +} + +int +ipaccess_check(clist *l, u32 ip) +{ + CLIST_FOR_EACH(struct ipaccess_entry *, a, *l) + if (ip_addrmask_match(&a->addr, ip)) + return a->allow; + return 0; +} + +#ifdef TEST + +#include + +static clist t; + +static struct cf_section test_cf = { + CF_ITEMS { + CF_LIST("A", &t, &ipaccess_cf), + CF_END + } +}; + +int main(int argc, char **argv) +{ + cf_declare_section("T", &test_cf, 0); + if (cf_getopt(argc, argv, CF_SHORT_OPTS, CF_NO_LONG_OPTS, NULL) != -1) + die("Invalid arguments"); + + byte buf[256]; + while (fgets(buf, sizeof(buf), stdin)) + { + char *c = strchr(buf, '\n'); + if (c) + *c = 0; + u32 ip; + if (cf_parse_ip(buf, &ip)) + puts("Invalid IP address"); + else if (ipaccess_check(&t, ip)) + puts("Allowed"); + else + puts("Denied"); + } + return 0; +} + +#endif diff --git a/lib/ipaccess.h b/lib/ipaccess.h new file mode 100644 index 0000000..b407783 --- /dev/null +++ b/lib/ipaccess.h @@ -0,0 +1,28 @@ +/* + * UCW Library -- IP address access lists + * + * (c) 1997--2007 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */ + +#ifndef _UCW_IPACCESS_H +#define _UCW_IPACCESS_H + +#include "lib/clists.h" + +extern struct cf_section ipaccess_cf; +int ipaccess_check(clist *l, u32 ip); + +/* Low-level handling of addresses and masks */ + +struct ip_addrmask { + u32 addr; + u32 mask; +}; + +extern struct cf_user_type ip_addrmask_type; +int ip_addrmask_match(struct ip_addrmask *am, u32 ip); + +#endif diff --git a/lib/kmp-search.h b/lib/kmp-search.h new file mode 100644 index 0000000..b702acc --- /dev/null +++ b/lib/kmp-search.h @@ -0,0 +1,191 @@ +/* + * Knuth-Morris-Pratt's Substring Search for N given strings + * + * (c) 1999--2005, Robert Spalek + * (c) 2006, Pavel Charvat + * + * (In fact, the algorithm is usually referred to as Aho-Corasick, + * but that's just an extension of KMP to multiple strings.) + */ + +/* + * This is not a normal header file, it's a generator of KMP algorithm. + * Each time you include it with parameters set in the corresponding + * preprocessor macros, it generates KMP structures and functions + * with the parameters given. See lib/kmp.h before reading this description.
+ * + * This file defines: + * + * struct search structure with both the internal and the user-defined variables + * used during the search and accessible from all macros + * + * void search(kmp,search,src) executes the search; search structure is allocated by the caller (possible input/output) + * + * void run(kmp,src) the same, but automatically allocates search structure from the stack + * + * + * Parameters to the generator (those marked with [*] are mandatory): + * + * [*] KMPS_PREFIX(x) macro to add a name prefix (used on all global names + * defined by the KMP search generator) + * [*] KMPS_KMP_PREFIX(x) prefix used for lib/kmp.h + * + * KMPS_SOURCE user-defined text source (together with KMPS_GET_CHAR); + * if unset, the one from lib/kmp.h is taken + * KMPS_GET_CHAR(kmp,src,search) analogy to KMP_GET_CHAR, but it must store the next character to search->c + * + * KMPS_ADD_CONTROLS add control characters (see KMP_CONTROL_CHAR in kmp.h) at both ends of the input string + * KMPS_MERGE_CONTROLS merge adjacent control characters to a single one + * + * KMPS_VARS user-defined variables in struct search (in .u substructure to avoid collisions) + * + * KMPS_INIT(kmp,src,search) statement executed at the beginning of search() + * KMPS_EXIT(kmp,src,search) ... at the end + * KMPS_STEP(kmp,src,search) ... after each step (read of next character + current state update) + * of the algorithm, but before KMPS_FOUND[_CHAIN] + * KMPS_FOUND_CHAIN(kmp,src,search) ... for each state representing locally longest match + * (stored in search->out - NOT necessarily search->s!); + * all matches form a NULL-terminated linked list (search->out, search->out->next, ...) + * in order of decreasing length + * KMPS_FOUND(kmp,src,search) ...
called for every match (in search->out) + * KMPS_WANT_BEST algorithm computes globally longest match, which is available + * in search->best in KMPS_EXIT; if there is no match, it points to the null state + */ + +#define P(x) KMPS_PREFIX(x) +#define KP(x) KMPS_KMP_PREFIX(x) + +#ifdef KMPS_SOURCE +typedef KMPS_SOURCE P(search_source_t); +#else +typedef KP(source_t) P(search_source_t); +#endif + +#ifndef KMPS_GET_CHAR +#define KMPS_GET_CHAR(kmp,src,s) (KP(get_char)(kmp, &src, &s->c)) +#endif + +struct P(search) { + struct KP(state) *s; /* current state */ + struct KP(state) *out; /* output state */ +# ifdef KMPS_WANT_BEST + struct KP(state) *best; /* longest match */ +# endif + KP(char_t) c; /* last character */ +# ifdef KMPS_ADD_CONTROLS + uns eof; +# endif +# ifdef KMPS_VARS + struct { + KMPS_VARS + } u; /* user-defined */ +# endif +}; + +static void +P(search) (struct KP(struct) *kmp, struct P(search) *s, P(search_source_t) src) +{ + s->s = &kmp->null; +# ifdef KMPS_WANT_BEST + s->best = &kmp->null; +# endif +# ifdef KMPS_ADD_CONTROLS + s->c = KP(control)(); + s->eof = 0; +# else + s->c = 0; +# endif +# ifdef KMPS_INIT + { KMPS_INIT(kmp, src, s); } +# endif +# ifndef KMPS_ADD_CONTROLS + goto start_read; +# endif + for (;;) + { + for (struct KP(state) *t = s->s; t && !(s->s = KP(hash_find)(&kmp->hash, t, s->c)); t = t->back); + s->s = s->s ? : &kmp->null; + +# ifdef KMPS_STEP + { KMPS_STEP(kmp, src, s); } +# endif + +# if defined(KMPS_FOUND) || defined(KMPS_FOUND_CHAIN) || defined(KMPS_WANT_BEST) + s->out = s->s->len ? 
s->s : s->s->next; + if (s->out) + { +# ifdef KMPS_WANT_BEST + if (s->out->len > s->best->len) + s->best = s->out; +# endif +# ifdef KMPS_FOUND_CHAIN + { KMPS_FOUND_CHAIN(kmp, src, s); } +# endif +# ifdef KMPS_FOUND + do + { KMPS_FOUND(kmp, src, s); } + while (s->out = s->out->next); +# endif + } +# endif + +# ifdef KMPS_ADD_CONTROLS + if (s->eof) + break; +# endif + +# ifndef KMPS_ADD_CONTROLS +start_read: ; +# endif +# ifdef KMPS_MERGE_CONTROLS + KP(char_t) last_c = s->c; +# endif + + do + { + if (!KMPS_GET_CHAR(kmp, src, s)) + { +# ifdef KMPS_ADD_CONTROLS + if (!KP(is_control)(kmp, s->c)) + { + s->c = KP(control)(); + s->eof = 1; + break; + } +# endif + goto exit; + } + } + while (0 +# ifdef KMPS_MERGE_CONTROLS + || (KP(is_control)(kmp, last_c) && KP(is_control)(kmp, s->c)) +# endif + ); + } +exit: ; +# ifdef KMPS_EXIT + { KMPS_EXIT(kmp, src, s); } +# endif +} + +static inline void +P(run) (struct KP(struct) *kmp, P(search_source_t) src) +{ + struct P(search) search; + P(search)(kmp, &search, src); +} + +#undef P +#undef KMPS_PREFIX +#undef KMPS_KMP_PREFIX +#undef KMPS_SOURCE +#undef KMPS_GET_CHAR +#undef KMPS_ADD_CONTROLS +#undef KMPS_MERGE_CONTROLS +#undef KMPS_VARS +#undef KMPS_INIT +#undef KMPS_EXIT +#undef KMPS_FOUND +#undef KMPS_FOUND_CHAIN +#undef KMPS_WANT_BEST +#undef KMPS_STEP diff --git a/lib/kmp-test.c b/lib/kmp-test.c new file mode 100644 index 0000000..c066e6e --- /dev/null +++ b/lib/kmp-test.c @@ -0,0 +1,206 @@ +/* + * Test of KMP search + * + * (c) 2006, Pavel Charvat + */ + +#include "lib/lib.h" +#include "lib/mempool.h" +#include + +#if 0 +#define TRACE(x...) do{log(L_DEBUG, x);}while(0) +#else +#define TRACE(x...) 
do{}while(0) +#endif + +/* TEST1 - multiple searches */ + +#define KMP_PREFIX(x) kmp1_##x +#define KMP_WANT_CLEANUP +#include "lib/kmp.h" +#define KMPS_PREFIX(x) kmp1s1_##x +#define KMPS_KMP_PREFIX(x) kmp1_##x +#define KMPS_WANT_BEST +#define KMPS_EXIT(kmp,src,s) TRACE("Best match has %d characters", s->best->len) +#include "lib/kmp-search.h" +#define KMPS_PREFIX(x) kmp1s2_##x +#define KMPS_KMP_PREFIX(x) kmp1_##x +#define KMPS_VARS uns count; +#define KMPS_INIT(kmp,src,s) s->u.count = 0 +#define KMPS_FOUND(kmp,src,s) s->u.count++ +#include "lib/kmp-search.h" + +static void +test1(void) +{ + TRACE("Running test1"); + struct kmp1_struct kmp; + kmp1_init(&kmp); + kmp1_add(&kmp, "ahoj"); + kmp1_add(&kmp, "hoj"); + kmp1_add(&kmp, "aho"); + kmp1_build(&kmp); + struct kmp1s1_search s1; + kmp1s1_search(&kmp, &s1, "asjlahslhalahosjkjhojsas"); + ASSERT(s1.best->len == 3); + struct kmp1s2_search s2; + kmp1s2_search(&kmp, &s2, "asjlahslhalahojsjkjhojsas"); + ASSERT(s2.u.count == 4); + kmp1_cleanup(&kmp); +} + +/* TEST2 - various tracing */ + +#define KMP_PREFIX(x) kmp2_##x +#define KMP_USE_UTF8 +#define KMP_TOLOWER +#define KMP_ONLYALPHA +#define KMP_STATE_VARS char *str; uns id; +#define KMP_ADD_EXTRA_ARGS uns id +#define KMP_VARS char *start; +#define KMP_ADD_INIT(kmp,src) kmp->u.start = src +#define KMP_ADD_NEW(kmp,src,s) do{ TRACE("Inserting string %s with id %d", kmp->u.start, id); \ + s->u.str = kmp->u.start; s->u.id = id; }while(0) +#define KMP_ADD_DUP(kmp,src,s) TRACE("String %s already inserted", kmp->u.start) +#define KMP_WANT_CLEANUP +#define KMP_WANT_SEARCH +#define KMPS_ADD_CONTROLS +#define KMPS_MERGE_CONTROLS +#define KMPS_FOUND(kmp,src,s) TRACE("String %s with id %d found", s->out->u.str, s->out->u.id) +#define KMPS_STEP(kmp,src,s) TRACE("Got to state %p after reading %d", s->s, s->c) +#include "lib/kmp.h" + +static void +test2(void) +{ + TRACE("Running test2"); + struct kmp2_struct kmp; + kmp2_init(&kmp); + kmp2_add(&kmp, "ahoj", 1); + kmp2_add(&kmp, "ahoj", 
2); + kmp2_add(&kmp, "hoj", 3); + kmp2_add(&kmp, "aho", 4); + kmp2_add(&kmp, "aba", 5); + kmp2_add(&kmp, "aba", 5); + kmp2_add(&kmp, "pěl", 5); + kmp2_build(&kmp); + kmp2_run(&kmp, "Šíleně žluťoučký kůň úpěl ďábelské ódy labababaks sdahojdhsaladsjhla"); + kmp2_cleanup(&kmp); +} + +/* TEST3 - random tests */ + +#define KMP_PREFIX(x) kmp3_##x +#define KMP_STATE_VARS uns index; +#define KMP_ADD_EXTRA_ARGS uns index +#define KMP_VARS char *start; +#define KMP_ADD_INIT(kmp,src) kmp->u.start = src +#define KMP_ADD_NEW(kmp,src,s) s->u.index = index +#define KMP_ADD_DUP(kmp,src,s) *(kmp->u.start) = 0 +#define KMP_WANT_CLEANUP +#define KMP_WANT_SEARCH +#define KMPS_VARS uns sum, *cnt; +#define KMPS_FOUND(kmp,src,s) do{ ASSERT(s->u.cnt[s->out->u.index]); s->u.cnt[s->out->u.index]--; s->u.sum--; }while(0) +#include "lib/kmp.h" + +static void +test3(void) +{ + TRACE("Running test3"); + struct mempool *pool = mp_new(1024); + for (uns testn = 0; testn < 100; testn++) + { + mp_flush(pool); + uns n = random_max(100); + char *s[n]; + struct kmp3_struct kmp; + kmp3_init(&kmp); + for (uns i = 0; i < n; i++) + { + uns m = random_max(10); + s[i] = mp_alloc(pool, m + 1); + for (uns j = 0; j < m; j++) + s[i][j] = 'a' + random_max(3); + s[i][m] = 0; + kmp3_add(&kmp, s[i], i); + } + kmp3_build(&kmp); + for (uns i = 0; i < 10; i++) + { + uns m = random_max(100); + byte b[m + 1]; + for (uns j = 0; j < m; j++) + b[j] = 'a' + random_max(4); + b[m] = 0; + uns cnt[n]; + struct kmp3_search search; + search.u.sum = 0; + search.u.cnt = cnt; + for (uns j = 0; j < n; j++) + { + cnt[j] = 0; + if (*s[j]) + for (uns k = 0; k < m; k++) + if (!strncmp(b + k, s[j], strlen(s[j]))) + cnt[j]++, search.u.sum++; + } + kmp3_search(&kmp, &search, b); + ASSERT(search.u.sum == 0); + } + kmp3_cleanup(&kmp); + } + mp_delete(pool); +} + +/* TEST4 - user-defined character type */ + +struct kmp4_struct; +struct kmp4_state; + +static inline int +kmp4_eq(struct kmp4_struct *kmp UNUSED, byte *a, byte *b) +{ + return (a == 
b) || (a && b && *a == *b); +} + +static inline uns +kmp4_hash(struct kmp4_struct *kmp UNUSED, struct kmp4_state *s, byte *c) +{ + return (c ? (*c << 16) : 0) + (uns)(uintptr_t)s; +} + +#define KMP_PREFIX(x) kmp4_##x +#define KMP_CHAR byte * +#define KMP_CONTROL_CHAR NULL +#define KMP_GET_CHAR(kmp,src,c) ({ c = src++; !!*c; }) +#define KMP_GIVE_HASHFN +#define KMP_GIVE_EQ +#define KMP_WANT_CLEANUP +#define KMP_WANT_SEARCH +#define KMPS_FOUND(kmp,src,s) TRACE("found") +#define KMPS_ADD_CONTROLS +#define KMPS_MERGE_CONTROLS +#include "lib/kmp.h" + +static void +test4(void) +{ + TRACE("Running test4"); + struct kmp4_struct kmp; + kmp4_init(&kmp); + kmp4_add(&kmp, "ahoj"); + kmp4_build(&kmp); + kmp4_run(&kmp, "djdhaskjdahoahaahojojshdaksjahdahojskj"); + kmp4_cleanup(&kmp); +} + +int +main(void) +{ + test1(); + test2(); + test3(); + test4(); + return 0; +} diff --git a/lib/kmp-test.t b/lib/kmp-test.t new file mode 100644 index 0000000..bfcc127 --- /dev/null +++ b/lib/kmp-test.t @@ -0,0 +1,3 @@ +# Tests for the kmp module + +Run: ../obj/lib/kmp-test diff --git a/lib/kmp.h b/lib/kmp.h new file mode 100644 index 0000000..8c8c525 --- /dev/null +++ b/lib/kmp.h @@ -0,0 +1,463 @@ +/* + * Knuth-Morris-Pratt's Substring Search for N given strings + * + * (c) 1999--2005, Robert Spalek + * (c) 2006, Pavel Charvat + * + * (In fact, the algorithm is usually referred to as Aho-Corasick, + * but that's just an extension of KMP to multiple strings.) + */ + +/* + * This is not a normal header file, it's a generator of KMP algorithm. + * Each time you include it with parameters set in the corresponding + * preprocessor macros, it generates KMP structures and functions + * with the parameters given. + * + * This file contains only construction of the automaton. The search + * itself can be generated by inclusion of file lib/kmp-search.h. + * Separate headers allow the user to define multiple search + * routines for one common set of key strings.
+ * + * Example: + * + * #define KMP_PREFIX(x) kmp_##x + * #define KMP_WANT_CLEANUP + * #define KMP_WANT_SEARCH // includes lib/kmp-search.h automatically + * #define KMPS_FOUND(kmp,src,s) printf("found\n") + * #include "lib/kmp.h" + * + * [...] + * + * struct kmp_struct kmp; // a structure describing the whole automaton + * kmp_init(&kmp); // initialization (must be called before all other functions) + * + * // add key strings we want to search + * kmp_add(&kmp, "aaa"); + * kmp_add(&kmp, "abc"); + * + * // complete the automaton, no more strings can be added later + * kmp_build(&kmp); + * + * // example of search, should print single "found" to stdout + * kmp_run(&kmp, "aabaabca"); + * + * // destroy all internal structures + * kmp_cleanup(&kmp); + * + * + * Brief description of all parameters: + * + * Basic parameters: + * KMP_PREFIX(x) macro to add a name prefix (used on all global names + * defined by the KMP generator); mandatory; + * we abbreviate this to P(x) below + * + * KMP_CHAR alphabet type, the default is u16 + * + * KMP_SOURCE user-defined text source; KMP_GET_CHAR must + * KMP_GET_CHAR(kmp,src,c) return zero at the end or nonzero together with the next character in c otherwise; + * if not defined, zero-terminated array of bytes is used as the input + * + * KMP_VARS user-defined variables in 'struct P(struct)' + * -- a structure describing the whole automaton; + * these variables are stored in .u substructure to avoid collisions + * KMP_STATE_VARS user-defined variables in 'struct P(state)' + * -- created for each state of the automaton; + * these variables are stored in .u substructure to avoid collisions + * + * Parameters which select how the input is interpreted (if KMP_SOURCE is unset): + * KMP_USE_ASCII reads single bytes from the input (default) + * KMP_USE_UTF8 reads UTF-8 characters from the input (valid UTF-8 needed) + * KMP_TOLOWER converts all to lowercase + * KMP_UNACCENT removes accents + * KMP_ONLYALPHA converts non-alphas to 
KMP_CONTROL_CHAR (see below) + * + * Parameters controlling add(kmp, src): + * KMP_ADD_EXTRA_ARGS extra arguments, should be used carefully because of possible collisions + * KMP_ADD_INIT(kmp,src) called in the beginning of add(), src is the first + * KMP_INIT_STATE(kmp,s) initialization of a new state s (called before KMP_ADD_{NEW,DUP}); + * null state is not included and should be handled after init() if necessary; + * all user-defined data are filled by zeros before call to KMP_INIT_STATE + * KMP_ADD_NEW(kmp,src,s) initialize last state of every new key string (called after KMP_INIT_STATE); + * the string must be parsed before so src is after the last string's character + * KMP_ADD_DUP(kmp,src,s) analogy of KMP_ADD_NEW called for duplicates + * + * Parameters to build(): + * KMP_BUILD_STATE(kmp,s) called for all states (including null) in order of non-decreasing tree depth + * + * Other parameters: + * KMP_WANT_CLEANUP define cleanup() + * KMP_WANT_SEARCH includes lib/kmp-search.h with the same prefix; + * there can be multiple search variants for a single KMP automaton + * KMP_USE_POOL allocates in a given pool + * KMP_CONTROL_CHAR special control character (default is ':') + * KMP_GIVE_ALLOC if set, you must supply custom allocation functions: + * void *alloc(unsigned int size) -- allocate space for + * a state. Default is pooled allocation from a local pool or HASH_USE_POOL. + * void free(void *) -- the converse. 
+ * KMP_GIVE_HASHFN if set, you must supply custom hash function: + * unsigned int hash(struct P(struct) *kmp, struct P(state) *state, KMP_CHAR c); + * default hash function works only for integer character types + * KMP_GIVE_EQ if set, you must supply custom compare function of two characters: + * int eq(struct P(struct) *kmp, KMP_CHAR a, KMP_CHAR b); + * default is 'a == b' + */ + +#ifndef KMP_PREFIX +#error Missing KMP_PREFIX +#endif + +#include "lib/mempool.h" +#include +#include + +#define P(x) KMP_PREFIX(x) + +#ifdef KMP_CHAR +typedef KMP_CHAR P(char_t); +#else +typedef u16 P(char_t); +#endif + +typedef u32 P(len_t); + +#ifdef KMP_NODE +typedef KMP_NODE P(node_t); +#else +typedef struct {} P(node_t); +#endif + +struct P(struct); + +struct P(state) { + struct P(state) *from; /* state with the previous character (forms a tree with null state in the root) */ + struct P(state) *back; /* backwards edge to the longest shorter state with same suffix */ + struct P(state) *next; /* the longest of shorter matches (or NULL) */ + P(len_t) len; /* state depth if it represents a key string, zero otherwise */ + P(char_t) c; /* last character of the represented string */ + struct { +# ifdef KMP_STATE_VARS + KMP_STATE_VARS +# endif + } u; /* user-defined data*/ +}; + +/* Control char */ +static inline P(char_t) +P(control) (void) +{ +# ifdef KMP_CONTROL_CHAR + return KMP_CONTROL_CHAR; +# else + return ':'; +# endif +} + +/* User-defined source */ +struct P(hash_table); + +#define HASH_GIVE_HASHFN +#ifdef KMP_GIVE_HASHFN +static inline uns +P(hash_hash) (struct P(hash_table) *t, struct P(state) *f, P(char_t) c) +{ + return P(hash) ((struct P(struct) *) t, f, c); +} +#else +static inline uns +P(hash_hash) (struct P(hash_table) *t UNUSED, struct P(state) *f, P(char_t) c) +{ + return (((uns)c) << 16) + (uns)(uintptr_t)f; +} +#endif + +#ifndef KMP_GIVE_EQ +static inline int +P(eq) (struct P(struct) *kmp UNUSED, P(char_t) c1, P(char_t) c2) +{ + return c1 == c2; +} +#endif + +static 
inline int +P(is_control) (struct P(struct) *kmp, P(char_t) c) +{ + return P(eq) (kmp, c, P(control)()); +} + +#define HASH_GIVE_EQ +static inline int +P(hash_eq) (struct P(hash_table) *t, struct P(state) *f1, P(char_t) c1, struct P(state) *f2, P(char_t) c2) +{ + return f1 == f2 && P(eq)((struct P(struct) *) t, c1, c2); +} + +#ifdef KMP_GIVE_ALLOC +#define HASH_GIVE_ALLOC +static inline void * +P(hash_alloc) (struct P(hash_table) *t, uns size) +{ + return P(alloc) ((struct P(struct) *) t, size); +} + +static inline void +P(hash_free) (struct P(hash_table) *t, void *ptr) +{ + P(free) ((struct P(struct) *) t, ptr); +} +#endif + +#define HASH_GIVE_INIT_KEY +static inline void +P(hash_init_key) (struct P(hash_table) *t UNUSED, struct P(state) *s, struct P(state) *f, P(char_t) c) +{ + bzero(s, sizeof(*s)); +# ifdef KMP_INIT_STATE + struct P(struct) *kmp = (struct P(struct) *)t; + { KMP_INIT_STATE(kmp, s); } +# endif + s->from = f; + s->c = c; + s->next = f->back; /* the pointers hold the link-list of sons... 
changed in build() */ + f->back = s; +} + +#undef P +#define HASH_PREFIX(x) KMP_PREFIX(hash_##x) +#define HASH_NODE struct KMP_PREFIX(state) +#define HASH_KEY_COMPLEX(x) x from, x c +#define HASH_KEY_DECL struct KMP_PREFIX(state) *from, KMP_PREFIX(char_t) c +#define HASH_WANT_NEW +#define HASH_WANT_FIND +#ifdef KMP_WANT_CLEANUP +#define HASH_WANT_CLEANUP +#endif +#if defined(KMP_USE_POOL) +#define HASH_USE_POOL KMP_USE_POOL +#else +#define HASH_AUTO_POOL 4096 +#endif +#define HASH_CONSERVE_SPACE +#define HASH_TABLE_DYNAMIC +#include "lib/hashtable.h" +#define P(x) KMP_PREFIX(x) + +struct P(struct) { + struct P(hash_table) hash; /* hash table of state transitions */ + struct P(state) null; /* null state */ + struct { +# ifdef KMP_VARS + KMP_VARS +# endif + } u; /* user-defined data */ +}; + +#ifdef KMP_SOURCE +typedef KMP_SOURCE P(source_t); +#else +typedef char *P(source_t); +#endif + +#ifdef KMP_GET_CHAR +static inline int +P(get_char) (struct P(struct) *kmp UNUSED, P(source_t) *src UNUSED, P(char_t) *c UNUSED) +{ + return KMP_GET_CHAR(kmp, (*src), (*c)); +} +#else +# if defined(KMP_USE_UTF8) +# include "lib/unicode.h" +# if defined(KMP_ONLYALPHA) || defined(KMP_TOLOWER) || defined(KMP_UNACCENT) +# include "charset/unicat.h" +# endif +# elif defined(KMP_USE_ASCII) +# if defined(KMP_ONLYALPHA) || defined(KMP_TOLOWER) +# include "lib/chartype.h" +# endif +# endif +static inline int +P(get_char) (struct P(struct) *kmp UNUSED, P(source_t) *src, P(char_t) *c) +{ +# ifdef KMP_USE_UTF8 + uns cc; + *src = utf8_get(*src, &cc); +# ifdef KMP_ONLYALPHA + if (!cc) {} + else if (!Ualpha(cc)) + cc = P(control)(); + else +# endif + { +# ifdef KMP_TOLOWER + cc = Utolower(cc); +# endif +# ifdef KMP_UNACCENT + cc = Uunaccent(cc); +# endif + } +# else + uns cc = *(*src)++; +# ifdef KMP_ONLYALPHA + if (!cc) {} + else if (!Calpha(cc)) + cc = P(control)(); + else +# endif +# ifdef KMP_TOLOWER + cc = Clocase(cc); +# endif +# ifdef KMP_UNACCENT +# error Do not know how to unaccent ASCII 
characters +# endif +# endif + *c = cc; + return !!cc; +} +#endif + +static struct P(state) * +P(add) (struct P(struct) *kmp, P(source_t) src +# ifdef KMP_ADD_EXTRA_ARGS + , KMP_ADD_EXTRA_ARGS +# endif +) +{ +# ifdef KMP_ADD_INIT + { KMP_ADD_INIT(kmp, src); } +# endif + + P(char_t) c; + if (!P(get_char)(kmp, &src, &c)) + return NULL; + struct P(state) *p = &kmp->null, *s; + uns len = 0; + do + { + s = P(hash_find)(&kmp->hash, p, c); + if (!s) + for (;;) + { + s = P(hash_new)(&kmp->hash, p, c); + len++; + if (!(P(get_char)(kmp, &src, &c))) + goto enter_new; + p = s; + } + p = s; + len++; + } + while (P(get_char)(kmp, &src, &c)); + if (s->len) + { +# ifdef KMP_ADD_DUP + { KMP_ADD_DUP(kmp, src, s); } +# endif + return s; + } +enter_new: + s->len = len; +# ifdef KMP_ADD_NEW + { KMP_ADD_NEW(kmp, src, s); } +# endif + return s; +} + +static void +P(init) (struct P(struct) *kmp) +{ + bzero(&kmp->null, sizeof(struct P(state))); + P(hash_init)(&kmp->hash); +} + +#ifdef KMP_WANT_CLEANUP +static inline void +P(cleanup) (struct P(struct) *kmp) +{ + P(hash_cleanup)(&kmp->hash); +} +#endif + +static inline int +P(empty) (struct P(struct) *kmp) +{ + return !kmp->hash.hash_count; +} + +static inline struct P(state) * +P(chain_start) (struct P(state) *s) +{ + return s->len ? s : s->next; +} + +static void +P(build) (struct P(struct) *kmp) +{ + if (P(empty)(kmp)) + return; + uns read = 0, write = 0; + struct P(state) *fifo[kmp->hash.hash_count], *null = &kmp->null; + for (struct P(state) *s = null->back; s; s = s->next) + fifo[write++] = s; + null->back = NULL; +# ifdef KMP_BUILD_STATE + { KMP_BUILD_STATE(kmp, null); } +# endif + while (read != write) + { + struct P(state) *s = fifo[read++], *t; + for (t = s->back; t; t = t->next) + fifo[write++] = t; + for (t = s->from->back; 1; t = t->back) + { + if (!t) + { + s->back = null; + s->next = NULL; + break; + } + s->back = P(hash_find)(&kmp->hash, t, s->c); + if (s->back) + { + s->next = s->back->len ? 
s->back : s->back->next; + break; + } + } +# ifdef KMP_BUILD_STATE + { KMP_BUILD_STATE(kmp, s); } +# endif + } +} + +#undef P +#undef KMP_CHAR +#undef KMP_SOURCE +#undef KMP_GET_CHAR +#undef KMP_VARS +#undef KMP_STATE_VARS +#undef KMP_CONTEXT +#undef KMP_USE_ASCII +#undef KMP_USE_UTF8 +#undef KMP_TOLOWER +#undef KMP_UNACCENT +#undef KMP_ONLYALPHA +#undef KMP_CONTROL_CHAR +#undef KMP_ADD_EXTRA_ARGS +#undef KMP_ADD_INIT +#undef KMP_ADD_NEW +#undef KMP_ADD_DUP +#undef KMP_INIT_STATE +#undef KMP_BUILD_STATE +#undef KMP_USE_POOL +#undef KMP_GIVE_ALLOC +#undef KMP_GIVE_HASHFN +#undef KMP_GIVE_EQ + +#ifdef KMP_WANT_SEARCH +# undef KMP_WANT_SEARCH +# define KMPS_PREFIX(x) KMP_PREFIX(x) +# define KMPS_KMP_PREFIX(x) KMP_PREFIX(x) +# include "lib/kmp-search.h" +#endif + +#undef KMP_PREFIX diff --git a/lib/lfs-test.c b/lib/lfs-test.c new file mode 100644 index 0000000..c92dcd5 --- /dev/null +++ b/lib/lfs-test.c @@ -0,0 +1,63 @@ +/* Test of large files */ + +#include "lib/lib.h" +#include "lib/fastbuf.h" + +#include +#include +#include +#include +#include + +#define BLOCK (1<<10) +#define COUNT (5<<20) +#define TESTS (1<<20) + +int main(void) +{ + struct fastbuf *b; + byte block[BLOCK]; + uns i; + + srand(time(NULL)); +#if 0 + b = bopen("/big/robert/large-file", O_CREAT | O_TRUNC | O_RDWR, 1<<20); + if (!b) + die("Cannot create large-file"); + + log(L_DEBUG, "Writing %d blocks of size %d", COUNT, BLOCK); + for (i=0; i + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */ + +#ifndef _UCW_LFS_H +#define _UCW_LFS_H + +#include +#include + +#ifdef CONFIG_LFS + +#define sh_open open64 +#define sh_seek lseek64 +#define sh_pread pread64 +#define sh_pwrite pwrite64 +#define sh_ftruncate ftruncate64 +#define sh_mmap(a,l,p,f,d,o) mmap64(a,l,p,f,d,o) +#define sh_pread pread64 +#define sh_pwrite pwrite64 +#define sh_stat stat64 +#define sh_fstat fstat64 +typedef struct stat64 sh_stat_t; + +#else /* !CONFIG_LFS */ + +#define sh_open open +#define sh_seek(f,o,w) lseek(f,o,w) +#define sh_ftruncate(f,o) ftruncate(f,o) +#define sh_mmap(a,l,p,f,d,o) mmap(a,l,p,f,d,o) +#define sh_pread pread +#define sh_pwrite pwrite +#define sh_stat stat +#define sh_fstat fstat +typedef struct stat sh_stat_t; + +#endif /* !CONFIG_LFS */ + +#if defined(_POSIX_SYNCHRONIZED_IO) && (_POSIX_SYNCHRONIZED_IO > 0) +#define sh_fdatasync fdatasync +#else +#define sh_fdatasync fsync +#endif + +#define HAVE_PREAD + +static inline sh_off_t +sh_file_size(const char *name) +{ + int fd = sh_open(name, O_RDONLY); + if (fd < 0) + die("Cannot open %s: %m", name); + sh_off_t len = sh_seek(fd, 0, SEEK_END); + close(fd); + return len; +} + +#endif /* !_UCW_LFS_H */ diff --git a/lib/lib.h b/lib/lib.h new file mode 100644 index 0000000..f8659ed --- /dev/null +++ b/lib/lib.h @@ -0,0 +1,279 @@ +/* + * The UCW Library -- Miscellaneous Functions + * + * (c) 1997--2007 Martin Mares + * (c) 2005 Tomas Valla + * (c) 2006 Robert Spalek + * (c) 2007 Pavel Charvat + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */ + +#ifndef _UCW_LIB_H +#define _UCW_LIB_H + +#include "lib/config.h" +#include + +/* Macros for handling structures, offsets and alignment */ + +#define CHECK_PTR_TYPE(x, type) ((x)-(type)(x) + (type)(x)) +#define PTR_TO(s, i) &((s*)0)->i +#define OFFSETOF(s, i) ((unsigned int) PTR_TO(s, i)) +#define SKIP_BACK(s, i, p) ((s *)((char *)p - OFFSETOF(s, i))) +#define ALIGN_TO(s, a) (((s)+a-1)&~(a-1)) +#define ALIGN_PTR(p, s) ((uintptr_t)(p) % (s) ? (typeof(p))((uintptr_t)(p) + (s) - (uintptr_t)(p) % (s)) : (p)) +#define UNALIGNED_PART(ptr, type) (((uintptr_t) (ptr)) % sizeof(type)) + +/* Some other macros */ + +#define MIN(a,b) (((a)<(b))?(a):(b)) +#define MAX(a,b) (((a)>(b))?(a):(b)) +#define CLAMP(x,min,max) ({ int _t=x; (_t < min) ? min : (_t > max) ? max : _t; }) +#define ABS(x) ((x) < 0 ? -(x) : (x)) +#define ARRAY_SIZE(a) (sizeof(a)/sizeof(*(a))) +#define STRINGIFY(x) #x +#define STRINGIFY_EXPANDED(x) STRINGIFY(x) +#define GLUE(x,y) x##y +#define GLUE_(x,y) x##_##y + +#define COMPARE(x,y) do { if ((x)<(y)) return -1; if ((x)>(y)) return 1; } while(0) +#define REV_COMPARE(x,y) COMPARE(y,x) +#define COMPARE_LT(x,y) do { if ((x)<(y)) return 1; if ((x)>(y)) return 0; } while(0) +#define COMPARE_GT(x,y) COMPARE_LT(y,x) + +#define ROL(x, bits) (((x) << (bits)) | ((x) >> (sizeof(uns)*8 - (bits)))) /* Bitwise rotation of an uns to the left */ + +/* GCC Extensions */ + +#ifdef __GNUC__ + +#undef inline +#define NONRET __attribute__((noreturn)) +#define UNUSED __attribute__((unused)) +#define CONSTRUCTOR __attribute__((constructor)) +#define PACKED __attribute__((packed)) +#define CONST __attribute__((const)) +#define PURE __attribute__((pure)) +#define FORMAT_CHECK(x,y,z) __attribute__((format(x,y,z))) +#define likely(x) __builtin_expect((x),1) +#define unlikely(x) __builtin_expect((x),0) + +#if __GNUC__ >= 4 || __GNUC__ == 3 && __GNUC_MINOR__ >= 3 +#define ALWAYS_INLINE inline __attribute__((always_inline)) +#define NO_INLINE __attribute__((noinline)) +#else 
+#define ALWAYS_INLINE inline +#endif + +#if __GNUC__ >= 4 +#define LIKE_MALLOC __attribute__((malloc)) +#define SENTINEL_CHECK __attribute__((sentinel)) +#else +#define LIKE_MALLOC +#define SENTINEL_CHECK +#endif + +#else +#error This program requires the GNU C compiler. +#endif + +/* Logging */ + +#define L_DEBUG 'D' /* Debugging messages */ +#define L_INFO 'I' /* Informational msgs, warnings and errors */ +#define L_WARN 'W' +#define L_ERROR 'E' +#define L_INFO_R 'i' /* Errors caused by external events */ +#define L_WARN_R 'w' +#define L_ERROR_R 'e' +#define L_FATAL '!' /* die() */ + +extern char *log_title; /* NULL - print no title, default is log_progname */ +extern char *log_filename; /* Expanded name of the current log file */ +extern volatile int log_switch_nest; /* log_switch() nesting counter, increment to disable automatic switches */ +extern int log_pid; /* 0 if shouldn't be logged */ +extern int log_precise_timings; /* Include microsecond timestamps in log messages */ +extern void (*log_die_hook)(void); +struct tm; +extern void (*log_switch_hook)(struct tm *tm); + +void msg(uns cat, const char *fmt, ...) FORMAT_CHECK(printf,2,3); +void vmsg(uns cat, const char *fmt, va_list args); +void die(const char *, ...) NONRET FORMAT_CHECK(printf,1,2); +void log_init(const char *argv0); +void log_file(const char *name); +void log_fork(void); +int log_switch(void); + +void assert_failed(const char *assertion, const char *file, int line) NONRET; +void assert_failed_noinfo(void) NONRET; + +#ifdef DEBUG_ASSERTS +#define ASSERT(x) ({ if (unlikely(!(x))) assert_failed(#x, __FILE__, __LINE__); 1; }) +#else +#define ASSERT(x) ({ if (__builtin_constant_p(x) && !(x)) assert_failed_noinfo(); 1; }) +#endif + +#define COMPILE_ASSERT(name,x) typedef char _COMPILE_ASSERT_##name[!!(x)-1] + +#ifdef LOCAL_DEBUG +#define DBG(x,y...) msg(L_DEBUG, x,##y) +#else +#define DBG(x,y...) 
do { } while(0) +#endif + +static inline void log_switch_disable(void) { log_switch_nest++; } +static inline void log_switch_enable(void) { ASSERT(log_switch_nest); log_switch_nest--; } + +/* Memory allocation */ + +#define xmalloc sh_xmalloc +#define xrealloc sh_xrealloc +#define xfree sh_xfree + +#ifdef DEBUG_DMALLOC +/* + * The standard dmalloc macros tend to produce lots of namespace + * conflicts and we use only xmalloc and xfree, so we can define + * the stubs ourselves. + */ +#define DMALLOC_DISABLE +#include +#define sh_xmalloc(size) _xmalloc_leap(__FILE__, __LINE__, size) +#define sh_xrealloc(ptr,size) _xrealloc_leap(__FILE__, __LINE__, ptr, size) +#define sh_xfree(ptr) _xfree_leap(__FILE__, __LINE__, ptr) +#else +/* + * Unfortunately, several libraries we might want to link to define + * their own xmalloc and we don't want to interfere with them, hence + * the renaming. + */ +void *xmalloc(uns) LIKE_MALLOC; +void *xrealloc(void *, uns); +void xfree(void *); +#endif + +void *xmalloc_zero(uns) LIKE_MALLOC; +char *xstrdup(const char *) LIKE_MALLOC; + +/* Content-Type pattern matching and filters */ + +int match_ct_patt(const char *, const char *); + +/* wordsplit.c */ + +int sepsplit(char *str, uns sep, char **rec, uns max); +int wordsplit(char *str, char **rec, uns max); + +/* pat(i)match.c: Matching of shell patterns */ + +int match_pattern(const char *patt, const char *str); +int match_pattern_nocase(const char *patt, const char *str); + +/* md5hex.c */ + +void md5_to_hex(const byte *s, char *d); +void hex_to_md5(const char *s, byte *d); + +#define MD5_SIZE 16 +#define MD5_HEX_SIZE 33 + +/* prime.c */ + +int isprime(uns x); +uns nextprime(uns x); + +/* primetable.c */ + +uns next_table_prime(uns x); +uns prev_table_prime(uns x); + +/* timer.c */ + +timestamp_t get_timestamp(void); + +void init_timer(timestamp_t *timer); +uns get_timer(timestamp_t *timer); +uns switch_timer(timestamp_t *old, timestamp_t *new); + +/* regex.c */ + +typedef struct regex 
regex; + +regex *rx_compile(const char *r, int icase); +void rx_free(regex *r); +int rx_match(regex *r, const char *s); +int rx_subst(regex *r, const char *by, const char *src, char *dest, uns destlen); + +/* random.c */ + +uns random_u32(void); +uns random_max(uns max); +u64 random_u64(void); +u64 random_max_u64(u64 max); + +/* mmap.c */ + +void *mmap_file(const char *name, unsigned *len, int writeable); +void munmap_file(void *start, unsigned len); + +/* proctitle.c */ + +void setproctitle_init(int argc, char **argv); +void setproctitle(const char *msg, ...) FORMAT_CHECK(printf,1,2); +char *getproctitle(void); + +/* randomkey.c */ + +void randomkey(byte *buf, uns size); + +/* exitstatus.c */ + +#define EXIT_STATUS_MSG_SIZE 32 +int format_exit_status(char *msg, int stat); + +/* runcmd.c */ + +int run_command(const char *cmd, ...); +void NONRET exec_command(const char *cmd, ...); +void echo_command(char *buf, int size, const char *cmd, ...); +int run_command_v(const char *cmd, va_list args); +void NONRET exec_command_v(const char *cmd, va_list args); +void echo_command_v(char *buf, int size, const char *cmd, va_list args); + +/* carefulio.c */ + +int careful_read(int fd, void *buf, int len); +int careful_write(int fd, const void *buf, int len); + +/* sync.c */ + +void sync_dir(const char *name); + +/* sighandler.c */ + +typedef int (*sh_sighandler_t)(int); // gets signum, returns nonzero if abort() should be called + +void handle_signal(int signum); +void unhandle_signal(int signum); +sh_sighandler_t set_signal_handler(int signum, sh_sighandler_t new); + +/* string.c */ + +char *str_unesc(char *dest, const char *src); +char *str_format_flags(char *dest, const char *fmt, uns flags); + +/* bigalloc.c */ + +void *page_alloc(u64 len) LIKE_MALLOC; // allocates a multiple of CPU_PAGE_SIZE bytes with mmap +void *page_alloc_zero(u64 len) LIKE_MALLOC; +void page_free(void *start, u64 len); +void *page_realloc(void *start, u64 old_len, u64 new_len); + +void *big_alloc(u64 
len) LIKE_MALLOC; // allocate a large memory block in the most efficient way available +void *big_alloc_zero(u64 len) LIKE_MALLOC; +void big_free(void *start, u64 len); + +#endif diff --git a/lib/libucw.pc b/lib/libucw.pc new file mode 100644 index 0000000..745f031 --- /dev/null +++ b/lib/libucw.pc @@ -0,0 +1,16 @@ +# pkg-config metadata for libucw + +libdir=@LIBDIR@ +incdir=. + +#ifdef CONFIG_UCW_THREADS +threads=-lpthread +#else +threads= +#endif + +Name: libucw +Description: A library of utility functions and data structures +Version: @SHERLOCK_VERSION@ +Cflags: -I${incdir} +Libs: -L${libdir} -lucw ${threads} diff --git a/lib/lists.c b/lib/lists.c new file mode 100644 index 0000000..043c364 --- /dev/null +++ b/lib/lists.c @@ -0,0 +1,77 @@ +/* + * UCW Library -- Linked Lists + * + * (c) 1997--1999 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#include "lib/lib.h" + +#define _UCW_LISTS_C +#include "lib/lists.h" + +LIST_INLINE void +add_tail(list *l, node *n) +{ + node *z = l->tail; + + n->next = (node *) &l->null; + n->prev = z; + z->next = n; + l->tail = n; +} + +LIST_INLINE void +add_head(list *l, node *n) +{ + node *z = l->head; + + n->next = z; + n->prev = (node *) &l->head; + z->prev = n; + l->head = n; +} + +LIST_INLINE void +insert_node(node *n, node *after) +{ + node *z = after->next; + + n->next = z; + n->prev = after; + after->next = n; + z->prev = n; +} + +LIST_INLINE void +rem_node(node *n) +{ + node *z = n->prev; + node *x = n->next; + + z->next = x; + x->prev = z; +} + +LIST_INLINE void +init_list(list *l) +{ + l->head = (node *) &l->null; + l->null = NULL; + l->tail = (node *) &l->head; +} + +LIST_INLINE void +add_tail_list(list *to, list *l) +{ + node *p = to->tail; + node *q = l->head; + + p->next = q; + q->prev = p; + q = l->tail; + q->next = (node *) &to->null; + to->tail = q; +} diff --git a/lib/lists.h b/lib/lists.h new file mode 100644 index 
0000000..3903691 --- /dev/null +++ b/lib/lists.h @@ -0,0 +1,64 @@ +/* + * UCW Library -- Linked Lists + * + * (c) 1997--1999 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#ifndef _UCW_LISTS_H +#define _UCW_LISTS_H + +/* + * I admit the list structure is very tricky and also somewhat awkward, + * but it's both efficient and easy to manipulate once one understands the + * basic trick: The list head always contains two synthetic nodes which are + * always present in the list: the head and the tail. But as the `next' + * entry of the tail and the `prev' entry of the head are both NULL, the + * nodes can overlap each other: + * + * head head_node.next + * null head_node.prev tail_node.next + * tail tail_node.prev + */ + +typedef struct node { + struct node *next, *prev; +} node; + +typedef struct list { /* In fact two overlaid nodes */ + struct node *head, *null, *tail; +} list; + +#define NODE (node *) /* cast prefix: NODE x is (node *) x */ +#define HEAD(list) ((void *)((list).head)) /* first node; the tail sentinel when the list is empty */ +#define TAIL(list) ((void *)((list).tail)) /* last node; the head sentinel when the list is empty */ +#define WALK_LIST(n,list) for(n=HEAD(list);(NODE (n))->next; \ + n=(void *)((NODE (n))->next)) /* stops at the tail sentinel, whose next is NULL */ +#define DO_FOR_ALL(n,list) WALK_LIST(n,list) /* alias of WALK_LIST */ +#define WALK_LIST_DELSAFE(n,nxt,list) \ + for(n=HEAD(list); nxt=(void *)((NODE (n))->next); n=(void *) nxt) /* nxt is read before the body runs, so the body may unlink n */ +#define WALK_LIST_BACKWARDS(n,list) for(n=TAIL(list);(NODE (n))->prev; \ + n=(void *)((NODE (n))->prev)) /* stops at the head sentinel, whose prev is NULL */ +#define WALK_LIST_BACKWARDS_DELSAFE(n,prv,list) \ + for(n=TAIL(list); prv=(void *)((NODE (n))->prev); n=(void *) prv) /* prv is read before the body runs, so the body may unlink n */ + +#define EMPTY_LIST(list) (!(list).head->next) /* nonzero iff head points at the tail sentinel */ + +void add_tail(list *, node *); /* append the node at the end of the list */ +void add_head(list *, node *); /* insert the node at the front of the list */ +void rem_node(node *); /* unlink the node; its own next/prev are left untouched */ +void add_tail_list(list *, list *); /* append the nodes of the second list to the first; the second header is not reinitialized */ +void init_list(list *); /* make the list empty */ +void insert_node(node *, node *); /* insert the first node right after the second */ + +#if !defined(_UCW_LISTS_C) && defined(__GNUC__) +#define LIST_INLINE extern inline /* lists.c is included next, so its definitions are visible here */ +#include "lib/lists.c" +#undef LIST_INLINE +#else +#define LIST_INLINE /* empty: lists.c then defines ordinary functions */ +#endif + +#endif diff 
--git a/lib/lizard-safe.c b/lib/lizard-safe.c new file mode 100644 index 0000000..e8e8f6f --- /dev/null +++ b/lib/lizard-safe.c @@ -0,0 +1,102 @@ +/* + * LiZaRd -- Fast compression method based on Lempel-Ziv 77 + * + * (c) 2004, Robert Spalek + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#include "lib/lib.h" +#include "lib/threads.h" +#include "lib/lizard.h" + +#include +#include +#include +#include +#include + +struct lizard_buffer { + uns len; + void *ptr; +}; + +struct lizard_buffer * +lizard_alloc(void) +{ + struct lizard_buffer *buf = xmalloc(sizeof(struct lizard_buffer)); + buf->len = 0; + buf->ptr = NULL; + handle_signal(SIGSEGV); + return buf; +} + +void +lizard_free(struct lizard_buffer *buf) +{ + unhandle_signal(SIGSEGV); + if (buf->ptr) + munmap(buf->ptr, buf->len + CPU_PAGE_SIZE); + xfree(buf); +} + +static void +lizard_realloc(struct lizard_buffer *buf, uns max_len) + /* max_len needs to be aligned to CPU_PAGE_SIZE */ +{ + if (max_len <= buf->len) + return; + if (max_len < 2*buf->len) // to ensure logarithmic cost + max_len = 2*buf->len; + + if (buf->ptr) + munmap(buf->ptr, buf->len + CPU_PAGE_SIZE); + buf->len = max_len; + buf->ptr = mmap(NULL, buf->len + CPU_PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0); + if (buf->ptr == MAP_FAILED) + die("mmap(anonymous, %d bytes): %m", (uns)(buf->len + CPU_PAGE_SIZE)); + if (mprotect(buf->ptr + buf->len, CPU_PAGE_SIZE, PROT_NONE) < 0) + die("mprotect: %m"); +} + +static jmp_buf safe_decompress_jump; +static int +sigsegv_handler(int signal UNUSED) +{ + longjmp(safe_decompress_jump, 1); + return 1; +} + +byte * +lizard_decompress_safe(const byte *in, struct lizard_buffer *buf, uns expected_length) + /* Decompresses in into buf and returns a pointer to the uncompressed + * data. If an error has occurred, NULL is returned and errno is + * set. The buffer buf is automatically reallocated. 
SIGSEGV is caught in + * case of buffer-overflow. The function is not re-entrant because of a + * static longjmp handler. */ +{ + uns lock_offset = ALIGN_TO(expected_length + 3, CPU_PAGE_SIZE); // +3 due to the unaligned access + if (lock_offset > buf->len) + lizard_realloc(buf, lock_offset); + volatile sh_sighandler_t old_handler = set_signal_handler(SIGSEGV, sigsegv_handler); + byte *ptr; + if (!setjmp(safe_decompress_jump)) + { + ptr = buf->ptr + buf->len - lock_offset; + int len = lizard_decompress(in, ptr); + if (len != (int) expected_length) + { + ptr = NULL; + errno = EINVAL; + } + } + else + { + msg(L_ERROR, "SIGSEGV caught in lizard_decompress()"); + ptr = NULL; + errno = EFAULT; + } + set_signal_handler(SIGSEGV, old_handler); + return ptr; +} diff --git a/lib/lizard-test.c b/lib/lizard-test.c new file mode 100644 index 0000000..137cdc7 --- /dev/null +++ b/lib/lizard-test.c @@ -0,0 +1,123 @@ +#include "lib/lib.h" +#include "lib/getopt.h" +#include "lib/fastbuf.h" +#include "lib/ff-binary.h" +#include "lib/lizard.h" +#include +#include +#include +#include + +static char *options = CF_SHORT_OPTS "cdtx"; +static char *help = "\ +Usage: lizard-test input-file [output-file]\n\ +\n\ +Options:\n" +CF_USAGE +"-c\t\tCompress\n\ +-d\t\tDecompress\n\ +-t\t\tCompress, decompress, and compare (in memory only, default)\n\ +-x\t\tLet the test crash by shrinking the output buffer\n\ +"; + +static void NONRET +usage(void) +{ + fputs(help, stderr); + exit(1); +} + +int +main(int argc, char **argv) +{ + int opt; + uns action = 't'; + uns crash = 0; + log_init(argv[0]); + while ((opt = cf_getopt(argc, argv, options, CF_NO_LONG_OPTS, NULL)) >= 0) + switch (opt) + { + case 'c': + case 'd': + case 't': + action = opt; + break; + case 'x': + crash++; + break; + default: + usage(); + } + if (action == 't' && argc != optind+1 + || action != 't' && argc != optind+2) + usage(); + + void *mi, *mo; + int li, lo; + uns adler = 0; + + struct stat st; + stat(argv[optind], &st); + li = 
st.st_size; + struct fastbuf *fi = bopen(argv[optind], O_RDONLY, 1<<16); + if (action != 'd') + { + lo = li * LIZARD_MAX_MULTIPLY + LIZARD_MAX_ADD; + li += LIZARD_NEEDS_CHARS; + } + else + { + lo = bgetl(fi); + adler = bgetl(fi); + li -= 8; + } + mi = xmalloc(li); + mo = xmalloc(lo); + li = bread(fi, mi, li); + bclose(fi); + + printf("%d ", li); + if (action == 'd') + printf("->expected %d (%08x) ", lo, adler); + fflush(stdout); + if (action != 'd') + lo = lizard_compress(mi, li, mo); + else + { + lo = lizard_decompress(mi, mo); + if (adler32(mo, lo) != adler) + printf("wrong Adler32 "); + } + printf("-> %d ", lo); + fflush(stdout); + + if (action != 't') + { + struct fastbuf *fo = bopen(argv[optind+1], O_CREAT | O_TRUNC | O_WRONLY, 1<<16); + if (action == 'c') + { + bputl(fo, li); + bputl(fo, adler32(mi, li)); + } + bwrite(fo, mo, lo); + bclose(fo); + } + else + { + int smaller_li; + if (li >= (int) CPU_PAGE_SIZE) + smaller_li = li - CPU_PAGE_SIZE; + else + smaller_li = 0; + struct lizard_buffer *buf = lizard_alloc(); + byte *ptr = lizard_decompress_safe(mo, buf, crash ? smaller_li : li); + if (!ptr) + printf("err: %m"); + else if (memcmp(mi, ptr, li)) + printf("WRONG"); + else + printf("OK"); + lizard_free(buf); + } + printf("\n"); +} diff --git a/lib/lizard.c b/lib/lizard.c new file mode 100644 index 0000000..1da67cc --- /dev/null +++ b/lib/lizard.c @@ -0,0 +1,478 @@ +/* + * LiZaRd -- Fast compression method based on Lempel-Ziv 77 + * + * (c) 2004, Robert Spalek + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + * + * The file format is based on LZO1X and + * the compression method is based on zlib. 
+ */ + +#include "lib/lib.h" +#include "lib/lizard.h" + +#include + +typedef u16 hash_ptr_t; +struct hash_record { + /* the position in the original text is implicit; it is computed by locate_string() */ + hash_ptr_t next; // 0=end + hash_ptr_t prev; // high bit: 0=record in array, 1=head in hash-table (i.e. value of hashf) +}; + +#define HASH_SIZE (1<<14) // size of hash-table +#define HASH_RECORDS (1<<15) // maximum number of records in hash-table, 0 is unused ==> subtract 1 +#define CHAIN_MAX_TESTS 8 // crop longer collision chains +#define CHAIN_GOOD_MATCH 32 // we already have a good match => end + +static inline uns +hashf(const byte *string) + /* 0..HASH_SIZE-1 */ +{ + return string[0] ^ (string[1]<<3) ^ (string[2]<<6); +} + +static inline byte * +locate_string(const byte *string, int record_id, int head) + /* The strings are recorded into the hash-table regularly, hence there is no + * need to store the pointer there. */ +{ + string += record_id - head; + if (record_id >= head) + string -= HASH_RECORDS-1; + return (byte *)string; +} + +static inline uns +find_match(uns record_id, struct hash_record *hash_rec, const byte *string, const byte *string_end, byte **best_ptr, uns head) + /* hash_tab[hash] == record_id points to the head of the double-linked + * link-list of strings with the same hash. The records are statically + * stored in circular array hash_rec (with the 1st entry unused), and the + * pointers are just 16-bit indices. The strings in every collision chain + * are ordered by age. 
*/ +{ + uns count = CHAIN_MAX_TESTS; + uns best_len = 0; + while (record_id && count-- > 0) + { + byte *record_string = locate_string(string, record_id, head); + byte *cmp = record_string; + if (cmp[0] == string[0] && cmp[2] == string[2]) + /* implies cmp[1] == string[1] */ + { + if (cmp[3] == string[3]) + { + cmp += 4; + if (*cmp++ == string[4] && *cmp++ == string[5] + && *cmp++ == string[6] && *cmp++ == string[7]) + { + const byte *str = string + 8; + while (str <= string_end && *cmp++ == *str++); + } + } + else + cmp += 4; + uns len = cmp - record_string - 1; /* cmp points 2 characters after the last match */ + if (len > best_len) + { + best_len = len; + *best_ptr = record_string; + if (best_len >= CHAIN_GOOD_MATCH) /* optimization */ + break; + } + } + record_id = hash_rec[record_id].next; + } + return best_len; +} + +static uns +hash_string(hash_ptr_t *hash_tab, uns hash, struct hash_record *hash_rec, /*byte *string,*/ uns head, uns *to_delete) + /* We reuse hash-records stored in a circular array. First, delete the old + * one and then add the new one in front of the link-list. 
*/ +{ + struct hash_record *rec = hash_rec + head; + if (*to_delete) /* unlink the original record */ + { + uns prev_id = rec->prev & ((1<<15)-1); + if (rec->prev & (1<<15)) /* was a head */ + hash_tab[prev_id] = 0; + else /* thanks to the ordering, this was a tail */ + hash_rec[prev_id].next = 0; + } + rec->next = hash_tab[hash]; + rec->prev = (1<<15) | hash; + hash_rec[rec->next].prev = head; + hash_tab[hash] = head; /* add the new record before the link-list */ + + if (++head >= HASH_RECORDS) /* circular buffer, reuse old records, 0 is unused */ + { + head = 1; + *to_delete = 1; + } + return head; +} + +static inline byte * +dump_unary_value(byte *out, uns l) +{ + while (l > 255) + { + l -= 255; + *out++ = 0; + } + *out++ = l; + return out; +} + +static byte * +flush_copy_command(uns bof, byte *out, const byte *start, uns len) +{ + if (bof && len <= 238) + *out++ = len + 17; + else if (len < 4) + { + /* cannot happen when !!bof */ + out[-2] |= len; /* invariant: lowest 2 bits 2 bytes back */ +#ifdef CPU_ALLOW_UNALIGNED + * (u32*) out = * (u32*) start; + return out + len; +#else + while (len-- > 0) + *out++ = *start++; + return out; +#endif + } + else + { + /* leave 2 least significant bits of out[-2] set to 0 */ + if (len <= 18) + *out++ = len - 3; + else + { + *out++ = 0; + out = dump_unary_value(out, len - 18); + } + } + memcpy(out, start, len); + return out + len; +} + +int +lizard_compress(const byte *in, uns in_len, byte *out) + /* Requires out being allocated for at least in_len * LIZARD_MAX_MULTIPLY + + * LIZARD_MAX_ADD. There must be at least LIZARD_NEEDS_CHARS characters + * allocated after in. Returns the actual compressed length. 
*/ +{ + hash_ptr_t hash_tab[HASH_SIZE]; + struct hash_record hash_rec[HASH_RECORDS]; + const byte *in_end = in + in_len; + byte *out_start = out; + const byte *copy_start = in; + uns head = 1; /* 0 in unused */ + uns to_delete = 0, bof = 1; + bzero(hash_tab, sizeof(hash_tab)); /* init the hash-table */ + while (in < in_end) + { + uns hash = hashf(in); + byte *best = NULL; + uns len = find_match(hash_tab[hash], hash_rec, in, in_end, &best, head); + if (len < 3) +#if 0 // TODO: now, our routine does not detect matches of length 2 + if (len == 2 && (in - best->string - 1) < (1<<10)) + { /* pass-thru */ } + else +#endif + { +literal: + head = hash_string(hash_tab, hash, hash_rec, head, &to_delete); + in++; /* add a literal */ + continue; + } + + if (in + len > in_end) /* crop EOF */ + { + len = in_end - in; + if (len < 3) + goto literal; + } + /* Record the match. */ + uns copy_len = in - copy_start; + uns is_in_copy_mode = bof || copy_len >= 4; + uns shift = in - best - 1; + /* Try to use a 2-byte sequence. */ +#if 0 + if (len == 2) + { + if (is_in_copy_mode || !copy_len) /* cannot use with 0 copied characters, because this bit pattern is reserved for copy mode */ + goto literal; + else + goto dump_2sequence; + } else +#endif + /* now, len >= 3 */ + if (shift < (1<<11) && len <= 8) + { + shift |= (len-3 + 2)<<11; +dump_2sequence: + if (copy_len) + out = flush_copy_command(bof, out, copy_start, copy_len); + *out++ = (shift>>6) & ~3; /* shift fits into 10 bits */ + *out++ = shift & 0xff; + } + else if (len == 3 && is_in_copy_mode) + { + if (shift < (1<<11) + (1<<10)) /* optimisation for length-3 matches after a copy command */ + { + shift -= 1<<11; + goto dump_2sequence; /* shift has 11 bits and contains also len */ + } + else /* avoid 3-sequence compressed to 3 sequence if it can simply be appended */ + goto literal; + } + /* We have to use a 3-byte sequence. 
*/ + else + { + if (copy_len) + out = flush_copy_command(bof, out, copy_start, copy_len); + if (shift < (1<<14)) + { + if (len <= 33) + *out++ = (1<<5) | (len-2); + else + { + *out++ = 1<<5; + out = dump_unary_value(out, len - 33); + } + } + else /* shift < (1<<15)-1 becase of HASH_RECORDS */ + { + shift++; /* because shift==0 is reserved for EOF */ + byte pos_bit = ((shift>>11) & (1<<3)) | (1<<4); + if (len <= 9) + *out++ = pos_bit | (len-2); + else + { + *out++ = pos_bit; + out = dump_unary_value(out, len - 9); + } + } + *out++ = (shift>>6) & ~3; /* rest of shift fits into 14 bits */ + *out++ = shift & 0xff; + } + /* Update the hash-table. */ + head = hash_string(hash_tab, hash, hash_rec, head, &to_delete); + for (uns i=1; i 17) /* short copy command at BOF */ + { + len = *in++ - 17; + goto perform_copy_command; + } + while (1) + { + uns c = *in++; + uns pos; + if (c < 0x10) + if (expect_copy_command == 1) + { + if (!c) + { + in = read_unary_value(in, &len); + len += 18; + } + else + len = c + 3; + goto perform_copy_command; + } + else + { + pos = ((c&0xc)<<6) | *in++; + if (expect_copy_command == 2) + { + pos += 1<<11; + len = 3; + } + else + len = 2; + pos++; + } + else if (c < 0x20) + { + pos = (c&0x8)<<11; + len = c&0x7; + if (!len) + { + in = read_unary_value(in, &len); + len += 9; + } + else + len += 2; + pos |= (*in++ & 0xfc)<<6; + pos |= *in++; + if (!pos) /* EOF */ + break; + /* do NOT pos++ */ + } + else if (c < 0x40) + { + len = c&0x1f; + if (!len) + { + in = read_unary_value(in, &len); + len += 33; + } + else + len += 2; + pos = (*in++ & 0xfc)<<6; + pos |= *in++; + pos++; + } + else /* high bits encode the length */ + { + len = ((c&0xe0)>>5) -2 +3; + pos = (c&0x1c)<<6; + pos |= *in++; + pos++; + } + /* take from the sliding window */ + if (len <= pos) + { + memcpy(out, out-pos, len); + out += len; + } + else + { /* overlapping */ + for (; len-- > 0; out++) + *out = *(out-pos); + /* It's tempting to use out[-pos] above, but unfortunately it's not the 
same */ + } + /* extract the copy-bits */ + len = in[-2] & 0x3; + if (len) + { + expect_copy_command = 0; +#ifdef CPU_ALLOW_UNALIGNED + * (u32*) out = * (u32*) in; + out += len; + in += len; +#else + while (len-- > 0) + *out++ = *in++; +#endif + } + else + expect_copy_command = 1; + continue; + +perform_copy_command: + expect_copy_command = 2; + memcpy(out, in, len); + out += len; + in += len; + } + + return out - out_start; +} + +/* + +Description of the LZO1X format : +================================= + +The meaning of the commands depends on the current mode. It can be either +the compressed mode or the copy mode. In some cases, the compressed mode +also distinguishes whether we just left the copy mode or not. + +Beginning of file: +------------------ + +Start in copy mode. If the first byte is 00010001, it means probably EOF (empty file), +so switch to the compressed mode. If it is bigger, subtract 17 and copy this number of +the following characters to the output and switch to the compressed mode. +If it is smaller, interpret it as a regular copy mode command. + +Compressed mode: +---------------- + +Read the first byte of the sequence and determine the type of bit encoding by +looking at the most significant bits. The sequence is always at least 2 bytes +long. Decode sequences of these types until the EOF or END marker is read. + + length L = length of the text taken from the sliding window + + If L=0, then count the number Z of the following zero bytes and add Z*255 + to the value of the following non-zero byte. This allows setting L + arbitrarily high. + + position p = relative position of the beginning of the text + + Exception: 00010001 00000000 00000000 means EOF + + copying C = length 1..3 of copied characters or END=0 + + C following characters will be copied from the compressed text to the + output. The number CC is always stored in the 2 least significant bits of + the second last byte of the sequence. 
+ + If END is read, the algorithm switches to the copy mode. + +pattern length position + +0000ppCC pppppppp 2 10 bits [default interpretation] +0000ppCC pppppppp 3 10 bits + 2048 [just after return from copy mode] +0001pLLL L* ppppppCC pppppppp 3..9 + extend 15 bits [pos 0 interpreted as EOF] +001LLLLL L* ppppppCC pppppppp 3..33 + extend 14 bits +LLLpppCC pppppppp 3..8 11 bits [LLL >= 010] + +Copy mode: +---------- + +Read the first byte and, if the most significant bits are 0000, perform the +following command, otherwise switch to the compressed mode (and evaluate the +command there). + +pattern length position + +0000LLLL L* 4..18 + extend N/A + + Copy L characters from the compressed text to the output. The overhead for + incompressible strings is only roughly 1/256 + epsilon. + +*/ diff --git a/lib/lizard.h b/lib/lizard.h new file mode 100644 index 0000000..616d17b --- /dev/null +++ b/lib/lizard.h @@ -0,0 +1,49 @@ +/* + * LiZaRd -- Fast compression method based on Lempel-Ziv 77 + * + * (c) 2004, Robert Spalek + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#ifndef _UCW_LIZARD_H +#define _UCW_LIZARD_H + +#define LIZARD_NEEDS_CHARS 8 + /* The compression routine needs input buffer 8 characters longer, because it + * does not check the input bounds all the time. */ +#define LIZARD_MAX_MULTIPLY 23./22 +#define LIZARD_MAX_ADD 4 + /* In the worst case, the compressed file will not be longer than its + * original length * 23/22 + 4. + * + * The additive constant is for EOF and the header of the file. + * + * The multiplicative constant comes from 19-byte incompressible string + * followed by a 3-sequence that can be compressed into 2-byte link. This + * breaks the copy-mode and it needs to be restarted with a new header. The + * total length is 2(header) + 19(string) + 2(link) = 23. 
+ */ + +/* lizard.c */ +int lizard_compress(const byte *in, uns in_len, byte *out); +int lizard_decompress(const byte *in, byte *out); + +/* lizard-safe.c */ +struct lizard_buffer; + +struct lizard_buffer *lizard_alloc(void); +void lizard_free(struct lizard_buffer *buf); +byte *lizard_decompress_safe(const byte *in, struct lizard_buffer *buf, uns expected_length); + +/* adler32.c */ +uns update_adler32(uns adler, const byte *ptr, uns len); + +static inline uns +adler32(const byte *buf, uns len) +{ + return update_adler32(1, buf, len); +} + +#endif diff --git a/lib/log-file.c b/lib/log-file.c new file mode 100644 index 0000000..401e797 --- /dev/null +++ b/lib/log-file.c @@ -0,0 +1,108 @@ +/* + * UCW Library -- Keeping of Log Files + * + * (c) 1997--2006 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#include "lib/lib.h" +#include "lib/lfs.h" +#include "lib/threads.h" + +#include +#include +#include +#include +#include + +static char *log_name_patt; +static int log_params; +static int log_filename_size; +volatile int log_switch_nest; + +static int +do_log_switch(struct tm *tm) +{ + int fd, l; + char name[log_filename_size]; + int switched = 0; + + if (!log_name_patt || + log_filename[0] && !log_params) + return 0; + ucwlib_lock(); + log_switch_nest++; + l = strftime(name, log_filename_size, log_name_patt, tm); + if (l < 0 || l >= log_filename_size) + die("Error formatting log file name: %m"); + if (strcmp(name, log_filename)) + { + strcpy(log_filename, name); + fd = sh_open(name, O_WRONLY | O_CREAT | O_APPEND, 0666); + if (fd < 0) + die("Unable to open log file %s: %m", name); + dup2(fd, 2); + close(fd); + switched = 1; + } + log_switch_nest--; + ucwlib_unlock(); + return switched; +} + +int +log_switch(void) +{ + time_t tim = time(NULL); + return do_log_switch(localtime(&tim)); +} + +static void +internal_log_switch(struct tm *tm) +{ + if (!log_switch_nest) + 
do_log_switch(tm); +} + +void +log_file(const char *name) +{ + if (name) + { + if (log_name_patt) + xfree(log_name_patt); + if (log_filename) + { + xfree(log_filename); + log_filename = NULL; + } + log_name_patt = xstrdup(name); + log_params = !!strchr(name, '%'); + log_filename_size = strlen(name) + 64; /* 63 is an upper bound on expansion of % escapes */ + log_filename = xmalloc(log_filename_size); + log_filename[0] = 0; + log_switch(); + log_switch_hook = internal_log_switch; + } +} + +void +log_fork(void) +{ + log_pid = getpid(); +} + +#ifdef TEST + +int main(int argc, char **argv) +{ + log_init(argv[0]); + log_file("/proc/self/fd/1"); + for (int i=1; i + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#include "lib/lib.h" + +#include +#include +#include +#include +#include +#include +#include + +static char log_progname[32]; +char *log_filename; +char *log_title; +int log_pid; +int log_precise_timings; +void (*log_die_hook)(void); +void (*log_switch_hook)(struct tm *tm); + +void +vmsg(unsigned int cat, const char *fmt, va_list args) +{ + struct timeval tv; + struct tm tm; + byte *buf, *p; + int buflen = 256; + int l, l0, r; + va_list args2; + + gettimeofday(&tv, NULL); + if (!localtime_r(&tv.tv_sec, &tm)) + bzero(&tm, sizeof(tm)); + + if (log_switch_hook) + log_switch_hook(&tm); + while (1) + { + p = buf = alloca(buflen); + *p++ = cat; + /* We cannot use strftime() here, because it's not re-entrant */ + p += sprintf(p, " %4d-%02d-%02d %02d:%02d:%02d", tm.tm_year+1900, tm.tm_mon+1, tm.tm_mday, + tm.tm_hour, tm.tm_min, tm.tm_sec); + if (log_precise_timings) + p += sprintf(p, ".%06d", (int)tv.tv_usec); + *p++ = ' '; + if (log_title) + { + if (log_pid) + p += sprintf(p, "[%s (%d)] ", log_title, log_pid); + else + p += sprintf(p, "[%s] ", log_title); + } + else + { + if (log_pid) + p += sprintf(p, "[%d] ", log_pid); + } + l0 = p - buf + 1; + r = buflen - l0; + va_copy(args2, args); 
+ l = vsnprintf(p, r, fmt, args2); + va_end(args2); + if (l < 0) + l = r; + else if (l < r) + { + while (*p) + { + if (*p < 0x20 && *p != '\t') + *p = 0x7f; + p++; + } + *p = '\n'; + write(2, buf, l + l0); + return; + } + buflen = l + l0 + 1; + } +} + +void +msg(unsigned int cat, const char *fmt, ...) +{ + va_list args; + + va_start(args, fmt); + vmsg(cat, fmt, args); + va_end(args); +} + +void +die(const char *fmt, ...) +{ + va_list args; + + va_start(args, fmt); + vmsg(L_FATAL, fmt, args); + va_end(args); + if (log_die_hook) + log_die_hook(); +#ifdef DEBUG_DIE_BY_ABORT + abort(); +#else + exit(1); +#endif +} + +void +assert_failed(const char *assertion, const char *file, int line) +{ + msg(L_FATAL, "Assertion `%s' failed at %s:%d", assertion, file, line); + abort(); +} + +void +assert_failed_noinfo(void) +{ + die("Internal error: Assertion failed."); +} + +static const char * +log_basename(const char *n) +{ + const char *p = n; + + while (*n) + if (*n++ == '/') + p = n; + return p; +} + +void +log_init(const char *argv0) +{ + if (argv0) + { + strncpy(log_progname, log_basename(argv0), sizeof(log_progname)-1); + log_progname[sizeof(log_progname)-1] = 0; + log_title = log_progname; + } +} diff --git a/lib/mainloop.c b/lib/mainloop.c new file mode 100644 index 0000000..aeb79de --- /dev/null +++ b/lib/mainloop.c @@ -0,0 +1,545 @@ +/* + * UCW Library -- Main Loop + * + * (c) 2004--2006 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */ + +#undef LOCAL_DEBUG + +#include "lib/lib.h" +#include "lib/mainloop.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +timestamp_t main_now; +sh_time_t main_now_seconds; +uns main_shutdown; + +clist main_timer_list, main_file_list, main_hook_list, main_process_list; +static uns main_file_cnt; +static uns main_poll_table_obsolete, main_poll_table_size; +static struct pollfd *main_poll_table; +static uns main_sigchld_set_up; + +void +main_get_time(void) +{ + struct timeval tv; + gettimeofday(&tv, NULL); + main_now_seconds = tv.tv_sec; + main_now = (timestamp_t)tv.tv_sec * 1000 + tv.tv_usec / 1000; + // DBG("It's %lld o'clock", (long long) main_now); +} + +void +main_init(void) +{ + DBG("MAIN: Initializing"); + clist_init(&main_timer_list); + clist_init(&main_file_list); + clist_init(&main_hook_list); + clist_init(&main_process_list); + main_file_cnt = 0; + main_poll_table_obsolete = 1; + main_get_time(); +} + +void +timer_add(struct main_timer *tm, timestamp_t expires) +{ + if (expires) + DBG("MAIN: Setting timer %p (expire at now+%lld)", tm, (long long)(expires-main_now)); + else + DBG("MAIN: Clearing timer %p", tm); + if (tm->expires) + clist_remove(&tm->n); + tm->expires = expires; + if (expires) + { + cnode *t = main_timer_list.head.next; + while (t != &main_timer_list.head && ((struct main_timer *) t)->expires < expires) + t = t->next; + clist_insert_before(&tm->n, t); + } +} + +void +timer_del(struct main_timer *tm) +{ + timer_add(tm, 0); +} + +static void +file_timer_expired(struct main_timer *tm) +{ + struct main_file *fi = tm->data; + timer_del(&fi->timer); + if (fi->error_handler) + fi->error_handler(fi, MFERR_TIMEOUT); +} + +void +file_add(struct main_file *fi) +{ + DBG("MAIN: Adding file %p (fd=%d)", fi, fi->fd); + ASSERT(!fi->n.next); + clist_add_tail(&main_file_list, &fi->n); + fi->timer.handler = file_timer_expired; + fi->timer.data = fi; + main_file_cnt++; + main_poll_table_obsolete = 1; 
+ if (fcntl(fi->fd, F_SETFL, O_NONBLOCK) < 0) + msg(L_ERROR, "Error setting fd %d to non-blocking mode: %m. Keep fingers crossed.", fi->fd); +} + +void +file_chg(struct main_file *fi) +{ + struct pollfd *p = fi->pollfd; + if (p) + { + p->events = 0; + if (fi->read_handler) + p->events |= POLLIN | POLLHUP | POLLERR; + if (fi->write_handler) + p->events |= POLLOUT | POLLERR; + } +} + +void +file_del(struct main_file *fi) +{ + DBG("MAIN: Deleting file %p (fd=%d)", fi, fi->fd); + ASSERT(fi->n.next); + timer_del(&fi->timer); + clist_remove(&fi->n); + main_file_cnt--; + main_poll_table_obsolete = 1; + fi->n.next = fi->n.prev = NULL; +} + +static int +file_read_handler(struct main_file *fi) +{ + while (fi->rpos < fi->rlen) + { + int l = read(fi->fd, fi->rbuf + fi->rpos, fi->rlen - fi->rpos); + DBG("MAIN: FD %d: read %d", fi->fd, l); + if (l < 0) + { + if (errno != EINTR && errno != EAGAIN && fi->error_handler) + fi->error_handler(fi, MFERR_READ); + return 0; + } + else if (!l) + break; + fi->rpos += l; + } + DBG("MAIN: FD %d done read %d of %d", fi->fd, fi->rpos, fi->rlen); + fi->read_handler = NULL; + file_chg(fi); + fi->read_done(fi); + return 1; +} + +static int +file_write_handler(struct main_file *fi) +{ + while (fi->wpos < fi->wlen) + { + int l = write(fi->fd, fi->wbuf + fi->wpos, fi->wlen - fi->wpos); + DBG("MAIN: FD %d: write %d", fi->fd, l); + if (l < 0) + { + if (errno != EINTR && errno != EAGAIN && fi->error_handler) + fi->error_handler(fi, MFERR_WRITE); + return 0; + } + fi->wpos += l; + } + DBG("MAIN: FD %d done write %d", fi->fd, fi->wpos); + fi->write_handler = NULL; + file_chg(fi); + fi->write_done(fi); + return 1; +} + +void +file_read(struct main_file *fi, void *buf, uns len) +{ + ASSERT(fi->n.next); + if (len) + { + fi->read_handler = file_read_handler; + fi->rbuf = buf; + fi->rpos = 0; + fi->rlen = len; + } + else + { + fi->read_handler = NULL; + fi->rbuf = NULL; + fi->rpos = fi->rlen = 0; + } + file_chg(fi); +} + +void +file_write(struct main_file 
*fi, void *buf, uns len) +{ + ASSERT(fi->n.next); + if (len) + { + fi->write_handler = file_write_handler; + fi->wbuf = buf; + fi->wpos = 0; + fi->wlen = len; + } + else + { + fi->write_handler = NULL; + fi->wbuf = NULL; + fi->wpos = fi->wlen = 0; + } + file_chg(fi); +} + +void +file_set_timeout(struct main_file *fi, timestamp_t expires) +{ + ASSERT(fi->n.next); + timer_add(&fi->timer, expires); +} + +void +file_close_all(void) +{ + CLIST_FOR_EACH(struct main_file *, f, main_file_list) + close(f->fd); +} + +void +hook_add(struct main_hook *ho) +{ + DBG("MAIN: Adding hook %p", ho); + ASSERT(!ho->n.next); + clist_add_tail(&main_hook_list, &ho->n); +} + +void +hook_del(struct main_hook *ho) +{ + DBG("MAIN: Deleting hook %p", ho); + ASSERT(ho->n.next); + clist_remove(&ho->n); + ho->n.next = ho->n.prev = NULL; +} + +static void +main_sigchld_handler(int x UNUSED) +{ + DBG("SIGCHLD received"); +} + +void +process_add(struct main_process *mp) +{ + DBG("MAIN: Adding process %p (pid=%d)", mp, mp->pid); + ASSERT(!mp->n.next); + ASSERT(mp->handler); + clist_add_tail(&main_process_list, &mp->n); + if (!main_sigchld_set_up) + { + struct sigaction sa; + bzero(&sa, sizeof(sa)); + sa.sa_handler = main_sigchld_handler; + sa.sa_flags = SA_NOCLDSTOP | SA_RESTART; + sigaction(SIGCHLD, &sa, NULL); + main_sigchld_set_up = 1; + } +} + +void +process_del(struct main_process *mp) +{ + DBG("MAIN: Deleting process %p (pid=%d)", mp, mp->pid); + ASSERT(mp->n.next); + clist_remove(&mp->n); + mp->n.next = NULL; +} + +int +process_fork(struct main_process *mp) +{ + pid_t pid = fork(); + if (pid < 0) + { + DBG("MAIN: Fork failed"); + mp->status = -1; + format_exit_status(mp->status_msg, -1); + mp->handler(mp); + return 1; + } + else if (!pid) + return 0; + else + { + DBG("MAIN: Forked process %d", (int) pid); + mp->pid = pid; + process_add(mp); + return 1; + } +} + +void +main_debug(void) +{ +#ifdef CONFIG_DEBUG + msg(L_DEBUG, "### Main loop status on %lld", (long long)main_now); + msg(L_DEBUG, 
"\tActive timers:"); + struct main_timer *tm; + CLIST_WALK(tm, main_timer_list) + msg(L_DEBUG, "\t\t%p (expires %lld, data %p)", tm, (long long)(tm->expires ? tm->expires-main_now : 999999), tm->data); + struct main_file *fi; + msg(L_DEBUG, "\tActive files:"); + CLIST_WALK(fi, main_file_list) + msg(L_DEBUG, "\t\t%p (fd %d, rh %p, wh %p, eh %p, expires %lld, data %p)", + fi, fi->fd, fi->read_handler, fi->write_handler, fi->error_handler, + (long long)(fi->timer.expires ? fi->timer.expires-main_now : 999999), fi->data); + msg(L_DEBUG, "\tActive hooks:"); + struct main_hook *ho; + CLIST_WALK(ho, main_hook_list) + msg(L_DEBUG, "\t\t%p (func %p, data %p)", ho, ho->handler, ho->data); + msg(L_DEBUG, "\tActive processes:"); + struct main_process *pr; + CLIST_WALK(pr, main_process_list) + msg(L_DEBUG, "\t\t%p (pid %d, data %p)", pr, pr->pid, pr->data); +#endif +} + +static void +main_rebuild_poll_table(void) +{ + struct main_file *fi; + if (main_poll_table_size < main_file_cnt) + { + if (main_poll_table) + xfree(main_poll_table); + else + main_poll_table_size = 1; + while (main_poll_table_size < main_file_cnt) + main_poll_table_size *= 2; + main_poll_table = xmalloc(sizeof(struct pollfd) * main_poll_table_size); + } + struct pollfd *p = main_poll_table; + DBG("MAIN: Rebuilding poll table: %d of %d entries set", main_file_cnt, main_poll_table_size); + CLIST_WALK(fi, main_file_list) + { + p->fd = fi->fd; + fi->pollfd = p++; + file_chg(fi); + } + main_poll_table_obsolete = 0; +} + +void +main_loop(void) +{ + DBG("MAIN: Entering main_loop"); + ASSERT(main_timer_list.head.next); + + struct main_file *fi; + struct main_hook *ho; + struct main_timer *tm; + struct main_process *pr; + cnode *tmp; + + for (;;) + { + main_get_time(); + timestamp_t wake = main_now + 1000000000; + while ((tm = clist_head(&main_timer_list)) && tm->expires <= main_now) + { + DBG("MAIN: Timer %p expired at now-%lld", tm, (long long)(main_now - tm->expires)); + tm->handler(tm); + } + int hook_min = 
HOOK_RETRY; + int hook_max = HOOK_SHUTDOWN; + CLIST_WALK_DELSAFE(ho, main_hook_list, tmp) + { + DBG("MAIN: Hook %p", ho); + int ret = ho->handler(ho); + hook_min = MIN(hook_min, ret); + hook_max = MAX(hook_max, ret); + } + if (hook_min == HOOK_SHUTDOWN || + hook_min == HOOK_DONE && hook_max == HOOK_DONE || + main_shutdown) + { + DBG("MAIN: Shut down by %s", main_shutdown ? "main_shutdown" : "a hook"); + return; + } + if (hook_max == HOOK_RETRY) + wake = 0; + if (main_poll_table_obsolete) + main_rebuild_poll_table(); + if (!clist_empty(&main_process_list)) + { + int stat; + pid_t pid; + wake = MIN(wake, main_now + 10000); + while ((pid = waitpid(-1, &stat, WNOHANG)) > 0) + { + DBG("MAIN: Child %d exited with status %x", pid, stat); + CLIST_WALK(pr, main_process_list) + if (pr->pid == pid) + { + pr->status = stat; + process_del(pr); + format_exit_status(pr->status_msg, pr->status); + DBG("MAIN: Calling process exit handler"); + pr->handler(pr); + break; + } + wake = 0; + } + } + /* FIXME: Here is a small race window where SIGCHLD can come unnoticed. */ + if ((tm = clist_head(&main_timer_list)) && tm->expires < wake) + wake = tm->expires; + int timeout = (wake ? 
wake - main_now : 0); + DBG("MAIN: Poll for %d fds and timeout %d ms", main_file_cnt, timeout); + if (poll(main_poll_table, main_file_cnt, timeout)) + { + struct pollfd *p = main_poll_table; + main_get_time(); + CLIST_WALK(fi, main_file_list) + { + if (p->revents & (POLLIN | POLLHUP | POLLERR)) + { + do + DBG("MAIN: Read event on fd %d", p->fd); + while (fi->read_handler && fi->read_handler(fi) && !main_poll_table_obsolete); + if (main_poll_table_obsolete) /* File entries have been inserted or deleted => better not risk continuing to nowhere */ + break; + } + if (p->revents & (POLLOUT | POLLERR)) + { + do + DBG("MAIN: Write event on fd %d", p->fd); + while (fi->write_handler && fi->write_handler(fi) && !main_poll_table_obsolete); + if (main_poll_table_obsolete) + break; + } + p++; + } + } + } +} + +#ifdef TEST + +static struct main_process mp; +static struct main_file fin, fout; +static struct main_hook hook; +static struct main_timer tm; + +static byte rb[16]; + +static void dread(struct main_file *fi) +{ + if (fi->rpos < fi->rlen) + { + log(L_INFO, "Read EOF"); + file_del(fi); + } + else + { + log(L_INFO, "Read done"); + file_read(fi, rb, sizeof(rb)); + } +} + +static void derror(struct main_file *fi, int cause) +{ + log(L_INFO, "Error: %m !!! 
(cause %d)", cause); + file_del(fi); +} + +static void dwrite(struct main_file *fi UNUSED) +{ + log(L_INFO, "Write done"); +} + +static int dhook(struct main_hook *ho UNUSED) +{ + log(L_INFO, "Hook called"); + return 0; +} + +static void dtimer(struct main_timer *tm) +{ + log(L_INFO, "Timer tick"); + timer_add(tm, main_now + 10000); +} + +static void dentry(void) +{ + log(L_INFO, "*** SUBPROCESS START ***"); + sleep(2); + log(L_INFO, "*** SUBPROCESS FINISH ***"); + exit(0); +} + +static void dexit(struct main_process *pr) +{ + log(L_INFO, "Subprocess %d exited with status %x", pr->pid, pr->status); +} + +int +main(void) +{ + log_init(NULL); + main_init(); + + fin.fd = 0; + fin.read_done = dread; + fin.error_handler = derror; + file_add(&fin); + file_read(&fin, rb, sizeof(rb)); + + fout.fd = 1; + fout.write_done = dwrite; + fout.error_handler = derror; + file_add(&fout); + file_write(&fout, "Hello, world!\n", 14); + + hook.handler = dhook; + hook_add(&hook); + + tm.handler = dtimer; + timer_add(&tm, main_now + 1000); + + mp.handler = dexit; + if (!process_fork(&mp)) + dentry(); + + main_debug(); + + main_loop(); + log(L_INFO, "Finished."); +} + +#endif diff --git a/lib/mainloop.h b/lib/mainloop.h new file mode 100644 index 0000000..5c3baf6 --- /dev/null +++ b/lib/mainloop.h @@ -0,0 +1,108 @@ +/* + * UCW Library -- Main Loop + * + * (c) 2004--2005 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#ifndef _UCW_MAINLOOP_H +#define _UCW_MAINLOOP_H + +#include "lib/clists.h" + +extern timestamp_t main_now; /* Current time in milliseconds since UNIX epoch */ +extern sh_time_t main_now_seconds; /* Current time in seconds since the epoch */ +extern uns main_shutdown; +extern clist main_timer_list, main_file_list, main_hook_list, main_process_list; + +/* User-defined fields are marked with [*], all other fields must be initialized to zero. 
*/ + +/* Timers */ + +struct main_timer { + cnode n; + timestamp_t expires; + void (*handler)(struct main_timer *tm); /* [*] Function to be called when the timer expires. Must re-add/del the timer.*/ + void *data; /* [*] Data for use by the handler */ +}; + +void timer_add(struct main_timer *tm, timestamp_t expires); /* Can modify a running timer, too */ +void timer_del(struct main_timer *tm); + +void main_get_time(void); /* Refresh main_now */ + +/* Files to poll */ + +struct main_file { + cnode n; + int fd; /* [*] File descriptor */ + int (*read_handler)(struct main_file *fi); /* [*] To be called when ready for reading/writing; must call file_chg() afterwards */ + int (*write_handler)(struct main_file *fi); + void (*error_handler)(struct main_file *fi, int cause); /* [*] Handler to call on errors */ + void *data; /* [*] Data for use by the handlers */ + byte *rbuf; /* Read/write pointers for use by file_read/write */ + uns rpos, rlen; + byte *wbuf; + uns wpos, wlen; + void (*read_done)(struct main_file *fi); /* [*] Called when file_read is finished; rpos < rlen if EOF */ + void (*write_done)(struct main_file *fi); /* [*] Called when file_write is finished */ + struct main_timer timer; + struct pollfd *pollfd; +}; + +enum main_file_err_cause { + MFERR_READ, + MFERR_WRITE, + MFERR_TIMEOUT +}; + +void file_add(struct main_file *fi); +void file_chg(struct main_file *fi); +void file_del(struct main_file *fi); +void file_read(struct main_file *fi, void *buf, uns len); +void file_write(struct main_file *fi, void *buf, uns len); +void file_set_timeout(struct main_file *fi, timestamp_t expires); +void file_close_all(void); /* Close all known main_file's; frequently used before fork() */ + +/* Hooks to be called in each iteration of the main loop */ + +struct main_hook { + cnode n; + int (*handler)(struct main_hook *ho); /* [*] Hook function; returns HOOK_xxx */ + void *data; /* [*] For use by the handler */ +}; + +enum main_hook_return { + HOOK_IDLE, /* Call again when 
the main loop becomes idle again */ + HOOK_RETRY, /* Call again as soon as possible */ + HOOK_DONE = -1, /* Shut down the main loop if all hooks return this value */ + HOOK_SHUTDOWN = -2 /* Shut down the main loop immediately */ +}; + +void hook_add(struct main_hook *ho); +void hook_del(struct main_hook *ho); + +/* Processes to watch */ + +struct main_process { + cnode n; + int pid; /* Process id (0=not running) */ + int status; /* Exit status (-1=fork failed) */ + char status_msg[EXIT_STATUS_MSG_SIZE]; + void (*handler)(struct main_process *mp); /* [*] Called when the process exits; process_del done automatically */ + void *data; /* [*] For use by the handler */ +}; + +void process_add(struct main_process *mp); +void process_del(struct main_process *mp); +int process_fork(struct main_process *mp); + +/* The main loop */ + +void main_init(void); +void main_loop(void); +void main_debug(void); + +#endif diff --git a/lib/md5.c b/lib/md5.c new file mode 100644 index 0000000..ba8ff56 --- /dev/null +++ b/lib/md5.c @@ -0,0 +1,249 @@ +/* + * This code implements the MD5 message-digest algorithm. + * The algorithm is due to Ron Rivest. This code was + * written by Colin Plumb in 1993, no copyright is claimed. + * This code is in the public domain; do with it what you wish. + * + * Equivalent code is available from RSA Data Security, Inc. + * This code has been tested against that, and is equivalent, + * except that you don't need to include two pages of legalese + * with every copy. + * + * To compute the message digest of a chunk of bytes, declare an + * MD5Context structure, pass it to MD5Init, call MD5Update as + * needed on buffers full of bytes, and then call MD5Final, which + * will fill a supplied 16-byte array with the digest. 
+ */ + +#include "lib/lib.h" +#include "lib/md5.h" + +#include /* for memcpy() */ + +#ifdef CPU_LITTLE_ENDIAN +#define byteReverse(buf, len) /* Nothing */ +#else +void byteReverse(unsigned char *buf, unsigned longs); + +/* + * Note: this code is harmless on little-endian machines. + */ +void byteReverse(unsigned char *buf, unsigned longs) +{ + uint32 t; + do { + t = (uint32) ((unsigned) buf[3] << 8 | buf[2]) << 16 | + ((unsigned) buf[1] << 8 | buf[0]); + *(uint32 *) buf = t; + buf += 4; + } while (--longs); +} +#endif + +/* + * Start MD5 accumulation. Set bit count to 0 and buffer to mysterious + * initialization constants. + */ +void MD5Init(struct MD5Context *ctx) +{ + ctx->buf[0] = 0x67452301; + ctx->buf[1] = 0xefcdab89; + ctx->buf[2] = 0x98badcfe; + ctx->buf[3] = 0x10325476; + + ctx->bits[0] = 0; + ctx->bits[1] = 0; +} + +/* + * Update context to reflect the concatenation of another buffer full + * of bytes. + */ +void MD5Update(struct MD5Context *ctx, unsigned char const *buf, unsigned len) +{ + uint32 t; + + /* Update bitcount */ + + t = ctx->bits[0]; + if ((ctx->bits[0] = t + ((uint32) len << 3)) < t) + ctx->bits[1]++; /* Carry from low to high */ + ctx->bits[1] += len >> 29; + + t = (t >> 3) & 0x3f; /* Bytes already in ctx->in */ + + /* Handle any leading odd-sized chunks */ + + if (t) { + unsigned char *p = (unsigned char *) ctx->in + t; + + t = 64 - t; + if (len < t) { + memcpy(p, buf, len); + return; + } + memcpy(p, buf, t); + byteReverse(ctx->in, 16); + MD5Transform(ctx->buf, (uint32 *) ctx->in); + buf += t; + len -= t; + } + /* Process data in 64-byte chunks */ + + while (len >= 64) { + memcpy(ctx->in, buf, 64); + byteReverse(ctx->in, 16); + MD5Transform(ctx->buf, (uint32 *) ctx->in); + buf += 64; + len -= 64; + } + + /* Handle any remaining bytes of data.
*/ + + memcpy(ctx->in, buf, len); +} + +/* + * Final wrapup - pad to 64-byte boundary with the bit pattern + * 1 0* (64-bit count of bits processed, MSB-first) + */ +void MD5Final(unsigned char digest[16], struct MD5Context *ctx) +{ + unsigned count; + unsigned char *p; + + /* Compute number of bytes mod 64 */ + count = (ctx->bits[0] >> 3) & 0x3F; + + /* Set the first char of padding to 0x80. This is safe since there is + always at least one byte free */ + p = ctx->in + count; + *p++ = 0x80; + + /* Bytes of padding needed to make 64 bytes */ + count = 64 - 1 - count; + + /* Pad out to 56 mod 64 */ + if (count < 8) { + /* Two lots of padding: Pad the first block to 64 bytes */ + memset(p, 0, count); + byteReverse(ctx->in, 16); + MD5Transform(ctx->buf, (uint32 *) ctx->in); + + /* Now fill the next block with 56 bytes */ + memset(ctx->in, 0, 56); + } else { + /* Pad block to 56 bytes */ + memset(p, 0, count - 8); + } + byteReverse(ctx->in, 14); + + /* Append length in bits and transform */ + ((uint32 *) ctx->in)[14] = ctx->bits[0]; + ((uint32 *) ctx->in)[15] = ctx->bits[1]; + + MD5Transform(ctx->buf, (uint32 *) ctx->in); + byteReverse((unsigned char *) ctx->buf, 4); + memcpy(digest, ctx->buf, 16); + memset((char *) ctx, 0, sizeof(ctx)); /* In case it's sensitive */ +} + +/* The four core functions - F1 is optimized somewhat */ + +/* #define F1(x, y, z) (x & y | ~x & z) */ +#define F1(x, y, z) (z ^ (x & (y ^ z))) +#define F2(x, y, z) F1(z, x, y) +#define F3(x, y, z) (x ^ y ^ z) +#define F4(x, y, z) (y ^ (x | ~z)) + +/* This is the central step in the MD5 algorithm. */ +#define MD5STEP(f, w, x, y, z, data, s) \ + ( w += f(x, y, z) + data, w = w<>(32-s), w += x ) + +/* + * The core of the MD5 algorithm, this alters an existing MD5 hash to + * reflect the addition of 16 longwords of new data. MD5Update blocks + * the data and converts bytes into longwords for this routine. 
+ */ +void MD5Transform(uint32 buf[4], uint32 const in[16]) +{ + uint32 a, b, c, d; + + a = buf[0]; + b = buf[1]; + c = buf[2]; + d = buf[3]; + + MD5STEP(F1, a, b, c, d, in[0] + 0xd76aa478, 7); + MD5STEP(F1, d, a, b, c, in[1] + 0xe8c7b756, 12); + MD5STEP(F1, c, d, a, b, in[2] + 0x242070db, 17); + MD5STEP(F1, b, c, d, a, in[3] + 0xc1bdceee, 22); + MD5STEP(F1, a, b, c, d, in[4] + 0xf57c0faf, 7); + MD5STEP(F1, d, a, b, c, in[5] + 0x4787c62a, 12); + MD5STEP(F1, c, d, a, b, in[6] + 0xa8304613, 17); + MD5STEP(F1, b, c, d, a, in[7] + 0xfd469501, 22); + MD5STEP(F1, a, b, c, d, in[8] + 0x698098d8, 7); + MD5STEP(F1, d, a, b, c, in[9] + 0x8b44f7af, 12); + MD5STEP(F1, c, d, a, b, in[10] + 0xffff5bb1, 17); + MD5STEP(F1, b, c, d, a, in[11] + 0x895cd7be, 22); + MD5STEP(F1, a, b, c, d, in[12] + 0x6b901122, 7); + MD5STEP(F1, d, a, b, c, in[13] + 0xfd987193, 12); + MD5STEP(F1, c, d, a, b, in[14] + 0xa679438e, 17); + MD5STEP(F1, b, c, d, a, in[15] + 0x49b40821, 22); + + MD5STEP(F2, a, b, c, d, in[1] + 0xf61e2562, 5); + MD5STEP(F2, d, a, b, c, in[6] + 0xc040b340, 9); + MD5STEP(F2, c, d, a, b, in[11] + 0x265e5a51, 14); + MD5STEP(F2, b, c, d, a, in[0] + 0xe9b6c7aa, 20); + MD5STEP(F2, a, b, c, d, in[5] + 0xd62f105d, 5); + MD5STEP(F2, d, a, b, c, in[10] + 0x02441453, 9); + MD5STEP(F2, c, d, a, b, in[15] + 0xd8a1e681, 14); + MD5STEP(F2, b, c, d, a, in[4] + 0xe7d3fbc8, 20); + MD5STEP(F2, a, b, c, d, in[9] + 0x21e1cde6, 5); + MD5STEP(F2, d, a, b, c, in[14] + 0xc33707d6, 9); + MD5STEP(F2, c, d, a, b, in[3] + 0xf4d50d87, 14); + MD5STEP(F2, b, c, d, a, in[8] + 0x455a14ed, 20); + MD5STEP(F2, a, b, c, d, in[13] + 0xa9e3e905, 5); + MD5STEP(F2, d, a, b, c, in[2] + 0xfcefa3f8, 9); + MD5STEP(F2, c, d, a, b, in[7] + 0x676f02d9, 14); + MD5STEP(F2, b, c, d, a, in[12] + 0x8d2a4c8a, 20); + + MD5STEP(F3, a, b, c, d, in[5] + 0xfffa3942, 4); + MD5STEP(F3, d, a, b, c, in[8] + 0x8771f681, 11); + MD5STEP(F3, c, d, a, b, in[11] + 0x6d9d6122, 16); + MD5STEP(F3, b, c, d, a, in[14] + 0xfde5380c, 23); + MD5STEP(F3, 
a, b, c, d, in[1] + 0xa4beea44, 4); + MD5STEP(F3, d, a, b, c, in[4] + 0x4bdecfa9, 11); + MD5STEP(F3, c, d, a, b, in[7] + 0xf6bb4b60, 16); + MD5STEP(F3, b, c, d, a, in[10] + 0xbebfbc70, 23); + MD5STEP(F3, a, b, c, d, in[13] + 0x289b7ec6, 4); + MD5STEP(F3, d, a, b, c, in[0] + 0xeaa127fa, 11); + MD5STEP(F3, c, d, a, b, in[3] + 0xd4ef3085, 16); + MD5STEP(F3, b, c, d, a, in[6] + 0x04881d05, 23); + MD5STEP(F3, a, b, c, d, in[9] + 0xd9d4d039, 4); + MD5STEP(F3, d, a, b, c, in[12] + 0xe6db99e5, 11); + MD5STEP(F3, c, d, a, b, in[15] + 0x1fa27cf8, 16); + MD5STEP(F3, b, c, d, a, in[2] + 0xc4ac5665, 23); + + MD5STEP(F4, a, b, c, d, in[0] + 0xf4292244, 6); + MD5STEP(F4, d, a, b, c, in[7] + 0x432aff97, 10); + MD5STEP(F4, c, d, a, b, in[14] + 0xab9423a7, 15); + MD5STEP(F4, b, c, d, a, in[5] + 0xfc93a039, 21); + MD5STEP(F4, a, b, c, d, in[12] + 0x655b59c3, 6); + MD5STEP(F4, d, a, b, c, in[3] + 0x8f0ccc92, 10); + MD5STEP(F4, c, d, a, b, in[10] + 0xffeff47d, 15); + MD5STEP(F4, b, c, d, a, in[1] + 0x85845dd1, 21); + MD5STEP(F4, a, b, c, d, in[8] + 0x6fa87e4f, 6); + MD5STEP(F4, d, a, b, c, in[15] + 0xfe2ce6e0, 10); + MD5STEP(F4, c, d, a, b, in[6] + 0xa3014314, 15); + MD5STEP(F4, b, c, d, a, in[13] + 0x4e0811a1, 21); + MD5STEP(F4, a, b, c, d, in[4] + 0xf7537e82, 6); + MD5STEP(F4, d, a, b, c, in[11] + 0xbd3af235, 10); + MD5STEP(F4, c, d, a, b, in[2] + 0x2ad7d2bb, 15); + MD5STEP(F4, b, c, d, a, in[9] + 0xeb86d391, 21); + + buf[0] += a; + buf[1] += b; + buf[2] += c; + buf[3] += d; +} diff --git a/lib/md5.h b/lib/md5.h new file mode 100644 index 0000000..be51a52 --- /dev/null +++ b/lib/md5.h @@ -0,0 +1,24 @@ +/* + * UCW Library -- MD5 Message Digest + * + * This file is in public domain (see lib/md5.c). 
+ */ + +#ifndef _UCW_MD5_H +#define _UCW_MD5_H + +typedef u32 uint32; + +struct MD5Context { + uint32 buf[4]; + uint32 bits[2]; + unsigned char in[64]; +}; + +void MD5Init(struct MD5Context *context); +void MD5Update(struct MD5Context *context, unsigned char const *buf, + unsigned len); +void MD5Final(unsigned char digest[16], struct MD5Context *context); +void MD5Transform(uint32 buf[4], uint32 const in[16]); + +#endif /* !_UCW_MD5_H */ diff --git a/lib/md5hex.c b/lib/md5hex.c new file mode 100644 index 0000000..93987b0 --- /dev/null +++ b/lib/md5hex.c @@ -0,0 +1,35 @@ +/* + * UCW Library -- MD5 Binary <-> Hex Conversions + * + * (c) 1997 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#include "lib/lib.h" +#include "lib/chartype.h" + +#include + +void +md5_to_hex(const byte *s, char *d) +{ + int i; + for(i=0; i + * (c) 2007 Pavel Charvat + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */ + +#include "lib/lib.h" +#include "lib/mempool.h" + +#include +#include +#include + +static char * +mp_vprintf_at(struct mempool *mp, uns ofs, const char *fmt, va_list args) +{ + char *ret = mp_grow(mp, ofs + 1) + ofs; + va_list args2; + va_copy(args2, args); + int cnt = vsnprintf(ret, mp_avail(mp) - ofs, fmt, args2); + va_end(args2); + if (cnt < 0) + { + /* Our C library doesn't support C99 return value of vsnprintf, so we need to iterate */ + do + { + ret = mp_expand(mp) + ofs; + va_copy(args2, args); + cnt = vsnprintf(ret, mp_avail(mp) - ofs, fmt, args2); + va_end(args2); + } + while (cnt < 0); + } + else if ((uns)cnt >= mp_avail(mp) - ofs) + { + ret = mp_grow(mp, cnt + 1) + ofs; + va_copy(args2, args); + int cnt2 = vsnprintf(ret, cnt + 1, fmt, args2); + va_end(args2); + ASSERT(cnt2 == cnt); + } + mp_end(mp, ret + cnt + 1); + return ret - ofs; +} + +char * +mp_vprintf(struct mempool *mp, const char *fmt, va_list args) +{ + mp_start(mp, 1); + return mp_vprintf_at(mp, 0, fmt, args); +} + +char * +mp_printf(struct mempool *p, const char *fmt, ...) +{ + va_list args; + va_start(args, fmt); + char *res = mp_vprintf(p, fmt, args); + va_end(args); + return res; +} + +char * +mp_vprintf_append(struct mempool *mp, char *ptr, const char *fmt, va_list args) +{ + uns ofs = mp_open(mp, ptr); + ASSERT(ofs); + return mp_vprintf_at(mp, ofs - 1, fmt, args); +} + +char * +mp_printf_append(struct mempool *mp, char *ptr, const char *fmt, ...) 
+{ + va_list args; + va_start(args, fmt); + char *res = mp_vprintf_append(mp, ptr, fmt, args); + va_end(args); + return res; +} + +#ifdef TEST + +int main(void) +{ + struct mempool *mp = mp_new(64); + char *x = mp_printf(mp, "", "World"); + fputs(x, stdout); + x = mp_printf_append(mp, x, ""); + fputs(x, stdout); + x = mp_printf(mp, "\n", "World"); + fputs(x, stdout); + return 0; +} + +#endif diff --git a/lib/mempool-str.c b/lib/mempool-str.c new file mode 100644 index 0000000..176ff3e --- /dev/null +++ b/lib/mempool-str.c @@ -0,0 +1,102 @@ +/* + * UCW Library -- Memory Pools (String Operations) + * + * (c) 2004 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#include "lib/lib.h" +#include "lib/mempool.h" + +#include +#include + +char * +mp_strdup(struct mempool *p, const char *s) +{ + uns l = strlen(s) + 1; + char *t = mp_alloc_fast_noalign(p, l); + memcpy(t, s, l); + return t; +} + +void * +mp_memdup(struct mempool *p, const void *s, uns len) +{ + void *t = mp_alloc_fast(p, len); + memcpy(t, s, len); + return t; +} + +char * +mp_multicat(struct mempool *p, ...) 
+{ + va_list args, a; + va_start(args, p); + char *x, *y; + uns cnt = 0; + va_copy(a, args); + while (x = va_arg(a, char *)) + cnt++; + uns *sizes = alloca(cnt * sizeof(uns)); + uns len = 1; + cnt = 0; + va_end(a); + va_copy(a, args); + while (x = va_arg(a, char *)) + len += sizes[cnt++] = strlen(x); + char *buf = mp_alloc_fast_noalign(p, len); + y = buf; + va_end(a); + cnt = 0; + while (x = va_arg(args, char *)) + { + memcpy(y, x, sizes[cnt]); + y += sizes[cnt++]; + } + *y = 0; + va_end(args); + return buf; +} + +char * +mp_strjoin(struct mempool *p, char **a, uns n, uns sep) +{ + uns sizes[n]; + uns len = 1; + for (uns i=0; i + +int main(void) +{ + struct mempool *p = mp_new(64); + char *s = mp_strdup(p, "12345"); + char *c = mp_multicat(p, "<<", s, ">>", NULL); + puts(c); + char *a[] = { "bugs", "gnats", "insects" }; + puts(mp_strjoin(p, a, 3, '.')); + puts(mp_strjoin(p, a, 3, 0)); + return 0; +} + +#endif diff --git a/lib/mempool.c b/lib/mempool.c new file mode 100644 index 0000000..658f538 --- /dev/null +++ b/lib/mempool.c @@ -0,0 +1,491 @@ +/* + * UCW Library -- Memory Pools (One-Time Allocation) + * + * (c) 1997--2001 Martin Mares + * (c) 2007 Pavel Charvat + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */ + +#undef LOCAL_DEBUG + +#include "lib/lib.h" +#include "lib/mempool.h" + +#include + +#define MP_CHUNK_TAIL ALIGN_TO(sizeof(struct mempool_chunk), CPU_STRUCT_ALIGN) +#define MP_SIZE_MAX (~0U - MP_CHUNK_TAIL - CPU_PAGE_SIZE) + +struct mempool_chunk { + struct mempool_chunk *next; + uns size; +}; + +static uns +mp_align_size(uns size) +{ +#ifdef POOL_IS_MMAP + return ALIGN_TO(size + MP_CHUNK_TAIL, CPU_PAGE_SIZE) - MP_CHUNK_TAIL; +#else + return ALIGN_TO(size, CPU_STRUCT_ALIGN); +#endif +} + +void +mp_init(struct mempool *pool, uns chunk_size) +{ + chunk_size = mp_align_size(MAX(sizeof(struct mempool), chunk_size)); + *pool = (struct mempool) { + .chunk_size = chunk_size, + .threshold = chunk_size >> 1, + .last_big = &pool->last_big }; +} + +static void * +mp_new_big_chunk(uns size) +{ + struct mempool_chunk *chunk; + chunk = xmalloc(size + MP_CHUNK_TAIL) + size; + chunk->size = size; + return chunk; +} + +static void +mp_free_big_chunk(struct mempool_chunk *chunk) +{ + xfree((void *)chunk - chunk->size); +} + +static void * +mp_new_chunk(uns size) +{ +#ifdef POOL_IS_MMAP + struct mempool_chunk *chunk; + chunk = page_alloc(size + MP_CHUNK_TAIL) + size; + chunk->size = size; + return chunk; +#else + return mp_new_big_chunk(size); +#endif +} + +static void +mp_free_chunk(struct mempool_chunk *chunk) +{ +#ifdef POOL_IS_MMAP + page_free((void *)chunk - chunk->size, chunk->size + MP_CHUNK_TAIL); +#else + mp_free_big_chunk(chunk); +#endif +} + +struct mempool * +mp_new(uns chunk_size) +{ + chunk_size = mp_align_size(MAX(sizeof(struct mempool), chunk_size)); + struct mempool_chunk *chunk = mp_new_chunk(chunk_size); + struct mempool *pool = (void *)chunk - chunk_size; + DBG("Creating mempool %p with %u bytes long chunks", pool, chunk_size); + chunk->next = NULL; + *pool = (struct mempool) { + .state = { .free = { chunk_size - sizeof(*pool) }, .last = { chunk } }, + .chunk_size = chunk_size, + .threshold = chunk_size >> 1, + .last_big = &pool->last_big }; + return pool; 
+} + +static void +mp_free_chain(struct mempool_chunk *chunk) +{ + while (chunk) + { + struct mempool_chunk *next = chunk->next; + mp_free_chunk(chunk); + chunk = next; + } +} + +static void +mp_free_big_chain(struct mempool_chunk *chunk) +{ + while (chunk) + { + struct mempool_chunk *next = chunk->next; + mp_free_big_chunk(chunk); + chunk = next; + } +} + +void +mp_delete(struct mempool *pool) +{ + DBG("Deleting mempool %p", pool); + mp_free_big_chain(pool->state.last[1]); + mp_free_chain(pool->unused); + mp_free_chain(pool->state.last[0]); // can contain the mempool structure +} + +void +mp_flush(struct mempool *pool) +{ + mp_free_big_chain(pool->state.last[1]); + struct mempool_chunk *chunk, *next; + for (chunk = pool->state.last[0]; chunk && (void *)chunk - chunk->size != pool; chunk = next) + { + next = chunk->next; + chunk->next = pool->unused; + pool->unused = chunk; + } + pool->state.last[0] = chunk; + pool->state.free[0] = chunk ? chunk->size - sizeof(*pool) : 0; + pool->state.last[1] = NULL; + pool->state.free[1] = 0; + pool->state.next = NULL; + pool->last_big = &pool->last_big; +} + +static void +mp_stats_chain(struct mempool_chunk *chunk, struct mempool_stats *stats, uns idx) +{ + while (chunk) + { + stats->chain_size[idx] += chunk->size + sizeof(*chunk); + stats->chain_count[idx]++; + chunk = chunk->next; + } + stats->total_size += stats->chain_size[idx]; +} + +void +mp_stats(struct mempool *pool, struct mempool_stats *stats) +{ + bzero(stats, sizeof(*stats)); + mp_stats_chain(pool->state.last[0], stats, 0); + mp_stats_chain(pool->state.last[1], stats, 1); + mp_stats_chain(pool->unused, stats, 2); +} + +void * +mp_alloc_internal(struct mempool *pool, uns size) +{ + struct mempool_chunk *chunk; + if (size <= pool->threshold) + { + pool->idx = 0; + if (pool->unused) + { + chunk = pool->unused; + pool->unused = chunk->next; + } + else + chunk = mp_new_chunk(pool->chunk_size); + chunk->next = pool->state.last[0]; + pool->state.last[0] = chunk; + 
pool->state.free[0] = pool->chunk_size - size; + return (void *)chunk - pool->chunk_size; + } + else if (likely(size <= MP_SIZE_MAX)) + { + pool->idx = 1; + uns aligned = ALIGN_TO(size, CPU_STRUCT_ALIGN); + chunk = mp_new_big_chunk(aligned); + chunk->next = pool->state.last[1]; + pool->state.last[1] = chunk; + pool->state.free[1] = aligned - size; + return pool->last_big = (void *)chunk - aligned; + } + else + die("Cannot allocate %u bytes from a mempool", size); +} + +void * +mp_alloc(struct mempool *pool, uns size) +{ + return mp_alloc_fast(pool, size); +} + +void * +mp_alloc_noalign(struct mempool *pool, uns size) +{ + return mp_alloc_fast_noalign(pool, size); +} + +void * +mp_alloc_zero(struct mempool *pool, uns size) +{ + void *ptr = mp_alloc_fast(pool, size); + bzero(ptr, size); + return ptr; +} + +void * +mp_start_internal(struct mempool *pool, uns size) +{ + void *ptr = mp_alloc_internal(pool, size); + pool->state.free[pool->idx] += size; + return ptr; +} + +void * +mp_start(struct mempool *pool, uns size) +{ + return mp_start_fast(pool, size); +} + +void * +mp_start_noalign(struct mempool *pool, uns size) +{ + return mp_start_fast_noalign(pool, size); +} + +void * +mp_grow_internal(struct mempool *pool, uns size) +{ + if (unlikely(size > MP_SIZE_MAX)) + die("Cannot allocate %u bytes of memory", size); + uns avail = mp_avail(pool); + void *ptr = mp_ptr(pool); + if (pool->idx) + { + uns amortized = likely(avail <= MP_SIZE_MAX / 2) ? 
avail * 2 : MP_SIZE_MAX; + amortized = MAX(amortized, size); + amortized = ALIGN_TO(amortized, CPU_STRUCT_ALIGN); + struct mempool_chunk *chunk = pool->state.last[1], *next = chunk->next; + ptr = xrealloc(ptr, amortized + MP_CHUNK_TAIL); + chunk = ptr + amortized; + chunk->next = next; + chunk->size = amortized; + pool->state.last[1] = chunk; + pool->state.free[1] = amortized; + pool->last_big = ptr; + return ptr; + } + else + { + void *p = mp_start_internal(pool, size); + memcpy(p, ptr, avail); + return p; + } +} + +uns +mp_open(struct mempool *pool, void *ptr) +{ + return mp_open_fast(pool, ptr); +} + +void * +mp_realloc(struct mempool *pool, void *ptr, uns size) +{ + return mp_realloc_fast(pool, ptr, size); +} + +void * +mp_realloc_zero(struct mempool *pool, void *ptr, uns size) +{ + uns old_size = mp_open_fast(pool, ptr); + ptr = mp_grow(pool, size); + if (size > old_size) + bzero(ptr + old_size, size - old_size); + mp_end(pool, ptr + size); + return ptr; +} + +void * +mp_spread_internal(struct mempool *pool, void *p, uns size) +{ + void *old = mp_ptr(pool); + void *new = mp_grow_internal(pool, p-old+size); + return p-old+new; +} + +void +mp_restore(struct mempool *pool, struct mempool_state *state) +{ + struct mempool_chunk *chunk, *next; + struct mempool_state s = *state; + for (chunk = pool->state.last[0]; chunk != s.last[0]; chunk = next) + { + next = chunk->next; + chunk->next = pool->unused; + pool->unused = chunk; + } + for (chunk = pool->state.last[1]; chunk != s.last[1]; chunk = next) + { + next = chunk->next; + mp_free_big_chunk(chunk); + } + pool->state = s; + pool->last_big = &pool->last_big; +} + +struct mempool_state * +mp_push(struct mempool *pool) +{ + struct mempool_state state = pool->state; + struct mempool_state *p = mp_alloc_fast(pool, sizeof(*p)); + *p = state; + pool->state.next = p; + return p; +} + +void +mp_pop(struct mempool *pool) +{ + ASSERT(pool->state.next); + /* Rewind to the most recently saved state; mp_restore() copies it before releasing the memory it lives in. */ + mp_restore(pool, pool->state.next); 
+} + +#ifdef TEST + +#include "lib/getopt.h" +#include +#include +#include + +static void +fill(byte *ptr, uns len, uns magic) +{ + while (len--) + *ptr++ = (magic++ & 255); +} + +static void +check(byte *ptr, uns len, uns magic, uns align) +{ + ASSERT(!((uintptr_t)ptr & (align - 1))); + while (len--) + if (*ptr++ != (magic++ & 255)) + ASSERT(0); +} + +int main(int argc, char **argv) +{ + srand(time(NULL)); + log_init(argv[0]); + cf_def_file = NULL; + if (cf_getopt(argc, argv, CF_SHORT_OPTS, CF_NO_LONG_OPTS, NULL) >= 0 || argc != optind) + die("Invalid usage"); + + uns max = 1000, n = 0, m = 0, can_realloc = 0; + void *ptr[max]; + struct mempool_state *state[max]; + uns len[max], num[max], align[max]; + struct mempool *mp = mp_new(128), mp_static; + + for (uns i = 0; i < 5000; i++) + { + for (uns j = 0; j < n; j++) + check(ptr[j], len[j], j, align[j]); +#if 0 + DBG("free_small=%u free_big=%u idx=%u chunk_size=%u last_big=%p", mp->state.free[0], mp->state.free[1], mp->idx, mp->chunk_size, mp->last_big); + for (struct mempool_chunk *ch = mp->state.last[0]; ch; ch = ch->next) + DBG("small %p %p %p %d", (byte *)ch - ch->size, ch, ch + 1, ch->size); + for (struct mempool_chunk *ch = mp->state.last[1]; ch; ch = ch->next) + DBG("big %p %p %p %d", (byte *)ch - ch->size, ch, ch + 1, ch->size); +#endif + int r = random_max(100); + if ((r -= 1) < 0) + { + DBG("flush"); + mp_flush(mp); + n = m = 0; + } + else if ((r -= 1) < 0) + { + DBG("delete & new"); + mp_delete(mp); + if (random_max(2)) + mp = mp_new(random_max(0x1000) + 1); + else + mp = &mp_static, mp_init(mp, random_max(512) + 1); + n = m = 0; + } + else if (n < max && (r -= 30) < 0) + { + len[n] = random_max(0x2000); + DBG("alloc(%u)", len[n]); + align[n] = random_max(2) ? CPU_STRUCT_ALIGN : 1; + ptr[n] = (align[n] == 1) ? 
mp_alloc_fast_noalign(mp, len[n]) : mp_alloc_fast(mp, len[n]); + DBG(" -> (%p)", ptr[n]); + fill(ptr[n], len[n], n); + n++; + can_realloc = 1; + } + else if (n < max && (r -= 20) < 0) + { + len[n] = random_max(0x2000); + DBG("start(%u)", len[n]); + align[n] = random_max(2) ? CPU_STRUCT_ALIGN : 1; + ptr[n] = (align[n] == 1) ? mp_start_fast_noalign(mp, len[n]) : mp_start_fast(mp, len[n]); + DBG(" -> (%p)", ptr[n]); + fill(ptr[n], len[n], n); + n++; + can_realloc = 1; + goto grow; + } + else if (can_realloc && n && (r -= 10) < 0) + { + if (mp_open(mp, ptr[n - 1]) != len[n - 1]) + ASSERT(0); +grow: + { + uns k = n - 1; + for (uns i = random_max(4); i--; ) + { + uns l = len[k]; + len[k] = random_max(0x2000); + DBG("grow(%u)", len[k]); + ptr[k] = mp_grow(mp, len[k]); + DBG(" -> (%p)", ptr[k]); + check(ptr[k], MIN(l, len[k]), k, align[k]); + fill(ptr[k], len[k], k); + } + mp_end(mp, ptr[k] + len[k]); + } + } + else if (can_realloc && n && (r -= 20) < 0) + { + uns i = n - 1, l = len[i]; + DBG("realloc(%p, %u)", ptr[i], len[i]); + ptr[i] = mp_realloc(mp, ptr[i], len[i] = random_max(0x2000)); + DBG(" -> (%p, %u)", ptr[i], len[i]); + check(ptr[i], MIN(len[i], l), i, align[i]); + fill(ptr[i], len[i], i); + } + else if (m < max && (r -= 5) < 0) + { + DBG("push(%u)", m); + num[m] = n; + state[m++] = mp_push(mp); + can_realloc = 0; + } + else if (m && (r -= 2) < 0) + { + m--; + DBG("pop(%u)", m); + mp_pop(mp); + n = num[m]; + can_realloc = 0; + } + else if (m && (r -= 1) < 0) + { + uns i = random_max(m); + DBG("restore(%u)", i); + mp_restore(mp, state[i]); + n = num[m = i]; + can_realloc = 0; + } + else if (can_realloc && n && (r -= 5) < 0) + ASSERT(mp_size(mp, ptr[n - 1]) == len[n - 1]); + } + + mp_delete(mp); + return 0; +} + +#endif diff --git a/lib/mempool.h b/lib/mempool.h new file mode 100644 index 0000000..c53423a --- /dev/null +++ b/lib/mempool.h @@ -0,0 +1,295 @@ +/* + * UCW Library -- Memory Pools + * + * (c) 1997--2005 Martin Mares + * (c) 2007 Pavel Charvat + * + * 
This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#ifndef _UCW_POOLS_H +#define _UCW_POOLS_H + +/* Memory pool state (see mp_push(), ...) */ +struct mempool_state { + uns free[2]; + void *last[2]; + struct mempool_state *next; +}; + +/* Memory pool */ +struct mempool { + struct mempool_state state; + void *unused, *last_big; + uns chunk_size, threshold, idx; +}; + +/* Statistics (see mp_stats()) */ +struct mempool_stats { + uns total_size; /* Real allocated size in bytes */ + uns chain_count[3]; /* Number of allocated chunks in small/big/unused chains */ + uns chain_size[3]; /* Size of allocated chunks in small/big/unused chains */ +}; + +/* Initialize a given mempool structure. Chunk size must be in the interval [1, UINT_MAX / 2] */ +void mp_init(struct mempool *pool, uns chunk_size); + +/* Allocate and initialize a new memory pool. See mp_init for chunk size limitations. */ +struct mempool *mp_new(uns chunk_size); + +/* Cleanup mempool initialized by mp_init or mp_new */ +void mp_delete(struct mempool *pool); + +/* Free all data on a memory pool (saves some empty chunks for later allocations) */ +void mp_flush(struct mempool *pool); + +/* Compute some statistics for debug purposes. See the definition of the mempool_stats structure. */ +void mp_stats(struct mempool *pool, struct mempool_stats *stats); + + +/*** Allocation routines ***/ + +/* For internal use only, do not call directly */ +void *mp_alloc_internal(struct mempool *pool, uns size) LIKE_MALLOC; + +/* The function allocates new bytes on a given memory pool. + * If the is zero, the resulting pointer is undefined, + * but it may be safely reallocated or used as the parameter + * to other functions below. + * + * The resulting pointer is always aligned to a multiple of + * CPU_STRUCT_ALIGN bytes and this condition remains true also + * after future reallocations. 
+ */ +void *mp_alloc(struct mempool *pool, uns size); + +/* The same as mp_alloc, but the result may not be aligned */ +void *mp_alloc_noalign(struct mempool *pool, uns size); + +/* The same as mp_alloc, but fills the newly allocated data with zeroes */ +void *mp_alloc_zero(struct mempool *pool, uns size); + +/* Inlined version of mp_alloc() */ +static inline void * +mp_alloc_fast(struct mempool *pool, uns size) +{ + uns avail = pool->state.free[0] & ~(CPU_STRUCT_ALIGN - 1); + if (size <= avail) + { + pool->state.free[0] = avail - size; + return pool->state.last[0] - avail; + } + else + return mp_alloc_internal(pool, size); +} + +/* Inlined version of mp_alloc_noalign() */ +static inline void * +mp_alloc_fast_noalign(struct mempool *pool, uns size) +{ + if (size <= pool->state.free[0]) + { + void *ptr = pool->state.last[0] - pool->state.free[0]; + pool->state.free[0] -= size; + return ptr; + } + else + return mp_alloc_internal(pool, size); +} + + +/*** Usage as a growing buffer ***/ + +/* For internal use only, do not call directly */ +void *mp_start_internal(struct mempool *pool, uns size) LIKE_MALLOC; +void *mp_grow_internal(struct mempool *pool, uns size); +void *mp_spread_internal(struct mempool *pool, void *p, uns size); + +static inline uns +mp_idx(struct mempool *pool, void *ptr) +{ + return ptr == pool->last_big; +} + +/* Open a new growing buffer (at least bytes long). + * If the is zero, the resulting pointer is undefined, + * but it may be safely reallocated or used as the parameter + * to other functions below. + * + * The resulting pointer is always aligned to a multiple of + * CPU_STRUCT_ALIGN bytes and this condition remains true also + * after future reallocations. There is an unaligned version as well. + * + * Keep in mind that you can't make any other allocations + * before you "close" the growing buffer with mp_end(). 
+ */ +void *mp_start(struct mempool *pool, uns size); +void *mp_start_noalign(struct mempool *pool, uns size); + +/* Inlined version of mp_start() */ +static inline void * +mp_start_fast(struct mempool *pool, uns size) +{ + uns avail = pool->state.free[0] & ~(CPU_STRUCT_ALIGN - 1); + if (size <= avail) + { + pool->idx = 0; + pool->state.free[0] = avail; + return pool->state.last[0] - avail; + } + else + return mp_start_internal(pool, size); +} + +/* Inlined version of mp_start_noalign() */ +static inline void * +mp_start_fast_noalign(struct mempool *pool, uns size) +{ + if (size <= pool->state.free[0]) + { + pool->idx = 0; + return pool->state.last[0] - pool->state.free[0]; + } + else + return mp_start_internal(pool, size); +} + +/* Return start pointer of the growing buffer allocated by mp_start() or a similar function */ +static inline void * +mp_ptr(struct mempool *pool) +{ + return pool->state.last[pool->idx] - pool->state.free[pool->idx]; +} + +/* Return the number of bytes available for extending the growing buffer */ +static inline uns +mp_avail(struct mempool *pool) +{ + return pool->state.free[pool->idx]; +} + +/* Grow the buffer allocated by mp_start() to be at least bytes long + * ( may be less than mp_avail(), even zero). Reallocated buffer may + * change its starting position. The content will be unchanged to the minimum + * of the old and new sizes; newly allocated memory will be uninitialized. + * Multiple calls to mp_grow have amortized linear cost wrt. the maximum value of . */ +static inline void * +mp_grow(struct mempool *pool, uns size) +{ + return (size <= mp_avail(pool)) ? mp_ptr(pool) : mp_grow_internal(pool, size); +} + +/* Grow the buffer by at least one byte -- equivalent to mp_grow(pool, mp_avail(pool) + 1) */ +static inline void * +mp_expand(struct mempool *pool) +{ + return mp_grow_internal(pool, mp_avail(pool) + 1); +} + +/* Ensure that there is at least bytes free after

p, if not, reallocate and adjust p

. */ +static inline void * +mp_spread(struct mempool *pool, void *p, uns size) +{ + return (((uns)(pool->state.last[pool->idx] - p) >= size) ? p : mp_spread_internal(pool, p, size)); +} + +/* Close the growing buffer. The must point just behind the data, you want to keep + * allocated (so it can be in the interval [mp_ptr(pool), mp_ptr(pool) + mp_avail(pool)]). + * Returns a pointer to the beginning of the just closed block. */ +static inline void * +mp_end(struct mempool *pool, void *end) +{ + void *p = mp_ptr(pool); + pool->state.free[pool->idx] = pool->state.last[pool->idx] - end; + return p; +} + +/* Return size in bytes of the last allocated memory block (with mp_alloc*() or mp_end()). */ +static inline uns +mp_size(struct mempool *pool, void *ptr) +{ + uns idx = mp_idx(pool, ptr); + return pool->state.last[idx] - ptr - pool->state.free[idx]; +} + +/* Open the last memory block (allocated with mp_alloc*() or mp_end()) + * for growing and return its size in bytes. The contents and the start pointer + * remain unchanged. Do not forget to call mp_end() to close it. */ +uns mp_open(struct mempool *pool, void *ptr); + +/* Inlined version of mp_open() */ +static inline uns +mp_open_fast(struct mempool *pool, void *ptr) +{ + pool->idx = mp_idx(pool, ptr); + uns size = pool->state.last[pool->idx] - ptr - pool->state.free[pool->idx]; + pool->state.free[pool->idx] += size; + return size; +} + +/* Reallocate the last memory block (allocated with mp_alloc*() or mp_end()) + * to the new . Behavior is similar to mp_grow(), but the resulting + * block is closed. 
*/ +void *mp_realloc(struct mempool *pool, void *ptr, uns size); + +/* The same as mp_realloc(), but fills the additional bytes (if any) with zeroes */ +void *mp_realloc_zero(struct mempool *pool, void *ptr, uns size); + +/* Inlined version of mp_realloc() */ +static inline void * +mp_realloc_fast(struct mempool *pool, void *ptr, uns size) +{ + mp_open_fast(pool, ptr); + ptr = mp_grow(pool, size); + mp_end(pool, ptr + size); + return ptr; +} + + +/*** Usage as a stack ***/ + +/* Save the current state of a memory pool. + * Do not call this function with an opened growing buffer. */ +static inline void +mp_save(struct mempool *pool, struct mempool_state *state) +{ + *state = pool->state; + pool->state.next = state; +} + +/* Save the current state to a newly allocated mempool_state structure. + * Do not call this function with an opened growing buffer. */ +struct mempool_state *mp_push(struct mempool *pool); + +/* Restore the state saved by mp_save() or mp_push() and free all + * data allocated after that point (including the state structure itself). + * You can't reallocate the last memory block from the saved state. */ +void mp_restore(struct mempool *pool, struct mempool_state *state); + +/* Restore the state saved by the last call to mp_push(). + * mp_pop() and mp_push() works as a stack so you can push more states safely. */ +void mp_pop(struct mempool *pool); + + +/*** mempool-str.c ***/ + +char *mp_strdup(struct mempool *, const char *) LIKE_MALLOC; +void *mp_memdup(struct mempool *, const void *, uns) LIKE_MALLOC; +char *mp_multicat(struct mempool *, ...) LIKE_MALLOC SENTINEL_CHECK; +static inline char * LIKE_MALLOC +mp_strcat(struct mempool *mp, const char *x, const char *y) +{ + return mp_multicat(mp, x, y, NULL); +} +char *mp_strjoin(struct mempool *p, char **a, uns n, uns sep) LIKE_MALLOC; + + +/*** mempool-fmt.c ***/ + +char *mp_printf(struct mempool *mp, const char *fmt, ...) 
FORMAT_CHECK(printf,2,3) LIKE_MALLOC; +char *mp_vprintf(struct mempool *mp, const char *fmt, va_list args) LIKE_MALLOC; +char *mp_printf_append(struct mempool *mp, char *ptr, const char *fmt, ...) FORMAT_CHECK(printf,3,4); +char *mp_vprintf_append(struct mempool *mp, char *ptr, const char *fmt, va_list args); + +#endif diff --git a/lib/mempool.t b/lib/mempool.t new file mode 100644 index 0000000..10c396e --- /dev/null +++ b/lib/mempool.t @@ -0,0 +1,11 @@ +# Tests for mempool modules + +Run: ../obj/lib/mempool-t + +Run: ../obj/lib/mempool-fmt-t +Out: + +Run: ../obj/lib/mempool-str-t +Out: <<12345>> + bugs.gnats.insects + bugsgnatsinsects diff --git a/lib/mmap.c b/lib/mmap.c new file mode 100644 index 0000000..928da4b --- /dev/null +++ b/lib/mmap.c @@ -0,0 +1,47 @@ +/* + * UCW Library -- Mapping of Files + * + * (c) 1999--2002 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#include "lib/lib.h" + +#include +#include +#include +#include +#include + +void * +mmap_file(const char *name, unsigned *len, int writeable) +{ + int fd = open(name, writeable ? O_RDWR : O_RDONLY); + struct stat st; + void *x; + + if (fd < 0) + die("open(%s): %m", name); + if (fstat(fd, &st) < 0) + die("fstat(%s): %m", name); + if (len) + *len = st.st_size; + if (st.st_size) + { + x = mmap(NULL, st.st_size, writeable ? 
(PROT_READ | PROT_WRITE) : PROT_READ, MAP_SHARED, fd, 0); + if (x == MAP_FAILED) + die("mmap(%s): %m", name); + } + else /* For empty file, we can return any non-zero address */ + x = ""; + close(fd); + return x; +} + +void +munmap_file(void *start, unsigned len) +{ + munmap(start, len); +} diff --git a/lib/pagecache.c b/lib/pagecache.c new file mode 100644 index 0000000..13ad366 --- /dev/null +++ b/lib/pagecache.c @@ -0,0 +1,429 @@ +/* + * UCW Library -- File Page Cache + * + * (c) 1999--2002 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#include "lib/lib.h" +#include "lib/pagecache.h" +#include "lib/lfs.h" + +#include +#include +#include +#include +#include +#include + +struct page_cache { + list free_pages; /* LRU queue of free non-dirty pages */ + list locked_pages; /* List of locked pages (starts with dirty ones) */ + list dirty_pages; /* List of free dirty pages */ + uns page_size; /* Bytes per page (must be a power of two) */ + uns free_count; /* Number of free / dirty pages */ + uns total_count; /* Total number of pages */ + uns max_pages; /* Maximum number of free pages */ + uns hash_size; /* Hash table size */ + uns stat_hit; /* Number of cache hits */ + uns stat_miss; /* Number of cache misses */ + uns stat_write; /* Number of writes */ + list *hash_table; /* List heads corresponding to hash buckets */ +#ifndef HAVE_PREAD + sh_off_t pos; /* Current position in the file */ + int pos_fd; /* FD the position corresponds to */ +#endif +}; + +#define PAGE_NUMBER(pos) ((pos) & ~(sh_off_t)(c->page_size - 1)) +#define PAGE_OFFSET(pos) ((pos) & (c->page_size - 1)) + +struct page_cache * +pgc_open(uns page_size, uns max_pages) +{ + struct page_cache *c = xmalloc_zero(sizeof(struct page_cache)); + uns i; + + init_list(&c->free_pages); + init_list(&c->locked_pages); + init_list(&c->dirty_pages); + c->page_size = page_size; + c->max_pages = max_pages; + c->hash_size = 
nextprime(c->max_pages); + c->hash_table = xmalloc(sizeof(list) * c->hash_size); + for(i=0; ihash_size; i++) + init_list(&c->hash_table[i]); +#ifndef HAVE_PREAD + c->pos_fd = -1; +#endif + return c; +} + +void +pgc_close(struct page_cache *c) +{ + pgc_cleanup(c); + ASSERT(EMPTY_LIST(c->locked_pages)); + ASSERT(EMPTY_LIST(c->dirty_pages)); + ASSERT(EMPTY_LIST(c->free_pages)); + xfree(c->hash_table); + xfree(c); +} + +static void +pgc_debug_page(struct page *p) +{ + printf("\tp=%08x d=%d f=%x c=%d\n", (uns) p->pos, p->fd, p->flags, p->lock_count); +} + +void +pgc_debug(struct page_cache *c, int mode) +{ + struct page *p; + + printf(">> Page cache dump: pgsize=%d, pages=%d, freepages=%d of %d, hash=%d\n", c->page_size, c->total_count, c->free_count, c->max_pages, c->hash_size); + printf(">> stats: %d hits, %d misses, %d writes\n", c->stat_hit, c->stat_miss, c->stat_write); + if (mode) + { + puts("LRU list:"); + WALK_LIST(p, c->free_pages) + pgc_debug_page(p); + puts("Locked list:"); + WALK_LIST(p, c->locked_pages) + pgc_debug_page(p); + puts("Dirty list:"); + WALK_LIST(p, c->dirty_pages) + pgc_debug_page(p); + } +} + +static void +flush_page(struct page_cache *c, struct page *p) +{ + int s; + + ASSERT(p->flags & PG_FLAG_DIRTY); +#ifdef HAVE_PREAD + s = sh_pwrite(p->fd, p->data, c->page_size, p->pos); +#else + if (c->pos != p->pos || c->pos_fd != (int) p->fd) + sh_seek(p->fd, p->pos, SEEK_SET); + s = write(p->fd, p->data, c->page_size); + c->pos = p->pos + s; + c->pos_fd = p->fd; +#endif + if (s < 0) + die("pgc_write(%d): %m", p->fd); + if (s != (int) c->page_size) + die("pgc_write(%d): incomplete page (only %d of %d)", p->fd, s, c->page_size); + p->flags &= ~PG_FLAG_DIRTY; + c->stat_write++; +} + +static int +flush_cmp(const void *X, const void *Y) +{ + struct page *x = *((struct page **)X); + struct page *y = *((struct page **)Y); + + if (x->fd < y->fd) + return -1; + if (x->fd > y->fd) + return 1; + if (x->pos < y->pos) + return -1; + if (x->pos > y->pos) + return 
1; + return 0; +} + +static void +flush_pages(struct page_cache *c, uns force) +{ + uns cnt = 0; + uns max = force ? ~0U : c->free_count / 2; + uns i; + struct page *p, *q, **req, **rr; + + WALK_LIST(p, c->dirty_pages) + { + cnt++; + if (cnt >= max) + break; + } + req = rr = alloca(cnt * sizeof(struct page *)); + i = cnt; + p = HEAD(c->dirty_pages); + while ((q = (struct page *) p->n.next) && i--) + { + rem_node(&p->n); + add_tail(&c->free_pages, &p->n); + *rr++ = p; + p = q; + } + qsort(req, cnt, sizeof(struct page *), flush_cmp); + for(i=0; ihash_size; +} + +static struct page * +get_page(struct page_cache *c, sh_off_t pos, uns fd) +{ + node *n; + struct page *p; + uns hash = hash_page(c, pos, fd); + + /* + * Return locked buffer for given page. + */ + + WALK_LIST(n, c->hash_table[hash]) + { + p = SKIP_BACK(struct page, hn, n); + if (p->pos == pos && p->fd == fd) + { + /* Found in the cache */ + rem_node(&p->n); + if (!p->lock_count) + c->free_count--; + return p; + } + } + if (c->total_count < c->max_pages || !c->free_count) + { + /* Enough free space, expand the cache */ + p = xmalloc(sizeof(struct page) + c->page_size); + c->total_count++; + } + else + { + /* Discard the oldest unlocked page */ + p = HEAD(c->free_pages); + if (!p->n.next) + { + /* There are only dirty pages here */ + flush_pages(c, 0); + p = HEAD(c->free_pages); + ASSERT(p->n.next); + } + ASSERT(!p->lock_count); + rem_node(&p->n); + rem_node(&p->hn); + c->free_count--; + } + p->pos = pos; + p->fd = fd; + p->flags = 0; + p->lock_count = 0; + add_tail(&c->hash_table[hash], &p->hn); + return p; +} + +void +pgc_flush(struct page_cache *c) +{ + struct page *p; + + flush_pages(c, 1); + WALK_LIST(p, c->locked_pages) + if (p->flags & PG_FLAG_DIRTY) + flush_page(c, p); + else + break; +} + +void +pgc_cleanup(struct page_cache *c) +{ + struct page *p; + node *n; + + pgc_flush(c); + WALK_LIST_DELSAFE(p, n, c->free_pages) + { + ASSERT(!(p->flags & PG_FLAG_DIRTY) && !p->lock_count); + rem_node(&p->n); + 
rem_node(&p->hn); + c->free_count--; + c->total_count--; + xfree(p); + } + ASSERT(!c->free_count); +} + +static inline struct page * +get_and_lock_page(struct page_cache *c, sh_off_t pos, uns fd) +{ + struct page *p = get_page(c, pos, fd); + + add_tail(&c->locked_pages, &p->n); + p->lock_count++; + return p; +} + +struct page * +pgc_read(struct page_cache *c, int fd, sh_off_t pos) +{ + struct page *p; + int s; + + ASSERT(!PAGE_OFFSET(pos)); + p = get_and_lock_page(c, pos, fd); + if (p->flags & PG_FLAG_VALID) + c->stat_hit++; + else + { + c->stat_miss++; +#ifdef HAVE_PREAD + s = sh_pread(fd, p->data, c->page_size, pos); +#else + if (c->pos != pos || c->pos_fd != (int)fd) + sh_seek(fd, pos, SEEK_SET); + s = read(fd, p->data, c->page_size); + c->pos = pos + s; + c->pos_fd = fd; +#endif + if (s < 0) + die("pgc_read(%d): %m", fd); + if (s != (int) c->page_size) + die("pgc_read(%d): incomplete page (only %d of %d)", p->fd, s, c->page_size); + p->flags |= PG_FLAG_VALID; + } + return p; +} + +struct page * +pgc_get(struct page_cache *c, int fd, sh_off_t pos) +{ + struct page *p; + + ASSERT(!PAGE_OFFSET(pos)); + p = get_and_lock_page(c, pos, fd); + p->flags |= PG_FLAG_VALID | PG_FLAG_DIRTY; + return p; +} + +struct page * +pgc_get_zero(struct page_cache *c, int fd, sh_off_t pos) +{ + struct page *p; + + ASSERT(!PAGE_OFFSET(pos)); + p = get_and_lock_page(c, pos, fd); + bzero(p->data, c->page_size); + p->flags |= PG_FLAG_VALID | PG_FLAG_DIRTY; + return p; +} + +void +pgc_put(struct page_cache *c, struct page *p) +{ + ASSERT(p->lock_count); + if (--p->lock_count) + return; + rem_node(&p->n); + if (p->flags & PG_FLAG_DIRTY) + { + add_tail(&c->dirty_pages, &p->n); + c->free_count++; + } + else if (c->free_count < c->max_pages) + { + add_tail(&c->free_pages, &p->n); + c->free_count++; + } + else + { + rem_node(&p->hn); + xfree(p); + c->total_count--; + } +} + +void +pgc_mark_dirty(struct page_cache *c, struct page *p) +{ + ASSERT(p->lock_count); + if (!(p->flags & PG_FLAG_DIRTY)) 
+ { + p->flags |= PG_FLAG_DIRTY; + rem_node(&p->n); + add_head(&c->locked_pages, &p->n); + } +} + +byte * +pgc_read_data(struct page_cache *c, int fd, sh_off_t pos, uns *len) +{ + struct page *p; + sh_off_t page = PAGE_NUMBER(pos); + uns offset = PAGE_OFFSET(pos); + + p = pgc_read(c, fd, page); + pgc_put(c, p); + *len = c->page_size - offset; + return p->data + offset; +} + +#ifdef TEST + +int main(int argc, char **argv) +{ + struct page_cache *c = pgc_open(1024, 2); + struct page *p, *q, *r; + int fd = open("test", O_RDWR | O_CREAT | O_TRUNC, 0666); + if (fd < 0) + die("open: %m"); + pgc_debug(c, 1); + p = pgc_get(c, fd, 0); + pgc_debug(c, 1); + strcpy(p->data, "one"); + pgc_put(c, p); + pgc_debug(c, 1); + p = pgc_get(c, fd, 1024); + pgc_debug(c, 1); + strcpy(p->data, "two"); + pgc_put(c, p); + pgc_debug(c, 1); + p = pgc_get(c, fd, 2048); + pgc_debug(c, 1); + strcpy(p->data, "three"); + pgc_put(c, p); + pgc_debug(c, 1); + pgc_flush(c); + pgc_debug(c, 1); + p = pgc_read(c, fd, 0); + pgc_debug(c, 1); + strcpy(p->data, "odin"); + pgc_mark_dirty(c, p); + pgc_debug(c, 1); + pgc_flush(c); + pgc_debug(c, 1); + q = pgc_read(c, fd, 1024); + pgc_debug(c, 1); + r = pgc_read(c, fd, 2048); + pgc_debug(c, 1); + pgc_put(c, p); + pgc_put(c, q); + pgc_put(c, r); + pgc_debug(c, 1); + p = pgc_get(c, fd, 3072); + pgc_debug(c, 1); + strcpy(p->data, "four"); + pgc_put(c, p); + pgc_debug(c, 1); + pgc_cleanup(c); + pgc_debug(c, 1); + pgc_close(c); + return 0; +} + +#endif diff --git a/lib/pagecache.h b/lib/pagecache.h new file mode 100644 index 0000000..ef4bf5a --- /dev/null +++ b/lib/pagecache.h @@ -0,0 +1,42 @@ +/* + * UCW Library -- File Page Cache + * + * (c) 1999--2002 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */ + +#ifndef _UCW_PAGECACHE_H +#define _UCW_PAGECACHE_H + +#include "lib/lists.h" + +struct page_cache; + +struct page { + node n; /* Node in page list */ + node hn; /* Node in hash table */ + sh_off_t pos; + uns fd; + uns flags; + uns lock_count; + byte data[0]; +}; + +#define PG_FLAG_DIRTY 1 +#define PG_FLAG_VALID 2 + +struct page_cache *pgc_open(uns page_size, uns max_pages); +void pgc_close(struct page_cache *); +void pgc_debug(struct page_cache *, int mode); +void pgc_flush(struct page_cache *); /* Write all unwritten pages */ +void pgc_cleanup(struct page_cache *); /* Deallocate all unused buffers */ +struct page *pgc_read(struct page_cache *, int fd, sh_off_t); /* Read page and lock it */ +struct page *pgc_get(struct page_cache *, int fd, sh_off_t); /* Get page for writing */ +struct page *pgc_get_zero(struct page_cache *, int fd, sh_off_t); /* ... and clear it */ +void pgc_put(struct page_cache *, struct page *); /* Release page */ +void pgc_mark_dirty(struct page_cache *, struct page *); /* Mark locked page as dirty */ +byte *pgc_read_data(struct page_cache *, int fd, sh_off_t, uns *); /* Partial reading */ + +#endif diff --git a/lib/partmap.c b/lib/partmap.c new file mode 100644 index 0000000..2b70f1d --- /dev/null +++ b/lib/partmap.c @@ -0,0 +1,95 @@ +/* + * UCW Library -- Mapping of File Parts + * + * (c) 2003--2006 Martin Mares + * (c) 2003--2005 Robert Spalek + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#include "lib/lib.h" +#include "lib/lfs.h" +#include "lib/partmap.h" + +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_PARTMAP_IS_MMAP +#define PARTMAP_WINDOW ~(size_t)0 +#else +#ifdef TEST +#define PARTMAP_WINDOW 4096 +#else +#define PARTMAP_WINDOW 16777216 +#endif +#endif + +struct partmap * +partmap_open(char *name, int writeable) +{ + struct partmap *p = xmalloc_zero(sizeof(struct partmap)); + + p->fd = sh_open(name, writeable ? 
O_RDWR : O_RDONLY); + if (p->fd < 0) + die("open(%s): %m", name); + if ((p->file_size = sh_seek(p->fd, 0, SEEK_END)) < 0) + die("lseek(%s): %m", name); + p->writeable = writeable; +#ifdef CONFIG_PARTMAP_IS_MMAP + partmap_load(p, 0, p->file_size); +#endif + return p; +} + +sh_off_t +partmap_size(struct partmap *p) +{ + return p->file_size; +} + +void +partmap_close(struct partmap *p) +{ + if (p->start_map) + munmap(p->start_map, p->end_off - p->start_off); + close(p->fd); + xfree(p); +} + +void +partmap_load(struct partmap *p, sh_off_t start, uns size) +{ + if (p->start_map) + munmap(p->start_map, p->end_off - p->start_off); + sh_off_t end = start + size; + sh_off_t win_start = start/CPU_PAGE_SIZE * CPU_PAGE_SIZE; + size_t win_len = PARTMAP_WINDOW; + if ((sh_off_t) (win_start+win_len) > p->file_size) + win_len = ALIGN_TO(p->file_size - win_start, CPU_PAGE_SIZE); + if ((sh_off_t) (win_start+win_len) < end) + die("partmap_map: Window is too small for mapping %d bytes", size); + p->start_map = sh_mmap(NULL, win_len, p->writeable ? (PROT_READ | PROT_WRITE) : PROT_READ, MAP_SHARED, p->fd, win_start); + if (p->start_map == MAP_FAILED) + die("mmap failed at position %lld: %m", (long long)win_start); + p->start_off = win_start; + p->end_off = win_start+win_len; + madvise(p->start_map, win_len, MADV_SEQUENTIAL); +} + +#ifdef TEST +int main(int argc, char **argv) +{ + struct partmap *p = partmap_open(argv[1], 0); + uns l = partmap_size(p); + uns i; + for (i=0; i + * (c) 2003--2005 Robert Spalek + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */ + +#ifndef _UCW_PARTMAP_H +#define _UCW_PARTMAP_H + +struct partmap { + int fd; + sh_off_t file_size; + sh_off_t start_off, end_off; + byte *start_map; + int writeable; +}; + +struct partmap *partmap_open(char *name, int writeable); +void partmap_close(struct partmap *p); +sh_off_t partmap_size(struct partmap *p); +void partmap_load(struct partmap *p, sh_off_t start, uns size); + +static inline void * +partmap_map(struct partmap *p, sh_off_t start, uns size UNUSED) +{ +#ifndef CONFIG_PARTMAP_IS_MMAP + if (unlikely(!p->start_map || start < p->start_off || (sh_off_t) (start+size) > p->end_off)) + partmap_load(p, start, size); +#endif + return p->start_map + (start - p->start_off); +} + +static inline void * +partmap_map_forward(struct partmap *p, sh_off_t start, uns size UNUSED) +{ +#ifndef CONFIG_PARTMAP_IS_MMAP + if (unlikely((sh_off_t) (start+size) > p->end_off)) + partmap_load(p, start, size); +#endif + return p->start_map + (start - p->start_off); +} + +#endif diff --git a/lib/patimatch.c b/lib/patimatch.c new file mode 100644 index 0000000..a0e29af --- /dev/null +++ b/lib/patimatch.c @@ -0,0 +1,16 @@ +/* + * UCW Library -- Shell-Like Case-Insensitive Pattern Matching (currently only '?' and '*') + * + * (c) 1997 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#include "lib/lib.h" +#include "lib/chartype.h" + +#define Convert(x) Cupcase(x) +#define MATCH_FUNC_NAME match_pattern_nocase + +#include "lib/patmatch.h" diff --git a/lib/patmatch.c b/lib/patmatch.c new file mode 100644 index 0000000..bfd8aa5 --- /dev/null +++ b/lib/patmatch.c @@ -0,0 +1,15 @@ +/* + * UCW Library -- Shell-Like Pattern Matching (currently only '?' and '*') + * + * (c) 1997 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */ + +#include "lib/lib.h" + +#define Convert(x) (x) +#define MATCH_FUNC_NAME match_pattern + +#include "lib/patmatch.h" diff --git a/lib/patmatch.h b/lib/patmatch.h new file mode 100644 index 0000000..a47669f --- /dev/null +++ b/lib/patmatch.h @@ -0,0 +1,46 @@ +/* + * UCW Library -- Generic Shell-Like Pattern Matching (currently only '?' and '*') + * + * (c) 1997 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +int +MATCH_FUNC_NAME(const char *p, const char *s) +{ + while (*p) + { + if (*p == '?' && *s) + p++, s++; + else if (*p == '*') + { + int z = p[1]; + + if (!z) + return 1; + if (z == '\\' && p[2]) + z = p[2]; + z = Convert(z); + for(;;) + { + while (*s && Convert(*s) != z) + s++; + if (!*s) + return 0; + if (MATCH_FUNC_NAME(p+1, s)) + return 1; + s++; + } + } + else + { + if (*p == '\\' && p[1]) + p++; + if (Convert(*p++) != Convert(*s++)) + return 0; + } + } + return !*s; +} diff --git a/lib/perl/CGI.pm b/lib/perl/CGI.pm new file mode 100644 index 0000000..7d7cc45 --- /dev/null +++ b/lib/perl/CGI.pm @@ -0,0 +1,444 @@ +# Poor Man's CGI Module for Perl +# +# (c) 2002--2007 Martin Mares +# Slightly modified by Tomas Valla +# +# This software may be freely distributed and used according to the terms +# of the GNU Lesser General Public License. + +# FIXME: +# - respond with proper HTTP error codes +# - if we get invalid parameters, generate HTTP error or redirect + +package UCW::CGI; + +# First of all, set up error handling, so that even errors during parsing +# will be reported properly. + +# Variables to be set by the calling module: +# $UCW::CGI::error_mail mail address of the script admin (optional) +# (this one has to be set in the BEGIN block!) 
+# $UCW::CGI::error_hook function to be called for reporting errors + +my $error_reported; +my $exit_code; +my $debug = 0; + +sub report_bug($) +{ + if (!defined $error_reported) { + $error_reported = 1; + print STDERR $_[0]; + if (defined($UCW::CGI::error_hook)) { + &$UCW::CGI::error_hook($_[0]); + } else { + print "Content-type: text/plain\n\n"; + print "Internal bug:\n"; + print $_[0], "\n"; + print "Please notify $UCW::CGI::error_mail\n" if defined $UCW::CGI::error_mail; + } + } + die; +} + +BEGIN { + $SIG{__DIE__} = sub { report_bug($_[0]); }; + $SIG{__WARN__} = sub { report_bug("WARNING: " . $_[0]); }; + $exit_code = 0; +} + +END { + $? = $exit_code; +} + +use strict; +use warnings; + +require Exporter; +our $VERSION = 1.0; +our @ISA = qw(Exporter); +our @EXPORT = qw(&html_escape &url_escape &url_param_escape &self_ref &self_form &http_get); +our @EXPORT_OK = qw(); + +### Escaping ### + +sub url_escape($) { + my $x = shift @_; + $x =~ s/([^-\$_.!*'(),0-9A-Za-z\x80-\xff])/"%".unpack('H2',$1)/ge; + return $x; +} + +sub url_param_escape($) { + my $x = shift @_; + $x = url_escape($x); + $x =~ s/%20/+/g; + return $x; +} + +sub html_escape($) { + my $x = shift @_; + $x =~ s/&/&/g; + $x =~ s//>/g; + $x =~ s/"/"/g; + return $x; +} + +### Analysing RFC 822 Style Headers ### + +sub rfc822_prepare($) { + my $x = shift @_; + # Convert all %'s and backslash escapes to %xx escapes + $x =~ s/%/%25/g; + $x =~ s/\\(.)/"%".unpack("H2",$1)/ge; + # Remove all comments, beware, they can be nested (unterminated comments are closed at EOL automatically) + while ($x =~ s/^(("[^"]*"|[^"(])*(\([^)]*)*)(\([^()]*(\)|$))/$1 /) { } + # Remove quotes and escape dangerous characters inside (again closing at the end automatically) + $x =~ s{"([^"]*)("|$)}{my $z=$1; $z =~ s/([^0-9a-zA-Z%_-])/"%".unpack("H2",$1)/ge; $z;}ge; + # All control characters are properly escaped, tokens are clearly visible. + # Finally remove all unnecessary spaces. 
+ $x =~ s/\s+/ /g; + $x =~ s/(^ | $)//g; + $x =~ s{\s*([()<>@,;:\\"/\[\]?=])\s*}{$1}g; + return $x; +} + +sub rfc822_deescape($) { + my $x = shift @_; + $x =~ s/%(..)/pack("H2",$1)/ge; + return $x; +} + +### Reading of HTTP headers ### + +sub http_get($) { + my $h = shift @_; + $h =~ tr/a-z-/A-Z_/; + return $ENV{"HTTP_$h"} || $ENV{"$h"}; +} + +### Parsing of Arguments ### + +my $arg_table; + +sub parse_arg_string($) { + my ($s) = @_; + $s =~ s/\s+//; + foreach $_ (split /[&:]/,$s) { + (/^([^=]+)=(.*)$/) or next; + my $arg = $arg_table->{$1} or next; + $_ = $2; + s/\+/ /g; + s/%(..)/pack("H2",$1)/eg; + s/\r\n/\n/g; + s/\r/\n/g; + $arg->{'multiline'} || s/(\n|\t)/ /g; + s/^\s+//; + s/\s+$//; + if (my $rx = $arg->{'check'}) { + if (!/^$rx$/) { $_ = $arg->{'default'}; } + } + + my $r = ref($arg->{'var'}); + if ($r eq 'SCALAR') { + ${$arg->{'var'}} = $_; + } elsif ($r eq 'ARRAY') { + push @{$arg->{'var'}}, $_; + } + } +} + +sub parse_multipart_form_data(); + +sub parse_args($) { + $arg_table = shift @_; + if (!defined $ENV{"GATEWAY_INTERFACE"}) { + print STDERR "Must be called as a CGI script.\n"; + $exit_code = 1; + exit; + } + foreach my $a (values %$arg_table) { + my $r = ref($a->{'var'}); + defined($a->{'default'}) or $a->{'default'}=""; + if ($r eq 'SCALAR') { + ${$a->{'var'}} = $a->{'default'}; + } elsif ($r eq 'ARRAY') { + @{$a->{'var'}} = (); + } + } + my $method = $ENV{"REQUEST_METHOD"}; + my $qs = $ENV{"QUERY_STRING"}; + parse_arg_string($qs) if defined($qs); + if ($method eq "GET") { + } elsif ($method eq "POST") { + if ($ENV{"CONTENT_TYPE"} =~ /^application\/x-www-form-urlencoded\b/i) { + while () { + chomp; + parse_arg_string($_); + } + } elsif ($ENV{"CONTENT_TYPE"} =~ /^multipart\/form-data\b/i) { + parse_multipart_form_data(); + } else { + die "Unknown content type for POST data"; + } + } else { + die "Unknown request method"; + } +} + +### Parsing Multipart Form Data ### + +my $boundary; +my $boundary_len; +my $mp_buffer; +my $mp_buffer_i; +my 
$mp_buffer_boundary; +my $mp_eof; + +sub refill_mp_data($) { + my ($more) = @_; + if ($mp_buffer_boundary >= $mp_buffer_i) { + return $mp_buffer_boundary - $mp_buffer_i; + } elsif ($mp_buffer_i + $more <= length($mp_buffer) - $boundary_len) { + return $more; + } else { + if ($mp_buffer_i) { + $mp_buffer = substr($mp_buffer, $mp_buffer_i); + $mp_buffer_i = 0; + } + while ($mp_buffer_i + $more > length($mp_buffer) - $boundary_len) { + last if $mp_eof; + my $data; + my $n = read(STDIN, $data, 2048); + if ($n > 0) { + $mp_buffer .= $data; + } else { + $mp_eof = 1; + } + } + $mp_buffer_boundary = index($mp_buffer, $boundary, $mp_buffer_i); + if ($mp_buffer_boundary >= 0) { + return $mp_buffer_boundary; + } elsif ($mp_eof) { + return length($mp_buffer); + } else { + return length($mp_buffer) - $boundary_len; + } + } +} + +sub get_mp_line($) { + my ($allow_empty) = @_; + my $n = refill_mp_data(1024); + my $i = index($mp_buffer, "\r\n", $mp_buffer_i); + if ($i >= $mp_buffer_i && $i < $mp_buffer_i + $n - 1) { + my $s = substr($mp_buffer, $mp_buffer_i, $i - $mp_buffer_i); + $mp_buffer_i = $i + 2; + return $s; + } elsif ($allow_empty) { + if ($n) { # An incomplete line + my $s = substr($mp_buffer, $mp_buffer_i, $n); + $mp_buffer_i += $n; + return $s; + } else { # No more lines + return undef; + } + } else { + die "Premature end of multipart POST data"; + } +} + +sub skip_mp_boundary() { + if ($mp_buffer_boundary != $mp_buffer_i) { + die "Premature end of multipart POST data"; + } + $mp_buffer_boundary = -1; + $mp_buffer_i += 2; + my $b = get_mp_line(0); + print STDERR "SEP $b\n" if $debug; + $mp_buffer_boundary = index($mp_buffer, $boundary, $mp_buffer_i); + if ("\r\n$b" =~ /^$boundary--/) { + return 0; + } else { + return 1; + } +} + +sub parse_mp_header() { + my $h = {}; + my $last; + while ((my $l = get_mp_line(0)) ne "") { + print STDERR "HH $l\n" if $debug; + if (my ($name, $value) = ($l =~ /([A-Za-z0-9-]+)\s*:\s*(.*)/)) { + $name =~ tr/A-Z/a-z/; + $h->{$name} = $value; 
+ $last = $name; + } elsif ($l =~ /^\s+/ && $last) { + $h->{$last} .= $l; + } else { + $last = undef; + } + } + foreach my $n (keys %$h) { + $h->{$n} = rfc822_prepare($h->{$n}); + print STDERR "H $n: $h->{$n}\n" if $debug; + } + return (keys %$h) ? $h : undef; +} + +sub parse_multipart_form_data() { + # First of all, find the boundary string + my $ct = rfc822_prepare($ENV{"CONTENT_TYPE"}); + if (!(($boundary) = ($ct =~ /^.*;boundary=([^; ]+)/))) { + die "Multipart content with no boundary string received"; + } + $boundary = rfc822_deescape($boundary); + print STDERR "BOUNDARY IS $boundary\n" if $debug; + + # BUG: IE 3.01 on Macintosh forgets to add the "--" at the start of the boundary string + # as the MIME specs preach. Workaround borrowed from CGI.pm in Perl distribution. + my $agent = http_get("User-agent") || ""; + $boundary = "--$boundary" unless $agent =~ /MSIE\s+3\.0[12];\s*Mac/; + $boundary = "\r\n$boundary"; + $boundary_len = length($boundary) + 2; + + # Check upload size in advance + if (my $size = http_get("Content-Length")) { + my $max_allowed = 0; + foreach my $a (values %$arg_table) { + $max_allowed += $a->{"maxsize"} || 65536; + } + if ($size > $max_allowed) { + die "Maximum form data length exceeded"; + } + } + + # Initialize our buffering mechanism and part splitter + $mp_buffer = "\r\n"; + $mp_buffer_i = 0; + $mp_buffer_boundary = -1; + $mp_eof = 0; + + # Skip garbage before the 1st part + while (my $i = refill_mp_data(256)) { $mp_buffer_i += $i; } + skip_mp_boundary() || return; + + # Process individual parts + do { PART: { + print STDERR "NEXT PART\n" if $debug; + my $h = parse_mp_header(); + my ($field, $cdisp, $a); + if ($h && + ($cdisp = $h->{"content-disposition"}) && + $cdisp =~ /^form-data/ && + (($field) = ($cdisp =~ /;name=([^;]+)/)) && + ($a = $arg_table->{"$field"})) { + print STDERR "FIELD $field\n" if $debug; + if (defined $h->{"content-transfer-encoding"}) { die "Unexpected Content-Transfer-Encoding"; } + if (defined $a->{"var"}) { 
+ while (defined (my $l = get_mp_line(1))) { + print STDERR "VALUE $l\n" if $debug; + parse_arg_string("$field=$l"); + } + next PART; + } elsif (defined $a->{"file"}) { + require File::Temp; + require IO::Handle; + my $max_size = $a->{"maxsize"} || 1048576; + my @tmpargs = (undef, UNLINK => 1); + push @tmpargs, DIR => $a->{"tmpdir"} if defined $a->{"tmpdir"}; + my ($fh, $fn) = File::Temp::tempfile(@tmpargs); + print STDERR "FILE UPLOAD to $fn\n" if $debug; + ${$a->{"file"}} = $fn; + ${$a->{"fh"}} = $fh if defined $a->{"fh"}; + my $total_size = 0; + while (my $i = refill_mp_data(4096)) { + print $fh substr($mp_buffer, $mp_buffer_i, $i); + $mp_buffer_i += $i; + $total_size += $i; + if ($total_size > $max_size) { die "Uploaded file too long"; } + } + $fh->flush(); # Don't close the handle, the file would disappear otherwise + next PART; + } + } + print STDERR "SKIPPING\n" if $debug; + while (my $i = refill_mp_data(256)) { $mp_buffer_i += $i; } + } } while (skip_mp_boundary()); +} + +### Generating Self-ref URL's ### + +sub make_out_args($) { + my ($overrides) = @_; + my $out = {}; + foreach my $name (keys %$arg_table) { + my $arg = $arg_table->{$name}; + defined($arg->{'var'}) || next; + defined($arg->{'pass'}) && !$arg->{'pass'} && !exists $overrides->{$name} && next; + my $value; + if (!defined($value = $overrides->{$name})) { + if (exists $overrides->{$name}) { + $value = $arg->{'default'}; + } else { + $value = ${$arg->{'var'}}; + } + } + if ($value ne $arg->{'default'}) { + $out->{$name} = $value; + } + } + return $out; +} + +sub self_ref(@) { + my %h = @_; + my $out = make_out_args(\%h); + return "?" . join(':', map { "$_=" . 
url_param_escape($out->{$_}) } sort keys %$out); +} + +sub self_form(@) { + my %h = @_; + my $out = make_out_args(\%h); + return join('', map { "\n" } sort keys %$out); +} + +### Cookies + +sub cookie_esc($) { + my $x = shift @_; + if ($x !~ /^[a-zA-Z0-9%]+$/) { + $x =~ s/([\\\"])/\\$1/g; + $x = "\"$x\""; + } + return $x; +} + +sub set_cookie($$@) { + my $key = shift @_; + my $value = shift @_; + my %other = @_; + $other{'version'} = 1 unless defined $other{'version'}; + print "Set-Cookie: $key=", cookie_esc($value); + foreach my $k (keys %other) { + print ";$k=", cookie_esc($other{$k}); + } + print "\n"; +} + +sub parse_cookies() { + my $h = http_get("Cookie") or return (); + my @cook = (); + while (my ($padding,$name,$val,$xx,$rest) = ($h =~ /\s*([,;]\s*)*([^ =]+)=([^ =,;\"]*|\"([^\"\\]|\\.)*\")(\s.*|;.*|$)/)) { + if ($val =~ /^\"/) { + $val =~ s/^\"//; + $val =~ s/\"$//; + $val =~ s/\\(.)/$1/g; + } + push @cook, $name, $val; + $h = $rest; + } + return @cook; +} + +1; # OK diff --git a/lib/perl/Config.pm b/lib/perl/Config.pm new file mode 100644 index 0000000..552690c --- /dev/null +++ b/lib/perl/Config.pm @@ -0,0 +1,54 @@ +# Perl module for parsing Sherlock configuration files (using the config utility) +# +# (c) 2002--2005 Martin Mares +# +# This software may be freely distributed and used according to the terms +# of the GNU Lesser General Public License. 
+ +package UCW::Config; + +use strict; +use warnings; +use Getopt::Long; + +our %Sections = (); + +our $DefaultConfigFile = ""; +our $Usage = "-C, --config filename Override the default configuration file +-S, --set sec.item=val Manual setting of a configuration item"; + + +sub Parse(@) { + my @options = @_; + my $defargs = ""; + my $override_config = 0; + push @options, "config|C=s" => sub { my ($o,$a)=@_; $defargs .= " -C'$a'"; $override_config=1; }; + push @options, "set|S=s" => sub { my ($o,$a)=@_; $defargs .= " -S'$a'"; }; + Getopt::Long::Configure("bundling"); + Getopt::Long::GetOptions(@options) or return 0; + if (!$override_config && $DefaultConfigFile) { + $defargs = "-C'$DefaultConfigFile' $defargs"; + } + foreach my $section (keys %Sections) { + my $opts = $Sections{$section}; + my $optlist = join(";", keys %$opts); + my %filtered_opts = map { my $t=$_; $t=~s/[#\$]+$//; $t => $$opts{$_} } keys %$opts; + my @l = `bin/config $defargs "$section\{$optlist\}"`; + $? && exit 1; + foreach my $o (@l) { + $o =~ /^CF_.*_([^=]+)='(.*)'\n$/ or die "Cannot parse bin/config output: $_"; + my $var = $filtered_opts{$1}; + my $val = $2; + if (ref $var eq "SCALAR") { + $$var = $val; + } elsif (ref $var eq "ARRAY") { + push @$var, $val; + } elsif (ref $var) { + die ("UCW::Config::Parse: don't know how to set $o"); + } + } + } + 1; +} + +1; # OK diff --git a/lib/perl/Configure.pm b/lib/perl/Configure.pm new file mode 100644 index 0000000..bd1a7cc --- /dev/null +++ b/lib/perl/Configure.pm @@ -0,0 +1,190 @@ +# Perl module for UCW Configure Scripts +# +# (c) 2005 Martin Mares +# +# This software may be freely distributed and used according to the terms +# of the GNU Lesser General Public License. 
+ +package UCW::Configure; + +use strict; +use warnings; + +BEGIN { + # The somewhat hairy Perl export mechanism + use Exporter(); + our ($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS); + $VERSION = 1.0; + @ISA = qw(Exporter); + @EXPORT = qw(&Init &Log &Notice &Warn &Fail &IsSet &Set &UnSet &Append &Override &Get &Test &Include &Finish &FindFile &TryFindFile); + @EXPORT_OK = qw(); + %EXPORT_TAGS = (); +} + +our %vars = (); +our %overriden = (); + +sub Log($) { + print @_; +} + +sub Notice($) { + print @_ if $vars{"VERBOSE"}; +} + +sub Warn($) { + print "WARNING: ", @_; +} + +sub Fail($) { + Log("ERROR: " . (shift @_) . "\n"); + exit 1; +} + +sub IsSet($) { + my ($x) = @_; + return exists $vars{$x}; +} + +sub Get($) { + my ($x) = @_; + return $vars{$x}; +} + +sub Set($;$) { + my ($x,$y) = @_; + $y=1 unless defined $y; + $vars{$x}=$y unless $overriden{$x}; +} + +sub UnSet($) { + my ($x) = @_; + delete $vars{$x} unless $overriden{$x}; +} + +sub Append($$) { + my ($x,$y) = @_; + Set($x, (IsSet($x) ? (Get($x) . " $y") : $y)); +} + +sub Override($;$) { + my ($x,$y) = @_; + $y=1 unless defined $y; + $vars{$x}=$y; + $overriden{$x} = 1; +} + +sub Test($$$) { + my ($var,$msg,$sub) = @_; + Log "$msg ... "; + if (!IsSet($var)) { + Set $var, &$sub(); + } + Log Get($var) . "\n"; +} + +sub TryFindFile($) { + my ($f) = @_; + if (-f $f) { + return $f; + } elsif ($f !~ /^\// && -f (Get("SRCDIR")."/$f")) { + return Get("SRCDIR")."/$f"; + } else { + return undef; + } +} + +sub FindFile($) { + my ($f) = @_; + my $F; + defined ($F = TryFindFile($f)) or Fail "Cannot find file $f"; + return $F; +} + +sub Init($$) { + my ($srcdir,$defconfig) = @_; + if ((!defined $defconfig && !@ARGV) || @ARGV && $ARGV[0] eq "--help") { + print STDERR "Usage: [/]configure " . (defined $defconfig ? "[" : "") . "" . (defined $defconfig ? "]" : "") . + " [

.