From 1034a0c76f50ddfe278313a813097bf32c1d400c Mon Sep 17 00:00:00 2001 From: Martin Mares Date: Mon, 16 Mar 2009 23:42:52 +0100 Subject: [PATCH] Updated libucw to current version (Sherlock commit 17f29eb1ab186e9f053299c25f47ce368ee7e1de) --- Makefile | 9 +- build/Makebottom | 97 +- build/Makefile | 19 + build/Maketop | 32 +- build/genconf | 7 +- cf/{library => libucw} | 0 configure | 14 +- default.cfg | 2 + lib/Makefile | 138 - lib/base224.h | 25 - lib/bbuf.h | 22 - lib/chartype.h | 49 - lib/conf.h | 163 - lib/config.h | 49 - lib/ctmatch.c | 44 - lib/db-emul.c | 155 - lib/db-test.c | 475 -- lib/db-tool.c | 264 -- lib/db.c | 598 --- lib/db.h | 50 - lib/db_internal.h | 58 - lib/eltpool.h | 65 - lib/fastbuf.h | 410 -- lib/fastbuf.t | 15 - lib/ff-utf8.h | 15 - lib/gbuf.h | 71 - lib/getopt.h | 92 - lib/getopt/getopt-sh.h | 14 - lib/hash-test.t | 13 - lib/hashfunc.h | 43 - lib/heap.h | 88 - lib/lfs.h | 63 - lib/lib.h | 279 -- lib/lists.c | 77 - lib/lists.h | 64 - lib/lizard.h | 49 - lib/log-file.c | 108 - lib/log.c | 149 - lib/mainloop.h | 108 - lib/md5.c | 249 -- lib/md5.h | 24 - lib/md5hex.c | 35 - lib/mempool.h | 295 -- lib/pagecache.c | 429 -- lib/pagecache.h | 42 - lib/perl/Filelock/Makefile | 24 - lib/perl/Makefile | 10 - lib/perl/Ulimit/Makefile | 24 - lib/regex/Makefile | 7 - lib/regex/README | 9 - lib/regex/regcomp.c | 3544 --------------- lib/regex/regex-sh.h | 24 - lib/regex/regex.c | 10 - lib/regex/regex.h | 574 --- lib/regex/regex_internal.c | 1263 ------ lib/regex/regex_internal.h | 742 --- lib/regex/regexec.c | 3977 ----------------- lib/shell/Makefile | 12 - lib/simple-lists.h | 49 - lib/slists.h | 90 - lib/sorter/Makefile | 13 - sherlock/attrset.c | 2 +- sherlock/attrset.h | 2 +- sherlock/buck2obj.c | 30 +- sherlock/conf-parse.c | 8 +- sherlock/conf.h | 2 +- sherlock/obj-format.c | 2 +- sherlock/obj2buck.c | 8 +- sherlock/object.c | 4 +- sherlock/perl/Object.pm | 3 +- sherlock/sherlock.h | 2 +- ucw/Makefile | 169 + {lib => ucw}/THREADS | 0 {lib => 
ucw}/adler32.c | 6 +- {lib => ucw}/alloc.c | 6 +- {lib => ucw}/alloc_str.c | 2 +- {lib => ucw}/asio.c | 6 +- {lib => ucw}/asio.h | 4 +- {lib => ucw}/asio.t | 2 +- {lib => ucw}/asort-test.c | 11 +- ucw/asort-test.t | 3 + {lib => ucw}/base224.c | 47 +- ucw/base224.h | 36 + {lib => ucw}/base64.c | 4 +- {lib => ucw}/base64.h | 18 +- ucw/basecode.t | 16 + {lib => ucw}/bbuf.c | 4 +- ucw/bbuf.h | 46 + {lib => ucw}/bbuf.t | 2 +- {lib => ucw}/bigalloc.c | 4 +- {lib => ucw}/binheap-node.h | 17 + {lib => ucw}/binheap-test.c | 6 +- ucw/binheap-test.t | 5 + {lib => ucw}/binheap.h | 70 +- {lib => ucw}/binsearch.h | 22 + {lib => ucw}/bit-ffs.c | 4 +- {lib => ucw}/bit-fls.c | 4 +- {lib => ucw}/bitarray.h | 0 {lib => ucw}/bitops.h | 0 {lib => ucw}/bitops.t | 4 +- {lib => ucw}/bitsig.c | 12 +- {lib => ucw}/bitsig.h | 5 + {lib => ucw}/carefulio.c | 2 +- lib/str_ctype.c => ucw/char-cat.c | 7 +- lib/str_lower.c => ucw/char-lower.c | 7 +- lib/charmap.h => ucw/char-map.h | 0 lib/str_upper.c => ucw/char-upper.c | 7 +- ucw/chartype.h | 64 + {lib => ucw}/clists.h | 102 +- {lib => ucw}/conf-alloc.c | 6 +- {lib => ucw}/conf-dump.c | 12 +- {lib => ucw}/conf-input.c | 121 +- {lib => ucw}/conf-internal.h | 0 {lib => ucw}/conf-intr.c | 86 +- {lib => ucw}/conf-journal.c | 10 +- {lib => ucw}/conf-parse.c | 6 +- {lib => ucw}/conf-section.c | 14 +- {lib => ucw}/conf-test.c | 20 +- {lib => ucw}/conf-test.cf | 4 +- ucw/conf.h | 383 ++ ucw/config.h | 49 + {lib => ucw}/default.cfg | 22 +- ucw/doc/Makefile | 24 + ucw/doc/basecode.txt | 76 + ucw/doc/basics.txt | 33 + ucw/doc/binheap.txt | 19 + ucw/doc/binsearch.txt | 51 + ucw/doc/chartype.txt | 4 + ucw/doc/compress.txt | 17 + ucw/doc/conf.txt | 296 ++ ucw/doc/config.txt | 191 + ucw/doc/configure.txt | 74 + ucw/doc/def_index.txt | 9 + ucw/doc/docsys.txt | 168 + ucw/doc/eltpool.txt | 12 + ucw/doc/fastbuf.txt | 63 + ucw/doc/generic.txt | 105 + ucw/doc/growbuf.txt | 41 + ucw/doc/hash.txt | 88 + ucw/doc/hashtable.txt | 176 + ucw/doc/heap.txt | 40 + 
ucw/doc/index.txt | 92 + ucw/doc/install.txt | 39 + ucw/doc/lists.txt | 26 + ucw/doc/log.txt | 154 + ucw/doc/mainloop.txt | 29 + ucw/doc/mempool.txt | 123 + ucw/doc/prime.txt | 7 + ucw/doc/sort.txt | 320 ++ ucw/doc/unaligned.txt | 8 + ucw/doc/unicode.txt | 4 + {lib => ucw}/eltpool.c | 13 +- ucw/eltpool.h | 112 + lib/eltpool.test => ucw/eltpool.t | 2 +- {lib => ucw}/exitstatus.c | 2 +- {lib => ucw}/fastbuf.c | 14 +- ucw/fastbuf.h | 741 +++ {lib => ucw}/fb-atomic.c | 64 +- ucw/fb-atomic.t | 7 + {lib => ucw}/fb-buffer.c | 56 +- ucw/fb-buffer.t | 13 + {lib => ucw}/fb-direct.c | 40 +- {lib => ucw}/fb-file.c | 30 +- ucw/fb-file.t | 7 + {lib => ucw}/fb-grow.c | 8 +- ucw/fb-grow.t | 9 + {lib => ucw}/fb-limfd.c | 6 +- ucw/fb-limfd.t | 6 + {lib => ucw}/fb-mem.c | 11 +- ucw/fb-mem.t | 4 + {lib => ucw}/fb-mmap.c | 38 +- ucw/fb-mmap.t | 3 + {lib => ucw}/fb-param.c | 35 +- {lib => ucw}/fb-pool.c | 6 +- ucw/fb-pool.t | 3 + ucw/fb-socket.c | 176 + ucw/fb-socket.h | 35 + ucw/fb-socket.t | 5 + {lib => ucw}/fb-temp.c | 45 +- ucw/fb-temp.t | 3 + {lib => ucw}/ff-binary.c | 6 +- {lib => ucw}/ff-binary.h | 30 +- {lib => ucw}/ff-printf.c | 4 +- ucw/ff-stkstring.c | 80 + {lib => ucw}/ff-string.c | 71 +- {lib => ucw}/ff-unicode.c | 10 +- {lib => ucw}/ff-unicode.h | 48 +- {lib => ucw}/ff-unicode.t | 16 +- ucw/gbuf.h | 98 + {lib => ucw}/getopt.c | 4 +- ucw/getopt.h | 192 + {lib => ucw}/getopt.t | 4 +- {lib => ucw}/getopt/Makefile | 2 +- {lib => ucw}/getopt/README | 0 {lib => ucw}/getopt/getopt-sh.c | 0 ucw/getopt/getopt-sh.h | 14 + {lib => ucw}/getopt/getopt.c | 0 {lib => ucw}/getopt/getopt.h | 0 {lib => ucw}/getopt/getopt1.c | 0 {lib => ucw}/getopt/getopt_init.c | 0 {lib => ucw}/getopt/getopt_int.h | 0 {lib => ucw}/hash-test.c | 16 +- ucw/hash-test.t | 13 + lib/str-test.c => ucw/hashfunc-test.c | 2 +- {lib => ucw}/hashfunc.c | 18 +- ucw/hashfunc.h | 47 + {lib => ucw}/hashtable.h | 70 +- ucw/heap.h | 156 + {lib => ucw}/ipaccess.c | 14 +- {lib => ucw}/ipaccess.h | 2 +- {lib => 
ucw}/kmp-search.h | 6 +- {lib => ucw}/kmp-test.c | 24 +- {lib => ucw}/kmp-test.t | 2 +- {lib => ucw}/kmp.h | 20 +- {lib => ucw}/lfs-test.c | 12 +- ucw/lfs.h | 63 + ucw/lib.h | 241 + {lib => ucw}/libucw.pc | 12 +- {lib => ucw}/lizard-safe.c | 8 +- {lib => ucw}/lizard-test.c | 10 +- {lib => ucw}/lizard.c | 4 +- ucw/lizard.h | 148 + ucw/log-conf-test.cf | 35 + ucw/log-conf.c | 366 ++ ucw/log-file.c | 202 + ucw/log-internal.h | 34 + ucw/log-stream.c | 203 + ucw/log-syslog.c | 123 + ucw/log.c | 419 ++ ucw/log.h | 268 ++ {lib => ucw}/mainloop.c | 37 +- ucw/mainloop.h | 344 ++ ucw/md5.c | 256 ++ ucw/md5.h | 66 + {lib => ucw}/mempool-fmt.c | 8 +- {lib => ucw}/mempool-str.c | 4 +- {lib => ucw}/mempool.c | 20 +- ucw/mempool.h | 439 ++ {lib => ucw}/mempool.t | 6 +- {lib => ucw}/mmap.c | 2 +- {lib => ucw}/partmap.c | 39 +- {lib => ucw}/partmap.h | 20 +- {lib => ucw}/perl/Filelock/Filelock.pm | 0 {lib => ucw}/perl/Filelock/Filelock.xs | 0 {lib => ucw}/perl/Filelock/MANIFEST | 0 ucw/perl/Filelock/Makefile | 31 + {lib => ucw}/perl/Filelock/Makefile.PL | 0 ucw/perl/Makefile | 10 + {lib/perl => ucw/perl/UCW}/CGI.pm | 74 +- {lib/perl => ucw/perl/UCW}/Config.pm | 0 {lib/perl => ucw/perl/UCW}/Configure.pm | 127 +- ucw/perl/UCW/Configure/Build.pm | 26 + .../perl/UCW/Configure/C.pm | 127 +- ucw/perl/UCW/Configure/Doc.pm | 32 + ucw/perl/UCW/Configure/LibUCW.pm | 74 + ucw/perl/UCW/Configure/Makefile | 15 + ucw/perl/UCW/Configure/Paths.pm | 50 + ucw/perl/UCW/Configure/Pkg.pm | 120 + {lib/perl => ucw/perl/UCW}/Log.pm | 0 ucw/perl/UCW/Makefile | 14 + {lib => ucw}/perl/Ulimit/MANIFEST | 0 ucw/perl/Ulimit/Makefile | 31 + {lib => ucw}/perl/Ulimit/Makefile.PL | 0 {lib => ucw}/perl/Ulimit/Ulimit.pm | 0 {lib => ucw}/perl/Ulimit/Ulimit.xs | 0 {lib => ucw}/prefetch.h | 2 +- {lib => ucw}/prime.c | 3 +- ucw/prime.h | 52 + {lib => ucw}/primetable.c | 5 +- {lib => ucw}/proctitle.c | 2 +- {lib => ucw}/profile.c | 4 +- {lib => ucw}/profile.h | 7 +- {lib => ucw}/qache.c | 22 +- {lib => ucw}/qache.h | 0 
{lib => ucw}/random.c | 2 +- {lib => ucw}/randomkey.c | 2 +- {lib => ucw}/realloc.c | 6 +- {lib => ucw}/redblack-test.c | 6 +- ucw/redblack-test.t | 3 + {lib => ucw}/redblack.h | 2 +- {lib => ucw}/regex.c | 17 +- ucw/regex.h | 21 + {lib => ucw}/regex.t | 16 +- {lib => ucw}/runcmd.c | 4 +- {lib => ucw}/semaphore.h | 16 +- ucw/sha1-hmac.c | 107 + ucw/sha1.c | 310 ++ ucw/sha1.h | 83 + ucw/sha1.t | 65 + ucw/shell/Makefile | 20 + {lib => ucw}/shell/config.c | 17 +- {lib => ucw}/shell/config.t | 10 +- {lib => ucw}/shell/libucw.sh | 0 {lib => ucw}/shell/logger.c | 16 +- {lib => ucw}/sighandler.c | 14 +- {lib => ucw}/simple-lists.c | 8 +- ucw/simple-lists.h | 78 + {lib => ucw}/slists.c | 11 +- ucw/slists.h | 178 + {lib => ucw}/slists.t | 2 +- ucw/sorter/Makefile | 21 + {lib => ucw}/sorter/TODO | 2 +- lib/arraysort.h => ucw/sorter/array-simple.h | 27 +- {lib => ucw}/sorter/array.c | 12 +- {lib => ucw}/sorter/array.h | 25 +- {lib => ucw}/sorter/common.h | 4 +- {lib => ucw}/sorter/config.c | 8 +- {lib => ucw}/sorter/govern.c | 28 +- {lib => ucw}/sorter/s-fixint.h | 4 +- {lib => ucw}/sorter/s-internal.h | 4 +- {lib => ucw}/sorter/s-multiway.h | 0 {lib => ucw}/sorter/s-radix.h | 0 {lib => ucw}/sorter/s-twoway.h | 0 {lib => ucw}/sorter/sbuck.c | 12 +- {lib => ucw}/sorter/sort-test.c | 47 +- {lib => ucw}/sorter/sorter.h | 14 +- {lib => ucw}/stkstring.c | 24 +- {lib => ucw}/stkstring.h | 0 {lib => ucw}/stkstring.t | 2 +- lib/string.c => ucw/str-esc.c | 41 +- ucw/str-hex.c | 91 + lib/patimatch.c => ucw/str-imatch.c | 8 +- lib/patmatch.c => ucw/str-match.c | 6 +- lib/patmatch.h => ucw/str-match.h | 2 + lib/wordsplit.c => ucw/str-split.c | 9 +- ucw/string.c | 40 + ucw/string.h | 41 + ucw/string.t | 15 + {lib => ucw}/sync.c | 2 +- ucw/tbf.c | 59 + ucw/tbf.h | 34 + ucw/tempfile.c | 106 + {lib => ucw}/threads-conf.c | 10 +- {lib => ucw}/threads.c | 6 +- {lib => ucw}/threads.h | 4 +- {lib => ucw}/timer.c | 8 +- ucw/trie-test.c | 216 + ucw/trie-test.t | 7 + ucw/trie.h | 955 ++++ {lib => 
ucw}/unaligned.h | 72 +- {lib => ucw}/unicode.c | 4 +- {lib => ucw}/unicode.h | 138 +- {lib => ucw}/unicode.t | 28 +- {lib => ucw}/url.c | 138 +- {lib => ucw}/url.h | 69 +- ucw/url.t | 91 + ucw/utils/Makefile | 19 + ucw/utils/basecode.c | 122 + ucw/utils/daemon-helper.c | 185 + ucw/utils/rotate-log.pl | 27 + ucw/utils/urltool.c | 145 + {lib => ucw}/wildmatch.c | 20 +- {lib => ucw}/wildmatch.h | 6 +- {lib => ucw}/workqueue.c | 20 +- {lib => ucw}/workqueue.h | 4 +- 355 files changed, 13302 insertions(+), 16622 deletions(-) create mode 100644 build/Makefile rename cf/{library => libucw} (100%) delete mode 100644 lib/Makefile delete mode 100644 lib/base224.h delete mode 100644 lib/bbuf.h delete mode 100644 lib/chartype.h delete mode 100644 lib/conf.h delete mode 100644 lib/config.h delete mode 100644 lib/ctmatch.c delete mode 100644 lib/db-emul.c delete mode 100644 lib/db-test.c delete mode 100644 lib/db-tool.c delete mode 100644 lib/db.c delete mode 100644 lib/db.h delete mode 100644 lib/db_internal.h delete mode 100644 lib/eltpool.h delete mode 100644 lib/fastbuf.h delete mode 100644 lib/fastbuf.t delete mode 100644 lib/ff-utf8.h delete mode 100644 lib/gbuf.h delete mode 100644 lib/getopt.h delete mode 100644 lib/getopt/getopt-sh.h delete mode 100644 lib/hash-test.t delete mode 100644 lib/hashfunc.h delete mode 100644 lib/heap.h delete mode 100644 lib/lfs.h delete mode 100644 lib/lib.h delete mode 100644 lib/lists.c delete mode 100644 lib/lists.h delete mode 100644 lib/lizard.h delete mode 100644 lib/log-file.c delete mode 100644 lib/log.c delete mode 100644 lib/mainloop.h delete mode 100644 lib/md5.c delete mode 100644 lib/md5.h delete mode 100644 lib/md5hex.c delete mode 100644 lib/mempool.h delete mode 100644 lib/pagecache.c delete mode 100644 lib/pagecache.h delete mode 100644 lib/perl/Filelock/Makefile delete mode 100644 lib/perl/Makefile delete mode 100644 lib/perl/Ulimit/Makefile delete mode 100644 lib/regex/Makefile delete mode 100644 lib/regex/README delete 
mode 100644 lib/regex/regcomp.c delete mode 100644 lib/regex/regex-sh.h delete mode 100644 lib/regex/regex.c delete mode 100644 lib/regex/regex.h delete mode 100644 lib/regex/regex_internal.c delete mode 100644 lib/regex/regex_internal.h delete mode 100644 lib/regex/regexec.c delete mode 100644 lib/shell/Makefile delete mode 100644 lib/simple-lists.h delete mode 100644 lib/slists.h delete mode 100644 lib/sorter/Makefile create mode 100644 ucw/Makefile rename {lib => ucw}/THREADS (100%) rename {lib => ucw}/adler32.c (91%) rename {lib => ucw}/alloc.c (93%) rename {lib => ucw}/alloc_str.c (93%) rename {lib => ucw}/asio.c (98%) rename {lib => ucw}/asio.h (98%) rename {lib => ucw}/asio.t (58%) rename {lib => ucw}/asort-test.c (91%) create mode 100644 ucw/asort-test.t rename {lib => ucw}/base224.c (82%) create mode 100644 ucw/base224.h rename {lib => ucw}/base64.c (98%) rename {lib => ucw}/base64.h (50%) create mode 100644 ucw/basecode.t rename {lib => ucw}/bbuf.c (97%) create mode 100644 ucw/bbuf.h rename {lib => ucw}/bbuf.t (82%) rename {lib => ucw}/bigalloc.c (97%) rename {lib => ucw}/binheap-node.h (64%) rename {lib => ucw}/binheap-test.c (95%) create mode 100644 ucw/binheap-test.t rename {lib => ucw}/binheap.h (63%) rename {lib => ucw}/binsearch.h (50%) rename {lib => ucw}/bit-ffs.c (96%) rename {lib => ucw}/bit-fls.c (93%) rename {lib => ucw}/bitarray.h (100%) rename {lib => ucw}/bitops.h (100%) rename {lib => ucw}/bitops.t (84%) rename {lib => ucw}/bitsig.c (95%) rename {lib => ucw}/bitsig.h (89%) rename {lib => ucw}/carefulio.c (97%) rename lib/str_ctype.c => ucw/char-cat.c (72%) rename lib/str_lower.c => ucw/char-lower.c (72%) rename lib/charmap.h => ucw/char-map.h (100%) rename lib/str_upper.c => ucw/char-upper.c (72%) create mode 100644 ucw/chartype.h rename {lib => ucw}/clists.h (57%) rename {lib => ucw}/conf-alloc.c (91%) rename {lib => ucw}/conf-dump.c (95%) rename {lib => ucw}/conf-input.c (82%) rename {lib => ucw}/conf-internal.h (100%) rename {lib => 
ucw}/conf-intr.c (90%) rename {lib => ucw}/conf-journal.c (94%) rename {lib => ucw}/conf-parse.c (97%) rename {lib => ucw}/conf-section.c (96%) rename {lib => ucw}/conf-test.c (92%) rename {lib => ucw}/conf-test.cf (92%) create mode 100644 ucw/conf.h create mode 100644 ucw/config.h rename {lib => ucw}/default.cfg (68%) create mode 100644 ucw/doc/Makefile create mode 100644 ucw/doc/basecode.txt create mode 100644 ucw/doc/basics.txt create mode 100644 ucw/doc/binheap.txt create mode 100644 ucw/doc/binsearch.txt create mode 100644 ucw/doc/chartype.txt create mode 100644 ucw/doc/compress.txt create mode 100644 ucw/doc/conf.txt create mode 100644 ucw/doc/config.txt create mode 100644 ucw/doc/configure.txt create mode 100644 ucw/doc/def_index.txt create mode 100644 ucw/doc/docsys.txt create mode 100644 ucw/doc/eltpool.txt create mode 100644 ucw/doc/fastbuf.txt create mode 100644 ucw/doc/generic.txt create mode 100644 ucw/doc/growbuf.txt create mode 100644 ucw/doc/hash.txt create mode 100644 ucw/doc/hashtable.txt create mode 100644 ucw/doc/heap.txt create mode 100644 ucw/doc/index.txt create mode 100644 ucw/doc/install.txt create mode 100644 ucw/doc/lists.txt create mode 100644 ucw/doc/log.txt create mode 100644 ucw/doc/mainloop.txt create mode 100644 ucw/doc/mempool.txt create mode 100644 ucw/doc/prime.txt create mode 100644 ucw/doc/sort.txt create mode 100644 ucw/doc/unaligned.txt create mode 100644 ucw/doc/unicode.txt rename {lib => ucw}/eltpool.c (91%) create mode 100644 ucw/eltpool.h rename lib/eltpool.test => ucw/eltpool.t (53%) rename {lib => ucw}/exitstatus.c (97%) rename {lib => ucw}/fastbuf.c (93%) create mode 100644 ucw/fastbuf.h rename {lib => ucw}/fb-atomic.c (68%) create mode 100644 ucw/fb-atomic.t rename {lib => ucw}/fb-buffer.c (54%) create mode 100644 ucw/fb-buffer.t rename {lib => ucw}/fb-direct.c (95%) rename {lib => ucw}/fb-file.c (90%) create mode 100644 ucw/fb-file.t rename {lib => ucw}/fb-grow.c (94%) create mode 100644 ucw/fb-grow.t rename {lib => 
ucw}/fb-limfd.c (94%) create mode 100644 ucw/fb-limfd.t rename {lib => ucw}/fb-mem.c (94%) create mode 100644 ucw/fb-mem.t rename {lib => ucw}/fb-mmap.c (86%) create mode 100644 ucw/fb-mmap.t rename {lib => ucw}/fb-param.c (83%) rename {lib => ucw}/fb-pool.c (95%) create mode 100644 ucw/fb-pool.t create mode 100644 ucw/fb-socket.c create mode 100644 ucw/fb-socket.h create mode 100644 ucw/fb-socket.t rename {lib => ucw}/fb-temp.c (53%) create mode 100644 ucw/fb-temp.t rename {lib => ucw}/ff-binary.c (92%) rename {lib => ucw}/ff-binary.h (68%) rename {lib => ucw}/ff-printf.c (96%) create mode 100644 ucw/ff-stkstring.c rename {lib => ucw}/ff-string.c (78%) rename {lib => ucw}/ff-unicode.c (98%) rename {lib => ucw}/ff-unicode.h (68%) rename {lib => ucw}/ff-unicode.t (71%) create mode 100644 ucw/gbuf.h rename {lib => ucw}/getopt.c (95%) create mode 100644 ucw/getopt.h rename {lib => ucw}/getopt.t (76%) rename {lib => ucw}/getopt/Makefile (86%) rename {lib => ucw}/getopt/README (100%) rename {lib => ucw}/getopt/getopt-sh.c (100%) create mode 100644 ucw/getopt/getopt-sh.h rename {lib => ucw}/getopt/getopt.c (100%) rename {lib => ucw}/getopt/getopt.h (100%) rename {lib => ucw}/getopt/getopt1.c (100%) rename {lib => ucw}/getopt/getopt_init.c (100%) rename {lib => ucw}/getopt/getopt_int.h (100%) rename {lib => ucw}/hash-test.c (96%) create mode 100644 ucw/hash-test.t rename lib/str-test.c => ucw/hashfunc-test.c (99%) rename {lib => ucw}/hashfunc.c (92%) create mode 100644 ucw/hashfunc.h rename {lib => ucw}/hashtable.h (89%) create mode 100644 ucw/heap.h rename {lib => ucw}/ipaccess.c (90%) rename {lib => ucw}/ipaccess.h (95%) rename {lib => ucw}/kmp-search.h (96%) rename {lib => ucw}/kmp-test.c (93%) rename {lib => ucw}/kmp-test.t (52%) rename {lib => ucw}/kmp.h (96%) rename {lib => ucw}/lfs-test.c (81%) create mode 100644 ucw/lfs.h create mode 100644 ucw/lib.h rename {lib => ucw}/libucw.pc (53%) rename {lib => ucw}/lizard-safe.c (94%) rename {lib => ucw}/lizard-test.c (95%) 
rename {lib => ucw}/lizard.c (99%) create mode 100644 ucw/lizard.h create mode 100644 ucw/log-conf-test.cf create mode 100644 ucw/log-conf.c create mode 100644 ucw/log-file.c create mode 100644 ucw/log-internal.h create mode 100644 ucw/log-stream.c create mode 100644 ucw/log-syslog.c create mode 100644 ucw/log.c create mode 100644 ucw/log.h rename {lib => ucw}/mainloop.c (94%) create mode 100644 ucw/mainloop.h create mode 100644 ucw/md5.c create mode 100644 ucw/md5.h rename {lib => ucw}/mempool-fmt.c (94%) rename {lib => ucw}/mempool-str.c (97%) rename {lib => ucw}/mempool.c (97%) create mode 100644 ucw/mempool.h rename {lib => ucw}/mempool.t (68%) rename {lib => ucw}/mmap.c (97%) rename {lib => ucw}/partmap.c (64%) rename {lib => ucw}/partmap.h (57%) rename {lib => ucw}/perl/Filelock/Filelock.pm (100%) rename {lib => ucw}/perl/Filelock/Filelock.xs (100%) rename {lib => ucw}/perl/Filelock/MANIFEST (100%) create mode 100644 ucw/perl/Filelock/Makefile rename {lib => ucw}/perl/Filelock/Makefile.PL (100%) create mode 100644 ucw/perl/Makefile rename {lib/perl => ucw/perl/UCW}/CGI.pm (86%) rename {lib/perl => ucw/perl/UCW}/Config.pm (100%) rename {lib/perl => ucw/perl/UCW}/Configure.pm (58%) create mode 100644 ucw/perl/UCW/Configure/Build.pm rename lib/autoconf.cfg => ucw/perl/UCW/Configure/C.pm (72%) create mode 100644 ucw/perl/UCW/Configure/Doc.pm create mode 100644 ucw/perl/UCW/Configure/LibUCW.pm create mode 100644 ucw/perl/UCW/Configure/Makefile create mode 100644 ucw/perl/UCW/Configure/Paths.pm create mode 100644 ucw/perl/UCW/Configure/Pkg.pm rename {lib/perl => ucw/perl/UCW}/Log.pm (100%) create mode 100644 ucw/perl/UCW/Makefile rename {lib => ucw}/perl/Ulimit/MANIFEST (100%) create mode 100644 ucw/perl/Ulimit/Makefile rename {lib => ucw}/perl/Ulimit/Makefile.PL (100%) rename {lib => ucw}/perl/Ulimit/Ulimit.pm (100%) rename {lib => ucw}/perl/Ulimit/Ulimit.xs (100%) rename {lib => ucw}/prefetch.h (91%) rename {lib => ucw}/prime.c (96%) create mode 100644 
ucw/prime.h rename {lib => ucw}/primetable.c (96%) rename {lib => ucw}/proctitle.c (98%) rename {lib => ucw}/profile.c (97%) rename {lib => ucw}/profile.h (97%) rename {lib => ucw}/qache.c (97%) rename {lib => ucw}/qache.h (100%) rename {lib => ucw}/random.c (97%) rename {lib => ucw}/randomkey.c (96%) rename {lib => ucw}/realloc.c (90%) rename {lib => ucw}/redblack-test.c (98%) create mode 100644 ucw/redblack-test.t rename {lib => ucw}/redblack.h (99%) rename {lib => ucw}/regex.c (96%) create mode 100644 ucw/regex.h rename {lib => ucw}/regex.t (77%) rename {lib => ucw}/runcmd.c (97%) rename {lib => ucw}/semaphore.h (73%) create mode 100644 ucw/sha1-hmac.c create mode 100644 ucw/sha1.c create mode 100644 ucw/sha1.h create mode 100644 ucw/sha1.t create mode 100644 ucw/shell/Makefile rename {lib => ucw}/shell/config.c (97%) rename {lib => ucw}/shell/config.t (78%) rename {lib => ucw}/shell/libucw.sh (100%) rename {lib => ucw}/shell/logger.c (70%) rename {lib => ucw}/sighandler.c (80%) rename {lib => ucw}/simple-lists.c (90%) create mode 100644 ucw/simple-lists.h rename {lib => ucw}/slists.c (90%) create mode 100644 ucw/slists.h rename {lib => ucw}/slists.t (66%) create mode 100644 ucw/sorter/Makefile rename {lib => ucw}/sorter/TODO (87%) rename lib/arraysort.h => ucw/sorter/array-simple.h (80%) rename {lib => ucw}/sorter/array.c (98%) rename {lib => ucw}/sorter/array.h (89%) rename {lib => ucw}/sorter/common.h (98%) rename {lib => ucw}/sorter/config.c (94%) rename {lib => ucw}/sorter/govern.c (95%) rename {lib => ucw}/sorter/s-fixint.h (98%) rename {lib => ucw}/sorter/s-internal.h (99%) rename {lib => ucw}/sorter/s-multiway.h (100%) rename {lib => ucw}/sorter/s-radix.h (100%) rename {lib => ucw}/sorter/s-twoway.h (100%) rename {lib => ucw}/sorter/sbuck.c (95%) rename {lib => ucw}/sorter/sort-test.c (94%) rename {lib => ucw}/sorter/sorter.h (97%) rename {lib => ucw}/stkstring.c (83%) rename {lib => ucw}/stkstring.h (100%) rename {lib => ucw}/stkstring.t (78%) rename 
lib/string.c => ucw/str-esc.c (79%) create mode 100644 ucw/str-hex.c rename lib/patimatch.c => ucw/str-imatch.c (70%) rename lib/patmatch.c => ucw/str-match.c (75%) rename lib/patmatch.h => ucw/str-match.h (96%) rename lib/wordsplit.c => ucw/str-split.c (82%) create mode 100644 ucw/string.c create mode 100644 ucw/string.h create mode 100644 ucw/string.t rename {lib => ucw}/sync.c (95%) create mode 100644 ucw/tbf.c create mode 100644 ucw/tbf.h create mode 100644 ucw/tempfile.c rename {lib => ucw}/threads-conf.c (70%) rename {lib => ucw}/threads.c (94%) rename {lib => ucw}/threads.h (87%) rename {lib => ucw}/timer.c (82%) create mode 100644 ucw/trie-test.c create mode 100644 ucw/trie-test.t create mode 100644 ucw/trie.h rename {lib => ucw}/unaligned.h (65%) rename {lib => ucw}/unicode.c (98%) rename {lib => ucw}/unicode.h (59%) rename {lib => ucw}/unicode.t (79%) rename {lib => ucw}/url.c (85%) rename {lib => ucw}/url.h (55%) create mode 100644 ucw/url.t create mode 100644 ucw/utils/Makefile create mode 100644 ucw/utils/basecode.c create mode 100644 ucw/utils/daemon-helper.c create mode 100644 ucw/utils/rotate-log.pl create mode 100644 ucw/utils/urltool.c rename {lib => ucw}/wildmatch.c (92%) rename {lib => ucw}/wildmatch.h (66%) rename {lib => ucw}/workqueue.c (92%) rename {lib => ucw}/workqueue.h (98%) diff --git a/Makefile b/Makefile index fbae3c5..4b4794c 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ # Makefile for MO-Eval -# (c) 2008 Martin Mares +# (c) 2008--2009 Martin Mares VERSION=1.0.99-20080220 @@ -13,11 +13,12 @@ obj/config.mk: @echo "You need to run configure first." 
&& false # We will use the libucw build system -include $(s)/build/Maketop +BUILDSYS=$(s)/build +include $(BUILDSYS)/Maketop # Include makefiles of libraries we wish to use ifdef CONFIG_UCW_LIBS -include $(s)/lib/Makefile +include $(s)/ucw/Makefile include $(s)/sherlock/Makefile # Disable built-in tests of these libraries TESTS= @@ -37,4 +38,4 @@ include $(s)/mop/Makefile endif # And finally the default rules of the build system -include $(s)/build/Makebottom +include $(BUILDSYS)/Makebottom diff --git a/build/Makebottom b/build/Makebottom index bf9a49e..e2c8402 100644 --- a/build/Makebottom +++ b/build/Makebottom @@ -1,13 +1,15 @@ # Bottom part of Makefile for the UCW Libraries -# (c) 1997--2007 Martin Mares +# (c) 1997--2008 Martin Mares # The run tree -runtree: run/.tree-stamp $(addsuffix /.dir-stamp,$(addprefix $(o)/,$(DIRS))) +DOCDIR=doc + +runtree: run/.tree-stamp $(addsuffix /.dir-stamp,$(addprefix $(o)/,$(DIRS)) $(addprefix run/$(DOCDIR)/,$(DOC_MODULES))) run/.tree-stamp: $(o)/config.mk $(M)Creating runtree - $(Q)mkdir -p run $(addprefix run/, cf $(EXTRA_RUNDIRS) $(INSTALL_RUNDIRS)) + $(Q)mkdir -p run $(addprefix run/, $(CONFIG_DIR) $(EXTRA_RUNDIRS) $(INSTALL_RUNDIRS)) $(Q)touch run/.tree-stamp # Miscellaneous targets @@ -15,7 +17,8 @@ run/.tree-stamp: $(o)/config.mk programs: $(PROGS) datafiles: $(DATAFILES) tests: $(TESTS) -configs: $(addprefix run/cf/,$(CONFIGS)) +configs: $(addprefix run/$(CONFIG_DIR)/,$(CONFIGS)) +docs: runtree $(DOCS) $(DOC_INDICES) tags: etags `find . 
-name "*.[ch]"` @@ -28,7 +31,7 @@ tags: -include $(o)/depend $(o)/depend: force - $(Q)if [ -s $(o)/depend.new ] ; then $(s)/build/mergedeps $(o)/depend $(o)/depend.new ; >$(o)/depend.new ; fi + $(Q)if [ -s $(o)/depend.new ] ; then $(BUILDSYS)/mergedeps $(o)/depend $(o)/depend.new ; >$(o)/depend.new ; fi force: @@ -39,14 +42,14 @@ force: # Rules for configuration files -run/cf/%: $(s)/cf/% $(o)/config.mk $(s)/build/genconf +run/$(CONFIG_DIR)/%: $(s)/$(CONFIG_SRC_DIR)/% $(o)/config.mk $(BUILDSYS)/genconf $(M)CF $< - $(Q)$(s)/build/genconf $< $@ $(o)/config.mk + $(Q)$(BUILDSYS)/genconf $< $@ $(o)/config.mk -$(o)/%.cf: $(s)/%.cf $(o)/config.mk $(s)/build/genconf +$(o)/%.cf: $(s)/%.cf $(o)/config.mk $(BUILDSYS)/genconf $(M)CF $< - $(Q)$(s)/build/genconf $< $@ $(o)/config.mk - $(Q)cp $@ run/cf/$(basename $(@F)) + $(Q)$(BUILDSYS)/genconf $< $@ $(o)/config.mk + $(Q)cp $@ run/$(CONFIG_DIR)/$(basename $(@F)) # Rules for libraries @@ -55,17 +58,25 @@ $(o)/%.cf: $(s)/%.cf $(o)/config.mk $(s)/build/genconf $(Q)rm -f $@ $(Q)ar rcs $@ $^ ifdef CONFIG_INSTALL_API - $(Q)$(call symlink,$@,run/lib) + $(Q)$(call symlink-alias,$@,run/lib,$(*F)$(LIBNAME_INFIX).a) endif %.so: $(M)LD $@ - $(Q)$(CC) $(LSHARED) $(LDFLAGS) -o $@ $^ - $(Q)$(call symlink,$@,run/lib) + $(Q)$(CC) $(LSHARED) $(LDFLAGS) -o $@ $(shell PKG_CONFIG_PATH="$(PKG_CONFIG_PATH)" $(BUILDSYS)/lib-flags $^) $(LIBS) + $(Q)$(call symlink-alias,$@,run/$(SO_RUNDIR),$(*F)$(SONAME_INFIX).so$(SONAME_SUFFIX)) + +# On Darwin, gcc expects shared libraries in *.dylib instead of *.so. +# Surprisingly, when a program is run, it suffices to have *.so files. +# We don't want to mess up the whole build system with configurable +# suffices and we also don't want to incur an overhead on Linux, so we +# just create symbolic links on Darwin, if requested. 
+%.dylib: %.so + cd $(dir $<) && ln -fs $(notdir $<) $(notdir $@) $(o)/%.pc: $(s)/%.pc $(o)/%.$(LS) $(M)PC $< - $(Q)DEPS="$(shell $(s)/build/lib-deps $^)" LIBDIR=$(@D) $(s)/build/genconf $< $@ $(o)/config.mk + $(Q)DEPS="$(shell $(BUILDSYS)/lib-deps $^)" LIBDIR=$(@D) $(BUILDSYS)/genconf $< $@ $(o)/config.mk $(Q)mkdir -p $(o)/pkgconfig $(Q)$(call symlink,$@,$(o)/pkgconfig) @@ -73,17 +84,25 @@ $(o)/%.pc: $(s)/%.pc $(o)/%.$(LS) ifdef CONFIG_INSTALL_API +ifdef CONFIG_LOCAL +# Need an absolute path API_ROOT:=$(shell pwd)/run +API_LIBDIR=$(API_ROOT)/lib +API_INCDIR=$(API_ROOT)/include +else +API_LIBDIR=$(INSTALL_LIB_DIR) +API_INCDIR=$(INSTALL_INCLUDE_DIR) +endif INSTALL_RUNDIRS+=include lib/pkgconfig api: $(API_INCLUDES) $(addprefix run/lib/pkgconfig/,$(addsuffix .pc,$(API_LIBS))) $(o)/%/.include-stamp: - $(Q)$(s)/build/install-includes $($@ "s@^libdir=.*@libdir=$(API_ROOT)/lib@;s@^incdir=.*@incdir=$(API_ROOT)/include@" + $(Q)sed <$< >$@ "s@^libdir=.*@libdir=$(API_LIBDIR)@;s@^incdir=.*@incdir=$(API_INCDIR)@" else api: @@ -123,11 +142,11 @@ $(o)/%-tt.o: $(s)/%.c $(o)/autoconf.h $(o)/%-t: $(o)/%-tt.o $(TESTING_DEPS) $(M)LD-TEST $@ - $(Q)$(CC) $(LDFLAGS) -o $@ $(shell $(s)/build/lib-flags $^) $(LIBS) + $(Q)$(CC) $(LDFLAGS) -o $@ $(shell PKG_CONFIG_PATH="$(PKG_CONFIG_PATH)" $(BUILDSYS)/lib-flags $^) $(LIBS) -$(o)/%.test: $(s)/%.t $(s)/build/tester +$(o)/%.test: $(s)/%.t $(BUILDSYS)/tester $(M)TEST $@ - $(Q)$(s)/build/tester --rundir=run $(TESTERFLAGS) $< && touch $@ + $(Q)$(BUILDSYS)/tester --rundir=run $(TESTERFLAGS) $< && touch $@ # Rules for binaries @@ -135,30 +154,30 @@ BINDIR=bin $(o)/%: $(o)/%.o $(M)LD $@ - $(Q)$(CC) $(LDFLAGS) -o $@ $(shell $(s)/build/lib-flags $^) $(LIBS) + $(Q)$(CC) $(LDFLAGS) -o $@ $(shell PKG_CONFIG_PATH="$(PKG_CONFIG_PATH)" $(BUILDSYS)/lib-flags $^) $(LIBS) $(Q)$(call symlink,$@,run/$(BINDIR)) -$(o)/%: $(s)/%.sh $(o)/config.mk $(s)/build/genconf +$(o)/%: $(s)/%.sh $(o)/config.mk $(BUILDSYS)/genconf $(M)PP $< - $(Q)$(s)/build/genconf $< $@ 
$(o)/config.mk + $(Q)$(BUILDSYS)/genconf $< $@ $(o)/config.mk $(Q)chmod +x $@ $(Q)$(call symlink,$@,run/$(BINDIR)) -$(o)/%: %.sh $(o)/config.mk $(s)/build/genconf +$(o)/%: %.sh $(o)/config.mk $(BUILDSYS)/genconf $(M)PP $< - $(Q)$(s)/build/genconf $< $@ $(o)/config.mk + $(Q)$(BUILDSYS)/genconf $< $@ $(o)/config.mk $(Q)chmod +x $@ $(Q)$(call symlink,$@,run/$(BINDIR)) -$(o)/%: $(s)/%.pl $(o)/config.mk $(s)/build/genconf +$(o)/%: $(s)/%.pl $(o)/config.mk $(BUILDSYS)/genconf $(M)PP $< - $(Q)$(s)/build/genconf $< $@ $(o)/config.mk + $(Q)$(BUILDSYS)/genconf $< $@ $(o)/config.mk $(Q)chmod +x $@ $(Q)$(call symlink,$@,run/$(BINDIR)) -$(o)/%: %.pl $(o)/config.mk $(s)/build/genconf +$(o)/%: %.pl $(o)/config.mk $(BUILDSYS)/genconf $(M)PP $< - $(Q)$(s)/build/genconf $< $@ $(o)/config.mk + $(Q)$(BUILDSYS)/genconf $< $@ $(o)/config.mk $(Q)chmod +x $@ $(Q)$(call symlink,$@,run/$(BINDIR)) @@ -183,14 +202,28 @@ $(DATAFILES): $(o)/%: $(s)/% $(Q)cp $^ $@ $(Q)$(call symlink,$@,run/$(DATADIR)) -# Default installation target +# Rules for documentation + +$(o)/%.html: $(o)/%.txt $(BUILDSYS)/asciidoc.conf $(BUILDSYS)/asciidoc-xhtml.conf run/$(DOCDIR)/$(DOC_MODULE)/.dir-stamp + $(M)"DOC-HTML $<" + $(Q)asciidoc -e -f $(BUILDSYS)/asciidoc.conf -f $(BUILDSYS)/asciidoc-xhtml.conf -f $(HOST_PREFIX)/etc/asciidoc/asciidoc.conf -f $(HOST_PREFIX)/etc/asciidoc/xhtml11.conf $< + $(Q)$(call symlink,$@,run/$(DOCDIR)/$(DOC_MODULE)) + +# In reality, we do not depend on the .txt files, but on the corresponding .deflist's. +# However, the Makefile language cannot express that doc-extract generates both .txt +# and .deflist, so we always use the .txt's in dependencies. 
+$(patsubst %.html,%.txt,$(DOC_INDICES)): $(o)/%.txt: $(patsubst %.html,%.txt,$(DOCS)) $(BUILDSYS)/doc-defs + $(M)"DOC-DEFS $@" + $(Q)echo $@: $(DOC_HEAD) $(DOC_LIST) >> $(o)/depend.new + $(Q)$(BUILDSYS)/doc-defs $(DOC_HEAD) $@ $(DOC_LIST) -default-install: - SH_EXTRA_RUNDIRS="$(sort $(EXTRA_RUNDIRS))" SH_INSTALL_RUNDIRS="$(sort $(INSTALL_RUNDIRS))" SH_CONFIGS="$(sort $(CONFIGS))" SH_AUTO_CONFIRM="$(CONFIRM)" $(s)/build/installer $(INSTALL_DIR) +$(patsubst %.html,%.txt,$(DOCS)): $(o)/%.txt: $(s)/%.txt $(BUILDSYS)/doc-extract + $(M)"DOC-EXT $<" + $(Q)$(BUILDSYS)/doc-extract $< $@ $(o)/depend.new $(s) $(patsubst %.txt,%.deflist,$@) # Don't delete intermediate targets. There shouldn't be any, but due to bugs # in GNU Make rules with targets in not-yet-existing directories are ignored # when searching for implicit rules and thence targets considered intermediate. .SECONDARY: -.PHONY: all clean distclean runtree programs api datafiles force tags configs dust install default-install +.PHONY: all clean distclean runtree programs api datafiles force tags configs dust install docs tests diff --git a/build/Makefile b/build/Makefile new file mode 100644 index 0000000..54698d2 --- /dev/null +++ b/build/Makefile @@ -0,0 +1,19 @@ +# Makefile for Sherlock Build Tools + +DIRS+=build + +$(o)/build/genhash: $(o)/build/genhash.o + +# This is a hack which compensates make's desires for propagating per-rule settings +# of variables: if some module specifies its own LIBS and it depends on genhash, +# genhash is sometimes built with the module's LIBS (if it isn't already built). +# A proper solution would be using a different rule for linking build/*, but +# as it currently concerns only genhash, it's easier to battle this way. 
+$(o)/build/genhash: LIBS= + +INSTALL_TARGETS+=install-build +install-build: + install -d -m 755 $(DESTDIR)$(INSTALL_SHARE_DIR)/ucw/build + install -m 755 $(addprefix $(BUILDSYS)/,install-includes doc-defs doc-extract genconf mergedeps tester lib-deps lib-flags) $(DESTDIR)$(INSTALL_SHARE_DIR)/ucw/build + install -m 644 $(addprefix $(BUILDSYS)/,asciidoc.conf asciidoc-xhtml.conf Makebottom Maketop) $(DESTDIR)$(INSTALL_SHARE_DIR)/ucw/build +.PHONY: install-build diff --git a/build/Maketop b/build/Maketop index cf93e1b..4fa79c5 100644 --- a/build/Maketop +++ b/build/Maketop @@ -1,5 +1,5 @@ # Top part of Makefile for the UCW Libraries -# (c) 1997--2007 Martin Mares +# (c) 1997--2008 Martin Mares # Set to 1 if you want verbose output V=0 @@ -16,6 +16,7 @@ LDFLAGS=$(LOPT) $(LEXTRA) DIRS= PROGS= CONFIGS= +CONFIG_SRC_DIR=$(CONFIG_DIR) TESTS= EXTRA_RUNDIRS=tmp log INSTALL_RUNDIRS=bin lib @@ -25,19 +26,25 @@ API_LIBS= # Various files whose type does not fit into PROGS DATAFILES= +ifdef CONFIG_DARWIN +DYNAMIC_LIBRARIES=dylib +SOEXT=bundle +HOST_PREFIX=/sw +else +DYNAMIC_LIBRARIES=so +SOEXT=so +HOST_PREFIX= +endif + ifdef CONFIG_SHARED -LS=so +LS=$(DYNAMIC_LIBRARIES) OS=oo else LS=a OS=o endif -ifdef CONFIG_DARWIN -SOEXT=bundle -else -SOEXT=so -endif +SO_RUNDIR=lib # Whenever "make -s" (silent) is run, turn on verbose mode (paradoxical, but gives the right result) ifneq ($(findstring s,$(MAKEFLAGS)),) @@ -58,12 +65,13 @@ endif # Clean needs to be a double-colon rule since we want sub-makefiles to be able # to define their own cleanup actions. dust:: - rm -f `find . -path "*~" -or -name "\#*\#" -or -name core` + rm -f `find . 
-path "*~" -or -name "\#*\#"` rm -f allocs.tmp cscope.out TAGS clean:: dust - rm -rf `find obj -mindepth 1 -maxdepth 1 -not \( -name config.mk -o -name autoconf.h \)` - rm -rf tests run/{bin,lib,include,.tree-stamp} + rm -rf `find obj/ucw -mindepth 1 -maxdepth 1 -not -name autoconf.h` + rm -rf `find obj -mindepth 1 -maxdepth 1 -not \( -name config.mk -o -name autoconf.h -o -name ucw \)` + rm -rf tests run/{bin,lib,include,.tree-stamp,doc} distclean:: clean rm -rf obj run @@ -71,6 +79,9 @@ distclean:: clean testclean:: rm -f `find obj -name "*.test"` +docclean:: + rm -f $(DOCS) $(patsubst %.html,%.txt,$(DOCS)) + # Extra default rules (appended to by submakefiles) extras:: @@ -80,3 +91,4 @@ space:=$(empty) $(empty) backref=$(subst $(space),/,$(patsubst %,..,$(subst /,$(space),$(1)))) tack-on=$(if $(patsubst /%,,$(2)),$(1)/$(2),$(2)) symlink=ln -sf $(call tack-on,$(call backref,$(2)),$(1)) $(2)/ +symlink-alias=ln -sf $(call tack-on,$(call backref,$(2)),$(1)) $(2)/$(3) diff --git a/build/genconf b/build/genconf index eaf2d1c..30e8896 100755 --- a/build/genconf +++ b/build/genconf @@ -66,10 +66,13 @@ while () { die "Piped command '$cmd' failed" if $?; print OUT `$1`; } else { - sub repl ($) { + sub repl($); + sub repl($) { my $v = shift @_; exists $vars{$v} or die "Cannot substitute $v: variable not set"; - return $vars{$v}; + my $x = $vars{$v}; + while ($x =~ s/\$\((\w+)\)/repl($1)/ge) { } + return $x; } s/@(\w+)@/repl($1)/ge; print OUT; diff --git a/cf/library b/cf/libucw similarity index 100% rename from cf/library rename to cf/libucw diff --git a/configure b/configure index d8c25e2..2b64617 100755 --- a/configure +++ b/configure @@ -1,6 +1,6 @@ #!/usr/bin/perl # Configure script for MO-Eval -# (c) 2008 Martin Mares +# (c) 2008--2009 Martin Mares use warnings; use strict; @@ -17,15 +17,19 @@ BEGIN { die "Don't know how to find myself. 
Please set SRCDIR manually."; } } - require "$srcdir/lib/perl/Configure.pm"; - UCW::Configure::import UCW::Configure; } +use lib "$srcdir/ucw/perl"; +use UCW::Configure; +use UCW::Configure::Pkg; + Init($srcdir, "default.cfg"); -Include "lib/default.cfg"; +Include "ucw/default.cfg"; Log "### Configuring MO-Eval ###\n\n"; Include Get("CONFIG"); -Include "lib/autoconf.cfg"; +require UCW::Configure::Paths; +require UCW::Configure::C; +require UCW::Configure::LibUCW; if (Get("CONFIG_SUBMIT") || Get("CONFIG_MOP")) { # Build libucw only if it is needed diff --git a/default.cfg b/default.cfg index 3709b5f..699a89d 100644 --- a/default.cfg +++ b/default.cfg @@ -9,6 +9,8 @@ Set("CONFIG_MOP" => 1); # Settings of libucw UnSet("CONFIG_SHARED"); UnSet("CONFIG_UCW_THREADS"); +Set("CONFIG_LOCAL"); +Set("CONFIG_DIR", "cf"); # Return success 1; diff --git a/lib/Makefile b/lib/Makefile deleted file mode 100644 index 7e520c2..0000000 --- a/lib/Makefile +++ /dev/null @@ -1,138 +0,0 @@ -# Makefile for the UCW Library (c) 1997--2007 Martin Mares - -DIRS+=lib -CONFIGS+=library -LIBUCW=$(o)/lib/libucw.pc - -ifdef CONFIG_UCW_DBTOOL -PROGS+=$(o)/lib/db-tool -endif - -LIBUCW_MODS= \ - threads \ - alloc alloc_str realloc bigalloc mempool mempool-str mempool-fmt eltpool \ - mmap pagecache partmap hashfunc \ - lists slists simple-lists bitsig \ - log log-file proctitle \ - conf-alloc conf-dump conf-input conf-intr conf-journal conf-parse conf-section \ - ipaccess \ - profile \ - fastbuf ff-binary ff-string ff-printf ff-unicode \ - fb-file carefulio fb-mem fb-temp fb-mmap fb-limfd fb-buffer fb-grow fb-pool fb-atomic fb-param \ - str_ctype str_upper str_lower unicode stkstring \ - wildmatch wordsplit ctmatch patimatch patmatch regex \ - prime primetable random timer randomkey \ - bit-ffs bit-fls \ - db \ - url \ - mainloop exitstatus runcmd sighandler \ - lizard lizard-safe adler32 \ - md5 md5hex \ - base64 base224 \ - sync \ - qache \ - string \ - bbuf \ - getopt - -LIBUCW_INCLUDES= \ - lib.h 
config.h threads.h \ - mempool.h pagecache.h \ - arraysort.h \ - lists.h clists.h slists.h simple-lists.h \ - unaligned.h prefetch.h \ - bbuf.h gbuf.h bitarray.h bitsig.h \ - hashfunc.h hashtable.h \ - heap.h binheap.h binheap-node.h \ - redblack.h \ - binsearch.h \ - bitops.h \ - conf.h getopt.h ipaccess.h \ - profile.h \ - fastbuf.h lfs.h ff-unicode.h ff-utf8.h ff-binary.h \ - chartype.h unicode.h stkstring.h \ - wildmatch.h patmatch.h \ - db.h \ - url.h \ - mainloop.h \ - lizard.h \ - md5.h \ - base64.h base224.h \ - qache.h \ - kmp.h kmp-search.h binsearch.h \ - partmap.h - -ifdef CONFIG_UCW_THREADS -# Some modules require threading -LIBUCW_MODS+=threads-conf workqueue asio fb-direct -LIBUCW_INCLUDES+=workqueue.h semaphore.h asio.h -endif - -ifdef CONFIG_OWN_REGEX -include $(s)/lib/regex/Makefile -endif - -ifdef CONFIG_OWN_GETOPT -include $(s)/lib/getopt/Makefile -endif - -include $(s)/lib/sorter/Makefile - -LIBUCW_MOD_PATHS=$(addprefix $(o)/lib/,$(LIBUCW_MODS)) - -$(o)/lib/libucw.a: $(addsuffix .o,$(LIBUCW_MOD_PATHS)) -$(o)/lib/libucw.so: $(addsuffix .oo,$(LIBUCW_MOD_PATHS)) - -$(o)/lib/hashfunc.o $(o)/lib/hashfunc.oo: CFLAGS += -funroll-loops -$(o)/lib/lizard.o: CFLAGS += $(COPT2) -funroll-loops - -$(o)/lib/db-test: $(o)/lib/db-test.o $(LIBUCW) -$(o)/lib/db-tool: $(o)/lib/db-tool.o $(LIBUCW) -$(o)/lib/conf-test: $(o)/lib/conf-test.o $(LIBUCW) -$(o)/lib/lfs-test: $(o)/lib/lfs-test.o $(LIBUCW) -$(o)/lib/hash-test: $(o)/lib/hash-test.o $(LIBUCW) -$(o)/lib/str-test: $(o)/lib/str-test.o $(LIBUCW) -$(o)/lib/asort-test: $(o)/lib/asort-test.o $(LIBUCW) -$(o)/lib/redblack-test: $(o)/lib/redblack-test.o $(LIBUCW) -$(o)/lib/binheap-test: $(o)/lib/binheap-test.o $(LIBUCW) -$(o)/lib/lizard-test: $(o)/lib/lizard-test.o $(LIBUCW) -$(o)/lib/kmp-test: $(o)/lib/kmp-test.o $(LIBUCW) $(LIBCHARSET) -$(o)/lib/ipaccess-test: $(o)/lib/ipaccess-test.o $(LIBUCW) - -TESTS+=$(addprefix $(o)/lib/,regex.test unicode.test hash-test.test mempool.test stkstring.test \ - slists.test 
kmp-test.test bbuf.test getopt.test fastbuf.test ff-unicode.test eltpool.test) - -$(o)/lib/regex.test: $(o)/lib/regex-t -$(o)/lib/unicode.test: $(o)/lib/unicode-t -$(o)/lib/hash-test.test: $(o)/lib/hash-test -$(o)/lib/mempool.test: $(o)/lib/mempool-t $(o)/lib/mempool-fmt-t $(o)/lib/mempool-str-t -$(o)/lib/stkstring.test: $(o)/lib/stkstring-t -$(o)/lib/bitops.test: $(o)/lib/bit-ffs-t $(o)/lib/bit-fls-t -$(o)/lib/slists.test: $(o)/lib/slists-t -$(o)/lib/kmp-test.test: $(o)/lib/kmp-test -$(o)/lib/bbuf.test: $(o)/lib/bbuf-t -$(o)/lib/getopt.test: $(o)/lib/getopt-t -$(o)/lib/fastbuf.test: $(o)/lib/fb-file-t $(o)/lib/fb-grow-t $(o)/lib/fb-pool-t -$(o)/lib/ff-unicode.test: $(o)/lib/ff-unicode-t -$(o)/lib/eltpool.test: $(o)/lib/eltpool-t - -ifdef CONFIG_UCW_THREADS -TESTS+=$(addprefix $(o)/lib/,asio.test) -$(o)/lib/asio.test: $(o)/lib/asio-t -endif - -API_LIBS+=libucw -API_INCLUDES+=$(o)/lib/.include-stamp -$(o)/lib/.include-stamp: $(addprefix $(s)/lib/,$(LIBUCW_INCLUDES)) obj/autoconf.h - $(Q)$(s)/build/install-includes $( - * - * This software may be freely distributed and used according to the terms - * of the GNU Lesser General Public License. - */ - -uns base224_encode(byte *dest, const byte *src, uns len); -uns base224_decode(byte *dest, const byte *src, uns len); - -/* - * Warning: when encoding, at least 4 bytes of extra space are needed. - * Better use this macro to calculate buffer size. - */ -#define BASE224_ENC_LENGTH(x) (((x)*8+38)/39*5) - -/* - * When called for BASE224_IN_CHUNK-byte chunks, the result will be - * always BASE224_OUT_CHUNK bytes long. If a longer block is split - * to such chunks, the result will be identical. - */ -#define BASE224_IN_CHUNK 39 -#define BASE224_OUT_CHUNK 40 diff --git a/lib/bbuf.h b/lib/bbuf.h deleted file mode 100644 index 22e62bb..0000000 --- a/lib/bbuf.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * UCW Library -- A simple growing buffer for byte-sized items. 
- * - * (c) 2004 Martin Mares - * - * This software may be freely distributed and used according to the terms - * of the GNU Lesser General Public License. - */ - -#ifndef _UCW_BBUF_H -#define _UCW_BBUF_H - -#define GBUF_TYPE byte -#define GBUF_PREFIX(x) bb_##x -#include "lib/gbuf.h" - -char *bb_vprintf(bb_t *bb, const char *fmt, va_list args); -char *bb_printf(bb_t *bb, const char *fmt, ...); -char *bb_vprintf_at(bb_t *bb, uns ofs, const char *fmt, va_list args); -char *bb_printf_at(bb_t *bb, uns ofs, const char *fmt, ...); - -#endif diff --git a/lib/chartype.h b/lib/chartype.h deleted file mode 100644 index 09dc1ec..0000000 --- a/lib/chartype.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * UCW Library -- Character Types - * - * (c) 1997--2004 Martin Mares - * - * This software may be freely distributed and used according to the terms - * of the GNU Lesser General Public License. - */ - -#ifndef _UCW_CHARTYPE_H -#define _UCW_CHARTYPE_H - -#define _C_UPPER 1 /* Upper-case letters */ -#define _C_LOWER 2 /* Lower-case letters */ -#define _C_PRINT 4 /* Printable */ -#define _C_DIGIT 8 /* Digits */ -#define _C_CTRL 16 /* Control characters */ -#define _C_XDIGIT 32 /* Hexadecimal digits */ -#define _C_BLANK 64 /* White spaces (spaces, tabs, newlines) */ -#define _C_INNER 128 /* `inner punctuation' -- underscore etc. 
*/ - -#define _C_ALPHA (_C_UPPER | _C_LOWER) -#define _C_ALNUM (_C_ALPHA | _C_DIGIT) -#define _C_WORD (_C_ALNUM | _C_INNER) -#define _C_WSTART (_C_ALPHA | _C_INNER) - -extern const unsigned char _c_cat[256], _c_upper[256], _c_lower[256]; - -#define Category(x) (_c_cat[(unsigned char)(x)]) -#define Ccat(x,y) (Category(x) & y) - -#define Cupper(x) Ccat(x, _C_UPPER) -#define Clower(x) Ccat(x, _C_LOWER) -#define Calpha(x) Ccat(x, _C_ALPHA) -#define Calnum(x) Ccat(x, _C_ALNUM) -#define Cprint(x) Ccat(x, _C_PRINT) -#define Cdigit(x) Ccat(x, _C_DIGIT) -#define Cxdigit(x) Ccat(x, _C_XDIGIT) -#define Cword(x) Ccat(x, _C_WORD) -#define Cblank(x) Ccat(x, _C_BLANK) -#define Cctrl(x) Ccat(x, _C_CTRL) -#define Cspace(x) Cblank(x) - -#define Cupcase(x) _c_upper[(unsigned char)(x)] -#define Clocase(x) _c_lower[(unsigned char)(x)] - -#define Cxvalue(x) (((x)<'A')?((x)-'0'):(((x)&0xdf)-'A'+10)) - -#endif diff --git a/lib/conf.h b/lib/conf.h deleted file mode 100644 index 8085744..0000000 --- a/lib/conf.h +++ /dev/null @@ -1,163 +0,0 @@ -/* - * UCW Library -- Configuration files - * - * (c) 2001--2006 Robert Spalek - * (c) 2003--2006 Martin Mares - * - * This software may be freely distributed and used according to the terms - * of the GNU Lesser General Public License. 
- */ - -#ifndef _UCW_CONF_H -#define _UCW_CONF_H - -enum cf_class { - CC_END, // end of list - CC_STATIC, // single variable or static array - CC_DYNAMIC, // dynamically allocated array - CC_PARSER, // arbitrary parser function - CC_SECTION, // section appears exactly once - CC_LIST, // list with 0..many nodes - CC_BITMAP // of up to 32 items -}; - -enum cf_type { - CT_INT, CT_U64, CT_DOUBLE, // number types - CT_IP, // IP address - CT_STRING, // string type - CT_LOOKUP, // in a string table - CT_USER // user-defined type -}; - -struct fastbuf; -typedef char *cf_parser(uns number, char **pars, void *ptr); - /* A parser function gets an array of (strdup'ed) strings and a pointer with - * the customized information (most likely the target address). It can store - * the parsed value anywhere in any way it likes, however it must first call - * cf_journal_block() on the overwritten memory block. It returns an error - * message or NULL if everything is all right. */ -typedef char *cf_parser1(char *string, void *ptr); - /* A parser function for user-defined types gets a string and a pointer to - * the destination variable. It must store the value within [ptr,ptr+size), - * where size is fixed for each type. It should not call cf_journal_block(). */ -typedef char *cf_hook(void *ptr); - /* An init- or commit-hook gets a pointer to the section or NULL if this - * is the global section. It returns an error message or NULL if everything - * is all right. The init-hook should fill in default values (needed for - * dynamically allocated nodes of link lists or for filling global variables - * that are run-time dependent). The commit-hook should perform sanity - * checks and postprocess the parsed values. Commit-hooks must call - * cf_journal_block() too. Caveat! init-hooks for static sections must not - * use cf_malloc() but normal xmalloc(). */ -typedef void cf_dumper1(struct fastbuf *fb, void *ptr); - /* Dumps the contents of a variable of a user-defined type. 
*/ -typedef char *cf_copier(void *dest, void *src); - /* Similar to init-hook, but it copies attributes from another list node - * instead of setting the attributes to default values. You have to provide - * it if your node contains parsed values and/or sub-lists. */ - -struct cf_user_type { - uns size; // of the parsed attribute - char *name; // name of the type (for dumping) - cf_parser1 *parser; // how to parse it - cf_dumper1 *dumper; // how to dump the type -}; - -struct cf_section; -struct cf_item { - const char *name; // case insensitive - int number; // length of an array or #parameters of a parser (negative means at most) - void *ptr; // pointer to a global variable or an offset in a section - union cf_union { - struct cf_section *sec; // declaration of a section or a list - cf_parser *par; // parser function - char **lookup; // NULL-terminated sequence of allowed strings for lookups - struct cf_user_type *utype; // specification of the user-defined type - } u; - enum cf_class cls:16; // attribute class - enum cf_type type:16; // type of a static or dynamic attribute -}; - -struct cf_section { - uns size; // 0 for a global block, sizeof(struct) for a section - cf_hook *init; // fills in default values (no need to bzero) - cf_hook *commit; // verifies parsed data (optional) - cf_copier *copy; // copies values from another instance (optional, no need to copy basic attributes) - struct cf_item *cfg; // CC_END-terminated array of items - uns flags; // for internal use only -}; - -/* Declaration of cf_section */ -#define CF_TYPE(s) .size = sizeof(s) -#define CF_INIT(f) .init = (cf_hook*) f -#define CF_COMMIT(f) .commit = (cf_hook*) f -#define CF_COPY(f) .copy = (cf_copier*) f -#define CF_ITEMS .cfg = ( struct cf_item[] ) -#define CF_END { .cls = CC_END } -/* Configuration items */ -#define CF_STATIC(n,p,T,t,c) { .cls = CC_STATIC, .type = CT_##T, .name = n, .number = c, .ptr = CHECK_PTR_TYPE(p,t*) } -#define CF_DYNAMIC(n,p,T,t,c) { .cls = CC_DYNAMIC, .type = 
CT_##T, .name = n, .number = c, .ptr = CHECK_PTR_TYPE(p,t**) } -#define CF_PARSER(n,p,f,c) { .cls = CC_PARSER, .name = n, .number = c, .ptr = p, .u.par = (cf_parser*) f } -#define CF_SECTION(n,p,s) { .cls = CC_SECTION, .name = n, .number = 1, .ptr = p, .u.sec = s } -#define CF_LIST(n,p,s) { .cls = CC_LIST, .name = n, .number = 1, .ptr = CHECK_PTR_TYPE(p,clist*), .u.sec = s } -#define CF_BITMAP_INT(n,p) { .cls = CC_BITMAP, .type = CT_INT, .name = n, .number = 1, .ptr = CHECK_PTR_TYPE(p,u32*) } -#define CF_BITMAP_LOOKUP(n,p,t) { .cls = CC_BITMAP, .type = CT_LOOKUP, .name = n, .number = 1, .ptr = CHECK_PTR_TYPE(p,u32*), .u.lookup = t } -/* Configuration items for basic types */ -#define CF_INT(n,p) CF_STATIC(n,p,INT,int,1) -#define CF_INT_ARY(n,p,c) CF_STATIC(n,p,INT,int,c) -#define CF_INT_DYN(n,p,c) CF_DYNAMIC(n,p,INT,int,c) -#define CF_UNS(n,p) CF_STATIC(n,p,INT,uns,1) -#define CF_UNS_ARY(n,p,c) CF_STATIC(n,p,INT,uns,c) -#define CF_UNS_DYN(n,p,c) CF_DYNAMIC(n,p,INT,uns,c) -#define CF_U64(n,p) CF_STATIC(n,p,U64,u64,1) -#define CF_U64_ARY(n,p,c) CF_STATIC(n,p,U64,u64,c) -#define CF_U64_DYN(n,p,c) CF_DYNAMIC(n,p,U64,u64,c) -#define CF_DOUBLE(n,p) CF_STATIC(n,p,DOUBLE,double,1) -#define CF_DOUBLE_ARY(n,p,c) CF_STATIC(n,p,DOUBLE,double,c) -#define CF_DOUBLE_DYN(n,p,c) CF_DYNAMIC(n,p,DOUBLE,double,c) -#define CF_IP(n,p) CF_STATIC(n,p,IP,u32,1) -#define CF_IP_ARY(n,p,c) CF_STATIC(n,p,IP,u32,c) -#define CF_IP_DYN(n,p,c) CF_DYNAMIC(n,p,IP,u32,c) -#define CF_STRING(n,p) CF_STATIC(n,p,STRING,char*,1) -#define CF_STRING_ARY(n,p,c) CF_STATIC(n,p,STRING,char*,c) -#define CF_STRING_DYN(n,p,c) CF_DYNAMIC(n,p,STRING,char*,c) -#define CF_LOOKUP(n,p,t) { .cls = CC_STATIC, .type = CT_LOOKUP, .name = n, .number = 1, .ptr = CHECK_PTR_TYPE(p,int*), .u.lookup = t } -#define CF_LOOKUP_ARY(n,p,t,c) { .cls = CC_STATIC, .type = CT_LOOKUP, .name = n, .number = c, .ptr = CHECK_PTR_TYPE(p,int*), .u.lookup = t } -#define CF_LOOKUP_DYN(n,p,t,c) { .cls = CC_DYNAMIC, .type = CT_LOOKUP, .name = n, 
.number = c, .ptr = CHECK_PTR_TYPE(p,int**), .u.lookup = t } -#define CF_USER(n,p,t) { .cls = CC_STATIC, .type = CT_USER, .name = n, .number = 1, .ptr = p, .u.utype = t } -#define CF_USER_ARY(n,p,t,c) { .cls = CC_STATIC, .type = CT_USER, .name = n, .number = c, .ptr = p, .u.utype = t } -#define CF_USER_DYN(n,p,t,c) { .cls = CC_DYNAMIC, .type = CT_USER, .name = n, .number = c, .ptr = p, .u.utype = t } - -/* If you aren't picky about the number of parameters */ -#define CF_ANY_NUM -0x7fffffff - -#define DARY_LEN(a) ((uns*)a)[-1] - // length of a dynamic array -#define DARY_ALLOC(type,len,val...) ((struct { uns l; type a[len]; }) { .l = len, .a = { val } }).a - // creates a static instance of a dynamic array - -/* Memory allocation: conf-alloc.c */ -struct mempool; -extern struct mempool *cf_pool; -void *cf_malloc(uns size); -void *cf_malloc_zero(uns size); -char *cf_strdup(const char *s); -char *cf_printf(const char *fmt, ...) FORMAT_CHECK(printf,1,2); - -/* Undo journal for error recovery: conf-journal.c */ -extern uns cf_need_journal; -void cf_journal_block(void *ptr, uns len); -#define CF_JOURNAL_VAR(var) cf_journal_block(&(var), sizeof(var)) - -/* Declaration: conf-section.c */ -void cf_declare_section(const char *name, struct cf_section *sec, uns allow_unknown); -void cf_init_section(const char *name, struct cf_section *sec, void *ptr, uns do_bzero); - -/* Parsers for basic types: conf-parse.c */ -char *cf_parse_int(const char *str, int *ptr); -char *cf_parse_u64(const char *str, u64 *ptr); -char *cf_parse_double(const char *str, double *ptr); -char *cf_parse_ip(const char *p, u32 *varp); - -#endif - diff --git a/lib/config.h b/lib/config.h deleted file mode 100644 index e4c3b33..0000000 --- a/lib/config.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * UCW Library -- Configuration-Dependent Definitions - * - * (c) 1997--2007 Martin Mares - * (c) 2006 Robert Spalek - * - * This software may be freely distributed and used according to the terms - * of the GNU Lesser 
General Public License. - */ - -#ifndef _UCW_CONFIG_H -#define _UCW_CONFIG_H - -/* Configuration switches */ - -#include "autoconf.h" - -/* Tell libc we're going to use all extensions available */ - -#ifndef _GNU_SOURCE -#define _GNU_SOURCE -#endif - -/* Types (based on standard C99 integers) */ - -#include -#include - -typedef uint8_t byte; /* exactly 8 bits, unsigned */ -typedef uint8_t u8; /* exactly 8 bits, unsigned */ -typedef int8_t s8; /* exactly 8 bits, signed */ -typedef uint16_t u16; /* exactly 16 bits, unsigned */ -typedef int16_t s16; /* exactly 16 bits, signed */ -typedef uint32_t u32; /* exactly 32 bits, unsigned */ -typedef int32_t s32; /* exactly 32 bits, signed */ -typedef uint64_t u64; /* exactly 64 bits, unsigned */ -typedef int64_t s64; /* exactly 64 bits, signed */ - -typedef unsigned int uns; /* at least 32 bits */ -typedef u32 sh_time_t; /* seconds since UNIX epoch */ -typedef s64 timestamp_t; /* milliseconds since UNIX epoch */ - -#ifdef CONFIG_LARGE_FILES /* File positions */ -typedef s64 sh_off_t; -#else -typedef s32 sh_off_t; -#endif - -#endif diff --git a/lib/ctmatch.c b/lib/ctmatch.c deleted file mode 100644 index 7e80776..0000000 --- a/lib/ctmatch.c +++ /dev/null @@ -1,44 +0,0 @@ -/* - * UCW Library -- Content-Type Pattern Matching - * - * (c) 1997 Martin Mares - * - * This software may be freely distributed and used according to the terms - * of the GNU Lesser General Public License. 
- */ - -#include "lib/lib.h" -#include "lib/chartype.h" - -int -match_ct_patt(const char *p, const char *t) -{ - if (*p == '*' && !p[1]) /* "*" matches everything */ - return 1; - - if (*p == '*' && p[1] == '/') /* "*" on the left-hand side */ - { - while (*t && *t != ' ' && *t != ';' && *t != '/') - t++; - p += 2; - } - else /* Normal left-hand side */ - { - while (*p != '/') - if (Cupcase(*p++) != Cupcase(*t++)) - return 0; - p++; - } - if (*t++ != '/') - return 0; - - if (*p == '*' && !p[1]) /* "*" on the right-hand side */ - return 1; - while (*p) - if (Cupcase(*p++) != Cupcase(*t++)) - return 0; - if (*t && *t != ' ' && *t != ';') - return 0; - - return 1; -} diff --git a/lib/db-emul.c b/lib/db-emul.c deleted file mode 100644 index 62540de..0000000 --- a/lib/db-emul.c +++ /dev/null @@ -1,155 +0,0 @@ -/* - * UCW Library -- SDBM emulator at top of GDBM - * - * (c) 1999 Martin Mares - * - * This software may be freely distributed and used according to the terms - * of the GNU Lesser General Public License. - */ - -#include "lib/lib.h" -#include "lib/db.h" - -#include -#include -#include - -#include - -struct sdbm { - GDBM_FILE db; - datum prevkey; -}; - -struct sdbm * -sdbm_open(struct sdbm_options *o) -{ - struct sdbm *d = xmalloc(sizeof(struct sdbm)); - d->db = gdbm_open(o->name, - (o->page_order ? (1 << o->page_order) : 0), - ((o->flags & SDBM_WRITE) ? ((o->flags & SDBM_CREAT) ? GDBM_WRCREAT : GDBM_WRITER) : GDBM_READER) - | ((o->flags & SDBM_SYNC) ? 
GDBM_SYNC : 0), - 0666, - NULL); - if (o->cache_size) - gdbm_setopt(d->db, GDBM_CACHESIZE, &o->cache_size, sizeof(o->cache_size)); - d->prevkey.dptr = NULL; - return d; -} - -void -sdbm_close(struct sdbm *d) -{ - sdbm_rewind(d); - gdbm_close(d->db); - xfree(d); -} - -static int -sdbm_put_user(byte *D, uns Dl, byte *val, uns *vallen) -{ - if (vallen) - { - if (*vallen < Dl) - return 1; - *vallen = Dl; - } - if (val) - memcpy(val, D, Dl); - return 0; -} - -int -sdbm_store(struct sdbm *d, byte *key, uns keylen, byte *val, uns vallen) -{ - datum K, V; - int rc; - - K.dptr = key; - K.dsize = keylen; - V.dptr = val; - V.dsize = vallen; - rc = gdbm_store(d->db, K, V, GDBM_INSERT); - return (rc < 0) ? rc : !rc; -} - -int -sdbm_replace(struct sdbm *d, byte *key, uns keylen, byte *val, uns vallen) -{ - datum K, V; - int rc; - - if (!val) - return sdbm_delete(d, key, keylen); - K.dptr = key; - K.dsize = keylen; - V.dptr = val; - V.dsize = vallen; - rc = gdbm_store(d->db, K, V, GDBM_REPLACE); - return (rc < 0) ? rc : !rc; -} - -int -sdbm_delete(struct sdbm *d, byte *key, uns keylen) -{ - datum K; - - K.dptr = key; - K.dsize = keylen; - return !gdbm_delete(d->db, K); -} - -int -sdbm_fetch(struct sdbm *d, byte *key, uns keylen, byte *val, uns *vallen) -{ - datum K, V; - int rc; - - K.dptr = key; - K.dsize = keylen; - if (!val && !vallen) - return gdbm_exists(d->db, K); - V = gdbm_fetch(d->db, K); - if (!V.dptr) - return 0; - rc = sdbm_put_user(V.dptr, V.dsize, val, vallen); - xfree(V.dptr); - return rc ? 
SDBM_ERROR_TOO_LARGE : 1; -} - -void -sdbm_rewind(struct sdbm *d) -{ - if (d->prevkey.dptr) - { - xfree(d->prevkey.dptr); - d->prevkey.dptr = NULL; - } -} - -int -sdbm_get_next(struct sdbm *d, byte *key, uns *keylen, byte *val, uns *vallen) -{ - datum K; - - if (d->prevkey.dptr) - { - K = gdbm_nextkey(d->db, d->prevkey); - xfree(d->prevkey.dptr); - } - else - K = gdbm_firstkey(d->db); - d->prevkey = K; - if (!K.dptr) - return 0; - if (sdbm_put_user(K.dptr, K.dsize, key, keylen)) - return SDBM_ERROR_TOO_LARGE; - if (val || vallen) - return sdbm_fetch(d, key, *keylen, val, vallen); - return 1; -} - -void -sdbm_sync(struct sdbm *d) -{ -} diff --git a/lib/db-test.c b/lib/db-test.c deleted file mode 100644 index 9a6f3bb..0000000 --- a/lib/db-test.c +++ /dev/null @@ -1,475 +0,0 @@ -/* - * UCW Library -- Database Manager -- Tests and Benchmarks - * - * (c) 1999 Martin Mares - * - * This software may be freely distributed and used according to the terms - * of the GNU Lesser General Public License. 
- */ - -#if 1 -#include "lib/db.c" -#define NAME "SDBM" -#else -#include "lib/db-emul.c" -#define NAME "GDBM" -#endif - -#include -#include -#include -#include - -static struct sdbm_options opts = { - flags: SDBM_CREAT | SDBM_WRITE, - name: "db.test", - page_order: 10, - cache_size: 1024, - key_size: -1, - val_size: -1 -}; - -static struct sdbm *d; -static int key_min, key_max; /* min<0 -> URL distribution */ -static int val_min, val_max; -static int num_keys; /* Number of distinct keys */ -static int verbose; - -static void -help(void) -{ - printf("Usage: dbtest [] \n\ -\n\ -Options:\n\ --c Use cache of pages\n\ --p Use pages of order \n\ --k Use key size \n\ --k- Use key size uniformly distributed between and \n\ --kU Use keys with URL distribution\n\ --n Number of distinct keys\n\ --d[-] Use specified value size (see -k-)\n\ --t Perform the tests on an existing database file\n\ --v Be verbose\n\ --s Turn on synchronous mode\n\ --S Turn on supersynchronous mode\n\ --F Turn on fast mode\n\ -\n\ -Commands:\n\ -c Fill database\n\ -r Rewrite database\n\ -f[

%%][] Find records with probability of success

%% (default=100)\n\ -F[

%%][] Find, but don't fetch values\n\ -d Delete records\n\ -w Walk database\n\ -W Walk, but don't fetch values\n\ -"); - exit(0); -} - -static uns -krand(uns kn) -{ - return kn * 2000000011; -} - -static uns -gen_url_size(uns rnd) -{ - uns l, m, r; - static uns utable[] = { -0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 22, 108, 245, 481, 979, 3992, 7648, 13110, 19946, 27256, 34993, 43222, 52859, 64563, -80626, 117521, 147685, 188364, 233174, 290177, 347132, 407231, 465787, 540931, 628601, 710246, 808671, 922737, 1025691, 1138303, -1238802, 1344390, 1443843, 1533207, 1636494, 1739082, 1826911, 1910725, 1993940, 2094365, 2188987, 2267827, 2350190, 2441980, -2520713, 2593654, 2668632, 2736009, 2808356, 2889682, 2959300, 3017945, 3086488, 3146032, 3204818, 3251897, 3307001, 3349388, -3392798, 3433429, 3476765, 3529107, 3556884, 3585120, 3633005, 3677697, 3699561, 3716660, 3739823, 3765154, 3795096, 3821184, -3858117, 3908757, 3929095, 3943264, 3957033, 3969588, 3983441, 3994630, 4005413, 4028890, 4039678, 4058007, 4071906, 4087029, -4094233, 4105259, 4111603, 4120338, 4127364, 4133983, 4140310, 4144843, 4150565, 4155974, 4165132, 4170648, 4176811, 4187118, -4190866, 4199051, 4206686, 4216122, 4226109, 4233721, 4254123, 4261792, 4270396, 4276650, 4282932, 4291738, 4295932, 4299370, -4304011, 4307098, 4311866, 4318168, 4325730, 4329774, 4332946, 4336305, 4339770, 4345237, 4349038, 4356129, 4362872, 4366542, -4371077, 4374524, 4376733, 4378794, 4380652, 4382340, 4383552, 4385952, 4386914, 4393123, 4394106, 4395142, 4396593, 4399112, -4399909, 4401015, 4401780, 4402616, 4403454, 4404481, 4405231, 4405947, 4406886, 4408364, 4409159, 4409982, 4410872, 4412010, -4413341, 4414161, 4415673, 4417135, 4418032, 4419117, 4419952, 4420677, 4421387, 4421940, 4422469, 4423210, 4423696, 4424274, -4424982, 4425665, 4426363, 4427018, 4427969, 4428992, 4429791, 4430804, 4432601, 4433440, 4434157, 4434967, 4436280, 4439784, -4444255, 4445544, 4446416, 4447620, 4449638, 4453004, 4455470, 
4456982, 4457956, 4458617, 4459538, 4460007, 4460377, 4460768, -4461291, 4461520, 4461678, 4461911, 4462063, 4462239, 4462405, 4462607, 4462666, 4462801, 4462919, 4463108, 4463230, 4463438, -4463530, 4463698, 4463779, 4463908, 4463991, 4464138, 4464188, 4464391, 4464580, 4464868, 4464980, 4465174, 4465255, 4465473, -4465529, 4465681, 4465746, 4465916, 4465983, 4466171, 4466248, 4466430, 4466560, 4466751, 4466930, 4467807, 4468847, 4469940, -4470344, 4470662, 4470716, 4471120, 4471389, 4471814, 4472141, 4472545, 4472687, 4473051, 4473253, 4473603, 4473757, 4474065, -4474125, 4474354, 4474428, 4474655, 4474705, 4474841, 4474858, 4475133, 4475201, 4475327, 4475367, 4475482, 4475533, 4475576, -4475586, 4475616, 4475637, 4475659, 4475696, 4475736, 4475775, 4475794, 4476156, 4476711, 4477004, 4477133, 4477189, 4477676, -4477831, 4477900, 4477973, 4477994, 4478011, 4478040, 4478063, 4478085, 4478468, 4478715, 4479515, 4480034, 4481804, 4483259, -4483866, 4484202, 4484932, 4485693, 4486184, 4486549, 4486869, 4487405, 4487639, 4487845, 4488086, 4488256, 4488505, 4488714, -4492669, 4496233, 4497738, 4498122, 4498653, 4499862, 4501169, 4501627, 4501673, 4501811, 4502182, 4502475, 4502533, 4502542, -4502548, 4502733, 4503389, 4504381, 4505070, 4505378, 4505814, 4506031, 4506336, 4506642, 4506845, 4506971, 4506986, 4507016, -4507051, 4507098, 4507107, 4507114, 4507139, 4507478, 4507643, 4507674, 4507694, 4507814, 4507894, 4507904, 4507929, 4507989, -4508023, 4508047, 4508053, 4508063, 4508075, 4508092, 4508104, 4508113, 4508239, 4508285, 4508324, 4508335, 4508340, 4508378, -4508405, 4508419, 4508436, 4508449, 4508470, 4508488, 4508515, 4508541, 4508564, 4508570, 4508584, 4508594, 4508607, 4508634, -4508652, 4508665, 4508673, 4508692, 4508704, 4508742, 4508755, 4508773, 4508788, 4508798, 4508832, 4508869, 4508885, 4508905, -4508915, 4508947, 4508956, 4509061, 4509070, 4509357, 4509368, 4509380, 4509393, 4509401, 4509412, 4509426, 4509438, 4509451, -4509461, 4509473, 4509489, 
4509498, 4509512, 4509537, 4509568, 4509582, 4509621, 4509629, 4509747, 4509766, 4509776, 4509795, -4509802, 4509813, 4509822, 4509829, 4509834, 4509844, 4509854, 4509863, 4509868, 4509875, 4509886, 4509898, 4509908, 4509920, -4509932, 4509941, 4509949, 4509955, 4509967, 4509972, 4509979, 4509987, 4509999, 4510002, 4510010, 4510014, 4510018, 4510025, -4510028, 4510049, 4510055, 4510061, 4510068, 4510079, 4510085, 4510091, 4510098, 4510102, 4510104, 4510110, 4510121, 4510128, -4510132, 4510138, 4510144, 4510145, 4510153, 4510161, 4510174, 4510196, 4510199, 4510208, 4510209, 4510212, 4510216, 4510217, -4510219, 4510222, 4510228, 4510231, 4510236, 4510241, 4510245, 4510248, 4510250, 4510254, 4510255, 4510261, 4510262, 4510266, -4510266, 4510271, 4510285, 4510287, 4510291, 4510295, 4510303, 4510306, 4510308, 4510310, 4510314, 4510319, 4510320, 4510324, -4510328, 4510333, 4510333, 4510336, 4510340, 4510342, 4510348, 4510353, 4510359, 4510362, 4510365, 4510371, 4510373, 4510375, -4510378, 4510380, 4510385, 4510389, 4510391, 4510391, 4510394, 4510396, 4510397, 4510398, 4510400, 4510403, 4510406, 4510407, -4510408, 4510409, 4510411, 4510413, 4510417, 4510417, 4510419, 4510422, 4510426, 4510427, 4510430, 4510435, 4510437, 4510439, -4510440, 4510442, 4510442, 4510446, 4510447, 4510448, 4510450, 4510451, 4510451, 4510453, 4510454, 4510455, 4510457, 4510460, -4510460, 4510460, 4510462, 4510463, 4510466, 4510468, 4510472, 4510475, 4510480, 4510482, 4510483, 4510486, 4510488, 4510492, -4510494, 4510497, 4510497, 4510499, 4510503, 4510505, 4510506, 4510507, 4510509, 4510512, 4510514, 4510527, 4510551, 4510553, -4510554, 4510555, 4510556, 4510558, 4510561, 4510562, 4510566, 4510567, 4510568, 4510570, 4510573, 4510574, 4510586, 4510603, -4510605, 4510607, 4510610, 4510610, 4510613, 4510613, 4510614, 4510614, 4510615, 4510616, 4510616, 4510620, 4510622, 4510623, -4510624, 4510627, 4510628, 4510630, 4510631, 4510632, 4510634, 4510634, 4510634, 4510636, 4510636, 4510639, 4510639, 
4510640, -4510643, 4510647, 4510649, 4510650, 4510653, 4510653, 4510653, 4510653, 4510656, 4510659, 4510661, 4510664, 4510665, 4510669, -4510672, 4510673, 4510674, 4510675, 4510680, 4510683, 4510684, 4510686, 4510687, 4510690, 4510691, 4510693, 4510693, 4510697, -4510699, 4510700, 4510703, 4510704, 4510709, 4510711, 4510713, 4510713, 4510720, 4510720, 4510722, 4510724, 4510727, 4510729, -4510735, 4510735, 4510738, 4510740, 4510744, 4510745, 4510746, 4510748, 4510754, 4510756, 4510758, 4510761, 4510764, 4510766, -4510768, 4510768, 4510770, 4510770, 4510772, 4510774, 4510775, 4510775, 4510775, 4510776, 4510777, 4510780, 4510782, 4510783, -4510785, 4510786, 4510788, 4510789, 4510791, 4510793, 4510793, 4510793, 4510795, 4510795, 4510799, 4510803, 4510804, 4510804, -4510804, 4510805, 4510807, 4510809, 4510811, 4510811, 4510813, 4510815, 4510815, 4510816, 4510819, 4510820, 4510824, 4510827, -4510829, 4510829, 4510830, 4510833, 4510835, 4510837, 4510838, 4510838, 4510839, 4510840, 4510840, 4510842, 4510842, 4510843, -4510845, 4510845, 4510845, 4510847, 4510848, 4510848, 4510848, 4510850, 4510853, 4510855, 4510857, 4510859, 4510861, 4510862, -4510864, 4510865, 4510865, 4510865, 4510869, 4510869, 4510869, 4510869, 4510869, 4510870, 4510870, 4510872, 4510872, 4510873, -4510874, 4510875, 4510875, 4510877, 4510879, 4510879, 4510879, 4510879, 4510880, 4510881, 4510882, 4510883, 4510884, 4510885, -4510886, 4510887, 4510890, 4510890, 4510891, 4510892, 4510892, 4510893, 4510893, 4510895, 4510895, 4510896, 4510897, 4510899, -4510901, 4510901, 4510901, 4510902, 4510903, 4510903, 4510903, 4510905, 4510905, 4510906, 4510906, 4510907, 4510907, 4510909, -4510910, 4510911, 4510911, 4510911, 4510913, 4510913, 4510914, 4510914, 4510914, 4510915, 4510916, 4510918, 4510918, 4510919, -4510919, 4510919, 4510920, 4510921, 4510922, 4510923, 4510924, 4510924, 4510924, 4510924, 4510926, 4510927, 4510928, 4510928, -4510928, 4510928, 4510928, 4510930, 4510933, 4510935, 4510935, 4510935, 4510935, 
4510935, 4510936, 4510938, 4510947, 4510966, -4510967, 4510969, 4510973, 4510973, 4510974, 4510974, 4510974, 4510974, 4510974, 4510974, 4510975, 4510976, 4510976, 4510976, -4510976, 4510976, 4510976, 4510976, 4510977, 4510979, 4510979, 4510979, 4510979, 4510979, 4510979, 4510980, 4510980, 4510980, -4510980, 4510981, 4510981, 4510981, 4510982, 4510982, 4510982, 4510982, 4510982, 4510982, 4510982, 4510983, 4510983, 4510984, -4510984, 4510984, 4510984, 4510984, 4510985, 4510985, 4510985, 4510985, 4510987, 4510987, 4510987, 4510988, 4510988, 4510989, -4510989, 4510989, 4510989, 4510989, 4510990, 4510990, 4510990, 4510990, 4510990, 4510990, 4510990, 4510991, 4510991, 4510991, -4510991, 4510991, 4510991, 4510991, 4510992, 4510992, 4510992, 4510992, 4510992, 4510992, 4510992, 4510993, 4510993, 4510993, -4510994, 4510994, 4510994, 4510994, 4510995, 4510995, 4510996, 4510997, 4510998, 4510999, 4510999, 4511000, 4511000, 4511001, -4511001, 4511002, 4511002, 4511002, 4511003, 4511004, 4511004, 4511004, 4511004, 4511005, 4511006, 4511008, 4511008, 4511008, -4511009, 4511009, 4511009, 4511009, 4511010, 4511011, 4511011, 4511012, 4511012, 4511012, 4511012, 4511013, 4511013, 4511014, -4511014, 4511014, 4511014, 4511015, 4511018, 4511018, 4511018, 4511018, 4511018, 4511018, 4511018, 4511020, 4511020, 4511020, -4511020, 4511020, 4511020, 4511020, 4511021, 4511021, 4511021, 4511021, 4511021, 4511021, 4511021, 4511021, 4511021, 4511021, -4511021 - }; - - rnd %= utable[1024]; - l = 0; r = 1023; - while (l < r) - { - m = (l+r)/2; - if (utable[m] == rnd) - return m; - if (utable[m] >= rnd) - r = m - 1; - else - l = m + 1; - } - return l; -} - -static uns -gen_size(uns min, uns max, uns rnd) -{ - if (min == max) - return min; - else - return min + rnd % (max - min + 1); -} - -static void -gen_random(byte *buf, uns size, uns kn) -{ - kn = (kn + 0x36221057) ^ (kn << 24) ^ (kn << 15); - while (size--) - { - *buf++ = kn >> 24; - kn = kn*257 + 17; - } -} - -static int -keygen(byte *buf, uns 
kn) -{ - uns size, rnd; - - rnd = krand(kn); - if (key_min < 0) - size = gen_url_size(rnd); - else - size = gen_size(key_min, key_max, rnd); - *buf++ = kn >> 24; - *buf++ = kn >> 16; - *buf++ = kn >> 8; - *buf++ = kn; - if (size < 4) - return 4; - gen_random(buf, size-4, kn); - return size; -} - -static int -valgen(byte *buf, uns kn) -{ - uns size = gen_size(val_min, val_max, krand(kn)); - gen_random(buf, size, kn); - return size; -} - -static uns -keydec(byte *buf) -{ - return (buf[0] << 24) | (buf[1] << 16) | (buf[2] << 8) | buf[3]; -} - -static void -verb(char *msg, ...) -{ - int cat = 1; - va_list args; - - va_start(args, msg); - if (msg[0] == '^' && msg[1]) - { - cat = msg[1] - '0'; - msg += 2; - } - if (verbose >= cat) - vfprintf(stderr, msg, args); - va_end(args); -} - -static void -parse_size(int *min, int *max, char *c) -{ - char *d; - - if ((d = strchr(c, '-'))) - { - *d++ = 0; - *min = atol(c); - *max = atol(d); - } - else - *min = *max = atol(c); -} - -#define PROGRESS(i) if ((verbose > 2) || (verbose > 1 && !(i & 1023))) fprintf(stderr, "%d\r", i) - -int main(int argc, char **argv) -{ - int c, i, j, k, l, m; - byte kb[2048], vb[2048], vb2[2048]; - uns ks, vs, vs2, perc, cnt; - char *ch; - int dont_delete = 0; - timestamp_t timer; - - log_init("dbtest"); - setvbuf(stdout, NULL, _IONBF, 0); - setvbuf(stderr, NULL, _IONBF, 0); - while ((c = getopt(argc, argv, "c:p:k:n:d:vsStF")) >= 0) - switch (c) - { - case 'c': - opts.cache_size = atol(optarg); - break; - case 'p': - opts.page_order = atol(optarg); - break; - case 'k': - if (!strcmp(optarg, "U")) - key_min = key_max = -1; - else - parse_size(&key_min, &key_max, optarg); - break; - case 'n': - num_keys = atol(optarg); - break; - case 'd': - parse_size(&val_min, &val_max, optarg); - break; - case 'v': - verbose++; - break; - case 's': - opts.flags |= SDBM_SYNC; - break; - case 'S': - opts.flags |= SDBM_SYNC | SDBM_FSYNC; - break; - case 'F': - opts.flags |= SDBM_FAST; - break; - case 't': - dont_delete = 
1; - break; - default: - help(); - } - - if (key_min >= 0 && key_min < 4) - key_min = key_max = 4; - if (key_min == key_max && key_min >= 0) - opts.key_size = key_min; - if (val_min == val_max) - opts.val_size = val_min; - if (!num_keys) - die("Number of keys not given"); - - printf(NAME " benchmark: %d records, keys ", num_keys); - if (key_min < 0) - printf(""); - else - printf("%d-%d", key_min, key_max); - printf(", values %d-%d, page size %d, cache %d pages\n", val_min, val_max, 1 << opts.page_order, opts.cache_size); - - verb("OPEN(%s, key=%d, val=%d, cache=%d, pgorder=%d)\n", opts.name, opts.key_size, opts.val_size, - opts.cache_size, opts.page_order); - if (!dont_delete) - unlink(opts.name); - d = sdbm_open(&opts); - if (!d) - die("open failed: %m"); - - while (optind < argc) - { - char *o = argv[optind++]; - init_timer(&timer); - switch (*o) - { - case 'c': - printf("create %d: ", num_keys); - for(i=0; i= num_keys) - die("get_next: %d out of range", i); - PROGRESS(i); - vs2 = keygen(vb2, i); - if (ks != vs2 || memcmp(kb, vb2, ks)) - die("get_next: key mismatch at %d", i); - if (c) - { - vs2 = valgen(vb2, i); - if (vs != vs2 || memcmp(vb, vb2, vs)) - die("get_next: data mismatch at %d", i); - } - l += k; - m += i; - k++; - } - if (k != num_keys) - die("fetch: wrong # of keys: %d != %d", k, num_keys); - if (l != m) - die("fetch: wrong checksum: %d != %d", l, m); - break; - default: - help(); - } - sdbm_sync(d); - printf("%d ms\n", get_timer(&timer)); - } - - verb("CLOSE\n"); - sdbm_close(d); - - { - struct stat st; - if (stat(opts.name, &st)) die("stat: %m"); - printf("file size: %d bytes\n", (int) st.st_size); - } - return 0; -} diff --git a/lib/db-tool.c b/lib/db-tool.c deleted file mode 100644 index bbb419a..0000000 --- a/lib/db-tool.c +++ /dev/null @@ -1,264 +0,0 @@ -/* - * SDBM Database Utility - * - * (c) 2000--2001 Martin Mares - * - * This software may be freely distributed and used according to the terms - * of the GNU Lesser General Public License. 
- */ - -#include "lib/lib.h" -#include "lib/db.h" -#include "lib/db_internal.h" -#include "lib/fastbuf.h" -#include "lib/ff-binary.h" - -#include -#include -#include -#include -#include -#include - -static int verbose=0; -static int cache=1024; -static int force_key=-2; -static int force_val=-2; -static int force_page=-1; - -#define SDBM_DUMP_MAGIC 0x321f120e -#define SDBM_DUMP_VERSION 1 - -static void -dump(char *db, char *dmp) -{ - struct sdbm *src; - struct fastbuf *dest; - struct sdbm_options op; - int e, c=0; - - bzero(&op, sizeof(op)); - op.name = db; - op.cache_size = 16; - op.flags = 0; - src = sdbm_open(&op); - if (!src) - die("Source open failed: %m"); - - dest = bopen(dmp, O_WRONLY | O_CREAT | O_TRUNC, 65536); - bputl(dest, SDBM_DUMP_MAGIC); - bputl(dest, SDBM_DUMP_VERSION); - bputl(dest, src->page_order); - bputl(dest, src->key_size); - bputl(dest, src->val_size); - - fprintf(stderr, "Dumping database...\n"); - sdbm_rewind(src); - for(;;) - { - byte key[65536], val[65536]; - int klen = sizeof(key); - int vlen = sizeof(val); - e = sdbm_get_next(src, key, &klen, val, &vlen); - if (!e) - break; - if (e < 0) - fprintf(stderr, "sdbm_get_next: error %d\n", e); - if (!(c++ % 1024)) - { - fprintf(stderr, "%d\r", c); - fflush(stderr); - } - bputw(dest, klen); - bwrite(dest, key, klen); - bputw(dest, vlen); - bwrite(dest, val, vlen); - } - - sdbm_close(src); - bclose(dest); - fprintf(stderr, "Dumped %d records\n", c); -} - -static void -restore(char *dmp, char *db) -{ - struct sdbm *dest; - struct fastbuf *src; - struct sdbm_options op; - int e, c=0; - - src = bopen(dmp, O_RDONLY, 65536); - if (bgetl(src) != SDBM_DUMP_MAGIC || - bgetl(src) != SDBM_DUMP_VERSION) - die("%s: not a sdbm dump", dmp); - - bzero(&op, sizeof(op)); - op.name = db; - e = unlink(op.name); - if (e < 0 && errno != ENOENT) - die("unlink: %m"); - op.cache_size = cache; - op.flags = SDBM_CREAT | SDBM_WRITE | SDBM_FAST; - op.page_order = bgetl(src); - if (force_page >= 0) - op.page_order = 
force_page; - op.key_size = bgetl(src); - if (force_key >= 0) - op.key_size = force_key; - op.val_size = bgetl(src); - if (force_val >= 0) - op.val_size = force_val; - dest = sdbm_open(&op); - if (!dest) - die("Destination open failed"); - - fprintf(stderr, "Restoring database...\n"); - for(;;) - { - byte key[65536], val[65536]; - int klen, vlen; - klen = bgetw(src); - if (klen < 0) - break; - breadb(src, key, klen); - vlen = bgetw(src); - if (vlen < 0) - die("Corrupted dump file: value missing"); - breadb(src, val, vlen); - if (!(c++ % 1024)) - { - fprintf(stderr, "%d\r", c); - fflush(stderr); - } - if (sdbm_store(dest, key, klen, val, vlen) == 0) - fprintf(stderr, "sdbm_store: duplicate key\n"); - } - - bclose(src); - sdbm_close(dest); - fprintf(stderr, "Restored %d records\n", c); -} - -static void -rebuild(char *sdb, char *ddb) -{ - struct sdbm *src, *dest; - struct sdbm_options op; - int e, c=0; - - bzero(&op, sizeof(op)); - op.name = sdb; - op.cache_size = 16; - op.flags = 0; - src = sdbm_open(&op); - if (!src) - die("Source open failed: %m"); - - op.name = ddb; - e = unlink(op.name); - if (e < 0 && errno != ENOENT) - die("unlink: %m"); - op.cache_size = cache; - op.flags = SDBM_CREAT | SDBM_WRITE | SDBM_FAST; - op.page_order = (force_page >= 0) ? (u32) force_page : src->root->page_order; - op.key_size = (force_key >= -1) ? force_key : src->root->key_size; - op.val_size = (force_val >= -1) ? 
force_val : src->root->val_size; - dest = sdbm_open(&op); - if (!dest) - die("Destination open failed"); - - fprintf(stderr, "Rebuilding database...\n"); - sdbm_rewind(src); - for(;;) - { - byte key[65536], val[65536]; - int klen = sizeof(key); - int vlen = sizeof(val); - e = sdbm_get_next(src, key, &klen, val, &vlen); - if (!e) - break; - if (e < 0) - fprintf(stderr, "sdbm_get_next: error %d\n", e); - if (!(c++ % 1024)) - { - fprintf(stderr, "%d\r", c); - fflush(stderr); - } - if (sdbm_store(dest, key, klen, val, vlen) == 0) - fprintf(stderr, "sdbm_store: duplicate key\n"); - } - - sdbm_close(src); - sdbm_close(dest); - fprintf(stderr, "Copied %d records\n", c); -} - -int -main(int argc, char **argv) -{ - int o; - - while ((o = getopt(argc, argv, "vc:k:d:p:")) >= 0) - switch (o) - { - case 'v': - verbose++; - break; - case 'c': - cache=atol(optarg); - break; - case 'k': - force_key=atol(optarg); - break; - case 'd': - force_val=atol(optarg); - break; - case 'p': - force_page=atol(optarg); - break; - default: - bad: - fprintf(stderr, "Usage: db-tool [] \n\ -\n\ -Options:\n\ --v\t\tBe verbose\n\ --c\t\tUse cache of pages\n\ --d\t\tSet data size to (-1=variable) [restore,rebuild]\n\ --k\t\tSet key size to (-1=variable) [restore,rebuild]\n\ --p\t\tSet page order to [restore,rebuild]\n\ -\n\ -Commands:\n\ -b \tRebuild database\n\ -d \tDump database\n\ -r \tRestore database from dump\n\ -"); - return 1; - } - argc -= optind; - argv += optind; - if (argc < 1 || strlen(argv[0]) != 1) - goto bad; - - switch (argv[0][0]) - { - case 'b': - if (argc != 3) - goto bad; - rebuild(argv[1], argv[2]); - break; - case 'd': - if (argc != 3) - goto bad; - dump(argv[1], argv[2]); - break; - case 'r': - if (argc != 3) - goto bad; - restore(argv[1], argv[2]); - break; - default: - goto bad; - } - return 0; -} diff --git a/lib/db.c b/lib/db.c deleted file mode 100644 index d9c984b..0000000 --- a/lib/db.c +++ /dev/null @@ -1,598 +0,0 @@ -/* - * UCW Library -- Fast Database Management 
Routines - * - * (c) 1999--2001 Martin Mares - * - * This software may be freely distributed and used according to the terms - * of the GNU Lesser General Public License. - */ - -/* - * This library uses the standard algorithm for external hashing (page directory - * mapping topmost K bits of hash value to page address, directory splits and - * so on). Peculiarities of this implementation (aka design decisions): - * - * o We allow both fixed and variable length keys and values (this includes - * zero size values for cases you want to represent only a set of keys). - * o We assume that key_size + val_size < page_size. - * o We never shrink the directory nor free empty pages. (The reason is that - * if the database was once large, it's likely it will again become large soon.) - * o The only pages which can be freed are those of the directory (during - * directory split), so we keep only a simple 32-entry free block list - * and we assume it's sorted. - * o All pointers are always given in pages from start of the file. - * This gives us page_size*2^32 limit for file size which should be enough. - */ - -#include "lib/lib.h" -#include "lib/lfs.h" -#include "lib/pagecache.h" -#include "lib/db.h" -#include "lib/db_internal.h" - -#include -#include -#include -#include - -#define GET_PAGE(d,x) pgc_get((d)->cache, (d)->fd, ((sh_off_t)(x)) << (d)->page_order) -#define GET_ZERO_PAGE(d,x) pgc_get_zero((d)->cache, (d)->fd, ((sh_off_t)(x)) << (d)->page_order) -#define READ_PAGE(d,x) pgc_read((d)->cache, (d)->fd, ((sh_off_t)(x)) << (d)->page_order) -#define READ_DIR(d,off) pgc_read((d)->cache, (d)->fd, (((sh_off_t)(d)->root->dir_start) << (d)->page_order) + (off)) - -struct sdbm * -sdbm_open(struct sdbm_options *o) -{ - struct sdbm *d; - struct sdbm_root root, *r; - uns cache_size = o->cache_size ? o->cache_size : 16; - - d = xmalloc_zero(sizeof(struct sdbm)); - d->flags = o->flags; - d->fd = sh_open(o->name, ((d->flags & SDBM_WRITE) ? 
O_RDWR : O_RDONLY), 0666); - if (d->fd >= 0) /* Already exists, let's check it */ - { - if (read(d->fd, &root, sizeof(root)) != sizeof(root)) - goto bad; - if (root.magic != SDBM_MAGIC || root.version != SDBM_VERSION) - goto bad; - d->file_size = sh_seek(d->fd, 0, SEEK_END) >> root.page_order; - d->page_order = root.page_order; - d->page_size = 1 << root.page_order; - d->cache = pgc_open(d->page_size, cache_size); - d->root_page = pgc_read(d->cache, d->fd, 0); - d->root = (void *) d->root_page->data; - } - else if ((d->flags & SDBM_CREAT) && (d->fd = sh_open(o->name, O_RDWR | O_CREAT, 0666)) >= 0) - { - struct page *q; - uns page_order = o->page_order; - if (page_order < 10) - page_order = 10; - d->page_size = 1 << page_order; - d->cache = pgc_open(d->page_size, cache_size); - d->root_page = GET_ZERO_PAGE(d, 0); - r = d->root = (void *) d->root_page->data; /* Build root page */ - r->magic = SDBM_MAGIC; - r->version = SDBM_VERSION; - r->page_order = d->page_order = page_order; - r->key_size = o->key_size; - r->val_size = o->val_size; - r->dir_start = 1; - r->dir_order = 0; - d->file_size = 3; - q = GET_ZERO_PAGE(d, 1); /* Build page directory */ - GET32(q->data, 0) = 2; - pgc_put(d->cache, q); - q = GET_ZERO_PAGE(d, 2); /* Build single data page */ - pgc_put(d->cache, q); - } - else - goto bad; - d->dir_size = 1 << d->root->dir_order; - d->dir_shift = 32 - d->root->dir_order; - d->page_mask = d->page_size - 1; - d->key_size = d->root->key_size; - d->val_size = d->root->val_size; - return d; - -bad: - sdbm_close(d); - return NULL; -} - -void -sdbm_close(struct sdbm *d) -{ - if (d->root_page) - pgc_put(d->cache, d->root_page); - if (d->cache) - pgc_close(d->cache); - if (d->fd >= 0) - close(d->fd); - xfree(d); -} - -static uns -sdbm_alloc_pages(struct sdbm *d, uns number) -{ - uns where = d->file_size; - if (where + number < where) /* Wrap around? 
*/ - die("SDB: Database file too large, giving up"); - d->file_size += number; - return where; -} - -static uns -sdbm_alloc_page(struct sdbm *d) -{ - uns pos; - - if (!d->root->free_pool[0].count) - return sdbm_alloc_pages(d, 1); - pos = d->root->free_pool[0].first; - d->root->free_pool[0].first++; - if (!--d->root->free_pool[0].count) - { - memmove(d->root->free_pool, d->root->free_pool+1, (SDBM_NUM_FREE_PAGE_POOLS-1) * sizeof(d->root->free_pool[0])); - d->root->free_pool[SDBM_NUM_FREE_PAGE_POOLS-1].count = 0; - } - pgc_mark_dirty(d->cache, d->root_page); - return pos; -} - -static void -sdbm_free_pages(struct sdbm *d, uns start, uns number) -{ - uns i = 0; - - while (d->root->free_pool[i].count) - i++; - ASSERT(i < SDBM_NUM_FREE_PAGE_POOLS); - d->root->free_pool[i].first = start; - d->root->free_pool[i].count = number; - pgc_mark_dirty(d->cache, d->root_page); -} - -u32 -sdbm_hash(byte *key, uns keylen) -{ - /* - * This used to be the same hash function as GDBM uses, - * but it turned out that it tends to give the same results - * on similar keys. Damn it. 
- */ - u32 value = 0x238F13AF * keylen; - while (keylen--) - value = 37*value + *key++; - return (1103515243 * value + 12345); -} - -static int -sdbm_get_entry(struct sdbm *d, byte *pos, byte **key, uns *keylen, byte **val, uns *vallen) -{ - byte *p = pos; - - if (d->key_size >= 0) - *keylen = d->key_size; - else - { - *keylen = (p[0] << 8) | p[1]; - p += 2; - } - *key = p; - p += *keylen; - if (d->val_size >= 0) - *vallen = d->val_size; - else - { - *vallen = (p[0] << 8) | p[1]; - p += 2; - } - *val = p; - p += *vallen; - return p - pos; -} - -static int -sdbm_entry_len(struct sdbm *d, uns keylen, uns vallen) -{ - uns len = keylen + vallen; - if (d->key_size < 0) - len += 2; - if (d->val_size < 0) - len += 2; - return len; -} - -static void -sdbm_store_entry(struct sdbm *d, byte *pos, byte *key, uns keylen, byte *val, uns vallen) -{ - if (d->key_size < 0) - { - *pos++ = keylen >> 8; - *pos++ = keylen; - } - memmove(pos, key, keylen); - pos += keylen; - if (d->val_size < 0) - { - *pos++ = vallen >> 8; - *pos++ = vallen; - } - memmove(pos, val, vallen); -} - -static uns -sdbm_page_rank(struct sdbm *d, uns dirpos) -{ - struct page *b; - u32 pg, x; - uns l, r; - uns pm = d->page_mask; - - b = READ_DIR(d, dirpos & ~pm); - pg = GET32(b->data, dirpos & pm); - l = dirpos; - while ((l & pm) && GET32(b->data, (l - 4) & pm) == pg) - l -= 4; - r = dirpos + 4; - /* We heavily depend on unused directory entries being zero */ - while ((r & pm) && GET32(b->data, r & pm) == pg) - r += 4; - pgc_put(d->cache, b); - - if (!(l & pm) && !(r & pm)) - { - /* Note that if it spans page boundary, it must contain an integer number of pages */ - while (l) - { - b = READ_DIR(d, (l - 4) & ~pm); - x = GET32(b->data, 0); - pgc_put(d->cache, b); - if (x != pg) - break; - l -= d->page_size; - } - while (r < 4*d->dir_size) - { - b = READ_DIR(d, r & ~pm); - x = GET32(b->data, 0); - pgc_put(d->cache, b); - if (x != pg) - break; - r += d->page_size; - } - } - return (r - l) >> 2; -} - -static void 
-sdbm_expand_directory(struct sdbm *d) -{ - struct page *b, *c; - int i, ent; - u32 *dir, *t; - - if (d->root->dir_order >= 31) - die("SDB: Database directory too large, giving up"); - - if (4*d->dir_size < d->page_size) - { - /* It still fits within single page */ - b = READ_DIR(d, 0); - dir = (u32 *) b->data; - for(i=d->dir_size-1; i>=0; i--) - dir[2*i] = dir[2*i+1] = dir[i]; - pgc_mark_dirty(d->cache, b); - pgc_put(d->cache, b); - } - else - { - uns old_dir = d->root->dir_start; - uns old_dir_pages = 1 << (d->root->dir_order + 2 - d->page_order); - uns page, new_dir; - new_dir = d->root->dir_start = sdbm_alloc_pages(d, 2*old_dir_pages); - ent = 1 << (d->page_order - 3); - for(page=0; page < old_dir_pages; page++) - { - b = READ_PAGE(d, old_dir + page); - dir = (u32 *) b->data; - c = GET_PAGE(d, new_dir + 2*page); - t = (u32 *) c->data; - for(i=0; icache, c); - c = GET_PAGE(d, new_dir + 2*page + 1); - t = (u32 *) c->data; - for(i=0; icache, c); - pgc_put(d->cache, b); - } - if (!(d->flags & SDBM_FAST)) - { - /* - * Unless in super-fast mode, fill old directory pages with zeroes. - * This slows us down a bit, but allows database reconstruction after - * the free list is lost. - */ - for(page=0; page < old_dir_pages; page++) - { - b = GET_ZERO_PAGE(d, old_dir + page); - pgc_put(d->cache, b); - } - } - sdbm_free_pages(d, old_dir, old_dir_pages); - } - - d->root->dir_order++; - d->dir_size = 1 << d->root->dir_order; - d->dir_shift = 32 - d->root->dir_order; - pgc_mark_dirty(d->cache, d->root_page); - if (!(d->flags & SDBM_FAST)) - sdbm_sync(d); -} - -static void -sdbm_split_data(struct sdbm *d, struct sdbm_bucket *s, struct sdbm_bucket *d0, struct sdbm_bucket *d1, uns sigbit) -{ - byte *sp = s->data; - byte *dp[2] = { d0->data, d1->data }; - byte *K, *D; - uns Kl, Dl, sz, i; - - while (sp < s->data + s->used) - { - sz = sdbm_get_entry(d, sp, &K, &Kl, &D, &Dl); - sp += sz; - i = (sdbm_hash(K, Kl) & (1 << sigbit)) ? 
1 : 0; - sdbm_store_entry(d, dp[i], K, Kl, D, Dl); - dp[i] += sz; - } - d0->used = dp[0] - d0->data; - d1->used = dp[1] - d1->data; -} - -static void -sdbm_split_dir(struct sdbm *d, uns dirpos, uns count, uns pos) -{ - struct page *b; - uns i; - - count *= 4; - while (count) - { - b = READ_DIR(d, dirpos & ~d->page_mask); - i = d->page_size - (dirpos & d->page_mask); - if (i > count) - i = count; - count -= i; - while (i) - { - GET32(b->data, dirpos & d->page_mask) = pos; - dirpos += 4; - i -= 4; - } - pgc_mark_dirty(d->cache, b); - pgc_put(d->cache, b); - } -} - -static inline uns -sdbm_dirpos(struct sdbm *d, uns hash) -{ - if (d->dir_shift != 32) /* avoid shifting by 32 bits */ - return (hash >> d->dir_shift) << 2; /* offset in the directory */ - else - return 0; -} - -static struct page * -sdbm_split_page(struct sdbm *d, struct page *b, u32 hash) -{ - struct page *p[2]; - uns i, rank, sigbit, rank_log, dirpos, newpg; - - dirpos = sdbm_dirpos(d, hash); - rank = sdbm_page_rank(d, dirpos); /* rank = # of pointers to this page */ - if (rank == 1) - { - sdbm_expand_directory(d); - rank = 2; - dirpos *= 2; - } - rank_log = 1; /* rank_log = log2(rank) */ - while ((1U << rank_log) < rank) - rank_log++; - sigbit = d->dir_shift + rank_log - 1; /* sigbit = bit we split on */ - p[0] = b; - newpg = sdbm_alloc_page(d); - p[1] = GET_PAGE(d, newpg); - sdbm_split_data(d, (void *) b->data, (void *) p[0]->data, (void *) p[1]->data, sigbit); - sdbm_split_dir(d, (dirpos & ~(4*rank - 1))+2*rank, rank/2, newpg); - pgc_mark_dirty(d->cache, p[0]); - i = (hash & (1 << sigbit)) ? 
1 : 0; - pgc_put(d->cache, p[!i]); - return p[i]; -} - -static int -sdbm_put_user(byte *D, uns Dl, byte *val, uns *vallen) -{ - if (vallen) - { - if (*vallen < Dl) - return 1; - *vallen = Dl; - } - if (val) - memcpy(val, D, Dl); - return 0; -} - -static int -sdbm_access(struct sdbm *d, byte *key, uns keylen, byte *val, uns *vallen, uns mode) /* 0=read, 1=store, 2=replace */ -{ - struct page *p, *q; - u32 hash, h, pos, size; - struct sdbm_bucket *b; - byte *c, *e; - int rc; - - if ((d->key_size >= 0 && keylen != (uns) d->key_size) || keylen > 65535) - return SDBM_ERROR_BAD_KEY_SIZE; - if (val && ((d->val_size >= 0 && *vallen != (uns) d->val_size) || *vallen >= 65535) && mode) - return SDBM_ERROR_BAD_VAL_SIZE; - if (!mode && !(d->flags & SDBM_WRITE)) - return SDBM_ERROR_READ_ONLY; - hash = sdbm_hash(key, keylen); - h = sdbm_dirpos(d, hash); - p = READ_DIR(d, h & ~d->page_mask); - pos = GET32(p->data, h & d->page_mask); - pgc_put(d->cache, p); - q = READ_PAGE(d, pos); - b = (void *) q->data; - c = b->data; - e = c + b->used; - while (c < e) - { - byte *K, *D; - uns Kl, Dl, s; - s = sdbm_get_entry(d, c, &K, &Kl, &D, &Dl); - if (Kl == keylen && !memcmp(K, key, Kl)) - { - /* Gotcha! */ - switch (mode) - { - case 0: /* fetch: found */ - rc = sdbm_put_user(D, Dl, val, vallen); - pgc_put(d->cache, q); - return rc ? 
SDBM_ERROR_TOO_LARGE : 1; - case 1: /* store: already present */ - pgc_put(d->cache, q); - return 0; - default: /* replace: delete the old one */ - memmove(c, c+s, e-(c+s)); - b->used -= s; - goto insert; - } - } - c += s; - } - if (!mode || !val) /* fetch or delete: no success */ - { - pgc_put(d->cache, q); - return 0; - } - -insert: - if (val) - { - size = sdbm_entry_len(d, keylen, *vallen); - while (b->used + size > d->page_size - sizeof(struct sdbm_bucket)) - { - /* Page overflow, need to split */ - if (size >= d->page_size - sizeof(struct sdbm_bucket)) - { - pgc_put(d->cache, q); - return SDBM_ERROR_GIANT; - } - q = sdbm_split_page(d, q, hash); - b = (void *) q->data; - } - sdbm_store_entry(d, b->data + b->used, key, keylen, val, *vallen); - b->used += size; - } - pgc_mark_dirty(d->cache, q); - pgc_put(d->cache, q); - if (d->flags & SDBM_SYNC) - sdbm_sync(d); - return 1; -} - -int -sdbm_store(struct sdbm *d, byte *key, uns keylen, byte *val, uns vallen) -{ - return sdbm_access(d, key, keylen, val, &vallen, 1); -} - -int -sdbm_replace(struct sdbm *d, byte *key, uns keylen, byte *val, uns vallen) -{ - return sdbm_access(d, key, keylen, val, &vallen, 2); -} - -int -sdbm_delete(struct sdbm *d, byte *key, uns keylen) -{ - return sdbm_access(d, key, keylen, NULL, NULL, 2); -} - -int -sdbm_fetch(struct sdbm *d, byte *key, uns keylen, byte *val, uns *vallen) -{ - return sdbm_access(d, key, keylen, val, vallen, 0); -} - -void -sdbm_rewind(struct sdbm *d) -{ - d->find_page = 1; - d->find_pos = 0; - d->find_free_list = 0; -} - -int -sdbm_get_next(struct sdbm *d, byte *key, uns *keylen, byte *val, uns *vallen) -{ - uns page = d->find_page; - uns pos = d->find_pos; - byte *K, *V; - uns c, Kl, Vl; - struct page *p; - struct sdbm_bucket *b; - - for(;;) - { - if (!pos) - { - if (page >= d->file_size) - break; - if (page == d->root->dir_start) - page += (4*d->dir_size + d->page_size - 1) >> d->page_order; - else if (page == d->root->free_pool[d->find_free_list].first) - page 
+= d->root->free_pool[d->find_free_list++].count; - else - pos = 4; - continue; - } - p = READ_PAGE(d, page); - b = (void *) p->data; - if (pos - 4 >= b->used) - { - pos = 0; - page++; - pgc_put(d->cache, p); - continue; - } - c = sdbm_get_entry(d, p->data + pos, &K, &Kl, &V, &Vl); - d->find_page = page; - d->find_pos = pos + c; - c = sdbm_put_user(K, Kl, key, keylen) || - sdbm_put_user(V, Vl, val, vallen); - pgc_put(d->cache, p); - return c ? SDBM_ERROR_TOO_LARGE : 1; - } - d->find_page = page; - d->find_pos = pos; - return 0; -} - -void -sdbm_sync(struct sdbm *d) -{ - pgc_flush(d->cache); - if (d->flags & SDBM_FSYNC) - fsync(d->fd); -} diff --git a/lib/db.h b/lib/db.h deleted file mode 100644 index 41b81aa..0000000 --- a/lib/db.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * UCW Library -- Fast Database Management Routines - * - * (c) 1999--2001 Martin Mares - * - * This software may be freely distributed and used according to the terms - * of the GNU Lesser General Public License. - */ - -#ifndef _UCW_DB_H -#define _UCW_DB_H - -struct sdbm; - -struct sdbm_options { /* Set to 0 for default */ - char *name; /* File name */ - uns flags; /* See SDBM_xxx below */ - uns page_order; /* Binary logarithm of file page size */ - uns cache_size; /* Number of cached pages */ - int key_size; /* Key size, -1=variable */ - int val_size; /* Value size, -1=variable */ -}; - -struct sdbm *sdbm_open(struct sdbm_options *); -void sdbm_close(struct sdbm *); -int sdbm_store(struct sdbm *, byte *key, uns keylen, byte *val, uns vallen); -int sdbm_replace(struct sdbm *, byte *key, uns keylen, byte *val, uns vallen); /* val == NULL -> delete */ -int sdbm_delete(struct sdbm *, byte *key, uns keylen); -int sdbm_fetch(struct sdbm *, byte *key, uns keylen, byte *val, uns *vallen); /* val can be NULL */ -void sdbm_rewind(struct sdbm *); -int sdbm_get_next(struct sdbm *, byte *key, uns *keylen, byte *val, uns *vallen); /* val can be NULL */ -void sdbm_sync(struct sdbm *); -u32 sdbm_hash(byte *key, uns 
keylen); - -#define SDBM_CREAT 1 /* Create the database if it doesn't exist */ -#define SDBM_WRITE 2 /* Open the database in read/write mode */ -#define SDBM_SYNC 4 /* Sync after each operation */ -#define SDBM_FAST 8 /* Don't sync on directory splits -- results in slightly faster - * operation, but reconstruction of database after program crash - * may be impossible. - */ -#define SDBM_FSYNC 16 /* When syncing, call fsync() */ - -#define SDBM_ERROR_BAD_KEY_SIZE -1 /* Fixed key size doesn't match */ -#define SDBM_ERROR_BAD_VAL_SIZE -2 /* Fixed value size doesn't match */ -#define SDBM_ERROR_TOO_LARGE -3 /* Key/value doesn't fit in buffer supplied */ -#define SDBM_ERROR_READ_ONLY -4 /* Database has been opened read only */ -#define SDBM_ERROR_GIANT -5 /* Key/value too large to fit in a page */ - -#endif diff --git a/lib/db_internal.h b/lib/db_internal.h deleted file mode 100644 index b480a79..0000000 --- a/lib/db_internal.h +++ /dev/null @@ -1,58 +0,0 @@ -/* - * UCW Library -- Fast Database Management Routines -- Internal Declarations - * - * (c) 1999--2001 Martin Mares - * - * This software may be freely distributed and used according to the terms - * of the GNU Lesser General Public License. - */ - -#define SDBM_NUM_FREE_PAGE_POOLS 32 - -struct sdbm_root { /* Must fit in 1K which is minimum page size */ - u32 magic; - u32 version; - u32 page_order; /* Binary logarithm of page size */ - s32 key_size; /* Key/val size, -1=variable */ - s32 val_size; - u32 dir_start; /* First page of the page directory */ - u32 dir_order; /* Binary logarithm of directory size */ - /* - * As we know the only thing which can be freed is the page directory - * and it can grow only a limited number of times, we can use a very - * simple-minded representation of the free page pool. We also assume - * these entries are sorted by start position. 
- */ - struct { - u32 first; - u32 count; - } free_pool[SDBM_NUM_FREE_PAGE_POOLS]; -}; - -struct sdbm_bucket { - u32 used; /* Bytes used in this bucket */ - byte data[0]; -}; - -struct sdbm { - struct page_cache *cache; - int fd; - struct sdbm_root *root; - struct page *root_page; - int key_size; /* Cached values from root page */ - int val_size; - uns page_order; - uns page_size; - uns page_mask; /* page_size - 1 */ - uns dir_size; /* Page directory size in entries */ - uns dir_shift; /* Number of significant bits of hash function */ - uns file_size; /* in pages */ - uns flags; - uns find_page, find_pos; /* Current pointer for sdbm_find_next() */ - uns find_free_list; /* First free list entry not skipped by sdbm_find_next() */ -}; - -#define SDBM_MAGIC 0x5344424d -#define SDBM_VERSION 2 - -#define GET32(p,o) *((u32 *)((p)+(o))) diff --git a/lib/eltpool.h b/lib/eltpool.h deleted file mode 100644 index 7e295fb..0000000 --- a/lib/eltpool.h +++ /dev/null @@ -1,65 +0,0 @@ -/* - * UCW Library -- Fast Allocator for Fixed-Size Elements - * - * (c) 2007 Martin Mares - * - * This software may be freely distributed and used according to the terms - * of the GNU Lesser General Public License. 
- */ - -#ifndef _UCW_ELTPOOL_H -#define _UCW_ELTPOOL_H - -struct eltpool { - struct eltpool_chunk *first_chunk; - struct eltpool_free *first_free; - uns elt_size; - uns chunk_size; - uns elts_per_chunk; - uns num_allocated; // Just for debugging -}; - -struct eltpool_chunk { - struct eltpool_chunk *next; - /* Chunk data continue here */ -}; - -struct eltpool_free { - struct eltpool_free *next; -}; - -struct eltpool *ep_new(uns elt_size, uns elts_per_chunk); -void ep_delete(struct eltpool *pool); -void *ep_alloc_slow(struct eltpool *pool); - -static inline void * -ep_alloc(struct eltpool *pool) -{ - pool->num_allocated++; -#ifdef CONFIG_FAKE_ELTPOOL - return xmalloc(pool->elt_size); -#else - struct eltpool_free *elt; - if (elt = pool->first_free) - pool->first_free = elt->next; - else - elt = ep_alloc_slow(pool); - return elt; -#endif -} - -static inline void -ep_free(struct eltpool *pool, void *p) -{ - pool->num_allocated--; -#ifdef CONFIG_FAKE_ELTPOOL - (void) pool; - xfree(p); -#else - struct eltpool_free *elt = p; - elt->next = pool->first_free; - pool->first_free = elt; -#endif -} - -#endif diff --git a/lib/fastbuf.h b/lib/fastbuf.h deleted file mode 100644 index 920d83e..0000000 --- a/lib/fastbuf.h +++ /dev/null @@ -1,410 +0,0 @@ -/* - * UCW Library -- Fast Buffered I/O - * - * (c) 1997--2007 Martin Mares - * (c) 2004 Robert Spalek - * - * This software may be freely distributed and used according to the terms - * of the GNU Lesser General Public License. - */ - -#ifndef _UCW_FASTBUF_H -#define _UCW_FASTBUF_H - -#include -#include - -/* - * Generic buffered I/O. You supply hooks to be called for low-level operations - * (swapping of buffers, seeking and closing), we do the rest. 
- * - * Buffer layout when reading: - * - * +----------------+---------------------------+ - * | read data | free space | - * +----------------+---------------------------+ - * ^ ^ ^ ^ - * buffer bptr bstop bufend - * - * After the last character is read, bptr == bstop and buffer refill - * is deferred to the next read attempt. This gives us an easy way - * how to implement bungetc(). - * - * When writing: - * - * +--------+--------------+--------------------+ - * | unused | written data | free space | - * +--------+--------------+--------------------+ - * ^ ^ ^ ^ - * buffer bstop bptr bufend - * - * Dirty tricks: - * - * - You can mix reads and writes on the same stream, but you must - * call bflush() in between and remember that the file position - * points after the flushed buffer which is not necessarily the same - * as after the data you've read. - * - The spout/refill hooks can change not only bptr and bstop, but also - * the location of the buffer; fb-mem.c takes advantage of it. - * - In some cases, the user of the bdirect interface can be allowed to modify - * the data in the buffer to avoid unnecessary copying. If the back-end - * allows such modifications, it can set can_overwrite_buffer accordingly: - * * 0 if no modification is allowed, - * * 1 if the user can modify the buffer on the condition that - * the modifications will be undone before calling the next - * fastbuf operation - * * 2 if the user is allowed to overwrite the data in the buffer - * if bdirect_read_commit_modified() is called afterwards. - * In this case, the back-end must be prepared for trimming - * of the buffer which is done by the commit function. 
- */ - -struct fastbuf { - byte is_fastbuf[0]; /* Dummy field for checking of type casts */ - byte *bptr, *bstop; /* Access pointers */ - byte *buffer, *bufend; /* Start and end of the buffer */ - char *name; /* File name for error messages */ - sh_off_t pos; /* Position of bstop in the file */ - int (*refill)(struct fastbuf *); /* Get a buffer with new data */ - void (*spout)(struct fastbuf *); /* Write buffer data to the file */ - int (*seek)(struct fastbuf *, sh_off_t, int); /* Slow path for bseek(), buffer already flushed; returns success */ - void (*close)(struct fastbuf *); /* Close the stream */ - int (*config)(struct fastbuf *, uns, int); /* Configure the stream */ - int can_overwrite_buffer; /* Can the buffer be altered? (see discussion above) 0=never, 1=temporarily, 2=permanently */ -}; - -/* FastIO on files with several configurable back-ends */ - -enum fb_type { /* Which back-end you want to use */ - FB_STD, /* Standard buffered I/O */ - FB_DIRECT, /* Direct I/O bypassing system caches (see fb-direct.c for a description) */ - FB_MMAP /* Memory mapped files */ -}; - -struct fb_params { - enum fb_type type; - uns buffer_size; /* 0 for default size */ - uns keep_back_buf; /* FB_STD: optimize for bi-directional access */ - uns read_ahead; /* FB_DIRECT options */ - uns write_back; - struct asio_queue *asio; -}; - -struct cf_section; -extern struct cf_section fbpar_cf; -extern struct fb_params fbpar_def; - -struct fastbuf *bopen_file(const char *name, int mode, struct fb_params *params); /* Use params==NULL for defaults */ -struct fastbuf *bopen_file_try(const char *name, int mode, struct fb_params *params); -struct fastbuf *bopen_tmp_file(struct fb_params *params); -struct fastbuf *bopen_fd(int fd, struct fb_params *params); - -/* FastIO on standard files (shortcuts for FB_STD) */ - -struct fastbuf *bopen(const char *name, uns mode, uns buflen); -struct fastbuf *bopen_try(const char *name, uns mode, uns buflen); -struct fastbuf *bopen_tmp(uns buflen); 
-struct fastbuf *bfdopen(int fd, uns buflen); -struct fastbuf *bfdopen_shared(int fd, uns buflen); -void bfilesync(struct fastbuf *b); - -/* Temporary files */ - -#define TEMP_FILE_NAME_LEN 256 -void temp_file_name(char *name); -void bfix_tmp_file(struct fastbuf *fb, const char *name); - -/* Internal functions of some file back-ends */ - -struct fastbuf *bfdopen_internal(int fd, const char *name, uns buflen); -struct fastbuf *bfmmopen_internal(int fd, const char *name, uns mode); - -extern uns fbdir_cheat; -struct asio_queue; -struct fastbuf *fbdir_open_fd_internal(int fd, const char *name, struct asio_queue *io_queue, uns buffer_size, uns read_ahead, uns write_back); - -void bclose_file_helper(struct fastbuf *f, int fd, int is_temp_file); - -/* FastIO on in-memory streams */ - -struct fastbuf *fbmem_create(uns blocksize); /* Create stream and return its writing fastbuf */ -struct fastbuf *fbmem_clone_read(struct fastbuf *); /* Create reading fastbuf */ - -/* FastI on file descriptors with limit */ - -struct fastbuf *bopen_limited_fd(int fd, uns bufsize, uns limit); - -/* FastIO on static buffers */ - -void fbbuf_init_read(struct fastbuf *f, byte *buffer, uns size, uns can_overwrite); -void fbbuf_init_write(struct fastbuf *f, byte *buffer, uns size); -static inline uns -fbbuf_count_written(struct fastbuf *f) -{ - return f->bptr - f->bstop; -} - -/* FastIO on recyclable growing buffers */ - -struct fastbuf *fbgrow_create(unsigned basic_size); -void fbgrow_reset(struct fastbuf *b); /* Reset stream and prepare for writing */ -void fbgrow_rewind(struct fastbuf *b); /* Prepare for reading */ - -/* FastO on memory pools */ - -struct mempool; -struct fbpool { - struct fastbuf fb; - struct mempool *mp; -}; - -void fbpool_init(struct fbpool *fb); /* Initialize a new fastbuf */ -void fbpool_start(struct fbpool *fb, struct mempool *mp, uns init_size); - /* Start a new continuous block and prepare for writing (see mp_start()) */ -void *fbpool_end(struct fbpool *fb); /* Close 
the block and return its address (see mp_end()). - The length can be determined with mp_size(mp, ptr). */ - -/* FastO with atomic writes for multi-threaded programs */ - -struct fb_atomic { - struct fastbuf fb; - struct fb_atomic_file *af; - byte *expected_max_bptr; - uns slack_size; -}; -#define FB_ATOMIC(f) ((struct fb_atomic *)(f)->is_fastbuf) - -struct fastbuf *fbatomic_open(const char *name, struct fastbuf *master, uns bufsize, int record_len); -void fbatomic_internal_write(struct fastbuf *b); - -static inline void -fbatomic_commit(struct fastbuf *b) -{ - if (b->bptr >= ((struct fb_atomic *)b)->expected_max_bptr) - fbatomic_internal_write(b); -} - -/* Configuring stream parameters */ - -enum bconfig_type { - BCONFIG_IS_TEMP_FILE, /* 0=normal file, 1=temporary file, 2=shared fd */ - BCONFIG_KEEP_BACK_BUF, /* Optimize for bi-directional access */ -}; - -int bconfig(struct fastbuf *f, uns type, int data); - -/* Universal functions working on all fastbuf's */ - -void bclose(struct fastbuf *f); -void bflush(struct fastbuf *f); -void bseek(struct fastbuf *f, sh_off_t pos, int whence); -void bsetpos(struct fastbuf *f, sh_off_t pos); -void brewind(struct fastbuf *f); -sh_off_t bfilesize(struct fastbuf *f); /* -1 if not seekable */ - -static inline sh_off_t btell(struct fastbuf *f) -{ - return f->pos + (f->bptr - f->bstop); -} - -int bgetc_slow(struct fastbuf *f); -static inline int bgetc(struct fastbuf *f) -{ - return (f->bptr < f->bstop) ? (int) *f->bptr++ : bgetc_slow(f); -} - -int bpeekc_slow(struct fastbuf *f); -static inline int bpeekc(struct fastbuf *f) -{ - return (f->bptr < f->bstop) ? 
(int) *f->bptr : bpeekc_slow(f); -} - -static inline void bungetc(struct fastbuf *f) -{ - f->bptr--; -} - -void bputc_slow(struct fastbuf *f, uns c); -static inline void bputc(struct fastbuf *f, uns c) -{ - if (f->bptr < f->bufend) - *f->bptr++ = c; - else - bputc_slow(f, c); -} - -static inline uns -bavailr(struct fastbuf *f) -{ - return f->bstop - f->bptr; -} - -static inline uns -bavailw(struct fastbuf *f) -{ - return f->bufend - f->bptr; -} - -uns bread_slow(struct fastbuf *f, void *b, uns l, uns check); -static inline uns bread(struct fastbuf *f, void *b, uns l) -{ - if (bavailr(f) >= l) - { - memcpy(b, f->bptr, l); - f->bptr += l; - return l; - } - else - return bread_slow(f, b, l, 0); -} - -static inline uns breadb(struct fastbuf *f, void *b, uns l) -{ - if (bavailr(f) >= l) - { - memcpy(b, f->bptr, l); - f->bptr += l; - return l; - } - else - return bread_slow(f, b, l, 1); -} - -void bwrite_slow(struct fastbuf *f, const void *b, uns l); -static inline void bwrite(struct fastbuf *f, const void *b, uns l) -{ - if (bavailw(f) >= l) - { - memcpy(f->bptr, b, l); - f->bptr += l; - } - else - bwrite_slow(f, b, l); -} - -/* - * Functions for reading of strings: - * - * bgets() reads a line, strip the trailing '\n' and return a pointer - * to the terminating 0 or NULL on EOF. Dies if the line is too long. - * bgets0() does the same for 0-terminated strings. - * bgets_nodie() a variant of bgets() which returns either the length of the - * string (excluding the terminator) or -1 if the line does not fit - * in the buffer. In such cases, it returns after reading exactly `l' - * bytes of input. 
- * bgets_bb() a variant of bgets() which allocates the string in a growing buffer - * bgets_mp() the same, but in a mempool - * bgets_stk() the same, but on the stack by alloca() - */ - -char *bgets(struct fastbuf *f, char *b, uns l); -char *bgets0(struct fastbuf *f, char *b, uns l); -int bgets_nodie(struct fastbuf *f, char *b, uns l); - -struct mempool; -struct bb_t; -uns bgets_bb(struct fastbuf *f, struct bb_t *b, uns limit); -char *bgets_mp(struct fastbuf *f, struct mempool *mp); - -struct bgets_stk_struct { - struct fastbuf *f; - byte *old_buf, *cur_buf, *src; - uns old_len, cur_len, src_len; -}; -void bgets_stk_init(struct bgets_stk_struct *s); -void bgets_stk_step(struct bgets_stk_struct *s); -#define bgets_stk(fb) ({ struct bgets_stk_struct _s; _s.f = (fb); for (bgets_stk_init(&_s); _s.cur_len; _s.cur_buf = alloca(_s.cur_len), bgets_stk_step(&_s)); _s.cur_buf; }) - -static inline void -bputs(struct fastbuf *f, const char *b) -{ - bwrite(f, b, strlen(b)); -} - -static inline void -bputs0(struct fastbuf *f, const char *b) -{ - bwrite(f, b, strlen(b)+1); -} - -static inline void -bputsn(struct fastbuf *f, const char *b) -{ - bputs(f, b); - bputc(f, '\n'); -} - -void bbcopy_slow(struct fastbuf *f, struct fastbuf *t, uns l); -static inline void -bbcopy(struct fastbuf *f, struct fastbuf *t, uns l) -{ - if (bavailr(f) >= l && bavailw(t) >= l) - { - memcpy(t->bptr, f->bptr, l); - t->bptr += l; - f->bptr += l; - } - else - bbcopy_slow(f, t, l); -} - -int bskip_slow(struct fastbuf *f, uns len); -static inline int bskip(struct fastbuf *f, uns len) -{ - if (bavailr(f) >= len) - { - f->bptr += len; - return 1; - } - else - return bskip_slow(f, len); -} - -/* Direct I/O on buffers */ - -static inline uns -bdirect_read_prepare(struct fastbuf *f, byte **buf) -{ - if (f->bptr == f->bstop && !f->refill(f)) - { - *buf = NULL; // This is not needed, but it helps to get rid of spurious warnings - return 0; - } - *buf = f->bptr; - return bavailr(f); -} - -static inline void 
-bdirect_read_commit(struct fastbuf *f, byte *pos) -{ - f->bptr = pos; -} - -static inline void -bdirect_read_commit_modified(struct fastbuf *f, byte *pos) -{ - f->bptr = pos; - f->buffer = pos; /* Avoid seeking backwards in the buffer */ -} - -static inline uns -bdirect_write_prepare(struct fastbuf *f, byte **buf) -{ - if (f->bptr == f->bufend) - f->spout(f); - *buf = f->bptr; - return bavailw(f); -} - -static inline void -bdirect_write_commit(struct fastbuf *f, byte *pos) -{ - f->bptr = pos; -} - -/* Formatted output */ - -int bprintf(struct fastbuf *b, const char *msg, ...) FORMAT_CHECK(printf,2,3); -int vbprintf(struct fastbuf *b, const char *msg, va_list args); - -#endif diff --git a/lib/fastbuf.t b/lib/fastbuf.t deleted file mode 100644 index 6f8681a..0000000 --- a/lib/fastbuf.t +++ /dev/null @@ -1,15 +0,0 @@ -# Tests for fastbufs - -Run: ../obj/lib/fb-file-t -Out: 112 - - 112 116 - -Run: ../obj/lib/fb-grow-t -Out: <10><10><0>1234512345<10><9>5<10> - <10><10><0>1234512345<10><9>5<10> - <10><10><0>1234512345<10><9>5<10> - <10><10><0>1234512345<10><9>5<10> - <10><10><0>1234512345<10><9>5<10> - -Run: ../obj/lib/fb-pool-t diff --git a/lib/ff-utf8.h b/lib/ff-utf8.h deleted file mode 100644 index af7543f..0000000 --- a/lib/ff-utf8.h +++ /dev/null @@ -1,15 +0,0 @@ -/* - * UCW Library: An alias for lib/ff-unicode.h (for backwards compatibility) - * - * (c) 2008 Pavel Charvat - * - * This software may be freely distributed and used according to the terms - * of the GNU Lesser General Public License. 
- */ - -#ifndef _UCW_FF_UTF8_H -#define _UCW_FF_UTF8_H - -#include "lib/ff-unicode.h" - -#endif diff --git a/lib/gbuf.h b/lib/gbuf.h deleted file mode 100644 index daf0bfb..0000000 --- a/lib/gbuf.h +++ /dev/null @@ -1,71 +0,0 @@ -/* - * UCW Library -- A simple growing buffer - * - * (c) 2004, Robert Spalek - * (c) 2005, Martin Mares - * - * Define the following macros: - * - * GBUF_TYPE data type of records stored in the buffer - * GBUF_PREFIX(x) add a name prefix to all global symbols - * GBUF_TRACE(msg...) log growing of buffer [optional] - * - * This software may be freely distributed and used according to the terms - * of the GNU Lesser General Public License. - */ - -#define BUF_T GBUF_PREFIX(t) - -typedef struct BUF_T -{ - uns len; - GBUF_TYPE *ptr; -} -BUF_T; - -static inline void -GBUF_PREFIX(init)(BUF_T *b) -{ - b->ptr = NULL; - b->len = 0; -} - -static void UNUSED -GBUF_PREFIX(done)(BUF_T *b) -{ - if (b->ptr) - xfree(b->ptr); - b->ptr = NULL; - b->len = 0; -} - -static void UNUSED -GBUF_PREFIX(set_size)(BUF_T *b, uns len) -{ - b->len = len; - b->ptr = xrealloc(b->ptr, len * sizeof(GBUF_TYPE)); -#ifdef GBUF_TRACE - GBUF_TRACE(STRINGIFY_EXPANDED(BUF_T) " growing to %u items", len); -#endif -} - -static void UNUSED -GBUF_PREFIX(do_grow)(BUF_T *b, uns len) -{ - if (len < 2*b->len) // to ensure logarithmic cost - len = 2*b->len; - GBUF_PREFIX(set_size)(b, len); -} - -static inline GBUF_TYPE * -GBUF_PREFIX(grow)(BUF_T *b, uns len) -{ - if (unlikely(len > b->len)) - GBUF_PREFIX(do_grow)(b, len); - return b->ptr; -} - -#undef GBUF_TYPE -#undef GBUF_PREFIX -#undef GBUF_TRACE -#undef BUF_T diff --git a/lib/getopt.h b/lib/getopt.h deleted file mode 100644 index b4ff823..0000000 --- a/lib/getopt.h +++ /dev/null @@ -1,92 +0,0 @@ -/* - * UCW Library -- Parsing of configuration and command-line options - * - * (c) 2001--2006 Robert Spalek - * (c) 2003--2006 Martin Mares - * - * This software may be freely distributed and used according to the terms - * of the GNU Lesser 
General Public License. - */ - -#ifndef _UCW_GETOPT_H -#define _UCW_GETOPT_H - -#ifdef CONFIG_OWN_GETOPT -#include "lib/getopt/getopt-sh.h" -#else -#include -#endif - -void reset_getopt(void); - -/* Safe loading and reloading of configuration files: conf-input.c */ - -extern char *cf_def_file; /* DEFAULT_CONFIG; NULL if already loaded */ -extern char *cf_env_file; /* ENV_VAR_CONFIG */ -int cf_reload(const char *file); -int cf_load(const char *file); -int cf_set(const char *string); - -/* Direct access to configuration items: conf-intr.c */ - -#define CF_OPERATIONS T(CLOSE) T(SET) T(CLEAR) T(ALL) \ - T(APPEND) T(PREPEND) T(REMOVE) T(EDIT) T(AFTER) T(BEFORE) T(COPY) - /* Closing brace finishes previous block. - * Basic attributes (static, dynamic, parsed) can be used with SET. - * Dynamic arrays can be used with SET, APPEND, PREPEND. - * Sections can be used with SET. - * Lists can be used with everything. */ -#define T(x) OP_##x, -enum cf_operation { CF_OPERATIONS }; -#undef T - -struct cf_item; -char *cf_find_item(const char *name, struct cf_item *item); -char *cf_write_item(struct cf_item *item, enum cf_operation op, int number, char **pars); - -/* Debug dumping: conf-dump.c */ - -struct fastbuf; -void cf_dump_sections(struct fastbuf *fb); - -/* Journaling control: conf-journal.c */ - -struct cf_journal_item; -struct cf_journal_item *cf_journal_new_transaction(uns new_pool); -void cf_journal_commit_transaction(uns new_pool, struct cf_journal_item *oldj); -void cf_journal_rollback_transaction(uns new_pool, struct cf_journal_item *oldj); - -/* - * cf_getopt() takes care of parsing the command-line arguments, loading the - * default configuration file (cf_def_file) and processing configuration options. - * The calling convention is the same as with GNU getopt_long(), but you must prefix - * your own short/long options by the CF_(SHORT|LONG)_OPTS or pass CF_NO_LONG_OPTS - * of there are no long options. 
- * - * The default configuration file can be overriden by the --config options, - * which must come first. During parsing of all other options, the configuration - * is already available. - */ - -#define CF_SHORT_OPTS "C:S:" -#define CF_LONG_OPTS {"config", 1, 0, 'C'}, {"set", 1, 0, 'S'}, CF_LONG_OPTS_DEBUG -#define CF_NO_LONG_OPTS (const struct option []) { CF_LONG_OPTS { NULL, 0, 0, 0 } } -#ifndef CF_USAGE_TAB -#define CF_USAGE_TAB "" -#endif -#define CF_USAGE \ -"-C, --config filename\t" CF_USAGE_TAB "Override the default configuration file\n\ --S, --set sec.item=val\t" CF_USAGE_TAB "Manual setting of a configuration item\n" CF_USAGE_DEBUG - -#ifdef CONFIG_DEBUG -#define CF_LONG_OPTS_DEBUG { "dumpconfig", 0, 0, 0x64436667 } , -#define CF_USAGE_DEBUG " --dumpconfig\t" CF_USAGE_TAB "Dump program configuration\n" -#else -#define CF_LONG_OPTS_DEBUG -#define CF_USAGE_DEBUG -#endif - -// conf-input.c -int cf_getopt(int argc, char * const argv[], const char *short_opts, const struct option *long_opts, int *long_index); - -#endif diff --git a/lib/getopt/getopt-sh.h b/lib/getopt/getopt-sh.h deleted file mode 100644 index 4597aca..0000000 --- a/lib/getopt/getopt-sh.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef _UCW_GETOPT_GETOPT_SH_H -#define _UCW_GETOPT_GETOPT_SH_H - -#define getopt sh_getopt -#define getopt_long sh_getopt_long -#define getopt_long_only sh_getopt_longonly -#define optarg sh_optarg -#define optind sh_optind -#define opterr sh_opterr -#define optopt sh_optopt - -#include "lib/getopt/getopt.h" - -#endif diff --git a/lib/hash-test.t b/lib/hash-test.t deleted file mode 100644 index ba5a61e..0000000 --- a/lib/hash-test.t +++ /dev/null @@ -1,13 +0,0 @@ -# Tests for the hash table modules - -Run: ../obj/lib/hash-test 1 -Out: OK - -Run: ../obj/lib/hash-test 2 -Out: OK - -Run: ../obj/lib/hash-test 3 -Out: OK - -Run: ../obj/lib/hash-test 4 -Out: OK diff --git a/lib/hashfunc.h b/lib/hashfunc.h deleted file mode 100644 index a9fb1a3..0000000 --- a/lib/hashfunc.h +++ 
/dev/null @@ -1,43 +0,0 @@ -/* - * UCW Library -- Hyper-super-meta-alt-control-shift extra fast - * str_len() and hash_*() routines - * - * (c) 2002, Robert Spalek - * - * This software may be freely distributed and used according to the terms - * of the GNU Lesser General Public License. - */ - -#ifndef _UCW_HASHFUNC_H -#define _UCW_HASHFUNC_H - -#include "lib/lib.h" - -/* The following functions need str to be aligned to uns. */ -uns str_len_aligned(const char *str) PURE; -uns hash_string_aligned(const char *str) PURE; -uns hash_block_aligned(const byte *str, uns len) PURE; - -#ifdef CPU_ALLOW_UNALIGNED -#define str_len(str) str_len_aligned(str) -#define hash_string(str) hash_string_aligned(str) -#define hash_block(str, len) hash_block_aligned(str, len) -#else -uns str_len(const char *str) PURE; -uns hash_string(const char *str) PURE; -uns hash_block(const byte *str, uns len) PURE; -#endif - -uns hash_string_nocase(const char *str) PURE; - -/* - * We hash integers by multiplying by a reasonably large prime with - * few ones in its binary form (to gave the compiler the possibility - * of using shifts and adds on architectures where multiplication - * instructions are slow). - */ -static inline uns CONST hash_u32(uns x) { return 0x01008041*x; } -static inline uns CONST hash_u64(u64 x) { return hash_u32((uns)x ^ (uns)(x >> 32)); } -static inline uns CONST hash_pointer(void *x) { return ((sizeof(x) <= 4) ? hash_u32((uns)(uintptr_t)x) : hash_u64((u64)(uintptr_t)x)); } - -#endif diff --git a/lib/heap.h b/lib/heap.h deleted file mode 100644 index 4f83776..0000000 --- a/lib/heap.h +++ /dev/null @@ -1,88 +0,0 @@ -/* - * UCW Library -- Universal Heap Macros - * - * (c) 2001 Martin Mares - * (c) 2005 Tomas Valla - * - * This software may be freely distributed and used according to the terms - * of the GNU Lesser General Public License. 
- */ - -#define HEAP_BUBBLE_DOWN_J(heap,num,less,swap) \ - for (;;) \ - { \ - _l = 2*_j; \ - if (_l > num) \ - break; \ - if (less(heap[_j],heap[_l]) && (_l == num || less(heap[_j],heap[_l+1]))) \ - break; \ - if (_l != num && less(heap[_l+1],heap[_l])) \ - _l++; \ - swap(heap,_j,_l,x); \ - _j = _l; \ - } - -#define HEAP_BUBBLE_UP_J(heap,num,less,swap) \ - while (_j > 1) \ - { \ - _u = _j/2; \ - if (less(heap[_u], heap[_j])) \ - break; \ - swap(heap,_u,_j,x); \ - _j = _u; \ - } - -#define HEAP_INIT(type,heap,num,less,swap) \ - do { \ - uns _i = num; \ - uns _j, _l; \ - type x; \ - while (_i >= 1) \ - { \ - _j = _i; \ - HEAP_BUBBLE_DOWN_J(heap,num,less,swap) \ - _i--; \ - } \ - } while(0) - -#define HEAP_DELMIN(type,heap,num,less,swap) \ - do { \ - uns _j, _l; \ - type x; \ - swap(heap,1,num,x); \ - num--; \ - _j = 1; \ - HEAP_BUBBLE_DOWN_J(heap,num,less,swap); \ - } while(0) - -#define HEAP_INSERT(type,heap,num,less,swap) \ - do { \ - uns _j, _u; \ - type x; \ - _j = num; \ - HEAP_BUBBLE_UP_J(heap,num,less,swap); \ - } while(0) - -#define HEAP_INCREASE(type,heap,num,less,swap,pos) \ - do { \ - uns _j, _l; \ - type x; \ - _j = pos; \ - HEAP_BUBBLE_DOWN_J(heap,num,less,swap); \ - } while(0) - -#define HEAP_DELETE(type,heap,num,less,swap,pos) \ - do { \ - uns _j, _l, _u; \ - type x; \ - _j = pos; \ - swap(heap,_j,num,x); \ - num--; \ - if (less(heap[_j], heap[num+1])) \ - HEAP_BUBBLE_UP_J(heap,num,less,swap) \ - else \ - HEAP_BUBBLE_DOWN_J(heap,num,less,swap); \ - } while(0) - -/* Default swapping macro */ -#define HEAP_SWAP(heap,a,b,t) (t=heap[a], heap[a]=heap[b], heap[b]=t) diff --git a/lib/lfs.h b/lib/lfs.h deleted file mode 100644 index ede4126..0000000 --- a/lib/lfs.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * UCW Library -- Large File Support - * - * (c) 1999--2002 Martin Mares - * - * This software may be freely distributed and used according to the terms - * of the GNU Lesser General Public License. 
- */ - -#ifndef _UCW_LFS_H -#define _UCW_LFS_H - -#include -#include - -#ifdef CONFIG_LFS - -#define sh_open open64 -#define sh_seek lseek64 -#define sh_pread pread64 -#define sh_pwrite pwrite64 -#define sh_ftruncate ftruncate64 -#define sh_mmap(a,l,p,f,d,o) mmap64(a,l,p,f,d,o) -#define sh_pread pread64 -#define sh_pwrite pwrite64 -#define sh_stat stat64 -#define sh_fstat fstat64 -typedef struct stat64 sh_stat_t; - -#else /* !CONFIG_LFS */ - -#define sh_open open -#define sh_seek(f,o,w) lseek(f,o,w) -#define sh_ftruncate(f,o) ftruncate(f,o) -#define sh_mmap(a,l,p,f,d,o) mmap(a,l,p,f,d,o) -#define sh_pread pread -#define sh_pwrite pwrite -#define sh_stat stat -#define sh_fstat fstat -typedef struct stat sh_stat_t; - -#endif /* !CONFIG_LFS */ - -#if defined(_POSIX_SYNCHRONIZED_IO) && (_POSIX_SYNCHRONIZED_IO > 0) -#define sh_fdatasync fdatasync -#else -#define sh_fdatasync fsync -#endif - -#define HAVE_PREAD - -static inline sh_off_t -sh_file_size(const char *name) -{ - int fd = sh_open(name, O_RDONLY); - if (fd < 0) - die("Cannot open %s: %m", name); - sh_off_t len = sh_seek(fd, 0, SEEK_END); - close(fd); - return len; -} - -#endif /* !_UCW_LFS_H */ diff --git a/lib/lib.h b/lib/lib.h deleted file mode 100644 index f8659ed..0000000 --- a/lib/lib.h +++ /dev/null @@ -1,279 +0,0 @@ -/* - * The UCW Library -- Miscellaneous Functions - * - * (c) 1997--2007 Martin Mares - * (c) 2005 Tomas Valla - * (c) 2006 Robert Spalek - * (c) 2007 Pavel Charvat - * - * This software may be freely distributed and used according to the terms - * of the GNU Lesser General Public License. 
- */ - -#ifndef _UCW_LIB_H -#define _UCW_LIB_H - -#include "lib/config.h" -#include - -/* Macros for handling structurues, offsets and alignment */ - -#define CHECK_PTR_TYPE(x, type) ((x)-(type)(x) + (type)(x)) -#define PTR_TO(s, i) &((s*)0)->i -#define OFFSETOF(s, i) ((unsigned int) PTR_TO(s, i)) -#define SKIP_BACK(s, i, p) ((s *)((char *)p - OFFSETOF(s, i))) -#define ALIGN_TO(s, a) (((s)+a-1)&~(a-1)) -#define ALIGN_PTR(p, s) ((uintptr_t)(p) % (s) ? (typeof(p))((uintptr_t)(p) + (s) - (uintptr_t)(p) % (s)) : (p)) -#define UNALIGNED_PART(ptr, type) (((uintptr_t) (ptr)) % sizeof(type)) - -/* Some other macros */ - -#define MIN(a,b) (((a)<(b))?(a):(b)) -#define MAX(a,b) (((a)>(b))?(a):(b)) -#define CLAMP(x,min,max) ({ int _t=x; (_t < min) ? min : (_t > max) ? max : _t; }) -#define ABS(x) ((x) < 0 ? -(x) : (x)) -#define ARRAY_SIZE(a) (sizeof(a)/sizeof(*(a))) -#define STRINGIFY(x) #x -#define STRINGIFY_EXPANDED(x) STRINGIFY(x) -#define GLUE(x,y) x##y -#define GLUE_(x,y) x##_##y - -#define COMPARE(x,y) do { if ((x)<(y)) return -1; if ((x)>(y)) return 1; } while(0) -#define REV_COMPARE(x,y) COMPARE(y,x) -#define COMPARE_LT(x,y) do { if ((x)<(y)) return 1; if ((x)>(y)) return 0; } while(0) -#define COMPARE_GT(x,y) COMPARE_LT(y,x) - -#define ROL(x, bits) (((x) << (bits)) | ((x) >> (sizeof(uns)*8 - (bits)))) /* Bitwise rotation of an uns to the left */ - -/* GCC Extensions */ - -#ifdef __GNUC__ - -#undef inline -#define NONRET __attribute__((noreturn)) -#define UNUSED __attribute__((unused)) -#define CONSTRUCTOR __attribute__((constructor)) -#define PACKED __attribute__((packed)) -#define CONST __attribute__((const)) -#define PURE __attribute__((pure)) -#define FORMAT_CHECK(x,y,z) __attribute__((format(x,y,z))) -#define likely(x) __builtin_expect((x),1) -#define unlikely(x) __builtin_expect((x),0) - -#if __GNUC__ >= 4 || __GNUC__ == 3 && __GNUC_MINOR__ >= 3 -#define ALWAYS_INLINE inline __attribute__((always_inline)) -#define NO_INLINE __attribute__((noinline)) -#else 
-#define ALWAYS_INLINE inline -#endif - -#if __GNUC__ >= 4 -#define LIKE_MALLOC __attribute__((malloc)) -#define SENTINEL_CHECK __attribute__((sentinel)) -#else -#define LIKE_MALLOC -#define SENTINEL_CHECK -#endif - -#else -#error This program requires the GNU C compiler. -#endif - -/* Logging */ - -#define L_DEBUG 'D' /* Debugging messages */ -#define L_INFO 'I' /* Informational msgs, warnings and errors */ -#define L_WARN 'W' -#define L_ERROR 'E' -#define L_INFO_R 'i' /* Errors caused by external events */ -#define L_WARN_R 'w' -#define L_ERROR_R 'e' -#define L_FATAL '!' /* die() */ - -extern char *log_title; /* NULL - print no title, default is log_progname */ -extern char *log_filename; /* Expanded name of the current log file */ -extern volatile int log_switch_nest; /* log_switch() nesting counter, increment to disable automatic switches */ -extern int log_pid; /* 0 if shouldn't be logged */ -extern int log_precise_timings; /* Include microsecond timestamps in log messages */ -extern void (*log_die_hook)(void); -struct tm; -extern void (*log_switch_hook)(struct tm *tm); - -void msg(uns cat, const char *fmt, ...) FORMAT_CHECK(printf,2,3); -void vmsg(uns cat, const char *fmt, va_list args); -void die(const char *, ...) NONRET FORMAT_CHECK(printf,1,2); -void log_init(const char *argv0); -void log_file(const char *name); -void log_fork(void); -int log_switch(void); - -void assert_failed(const char *assertion, const char *file, int line) NONRET; -void assert_failed_noinfo(void) NONRET; - -#ifdef DEBUG_ASSERTS -#define ASSERT(x) ({ if (unlikely(!(x))) assert_failed(#x, __FILE__, __LINE__); 1; }) -#else -#define ASSERT(x) ({ if (__builtin_constant_p(x) && !(x)) assert_failed_noinfo(); 1; }) -#endif - -#define COMPILE_ASSERT(name,x) typedef char _COMPILE_ASSERT_##name[!!(x)-1] - -#ifdef LOCAL_DEBUG -#define DBG(x,y...) msg(L_DEBUG, x,##y) -#else -#define DBG(x,y...) 
do { } while(0) -#endif - -static inline void log_switch_disable(void) { log_switch_nest++; } -static inline void log_switch_enable(void) { ASSERT(log_switch_nest); log_switch_nest--; } - -/* Memory allocation */ - -#define xmalloc sh_xmalloc -#define xrealloc sh_xrealloc -#define xfree sh_xfree - -#ifdef DEBUG_DMALLOC -/* - * The standard dmalloc macros tend to produce lots of namespace - * conflicts and we use only xmalloc and xfree, so we can define - * the stubs ourselves. - */ -#define DMALLOC_DISABLE -#include -#define sh_xmalloc(size) _xmalloc_leap(__FILE__, __LINE__, size) -#define sh_xrealloc(ptr,size) _xrealloc_leap(__FILE__, __LINE__, ptr, size) -#define sh_xfree(ptr) _xfree_leap(__FILE__, __LINE__, ptr) -#else -/* - * Unfortunately, several libraries we might want to link to define - * their own xmalloc and we don't want to interfere with them, hence - * the renaming. - */ -void *xmalloc(uns) LIKE_MALLOC; -void *xrealloc(void *, uns); -void xfree(void *); -#endif - -void *xmalloc_zero(uns) LIKE_MALLOC; -char *xstrdup(const char *) LIKE_MALLOC; - -/* Content-Type pattern matching and filters */ - -int match_ct_patt(const char *, const char *); - -/* wordsplit.c */ - -int sepsplit(char *str, uns sep, char **rec, uns max); -int wordsplit(char *str, char **rec, uns max); - -/* pat(i)match.c: Matching of shell patterns */ - -int match_pattern(const char *patt, const char *str); -int match_pattern_nocase(const char *patt, const char *str); - -/* md5hex.c */ - -void md5_to_hex(const byte *s, char *d); -void hex_to_md5(const char *s, byte *d); - -#define MD5_SIZE 16 -#define MD5_HEX_SIZE 33 - -/* prime.c */ - -int isprime(uns x); -uns nextprime(uns x); - -/* primetable.c */ - -uns next_table_prime(uns x); -uns prev_table_prime(uns x); - -/* timer.c */ - -timestamp_t get_timestamp(void); - -void init_timer(timestamp_t *timer); -uns get_timer(timestamp_t *timer); -uns switch_timer(timestamp_t *old, timestamp_t *new); - -/* regex.c */ - -typedef struct regex 
regex; - -regex *rx_compile(const char *r, int icase); -void rx_free(regex *r); -int rx_match(regex *r, const char *s); -int rx_subst(regex *r, const char *by, const char *src, char *dest, uns destlen); - -/* random.c */ - -uns random_u32(void); -uns random_max(uns max); -u64 random_u64(void); -u64 random_max_u64(u64 max); - -/* mmap.c */ - -void *mmap_file(const char *name, unsigned *len, int writeable); -void munmap_file(void *start, unsigned len); - -/* proctitle.c */ - -void setproctitle_init(int argc, char **argv); -void setproctitle(const char *msg, ...) FORMAT_CHECK(printf,1,2); -char *getproctitle(void); - -/* randomkey.c */ - -void randomkey(byte *buf, uns size); - -/* exitstatus.c */ - -#define EXIT_STATUS_MSG_SIZE 32 -int format_exit_status(char *msg, int stat); - -/* runcmd.c */ - -int run_command(const char *cmd, ...); -void NONRET exec_command(const char *cmd, ...); -void echo_command(char *buf, int size, const char *cmd, ...); -int run_command_v(const char *cmd, va_list args); -void NONRET exec_command_v(const char *cmd, va_list args); -void echo_command_v(char *buf, int size, const char *cmd, va_list args); - -/* carefulio.c */ - -int careful_read(int fd, void *buf, int len); -int careful_write(int fd, const void *buf, int len); - -/* sync.c */ - -void sync_dir(const char *name); - -/* sighandler.c */ - -typedef int (*sh_sighandler_t)(int); // gets signum, returns nonzero if abort() should be called - -void handle_signal(int signum); -void unhandle_signal(int signum); -sh_sighandler_t set_signal_handler(int signum, sh_sighandler_t new); - -/* string.c */ - -char *str_unesc(char *dest, const char *src); -char *str_format_flags(char *dest, const char *fmt, uns flags); - -/* bigalloc.c */ - -void *page_alloc(u64 len) LIKE_MALLOC; // allocates a multiple of CPU_PAGE_SIZE bytes with mmap -void *page_alloc_zero(u64 len) LIKE_MALLOC; -void page_free(void *start, u64 len); -void *page_realloc(void *start, u64 old_len, u64 new_len); - -void *big_alloc(u64 
len) LIKE_MALLOC; // allocate a large memory block in the most efficient way available -void *big_alloc_zero(u64 len) LIKE_MALLOC; -void big_free(void *start, u64 len); - -#endif diff --git a/lib/lists.c b/lib/lists.c deleted file mode 100644 index 043c364..0000000 --- a/lib/lists.c +++ /dev/null @@ -1,77 +0,0 @@ -/* - * UCW Library -- Linked Lists - * - * (c) 1997--1999 Martin Mares - * - * This software may be freely distributed and used according to the terms - * of the GNU Lesser General Public License. - */ - -#include "lib/lib.h" - -#define _UCW_LISTS_C -#include "lib/lists.h" - -LIST_INLINE void -add_tail(list *l, node *n) -{ - node *z = l->tail; - - n->next = (node *) &l->null; - n->prev = z; - z->next = n; - l->tail = n; -} - -LIST_INLINE void -add_head(list *l, node *n) -{ - node *z = l->head; - - n->next = z; - n->prev = (node *) &l->head; - z->prev = n; - l->head = n; -} - -LIST_INLINE void -insert_node(node *n, node *after) -{ - node *z = after->next; - - n->next = z; - n->prev = after; - after->next = n; - z->prev = n; -} - -LIST_INLINE void -rem_node(node *n) -{ - node *z = n->prev; - node *x = n->next; - - z->next = x; - x->prev = z; -} - -LIST_INLINE void -init_list(list *l) -{ - l->head = (node *) &l->null; - l->null = NULL; - l->tail = (node *) &l->head; -} - -LIST_INLINE void -add_tail_list(list *to, list *l) -{ - node *p = to->tail; - node *q = l->head; - - p->next = q; - q->prev = p; - q = l->tail; - q->next = (node *) &to->null; - to->tail = q; -} diff --git a/lib/lists.h b/lib/lists.h deleted file mode 100644 index 3903691..0000000 --- a/lib/lists.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * UCW Library -- Linked Lists - * - * (c) 1997--1999 Martin Mares - * - * This software may be freely distributed and used according to the terms - * of the GNU Lesser General Public License. 
- */ - -#ifndef _UCW_LISTS_H -#define _UCW_LISTS_H - -/* - * I admit the list structure is very tricky and also somewhat awkward, - * but it's both efficient and easy to manipulate once one understands the - * basic trick: The list head always contains two synthetic nodes which are - * always present in the list: the head and the tail. But as the `next' - * entry of the tail and the `prev' entry of the head are both NULL, the - * nodes can overlap each other: - * - * head head_node.next - * null head_node.prev tail_node.next - * tail tail_node.prev - */ - -typedef struct node { - struct node *next, *prev; -} node; - -typedef struct list { /* In fact two overlayed nodes */ - struct node *head, *null, *tail; -} list; - -#define NODE (node *) -#define HEAD(list) ((void *)((list).head)) -#define TAIL(list) ((void *)((list).tail)) -#define WALK_LIST(n,list) for(n=HEAD(list);(NODE (n))->next; \ - n=(void *)((NODE (n))->next)) -#define DO_FOR_ALL(n,list) WALK_LIST(n,list) -#define WALK_LIST_DELSAFE(n,nxt,list) \ - for(n=HEAD(list); nxt=(void *)((NODE (n))->next); n=(void *) nxt) -#define WALK_LIST_BACKWARDS(n,list) for(n=TAIL(list);(NODE (n))->prev; \ - n=(void *)((NODE (n))->prev)) -#define WALK_LIST_BACKWARDS_DELSAFE(n,prv,list) \ - for(n=TAIL(list); prv=(void *)((NODE (n))->prev); n=(void *) prv) - -#define EMPTY_LIST(list) (!(list).head->next) - -void add_tail(list *, node *); -void add_head(list *, node *); -void rem_node(node *); -void add_tail_list(list *, list *); -void init_list(list *); -void insert_node(node *, node *); - -#if !defined(_UCW_LISTS_C) && defined(__GNUC__) -#define LIST_INLINE extern inline -#include "lib/lists.c" -#undef LIST_INLINE -#else -#define LIST_INLINE -#endif - -#endif diff --git a/lib/lizard.h b/lib/lizard.h deleted file mode 100644 index 616d17b..0000000 --- a/lib/lizard.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * LiZaRd -- Fast compression method based on Lempel-Ziv 77 - * - * (c) 2004, Robert Spalek - * - * This software may be freely 
distributed and used according to the terms - * of the GNU Lesser General Public License. - */ - -#ifndef _UCW_LIZARD_H -#define _UCW_LIZARD_H - -#define LIZARD_NEEDS_CHARS 8 - /* The compression routine needs input buffer 8 characters longer, because it - * does not check the input bounds all the time. */ -#define LIZARD_MAX_MULTIPLY 23./22 -#define LIZARD_MAX_ADD 4 - /* In the worst case, the compressed file will not be longer than its - * original length * 23/22 + 4. - * - * The additive constant is for EOF and the header of the file. - * - * The multiplicative constant comes from 19-byte incompressible string - * followed by a 3-sequence that can be compressed into 2-byte link. This - * breaks the copy-mode and it needs to be restarted with a new header. The - * total length is 2(header) + 19(string) + 2(link) = 23. - */ - -/* lizard.c */ -int lizard_compress(const byte *in, uns in_len, byte *out); -int lizard_decompress(const byte *in, byte *out); - -/* lizard-safe.c */ -struct lizard_buffer; - -struct lizard_buffer *lizard_alloc(void); -void lizard_free(struct lizard_buffer *buf); -byte *lizard_decompress_safe(const byte *in, struct lizard_buffer *buf, uns expected_length); - -/* adler32.c */ -uns update_adler32(uns adler, const byte *ptr, uns len); - -static inline uns -adler32(const byte *buf, uns len) -{ - return update_adler32(1, buf, len); -} - -#endif diff --git a/lib/log-file.c b/lib/log-file.c deleted file mode 100644 index 401e797..0000000 --- a/lib/log-file.c +++ /dev/null @@ -1,108 +0,0 @@ -/* - * UCW Library -- Keeping of Log Files - * - * (c) 1997--2006 Martin Mares - * - * This software may be freely distributed and used according to the terms - * of the GNU Lesser General Public License. 
- */ - -#include "lib/lib.h" -#include "lib/lfs.h" -#include "lib/threads.h" - -#include -#include -#include -#include -#include - -static char *log_name_patt; -static int log_params; -static int log_filename_size; -volatile int log_switch_nest; - -static int -do_log_switch(struct tm *tm) -{ - int fd, l; - char name[log_filename_size]; - int switched = 0; - - if (!log_name_patt || - log_filename[0] && !log_params) - return 0; - ucwlib_lock(); - log_switch_nest++; - l = strftime(name, log_filename_size, log_name_patt, tm); - if (l < 0 || l >= log_filename_size) - die("Error formatting log file name: %m"); - if (strcmp(name, log_filename)) - { - strcpy(log_filename, name); - fd = sh_open(name, O_WRONLY | O_CREAT | O_APPEND, 0666); - if (fd < 0) - die("Unable to open log file %s: %m", name); - dup2(fd, 2); - close(fd); - switched = 1; - } - log_switch_nest--; - ucwlib_unlock(); - return switched; -} - -int -log_switch(void) -{ - time_t tim = time(NULL); - return do_log_switch(localtime(&tim)); -} - -static void -internal_log_switch(struct tm *tm) -{ - if (!log_switch_nest) - do_log_switch(tm); -} - -void -log_file(const char *name) -{ - if (name) - { - if (log_name_patt) - xfree(log_name_patt); - if (log_filename) - { - xfree(log_filename); - log_filename = NULL; - } - log_name_patt = xstrdup(name); - log_params = !!strchr(name, '%'); - log_filename_size = strlen(name) + 64; /* 63 is an upper bound on expansion of % escapes */ - log_filename = xmalloc(log_filename_size); - log_filename[0] = 0; - log_switch(); - log_switch_hook = internal_log_switch; - } -} - -void -log_fork(void) -{ - log_pid = getpid(); -} - -#ifdef TEST - -int main(int argc, char **argv) -{ - log_init(argv[0]); - log_file("/proc/self/fd/1"); - for (int i=1; i - * - * This software may be freely distributed and used according to the terms - * of the GNU Lesser General Public License. 
- */ - -#include "lib/lib.h" - -#include -#include -#include -#include -#include -#include -#include - -static char log_progname[32]; -char *log_filename; -char *log_title; -int log_pid; -int log_precise_timings; -void (*log_die_hook)(void); -void (*log_switch_hook)(struct tm *tm); - -void -vmsg(unsigned int cat, const char *fmt, va_list args) -{ - struct timeval tv; - struct tm tm; - byte *buf, *p; - int buflen = 256; - int l, l0, r; - va_list args2; - - gettimeofday(&tv, NULL); - if (!localtime_r(&tv.tv_sec, &tm)) - bzero(&tm, sizeof(tm)); - - if (log_switch_hook) - log_switch_hook(&tm); - while (1) - { - p = buf = alloca(buflen); - *p++ = cat; - /* We cannot use strftime() here, because it's not re-entrant */ - p += sprintf(p, " %4d-%02d-%02d %02d:%02d:%02d", tm.tm_year+1900, tm.tm_mon+1, tm.tm_mday, - tm.tm_hour, tm.tm_min, tm.tm_sec); - if (log_precise_timings) - p += sprintf(p, ".%06d", (int)tv.tv_usec); - *p++ = ' '; - if (log_title) - { - if (log_pid) - p += sprintf(p, "[%s (%d)] ", log_title, log_pid); - else - p += sprintf(p, "[%s] ", log_title); - } - else - { - if (log_pid) - p += sprintf(p, "[%d] ", log_pid); - } - l0 = p - buf + 1; - r = buflen - l0; - va_copy(args2, args); - l = vsnprintf(p, r, fmt, args2); - va_end(args2); - if (l < 0) - l = r; - else if (l < r) - { - while (*p) - { - if (*p < 0x20 && *p != '\t') - *p = 0x7f; - p++; - } - *p = '\n'; - write(2, buf, l + l0); - return; - } - buflen = l + l0 + 1; - } -} - -void -msg(unsigned int cat, const char *fmt, ...) -{ - va_list args; - - va_start(args, fmt); - vmsg(cat, fmt, args); - va_end(args); -} - -void -die(const char *fmt, ...) 
-{ - va_list args; - - va_start(args, fmt); - vmsg(L_FATAL, fmt, args); - va_end(args); - if (log_die_hook) - log_die_hook(); -#ifdef DEBUG_DIE_BY_ABORT - abort(); -#else - exit(1); -#endif -} - -void -assert_failed(const char *assertion, const char *file, int line) -{ - msg(L_FATAL, "Assertion `%s' failed at %s:%d", assertion, file, line); - abort(); -} - -void -assert_failed_noinfo(void) -{ - die("Internal error: Assertion failed."); -} - -static const char * -log_basename(const char *n) -{ - const char *p = n; - - while (*n) - if (*n++ == '/') - p = n; - return p; -} - -void -log_init(const char *argv0) -{ - if (argv0) - { - strncpy(log_progname, log_basename(argv0), sizeof(log_progname)-1); - log_progname[sizeof(log_progname)-1] = 0; - log_title = log_progname; - } -} diff --git a/lib/mainloop.h b/lib/mainloop.h deleted file mode 100644 index 5c3baf6..0000000 --- a/lib/mainloop.h +++ /dev/null @@ -1,108 +0,0 @@ -/* - * UCW Library -- Main Loop - * - * (c) 2004--2005 Martin Mares - * - * This software may be freely distributed and used according to the terms - * of the GNU Lesser General Public License. - */ - -#ifndef _UCW_MAINLOOP_H -#define _UCW_MAINLOOP_H - -#include "lib/clists.h" - -extern timestamp_t main_now; /* Current time in milliseconds since UNIX epoch */ -extern sh_time_t main_now_seconds; /* Current time in seconds since the epoch */ -extern uns main_shutdown; -extern clist main_timer_list, main_file_list, main_hook_list, main_process_list; - -/* User-defined fields are marked with [*], all other fields must be initialized to zero. */ - -/* Timers */ - -struct main_timer { - cnode n; - timestamp_t expires; - void (*handler)(struct main_timer *tm); /* [*] Function to be called when the timer expires. 
Must re-add/del the timer.*/ - void *data; /* [*] Data for use by the handler */ -}; - -void timer_add(struct main_timer *tm, timestamp_t expires); /* Can modify a running timer, too */ -void timer_del(struct main_timer *tm); - -void main_get_time(void); /* Refresh main_now */ - -/* Files to poll */ - -struct main_file { - cnode n; - int fd; /* [*] File descriptor */ - int (*read_handler)(struct main_file *fi); /* [*] To be called when ready for reading/writing; must call file_chg() afterwards */ - int (*write_handler)(struct main_file *fi); - void (*error_handler)(struct main_file *fi, int cause); /* [*] Handler to call on errors */ - void *data; /* [*] Data for use by the handlers */ - byte *rbuf; /* Read/write pointers for use by file_read/write */ - uns rpos, rlen; - byte *wbuf; - uns wpos, wlen; - void (*read_done)(struct main_file *fi); /* [*] Called when file_read is finished; rpos < rlen if EOF */ - void (*write_done)(struct main_file *fi); /* [*] Called when file_write is finished */ - struct main_timer timer; - struct pollfd *pollfd; -}; - -enum main_file_err_cause { - MFERR_READ, - MFERR_WRITE, - MFERR_TIMEOUT -}; - -void file_add(struct main_file *fi); -void file_chg(struct main_file *fi); -void file_del(struct main_file *fi); -void file_read(struct main_file *fi, void *buf, uns len); -void file_write(struct main_file *fi, void *buf, uns len); -void file_set_timeout(struct main_file *fi, timestamp_t expires); -void file_close_all(void); /* Close all known main_file's; frequently used before fork() */ - -/* Hooks to be called in each iteration of the main loop */ - -struct main_hook { - cnode n; - int (*handler)(struct main_hook *ho); /* [*] Hook function; returns HOOK_xxx */ - void *data; /* [*] For use by the handler */ -}; - -enum main_hook_return { - HOOK_IDLE, /* Call again when the main loop becomes idle again */ - HOOK_RETRY, /* Call again as soon as possible */ - HOOK_DONE = -1, /* Shut down the main loop if all hooks return this value */ - 
HOOK_SHUTDOWN = -2 /* Shut down the main loop immediately */ -}; - -void hook_add(struct main_hook *ho); -void hook_del(struct main_hook *ho); - -/* Processes to watch */ - -struct main_process { - cnode n; - int pid; /* Process id (0=not running) */ - int status; /* Exit status (-1=fork failed) */ - char status_msg[EXIT_STATUS_MSG_SIZE]; - void (*handler)(struct main_process *mp); /* [*] Called when the process exits; process_del done automatically */ - void *data; /* [*] For use by the handler */ -}; - -void process_add(struct main_process *mp); -void process_del(struct main_process *mp); -int process_fork(struct main_process *mp); - -/* The main loop */ - -void main_init(void); -void main_loop(void); -void main_debug(void); - -#endif diff --git a/lib/md5.c b/lib/md5.c deleted file mode 100644 index ba8ff56..0000000 --- a/lib/md5.c +++ /dev/null @@ -1,249 +0,0 @@ -/* - * This code implements the MD5 message-digest algorithm. - * The algorithm is due to Ron Rivest. This code was - * written by Colin Plumb in 1993, no copyright is claimed. - * This code is in the public domain; do with it what you wish. - * - * Equivalent code is available from RSA Data Security, Inc. - * This code has been tested against that, and is equivalent, - * except that you don't need to include two pages of legalese - * with every copy. - * - * To compute the message digest of a chunk of bytes, declare an - * MD5Context structure, pass it to MD5Init, call MD5Update as - * needed on buffers full of bytes, and then call MD5Final, which - * will fill a supplied 16-byte array with the digest. - */ - -#include "lib/lib.h" -#include "lib/md5.h" - -#include /* for memcpy() */ - -#ifdef CPU_LITTLE_ENDIAN -#define byteReverse(buf, len) /* Nothing */ -#else -void byteReverse(unsigned char *buf, unsigned longs); - -/* - * Note: this code is harmless on little-endian machines. 
- */ -void byteReverse(unsigned char *buf, unsigned longs) -{ - uint32 t; - do { - t = (uint32) ((unsigned) buf[3] << 8 | buf[2]) << 16 | - ((unsigned) buf[1] << 8 | buf[0]); - *(uint32 *) buf = t; - buf += 4; - } while (--longs); -} -#endif - -/* - * Start MD5 accumulation. Set bit count to 0 and buffer to mysterious - * initialization constants. - */ -void MD5Init(struct MD5Context *ctx) -{ - ctx->buf[0] = 0x67452301; - ctx->buf[1] = 0xefcdab89; - ctx->buf[2] = 0x98badcfe; - ctx->buf[3] = 0x10325476; - - ctx->bits[0] = 0; - ctx->bits[1] = 0; -} - -/* - * Update context to reflect the concatenation of another buffer full - * of bytes. - */ -void MD5Update(struct MD5Context *ctx, unsigned char const *buf, unsigned len) -{ - uint32 t; - - /* Update bitcount */ - - t = ctx->bits[0]; - if ((ctx->bits[0] = t + ((uint32) len << 3)) < t) - ctx->bits[1]++; /* Carry from low to high */ - ctx->bits[1] += len >> 29; - - t = (t >> 3) & 0x3f; /* Bytes already in shsInfo->data */ - - /* Handle any leading odd-sized chunks */ - - if (t) { - unsigned char *p = (unsigned char *) ctx->in + t; - - t = 64 - t; - if (len < t) { - memcpy(p, buf, len); - return; - } - memcpy(p, buf, t); - byteReverse(ctx->in, 16); - MD5Transform(ctx->buf, (uint32 *) ctx->in); - buf += t; - len -= t; - } - /* Process data in 64-byte chunks */ - - while (len >= 64) { - memcpy(ctx->in, buf, 64); - byteReverse(ctx->in, 16); - MD5Transform(ctx->buf, (uint32 *) ctx->in); - buf += 64; - len -= 64; - } - - /* Handle any remaining bytes of data. */ - - memcpy(ctx->in, buf, len); -} - -/* - * Final wrapup - pad to 64-byte boundary with the bit pattern - * 1 0* (64-bit count of bits processed, MSB-first) - */ -void MD5Final(unsigned char digest[16], struct MD5Context *ctx) -{ - unsigned count; - unsigned char *p; - - /* Compute number of bytes mod 64 */ - count = (ctx->bits[0] >> 3) & 0x3F; - - /* Set the first char of padding to 0x80. 
This is safe since there is - always at least one byte free */ - p = ctx->in + count; - *p++ = 0x80; - - /* Bytes of padding needed to make 64 bytes */ - count = 64 - 1 - count; - - /* Pad out to 56 mod 64 */ - if (count < 8) { - /* Two lots of padding: Pad the first block to 64 bytes */ - memset(p, 0, count); - byteReverse(ctx->in, 16); - MD5Transform(ctx->buf, (uint32 *) ctx->in); - - /* Now fill the next block with 56 bytes */ - memset(ctx->in, 0, 56); - } else { - /* Pad block to 56 bytes */ - memset(p, 0, count - 8); - } - byteReverse(ctx->in, 14); - - /* Append length in bits and transform */ - ((uint32 *) ctx->in)[14] = ctx->bits[0]; - ((uint32 *) ctx->in)[15] = ctx->bits[1]; - - MD5Transform(ctx->buf, (uint32 *) ctx->in); - byteReverse((unsigned char *) ctx->buf, 4); - memcpy(digest, ctx->buf, 16); - memset((char *) ctx, 0, sizeof(ctx)); /* In case it's sensitive */ -} - -/* The four core functions - F1 is optimized somewhat */ - -/* #define F1(x, y, z) (x & y | ~x & z) */ -#define F1(x, y, z) (z ^ (x & (y ^ z))) -#define F2(x, y, z) F1(z, x, y) -#define F3(x, y, z) (x ^ y ^ z) -#define F4(x, y, z) (y ^ (x | ~z)) - -/* This is the central step in the MD5 algorithm. */ -#define MD5STEP(f, w, x, y, z, data, s) \ - ( w += f(x, y, z) + data, w = w<>(32-s), w += x ) - -/* - * The core of the MD5 algorithm, this alters an existing MD5 hash to - * reflect the addition of 16 longwords of new data. MD5Update blocks - * the data and converts bytes into longwords for this routine. 
- */ -void MD5Transform(uint32 buf[4], uint32 const in[16]) -{ - uint32 a, b, c, d; - - a = buf[0]; - b = buf[1]; - c = buf[2]; - d = buf[3]; - - MD5STEP(F1, a, b, c, d, in[0] + 0xd76aa478, 7); - MD5STEP(F1, d, a, b, c, in[1] + 0xe8c7b756, 12); - MD5STEP(F1, c, d, a, b, in[2] + 0x242070db, 17); - MD5STEP(F1, b, c, d, a, in[3] + 0xc1bdceee, 22); - MD5STEP(F1, a, b, c, d, in[4] + 0xf57c0faf, 7); - MD5STEP(F1, d, a, b, c, in[5] + 0x4787c62a, 12); - MD5STEP(F1, c, d, a, b, in[6] + 0xa8304613, 17); - MD5STEP(F1, b, c, d, a, in[7] + 0xfd469501, 22); - MD5STEP(F1, a, b, c, d, in[8] + 0x698098d8, 7); - MD5STEP(F1, d, a, b, c, in[9] + 0x8b44f7af, 12); - MD5STEP(F1, c, d, a, b, in[10] + 0xffff5bb1, 17); - MD5STEP(F1, b, c, d, a, in[11] + 0x895cd7be, 22); - MD5STEP(F1, a, b, c, d, in[12] + 0x6b901122, 7); - MD5STEP(F1, d, a, b, c, in[13] + 0xfd987193, 12); - MD5STEP(F1, c, d, a, b, in[14] + 0xa679438e, 17); - MD5STEP(F1, b, c, d, a, in[15] + 0x49b40821, 22); - - MD5STEP(F2, a, b, c, d, in[1] + 0xf61e2562, 5); - MD5STEP(F2, d, a, b, c, in[6] + 0xc040b340, 9); - MD5STEP(F2, c, d, a, b, in[11] + 0x265e5a51, 14); - MD5STEP(F2, b, c, d, a, in[0] + 0xe9b6c7aa, 20); - MD5STEP(F2, a, b, c, d, in[5] + 0xd62f105d, 5); - MD5STEP(F2, d, a, b, c, in[10] + 0x02441453, 9); - MD5STEP(F2, c, d, a, b, in[15] + 0xd8a1e681, 14); - MD5STEP(F2, b, c, d, a, in[4] + 0xe7d3fbc8, 20); - MD5STEP(F2, a, b, c, d, in[9] + 0x21e1cde6, 5); - MD5STEP(F2, d, a, b, c, in[14] + 0xc33707d6, 9); - MD5STEP(F2, c, d, a, b, in[3] + 0xf4d50d87, 14); - MD5STEP(F2, b, c, d, a, in[8] + 0x455a14ed, 20); - MD5STEP(F2, a, b, c, d, in[13] + 0xa9e3e905, 5); - MD5STEP(F2, d, a, b, c, in[2] + 0xfcefa3f8, 9); - MD5STEP(F2, c, d, a, b, in[7] + 0x676f02d9, 14); - MD5STEP(F2, b, c, d, a, in[12] + 0x8d2a4c8a, 20); - - MD5STEP(F3, a, b, c, d, in[5] + 0xfffa3942, 4); - MD5STEP(F3, d, a, b, c, in[8] + 0x8771f681, 11); - MD5STEP(F3, c, d, a, b, in[11] + 0x6d9d6122, 16); - MD5STEP(F3, b, c, d, a, in[14] + 0xfde5380c, 23); - MD5STEP(F3, 
a, b, c, d, in[1] + 0xa4beea44, 4); - MD5STEP(F3, d, a, b, c, in[4] + 0x4bdecfa9, 11); - MD5STEP(F3, c, d, a, b, in[7] + 0xf6bb4b60, 16); - MD5STEP(F3, b, c, d, a, in[10] + 0xbebfbc70, 23); - MD5STEP(F3, a, b, c, d, in[13] + 0x289b7ec6, 4); - MD5STEP(F3, d, a, b, c, in[0] + 0xeaa127fa, 11); - MD5STEP(F3, c, d, a, b, in[3] + 0xd4ef3085, 16); - MD5STEP(F3, b, c, d, a, in[6] + 0x04881d05, 23); - MD5STEP(F3, a, b, c, d, in[9] + 0xd9d4d039, 4); - MD5STEP(F3, d, a, b, c, in[12] + 0xe6db99e5, 11); - MD5STEP(F3, c, d, a, b, in[15] + 0x1fa27cf8, 16); - MD5STEP(F3, b, c, d, a, in[2] + 0xc4ac5665, 23); - - MD5STEP(F4, a, b, c, d, in[0] + 0xf4292244, 6); - MD5STEP(F4, d, a, b, c, in[7] + 0x432aff97, 10); - MD5STEP(F4, c, d, a, b, in[14] + 0xab9423a7, 15); - MD5STEP(F4, b, c, d, a, in[5] + 0xfc93a039, 21); - MD5STEP(F4, a, b, c, d, in[12] + 0x655b59c3, 6); - MD5STEP(F4, d, a, b, c, in[3] + 0x8f0ccc92, 10); - MD5STEP(F4, c, d, a, b, in[10] + 0xffeff47d, 15); - MD5STEP(F4, b, c, d, a, in[1] + 0x85845dd1, 21); - MD5STEP(F4, a, b, c, d, in[8] + 0x6fa87e4f, 6); - MD5STEP(F4, d, a, b, c, in[15] + 0xfe2ce6e0, 10); - MD5STEP(F4, c, d, a, b, in[6] + 0xa3014314, 15); - MD5STEP(F4, b, c, d, a, in[13] + 0x4e0811a1, 21); - MD5STEP(F4, a, b, c, d, in[4] + 0xf7537e82, 6); - MD5STEP(F4, d, a, b, c, in[11] + 0xbd3af235, 10); - MD5STEP(F4, c, d, a, b, in[2] + 0x2ad7d2bb, 15); - MD5STEP(F4, b, c, d, a, in[9] + 0xeb86d391, 21); - - buf[0] += a; - buf[1] += b; - buf[2] += c; - buf[3] += d; -} diff --git a/lib/md5.h b/lib/md5.h deleted file mode 100644 index be51a52..0000000 --- a/lib/md5.h +++ /dev/null @@ -1,24 +0,0 @@ -/* - * UCW Library -- MD5 Message Digest - * - * This file is in public domain (see lib/md5.c). 
- */ - -#ifndef _UCW_MD5_H -#define _UCW_MD5_H - -typedef u32 uint32; - -struct MD5Context { - uint32 buf[4]; - uint32 bits[2]; - unsigned char in[64]; -}; - -void MD5Init(struct MD5Context *context); -void MD5Update(struct MD5Context *context, unsigned char const *buf, - unsigned len); -void MD5Final(unsigned char digest[16], struct MD5Context *context); -void MD5Transform(uint32 buf[4], uint32 const in[16]); - -#endif /* !_UCW_MD5_H */ diff --git a/lib/md5hex.c b/lib/md5hex.c deleted file mode 100644 index 93987b0..0000000 --- a/lib/md5hex.c +++ /dev/null @@ -1,35 +0,0 @@ -/* - * UCW Library -- MD5 Binary <-> Hex Conversions - * - * (c) 1997 Martin Mares - * - * This software may be freely distributed and used according to the terms - * of the GNU Lesser General Public License. - */ - -#include "lib/lib.h" -#include "lib/chartype.h" - -#include - -void -md5_to_hex(const byte *s, char *d) -{ - int i; - for(i=0; i - * (c) 2007 Pavel Charvat - * - * This software may be freely distributed and used according to the terms - * of the GNU Lesser General Public License. - */ - -#ifndef _UCW_POOLS_H -#define _UCW_POOLS_H - -/* Memory pool state (see mp_push(), ...) */ -struct mempool_state { - uns free[2]; - void *last[2]; - struct mempool_state *next; -}; - -/* Memory pool */ -struct mempool { - struct mempool_state state; - void *unused, *last_big; - uns chunk_size, threshold, idx; -}; - -/* Statistics (see mp_stats()) */ -struct mempool_stats { - uns total_size; /* Real allocated size in bytes */ - uns chain_count[3]; /* Number of allocated chunks in small/big/unused chains */ - uns chain_size[3]; /* Size of allocated chunks in small/big/unused chains */ -}; - -/* Initialize a given mempool structure. Chunk size must be in the interval [1, UINT_MAX / 2] */ -void mp_init(struct mempool *pool, uns chunk_size); - -/* Allocate and initialize a new memory pool. See mp_init for chunk size limitations. 
*/ -struct mempool *mp_new(uns chunk_size); - -/* Cleanup mempool initialized by mp_init or mp_new */ -void mp_delete(struct mempool *pool); - -/* Free all data on a memory pool (saves some empty chunks for later allocations) */ -void mp_flush(struct mempool *pool); - -/* Compute some statistics for debug purposes. See the definition of the mempool_stats structure. */ -void mp_stats(struct mempool *pool, struct mempool_stats *stats); - - -/*** Allocation routines ***/ - -/* For internal use only, do not call directly */ -void *mp_alloc_internal(struct mempool *pool, uns size) LIKE_MALLOC; - -/* The function allocates new bytes on a given memory pool. - * If the is zero, the resulting pointer is undefined, - * but it may be safely reallocated or used as the parameter - * to other functions below. - * - * The resulting pointer is always aligned to a multiple of - * CPU_STRUCT_ALIGN bytes and this condition remains true also - * after future reallocations. - */ -void *mp_alloc(struct mempool *pool, uns size); - -/* The same as mp_alloc, but the result may not be aligned */ -void *mp_alloc_noalign(struct mempool *pool, uns size); - -/* The same as mp_alloc, but fills the newly allocated data with zeroes */ -void *mp_alloc_zero(struct mempool *pool, uns size); - -/* Inlined version of mp_alloc() */ -static inline void * -mp_alloc_fast(struct mempool *pool, uns size) -{ - uns avail = pool->state.free[0] & ~(CPU_STRUCT_ALIGN - 1); - if (size <= avail) - { - pool->state.free[0] = avail - size; - return pool->state.last[0] - avail; - } - else - return mp_alloc_internal(pool, size); -} - -/* Inlined version of mp_alloc_noalign() */ -static inline void * -mp_alloc_fast_noalign(struct mempool *pool, uns size) -{ - if (size <= pool->state.free[0]) - { - void *ptr = pool->state.last[0] - pool->state.free[0]; - pool->state.free[0] -= size; - return ptr; - } - else - return mp_alloc_internal(pool, size); -} - - -/*** Usage as a growing buffer ***/ - -/* For internal use only, do 
not call directly */ -void *mp_start_internal(struct mempool *pool, uns size) LIKE_MALLOC; -void *mp_grow_internal(struct mempool *pool, uns size); -void *mp_spread_internal(struct mempool *pool, void *p, uns size); - -static inline uns -mp_idx(struct mempool *pool, void *ptr) -{ - return ptr == pool->last_big; -} - -/* Open a new growing buffer (at least bytes long). - * If the is zero, the resulting pointer is undefined, - * but it may be safely reallocated or used as the parameter - * to other functions below. - * - * The resulting pointer is always aligned to a multiple of - * CPU_STRUCT_ALIGN bytes and this condition remains true also - * after future reallocations. There is an unaligned version as well. - * - * Keep in mind that you can't make any other allocations - * before you "close" the growing buffer with mp_end(). - */ -void *mp_start(struct mempool *pool, uns size); -void *mp_start_noalign(struct mempool *pool, uns size); - -/* Inlined version of mp_start() */ -static inline void * -mp_start_fast(struct mempool *pool, uns size) -{ - uns avail = pool->state.free[0] & ~(CPU_STRUCT_ALIGN - 1); - if (size <= avail) - { - pool->idx = 0; - pool->state.free[0] = avail; - return pool->state.last[0] - avail; - } - else - return mp_start_internal(pool, size); -} - -/* Inlined version of mp_start_noalign() */ -static inline void * -mp_start_fast_noalign(struct mempool *pool, uns size) -{ - if (size <= pool->state.free[0]) - { - pool->idx = 0; - return pool->state.last[0] - pool->state.free[0]; - } - else - return mp_start_internal(pool, size); -} - -/* Return start pointer of the growing buffer allocated by mp_start() or a similar function */ -static inline void * -mp_ptr(struct mempool *pool) -{ - return pool->state.last[pool->idx] - pool->state.free[pool->idx]; -} - -/* Return the number of bytes available for extending the growing buffer */ -static inline uns -mp_avail(struct mempool *pool) -{ - return pool->state.free[pool->idx]; -} - -/* Grow the buffer 
allocated by mp_start() to be at least bytes long - * ( may be less than mp_avail(), even zero). Reallocated buffer may - * change its starting position. The content will be unchanged to the minimum - * of the old and new sizes; newly allocated memory will be uninitialized. - * Multiple calls to mp_grow have amortized linear cost wrt. the maximum value of . */ -static inline void * -mp_grow(struct mempool *pool, uns size) -{ - return (size <= mp_avail(pool)) ? mp_ptr(pool) : mp_grow_internal(pool, size); -} - -/* Grow the buffer by at least one byte -- equivalent to mp_grow(pool, mp_avail(pool) + 1) */ -static inline void * -mp_expand(struct mempool *pool) -{ - return mp_grow_internal(pool, mp_avail(pool) + 1); -} - -/* Ensure that there is at least bytes free after

, if not, reallocate and adjust

. */ -static inline void * -mp_spread(struct mempool *pool, void *p, uns size) -{ - return (((uns)(pool->state.last[pool->idx] - p) >= size) ? p : mp_spread_internal(pool, p, size)); -} - -/* Close the growing buffer. The must point just behind the data, you want to keep - * allocated (so it can be in the interval [mp_ptr(pool), mp_ptr(pool) + mp_avail(pool)]). - * Returns a pointer to the beginning of the just closed block. */ -static inline void * -mp_end(struct mempool *pool, void *end) -{ - void *p = mp_ptr(pool); - pool->state.free[pool->idx] = pool->state.last[pool->idx] - end; - return p; -} - -/* Return size in bytes of the last allocated memory block (with mp_alloc*() or mp_end()). */ -static inline uns -mp_size(struct mempool *pool, void *ptr) -{ - uns idx = mp_idx(pool, ptr); - return pool->state.last[idx] - ptr - pool->state.free[idx]; -} - -/* Open the last memory block (allocated with mp_alloc*() or mp_end()) - * for growing and return its size in bytes. The contents and the start pointer - * remain unchanged. Do not forget to call mp_end() to close it. */ -uns mp_open(struct mempool *pool, void *ptr); - -/* Inlined version of mp_open() */ -static inline uns -mp_open_fast(struct mempool *pool, void *ptr) -{ - pool->idx = mp_idx(pool, ptr); - uns size = pool->state.last[pool->idx] - ptr - pool->state.free[pool->idx]; - pool->state.free[pool->idx] += size; - return size; -} - -/* Reallocate the last memory block (allocated with mp_alloc*() or mp_end()) - * to the new . Behavior is similar to mp_grow(), but the resulting - * block is closed. 
*/ -void *mp_realloc(struct mempool *pool, void *ptr, uns size); - -/* The same as mp_realloc(), but fills the additional bytes (if any) with zeroes */ -void *mp_realloc_zero(struct mempool *pool, void *ptr, uns size); - -/* Inlined version of mp_realloc() */ -static inline void * -mp_realloc_fast(struct mempool *pool, void *ptr, uns size) -{ - mp_open_fast(pool, ptr); - ptr = mp_grow(pool, size); - mp_end(pool, ptr + size); - return ptr; -} - - -/*** Usage as a stack ***/ - -/* Save the current state of a memory pool. - * Do not call this function with an opened growing buffer. */ -static inline void -mp_save(struct mempool *pool, struct mempool_state *state) -{ - *state = pool->state; - pool->state.next = state; -} - -/* Save the current state to a newly allocated mempool_state structure. - * Do not call this function with an opened growing buffer. */ -struct mempool_state *mp_push(struct mempool *pool); - -/* Restore the state saved by mp_save() or mp_push() and free all - * data allocated after that point (including the state structure itself). - * You can't reallocate the last memory block from the saved state. */ -void mp_restore(struct mempool *pool, struct mempool_state *state); - -/* Restore the state saved by the last call to mp_push(). - * mp_pop() and mp_push() works as a stack so you can push more states safely. */ -void mp_pop(struct mempool *pool); - - -/*** mempool-str.c ***/ - -char *mp_strdup(struct mempool *, const char *) LIKE_MALLOC; -void *mp_memdup(struct mempool *, const void *, uns) LIKE_MALLOC; -char *mp_multicat(struct mempool *, ...) LIKE_MALLOC SENTINEL_CHECK; -static inline char * LIKE_MALLOC -mp_strcat(struct mempool *mp, const char *x, const char *y) -{ - return mp_multicat(mp, x, y, NULL); -} -char *mp_strjoin(struct mempool *p, char **a, uns n, uns sep) LIKE_MALLOC; - - -/*** mempool-fmt.c ***/ - -char *mp_printf(struct mempool *mp, const char *fmt, ...) 
FORMAT_CHECK(printf,2,3) LIKE_MALLOC; -char *mp_vprintf(struct mempool *mp, const char *fmt, va_list args) LIKE_MALLOC; -char *mp_printf_append(struct mempool *mp, char *ptr, const char *fmt, ...) FORMAT_CHECK(printf,3,4); -char *mp_vprintf_append(struct mempool *mp, char *ptr, const char *fmt, va_list args); - -#endif diff --git a/lib/pagecache.c b/lib/pagecache.c deleted file mode 100644 index 13ad366..0000000 --- a/lib/pagecache.c +++ /dev/null @@ -1,429 +0,0 @@ -/* - * UCW Library -- File Page Cache - * - * (c) 1999--2002 Martin Mares - * - * This software may be freely distributed and used according to the terms - * of the GNU Lesser General Public License. - */ - -#include "lib/lib.h" -#include "lib/pagecache.h" -#include "lib/lfs.h" - -#include -#include -#include -#include -#include -#include - -struct page_cache { - list free_pages; /* LRU queue of free non-dirty pages */ - list locked_pages; /* List of locked pages (starts with dirty ones) */ - list dirty_pages; /* List of free dirty pages */ - uns page_size; /* Bytes per page (must be a power of two) */ - uns free_count; /* Number of free / dirty pages */ - uns total_count; /* Total number of pages */ - uns max_pages; /* Maximum number of free pages */ - uns hash_size; /* Hash table size */ - uns stat_hit; /* Number of cache hits */ - uns stat_miss; /* Number of cache misses */ - uns stat_write; /* Number of writes */ - list *hash_table; /* List heads corresponding to hash buckets */ -#ifndef HAVE_PREAD - sh_off_t pos; /* Current position in the file */ - int pos_fd; /* FD the position corresponds to */ -#endif -}; - -#define PAGE_NUMBER(pos) ((pos) & ~(sh_off_t)(c->page_size - 1)) -#define PAGE_OFFSET(pos) ((pos) & (c->page_size - 1)) - -struct page_cache * -pgc_open(uns page_size, uns max_pages) -{ - struct page_cache *c = xmalloc_zero(sizeof(struct page_cache)); - uns i; - - init_list(&c->free_pages); - init_list(&c->locked_pages); - init_list(&c->dirty_pages); - c->page_size = page_size; - 
c->max_pages = max_pages; - c->hash_size = nextprime(c->max_pages); - c->hash_table = xmalloc(sizeof(list) * c->hash_size); - for(i=0; ihash_size; i++) - init_list(&c->hash_table[i]); -#ifndef HAVE_PREAD - c->pos_fd = -1; -#endif - return c; -} - -void -pgc_close(struct page_cache *c) -{ - pgc_cleanup(c); - ASSERT(EMPTY_LIST(c->locked_pages)); - ASSERT(EMPTY_LIST(c->dirty_pages)); - ASSERT(EMPTY_LIST(c->free_pages)); - xfree(c->hash_table); - xfree(c); -} - -static void -pgc_debug_page(struct page *p) -{ - printf("\tp=%08x d=%d f=%x c=%d\n", (uns) p->pos, p->fd, p->flags, p->lock_count); -} - -void -pgc_debug(struct page_cache *c, int mode) -{ - struct page *p; - - printf(">> Page cache dump: pgsize=%d, pages=%d, freepages=%d of %d, hash=%d\n", c->page_size, c->total_count, c->free_count, c->max_pages, c->hash_size); - printf(">> stats: %d hits, %d misses, %d writes\n", c->stat_hit, c->stat_miss, c->stat_write); - if (mode) - { - puts("LRU list:"); - WALK_LIST(p, c->free_pages) - pgc_debug_page(p); - puts("Locked list:"); - WALK_LIST(p, c->locked_pages) - pgc_debug_page(p); - puts("Dirty list:"); - WALK_LIST(p, c->dirty_pages) - pgc_debug_page(p); - } -} - -static void -flush_page(struct page_cache *c, struct page *p) -{ - int s; - - ASSERT(p->flags & PG_FLAG_DIRTY); -#ifdef HAVE_PREAD - s = sh_pwrite(p->fd, p->data, c->page_size, p->pos); -#else - if (c->pos != p->pos || c->pos_fd != (int) p->fd) - sh_seek(p->fd, p->pos, SEEK_SET); - s = write(p->fd, p->data, c->page_size); - c->pos = p->pos + s; - c->pos_fd = p->fd; -#endif - if (s < 0) - die("pgc_write(%d): %m", p->fd); - if (s != (int) c->page_size) - die("pgc_write(%d): incomplete page (only %d of %d)", p->fd, s, c->page_size); - p->flags &= ~PG_FLAG_DIRTY; - c->stat_write++; -} - -static int -flush_cmp(const void *X, const void *Y) -{ - struct page *x = *((struct page **)X); - struct page *y = *((struct page **)Y); - - if (x->fd < y->fd) - return -1; - if (x->fd > y->fd) - return 1; - if (x->pos < y->pos) - 
return -1; - if (x->pos > y->pos) - return 1; - return 0; -} - -static void -flush_pages(struct page_cache *c, uns force) -{ - uns cnt = 0; - uns max = force ? ~0U : c->free_count / 2; - uns i; - struct page *p, *q, **req, **rr; - - WALK_LIST(p, c->dirty_pages) - { - cnt++; - if (cnt >= max) - break; - } - req = rr = alloca(cnt * sizeof(struct page *)); - i = cnt; - p = HEAD(c->dirty_pages); - while ((q = (struct page *) p->n.next) && i--) - { - rem_node(&p->n); - add_tail(&c->free_pages, &p->n); - *rr++ = p; - p = q; - } - qsort(req, cnt, sizeof(struct page *), flush_cmp); - for(i=0; ihash_size; -} - -static struct page * -get_page(struct page_cache *c, sh_off_t pos, uns fd) -{ - node *n; - struct page *p; - uns hash = hash_page(c, pos, fd); - - /* - * Return locked buffer for given page. - */ - - WALK_LIST(n, c->hash_table[hash]) - { - p = SKIP_BACK(struct page, hn, n); - if (p->pos == pos && p->fd == fd) - { - /* Found in the cache */ - rem_node(&p->n); - if (!p->lock_count) - c->free_count--; - return p; - } - } - if (c->total_count < c->max_pages || !c->free_count) - { - /* Enough free space, expand the cache */ - p = xmalloc(sizeof(struct page) + c->page_size); - c->total_count++; - } - else - { - /* Discard the oldest unlocked page */ - p = HEAD(c->free_pages); - if (!p->n.next) - { - /* There are only dirty pages here */ - flush_pages(c, 0); - p = HEAD(c->free_pages); - ASSERT(p->n.next); - } - ASSERT(!p->lock_count); - rem_node(&p->n); - rem_node(&p->hn); - c->free_count--; - } - p->pos = pos; - p->fd = fd; - p->flags = 0; - p->lock_count = 0; - add_tail(&c->hash_table[hash], &p->hn); - return p; -} - -void -pgc_flush(struct page_cache *c) -{ - struct page *p; - - flush_pages(c, 1); - WALK_LIST(p, c->locked_pages) - if (p->flags & PG_FLAG_DIRTY) - flush_page(c, p); - else - break; -} - -void -pgc_cleanup(struct page_cache *c) -{ - struct page *p; - node *n; - - pgc_flush(c); - WALK_LIST_DELSAFE(p, n, c->free_pages) - { - ASSERT(!(p->flags & PG_FLAG_DIRTY) 
&& !p->lock_count); - rem_node(&p->n); - rem_node(&p->hn); - c->free_count--; - c->total_count--; - xfree(p); - } - ASSERT(!c->free_count); -} - -static inline struct page * -get_and_lock_page(struct page_cache *c, sh_off_t pos, uns fd) -{ - struct page *p = get_page(c, pos, fd); - - add_tail(&c->locked_pages, &p->n); - p->lock_count++; - return p; -} - -struct page * -pgc_read(struct page_cache *c, int fd, sh_off_t pos) -{ - struct page *p; - int s; - - ASSERT(!PAGE_OFFSET(pos)); - p = get_and_lock_page(c, pos, fd); - if (p->flags & PG_FLAG_VALID) - c->stat_hit++; - else - { - c->stat_miss++; -#ifdef HAVE_PREAD - s = sh_pread(fd, p->data, c->page_size, pos); -#else - if (c->pos != pos || c->pos_fd != (int)fd) - sh_seek(fd, pos, SEEK_SET); - s = read(fd, p->data, c->page_size); - c->pos = pos + s; - c->pos_fd = fd; -#endif - if (s < 0) - die("pgc_read(%d): %m", fd); - if (s != (int) c->page_size) - die("pgc_read(%d): incomplete page (only %d of %d)", p->fd, s, c->page_size); - p->flags |= PG_FLAG_VALID; - } - return p; -} - -struct page * -pgc_get(struct page_cache *c, int fd, sh_off_t pos) -{ - struct page *p; - - ASSERT(!PAGE_OFFSET(pos)); - p = get_and_lock_page(c, pos, fd); - p->flags |= PG_FLAG_VALID | PG_FLAG_DIRTY; - return p; -} - -struct page * -pgc_get_zero(struct page_cache *c, int fd, sh_off_t pos) -{ - struct page *p; - - ASSERT(!PAGE_OFFSET(pos)); - p = get_and_lock_page(c, pos, fd); - bzero(p->data, c->page_size); - p->flags |= PG_FLAG_VALID | PG_FLAG_DIRTY; - return p; -} - -void -pgc_put(struct page_cache *c, struct page *p) -{ - ASSERT(p->lock_count); - if (--p->lock_count) - return; - rem_node(&p->n); - if (p->flags & PG_FLAG_DIRTY) - { - add_tail(&c->dirty_pages, &p->n); - c->free_count++; - } - else if (c->free_count < c->max_pages) - { - add_tail(&c->free_pages, &p->n); - c->free_count++; - } - else - { - rem_node(&p->hn); - xfree(p); - c->total_count--; - } -} - -void -pgc_mark_dirty(struct page_cache *c, struct page *p) -{ - 
ASSERT(p->lock_count); - if (!(p->flags & PG_FLAG_DIRTY)) - { - p->flags |= PG_FLAG_DIRTY; - rem_node(&p->n); - add_head(&c->locked_pages, &p->n); - } -} - -byte * -pgc_read_data(struct page_cache *c, int fd, sh_off_t pos, uns *len) -{ - struct page *p; - sh_off_t page = PAGE_NUMBER(pos); - uns offset = PAGE_OFFSET(pos); - - p = pgc_read(c, fd, page); - pgc_put(c, p); - *len = c->page_size - offset; - return p->data + offset; -} - -#ifdef TEST - -int main(int argc, char **argv) -{ - struct page_cache *c = pgc_open(1024, 2); - struct page *p, *q, *r; - int fd = open("test", O_RDWR | O_CREAT | O_TRUNC, 0666); - if (fd < 0) - die("open: %m"); - pgc_debug(c, 1); - p = pgc_get(c, fd, 0); - pgc_debug(c, 1); - strcpy(p->data, "one"); - pgc_put(c, p); - pgc_debug(c, 1); - p = pgc_get(c, fd, 1024); - pgc_debug(c, 1); - strcpy(p->data, "two"); - pgc_put(c, p); - pgc_debug(c, 1); - p = pgc_get(c, fd, 2048); - pgc_debug(c, 1); - strcpy(p->data, "three"); - pgc_put(c, p); - pgc_debug(c, 1); - pgc_flush(c); - pgc_debug(c, 1); - p = pgc_read(c, fd, 0); - pgc_debug(c, 1); - strcpy(p->data, "odin"); - pgc_mark_dirty(c, p); - pgc_debug(c, 1); - pgc_flush(c); - pgc_debug(c, 1); - q = pgc_read(c, fd, 1024); - pgc_debug(c, 1); - r = pgc_read(c, fd, 2048); - pgc_debug(c, 1); - pgc_put(c, p); - pgc_put(c, q); - pgc_put(c, r); - pgc_debug(c, 1); - p = pgc_get(c, fd, 3072); - pgc_debug(c, 1); - strcpy(p->data, "four"); - pgc_put(c, p); - pgc_debug(c, 1); - pgc_cleanup(c); - pgc_debug(c, 1); - pgc_close(c); - return 0; -} - -#endif diff --git a/lib/pagecache.h b/lib/pagecache.h deleted file mode 100644 index ef4bf5a..0000000 --- a/lib/pagecache.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * UCW Library -- File Page Cache - * - * (c) 1999--2002 Martin Mares - * - * This software may be freely distributed and used according to the terms - * of the GNU Lesser General Public License. 
- */ - -#ifndef _UCW_PAGECACHE_H -#define _UCW_PAGECACHE_H - -#include "lib/lists.h" - -struct page_cache; - -struct page { - node n; /* Node in page list */ - node hn; /* Node in hash table */ - sh_off_t pos; - uns fd; - uns flags; - uns lock_count; - byte data[0]; -}; - -#define PG_FLAG_DIRTY 1 -#define PG_FLAG_VALID 2 - -struct page_cache *pgc_open(uns page_size, uns max_pages); -void pgc_close(struct page_cache *); -void pgc_debug(struct page_cache *, int mode); -void pgc_flush(struct page_cache *); /* Write all unwritten pages */ -void pgc_cleanup(struct page_cache *); /* Deallocate all unused buffers */ -struct page *pgc_read(struct page_cache *, int fd, sh_off_t); /* Read page and lock it */ -struct page *pgc_get(struct page_cache *, int fd, sh_off_t); /* Get page for writing */ -struct page *pgc_get_zero(struct page_cache *, int fd, sh_off_t); /* ... and clear it */ -void pgc_put(struct page_cache *, struct page *); /* Release page */ -void pgc_mark_dirty(struct page_cache *, struct page *); /* Mark locked page as dirty */ -byte *pgc_read_data(struct page_cache *, int fd, sh_off_t, uns *); /* Partial reading */ - -#endif diff --git a/lib/perl/Filelock/Makefile b/lib/perl/Filelock/Makefile deleted file mode 100644 index de94cc0..0000000 --- a/lib/perl/Filelock/Makefile +++ /dev/null @@ -1,24 +0,0 @@ -# Makefile for the Filelock Perl module (c) 2007 Pavel Chrvat - -DIRS+=lib/perl/Filelock/arch/auto/UCW/Filelock -FILELOCK_DIR=lib/perl/Filelock - -PROGS+=$(o)/lib/perl/Filelock/Filelock.pm - -extras:: $(o)/lib/perl/Filelock/arch/auto/UCW/Filelock/Filelock.$(SOEXT) - -$(o)/lib/perl/Filelock/arch/auto/UCW/Filelock/Filelock.$(SOEXT): $(o)/$(FILELOCK_DIR)/Filelock.xs $(o)/$(FILELOCK_DIR)/Filelock.pm $(o)/$(FILELOCK_DIR)/Makefile - $(M)MAKE $@ - $(Q)cd $(o)/$(FILELOCK_DIR) && $(MAKE) -f Makefile $(MAKESILENT) - $(Q)touch $@ - $(Q)cp $@ run/$(DATADIR)/ - -$(o)/$(FILELOCK_DIR)/Makefile: $(o)/$(FILELOCK_DIR)/Makefile.PL - $(M)PREPARE $@ - $(Q)cd $(o)/$(FILELOCK_DIR) && 
perl Makefile.PL - -$(o)/$(FILELOCK_DIR)/Filelock.xs: $(s)/$(FILELOCK_DIR)/Filelock.xs - $(Q)cp $^ $@ - -$(o)/$(FILELOCK_DIR)/Makefile.PL: $(s)/$(FILELOCK_DIR)/Makefile.PL - $(Q)cp $^ $@ diff --git a/lib/perl/Makefile b/lib/perl/Makefile deleted file mode 100644 index 197cb9c..0000000 --- a/lib/perl/Makefile +++ /dev/null @@ -1,10 +0,0 @@ -# Perl modules - -DIRS+=lib/perl -EXTRA_RUNDIRS+=lib/perl5/UCW -PROGS+=$(addprefix $(o)/lib/perl/,Config.pm Log.pm CGI.pm) - -ifdef CONFIG_UCW_PERL_MODULES -include $(s)/lib/perl/Ulimit/Makefile -include $(s)/lib/perl/Filelock/Makefile -endif diff --git a/lib/perl/Ulimit/Makefile b/lib/perl/Ulimit/Makefile deleted file mode 100644 index 8fc355b..0000000 --- a/lib/perl/Ulimit/Makefile +++ /dev/null @@ -1,24 +0,0 @@ -# Makefile for the Ulimit Perl module (c) 2003 Tomas Valla - -DIRS+=lib/perl/Ulimit/arch/auto/UCW/Ulimit -ULIMIT_DIR=lib/perl/Ulimit - -PROGS+=$(o)/lib/perl/Ulimit/Ulimit.pm - -extras:: $(o)/lib/perl/Ulimit/arch/auto/UCW/Ulimit/Ulimit.$(SOEXT) - -$(o)/lib/perl/Ulimit/arch/auto/UCW/Ulimit/Ulimit.$(SOEXT): $(o)/$(ULIMIT_DIR)/Ulimit.xs $(o)/$(ULIMIT_DIR)/Ulimit.pm $(o)/$(ULIMIT_DIR)/Makefile - $(M)MAKE $@ - $(Q)cd $(o)/$(ULIMIT_DIR) && $(MAKE) -f Makefile $(MAKESILENT) - $(Q)touch $@ - $(Q)cp $@ run/$(DATADIR)/ - -$(o)/$(ULIMIT_DIR)/Makefile: $(o)/$(ULIMIT_DIR)/Makefile.PL - $(M)PREPARE $@ - $(Q)cd $(o)/$(ULIMIT_DIR) && perl Makefile.PL - -$(o)/$(ULIMIT_DIR)/Ulimit.xs: $(s)/$(ULIMIT_DIR)/Ulimit.xs - $(Q)cp $^ $@ - -$(o)/$(ULIMIT_DIR)/Makefile.PL: $(s)/$(ULIMIT_DIR)/Makefile.PL - $(Q)cp $^ $@ diff --git a/lib/regex/Makefile b/lib/regex/Makefile deleted file mode 100644 index 730b249..0000000 --- a/lib/regex/Makefile +++ /dev/null @@ -1,7 +0,0 @@ -# Makefile for the UCW Regex Library (c) 2004 Martin Mares - -DIRS+=lib/regex - -LIBUCW_MODS+=regex/regex - -$(o)/lib/regex/regex.o $(o)/lib/regex/regex.oo: CWARNS= diff --git a/lib/regex/README b/lib/regex/README deleted file mode 100644 index f8c2605..0000000 --- 
a/lib/regex/README +++ /dev/null @@ -1,9 +0,0 @@ -This directory contains regular expression routines from the GNU libc 2.3.2 -which are significantly faster than the default regex libraries on most systems. - -They are distributed under the GNU LGPL. - -All files are exact copies of the original distribution, I only provided my -own regex.c, regex-sh.h and Makefile. - - Martin Mares, March 2004 diff --git a/lib/regex/regcomp.c b/lib/regex/regcomp.c deleted file mode 100644 index f25ecae..0000000 --- a/lib/regex/regcomp.c +++ /dev/null @@ -1,3544 +0,0 @@ -/* Extended regular expression matching and search library. - Copyright (C) 2002, 2003 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Isamu Hasegawa . - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. 
*/ - -static reg_errcode_t re_compile_internal (regex_t *preg, const char * pattern, - int length, reg_syntax_t syntax); -static void re_compile_fastmap_iter (regex_t *bufp, - const re_dfastate_t *init_state, - char *fastmap); -static reg_errcode_t init_dfa (re_dfa_t *dfa, int pat_len); -static reg_errcode_t init_word_char (re_dfa_t *dfa); -#ifdef RE_ENABLE_I18N -static void free_charset (re_charset_t *cset); -#endif /* RE_ENABLE_I18N */ -static void free_workarea_compile (regex_t *preg); -static reg_errcode_t create_initial_state (re_dfa_t *dfa); -static reg_errcode_t analyze (re_dfa_t *dfa); -static reg_errcode_t analyze_tree (re_dfa_t *dfa, bin_tree_t *node); -static void calc_first (re_dfa_t *dfa, bin_tree_t *node); -static void calc_next (re_dfa_t *dfa, bin_tree_t *node); -static void calc_epsdest (re_dfa_t *dfa, bin_tree_t *node); -static reg_errcode_t duplicate_node_closure (re_dfa_t *dfa, int top_org_node, - int top_clone_node, int root_node, - unsigned int constraint); -static reg_errcode_t duplicate_node (int *new_idx, re_dfa_t *dfa, int org_idx, - unsigned int constraint); -static int search_duplicated_node (re_dfa_t *dfa, int org_node, - unsigned int constraint); -static reg_errcode_t calc_eclosure (re_dfa_t *dfa); -static reg_errcode_t calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, - int node, int root); -static void calc_inveclosure (re_dfa_t *dfa); -static int fetch_number (re_string_t *input, re_token_t *token, - reg_syntax_t syntax); -static re_token_t fetch_token (re_string_t *input, reg_syntax_t syntax); -static int peek_token (re_token_t *token, re_string_t *input, - reg_syntax_t syntax); -static int peek_token_bracket (re_token_t *token, re_string_t *input, - reg_syntax_t syntax); -static bin_tree_t *parse (re_string_t *regexp, regex_t *preg, - reg_syntax_t syntax, reg_errcode_t *err); -static bin_tree_t *parse_reg_exp (re_string_t *regexp, regex_t *preg, - re_token_t *token, reg_syntax_t syntax, - int nest, reg_errcode_t *err); 
-static bin_tree_t *parse_branch (re_string_t *regexp, regex_t *preg, - re_token_t *token, reg_syntax_t syntax, - int nest, reg_errcode_t *err); -static bin_tree_t *parse_expression (re_string_t *regexp, regex_t *preg, - re_token_t *token, reg_syntax_t syntax, - int nest, reg_errcode_t *err); -static bin_tree_t *parse_sub_exp (re_string_t *regexp, regex_t *preg, - re_token_t *token, reg_syntax_t syntax, - int nest, reg_errcode_t *err); -static bin_tree_t *parse_dup_op (bin_tree_t *dup_elem, re_string_t *regexp, - re_dfa_t *dfa, re_token_t *token, - reg_syntax_t syntax, reg_errcode_t *err); -static bin_tree_t *parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, - re_token_t *token, reg_syntax_t syntax, - reg_errcode_t *err); -static reg_errcode_t parse_bracket_element (bracket_elem_t *elem, - re_string_t *regexp, - re_token_t *token, int token_len, - re_dfa_t *dfa, - reg_syntax_t syntax); -static reg_errcode_t parse_bracket_symbol (bracket_elem_t *elem, - re_string_t *regexp, - re_token_t *token); -#ifndef _LIBC -# ifdef RE_ENABLE_I18N -static reg_errcode_t build_range_exp (re_bitset_ptr_t sbcset, - re_charset_t *mbcset, int *range_alloc, - bracket_elem_t *start_elem, - bracket_elem_t *end_elem); -static reg_errcode_t build_collating_symbol (re_bitset_ptr_t sbcset, - re_charset_t *mbcset, - int *coll_sym_alloc, - const unsigned char *name); -# else /* not RE_ENABLE_I18N */ -static reg_errcode_t build_range_exp (re_bitset_ptr_t sbcset, - bracket_elem_t *start_elem, - bracket_elem_t *end_elem); -static reg_errcode_t build_collating_symbol (re_bitset_ptr_t sbcset, - const unsigned char *name); -# endif /* not RE_ENABLE_I18N */ -#endif /* not _LIBC */ -#ifdef RE_ENABLE_I18N -static reg_errcode_t build_equiv_class (re_bitset_ptr_t sbcset, - re_charset_t *mbcset, - int *equiv_class_alloc, - const unsigned char *name); -static reg_errcode_t build_charclass (re_bitset_ptr_t sbcset, - re_charset_t *mbcset, - int *char_class_alloc, - const unsigned char *class_name, - 
reg_syntax_t syntax); -#else /* not RE_ENABLE_I18N */ -static reg_errcode_t build_equiv_class (re_bitset_ptr_t sbcset, - const unsigned char *name); -static reg_errcode_t build_charclass (re_bitset_ptr_t sbcset, - const unsigned char *class_name, - reg_syntax_t syntax); -#endif /* not RE_ENABLE_I18N */ -static bin_tree_t *build_word_op (re_dfa_t *dfa, int not, reg_errcode_t *err); -static void free_bin_tree (bin_tree_t *tree); -static bin_tree_t *create_tree (bin_tree_t *left, bin_tree_t *right, - re_token_type_t type, int index); -static bin_tree_t *duplicate_tree (const bin_tree_t *src, re_dfa_t *dfa); - -/* This table gives an error message for each of the error codes listed - in regex.h. Obviously the order here has to be same as there. - POSIX doesn't require that we do anything for REG_NOERROR, - but why not be nice? */ - -const char __re_error_msgid[] attribute_hidden = - { -#define REG_NOERROR_IDX 0 - gettext_noop ("Success") /* REG_NOERROR */ - "\0" -#define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success") - gettext_noop ("No match") /* REG_NOMATCH */ - "\0" -#define REG_BADPAT_IDX (REG_NOMATCH_IDX + sizeof "No match") - gettext_noop ("Invalid regular expression") /* REG_BADPAT */ - "\0" -#define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression") - gettext_noop ("Invalid collation character") /* REG_ECOLLATE */ - "\0" -#define REG_ECTYPE_IDX (REG_ECOLLATE_IDX + sizeof "Invalid collation character") - gettext_noop ("Invalid character class name") /* REG_ECTYPE */ - "\0" -#define REG_EESCAPE_IDX (REG_ECTYPE_IDX + sizeof "Invalid character class name") - gettext_noop ("Trailing backslash") /* REG_EESCAPE */ - "\0" -#define REG_ESUBREG_IDX (REG_EESCAPE_IDX + sizeof "Trailing backslash") - gettext_noop ("Invalid back reference") /* REG_ESUBREG */ - "\0" -#define REG_EBRACK_IDX (REG_ESUBREG_IDX + sizeof "Invalid back reference") - gettext_noop ("Unmatched [ or [^") /* REG_EBRACK */ - "\0" -#define REG_EPAREN_IDX (REG_EBRACK_IDX + sizeof 
"Unmatched [ or [^") - gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */ - "\0" -#define REG_EBRACE_IDX (REG_EPAREN_IDX + sizeof "Unmatched ( or \\(") - gettext_noop ("Unmatched \\{") /* REG_EBRACE */ - "\0" -#define REG_BADBR_IDX (REG_EBRACE_IDX + sizeof "Unmatched \\{") - gettext_noop ("Invalid content of \\{\\}") /* REG_BADBR */ - "\0" -#define REG_ERANGE_IDX (REG_BADBR_IDX + sizeof "Invalid content of \\{\\}") - gettext_noop ("Invalid range end") /* REG_ERANGE */ - "\0" -#define REG_ESPACE_IDX (REG_ERANGE_IDX + sizeof "Invalid range end") - gettext_noop ("Memory exhausted") /* REG_ESPACE */ - "\0" -#define REG_BADRPT_IDX (REG_ESPACE_IDX + sizeof "Memory exhausted") - gettext_noop ("Invalid preceding regular expression") /* REG_BADRPT */ - "\0" -#define REG_EEND_IDX (REG_BADRPT_IDX + sizeof "Invalid preceding regular expression") - gettext_noop ("Premature end of regular expression") /* REG_EEND */ - "\0" -#define REG_ESIZE_IDX (REG_EEND_IDX + sizeof "Premature end of regular expression") - gettext_noop ("Regular expression too big") /* REG_ESIZE */ - "\0" -#define REG_ERPAREN_IDX (REG_ESIZE_IDX + sizeof "Regular expression too big") - gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */ - }; - -const size_t __re_error_msgid_idx[] attribute_hidden = - { - REG_NOERROR_IDX, - REG_NOMATCH_IDX, - REG_BADPAT_IDX, - REG_ECOLLATE_IDX, - REG_ECTYPE_IDX, - REG_EESCAPE_IDX, - REG_ESUBREG_IDX, - REG_EBRACK_IDX, - REG_EPAREN_IDX, - REG_EBRACE_IDX, - REG_BADBR_IDX, - REG_ERANGE_IDX, - REG_ESPACE_IDX, - REG_BADRPT_IDX, - REG_EEND_IDX, - REG_ESIZE_IDX, - REG_ERPAREN_IDX - }; - -/* Entry points for GNU code. */ - -/* re_compile_pattern is the GNU regular expression compiler: it - compiles PATTERN (of length LENGTH) and puts the result in BUFP. - Returns 0 if the pattern was valid, otherwise an error string. - - Assumes the `allocated' (and perhaps `buffer') and `translate' fields - are set in BUFP on entry. 
*/ - -const char * -re_compile_pattern (pattern, length, bufp) - const char *pattern; - size_t length; - struct re_pattern_buffer *bufp; -{ - reg_errcode_t ret; - - /* And GNU code determines whether or not to get register information - by passing null for the REGS argument to re_match, etc., not by - setting no_sub. */ - bufp->no_sub = 0; - - /* Match anchors at newline. */ - bufp->newline_anchor = 1; - - ret = re_compile_internal (bufp, pattern, length, re_syntax_options); - - if (!ret) - return NULL; - return gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]); -} -#ifdef _LIBC -weak_alias (__re_compile_pattern, re_compile_pattern) -#endif - -/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can - also be assigned to arbitrarily: each pattern buffer stores its own - syntax, so it can be changed between regex compilations. */ -/* This has no initializer because initialized variables in Emacs - become read-only after dumping. */ -reg_syntax_t re_syntax_options; - - -/* Specify the precise syntax of regexps for compilation. This provides - for compatibility for various utilities which historically have - different, incompatible syntaxes. - - The argument SYNTAX is a bit mask comprised of the various bits - defined in regex.h. We return the old syntax. 
*/ - -reg_syntax_t -re_set_syntax (syntax) - reg_syntax_t syntax; -{ - reg_syntax_t ret = re_syntax_options; - - re_syntax_options = syntax; - return ret; -} -#ifdef _LIBC -weak_alias (__re_set_syntax, re_set_syntax) -#endif - -int -re_compile_fastmap (bufp) - struct re_pattern_buffer *bufp; -{ - re_dfa_t *dfa = (re_dfa_t *) bufp->buffer; - char *fastmap = bufp->fastmap; - - memset (fastmap, '\0', sizeof (char) * SBC_MAX); - re_compile_fastmap_iter (bufp, dfa->init_state, fastmap); - if (dfa->init_state != dfa->init_state_word) - re_compile_fastmap_iter (bufp, dfa->init_state_word, fastmap); - if (dfa->init_state != dfa->init_state_nl) - re_compile_fastmap_iter (bufp, dfa->init_state_nl, fastmap); - if (dfa->init_state != dfa->init_state_begbuf) - re_compile_fastmap_iter (bufp, dfa->init_state_begbuf, fastmap); - bufp->fastmap_accurate = 1; - return 0; -} -#ifdef _LIBC -weak_alias (__re_compile_fastmap, re_compile_fastmap) -#endif - -static inline void -re_set_fastmap (char *fastmap, int icase, int ch) -{ - fastmap[ch] = 1; - if (icase) - fastmap[tolower (ch)] = 1; -} - -/* Helper function for re_compile_fastmap. - Compile fastmap for the initial_state INIT_STATE. 
*/ - -static void -re_compile_fastmap_iter (bufp, init_state, fastmap) - regex_t *bufp; - const re_dfastate_t *init_state; - char *fastmap; -{ - re_dfa_t *dfa = (re_dfa_t *) bufp->buffer; - int node_cnt; - int icase = (MB_CUR_MAX == 1 && (bufp->syntax & RE_ICASE)); - for (node_cnt = 0; node_cnt < init_state->nodes.nelem; ++node_cnt) - { - int node = init_state->nodes.elems[node_cnt]; - re_token_type_t type = dfa->nodes[node].type; - - if (type == CHARACTER) - re_set_fastmap (fastmap, icase, dfa->nodes[node].opr.c); - else if (type == SIMPLE_BRACKET) - { - int i, j, ch; - for (i = 0, ch = 0; i < BITSET_UINTS; ++i) - for (j = 0; j < UINT_BITS; ++j, ++ch) - if (dfa->nodes[node].opr.sbcset[i] & (1 << j)) - re_set_fastmap (fastmap, icase, ch); - } -#ifdef RE_ENABLE_I18N - else if (type == COMPLEX_BRACKET) - { - int i; - re_charset_t *cset = dfa->nodes[node].opr.mbcset; - if (cset->non_match || cset->ncoll_syms || cset->nequiv_classes - || cset->nranges || cset->nchar_classes) - { -# ifdef _LIBC - if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0) - { - /* In this case we want to catch the bytes which are - the first byte of any collation elements. - e.g. In da_DK, we want to catch 'a' since "aa" - is a valid collation element, and don't catch - 'b' since 'b' is the only collation element - which starts from 'b'. 
*/ - int j, ch; - const int32_t *table = (const int32_t *) - _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); - for (i = 0, ch = 0; i < BITSET_UINTS; ++i) - for (j = 0; j < UINT_BITS; ++j, ++ch) - if (table[ch] < 0) - re_set_fastmap (fastmap, icase, ch); - } -# else - if (MB_CUR_MAX > 1) - for (i = 0; i < SBC_MAX; ++i) - if (__btowc (i) == WEOF) - re_set_fastmap (fastmap, icase, i); -# endif /* not _LIBC */ - } - for (i = 0; i < cset->nmbchars; ++i) - { - char buf[256]; - mbstate_t state; - memset (&state, '\0', sizeof (state)); - __wcrtomb (buf, cset->mbchars[i], &state); - re_set_fastmap (fastmap, icase, *(unsigned char *) buf); - } - } -#endif /* RE_ENABLE_I18N */ - else if (type == END_OF_RE || type == OP_PERIOD) - { - memset (fastmap, '\1', sizeof (char) * SBC_MAX); - if (type == END_OF_RE) - bufp->can_be_null = 1; - return; - } - } -} - -/* Entry point for POSIX code. */ -/* regcomp takes a regular expression as a string and compiles it. - - PREG is a regex_t *. We do not expect any fields to be initialized, - since POSIX says we shouldn't. Thus, we set - - `buffer' to the compiled pattern; - `used' to the length of the compiled pattern; - `syntax' to RE_SYNTAX_POSIX_EXTENDED if the - REG_EXTENDED bit in CFLAGS is set; otherwise, to - RE_SYNTAX_POSIX_BASIC; - `newline_anchor' to REG_NEWLINE being set in CFLAGS; - `fastmap' to an allocated space for the fastmap; - `fastmap_accurate' to zero; - `re_nsub' to the number of subexpressions in PATTERN. - - PATTERN is the address of the pattern string. - - CFLAGS is a series of bits which affect compilation. - - If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we - use POSIX basic syntax. - - If REG_NEWLINE is set, then . and [^...] don't match newline. - Also, regexec will try a match beginning after every newline. - - If REG_ICASE is set, then we considers upper- and lowercase - versions of letters to be equivalent when matching. 
- - If REG_NOSUB is set, then when PREG is passed to regexec, that - routine will report only success or failure, and nothing about the - registers. - - It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for - the return codes and their meanings.) */ - -int -regcomp (preg, pattern, cflags) - regex_t *__restrict preg; - const char *__restrict pattern; - int cflags; -{ - reg_errcode_t ret; - reg_syntax_t syntax = ((cflags & REG_EXTENDED) ? RE_SYNTAX_POSIX_EXTENDED - : RE_SYNTAX_POSIX_BASIC); - - preg->buffer = NULL; - preg->allocated = 0; - preg->used = 0; - - /* Try to allocate space for the fastmap. */ - preg->fastmap = re_malloc (char, SBC_MAX); - if (BE (preg->fastmap == NULL, 0)) - return REG_ESPACE; - - syntax |= (cflags & REG_ICASE) ? RE_ICASE : 0; - - /* If REG_NEWLINE is set, newlines are treated differently. */ - if (cflags & REG_NEWLINE) - { /* REG_NEWLINE implies neither . nor [^...] match newline. */ - syntax &= ~RE_DOT_NEWLINE; - syntax |= RE_HAT_LISTS_NOT_NEWLINE; - /* It also changes the matching behavior. */ - preg->newline_anchor = 1; - } - else - preg->newline_anchor = 0; - preg->no_sub = !!(cflags & REG_NOSUB); - preg->translate = NULL; - - ret = re_compile_internal (preg, pattern, strlen (pattern), syntax); - - /* POSIX doesn't distinguish between an unmatched open-group and an - unmatched close-group: both are REG_EPAREN. */ - if (ret == REG_ERPAREN) - ret = REG_EPAREN; - - /* We have already checked preg->fastmap != NULL. */ - if (BE (ret == REG_NOERROR, 1)) - /* Compute the fastmap now, since regexec cannot modify the pattern - buffer. This function nevers fails in this implementation. */ - (void) re_compile_fastmap (preg); - else - { - /* Some error occurred while compiling the expression. 
*/ - re_free (preg->fastmap); - preg->fastmap = NULL; - } - - return (int) ret; -} -#ifdef _LIBC -weak_alias (__regcomp, regcomp) -#endif - -/* Returns a message corresponding to an error code, ERRCODE, returned - from either regcomp or regexec. We don't use PREG here. */ - -size_t -regerror (errcode, preg, errbuf, errbuf_size) - int errcode; - const regex_t *preg; - char *errbuf; - size_t errbuf_size; -{ - const char *msg; - size_t msg_size; - - if (BE (errcode < 0 - || errcode >= (int) (sizeof (__re_error_msgid_idx) - / sizeof (__re_error_msgid_idx[0])), 0)) - /* Only error codes returned by the rest of the code should be passed - to this routine. If we are given anything else, or if other regex - code generates an invalid error code, then the program has a bug. - Dump core so we can fix it. */ - abort (); - - msg = gettext (__re_error_msgid + __re_error_msgid_idx[errcode]); - - msg_size = strlen (msg) + 1; /* Includes the null. */ - - if (BE (errbuf_size != 0, 1)) - { - if (BE (msg_size > errbuf_size, 0)) - { -#if defined HAVE_MEMPCPY || defined _LIBC - *((char *) __mempcpy (errbuf, msg, errbuf_size - 1)) = '\0'; -#else - memcpy (errbuf, msg, errbuf_size - 1); - errbuf[errbuf_size - 1] = 0; -#endif - } - else - memcpy (errbuf, msg, msg_size); - } - - return msg_size; -} -#ifdef _LIBC -weak_alias (__regerror, regerror) -#endif - - -static void -free_dfa_content (re_dfa_t *dfa) -{ - int i, j; - - re_free (dfa->subexps); - - for (i = 0; i < dfa->nodes_len; ++i) - { - re_token_t *node = dfa->nodes + i; -#ifdef RE_ENABLE_I18N - if (node->type == COMPLEX_BRACKET && node->duplicated == 0) - free_charset (node->opr.mbcset); - else -#endif /* RE_ENABLE_I18N */ - if (node->type == SIMPLE_BRACKET && node->duplicated == 0) - re_free (node->opr.sbcset); - } - re_free (dfa->nexts); - for (i = 0; i < dfa->nodes_len; ++i) - { - if (dfa->eclosures != NULL) - re_node_set_free (dfa->eclosures + i); - if (dfa->inveclosures != NULL) - re_node_set_free (dfa->inveclosures + i); - if 
(dfa->edests != NULL) - re_node_set_free (dfa->edests + i); - } - re_free (dfa->edests); - re_free (dfa->eclosures); - re_free (dfa->inveclosures); - re_free (dfa->nodes); - - for (i = 0; i <= dfa->state_hash_mask; ++i) - { - struct re_state_table_entry *entry = dfa->state_table + i; - for (j = 0; j < entry->num; ++j) - { - re_dfastate_t *state = entry->array[j]; - free_state (state); - } - re_free (entry->array); - } - re_free (dfa->state_table); - - if (dfa->word_char != NULL) - re_free (dfa->word_char); -#ifdef DEBUG - re_free (dfa->re_str); -#endif - - re_free (dfa); -} - - -/* Free dynamically allocated space used by PREG. */ - -void -regfree (preg) - regex_t *preg; -{ - re_dfa_t *dfa = (re_dfa_t *) preg->buffer; - if (BE (dfa != NULL, 1)) - free_dfa_content (dfa); - - re_free (preg->fastmap); -} -#ifdef _LIBC -weak_alias (__regfree, regfree) -#endif - -/* Entry points compatible with 4.2 BSD regex library. We don't define - them unless specifically requested. */ - -#if defined _REGEX_RE_COMP || defined _LIBC - -/* BSD has one and only one pattern buffer. */ -static struct re_pattern_buffer re_comp_buf; - -char * -# ifdef _LIBC -/* Make these definitions weak in libc, so POSIX programs can redefine - these names if they don't use our functions, and still use - regcomp/regexec above without link errors. 
*/ -weak_function -# endif -re_comp (s) - const char *s; -{ - reg_errcode_t ret; - char *fastmap; - - if (!s) - { - if (!re_comp_buf.buffer) - return gettext ("No previous regular expression"); - return 0; - } - - if (re_comp_buf.buffer) - { - fastmap = re_comp_buf.fastmap; - re_comp_buf.fastmap = NULL; - __regfree (&re_comp_buf); - memset (&re_comp_buf, '\0', sizeof (re_comp_buf)); - re_comp_buf.fastmap = fastmap; - } - - if (re_comp_buf.fastmap == NULL) - { - re_comp_buf.fastmap = (char *) malloc (SBC_MAX); - if (re_comp_buf.fastmap == NULL) - return (char *) gettext (__re_error_msgid - + __re_error_msgid_idx[(int) REG_ESPACE]); - } - - /* Since `re_exec' always passes NULL for the `regs' argument, we - don't need to initialize the pattern buffer fields which affect it. */ - - /* Match anchors at newlines. */ - re_comp_buf.newline_anchor = 1; - - ret = re_compile_internal (&re_comp_buf, s, strlen (s), re_syntax_options); - - if (!ret) - return NULL; - - /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */ - return (char *) gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]); -} - -#ifdef _LIBC -libc_freeres_fn (free_mem) -{ - __regfree (&re_comp_buf); -} -#endif - -#endif /* _REGEX_RE_COMP */ - -/* Internal entry point. - Compile the regular expression PATTERN, whose length is LENGTH. - SYNTAX indicate regular expression's syntax. */ - -static reg_errcode_t -re_compile_internal (preg, pattern, length, syntax) - regex_t *preg; - const char * pattern; - int length; - reg_syntax_t syntax; -{ - reg_errcode_t err = REG_NOERROR; - re_dfa_t *dfa; - re_string_t regexp; - - /* Initialize the pattern buffer. */ - preg->fastmap_accurate = 0; - preg->syntax = syntax; - preg->not_bol = preg->not_eol = 0; - preg->used = 0; - preg->re_nsub = 0; - preg->can_be_null = 0; - preg->regs_allocated = REGS_UNALLOCATED; - - /* Initialize the dfa. 
*/ - dfa = (re_dfa_t *) preg->buffer; - if (preg->allocated < sizeof (re_dfa_t)) - { - /* If zero allocated, but buffer is non-null, try to realloc - enough space. This loses if buffer's address is bogus, but - that is the user's responsibility. If ->buffer is NULL this - is a simple allocation. */ - dfa = re_realloc (preg->buffer, re_dfa_t, 1); - if (dfa == NULL) - return REG_ESPACE; - preg->allocated = sizeof (re_dfa_t); - } - preg->buffer = (unsigned char *) dfa; - preg->used = sizeof (re_dfa_t); - - err = init_dfa (dfa, length); - if (BE (err != REG_NOERROR, 0)) - { - re_free (dfa); - preg->buffer = NULL; - preg->allocated = 0; - return err; - } -#ifdef DEBUG - dfa->re_str = re_malloc (char, length + 1); - strncpy (dfa->re_str, pattern, length + 1); -#endif - - err = re_string_construct (®exp, pattern, length, preg->translate, - syntax & RE_ICASE); - if (BE (err != REG_NOERROR, 0)) - { - re_free (dfa); - preg->buffer = NULL; - preg->allocated = 0; - return err; - } - - /* Parse the regular expression, and build a structure tree. */ - preg->re_nsub = 0; - dfa->str_tree = parse (®exp, preg, syntax, &err); - if (BE (dfa->str_tree == NULL, 0)) - goto re_compile_internal_free_return; - - /* Analyze the tree and collect information which is necessary to - create the dfa. */ - err = analyze (dfa); - if (BE (err != REG_NOERROR, 0)) - goto re_compile_internal_free_return; - - /* Then create the initial state of the dfa. */ - err = create_initial_state (dfa); - - /* Release work areas. */ - free_workarea_compile (preg); - re_string_destruct (®exp); - - if (BE (err != REG_NOERROR, 0)) - { - re_compile_internal_free_return: - free_dfa_content (dfa); - preg->buffer = NULL; - preg->allocated = 0; - } - - return err; -} - -/* Initialize DFA. We use the length of the regular expression PAT_LEN - as the initial length of some arrays. 
*/ - -static reg_errcode_t -init_dfa (dfa, pat_len) - re_dfa_t *dfa; - int pat_len; -{ - int table_size; - - memset (dfa, '\0', sizeof (re_dfa_t)); - - dfa->nodes_alloc = pat_len + 1; - dfa->nodes = re_malloc (re_token_t, dfa->nodes_alloc); - - dfa->states_alloc = pat_len + 1; - - /* table_size = 2 ^ ceil(log pat_len) */ - for (table_size = 1; table_size > 0; table_size <<= 1) - if (table_size > pat_len) - break; - - dfa->state_table = calloc (sizeof (struct re_state_table_entry), table_size); - dfa->state_hash_mask = table_size - 1; - - dfa->subexps_alloc = 1; - dfa->subexps = re_malloc (re_subexp_t, dfa->subexps_alloc); - dfa->word_char = NULL; - - if (BE (dfa->nodes == NULL || dfa->state_table == NULL - || dfa->subexps == NULL, 0)) - { - /* We don't bother to free anything which was allocated. Very - soon the process will go down anyway. */ - dfa->subexps = NULL; - dfa->state_table = NULL; - dfa->nodes = NULL; - return REG_ESPACE; - } - return REG_NOERROR; -} - -/* Initialize WORD_CHAR table, which indicate which character is - "word". In this case "word" means that it is the word construction - character used by some operators like "\<", "\>", etc. */ - -static reg_errcode_t -init_word_char (dfa) - re_dfa_t *dfa; -{ - int i, j, ch; - dfa->word_char = (re_bitset_ptr_t) calloc (sizeof (bitset), 1); - if (BE (dfa->word_char == NULL, 0)) - return REG_ESPACE; - for (i = 0, ch = 0; i < BITSET_UINTS; ++i) - for (j = 0; j < UINT_BITS; ++j, ++ch) - if (isalnum (ch) || ch == '_') - dfa->word_char[i] |= 1 << j; - return REG_NOERROR; -} - -/* Free the work area which are only used while compiling. */ - -static void -free_workarea_compile (preg) - regex_t *preg; -{ - re_dfa_t *dfa = (re_dfa_t *) preg->buffer; - free_bin_tree (dfa->str_tree); - dfa->str_tree = NULL; - re_free (dfa->org_indices); - dfa->org_indices = NULL; -} - -/* Create initial states for all contexts. 
*/ - -static reg_errcode_t -create_initial_state (dfa) - re_dfa_t *dfa; -{ - int first, i; - reg_errcode_t err; - re_node_set init_nodes; - - /* Initial states have the epsilon closure of the node which is - the first node of the regular expression. */ - first = dfa->str_tree->first; - dfa->init_node = first; - err = re_node_set_init_copy (&init_nodes, dfa->eclosures + first); - if (BE (err != REG_NOERROR, 0)) - return err; - - /* The back-references which are in initial states can epsilon transit, - since in this case all of the subexpressions can be null. - Then we add epsilon closures of the nodes which are the next nodes of - the back-references. */ - if (dfa->nbackref > 0) - for (i = 0; i < init_nodes.nelem; ++i) - { - int node_idx = init_nodes.elems[i]; - re_token_type_t type = dfa->nodes[node_idx].type; - - int clexp_idx; - if (type != OP_BACK_REF) - continue; - for (clexp_idx = 0; clexp_idx < init_nodes.nelem; ++clexp_idx) - { - re_token_t *clexp_node; - clexp_node = dfa->nodes + init_nodes.elems[clexp_idx]; - if (clexp_node->type == OP_CLOSE_SUBEXP - && clexp_node->opr.idx + 1 == dfa->nodes[node_idx].opr.idx) - break; - } - if (clexp_idx == init_nodes.nelem) - continue; - - if (type == OP_BACK_REF) - { - int dest_idx = dfa->edests[node_idx].elems[0]; - if (!re_node_set_contains (&init_nodes, dest_idx)) - { - re_node_set_merge (&init_nodes, dfa->eclosures + dest_idx); - i = 0; - } - } - } - - /* It must be the first time to invoke acquire_state. */ - dfa->init_state = re_acquire_state_context (&err, dfa, &init_nodes, 0); - /* We don't check ERR here, since the initial state must not be NULL. 
*/ - if (BE (dfa->init_state == NULL, 0)) - return err; - if (dfa->init_state->has_constraint) - { - dfa->init_state_word = re_acquire_state_context (&err, dfa, &init_nodes, - CONTEXT_WORD); - dfa->init_state_nl = re_acquire_state_context (&err, dfa, &init_nodes, - CONTEXT_NEWLINE); - dfa->init_state_begbuf = re_acquire_state_context (&err, dfa, - &init_nodes, - CONTEXT_NEWLINE - | CONTEXT_BEGBUF); - if (BE (dfa->init_state_word == NULL || dfa->init_state_nl == NULL - || dfa->init_state_begbuf == NULL, 0)) - return err; - } - else - dfa->init_state_word = dfa->init_state_nl - = dfa->init_state_begbuf = dfa->init_state; - - re_node_set_free (&init_nodes); - return REG_NOERROR; -} - -/* Analyze the structure tree, and calculate "first", "next", "edest", - "eclosure", and "inveclosure". */ - -static reg_errcode_t -analyze (dfa) - re_dfa_t *dfa; -{ - int i; - reg_errcode_t ret; - - /* Allocate arrays. */ - dfa->nexts = re_malloc (int, dfa->nodes_alloc); - dfa->org_indices = re_malloc (int, dfa->nodes_alloc); - dfa->edests = re_malloc (re_node_set, dfa->nodes_alloc); - dfa->eclosures = re_malloc (re_node_set, dfa->nodes_alloc); - dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_alloc); - if (BE (dfa->nexts == NULL || dfa->org_indices == NULL || dfa->edests == NULL - || dfa->eclosures == NULL || dfa->inveclosures == NULL, 0)) - return REG_ESPACE; - /* Initialize them. */ - for (i = 0; i < dfa->nodes_len; ++i) - { - dfa->nexts[i] = -1; - re_node_set_init_empty (dfa->edests + i); - re_node_set_init_empty (dfa->eclosures + i); - re_node_set_init_empty (dfa->inveclosures + i); - } - - ret = analyze_tree (dfa, dfa->str_tree); - if (BE (ret == REG_NOERROR, 1)) - { - ret = calc_eclosure (dfa); - if (ret == REG_NOERROR) - calc_inveclosure (dfa); - } - return ret; -} - -/* Helper functions for analyze. - This function calculate "first", "next", and "edest" for the subtree - whose root is NODE. 
*/ - -static reg_errcode_t -analyze_tree (dfa, node) - re_dfa_t *dfa; - bin_tree_t *node; -{ - reg_errcode_t ret; - if (node->first == -1) - calc_first (dfa, node); - if (node->next == -1) - calc_next (dfa, node); - if (node->eclosure.nelem == 0) - calc_epsdest (dfa, node); - /* Calculate "first" etc. for the left child. */ - if (node->left != NULL) - { - ret = analyze_tree (dfa, node->left); - if (BE (ret != REG_NOERROR, 0)) - return ret; - } - /* Calculate "first" etc. for the right child. */ - if (node->right != NULL) - { - ret = analyze_tree (dfa, node->right); - if (BE (ret != REG_NOERROR, 0)) - return ret; - } - return REG_NOERROR; -} - -/* Calculate "first" for the node NODE. */ -static void -calc_first (dfa, node) - re_dfa_t *dfa; - bin_tree_t *node; -{ - int idx, type; - idx = node->node_idx; - type = (node->type == 0) ? dfa->nodes[idx].type : node->type; - - switch (type) - { -#ifdef DEBUG - case OP_OPEN_BRACKET: - case OP_CLOSE_BRACKET: - case OP_OPEN_DUP_NUM: - case OP_CLOSE_DUP_NUM: - case OP_NON_MATCH_LIST: - case OP_OPEN_COLL_ELEM: - case OP_CLOSE_COLL_ELEM: - case OP_OPEN_EQUIV_CLASS: - case OP_CLOSE_EQUIV_CLASS: - case OP_OPEN_CHAR_CLASS: - case OP_CLOSE_CHAR_CLASS: - /* These must not be appeared here. 
*/ - assert (0); -#endif - case END_OF_RE: - case CHARACTER: - case OP_PERIOD: - case OP_DUP_ASTERISK: - case OP_DUP_QUESTION: -#ifdef RE_ENABLE_I18N - case COMPLEX_BRACKET: -#endif /* RE_ENABLE_I18N */ - case SIMPLE_BRACKET: - case OP_BACK_REF: - case ANCHOR: - case OP_OPEN_SUBEXP: - case OP_CLOSE_SUBEXP: - node->first = idx; - break; - case OP_DUP_PLUS: -#ifdef DEBUG - assert (node->left != NULL); -#endif - if (node->left->first == -1) - calc_first (dfa, node->left); - node->first = node->left->first; - break; - case OP_ALT: - node->first = idx; - break; - /* else fall through */ - default: -#ifdef DEBUG - assert (node->left != NULL); -#endif - if (node->left->first == -1) - calc_first (dfa, node->left); - node->first = node->left->first; - break; - } -} - -/* Calculate "next" for the node NODE. */ - -static void -calc_next (dfa, node) - re_dfa_t *dfa; - bin_tree_t *node; -{ - int idx, type; - bin_tree_t *parent = node->parent; - if (parent == NULL) - { - node->next = -1; - idx = node->node_idx; - if (node->type == 0) - dfa->nexts[idx] = node->next; - return; - } - - idx = parent->node_idx; - type = (parent->type == 0) ? dfa->nodes[idx].type : parent->type; - - switch (type) - { - case OP_DUP_ASTERISK: - case OP_DUP_PLUS: - node->next = idx; - break; - case CONCAT: - if (parent->left == node) - { - if (parent->right->first == -1) - calc_first (dfa, parent->right); - node->next = parent->right->first; - break; - } - /* else fall through */ - default: - if (parent->next == -1) - calc_next (dfa, parent); - node->next = parent->next; - break; - } - idx = node->node_idx; - if (node->type == 0) - dfa->nexts[idx] = node->next; -} - -/* Calculate "edest" for the node NODE. 
*/ - -static void -calc_epsdest (dfa, node) - re_dfa_t *dfa; - bin_tree_t *node; -{ - int idx; - idx = node->node_idx; - if (node->type == 0) - { - if (dfa->nodes[idx].type == OP_DUP_ASTERISK - || dfa->nodes[idx].type == OP_DUP_PLUS - || dfa->nodes[idx].type == OP_DUP_QUESTION) - { - if (node->left->first == -1) - calc_first (dfa, node->left); - if (node->next == -1) - calc_next (dfa, node); - re_node_set_init_2 (dfa->edests + idx, node->left->first, - node->next); - } - else if (dfa->nodes[idx].type == OP_ALT) - { - int left, right; - if (node->left != NULL) - { - if (node->left->first == -1) - calc_first (dfa, node->left); - left = node->left->first; - } - else - { - if (node->next == -1) - calc_next (dfa, node); - left = node->next; - } - if (node->right != NULL) - { - if (node->right->first == -1) - calc_first (dfa, node->right); - right = node->right->first; - } - else - { - if (node->next == -1) - calc_next (dfa, node); - right = node->next; - } - re_node_set_init_2 (dfa->edests + idx, left, right); - } - else if (dfa->nodes[idx].type == ANCHOR - || dfa->nodes[idx].type == OP_OPEN_SUBEXP - || dfa->nodes[idx].type == OP_CLOSE_SUBEXP - || dfa->nodes[idx].type == OP_BACK_REF) - re_node_set_init_1 (dfa->edests + idx, node->next); - } -} - -/* Duplicate the epsilon closure of the node ROOT_NODE. - Note that duplicated nodes have constraint INIT_CONSTRAINT in addition - to their own constraint. */ - -static reg_errcode_t -duplicate_node_closure (dfa, top_org_node, top_clone_node, root_node, - init_constraint) - re_dfa_t *dfa; - int top_org_node, top_clone_node, root_node; - unsigned int init_constraint; -{ - reg_errcode_t err; - int org_node, clone_node, ret; - unsigned int constraint = init_constraint; - for (org_node = top_org_node, clone_node = top_clone_node;;) - { - int org_dest, clone_dest; - if (dfa->nodes[org_node].type == OP_BACK_REF) - { - /* If the back reference epsilon-transit, its destination must - also have the constraint. 
Then duplicate the epsilon closure - of the destination of the back reference, and store it in - edests of the back reference. */ - org_dest = dfa->nexts[org_node]; - re_node_set_empty (dfa->edests + clone_node); - err = duplicate_node (&clone_dest, dfa, org_dest, constraint); - if (BE (err != REG_NOERROR, 0)) - return err; - dfa->nexts[clone_node] = dfa->nexts[org_node]; - ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); - if (BE (ret < 0, 0)) - return REG_ESPACE; - } - else if (dfa->edests[org_node].nelem == 0) - { - /* In case of the node can't epsilon-transit, don't duplicate the - destination and store the original destination as the - destination of the node. */ - dfa->nexts[clone_node] = dfa->nexts[org_node]; - break; - } - else if (dfa->edests[org_node].nelem == 1) - { - /* In case of the node can epsilon-transit, and it has only one - destination. */ - org_dest = dfa->edests[org_node].elems[0]; - re_node_set_empty (dfa->edests + clone_node); - if (dfa->nodes[org_node].type == ANCHOR) - { - /* In case of the node has another constraint, append it. */ - if (org_node == root_node && clone_node != org_node) - { - /* ...but if the node is root_node itself, it means the - epsilon closure have a loop, then tie it to the - destination of the root_node. */ - ret = re_node_set_insert (dfa->edests + clone_node, - org_dest); - if (BE (ret < 0, 0)) - return REG_ESPACE; - break; - } - constraint |= dfa->nodes[org_node].opr.ctx_type; - } - err = duplicate_node (&clone_dest, dfa, org_dest, constraint); - if (BE (err != REG_NOERROR, 0)) - return err; - ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); - if (BE (ret < 0, 0)) - return REG_ESPACE; - } - else /* dfa->edests[org_node].nelem == 2 */ - { - /* In case of the node can epsilon-transit, and it has two - destinations. E.g. '|', '*', '+', '?'. 
*/ - org_dest = dfa->edests[org_node].elems[0]; - re_node_set_empty (dfa->edests + clone_node); - /* Search for a duplicated node which satisfies the constraint. */ - clone_dest = search_duplicated_node (dfa, org_dest, constraint); - if (clone_dest == -1) - { - /* There are no such a duplicated node, create a new one. */ - err = duplicate_node (&clone_dest, dfa, org_dest, constraint); - if (BE (err != REG_NOERROR, 0)) - return err; - ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); - if (BE (ret < 0, 0)) - return REG_ESPACE; - err = duplicate_node_closure (dfa, org_dest, clone_dest, - root_node, constraint); - if (BE (err != REG_NOERROR, 0)) - return err; - } - else - { - /* There are a duplicated node which satisfy the constraint, - use it to avoid infinite loop. */ - ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); - if (BE (ret < 0, 0)) - return REG_ESPACE; - } - - org_dest = dfa->edests[org_node].elems[1]; - err = duplicate_node (&clone_dest, dfa, org_dest, constraint); - if (BE (err != REG_NOERROR, 0)) - return err; - ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); - if (BE (ret < 0, 0)) - return REG_ESPACE; - } - org_node = org_dest; - clone_node = clone_dest; - } - return REG_NOERROR; -} - -/* Search for a node which is duplicated from the node ORG_NODE, and - satisfies the constraint CONSTRAINT. */ - -static int -search_duplicated_node (dfa, org_node, constraint) - re_dfa_t *dfa; - int org_node; - unsigned int constraint; -{ - int idx; - for (idx = dfa->nodes_len - 1; dfa->nodes[idx].duplicated && idx > 0; --idx) - { - if (org_node == dfa->org_indices[idx] - && constraint == dfa->nodes[idx].constraint) - return idx; /* Found. */ - } - return -1; /* Not found. */ -} - -/* Duplicate the node whose index is ORG_IDX and set the constraint CONSTRAINT. - The new index will be stored in NEW_IDX and return REG_NOERROR if succeeded, - otherwise return the error code. 
*/ - -static reg_errcode_t -duplicate_node (new_idx, dfa, org_idx, constraint) - re_dfa_t *dfa; - int *new_idx, org_idx; - unsigned int constraint; -{ - re_token_t dup; - int dup_idx; - - dup = dfa->nodes[org_idx]; - dup_idx = re_dfa_add_node (dfa, dup, 1); - if (BE (dup_idx == -1, 0)) - return REG_ESPACE; - dfa->nodes[dup_idx].constraint = constraint; - if (dfa->nodes[org_idx].type == ANCHOR) - dfa->nodes[dup_idx].constraint |= dfa->nodes[org_idx].opr.ctx_type; - dfa->nodes[dup_idx].duplicated = 1; - re_node_set_init_empty (dfa->edests + dup_idx); - re_node_set_init_empty (dfa->eclosures + dup_idx); - re_node_set_init_empty (dfa->inveclosures + dup_idx); - - /* Store the index of the original node. */ - dfa->org_indices[dup_idx] = org_idx; - *new_idx = dup_idx; - return REG_NOERROR; -} - -static void -calc_inveclosure (dfa) - re_dfa_t *dfa; -{ - int src, idx, dest; - for (src = 0; src < dfa->nodes_len; ++src) - { - for (idx = 0; idx < dfa->eclosures[src].nelem; ++idx) - { - dest = dfa->eclosures[src].elems[idx]; - re_node_set_insert (dfa->inveclosures + dest, src); - } - } -} - -/* Calculate "eclosure" for all the node in DFA. */ - -static reg_errcode_t -calc_eclosure (dfa) - re_dfa_t *dfa; -{ - int node_idx, incomplete; -#ifdef DEBUG - assert (dfa->nodes_len > 0); -#endif - incomplete = 0; - /* For each nodes, calculate epsilon closure. */ - for (node_idx = 0; ; ++node_idx) - { - reg_errcode_t err; - re_node_set eclosure_elem; - if (node_idx == dfa->nodes_len) - { - if (!incomplete) - break; - incomplete = 0; - node_idx = 0; - } - -#ifdef DEBUG - assert (dfa->eclosures[node_idx].nelem != -1); -#endif - /* If we have already calculated, skip it. */ - if (dfa->eclosures[node_idx].nelem != 0) - continue; - /* Calculate epsilon closure of `node_idx'. 
*/ - err = calc_eclosure_iter (&eclosure_elem, dfa, node_idx, 1); - if (BE (err != REG_NOERROR, 0)) - return err; - - if (dfa->eclosures[node_idx].nelem == 0) - { - incomplete = 1; - re_node_set_free (&eclosure_elem); - } - } - return REG_NOERROR; -} - -/* Calculate epsilon closure of NODE. */ - -static reg_errcode_t -calc_eclosure_iter (new_set, dfa, node, root) - re_node_set *new_set; - re_dfa_t *dfa; - int node, root; -{ - reg_errcode_t err; - unsigned int constraint; - int i, incomplete; - re_node_set eclosure; - incomplete = 0; - err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1); - if (BE (err != REG_NOERROR, 0)) - return err; - - /* This indicates that we are calculating this node now. - We reference this value to avoid infinite loop. */ - dfa->eclosures[node].nelem = -1; - - constraint = ((dfa->nodes[node].type == ANCHOR) - ? dfa->nodes[node].opr.ctx_type : 0); - /* If the current node has constraints, duplicate all nodes. - Since they must inherit the constraints. */ - if (constraint && !dfa->nodes[dfa->edests[node].elems[0]].duplicated) - { - int org_node, cur_node; - org_node = cur_node = node; - err = duplicate_node_closure (dfa, node, node, node, constraint); - if (BE (err != REG_NOERROR, 0)) - return err; - } - - /* Expand each epsilon destination nodes. */ - if (IS_EPSILON_NODE(dfa->nodes[node].type)) - for (i = 0; i < dfa->edests[node].nelem; ++i) - { - re_node_set eclosure_elem; - int edest = dfa->edests[node].elems[i]; - /* If calculating the epsilon closure of `edest' is in progress, - return intermediate result. */ - if (dfa->eclosures[edest].nelem == -1) - { - incomplete = 1; - continue; - } - /* If we haven't calculated the epsilon closure of `edest' yet, - calculate now. Otherwise use calculated epsilon closure. 
*/ - if (dfa->eclosures[edest].nelem == 0) - { - err = calc_eclosure_iter (&eclosure_elem, dfa, edest, 0); - if (BE (err != REG_NOERROR, 0)) - return err; - } - else - eclosure_elem = dfa->eclosures[edest]; - /* Merge the epsilon closure of `edest'. */ - re_node_set_merge (&eclosure, &eclosure_elem); - /* If the epsilon closure of `edest' is incomplete, - the epsilon closure of this node is also incomplete. */ - if (dfa->eclosures[edest].nelem == 0) - { - incomplete = 1; - re_node_set_free (&eclosure_elem); - } - } - - /* Epsilon closures include itself. */ - re_node_set_insert (&eclosure, node); - if (incomplete && !root) - dfa->eclosures[node].nelem = 0; - else - dfa->eclosures[node] = eclosure; - *new_set = eclosure; - return REG_NOERROR; -} - -/* Functions for token which are used in the parser. */ - -/* Fetch a token from INPUT. - We must not use this function inside bracket expressions. */ - -static re_token_t -fetch_token (input, syntax) - re_string_t *input; - reg_syntax_t syntax; -{ - re_token_t token; - int consumed_byte; - consumed_byte = peek_token (&token, input, syntax); - re_string_skip_bytes (input, consumed_byte); - return token; -} - -/* Peek a token from INPUT, and return the length of the token. - We must not use this function inside bracket expressions. 
*/ - -static int -peek_token (token, input, syntax) - re_token_t *token; - re_string_t *input; - reg_syntax_t syntax; -{ - unsigned char c; - - if (re_string_eoi (input)) - { - token->type = END_OF_RE; - return 0; - } - - c = re_string_peek_byte (input, 0); - token->opr.c = c; - -#ifdef RE_ENABLE_I18N - token->mb_partial = 0; - if (MB_CUR_MAX > 1 && - !re_string_first_byte (input, re_string_cur_idx (input))) - { - token->type = CHARACTER; - token->mb_partial = 1; - return 1; - } -#endif - if (c == '\\') - { - unsigned char c2; - if (re_string_cur_idx (input) + 1 >= re_string_length (input)) - { - token->type = BACK_SLASH; - return 1; - } - - c2 = re_string_peek_byte_case (input, 1); - token->opr.c = c2; - token->type = CHARACTER; - switch (c2) - { - case '|': - if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_NO_BK_VBAR)) - token->type = OP_ALT; - break; - case '1': case '2': case '3': case '4': case '5': - case '6': case '7': case '8': case '9': - if (!(syntax & RE_NO_BK_REFS)) - { - token->type = OP_BACK_REF; - token->opr.idx = c2 - '0'; - } - break; - case '<': - if (!(syntax & RE_NO_GNU_OPS)) - { - token->type = ANCHOR; - token->opr.idx = WORD_FIRST; - } - break; - case '>': - if (!(syntax & RE_NO_GNU_OPS)) - { - token->type = ANCHOR; - token->opr.idx = WORD_LAST; - } - break; - case 'b': - if (!(syntax & RE_NO_GNU_OPS)) - { - token->type = ANCHOR; - token->opr.idx = WORD_DELIM; - } - break; - case 'B': - if (!(syntax & RE_NO_GNU_OPS)) - { - token->type = ANCHOR; - token->opr.idx = INSIDE_WORD; - } - break; - case 'w': - if (!(syntax & RE_NO_GNU_OPS)) - token->type = OP_WORD; - break; - case 'W': - if (!(syntax & RE_NO_GNU_OPS)) - token->type = OP_NOTWORD; - break; - case '`': - if (!(syntax & RE_NO_GNU_OPS)) - { - token->type = ANCHOR; - token->opr.idx = BUF_FIRST; - } - break; - case '\'': - if (!(syntax & RE_NO_GNU_OPS)) - { - token->type = ANCHOR; - token->opr.idx = BUF_LAST; - } - break; - case '(': - if (!(syntax & RE_NO_BK_PARENS)) - token->type = 
OP_OPEN_SUBEXP; - break; - case ')': - if (!(syntax & RE_NO_BK_PARENS)) - token->type = OP_CLOSE_SUBEXP; - break; - case '+': - if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM)) - token->type = OP_DUP_PLUS; - break; - case '?': - if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM)) - token->type = OP_DUP_QUESTION; - break; - case '{': - if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES))) - token->type = OP_OPEN_DUP_NUM; - break; - case '}': - if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES))) - token->type = OP_CLOSE_DUP_NUM; - break; - default: - break; - } - return 2; - } - - token->type = CHARACTER; - switch (c) - { - case '\n': - if (syntax & RE_NEWLINE_ALT) - token->type = OP_ALT; - break; - case '|': - if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_NO_BK_VBAR)) - token->type = OP_ALT; - break; - case '*': - token->type = OP_DUP_ASTERISK; - break; - case '+': - if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM)) - token->type = OP_DUP_PLUS; - break; - case '?': - if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM)) - token->type = OP_DUP_QUESTION; - break; - case '{': - if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) - token->type = OP_OPEN_DUP_NUM; - break; - case '}': - if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) - token->type = OP_CLOSE_DUP_NUM; - break; - case '(': - if (syntax & RE_NO_BK_PARENS) - token->type = OP_OPEN_SUBEXP; - break; - case ')': - if (syntax & RE_NO_BK_PARENS) - token->type = OP_CLOSE_SUBEXP; - break; - case '[': - token->type = OP_OPEN_BRACKET; - break; - case '.': - token->type = OP_PERIOD; - break; - case '^': - if (!(syntax & RE_CONTEXT_INDEP_ANCHORS) && - re_string_cur_idx (input) != 0) - { - char prev = re_string_peek_byte (input, -1); - if (prev != '|' && prev != '(' && - (!(syntax & RE_NEWLINE_ALT) || prev != '\n')) - break; - } - token->type = ANCHOR; - token->opr.idx = LINE_FIRST; - break; - case '$': - if (!(syntax & RE_CONTEXT_INDEP_ANCHORS) && 
- re_string_cur_idx (input) + 1 != re_string_length (input)) - { - re_token_t next; - re_string_skip_bytes (input, 1); - peek_token (&next, input, syntax); - re_string_skip_bytes (input, -1); - if (next.type != OP_ALT && next.type != OP_CLOSE_SUBEXP) - break; - } - token->type = ANCHOR; - token->opr.idx = LINE_LAST; - break; - default: - break; - } - return 1; -} - -/* Peek a token from INPUT, and return the length of the token. - We must not use this function out of bracket expressions. */ - -static int -peek_token_bracket (token, input, syntax) - re_token_t *token; - re_string_t *input; - reg_syntax_t syntax; -{ - unsigned char c; - if (re_string_eoi (input)) - { - token->type = END_OF_RE; - return 0; - } - c = re_string_peek_byte (input, 0); - token->opr.c = c; - -#ifdef RE_ENABLE_I18N - if (MB_CUR_MAX > 1 && - !re_string_first_byte (input, re_string_cur_idx (input))) - { - token->type = CHARACTER; - return 1; - } -#endif /* RE_ENABLE_I18N */ - - if (c == '\\' && (syntax & RE_BACKSLASH_ESCAPE_IN_LISTS)) - { - /* In this case, '\' escape a character. */ - unsigned char c2; - re_string_skip_bytes (input, 1); - c2 = re_string_peek_byte (input, 0); - token->opr.c = c2; - token->type = CHARACTER; - return 1; - } - if (c == '[') /* '[' is a special char in a bracket exps. */ - { - unsigned char c2; - int token_len; - c2 = re_string_peek_byte (input, 1); - token->opr.c = c2; - token_len = 2; - switch (c2) - { - case '.': - token->type = OP_OPEN_COLL_ELEM; - break; - case '=': - token->type = OP_OPEN_EQUIV_CLASS; - break; - case ':': - if (syntax & RE_CHAR_CLASSES) - { - token->type = OP_OPEN_CHAR_CLASS; - break; - } - /* else fall through. 
*/ - default: - token->type = CHARACTER; - token->opr.c = c; - token_len = 1; - break; - } - return token_len; - } - switch (c) - { - case '-': - token->type = OP_CHARSET_RANGE; - break; - case ']': - token->type = OP_CLOSE_BRACKET; - break; - case '^': - token->type = OP_NON_MATCH_LIST; - break; - default: - token->type = CHARACTER; - } - return 1; -} - -/* Functions for parser. */ - -/* Entry point of the parser. - Parse the regular expression REGEXP and return the structure tree. - If an error is occured, ERR is set by error code, and return NULL. - This function build the following tree, from regular expression : - CAT - / \ - / \ - EOR - - CAT means concatenation. - EOR means end of regular expression. */ - -static bin_tree_t * -parse (regexp, preg, syntax, err) - re_string_t *regexp; - regex_t *preg; - reg_syntax_t syntax; - reg_errcode_t *err; -{ - re_dfa_t *dfa = (re_dfa_t *) preg->buffer; - bin_tree_t *tree, *eor, *root; - re_token_t current_token; - int new_idx; - current_token = fetch_token (regexp, syntax); - tree = parse_reg_exp (regexp, preg, ¤t_token, syntax, 0, err); - if (BE (*err != REG_NOERROR && tree == NULL, 0)) - return NULL; - new_idx = re_dfa_add_node (dfa, current_token, 0); - eor = create_tree (NULL, NULL, 0, new_idx); - if (tree != NULL) - root = create_tree (tree, eor, CONCAT, 0); - else - root = eor; - if (BE (new_idx == -1 || eor == NULL || root == NULL, 0)) - { - *err = REG_ESPACE; - return NULL; - } - return root; -} - -/* This function build the following tree, from regular expression - |: - ALT - / \ - / \ - - - ALT means alternative, which represents the operator `|'. 
*/ - -static bin_tree_t * -parse_reg_exp (regexp, preg, token, syntax, nest, err) - re_string_t *regexp; - regex_t *preg; - re_token_t *token; - reg_syntax_t syntax; - int nest; - reg_errcode_t *err; -{ - re_dfa_t *dfa = (re_dfa_t *) preg->buffer; - bin_tree_t *tree, *branch = NULL; - int new_idx; - tree = parse_branch (regexp, preg, token, syntax, nest, err); - if (BE (*err != REG_NOERROR && tree == NULL, 0)) - return NULL; - - while (token->type == OP_ALT) - { - re_token_t alt_token = *token; - new_idx = re_dfa_add_node (dfa, alt_token, 0); - *token = fetch_token (regexp, syntax); - if (token->type != OP_ALT && token->type != END_OF_RE - && (nest == 0 || token->type != OP_CLOSE_SUBEXP)) - { - branch = parse_branch (regexp, preg, token, syntax, nest, err); - if (BE (*err != REG_NOERROR && branch == NULL, 0)) - { - free_bin_tree (tree); - return NULL; - } - } - else - branch = NULL; - tree = create_tree (tree, branch, 0, new_idx); - if (BE (new_idx == -1 || tree == NULL, 0)) - { - *err = REG_ESPACE; - return NULL; - } - dfa->has_plural_match = 1; - } - return tree; -} - -/* This function build the following tree, from regular expression - : - CAT - / \ - / \ - - - CAT means concatenation. 
*/ - -static bin_tree_t * -parse_branch (regexp, preg, token, syntax, nest, err) - re_string_t *regexp; - regex_t *preg; - re_token_t *token; - reg_syntax_t syntax; - int nest; - reg_errcode_t *err; -{ - bin_tree_t *tree, *exp; - tree = parse_expression (regexp, preg, token, syntax, nest, err); - if (BE (*err != REG_NOERROR && tree == NULL, 0)) - return NULL; - - while (token->type != OP_ALT && token->type != END_OF_RE - && (nest == 0 || token->type != OP_CLOSE_SUBEXP)) - { - exp = parse_expression (regexp, preg, token, syntax, nest, err); - if (BE (*err != REG_NOERROR && exp == NULL, 0)) - { - free_bin_tree (tree); - return NULL; - } - if (tree != NULL && exp != NULL) - { - tree = create_tree (tree, exp, CONCAT, 0); - if (tree == NULL) - { - *err = REG_ESPACE; - return NULL; - } - } - else if (tree == NULL) - tree = exp; - /* Otherwise exp == NULL, we don't need to create new tree. */ - } - return tree; -} - -/* This function build the following tree, from regular expression a*: - * - | - a -*/ - -static bin_tree_t * -parse_expression (regexp, preg, token, syntax, nest, err) - re_string_t *regexp; - regex_t *preg; - re_token_t *token; - reg_syntax_t syntax; - int nest; - reg_errcode_t *err; -{ - re_dfa_t *dfa = (re_dfa_t *) preg->buffer; - bin_tree_t *tree; - int new_idx; - switch (token->type) - { - case CHARACTER: - new_idx = re_dfa_add_node (dfa, *token, 0); - tree = create_tree (NULL, NULL, 0, new_idx); - if (BE (new_idx == -1 || tree == NULL, 0)) - { - *err = REG_ESPACE; - return NULL; - } -#ifdef RE_ENABLE_I18N - if (MB_CUR_MAX > 1) - { - while (!re_string_eoi (regexp) - && !re_string_first_byte (regexp, re_string_cur_idx (regexp))) - { - bin_tree_t *mbc_remain; - *token = fetch_token (regexp, syntax); - new_idx = re_dfa_add_node (dfa, *token, 0); - mbc_remain = create_tree (NULL, NULL, 0, new_idx); - tree = create_tree (tree, mbc_remain, CONCAT, 0); - if (BE (new_idx == -1 || mbc_remain == NULL || tree == NULL, 0)) - { - *err = REG_ESPACE; - return NULL; - 
} - } - } -#endif - break; - case OP_OPEN_SUBEXP: - tree = parse_sub_exp (regexp, preg, token, syntax, nest + 1, err); - if (BE (*err != REG_NOERROR && tree == NULL, 0)) - return NULL; - break; - case OP_OPEN_BRACKET: - tree = parse_bracket_exp (regexp, dfa, token, syntax, err); - if (BE (*err != REG_NOERROR && tree == NULL, 0)) - return NULL; - break; - case OP_BACK_REF: - if (BE (preg->re_nsub < token->opr.idx - || dfa->subexps[token->opr.idx - 1].end == -1, 0)) - { - *err = REG_ESUBREG; - return NULL; - } - dfa->used_bkref_map |= 1 << (token->opr.idx - 1); - new_idx = re_dfa_add_node (dfa, *token, 0); - tree = create_tree (NULL, NULL, 0, new_idx); - if (BE (new_idx == -1 || tree == NULL, 0)) - { - *err = REG_ESPACE; - return NULL; - } - ++dfa->nbackref; - dfa->has_mb_node = 1; - break; - case OP_DUP_ASTERISK: - case OP_DUP_PLUS: - case OP_DUP_QUESTION: - case OP_OPEN_DUP_NUM: - if (syntax & RE_CONTEXT_INVALID_OPS) - { - *err = REG_BADRPT; - return NULL; - } - else if (syntax & RE_CONTEXT_INDEP_OPS) - { - *token = fetch_token (regexp, syntax); - return parse_expression (regexp, preg, token, syntax, nest, err); - } - /* else fall through */ - case OP_CLOSE_SUBEXP: - if ((token->type == OP_CLOSE_SUBEXP) && - !(syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)) - { - *err = REG_ERPAREN; - return NULL; - } - /* else fall through */ - case OP_CLOSE_DUP_NUM: - /* We treat it as a normal character. */ - - /* Then we can these characters as normal characters. 
*/ - token->type = CHARACTER; - new_idx = re_dfa_add_node (dfa, *token, 0); - tree = create_tree (NULL, NULL, 0, new_idx); - if (BE (new_idx == -1 || tree == NULL, 0)) - { - *err = REG_ESPACE; - return NULL; - } - break; - case ANCHOR: - if (dfa->word_char == NULL) - { - *err = init_word_char (dfa); - if (BE (*err != REG_NOERROR, 0)) - return NULL; - } - if (token->opr.ctx_type == WORD_DELIM) - { - bin_tree_t *tree_first, *tree_last; - int idx_first, idx_last; - token->opr.ctx_type = WORD_FIRST; - idx_first = re_dfa_add_node (dfa, *token, 0); - tree_first = create_tree (NULL, NULL, 0, idx_first); - token->opr.ctx_type = WORD_LAST; - idx_last = re_dfa_add_node (dfa, *token, 0); - tree_last = create_tree (NULL, NULL, 0, idx_last); - token->type = OP_ALT; - new_idx = re_dfa_add_node (dfa, *token, 0); - tree = create_tree (tree_first, tree_last, 0, new_idx); - if (BE (idx_first == -1 || idx_last == -1 || new_idx == -1 - || tree_first == NULL || tree_last == NULL - || tree == NULL, 0)) - { - *err = REG_ESPACE; - return NULL; - } - } - else - { - new_idx = re_dfa_add_node (dfa, *token, 0); - tree = create_tree (NULL, NULL, 0, new_idx); - if (BE (new_idx == -1 || tree == NULL, 0)) - { - *err = REG_ESPACE; - return NULL; - } - } - /* We must return here, since ANCHORs can't be followed - by repetition operators. - eg. RE"^*" is invalid or "", - it must not be "". 
*/ - *token = fetch_token (regexp, syntax); - return tree; - case OP_PERIOD: - new_idx = re_dfa_add_node (dfa, *token, 0); - tree = create_tree (NULL, NULL, 0, new_idx); - if (BE (new_idx == -1 || tree == NULL, 0)) - { - *err = REG_ESPACE; - return NULL; - } - if (MB_CUR_MAX > 1) - dfa->has_mb_node = 1; - break; - case OP_WORD: - tree = build_word_op (dfa, 0, err); - if (BE (*err != REG_NOERROR && tree == NULL, 0)) - return NULL; - break; - case OP_NOTWORD: - tree = build_word_op (dfa, 1, err); - if (BE (*err != REG_NOERROR && tree == NULL, 0)) - return NULL; - break; - case OP_ALT: - case END_OF_RE: - return NULL; - case BACK_SLASH: - *err = REG_EESCAPE; - return NULL; - default: - /* Must not happen? */ -#ifdef DEBUG - assert (0); -#endif - return NULL; - } - *token = fetch_token (regexp, syntax); - - while (token->type == OP_DUP_ASTERISK || token->type == OP_DUP_PLUS - || token->type == OP_DUP_QUESTION || token->type == OP_OPEN_DUP_NUM) - { - tree = parse_dup_op (tree, regexp, dfa, token, syntax, err); - if (BE (*err != REG_NOERROR && tree == NULL, 0)) - return NULL; - dfa->has_plural_match = 1; - } - - return tree; -} - -/* This function build the following tree, from regular expression - (): - SUBEXP - | - -*/ - -static bin_tree_t * -parse_sub_exp (regexp, preg, token, syntax, nest, err) - re_string_t *regexp; - regex_t *preg; - re_token_t *token; - reg_syntax_t syntax; - int nest; - reg_errcode_t *err; -{ - re_dfa_t *dfa = (re_dfa_t *) preg->buffer; - bin_tree_t *tree, *left_par, *right_par; - size_t cur_nsub; - int new_idx; - cur_nsub = preg->re_nsub++; - if (dfa->subexps_alloc < preg->re_nsub) - { - re_subexp_t *new_array; - dfa->subexps_alloc *= 2; - new_array = re_realloc (dfa->subexps, re_subexp_t, dfa->subexps_alloc); - if (BE (new_array == NULL, 0)) - { - dfa->subexps_alloc /= 2; - *err = REG_ESPACE; - return NULL; - } - dfa->subexps = new_array; - } - dfa->subexps[cur_nsub].start = dfa->nodes_len; - dfa->subexps[cur_nsub].end = -1; - - new_idx = 
re_dfa_add_node (dfa, *token, 0); - left_par = create_tree (NULL, NULL, 0, new_idx); - if (BE (new_idx == -1 || left_par == NULL, 0)) - { - *err = REG_ESPACE; - return NULL; - } - dfa->nodes[new_idx].opr.idx = cur_nsub; - *token = fetch_token (regexp, syntax); - - /* The subexpression may be a null string. */ - if (token->type == OP_CLOSE_SUBEXP) - tree = NULL; - else - { - tree = parse_reg_exp (regexp, preg, token, syntax, nest, err); - if (BE (*err != REG_NOERROR && tree == NULL, 0)) - return NULL; - } - if (BE (token->type != OP_CLOSE_SUBEXP, 0)) - { - free_bin_tree (tree); - *err = REG_BADPAT; - return NULL; - } - new_idx = re_dfa_add_node (dfa, *token, 0); - dfa->subexps[cur_nsub].end = dfa->nodes_len; - right_par = create_tree (NULL, NULL, 0, new_idx); - tree = ((tree == NULL) ? right_par - : create_tree (tree, right_par, CONCAT, 0)); - tree = create_tree (left_par, tree, CONCAT, 0); - if (BE (new_idx == -1 || right_par == NULL || tree == NULL, 0)) - { - *err = REG_ESPACE; - return NULL; - } - dfa->nodes[new_idx].opr.idx = cur_nsub; - - return tree; -} - -/* This function parse repetition operators like "*", "+", "{1,3}" etc. */ - -static bin_tree_t * -parse_dup_op (dup_elem, regexp, dfa, token, syntax, err) - bin_tree_t *dup_elem; - re_string_t *regexp; - re_dfa_t *dfa; - re_token_t *token; - reg_syntax_t syntax; - reg_errcode_t *err; -{ - re_token_t dup_token; - bin_tree_t *tree = dup_elem, *work_tree; - int new_idx, start_idx = re_string_cur_idx (regexp); - re_token_t start_token = *token; - if (token->type == OP_OPEN_DUP_NUM) - { - int i; - int end = 0; - int start = fetch_number (regexp, token, syntax); - bin_tree_t *elem; - if (start == -1) - { - if (token->type == CHARACTER && token->opr.c == ',') - start = 0; /* We treat "{,m}" as "{0,m}". */ - else - { - *err = REG_BADBR; /* {} is invalid. */ - return NULL; - } - } - if (BE (start != -2, 1)) - { - /* We treat "{n}" as "{n,n}". */ - end = ((token->type == OP_CLOSE_DUP_NUM) ? 
start - : ((token->type == CHARACTER && token->opr.c == ',') - ? fetch_number (regexp, token, syntax) : -2)); - } - if (BE (start == -2 || end == -2, 0)) - { - /* Invalid sequence. */ - if (token->type == OP_CLOSE_DUP_NUM) - goto parse_dup_op_invalid_interval; - else - goto parse_dup_op_ebrace; - } - if (BE (start == 0 && end == 0, 0)) - { - /* We treat "{0}" and "{0,0}" as null string. */ - *token = fetch_token (regexp, syntax); - free_bin_tree (dup_elem); - return NULL; - } - - /* Extract "{n,m}" to "...{0,}". */ - elem = tree; - for (i = 0; i < start; ++i) - if (i != 0) - { - work_tree = duplicate_tree (elem, dfa); - tree = create_tree (tree, work_tree, CONCAT, 0); - if (BE (work_tree == NULL || tree == NULL, 0)) - goto parse_dup_op_espace; - } - - if (end == -1) - { - /* We treat "{0,}" as "*". */ - dup_token.type = OP_DUP_ASTERISK; - if (start > 0) - { - elem = duplicate_tree (elem, dfa); - new_idx = re_dfa_add_node (dfa, dup_token, 0); - work_tree = create_tree (elem, NULL, 0, new_idx); - tree = create_tree (tree, work_tree, CONCAT, 0); - if (BE (elem == NULL || new_idx == -1 || work_tree == NULL - || tree == NULL, 0)) - goto parse_dup_op_espace; - } - else - { - new_idx = re_dfa_add_node (dfa, dup_token, 0); - tree = create_tree (elem, NULL, 0, new_idx); - if (BE (new_idx == -1 || tree == NULL, 0)) - goto parse_dup_op_espace; - } - } - else if (end - start > 0) - { - /* Then extract "{0,m}" to "??...?". 
*/ - dup_token.type = OP_DUP_QUESTION; - if (start > 0) - { - elem = duplicate_tree (elem, dfa); - new_idx = re_dfa_add_node (dfa, dup_token, 0); - elem = create_tree (elem, NULL, 0, new_idx); - tree = create_tree (tree, elem, CONCAT, 0); - if (BE (elem == NULL || new_idx == -1 || tree == NULL, 0)) - goto parse_dup_op_espace; - } - else - { - new_idx = re_dfa_add_node (dfa, dup_token, 0); - tree = elem = create_tree (elem, NULL, 0, new_idx); - if (BE (new_idx == -1 || tree == NULL, 0)) - goto parse_dup_op_espace; - } - for (i = 1; i < end - start; ++i) - { - work_tree = duplicate_tree (elem, dfa); - tree = create_tree (tree, work_tree, CONCAT, 0); - if (BE (work_tree == NULL || tree == NULL, 0)) - { - *err = REG_ESPACE; - return NULL; - } - } - } - } - else - { - new_idx = re_dfa_add_node (dfa, *token, 0); - tree = create_tree (tree, NULL, 0, new_idx); - if (BE (new_idx == -1 || tree == NULL, 0)) - { - *err = REG_ESPACE; - return NULL; - } - } - *token = fetch_token (regexp, syntax); - return tree; - - parse_dup_op_espace: - free_bin_tree (tree); - *err = REG_ESPACE; - return NULL; - - parse_dup_op_ebrace: - if (BE (!(syntax & RE_INVALID_INTERVAL_ORD), 0)) - { - *err = REG_EBRACE; - return NULL; - } - goto parse_dup_op_rollback; - parse_dup_op_invalid_interval: - if (BE (!(syntax & RE_INVALID_INTERVAL_ORD), 0)) - { - *err = REG_BADBR; - return NULL; - } - parse_dup_op_rollback: - re_string_set_index (regexp, start_idx); - *token = start_token; - token->type = CHARACTER; - return dup_elem; -} - -/* Size of the names for collating symbol/equivalence_class/character_class. - I'm not sure, but maybe enough. */ -#define BRACKET_NAME_BUF_SIZE 32 - -#ifndef _LIBC - /* Local function for parse_bracket_exp only used in case of NOT _LIBC. - Build the range expression which starts from START_ELEM, and ends - at END_ELEM. The result are written to MBCSET and SBCSET. 
- RANGE_ALLOC is the allocated size of mbcset->range_starts, and - mbcset->range_ends, is a pointer argument sinse we may - update it. */ - -static reg_errcode_t -# ifdef RE_ENABLE_I18N -build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem) - re_charset_t *mbcset; - int *range_alloc; -# else /* not RE_ENABLE_I18N */ -build_range_exp (sbcset, start_elem, end_elem) -# endif /* not RE_ENABLE_I18N */ - re_bitset_ptr_t sbcset; - bracket_elem_t *start_elem, *end_elem; -{ - unsigned int start_ch, end_ch; - /* Equivalence Classes and Character Classes can't be a range start/end. */ - if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS - || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS, - 0)) - return REG_ERANGE; - - /* We can handle no multi character collating elements without libc - support. */ - if (BE ((start_elem->type == COLL_SYM - && strlen ((char *) start_elem->opr.name) > 1) - || (end_elem->type == COLL_SYM - && strlen ((char *) end_elem->opr.name) > 1), 0)) - return REG_ECOLLATE; - -# ifdef RE_ENABLE_I18N - { - wchar_t wc, start_wc, end_wc; - wchar_t cmp_buf[6] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'}; - - start_ch = ((start_elem->type == SB_CHAR) ? start_elem->opr.ch - : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0] - : 0)); - end_ch = ((end_elem->type == SB_CHAR) ? end_elem->opr.ch - : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0] - : 0)); - start_wc = ((start_elem->type == SB_CHAR || start_elem->type == COLL_SYM) - ? __btowc (start_ch) : start_elem->opr.wch); - end_wc = ((end_elem->type == SB_CHAR || end_elem->type == COLL_SYM) - ? __btowc (end_ch) : end_elem->opr.wch); - cmp_buf[0] = start_wc; - cmp_buf[4] = end_wc; - if (wcscoll (cmp_buf, cmp_buf + 4) > 0) - return REG_ERANGE; - - /* Check the space of the arrays. */ - if (*range_alloc == mbcset->nranges) - { - /* There are not enough space, need realloc. 
*/ - wchar_t *new_array_start, *new_array_end; - int new_nranges; - - /* +1 in case of mbcset->nranges is 0. */ - new_nranges = 2 * mbcset->nranges + 1; - /* Use realloc since mbcset->range_starts and mbcset->range_ends - are NULL if *range_alloc == 0. */ - new_array_start = re_realloc (mbcset->range_starts, wchar_t, - new_nranges); - new_array_end = re_realloc (mbcset->range_ends, wchar_t, - new_nranges); - - if (BE (new_array_start == NULL || new_array_end == NULL, 0)) - return REG_ESPACE; - - mbcset->range_starts = new_array_start; - mbcset->range_ends = new_array_end; - *range_alloc = new_nranges; - } - - mbcset->range_starts[mbcset->nranges] = start_wc; - mbcset->range_ends[mbcset->nranges++] = end_wc; - - /* Build the table for single byte characters. */ - for (wc = 0; wc <= SBC_MAX; ++wc) - { - cmp_buf[2] = wc; - if (wcscoll (cmp_buf, cmp_buf + 2) <= 0 - && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0) - bitset_set (sbcset, wc); - } - } -# else /* not RE_ENABLE_I18N */ - { - unsigned int ch; - start_ch = ((start_elem->type == SB_CHAR ) ? start_elem->opr.ch - : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0] - : 0)); - end_ch = ((end_elem->type == SB_CHAR ) ? end_elem->opr.ch - : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0] - : 0)); - if (start_ch > end_ch) - return REG_ERANGE; - /* Build the table for single byte characters. */ - for (ch = 0; ch <= SBC_MAX; ++ch) - if (start_ch <= ch && ch <= end_ch) - bitset_set (sbcset, ch); - } -# endif /* not RE_ENABLE_I18N */ - return REG_NOERROR; -} -#endif /* not _LIBC */ - -#ifndef _LIBC -/* Helper function for parse_bracket_exp only used in case of NOT _LIBC.. - Build the collating element which is represented by NAME. - The result are written to MBCSET and SBCSET. - COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a - pointer argument since we may update it. 
*/ - -static reg_errcode_t -# ifdef RE_ENABLE_I18N -build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name) - re_charset_t *mbcset; - int *coll_sym_alloc; -# else /* not RE_ENABLE_I18N */ -build_collating_symbol (sbcset, name) -# endif /* not RE_ENABLE_I18N */ - re_bitset_ptr_t sbcset; - const unsigned char *name; -{ - size_t name_len = strlen ((const char *) name); - if (BE (name_len != 1, 0)) - return REG_ECOLLATE; - else - { - bitset_set (sbcset, name[0]); - return REG_NOERROR; - } -} -#endif /* not _LIBC */ - -/* This function parse bracket expression like "[abc]", "[a-c]", - "[[.a-a.]]" etc. */ - -static bin_tree_t * -parse_bracket_exp (regexp, dfa, token, syntax, err) - re_string_t *regexp; - re_dfa_t *dfa; - re_token_t *token; - reg_syntax_t syntax; - reg_errcode_t *err; -{ -#ifdef _LIBC - const unsigned char *collseqmb; - const char *collseqwc; - uint32_t nrules; - int32_t table_size; - const int32_t *symb_table; - const unsigned char *extra; - - /* Local function for parse_bracket_exp used in _LIBC environement. - Seek the collating symbol entry correspondings to NAME. - Return the index of the symbol in the SYMB_TABLE. */ - - static inline int32_t - seek_collating_symbol_entry (name, name_len) - const unsigned char *name; - size_t name_len; - { - int32_t hash = elem_hash ((const char *) name, name_len); - int32_t elem = hash % table_size; - int32_t second = hash % (table_size - 2); - while (symb_table[2 * elem] != 0) - { - /* First compare the hashing value. */ - if (symb_table[2 * elem] == hash - /* Compare the length of the name. */ - && name_len == extra[symb_table[2 * elem + 1]] - /* Compare the name. */ - && memcmp (name, &extra[symb_table[2 * elem + 1] + 1], - name_len) == 0) - { - /* Yep, this is the entry. */ - break; - } - - /* Next entry. */ - elem += second; - } - return elem; - } - - /* Local function for parse_bracket_exp used in _LIBC environement. - Look up the collation sequence value of BR_ELEM. 
- Return the value if succeeded, UINT_MAX otherwise. */ - - static inline unsigned int - lookup_collation_sequence_value (br_elem) - bracket_elem_t *br_elem; - { - if (br_elem->type == SB_CHAR) - { - /* - if (MB_CUR_MAX == 1) - */ - if (nrules == 0) - return collseqmb[br_elem->opr.ch]; - else - { - wint_t wc = __btowc (br_elem->opr.ch); - return collseq_table_lookup (collseqwc, wc); - } - } - else if (br_elem->type == MB_CHAR) - { - return collseq_table_lookup (collseqwc, br_elem->opr.wch); - } - else if (br_elem->type == COLL_SYM) - { - size_t sym_name_len = strlen ((char *) br_elem->opr.name); - if (nrules != 0) - { - int32_t elem, idx; - elem = seek_collating_symbol_entry (br_elem->opr.name, - sym_name_len); - if (symb_table[2 * elem] != 0) - { - /* We found the entry. */ - idx = symb_table[2 * elem + 1]; - /* Skip the name of collating element name. */ - idx += 1 + extra[idx]; - /* Skip the byte sequence of the collating element. */ - idx += 1 + extra[idx]; - /* Adjust for the alignment. */ - idx = (idx + 3) & ~3; - /* Skip the multibyte collation sequence value. */ - idx += sizeof (unsigned int); - /* Skip the wide char sequence of the collating element. */ - idx += sizeof (unsigned int) * - (1 + *(unsigned int *) (extra + idx)); - /* Return the collation sequence value. */ - return *(unsigned int *) (extra + idx); - } - else if (symb_table[2 * elem] == 0 && sym_name_len == 1) - { - /* No valid character. Match it as a single byte - character. */ - return collseqmb[br_elem->opr.name[0]]; - } - } - else if (sym_name_len == 1) - return collseqmb[br_elem->opr.name[0]]; - } - return UINT_MAX; - } - - /* Local function for parse_bracket_exp used in _LIBC environement. - Build the range expression which starts from START_ELEM, and ends - at END_ELEM. The result are written to MBCSET and SBCSET. - RANGE_ALLOC is the allocated size of mbcset->range_starts, and - mbcset->range_ends, is a pointer argument sinse we may - update it. 
*/ - - static inline reg_errcode_t -# ifdef RE_ENABLE_I18N - build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem) - re_charset_t *mbcset; - int *range_alloc; -# else /* not RE_ENABLE_I18N */ - build_range_exp (sbcset, start_elem, end_elem) -# endif /* not RE_ENABLE_I18N */ - re_bitset_ptr_t sbcset; - bracket_elem_t *start_elem, *end_elem; - { - unsigned int ch; - uint32_t start_collseq; - uint32_t end_collseq; - -# ifdef RE_ENABLE_I18N - /* Check the space of the arrays. */ - if (*range_alloc == mbcset->nranges) - { - /* There are not enough space, need realloc. */ - uint32_t *new_array_start; - uint32_t *new_array_end; - int new_nranges; - - /* +1 in case of mbcset->nranges is 0. */ - new_nranges = 2 * mbcset->nranges + 1; - /* Use realloc since mbcset->range_starts and mbcset->range_ends - are NULL if *range_alloc == 0. */ - new_array_start = re_realloc (mbcset->range_starts, uint32_t, - new_nranges); - new_array_end = re_realloc (mbcset->range_ends, uint32_t, - new_nranges); - - if (BE (new_array_start == NULL || new_array_end == NULL, 0)) - return REG_ESPACE; - - mbcset->range_starts = new_array_start; - mbcset->range_ends = new_array_end; - *range_alloc = new_nranges; - } -# endif /* RE_ENABLE_I18N */ - - /* Equivalence Classes and Character Classes can't be a range - start/end. */ - if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS - || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS, - 0)) - return REG_ERANGE; - - start_collseq = lookup_collation_sequence_value (start_elem); - end_collseq = lookup_collation_sequence_value (end_elem); - /* Check start/end collation sequence values. */ - if (BE (start_collseq == UINT_MAX || end_collseq == UINT_MAX, 0)) - return REG_ECOLLATE; - if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_collseq > end_collseq, 0)) - return REG_ERANGE; - -# ifdef RE_ENABLE_I18N - /* Got valid collation sequence values, add them as a new entry. 
*/ - mbcset->range_starts[mbcset->nranges] = start_collseq; - mbcset->range_ends[mbcset->nranges++] = end_collseq; -# endif /* RE_ENABLE_I18N */ - - /* Build the table for single byte characters. */ - for (ch = 0; ch <= SBC_MAX; ch++) - { - uint32_t ch_collseq; - /* - if (MB_CUR_MAX == 1) - */ - if (nrules == 0) - ch_collseq = collseqmb[ch]; - else - ch_collseq = collseq_table_lookup (collseqwc, __btowc (ch)); - if (start_collseq <= ch_collseq && ch_collseq <= end_collseq) - bitset_set (sbcset, ch); - } - return REG_NOERROR; - } - - /* Local function for parse_bracket_exp used in _LIBC environement. - Build the collating element which is represented by NAME. - The result are written to MBCSET and SBCSET. - COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a - pointer argument sinse we may update it. */ - - static inline reg_errcode_t -# ifdef RE_ENABLE_I18N - build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name) - re_charset_t *mbcset; - int *coll_sym_alloc; -# else /* not RE_ENABLE_I18N */ - build_collating_symbol (sbcset, name) -# endif /* not RE_ENABLE_I18N */ - re_bitset_ptr_t sbcset; - const unsigned char *name; - { - int32_t elem, idx; - size_t name_len = strlen ((const char *) name); - if (nrules != 0) - { - elem = seek_collating_symbol_entry (name, name_len); - if (symb_table[2 * elem] != 0) - { - /* We found the entry. */ - idx = symb_table[2 * elem + 1]; - /* Skip the name of collating element name. */ - idx += 1 + extra[idx]; - } - else if (symb_table[2 * elem] == 0 && name_len == 1) - { - /* No valid character, treat it as a normal - character. */ - bitset_set (sbcset, name[0]); - return REG_NOERROR; - } - else - return REG_ECOLLATE; - -# ifdef RE_ENABLE_I18N - /* Got valid collation sequence, add it as a new entry. */ - /* Check the space of the arrays. */ - if (*coll_sym_alloc == mbcset->ncoll_syms) - { - /* Not enough, realloc it. */ - /* +1 in case of mbcset->ncoll_syms is 0. 
*/ - *coll_sym_alloc = 2 * mbcset->ncoll_syms + 1; - /* Use realloc since mbcset->coll_syms is NULL - if *alloc == 0. */ - mbcset->coll_syms = re_realloc (mbcset->coll_syms, int32_t, - *coll_sym_alloc); - if (BE (mbcset->coll_syms == NULL, 0)) - return REG_ESPACE; - } - mbcset->coll_syms[mbcset->ncoll_syms++] = idx; -# endif /* RE_ENABLE_I18N */ - return REG_NOERROR; - } - else - { - if (BE (name_len != 1, 0)) - return REG_ECOLLATE; - else - { - bitset_set (sbcset, name[0]); - return REG_NOERROR; - } - } - } -#endif - - re_token_t br_token; - re_bitset_ptr_t sbcset; -#ifdef RE_ENABLE_I18N - re_charset_t *mbcset; - int coll_sym_alloc = 0, range_alloc = 0, mbchar_alloc = 0; - int equiv_class_alloc = 0, char_class_alloc = 0; -#else /* not RE_ENABLE_I18N */ - int non_match = 0; -#endif /* not RE_ENABLE_I18N */ - bin_tree_t *work_tree; - int token_len, new_idx; -#ifdef _LIBC - collseqmb = (const unsigned char *) - _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB); - nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); - if (nrules) - { - /* - if (MB_CUR_MAX > 1) - */ - collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC); - table_size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_SYMB_HASH_SIZEMB); - symb_table = (const int32_t *) _NL_CURRENT (LC_COLLATE, - _NL_COLLATE_SYMB_TABLEMB); - extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE, - _NL_COLLATE_SYMB_EXTRAMB); - } -#endif - sbcset = (re_bitset_ptr_t) calloc (sizeof (unsigned int), BITSET_UINTS); -#ifdef RE_ENABLE_I18N - mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1); -#endif /* RE_ENABLE_I18N */ -#ifdef RE_ENABLE_I18N - if (BE (sbcset == NULL || mbcset == NULL, 0)) -#else - if (BE (sbcset == NULL, 0)) -#endif /* RE_ENABLE_I18N */ - { - *err = REG_ESPACE; - return NULL; - } - - token_len = peek_token_bracket (token, regexp, syntax); - if (BE (token->type == END_OF_RE, 0)) - { - *err = REG_BADPAT; - goto parse_bracket_exp_free_return; - } - if (token->type == OP_NON_MATCH_LIST) - { -#ifdef 
RE_ENABLE_I18N - int i; - mbcset->non_match = 1; -#else /* not RE_ENABLE_I18N */ - non_match = 1; -#endif /* not RE_ENABLE_I18N */ - if (syntax & RE_HAT_LISTS_NOT_NEWLINE) - bitset_set (sbcset, '\0'); - re_string_skip_bytes (regexp, token_len); /* Skip a token. */ - token_len = peek_token_bracket (token, regexp, syntax); - if (BE (token->type == END_OF_RE, 0)) - { - *err = REG_BADPAT; - goto parse_bracket_exp_free_return; - } -#ifdef RE_ENABLE_I18N - if (MB_CUR_MAX > 1) - for (i = 0; i < SBC_MAX; ++i) - if (__btowc (i) == WEOF) - bitset_set (sbcset, i); -#endif /* RE_ENABLE_I18N */ - } - - /* We treat the first ']' as a normal character. */ - if (token->type == OP_CLOSE_BRACKET) - token->type = CHARACTER; - - while (1) - { - bracket_elem_t start_elem, end_elem; - unsigned char start_name_buf[BRACKET_NAME_BUF_SIZE]; - unsigned char end_name_buf[BRACKET_NAME_BUF_SIZE]; - reg_errcode_t ret; - int token_len2 = 0, is_range_exp = 0; - re_token_t token2; - - start_elem.opr.name = start_name_buf; - ret = parse_bracket_element (&start_elem, regexp, token, token_len, dfa, - syntax); - if (BE (ret != REG_NOERROR, 0)) - { - *err = ret; - goto parse_bracket_exp_free_return; - } - - token_len = peek_token_bracket (token, regexp, syntax); - if (BE (token->type == END_OF_RE, 0)) - { - *err = REG_BADPAT; - goto parse_bracket_exp_free_return; - } - if (token->type == OP_CHARSET_RANGE) - { - re_string_skip_bytes (regexp, token_len); /* Skip '-'. */ - token_len2 = peek_token_bracket (&token2, regexp, syntax); - if (BE (token->type == END_OF_RE, 0)) - { - *err = REG_BADPAT; - goto parse_bracket_exp_free_return; - } - if (token2.type == OP_CLOSE_BRACKET) - { - /* We treat the last '-' as a normal character. 
*/ - re_string_skip_bytes (regexp, -token_len); - token->type = CHARACTER; - } - else - is_range_exp = 1; - } - - if (is_range_exp == 1) - { - end_elem.opr.name = end_name_buf; - ret = parse_bracket_element (&end_elem, regexp, &token2, token_len2, - dfa, syntax); - if (BE (ret != REG_NOERROR, 0)) - { - *err = ret; - goto parse_bracket_exp_free_return; - } - - token_len = peek_token_bracket (token, regexp, syntax); - if (BE (token->type == END_OF_RE, 0)) - { - *err = REG_BADPAT; - goto parse_bracket_exp_free_return; - } - *err = build_range_exp (sbcset, -#ifdef RE_ENABLE_I18N - mbcset, &range_alloc, -#endif /* RE_ENABLE_I18N */ - &start_elem, &end_elem); - if (BE (*err != REG_NOERROR, 0)) - goto parse_bracket_exp_free_return; - } - else - { - switch (start_elem.type) - { - case SB_CHAR: - bitset_set (sbcset, start_elem.opr.ch); - break; -#ifdef RE_ENABLE_I18N - case MB_CHAR: - /* Check whether the array has enough space. */ - if (mbchar_alloc == mbcset->nmbchars) - { - /* Not enough, realloc it. */ - /* +1 in case of mbcset->nmbchars is 0. */ - mbchar_alloc = 2 * mbcset->nmbchars + 1; - /* Use realloc since array is NULL if *alloc == 0. 
*/ - mbcset->mbchars = re_realloc (mbcset->mbchars, wchar_t, - mbchar_alloc); - if (BE (mbcset->mbchars == NULL, 0)) - goto parse_bracket_exp_espace; - } - mbcset->mbchars[mbcset->nmbchars++] = start_elem.opr.wch; - break; -#endif /* RE_ENABLE_I18N */ - case EQUIV_CLASS: - *err = build_equiv_class (sbcset, -#ifdef RE_ENABLE_I18N - mbcset, &equiv_class_alloc, -#endif /* RE_ENABLE_I18N */ - start_elem.opr.name); - if (BE (*err != REG_NOERROR, 0)) - goto parse_bracket_exp_free_return; - break; - case COLL_SYM: - *err = build_collating_symbol (sbcset, -#ifdef RE_ENABLE_I18N - mbcset, &coll_sym_alloc, -#endif /* RE_ENABLE_I18N */ - start_elem.opr.name); - if (BE (*err != REG_NOERROR, 0)) - goto parse_bracket_exp_free_return; - break; - case CHAR_CLASS: - *err = build_charclass (sbcset, -#ifdef RE_ENABLE_I18N - mbcset, &char_class_alloc, -#endif /* RE_ENABLE_I18N */ - start_elem.opr.name, syntax); - if (BE (*err != REG_NOERROR, 0)) - goto parse_bracket_exp_free_return; - break; - default: - assert (0); - break; - } - } - if (token->type == OP_CLOSE_BRACKET) - break; - } - - re_string_skip_bytes (regexp, token_len); /* Skip a token. */ - - /* If it is non-matching list. */ -#ifdef RE_ENABLE_I18N - if (mbcset->non_match) -#else /* not RE_ENABLE_I18N */ - if (non_match) -#endif /* not RE_ENABLE_I18N */ - bitset_not (sbcset); - - /* Build a tree for simple bracket. */ - br_token.type = SIMPLE_BRACKET; - br_token.opr.sbcset = sbcset; - new_idx = re_dfa_add_node (dfa, br_token, 0); - work_tree = create_tree (NULL, NULL, 0, new_idx); - if (BE (new_idx == -1 || work_tree == NULL, 0)) - goto parse_bracket_exp_espace; - -#ifdef RE_ENABLE_I18N - if (mbcset->nmbchars || mbcset->ncoll_syms || mbcset->nequiv_classes - || mbcset->nranges || (MB_CUR_MAX > 1 && (mbcset->nchar_classes - || mbcset->non_match))) - { - re_token_t alt_token; - bin_tree_t *mbc_tree; - /* Build a tree for complex bracket. 
*/ - br_token.type = COMPLEX_BRACKET; - br_token.opr.mbcset = mbcset; - dfa->has_mb_node = 1; - new_idx = re_dfa_add_node (dfa, br_token, 0); - mbc_tree = create_tree (NULL, NULL, 0, new_idx); - if (BE (new_idx == -1 || mbc_tree == NULL, 0)) - goto parse_bracket_exp_espace; - /* Then join them by ALT node. */ - dfa->has_plural_match = 1; - alt_token.type = OP_ALT; - new_idx = re_dfa_add_node (dfa, alt_token, 0); - work_tree = create_tree (work_tree, mbc_tree, 0, new_idx); - if (BE (new_idx != -1 && mbc_tree != NULL, 1)) - return work_tree; - } - else - { - free_charset (mbcset); - return work_tree; - } -#else /* not RE_ENABLE_I18N */ - return work_tree; -#endif /* not RE_ENABLE_I18N */ - - parse_bracket_exp_espace: - *err = REG_ESPACE; - parse_bracket_exp_free_return: - re_free (sbcset); -#ifdef RE_ENABLE_I18N - free_charset (mbcset); -#endif /* RE_ENABLE_I18N */ - return NULL; -} - -/* Parse an element in the bracket expression. */ - -static reg_errcode_t -parse_bracket_element (elem, regexp, token, token_len, dfa, syntax) - bracket_elem_t *elem; - re_string_t *regexp; - re_token_t *token; - int token_len; - re_dfa_t *dfa; - reg_syntax_t syntax; -{ -#ifdef RE_ENABLE_I18N - int cur_char_size; - cur_char_size = re_string_char_size_at (regexp, re_string_cur_idx (regexp)); - if (cur_char_size > 1) - { - elem->type = MB_CHAR; - elem->opr.wch = re_string_wchar_at (regexp, re_string_cur_idx (regexp)); - re_string_skip_bytes (regexp, cur_char_size); - return REG_NOERROR; - } -#endif /* RE_ENABLE_I18N */ - re_string_skip_bytes (regexp, token_len); /* Skip a token. */ - if (token->type == OP_OPEN_COLL_ELEM || token->type == OP_OPEN_CHAR_CLASS - || token->type == OP_OPEN_EQUIV_CLASS) - return parse_bracket_symbol (elem, regexp, token); - elem->type = SB_CHAR; - elem->opr.ch = token->opr.c; - return REG_NOERROR; -} - -/* Parse a bracket symbol in the bracket expression. Bracket symbols are - such as [::], [..], and - [==]. 
*/ - -static reg_errcode_t -parse_bracket_symbol (elem, regexp, token) - bracket_elem_t *elem; - re_string_t *regexp; - re_token_t *token; -{ - unsigned char ch, delim = token->opr.c; - int i = 0; - for (;; ++i) - { - if (re_string_eoi(regexp) || i >= BRACKET_NAME_BUF_SIZE) - return REG_EBRACK; - if (token->type == OP_OPEN_CHAR_CLASS) - ch = re_string_fetch_byte_case (regexp); - else - ch = re_string_fetch_byte (regexp); - if (ch == delim && re_string_peek_byte (regexp, 0) == ']') - break; - elem->opr.name[i] = ch; - } - re_string_skip_bytes (regexp, 1); - elem->opr.name[i] = '\0'; - switch (token->type) - { - case OP_OPEN_COLL_ELEM: - elem->type = COLL_SYM; - break; - case OP_OPEN_EQUIV_CLASS: - elem->type = EQUIV_CLASS; - break; - case OP_OPEN_CHAR_CLASS: - elem->type = CHAR_CLASS; - break; - default: - break; - } - return REG_NOERROR; -} - - /* Helper function for parse_bracket_exp. - Build the equivalence class which is represented by NAME. - The result are written to MBCSET and SBCSET. - EQUIV_CLASS_ALLOC is the allocated size of mbcset->equiv_classes, - is a pointer argument sinse we may update it. */ - -static reg_errcode_t -#ifdef RE_ENABLE_I18N -build_equiv_class (sbcset, mbcset, equiv_class_alloc, name) - re_charset_t *mbcset; - int *equiv_class_alloc; -#else /* not RE_ENABLE_I18N */ -build_equiv_class (sbcset, name) -#endif /* not RE_ENABLE_I18N */ - re_bitset_ptr_t sbcset; - const unsigned char *name; -{ -#if defined _LIBC && defined RE_ENABLE_I18N - uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); - if (nrules != 0) - { - const int32_t *table, *indirect; - const unsigned char *weights, *extra, *cp; - unsigned char char_buf[2]; - int32_t idx1, idx2; - unsigned int ch; - size_t len; - /* This #include defines a local function! */ -# include - /* Calculate the index for equivalence class. 
*/ - cp = name; - table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); - weights = (const unsigned char *) _NL_CURRENT (LC_COLLATE, - _NL_COLLATE_WEIGHTMB); - extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE, - _NL_COLLATE_EXTRAMB); - indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE, - _NL_COLLATE_INDIRECTMB); - idx1 = findidx (&cp); - if (BE (idx1 == 0 || cp < name + strlen ((const char *) name), 0)) - /* This isn't a valid character. */ - return REG_ECOLLATE; - - /* Build single byte matcing table for this equivalence class. */ - char_buf[1] = (unsigned char) '\0'; - len = weights[idx1]; - for (ch = 0; ch < SBC_MAX; ++ch) - { - char_buf[0] = ch; - cp = char_buf; - idx2 = findidx (&cp); -/* - idx2 = table[ch]; -*/ - if (idx2 == 0) - /* This isn't a valid character. */ - continue; - if (len == weights[idx2]) - { - int cnt = 0; - while (cnt <= len && - weights[idx1 + 1 + cnt] == weights[idx2 + 1 + cnt]) - ++cnt; - - if (cnt > len) - bitset_set (sbcset, ch); - } - } - /* Check whether the array has enough space. */ - if (*equiv_class_alloc == mbcset->nequiv_classes) - { - /* Not enough, realloc it. */ - /* +1 in case of mbcset->nequiv_classes is 0. */ - *equiv_class_alloc = 2 * mbcset->nequiv_classes + 1; - /* Use realloc since the array is NULL if *alloc == 0. */ - mbcset->equiv_classes = re_realloc (mbcset->equiv_classes, int32_t, - *equiv_class_alloc); - if (BE (mbcset->equiv_classes == NULL, 0)) - return REG_ESPACE; - } - mbcset->equiv_classes[mbcset->nequiv_classes++] = idx1; - } - else -#endif /* _LIBC && RE_ENABLE_I18N */ - { - if (BE (strlen ((const char *) name) != 1, 0)) - return REG_ECOLLATE; - bitset_set (sbcset, *name); - } - return REG_NOERROR; -} - - /* Helper function for parse_bracket_exp. - Build the character class which is represented by NAME. - The result are written to MBCSET and SBCSET. - CHAR_CLASS_ALLOC is the allocated size of mbcset->char_classes, - is a pointer argument sinse we may update it. 
*/ - -static reg_errcode_t -#ifdef RE_ENABLE_I18N -build_charclass (sbcset, mbcset, char_class_alloc, class_name, syntax) - re_charset_t *mbcset; - int *char_class_alloc; -#else /* not RE_ENABLE_I18N */ -build_charclass (sbcset, class_name, syntax) -#endif /* not RE_ENABLE_I18N */ - re_bitset_ptr_t sbcset; - const unsigned char *class_name; - reg_syntax_t syntax; -{ - int i; - const char *name = (const char *) class_name; - - /* In case of REG_ICASE "upper" and "lower" match the both of - upper and lower cases. */ - if ((syntax & RE_ICASE) - && (strcmp (name, "upper") == 0 || strcmp (name, "lower") == 0)) - name = "alpha"; - -#ifdef RE_ENABLE_I18N - /* Check the space of the arrays. */ - if (*char_class_alloc == mbcset->nchar_classes) - { - /* Not enough, realloc it. */ - /* +1 in case of mbcset->nchar_classes is 0. */ - *char_class_alloc = 2 * mbcset->nchar_classes + 1; - /* Use realloc since array is NULL if *alloc == 0. */ - mbcset->char_classes = re_realloc (mbcset->char_classes, wctype_t, - *char_class_alloc); - if (BE (mbcset->char_classes == NULL, 0)) - return REG_ESPACE; - } - mbcset->char_classes[mbcset->nchar_classes++] = __wctype (name); -#endif /* RE_ENABLE_I18N */ - -#define BUILD_CHARCLASS_LOOP(ctype_func)\ - for (i = 0; i < SBC_MAX; ++i) \ - { \ - if (ctype_func (i)) \ - bitset_set (sbcset, i); \ - } - - if (strcmp (name, "alnum") == 0) - BUILD_CHARCLASS_LOOP (isalnum) - else if (strcmp (name, "cntrl") == 0) - BUILD_CHARCLASS_LOOP (iscntrl) - else if (strcmp (name, "lower") == 0) - BUILD_CHARCLASS_LOOP (islower) - else if (strcmp (name, "space") == 0) - BUILD_CHARCLASS_LOOP (isspace) - else if (strcmp (name, "alpha") == 0) - BUILD_CHARCLASS_LOOP (isalpha) - else if (strcmp (name, "digit") == 0) - BUILD_CHARCLASS_LOOP (isdigit) - else if (strcmp (name, "print") == 0) - BUILD_CHARCLASS_LOOP (isprint) - else if (strcmp (name, "upper") == 0) - BUILD_CHARCLASS_LOOP (isupper) - else if (strcmp (name, "blank") == 0) - BUILD_CHARCLASS_LOOP (isblank) - else 
if (strcmp (name, "graph") == 0) - BUILD_CHARCLASS_LOOP (isgraph) - else if (strcmp (name, "punct") == 0) - BUILD_CHARCLASS_LOOP (ispunct) - else if (strcmp (name, "xdigit") == 0) - BUILD_CHARCLASS_LOOP (isxdigit) - else - return REG_ECTYPE; - - return REG_NOERROR; -} - -static bin_tree_t * -build_word_op (dfa, not, err) - re_dfa_t *dfa; - int not; - reg_errcode_t *err; -{ - re_bitset_ptr_t sbcset; -#ifdef RE_ENABLE_I18N - re_charset_t *mbcset; - int alloc = 0; -#else /* not RE_ENABLE_I18N */ - int non_match = 0; -#endif /* not RE_ENABLE_I18N */ - reg_errcode_t ret; - re_token_t br_token; - bin_tree_t *tree; - int new_idx; - - sbcset = (re_bitset_ptr_t) calloc (sizeof (unsigned int), BITSET_UINTS); -#ifdef RE_ENABLE_I18N - mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1); -#endif /* RE_ENABLE_I18N */ - -#ifdef RE_ENABLE_I18N - if (BE (sbcset == NULL || mbcset == NULL, 0)) -#else /* not RE_ENABLE_I18N */ - if (BE (sbcset == NULL, 0)) -#endif /* not RE_ENABLE_I18N */ - { - *err = REG_ESPACE; - return NULL; - } - - if (not) - { -#ifdef RE_ENABLE_I18N - int i; - /* - if (syntax & RE_HAT_LISTS_NOT_NEWLINE) - bitset_set(cset->sbcset, '\0'); - */ - mbcset->non_match = 1; - if (MB_CUR_MAX > 1) - for (i = 0; i < SBC_MAX; ++i) - if (__btowc (i) == WEOF) - bitset_set (sbcset, i); -#else /* not RE_ENABLE_I18N */ - non_match = 1; -#endif /* not RE_ENABLE_I18N */ - } - - /* We don't care the syntax in this case. */ - ret = build_charclass (sbcset, -#ifdef RE_ENABLE_I18N - mbcset, &alloc, -#endif /* RE_ENABLE_I18N */ - (const unsigned char *) "alpha", 0); - - if (BE (ret != REG_NOERROR, 0)) - { - re_free (sbcset); -#ifdef RE_ENABLE_I18N - free_charset (mbcset); -#endif /* RE_ENABLE_I18N */ - *err = ret; - return NULL; - } - /* \w match '_' also. */ - bitset_set (sbcset, '_'); - - /* If it is non-matching list. 
*/ -#ifdef RE_ENABLE_I18N - if (mbcset->non_match) -#else /* not RE_ENABLE_I18N */ - if (non_match) -#endif /* not RE_ENABLE_I18N */ - bitset_not (sbcset); - - /* Build a tree for simple bracket. */ - br_token.type = SIMPLE_BRACKET; - br_token.opr.sbcset = sbcset; - new_idx = re_dfa_add_node (dfa, br_token, 0); - tree = create_tree (NULL, NULL, 0, new_idx); - if (BE (new_idx == -1 || tree == NULL, 0)) - goto build_word_op_espace; - -#ifdef RE_ENABLE_I18N - if (MB_CUR_MAX > 1) - { - re_token_t alt_token; - bin_tree_t *mbc_tree; - /* Build a tree for complex bracket. */ - br_token.type = COMPLEX_BRACKET; - br_token.opr.mbcset = mbcset; - dfa->has_mb_node = 1; - new_idx = re_dfa_add_node (dfa, br_token, 0); - mbc_tree = create_tree (NULL, NULL, 0, new_idx); - if (BE (new_idx == -1 || mbc_tree == NULL, 0)) - goto build_word_op_espace; - /* Then join them by ALT node. */ - alt_token.type = OP_ALT; - new_idx = re_dfa_add_node (dfa, alt_token, 0); - tree = create_tree (tree, mbc_tree, 0, new_idx); - if (BE (new_idx != -1 && mbc_tree != NULL, 1)) - return tree; - } - else - { - free_charset (mbcset); - return tree; - } -#else /* not RE_ENABLE_I18N */ - return tree; -#endif /* not RE_ENABLE_I18N */ - - build_word_op_espace: - re_free (sbcset); -#ifdef RE_ENABLE_I18N - free_charset (mbcset); -#endif /* RE_ENABLE_I18N */ - *err = REG_ESPACE; - return NULL; -} - -/* This is intended for the expressions like "a{1,3}". - Fetch a number from `input', and return the number. - Return -1, if the number field is empty like "{,1}". - Return -2, If an error is occured. */ - -static int -fetch_number (input, token, syntax) - re_string_t *input; - re_token_t *token; - reg_syntax_t syntax; -{ - int num = -1; - unsigned char c; - while (1) - { - *token = fetch_token (input, syntax); - c = token->opr.c; - if (BE (token->type == END_OF_RE, 0)) - return -2; - if (token->type == OP_CLOSE_DUP_NUM || c == ',') - break; - num = ((token->type != CHARACTER || c < '0' || '9' < c || num == -2) - ? 
-2 : ((num == -1) ? c - '0' : num * 10 + c - '0')); - num = (num > RE_DUP_MAX) ? -2 : num; - } - return num; -} - -#ifdef RE_ENABLE_I18N -static void -free_charset (re_charset_t *cset) -{ - re_free (cset->mbchars); -# ifdef _LIBC - re_free (cset->coll_syms); - re_free (cset->equiv_classes); - re_free (cset->range_starts); - re_free (cset->range_ends); -# endif - re_free (cset->char_classes); - re_free (cset); -} -#endif /* RE_ENABLE_I18N */ - -/* Functions for binary tree operation. */ - -/* Create a node of tree. - Note: This function automatically free left and right if malloc fails. */ - -static bin_tree_t * -create_tree (left, right, type, index) - bin_tree_t *left; - bin_tree_t *right; - re_token_type_t type; - int index; -{ - bin_tree_t *tree; - tree = re_malloc (bin_tree_t, 1); - if (BE (tree == NULL, 0)) - { - free_bin_tree (left); - free_bin_tree (right); - return NULL; - } - tree->parent = NULL; - tree->left = left; - tree->right = right; - tree->type = type; - tree->node_idx = index; - tree->first = -1; - tree->next = -1; - re_node_set_init_empty (&tree->eclosure); - - if (left != NULL) - left->parent = tree; - if (right != NULL) - right->parent = tree; - return tree; -} - -/* Free the sub tree pointed by TREE. */ - -static void -free_bin_tree (tree) - bin_tree_t *tree; -{ - if (tree == NULL) - return; - /*re_node_set_free (&tree->eclosure);*/ - free_bin_tree (tree->left); - free_bin_tree (tree->right); - re_free (tree); -} - -/* Duplicate the node SRC, and return new node. */ - -static bin_tree_t * -duplicate_tree (src, dfa) - const bin_tree_t *src; - re_dfa_t *dfa; -{ - bin_tree_t *left = NULL, *right = NULL, *new_tree; - int new_node_idx; - /* Since node indies must be according to Post-order of the tree, - we must duplicate the left at first. */ - if (src->left != NULL) - { - left = duplicate_tree (src->left, dfa); - if (left == NULL) - return NULL; - } - - /* Secondaly, duplicate the right. 
*/ - if (src->right != NULL) - { - right = duplicate_tree (src->right, dfa); - if (right == NULL) - { - free_bin_tree (left); - return NULL; - } - } - - /* At last, duplicate itself. */ - if (src->type == NON_TYPE) - { - new_node_idx = re_dfa_add_node (dfa, dfa->nodes[src->node_idx], 0); - dfa->nodes[new_node_idx].duplicated = 1; - if (BE (new_node_idx == -1, 0)) - { - free_bin_tree (left); - free_bin_tree (right); - return NULL; - } - } - else - new_node_idx = src->type; - - new_tree = create_tree (left, right, src->type, new_node_idx); - if (BE (new_tree == NULL, 0)) - { - free_bin_tree (left); - free_bin_tree (right); - } - return new_tree; -} diff --git a/lib/regex/regex-sh.h b/lib/regex/regex-sh.h deleted file mode 100644 index 723d2d8..0000000 --- a/lib/regex/regex-sh.h +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Regular Expression Functions from glibc 2.3.2 - * (renamed to sh_* to avoid clashes with the system libraries) - */ - -#ifndef _UCW_REGEX_H -#define _UCW_REGEX_H - -#define regfree sh_regfree -#define regexec sh_regexec -#define regcomp sh_regcomp -#define regerror sh_regerror -#define re_set_registers sh_re_set_registers -#define re_match_2 sh_re_match2 -#define re_match sh_re_match -#define re_search sh_re_search -#define re_compile_pattern sh_re_compile_pattern -#define re_set_syntax sh_re_set_syntax -#define re_search_2 sh_re_search_2 -#define re_compile_fastmap sh_re_compile_fastmap - -#include "lib/regex/regex.h" - -#endif diff --git a/lib/regex/regex.c b/lib/regex/regex.c deleted file mode 100644 index 7cc5237..0000000 --- a/lib/regex/regex.c +++ /dev/null @@ -1,10 +0,0 @@ -/* - * Regular Expression Functions from glibc 2.3.2 - */ - -#include -#include "regex-sh.h" -#include "regex_internal.h" -#include "regex_internal.c" -#include "regcomp.c" -#include "regexec.c" diff --git a/lib/regex/regex.h b/lib/regex/regex.h deleted file mode 100644 index fac441d..0000000 --- a/lib/regex/regex.h +++ /dev/null @@ -1,574 +0,0 @@ -/* Definitions for data 
structures and routines for the regular - expression library. - Copyright (C) 1985,1989-93,1995-98,2000,2001,2002 - Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ - -#ifndef _REGEX_H -#define _REGEX_H 1 - -/* Allow the use in C++ code. */ -#ifdef __cplusplus -extern "C" { -#endif - -/* POSIX says that must be included (by the caller) before - . */ - -#if !defined _POSIX_C_SOURCE && !defined _POSIX_SOURCE && defined VMS -/* VMS doesn't have `size_t' in , even though POSIX says it - should be there. */ -# include -#endif - -/* The following two types have to be signed and unsigned integer type - wide enough to hold a value of a pointer. For most ANSI compilers - ptrdiff_t and size_t should be likely OK. Still size of these two - types is 2 for Microsoft C. Ugh... */ -typedef long int s_reg_t; -typedef unsigned long int active_reg_t; - -/* The following bits are used to determine the regexp syntax we - recognize. The set/not-set meanings are chosen so that Emacs syntax - remains the value 0. The bits are given in alphabetical order, and - the definitions shifted by one from the previous bit; thus, when we - add or remove a bit, only one other definition need change. 
*/ -typedef unsigned long int reg_syntax_t; - -/* If this bit is not set, then \ inside a bracket expression is literal. - If set, then such a \ quotes the following character. */ -#define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1) - -/* If this bit is not set, then + and ? are operators, and \+ and \? are - literals. - If set, then \+ and \? are operators and + and ? are literals. */ -#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1) - -/* If this bit is set, then character classes are supported. They are: - [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:], - [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:]. - If not set, then character classes are not supported. */ -#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1) - -/* If this bit is set, then ^ and $ are always anchors (outside bracket - expressions, of course). - If this bit is not set, then it depends: - ^ is an anchor if it is at the beginning of a regular - expression or after an open-group or an alternation operator; - $ is an anchor if it is at the end of a regular expression, or - before a close-group or an alternation operator. - - This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because - POSIX draft 11.2 says that * etc. in leading positions is undefined. - We already implemented a previous draft which made those constructs - invalid, though, so we haven't changed the code back. */ -#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1) - -/* If this bit is set, then special characters are always special - regardless of where they are in the pattern. - If this bit is not set, then special characters are special only in - some contexts; otherwise they are ordinary. Specifically, - * + ? and intervals are only special when not after the beginning, - open-group, or alternation operator. 
*/ -#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1) - -/* If this bit is set, then *, +, ?, and { cannot be first in an re or - immediately after an alternation or begin-group operator. */ -#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1) - -/* If this bit is set, then . matches newline. - If not set, then it doesn't. */ -#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1) - -/* If this bit is set, then . doesn't match NUL. - If not set, then it does. */ -#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1) - -/* If this bit is set, nonmatching lists [^...] do not match newline. - If not set, they do. */ -#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1) - -/* If this bit is set, either \{...\} or {...} defines an - interval, depending on RE_NO_BK_BRACES. - If not set, \{, \}, {, and } are literals. */ -#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1) - -/* If this bit is set, +, ? and | aren't recognized as operators. - If not set, they are. */ -#define RE_LIMITED_OPS (RE_INTERVALS << 1) - -/* If this bit is set, newline is an alternation operator. - If not set, newline is literal. */ -#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1) - -/* If this bit is set, then `{...}' defines an interval, and \{ and \} - are literals. - If not set, then `\{...\}' defines an interval. */ -#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1) - -/* If this bit is set, (...) defines a group, and \( and \) are literals. - If not set, \(...\) defines a group, and ( and ) are literals. */ -#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1) - -/* If this bit is set, then \ matches . - If not set, then \ is a back-reference. */ -#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1) - -/* If this bit is set, then | is an alternation operator, and \| is literal. - If not set, then \| is an alternation operator, and | is literal. 
*/ -#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1) - -/* If this bit is set, then an ending range point collating higher - than the starting range point, as in [z-a], is invalid. - If not set, then when ending range point collates higher than the - starting range point, the range is ignored. */ -#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1) - -/* If this bit is set, then an unmatched ) is ordinary. - If not set, then an unmatched ) is invalid. */ -#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1) - -/* If this bit is set, succeed as soon as we match the whole pattern, - without further backtracking. */ -#define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1) - -/* If this bit is set, do not process the GNU regex operators. - If not set, then the GNU regex operators are recognized. */ -#define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1) - -/* If this bit is set, turn on internal regex debugging. - If not set, and debugging was on, turn it off. - This only works if regex.c is compiled -DDEBUG. - We define this bit always, so that all that's needed to turn on - debugging is to recompile regex.c; the calling code can always have - this bit set, and it won't affect anything in the normal case. */ -#define RE_DEBUG (RE_NO_GNU_OPS << 1) - -/* If this bit is set, a syntactically invalid interval is treated as - a string of ordinary characters. For example, the ERE 'a{1' is - treated as 'a\{1'. */ -#define RE_INVALID_INTERVAL_ORD (RE_DEBUG << 1) - -/* If this bit is set, then ignore case when matching. - If not set, then case is significant. */ -#define RE_ICASE (RE_INVALID_INTERVAL_ORD << 1) - -/* This global variable defines the particular regexp syntax to use (for - some interfaces). When a regexp is compiled, the syntax used is - stored in the pattern buffer, so changing this does not affect - already-compiled regexps. */ -extern reg_syntax_t re_syntax_options; - -/* Define combinations of the above bits for the standard possibilities. 
- (The [[[ comments delimit what gets put into the Texinfo file, so - don't delete them!) */ -/* [[[begin syntaxes]]] */ -#define RE_SYNTAX_EMACS 0 - -#define RE_SYNTAX_AWK \ - (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \ - | RE_NO_BK_PARENS | RE_NO_BK_REFS \ - | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \ - | RE_DOT_NEWLINE | RE_CONTEXT_INDEP_ANCHORS \ - | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS) - -#define RE_SYNTAX_GNU_AWK \ - ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DEBUG) \ - & ~(RE_DOT_NOT_NULL | RE_INTERVALS | RE_CONTEXT_INDEP_OPS \ - | RE_CONTEXT_INVALID_OPS )) - -#define RE_SYNTAX_POSIX_AWK \ - (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS \ - | RE_INTERVALS | RE_NO_GNU_OPS) - -#define RE_SYNTAX_GREP \ - (RE_BK_PLUS_QM | RE_CHAR_CLASSES \ - | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \ - | RE_NEWLINE_ALT) - -#define RE_SYNTAX_EGREP \ - (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \ - | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \ - | RE_NEWLINE_ALT | RE_NO_BK_PARENS \ - | RE_NO_BK_VBAR) - -#define RE_SYNTAX_POSIX_EGREP \ - (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES \ - | RE_INVALID_INTERVAL_ORD) - -/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */ -#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC - -#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC - -/* Syntax bits common to both basic and extended POSIX regex syntax. */ -#define _RE_SYNTAX_POSIX_COMMON \ - (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \ - | RE_INTERVALS | RE_NO_EMPTY_RANGES) - -#define RE_SYNTAX_POSIX_BASIC \ - (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM) - -/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes - RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. Actually, this - isn't minimal, since other operators, such as \`, aren't disabled. 
*/ -#define RE_SYNTAX_POSIX_MINIMAL_BASIC \ - (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS) - -#define RE_SYNTAX_POSIX_EXTENDED \ - (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ - | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \ - | RE_NO_BK_PARENS | RE_NO_BK_VBAR \ - | RE_CONTEXT_INVALID_OPS | RE_UNMATCHED_RIGHT_PAREN_ORD) - -/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INDEP_OPS is - removed and RE_NO_BK_REFS is added. */ -#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \ - (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ - | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \ - | RE_NO_BK_PARENS | RE_NO_BK_REFS \ - | RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD) -/* [[[end syntaxes]]] */ - -/* Maximum number of duplicates an interval can allow. Some systems - (erroneously) define this in other header files, but we want our - value, so remove any previous define. */ -#ifdef RE_DUP_MAX -# undef RE_DUP_MAX -#endif -/* If sizeof(int) == 2, then ((1 << 15) - 1) overflows. */ -#define RE_DUP_MAX (0x7fff) - - -/* POSIX `cflags' bits (i.e., information for `regcomp'). */ - -/* If this bit is set, then use extended regular expression syntax. - If not set, then use basic regular expression syntax. */ -#define REG_EXTENDED 1 - -/* If this bit is set, then ignore case when matching. - If not set, then case is significant. */ -#define REG_ICASE (REG_EXTENDED << 1) - -/* If this bit is set, then anchors do not match at newline - characters in the string. - If not set, then anchors do match at newlines. */ -#define REG_NEWLINE (REG_ICASE << 1) - -/* If this bit is set, then report only success or fail in regexec. - If not set, then returns differ between not matching and errors. */ -#define REG_NOSUB (REG_NEWLINE << 1) - - -/* POSIX `eflags' bits (i.e., information for regexec). */ - -/* If this bit is set, then the beginning-of-line operator doesn't match - the beginning of the string (presumably because it's not the - beginning of a line). 
- If not set, then the beginning-of-line operator does match the - beginning of the string. */ -#define REG_NOTBOL 1 - -/* Like REG_NOTBOL, except for the end-of-line. */ -#define REG_NOTEOL (1 << 1) - - -/* If any error codes are removed, changed, or added, update the - `re_error_msg' table in regex.c. */ -typedef enum -{ -#ifdef _XOPEN_SOURCE - REG_ENOSYS = -1, /* This will never happen for this implementation. */ -#endif - - REG_NOERROR = 0, /* Success. */ - REG_NOMATCH, /* Didn't find a match (for regexec). */ - - /* POSIX regcomp return error codes. (In the order listed in the - standard.) */ - REG_BADPAT, /* Invalid pattern. */ - REG_ECOLLATE, /* Not implemented. */ - REG_ECTYPE, /* Invalid character class name. */ - REG_EESCAPE, /* Trailing backslash. */ - REG_ESUBREG, /* Invalid back reference. */ - REG_EBRACK, /* Unmatched left bracket. */ - REG_EPAREN, /* Parenthesis imbalance. */ - REG_EBRACE, /* Unmatched \{. */ - REG_BADBR, /* Invalid contents of \{\}. */ - REG_ERANGE, /* Invalid range end. */ - REG_ESPACE, /* Ran out of memory. */ - REG_BADRPT, /* No preceding re for repetition op. */ - - /* Error codes we've added. */ - REG_EEND, /* Premature end. */ - REG_ESIZE, /* Compiled pattern bigger than 2^16 bytes. */ - REG_ERPAREN /* Unmatched ) or \); not returned from regcomp. */ -} reg_errcode_t; - -/* This data structure represents a compiled pattern. Before calling - the pattern compiler, the fields `buffer', `allocated', `fastmap', - `translate', and `no_sub' can be set. After the pattern has been - compiled, the `re_nsub' field is available. All other fields are - private to the regex routines. */ - -#ifndef RE_TRANSLATE_TYPE -# define RE_TRANSLATE_TYPE char * -#endif - -struct re_pattern_buffer -{ -/* [[[begin pattern_buffer]]] */ - /* Space that holds the compiled pattern. It is declared as - `unsigned char *' because its elements are - sometimes used as array indexes. */ - unsigned char *buffer; - - /* Number of bytes to which `buffer' points. 
*/ - unsigned long int allocated; - - /* Number of bytes actually used in `buffer'. */ - unsigned long int used; - - /* Syntax setting with which the pattern was compiled. */ - reg_syntax_t syntax; - - /* Pointer to a fastmap, if any, otherwise zero. re_search uses - the fastmap, if there is one, to skip over impossible - starting points for matches. */ - char *fastmap; - - /* Either a translate table to apply to all characters before - comparing them, or zero for no translation. The translation - is applied to a pattern when it is compiled and to a string - when it is matched. */ - RE_TRANSLATE_TYPE translate; - - /* Number of subexpressions found by the compiler. */ - size_t re_nsub; - - /* Zero if this pattern cannot match the empty string, one else. - Well, in truth it's used only in `re_search_2', to see - whether or not we should use the fastmap, so we don't set - this absolutely perfectly; see `re_compile_fastmap' (the - `duplicate' case). */ - unsigned can_be_null : 1; - - /* If REGS_UNALLOCATED, allocate space in the `regs' structure - for `max (RE_NREGS, re_nsub + 1)' groups. - If REGS_REALLOCATE, reallocate space if necessary. - If REGS_FIXED, use what's there. */ -#define REGS_UNALLOCATED 0 -#define REGS_REALLOCATE 1 -#define REGS_FIXED 2 - unsigned regs_allocated : 2; - - /* Set to zero when `regex_compile' compiles a pattern; set to one - by `re_compile_fastmap' if it updates the fastmap. */ - unsigned fastmap_accurate : 1; - - /* If set, `re_match_2' does not return information about - subexpressions. */ - unsigned no_sub : 1; - - /* If set, a beginning-of-line anchor doesn't match at the - beginning of the string. */ - unsigned not_bol : 1; - - /* Similarly for an end-of-line anchor. */ - unsigned not_eol : 1; - - /* If true, an anchor at a newline matches. */ - unsigned newline_anchor : 1; - -/* [[[end pattern_buffer]]] */ -}; - -typedef struct re_pattern_buffer regex_t; - -/* Type for byte offsets within the string. POSIX mandates this. 
*/ -typedef int regoff_t; - - -/* This is the structure we store register match data in. See - regex.texinfo for a full description of what registers match. */ -struct re_registers -{ - unsigned num_regs; - regoff_t *start; - regoff_t *end; -}; - - -/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer, - `re_match_2' returns information about at least this many registers - the first time a `regs' structure is passed. */ -#ifndef RE_NREGS -# define RE_NREGS 30 -#endif - - -/* POSIX specification for registers. Aside from the different names than - `re_registers', POSIX uses an array of structures, instead of a - structure of arrays. */ -typedef struct -{ - regoff_t rm_so; /* Byte offset from string's start to substring's start. */ - regoff_t rm_eo; /* Byte offset from string's start to substring's end. */ -} regmatch_t; - -/* Declarations for routines. */ - -/* To avoid duplicating every routine declaration -- once with a - prototype (if we are ANSI), and once without (if we aren't) -- we - use the following macro to declare argument types. This - unfortunately clutters up the declarations a bit, but I think it's - worth it. */ - -#if __STDC__ - -# define _RE_ARGS(args) args - -#else /* not __STDC__ */ - -# define _RE_ARGS(args) () - -#endif /* not __STDC__ */ - -/* Sets the current default syntax to SYNTAX, and return the old syntax. - You can also simply assign to the `re_syntax_options' variable. */ -extern reg_syntax_t re_set_syntax _RE_ARGS ((reg_syntax_t syntax)); - -/* Compile the regular expression PATTERN, with length LENGTH - and syntax given by the global `re_syntax_options', into the buffer - BUFFER. Return NULL if successful, and an error string if not. */ -extern const char *re_compile_pattern - _RE_ARGS ((const char *pattern, size_t length, - struct re_pattern_buffer *buffer)); - - -/* Compile a fastmap for the compiled pattern in BUFFER; used to - accelerate searches. Return 0 if successful and -2 if was an - internal error. 
*/ -extern int re_compile_fastmap _RE_ARGS ((struct re_pattern_buffer *buffer)); - - -/* Search in the string STRING (with length LENGTH) for the pattern - compiled into BUFFER. Start searching at position START, for RANGE - characters. Return the starting position of the match, -1 for no - match, or -2 for an internal error. Also return register - information in REGS (if REGS and BUFFER->no_sub are nonzero). */ -extern int re_search - _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string, - int length, int start, int range, struct re_registers *regs)); - - -/* Like `re_search', but search in the concatenation of STRING1 and - STRING2. Also, stop searching at index START + STOP. */ -extern int re_search_2 - _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1, - int length1, const char *string2, int length2, - int start, int range, struct re_registers *regs, int stop)); - - -/* Like `re_search', but return how many characters in STRING the regexp - in BUFFER matched, starting at position START. */ -extern int re_match - _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string, - int length, int start, struct re_registers *regs)); - - -/* Relates to `re_match' as `re_search_2' relates to `re_search'. */ -extern int re_match_2 - _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1, - int length1, const char *string2, int length2, - int start, struct re_registers *regs, int stop)); - - -/* Set REGS to hold NUM_REGS registers, storing them in STARTS and - ENDS. Subsequent matches using BUFFER and REGS will use this memory - for recording register information. STARTS and ENDS must be - allocated with malloc, and must each be at least `NUM_REGS * sizeof - (regoff_t)' bytes long. - - If NUM_REGS == 0, then subsequent matches should allocate their own - register data. - - Unless this function is called, the first search or match using - PATTERN_BUFFER will allocate its own register data, without - freeing the old data. 
*/ -extern void re_set_registers - _RE_ARGS ((struct re_pattern_buffer *buffer, struct re_registers *regs, - unsigned num_regs, regoff_t *starts, regoff_t *ends)); - -#if defined _REGEX_RE_COMP || defined _LIBC -# ifndef _CRAY -/* 4.2 bsd compatibility. */ -extern char *re_comp _RE_ARGS ((const char *)); -extern int re_exec _RE_ARGS ((const char *)); -# endif -#endif - -/* GCC 2.95 and later have "__restrict"; C99 compilers have - "restrict", and "configure" may have defined "restrict". */ -#ifndef __restrict -# if ! (2 < __GNUC__ || (2 == __GNUC__ && 95 <= __GNUC_MINOR__)) -# if defined restrict || 199901L <= __STDC_VERSION__ -# define __restrict restrict -# else -# define __restrict -# endif -# endif -#endif -/* gcc 3.1 and up support the [restrict] syntax. */ -#ifndef __restrict_arr -# if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1) -# define __restrict_arr __restrict -# else -# define __restrict_arr -# endif -#endif - -/* POSIX compatibility. */ -extern int regcomp _RE_ARGS ((regex_t *__restrict __preg, - const char *__restrict __pattern, - int __cflags)); - -extern int regexec _RE_ARGS ((const regex_t *__restrict __preg, - const char *__restrict __string, size_t __nmatch, - regmatch_t __pmatch[__restrict_arr], - int __eflags)); - -extern size_t regerror _RE_ARGS ((int __errcode, const regex_t *__preg, - char *__errbuf, size_t __errbuf_size)); - -extern void regfree _RE_ARGS ((regex_t *__preg)); - - -#ifdef __cplusplus -} -#endif /* C++ */ - -#endif /* regex.h */ - -/* -Local variables: -make-backup-files: t -version-control: t -trim-versions-without-asking: nil -End: -*/ diff --git a/lib/regex/regex_internal.c b/lib/regex/regex_internal.c deleted file mode 100644 index f969c7c..0000000 --- a/lib/regex/regex_internal.c +++ /dev/null @@ -1,1263 +0,0 @@ -/* Extended regular expression matching and search library. - Copyright (C) 2002, 2003 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Isamu Hasegawa . 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ - -static void re_string_construct_common (const char *str, int len, - re_string_t *pstr, - RE_TRANSLATE_TYPE trans, int icase); -#ifdef RE_ENABLE_I18N -static int re_string_skip_chars (re_string_t *pstr, int new_raw_idx, - wint_t *last_wc); -#endif /* RE_ENABLE_I18N */ -static re_dfastate_t *create_newstate_common (re_dfa_t *dfa, - const re_node_set *nodes, - unsigned int hash); -static reg_errcode_t register_state (re_dfa_t *dfa, re_dfastate_t *newstate, - unsigned int hash); -static re_dfastate_t *create_ci_newstate (re_dfa_t *dfa, - const re_node_set *nodes, - unsigned int hash); -static re_dfastate_t *create_cd_newstate (re_dfa_t *dfa, - const re_node_set *nodes, - unsigned int context, - unsigned int hash); -static unsigned int inline calc_state_hash (const re_node_set *nodes, - unsigned int context); - -/* Functions for string operation. */ - -/* This function allocate the buffers. It is necessary to call - re_string_reconstruct before using the object. */ - -static reg_errcode_t -re_string_allocate (pstr, str, len, init_len, trans, icase) - re_string_t *pstr; - const char *str; - int len, init_len, icase; - RE_TRANSLATE_TYPE trans; -{ - reg_errcode_t ret; - int init_buf_len = (len + 1 < init_len) ? 
len + 1: init_len; - re_string_construct_common (str, len, pstr, trans, icase); - pstr->stop = pstr->len; - - ret = re_string_realloc_buffers (pstr, init_buf_len); - if (BE (ret != REG_NOERROR, 0)) - return ret; - - pstr->mbs_case = (MBS_CASE_ALLOCATED (pstr) ? pstr->mbs_case - : (unsigned char *) str); - pstr->mbs = MBS_ALLOCATED (pstr) ? pstr->mbs : pstr->mbs_case; - pstr->valid_len = (MBS_CASE_ALLOCATED (pstr) || MBS_ALLOCATED (pstr) - || MB_CUR_MAX > 1) ? pstr->valid_len : len; - return REG_NOERROR; -} - -/* This function allocate the buffers, and initialize them. */ - -static reg_errcode_t -re_string_construct (pstr, str, len, trans, icase) - re_string_t *pstr; - const char *str; - int len, icase; - RE_TRANSLATE_TYPE trans; -{ - reg_errcode_t ret; - re_string_construct_common (str, len, pstr, trans, icase); - pstr->stop = pstr->len; - /* Set 0 so that this function can initialize whole buffers. */ - pstr->valid_len = 0; - - if (len > 0) - { - ret = re_string_realloc_buffers (pstr, len + 1); - if (BE (ret != REG_NOERROR, 0)) - return ret; - } - pstr->mbs_case = (MBS_CASE_ALLOCATED (pstr) ? pstr->mbs_case - : (unsigned char *) str); - pstr->mbs = MBS_ALLOCATED (pstr) ? pstr->mbs : pstr->mbs_case; - - if (icase) - { -#ifdef RE_ENABLE_I18N - if (MB_CUR_MAX > 1) - build_wcs_upper_buffer (pstr); - else -#endif /* RE_ENABLE_I18N */ - build_upper_buffer (pstr); - } - else - { -#ifdef RE_ENABLE_I18N - if (MB_CUR_MAX > 1) - build_wcs_buffer (pstr); - else -#endif /* RE_ENABLE_I18N */ - { - if (trans != NULL) - re_string_translate_buffer (pstr); - else - pstr->valid_len = len; - } - } - - /* Initialized whole buffers, then valid_len == bufs_len. */ - pstr->valid_len = pstr->bufs_len; - return REG_NOERROR; -} - -/* Helper functions for re_string_allocate, and re_string_construct. 
*/ - -static reg_errcode_t -re_string_realloc_buffers (pstr, new_buf_len) - re_string_t *pstr; - int new_buf_len; -{ -#ifdef RE_ENABLE_I18N - if (MB_CUR_MAX > 1) - { - wint_t *new_array = re_realloc (pstr->wcs, wint_t, new_buf_len); - if (BE (new_array == NULL, 0)) - return REG_ESPACE; - pstr->wcs = new_array; - } -#endif /* RE_ENABLE_I18N */ - if (MBS_ALLOCATED (pstr)) - { - unsigned char *new_array = re_realloc (pstr->mbs, unsigned char, - new_buf_len); - if (BE (new_array == NULL, 0)) - return REG_ESPACE; - pstr->mbs = new_array; - } - if (MBS_CASE_ALLOCATED (pstr)) - { - unsigned char *new_array = re_realloc (pstr->mbs_case, unsigned char, - new_buf_len); - if (BE (new_array == NULL, 0)) - return REG_ESPACE; - pstr->mbs_case = new_array; - if (!MBS_ALLOCATED (pstr)) - pstr->mbs = pstr->mbs_case; - } - pstr->bufs_len = new_buf_len; - return REG_NOERROR; -} - - -static void -re_string_construct_common (str, len, pstr, trans, icase) - const char *str; - int len; - re_string_t *pstr; - RE_TRANSLATE_TYPE trans; - int icase; -{ - memset (pstr, '\0', sizeof (re_string_t)); - pstr->raw_mbs = (const unsigned char *) str; - pstr->len = len; - pstr->trans = trans; - pstr->icase = icase ? 1 : 0; -} - -#ifdef RE_ENABLE_I18N - -/* Build wide character buffer PSTR->WCS. - If the byte sequence of the string are: - (0), (1), (0), (1), - Then wide character buffer will be: - , WEOF , , WEOF , - We use WEOF for padding, they indicate that the position isn't - a first byte of a multibyte character. - - Note that this function assumes PSTR->VALID_LEN elements are already - built and starts from PSTR->VALID_LEN. */ - -static void -build_wcs_buffer (pstr) - re_string_t *pstr; -{ - mbstate_t prev_st; - int byte_idx, end_idx, mbclen, remain_len; - /* Build the buffers from pstr->valid_len to either pstr->len or - pstr->bufs_len. */ - end_idx = (pstr->bufs_len > pstr->len)? 
pstr->len : pstr->bufs_len; - for (byte_idx = pstr->valid_len; byte_idx < end_idx;) - { - wchar_t wc; - remain_len = end_idx - byte_idx; - prev_st = pstr->cur_state; - mbclen = mbrtowc (&wc, ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx - + byte_idx), remain_len, &pstr->cur_state); - if (BE (mbclen == (size_t) -2, 0)) - { - /* The buffer doesn't have enough space, finish to build. */ - pstr->cur_state = prev_st; - break; - } - else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0)) - { - /* We treat these cases as a singlebyte character. */ - mbclen = 1; - wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]; - pstr->cur_state = prev_st; - } - - /* Apply the translateion if we need. */ - if (pstr->trans != NULL && mbclen == 1) - { - int ch = pstr->trans[pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]]; - pstr->mbs_case[byte_idx] = ch; - } - /* Write wide character and padding. */ - pstr->wcs[byte_idx++] = wc; - /* Write paddings. */ - for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;) - pstr->wcs[byte_idx++] = WEOF; - } - pstr->valid_len = byte_idx; -} - -/* Build wide character buffer PSTR->WCS like build_wcs_buffer, - but for REG_ICASE. */ - -static void -build_wcs_upper_buffer (pstr) - re_string_t *pstr; -{ - mbstate_t prev_st; - int byte_idx, end_idx, mbclen, remain_len; - /* Build the buffers from pstr->valid_len to either pstr->len or - pstr->bufs_len. */ - end_idx = (pstr->bufs_len > pstr->len)? pstr->len : pstr->bufs_len; - for (byte_idx = pstr->valid_len; byte_idx < end_idx;) - { - wchar_t wc; - remain_len = end_idx - byte_idx; - prev_st = pstr->cur_state; - mbclen = mbrtowc (&wc, ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx - + byte_idx), remain_len, &pstr->cur_state); - if (BE (mbclen == (size_t) -2, 0)) - { - /* The buffer doesn't have enough space, finish to build. */ - pstr->cur_state = prev_st; - break; - } - else if (mbclen == 1 || mbclen == (size_t) -1 || mbclen == 0) - { - /* In case of a singlebyte character. 
*/ - int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]; - /* Apply the translateion if we need. */ - if (pstr->trans != NULL && mbclen == 1) - { - ch = pstr->trans[ch]; - pstr->mbs_case[byte_idx] = ch; - } - pstr->wcs[byte_idx] = iswlower (wc) ? toupper (wc) : wc; - pstr->mbs[byte_idx++] = islower (ch) ? toupper (ch) : ch; - if (BE (mbclen == (size_t) -1, 0)) - pstr->cur_state = prev_st; - } - else /* mbclen > 1 */ - { - if (iswlower (wc)) - wcrtomb ((char *) pstr->mbs + byte_idx, towupper (wc), &prev_st); - else - memcpy (pstr->mbs + byte_idx, - pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen); - pstr->wcs[byte_idx++] = iswlower (wc) ? toupper (wc) : wc; - /* Write paddings. */ - for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;) - pstr->wcs[byte_idx++] = WEOF; - } - } - pstr->valid_len = byte_idx; -} - -/* Skip characters until the index becomes greater than NEW_RAW_IDX. - Return the index. */ - -static int -re_string_skip_chars (pstr, new_raw_idx, last_wc) - re_string_t *pstr; - int new_raw_idx; - wint_t *last_wc; -{ - mbstate_t prev_st; - int rawbuf_idx, mbclen; - wchar_t wc = 0; - - /* Skip the characters which are not necessary to check. */ - for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_len; - rawbuf_idx < new_raw_idx;) - { - int remain_len; - remain_len = pstr->len - rawbuf_idx; - prev_st = pstr->cur_state; - mbclen = mbrtowc (&wc, (const char *) pstr->raw_mbs + rawbuf_idx, - remain_len, &pstr->cur_state); - if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0)) - { - /* We treat these cases as a singlebyte character. */ - mbclen = 1; - pstr->cur_state = prev_st; - } - /* Then proceed the next character. */ - rawbuf_idx += mbclen; - } - *last_wc = (wint_t) wc; - return rawbuf_idx; -} -#endif /* RE_ENABLE_I18N */ - -/* Build the buffer PSTR->MBS, and apply the translation if we need. - This function is used in case of REG_ICASE. 
*/ - -static void -build_upper_buffer (pstr) - re_string_t *pstr; -{ - int char_idx, end_idx; - end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len; - - for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx) - { - int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx]; - if (pstr->trans != NULL) - { - ch = pstr->trans[ch]; - pstr->mbs_case[char_idx] = ch; - } - if (islower (ch)) - pstr->mbs[char_idx] = toupper (ch); - else - pstr->mbs[char_idx] = ch; - } - pstr->valid_len = char_idx; -} - -/* Apply TRANS to the buffer in PSTR. */ - -static void -re_string_translate_buffer (pstr) - re_string_t *pstr; -{ - int buf_idx, end_idx; - end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len; - - for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx) - { - int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx]; - pstr->mbs_case[buf_idx] = pstr->trans[ch]; - } - - pstr->valid_len = buf_idx; -} - -/* This function re-construct the buffers. - Concretely, convert to wide character in case of MB_CUR_MAX > 1, - convert to upper case in case of REG_ICASE, apply translation. */ - -static reg_errcode_t -re_string_reconstruct (pstr, idx, eflags, newline) - re_string_t *pstr; - int idx, eflags, newline; -{ - int offset = idx - pstr->raw_mbs_idx; - if (offset < 0) - { - /* Reset buffer. */ -#ifdef RE_ENABLE_I18N - if (MB_CUR_MAX > 1) - memset (&pstr->cur_state, '\0', sizeof (mbstate_t)); -#endif /* RE_ENABLE_I18N */ - pstr->len += pstr->raw_mbs_idx; - pstr->stop += pstr->raw_mbs_idx; - pstr->valid_len = pstr->raw_mbs_idx = 0; - pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF - : CONTEXT_NEWLINE | CONTEXT_BEGBUF); - if (!MBS_CASE_ALLOCATED (pstr)) - pstr->mbs_case = (unsigned char *) pstr->raw_mbs; - if (!MBS_ALLOCATED (pstr) && !MBS_CASE_ALLOCATED (pstr)) - pstr->mbs = (unsigned char *) pstr->raw_mbs; - offset = idx; - } - - if (offset != 0) - { - /* Are the characters which are already checked remain? 
*/ - if (offset < pstr->valid_len) - { - /* Yes, move them to the front of the buffer. */ - pstr->tip_context = re_string_context_at (pstr, offset - 1, eflags, - newline); -#ifdef RE_ENABLE_I18N - if (MB_CUR_MAX > 1) - memmove (pstr->wcs, pstr->wcs + offset, - (pstr->valid_len - offset) * sizeof (wint_t)); -#endif /* RE_ENABLE_I18N */ - if (MBS_ALLOCATED (pstr)) - memmove (pstr->mbs, pstr->mbs + offset, - pstr->valid_len - offset); - if (MBS_CASE_ALLOCATED (pstr)) - memmove (pstr->mbs_case, pstr->mbs_case + offset, - pstr->valid_len - offset); - pstr->valid_len -= offset; -#if DEBUG - assert (pstr->valid_len > 0); -#endif - } - else - { - /* No, skip all characters until IDX. */ - pstr->valid_len = 0; -#ifdef RE_ENABLE_I18N - if (MB_CUR_MAX > 1) - { - int wcs_idx; - wint_t wc; - pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx; - for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx) - pstr->wcs[wcs_idx] = WEOF; - if (pstr->trans && wc <= 0xff) - wc = pstr->trans[wc]; - pstr->tip_context = (IS_WIDE_WORD_CHAR (wc) ? CONTEXT_WORD - : ((newline && IS_WIDE_NEWLINE (wc)) - ? CONTEXT_NEWLINE : 0)); - } - else -#endif /* RE_ENABLE_I18N */ - { - int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1]; - if (pstr->trans) - c = pstr->trans[c]; - pstr->tip_context = (IS_WORD_CHAR (c) ? CONTEXT_WORD - : ((newline && IS_NEWLINE (c)) - ? CONTEXT_NEWLINE : 0)); - } - } - if (!MBS_CASE_ALLOCATED (pstr)) - { - pstr->mbs_case += offset; - /* In case of !MBS_ALLOCATED && !MBS_CASE_ALLOCATED. */ - if (!MBS_ALLOCATED (pstr)) - pstr->mbs += offset; - } - } - pstr->raw_mbs_idx = idx; - pstr->len -= offset; - pstr->stop -= offset; - - /* Then build the buffers. 
*/ -#ifdef RE_ENABLE_I18N - if (MB_CUR_MAX > 1) - { - if (pstr->icase) - build_wcs_upper_buffer (pstr); - else - build_wcs_buffer (pstr); - } - else -#endif /* RE_ENABLE_I18N */ - { - if (pstr->icase) - build_upper_buffer (pstr); - else if (pstr->trans != NULL) - re_string_translate_buffer (pstr); - } - pstr->cur_idx = 0; - - return REG_NOERROR; -} - -static void -re_string_destruct (pstr) - re_string_t *pstr; -{ -#ifdef RE_ENABLE_I18N - re_free (pstr->wcs); -#endif /* RE_ENABLE_I18N */ - if (MBS_ALLOCATED (pstr)) - re_free (pstr->mbs); - if (MBS_CASE_ALLOCATED (pstr)) - re_free (pstr->mbs_case); -} - -/* Return the context at IDX in INPUT. */ - -static unsigned int -re_string_context_at (input, idx, eflags, newline_anchor) - const re_string_t *input; - int idx, eflags, newline_anchor; -{ - int c; - if (idx < 0 || idx == input->len) - { - if (idx < 0) - /* In this case, we use the value stored in input->tip_context, - since we can't know the character in input->mbs[-1] here. */ - return input->tip_context; - else /* (idx == input->len) */ - return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF - : CONTEXT_NEWLINE | CONTEXT_ENDBUF); - } -#ifdef RE_ENABLE_I18N - if (MB_CUR_MAX > 1) - { - wint_t wc; - int wc_idx = idx; - while(input->wcs[wc_idx] == WEOF) - { -#ifdef DEBUG - /* It must not happen. */ - assert (wc_idx >= 0); -#endif - --wc_idx; - if (wc_idx < 0) - return input->tip_context; - } - wc = input->wcs[wc_idx]; - if (IS_WIDE_WORD_CHAR (wc)) - return CONTEXT_WORD; - return (newline_anchor && IS_WIDE_NEWLINE (wc)) ? CONTEXT_NEWLINE : 0; - } - else -#endif - { - c = re_string_byte_at (input, idx); - if (IS_WORD_CHAR (c)) - return CONTEXT_WORD; - return (newline_anchor && IS_NEWLINE (c)) ? CONTEXT_NEWLINE : 0; - } -} - -/* Functions for set operation. 
*/ - -static reg_errcode_t -re_node_set_alloc (set, size) - re_node_set *set; - int size; -{ - set->alloc = size; - set->nelem = 0; - set->elems = re_malloc (int, size); - if (BE (set->elems == NULL, 0)) - return REG_ESPACE; - return REG_NOERROR; -} - -static reg_errcode_t -re_node_set_init_1 (set, elem) - re_node_set *set; - int elem; -{ - set->alloc = 1; - set->nelem = 1; - set->elems = re_malloc (int, 1); - if (BE (set->elems == NULL, 0)) - { - set->alloc = set->nelem = 0; - return REG_ESPACE; - } - set->elems[0] = elem; - return REG_NOERROR; -} - -static reg_errcode_t -re_node_set_init_2 (set, elem1, elem2) - re_node_set *set; - int elem1, elem2; -{ - set->alloc = 2; - set->elems = re_malloc (int, 2); - if (BE (set->elems == NULL, 0)) - return REG_ESPACE; - if (elem1 == elem2) - { - set->nelem = 1; - set->elems[0] = elem1; - } - else - { - set->nelem = 2; - if (elem1 < elem2) - { - set->elems[0] = elem1; - set->elems[1] = elem2; - } - else - { - set->elems[0] = elem2; - set->elems[1] = elem1; - } - } - return REG_NOERROR; -} - -static reg_errcode_t -re_node_set_init_copy (dest, src) - re_node_set *dest; - const re_node_set *src; -{ - dest->nelem = src->nelem; - if (src->nelem > 0) - { - dest->alloc = dest->nelem; - dest->elems = re_malloc (int, dest->alloc); - if (BE (dest->elems == NULL, 0)) - { - dest->alloc = dest->nelem = 0; - return REG_ESPACE; - } - memcpy (dest->elems, src->elems, src->nelem * sizeof (int)); - } - else - re_node_set_init_empty (dest); - return REG_NOERROR; -} - -/* Calculate the intersection of the sets SRC1 and SRC2. And merge it to - DEST. Return value indicate the error code or REG_NOERROR if succeeded. - Note: We assume dest->elems is NULL, when dest->alloc is 0. 
*/ - -static reg_errcode_t -re_node_set_add_intersect (dest, src1, src2) - re_node_set *dest; - const re_node_set *src1, *src2; -{ - int i1, i2, id; - if (src1->nelem > 0 && src2->nelem > 0) - { - if (src1->nelem + src2->nelem + dest->nelem > dest->alloc) - { - dest->alloc = src1->nelem + src2->nelem + dest->nelem; - dest->elems = re_realloc (dest->elems, int, dest->alloc); - if (BE (dest->elems == NULL, 0)) - return REG_ESPACE; - } - } - else - return REG_NOERROR; - - for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;) - { - if (src1->elems[i1] > src2->elems[i2]) - { - ++i2; - continue; - } - if (src1->elems[i1] == src2->elems[i2]) - { - while (id < dest->nelem && dest->elems[id] < src2->elems[i2]) - ++id; - if (id < dest->nelem && dest->elems[id] == src2->elems[i2]) - ++id; - else - { - memmove (dest->elems + id + 1, dest->elems + id, - sizeof (int) * (dest->nelem - id)); - dest->elems[id++] = src2->elems[i2++]; - ++dest->nelem; - } - } - ++i1; - } - return REG_NOERROR; -} - -/* Calculate the union set of the sets SRC1 and SRC2. And store it to - DEST. Return value indicate the error code or REG_NOERROR if succeeded. 
*/ - -static reg_errcode_t -re_node_set_init_union (dest, src1, src2) - re_node_set *dest; - const re_node_set *src1, *src2; -{ - int i1, i2, id; - if (src1 != NULL && src1->nelem > 0 && src2 != NULL && src2->nelem > 0) - { - dest->alloc = src1->nelem + src2->nelem; - dest->elems = re_malloc (int, dest->alloc); - if (BE (dest->elems == NULL, 0)) - return REG_ESPACE; - } - else - { - if (src1 != NULL && src1->nelem > 0) - return re_node_set_init_copy (dest, src1); - else if (src2 != NULL && src2->nelem > 0) - return re_node_set_init_copy (dest, src2); - else - re_node_set_init_empty (dest); - return REG_NOERROR; - } - for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;) - { - if (src1->elems[i1] > src2->elems[i2]) - { - dest->elems[id++] = src2->elems[i2++]; - continue; - } - if (src1->elems[i1] == src2->elems[i2]) - ++i2; - dest->elems[id++] = src1->elems[i1++]; - } - if (i1 < src1->nelem) - { - memcpy (dest->elems + id, src1->elems + i1, - (src1->nelem - i1) * sizeof (int)); - id += src1->nelem - i1; - } - else if (i2 < src2->nelem) - { - memcpy (dest->elems + id, src2->elems + i2, - (src2->nelem - i2) * sizeof (int)); - id += src2->nelem - i2; - } - dest->nelem = id; - return REG_NOERROR; -} - -/* Calculate the union set of the sets DEST and SRC. And store it to - DEST. Return value indicate the error code or REG_NOERROR if succeeded. */ - -static reg_errcode_t -re_node_set_merge (dest, src) - re_node_set *dest; - const re_node_set *src; -{ - int si, di; - if (src == NULL || src->nelem == 0) - return REG_NOERROR; - if (dest->alloc < src->nelem + dest->nelem) - { - int *new_buffer; - dest->alloc = 2 * (src->nelem + dest->alloc); - new_buffer = re_realloc (dest->elems, int, dest->alloc); - if (BE (new_buffer == NULL, 0)) - return REG_ESPACE; - dest->elems = new_buffer; - } - - for (si = 0, di = 0 ; si < src->nelem && di < dest->nelem ;) - { - int cp_from, ncp, mid, right, src_elem = src->elems[si]; - /* Binary search the spot we will add the new element. 
*/ - right = dest->nelem; - while (di < right) - { - mid = (di + right) / 2; - if (dest->elems[mid] < src_elem) - di = mid + 1; - else - right = mid; - } - if (di >= dest->nelem) - break; - - if (dest->elems[di] == src_elem) - { - /* Skip since, DEST already has the element. */ - ++di; - ++si; - continue; - } - - /* Skip the src elements which are less than dest->elems[di]. */ - cp_from = si; - while (si < src->nelem && src->elems[si] < dest->elems[di]) - ++si; - /* Copy these src elements. */ - ncp = si - cp_from; - memmove (dest->elems + di + ncp, dest->elems + di, - sizeof (int) * (dest->nelem - di)); - memcpy (dest->elems + di, src->elems + cp_from, - sizeof (int) * ncp); - /* Update counters. */ - di += ncp; - dest->nelem += ncp; - } - - /* Copy remaining src elements. */ - if (si < src->nelem) - { - memcpy (dest->elems + di, src->elems + si, - sizeof (int) * (src->nelem - si)); - dest->nelem += src->nelem - si; - } - return REG_NOERROR; -} - -/* Insert the new element ELEM to the re_node_set* SET. - return 0 if SET already has ELEM, - return -1 if an error is occured, return 1 otherwise. */ - -static int -re_node_set_insert (set, elem) - re_node_set *set; - int elem; -{ - int idx, right, mid; - /* In case of the set is empty. */ - if (set->elems == NULL || set->alloc == 0) - { - if (BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1)) - return 1; - else - return -1; - } - - /* Binary search the spot we will add the new element. */ - idx = 0; - right = set->nelem; - while (idx < right) - { - mid = (idx + right) / 2; - if (set->elems[mid] < elem) - idx = mid + 1; - else - right = mid; - } - - /* Realloc if we need. */ - if (set->alloc < set->nelem + 1) - { - int *new_array; - set->alloc = set->alloc * 2; - new_array = re_malloc (int, set->alloc); - if (BE (new_array == NULL, 0)) - return -1; - /* Copy the elements they are followed by the new element. 
*/ - if (idx > 0) - memcpy (new_array, set->elems, sizeof (int) * (idx)); - /* Copy the elements which follows the new element. */ - if (set->nelem - idx > 0) - memcpy (new_array + idx + 1, set->elems + idx, - sizeof (int) * (set->nelem - idx)); - re_free (set->elems); - set->elems = new_array; - } - else - { - /* Move the elements which follows the new element. */ - if (set->nelem - idx > 0) - memmove (set->elems + idx + 1, set->elems + idx, - sizeof (int) * (set->nelem - idx)); - } - /* Insert the new element. */ - set->elems[idx] = elem; - ++set->nelem; - return 1; -} - -/* Compare two node sets SET1 and SET2. - return 1 if SET1 and SET2 are equivalent, retrun 0 otherwise. */ - -static int -re_node_set_compare (set1, set2) - const re_node_set *set1, *set2; -{ - int i; - if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem) - return 0; - for (i = 0 ; i < set1->nelem ; i++) - if (set1->elems[i] != set2->elems[i]) - return 0; - return 1; -} - -/* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise. */ - -static int -re_node_set_contains (set, elem) - const re_node_set *set; - int elem; -{ - int idx, right, mid; - if (set->nelem <= 0) - return 0; - - /* Binary search the element. */ - idx = 0; - right = set->nelem - 1; - while (idx < right) - { - mid = (idx + right) / 2; - if (set->elems[mid] < elem) - idx = mid + 1; - else - right = mid; - } - return set->elems[idx] == elem ? idx + 1 : 0; -} - -static void -re_node_set_remove_at (set, idx) - re_node_set *set; - int idx; -{ - if (idx < 0 || idx >= set->nelem) - return; - if (idx < set->nelem - 1) - memmove (set->elems + idx, set->elems + idx + 1, - sizeof (int) * (set->nelem - idx - 1)); - --set->nelem; -} - - -/* Add the token TOKEN to dfa->nodes, and return the index of the token. - Or return -1, if an error will be occured. 
*/ - -static int -re_dfa_add_node (dfa, token, mode) - re_dfa_t *dfa; - re_token_t token; - int mode; -{ - if (dfa->nodes_len >= dfa->nodes_alloc) - { - re_token_t *new_array; - dfa->nodes_alloc *= 2; - new_array = re_realloc (dfa->nodes, re_token_t, dfa->nodes_alloc); - if (BE (new_array == NULL, 0)) - return -1; - else - dfa->nodes = new_array; - if (mode) - { - int *new_nexts, *new_indices; - re_node_set *new_edests, *new_eclosures, *new_inveclosures; - - new_nexts = re_realloc (dfa->nexts, int, dfa->nodes_alloc); - new_indices = re_realloc (dfa->org_indices, int, dfa->nodes_alloc); - new_edests = re_realloc (dfa->edests, re_node_set, dfa->nodes_alloc); - new_eclosures = re_realloc (dfa->eclosures, re_node_set, - dfa->nodes_alloc); - new_inveclosures = re_realloc (dfa->inveclosures, re_node_set, - dfa->nodes_alloc); - if (BE (new_nexts == NULL || new_indices == NULL - || new_edests == NULL || new_eclosures == NULL - || new_inveclosures == NULL, 0)) - return -1; - dfa->nexts = new_nexts; - dfa->org_indices = new_indices; - dfa->edests = new_edests; - dfa->eclosures = new_eclosures; - dfa->inveclosures = new_inveclosures; - } - } - dfa->nodes[dfa->nodes_len] = token; - dfa->nodes[dfa->nodes_len].duplicated = 0; - dfa->nodes[dfa->nodes_len].constraint = 0; - return dfa->nodes_len++; -} - -static unsigned int inline -calc_state_hash (nodes, context) - const re_node_set *nodes; - unsigned int context; -{ - unsigned int hash = nodes->nelem + context; - int i; - for (i = 0 ; i < nodes->nelem ; i++) - hash += nodes->elems[i]; - return hash; -} - -/* Search for the state whose node_set is equivalent to NODES. - Return the pointer to the state, if we found it in the DFA. - Otherwise create the new one and return it. In case of an error - return NULL and set the error code in ERR. - Note: - We assume NULL as the invalid state, then it is possible that - return value is NULL and ERR is REG_NOERROR. 
- - We never return non-NULL value in case of any errors, it is for - optimization. */ - -static re_dfastate_t* -re_acquire_state (err, dfa, nodes) - reg_errcode_t *err; - re_dfa_t *dfa; - const re_node_set *nodes; -{ - unsigned int hash; - re_dfastate_t *new_state; - struct re_state_table_entry *spot; - int i; - if (BE (nodes->nelem == 0, 0)) - { - *err = REG_NOERROR; - return NULL; - } - hash = calc_state_hash (nodes, 0); - spot = dfa->state_table + (hash & dfa->state_hash_mask); - - for (i = 0 ; i < spot->num ; i++) - { - re_dfastate_t *state = spot->array[i]; - if (hash != state->hash) - continue; - if (re_node_set_compare (&state->nodes, nodes)) - return state; - } - - /* There are no appropriate state in the dfa, create the new one. */ - new_state = create_ci_newstate (dfa, nodes, hash); - if (BE (new_state != NULL, 1)) - return new_state; - else - { - *err = REG_ESPACE; - return NULL; - } -} - -/* Search for the state whose node_set is equivalent to NODES and - whose context is equivalent to CONTEXT. - Return the pointer to the state, if we found it in the DFA. - Otherwise create the new one and return it. In case of an error - return NULL and set the error code in ERR. - Note: - We assume NULL as the invalid state, then it is possible that - return value is NULL and ERR is REG_NOERROR. - - We never return non-NULL value in case of any errors, it is for - optimization. 
*/ - -static re_dfastate_t* -re_acquire_state_context (err, dfa, nodes, context) - reg_errcode_t *err; - re_dfa_t *dfa; - const re_node_set *nodes; - unsigned int context; -{ - unsigned int hash; - re_dfastate_t *new_state; - struct re_state_table_entry *spot; - int i; - if (nodes->nelem == 0) - { - *err = REG_NOERROR; - return NULL; - } - hash = calc_state_hash (nodes, context); - spot = dfa->state_table + (hash & dfa->state_hash_mask); - - for (i = 0 ; i < spot->num ; i++) - { - re_dfastate_t *state = spot->array[i]; - if (hash != state->hash) - continue; - if (re_node_set_compare (state->entrance_nodes, nodes) - && state->context == context) - return state; - } - /* There are no appropriate state in `dfa', create the new one. */ - new_state = create_cd_newstate (dfa, nodes, context, hash); - if (BE (new_state != NULL, 1)) - return new_state; - else - { - *err = REG_ESPACE; - return NULL; - } -} - -/* Allocate memory for DFA state and initialize common properties. - Return the new state if succeeded, otherwise return NULL. */ - -static re_dfastate_t * -create_newstate_common (dfa, nodes, hash) - re_dfa_t *dfa; - const re_node_set *nodes; - unsigned int hash; -{ - re_dfastate_t *newstate; - reg_errcode_t err; - newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1); - if (BE (newstate == NULL, 0)) - return NULL; - err = re_node_set_init_copy (&newstate->nodes, nodes); - if (BE (err != REG_NOERROR, 0)) - { - re_free (newstate); - return NULL; - } - newstate->trtable = NULL; - newstate->trtable_search = NULL; - newstate->hash = hash; - return newstate; -} - -/* Store the new state NEWSTATE whose hash value is HASH in appropriate - position. Return value indicate the error code if failed. 
*/ - -static reg_errcode_t -register_state (dfa, newstate, hash) - re_dfa_t *dfa; - re_dfastate_t *newstate; - unsigned int hash; -{ - struct re_state_table_entry *spot; - spot = dfa->state_table + (hash & dfa->state_hash_mask); - - if (spot->alloc <= spot->num) - { - re_dfastate_t **new_array; - spot->alloc = 2 * spot->num + 2; - new_array = re_realloc (spot->array, re_dfastate_t *, spot->alloc); - if (BE (new_array == NULL, 0)) - return REG_ESPACE; - spot->array = new_array; - } - spot->array[spot->num++] = newstate; - return REG_NOERROR; -} - -/* Create the new state which is independ of contexts. - Return the new state if succeeded, otherwise return NULL. */ - -static re_dfastate_t * -create_ci_newstate (dfa, nodes, hash) - re_dfa_t *dfa; - const re_node_set *nodes; - unsigned int hash; -{ - int i; - reg_errcode_t err; - re_dfastate_t *newstate; - newstate = create_newstate_common (dfa, nodes, hash); - if (BE (newstate == NULL, 0)) - return NULL; - newstate->entrance_nodes = &newstate->nodes; - - for (i = 0 ; i < nodes->nelem ; i++) - { - re_token_t *node = dfa->nodes + nodes->elems[i]; - re_token_type_t type = node->type; - if (type == CHARACTER && !node->constraint) - continue; - - /* If the state has the halt node, the state is a halt state. */ - else if (type == END_OF_RE) - newstate->halt = 1; -#ifdef RE_ENABLE_I18N - else if (type == COMPLEX_BRACKET - || (type == OP_PERIOD && MB_CUR_MAX > 1)) - newstate->accept_mb = 1; -#endif /* RE_ENABLE_I18N */ - else if (type == OP_BACK_REF) - newstate->has_backref = 1; - else if (type == ANCHOR || node->constraint) - newstate->has_constraint = 1; - } - err = register_state (dfa, newstate, hash); - if (BE (err != REG_NOERROR, 0)) - { - free_state (newstate); - newstate = NULL; - } - return newstate; -} - -/* Create the new state which is depend on the context CONTEXT. - Return the new state if succeeded, otherwise return NULL. 
*/ - -static re_dfastate_t * -create_cd_newstate (dfa, nodes, context, hash) - re_dfa_t *dfa; - const re_node_set *nodes; - unsigned int context, hash; -{ - int i, nctx_nodes = 0; - reg_errcode_t err; - re_dfastate_t *newstate; - - newstate = create_newstate_common (dfa, nodes, hash); - if (BE (newstate == NULL, 0)) - return NULL; - newstate->context = context; - newstate->entrance_nodes = &newstate->nodes; - - for (i = 0 ; i < nodes->nelem ; i++) - { - unsigned int constraint = 0; - re_token_t *node = dfa->nodes + nodes->elems[i]; - re_token_type_t type = node->type; - if (node->constraint) - constraint = node->constraint; - - if (type == CHARACTER && !constraint) - continue; - /* If the state has the halt node, the state is a halt state. */ - else if (type == END_OF_RE) - newstate->halt = 1; -#ifdef RE_ENABLE_I18N - else if (type == COMPLEX_BRACKET - || (type == OP_PERIOD && MB_CUR_MAX > 1)) - newstate->accept_mb = 1; -#endif /* RE_ENABLE_I18N */ - else if (type == OP_BACK_REF) - newstate->has_backref = 1; - else if (type == ANCHOR) - constraint = node->opr.ctx_type; - - if (constraint) - { - if (newstate->entrance_nodes == &newstate->nodes) - { - newstate->entrance_nodes = re_malloc (re_node_set, 1); - if (BE (newstate->entrance_nodes == NULL, 0)) - { - free_state (newstate); - return NULL; - } - re_node_set_init_copy (newstate->entrance_nodes, nodes); - nctx_nodes = 0; - newstate->has_constraint = 1; - } - - if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context)) - { - re_node_set_remove_at (&newstate->nodes, i - nctx_nodes); - ++nctx_nodes; - } - } - } - err = register_state (dfa, newstate, hash); - if (BE (err != REG_NOERROR, 0)) - { - free_state (newstate); - newstate = NULL; - } - return newstate; -} - -static void -free_state (state) - re_dfastate_t *state; -{ - if (state->entrance_nodes != &state->nodes) - { - re_node_set_free (state->entrance_nodes); - re_free (state->entrance_nodes); - } - re_node_set_free (&state->nodes); - re_free (state->trtable); - 
re_free (state->trtable_search); - re_free (state); -} diff --git a/lib/regex/regex_internal.h b/lib/regex/regex_internal.h deleted file mode 100644 index bf84ad6..0000000 --- a/lib/regex/regex_internal.h +++ /dev/null @@ -1,742 +0,0 @@ -/* Extended regular expression matching and search library. - Copyright (C) 2002, 2003 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Isamu Hasegawa . - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ - -#ifndef _REGEX_INTERNAL_H -#define _REGEX_INTERNAL_H 1 - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include -#include -#include -#include -#include -#include - -#if defined HAVE_LOCALE_H || defined _LIBC -# include -#endif -#if defined HAVE_WCHAR_H || defined _LIBC -# include -#endif /* HAVE_WCHAR_H || _LIBC */ -#if defined HAVE_WCTYPE_H || defined _LIBC -# include -#endif /* HAVE_WCTYPE_H || _LIBC */ - -/* In case that the system doesn't have isblank(). */ -#if !defined _LIBC && !defined HAVE_ISBLANK && !defined isblank -# define isblank(ch) ((ch) == ' ' || (ch) == '\t') -#endif - -#ifdef _LIBC -# ifndef _RE_DEFINE_LOCALE_FUNCTIONS -# define _RE_DEFINE_LOCALE_FUNCTIONS 1 -# include -# include -# include -# endif -#endif - -/* This is for other GNU distributions with internationalized messages. 
*/ -#if HAVE_LIBINTL_H || defined _LIBC -# include -# ifdef _LIBC -# undef gettext -# define gettext(msgid) \ - INTUSE(__dcgettext) (INTUSE(_libc_intl_domainname), msgid, LC_MESSAGES) -# endif -#else -# define gettext(msgid) (msgid) -#endif - -#ifndef gettext_noop -/* This define is so xgettext can find the internationalizable - strings. */ -# define gettext_noop(String) String -#endif - -#if (defined MB_CUR_MAX && HAVE_LOCALE_H && HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_WCRTOMB && HAVE_MBRTOWC && HAVE_WCSCOLL) || _LIBC -# define RE_ENABLE_I18N -#endif - -#if __GNUC__ >= 3 -# define BE(expr, val) __builtin_expect (expr, val) -#else -# define BE(expr, val) (expr) -# define inline -#endif - -/* Number of bits in a byte. */ -#define BYTE_BITS 8 -/* Number of single byte character. */ -#define SBC_MAX 256 - -#define COLL_ELEM_LEN_MAX 8 - -/* The character which represents newline. */ -#define NEWLINE_CHAR '\n' -#define WIDE_NEWLINE_CHAR L'\n' - -/* Rename to standard API for using out of glibc. */ -#ifndef _LIBC -# define __wctype wctype -# define __iswctype iswctype -# define __btowc btowc -# define __mempcpy mempcpy -# define __wcrtomb wcrtomb -# define attribute_hidden -#endif /* not _LIBC */ - -extern const char __re_error_msgid[] attribute_hidden; -extern const size_t __re_error_msgid_idx[] attribute_hidden; - -/* Number of bits in an unsinged int. */ -#define UINT_BITS (sizeof (unsigned int) * BYTE_BITS) -/* Number of unsigned int in an bit_set. 
*/ -#define BITSET_UINTS ((SBC_MAX + UINT_BITS - 1) / UINT_BITS) -typedef unsigned int bitset[BITSET_UINTS]; -typedef unsigned int *re_bitset_ptr_t; - -#define bitset_set(set,i) (set[i / UINT_BITS] |= 1 << i % UINT_BITS) -#define bitset_clear(set,i) (set[i / UINT_BITS] &= ~(1 << i % UINT_BITS)) -#define bitset_contain(set,i) (set[i / UINT_BITS] & (1 << i % UINT_BITS)) -#define bitset_empty(set) memset (set, 0, sizeof (unsigned int) * BITSET_UINTS) -#define bitset_set_all(set) \ - memset (set, 255, sizeof (unsigned int) * BITSET_UINTS) -#define bitset_copy(dest,src) \ - memcpy (dest, src, sizeof (unsigned int) * BITSET_UINTS) -static inline void bitset_not (bitset set); -static inline void bitset_merge (bitset dest, const bitset src); -static inline void bitset_not_merge (bitset dest, const bitset src); - -#define PREV_WORD_CONSTRAINT 0x0001 -#define PREV_NOTWORD_CONSTRAINT 0x0002 -#define NEXT_WORD_CONSTRAINT 0x0004 -#define NEXT_NOTWORD_CONSTRAINT 0x0008 -#define PREV_NEWLINE_CONSTRAINT 0x0010 -#define NEXT_NEWLINE_CONSTRAINT 0x0020 -#define PREV_BEGBUF_CONSTRAINT 0x0040 -#define NEXT_ENDBUF_CONSTRAINT 0x0080 -#define DUMMY_CONSTRAINT 0x0100 - -typedef enum -{ - INSIDE_WORD = PREV_WORD_CONSTRAINT | NEXT_WORD_CONSTRAINT, - WORD_FIRST = PREV_NOTWORD_CONSTRAINT | NEXT_WORD_CONSTRAINT, - WORD_LAST = PREV_WORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT, - LINE_FIRST = PREV_NEWLINE_CONSTRAINT, - LINE_LAST = NEXT_NEWLINE_CONSTRAINT, - BUF_FIRST = PREV_BEGBUF_CONSTRAINT, - BUF_LAST = NEXT_ENDBUF_CONSTRAINT, - WORD_DELIM = DUMMY_CONSTRAINT -} re_context_type; - -typedef struct -{ - int alloc; - int nelem; - int *elems; -} re_node_set; - -typedef enum -{ - NON_TYPE = 0, - - /* Token type, these are used only by token. 
*/ - OP_OPEN_BRACKET, - OP_CLOSE_BRACKET, - OP_CHARSET_RANGE, - OP_OPEN_DUP_NUM, - OP_CLOSE_DUP_NUM, - OP_NON_MATCH_LIST, - OP_OPEN_COLL_ELEM, - OP_CLOSE_COLL_ELEM, - OP_OPEN_EQUIV_CLASS, - OP_CLOSE_EQUIV_CLASS, - OP_OPEN_CHAR_CLASS, - OP_CLOSE_CHAR_CLASS, - OP_WORD, - OP_NOTWORD, - BACK_SLASH, - - /* Tree type, these are used only by tree. */ - CONCAT, - ALT, - SUBEXP, - SIMPLE_BRACKET, -#ifdef RE_ENABLE_I18N - COMPLEX_BRACKET, -#endif /* RE_ENABLE_I18N */ - - /* Node type, These are used by token, node, tree. */ - OP_OPEN_SUBEXP, - OP_CLOSE_SUBEXP, - OP_PERIOD, - CHARACTER, - END_OF_RE, - OP_ALT, - OP_DUP_ASTERISK, - OP_DUP_PLUS, - OP_DUP_QUESTION, - OP_BACK_REF, - ANCHOR, - - /* Dummy marker. */ - END_OF_RE_TOKEN_T -} re_token_type_t; - -#ifdef RE_ENABLE_I18N -typedef struct -{ - /* Multibyte characters. */ - wchar_t *mbchars; - - /* Collating symbols. */ -# ifdef _LIBC - int32_t *coll_syms; -# endif - - /* Equivalence classes. */ -# ifdef _LIBC - int32_t *equiv_classes; -# endif - - /* Range expressions. */ -# ifdef _LIBC - uint32_t *range_starts; - uint32_t *range_ends; -# else /* not _LIBC */ - wchar_t *range_starts; - wchar_t *range_ends; -# endif /* not _LIBC */ - - /* Character classes. */ - wctype_t *char_classes; - - /* If this character set is the non-matching list. */ - unsigned int non_match : 1; - - /* # of multibyte characters. */ - int nmbchars; - - /* # of collating symbols. */ - int ncoll_syms; - - /* # of equivalence classes. */ - int nequiv_classes; - - /* # of range expressions. */ - int nranges; - - /* # of character classes. 
*/ - int nchar_classes; -} re_charset_t; -#endif /* RE_ENABLE_I18N */ - -typedef struct -{ - union - { - unsigned char c; /* for CHARACTER */ - re_bitset_ptr_t sbcset; /* for SIMPLE_BRACKET */ -#ifdef RE_ENABLE_I18N - re_charset_t *mbcset; /* for COMPLEX_BRACKET */ -#endif /* RE_ENABLE_I18N */ - int idx; /* for BACK_REF */ - re_context_type ctx_type; /* for ANCHOR */ - } opr; -#if __GNUC__ >= 2 - re_token_type_t type : 8; -#else - re_token_type_t type; -#endif - unsigned int constraint : 10; /* context constraint */ - unsigned int duplicated : 1; -#ifdef RE_ENABLE_I18N - unsigned int mb_partial : 1; -#endif -} re_token_t; - -#define IS_EPSILON_NODE(type) \ - ((type) == OP_ALT || (type) == OP_DUP_ASTERISK || (type) == OP_DUP_PLUS \ - || (type) == OP_DUP_QUESTION || (type) == ANCHOR \ - || (type) == OP_OPEN_SUBEXP || (type) == OP_CLOSE_SUBEXP) - -#define ACCEPT_MB_NODE(type) \ - ((type) == COMPLEX_BRACKET || (type) == OP_PERIOD) - -struct re_string_t -{ - /* Indicate the raw buffer which is the original string passed as an - argument of regexec(), re_search(), etc.. */ - const unsigned char *raw_mbs; - /* Store the multibyte string. In case of "case insensitive mode" like - REG_ICASE, upper cases of the string are stored, otherwise MBS points - the same address that RAW_MBS points. */ - unsigned char *mbs; - /* Store the case sensitive multibyte string. In case of - "case insensitive mode", the original string are stored, - otherwise MBS_CASE points the same address that MBS points. */ - unsigned char *mbs_case; -#ifdef RE_ENABLE_I18N - /* Store the wide character string which is corresponding to MBS. */ - wint_t *wcs; - mbstate_t cur_state; -#endif - /* Index in RAW_MBS. Each character mbs[i] corresponds to - raw_mbs[raw_mbs_idx + i]. */ - int raw_mbs_idx; - /* The length of the valid characters in the buffers. */ - int valid_len; - /* The length of the buffers MBS, MBS_CASE, and WCS. 
*/ - int bufs_len; - /* The index in MBS, which is updated by re_string_fetch_byte. */ - int cur_idx; - /* This is length_of_RAW_MBS - RAW_MBS_IDX. */ - int len; - /* End of the buffer may be shorter than its length in the cases such - as re_match_2, re_search_2. Then, we use STOP for end of the buffer - instead of LEN. */ - int stop; - - /* The context of mbs[0]. We store the context independently, since - the context of mbs[0] may be different from raw_mbs[0], which is - the beginning of the input string. */ - unsigned int tip_context; - /* The translation passed as a part of an argument of re_compile_pattern. */ - RE_TRANSLATE_TYPE trans; - /* 1 if REG_ICASE. */ - unsigned int icase : 1; -}; -typedef struct re_string_t re_string_t; -/* In case of REG_ICASE, we allocate the buffer dynamically for mbs. */ -#define MBS_ALLOCATED(pstr) (pstr->icase) -/* In case that we need translation, we allocate the buffer dynamically - for mbs_case. Note that mbs == mbs_case if not REG_ICASE. */ -#define MBS_CASE_ALLOCATED(pstr) (pstr->trans != NULL) - - -static reg_errcode_t re_string_allocate (re_string_t *pstr, const char *str, - int len, int init_len, - RE_TRANSLATE_TYPE trans, int icase); -static reg_errcode_t re_string_construct (re_string_t *pstr, const char *str, - int len, RE_TRANSLATE_TYPE trans, - int icase); -static reg_errcode_t re_string_reconstruct (re_string_t *pstr, int idx, - int eflags, int newline); -static reg_errcode_t re_string_realloc_buffers (re_string_t *pstr, - int new_buf_len); -#ifdef RE_ENABLE_I18N -static void build_wcs_buffer (re_string_t *pstr); -static void build_wcs_upper_buffer (re_string_t *pstr); -#endif /* RE_ENABLE_I18N */ -static void build_upper_buffer (re_string_t *pstr); -static void re_string_translate_buffer (re_string_t *pstr); -static void re_string_destruct (re_string_t *pstr); -#ifdef RE_ENABLE_I18N -static int re_string_elem_size_at (const re_string_t *pstr, int idx); -static inline int re_string_char_size_at (const re_string_t 
*pstr, int idx); -static inline wint_t re_string_wchar_at (const re_string_t *pstr, int idx); -#endif /* RE_ENABLE_I18N */ -static unsigned int re_string_context_at (const re_string_t *input, int idx, - int eflags, int newline_anchor); -#define re_string_peek_byte(pstr, offset) \ - ((pstr)->mbs[(pstr)->cur_idx + offset]) -#define re_string_peek_byte_case(pstr, offset) \ - ((pstr)->mbs_case[(pstr)->cur_idx + offset]) -#define re_string_fetch_byte(pstr) \ - ((pstr)->mbs[(pstr)->cur_idx++]) -#define re_string_fetch_byte_case(pstr) \ - ((pstr)->mbs_case[(pstr)->cur_idx++]) -#define re_string_first_byte(pstr, idx) \ - ((idx) == (pstr)->len || (pstr)->wcs[idx] != WEOF) -#define re_string_is_single_byte_char(pstr, idx) \ - ((pstr)->wcs[idx] != WEOF && ((pstr)->len == (idx) \ - || (pstr)->wcs[(idx) + 1] != WEOF)) -#define re_string_eoi(pstr) ((pstr)->stop <= (pstr)->cur_idx) -#define re_string_cur_idx(pstr) ((pstr)->cur_idx) -#define re_string_get_buffer(pstr) ((pstr)->mbs) -#define re_string_length(pstr) ((pstr)->len) -#define re_string_byte_at(pstr,idx) ((pstr)->mbs[idx]) -#define re_string_skip_bytes(pstr,idx) ((pstr)->cur_idx += (idx)) -#define re_string_set_index(pstr,idx) ((pstr)->cur_idx = (idx)) - -#define re_malloc(t,n) ((t *) malloc ((n) * sizeof (t))) -#define re_realloc(p,t,n) ((t *) realloc (p, (n) * sizeof (t))) -#define re_free(p) free (p) - -struct bin_tree_t -{ - struct bin_tree_t *parent; - struct bin_tree_t *left; - struct bin_tree_t *right; - - /* `node_idx' is the index in dfa->nodes, if `type' == 0. - Otherwise `type' indicate the type of this node. 
*/ - re_token_type_t type; - int node_idx; - - int first; - int next; - re_node_set eclosure; -}; -typedef struct bin_tree_t bin_tree_t; - - -#define CONTEXT_WORD 1 -#define CONTEXT_NEWLINE (CONTEXT_WORD << 1) -#define CONTEXT_BEGBUF (CONTEXT_NEWLINE << 1) -#define CONTEXT_ENDBUF (CONTEXT_BEGBUF << 1) - -#define IS_WORD_CONTEXT(c) ((c) & CONTEXT_WORD) -#define IS_NEWLINE_CONTEXT(c) ((c) & CONTEXT_NEWLINE) -#define IS_BEGBUF_CONTEXT(c) ((c) & CONTEXT_BEGBUF) -#define IS_ENDBUF_CONTEXT(c) ((c) & CONTEXT_ENDBUF) -#define IS_ORDINARY_CONTEXT(c) ((c) == 0) - -#define IS_WORD_CHAR(ch) (isalnum (ch) || (ch) == '_') -#define IS_NEWLINE(ch) ((ch) == NEWLINE_CHAR) -#define IS_WIDE_WORD_CHAR(ch) (iswalnum (ch) || (ch) == L'_') -#define IS_WIDE_NEWLINE(ch) ((ch) == WIDE_NEWLINE_CHAR) - -#define NOT_SATISFY_PREV_CONSTRAINT(constraint,context) \ - ((((constraint) & PREV_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \ - || ((constraint & PREV_NOTWORD_CONSTRAINT) && IS_WORD_CONTEXT (context)) \ - || ((constraint & PREV_NEWLINE_CONSTRAINT) && !IS_NEWLINE_CONTEXT (context))\ - || ((constraint & PREV_BEGBUF_CONSTRAINT) && !IS_BEGBUF_CONTEXT (context))) - -#define NOT_SATISFY_NEXT_CONSTRAINT(constraint,context) \ - ((((constraint) & NEXT_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \ - || (((constraint) & NEXT_NOTWORD_CONSTRAINT) && IS_WORD_CONTEXT (context)) \ - || (((constraint) & NEXT_NEWLINE_CONSTRAINT) && !IS_NEWLINE_CONTEXT (context)) \ - || (((constraint) & NEXT_ENDBUF_CONSTRAINT) && !IS_ENDBUF_CONTEXT (context))) - -struct re_dfastate_t -{ - unsigned int hash; - re_node_set nodes; - re_node_set *entrance_nodes; - struct re_dfastate_t **trtable; - struct re_dfastate_t **trtable_search; - /* If this state is a special state. - A state is a special state if the state is the halt state, or - a anchor. */ - unsigned int context : 2; - unsigned int halt : 1; - /* If this state can accept `multi byte'. 
- Note that we refer to multibyte characters, and multi character - collating elements as `multi byte'. */ - unsigned int accept_mb : 1; - /* If this state has backreference node(s). */ - unsigned int has_backref : 1; - unsigned int has_constraint : 1; -}; -typedef struct re_dfastate_t re_dfastate_t; - -typedef struct -{ - /* start <= node < end */ - int start; - int end; -} re_subexp_t; - -struct re_state_table_entry -{ - int num; - int alloc; - re_dfastate_t **array; -}; - -/* Array type used in re_sub_match_last_t and re_sub_match_top_t. */ - -typedef struct -{ - int next_idx; - int alloc; - re_dfastate_t **array; -} state_array_t; - -/* Store information about the node NODE whose type is OP_CLOSE_SUBEXP. */ - -typedef struct -{ - int node; - int str_idx; /* The position NODE match at. */ - state_array_t path; -} re_sub_match_last_t; - -/* Store information about the node NODE whose type is OP_OPEN_SUBEXP. - And information about the node, whose type is OP_CLOSE_SUBEXP, - corresponding to NODE is stored in LASTS. */ - -typedef struct -{ - int str_idx; - int node; - int next_last_offset; - state_array_t *path; - int alasts; /* Allocation size of LASTS. */ - int nlasts; /* The number of LASTS. */ - re_sub_match_last_t **lasts; -} re_sub_match_top_t; - -struct re_backref_cache_entry -{ - int node; - int str_idx; - int subexp_from; - int subexp_to; - int flag; -}; - -typedef struct -{ - /* EFLAGS of the argument of regexec. */ - int eflags; - /* Where the matching ends. */ - int match_last; - int last_node; - /* The string object corresponding to the input string. */ - re_string_t *input; - /* The state log used by the matcher. */ - re_dfastate_t **state_log; - int state_log_top; - /* Back reference cache. 
*/ - int nbkref_ents; - int abkref_ents; - struct re_backref_cache_entry *bkref_ents; - int max_mb_elem_len; - int nsub_tops; - int asub_tops; - re_sub_match_top_t **sub_tops; -} re_match_context_t; - -typedef struct -{ - int cur_bkref; - int cls_subexp_idx; - - re_dfastate_t **sifted_states; - re_dfastate_t **limited_states; - - re_node_set limits; - - int last_node; - int last_str_idx; - int check_subexp; -} re_sift_context_t; - -struct re_fail_stack_ent_t -{ - int idx; - int node; - regmatch_t *regs; - re_node_set eps_via_nodes; -}; - -struct re_fail_stack_t -{ - int num; - int alloc; - struct re_fail_stack_ent_t *stack; -}; - -struct re_dfa_t -{ - re_bitset_ptr_t word_char; - - /* number of subexpressions `re_nsub' is in regex_t. */ - int subexps_alloc; - re_subexp_t *subexps; - - re_token_t *nodes; - int nodes_alloc; - int nodes_len; - bin_tree_t *str_tree; - int *nexts; - int *org_indices; - re_node_set *edests; - re_node_set *eclosures; - re_node_set *inveclosures; - struct re_state_table_entry *state_table; - unsigned int state_hash_mask; - re_dfastate_t *init_state; - re_dfastate_t *init_state_word; - re_dfastate_t *init_state_nl; - re_dfastate_t *init_state_begbuf; - int states_alloc; - int init_node; - int nbackref; /* The number of backreference in this dfa. */ - /* Bitmap expressing which backreference is used. */ - unsigned int used_bkref_map; -#ifdef DEBUG - char* re_str; -#endif - unsigned int has_plural_match : 1; - /* If this dfa has "multibyte node", which is a backreference or - a node which can accept multibyte character or multi character - collating element. 
*/ - unsigned int has_mb_node : 1; -}; -typedef struct re_dfa_t re_dfa_t; - -static reg_errcode_t re_node_set_alloc (re_node_set *set, int size); -static reg_errcode_t re_node_set_init_1 (re_node_set *set, int elem); -static reg_errcode_t re_node_set_init_2 (re_node_set *set, int elem1, - int elem2); -#define re_node_set_init_empty(set) memset (set, '\0', sizeof (re_node_set)) -static reg_errcode_t re_node_set_init_copy (re_node_set *dest, - const re_node_set *src); -static reg_errcode_t re_node_set_add_intersect (re_node_set *dest, - const re_node_set *src1, - const re_node_set *src2); -static reg_errcode_t re_node_set_init_union (re_node_set *dest, - const re_node_set *src1, - const re_node_set *src2); -static reg_errcode_t re_node_set_merge (re_node_set *dest, - const re_node_set *src); -static int re_node_set_insert (re_node_set *set, int elem); -static int re_node_set_compare (const re_node_set *set1, - const re_node_set *set2); -static int re_node_set_contains (const re_node_set *set, int elem); -static void re_node_set_remove_at (re_node_set *set, int idx); -#define re_node_set_remove(set,id) \ - (re_node_set_remove_at (set, re_node_set_contains (set, id) - 1)) -#define re_node_set_empty(p) ((p)->nelem = 0) -#define re_node_set_free(set) re_free ((set)->elems) -static int re_dfa_add_node (re_dfa_t *dfa, re_token_t token, int mode); -static re_dfastate_t *re_acquire_state (reg_errcode_t *err, re_dfa_t *dfa, - const re_node_set *nodes); -static re_dfastate_t *re_acquire_state_context (reg_errcode_t *err, - re_dfa_t *dfa, - const re_node_set *nodes, - unsigned int context); -static void free_state (re_dfastate_t *state); - - -typedef enum -{ - SB_CHAR, - MB_CHAR, - EQUIV_CLASS, - COLL_SYM, - CHAR_CLASS -} bracket_elem_type; - -typedef struct -{ - bracket_elem_type type; - union - { - unsigned char ch; - unsigned char *name; - wchar_t wch; - } opr; -} bracket_elem_t; - - -/* Inline functions for bitset operation. 
*/ -static inline void -bitset_not (set) - bitset set; -{ - int bitset_i; - for (bitset_i = 0; bitset_i < BITSET_UINTS; ++bitset_i) - set[bitset_i] = ~set[bitset_i]; -} - -static inline void -bitset_merge (dest, src) - bitset dest; - const bitset src; -{ - int bitset_i; - for (bitset_i = 0; bitset_i < BITSET_UINTS; ++bitset_i) - dest[bitset_i] |= src[bitset_i]; -} - -static inline void -bitset_not_merge (dest, src) - bitset dest; - const bitset src; -{ - int i; - for (i = 0; i < BITSET_UINTS; ++i) - dest[i] |= ~src[i]; -} - -#ifdef RE_ENABLE_I18N -/* Inline functions for re_string. */ -static inline int -re_string_char_size_at (pstr, idx) - const re_string_t *pstr; - int idx; -{ - int byte_idx; - if (MB_CUR_MAX == 1) - return 1; - for (byte_idx = 1; idx + byte_idx < pstr->len; ++byte_idx) - if (pstr->wcs[idx + byte_idx] != WEOF) - break; - return byte_idx; -} - -static inline wint_t -re_string_wchar_at (pstr, idx) - const re_string_t *pstr; - int idx; -{ - if (MB_CUR_MAX == 1) - return (wint_t) pstr->mbs[idx]; - return (wint_t) pstr->wcs[idx]; -} - -static int -re_string_elem_size_at (pstr, idx) - const re_string_t *pstr; - int idx; -{ -#ifdef _LIBC - const unsigned char *p, *extra; - const int32_t *table, *indirect; - int32_t tmp; -# include - uint_fast32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); - - if (nrules != 0) - { - table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); - extra = (const unsigned char *) - _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB); - indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE, - _NL_COLLATE_INDIRECTMB); - p = pstr->mbs + idx; - tmp = findidx (&p); - return p - pstr->mbs - idx; - } - else -#endif /* _LIBC */ - return 1; -} -#endif /* RE_ENABLE_I18N */ - -#endif /* _REGEX_INTERNAL_H */ diff --git a/lib/regex/regexec.c b/lib/regex/regexec.c deleted file mode 100644 index 6ea14a6..0000000 --- a/lib/regex/regexec.c +++ /dev/null @@ -1,3977 +0,0 @@ -/* Extended regular expression matching and 
search library. - Copyright (C) 2002, 2003 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Isamu Hasegawa . - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ - -static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags, - re_string_t *input, int n); -static void match_ctx_clean (re_match_context_t *mctx); -static void match_ctx_free (re_match_context_t *cache); -static void match_ctx_free_subtops (re_match_context_t *mctx); -static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node, - int str_idx, int from, int to); -static int search_cur_bkref_entry (re_match_context_t *mctx, int str_idx); -static void match_ctx_clear_flag (re_match_context_t *mctx); -static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, int node, - int str_idx); -static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop, - int node, int str_idx); -static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts, - re_dfastate_t **limited_sts, int last_node, - int last_str_idx, int check_subexp); -static reg_errcode_t re_search_internal (const regex_t *preg, - const char *string, int length, - int start, int range, int stop, - size_t nmatch, regmatch_t pmatch[], - int eflags); -static int 
re_search_2_stub (struct re_pattern_buffer *bufp, - const char *string1, int length1, - const char *string2, int length2, - int start, int range, struct re_registers *regs, - int stop, int ret_len); -static int re_search_stub (struct re_pattern_buffer *bufp, - const char *string, int length, int start, - int range, int stop, struct re_registers *regs, - int ret_len); -static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, - int nregs, int regs_allocated); -static inline re_dfastate_t *acquire_init_state_context (reg_errcode_t *err, - const regex_t *preg, - const re_match_context_t *mctx, - int idx); -static reg_errcode_t prune_impossible_nodes (const regex_t *preg, - re_match_context_t *mctx); -static int check_matching (const regex_t *preg, re_match_context_t *mctx, - int fl_search, int fl_longest_match); -static int check_halt_node_context (const re_dfa_t *dfa, int node, - unsigned int context); -static int check_halt_state_context (const regex_t *preg, - const re_dfastate_t *state, - const re_match_context_t *mctx, int idx); -static void update_regs (re_dfa_t *dfa, regmatch_t *pmatch, int cur_node, - int cur_idx, int nmatch); -static int proceed_next_node (const regex_t *preg, int nregs, regmatch_t *regs, - const re_match_context_t *mctx, - int *pidx, int node, re_node_set *eps_via_nodes, - struct re_fail_stack_t *fs); -static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs, - int str_idx, int *dests, int nregs, - regmatch_t *regs, - re_node_set *eps_via_nodes); -static int pop_fail_stack (struct re_fail_stack_t *fs, int *pidx, int nregs, - regmatch_t *regs, re_node_set *eps_via_nodes); -static reg_errcode_t set_regs (const regex_t *preg, - const re_match_context_t *mctx, - size_t nmatch, regmatch_t *pmatch, - int fl_backtrack); -static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs); - -#ifdef RE_ENABLE_I18N -static int sift_states_iter_mb (const regex_t *preg, - const re_match_context_t *mctx, - 
re_sift_context_t *sctx, - int node_idx, int str_idx, int max_str_idx); -#endif /* RE_ENABLE_I18N */ -static reg_errcode_t sift_states_backward (const regex_t *preg, - re_match_context_t *mctx, - re_sift_context_t *sctx); -static reg_errcode_t update_cur_sifted_state (const regex_t *preg, - re_match_context_t *mctx, - re_sift_context_t *sctx, - int str_idx, - re_node_set *dest_nodes); -static reg_errcode_t add_epsilon_src_nodes (re_dfa_t *dfa, - re_node_set *dest_nodes, - const re_node_set *candidates); -static reg_errcode_t sub_epsilon_src_nodes (re_dfa_t *dfa, int node, - re_node_set *dest_nodes, - const re_node_set *and_nodes); -static int check_dst_limits (re_dfa_t *dfa, re_node_set *limits, - re_match_context_t *mctx, int dst_node, - int dst_idx, int src_node, int src_idx); -static int check_dst_limits_calc_pos (re_dfa_t *dfa, re_match_context_t *mctx, - int limit, re_node_set *eclosures, - int subexp_idx, int node, int str_idx); -static reg_errcode_t check_subexp_limits (re_dfa_t *dfa, - re_node_set *dest_nodes, - const re_node_set *candidates, - re_node_set *limits, - struct re_backref_cache_entry *bkref_ents, - int str_idx); -static reg_errcode_t sift_states_bkref (const regex_t *preg, - re_match_context_t *mctx, - re_sift_context_t *sctx, - int str_idx, re_node_set *dest_nodes); -static reg_errcode_t clean_state_log_if_need (re_match_context_t *mctx, - int next_state_log_idx); -static reg_errcode_t merge_state_array (re_dfa_t *dfa, re_dfastate_t **dst, - re_dfastate_t **src, int num); -static re_dfastate_t *transit_state (reg_errcode_t *err, const regex_t *preg, - re_match_context_t *mctx, - re_dfastate_t *state, int fl_search); -static reg_errcode_t check_subexp_matching_top (re_dfa_t *dfa, - re_match_context_t *mctx, - re_node_set *cur_nodes, - int str_idx); -static re_dfastate_t *transit_state_sb (reg_errcode_t *err, const regex_t *preg, - re_dfastate_t *pstate, - int fl_search, - re_match_context_t *mctx); -#ifdef RE_ENABLE_I18N -static reg_errcode_t 
transit_state_mb (const regex_t *preg, - re_dfastate_t *pstate, - re_match_context_t *mctx); -#endif /* RE_ENABLE_I18N */ -static reg_errcode_t transit_state_bkref (const regex_t *preg, - re_node_set *nodes, - re_match_context_t *mctx); -static reg_errcode_t get_subexp (const regex_t *preg, re_match_context_t *mctx, - int bkref_node, int bkref_str_idx); -static reg_errcode_t get_subexp_sub (const regex_t *preg, - re_match_context_t *mctx, - re_sub_match_top_t *sub_top, - re_sub_match_last_t *sub_last, - int bkref_node, int bkref_str); -static int find_subexp_node (re_dfa_t *dfa, re_node_set *nodes, - int subexp_idx, int fl_open); -static reg_errcode_t check_arrival (const regex_t *preg, - re_match_context_t *mctx, - state_array_t *path, int top_node, - int top_str, int last_node, int last_str, - int fl_open); -static reg_errcode_t check_arrival_add_next_nodes (const regex_t *preg, - re_dfa_t *dfa, - re_match_context_t *mctx, - int str_idx, - re_node_set *cur_nodes, - re_node_set *next_nodes); -static reg_errcode_t check_arrival_expand_ecl (re_dfa_t *dfa, - re_node_set *cur_nodes, - int ex_subexp, int fl_open); -static reg_errcode_t check_arrival_expand_ecl_sub (re_dfa_t *dfa, - re_node_set *dst_nodes, - int target, int ex_subexp, - int fl_open); -static reg_errcode_t expand_bkref_cache (const regex_t *preg, - re_match_context_t *mctx, - re_node_set *cur_nodes, int cur_str, - int last_str, int subexp_num, - int fl_open); -static re_dfastate_t **build_trtable (const regex_t *dfa, - const re_dfastate_t *state, - int fl_search); -#ifdef RE_ENABLE_I18N -static int check_node_accept_bytes (const regex_t *preg, int node_idx, - const re_string_t *input, int idx); -# ifdef _LIBC -static unsigned int find_collation_sequence_value (const unsigned char *mbs, - size_t name_len); -# endif /* _LIBC */ -#endif /* RE_ENABLE_I18N */ -static int group_nodes_into_DFAstates (const regex_t *dfa, - const re_dfastate_t *state, - re_node_set *states_node, - bitset *states_ch); -static int 
check_node_accept (const regex_t *preg, const re_token_t *node, - const re_match_context_t *mctx, int idx); -static reg_errcode_t extend_buffers (re_match_context_t *mctx); - -/* Entry point for POSIX code. */ - -/* regexec searches for a given pattern, specified by PREG, in the - string STRING. - - If NMATCH is zero or REG_NOSUB was set in the cflags argument to - `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at - least NMATCH elements, and we set them to the offsets of the - corresponding matched substrings. - - EFLAGS specifies `execution flags' which affect matching: if - REG_NOTBOL is set, then ^ does not match at the beginning of the - string; if REG_NOTEOL is set, then $ does not match at the end. - - We return 0 if we find a match and REG_NOMATCH if not. */ - -int -regexec (preg, string, nmatch, pmatch, eflags) - const regex_t *__restrict preg; - const char *__restrict string; - size_t nmatch; - regmatch_t pmatch[]; - int eflags; -{ - reg_errcode_t err; - int length = strlen (string); - if (preg->no_sub) - err = re_search_internal (preg, string, length, 0, length, length, 0, - NULL, eflags); - else - err = re_search_internal (preg, string, length, 0, length, length, nmatch, - pmatch, eflags); - return err != REG_NOERROR; -} -#ifdef _LIBC -weak_alias (__regexec, regexec) -#endif - -/* Entry points for GNU code. */ - -/* re_match, re_search, re_match_2, re_search_2 - - The former two functions operate on STRING with length LENGTH, - while the later two operate on concatenation of STRING1 and STRING2 - with lengths LENGTH1 and LENGTH2, respectively. - - re_match() matches the compiled pattern in BUFP against the string, - starting at index START. - - re_search() first tries matching at index START, then it tries to match - starting from index START + 1, and so on. The last start position tried - is START + RANGE. (Thus RANGE = 0 forces re_search to operate the same - way as re_match().) 
- - The parameter STOP of re_{match,search}_2 specifies that no match exceeding - the first STOP characters of the concatenation of the strings should be - concerned. - - If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match - and all groups is stroed in REGS. (For the "_2" variants, the offsets are - computed relative to the concatenation, not relative to the individual - strings.) - - On success, re_match* functions return the length of the match, re_search* - return the position of the start of the match. Return value -1 means no - match was found and -2 indicates an internal error. */ - -int -re_match (bufp, string, length, start, regs) - struct re_pattern_buffer *bufp; - const char *string; - int length, start; - struct re_registers *regs; -{ - return re_search_stub (bufp, string, length, start, 0, length, regs, 1); -} -#ifdef _LIBC -weak_alias (__re_match, re_match) -#endif - -int -re_search (bufp, string, length, start, range, regs) - struct re_pattern_buffer *bufp; - const char *string; - int length, start, range; - struct re_registers *regs; -{ - return re_search_stub (bufp, string, length, start, range, length, regs, 0); -} -#ifdef _LIBC -weak_alias (__re_search, re_search) -#endif - -int -re_match_2 (bufp, string1, length1, string2, length2, start, regs, stop) - struct re_pattern_buffer *bufp; - const char *string1, *string2; - int length1, length2, start, stop; - struct re_registers *regs; -{ - return re_search_2_stub (bufp, string1, length1, string2, length2, - start, 0, regs, stop, 1); -} -#ifdef _LIBC -weak_alias (__re_match_2, re_match_2) -#endif - -int -re_search_2 (bufp, string1, length1, string2, length2, start, range, regs, stop) - struct re_pattern_buffer *bufp; - const char *string1, *string2; - int length1, length2, start, range, stop; - struct re_registers *regs; -{ - return re_search_2_stub (bufp, string1, length1, string2, length2, - start, range, regs, stop, 0); -} -#ifdef _LIBC -weak_alias (__re_search_2, 
re_search_2) -#endif - -static int -re_search_2_stub (bufp, string1, length1, string2, length2, start, range, regs, - stop, ret_len) - struct re_pattern_buffer *bufp; - const char *string1, *string2; - int length1, length2, start, range, stop, ret_len; - struct re_registers *regs; -{ - const char *str; - int rval; - int len = length1 + length2; - int free_str = 0; - - if (BE (length1 < 0 || length2 < 0 || stop < 0, 0)) - return -2; - - /* Concatenate the strings. */ - if (length2 > 0) - if (length1 > 0) - { - char *s = re_malloc (char, len); - - if (BE (s == NULL, 0)) - return -2; - memcpy (s, string1, length1); - memcpy (s + length1, string2, length2); - str = s; - free_str = 1; - } - else - str = string2; - else - str = string1; - - rval = re_search_stub (bufp, str, len, start, range, stop, regs, - ret_len); - if (free_str) - re_free ((char *) str); - return rval; -} - -/* The parameters have the same meaning as those of re_search. - Additional parameters: - If RET_LEN is nonzero the length of the match is returned (re_match style); - otherwise the position of the match is returned. */ - -static int -re_search_stub (bufp, string, length, start, range, stop, regs, ret_len) - struct re_pattern_buffer *bufp; - const char *string; - int length, start, range, stop, ret_len; - struct re_registers *regs; -{ - reg_errcode_t result; - regmatch_t *pmatch; - int nregs, rval; - int eflags = 0; - - /* Check for out-of-range. */ - if (BE (start < 0 || start > length, 0)) - return -1; - if (BE (start + range > length, 0)) - range = length - start; - else if (BE (start + range < 0, 0)) - range = -start; - - eflags |= (bufp->not_bol) ? REG_NOTBOL : 0; - eflags |= (bufp->not_eol) ? REG_NOTEOL : 0; - - /* Compile fastmap if we haven't yet. */ - if (range > 0 && bufp->fastmap != NULL && !bufp->fastmap_accurate) - re_compile_fastmap (bufp); - - if (BE (bufp->no_sub, 0)) - regs = NULL; - - /* We need at least 1 register. 
*/ - if (regs == NULL) - nregs = 1; - else if (BE (bufp->regs_allocated == REGS_FIXED && - regs->num_regs < bufp->re_nsub + 1, 0)) - { - nregs = regs->num_regs; - if (BE (nregs < 1, 0)) - { - /* Nothing can be copied to regs. */ - regs = NULL; - nregs = 1; - } - } - else - nregs = bufp->re_nsub + 1; - pmatch = re_malloc (regmatch_t, nregs); - if (BE (pmatch == NULL, 0)) - return -2; - - result = re_search_internal (bufp, string, length, start, range, stop, - nregs, pmatch, eflags); - - rval = 0; - - /* I hope we needn't fill ther regs with -1's when no match was found. */ - if (result != REG_NOERROR) - rval = -1; - else if (regs != NULL) - { - /* If caller wants register contents data back, copy them. */ - bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs, - bufp->regs_allocated); - if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0)) - rval = -2; - } - - if (BE (rval == 0, 1)) - { - if (ret_len) - { - assert (pmatch[0].rm_so == start); - rval = pmatch[0].rm_eo - start; - } - else - rval = pmatch[0].rm_so; - } - re_free (pmatch); - return rval; -} - -static unsigned -re_copy_regs (regs, pmatch, nregs, regs_allocated) - struct re_registers *regs; - regmatch_t *pmatch; - int nregs, regs_allocated; -{ - int rval = REGS_REALLOCATE; - int i; - int need_regs = nregs + 1; - /* We need one extra element beyond `num_regs' for the `-1' marker GNU code - uses. */ - - /* Have the register data arrays been allocated? */ - if (regs_allocated == REGS_UNALLOCATED) - { /* No. So allocate them with malloc. */ - regs->start = re_malloc (regoff_t, need_regs); - if (BE (regs->start == NULL, 0)) - return REGS_UNALLOCATED; - regs->end = re_malloc (regoff_t, need_regs); - if (BE (regs->end == NULL, 0)) - { - re_free (regs->start); - return REGS_UNALLOCATED; - } - regs->num_regs = need_regs; - } - else if (regs_allocated == REGS_REALLOCATE) - { /* Yes. If we need more elements than were already - allocated, reallocate them. If we need fewer, just - leave it alone. 
*/ - if (need_regs > regs->num_regs) - { - regs->start = re_realloc (regs->start, regoff_t, need_regs); - if (BE (regs->start == NULL, 0)) - { - if (regs->end != NULL) - re_free (regs->end); - return REGS_UNALLOCATED; - } - regs->end = re_realloc (regs->end, regoff_t, need_regs); - if (BE (regs->end == NULL, 0)) - { - re_free (regs->start); - return REGS_UNALLOCATED; - } - regs->num_regs = need_regs; - } - } - else - { - assert (regs_allocated == REGS_FIXED); - /* This function may not be called with REGS_FIXED and nregs too big. */ - assert (regs->num_regs >= nregs); - rval = REGS_FIXED; - } - - /* Copy the regs. */ - for (i = 0; i < nregs; ++i) - { - regs->start[i] = pmatch[i].rm_so; - regs->end[i] = pmatch[i].rm_eo; - } - for ( ; i < regs->num_regs; ++i) - regs->start[i] = regs->end[i] = -1; - - return rval; -} - -/* Set REGS to hold NUM_REGS registers, storing them in STARTS and - ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use - this memory for recording register information. STARTS and ENDS - must be allocated using the malloc library routine, and must each - be at least NUM_REGS * sizeof (regoff_t) bytes long. - - If NUM_REGS == 0, then subsequent matches should allocate their own - register data. - - Unless this function is called, the first search or match using - PATTERN_BUFFER will allocate its own register data, without - freeing the old data. */ - -void -re_set_registers (bufp, regs, num_regs, starts, ends) - struct re_pattern_buffer *bufp; - struct re_registers *regs; - unsigned num_regs; - regoff_t *starts, *ends; -{ - if (num_regs) - { - bufp->regs_allocated = REGS_REALLOCATE; - regs->num_regs = num_regs; - regs->start = starts; - regs->end = ends; - } - else - { - bufp->regs_allocated = REGS_UNALLOCATED; - regs->num_regs = 0; - regs->start = regs->end = (regoff_t *) 0; - } -} -#ifdef _LIBC -weak_alias (__re_set_registers, re_set_registers) -#endif - -/* Entry points compatible with 4.2 BSD regex library. 
We don't define - them unless specifically requested. */ - -#if defined _REGEX_RE_COMP || defined _LIBC -int -# ifdef _LIBC -weak_function -# endif -re_exec (s) - const char *s; -{ - return 0 == regexec (&re_comp_buf, s, 0, NULL, 0); -} -#endif /* _REGEX_RE_COMP */ - -static re_node_set empty_set; - -/* Internal entry point. */ - -/* Searches for a compiled pattern PREG in the string STRING, whose - length is LENGTH. NMATCH, PMATCH, and EFLAGS have the same - mingings with regexec. START, and RANGE have the same meanings - with re_search. - Return REG_NOERROR if we find a match, and REG_NOMATCH if not, - otherwise return the error code. - Note: We assume front end functions already check ranges. - (START + RANGE >= 0 && START + RANGE <= LENGTH) */ - -static reg_errcode_t -re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch, - eflags) - const regex_t *preg; - const char *string; - int length, start, range, stop, eflags; - size_t nmatch; - regmatch_t pmatch[]; -{ - reg_errcode_t err; - re_dfa_t *dfa = (re_dfa_t *)preg->buffer; - re_string_t input; - int left_lim, right_lim, incr; - int fl_longest_match, match_first, match_last = -1; - int fast_translate, sb; - re_match_context_t mctx; - char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate - && range && !preg->can_be_null) ? preg->fastmap : NULL); - - /* Check if the DFA haven't been compiled. */ - if (BE (preg->used == 0 || dfa->init_state == NULL - || dfa->init_state_word == NULL || dfa->init_state_nl == NULL - || dfa->init_state_begbuf == NULL, 0)) - return REG_NOMATCH; - - re_node_set_init_empty (&empty_set); - memset (&mctx, '\0', sizeof (re_match_context_t)); - - /* We must check the longest matching, if nmatch > 0. 
*/ - fl_longest_match = (nmatch != 0 || dfa->nbackref); - - err = re_string_allocate (&input, string, length, dfa->nodes_len + 1, - preg->translate, preg->syntax & RE_ICASE); - if (BE (err != REG_NOERROR, 0)) - goto free_return; - input.stop = stop; - - err = match_ctx_init (&mctx, eflags, &input, dfa->nbackref * 2); - if (BE (err != REG_NOERROR, 0)) - goto free_return; - - /* We will log all the DFA states through which the dfa pass, - if nmatch > 1, or this dfa has "multibyte node", which is a - back-reference or a node which can accept multibyte character or - multi character collating element. */ - if (nmatch > 1 || dfa->has_mb_node) - { - mctx.state_log = re_malloc (re_dfastate_t *, dfa->nodes_len + 1); - if (BE (mctx.state_log == NULL, 0)) - { - err = REG_ESPACE; - goto free_return; - } - } - else - mctx.state_log = NULL; - -#ifdef DEBUG - /* We assume front-end functions already check them. */ - assert (start + range >= 0 && start + range <= length); -#endif - - match_first = start; - input.tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF - : CONTEXT_NEWLINE | CONTEXT_BEGBUF); - - /* Check incrementally whether of not the input string match. */ - incr = (range < 0) ? -1 : 1; - left_lim = (range < 0) ? start + range : start; - right_lim = (range < 0) ? start : start + range; - sb = MB_CUR_MAX == 1; - fast_translate = sb || !(preg->syntax & RE_ICASE || preg->translate); - - for (;;) - { - /* At first get the current byte from input string. */ - if (fastmap) - { - if (BE (fast_translate, 1)) - { - unsigned RE_TRANSLATE_TYPE t - = (unsigned RE_TRANSLATE_TYPE) preg->translate; - if (BE (range >= 0, 1)) - { - if (BE (t != NULL, 0)) - { - while (BE (match_first < right_lim, 1) - && !fastmap[t[(unsigned char) string[match_first]]]) - ++match_first; - } - else - { - while (BE (match_first < right_lim, 1) - && !fastmap[(unsigned char) string[match_first]]) - ++match_first; - } - if (BE (match_first == right_lim, 0)) - { - int ch = match_first >= length - ? 
0 : (unsigned char) string[match_first]; - if (!fastmap[t ? t[ch] : ch]) - break; - } - } - else - { - while (match_first >= left_lim) - { - int ch = match_first >= length - ? 0 : (unsigned char) string[match_first]; - if (fastmap[t ? t[ch] : ch]) - break; - --match_first; - } - if (match_first < left_lim) - break; - } - } - else - { - int ch; - - do - { - /* In this case, we can't determine easily the current byte, - since it might be a component byte of a multibyte - character. Then we use the constructed buffer - instead. */ - /* If MATCH_FIRST is out of the valid range, reconstruct the - buffers. */ - if (input.raw_mbs_idx + input.valid_len <= match_first - || match_first < input.raw_mbs_idx) - { - err = re_string_reconstruct (&input, match_first, eflags, - preg->newline_anchor); - if (BE (err != REG_NOERROR, 0)) - goto free_return; - } - /* If MATCH_FIRST is out of the buffer, leave it as '\0'. - Note that MATCH_FIRST must not be smaller than 0. */ - ch = ((match_first >= length) ? 0 - : re_string_byte_at (&input, - match_first - input.raw_mbs_idx)); - if (fastmap[ch]) - break; - match_first += incr; - } - while (match_first >= left_lim && match_first <= right_lim); - if (! fastmap[ch]) - break; - } - } - - /* Reconstruct the buffers so that the matcher can assume that - the matching starts from the begining of the buffer. */ - err = re_string_reconstruct (&input, match_first, eflags, - preg->newline_anchor); - if (BE (err != REG_NOERROR, 0)) - goto free_return; -#ifdef RE_ENABLE_I18N - /* Eliminate it when it is a component of a multibyte character - and isn't the head of a multibyte character. */ - if (sb || re_string_first_byte (&input, 0)) -#endif - { - /* It seems to be appropriate one, then use the matcher. */ - /* We assume that the matching starts from 0. 
*/ - mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0; - match_last = check_matching (preg, &mctx, 0, fl_longest_match); - if (match_last != -1) - { - if (BE (match_last == -2, 0)) - { - err = REG_ESPACE; - goto free_return; - } - else - { - mctx.match_last = match_last; - if ((!preg->no_sub && nmatch > 1) || dfa->nbackref) - { - re_dfastate_t *pstate = mctx.state_log[match_last]; - mctx.last_node = check_halt_state_context (preg, pstate, - &mctx, match_last); - } - if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match) - || dfa->nbackref) - { - err = prune_impossible_nodes (preg, &mctx); - if (err == REG_NOERROR) - break; - if (BE (err != REG_NOMATCH, 0)) - goto free_return; - } - else - break; /* We found a matching. */ - } - } - match_ctx_clean (&mctx); - } - /* Update counter. */ - match_first += incr; - if (match_first < left_lim || right_lim < match_first) - break; - } - - /* Set pmatch[] if we need. */ - if (match_last != -1 && nmatch > 0) - { - int reg_idx; - - /* Initialize registers. */ - for (reg_idx = 0; reg_idx < nmatch; ++reg_idx) - pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1; - - /* Set the points where matching start/end. */ - pmatch[0].rm_so = 0; - pmatch[0].rm_eo = mctx.match_last; - - if (!preg->no_sub && nmatch > 1) - { - err = set_regs (preg, &mctx, nmatch, pmatch, - dfa->has_plural_match && dfa->nbackref > 0); - if (BE (err != REG_NOERROR, 0)) - goto free_return; - } - - /* At last, add the offset to the each registers, since we slided - the buffers so that We can assume that the matching starts from 0. */ - for (reg_idx = 0; reg_idx < nmatch; ++reg_idx) - if (pmatch[reg_idx].rm_so != -1) - { - pmatch[reg_idx].rm_so += match_first; - pmatch[reg_idx].rm_eo += match_first; - } - } - err = (match_last == -1) ? 
REG_NOMATCH : REG_NOERROR; - free_return: - re_free (mctx.state_log); - if (dfa->nbackref) - match_ctx_free (&mctx); - re_string_destruct (&input); - return err; -} - -static reg_errcode_t -prune_impossible_nodes (preg, mctx) - const regex_t *preg; - re_match_context_t *mctx; -{ - int halt_node, match_last; - reg_errcode_t ret; - re_dfa_t *dfa = (re_dfa_t *)preg->buffer; - re_dfastate_t **sifted_states; - re_dfastate_t **lim_states = NULL; - re_sift_context_t sctx; -#ifdef DEBUG - assert (mctx->state_log != NULL); -#endif - match_last = mctx->match_last; - halt_node = mctx->last_node; - sifted_states = re_malloc (re_dfastate_t *, match_last + 1); - if (BE (sifted_states == NULL, 0)) - { - ret = REG_ESPACE; - goto free_return; - } - if (dfa->nbackref) - { - lim_states = re_malloc (re_dfastate_t *, match_last + 1); - if (BE (lim_states == NULL, 0)) - { - ret = REG_ESPACE; - goto free_return; - } - while (1) - { - memset (lim_states, '\0', - sizeof (re_dfastate_t *) * (match_last + 1)); - match_ctx_clear_flag (mctx); - sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, - match_last, 0); - ret = sift_states_backward (preg, mctx, &sctx); - re_node_set_free (&sctx.limits); - if (BE (ret != REG_NOERROR, 0)) - goto free_return; - if (sifted_states[0] != NULL || lim_states[0] != NULL) - break; - do - { - --match_last; - if (match_last < 0) - { - ret = REG_NOMATCH; - goto free_return; - } - } while (!mctx->state_log[match_last]->halt); - halt_node = check_halt_state_context (preg, - mctx->state_log[match_last], - mctx, match_last); - } - ret = merge_state_array (dfa, sifted_states, lim_states, - match_last + 1); - re_free (lim_states); - lim_states = NULL; - if (BE (ret != REG_NOERROR, 0)) - goto free_return; - } - else - { - sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, - match_last, 0); - ret = sift_states_backward (preg, mctx, &sctx); - re_node_set_free (&sctx.limits); - if (BE (ret != REG_NOERROR, 0)) - goto free_return; - } - re_free 
(mctx->state_log); - mctx->state_log = sifted_states; - sifted_states = NULL; - mctx->last_node = halt_node; - mctx->match_last = match_last; - ret = REG_NOERROR; - free_return: - re_free (sifted_states); - re_free (lim_states); - return ret; -} - -/* Acquire an initial state and return it. - We must select appropriate initial state depending on the context, - since initial states may have constraints like "\<", "^", etc.. */ - -static inline re_dfastate_t * -acquire_init_state_context (err, preg, mctx, idx) - reg_errcode_t *err; - const regex_t *preg; - const re_match_context_t *mctx; - int idx; -{ - re_dfa_t *dfa = (re_dfa_t *) preg->buffer; - - *err = REG_NOERROR; - if (dfa->init_state->has_constraint) - { - unsigned int context; - context = re_string_context_at (mctx->input, idx - 1, mctx->eflags, - preg->newline_anchor); - if (IS_WORD_CONTEXT (context)) - return dfa->init_state_word; - else if (IS_ORDINARY_CONTEXT (context)) - return dfa->init_state; - else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context)) - return dfa->init_state_begbuf; - else if (IS_NEWLINE_CONTEXT (context)) - return dfa->init_state_nl; - else if (IS_BEGBUF_CONTEXT (context)) - { - /* It is relatively rare case, then calculate on demand. */ - return re_acquire_state_context (err, dfa, - dfa->init_state->entrance_nodes, - context); - } - else - /* Must not happen? */ - return dfa->init_state; - } - else - return dfa->init_state; -} - -/* Check whether the regular expression match input string INPUT or not, - and return the index where the matching end, return -1 if not match, - or return -2 in case of an error. - FL_SEARCH means we must search where the matching starts, - FL_LONGEST_MATCH means we want the POSIX longest matching. - Note that the matcher assume that the maching starts from the current - index of the buffer. 
*/ - -static int -check_matching (preg, mctx, fl_search, fl_longest_match) - const regex_t *preg; - re_match_context_t *mctx; - int fl_search, fl_longest_match; -{ - re_dfa_t *dfa = (re_dfa_t *) preg->buffer; - reg_errcode_t err; - int match = 0; - int match_last = -1; - int cur_str_idx = re_string_cur_idx (mctx->input); - re_dfastate_t *cur_state; - - cur_state = acquire_init_state_context (&err, preg, mctx, cur_str_idx); - /* An initial state must not be NULL(invalid state). */ - if (BE (cur_state == NULL, 0)) - return -2; - if (mctx->state_log != NULL) - mctx->state_log[cur_str_idx] = cur_state; - - /* Check OP_OPEN_SUBEXP in the initial state in case that we use them - later. E.g. Processing back references. */ - if (dfa->nbackref) - { - err = check_subexp_matching_top (dfa, mctx, &cur_state->nodes, 0); - if (BE (err != REG_NOERROR, 0)) - return err; - } - - if (cur_state->has_backref) - { - err = transit_state_bkref (preg, &cur_state->nodes, mctx); - if (BE (err != REG_NOERROR, 0)) - return err; - } - - /* If the RE accepts NULL string. */ - if (cur_state->halt) - { - if (!cur_state->has_constraint - || check_halt_state_context (preg, cur_state, mctx, cur_str_idx)) - { - if (!fl_longest_match) - return cur_str_idx; - else - { - match_last = cur_str_idx; - match = 1; - } - } - } - - while (!re_string_eoi (mctx->input)) - { - cur_state = transit_state (&err, preg, mctx, cur_state, - fl_search && !match); - if (cur_state == NULL) /* Reached at the invalid state or an error. */ - { - cur_str_idx = re_string_cur_idx (mctx->input); - if (BE (err != REG_NOERROR, 0)) - return -2; - if (fl_search && !match) - { - /* Restart from initial state, since we are searching - the point from where matching start. 
*/ -#ifdef RE_ENABLE_I18N - if (MB_CUR_MAX == 1 - || re_string_first_byte (mctx->input, cur_str_idx)) -#endif /* RE_ENABLE_I18N */ - cur_state = acquire_init_state_context (&err, preg, mctx, - cur_str_idx); - if (BE (cur_state == NULL && err != REG_NOERROR, 0)) - return -2; - if (mctx->state_log != NULL) - mctx->state_log[cur_str_idx] = cur_state; - } - else if (!fl_longest_match && match) - break; - else /* (fl_longest_match && match) || (!fl_search && !match) */ - { - if (mctx->state_log == NULL) - break; - else - { - int max = mctx->state_log_top; - for (; cur_str_idx <= max; ++cur_str_idx) - if (mctx->state_log[cur_str_idx] != NULL) - break; - if (cur_str_idx > max) - break; - } - } - } - - if (cur_state != NULL && cur_state->halt) - { - /* Reached at a halt state. - Check the halt state can satisfy the current context. */ - if (!cur_state->has_constraint - || check_halt_state_context (preg, cur_state, mctx, - re_string_cur_idx (mctx->input))) - { - /* We found an appropriate halt state. */ - match_last = re_string_cur_idx (mctx->input); - match = 1; - if (!fl_longest_match) - break; - } - } - } - return match_last; -} - -/* Check NODE match the current context. */ - -static int check_halt_node_context (dfa, node, context) - const re_dfa_t *dfa; - int node; - unsigned int context; -{ - re_token_type_t type = dfa->nodes[node].type; - unsigned int constraint = dfa->nodes[node].constraint; - if (type != END_OF_RE) - return 0; - if (!constraint) - return 1; - if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context)) - return 0; - return 1; -} - -/* Check the halt state STATE match the current context. - Return 0 if not match, if the node, STATE has, is a halt node and - match the context, return the node. 
*/ - -static int -check_halt_state_context (preg, state, mctx, idx) - const regex_t *preg; - const re_dfastate_t *state; - const re_match_context_t *mctx; - int idx; -{ - re_dfa_t *dfa = (re_dfa_t *) preg->buffer; - int i; - unsigned int context; -#ifdef DEBUG - assert (state->halt); -#endif - context = re_string_context_at (mctx->input, idx, mctx->eflags, - preg->newline_anchor); - for (i = 0; i < state->nodes.nelem; ++i) - if (check_halt_node_context (dfa, state->nodes.elems[i], context)) - return state->nodes.elems[i]; - return 0; -} - -/* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA - corresponding to the DFA). - Return the destination node, and update EPS_VIA_NODES, return -1 in case - of errors. */ - -static int -proceed_next_node (preg, nregs, regs, mctx, pidx, node, eps_via_nodes, fs) - const regex_t *preg; - regmatch_t *regs; - const re_match_context_t *mctx; - int nregs, *pidx, node; - re_node_set *eps_via_nodes; - struct re_fail_stack_t *fs; -{ - re_dfa_t *dfa = (re_dfa_t *)preg->buffer; - int i, err, dest_node; - dest_node = -1; - if (IS_EPSILON_NODE (dfa->nodes[node].type)) - { - re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes; - int ndest, dest_nodes[2]; - err = re_node_set_insert (eps_via_nodes, node); - if (BE (err < 0, 0)) - return -1; - /* Pick up valid destinations. */ - for (ndest = 0, i = 0; i < dfa->edests[node].nelem; ++i) - { - int candidate = dfa->edests[node].elems[i]; - if (!re_node_set_contains (cur_nodes, candidate)) - continue; - dest_nodes[0] = (ndest == 0) ? candidate : dest_nodes[0]; - dest_nodes[1] = (ndest == 1) ? candidate : dest_nodes[1]; - ++ndest; - } - if (ndest <= 1) - return ndest == 0 ? -1 : (ndest == 1 ? dest_nodes[0] : 0); - /* In order to avoid infinite loop like "(a*)*". 
*/ - if (re_node_set_contains (eps_via_nodes, dest_nodes[0])) - return dest_nodes[1]; - if (fs != NULL) - push_fail_stack (fs, *pidx, dest_nodes, nregs, regs, eps_via_nodes); - return dest_nodes[0]; - } - else - { - int naccepted = 0; - re_token_type_t type = dfa->nodes[node].type; - -#ifdef RE_ENABLE_I18N - if (ACCEPT_MB_NODE (type)) - naccepted = check_node_accept_bytes (preg, node, mctx->input, *pidx); - else -#endif /* RE_ENABLE_I18N */ - if (type == OP_BACK_REF) - { - int subexp_idx = dfa->nodes[node].opr.idx; - naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so; - if (fs != NULL) - { - if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1) - return -1; - else if (naccepted) - { - char *buf = (char *) re_string_get_buffer (mctx->input); - if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx, - naccepted) != 0) - return -1; - } - } - - if (naccepted == 0) - { - err = re_node_set_insert (eps_via_nodes, node); - if (BE (err < 0, 0)) - return -2; - dest_node = dfa->edests[node].elems[0]; - if (re_node_set_contains (&mctx->state_log[*pidx]->nodes, - dest_node)) - return dest_node; - } - } - - if (naccepted != 0 - || check_node_accept (preg, dfa->nodes + node, mctx, *pidx)) - { - dest_node = dfa->nexts[node]; - *pidx = (naccepted == 0) ? 
*pidx + 1 : *pidx + naccepted; - if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL - || !re_node_set_contains (&mctx->state_log[*pidx]->nodes, - dest_node))) - return -1; - re_node_set_empty (eps_via_nodes); - return dest_node; - } - } - return -1; -} - -static reg_errcode_t -push_fail_stack (fs, str_idx, dests, nregs, regs, eps_via_nodes) - struct re_fail_stack_t *fs; - int str_idx, *dests, nregs; - regmatch_t *regs; - re_node_set *eps_via_nodes; -{ - reg_errcode_t err; - int num = fs->num++; - if (fs->num == fs->alloc) - { - struct re_fail_stack_ent_t *new_array; - fs->alloc *= 2; - new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t) - * fs->alloc)); - if (new_array == NULL) - return REG_ESPACE; - fs->stack = new_array; - } - fs->stack[num].idx = str_idx; - fs->stack[num].node = dests[1]; - fs->stack[num].regs = re_malloc (regmatch_t, nregs); - memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs); - err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes); - return err; -} - -static int -pop_fail_stack (fs, pidx, nregs, regs, eps_via_nodes) - struct re_fail_stack_t *fs; - int *pidx, nregs; - regmatch_t *regs; - re_node_set *eps_via_nodes; -{ - int num = --fs->num; - assert (num >= 0); - *pidx = fs->stack[num].idx; - memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs); - re_node_set_free (eps_via_nodes); - re_free (fs->stack[num].regs); - *eps_via_nodes = fs->stack[num].eps_via_nodes; - return fs->stack[num].node; -} - -/* Set the positions where the subexpressions are starts/ends to registers - PMATCH. - Note: We assume that pmatch[0] is already set, and - pmatch[i].rm_so == pmatch[i].rm_eo == -1 (i > 1). 
*/ - -static reg_errcode_t -set_regs (preg, mctx, nmatch, pmatch, fl_backtrack) - const regex_t *preg; - const re_match_context_t *mctx; - size_t nmatch; - regmatch_t *pmatch; - int fl_backtrack; -{ - re_dfa_t *dfa = (re_dfa_t *)preg->buffer; - int idx, cur_node, real_nmatch; - re_node_set eps_via_nodes; - struct re_fail_stack_t *fs; - struct re_fail_stack_t fs_body = {0, 2, NULL}; -#ifdef DEBUG - assert (nmatch > 1); - assert (mctx->state_log != NULL); -#endif - if (fl_backtrack) - { - fs = &fs_body; - fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc); - } - else - fs = NULL; - cur_node = dfa->init_node; - real_nmatch = (nmatch <= preg->re_nsub) ? nmatch : preg->re_nsub + 1; - re_node_set_init_empty (&eps_via_nodes); - for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;) - { - update_regs (dfa, pmatch, cur_node, idx, real_nmatch); - if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node) - { - int reg_idx; - if (fs) - { - for (reg_idx = 0; reg_idx < nmatch; ++reg_idx) - if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1) - break; - if (reg_idx == nmatch) - { - re_node_set_free (&eps_via_nodes); - return free_fail_stack_return (fs); - } - cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch, - &eps_via_nodes); - } - else - { - re_node_set_free (&eps_via_nodes); - return REG_NOERROR; - } - } - - /* Proceed to next node. 
*/ - cur_node = proceed_next_node (preg, nmatch, pmatch, mctx, &idx, cur_node, - &eps_via_nodes, fs); - - if (BE (cur_node < 0, 0)) - { - if (cur_node == -2) - return REG_ESPACE; - if (fs) - cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch, - &eps_via_nodes); - else - { - re_node_set_free (&eps_via_nodes); - return REG_NOMATCH; - } - } - } - re_node_set_free (&eps_via_nodes); - return free_fail_stack_return (fs); -} - -static reg_errcode_t -free_fail_stack_return (fs) - struct re_fail_stack_t *fs; -{ - if (fs) - { - int fs_idx; - for (fs_idx = 0; fs_idx < fs->num; ++fs_idx) - { - re_node_set_free (&fs->stack[fs_idx].eps_via_nodes); - re_free (fs->stack[fs_idx].regs); - } - re_free (fs->stack); - } - return REG_NOERROR; -} - -static void -update_regs (dfa, pmatch, cur_node, cur_idx, nmatch) - re_dfa_t *dfa; - regmatch_t *pmatch; - int cur_node, cur_idx, nmatch; -{ - int type = dfa->nodes[cur_node].type; - int reg_num; - if (type != OP_OPEN_SUBEXP && type != OP_CLOSE_SUBEXP) - return; - reg_num = dfa->nodes[cur_node].opr.idx + 1; - if (reg_num >= nmatch) - return; - if (type == OP_OPEN_SUBEXP) - { - /* We are at the first node of this sub expression. */ - pmatch[reg_num].rm_so = cur_idx; - pmatch[reg_num].rm_eo = -1; - } - else if (type == OP_CLOSE_SUBEXP) - /* We are at the first node of this sub expression. */ - pmatch[reg_num].rm_eo = cur_idx; -} - -#define NUMBER_OF_STATE 1 - -/* This function checks the STATE_LOG from the SCTX->last_str_idx to 0 - and sift the nodes in each states according to the following rules. - Updated state_log will be wrote to STATE_LOG. - - Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if... - 1. When STR_IDX == MATCH_LAST(the last index in the state_log): - If `a' isn't the LAST_NODE and `a' can't epsilon transit to - the LAST_NODE, we throw away the node `a'. - 2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts - string `s' and transit to `b': - i. 
If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw - away the node `a'. - ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is - throwed away, we throw away the node `a'. - 3. When 0 <= STR_IDX < n and 'a' epsilon transit to 'b': - i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the - node `a'. - ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is throwed away, - we throw away the node `a'. */ - -#define STATE_NODE_CONTAINS(state,node) \ - ((state) != NULL && re_node_set_contains (&(state)->nodes, node)) - -static reg_errcode_t -sift_states_backward (preg, mctx, sctx) - const regex_t *preg; - re_match_context_t *mctx; - re_sift_context_t *sctx; -{ - reg_errcode_t err; - re_dfa_t *dfa = (re_dfa_t *)preg->buffer; - int null_cnt = 0; - int str_idx = sctx->last_str_idx; - re_node_set cur_dest; - re_node_set *cur_src; /* Points the state_log[str_idx]->nodes */ - -#ifdef DEBUG - assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL); -#endif - cur_src = &mctx->state_log[str_idx]->nodes; - - /* Build sifted state_log[str_idx]. It has the nodes which can epsilon - transit to the last_node and the last_node itself. */ - err = re_node_set_init_1 (&cur_dest, sctx->last_node); - if (BE (err != REG_NOERROR, 0)) - return err; - err = update_cur_sifted_state (preg, mctx, sctx, str_idx, &cur_dest); - if (BE (err != REG_NOERROR, 0)) - goto free_return; - - /* Then check each states in the state_log. */ - while (str_idx > 0) - { - int i, ret; - /* Update counters. */ - null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0; - if (null_cnt > mctx->max_mb_elem_len) - { - memset (sctx->sifted_states, '\0', - sizeof (re_dfastate_t *) * str_idx); - re_node_set_free (&cur_dest); - return REG_NOERROR; - } - re_node_set_empty (&cur_dest); - --str_idx; - cur_src = ((mctx->state_log[str_idx] == NULL) ? &empty_set - : &mctx->state_log[str_idx]->nodes); - - /* Then build the next sifted state. 
- We build the next sifted state on `cur_dest', and update - `sifted_states[str_idx]' with `cur_dest'. - Note: - `cur_dest' is the sifted state from `state_log[str_idx + 1]'. - `cur_src' points the node_set of the old `state_log[str_idx]'. */ - for (i = 0; i < cur_src->nelem; i++) - { - int prev_node = cur_src->elems[i]; - int naccepted = 0; - re_token_type_t type = dfa->nodes[prev_node].type; - - if (IS_EPSILON_NODE(type)) - continue; -#ifdef RE_ENABLE_I18N - /* If the node may accept `multi byte'. */ - if (ACCEPT_MB_NODE (type)) - naccepted = sift_states_iter_mb (preg, mctx, sctx, prev_node, - str_idx, sctx->last_str_idx); - -#endif /* RE_ENABLE_I18N */ - /* We don't check backreferences here. - See update_cur_sifted_state(). */ - - if (!naccepted - && check_node_accept (preg, dfa->nodes + prev_node, mctx, - str_idx) - && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1], - dfa->nexts[prev_node])) - naccepted = 1; - - if (naccepted == 0) - continue; - - if (sctx->limits.nelem) - { - int to_idx = str_idx + naccepted; - if (check_dst_limits (dfa, &sctx->limits, mctx, - dfa->nexts[prev_node], to_idx, - prev_node, str_idx)) - continue; - } - ret = re_node_set_insert (&cur_dest, prev_node); - if (BE (ret == -1, 0)) - { - err = REG_ESPACE; - goto free_return; - } - } - - /* Add all the nodes which satisfy the following conditions: - - It can epsilon transit to a node in CUR_DEST. - - It is in CUR_SRC. - And update state_log. */ - err = update_cur_sifted_state (preg, mctx, sctx, str_idx, &cur_dest); - if (BE (err != REG_NOERROR, 0)) - goto free_return; - } - err = REG_NOERROR; - free_return: - re_node_set_free (&cur_dest); - return err; -} - -/* Helper functions. 
*/ - -static inline reg_errcode_t -clean_state_log_if_need (mctx, next_state_log_idx) - re_match_context_t *mctx; - int next_state_log_idx; -{ - int top = mctx->state_log_top; - - if (next_state_log_idx >= mctx->input->bufs_len - || (next_state_log_idx >= mctx->input->valid_len - && mctx->input->valid_len < mctx->input->len)) - { - reg_errcode_t err; - err = extend_buffers (mctx); - if (BE (err != REG_NOERROR, 0)) - return err; - } - - if (top < next_state_log_idx) - { - memset (mctx->state_log + top + 1, '\0', - sizeof (re_dfastate_t *) * (next_state_log_idx - top)); - mctx->state_log_top = next_state_log_idx; - } - return REG_NOERROR; -} - -static reg_errcode_t -merge_state_array (dfa, dst, src, num) - re_dfa_t *dfa; - re_dfastate_t **dst; - re_dfastate_t **src; - int num; -{ - int st_idx; - reg_errcode_t err; - for (st_idx = 0; st_idx < num; ++st_idx) - { - if (dst[st_idx] == NULL) - dst[st_idx] = src[st_idx]; - else if (src[st_idx] != NULL) - { - re_node_set merged_set; - err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes, - &src[st_idx]->nodes); - if (BE (err != REG_NOERROR, 0)) - return err; - dst[st_idx] = re_acquire_state (&err, dfa, &merged_set); - re_node_set_free (&merged_set); - if (BE (err != REG_NOERROR, 0)) - return err; - } - } - return REG_NOERROR; -} - -static reg_errcode_t -update_cur_sifted_state (preg, mctx, sctx, str_idx, dest_nodes) - const regex_t *preg; - re_match_context_t *mctx; - re_sift_context_t *sctx; - int str_idx; - re_node_set *dest_nodes; -{ - reg_errcode_t err; - re_dfa_t *dfa = (re_dfa_t *)preg->buffer; - const re_node_set *candidates; - candidates = ((mctx->state_log[str_idx] == NULL) ? &empty_set - : &mctx->state_log[str_idx]->nodes); - - /* At first, add the nodes which can epsilon transit to a node in - DEST_NODE. 
*/ - if (dest_nodes->nelem) - { - err = add_epsilon_src_nodes (dfa, dest_nodes, candidates); - if (BE (err != REG_NOERROR, 0)) - return err; - } - - /* Then, check the limitations in the current sift_context. */ - if (dest_nodes->nelem && sctx->limits.nelem) - { - err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits, - mctx->bkref_ents, str_idx); - if (BE (err != REG_NOERROR, 0)) - return err; - } - - /* Update state_log. */ - sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes); - if (BE (sctx->sifted_states[str_idx] == NULL && err != REG_NOERROR, 0)) - return err; - - if ((mctx->state_log[str_idx] != NULL - && mctx->state_log[str_idx]->has_backref)) - { - err = sift_states_bkref (preg, mctx, sctx, str_idx, dest_nodes); - if (BE (err != REG_NOERROR, 0)) - return err; - } - return REG_NOERROR; -} - -static reg_errcode_t -add_epsilon_src_nodes (dfa, dest_nodes, candidates) - re_dfa_t *dfa; - re_node_set *dest_nodes; - const re_node_set *candidates; -{ - reg_errcode_t err; - int src_idx; - re_node_set src_copy; - - err = re_node_set_init_copy (&src_copy, dest_nodes); - if (BE (err != REG_NOERROR, 0)) - return err; - for (src_idx = 0; src_idx < src_copy.nelem; ++src_idx) - { - err = re_node_set_add_intersect (dest_nodes, candidates, - dfa->inveclosures - + src_copy.elems[src_idx]); - if (BE (err != REG_NOERROR, 0)) - { - re_node_set_free (&src_copy); - return err; - } - } - re_node_set_free (&src_copy); - return REG_NOERROR; -} - -static reg_errcode_t -sub_epsilon_src_nodes (dfa, node, dest_nodes, candidates) - re_dfa_t *dfa; - int node; - re_node_set *dest_nodes; - const re_node_set *candidates; -{ - int ecl_idx; - reg_errcode_t err; - re_node_set *inv_eclosure = dfa->inveclosures + node; - re_node_set except_nodes; - re_node_set_init_empty (&except_nodes); - for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx) - { - int cur_node = inv_eclosure->elems[ecl_idx]; - if (cur_node == node) - continue; - if (IS_EPSILON_NODE 
(dfa->nodes[cur_node].type)) - { - int edst1 = dfa->edests[cur_node].elems[0]; - int edst2 = ((dfa->edests[cur_node].nelem > 1) - ? dfa->edests[cur_node].elems[1] : -1); - if ((!re_node_set_contains (inv_eclosure, edst1) - && re_node_set_contains (dest_nodes, edst1)) - || (edst2 > 0 - && !re_node_set_contains (inv_eclosure, edst2) - && re_node_set_contains (dest_nodes, edst2))) - { - err = re_node_set_add_intersect (&except_nodes, candidates, - dfa->inveclosures + cur_node); - if (BE (err != REG_NOERROR, 0)) - { - re_node_set_free (&except_nodes); - return err; - } - } - } - } - for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx) - { - int cur_node = inv_eclosure->elems[ecl_idx]; - if (!re_node_set_contains (&except_nodes, cur_node)) - { - int idx = re_node_set_contains (dest_nodes, cur_node) - 1; - re_node_set_remove_at (dest_nodes, idx); - } - } - re_node_set_free (&except_nodes); - return REG_NOERROR; -} - -static int -check_dst_limits (dfa, limits, mctx, dst_node, dst_idx, src_node, src_idx) - re_dfa_t *dfa; - re_node_set *limits; - re_match_context_t *mctx; - int dst_node, dst_idx, src_node, src_idx; -{ - int lim_idx, src_pos, dst_pos; - - for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx) - { - int subexp_idx; - struct re_backref_cache_entry *ent; - ent = mctx->bkref_ents + limits->elems[lim_idx]; - subexp_idx = dfa->nodes[ent->node].opr.idx - 1; - - dst_pos = check_dst_limits_calc_pos (dfa, mctx, limits->elems[lim_idx], - dfa->eclosures + dst_node, - subexp_idx, dst_node, dst_idx); - src_pos = check_dst_limits_calc_pos (dfa, mctx, limits->elems[lim_idx], - dfa->eclosures + src_node, - subexp_idx, src_node, src_idx); - - /* In case of: - ( ) - ( ) - ( ) */ - if (src_pos == dst_pos) - continue; /* This is unrelated limitation. 
*/ - else - return 1; - } - return 0; -} - -static int -check_dst_limits_calc_pos (dfa, mctx, limit, eclosures, subexp_idx, node, - str_idx) - re_dfa_t *dfa; - re_match_context_t *mctx; - re_node_set *eclosures; - int limit, subexp_idx, node, str_idx; -{ - struct re_backref_cache_entry *lim = mctx->bkref_ents + limit; - int pos = (str_idx < lim->subexp_from ? -1 - : (lim->subexp_to < str_idx ? 1 : 0)); - if (pos == 0 - && (str_idx == lim->subexp_from || str_idx == lim->subexp_to)) - { - int node_idx; - for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx) - { - int node = eclosures->elems[node_idx]; - re_token_type_t type= dfa->nodes[node].type; - if (type == OP_BACK_REF) - { - int bi = search_cur_bkref_entry (mctx, str_idx); - for (; bi < mctx->nbkref_ents; ++bi) - { - struct re_backref_cache_entry *ent = mctx->bkref_ents + bi; - if (ent->str_idx > str_idx) - break; - if (ent->node == node && ent->subexp_from == ent->subexp_to) - { - int cpos, dst; - dst = dfa->edests[node].elems[0]; - cpos = check_dst_limits_calc_pos (dfa, mctx, limit, - dfa->eclosures + dst, - subexp_idx, dst, - str_idx); - if ((str_idx == lim->subexp_from && cpos == -1) - || (str_idx == lim->subexp_to && cpos == 0)) - return cpos; - } - } - } - if (type == OP_OPEN_SUBEXP && subexp_idx == dfa->nodes[node].opr.idx - && str_idx == lim->subexp_from) - { - pos = -1; - break; - } - if (type == OP_CLOSE_SUBEXP && subexp_idx == dfa->nodes[node].opr.idx - && str_idx == lim->subexp_to) - break; - } - if (node_idx == eclosures->nelem && str_idx == lim->subexp_to) - pos = 1; - } - return pos; -} - -/* Check the limitations of sub expressions LIMITS, and remove the nodes - which are against limitations from DEST_NODES. 
*/ - -static reg_errcode_t -check_subexp_limits (dfa, dest_nodes, candidates, limits, bkref_ents, str_idx) - re_dfa_t *dfa; - re_node_set *dest_nodes; - const re_node_set *candidates; - re_node_set *limits; - struct re_backref_cache_entry *bkref_ents; - int str_idx; -{ - reg_errcode_t err; - int node_idx, lim_idx; - - for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx) - { - int subexp_idx; - struct re_backref_cache_entry *ent; - ent = bkref_ents + limits->elems[lim_idx]; - - if (str_idx <= ent->subexp_from || ent->str_idx < str_idx) - continue; /* This is unrelated limitation. */ - - subexp_idx = dfa->nodes[ent->node].opr.idx - 1; - if (ent->subexp_to == str_idx) - { - int ops_node = -1; - int cls_node = -1; - for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx) - { - int node = dest_nodes->elems[node_idx]; - re_token_type_t type= dfa->nodes[node].type; - if (type == OP_OPEN_SUBEXP - && subexp_idx == dfa->nodes[node].opr.idx) - ops_node = node; - else if (type == OP_CLOSE_SUBEXP - && subexp_idx == dfa->nodes[node].opr.idx) - cls_node = node; - } - - /* Check the limitation of the open subexpression. */ - /* Note that (ent->subexp_to = str_idx != ent->subexp_from). */ - if (ops_node >= 0) - { - err = sub_epsilon_src_nodes(dfa, ops_node, dest_nodes, - candidates); - if (BE (err != REG_NOERROR, 0)) - return err; - } - /* Check the limitation of the close subexpression. */ - for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx) - { - int node = dest_nodes->elems[node_idx]; - if (!re_node_set_contains (dfa->inveclosures + node, cls_node) - && !re_node_set_contains (dfa->eclosures + node, cls_node)) - { - /* It is against this limitation. - Remove it form the current sifted state. 
*/ - err = sub_epsilon_src_nodes(dfa, node, dest_nodes, - candidates); - if (BE (err != REG_NOERROR, 0)) - return err; - --node_idx; - } - } - } - else /* (ent->subexp_to != str_idx) */ - { - for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx) - { - int node = dest_nodes->elems[node_idx]; - re_token_type_t type= dfa->nodes[node].type; - if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP) - { - if (subexp_idx != dfa->nodes[node].opr.idx) - continue; - if ((type == OP_CLOSE_SUBEXP && ent->subexp_to != str_idx) - || (type == OP_OPEN_SUBEXP)) - { - /* It is against this limitation. - Remove it form the current sifted state. */ - err = sub_epsilon_src_nodes(dfa, node, dest_nodes, - candidates); - if (BE (err != REG_NOERROR, 0)) - return err; - } - } - } - } - } - return REG_NOERROR; -} - -static reg_errcode_t -sift_states_bkref (preg, mctx, sctx, str_idx, dest_nodes) - const regex_t *preg; - re_match_context_t *mctx; - re_sift_context_t *sctx; - int str_idx; - re_node_set *dest_nodes; -{ - reg_errcode_t err; - re_dfa_t *dfa = (re_dfa_t *)preg->buffer; - int node_idx, node; - re_sift_context_t local_sctx; - const re_node_set *candidates; - candidates = ((mctx->state_log[str_idx] == NULL) ? &empty_set - : &mctx->state_log[str_idx]->nodes); - local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized. */ - - for (node_idx = 0; node_idx < candidates->nelem; ++node_idx) - { - int cur_bkref_idx = re_string_cur_idx (mctx->input); - re_token_type_t type; - node = candidates->elems[node_idx]; - type = dfa->nodes[node].type; - if (node == sctx->cur_bkref && str_idx == cur_bkref_idx) - continue; - /* Avoid infinite loop for the REs like "()\1+". 
*/ - if (node == sctx->last_node && str_idx == sctx->last_str_idx) - continue; - if (type == OP_BACK_REF) - { - int enabled_idx = search_cur_bkref_entry (mctx, str_idx); - for (; enabled_idx < mctx->nbkref_ents; ++enabled_idx) - { - int disabled_idx, subexp_len, to_idx, dst_node; - struct re_backref_cache_entry *entry; - entry = mctx->bkref_ents + enabled_idx; - if (entry->str_idx > str_idx) - break; - if (entry->node != node) - continue; - subexp_len = entry->subexp_to - entry->subexp_from; - to_idx = str_idx + subexp_len; - dst_node = (subexp_len ? dfa->nexts[node] - : dfa->edests[node].elems[0]); - - if (to_idx > sctx->last_str_idx - || sctx->sifted_states[to_idx] == NULL - || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], - dst_node) - || check_dst_limits (dfa, &sctx->limits, mctx, node, - str_idx, dst_node, to_idx)) - continue; - { - re_dfastate_t *cur_state; - entry->flag = 0; - for (disabled_idx = enabled_idx + 1; - disabled_idx < mctx->nbkref_ents; ++disabled_idx) - { - struct re_backref_cache_entry *entry2; - entry2 = mctx->bkref_ents + disabled_idx; - if (entry2->str_idx > str_idx) - break; - entry2->flag = (entry2->node == node) ? 
1 : entry2->flag; - } - - if (local_sctx.sifted_states == NULL) - { - local_sctx = *sctx; - err = re_node_set_init_copy (&local_sctx.limits, - &sctx->limits); - if (BE (err != REG_NOERROR, 0)) - goto free_return; - } - local_sctx.last_node = node; - local_sctx.last_str_idx = str_idx; - err = re_node_set_insert (&local_sctx.limits, enabled_idx); - if (BE (err < 0, 0)) - { - err = REG_ESPACE; - goto free_return; - } - cur_state = local_sctx.sifted_states[str_idx]; - err = sift_states_backward (preg, mctx, &local_sctx); - if (BE (err != REG_NOERROR, 0)) - goto free_return; - if (sctx->limited_states != NULL) - { - err = merge_state_array (dfa, sctx->limited_states, - local_sctx.sifted_states, - str_idx + 1); - if (BE (err != REG_NOERROR, 0)) - goto free_return; - } - local_sctx.sifted_states[str_idx] = cur_state; - re_node_set_remove (&local_sctx.limits, enabled_idx); - /* We must not use the variable entry here, since - mctx->bkref_ents might be realloced. */ - mctx->bkref_ents[enabled_idx].flag = 1; - } - } - enabled_idx = search_cur_bkref_entry (mctx, str_idx); - for (; enabled_idx < mctx->nbkref_ents; ++enabled_idx) - { - struct re_backref_cache_entry *entry; - entry = mctx->bkref_ents + enabled_idx; - if (entry->str_idx > str_idx) - break; - if (entry->node == node) - entry->flag = 0; - } - } - } - err = REG_NOERROR; - free_return: - if (local_sctx.sifted_states != NULL) - { - re_node_set_free (&local_sctx.limits); - } - - return err; -} - - -#ifdef RE_ENABLE_I18N -static int -sift_states_iter_mb (preg, mctx, sctx, node_idx, str_idx, max_str_idx) - const regex_t *preg; - const re_match_context_t *mctx; - re_sift_context_t *sctx; - int node_idx, str_idx, max_str_idx; -{ - re_dfa_t *dfa = (re_dfa_t *) preg->buffer; - int naccepted; - /* Check the node can accept `multi byte'. 
*/ - naccepted = check_node_accept_bytes (preg, node_idx, mctx->input, str_idx); - if (naccepted > 0 && str_idx + naccepted <= max_str_idx && - !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted], - dfa->nexts[node_idx])) - /* The node can't accept the `multi byte', or the - destination was already throwed away, then the node - could't accept the current input `multi byte'. */ - naccepted = 0; - /* Otherwise, it is sure that the node could accept - `naccepted' bytes input. */ - return naccepted; -} -#endif /* RE_ENABLE_I18N */ - - -/* Functions for state transition. */ - -/* Return the next state to which the current state STATE will transit by - accepting the current input byte, and update STATE_LOG if necessary. - If STATE can accept a multibyte char/collating element/back reference - update the destination of STATE_LOG. */ - -static re_dfastate_t * -transit_state (err, preg, mctx, state, fl_search) - reg_errcode_t *err; - const regex_t *preg; - re_match_context_t *mctx; - re_dfastate_t *state; - int fl_search; -{ - re_dfa_t *dfa = (re_dfa_t *) preg->buffer; - re_dfastate_t **trtable, *next_state; - unsigned char ch; - int cur_idx; - - if (re_string_cur_idx (mctx->input) + 1 >= mctx->input->bufs_len - || (re_string_cur_idx (mctx->input) + 1 >= mctx->input->valid_len - && mctx->input->valid_len < mctx->input->len)) - { - *err = extend_buffers (mctx); - if (BE (*err != REG_NOERROR, 0)) - return NULL; - } - - *err = REG_NOERROR; - if (state == NULL) - { - next_state = state; - re_string_skip_bytes (mctx->input, 1); - } - else - { -#ifdef RE_ENABLE_I18N - /* If the current state can accept multibyte. */ - if (state->accept_mb) - { - *err = transit_state_mb (preg, state, mctx); - if (BE (*err != REG_NOERROR, 0)) - return NULL; - } -#endif /* RE_ENABLE_I18N */ - - /* Then decide the next state with the single byte. */ - if (1) - { - /* Use transition table */ - ch = re_string_fetch_byte (mctx->input); - trtable = fl_search ? 
state->trtable_search : state->trtable; - if (trtable == NULL) - { - trtable = build_trtable (preg, state, fl_search); - if (fl_search) - state->trtable_search = trtable; - else - state->trtable = trtable; - } - next_state = trtable[ch]; - } - else - { - /* don't use transition table */ - next_state = transit_state_sb (err, preg, state, fl_search, mctx); - if (BE (next_state == NULL && err != REG_NOERROR, 0)) - return NULL; - } - } - - cur_idx = re_string_cur_idx (mctx->input); - /* Update the state_log if we need. */ - if (mctx->state_log != NULL) - { - if (cur_idx > mctx->state_log_top) - { - mctx->state_log[cur_idx] = next_state; - mctx->state_log_top = cur_idx; - } - else if (mctx->state_log[cur_idx] == 0) - { - mctx->state_log[cur_idx] = next_state; - } - else - { - re_dfastate_t *pstate; - unsigned int context; - re_node_set next_nodes, *log_nodes, *table_nodes = NULL; - /* If (state_log[cur_idx] != 0), it implies that cur_idx is - the destination of a multibyte char/collating element/ - back reference. Then the next state is the union set of - these destinations and the results of the transition table. */ - pstate = mctx->state_log[cur_idx]; - log_nodes = pstate->entrance_nodes; - if (next_state != NULL) - { - table_nodes = next_state->entrance_nodes; - *err = re_node_set_init_union (&next_nodes, table_nodes, - log_nodes); - if (BE (*err != REG_NOERROR, 0)) - return NULL; - } - else - next_nodes = *log_nodes; - /* Note: We already add the nodes of the initial state, - then we don't need to add them here. */ - - context = re_string_context_at (mctx->input, - re_string_cur_idx (mctx->input) - 1, - mctx->eflags, preg->newline_anchor); - next_state = mctx->state_log[cur_idx] - = re_acquire_state_context (err, dfa, &next_nodes, context); - /* We don't need to check errors here, since the return value of - this function is next_state and ERR is already set. 
*/ - - if (table_nodes != NULL) - re_node_set_free (&next_nodes); - } - } - - /* Check OP_OPEN_SUBEXP in the current state in case that we use them - later. We must check them here, since the back references in the - next state might use them. */ - if (dfa->nbackref && next_state/* && fl_process_bkref */) - { - *err = check_subexp_matching_top (dfa, mctx, &next_state->nodes, - cur_idx); - if (BE (*err != REG_NOERROR, 0)) - return NULL; - } - - /* If the next state has back references. */ - if (next_state != NULL && next_state->has_backref) - { - *err = transit_state_bkref (preg, &next_state->nodes, mctx); - if (BE (*err != REG_NOERROR, 0)) - return NULL; - next_state = mctx->state_log[cur_idx]; - } - return next_state; -} - -/* Helper functions for transit_state. */ - -/* From the node set CUR_NODES, pick up the nodes whose types are - OP_OPEN_SUBEXP and which have corresponding back references in the regular - expression. And register them to use them later for evaluating the - correspoding back references. */ - -static reg_errcode_t -check_subexp_matching_top (dfa, mctx, cur_nodes, str_idx) - re_dfa_t *dfa; - re_match_context_t *mctx; - re_node_set *cur_nodes; - int str_idx; -{ - int node_idx; - reg_errcode_t err; - - /* TODO: This isn't efficient. - Because there might be more than one nodes whose types are - OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all - nodes. - E.g. RE: (a){2} */ - for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx) - { - int node = cur_nodes->elems[node_idx]; - if (dfa->nodes[node].type == OP_OPEN_SUBEXP - && dfa->used_bkref_map & (1 << dfa->nodes[node].opr.idx)) - { - err = match_ctx_add_subtop (mctx, node, str_idx); - if (BE (err != REG_NOERROR, 0)) - return err; - } - } - return REG_NOERROR; -} - -/* Return the next state to which the current state STATE will transit by - accepting the current input byte. 
*/ - -static re_dfastate_t * -transit_state_sb (err, preg, state, fl_search, mctx) - reg_errcode_t *err; - const regex_t *preg; - re_dfastate_t *state; - int fl_search; - re_match_context_t *mctx; -{ - re_dfa_t *dfa = (re_dfa_t *) preg->buffer; - re_node_set next_nodes; - re_dfastate_t *next_state; - int node_cnt, cur_str_idx = re_string_cur_idx (mctx->input); - unsigned int context; - - *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1); - if (BE (*err != REG_NOERROR, 0)) - return NULL; - for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt) - { - int cur_node = state->nodes.elems[node_cnt]; - if (check_node_accept (preg, dfa->nodes + cur_node, mctx, cur_str_idx)) - { - *err = re_node_set_merge (&next_nodes, - dfa->eclosures + dfa->nexts[cur_node]); - if (BE (*err != REG_NOERROR, 0)) - { - re_node_set_free (&next_nodes); - return NULL; - } - } - } - if (fl_search) - { -#ifdef RE_ENABLE_I18N - int not_initial = 0; - if (MB_CUR_MAX > 1) - for (node_cnt = 0; node_cnt < next_nodes.nelem; ++node_cnt) - if (dfa->nodes[next_nodes.elems[node_cnt]].type == CHARACTER) - { - not_initial = dfa->nodes[next_nodes.elems[node_cnt]].mb_partial; - break; - } - if (!not_initial) -#endif - { - *err = re_node_set_merge (&next_nodes, - dfa->init_state->entrance_nodes); - if (BE (*err != REG_NOERROR, 0)) - { - re_node_set_free (&next_nodes); - return NULL; - } - } - } - context = re_string_context_at (mctx->input, cur_str_idx, mctx->eflags, - preg->newline_anchor); - next_state = re_acquire_state_context (err, dfa, &next_nodes, context); - /* We don't need to check errors here, since the return value of - this function is next_state and ERR is already set. 
*/ - - re_node_set_free (&next_nodes); - re_string_skip_bytes (mctx->input, 1); - return next_state; -} - -#ifdef RE_ENABLE_I18N -static reg_errcode_t -transit_state_mb (preg, pstate, mctx) - const regex_t *preg; - re_dfastate_t *pstate; - re_match_context_t *mctx; -{ - reg_errcode_t err; - re_dfa_t *dfa = (re_dfa_t *) preg->buffer; - int i; - - for (i = 0; i < pstate->nodes.nelem; ++i) - { - re_node_set dest_nodes, *new_nodes; - int cur_node_idx = pstate->nodes.elems[i]; - int naccepted = 0, dest_idx; - unsigned int context; - re_dfastate_t *dest_state; - - if (dfa->nodes[cur_node_idx].constraint) - { - context = re_string_context_at (mctx->input, - re_string_cur_idx (mctx->input), - mctx->eflags, preg->newline_anchor); - if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint, - context)) - continue; - } - - /* How many bytes the node can accepts? */ - if (ACCEPT_MB_NODE (dfa->nodes[cur_node_idx].type)) - naccepted = check_node_accept_bytes (preg, cur_node_idx, mctx->input, - re_string_cur_idx (mctx->input)); - if (naccepted == 0) - continue; - - /* The node can accepts `naccepted' bytes. */ - dest_idx = re_string_cur_idx (mctx->input) + naccepted; - mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted - : mctx->max_mb_elem_len); - err = clean_state_log_if_need (mctx, dest_idx); - if (BE (err != REG_NOERROR, 0)) - return err; -#ifdef DEBUG - assert (dfa->nexts[cur_node_idx] != -1); -#endif - /* `cur_node_idx' may point the entity of the OP_CONTEXT_NODE, - then we use pstate->nodes.elems[i] instead. 
*/ - new_nodes = dfa->eclosures + dfa->nexts[pstate->nodes.elems[i]]; - - dest_state = mctx->state_log[dest_idx]; - if (dest_state == NULL) - dest_nodes = *new_nodes; - else - { - err = re_node_set_init_union (&dest_nodes, - dest_state->entrance_nodes, new_nodes); - if (BE (err != REG_NOERROR, 0)) - return err; - } - context = re_string_context_at (mctx->input, dest_idx - 1, mctx->eflags, - preg->newline_anchor); - mctx->state_log[dest_idx] - = re_acquire_state_context (&err, dfa, &dest_nodes, context); - if (dest_state != NULL) - re_node_set_free (&dest_nodes); - if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0)) - return err; - } - return REG_NOERROR; -} -#endif /* RE_ENABLE_I18N */ - -static reg_errcode_t -transit_state_bkref (preg, nodes, mctx) - const regex_t *preg; - re_node_set *nodes; - re_match_context_t *mctx; -{ - reg_errcode_t err; - re_dfa_t *dfa = (re_dfa_t *) preg->buffer; - int i; - int cur_str_idx = re_string_cur_idx (mctx->input); - - for (i = 0; i < nodes->nelem; ++i) - { - int dest_str_idx, prev_nelem, bkc_idx; - int node_idx = nodes->elems[i]; - unsigned int context; - re_token_t *node = dfa->nodes + node_idx; - re_node_set *new_dest_nodes; - - /* Check whether `node' is a backreference or not. */ - if (node->type != OP_BACK_REF) - continue; - - if (node->constraint) - { - context = re_string_context_at (mctx->input, cur_str_idx, - mctx->eflags, preg->newline_anchor); - if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context)) - continue; - } - - /* `node' is a backreference. - Check the substring which the substring matched. */ - bkc_idx = mctx->nbkref_ents; - err = get_subexp (preg, mctx, node_idx, cur_str_idx); - if (BE (err != REG_NOERROR, 0)) - goto free_return; - - /* And add the epsilon closures (which is `new_dest_nodes') of - the backreference to appropriate state_log. 
*/ -#ifdef DEBUG - assert (dfa->nexts[node_idx] != -1); -#endif - for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx) - { - int subexp_len; - re_dfastate_t *dest_state; - struct re_backref_cache_entry *bkref_ent; - bkref_ent = mctx->bkref_ents + bkc_idx; - if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx) - continue; - subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from; - new_dest_nodes = (subexp_len == 0 - ? dfa->eclosures + dfa->edests[node_idx].elems[0] - : dfa->eclosures + dfa->nexts[node_idx]); - dest_str_idx = (cur_str_idx + bkref_ent->subexp_to - - bkref_ent->subexp_from); - context = re_string_context_at (mctx->input, dest_str_idx - 1, - mctx->eflags, preg->newline_anchor); - dest_state = mctx->state_log[dest_str_idx]; - prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0 - : mctx->state_log[cur_str_idx]->nodes.nelem); - /* Add `new_dest_node' to state_log. */ - if (dest_state == NULL) - { - mctx->state_log[dest_str_idx] - = re_acquire_state_context (&err, dfa, new_dest_nodes, - context); - if (BE (mctx->state_log[dest_str_idx] == NULL - && err != REG_NOERROR, 0)) - goto free_return; - } - else - { - re_node_set dest_nodes; - err = re_node_set_init_union (&dest_nodes, - dest_state->entrance_nodes, - new_dest_nodes); - if (BE (err != REG_NOERROR, 0)) - { - re_node_set_free (&dest_nodes); - goto free_return; - } - mctx->state_log[dest_str_idx] - = re_acquire_state_context (&err, dfa, &dest_nodes, context); - re_node_set_free (&dest_nodes); - if (BE (mctx->state_log[dest_str_idx] == NULL - && err != REG_NOERROR, 0)) - goto free_return; - } - /* We need to check recursively if the backreference can epsilon - transit. 
*/ - if (subexp_len == 0 - && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem) - { - err = check_subexp_matching_top (dfa, mctx, new_dest_nodes, - cur_str_idx); - if (BE (err != REG_NOERROR, 0)) - goto free_return; - err = transit_state_bkref (preg, new_dest_nodes, mctx); - if (BE (err != REG_NOERROR, 0)) - goto free_return; - } - } - } - err = REG_NOERROR; - free_return: - return err; -} - -/* Enumerate all the candidates which the backreference BKREF_NODE can match - at BKREF_STR_IDX, and register them by match_ctx_add_entry(). - Note that we might collect inappropriate candidates here. - However, the cost of checking them strictly here is too high, then we - delay these checking for prune_impossible_nodes(). */ - -static reg_errcode_t -get_subexp (preg, mctx, bkref_node, bkref_str_idx) - const regex_t *preg; - re_match_context_t *mctx; - int bkref_node, bkref_str_idx; -{ - int subexp_num, sub_top_idx; - re_dfa_t *dfa = (re_dfa_t *) preg->buffer; - char *buf = (char *) re_string_get_buffer (mctx->input); - /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX. */ - int cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx); - for (; cache_idx < mctx->nbkref_ents; ++cache_idx) - { - struct re_backref_cache_entry *entry = mctx->bkref_ents + cache_idx; - if (entry->str_idx > bkref_str_idx) - break; - if (entry->node == bkref_node) - return REG_NOERROR; /* We already checked it. */ - } - subexp_num = dfa->nodes[bkref_node].opr.idx - 1; - - /* For each sub expression */ - for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx) - { - reg_errcode_t err; - re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx]; - re_sub_match_last_t *sub_last; - int sub_last_idx, sl_str; - char *bkref_str; - - if (dfa->nodes[sub_top->node].opr.idx != subexp_num) - continue; /* It isn't related. */ - - sl_str = sub_top->str_idx; - bkref_str = buf + bkref_str_idx; - /* At first, check the last node of sub expressions we already - evaluated. 
*/ - for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx) - { - int sl_str_diff; - sub_last = sub_top->lasts[sub_last_idx]; - sl_str_diff = sub_last->str_idx - sl_str; - /* The matched string by the sub expression match with the substring - at the back reference? */ - if (sl_str_diff > 0 - && memcmp (bkref_str, buf + sl_str, sl_str_diff) != 0) - break; /* We don't need to search this sub expression any more. */ - bkref_str += sl_str_diff; - sl_str += sl_str_diff; - err = get_subexp_sub (preg, mctx, sub_top, sub_last, bkref_node, - bkref_str_idx); - if (err == REG_NOMATCH) - continue; - if (BE (err != REG_NOERROR, 0)) - return err; - } - if (sub_last_idx < sub_top->nlasts) - continue; - if (sub_last_idx > 0) - ++sl_str; - /* Then, search for the other last nodes of the sub expression. */ - for (; sl_str <= bkref_str_idx; ++sl_str) - { - int cls_node, sl_str_off; - re_node_set *nodes; - sl_str_off = sl_str - sub_top->str_idx; - /* The matched string by the sub expression match with the substring - at the back reference? */ - if (sl_str_off > 0 - && memcmp (bkref_str++, buf + sl_str - 1, 1) != 0) - break; /* We don't need to search this sub expression any more. */ - if (mctx->state_log[sl_str] == NULL) - continue; - /* Does this state have a ')' of the sub expression? */ - nodes = &mctx->state_log[sl_str]->nodes; - cls_node = find_subexp_node (dfa, nodes, subexp_num, 0); - if (cls_node == -1) - continue; /* No. */ - if (sub_top->path == NULL) - { - sub_top->path = calloc (sizeof (state_array_t), - sl_str - sub_top->str_idx + 1); - if (sub_top->path == NULL) - return REG_ESPACE; - } - /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node - in the current context? 
*/ - err = check_arrival (preg, mctx, sub_top->path, sub_top->node, - sub_top->str_idx, cls_node, sl_str, 0); - if (err == REG_NOMATCH) - continue; - if (BE (err != REG_NOERROR, 0)) - return err; - sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str); - if (BE (sub_last == NULL, 0)) - return REG_ESPACE; - err = get_subexp_sub (preg, mctx, sub_top, sub_last, bkref_node, - bkref_str_idx); - if (err == REG_NOMATCH) - continue; - } - } - return REG_NOERROR; -} - -/* Helper functions for get_subexp(). */ - -/* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR. - If it can arrive, register the sub expression expressed with SUB_TOP - and SUB_LAST. */ - -static reg_errcode_t -get_subexp_sub (preg, mctx, sub_top, sub_last, bkref_node, bkref_str) - const regex_t *preg; - re_match_context_t *mctx; - re_sub_match_top_t *sub_top; - re_sub_match_last_t *sub_last; - int bkref_node, bkref_str; -{ - reg_errcode_t err; - int to_idx; - /* Can the subexpression arrive the back reference? */ - err = check_arrival (preg, mctx, &sub_last->path, sub_last->node, - sub_last->str_idx, bkref_node, bkref_str, 1); - if (err != REG_NOERROR) - return err; - err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx, - sub_last->str_idx); - if (BE (err != REG_NOERROR, 0)) - return err; - to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx; - clean_state_log_if_need (mctx, to_idx); - return REG_NOERROR; -} - -/* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX. - Search '(' if FL_OPEN, or search ')' otherwise. - TODO: This function isn't efficient... - Because there might be more than one nodes whose types are - OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all - nodes. - E.g. 
RE: (a){2} */ - -static int -find_subexp_node (dfa, nodes, subexp_idx, fl_open) - re_dfa_t *dfa; - re_node_set *nodes; - int subexp_idx, fl_open; -{ - int cls_idx; - for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx) - { - int cls_node = nodes->elems[cls_idx]; - re_token_t *node = dfa->nodes + cls_node; - if (((fl_open && node->type == OP_OPEN_SUBEXP) - || (!fl_open && node->type == OP_CLOSE_SUBEXP)) - && node->opr.idx == subexp_idx) - return cls_node; - } - return -1; -} - -/* Check whether the node TOP_NODE at TOP_STR can arrive to the node - LAST_NODE at LAST_STR. We record the path onto PATH since it will be - heavily reused. - Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise. */ - -static reg_errcode_t -check_arrival (preg, mctx, path, top_node, top_str, last_node, last_str, - fl_open) - const regex_t *preg; - re_match_context_t *mctx; - state_array_t *path; - int top_node, top_str, last_node, last_str, fl_open; -{ - re_dfa_t *dfa = (re_dfa_t *) preg->buffer; - reg_errcode_t err; - int subexp_num, backup_cur_idx, str_idx, null_cnt; - re_dfastate_t *cur_state = NULL; - re_node_set *cur_nodes, next_nodes; - re_dfastate_t **backup_state_log; - unsigned int context; - - subexp_num = dfa->nodes[top_node].opr.idx; - /* Extend the buffer if we need. */ - if (path->alloc < last_str + mctx->max_mb_elem_len + 1) - { - re_dfastate_t **new_array; - int old_alloc = path->alloc; - path->alloc += last_str + mctx->max_mb_elem_len + 1; - new_array = re_realloc (path->array, re_dfastate_t *, path->alloc); - if (new_array == NULL) - return REG_ESPACE; - path->array = new_array; - memset (new_array + old_alloc, '\0', - sizeof (re_dfastate_t *) * (path->alloc - old_alloc)); - } - - str_idx = path->next_idx == 0 ? top_str : path->next_idx; - - /* Temporary modify MCTX. */ - backup_state_log = mctx->state_log; - backup_cur_idx = mctx->input->cur_idx; - mctx->state_log = path->array; - mctx->input->cur_idx = str_idx; - - /* Setup initial node set. 
*/ - context = re_string_context_at (mctx->input, str_idx - 1, mctx->eflags, - preg->newline_anchor); - if (str_idx == top_str) - { - err = re_node_set_init_1 (&next_nodes, top_node); - if (BE (err != REG_NOERROR, 0)) - return err; - err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, fl_open); - if (BE (err != REG_NOERROR, 0)) - { - re_node_set_free (&next_nodes); - return err; - } - } - else - { - cur_state = mctx->state_log[str_idx]; - if (cur_state && cur_state->has_backref) - { - err = re_node_set_init_copy (&next_nodes, &cur_state->nodes); - if (BE ( err != REG_NOERROR, 0)) - return err; - } - else - re_node_set_init_empty (&next_nodes); - } - if (str_idx == top_str || (cur_state && cur_state->has_backref)) - { - if (next_nodes.nelem) - { - err = expand_bkref_cache (preg, mctx, &next_nodes, str_idx, last_str, - subexp_num, fl_open); - if (BE ( err != REG_NOERROR, 0)) - { - re_node_set_free (&next_nodes); - return err; - } - } - cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context); - if (BE (cur_state == NULL && err != REG_NOERROR, 0)) - { - re_node_set_free (&next_nodes); - return err; - } - mctx->state_log[str_idx] = cur_state; - } - - for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;) - { - re_node_set_empty (&next_nodes); - if (mctx->state_log[str_idx + 1]) - { - err = re_node_set_merge (&next_nodes, - &mctx->state_log[str_idx + 1]->nodes); - if (BE (err != REG_NOERROR, 0)) - { - re_node_set_free (&next_nodes); - return err; - } - } - if (cur_state) - { - err = check_arrival_add_next_nodes(preg, dfa, mctx, str_idx, - &cur_state->nodes, &next_nodes); - if (BE (err != REG_NOERROR, 0)) - { - re_node_set_free (&next_nodes); - return err; - } - } - ++str_idx; - if (next_nodes.nelem) - { - err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, - fl_open); - if (BE (err != REG_NOERROR, 0)) - { - re_node_set_free (&next_nodes); - return err; - } - err = expand_bkref_cache (preg, mctx, &next_nodes, 
str_idx, last_str, - subexp_num, fl_open); - if (BE ( err != REG_NOERROR, 0)) - { - re_node_set_free (&next_nodes); - return err; - } - } - context = re_string_context_at (mctx->input, str_idx - 1, mctx->eflags, - preg->newline_anchor); - cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context); - if (BE (cur_state == NULL && err != REG_NOERROR, 0)) - { - re_node_set_free (&next_nodes); - return err; - } - mctx->state_log[str_idx] = cur_state; - null_cnt = cur_state == NULL ? null_cnt + 1 : 0; - } - re_node_set_free (&next_nodes); - cur_nodes = (mctx->state_log[last_str] == NULL ? NULL - : &mctx->state_log[last_str]->nodes); - path->next_idx = str_idx; - - /* Fix MCTX. */ - mctx->state_log = backup_state_log; - mctx->input->cur_idx = backup_cur_idx; - - if (cur_nodes == NULL) - return REG_NOMATCH; - /* Then check the current node set has the node LAST_NODE. */ - return (re_node_set_contains (cur_nodes, last_node) - || re_node_set_contains (cur_nodes, last_node) ? REG_NOERROR - : REG_NOMATCH); -} - -/* Helper functions for check_arrival. */ - -/* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them - to NEXT_NODES. - TODO: This function is similar to the functions transit_state*(), - however this function has many additional works. - Can't we unify them? */ - -static reg_errcode_t -check_arrival_add_next_nodes (preg, dfa, mctx, str_idx, cur_nodes, next_nodes) - const regex_t *preg; - re_dfa_t *dfa; - re_match_context_t *mctx; - int str_idx; - re_node_set *cur_nodes, *next_nodes; -{ - int cur_idx; - reg_errcode_t err; - re_node_set union_set; - re_node_set_init_empty (&union_set); - for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx) - { - int naccepted = 0; - int cur_node = cur_nodes->elems[cur_idx]; - re_token_type_t type = dfa->nodes[cur_node].type; - if (IS_EPSILON_NODE(type)) - continue; -#ifdef RE_ENABLE_I18N - /* If the node may accept `multi byte'. 
*/ - if (ACCEPT_MB_NODE (type)) - { - naccepted = check_node_accept_bytes (preg, cur_node, mctx->input, - str_idx); - if (naccepted > 1) - { - re_dfastate_t *dest_state; - int next_node = dfa->nexts[cur_node]; - int next_idx = str_idx + naccepted; - dest_state = mctx->state_log[next_idx]; - re_node_set_empty (&union_set); - if (dest_state) - { - err = re_node_set_merge (&union_set, &dest_state->nodes); - if (BE (err != REG_NOERROR, 0)) - { - re_node_set_free (&union_set); - return err; - } - err = re_node_set_insert (&union_set, next_node); - if (BE (err < 0, 0)) - { - re_node_set_free (&union_set); - return REG_ESPACE; - } - } - else - { - err = re_node_set_insert (&union_set, next_node); - if (BE (err < 0, 0)) - { - re_node_set_free (&union_set); - return REG_ESPACE; - } - } - mctx->state_log[next_idx] = re_acquire_state (&err, dfa, - &union_set); - if (BE (mctx->state_log[next_idx] == NULL - && err != REG_NOERROR, 0)) - { - re_node_set_free (&union_set); - return err; - } - } - } -#endif /* RE_ENABLE_I18N */ - if (naccepted - || check_node_accept (preg, dfa->nodes + cur_node, mctx, - str_idx)) - { - err = re_node_set_insert (next_nodes, dfa->nexts[cur_node]); - if (BE (err < 0, 0)) - { - re_node_set_free (&union_set); - return REG_ESPACE; - } - } - } - re_node_set_free (&union_set); - return REG_NOERROR; -} - -/* For all the nodes in CUR_NODES, add the epsilon closures of them to - CUR_NODES, however exclude the nodes which are: - - inside the sub expression whose number is EX_SUBEXP, if FL_OPEN. - - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN. 
-*/ - -static reg_errcode_t -check_arrival_expand_ecl (dfa, cur_nodes, ex_subexp, fl_open) - re_dfa_t *dfa; - re_node_set *cur_nodes; - int ex_subexp, fl_open; -{ - reg_errcode_t err; - int idx, outside_node; - re_node_set new_nodes; -#ifdef DEBUG - assert (cur_nodes->nelem); -#endif - err = re_node_set_alloc (&new_nodes, cur_nodes->nelem); - if (BE (err != REG_NOERROR, 0)) - return err; - /* Create a new node set NEW_NODES with the nodes which are epsilon - closures of the node in CUR_NODES. */ - - for (idx = 0; idx < cur_nodes->nelem; ++idx) - { - int cur_node = cur_nodes->elems[idx]; - re_node_set *eclosure = dfa->eclosures + cur_node; - outside_node = find_subexp_node (dfa, eclosure, ex_subexp, fl_open); - if (outside_node == -1) - { - /* There are no problematic nodes, just merge them. */ - err = re_node_set_merge (&new_nodes, eclosure); - if (BE (err != REG_NOERROR, 0)) - { - re_node_set_free (&new_nodes); - return err; - } - } - else - { - /* There are problematic nodes, re-calculate incrementally. */ - err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node, - ex_subexp, fl_open); - if (BE (err != REG_NOERROR, 0)) - { - re_node_set_free (&new_nodes); - return err; - } - } - } - re_node_set_free (cur_nodes); - *cur_nodes = new_nodes; - return REG_NOERROR; -} - -/* Helper function for check_arrival_expand_ecl. - Check incrementally the epsilon closure of TARGET, and if it isn't - problematic append it to DST_NODES. 
*/ - -static reg_errcode_t -check_arrival_expand_ecl_sub (dfa, dst_nodes, target, ex_subexp, fl_open) - re_dfa_t *dfa; - int target, ex_subexp, fl_open; - re_node_set *dst_nodes; -{ - int cur_node, type; - for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);) - { - int err; - type = dfa->nodes[cur_node].type; - - if (((type == OP_OPEN_SUBEXP && fl_open) - || (type == OP_CLOSE_SUBEXP && !fl_open)) - && dfa->nodes[cur_node].opr.idx == ex_subexp) - { - if (!fl_open) - { - err = re_node_set_insert (dst_nodes, cur_node); - if (BE (err == -1, 0)) - return REG_ESPACE; - } - break; - } - err = re_node_set_insert (dst_nodes, cur_node); - if (BE (err == -1, 0)) - return REG_ESPACE; - if (dfa->edests[cur_node].nelem == 0) - break; - if (dfa->edests[cur_node].nelem == 2) - { - err = check_arrival_expand_ecl_sub (dfa, dst_nodes, - dfa->edests[cur_node].elems[1], - ex_subexp, fl_open); - if (BE (err != REG_NOERROR, 0)) - return err; - } - cur_node = dfa->edests[cur_node].elems[0]; - } - return REG_NOERROR; -} - - -/* For all the back references in the current state, calculate the - destination of the back references by the appropriate entry - in MCTX->BKREF_ENTS. */ - -static reg_errcode_t -expand_bkref_cache (preg, mctx, cur_nodes, cur_str, last_str, subexp_num, - fl_open) - const regex_t *preg; - re_match_context_t *mctx; - int cur_str, last_str, subexp_num, fl_open; - re_node_set *cur_nodes; -{ - reg_errcode_t err; - re_dfa_t *dfa = (re_dfa_t *) preg->buffer; - int cache_idx, cache_idx_start; - /* The current state. */ - - cache_idx_start = search_cur_bkref_entry (mctx, cur_str); - for (cache_idx = cache_idx_start; cache_idx < mctx->nbkref_ents; ++cache_idx) - { - int to_idx, next_node; - struct re_backref_cache_entry *ent = mctx->bkref_ents + cache_idx; - if (ent->str_idx > cur_str) - break; - /* Is this entry ENT is appropriate? */ - if (!re_node_set_contains (cur_nodes, ent->node)) - continue; /* No. 
*/ - - to_idx = cur_str + ent->subexp_to - ent->subexp_from; - /* Calculate the destination of the back reference, and append it - to MCTX->STATE_LOG. */ - if (to_idx == cur_str) - { - /* The backreference did epsilon transit, we must re-check all the - node in the current state. */ - re_node_set new_dests; - reg_errcode_t err2, err3; - next_node = dfa->edests[ent->node].elems[0]; - if (re_node_set_contains (cur_nodes, next_node)) - continue; - err = re_node_set_init_1 (&new_dests, next_node); - err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, - fl_open); - err3 = re_node_set_merge (cur_nodes, &new_dests); - re_node_set_free (&new_dests); - if (BE (err != REG_NOERROR || err2 != REG_NOERROR - || err3 != REG_NOERROR, 0)) - { - err = (err != REG_NOERROR ? err - : (err2 != REG_NOERROR ? err2 : err3)); - return err; - } - /* TODO: It is still inefficient... */ - cache_idx = cache_idx_start - 1; - continue; - } - else - { - re_node_set union_set; - next_node = dfa->nexts[ent->node]; - if (mctx->state_log[to_idx]) - { - int ret; - if (re_node_set_contains (&mctx->state_log[to_idx]->nodes, - next_node)) - continue; - err = re_node_set_init_copy (&union_set, - &mctx->state_log[to_idx]->nodes); - ret = re_node_set_insert (&union_set, next_node); - if (BE (err != REG_NOERROR || ret < 0, 0)) - { - re_node_set_free (&union_set); - err = err != REG_NOERROR ? err : REG_ESPACE; - return err; - } - } - else - { - err = re_node_set_init_1 (&union_set, next_node); - if (BE (err != REG_NOERROR, 0)) - return err; - } - mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set); - re_node_set_free (&union_set); - if (BE (mctx->state_log[to_idx] == NULL - && err != REG_NOERROR, 0)) - return err; - } - } - return REG_NOERROR; -} - -/* Build transition table for the state. - Return the new table if succeeded, otherwise return NULL. 
*/ - -static re_dfastate_t ** -build_trtable (preg, state, fl_search) - const regex_t *preg; - const re_dfastate_t *state; - int fl_search; -{ - reg_errcode_t err; - re_dfa_t *dfa = (re_dfa_t *) preg->buffer; - int i, j, k, ch; - int dests_node_malloced = 0, dest_states_malloced = 0; - int ndests; /* Number of the destination states from `state'. */ - re_dfastate_t **trtable; - re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl; - re_node_set follows, *dests_node; - bitset *dests_ch; - bitset acceptable; - - /* We build DFA states which corresponds to the destination nodes - from `state'. `dests_node[i]' represents the nodes which i-th - destination state contains, and `dests_ch[i]' represents the - characters which i-th destination state accepts. */ -#ifdef _LIBC - if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX)) - dests_node = (re_node_set *) - alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX); - else -#endif - { - dests_node = (re_node_set *) - malloc ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX); - if (BE (dests_node == NULL, 0)) - return NULL; - dests_node_malloced = 1; - } - dests_ch = (bitset *) (dests_node + SBC_MAX); - - /* Initialize transiton table. */ - trtable = (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX); - if (BE (trtable == NULL, 0)) - { - if (dests_node_malloced) - free (dests_node); - return NULL; - } - - /* At first, group all nodes belonging to `state' into several - destinations. */ - ndests = group_nodes_into_DFAstates (preg, state, dests_node, dests_ch); - if (BE (ndests <= 0, 0)) - { - if (dests_node_malloced) - free (dests_node); - /* Return NULL in case of an error, trtable otherwise. 
*/ - if (ndests == 0) - return trtable; - free (trtable); - return NULL; - } - - err = re_node_set_alloc (&follows, ndests + 1); - if (BE (err != REG_NOERROR, 0)) - goto out_free; - -#ifdef _LIBC - if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX - + ndests * 3 * sizeof (re_dfastate_t *))) - dest_states = (re_dfastate_t **) - alloca (ndests * 3 * sizeof (re_dfastate_t *)); - else -#endif - { - dest_states = (re_dfastate_t **) - malloc (ndests * 3 * sizeof (re_dfastate_t *)); - if (BE (dest_states == NULL, 0)) - { -out_free: - if (dest_states_malloced) - free (dest_states); - re_node_set_free (&follows); - for (i = 0; i < ndests; ++i) - re_node_set_free (dests_node + i); - free (trtable); - if (dests_node_malloced) - free (dests_node); - return NULL; - } - dest_states_malloced = 1; - } - dest_states_word = dest_states + ndests; - dest_states_nl = dest_states_word + ndests; - bitset_empty (acceptable); - - /* Then build the states for all destinations. */ - for (i = 0; i < ndests; ++i) - { - int next_node; - re_node_set_empty (&follows); - /* Merge the follows of this destination states. */ - for (j = 0; j < dests_node[i].nelem; ++j) - { - next_node = dfa->nexts[dests_node[i].elems[j]]; - if (next_node != -1) - { - err = re_node_set_merge (&follows, dfa->eclosures + next_node); - if (BE (err != REG_NOERROR, 0)) - goto out_free; - } - } - /* If search flag is set, merge the initial state. 
*/ - if (fl_search) - { -#ifdef RE_ENABLE_I18N - int not_initial = 0; - for (j = 0; j < follows.nelem; ++j) - if (dfa->nodes[follows.elems[j]].type == CHARACTER) - { - not_initial = dfa->nodes[follows.elems[j]].mb_partial; - break; - } - if (!not_initial) -#endif - { - err = re_node_set_merge (&follows, - dfa->init_state->entrance_nodes); - if (BE (err != REG_NOERROR, 0)) - goto out_free; - } - } - dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0); - if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0)) - goto out_free; - /* If the new state has context constraint, - build appropriate states for these contexts. */ - if (dest_states[i]->has_constraint) - { - dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows, - CONTEXT_WORD); - if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0)) - goto out_free; - dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows, - CONTEXT_NEWLINE); - if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0)) - goto out_free; - } - else - { - dest_states_word[i] = dest_states[i]; - dest_states_nl[i] = dest_states[i]; - } - bitset_merge (acceptable, dests_ch[i]); - } - - /* Update the transition table. */ - /* For all characters ch...: */ - for (i = 0, ch = 0; i < BITSET_UINTS; ++i) - for (j = 0; j < UINT_BITS; ++j, ++ch) - if ((acceptable[i] >> j) & 1) - { - /* The current state accepts the character ch. */ - if (IS_WORD_CHAR (ch)) - { - for (k = 0; k < ndests; ++k) - if ((dests_ch[k][i] >> j) & 1) - { - /* k-th destination accepts the word character ch. */ - trtable[ch] = dest_states_word[k]; - /* There must be only one destination which accepts - character ch. See group_nodes_into_DFAstates. */ - break; - } - } - else /* not WORD_CHAR */ - { - for (k = 0; k < ndests; ++k) - if ((dests_ch[k][i] >> j) & 1) - { - /* k-th destination accepts the non-word character ch. */ - trtable[ch] = dest_states[k]; - /* There must be only one destination which accepts - character ch. 
See group_nodes_into_DFAstates. */ - break; - } - } - } - /* new line */ - if (bitset_contain (acceptable, NEWLINE_CHAR)) - { - /* The current state accepts newline character. */ - for (k = 0; k < ndests; ++k) - if (bitset_contain (dests_ch[k], NEWLINE_CHAR)) - { - /* k-th destination accepts newline character. */ - trtable[NEWLINE_CHAR] = dest_states_nl[k]; - /* There must be only one destination which accepts - newline. See group_nodes_into_DFAstates. */ - break; - } - } - - if (dest_states_malloced) - free (dest_states); - - re_node_set_free (&follows); - for (i = 0; i < ndests; ++i) - re_node_set_free (dests_node + i); - - if (dests_node_malloced) - free (dests_node); - - return trtable; -} - -/* Group all nodes belonging to STATE into several destinations. - Then for all destinations, set the nodes belonging to the destination - to DESTS_NODE[i] and set the characters accepted by the destination - to DEST_CH[i]. This function return the number of destinations. */ - -static int -group_nodes_into_DFAstates (preg, state, dests_node, dests_ch) - const regex_t *preg; - const re_dfastate_t *state; - re_node_set *dests_node; - bitset *dests_ch; -{ - reg_errcode_t err; - const re_dfa_t *dfa = (re_dfa_t *) preg->buffer; - int i, j, k; - int ndests; /* Number of the destinations from `state'. */ - bitset accepts; /* Characters a node can accept. */ - const re_node_set *cur_nodes = &state->nodes; - bitset_empty (accepts); - ndests = 0; - - /* For all the nodes belonging to `state', */ - for (i = 0; i < cur_nodes->nelem; ++i) - { - re_token_t *node = &dfa->nodes[cur_nodes->elems[i]]; - re_token_type_t type = node->type; - unsigned int constraint = node->constraint; - - /* Enumerate all single byte character this node can accept. 
*/ - if (type == CHARACTER) - bitset_set (accepts, node->opr.c); - else if (type == SIMPLE_BRACKET) - { - bitset_merge (accepts, node->opr.sbcset); - } - else if (type == OP_PERIOD) - { - bitset_set_all (accepts); - if (!(preg->syntax & RE_DOT_NEWLINE)) - bitset_clear (accepts, '\n'); - if (preg->syntax & RE_DOT_NOT_NULL) - bitset_clear (accepts, '\0'); - } - else - continue; - - /* Check the `accepts' and sift the characters which are not - match it the context. */ - if (constraint) - { - if (constraint & NEXT_WORD_CONSTRAINT) - for (j = 0; j < BITSET_UINTS; ++j) - accepts[j] &= dfa->word_char[j]; - if (constraint & NEXT_NOTWORD_CONSTRAINT) - for (j = 0; j < BITSET_UINTS; ++j) - accepts[j] &= ~dfa->word_char[j]; - if (constraint & NEXT_NEWLINE_CONSTRAINT) - { - int accepts_newline = bitset_contain (accepts, NEWLINE_CHAR); - bitset_empty (accepts); - if (accepts_newline) - bitset_set (accepts, NEWLINE_CHAR); - else - continue; - } - } - - /* Then divide `accepts' into DFA states, or create a new - state. */ - for (j = 0; j < ndests; ++j) - { - bitset intersec; /* Intersection sets, see below. */ - bitset remains; - /* Flags, see below. */ - int has_intersec, not_subset, not_consumed; - - /* Optimization, skip if this state doesn't accept the character. */ - if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c)) - continue; - - /* Enumerate the intersection set of this state and `accepts'. */ - has_intersec = 0; - for (k = 0; k < BITSET_UINTS; ++k) - has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k]; - /* And skip if the intersection set is empty. */ - if (!has_intersec) - continue; - - /* Then check if this state is a subset of `accepts'. */ - not_subset = not_consumed = 0; - for (k = 0; k < BITSET_UINTS; ++k) - { - not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k]; - not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k]; - } - - /* If this state isn't a subset of `accepts', create a - new group state, which has the `remains'. 
*/ - if (not_subset) - { - bitset_copy (dests_ch[ndests], remains); - bitset_copy (dests_ch[j], intersec); - err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]); - if (BE (err != REG_NOERROR, 0)) - goto error_return; - ++ndests; - } - - /* Put the position in the current group. */ - err = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]); - if (BE (err < 0, 0)) - goto error_return; - - /* If all characters are consumed, go to next node. */ - if (!not_consumed) - break; - } - /* Some characters remain, create a new group. */ - if (j == ndests) - { - bitset_copy (dests_ch[ndests], accepts); - err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]); - if (BE (err != REG_NOERROR, 0)) - goto error_return; - ++ndests; - bitset_empty (accepts); - } - } - return ndests; - error_return: - for (j = 0; j < ndests; ++j) - re_node_set_free (dests_node + j); - return -1; -} - -#ifdef RE_ENABLE_I18N -/* Check how many bytes the node `dfa->nodes[node_idx]' accepts. - Return the number of the bytes the node accepts. - STR_IDX is the current index of the input string. - - This function handles the nodes which can accept one character, or - one collating element like '.', '[a-z]', opposite to the other nodes - can only accept one byte. */ - -static int -check_node_accept_bytes (preg, node_idx, input, str_idx) - const regex_t *preg; - int node_idx, str_idx; - const re_string_t *input; -{ - const re_dfa_t *dfa = (re_dfa_t *) preg->buffer; - const re_token_t *node = dfa->nodes + node_idx; - int elem_len = re_string_elem_size_at (input, str_idx); - int char_len = re_string_char_size_at (input, str_idx); - int i; -# ifdef _LIBC - int j; - uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); -# endif /* _LIBC */ - if (elem_len <= 1 && char_len <= 1) - return 0; - if (node->type == OP_PERIOD) - { - /* '.' accepts any one character except the following two cases. 
*/ - if ((!(preg->syntax & RE_DOT_NEWLINE) && - re_string_byte_at (input, str_idx) == '\n') || - ((preg->syntax & RE_DOT_NOT_NULL) && - re_string_byte_at (input, str_idx) == '\0')) - return 0; - return char_len; - } - else if (node->type == COMPLEX_BRACKET) - { - const re_charset_t *cset = node->opr.mbcset; -# ifdef _LIBC - const unsigned char *pin = ((char *) re_string_get_buffer (input) - + str_idx); -# endif /* _LIBC */ - int match_len = 0; - wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars) - ? re_string_wchar_at (input, str_idx) : 0); - - /* match with multibyte character? */ - for (i = 0; i < cset->nmbchars; ++i) - if (wc == cset->mbchars[i]) - { - match_len = char_len; - goto check_node_accept_bytes_match; - } - /* match with character_class? */ - for (i = 0; i < cset->nchar_classes; ++i) - { - wctype_t wt = cset->char_classes[i]; - if (__iswctype (wc, wt)) - { - match_len = char_len; - goto check_node_accept_bytes_match; - } - } - -# ifdef _LIBC - if (nrules != 0) - { - unsigned int in_collseq = 0; - const int32_t *table, *indirect; - const unsigned char *weights, *extra; - const char *collseqwc; - int32_t idx; - /* This #include defines a local function! */ -# include - - /* match with collating_symbol? */ - if (cset->ncoll_syms) - extra = (const unsigned char *) - _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB); - for (i = 0; i < cset->ncoll_syms; ++i) - { - const unsigned char *coll_sym = extra + cset->coll_syms[i]; - /* Compare the length of input collating element and - the length of current collating element. */ - if (*coll_sym != elem_len) - continue; - /* Compare each bytes. */ - for (j = 0; j < *coll_sym; j++) - if (pin[j] != coll_sym[1 + j]) - break; - if (j == *coll_sym) - { - /* Match if every bytes is equal. 
*/ - match_len = j; - goto check_node_accept_bytes_match; - } - } - - if (cset->nranges) - { - if (elem_len <= char_len) - { - collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC); - in_collseq = collseq_table_lookup (collseqwc, wc); - } - else - in_collseq = find_collation_sequence_value (pin, elem_len); - } - /* match with range expression? */ - for (i = 0; i < cset->nranges; ++i) - if (cset->range_starts[i] <= in_collseq - && in_collseq <= cset->range_ends[i]) - { - match_len = elem_len; - goto check_node_accept_bytes_match; - } - - /* match with equivalence_class? */ - if (cset->nequiv_classes) - { - const unsigned char *cp = pin; - table = (const int32_t *) - _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); - weights = (const unsigned char *) - _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB); - extra = (const unsigned char *) - _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB); - indirect = (const int32_t *) - _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB); - idx = findidx (&cp); - if (idx > 0) - for (i = 0; i < cset->nequiv_classes; ++i) - { - int32_t equiv_class_idx = cset->equiv_classes[i]; - size_t weight_len = weights[idx]; - if (weight_len == weights[equiv_class_idx]) - { - int cnt = 0; - while (cnt <= weight_len - && (weights[equiv_class_idx + 1 + cnt] - == weights[idx + 1 + cnt])) - ++cnt; - if (cnt > weight_len) - { - match_len = elem_len; - goto check_node_accept_bytes_match; - } - } - } - } - } - else -# endif /* _LIBC */ - { - /* match with range expression? 
*/ -#if __GNUC__ >= 2 - wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'}; -#else - wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'}; - cmp_buf[2] = wc; -#endif - for (i = 0; i < cset->nranges; ++i) - { - cmp_buf[0] = cset->range_starts[i]; - cmp_buf[4] = cset->range_ends[i]; - if (wcscoll (cmp_buf, cmp_buf + 2) <= 0 - && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0) - { - match_len = char_len; - goto check_node_accept_bytes_match; - } - } - } - check_node_accept_bytes_match: - if (!cset->non_match) - return match_len; - else - { - if (match_len > 0) - return 0; - else - return (elem_len > char_len) ? elem_len : char_len; - } - } - return 0; -} - -# ifdef _LIBC -static unsigned int -find_collation_sequence_value (mbs, mbs_len) - const unsigned char *mbs; - size_t mbs_len; -{ - uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); - if (nrules == 0) - { - if (mbs_len == 1) - { - /* No valid character. Match it as a single byte character. */ - const unsigned char *collseq = (const unsigned char *) - _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB); - return collseq[mbs[0]]; - } - return UINT_MAX; - } - else - { - int32_t idx; - const unsigned char *extra = (const unsigned char *) - _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB); - - for (idx = 0; ;) - { - int mbs_cnt, found = 0; - int32_t elem_mbs_len; - /* Skip the name of collating element name. */ - idx = idx + extra[idx] + 1; - elem_mbs_len = extra[idx++]; - if (mbs_len == elem_mbs_len) - { - for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt) - if (extra[idx + mbs_cnt] != mbs[mbs_cnt]) - break; - if (mbs_cnt == elem_mbs_len) - /* Found the entry. */ - found = 1; - } - /* Skip the byte sequence of the collating element. */ - idx += elem_mbs_len; - /* Adjust for the alignment. */ - idx = (idx + 3) & ~3; - /* Skip the collation sequence value. */ - idx += sizeof (uint32_t); - /* Skip the wide char sequence of the collating element. 
*/ - idx = idx + sizeof (uint32_t) * (extra[idx] + 1); - /* If we found the entry, return the sequence value. */ - if (found) - return *(uint32_t *) (extra + idx); - /* Skip the collation sequence value. */ - idx += sizeof (uint32_t); - } - } -} -# endif /* _LIBC */ -#endif /* RE_ENABLE_I18N */ - -/* Check whether the node accepts the byte which is IDX-th - byte of the INPUT. */ - -static int -check_node_accept (preg, node, mctx, idx) - const regex_t *preg; - const re_token_t *node; - const re_match_context_t *mctx; - int idx; -{ - unsigned char ch; - if (node->constraint) - { - /* The node has constraints. Check whether the current context - satisfies the constraints. */ - unsigned int context = re_string_context_at (mctx->input, idx, - mctx->eflags, - preg->newline_anchor); - if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context)) - return 0; - } - ch = re_string_byte_at (mctx->input, idx); - if (node->type == CHARACTER) - return node->opr.c == ch; - else if (node->type == SIMPLE_BRACKET) - return bitset_contain (node->opr.sbcset, ch); - else if (node->type == OP_PERIOD) - return !((ch == '\n' && !(preg->syntax & RE_DOT_NEWLINE)) - || (ch == '\0' && (preg->syntax & RE_DOT_NOT_NULL))); - else - return 0; -} - -/* Extend the buffers, if the buffers have run out. */ - -static reg_errcode_t -extend_buffers (mctx) - re_match_context_t *mctx; -{ - reg_errcode_t ret; - re_string_t *pstr = mctx->input; - - /* Double the lengthes of the buffers. */ - ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2); - if (BE (ret != REG_NOERROR, 0)) - return ret; - - if (mctx->state_log != NULL) - { - /* And double the length of state_log. */ - re_dfastate_t **new_array; - new_array = re_realloc (mctx->state_log, re_dfastate_t *, - pstr->bufs_len * 2); - if (BE (new_array == NULL, 0)) - return REG_ESPACE; - mctx->state_log = new_array; - } - - /* Then reconstruct the buffers. 
*/ - if (pstr->icase) - { -#ifdef RE_ENABLE_I18N - if (MB_CUR_MAX > 1) - build_wcs_upper_buffer (pstr); - else -#endif /* RE_ENABLE_I18N */ - build_upper_buffer (pstr); - } - else - { -#ifdef RE_ENABLE_I18N - if (MB_CUR_MAX > 1) - build_wcs_buffer (pstr); - else -#endif /* RE_ENABLE_I18N */ - { - if (pstr->trans != NULL) - re_string_translate_buffer (pstr); - else - pstr->valid_len = pstr->bufs_len; - } - } - return REG_NOERROR; -} - - -/* Functions for matching context. */ - -/* Initialize MCTX. */ - -static reg_errcode_t -match_ctx_init (mctx, eflags, input, n) - re_match_context_t *mctx; - int eflags, n; - re_string_t *input; -{ - mctx->eflags = eflags; - mctx->input = input; - mctx->match_last = -1; - if (n > 0) - { - mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n); - mctx->sub_tops = re_malloc (re_sub_match_top_t *, n); - if (BE (mctx->bkref_ents == NULL || mctx->sub_tops == NULL, 0)) - return REG_ESPACE; - } - else - mctx->bkref_ents = NULL; - mctx->nbkref_ents = 0; - mctx->abkref_ents = n; - mctx->max_mb_elem_len = 1; - mctx->nsub_tops = 0; - mctx->asub_tops = n; - return REG_NOERROR; -} - -/* Clean the entries which depend on the current input in MCTX. - This function must be invoked when the matcher changes the start index - of the input, or changes the input string. */ - -static void -match_ctx_clean (mctx) - re_match_context_t *mctx; -{ - match_ctx_free_subtops (mctx); - mctx->nsub_tops = 0; - mctx->nbkref_ents = 0; -} - -/* Free all the memory associated with MCTX. */ - -static void -match_ctx_free (mctx) - re_match_context_t *mctx; -{ - match_ctx_free_subtops (mctx); - re_free (mctx->sub_tops); - re_free (mctx->bkref_ents); -} - -/* Free all the memory associated with MCTX->SUB_TOPS. 
*/ - -static void -match_ctx_free_subtops (mctx) - re_match_context_t *mctx; -{ - int st_idx; - for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx) - { - int sl_idx; - re_sub_match_top_t *top = mctx->sub_tops[st_idx]; - for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx) - { - re_sub_match_last_t *last = top->lasts[sl_idx]; - re_free (last->path.array); - re_free (last); - } - re_free (top->lasts); - if (top->path) - { - re_free (top->path->array); - re_free (top->path); - } - free (top); - } -} - -/* Add a new backreference entry to MCTX. - Note that we assume that caller never call this function with duplicate - entry, and call with STR_IDX which isn't smaller than any existing entry. -*/ - -static reg_errcode_t -match_ctx_add_entry (mctx, node, str_idx, from, to) - re_match_context_t *mctx; - int node, str_idx, from, to; -{ - if (mctx->nbkref_ents >= mctx->abkref_ents) - { - struct re_backref_cache_entry* new_entry; - new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry, - mctx->abkref_ents * 2); - if (BE (new_entry == NULL, 0)) - { - re_free (mctx->bkref_ents); - return REG_ESPACE; - } - mctx->bkref_ents = new_entry; - memset (mctx->bkref_ents + mctx->nbkref_ents, '\0', - sizeof (struct re_backref_cache_entry) * mctx->abkref_ents); - mctx->abkref_ents *= 2; - } - mctx->bkref_ents[mctx->nbkref_ents].node = node; - mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx; - mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from; - mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to; - mctx->bkref_ents[mctx->nbkref_ents++].flag = 0; - if (mctx->max_mb_elem_len < to - from) - mctx->max_mb_elem_len = to - from; - return REG_NOERROR; -} - -/* Search for the first entry which has the same str_idx. - Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX. 
*/ - -static int -search_cur_bkref_entry (mctx, str_idx) - re_match_context_t *mctx; - int str_idx; -{ - int left, right, mid; - right = mctx->nbkref_ents; - for (left = 0; left < right;) - { - mid = (left + right) / 2; - if (mctx->bkref_ents[mid].str_idx < str_idx) - left = mid + 1; - else - right = mid; - } - return left; -} - -static void -match_ctx_clear_flag (mctx) - re_match_context_t *mctx; -{ - int i; - for (i = 0; i < mctx->nbkref_ents; ++i) - { - mctx->bkref_ents[i].flag = 0; - } -} - -/* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches - at STR_IDX. */ - -static reg_errcode_t -match_ctx_add_subtop (mctx, node, str_idx) - re_match_context_t *mctx; - int node, str_idx; -{ -#ifdef DEBUG - assert (mctx->sub_tops != NULL); - assert (mctx->asub_tops > 0); -#endif - if (mctx->nsub_tops == mctx->asub_tops) - { - re_sub_match_top_t **new_array; - mctx->asub_tops *= 2; - new_array = re_realloc (mctx->sub_tops, re_sub_match_top_t *, - mctx->asub_tops); - if (BE (new_array == NULL, 0)) - return REG_ESPACE; - mctx->sub_tops = new_array; - } - mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t)); - if (mctx->sub_tops[mctx->nsub_tops] == NULL) - return REG_ESPACE; - mctx->sub_tops[mctx->nsub_tops]->node = node; - mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx; - return REG_NOERROR; -} - -/* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches - at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP. 
*/ - -static re_sub_match_last_t * -match_ctx_add_sublast (subtop, node, str_idx) - re_sub_match_top_t *subtop; - int node, str_idx; -{ - re_sub_match_last_t *new_entry; - if (subtop->nlasts == subtop->alasts) - { - re_sub_match_last_t **new_array; - subtop->alasts = 2 * subtop->alasts + 1; - new_array = re_realloc (subtop->lasts, re_sub_match_last_t *, - subtop->alasts); - if (BE (new_array == NULL, 0)) - return NULL; - subtop->lasts = new_array; - } - new_entry = calloc (1, sizeof (re_sub_match_last_t)); - if (BE (new_entry == NULL, 0)) - return NULL; - subtop->lasts[subtop->nlasts] = new_entry; - new_entry->node = node; - new_entry->str_idx = str_idx; - ++subtop->nlasts; - return new_entry; -} - -static void -sift_ctx_init (sctx, sifted_sts, limited_sts, last_node, last_str_idx, - check_subexp) - re_sift_context_t *sctx; - re_dfastate_t **sifted_sts, **limited_sts; - int last_node, last_str_idx, check_subexp; -{ - sctx->sifted_states = sifted_sts; - sctx->limited_states = limited_sts; - sctx->last_node = last_node; - sctx->last_str_idx = last_str_idx; - sctx->check_subexp = check_subexp; - sctx->cur_bkref = -1; - sctx->cls_subexp_idx = -1; - re_node_set_init_empty (&sctx->limits); -} diff --git a/lib/shell/Makefile b/lib/shell/Makefile deleted file mode 100644 index 5d6d65e..0000000 --- a/lib/shell/Makefile +++ /dev/null @@ -1,12 +0,0 @@ -# Support routines for shell scripts - -DIRS+=lib/shell -PROGS+=$(o)/lib/shell/config $(o)/lib/shell/logger -DATAFILES+=$(o)/lib/shell/libucw.sh - -$(o)/lib/shell/config: $(o)/lib/shell/config.o $(LIBUCW) -$(o)/lib/shell/logger: $(o)/lib/shell/logger.o $(LIBUCW) - -TESTS+=$(addprefix $(o)/lib/shell/,config.test) - -$(o)/lib/shell/config.test: $(o)/lib/shell/config diff --git a/lib/simple-lists.h b/lib/simple-lists.h deleted file mode 100644 index f553a6b..0000000 --- a/lib/simple-lists.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * UCW Library -- Linked Lists of Simple Items - * - * (c) 2006 Martin Mares - * - * This software may be 
freely distributed and used according to the terms - * of the GNU Lesser General Public License. - */ - -#ifndef _UCW_SIMPLE_LISTS_H -#define _UCW_SIMPLE_LISTS_H - -#include "lib/clists.h" - -typedef struct simp_node { - cnode n; - union { - char *s; - void *p; - int i; - uns u; - }; -} simp_node; - -typedef struct simp2_node { - cnode n; - union { - char *s1; - void *p1; - int i1; - uns u1; - }; - union { - char *s2; - void *p2; - int i2; - uns u2; - }; -} simp2_node; - -struct mempool; -simp_node *simp_append(struct mempool *mp, clist *l); -simp2_node *simp2_append(struct mempool *mp, clist *l); - -/* Configuration sections */ -extern struct cf_section cf_string_list_config; -extern struct cf_section cf_2string_list_config; - -#endif diff --git a/lib/slists.h b/lib/slists.h deleted file mode 100644 index b0e9f4e..0000000 --- a/lib/slists.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * UCW Library -- Single-Linked Lists - * - * (c) 2005 Martin Mares - * - * This software may be freely distributed and used according to the terms - * of the GNU Lesser General Public License. - */ - -#ifndef _UCW_SLISTS_H -#define _UCW_SLISTS_H - -typedef struct snode { - struct snode *next; -} snode; - -typedef struct slist { - struct snode head, *last; -} slist; - -static inline void *slist_head(slist *l) -{ - return l->head.next; -} - -static inline void *slist_tail(slist *l) -{ - return l->last; -} - -static inline void *slist_next(snode *n) -{ - return n->next; -} - -static inline int slist_empty(slist *l) -{ - return !l->head.next; -} - -#define SLIST_WALK(n,list) for(n=(void*)(list).head.next; (n); (n)=(void*)((snode*)(n))->next) -#define SLIST_WALK_DELSAFE(n,list,prev) for((prev)=(void*)&(list).head; (n)=(void*)((snode*)prev)->next; (prev)=(((snode*)(prev))->next==(snode*)(n) ? 
(void*)(n) : (void*)(prev))) -#define SLIST_FOR_EACH(type,n,list) for(type n=(void*)(list).head.next; n; n=(void*)((snode*)(n))->next) - -static inline void slist_insert_after(slist *l, snode *what, snode *after) -{ - what->next = after->next; - after->next = what; - if (!what->next) - l->last = what; -} - -static inline void slist_add_head(slist *l, snode *n) -{ - n->next = l->head.next; - l->head.next = n; - if (!l->last) - l->last = n; -} - -static inline void slist_add_tail(slist *l, snode *n) -{ - if (l->last) - l->last->next = n; - else - l->head.next = n; - n->next = NULL; - l->last = n; -} - -static inline void slist_init(slist *l) -{ - l->head.next = l->last = NULL; -} - -static inline void slist_remove_after(slist *l, snode *after) -{ - snode *n = after->next; - after->next = n->next; - if (l->last == n) - l->last = (after == &l->head) ? NULL : after; -} - -/* Non-trivial functions */ - -void *slist_prev(slist *l, snode *n); -void slist_insert_before(slist *l, snode *what, snode *before); -void slist_remove(slist *l, snode *n); - -#endif diff --git a/lib/sorter/Makefile b/lib/sorter/Makefile deleted file mode 100644 index b54c05f..0000000 --- a/lib/sorter/Makefile +++ /dev/null @@ -1,13 +0,0 @@ -# Makefile for the UCW Sorter (c) 2007 Martin Mares - -DIRS+=lib/sorter - -LIBUCW_MODS+=$(addprefix sorter/, config govern sbuck array) -LIBUCW_INCLUDES+=$(addprefix sorter/, array.h common.h s-fixint.h \ - s-internal.h s-multiway.h s-radix.h s-twoway.h sorter.h) - -ifdef CONFIG_DEBUG_TOOLS -PROGS+=$(o)/lib/sorter/sort-test -endif - -$(o)/lib/sorter/sort-test: $(o)/lib/sorter/sort-test.o $(LIBUCW) diff --git a/sherlock/attrset.c b/sherlock/attrset.c index a025739..d0419d4 100644 --- a/sherlock/attrset.c +++ b/sherlock/attrset.c @@ -10,7 +10,7 @@ #include "sherlock/sherlock.h" #include "sherlock/object.h" #include "sherlock/attrset.h" -#include "lib/clists.h" +#include "ucw/clists.h" #include "sherlock/conf.h" struct attr_node { diff --git a/sherlock/attrset.h 
b/sherlock/attrset.h index 62e6162..d454e79 100644 --- a/sherlock/attrset.h +++ b/sherlock/attrset.h @@ -10,7 +10,7 @@ #ifndef _SHERLOCK_ATTRSET_H #define _SHERLOCK_ATTRSET_H -#include "lib/bitarray.h" +#include "ucw/bitarray.h" #include "sherlock/object.h" COMPILE_ASSERT(son_value, OBJ_ATTR_SON == 256); diff --git a/sherlock/buck2obj.c b/sherlock/buck2obj.c index 6bee56b..660ba91 100644 --- a/sherlock/buck2obj.c +++ b/sherlock/buck2obj.c @@ -11,15 +11,15 @@ #undef LOCAL_DEBUG #include "sherlock/sherlock.h" -#include "lib/unaligned.h" -#include "lib/mempool.h" -#include "lib/fastbuf.h" -#include "lib/unicode.h" +#include "ucw/unaligned.h" +#include "ucw/mempool.h" +#include "ucw/fastbuf.h" +#include "ucw/unicode.h" #include "sherlock/object.h" #include "sherlock/objread.h" -#include "lib/lizard.h" -#include "lib/bbuf.h" -#include "lib/ff-unicode.h" +#include "ucw/lizard.h" +#include "ucw/bbuf.h" +#include "ucw/ff-unicode.h" #include #include @@ -73,7 +73,7 @@ get_attr(byte **pos, byte *end, struct parsed_attr *attr) else { uns len; - GET_UTF8_32(ptr, len); + ptr = utf8_32_get(ptr, &len); if (!len--) { *pos = ptr; @@ -208,7 +208,7 @@ decode_attributes(byte *ptr, byte *end, struct odes *o, uns can_overwrite) while (ptr < end) { uns len; - GET_UTF8_32(ptr, len); + ptr = utf8_32_get(ptr, &len); if (!len--) break; byte type = ptr[len]; @@ -222,7 +222,7 @@ decode_attributes(byte *ptr, byte *end, struct odes *o, uns can_overwrite) while (ptr < end) { uns len; - GET_UTF8_32(ptr, len); + ptr = utf8_32_get(ptr, &len); if (!len--) break; byte type = ptr[len]; @@ -251,7 +251,7 @@ buck2obj_parse(struct buck2obj_buf *buf, uns buck_type, uns buck_len, struct fas obj_read_start(&st, o_hdr); byte *b; // ignore empty lines and read until the end of the bucket - sh_off_t end = btell(body) + buck_len; + ucw_off_t end = btell(body) + buck_len; while (btell(body) < end && bgets_bb(body, &buf->bb, ~0U)) if ((b = buf->bb.ptr)[0]) obj_read_attr(&st, b[0], b+1); @@ -260,8 +260,8 @@ 
buck2obj_parse(struct buck2obj_buf *buf, uns buck_type, uns buck_len, struct fas } else if (buck_type == BUCKET_TYPE_V30) { - sh_off_t start = btell(body); - sh_off_t end = start + buck_len; + ucw_off_t start = btell(body); + ucw_off_t end = start + buck_len; byte *b; struct obj_read_state st; obj_read_start(&st, o_hdr); @@ -285,8 +285,8 @@ buck2obj_parse(struct buck2obj_buf *buf, uns buck_type, uns buck_len, struct fas /* Avoid reading the whole bucket if only its header is needed. */ if (body_start) { - sh_off_t start = btell(body); - sh_off_t end = start + buck_len; + ucw_off_t start = btell(body); + ucw_off_t end = start + buck_len; obj_read_start(&st, o_hdr); while (btell(body) < end) { diff --git a/sherlock/conf-parse.c b/sherlock/conf-parse.c index bf66d39..85a9f3b 100644 --- a/sherlock/conf-parse.c +++ b/sherlock/conf-parse.c @@ -10,10 +10,10 @@ #include "sherlock/sherlock.h" #include "sherlock/object.h" -#include "lib/chartype.h" -#include "lib/fastbuf.h" -#include "lib/ff-unicode.h" -#include "lib/unicode.h" +#include "ucw/chartype.h" +#include "ucw/fastbuf.h" +#include "ucw/ff-unicode.h" +#include "ucw/unicode.h" #include "sherlock/conf.h" /*** Attribute names ***/ diff --git a/sherlock/conf.h b/sherlock/conf.h index 4883c21..d6bfe1a 100644 --- a/sherlock/conf.h +++ b/sherlock/conf.h @@ -11,7 +11,7 @@ #ifndef _SHERLOCK_CONF_H #define _SHERLOCK_CONF_H -#include "lib/conf.h" +#include "ucw/conf.h" /* All of the following objects are defined in conf-parse.c * diff --git a/sherlock/obj-format.c b/sherlock/obj-format.c index 96aeb1d..de114e4 100644 --- a/sherlock/obj-format.c +++ b/sherlock/obj-format.c @@ -10,7 +10,7 @@ #include "sherlock/sherlock.h" #include "sherlock/object.h" -#include "lib/stkstring.h" +#include "ucw/stkstring.h" #include diff --git a/sherlock/obj2buck.c b/sherlock/obj2buck.c index 3e1873a..869fae2 100644 --- a/sherlock/obj2buck.c +++ b/sherlock/obj2buck.c @@ -9,8 +9,8 @@ */ #include "sherlock/sherlock.h" -#include "lib/fastbuf.h" 
-#include "lib/ff-unicode.h" +#include "ucw/fastbuf.h" +#include "ucw/ff-unicode.h" #include "sherlock/object.h" #include @@ -73,7 +73,7 @@ put_attr(byte *ptr, uns type, byte *val, uns len) { if (use_v33) { - PUT_UTF8_32(ptr, len+1); + ptr = utf8_32_put(ptr, len+1); memcpy(ptr, val, len); ptr += len; *ptr++ = type; @@ -104,7 +104,7 @@ put_attr_vformat(byte *ptr, uns type, byte *mask, va_list va) if (len >= 127) { byte tmp[6], *tmp_end = tmp; - PUT_UTF8_32(tmp_end, len+1); + tmp_end = utf8_32_put(tmp_end, len+1); uns l = tmp_end - tmp; memmove(ptr+l, ptr+1, len); memcpy(ptr, tmp, l); diff --git a/sherlock/object.c b/sherlock/object.c index d12270b..05e6c2e 100644 --- a/sherlock/object.c +++ b/sherlock/object.c @@ -9,8 +9,8 @@ */ #include "sherlock/sherlock.h" -#include "lib/mempool.h" -#include "lib/fastbuf.h" +#include "ucw/mempool.h" +#include "ucw/fastbuf.h" #include "sherlock/object.h" #include diff --git a/sherlock/perl/Object.pm b/sherlock/perl/Object.pm index 07f7899..38a9d2e 100644 --- a/sherlock/perl/Object.pm +++ b/sherlock/perl/Object.pm @@ -197,10 +197,11 @@ sub read($$@) { my $read_something = 0; my $obj = $self; my $raw; + my $read = $opts{read} ? 
$opts{read} : sub { my $fh = shift; return $_ = <$fh>; }; if ($opts{raw}) { $raw = $obj->{"RAW"} = []; } - while (<$fh>) { + while ($read->($fh)) { chomp; /^$/ && last; my ($a, $v) = /^(.)(.*)$/ or return undef; diff --git a/sherlock/sherlock.h b/sherlock/sherlock.h index 0f4b2da..5df776a 100644 --- a/sherlock/sherlock.h +++ b/sherlock/sherlock.h @@ -13,7 +13,7 @@ #ifndef _SHERLOCK_LIB_H #define _SHERLOCK_LIB_H -#include "lib/lib.h" +#include "ucw/lib.h" #ifdef CONFIG_MAX_CONTEXTS #define CONFIG_CONTEXTS diff --git a/ucw/Makefile b/ucw/Makefile new file mode 100644 index 0000000..a696e7a --- /dev/null +++ b/ucw/Makefile @@ -0,0 +1,169 @@ +# Makefile for the UCW Library (c) 1997--2009 Martin Mares + +DIRS+=ucw +LIBUCW=$(o)/ucw/libucw.pc + +ifdef CONFIG_UCW_UTILS +include $(s)/ucw/utils/Makefile +endif + +LIBUCW_MODS= \ + threads \ + alloc alloc_str realloc bigalloc mempool mempool-str mempool-fmt eltpool \ + mmap partmap hashfunc \ + slists simple-lists bitsig \ + log log-stream log-file log-syslog log-conf proctitle tbf \ + conf-alloc conf-dump conf-input conf-intr conf-journal conf-parse conf-section \ + ipaccess \ + profile \ + fastbuf ff-binary ff-string ff-printf ff-unicode ff-stkstring \ + fb-file carefulio fb-mem fb-temp tempfile fb-mmap fb-limfd fb-buffer fb-grow fb-pool fb-atomic fb-param fb-socket \ + char-cat char-upper char-lower unicode stkstring \ + wildmatch regex \ + prime primetable random timer randomkey \ + bit-ffs bit-fls \ + url \ + mainloop exitstatus runcmd sighandler \ + lizard lizard-safe adler32 \ + md5 sha1 sha1-hmac \ + base64 base224 \ + sync \ + qache \ + string str-esc str-split str-match str-imatch str-hex \ + bbuf \ + getopt + +LIBUCW_MAIN_INCLUDES= \ + lib.h threads.h \ + mempool.h \ + clists.h slists.h simple-lists.h \ + string.h stkstring.h unicode.h chartype.h regex.h \ + wildmatch.h \ + unaligned.h prefetch.h \ + bbuf.h gbuf.h bitarray.h bitsig.h \ + hashfunc.h hashtable.h \ + heap.h binheap.h binheap-node.h \ + redblack.h \ + 
prime.h \ + bitops.h \ + conf.h getopt.h ipaccess.h \ + profile.h \ + fastbuf.h lfs.h ff-unicode.h ff-binary.h \ + url.h \ + mainloop.h \ + lizard.h \ + md5.h \ + base64.h base224.h \ + qache.h \ + kmp.h kmp-search.h binsearch.h \ + partmap.h + +ifdef CONFIG_UCW_THREADS +# Some modules require threading +LIBUCW_MODS+=threads-conf workqueue asio +LIBUCW_MAIN_INCLUDES+=workqueue.h semaphore.h asio.h +endif + +ifdef CONFIG_UCW_FB_DIRECT +LIBUCW_MODS+=fb-direct +endif + +ifdef CONFIG_OWN_GETOPT +include $(s)/ucw/getopt/Makefile +endif + +LIBUCW_INCLUDES=$(LIBUCW_MAIN_INCLUDES) + +include $(s)/ucw/sorter/Makefile +include $(s)/ucw/doc/Makefile + +LIBUCW_MOD_PATHS=$(addprefix $(o)/ucw/,$(LIBUCW_MODS)) + +$(o)/ucw/libucw.a: $(addsuffix .o,$(LIBUCW_MOD_PATHS)) +$(o)/ucw/libucw-pic.a: $(addsuffix .oo,$(LIBUCW_MOD_PATHS)) +$(o)/ucw/libucw.so: $(addsuffix .oo,$(LIBUCW_MOD_PATHS)) + +$(o)/ucw/hashfunc.o $(o)/ucw/hashfunc.oo: CFLAGS += -funroll-loops +$(o)/ucw/lizard.o: CFLAGS += $(COPT2) -funroll-loops + +$(o)/ucw/conf-test: $(o)/ucw/conf-test.o $(LIBUCW) +$(o)/ucw/lfs-test: $(o)/ucw/lfs-test.o $(LIBUCW) +$(o)/ucw/hash-test: $(o)/ucw/hash-test.o $(LIBUCW) +$(o)/ucw/hashfunc-test: $(o)/ucw/hashfunc-test.o $(LIBUCW) +$(o)/ucw/asort-test: $(o)/ucw/asort-test.o $(LIBUCW) +$(o)/ucw/redblack-test: $(o)/ucw/redblack-test.o $(LIBUCW) +$(o)/ucw/binheap-test: $(o)/ucw/binheap-test.o $(LIBUCW) +$(o)/ucw/lizard-test: $(o)/ucw/lizard-test.o $(LIBUCW) +$(o)/ucw/kmp-test: $(o)/ucw/kmp-test.o $(LIBUCW) +ifdef CONFIG_CHARSET +$(o)/ucw/kmp-test: $(LIBCHARSET) +endif +$(o)/ucw/ipaccess-test: $(o)/ucw/ipaccess-test.o $(LIBUCW) +$(o)/ucw/trie-test: $(o)/ucw/trie-test.o $(LIBUCW) + +TESTS+=$(addprefix $(o)/ucw/,regex.test unicode.test hash-test.test mempool.test stkstring.test \ + slists.test bbuf.test kmp-test.test getopt.test ff-unicode.test eltpool.test \ + fb-socket.test trie-test.test string.test sha1.test asort-test.test binheap-test.test \ + redblack-test.test fb-file.test fb-grow.test 
fb-pool.test fb-atomic.test \ + fb-limfd.test fb-temp.test fb-mem.test fb-buffer.test fb-mmap.test url.test) + +$(o)/ucw/regex.test: $(o)/ucw/regex-t +$(o)/ucw/unicode.test: $(o)/ucw/unicode-t +$(o)/ucw/hash-test.test: $(o)/ucw/hash-test +$(o)/ucw/mempool.test: $(o)/ucw/mempool-t $(o)/ucw/mempool-fmt-t $(o)/ucw/mempool-str-t +$(o)/ucw/stkstring.test: $(o)/ucw/stkstring-t +$(o)/ucw/bitops.test: $(o)/ucw/bit-ffs-t $(o)/ucw/bit-fls-t +$(o)/ucw/slists.test: $(o)/ucw/slists-t +$(o)/ucw/kmp-test.test: $(o)/ucw/kmp-test +$(o)/ucw/bbuf.test: $(o)/ucw/bbuf-t +$(o)/ucw/getopt.test: $(o)/ucw/getopt-t +$(o)/ucw/ff-unicode.test: $(o)/ucw/ff-unicode-t +$(o)/ucw/eltpool.test: $(o)/ucw/eltpool-t +$(o)/ucw/string.test: $(o)/ucw/str-hex-t $(o)/ucw/str-esc-t +$(o)/ucw/sha1.test: $(o)/ucw/sha1-t $(o)/ucw/sha1-hmac-t +$(o)/ucw/trie-test.test: $(o)/ucw/trie-test +$(o)/ucw/asort-test.test: $(o)/ucw/asort-test +$(o)/ucw/binheap-test.test: $(o)/ucw/binheap-test +$(o)/ucw/redblack-test.test: $(o)/ucw/redblack-test +$(addprefix $(o)/ucw/fb-,file.test grow.test pool.test socket.test atomic.test \ + limfd.test temp.test mem.test buffer.test mmap.test): %.test: %-t +$(o)/ucw/url.test: $(o)/ucw/url-t + +ifdef CONFIG_UCW_THREADS +TESTS+=$(addprefix $(o)/ucw/,asio.test) +$(o)/ucw/asio.test: $(o)/ucw/asio-t +endif + +# The version of autoconf.h that is a part of the public API needs to have +# the internal symbols filtered out, so we generate ucw/autoconf.h in the +# configure script and let the public config.h refer to +# instead of plain "autoconf.h". 
+ +API_LIBS+=libucw +API_INCLUDES+=$(o)/ucw/.include-stamp +$(o)/ucw/.include-stamp: $(addprefix $(s)/ucw/,$(LIBUCW_INCLUDES)) $(o)/ucw/autoconf.h + $(Q)$(BUILDSYS)/install-includes $(/' <$(s)/ucw/config.h >run/include/ucw/config.h + $(Q)touch $@ +run/lib/pkgconfig/libucw.pc: $(o)/ucw/libucw.pc + +ifdef CONFIG_UCW_PERL +include $(s)/ucw/perl/Makefile +endif + +ifdef CONFIG_UCW_SHELL_UTILS +include $(s)/ucw/shell/Makefile +endif + +CONFIGS+=libucw + +INSTALL_TARGETS+=install-libucw +install-libucw: + install -d -m 755 $(DESTDIR)$(INSTALL_LIB_DIR) $(DESTDIR)$(INSTALL_INCLUDE_DIR)/ucw/ $(DESTDIR)$(INSTALL_PKGCONFIG_DIR) $(DESTDIR)$(INSTALL_CONFIG_DIR) + install -m 644 $(addprefix run/include/ucw/,$(LIBUCW_MAIN_INCLUDES) autoconf.h config.h) $(DESTDIR)$(INSTALL_INCLUDE_DIR)/ucw/ + install -m 644 run/lib/pkgconfig/libucw.pc $(DESTDIR)$(INSTALL_PKGCONFIG_DIR) + install -m 644 run/lib/libucw.$(LS) $(DESTDIR)$(INSTALL_LIB_DIR) + install -m 644 run/$(CONFIG_DIR)/libucw $(DESTDIR)$(INSTALL_CONFIG_DIR) + +.PHONY: install-libucw diff --git a/lib/THREADS b/ucw/THREADS similarity index 100% rename from lib/THREADS rename to ucw/THREADS diff --git a/lib/adler32.c b/ucw/adler32.c similarity index 91% rename from lib/adler32.c rename to ucw/adler32.c index cf3a652..28b2b05 100644 --- a/lib/adler32.c +++ b/ucw/adler32.c @@ -7,8 +7,8 @@ * distribution and use, see copyright notice in zlib.h. 
*/ -#include "lib/lib.h" -#include "lib/lizard.h" +#include "ucw/lib.h" +#include "ucw/lizard.h" #define BASE 65521UL /* largest prime smaller than 65536 */ #define NMAX 5552 /* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */ @@ -21,7 +21,7 @@ #define MOD(a) a %= BASE uns -update_adler32(uns adler, const byte *buf, uns len) +adler32_update(uns adler, const byte *buf, uns len) { uns s1 = adler & 0xffff; uns s2 = (adler >> 16) & 0xffff; diff --git a/lib/alloc.c b/ucw/alloc.c similarity index 93% rename from lib/alloc.c rename to ucw/alloc.c index 678901a..4f53136 100644 --- a/lib/alloc.c +++ b/ucw/alloc.c @@ -7,13 +7,11 @@ * of the GNU Lesser General Public License. */ -#include "lib/lib.h" +#include "ucw/lib.h" #include #include -#ifndef DEBUG_DMALLOC - void * xmalloc(uns size) { @@ -23,8 +21,6 @@ xmalloc(uns size) return x; } -#endif - void * xmalloc_zero(uns size) { diff --git a/lib/alloc_str.c b/ucw/alloc_str.c similarity index 93% rename from lib/alloc_str.c rename to ucw/alloc_str.c index 05e803c..5b3d839 100644 --- a/lib/alloc_str.c +++ b/ucw/alloc_str.c @@ -7,7 +7,7 @@ * of the GNU Lesser General Public License. 
*/ -#include "lib/lib.h" +#include "ucw/lib.h" #include diff --git a/lib/asio.c b/ucw/asio.c similarity index 98% rename from lib/asio.c rename to ucw/asio.c index e33e03e..038bc18 100644 --- a/lib/asio.c +++ b/ucw/asio.c @@ -9,9 +9,9 @@ #undef LOCAL_DEBUG -#include "lib/lib.h" -#include "lib/asio.h" -#include "lib/threads.h" +#include "ucw/lib.h" +#include "ucw/asio.h" +#include "ucw/threads.h" #include #include diff --git a/lib/asio.h b/ucw/asio.h similarity index 98% rename from lib/asio.h rename to ucw/asio.h index 6773c81..f80cc69 100644 --- a/lib/asio.h +++ b/ucw/asio.h @@ -10,8 +10,8 @@ #ifndef _UCW_ASIO_H #define _UCW_ASIO_H -#include "lib/workqueue.h" -#include "lib/clists.h" +#include "ucw/workqueue.h" +#include "ucw/clists.h" /* * This module takes care of scheduling and executing asynchronous I/O requests diff --git a/lib/asio.t b/ucw/asio.t similarity index 58% rename from lib/asio.t rename to ucw/asio.t index b660657..a98974b 100644 --- a/lib/asio.t +++ b/ucw/asio.t @@ -1,4 +1,4 @@ # Tests for asynchronous I/O -Run: echo y | ../obj/lib/asio-t +Run: echo y | ../obj/ucw/asio-t Out: ABCDEFGHIJ diff --git a/lib/asort-test.c b/ucw/asort-test.c similarity index 91% rename from lib/asort-test.c rename to ucw/asort-test.c index 9c6abd4..f89e0b8 100644 --- a/lib/asort-test.c +++ b/ucw/asort-test.c @@ -7,7 +7,7 @@ * of the GNU Lesser General Public License. 
*/ -#include "lib/lib.h" +#include "ucw/lib.h" #include #include @@ -38,12 +38,17 @@ static void generate(void) #endif } +static int errors = 0; + static void check(void) { uns i; for (i=0; i - -int main(int argc, char **argv) -{ -#if 0 - byte i[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39 }; - byte o[256], w[256]; - uns l; - l = base224_encode(o, i, sizeof(i)); - fwrite(o, 1, l, stdout); - fputc(0xaa, stdout); - l = base224_decode(w, o, l); - fwrite(w, 1, l, stdout); -#else - if (argc > 1) - { - byte i[BASE224_OUT_CHUNK*17], o[BASE224_IN_CHUNK*17]; - uns l; - while (l = fread(i, 1, sizeof(i), stdin)) - { - l = base224_decode(o, i, l); - fwrite(o, 1, l, stdout); - } - } - else - { - byte i[BASE224_IN_CHUNK*23], o[BASE224_OUT_CHUNK*23]; - uns l; - while (l = fread(i, 1, sizeof(i), stdin)) - { - l = base224_encode(o, i, l); - fwrite(o, 1, l, stdout); - } - } -#endif - - return 0; -} - -#endif diff --git a/ucw/base224.h b/ucw/base224.h new file mode 100644 index 0000000..beb1748 --- /dev/null +++ b/ucw/base224.h @@ -0,0 +1,36 @@ +/* + * UCW Library -- Base 224 Encoding & Decoding + * + * (c) 2002 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +/** + * Encodes @len bytes of data pointed to by @src by base224 encoding. + * Stores them in @dest and returns the number of bytes the output + * takes. + */ +uns base224_encode(byte *dest, const byte *src, uns len); +/** + * Decodes @len bytes of data pointed to by @src from base224 encoding. + * All invalid characters are ignored. The result is stored into @dest + * and length of the result is returned. + */ +uns base224_decode(byte *dest, const byte *src, uns len); + +/** + * Use this macro to calculate @base224_encode() output buffer size. + * It can happen 4 more bytes would be needed, this macro takes care + * of that. 
+ */ +#define BASE224_ENC_LENGTH(x) (((x)*8+38)/39*5) + +/* + * When called for BASE224_IN_CHUNK-byte chunks, the result will be + * always BASE224_OUT_CHUNK bytes long. If a longer block is split + * to such chunks, the result will be identical. + */ +#define BASE224_IN_CHUNK 39 /** Chunk size on the un-encoded side. **/ +#define BASE224_OUT_CHUNK 40 /** Chunk size on the encoded side. **/ diff --git a/lib/base64.c b/ucw/base64.c similarity index 98% rename from lib/base64.c rename to ucw/base64.c index ef8faa4..9865f4c 100644 --- a/lib/base64.c +++ b/ucw/base64.c @@ -9,8 +9,8 @@ #undef LOCAL_DEBUG -#include "lib/lib.h" -#include "lib/base64.h" +#include "ucw/lib.h" +#include "ucw/base64.h" #include diff --git a/lib/base64.h b/ucw/base64.h similarity index 50% rename from lib/base64.h rename to ucw/base64.h index 7890966..31c1cb8 100644 --- a/lib/base64.h +++ b/ucw/base64.h @@ -7,11 +7,21 @@ * of the GNU Lesser General Public License. */ +/** + * Encodes @len bytes of data pointed to by @src by base64 encoding. + * Stores them in @dest and returns the number of bytes the output + * takes. + */ uns base64_encode(byte *dest, const byte *src, uns len); +/** + * Decodes @len bytes of data pointed to by @src from base64 encoding. + * All invalid characters are ignored. The result is stored into @dest + * and length of the result is returned. + */ uns base64_decode(byte *dest, const byte *src, uns len); -/* - * Use this macro to calculate buffer size. +/** + * Use this macro to calculate @base64_encode() output buffer size. */ #define BASE64_ENC_LENGTH(x) (((x)+2)/3 *4) @@ -20,6 +30,6 @@ uns base64_decode(byte *dest, const byte *src, uns len); * always BASE64_OUT_CHUNK bytes long. If a longer block is split * to such chunks, the result will be identical. */ -#define BASE64_IN_CHUNK 3 -#define BASE64_OUT_CHUNK 4 +#define BASE64_IN_CHUNK 3 /** Size of chunk on the un-encoded side. **/ +#define BASE64_OUT_CHUNK 4 /** Size of chunk on the encoded side. 
**/ diff --git a/ucw/basecode.t b/ucw/basecode.t new file mode 100644 index 0000000..a704813 --- /dev/null +++ b/ucw/basecode.t @@ -0,0 +1,16 @@ +# Tests for base64 and base224 modules + +Name: Base64 encode +Run: ../obj/ucw/basecode -e +Input: Here are some test data +Output: SGVyZSBhcmUgc29tZSB0ZXN0IGRhdGEK + +Name: Base64 decode +Run: ../obj/ucw/basecode -d +Input: SGVyZSBhcmUgc29tZSB0ZXN0IGRhdGEK +Output: Here are some test data + +Name: Base224 encode & decode +Run: ../obj/ucw/basecode -E | ../obj/ucw/basecode -D +Input: Some more test data for 224 encoding +Output: Some more test data for 224 encoding diff --git a/lib/bbuf.c b/ucw/bbuf.c similarity index 97% rename from lib/bbuf.c rename to ucw/bbuf.c index 9d4af26..61c2a27 100644 --- a/lib/bbuf.c +++ b/ucw/bbuf.c @@ -7,8 +7,8 @@ * of the GNU Lesser General Public License. */ -#include "lib/lib.h" -#include "lib/bbuf.h" +#include "ucw/lib.h" +#include "ucw/bbuf.h" #include diff --git a/ucw/bbuf.h b/ucw/bbuf.h new file mode 100644 index 0000000..ecc1f66 --- /dev/null +++ b/ucw/bbuf.h @@ -0,0 +1,46 @@ +/* + * UCW Library -- A simple growing buffer for byte-sized items. + * + * (c) 2004 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#ifndef _UCW_BBUF_H +#define _UCW_BBUF_H + +#define GBUF_TYPE byte +#define GBUF_PREFIX(x) bb_##x +#include "ucw/gbuf.h" + +/** + * printf() into a growing buffer with `va_list` arguments. + * Generates a `'\0'`-terminated string at the beginning of the buffer + * and returns pointer to it. + * + * See @bb_printf(). + **/ +char *bb_vprintf(bb_t *bb, const char *fmt, va_list args); +/** + * printf() into a growing buffer. + * Generates a `'\0'`-terminated string at the beginning of the buffer + * and returns pointer to it. + * + * See @bb_vprintf(). 
+ **/ +char *bb_printf(bb_t *bb, const char *fmt, ...); +/** + * Like @bb_vprintf(), but it does not start at the beginning of the + * buffer, but @ofs bytes further. + * + * Returns pointer to the new string (eg. @ofs bytes after the + * beginning of buffer). + **/ +char *bb_vprintf_at(bb_t *bb, uns ofs, const char *fmt, va_list args); +/** + * Like @bb_vprintf_at(), but it takes individual arguments. + **/ +char *bb_printf_at(bb_t *bb, uns ofs, const char *fmt, ...); + +#endif diff --git a/lib/bbuf.t b/ucw/bbuf.t similarity index 82% rename from lib/bbuf.t rename to ucw/bbuf.t index ebf9ecc..ca8f64f 100644 --- a/lib/bbuf.t +++ b/ucw/bbuf.t @@ -1,4 +1,4 @@ # Tests for growing buffers -Run: ../obj/lib/bbuf-t +Run: ../obj/ucw/bbuf-t Out: diff --git a/lib/bigalloc.c b/ucw/bigalloc.c similarity index 97% rename from lib/bigalloc.c rename to ucw/bigalloc.c index 9581188..3133170 100644 --- a/lib/bigalloc.c +++ b/ucw/bigalloc.c @@ -8,7 +8,7 @@ * of the GNU Lesser General Public License. */ -#include "lib/lib.h" +#include "ucw/lib.h" #include #include @@ -17,6 +17,8 @@ void * page_alloc(u64 len) { + if (!len) + return NULL; if (len > SIZE_MAX) die("page_alloc: Size %llu is too large for the current architecture", (long long) len); ASSERT(!(len & (CPU_PAGE_SIZE-1))); diff --git a/lib/binheap-node.h b/ucw/binheap-node.h similarity index 64% rename from lib/binheap-node.h rename to ucw/binheap-node.h index 44be9f4..c0cb16b 100644 --- a/lib/binheap-node.h +++ b/ucw/binheap-node.h @@ -7,6 +7,18 @@ * of the GNU Lesser General Public License. */ +#ifndef _UCW_BINHEAP_NODE_H +#define _UCW_BINHEAP_NODE_H + +/*** + * [[common]] + * Common definitions + * ------------------ + ***/ + +/** + * Common header of binomial heap nodes. + **/ struct bh_node { struct bh_node *first_son; struct bh_node *last_son; @@ -14,6 +26,11 @@ struct bh_node { byte order; }; +/** + * A binomial heap. 
+ **/ struct bh_heap { struct bh_node root; }; + +#endif diff --git a/lib/binheap-test.c b/ucw/binheap-test.c similarity index 95% rename from lib/binheap-test.c rename to ucw/binheap-test.c index bfd28a0..9589b07 100644 --- a/lib/binheap-test.c +++ b/ucw/binheap-test.c @@ -7,7 +7,7 @@ * of the GNU Lesser General Public License. */ -#include "lib/lib.h" +#include "ucw/lib.h" #include #include @@ -16,7 +16,7 @@ #define BH_WANT_INSERT #define BH_WANT_FINDMIN #define BH_WANT_DELETEMIN -#include "lib/binheap-node.h" +#include "ucw/binheap-node.h" struct item { struct bh_node n; @@ -52,7 +52,7 @@ bht_dump(struct bh_heap *h) bht_do_dump(b, b->last_son, 1); } -#include "lib/binheap.h" +#include "ucw/binheap.h" int main(void) { diff --git a/ucw/binheap-test.t b/ucw/binheap-test.t new file mode 100644 index 0000000..109d6e9 --- /dev/null +++ b/ucw/binheap-test.t @@ -0,0 +1,5 @@ +# Test for the binheap module + +Run: ../obj/ucw/binheap-test +Out: cnt=1048576 + root diff --git a/lib/binheap.h b/ucw/binheap.h similarity index 63% rename from lib/binheap.h rename to ucw/binheap.h index 0c6ee5a..42253e1 100644 --- a/lib/binheap.h +++ b/ucw/binheap.h @@ -12,49 +12,53 @@ * this file with parameters set in the corresponding preprocessor macros * as described below, it generates functions for manipulating the particular * version of the binomial heap. + */ + +/*** + * [[generator]] + * Interface to the generator + * -------------------------- * - * You need to specify: + * To use the binomial heaps, you need to specify: * - * BH_PREFIX(x) macro to add a name prefix (used on all global names - * defined by the hash table generator). All further - * names mentioned here except for macro names will be - * implicitly prefixed. + * - `BH_PREFIX(x)` -- macro to add a name prefix (used on all global names + * defined by the generator). All further names mentioned + * here except for macro names will be implicitly prefixed. 
* - * Then you continue by including "lib/binheap-node.h" which defines struct bh_node - * and struct bh_root (both without prefix). The heap elements are always allocated by - * you and they must include struct bh_node which serves as a handle used for all - * the heap functions and it contains all information needed for heap-keeping. - * The heap itself is also allocated by you and it's represented by struct bh_heap. + * Then you continue by including `ucw/binheap-node.h` which defines `struct bh_node` + * and `struct bh_heap` (both without prefix). The heap elements are always allocated by + * you and they must include `struct bh_node` which serves as a handle used for all + * the heap functions and it contains all information needed for heap-keeping. + * The heap itself is also allocated by you and it's represented by `struct bh_heap`. * - * When you have the declaration of heap nodes, you continue with defining: + * When you have the declaration of heap nodes, you continue with defining: * - * less(p,q) returns 1 if the key corresponding to bh_node *p - * is less than the one corresponding to *q. + * - `less(p,q)` -- returns `1` if the key corresponding to `bh_node *p` + * is less than the one corresponding to `*q`. * - * Then specify what operations you request: + * Then specify what operations you request: * - * init(heap*) -- initialize the heap. - * BH_WANT_INSERT insert(heap*, node*) -- insert the node to the heap. - * BH_WANT_FINDMIN node *findmin(heap*) -- find node with minimum key. - * BH_WANT_DELETEMIN node *deletemin(heap*) -- findmin and delete the node. + * - `init(heap\*)` -- initialize the heap (always defined). + * - `insert(heap\*, node\*)` -- insert the node to the heap (`BH_WANT_INSERT`). + * - `node\* findmin(heap\*)` -- find node with minimum key (`BH_WANT_FINDMIN`). + * - `node\* deletemin(heap\*)` -- findmin and delete the node (`BH_WANT_DELETEMIN`).
* - * Then include "lib/binheap.h" and voila, you have a binomial heap - * suiting all your needs (at least those which you've revealed :) ). + * Then include `ucw/binheap.h` and voila, you have a binomial heap + * suiting all your needs (at least those which you've revealed :) ). * - * You also get a iterator macro at no extra charge: + * You also get an iterator macro at no extra charge: * - * BH_FOR_ALL(bh_prefix, hash*, variable) - * { - * // node *variable gets declared automatically - * do_something_with_node(variable); - * // use BH_BREAK and BH_CONTINUE instead of break and continue - * // you must not alter contents of the hash table here - * } - * BH_END_FOR; + * BH_FOR_ALL(bh_prefix, heap*, variable) + * { + * // node* variable gets declared automatically + * do_something_with_node(variable); + * // use BH_BREAK and BH_CONTINUE instead of break and continue + * // you must not alter contents of the binomial heap here + * } + * BH_END_FOR; * - * After including this file, all parameter macros are automatically - * undef'd. - */ + * After including this file, all parameter macros are automatically undef'd. + ***/ #define BH_NODE struct bh_node #define BH_HEAP struct bh_heap @@ -107,7 +111,7 @@ BH_PREFIX(merge)(BH_NODE *a, BH_NODE *b) s->next_sibling = q; q = s; } - else /* otherwise put the result to the a's list */ + else /* otherwise put the result to the a's list */ { p = s->next_sibling = *pp; *pp = s; diff --git a/lib/binsearch.h b/ucw/binsearch.h similarity index 50% rename from lib/binsearch.h rename to ucw/binsearch.h index 6741956..f625c2a 100644 --- a/lib/binsearch.h +++ b/ucw/binsearch.h @@ -7,6 +7,17 @@ * of the GNU Lesser General Public License. */ +/*** + * [[defs]] + * Definitions + * ----------- + ***/ + +/** + * Find the first element not lower than @x in the sorted array @ary of @N elements (non-decreasing order). + * Returns the index of the found element or @N if no such element exists. Uses `ary_lt_x(ary,i,x)` to compare the @i'th element with @x.
+ * The time complexity is `O(log(N))`. + **/ #define BIN_SEARCH_FIRST_GE_CMP(ary,N,x,ary_lt_x) ({ \ uns l = 0, r = (N); \ while (l < r) \ @@ -20,7 +31,18 @@ l; \ }) +/** + * The default comparison macro for @BIN_SEARCH_FIRST_GE_CMP(). + **/ #define ARY_LT_NUM(ary,i,x) (ary)[i] < (x) +/** + * Same as @BIN_SEARCH_FIRST_GE_CMP(), but uses the default `<` operator for comparisons. + **/ #define BIN_SEARCH_FIRST_GE(ary,N,x) BIN_SEARCH_FIRST_GE_CMP(ary,N,x,ARY_LT_NUM) + +/** + * Search the sorted array @ary of @N elements (non-decreasing) for the first occurrence of @x. + * Returns the index or -1 if no such element exists. Uses the `<` operator for comparisons. + **/ #define BIN_SEARCH_EQ(ary,N,x) ({ int i = BIN_SEARCH_FIRST_GE(ary,N,x); if (i >= (N) || (ary)[i] != (x)) i=-1; i; }) diff --git a/lib/bit-ffs.c b/ucw/bit-ffs.c similarity index 96% rename from lib/bit-ffs.c rename to ucw/bit-ffs.c index 8a9198d..0775f53 100644 --- a/lib/bit-ffs.c +++ b/ucw/bit-ffs.c @@ -7,8 +7,8 @@ * of the GNU Lesser General Public License. */ -#include "lib/lib.h" -#include "lib/bitops.h" +#include "ucw/lib.h" +#include "ucw/bitops.h" /* Just a table, the rest is in bitops.h */ diff --git a/lib/bit-fls.c b/ucw/bit-fls.c similarity index 93% rename from lib/bit-fls.c rename to ucw/bit-fls.c index 6a6227d..ef38df3 100644 --- a/lib/bit-fls.c +++ b/ucw/bit-fls.c @@ -7,8 +7,8 @@ * of the GNU Lesser General Public License.
*/ -#include "lib/lib.h" -#include "lib/bitops.h" +#include "ucw/lib.h" +#include "ucw/bitops.h" int bit_fls(u32 x) diff --git a/lib/bitarray.h b/ucw/bitarray.h similarity index 100% rename from lib/bitarray.h rename to ucw/bitarray.h diff --git a/lib/bitops.h b/ucw/bitops.h similarity index 100% rename from lib/bitops.h rename to ucw/bitops.h diff --git a/lib/bitops.t b/ucw/bitops.t similarity index 84% rename from lib/bitops.t rename to ucw/bitops.t index 97b2b35..4224066 100644 --- a/lib/bitops.t +++ b/ucw/bitops.t @@ -1,6 +1,6 @@ # Tests for bitops modules -Run: ../obj/lib/bit-ffs-t +Run: ../obj/ucw/bit-ffs-t In: 1 2 3 @@ -26,7 +26,7 @@ Out: 0 30 31 -Run: ../obj/lib/bit-fls-t +Run: ../obj/ucw/bit-fls-t In: 1 2 3 diff --git a/lib/bitsig.c b/ucw/bitsig.c similarity index 95% rename from lib/bitsig.c rename to ucw/bitsig.c index 8ffe8db..85b18a6 100644 --- a/lib/bitsig.c +++ b/ucw/bitsig.c @@ -41,9 +41,9 @@ * of the GNU Lesser General Public License. */ -#include "lib/lib.h" -#include "lib/bitsig.h" -#include "lib/md5.h" +#include "ucw/lib.h" +#include "ucw/bitsig.h" +#include "ucw/md5.h" #include @@ -85,11 +85,7 @@ bitsig_free(struct bitsig *b) static void bitsig_hash_init(struct bitsig *b, byte *item) { - struct MD5Context c; - - MD5Init(&c); - MD5Update(&c, item, strlen(item)); - MD5Final((byte *) b->hash, &c); + md5_hash_buffer((byte *) b->hash, item, strlen(item)); b->hindex = 0; } diff --git a/lib/bitsig.h b/ucw/bitsig.h similarity index 89% rename from lib/bitsig.h rename to ucw/bitsig.h index 60a5b14..d9e78e2 100644 --- a/lib/bitsig.h +++ b/ucw/bitsig.h @@ -7,9 +7,14 @@ * of the GNU Lesser General Public License. 
*/ +#ifndef _UCW_BITSIG_H +#define _UCW_BITSIG_H + struct bitsig; struct bitsig *bitsig_init(uns perrlog, uns maxn); void bitsig_free(struct bitsig *b); int bitsig_member(struct bitsig *b, byte *item); int bitsig_insert(struct bitsig *b, byte *item); + +#endif diff --git a/lib/carefulio.c b/ucw/carefulio.c similarity index 97% rename from lib/carefulio.c rename to ucw/carefulio.c index b8d865d..44e01e1 100644 --- a/lib/carefulio.c +++ b/ucw/carefulio.c @@ -7,7 +7,7 @@ * of the GNU Lesser General Public License. */ -#include "lib/lib.h" +#include "ucw/lib.h" #include diff --git a/lib/str_ctype.c b/ucw/char-cat.c similarity index 72% rename from lib/str_ctype.c rename to ucw/char-cat.c index 2857d8e..e1bd281 100644 --- a/lib/str_ctype.c +++ b/ucw/char-cat.c @@ -7,10 +7,11 @@ * of the GNU Lesser General Public License. */ -#include "lib/chartype.h" +#include "ucw/lib.h" +#include "ucw/chartype.h" -const unsigned char _c_cat[256] = { +const byte _c_cat[256] = { #define CHAR(code,upper,lower,cat) cat, -#include "lib/charmap.h" +#include "ucw/char-map.h" #undef CHAR }; diff --git a/lib/str_lower.c b/ucw/char-lower.c similarity index 72% rename from lib/str_lower.c rename to ucw/char-lower.c index f548a11..91ff8aa 100644 --- a/lib/str_lower.c +++ b/ucw/char-lower.c @@ -7,10 +7,11 @@ * of the GNU Lesser General Public License. */ -#include "lib/chartype.h" +#include "ucw/lib.h" +#include "ucw/chartype.h" -const unsigned char _c_lower[256] = { +const byte _c_lower[256] = { #define CHAR(code,upper,lower,cat) lower, -#include "lib/charmap.h" +#include "ucw/char-map.h" #undef CHAR }; diff --git a/lib/charmap.h b/ucw/char-map.h similarity index 100% rename from lib/charmap.h rename to ucw/char-map.h diff --git a/lib/str_upper.c b/ucw/char-upper.c similarity index 72% rename from lib/str_upper.c rename to ucw/char-upper.c index e527956..53846d7 100644 --- a/lib/str_upper.c +++ b/ucw/char-upper.c @@ -7,10 +7,11 @@ * of the GNU Lesser General Public License. 
*/ -#include "lib/chartype.h" +#include "ucw/lib.h" +#include "ucw/chartype.h" -const unsigned char _c_upper[256] = { +const byte _c_upper[256] = { #define CHAR(code,upper,lower,cat) upper, -#include "lib/charmap.h" +#include "ucw/char-map.h" #undef CHAR }; diff --git a/ucw/chartype.h b/ucw/chartype.h new file mode 100644 index 0000000..055b540 --- /dev/null +++ b/ucw/chartype.h @@ -0,0 +1,64 @@ +/* + * UCW Library -- Character Types + * + * (c) 1997--2004 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#ifndef _UCW_CHARTYPE_H +#define _UCW_CHARTYPE_H + +/*** + * We define our own routines to classify 8-bit characters (based on US-ASCII charset). + * This way we bypass most possible problems with different compilation environments. + * + * All functions and macros accept any numbers and if it is necessary, they simply ignore higher bits. + * It does not matter whether a parameter is signed or unsigned. Parameters are evaluated exactly once, + * so they can have side-effects. + ***/ + +#define _C_UPPER 1 /* Upper-case letters */ +#define _C_LOWER 2 /* Lower-case letters */ +#define _C_PRINT 4 /* Printable */ +#define _C_DIGIT 8 /* Digits */ +#define _C_CTRL 16 /* Control characters */ +#define _C_XDIGIT 32 /* Hexadecimal digits */ +#define _C_BLANK 64 /* White spaces (spaces, tabs, newlines) */ +#define _C_INNER 128 /* `inner punctuation' -- underscore etc. */ + +#define _C_ALPHA (_C_UPPER | _C_LOWER) +#define _C_ALNUM (_C_ALPHA | _C_DIGIT) +#define _C_WORD (_C_ALNUM | _C_INNER) +#define _C_WSTART (_C_ALPHA | _C_INNER) + +extern const byte _c_cat[256], _c_upper[256], _c_lower[256]; + +#define Category(x) (_c_cat[(byte)(x)]) +#define Ccat(x,y) (Category(x) & y) + +#define Cupper(x) Ccat(x, _C_UPPER) /** Checks for an upper-case character (`A-Z`). **/ +#define Clower(x) Ccat(x, _C_LOWER) /** Checks for a lower-case character (`a-z`). 
**/ +#define Calpha(x) Ccat(x, _C_ALPHA) /** Checks for an alphabetic character (`a-z`, `A-Z`). **/ +#define Calnum(x) Ccat(x, _C_ALNUM) /** Checks for an alpha-numeric character (`a-z`, `A-Z`, `0-9`). **/ +#define Cprint(x) Ccat(x, _C_PRINT) /** Checks for printable characters, including 8-bit values (`\t`, `0x20-0x7E`, `0x80-0xFF`). **/ +#define Cdigit(x) Ccat(x, _C_DIGIT) /** Checks for a digit (`0-9`). **/ +#define Cxdigit(x) Ccat(x, _C_XDIGIT) /** Checks for a hexadecimal digit (`0-9`, `a-f`, `A-F`). **/ +#define Cword(x) Ccat(x, _C_WORD) /** Checks for an alpha-numeric character or inner punctuation (`a-z`, `A-Z`, `0-9`, `_`). **/ +#define Cblank(x) Ccat(x, _C_BLANK) /** Checks for a white space (`0x20`, `\t`, `\n`, `\r`, `0x8`, `0xC`). **/ +#define Cctrl(x) Ccat(x, _C_CTRL) /** Checks for control characters (`0x0-0x1F`, `0x7F`). **/ +#define Cspace(x) Cblank(x) + +#define Cupcase(x) (_c_upper[(byte)(x)]) /** Convert a letter to upper case, leave non-letter characters unchanged. **/ +#define Clocase(x) (_c_lower[(byte)(x)]) /** Convert a letter to lower case, leave non-letter characters unchanged. **/ + +/** + * Compute the value of a valid hexadecimal character (ie. passed the @Cxdigit() check). + **/ +static inline uns Cxvalue(byte x) +{ + return (x < (uns)'A') ? x - '0' : (x & 0xdf) - 'A' + 10; +} + +#endif diff --git a/lib/clists.h b/ucw/clists.h similarity index 57% rename from lib/clists.h rename to ucw/clists.h index 921b7dc..17d2201 100644 --- a/lib/clists.h +++ b/ucw/clists.h @@ -10,46 +10,98 @@ #ifndef _UCW_CLISTS_H #define _UCW_CLISTS_H +/** + * Common header for list nodes. + **/ typedef struct cnode { struct cnode *next, *prev; } cnode; +/** + * Circular linked list. + **/ typedef struct clist { struct cnode head; } clist; +/** + * Initialize a new circular linked list. Must be called before any other function.
+ **/ +static inline void clist_init(clist *l) +{ + cnode *head = &l->head; + head->next = head->prev = head; +} + +/** + * Return the first node on @l or NULL if @l is empty. + **/ static inline void *clist_head(clist *l) { return (l->head.next != &l->head) ? l->head.next : NULL; } +/** + * Return the last node on @l or NULL if @l is empty. + **/ static inline void *clist_tail(clist *l) { return (l->head.prev != &l->head) ? l->head.prev : NULL; } +/** + * Find the next node to @n or NULL if @n is the last one. + **/ static inline void *clist_next(clist *l, cnode *n) { return (n->next != &l->head) ? (void *) n->next : NULL; } +/** + * Find the previous node to @n or NULL if @n is the first one. + **/ static inline void *clist_prev(clist *l, cnode *n) { return (n->prev != &l->head) ? (void *) n->prev : NULL; } +/** + * Return a non-zero value iff @l is empty. + **/ static inline int clist_empty(clist *l) { return (l->head.next == &l->head); } +/** + * Loop over all nodes in the @list and perform the next C statement on them. The current node is stored in @n which must be defined before as pointer to any type. + * The list should not be changed during this loop command. + **/ #define CLIST_WALK(n,list) for(n=(void*)(list).head.next; (cnode*)(n) != &(list).head; n=(void*)((cnode*)(n))->next) + +/** + * Same as @CLIST_WALK(), but allows removal of the current node. This macro requires one more variable to store some temporary pointers. + **/ #define CLIST_WALK_DELSAFE(n,list,tmp) for(n=(void*)(list).head.next; tmp=(void*)((cnode*)(n))->next, (cnode*)(n) != &(list).head; n=(void*)tmp) + +/** + * Same as @CLIST_WALK(), but it defines the variable for the current node in place. @type should be a pointer type. + **/ #define CLIST_FOR_EACH(type,n,list) for(type n=(void*)(list).head.next; (cnode*)(n) != &(list).head; n=(void*)((cnode*)(n))->next) + +/** + * Same as @CLIST_WALK_DELSAFE(), but it defines the variable for the current node in place. 
@type should be a pointer type. The temporary variable must be still known before. + **/ #define CLIST_FOR_EACH_DELSAFE(type,n,list,tmp) for(type n=(void*)(list).head.next; tmp=(void*)((cnode*)(n))->next, (cnode*)(n) != &(list).head; n=(void*)tmp) +/** + * Reversed version of @CLIST_FOR_EACH(). + **/ #define CLIST_FOR_EACH_BACKWARDS(type,n,list) for(type n=(void*)(list).head.prev; (cnode*)(n) != &(list).head; n=(void*)((cnode*)(n))->prev) +/** + * Insert a new node just after the node @after. To insert at the head of the list, use @clist_add_head() instead. + **/ static inline void clist_insert_after(cnode *what, cnode *after) { cnode *before = after->next; @@ -59,6 +111,9 @@ static inline void clist_insert_after(cnode *what, cnode *after) after->next = what; } +/** + * Insert a new node just before the node @before. To insert at the tail of the list, use @clist_add_tail() instead. + **/ static inline void clist_insert_before(cnode *what, cnode *before) { cnode *after = before->prev; @@ -68,16 +123,25 @@ static inline void clist_insert_before(cnode *what, cnode *before) after->next = what; } -static inline void clist_add_tail(clist *l, cnode *n) +/** + * Insert a new node in front of all other nodes. + **/ +static inline void clist_add_head(clist *l, cnode *n) { - clist_insert_before(n, &l->head); + clist_insert_after(n, &l->head); } -static inline void clist_add_head(clist *l, cnode *n) +/** + * Insert a new node after all other nodes. + **/ +static inline void clist_add_tail(clist *l, cnode *n) { - clist_insert_after(n, &l->head); + clist_insert_before(n, &l->head); } +/** + * Remove node @n. + **/ static inline void clist_remove(cnode *n) { cnode *before = n->prev; @@ -86,6 +150,9 @@ static inline void clist_remove(cnode *n) after->prev = before; } +/** + * Remove the first node in @l. The list can be empty. 
+ **/ static inline void *clist_remove_head(clist *l) { cnode *n = clist_head(l); @@ -94,6 +161,9 @@ static inline void *clist_remove_head(clist *l) return n; } +/** + * Remove the last node in @l. The list can be empty. + **/ static inline void *clist_remove_tail(clist *l) { cnode *n = clist_tail(l); @@ -102,12 +172,10 @@ static inline void *clist_remove_tail(clist *l) return n; } -static inline void clist_init(clist *l) -{ - cnode *head = &l->head; - head->next = head->prev = head; -} - +/** + * Merge two lists by inserting the list @what just after the node @after in a different list. + * The first list is then cleared. + **/ static inline void clist_insert_list_after(clist *what, cnode *after) { if (!clist_empty(what)) @@ -121,6 +189,20 @@ static inline void clist_insert_list_after(clist *what, cnode *after) } } +/** + * Move all items from a source list to a destination list. The source list + * becomes empty, the original contents of the destination list are destroyed. + **/ +static inline void clist_move(clist *to, clist *from) +{ + clist_init(to); + clist_insert_list_after(from, &to->head); + clist_init(from); +} + +/** + * Compute the number of nodes in @l. Beware linear time complexity. + **/ static inline uns clist_size(clist *l) { uns i = 0; diff --git a/lib/conf-alloc.c b/ucw/conf-alloc.c similarity index 91% rename from lib/conf-alloc.c rename to ucw/conf-alloc.c index 9f02fd5..1a0de4f 100644 --- a/lib/conf-alloc.c +++ b/ucw/conf-alloc.c @@ -8,9 +8,9 @@ * of the GNU Lesser General Public License. 
*/ -#include "lib/lib.h" -#include "lib/conf.h" -#include "lib/mempool.h" +#include "ucw/lib.h" +#include "ucw/conf.h" +#include "ucw/mempool.h" struct mempool *cf_pool; // current pool for loading new configuration diff --git a/lib/conf-dump.c b/ucw/conf-dump.c similarity index 95% rename from lib/conf-dump.c rename to ucw/conf-dump.c index 0d40924..a2a92df 100644 --- a/lib/conf-dump.c +++ b/ucw/conf-dump.c @@ -8,12 +8,12 @@ * of the GNU Lesser General Public License. */ -#include "lib/lib.h" -#include "lib/conf.h" -#include "lib/getopt.h" -#include "lib/conf-internal.h" -#include "lib/clists.h" -#include "lib/fastbuf.h" +#include "ucw/lib.h" +#include "ucw/conf.h" +#include "ucw/getopt.h" +#include "ucw/conf-internal.h" +#include "ucw/clists.h" +#include "ucw/fastbuf.h" static void spaces(struct fastbuf *fb, uns nr) diff --git a/lib/conf-input.c b/ucw/conf-input.c similarity index 82% rename from lib/conf-input.c rename to ucw/conf-input.c index c5d2527..fffc6d5 100644 --- a/lib/conf-input.c +++ b/ucw/conf-input.c @@ -2,20 +2,22 @@ * UCW Library -- Configuration files: parsing input streams * * (c) 2001--2006 Robert Spalek - * (c) 2003--2006 Martin Mares + * (c) 2003--2009 Martin Mares * * This software may be freely distributed and used according to the terms * of the GNU Lesser General Public License. 
*/ -#include "lib/lib.h" -#include "lib/conf.h" -#include "lib/getopt.h" -#include "lib/conf-internal.h" -#include "lib/mempool.h" -#include "lib/fastbuf.h" -#include "lib/chartype.h" -#include "lib/stkstring.h" +#include "ucw/lib.h" +#include "ucw/conf.h" +#include "ucw/getopt.h" +#include "ucw/conf-internal.h" +#include "ucw/clists.h" +#include "ucw/mempool.h" +#include "ucw/fastbuf.h" +#include "ucw/chartype.h" +#include "ucw/string.h" +#include "ucw/stkstring.h" #include #include @@ -31,13 +33,13 @@ static uns line_num; static char line_buf[MAX_LINE]; static char *line = line_buf; -#include "lib/bbuf.h" +#include "ucw/bbuf.h" static bb_t copy_buf; static uns copied; #define GBUF_TYPE uns #define GBUF_PREFIX(x) split_##x -#include "lib/gbuf.h" +#include "ucw/gbuf.h" static split_t word_buf; static uns words; static uns ends_by_brace; // the line is ended by "{" @@ -221,7 +223,7 @@ parse_fastbuf(const char *name_fb, struct fastbuf *fb, uns depth) else if (depth > 8) err = "Too many nested files"; else if (*line && *line != '#') // because the contents of line_buf is not re-entrant and will be cleared - err = "The input command must be the last one on a line"; + err = "The include command must be the last one on a line"; if (err) goto error; struct fastbuf *new_fb = bopen_try(pars[0], O_RDONLY, 1<<14); @@ -253,7 +255,7 @@ parse_fastbuf(const char *name_fb, struct fastbuf *fb, uns depth) default: op = OP_ALL; }; break; case 'p': op = OP_PREPEND; break; - case 'r': op = OP_REMOVE; break; + case 'r': op = (c[1] && Clocase(c[2]) == 'm') ? 
OP_REMOVE : OP_RESET; break; case 'e': op = OP_EDIT; break; case 'b': op = OP_BEFORE; break; default: op = OP_SET; break; @@ -283,6 +285,7 @@ error: #define DEFAULT_CONFIG NULL #endif char *cf_def_file = DEFAULT_CONFIG; +static int cf_def_loaded; #ifndef ENV_VAR_CONFIG #define ENV_VAR_CONFIG NULL @@ -315,10 +318,7 @@ load_file(const char *file) } char *err_msg = parse_fastbuf(file, fb, 0); bclose(fb); - int err = !!err_msg || done_stack(); - if (!err) - cf_def_file = NULL; - return err; + return !!err_msg || done_stack(); } static int @@ -333,6 +333,30 @@ load_string(const char *string) /* Safe loading and reloading */ +struct conf_entry { /* We remember a list of actions to apply upon reload */ + cnode n; + enum { + CE_FILE = 1, + CE_STRING = 2, + } type; + char *arg; +}; + +static clist conf_entries; + +static void +cf_remember_entry(uns type, const char *arg) +{ + if (!cf_need_journal) + return; + if (!postpone_commit) + return; + struct conf_entry *ce = cf_malloc(sizeof(*ce)); + ce->type = type; + ce->arg = cf_strdup(arg); + clist_add_tail(&conf_entries, &ce->n); +} + int cf_reload(const char *file) { @@ -340,17 +364,39 @@ cf_reload(const char *file) struct cf_journal_item *oldj = cf_journal_new_transaction(1); uns ec = everything_committed; everything_committed = 0; - int err = load_file(file); + + if (!conf_entries.head.next) + clist_init(&conf_entries); + clist old_entries; + clist_move(&old_entries, &conf_entries); + postpone_commit = 1; + + int err = 0; + if (file) + err = load_file(file); + else + CLIST_FOR_EACH(struct conf_entry *, ce, old_entries) { + if (ce->type == CE_FILE) + err |= load_file(ce->arg); + else + err |= load_string(ce->arg); + if (err) + break; + cf_remember_entry(ce->type, ce->arg); + } + + postpone_commit = 0; if (!err) - { + err |= done_stack(); + + if (!err) { cf_journal_delete(); cf_journal_commit_transaction(1, NULL); - } - else - { + } else { everything_committed = ec; cf_journal_rollback_transaction(1, oldj); cf_journal_swap(); 
+ clist_move(&conf_entries, &old_entries); } return err; } @@ -360,9 +406,11 @@ cf_load(const char *file) { struct cf_journal_item *oldj = cf_journal_new_transaction(1); int err = load_file(file); - if (!err) + if (!err) { cf_journal_commit_transaction(1, oldj); - else + cf_remember_entry(CE_FILE, file); + cf_def_loaded = 1; + } else cf_journal_rollback_transaction(1, oldj); return err; } @@ -372,9 +420,10 @@ cf_set(const char *string) { struct cf_journal_item *oldj = cf_journal_new_transaction(0); int err = load_string(string); - if (!err) + if (!err) { cf_journal_commit_transaction(0, oldj); - else + cf_remember_entry(CE_STRING, string); + } else cf_journal_rollback_transaction(0, oldj); return err; } @@ -384,6 +433,8 @@ cf_set(const char *string) static void load_default(void) { + if (cf_def_loaded++) + return; if (cf_def_file) { char *env; @@ -397,8 +448,11 @@ load_default(void) } else { - // We need to create an empty pool - cf_journal_commit_transaction(1, cf_journal_new_transaction(1)); + // We need to create an empty pool and initialize all configuration items + struct cf_journal_item *oldj = cf_journal_new_transaction(1); + cf_init_stack(); + done_stack(); + cf_journal_commit_transaction(1, oldj); } } @@ -415,6 +469,9 @@ final_commit(void) int cf_getopt(int argc, char * const argv[], const char *short_opts, const struct option *long_opts, int *long_index) { + clist_init(&conf_entries); + postpone_commit = 1; + static int other_options = 0; while (1) { int res = getopt_long (argc, argv, short_opts, long_opts, long_index); @@ -423,12 +480,10 @@ cf_getopt(int argc, char * const argv[], const char *short_opts, const struct op if (other_options) die("The -S and -C options must precede all other arguments"); if (res == 'S') { - postpone_commit = 1; load_default(); if (cf_set(optarg)) die("Cannot set %s", optarg); } else if (res == 'C') { - postpone_commit = 1; if (cf_load(optarg)) die("Cannot load config file %s", optarg); } @@ -444,12 +499,12 @@ cf_getopt(int 
argc, char * const argv[], const char *short_opts, const struct op #endif } else { /* unhandled option or end of options */ - if (res != ':' && res != '?') + if (res != ':' && res != '?') { load_default(); - final_commit(); + final_commit(); + } other_options++; return res; } } } - diff --git a/lib/conf-internal.h b/ucw/conf-internal.h similarity index 100% rename from lib/conf-internal.h rename to ucw/conf-internal.h diff --git a/lib/conf-intr.c b/ucw/conf-intr.c similarity index 90% rename from lib/conf-intr.c rename to ucw/conf-intr.c index 84f555a..deae450 100644 --- a/lib/conf-intr.c +++ b/ucw/conf-intr.c @@ -8,11 +8,11 @@ * of the GNU Lesser General Public License. */ -#include "lib/lib.h" -#include "lib/conf.h" -#include "lib/getopt.h" -#include "lib/conf-internal.h" -#include "lib/clists.h" +#include "ucw/lib.h" +#include "ucw/conf.h" +#include "ucw/getopt.h" +#include "ucw/conf-internal.h" +#include "ucw/clists.h" #include #include @@ -52,9 +52,9 @@ cf_type_size(enum cf_type type, struct cf_user_type *utype) } static char * -cf_parse_lookup(char *str, int *ptr, char **t) +cf_parse_lookup(char *str, int *ptr, const char * const *t) { - char **n = t; + const char * const *n = t; uns total_len = 0; while (*n && strcasecmp(*n, str)) { total_len += strlen(*n) + 2; @@ -168,21 +168,21 @@ add_to_list(cnode *where, cnode *new_node, enum cf_operation op) { switch (op) { - case OP_EDIT: // edition has been done in-place + case OP_EDIT: // editation has been done in-place break; case OP_REMOVE: CF_JOURNAL_VAR(where->prev->next); CF_JOURNAL_VAR(where->next->prev); clist_remove(where); break; - case OP_AFTER: // implementation dependend (prepend_head = after(list)), and where==list, see clists.h:74 + case OP_AFTER: // implementation dependent (prepend_head = after(list)), and where==list, see clists.h:74 case OP_PREPEND: case OP_COPY: CF_JOURNAL_VAR(where->next->prev); CF_JOURNAL_VAR(where->next); clist_insert_after(new_node, where); break; - case OP_BEFORE: // 
implementation dependend (append_tail = before(list)) + case OP_BEFORE: // implementation dependent (append_tail = before(list)) case OP_APPEND: case OP_SET: CF_JOURNAL_VAR(where->prev->next); @@ -228,6 +228,8 @@ interpret_add_list(struct cf_item *item, int number, char **pars, int *processed static char * interpret_add_bitmap(struct cf_item *item, int number, char **pars, int *processed, u32 *ptr, enum cf_operation op) { + if (op == OP_PREPEND || op == OP_APPEND) + op = OP_SET; if (op != OP_SET && op != OP_REMOVE) return cf_printf("Cannot apply operation %s on a bitmap", cf_op_names[op]); else if (item->type != CT_INT && item->type != CT_LOOKUP) @@ -517,6 +519,21 @@ find_item(struct cf_section *curr_sec, const char *name, char **msg, void **ptr) } } +static char * +interpret_add(char *name, struct cf_item *item, int number, char **pars, int *takenp, void *ptr, enum cf_operation op) +{ + switch (item->cls) { + case CC_DYNAMIC: + return interpret_add_dynamic(item, number, pars, takenp, ptr, op); + case CC_LIST: + return interpret_add_list(item, number, pars, takenp, ptr, op); + case CC_BITMAP: + return interpret_add_bitmap(item, number, pars, takenp, ptr, op); + default: + return cf_printf("Operation %s not supported on attribute %s", cf_op_names[op], name); + } +} + char * cf_interpret_line(char *name, enum cf_operation op, int number, char **pars) { @@ -539,18 +556,22 @@ cf_interpret_line(char *name, enum cf_operation op, int number, char **pars) op &= OP_MASK; int taken = 0; // process as many parameters as possible - if (op == OP_CLEAR || op == OP_ALL) - msg = interpret_set_all(item, ptr, op); - else if (op == OP_SET) - msg = interpret_set_item(item, number, pars, &taken, ptr, 1); - else if (item->cls == CC_DYNAMIC) - msg = interpret_add_dynamic(item, number, pars, &taken, ptr, op); - else if (item->cls == CC_LIST) - msg = interpret_add_list(item, number, pars, &taken, ptr, op); - else if (item->cls == CC_BITMAP) - msg = interpret_add_bitmap(item, number, pars, 
&taken, ptr, op); - else - return cf_printf("Operation %s not supported on attribute %s", cf_op_names[op], name); + switch (op) { + case OP_CLEAR: + case OP_ALL: + msg = interpret_set_all(item, ptr, op); + break; + case OP_SET: + msg = interpret_set_item(item, number, pars, &taken, ptr, 1); + break; + case OP_RESET: + msg = interpret_set_all(item, ptr, OP_CLEAR); + if (!msg) + msg = interpret_add(name, item, number, pars, &taken, ptr, OP_APPEND); + break; + default: + msg = interpret_add(name, item, number, pars, &taken, ptr, op); + } if (msg) return msg; if (taken < number) @@ -576,7 +597,7 @@ cf_find_item(const char *name, struct cf_item *item) } char * -cf_write_item(struct cf_item *item, enum cf_operation op, int number, char **pars) +cf_modify_item(struct cf_item *item, enum cf_operation op, int number, char **pars) { char *msg; int taken = 0; @@ -590,12 +611,19 @@ cf_write_item(struct cf_item *item, enum cf_operation op, int number, char **par break; case OP_APPEND: case OP_PREPEND: - if (item->cls == CC_DYNAMIC) - msg = interpret_add_dynamic(item, number, pars, &taken, item->ptr, op); - else if (item->cls == CC_LIST) - msg = interpret_add_list(item, number, pars, &taken, item->ptr, op); - else - return "The attribute does not support append/prepend"; + switch (item->cls) { + case CC_DYNAMIC: + msg = interpret_add_dynamic(item, number, pars, &taken, item->ptr, op); + break; + case CC_LIST: + msg = interpret_add_list(item, number, pars, &taken, item->ptr, op); + break; + case CC_BITMAP: + msg = interpret_add_bitmap(item, number, pars, &taken, item->ptr, op); + break; + default: + return "The attribute does not support append/prepend"; + } break; case OP_REMOVE: if (item->cls == CC_BITMAP) diff --git a/lib/conf-journal.c b/ucw/conf-journal.c similarity index 94% rename from lib/conf-journal.c rename to ucw/conf-journal.c index 591aa8a..b0b9013 100644 --- a/lib/conf-journal.c +++ b/ucw/conf-journal.c @@ -8,11 +8,11 @@ * of the GNU Lesser General Public License. 
*/ -#include "lib/lib.h" -#include "lib/conf.h" -#include "lib/getopt.h" -#include "lib/conf-internal.h" -#include "lib/mempool.h" +#include "ucw/lib.h" +#include "ucw/conf.h" +#include "ucw/getopt.h" +#include "ucw/conf-internal.h" +#include "ucw/mempool.h" #include diff --git a/lib/conf-parse.c b/ucw/conf-parse.c similarity index 97% rename from lib/conf-parse.c rename to ucw/conf-parse.c index c828462..4cfeee1 100644 --- a/lib/conf-parse.c +++ b/ucw/conf-parse.c @@ -8,9 +8,9 @@ * of the GNU Lesser General Public License. */ -#include "lib/lib.h" -#include "lib/conf.h" -#include "lib/chartype.h" +#include "ucw/lib.h" +#include "ucw/conf.h" +#include "ucw/chartype.h" #include #include diff --git a/lib/conf-section.c b/ucw/conf-section.c similarity index 96% rename from lib/conf-section.c rename to ucw/conf-section.c index 1a9ae17..4112d88 100644 --- a/lib/conf-section.c +++ b/ucw/conf-section.c @@ -8,11 +8,11 @@ * of the GNU Lesser General Public License. */ -#include "lib/lib.h" -#include "lib/conf.h" -#include "lib/conf-internal.h" -#include "lib/clists.h" -#include "lib/binsearch.h" +#include "ucw/lib.h" +#include "ucw/conf.h" +#include "ucw/conf-internal.h" +#include "ucw/clists.h" +#include "ucw/binsearch.h" #include @@ -24,7 +24,7 @@ struct dirty_section { }; #define GBUF_TYPE struct dirty_section #define GBUF_PREFIX(x) dirtsec_##x -#include "lib/gbuf.h" +#include "ucw/gbuf.h" static dirtsec_t dirty; static uns dirties; @@ -44,7 +44,7 @@ cf_add_dirty(struct cf_section *sec, void *ptr) #define ASORT_KEY_TYPE struct dirty_section #define ASORT_ELT(i) dirty.ptr[i] #define ASORT_LT(x,y) x.sec < y.sec || x.sec == y.sec && x.ptr < y.ptr -#include "lib/arraysort.h" +#include "ucw/sorter/array-simple.h" static void sort_dirty(void) diff --git a/lib/conf-test.c b/ucw/conf-test.c similarity index 92% rename from lib/conf-test.c rename to ucw/conf-test.c index 61ba4bc..e226fae 100644 --- a/lib/conf-test.c +++ b/ucw/conf-test.c @@ -4,11 +4,11 @@ * (c) 2006 Robert Spalek 
*/ -#include "lib/lib.h" -#include "lib/conf.h" -#include "lib/getopt.h" -#include "lib/clists.h" -#include "lib/fastbuf.h" +#include "ucw/lib.h" +#include "ucw/conf.h" +#include "ucw/getopt.h" +#include "ucw/clists.h" +#include "ucw/fastbuf.h" #include #include @@ -133,7 +133,7 @@ commit_top(void *ptr UNUSED) return NULL; } -static char *alphabet[] = { "alpha", "beta", "gamma", "delta", NULL }; +static const char * const alphabet[] = { "alpha", "beta", "gamma", "delta", NULL }; static struct cf_section cf_top = { CF_INIT(init_top), CF_COMMIT(commit_top), @@ -153,7 +153,7 @@ static struct cf_section cf_top = { CF_LOOKUP_DYN("look", &look, alphabet, 1000), CF_USER_ARY("numbers", numbers, &u16_type, 10), CF_BITMAP_INT("bitmap1", &bitmap1), - CF_BITMAP_LOOKUP("bitmap2", &bitmap2, ((char*[]) { + CF_BITMAP_LOOKUP("bitmap2", &bitmap2, ((const char* const[]) { "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen", "seventeen", "eighteen", "nineteen", "twenty", NULL // hidden joke here @@ -162,7 +162,7 @@ static struct cf_section cf_top = { } }; -static byte short_opts[] = CF_SHORT_OPTS "v"; +static char short_opts[] = CF_SHORT_OPTS "v"; static struct option long_opts[] = { CF_LONG_OPTS {"verbose", 0, 0, 'v'}, @@ -193,7 +193,7 @@ main(int argc, char *argv[]) { log_init(argv[0]); cf_declare_section("top", &cf_top, 0); - cf_def_file = "lib/conf-test.cf"; + cf_def_file = "ucw/conf-test.cf"; int opt; while ((opt = cf_getopt(argc, argv, short_opts, long_opts, NULL)) >= 0) @@ -209,6 +209,8 @@ main(int argc, char *argv[]) //cf_reload("non-existent file"); cf_load("non-existent file"); cf_set("top.d1 -1.1; top.master b"); + cf_reload(NULL); + cf_reload(NULL); */ struct fastbuf *out = bfdopen(1, 1<<14); diff --git a/lib/conf-test.cf b/ucw/conf-test.cf similarity index 92% rename from lib/conf-test.cf rename to ucw/conf-test.cf index 91e6cc3..d533af4 100644 --- a/lib/conf-test.cf +++ b/ucw/conf-test.cf @@ 
-1,5 +1,5 @@ # test config file -#include lib/conf-test.t ; top.xa=1 +#include ucw/conf-test.t ; top.xa=1 #include 'non-existent file'; #top.xa=1 Top { \ @@ -34,7 +34,7 @@ Top { \ unknown.ignored :-) -top.slaves cairns gpua 7 7 -10% +10% +top.slaves:reset cairns gpua 7 7 -10% +10% top.slaves daintree rafc 4 5 -171% top.slaves coogee pum 9 8 top.slaves:prepend {name=bondi; level=\ diff --git a/ucw/conf.h b/ucw/conf.h new file mode 100644 index 0000000..50dbddc --- /dev/null +++ b/ucw/conf.h @@ -0,0 +1,383 @@ +/* + * UCW Library -- Configuration files + * + * (c) 2001--2006 Robert Spalek + * (c) 2003--2006 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#ifndef _UCW_CONF_H +#define _UCW_CONF_H + +/*** === Data types [[conf_types]] ***/ + +enum cf_class { /** Class of the configuration item. **/ + CC_END, // end of list + CC_STATIC, // single variable or static array + CC_DYNAMIC, // dynamically allocated array + CC_PARSER, // arbitrary parser function + CC_SECTION, // section appears exactly once + CC_LIST, // list with 0..many nodes + CC_BITMAP // of up to 32 items +}; + +enum cf_type { /** Type of a single value. **/ + CT_INT, CT_U64, CT_DOUBLE, // number types + CT_IP, // IP address + CT_STRING, // string type + CT_LOOKUP, // in a string table + CT_USER // user-defined type +}; + +struct fastbuf; + +/** + * A parser function gets an array of (strdup'ed) strings and a pointer with + * the customized information (most likely the target address). It can store + * the parsed value anywhere in any way it likes, however it must first call + * @cf_journal_block() on the overwritten memory block. It returns an error + * message or NULL if everything is all right. + **/ +typedef char *cf_parser(uns number, char **pars, void *ptr); +/** + * A parser function for user-defined types gets a string and a pointer to + * the destination variable. 
It must store the value within [ptr,ptr+size), + * where size is fixed for each type. It should not call @cf_journal_block(). + **/ +typedef char *cf_parser1(char *string, void *ptr); +/** + * An init- or commit-hook gets a pointer to the section or NULL if this + * is the global section. It returns an error message or NULL if everything + * is all right. The init-hook should fill in default values (needed for + * dynamically allocated nodes of link lists or for filling global variables + * that are run-time dependent). The commit-hook should perform sanity + * checks and postprocess the parsed values. Commit-hooks must call + * @cf_journal_block() too. Caveat! init-hooks for static sections must not + * use @cf_malloc() but normal <>. + **/ +typedef char *cf_hook(void *ptr); +/** + * Dumps the contents of a variable of a user-defined type. + **/ +typedef void cf_dumper1(struct fastbuf *fb, void *ptr); +/** + * Similar to init-hook, but it copies attributes from another list node + * instead of setting the attributes to default values. You have to provide + * it if your node contains parsed values and/or sub-lists. + **/ +typedef char *cf_copier(void *dest, void *src); + +struct cf_user_type { /** Structure to store information about user-defined variable type. **/ + uns size; // of the parsed attribute + char *name; // name of the type (for dumping) + cf_parser1 *parser; // how to parse it + cf_dumper1 *dumper; // how to dump the type +}; + +struct cf_section; +struct cf_item { /** Single configuration item. 
**/ + const char *name; // case insensitive + int number; // length of an array or #parameters of a parser (negative means at most) + void *ptr; // pointer to a global variable or an offset in a section + union cf_union { + struct cf_section *sec; // declaration of a section or a list + cf_parser *par; // parser function + const char * const *lookup; // NULL-terminated sequence of allowed strings for lookups + struct cf_user_type *utype; // specification of the user-defined type + } u; + enum cf_class cls:16; // attribute class + enum cf_type type:16; // type of a static or dynamic attribute +}; + +struct cf_section { /** A section. **/ + uns size; // 0 for a global block, sizeof(struct) for a section + cf_hook *init; // fills in default values (no need to bzero) + cf_hook *commit; // verifies parsed data (optional) + cf_copier *copy; // copies values from another instance (optional, no need to copy basic attributes) + struct cf_item *cfg; // CC_END-terminated array of items + uns flags; // for internal use only +}; + +/*** + * [[conf_macros]] + * Convenience macros + * ~~~~~~~~~~~~~~~~~~ + * + * You could create the structures manually, but you can use these macros to + * save some typing. + */ + +/*** + * Declaration of <> + * ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + * + * These macros can be used to configure the <> + * structure. + ***/ + +/** + * Data type of a section. + * If you store the section into a structure, use this macro. + * + * Storing a section into a structure is useful mostly when you may have multiple instances of the + * section (eg. <>). 
+ * + * Example: + * + * struct list_node { + * cnode n; // This one is for the list itself + * char *name; + * uns value; + * }; + * + * static struct clist nodes; + * + * static struct cf_section node = { + * CF_TYPE(struct list_node), + * CF_ITEMS { + * CF_STRING("name", PTR_TO(struct list_node, name)), + * CF_UNS("value", PTR_TO(struct list_node, value)), + * CF_END + * } + * }; + * + * static struct cf_section section = { + * CF_LIST("node", &nodes, &node), + * CF_END + * }; + * + * You could use <> or <> + * macros to create arrays. + */ +#define CF_TYPE(s) .size = sizeof(s) +/** + * An init <>. + * You can use this to initialize dynamically allocated items (for a dynamic array or list). + * The hook returns an error message or NULL if everything was OK. + */ +#define CF_INIT(f) .init = (cf_hook*) f +/** + * A commit <>. + * You can use this one to check sanity of loaded data and postprocess them. + * You must call @cf_journal_block() if you change anything. + * + * Return error message or NULL if everything went OK. + **/ +#define CF_COMMIT(f) .commit = (cf_hook*) f +/** + * A <>. + * You need to provide one for too complicated sections where a memcpy is not + * enough to copy it properly. It happens, for example, when you have a dynamically + * allocated section containing a list of other sections. + * + * You return an error message or NULL if you succeed. + **/ +#define CF_COPY(f) .copy = (cf_copier*) f /** **/ +#define CF_ITEMS .cfg = ( struct cf_item[] ) /** List of sub-items. **/ +#define CF_END { .cls = CC_END } /** End of the structure. **/ +/*** + * Declaration of a configuration item + * ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + * + * Each of these describe single <>. They are mostly + * for internal use, do not use them directly unless you really know what you are doing. + ***/ + +/** + * Static array of items. + * Expects you to allocate the memory and provide pointer to it. 
+ **/ +#define CF_STATIC(n,p,T,t,c) { .cls = CC_STATIC, .type = CT_##T, .name = n, .number = c, .ptr = CHECK_PTR_TYPE(p,t*) } +/** + * Dynamic array of items. + * Expects you to provide pointer to your pointer to data and it will allocate new memory for it + * and set your pointer to it. + **/ +#define CF_DYNAMIC(n,p,T,t,c) { .cls = CC_DYNAMIC, .type = CT_##T, .name = n, .number = c, .ptr = CHECK_PTR_TYPE(p,t**) } +#define CF_PARSER(n,p,f,c) { .cls = CC_PARSER, .name = n, .number = c, .ptr = p, .u.par = (cf_parser*) f } /** A low-level parser. **/ +#define CF_SECTION(n,p,s) { .cls = CC_SECTION, .name = n, .number = 1, .ptr = p, .u.sec = s } /** A sub-section. **/ +#define CF_LIST(n,p,s) { .cls = CC_LIST, .name = n, .number = 1, .ptr = CHECK_PTR_TYPE(p,clist*), .u.sec = s } /** A list with sub-items. **/ +#define CF_BITMAP_INT(n,p) { .cls = CC_BITMAP, .type = CT_INT, .name = n, .number = 1, .ptr = CHECK_PTR_TYPE(p,u32*) } /** A bitmap. **/ +#define CF_BITMAP_LOOKUP(n,p,t) { .cls = CC_BITMAP, .type = CT_LOOKUP, .name = n, .number = 1, .ptr = CHECK_PTR_TYPE(p,u32*), .u.lookup = t } /** A bitmap with named bits. **/ +/*** + * Basic configuration items + * ^^^^^^^^^^^^^^^^^^^^^^^^^ + * + * They describe basic data types used in the configuration. This should be enough for + * most real-life purposes. + * + * The parameters are as follows: + * + * * @n -- name of the item. + * * @p -- pointer to the variable where it shall be stored. + * * @c -- count. + **/ +#define CF_INT(n,p) CF_STATIC(n,p,INT,int,1) /** Single `int` value. **/ +#define CF_INT_ARY(n,p,c) CF_STATIC(n,p,INT,int,c) /** Static array of integers. **/ +#define CF_INT_DYN(n,p,c) CF_DYNAMIC(n,p,INT,int,c) /** Dynamic array of integers. **/ +#define CF_UNS(n,p) CF_STATIC(n,p,INT,uns,1) /** Single `uns` (`unsigned`) value. **/ +#define CF_UNS_ARY(n,p,c) CF_STATIC(n,p,INT,uns,c) /** Static array of unsigned integers. **/ +#define CF_UNS_DYN(n,p,c) CF_DYNAMIC(n,p,INT,uns,c) /** Dynamic array of unsigned integers. 
**/ +#define CF_U64(n,p) CF_STATIC(n,p,U64,u64,1) /** Single unsigned 64bit integer (`u64`). **/ +#define CF_U64_ARY(n,p,c) CF_STATIC(n,p,U64,u64,c) /** Static array of u64s. **/ +#define CF_U64_DYN(n,p,c) CF_DYNAMIC(n,p,U64,u64,c) /** Dynamic array of u64s. **/ +#define CF_DOUBLE(n,p) CF_STATIC(n,p,DOUBLE,double,1) /** Single instance of `double`. **/ +#define CF_DOUBLE_ARY(n,p,c) CF_STATIC(n,p,DOUBLE,double,c) /** Static array of doubles. **/ +#define CF_DOUBLE_DYN(n,p,c) CF_DYNAMIC(n,p,DOUBLE,double,c) /** Dynamic array of doubles. **/ +#define CF_IP(n,p) CF_STATIC(n,p,IP,u32,1) /** Single IPv4 address. **/ +#define CF_IP_ARY(n,p,c) CF_STATIC(n,p,IP,u32,c) /** Static array of IP addresses. **/ +#define CF_IP_DYN(n,p,c) CF_DYNAMIC(n,p,IP,u32,c) /** Dynamic array of IP addresses. **/ +/** + * A string. + * You provide a pointer to a `char *` variable and it will fill it with + * dynamically allocated string. For example: + * + * static char *string = "Default string"; + * + * static struct cf_section section = { + * CF_ITEMS { + * CF_STRING("string", &string), + * CF_END + * } + * }; + **/ +#define CF_STRING(n,p) CF_STATIC(n,p,STRING,char*,1) +#define CF_STRING_ARY(n,p,c) CF_STATIC(n,p,STRING,char*,c) /** Static array of strings. **/ +#define CF_STRING_DYN(n,p,c) CF_DYNAMIC(n,p,STRING,char*,c) /** Dynamic array of strings. **/ +/** + * One string out of a predefined set. + * You provide the set as an array of strings terminated by NULL (similar to @argv argument + * of main()) as the @t parameter. + * + * The configured variable (pointer to `int`) is set to index of the string. + * So, it works this way: + * + * static const char * const strings[] = { "First", "Second", "Third", NULL }; + * + * static int variable; + * + * static struct cf_section section = { + * CF_ITEMS { + * CF_LOOKUP("choice", &variable, strings), + * CF_END + * } + * }; + * + * Now, if the configuration contains `choice "Second"`, `variable` will be set to 1.
+ **/ +#define CF_LOOKUP(n,p,t) { .cls = CC_STATIC, .type = CT_LOOKUP, .name = n, .number = 1, .ptr = CHECK_PTR_TYPE(p,int*), .u.lookup = t } +/** + * Static array of strings out of predefined set. + **/ +#define CF_LOOKUP_ARY(n,p,t,c) { .cls = CC_STATIC, .type = CT_LOOKUP, .name = n, .number = c, .ptr = CHECK_PTR_TYPE(p,int*), .u.lookup = t } +/** + * Dynamic array of strings out of predefined set. + **/ +#define CF_LOOKUP_DYN(n,p,t,c) { .cls = CC_DYNAMIC, .type = CT_LOOKUP, .name = n, .number = c, .ptr = CHECK_PTR_TYPE(p,int**), .u.lookup = t } +/** + * A user-defined type. + * See <> section if you want to know more. + **/ +#define CF_USER(n,p,t) { .cls = CC_STATIC, .type = CT_USER, .name = n, .number = 1, .ptr = p, .u.utype = t } +/** + * Static array of user-defined types (all of the same type). + * See <> section. + **/ +#define CF_USER_ARY(n,p,t,c) { .cls = CC_STATIC, .type = CT_USER, .name = n, .number = c, .ptr = p, .u.utype = t } +/** + * Dynamic array of user-defined types. + * See <> section. + **/ +#define CF_USER_DYN(n,p,t,c) { .cls = CC_DYNAMIC, .type = CT_USER, .name = n, .number = c, .ptr = p, .u.utype = t } + +/** + * Any number of dynamic array elements + **/ +#define CF_ANY_NUM -0x7fffffff + +#define DARY_LEN(a) ((uns*)a)[-1] /** Length of an dynamic array. **/ +#define DARY_ALLOC(type,len,val...) ((struct { uns l; type a[len]; }) { .l = len, .a = { val } }).a + // creates a static instance of a dynamic array + +/*** + * [[alloc]] + * Memory allocation + * ~~~~~~~~~~~~~~~~~ + * + * Uses <> for efficiency and journal recovery. + * You should use these routines when implementing custom parsers. + ***/ +struct mempool; +/** + * A <> for configuration parser needs. + * Memory allocated from here is valid as long as the current config is loaded + * (if you allocate some memory and rollback the transaction or you load some + * other configuration, it gets lost). 
+ **/ +extern struct mempool *cf_pool; +void *cf_malloc(uns size); /** Returns @size bytes of memory. Allocates from <>. **/ +void *cf_malloc_zero(uns size); /** Like @cf_malloc(), but zeroes the memory. **/ +char *cf_strdup(const char *s); /** Copy a string into @cf_malloc()ed memory. **/ +char *cf_printf(const char *fmt, ...) FORMAT_CHECK(printf,1,2); /** printf() into @cf_malloc()ed memory. **/ + +/*** + * [[journal]] + * Undo journal + * ~~~~~~~~~~~~ + * + * For error recovery when <>. + ***/ +extern uns cf_need_journal; /** Is the journal needed? If you do not reload configuration, you set this to 0 and gain a little more performance and free memory. **/ +/** + * When a block of memory is about to be changed, put the old value + * into journal with this function. You need to call it from a <> + * if you change anything. It is used internally by low-level parsers. + * <> do not need to call it, it is called + * before them. + **/ +void cf_journal_block(void *ptr, uns len); +#define CF_JOURNAL_VAR(var) cf_journal_block(&(var), sizeof(var)) // Store single value into journal. + +/*** + * [[declare]] + * Section declaration + * ~~~~~~~~~~~~~~~~~~~ + **/ + +/** + * Plug another top-level section into the configuration system. + * @name is the name in the configuration file, + * @sec is pointer to the section description. + * If @allow_unknown is set to 0 and a variable not described in @sec + * is found in the configuration file, it produces an error. + * If you set it to 1, all such variables are ignored. + **/ +void cf_declare_section(const char *name, struct cf_section *sec, uns allow_unknown); +/** + * If you have a section in a structure and you want to initialize it + * (eg. if you want a copy of default values outside the configuration), + * you can use this. It initializes it recursively. + * + * This is used mostly internally. You probably do not need it. 
+ **/ +void cf_init_section(const char *name, struct cf_section *sec, void *ptr, uns do_bzero); + +/*** + * [[bparser]] + * Parsers for basic types + * ~~~~~~~~~~~~~~~~~~~~~~~ + * + * Each of them gets a string to parse and pointer to store the value. + * It returns either NULL or error message. + * + * The parsers support units. See <>. + ***/ +char *cf_parse_int(const char *str, int *ptr); /** Parser for integers. **/ +char *cf_parse_u64(const char *str, u64 *ptr); /** Parser for 64 unsigned integers. **/ +char *cf_parse_double(const char *str, double *ptr); /** Parser for doubles. **/ +char *cf_parse_ip(const char *p, u32 *varp); /** Parser for IP addresses. **/ + +#endif + diff --git a/ucw/config.h b/ucw/config.h new file mode 100644 index 0000000..a43e9ea --- /dev/null +++ b/ucw/config.h @@ -0,0 +1,49 @@ +/* + * UCW Library -- Configuration-Dependent Definitions + * + * (c) 1997--2009 Martin Mares + * (c) 2006 Robert Spalek + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */ + +#ifndef _UCW_CONFIG_H +#define _UCW_CONFIG_H + +/* Configuration switches */ + +#include "autoconf.h" + +/* Tell libc we're going to use all extensions available */ + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + +/* Types (based on standard C99 integers) */ + +#include +#include + +typedef uint8_t byte; /** Exactly 8 bits, unsigned **/ +typedef uint8_t u8; /** Exactly 8 bits, unsigned **/ +typedef int8_t s8; /** Exactly 8 bits, signed **/ +typedef uint16_t u16; /** Exactly 16 bits, unsigned **/ +typedef int16_t s16; /** Exactly 16 bits, signed **/ +typedef uint32_t u32; /** Exactly 32 bits, unsigned **/ +typedef int32_t s32; /** Exactly 32 bits, signed **/ +typedef uint64_t u64; /** Exactly 64 bits, unsigned **/ +typedef int64_t s64; /** Exactly 64 bits, signed **/ + +typedef unsigned int uns; /** A better pronounceable alias for `unsigned int` **/ +typedef u32 ucw_time_t; /** Seconds since UNIX epoch **/ +typedef s64 timestamp_t; /** Milliseconds since UNIX epoch **/ + +#ifdef CONFIG_LARGE_FILES +typedef s64 ucw_off_t; /** File position (either 32- or 64-bit, depending on `CONFIG_LARGE_FILES`). 
**/ +#else +typedef s32 ucw_off_t; +#endif + +#endif diff --git a/lib/default.cfg b/ucw/default.cfg similarity index 68% rename from lib/default.cfg rename to ucw/default.cfg index d61c2e9..033b32d 100644 --- a/lib/default.cfg +++ b/ucw/default.cfg @@ -1,8 +1,11 @@ # Configuration variables of the UCW library and their default values -# (c) 2005--2007 Martin Mares +# (c) 2005--2009 Martin Mares # Version of the whole package -Set("SHERLOCK_VERSION" => "3.12.3"); +Set("SHERLOCK_VERSION" => "3.99.2"); +Set("SHERLOCK_VERSION_CODE" => 3099002); +Set("UCW_VERSION" => Get("SHERLOCK_VERSION")); +Set("UCW_VERSION_CODE" => Get("SHERLOCK_VERSION_CODE")); # Compile everything with debug information and ASSERT's UnSet("CONFIG_DEBUG"); @@ -16,11 +19,6 @@ Set("CONFIG_LARGE_FILES"); # Use shared libraries UnSet("CONFIG_SHARED"); -# If your system doesn't contain GNU libc 2.3 or newer, it's recommended to let Sherlock -# use its own regex library (a copy of the glibc one), because the default regex library -# is likely to be crappy. -UnSet("CONFIG_OWN_REGEX"); - # If your system can't reset getopt with 'optind = 0', you need to compile our internal copy # of GNU libc's getopt. This should not be necessary on GNU libc. 
UnSet("CONFIG_OWN_GETOPT"); @@ -40,11 +38,21 @@ UnSet("CONFIG_UCW_PERL_MODULES"); # Include support utilities for shell scripts Set("CONFIG_UCW_SHELL_UTILS" => 1); +# Include utilities +Set("CONFIG_UCW_UTILS" => 1); + # Default configuration file UnSet("DEFAULT_CONFIG"); # Environment variable with configuration file UnSet("ENV_VAR_CONFIG"); +# Use obsolete URL escaping rules (if you need behavior identical to the older versions of libucw) +UnSet("CONFIG_URL_ESCAPE_COMPAT"); + +# Allow use of direct IO on files +Set("CONFIG_DIRECT_IO"); +Set("CONFIG_UCW_FB_DIRECT"); + # Return success 1; diff --git a/ucw/doc/Makefile b/ucw/doc/Makefile new file mode 100644 index 0000000..6ffeb12 --- /dev/null +++ b/ucw/doc/Makefile @@ -0,0 +1,24 @@ +# Makefile for the UCW documentation, (c) 2008 Michal Vaner + +DIRS+=ucw/doc + +UCW_DOCS=basics log fastbuf index config configure install basecode hash docsys conf mempool eltpool mainloop generic growbuf unaligned lists chartype unicode prime binsearch heap binheap compress sort hashtable +UCW_INDEX=$(o)/ucw/doc/def_index.html +UCW_DOCS_HTML=$(addprefix $(o)/ucw/doc/,$(addsuffix .html,$(UCW_DOCS))) + +$(UCW_INDEX): DOC_HEAD=$(s)/ucw/doc/def_index.txt +$(UCW_INDEX): DOC_LIST=$(patsubst %,$(o)/ucw/doc/%.deflist,$(UCW_DOCS)) +$(UCW_INDEX) $(UCW_DOCS_HTML): DOC_MODULE=ucw + +DOCS+=$(UCW_DOCS_HTML) +DOC_INDICES+=$(UCW_INDEX) +DOC_MODULES+=ucw + +ifdef CONFIG_DOC +INSTALL_TARGETS+=install-libucw-docs +endif + +.PHONY: install-libucw-docs +install-libucw-docs: $(UCW_INDEX) $(UCW_DOCS_HTML) + install -d -m 755 $(DESTDIR)$(INSTALL_DOC_DIR)/ucw/ + install -m 644 $^ $(DESTDIR)$(INSTALL_DOC_DIR)/ucw/ diff --git a/ucw/doc/basecode.txt b/ucw/doc/basecode.txt new file mode 100644 index 0000000..6741470 --- /dev/null +++ b/ucw/doc/basecode.txt @@ -0,0 +1,76 @@ +Base64 and Base224 encodings +============================ + +These modules can be used to encode and decode data to and from +base64 (described in RFC 3548) and base224 (not described in any 
+standard, uses all non-control characters, briefly described in +a comment at the beginning of `ucw/base224.c`). + +- <> +- <> +- <> +- <> + +[[base64]] +ucw/base64.h +------------ +!!ucw/base64.h + +[[base224]] +ucw/base224.h +------------- +!!ucw/base224.h + +[[usage]] +Usage +----- + +- You may want to encode small block of known size. Just allocate the + output buffer and feed the data to the function. + + byte output[BASE64_ENC_LENGTH(input_size)]; + uns output_size = base64_encode(output, input, input_size); + +- Decoding can be done in similar way. It is enough to have output + buffer of the same size as the input one. + +- Encoding of a stream of unknown or large size can be split into + chunks. The input chunk size must be multiple of `BASE64_IN_CHUNK`. + The output will be corresponding multiple of `BASE64_OUT_CHUNK`. + + uns input_size; + byte input[BASE64_IN_CHUNK * 10]; + while(input_size = read_chunk(input, BASE64_IN_CHUNK * 10)) { + byte output[BASE64_OUT_CHUNK * 10]; + uns output_size = base64_encode(output, input, input_size); + use_chunk(output, output_size); + } + +- Decoding of a stream is done in the same way, just swap + `BASE64_IN_CHUNK` and `BASE64_OUT_CHUNK` (you feed the decode + function with `BASE64_OUT_CHUNK` multiple and get `BASE64_IN_CHUNK` + multiple). + +The base224 has similar interface, therefore you can use it the same +way as base64. + +[[basecode]] +The basecode utility +-------------------- +You can use the encoding/decoding routines from the command line, through +the `basecode` command. You have to specify the operation by a command +line argument and give it the data on standard input. The arguments +are: + +- `-e`: Encode to base64. +- `-d`: Decode from base64. +- `-E`: Encode to base224. +- `-D`: Decode from base224. + +Furthermore, you can provide `--prefix` argument. If you do, the +output (when encoding) will be split to lines by default number of +chunks and the value of prefix will be prepended to each of them.
+When decoding, it removes the prefix from the beginning of line. + +You can override the default number of blocks for line-splitting by +`--blocks` argument. diff --git a/ucw/doc/basics.txt b/ucw/doc/basics.txt new file mode 100644 index 0000000..42f0097 --- /dev/null +++ b/ucw/doc/basics.txt @@ -0,0 +1,33 @@ +LibUCW Basics +============= + +Every program using LibUCW should start with `#include ` which +brings in the most frequently used library functions, macros and types. +This should be done before you include any of the system headers, since +`lib.h` defines the feature macros of the system C library. + +Portability +----------- + +LibUCW is written in C99 with a couple of GNU extensions mixed in where needed. +It currently requires the GNU C compiler version 4.0 or newer, but most modules +should be very easy to adapt to a different C99 compiler. (A notable exception +is `stkstring.h`, which is heavily tied to GNU extensions.) + +The library has been developed on Linux with the GNU libc and it is known to run +on Darwin, too. The authors did not try using it on other systems, but most of +the code is written for a generic POSIX system, so porting to any UNIX-like system +should be a piece of cake. + +ucw/lib.h +--------- +*Only partially documented.* + +!!ucw/lib.h + +ucw/config.h +------------ +This header contains the standard set of types used by LibUCW. It is automatically +included by `ucw/lib.h`. + +!!ucw/config.h diff --git a/ucw/doc/binheap.txt b/ucw/doc/binheap.txt new file mode 100644 index 0000000..adc2601 --- /dev/null +++ b/ucw/doc/binheap.txt @@ -0,0 +1,19 @@ +Binomial heaps +============== + +* <> +* <> +* <> + +[[intro]] +Introduction +------------ + +Binomial heap is a data structure that supports for example efficient merge of two heaps, insertions, deletions or access to the minimum element. +All these operations are logarithmic in the worst case. If the merge is not significant, it is usually better to use simpler <>.
+ +They are defined in `ucw/binheap.h` as <>, some common definitions are also in `ucw/binheap-node.h`. + +!!ucw/binheap-node.h + +!!ucw/binheap.h diff --git a/ucw/doc/binsearch.txt b/ucw/doc/binsearch.txt new file mode 100644 index 0000000..211bd73 --- /dev/null +++ b/ucw/doc/binsearch.txt @@ -0,0 +1,51 @@ +Binary search +============= + +* <> +* <> + +!!ucw/binsearch.h + +[[examples]] +Examples +-------- + +You can find few examples of binary search usage. Although we define only few macros, they can be used +for several different cases, for example to find lower elements in a (non-)decreasing array or even to find +elements in a (non-)increasing array. + + static int inc[10] = { 1, 4, 4, 5, 6, 10, 11, 20, 25, 50 }; + static const char *str[5] = { "aaa", "abc", "bflmpsvz", "rep", "rep" }; + static int dec[3] = { 5, 2, 1 }; + + // find the first equal element + printf("%d\n", BIN_SEARCH_EQ(inc, 10, 4)); // prints 1 + printf("%d\n", BIN_SEARCH_EQ(inc, 10, 15)); // prints -1 (not found) + + // find the first greater or equal element + printf("%d\n", BIN_SEARCH_GE(inc, 10, 9)); // prints 5 + printf("%d\n", BIN_SEARCH_GE(inc, 10, 10)); // prints 5 + printf("%d\n", BIN_SEARCH_GE(inc, 10, 4)); // prints 1 + printf("%d\n", BIN_SEARCH_GE(inc, 10, 99)); // prints 10 (not found) + + // find the last equal element (or -1 if does not exist) + #define CMP_LE(ary, i, x) ((ary[i]) <= (x)) + int i = BIN_SEARCH_FIRST_GE_CMP(inc, 10, 4, CMP_LE); + printf("%d\n", (i && inc[i - 1] == 4) ? 
i - 1 : -1); // prints 2 + + // find the first greater element + printf("%d\n", BIN_SEARCH_FIRST_GE_CMP(inc, 10, 25, CMP_LE)); // prints 9 + + // find the last lower or equal element (or -1 if does not exist) + printf("%d\n", BIN_SEARCH_FIRST_GE_CMP(inc, 10, 25, CMP_LE) - 1); // prints 8 + + // find the last lower element (or -1 if does not exist) + printf("%d\n", BIN_SEARCH_FIRST_GE(inc, 10, 25) - 1); // prints 7 + + // find the first greater or equal string + #define CMP_STR(ary, i, x) (strcmp((ary[i]), (x)) < 0) + printf("%d\n", BIN_SEARCH_GE_CMP(str, 5, "bfl", CMP_STR)); // prints 2 + + // find the first lower or equal element in the non-increasing array + #define CMP_GT(ary, i, x) ((ary[i]) > (x)) + printf("%d\n", BIN_SEARCH_FIRST_GE_CMP(dec, 3, 4, CMP_GT)); // prints 1 diff --git a/ucw/doc/chartype.txt b/ucw/doc/chartype.txt new file mode 100644 index 0000000..a19560d --- /dev/null +++ b/ucw/doc/chartype.txt @@ -0,0 +1,4 @@ +Single-byte characters +====================== + +!!ucw/chartype.h diff --git a/ucw/doc/compress.txt b/ucw/doc/compress.txt new file mode 100644 index 0000000..d452763 --- /dev/null +++ b/ucw/doc/compress.txt @@ -0,0 +1,17 @@ +Compression +=========== + +The library contains a compression routine, called LiZaRd. It is +modified Lempel-Ziv 77 method with slightly worse compression ratio, +but with faster compression and decompression (compression is few times +faster than zlib, decompression is slightly slower than memcpy()). + +The data format and inspiration for code comes from the LZO project +(which couldn't be used due to licence problems). They might be +compatible, but no-one tested that. + +- <> +- <> +- <> + +!!ucw/lizard.h diff --git a/ucw/doc/conf.txt b/ucw/doc/conf.txt new file mode 100644 index 0000000..31698a6 --- /dev/null +++ b/ucw/doc/conf.txt @@ -0,0 +1,296 @@ +Configuration and command line parser +===================================== + +Libucw contains a parser for configuration files described in +<>. 
+ +The principle is you specify the structure of the configuration file, +the section names, variable names and types and your C variables that +are assigned to them. Then you run the parser and it fills your +variables with the values from the configuration file. + +It is modular. It means you do not have to write all configuration at +the same place, you just declare the parts you need locally and do not +care about the other parts. + +The command line parser has the same interface as unix getopt_long(), +but handles setting of configuration files and configuration values +from command line. + +- <> + * <> + * <> +- <> + * <> + * <> + * <> + * <> +- <> + * <> + * <> + * <> + * <> + * <> + * <> +- <> + * <> + * <> + * <> + * <> + * <> + +[[example]] +Example +------- +If you want to just load simple configuration, this is the part you +want to read. This simple example should give you the overview. Look +into the <> section to see list of +supported data types, sections, etc. + +[[ex_cfile]] +Let's say you have configuration file with this content and want to +load it: + + HelloWorld { + Text "Hello planet" + Count 3 + } + +[[ex_structure]] +The structure +~~~~~~~~~~~~~ +First, you declare the structure and let the configuration parser know +it exists. + + #include + #include + + static char *hw_text = "Hello world"; + static int hw_count = 1; + static int hw_wait_answer = 0; + + static struct cf_section hw_config = { + CF_ITEMS { + CF_STRING("Text", &hw_text), + CF_INT("Count", &hw_count), + CF_INT("WaitAnswer", &hw_wait_answer), + CF_END + } + }; + + static void CONSTRUCTOR hw_init(void) { + cf_declare_section("HelloWorld", &hw_config, 0); + } + +The variables are used to store the loaded values. Their initial +values work as default, if nothing else is loaded. The hw_config() +structure assigns the variables to configuration names. 
The hw_init() +function (because of the `CONSTRUCTOR` macro) is run before main() +is called and it plugs in the whole section to the parser (alternatively, +you can call @cf_declare_section() at the start of your main()). + +You can plug in as many configuration sections as you like, from +various places across your code. + +[[ex_load]] +Loading of the values +~~~~~~~~~~~~~~~~~~~~~ +Suppose you need to parse the command line arguments and load the +configuration. Then @cf_getopt() is there for you: it works like +the traditional @getopt() from the C library, but it also handles +configuration files. + + #include + #include + #include + + static char short_opts[] = CF_SHORT_OPTS "v"; + static struct option long_opts[] = { + CF_LONG_OPTS + { "verbose", 0, 0, 'v' }, + { NULL, 0, 0, 0 } + }; + + static int verbose; + + int main(int argc, char *argv[]) { + cf_def_file = "default.cf"; + int opt; + while((opt = cf_getopt(argc, argv, short_opts, long_opts, NULL)) >= 0) + switch(opt) { + case 'v': verbose = 1; break; + default: fprintf(stderr, "Unknown option %c\n", opt); return 1; + } + } + +The `short_opts` and `long_opts` variables describe the command line +arguments. Notice the `CF_SHORT_OPTS` and `CF_LONG_OPTS` macros. They +add the `-S` and `-C` options for the configuration parser as described +in <>. These options are handled internally by @cf_getopt(). + +You can rely on the configuration files having been loaded before the +first of your program's options is parsed. + +[[deep]] +Getting deeper +-------------- + +Since the configuration system is somewhat complicated, this part gives +you a little overview of what you can find and where. + +[[conf_multi]] +Arrays and lists +~~~~~~~~~~~~~~~~ + +It is sometimes necessary to have multiple items of the same type. There +are three ways to do that: + +*Static arrays*:: + An array with fixed maximum length. You provide + the length and already allocated array which is filled with items.
+ The configuration may contain fewer items than the maximum length. ++ +For example, you can have a static array of five unsigned integers: ++ + static uns array[] = { 1, 2, 3, 4, 5 }; ++ + static struct cf_section section = { + CF_ITEMS { + CF_UNS_ARY("array", array, 5), + CF_END + } + }; + +*Dynamic arrays*:: + Similar to a static array, but you provide a pointer + to a pointer to the given item (e.g. if you want a dynamic array of + integers, you give `int **`). The parser allocates an array of the needed + size. You can use the <> macro to find out + the number of elements actually loaded. ++ +If you want a dynamic array of strings, you would use: ++ + static char **array; ++ + static struct cf_section section = { + CF_ITEMS { + CF_STRING_DYN("array", &array, CF_ANY_NUM), + CF_END + } + }; + +*Lists*:: + Linked lists based on <>. You provide a description + of a single node and a pointer to the + <> variable. All the nodes will + be created dynamically and put there. ++ +The first element of your structure must be <>. ++ +The first example is a list of strings and uses <>: ++ + static struct clist list; ++ + static struct cf_section section = { + CF_ITEMS { + CF_LIST("list", &list, &cf_string_list_config), + CF_END + } + }; ++ +Another example, describing how to create a more complicated list node +than just a string, can be found at the <> macro. + +[[reload]] +Reloading configuration +~~~~~~~~~~~~~~~~~~~~~~~ + +The configuration system allows you to reload configuration at +runtime. The new config is applied on top of the default values, not on top of the currently loaded ones. +It means that if the default value for variable `A` is `10`, the currently +loaded config sets it to `42` and the new config does not talk about +this variable, `A` will have a value of `10` after a successful load. + +Furthermore, if the loading of a new configuration fails, the current +configuration is preserved. + +All this is done with <>. The load of the +first config creates a journal entry. 
If you try to load some new +configuration, it is partially rolled back to defaults (the rollback +happens, but instead of removing the journal entry, another journal +entry is added for the rollback). If the loading succeeds, the two +journal entries are removed and a new one, for the new configuration, +is added. If it fails, the first one is replayed and the rollback +entry is removed. + +See <>. + +[[custom_parser]] +Creating custom parsers +~~~~~~~~~~~~~~~~~~~~~~~ + +If you need to parse some data type the configuration system can't +handle, you can write your own parser. But before you start, you +should know a few things. + +The parser needs to support <>. To accomplish that, +you have to use the <> for memory allocation. + +Now, you need a function with the same signature as +<>. Parse the first parameter (the +string) and store the data in the second parameter. You may want to +write a dumper function, with the signature of +<> (needed for debug dumps). + +Fill in a structure <> and use the +new data type in your configuration description with the +<> macro as its @t parameter. + +You do not need to call @cf_journal_block() on the variable in which you store +the result. It is true that you change it, but it was stored to the journal +before your parser function was called. + +[[hooks]] +Hooks +~~~~~ + +The configuration system supports hooks. They are used to initialize the +configuration (if a simple default value of a variable is not enough) and +to check the sanity of loaded data. + +Each hook is of type <> and you can include +them in the configuration description using the <> and +<> macros. + +The hooks should follow guidelines similar to those for custom parsers (well, +init hooks do not need to call @cf_journal_block()) to support +journaling. If you change nothing in the commit hook, you do not need +to care about the journaling either. + +You may use the return value to inform about errors. Just return the +error message, or NULL if everything went well. 
+ +Another similar function is a copy function. It is very similar to a +hook and is used when the item is copied and is too complicated for a +simple memcpy(). Its type is <> and it is +specified by the <> macro. Its return value is +the same as that of a hook. + +[[conf_h]] +ucw/conf.h +---------- + +Use this file if you want to define a configuration section, request +loading of some variables or create a new item type. + +!!ucw/conf.h + +[[getopt_h]] +ucw/getopt.h +------------ + +This header contains routines for parsing command line arguments and +loading the configuration. + +!!ucw/getopt.h diff --git a/ucw/doc/config.txt b/ucw/doc/config.txt new file mode 100644 index 0000000..7bf6369 --- /dev/null +++ b/ucw/doc/config.txt @@ -0,0 +1,191 @@ +Configuration files +=================== + +This document describes run-time configuration of libucw-based +programs using config files. For compile-time configuration, +see <>. + +[[terminology]] +Terminology +----------- + +Configuration items of all modules are organized into sections. +The sections form a tree structure with top-level sections corresponding +to program modules. + +Each configuration item belongs to one of the following classes: + + 1. single value or a fixed-length array of values + 2. variable-length array of values + 3. subsection with several nested attributes + 4. list of nodes, each being an instance of a subsection + 5. bitmap of small integers (0..31) or fixed list of strings + 6. exceptions (items with irregular syntax; however, they always + appear as a sequence of strings, only the semantics differ) + +Both fixed- and variable-length arrays consist of items of the same +type. The basic types supported by the configuration mechanism are: + + 1. 32-bit integer + 2. 64-bit integer + 3. floating point number + 4. IP address + 5. string + 6. 
choice (one of a fixed list of strings) + +Program modules can define their own special types (such as network +masks or attribute names) and decide how they are parsed. + +[[format]] +Format of configuration files +----------------------------- + +Configuration files are text files that usually set one attribute per +line, though it is possible to split one assignment into multiple lines +and/or assign several attributes in one line. The basic format of an +assignment command is + + name value1 value2 ... valueN + +or + + name=value1 value2 ... valueN + +The end of a line also means the end of a command unless it is preceded by a +backslash. On the other hand, a semicolon terminates the command and +another command can start after the semicolon. A hash starts a comment +that lasts until the end of the line. A value can be enclosed in +apostrophes or quotation marks and then it can contain spaces and/or +control characters; otherwise the first space or control character +denotes the end of the value. Values enclosed in quotation marks are +interpreted as C-strings. For example, the following are valid +assignment commands: + + Database "main db\x2b"; Directory='index/'; Weights 100 20 30 \ + 40 50 80 # a comment that is ignored + +Numerical values can be followed by a unit. The following units are +supported: + +[[units]] + + d=86400 k=1000 K=1024 + h=3600 m=1000000 M=1048576 + %=0.01 g=1000000000 G=1073741824 + +Attributes of a section or a list node can be set in two ways. First, +you can write the name of the section or list, open a brace, and then +set the attributes inside the section. For example, + + Section1 { + Attr1 value1 + Attr2 value2 + ListNode { #creates a list and adds its first node + Attr3 value3 + Attr4 value4 + } + ListNode { Attr3=value5; Attr4=value6 } + #appends a new node; this is still the same syntax + } + +The second possibility is using a shorter syntax when all attributes of a +section are set on one line in a fixed order. 
The above example could +just as well be written as + + Section1 { + Attr1 value1 + Attr2 value2 + ListNode value3 value4 + ListNode value5 value6 + } + +Of course, you cannot use the latter syntax when the attributes allow +variable numbers of parameters. The parser of the configuration files +checks for this possibility. + +If you want to set a single attribute in some section, you can also +refer to the attribute as Section.Attribute. + +Lists support several operations besides adding a new node. You just +have to write a colon immediately after the attribute name, followed by +the name of the operation. The following operations are supported: + +[[operations]] + + List:clear # removes all nodes + List:append { attr1=value1; ... } # adds a new node at the end + List:prepend { attr1=value1; ... } # adds a new node at the beginning + List:remove { attr1=search1 } # find a node and delete it + List:edit { attr1=search1 } { attr1=value1; ... } + # find a node and edit it + List:after { attr1=search1 } { ... } # insert a node after a found node + List:before { attr1=search1 } { ... } # insert a node before a found node + List:copy { attr1=search1 } { ... } # duplicate a node and edit the copy + List:reset { attr1=value1; ... } # equivalent to :clear and :append + +You can specify several attributes in the search condition and the nodes +are tested for equality in all these attributes. In the editing +commands, you can either open a second block with overridden attributes, +or specify the new values using the shorter one-line syntax. + +The commands :clear, :append, and :prepend are also supported by variable-length +arrays. The command :clear can also be used on string values. The following +operations can be used on bitmaps: :set (which is equivalent to :append and :prepend), +:remove, :clear, and :all (set all bits). 
+ +[[include]] +Including other files +--------------------- + +To include another file, use the command + + include another/file + +(Beware that this command has to be the last one on the line.) + +[[command_line]] +Command-line parameters +----------------------- + +The default configuration file (cf_def_file, possibly overridden +by the environment variable cf_env_file) is read before the program is started. +You can use a -C option to override the name of the configuration file. +If you use this parameter several times, then all those files are loaded +consecutively. A parameter -S can be used to execute a configuration +command directly (after loading the default or specified configuration +file). Example: + + bin/program -Ccf/my-config -S'module.trace=2;module.logfile:clear' ... + +If the program is compiled with debugging information, then one more +parameter `--dumpconfig` is supported. It prints all parsed configuration +items and exits. + +All these switches must be used before any other parameters of the +program. + +[[preprocess]] +Preprocessing +------------- + +During compilation, all configuration files are pre-processed by a simple +C-like preprocessor, which supports `#ifdef`, `#ifndef`, `#if`, +`#elsif`, `#else` and `#endif` directives referring to compile-time +configuration variables (the ones detected by the `configure` script; you +can see the list of them in `obj/autoconf.h`). `#if` and `#elsif` can contain +any Perl expression where each `CONFIG_xyz` configuration variable is +replaced by 0 or 1 depending on its value. + +The preprocessor also replaces `@VARIABLE@` with the value of the variable, +which must be defined. + +[[caveats]] +Caveats +------- + +Trying to access an unknown attribute causes an error, but unrecognized +top-level sections are ignored. The reason is that a common config file +is used for a lot of programs which recognize only their own sections. + +Names of sections, attributes and choices are case-insensitive. 
Units are +case-sensitive. diff --git a/ucw/doc/configure.txt b/ucw/doc/configure.txt new file mode 100644 index 0000000..35784a2 --- /dev/null +++ b/ucw/doc/configure.txt @@ -0,0 +1,74 @@ +How to Configure Sherlock libraries +=================================== + +What can be configured +---------------------- +There are two different levels of configuring/customizing program +based on sherlock libraries: + + - runtime configuration in configuration files (see <>) + + - compile-time configuration of the libraries: config switches set + before compiling, selecting optional features. + +Where to build +-------------- +If you run configure in the source directory, it prepares for compilation inside +the source tree. In this case, an `obj` subdirectory is created to hold all generated +files (object files, binaries, generated source files etc.) and all final files +are linked to the `run` subdirectory. No other parts of the source tree are written into. + +Alternatively, you can compile in a separate object tree (which is useful when you +want to build several different configurations from a single source tree). In order +to do that, switch to the destination directory and issue `/configure ...`. +This way, configure will create the `obj` and `run` directories locally and set up +a Makefile which refers to the original source tree. + +How to configure +---------------- +To set up compilation, possibly overriding default compile-time +options, just run: + + ./configure [