# Makefile for MO-Eval
-# (c) 2008 Martin Mares <mj@ucw.cz>
+# (c) 2008--2009 Martin Mares <mj@ucw.cz>
VERSION=1.0.99-20080220
@echo "You need to run configure first." && false
# We will use the libucw build system
-include $(s)/build/Maketop
+BUILDSYS=$(s)/build
+include $(BUILDSYS)/Maketop
# Include makefiles of libraries we wish to use
ifdef CONFIG_UCW_LIBS
-include $(s)/lib/Makefile
+include $(s)/ucw/Makefile
include $(s)/sherlock/Makefile
# Disable built-in tests of these libraries
TESTS=
endif
# And finally the default rules of the build system
-include $(s)/build/Makebottom
+include $(BUILDSYS)/Makebottom
# Bottom part of Makefile for the UCW Libraries
-# (c) 1997--2007 Martin Mares <mj@ucw.cz>
+# (c) 1997--2008 Martin Mares <mj@ucw.cz>
# The run tree
-runtree: run/.tree-stamp $(addsuffix /.dir-stamp,$(addprefix $(o)/,$(DIRS)))
+DOCDIR=doc
+
+runtree: run/.tree-stamp $(addsuffix /.dir-stamp,$(addprefix $(o)/,$(DIRS)) $(addprefix run/$(DOCDIR)/,$(DOC_MODULES)))
run/.tree-stamp: $(o)/config.mk
$(M)Creating runtree
- $(Q)mkdir -p run $(addprefix run/, cf $(EXTRA_RUNDIRS) $(INSTALL_RUNDIRS))
+ $(Q)mkdir -p run $(addprefix run/, $(CONFIG_DIR) $(EXTRA_RUNDIRS) $(INSTALL_RUNDIRS))
$(Q)touch run/.tree-stamp
# Miscellaneous targets
programs: $(PROGS)
datafiles: $(DATAFILES)
tests: $(TESTS)
-configs: $(addprefix run/cf/,$(CONFIGS))
+configs: $(addprefix run/$(CONFIG_DIR)/,$(CONFIGS))
+docs: runtree $(DOCS) $(DOC_INDICES)
tags:
etags `find . -name "*.[ch]"`
-include $(o)/depend
$(o)/depend: force
- $(Q)if [ -s $(o)/depend.new ] ; then $(s)/build/mergedeps $(o)/depend $(o)/depend.new ; >$(o)/depend.new ; fi
+ $(Q)if [ -s $(o)/depend.new ] ; then $(BUILDSYS)/mergedeps $(o)/depend $(o)/depend.new ; >$(o)/depend.new ; fi
force:
# Rules for configuration files
-run/cf/%: $(s)/cf/% $(o)/config.mk $(s)/build/genconf
+run/$(CONFIG_DIR)/%: $(s)/$(CONFIG_SRC_DIR)/% $(o)/config.mk $(BUILDSYS)/genconf
$(M)CF $<
- $(Q)$(s)/build/genconf $< $@ $(o)/config.mk
+ $(Q)$(BUILDSYS)/genconf $< $@ $(o)/config.mk
-$(o)/%.cf: $(s)/%.cf $(o)/config.mk $(s)/build/genconf
+$(o)/%.cf: $(s)/%.cf $(o)/config.mk $(BUILDSYS)/genconf
$(M)CF $<
- $(Q)$(s)/build/genconf $< $@ $(o)/config.mk
- $(Q)cp $@ run/cf/$(basename $(@F))
+ $(Q)$(BUILDSYS)/genconf $< $@ $(o)/config.mk
+ $(Q)cp $@ run/$(CONFIG_DIR)/$(basename $(@F))
# Rules for libraries
$(Q)rm -f $@
$(Q)ar rcs $@ $^
ifdef CONFIG_INSTALL_API
- $(Q)$(call symlink,$@,run/lib)
+ $(Q)$(call symlink-alias,$@,run/lib,$(*F)$(LIBNAME_INFIX).a)
endif
%.so:
$(M)LD $@
- $(Q)$(CC) $(LSHARED) $(LDFLAGS) -o $@ $^
- $(Q)$(call symlink,$@,run/lib)
+ $(Q)$(CC) $(LSHARED) $(LDFLAGS) -o $@ $(shell PKG_CONFIG_PATH="$(PKG_CONFIG_PATH)" $(BUILDSYS)/lib-flags $^) $(LIBS)
+ $(Q)$(call symlink-alias,$@,run/$(SO_RUNDIR),$(*F)$(SONAME_INFIX).so$(SONAME_SUFFIX))
+
+# On Darwin, gcc expects shared libraries in *.dylib instead of *.so.
+# Surprisingly, when a program is run, it suffices to have *.so files.
+# We don't want to mess up the whole build system with configurable
+# suffixes and we also don't want to incur an overhead on Linux, so we
+# just create symbolic links on Darwin, if requested.
+%.dylib: %.so
+ cd $(dir $<) && ln -fs $(notdir $<) $(notdir $@)
$(o)/%.pc: $(s)/%.pc $(o)/%.$(LS)
$(M)PC $<
- $(Q)DEPS="$(shell $(s)/build/lib-deps $^)" LIBDIR=$(@D) $(s)/build/genconf $< $@ $(o)/config.mk
+ $(Q)DEPS="$(shell $(BUILDSYS)/lib-deps $^)" LIBDIR=$(@D) $(BUILDSYS)/genconf $< $@ $(o)/config.mk
$(Q)mkdir -p $(o)/pkgconfig
$(Q)$(call symlink,$@,$(o)/pkgconfig)
ifdef CONFIG_INSTALL_API
+ifdef CONFIG_LOCAL
+# Need an absolute path
API_ROOT:=$(shell pwd)/run
+API_LIBDIR=$(API_ROOT)/lib
+API_INCDIR=$(API_ROOT)/include
+else
+API_LIBDIR=$(INSTALL_LIB_DIR)
+API_INCDIR=$(INSTALL_INCLUDE_DIR)
+endif
INSTALL_RUNDIRS+=include lib/pkgconfig
api: $(API_INCLUDES) $(addprefix run/lib/pkgconfig/,$(addsuffix .pc,$(API_LIBS)))
$(o)/%/.include-stamp:
- $(Q)$(s)/build/install-includes $(<D) run/include/$(IDST) $(?F)
+ $(Q)$(BUILDSYS)/install-includes $(<D) run/include/$(IDST) $(?F)
$(Q)touch $@
run/lib/pkgconfig/%.pc: # RHS supplied in the sub-makefile
$(M)PC-API $@
- $(Q)sed <$< >$@ "s@^libdir=.*@libdir=$(API_ROOT)/lib@;s@^incdir=.*@incdir=$(API_ROOT)/include@"
+ $(Q)sed <$< >$@ "s@^libdir=.*@libdir=$(API_LIBDIR)@;s@^incdir=.*@incdir=$(API_INCDIR)@"
else
api:
$(o)/%-t: $(o)/%-tt.o $(TESTING_DEPS)
$(M)LD-TEST $@
- $(Q)$(CC) $(LDFLAGS) -o $@ $(shell $(s)/build/lib-flags $^) $(LIBS)
+ $(Q)$(CC) $(LDFLAGS) -o $@ $(shell PKG_CONFIG_PATH="$(PKG_CONFIG_PATH)" $(BUILDSYS)/lib-flags $^) $(LIBS)
-$(o)/%.test: $(s)/%.t $(s)/build/tester
+$(o)/%.test: $(s)/%.t $(BUILDSYS)/tester
$(M)TEST $@
- $(Q)$(s)/build/tester --rundir=run $(TESTERFLAGS) $< && touch $@
+ $(Q)$(BUILDSYS)/tester --rundir=run $(TESTERFLAGS) $< && touch $@
# Rules for binaries
$(o)/%: $(o)/%.o
$(M)LD $@
- $(Q)$(CC) $(LDFLAGS) -o $@ $(shell $(s)/build/lib-flags $^) $(LIBS)
+ $(Q)$(CC) $(LDFLAGS) -o $@ $(shell PKG_CONFIG_PATH="$(PKG_CONFIG_PATH)" $(BUILDSYS)/lib-flags $^) $(LIBS)
$(Q)$(call symlink,$@,run/$(BINDIR))
-$(o)/%: $(s)/%.sh $(o)/config.mk $(s)/build/genconf
+$(o)/%: $(s)/%.sh $(o)/config.mk $(BUILDSYS)/genconf
$(M)PP $<
- $(Q)$(s)/build/genconf $< $@ $(o)/config.mk
+ $(Q)$(BUILDSYS)/genconf $< $@ $(o)/config.mk
$(Q)chmod +x $@
$(Q)$(call symlink,$@,run/$(BINDIR))
-$(o)/%: %.sh $(o)/config.mk $(s)/build/genconf
+$(o)/%: %.sh $(o)/config.mk $(BUILDSYS)/genconf
$(M)PP $<
- $(Q)$(s)/build/genconf $< $@ $(o)/config.mk
+ $(Q)$(BUILDSYS)/genconf $< $@ $(o)/config.mk
$(Q)chmod +x $@
$(Q)$(call symlink,$@,run/$(BINDIR))
-$(o)/%: $(s)/%.pl $(o)/config.mk $(s)/build/genconf
+$(o)/%: $(s)/%.pl $(o)/config.mk $(BUILDSYS)/genconf
$(M)PP $<
- $(Q)$(s)/build/genconf $< $@ $(o)/config.mk
+ $(Q)$(BUILDSYS)/genconf $< $@ $(o)/config.mk
$(Q)chmod +x $@
$(Q)$(call symlink,$@,run/$(BINDIR))
-$(o)/%: %.pl $(o)/config.mk $(s)/build/genconf
+$(o)/%: %.pl $(o)/config.mk $(BUILDSYS)/genconf
$(M)PP $<
- $(Q)$(s)/build/genconf $< $@ $(o)/config.mk
+ $(Q)$(BUILDSYS)/genconf $< $@ $(o)/config.mk
$(Q)chmod +x $@
$(Q)$(call symlink,$@,run/$(BINDIR))
$(Q)cp $^ $@
$(Q)$(call symlink,$@,run/$(DATADIR))
-# Default installation target
+# Rules for documentation
+
+$(o)/%.html: $(o)/%.txt $(BUILDSYS)/asciidoc.conf $(BUILDSYS)/asciidoc-xhtml.conf run/$(DOCDIR)/$(DOC_MODULE)/.dir-stamp
+ $(M)"DOC-HTML $<"
+ $(Q)asciidoc -e -f $(BUILDSYS)/asciidoc.conf -f $(BUILDSYS)/asciidoc-xhtml.conf -f $(HOST_PREFIX)/etc/asciidoc/asciidoc.conf -f $(HOST_PREFIX)/etc/asciidoc/xhtml11.conf $<
+ $(Q)$(call symlink,$@,run/$(DOCDIR)/$(DOC_MODULE))
+
+# In reality, we do not depend on the .txt files, but on the corresponding .deflist's.
+# However, the Makefile language cannot express that doc-extract generates both .txt
+# and .deflist, so we always use the .txt's in dependencies.
+$(patsubst %.html,%.txt,$(DOC_INDICES)): $(o)/%.txt: $(patsubst %.html,%.txt,$(DOCS)) $(BUILDSYS)/doc-defs
+ $(M)"DOC-DEFS $@"
+ $(Q)echo $@: $(DOC_HEAD) $(DOC_LIST) >> $(o)/depend.new
+ $(Q)$(BUILDSYS)/doc-defs $(DOC_HEAD) $@ $(DOC_LIST)
-default-install:
- SH_EXTRA_RUNDIRS="$(sort $(EXTRA_RUNDIRS))" SH_INSTALL_RUNDIRS="$(sort $(INSTALL_RUNDIRS))" SH_CONFIGS="$(sort $(CONFIGS))" SH_AUTO_CONFIRM="$(CONFIRM)" $(s)/build/installer $(INSTALL_DIR)
+$(patsubst %.html,%.txt,$(DOCS)): $(o)/%.txt: $(s)/%.txt $(BUILDSYS)/doc-extract
+ $(M)"DOC-EXT $<"
+ $(Q)$(BUILDSYS)/doc-extract $< $@ $(o)/depend.new $(s) $(patsubst %.txt,%.deflist,$@)
# Don't delete intermediate targets. There shouldn't be any, but due to bugs
# in GNU Make, rules with targets in not-yet-existing directories are ignored
# when searching for implicit rules, and their targets are then considered intermediate.
.SECONDARY:
-.PHONY: all clean distclean runtree programs api datafiles force tags configs dust install default-install
+.PHONY: all clean distclean runtree programs api datafiles force tags configs dust install docs tests
--- /dev/null
+# Makefile for Sherlock Build Tools
+
+DIRS+=build
+
+$(o)/build/genhash: $(o)/build/genhash.o
+
+# This is a hack which compensates for make's habit of propagating per-rule
+# variable settings to prerequisites: if some module specifies its own LIBS and
+# it depends on genhash, genhash is sometimes built with the module's LIBS (if
+# it isn't already built). A proper solution would be using a different rule
+# for linking build/*, but as it currently concerns only genhash, it's easier
+# to battle this way.
+$(o)/build/genhash: LIBS=
+
+INSTALL_TARGETS+=install-build
+install-build:
+ install -d -m 755 $(DESTDIR)$(INSTALL_SHARE_DIR)/ucw/build
+ install -m 755 $(addprefix $(BUILDSYS)/,install-includes doc-defs doc-extract genconf mergedeps tester lib-deps lib-flags) $(DESTDIR)$(INSTALL_SHARE_DIR)/ucw/build
+ install -m 644 $(addprefix $(BUILDSYS)/,asciidoc.conf asciidoc-xhtml.conf Makebottom Maketop) $(DESTDIR)$(INSTALL_SHARE_DIR)/ucw/build
+.PHONY: install-build
# Top part of Makefile for the UCW Libraries
-# (c) 1997--2007 Martin Mares <mj@ucw.cz>
+# (c) 1997--2008 Martin Mares <mj@ucw.cz>
# Set to 1 if you want verbose output
V=0
DIRS=
PROGS=
CONFIGS=
+CONFIG_SRC_DIR=$(CONFIG_DIR)
TESTS=
EXTRA_RUNDIRS=tmp log
INSTALL_RUNDIRS=bin lib
# Various files whose type does not fit into PROGS
DATAFILES=
+ifdef CONFIG_DARWIN
+DYNAMIC_LIBRARIES=dylib
+SOEXT=bundle
+HOST_PREFIX=/sw
+else
+DYNAMIC_LIBRARIES=so
+SOEXT=so
+HOST_PREFIX=
+endif
+
ifdef CONFIG_SHARED
-LS=so
+LS=$(DYNAMIC_LIBRARIES)
OS=oo
else
LS=a
OS=o
endif
-ifdef CONFIG_DARWIN
-SOEXT=bundle
-else
-SOEXT=so
-endif
+SO_RUNDIR=lib
# Whenever "make -s" (silent) is run, turn on verbose mode (paradoxical, but gives the right result)
ifneq ($(findstring s,$(MAKEFLAGS)),)
# Clean needs to be a double-colon rule since we want sub-makefiles to be able
# to define their own cleanup actions.
dust::
- rm -f `find . -path "*~" -or -name "\#*\#" -or -name core`
+ rm -f `find . -path "*~" -or -name "\#*\#"`
rm -f allocs.tmp cscope.out TAGS
clean:: dust
- rm -rf `find obj -mindepth 1 -maxdepth 1 -not \( -name config.mk -o -name autoconf.h \)`
- rm -rf tests run/{bin,lib,include,.tree-stamp}
+ rm -rf `find obj/ucw -mindepth 1 -maxdepth 1 -not -name autoconf.h`
+ rm -rf `find obj -mindepth 1 -maxdepth 1 -not \( -name config.mk -o -name autoconf.h -o -name ucw \)`
+ rm -rf tests run/{bin,lib,include,.tree-stamp,doc}
distclean:: clean
rm -rf obj run
testclean::
rm -f `find obj -name "*.test"`
+docclean::
+ rm -f $(DOCS) $(patsubst %.html,%.txt,$(DOCS))
+
# Extra default rules (appended to by submakefiles)
extras::
backref=$(subst $(space),/,$(patsubst %,..,$(subst /,$(space),$(1))))
tack-on=$(if $(patsubst /%,,$(2)),$(1)/$(2),$(2))
symlink=ln -sf $(call tack-on,$(call backref,$(2)),$(1)) $(2)/
+symlink-alias=ln -sf $(call tack-on,$(call backref,$(2)),$(1)) $(2)/$(3)
die "Piped command '$cmd' failed" if $?;
print OUT `$1`;
} else {
- sub repl ($) {
+ sub repl($);
+ sub repl($) {
my $v = shift @_;
exists $vars{$v} or die "Cannot substitute $v: variable not set";
- return $vars{$v};
+ my $x = $vars{$v};
+ while ($x =~ s/\$\((\w+)\)/repl($1)/ge) { }
+ return $x;
}
s/@(\w+)@/repl($1)/ge;
print OUT;
+++ /dev/null
-# Configuration of libucw modules
-
-######## Memory Mapped Access to Files ##########################################
-
-# Whenever you specify 0 for I/O buffer size, memory mapping is used instead.
-FBMMap {
-
-# Map this many bytes at once (needs to be a multiple of CPU page size)
-WindowSize 1M
-
-# When in need to extend a file, grow it by so many bytes (>= page size)
-ExtendSize 1M
-
-}
-
-######## Direct Streamed I/O on Files ###########################################
-
-FBDirect {
-
-# Debug: Cheat by turning off O_DIRECT
-#Cheat 1
-
-}
-
-######## Parametrized I/O on Files ##############################################
-
-FBParam {
-
-Defaults {
-
-# Access type (std|direct|mmap).
-Type std
-
-# Size of I/O buffer. Something of the order of megabytes for fast disks is recommended for direct I/O.
-BufSize 64K
-
-# Optimize for mixed forward/backward reading (standard I/O only)
-KeepBackBuf 0
-
-# Perform read-ahead (direct I/O only)
-ReadAhead 1
-
-# Maximum number of write-back requests queued (direct I/O only)
-WriteBack 1
-
-}
-
-}
-
-######## Temporary files ########################################################
-
-Tempfiles {
-
-# Filename prefix for temporary files ("pid(-tid)-counter" is appended)
-# The directory should not be writeable by malicious users.
-Prefix tmp/temp
-
-}
-
-######## Threads ################################################################
-
-Threads {
-
-# Default thread stack size
-DefaultStackSize 64K
-
-}
-
-######## Sorter #################################################################
-
-Sorter {
-
-# Trace sorting (1=basic statistics, 2=more stats, 3 and more for debugging)
-Trace 2
-
-# Trace array sorting (internal sorters)
-TraceArray 0
-
-# How much memory is the sorter allowed to use
-SortBuffer 4M
-
-# File access used by the sorter (see FBParam section for details)
-FileAccess std 256K
-
-# Use a different file access method for small inputs (less than the specified size)
-SmallFileAccess std 64K
-SmallInput 64M
-
-# Min-/Maximum number of bits to use in the external radix-sort (beware, we will open
-# 1+2^this files and require a stream buffer for each of them; however, while we are
-# doing that, the sort buffer is not allocated). Set both to zero to disable radix-sorting.
-MinRadixBits 2
-MaxRadixBits 4
-
-# The same for multi-way merging. The memory requirements are also the same,
-# but please keep in mind that this can create lots of SortBuffer-sized files,
-# so it is probably better to keep it disabled if you have a small SortBuffer.
-MinMultiwayBits 2
-MaxMultiwayBits 4
-
-# If we did not use radix-sorter to the full width, we still might add some more
-# bits to the width to get chunks which are even smaller than SortBuffer, because
-# it can speed up internal sorting later. However, we also want to avoid small
-# files, so we add only a little.
-AddRadixBits 2
-
-# Number of threads used for sorting (0=disable threading)
-Threads 0
-
-# Minimum size of input (in bytes) to consider multi-threaded internal sorting
-ThreadThreshold 1M
-
-# Chunks smaller than ThreadThreshold are sorted by a sequential algorithm, but
-# if they are at least of the following size, different chunks are sorted in
-# parallel. There is a slight space penalty for setting up the parallel process,
-# so better avoid setting this number too small.
-ThreadChunk 256
-
-# Internal radix-sort stops at this size and switches to QuickSort (must be >0)
-RadixThreshold 4K
-
-# Debugging switches (see the source)
-Debug 0
-
-}
-
-######## URL processing #########################################################
-
-URL {
-
-# Ignore spaces at the start/end of a URL
-IgnoreSpaces 1
-
-# Ignore underflows in relative paths (/../ from root)
-IgnoreUnderflow 1
-
-# Some URL's with many repeated components are filtered out to avoid infinite
-# URL's (e.g. http://czech.recoder.cz/win/iso/win/iso/file.html, or
-# http://a.com/?a=b&a=b&a=b, ...).
-# The URL is split to components divided by any of the specified separators.
-# Then the separators are forgotten and the components between them are
-# examined.
-ComponentSeparators /&?
-
-# URL is filtered out if there's a sequence of components in a row with at most
-# MaxRepeatLength components and the sequence is repeated more than MinRepeatCount
-# times. Default values are high MinRepeatCount and low MaxRepeatLength, so the
-# mechanism is disabled.
-MinRepeatCount 4
-MaxRepeatLength 4
-
-# Maximum number of occurences of a single component in the entire URL (possibly interleaved
-# by different components). The detector is disabled by default.
-MaxOccurences 4
-
-}
--- /dev/null
+# Configuration of libucw modules
+
+######## Memory Mapped Access to Files ##########################################
+
+# Whenever you specify 0 for I/O buffer size, memory mapping is used instead.
+FBMMap {
+
+# Map this many bytes at once (needs to be a multiple of CPU page size)
+WindowSize 1M
+
+# When a file needs to be extended, grow it by this many bytes (>= page size)
+ExtendSize 1M
+
+}
+
+######## Direct Streamed I/O on Files ###########################################
+
+FBDirect {
+
+# Debug: Cheat by turning off O_DIRECT
+#Cheat 1
+
+}
+
+######## Parametrized I/O on Files ##############################################
+
+FBParam {
+
+Defaults {
+
+# Access type (std|direct|mmap).
+Type std
+
+# Size of I/O buffer. Something of the order of megabytes for fast disks is recommended for direct I/O.
+BufSize 64K
+
+# Optimize for mixed forward/backward reading (standard I/O only)
+KeepBackBuf 0
+
+# Perform read-ahead (direct I/O only)
+ReadAhead 1
+
+# Maximum number of write-back requests queued (direct I/O only)
+WriteBack 1
+
+}
+
+}
+
+######## Temporary files ########################################################
+
+Tempfiles {
+
+# Filename prefix for temporary files ("pid(-tid)-counter" is appended)
+# The directory should not be writeable by malicious users.
+Prefix tmp/temp
+
+}
+
+######## Threads ################################################################
+
+Threads {
+
+# Default thread stack size
+DefaultStackSize 64K
+
+}
+
+######## Sorter #################################################################
+
+Sorter {
+
+# Trace sorting (1=basic statistics, 2=more stats, 3 and more for debugging)
+Trace 2
+
+# Trace array sorting (internal sorters)
+TraceArray 0
+
+# How much memory is the sorter allowed to use
+SortBuffer 4M
+
+# File access used by the sorter (see FBParam section for details)
+FileAccess std 256K
+
+# Use a different file access method for small inputs (less than the specified size)
+SmallFileAccess std 64K
+SmallInput 64M
+
+# Min-/Maximum number of bits to use in the external radix-sort (beware, we will open
+# 1+2^this files and require a stream buffer for each of them; however, while we are
+# doing that, the sort buffer is not allocated). Set both to zero to disable radix-sorting.
+MinRadixBits 2
+MaxRadixBits 4
+
+# The same for multi-way merging. The memory requirements are also the same,
+# but please keep in mind that this can create lots of SortBuffer-sized files,
+# so it is probably better to keep it disabled if you have a small SortBuffer.
+MinMultiwayBits 2
+MaxMultiwayBits 4
+
+# If we did not use radix-sorter to the full width, we still might add some more
+# bits to the width to get chunks which are even smaller than SortBuffer, because
+# it can speed up internal sorting later. However, we also want to avoid small
+# files, so we add only a little.
+AddRadixBits 2
+
+# Number of threads used for sorting (0=disable threading)
+Threads 0
+
+# Minimum size of input (in bytes) to consider multi-threaded internal sorting
+ThreadThreshold 1M
+
+# Chunks smaller than ThreadThreshold are sorted by a sequential algorithm, but
+# if they are at least of the following size, different chunks are sorted in
+# parallel. There is a slight space penalty for setting up the parallel process,
+# so better avoid setting this number too small.
+ThreadChunk 256
+
+# Internal radix-sort stops at this size and switches to QuickSort (must be >0)
+RadixThreshold 4K
+
+# Debugging switches (see the source)
+Debug 0
+
+}
+
+######## URL processing #########################################################
+
+URL {
+
+# Ignore spaces at the start/end of a URL
+IgnoreSpaces 1
+
+# Ignore underflows in relative paths (/../ from root)
+IgnoreUnderflow 1
+
+# Some URLs with many repeated components are filtered out to avoid infinite
+# URLs (e.g. http://czech.recoder.cz/win/iso/win/iso/file.html, or
+# http://a.com/?a=b&a=b&a=b, ...).
+# The URL is split to components divided by any of the specified separators.
+# Then the separators are forgotten and the components between them are
+# examined.
+ComponentSeparators /&?
+
+# URL is filtered out if there's a sequence of components in a row with at most
+# MaxRepeatLength components and the sequence is repeated more than MinRepeatCount
+# times. Default values are high MinRepeatCount and low MaxRepeatLength, so the
+# mechanism is disabled.
+MinRepeatCount 4
+MaxRepeatLength 4
+
+# Maximum number of occurrences of a single component in the entire URL (possibly interleaved
+# by different components). The detector is disabled by default.
+MaxOccurences 4
+
+}
#!/usr/bin/perl
# Configure script for MO-Eval
-# (c) 2008 Martin Mares <mj@ucw.cz>
+# (c) 2008--2009 Martin Mares <mj@ucw.cz>
use warnings;
use strict;
die "Don't know how to find myself. Please set SRCDIR manually.";
}
}
- require "$srcdir/lib/perl/Configure.pm";
- UCW::Configure::import UCW::Configure;
}
+use lib "$srcdir/ucw/perl";
+use UCW::Configure;
+use UCW::Configure::Pkg;
+
Init($srcdir, "default.cfg");
-Include "lib/default.cfg";
+Include "ucw/default.cfg";
Log "### Configuring MO-Eval ###\n\n";
Include Get("CONFIG");
-Include "lib/autoconf.cfg";
+require UCW::Configure::Paths;
+require UCW::Configure::C;
+require UCW::Configure::LibUCW;
if (Get("CONFIG_SUBMIT") || Get("CONFIG_MOP")) {
# Build libucw only if it is needed
# Settings of libucw
UnSet("CONFIG_SHARED");
UnSet("CONFIG_UCW_THREADS");
+Set("CONFIG_LOCAL");
+Set("CONFIG_DIR", "cf");
# Return success
1;
+++ /dev/null
-# Makefile for the UCW Library (c) 1997--2007 Martin Mares <mj@ucw.cz>
-
-DIRS+=lib
-CONFIGS+=library
-LIBUCW=$(o)/lib/libucw.pc
-
-ifdef CONFIG_UCW_DBTOOL
-PROGS+=$(o)/lib/db-tool
-endif
-
-LIBUCW_MODS= \
- threads \
- alloc alloc_str realloc bigalloc mempool mempool-str mempool-fmt eltpool \
- mmap pagecache partmap hashfunc \
- lists slists simple-lists bitsig \
- log log-file proctitle \
- conf-alloc conf-dump conf-input conf-intr conf-journal conf-parse conf-section \
- ipaccess \
- profile \
- fastbuf ff-binary ff-string ff-printf ff-unicode \
- fb-file carefulio fb-mem fb-temp fb-mmap fb-limfd fb-buffer fb-grow fb-pool fb-atomic fb-param \
- str_ctype str_upper str_lower unicode stkstring \
- wildmatch wordsplit ctmatch patimatch patmatch regex \
- prime primetable random timer randomkey \
- bit-ffs bit-fls \
- db \
- url \
- mainloop exitstatus runcmd sighandler \
- lizard lizard-safe adler32 \
- md5 md5hex \
- base64 base224 \
- sync \
- qache \
- string \
- bbuf \
- getopt
-
-LIBUCW_INCLUDES= \
- lib.h config.h threads.h \
- mempool.h pagecache.h \
- arraysort.h \
- lists.h clists.h slists.h simple-lists.h \
- unaligned.h prefetch.h \
- bbuf.h gbuf.h bitarray.h bitsig.h \
- hashfunc.h hashtable.h \
- heap.h binheap.h binheap-node.h \
- redblack.h \
- binsearch.h \
- bitops.h \
- conf.h getopt.h ipaccess.h \
- profile.h \
- fastbuf.h lfs.h ff-unicode.h ff-utf8.h ff-binary.h \
- chartype.h unicode.h stkstring.h \
- wildmatch.h patmatch.h \
- db.h \
- url.h \
- mainloop.h \
- lizard.h \
- md5.h \
- base64.h base224.h \
- qache.h \
- kmp.h kmp-search.h binsearch.h \
- partmap.h
-
-ifdef CONFIG_UCW_THREADS
-# Some modules require threading
-LIBUCW_MODS+=threads-conf workqueue asio fb-direct
-LIBUCW_INCLUDES+=workqueue.h semaphore.h asio.h
-endif
-
-ifdef CONFIG_OWN_REGEX
-include $(s)/lib/regex/Makefile
-endif
-
-ifdef CONFIG_OWN_GETOPT
-include $(s)/lib/getopt/Makefile
-endif
-
-include $(s)/lib/sorter/Makefile
-
-LIBUCW_MOD_PATHS=$(addprefix $(o)/lib/,$(LIBUCW_MODS))
-
-$(o)/lib/libucw.a: $(addsuffix .o,$(LIBUCW_MOD_PATHS))
-$(o)/lib/libucw.so: $(addsuffix .oo,$(LIBUCW_MOD_PATHS))
-
-$(o)/lib/hashfunc.o $(o)/lib/hashfunc.oo: CFLAGS += -funroll-loops
-$(o)/lib/lizard.o: CFLAGS += $(COPT2) -funroll-loops
-
-$(o)/lib/db-test: $(o)/lib/db-test.o $(LIBUCW)
-$(o)/lib/db-tool: $(o)/lib/db-tool.o $(LIBUCW)
-$(o)/lib/conf-test: $(o)/lib/conf-test.o $(LIBUCW)
-$(o)/lib/lfs-test: $(o)/lib/lfs-test.o $(LIBUCW)
-$(o)/lib/hash-test: $(o)/lib/hash-test.o $(LIBUCW)
-$(o)/lib/str-test: $(o)/lib/str-test.o $(LIBUCW)
-$(o)/lib/asort-test: $(o)/lib/asort-test.o $(LIBUCW)
-$(o)/lib/redblack-test: $(o)/lib/redblack-test.o $(LIBUCW)
-$(o)/lib/binheap-test: $(o)/lib/binheap-test.o $(LIBUCW)
-$(o)/lib/lizard-test: $(o)/lib/lizard-test.o $(LIBUCW)
-$(o)/lib/kmp-test: $(o)/lib/kmp-test.o $(LIBUCW) $(LIBCHARSET)
-$(o)/lib/ipaccess-test: $(o)/lib/ipaccess-test.o $(LIBUCW)
-
-TESTS+=$(addprefix $(o)/lib/,regex.test unicode.test hash-test.test mempool.test stkstring.test \
- slists.test kmp-test.test bbuf.test getopt.test fastbuf.test ff-unicode.test eltpool.test)
-
-$(o)/lib/regex.test: $(o)/lib/regex-t
-$(o)/lib/unicode.test: $(o)/lib/unicode-t
-$(o)/lib/hash-test.test: $(o)/lib/hash-test
-$(o)/lib/mempool.test: $(o)/lib/mempool-t $(o)/lib/mempool-fmt-t $(o)/lib/mempool-str-t
-$(o)/lib/stkstring.test: $(o)/lib/stkstring-t
-$(o)/lib/bitops.test: $(o)/lib/bit-ffs-t $(o)/lib/bit-fls-t
-$(o)/lib/slists.test: $(o)/lib/slists-t
-$(o)/lib/kmp-test.test: $(o)/lib/kmp-test
-$(o)/lib/bbuf.test: $(o)/lib/bbuf-t
-$(o)/lib/getopt.test: $(o)/lib/getopt-t
-$(o)/lib/fastbuf.test: $(o)/lib/fb-file-t $(o)/lib/fb-grow-t $(o)/lib/fb-pool-t
-$(o)/lib/ff-unicode.test: $(o)/lib/ff-unicode-t
-$(o)/lib/eltpool.test: $(o)/lib/eltpool-t
-
-ifdef CONFIG_UCW_THREADS
-TESTS+=$(addprefix $(o)/lib/,asio.test)
-$(o)/lib/asio.test: $(o)/lib/asio-t
-endif
-
-API_LIBS+=libucw
-API_INCLUDES+=$(o)/lib/.include-stamp
-$(o)/lib/.include-stamp: $(addprefix $(s)/lib/,$(LIBUCW_INCLUDES)) obj/autoconf.h
- $(Q)$(s)/build/install-includes $(<D) run/include/lib $(LIBUCW_INCLUDES)
- $(Q)$(s)/build/install-includes obj run/include/lib autoconf.h
- $(Q)touch $@
-run/lib/pkgconfig/libucw.pc: $(o)/lib/libucw.pc
-
-ifdef CONFIG_UCW_PERL
-include $(s)/lib/perl/Makefile
-endif
-
-ifdef CONFIG_UCW_SHELL_UTILS
-include $(s)/lib/shell/Makefile
-endif
+++ /dev/null
-Generally, functions in the UCW library are reentrant as long as you call them
-on different data. Calling on the same object is not, unless otherwise told,
-which also includes functions acting on any kind of global state.
-
-There are some exceptions:
-
-- setproctitle() is not safe, it modifies global state
+++ /dev/null
-/*
- * adler32.c -- compute the Adler-32 checksum of a data stream
- *
- * Copyright (C) 1995--2003 Mark Adler
- *
- * Taken from zlib-1.2.1 and adjusted by Robert Spalek. For conditions of
- * distribution and use, see copyright notice in zlib.h.
- */
-
-#include "lib/lib.h"
-#include "lib/lizard.h"
-
-#define BASE 65521UL /* largest prime smaller than 65536 */
-#define NMAX 5552 /* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */
-
-#define DO1(buf,i) {s1 += buf[i]; s2 += s1;}
-#define DO2(buf,i) DO1(buf,i); DO1(buf,i+1);
-#define DO4(buf,i) DO2(buf,i); DO2(buf,i+2);
-#define DO8(buf,i) DO4(buf,i); DO4(buf,i+4);
-#define DO16(buf) DO8(buf,0); DO8(buf,8);
-#define MOD(a) a %= BASE
-
-uns
-update_adler32(uns adler, const byte *buf, uns len)
-{
- uns s1 = adler & 0xffff;
- uns s2 = (adler >> 16) & 0xffff;
- int k;
-
- if (!buf) return 1L;
-
- while (len > 0) {
- k = len < NMAX ? (int)len : NMAX;
- len -= k;
- while (k >= 16) {
- DO16(buf);
- buf += 16;
- k -= 16;
- }
- if (k != 0) do {
- s1 += *buf++;
- s2 += s1;
- } while (--k);
- MOD(s1);
- MOD(s2);
- }
- return (s2 << 16) | s1;
-}
+++ /dev/null
-/*
- * UCW Library -- Memory Allocation
- *
- * (c) 2000 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-
-#include <stdlib.h>
-#include <string.h>
-
-#ifndef DEBUG_DMALLOC
-
-void *
-xmalloc(uns size)
-{
- void *x = malloc(size);
- if (!x)
- die("Cannot allocate %d bytes of memory", size);
- return x;
-}
-
-#endif
-
-void *
-xmalloc_zero(uns size)
-{
- void *x = xmalloc(size);
- bzero(x, size);
- return x;
-}
-
-void
-xfree(void *ptr)
-{
- /*
- * Maybe it is a little waste of resources to make this a function instead
- * of a macro, but xmalloc() is not used for anything critical anyway,
- * so let's prefer simplicity.
- */
- free(ptr);
-}
+++ /dev/null
-/*
- * UCW Library -- String Allocation
- *
- * (c) 1997 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-
-#include <string.h>
-
-char *
-xstrdup(const char *s)
-{
- uns l = strlen(s) + 1;
- return memcpy(xmalloc(l), s, l);
-}
+++ /dev/null
-/*
- * UCW Library -- Universal Array Sorter
- *
- * (c) 2003 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-/*
- * This is not a normal header file, it's a generator of sorting
- * routines. Each time you include it with parameters set in the
- * corresponding preprocessor macros, it generates an array sorter
- * with the parameters given.
- *
- * You might wonder why the heck do we implement our own array sorter
- * instead of using qsort(). The primary reason is that qsort handles
- * only continuous arrays, but we need to sort array-like data structures
- * where the only way to access elements is by using an indexing macro.
- * Besides that, we are more than 2 times faster.
- *
- * So much for advocacy, there are the parameters (those marked with [*]
- * are mandatory):
- *
- * ASORT_PREFIX(x) [*] add a name prefix (used on all global names
- * defined by the sorter)
- * ASORT_KEY_TYPE [*] data type of a single array entry key
- * ASORT_ELT(i) [*] returns the key of i-th element
- * ASORT_LT(x,y) x < y for ASORT_TYPE (default: "x<y")
- * ASORT_SWAP(i,j) swap i-th and j-th element (default: assume _ELT
- * is an l-value and swap just the keys)
- * ASORT_THRESHOLD threshold for switching between quicksort and insertsort
- * ASORT_EXTRA_ARGS extra arguments for the sort function (they are always
- * visible in all the macros supplied above), starts with comma
- *
- * After including this file, a function ASORT_PREFIX(sort)(uns array_size)
- * is declared and all parameter macros are automatically undef'd.
- */
-
-#ifndef ASORT_LT
-#define ASORT_LT(x,y) ((x) < (y))
-#endif
-
-#ifndef ASORT_SWAP
-#define ASORT_SWAP(i,j) do { ASORT_KEY_TYPE tmp = ASORT_ELT(i); ASORT_ELT(i)=ASORT_ELT(j); ASORT_ELT(j)=tmp; } while (0)
-#endif
-
-#ifndef ASORT_THRESHOLD
-#define ASORT_THRESHOLD 8 /* Guesswork and experimentation */
-#endif
-
-#ifndef ASORT_EXTRA_ARGS
-#define ASORT_EXTRA_ARGS
-#endif
-
-static void ASORT_PREFIX(sort)(uns array_size ASORT_EXTRA_ARGS)
-{
- struct stk { int l, r; } stack[8*sizeof(uns)];
- int l, r, left, right, m;
- uns sp = 0;
- ASORT_KEY_TYPE pivot;
-
- if (array_size <= 1)
- return;
-
- /* QuickSort with optimizations a'la Sedgewick, but stop at ASORT_THRESHOLD */
-
- left = 0;
- right = array_size - 1;
- for(;;)
- {
- l = left;
- r = right;
- m = (l+r)/2;
- if (ASORT_LT(ASORT_ELT(m), ASORT_ELT(l)))
- ASORT_SWAP(l,m);
- if (ASORT_LT(ASORT_ELT(r), ASORT_ELT(m)))
- {
- ASORT_SWAP(m,r);
- if (ASORT_LT(ASORT_ELT(m), ASORT_ELT(l)))
- ASORT_SWAP(l,m);
- }
- pivot = ASORT_ELT(m);
- do
- {
- while (ASORT_LT(ASORT_ELT(l), pivot))
- l++;
- while (ASORT_LT(pivot, ASORT_ELT(r)))
- r--;
- if (l < r)
- {
- ASORT_SWAP(l,r);
- l++;
- r--;
- }
- else if (l == r)
- {
- l++;
- r--;
- }
- }
- while (l <= r);
- if ((r - left) >= ASORT_THRESHOLD && (right - l) >= ASORT_THRESHOLD)
- {
- /* Both partitions ok => push the larger one */
- if ((r - left) > (right - l))
- {
- stack[sp].l = left;
- stack[sp].r = r;
- left = l;
- }
- else
- {
- stack[sp].l = l;
- stack[sp].r = right;
- right = r;
- }
- sp++;
- }
- else if ((r - left) >= ASORT_THRESHOLD)
- {
- /* Left partition OK, right undersize */
- right = r;
- }
- else if ((right - l) >= ASORT_THRESHOLD)
- {
- /* Right partition OK, left undersize */
- left = l;
- }
- else
- {
- /* Both partitions undersize => pop */
- if (!sp)
- break;
- sp--;
- left = stack[sp].l;
- right = stack[sp].r;
- }
- }
-
- /*
- * We have a partially sorted array, finish by insertsort. Inspired
- * by qsort() in GNU libc.
- */
-
- /* Find minimal element which will serve as a barrier */
- r = MIN(array_size, ASORT_THRESHOLD);
- m = 0;
- for (l=1; l<r; l++)
- if (ASORT_LT(ASORT_ELT(l),ASORT_ELT(m)))
- m = l;
- ASORT_SWAP(0,m);
-
- /* Insertion sort */
- for (m=1; m<(int)array_size; m++)
- {
- l=m;
- while (ASORT_LT(ASORT_ELT(m),ASORT_ELT(l-1)))
- l--;
- while (l < m)
- {
- ASORT_SWAP(l,m);
- l++;
- }
- }
-}
-
-#undef ASORT_PREFIX
-#undef ASORT_KEY_TYPE
-#undef ASORT_ELT
-#undef ASORT_LT
-#undef ASORT_SWAP
-#undef ASORT_THRESHOLD
-#undef ASORT_EXTRA_ARGS
+++ /dev/null
-/*
- * UCW Library -- Asynchronous I/O
- *
- * (c) 2006 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#undef LOCAL_DEBUG
-
-#include "lib/lib.h"
-#include "lib/asio.h"
-#include "lib/threads.h"
-
-#include <string.h>
-#include <unistd.h>
-#include <errno.h>
-
-static uns asio_num_users;
-static struct worker_pool asio_wpool;
-
-static void
-asio_init_unlocked(void)
-{
- if (asio_num_users++)
- return;
-
- DBG("ASIO: INIT");
- asio_wpool.num_threads = 1;
- worker_pool_init(&asio_wpool);
-}
-
-static void
-asio_cleanup_unlocked(void)
-{
- if (--asio_num_users)
- return;
-
- DBG("ASIO: CLEANUP");
- worker_pool_cleanup(&asio_wpool);
-}
-
-void
-asio_init_queue(struct asio_queue *q)
-{
- ucwlib_lock();
- asio_init_unlocked();
- ucwlib_unlock();
-
- DBG("ASIO: New queue %p", q);
- ASSERT(q->buffer_size);
- q->allocated_requests = 0;
- q->running_requests = 0;
- q->running_writebacks = 0;
- q->use_count = 0;
- clist_init(&q->idle_list);
- clist_init(&q->done_list);
- work_queue_init(&asio_wpool, &q->queue);
-}
-
-void
-asio_cleanup_queue(struct asio_queue *q)
-{
- DBG("ASIO: Removing queue %p", q);
- ASSERT(!q->running_requests);
- ASSERT(!q->running_writebacks);
- ASSERT(!q->allocated_requests);
- ASSERT(clist_empty(&q->done_list));
-
- struct asio_request *r;
- while (r = clist_remove_head(&q->idle_list))
- {
- big_free(r->buffer, q->buffer_size);
- xfree(r);
- }
-
- work_queue_cleanup(&q->queue);
-
- ucwlib_lock();
- asio_cleanup_unlocked();
- ucwlib_unlock();
-}
-
-struct asio_request *
-asio_get(struct asio_queue *q)
-{
- q->allocated_requests++;
- struct asio_request *r = clist_head(&q->idle_list);
- if (!r)
- {
- r = xmalloc_zero(sizeof(*r));
- r->queue = q;
- r->buffer = big_alloc(q->buffer_size);
- DBG("ASIO: Got %p (new)", r);
- }
- else
- {
- clist_remove(&r->work.n);
- DBG("ASIO: Got %p", r);
- }
- r->op = ASIO_FREE;
- r->fd = -1;
- r->len = 0;
- r->status = -1;
- r->returned_errno = -1;
- r->submitted = 0;
- return r;
-}
-
-static int
-asio_raw_wait(struct asio_queue *q)
-{
- struct asio_request *r = (struct asio_request *) work_wait(&q->queue);
- if (!r)
- return 0;
- r->submitted = 0;
- q->running_requests--;
- if (r->op == ASIO_WRITE_BACK)
- {
- DBG("ASIO: Finished writeback %p", r);
- if (r->status < 0)
- die("Asynchronous write to fd %d failed: %s", r->fd, strerror(r->returned_errno));
- if (r->status != (int)r->len)
- die("Asynchronous write to fd %d wrote only %d bytes out of %d", r->fd, r->status, r->len);
- q->running_writebacks--;
- asio_put(r);
- }
- else
- clist_add_tail(&q->done_list, &r->work.n);
- return 1;
-}
-
-static void
-asio_handler(struct worker_thread *t UNUSED, struct work *w)
-{
- struct asio_request *r = (struct asio_request *) w;
-
- DBG("ASIO: Servicing %p (%s on fd=%d, len=%d)", r,
- (char*[]) { "?", "READ", "WRITE", "WRITEBACK" }[r->op], r->fd, r->len);
- errno = 0;
- switch (r->op)
- {
- case ASIO_READ:
- r->status = read(r->fd, r->buffer, r->len);
- break;
- case ASIO_WRITE:
- case ASIO_WRITE_BACK:
- r->status = write(r->fd, r->buffer, r->len);
- break;
- default:
- die("ASIO: Got unknown request type %d", r->op);
- }
- r->returned_errno = errno;
- DBG("ASIO: Finished %p (status=%d, errno=%d)", r, r->status, r->returned_errno);
-}
-
-void
-asio_submit(struct asio_request *r)
-{
- struct asio_queue *q = r->queue;
- DBG("ASIO: Submitting %p on queue %p", r, q);
- ASSERT(r->op != ASIO_FREE);
- ASSERT(!r->submitted);
- if (r->op == ASIO_WRITE_BACK)
- {
- while (q->running_writebacks >= q->max_writebacks)
- {
- DBG("ASIO: Waiting for free writebacks");
- if (!asio_raw_wait(q))
- ASSERT(0);
- }
- q->running_writebacks++;
- }
- q->running_requests++;
- r->submitted = 1;
- r->work.go = asio_handler;
- r->work.priority = 0;
- work_submit(&q->queue, &r->work);
-}
-
-struct asio_request *
-asio_wait(struct asio_queue *q)
-{
- struct asio_request *r;
- while (!(r = clist_head(&q->done_list)))
- {
- DBG("ASIO: Waiting on queue %p", q);
- if (!asio_raw_wait(q))
- return NULL;
- }
- clist_remove(&r->work.n);
- DBG("ASIO: Done %p", r);
- return r;
-}
-
-void
-asio_put(struct asio_request *r)
-{
- struct asio_queue *q = r->queue;
- DBG("ASIO: Put %p", r);
- ASSERT(!r->submitted);
- ASSERT(q->allocated_requests);
- clist_add_tail(&q->idle_list, &r->work.n);
- q->allocated_requests--;
-}
-
-void
-asio_sync(struct asio_queue *q)
-{
- DBG("ASIO: Syncing queue %p", q);
- while (q->running_requests)
- if (!asio_raw_wait(q))
- ASSERT(0);
-}
-
-#ifdef TEST
-
-int main(void)
-{
- struct asio_queue q;
- struct asio_request *r;
-
- q.buffer_size = 4096;
- q.max_writebacks = 2;
- asio_init_queue(&q);
-
-#if 0
-
- for (;;)
- {
- r = asio_get(&q);
- r->op = ASIO_READ;
- r->fd = 0;
- r->len = q.buffer_size;
- asio_submit(r);
- r = asio_wait(&q);
- ASSERT(r);
- if (r->status <= 0)
- {
- asio_put(r);
- break;
- }
- r->op = ASIO_WRITE_BACK;
- r->fd = 1;
- r->len = r->status;
- asio_submit(r);
- }
- asio_sync(&q);
-
-#else
-
- r = asio_get(&q);
- r->op = ASIO_READ;
- r->fd = 0;
- r->len = 1;
- asio_submit(r);
- r = asio_wait(&q);
- ASSERT(r);
- asio_put(r);
-
- for (uns i=0; i<10; i++)
- {
- r = asio_get(&q);
- r->op = ASIO_WRITE_BACK;
- r->fd = 1;
- r->len = 1;
- r->buffer[0] = 'A' + i;
- asio_submit(r);
- }
- asio_sync(&q);
-
- r = asio_get(&q);
- r->op = ASIO_WRITE;
- r->fd = 1;
- r->len = 1;
- r->buffer[0] = '\n';
- asio_submit(r);
- r = asio_wait(&q);
- ASSERT(r);
- asio_put(r);
-
-#endif
-
- asio_cleanup_queue(&q);
- return 0;
-}
-
-#endif
+++ /dev/null
-/*
- * UCW Library -- Asynchronous I/O
- *
- * (c) 2006 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#ifndef _UCW_ASIO_H
-#define _UCW_ASIO_H
-
-#include "lib/workqueue.h"
-#include "lib/clists.h"
-
-/*
- * This module takes care of scheduling and executing asynchronous I/O requests
- * on files opened with O_DIRECT. It is primarily used by the fb-direct fastbuf
- * back-end, but you can use it explicitly, too.
- *
- * You can define several I/O queues, each for use by a single thread. Requests
- * on a single queue are always processed in order of their submits, requests
- * from different queues may be interleaved (although the current implementation
- * does not do so). Normal read and write requests are returned to their queue
- * when they are completed. Write-back requests are automatically freed when
- * done, but the number of such requests in fly is limited in order to avoid
- * consuming all memory, so a submit of a write-back request can block.
- */
-
-struct asio_queue {
- uns buffer_size; // How large buffers do we use [user-settable]
- uns max_writebacks; // Maximum number of writeback requests active [user-settable]
- uns allocated_requests;
- uns running_requests; // Total number of running requests
- uns running_writebacks; // How many of them are writebacks
- clist idle_list; // Recycled requests waiting for get
- clist done_list; // Finished requests
- struct work_queue queue;
- uns use_count; // For use by the caller
-};
-
-enum asio_op {
- ASIO_FREE,
- ASIO_READ,
- ASIO_WRITE,
- ASIO_WRITE_BACK, // Background write with no success notification
-};
-
-struct asio_request {
- struct work work; // asio_requests are internally just work nodes
- struct asio_queue *queue;
- byte *buffer;
- int fd;
- enum asio_op op;
- uns len;
- int status;
- int returned_errno;
- int submitted;
- void *user_data; // For use by the caller
-};
-
-void asio_init_queue(struct asio_queue *q); // Initialize a new queue
-void asio_cleanup_queue(struct asio_queue *q);
-struct asio_request *asio_get(struct asio_queue *q); // Get an empty request
-void asio_submit(struct asio_request *r); // Submit the request (can block if too many writebacks)
-struct asio_request *asio_wait(struct asio_queue *q); // Wait for the first finished request, NULL if no more
-void asio_put(struct asio_request *r); // Return a finished request for recycling
-void asio_sync(struct asio_queue *q); // Wait until all requests are finished
-
-#endif /* !_UCW_ASIO_H */
+++ /dev/null
-# Tests for asynchronous I/O
-
-Run: echo y | ../obj/lib/asio-t
-Out: ABCDEFGHIJ
+++ /dev/null
-/*
- * UCW Library -- Universal Array Sorter Test and Benchmark
- *
- * (c) 2003 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-
-#include <stdlib.h>
-#include <stdio.h>
-
-#define N 4000037 /* a prime */
-
-struct elt {
- u32 key;
- u32 x, y;
-};
-
-static struct elt array[N];
-
-#define ASORT_KEY_TYPE u32
-#define ASORT_ELT(i) array[i].key
-#define ASORT_SWAP(i,j) do { struct elt e=array[j]; array[j]=array[i]; array[i]=e; } while(0)
-
-static void generate(void)
-{
- uns i;
- for (i=0; i<N; i++)
-#if 0
- ASORT_ELT(i) = N-i-1;
-#elif 0
- ASORT_ELT(i) = i;
-#else
- ASORT_ELT(i) = (i ? ASORT_ELT(i-1)+1944833754 : 3141592) % N;
-#endif
-}
-
-static void check(void)
-{
- uns i;
- for (i=0; i<N; i++)
- if (ASORT_ELT(i) != i)
- printf("error at pos %d: %08x != %08x\n", i, ASORT_ELT(i), i);
-}
-
-static int qs_comp(const struct elt *X, const struct elt *Y)
-{
- if (X->key < Y->key)
- return -1;
- else if (X->key > Y->key)
- return 1;
- else
- return 0;
-}
-
-#define ASORT_PREFIX(x) as_##x
-#include "lib/arraysort.h"
-
-int main(void)
-{
- timestamp_t timer;
-
- generate();
- init_timer(&timer);
- qsort(array, N, sizeof(array[0]), (int (*)(const void *, const void *)) qs_comp);
- printf("qsort: %d ms\n", get_timer(&timer));
- check();
- generate();
- init_timer(&timer);
- as_sort(N);
- printf("asort: %d ms\n", get_timer(&timer));
- check();
- return 0;
-}
+++ /dev/null
-# Automatic configuration of the UCW Library
-# (c) 2005--2007 Martin Mares <mj@ucw.cz>
-# (c) 2006 Robert Spalek <robert@ucw.cz>
-
-### OS ###
-
-Test("OS", "Checking on which OS we run", sub {
- my $os = `uname`;
- chomp $os;
- Fail "Unable to determine OS type" if $? || $os eq "";
- return $os;
-});
-
-if (Get("OS") eq "Linux") {
- Set("CONFIG_LINUX");
-} elsif (Get("OS") eq "Darwin") {
- Set("CONFIG_DARWIN");
-} else {
- Fail "Don't know how to run on this operating system.";
-}
-
-### Compiler ###
-
-# Default compiler
-Test("CC", "Checking for C compiler", sub { return "gcc"; });
-
-# GCC version
-Test("GCCVER", "Checking for GCC version", sub {
- my $gcc = Get("CC");
- my $ver = `$gcc --version | sed '2,\$d; s/^\\(.* \\)*\\([0-9]*\\.[0-9]*\\).*/\\2/'`;
- chomp $ver;
- Fail "Unable to determine GCC version" if $? || $ver eq "";
- return $ver;
-});
-my ($gccmaj, $gccmin) = split(/\./, Get("GCCVER"));
-my $gccver = 1000*$gccmaj + $gccmin;
-$gccver >= 3000 or Fail "GCC older than 3.0 doesn't support C99 well enough.";
-
-### CPU ###
-
-Test("ARCH", "Checking for machine architecture", sub {
- my $mach = `uname -m`;
- chomp $mach;
- Fail "Unable to determine machine type" if $? || $mach eq "";
- if ($mach =~ /^i[0-9]86$/) {
- return "i386";
- } elsif ($mach =~ /^(x86[_-]|amd)64$/) {
- return "amd64";
- } else {
- return "unknown";
- }
-});
-
-sub parse_cpuinfo_linux() {
- open X, "/proc/cpuinfo" || undef;
- my %pc = ();
- while (<X>) {
- chomp;
- /^$/ && last;
- /^([^\t]+)\t+:\s*(.*)$/ and $pc{$1}=$2;
- }
- close X;
- return ($pc{'vendor_id'},
- $pc{'cpu family'},
- $pc{'model'});
-}
-
-sub parse_cpuinfo_darwin() {
- @cpu = (`sysctl -n machdep.cpu.vendor`,
- `sysctl -n machdep.cpu.family`,
- `sysctl -n machdep.cpu.model`);
- chomp @cpu;
- return @cpu;
-}
-
-sub parse_cpuinfo() {
- my @cpu;
- if (IsSet("CONFIG_LINUX")) {
- @cpu = parse_cpuinfo_linux();
- } elsif (IsSet("CONFIG_DARWIN")) {
- @cpu = parse_cpuinfo_darwin();
- }
- $cpu[0] = "" if !defined $cpu[0];
- $cpu[1] = 0 if !defined $cpu[1];
- $cpu[2] = 0 if !defined $cpu[2];
- return @cpu;
-}
-
-Test("CPU_ARCH", "Checking for CPU architecture", sub {
- my $mach = Get("ARCH");
- my $arch = "";
- if ($mach eq "i386") {
- Set("CPU_I386");
- UnSet("CPU_64BIT_POINTERS");
- Set("CPU_LITTLE_ENDIAN");
- UnSet("CPU_BIG_ENDIAN");
- Set("CPU_ALLOW_UNALIGNED");
- Set("CPU_STRUCT_ALIGN" => 4);
- if (IsSet("CONFIG_EXACT_CPU")) {
- my ($vendor, $family, $model) = parse_cpuinfo();
- # Try to understand CPU vendor, family and model [inspired by MPlayer's configure script]
- if ($vendor eq "AuthenticAMD") {
- if ($family >= 6) {
- if ($model >= 31 && $gccver >= 3004) { $arch = "athlon64"; }
- elsif ($model >= 6 && $gccver >= 3003) { $arch = "athlon-xp"; }
- else { $arch = "athlon"; }
- }
- } elsif ($vendor eq "GenuineIntel") {
- if ($family >= 15 && $gccver >= 3003) {
- if ($model >= 4) { $arch = "nocona"; }
- elsif ($model >= 3) { $arch = "prescott"; }
- else { $arch = "pentium4"; }
- } elsif ($family == 6 && $gccver >= 3003) {
- if ($model == 15) { $arch = "prescott"; }
- elsif (($model == 9 || $model == 13) && $gccver >= 3004) { $arch = "pentium-m"; }
- elsif ($model >= 7) { $arch = "pentium3"; }
- elsif ($model >= 3) { $arch = "pentium2"; }
- }
- }
-
- # No match on vendor, try the family
- if ($arch eq "") {
- if ($family >= 6) {
- $arch = "i686";
- } elsif ($family >= 3) {
- $arch = "i${family}86";
- }
- }
- Log (($arch ne "") ? "(using /proc/cpuinfo) " : "(don't understand /proc/cpuinfo) ");
- return $arch;
- } else {
- return "default";
- }
- } elsif ($mach eq "amd64") {
- Set("CPU_AMD64");
- Set("CPU_64BIT_POINTERS");
- Set("CPU_LITTLE_ENDIAN");
- UnSet("CPU_BIG_ENDIAN");
- Set("CPU_ALLOW_UNALIGNED");
- Set("CPU_STRUCT_ALIGN" => 8);
- if (IsSet("CONFIG_EXACT_CPU")) {
- # In x86-64 world, the detection is somewhat easier so far...
- my ($vendor, $family, $model) = parse_cpuinfo();
- if ($vendor eq "AuthenticAMD") {
- $arch = "athlon64";
- } elsif ($vendor eq "GenuineIntel") {
- $arch = "nocona";
- }
- Log (($arch ne "") ? "(using /proc/cpuinfo) " : "(don't understand /proc/cpuinfo) ");
- return $arch;
- } else {
- return "default";
- }
- } else {
- return "unknown";
- }
-});
-
-if (Get("CPU_ARCH") eq "unknown") {
- Warn "CPU architecture not recognized, using defaults, keep fingers crossed.\n";
-}
-
-### Compiler and its Options ###
-
-# C flags: tell the compiler we're speaking C99, and disable common symbols
-Set("CLANG" => "-std=gnu99 -fno-common");
-
-# C optimizations
-Set("COPT" => '-O2');
-if (Get("CPU_ARCH") ne "unknown" && Get("CPU_ARCH") ne "default") {
- Append("COPT", '-march=$(CPU_ARCH)');
-}
-
-# C optimizations for highly exposed code
-Set("COPT2" => '-O3');
-
-# Warnings
-Set("CWARNS" => '-Wall -W -Wno-parentheses -Wstrict-prototypes -Wmissing-prototypes -Winline');
-Set("CWARNS_OFF" => '');
-
-# Linker flags
-Set("LOPT" => "");
-
-# Extra libraries
-Set("LIBS" => "");
-
-# Extra flags for compiling and linking shared libraries
-Set("CSHARED" => '-fPIC');
-if (IsSet("CONFIG_DARWIN")) {
- Set("LSHARED" => '-dynamiclib -install_name lib/$(@F) -undefined dynamic_lookup');
-} else {
- Set("LSHARED" => '-shared -Wl,-soname,lib/$(@F)');
-}
-
-# Extra switches depending on GCC version:
-if ($gccver == 3000) {
- Append("COPT" => "-fstrict-aliasing");
-} elsif ($gccver == 3003) {
- Append("CWARNS" => "-Wundef -Wredundant-decls");
- Append("COPT" => "-finline-limit=20000 --param max-inline-insns-auto=1000");
-} elsif ($gccver == 3004) {
- Append("CWARNS" => "-Wundef -Wredundant-decls");
- Append("COPT" => "-finline-limit=2000 --param large-function-insns=5000 --param inline-unit-growth=200 --param large-function-growth=400");
-} elsif ($gccver == 4000 || $gccver == 4001) {
- Append("CWARNS" => "-Wundef -Wredundant-decls -Wno-pointer-sign -Wdisabled-optimization -Wno-missing-field-initializers");
- Append("CWARNS_OFF" => "-Wno-pointer-sign");
- Append("COPT" => "-finline-limit=5000 --param large-function-insns=5000 --param inline-unit-growth=200 --param large-function-growth=400");
-} elsif ($gccver == 4002) {
- Append("CWARNS" => "-Wundef -Wredundant-decls -Wno-pointer-sign -Wdisabled-optimization -Wno-missing-field-initializers");
- Append("CWARNS_OFF" => "-Wno-pointer-sign");
- Append("COPT" => "-finline-limit=5000 --param large-function-insns=5000 --param inline-unit-growth=200 --param large-function-growth=400 -fgnu89-inline");
-} else {
- Warn "Don't know anything about this GCC version, using default switches.\n";
-}
-
-if (IsSet("CONFIG_DEBUG")) {
- # If debugging:
- Set("DEBUG_ASSERTS");
- Set("DEBUG_DIE_BY_ABORT") if Get("CONFIG_DEBUG") > 1;
- Set("CDEBUG" => "-ggdb");
-} else {
- # If building a release version:
- Append("COPT" => "-fomit-frame-pointer");
- Append("LOPT" => "-s");
-}
-
-if (IsSet("CONFIG_DARWIN")) {
- # gcc-4.0 on Darwin doesn't set this in the gnu99 mode
- Append("CLANG" => "-fnested-functions");
- # Directory hierarchy of the fink project
- Append("LIBS" => "-L/sw/lib");
- Append("COPT" => "-I/sw/include");
- # Fill in some constants not found in the system header files
- Set("SOL_TCP" => 6); # missing in /usr/include/netinet/tcp.h
-}
-
-# Determine page size
-Test("CPU_PAGE_SIZE", "Determining page size", sub {
- my $p;
- if (IsSet("CONFIG_DARWIN")) {
- $p = `sysctl -n hw.pagesize`;
- defined $p or Fail "sysctl hw.pagesize failed";
- } elsif (IsSet("CONFIG_LINUX")) {
- $p = `getconf PAGE_SIZE`;
- defined $p or Fail "getconf PAGE_SIZE failed";
- }
- chomp $p;
- return $p;
-});
-
-if (IsSet("CONFIG_LARGE_FILES") && IsSet("CONFIG_LINUX")) {
- # Use 64-bit versions of file functions
- Set("CONFIG_LFS");
-}
-
-# Decide how will lib/partmap.c work
-Set("PARTMAP_IS_MMAP") if IsSet("CPU_64BIT_POINTERS");
-
-# Option for lib/mempool.c
-Set("POOL_IS_MMAP");
-
-# Guess optimal bit width of the radix-sorter
-if (Get("CPU_ARCH") eq "default" || Get("CPU_ARCH") =~ /^i[345]86$/) {
- # This should be safe everywhere
- Set("CONFIG_UCW_RADIX_SORTER_BITS" => 10);
-} else {
- # Use this on modern CPU's
- Set("CONFIG_UCW_RADIX_SORTER_BITS" => 12);
-}
-
-# If debugging memory allocations:
-#LIBS+=-lefence
-#CDEBUG+=-DDEBUG_DMALLOC
-#LIBS+=-ldmalloc
-
-# Return success
-1;
+++ /dev/null
-/*
- * UCW Library -- Base 224 Encoding & Decoding
- *
- * (c) 2002 Martin Mares <mj@ucw.cz>
- *
- * The `base-224' encoding transforms general sequences of bytes
- * to sequences of non-control 8-bit characters (0x20-0xff). Since
- * 224 and 256 are incompatible bases (there is no k,l: 224^k=256^l)
- * and we want to avoid lengthy calculations, we cheat a bit:
- *
- * Each base-224 digit can be represented as a (base-7 digit, base-32 digit)
- * pair, so we pass the lower 5 bits directly and use a base-7 encoder
- * for the upper part. We process blocks of 39 bits and encode them
- * to 5 base-224 digits: we take 5x5 bits as the lower halves and convert
- * the remaining 14 bits in base-7 (2^14 = 16384 < 16807 = 7^5) to get
- * the 7 upper parts we need (with a little redundancy). Little endian
- * ordering is used to make handling of partial blocks easy.
- *
- * We transform 39 source bits to 40 destination bits, stretching the data
- * by 1/39 = approx. 2.56%.
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#undef LOCAL_DEBUG
-
-#include "lib/lib.h"
-#include "lib/base224.h"
-
-static void
-encode_block(byte *w, u32 hi, u32 lo)
-{
- uns x, y;
-
- /*
- * Splitting of the 39-bit block: [a-e][0-5] are the base-32 digits, *'s are used for base-7.
- * +----------------+----------------+----------------+----------------+----------------+
- * +00******e4e3e2e1|e0******d4d3d2d1|d0******c4c3c2c1|c0******b4b3b2b1|b0****a4a3a2a1a0|
- * +----------------+----------------+----------------+----------------+----------------+
- */
-
- w[0] = lo & 0x1f;
- w[1] = (lo >> 7) & 0x1f;
- w[2] = (lo >> 15) & 0x1f;
- w[3] = (lo >> 23) & 0x1f;
- w[4] = (lo >> 31) | ((hi << 1) & 0x1e);
- x = (lo >> 5) & 0x0003
- | (lo >> 10) & 0x001c
- | (lo >> 15) & 0x00e0
- | (lo >> 20) & 0x0700
- | (hi << 7) & 0x3800;
- DBG("<<< h=%08x l=%08x x=%d", hi, lo, x);
- for (y=0; y<5; y++)
- {
- w[y] += 0x20 + ((x % 7) << 5);
- x /= 7;
- }
-}
-
-uns
-base224_encode(byte *dest, const byte *src, uns len)
-{
- u32 lo=0, hi=0; /* 64-bit buffer accumulating input bits */
- uns i=0; /* How many source bits do we have buffered */
- u32 x;
- byte *w=dest;
-
- while (len--)
- {
- x = *src++;
- if (i < 32)
- {
- lo |= x << i;
- if (i > 24)
- hi |= x >> (32-i);
- }
- else
- hi |= x << (i-32);
- i += 8;
- if (i >= 39)
- {
- encode_block(w, hi, lo);
- w += 5;
- lo = hi >> 7;
- hi = 0;
- i -= 39;
- }
- }
- if (i) /* Partial block */
- {
- encode_block(w, hi, lo);
- w += (i+8)/8; /* Just check logarithms if you want to understand */
- }
- return w - dest;
-}
-
-uns
-base224_decode(byte *dest, const byte *src, uns len)
-{
- u32 hi=0, lo=0; /* 64-bit buffer accumulating output bits */
- uns i=0; /* How many bits do we have accumulated */
- u32 h, l; /* Decoding of the current block */
- uns x; /* base-7 part of the current block */
- uns len0;
- byte *start = dest;
-
- do
- {
- if (!len)
- break;
- len0 = len;
-
- ASSERT(*src >= 0x20); /* byte 0 */
- h = 0;
- l = *src & 0x1f;
- x = (*src++ >> 5) - 1;
- if (!--len)
- goto blockend;
-
- ASSERT(*src >= 0x20); /* byte 1 */
- l |= (*src & 0x1f) << 7;
- x += ((*src++ >> 5) - 1) * 7;
- if (!--len)
- goto blockend;
-
- ASSERT(*src >= 0x20); /* byte 2 */
- l |= (*src & 0x1f) << 15;
- x += ((*src++ >> 5) - 1) * 7*7;
- if (!--len)
- goto blockend;
-
- ASSERT(*src >= 0x20); /* byte 3 */
- l |= (*src & 0x1f) << 23;
- x += ((*src++ >> 5) - 1) * 7*7*7;
- if (!--len)
- goto blockend;
-
- ASSERT(*src >= 0x20); /* byte 4 */
- l |= *src << 31;
- h = (*src & 0x1f) >> 1;
- x += ((*src++ >> 5) - 1) * 7*7*7*7;
- --len;
-
- blockend:
- len0 -= len;
- l |= ((x & 0x0003) << 5) /* Decode base-7 */
- | ((x & 0x001c) << 10)
- | ((x & 0x00e0) << 15)
- | ((x & 0x0700) << 20);
- h |= (x & 0x3800) >> 7;
-
- DBG("<<< i=%d h=%08x l=%08x x=%d len0=%d", i, h, l, x, len0);
- lo |= l << i;
- hi |= h << i;
- if (i)
- hi |= l >> (32-i);
- i += len0*8 - 1;
-
- while (i >= 8)
- {
- *dest++ = lo;
- lo = (lo >> 8U) | (hi << 24);
- hi >>= 8;
- i -= 8;
- }
- }
- while (len0 == 5);
- return dest-start;
-}
-
-#ifdef TEST
-
-#include <stdio.h>
-
-int main(int argc, char **argv)
-{
-#if 0
- byte i[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39 };
- byte o[256], w[256];
- uns l;
- l = base224_encode(o, i, sizeof(i));
- fwrite(o, 1, l, stdout);
- fputc(0xaa, stdout);
- l = base224_decode(w, o, l);
- fwrite(w, 1, l, stdout);
-#else
- if (argc > 1)
- {
- byte i[BASE224_OUT_CHUNK*17], o[BASE224_IN_CHUNK*17];
- uns l;
- while (l = fread(i, 1, sizeof(i), stdin))
- {
- l = base224_decode(o, i, l);
- fwrite(o, 1, l, stdout);
- }
- }
- else
- {
- byte i[BASE224_IN_CHUNK*23], o[BASE224_OUT_CHUNK*23];
- uns l;
- while (l = fread(i, 1, sizeof(i), stdin))
- {
- l = base224_encode(o, i, l);
- fwrite(o, 1, l, stdout);
- }
- }
-#endif
-
- return 0;
-}
-
-#endif
+++ /dev/null
-/*
- * UCW Library -- Base 224 Encoding & Decoding
- *
- * (c) 2002 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-uns base224_encode(byte *dest, const byte *src, uns len);
-uns base224_decode(byte *dest, const byte *src, uns len);
-
-/*
- * Warning: when encoding, at least 4 bytes of extra space are needed.
- * Better use this macro to calculate buffer size.
- */
-#define BASE224_ENC_LENGTH(x) (((x)*8+38)/39*5)
-
-/*
- * When called for BASE224_IN_CHUNK-byte chunks, the result will be
- * always BASE224_OUT_CHUNK bytes long. If a longer block is split
- * to such chunks, the result will be identical.
- */
-#define BASE224_IN_CHUNK 39
-#define BASE224_OUT_CHUNK 40
+++ /dev/null
-/*
- * UCW Library -- Base 64 Encoding & Decoding
- *
- * (c) 2002, Robert Spalek <robert@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#undef LOCAL_DEBUG
-
-#include "lib/lib.h"
-#include "lib/base64.h"
-
-#include <string.h>
-
-static const byte base64_table[] =
- { 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
- 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
- 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
- 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
- '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/', '\0'
- };
-static const byte base64_pad = '=';
-
-uns
-base64_encode(byte *dest, const byte *src, uns len)
-{
- const byte *current = src;
- uns i = 0;
-
- while (len > 2) { /* keep going until we have less than 24 bits */
- dest[i++] = base64_table[current[0] >> 2];
- dest[i++] = base64_table[((current[0] & 0x03) << 4) + (current[1] >> 4)];
- dest[i++] = base64_table[((current[1] & 0x0f) << 2) + (current[2] >> 6)];
- dest[i++] = base64_table[current[2] & 0x3f];
-
- current += 3;
- len -= 3; /* we just handle 3 octets of data */
- }
-
- /* now deal with the tail end of things */
- if (len != 0) {
- dest[i++] = base64_table[current[0] >> 2];
- if (len > 1) {
- dest[i++] = base64_table[((current[0] & 0x03) << 4) + (current[1] >> 4)];
- dest[i++] = base64_table[(current[1] & 0x0f) << 2];
- dest[i++] = base64_pad;
- }
- else {
- dest[i++] = base64_table[(current[0] & 0x03) << 4];
- dest[i++] = base64_pad;
- dest[i++] = base64_pad;
- }
- }
- return i;
-}
-
-/* as above, but backwards. :) */
-uns
-base64_decode(byte *dest, const byte *src, uns len)
-{
- const byte *current = src;
- uns ch;
- uns i = 0, j = 0;
- static byte reverse_table[256];
- static uns table_built = 0;
-
- if (table_built == 0) {
- byte *chp;
- table_built = 1;
- for(ch = 0; ch < 256; ch++) {
- chp = strchr(base64_table, ch);
- if(chp) {
- reverse_table[ch] = chp - base64_table;
- } else {
- reverse_table[ch] = 0xff;
- }
- }
- }
-
- /* run through the whole string, converting as we go */
- ch = 0;
- while (len > 0) {
- len--;
- ch = *current++;
- if (ch == base64_pad) break;
-
- /* When Base64 gets POSTed, all pluses are interpreted as spaces.
- This line changes them back. It's not exactly the Base64 spec,
- but it is completely compatible with it (the spec says that
- spaces are invalid). This will also save many people considerable
- headache. - Turadg Aleahmad <turadg@wise.berkeley.edu>
- */
-
- if (ch == ' ') ch = '+';
-
- ch = reverse_table[ch];
- if (ch == 0xff) continue;
-
- switch(i % 4) {
- case 0:
- dest[j] = ch << 2;
- break;
- case 1:
- dest[j++] |= ch >> 4;
- dest[j] = (ch & 0x0f) << 4;
- break;
- case 2:
- dest[j++] |= ch >>2;
- dest[j] = (ch & 0x03) << 6;
- break;
- case 3:
- dest[j++] |= ch;
- break;
- }
- i++;
- }
- return j;
-}
+++ /dev/null
-/*
- * UCW Library -- Base 64 Encoding & Decoding
- *
- * (c) 2002, Robert Spalek <robert@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-uns base64_encode(byte *dest, const byte *src, uns len);
-uns base64_decode(byte *dest, const byte *src, uns len);
-
-/*
- * Use this macro to calculate buffer size.
- */
-#define BASE64_ENC_LENGTH(x) (((x)+2)/3 *4)
-
-/*
- * When called for BASE64_IN_CHUNK-byte chunks, the result will be
- * always BASE64_OUT_CHUNK bytes long. If a longer block is split
- * to such chunks, the result will be identical.
- */
-#define BASE64_IN_CHUNK 3
-#define BASE64_OUT_CHUNK 4
-
+++ /dev/null
-/*
- * UCW Library -- A simple growing buffers for byte-sized items
- *
- * (c) 2006 Pavel Charvat <pchar@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-#include "lib/bbuf.h"
-
-#include <stdio.h>
-
-char *
-bb_vprintf_at(bb_t *bb, uns ofs, const char *fmt, va_list args)
-{
- bb_grow(bb, ofs + 1);
- va_list args2;
- va_copy(args2, args);
- int cnt = vsnprintf(bb->ptr + ofs, bb->len - ofs, fmt, args2);
- va_end(args2);
- if (cnt < 0)
- {
- /* Our C library doesn't support C99 return value of vsnprintf, so we need to iterate */
- do
- {
- bb_do_grow(bb, bb->len + 1);
- va_copy(args2, args);
- cnt = vsnprintf(bb->ptr + ofs, bb->len - ofs, fmt, args2);
- va_end(args2);
- }
- while (cnt < 0);
- }
- else if ((uns)cnt >= bb->len - ofs)
- {
- bb_do_grow(bb, ofs + cnt + 1);
- va_copy(args2, args);
- int cnt2 = vsnprintf(bb->ptr + ofs, bb->len - ofs, fmt, args2);
- va_end(args2);
- ASSERT(cnt2 == cnt);
- }
- return bb->ptr + ofs;
-}
-
-char *
-bb_printf_at(bb_t *bb, uns ofs, const char *fmt, ...)
-{
- va_list args;
- va_start(args, fmt);
- char *res = bb_vprintf_at(bb, ofs, fmt, args);
- va_end(args);
- return res;
-}
-
-char *
-bb_vprintf(bb_t *bb, const char *fmt, va_list args)
-{
- return bb_vprintf_at(bb, 0, fmt, args);
-}
-
-char *
-bb_printf(bb_t *bb, const char *fmt, ...)
-{
- va_list args;
- va_start(args, fmt);
- char *res = bb_vprintf_at(bb, 0, fmt, args);
- va_end(args);
- return res;
-}
-
-#ifdef TEST
-
-int main(void)
-{
- bb_t bb;
- bb_init(&bb);
- char *x = bb_printf(&bb, "<Hello, %s!>", "World");
- fputs(x, stdout);
- x = bb_printf_at(&bb, 5, "<Hello, %50s!>\n", "World");
- fputs(x, stdout);
- bb_done(&bb);
- return 0;
-}
-
-#endif
+++ /dev/null
-/*
- * UCW Library -- A simple growing buffer for byte-sized items.
- *
- * (c) 2004 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#ifndef _UCW_BBUF_H
-#define _UCW_BBUF_H
-
-#define GBUF_TYPE byte
-#define GBUF_PREFIX(x) bb_##x
-#include "lib/gbuf.h"
-
-char *bb_vprintf(bb_t *bb, const char *fmt, va_list args);
-char *bb_printf(bb_t *bb, const char *fmt, ...);
-char *bb_vprintf_at(bb_t *bb, uns ofs, const char *fmt, va_list args);
-char *bb_printf_at(bb_t *bb, uns ofs, const char *fmt, ...);
-
-#endif
+++ /dev/null
-# Tests for growing buffers
-
-Run: ../obj/lib/bbuf-t
-Out: <Hello, World!><Hello, World!>
+++ /dev/null
-/*
- * UCW Library -- Allocation of Large Aligned Buffers
- *
- * (c) 2006--2007 Martin Mares <mj@ucw.cz>
- * (c) 2007 Pavel Charvat <char@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-
-#include <sys/mman.h>
-#include <string.h>
-#include <limits.h>
-
-void *
-page_alloc(u64 len)
-{
- if (len > SIZE_MAX)
- die("page_alloc: Size %llu is too large for the current architecture", (long long) len);
- ASSERT(!(len & (CPU_PAGE_SIZE-1)));
- byte *p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
- if (p == (byte*) MAP_FAILED)
- die("Cannot mmap %llu bytes of memory: %m", (long long)len);
- return p;
-}
-
-void *
-page_alloc_zero(u64 len)
-{
- void *p = page_alloc(len);
- bzero(p, len);
- return p;
-}
-
-void
-page_free(void *start, u64 len)
-{
- ASSERT(!(len & (CPU_PAGE_SIZE-1)));
- ASSERT(!((uintptr_t) start & (CPU_PAGE_SIZE-1)));
- munmap(start, len);
-}
-
-void *
-page_realloc(void *start, u64 old_len, u64 new_len)
-{
- void *p = page_alloc(new_len);
- memcpy(p, start, MIN(old_len, new_len));
- page_free(start, old_len);
- return p;
-}
-
-static u64
-big_round(u64 len)
-{
- return ALIGN_TO(len, (u64)CPU_PAGE_SIZE);
-}
-
-/*
- * Allocate at least `len` bytes, rounded up to whole pages by big_round().
- * With CONFIG_DEBUG, one extra page is added on each side: the leading one
- * records `len` and both are then made inaccessible with mprotect(), and the
- * returned pointer is advanced past the leading page.
- * Calls die() when the request is too large.
- */
-void *
-big_alloc(u64 len)
-{
-  u64 l = big_round(len);
-  /* l < len means the rounding wrapped around the u64 range. */
-  if (l < len || l > SIZE_MAX - 2*CPU_PAGE_SIZE)
-    die("big_alloc: Size %llu is too large for the current architecture", (unsigned long long) len);
-#ifdef CONFIG_DEBUG
-  l += 2*CPU_PAGE_SIZE;
-#endif
-  byte *p = page_alloc(l);
-#ifdef CONFIG_DEBUG
-  *(u64*)p = len;				/* remembered for the check in big_free() */
-  mprotect(p, CPU_PAGE_SIZE, PROT_NONE);
-  mprotect(p+l-CPU_PAGE_SIZE, CPU_PAGE_SIZE, PROT_NONE);
-  p += CPU_PAGE_SIZE;
-#endif
-  return p;
-}
-
-/* Like big_alloc(), but the whole page-rounded area is cleared with bzero(). */
-void *
-big_alloc_zero(u64 len)
-{
-  void *mem = big_alloc(len);
-  u64 rounded = big_round(len);
-
-  bzero(mem, rounded);
-  return mem;
-}
-
-/*
- * Release a block obtained from big_alloc(); `len` is the length that was
- * passed to big_alloc(). With CONFIG_DEBUG the leading guard page is made
- * readable again, the length stored there is compared with `len`, and both
- * guard pages are released together with the block.
- */
-void
-big_free(void *start, u64 len)
-{
- byte *p = start;
- u64 l = big_round(len);
-#ifdef CONFIG_DEBUG
- p -= CPU_PAGE_SIZE;
- mprotect(p, CPU_PAGE_SIZE, PROT_READ);
- ASSERT(*(u64*)p == len);
- l += 2*CPU_PAGE_SIZE;
-#endif
- page_free(p, l);
-}
-
-#ifdef TEST
-
-/* Smoke test: allocate and release one block of a non-page-aligned size. */
-int main(void)
-{
-  enum { TEST_SIZE = 123456 };
-  byte *block = big_alloc(TEST_SIZE);
-
-  // block[-1] = 1;
-  big_free(block, TEST_SIZE);
-  return 0;
-}
-
-#endif
+++ /dev/null
-/*
- * UCW Library -- Binomial Heaps: Declarations
- *
- * (c) 2003 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-/* Handle embedded in every heap element; it is also used as a list head. */
-struct bh_node {
- struct bh_node *first_son;	/* Head of the list of sons (linked by next_sibling) */
- struct bh_node *last_son;	/* Tail of that list; merge() appends new sons here */
- struct bh_node *next_sibling;	/* Next node in the list this node belongs to */
- byte order;			/* Incremented by merge() whenever a son is attached */
-};
-
-/* The heap itself: root.first_son heads the list of trees in the heap. */
-struct bh_heap {
- struct bh_node root;
-};
+++ /dev/null
-/*
- * UCW Library -- Binomial Heaps: Testing
- *
- * (c) 2003 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-
-#include <stdio.h>
-#include <string.h>
-
-#define BH_PREFIX(x) bht_##x
-#define BH_WANT_INSERT
-#define BH_WANT_FINDMIN
-#define BH_WANT_DELETEMIN
-#include "lib/binheap-node.h"
-
-/* Test element: the heap handle comes first so a bh_node * can be cast back. */
-struct item {
- struct bh_node n;
- uns key;
-};
-
-/* Fetch the key of the item whose embedded heap handle is `n`. */
-static inline uns bht_key(struct bh_node *n)
-{
-  struct item *it = (struct item *) n;
-
-  return it->key;
-}
-
-/* Heap ordering: non-zero iff the key of `a` is strictly smaller than that of `b`. */
-static inline uns bht_less(struct bh_node *a, struct bh_node *b)
-{
-  uns key_a = bht_key(a);
-  uns key_b = bht_key(b);
-
-  return key_b > key_a;
-}
-
-/*
- * Print the subtree rooted at `a`, indented by `offset` columns, one node per
- * line as "[order](key)". A node equal to `expected_last` is marked " L".
- */
-static void
-bht_do_dump(struct bh_node *a, struct bh_node *expected_last, uns offset)
-{
-  struct bh_node *son;
-
-  if (a == NULL)
-    return;
-  printf("%*s", offset, "");
-  printf("[%d](%d)%s\n", a->order, bht_key(a), a != expected_last ? "" : " L");
-  son = a->first_son;
-  while (son)
-    {
-      bht_do_dump(son, a->last_son, offset+1);
-      son = son->next_sibling;
-    }
-}
-
-/* Print the whole heap: a "root" line followed by every tree in the root list. */
-static void
-bht_dump(struct bh_heap *h)
-{
-  struct bh_node *tree = h->root.first_son;
-
-  printf("root\n");
-  while (tree)
-    {
-      bht_do_dump(tree, tree->last_son, 1);
-      tree = tree->next_sibling;
-    }
-}
-
-#include "lib/binheap.h"
-
-/*
- * Test driver: insert N items whose keys are produced by K(i), check the
- * minimum and the number of nodes seen by BH_FOR_ALL, then delete the
- * minimum N times, asserting that keys come out as 0, 1, ..., N-1.
- */
-int main(void)
-{
- uns i;
- struct bh_heap h;
-#define N 1048576
-#define K(i) ((259309*i+1009)%N)
-
- bht_init(&h);
-
- /* Items are zero-allocated and never freed; the process exits afterwards. */
- for (i=0; i<N; i++)
- {
- struct item *a = xmalloc_zero(sizeof(*a));
- a->key = K(i);
- // printf("Insert %d\n", a->key);
- bht_insert(&h, &a->n);
- // bht_dump(&h);
- }
- // bht_dump(&h);
- ASSERT(bht_key(bht_findmin(&h)) == 0);
- /* Count the nodes visited by the iterator macro. */
- uns cnt = 0;
- BH_FOR_ALL(bht_, &h, a)
- {
- cnt++;
- }
- BH_END_FOR;
- printf("cnt=%d\n", cnt);
- ASSERT(cnt == N);
- for (i=0; i<N; i++)
- {
- struct item *a = (struct item *) bht_deletemin(&h);
- // printf("\nDeleted %d:\n", a->key);
- ASSERT(a->key == i);
- // bht_dump(&h);
- }
- /* Dump whatever remains in the heap after all deletions. */
- bht_dump(&h);
-
- return 0;
-}
+++ /dev/null
-/*
- * UCW Library -- Binomial Heaps
- *
- * (c) 2003 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-/*
- * This is a generic implementation of Binomial Heaps. Each time you include
- * this file with parameters set in the corresponding preprocessor macros
- * as described below, it generates functions for manipulating the particular
- * version of the binomial heap.
- *
- * You need to specify:
- *
- * BH_PREFIX(x) macro to add a name prefix (used on all global names
- * defined by the binomial heap generator). All further
- * names mentioned here except for macro names will be
- * implicitly prefixed.
- *
- * Then you continue by including "lib/binheap-node.h" which defines struct bh_node
- * and struct bh_heap (both without prefix). The heap elements are always allocated by
- * you and they must include struct bh_node which serves as a handle used for all
- * the heap functions and it contains all information needed for heap-keeping.
- * The heap itself is also allocated by you and it's represented by struct bh_heap.
- *
- * When you have the declaration of heap nodes, you continue with defining:
- *
- * less(p,q) returns 1 if the key corresponding to bh_node *p
- * is less than the one corresponding to *q.
- *
- * Then specify what operations you request:
- *
- * <always defined> init(heap*) -- initialize the heap.
- * BH_WANT_INSERT insert(heap*, node*) -- insert the node to the heap.
- * BH_WANT_FINDMIN node *findmin(heap*) -- find node with minimum key.
- * BH_WANT_DELETEMIN node *deletemin(heap*) -- findmin and delete the node.
- *
- * Then include "lib/binheap.h" and voila, you have a binomial heap
- * suiting all your needs (at least those which you've revealed :) ).
- *
- * You also get a iterator macro at no extra charge:
- *
- * BH_FOR_ALL(bh_prefix, heap*, variable)
- * {
- * // node *variable gets declared automatically
- * do_something_with_node(variable);
- * // use BH_BREAK and BH_CONTINUE instead of break and continue
- * // you must not alter contents of the heap here
- * }
- * BH_END_FOR;
- *
- * After including this file, all parameter macros are automatically
- * undef'd.
- */
-
-#define BH_NODE struct bh_node
-#define BH_HEAP struct bh_heap
-
-/*
- * Merge the list of trees headed by b->first_son into the list headed by
- * a->first_son. `a` and `b` act purely as list heads: only their first_son
- * fields are accessed. When two trees of equal order meet, less() decides
- * which root stays on top; the other is appended as its last son and the
- * winner's order is incremented.
- * NOTE(review): the skip/insert logic presumably relies on both lists being
- * sorted by increasing order -- confirm against callers.
- */
-static void
-BH_PREFIX(merge)(BH_NODE *a, BH_NODE *b)
-{
- BH_NODE **pp = &a->first_son;
- BH_NODE *q = b->first_son;
- BH_NODE *p, *r, *s;
-
- while ((p = *pp) && q)
- {
- /* p,q are the next nodes of a,b; pp points to where p is linked */
- if (p->order < q->order) /* p is smaller => skip it */
- pp = &p->next_sibling;
- else if (p->order > q->order) /* q is smaller => insert it before p */
- {
- r = q;
- q = q->next_sibling;
- r->next_sibling = p;
- *pp = r;
- pp = &r->next_sibling;
- }
- else /* p and q are of the same order => need to merge them */
- {
- if (BH_PREFIX(less)(p, q)) /* we'll hang r below s */
- {
- r = q;
- s = p;
- }
- else
- {
- r = p;
- s = q;
- }
- *pp = p->next_sibling; /* unlink p,q from their lists */
- q = q->next_sibling;
-
- if (s->last_son) /* merge r to s, increasing order */
- s->last_son->next_sibling = r;
- else
- s->first_son = r;
- s->last_son = r;
- s->order++;
- r->next_sibling = NULL;
-
- if (!q || q->order > s->order) /* put the result into the b's list if possible */
- {
- s->next_sibling = q;
- q = s;
- }
- else /* otherwise put the result to the a's list */
- {
- p = s->next_sibling = *pp;
- *pp = s;
- if (p && p->order == s->order) /* 3-collision */
- pp = &s->next_sibling;
- }
- }
- }
- /* a's list is exhausted: append whatever is left of b's list */
- if (!p)
- *pp = q;
-}
-
-#ifdef BH_WANT_INSERT
-/*
- * Insert node `a` into `heap`. The node is turned into a single-node tree
- * (no sons, no sibling, order 0) and merged into the heap's root list via a
- * temporary list head. The order is reset explicitly so that nodes which
- * were not zero-initialized, or which are re-inserted after deletemin(),
- * do not carry a stale order into merge().
- */
-static void
-BH_PREFIX(insert)(BH_HEAP *heap, BH_NODE *a)
-{
-  BH_NODE sh;
-
-  sh.first_son = a;
-  a->first_son = a->last_son = a->next_sibling = NULL;
-  a->order = 0;
-  BH_PREFIX(merge)(&heap->root, &sh);
-}
-#endif
-
-#ifdef BH_WANT_FINDMIN
-/*
- * Return the root-list node that is minimal with respect to less(), or NULL
- * when the heap is empty. Among equal candidates the earliest one wins.
- */
-static BH_NODE *
-BH_PREFIX(findmin)(BH_HEAP *heap)
-{
-  BH_NODE *best = heap->root.first_son;
-  BH_NODE *cur;
-
-  if (!best)
-    return NULL;
-  for (cur = best->next_sibling; cur; cur = cur->next_sibling)
-    if (BH_PREFIX(less)(cur, best))
-      best = cur;
-  return best;
-}
-#endif
-
-#ifdef BH_WANT_DELETEMIN
-/*
- * Unlink the minimal root (per less()) from the heap, merge its sons back
- * into the root list and return it; returns NULL when the heap is empty.
- */
-static BH_NODE *
-BH_PREFIX(deletemin)(BH_HEAP *heap)
-{
-  BH_NODE **link = &heap->root.first_son;
-  BH_NODE **min_link = NULL;
-  BH_NODE *cur, *victim;
-
-  /* Remember the link pointing at the best root so it can be unlinked. */
-  while ((cur = *link))
-    {
-      if (!min_link || BH_PREFIX(less)(cur, *min_link))
-        min_link = link;
-      link = &cur->next_sibling;
-    }
-  if (!min_link)
-    return NULL;
-
-  victim = *min_link;
-  *min_link = victim->next_sibling;
-  BH_PREFIX(merge)(&heap->root, victim);
-  return victim;
-}
-#endif
-
-/* Make `heap` empty by clearing the whole structure. */
-static inline void
-BH_PREFIX(init)(BH_HEAP *heap)
-{
-  bzero(heap, sizeof(BH_HEAP));
-}
-
-#ifndef BH_FOR_ALL
-
-/*
- * BH_FOR_ALL opens a loop visiting every node of the heap; BH_END_FOR closes
- * it. The traversal keeps an explicit stack of at most 32 node pointers:
- * each popped node pushes its next_sibling and then its first_son, so sons
- * are visited before siblings. bh_px is accepted but not used by the macro.
- */
-#define BH_FOR_ALL(bh_px, bh_heap, bh_var) \
-do { \
- struct bh_node *bh_stack[32]; \
- uns bh_sp = 0; \
- if (bh_stack[0] = (bh_heap)->root.first_son) \
- bh_sp++; \
- while (bh_sp) { \
- struct bh_node *bh_var = bh_stack[--bh_sp]; \
- if (bh_var->next_sibling) \
- bh_stack[bh_sp++] = bh_var->next_sibling; \
- if (bh_var->first_son) \
- bh_stack[bh_sp++] = bh_var->first_son;
-#define BH_END_FOR \
- } \
-} while (0)
-
-/* BH_BREAK empties the stack and leaves the loop; BH_CONTINUE goes to the next node. */
-#define BH_BREAK { bh_sp=0; break; }
-#define BH_CONTINUE continue
-
-#endif
-
-#undef BH_PREFIX
-#undef BH_NODE
-#undef BH_HEAP
-#undef BH_WANT_INSERT
-#undef BH_WANT_FINDMIN
-#undef BH_WANT_DELETEMIN
+++ /dev/null
-/*
- * UCW Library -- Generic Binary Search
- *
- * (c) 2005 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-/*
- * Binary search in ary[0..N-1], assumed sorted so that ary_lt_x(ary,i,x) is
- * true for a prefix of indices and false afterwards. Evaluates to the first
- * index for which ary_lt_x() is false, or N if there is none.
- * The midpoint is computed as l + (r-l)/2 so it cannot overflow `uns`.
- */
-#define BIN_SEARCH_FIRST_GE_CMP(ary,N,x,ary_lt_x) ({		\
-  uns l = 0, r = (N);						\
-  while (l < r)							\
-    {								\
-      uns m = l + (r-l)/2;					\
-      if (ary_lt_x(ary,m,x))					\
-        l = m+1;						\
-      else							\
-        r = m;							\
-    }								\
-  l;								\
-})
-
-/* Default comparison for numeric arrays; fully parenthesized so it is safe in any expression. */
-#define ARY_LT_NUM(ary,i,x) ((ary)[i] < (x))
-
-/* First index whose element is >= x (or N if none), using plain `<` on the elements. */
-#define BIN_SEARCH_FIRST_GE(ary,N,x) BIN_SEARCH_FIRST_GE_CMP(ary,N,x,ARY_LT_NUM)
-/* Index of an element equal to x, or -1 if x is not present. */
-#define BIN_SEARCH_EQ(ary,N,x) ({ int i = BIN_SEARCH_FIRST_GE(ary,N,x); if (i >= (N) || (ary)[i] != (x)) i=-1; i; })
+++ /dev/null
-/*
- * UCW Library -- Find Lowest Set Bit
- *
- * (c) 2005 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-#include "lib/bitops.h"
-
-/* Just a table, the rest is in bitops.h */
-
-/*
- * ffs_table[b] is the index of the lowest set bit of the byte value b
- * (entry 0 is 0). bit_ffs() in bitops.h indexes it with a single byte.
- */
-const byte ffs_table[] = {
- 0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
-};
-
-#ifdef TEST
-
-#include <stdio.h>
-
-/* Read hexadecimal numbers from stdin and print bit_ffs() of each, one per line. */
-int main(void)
-{
-  uns word;
-
-  for (;;)
-    {
-      if (scanf("%x", &word) != 1)
-        break;
-      printf("%d\n", bit_ffs(word));
-    }
-  return 0;
-}
-
-#endif
+++ /dev/null
-/*
- * UCW Library -- Find Highest Set Bit
- *
- * (c) 1997-2005 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-#include "lib/bitops.h"
-
-/*
- * Return the index of the highest set bit of `x`, or -1 for x == 0.
- * Works by binary search: each mask selects the upper half of every group
- * of 32, 16, 8, 4 and 2 bits; if any bit survives the mask, the answer lies
- * in the upper half and the corresponding power of two is added.
- */
-int
-bit_fls(u32 x)
-{
-  static const u32 upper_half[5] = {
-    0xffff0000, 0xff00ff00, 0xf0f0f0f0, 0xcccccccc, 0xaaaaaaaa
-  };
-  uns pos = 0;
-  uns step = 16;
-
-  if (x == 0)
-    return -1;
-
-  for (uns k = 0; k < 5; k++, step >>= 1)
-    if (x & upper_half[k])
-      {
-        pos += step;
-        x &= upper_half[k];
-      }
-  return pos;
-}
-
-#ifdef TEST
-
-#include <stdio.h>
-
-/* Read hexadecimal numbers from stdin and print bit_fls() of each, one per line. */
-int main(void)
-{
-  uns word;
-
-  for (;;)
-    {
-      if (scanf("%x", &word) != 1)
-        break;
-      printf("%d\n", bit_fls(word));
-    }
-  return 0;
-}
-
-#endif
+++ /dev/null
-/*
- * UCW Library -- Bit Array Operations
- *
- * (c) 2003--2006 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#ifndef _UCW_BITARRAY_H
-#define _UCW_BITARRAY_H
-
-#include <string.h>
-
-/* A bit array is a plain array of 32-bit words; bit i lives in word i/32. */
-typedef u32 *bitarray_t;
-/* Number of 32-bit words needed for n bits (rounded up). */
-#define BIT_ARRAY_WORDS(n) (((n)+31)/32)
-/* Number of bytes occupied by those words. */
-#define BIT_ARRAY_BYTES(n) (4*BIT_ARRAY_WORDS(n))
-/* Declare a bit array of `size` bits as an array variable. */
-#define BIT_ARRAY(name,size) u32 name[BIT_ARRAY_WORDS(size)]
-
-/* Allocate storage for an n-bit array with xmalloc(); contents are not cleared here. */
-static inline bitarray_t
-bit_array_xmalloc(uns n)
-{
-  uns bytes = BIT_ARRAY_BYTES(n);
-
-  return xmalloc(bytes);
-}
-
-/* Allocate storage for an n-bit array with xmalloc_zero(). */
-static inline bitarray_t
-bit_array_xmalloc_zero(uns n)
-{
-  uns bytes = BIT_ARRAY_BYTES(n);
-
-  return xmalloc_zero(bytes);
-}
-
-/* Clear all words backing the n-bit array `a`. */
-static inline void
-bit_array_zero(bitarray_t a, uns n)
-{
-  uns bytes = BIT_ARRAY_BYTES(n);
-
-  bzero(a, bytes);
-}
-
-/* Set every bit in all words backing the n-bit array `a` (padding bits included). */
-static inline void
-bit_array_set_all(bitarray_t a, uns n)
-{
-  uns bytes = BIT_ARRAY_BYTES(n);
-
-  memset(a, 255, bytes);
-}
-
-/*
- * Set bit `i` of array `a`. The mask is built from an unsigned 1 so that
- * bit 31 of a word does not shift into the sign bit of an int.
- */
-static inline void
-bit_array_set(bitarray_t a, uns i)
-{
-  a[i/32] |= (1U << (i%32));
-}
-
-/*
- * Clear bit `i` of array `a`. The mask is built from an unsigned 1 so that
- * bit 31 of a word does not shift into the sign bit of an int.
- */
-static inline void
-bit_array_clear(bitarray_t a, uns i)
-{
-  a[i/32] &= ~(1U << (i%32));
-}
-
-/* Set bit `i` of `a` when `x` is non-zero, clear it otherwise. */
-static inline void
-bit_array_assign(bitarray_t a, uns i, uns x)
-{
-  if (!x)
-    {
-      bit_array_clear(a, i);
-      return;
-    }
-  bit_array_set(a, i);
-}
-
-/*
- * Return a non-zero value (the masked word, not necessarily 1) iff bit `i`
- * of `a` is set. The mask is built from an unsigned 1 so that bit 31 of a
- * word does not shift into the sign bit of an int.
- */
-static inline uns
-bit_array_isset(bitarray_t a, uns i)
-{
-  return a[i/32] & (1U << (i%32));
-}
-
-/* Return bit `i` of `a` normalized to exactly 0 or 1. */
-static inline uns
-bit_array_get(bitarray_t a, uns i)
-{
-  return bit_array_isset(a, i) ? 1 : 0;
-}
-
-/* Set bit `i` of `a` and return its previous state as given by bit_array_isset(). */
-static inline uns
-bit_array_test_and_set(bitarray_t a, uns i)
-{
-  uns previous = bit_array_isset(a, i);
-
-  bit_array_set(a, i);
-  return previous;
-}
-
-/* Clear bit `i` of `a` and return its previous state as given by bit_array_isset(). */
-static inline uns
-bit_array_test_and_clear(bitarray_t a, uns i)
-{
-  uns previous = bit_array_isset(a, i);
-
-  bit_array_clear(a, i);
-  return previous;
-}
-
-/*
- * Iterate over all set bits, destructively: each bit is cleared in `ary`
- * just before the loop body runs with `var` holding its index. The inner
- * loop ends as soon as the current word becomes zero. Masks use an unsigned
- * 1 so that bit 31 does not shift into the sign bit of an int.
- */
-#define BIT_ARRAY_FISH_BITS_BEGIN(var,ary,size)					\
-  for (uns var##_hi=0; var##_hi < BIT_ARRAY_WORDS(size); var##_hi++)		\
-    for (uns var##_lo=0; ary[var##_hi]; var##_lo++)				\
-      if (ary[var##_hi] & (1U << var##_lo))					\
-        {									\
-          uns var = 32*var##_hi + var##_lo;					\
-          ary[var##_hi] &= ~(1U << var##_lo);					\
-          do
-
-#define BIT_ARRAY_FISH_BITS_END							\
-          while (0);								\
-        }
-
-#endif
+++ /dev/null
-/*
- * UCW Library -- Bit Operations
- *
- * (c) 2005 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#ifndef _UCW_BITOPS_H
-#define _UCW_BITOPS_H
-
-/* Find highest bit set (i.e., the floor of the binary logarithm) (bit-fls.c) */
-
-int bit_fls(u32 x); /* bit_fls(0)=-1 */
-
-/* Find lowest bit set, undefined for zero argument (bit-ffs.c) */
-
-extern const byte ffs_table[256];
-
-#ifdef __pentium4 /* On other ia32 machines, the C version is faster */
-
-/* Variant using the x86 `bsfl` instruction; the result is written back into w. */
-static inline uns bit_ffs(uns w)
-{
- asm("bsfl %1,%0" :"=r" (w) :"rm" (w));
- return w;
-}
-
-#else
-
-/*
- * Portable variant: locate the lowest non-zero byte of `w` in two steps
- * (16 bits, then 8 bits) and look up the bit position within that byte
- * in ffs_table.
- */
-static inline uns bit_ffs(uns w)
-{
-  uns shift = 0;
-
-  if (!(w & 0xffff))
-    shift = 16;
-  if (!((w >> shift) & 0xff))
-    shift += 8;
-  return shift + ffs_table[(w >> shift) & 0xff];
-}
-
-#endif
-
-#endif
+++ /dev/null
-# Tests for bitops modules
-
-Run: ../obj/lib/bit-ffs-t
-In: 1
- 2
- 3
- 4
- 5
- 6
- 12345678
- 23030300
- 23030000
- 23000000
- 40000000
- 80000000
-Out: 0
- 1
- 0
- 2
- 0
- 1
- 3
- 8
- 16
- 24
- 30
- 31
-
-Run: ../obj/lib/bit-fls-t
-In: 1
- 2
- 3
- 4
- 5
- 6
- 12345678
- 23030303
- 03030303
- 00030303
- 00000303
- 0fedcba9
-Out: 0
- 1
- 1
- 2
- 2
- 2
- 28
- 29
- 25
- 17
- 9
- 27
+++ /dev/null
-/*
- * UCW Library -- Bit Array Signatures -- A Dubious Detector of Duplicates
- *
- * (c) 2002 Martin Mares <mj@ucw.cz>
- *
- * Greatly inspired by: Faloutsos, C. and Christodoulakis, S.: Signature files
- * (An access method for documents and its analytical performance evaluation),
- * ACM Trans. Office Inf. Syst., 2(4):267--288, Oct. 1984.
- *
- * This data structure provides a very compact representation
- * of a set of strings with insertion and membership search,
- * but with a certain low probability it cheats by incidentally
- * reporting a non-member as a member. Generally the larger you
- * create the structure, the lower this probability is.
- *
- * How does it work: the structure is just an array of M bits
- * and each possible element is hashed to a set of (at most) L
- * bit positions. For each element of the represented set, we
- * set its L bits to ones and we report as present all elements
- * all of whose L bits are set.
- *
- * Analysis: Let's assume N items have already been stored and let A
- * denote L/M (density of the hash function). The probability that
- * a fixed bit of the array is set by any of the N items is
- * 1 - (1-1/M)^(NL) = 1 - ((1-1/M)^M)^NA = approx. 1 - e^-NA.
- * This is minimized by setting A=(ln 2)/N (try taking derivative).
- * Given a non-present item, the probability that all of the bits
- * corresponding to this item are set by the other items (that is,
- * the structure gives a false answer) is (1-e^-NA)^L = 2^-L.
- * Hence, if we want to give false answers with probability less
- * than epsilon, we take L := -log_2 epsilon, M := 1.45*N*L.
- *
- * Example: For a set of 10^7 items with P[error] < 10^-6, we set
- * L := 20 and M := 290*10^6 bits = cca 34.5 MB (29 bits per item).
- *
- * We leave L and an upper bound for N as parameters set during
- * creation of the structure. Currently, the structure is limited
- * to 4 Gb = 512 MB.
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-#include "lib/bitsig.h"
-#include "lib/md5.h"
-
-#include <string.h>
-
-struct bitsig {
- /*
-  * l = bits set per item (perrlog), m = size of the array in bits,
-  * n = number of inserted items that set at least one new bit,
-  * maxn = expected maximum of n, max_m_mult = largest multiple of m not
-  * exceeding 0xffffffff (hash values at or above it are skipped).
-  */
- uns l, m, n, maxn, max_m_mult;
- u32 hash[4];			/* MD5 digest of the current item, updated between draws */
- uns hindex;			/* Which word of hash[] is used for the next bit position */
- byte array[0];			/* The bit array itself, allocated together with the struct */
-};
-
-/*
- * Create a signature array for up to `maxn` items with `perrlog` bits per
- * item. The array gets m = ceil(1.45 * maxn * perrlog) bits and starts out
- * all zero. Calls die() when m is zero (which would otherwise lead to a
- * division by zero below) or when m does not fit into 32 bits.
- */
-struct bitsig *
-bitsig_init(uns perrlog, uns maxn)
-{
-  struct bitsig *b;
-  u64 m;
-  uns mbytes;
-
-  m = ((u64) maxn * perrlog * 145 + 99) / 100;
-  if (!m)
-    die("bitsig_init: bitsig array would be empty (perrlog and maxn must be non-zero)");
-  if (m >= (u64) 1 << 32)
-    die("bitsig_init: bitsig array too large (maximum is 4 Gb)");
-  mbytes = (m + 7) >> 3U;
-  b = xmalloc(sizeof(struct bitsig) + mbytes);
-  b->l = perrlog;
-  b->m = m;
-  b->n = 0;
-  b->maxn = maxn;
-  /* Largest multiple of m that fits in 32 bits; bitsig_hash_bit() skips hashes at or above it. */
-  b->max_m_mult = (0xffffffff / m) * m;
-  bzero(b->array, mbytes);
-  msg(L_DEBUG, "Initialized bitsig array with l=%u, m=%u (%u KB), expecting %u items", b->l, b->m, (mbytes+1023)/1024, maxn);
-  return b;
-}
-
-/* Release a structure created by bitsig_init(); struct and bit array are one xmalloc() block. */
-void
-bitsig_free(struct bitsig *b)
-{
- xfree(b);
-}
-
-/*
- * Start hashing of `item` (a zero-terminated string): store its MD5 digest
- * in b->hash and restart the draw index at the first digest word.
- */
-static void
-bitsig_hash_init(struct bitsig *b, byte *item)
-{
-  struct MD5Context ctx;
-
-  b->hindex = 0;
-  MD5Init(&ctx);
-  MD5Update(&ctx, item, strlen(item));
-  MD5Final((byte *) b->hash, &ctx);
-}
-
-/*
- * Draw the next bit position in 0..m-1 for the current item. Digest words
- * are used round-robin and each is multiplied by a constant after use;
- * values at or above max_m_mult are rejected and the next word is tried.
- */
-static inline uns
-bitsig_hash_bit(struct bitsig *b)
-{
-  for (;;)
-    {
-      uns slot = b->hindex;
-      u32 h = b->hash[slot];
-
-      b->hash[slot] = h * 3006477127U;
-      b->hindex = (slot+1) % 4;
-      if (h < b->max_m_mult)
-        return h % b->m;
-    }
-}
-
-/*
- * Membership test: returns 1 when all l bit positions derived from `item`
- * are set in the array, 0 as soon as one of them is found clear.
- */
-int
-bitsig_member(struct bitsig *b, byte *item)
-{
-  uns remaining = b->l;
-
-  bitsig_hash_init(b, item);
-  while (remaining--)
-    {
-      uns bit = bitsig_hash_bit(b);
-      uns mask = 1 << (bit & 7);
-
-      if ((b->array[bit >> 3] & mask) == 0)
-        return 0;
-    }
-  return 1;
-}
-
-/*
- * Insert `item`: set all l bit positions derived from it. Returns 1 when
- * every one of them was already set (the item looks like a member), 0 when
- * at least one bit was newly set. Newly added items are counted in b->n and
- * an error is logged once, when the first item beyond maxn is added.
- */
-int
-bitsig_insert(struct bitsig *b, byte *item)
-{
-  uns i, bit, was;
-
-  bitsig_hash_init(b, item);
-  was = 1;
-  for (i=0; i<b->l; i++)
-    {
-      bit = bitsig_hash_bit(b);
-      if (!(b->array[bit >> 3] & (1 << (bit & 7))))
-        {
-          was = 0;
-          b->array[bit >> 3] |= (1 << (bit & 7));
-        }
-    }
-  /* n still holds the count before this item: n == maxn means this is item number maxn+1. */
-  if (!was && b->n++ == b->maxn)
-    msg(L_ERROR, "bitsig: Too many items inserted, error rate will be higher than estimated!");
-  return was;
-}
-
-#ifdef TEST
-
-#include <stdio.h>
-#include <stdlib.h>
-
-/*
- * Test driver: argv[1] is perrlog, argv[2] is maxn. Every line read from
- * stdin (including its newline) is inserted and the result of
- * bitsig_insert() is printed. Exits with status 1 on wrong usage.
- */
-int main(int argc, char **argv)
-{
-  if (argc != 3)
-    {
-      fprintf(stderr, "Usage: %s <perrlog> <maxn>\n", argc > 0 ? argv[0] : "bitsig-t");
-      return 1;
-    }
-
-  struct bitsig *b = bitsig_init(atol(argv[1]), atol(argv[2]));
-  byte buf[1024];
-
-  while (fgets(buf, 1024, stdin))
-    printf("%d\n", bitsig_insert(b, buf));
-
-  return 0;
-}
-
-#endif
+++ /dev/null
-/*
- * UCW Library -- Bit Array Signatures -- A Dubious Detector of Duplicates
- *
- * (c) 2002 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-/* Opaque handle; the layout is private to bitsig.c. */
-struct bitsig;
-
-/* Create a structure for up to maxn items using perrlog bits per item. */
-struct bitsig *bitsig_init(uns perrlog, uns maxn);
-/* Release a structure created by bitsig_init(). */
-void bitsig_free(struct bitsig *b);
-/* Return 1 if all bits of the zero-terminated item are set, 0 otherwise. */
-int bitsig_member(struct bitsig *b, byte *item);
-/* Set all bits of the item; return 1 if they were all set already, 0 otherwise. */
-int bitsig_insert(struct bitsig *b, byte *item);
+++ /dev/null
-/*
- * UCW Library -- Careful Read/Write
- *
- * (c) 2004 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-
-#include <unistd.h>
-
-/*
- * Reads and writes on sockets and pipes can return partial results,
- * so we implement an iterated read/write call.
- */
-
-/*
- * Read exactly `len` bytes from `fd` into `buf`, repeating read() as needed.
- * Returns 1 when everything was read, 0 when read() reported end of file
- * first, and -1 when read() failed.
- */
-int
-careful_read(int fd, void *buf, int len)
-{
-  byte *cursor = buf;
-  int got;
-
-  for (; len; cursor += got, len -= got)
-    {
-      got = read(fd, cursor, len);
-      if (got < 0)
-        return -1;
-      if (got == 0)
-        return 0;
-    }
-  return 1;
-}
-
-/*
- * Write exactly `len` bytes from `buf` to `fd`, repeating write() as needed.
- * Returns 1 when everything was written, 0 when write() returned zero, and
- * -1 when write() failed.
- */
-int
-careful_write(int fd, const void *buf, int len)
-{
-  const byte *cursor = buf;
-  int put;
-
-  for (; len; cursor += put, len -= put)
-    {
-      put = write(fd, cursor, len);
-      if (put < 0)
-        return -1;
-      if (put == 0)
-        return 0;
-    }
-  return 1;
-}
+++ /dev/null
-/*
- * UCW Library -- Character Code Map (UTF-8 Version)
- *
- * (c) 1998--2004 Martin Mares <mj@ucw.cz>
- * (c) 2004 Robert Spalek <robert@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-/* Syntax: CHAR(code, uppercase, lowercase, category) */
-
-CHAR(0x00,0x00,0x00,_C_CTRL) // <control>
-CHAR(0x01,0x01,0x01,_C_CTRL) // <control>
-CHAR(0x02,0x02,0x02,_C_CTRL) // <control>
-CHAR(0x03,0x03,0x03,_C_CTRL) // <control>
-CHAR(0x04,0x04,0x04,_C_CTRL) // <control>
-CHAR(0x05,0x05,0x05,_C_CTRL) // <control>
-CHAR(0x06,0x06,0x06,_C_CTRL) // <control>
-CHAR(0x07,0x07,0x07,_C_CTRL) // <control>
-CHAR(0x08,0x08,0x08,_C_CTRL | _C_BLANK) // <control>
-CHAR(0x09,0x09,0x09,_C_CTRL | _C_BLANK | _C_PRINT) // <control>
-CHAR(0x0A,0x0A,0x0A,_C_CTRL | _C_BLANK) // <control>
-CHAR(0x0B,0x0B,0x0B,_C_CTRL) // <control>
-CHAR(0x0C,0x0C,0x0C,_C_CTRL | _C_BLANK) // <control>
-CHAR(0x0D,0x0D,0x0D,_C_CTRL | _C_BLANK) // <control>
-CHAR(0x0E,0x0E,0x0E,_C_CTRL) // <control>
-CHAR(0x0F,0x0F,0x0F,_C_CTRL) // <control>
-CHAR(0x10,0x10,0x10,_C_CTRL) // <control>
-CHAR(0x11,0x11,0x11,_C_CTRL) // <control>
-CHAR(0x12,0x12,0x12,_C_CTRL) // <control>
-CHAR(0x13,0x13,0x13,_C_CTRL) // <control>
-CHAR(0x14,0x14,0x14,_C_CTRL) // <control>
-CHAR(0x15,0x15,0x15,_C_CTRL) // <control>
-CHAR(0x16,0x16,0x16,_C_CTRL) // <control>
-CHAR(0x17,0x17,0x17,_C_CTRL) // <control>
-CHAR(0x18,0x18,0x18,_C_CTRL) // <control>
-CHAR(0x19,0x19,0x19,_C_CTRL) // <control>
-CHAR(0x1A,0x1A,0x1A,_C_CTRL) // <control>
-CHAR(0x1B,0x1B,0x1B,_C_CTRL) // <control>
-CHAR(0x1C,0x1C,0x1C,_C_CTRL) // <control>
-CHAR(0x1D,0x1D,0x1D,_C_CTRL) // <control>
-CHAR(0x1E,0x1E,0x1E,_C_CTRL) // <control>
-CHAR(0x1F,0x1F,0x1F,_C_CTRL) // <control>
-CHAR(0x20,0x20,0x20,_C_BLANK | _C_PRINT) // SPACE
-CHAR(0x21,0x21,0x21,_C_PRINT) // EXCLAMATION MARK
-CHAR(0x22,0x22,0x22,_C_PRINT) // QUOTATION MARK
-CHAR(0x23,0x23,0x23,_C_PRINT) // NUMBER SIGN
-CHAR(0x24,0x24,0x24,_C_PRINT) // DOLLAR SIGN
-CHAR(0x25,0x25,0x25,_C_PRINT) // PERCENT SIGN
-CHAR(0x26,0x26,0x26,_C_PRINT) // AMPERSAND
-CHAR(0x27,0x27,0x27,_C_PRINT) // APOSTROPHE
-CHAR(0x28,0x28,0x28,_C_PRINT) // LEFT PARENTHESIS
-CHAR(0x29,0x29,0x29,_C_PRINT) // RIGHT PARENTHESIS
-CHAR(0x2A,0x2A,0x2A,_C_PRINT) // ASTERISK
-CHAR(0x2B,0x2B,0x2B,_C_PRINT) // PLUS SIGN
-CHAR(0x2C,0x2C,0x2C,_C_PRINT) // COMMA
-CHAR(0x2D,0x2D,0x2D,_C_PRINT) // HYPHEN-MINUS
-CHAR(0x2E,0x2E,0x2E,_C_PRINT) // FULL STOP
-CHAR(0x2F,0x2F,0x2F,_C_PRINT) // SOLIDUS
-CHAR(0x30,0x30,0x30,_C_DIGIT | _C_XDIGIT | _C_PRINT) // DIGIT ZERO
-CHAR(0x31,0x31,0x31,_C_DIGIT | _C_XDIGIT | _C_PRINT) // DIGIT ONE
-CHAR(0x32,0x32,0x32,_C_DIGIT | _C_XDIGIT | _C_PRINT) // DIGIT TWO
-CHAR(0x33,0x33,0x33,_C_DIGIT | _C_XDIGIT | _C_PRINT) // DIGIT THREE
-CHAR(0x34,0x34,0x34,_C_DIGIT | _C_XDIGIT | _C_PRINT) // DIGIT FOUR
-CHAR(0x35,0x35,0x35,_C_DIGIT | _C_XDIGIT | _C_PRINT) // DIGIT FIVE
-CHAR(0x36,0x36,0x36,_C_DIGIT | _C_XDIGIT | _C_PRINT) // DIGIT SIX
-CHAR(0x37,0x37,0x37,_C_DIGIT | _C_XDIGIT | _C_PRINT) // DIGIT SEVEN
-CHAR(0x38,0x38,0x38,_C_DIGIT | _C_XDIGIT | _C_PRINT) // DIGIT EIGHT
-CHAR(0x39,0x39,0x39,_C_DIGIT | _C_XDIGIT | _C_PRINT) // DIGIT NINE
-CHAR(0x3A,0x3A,0x3A,_C_PRINT) // COLON
-CHAR(0x3B,0x3B,0x3B,_C_PRINT) // SEMICOLON
-CHAR(0x3C,0x3C,0x3C,_C_PRINT) // LESS-THAN SIGN
-CHAR(0x3D,0x3D,0x3D,_C_PRINT) // EQUALS SIGN
-CHAR(0x3E,0x3E,0x3E,_C_PRINT) // GREATER-THAN SIGN
-CHAR(0x3F,0x3F,0x3F,_C_PRINT) // QUESTION MARK
-CHAR(0x40,0x40,0x40,_C_PRINT) // COMMERCIAL AT
-CHAR(0x41,0x41,0x61,_C_UPPER | _C_XDIGIT | _C_PRINT) // LATIN CAPITAL LETTER A
-CHAR(0x42,0x42,0x62,_C_UPPER | _C_XDIGIT | _C_PRINT) // LATIN CAPITAL LETTER B
-CHAR(0x43,0x43,0x63,_C_UPPER | _C_XDIGIT | _C_PRINT) // LATIN CAPITAL LETTER C
-CHAR(0x44,0x44,0x64,_C_UPPER | _C_XDIGIT | _C_PRINT) // LATIN CAPITAL LETTER D
-CHAR(0x45,0x45,0x65,_C_UPPER | _C_XDIGIT | _C_PRINT) // LATIN CAPITAL LETTER E
-CHAR(0x46,0x46,0x66,_C_UPPER | _C_XDIGIT | _C_PRINT) // LATIN CAPITAL LETTER F
-CHAR(0x47,0x47,0x67,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER G
-CHAR(0x48,0x48,0x68,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER H
-CHAR(0x49,0x49,0x69,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER I
-CHAR(0x4A,0x4A,0x6A,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER J
-CHAR(0x4B,0x4B,0x6B,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER K
-CHAR(0x4C,0x4C,0x6C,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER L
-CHAR(0x4D,0x4D,0x6D,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER M
-CHAR(0x4E,0x4E,0x6E,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER N
-CHAR(0x4F,0x4F,0x6F,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER O
-CHAR(0x50,0x50,0x70,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER P
-CHAR(0x51,0x51,0x71,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER Q
-CHAR(0x52,0x52,0x72,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER R
-CHAR(0x53,0x53,0x73,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER S
-CHAR(0x54,0x54,0x74,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER T
-CHAR(0x55,0x55,0x75,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER U
-CHAR(0x56,0x56,0x76,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER V
-CHAR(0x57,0x57,0x77,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER W
-CHAR(0x58,0x58,0x78,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER X
-CHAR(0x59,0x59,0x79,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER Y
-CHAR(0x5A,0x5A,0x7A,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER Z
-CHAR(0x5B,0x5B,0x5B,_C_PRINT) // LEFT SQUARE BRACKET
-CHAR(0x5C,0x5C,0x5C,_C_PRINT) // REVERSE SOLIDUS
-CHAR(0x5D,0x5D,0x5D,_C_PRINT) // RIGHT SQUARE BRACKET
-CHAR(0x5E,0x5E,0x5E,_C_PRINT) // CIRCUMFLEX ACCENT
-CHAR(0x5F,0x5F,0x5F,_C_INNER | _C_PRINT) // LOW LINE
-CHAR(0x60,0x60,0x60,_C_PRINT) // GRAVE ACCENT
-CHAR(0x61,0x41,0x61,_C_LOWER | _C_XDIGIT | _C_PRINT) // LATIN SMALL LETTER A
-CHAR(0x62,0x42,0x62,_C_LOWER | _C_XDIGIT | _C_PRINT) // LATIN SMALL LETTER B
-CHAR(0x63,0x43,0x63,_C_LOWER | _C_XDIGIT | _C_PRINT) // LATIN SMALL LETTER C
-CHAR(0x64,0x44,0x64,_C_LOWER | _C_XDIGIT | _C_PRINT) // LATIN SMALL LETTER D
-CHAR(0x65,0x45,0x65,_C_LOWER | _C_XDIGIT | _C_PRINT) // LATIN SMALL LETTER E
-CHAR(0x66,0x46,0x66,_C_LOWER | _C_XDIGIT | _C_PRINT) // LATIN SMALL LETTER F
-CHAR(0x67,0x47,0x67,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER G
-CHAR(0x68,0x48,0x68,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER H
-CHAR(0x69,0x49,0x69,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER I
-CHAR(0x6A,0x4A,0x6A,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER J
-CHAR(0x6B,0x4B,0x6B,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER K
-CHAR(0x6C,0x4C,0x6C,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER L
-CHAR(0x6D,0x4D,0x6D,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER M
-CHAR(0x6E,0x4E,0x6E,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER N
-CHAR(0x6F,0x4F,0x6F,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER O
-CHAR(0x70,0x50,0x70,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER P
-CHAR(0x71,0x51,0x71,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER Q
-CHAR(0x72,0x52,0x72,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER R
-CHAR(0x73,0x53,0x73,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER S
-CHAR(0x74,0x54,0x74,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER T
-CHAR(0x75,0x55,0x75,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER U
-CHAR(0x76,0x56,0x76,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER V
-CHAR(0x77,0x57,0x77,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER W
-CHAR(0x78,0x58,0x78,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER X
-CHAR(0x79,0x59,0x79,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER Y
-CHAR(0x7A,0x5A,0x7A,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER Z
-CHAR(0x7B,0x7B,0x7B,_C_PRINT) // LEFT CURLY BRACKET
-CHAR(0x7C,0x7C,0x7C,_C_PRINT) // VERTICAL LINE
-CHAR(0x7D,0x7D,0x7D,_C_PRINT) // RIGHT CURLY BRACKET
-CHAR(0x7E,0x7E,0x7E,_C_PRINT) // TILDE
-CHAR(0x7F,0x7F,0x7F,_C_CTRL) // <control>
-CHAR(0x80,0x80,0x80,_C_PRINT) // UTF-8
-CHAR(0x81,0x81,0x81,_C_PRINT) // UTF-8
-CHAR(0x82,0x82,0x82,_C_PRINT) // UTF-8
-CHAR(0x83,0x83,0x83,_C_PRINT) // UTF-8
-CHAR(0x84,0x84,0x84,_C_PRINT) // UTF-8
-CHAR(0x85,0x85,0x85,_C_PRINT) // UTF-8
-CHAR(0x86,0x86,0x86,_C_PRINT) // UTF-8
-CHAR(0x87,0x87,0x87,_C_PRINT) // UTF-8
-CHAR(0x88,0x88,0x88,_C_PRINT) // UTF-8
-CHAR(0x89,0x89,0x89,_C_PRINT) // UTF-8
-CHAR(0x8A,0x8A,0x8A,_C_PRINT) // UTF-8
-CHAR(0x8B,0x8B,0x8B,_C_PRINT) // UTF-8
-CHAR(0x8C,0x8C,0x8C,_C_PRINT) // UTF-8
-CHAR(0x8D,0x8D,0x8D,_C_PRINT) // UTF-8
-CHAR(0x8E,0x8E,0x8E,_C_PRINT) // UTF-8
-CHAR(0x8F,0x8F,0x8F,_C_PRINT) // UTF-8
-CHAR(0x90,0x90,0x90,_C_PRINT) // UTF-8
-CHAR(0x91,0x91,0x91,_C_PRINT) // UTF-8
-CHAR(0x92,0x92,0x92,_C_PRINT) // UTF-8
-CHAR(0x93,0x93,0x93,_C_PRINT) // UTF-8
-CHAR(0x94,0x94,0x94,_C_PRINT) // UTF-8
-CHAR(0x95,0x95,0x95,_C_PRINT) // UTF-8
-CHAR(0x96,0x96,0x96,_C_PRINT) // UTF-8
-CHAR(0x97,0x97,0x97,_C_PRINT) // UTF-8
-CHAR(0x98,0x98,0x98,_C_PRINT) // UTF-8
-CHAR(0x99,0x99,0x99,_C_PRINT) // UTF-8
-CHAR(0x9A,0x9A,0x9A,_C_PRINT) // UTF-8
-CHAR(0x9B,0x9B,0x9B,_C_PRINT) // UTF-8
-CHAR(0x9C,0x9C,0x9C,_C_PRINT) // UTF-8
-CHAR(0x9D,0x9D,0x9D,_C_PRINT) // UTF-8
-CHAR(0x9E,0x9E,0x9E,_C_PRINT) // UTF-8
-CHAR(0x9F,0x9F,0x9F,_C_PRINT) // UTF-8
-CHAR(0xA0,0xA0,0xA0,_C_PRINT) // UTF-8
-CHAR(0xA1,0xA1,0xA1,_C_PRINT) // UTF-8
-CHAR(0xA2,0xA2,0xA2,_C_PRINT) // UTF-8
-CHAR(0xA3,0xA3,0xA3,_C_PRINT) // UTF-8
-CHAR(0xA4,0xA4,0xA4,_C_PRINT) // UTF-8
-CHAR(0xA5,0xA5,0xA5,_C_PRINT) // UTF-8
-CHAR(0xA6,0xA6,0xA6,_C_PRINT) // UTF-8
-CHAR(0xA7,0xA7,0xA7,_C_PRINT) // UTF-8
-CHAR(0xA8,0xA8,0xA8,_C_PRINT) // UTF-8
-CHAR(0xA9,0xA9,0xA9,_C_PRINT) // UTF-8
-CHAR(0xAA,0xAA,0xAA,_C_PRINT) // UTF-8
-CHAR(0xAB,0xAB,0xAB,_C_PRINT) // UTF-8
-CHAR(0xAC,0xAC,0xAC,_C_PRINT) // UTF-8
-CHAR(0xAD,0xAD,0xAD,_C_PRINT) // UTF-8
-CHAR(0xAE,0xAE,0xAE,_C_PRINT) // UTF-8
-CHAR(0xAF,0xAF,0xAF,_C_PRINT) // UTF-8
-CHAR(0xB0,0xB0,0xB0,_C_PRINT) // UTF-8
-CHAR(0xB1,0xB1,0xB1,_C_PRINT) // UTF-8
-CHAR(0xB2,0xB2,0xB2,_C_PRINT) // UTF-8
-CHAR(0xB3,0xB3,0xB3,_C_PRINT) // UTF-8
-CHAR(0xB4,0xB4,0xB4,_C_PRINT) // UTF-8
-CHAR(0xB5,0xB5,0xB5,_C_PRINT) // UTF-8
-CHAR(0xB6,0xB6,0xB6,_C_PRINT) // UTF-8
-CHAR(0xB7,0xB7,0xB7,_C_PRINT) // UTF-8
-CHAR(0xB8,0xB8,0xB8,_C_PRINT) // UTF-8
-CHAR(0xB9,0xB9,0xB9,_C_PRINT) // UTF-8
-CHAR(0xBA,0xBA,0xBA,_C_PRINT) // UTF-8
-CHAR(0xBB,0xBB,0xBB,_C_PRINT) // UTF-8
-CHAR(0xBC,0xBC,0xBC,_C_PRINT) // UTF-8
-CHAR(0xBD,0xBD,0xBD,_C_PRINT) // UTF-8
-CHAR(0xBE,0xBE,0xBE,_C_PRINT) // UTF-8
-CHAR(0xBF,0xBF,0xBF,_C_PRINT) // UTF-8
-CHAR(0xC0,0xC0,0xC0,_C_PRINT) // UTF-8
-CHAR(0xC1,0xC1,0xC1,_C_PRINT) // UTF-8
-CHAR(0xC2,0xC2,0xC2,_C_PRINT) // UTF-8
-CHAR(0xC3,0xC3,0xC3,_C_PRINT) // UTF-8
-CHAR(0xC4,0xC4,0xC4,_C_PRINT) // UTF-8
-CHAR(0xC5,0xC5,0xC5,_C_PRINT) // UTF-8
-CHAR(0xC6,0xC6,0xC6,_C_PRINT) // UTF-8
-CHAR(0xC7,0xC7,0xC7,_C_PRINT) // UTF-8
-CHAR(0xC8,0xC8,0xC8,_C_PRINT) // UTF-8
-CHAR(0xC9,0xC9,0xC9,_C_PRINT) // UTF-8
-CHAR(0xCA,0xCA,0xCA,_C_PRINT) // UTF-8
-CHAR(0xCB,0xCB,0xCB,_C_PRINT) // UTF-8
-CHAR(0xCC,0xCC,0xCC,_C_PRINT) // UTF-8
-CHAR(0xCD,0xCD,0xCD,_C_PRINT) // UTF-8
-CHAR(0xCE,0xCE,0xCE,_C_PRINT) // UTF-8
-CHAR(0xCF,0xCF,0xCF,_C_PRINT) // UTF-8
-CHAR(0xD0,0xD0,0xD0,_C_PRINT) // UTF-8
-CHAR(0xD1,0xD1,0xD1,_C_PRINT) // UTF-8
-CHAR(0xD2,0xD2,0xD2,_C_PRINT) // UTF-8
-CHAR(0xD3,0xD3,0xD3,_C_PRINT) // UTF-8
-CHAR(0xD4,0xD4,0xD4,_C_PRINT) // UTF-8
-CHAR(0xD5,0xD5,0xD5,_C_PRINT) // UTF-8
-CHAR(0xD6,0xD6,0xD6,_C_PRINT) // UTF-8
-CHAR(0xD7,0xD7,0xD7,_C_PRINT) // UTF-8
-CHAR(0xD8,0xD8,0xD8,_C_PRINT) // UTF-8
-CHAR(0xD9,0xD9,0xD9,_C_PRINT) // UTF-8
-CHAR(0xDA,0xDA,0xDA,_C_PRINT) // UTF-8
-CHAR(0xDB,0xDB,0xDB,_C_PRINT) // UTF-8
-CHAR(0xDC,0xDC,0xDC,_C_PRINT) // UTF-8
-CHAR(0xDD,0xDD,0xDD,_C_PRINT) // UTF-8
-CHAR(0xDE,0xDE,0xDE,_C_PRINT) // UTF-8
-CHAR(0xDF,0xDF,0xDF,_C_PRINT) // UTF-8
-CHAR(0xE0,0xE0,0xE0,_C_PRINT) // UTF-8
-CHAR(0xE1,0xE1,0xE1,_C_PRINT) // UTF-8
-CHAR(0xE2,0xE2,0xE2,_C_PRINT) // UTF-8
-CHAR(0xE3,0xE3,0xE3,_C_PRINT) // UTF-8
-CHAR(0xE4,0xE4,0xE4,_C_PRINT) // UTF-8
-CHAR(0xE5,0xE5,0xE5,_C_PRINT) // UTF-8
-CHAR(0xE6,0xE6,0xE6,_C_PRINT) // UTF-8
-CHAR(0xE7,0xE7,0xE7,_C_PRINT) // UTF-8
-CHAR(0xE8,0xE8,0xE8,_C_PRINT) // UTF-8
-CHAR(0xE9,0xE9,0xE9,_C_PRINT) // UTF-8
-CHAR(0xEA,0xEA,0xEA,_C_PRINT) // UTF-8
-CHAR(0xEB,0xEB,0xEB,_C_PRINT) // UTF-8
-CHAR(0xEC,0xEC,0xEC,_C_PRINT) // UTF-8
-CHAR(0xED,0xED,0xED,_C_PRINT) // UTF-8
-CHAR(0xEE,0xEE,0xEE,_C_PRINT) // UTF-8
-CHAR(0xEF,0xEF,0xEF,_C_PRINT) // UTF-8
-CHAR(0xF0,0xF0,0xF0,_C_PRINT) // UTF-8
-CHAR(0xF1,0xF1,0xF1,_C_PRINT) // UTF-8
-CHAR(0xF2,0xF2,0xF2,_C_PRINT) // UTF-8
-CHAR(0xF3,0xF3,0xF3,_C_PRINT) // UTF-8
-CHAR(0xF4,0xF4,0xF4,_C_PRINT) // UTF-8
-CHAR(0xF5,0xF5,0xF5,_C_PRINT) // UTF-8
-CHAR(0xF6,0xF6,0xF6,_C_PRINT) // UTF-8
-CHAR(0xF7,0xF7,0xF7,_C_PRINT) // UTF-8
-CHAR(0xF8,0xF8,0xF8,_C_PRINT) // UTF-8
-CHAR(0xF9,0xF9,0xF9,_C_PRINT) // UTF-8
-CHAR(0xFA,0xFA,0xFA,_C_PRINT) // UTF-8
-CHAR(0xFB,0xFB,0xFB,_C_PRINT) // UTF-8
-CHAR(0xFC,0xFC,0xFC,_C_PRINT) // UTF-8
-CHAR(0xFD,0xFD,0xFD,_C_PRINT) // UTF-8
-CHAR(0xFE,0xFE,0xFE,_C_PRINT) // UTF-8
-CHAR(0xFF,0xFF,0xFF,_C_PRINT) // UTF-8
+++ /dev/null
-/*
- * UCW Library -- Character Types
- *
- * (c) 1997--2004 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#ifndef _UCW_CHARTYPE_H
-#define _UCW_CHARTYPE_H
-
-#define _C_UPPER 1 /* Upper-case letters */
-#define _C_LOWER 2 /* Lower-case letters */
-#define _C_PRINT 4 /* Printable */
-#define _C_DIGIT 8 /* Digits */
-#define _C_CTRL 16 /* Control characters */
-#define _C_XDIGIT 32 /* Hexadecimal digits */
-#define _C_BLANK 64 /* White spaces (spaces, tabs, newlines) */
-#define _C_INNER 128 /* `inner punctuation' -- underscore etc. */
-
-#define _C_ALPHA (_C_UPPER | _C_LOWER)
-#define _C_ALNUM (_C_ALPHA | _C_DIGIT)
-#define _C_WORD (_C_ALNUM | _C_INNER)
-#define _C_WSTART (_C_ALPHA | _C_INNER)
-
-extern const unsigned char _c_cat[256], _c_upper[256], _c_lower[256];
-
-#define Category(x) (_c_cat[(unsigned char)(x)])
-#define Ccat(x,y) (Category(x) & y)
-
-#define Cupper(x) Ccat(x, _C_UPPER)
-#define Clower(x) Ccat(x, _C_LOWER)
-#define Calpha(x) Ccat(x, _C_ALPHA)
-#define Calnum(x) Ccat(x, _C_ALNUM)
-#define Cprint(x) Ccat(x, _C_PRINT)
-#define Cdigit(x) Ccat(x, _C_DIGIT)
-#define Cxdigit(x) Ccat(x, _C_XDIGIT)
-#define Cword(x) Ccat(x, _C_WORD)
-#define Cblank(x) Ccat(x, _C_BLANK)
-#define Cctrl(x) Ccat(x, _C_CTRL)
-#define Cspace(x) Cblank(x)
-
-#define Cupcase(x) _c_upper[(unsigned char)(x)]
-#define Clocase(x) _c_lower[(unsigned char)(x)]
-
-#define Cxvalue(x) (((x)<'A')?((x)-'0'):(((x)&0xdf)-'A'+10))
-
-#endif
+++ /dev/null
-/*
- * UCW Library -- Circular Linked Lists
- *
- * (c) 2003--2007 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#ifndef _UCW_CLISTS_H
-#define _UCW_CLISTS_H
-
-typedef struct cnode {
- struct cnode *next, *prev;
-} cnode;
-
-typedef struct clist {
- struct cnode head;
-} clist;
-
-static inline void *clist_head(clist *l)
-{
- return (l->head.next != &l->head) ? l->head.next : NULL;
-}
-
-static inline void *clist_tail(clist *l)
-{
- return (l->head.prev != &l->head) ? l->head.prev : NULL;
-}
-
-static inline void *clist_next(clist *l, cnode *n)
-{
- return (n->next != &l->head) ? (void *) n->next : NULL;
-}
-
-static inline void *clist_prev(clist *l, cnode *n)
-{
- return (n->prev != &l->head) ? (void *) n->prev : NULL;
-}
-
-static inline int clist_empty(clist *l)
-{
- return (l->head.next == &l->head);
-}
-
-#define CLIST_WALK(n,list) for(n=(void*)(list).head.next; (cnode*)(n) != &(list).head; n=(void*)((cnode*)(n))->next)
-#define CLIST_WALK_DELSAFE(n,list,tmp) for(n=(void*)(list).head.next; tmp=(void*)((cnode*)(n))->next, (cnode*)(n) != &(list).head; n=(void*)tmp)
-#define CLIST_FOR_EACH(type,n,list) for(type n=(void*)(list).head.next; (cnode*)(n) != &(list).head; n=(void*)((cnode*)(n))->next)
-#define CLIST_FOR_EACH_DELSAFE(type,n,list,tmp) for(type n=(void*)(list).head.next; tmp=(void*)((cnode*)(n))->next, (cnode*)(n) != &(list).head; n=(void*)tmp)
-
-#define CLIST_FOR_EACH_BACKWARDS(type,n,list) for(type n=(void*)(list).head.prev; (cnode*)(n) != &(list).head; n=(void*)((cnode*)(n))->prev)
-
-static inline void clist_insert_after(cnode *what, cnode *after)
-{
- cnode *before = after->next;
- what->next = before;
- what->prev = after;
- before->prev = what;
- after->next = what;
-}
-
-static inline void clist_insert_before(cnode *what, cnode *before)
-{
- cnode *after = before->prev;
- what->next = before;
- what->prev = after;
- before->prev = what;
- after->next = what;
-}
-
-static inline void clist_add_tail(clist *l, cnode *n)
-{
- clist_insert_before(n, &l->head);
-}
-
-static inline void clist_add_head(clist *l, cnode *n)
-{
- clist_insert_after(n, &l->head);
-}
-
-static inline void clist_remove(cnode *n)
-{
- cnode *before = n->prev;
- cnode *after = n->next;
- before->next = after;
- after->prev = before;
-}
-
-static inline void *clist_remove_head(clist *l)
-{
- cnode *n = clist_head(l);
- if (n)
- clist_remove(n);
- return n;
-}
-
-static inline void *clist_remove_tail(clist *l)
-{
- cnode *n = clist_tail(l);
- if (n)
- clist_remove(n);
- return n;
-}
-
-static inline void clist_init(clist *l)
-{
- cnode *head = &l->head;
- head->next = head->prev = head;
-}
-
-static inline void clist_insert_list_after(clist *what, cnode *after)
-{
- if (!clist_empty(what))
- {
- cnode *w = &what->head;
- w->prev->next = after->next;
- after->next->prev = w->prev;
- w->next->prev = after;
- after->next = w->next;
- clist_init(what);
- }
-}
-
-static inline uns clist_size(clist *l)
-{
- uns i = 0;
- CLIST_FOR_EACH(cnode *, n, *l)
- i++;
- return i;
-}
-
-#endif
+++ /dev/null
-/*
- * UCW Library -- Configuration files: memory allocation
- *
- * (c) 2001--2006 Robert Spalek <robert@ucw.cz>
- * (c) 2003--2006 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-#include "lib/conf.h"
-#include "lib/mempool.h"
-
-struct mempool *cf_pool; // current pool for loading new configuration
-
-void *
-cf_malloc(uns size)
-{
- return mp_alloc(cf_pool, size);
-}
-
-void *
-cf_malloc_zero(uns size)
-{
- return mp_alloc_zero(cf_pool, size);
-}
-
-char *
-cf_strdup(const char *s)
-{
- return mp_strdup(cf_pool, s);
-}
-
-char *
-cf_printf(const char *fmt, ...)
-{
- va_list args;
- va_start(args, fmt);
- char *res = mp_vprintf(cf_pool, fmt, args);
- va_end(args);
- return res;
-}
+++ /dev/null
-/*
- * UCW Library -- Configuration files: dumping
- *
- * (c) 2001--2006 Robert Spalek <robert@ucw.cz>
- * (c) 2003--2006 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-#include "lib/conf.h"
-#include "lib/getopt.h"
-#include "lib/conf-internal.h"
-#include "lib/clists.h"
-#include "lib/fastbuf.h"
-
-static void
-spaces(struct fastbuf *fb, uns nr)
-{
- for (uns i=0; i<nr; i++)
- bputs(fb, " ");
-}
-
-static void
-dump_basic(struct fastbuf *fb, void *ptr, enum cf_type type, union cf_union *u)
-{
- switch (type) {
- case CT_INT: bprintf(fb, "%d ", *(uns*)ptr); break;
- case CT_U64: bprintf(fb, "%llu ", (long long) *(u64*)ptr); break;
- case CT_DOUBLE: bprintf(fb, "%lg ", *(double*)ptr); break;
- case CT_IP: bprintf(fb, "%08x ", *(uns*)ptr); break;
- case CT_STRING:
- if (*(char**)ptr)
- bprintf(fb, "'%s' ", *(char**)ptr);
- else
- bprintf(fb, "NULL ");
- break;
- case CT_LOOKUP: bprintf(fb, "%s ", *(int*)ptr >= 0 ? u->lookup[ *(int*)ptr ] : "???"); break;
- case CT_USER:
- if (u->utype->dumper)
- u->utype->dumper(fb, ptr);
- else
- bprintf(fb, "??? ");
- break;
- }
-}
-
-static void dump_section(struct fastbuf *fb, struct cf_section *sec, int level, void *ptr);
-
-static char *class_names[] = { "end", "static", "dynamic", "parser", "section", "list", "bitmap" };
-
-static void
-dump_item(struct fastbuf *fb, struct cf_item *item, int level, void *ptr)
-{
- ptr += (uintptr_t) item->ptr;
- enum cf_type type = item->type;
- uns size = cf_type_size(item->type, item->u.utype);
- int i;
- spaces(fb, level);
- bprintf(fb, "%s: C%s #", item->name, class_names[item->cls]);
- if (item->number == CF_ANY_NUM)
- bputs(fb, "any ");
- else
- bprintf(fb, "%d ", item->number);
- if (item->cls == CC_STATIC || item->cls == CC_DYNAMIC || item->cls == CC_BITMAP) {
- bprintf(fb, "T%s ", cf_type_names[type]);
- if (item->type == CT_USER)
- bprintf(fb, "U%s S%d ", item->u.utype->name, size);
- }
- if (item->cls == CC_STATIC) {
- for (i=0; i<item->number; i++)
- dump_basic(fb, ptr + i * size, type, &item->u);
- } else if (item->cls == CC_DYNAMIC) {
- ptr = * (void**) ptr;
- if (ptr) {
- int real_nr = DARY_LEN(ptr);
- bprintf(fb, "N%d ", real_nr);
- for (i=0; i<real_nr; i++)
- dump_basic(fb, ptr + i * size, type, &item->u);
- } else
- bprintf(fb, "NULL ");
- } else if (item->cls == CC_BITMAP) {
- u32 mask = * (u32*) ptr;
- for (i=0; i<32; i++) {
- if (item->type == CT_LOOKUP && !item->u.lookup[i])
- break;
- if (mask & (1<<i)) {
- if (item->type == CT_INT)
- bprintf(fb, "%d ", i);
- else if (item->type == CT_LOOKUP)
- bprintf(fb, "%s ", item->u.lookup[i]);
- }
- }
- }
- bputc(fb, '\n');
- if (item->cls == CC_SECTION)
- dump_section(fb, item->u.sec, level+1, ptr);
- else if (item->cls == CC_LIST) {
- uns idx = 0;
- CLIST_FOR_EACH(cnode *, n, * (clist*) ptr) {
- spaces(fb, level+1);
- bprintf(fb, "item %d\n", ++idx);
- dump_section(fb, item->u.sec, level+2, n);
- }
- }
-}
-
-static void
-dump_section(struct fastbuf *fb, struct cf_section *sec, int level, void *ptr)
-{
- spaces(fb, level);
- bprintf(fb, "S%d F%x:\n", sec->size, sec->flags);
- for (struct cf_item *item=sec->cfg; item->cls; item++)
- dump_item(fb, item, level, ptr);
-}
-
-void
-cf_dump_sections(struct fastbuf *fb)
-{
- dump_section(fb, &cf_sections, 0, NULL);
-}
-
+++ /dev/null
-/*
- * UCW Library -- Configuration files: parsing input streams
- *
- * (c) 2001--2006 Robert Spalek <robert@ucw.cz>
- * (c) 2003--2006 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-#include "lib/conf.h"
-#include "lib/getopt.h"
-#include "lib/conf-internal.h"
-#include "lib/mempool.h"
-#include "lib/fastbuf.h"
-#include "lib/chartype.h"
-#include "lib/stkstring.h"
-
-#include <stdlib.h>
-#include <string.h>
-#include <fcntl.h>
-
-/* Text file parser */
-
-static const char *name_parse_fb;
-static struct fastbuf *parse_fb;
-static uns line_num;
-
-#define MAX_LINE 4096
-static char line_buf[MAX_LINE];
-static char *line = line_buf;
-
-#include "lib/bbuf.h"
-static bb_t copy_buf;
-static uns copied;
-
-#define GBUF_TYPE uns
-#define GBUF_PREFIX(x) split_##x
-#include "lib/gbuf.h"
-static split_t word_buf;
-static uns words;
-static uns ends_by_brace; // the line is ended by "{"
-
-static int
-get_line(char **msg)
-{
- int err = bgets_nodie(parse_fb, line_buf, MAX_LINE);
- line_num++;
- if (err <= 0) {
- *msg = err < 0 ? "Line too long" : NULL;
- return 0;
- }
- line = line_buf;
- while (Cblank(*line))
- line++;
- return 1;
-}
-
-static void
-append(char *start, char *end)
-{
- uns len = end - start;
- bb_grow(&copy_buf, copied + len + 1);
- memcpy(copy_buf.ptr + copied, start, len);
- copied += len + 1;
- copy_buf.ptr[copied-1] = 0;
-}
-
-static char *
-get_word(uns is_command_name)
-{
- char *msg;
- if (*line == '\'') {
- line++;
- while (1) {
- char *start = line;
- while (*line && *line != '\'')
- line++;
- append(start, line);
- if (*line)
- break;
- copy_buf.ptr[copied-1] = '\n';
- if (!get_line(&msg))
- return msg ? : "Unterminated apostrophe word at the end";
- }
- line++;
-
- } else if (*line == '"') {
- line++;
- uns start_copy = copied;
- while (1) {
- char *start = line;
- uns escape = 0;
- while (*line) {
- if (*line == '"' && !escape)
- break;
- else if (*line == '\\')
- escape ^= 1;
- else
- escape = 0;
- line++;
- }
- append(start, line);
- if (*line)
- break;
- if (!escape)
- copy_buf.ptr[copied-1] = '\n';
- else // merge two lines
- copied -= 2;
- if (!get_line(&msg))
- return msg ? : "Unterminated quoted word at the end";
- }
- line++;
-
- char *tmp = stk_str_unesc(copy_buf.ptr + start_copy);
- uns l = strlen(tmp);
- bb_grow(&copy_buf, start_copy + l + 1);
- strcpy(copy_buf.ptr + start_copy, tmp);
- copied = start_copy + l + 1;
-
- } else {
- // promised that *line is non-null and non-blank
- char *start = line;
- while (*line && !Cblank(*line)
- && *line != '{' && *line != '}' && *line != ';'
- && (*line != '=' || !is_command_name))
- line++;
- if (*line == '=') { // nice for setting from a command-line
- if (line == start)
- return "Assignment without a variable";
- *line = ' ';
- }
- if (line == start) // already the first char is control
- line++;
- append(start, line);
- }
- while (Cblank(*line))
- line++;
- return NULL;
-}
-
-static char *
-get_token(uns is_command_name, char **err)
-{
- *err = NULL;
- while (1) {
- if (!*line || *line == '#') {
- if (!is_command_name || !get_line(err))
- return NULL;
- } else if (*line == ';') {
- *err = get_word(0);
- if (!is_command_name || *err)
- return NULL;
- } else if (*line == '\\' && !line[1]) {
- if (!get_line(err)) {
- if (!*err)
- *err = "Last line ends by a backslash";
- return NULL;
- }
- if (!*line || *line == '#')
- msg(L_WARN, "The line %s:%d following a backslash is empty", name_parse_fb ? : "", line_num);
- } else {
- split_grow(&word_buf, words+1);
- uns start = copied;
- word_buf.ptr[words++] = copied;
- *err = get_word(is_command_name);
- return *err ? NULL : copy_buf.ptr + start;
- }
- }
-}
-
-static char *
-split_command(void)
-{
- words = copied = ends_by_brace = 0;
- char *msg, *start_word;
- if (!(start_word = get_token(1, &msg)))
- return msg;
- if (*start_word == '{') // only one opening brace
- return "Unexpected opening brace";
- while (*line != '}') // stays for the next time
- {
- if (!(start_word = get_token(0, &msg)))
- return msg;
- if (*start_word == '{') {
- words--; // discard the brace
- ends_by_brace = 1;
- break;
- }
- }
- return NULL;
-}
-
-/* Parsing multiple files */
-
-static char *
-parse_fastbuf(const char *name_fb, struct fastbuf *fb, uns depth)
-{
- char *err;
- name_parse_fb = name_fb;
- parse_fb = fb;
- line_num = 0;
- line = line_buf;
- *line = 0;
- while (1)
- {
- err = split_command();
- if (err)
- goto error;
- if (!words)
- return NULL;
- char *name = copy_buf.ptr + word_buf.ptr[0];
- char *pars[words-1];
- for (uns i=1; i<words; i++)
- pars[i-1] = copy_buf.ptr + word_buf.ptr[i];
- if (!strcasecmp(name, "include"))
- {
- if (words != 2)
- err = "Expecting one filename";
- else if (depth > 8)
- err = "Too many nested files";
- else if (*line && *line != '#') // because the contents of line_buf is not re-entrant and will be cleared
- err = "The input command must be the last one on a line";
- if (err)
- goto error;
- struct fastbuf *new_fb = bopen_try(pars[0], O_RDONLY, 1<<14);
- if (!new_fb) {
- err = cf_printf("Cannot open file %s: %m", pars[0]);
- goto error;
- }
- uns ll = line_num;
- err = parse_fastbuf(stk_strdup(pars[0]), new_fb, depth+1);
- line_num = ll;
- bclose(new_fb);
- if (err)
- goto error;
- parse_fb = fb;
- continue;
- }
- enum cf_operation op;
- char *c = strchr(name, ':');
- if (!c)
- op = strcmp(name, "}") ? OP_SET : OP_CLOSE;
- else {
- *c++ = 0;
- switch (Clocase(*c)) {
- case 's': op = OP_SET; break;
- case 'c': op = Clocase(c[1]) == 'l' ? OP_CLEAR: OP_COPY; break;
- case 'a': switch (Clocase(c[1])) {
- case 'p': op = OP_APPEND; break;
- case 'f': op = OP_AFTER; break;
- default: op = OP_ALL;
- }; break;
- case 'p': op = OP_PREPEND; break;
- case 'r': op = OP_REMOVE; break;
- case 'e': op = OP_EDIT; break;
- case 'b': op = OP_BEFORE; break;
- default: op = OP_SET; break;
- }
- if (strcasecmp(c, cf_op_names[op])) {
- err = cf_printf("Unknown operation %s", c);
- goto error;
- }
- }
- if (ends_by_brace)
- op |= OP_OPEN;
- err = cf_interpret_line(name, op, words-1, pars);
- if (err)
- goto error;
- }
-error:
- if (name_fb)
- msg(L_ERROR, "File %s, line %d: %s", name_fb, line_num, err);
- else if (line_num == 1)
- msg(L_ERROR, "Manual setting of configuration: %s", err);
- else
- msg(L_ERROR, "Manual setting of configuration, line %d: %s", line_num, err);
- return "included from here";
-}
-
-#ifndef DEFAULT_CONFIG
-#define DEFAULT_CONFIG NULL
-#endif
-char *cf_def_file = DEFAULT_CONFIG;
-
-#ifndef ENV_VAR_CONFIG
-#define ENV_VAR_CONFIG NULL
-#endif
-char *cf_env_file = ENV_VAR_CONFIG;
-
-static uns postpone_commit; // only for cf_getopt()
-static uns everything_committed; // after the 1st load, this flag is set on
-
-static int
-done_stack(void)
-{
- if (cf_check_stack())
- return 1;
- if (cf_commit_all(postpone_commit ? CF_NO_COMMIT : everything_committed ? CF_COMMIT : CF_COMMIT_ALL))
- return 1;
- if (!postpone_commit)
- everything_committed = 1;
- return 0;
-}
-
-static int
-load_file(const char *file)
-{
- cf_init_stack();
- struct fastbuf *fb = bopen_try(file, O_RDONLY, 1<<14);
- if (!fb) {
- msg(L_ERROR, "Cannot open %s: %m", file);
- return 1;
- }
- char *err_msg = parse_fastbuf(file, fb, 0);
- bclose(fb);
- int err = !!err_msg || done_stack();
- if (!err)
- cf_def_file = NULL;
- return err;
-}
-
-static int
-load_string(const char *string)
-{
- cf_init_stack();
- struct fastbuf fb;
- fbbuf_init_read(&fb, (byte *)string, strlen(string), 0);
- char *msg = parse_fastbuf(NULL, &fb, 0);
- return !!msg || done_stack();
-}
-
-/* Safe loading and reloading */
-
-int
-cf_reload(const char *file)
-{
- cf_journal_swap();
- struct cf_journal_item *oldj = cf_journal_new_transaction(1);
- uns ec = everything_committed;
- everything_committed = 0;
- int err = load_file(file);
- if (!err)
- {
- cf_journal_delete();
- cf_journal_commit_transaction(1, NULL);
- }
- else
- {
- everything_committed = ec;
- cf_journal_rollback_transaction(1, oldj);
- cf_journal_swap();
- }
- return err;
-}
-
-int
-cf_load(const char *file)
-{
- struct cf_journal_item *oldj = cf_journal_new_transaction(1);
- int err = load_file(file);
- if (!err)
- cf_journal_commit_transaction(1, oldj);
- else
- cf_journal_rollback_transaction(1, oldj);
- return err;
-}
-
-int
-cf_set(const char *string)
-{
- struct cf_journal_item *oldj = cf_journal_new_transaction(0);
- int err = load_string(string);
- if (!err)
- cf_journal_commit_transaction(0, oldj);
- else
- cf_journal_rollback_transaction(0, oldj);
- return err;
-}
-
-/* Command-line parser */
-
-static void
-load_default(void)
-{
- if (cf_def_file)
- {
- char *env;
- if (cf_env_file && (env = getenv(cf_env_file)))
- {
- if (cf_load(env))
- die("Cannot load config file %s", env);
- }
- else if (cf_load(cf_def_file))
- die("Cannot load default config %s", cf_def_file);
- }
- else
- {
- // We need to create an empty pool
- cf_journal_commit_transaction(1, cf_journal_new_transaction(1));
- }
-}
-
-static void
-final_commit(void)
-{
- if (postpone_commit) {
- postpone_commit = 0;
- if (done_stack())
- die("Cannot commit after the initialization");
- }
-}
-
-int
-cf_getopt(int argc, char * const argv[], const char *short_opts, const struct option *long_opts, int *long_index)
-{
- static int other_options = 0;
- while (1) {
- int res = getopt_long (argc, argv, short_opts, long_opts, long_index);
- if (res == 'S' || res == 'C' || res == 0x64436667)
- {
- if (other_options)
- die("The -S and -C options must precede all other arguments");
- if (res == 'S') {
- postpone_commit = 1;
- load_default();
- if (cf_set(optarg))
- die("Cannot set %s", optarg);
- } else if (res == 'C') {
- postpone_commit = 1;
- if (cf_load(optarg))
- die("Cannot load config file %s", optarg);
- }
-#ifdef CONFIG_DEBUG
- else { /* --dumpconfig */
- load_default();
- final_commit();
- struct fastbuf *b = bfdopen(1, 4096);
- cf_dump_sections(b);
- bclose(b);
- exit(0);
- }
-#endif
- } else {
- /* unhandled option or end of options */
- if (res != ':' && res != '?')
- load_default();
- final_commit();
- other_options++;
- return res;
- }
- }
-}
-
+++ /dev/null
-/*
- * UCW Library -- Configuration files: only for internal use of conf-*.c
- *
- * (c) 2001--2006 Robert Spalek <robert@ucw.cz>
- * (c) 2003--2006 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#ifndef _UCW_CONF_INTERNAL_H
-#define _UCW_CONF_INTERNAL_H
-
-/* conf-intr.c */
-#define OP_MASK 0xff // only get the operation
-#define OP_OPEN 0x100 // here we only get an opening brace instead of parameters
-#define OP_1ST 0x200 // in the 1st phase selectors are recorded into the mask
-#define OP_2ND 0x400 // in the 2nd phase real data are entered
-enum cf_operation;
-extern char *cf_op_names[];
-extern char *cf_type_names[];
-
-uns cf_type_size(enum cf_type type, struct cf_user_type *utype);
-char *cf_interpret_line(char *name, enum cf_operation op, int number, char **pars);
-void cf_init_stack(void);
-int cf_check_stack(void);
-
-/* conf-journal.c */
-void cf_journal_swap(void);
-void cf_journal_delete(void);
-
-/* conf-section.c */
-#define SEC_FLAG_DYNAMIC 0x80000000 // contains a dynamic attribute
-#define SEC_FLAG_UNKNOWN 0x40000000 // ignore unknown entriies
-#define SEC_FLAG_CANT_COPY 0x20000000 // contains lists or parsers
-#define SEC_FLAG_NUMBER 0x0fffffff // number of entries
-enum cf_commit_mode { CF_NO_COMMIT, CF_COMMIT, CF_COMMIT_ALL };
-extern struct cf_section cf_sections;
-
-struct cf_item *cf_find_subitem(struct cf_section *sec, const char *name);
-int cf_commit_all(enum cf_commit_mode cm);
-void cf_add_dirty(struct cf_section *sec, void *ptr);
-
-#endif
+++ /dev/null
-/*
- * UCW Library -- Configuration files: interpreter
- *
- * (c) 2001--2006 Robert Spalek <robert@ucw.cz>
- * (c) 2003--2006 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-#include "lib/conf.h"
-#include "lib/getopt.h"
-#include "lib/conf-internal.h"
-#include "lib/clists.h"
-
-#include <string.h>
-#include <stdio.h>
-
-#define TRY(f) do { char *_msg = f; if (_msg) return _msg; } while (0)
-
-/* Register size of and parser for each basic type */
-
-static char *
-cf_parse_string(char *str, char **ptr)
-{
- *ptr = cf_strdup(str);
- return NULL;
-}
-
-typedef char *cf_basic_parser(char *str, void *ptr);
-static struct {
- uns size;
- void *parser;
-} parsers[] = {
- { sizeof(int), cf_parse_int },
- { sizeof(u64), cf_parse_u64 },
- { sizeof(double), cf_parse_double },
- { sizeof(u32), cf_parse_ip },
- { sizeof(char*), cf_parse_string },
- { sizeof(int), NULL }, // lookups are parsed extra
- { 0, NULL }, // user-defined types are parsed extra
-};
-
-inline uns
-cf_type_size(enum cf_type type, struct cf_user_type *utype)
-{
- if (type < CT_USER)
- return parsers[type].size;
- else
- return utype->size;
-}
-
-static char *
-cf_parse_lookup(char *str, int *ptr, char **t)
-{
- char **n = t;
- uns total_len = 0;
- while (*n && strcasecmp(*n, str)) {
- total_len += strlen(*n) + 2;
- n++;
- }
- if (*n) {
- *ptr = n - t;
- return NULL;
- }
- char *err = cf_malloc(total_len + strlen(str) + 60), *c = err;
- c += sprintf(err, "Invalid value %s, possible values are: ", str);
- for (n=t; *n; n++)
- c+= sprintf(c, "%s, ", *n);
- if (*t)
- c[-2] = 0;
- *ptr = -1;
- return err;
-}
-
-static char *
-cf_parse_ary(uns number, char **pars, void *ptr, enum cf_type type, union cf_union *u)
-{
- for (uns i=0; i<number; i++)
- {
- char *msg;
- uns size = cf_type_size(type, u->utype);
- if (type < CT_LOOKUP)
- msg = ((cf_basic_parser*) parsers[type].parser) (pars[i], ptr + i * size);
- else if (type == CT_LOOKUP)
- msg = cf_parse_lookup(pars[i], ptr + i * size, u->lookup);
- else if (type == CT_USER)
- msg = u->utype->parser(pars[i], ptr + i * size);
- else
- ASSERT(0);
- if (msg)
- return number > 1 ? cf_printf("Item %d: %s", i+1, msg) : msg;
- }
- return NULL;
-}
-
-/* Interpreter */
-
-#define T(x) #x,
-char *cf_op_names[] = { CF_OPERATIONS };
-#undef T
-char *cf_type_names[] = { "int", "u64", "double", "ip", "string", "lookup", "user" };
-
-#define DARY_HDR_SIZE ALIGN_TO(sizeof(uns), CPU_STRUCT_ALIGN)
-
-static char *
-interpret_set_dynamic(struct cf_item *item, int number, char **pars, void **ptr)
-{
- enum cf_type type = item->type;
- cf_journal_block(ptr, sizeof(void*));
- // boundary checks done by the caller
- uns size = cf_type_size(item->type, item->u.utype);
- *ptr = cf_malloc(DARY_HDR_SIZE + number * size) + DARY_HDR_SIZE;
- DARY_LEN(*ptr) = number;
- return cf_parse_ary(number, pars, *ptr, type, &item->u);
-}
-
-static char *
-interpret_add_dynamic(struct cf_item *item, int number, char **pars, int *processed, void **ptr, enum cf_operation op)
-{
- enum cf_type type = item->type;
- void *old_p = *ptr;
- uns size = cf_type_size(item->type, item->u.utype);
- ASSERT(size >= sizeof(uns));
- int old_nr = old_p ? DARY_LEN(old_p) : 0;
- int taken = MIN(number, ABS(item->number)-old_nr);
- *processed = taken;
- // stretch the dynamic array
- void *new_p = cf_malloc(DARY_HDR_SIZE + (old_nr + taken) * size) + DARY_HDR_SIZE;
- DARY_LEN(new_p) = old_nr + taken;
- cf_journal_block(ptr, sizeof(void*));
- *ptr = new_p;
- if (op == OP_APPEND) {
- memcpy(new_p, old_p, old_nr * size);
- return cf_parse_ary(taken, pars, new_p + old_nr * size, type, &item->u);
- } else if (op == OP_PREPEND) {
- memcpy(new_p + taken * size, old_p, old_nr * size);
- return cf_parse_ary(taken, pars, new_p, type, &item->u);
- } else
- return cf_printf("Dynamic arrays do not support operation %s", cf_op_names[op]);
-}
-
-static char *interpret_set_item(struct cf_item *item, int number, char **pars, int *processed, void *ptr, uns allow_dynamic);
-
-static char *
-interpret_section(struct cf_section *sec, int number, char **pars, int *processed, void *ptr, uns allow_dynamic)
-{
- cf_add_dirty(sec, ptr);
- *processed = 0;
- for (struct cf_item *ci=sec->cfg; ci->cls; ci++)
- {
- int taken;
- char *msg = interpret_set_item(ci, number, pars, &taken, ptr + (uintptr_t) ci->ptr, allow_dynamic && !ci[1].cls);
- if (msg)
- return cf_printf("Item %s: %s", ci->name, msg);
- *processed += taken;
- number -= taken;
- pars += taken;
- if (!number) // stop parsing, because many parsers would otherwise complain that number==0
- break;
- }
- return NULL;
-}
-
-static void
-add_to_list(cnode *where, cnode *new_node, enum cf_operation op)
-{
- switch (op)
- {
- case OP_EDIT: // edition has been done in-place
- break;
- case OP_REMOVE:
- CF_JOURNAL_VAR(where->prev->next);
- CF_JOURNAL_VAR(where->next->prev);
- clist_remove(where);
- break;
- case OP_AFTER: // implementation dependend (prepend_head = after(list)), and where==list, see clists.h:74
- case OP_PREPEND:
- case OP_COPY:
- CF_JOURNAL_VAR(where->next->prev);
- CF_JOURNAL_VAR(where->next);
- clist_insert_after(new_node, where);
- break;
- case OP_BEFORE: // implementation dependend (append_tail = before(list))
- case OP_APPEND:
- case OP_SET:
- CF_JOURNAL_VAR(where->prev->next);
- CF_JOURNAL_VAR(where->prev);
- clist_insert_before(new_node, where);
- break;
- default:
- ASSERT(0);
- }
-}
-
-static char *
-interpret_add_list(struct cf_item *item, int number, char **pars, int *processed, void *ptr, enum cf_operation op)
-{
- if (op >= OP_REMOVE)
- return cf_printf("You have to open a block for operation %s", cf_op_names[op]);
- if (!number)
- return "Nothing to add to the list";
- struct cf_section *sec = item->u.sec;
- *processed = 0;
- uns index = 0;
- while (number > 0)
- {
- void *node = cf_malloc(sec->size);
- cf_init_section(item->name, sec, node, 1);
- add_to_list(ptr, node, op);
- int taken;
- /* If the node contains any dynamic attribute at the end, we suppress
- * auto-repetition here and pass the flag inside instead. */
- index++;
- char *msg = interpret_section(sec, number, pars, &taken, node, sec->flags & SEC_FLAG_DYNAMIC);
- if (msg)
- return sec->flags & SEC_FLAG_DYNAMIC ? msg : cf_printf("Node %d of list %s: %s", index, item->name, msg);
- *processed += taken;
- number -= taken;
- pars += taken;
- if (sec->flags & SEC_FLAG_DYNAMIC)
- break;
- }
- return NULL;
-}
-
-static char *
-interpret_add_bitmap(struct cf_item *item, int number, char **pars, int *processed, u32 *ptr, enum cf_operation op)
-{
- if (op != OP_SET && op != OP_REMOVE)
- return cf_printf("Cannot apply operation %s on a bitmap", cf_op_names[op]);
- else if (item->type != CT_INT && item->type != CT_LOOKUP)
- return cf_printf("Type %s cannot be used with bitmaps", cf_type_names[item->type]);
- cf_journal_block(ptr, sizeof(u32));
- for (int i=0; i<number; i++) {
- uns idx;
- if (item->type == CT_INT)
- TRY( cf_parse_int(pars[i], &idx) );
- else
- TRY( cf_parse_lookup(pars[i], &idx, item->u.lookup) );
- if (idx >= 32)
- return "Bitmaps only have 32 bits";
- if (op == OP_SET)
- *ptr |= 1<<idx;
- else
- *ptr &= ~(1<<idx);
- }
- *processed = number;
- return NULL;
-}
-
-static char *
-interpret_set_item(struct cf_item *item, int number, char **pars, int *processed, void *ptr, uns allow_dynamic)
-{
- int taken;
- switch (item->cls)
- {
- case CC_STATIC:
- if (!number)
- return "Missing value";
- taken = MIN(number, item->number);
- *processed = taken;
- uns size = cf_type_size(item->type, item->u.utype);
- cf_journal_block(ptr, taken * size);
- return cf_parse_ary(taken, pars, ptr, item->type, &item->u);
- case CC_DYNAMIC:
- if (!allow_dynamic)
- return "Dynamic array cannot be used here";
- taken = MIN(number, ABS(item->number));
- *processed = taken;
- return interpret_set_dynamic(item, taken, pars, ptr);
- case CC_PARSER:
- if (item->number < 0 && !allow_dynamic)
- return "Parsers with variable number of parameters cannot be used here";
- if (item->number > 0 && number < item->number)
- return "Not enough parameters available for the parser";
- taken = MIN(number, ABS(item->number));
- *processed = taken;
- for (int i=0; i<taken; i++)
- pars[i] = cf_strdup(pars[i]);
- return item->u.par(taken, pars, ptr);
- case CC_SECTION:
- return interpret_section(item->u.sec, number, pars, processed, ptr, allow_dynamic);
- case CC_LIST:
- if (!allow_dynamic)
- return "Lists cannot be used here";
- return interpret_add_list(item, number, pars, processed, ptr, OP_SET);
- case CC_BITMAP:
- if (!allow_dynamic)
- return "Bitmaps cannot be used here";
- return interpret_add_bitmap(item, number, pars, processed, ptr, OP_SET);
- default:
- ASSERT(0);
- }
-}
-
-static char *
-interpret_set_all(struct cf_item *item, void *ptr, enum cf_operation op)
-{
- if (item->cls == CC_BITMAP) {
- cf_journal_block(ptr, sizeof(u32));
- if (op == OP_CLEAR)
- * (u32*) ptr = 0;
- else
- if (item->type == CT_INT)
- * (u32*) ptr = ~0u;
- else {
- uns nr = -1;
- while (item->u.lookup[++nr]);
- * (u32*) ptr = ~0u >> (32-nr);
- }
- return NULL;
- } else if (op != OP_CLEAR)
- return "The item is not a bitmap";
-
- if (item->cls == CC_LIST) {
- cf_journal_block(ptr, sizeof(clist));
- clist_init(ptr);
- } else if (item->cls == CC_DYNAMIC) {
- cf_journal_block(ptr, sizeof(void *));
- static uns zero = 0;
- * (void**) ptr = (&zero) + 1;
- } else if (item->cls == CC_STATIC && item->type == CT_STRING) {
- cf_journal_block(ptr, item->number * sizeof(char*));
- bzero(ptr, item->number * sizeof(char*));
- } else
- return "The item is not a list, dynamic array, bitmap, or string";
- return NULL;
-}
-
-static int
-cmp_items(void *i1, void *i2, struct cf_item *item)
-{
- ASSERT(item->cls == CC_STATIC);
- i1 += (uintptr_t) item->ptr;
- i2 += (uintptr_t) item->ptr;
- if (item->type == CT_STRING)
- return strcmp(* (char**) i1, * (char**) i2);
- else // all numeric types
- return memcmp(i1, i2, cf_type_size(item->type, item->u.utype));
-}
-
-static void *
-find_list_node(clist *list, void *query, struct cf_section *sec, u32 mask)
-{
- CLIST_FOR_EACH(cnode *, n, *list)
- {
- uns found = 1;
- for (uns i=0; i<32; i++)
- if (mask & (1<<i))
- if (cmp_items(n, query, sec->cfg+i))
- {
- found = 0;
- break;
- }
- if (found)
- return n;
- }
- return NULL;
-}
-
-static char *
-record_selector(struct cf_item *item, struct cf_section *sec, u32 *mask)
-{
- uns nr = sec->flags & SEC_FLAG_NUMBER;
- if (item >= sec->cfg && item < sec->cfg + nr) // setting an attribute relative to this section
- {
- uns i = item - sec->cfg;
- if (i >= 32)
- return "Cannot select list nodes by this attribute";
- if (sec->cfg[i].cls != CC_STATIC)
- return "Selection can only be done based on basic attributes";
- *mask |= 1 << i;
- }
- return NULL;
-}
-
-#define MAX_STACK_SIZE 10
-static struct item_stack {
- struct cf_section *sec; // nested section
- void *base_ptr; // because original pointers are often relative
- enum cf_operation op; // it is performed when a closing brace is encountered
- void *list; // list the operations should be done on
- u32 mask; // bit array of selectors searching in a list
- struct cf_item *item; // cf_item of the list
-} stack[MAX_STACK_SIZE];
-static uns level;
-
-static char *
-opening_brace(struct cf_item *item, void *ptr, enum cf_operation op)
-{
- if (level >= MAX_STACK_SIZE-1)
- return "Too many nested sections";
- enum cf_operation pure_op = op & OP_MASK;
- stack[++level] = (struct item_stack) {
- .sec = NULL,
- .base_ptr = NULL,
- .op = pure_op,
- .list = NULL,
- .mask = 0,
- .item = NULL,
- };
- if (!item) // unknown is ignored; we just need to trace recursion
- return NULL;
- stack[level].sec = item->u.sec;
- if (item->cls == CC_SECTION)
- {
- if (pure_op != OP_SET)
- return "Only SET operation can be used with a section";
- stack[level].base_ptr = ptr;
- stack[level].op = OP_EDIT | OP_2ND; // this list operation does nothing
- }
- else if (item->cls == CC_LIST)
- {
- stack[level].base_ptr = cf_malloc(item->u.sec->size);
- cf_init_section(item->name, item->u.sec, stack[level].base_ptr, 1);
- stack[level].list = ptr;
- stack[level].item = item;
- if (pure_op == OP_ALL)
- return "Operation ALL cannot be applied on lists";
- else if (pure_op < OP_REMOVE) {
- add_to_list(ptr, stack[level].base_ptr, pure_op);
- stack[level].op |= OP_2ND;
- } else
- stack[level].op |= OP_1ST;
- }
- else
- return "Opening brace can only be used on sections and lists";
- return NULL;
-}
-
-static char *
-closing_brace(struct item_stack *st, enum cf_operation op, int number, char **pars)
-{
- if (st->op == OP_CLOSE) // top-level
- return "Unmatched } parenthesis";
- if (!st->sec) { // dummy run on unknown section
- if (!(op & OP_OPEN))
- level--;
- return NULL;
- }
- enum cf_operation pure_op = st->op & OP_MASK;
- if (st->op & OP_1ST)
- {
- st->list = find_list_node(st->list, st->base_ptr, st->sec, st->mask);
- if (!st->list)
- return "Cannot find a node matching the query";
- if (pure_op != OP_REMOVE)
- {
- if (pure_op == OP_EDIT)
- st->base_ptr = st->list;
- else if (pure_op == OP_AFTER || pure_op == OP_BEFORE)
- cf_init_section(st->item->name, st->sec, st->base_ptr, 1);
- else if (pure_op == OP_COPY) {
- if (st->sec->flags & SEC_FLAG_CANT_COPY)
- return cf_printf("Item %s cannot be copied", st->item->name);
- memcpy(st->base_ptr, st->list, st->sec->size); // strings and dynamic arrays are shared
- if (st->sec->copy)
- TRY( st->sec->copy(st->base_ptr, st->list) );
- } else
- ASSERT(0);
- if (op & OP_OPEN) { // stay at the same recursion level
- st->op = (st->op | OP_2ND) & ~OP_1ST;
- add_to_list(st->list, st->base_ptr, pure_op);
- return NULL;
- }
- int taken; // parse parameters on 1 line immediately
- TRY( interpret_section(st->sec, number, pars, &taken, st->base_ptr, 1) );
- number -= taken;
- pars += taken;
- // and fall-thru to the 2nd phase
- }
- add_to_list(st->list, st->base_ptr, pure_op);
- }
- level--;
- if (number)
- return "No parameters expected after the }";
- else if (op & OP_OPEN)
- return "No { is expected";
- else
- return NULL;
-}
-
-static struct cf_item *
-find_item(struct cf_section *curr_sec, const char *name, char **msg, void **ptr)
-{
- *msg = NULL;
- if (name[0] == '^') // absolute name instead of relative
- name++, curr_sec = &cf_sections, *ptr = NULL;
- if (!curr_sec) // don't even search in an unknown section
- return NULL;
- while (1)
- {
- if (curr_sec != &cf_sections)
- cf_add_dirty(curr_sec, *ptr);
- char *c = strchr(name, '.');
- if (c)
- *c++ = 0;
- struct cf_item *ci = cf_find_subitem(curr_sec, name);
- if (!ci->cls)
- {
- if (!(curr_sec->flags & SEC_FLAG_UNKNOWN)) // ignore silently unknown top-level sections and unknown attributes in flagged sections
- *msg = cf_printf("Unknown item %s", name);
- return NULL;
- }
- *ptr += (uintptr_t) ci->ptr;
- if (!c)
- return ci;
- if (ci->cls != CC_SECTION)
- {
- *msg = cf_printf("Item %s is not a section", name);
- return NULL;
- }
- curr_sec = ci->u.sec;
- name = c;
- }
-}
-
-char *
-cf_interpret_line(char *name, enum cf_operation op, int number, char **pars)
-{
- char *msg;
- if ((op & OP_MASK) == OP_CLOSE)
- return closing_brace(stack+level, op, number, pars);
- void *ptr = stack[level].base_ptr;
- struct cf_item *item = find_item(stack[level].sec, name, &msg, &ptr);
- if (msg)
- return msg;
- if (stack[level].op & OP_1ST)
- TRY( record_selector(item, stack[level].sec, &stack[level].mask) );
- if (op & OP_OPEN) { // the operation will be performed after the closing brace
- if (number)
- return "Cannot open a block after a parameter has been passed on a line";
- return opening_brace(item, ptr, op);
- }
- if (!item) // ignored item in an unknown section
- return NULL;
- op &= OP_MASK;
-
- int taken = 0; // process as many parameters as possible
- if (op == OP_CLEAR || op == OP_ALL)
- msg = interpret_set_all(item, ptr, op);
- else if (op == OP_SET)
- msg = interpret_set_item(item, number, pars, &taken, ptr, 1);
- else if (item->cls == CC_DYNAMIC)
- msg = interpret_add_dynamic(item, number, pars, &taken, ptr, op);
- else if (item->cls == CC_LIST)
- msg = interpret_add_list(item, number, pars, &taken, ptr, op);
- else if (item->cls == CC_BITMAP)
- msg = interpret_add_bitmap(item, number, pars, &taken, ptr, op);
- else
- return cf_printf("Operation %s not supported on attribute %s", cf_op_names[op], name);
- if (msg)
- return msg;
- if (taken < number)
- return cf_printf("Too many parameters: %d>%d", number, taken);
-
- return NULL;
-}
-
-char *
-cf_find_item(const char *name, struct cf_item *item)
-{
- char *msg;
- void *ptr = NULL;
- struct cf_item *ci = find_item(&cf_sections, name, &msg, &ptr);
- if (msg)
- return msg;
- if (ci) {
- *item = *ci;
- item->ptr = ptr;
- } else
- bzero(item, sizeof(struct cf_item));
- return NULL;
-}
-
-char *
-cf_write_item(struct cf_item *item, enum cf_operation op, int number, char **pars)
-{
- char *msg;
- int taken = 0;
- switch (op) {
- case OP_SET:
- msg = interpret_set_item(item, number, pars, &taken, item->ptr, 1);
- break;
- case OP_CLEAR:
- case OP_ALL:
- msg = interpret_set_all(item, item->ptr, op);
- break;
- case OP_APPEND:
- case OP_PREPEND:
- if (item->cls == CC_DYNAMIC)
- msg = interpret_add_dynamic(item, number, pars, &taken, item->ptr, op);
- else if (item->cls == CC_LIST)
- msg = interpret_add_list(item, number, pars, &taken, item->ptr, op);
- else
- return "The attribute does not support append/prepend";
- break;
- case OP_REMOVE:
- if (item->cls == CC_BITMAP)
- msg = interpret_add_bitmap(item, number, pars, &taken, item->ptr, op);
- else
- return "Only applicable on bitmaps";
- break;
- default:
- return "Unsupported operation";
- }
- if (msg)
- return msg;
- if (taken < number)
- return "Too many parameters";
- return NULL;
-}
-
-void
-cf_init_stack(void)
-{
- static uns initialized = 0;
- if (!initialized++) {
- cf_sections.flags |= SEC_FLAG_UNKNOWN;
- cf_sections.size = 0; // size of allocated array used to be stored here
- cf_init_section(NULL, &cf_sections, NULL, 0);
- }
- level = 0;
- stack[0] = (struct item_stack) {
- .sec = &cf_sections,
- .base_ptr = NULL,
- .op = OP_CLOSE,
- .list = NULL,
- .mask = 0,
- .item = NULL
- };
-}
-
-int
-cf_check_stack(void)
-{
- if (level > 0) {
- msg(L_ERROR, "Unterminated block");
- return 1;
- }
- return 0;
-}
-
+++ /dev/null
-/*
- * UCW Library -- Configuration files: journaling
- *
- * (c) 2001--2006 Robert Spalek <robert@ucw.cz>
- * (c) 2003--2006 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-#include "lib/conf.h"
-#include "lib/getopt.h"
-#include "lib/conf-internal.h"
-#include "lib/mempool.h"
-
-#include <string.h>
-
-static struct old_pools {
- struct old_pools *prev;
- struct mempool *pool;
-} *pools; // link-list of older cf_pool's
-
-uns cf_need_journal = 1; // some programs do not need journal
-static struct cf_journal_item {
- struct cf_journal_item *prev;
- byte *ptr;
- uns len;
- byte copy[0];
-} *journal;
-
-void
-cf_journal_block(void *ptr, uns len)
-{
- if (!cf_need_journal)
- return;
- struct cf_journal_item *ji = cf_malloc(sizeof(struct cf_journal_item) + len);
- ji->prev = journal;
- ji->ptr = ptr;
- ji->len = len;
- memcpy(ji->copy, ptr, len);
- journal = ji;
-}
-
-void
-cf_journal_swap(void)
- // swaps the contents of the memory and the journal, and reverses the list
-{
- struct cf_journal_item *curr, *prev, *next;
- for (next=NULL, curr=journal; curr; next=curr, curr=prev)
- {
- prev = curr->prev;
- curr->prev = next;
- for (uns i=0; i<curr->len; i++)
- {
- byte x = curr->copy[i];
- curr->copy[i] = curr->ptr[i];
- curr->ptr[i] = x;
- }
- }
- journal = next;
-}
-
-struct cf_journal_item *
-cf_journal_new_transaction(uns new_pool)
-{
- if (new_pool)
- cf_pool = mp_new(1<<10);
- struct cf_journal_item *oldj = journal;
- journal = NULL;
- return oldj;
-}
-
-void
-cf_journal_commit_transaction(uns new_pool, struct cf_journal_item *oldj)
-{
- if (new_pool)
- {
- struct old_pools *p = cf_malloc(sizeof(struct old_pools));
- p->prev = pools;
- p->pool = cf_pool;
- pools = p;
- }
- if (oldj)
- {
- struct cf_journal_item **j = &journal;
- while (*j)
- j = &(*j)->prev;
- *j = oldj;
- }
-}
-
-void
-cf_journal_rollback_transaction(uns new_pool, struct cf_journal_item *oldj)
-{
- if (!cf_need_journal)
- die("Cannot rollback the configuration, because the journal is disabled.");
- cf_journal_swap();
- journal = oldj;
- if (new_pool)
- {
- mp_delete(cf_pool);
- cf_pool = pools ? pools->pool : NULL;
- }
-}
-
-void
-cf_journal_delete(void)
-{
- for (struct old_pools *p=pools; p; p=pools)
- {
- pools = p->prev;
- mp_delete(p->pool);
- }
-}
-
-/* TODO: more space efficient journal */
+++ /dev/null
-/*
- * UCW Library -- Configuration files: parsers for basic types
- *
- * (c) 2001--2006 Robert Spalek <robert@ucw.cz>
- * (c) 2003--2006 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-#include "lib/conf.h"
-#include "lib/chartype.h"
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <errno.h>
-
-struct unit {
- uns name; // one-letter name of the unit
- uns num, den; // fraction
-};
-
-static const struct unit units[] = {
- { 'd', 86400, 1 },
- { 'h', 3600, 1 },
- { 'k', 1000, 1 },
- { 'm', 1000000, 1 },
- { 'g', 1000000000, 1 },
- { 'K', 1024, 1 },
- { 'M', 1048576, 1 },
- { 'G', 1073741824, 1 },
- { '%', 1, 100 },
- { 0, 0, 0 }
-};
-
-static const struct unit *
-lookup_unit(const char *value, const char *end, char **msg)
-{
- if (end && *end) {
- if (end == value || end[1] || *end >= '0' && *end <= '9')
- *msg = "Invalid number";
- else {
- for (const struct unit *u=units; u->name; u++)
- if ((char)u->name == *end)
- return u;
- *msg = "Invalid unit";
- }
- }
- return NULL;
-}
-
-static char cf_rngerr[] = "Number out of range";
-
-char *
-cf_parse_int(const char *str, int *ptr)
-{
- char *msg = NULL;
- if (!*str)
- msg = "Missing number";
- else {
- const struct unit *u;
- char *end;
- errno = 0;
- uns x = strtoul(str, &end, 0);
- if (errno == ERANGE)
- msg = cf_rngerr;
- else if (u = lookup_unit(str, end, &msg)) {
- u64 y = (u64)x * u->num;
- if (y % u->den)
- msg = "Number is not an integer";
- else {
- y /= u->den;
- if (y > 0xffffffff)
- msg = cf_rngerr;
- *ptr = y;
- }
- } else
- *ptr = x;
- }
- return msg;
-}
-
-char *
-cf_parse_u64(const char *str, u64 *ptr)
-{
- char *msg = NULL;
- if (!*str)
- msg = "Missing number";
- else {
- const struct unit *u;
- char *end;
- errno = 0;
- u64 x = strtoull(str, &end, 0);
- if (errno == ERANGE)
- msg = cf_rngerr;
- else if (u = lookup_unit(str, end, &msg)) {
- if (x > ~(u64)0 / u->num)
- msg = "Number out of range";
- else {
- x *= u->num;
- if (x % u->den)
- msg = "Number is not an integer";
- else
- *ptr = x / u->den;
- }
- } else
- *ptr = x;
- }
- return msg;
-}
-
-char *
-cf_parse_double(const char *str, double *ptr)
-{
- char *msg = NULL;
- if (!*str)
- msg = "Missing number";
- else {
- const struct unit *u;
- double x;
- uns read_chars;
- if (sscanf(str, "%lf%n", &x, &read_chars) != 1)
- msg = "Invalid number";
- else if (u = lookup_unit(str, str + read_chars, &msg))
- *ptr = x * u->num / u->den;
- else
- *ptr = x;
- }
- return msg;
-}
-
-char *
-cf_parse_ip(const char *p, u32 *varp)
-{
- if (!*p)
- return "Missing IP address";
- uns x = 0;
- char *p2;
- if (*p == '0' && (p[1] | 32) == 'x' && Cxdigit(p[2])) {
- errno = 0;
- x = strtoul(p, &p2, 16);
- if (errno == ERANGE || x > 0xffffffff)
- goto error;
- p = p2;
- }
- else
- for (uns i = 0; i < 4; i++) {
- if (i) {
- if (*p++ != '.')
- goto error;
- }
- if (!Cdigit(*p))
- goto error;
- errno = 0;
- uns y = strtoul(p, &p2, 10);
- if (errno == ERANGE || p2 == (char*) p || y > 255)
- goto error;
- p = p2;
- x = (x << 8) + y;
- }
- *varp = x;
- return *p ? "Trailing characters" : NULL;
-error:
- return "Invalid IP address";
-}
-
+++ /dev/null
-/*
- * UCW Library -- Configuration files: sections
- *
- * (c) 2001--2006 Robert Spalek <robert@ucw.cz>
- * (c) 2003--2006 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-#include "lib/conf.h"
-#include "lib/conf-internal.h"
-#include "lib/clists.h"
-#include "lib/binsearch.h"
-
-#include <string.h>
-
-/* Dirty sections */
-
-struct dirty_section {
- struct cf_section *sec;
- void *ptr;
-};
-#define GBUF_TYPE struct dirty_section
-#define GBUF_PREFIX(x) dirtsec_##x
-#include "lib/gbuf.h"
-static dirtsec_t dirty;
-static uns dirties;
-
-void
-cf_add_dirty(struct cf_section *sec, void *ptr)
-{
- dirtsec_grow(&dirty, dirties+1);
- struct dirty_section *dest = dirty.ptr + dirties;
- if (dirties && dest[-1].sec == sec && dest[-1].ptr == ptr)
- return;
- dest->sec = sec;
- dest->ptr = ptr;
- dirties++;
-}
-
-#define ASORT_PREFIX(x) dirtsec_##x
-#define ASORT_KEY_TYPE struct dirty_section
-#define ASORT_ELT(i) dirty.ptr[i]
-#define ASORT_LT(x,y) x.sec < y.sec || x.sec == y.sec && x.ptr < y.ptr
-#include "lib/arraysort.h"
-
-static void
-sort_dirty(void)
-{
- if (dirties <= 1)
- return;
- dirtsec_sort(dirties);
- // and compress the list
- struct dirty_section *read = dirty.ptr + 1, *write = dirty.ptr + 1, *limit = dirty.ptr + dirties;
- while (read < limit) {
- if (read->sec != read[-1].sec || read->ptr != read[-1].ptr) {
- if (read != write)
- *write = *read;
- write++;
- }
- read++;
- }
- dirties = write - dirty.ptr;
-}
-
-/* Initialization */
-
-struct cf_section cf_sections; // root section
-
-struct cf_item *
-cf_find_subitem(struct cf_section *sec, const char *name)
-{
- struct cf_item *ci = sec->cfg;
- for (; ci->cls; ci++)
- if (!strcasecmp(ci->name, name))
- return ci;
- return ci;
-}
-
-static void
-inspect_section(struct cf_section *sec)
-{
- sec->flags = 0;
- struct cf_item *ci;
- for (ci=sec->cfg; ci->cls; ci++)
- if (ci->cls == CC_SECTION) {
- inspect_section(ci->u.sec);
- sec->flags |= ci->u.sec->flags & (SEC_FLAG_DYNAMIC | SEC_FLAG_CANT_COPY);
- } else if (ci->cls == CC_LIST) {
- inspect_section(ci->u.sec);
- sec->flags |= SEC_FLAG_DYNAMIC | SEC_FLAG_CANT_COPY;
- } else if (ci->cls == CC_DYNAMIC || ci->cls == CC_BITMAP)
- sec->flags |= SEC_FLAG_DYNAMIC;
- else if (ci->cls == CC_PARSER) {
- sec->flags |= SEC_FLAG_CANT_COPY;
- if (ci->number < 0)
- sec->flags |= SEC_FLAG_DYNAMIC;
- }
- if (sec->copy)
- sec->flags &= ~SEC_FLAG_CANT_COPY;
- sec->flags |= ci - sec->cfg; // record the number of entries
-}
-
-void
-cf_declare_section(const char *name, struct cf_section *sec, uns allow_unknown)
-{
- if (!cf_sections.cfg)
- {
- cf_sections.size = 50;
- cf_sections.cfg = xmalloc_zero(cf_sections.size * sizeof(struct cf_item));
- }
- struct cf_item *ci = cf_find_subitem(&cf_sections, name);
- if (ci->cls)
- die("Cannot register section %s twice", name);
- ci->cls = CC_SECTION;
- ci->name = name;
- ci->number = 1;
- ci->ptr = NULL;
- ci->u.sec = sec;
- inspect_section(sec);
- if (allow_unknown)
- sec->flags |= SEC_FLAG_UNKNOWN;
- ci++;
- if (ci - cf_sections.cfg >= (int) cf_sections.size)
- {
- cf_sections.cfg = xrealloc(cf_sections.cfg, 2*cf_sections.size * sizeof(struct cf_item));
- bzero(cf_sections.cfg + cf_sections.size, cf_sections.size * sizeof(struct cf_item));
- cf_sections.size *= 2;
- }
-}
-
-void
-cf_init_section(const char *name, struct cf_section *sec, void *ptr, uns do_bzero)
-{
- if (do_bzero) {
- ASSERT(sec->size);
- bzero(ptr, sec->size);
- }
- for (struct cf_item *ci=sec->cfg; ci->cls; ci++)
- if (ci->cls == CC_SECTION)
- cf_init_section(ci->name, ci->u.sec, ptr + (uintptr_t) ci->ptr, 0);
- else if (ci->cls == CC_LIST)
- clist_init(ptr + (uintptr_t) ci->ptr);
- else if (ci->cls == CC_DYNAMIC) {
- void **dyn = ptr + (uintptr_t) ci->ptr;
- if (!*dyn) { // replace NULL by an empty array
- static uns zero = 0;
- *dyn = (&zero) + 1;
- }
- }
- if (sec->init) {
- char *msg = sec->init(ptr);
- if (msg)
- die("Cannot initialize section %s: %s", name, msg);
- }
-}
-
-static char *
-commit_section(struct cf_section *sec, void *ptr, uns commit_all)
-{
- char *err;
- for (struct cf_item *ci=sec->cfg; ci->cls; ci++)
- if (ci->cls == CC_SECTION) {
- if ((err = commit_section(ci->u.sec, ptr + (uintptr_t) ci->ptr, commit_all))) {
- msg(L_ERROR, "Cannot commit section %s: %s", ci->name, err);
- return "commit of a subsection failed";
- }
- } else if (ci->cls == CC_LIST) {
- uns idx = 0;
- CLIST_FOR_EACH(cnode *, n, * (clist*) (ptr + (uintptr_t) ci->ptr))
- if (idx++, err = commit_section(ci->u.sec, n, commit_all)) {
- msg(L_ERROR, "Cannot commit node #%d of list %s: %s", idx, ci->name, err);
- return "commit of a list failed";
- }
- }
- if (sec->commit) {
- /* We have to process the whole tree of sections even if just a few changes
- * have been made, because there are dependencies between commit-hooks and
- * hence we need to call them in a fixed order. */
-#define ARY_LT_X(ary,i,x) ary[i].sec < x.sec || ary[i].sec == x.sec && ary[i].ptr < x.ptr
- struct dirty_section comp = { sec, ptr };
- uns pos = BIN_SEARCH_FIRST_GE_CMP(dirty.ptr, dirties, comp, ARY_LT_X);
-
- if (commit_all
- || (pos < dirties && dirty.ptr[pos].sec == sec && dirty.ptr[pos].ptr == ptr))
- return sec->commit(ptr);
- }
- return 0;
-}
-
-int
-cf_commit_all(enum cf_commit_mode cm)
-{
- sort_dirty();
- if (cm == CF_NO_COMMIT)
- return 0;
- if (commit_section(&cf_sections, NULL, cm == CF_COMMIT_ALL))
- return 1;
- dirties = 0;
- return 0;
-}
+++ /dev/null
-/*
- * Insane tester of reading configuration files
- *
- * (c) 2006 Robert Spalek <robert@ucw.cz>
- */
-
-#include "lib/lib.h"
-#include "lib/conf.h"
-#include "lib/getopt.h"
-#include "lib/clists.h"
-#include "lib/fastbuf.h"
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <time.h>
-
-static int verbose;
-
-struct sub_sect_1 {
- cnode n;
- char *name;
- time_t t;
- char *level;
- int confidence[2];
- double *list;
-};
-
-static struct sub_sect_1 sec1 = { {}, "Charlie", 0, "WBAFC", { 0, -1}, DARY_ALLOC(double, 3, 1e4, -1e-4, 8) };
-
-static char *
-init_sec_1(struct sub_sect_1 *s)
-{
- if (s == &sec1) // this is a static variable; skip clearing
- return NULL;
- s->name = "unknown";
- s->level = "default";
- s->confidence[0] = 5;
- s->confidence[1] = 6;
- // leave s->list==NULL
- return NULL;
-}
-
-static char *
-commit_sec_1(struct sub_sect_1 *s)
-{
- if (s->confidence[0] < 0 || s->confidence[0] > 10)
- return "Well, this can't be";
- return NULL;
-}
-
-static char *
-time_parser(uns number, char **pars, time_t *ptr)
-{
- *ptr = number ? atoi(pars[0]) : time(NULL);
- return NULL;
-}
-
-static struct cf_section cf_sec_1 = {
- CF_TYPE(struct sub_sect_1),
- CF_INIT(init_sec_1),
- CF_COMMIT(commit_sec_1),
-#define F(x) PTR_TO(struct sub_sect_1, x)
- CF_ITEMS {
- CF_STRING("name", F(name)),
- //CF_PARSER("t", F(t), time_parser, 0),
- CF_STRING("level", F(level)),
- CF_INT_ARY("confidence", F(confidence[0]), 2), // XXX: the [0] is needed for the sake of type checking
- CF_DOUBLE_DYN("list", F(list), 100),
- CF_END
- }
-#undef F
-};
-
-static uns nr1 = 15;
-static int *nrs1 = DARY_ALLOC(int, 5, 5, 4, 3, 2, 1);
-static int nrs2[5];
-static char *str1 = "no worries";
-static char **str2 = DARY_ALLOC(char *, 2, "Alice", "Bob");
-static u64 u1 = 0xCafeBeefDeadC00ll;
-static double d1 = -1.1;
-static clist secs;
-static time_t t1, t2;
-static u32 ip;
-static int *look = DARY_ALLOC(int, 2, 2, 1);
-static u16 numbers[10] = { 2, 100, 1, 5 };
-static u32 bitmap1 = 0xff;
-static u32 bitmap2 = 3;
-
-static char *
-parse_u16(char *string, u16 *ptr)
-{
- uns a;
- char *msg = cf_parse_int(string, &a);
- if (msg)
- return msg;
- if (a >= (1<<16))
- return "Come on, man, this doesn't fit to 16 bits";
- *ptr = a;
- return NULL;
-}
-
-static void
-dump_u16(struct fastbuf *fb, u16 *ptr)
-{
- bprintf(fb, "%d ", *ptr);
-}
-
-static struct cf_user_type u16_type = {
- .size = sizeof(u16),
- .name = "u16",
- .parser = (cf_parser1*) parse_u16,
- .dumper = (cf_dumper1*) dump_u16
-};
-
-static char *
-init_top(void *ptr UNUSED)
-{
- for (uns i=0; i<5; i++)
- {
- struct sub_sect_1 *s = xmalloc(sizeof(struct sub_sect_1)); // XXX: cannot by cf_malloc(), because it's deleted when cf_reload()'ed
- cf_init_section("slaves", &cf_sec_1, s, 1);
- s->confidence[1] = i;
- clist_add_tail(&secs, &s->n);
- }
- return NULL;
-}
-
-static char *
-commit_top(void *ptr UNUSED)
-{
- if (nr1 != 15)
- return "Don't touch my variable!";
- return NULL;
-}
-
-static char *alphabet[] = { "alpha", "beta", "gamma", "delta", NULL };
-static struct cf_section cf_top = {
- CF_INIT(init_top),
- CF_COMMIT(commit_top),
- CF_ITEMS {
- CF_UNS("nr1", &nr1),
- CF_INT_DYN("nrs1", &nrs1, 1000),
- CF_INT_ARY("nrs2", nrs2, 5),
- CF_STRING("str1", &str1),
- CF_STRING_DYN("str2", &str2, 20),
- CF_U64("u1", &u1),
- CF_DOUBLE("d1", &d1),
- CF_PARSER("FirstTime", &t1, time_parser, -1),
- CF_PARSER("SecondTime", &t2, time_parser, 1),
- CF_SECTION("master", &sec1, &cf_sec_1),
- CF_LIST("slaves", &secs, &cf_sec_1),
- CF_IP("ip", &ip),
- CF_LOOKUP_DYN("look", &look, alphabet, 1000),
- CF_USER_ARY("numbers", numbers, &u16_type, 10),
- CF_BITMAP_INT("bitmap1", &bitmap1),
- CF_BITMAP_LOOKUP("bitmap2", &bitmap2, ((char*[]) {
- "one", "two", "three", "four", "five", "six", "seven", "eight",
- "nine", "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen", "seventeen",
- "eighteen", "nineteen", "twenty", NULL // hidden joke here
- })),
- CF_END
- }
-};
-
-static byte short_opts[] = CF_SHORT_OPTS "v";
-static struct option long_opts[] = {
- CF_LONG_OPTS
- {"verbose", 0, 0, 'v'},
- {NULL, 0, 0, 0}
-};
-
-static char *help = "\
-Usage: conf-test <options>\n\
-\n\
-Options:\n"
-CF_USAGE
-"-v\t\t\tBe verbose\n\
-";
-
-static void NONRET
-usage(char *msg, ...)
-{
- va_list va;
- va_start(va, msg);
- if (msg)
- vfprintf(stderr, msg, va);
- fputs(help, stderr);
- exit(1);
-}
-
-int
-main(int argc, char *argv[])
-{
- log_init(argv[0]);
- cf_declare_section("top", &cf_top, 0);
- cf_def_file = "lib/conf-test.cf";
-
- int opt;
- while ((opt = cf_getopt(argc, argv, short_opts, long_opts, NULL)) >= 0)
- switch (opt) {
- case 'v': verbose++; break;
- default: usage("unknown option %c\n", opt);
- }
- if (optind < argc)
- usage("too many parameters (%d more)\n", argc-optind);
-
- /*
- cf_load("non-existent file");
- //cf_reload("non-existent file");
- cf_load("non-existent file");
- cf_set("top.d1 -1.1; top.master b");
- */
-
- struct fastbuf *out = bfdopen(1, 1<<14);
- cf_dump_sections(out);
- bclose(out);
-
- return 0;
-}
+++ /dev/null
-# test config file
-#include lib/conf-test.t ; top.xa=1
-#include 'non-existent file'; #top.xa=1
-Top { \
-
- nr1=16 #!!!
- nrs1 2 3 5 \
- 7 11 13 \
- \
- 17M
- nrs2 3 3k 3 3 3 ; \
- str1 "hello,\t\x2bworld%%\n"
- str2 'Hagenuk,
- the best' "\
- " qu'est-ce que c'est?
- u1 0xbadcafebadbeefc0
- str2:prepend prepended
- str2:append appended
- d1 7%
- d1 -1.14e-25
- firsttime ; secondtime 56
- ^top.master:set alice HB8+
- slaves:clear
- ip 0xa
- ip 195.113.31.123
- look Alpha
- look:prepend Beta GAMMA
- numbers 11000 65535
- bitmap1 31
- bitmap1:remove 3 3
- bitmap2:all
- bitmap2:remove eleven twelve one
-};;;;;;
-
-unknown.ignored :-)
-
-top.slaves cairns gpua 7 7 -10% +10%
-top.slaves daintree rafc 4 5 -171%
-top.slaves coogee pum 9 8
-top.slaves:prepend {name=bondi; level=\
- "PUG"; confidence 10 10}
-top.slaves:remove {name daintree}
-top.slaveS:edit {level PUG} Bondi PUG!
-top.slaveS:before {level pum}{
- confidence 2
- list 123 456 789
-}
-top.slaves:copy {name coogee} Coogee2 PUM
-
-topp.a=15
-top.nr1= ' 15'
-a { ;-D }
+++ /dev/null
-/*
- * UCW Library -- Configuration files
- *
- * (c) 2001--2006 Robert Spalek <robert@ucw.cz>
- * (c) 2003--2006 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#ifndef _UCW_CONF_H
-#define _UCW_CONF_H
-
-enum cf_class {
- CC_END, // end of list
- CC_STATIC, // single variable or static array
- CC_DYNAMIC, // dynamically allocated array
- CC_PARSER, // arbitrary parser function
- CC_SECTION, // section appears exactly once
- CC_LIST, // list with 0..many nodes
- CC_BITMAP // of up to 32 items
-};
-
-enum cf_type {
- CT_INT, CT_U64, CT_DOUBLE, // number types
- CT_IP, // IP address
- CT_STRING, // string type
- CT_LOOKUP, // in a string table
- CT_USER // user-defined type
-};
-
-struct fastbuf;
-typedef char *cf_parser(uns number, char **pars, void *ptr);
- /* A parser function gets an array of (strdup'ed) strings and a pointer with
- * the customized information (most likely the target address). It can store
- * the parsed value anywhere in any way it likes, however it must first call
- * cf_journal_block() on the overwritten memory block. It returns an error
- * message or NULL if everything is all right. */
-typedef char *cf_parser1(char *string, void *ptr);
- /* A parser function for user-defined types gets a string and a pointer to
- * the destination variable. It must store the value within [ptr,ptr+size),
- * where size is fixed for each type. It should not call cf_journal_block(). */
-typedef char *cf_hook(void *ptr);
- /* An init- or commit-hook gets a pointer to the section or NULL if this
- * is the global section. It returns an error message or NULL if everything
- * is all right. The init-hook should fill in default values (needed for
- * dynamically allocated nodes of link lists or for filling global variables
- * that are run-time dependent). The commit-hook should perform sanity
- * checks and postprocess the parsed values. Commit-hooks must call
- * cf_journal_block() too. Caveat! init-hooks for static sections must not
- * use cf_malloc() but normal xmalloc(). */
-typedef void cf_dumper1(struct fastbuf *fb, void *ptr);
- /* Dumps the contents of a variable of a user-defined type. */
-typedef char *cf_copier(void *dest, void *src);
- /* Similar to init-hook, but it copies attributes from another list node
- * instead of setting the attributes to default values. You have to provide
- * it if your node contains parsed values and/or sub-lists. */
-
-struct cf_user_type {
- uns size; // of the parsed attribute
- char *name; // name of the type (for dumping)
- cf_parser1 *parser; // how to parse it
- cf_dumper1 *dumper; // how to dump the type
-};
-
-struct cf_section;
-struct cf_item {
- const char *name; // case insensitive
- int number; // length of an array or #parameters of a parser (negative means at most)
- void *ptr; // pointer to a global variable or an offset in a section
- union cf_union {
- struct cf_section *sec; // declaration of a section or a list
- cf_parser *par; // parser function
- char **lookup; // NULL-terminated sequence of allowed strings for lookups
- struct cf_user_type *utype; // specification of the user-defined type
- } u;
- enum cf_class cls:16; // attribute class
- enum cf_type type:16; // type of a static or dynamic attribute
-};
-
-struct cf_section {
- uns size; // 0 for a global block, sizeof(struct) for a section
- cf_hook *init; // fills in default values (no need to bzero)
- cf_hook *commit; // verifies parsed data (optional)
- cf_copier *copy; // copies values from another instance (optional, no need to copy basic attributes)
- struct cf_item *cfg; // CC_END-terminated array of items
- uns flags; // for internal use only
-};
-
-/* Declaration of cf_section */
-#define CF_TYPE(s) .size = sizeof(s)
-#define CF_INIT(f) .init = (cf_hook*) f
-#define CF_COMMIT(f) .commit = (cf_hook*) f
-#define CF_COPY(f) .copy = (cf_copier*) f
-#define CF_ITEMS .cfg = ( struct cf_item[] )
-#define CF_END { .cls = CC_END }
-/* Configuration items */
-#define CF_STATIC(n,p,T,t,c) { .cls = CC_STATIC, .type = CT_##T, .name = n, .number = c, .ptr = CHECK_PTR_TYPE(p,t*) }
-#define CF_DYNAMIC(n,p,T,t,c) { .cls = CC_DYNAMIC, .type = CT_##T, .name = n, .number = c, .ptr = CHECK_PTR_TYPE(p,t**) }
-#define CF_PARSER(n,p,f,c) { .cls = CC_PARSER, .name = n, .number = c, .ptr = p, .u.par = (cf_parser*) f }
-#define CF_SECTION(n,p,s) { .cls = CC_SECTION, .name = n, .number = 1, .ptr = p, .u.sec = s }
-#define CF_LIST(n,p,s) { .cls = CC_LIST, .name = n, .number = 1, .ptr = CHECK_PTR_TYPE(p,clist*), .u.sec = s }
-#define CF_BITMAP_INT(n,p) { .cls = CC_BITMAP, .type = CT_INT, .name = n, .number = 1, .ptr = CHECK_PTR_TYPE(p,u32*) }
-#define CF_BITMAP_LOOKUP(n,p,t) { .cls = CC_BITMAP, .type = CT_LOOKUP, .name = n, .number = 1, .ptr = CHECK_PTR_TYPE(p,u32*), .u.lookup = t }
-/* Configuration items for basic types */
-#define CF_INT(n,p) CF_STATIC(n,p,INT,int,1)
-#define CF_INT_ARY(n,p,c) CF_STATIC(n,p,INT,int,c)
-#define CF_INT_DYN(n,p,c) CF_DYNAMIC(n,p,INT,int,c)
-#define CF_UNS(n,p) CF_STATIC(n,p,INT,uns,1)
-#define CF_UNS_ARY(n,p,c) CF_STATIC(n,p,INT,uns,c)
-#define CF_UNS_DYN(n,p,c) CF_DYNAMIC(n,p,INT,uns,c)
-#define CF_U64(n,p) CF_STATIC(n,p,U64,u64,1)
-#define CF_U64_ARY(n,p,c) CF_STATIC(n,p,U64,u64,c)
-#define CF_U64_DYN(n,p,c) CF_DYNAMIC(n,p,U64,u64,c)
-#define CF_DOUBLE(n,p) CF_STATIC(n,p,DOUBLE,double,1)
-#define CF_DOUBLE_ARY(n,p,c) CF_STATIC(n,p,DOUBLE,double,c)
-#define CF_DOUBLE_DYN(n,p,c) CF_DYNAMIC(n,p,DOUBLE,double,c)
-#define CF_IP(n,p) CF_STATIC(n,p,IP,u32,1)
-#define CF_IP_ARY(n,p,c) CF_STATIC(n,p,IP,u32,c)
-#define CF_IP_DYN(n,p,c) CF_DYNAMIC(n,p,IP,u32,c)
-#define CF_STRING(n,p) CF_STATIC(n,p,STRING,char*,1)
-#define CF_STRING_ARY(n,p,c) CF_STATIC(n,p,STRING,char*,c)
-#define CF_STRING_DYN(n,p,c) CF_DYNAMIC(n,p,STRING,char*,c)
-#define CF_LOOKUP(n,p,t) { .cls = CC_STATIC, .type = CT_LOOKUP, .name = n, .number = 1, .ptr = CHECK_PTR_TYPE(p,int*), .u.lookup = t }
-#define CF_LOOKUP_ARY(n,p,t,c) { .cls = CC_STATIC, .type = CT_LOOKUP, .name = n, .number = c, .ptr = CHECK_PTR_TYPE(p,int*), .u.lookup = t }
-#define CF_LOOKUP_DYN(n,p,t,c) { .cls = CC_DYNAMIC, .type = CT_LOOKUP, .name = n, .number = c, .ptr = CHECK_PTR_TYPE(p,int**), .u.lookup = t }
-#define CF_USER(n,p,t) { .cls = CC_STATIC, .type = CT_USER, .name = n, .number = 1, .ptr = p, .u.utype = t }
-#define CF_USER_ARY(n,p,t,c) { .cls = CC_STATIC, .type = CT_USER, .name = n, .number = c, .ptr = p, .u.utype = t }
-#define CF_USER_DYN(n,p,t,c) { .cls = CC_DYNAMIC, .type = CT_USER, .name = n, .number = c, .ptr = p, .u.utype = t }
-
-/* If you aren't picky about the number of parameters */
-#define CF_ANY_NUM -0x7fffffff
-
-#define DARY_LEN(a) ((uns*)a)[-1]
- // length of a dynamic array
-#define DARY_ALLOC(type,len,val...) ((struct { uns l; type a[len]; }) { .l = len, .a = { val } }).a
- // creates a static instance of a dynamic array
-
-/* Memory allocation: conf-alloc.c */
-struct mempool;
-extern struct mempool *cf_pool;
-void *cf_malloc(uns size);
-void *cf_malloc_zero(uns size);
-char *cf_strdup(const char *s);
-char *cf_printf(const char *fmt, ...) FORMAT_CHECK(printf,1,2);
-
-/* Undo journal for error recovery: conf-journal.c */
-extern uns cf_need_journal;
-void cf_journal_block(void *ptr, uns len);
-#define CF_JOURNAL_VAR(var) cf_journal_block(&(var), sizeof(var))
-
-/* Declaration: conf-section.c */
-void cf_declare_section(const char *name, struct cf_section *sec, uns allow_unknown);
-void cf_init_section(const char *name, struct cf_section *sec, void *ptr, uns do_bzero);
-
-/* Parsers for basic types: conf-parse.c */
-char *cf_parse_int(const char *str, int *ptr);
-char *cf_parse_u64(const char *str, u64 *ptr);
-char *cf_parse_double(const char *str, double *ptr);
-char *cf_parse_ip(const char *p, u32 *varp);
-
-#endif
-
+++ /dev/null
-/*
- * UCW Library -- Configuration-Dependent Definitions
- *
- * (c) 1997--2007 Martin Mares <mj@ucw.cz>
- * (c) 2006 Robert Spalek <robert@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#ifndef _UCW_CONFIG_H
-#define _UCW_CONFIG_H
-
-/* Configuration switches */
-
-#include "autoconf.h"
-
-/* Tell libc we're going to use all extensions available */
-
-#ifndef _GNU_SOURCE
-#define _GNU_SOURCE
-#endif
-
-/* Types (based on standard C99 integers) */
-
-#include <stddef.h>
-#include <stdint.h>
-
-typedef uint8_t byte; /* exactly 8 bits, unsigned */
-typedef uint8_t u8; /* exactly 8 bits, unsigned */
-typedef int8_t s8; /* exactly 8 bits, signed */
-typedef uint16_t u16; /* exactly 16 bits, unsigned */
-typedef int16_t s16; /* exactly 16 bits, signed */
-typedef uint32_t u32; /* exactly 32 bits, unsigned */
-typedef int32_t s32; /* exactly 32 bits, signed */
-typedef uint64_t u64; /* exactly 64 bits, unsigned */
-typedef int64_t s64; /* exactly 64 bits, signed */
-
-typedef unsigned int uns; /* at least 32 bits */
-typedef u32 sh_time_t; /* seconds since UNIX epoch */
-typedef s64 timestamp_t; /* milliseconds since UNIX epoch */
-
-#ifdef CONFIG_LARGE_FILES /* File positions */
-typedef s64 sh_off_t;
-#else
-typedef s32 sh_off_t;
-#endif
-
-#endif
+++ /dev/null
-/*
- * UCW Library -- Content-Type Pattern Matching
- *
- * (c) 1997 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-#include "lib/chartype.h"
-
-/*
- * Match the content type `t' against the pattern `p'.
- *
- * The pattern is either a lone "*" (matches anything) or two halves
- * separated by a slash, where either half may be a "*" wildcard.
- * Comparison is case-insensitive (Cupcase) and the content type is
- * considered to end at NUL, a space or a semicolon.
- *
- * Returns 1 on match, 0 otherwise. A pattern without a slash (other
- * than the lone "*") never matches.
- */
-int
-match_ct_patt(const char *p, const char *t)
-{
-  if (*p == '*' && !p[1])		/* "*" matches everything */
-    return 1;
-
-  if (*p == '*' && p[1] == '/')		/* "*" on the left-hand side */
-    {
-      while (*t && *t != ' ' && *t != ';' && *t != '/')
-        t++;
-      p += 2;
-    }
-  else					/* Normal left-hand side */
-    {
-      /*
-       * Stop at the end of the pattern as well: without this check a
-       * pattern lacking '/' that equals the whole of `t' would compare
-       * the two terminating NULs as equal and keep reading past both strings.
-       */
-      while (*p && *p != '/')
-        if (Cupcase(*p++) != Cupcase(*t++))
-          return 0;
-      if (!*p)				/* No slash in the pattern */
-        return 0;
-      p++;
-    }
-  if (*t++ != '/')
-    return 0;
-
-  if (*p == '*' && !p[1])		/* "*" on the right-hand side */
-    return 1;
-  while (*p)
-    if (Cupcase(*p++) != Cupcase(*t++))
-      return 0;
-  if (*t && *t != ' ' && *t != ';')	/* Trailing garbage after the subtype */
-    return 0;
-
-  return 1;
-}
+++ /dev/null
-/*
- * UCW Library -- SDBM emulator at top of GDBM
- *
- * (c) 1999 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-#include "lib/db.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include <gdbm.h>
-
-/* Handle of the emulated database: a GDBM file plus the position of the current walk. */
-struct sdbm {
- GDBM_FILE db; /* underlying GDBM handle obtained from gdbm_open() */
- datum prevkey; /* key last returned by sdbm_get_next(); dptr is NULL when the walk is rewound */
-};
-
-/*
- * Open (or create) a GDBM file according to the SDBM options in `o'.
- *
- * page_order is translated to a GDBM block size (0 = let GDBM choose),
- * SDBM_WRITE/SDBM_CREAT/SDBM_SYNC are mapped to the GDBM open flags and
- * a non-zero cache_size is passed on via GDBM_CACHESIZE.
- *
- * Returns the new handle, or NULL when gdbm_open() fails.
- */
-struct sdbm *
-sdbm_open(struct sdbm_options *o)
-{
-  struct sdbm *d = xmalloc(sizeof(struct sdbm));
-  int mode;
-
-  if (o->flags & SDBM_WRITE)
-    mode = (o->flags & SDBM_CREAT) ? GDBM_WRCREAT : GDBM_WRITER;
-  else
-    mode = GDBM_READER;
-  if (o->flags & SDBM_SYNC)
-    mode |= GDBM_SYNC;
-
-  d->db = gdbm_open(o->name,
-                    (o->page_order ? (1 << o->page_order) : 0),
-                    mode,
-                    0666,
-                    NULL);
-  if (!d->db)
-    {
-      /* Do not hand out a handle wrapping a NULL GDBM_FILE; callers test the result for NULL */
-      xfree(d);
-      return NULL;
-    }
-  if (o->cache_size)
-    gdbm_setopt(d->db, GDBM_CACHESIZE, &o->cache_size, sizeof(o->cache_size));
-  d->prevkey.dptr = NULL;
-  return d;
-}
-
-/* Drop the walk state, close the GDBM file and release the handle itself. */
-void
-sdbm_close(struct sdbm *d)
-{
-  sdbm_rewind(d);		/* frees the remembered key, if any */
-  gdbm_close(d->db);
-  xfree(d);
-}
-
-/*
- * Hand Dl bytes at D over to a caller-supplied buffer.
- * When vallen is given, *vallen is the buffer capacity on entry and the
- * data length on exit; when val is given, the data are copied there.
- * Returns 1 if the data do not fit (nothing is written), 0 otherwise.
- */
-static int
-sdbm_put_user(byte *D, uns Dl, byte *val, uns *vallen)
-{
-  if (vallen && *vallen < Dl)
-    return 1;
-  if (vallen)
-    *vallen = Dl;
-  if (val)
-    memcpy(val, D, Dl);
-  return 0;
-}
-
-/*
- * Insert a new record (GDBM_INSERT). A negative gdbm_store() result is
- * passed through unchanged; otherwise the result is logically negated,
- * so a zero result from GDBM yields 1 and a non-zero one yields 0.
- */
-int
-sdbm_store(struct sdbm *d, byte *key, uns keylen, byte *val, uns vallen)
-{
-  datum K, V;
-  int rc;
-
-  K.dsize = keylen;
-  K.dptr = key;
-  V.dsize = vallen;
-  V.dptr = val;
-  rc = gdbm_store(d->db, K, V, GDBM_INSERT);
-  if (rc < 0)
-    return rc;
-  return !rc;
-}
-
-/*
- * Store a record with GDBM_REPLACE; a NULL val means "delete the key".
- * The result is mapped the same way as in sdbm_store().
- */
-int
-sdbm_replace(struct sdbm *d, byte *key, uns keylen, byte *val, uns vallen)
-{
-  datum K, V;
-  int rc;
-
-  if (val == NULL)
-    return sdbm_delete(d, key, keylen);
-
-  K.dsize = keylen;
-  K.dptr = key;
-  V.dsize = vallen;
-  V.dptr = val;
-  rc = gdbm_store(d->db, K, V, GDBM_REPLACE);
-  if (rc < 0)
-    return rc;
-  return !rc;
-}
-
-/* Delete the record with the given key; returns 1 iff gdbm_delete() returned zero. */
-int
-sdbm_delete(struct sdbm *d, byte *key, uns keylen)
-{
-  datum K;
-
-  K.dsize = keylen;
-  K.dptr = key;
-  return gdbm_delete(d->db, K) == 0;
-}
-
-/*
- * Look up a key. With neither val nor vallen only existence is tested
- * (result of gdbm_exists()). Otherwise the value is fetched and handed
- * over via sdbm_put_user(): returns 0 if the key is missing, 1 on success
- * and SDBM_ERROR_TOO_LARGE if the value does not fit in *vallen bytes.
- */
-int
-sdbm_fetch(struct sdbm *d, byte *key, uns keylen, byte *val, uns *vallen)
-{
-  datum K, V;
-  int too_large;
-
-  K.dsize = keylen;
-  K.dptr = key;
-  if (!(val || vallen))
-    return gdbm_exists(d->db, K);
-
-  V = gdbm_fetch(d->db, K);
-  if (V.dptr == NULL)
-    return 0;
-  too_large = sdbm_put_user(V.dptr, V.dsize, val, vallen);
-  xfree(V.dptr);		/* the fetched value is ours to release */
-  if (too_large)
-    return SDBM_ERROR_TOO_LARGE;
-  return 1;
-}
-
-/* Restart the walk: forget (and free) the key remembered by sdbm_get_next(). */
-void
-sdbm_rewind(struct sdbm *d)
-{
-  if (d->prevkey.dptr == NULL)
-    return;
-  xfree(d->prevkey.dptr);
-  d->prevkey.dptr = NULL;
-}
-
-/*
- * Advance the walk by one record.
- *
- * The next key is obtained with gdbm_firstkey()/gdbm_nextkey() and kept
- * in d->prevkey (the previously remembered key is freed). The key is
- * handed over through key/keylen and, if val or vallen is given, the
- * value is fetched as well.
- *
- * Returns 0 at the end of the database, SDBM_ERROR_TOO_LARGE if the key
- * does not fit, otherwise the result of sdbm_fetch() or 1.
- */
-int
-sdbm_get_next(struct sdbm *d, byte *key, uns *keylen, byte *val, uns *vallen)
-{
-  datum K;
-
-  if (d->prevkey.dptr)
-    {
-      K = gdbm_nextkey(d->db, d->prevkey);
-      xfree(d->prevkey.dptr);
-    }
-  else
-    K = gdbm_firstkey(d->db);
-  d->prevkey = K;
-  if (!K.dptr)
-    return 0;
-  if (sdbm_put_user(K.dptr, K.dsize, key, keylen))
-    return SDBM_ERROR_TOO_LARGE;
-  /*
-   * Fetch by the key GDBM gave us rather than by the caller's copy:
-   * key and keylen are optional for sdbm_put_user(), so they may be NULL
-   * here. K stays valid because d->prevkey owns it until the next call.
-   */
-  if (val || vallen)
-    return sdbm_fetch(d, (byte *) K.dptr, K.dsize, val, vallen);
-  return 1;
-}
-
-/* No-op in this GDBM-based emulation; the function body is intentionally empty. */
-void
-sdbm_sync(struct sdbm *d)
-{
-}
+++ /dev/null
-/*
- * UCW Library -- Database Manager -- Tests and Benchmarks
- *
- * (c) 1999 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#if 1
-#include "lib/db.c"
-#define NAME "SDBM"
-#else
-#include "lib/db-emul.c"
-#define NAME "GDBM"
-#endif
-
-#include <stdlib.h>
-#include <unistd.h>
-#include <stdarg.h>
-#include <sys/stat.h>
-
-/*
- * Default options of the benchmark database; individual fields are
- * overridden from the command line in main(). Uses standard C99
- * designated initializers instead of the obsolete GNU "field:" form.
- */
-static struct sdbm_options opts = {
-  .flags = SDBM_CREAT | SDBM_WRITE,
-  .name = "db.test",
-  .page_order = 10,
-  .cache_size = 1024,
-  .key_size = -1,
-  .val_size = -1
-};
-
-static struct sdbm *d;
-static int key_min, key_max; /* min<0 -> URL distribution */
-static int val_min, val_max;
-static int num_keys; /* Number of distinct keys */
-static int verbose;
-
-/*
- * Print the usage summary to stdout and terminate the program.
- * NOTE(review): the exit status is 0 even when this is reached from the
- * default branch of option or command parsing in main().
- */
-static void
-help(void)
-{
- printf("Usage: dbtest [<options>] <commands>\n\
-\n\
-Options:\n\
--c<n> Use cache of <n> pages\n\
--p<n> Use pages of order <n>\n\
--k<n> Use key size <n>\n\
--k<m>-<n> Use key size uniformly distributed between <m> and <n>\n\
--kU Use keys with URL distribution\n\
--n<n> Number of distinct keys\n\
--d<m>[-<n>] Use specified value size (see -k<m>-<n>)\n\
--t Perform the tests on an existing database file\n\
--v Be verbose\n\
--s Turn on synchronous mode\n\
--S Turn on supersynchronous mode\n\
--F Turn on fast mode\n\
-\n\
-Commands:\n\
-c Fill database\n\
-r Rewrite database\n\
-f[<p>%%][<n>] Find <n> records with probability of success <p>%% (default=100)\n\
-F[<p>%%][<n>] Find, but don't fetch values\n\
-d Delete records\n\
-w Walk database\n\
-W Walk, but don't fetch values\n\
-");
- exit(0);
-}
-
-/* Scramble the key number into a pseudo-random value by a fixed multiplication (wraps in unsigned arithmetic). */
-static uns
-krand(uns kn)
-{
-  uns scrambled = kn;
-
-  scrambled *= 2000000011;
-  return scrambled;
-}
-
-/*
- * Choose a key size for the "URL distribution" mode (key_min < 0).
- * rnd is reduced modulo the last table entry, utable[1024], and then
- * looked up in the non-decreasing table utable[0..1023] by binary search;
- * the index where the search ends is returned as the size.
- * NOTE(review): utable looks like a cumulative histogram of URL lengths --
- * confirm where the data came from.
- */
-static uns
-gen_url_size(uns rnd)
-{
- uns l, m, r;
- static uns utable[] = {
-0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 22, 108, 245, 481, 979, 3992, 7648, 13110, 19946, 27256, 34993, 43222, 52859, 64563,
-80626, 117521, 147685, 188364, 233174, 290177, 347132, 407231, 465787, 540931, 628601, 710246, 808671, 922737, 1025691, 1138303,
-1238802, 1344390, 1443843, 1533207, 1636494, 1739082, 1826911, 1910725, 1993940, 2094365, 2188987, 2267827, 2350190, 2441980,
-2520713, 2593654, 2668632, 2736009, 2808356, 2889682, 2959300, 3017945, 3086488, 3146032, 3204818, 3251897, 3307001, 3349388,
-3392798, 3433429, 3476765, 3529107, 3556884, 3585120, 3633005, 3677697, 3699561, 3716660, 3739823, 3765154, 3795096, 3821184,
-3858117, 3908757, 3929095, 3943264, 3957033, 3969588, 3983441, 3994630, 4005413, 4028890, 4039678, 4058007, 4071906, 4087029,
-4094233, 4105259, 4111603, 4120338, 4127364, 4133983, 4140310, 4144843, 4150565, 4155974, 4165132, 4170648, 4176811, 4187118,
-4190866, 4199051, 4206686, 4216122, 4226109, 4233721, 4254123, 4261792, 4270396, 4276650, 4282932, 4291738, 4295932, 4299370,
-4304011, 4307098, 4311866, 4318168, 4325730, 4329774, 4332946, 4336305, 4339770, 4345237, 4349038, 4356129, 4362872, 4366542,
-4371077, 4374524, 4376733, 4378794, 4380652, 4382340, 4383552, 4385952, 4386914, 4393123, 4394106, 4395142, 4396593, 4399112,
-4399909, 4401015, 4401780, 4402616, 4403454, 4404481, 4405231, 4405947, 4406886, 4408364, 4409159, 4409982, 4410872, 4412010,
-4413341, 4414161, 4415673, 4417135, 4418032, 4419117, 4419952, 4420677, 4421387, 4421940, 4422469, 4423210, 4423696, 4424274,
-4424982, 4425665, 4426363, 4427018, 4427969, 4428992, 4429791, 4430804, 4432601, 4433440, 4434157, 4434967, 4436280, 4439784,
-4444255, 4445544, 4446416, 4447620, 4449638, 4453004, 4455470, 4456982, 4457956, 4458617, 4459538, 4460007, 4460377, 4460768,
-4461291, 4461520, 4461678, 4461911, 4462063, 4462239, 4462405, 4462607, 4462666, 4462801, 4462919, 4463108, 4463230, 4463438,
-4463530, 4463698, 4463779, 4463908, 4463991, 4464138, 4464188, 4464391, 4464580, 4464868, 4464980, 4465174, 4465255, 4465473,
-4465529, 4465681, 4465746, 4465916, 4465983, 4466171, 4466248, 4466430, 4466560, 4466751, 4466930, 4467807, 4468847, 4469940,
-4470344, 4470662, 4470716, 4471120, 4471389, 4471814, 4472141, 4472545, 4472687, 4473051, 4473253, 4473603, 4473757, 4474065,
-4474125, 4474354, 4474428, 4474655, 4474705, 4474841, 4474858, 4475133, 4475201, 4475327, 4475367, 4475482, 4475533, 4475576,
-4475586, 4475616, 4475637, 4475659, 4475696, 4475736, 4475775, 4475794, 4476156, 4476711, 4477004, 4477133, 4477189, 4477676,
-4477831, 4477900, 4477973, 4477994, 4478011, 4478040, 4478063, 4478085, 4478468, 4478715, 4479515, 4480034, 4481804, 4483259,
-4483866, 4484202, 4484932, 4485693, 4486184, 4486549, 4486869, 4487405, 4487639, 4487845, 4488086, 4488256, 4488505, 4488714,
-4492669, 4496233, 4497738, 4498122, 4498653, 4499862, 4501169, 4501627, 4501673, 4501811, 4502182, 4502475, 4502533, 4502542,
-4502548, 4502733, 4503389, 4504381, 4505070, 4505378, 4505814, 4506031, 4506336, 4506642, 4506845, 4506971, 4506986, 4507016,
-4507051, 4507098, 4507107, 4507114, 4507139, 4507478, 4507643, 4507674, 4507694, 4507814, 4507894, 4507904, 4507929, 4507989,
-4508023, 4508047, 4508053, 4508063, 4508075, 4508092, 4508104, 4508113, 4508239, 4508285, 4508324, 4508335, 4508340, 4508378,
-4508405, 4508419, 4508436, 4508449, 4508470, 4508488, 4508515, 4508541, 4508564, 4508570, 4508584, 4508594, 4508607, 4508634,
-4508652, 4508665, 4508673, 4508692, 4508704, 4508742, 4508755, 4508773, 4508788, 4508798, 4508832, 4508869, 4508885, 4508905,
-4508915, 4508947, 4508956, 4509061, 4509070, 4509357, 4509368, 4509380, 4509393, 4509401, 4509412, 4509426, 4509438, 4509451,
-4509461, 4509473, 4509489, 4509498, 4509512, 4509537, 4509568, 4509582, 4509621, 4509629, 4509747, 4509766, 4509776, 4509795,
-4509802, 4509813, 4509822, 4509829, 4509834, 4509844, 4509854, 4509863, 4509868, 4509875, 4509886, 4509898, 4509908, 4509920,
-4509932, 4509941, 4509949, 4509955, 4509967, 4509972, 4509979, 4509987, 4509999, 4510002, 4510010, 4510014, 4510018, 4510025,
-4510028, 4510049, 4510055, 4510061, 4510068, 4510079, 4510085, 4510091, 4510098, 4510102, 4510104, 4510110, 4510121, 4510128,
-4510132, 4510138, 4510144, 4510145, 4510153, 4510161, 4510174, 4510196, 4510199, 4510208, 4510209, 4510212, 4510216, 4510217,
-4510219, 4510222, 4510228, 4510231, 4510236, 4510241, 4510245, 4510248, 4510250, 4510254, 4510255, 4510261, 4510262, 4510266,
-4510266, 4510271, 4510285, 4510287, 4510291, 4510295, 4510303, 4510306, 4510308, 4510310, 4510314, 4510319, 4510320, 4510324,
-4510328, 4510333, 4510333, 4510336, 4510340, 4510342, 4510348, 4510353, 4510359, 4510362, 4510365, 4510371, 4510373, 4510375,
-4510378, 4510380, 4510385, 4510389, 4510391, 4510391, 4510394, 4510396, 4510397, 4510398, 4510400, 4510403, 4510406, 4510407,
-4510408, 4510409, 4510411, 4510413, 4510417, 4510417, 4510419, 4510422, 4510426, 4510427, 4510430, 4510435, 4510437, 4510439,
-4510440, 4510442, 4510442, 4510446, 4510447, 4510448, 4510450, 4510451, 4510451, 4510453, 4510454, 4510455, 4510457, 4510460,
-4510460, 4510460, 4510462, 4510463, 4510466, 4510468, 4510472, 4510475, 4510480, 4510482, 4510483, 4510486, 4510488, 4510492,
-4510494, 4510497, 4510497, 4510499, 4510503, 4510505, 4510506, 4510507, 4510509, 4510512, 4510514, 4510527, 4510551, 4510553,
-4510554, 4510555, 4510556, 4510558, 4510561, 4510562, 4510566, 4510567, 4510568, 4510570, 4510573, 4510574, 4510586, 4510603,
-4510605, 4510607, 4510610, 4510610, 4510613, 4510613, 4510614, 4510614, 4510615, 4510616, 4510616, 4510620, 4510622, 4510623,
-4510624, 4510627, 4510628, 4510630, 4510631, 4510632, 4510634, 4510634, 4510634, 4510636, 4510636, 4510639, 4510639, 4510640,
-4510643, 4510647, 4510649, 4510650, 4510653, 4510653, 4510653, 4510653, 4510656, 4510659, 4510661, 4510664, 4510665, 4510669,
-4510672, 4510673, 4510674, 4510675, 4510680, 4510683, 4510684, 4510686, 4510687, 4510690, 4510691, 4510693, 4510693, 4510697,
-4510699, 4510700, 4510703, 4510704, 4510709, 4510711, 4510713, 4510713, 4510720, 4510720, 4510722, 4510724, 4510727, 4510729,
-4510735, 4510735, 4510738, 4510740, 4510744, 4510745, 4510746, 4510748, 4510754, 4510756, 4510758, 4510761, 4510764, 4510766,
-4510768, 4510768, 4510770, 4510770, 4510772, 4510774, 4510775, 4510775, 4510775, 4510776, 4510777, 4510780, 4510782, 4510783,
-4510785, 4510786, 4510788, 4510789, 4510791, 4510793, 4510793, 4510793, 4510795, 4510795, 4510799, 4510803, 4510804, 4510804,
-4510804, 4510805, 4510807, 4510809, 4510811, 4510811, 4510813, 4510815, 4510815, 4510816, 4510819, 4510820, 4510824, 4510827,
-4510829, 4510829, 4510830, 4510833, 4510835, 4510837, 4510838, 4510838, 4510839, 4510840, 4510840, 4510842, 4510842, 4510843,
-4510845, 4510845, 4510845, 4510847, 4510848, 4510848, 4510848, 4510850, 4510853, 4510855, 4510857, 4510859, 4510861, 4510862,
-4510864, 4510865, 4510865, 4510865, 4510869, 4510869, 4510869, 4510869, 4510869, 4510870, 4510870, 4510872, 4510872, 4510873,
-4510874, 4510875, 4510875, 4510877, 4510879, 4510879, 4510879, 4510879, 4510880, 4510881, 4510882, 4510883, 4510884, 4510885,
-4510886, 4510887, 4510890, 4510890, 4510891, 4510892, 4510892, 4510893, 4510893, 4510895, 4510895, 4510896, 4510897, 4510899,
-4510901, 4510901, 4510901, 4510902, 4510903, 4510903, 4510903, 4510905, 4510905, 4510906, 4510906, 4510907, 4510907, 4510909,
-4510910, 4510911, 4510911, 4510911, 4510913, 4510913, 4510914, 4510914, 4510914, 4510915, 4510916, 4510918, 4510918, 4510919,
-4510919, 4510919, 4510920, 4510921, 4510922, 4510923, 4510924, 4510924, 4510924, 4510924, 4510926, 4510927, 4510928, 4510928,
-4510928, 4510928, 4510928, 4510930, 4510933, 4510935, 4510935, 4510935, 4510935, 4510935, 4510936, 4510938, 4510947, 4510966,
-4510967, 4510969, 4510973, 4510973, 4510974, 4510974, 4510974, 4510974, 4510974, 4510974, 4510975, 4510976, 4510976, 4510976,
-4510976, 4510976, 4510976, 4510976, 4510977, 4510979, 4510979, 4510979, 4510979, 4510979, 4510979, 4510980, 4510980, 4510980,
-4510980, 4510981, 4510981, 4510981, 4510982, 4510982, 4510982, 4510982, 4510982, 4510982, 4510982, 4510983, 4510983, 4510984,
-4510984, 4510984, 4510984, 4510984, 4510985, 4510985, 4510985, 4510985, 4510987, 4510987, 4510987, 4510988, 4510988, 4510989,
-4510989, 4510989, 4510989, 4510989, 4510990, 4510990, 4510990, 4510990, 4510990, 4510990, 4510990, 4510991, 4510991, 4510991,
-4510991, 4510991, 4510991, 4510991, 4510992, 4510992, 4510992, 4510992, 4510992, 4510992, 4510992, 4510993, 4510993, 4510993,
-4510994, 4510994, 4510994, 4510994, 4510995, 4510995, 4510996, 4510997, 4510998, 4510999, 4510999, 4511000, 4511000, 4511001,
-4511001, 4511002, 4511002, 4511002, 4511003, 4511004, 4511004, 4511004, 4511004, 4511005, 4511006, 4511008, 4511008, 4511008,
-4511009, 4511009, 4511009, 4511009, 4511010, 4511011, 4511011, 4511012, 4511012, 4511012, 4511012, 4511013, 4511013, 4511014,
-4511014, 4511014, 4511014, 4511015, 4511018, 4511018, 4511018, 4511018, 4511018, 4511018, 4511018, 4511020, 4511020, 4511020,
-4511020, 4511020, 4511020, 4511020, 4511021, 4511021, 4511021, 4511021, 4511021, 4511021, 4511021, 4511021, 4511021, 4511021,
-4511021
- };
-
- rnd %= utable[1024];
- l = 0; r = 1023;
- while (l < r)
- {
- m = (l+r)/2;
- if (utable[m] == rnd)
- return m;
- if (utable[m] >= rnd)
- r = m - 1;
- else
- l = m + 1;
- }
- return l;
-}
-
-/* Map rnd to a size in the inclusive range min..max (just min when the range is a single value). */
-static uns
-gen_size(uns min, uns max, uns rnd)
-{
-  uns span;
-
-  if (min == max)
-    return min;
-  span = max - min + 1;
-  return min + rnd % span;
-}
-
-/*
- * Fill buf with size deterministic pseudo-random bytes derived from kn:
- * each byte is the top byte of a state advanced by state*257 + 17.
- */
-static void
-gen_random(byte *buf, uns size, uns kn)
-{
-  uns state = (kn + 0x36221057) ^ (kn << 24) ^ (kn << 15);
-  uns i;
-
-  for (i = 0; i < size; i++)
-    {
-      buf[i] = state >> 24;
-      state = state*257 + 17;
-    }
-}
-
-/*
- * Build the key for key number kn in buf and return its length.
- * The first four bytes are kn in big-endian order (decoded by keydec());
- * the rest, if the chosen size exceeds 4, is filled by gen_random().
- * Sizes below 4 are rounded up to the 4-byte prefix.
- */
-static int
-keygen(byte *buf, uns kn)
-{
-  uns rnd = krand(kn);
-  uns size = (key_min < 0) ? gen_url_size(rnd) : gen_size(key_min, key_max, rnd);
-
-  buf[0] = kn >> 24;
-  buf[1] = kn >> 16;
-  buf[2] = kn >> 8;
-  buf[3] = kn;
-  if (size < 4)
-    return 4;
-  gen_random(buf + 4, size - 4, kn);
-  return size;
-}
-
-/* Build the value belonging to key number kn in buf and return its length (val_min..val_max). */
-static int
-valgen(byte *buf, uns kn)
-{
-  uns rnd = krand(kn);
-  uns size = gen_size(val_min, val_max, rnd);
-
-  gen_random(buf, size, kn);
-  return size;
-}
-
-/*
- * Decode the key number stored big-endian in the first four bytes of a key.
- * Each byte is widened to uns before shifting: a plain byte promotes to
- * (signed) int, and shifting a value >= 0x80 left by 24 would overflow it.
- */
-static uns
-keydec(byte *buf)
-{
-  return ((uns) buf[0] << 24) | ((uns) buf[1] << 16) | ((uns) buf[2] << 8) | (uns) buf[3];
-}
-
-/*
- * printf-like message to stderr, shown only if the verbosity level is
- * high enough. A message starting with "^<digit>" requires that level
- * (the prefix is stripped); all other messages require level 1.
- */
-static void
-verb(char *msg, ...)
-{
-  va_list args;
-  const char *fmt = msg;
-  int cat = 1;
-
-  va_start(args, msg);
-  if (fmt[0] == '^' && fmt[1] != 0)
-    {
-      cat = fmt[1] - '0';
-      fmt += 2;
-    }
-  if (cat <= verbose)
-    vfprintf(stderr, fmt, args);
-  va_end(args);
-}
-
-/*
- * Parse "<m>" or "<m>-<n>" into *min and *max (a single number sets both).
- * The string is modified in place: the dash is overwritten by a NUL.
- */
-static void
-parse_size(int *min, int *max, char *c)
-{
-  char *dash = strchr(c, '-');
-
-  if (!dash)
-    {
-      *min = *max = atol(c);
-      return;
-    }
-  *dash = 0;
-  *min = atol(c);
-  *max = atol(dash + 1);
-}
-
-#define PROGRESS(i) if ((verbose > 2) || (verbose > 1 && !(i & 1023))) fprintf(stderr, "%d\r", i)
-
-/*
- * Benchmark driver. Parses the options with getopt(), opens the database
- * described by opts (removing an old file first unless -t was given) and
- * then runs every remaining argument as a command: c (create), r (rewrite),
- * f/F (find), d (delete), w/W (walk). Each command is timed and verified
- * against the data regenerated by keygen()/valgen(); any mismatch is fatal.
- * Finally the database is closed and its file size is printed.
- */
-int main(int argc, char **argv)
-{
- int c, i, j, k, l, m;
- byte kb[2048], vb[2048], vb2[2048];
- uns ks, vs, vs2, perc, cnt;
- char *ch;
- int dont_delete = 0;
- timestamp_t timer;
-
- log_init("dbtest");
- setvbuf(stdout, NULL, _IONBF, 0);
- setvbuf(stderr, NULL, _IONBF, 0);
- while ((c = getopt(argc, argv, "c:p:k:n:d:vsStF")) >= 0)
- switch (c)
- {
- case 'c':
- opts.cache_size = atol(optarg);
- break;
- case 'p':
- opts.page_order = atol(optarg);
- break;
- case 'k':
- if (!strcmp(optarg, "U"))
- key_min = key_max = -1;
- else
- parse_size(&key_min, &key_max, optarg);
- break;
- case 'n':
- num_keys = atol(optarg);
- break;
- case 'd':
- parse_size(&val_min, &val_max, optarg);
- break;
- case 'v':
- verbose++;
- break;
- case 's':
- opts.flags |= SDBM_SYNC;
- break;
- case 'S':
- opts.flags |= SDBM_SYNC | SDBM_FSYNC;
- break;
- case 'F':
- opts.flags |= SDBM_FAST;
- break;
- case 't':
- dont_delete = 1;
- break;
- default:
- help();
- }
-
- if (key_min >= 0 && key_min < 4)
- key_min = key_max = 4;
- if (key_min == key_max && key_min >= 0)
- opts.key_size = key_min;
- if (val_min == val_max)
- opts.val_size = val_min;
- if (!num_keys)
- die("Number of keys not given");
-
- printf(NAME " benchmark: %d records, keys ", num_keys);
- if (key_min < 0)
- printf("<URL>");
- else
- printf("%d-%d", key_min, key_max);
- printf(", values %d-%d, page size %d, cache %d pages\n", val_min, val_max, 1 << opts.page_order, opts.cache_size);
-
- verb("OPEN(%s, key=%d, val=%d, cache=%d, pgorder=%d)\n", opts.name, opts.key_size, opts.val_size,
- opts.cache_size, opts.page_order);
- if (!dont_delete)
- unlink(opts.name);
- d = sdbm_open(&opts);
- if (!d)
- die("open failed: %m");
-
- while (optind < argc)
- {
- char *o = argv[optind++];
- init_timer(&timer);
- switch (*o)
- {
- case 'c':
- printf("create %d: ", num_keys);
- for(i=0; i<num_keys; i++)
- {
- PROGRESS(i);
- ks = keygen(kb, i);
- vs = valgen(vb, i);
- if (sdbm_store(d, kb, ks, vb, vs) != 1) die("store failed");
- }
- break;
- case 'r':
- printf("rewrite %d: ", num_keys);
- for(i=0; i<num_keys; i++)
- {
- PROGRESS(i);
- ks = keygen(kb, i);
- vs = valgen(vb, i);
- if (sdbm_replace(d, kb, ks, vb, vs) != 1) die("replace failed");
- }
- break;
- case 'f':
- case 'F':
- c = (*o++ == 'f');
- if ((ch = strchr(o, '%')))
- {
- *ch++ = 0;
- perc = atol(o);
- }
- else
- {
- ch = o;
- perc = 100;
- }
- cnt = atol(ch);
- if (!cnt)
- {
- cnt = num_keys;
- m = (perc == 100);
- }
- else
- m = 0;
- printf("%s fetch %d (%d%% success, with%s values): ", (m ? "sequential" : "random"), cnt, perc, (c ? "" : "out"));
- i = -1;
- while (cnt--)
- {
- if (m)
- i++;
- else
- i = random_max(num_keys) + ((random_max(100) < perc) ? 0 : num_keys);
- PROGRESS(i);
- ks = keygen(kb, i);
- if (c)
- {
- vs2 = sizeof(vb2);
- j = sdbm_fetch(d, kb, ks, vb2, &vs2);
- }
- else
- j = sdbm_fetch(d, kb, ks, NULL, NULL);
- if (j < 0)
- die("fetch: error %d", j);
- if ((i < num_keys) != j)
- die("fetch mismatch at key %d, res %d", i, j);
- if (c && j)
- {
- vs = valgen(vb, i);
- if (vs != vs2 || memcmp(vb, vb2, vs))
- die("fetch data mismatch at key %d: %d,%d", i, vs, vs2);
- }
- }
- break;
- case 'd':
- printf("delete %d: ", num_keys);
- for(i=0; i<num_keys; i++)
- {
- PROGRESS(i);
- ks = keygen(kb, i);
- if (sdbm_delete(d, kb, ks) != 1) die("delete failed");
- }
- break;
- case 'w':
- case 'W':
- c = (*o == 'w');
- i = k = l = m = 0;
- printf("walk %d (with%s keys): ", num_keys, (c ? "" : "out"));
- sdbm_rewind(d);
- for(;;)
- {
- ks = sizeof(kb);
- vs = sizeof(vb);
- if (c)
- j = sdbm_get_next(d, kb, &ks, vb, &vs);
- else
- j = sdbm_get_next(d, kb, &ks, NULL, NULL);
- if (!j)
- break;
- if (ks < 4)
- die("get_next: too short");
- i = keydec(kb);
- if (i < 0 || i >= num_keys)
- die("get_next: %d out of range", i);
- PROGRESS(i);
- vs2 = keygen(vb2, i);
- if (ks != vs2 || memcmp(kb, vb2, ks))
- die("get_next: key mismatch at %d", i);
- if (c)
- {
- vs2 = valgen(vb2, i);
- if (vs != vs2 || memcmp(vb, vb2, vs))
- die("get_next: data mismatch at %d", i);
- }
- l += k;
- m += i;
- k++;
- }
- if (k != num_keys)
- die("fetch: wrong # of keys: %d != %d", k, num_keys);
- if (l != m)
- die("fetch: wrong checksum: %d != %d", l, m);
- break;
- default:
- help();
- }
- sdbm_sync(d);
- printf("%d ms\n", get_timer(&timer));
- }
-
- verb("CLOSE\n");
- sdbm_close(d);
-
- {
- struct stat st;
- if (stat(opts.name, &st)) die("stat: %m");
- printf("file size: %d bytes\n", (int) st.st_size);
- }
- return 0;
-}
+++ /dev/null
-/*
- * SDBM Database Utility
- *
- * (c) 2000--2001 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-#include "lib/db.h"
-#include "lib/db_internal.h"
-#include "lib/fastbuf.h"
-#include "lib/ff-binary.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include <errno.h>
-
-static int verbose=0;
-static int cache=1024;
-static int force_key=-2;
-static int force_val=-2;
-static int force_page=-1;
-
-#define SDBM_DUMP_MAGIC 0x321f120e
-#define SDBM_DUMP_VERSION 1
-
-/*
- * Dump all records of database `db' to the file `dmp'.
- *
- * The dump starts with SDBM_DUMP_MAGIC, SDBM_DUMP_VERSION and the page
- * order, key size and value size of the source, each written with bputl().
- * Every record follows as: key length (bputw), key, value length (bputw),
- * value. Progress is reported on stderr every 1024 records.
- */
-static void
-dump(char *db, char *dmp)
-{
-  struct sdbm *src;
-  struct fastbuf *dest;
-  struct sdbm_options op;
-  int e, c=0;
-
-  bzero(&op, sizeof(op));
-  op.name = db;
-  op.cache_size = 16;
-  op.flags = 0;
-  src = sdbm_open(&op);
-  if (!src)
-    die("Source open failed: %m");
-
-  dest = bopen(dmp, O_WRONLY | O_CREAT | O_TRUNC, 65536);
-  bputl(dest, SDBM_DUMP_MAGIC);
-  bputl(dest, SDBM_DUMP_VERSION);
-  bputl(dest, src->page_order);
-  bputl(dest, src->key_size);
-  bputl(dest, src->val_size);
-
-  fprintf(stderr, "Dumping database...\n");
-  sdbm_rewind(src);
-  for(;;)
-    {
-      byte key[65536], val[65536];
-      /* The lengths are in/out parameters of type uns, not int */
-      uns klen = sizeof(key);
-      uns vlen = sizeof(val);
-      e = sdbm_get_next(src, key, &klen, val, &vlen);
-      if (!e)
-        break;
-      /* NOTE(review): after an error the record is still written below with whatever key/val hold -- confirm this is intended */
-      if (e < 0)
-        fprintf(stderr, "sdbm_get_next: error %d\n", e);
-      if (!(c++ % 1024))
-        {
-          fprintf(stderr, "%d\r", c);
-          fflush(stderr);
-        }
-      bputw(dest, klen);
-      bwrite(dest, key, klen);
-      bputw(dest, vlen);
-      bwrite(dest, val, vlen);
-    }
-
-  sdbm_close(src);
-  bclose(dest);
-  fprintf(stderr, "Dumped %d records\n", c);
-}
-
-/*
- * Rebuild database `db' from the dump file `dmp' written by dump().
- *
- * The dump header must carry SDBM_DUMP_MAGIC and SDBM_DUMP_VERSION. An
- * existing `db' is unlinked first. Page order, key size and value size
- * are taken from the dump unless overridden by -p, -k and -d; as in
- * rebuild() and the usage text, -1 is a valid override for the key and
- * value size (variable length), so only the "not given" default of -2
- * leaves the dumped value in place.
- */
-static void
-restore(char *dmp, char *db)
-{
-  struct sdbm *dest;
-  struct fastbuf *src;
-  struct sdbm_options op;
-  int e, c=0;
-
-  src = bopen(dmp, O_RDONLY, 65536);
-  if (bgetl(src) != SDBM_DUMP_MAGIC ||
-      bgetl(src) != SDBM_DUMP_VERSION)
-    die("%s: not a sdbm dump", dmp);
-
-  bzero(&op, sizeof(op));
-  op.name = db;
-  e = unlink(op.name);
-  if (e < 0 && errno != ENOENT)
-    die("unlink: %m");
-  op.cache_size = cache;
-  op.flags = SDBM_CREAT | SDBM_WRITE | SDBM_FAST;
-  op.page_order = bgetl(src);
-  if (force_page >= 0)
-    op.page_order = force_page;
-  op.key_size = bgetl(src);
-  if (force_key >= -1)			/* -1 = variable size, -2 = not forced */
-    op.key_size = force_key;
-  op.val_size = bgetl(src);
-  if (force_val >= -1)
-    op.val_size = force_val;
-  dest = sdbm_open(&op);
-  if (!dest)
-    die("Destination open failed");
-
-  fprintf(stderr, "Restoring database...\n");
-  for(;;)
-    {
-      byte key[65536], val[65536];
-      int klen, vlen;
-      klen = bgetw(src);
-      if (klen < 0)			/* negative result: no more records */
-        break;
-      breadb(src, key, klen);
-      vlen = bgetw(src);
-      if (vlen < 0)
-        die("Corrupted dump file: value missing");
-      breadb(src, val, vlen);
-      if (!(c++ % 1024))
-        {
-          fprintf(stderr, "%d\r", c);
-          fflush(stderr);
-        }
-      e = sdbm_store(dest, key, klen, val, vlen);
-      if (e == 0)
-        fprintf(stderr, "sdbm_store: duplicate key\n");
-      else if (e < 0)			/* do not swallow store errors silently */
-        fprintf(stderr, "sdbm_store: error %d\n", e);
-    }
-
-  bclose(src);
-  sdbm_close(dest);
-  fprintf(stderr, "Restored %d records\n", c);
-}
-
-/*
- * Copy all records of database `sdb' into a freshly created database `ddb'.
- *
- * An existing `ddb' is unlinked first. Page order, key size and value
- * size are inherited from the root of the source unless overridden by
- * -p, -k and -d (-1 is a valid key/value size override). Duplicate keys
- * reported by sdbm_store() are logged to stderr.
- */
-static void
-rebuild(char *sdb, char *ddb)
-{
-  struct sdbm *src, *dest;
-  struct sdbm_options op;
-  int e, c=0;
-
-  bzero(&op, sizeof(op));
-  op.name = sdb;
-  op.cache_size = 16;
-  op.flags = 0;
-  src = sdbm_open(&op);
-  if (!src)
-    die("Source open failed: %m");
-
-  op.name = ddb;
-  e = unlink(op.name);
-  if (e < 0 && errno != ENOENT)
-    die("unlink: %m");
-  op.cache_size = cache;
-  op.flags = SDBM_CREAT | SDBM_WRITE | SDBM_FAST;
-  op.page_order = (force_page >= 0) ? (u32) force_page : src->root->page_order;
-  op.key_size = (force_key >= -1) ? force_key : src->root->key_size;
-  op.val_size = (force_val >= -1) ? force_val : src->root->val_size;
-  dest = sdbm_open(&op);
-  if (!dest)
-    die("Destination open failed");
-
-  fprintf(stderr, "Rebuilding database...\n");
-  sdbm_rewind(src);
-  for(;;)
-    {
-      byte key[65536], val[65536];
-      /* The lengths are in/out parameters of type uns, not int */
-      uns klen = sizeof(key);
-      uns vlen = sizeof(val);
-      e = sdbm_get_next(src, key, &klen, val, &vlen);
-      if (!e)
-        break;
-      /* NOTE(review): after an error the record is still stored below -- confirm this is intended */
-      if (e < 0)
-        fprintf(stderr, "sdbm_get_next: error %d\n", e);
-      if (!(c++ % 1024))
-        {
-          fprintf(stderr, "%d\r", c);
-          fflush(stderr);
-        }
-      if (sdbm_store(dest, key, klen, val, vlen) == 0)
-        fprintf(stderr, "sdbm_store: duplicate key\n");
-    }
-
-  sdbm_close(src);
-  sdbm_close(dest);
-  fprintf(stderr, "Copied %d records\n", c);
-}
-
-/*
- * Entry point of db-tool: parse the options, then dispatch on the
- * single-letter command (b = rebuild, d = dump, r = restore), each of
- * which takes exactly two file name arguments.
- * Returns 0 on success, 1 after printing the usage text.
- */
-int
-main(int argc, char **argv)
-{
- int o;
-
- while ((o = getopt(argc, argv, "vc:k:d:p:")) >= 0)
- switch (o)
- {
- case 'v':
- verbose++;
- break;
- case 'c':
- cache=atol(optarg);
- break;
- case 'k':
- force_key=atol(optarg);
- break;
- case 'd':
- force_val=atol(optarg);
- break;
- case 'p':
- force_page=atol(optarg);
- break;
- default:
- /* The label lets the argument checks below jump back here to print the usage text */
- bad:
- fprintf(stderr, "Usage: db-tool [<options>] <command> <database>\n\
-\n\
-Options:\n\
--v\t\tBe verbose\n\
--c<n>\t\tUse cache of <n> pages\n\
--d<n>\t\tSet data size to <n> (-1=variable) [restore,rebuild]\n\
--k<n>\t\tSet key size to <n> (-1=variable) [restore,rebuild]\n\
--p<n>\t\tSet page order to <n> [restore,rebuild]\n\
-\n\
-Commands:\n\
-b <db> <new>\tRebuild database\n\
-d <db> <dump>\tDump database\n\
-r <dump> <db>\tRestore database from dump\n\
-");
- return 1;
- }
- /* Skip the options; argv[0] is now the command letter */
- argc -= optind;
- argv += optind;
- if (argc < 1 || strlen(argv[0]) != 1)
- goto bad;
-
- switch (argv[0][0])
- {
- case 'b':
- if (argc != 3)
- goto bad;
- rebuild(argv[1], argv[2]);
- break;
- case 'd':
- if (argc != 3)
- goto bad;
- dump(argv[1], argv[2]);
- break;
- case 'r':
- if (argc != 3)
- goto bad;
- restore(argv[1], argv[2]);
- break;
- default:
- goto bad;
- }
- return 0;
-}
+++ /dev/null
-/*
- * UCW Library -- Fast Database Management Routines
- *
- * (c) 1999--2001 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-/*
- * This library uses the standard algorithm for external hashing (page directory
- * mapping topmost K bits of hash value to page address, directory splits and
- * so on). Peculiarities of this implementation (aka design decisions):
- *
- * o We allow both fixed and variable length keys and values (this includes
- * zero size values for cases you want to represent only a set of keys).
- * o We assume that key_size + val_size < page_size.
- * o We never shrink the directory nor free empty pages. (The reason is that
- * if the database was once large, it's likely it will again become large soon.)
- * o The only pages which can be freed are those of the directory (during
- * directory split), so we keep only a simple 32-entry free block list
- * and we assume it's sorted.
- * o All pointers are always given in pages from start of the file.
- * This gives us page_size*2^32 limit for file size which should be enough.
- */
-
-#include "lib/lib.h"
-#include "lib/lfs.h"
-#include "lib/pagecache.h"
-#include "lib/db.h"
-#include "lib/db_internal.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <fcntl.h>
-#include <unistd.h>
-
-/*
- * Page cache accessors for database d. A page number x is converted to a
- * file offset by shifting it left by d->page_order; READ_DIR instead takes
- * a byte offset `off' relative to the start of the page directory.
- */
-#define GET_PAGE(d,x) pgc_get((d)->cache, (d)->fd, ((sh_off_t)(x)) << (d)->page_order)
-#define GET_ZERO_PAGE(d,x) pgc_get_zero((d)->cache, (d)->fd, ((sh_off_t)(x)) << (d)->page_order)
-#define READ_PAGE(d,x) pgc_read((d)->cache, (d)->fd, ((sh_off_t)(x)) << (d)->page_order)
-#define READ_DIR(d,off) pgc_read((d)->cache, (d)->fd, (((sh_off_t)(d)->root->dir_start) << (d)->page_order) + (off))
-
-/*
- * Open the database file named in `o', creating it when it cannot be
- * opened and SDBM_CREAT is set.
- *
- * For an existing file the root block is read and its magic and version
- * are checked; page size and the other parameters then come from the file.
- * For a new file a root page, a one-entry page directory and a single
- * empty data page are built from the options (page order at least 10).
- *
- * Returns the handle, or NULL on failure (the partial handle is released
- * through sdbm_close()).
- */
-struct sdbm *
-sdbm_open(struct sdbm_options *o)
-{
- struct sdbm *d;
- struct sdbm_root root, *r;
- uns cache_size = o->cache_size ? o->cache_size : 16;
-
- /* Zeroed allocation: sdbm_close() on the error path relies on NULL cache/root_page */
- d = xmalloc_zero(sizeof(struct sdbm));
- d->flags = o->flags;
- d->fd = sh_open(o->name, ((d->flags & SDBM_WRITE) ? O_RDWR : O_RDONLY), 0666);
- if (d->fd >= 0) /* Already exists, let's check it */
- {
- if (read(d->fd, &root, sizeof(root)) != sizeof(root))
- goto bad;
- if (root.magic != SDBM_MAGIC || root.version != SDBM_VERSION)
- goto bad;
- /* File size is kept in pages */
- d->file_size = sh_seek(d->fd, 0, SEEK_END) >> root.page_order;
- d->page_order = root.page_order;
- d->page_size = 1 << root.page_order;
- d->cache = pgc_open(d->page_size, cache_size);
- d->root_page = pgc_read(d->cache, d->fd, 0);
- d->root = (void *) d->root_page->data;
- }
- else if ((d->flags & SDBM_CREAT) && (d->fd = sh_open(o->name, O_RDWR | O_CREAT, 0666)) >= 0)
- {
- struct page *q;
- uns page_order = o->page_order;
- if (page_order < 10)
- page_order = 10;
- d->page_size = 1 << page_order;
- d->cache = pgc_open(d->page_size, cache_size);
- /* d->page_order is still 0 here, which is harmless: page 0 lies at offset 0 for any shift */
- d->root_page = GET_ZERO_PAGE(d, 0);
- r = d->root = (void *) d->root_page->data; /* Build root page */
- r->magic = SDBM_MAGIC;
- r->version = SDBM_VERSION;
- r->page_order = d->page_order = page_order;
- r->key_size = o->key_size;
- r->val_size = o->val_size;
- r->dir_start = 1;
- r->dir_order = 0;
- /* Three pages so far: root (0), directory (1), data (2) */
- d->file_size = 3;
- q = GET_ZERO_PAGE(d, 1); /* Build page directory */
- GET32(q->data, 0) = 2;
- pgc_put(d->cache, q);
- q = GET_ZERO_PAGE(d, 2); /* Build single data page */
- pgc_put(d->cache, q);
- }
- else
- goto bad;
- /* Cache values derived from the root page in the handle */
- d->dir_size = 1 << d->root->dir_order;
- d->dir_shift = 32 - d->root->dir_order;
- d->page_mask = d->page_size - 1;
- d->key_size = d->root->key_size;
- d->val_size = d->root->val_size;
- return d;
-
-bad:
- sdbm_close(d);
- return NULL;
-}
-
-void
-sdbm_close(struct sdbm *d)
-{
- if (d->root_page)
- pgc_put(d->cache, d->root_page);
- if (d->cache)
- pgc_close(d->cache);
- if (d->fd >= 0)
- close(d->fd);
- xfree(d);
-}
-
-static uns
-sdbm_alloc_pages(struct sdbm *d, uns number)
-{
- uns where = d->file_size;
- if (where + number < where) /* Wrap around? */
- die("SDB: Database file too large, giving up");
- d->file_size += number;
- return where;
-}
-
-static uns
-sdbm_alloc_page(struct sdbm *d)
-{
- uns pos;
-
- if (!d->root->free_pool[0].count)
- return sdbm_alloc_pages(d, 1);
- pos = d->root->free_pool[0].first;
- d->root->free_pool[0].first++;
- if (!--d->root->free_pool[0].count)
- {
- memmove(d->root->free_pool, d->root->free_pool+1, (SDBM_NUM_FREE_PAGE_POOLS-1) * sizeof(d->root->free_pool[0]));
- d->root->free_pool[SDBM_NUM_FREE_PAGE_POOLS-1].count = 0;
- }
- pgc_mark_dirty(d->cache, d->root_page);
- return pos;
-}
-
-static void
-sdbm_free_pages(struct sdbm *d, uns start, uns number)
-{
- uns i = 0;
-
- while (d->root->free_pool[i].count)
- i++;
- ASSERT(i < SDBM_NUM_FREE_PAGE_POOLS);
- d->root->free_pool[i].first = start;
- d->root->free_pool[i].count = number;
- pgc_mark_dirty(d->cache, d->root_page);
-}
-
-u32
-sdbm_hash(byte *key, uns keylen)
-{
- /*
- * This used to be the same hash function as GDBM uses,
- * but it turned out that it tends to give the same results
- * on similar keys. Damn it.
- */
- u32 value = 0x238F13AF * keylen;
- while (keylen--)
- value = 37*value + *key++;
- return (1103515243 * value + 12345);
-}
-
-static int
-sdbm_get_entry(struct sdbm *d, byte *pos, byte **key, uns *keylen, byte **val, uns *vallen)
-{
- byte *p = pos;
-
- if (d->key_size >= 0)
- *keylen = d->key_size;
- else
- {
- *keylen = (p[0] << 8) | p[1];
- p += 2;
- }
- *key = p;
- p += *keylen;
- if (d->val_size >= 0)
- *vallen = d->val_size;
- else
- {
- *vallen = (p[0] << 8) | p[1];
- p += 2;
- }
- *val = p;
- p += *vallen;
- return p - pos;
-}
-
-static int
-sdbm_entry_len(struct sdbm *d, uns keylen, uns vallen)
-{
- uns len = keylen + vallen;
- if (d->key_size < 0)
- len += 2;
- if (d->val_size < 0)
- len += 2;
- return len;
-}
-
-static void
-sdbm_store_entry(struct sdbm *d, byte *pos, byte *key, uns keylen, byte *val, uns vallen)
-{
- if (d->key_size < 0)
- {
- *pos++ = keylen >> 8;
- *pos++ = keylen;
- }
- memmove(pos, key, keylen);
- pos += keylen;
- if (d->val_size < 0)
- {
- *pos++ = vallen >> 8;
- *pos++ = vallen;
- }
- memmove(pos, val, vallen);
-}
-
-static uns
-sdbm_page_rank(struct sdbm *d, uns dirpos)
-{
- struct page *b;
- u32 pg, x;
- uns l, r;
- uns pm = d->page_mask;
-
- b = READ_DIR(d, dirpos & ~pm);
- pg = GET32(b->data, dirpos & pm);
- l = dirpos;
- while ((l & pm) && GET32(b->data, (l - 4) & pm) == pg)
- l -= 4;
- r = dirpos + 4;
- /* We heavily depend on unused directory entries being zero */
- while ((r & pm) && GET32(b->data, r & pm) == pg)
- r += 4;
- pgc_put(d->cache, b);
-
- if (!(l & pm) && !(r & pm))
- {
- /* Note that if it spans page boundary, it must contain an integer number of pages */
- while (l)
- {
- b = READ_DIR(d, (l - 4) & ~pm);
- x = GET32(b->data, 0);
- pgc_put(d->cache, b);
- if (x != pg)
- break;
- l -= d->page_size;
- }
- while (r < 4*d->dir_size)
- {
- b = READ_DIR(d, r & ~pm);
- x = GET32(b->data, 0);
- pgc_put(d->cache, b);
- if (x != pg)
- break;
- r += d->page_size;
- }
- }
- return (r - l) >> 2;
-}
-
-static void
-sdbm_expand_directory(struct sdbm *d)
-{
- struct page *b, *c;
- int i, ent;
- u32 *dir, *t;
-
- if (d->root->dir_order >= 31)
- die("SDB: Database directory too large, giving up");
-
- if (4*d->dir_size < d->page_size)
- {
- /* It still fits within single page */
- b = READ_DIR(d, 0);
- dir = (u32 *) b->data;
- for(i=d->dir_size-1; i>=0; i--)
- dir[2*i] = dir[2*i+1] = dir[i];
- pgc_mark_dirty(d->cache, b);
- pgc_put(d->cache, b);
- }
- else
- {
- uns old_dir = d->root->dir_start;
- uns old_dir_pages = 1 << (d->root->dir_order + 2 - d->page_order);
- uns page, new_dir;
- new_dir = d->root->dir_start = sdbm_alloc_pages(d, 2*old_dir_pages);
- ent = 1 << (d->page_order - 3);
- for(page=0; page < old_dir_pages; page++)
- {
- b = READ_PAGE(d, old_dir + page);
- dir = (u32 *) b->data;
- c = GET_PAGE(d, new_dir + 2*page);
- t = (u32 *) c->data;
- for(i=0; i<ent; i++)
- t[2*i] = t[2*i+1] = dir[i];
- pgc_put(d->cache, c);
- c = GET_PAGE(d, new_dir + 2*page + 1);
- t = (u32 *) c->data;
- for(i=0; i<ent; i++)
- t[2*i] = t[2*i+1] = dir[ent+i];
- pgc_put(d->cache, c);
- pgc_put(d->cache, b);
- }
- if (!(d->flags & SDBM_FAST))
- {
- /*
- * Unless in super-fast mode, fill old directory pages with zeroes.
- * This slows us down a bit, but allows database reconstruction after
- * the free list is lost.
- */
- for(page=0; page < old_dir_pages; page++)
- {
- b = GET_ZERO_PAGE(d, old_dir + page);
- pgc_put(d->cache, b);
- }
- }
- sdbm_free_pages(d, old_dir, old_dir_pages);
- }
-
- d->root->dir_order++;
- d->dir_size = 1 << d->root->dir_order;
- d->dir_shift = 32 - d->root->dir_order;
- pgc_mark_dirty(d->cache, d->root_page);
- if (!(d->flags & SDBM_FAST))
- sdbm_sync(d);
-}
-
-static void
-sdbm_split_data(struct sdbm *d, struct sdbm_bucket *s, struct sdbm_bucket *d0, struct sdbm_bucket *d1, uns sigbit)
-{
- byte *sp = s->data;
- byte *dp[2] = { d0->data, d1->data };
- byte *K, *D;
- uns Kl, Dl, sz, i;
-
- while (sp < s->data + s->used)
- {
- sz = sdbm_get_entry(d, sp, &K, &Kl, &D, &Dl);
- sp += sz;
- i = (sdbm_hash(K, Kl) & (1 << sigbit)) ? 1 : 0;
- sdbm_store_entry(d, dp[i], K, Kl, D, Dl);
- dp[i] += sz;
- }
- d0->used = dp[0] - d0->data;
- d1->used = dp[1] - d1->data;
-}
-
-static void
-sdbm_split_dir(struct sdbm *d, uns dirpos, uns count, uns pos)
-{
- struct page *b;
- uns i;
-
- count *= 4;
- while (count)
- {
- b = READ_DIR(d, dirpos & ~d->page_mask);
- i = d->page_size - (dirpos & d->page_mask);
- if (i > count)
- i = count;
- count -= i;
- while (i)
- {
- GET32(b->data, dirpos & d->page_mask) = pos;
- dirpos += 4;
- i -= 4;
- }
- pgc_mark_dirty(d->cache, b);
- pgc_put(d->cache, b);
- }
-}
-
-static inline uns
-sdbm_dirpos(struct sdbm *d, uns hash)
-{
- if (d->dir_shift != 32) /* avoid shifting by 32 bits */
- return (hash >> d->dir_shift) << 2; /* offset in the directory */
- else
- return 0;
-}
-
-static struct page *
-sdbm_split_page(struct sdbm *d, struct page *b, u32 hash)
-{
- struct page *p[2];
- uns i, rank, sigbit, rank_log, dirpos, newpg;
-
- dirpos = sdbm_dirpos(d, hash);
- rank = sdbm_page_rank(d, dirpos); /* rank = # of pointers to this page */
- if (rank == 1)
- {
- sdbm_expand_directory(d);
- rank = 2;
- dirpos *= 2;
- }
- rank_log = 1; /* rank_log = log2(rank) */
- while ((1U << rank_log) < rank)
- rank_log++;
- sigbit = d->dir_shift + rank_log - 1; /* sigbit = bit we split on */
- p[0] = b;
- newpg = sdbm_alloc_page(d);
- p[1] = GET_PAGE(d, newpg);
- sdbm_split_data(d, (void *) b->data, (void *) p[0]->data, (void *) p[1]->data, sigbit);
- sdbm_split_dir(d, (dirpos & ~(4*rank - 1))+2*rank, rank/2, newpg);
- pgc_mark_dirty(d->cache, p[0]);
- i = (hash & (1 << sigbit)) ? 1 : 0;
- pgc_put(d->cache, p[!i]);
- return p[i];
-}
-
-static int
-sdbm_put_user(byte *D, uns Dl, byte *val, uns *vallen)
-{
- if (vallen)
- {
- if (*vallen < Dl)
- return 1;
- *vallen = Dl;
- }
- if (val)
- memcpy(val, D, Dl);
- return 0;
-}
-
-static int
-sdbm_access(struct sdbm *d, byte *key, uns keylen, byte *val, uns *vallen, uns mode) /* 0=read, 1=store, 2=replace */
-{
- struct page *p, *q;
- u32 hash, h, pos, size;
- struct sdbm_bucket *b;
- byte *c, *e;
- int rc;
-
- if ((d->key_size >= 0 && keylen != (uns) d->key_size) || keylen > 65535)
- return SDBM_ERROR_BAD_KEY_SIZE;
- if (val && ((d->val_size >= 0 && *vallen != (uns) d->val_size) || *vallen >= 65535) && mode)
- return SDBM_ERROR_BAD_VAL_SIZE;
- if (!mode && !(d->flags & SDBM_WRITE))
- return SDBM_ERROR_READ_ONLY;
- hash = sdbm_hash(key, keylen);
- h = sdbm_dirpos(d, hash);
- p = READ_DIR(d, h & ~d->page_mask);
- pos = GET32(p->data, h & d->page_mask);
- pgc_put(d->cache, p);
- q = READ_PAGE(d, pos);
- b = (void *) q->data;
- c = b->data;
- e = c + b->used;
- while (c < e)
- {
- byte *K, *D;
- uns Kl, Dl, s;
- s = sdbm_get_entry(d, c, &K, &Kl, &D, &Dl);
- if (Kl == keylen && !memcmp(K, key, Kl))
- {
- /* Gotcha! */
- switch (mode)
- {
- case 0: /* fetch: found */
- rc = sdbm_put_user(D, Dl, val, vallen);
- pgc_put(d->cache, q);
- return rc ? SDBM_ERROR_TOO_LARGE : 1;
- case 1: /* store: already present */
- pgc_put(d->cache, q);
- return 0;
- default: /* replace: delete the old one */
- memmove(c, c+s, e-(c+s));
- b->used -= s;
- goto insert;
- }
- }
- c += s;
- }
- if (!mode || !val) /* fetch or delete: no success */
- {
- pgc_put(d->cache, q);
- return 0;
- }
-
-insert:
- if (val)
- {
- size = sdbm_entry_len(d, keylen, *vallen);
- while (b->used + size > d->page_size - sizeof(struct sdbm_bucket))
- {
- /* Page overflow, need to split */
- if (size >= d->page_size - sizeof(struct sdbm_bucket))
- {
- pgc_put(d->cache, q);
- return SDBM_ERROR_GIANT;
- }
- q = sdbm_split_page(d, q, hash);
- b = (void *) q->data;
- }
- sdbm_store_entry(d, b->data + b->used, key, keylen, val, *vallen);
- b->used += size;
- }
- pgc_mark_dirty(d->cache, q);
- pgc_put(d->cache, q);
- if (d->flags & SDBM_SYNC)
- sdbm_sync(d);
- return 1;
-}
-
-int
-sdbm_store(struct sdbm *d, byte *key, uns keylen, byte *val, uns vallen)
-{
- return sdbm_access(d, key, keylen, val, &vallen, 1);
-}
-
-int
-sdbm_replace(struct sdbm *d, byte *key, uns keylen, byte *val, uns vallen)
-{
- return sdbm_access(d, key, keylen, val, &vallen, 2);
-}
-
-int
-sdbm_delete(struct sdbm *d, byte *key, uns keylen)
-{
- return sdbm_access(d, key, keylen, NULL, NULL, 2);
-}
-
-int
-sdbm_fetch(struct sdbm *d, byte *key, uns keylen, byte *val, uns *vallen)
-{
- return sdbm_access(d, key, keylen, val, vallen, 0);
-}
-
-void
-sdbm_rewind(struct sdbm *d)
-{
- d->find_page = 1;
- d->find_pos = 0;
- d->find_free_list = 0;
-}
-
-int
-sdbm_get_next(struct sdbm *d, byte *key, uns *keylen, byte *val, uns *vallen)
-{
- uns page = d->find_page;
- uns pos = d->find_pos;
- byte *K, *V;
- uns c, Kl, Vl;
- struct page *p;
- struct sdbm_bucket *b;
-
- for(;;)
- {
- if (!pos)
- {
- if (page >= d->file_size)
- break;
- if (page == d->root->dir_start)
- page += (4*d->dir_size + d->page_size - 1) >> d->page_order;
- else if (page == d->root->free_pool[d->find_free_list].first)
- page += d->root->free_pool[d->find_free_list++].count;
- else
- pos = 4;
- continue;
- }
- p = READ_PAGE(d, page);
- b = (void *) p->data;
- if (pos - 4 >= b->used)
- {
- pos = 0;
- page++;
- pgc_put(d->cache, p);
- continue;
- }
- c = sdbm_get_entry(d, p->data + pos, &K, &Kl, &V, &Vl);
- d->find_page = page;
- d->find_pos = pos + c;
- c = sdbm_put_user(K, Kl, key, keylen) ||
- sdbm_put_user(V, Vl, val, vallen);
- pgc_put(d->cache, p);
- return c ? SDBM_ERROR_TOO_LARGE : 1;
- }
- d->find_page = page;
- d->find_pos = pos;
- return 0;
-}
-
-void
-sdbm_sync(struct sdbm *d)
-{
- pgc_flush(d->cache);
- if (d->flags & SDBM_FSYNC)
- fsync(d->fd);
-}
+++ /dev/null
-/*
- * UCW Library -- Fast Database Management Routines
- *
- * (c) 1999--2001 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#ifndef _UCW_DB_H
-#define _UCW_DB_H
-
-struct sdbm;
-
-struct sdbm_options { /* Set to 0 for default */
- char *name; /* File name */
- uns flags; /* See SDBM_xxx below */
- uns page_order; /* Binary logarithm of file page size */
- uns cache_size; /* Number of cached pages */
- int key_size; /* Key size, -1=variable */
- int val_size; /* Value size, -1=variable */
-};
-
-struct sdbm *sdbm_open(struct sdbm_options *);
-void sdbm_close(struct sdbm *);
-int sdbm_store(struct sdbm *, byte *key, uns keylen, byte *val, uns vallen);
-int sdbm_replace(struct sdbm *, byte *key, uns keylen, byte *val, uns vallen); /* val == NULL -> delete */
-int sdbm_delete(struct sdbm *, byte *key, uns keylen);
-int sdbm_fetch(struct sdbm *, byte *key, uns keylen, byte *val, uns *vallen); /* val can be NULL */
-void sdbm_rewind(struct sdbm *);
-int sdbm_get_next(struct sdbm *, byte *key, uns *keylen, byte *val, uns *vallen); /* val can be NULL */
-void sdbm_sync(struct sdbm *);
-u32 sdbm_hash(byte *key, uns keylen);
-
-#define SDBM_CREAT 1 /* Create the database if it doesn't exist */
-#define SDBM_WRITE 2 /* Open the database in read/write mode */
-#define SDBM_SYNC 4 /* Sync after each operation */
-#define SDBM_FAST 8 /* Don't sync on directory splits -- results in slightly faster
- * operation, but reconstruction of database after program crash
- * may be impossible.
- */
-#define SDBM_FSYNC 16 /* When syncing, call fsync() */
-
-#define SDBM_ERROR_BAD_KEY_SIZE -1 /* Fixed key size doesn't match */
-#define SDBM_ERROR_BAD_VAL_SIZE -2 /* Fixed value size doesn't match */
-#define SDBM_ERROR_TOO_LARGE -3 /* Key/value doesn't fit in buffer supplied */
-#define SDBM_ERROR_READ_ONLY -4 /* Database has been opened read only */
-#define SDBM_ERROR_GIANT -5 /* Key/value too large to fit in a page */
-
-#endif
+++ /dev/null
-/*
- * UCW Library -- Fast Database Management Routines -- Internal Declarations
- *
- * (c) 1999--2001 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#define SDBM_NUM_FREE_PAGE_POOLS 32
-
-struct sdbm_root { /* Must fit in 1K which is minimum page size */
- u32 magic;
- u32 version;
- u32 page_order; /* Binary logarithm of page size */
- s32 key_size; /* Key/val size, -1=variable */
- s32 val_size;
- u32 dir_start; /* First page of the page directory */
- u32 dir_order; /* Binary logarithm of directory size */
- /*
- * As we know the only thing which can be freed is the page directory
- * and it can grow only a limited number of times, we can use a very
- * simple-minded representation of the free page pool. We also assume
- * these entries are sorted by start position.
- */
- struct {
- u32 first;
- u32 count;
- } free_pool[SDBM_NUM_FREE_PAGE_POOLS];
-};
-
-struct sdbm_bucket {
- u32 used; /* Bytes used in this bucket */
- byte data[0];
-};
-
-struct sdbm {
- struct page_cache *cache;
- int fd;
- struct sdbm_root *root;
- struct page *root_page;
- int key_size; /* Cached values from root page */
- int val_size;
- uns page_order;
- uns page_size;
- uns page_mask; /* page_size - 1 */
- uns dir_size; /* Page directory size in entries */
- uns dir_shift; /* Number of significant bits of hash function */
- uns file_size; /* in pages */
- uns flags;
- uns find_page, find_pos; /* Current pointer for sdbm_find_next() */
- uns find_free_list; /* First free list entry not skipped by sdbm_find_next() */
-};
-
-#define SDBM_MAGIC 0x5344424d
-#define SDBM_VERSION 2
-
-#define GET32(p,o) *((u32 *)((p)+(o)))
+++ /dev/null
-# Configuration variables of the UCW library and their default values
-# (c) 2005--2007 Martin Mares <mj@ucw.cz>
-
-# Version of the whole package
-Set("SHERLOCK_VERSION" => "3.12.3");
-
-# Compile everything with debug information and ASSERT's
-UnSet("CONFIG_DEBUG");
-
-# Enable aggressive optimizations depending on exact CPU type (don't use for portable packages)
-UnSet("CONFIG_EXACT_CPU");
-
-# Support files >2GB
-Set("CONFIG_LARGE_FILES");
-
-# Use shared libraries
-UnSet("CONFIG_SHARED");
-
-# If your system doesn't contain GNU libc 2.3 or newer, it's recommended to let Sherlock
-# use its own regex library (a copy of the glibc one), because the default regex library
-# is likely to be crappy.
-UnSet("CONFIG_OWN_REGEX");
-
-# If your system can't reset getopt with 'optind = 0', you need to compile our internal copy
-# of GNU libc's getopt. This should not be necessary on GNU libc.
-UnSet("CONFIG_OWN_GETOPT");
-
-# Install libraries and their API includes
-UnSet("CONFIG_INSTALL_API");
-
-# Build with support for multi-threaded programs
-Set("CONFIG_UCW_THREADS" => 1);
-
-# Include Perl modules
-Set("CONFIG_UCW_PERL" => 1);
-
-# Include Perl modules written in C
-UnSet("CONFIG_UCW_PERL_MODULES");
-
-# Include support utilities for shell scripts
-Set("CONFIG_UCW_SHELL_UTILS" => 1);
-
-# Default configuration file
-UnSet("DEFAULT_CONFIG");
-
-# Environment variable with configuration file
-UnSet("ENV_VAR_CONFIG");
-
-# Return success
-1;
+++ /dev/null
-/*
- * UCW Library -- Fast Allocator for Fixed-Size Elements
- *
- * (c) 2007 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-/*
- * This allocator is optimized for intensive allocation and freeing of small
- * blocks of identical sizes. System memory is allocated by multiples of the
- * page size and it is returned back only when the whole eltpool is deleted.
- *
- * In the future, we can add returning of memory to the system and also cache
- * coloring like in the SLAB allocator used in the Linux kernel.
- */
-
-#undef LOCAL_DEBUG
-
-#include "lib/lib.h"
-#include "lib/eltpool.h"
-
-struct eltpool *
-ep_new(uns elt_size, uns elts_per_chunk)
-{
- struct eltpool *pool = xmalloc_zero(sizeof(*pool));
- pool->elt_size = ALIGN_TO(MAX(elt_size, sizeof(struct eltpool_free)), CPU_STRUCT_ALIGN);
- pool->chunk_size = CPU_PAGE_SIZE;
- while (pool->elt_size * elts_per_chunk + sizeof(struct eltpool_chunk) > pool->chunk_size)
- pool->chunk_size *= 2;
- pool->elts_per_chunk = (pool->chunk_size - sizeof(struct eltpool_chunk)) / pool->elt_size;
- DBG("ep_new(): got elt_size=%d, epc=%d; used chunk_size=%d, epc=%d", elt_size, elts_per_chunk, pool->chunk_size, pool->elts_per_chunk);
- return pool;
-}
-
-void
-ep_delete(struct eltpool *pool)
-{
- struct eltpool_chunk *ch;
- while (ch = pool->first_chunk)
- {
- pool->first_chunk = ch->next;
- page_free(ch, pool->chunk_size);
- }
- xfree(pool);
-}
-
-void *
-ep_alloc_slow(struct eltpool *pool)
-{
- struct eltpool_chunk *ch = page_alloc(pool->chunk_size);
- void *p = (void *)(ch+1);
- for (uns i=1; i<pool->elts_per_chunk; i++)
- {
- struct eltpool_free *f = p;
- f->next = pool->first_free;
- pool->first_free = f;
- p += pool->elt_size;
- }
- ch->next = pool->first_chunk;
- pool->first_chunk = ch;
- return p;
-}
-
-#ifdef TEST
-
-#include <stdio.h>
-#include "lib/clists.h"
-
-struct argh {
- cnode n;
- byte x[1];
-} PACKED;
-
-int main(void)
-{
- struct eltpool *ep = ep_new(sizeof(struct argh), 64);
- clist l;
- clist_init(&l);
- for (uns i=0; i<65536; i++)
- {
- struct argh *a = ep_alloc(ep);
- if (i % 3)
- clist_add_tail(&l, &a->n);
- else
- clist_add_head(&l, &a->n);
- if (!(i % 5))
- {
- a = clist_head(&l);
- clist_remove(&a->n);
- ep_free(ep, a);
- }
- }
- ep_delete(ep);
- puts("OK");
- return 0;
-}
-
-#endif
+++ /dev/null
-/*
- * UCW Library -- Fast Allocator for Fixed-Size Elements
- *
- * (c) 2007 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#ifndef _UCW_ELTPOOL_H
-#define _UCW_ELTPOOL_H
-
-struct eltpool {
- struct eltpool_chunk *first_chunk;
- struct eltpool_free *first_free;
- uns elt_size;
- uns chunk_size;
- uns elts_per_chunk;
- uns num_allocated; // Just for debugging
-};
-
-struct eltpool_chunk {
- struct eltpool_chunk *next;
- /* Chunk data continue here */
-};
-
-struct eltpool_free {
- struct eltpool_free *next;
-};
-
-struct eltpool *ep_new(uns elt_size, uns elts_per_chunk);
-void ep_delete(struct eltpool *pool);
-void *ep_alloc_slow(struct eltpool *pool);
-
-static inline void *
-ep_alloc(struct eltpool *pool)
-{
- pool->num_allocated++;
-#ifdef CONFIG_FAKE_ELTPOOL
- return xmalloc(pool->elt_size);
-#else
- struct eltpool_free *elt;
- if (elt = pool->first_free)
- pool->first_free = elt->next;
- else
- elt = ep_alloc_slow(pool);
- return elt;
-#endif
-}
-
-static inline void
-ep_free(struct eltpool *pool, void *p)
-{
- pool->num_allocated--;
-#ifdef CONFIG_FAKE_ELTPOOL
- (void) pool;
- xfree(p);
-#else
- struct eltpool_free *elt = p;
- elt->next = pool->first_free;
- pool->first_free = elt;
-#endif
-}
-
-#endif
+++ /dev/null
-# Tests for eltpools
-
-Run: ../obj/lib/eltpool-t
-Out: OK
+++ /dev/null
-/*
- * UCW Library -- Formatting of Process Exit Status
- *
- * (c) 2004 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-
-#include <stdio.h>
-#include <sys/wait.h>
-#include <errno.h>
-
-int
-format_exit_status(char *msg, int stat)
-{
- if (stat < 0)
- sprintf(msg, "failed to fork (err=%d)", errno);
- else if (WIFEXITED(stat) && WEXITSTATUS(stat) < 256)
- {
- if (WEXITSTATUS(stat))
- sprintf(msg, "died with exit code %d", WEXITSTATUS(stat));
- else
- {
- msg[0] = 0;
- return 0;
- }
- }
- else if (WIFSIGNALED(stat))
- sprintf(msg, "died on signal %d", WTERMSIG(stat));
- else
- sprintf(msg, "died with status %x", stat);
- return 1;
-}
+++ /dev/null
-/*
- * UCW Library -- Fast Buffered I/O
- *
- * (c) 1997--2007 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-#include "lib/fastbuf.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-
-void bclose(struct fastbuf *f)
-{
- if (f)
- {
- bflush(f);
- if (f->close)
- f->close(f);
- }
-}
-
-void bflush(struct fastbuf *f)
-{
- if (f->bptr > f->bstop)
- f->spout(f);
- else if (f->bstop > f->buffer)
- f->bptr = f->bstop = f->buffer;
-}
-
-inline void bsetpos(struct fastbuf *f, sh_off_t pos)
-{
- /* We can optimize seeks only when reading */
- if (pos >= f->pos - (f->bstop - f->buffer) && pos <= f->pos)
- f->bptr = f->bstop + (pos - f->pos);
- else
- {
- bflush(f);
- if (!f->seek || !f->seek(f, pos, SEEK_SET))
- die("bsetpos: stream not seekable");
- }
-}
-
-void bseek(struct fastbuf *f, sh_off_t pos, int whence)
-{
- switch (whence)
- {
- case SEEK_SET:
- return bsetpos(f, pos);
- case SEEK_CUR:
- return bsetpos(f, btell(f) + pos);
- case SEEK_END:
- bflush(f);
- if (!f->seek || !f->seek(f, pos, SEEK_END))
- die("bseek: stream not seekable");
- break;
- default:
- die("bseek: invalid whence=%d", whence);
- }
-}
-
-int bgetc_slow(struct fastbuf *f)
-{
- if (f->bptr < f->bstop)
- return *f->bptr++;
- if (!f->refill(f))
- return -1;
- return *f->bptr++;
-}
-
-int bpeekc_slow(struct fastbuf *f)
-{
- if (f->bptr < f->bstop)
- return *f->bptr;
- if (!f->refill(f))
- return -1;
- return *f->bptr;
-}
-
-void bputc_slow(struct fastbuf *f, uns c)
-{
- if (f->bptr >= f->bufend)
- f->spout(f);
- *f->bptr++ = c;
-}
-
-uns bread_slow(struct fastbuf *f, void *b, uns l, uns check)
-{
- uns total = 0;
- while (l)
- {
- uns k = f->bstop - f->bptr;
-
- if (!k)
- {
- f->refill(f);
- k = f->bstop - f->bptr;
- if (!k)
- break;
- }
- if (k > l)
- k = l;
- memcpy(b, f->bptr, k);
- f->bptr += k;
- b = (byte *)b + k;
- l -= k;
- total += k;
- }
- if (check && total && l)
- die("breadb: short read");
- return total;
-}
-
-void bwrite_slow(struct fastbuf *f, const void *b, uns l)
-{
- while (l)
- {
- uns k = f->bufend - f->bptr;
-
- if (!k)
- {
- f->spout(f);
- k = f->bufend - f->bptr;
- }
- if (k > l)
- k = l;
- memcpy(f->bptr, b, k);
- f->bptr += k;
- b = (byte *)b + k;
- l -= k;
- }
-}
-
-void
-bbcopy_slow(struct fastbuf *f, struct fastbuf *t, uns l)
-{
- while (l)
- {
- byte *fptr, *tptr;
- uns favail, tavail, n;
-
- favail = bdirect_read_prepare(f, &fptr);
- if (!favail)
- {
- if (l == ~0U)
- return;
- die("bbcopy: source exhausted");
- }
- tavail = bdirect_write_prepare(t, &tptr);
- n = MIN(l, favail);
- n = MIN(n, tavail);
- memcpy(tptr, fptr, n);
- bdirect_read_commit(f, fptr + n);
- bdirect_write_commit(t, tptr + n);
- if (l != ~0U)
- l -= n;
- }
-}
-
-int
-bconfig(struct fastbuf *f, uns item, int value)
-{
- return f->config ? f->config(f, item, value) : -1;
-}
-
-void
-brewind(struct fastbuf *f)
-{
- bflush(f);
- bsetpos(f, 0);
-}
-
-int
-bskip_slow(struct fastbuf *f, uns len)
-{
- while (len)
- {
- byte *buf;
- uns l = bdirect_read_prepare(f, &buf);
- if (!l)
- return 0;
- l = MIN(l, len);
- bdirect_read_commit(f, buf+l);
- len -= l;
- }
- return 1;
-}
-
-sh_off_t
-bfilesize(struct fastbuf *f)
-{
- if (!f)
- return 0;
- sh_off_t pos = btell(f);
- bflush(f);
- if (!f->seek(f, 0, SEEK_END))
- return -1;
- sh_off_t len = btell(f);
- bsetpos(f, pos);
- return len;
-}
+++ /dev/null
-/*
- * UCW Library -- Fast Buffered I/O
- *
- * (c) 1997--2007 Martin Mares <mj@ucw.cz>
- * (c) 2004 Robert Spalek <robert@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#ifndef _UCW_FASTBUF_H
-#define _UCW_FASTBUF_H
-
-#include <string.h>
-#include <alloca.h>
-
-/*
- * Generic buffered I/O. You supply hooks to be called for low-level operations
- * (swapping of buffers, seeking and closing), we do the rest.
- *
- * Buffer layout when reading:
- *
- * +----------------+---------------------------+
- * | read data | free space |
- * +----------------+---------------------------+
- * ^ ^ ^ ^
- * buffer bptr bstop bufend
- *
- * After the last character is read, bptr == bstop and buffer refill
- * is deferred to the next read attempt. This gives us an easy way
- * how to implement bungetc().
- *
- * When writing:
- *
- * +--------+--------------+--------------------+
- * | unused | written data | free space |
- * +--------+--------------+--------------------+
- * ^ ^ ^ ^
- * buffer bstop bptr bufend
- *
- * Dirty tricks:
- *
- * - You can mix reads and writes on the same stream, but you must
- * call bflush() in between and remember that the file position
- * points after the flushed buffer which is not necessarily the same
- * as after the data you've read.
- * - The spout/refill hooks can change not only bptr and bstop, but also
- * the location of the buffer; fb-mem.c takes advantage of it.
- * - In some cases, the user of the bdirect interface can be allowed to modify
- * the data in the buffer to avoid unnecessary copying. If the back-end
- * allows such modifications, it can set can_overwrite_buffer accordingly:
- * * 0 if no modification is allowed,
- * * 1 if the user can modify the buffer on the condition that
- * the modifications will be undone before calling the next
- * fastbuf operation
- * * 2 if the user is allowed to overwrite the data in the buffer
- * if bdirect_read_commit_modified() is called afterwards.
- * In this case, the back-end must be prepared for trimming
- * of the buffer which is done by the commit function.
- */
-
-struct fastbuf {
- byte is_fastbuf[0]; /* Dummy field for checking of type casts */
- byte *bptr, *bstop; /* Access pointers */
- byte *buffer, *bufend; /* Start and end of the buffer */
- char *name; /* File name for error messages */
- sh_off_t pos; /* Position of bstop in the file */
- int (*refill)(struct fastbuf *); /* Get a buffer with new data */
- void (*spout)(struct fastbuf *); /* Write buffer data to the file */
- int (*seek)(struct fastbuf *, sh_off_t, int); /* Slow path for bseek(), buffer already flushed; returns success */
- void (*close)(struct fastbuf *); /* Close the stream */
- int (*config)(struct fastbuf *, uns, int); /* Configure the stream */
- int can_overwrite_buffer; /* Can the buffer be altered? (see discussion above) 0=never, 1=temporarily, 2=permanently */
-};
-
-/* FastIO on files with several configurable back-ends */
-
-enum fb_type { /* Which back-end you want to use */
- FB_STD, /* Standard buffered I/O */
- FB_DIRECT, /* Direct I/O bypassing system caches (see fb-direct.c for a description) */
- FB_MMAP /* Memory mapped files */
-};
-
-struct fb_params {
- enum fb_type type;
- uns buffer_size; /* 0 for default size */
- uns keep_back_buf; /* FB_STD: optimize for bi-directional access */
- uns read_ahead; /* FB_DIRECT options */
- uns write_back;
- struct asio_queue *asio;
-};
-
-struct cf_section;
-extern struct cf_section fbpar_cf;
-extern struct fb_params fbpar_def;
-
-struct fastbuf *bopen_file(const char *name, int mode, struct fb_params *params); /* Use params==NULL for defaults */
-struct fastbuf *bopen_file_try(const char *name, int mode, struct fb_params *params);
-struct fastbuf *bopen_tmp_file(struct fb_params *params);
-struct fastbuf *bopen_fd(int fd, struct fb_params *params);
-
-/* FastIO on standard files (shortcuts for FB_STD) */
-
-struct fastbuf *bopen(const char *name, uns mode, uns buflen);
-struct fastbuf *bopen_try(const char *name, uns mode, uns buflen);
-struct fastbuf *bopen_tmp(uns buflen);
-struct fastbuf *bfdopen(int fd, uns buflen);
-struct fastbuf *bfdopen_shared(int fd, uns buflen);
-void bfilesync(struct fastbuf *b);
-
-/* Temporary files */
-
-#define TEMP_FILE_NAME_LEN 256
-void temp_file_name(char *name);
-void bfix_tmp_file(struct fastbuf *fb, const char *name);
-
-/* Internal functions of some file back-ends */
-
-struct fastbuf *bfdopen_internal(int fd, const char *name, uns buflen);
-struct fastbuf *bfmmopen_internal(int fd, const char *name, uns mode);
-
-extern uns fbdir_cheat;
-struct asio_queue;
-struct fastbuf *fbdir_open_fd_internal(int fd, const char *name, struct asio_queue *io_queue, uns buffer_size, uns read_ahead, uns write_back);
-
-void bclose_file_helper(struct fastbuf *f, int fd, int is_temp_file);
-
-/* FastIO on in-memory streams */
-
-struct fastbuf *fbmem_create(uns blocksize); /* Create stream and return its writing fastbuf */
-struct fastbuf *fbmem_clone_read(struct fastbuf *); /* Create reading fastbuf */
-
-/* FastI on file descriptors with limit */
-
-struct fastbuf *bopen_limited_fd(int fd, uns bufsize, uns limit);
-
-/* FastIO on static buffers */
-
-void fbbuf_init_read(struct fastbuf *f, byte *buffer, uns size, uns can_overwrite);
-void fbbuf_init_write(struct fastbuf *f, byte *buffer, uns size);
-static inline uns
-fbbuf_count_written(struct fastbuf *f)
-{
- return f->bptr - f->bstop;
-}
-
-/* FastIO on recyclable growing buffers */
-
-struct fastbuf *fbgrow_create(unsigned basic_size);
-void fbgrow_reset(struct fastbuf *b); /* Reset stream and prepare for writing */
-void fbgrow_rewind(struct fastbuf *b); /* Prepare for reading */
-
-/* FastO on memory pools */
-
-struct mempool;
-struct fbpool {
- struct fastbuf fb;
- struct mempool *mp;
-};
-
-void fbpool_init(struct fbpool *fb); /* Initialize a new fastbuf */
-void fbpool_start(struct fbpool *fb, struct mempool *mp, uns init_size);
- /* Start a new continuous block and prepare for writing (see mp_start()) */
-void *fbpool_end(struct fbpool *fb); /* Close the block and return its address (see mp_end()).
- The length can be determined with mp_size(mp, ptr). */
-
-/* FastO with atomic writes for multi-threaded programs */
-
-struct fb_atomic {
- struct fastbuf fb;
- struct fb_atomic_file *af;
- byte *expected_max_bptr;
- uns slack_size;
-};
-#define FB_ATOMIC(f) ((struct fb_atomic *)(f)->is_fastbuf)
-
-struct fastbuf *fbatomic_open(const char *name, struct fastbuf *master, uns bufsize, int record_len);
-void fbatomic_internal_write(struct fastbuf *b);
-
-static inline void
-fbatomic_commit(struct fastbuf *b)
-{
- if (b->bptr >= ((struct fb_atomic *)b)->expected_max_bptr)
- fbatomic_internal_write(b);
-}
-
-/* Configuring stream parameters */
-
-enum bconfig_type {
- BCONFIG_IS_TEMP_FILE, /* 0=normal file, 1=temporary file, 2=shared fd */
- BCONFIG_KEEP_BACK_BUF, /* Optimize for bi-directional access */
-};
-
-int bconfig(struct fastbuf *f, uns type, int data);
-
-/* Universal functions working on all fastbuf's */
-
-void bclose(struct fastbuf *f);
-void bflush(struct fastbuf *f);
-void bseek(struct fastbuf *f, sh_off_t pos, int whence);
-void bsetpos(struct fastbuf *f, sh_off_t pos);
-void brewind(struct fastbuf *f);
-sh_off_t bfilesize(struct fastbuf *f); /* -1 if not seekable */
-
-static inline sh_off_t btell(struct fastbuf *f)
-{
- return f->pos + (f->bptr - f->bstop);
-}
-
-int bgetc_slow(struct fastbuf *f);
-static inline int bgetc(struct fastbuf *f)
-{
- return (f->bptr < f->bstop) ? (int) *f->bptr++ : bgetc_slow(f);
-}
-
-int bpeekc_slow(struct fastbuf *f);
-static inline int bpeekc(struct fastbuf *f)
-{
- return (f->bptr < f->bstop) ? (int) *f->bptr : bpeekc_slow(f);
-}
-
-static inline void bungetc(struct fastbuf *f)
-{
- f->bptr--;
-}
-
-void bputc_slow(struct fastbuf *f, uns c);
-static inline void bputc(struct fastbuf *f, uns c)
-{
- if (f->bptr < f->bufend)
- *f->bptr++ = c;
- else
- bputc_slow(f, c);
-}
-
-static inline uns
-bavailr(struct fastbuf *f)
-{
- return f->bstop - f->bptr;
-}
-
-static inline uns
-bavailw(struct fastbuf *f)
-{
- return f->bufend - f->bptr;
-}
-
-uns bread_slow(struct fastbuf *f, void *b, uns l, uns check);
-static inline uns bread(struct fastbuf *f, void *b, uns l)
-{
- if (bavailr(f) >= l)
- {
- memcpy(b, f->bptr, l);
- f->bptr += l;
- return l;
- }
- else
- return bread_slow(f, b, l, 0);
-}
-
-static inline uns breadb(struct fastbuf *f, void *b, uns l)
-{
- if (bavailr(f) >= l)
- {
- memcpy(b, f->bptr, l);
- f->bptr += l;
- return l;
- }
- else
- return bread_slow(f, b, l, 1);
-}
-
-void bwrite_slow(struct fastbuf *f, const void *b, uns l);
-static inline void bwrite(struct fastbuf *f, const void *b, uns l)
-{
- if (bavailw(f) >= l)
- {
- memcpy(f->bptr, b, l);
- f->bptr += l;
- }
- else
- bwrite_slow(f, b, l);
-}
-
-/*
- * Functions for reading of strings:
- *
- * bgets() reads a line, strip the trailing '\n' and return a pointer
- * to the terminating 0 or NULL on EOF. Dies if the line is too long.
- * bgets0() does the same for 0-terminated strings.
- * bgets_nodie() a variant of bgets() which returns either the length of the
- * string (excluding the terminator) or -1 if the line does not fit
- * in the buffer. In such cases, it returns after reading exactly `l'
- * bytes of input.
- * bgets_bb() a variant of bgets() which allocates the string in a growing buffer
- * bgets_mp() the same, but in a mempool
- * bgets_stk() the same, but on the stack by alloca()
- */
-
-char *bgets(struct fastbuf *f, char *b, uns l);
-char *bgets0(struct fastbuf *f, char *b, uns l);
-int bgets_nodie(struct fastbuf *f, char *b, uns l);
-
-struct mempool;
-struct bb_t;
-uns bgets_bb(struct fastbuf *f, struct bb_t *b, uns limit);
-char *bgets_mp(struct fastbuf *f, struct mempool *mp);
-
-struct bgets_stk_struct {
- struct fastbuf *f;
- byte *old_buf, *cur_buf, *src;
- uns old_len, cur_len, src_len;
-};
-void bgets_stk_init(struct bgets_stk_struct *s);
-void bgets_stk_step(struct bgets_stk_struct *s);
-#define bgets_stk(fb) ({ struct bgets_stk_struct _s; _s.f = (fb); for (bgets_stk_init(&_s); _s.cur_len; _s.cur_buf = alloca(_s.cur_len), bgets_stk_step(&_s)); _s.cur_buf; })
-
-static inline void
-bputs(struct fastbuf *f, const char *b)
-{
- bwrite(f, b, strlen(b));
-}
-
-static inline void
-bputs0(struct fastbuf *f, const char *b)
-{
- bwrite(f, b, strlen(b)+1);
-}
-
-static inline void
-bputsn(struct fastbuf *f, const char *b)
-{
- bputs(f, b);
- bputc(f, '\n');
-}
-
-void bbcopy_slow(struct fastbuf *f, struct fastbuf *t, uns l);
-static inline void
-bbcopy(struct fastbuf *f, struct fastbuf *t, uns l)
-{
- if (bavailr(f) >= l && bavailw(t) >= l)
- {
- memcpy(t->bptr, f->bptr, l);
- t->bptr += l;
- f->bptr += l;
- }
- else
- bbcopy_slow(f, t, l);
-}
-
-int bskip_slow(struct fastbuf *f, uns len);
-static inline int bskip(struct fastbuf *f, uns len)
-{
- if (bavailr(f) >= len)
- {
- f->bptr += len;
- return 1;
- }
- else
- return bskip_slow(f, len);
-}
-
-/* Direct I/O on buffers */
-
-static inline uns
-bdirect_read_prepare(struct fastbuf *f, byte **buf)
-{
- if (f->bptr == f->bstop && !f->refill(f))
- {
- *buf = NULL; // This is not needed, but it helps to get rid of spurious warnings
- return 0;
- }
- *buf = f->bptr;
- return bavailr(f);
-}
-
-static inline void
-bdirect_read_commit(struct fastbuf *f, byte *pos)
-{
- f->bptr = pos;
-}
-
-static inline void
-bdirect_read_commit_modified(struct fastbuf *f, byte *pos)
-{
- f->bptr = pos;
- f->buffer = pos; /* Avoid seeking backwards in the buffer */
-}
-
-static inline uns
-bdirect_write_prepare(struct fastbuf *f, byte **buf)
-{
- if (f->bptr == f->bufend)
- f->spout(f);
- *buf = f->bptr;
- return bavailw(f);
-}
-
-static inline void
-bdirect_write_commit(struct fastbuf *f, byte *pos)
-{
- f->bptr = pos;
-}
-
-/* Formatted output */
-
-int bprintf(struct fastbuf *b, const char *msg, ...) FORMAT_CHECK(printf,2,3);
-int vbprintf(struct fastbuf *b, const char *msg, va_list args);
-
-#endif
+++ /dev/null
-# Tests for fastbufs
-
-Run: ../obj/lib/fb-file-t
-Out: 112
- <hello><hello><hello><hello><hello><hello><hello><hello><hello><hello><hello><hello><hello><hello><hello><hello>
- 112 116
-
-Run: ../obj/lib/fb-grow-t
-Out: <10><10><0>1234512345<10><9>5<10>
- <10><10><0>1234512345<10><9>5<10>
- <10><10><0>1234512345<10><9>5<10>
- <10><10><0>1234512345<10><9>5<10>
- <10><10><0>1234512345<10><9>5<10>
-
-Run: ../obj/lib/fb-pool-t
+++ /dev/null
-/*
- * UCW Library -- Atomic Buffered Write to Files
- *
- * (c) 2006 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-/*
- * This fastbuf backend is intended for cases where several threads
- * of a single program append records to a single file and while the
- * record can mix in an arbitrary way, the bytes inside a single
- * record must remain uninterrupted.
- *
- * In case of files with fixed record size, we just allocate the
- * buffer to hold a whole number of records and take advantage
- * of the atomicity of the write() system call.
- *
- * With variable-sized records, we need another solution: when
- * writing a record, we keep the fastbuf in a locked state, which
- * prevents buffer flushing (and if the buffer becomes full, we extend it),
- * and we wait for an explicit commit operation which write()s the buffer
- * if the free space in the buffer falls below the expected maximum record
- * length.
- *
- * fbatomic_open() is called with the following parameters:
- * name - name of the file to open
- * master - fbatomic for the master thread or NULL if it's the first open
- * bufsize - initial buffer size
- * record_len - record length for fixed-size records;
- * or -(expected maximum record length) for variable-sized ones.
- */
-
-#define LOCAL_DEBUG
-
-#include "lib/lib.h"
-#include "lib/fastbuf.h"
-#include "lib/lfs.h"
-
-#include <string.h>
-#include <fcntl.h>
-#include <unistd.h>
-
-struct fb_atomic_file {
- int fd;
- int use_count;
- int record_len;
- uns locked;
- byte name[1];
-};
-
-void
-fbatomic_internal_write(struct fastbuf *f)
-{
- struct fb_atomic_file *af = FB_ATOMIC(f)->af;
- int size = f->bptr - f->buffer;
- if (size)
- {
- ASSERT(af->record_len < 0 || !(size % af->record_len));
- int res = write(af->fd, f->buffer, size);
- if (res < 0)
- die("Error writing %s: %m", f->name);
- if (res != size)
- die("Unexpected partial write to %s: written only %d bytes of %d", f->name, res, size);
- f->bptr = f->buffer;
- }
-}
-
-static void
-fbatomic_spout(struct fastbuf *f)
-{
- if (f->bptr < f->bufend) /* Explicit flushes should be ignored */
- return;
-
- struct fb_atomic *F = FB_ATOMIC(f);
- if (F->af->locked)
- {
- uns written = f->bptr - f->buffer;
- uns size = f->bufend - f->buffer + F->slack_size;
- F->slack_size *= 2;
- DBG("Reallocating buffer for atomic file %s with slack %d", f->name, F->slack_size);
- f->buffer = xrealloc(f->buffer, size);
- f->bufend = f->buffer + size;
- f->bptr = f->buffer + written;
- F->expected_max_bptr = f->bufend - F->slack_size;
- }
- else
- fbatomic_internal_write(f);
-}
-
-static void
-fbatomic_close(struct fastbuf *f)
-{
- struct fb_atomic_file *af = FB_ATOMIC(f)->af;
- fbatomic_internal_write(f); /* Need to flush explicitly, because the file can be locked */
- if (!--af->use_count)
- {
- close(af->fd);
- xfree(af);
- }
- xfree(f);
-}
-
-struct fastbuf *
-fbatomic_open(const char *name, struct fastbuf *master, uns bufsize, int record_len)
-{
- struct fb_atomic *F = xmalloc_zero(sizeof(*F));
- struct fastbuf *f = &F->fb;
- struct fb_atomic_file *af;
- if (master)
- {
- af = FB_ATOMIC(master)->af;
- af->use_count++;
- ASSERT(af->record_len == record_len);
- }
- else
- {
- af = xmalloc_zero(sizeof(*af) + strlen(name));
- if ((af->fd = sh_open(name, O_WRONLY | O_CREAT | O_TRUNC | O_APPEND, 0666)) < 0)
- die("Cannot create %s: %m", name);
- af->use_count = 1;
- af->record_len = record_len;
- af->locked = (record_len < 0);
- strcpy(af->name, name);
- }
- F->af = af;
- if (record_len > 0 && bufsize % record_len)
- bufsize += record_len - (bufsize % record_len);
- f->buffer = xmalloc(bufsize);
- f->bufend = f->buffer + bufsize;
- F->slack_size = (record_len < 0) ? -record_len : 0;
- ASSERT(bufsize > F->slack_size);
- F->expected_max_bptr = f->bufend - F->slack_size;
- f->bptr = f->bstop = f->buffer;
- f->name = af->name;
- f->spout = fbatomic_spout;
- f->close = fbatomic_close;
- return f;
-}
-
-#ifdef TEST
-
-int main(int argc UNUSED, char **argv UNUSED)
-{
- struct fastbuf *f, *g;
-
- log(L_INFO, "Testing block writes");
- f = fbatomic_open("test", NULL, 16, 4);
- for (u32 i=0; i<17; i++)
- bwrite(f, &i, 4);
- bclose(f);
-
- log(L_INFO, "Testing interleaved var-size writes");
- f = fbatomic_open("test2", NULL, 23, -5);
- g = fbatomic_open("test2", f, 23, -5);
- for (int i=0; i<100; i++)
- {
- struct fastbuf *x = (i%2) ? g : f;
- bprintf(x, "%c<%d>\n", "fg"[i%2], ((259309*i) % 1000000) >> (i % 8));
- fbatomic_commit(x);
- }
- bclose(f);
- bclose(g);
-
- return 0;
-}
-
-#endif
+++ /dev/null
-/*
- * UCW Library -- Fast Buffered I/O on Static Buffers
- *
- * (c) 2003--2006 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-#include "lib/fastbuf.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-
-static int
-fbbuf_refill(struct fastbuf *f UNUSED)
-{
- return 0;
-}
-
-static int
-fbbuf_seek(struct fastbuf *f, sh_off_t pos, int whence)
-{
- /* Somebody might want to seek to the end of buffer, try to be nice to him. */
- sh_off_t len = f->bufend - f->buffer;
- if (whence == SEEK_END)
- pos += len;
- ASSERT(pos >= 0 && pos <= len);
- f->bptr = f->buffer + pos;
- f->bstop = f->bufend;
- f->pos = len;
- return 1;
-}
-
-void
-fbbuf_init_read(struct fastbuf *f, byte *buf, uns size, uns can_overwrite)
-{
- f->buffer = f->bptr = buf;
- f->bstop = f->bufend = buf + size;
- f->name = "fbbuf-read";
- f->pos = size;
- f->refill = fbbuf_refill;
- f->spout = NULL;
- f->seek = fbbuf_seek;
- f->close = NULL;
- f->config = NULL;
- f->can_overwrite_buffer = can_overwrite;
-}
-
-static void
-fbbuf_spout(struct fastbuf *f UNUSED)
-{
- die("fbbuf: buffer overflow on write");
-}
-
-void
-fbbuf_init_write(struct fastbuf *f, byte *buf, uns size)
-{
- f->buffer = f->bstop = f->bptr = buf;
- f->bufend = buf + size;
- f->name = "fbbuf-write";
- f->pos = size;
- f->refill = NULL;
- f->spout = fbbuf_spout;
- f->seek = NULL;
- f->close = NULL;
- f->config = NULL;
- f->can_overwrite_buffer = 0;
-}
+++ /dev/null
-/*
- * UCW Library -- Fast Buffered I/O on O_DIRECT Files
- *
- * (c) 2006--2007 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-/*
- * This is a fastbuf backend for fast streaming I/O using O_DIRECT and
- * the asynchronous I/O module. It's designed for use on large files
- * which don't fit in the disk cache.
- *
- * CAVEATS:
- *
- * - All operations with a single fbdirect handle must be done
- * within a single thread, unless you provide a custom I/O queue
- * and take care of locking.
- *
- * FIXME: what if the OS doesn't support O_DIRECT?
- * FIXME: unaligned seeks and partial writes?
- * FIXME: append to unaligned file
- */
-
-#undef LOCAL_DEBUG
-
-#include "lib/lib.h"
-#include "lib/fastbuf.h"
-#include "lib/lfs.h"
-#include "lib/asio.h"
-#include "lib/conf.h"
-#include "lib/threads.h"
-
-#include <string.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include <stdio.h>
-
-uns fbdir_cheat;
-
-static struct cf_section fbdir_cf = {
- CF_ITEMS {
- CF_UNS("Cheat", &fbdir_cheat),
- CF_END
- }
-};
-
-#define FBDIR_ALIGN 512
-
-enum fbdir_mode { // Current operating mode
- M_NULL,
- M_READ,
- M_WRITE
-};
-
-struct fb_direct {
- struct fastbuf fb;
- int fd; // File descriptor
- int is_temp_file;
- struct asio_queue *io_queue; // I/O queue to use
- struct asio_queue *user_queue; // If io_queue was supplied by the user
- struct asio_request *pending_read;
- struct asio_request *done_read;
- struct asio_request *active_buffer;
- enum fbdir_mode mode;
- byte name[0];
-};
-#define FB_DIRECT(f) ((struct fb_direct *)(f)->is_fastbuf)
-
-static void CONSTRUCTOR
-fbdir_global_init(void)
-{
- cf_declare_section("FBDirect", &fbdir_cf, 0);
-}
-
-static void
-fbdir_read_sync(struct fb_direct *F)
-{
- while (F->pending_read)
- {
- struct asio_request *r = asio_wait(F->io_queue);
- ASSERT(r);
- struct fb_direct *G = r->user_data;
- ASSERT(G);
- ASSERT(G->pending_read == r && !G->done_read);
- G->pending_read = NULL;
- G->done_read = r;
- }
-}
-
-static void
-fbdir_change_mode(struct fb_direct *F, enum fbdir_mode mode)
-{
- if (F->mode == mode)
- return;
- DBG("FB-DIRECT: Switching mode to %d", mode);
- switch (F->mode)
- {
- case M_NULL:
- break;
- case M_READ:
- fbdir_read_sync(F); // Wait for read-ahead requests to finish
- if (F->done_read) // Return read-ahead requests if any
- {
- asio_put(F->done_read);
- F->done_read = NULL;
- }
- break;
- case M_WRITE:
- asio_sync(F->io_queue); // Wait for pending writebacks
- break;
- }
- if (F->active_buffer)
- {
- asio_put(F->active_buffer);
- F->active_buffer = NULL;
- }
- F->mode = mode;
-}
-
-static void
-fbdir_submit_read(struct fb_direct *F)
-{
- struct asio_request *r = asio_get(F->io_queue);
- r->fd = F->fd;
- r->op = ASIO_READ;
- r->len = F->io_queue->buffer_size;
- r->user_data = F;
- asio_submit(r);
- F->pending_read = r;
-}
-
-static int
-fbdir_refill(struct fastbuf *f)
-{
- struct fb_direct *F = FB_DIRECT(f);
-
- DBG("FB-DIRECT: Refill");
-
- if (!F->done_read)
- {
- if (!F->pending_read)
- {
- fbdir_change_mode(F, M_READ);
- fbdir_submit_read(F);
- }
- fbdir_read_sync(F);
- ASSERT(F->done_read);
- }
-
- struct asio_request *r = F->done_read;
- F->done_read = NULL;
- if (F->active_buffer)
- asio_put(F->active_buffer);
- F->active_buffer = r;
- if (!r->status)
- return 0;
- if (r->status < 0)
- die("Error reading %s: %s", f->name, strerror(r->returned_errno));
- f->bptr = f->buffer = r->buffer;
- f->bstop = f->bufend = f->buffer + r->status;
- f->pos += r->status;
-
- fbdir_submit_read(F); // Read-ahead the next block
-
- return r->status;
-}
-
-static void
-fbdir_spout(struct fastbuf *f)
-{
- struct fb_direct *F = FB_DIRECT(f);
- struct asio_request *r;
-
- DBG("FB-DIRECT: Spout");
-
- fbdir_change_mode(F, M_WRITE);
- r = F->active_buffer;
- if (r && f->bptr > f->bstop)
- {
- r->op = ASIO_WRITE_BACK;
- r->fd = F->fd;
- r->len = f->bptr - f->bstop;
- ASSERT(!(f->pos % FBDIR_ALIGN) || fbdir_cheat);
- f->pos += r->len;
- if (!fbdir_cheat && r->len % FBDIR_ALIGN) // Have to simulate incomplete writes
- {
- r->len = ALIGN_TO(r->len, FBDIR_ALIGN);
- asio_submit(r);
- asio_sync(F->io_queue);
- DBG("FB-DIRECT: Truncating at %llu", (long long)f->pos);
- if (sh_ftruncate(F->fd, f->pos) < 0)
- die("Error truncating %s: %m", f->name);
- }
- else
- asio_submit(r);
- r = NULL;
- }
- if (!r)
- r = asio_get(F->io_queue);
- f->bstop = f->bptr = f->buffer = r->buffer;
- f->bufend = f->buffer + F->io_queue->buffer_size;
- F->active_buffer = r;
-}
-
-static int
-fbdir_seek(struct fastbuf *f, sh_off_t pos, int whence)
-{
- DBG("FB-DIRECT: Seek %llu %d", (long long)pos, whence);
-
- if (whence == SEEK_SET && pos == f->pos)
- return 1;
-
- fbdir_change_mode(FB_DIRECT(f), M_NULL); // Wait for all async requests to finish
- sh_off_t l = sh_seek(FB_DIRECT(f)->fd, pos, whence);
- if (l < 0)
- return 0;
- f->pos = l;
- return 1;
-}
-
-static struct asio_queue *
-fbdir_get_io_queue(uns buffer_size, uns write_back)
-{
- struct ucwlib_context *ctx = ucwlib_thread_context();
- struct asio_queue *q = ctx->io_queue;
- if (!q)
- {
- q = xmalloc_zero(sizeof(struct asio_queue));
- q->buffer_size = buffer_size;
- q->max_writebacks = write_back;
- asio_init_queue(q);
- ctx->io_queue = q;
- }
- q->use_count++;
- DBG("FB-DIRECT: Got I/O queue, uc=%d", q->use_count);
- return q;
-}
-
-static void
-fbdir_put_io_queue(void)
-{
- struct ucwlib_context *ctx = ucwlib_thread_context();
- struct asio_queue *q = ctx->io_queue;
- ASSERT(q);
- DBG("FB-DIRECT: Put I/O queue, uc=%d", q->use_count);
- if (!--q->use_count)
- {
- asio_cleanup_queue(q);
- xfree(q);
- ctx->io_queue = NULL;
- }
-}
-
-static void
-fbdir_close(struct fastbuf *f)
-{
- struct fb_direct *F = FB_DIRECT(f);
-
- DBG("FB-DIRECT: Close");
-
- fbdir_change_mode(F, M_NULL);
- if (!F->user_queue)
- fbdir_put_io_queue();
-
- bclose_file_helper(f, F->fd, F->is_temp_file);
- xfree(f);
-}
-
-static int
-fbdir_config(struct fastbuf *f, uns item, int value)
-{
- int orig;
-
- switch (item)
- {
- case BCONFIG_IS_TEMP_FILE:
- orig = FB_DIRECT(f)->is_temp_file;
- FB_DIRECT(f)->is_temp_file = value;
- return orig;
- default:
- return -1;
- }
-}
-
-struct fastbuf *
-fbdir_open_fd_internal(int fd, const char *name, struct asio_queue *q, uns buffer_size, uns read_ahead UNUSED, uns write_back)
-{
- int namelen = strlen(name) + 1;
- struct fb_direct *F = xmalloc(sizeof(struct fb_direct) + namelen);
- struct fastbuf *f = &F->fb;
-
- DBG("FB-DIRECT: Open");
- bzero(F, sizeof(*F));
- f->name = F->name;
- memcpy(f->name, name, namelen);
- F->fd = fd;
- if (q)
- F->io_queue = F->user_queue = q;
- else
- F->io_queue = fbdir_get_io_queue(buffer_size, write_back);
- f->refill = fbdir_refill;
- f->spout = fbdir_spout;
- f->seek = fbdir_seek;
- f->close = fbdir_close;
- f->config = fbdir_config;
- f->can_overwrite_buffer = 2;
- return f;
-}
-
-#ifdef TEST
-
-#include "lib/getopt.h"
-
-int main(int argc, char **argv)
-{
- struct fb_params par = { .type = FB_DIRECT };
- struct fastbuf *f, *t;
-
- log_init(NULL);
- if (cf_getopt(argc, argv, CF_SHORT_OPTS, CF_NO_LONG_OPTS, NULL) >= 0)
- die("Hey, whaddya want?");
- f = (optind < argc) ? bopen_file(argv[optind++], O_RDONLY, &par) : bopen_fd(0, &par);
- t = (optind < argc) ? bopen_file(argv[optind++], O_RDWR | O_CREAT | O_TRUNC, &par) : bopen_fd(1, &par);
-
- bbcopy(f, t, ~0U);
- ASSERT(btell(f) == btell(t));
-
-#if 0 // This triggers unaligned write
- bflush(t);
- bputc(t, '\n');
-#endif
-
- brewind(t);
- bgetc(t);
- ASSERT(btell(t) == 1);
-
- bclose(f);
- bclose(t);
- return 0;
-}
-
-#endif
+++ /dev/null
-/*
- * UCW Library -- Fast Buffered I/O on Files
- *
- * (c) 1997--2007 Martin Mares <mj@ucw.cz>
- * (c) 2007 Pavel Charvat <pchar@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-#include "lib/fastbuf.h"
-#include "lib/lfs.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <fcntl.h>
-#include <unistd.h>
-
-struct fb_file {
- struct fastbuf fb;
- int fd; /* File descriptor */
- int is_temp_file;
- int keep_back_buf; /* Optimize for backwards reading */
- sh_off_t wpos; /* Real file position */
- uns wlen; /* Window size */
-};
-#define FB_FILE(f) ((struct fb_file *)(f)->is_fastbuf)
-#define FB_BUFFER(f) (byte *)(FB_FILE(f) + 1)
-
-static int
-bfd_refill(struct fastbuf *f)
-{
- struct fb_file *F = FB_FILE(f);
- byte *read_ptr = (f->buffer = FB_BUFFER(f));
- uns blen = f->bufend - f->buffer, back = F->keep_back_buf ? blen >> 2 : 0, read_len = blen;
- /* Forward or no seek */
- if (F->wpos <= f->pos)
- {
- sh_off_t diff = f->pos - F->wpos;
- /* Formula for long forward seeks (prefer lseek()) */
- if (diff > ((sh_off_t)blen << 2))
- {
-long_seek:
- f->bptr = f->buffer + back;
- f->bstop = f->buffer + blen;
- goto seek;
- }
- /* Short forward seek (prefer read() to skip data )*/
- else if ((uns)diff >= back)
- {
- uns skip = diff - back;
- F->wpos += skip;
- while (skip)
- {
- int l = read(F->fd, f->buffer, MIN(skip, blen));
- if (unlikely(l <= 0))
- if (l < 0)
- die("Error reading %s: %m", f->name);
- else
- {
- F->wpos -= skip;
- goto eof;
- }
- skip -= l;
- }
- }
- /* Reuse part of the previous window and append new data (also F->wpos == f->pos) */
- else
- {
- uns keep = back - (uns)diff;
- if (keep >= F->wlen)
- back = diff + (keep = F->wlen);
- else
- memmove(f->buffer, f->buffer + F->wlen - keep, keep);
- read_len -= keep;
- read_ptr += keep;
- }
- f->bptr = f->buffer + back;
- f->bstop = f->buffer + blen;
- }
- /* Backwards seek */
- else
- {
- sh_off_t diff = F->wpos - f->pos;
- /* Formula for long backwards seeks (keep smaller backbuffer than for shorter seeks ) */
- if (diff > ((sh_off_t)blen << 1))
- {
- if ((sh_off_t)back > f->pos)
- back = f->pos;
- goto long_seek;
- }
- /* Seek into previous window (do nothing... for example brewind) */
- else if ((uns)diff <= F->wlen)
- {
- f->bstop = f->buffer + F->wlen;
- f->bptr = f->bstop - diff;
- f->pos = F->wpos;
- return 1;
- }
- back *= 3;
- if ((sh_off_t)back > f->pos)
- back = f->pos;
- f->bptr = f->buffer + back;
- read_len = blen;
- f->bstop = f->buffer + read_len;
- /* Reuse part of previous window */
- if (F->wlen && read_len <= back + diff && read_len > back + diff - F->wlen)
- {
- uns keep = read_len + F->wlen - back - diff;
- memmove(f->buffer + read_len - keep, f->buffer, keep);
- }
-seek:
- /* Do lseek() */
- F->wpos = f->pos + (f->buffer - f->bptr);
- if (sh_seek(F->fd, F->wpos, SEEK_SET) < 0)
- die("Error seeking %s: %m", f->name);
- }
- /* Read (part of) buffer */
- do
- {
- int l = read(F->fd, read_ptr, read_len);
- if (unlikely(l < 0))
- die("Error reading %s: %m", f->name);
- if (!l)
- if (unlikely(read_ptr < f->bptr))
- goto eof;
- else
- break; /* Incomplete read because of EOF */
- read_ptr += l;
- read_len -= l;
- F->wpos += l;
- }
- while (read_ptr <= f->bptr);
- if (read_len)
- f->bstop = read_ptr;
- f->pos += f->bstop - f->bptr;
- F->wlen = f->bstop - f->buffer;
- return f->bstop - f->bptr;
-eof:
- /* Seeked behind EOF */
- f->bptr = f->bstop = f->buffer;
- F->wlen = 0;
- return 0;
-}
-
-static void
-bfd_spout(struct fastbuf *f)
-{
- /* Do delayed lseek() if needed */
- if (FB_FILE(f)->wpos != f->pos && sh_seek(FB_FILE(f)->fd, f->pos, SEEK_SET) < 0)
- die("Error seeking %s: %m", f->name);
-
- int l = f->bptr - f->buffer;
- byte *c = f->buffer;
-
- /* Write the buffer */
- FB_FILE(f)->wpos = (f->pos += l);
- FB_FILE(f)->wlen = 0;
- while (l)
- {
- int z = write(FB_FILE(f)->fd, c, l);
- if (z <= 0)
- die("Error writing %s: %m", f->name);
- l -= z;
- c += z;
- }
- f->bptr = f->buffer = FB_BUFFER(f);
-}
-
-static int
-bfd_seek(struct fastbuf *f, sh_off_t pos, int whence)
-{
- /* Delay the seek for the next refill() or spout() call (if whence != SEEK_END). */
- sh_off_t l;
- switch (whence)
- {
- case SEEK_SET:
- f->pos = pos;
- return 1;
- case SEEK_CUR:
- l = f->pos + pos;
- if ((pos > 0) ^ (l > f->pos))
- return 0;
- f->pos = l;
- return 1;
- case SEEK_END:
- l = sh_seek(FB_FILE(f)->fd, pos, SEEK_END);
- if (l < 0)
- return 0;
- FB_FILE(f)->wpos = f->pos = l;
- FB_FILE(f)->wlen = 0;
- return 1;
- default:
- ASSERT(0);
- }
-}
-
-static void
-bfd_close(struct fastbuf *f)
-{
- bclose_file_helper(f, FB_FILE(f)->fd, FB_FILE(f)->is_temp_file);
- xfree(f);
-}
-
-static int
-bfd_config(struct fastbuf *f, uns item, int value)
-{
- int orig;
-
- switch (item)
- {
- case BCONFIG_IS_TEMP_FILE:
- orig = FB_FILE(f)->is_temp_file;
- FB_FILE(f)->is_temp_file = value;
- return orig;
- case BCONFIG_KEEP_BACK_BUF:
- orig = FB_FILE(f)->keep_back_buf;
- FB_FILE(f)->keep_back_buf = value;
- return orig;
- default:
- return -1;
- }
-}
-
-struct fastbuf *
-bfdopen_internal(int fd, const char *name, uns buflen)
-{
- ASSERT(buflen);
- int namelen = strlen(name) + 1;
- struct fb_file *F = xmalloc_zero(sizeof(struct fb_file) + buflen + namelen);
- struct fastbuf *f = &F->fb;
-
- bzero(F, sizeof(*F));
- f->buffer = (byte *)(F+1);
- f->bptr = f->bstop = f->buffer;
- f->bufend = f->buffer + buflen;
- f->name = f->bufend;
- memcpy(f->name, name, namelen);
- F->fd = fd;
- f->refill = bfd_refill;
- f->spout = bfd_spout;
- f->seek = bfd_seek;
- f->close = bfd_close;
- f->config = bfd_config;
- f->can_overwrite_buffer = 2;
- return f;
-}
-
-void
-bfilesync(struct fastbuf *b)
-{
- bflush(b);
- if (fsync(FB_FILE(b)->fd) < 0)
- msg(L_ERROR, "fsync(%s) failed: %m", b->name);
-}
-
-#ifdef TEST
-
-int main(void)
-{
- struct fastbuf *f, *t;
- f = bopen_tmp(16);
- t = bfdopen_shared(1, 13);
- for (uns i = 0; i < 16; i++)
- bwrite(f, "<hello>", 7);
- bprintf(t, "%d\n", (int)btell(f));
- brewind(f);
- bbcopy(f, t, ~0U);
- bprintf(t, "\n%d %d\n", (int)btell(f), (int)btell(t));
- bclose(f);
- bclose(t);
- return 0;
-}
-
-#endif
+++ /dev/null
-/*
- * UCW Library -- Fast Buffered I/O on Growing Buffers
- *
- * (c) 2006 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-#include "lib/fastbuf.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-
-struct fb_gbuf {
- struct fastbuf fb;
- byte *last_written;
-};
-#define FB_GBUF(f) ((struct fb_gbuf *)(f)->is_fastbuf)
-
-static int
-fbgrow_refill(struct fastbuf *b)
-{
- if (b->bstop != FB_GBUF(b)->last_written)
- {
- /* There was an intervening flush */
- b->bstop = FB_GBUF(b)->last_written;
- b->pos = b->bstop - b->buffer;
- return 1;
- }
- /* We are at the end */
- return 0;
-}
-
-static void
-fbgrow_spout(struct fastbuf *b)
-{
- if (b->bptr >= b->bufend)
- {
- uns len = b->bufend - b->buffer;
- b->buffer = xrealloc(b->buffer, 2*len);
- b->bufend = b->buffer + 2*len;
- b->bstop = b->buffer;
- b->bptr = b->buffer + len;
- }
-}
-
-static int
-fbgrow_seek(struct fastbuf *b, sh_off_t pos, int whence)
-{
- ASSERT(FB_GBUF(b)->last_written); /* Seeks allowed only in read mode */
- sh_off_t len = FB_GBUF(b)->last_written - b->buffer;
- if (whence == SEEK_END)
- pos += len;
- ASSERT(pos >= 0 && pos <= len);
- b->bptr = b->buffer + pos;
- b->bstop = FB_GBUF(b)->last_written;
- b->pos = len;
- return 1;
-}
-
-static void
-fbgrow_close(struct fastbuf *b)
-{
- xfree(b->buffer);
- xfree(b);
-}
-
-struct fastbuf *
-fbgrow_create(unsigned basic_size)
-{
- struct fastbuf *b = xmalloc_zero(sizeof(struct fb_gbuf));
- b->buffer = xmalloc(basic_size);
- b->bufend = b->buffer + basic_size;
- b->bptr = b->bstop = b->buffer;
- b->name = "<fbgbuf>";
- b->refill = fbgrow_refill;
- b->spout = fbgrow_spout;
- b->seek = fbgrow_seek;
- b->close = fbgrow_close;
- b->can_overwrite_buffer = 1;
- return b;
-}
-
-void
-fbgrow_reset(struct fastbuf *b)
-{
- b->bptr = b->bstop = b->buffer;
- b->pos = 0;
- FB_GBUF(b)->last_written = NULL;
-}
-
-void
-fbgrow_rewind(struct fastbuf *b)
-{
- if (!FB_GBUF(b)->last_written)
- {
- /* Last operation was a write, so remember the end position */
- FB_GBUF(b)->last_written = b->bptr;
- }
- b->bptr = b->buffer;
- b->bstop = FB_GBUF(b)->last_written;
- b->pos = b->bstop - b->buffer;
-}
-
-#ifdef TEST
-
-int main(void)
-{
- struct fastbuf *f;
- uns t;
-
- f = fbgrow_create(3);
- for (uns i=0; i<5; i++)
- {
- fbgrow_reset(f);
- bwrite(f, "12345", 5);
- bwrite(f, "12345", 5);
- printf("<%d>", (int)btell(f));
- bflush(f);
- printf("<%d>", (int)btell(f));
- fbgrow_rewind(f);
- printf("<%d>", (int)btell(f));
- while ((t = bgetc(f)) != ~0U)
- putchar(t);
- printf("<%d>", (int)btell(f));
- fbgrow_rewind(f);
- bseek(f, -1, SEEK_END);
- printf("<%d>", (int)btell(f));
- while ((t = bgetc(f)) != ~0U)
- putchar(t);
- printf("<%d>\n", (int)btell(f));
- }
- bclose(f);
- return 0;
-}
-
-#endif
+++ /dev/null
-/*
- * UCW Library -- Fast Buffered Input on Limited File Descriptors
- *
- * (c) 2003--2004 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-#include "lib/fastbuf.h"
-
-#include <unistd.h>
-
-struct fb_limfd {
- struct fastbuf fb;
- int fd; /* File descriptor */
- int limit;
-};
-#define FB_LIMFD(f) ((struct fb_limfd *)(f)->is_fastbuf)
-#define FB_BUFFER(f) (byte *)(FB_LIMFD(f) + 1)
-
-static int
-bfl_refill(struct fastbuf *f)
-{
- f->bptr = f->buffer = FB_BUFFER(f);
- int max = MIN(FB_LIMFD(f)->limit - f->pos, f->bufend - f->buffer);
- int l = read(FB_LIMFD(f)->fd, f->buffer, max);
- if (l < 0)
- die("Error reading %s: %m", f->name);
- f->bstop = f->buffer + l;
- f->pos += l;
- return l;
-}
-
-static void
-bfl_close(struct fastbuf *f)
-{
- xfree(f);
-}
-
-struct fastbuf *
-bopen_limited_fd(int fd, uns buflen, uns limit)
-{
- struct fb_limfd *F = xmalloc(sizeof(struct fb_limfd) + buflen);
- struct fastbuf *f = &F->fb;
-
- bzero(F, sizeof(*F));
- f->buffer = (char *)(F+1);
- f->bptr = f->bstop = f->buffer;
- f->bufend = f->buffer + buflen;
- f->name = "limited-fd";
- F->fd = fd;
- F->limit = limit;
- f->refill = bfl_refill;
- f->close = bfl_close;
- f->can_overwrite_buffer = 2;
- return f;
-}
-
-#ifdef TEST
-
-int main(int argc, char **argv)
-{
- struct fastbuf *f = bopen_limited_fd(0, 3, 13);
- struct fastbuf *o = bfdopen_shared(1, 16);
- int c;
- while ((c = bgetc(f)) >= 0)
- bputc(o, c);
- bclose(o);
- bclose(f);
- return 0;
-}
-
-#endif
+++ /dev/null
-/*
- * UCW Library -- Fast Buffered I/O on Memory Streams
- *
- * (c) 1997--2002 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-#include "lib/fastbuf.h"
-
-#include <stdio.h>
-
-struct memstream {
- unsigned blocksize;
- unsigned uc;
- struct msblock *first;
-};
-
-struct msblock {
- struct msblock *next;
- sh_off_t pos;
- unsigned size;
- byte data[0];
-};
-
-struct fb_mem {
- struct fastbuf fb;
- struct memstream *stream;
- struct msblock *block;
-};
-#define FB_MEM(f) ((struct fb_mem *)(f)->is_fastbuf)
-
-static int
-fbmem_refill(struct fastbuf *f)
-{
- struct memstream *s = FB_MEM(f)->stream;
- struct msblock *b = FB_MEM(f)->block;
-
- if (!b)
- {
- b = s->first;
- if (!b)
- return 0;
- }
- else if (f->buffer == b->data && f->bstop < b->data + b->size)
- {
- f->bstop = b->data + b->size;
- f->pos = b->pos + b->size;
- return 1;
- }
- else if (!b->next)
- return 0;
- else
- b = b->next;
- if (!b->size)
- return 0;
- f->buffer = f->bptr = b->data;
- f->bufend = f->bstop = b->data + b->size;
- f->pos = b->pos + b->size;
- FB_MEM(f)->block = b;
- return 1;
-}
-
-static void
-fbmem_spout(struct fastbuf *f)
-{
- struct memstream *s = FB_MEM(f)->stream;
- struct msblock *b = FB_MEM(f)->block;
- struct msblock *bb;
-
- if (b)
- {
- b->size = f->bptr - b->data;
- if (b->size < s->blocksize)
- return;
- }
- bb = xmalloc(sizeof(struct msblock) + s->blocksize);
- if (b)
- {
- b->next = bb;
- bb->pos = b->pos + b->size;
- }
- else
- {
- s->first = bb;
- bb->pos = 0;
- }
- bb->next = NULL;
- bb->size = 0;
- f->buffer = f->bptr = f->bstop = bb->data;
- f->bufend = bb->data + s->blocksize;
- f->pos = bb->pos;
- FB_MEM(f)->block = bb;
-}
-
-static int
-fbmem_seek(struct fastbuf *f, sh_off_t pos, int whence)
-{
- struct memstream *m = FB_MEM(f)->stream;
- struct msblock *b;
-
- ASSERT(whence == SEEK_SET || whence == SEEK_END);
- if (whence == SEEK_END)
- {
- for (b=m->first; b; b=b->next)
- pos += b->size;
- }
- /* Yes, this is linear. But considering the average number of buckets, it doesn't matter. */
- for (b=m->first; b; b=b->next)
- {
- if (pos <= b->pos + (sh_off_t)b->size) /* <=, because we need to be able to seek just after file end */
- {
- f->buffer = b->data;
- f->bptr = b->data + (pos - b->pos);
- f->bufend = f->bstop = b->data + b->size;
- f->pos = b->pos + b->size;
- FB_MEM(f)->block = b;
- return 1;
- }
- }
- if (!m->first && !pos)
- {
- /* Seeking to offset 0 in an empty file needs an exception */
- f->buffer = f->bptr = f->bufend = NULL;
- f->pos = 0;
- FB_MEM(f)->block = NULL;
- return 1;
- }
- die("fbmem_seek to invalid offset");
-}
-
-static void
-fbmem_close(struct fastbuf *f)
-{
- struct memstream *m = FB_MEM(f)->stream;
- struct msblock *b;
-
- if (!--m->uc)
- {
- while (b = m->first)
- {
- m->first = b->next;
- xfree(b);
- }
- xfree(m);
- }
- xfree(f);
-}
-
-struct fastbuf *
-fbmem_create(unsigned blocksize)
-{
- struct fastbuf *f = xmalloc_zero(sizeof(struct fb_mem));
- struct memstream *s = xmalloc_zero(sizeof(struct memstream));
-
- s->blocksize = blocksize;
- s->uc = 1;
-
- FB_MEM(f)->stream = s;
- f->name = "<fbmem-write>";
- f->spout = fbmem_spout;
- f->close = fbmem_close;
- return f;
-}
-
-struct fastbuf *
-fbmem_clone_read(struct fastbuf *b)
-{
- struct fastbuf *f = xmalloc_zero(sizeof(struct fb_mem));
- struct memstream *s = FB_MEM(b)->stream;
-
- bflush(b);
- s->uc++;
-
- FB_MEM(f)->stream = s;
- f->name = "<fbmem-read>";
- f->refill = fbmem_refill;
- f->seek = fbmem_seek;
- f->close = fbmem_close;
- f->can_overwrite_buffer = 1;
- return f;
-}
-
-#ifdef TEST
-
-int main(void)
-{
- struct fastbuf *w, *r;
- int t;
-
- w = fbmem_create(7);
- r = fbmem_clone_read(w);
- bwrite(w, "12345", 5);
- bwrite(w, "12345", 5);
- printf("<%d>", (int)btell(w));
- bflush(w);
- printf("<%d>", (int)btell(w));
- printf("<%d>", (int)btell(r));
- while ((t = bgetc(r)) >= 0)
- putchar(t);
- printf("<%d>", (int)btell(r));
- bwrite(w, "12345", 5);
- bwrite(w, "12345", 5);
- printf("<%d>", (int)btell(w));
- bclose(w);
- bsetpos(r, 0);
- printf("<!%d>", (int)btell(r));
- while ((t = bgetc(r)) >= 0)
- putchar(t);
- bsetpos(r, 3);
- printf("<!%d>", (int)btell(r));
- while ((t = bgetc(r)) >= 0)
- putchar(t);
- fflush(stdout);
- bclose(r);
- return 0;
-}
-
-#endif
+++ /dev/null
-/*
- * UCW Library -- Fast Buffered I/O on Memory-Mapped Files
- *
- * (c) 2002 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#undef LOCAL_DEBUG
-
-#include "lib/lib.h"
-#include "lib/fastbuf.h"
-#include "lib/lfs.h"
-#include "lib/conf.h"
-
-#include <string.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include <sys/mman.h>
-
-static uns mmap_window_size = 16*CPU_PAGE_SIZE;
-static uns mmap_extend_size = 4*CPU_PAGE_SIZE;
-
-static struct cf_section fbmm_config = {
- CF_ITEMS {
- CF_UNS("WindowSize", &mmap_window_size),
- CF_UNS("ExtendSize", &mmap_extend_size),
- CF_END
- }
-};
-
-static void CONSTRUCTOR fbmm_init_config(void)
-{
- cf_declare_section("FBMMap", &fbmm_config, 0);
-}
-
-struct fb_mmap {
- struct fastbuf fb;
- int fd;
- int is_temp_file;
- sh_off_t file_size;
- sh_off_t file_extend;
- sh_off_t window_pos;
- uns window_size;
- int mode;
-};
-#define FB_MMAP(f) ((struct fb_mmap *)(f)->is_fastbuf)
-
-static void
-bfmm_map_window(struct fastbuf *f)
-{
- struct fb_mmap *F = FB_MMAP(f);
- sh_off_t pos0 = f->pos & ~(sh_off_t)(CPU_PAGE_SIZE-1);
- int l = MIN((sh_off_t)mmap_window_size, F->file_extend - pos0);
- uns ll = ALIGN_TO(l, CPU_PAGE_SIZE);
- int prot = ((F->mode & O_ACCMODE) == O_RDONLY) ? PROT_READ : (PROT_READ | PROT_WRITE);
-
- DBG(" ... Mapping %x(%x)+%x(%x) len=%x extend=%x", (int)pos0, (int)f->pos, ll, l, (int)F->file_size, (int)F->file_extend);
- if (ll != F->window_size && f->buffer)
- {
- munmap(f->buffer, F->window_size);
- f->buffer = NULL;
- }
- F->window_size = ll;
- if (!f->buffer)
- f->buffer = sh_mmap(NULL, ll, prot, MAP_SHARED, F->fd, pos0);
- else
- f->buffer = sh_mmap(f->buffer, ll, prot, MAP_SHARED | MAP_FIXED, F->fd, pos0);
- if (f->buffer == (byte *) MAP_FAILED)
- die("mmap(%s): %m", f->name);
-#ifdef MADV_SEQUENTIAL
- if (ll > CPU_PAGE_SIZE)
- madvise(f->buffer, ll, MADV_SEQUENTIAL);
-#endif
- f->bufend = f->buffer + l;
- f->bptr = f->buffer + (f->pos - pos0);
- F->window_pos = pos0;
-}
-
-static int
-bfmm_refill(struct fastbuf *f)
-{
- struct fb_mmap *F = FB_MMAP(f);
-
- DBG("Refill <- %p %p %p %p", f->buffer, f->bptr, f->bstop, f->bufend);
- if (f->pos >= F->file_size)
- return 0;
- if (f->bstop >= f->bufend)
- bfmm_map_window(f);
- if (F->window_pos + (f->bufend - f->buffer) > F->file_size)
- f->bstop = f->buffer + (F->file_size - F->window_pos);
- else
- f->bstop = f->bufend;
- f->pos = F->window_pos + (f->bstop - f->buffer);
- DBG(" -> %p %p %p(%x) %p", f->buffer, f->bptr, f->bstop, (int)f->pos, f->bufend);
- return 1;
-}
-
-static void
-bfmm_spout(struct fastbuf *f)
-{
- struct fb_mmap *F = FB_MMAP(f);
- sh_off_t end = f->pos + (f->bptr - f->bstop);
-
- DBG("Spout <- %p %p %p %p", f->buffer, f->bptr, f->bstop, f->bufend);
- if (end > F->file_size)
- F->file_size = end;
- if (f->bptr < f->bufend)
- return;
- f->pos = end;
- if (f->pos >= F->file_extend)
- {
- F->file_extend = ALIGN_TO(F->file_extend + mmap_extend_size, (sh_off_t)CPU_PAGE_SIZE);
- if (sh_ftruncate(F->fd, F->file_extend))
- die("ftruncate(%s): %m", f->name);
- }
- bfmm_map_window(f);
- f->bstop = f->bptr;
- DBG(" -> %p %p %p(%x) %p", f->buffer, f->bptr, f->bstop, (int)f->pos, f->bufend);
-}
-
-static int
-bfmm_seek(struct fastbuf *f, sh_off_t pos, int whence)
-{
- if (whence == SEEK_END)
- pos += FB_MMAP(f)->file_size;
- else
- ASSERT(whence == SEEK_SET);
- ASSERT(pos >= 0 && pos <= FB_MMAP(f)->file_size);
- f->pos = pos;
- f->bptr = f->bstop = f->bufend = f->buffer; /* force refill/spout call */
- DBG("Seek -> %p %p %p(%x) %p", f->buffer, f->bptr, f->bstop, (int)f->pos, f->bufend);
- return 1;
-}
-
-static void
-bfmm_close(struct fastbuf *f)
-{
- struct fb_mmap *F = FB_MMAP(f);
-
- if (f->buffer)
- munmap(f->buffer, F->window_size);
- if (F->file_extend > F->file_size &&
- sh_ftruncate(F->fd, F->file_size))
- die("ftruncate(%s): %m", f->name);
- bclose_file_helper(f, F->fd, F->is_temp_file);
- xfree(f);
-}
-
-static int
-bfmm_config(struct fastbuf *f, uns item, int value)
-{
- int orig;
-
- switch (item)
- {
- case BCONFIG_IS_TEMP_FILE:
- orig = FB_MMAP(f)->is_temp_file;
- FB_MMAP(f)->is_temp_file = value;
- return orig;
- default:
- return -1;
- }
-}
-
-struct fastbuf *
-bfmmopen_internal(int fd, const char *name, uns mode)
-{
- int namelen = strlen(name) + 1;
- struct fb_mmap *F = xmalloc(sizeof(struct fb_mmap) + namelen);
- struct fastbuf *f = &F->fb;
-
- bzero(F, sizeof(*F));
- f->name = (byte *)(F+1);
- memcpy(f->name, name, namelen);
- F->fd = fd;
- F->file_extend = F->file_size = sh_seek(fd, 0, SEEK_END);
- if (F->file_size < 0)
- die("seek(%s): %m", name);
- if (mode & O_APPEND)
- f->pos = F->file_size;
- F->mode = mode;
-
- f->refill = bfmm_refill;
- f->spout = bfmm_spout;
- f->seek = bfmm_seek;
- f->close = bfmm_close;
- f->config = bfmm_config;
- return f;
-}
-
-#ifdef TEST
-
-int main(int argc, char **argv)
-{
- struct fb_params par = { .type = FB_MMAP };
- struct fastbuf *f = bopen_file(argv[1], O_RDONLY, &par);
- struct fastbuf *g = bopen_file(argv[2], O_RDWR | O_CREAT | O_TRUNC, &par);
- int c;
-
- DBG("Copying");
- while ((c = bgetc(f)) >= 0)
- bputc(g, c);
- bclose(f);
- DBG("Seek inside last block");
- bsetpos(g, btell(g)-1333);
- bputc(g, 13);
- DBG("Seek to the beginning & write");
- bsetpos(g, 1333);
- bputc(g, 13);
- DBG("flush");
- bflush(g);
- bputc(g, 13);
- bflush(g);
- DBG("Seek nearby & read");
- bsetpos(g, 133);
- bgetc(g);
- DBG("Seek far & read");
- bsetpos(g, 133333);
- bgetc(g);
- DBG("Closing");
- bclose(g);
-
- return 0;
-}
-
-#endif
+++ /dev/null
-/*
- * UCW Library -- FastIO on files with run-time parametrization
- *
- * (c) 2007 Pavel Charvat <pchar@ucw.cz>
- * (c) 2007 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-#include "lib/conf.h"
-#include "lib/lfs.h"
-#include "lib/fastbuf.h"
-
-#include <fcntl.h>
-#include <stdio.h>
-
-struct fb_params fbpar_def = {
- .buffer_size = 65536,
- .read_ahead = 1,
- .write_back = 1,
-};
-
-static char *
-fbpar_cf_commit(struct fb_params *p UNUSED)
-{
-#ifndef CONFIG_UCW_THREADS
- if (p->type == FB_DIRECT)
- return "Direct I/O is supported only with CONFIG_UCW_THREADS";
-#endif
- return NULL;
-}
-
-struct cf_section fbpar_cf = {
-# define F(x) PTR_TO(struct fb_params, x)
- CF_TYPE(struct fb_params),
- CF_COMMIT(fbpar_cf_commit),
- CF_ITEMS {
- CF_LOOKUP("Type", (int *)F(type), ((char *[]){"std", "direct", "mmap", NULL})),
- CF_UNS("BufSize", F(buffer_size)),
- CF_UNS("KeepBackBuf", F(keep_back_buf)),
- CF_UNS("ReadAhead", F(read_ahead)),
- CF_UNS("WriteBack", F(write_back)),
- CF_END
- }
-# undef F
-};
-
-static struct cf_section fbpar_global_cf = {
- CF_ITEMS {
- CF_SECTION("Defaults", &fbpar_def, &fbpar_cf),
- CF_END
- }
-};
-
-static void CONSTRUCTOR
-fbpar_global_init(void)
-{
- cf_declare_section("FBParam", &fbpar_global_cf, 0);
-}
-
-static struct fastbuf *
-bopen_fd_internal(int fd, struct fb_params *params, uns mode, const char *name)
-{
- char buf[32];
- if (!name)
- {
- sprintf(buf, "fd%d", fd);
- name = buf;
- }
- struct fastbuf *fb;
- switch (params->type)
- {
-#ifdef CONFIG_UCW_THREADS
- case FB_DIRECT:
- fb = fbdir_open_fd_internal(fd, name, params->asio,
- params->buffer_size ? : fbpar_def.buffer_size,
- params->read_ahead ? : fbpar_def.read_ahead,
- params->write_back ? : fbpar_def.write_back);
- if (!~mode && !fbdir_cheat && ((int)(mode = fcntl(fd, F_GETFL)) < 0 || fcntl(fd, F_SETFL, mode | O_DIRECT)) < 0)
- msg(L_WARN, "Cannot set O_DIRECT on fd %d: %m", fd);
- return fb;
-#endif
- case FB_STD:
- fb = bfdopen_internal(fd, name,
- params->buffer_size ? : fbpar_def.buffer_size);
- if (params->keep_back_buf)
- bconfig(fb, BCONFIG_KEEP_BACK_BUF, 1);
- return fb;
- case FB_MMAP:
- if (!~mode && (int)(mode = fcntl(fd, F_GETFL)) < 0)
- die("Cannot get flags of fd %d: %m", fd);
- return bfmmopen_internal(fd, name, mode);
- default:
- ASSERT(0);
- }
-}
-
-static struct fastbuf *
-bopen_file_internal(const char *name, int mode, struct fb_params *params, int try)
-{
- if (!params)
- params = &fbpar_def;
-#ifdef CONFIG_UCW_THREADS
- if (params->type == FB_DIRECT && !fbdir_cheat)
- mode |= O_DIRECT;
-#endif
- if (params->type == FB_MMAP && (mode & O_ACCMODE) == O_WRONLY)
- mode = (mode & ~O_ACCMODE) | O_RDWR;
- int fd = sh_open(name, mode, 0666);
- if (fd < 0)
- if (try)
- return NULL;
- else
- die("Unable to %s file %s: %m", (mode & O_CREAT) ? "create" : "open", name);
- struct fastbuf *fb = bopen_fd_internal(fd, params, mode, name);
- ASSERT(fb);
- if (mode & O_APPEND)
- bseek(fb, 0, SEEK_END);
- return fb;
-}
-
-struct fastbuf *
-bopen_file(const char *name, int mode, struct fb_params *params)
-{
- return bopen_file_internal(name, mode, params, 0);
-}
-
-struct fastbuf *
-bopen_file_try(const char *name, int mode, struct fb_params *params)
-{
- return bopen_file_internal(name, mode, params, 1);
-}
-
-struct fastbuf *
-bopen_fd(int fd, struct fb_params *params)
-{
- return bopen_fd_internal(fd, params ? : &fbpar_def, ~0U, NULL);
-}
-
-/* Function for use by individual file back-ends */
-
-void
-bclose_file_helper(struct fastbuf *f, int fd, int is_temp_file)
-{
- switch (is_temp_file)
- {
- case 1:
- if (unlink(f->name) < 0)
- msg(L_ERROR, "unlink(%s): %m", f->name);
- case 0:
- if (close(fd))
- die("close(%s): %m", f->name);
- }
-}
-
-/* Compatibility wrappers */
-
-struct fastbuf *
-bopen_try(const char *name, uns mode, uns buflen)
-{
- return bopen_file_try(name, mode, &(struct fb_params){ .type = FB_STD, .buffer_size = buflen });
-}
-
-struct fastbuf *
-bopen(const char *name, uns mode, uns buflen)
-{
- return bopen_file(name, mode, &(struct fb_params){ .type = FB_STD, .buffer_size = buflen });
-}
-
-struct fastbuf *
-bfdopen(int fd, uns buflen)
-{
- return bopen_fd(fd, &(struct fb_params){ .type = FB_STD, .buffer_size = buflen });
-}
-
-struct fastbuf *
-bfdopen_shared(int fd, uns buflen)
-{
- struct fastbuf *f = bfdopen(fd, buflen);
- bconfig(f, BCONFIG_IS_TEMP_FILE, 2);
- return f;
-}
+++ /dev/null
-/*
- * UCW Library -- Fast Buffered I/O on Memory Pools
- *
- * (c) 2007 Pavel Charvat <pchar@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-#include "lib/mempool.h"
-#include "lib/fastbuf.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-
-#define FB_POOL(f) ((struct fbpool *)(f)->is_fastbuf)
-
-static void
-fbpool_spout(struct fastbuf *b)
-{
- if (b->bptr >= b->bufend)
- {
- uns len = b->bufend - b->buffer;
- b->buffer = mp_expand(FB_POOL(b)->mp);
- b->bufend = b->buffer + mp_avail(FB_POOL(b)->mp);
- b->bstop = b->buffer;
- b->bptr = b->buffer + len;
- }
-}
-
-void
-fbpool_start(struct fbpool *b, struct mempool *mp, uns init_size)
-{
- b->mp = mp;
- b->fb.buffer = b->fb.bstop = b->fb.bptr = mp_start(mp, init_size);
- b->fb.bufend = b->fb.buffer + mp_avail(mp);
-}
-
-void *
-fbpool_end(struct fbpool *b)
-{
- return mp_end(b->mp, b->fb.bptr);
-}
-
-void
-fbpool_init(struct fbpool *b)
-{
- bzero(b, sizeof(*b));
- b->fb.name = "<fbpool>";
- b->fb.spout = fbpool_spout;
- b->fb.can_overwrite_buffer = 1;
-}
-
-#ifdef TEST
-
-int main(void)
-{
- struct mempool *mp;
- struct fbpool fb;
- byte *p;
- uns l;
-
- mp = mp_new(64);
- fbpool_init(&fb);
- fbpool_start(&fb, mp, 16);
- for (uns i = 0; i < 1024; i++)
- bprintf(&fb.fb, "<hello>");
- p = fbpool_end(&fb);
- l = mp_size(mp, p);
- if (l != 1024 * 7)
- ASSERT(0);
- for (uns i = 0; i < 1024; i++)
- if (memcmp(p + i * 7, "<hello>", 7))
- ASSERT(0);
- mp_delete(mp);
-
- return 0;
-}
-
-#endif
+++ /dev/null
-/*
- * UCW Library -- Temporary Fastbufs
- *
- * (c) 2002--2007 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-#include "lib/conf.h"
-#include "lib/fastbuf.h"
-#include "lib/threads.h"
-
-#include <stdio.h>
-#include <unistd.h>
-#include <sys/fcntl.h>
-
-static char *temp_prefix = "/tmp/temp";
-
-static struct cf_section temp_config = {
- CF_ITEMS {
- CF_STRING("Prefix", &temp_prefix),
- CF_END
- }
-};
-
-static void CONSTRUCTOR temp_global_init(void)
-{
- cf_declare_section("Tempfiles", &temp_config, 0);
-}
-
-void
-temp_file_name(char *buf)
-{
- struct ucwlib_context *ctx = ucwlib_thread_context();
- int cnt = ++ctx->temp_counter;
- int pid = getpid();
- if (ctx->thread_id == pid)
- sprintf(buf, "%s%d-%d", temp_prefix, pid, cnt);
- else
- sprintf(buf, "%s%d-%d-%d", temp_prefix, pid, ctx->thread_id, cnt);
-}
-
-struct fastbuf *
-bopen_tmp_file(struct fb_params *params)
-{
- char name[TEMP_FILE_NAME_LEN];
- temp_file_name(name);
- struct fastbuf *fb = bopen_file(name, O_RDWR | O_CREAT | O_TRUNC, params);
- bconfig(fb, BCONFIG_IS_TEMP_FILE, 1);
- return fb;
-}
-
-struct fastbuf *
-bopen_tmp(uns buflen)
-{
- return bopen_tmp_file(&(struct fb_params){ .type = FB_STD, .buffer_size = buflen });
-}
-
-void bfix_tmp_file(struct fastbuf *fb, const char *name)
-{
- int was_temp = bconfig(fb, BCONFIG_IS_TEMP_FILE, 0);
- ASSERT(was_temp == 1);
- if (rename(fb->name, name))
- die("Cannot rename %s to %s: %m", fb->name, name);
- bclose(fb);
-}
-
-#ifdef TEST
-
-#include "lib/getopt.h"
-
-int main(int argc, char **argv)
-{
- log_init(NULL);
- if (cf_getopt(argc, argv, CF_SHORT_OPTS, CF_NO_LONG_OPTS, NULL) >= 0)
- die("Hey, whaddya want?");
-
- struct fastbuf *f = bopen_tmp(65536);
- bputsn(f, "Hello, world!");
- bclose(f);
- return 0;
-}
-
-#endif
+++ /dev/null
-/*
- * UCW Library -- Fast Buffered I/O: Binary Numbers
- *
- * (c) 1997--2006 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-#include "lib/fastbuf.h"
-#include "lib/ff-binary.h"
-
-#define GEN(type, name, size, endian) \
-type bget##name##_##endian##_slow(struct fastbuf *f) \
-{ \
- byte buf[size/8]; \
- if (bread(f, buf, sizeof(buf)) != sizeof(buf)) \
- return ~(type)0; \
- return get_u##size##_##endian(buf); \
-} \
-void bput##name##_##endian##_##slow(struct fastbuf *f, type x) \
-{ \
- byte buf[size/8]; \
- put_u##size##_##endian(buf, x); \
- bwrite_slow(f, buf, sizeof(buf)); \
-}
-
-#define FF_ALL(type, name, size) GEN(type,name,size,be) GEN(type,name,size,le)
-
-FF_ALL(int, w, 16)
-FF_ALL(uns, l, 32)
-FF_ALL(u64, q, 64)
-FF_ALL(u64, 5, 40)
+++ /dev/null
-/*
- * UCW Library -- Fast Buffered I/O on Binary Values
- *
- * (c) 1997--2007 Martin Mares <mj@ucw.cz>
- * (c) 2004 Robert Spalek <robert@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#ifndef _UCW_FF_BINARY_H
-#define _UCW_FF_BINARY_H
-
-#include "lib/fastbuf.h"
-#include "lib/unaligned.h"
-
-#ifdef CPU_BIG_ENDIAN
-#define FF_ENDIAN be
-#else
-#define FF_ENDIAN le
-#endif
-
-#define GET_FUNC(type, name, bits, endian) \
- type bget##name##_##endian##_slow(struct fastbuf *f); \
- static inline type bget##name##_##endian(struct fastbuf *f) \
- { \
- if (bavailr(f) >= bits/8) \
- { \
- type w = get_u##bits##_##endian(f->bptr); \
- f->bptr += bits/8; \
- return w; \
- } \
- else \
- return bget##name##_##endian##_slow(f); \
- }
-
-#define PUT_FUNC(type, name, bits, endian) \
- void bput##name##_##endian##_slow(struct fastbuf *f, type x); \
- static inline void bput##name##_##endian(struct fastbuf *f, type x) \
- { \
- if (bavailw(f) >= bits/8) \
- { \
- put_u##bits##_##endian(f->bptr, x); \
- f->bptr += bits/8; \
- } \
- else \
- return bput##name##_##endian##_slow(f, x); \
- }
-
-#define FF_ALL_X(type, name, bits, defendian) \
- GET_FUNC(type, name, bits, be) \
- GET_FUNC(type, name, bits, le) \
- PUT_FUNC(type, name, bits, be) \
- PUT_FUNC(type, name, bits, le) \
- static inline type bget##name(struct fastbuf *f) { return bget##name##_##defendian(f); } \
- static inline void bput##name(struct fastbuf *f, type x) { bput##name##_##defendian(f, x); }
-
-#define FF_ALL(type, name, bits, defendian) FF_ALL_X(type, name, bits, defendian)
-
-FF_ALL(int, w, 16, FF_ENDIAN)
-FF_ALL(uns, l, 32, FF_ENDIAN)
-FF_ALL(u64, q, 64, FF_ENDIAN)
-FF_ALL(u64, 5, 40, FF_ENDIAN)
-
-#undef GET_FUNC
-#undef PUT_FUNC
-#undef FF_ENDIAN
-#undef FF_ALL_X
-#undef FF_ALL
-
-/* I/O on uintptr_t (only native endianity) */
-
-#ifdef CPU_64BIT_POINTERS
-#define bputa(x,p) bputq(x,p)
-#define bgeta(x) bgetq(x)
-#else
-#define bputa(x,p) bputl(x,p)
-#define bgeta(x) bgetl(x)
-#endif
-
-#endif
+++ /dev/null
-/*
- * UCW Library -- Printf on Fastbuf Streams
- *
- * (c) 2002--2005 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-#include "lib/fastbuf.h"
-
-#include <stdio.h>
-#include <alloca.h>
-
-int
-vbprintf(struct fastbuf *b, const char *msg, va_list args)
-{
- byte *buf;
- int len, r;
- va_list args2;
-
- len = bdirect_write_prepare(b, &buf);
- if (len >= 16)
- {
- va_copy(args2, args);
- r = vsnprintf(buf, len, msg, args2);
- va_end(args2);
- if (r < 0)
- len = 256;
- else if (r < len)
- {
- bdirect_write_commit(b, buf+r);
- return r;
- }
- else
- len = r+1;
- }
- else
- len = 256;
-
- while (1)
- {
- buf = alloca(len);
- va_copy(args2, args);
- r = vsnprintf(buf, len, msg, args2);
- va_end(args2);
- if (r < 0)
- len += len;
- else if (r < len)
- {
- bwrite(b, buf, r);
- return r;
- }
- else
- len = r+1;
- }
-}
-
-int
-bprintf(struct fastbuf *b, const char *msg, ...)
-{
- va_list args;
- int res;
-
- va_start(args, msg);
- res = vbprintf(b, msg, args);
- va_end(args);
- return res;
-}
-
-#ifdef TEST
-
-int main(void)
-{
- struct fastbuf *b = bfdopen_shared(1, 65536);
- for (int i=0; i<10000; i++)
- bprintf(b, "13=%d str=<%s> msg=%m\n", 13, "str");
- bclose(b);
- return 0;
-}
-
-#endif
+++ /dev/null
-/*
- * UCW Library -- Fast Buffered I/O: Strings
- *
- * (c) 1997--2006 Martin Mares <mj@ucw.cz>
- * (c) 2006 Pavel Charvat <pchar@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-#include "lib/fastbuf.h"
-#include "lib/mempool.h"
-#include "lib/bbuf.h"
-
-char * /* Non-standard */
-bgets(struct fastbuf *f, char *b, uns l)
-{
- ASSERT(l);
- byte *src;
- uns src_len = bdirect_read_prepare(f, &src);
- if (!src_len)
- return NULL;
- do
- {
- uns cnt = MIN(l, src_len);
- for (uns i = cnt; i--;)
- {
- byte v = *src++;
- if (v == '\n')
- {
- bdirect_read_commit(f, src);
- goto exit;
- }
- *b++ = v;
- }
- if (unlikely(cnt == l))
- die("%s: Line too long", f->name);
- l -= cnt;
- bdirect_read_commit(f, src);
- src_len = bdirect_read_prepare(f, &src);
- }
- while (src_len);
-exit:
- *b = 0;
- return b;
-}
-
-int
-bgets_nodie(struct fastbuf *f, char *b, uns l)
-{
- ASSERT(l);
- byte *src, *start = b;
- uns src_len = bdirect_read_prepare(f, &src);
- if (!src_len)
- return 0;
- do
- {
- uns cnt = MIN(l, src_len);
- for (uns i = cnt; i--;)
- {
- byte v = *src++;
- if (v == '\n')
- {
- bdirect_read_commit(f, src);
- goto exit;
- }
- *b++ = v;
- }
- bdirect_read_commit(f, src);
- if (cnt == l)
- return -1;
- l -= cnt;
- src_len = bdirect_read_prepare(f, &src);
- }
- while (src_len);
-exit:
- *b++ = 0;
- return b - (char *)start;
-}
-
-uns
-bgets_bb(struct fastbuf *f, struct bb_t *bb, uns limit)
-{
- ASSERT(limit);
- byte *src;
- uns src_len = bdirect_read_prepare(f, &src);
- if (!src_len)
- return 0;
- bb_grow(bb, 1);
- byte *buf = bb->ptr;
- uns len = 0, buf_len = MIN(bb->len, limit);
- do
- {
- uns cnt = MIN(src_len, buf_len);
- for (uns i = cnt; i--;)
- {
- byte v = *src++;
- if (v == '\n')
- {
- bdirect_read_commit(f, src);
- goto exit;
- }
- *buf++ = v;
- }
- len += cnt;
- if (cnt == src_len)
- {
- bdirect_read_commit(f, src);
- src_len = bdirect_read_prepare(f, &src);
- }
- else
- src_len -= cnt;
- if (cnt == buf_len)
- {
- if (unlikely(len == limit))
- die("%s: Line too long", f->name);
- bb_do_grow(bb, len + 1);
- buf = bb->ptr + len;
- buf_len = MIN(bb->len, limit) - len;
- }
- else
- buf_len -= cnt;
- }
- while (src_len);
-exit:
- *buf++ = 0;
- return buf - bb->ptr;
-}
-
-char *
-bgets_mp(struct fastbuf *f, struct mempool *mp)
-{
- byte *src;
- uns src_len = bdirect_read_prepare(f, &src);
- if (!src_len)
- return NULL;
-#define BLOCK_SIZE (4096 - sizeof(void *))
- struct block {
- struct block *prev;
- byte data[BLOCK_SIZE];
- } *blocks = NULL;
- uns sum = 0, buf_len = BLOCK_SIZE, cnt;
- struct block first_block, *new_block = &first_block;
- byte *buf = new_block->data;
- do
- {
- cnt = MIN(src_len, buf_len);
- for (uns i = cnt; i--;)
- {
- byte v = *src++;
- if (v == '\n')
- {
- bdirect_read_commit(f, src);
- goto exit;
- }
- *buf++ = v;
- }
- if (cnt == src_len)
- {
- bdirect_read_commit(f, src);
- src_len = bdirect_read_prepare(f, &src);
- }
- else
- src_len -= cnt;
- if (cnt == buf_len)
- {
- new_block->prev = blocks;
- blocks = new_block;
- sum += buf_len = BLOCK_SIZE;
- new_block = alloca(sizeof(struct block));
- buf = new_block->data;
- }
- else
- buf_len -= cnt;
- }
- while (src_len);
-exit: ;
- uns len = buf - new_block->data;
- byte *result = mp_alloc(mp, sum + len + 1) + sum;
- result[len] = 0;
- memcpy(result, new_block->data, len);
- while (blocks)
- {
- result -= BLOCK_SIZE;
- memcpy(result, blocks->data, BLOCK_SIZE);
- blocks = blocks->prev;
- }
- return result;
-#undef BLOCK_SIZE
-}
-
-void
-bgets_stk_init(struct bgets_stk_struct *s)
-{
- s->src_len = bdirect_read_prepare(s->f, &s->src);
- if (!s->src_len)
- {
- s->cur_buf = NULL;
- s->cur_len = 0;
- }
- else
- {
- s->old_buf = NULL;
- s->cur_len = 256;
- }
-}
-
-void
-bgets_stk_step(struct bgets_stk_struct *s)
-{
- byte *buf = s->cur_buf;
- uns buf_len = s->cur_len;
- if (s->old_buf)
- {
- memcpy( s->cur_buf, s->old_buf, s->old_len);
- buf += s->old_len;
- buf_len -= s->old_len;
- }
- do
- {
- uns cnt = MIN(s->src_len, buf_len);
- for (uns i = cnt; i--;)
- {
- byte v = *s->src++;
- if (v == '\n')
- {
- bdirect_read_commit(s->f, s->src);
- goto exit;
- }
- *buf++ = v;
- }
- if (cnt == s->src_len)
- {
- bdirect_read_commit(s->f, s->src);
- s->src_len = bdirect_read_prepare(s->f, &s->src);
- }
- else
- s->src_len -= cnt;
- if (cnt == buf_len)
- {
- s->old_len = s->cur_len;
- s->old_buf = s->cur_buf;
- s->cur_len *= 2;
- return;
- }
- else
- buf_len -= cnt;
- }
- while (s->src_len);
-exit:
- *buf = 0;
- s->cur_len = 0;
-}
-
-char *
-bgets0(struct fastbuf *f, char *b, uns l)
-{
- ASSERT(l);
- byte *src;
- uns src_len = bdirect_read_prepare(f, &src);
- if (!src_len)
- return NULL;
- do
- {
- uns cnt = MIN(l, src_len);
- for (uns i = cnt; i--;)
- {
- *b = *src++;
- if (!*b)
- {
- bdirect_read_commit(f, src);
- return b;
- }
- b++;
- }
- if (unlikely(cnt == l))
- die("%s: Line too long", f->name);
- l -= cnt;
- bdirect_read_commit(f, src);
- src_len = bdirect_read_prepare(f, &src);
- }
- while (src_len);
- *b = 0;
- return b;
-}
+++ /dev/null
-/*
- * UCW Library: Reading and writing of UTF-8 on Fastbuf Streams
- *
- * (c) 2001--2004 Martin Mares <mj@ucw.cz>
- * (c) 2004 Robert Spalek <robert@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-#include "lib/fastbuf.h"
-#include "lib/unicode.h"
-#include "lib/ff-unicode.h"
-#include "lib/ff-binary.h"
-
-/*** UTF-8 ***/
-
-int
-bget_utf8_slow(struct fastbuf *b, uns repl)
-{
- int c = bgetc(b);
- int code;
-
- if (c < 0x80) /* Includes EOF */
- return c;
- if (c < 0xc0) /* Incorrect combination */
- return repl;
- if (c >= 0xf0) /* Too large, skip it */
- {
- while ((c = bgetc(b)) >= 0x80 && c < 0xc0)
- ;
- goto wrong;
- }
- if (c >= 0xe0) /* 3 bytes */
- {
- code = c & 0x0f;
- if ((c = bgetc(b)) < 0x80 || c >= 0xc0)
- goto wrong;
- code = (code << 6) | (c & 0x3f);
- if ((c = bgetc(b)) < 0x80 || c >= 0xc0)
- goto wrong;
- code = (code << 6) | (c & 0x3f);
- }
- else /* 2 bytes */
- {
- code = c & 0x1f;
- if ((c = bgetc(b)) < 0x80 || c >= 0xc0)
- goto wrong;
- code = (code << 6) | (c & 0x3f);
- }
- return code;
-
- wrong:
- if (c >= 0)
- bungetc(b);
- return repl;
-}
-
-int
-bget_utf8_32_slow(struct fastbuf *b, uns repl)
-{
- int c = bgetc(b);
- int code;
- int nr;
-
- if (c < 0x80) /* Includes EOF */
- return c;
- if (c < 0xc0) /* Incorrect combination */
- return repl;
- if (c < 0xe0)
- {
- code = c & 0x1f;
- nr = 1;
- }
- else if (c < 0xf0)
- {
- code = c & 0x0f;
- nr = 2;
- }
- else if (c < 0xf8)
- {
- code = c & 0x07;
- nr = 3;
- }
- else if (c < 0xfc)
- {
- code = c & 0x03;
- nr = 4;
- }
- else if (c < 0xfe)
- {
- code = c & 0x01;
- nr = 5;
- }
- else /* Too large, skip it */
- {
- while ((c = bgetc(b)) >= 0x80 && c < 0xc0)
- ;
- goto wrong;
- }
- while (nr-- > 0)
- {
- if ((c = bgetc(b)) < 0x80 || c >= 0xc0)
- goto wrong;
- code = (code << 6) | (c & 0x3f);
- }
- return code;
-
- wrong:
- if (c >= 0)
- bungetc(b);
- return repl;
-}
-
-void
-bput_utf8_slow(struct fastbuf *b, uns u)
-{
- ASSERT(u < 65536);
- if (u < 0x80)
- bputc(b, u);
- else
- {
- if (u < 0x800)
- bputc(b, 0xc0 | (u >> 6));
- else
- {
- bputc(b, 0xe0 | (u >> 12));
- bputc(b, 0x80 | ((u >> 6) & 0x3f));
- }
- bputc(b, 0x80 | (u & 0x3f));
- }
-}
-
-void
-bput_utf8_32_slow(struct fastbuf *b, uns u)
-{
- ASSERT(u < (1U<<31));
- if (u < 0x80)
- bputc(b, u);
- else
- {
- if (u < 0x800)
- bputc(b, 0xc0 | (u >> 6));
- else
- {
- if (u < (1<<16))
- bputc(b, 0xe0 | (u >> 12));
- else
- {
- if (u < (1<<21))
- bputc(b, 0xf0 | (u >> 18));
- else
- {
- if (u < (1<<26))
- bputc(b, 0xf8 | (u >> 24));
- else
- {
- bputc(b, 0xfc | (u >> 30));
- bputc(b, 0x80 | ((u >> 24) & 0x3f));
- }
- bputc(b, 0x80 | ((u >> 18) & 0x3f));
- }
- bputc(b, 0x80 | ((u >> 12) & 0x3f));
- }
- bputc(b, 0x80 | ((u >> 6) & 0x3f));
- }
- bputc(b, 0x80 | (u & 0x3f));
- }
-}
-
-/*** UTF-16 ***/
-
-int
-bget_utf16_be_slow(struct fastbuf *b, uns repl)
-{
- if (bpeekc(b) < 0)
- return -1;
- uns u = bgetw_be(b), x, y;
- if ((int)u < 0)
- return repl;
- if ((x = u - 0xd800) >= 0x800)
- return u;
- if (x >= 0x400 || bpeekc(b) < 0 || (y = bgetw_be(b) - 0xdc00) >= 0x400)
- return repl;
- return 0x10000 + (x << 10) + y;
-}
-
-int
-bget_utf16_le_slow(struct fastbuf *b, uns repl)
-{
- if (bpeekc(b) < 0)
- return -1;
- uns u = bgetw_le(b), x, y;
- if ((int)u < 0)
- return repl;
- if ((x = u - 0xd800) >= 0x800)
- return u;
- if (x >= 0x400 || bpeekc(b) < 0 || (y = bgetw_le(b) - 0xdc00) >= 0x400)
- return repl;
- return 0x10000 + (x << 10) + y;
-}
-
-void
-bput_utf16_be_slow(struct fastbuf *b, uns u)
-{
- if (u < 0xd800 || (u < 0x10000 && u >= 0xe000))
- {
- bputc(b, u >> 8);
- bputc(b, u & 0xff);
- }
- else if ((u -= 0x10000) < 0x100000)
- {
- bputc(b, 0xd8 | (u >> 18));
- bputc(b, (u >> 10) & 0xff);
- bputc(b, 0xdc | ((u >> 8) & 0x3));
- bputc(b, u & 0xff);
- }
- else
- ASSERT(0);
-}
-
-void
-bput_utf16_le_slow(struct fastbuf *b, uns u)
-{
- if (u < 0xd800 || (u < 0x10000 && u >= 0xe000))
- {
- bputc(b, u & 0xff);
- bputc(b, u >> 8);
- }
- else if ((u -= 0x10000) < 0x100000)
- {
- bputc(b, (u >> 10) & 0xff);
- bputc(b, 0xd8 | (u >> 18));
- bputc(b, u & 0xff);
- bputc(b, 0xdc | ((u >> 8) & 0x3));
- }
- else
- ASSERT(0);
-}
-
-#ifdef TEST
-
-#include <stdlib.h>
-#include <stdio.h>
-
-int main(int argc, char **argv)
-{
-#define FUNCS \
- F(BGET_UTF8) F(BGET_UTF8_32) F(BGET_UTF16_BE) F(BGET_UTF16_LE) \
- F(BPUT_UTF8) F(BPUT_UTF8_32) F(BPUT_UTF16_BE) F(BPUT_UTF16_LE)
-
- enum {
-#define F(x) FUNC_##x,
- FUNCS
-#undef F
- };
- char *names[] = {
-#define F(x) [FUNC_##x] = #x,
- FUNCS
-#undef F
- };
-
- uns func = ~0U;
- if (argc > 1)
- for (uns i = 0; i < ARRAY_SIZE(names); i++)
- if (!strcasecmp(names[i], argv[1]))
- func = i;
- if (!~func)
- {
- fprintf(stderr, "Invalid usage!\n");
- return 1;
- }
-
- struct fastbuf *b = fbgrow_create(8);
- if (func < FUNC_BPUT_UTF8)
- {
- uns u;
- while (scanf("%x", &u) == 1)
- bputc(b, u);
- fbgrow_rewind(b);
- while (bpeekc(b) >= 0)
- {
- if (btell(b))
- putchar(' ');
- switch (func)
- {
- case FUNC_BGET_UTF8:
- u = bget_utf8_slow(b, UNI_REPLACEMENT);
- break;
- case FUNC_BGET_UTF8_32:
- u = bget_utf8_32_slow(b, UNI_REPLACEMENT);
- break;
- case FUNC_BGET_UTF16_BE:
- u = bget_utf16_be_slow(b, UNI_REPLACEMENT);
- break;
- case FUNC_BGET_UTF16_LE:
- u = bget_utf16_le_slow(b, UNI_REPLACEMENT);
- break;
- default:
- ASSERT(0);
- }
- printf("%04x", u);
- }
- putchar('\n');
- }
- else
- {
- uns u, i = 0;
- while (scanf("%x", &u) == 1)
- {
- switch (func)
- {
- case FUNC_BPUT_UTF8:
- bput_utf8_slow(b, u);
- break;
- case FUNC_BPUT_UTF8_32:
- bput_utf8_32_slow(b, u);
- break;
- case FUNC_BPUT_UTF16_BE:
- bput_utf16_be_slow(b, u);
- break;
- case FUNC_BPUT_UTF16_LE:
- bput_utf16_le_slow(b, u);
- break;
- default:
- ASSERT(0);
- }
- fbgrow_rewind(b);
- u = 0;
- while (bpeekc(b) >= 0)
- {
- if (i++)
- putchar(' ');
- printf("%02x", bgetc(b));
- }
- fbgrow_reset(b);
- }
- putchar('\n');
- }
- bclose(b);
-
- return 0;
-}
-
-#endif
+++ /dev/null
-/*
- * UCW Library: Reading and writing of UTF-8 and UTF-16 on Fastbuf Streams
- *
- * (c) 2001--2004 Martin Mares <mj@ucw.cz>
- * (c) 2004 Robert Spalek <robert@ucw.cz>
- * (c) 2007--2008 Pavel Charvat <pchar@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#ifndef _UCW_FF_UNICODE_H
-#define _UCW_FF_UNICODE_H
-
-#include "lib/fastbuf.h"
-#include "lib/unicode.h"
-
-/*** UTF-8 ***/
-
-int bget_utf8_slow(struct fastbuf *b, uns repl);
-int bget_utf8_32_slow(struct fastbuf *b, uns repl);
-void bput_utf8_slow(struct fastbuf *b, uns u);
-void bput_utf8_32_slow(struct fastbuf *b, uns u);
-
-/*
- * Fetch one character from a fastbuf using the UTF-8 decoder.
- * When bavailr() reports at least 3 bytes, utf8_get_repl() decodes directly
- * at b->bptr and the pointer is advanced to whatever it returns; otherwise
- * the work is delegated to bget_utf8_slow().  `repl' is forwarded unchanged
- * to the decoder (presumably the value substituted for malformed input --
- * confirm against lib/unicode.h).
- */
-static inline int
-bget_utf8_repl(struct fastbuf *b, uns repl)
-{
-  uns code;
-  if (bavailr(b) < 3)
-    return bget_utf8_slow(b, repl);
-  b->bptr = utf8_get_repl(b->bptr, &code, repl);
-  return code;
-}
-
-/*
- * Same scheme as bget_utf8_repl(), but using the utf8_32 decoder: the
- * in-buffer path needs bavailr() to report at least 6 bytes, otherwise
- * bget_utf8_32_slow() is called.  `repl' is forwarded unchanged.
- */
-static inline int
-bget_utf8_32_repl(struct fastbuf *b, uns repl)
-{
-  uns code;
-  if (bavailr(b) < 6)
-    return bget_utf8_32_slow(b, repl);
-  b->bptr = utf8_32_get_repl(b->bptr, &code, repl);
-  return code;
-}
-
-/* Shorthand for bget_utf8_repl() with UNI_REPLACEMENT as the replacement value. */
-static inline int
-bget_utf8(struct fastbuf *b)
-{
-  int code = bget_utf8_repl(b, UNI_REPLACEMENT);
-  return code;
-}
-
-/* Shorthand for bget_utf8_32_repl() with UNI_REPLACEMENT as the replacement value. */
-static inline int
-bget_utf8_32(struct fastbuf *b)
-{
-  int code = bget_utf8_32_repl(b, UNI_REPLACEMENT);
-  return code;
-}
-
-/*
- * Append character `u' to a fastbuf using the UTF-8 encoder.
- * With at least 3 bytes reported by bavailw(), utf8_put() writes directly at
- * b->bptr and the pointer is advanced to its return value; otherwise
- * bput_utf8_slow() does the job.
- */
-static inline void
-bput_utf8(struct fastbuf *b, uns u)
-{
-  if (bavailw(b) < 3)
-    {
-      bput_utf8_slow(b, u);
-      return;
-    }
-  b->bptr = utf8_put(b->bptr, u);
-}
-
-/*
- * Same scheme as bput_utf8(), but using the utf8_32 encoder: the in-buffer
- * path needs bavailw() to report at least 6 bytes, otherwise
- * bput_utf8_32_slow() is called.
- */
-static inline void
-bput_utf8_32(struct fastbuf *b, uns u)
-{
-  if (bavailw(b) < 6)
-    {
-      bput_utf8_32_slow(b, u);
-      return;
-    }
-  b->bptr = utf8_32_put(b->bptr, u);
-}
-
-/*** UTF-16 ***/
-
-int bget_utf16_be_slow(struct fastbuf *b, uns repl);
-int bget_utf16_le_slow(struct fastbuf *b, uns repl);
-void bput_utf16_be_slow(struct fastbuf *b, uns u);
-void bput_utf16_le_slow(struct fastbuf *b, uns u);
-
-/*
- * Fetch one character from a fastbuf using the UTF-16BE decoder.
- * When bavailr() reports at least 4 bytes, utf16_be_get_repl() decodes
- * directly at b->bptr; otherwise bget_utf16_be_slow() is used.
- * `repl' is forwarded unchanged to whichever decoder runs.
- */
-static inline int
-bget_utf16_be_repl(struct fastbuf *b, uns repl)
-{
-  uns code;
-  if (bavailr(b) < 4)
-    return bget_utf16_be_slow(b, repl);
-  b->bptr = utf16_be_get_repl(b->bptr, &code, repl);
-  return code;
-}
-
-/*
- * Fetch one character from a fastbuf using the UTF-16LE decoder.
- * When bavailr() reports at least 4 bytes, utf16_le_get_repl() decodes
- * directly at b->bptr; otherwise bget_utf16_le_slow() is used.
- * `repl' is forwarded unchanged to whichever decoder runs.
- */
-static inline int
-bget_utf16_le_repl(struct fastbuf *b, uns repl)
-{
-  uns code;
-  if (bavailr(b) < 4)
-    return bget_utf16_le_slow(b, repl);
-  b->bptr = utf16_le_get_repl(b->bptr, &code, repl);
-  return code;
-}
-
-/* Shorthand for bget_utf16_be_repl() with UNI_REPLACEMENT as the replacement value. */
-static inline int
-bget_utf16_be(struct fastbuf *b)
-{
-  int code = bget_utf16_be_repl(b, UNI_REPLACEMENT);
-  return code;
-}
-
-/* Shorthand for bget_utf16_le_repl() with UNI_REPLACEMENT as the replacement value. */
-static inline int
-bget_utf16_le(struct fastbuf *b)
-{
-  int code = bget_utf16_le_repl(b, UNI_REPLACEMENT);
-  return code;
-}
-
-/*
- * Append character `u' to a fastbuf using the UTF-16BE encoder.
- * With at least 4 bytes reported by bavailw(), utf16_be_put() writes directly
- * at b->bptr; otherwise bput_utf16_be_slow() does the job.
- */
-static inline void
-bput_utf16_be(struct fastbuf *b, uns u)
-{
-  if (bavailw(b) < 4)
-    {
-      bput_utf16_be_slow(b, u);
-      return;
-    }
-  b->bptr = utf16_be_put(b->bptr, u);
-}
-
-/*
- * Append character `u' to a fastbuf using the UTF-16LE encoder.
- * With at least 4 bytes reported by bavailw(), utf16_le_put() writes directly
- * at b->bptr and the pointer is advanced to its return value; otherwise
- * bput_utf16_le_slow() does the job.
- *
- * This is the little-endian counterpart of bput_utf16_be() and carries the
- * name that matches the rest of the bget/bput_utf16_{be,le} family.
- */
-static inline void
-bput_utf16_le(struct fastbuf *b, uns u)
-{
-  if (bavailw(b) >= 4)
-    b->bptr = utf16_le_put(b->bptr, u);
-  else
-    bput_utf16_le_slow(b, u);
-}
-
-/*
- * Historical misspelling of bput_utf16_le().  Kept as a thin wrapper so that
- * code written against the old name keeps compiling; new code should call
- * bput_utf16_le() instead.
- */
-static inline void
-bput_utf16_lbe(struct fastbuf *b, uns u)
-{
-  bput_utf16_le(b, u);
-}
-
-#endif
+++ /dev/null
-# Tests for the Unicode module
-
-Name: bput_utf8
-Run: ../obj/lib/ff-unicode-t bput_utf8
-In: 0041 0048 004f 004a
-Out: 41 48 4f 4a
-
-Name: bget_utf8_32
-Run: ../obj/lib/ff-unicode-t bget_utf8_32
-In: fe 83 81
-Out: fffc
-
-Name: bput_utf16_be
-Run: ../obj/lib/ff-unicode-t bput_utf16_be
-In: 0041 004a 2a5f feff 0000 10ffff ffff 10000
-Out: 00 41 00 4a 2a 5f fe ff 00 00 db ff df ff ff ff d8 00 dc 00
-
-Name: bput_utf16_le
-Run: ../obj/lib/ff-unicode-t bput_utf16_le
-In: 0041 004a 2a5f feff 0000 10ffff ffff 10000
-Out: 41 00 4a 00 5f 2a ff fe 00 00 ff db ff df ff ff 00 d8 00 dc
-
-Name: bget_utf16_be (1)
-Run: ../obj/lib/ff-unicode-t bget_utf16_be
-In: 00 41 00 4a 2a 5f fe ff 00 00 db ff df ff ff ff d8 00 dc 00
-Out: 0041 004a 2a5f feff 0000 10ffff ffff 10000
-
-Name: bget_utf16_be (2)
-Run: ../obj/lib/ff-unicode-t bget_utf16_be
-In: dc 1a 2a 5f d8 01 d8 01 2a 5f d8 01
-Out: fffc 2a5f fffc 2a5f fffc
-
-Name: bget_utf16_le (1)
-Run: ../obj/lib/ff-unicode-t bget_utf16_le
-In: 41 00 4a 00 5f 2a ff fe 00 00 ff db ff df ff ff 00 d8 00 dc
-Out: 0041 004a 2a5f feff 0000 10ffff ffff 10000
-
-Name: bget_utf16_le (2)
-Run: ../obj/lib/ff-unicode-t bget_utf16_le
-In: 1a dc 5f 2a 01 d8 01 d8 5f 2a 01 d8
-Out: fffc 2a5f fffc 2a5f fffc
+++ /dev/null
-/*
- * UCW Library: An alias for lib/ff-unicode.h (for backwards compatibility)
- *
- * (c) 2008 Pavel Charvat <pchar@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#ifndef _UCW_FF_UTF8_H
-#define _UCW_FF_UTF8_H
-
-#include "lib/ff-unicode.h"
-
-#endif
+++ /dev/null
-/*
- * UCW Library -- A simple growing buffer
- *
- * (c) 2004, Robert Spalek <robert@ucw.cz>
- * (c) 2005, Martin Mares <mj@ucw.cz>
- *
- * Define the following macros:
- *
- * GBUF_TYPE data type of records stored in the buffer
- * GBUF_PREFIX(x) add a name prefix to all global symbols
- * GBUF_TRACE(msg...) log growing of buffer [optional]
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#define BUF_T GBUF_PREFIX(t)
-
-/*
- * Descriptor of one growing buffer (BUF_T expands to GBUF_PREFIX(t)).
- *   len  number of GBUF_TYPE items the block at `ptr' was last sized for
- *        (set_size() stores its `len' argument here); 0 after init()/done()
- *   ptr  storage obtained from xrealloc(); NULL after init()/done()
- */
-typedef struct BUF_T
-{
- uns len;
- GBUF_TYPE *ptr;
-}
-BUF_T;
-
-/* Put a buffer descriptor into the empty state: zero length, no storage. */
-static inline void
-GBUF_PREFIX(init)(BUF_T *b)
-{
-  b->len = 0;
-  b->ptr = NULL;
-}
-
-/*
- * Release the storage held by the buffer (if any) with xfree() and bring the
- * descriptor back to the same empty state that init() produces.
- */
-static void UNUSED
-GBUF_PREFIX(done)(BUF_T *b)
-{
-  if (b->ptr != NULL)
-    xfree(b->ptr);
-  GBUF_PREFIX(init)(b);
-}
-
-/*
- * Resize the buffer to exactly `len' items of GBUF_TYPE via xrealloc() and
- * record the new length.  If the includer defined GBUF_TRACE, the new size
- * is reported through it.
- */
-static void UNUSED
-GBUF_PREFIX(set_size)(BUF_T *b, uns len)
-{
-  b->len = len;
-  b->ptr = xrealloc(b->ptr, sizeof(GBUF_TYPE) * len);
-#if defined(GBUF_TRACE)
-  GBUF_TRACE(STRINGIFY_EXPANDED(BUF_T) " growing to %u items", len);
-#endif
-}
-
-/*
- * Out-of-line part of grow(): resize to `len' items, but never to less than
- * twice the current length, so that repeated growing stays logarithmic in
- * the number of reallocations.
- */
-static void UNUSED
-GBUF_PREFIX(do_grow)(BUF_T *b, uns len)
-{
-  uns doubled = 2*b->len;
-  GBUF_PREFIX(set_size)(b, (len < doubled) ? doubled : len);
-}
-
-/*
- * Make sure the buffer can hold at least `len' items and return its storage.
- * The common case (already large enough) is a single comparison; only when
- * the request exceeds the current length is do_grow() invoked.
- */
-static inline GBUF_TYPE *
-GBUF_PREFIX(grow)(BUF_T *b, uns len)
-{
-  if (unlikely(b->len < len))
-    GBUF_PREFIX(do_grow)(b, len);
-  return b->ptr;
-}
-
-#undef GBUF_TYPE
-#undef GBUF_PREFIX
-#undef GBUF_TRACE
-#undef BUF_T
+++ /dev/null
-#include "lib/lib.h"
-#include "lib/getopt.h"
-
-/*
- * Prepare getopt for scanning a fresh argument vector by zeroing `optind'.
- * Should work on GNU libc.
- */
-void
-reset_getopt(void)
-{
-  optind = 0;
-}
-
-#ifdef TEST
-#include <stdio.h>
-
-/*
- * Test helper: run getopt_long() over the whole argument vector and print
- * one line per option returned, followed by the count of arguments left
- * after option processing (only when there are any).
- */
-static void
-parse(int argc, char **argv)
-{
-  static struct option longopts[] = {
-    { "longa", 0, 0, 'a' },
-    { "longb", 0, 0, 'b' },
-    { "longc", 1, 0, 'c' },
-    { "longd", 1, 0, 'd' },
-    { 0, 0, 0, 0 }
-  };
-  int c;
-
-  for (;;)
-    {
-      c = getopt_long(argc, argv, "abc:d:", longopts, NULL);
-      if (c < 0)
-        break;
-      if (c == 'a' || c == 'b')
-        printf("option %c\n", c);
-      else if (c == 'c' || c == 'd')
-        printf("option %c with value `%s'\n", c, optarg);
-      else if (c == '?')
-        printf("unknown option\n");
-      else
-        printf("getopt returned unexpected char 0x%02x\n", c);
-    }
-
-  if (optind != argc)
-    printf("%d nonoption arguments\n", argc - optind);
-}
-
-/*
- * Test driver: parse the command line twice with getopt's own error messages
- * switched off.  Between the two passes it prints "reset" and calls
- * reset_getopt(), so the second pass shows whether the reset took effect.
- */
-int
-main(int argc, char **argv)
-{
-  int pass;
-
-  opterr = 0;
-  for (pass = 0; pass < 2; pass++)
-    {
-      if (pass)
-        {
-          printf("reset\n");
-          reset_getopt();
-        }
-      parse(argc, argv);
-    }
-  return 0;
-}
-#endif
+++ /dev/null
-/*
- * UCW Library -- Parsing of configuration and command-line options
- *
- * (c) 2001--2006 Robert Spalek <robert@ucw.cz>
- * (c) 2003--2006 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#ifndef _UCW_GETOPT_H
-#define _UCW_GETOPT_H
-
-#ifdef CONFIG_OWN_GETOPT
-#include "lib/getopt/getopt-sh.h"
-#else
-#include <getopt.h>
-#endif
-
-void reset_getopt(void);
-
-/* Safe loading and reloading of configuration files: conf-input.c */
-
-extern char *cf_def_file; /* DEFAULT_CONFIG; NULL if already loaded */
-extern char *cf_env_file; /* ENV_VAR_CONFIG */
-int cf_reload(const char *file);
-int cf_load(const char *file);
-int cf_set(const char *string);
-
-/* Direct access to configuration items: conf-intr.c */
-
-#define CF_OPERATIONS T(CLOSE) T(SET) T(CLEAR) T(ALL) \
- T(APPEND) T(PREPEND) T(REMOVE) T(EDIT) T(AFTER) T(BEFORE) T(COPY)
- /* Closing brace finishes previous block.
- * Basic attributes (static, dynamic, parsed) can be used with SET.
- * Dynamic arrays can be used with SET, APPEND, PREPEND.
- * Sections can be used with SET.
- * Lists can be used with everything. */
-#define T(x) OP_##x,
-enum cf_operation { CF_OPERATIONS };
-#undef T
-
-struct cf_item;
-char *cf_find_item(const char *name, struct cf_item *item);
-char *cf_write_item(struct cf_item *item, enum cf_operation op, int number, char **pars);
-
-/* Debug dumping: conf-dump.c */
-
-struct fastbuf;
-void cf_dump_sections(struct fastbuf *fb);
-
-/* Journaling control: conf-journal.c */
-
-struct cf_journal_item;
-struct cf_journal_item *cf_journal_new_transaction(uns new_pool);
-void cf_journal_commit_transaction(uns new_pool, struct cf_journal_item *oldj);
-void cf_journal_rollback_transaction(uns new_pool, struct cf_journal_item *oldj);
-
-/*
- * cf_getopt() takes care of parsing the command-line arguments, loading the
- * default configuration file (cf_def_file) and processing configuration options.
- * The calling convention is the same as with GNU getopt_long(), but you must prefix
- * your own short/long options by the CF_(SHORT|LONG)_OPTS or pass CF_NO_LONG_OPTS
- * if there are no long options.
- *
- * The default configuration file can be overridden by the --config option,
- * which must come first. During parsing of all other options, the configuration
- * is already available.
- */
-
-#define CF_SHORT_OPTS "C:S:"
-#define CF_LONG_OPTS {"config", 1, 0, 'C'}, {"set", 1, 0, 'S'}, CF_LONG_OPTS_DEBUG
-#define CF_NO_LONG_OPTS (const struct option []) { CF_LONG_OPTS { NULL, 0, 0, 0 } }
-#ifndef CF_USAGE_TAB
-#define CF_USAGE_TAB ""
-#endif
-#define CF_USAGE \
-"-C, --config filename\t" CF_USAGE_TAB "Override the default configuration file\n\
--S, --set sec.item=val\t" CF_USAGE_TAB "Manual setting of a configuration item\n" CF_USAGE_DEBUG
-
-#ifdef CONFIG_DEBUG
-#define CF_LONG_OPTS_DEBUG { "dumpconfig", 0, 0, 0x64436667 } ,
-#define CF_USAGE_DEBUG " --dumpconfig\t" CF_USAGE_TAB "Dump program configuration\n"
-#else
-#define CF_LONG_OPTS_DEBUG
-#define CF_USAGE_DEBUG
-#endif
-
-// conf-input.c
-int cf_getopt(int argc, char * const argv[], const char *short_opts, const struct option *long_opts, int *long_index);
-
-#endif
+++ /dev/null
-# Tests for getopt
-
-Run: ../obj/lib/getopt-t -a -b --longc 2819 -d -a 1 2 3
-Out: option a
- option b
- option c with value `2819'
- option d with value `-a'
- 3 nonoption arguments
- reset
- option a
- option b
- option c with value `2819'
- option d with value `-a'
- 3 nonoption arguments
-
-Run: ../obj/lib/getopt-t -a -x
-Out: option a
- unknown option
- reset
- option a
- unknown option
+++ /dev/null
-# Makefile for the UCW GetOpt Library (c) 2007 Pavel Charvat <pchar@ucw.cz>
-
-DIRS+=lib/getopt
-
-LIBUCW_MODS+=getopt/getopt-sh
+++ /dev/null
-This directory contains getopt routines from the GNU libc 2.5.
-We need them as a fallback for our reset_getopt(), because there is
-no standardized interface for resetting the state of getopt.
-
-They are distributed under the GNU LGPL.
-
-All files are exact copies of the original distribution with very
-few exceptions, each marked by a comment with the `// SHERLOCK' prefix.
-I only provided my own getopt-sh.c, getopt-sh.h and Makefile.
-
- Pavel Charvat, 2007
-
+++ /dev/null
-#include "getopt-sh.h"
-#include "getopt_int.h"
-#include "getopt.c"
-#include "getopt1.c"
+++ /dev/null
-#ifndef _UCW_GETOPT_GETOPT_SH_H
-#define _UCW_GETOPT_GETOPT_SH_H
-
-#define getopt sh_getopt
-#define getopt_long sh_getopt_long
-#define getopt_long_only sh_getopt_longonly
-#define optarg sh_optarg
-#define optind sh_optind
-#define opterr sh_opterr
-#define optopt sh_optopt
-
-#include "lib/getopt/getopt.h"
-
-#endif
+++ /dev/null
-/* Getopt for GNU.
- NOTE: getopt is now part of the C library, so if you don't know what
- "Keep this file name-space clean" means, talk to drepper@gnu.org
- before changing it!
- Copyright (C) 1987,88,89,90,91,92,93,94,95,96,98,99,2000,2001,2002,2003,2004
- Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
-\f
-/* This tells Alpha OSF/1 not to define a getopt prototype in <stdio.h>.
- Ditto for AIX 3.2 and <stdlib.h>. */
-#ifndef _NO_PROTO
-# define _NO_PROTO
-#endif
-
-#ifdef HAVE_CONFIG_H
-# include <config.h>
-#endif
-
-#include <stdio.h>
-
-/* Comment out all this code if we are using the GNU C Library, and are not
- actually compiling the library itself. This code is part of the GNU C
- Library, but also included in many other GNU distributions. Compiling
- and linking in this code is a waste when using the GNU C library
- (especially if it is a shared library). Rather than having every GNU
- program understand `configure --with-gnu-libc' and omit the object files,
- it is simpler to just do this in the source for each such file. */
-
-#define GETOPT_INTERFACE_VERSION 2
-#if !defined _LIBC && defined __GLIBC__ && __GLIBC__ >= 2
-# include <gnu-versions.h>
-# if _GNU_GETOPT_INTERFACE_VERSION == GETOPT_INTERFACE_VERSION
-//# define ELIDE_CODE // SHERLOCK: disabled
-# endif
-#endif
-
-#ifndef ELIDE_CODE
-
-
-/* This needs to come after some library #include
- to get __GNU_LIBRARY__ defined. */
-#ifdef __GNU_LIBRARY__
-/* Don't include stdlib.h for non-GNU C libraries because some of them
- contain conflicting prototypes for getopt. */
-# include <stdlib.h>
-# include <unistd.h>
-#endif /* GNU C library. */
-
-#include <string.h>
-
-#ifdef VMS
-# include <unixlib.h>
-#endif
-
-#ifdef _LIBC
-# include <libintl.h>
-#else
-//# include "gettext.h" // SHERLOCK: replaced by <libintl.h>
-# include <libintl.h>
-# define _(msgid) gettext (msgid)
-#endif
-
-#if defined _LIBC && defined USE_IN_LIBIO
-# include <wchar.h>
-#endif
-
-#ifndef attribute_hidden
-# define attribute_hidden
-#endif
-
-/* This version of `getopt' appears to the caller like standard Unix `getopt'
- but it behaves differently for the user, since it allows the user
- to intersperse the options with the other arguments.
-
- As `getopt' works, it permutes the elements of ARGV so that,
- when it is done, all the options precede everything else. Thus
- all application programs are extended to handle flexible argument order.
-
- Setting the environment variable POSIXLY_CORRECT disables permutation.
- Then the behavior is completely standard.
-
- GNU application programs can use a third alternative mode in which
- they can distinguish the relative order of options and other arguments. */
-
-#include "getopt.h"
-#include "getopt_int.h"
-
-/* For communication from `getopt' to the caller.
- When `getopt' finds an option that takes an argument,
- the argument value is returned here.
- Also, when `ordering' is RETURN_IN_ORDER,
- each non-option ARGV-element is returned here. */
-
-char *optarg;
-
-/* Index in ARGV of the next element to be scanned.
- This is used for communication to and from the caller
- and for communication between successive calls to `getopt'.
-
- On entry to `getopt', zero means this is the first call; initialize.
-
- When `getopt' returns -1, this is the index of the first of the
- non-option elements that the caller should itself scan.
-
- Otherwise, `optind' communicates from one call to the next
- how much of ARGV has been scanned so far. */
-
-/* 1003.2 says this must be 1 before any call. */
-int optind = 1;
-
-/* Callers store zero here to inhibit the error message
- for unrecognized options. */
-
-int opterr = 1;
-
-/* Set to an option character which was unrecognized.
- This must be initialized on some systems to avoid linking in the
- system's own getopt implementation. */
-
-int optopt = '?';
-
-/* Keep a global copy of all internal members of getopt_data. */
-
-static struct _getopt_data getopt_data;
-
-\f
-#ifndef __GNU_LIBRARY__
-
-/* Avoid depending on library functions or files
- whose names are inconsistent. */
-
-#ifndef getenv
-extern char *getenv ();
-#endif
-
-#endif /* not __GNU_LIBRARY__ */
-\f
-#ifdef _LIBC
-/* Stored original parameters.
- XXX This is no good solution. We should rather copy the args so
- that we can compare them later. But we must not use malloc(3). */
-extern int __libc_argc;
-extern char **__libc_argv;
-
-/* Bash 2.0 gives us an environment variable containing flags
- indicating ARGV elements that should not be considered arguments. */
-
-# ifdef USE_NONOPTION_FLAGS
-/* Defined in getopt_init.c */
-extern char *__getopt_nonoption_flags;
-# endif
-
-# ifdef USE_NONOPTION_FLAGS
-# define SWAP_FLAGS(ch1, ch2) \
- if (d->__nonoption_flags_len > 0) \
- { \
- char __tmp = __getopt_nonoption_flags[ch1]; \
- __getopt_nonoption_flags[ch1] = __getopt_nonoption_flags[ch2]; \
- __getopt_nonoption_flags[ch2] = __tmp; \
- }
-# else
-# define SWAP_FLAGS(ch1, ch2)
-# endif
-#else /* !_LIBC */
-# define SWAP_FLAGS(ch1, ch2)
-#endif /* _LIBC */
-
-/* Exchange two adjacent subsequences of ARGV.
- One subsequence is elements [first_nonopt,last_nonopt)
- which contains all the non-options that have been skipped so far.
- The other is elements [last_nonopt,optind), which contains all
- the options processed since those non-options were skipped.
-
- `first_nonopt' and `last_nonopt' are relocated so that they describe
- the new indices of the non-options in ARGV after they are moved. */
-
-/* ARGV is the argument vector being permuted in place; D supplies the
-   segment boundaries (__first_nonopt, __last_nonopt, optind) and receives
-   the updated __first_nonopt / __last_nonopt on return. */
-static void
-exchange (char **argv, struct _getopt_data *d)
-{
- int bottom = d->__first_nonopt;
- int middle = d->__last_nonopt;
- int top = d->optind;
- char *tem;
-
- /* Exchange the shorter segment with the far end of the longer segment.
- That puts the shorter segment into the right place.
- It leaves the longer segment in the right place overall,
- but it consists of two parts that need to be swapped next. */
-
-#if defined _LIBC && defined USE_NONOPTION_FLAGS
- /* First make sure the handling of the `__getopt_nonoption_flags'
- string can work normally. Our top argument must be in the range
- of the string. */
- if (d->__nonoption_flags_len > 0 && top >= d->__nonoption_flags_max_len)
- {
- /* We must extend the array. The user plays games with us and
- presents new arguments. */
- char *new_str = malloc (top + 1);
- if (new_str == NULL)
- d->__nonoption_flags_len = d->__nonoption_flags_max_len = 0;
- else
- {
- memset (__mempcpy (new_str, __getopt_nonoption_flags,
- d->__nonoption_flags_max_len),
- '\0', top + 1 - d->__nonoption_flags_max_len);
- d->__nonoption_flags_max_len = top + 1;
- __getopt_nonoption_flags = new_str;
- }
- }
-#endif
-
- /* Keep going while both [bottom,middle) and [middle,top) are non-empty;
- each pass settles the shorter of the two and shrinks the range. */
- while (top > middle && middle > bottom)
- {
- if (top - middle > middle - bottom)
- {
- /* Bottom segment is the short one. */
- int len = middle - bottom;
- register int i;
-
- /* Swap it with the top part of the top segment. */
- for (i = 0; i < len; i++)
- {
- tem = argv[bottom + i];
- argv[bottom + i] = argv[top - (middle - bottom) + i];
- argv[top - (middle - bottom) + i] = tem;
- SWAP_FLAGS (bottom + i, top - (middle - bottom) + i);
- }
- /* Exclude the moved bottom segment from further swapping. */
- top -= len;
- }
- else
- {
- /* Top segment is the short one. */
- int len = top - middle;
- register int i;
-
- /* Swap it with the bottom part of the bottom segment. */
- for (i = 0; i < len; i++)
- {
- tem = argv[bottom + i];
- argv[bottom + i] = argv[middle + i];
- argv[middle + i] = tem;
- SWAP_FLAGS (bottom + i, middle + i);
- }
- /* Exclude the moved top segment from further swapping. */
- bottom += len;
- }
- }
-
- /* Update records for the slots the non-options now occupy. */
-
- /* The non-option run keeps its length but now ends at optind. */
- d->__first_nonopt += (d->optind - d->__last_nonopt);
- d->__last_nonopt = d->optind;
-}
-
-/* Initialize the internal data when the first call is made. */
-
-/* Resets the scan state in D (non-option range, __nextchar), records whether
-   POSIXLY_CORRECT is set in the environment and selects the ordering mode.
-   Returns OPTSTRING, advanced past a leading `-' or `+' if one was present.
-   ARGC and ARGV are consulted only in the _LIBC && USE_NONOPTION_FLAGS
-   build, where they are compared against __libc_argc / __libc_argv. */
-static const char *
-_getopt_initialize (int argc, char *const *argv, const char *optstring,
- struct _getopt_data *d)
-{
- /* Start processing options with ARGV-element 1 (since ARGV-element 0
- is the program name); the sequence of previously skipped
- non-option ARGV-elements is empty. */
-
- d->__first_nonopt = d->__last_nonopt = d->optind;
-
- d->__nextchar = NULL;
-
- d->__posixly_correct = !!getenv ("POSIXLY_CORRECT");
-
- /* Determine how to handle the ordering of options and nonoptions. */
-
- /* An explicit `-' or `+' prefix in OPTSTRING takes precedence over
- POSIXLY_CORRECT; PERMUTE is the fallback when neither applies. */
- if (optstring[0] == '-')
- {
- d->__ordering = RETURN_IN_ORDER;
- ++optstring;
- }
- else if (optstring[0] == '+')
- {
- d->__ordering = REQUIRE_ORDER;
- ++optstring;
- }
- else if (d->__posixly_correct)
- d->__ordering = REQUIRE_ORDER;
- else
- d->__ordering = PERMUTE;
-
-#if defined _LIBC && defined USE_NONOPTION_FLAGS
- /* The shell-provided non-option flags are used only when the caller passed
- the process's own argc/argv and POSIXLY_CORRECT is not set. */
- if (!d->__posixly_correct
- && argc == __libc_argc && argv == __libc_argv)
- {
- if (d->__nonoption_flags_max_len == 0)
- {
- if (__getopt_nonoption_flags == NULL
- || __getopt_nonoption_flags[0] == '\0')
- d->__nonoption_flags_max_len = -1;
- else
- {
- /* Copy the flag string into a buffer of at least ARGC bytes,
- zero-filling everything past the original text. */
- const char *orig_str = __getopt_nonoption_flags;
- int len = d->__nonoption_flags_max_len = strlen (orig_str);
- if (d->__nonoption_flags_max_len < argc)
- d->__nonoption_flags_max_len = argc;
- __getopt_nonoption_flags =
- (char *) malloc (d->__nonoption_flags_max_len);
- if (__getopt_nonoption_flags == NULL)
- d->__nonoption_flags_max_len = -1;
- else
- memset (__mempcpy (__getopt_nonoption_flags, orig_str, len),
- '\0', d->__nonoption_flags_max_len - len);
- }
- }
- d->__nonoption_flags_len = d->__nonoption_flags_max_len;
- }
- else
- d->__nonoption_flags_len = 0;
-#endif
-
- return optstring;
-}
-\f
-/* Scan elements of ARGV (whose length is ARGC) for option characters
- given in OPTSTRING.
-
- If an element of ARGV starts with '-', and is not exactly "-" or "--",
- then it is an option element. The characters of this element
- (aside from the initial '-') are option characters. If `getopt'
- is called repeatedly, it returns successively each of the option characters
- from each of the option elements.
-
- If `getopt' finds another option character, it returns that character,
- updating `optind' and `nextchar' so that the next call to `getopt' can
- resume the scan with the following option character or ARGV-element.
-
- If there are no more option characters, `getopt' returns -1.
- Then `optind' is the index in ARGV of the first ARGV-element
- that is not an option. (The ARGV-elements have been permuted
- so that those that are not options now come last.)
-
- OPTSTRING is a string containing the legitimate option characters.
- If an option character is seen that is not listed in OPTSTRING,
- return '?' after printing an error message. If you set `opterr' to
- zero, the error message is suppressed but we still return '?'.
-
- If a char in OPTSTRING is followed by a colon, that means it wants an arg,
- so the following text in the same ARGV-element, or the text of the following
- ARGV-element, is returned in `optarg'. Two colons mean an option that
- wants an optional arg; if there is text in the current ARGV-element,
- it is returned in `optarg', otherwise `optarg' is set to zero.
-
- If OPTSTRING starts with `-' or `+', it requests different methods of
- handling the non-option ARGV-elements.
- See the comments about RETURN_IN_ORDER and REQUIRE_ORDER, above.
-
- Long-named options begin with `--' instead of `-'.
- Their names may be abbreviated as long as the abbreviation is unique
- or is an exact match for some defined option. If they have an
- argument, it follows the option name in the same ARGV-element, separated
- from the option name by a `=', or else the in next ARGV-element.
- When `getopt' finds a long-named option, it returns 0 if that option's
- `flag' field is nonzero, the value of the option's `val' field
- if the `flag' field is zero.
-
- The elements of ARGV aren't really const, because we permute them.
- But we pretend they're const in the prototype to be compatible
- with other systems.
-
- LONGOPTS is a vector of `struct option' terminated by an
- element containing a name which is zero.
-
- LONGIND returns the index in LONGOPT of the long-named option found.
- It is only valid when a long-named option has been found by the most
- recent call.
-
- If LONG_ONLY is nonzero, '-' as well as '--' can introduce
- long-named options. */
-
-int
-_getopt_internal_r (int argc, char *const *argv, const char *optstring,
- const struct option *longopts, int *longind,
- int long_only, struct _getopt_data *d)
-{
- int print_errors = d->opterr;
- if (optstring[0] == ':')
- print_errors = 0;
-
- if (argc < 1)
- return -1;
-
- d->optarg = NULL;
-
- if (d->optind == 0 || !d->__initialized)
- {
- if (d->optind == 0)
- d->optind = 1; /* Don't scan ARGV[0], the program name. */
- optstring = _getopt_initialize (argc, argv, optstring, d);
- d->__initialized = 1;
- }
-
- /* Test whether ARGV[optind] points to a non-option argument.
- Either it does not have option syntax, or there is an environment flag
- from the shell indicating it is not an option. The later information
- is only used when the used in the GNU libc. */
-#if defined _LIBC && defined USE_NONOPTION_FLAGS
-# define NONOPTION_P (argv[d->optind][0] != '-' || argv[d->optind][1] == '\0' \
- || (d->optind < d->__nonoption_flags_len \
- && __getopt_nonoption_flags[d->optind] == '1'))
-#else
-# define NONOPTION_P (argv[d->optind][0] != '-' || argv[d->optind][1] == '\0')
-#endif
-
- if (d->__nextchar == NULL || *d->__nextchar == '\0')
- {
- /* Advance to the next ARGV-element. */
-
- /* Give FIRST_NONOPT & LAST_NONOPT rational values if OPTIND has been
- moved back by the user (who may also have changed the arguments). */
- if (d->__last_nonopt > d->optind)
- d->__last_nonopt = d->optind;
- if (d->__first_nonopt > d->optind)
- d->__first_nonopt = d->optind;
-
- if (d->__ordering == PERMUTE)
- {
- /* If we have just processed some options following some non-options,
- exchange them so that the options come first. */
-
- if (d->__first_nonopt != d->__last_nonopt
- && d->__last_nonopt != d->optind)
- exchange ((char **) argv, d);
- else if (d->__last_nonopt != d->optind)
- d->__first_nonopt = d->optind;
-
- /* Skip any additional non-options
- and extend the range of non-options previously skipped. */
-
- while (d->optind < argc && NONOPTION_P)
- d->optind++;
- d->__last_nonopt = d->optind;
- }
-
- /* The special ARGV-element `--' means premature end of options.
- Skip it like a null option,
- then exchange with previous non-options as if it were an option,
- then skip everything else like a non-option. */
-
- if (d->optind != argc && !strcmp (argv[d->optind], "--"))
- {
- d->optind++;
-
- if (d->__first_nonopt != d->__last_nonopt
- && d->__last_nonopt != d->optind)
- exchange ((char **) argv, d);
- else if (d->__first_nonopt == d->__last_nonopt)
- d->__first_nonopt = d->optind;
- d->__last_nonopt = argc;
-
- d->optind = argc;
- }
-
- /* If we have done all the ARGV-elements, stop the scan
- and back over any non-options that we skipped and permuted. */
-
- if (d->optind == argc)
- {
- /* Set the next-arg-index to point at the non-options
- that we previously skipped, so the caller will digest them. */
- if (d->__first_nonopt != d->__last_nonopt)
- d->optind = d->__first_nonopt;
- return -1;
- }
-
- /* If we have come to a non-option and did not permute it,
- either stop the scan or describe it to the caller and pass it by. */
-
- if (NONOPTION_P)
- {
- if (d->__ordering == REQUIRE_ORDER)
- return -1;
- d->optarg = argv[d->optind++];
- return 1;
- }
-
- /* We have found another option-ARGV-element.
- Skip the initial punctuation. */
-
- d->__nextchar = (argv[d->optind] + 1
- + (longopts != NULL && argv[d->optind][1] == '-'));
- }
-
- /* Decode the current option-ARGV-element. */
-
- /* Check whether the ARGV-element is a long option.
-
- If long_only and the ARGV-element has the form "-f", where f is
- a valid short option, don't consider it an abbreviated form of
- a long option that starts with f. Otherwise there would be no
- way to give the -f short option.
-
- On the other hand, if there's a long option "fubar" and
- the ARGV-element is "-fu", do consider that an abbreviation of
- the long option, just like "--fu", and not "-f" with arg "u".
-
- This distinction seems to be the most useful approach. */
-
- if (longopts != NULL
- && (argv[d->optind][1] == '-'
- || (long_only && (argv[d->optind][2]
- || !strchr (optstring, argv[d->optind][1])))))
- {
- char *nameend;
- const struct option *p;
- const struct option *pfound = NULL;
- int exact = 0;
- int ambig = 0;
- int indfound = -1;
- int option_index;
-
- for (nameend = d->__nextchar; *nameend && *nameend != '='; nameend++)
- /* Do nothing. */ ;
-
- /* Test all long options for either exact match
- or abbreviated matches. */
- for (p = longopts, option_index = 0; p->name; p++, option_index++)
- if (!strncmp (p->name, d->__nextchar, nameend - d->__nextchar))
- {
- if ((unsigned int) (nameend - d->__nextchar)
- == (unsigned int) strlen (p->name))
- {
- /* Exact match found. */
- pfound = p;
- indfound = option_index;
- exact = 1;
- break;
- }
- else if (pfound == NULL)
- {
- /* First nonexact match found. */
- pfound = p;
- indfound = option_index;
- }
- else if (long_only
- || pfound->has_arg != p->has_arg
- || pfound->flag != p->flag
- || pfound->val != p->val)
- /* Second or later nonexact match found. */
- ambig = 1;
- }
-
- if (ambig && !exact)
- {
- if (print_errors)
- {
-#if defined _LIBC && defined USE_IN_LIBIO
- char *buf;
-
- if (__asprintf (&buf, _("%s: option `%s' is ambiguous\n"),
- argv[0], argv[d->optind]) >= 0)
- {
- _IO_flockfile (stderr);
-
- int old_flags2 = ((_IO_FILE *) stderr)->_flags2;
- ((_IO_FILE *) stderr)->_flags2 |= _IO_FLAGS2_NOTCANCEL;
-
- __fxprintf (NULL, "%s", buf);
-
- ((_IO_FILE *) stderr)->_flags2 = old_flags2;
- _IO_funlockfile (stderr);
-
- free (buf);
- }
-#else
- fprintf (stderr, _("%s: option `%s' is ambiguous\n"),
- argv[0], argv[d->optind]);
-#endif
- }
- d->__nextchar += strlen (d->__nextchar);
- d->optind++;
- d->optopt = 0;
- return '?';
- }
-
- if (pfound != NULL)
- {
- option_index = indfound;
- d->optind++;
- if (*nameend)
- {
- /* Don't test has_arg with >, because some C compilers don't
- allow it to be used on enums. */
- if (pfound->has_arg)
- d->optarg = nameend + 1;
- else
- {
- if (print_errors)
- {
-#if defined _LIBC && defined USE_IN_LIBIO
- char *buf;
- int n;
-#endif
-
- if (argv[d->optind - 1][1] == '-')
- {
- /* --option */
-#if defined _LIBC && defined USE_IN_LIBIO
- n = __asprintf (&buf, _("\
-%s: option `--%s' doesn't allow an argument\n"),
- argv[0], pfound->name);
-#else
- fprintf (stderr, _("\
-%s: option `--%s' doesn't allow an argument\n"),
- argv[0], pfound->name);
-#endif
- }
- else
- {
- /* +option or -option */
-#if defined _LIBC && defined USE_IN_LIBIO
- n = __asprintf (&buf, _("\
-%s: option `%c%s' doesn't allow an argument\n"),
- argv[0], argv[d->optind - 1][0],
- pfound->name);
-#else
- fprintf (stderr, _("\
-%s: option `%c%s' doesn't allow an argument\n"),
- argv[0], argv[d->optind - 1][0],
- pfound->name);
-#endif
- }
-
-#if defined _LIBC && defined USE_IN_LIBIO
- if (n >= 0)
- {
- _IO_flockfile (stderr);
-
- int old_flags2 = ((_IO_FILE *) stderr)->_flags2;
- ((_IO_FILE *) stderr)->_flags2
- |= _IO_FLAGS2_NOTCANCEL;
-
- __fxprintf (NULL, "%s", buf);
-
- ((_IO_FILE *) stderr)->_flags2 = old_flags2;
- _IO_funlockfile (stderr);
-
- free (buf);
- }
-#endif
- }
-
- d->__nextchar += strlen (d->__nextchar);
-
- d->optopt = pfound->val;
- return '?';
- }
- }
- else if (pfound->has_arg == 1)
- {
- if (d->optind < argc)
- d->optarg = argv[d->optind++];
- else
- {
- if (print_errors)
- {
-#if defined _LIBC && defined USE_IN_LIBIO
- char *buf;
-
- if (__asprintf (&buf, _("\
-%s: option `%s' requires an argument\n"),
- argv[0], argv[d->optind - 1]) >= 0)
- {
- _IO_flockfile (stderr);
-
- int old_flags2 = ((_IO_FILE *) stderr)->_flags2;
- ((_IO_FILE *) stderr)->_flags2
- |= _IO_FLAGS2_NOTCANCEL;
-
- __fxprintf (NULL, "%s", buf);
-
- ((_IO_FILE *) stderr)->_flags2 = old_flags2;
- _IO_funlockfile (stderr);
-
- free (buf);
- }
-#else
- fprintf (stderr,
- _("%s: option `%s' requires an argument\n"),
- argv[0], argv[d->optind - 1]);
-#endif
- }
- d->__nextchar += strlen (d->__nextchar);
- d->optopt = pfound->val;
- return optstring[0] == ':' ? ':' : '?';
- }
- }
- d->__nextchar += strlen (d->__nextchar);
- if (longind != NULL)
- *longind = option_index;
- if (pfound->flag)
- {
- *(pfound->flag) = pfound->val;
- return 0;
- }
- return pfound->val;
- }
-
- /* Can't find it as a long option. If this is not getopt_long_only,
- or the option starts with '--' or is not a valid short
- option, then it's an error.
- Otherwise interpret it as a short option. */
- if (!long_only || argv[d->optind][1] == '-'
- || strchr (optstring, *d->__nextchar) == NULL)
- {
- if (print_errors)
- {
-#if defined _LIBC && defined USE_IN_LIBIO
- char *buf;
- int n;
-#endif
-
- if (argv[d->optind][1] == '-')
- {
- /* --option */
-#if defined _LIBC && defined USE_IN_LIBIO
- n = __asprintf (&buf, _("%s: unrecognized option `--%s'\n"),
- argv[0], d->__nextchar);
-#else
- fprintf (stderr, _("%s: unrecognized option `--%s'\n"),
- argv[0], d->__nextchar);
-#endif
- }
- else
- {
- /* +option or -option */
-#if defined _LIBC && defined USE_IN_LIBIO
- n = __asprintf (&buf, _("%s: unrecognized option `%c%s'\n"),
- argv[0], argv[d->optind][0], d->__nextchar);
-#else
- fprintf (stderr, _("%s: unrecognized option `%c%s'\n"),
- argv[0], argv[d->optind][0], d->__nextchar);
-#endif
- }
-
-#if defined _LIBC && defined USE_IN_LIBIO
- if (n >= 0)
- {
- _IO_flockfile (stderr);
-
- int old_flags2 = ((_IO_FILE *) stderr)->_flags2;
- ((_IO_FILE *) stderr)->_flags2 |= _IO_FLAGS2_NOTCANCEL;
-
- __fxprintf (NULL, "%s", buf);
-
- ((_IO_FILE *) stderr)->_flags2 = old_flags2;
- _IO_funlockfile (stderr);
-
- free (buf);
- }
-#endif
- }
- d->__nextchar = (char *) "";
- d->optind++;
- d->optopt = 0;
- return '?';
- }
- }
-
- /* Look at and handle the next short option-character. */
-
- {
- char c = *d->__nextchar++;
- char *temp = strchr (optstring, c);
-
- /* Increment `optind' when we start to process its last character. */
- if (*d->__nextchar == '\0')
- ++d->optind;
-
- if (temp == NULL || c == ':')
- {
- if (print_errors)
- {
-#if defined _LIBC && defined USE_IN_LIBIO
- char *buf;
- int n;
-#endif
-
- if (d->__posixly_correct)
- {
- /* 1003.2 specifies the format of this message. */
-#if defined _LIBC && defined USE_IN_LIBIO
- n = __asprintf (&buf, _("%s: illegal option -- %c\n"),
- argv[0], c);
-#else
- fprintf (stderr, _("%s: illegal option -- %c\n"), argv[0], c);
-#endif
- }
- else
- {
-#if defined _LIBC && defined USE_IN_LIBIO
- n = __asprintf (&buf, _("%s: invalid option -- %c\n"),
- argv[0], c);
-#else
- fprintf (stderr, _("%s: invalid option -- %c\n"), argv[0], c);
-#endif
- }
-
-#if defined _LIBC && defined USE_IN_LIBIO
- if (n >= 0)
- {
- _IO_flockfile (stderr);
-
- int old_flags2 = ((_IO_FILE *) stderr)->_flags2;
- ((_IO_FILE *) stderr)->_flags2 |= _IO_FLAGS2_NOTCANCEL;
-
- __fxprintf (NULL, "%s", buf);
-
- ((_IO_FILE *) stderr)->_flags2 = old_flags2;
- _IO_funlockfile (stderr);
-
- free (buf);
- }
-#endif
- }
- d->optopt = c;
- return '?';
- }
- /* Convenience. Treat POSIX -W foo same as long option --foo */
- if (temp[0] == 'W' && temp[1] == ';')
- {
- char *nameend;
- const struct option *p;
- const struct option *pfound = NULL;
- int exact = 0;
- int ambig = 0;
- int indfound = 0;
- int option_index;
-
- /* This is an option that requires an argument. */
- if (*d->__nextchar != '\0')
- {
- d->optarg = d->__nextchar;
- /* If we end this ARGV-element by taking the rest as an arg,
- we must advance to the next element now. */
- d->optind++;
- }
- else if (d->optind == argc)
- {
- if (print_errors)
- {
- /* 1003.2 specifies the format of this message. */
-#if defined _LIBC && defined USE_IN_LIBIO
- char *buf;
-
- if (__asprintf (&buf,
- _("%s: option requires an argument -- %c\n"),
- argv[0], c) >= 0)
- {
- _IO_flockfile (stderr);
-
- int old_flags2 = ((_IO_FILE *) stderr)->_flags2;
- ((_IO_FILE *) stderr)->_flags2 |= _IO_FLAGS2_NOTCANCEL;
-
- __fxprintf (NULL, "%s", buf);
-
- ((_IO_FILE *) stderr)->_flags2 = old_flags2;
- _IO_funlockfile (stderr);
-
- free (buf);
- }
-#else
- fprintf (stderr, _("%s: option requires an argument -- %c\n"),
- argv[0], c);
-#endif
- }
- d->optopt = c;
- if (optstring[0] == ':')
- c = ':';
- else
- c = '?';
- return c;
- }
- else
- /* We already incremented `d->optind' once;
- increment it again when taking next ARGV-elt as argument. */
- d->optarg = argv[d->optind++];
-
- /* optarg is now the argument, see if it's in the
- table of longopts. */
-
- for (d->__nextchar = nameend = d->optarg; *nameend && *nameend != '=';
- nameend++)
- /* Do nothing. */ ;
-
- /* Test all long options for either exact match
- or abbreviated matches. */
- for (p = longopts, option_index = 0; p->name; p++, option_index++)
- if (!strncmp (p->name, d->__nextchar, nameend - d->__nextchar))
- {
- if ((unsigned int) (nameend - d->__nextchar) == strlen (p->name))
- {
- /* Exact match found. */
- pfound = p;
- indfound = option_index;
- exact = 1;
- break;
- }
- else if (pfound == NULL)
- {
- /* First nonexact match found. */
- pfound = p;
- indfound = option_index;
- }
- else
- /* Second or later nonexact match found. */
- ambig = 1;
- }
- if (ambig && !exact)
- {
- if (print_errors)
- {
-#if defined _LIBC && defined USE_IN_LIBIO
- char *buf;
-
- if (__asprintf (&buf, _("%s: option `-W %s' is ambiguous\n"),
- argv[0], argv[d->optind]) >= 0)
- {
- _IO_flockfile (stderr);
-
- int old_flags2 = ((_IO_FILE *) stderr)->_flags2;
- ((_IO_FILE *) stderr)->_flags2 |= _IO_FLAGS2_NOTCANCEL;
-
- __fxprintf (NULL, "%s", buf);
-
- ((_IO_FILE *) stderr)->_flags2 = old_flags2;
- _IO_funlockfile (stderr);
-
- free (buf);
- }
-#else
- fprintf (stderr, _("%s: option `-W %s' is ambiguous\n"),
- argv[0], argv[d->optind]);
-#endif
- }
- d->__nextchar += strlen (d->__nextchar);
- d->optind++;
- return '?';
- }
- if (pfound != NULL)
- {
- option_index = indfound;
- if (*nameend)
- {
- /* Don't test has_arg with >, because some C compilers don't
- allow it to be used on enums. */
- if (pfound->has_arg)
- d->optarg = nameend + 1;
- else
- {
- if (print_errors)
- {
-#if defined _LIBC && defined USE_IN_LIBIO
- char *buf;
-
- if (__asprintf (&buf, _("\
-%s: option `-W %s' doesn't allow an argument\n"),
- argv[0], pfound->name) >= 0)
- {
- _IO_flockfile (stderr);
-
- int old_flags2 = ((_IO_FILE *) stderr)->_flags2;
- ((_IO_FILE *) stderr)->_flags2
- |= _IO_FLAGS2_NOTCANCEL;
-
- __fxprintf (NULL, "%s", buf);
-
- ((_IO_FILE *) stderr)->_flags2 = old_flags2;
- _IO_funlockfile (stderr);
-
- free (buf);
- }
-#else
- fprintf (stderr, _("\
-%s: option `-W %s' doesn't allow an argument\n"),
- argv[0], pfound->name);
-#endif
- }
-
- d->__nextchar += strlen (d->__nextchar);
- return '?';
- }
- }
- else if (pfound->has_arg == 1)
- {
- if (d->optind < argc)
- d->optarg = argv[d->optind++];
- else
- {
- if (print_errors)
- {
-#if defined _LIBC && defined USE_IN_LIBIO
- char *buf;
-
- if (__asprintf (&buf, _("\
-%s: option `%s' requires an argument\n"),
- argv[0], argv[d->optind - 1]) >= 0)
- {
- _IO_flockfile (stderr);
-
- int old_flags2 = ((_IO_FILE *) stderr)->_flags2;
- ((_IO_FILE *) stderr)->_flags2
- |= _IO_FLAGS2_NOTCANCEL;
-
- __fxprintf (NULL, "%s", buf);
-
- ((_IO_FILE *) stderr)->_flags2 = old_flags2;
- _IO_funlockfile (stderr);
-
- free (buf);
- }
-#else
- fprintf (stderr,
- _("%s: option `%s' requires an argument\n"),
- argv[0], argv[d->optind - 1]);
-#endif
- }
- d->__nextchar += strlen (d->__nextchar);
- return optstring[0] == ':' ? ':' : '?';
- }
- }
- d->__nextchar += strlen (d->__nextchar);
- if (longind != NULL)
- *longind = option_index;
- if (pfound->flag)
- {
- *(pfound->flag) = pfound->val;
- return 0;
- }
- return pfound->val;
- }
- d->__nextchar = NULL;
- return 'W'; /* Let the application handle it. */
- }
- if (temp[1] == ':')
- {
- if (temp[2] == ':')
- {
- /* This is an option that accepts an argument optionally. */
- if (*d->__nextchar != '\0')
- {
- d->optarg = d->__nextchar;
- d->optind++;
- }
- else
- d->optarg = NULL;
- d->__nextchar = NULL;
- }
- else
- {
- /* This is an option that requires an argument. */
- if (*d->__nextchar != '\0')
- {
- d->optarg = d->__nextchar;
- /* If we end this ARGV-element by taking the rest as an arg,
- we must advance to the next element now. */
- d->optind++;
- }
- else if (d->optind == argc)
- {
- if (print_errors)
- {
- /* 1003.2 specifies the format of this message. */
-#if defined _LIBC && defined USE_IN_LIBIO
- char *buf;
-
- if (__asprintf (&buf, _("\
-%s: option requires an argument -- %c\n"),
- argv[0], c) >= 0)
- {
- _IO_flockfile (stderr);
-
- int old_flags2 = ((_IO_FILE *) stderr)->_flags2;
- ((_IO_FILE *) stderr)->_flags2 |= _IO_FLAGS2_NOTCANCEL;
-
- __fxprintf (NULL, "%s", buf);
-
- ((_IO_FILE *) stderr)->_flags2 = old_flags2;
- _IO_funlockfile (stderr);
-
- free (buf);
- }
-#else
- fprintf (stderr,
- _("%s: option requires an argument -- %c\n"),
- argv[0], c);
-#endif
- }
- d->optopt = c;
- if (optstring[0] == ':')
- c = ':';
- else
- c = '?';
- }
- else
- /* We already incremented `optind' once;
- increment it again when taking next ARGV-elt as argument. */
- d->optarg = argv[d->optind++];
- d->__nextchar = NULL;
- }
- }
- return c;
- }
-}
-
-int
-_getopt_internal (int argc, char *const *argv, const char *optstring,
- const struct option *longopts, int *longind, int long_only)
-{
- int result;
-
- getopt_data.optind = optind;
- getopt_data.opterr = opterr;
-
- result = _getopt_internal_r (argc, argv, optstring, longopts,
- longind, long_only, &getopt_data);
-
- optind = getopt_data.optind;
- optarg = getopt_data.optarg;
- optopt = getopt_data.optopt;
-
- return result;
-}
-
-int
-getopt (int argc, char *const *argv, const char *optstring)
-{
- return _getopt_internal (argc, argv, optstring,
- (const struct option *) 0,
- (int *) 0,
- 0);
-}
-
-#endif /* Not ELIDE_CODE. */
-\f
-#ifdef TEST
-
-/* Compile with -DTEST to make an executable for use in testing
- the above definition of `getopt'. */
-
-int
-main (int argc, char **argv)
-{
- int c;
- int digit_optind = 0;
-
- while (1)
- {
- int this_option_optind = optind ? optind : 1;
-
- c = getopt (argc, argv, "abc:d:0123456789");
- if (c == -1)
- break;
-
- switch (c)
- {
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9':
- if (digit_optind != 0 && digit_optind != this_option_optind)
- printf ("digits occur in two different argv-elements.\n");
- digit_optind = this_option_optind;
- printf ("option %c\n", c);
- break;
-
- case 'a':
- printf ("option a\n");
- break;
-
- case 'b':
- printf ("option b\n");
- break;
-
- case 'c':
- printf ("option c with value `%s'\n", optarg);
- break;
-
- case '?':
- break;
-
- default:
- printf ("?? getopt returned character code 0%o ??\n", c);
- }
- }
-
- if (optind < argc)
- {
- printf ("non-option ARGV-elements: ");
- while (optind < argc)
- printf ("%s ", argv[optind++]);
- printf ("\n");
- }
-
- exit (0);
-}
-
-#endif /* TEST */
+++ /dev/null
-/* Declarations for getopt.
- Copyright (C) 1989-1994,1996-1999,2001,2003,2004
- Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
-
-#ifndef _GETOPT_H
-
-#ifndef __need_getopt
-# define _GETOPT_H 1
-#endif
-
-/* If __GNU_LIBRARY__ is not already defined, either we are being used
- standalone, or this is the first header included in the source file.
- If we are being used with glibc, we need to include <features.h>, but
- that does not exist if we are standalone. So: if __GNU_LIBRARY__ is
- not defined, include <ctype.h>, which will pull in <features.h> for us
- if it's from glibc. (Why ctype.h? It's guaranteed to exist and it
- doesn't flood the namespace with stuff the way some other headers do.) */
-#if !defined __GNU_LIBRARY__
-# include <ctype.h>
-#endif
-
-#ifndef __THROW
-# ifndef __GNUC_PREREQ
-# define __GNUC_PREREQ(maj, min) (0)
-# endif
-# if defined __cplusplus && __GNUC_PREREQ (2,8)
-# define __THROW throw ()
-# else
-# define __THROW
-# endif
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* For communication from `getopt' to the caller.
- When `getopt' finds an option that takes an argument,
- the argument value is returned here.
- Also, when `ordering' is RETURN_IN_ORDER,
- each non-option ARGV-element is returned here. */
-
-extern char *optarg;
-
-/* Index in ARGV of the next element to be scanned.
- This is used for communication to and from the caller
- and for communication between successive calls to `getopt'.
-
- On entry to `getopt', zero means this is the first call; initialize.
-
- When `getopt' returns -1, this is the index of the first of the
- non-option elements that the caller should itself scan.
-
- Otherwise, `optind' communicates from one call to the next
- how much of ARGV has been scanned so far. */
-
-extern int optind;
-
-/* Callers store zero here to inhibit the error message `getopt' prints
- for unrecognized options. */
-
-extern int opterr;
-
-/* Set to an option character which was unrecognized. */
-
-extern int optopt;
-
-#ifndef __need_getopt
-/* Describe the long-named options requested by the application.
- The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector
- of `struct option' terminated by an element containing a name which is
- zero.
-
- The field `has_arg' is:
- no_argument (or 0) if the option does not take an argument,
- required_argument (or 1) if the option requires an argument,
- optional_argument (or 2) if the option takes an optional argument.
-
- If the field `flag' is not NULL, it points to a variable that is set
- to the value given in the field `val' when the option is found, but
- left unchanged if the option is not found.
-
- To have a long-named option do something other than set an `int' to
- a compiled-in constant, such as set a value from `optarg', set the
- option's `flag' field to zero and its `val' field to a nonzero
- value (the equivalent single-letter option character, if there is
- one). For long options that have a zero `flag' field, `getopt'
- returns the contents of the `val' field. */
-
-struct option
-{
- const char *name;
- /* has_arg can't be an enum because some compilers complain about
- type mismatches in all the code that assumes it is an int. */
- int has_arg;
- int *flag;
- int val;
-};
-
-/* Names for the values of the `has_arg' field of `struct option'. */
-
-# define no_argument 0
-# define required_argument 1
-# define optional_argument 2
-#endif /* need getopt */
-
-
-/* Get definitions and prototypes for functions to process the
- arguments in ARGV (ARGC of them, minus the program name) for
- options given in OPTS.
-
- Return the option character from OPTS just read. Return -1 when
- there are no more options. For unrecognized options, or options
- missing arguments, `optopt' is set to the option letter, and '?' is
- returned.
-
- The OPTS string is a list of characters which are recognized option
- letters, optionally followed by colons, specifying that that letter
- takes an argument, to be placed in `optarg'.
-
- If a letter in OPTS is followed by two colons, its argument is
- optional. This behavior is specific to the GNU `getopt'.
-
- The argument `--' causes premature termination of argument
- scanning, explicitly telling `getopt' that there are no more
- options.
-
- If OPTS begins with `--', then non-option arguments are treated as
- arguments to the option '\0'. This behavior is specific to the GNU
- `getopt'. */
-
-#ifdef __GNU_LIBRARY__
-/* Many other libraries have conflicting prototypes for getopt, with
- differences in the consts, in stdlib.h. To avoid compilation
- errors, only prototype getopt for the GNU C library. */
-extern int getopt (int ___argc, char *const *___argv, const char *__shortopts)
- __THROW;
-#else /* not __GNU_LIBRARY__ */
-extern int getopt ();
-#endif /* __GNU_LIBRARY__ */
-
-#ifndef __need_getopt
-extern int getopt_long (int ___argc, char *const *___argv,
- const char *__shortopts,
- const struct option *__longopts, int *__longind)
- __THROW;
-extern int getopt_long_only (int ___argc, char *const *___argv,
- const char *__shortopts,
- const struct option *__longopts, int *__longind)
- __THROW;
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif
-
-/* Make sure we later can get all the definitions and declarations. */
-#undef __need_getopt
-
-#endif /* getopt.h */
+++ /dev/null
-/* getopt_long and getopt_long_only entry points for GNU getopt.
- Copyright (C) 1987,88,89,90,91,92,93,94,96,97,98,2004
- Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
-\f
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#ifdef _LIBC
-# include <getopt.h>
-#else
-# include "getopt.h"
-#endif
-#include "getopt_int.h"
-
-#include <stdio.h>
-
-/* Comment out all this code if we are using the GNU C Library, and are not
- actually compiling the library itself. This code is part of the GNU C
- Library, but also included in many other GNU distributions. Compiling
- and linking in this code is a waste when using the GNU C library
- (especially if it is a shared library). Rather than having every GNU
- program understand `configure --with-gnu-libc' and omit the object files,
- it is simpler to just do this in the source for each such file. */
-
-#define GETOPT_INTERFACE_VERSION 2
-#if !defined _LIBC && defined __GLIBC__ && __GLIBC__ >= 2
-#include <gnu-versions.h>
-#if _GNU_GETOPT_INTERFACE_VERSION == GETOPT_INTERFACE_VERSION
-//#define ELIDE_CODE // SHERLOCK: disabled
-#endif
-#endif
-
-#ifndef ELIDE_CODE
-
-
-/* This needs to come after some library #include
- to get __GNU_LIBRARY__ defined. */
-#ifdef __GNU_LIBRARY__
-#include <stdlib.h>
-#endif
-
-#ifndef NULL
-#define NULL 0
-#endif
-
-int
-getopt_long (int argc, char *const *argv, const char *options,
- const struct option *long_options, int *opt_index)
-{
- return _getopt_internal (argc, argv, options, long_options, opt_index, 0);
-}
-
-int
-_getopt_long_r (int argc, char *const *argv, const char *options,
- const struct option *long_options, int *opt_index,
- struct _getopt_data *d)
-{
- return _getopt_internal_r (argc, argv, options, long_options, opt_index,
- 0, d);
-}
-
-/* Like getopt_long, but '-' as well as '--' can indicate a long option.
- If an option that starts with '-' (not '--') doesn't match a long option,
- but does match a short option, it is parsed as a short option
- instead. */
-
-int
-getopt_long_only (int argc, char *const *argv, const char *options,
- const struct option *long_options, int *opt_index)
-{
- return _getopt_internal (argc, argv, options, long_options, opt_index, 1);
-}
-
-int
-_getopt_long_only_r (int argc, char *const *argv, const char *options,
- const struct option *long_options, int *opt_index,
- struct _getopt_data *d)
-{
- return _getopt_internal_r (argc, argv, options, long_options, opt_index,
- 1, d);
-}
-
-#endif /* Not ELIDE_CODE. */
-\f
-#ifdef TEST
-
-#include <stdio.h>
-
-int
-main (int argc, char **argv)
-{
- int c;
- int digit_optind = 0;
-
- while (1)
- {
- int this_option_optind = optind ? optind : 1;
- int option_index = 0;
- static struct option long_options[] =
- {
- {"add", 1, 0, 0},
- {"append", 0, 0, 0},
- {"delete", 1, 0, 0},
- {"verbose", 0, 0, 0},
- {"create", 0, 0, 0},
- {"file", 1, 0, 0},
- {0, 0, 0, 0}
- };
-
- c = getopt_long (argc, argv, "abc:d:0123456789",
- long_options, &option_index);
- if (c == -1)
- break;
-
- switch (c)
- {
- case 0:
- printf ("option %s", long_options[option_index].name);
- if (optarg)
- printf (" with arg %s", optarg);
- printf ("\n");
- break;
-
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9':
- if (digit_optind != 0 && digit_optind != this_option_optind)
- printf ("digits occur in two different argv-elements.\n");
- digit_optind = this_option_optind;
- printf ("option %c\n", c);
- break;
-
- case 'a':
- printf ("option a\n");
- break;
-
- case 'b':
- printf ("option b\n");
- break;
-
- case 'c':
- printf ("option c with value `%s'\n", optarg);
- break;
-
- case 'd':
- printf ("option d with value `%s'\n", optarg);
- break;
-
- case '?':
- break;
-
- default:
- printf ("?? getopt returned character code 0%o ??\n", c);
- }
- }
-
- if (optind < argc)
- {
- printf ("non-option ARGV-elements: ");
- while (optind < argc)
- printf ("%s ", argv[optind++]);
- printf ("\n");
- }
-
- exit (0);
-}
-
-#endif /* TEST */
+++ /dev/null
-/* Perform additional initialization for getopt functions in GNU libc.
- Copyright (C) 1997, 1998, 2001 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
-
-#ifdef USE_NONOPTION_FLAGS
-/* Attention: this file is *not* necessary when the GNU getopt functions
- are used outside the GNU libc. Some additional functionality of the
- getopt functions in GNU libc require this additional work. */
-
-#include <getopt.h>
-#include <string.h>
-#include <unistd.h>
-#include <sys/types.h>
-
-#include <stdio-common/_itoa.h>
-
-/* Variable to synchronize work. */
-char *__getopt_nonoption_flags;
-
-
-/* Remove the environment variable "_<PID>_GNU_nonoption_argv_flags_" if
- it is still available. If the getopt functions are also used in the
- application it does not exist anymore since it was saved for the use
- in getopt. */
-void
-__getopt_clean_environment (char **env)
-{
- /* Bash 2.0 puts a special variable in the environment for each
- command it runs, specifying which ARGV elements are the results
- of file name wildcard expansion and therefore should not be
- considered as options. */
- static const char envvar_tail[] = "_GNU_nonoption_argv_flags_=";
- char var[50];
- char *cp, **ep;
- size_t len;
-
- /* Construct the "_<PID>_GNU_nonoption_argv_flags_=" string. We must
- not use `sprintf'. */
- cp = memcpy (&var[sizeof (var) - sizeof (envvar_tail)], envvar_tail,
- sizeof (envvar_tail));
- cp = _itoa_word (__getpid (), cp, 10, 0);
- /* Note: we omit adding the leading '_' since we explicitly test for
- it before calling strncmp. */
- len = (var + sizeof (var) - 1) - cp;
-
- for (ep = env; *ep != NULL; ++ep)
- if ((*ep)[0] == '_'
- && __builtin_expect (strncmp (*ep + 1, cp, len) == 0, 0))
- {
- /* Found it. Store this pointer and move later ones back. */
- char **dp = ep;
- __getopt_nonoption_flags = &(*ep)[len];
- do
- dp[0] = dp[1];
- while (*dp++);
- /* Continue the loop in case the name appears again. */
- }
-}
-#endif /* USE_NONOPTION_FLAGS */
+++ /dev/null
-/* Internal declarations for getopt.
- Copyright (C) 1989-1994,1996-1999,2001,2003,2004
- Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
-
-#ifndef _GETOPT_INT_H
-#define _GETOPT_INT_H 1
-
-extern int _getopt_internal (int ___argc, char *const *___argv,
- const char *__shortopts,
- const struct option *__longopts, int *__longind,
- int __long_only);
-
-\f
-/* Reentrant versions which can handle parsing multiple argument
- vectors at the same time. */
-
-/* Data type for reentrant functions. */
-struct _getopt_data
-{
- /* These have exactly the same meaning as the corresponding global
- variables, except that they are used for the reentrant
- versions of getopt. */
- int optind;
- int opterr;
- int optopt;
- char *optarg;
-
- /* Internal members. */
-
- /* True if the internal members have been initialized. */
- int __initialized;
-
- /* The next char to be scanned in the option-element
- in which the last option character we returned was found.
- This allows us to pick up the scan where we left off.
-
- If this is zero, or a null string, it means resume the scan
- by advancing to the next ARGV-element. */
- char *__nextchar;
-
- /* Describe how to deal with options that follow non-option ARGV-elements.
-
- If the caller did not specify anything,
- the default is REQUIRE_ORDER if the environment variable
- POSIXLY_CORRECT is defined, PERMUTE otherwise.
-
- REQUIRE_ORDER means don't recognize them as options;
- stop option processing when the first non-option is seen.
- This is what Unix does.
- This mode of operation is selected by either setting the environment
- variable POSIXLY_CORRECT, or using `+' as the first character
- of the list of option characters.
-
- PERMUTE is the default. We permute the contents of ARGV as we
- scan, so that eventually all the non-options are at the end.
- This allows options to be given in any order, even with programs
- that were not written to expect this.
-
- RETURN_IN_ORDER is an option available to programs that were
- written to expect options and other ARGV-elements in any order
- and that care about the ordering of the two. We describe each
- non-option ARGV-element as if it were the argument of an option
- with character code 1. Using `-' as the first character of the
- list of option characters selects this mode of operation.
-
- The special argument `--' forces an end of option-scanning regardless
- of the value of `ordering'. In the case of RETURN_IN_ORDER, only
- `--' can cause `getopt' to return -1 with `optind' != ARGC. */
-
- enum
- {
- REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER
- } __ordering;
-
- /* If the POSIXLY_CORRECT environment variable is set. */
- int __posixly_correct;
-
-
- /* Handle permutation of arguments. */
-
- /* Describe the part of ARGV that contains non-options that have
- been skipped. `first_nonopt' is the index in ARGV of the first
- of them; `last_nonopt' is the index after the last of them. */
-
- int __first_nonopt;
- int __last_nonopt;
-
-#if defined _LIBC && defined USE_NONOPTION_FLAGS
- int __nonoption_flags_max_len;
- int __nonoption_flags_len;
-# endif
-};
-
-/* The initializer is necessary to set OPTIND and OPTERR to their
- default values and to clear the initialization flag. */
-#define _GETOPT_DATA_INITIALIZER { 1, 1 }
-
-extern int _getopt_internal_r (int ___argc, char *const *___argv,
- const char *__shortopts,
- const struct option *__longopts, int *__longind,
- int __long_only, struct _getopt_data *__data);
-
-extern int _getopt_long_r (int ___argc, char *const *___argv,
- const char *__shortopts,
- const struct option *__longopts, int *__longind,
- struct _getopt_data *__data);
-
-extern int _getopt_long_only_r (int ___argc, char *const *___argv,
- const char *__shortopts,
- const struct option *__longopts,
- int *__longind,
- struct _getopt_data *__data);
-
-#endif /* getopt_int.h */
+++ /dev/null
-/* Tests for hash table routines */
-
-#include "lib/lib.h"
-#include "lib/mempool.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <stdlib.h>
-
-/* TEST 1: integers */
-
-struct node1 {
- int key;
- int data;
-};
-
-#define HASH_NODE struct node1
-#define HASH_PREFIX(x) test1_##x
-#define HASH_KEY_ATOMIC key
-#define HASH_ATOMIC_TYPE int
-#define HASH_ZERO_FILL
-
-#define HASH_GIVE_INIT_DATA
-static inline void test1_init_data(struct node1 *n)
-{
- n->data = n->key + 123;
-}
-
-#define HASH_WANT_FIND
-#define HASH_WANT_LOOKUP
-#define HASH_WANT_REMOVE
-
-#include "lib/hashtable.h"
-
-static void test1(void)
-{
- int i;
-
- test1_init();
- for (i=0; i<1024; i++)
- {
- struct node1 *n = test1_lookup(i);
- ASSERT(n->data == i+123);
- }
- for (i=1; i<1024; i+=2)
- {
- struct node1 *n = test1_lookup(i);
- test1_remove(n);
- }
- for (i=0; i<1024; i++)
- {
- struct node1 *n = test1_find(i);
- if (!n != (i&1) || (n && n->data != i+123))
- die("Inconsistency at i=%d", i);
- }
- i=0;
- HASH_FOR_ALL(test1, n)
- {
- i += 1 + n->key;
- }
- HASH_END_FOR;
- ASSERT(i == 262144);
- puts("OK");
-}
-
-/* TEST 2: external strings */
-
-struct node2 {
- char *key;
- int data;
-};
-
-#define HASH_NODE struct node2
-#define HASH_PREFIX(x) test2_##x
-#define HASH_KEY_STRING key
-#define HASH_NOCASE
-#define HASH_AUTO_POOL 4096
-
-#define HASH_WANT_FIND
-#define HASH_WANT_NEW
-
-#include "lib/hashtable.h"
-
-static void test2(void)
-{
- int i;
-
- test2_init();
- for (i=0; i<1024; i+=2)
- {
- char x[32];
- sprintf(x, "abc%d", i);
- test2_new(xstrdup(x));
- }
- for (i=0; i<1024; i++)
- {
- char x[32];
- struct node2 *n;
- sprintf(x, "ABC%d", i);
- n = test2_find(x);
- if (!n != (i&1))
- die("Inconsistency at i=%d", i);
- }
- puts("OK");
-}
-
-/* TEST 3: internal strings + pools */
-
-static struct mempool *pool3;
-
-struct node3 {
- int data;
- char key[1];
-};
-
-#define HASH_NODE struct node3
-#define HASH_PREFIX(x) test3_##x
-#define HASH_KEY_ENDSTRING key
-
-#define HASH_WANT_FIND
-#define HASH_WANT_NEW
-
-#define HASH_USE_POOL pool3
-
-#include "lib/hashtable.h"
-
-static void test3(void)
-{
- int i;
-
- pool3 = mp_new(16384);
- test3_init();
- for (i=0; i<1048576; i+=2)
- {
- char x[32];
- sprintf(x, "abc%d", i);
- test3_new(x);
- }
- for (i=0; i<1048576; i++)
- {
- char x[32];
- struct node3 *n;
- sprintf(x, "abc%d", i);
- n = test3_find(x);
- if (!n != (i&1))
- die("Inconsistency at i=%d", i);
- }
- puts("OK");
-}
-
-/* TEST 4: complex keys */
-
-#include "lib/hashfunc.h"
-
-/* Node of the fourth test table; the key is the pair (host, port). */
-struct node4 {
- int port;
- int data;
- char host[1];
-};
-
-/*
- * Generator parameters for test 4: a multi-component key. HASH_KEY_COMPLEX(x)
- * expands to the key components prefixed by `x'; HASH_KEY_DECL is the matching
- * parameter declaration used by all generated operations.
- */
-#define HASH_NODE struct node4
-#define HASH_PREFIX(x) test4_##x
-#define HASH_KEY_COMPLEX(x) x host, x port
-#define HASH_KEY_DECL char *host, int port
-
-/* Request every operation exercised by test4(). */
-#define HASH_WANT_CLEANUP
-#define HASH_WANT_FIND
-#define HASH_WANT_NEW
-#define HASH_WANT_LOOKUP
-#define HASH_WANT_DELETE
-#define HASH_WANT_REMOVE
-
-/* Hash function: case-insensitive hash of the host combined with the hashed port. */
-#define HASH_GIVE_HASHFN
-static uns test4_hash(char *host, int port)
-{
- return hash_string_nocase(host) ^ hash_u32(port);
-}
-
-/* Key equality: hosts compared case-insensitively, ports compared exactly. */
-#define HASH_GIVE_EQ
-static inline int test4_eq(char *host1, int port1, char *host2, int port2)
-{
- return !strcasecmp(host1,host2) && port1 == port2;
-}
-
-/* Extra bytes to allocate behind the node: the length of the host name
- * (the terminating zero fits in the host[1] member). */
-#define HASH_GIVE_EXTRA_SIZE
-static inline uns test4_extra_size(char *host, int port UNUSED)
-{
- return strlen(host);
-}
-
-/* Copy both key components into a freshly allocated node. */
-#define HASH_GIVE_INIT_KEY
-static inline void test4_init_key(struct node4 *n, char *host, int port)
-{
- strcpy(n->host, host);
- n->port = port;
-}
-
-#include "lib/hashtable.h"
-
-/*
- * Test 4: complex (host, port) keys. Creates every third key with new(),
- * touches all keys with lookup(), removes the odd ones (alternating between
- * find()+remove() and delete()) using differently-cased host names, and
- * finally verifies that exactly the even keys survive with correct data.
- */
-static void test4(void)
-{
-  char name[32];
-  struct node4 *node;
-  int k;
-
-  test4_init();
-
-  /* Phase 1: explicitly create every third key. */
-  for (k=0; k<1024; k++)
-    {
-      if (k % 3)
-        continue;
-      sprintf(name, "abc%d", k);
-      node = test4_new(name, k%10);
-      node->data = k;
-    }
-
-  /* Phase 2: lookup() finds the existing keys and creates the rest. */
-  for (k=0; k<1024; k++)
-    {
-      sprintf(name, "abc%d", k);
-      node = test4_lookup(name, k%10);
-      node->data = k;
-    }
-
-  /* Phase 3: get rid of all odd keys, exercising both removal paths. */
-  for (k=1; k<1024; k+=2)
-    {
-      sprintf(name, "aBc%d", k);
-      if ((k % 7) >= 3)
-        {
-          test4_delete(name, k%10);
-          continue;
-        }
-      node = test4_find(name, k%10);
-      ASSERT(node);
-      test4_remove(node);
-    }
-
-  /* Phase 4: only even keys may remain and their data must be intact. */
-  for (k=0; k<1024; k++)
-    {
-      int missing;
-
-      sprintf(name, "ABC%d", k);
-      node = test4_find(name, k%10);
-      missing = !node;
-      if (missing != (k&1) || (node && node->data != k))
-        die("Inconsistency at i=%d", k);
-    }
-  test4_cleanup();
-  puts("OK");
-}
-
-/* TEST 5: integers again, but this time dynamically */
-
-/* Node of the fifth test table; integer key with derived data. */
-struct node5 {
- int key;
- int data;
-};
-
-/*
- * Generator parameters for test 5: atomic int key and a dynamic table, i.e.
- * every generated operation takes a struct test5_table * as its first argument.
- */
-#define HASH_NODE struct node5
-#define HASH_PREFIX(x) test5_##x
-#define HASH_KEY_ATOMIC key
-#define HASH_ATOMIC_TYPE int
-#define HASH_TABLE_DYNAMIC
-
-/* Forward declaration; the structure itself is defined by the generator. */
-struct test5_table;
-
-/* Data initializer called for each new node: data = key + 123. */
-#define HASH_GIVE_INIT_DATA
-static inline void test5_init_data(struct test5_table *table UNUSED, struct node5 *n)
-{
- n->data = n->key + 123;
-}
-
-/* Generate find(), new() and delete(). */
-#define HASH_WANT_FIND
-#define HASH_WANT_NEW
-#define HASH_WANT_DELETE
-
-#include "lib/hashtable.h"
-
-/*
- * Test 5: integer keys in a dynamically passed table. Inserts keys 0..1023,
- * deletes the odd ones, verifies the survivors and finally walks the table
- * with the iterator, checking the sum of (key + 1) over all nodes.
- */
-static void test5(void)
-{
-  struct test5_table tab;
-  struct node5 *node;
-  int i;
-
-  test5_init(&tab);
-
-  /* init_data() must have filled in data = key + 123. */
-  for (i=0; i<1024; i++)
-    {
-      node = test5_new(&tab, i);
-      ASSERT(node->data == i+123);
-    }
-
-  i = 1;
-  while (i < 1024)
-    {
-      test5_delete(&tab, i);
-      i += 2;
-    }
-
-  for (i=0; i<1024; i++)
-    {
-      int missing;
-
-      node = test5_find(&tab, i);
-      missing = !node;
-      if (missing != (i&1) || (node && node->data != i+123))
-        die("Inconsistency at i=%d", i);
-    }
-
-  /* 512 even keys remain: 512 + (0 + 2 + ... + 1022) = 262144. */
-  i = 0;
-  HASH_FOR_ALL_DYNAMIC(test5, &tab, each)
-    i += each->key + 1;
-  HASH_END_FOR;
-  ASSERT(i == 262144);
-  puts("OK");
-}
-
-/*
- * Run the hash table tests. Without arguments all tests are run; otherwise
- * each argument is the number (1..5) of a test to run.
- *
- * Test numbers are validated before being turned into a bit mask: shifting
- * by a negative count or by at least the width of the type is undefined,
- * so anything outside 1..5 is rejected with an error.
- */
-int
-main(int argc, char **argv)
-{
-  uns m = ~0U;
-  if (argc > 1)
-    {
-      m = 0;
-      for (int i=1; i<argc; i++)
-        {
-          long t = atol(argv[i]);
-          if (t < 1 || t > 5)
-            die("Invalid test number: %s", argv[i]);
-          /* Shift an unsigned one so that the result is well defined. */
-          m |= 1U << t;
-        }
-    }
-  if (m & (1U << 1))
-    test1();
-  if (m & (1U << 2))
-    test2();
-  if (m & (1U << 3))
-    test3();
-  if (m & (1U << 4))
-    test4();
-  if (m & (1U << 5))
-    test5();
-  return 0;
-}
+++ /dev/null
-# Tests for the hash table modules
-# Each Run line invokes hash-test with one test number; every test prints OK on success.
-
-Run: ../obj/lib/hash-test 1
-Out: OK
-
-Run: ../obj/lib/hash-test 2
-Out: OK
-
-Run: ../obj/lib/hash-test 3
-Out: OK
-
-Run: ../obj/lib/hash-test 4
-Out: OK
-
-Run: ../obj/lib/hash-test 5
-Out: OK
+++ /dev/null
-/*
- * UCW Library -- Hyper-super-meta-alt-control-shift extra fast
- * str_len() and hash_*() routines
- *
- * It is always at least as fast as the classical strlen() routine and for
- * strings longer than 100 characters, it is substantially faster.
- *
- * (c) 2002, Robert Spalek <robert@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-#include "lib/hashfunc.h"
-#include "lib/chartype.h"
-
-/* The number of bits the hash in the function hash_*() is rotated by after
- * every pass. It should be coprime with the word size. */
-#define SHIFT_BITS 7
-
-/* A bit-mask which clears higher bytes than a given threshold. */
-static uns mask_higher_bits[sizeof(uns)];
-
-/*
- * Fill in mask_higher_bits[]: entry number `keep' gets its first `keep'
- * bytes (in memory order) set to 0xff and all remaining bytes cleared.
- */
-static void CONSTRUCTOR
-hashfunc_init(void)
-{
-  for (uns keep=0; keep<sizeof(uns); keep++)
-    {
-      byte *raw = (byte *) &mask_higher_bits[keep];
-      for (uns pos=0; pos<sizeof(uns); pos++)
-        raw[pos] = (pos < keep) ? 0xff : 0;
-    }
-}
-
-/*
- * Return the number of leading non-zero bytes (in memory order) of the
- * word x, or sizeof(uns) if the word contains no zero byte at all.
- */
-static inline uns CONST
-str_len_uns(uns x)
-{
-  const uns ones = ~0U / 0xff;
-  const uns highs = ones * 0x80;
-
-  /*
-   * x_2 = x - 0x01010101;
-   * x_3 = ~x & x_2;
-   * a = x_3 & 0x80808080;
-   *
-   * If all bytes of x are nonzero, then the highest bit of each byte of
-   * x_2 is lower or equal to the corresponding bit of x. Hence x_3 has
-   * all these highest bits cleared (the target bit is set iff the source
-   * bit has changed from 0 to 1). If a == 0, then we are sure there is
-   * no zero byte in x.
-   */
-  if (!(~x & (x - ones) & highs))
-    return sizeof(uns);
-
-  /* There may be a zero byte; find it the slow way. */
-  const byte *b = (const byte *) &x;
-  uns n = 0;
-  while (n < sizeof(uns) && b[n])
-    n++;
-  return n;
-}
-
-/*
- * Length of a zero-terminated string which starts at an address aligned
- * to uns. The string is scanned one whole word at a time.
- */
-inline uns
-str_len_aligned(const char *str)
-{
-  const uns *word = (const uns *) str;
-  uns total = 0;
-  uns part;
-
-  /* A word contributing less than sizeof(uns) bytes contains the terminator. */
-  do
-    {
-      part = str_len_uns(*word++);
-      total += part;
-    }
-  while (part >= sizeof(uns));
-  return total;
-}
-
-/*
- * Hash a zero-terminated string which starts at an address aligned to uns.
- * The hash is rotated by SHIFT_BITS before every word is mixed in; in the
- * last word, the terminator and the bytes after it are masked out.
- */
-inline uns
-hash_string_aligned(const char *str)
-{
-  const uns *word = (const uns *) str;
-  uns h = 0;
-
-  for (;; word++)
-    {
-      uns used = str_len_uns(*word);
-      h = ROL(h, SHIFT_BITS);
-      if (used >= sizeof(uns))
-        {
-          /* A full word of string data, carry on. */
-          h ^= *word;
-          continue;
-        }
-      /* The terminator lies in this word: mix in only the bytes before it. */
-      return h ^ (*word & mask_higher_bits[used]);
-    }
-}
-
-/*
- * Hash a block of `len' bytes which starts at an address aligned to uns.
- * Whole words are mixed in after rotating the hash by SHIFT_BITS; the
- * trailing partial word (if any) is masked to its first `len % sizeof(uns)'
- * bytes.
- *
- * When no partial word remains, the final rotation is still performed, but
- * the word behind the block is no longer read: its contribution was always
- * masked to zero, so the result is unchanged and the out-of-bounds access
- * is avoided.
- */
-inline uns
-hash_block_aligned(const byte *str, uns len)
-{
-  const uns *u = (const uns *) str;
-  uns hash = 0;
-  while (len >= sizeof(uns))
-    {
-      hash = ROL(hash, SHIFT_BITS) ^ *u++;
-      len -= sizeof(uns);
-    }
-  hash = ROL(hash, SHIFT_BITS);
-  if (len)
-    hash ^= *u & mask_higher_bits[len];
-  return hash;
-}
-
-#ifndef CPU_ALLOW_UNALIGNED
-/*
- * Length of a zero-terminated string at an arbitrary address: the bytes up
- * to the next uns-aligned address are checked one by one and the rest is
- * handed over to str_len_aligned().
- */
-uns
-str_len(const char *str)
-{
-  uns misalign = UNALIGNED_PART(str, uns);
-  if (!misalign)
-    return str_len_aligned(str);
-
-  /* Number of bytes in front of the next aligned address. */
-  uns head = sizeof(uns) - misalign;
-  for (uns k=0; k<head; k++)
-    if (!str[k])
-      return k;
-  return head + str_len_aligned(str + head);
-}
-
-/*
- * Hash a zero-terminated string at an arbitrary address. Aligned strings
- * are passed to hash_string_aligned(); unaligned ones are processed byte by
- * byte, placing each byte at the position it would occupy in an aligned
- * word and rotating the hash by SHIFT_BITS at every word boundary.
- *
- * Each byte is converted to uns before shifting: a promoted byte is a signed
- * int and shifting a value >= 0x80 into the top byte would overflow it.
- */
-uns
-hash_string(const char *str)
-{
-  const byte *s = (const byte *) str;
-  if (!UNALIGNED_PART(s, uns))
-    return hash_string_aligned(str);
-
-  uns hash = 0;
-  for (uns i=0; ; i++)
-    {
-      uns modulo = i % sizeof(uns);
-      uns byte_pos;
-#ifdef CPU_LITTLE_ENDIAN
-      byte_pos = modulo;
-#else
-      byte_pos = sizeof(uns) - 1 - modulo;
-#endif
-      if (!modulo)
-        hash = ROL(hash, SHIFT_BITS);
-      if (!s[i])
-        break;
-      hash ^= (uns) s[i] << (byte_pos * 8);
-    }
-  return hash;
-}
-
-/*
- * Hash a block of `len' bytes at an arbitrary address. Aligned blocks are
- * passed to hash_block_aligned(); unaligned ones are processed byte by
- * byte, placing each byte at the position it would occupy in an aligned
- * word and rotating the hash by SHIFT_BITS at every word boundary.
- *
- * Each byte is converted to uns before shifting: a promoted byte is a signed
- * int and shifting a value >= 0x80 into the top byte would overflow it.
- */
-uns
-hash_block(const byte *str, uns len)
-{
-  if (!UNALIGNED_PART(str, uns))
-    return hash_block_aligned(str, len);
-
-  uns hash = 0;
-  for (uns i=0; ; i++)
-    {
-      uns modulo = i % sizeof(uns);
-      uns byte_pos;
-#ifdef CPU_LITTLE_ENDIAN
-      byte_pos = modulo;
-#else
-      byte_pos = sizeof(uns) - 1 - modulo;
-#endif
-      if (!modulo)
-        hash = ROL(hash, SHIFT_BITS);
-      if (i >= len)
-        break;
-      hash ^= (uns) str[i] << (byte_pos * 8);
-    }
-  return hash;
-}
-#endif
-
-/*
- * Case-insensitive hash of a zero-terminated string: every character is
- * passed through Cupcase() before being mixed in. Processing is always done
- * byte by byte, with the hash rotated by SHIFT_BITS at every word boundary.
- *
- * The character is converted to uns before shifting so that the shift is
- * never performed on a signed int (assumes Cupcase() yields a non-negative
- * character code -- TODO confirm against lib/chartype.h).
- */
-uns
-hash_string_nocase(const char *str)
-{
-  const byte *s = (const byte *) str;
-  uns hash = 0;
-  uns i;
-  for (i=0; ; i++)
-    {
-      uns modulo = i % sizeof(uns);
-      uns shift;
-#ifdef CPU_LITTLE_ENDIAN
-      shift = modulo;
-#else
-      shift = sizeof(uns) - 1 - modulo;
-#endif
-      if (!modulo)
-        hash = ROL(hash, SHIFT_BITS);
-      if (!s[i])
-        break;
-      hash ^= (uns) Cupcase(s[i]) << (shift * 8);
-    }
-  return hash;
-}
+++ /dev/null
-/*
- * UCW Library -- Hyper-super-meta-alt-control-shift extra fast
- * str_len() and hash_*() routines
- *
- * (c) 2002, Robert Spalek <robert@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#ifndef _UCW_HASHFUNC_H
-#define _UCW_HASHFUNC_H
-
-#include "lib/lib.h"
-
-/* The following functions need str to be aligned to uns. */
-uns str_len_aligned(const char *str) PURE;
-uns hash_string_aligned(const char *str) PURE;
-uns hash_block_aligned(const byte *str, uns len) PURE;
-
-/*
- * Variants accepting any address. If CPU_ALLOW_UNALIGNED is defined, they
- * are plain aliases for the aligned versions; otherwise they are separate
- * functions.
- */
-#ifdef CPU_ALLOW_UNALIGNED
-#define str_len(str) str_len_aligned(str)
-#define hash_string(str) hash_string_aligned(str)
-#define hash_block(str, len) hash_block_aligned(str, len)
-#else
-uns str_len(const char *str) PURE;
-uns hash_string(const char *str) PURE;
-uns hash_block(const byte *str, uns len) PURE;
-#endif
-
-/* Case-insensitive string hash; no alignment requirement is declared for it. */
-uns hash_string_nocase(const char *str) PURE;
-
-/*
- * We hash integers by multiplying by a reasonably large prime with
- * few ones in its binary form (to give the compiler the possibility
- * of using shifts and adds on architectures where multiplication
- * instructions are slow).
- */
-/* Hash a 32-bit integer by multiplying it by a fixed constant. */
-static inline uns CONST hash_u32(uns x)
-{
-  return 0x01008041*x;
-}
-
-/* Hash a 64-bit integer: fold both halves together and hash the result. */
-static inline uns CONST hash_u64(u64 x)
-{
-  uns low = (uns) x;
-  uns high = (uns) (x >> 32);
-  return hash_u32(low ^ high);
-}
-
-/* Hash a pointer, using the 32-bit or the 64-bit variant according to its size. */
-static inline uns CONST hash_pointer(void *x)
-{
-  if (sizeof(x) <= 4)
-    return hash_u32((uns)(uintptr_t)x);
-  else
-    return hash_u64((u64)(uintptr_t)x);
-}
-
-#endif
+++ /dev/null
-/*
- * UCW Library -- Universal Hash Table
- *
- * (c) 2002--2004 Martin Mares <mj@ucw.cz>
- * (c) 2002--2005 Robert Spalek <robert@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-/*
- * This is not a normal header file, it's a generator of hash tables.
- * Each time you include it with parameters set in the corresponding
- * preprocessor macros, it generates a hash table with the parameters
- * given.
- *
- * You need to specify:
- *
- * HASH_NODE data type where a node dwells (usually a struct).
- * HASH_PREFIX(x) macro to add a name prefix (used on all global names
- * defined by the hash table generator).
- *
- * Then decide on type of keys:
- *
- * HASH_KEY_ATOMIC=f use node->f as a key of an atomic type (i.e.,
- * a type which can be compared using `==')
- * HASH_ATOMIC_TYPE (defaults to int).
- * | HASH_KEY_STRING=f use node->f as a string key, allocated
- * separately from the rest of the node.
- * | HASH_KEY_ENDSTRING=f use node->f as a string key, allocated
- * automatically at the end of the node struct
- * (to be declared as "char f[1]" at the end).
- * | HASH_KEY_COMPLEX use a multi-component key; as the name suggests,
- * the passing of parameters is a bit complex then.
- * The HASH_KEY_COMPLEX(x) macro should expand to
- * `x k1, x k2, ... x kn' and you should also define:
- * HASH_KEY_DECL declaration of function parameters in which key
- * should be passed to all hash table operations.
- * That is, `type1 k1, type2 k2, ... typen kn'.
- * With complex keys, HASH_GIVE_HASHFN and HASH_GIVE_EQ
- * are mandatory.
- * | HASH_KEY_MEMORY=f use node->f as a raw data key, compared using
- * memcmp
- * HASH_KEY_SIZE the length of the key block
- *
- * Then specify what operations you request (all names are automatically
- * prefixed by calling HASH_PREFIX):
- *
- * <always defined> init() -- initialize the hash table.
- * HASH_WANT_CLEANUP cleanup() -- deallocate the hash table.
- * HASH_WANT_FIND node *find(key) -- find first node with the specified
- * key, return NULL if no such node exists.
- * HASH_WANT_FIND_NEXT node *find_next(node *start) -- find next node with the
- * specified key, return NULL if no such node exists.
- * HASH_WANT_NEW node *new(key) -- create new node with given key.
- * Doesn't check whether it already exists.
- * HASH_WANT_LOOKUP node *lookup(key) -- find node with given key,
- * if it doesn't exist, create it. Defining
- * HASH_GIVE_INIT_DATA is strongly recommended.
- * HASH_WANT_DELETE int delete(key) -- delete and deallocate node
- * with given key. Returns success.
- * HASH_WANT_REMOVE remove(node *) -- delete and deallocate given node.
- *
- * You can also supply several functions:
- *
- * HASH_GIVE_HASHFN unsigned int hash(key) -- calculate hash value of key.
- * We have sensible default hash functions for strings
- * and integers.
- * HASH_GIVE_EQ int eq(key1, key2) -- return whether keys are equal.
- * By default, we use == for atomic types and either
- * strcmp or strcasecmp for strings.
- * HASH_GIVE_EXTRA_SIZE int extra_size(key) -- returns how many bytes after the
- * node should be allocated for dynamic data. Default=0
- * or length of the string with HASH_KEY_ENDSTRING.
- * HASH_GIVE_INIT_KEY void init_key(node *,key) -- initialize key in a newly
- * created node. Defaults: assignment for atomic keys
- * and static strings, strcpy for end-allocated strings.
- * HASH_GIVE_INIT_DATA void init_data(node *) -- initialize data fields in a
- * newly created node. Very useful for lookup operations.
- * HASH_GIVE_ALLOC void *alloc(unsigned int size) -- allocate space for
- * a node. Default is xmalloc() or pooled allocation, depending
- * on HASH_USE_POOL and HASH_AUTO_POOL switches.
- * void free(void *) -- the converse.
- *
- * ... and a couple of extra parameters:
- *
- * HASH_NOCASE String comparisons should be case-insensitive.
- * HASH_DEFAULT_SIZE=n Initially, use hash table of approx. `n' entries.
- * HASH_CONSERVE_SPACE Use as little space as possible.
- * HASH_FN_BITS=n The hash function gives only `n' significant bits.
- * HASH_ATOMIC_TYPE=t Atomic values are of type `t' instead of int.
- * HASH_USE_POOL=pool Allocate all nodes from given mempool. Note, however, that
- * deallocation is not supported by mempools, so delete/remove
- * will leak pool memory.
- * HASH_AUTO_POOL=size Create a pool of the given block size automatically.
- * HASH_ZERO_FILL New entries should be initialized to all zeroes.
- * HASH_TABLE_ALLOC The hash table itself will be allocated and freed using
- * the same allocation functions as the nodes instead of
- * the default xmalloc().
- * HASH_TABLE_DYNAMIC Support multiple hash tables; the first parameter of all
- * hash table operations is struct HASH_PREFIX(table) *.
- *
- * You also get an iterator macro at no extra charge:
- *
- * HASH_FOR_ALL(hash_prefix, variable)
- * {
- * // node *variable gets declared automatically
- * do_something_with_node(variable);
- * // use HASH_BREAK and HASH_CONTINUE instead of break and continue
- * // you must not alter contents of the hash table here
- * }
- * HASH_END_FOR;
- *
- * (For dynamic tables, use HASH_FOR_ALL_DYNAMIC(hash_prefix, hash_table, variable) instead.)
- *
- * Then include "lib/hashtable.h" and voila, you have a hash table
- * suiting all your needs (at least those which you've revealed :) ).
- *
- * After including this file, all parameter macros are automatically
- * undef'd.
- */
-
-#ifndef _UCW_HASHFUNC_H
-#include "lib/hashfunc.h"
-#endif
-
-#include <string.h>
-
-/* Initial setup of parameters */
-
-/* The node type and the name prefix have no defaults, so refuse to go on without them. */
-#if !defined(HASH_NODE) || !defined(HASH_PREFIX)
-#error Some of the mandatory configuration macros are missing.
-#endif
-
-/*
- * Atomic keys always imply HASH_CONSERVE_SPACE: the hash value is then not
- * stored in the bucket and is recomputed from the key whenever needed.
- */
-#if defined(HASH_KEY_ATOMIC) && !defined(HASH_CONSERVE_SPACE)
-#define HASH_CONSERVE_SPACE
-#endif
-
-/* Shorthand for prefixing generated names; undefined again at the end of this file. */
-#define P(x) HASH_PREFIX(x)
-
-/* Declare buckets and the hash table */
-
-/* The user's node type under its generated name. */
-typedef HASH_NODE P(node);
-
-/*
- * A bucket is a member of a singly linked collision chain. It embeds the
- * user's node and, unless HASH_CONSERVE_SPACE is set, caches the full hash
- * value of the key.
- */
-typedef struct P(bucket) {
- struct P(bucket) *next;
-#ifndef HASH_CONSERVE_SPACE
- uns hash;
-#endif
- P(node) n;
-} P(bucket);
-
-/*
- * The table: hash_size is the number of chains in ht[], hash_count the
- * number of stored nodes. Growing is triggered when hash_count reaches
- * hash_max and shrinking when it drops below hash_min; hash_hard_max is
- * the limit used when computing hash_max. With HASH_AUTO_POOL, the table
- * also carries its private memory pool.
- */
-struct P(table) {
- uns hash_size;
- uns hash_count, hash_max, hash_min, hash_hard_max;
- P(bucket) **ht;
-#ifdef HASH_AUTO_POOL
- struct mempool *pool;
-#endif
-};
-
-/*
- * Helper macros hiding the difference between dynamic tables (passed as the
- * first parameter of every function) and the single static table:
- *
- * T      the table itself (an lvalue)
- * TA     declaration of the table parameter when it is the only parameter
- * TAC    the same, followed by a comma (more parameters follow)
- * TAU    like TA, but marked UNUSED
- * TAUC   like TAC, but marked UNUSED
- * TT     the table argument in a call when it is the only argument
- * TTC    the same, followed by a comma
- *
- * In the static case, the parameter macros expand to `void' or to nothing
- * and a single global table called P(table) is defined.
- */
-#ifdef HASH_TABLE_DYNAMIC
-#define T (*table)
-#define TA struct P(table) *table
-#define TAC TA,
-#define TAU TA UNUSED
-#define TAUC TA UNUSED,
-#define TT table
-#define TTC table,
-#else
-struct P(table) P(table);
-#define T P(table)
-#define TA void
-#define TAC
-#define TAU void
-#define TAUC
-#define TT
-#define TTC
-#endif
-
-/* Preset parameters */
-
-#if defined(HASH_KEY_ATOMIC)
-
-/* Atomic keys: HASH_KEY(x) is the key member with `x' put in front of it. */
-#define HASH_KEY(x) x HASH_KEY_ATOMIC
-
-#ifndef HASH_ATOMIC_TYPE
-# define HASH_ATOMIC_TYPE int
-#endif
-#define HASH_KEY_DECL HASH_ATOMIC_TYPE HASH_KEY( )
-
-/*
- * Default hash function: the 32-bit or the 64-bit integer hash according to
- * the size of the key type. It returns uns, matching hash_u32()/hash_u64()
- * and the uns variables all callers store the hash in.
- */
-#ifndef HASH_GIVE_HASHFN
-# define HASH_GIVE_HASHFN
- static inline uns P(hash) (TAUC HASH_ATOMIC_TYPE x)
- { return ((sizeof(x) <= 4) ? hash_u32(x) : hash_u64(x)); }
-#endif
-
-/* Default equality: plain `=='. */
-#ifndef HASH_GIVE_EQ
-# define HASH_GIVE_EQ
- static inline int P(eq) (TAUC HASH_ATOMIC_TYPE x, HASH_ATOMIC_TYPE y)
- { return x == y; }
-#endif
-
-/* Default key initializer: plain assignment. */
-#ifndef HASH_GIVE_INIT_KEY
-# define HASH_GIVE_INIT_KEY
- static inline void P(init_key) (TAUC P(node) *n, HASH_ATOMIC_TYPE k)
- { HASH_KEY(n->) = k; }
-#endif
-
-#elif defined(HASH_KEY_MEMORY)
-
-/* Raw memory keys: HASH_KEY(x) is the key member with `x' put in front of it. */
-#define HASH_KEY(x) x HASH_KEY_MEMORY
-
-#define HASH_KEY_DECL byte HASH_KEY( )[HASH_KEY_SIZE]
-
-/*
- * Default hash function: hash_block() over the HASH_KEY_SIZE bytes of the
- * key. It returns uns, matching hash_block() and the uns variables all
- * callers store the hash in.
- */
-#ifndef HASH_GIVE_HASHFN
-# define HASH_GIVE_HASHFN
- static inline uns P(hash) (TAUC byte *x)
- { return hash_block(x, HASH_KEY_SIZE); }
-#endif
-
-/* Default equality: memcmp() over HASH_KEY_SIZE bytes. */
-#ifndef HASH_GIVE_EQ
-# define HASH_GIVE_EQ
- static inline int P(eq) (TAUC byte *x, byte *y)
- { return !memcmp(x, y, HASH_KEY_SIZE); }
-#endif
-
-/* Default key initializer: copy HASH_KEY_SIZE bytes into the node. */
-#ifndef HASH_GIVE_INIT_KEY
-# define HASH_GIVE_INIT_KEY
- static inline void P(init_key) (TAUC P(node) *n, byte *k)
- { memcpy(HASH_KEY(n->), k, HASH_KEY_SIZE); }
-#endif
-
-#elif defined(HASH_KEY_STRING) || defined(HASH_KEY_ENDSTRING)
-
-/*
- * String keys. With HASH_KEY_STRING the node only stores the pointer passed
- * by the caller; with HASH_KEY_ENDSTRING the string is copied behind the
- * node, so strlen(key) extra bytes are requested for each bucket.
- */
-#ifdef HASH_KEY_STRING
-# define HASH_KEY(x) x HASH_KEY_STRING
-# ifndef HASH_GIVE_INIT_KEY
-# define HASH_GIVE_INIT_KEY
- static inline void P(init_key) (TAUC P(node) *n, char *k)
- { HASH_KEY(n->) = k; }
-# endif
-#else
-# define HASH_KEY(x) x HASH_KEY_ENDSTRING
-# define HASH_GIVE_EXTRA_SIZE
- static inline int P(extra_size) (TAUC char *k)
- { return strlen(k); }
-# ifndef HASH_GIVE_INIT_KEY
-# define HASH_GIVE_INIT_KEY
- static inline void P(init_key) (TAUC P(node) *n, char *k)
- { strcpy(HASH_KEY(n->), k); }
-# endif
-#endif
-#define HASH_KEY_DECL char *HASH_KEY( )
-
-/* Default hash function: the string hash, case-insensitive if HASH_NOCASE is set. */
-#ifndef HASH_GIVE_HASHFN
-#define HASH_GIVE_HASHFN
- static inline uns P(hash) (TAUC char *k)
- {
-# ifdef HASH_NOCASE
- return hash_string_nocase(k);
-# else
- return hash_string(k);
-# endif
- }
-#endif
-
-/* Default equality: strcmp(), or strcasecmp() if HASH_NOCASE is set. */
-#ifndef HASH_GIVE_EQ
-# define HASH_GIVE_EQ
- static inline int P(eq) (TAUC char *x, char *y)
- {
-# ifdef HASH_NOCASE
- return !strcasecmp(x,y);
-# else
- return !strcmp(x,y);
-# endif
- }
-#endif
-
-#elif defined(HASH_KEY_COMPLEX)
-
-/* Complex keys: the user-supplied macro lists all key components. */
-#define HASH_KEY(x) HASH_KEY_COMPLEX(x)
-
-#else
-#error You forgot to set the hash key type.
-#endif
-
-/* Defaults for missing parameters */
-
-/* No key type above supplies a fallback here, so the user must have given one. */
-#ifndef HASH_GIVE_HASHFN
-#error Unable to determine which hash function to use.
-#endif
-
-#ifndef HASH_GIVE_EQ
-#error Unable to determine how to compare two keys.
-#endif
-
-/* HASH_EXTRA_SIZE(key) is the number of bytes to allocate behind the bucket; 0 by default. */
-#ifdef HASH_GIVE_EXTRA_SIZE
-/* This trickery is needed to avoid `unused parameter' warnings */
-#define HASH_EXTRA_SIZE(x) P(extra_size)(TTC x)
-#else
-/*
- * Beware, C macros are expanded iteratively, not recursively,
- * hence we get only a _single_ argument, although the expansion
- * of HASH_KEY contains commas.
- */
-#define HASH_EXTRA_SIZE(x) 0
-#endif
-
-#ifndef HASH_GIVE_INIT_KEY
-#error Unable to determine how to initialize keys.
-#endif
-
-/* Default data initializer: leave the data fields of a new node untouched. */
-#ifndef HASH_GIVE_INIT_DATA
-static inline void P(init_data) (TAUC P(node) *n UNUSED)
-{
-}
-#endif
-
-/*
- * Node allocation. Each branch provides init_alloc() and cleanup_alloc();
- * all branches except HASH_GIVE_ALLOC also define alloc() and free(), which
- * the user supplies himself in that case.
- */
-#ifdef HASH_GIVE_ALLOC
-/* If the caller has requested to use his own allocation functions, do so */
-static inline void P(init_alloc) (TAU) { }
-static inline void P(cleanup_alloc) (TAU) { }
-
-#elif defined(HASH_USE_POOL)
-/* If the caller has requested to use his mempool, do so */
-#include "lib/mempool.h"
-static inline void * P(alloc) (TAUC unsigned int size) { return mp_alloc_fast(HASH_USE_POOL, size); }
-/* Nothing is released here: free() is a no-op for pooled nodes. */
-static inline void P(free) (TAUC void *x UNUSED) { }
-static inline void P(init_alloc) (TAU) { }
-static inline void P(cleanup_alloc) (TAU) { }
-
-#elif defined(HASH_AUTO_POOL)
-/* Use our own pools */
-#include "lib/mempool.h"
-static inline void * P(alloc) (TAUC unsigned int size) { return mp_alloc_fast(T.pool, size); }
-static inline void P(free) (TAUC void *x UNUSED) { }
-/* The pool is created by init() and deleted as a whole by cleanup(). */
-static inline void P(init_alloc) (TAU) { T.pool = mp_new(HASH_AUTO_POOL); }
-static inline void P(cleanup_alloc) (TAU) { mp_delete(T.pool); }
-/* From now on, HASH_USE_POOL only serves as a `nodes are pooled' flag. */
-#define HASH_USE_POOL
-
-#else
-/* The default allocation method */
-static inline void * P(alloc) (TAUC unsigned int size) { return xmalloc(size); }
-static inline void P(free) (TAUC void *x) { xfree(x); }
-static inline void P(init_alloc) (TAU) { }
-static inline void P(cleanup_alloc) (TAU) { }
-
-#endif
-
-/*
- * Allocation of the ht[] array itself: with HASH_TABLE_ALLOC it goes through
- * the node allocator, otherwise through xmalloc()/xfree().
- */
-#ifdef HASH_TABLE_ALLOC
-static inline void * P(table_alloc) (TAUC unsigned int size) { return P(alloc)(TTC size); }
-static inline void P(table_free) (TAUC void *x) { P(free)(TTC x); }
-#else
-static inline void * P(table_alloc) (TAUC unsigned int size) { return xmalloc(size); }
-static inline void P(table_free) (TAUC void *x) { xfree(x); }
-#endif
-
-/* Initial table size requested from init(); rounded by next_table_prime() there. */
-#ifndef HASH_DEFAULT_SIZE
-#define HASH_DEFAULT_SIZE 32
-#endif
-
-/* Number of significant bits of the hash function; used to cap the table size in init(). */
-#ifndef HASH_FN_BITS
-#define HASH_FN_BITS 32
-#endif
-
-/*
- * Allocate `size' bytes for a new bucket using the node allocator.
- * With HASH_ZERO_FILL, the whole bucket is cleared first.
- */
-static inline void * P(new_bucket)(TAUC uns size)
-{
-  void *fresh = P(alloc)(TTC size);
-#ifdef HASH_ZERO_FILL
-  bzero(fresh, size);
-#endif
-  return fresh;
-}
-
-/* Now the operations */
-
-/*
- * Allocate an empty chain array for the size requested in T.hash_size
- * (rounded by next_table_prime()) and recalculate the thresholds at which
- * the table gets resized.
- */
-static void P(alloc_table) (TAU)
-{
-  T.hash_size = next_table_prime(T.hash_size);
-  T.ht = P(table_alloc)(TTC sizeof(void *) * T.hash_size);
-  bzero(T.ht, sizeof(void *) * T.hash_size);
-
-  /* Grow at twice as many nodes as chains, unless that would exceed the hard limit. */
-  T.hash_max = (2*T.hash_size < T.hash_hard_max) ? 2*T.hash_size : ~0U;
-
-  /* Shrink only tables which are well above the default size. */
-  T.hash_min = (T.hash_size/2 > HASH_DEFAULT_SIZE) ? T.hash_size/4 : 0;
-}
-
-/*
- * Initialize an empty hash table: set the size limits, prepare the node
- * allocator and allocate the initial chain array.
- */
-static void P(init) (TA)
-{
-  /* Cap the table size at 2^28 chains, or less if the hash function gives fewer bits. */
-#if HASH_FN_BITS < 28
-  T.hash_hard_max = 1 << HASH_FN_BITS;
-#else
-  T.hash_hard_max = 1 << 28;
-#endif
-  T.hash_size = HASH_DEFAULT_SIZE;
-  T.hash_count = 0;
-  P(init_alloc)(TT);
-  P(alloc_table)(TT);
-}
-
-#ifdef HASH_WANT_CLEANUP
-/*
- * Deallocate the hash table. Unless the nodes live in a pool, every bucket
- * is freed individually; then the allocator is cleaned up and the chain
- * array is released.
- */
-static void P(cleanup) (TA)
-{
-#ifndef HASH_USE_POOL
-  for (uns slot=0; slot<T.hash_size; slot++)
-    {
-      P(bucket) *cur = T.ht[slot];
-      while (cur)
-        {
-          /* Remember the successor before the bucket goes away. */
-          P(bucket) *succ = cur->next;
-          P(free)(TTC cur);
-          cur = succ;
-        }
-    }
-#endif
-  P(cleanup_alloc)(TT);
-  P(table_free)(TTC T.ht);
-}
-#endif
-
-/*
- * Return the hash value of the key stored in the given bucket: recomputed
- * from the key when HASH_CONSERVE_SPACE is set, read from the bucket otherwise.
- */
-#ifdef HASH_CONSERVE_SPACE
-static inline uns P(bucket_hash) (TAUC P(bucket) *b)
-{
-  return P(hash)(TTC HASH_KEY(b->n.));
-}
-#else
-static inline uns P(bucket_hash) (TAUC P(bucket) *b)
-{
-  return b->hash;
-}
-#endif
-
-/*
- * Resize the table to approximately `size' chains: allocate a new chain
- * array, relink all buckets into it and free the old array.
- */
-static void P(rehash) (TAC uns size)
-{
-  P(bucket) **old_table = T.ht;
-  uns old_size = T.hash_size;
-
-  DBG("Rehashing %d->%d at count %d", old_size, size, T.hash_count);
-  T.hash_size = size;
-  P(alloc_table)(TT);
-
-  for (uns slot=0; slot<old_size; slot++)
-    {
-      P(bucket) *cur = old_table[slot];
-      while (cur)
-        {
-          /* The bucket is pushed to the head of its new chain. */
-          P(bucket) *succ = cur->next;
-          uns dest = P(bucket_hash)(TTC cur) % T.hash_size;
-          cur->next = T.ht[dest];
-          T.ht[dest] = cur;
-          cur = succ;
-        }
-    }
-  P(table_free)(TTC old_table);
-}
-
-#ifdef HASH_WANT_FIND
-/* Find the first node with the given key; return NULL if there is none. */
-static P(node) * P(find) (TAC HASH_KEY_DECL)
-{
-  uns full_hash = P(hash) (TTC HASH_KEY( ));
-  P(bucket) *cur = T.ht[full_hash % T.hash_size];
-
-  while (cur)
-    {
-      int same = 1;
-#ifndef HASH_CONSERVE_SPACE
-      /* Cheap pre-check on the cached hash value. */
-      same = (cur->hash == full_hash);
-#endif
-      if (same && P(eq)(TTC HASH_KEY( ), HASH_KEY(cur->n.)))
-        return &cur->n;
-      cur = cur->next;
-    }
-  return NULL;
-}
-#endif
-
-#ifdef HASH_WANT_FIND_NEXT
-/*
- * Find the next node having the same key as `start', searching the rest of
- * its collision chain; return NULL if there is none.
- */
-static P(node) * P(find_next) (TAC P(node) *start)
-{
-#ifndef HASH_CONSERVE_SPACE
-  uns full_hash = P(hash) (TTC HASH_KEY(start->));
-#endif
-  P(bucket) *cur = SKIP_BACK(P(bucket), n, start);
-
-  while ((cur = cur->next))
-    {
-      int same = 1;
-#ifndef HASH_CONSERVE_SPACE
-      /* Cheap pre-check on the cached hash value. */
-      same = (cur->hash == full_hash);
-#endif
-      if (same && P(eq)(TTC HASH_KEY(start->), HASH_KEY(cur->n.)))
-        return &cur->n;
-    }
-  return NULL;
-}
-#endif
-
-#ifdef HASH_WANT_NEW
-/*
- * Create a new node with the given key and return it. No check is made
- * whether a node with the same key already exists. The table is grown once
- * the number of nodes reaches the current limit.
- */
-static P(node) * P(new) (TAC HASH_KEY_DECL)
-{
-  uns full_hash = P(hash) (TTC HASH_KEY( ));
-  uns slot = full_hash % T.hash_size;
-  P(bucket) *fresh = P(new_bucket) (TTC sizeof(struct P(bucket)) + HASH_EXTRA_SIZE(HASH_KEY( )));
-
-  /* Link the bucket to the head of its chain. */
-  fresh->next = T.ht[slot];
-  T.ht[slot] = fresh;
-#ifndef HASH_CONSERVE_SPACE
-  fresh->hash = full_hash;
-#endif
-  P(init_key)(TTC &fresh->n, HASH_KEY( ));
-  P(init_data)(TTC &fresh->n);
-
-  /* Rehashing only relinks buckets, so the node pointer stays valid. */
-  if (T.hash_count++ >= T.hash_max)
-    P(rehash)(TTC 2*T.hash_size);
-  return &fresh->n;
-}
-#endif
-
-#ifdef HASH_WANT_LOOKUP
-/*
- * Find the node with the given key; if it does not exist, create it
- * (initializing its key and data) and return the new node.
- */
-static P(node) * P(lookup) (TAC HASH_KEY_DECL)
-{
-  uns full_hash = P(hash) (TTC HASH_KEY( ));
-  uns slot = full_hash % T.hash_size;
-  P(bucket) *cur = T.ht[slot];
-
-  /* First try to find an existing node. */
-  while (cur)
-    {
-      int same = 1;
-#ifndef HASH_CONSERVE_SPACE
-      same = (cur->hash == full_hash);
-#endif
-      if (same && P(eq)(TTC HASH_KEY( ), HASH_KEY(cur->n.)))
-        return &cur->n;
-      cur = cur->next;
-    }
-
-  /* Not found: create a new bucket at the head of the chain. */
-  cur = P(new_bucket) (TTC sizeof(struct P(bucket)) + HASH_EXTRA_SIZE(HASH_KEY( )));
-  cur->next = T.ht[slot];
-  T.ht[slot] = cur;
-#ifndef HASH_CONSERVE_SPACE
-  cur->hash = full_hash;
-#endif
-  P(init_key)(TTC &cur->n, HASH_KEY( ));
-  P(init_data)(TTC &cur->n);
-  if (T.hash_count++ >= T.hash_max)
-    P(rehash)(TTC 2*T.hash_size);
-  return &cur->n;
-}
-#endif
-
-#ifdef HASH_WANT_DELETE
-/*
- * Delete and deallocate the first node with the given key. Returns 1 if
- * a node has been deleted, 0 if no such node exists. The table is shrunk
- * when the number of nodes drops below the current lower limit.
- */
-static int P(delete) (TAC HASH_KEY_DECL)
-{
-  uns h0 = P(hash) (TTC HASH_KEY( ));
-  uns h = h0 % T.hash_size;
-  P(bucket) *b, **bb;
-
-  /*
-   * bb points to the link leading to the current bucket, so unlinking is
-   * a single assignment. The assignment in the loop condition is
-   * intentional, hence the extra parentheses (as in P(remove)).
-   */
-  for (bb=&T.ht[h]; (b=*bb); bb=&b->next)
-    {
-      if (
-#ifndef HASH_CONSERVE_SPACE
-        b->hash == h0 &&
-#endif
-        P(eq)(TTC HASH_KEY( ), HASH_KEY(b->n.)))
-        {
-          *bb = b->next;
-          P(free)(TTC b);
-          if (--T.hash_count < T.hash_min)
-            P(rehash)(TTC T.hash_size/2);
-          return 1;
-        }
-    }
-  return 0;
-}
-#endif
-
-#ifdef HASH_WANT_REMOVE
-/*
- * Delete and deallocate the given node, which must be present in the table.
- * The table is shrunk when the number of nodes drops below the current
- * lower limit.
- */
-static void P(remove) (TAC P(node) *n)
-{
-  P(bucket) *victim = SKIP_BACK(struct P(bucket), n, n);
-  uns slot = P(bucket_hash)(TTC victim) % T.hash_size;
-  P(bucket) **link = &T.ht[slot];
-  P(bucket) *cur;
-
-  /* Walk the chain until the link pointing to our bucket is found. */
-  while ((cur = *link) && cur != victim)
-    link = &cur->next;
-  ASSERT(cur);
-  *link = cur->next;
-  P(free)(TTC cur);
-  if (--T.hash_count < T.hash_min)
-    P(rehash)(TTC T.hash_size/2);
-}
-#endif
-
-/* And the iterator */
-
-/* The iterator macros are defined only once, no matter how many tables get generated. */
-#ifndef HASH_FOR_ALL
-
-/*
- * HASH_FOR_ALL_DYNAMIC opens a do { ... } while(0) block with two nested
- * loops (over all chains and over all buckets of a chain) and declares
- * h_var as a pointer to the current node. HASH_END_FOR closes what has
- * been opened, so the two must always be used as a pair.
- *
- * NOTE(review): HASH_BREAK expands to nothing here, so it does not leave
- * the iteration, although the usage notes at the top of this file ask for
- * it to be used instead of `break' -- confirm whether this is intended.
- * HASH_CONTINUE is a plain `continue' of the inner (per-bucket) loop.
- */
-#define HASH_FOR_ALL_DYNAMIC(h_px, h_table, h_var) \
-do { \
- uns h_slot; \
- struct GLUE_(h_px,bucket) *h_buck; \
- for (h_slot=0; h_slot < (h_table)->hash_size; h_slot++) \
- for (h_buck = (h_table)->ht[h_slot]; h_buck; h_buck = h_buck->next) \
- { \
- GLUE_(h_px,node) *h_var = &h_buck->n;
-/* Iterate over the single static table generated with prefix h_px. */
-#define HASH_FOR_ALL(h_px, h_var) HASH_FOR_ALL_DYNAMIC(h_px, &GLUE_(h_px,table), h_var)
-#define HASH_END_FOR } } while(0)
-#define HASH_BREAK
-#define HASH_CONTINUE continue
-
-#endif
-
-/* Finally, undefine all the parameters */
-
-/* Internal helper macros of the generator. */
-#undef P
-#undef T
-#undef TA
-#undef TAC
-#undef TAU
-#undef TAUC
-#undef TT
-#undef TTC
-
-/* Parameters of the generator, so that the next inclusion starts from scratch. */
-#undef HASH_ATOMIC_TYPE
-#undef HASH_CONSERVE_SPACE
-#undef HASH_DEFAULT_SIZE
-#undef HASH_EXTRA_SIZE
-#undef HASH_FN_BITS
-#undef HASH_GIVE_ALLOC
-#undef HASH_GIVE_EQ
-#undef HASH_GIVE_EXTRA_SIZE
-#undef HASH_GIVE_HASHFN
-#undef HASH_GIVE_INIT_DATA
-#undef HASH_GIVE_INIT_KEY
-#undef HASH_KEY
-#undef HASH_KEY_ATOMIC
-#undef HASH_KEY_COMPLEX
-#undef HASH_KEY_DECL
-#undef HASH_KEY_ENDSTRING
-#undef HASH_KEY_STRING
-#undef HASH_KEY_MEMORY
-#undef HASH_KEY_SIZE
-#undef HASH_NOCASE
-#undef HASH_NODE
-#undef HASH_PREFIX
-#undef HASH_USE_POOL
-#undef HASH_AUTO_POOL
-#undef HASH_WANT_CLEANUP
-#undef HASH_WANT_DELETE
-#undef HASH_WANT_FIND
-#undef HASH_WANT_FIND_NEXT
-#undef HASH_WANT_LOOKUP
-#undef HASH_WANT_NEW
-#undef HASH_WANT_REMOVE
-#undef HASH_TABLE_ALLOC
-#undef HASH_TABLE_DYNAMIC
-#undef HASH_ZERO_FILL
+++ /dev/null
-/*
- * UCW Library -- Universal Heap Macros
- *
- * (c) 2001 Martin Mares <mj@ucw.cz>
- * (c) 2005 Tomas Valla <tom@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-/*
- * Internal helper: move the element at index _j down the heap until neither
- * of its children (at 2*_j and 2*_j+1) is less than it. The heap is indexed
- * from 1 and has `num' elements. Expects the variables _j, _l and x (a
- * temporary handed over to swap) to be declared by the enclosing macro.
- */
-#define HEAP_BUBBLE_DOWN_J(heap,num,less,swap) \
- for (;;) \
- { \
- _l = 2*_j; \
- if (_l > num) \
- break; \
- if (less(heap[_j],heap[_l]) && (_l == num || less(heap[_j],heap[_l+1]))) \
- break; \
- if (_l != num && less(heap[_l+1],heap[_l])) \
- _l++; \
- swap(heap,_j,_l,x); \
- _j = _l; \
- }
-
-/*
- * Internal helper: move the element at index _j up the heap until its parent
- * (at _j/2) is less than it or the root (index 1) is reached. Expects the
- * variables _j, _u and x to be declared by the enclosing macro.
- */
-#define HEAP_BUBBLE_UP_J(heap,num,less,swap) \
- while (_j > 1) \
- { \
- _u = _j/2; \
- if (less(heap[_u], heap[_j])) \
- break; \
- swap(heap,_u,_j,x); \
- _j = _u; \
- }
-
-/*
- * Build a heap from the elements heap[1..num]: every index from num down
- * to 1 is bubbled down in turn.
- */
-#define HEAP_INIT(type,heap,num,less,swap) \
- do { \
- uns _i = num; \
- uns _j, _l; \
- type x; \
- while (_i >= 1) \
- { \
- _j = _i; \
- HEAP_BUBBLE_DOWN_J(heap,num,less,swap) \
- _i--; \
- } \
- } while(0)
-
-/*
- * Remove the root of the heap: it is swapped with the last element, `num'
- * is decremented (so `num' must be an lvalue) and the new root is bubbled
- * down. The removed element ends up at heap[num+1].
- */
-#define HEAP_DELMIN(type,heap,num,less,swap) \
- do { \
- uns _j, _l; \
- type x; \
- swap(heap,1,num,x); \
- num--; \
- _j = 1; \
- HEAP_BUBBLE_DOWN_J(heap,num,less,swap); \
- } while(0)
-
-/*
- * Bubble up the element at heap[num]. The macro neither stores the new
- * element nor changes `num', so presumably the caller has already
- * incremented `num' and placed the element there -- verify against callers.
- */
-#define HEAP_INSERT(type,heap,num,less,swap) \
- do { \
- uns _j, _u; \
- type x; \
- _j = num; \
- HEAP_BUBBLE_UP_J(heap,num,less,swap); \
- } while(0)
-
-/*
- * Restore the heap order after the element at index `pos' has been
- * increased: the element is bubbled down from its current position.
- */
-#define HEAP_INCREASE(type,heap,num,less,swap,pos) \
- do { \
- uns _j, _l; \
- type x; \
- _j = pos; \
- HEAP_BUBBLE_DOWN_J(heap,num,less,swap); \
- } while(0)
-
-/*
- * Remove the element at index `pos': it is swapped with the last element,
- * `num' is decremented (so `num' must be an lvalue) and the element moved
- * to `pos' is bubbled up if it is less than the removed one (now at
- * heap[num+1]) or bubbled down otherwise.
- *
- * The if/else relies on each bubble macro expanding to a single statement,
- * which is why HEAP_BUBBLE_UP_J is not followed by a semicolon here.
- */
-#define HEAP_DELETE(type,heap,num,less,swap,pos) \
- do { \
- uns _j, _l, _u; \
- type x; \
- _j = pos; \
- swap(heap,_j,num,x); \
- num--; \
- if (less(heap[_j], heap[num+1])) \
- HEAP_BUBBLE_UP_J(heap,num,less,swap) \
- else \
- HEAP_BUBBLE_DOWN_J(heap,num,less,swap); \
- } while(0)
-
-/* Default swapping macro: exchanges heap[a] and heap[b] using the temporary t. */
-#define HEAP_SWAP(heap,a,b,t) (t=heap[a], heap[a]=heap[b], heap[b]=t)
+++ /dev/null
-/*
- * UCW Library -- IP address access lists
- *
- * (c) 1997--2007 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-#include "lib/clists.h"
-#include "lib/conf.h"
-#include "lib/getopt.h"
-#include "lib/fastbuf.h"
-#include "lib/ipaccess.h"
-
-#include <string.h>
-
-/* One item of an access list: an address/mask pair and the verdict for matching addresses. */
-struct ipaccess_entry {
- cnode n;
- int allow; /* Index into { "deny", "allow" } as configured by the Mode item */
- struct ip_addrmask addr;
-};
-
-/*
- * Configuration parser for struct ip_addrmask. Accepts "address",
- * "address/prefix-length" or "address/netmask"; a missing mask means an
- * exact match. The input string is modified in place (the slash is replaced
- * by a terminator). Returns NULL on success or an error message.
- */
-static char *
-addrmask_parser(char *c, void *ptr)
-{
-  struct ip_addrmask *am = ptr;
-  char *slash, *err;
-
-  /*
-   * This is tricky: addrmasks will be compared by memcmp(), so we must ensure
-   * that even the padding between structure members is zeroed out.
-   */
-  bzero(am, sizeof(*am));
-
-  /* Split the string at the slash, if there is any. */
-  slash = strchr(c, '/');
-  if (slash)
-    *slash++ = 0;
-
-  err = cf_parse_ip(c, &am->addr);
-  if (err)
-    return err;
-
-  if (!slash)
-    {
-      am->mask = ~0U;
-      return NULL;
-    }
-
-  /* Try a prefix length first, then fall back to a dotted netmask. */
-  uns len;
-  if (!cf_parse_int(slash, &len) && len <= 32)
-    am->mask = (len == 32) ? ~0U : ~(~0U >> len);
-  else if (cf_parse_ip(slash, &am->mask))
-    return "Invalid prefix length or netmask";
-  return NULL;
-}
-
-/* Configuration dumper for struct ip_addrmask: prints address and mask as two hexadecimal words. */
-static void
-addrmask_dumper(struct fastbuf *fb, void *ptr)
-{
-  struct ip_addrmask *entry = ptr;
-
-  bprintf(fb, "%08x/%08x ", entry->addr, entry->mask);
-}
-
-/* User-defined configuration type describing struct ip_addrmask. */
-struct cf_user_type ip_addrmask_type = {
- .size = sizeof(struct ip_addrmask),
- .name = "ip_addrmask",
- .parser = addrmask_parser,
- .dumper = addrmask_dumper
-};
-
-/*
- * Configuration section describing one access list entry: "Mode" is either
- * "deny" or "allow" (stored as the index into that list) and "IP" is the
- * address with an optional mask.
- */
-struct cf_section ipaccess_cf = {
- CF_TYPE(struct ipaccess_entry),
- CF_ITEMS {
- CF_LOOKUP("Mode", PTR_TO(struct ipaccess_entry, allow), ((char*[]) { "deny", "allow", NULL })),
- CF_USER("IP", PTR_TO(struct ipaccess_entry, addr), &ip_addrmask_type),
- CF_END
- }
-};
-
-/* Return non-zero iff `ip' agrees with the address in all bits selected by the mask. */
-int ip_addrmask_match(struct ip_addrmask *am, u32 ip)
-{
-  u32 differing = (ip ^ am->addr) & am->mask;
-
-  return differing == 0;
-}
-
-/*
- * Check an address against an access list: the first entry matching the
- * address decides. Returns its `allow' value, or 0 if no entry matches.
- */
-int
-ipaccess_check(clist *l, u32 ip)
-{
-  CLIST_FOR_EACH(struct ipaccess_entry *, rule, *l)
-    {
-      if (!ip_addrmask_match(&rule->addr, ip))
-        continue;
-      return rule->allow;
-    }
-  return 0;
-}
-
-#ifdef TEST
-
-#include <stdio.h>
-
-/* The access list used by the test program; filled in from the configuration. */
-static clist t;
-
-/* Test configuration: section "T" (declared in main) holds a list "A" of access list entries. */
-static struct cf_section test_cf = {
- CF_ITEMS {
- CF_LIST("A", &t, &ipaccess_cf),
- CF_END
- }
-};
-
-/*
- * Test program: loads the access list from the configuration, then reads
- * IP addresses from the standard input (one per line) and prints the
- * verdict for each of them.
- *
- * The line buffer is an array of char, as fgets(), strchr() and
- * cf_parse_ip() all work with char pointers.
- */
-int main(int argc, char **argv)
-{
-  cf_declare_section("T", &test_cf, 0);
-  if (cf_getopt(argc, argv, CF_SHORT_OPTS, CF_NO_LONG_OPTS, NULL) != -1)
-    die("Invalid arguments");
-
-  char buf[256];
-  while (fgets(buf, sizeof(buf), stdin))
-    {
-      /* Strip the trailing newline, if any. */
-      char *c = strchr(buf, '\n');
-      if (c)
-        *c = 0;
-      u32 ip;
-      if (cf_parse_ip(buf, &ip))
-        puts("Invalid IP address");
-      else if (ipaccess_check(&t, ip))
-        puts("Allowed");
-      else
-        puts("Denied");
-    }
-  return 0;
-}
-
-#endif
+++ /dev/null
-/*
- * UCW Library -- IP address access lists
- *
- * (c) 1997--2007 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#ifndef _UCW_IPACCESS_H
-#define _UCW_IPACCESS_H
-
-#include "lib/clists.h"
-
-/* Configuration section describing a single entry of an access list. */
-extern struct cf_section ipaccess_cf;
-/* Check `ip' against the access list `l'. */
-int ipaccess_check(clist *l, u32 ip);
-
-/* Low-level handling of addresses and masks */
-
-/* An address together with a mask. */
-struct ip_addrmask {
- u32 addr;
- u32 mask;
-};
-
-/* Configuration type for struct ip_addrmask. */
-extern struct cf_user_type ip_addrmask_type;
-/* Test `ip' against the address/mask pair `am'. */
-int ip_addrmask_match(struct ip_addrmask *am, u32 ip);
-
-#endif
+++ /dev/null
-/*
- * Knuth-Morris-Pratt's Substring Search for N given strings
- *
- * (c) 1999--2005, Robert Spalek <robert@ucw.cz>
- * (c) 2006, Pavel Charvat <pchar@ucw.cz>
- *
- * (In fact, the algorithm is usually referred to as Aho-Corasick,
- * but that's just an extension of KMP to multiple strings.)
- */
-
-/*
- * This is not a normal header file, it's a generator of KMP algorithm.
- * Each time you include it with parameters set in the corresponding
- * preprocessor macros, it generates KMP structures and functions
- * with the parameters given. See lib/kmp.h before reading this description.
- *
- * This file defines:
- *
- * struct search structure with both the internal and the user-defined variables
- * used during the search and accessible from all macros
- *
- * void search(kmp,search,src) executes the search; search structure is allocated by the caller (possible input/output)
- *
- * void run(kmp,src) the same, but automatically allocates search structre from the stack
- *
- *
- * Parameters to the generator (these marked with [*] are mandatory):
- *
- * [*] KMPS_PREFIX(x) macro to add a name prefix (used on all global names
- * defined by the KMP search generator)
- * [*] KMPS_KMP_PREFIX(x) prefix used for lib/kmp.h
- *
- * KMPS_SOURCE user-defined text source (together with KMPS_GET_CHAR);
- * if unset, the one from lib/kmp.h is taken
- * KMPS_GET_CHAR(kmp,src,search) analogy to KMP_GET_CHAR, but it must store the next character to search->c
- *
- * KMPS_ADD_CONTROLS add control characters (see KMP_CONTROL_CHAR in kmp.h) at both ends of the input string
- * KMPS_MERGE_CONTROLS merge adjacent control characters to a single one
- *
- * KMPS_VARS user-defined variables in struct search (in .u substructure to avoid collisions)
- *
- * KMPS_INIT(kmp,src,search) statement executed at the beginning of search()
- * KMPS_EXIT(kmp,src,search) ... at the end
- * KMPS_STEP(kmp,src,search) ... after each step (read of next character + current state update)
- * of the algorithm, but before KMPS_FOUND[_CHAIN]
- * KMPS_FOUND_CHAIN(kmp,src,search) ... for each state representing locally longest match
- * (stored in search->out - NOT necessary search->s!);
- * all matches form a NULL-terminated link list (search->out, search->out->next, ...)
- * in order of decreasing length
- * KMPS_FOUND(kmp,src,search) ... called for every match (in search->out)
- * KMPS_WANT_BEST algorithm computes globally longest match, which is available
- * in search->best in KMPS_EXIT; if there is no match, it points to the null state
- */
-
-#define P(x) KMPS_PREFIX(x)
-#define KP(x) KMPS_KMP_PREFIX(x)
-
-#ifdef KMPS_SOURCE
-typedef KMPS_SOURCE P(search_source_t);
-#else
-typedef KP(source_t) P(search_source_t);
-#endif
-
-#ifndef KMPS_GET_CHAR
-#define KMPS_GET_CHAR(kmp,src,s) (KP(get_char)(kmp, &src, &s->c))
-#endif
-
-struct P(search) {
- struct KP(state) *s; /* current state */
- struct KP(state) *out; /* output state */
-# ifdef KMPS_WANT_BEST
- struct KP(state) *best; /* longest match */
-# endif
- KP(char_t) c; /* last character */
-# ifdef KMPS_ADD_CONTROLS
- uns eof;
-# endif
-# ifdef KMPS_VARS
- struct {
- KMPS_VARS
- } u; /* user-defined */
-# endif
-};
-
-static void
-P(search) (struct KP(struct) *kmp, struct P(search) *s, P(search_source_t) src)
-{
- s->s = &kmp->null;
-# ifdef KMPS_WANT_BEST
- s->best = &kmp->null;
-# endif
-# ifdef KMPS_ADD_CONTROLS
- s->c = KP(control)();
- s->eof = 0;
-# else
- s->c = 0;
-# endif
-# ifdef KMPS_INIT
- { KMPS_INIT(kmp, src, s); }
-# endif
-# ifndef KMPS_ADD_CONTROLS
- goto start_read;
-# endif
- for (;;)
- {
- for (struct KP(state) *t = s->s; t && !(s->s = KP(hash_find)(&kmp->hash, t, s->c)); t = t->back);
- s->s = s->s ? : &kmp->null;
-
-# ifdef KMPS_STEP
- { KMPS_STEP(kmp, src, s); }
-# endif
-
-# if defined(KMPS_FOUND) || defined(KMPS_FOUND_CHAIN) || defined(KMPS_WANT_BEST)
- s->out = s->s->len ? s->s : s->s->next;
- if (s->out)
- {
-# ifdef KMPS_WANT_BEST
- if (s->out->len > s->best->len)
- s->best = s->out;
-# endif
-# ifdef KMPS_FOUND_CHAIN
- { KMPS_FOUND_CHAIN(kmp, src, s); }
-# endif
-# ifdef KMPS_FOUND
- do
- { KMPS_FOUND(kmp, src, s); }
- while (s->out = s->out->next);
-# endif
- }
-# endif
-
-# ifdef KMPS_ADD_CONTROLS
- if (s->eof)
- break;
-# endif
-
-# ifndef KMPS_ADD_CONTROLS
-start_read: ;
-# endif
-# ifdef KMPS_MERGE_CONTROLS
- KP(char_t) last_c = s->c;
-# endif
-
- do
- {
- if (!KMPS_GET_CHAR(kmp, src, s))
- {
-# ifdef KMPS_ADD_CONTROLS
- if (!KP(is_control)(kmp, s->c))
- {
- s->c = KP(control)();
- s->eof = 1;
- break;
- }
-# endif
- goto exit;
- }
- }
- while (0
-# ifdef KMPS_MERGE_CONTROLS
- || (KP(is_control)(kmp, last_c) && KP(is_control)(kmp, s->c))
-# endif
- );
- }
-exit: ;
-# ifdef KMPS_EXIT
- { KMPS_EXIT(kmp, src, s); }
-# endif
-}
-
-static inline void
-P(run) (struct KP(struct) *kmp, P(search_source_t) src)
-{
- struct P(search) search;
- P(search)(kmp, &search, src);
-}
-
-#undef P
-#undef KMPS_PREFIX
-#undef KMPS_KMP_PREFIX
-#undef KMPS_SOURCE
-#undef KMPS_GET_CHAR
-#undef KMPS_ADD_CONTROLS
-#undef KMPS_MERGE_CONTROLS
-#undef KMPS_VARS
-#undef KMPS_INIT
-#undef KMPS_EXIT
-#undef KMPS_FOUND
-#undef KMPS_FOUND_CHAIN
-#undef KMPS_WANT_BEST
-#undef KMPS_STEP
+++ /dev/null
-/*
- * Test of KMP search
- *
- * (c) 2006, Pavel Charvat <pchar@ucw.cz>
- */
-
-#include "lib/lib.h"
-#include "lib/mempool.h"
-#include <string.h>
-
-#if 0
-#define TRACE(x...) do{log(L_DEBUG, x);}while(0)
-#else
-#define TRACE(x...) do{}while(0)
-#endif
-
-/* TEST1 - multiple searches */
-
-#define KMP_PREFIX(x) kmp1_##x
-#define KMP_WANT_CLEANUP
-#include "lib/kmp.h"
-#define KMPS_PREFIX(x) kmp1s1_##x
-#define KMPS_KMP_PREFIX(x) kmp1_##x
-#define KMPS_WANT_BEST
-#define KMPS_EXIT(kmp,src,s) TRACE("Best match has %d characters", s->best->len)
-#include "lib/kmp-search.h"
-#define KMPS_PREFIX(x) kmp1s2_##x
-#define KMPS_KMP_PREFIX(x) kmp1_##x
-#define KMPS_VARS uns count;
-#define KMPS_INIT(kmp,src,s) s->u.count = 0
-#define KMPS_FOUND(kmp,src,s) s->u.count++
-#include "lib/kmp-search.h"
-
-static void
-test1(void)
-{
- TRACE("Running test1");
- struct kmp1_struct kmp;
- kmp1_init(&kmp);
- kmp1_add(&kmp, "ahoj");
- kmp1_add(&kmp, "hoj");
- kmp1_add(&kmp, "aho");
- kmp1_build(&kmp);
- struct kmp1s1_search s1;
- kmp1s1_search(&kmp, &s1, "asjlahslhalahosjkjhojsas");
- ASSERT(s1.best->len == 3);
- struct kmp1s2_search s2;
- kmp1s2_search(&kmp, &s2, "asjlahslhalahojsjkjhojsas");
- ASSERT(s2.u.count == 4);
- kmp1_cleanup(&kmp);
-}
-
-/* TEST2 - various tracing */
-
-#define KMP_PREFIX(x) kmp2_##x
-#define KMP_USE_UTF8
-#define KMP_TOLOWER
-#define KMP_ONLYALPHA
-#define KMP_STATE_VARS char *str; uns id;
-#define KMP_ADD_EXTRA_ARGS uns id
-#define KMP_VARS char *start;
-#define KMP_ADD_INIT(kmp,src) kmp->u.start = src
-#define KMP_ADD_NEW(kmp,src,s) do{ TRACE("Inserting string %s with id %d", kmp->u.start, id); \
- s->u.str = kmp->u.start; s->u.id = id; }while(0)
-#define KMP_ADD_DUP(kmp,src,s) TRACE("String %s already inserted", kmp->u.start)
-#define KMP_WANT_CLEANUP
-#define KMP_WANT_SEARCH
-#define KMPS_ADD_CONTROLS
-#define KMPS_MERGE_CONTROLS
-#define KMPS_FOUND(kmp,src,s) TRACE("String %s with id %d found", s->out->u.str, s->out->u.id)
-#define KMPS_STEP(kmp,src,s) TRACE("Got to state %p after reading %d", s->s, s->c)
-#include "lib/kmp.h"
-
-static void
-test2(void)
-{
- TRACE("Running test2");
- struct kmp2_struct kmp;
- kmp2_init(&kmp);
- kmp2_add(&kmp, "ahoj", 1);
- kmp2_add(&kmp, "ahoj", 2);
- kmp2_add(&kmp, "hoj", 3);
- kmp2_add(&kmp, "aho", 4);
- kmp2_add(&kmp, "aba", 5);
- kmp2_add(&kmp, "aba", 5);
- kmp2_add(&kmp, "pěl", 5);
- kmp2_build(&kmp);
- kmp2_run(&kmp, "Šíleně žluťoučký kůň úpěl ďábelské ódy labababaks sdahojdhsaladsjhla");
- kmp2_cleanup(&kmp);
-}
-
-/* TEST3 - random tests */
-
-#define KMP_PREFIX(x) kmp3_##x
-#define KMP_STATE_VARS uns index;
-#define KMP_ADD_EXTRA_ARGS uns index
-#define KMP_VARS char *start;
-#define KMP_ADD_INIT(kmp,src) kmp->u.start = src
-#define KMP_ADD_NEW(kmp,src,s) s->u.index = index
-#define KMP_ADD_DUP(kmp,src,s) *(kmp->u.start) = 0
-#define KMP_WANT_CLEANUP
-#define KMP_WANT_SEARCH
-#define KMPS_VARS uns sum, *cnt;
-#define KMPS_FOUND(kmp,src,s) do{ ASSERT(s->u.cnt[s->out->u.index]); s->u.cnt[s->out->u.index]--; s->u.sum--; }while(0)
-#include "lib/kmp.h"
-
-static void
-test3(void)
-{
- TRACE("Running test3");
- struct mempool *pool = mp_new(1024);
- for (uns testn = 0; testn < 100; testn++)
- {
- mp_flush(pool);
- uns n = random_max(100);
- char *s[n];
- struct kmp3_struct kmp;
- kmp3_init(&kmp);
- for (uns i = 0; i < n; i++)
- {
- uns m = random_max(10);
- s[i] = mp_alloc(pool, m + 1);
- for (uns j = 0; j < m; j++)
- s[i][j] = 'a' + random_max(3);
- s[i][m] = 0;
- kmp3_add(&kmp, s[i], i);
- }
- kmp3_build(&kmp);
- for (uns i = 0; i < 10; i++)
- {
- uns m = random_max(100);
- byte b[m + 1];
- for (uns j = 0; j < m; j++)
- b[j] = 'a' + random_max(4);
- b[m] = 0;
- uns cnt[n];
- struct kmp3_search search;
- search.u.sum = 0;
- search.u.cnt = cnt;
- for (uns j = 0; j < n; j++)
- {
- cnt[j] = 0;
- if (*s[j])
- for (uns k = 0; k < m; k++)
- if (!strncmp(b + k, s[j], strlen(s[j])))
- cnt[j]++, search.u.sum++;
- }
- kmp3_search(&kmp, &search, b);
- ASSERT(search.u.sum == 0);
- }
- kmp3_cleanup(&kmp);
- }
- mp_delete(pool);
-}
-
-/* TEST4 - user-defined character type */
-
-struct kmp4_struct;
-struct kmp4_state;
-
-static inline int
-kmp4_eq(struct kmp4_struct *kmp UNUSED, byte *a, byte *b)
-{
- return (a == b) || (a && b && *a == *b);
-}
-
-static inline uns
-kmp4_hash(struct kmp4_struct *kmp UNUSED, struct kmp4_state *s, byte *c)
-{
- return (c ? (*c << 16) : 0) + (uns)(uintptr_t)s;
-}
-
-#define KMP_PREFIX(x) kmp4_##x
-#define KMP_CHAR byte *
-#define KMP_CONTROL_CHAR NULL
-#define KMP_GET_CHAR(kmp,src,c) ({ c = src++; !!*c; })
-#define KMP_GIVE_HASHFN
-#define KMP_GIVE_EQ
-#define KMP_WANT_CLEANUP
-#define KMP_WANT_SEARCH
-#define KMPS_FOUND(kmp,src,s) TRACE("found")
-#define KMPS_ADD_CONTROLS
-#define KMPS_MERGE_CONTROLS
-#include "lib/kmp.h"
-
-static void
-test4(void)
-{
- TRACE("Running test4");
- struct kmp4_struct kmp;
- kmp4_init(&kmp);
- kmp4_add(&kmp, "ahoj");
- kmp4_build(&kmp);
- kmp4_run(&kmp, "djdhaskjdahoahaahojojshdaksjahdahojskj");
- kmp4_cleanup(&kmp);
-}
-
-int
-main(void)
-{
- test1();
- test2();
- test3();
- test4();
- return 0;
-}
+++ /dev/null
-# Tests for the kmp module
-
-Run: ../obj/lib/kmp-test
+++ /dev/null
-/*
- * Knuth-Morris-Pratt's Substring Search for N given strings
- *
- * (c) 1999--2005, Robert Spalek <robert@ucw.cz>
- * (c) 2006, Pavel Charvat <pchar@ucw.cz>
- *
- * (In fact, the algorithm is usually referred to as Aho-McCorasick,
- * but that's just an extension of KMP to multiple strings.)
- */
-
-/*
- * This is not a normal header file, it's a generator of KMP algorithm.
- * Each time you include it with parameters set in the corresponding
- * preprocessor macros, it generates KMP structures and functions
- * with the parameters given.
- *
- * This file contains only construction of the automaton. The search
- * itself can be generated by inclusion of file lib/kmp-search.h.
- * Separeted headers allow the user to define multiple search
- * routines for one common set of key strings.
- *
- * Example:
- *
- * #define KMP_PREFIX(x) kmp_##x
- * #define KMP_WANT_CLEANUP
- * #define KMP_WANT_SEARCH // includes lib/kmp-search.h automatically
- * #define KMPS_FOUND(kmp,src,s) printf("found\n")
- * #include "lib/kmp.h"
- *
- * [...]
- *
- * struct kmp_struct kmp; // a structure describing the whole automaton
- * kmp_init(&kmp); // initialization (must be called before all other functions)
- *
- * // add key strings we want to search
- * kmp_add(&kmp, "aaa");
- * kmp_add(&kmp, "abc");
- *
- * // complete the automaton, no more strings can be added later
- * kmp_build(&kmp);
- *
- * // example of search, should print single "found" to stdout
- * kmp_run(&kmp, "aabaabca");
- *
- * // destroy all internal structures
- * kmp_cleanup(&kmp);
- *
- *
- * Brief description of all parameters:
- *
- * Basic parameters:
- * KMP_PREFIX(x) macro to add a name prefix (used on all global names
- * defined by the KMP generator); mandatory;
- * we abbreviate this to P(x) below
- *
- * KMP_CHAR alphabet type, the default is u16
- *
- * KMP_SOURCE user-defined text source; KMP_GET_CHAR must
- * KMP_GET_CHAR(kmp,src,c) return zero at the end or nonzero together with the next character in c otherwise;
- * if not defined, zero-terminated array of bytes is used as the input
- *
- * KMP_VARS user-defined variables in 'struct P(struct)'
- * -- a structure describing the whole automaton;
- * these variables are stored in .u substructure to avoid collisions
- * KMP_STATE_VARS user-defined variables in 'struct P(state)'
- * -- created for each state of the automaton;
- * these variables are stored in .u substructure to avoid collisions
- *
- * Parameters which select how the input is interpreted (if KMP_SOURCE is unset):
- * KMP_USE_ASCII reads single bytes from the input (default)
- * KMP_USE_UTF8 reads UTF-8 characters from the input (valid UTF-8 needed)
- * KMP_TOLOWER converts all to lowercase
- * KMP_UNACCENT removes accents
- * KMP_ONLYALPHA converts non-alphas to KMP_CONTROL_CHAR (see below)
- *
- * Parameters controlling add(kmp, src):
- * KMP_ADD_EXTRA_ARGS extra arguments, should be used carefully because of possible collisions
- * KMP_ADD_INIT(kmp,src) called in the beginning of add(), src is the first
- * KMP_INIT_STATE(kmp,s) initialization of a new state s (called before KMP_ADD_{NEW,DUP});
- * null state is not included and should be handled after init() if necessary;
- * all user-defined data are filled by zeros before call to KMP_INIT_STATE
- * KMP_ADD_NEW(kmp,src,s) initialize last state of every new key string (called after KMP_INIT_STATE);
- * the string must be parsed before so src is after the last string's character
- * KMP_ADD_DUP(kmp,src,s) analogy of KMP_ADD_NEW called for duplicates
- *
- * Parameters to build():
- * KMP_BUILD_STATE(kmp,s) called for all states (including null) in order of non-decreasing tree depth
- *
- * Other parameters:
- * KMP_WANT_CLEANUP define cleanup()
- * KMP_WANT_SEARCH includes lib/kmp-search.h with the same prefix;
- * there can be multiple search variants for a single KMP automaton
- * KMP_USE_POOL allocates in a given pool
- * KMP_CONTROL_CHAR special control character (default is ':')
- * KMP_GIVE_ALLOC if set, you must supply custom allocation functions:
- * void *alloc(unsigned int size) -- allocate space for
- * a state. Default is pooled allocation from a local pool or HASH_USE_POOL.
- * void free(void *) -- the converse.
- * KMP_GIVE_HASHFN if set, you must supply custom hash function:
- * unsigned int hash(struct P(struct) *kmp, struct P(state) *state, KMP_CHAR c);
- * default hash function works only for integer character types
- * KMP_GIVE_EQ if set, you must supply custom compare function of two characters:
- * int eq(struct P(struct) *kmp, KMP_CHAR a, KMP_CHAR b);
- * default is 'a == b'
- */
-
-#ifndef KMP_PREFIX
-#error Missing KMP_PREFIX
-#endif
-
-#include "lib/mempool.h"
-#include <alloca.h>
-#include <string.h>
-
-#define P(x) KMP_PREFIX(x)
-
-#ifdef KMP_CHAR
-typedef KMP_CHAR P(char_t);
-#else
-typedef u16 P(char_t);
-#endif
-
-typedef u32 P(len_t);
-
-#ifdef KMP_NODE
-typedef KMP_NODE P(node_t);
-#else
-typedef struct {} P(node_t);
-#endif
-
-struct P(struct);
-
-struct P(state) {
- struct P(state) *from; /* state with the previous character (forms a tree with null state in the root) */
- struct P(state) *back; /* backwards edge to the longest shorter state with same suffix */
- struct P(state) *next; /* the longest of shorter matches (or NULL) */
- P(len_t) len; /* state depth if it represents a key string, zero otherwise */
- P(char_t) c; /* last character of the represented string */
- struct {
-# ifdef KMP_STATE_VARS
- KMP_STATE_VARS
-# endif
- } u; /* user-defined data*/
-};
-
-/* Control char */
-static inline P(char_t)
-P(control) (void)
-{
-# ifdef KMP_CONTROL_CHAR
- return KMP_CONTROL_CHAR;
-# else
- return ':';
-# endif
-}
-
-/* User-defined source */
-struct P(hash_table);
-
-#define HASH_GIVE_HASHFN
-#ifdef KMP_GIVE_HASHFN
-static inline uns
-P(hash_hash) (struct P(hash_table) *t, struct P(state) *f, P(char_t) c)
-{
- return P(hash) ((struct P(struct) *) t, f, c);
-}
-#else
-static inline uns
-P(hash_hash) (struct P(hash_table) *t UNUSED, struct P(state) *f, P(char_t) c)
-{
- return (((uns)c) << 16) + (uns)(uintptr_t)f;
-}
-#endif
-
-#ifndef KMP_GIVE_EQ
-static inline int
-P(eq) (struct P(struct) *kmp UNUSED, P(char_t) c1, P(char_t) c2)
-{
- return c1 == c2;
-}
-#endif
-
-static inline int
-P(is_control) (struct P(struct) *kmp, P(char_t) c)
-{
- return P(eq) (kmp, c, P(control)());
-}
-
-#define HASH_GIVE_EQ
-static inline int
-P(hash_eq) (struct P(hash_table) *t, struct P(state) *f1, P(char_t) c1, struct P(state) *f2, P(char_t) c2)
-{
- return f1 == f2 && P(eq)((struct P(struct) *) t, c1, c2);
-}
-
-#ifdef KMP_GIVE_ALLOC
-#define HASH_GIVE_ALLOC
-static inline void *
-P(hash_alloc) (struct P(hash_table) *t, uns size)
-{
- return P(alloc) ((struct P(struct) *) t, size);
-}
-
-static inline void
-P(hash_free) (struct P(hash_table) *t, void *ptr)
-{
- P(free) ((struct P(struct) *) t, ptr);
-}
-#endif
-
-#define HASH_GIVE_INIT_KEY
-static inline void
-P(hash_init_key) (struct P(hash_table) *t UNUSED, struct P(state) *s, struct P(state) *f, P(char_t) c)
-{
- bzero(s, sizeof(*s));
-# ifdef KMP_INIT_STATE
- struct P(struct) *kmp = (struct P(struct) *)t;
- { KMP_INIT_STATE(kmp, s); }
-# endif
- s->from = f;
- s->c = c;
- s->next = f->back; /* the pointers hold the link-list of sons... changed in build() */
- f->back = s;
-}
-
-#undef P
-#define HASH_PREFIX(x) KMP_PREFIX(hash_##x)
-#define HASH_NODE struct KMP_PREFIX(state)
-#define HASH_KEY_COMPLEX(x) x from, x c
-#define HASH_KEY_DECL struct KMP_PREFIX(state) *from, KMP_PREFIX(char_t) c
-#define HASH_WANT_NEW
-#define HASH_WANT_FIND
-#ifdef KMP_WANT_CLEANUP
-#define HASH_WANT_CLEANUP
-#endif
-#if defined(KMP_USE_POOL)
-#define HASH_USE_POOL KMP_USE_POOL
-#else
-#define HASH_AUTO_POOL 4096
-#endif
-#define HASH_CONSERVE_SPACE
-#define HASH_TABLE_DYNAMIC
-#include "lib/hashtable.h"
-#define P(x) KMP_PREFIX(x)
-
-struct P(struct) {
- struct P(hash_table) hash; /* hash table of state transitions */
- struct P(state) null; /* null state */
- struct {
-# ifdef KMP_VARS
- KMP_VARS
-# endif
- } u; /* user-defined data */
-};
-
-#ifdef KMP_SOURCE
-typedef KMP_SOURCE P(source_t);
-#else
-typedef char *P(source_t);
-#endif
-
-#ifdef KMP_GET_CHAR
-static inline int
-P(get_char) (struct P(struct) *kmp UNUSED, P(source_t) *src UNUSED, P(char_t) *c UNUSED)
-{
- return KMP_GET_CHAR(kmp, (*src), (*c));
-}
-#else
-# if defined(KMP_USE_UTF8)
-# include "lib/unicode.h"
-# if defined(KMP_ONLYALPHA) || defined(KMP_TOLOWER) || defined(KMP_UNACCENT)
-# include "charset/unicat.h"
-# endif
-# elif defined(KMP_USE_ASCII)
-# if defined(KMP_ONLYALPHA) || defined(KMP_TOLOWER)
-# include "lib/chartype.h"
-# endif
-# endif
-static inline int
-P(get_char) (struct P(struct) *kmp UNUSED, P(source_t) *src, P(char_t) *c)
-{
-# ifdef KMP_USE_UTF8
- uns cc;
- *src = utf8_get(*src, &cc);
-# ifdef KMP_ONLYALPHA
- if (!cc) {}
- else if (!Ualpha(cc))
- cc = P(control)();
- else
-# endif
- {
-# ifdef KMP_TOLOWER
- cc = Utolower(cc);
-# endif
-# ifdef KMP_UNACCENT
- cc = Uunaccent(cc);
-# endif
- }
-# else
- uns cc = *(*src)++;
-# ifdef KMP_ONLYALPHA
- if (!cc) {}
- else if (!Calpha(cc))
- cc = P(control)();
- else
-# endif
-# ifdef KMP_TOLOWER
- cc = Clocase(cc);
-# endif
-# ifdef KMP_UNACCENT
-# error Do not know how to unaccent ASCII characters
-# endif
-# endif
- *c = cc;
- return !!cc;
-}
-#endif
-
-static struct P(state) *
-P(add) (struct P(struct) *kmp, P(source_t) src
-# ifdef KMP_ADD_EXTRA_ARGS
- , KMP_ADD_EXTRA_ARGS
-# endif
-)
-{
-# ifdef KMP_ADD_INIT
- { KMP_ADD_INIT(kmp, src); }
-# endif
-
- P(char_t) c;
- if (!P(get_char)(kmp, &src, &c))
- return NULL;
- struct P(state) *p = &kmp->null, *s;
- uns len = 0;
- do
- {
- s = P(hash_find)(&kmp->hash, p, c);
- if (!s)
- for (;;)
- {
- s = P(hash_new)(&kmp->hash, p, c);
- len++;
- if (!(P(get_char)(kmp, &src, &c)))
- goto enter_new;
- p = s;
- }
- p = s;
- len++;
- }
- while (P(get_char)(kmp, &src, &c));
- if (s->len)
- {
-# ifdef KMP_ADD_DUP
- { KMP_ADD_DUP(kmp, src, s); }
-# endif
- return s;
- }
-enter_new:
- s->len = len;
-# ifdef KMP_ADD_NEW
- { KMP_ADD_NEW(kmp, src, s); }
-# endif
- return s;
-}
-
-static void
-P(init) (struct P(struct) *kmp)
-{
- bzero(&kmp->null, sizeof(struct P(state)));
- P(hash_init)(&kmp->hash);
-}
-
-#ifdef KMP_WANT_CLEANUP
-static inline void
-P(cleanup) (struct P(struct) *kmp)
-{
- P(hash_cleanup)(&kmp->hash);
-}
-#endif
-
-static inline int
-P(empty) (struct P(struct) *kmp)
-{
- return !kmp->hash.hash_count;
-}
-
-static inline struct P(state) *
-P(chain_start) (struct P(state) *s)
-{
- return s->len ? s : s->next;
-}
-
-static void
-P(build) (struct P(struct) *kmp)
-{
- if (P(empty)(kmp))
- return;
- uns read = 0, write = 0;
- struct P(state) *fifo[kmp->hash.hash_count], *null = &kmp->null;
- for (struct P(state) *s = null->back; s; s = s->next)
- fifo[write++] = s;
- null->back = NULL;
-# ifdef KMP_BUILD_STATE
- { KMP_BUILD_STATE(kmp, null); }
-# endif
- while (read != write)
- {
- struct P(state) *s = fifo[read++], *t;
- for (t = s->back; t; t = t->next)
- fifo[write++] = t;
- for (t = s->from->back; 1; t = t->back)
- {
- if (!t)
- {
- s->back = null;
- s->next = NULL;
- break;
- }
- s->back = P(hash_find)(&kmp->hash, t, s->c);
- if (s->back)
- {
- s->next = s->back->len ? s->back : s->back->next;
- break;
- }
- }
-# ifdef KMP_BUILD_STATE
- { KMP_BUILD_STATE(kmp, s); }
-# endif
- }
-}
-
-#undef P
-#undef KMP_CHAR
-#undef KMP_SOURCE
-#undef KMP_GET_CHAR
-#undef KMP_VARS
-#undef KMP_STATE_VARS
-#undef KMP_CONTEXT
-#undef KMP_USE_ASCII
-#undef KMP_USE_UTF8
-#undef KMP_TOLOWER
-#undef KMP_UNACCENT
-#undef KMP_ONLYALPHA
-#undef KMP_CONTROL_CHAR
-#undef KMP_ADD_EXTRA_ARGS
-#undef KMP_ADD_INIT
-#undef KMP_ADD_NEW
-#undef KMP_ADD_DUP
-#undef KMP_INIT_STATE
-#undef KMP_BUILD_STATE
-#undef KMP_USE_POOL
-#undef KMP_GIVE_ALLOC
-#undef KMP_GIVE_HASHFN
-#undef KMP_GIVE_EQ
-
-#ifdef KMP_WANT_SEARCH
-# undef KMP_WANT_SEARCH
-# define KMPS_PREFIX(x) KMP_PREFIX(x)
-# define KMPS_KMP_PREFIX(x) KMP_PREFIX(x)
-# include "lib/kmp-search.h"
-#endif
-
-#undef KMP_PREFIX
+++ /dev/null
-/* Test of large files */
-
-#include "lib/lib.h"
-#include "lib/fastbuf.h"
-
-#include <stdlib.h>
-#include <time.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-
-#define BLOCK (1<<10)
-#define COUNT (5<<20)
-#define TESTS (1<<20)
-
-int main(void)
-{
- struct fastbuf *b;
- byte block[BLOCK];
- uns i;
-
- srand(time(NULL));
-#if 0
- b = bopen("/big/robert/large-file", O_CREAT | O_TRUNC | O_RDWR, 1<<20);
- if (!b)
- die("Cannot create large-file");
-
- log(L_DEBUG, "Writing %d blocks of size %d", COUNT, BLOCK);
- for (i=0; i<COUNT; i++)
- {
- memset(block, i & 0xff, BLOCK);
- bwrite(b, block, BLOCK);
- if ( i%1024 == 0 )
- {
- printf("\r%10d", i);
- fflush(stdout);
- }
- }
-#else
- b = bopen("/big/robert/large-file", O_RDWR, 1<<20);
- if (!b)
- die("Cannot create large-file");
-#endif
- log(L_DEBUG, "Checking the file contents in %d tests", TESTS);
- for (i=0; i<TESTS; i++)
- {
- uns idx = random()%COUNT;
- sh_off_t ofs = idx*BLOCK;
- bseek(b, ofs, SEEK_SET);
- bread(b, block, BLOCK);
- if (block[17] != (idx & 0xff))
- die("Invalid block %d in test %d: %x != %x", idx, i, block[17], idx & 0xff);
- if ( i%16 == 0 )
- {
- printf("\r%10d", i);
- fflush(stdout);
- }
- }
- log(L_DEBUG, "Done");
-
- bclose(b);
- return 0;
-}
+++ /dev/null
-/*
- * UCW Library -- Large File Support
- *
- * (c) 1999--2002 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#ifndef _UCW_LFS_H
-#define _UCW_LFS_H
-
-#include <fcntl.h>
-#include <unistd.h>
-
-#ifdef CONFIG_LFS
-
-#define sh_open open64
-#define sh_seek lseek64
-#define sh_pread pread64
-#define sh_pwrite pwrite64
-#define sh_ftruncate ftruncate64
-#define sh_mmap(a,l,p,f,d,o) mmap64(a,l,p,f,d,o)
-#define sh_pread pread64
-#define sh_pwrite pwrite64
-#define sh_stat stat64
-#define sh_fstat fstat64
-typedef struct stat64 sh_stat_t;
-
-#else /* !CONFIG_LFS */
-
-#define sh_open open
-#define sh_seek(f,o,w) lseek(f,o,w)
-#define sh_ftruncate(f,o) ftruncate(f,o)
-#define sh_mmap(a,l,p,f,d,o) mmap(a,l,p,f,d,o)
-#define sh_pread pread
-#define sh_pwrite pwrite
-#define sh_stat stat
-#define sh_fstat fstat
-typedef struct stat sh_stat_t;
-
-#endif /* !CONFIG_LFS */
-
-#if defined(_POSIX_SYNCHRONIZED_IO) && (_POSIX_SYNCHRONIZED_IO > 0)
-#define sh_fdatasync fdatasync
-#else
-#define sh_fdatasync fsync
-#endif
-
-#define HAVE_PREAD
-
-static inline sh_off_t
-sh_file_size(const char *name)
-{
- int fd = sh_open(name, O_RDONLY);
- if (fd < 0)
- die("Cannot open %s: %m", name);
- sh_off_t len = sh_seek(fd, 0, SEEK_END);
- close(fd);
- return len;
-}
-
-#endif /* !_UCW_LFS_H */
+++ /dev/null
-/*
- * The UCW Library -- Miscellaneous Functions
- *
- * (c) 1997--2007 Martin Mares <mj@ucw.cz>
- * (c) 2005 Tomas Valla <tom@ucw.cz>
- * (c) 2006 Robert Spalek <robert@ucw.cz>
- * (c) 2007 Pavel Charvat <pchar@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#ifndef _UCW_LIB_H
-#define _UCW_LIB_H
-
-#include "lib/config.h"
-#include <stdarg.h>
-
-/* Macros for handling structurues, offsets and alignment */
-
-#define CHECK_PTR_TYPE(x, type) ((x)-(type)(x) + (type)(x))
-#define PTR_TO(s, i) &((s*)0)->i
-#define OFFSETOF(s, i) ((unsigned int) PTR_TO(s, i))
-#define SKIP_BACK(s, i, p) ((s *)((char *)p - OFFSETOF(s, i)))
-#define ALIGN_TO(s, a) (((s)+a-1)&~(a-1))
-#define ALIGN_PTR(p, s) ((uintptr_t)(p) % (s) ? (typeof(p))((uintptr_t)(p) + (s) - (uintptr_t)(p) % (s)) : (p))
-#define UNALIGNED_PART(ptr, type) (((uintptr_t) (ptr)) % sizeof(type))
-
-/* Some other macros */
-
-#define MIN(a,b) (((a)<(b))?(a):(b))
-#define MAX(a,b) (((a)>(b))?(a):(b))
-#define CLAMP(x,min,max) ({ int _t=x; (_t < min) ? min : (_t > max) ? max : _t; })
-#define ABS(x) ((x) < 0 ? -(x) : (x))
-#define ARRAY_SIZE(a) (sizeof(a)/sizeof(*(a)))
-#define STRINGIFY(x) #x
-#define STRINGIFY_EXPANDED(x) STRINGIFY(x)
-#define GLUE(x,y) x##y
-#define GLUE_(x,y) x##_##y
-
-#define COMPARE(x,y) do { if ((x)<(y)) return -1; if ((x)>(y)) return 1; } while(0)
-#define REV_COMPARE(x,y) COMPARE(y,x)
-#define COMPARE_LT(x,y) do { if ((x)<(y)) return 1; if ((x)>(y)) return 0; } while(0)
-#define COMPARE_GT(x,y) COMPARE_LT(y,x)
-
-#define ROL(x, bits) (((x) << (bits)) | ((x) >> (sizeof(uns)*8 - (bits)))) /* Bitwise rotation of an uns to the left */
-
-/* GCC Extensions */
-
-#ifdef __GNUC__
-
-#undef inline
-#define NONRET __attribute__((noreturn))
-#define UNUSED __attribute__((unused))
-#define CONSTRUCTOR __attribute__((constructor))
-#define PACKED __attribute__((packed))
-#define CONST __attribute__((const))
-#define PURE __attribute__((pure))
-#define FORMAT_CHECK(x,y,z) __attribute__((format(x,y,z)))
-#define likely(x) __builtin_expect((x),1)
-#define unlikely(x) __builtin_expect((x),0)
-
-#if __GNUC__ >= 4 || __GNUC__ == 3 && __GNUC_MINOR__ >= 3
-#define ALWAYS_INLINE inline __attribute__((always_inline))
-#define NO_INLINE __attribute__((noinline))
-#else
-#define ALWAYS_INLINE inline
-#endif
-
-#if __GNUC__ >= 4
-#define LIKE_MALLOC __attribute__((malloc))
-#define SENTINEL_CHECK __attribute__((sentinel))
-#else
-#define LIKE_MALLOC
-#define SENTINEL_CHECK
-#endif
-
-#else
-#error This program requires the GNU C compiler.
-#endif
-
-/* Logging */
-
-#define L_DEBUG 'D' /* Debugging messages */
-#define L_INFO 'I' /* Informational msgs, warnings and errors */
-#define L_WARN 'W'
-#define L_ERROR 'E'
-#define L_INFO_R 'i' /* Errors caused by external events */
-#define L_WARN_R 'w'
-#define L_ERROR_R 'e'
-#define L_FATAL '!' /* die() */
-
-extern char *log_title; /* NULL - print no title, default is log_progname */
-extern char *log_filename; /* Expanded name of the current log file */
-extern volatile int log_switch_nest; /* log_switch() nesting counter, increment to disable automatic switches */
-extern int log_pid; /* 0 if shouldn't be logged */
-extern int log_precise_timings; /* Include microsecond timestamps in log messages */
-extern void (*log_die_hook)(void);
-struct tm;
-extern void (*log_switch_hook)(struct tm *tm);
-
-void msg(uns cat, const char *fmt, ...) FORMAT_CHECK(printf,2,3);
-void vmsg(uns cat, const char *fmt, va_list args);
-void die(const char *, ...) NONRET FORMAT_CHECK(printf,1,2);
-void log_init(const char *argv0);
-void log_file(const char *name);
-void log_fork(void);
-int log_switch(void);
-
-void assert_failed(const char *assertion, const char *file, int line) NONRET;
-void assert_failed_noinfo(void) NONRET;
-
-#ifdef DEBUG_ASSERTS
-#define ASSERT(x) ({ if (unlikely(!(x))) assert_failed(#x, __FILE__, __LINE__); 1; })
-#else
-#define ASSERT(x) ({ if (__builtin_constant_p(x) && !(x)) assert_failed_noinfo(); 1; })
-#endif
-
-#define COMPILE_ASSERT(name,x) typedef char _COMPILE_ASSERT_##name[!!(x)-1]
-
-#ifdef LOCAL_DEBUG
-#define DBG(x,y...) msg(L_DEBUG, x,##y)
-#else
-#define DBG(x,y...) do { } while(0)
-#endif
-
-static inline void log_switch_disable(void) { log_switch_nest++; }
-static inline void log_switch_enable(void) { ASSERT(log_switch_nest); log_switch_nest--; }
-
-/* Memory allocation */
-
-#define xmalloc sh_xmalloc
-#define xrealloc sh_xrealloc
-#define xfree sh_xfree
-
-#ifdef DEBUG_DMALLOC
-/*
- * The standard dmalloc macros tend to produce lots of namespace
- * conflicts and we use only xmalloc and xfree, so we can define
- * the stubs ourselves.
- */
-#define DMALLOC_DISABLE
-#include <dmalloc.h>
-#define sh_xmalloc(size) _xmalloc_leap(__FILE__, __LINE__, size)
-#define sh_xrealloc(ptr,size) _xrealloc_leap(__FILE__, __LINE__, ptr, size)
-#define sh_xfree(ptr) _xfree_leap(__FILE__, __LINE__, ptr)
-#else
-/*
- * Unfortunately, several libraries we might want to link to define
- * their own xmalloc and we don't want to interfere with them, hence
- * the renaming.
- */
-void *xmalloc(uns) LIKE_MALLOC;
-void *xrealloc(void *, uns);
-void xfree(void *);
-#endif
-
-void *xmalloc_zero(uns) LIKE_MALLOC;
-char *xstrdup(const char *) LIKE_MALLOC;
-
-/* Content-Type pattern matching and filters */
-
-int match_ct_patt(const char *, const char *);
-
-/* wordsplit.c */
-
-int sepsplit(char *str, uns sep, char **rec, uns max);
-int wordsplit(char *str, char **rec, uns max);
-
-/* pat(i)match.c: Matching of shell patterns */
-
-int match_pattern(const char *patt, const char *str);
-int match_pattern_nocase(const char *patt, const char *str);
-
-/* md5hex.c */
-
-void md5_to_hex(const byte *s, char *d);
-void hex_to_md5(const char *s, byte *d);
-
-#define MD5_SIZE 16
-#define MD5_HEX_SIZE 33
-
-/* prime.c */
-
-int isprime(uns x);
-uns nextprime(uns x);
-
-/* primetable.c */
-
-uns next_table_prime(uns x);
-uns prev_table_prime(uns x);
-
-/* timer.c */
-
-timestamp_t get_timestamp(void);
-
-void init_timer(timestamp_t *timer);
-uns get_timer(timestamp_t *timer);
-uns switch_timer(timestamp_t *old, timestamp_t *new);
-
-/* regex.c */
-
-typedef struct regex regex;
-
-regex *rx_compile(const char *r, int icase);
-void rx_free(regex *r);
-int rx_match(regex *r, const char *s);
-int rx_subst(regex *r, const char *by, const char *src, char *dest, uns destlen);
-
-/* random.c */
-
-uns random_u32(void);
-uns random_max(uns max);
-u64 random_u64(void);
-u64 random_max_u64(u64 max);
-
-/* mmap.c */
-
-void *mmap_file(const char *name, unsigned *len, int writeable);
-void munmap_file(void *start, unsigned len);
-
-/* proctitle.c */
-
-void setproctitle_init(int argc, char **argv);
-void setproctitle(const char *msg, ...) FORMAT_CHECK(printf,1,2);
-char *getproctitle(void);
-
-/* randomkey.c */
-
-void randomkey(byte *buf, uns size);
-
-/* exitstatus.c */
-
-#define EXIT_STATUS_MSG_SIZE 32
-int format_exit_status(char *msg, int stat);
-
-/* runcmd.c */
-
-int run_command(const char *cmd, ...);
-void NONRET exec_command(const char *cmd, ...);
-void echo_command(char *buf, int size, const char *cmd, ...);
-int run_command_v(const char *cmd, va_list args);
-void NONRET exec_command_v(const char *cmd, va_list args);
-void echo_command_v(char *buf, int size, const char *cmd, va_list args);
-
-/* carefulio.c */
-
-int careful_read(int fd, void *buf, int len);
-int careful_write(int fd, const void *buf, int len);
-
-/* sync.c */
-
-void sync_dir(const char *name);
-
-/* sighandler.c */
-
-typedef int (*sh_sighandler_t)(int); // gets signum, returns nonzero if abort() should be called
-
-void handle_signal(int signum);
-void unhandle_signal(int signum);
-sh_sighandler_t set_signal_handler(int signum, sh_sighandler_t new);
-
-/* string.c */
-
-char *str_unesc(char *dest, const char *src);
-char *str_format_flags(char *dest, const char *fmt, uns flags);
-
-/* bigalloc.c */
-
-void *page_alloc(u64 len) LIKE_MALLOC; // allocates a multiple of CPU_PAGE_SIZE bytes with mmap
-void *page_alloc_zero(u64 len) LIKE_MALLOC;
-void page_free(void *start, u64 len);
-void *page_realloc(void *start, u64 old_len, u64 new_len);
-
-void *big_alloc(u64 len) LIKE_MALLOC; // allocate a large memory block in the most efficient way available
-void *big_alloc_zero(u64 len) LIKE_MALLOC;
-void big_free(void *start, u64 len);
-
-#endif
+++ /dev/null
-# pkg-config metadata for libucw
-
-libdir=@LIBDIR@
-incdir=.
-
-#ifdef CONFIG_UCW_THREADS
-threads=-lpthread
-#else
-threads=
-#endif
-
-Name: libucw
-Description: A library of utility functions and data structures
-Version: @SHERLOCK_VERSION@
-Cflags: -I${incdir}
-Libs: -L${libdir} -lucw ${threads}
+++ /dev/null
-/*
- * UCW Library -- Linked Lists
- *
- * (c) 1997--1999 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-
-#define _UCW_LISTS_C
-#include "lib/lists.h"
-
-LIST_INLINE void
-add_tail(list *l, node *n)
-{
- node *z = l->tail;
-
- n->next = (node *) &l->null;
- n->prev = z;
- z->next = n;
- l->tail = n;
-}
-
-LIST_INLINE void
-add_head(list *l, node *n)
-{
- node *z = l->head;
-
- n->next = z;
- n->prev = (node *) &l->head;
- z->prev = n;
- l->head = n;
-}
-
-LIST_INLINE void
-insert_node(node *n, node *after)
-{
- node *z = after->next;
-
- n->next = z;
- n->prev = after;
- after->next = n;
- z->prev = n;
-}
-
-LIST_INLINE void
-rem_node(node *n)
-{
- node *z = n->prev;
- node *x = n->next;
-
- z->next = x;
- x->prev = z;
-}
-
-LIST_INLINE void
-init_list(list *l)
-{
- l->head = (node *) &l->null;
- l->null = NULL;
- l->tail = (node *) &l->head;
-}
-
-LIST_INLINE void
-add_tail_list(list *to, list *l)
-{
- node *p = to->tail;
- node *q = l->head;
-
- p->next = q;
- q->prev = p;
- q = l->tail;
- q->next = (node *) &to->null;
- to->tail = q;
-}
+++ /dev/null
-/*
- * UCW Library -- Linked Lists
- *
- * (c) 1997--1999 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#ifndef _UCW_LISTS_H
-#define _UCW_LISTS_H
-
-/*
- * I admit the list structure is very tricky and also somewhat awkward,
- * but it's both efficient and easy to manipulate once one understands the
- * basic trick: The list head always contains two synthetic nodes which are
- * always present in the list: the head and the tail. But as the `next'
- * entry of the tail and the `prev' entry of the head are both NULL, the
- * nodes can overlap each other:
- *
- * head head_node.next
- * null head_node.prev tail_node.next
- * tail tail_node.prev
- */
-
-typedef struct node {
- struct node *next, *prev;
-} node;
-
-typedef struct list { /* In fact two overlayed nodes */
- struct node *head, *null, *tail;
-} list;
-
-#define NODE (node *)
-#define HEAD(list) ((void *)((list).head))
-#define TAIL(list) ((void *)((list).tail))
-#define WALK_LIST(n,list) for(n=HEAD(list);(NODE (n))->next; \
- n=(void *)((NODE (n))->next))
-#define DO_FOR_ALL(n,list) WALK_LIST(n,list)
-#define WALK_LIST_DELSAFE(n,nxt,list) \
- for(n=HEAD(list); nxt=(void *)((NODE (n))->next); n=(void *) nxt)
-#define WALK_LIST_BACKWARDS(n,list) for(n=TAIL(list);(NODE (n))->prev; \
- n=(void *)((NODE (n))->prev))
-#define WALK_LIST_BACKWARDS_DELSAFE(n,prv,list) \
- for(n=TAIL(list); prv=(void *)((NODE (n))->prev); n=(void *) prv)
-
-#define EMPTY_LIST(list) (!(list).head->next)
-
-void add_tail(list *, node *);
-void add_head(list *, node *);
-void rem_node(node *);
-void add_tail_list(list *, list *);
-void init_list(list *);
-void insert_node(node *, node *);
-
-#if !defined(_UCW_LISTS_C) && defined(__GNUC__)
-#define LIST_INLINE extern inline
-#include "lib/lists.c"
-#undef LIST_INLINE
-#else
-#define LIST_INLINE
-#endif
-
-#endif
+++ /dev/null
-/*
- * LiZaRd -- Fast compression method based on Lempel-Ziv 77
- *
- * (c) 2004, Robert Spalek <robert@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-#include "lib/threads.h"
-#include "lib/lizard.h"
-
-#include <sys/mman.h>
-#include <fcntl.h>
-#include <signal.h>
-#include <setjmp.h>
-#include <errno.h>
-
-struct lizard_buffer {
- uns len;
- void *ptr;
-};
-
-struct lizard_buffer *
-lizard_alloc(void)
-{
- struct lizard_buffer *buf = xmalloc(sizeof(struct lizard_buffer));
- buf->len = 0;
- buf->ptr = NULL;
- handle_signal(SIGSEGV);
- return buf;
-}
-
-void
-lizard_free(struct lizard_buffer *buf)
-{
- unhandle_signal(SIGSEGV);
- if (buf->ptr)
- munmap(buf->ptr, buf->len + CPU_PAGE_SIZE);
- xfree(buf);
-}
-
-static void
-lizard_realloc(struct lizard_buffer *buf, uns max_len)
- /* max_len needs to be aligned to CPU_PAGE_SIZE */
-{
- if (max_len <= buf->len)
- return;
- if (max_len < 2*buf->len) // to ensure logarithmic cost
- max_len = 2*buf->len;
-
- if (buf->ptr)
- munmap(buf->ptr, buf->len + CPU_PAGE_SIZE);
- buf->len = max_len;
- buf->ptr = mmap(NULL, buf->len + CPU_PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0);
- if (buf->ptr == MAP_FAILED)
- die("mmap(anonymous, %d bytes): %m", (uns)(buf->len + CPU_PAGE_SIZE));
- if (mprotect(buf->ptr + buf->len, CPU_PAGE_SIZE, PROT_NONE) < 0)
- die("mprotect: %m");
-}
-
-static jmp_buf safe_decompress_jump;
-static int
-sigsegv_handler(int signal UNUSED)
-{
- longjmp(safe_decompress_jump, 1);
- return 1;
-}
-
-byte *
-lizard_decompress_safe(const byte *in, struct lizard_buffer *buf, uns expected_length)
- /* Decompresses in into buf, sets *ptr to the data, and returns the
- * uncompressed length. If an error has occured, -1 is returned and errno is
- * set. The buffer buf is automatically reallocated. SIGSEGV is caught in
- * case of buffer-overflow. The function is not re-entrant because of a
- * static longjmp handler. */
-{
- uns lock_offset = ALIGN_TO(expected_length + 3, CPU_PAGE_SIZE); // +3 due to the unaligned access
- if (lock_offset > buf->len)
- lizard_realloc(buf, lock_offset);
- volatile sh_sighandler_t old_handler = set_signal_handler(SIGSEGV, sigsegv_handler);
- byte *ptr;
- if (!setjmp(safe_decompress_jump))
- {
- ptr = buf->ptr + buf->len - lock_offset;
- int len = lizard_decompress(in, ptr);
- if (len != (int) expected_length)
- {
- ptr = NULL;
- errno = EINVAL;
- }
- }
- else
- {
- msg(L_ERROR, "SIGSEGV caught in lizard_decompress()");
- ptr = NULL;
- errno = EFAULT;
- }
- set_signal_handler(SIGSEGV, old_handler);
- return ptr;
-}
+++ /dev/null
-#include "lib/lib.h"
-#include "lib/getopt.h"
-#include "lib/fastbuf.h"
-#include "lib/ff-binary.h"
-#include "lib/lizard.h"
-#include <stdio.h>
-#include <stdlib.h>
-#include <fcntl.h>
-#include <unistd.h>
-
-static char *options = CF_SHORT_OPTS "cdtx";
-static char *help = "\
-Usage: lizard-test <options> input-file [output-file]\n\
-\n\
-Options:\n"
-CF_USAGE
-"-c\t\tCompress\n\
--d\t\tDecompress\n\
--t\t\tCompress, decompress, and compare (in memory only, default)\n\
--x\t\tLet the test crash by shrinking the output buffer\n\
-";
-
-static void NONRET
-usage(void)
-{
- fputs(help, stderr);
- exit(1);
-}
-
-int
-main(int argc, char **argv)
-{
- int opt;
- uns action = 't';
- uns crash = 0;
- log_init(argv[0]);
- while ((opt = cf_getopt(argc, argv, options, CF_NO_LONG_OPTS, NULL)) >= 0)
- switch (opt)
- {
- case 'c':
- case 'd':
- case 't':
- action = opt;
- break;
- case 'x':
- crash++;
- break;
- default:
- usage();
- }
- if (action == 't' && argc != optind+1
- || action != 't' && argc != optind+2)
- usage();
-
- void *mi, *mo;
- int li, lo;
- uns adler = 0;
-
- struct stat st;
- stat(argv[optind], &st);
- li = st.st_size;
- struct fastbuf *fi = bopen(argv[optind], O_RDONLY, 1<<16);
- if (action != 'd')
- {
- lo = li * LIZARD_MAX_MULTIPLY + LIZARD_MAX_ADD;
- li += LIZARD_NEEDS_CHARS;
- }
- else
- {
- lo = bgetl(fi);
- adler = bgetl(fi);
- li -= 8;
- }
- mi = xmalloc(li);
- mo = xmalloc(lo);
- li = bread(fi, mi, li);
- bclose(fi);
-
- printf("%d ", li);
- if (action == 'd')
- printf("->expected %d (%08x) ", lo, adler);
- fflush(stdout);
- if (action != 'd')
- lo = lizard_compress(mi, li, mo);
- else
- {
- lo = lizard_decompress(mi, mo);
- if (adler32(mo, lo) != adler)
- printf("wrong Adler32 ");
- }
- printf("-> %d ", lo);
- fflush(stdout);
-
- if (action != 't')
- {
- struct fastbuf *fo = bopen(argv[optind+1], O_CREAT | O_TRUNC | O_WRONLY, 1<<16);
- if (action == 'c')
- {
- bputl(fo, li);
- bputl(fo, adler32(mi, li));
- }
- bwrite(fo, mo, lo);
- bclose(fo);
- }
- else
- {
- int smaller_li;
- if (li >= (int) CPU_PAGE_SIZE)
- smaller_li = li - CPU_PAGE_SIZE;
- else
- smaller_li = 0;
- struct lizard_buffer *buf = lizard_alloc();
- byte *ptr = lizard_decompress_safe(mo, buf, crash ? smaller_li : li);
- if (!ptr)
- printf("err: %m");
- else if (memcmp(mi, ptr, li))
- printf("WRONG");
- else
- printf("OK");
- lizard_free(buf);
- }
- printf("\n");
-}
+++ /dev/null
-/*
- * LiZaRd -- Fast compression method based on Lempel-Ziv 77
- *
- * (c) 2004, Robert Spalek <robert@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- *
- * The file format is based on LZO1X and
- * the compression method is based on zlib.
- */
-
-#include "lib/lib.h"
-#include "lib/lizard.h"
-
-#include <string.h>
-
-typedef u16 hash_ptr_t;
-struct hash_record {
- /* the position in the original text is implicit; it is computed by locate_string() */
- hash_ptr_t next; // 0=end
- hash_ptr_t prev; // high bit: 0=record in array, 1=head in hash-table (i.e. value of hashf)
-};
-
-#define HASH_SIZE (1<<14) // size of hash-table
-#define HASH_RECORDS (1<<15) // maximum number of records in hash-table, 0 is unused ==> subtract 1
-#define CHAIN_MAX_TESTS 8 // crop longer collision chains
-#define CHAIN_GOOD_MATCH 32 // we already have a good match => end
-
-static inline uns
-hashf(const byte *string)
- /* 0..HASH_SIZE-1 */
-{
- return string[0] ^ (string[1]<<3) ^ (string[2]<<6);
-}
-
-static inline byte *
-locate_string(const byte *string, int record_id, int head)
- /* The strings are recorded into the hash-table regularly, hence there is no
- * need to store the pointer there. */
-{
- string += record_id - head;
- if (record_id >= head)
- string -= HASH_RECORDS-1;
- return (byte *)string;
-}
-
-static inline uns
-find_match(uns record_id, struct hash_record *hash_rec, const byte *string, const byte *string_end, byte **best_ptr, uns head)
- /* hash_tab[hash] == record_id points to the head of the double-linked
- * link-list of strings with the same hash. The records are statically
- * stored in circular array hash_rec (with the 1st entry unused), and the
- * pointers are just 16-bit indices. The strings in every collision chain
- * are ordered by age. */
-{
- uns count = CHAIN_MAX_TESTS;
- uns best_len = 0;
- while (record_id && count-- > 0)
- {
- byte *record_string = locate_string(string, record_id, head);
- byte *cmp = record_string;
- if (cmp[0] == string[0] && cmp[2] == string[2])
- /* implies cmp[1] == string[1] */
- {
- if (cmp[3] == string[3])
- {
- cmp += 4;
- if (*cmp++ == string[4] && *cmp++ == string[5]
- && *cmp++ == string[6] && *cmp++ == string[7])
- {
- const byte *str = string + 8;
- while (str <= string_end && *cmp++ == *str++);
- }
- }
- else
- cmp += 4;
- uns len = cmp - record_string - 1; /* cmp points 2 characters after the last match */
- if (len > best_len)
- {
- best_len = len;
- *best_ptr = record_string;
- if (best_len >= CHAIN_GOOD_MATCH) /* optimization */
- break;
- }
- }
- record_id = hash_rec[record_id].next;
- }
- return best_len;
-}
-
-static uns
-hash_string(hash_ptr_t *hash_tab, uns hash, struct hash_record *hash_rec, /*byte *string,*/ uns head, uns *to_delete)
- /* We reuse hash-records stored in a circular array. First, delete the old
- * one and then add the new one in front of the link-list. */
-{
- struct hash_record *rec = hash_rec + head;
- if (*to_delete) /* unlink the original record */
- {
- uns prev_id = rec->prev & ((1<<15)-1);
- if (rec->prev & (1<<15)) /* was a head */
- hash_tab[prev_id] = 0;
- else /* thanks to the ordering, this was a tail */
- hash_rec[prev_id].next = 0;
- }
- rec->next = hash_tab[hash];
- rec->prev = (1<<15) | hash;
- hash_rec[rec->next].prev = head;
- hash_tab[hash] = head; /* add the new record before the link-list */
-
- if (++head >= HASH_RECORDS) /* circular buffer, reuse old records, 0 is unused */
- {
- head = 1;
- *to_delete = 1;
- }
- return head;
-}
-
-static inline byte *
-dump_unary_value(byte *out, uns l)
-{
- while (l > 255)
- {
- l -= 255;
- *out++ = 0;
- }
- *out++ = l;
- return out;
-}
-
-static byte *
-flush_copy_command(uns bof, byte *out, const byte *start, uns len)
-{
- if (bof && len <= 238)
- *out++ = len + 17;
- else if (len < 4)
- {
- /* cannot happen when !!bof */
- out[-2] |= len; /* invariant: lowest 2 bits 2 bytes back */
-#ifdef CPU_ALLOW_UNALIGNED
- * (u32*) out = * (u32*) start;
- return out + len;
-#else
- while (len-- > 0)
- *out++ = *start++;
- return out;
-#endif
- }
- else
- {
- /* leave 2 least significant bits of out[-2] set to 0 */
- if (len <= 18)
- *out++ = len - 3;
- else
- {
- *out++ = 0;
- out = dump_unary_value(out, len - 18);
- }
- }
- memcpy(out, start, len);
- return out + len;
-}
-
-int
-lizard_compress(const byte *in, uns in_len, byte *out)
- /* Requires out being allocated for at least in_len * LIZARD_MAX_MULTIPLY +
- * LIZARD_MAX_ADD. There must be at least LIZARD_NEEDS_CHARS characters
- * allocated after in. Returns the actual compressed length. */
-{
- hash_ptr_t hash_tab[HASH_SIZE];
- struct hash_record hash_rec[HASH_RECORDS];
- const byte *in_end = in + in_len;
- byte *out_start = out;
- const byte *copy_start = in;
- uns head = 1; /* 0 in unused */
- uns to_delete = 0, bof = 1;
- bzero(hash_tab, sizeof(hash_tab)); /* init the hash-table */
- while (in < in_end)
- {
- uns hash = hashf(in);
- byte *best = NULL;
- uns len = find_match(hash_tab[hash], hash_rec, in, in_end, &best, head);
- if (len < 3)
-#if 0 // TODO: now, our routine does not detect matches of length 2
- if (len == 2 && (in - best->string - 1) < (1<<10))
- { /* pass-thru */ }
- else
-#endif
- {
-literal:
- head = hash_string(hash_tab, hash, hash_rec, head, &to_delete);
- in++; /* add a literal */
- continue;
- }
-
- if (in + len > in_end) /* crop EOF */
- {
- len = in_end - in;
- if (len < 3)
- goto literal;
- }
- /* Record the match. */
- uns copy_len = in - copy_start;
- uns is_in_copy_mode = bof || copy_len >= 4;
- uns shift = in - best - 1;
- /* Try to use a 2-byte sequence. */
-#if 0
- if (len == 2)
- {
- if (is_in_copy_mode || !copy_len) /* cannot use with 0 copied characters, because this bit pattern is reserved for copy mode */
- goto literal;
- else
- goto dump_2sequence;
- } else
-#endif
- /* now, len >= 3 */
- if (shift < (1<<11) && len <= 8)
- {
- shift |= (len-3 + 2)<<11;
-dump_2sequence:
- if (copy_len)
- out = flush_copy_command(bof, out, copy_start, copy_len);
- *out++ = (shift>>6) & ~3; /* shift fits into 10 bits */
- *out++ = shift & 0xff;
- }
- else if (len == 3 && is_in_copy_mode)
- {
- if (shift < (1<<11) + (1<<10)) /* optimisation for length-3 matches after a copy command */
- {
- shift -= 1<<11;
- goto dump_2sequence; /* shift has 11 bits and contains also len */
- }
- else /* avoid 3-sequence compressed to 3 sequence if it can simply be appended */
- goto literal;
- }
- /* We have to use a 3-byte sequence. */
- else
- {
- if (copy_len)
- out = flush_copy_command(bof, out, copy_start, copy_len);
- if (shift < (1<<14))
- {
- if (len <= 33)
- *out++ = (1<<5) | (len-2);
- else
- {
- *out++ = 1<<5;
- out = dump_unary_value(out, len - 33);
- }
- }
- else /* shift < (1<<15)-1 becase of HASH_RECORDS */
- {
- shift++; /* because shift==0 is reserved for EOF */
- byte pos_bit = ((shift>>11) & (1<<3)) | (1<<4);
- if (len <= 9)
- *out++ = pos_bit | (len-2);
- else
- {
- *out++ = pos_bit;
- out = dump_unary_value(out, len - 9);
- }
- }
- *out++ = (shift>>6) & ~3; /* rest of shift fits into 14 bits */
- *out++ = shift & 0xff;
- }
- /* Update the hash-table. */
- head = hash_string(hash_tab, hash, hash_rec, head, &to_delete);
- for (uns i=1; i<len; i++)
- head = hash_string(hash_tab, hashf(in+i), hash_rec, head, &to_delete);
- in += len;
- copy_start = in;
- bof = 0;
- }
- uns copy_len = in - copy_start;
- if (copy_len)
- out = flush_copy_command(bof, out, copy_start, copy_len);
- *out++ = 17; /* add EOF */
- *out++ = 0;
- *out++ = 0;
- return out - out_start;
-}
-
-static inline byte *
-read_unary_value(const byte *in, uns *val)
-{
- uns l = 0;
- while (!*in++)
- l += 255;
- l += in[-1];
- *val = l;
- return (byte *)in;
-}
-
-int
-lizard_decompress(const byte *in, byte *out)
- /* Requires out being allocated for the decompressed length must be known
- * beforehand. It is desirable to lock the following memory page for
- * read-only access to prevent buffer overflow. Returns the actual
- * decompressed length or a negative number when an error has occured. */
-{
- byte *out_start = out;
- uns expect_copy_command = 1;
- uns len;
- if (*in > 17) /* short copy command at BOF */
- {
- len = *in++ - 17;
- goto perform_copy_command;
- }
- while (1)
- {
- uns c = *in++;
- uns pos;
- if (c < 0x10)
- if (expect_copy_command == 1)
- {
- if (!c)
- {
- in = read_unary_value(in, &len);
- len += 18;
- }
- else
- len = c + 3;
- goto perform_copy_command;
- }
- else
- {
- pos = ((c&0xc)<<6) | *in++;
- if (expect_copy_command == 2)
- {
- pos += 1<<11;
- len = 3;
- }
- else
- len = 2;
- pos++;
- }
- else if (c < 0x20)
- {
- pos = (c&0x8)<<11;
- len = c&0x7;
- if (!len)
- {
- in = read_unary_value(in, &len);
- len += 9;
- }
- else
- len += 2;
- pos |= (*in++ & 0xfc)<<6;
- pos |= *in++;
- if (!pos) /* EOF */
- break;
- /* do NOT pos++ */
- }
- else if (c < 0x40)
- {
- len = c&0x1f;
- if (!len)
- {
- in = read_unary_value(in, &len);
- len += 33;
- }
- else
- len += 2;
- pos = (*in++ & 0xfc)<<6;
- pos |= *in++;
- pos++;
- }
- else /* high bits encode the length */
- {
- len = ((c&0xe0)>>5) -2 +3;
- pos = (c&0x1c)<<6;
- pos |= *in++;
- pos++;
- }
- /* take from the sliding window */
- if (len <= pos)
- {
- memcpy(out, out-pos, len);
- out += len;
- }
- else
- { /* overlapping */
- for (; len-- > 0; out++)
- *out = *(out-pos);
- /* It's tempting to use out[-pos] above, but unfortunately it's not the same */
- }
- /* extract the copy-bits */
- len = in[-2] & 0x3;
- if (len)
- {
- expect_copy_command = 0;
-#ifdef CPU_ALLOW_UNALIGNED
- * (u32*) out = * (u32*) in;
- out += len;
- in += len;
-#else
- while (len-- > 0)
- *out++ = *in++;
-#endif
- }
- else
- expect_copy_command = 1;
- continue;
-
-perform_copy_command:
- expect_copy_command = 2;
- memcpy(out, in, len);
- out += len;
- in += len;
- }
-
- return out - out_start;
-}
-
-/*
-
-Description of the LZO1X format :
-=================================
-
-The meaning of the commands depends on the current mode. It can be either
-the compressed mode or the copy mode. In some cases, the compressed mode
-also distinguishes whether we just left the copy mode or not.
-
-Beginning of file:
-------------------
-
-Start in copy mode. If the first byte is 00010001, it means probably EOF (empty file),
-so switch to the compressed mode. If it is bigger, subtract 17 and copy this number of
-the following characters to the output and switch to the compressed mode.
-If it is smaller, interpret it as a regular copy mode command.
-
-Compressed mode:
-----------------
-
-Read the first byte of the sequence and determine the type of bit encoding by
-looking at the most significant bits. The sequence is always at least 2 bytes
-long. Decode sequences of these types until the EOF or END marker is read.
-
- length L = length of the text taken from the sliding window
-
- If L=0, then count the number Z of the following zero bytes and add Z*255
- to the value of the following non-zero byte. This allows setting L
- arbitrarily high.
-
- position p = relative position of the beginning of the text
-
- Exception: 00010001 00000000 00000000 means EOF
-
- copying C = length 1..3 of copied characters or END=0
-
- C following characters will be copied from the compressed text to the
- output. The number CC is always stored in the 2 least significant bits of
- the second last byte of the sequence.
-
- If END is read, the algorithm switches to the copy mode.
-
-pattern length position
-
-0000ppCC pppppppp 2 10 bits [default interpretation]
-0000ppCC pppppppp 3 10 bits + 2048 [just after return from copy mode]
-0001pLLL L* ppppppCC pppppppp 3..9 + extend 15 bits [pos 0 interpreted as EOF]
-001LLLLL L* ppppppCC pppppppp 3..33 + extend 14 bits
-LLLpppCC pppppppp 3..8 11 bits [LLL >= 010]
-
-Copy mode:
-----------
-
-Read the first byte and, if the most significant bits are 0000, perform the
-following command, otherwise switch to the compressed mode (and evaluate the
-command there).
-
-pattern length position
-
-0000LLLL L* 4..18 + extend N/A
-
- Copy L characters from the compressed text to the output. The overhead for
- incompressible strings is only roughly 1/256 + epsilon.
-
-*/
+++ /dev/null
-/*
- * LiZaRd -- Fast compression method based on Lempel-Ziv 77
- *
- * (c) 2004, Robert Spalek <robert@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#ifndef _UCW_LIZARD_H
-#define _UCW_LIZARD_H
-
-#define LIZARD_NEEDS_CHARS 8
- /* The compression routine needs input buffer 8 characters longer, because it
- * does not check the input bounds all the time. */
-#define LIZARD_MAX_MULTIPLY 23./22
-#define LIZARD_MAX_ADD 4
- /* In the worst case, the compressed file will not be longer than its
- * original length * 23/22 + 4.
- *
- * The additive constant is for EOF and the header of the file.
- *
- * The multiplicative constant comes from 19-byte incompressible string
- * followed by a 3-sequence that can be compressed into 2-byte link. This
- * breaks the copy-mode and it needs to be restarted with a new header. The
- * total length is 2(header) + 19(string) + 2(link) = 23.
- */
-
-/* lizard.c */
-int lizard_compress(const byte *in, uns in_len, byte *out);
-int lizard_decompress(const byte *in, byte *out);
-
-/* lizard-safe.c */
-struct lizard_buffer;
-
-struct lizard_buffer *lizard_alloc(void);
-void lizard_free(struct lizard_buffer *buf);
-byte *lizard_decompress_safe(const byte *in, struct lizard_buffer *buf, uns expected_length);
-
-/* adler32.c */
-uns update_adler32(uns adler, const byte *ptr, uns len);
-
-static inline uns
-adler32(const byte *buf, uns len)
-{
- return update_adler32(1, buf, len);
-}
-
-#endif
+++ /dev/null
-/*
- * UCW Library -- Keeping of Log Files
- *
- * (c) 1997--2006 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-#include "lib/lfs.h"
-#include "lib/threads.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include <time.h>
-
-static char *log_name_patt;
-static int log_params;
-static int log_filename_size;
-volatile int log_switch_nest;
-
-static int
-do_log_switch(struct tm *tm)
-{
- int fd, l;
- char name[log_filename_size];
- int switched = 0;
-
- if (!log_name_patt ||
- log_filename[0] && !log_params)
- return 0;
- ucwlib_lock();
- log_switch_nest++;
- l = strftime(name, log_filename_size, log_name_patt, tm);
- if (l < 0 || l >= log_filename_size)
- die("Error formatting log file name: %m");
- if (strcmp(name, log_filename))
- {
- strcpy(log_filename, name);
- fd = sh_open(name, O_WRONLY | O_CREAT | O_APPEND, 0666);
- if (fd < 0)
- die("Unable to open log file %s: %m", name);
- dup2(fd, 2);
- close(fd);
- switched = 1;
- }
- log_switch_nest--;
- ucwlib_unlock();
- return switched;
-}
-
-int
-log_switch(void)
-{
- time_t tim = time(NULL);
- return do_log_switch(localtime(&tim));
-}
-
-static void
-internal_log_switch(struct tm *tm)
-{
- if (!log_switch_nest)
- do_log_switch(tm);
-}
-
-void
-log_file(const char *name)
-{
- if (name)
- {
- if (log_name_patt)
- xfree(log_name_patt);
- if (log_filename)
- {
- xfree(log_filename);
- log_filename = NULL;
- }
- log_name_patt = xstrdup(name);
- log_params = !!strchr(name, '%');
- log_filename_size = strlen(name) + 64; /* 63 is an upper bound on expansion of % escapes */
- log_filename = xmalloc(log_filename_size);
- log_filename[0] = 0;
- log_switch();
- log_switch_hook = internal_log_switch;
- }
-}
-
-void
-log_fork(void)
-{
- log_pid = getpid();
-}
-
-#ifdef TEST
-
-int main(int argc, char **argv)
-{
- log_init(argv[0]);
- log_file("/proc/self/fd/1");
- for (int i=1; i<argc; i++)
- log(L_INFO, argv[i]);
- return 0;
-}
-
-#endif
+++ /dev/null
-/*
- * UCW Library -- Logging
- *
- * (c) 1997--2006 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-#include <sys/time.h>
-#include <time.h>
-#include <alloca.h>
-
-static char log_progname[32];
-char *log_filename;
-char *log_title;
-int log_pid;
-int log_precise_timings;
-void (*log_die_hook)(void);
-void (*log_switch_hook)(struct tm *tm);
-
-void
-vmsg(unsigned int cat, const char *fmt, va_list args)
-{
- struct timeval tv;
- struct tm tm;
- byte *buf, *p;
- int buflen = 256;
- int l, l0, r;
- va_list args2;
-
- gettimeofday(&tv, NULL);
- if (!localtime_r(&tv.tv_sec, &tm))
- bzero(&tm, sizeof(tm));
-
- if (log_switch_hook)
- log_switch_hook(&tm);
- while (1)
- {
- p = buf = alloca(buflen);
- *p++ = cat;
- /* We cannot use strftime() here, because it's not re-entrant */
- p += sprintf(p, " %4d-%02d-%02d %02d:%02d:%02d", tm.tm_year+1900, tm.tm_mon+1, tm.tm_mday,
- tm.tm_hour, tm.tm_min, tm.tm_sec);
- if (log_precise_timings)
- p += sprintf(p, ".%06d", (int)tv.tv_usec);
- *p++ = ' ';
- if (log_title)
- {
- if (log_pid)
- p += sprintf(p, "[%s (%d)] ", log_title, log_pid);
- else
- p += sprintf(p, "[%s] ", log_title);
- }
- else
- {
- if (log_pid)
- p += sprintf(p, "[%d] ", log_pid);
- }
- l0 = p - buf + 1;
- r = buflen - l0;
- va_copy(args2, args);
- l = vsnprintf(p, r, fmt, args2);
- va_end(args2);
- if (l < 0)
- l = r;
- else if (l < r)
- {
- while (*p)
- {
- if (*p < 0x20 && *p != '\t')
- *p = 0x7f;
- p++;
- }
- *p = '\n';
- write(2, buf, l + l0);
- return;
- }
- buflen = l + l0 + 1;
- }
-}
-
-void
-msg(unsigned int cat, const char *fmt, ...)
-{
- va_list args;
-
- va_start(args, fmt);
- vmsg(cat, fmt, args);
- va_end(args);
-}
-
-void
-die(const char *fmt, ...)
-{
- va_list args;
-
- va_start(args, fmt);
- vmsg(L_FATAL, fmt, args);
- va_end(args);
- if (log_die_hook)
- log_die_hook();
-#ifdef DEBUG_DIE_BY_ABORT
- abort();
-#else
- exit(1);
-#endif
-}
-
-void
-assert_failed(const char *assertion, const char *file, int line)
-{
- msg(L_FATAL, "Assertion `%s' failed at %s:%d", assertion, file, line);
- abort();
-}
-
-void
-assert_failed_noinfo(void)
-{
- die("Internal error: Assertion failed.");
-}
-
-static const char *
-log_basename(const char *n)
-{
- const char *p = n;
-
- while (*n)
- if (*n++ == '/')
- p = n;
- return p;
-}
-
-void
-log_init(const char *argv0)
-{
- if (argv0)
- {
- strncpy(log_progname, log_basename(argv0), sizeof(log_progname)-1);
- log_progname[sizeof(log_progname)-1] = 0;
- log_title = log_progname;
- }
-}
+++ /dev/null
-/*
- * UCW Library -- Main Loop
- *
- * (c) 2004--2006 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#undef LOCAL_DEBUG
-
-#include "lib/lib.h"
-#include "lib/mainloop.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <unistd.h>
-#include <signal.h>
-#include <fcntl.h>
-#include <errno.h>
-#include <time.h>
-#include <sys/poll.h>
-#include <sys/wait.h>
-#include <sys/time.h>
-
-timestamp_t main_now;
-sh_time_t main_now_seconds;
-uns main_shutdown;
-
-clist main_timer_list, main_file_list, main_hook_list, main_process_list;
-static uns main_file_cnt;
-static uns main_poll_table_obsolete, main_poll_table_size;
-static struct pollfd *main_poll_table;
-static uns main_sigchld_set_up;
-
-void
-main_get_time(void)
-{
- struct timeval tv;
- gettimeofday(&tv, NULL);
- main_now_seconds = tv.tv_sec;
- main_now = (timestamp_t)tv.tv_sec * 1000 + tv.tv_usec / 1000;
- // DBG("It's %lld o'clock", (long long) main_now);
-}
-
-void
-main_init(void)
-{
- DBG("MAIN: Initializing");
- clist_init(&main_timer_list);
- clist_init(&main_file_list);
- clist_init(&main_hook_list);
- clist_init(&main_process_list);
- main_file_cnt = 0;
- main_poll_table_obsolete = 1;
- main_get_time();
-}
-
-void
-timer_add(struct main_timer *tm, timestamp_t expires)
-{
- if (expires)
- DBG("MAIN: Setting timer %p (expire at now+%lld)", tm, (long long)(expires-main_now));
- else
- DBG("MAIN: Clearing timer %p", tm);
- if (tm->expires)
- clist_remove(&tm->n);
- tm->expires = expires;
- if (expires)
- {
- cnode *t = main_timer_list.head.next;
- while (t != &main_timer_list.head && ((struct main_timer *) t)->expires < expires)
- t = t->next;
- clist_insert_before(&tm->n, t);
- }
-}
-
-void
-timer_del(struct main_timer *tm)
-{
- timer_add(tm, 0);
-}
-
-static void
-file_timer_expired(struct main_timer *tm)
-{
- struct main_file *fi = tm->data;
- timer_del(&fi->timer);
- if (fi->error_handler)
- fi->error_handler(fi, MFERR_TIMEOUT);
-}
-
-void
-file_add(struct main_file *fi)
-{
- DBG("MAIN: Adding file %p (fd=%d)", fi, fi->fd);
- ASSERT(!fi->n.next);
- clist_add_tail(&main_file_list, &fi->n);
- fi->timer.handler = file_timer_expired;
- fi->timer.data = fi;
- main_file_cnt++;
- main_poll_table_obsolete = 1;
- if (fcntl(fi->fd, F_SETFL, O_NONBLOCK) < 0)
- msg(L_ERROR, "Error setting fd %d to non-blocking mode: %m. Keep fingers crossed.", fi->fd);
-}
-
-void
-file_chg(struct main_file *fi)
-{
- struct pollfd *p = fi->pollfd;
- if (p)
- {
- p->events = 0;
- if (fi->read_handler)
- p->events |= POLLIN | POLLHUP | POLLERR;
- if (fi->write_handler)
- p->events |= POLLOUT | POLLERR;
- }
-}
-
-void
-file_del(struct main_file *fi)
-{
- DBG("MAIN: Deleting file %p (fd=%d)", fi, fi->fd);
- ASSERT(fi->n.next);
- timer_del(&fi->timer);
- clist_remove(&fi->n);
- main_file_cnt--;
- main_poll_table_obsolete = 1;
- fi->n.next = fi->n.prev = NULL;
-}
-
-static int
-file_read_handler(struct main_file *fi)
-{
- while (fi->rpos < fi->rlen)
- {
- int l = read(fi->fd, fi->rbuf + fi->rpos, fi->rlen - fi->rpos);
- DBG("MAIN: FD %d: read %d", fi->fd, l);
- if (l < 0)
- {
- if (errno != EINTR && errno != EAGAIN && fi->error_handler)
- fi->error_handler(fi, MFERR_READ);
- return 0;
- }
- else if (!l)
- break;
- fi->rpos += l;
- }
- DBG("MAIN: FD %d done read %d of %d", fi->fd, fi->rpos, fi->rlen);
- fi->read_handler = NULL;
- file_chg(fi);
- fi->read_done(fi);
- return 1;
-}
-
-static int
-file_write_handler(struct main_file *fi)
-{
- while (fi->wpos < fi->wlen)
- {
- int l = write(fi->fd, fi->wbuf + fi->wpos, fi->wlen - fi->wpos);
- DBG("MAIN: FD %d: write %d", fi->fd, l);
- if (l < 0)
- {
- if (errno != EINTR && errno != EAGAIN && fi->error_handler)
- fi->error_handler(fi, MFERR_WRITE);
- return 0;
- }
- fi->wpos += l;
- }
- DBG("MAIN: FD %d done write %d", fi->fd, fi->wpos);
- fi->write_handler = NULL;
- file_chg(fi);
- fi->write_done(fi);
- return 1;
-}
-
-void
-file_read(struct main_file *fi, void *buf, uns len)
-{
- ASSERT(fi->n.next);
- if (len)
- {
- fi->read_handler = file_read_handler;
- fi->rbuf = buf;
- fi->rpos = 0;
- fi->rlen = len;
- }
- else
- {
- fi->read_handler = NULL;
- fi->rbuf = NULL;
- fi->rpos = fi->rlen = 0;
- }
- file_chg(fi);
-}
-
-void
-file_write(struct main_file *fi, void *buf, uns len)
-{
- ASSERT(fi->n.next);
- if (len)
- {
- fi->write_handler = file_write_handler;
- fi->wbuf = buf;
- fi->wpos = 0;
- fi->wlen = len;
- }
- else
- {
- fi->write_handler = NULL;
- fi->wbuf = NULL;
- fi->wpos = fi->wlen = 0;
- }
- file_chg(fi);
-}
-
-void
-file_set_timeout(struct main_file *fi, timestamp_t expires)
-{
- ASSERT(fi->n.next);
- timer_add(&fi->timer, expires);
-}
-
-void
-file_close_all(void)
-{
- CLIST_FOR_EACH(struct main_file *, f, main_file_list)
- close(f->fd);
-}
-
-void
-hook_add(struct main_hook *ho)
-{
- DBG("MAIN: Adding hook %p", ho);
- ASSERT(!ho->n.next);
- clist_add_tail(&main_hook_list, &ho->n);
-}
-
-void
-hook_del(struct main_hook *ho)
-{
- DBG("MAIN: Deleting hook %p", ho);
- ASSERT(ho->n.next);
- clist_remove(&ho->n);
- ho->n.next = ho->n.prev = NULL;
-}
-
-static void
-main_sigchld_handler(int x UNUSED)
-{
- DBG("SIGCHLD received");
-}
-
-void
-process_add(struct main_process *mp)
-{
- DBG("MAIN: Adding process %p (pid=%d)", mp, mp->pid);
- ASSERT(!mp->n.next);
- ASSERT(mp->handler);
- clist_add_tail(&main_process_list, &mp->n);
- if (!main_sigchld_set_up)
- {
- struct sigaction sa;
- bzero(&sa, sizeof(sa));
- sa.sa_handler = main_sigchld_handler;
- sa.sa_flags = SA_NOCLDSTOP | SA_RESTART;
- sigaction(SIGCHLD, &sa, NULL);
- main_sigchld_set_up = 1;
- }
-}
-
-void
-process_del(struct main_process *mp)
-{
- DBG("MAIN: Deleting process %p (pid=%d)", mp, mp->pid);
- ASSERT(mp->n.next);
- clist_remove(&mp->n);
- mp->n.next = NULL;
-}
-
-int
-process_fork(struct main_process *mp)
-{
- pid_t pid = fork();
- if (pid < 0)
- {
- DBG("MAIN: Fork failed");
- mp->status = -1;
- format_exit_status(mp->status_msg, -1);
- mp->handler(mp);
- return 1;
- }
- else if (!pid)
- return 0;
- else
- {
- DBG("MAIN: Forked process %d", (int) pid);
- mp->pid = pid;
- process_add(mp);
- return 1;
- }
-}
-
-void
-main_debug(void)
-{
-#ifdef CONFIG_DEBUG
- msg(L_DEBUG, "### Main loop status on %lld", (long long)main_now);
- msg(L_DEBUG, "\tActive timers:");
- struct main_timer *tm;
- CLIST_WALK(tm, main_timer_list)
- msg(L_DEBUG, "\t\t%p (expires %lld, data %p)", tm, (long long)(tm->expires ? tm->expires-main_now : 999999), tm->data);
- struct main_file *fi;
- msg(L_DEBUG, "\tActive files:");
- CLIST_WALK(fi, main_file_list)
- msg(L_DEBUG, "\t\t%p (fd %d, rh %p, wh %p, eh %p, expires %lld, data %p)",
- fi, fi->fd, fi->read_handler, fi->write_handler, fi->error_handler,
- (long long)(fi->timer.expires ? fi->timer.expires-main_now : 999999), fi->data);
- msg(L_DEBUG, "\tActive hooks:");
- struct main_hook *ho;
- CLIST_WALK(ho, main_hook_list)
- msg(L_DEBUG, "\t\t%p (func %p, data %p)", ho, ho->handler, ho->data);
- msg(L_DEBUG, "\tActive processes:");
- struct main_process *pr;
- CLIST_WALK(pr, main_process_list)
- msg(L_DEBUG, "\t\t%p (pid %d, data %p)", pr, pr->pid, pr->data);
-#endif
-}
-
-static void
-main_rebuild_poll_table(void)
-{
- struct main_file *fi;
- if (main_poll_table_size < main_file_cnt)
- {
- if (main_poll_table)
- xfree(main_poll_table);
- else
- main_poll_table_size = 1;
- while (main_poll_table_size < main_file_cnt)
- main_poll_table_size *= 2;
- main_poll_table = xmalloc(sizeof(struct pollfd) * main_poll_table_size);
- }
- struct pollfd *p = main_poll_table;
- DBG("MAIN: Rebuilding poll table: %d of %d entries set", main_file_cnt, main_poll_table_size);
- CLIST_WALK(fi, main_file_list)
- {
- p->fd = fi->fd;
- fi->pollfd = p++;
- file_chg(fi);
- }
- main_poll_table_obsolete = 0;
-}
-
-void
-main_loop(void)
-{
- DBG("MAIN: Entering main_loop");
- ASSERT(main_timer_list.head.next);
-
- struct main_file *fi;
- struct main_hook *ho;
- struct main_timer *tm;
- struct main_process *pr;
- cnode *tmp;
-
- for (;;)
- {
- main_get_time();
- timestamp_t wake = main_now + 1000000000;
- while ((tm = clist_head(&main_timer_list)) && tm->expires <= main_now)
- {
- DBG("MAIN: Timer %p expired at now-%lld", tm, (long long)(main_now - tm->expires));
- tm->handler(tm);
- }
- int hook_min = HOOK_RETRY;
- int hook_max = HOOK_SHUTDOWN;
- CLIST_WALK_DELSAFE(ho, main_hook_list, tmp)
- {
- DBG("MAIN: Hook %p", ho);
- int ret = ho->handler(ho);
- hook_min = MIN(hook_min, ret);
- hook_max = MAX(hook_max, ret);
- }
- if (hook_min == HOOK_SHUTDOWN ||
- hook_min == HOOK_DONE && hook_max == HOOK_DONE ||
- main_shutdown)
- {
- DBG("MAIN: Shut down by %s", main_shutdown ? "main_shutdown" : "a hook");
- return;
- }
- if (hook_max == HOOK_RETRY)
- wake = 0;
- if (main_poll_table_obsolete)
- main_rebuild_poll_table();
- if (!clist_empty(&main_process_list))
- {
- int stat;
- pid_t pid;
- wake = MIN(wake, main_now + 10000);
- while ((pid = waitpid(-1, &stat, WNOHANG)) > 0)
- {
- DBG("MAIN: Child %d exited with status %x", pid, stat);
- CLIST_WALK(pr, main_process_list)
- if (pr->pid == pid)
- {
- pr->status = stat;
- process_del(pr);
- format_exit_status(pr->status_msg, pr->status);
- DBG("MAIN: Calling process exit handler");
- pr->handler(pr);
- break;
- }
- wake = 0;
- }
- }
- /* FIXME: Here is a small race window where SIGCHLD can come unnoticed. */
- if ((tm = clist_head(&main_timer_list)) && tm->expires < wake)
- wake = tm->expires;
- int timeout = (wake ? wake - main_now : 0);
- DBG("MAIN: Poll for %d fds and timeout %d ms", main_file_cnt, timeout);
- if (poll(main_poll_table, main_file_cnt, timeout))
- {
- struct pollfd *p = main_poll_table;
- main_get_time();
- CLIST_WALK(fi, main_file_list)
- {
- if (p->revents & (POLLIN | POLLHUP | POLLERR))
- {
- do
- DBG("MAIN: Read event on fd %d", p->fd);
- while (fi->read_handler && fi->read_handler(fi) && !main_poll_table_obsolete);
- if (main_poll_table_obsolete) /* File entries have been inserted or deleted => better not risk continuing to nowhere */
- break;
- }
- if (p->revents & (POLLOUT | POLLERR))
- {
- do
- DBG("MAIN: Write event on fd %d", p->fd);
- while (fi->write_handler && fi->write_handler(fi) && !main_poll_table_obsolete);
- if (main_poll_table_obsolete)
- break;
- }
- p++;
- }
- }
- }
-}
-
-#ifdef TEST
-
-static struct main_process mp;
-static struct main_file fin, fout;
-static struct main_hook hook;
-static struct main_timer tm;
-
-static byte rb[16];
-
-static void dread(struct main_file *fi)
-{
- if (fi->rpos < fi->rlen)
- {
- log(L_INFO, "Read EOF");
- file_del(fi);
- }
- else
- {
- log(L_INFO, "Read done");
- file_read(fi, rb, sizeof(rb));
- }
-}
-
-static void derror(struct main_file *fi, int cause)
-{
- log(L_INFO, "Error: %m !!! (cause %d)", cause);
- file_del(fi);
-}
-
-static void dwrite(struct main_file *fi UNUSED)
-{
- log(L_INFO, "Write done");
-}
-
-static int dhook(struct main_hook *ho UNUSED)
-{
- log(L_INFO, "Hook called");
- return 0;
-}
-
-static void dtimer(struct main_timer *tm)
-{
- log(L_INFO, "Timer tick");
- timer_add(tm, main_now + 10000);
-}
-
-static void dentry(void)
-{
- log(L_INFO, "*** SUBPROCESS START ***");
- sleep(2);
- log(L_INFO, "*** SUBPROCESS FINISH ***");
- exit(0);
-}
-
-static void dexit(struct main_process *pr)
-{
- log(L_INFO, "Subprocess %d exited with status %x", pr->pid, pr->status);
-}
-
-int
-main(void)
-{
- log_init(NULL);
- main_init();
-
- fin.fd = 0;
- fin.read_done = dread;
- fin.error_handler = derror;
- file_add(&fin);
- file_read(&fin, rb, sizeof(rb));
-
- fout.fd = 1;
- fout.write_done = dwrite;
- fout.error_handler = derror;
- file_add(&fout);
- file_write(&fout, "Hello, world!\n", 14);
-
- hook.handler = dhook;
- hook_add(&hook);
-
- tm.handler = dtimer;
- timer_add(&tm, main_now + 1000);
-
- mp.handler = dexit;
- if (!process_fork(&mp))
- dentry();
-
- main_debug();
-
- main_loop();
- log(L_INFO, "Finished.");
-}
-
-#endif
+++ /dev/null
-/*
- * UCW Library -- Main Loop
- *
- * (c) 2004--2005 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#ifndef _UCW_MAINLOOP_H
-#define _UCW_MAINLOOP_H
-
-#include "lib/clists.h"
-
-extern timestamp_t main_now; /* Current time in milliseconds since UNIX epoch */
-extern sh_time_t main_now_seconds; /* Current time in seconds since the epoch */
-extern uns main_shutdown;
-extern clist main_timer_list, main_file_list, main_hook_list, main_process_list;
-
-/* User-defined fields are marked with [*], all other fields must be initialized to zero. */
-
-/* Timers */
-
-struct main_timer {
- cnode n;
- timestamp_t expires;
- void (*handler)(struct main_timer *tm); /* [*] Function to be called when the timer expires. Must re-add/del the timer.*/
- void *data; /* [*] Data for use by the handler */
-};
-
-void timer_add(struct main_timer *tm, timestamp_t expires); /* Can modify a running timer, too */
-void timer_del(struct main_timer *tm);
-
-void main_get_time(void); /* Refresh main_now */
-
-/* Files to poll */
-
-struct main_file {
- cnode n;
- int fd; /* [*] File descriptor */
- int (*read_handler)(struct main_file *fi); /* [*] To be called when ready for reading/writing; must call file_chg() afterwards */
- int (*write_handler)(struct main_file *fi);
- void (*error_handler)(struct main_file *fi, int cause); /* [*] Handler to call on errors */
- void *data; /* [*] Data for use by the handlers */
- byte *rbuf; /* Read/write pointers for use by file_read/write */
- uns rpos, rlen;
- byte *wbuf;
- uns wpos, wlen;
- void (*read_done)(struct main_file *fi); /* [*] Called when file_read is finished; rpos < rlen if EOF */
- void (*write_done)(struct main_file *fi); /* [*] Called when file_write is finished */
- struct main_timer timer;
- struct pollfd *pollfd;
-};
-
-enum main_file_err_cause {
- MFERR_READ,
- MFERR_WRITE,
- MFERR_TIMEOUT
-};
-
-void file_add(struct main_file *fi);
-void file_chg(struct main_file *fi);
-void file_del(struct main_file *fi);
-void file_read(struct main_file *fi, void *buf, uns len);
-void file_write(struct main_file *fi, void *buf, uns len);
-void file_set_timeout(struct main_file *fi, timestamp_t expires);
-void file_close_all(void); /* Close all known main_file's; frequently used before fork() */
-
-/* Hooks to be called in each iteration of the main loop */
-
-struct main_hook {
- cnode n;
- int (*handler)(struct main_hook *ho); /* [*] Hook function; returns HOOK_xxx */
- void *data; /* [*] For use by the handler */
-};
-
-enum main_hook_return {
- HOOK_IDLE, /* Call again when the main loop becomes idle again */
- HOOK_RETRY, /* Call again as soon as possible */
- HOOK_DONE = -1, /* Shut down the main loop if all hooks return this value */
- HOOK_SHUTDOWN = -2 /* Shut down the main loop immediately */
-};
-
-void hook_add(struct main_hook *ho);
-void hook_del(struct main_hook *ho);
-
-/* Processes to watch */
-
-struct main_process {
- cnode n;
- int pid; /* Process id (0=not running) */
- int status; /* Exit status (-1=fork failed) */
- char status_msg[EXIT_STATUS_MSG_SIZE];
- void (*handler)(struct main_process *mp); /* [*] Called when the process exits; process_del done automatically */
- void *data; /* [*] For use by the handler */
-};
-
-void process_add(struct main_process *mp);
-void process_del(struct main_process *mp);
-int process_fork(struct main_process *mp);
-
-/* The main loop */
-
-void main_init(void);
-void main_loop(void);
-void main_debug(void);
-
-#endif
+++ /dev/null
-/*
- * This code implements the MD5 message-digest algorithm.
- * The algorithm is due to Ron Rivest. This code was
- * written by Colin Plumb in 1993, no copyright is claimed.
- * This code is in the public domain; do with it what you wish.
- *
- * Equivalent code is available from RSA Data Security, Inc.
- * This code has been tested against that, and is equivalent,
- * except that you don't need to include two pages of legalese
- * with every copy.
- *
- * To compute the message digest of a chunk of bytes, declare an
- * MD5Context structure, pass it to MD5Init, call MD5Update as
- * needed on buffers full of bytes, and then call MD5Final, which
- * will fill a supplied 16-byte array with the digest.
- */
-
-#include "lib/lib.h"
-#include "lib/md5.h"
-
-#include <string.h> /* for memcpy() */
-
-#ifdef CPU_LITTLE_ENDIAN
-#define byteReverse(buf, len) /* Nothing */
-#else
-void byteReverse(unsigned char *buf, unsigned longs);
-
-/*
- * Note: this code is harmless on little-endian machines.
- */
-void byteReverse(unsigned char *buf, unsigned longs)
-{
- uint32 t;
- do {
- t = (uint32) ((unsigned) buf[3] << 8 | buf[2]) << 16 |
- ((unsigned) buf[1] << 8 | buf[0]);
- *(uint32 *) buf = t;
- buf += 4;
- } while (--longs);
-}
-#endif
-
-/*
- * Start MD5 accumulation. Set bit count to 0 and buffer to mysterious
- * initialization constants.
- */
-void MD5Init(struct MD5Context *ctx)
-{
- ctx->buf[0] = 0x67452301;
- ctx->buf[1] = 0xefcdab89;
- ctx->buf[2] = 0x98badcfe;
- ctx->buf[3] = 0x10325476;
-
- ctx->bits[0] = 0;
- ctx->bits[1] = 0;
-}
-
-/*
- * Update context to reflect the concatenation of another buffer full
- * of bytes.
- */
-void MD5Update(struct MD5Context *ctx, unsigned char const *buf, unsigned len)
-{
- uint32 t;
-
- /* Update bitcount */
-
- t = ctx->bits[0];
- if ((ctx->bits[0] = t + ((uint32) len << 3)) < t)
- ctx->bits[1]++; /* Carry from low to high */
- ctx->bits[1] += len >> 29;
-
- t = (t >> 3) & 0x3f; /* Bytes already in shsInfo->data */
-
- /* Handle any leading odd-sized chunks */
-
- if (t) {
- unsigned char *p = (unsigned char *) ctx->in + t;
-
- t = 64 - t;
- if (len < t) {
- memcpy(p, buf, len);
- return;
- }
- memcpy(p, buf, t);
- byteReverse(ctx->in, 16);
- MD5Transform(ctx->buf, (uint32 *) ctx->in);
- buf += t;
- len -= t;
- }
- /* Process data in 64-byte chunks */
-
- while (len >= 64) {
- memcpy(ctx->in, buf, 64);
- byteReverse(ctx->in, 16);
- MD5Transform(ctx->buf, (uint32 *) ctx->in);
- buf += 64;
- len -= 64;
- }
-
- /* Handle any remaining bytes of data. */
-
- memcpy(ctx->in, buf, len);
-}
-
-/*
- * Final wrapup - pad to 64-byte boundary with the bit pattern
- * 1 0* (64-bit count of bits processed, MSB-first)
- */
-void MD5Final(unsigned char digest[16], struct MD5Context *ctx)
-{
- unsigned count;
- unsigned char *p;
-
- /* Compute number of bytes mod 64 */
- count = (ctx->bits[0] >> 3) & 0x3F;
-
- /* Set the first char of padding to 0x80. This is safe since there is
- always at least one byte free */
- p = ctx->in + count;
- *p++ = 0x80;
-
- /* Bytes of padding needed to make 64 bytes */
- count = 64 - 1 - count;
-
- /* Pad out to 56 mod 64 */
- if (count < 8) {
- /* Two lots of padding: Pad the first block to 64 bytes */
- memset(p, 0, count);
- byteReverse(ctx->in, 16);
- MD5Transform(ctx->buf, (uint32 *) ctx->in);
-
- /* Now fill the next block with 56 bytes */
- memset(ctx->in, 0, 56);
- } else {
- /* Pad block to 56 bytes */
- memset(p, 0, count - 8);
- }
- byteReverse(ctx->in, 14);
-
- /* Append length in bits and transform */
- ((uint32 *) ctx->in)[14] = ctx->bits[0];
- ((uint32 *) ctx->in)[15] = ctx->bits[1];
-
- MD5Transform(ctx->buf, (uint32 *) ctx->in);
- byteReverse((unsigned char *) ctx->buf, 4);
- memcpy(digest, ctx->buf, 16);
- memset((char *) ctx, 0, sizeof(ctx)); /* In case it's sensitive */
-}
-
-/* The four core functions - F1 is optimized somewhat */
-
-/* #define F1(x, y, z) (x & y | ~x & z) */
-#define F1(x, y, z) (z ^ (x & (y ^ z)))
-#define F2(x, y, z) F1(z, x, y)
-#define F3(x, y, z) (x ^ y ^ z)
-#define F4(x, y, z) (y ^ (x | ~z))
-
-/* This is the central step in the MD5 algorithm. */
-#define MD5STEP(f, w, x, y, z, data, s) \
- ( w += f(x, y, z) + data, w = w<<s | w>>(32-s), w += x )
-
-/*
- * The core of the MD5 algorithm, this alters an existing MD5 hash to
- * reflect the addition of 16 longwords of new data. MD5Update blocks
- * the data and converts bytes into longwords for this routine.
- */
-void MD5Transform(uint32 buf[4], uint32 const in[16])
-{
- uint32 a, b, c, d;
-
- a = buf[0];
- b = buf[1];
- c = buf[2];
- d = buf[3];
-
- MD5STEP(F1, a, b, c, d, in[0] + 0xd76aa478, 7);
- MD5STEP(F1, d, a, b, c, in[1] + 0xe8c7b756, 12);
- MD5STEP(F1, c, d, a, b, in[2] + 0x242070db, 17);
- MD5STEP(F1, b, c, d, a, in[3] + 0xc1bdceee, 22);
- MD5STEP(F1, a, b, c, d, in[4] + 0xf57c0faf, 7);
- MD5STEP(F1, d, a, b, c, in[5] + 0x4787c62a, 12);
- MD5STEP(F1, c, d, a, b, in[6] + 0xa8304613, 17);
- MD5STEP(F1, b, c, d, a, in[7] + 0xfd469501, 22);
- MD5STEP(F1, a, b, c, d, in[8] + 0x698098d8, 7);
- MD5STEP(F1, d, a, b, c, in[9] + 0x8b44f7af, 12);
- MD5STEP(F1, c, d, a, b, in[10] + 0xffff5bb1, 17);
- MD5STEP(F1, b, c, d, a, in[11] + 0x895cd7be, 22);
- MD5STEP(F1, a, b, c, d, in[12] + 0x6b901122, 7);
- MD5STEP(F1, d, a, b, c, in[13] + 0xfd987193, 12);
- MD5STEP(F1, c, d, a, b, in[14] + 0xa679438e, 17);
- MD5STEP(F1, b, c, d, a, in[15] + 0x49b40821, 22);
-
- MD5STEP(F2, a, b, c, d, in[1] + 0xf61e2562, 5);
- MD5STEP(F2, d, a, b, c, in[6] + 0xc040b340, 9);
- MD5STEP(F2, c, d, a, b, in[11] + 0x265e5a51, 14);
- MD5STEP(F2, b, c, d, a, in[0] + 0xe9b6c7aa, 20);
- MD5STEP(F2, a, b, c, d, in[5] + 0xd62f105d, 5);
- MD5STEP(F2, d, a, b, c, in[10] + 0x02441453, 9);
- MD5STEP(F2, c, d, a, b, in[15] + 0xd8a1e681, 14);
- MD5STEP(F2, b, c, d, a, in[4] + 0xe7d3fbc8, 20);
- MD5STEP(F2, a, b, c, d, in[9] + 0x21e1cde6, 5);
- MD5STEP(F2, d, a, b, c, in[14] + 0xc33707d6, 9);
- MD5STEP(F2, c, d, a, b, in[3] + 0xf4d50d87, 14);
- MD5STEP(F2, b, c, d, a, in[8] + 0x455a14ed, 20);
- MD5STEP(F2, a, b, c, d, in[13] + 0xa9e3e905, 5);
- MD5STEP(F2, d, a, b, c, in[2] + 0xfcefa3f8, 9);
- MD5STEP(F2, c, d, a, b, in[7] + 0x676f02d9, 14);
- MD5STEP(F2, b, c, d, a, in[12] + 0x8d2a4c8a, 20);
-
- MD5STEP(F3, a, b, c, d, in[5] + 0xfffa3942, 4);
- MD5STEP(F3, d, a, b, c, in[8] + 0x8771f681, 11);
- MD5STEP(F3, c, d, a, b, in[11] + 0x6d9d6122, 16);
- MD5STEP(F3, b, c, d, a, in[14] + 0xfde5380c, 23);
- MD5STEP(F3, a, b, c, d, in[1] + 0xa4beea44, 4);
- MD5STEP(F3, d, a, b, c, in[4] + 0x4bdecfa9, 11);
- MD5STEP(F3, c, d, a, b, in[7] + 0xf6bb4b60, 16);
- MD5STEP(F3, b, c, d, a, in[10] + 0xbebfbc70, 23);
- MD5STEP(F3, a, b, c, d, in[13] + 0x289b7ec6, 4);
- MD5STEP(F3, d, a, b, c, in[0] + 0xeaa127fa, 11);
- MD5STEP(F3, c, d, a, b, in[3] + 0xd4ef3085, 16);
- MD5STEP(F3, b, c, d, a, in[6] + 0x04881d05, 23);
- MD5STEP(F3, a, b, c, d, in[9] + 0xd9d4d039, 4);
- MD5STEP(F3, d, a, b, c, in[12] + 0xe6db99e5, 11);
- MD5STEP(F3, c, d, a, b, in[15] + 0x1fa27cf8, 16);
- MD5STEP(F3, b, c, d, a, in[2] + 0xc4ac5665, 23);
-
- MD5STEP(F4, a, b, c, d, in[0] + 0xf4292244, 6);
- MD5STEP(F4, d, a, b, c, in[7] + 0x432aff97, 10);
- MD5STEP(F4, c, d, a, b, in[14] + 0xab9423a7, 15);
- MD5STEP(F4, b, c, d, a, in[5] + 0xfc93a039, 21);
- MD5STEP(F4, a, b, c, d, in[12] + 0x655b59c3, 6);
- MD5STEP(F4, d, a, b, c, in[3] + 0x8f0ccc92, 10);
- MD5STEP(F4, c, d, a, b, in[10] + 0xffeff47d, 15);
- MD5STEP(F4, b, c, d, a, in[1] + 0x85845dd1, 21);
- MD5STEP(F4, a, b, c, d, in[8] + 0x6fa87e4f, 6);
- MD5STEP(F4, d, a, b, c, in[15] + 0xfe2ce6e0, 10);
- MD5STEP(F4, c, d, a, b, in[6] + 0xa3014314, 15);
- MD5STEP(F4, b, c, d, a, in[13] + 0x4e0811a1, 21);
- MD5STEP(F4, a, b, c, d, in[4] + 0xf7537e82, 6);
- MD5STEP(F4, d, a, b, c, in[11] + 0xbd3af235, 10);
- MD5STEP(F4, c, d, a, b, in[2] + 0x2ad7d2bb, 15);
- MD5STEP(F4, b, c, d, a, in[9] + 0xeb86d391, 21);
-
- buf[0] += a;
- buf[1] += b;
- buf[2] += c;
- buf[3] += d;
-}
+++ /dev/null
-/*
- * UCW Library -- MD5 Message Digest
- *
- * This file is in public domain (see lib/md5.c).
- */
-
-#ifndef _UCW_MD5_H
-#define _UCW_MD5_H
-
-typedef u32 uint32;
-
-struct MD5Context {
- uint32 buf[4];
- uint32 bits[2];
- unsigned char in[64];
-};
-
-void MD5Init(struct MD5Context *context);
-void MD5Update(struct MD5Context *context, unsigned char const *buf,
- unsigned len);
-void MD5Final(unsigned char digest[16], struct MD5Context *context);
-void MD5Transform(uint32 buf[4], uint32 const in[16]);
-
-#endif /* !_UCW_MD5_H */
+++ /dev/null
-/*
- * UCW Library -- MD5 Binary <-> Hex Conversions
- *
- * (c) 1997 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-#include "lib/chartype.h"
-
-#include <stdio.h>
-
-void
-md5_to_hex(const byte *s, char *d)
-{
- int i;
- for(i=0; i<MD5_SIZE; i++)
- d += sprintf(d, "%02X", *s++);
-}
-
-void
-hex_to_md5(const char *s, byte *d)
-{
- uns i, j;
- for(i=0; i<MD5_SIZE; i++)
- {
- if (!Cxdigit(s[0]) || !Cxdigit(s[1]))
- die("hex_to_md5: syntax error");
- j = Cxvalue(*s); s++;
- j = (j << 4) | Cxvalue(*s); s++;
- *d++ = j;
- }
-}
+++ /dev/null
-/*
- * UCW Library -- Memory Pools (Formatting)
- *
- * (c) 2005 Martin Mares <mj@ucw.cz>
- * (c) 2007 Pavel Charvat <pchar@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-#include "lib/mempool.h"
-
-#include <alloca.h>
-#include <stdio.h>
-#include <string.h>
-
-static char *
-mp_vprintf_at(struct mempool *mp, uns ofs, const char *fmt, va_list args)
-{
- char *ret = mp_grow(mp, ofs + 1) + ofs;
- va_list args2;
- va_copy(args2, args);
- int cnt = vsnprintf(ret, mp_avail(mp) - ofs, fmt, args2);
- va_end(args2);
- if (cnt < 0)
- {
- /* Our C library doesn't support C99 return value of vsnprintf, so we need to iterate */
- do
- {
- ret = mp_expand(mp) + ofs;
- va_copy(args2, args);
- cnt = vsnprintf(ret, mp_avail(mp) - ofs, fmt, args2);
- va_end(args2);
- }
- while (cnt < 0);
- }
- else if ((uns)cnt >= mp_avail(mp) - ofs)
- {
- ret = mp_grow(mp, cnt + 1) + ofs;
- va_copy(args2, args);
- int cnt2 = vsnprintf(ret, cnt + 1, fmt, args2);
- va_end(args2);
- ASSERT(cnt2 == cnt);
- }
- mp_end(mp, ret + cnt + 1);
- return ret - ofs;
-}
-
-char *
-mp_vprintf(struct mempool *mp, const char *fmt, va_list args)
-{
- mp_start(mp, 1);
- return mp_vprintf_at(mp, 0, fmt, args);
-}
-
-char *
-mp_printf(struct mempool *p, const char *fmt, ...)
-{
- va_list args;
- va_start(args, fmt);
- char *res = mp_vprintf(p, fmt, args);
- va_end(args);
- return res;
-}
-
-char *
-mp_vprintf_append(struct mempool *mp, char *ptr, const char *fmt, va_list args)
-{
- uns ofs = mp_open(mp, ptr);
- ASSERT(ofs);
- return mp_vprintf_at(mp, ofs - 1, fmt, args);
-}
-
-char *
-mp_printf_append(struct mempool *mp, char *ptr, const char *fmt, ...)
-{
- va_list args;
- va_start(args, fmt);
- char *res = mp_vprintf_append(mp, ptr, fmt, args);
- va_end(args);
- return res;
-}
-
-#ifdef TEST
-
-int main(void)
-{
- struct mempool *mp = mp_new(64);
- char *x = mp_printf(mp, "<Hello, %s!>", "World");
- fputs(x, stdout);
- x = mp_printf_append(mp, x, "<Appended>");
- fputs(x, stdout);
- x = mp_printf(mp, "<Hello, %50s!>\n", "World");
- fputs(x, stdout);
- return 0;
-}
-
-#endif
+++ /dev/null
-/*
- * UCW Library -- Memory Pools (String Operations)
- *
- * (c) 2004 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-#include "lib/mempool.h"
-
-#include <alloca.h>
-#include <string.h>
-
-char *
-mp_strdup(struct mempool *p, const char *s)
-{
- uns l = strlen(s) + 1;
- char *t = mp_alloc_fast_noalign(p, l);
- memcpy(t, s, l);
- return t;
-}
-
-void *
-mp_memdup(struct mempool *p, const void *s, uns len)
-{
- void *t = mp_alloc_fast(p, len);
- memcpy(t, s, len);
- return t;
-}
-
-char *
-mp_multicat(struct mempool *p, ...)
-{
- va_list args, a;
- va_start(args, p);
- char *x, *y;
- uns cnt = 0;
- va_copy(a, args);
- while (x = va_arg(a, char *))
- cnt++;
- uns *sizes = alloca(cnt * sizeof(uns));
- uns len = 1;
- cnt = 0;
- va_end(a);
- va_copy(a, args);
- while (x = va_arg(a, char *))
- len += sizes[cnt++] = strlen(x);
- char *buf = mp_alloc_fast_noalign(p, len);
- y = buf;
- va_end(a);
- cnt = 0;
- while (x = va_arg(args, char *))
- {
- memcpy(y, x, sizes[cnt]);
- y += sizes[cnt++];
- }
- *y = 0;
- va_end(args);
- return buf;
-}
-
-char *
-mp_strjoin(struct mempool *p, char **a, uns n, uns sep)
-{
- uns sizes[n];
- uns len = 1;
- for (uns i=0; i<n; i++)
- len += sizes[i] = strlen(a[i]);
- if (sep && n)
- len += n-1;
- char *dest = mp_alloc_fast_noalign(p, len);
- char *d = dest;
- for (uns i=0; i<n; i++)
- {
- if (sep && i)
- *d++ = sep;
- memcpy(d, a[i], sizes[i]);
- d += sizes[i];
- }
- *d = 0;
- return dest;
-}
-
-#ifdef TEST
-
-#include <stdio.h>
-
-int main(void)
-{
- struct mempool *p = mp_new(64);
- char *s = mp_strdup(p, "12345");
- char *c = mp_multicat(p, "<<", s, ">>", NULL);
- puts(c);
- char *a[] = { "bugs", "gnats", "insects" };
- puts(mp_strjoin(p, a, 3, '.'));
- puts(mp_strjoin(p, a, 3, 0));
- return 0;
-}
-
-#endif
+++ /dev/null
-/*
- * UCW Library -- Memory Pools (One-Time Allocation)
- *
- * (c) 1997--2001 Martin Mares <mj@ucw.cz>
- * (c) 2007 Pavel Charvat <pchar@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#undef LOCAL_DEBUG
-
-#include "lib/lib.h"
-#include "lib/mempool.h"
-
-#include <string.h>
-
-#define MP_CHUNK_TAIL ALIGN_TO(sizeof(struct mempool_chunk), CPU_STRUCT_ALIGN)
-#define MP_SIZE_MAX (~0U - MP_CHUNK_TAIL - CPU_PAGE_SIZE)
-
-struct mempool_chunk {
- struct mempool_chunk *next;
- uns size;
-};
-
-static uns
-mp_align_size(uns size)
-{
-#ifdef POOL_IS_MMAP
- return ALIGN_TO(size + MP_CHUNK_TAIL, CPU_PAGE_SIZE) - MP_CHUNK_TAIL;
-#else
- return ALIGN_TO(size, CPU_STRUCT_ALIGN);
-#endif
-}
-
-void
-mp_init(struct mempool *pool, uns chunk_size)
-{
- chunk_size = mp_align_size(MAX(sizeof(struct mempool), chunk_size));
- *pool = (struct mempool) {
- .chunk_size = chunk_size,
- .threshold = chunk_size >> 1,
- .last_big = &pool->last_big };
-}
-
-static void *
-mp_new_big_chunk(uns size)
-{
- struct mempool_chunk *chunk;
- chunk = xmalloc(size + MP_CHUNK_TAIL) + size;
- chunk->size = size;
- return chunk;
-}
-
-static void
-mp_free_big_chunk(struct mempool_chunk *chunk)
-{
- xfree((void *)chunk - chunk->size);
-}
-
-static void *
-mp_new_chunk(uns size)
-{
-#ifdef POOL_IS_MMAP
- struct mempool_chunk *chunk;
- chunk = page_alloc(size + MP_CHUNK_TAIL) + size;
- chunk->size = size;
- return chunk;
-#else
- return mp_new_big_chunk(size);
-#endif
-}
-
-static void
-mp_free_chunk(struct mempool_chunk *chunk)
-{
-#ifdef POOL_IS_MMAP
- page_free((void *)chunk - chunk->size, chunk->size + MP_CHUNK_TAIL);
-#else
- mp_free_big_chunk(chunk);
-#endif
-}
-
-struct mempool *
-mp_new(uns chunk_size)
-{
- chunk_size = mp_align_size(MAX(sizeof(struct mempool), chunk_size));
- struct mempool_chunk *chunk = mp_new_chunk(chunk_size);
- struct mempool *pool = (void *)chunk - chunk_size;
- DBG("Creating mempool %p with %u bytes long chunks", pool, chunk_size);
- chunk->next = NULL;
- *pool = (struct mempool) {
- .state = { .free = { chunk_size - sizeof(*pool) }, .last = { chunk } },
- .chunk_size = chunk_size,
- .threshold = chunk_size >> 1,
- .last_big = &pool->last_big };
- return pool;
-}
-
-static void
-mp_free_chain(struct mempool_chunk *chunk)
-{
- while (chunk)
- {
- struct mempool_chunk *next = chunk->next;
- mp_free_chunk(chunk);
- chunk = next;
- }
-}
-
-static void
-mp_free_big_chain(struct mempool_chunk *chunk)
-{
- while (chunk)
- {
- struct mempool_chunk *next = chunk->next;
- mp_free_big_chunk(chunk);
- chunk = next;
- }
-}
-
-void
-mp_delete(struct mempool *pool)
-{
- DBG("Deleting mempool %p", pool);
- mp_free_big_chain(pool->state.last[1]);
- mp_free_chain(pool->unused);
- mp_free_chain(pool->state.last[0]); // can contain the mempool structure
-}
-
-void
-mp_flush(struct mempool *pool)
-{
- mp_free_big_chain(pool->state.last[1]);
- struct mempool_chunk *chunk, *next;
- for (chunk = pool->state.last[0]; chunk && (void *)chunk - chunk->size != pool; chunk = next)
- {
- next = chunk->next;
- chunk->next = pool->unused;
- pool->unused = chunk;
- }
- pool->state.last[0] = chunk;
- pool->state.free[0] = chunk ? chunk->size - sizeof(*pool) : 0;
- pool->state.last[1] = NULL;
- pool->state.free[1] = 0;
- pool->state.next = NULL;
- pool->last_big = &pool->last_big;
-}
-
-static void
-mp_stats_chain(struct mempool_chunk *chunk, struct mempool_stats *stats, uns idx)
-{
- while (chunk)
- {
- stats->chain_size[idx] += chunk->size + sizeof(*chunk);
- stats->chain_count[idx]++;
- chunk = chunk->next;
- }
- stats->total_size += stats->chain_size[idx];
-}
-
-void
-mp_stats(struct mempool *pool, struct mempool_stats *stats)
-{
- bzero(stats, sizeof(*stats));
- mp_stats_chain(pool->state.last[0], stats, 0);
- mp_stats_chain(pool->state.last[1], stats, 1);
- mp_stats_chain(pool->unused, stats, 2);
-}
-
-void *
-mp_alloc_internal(struct mempool *pool, uns size)
-{
- struct mempool_chunk *chunk;
- if (size <= pool->threshold)
- {
- pool->idx = 0;
- if (pool->unused)
- {
- chunk = pool->unused;
- pool->unused = chunk->next;
- }
- else
- chunk = mp_new_chunk(pool->chunk_size);
- chunk->next = pool->state.last[0];
- pool->state.last[0] = chunk;
- pool->state.free[0] = pool->chunk_size - size;
- return (void *)chunk - pool->chunk_size;
- }
- else if (likely(size <= MP_SIZE_MAX))
- {
- pool->idx = 1;
- uns aligned = ALIGN_TO(size, CPU_STRUCT_ALIGN);
- chunk = mp_new_big_chunk(aligned);
- chunk->next = pool->state.last[1];
- pool->state.last[1] = chunk;
- pool->state.free[1] = aligned - size;
- return pool->last_big = (void *)chunk - aligned;
- }
- else
- die("Cannot allocate %u bytes from a mempool", size);
-}
-
-void *
-mp_alloc(struct mempool *pool, uns size)
-{
- return mp_alloc_fast(pool, size);
-}
-
-void *
-mp_alloc_noalign(struct mempool *pool, uns size)
-{
- return mp_alloc_fast_noalign(pool, size);
-}
-
-void *
-mp_alloc_zero(struct mempool *pool, uns size)
-{
- void *ptr = mp_alloc_fast(pool, size);
- bzero(ptr, size);
- return ptr;
-}
-
-void *
-mp_start_internal(struct mempool *pool, uns size)
-{
- void *ptr = mp_alloc_internal(pool, size);
- pool->state.free[pool->idx] += size;
- return ptr;
-}
-
-void *
-mp_start(struct mempool *pool, uns size)
-{
- return mp_start_fast(pool, size);
-}
-
-void *
-mp_start_noalign(struct mempool *pool, uns size)
-{
- return mp_start_fast_noalign(pool, size);
-}
-
-void *
-mp_grow_internal(struct mempool *pool, uns size)
-{
- if (unlikely(size > MP_SIZE_MAX))
- die("Cannot allocate %u bytes of memory", size);
- uns avail = mp_avail(pool);
- void *ptr = mp_ptr(pool);
- if (pool->idx)
- {
- uns amortized = likely(avail <= MP_SIZE_MAX / 2) ? avail * 2 : MP_SIZE_MAX;
- amortized = MAX(amortized, size);
- amortized = ALIGN_TO(amortized, CPU_STRUCT_ALIGN);
- struct mempool_chunk *chunk = pool->state.last[1], *next = chunk->next;
- ptr = xrealloc(ptr, amortized + MP_CHUNK_TAIL);
- chunk = ptr + amortized;
- chunk->next = next;
- chunk->size = amortized;
- pool->state.last[1] = chunk;
- pool->state.free[1] = amortized;
- pool->last_big = ptr;
- return ptr;
- }
- else
- {
- void *p = mp_start_internal(pool, size);
- memcpy(p, ptr, avail);
- return p;
- }
-}
-
-uns
-mp_open(struct mempool *pool, void *ptr)
-{
- return mp_open_fast(pool, ptr);
-}
-
-void *
-mp_realloc(struct mempool *pool, void *ptr, uns size)
-{
- return mp_realloc_fast(pool, ptr, size);
-}
-
-void *
-mp_realloc_zero(struct mempool *pool, void *ptr, uns size)
-{
- uns old_size = mp_open_fast(pool, ptr);
- ptr = mp_grow(pool, size);
- if (size > old_size)
- bzero(ptr + old_size, size - old_size);
- mp_end(pool, ptr + size);
- return ptr;
-}
-
-void *
-mp_spread_internal(struct mempool *pool, void *p, uns size)
-{
- void *old = mp_ptr(pool);
- void *new = mp_grow_internal(pool, p-old+size);
- return p-old+new;
-}
-
-void
-mp_restore(struct mempool *pool, struct mempool_state *state)
-{
- struct mempool_chunk *chunk, *next;
- struct mempool_state s = *state;
- for (chunk = pool->state.last[0]; chunk != s.last[0]; chunk = next)
- {
- next = chunk->next;
- chunk->next = pool->unused;
- pool->unused = chunk;
- }
- for (chunk = pool->state.last[1]; chunk != s.last[1]; chunk = next)
- {
- next = chunk->next;
- mp_free_big_chunk(chunk);
- }
- pool->state = s;
- pool->last_big = &pool->last_big;
-}
-
-struct mempool_state *
-mp_push(struct mempool *pool)
-{
- struct mempool_state state = pool->state;
- struct mempool_state *p = mp_alloc_fast(pool, sizeof(*p));
- *p = state;
- pool->state.next = p;
- return p;
-}
-
-void
-mp_pop(struct mempool *pool)
-{
- ASSERT(pool->state.next);
- struct mempool_state state = pool->state;
- mp_restore(pool, &state);
-}
-
-#ifdef TEST
-
-#include "lib/getopt.h"
-#include <stdio.h>
-#include <stdlib.h>
-#include <time.h>
-
-static void
-fill(byte *ptr, uns len, uns magic)
-{
- while (len--)
- *ptr++ = (magic++ & 255);
-}
-
-static void
-check(byte *ptr, uns len, uns magic, uns align)
-{
- ASSERT(!((uintptr_t)ptr & (align - 1)));
- while (len--)
- if (*ptr++ != (magic++ & 255))
- ASSERT(0);
-}
-
-int main(int argc, char **argv)
-{
- srand(time(NULL));
- log_init(argv[0]);
- cf_def_file = NULL;
- if (cf_getopt(argc, argv, CF_SHORT_OPTS, CF_NO_LONG_OPTS, NULL) >= 0 || argc != optind)
- die("Invalid usage");
-
- uns max = 1000, n = 0, m = 0, can_realloc = 0;
- void *ptr[max];
- struct mempool_state *state[max];
- uns len[max], num[max], align[max];
- struct mempool *mp = mp_new(128), mp_static;
-
- for (uns i = 0; i < 5000; i++)
- {
- for (uns j = 0; j < n; j++)
- check(ptr[j], len[j], j, align[j]);
-#if 0
- DBG("free_small=%u free_big=%u idx=%u chunk_size=%u last_big=%p", mp->state.free[0], mp->state.free[1], mp->idx, mp->chunk_size, mp->last_big);
- for (struct mempool_chunk *ch = mp->state.last[0]; ch; ch = ch->next)
- DBG("small %p %p %p %d", (byte *)ch - ch->size, ch, ch + 1, ch->size);
- for (struct mempool_chunk *ch = mp->state.last[1]; ch; ch = ch->next)
- DBG("big %p %p %p %d", (byte *)ch - ch->size, ch, ch + 1, ch->size);
-#endif
- int r = random_max(100);
- if ((r -= 1) < 0)
- {
- DBG("flush");
- mp_flush(mp);
- n = m = 0;
- }
- else if ((r -= 1) < 0)
- {
- DBG("delete & new");
- mp_delete(mp);
- if (random_max(2))
- mp = mp_new(random_max(0x1000) + 1);
- else
- mp = &mp_static, mp_init(mp, random_max(512) + 1);
- n = m = 0;
- }
- else if (n < max && (r -= 30) < 0)
- {
- len[n] = random_max(0x2000);
- DBG("alloc(%u)", len[n]);
- align[n] = random_max(2) ? CPU_STRUCT_ALIGN : 1;
- ptr[n] = (align[n] == 1) ? mp_alloc_fast_noalign(mp, len[n]) : mp_alloc_fast(mp, len[n]);
- DBG(" -> (%p)", ptr[n]);
- fill(ptr[n], len[n], n);
- n++;
- can_realloc = 1;
- }
- else if (n < max && (r -= 20) < 0)
- {
- len[n] = random_max(0x2000);
- DBG("start(%u)", len[n]);
- align[n] = random_max(2) ? CPU_STRUCT_ALIGN : 1;
- ptr[n] = (align[n] == 1) ? mp_start_fast_noalign(mp, len[n]) : mp_start_fast(mp, len[n]);
- DBG(" -> (%p)", ptr[n]);
- fill(ptr[n], len[n], n);
- n++;
- can_realloc = 1;
- goto grow;
- }
- else if (can_realloc && n && (r -= 10) < 0)
- {
- if (mp_open(mp, ptr[n - 1]) != len[n - 1])
- ASSERT(0);
-grow:
- {
- uns k = n - 1;
- for (uns i = random_max(4); i--; )
- {
- uns l = len[k];
- len[k] = random_max(0x2000);
- DBG("grow(%u)", len[k]);
- ptr[k] = mp_grow(mp, len[k]);
- DBG(" -> (%p)", ptr[k]);
- check(ptr[k], MIN(l, len[k]), k, align[k]);
- fill(ptr[k], len[k], k);
- }
- mp_end(mp, ptr[k] + len[k]);
- }
- }
- else if (can_realloc && n && (r -= 20) < 0)
- {
- uns i = n - 1, l = len[i];
- DBG("realloc(%p, %u)", ptr[i], len[i]);
- ptr[i] = mp_realloc(mp, ptr[i], len[i] = random_max(0x2000));
- DBG(" -> (%p, %u)", ptr[i], len[i]);
- check(ptr[i], MIN(len[i], l), i, align[i]);
- fill(ptr[i], len[i], i);
- }
- else if (m < max && (r -= 5) < 0)
- {
- DBG("push(%u)", m);
- num[m] = n;
- state[m++] = mp_push(mp);
- can_realloc = 0;
- }
- else if (m && (r -= 2) < 0)
- {
- m--;
- DBG("pop(%u)", m);
- mp_pop(mp);
- n = num[m];
- can_realloc = 0;
- }
- else if (m && (r -= 1) < 0)
- {
- uns i = random_max(m);
- DBG("restore(%u)", i);
- mp_restore(mp, state[i]);
- n = num[m = i];
- can_realloc = 0;
- }
- else if (can_realloc && n && (r -= 5) < 0)
- ASSERT(mp_size(mp, ptr[n - 1]) == len[n - 1]);
- }
-
- mp_delete(mp);
- return 0;
-}
-
-#endif
+++ /dev/null
-/*
- * UCW Library -- Memory Pools
- *
- * (c) 1997--2005 Martin Mares <mj@ucw.cz>
- * (c) 2007 Pavel Charvat <pchar@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#ifndef _UCW_POOLS_H
-#define _UCW_POOLS_H
-
-/* Memory pool state (see mp_push(), ...) */
-struct mempool_state {
- uns free[2];
- void *last[2];
- struct mempool_state *next;
-};
-
-/* Memory pool */
-struct mempool {
- struct mempool_state state;
- void *unused, *last_big;
- uns chunk_size, threshold, idx;
-};
-
-/* Statistics (see mp_stats()) */
-struct mempool_stats {
- uns total_size; /* Real allocated size in bytes */
- uns chain_count[3]; /* Number of allocated chunks in small/big/unused chains */
- uns chain_size[3]; /* Size of allocated chunks in small/big/unused chains */
-};
-
-/* Initialize a given mempool structure. Chunk size must be in the interval [1, UINT_MAX / 2] */
-void mp_init(struct mempool *pool, uns chunk_size);
-
-/* Allocate and initialize a new memory pool. See mp_init for chunk size limitations. */
-struct mempool *mp_new(uns chunk_size);
-
-/* Cleanup mempool initialized by mp_init or mp_new */
-void mp_delete(struct mempool *pool);
-
-/* Free all data on a memory pool (saves some empty chunks for later allocations) */
-void mp_flush(struct mempool *pool);
-
-/* Compute some statistics for debug purposes. See the definition of the mempool_stats structure. */
-void mp_stats(struct mempool *pool, struct mempool_stats *stats);
-
-
-/*** Allocation routines ***/
-
-/* For internal use only, do not call directly */
-void *mp_alloc_internal(struct mempool *pool, uns size) LIKE_MALLOC;
-
-/* The function allocates new <size> bytes on a given memory pool.
- * If the <size> is zero, the resulting pointer is undefined,
- * but it may be safely reallocated or used as the parameter
- * to other functions below.
- *
- * The resulting pointer is always aligned to a multiple of
- * CPU_STRUCT_ALIGN bytes and this condition remains true also
- * after future reallocations.
- */
-void *mp_alloc(struct mempool *pool, uns size);
-
-/* The same as mp_alloc, but the result may not be aligned */
-void *mp_alloc_noalign(struct mempool *pool, uns size);
-
-/* The same as mp_alloc, but fills the newly allocated data with zeroes */
-void *mp_alloc_zero(struct mempool *pool, uns size);
-
-/* Inlined version of mp_alloc() */
-static inline void *
-mp_alloc_fast(struct mempool *pool, uns size)
-{
- uns avail = pool->state.free[0] & ~(CPU_STRUCT_ALIGN - 1);
- if (size <= avail)
- {
- pool->state.free[0] = avail - size;
- return pool->state.last[0] - avail;
- }
- else
- return mp_alloc_internal(pool, size);
-}
-
-/* Inlined version of mp_alloc_noalign() */
-static inline void *
-mp_alloc_fast_noalign(struct mempool *pool, uns size)
-{
- if (size <= pool->state.free[0])
- {
- void *ptr = pool->state.last[0] - pool->state.free[0];
- pool->state.free[0] -= size;
- return ptr;
- }
- else
- return mp_alloc_internal(pool, size);
-}
-
-
-/*** Usage as a growing buffer ***/
-
-/* For internal use only, do not call directly */
-void *mp_start_internal(struct mempool *pool, uns size) LIKE_MALLOC;
-void *mp_grow_internal(struct mempool *pool, uns size);
-void *mp_spread_internal(struct mempool *pool, void *p, uns size);
-
-static inline uns
-mp_idx(struct mempool *pool, void *ptr)
-{
- return ptr == pool->last_big;
-}
-
-/* Open a new growing buffer (at least <size> bytes long).
- * If the <size> is zero, the resulting pointer is undefined,
- * but it may be safely reallocated or used as the parameter
- * to other functions below.
- *
- * The resulting pointer is always aligned to a multiple of
- * CPU_STRUCT_ALIGN bytes and this condition remains true also
- * after future reallocations. There is an unaligned version as well.
- *
- * Keep in mind that you can't make any other <pool> allocations
- * before you "close" the growing buffer with mp_end().
- */
-void *mp_start(struct mempool *pool, uns size);
-void *mp_start_noalign(struct mempool *pool, uns size);
-
-/* Inlined version of mp_start() */
-static inline void *
-mp_start_fast(struct mempool *pool, uns size)
-{
- uns avail = pool->state.free[0] & ~(CPU_STRUCT_ALIGN - 1);
- if (size <= avail)
- {
- pool->idx = 0;
- pool->state.free[0] = avail;
- return pool->state.last[0] - avail;
- }
- else
- return mp_start_internal(pool, size);
-}
-
-/* Inlined version of mp_start_noalign() */
-static inline void *
-mp_start_fast_noalign(struct mempool *pool, uns size)
-{
- if (size <= pool->state.free[0])
- {
- pool->idx = 0;
- return pool->state.last[0] - pool->state.free[0];
- }
- else
- return mp_start_internal(pool, size);
-}
-
-/* Return start pointer of the growing buffer allocated by mp_start() or a similar function */
-static inline void *
-mp_ptr(struct mempool *pool)
-{
- return pool->state.last[pool->idx] - pool->state.free[pool->idx];
-}
-
-/* Return the number of bytes available for extending the growing buffer */
-static inline uns
-mp_avail(struct mempool *pool)
-{
- return pool->state.free[pool->idx];
-}
-
-/* Grow the buffer allocated by mp_start() to be at least <size> bytes long
- * (<size> may be less than mp_avail(), even zero). Reallocated buffer may
- * change its starting position. The content will be unchanged to the minimum
- * of the old and new sizes; newly allocated memory will be uninitialized.
- * Multiple calls to mp_grow have amortized linear cost wrt. the maximum value of <size>. */
-static inline void *
-mp_grow(struct mempool *pool, uns size)
-{
- return (size <= mp_avail(pool)) ? mp_ptr(pool) : mp_grow_internal(pool, size);
-}
-
-/* Grow the buffer by at least one byte -- equivalent to mp_grow(pool, mp_avail(pool) + 1) */
-static inline void *
-mp_expand(struct mempool *pool)
-{
- return mp_grow_internal(pool, mp_avail(pool) + 1);
-}
-
-/* Ensure that there is at least <size> bytes free after <p>, if not, reallocate and adjust <p>. */
-static inline void *
-mp_spread(struct mempool *pool, void *p, uns size)
-{
- return (((uns)(pool->state.last[pool->idx] - p) >= size) ? p : mp_spread_internal(pool, p, size));
-}
-
-/* Close the growing buffer. The <end> must point just behind the data, you want to keep
- * allocated (so it can be in the interval [mp_ptr(pool), mp_ptr(pool) + mp_avail(pool)]).
- * Returns a pointer to the beginning of the just closed block. */
-static inline void *
-mp_end(struct mempool *pool, void *end)
-{
- void *p = mp_ptr(pool);
- pool->state.free[pool->idx] = pool->state.last[pool->idx] - end;
- return p;
-}
-
-/* Return size in bytes of the last allocated memory block (with mp_alloc*() or mp_end()). */
-static inline uns
-mp_size(struct mempool *pool, void *ptr)
-{
- uns idx = mp_idx(pool, ptr);
- return pool->state.last[idx] - ptr - pool->state.free[idx];
-}
-
-/* Open the last memory block (allocated with mp_alloc*() or mp_end())
- * for growing and return its size in bytes. The contents and the start pointer
- * remain unchanged. Do not forget to call mp_end() to close it. */
-uns mp_open(struct mempool *pool, void *ptr);
-
-/* Inlined version of mp_open() */
-static inline uns
-mp_open_fast(struct mempool *pool, void *ptr)
-{
- pool->idx = mp_idx(pool, ptr);
- uns size = pool->state.last[pool->idx] - ptr - pool->state.free[pool->idx];
- pool->state.free[pool->idx] += size;
- return size;
-}
-
-/* Reallocate the last memory block (allocated with mp_alloc*() or mp_end())
- * to the new <size>. Behavior is similar to mp_grow(), but the resulting
- * block is closed. */
-void *mp_realloc(struct mempool *pool, void *ptr, uns size);
-
-/* The same as mp_realloc(), but fills the additional bytes (if any) with zeroes */
-void *mp_realloc_zero(struct mempool *pool, void *ptr, uns size);
-
-/* Inlined version of mp_realloc() */
-static inline void *
-mp_realloc_fast(struct mempool *pool, void *ptr, uns size)
-{
- mp_open_fast(pool, ptr);
- ptr = mp_grow(pool, size);
- mp_end(pool, ptr + size);
- return ptr;
-}
-
-
-/*** Usage as a stack ***/
-
-/* Save the current state of a memory pool.
- * Do not call this function with an opened growing buffer. */
-static inline void
-mp_save(struct mempool *pool, struct mempool_state *state)
-{
- *state = pool->state;
- pool->state.next = state;
-}
-
-/* Save the current state to a newly allocated mempool_state structure.
- * Do not call this function with an opened growing buffer. */
-struct mempool_state *mp_push(struct mempool *pool);
-
-/* Restore the state saved by mp_save() or mp_push() and free all
- * data allocated after that point (including the state structure itself).
- * You can't reallocate the last memory block from the saved state. */
-void mp_restore(struct mempool *pool, struct mempool_state *state);
-
-/* Restore the state saved by the last call to mp_push().
- * mp_pop() and mp_push() works as a stack so you can push more states safely. */
-void mp_pop(struct mempool *pool);
-
-
-/*** mempool-str.c ***/
-
-char *mp_strdup(struct mempool *, const char *) LIKE_MALLOC;
-void *mp_memdup(struct mempool *, const void *, uns) LIKE_MALLOC;
-char *mp_multicat(struct mempool *, ...) LIKE_MALLOC SENTINEL_CHECK;
-static inline char * LIKE_MALLOC
-mp_strcat(struct mempool *mp, const char *x, const char *y)
-{
- return mp_multicat(mp, x, y, NULL);
-}
-char *mp_strjoin(struct mempool *p, char **a, uns n, uns sep) LIKE_MALLOC;
-
-
-/*** mempool-fmt.c ***/
-
-char *mp_printf(struct mempool *mp, const char *fmt, ...) FORMAT_CHECK(printf,2,3) LIKE_MALLOC;
-char *mp_vprintf(struct mempool *mp, const char *fmt, va_list args) LIKE_MALLOC;
-char *mp_printf_append(struct mempool *mp, char *ptr, const char *fmt, ...) FORMAT_CHECK(printf,3,4);
-char *mp_vprintf_append(struct mempool *mp, char *ptr, const char *fmt, va_list args);
-
-#endif
+++ /dev/null
-# Tests for mempool modules
-
-Run: ../obj/lib/mempool-t
-
-Run: ../obj/lib/mempool-fmt-t
-Out: <Hello, World!><Hello, World!><Appended><Hello, World!>
-
-Run: ../obj/lib/mempool-str-t
-Out: <<12345>>
- bugs.gnats.insects
- bugsgnatsinsects
+++ /dev/null
-/*
- * UCW Library -- Mapping of Files
- *
- * (c) 1999--2002 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-
-#include <stdio.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include <sys/stat.h>
-#include <sys/mman.h>
-
-void *
-mmap_file(const char *name, unsigned *len, int writeable)
-{
- int fd = open(name, writeable ? O_RDWR : O_RDONLY);
- struct stat st;
- void *x;
-
- if (fd < 0)
- die("open(%s): %m", name);
- if (fstat(fd, &st) < 0)
- die("fstat(%s): %m", name);
- if (len)
- *len = st.st_size;
- if (st.st_size)
- {
- x = mmap(NULL, st.st_size, writeable ? (PROT_READ | PROT_WRITE) : PROT_READ, MAP_SHARED, fd, 0);
- if (x == MAP_FAILED)
- die("mmap(%s): %m", name);
- }
- else /* For empty file, we can return any non-zero address */
- x = "";
- close(fd);
- return x;
-}
-
-void
-munmap_file(void *start, unsigned len)
-{
- munmap(start, len);
-}
+++ /dev/null
-/*
- * UCW Library -- File Page Cache
- *
- * (c) 1999--2002 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-#include "lib/pagecache.h"
-#include "lib/lfs.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include <alloca.h>
-
-struct page_cache {
- list free_pages; /* LRU queue of free non-dirty pages */
- list locked_pages; /* List of locked pages (starts with dirty ones) */
- list dirty_pages; /* List of free dirty pages */
- uns page_size; /* Bytes per page (must be a power of two) */
- uns free_count; /* Number of free / dirty pages */
- uns total_count; /* Total number of pages */
- uns max_pages; /* Maximum number of free pages */
- uns hash_size; /* Hash table size */
- uns stat_hit; /* Number of cache hits */
- uns stat_miss; /* Number of cache misses */
- uns stat_write; /* Number of writes */
- list *hash_table; /* List heads corresponding to hash buckets */
-#ifndef HAVE_PREAD
- sh_off_t pos; /* Current position in the file */
- int pos_fd; /* FD the position corresponds to */
-#endif
-};
-
-#define PAGE_NUMBER(pos) ((pos) & ~(sh_off_t)(c->page_size - 1))
-#define PAGE_OFFSET(pos) ((pos) & (c->page_size - 1))
-
-struct page_cache *
-pgc_open(uns page_size, uns max_pages)
-{
- struct page_cache *c = xmalloc_zero(sizeof(struct page_cache));
- uns i;
-
- init_list(&c->free_pages);
- init_list(&c->locked_pages);
- init_list(&c->dirty_pages);
- c->page_size = page_size;
- c->max_pages = max_pages;
- c->hash_size = nextprime(c->max_pages);
- c->hash_table = xmalloc(sizeof(list) * c->hash_size);
- for(i=0; i<c->hash_size; i++)
- init_list(&c->hash_table[i]);
-#ifndef HAVE_PREAD
- c->pos_fd = -1;
-#endif
- return c;
-}
-
-void
-pgc_close(struct page_cache *c)
-{
- pgc_cleanup(c);
- ASSERT(EMPTY_LIST(c->locked_pages));
- ASSERT(EMPTY_LIST(c->dirty_pages));
- ASSERT(EMPTY_LIST(c->free_pages));
- xfree(c->hash_table);
- xfree(c);
-}
-
-static void
-pgc_debug_page(struct page *p)
-{
- printf("\tp=%08x d=%d f=%x c=%d\n", (uns) p->pos, p->fd, p->flags, p->lock_count);
-}
-
-void
-pgc_debug(struct page_cache *c, int mode)
-{
- struct page *p;
-
- printf(">> Page cache dump: pgsize=%d, pages=%d, freepages=%d of %d, hash=%d\n", c->page_size, c->total_count, c->free_count, c->max_pages, c->hash_size);
- printf(">> stats: %d hits, %d misses, %d writes\n", c->stat_hit, c->stat_miss, c->stat_write);
- if (mode)
- {
- puts("LRU list:");
- WALK_LIST(p, c->free_pages)
- pgc_debug_page(p);
- puts("Locked list:");
- WALK_LIST(p, c->locked_pages)
- pgc_debug_page(p);
- puts("Dirty list:");
- WALK_LIST(p, c->dirty_pages)
- pgc_debug_page(p);
- }
-}
-
-static void
-flush_page(struct page_cache *c, struct page *p)
-{
- int s;
-
- ASSERT(p->flags & PG_FLAG_DIRTY);
-#ifdef HAVE_PREAD
- s = sh_pwrite(p->fd, p->data, c->page_size, p->pos);
-#else
- if (c->pos != p->pos || c->pos_fd != (int) p->fd)
- sh_seek(p->fd, p->pos, SEEK_SET);
- s = write(p->fd, p->data, c->page_size);
- c->pos = p->pos + s;
- c->pos_fd = p->fd;
-#endif
- if (s < 0)
- die("pgc_write(%d): %m", p->fd);
- if (s != (int) c->page_size)
- die("pgc_write(%d): incomplete page (only %d of %d)", p->fd, s, c->page_size);
- p->flags &= ~PG_FLAG_DIRTY;
- c->stat_write++;
-}
-
-static int
-flush_cmp(const void *X, const void *Y)
-{
- struct page *x = *((struct page **)X);
- struct page *y = *((struct page **)Y);
-
- if (x->fd < y->fd)
- return -1;
- if (x->fd > y->fd)
- return 1;
- if (x->pos < y->pos)
- return -1;
- if (x->pos > y->pos)
- return 1;
- return 0;
-}
-
-static void
-flush_pages(struct page_cache *c, uns force)
-{
- uns cnt = 0;
- uns max = force ? ~0U : c->free_count / 2;
- uns i;
- struct page *p, *q, **req, **rr;
-
- WALK_LIST(p, c->dirty_pages)
- {
- cnt++;
- if (cnt >= max)
- break;
- }
- req = rr = alloca(cnt * sizeof(struct page *));
- i = cnt;
- p = HEAD(c->dirty_pages);
- while ((q = (struct page *) p->n.next) && i--)
- {
- rem_node(&p->n);
- add_tail(&c->free_pages, &p->n);
- *rr++ = p;
- p = q;
- }
- qsort(req, cnt, sizeof(struct page *), flush_cmp);
- for(i=0; i<cnt; i++)
- flush_page(c, req[i]);
-}
-
-static inline uns
-hash_page(struct page_cache *c, sh_off_t pos, uns fd)
-{
- return (pos + fd) % c->hash_size;
-}
-
-static struct page *
-get_page(struct page_cache *c, sh_off_t pos, uns fd)
-{
- node *n;
- struct page *p;
- uns hash = hash_page(c, pos, fd);
-
- /*
- * Return locked buffer for given page.
- */
-
- WALK_LIST(n, c->hash_table[hash])
- {
- p = SKIP_BACK(struct page, hn, n);
- if (p->pos == pos && p->fd == fd)
- {
- /* Found in the cache */
- rem_node(&p->n);
- if (!p->lock_count)
- c->free_count--;
- return p;
- }
- }
- if (c->total_count < c->max_pages || !c->free_count)
- {
- /* Enough free space, expand the cache */
- p = xmalloc(sizeof(struct page) + c->page_size);
- c->total_count++;
- }
- else
- {
- /* Discard the oldest unlocked page */
- p = HEAD(c->free_pages);
- if (!p->n.next)
- {
- /* There are only dirty pages here */
- flush_pages(c, 0);
- p = HEAD(c->free_pages);
- ASSERT(p->n.next);
- }
- ASSERT(!p->lock_count);
- rem_node(&p->n);
- rem_node(&p->hn);
- c->free_count--;
- }
- p->pos = pos;
- p->fd = fd;
- p->flags = 0;
- p->lock_count = 0;
- add_tail(&c->hash_table[hash], &p->hn);
- return p;
-}
-
-void
-pgc_flush(struct page_cache *c)
-{
- struct page *p;
-
- flush_pages(c, 1);
- WALK_LIST(p, c->locked_pages)
- if (p->flags & PG_FLAG_DIRTY)
- flush_page(c, p);
- else
- break;
-}
-
-void
-pgc_cleanup(struct page_cache *c)
-{
- struct page *p;
- node *n;
-
- pgc_flush(c);
- WALK_LIST_DELSAFE(p, n, c->free_pages)
- {
- ASSERT(!(p->flags & PG_FLAG_DIRTY) && !p->lock_count);
- rem_node(&p->n);
- rem_node(&p->hn);
- c->free_count--;
- c->total_count--;
- xfree(p);
- }
- ASSERT(!c->free_count);
-}
-
-static inline struct page *
-get_and_lock_page(struct page_cache *c, sh_off_t pos, uns fd)
-{
- struct page *p = get_page(c, pos, fd);
-
- add_tail(&c->locked_pages, &p->n);
- p->lock_count++;
- return p;
-}
-
-struct page *
-pgc_read(struct page_cache *c, int fd, sh_off_t pos)
-{
- struct page *p;
- int s;
-
- ASSERT(!PAGE_OFFSET(pos));
- p = get_and_lock_page(c, pos, fd);
- if (p->flags & PG_FLAG_VALID)
- c->stat_hit++;
- else
- {
- c->stat_miss++;
-#ifdef HAVE_PREAD
- s = sh_pread(fd, p->data, c->page_size, pos);
-#else
- if (c->pos != pos || c->pos_fd != (int)fd)
- sh_seek(fd, pos, SEEK_SET);
- s = read(fd, p->data, c->page_size);
- c->pos = pos + s;
- c->pos_fd = fd;
-#endif
- if (s < 0)
- die("pgc_read(%d): %m", fd);
- if (s != (int) c->page_size)
- die("pgc_read(%d): incomplete page (only %d of %d)", p->fd, s, c->page_size);
- p->flags |= PG_FLAG_VALID;
- }
- return p;
-}
-
-struct page *
-pgc_get(struct page_cache *c, int fd, sh_off_t pos)
-{
- struct page *p;
-
- ASSERT(!PAGE_OFFSET(pos));
- p = get_and_lock_page(c, pos, fd);
- p->flags |= PG_FLAG_VALID | PG_FLAG_DIRTY;
- return p;
-}
-
-struct page *
-pgc_get_zero(struct page_cache *c, int fd, sh_off_t pos)
-{
- struct page *p;
-
- ASSERT(!PAGE_OFFSET(pos));
- p = get_and_lock_page(c, pos, fd);
- bzero(p->data, c->page_size);
- p->flags |= PG_FLAG_VALID | PG_FLAG_DIRTY;
- return p;
-}
-
-void
-pgc_put(struct page_cache *c, struct page *p)
-{
- ASSERT(p->lock_count);
- if (--p->lock_count)
- return;
- rem_node(&p->n);
- if (p->flags & PG_FLAG_DIRTY)
- {
- add_tail(&c->dirty_pages, &p->n);
- c->free_count++;
- }
- else if (c->free_count < c->max_pages)
- {
- add_tail(&c->free_pages, &p->n);
- c->free_count++;
- }
- else
- {
- rem_node(&p->hn);
- xfree(p);
- c->total_count--;
- }
-}
-
-void
-pgc_mark_dirty(struct page_cache *c, struct page *p)
-{
- ASSERT(p->lock_count);
- if (!(p->flags & PG_FLAG_DIRTY))
- {
- p->flags |= PG_FLAG_DIRTY;
- rem_node(&p->n);
- add_head(&c->locked_pages, &p->n);
- }
-}
-
-byte *
-pgc_read_data(struct page_cache *c, int fd, sh_off_t pos, uns *len)
-{
- struct page *p;
- sh_off_t page = PAGE_NUMBER(pos);
- uns offset = PAGE_OFFSET(pos);
-
- p = pgc_read(c, fd, page);
- pgc_put(c, p);
- *len = c->page_size - offset;
- return p->data + offset;
-}
-
-#ifdef TEST
-
-int main(int argc, char **argv)
-{
- struct page_cache *c = pgc_open(1024, 2);
- struct page *p, *q, *r;
- int fd = open("test", O_RDWR | O_CREAT | O_TRUNC, 0666);
- if (fd < 0)
- die("open: %m");
- pgc_debug(c, 1);
- p = pgc_get(c, fd, 0);
- pgc_debug(c, 1);
- strcpy(p->data, "one");
- pgc_put(c, p);
- pgc_debug(c, 1);
- p = pgc_get(c, fd, 1024);
- pgc_debug(c, 1);
- strcpy(p->data, "two");
- pgc_put(c, p);
- pgc_debug(c, 1);
- p = pgc_get(c, fd, 2048);
- pgc_debug(c, 1);
- strcpy(p->data, "three");
- pgc_put(c, p);
- pgc_debug(c, 1);
- pgc_flush(c);
- pgc_debug(c, 1);
- p = pgc_read(c, fd, 0);
- pgc_debug(c, 1);
- strcpy(p->data, "odin");
- pgc_mark_dirty(c, p);
- pgc_debug(c, 1);
- pgc_flush(c);
- pgc_debug(c, 1);
- q = pgc_read(c, fd, 1024);
- pgc_debug(c, 1);
- r = pgc_read(c, fd, 2048);
- pgc_debug(c, 1);
- pgc_put(c, p);
- pgc_put(c, q);
- pgc_put(c, r);
- pgc_debug(c, 1);
- p = pgc_get(c, fd, 3072);
- pgc_debug(c, 1);
- strcpy(p->data, "four");
- pgc_put(c, p);
- pgc_debug(c, 1);
- pgc_cleanup(c);
- pgc_debug(c, 1);
- pgc_close(c);
- return 0;
-}
-
-#endif
+++ /dev/null
-/*
- * UCW Library -- File Page Cache
- *
- * (c) 1999--2002 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#ifndef _UCW_PAGECACHE_H
-#define _UCW_PAGECACHE_H
-
-#include "lib/lists.h"
-
-struct page_cache;
-
-struct page {
- node n; /* Node in page list */
- node hn; /* Node in hash table */
- sh_off_t pos;
- uns fd;
- uns flags;
- uns lock_count;
- byte data[0];
-};
-
-#define PG_FLAG_DIRTY 1
-#define PG_FLAG_VALID 2
-
-struct page_cache *pgc_open(uns page_size, uns max_pages);
-void pgc_close(struct page_cache *);
-void pgc_debug(struct page_cache *, int mode);
-void pgc_flush(struct page_cache *); /* Write all unwritten pages */
-void pgc_cleanup(struct page_cache *); /* Deallocate all unused buffers */
-struct page *pgc_read(struct page_cache *, int fd, sh_off_t); /* Read page and lock it */
-struct page *pgc_get(struct page_cache *, int fd, sh_off_t); /* Get page for writing */
-struct page *pgc_get_zero(struct page_cache *, int fd, sh_off_t); /* ... and clear it */
-void pgc_put(struct page_cache *, struct page *); /* Release page */
-void pgc_mark_dirty(struct page_cache *, struct page *); /* Mark locked page as dirty */
-byte *pgc_read_data(struct page_cache *, int fd, sh_off_t, uns *); /* Partial reading */
-
-#endif
+++ /dev/null
-/*
- * UCW Library -- Mapping of File Parts
- *
- * (c) 2003--2006 Martin Mares <mj@ucw.cz>
- * (c) 2003--2005 Robert Spalek <robert@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-#include "lib/lfs.h"
-#include "lib/partmap.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include <sys/stat.h>
-#include <sys/mman.h>
-
-#ifdef CONFIG_PARTMAP_IS_MMAP
-#define PARTMAP_WINDOW ~(size_t)0
-#else
-#ifdef TEST
-#define PARTMAP_WINDOW 4096
-#else
-#define PARTMAP_WINDOW 16777216
-#endif
-#endif
-
-struct partmap *
-partmap_open(char *name, int writeable)
-{
- struct partmap *p = xmalloc_zero(sizeof(struct partmap));
-
- p->fd = sh_open(name, writeable ? O_RDWR : O_RDONLY);
- if (p->fd < 0)
- die("open(%s): %m", name);
- if ((p->file_size = sh_seek(p->fd, 0, SEEK_END)) < 0)
- die("lseek(%s): %m", name);
- p->writeable = writeable;
-#ifdef CONFIG_PARTMAP_IS_MMAP
- partmap_load(p, 0, p->file_size);
-#endif
- return p;
-}
-
-sh_off_t
-partmap_size(struct partmap *p)
-{
- return p->file_size;
-}
-
-void
-partmap_close(struct partmap *p)
-{
- if (p->start_map)
- munmap(p->start_map, p->end_off - p->start_off);
- close(p->fd);
- xfree(p);
-}
-
-void
-partmap_load(struct partmap *p, sh_off_t start, uns size)
-{
- if (p->start_map)
- munmap(p->start_map, p->end_off - p->start_off);
- sh_off_t end = start + size;
- sh_off_t win_start = start/CPU_PAGE_SIZE * CPU_PAGE_SIZE;
- size_t win_len = PARTMAP_WINDOW;
- if ((sh_off_t) (win_start+win_len) > p->file_size)
- win_len = ALIGN_TO(p->file_size - win_start, CPU_PAGE_SIZE);
- if ((sh_off_t) (win_start+win_len) < end)
- die("partmap_map: Window is too small for mapping %d bytes", size);
- p->start_map = sh_mmap(NULL, win_len, p->writeable ? (PROT_READ | PROT_WRITE) : PROT_READ, MAP_SHARED, p->fd, win_start);
- if (p->start_map == MAP_FAILED)
- die("mmap failed at position %lld: %m", (long long)win_start);
- p->start_off = win_start;
- p->end_off = win_start+win_len;
- madvise(p->start_map, win_len, MADV_SEQUENTIAL);
-}
-
-#ifdef TEST
-int main(int argc, char **argv)
-{
- struct partmap *p = partmap_open(argv[1], 0);
- uns l = partmap_size(p);
- uns i;
- for (i=0; i<l; i++)
- putchar(*(char *)partmap_map(p, i, 1));
- partmap_close(p);
- return 0;
-}
-#endif
+++ /dev/null
-/*
- * UCW Library -- Mapping of File Parts
- *
- * (c) 2003--2006 Martin Mares <mj@ucw.cz>
- * (c) 2003--2005 Robert Spalek <robert@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#ifndef _UCW_PARTMAP_H
-#define _UCW_PARTMAP_H
-
-struct partmap {
- int fd;
- sh_off_t file_size;
- sh_off_t start_off, end_off;
- byte *start_map;
- int writeable;
-};
-
-struct partmap *partmap_open(char *name, int writeable);
-void partmap_close(struct partmap *p);
-sh_off_t partmap_size(struct partmap *p);
-void partmap_load(struct partmap *p, sh_off_t start, uns size);
-
-static inline void *
-partmap_map(struct partmap *p, sh_off_t start, uns size UNUSED)
-{
-#ifndef CONFIG_PARTMAP_IS_MMAP
- if (unlikely(!p->start_map || start < p->start_off || (sh_off_t) (start+size) > p->end_off))
- partmap_load(p, start, size);
-#endif
- return p->start_map + (start - p->start_off);
-}
-
-static inline void *
-partmap_map_forward(struct partmap *p, sh_off_t start, uns size UNUSED)
-{
-#ifndef CONFIG_PARTMAP_IS_MMAP
- if (unlikely((sh_off_t) (start+size) > p->end_off))
- partmap_load(p, start, size);
-#endif
- return p->start_map + (start - p->start_off);
-}
-
-#endif
+++ /dev/null
-/*
- * UCW Library -- Shell-Like Case-Insensitive Pattern Matching (currently only '?' and '*')
- *
- * (c) 1997 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-#include "lib/chartype.h"
-
-#define Convert(x) Cupcase(x)
-#define MATCH_FUNC_NAME match_pattern_nocase
-
-#include "lib/patmatch.h"
+++ /dev/null
-/*
- * UCW Library -- Shell-Like Pattern Matching (currently only '?' and '*')
- *
- * (c) 1997 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-
-#define Convert(x) (x)
-#define MATCH_FUNC_NAME match_pattern
-
-#include "lib/patmatch.h"
+++ /dev/null
-/*
- * UCW Library -- Generic Shell-Like Pattern Matching (currently only '?' and '*')
- *
- * (c) 1997 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-int
-MATCH_FUNC_NAME(const char *p, const char *s)
-{
- while (*p)
- {
- if (*p == '?' && *s)
- p++, s++;
- else if (*p == '*')
- {
- int z = p[1];
-
- if (!z)
- return 1;
- if (z == '\\' && p[2])
- z = p[2];
- z = Convert(z);
- for(;;)
- {
- while (*s && Convert(*s) != z)
- s++;
- if (!*s)
- return 0;
- if (MATCH_FUNC_NAME(p+1, s))
- return 1;
- s++;
- }
- }
- else
- {
- if (*p == '\\' && p[1])
- p++;
- if (Convert(*p++) != Convert(*s++))
- return 0;
- }
- }
- return !*s;
-}
+++ /dev/null
-# Poor Man's CGI Module for Perl
-#
-# (c) 2002--2007 Martin Mares <mj@ucw.cz>
-# Slightly modified by Tomas Valla <tom@ucw.cz>
-#
-# This software may be freely distributed and used according to the terms
-# of the GNU Lesser General Public License.
-
-# FIXME:
-# - respond with proper HTTP error codes
-# - if we get invalid parameters, generate HTTP error or redirect
-
-package UCW::CGI;
-
-# First of all, set up error handling, so that even errors during parsing
-# will be reported properly.
-
-# Variables to be set by the calling module:
-# $UCW::CGI::error_mail mail address of the script admin (optional)
-# (this one has to be set in the BEGIN block!)
-# $UCW::CGI::error_hook function to be called for reporting errors
-
-my $error_reported;
-my $exit_code;
-my $debug = 0;
-
-sub report_bug($)
-{
- if (!defined $error_reported) {
- $error_reported = 1;
- print STDERR $_[0];
- if (defined($UCW::CGI::error_hook)) {
- &$UCW::CGI::error_hook($_[0]);
- } else {
- print "Content-type: text/plain\n\n";
- print "Internal bug:\n";
- print $_[0], "\n";
- print "Please notify $UCW::CGI::error_mail\n" if defined $UCW::CGI::error_mail;
- }
- }
- die;
-}
-
-BEGIN {
- $SIG{__DIE__} = sub { report_bug($_[0]); };
- $SIG{__WARN__} = sub { report_bug("WARNING: " . $_[0]); };
- $exit_code = 0;
-}
-
-END {
- $? = $exit_code;
-}
-
-use strict;
-use warnings;
-
-require Exporter;
-our $VERSION = 1.0;
-our @ISA = qw(Exporter);
-our @EXPORT = qw(&html_escape &url_escape &url_param_escape &self_ref &self_form &http_get);
-our @EXPORT_OK = qw();
-
-### Escaping ###
-
-# Return the argument with every character outside the set of characters
-# left untouched by the pattern below replaced by a %xx escape.
-sub url_escape($) {
-	my ($text) = @_;
-	$text =~ s{([^-\$_.!*'(),0-9A-Za-z\x80-\xff])}{ "%" . unpack('H2', $1) }ge;
-	return $text;
-}
-
-sub url_param_escape($) {
- my $x = shift @_;
- $x = url_escape($x);
- $x =~ s/%20/+/g;
- return $x;
-}
-
-# Return the argument with the HTML special characters &, <, > and "
-# replaced by the corresponding character entities. The ampersand has to be
-# handled first, otherwise the entities produced by the later substitutions
-# would be escaped once more.
-sub html_escape($) {
-	my $x = shift @_;
-	$x =~ s/&/&amp;/g;
-	$x =~ s/</&lt;/g;
-	$x =~ s/>/&gt;/g;
-	$x =~ s/"/&quot;/g;
-	return $x;
-}
-
-### Analysing RFC 822 Style Headers ###
-
-sub rfc822_prepare($) {
- my $x = shift @_;
- # Convert all %'s and backslash escapes to %xx escapes
- $x =~ s/%/%25/g;
- $x =~ s/\\(.)/"%".unpack("H2",$1)/ge;
- # Remove all comments, beware, they can be nested (unterminated comments are closed at EOL automatically)
- while ($x =~ s/^(("[^"]*"|[^"(])*(\([^)]*)*)(\([^()]*(\)|$))/$1 /) { }
- # Remove quotes and escape dangerous characters inside (again closing at the end automatically)
- $x =~ s{"([^"]*)("|$)}{my $z=$1; $z =~ s/([^0-9a-zA-Z%_-])/"%".unpack("H2",$1)/ge; $z;}ge;
- # All control characters are properly escaped, tokens are clearly visible.
- # Finally remove all unnecessary spaces.
- $x =~ s/\s+/ /g;
- $x =~ s/(^ | $)//g;
- $x =~ s{\s*([()<>@,;:\\"/\[\]?=])\s*}{$1}g;
- return $x;
-}
-
-sub rfc822_deescape($) {
- my $x = shift @_;
- $x =~ s/%(..)/pack("H2",$1)/ge;
- return $x;
-}
-
-### Reading of HTTP headers ###
-
-# Return the value of the given HTTP request header as found in the
-# environment. The header name is converted to upper case with dashes
-# replaced by underscores; the HTTP_-prefixed variable is preferred and the
-# unprefixed variable is used as a fallback.
-sub http_get($) {
-	my $h = shift @_;
-	$h =~ tr/a-z-/A-Z_/;
-	my $v = $ENV{"HTTP_$h"};
-	# Test for presence instead of truth, so that a header whose value
-	# is "0" is not mistaken for a missing one.
-	return $v if defined($v) && $v ne "";
-	return $ENV{"$h"};
-}
-
-### Parsing of Arguments ###
-
-my $arg_table;
-
-sub parse_arg_string($) {
- my ($s) = @_;
- $s =~ s/\s+//;
- foreach $_ (split /[&:]/,$s) {
- (/^([^=]+)=(.*)$/) or next;
- my $arg = $arg_table->{$1} or next;
- $_ = $2;
- s/\+/ /g;
- s/%(..)/pack("H2",$1)/eg;
- s/\r\n/\n/g;
- s/\r/\n/g;
- $arg->{'multiline'} || s/(\n|\t)/ /g;
- s/^\s+//;
- s/\s+$//;
- if (my $rx = $arg->{'check'}) {
- if (!/^$rx$/) { $_ = $arg->{'default'}; }
- }
-
- my $r = ref($arg->{'var'});
- if ($r eq 'SCALAR') {
- ${$arg->{'var'}} = $_;
- } elsif ($r eq 'ARRAY') {
- push @{$arg->{'var'}}, $_;
- }
- }
-}
-
-sub parse_multipart_form_data();
-
-sub parse_args($) {
- $arg_table = shift @_;
- if (!defined $ENV{"GATEWAY_INTERFACE"}) {
- print STDERR "Must be called as a CGI script.\n";
- $exit_code = 1;
- exit;
- }
- foreach my $a (values %$arg_table) {
- my $r = ref($a->{'var'});
- defined($a->{'default'}) or $a->{'default'}="";
- if ($r eq 'SCALAR') {
- ${$a->{'var'}} = $a->{'default'};
- } elsif ($r eq 'ARRAY') {
- @{$a->{'var'}} = ();
- }
- }
- my $method = $ENV{"REQUEST_METHOD"};
- my $qs = $ENV{"QUERY_STRING"};
- parse_arg_string($qs) if defined($qs);
- if ($method eq "GET") {
- } elsif ($method eq "POST") {
- if ($ENV{"CONTENT_TYPE"} =~ /^application\/x-www-form-urlencoded\b/i) {
- while (<STDIN>) {
- chomp;
- parse_arg_string($_);
- }
- } elsif ($ENV{"CONTENT_TYPE"} =~ /^multipart\/form-data\b/i) {
- parse_multipart_form_data();
- } else {
- die "Unknown content type for POST data";
- }
- } else {
- die "Unknown request method";
- }
-}
-
-### Parsing Multipart Form Data ###
-
-my $boundary;
-my $boundary_len;
-my $mp_buffer;
-my $mp_buffer_i;
-my $mp_buffer_boundary;
-my $mp_eof;
-
-sub refill_mp_data($) {
- my ($more) = @_;
- if ($mp_buffer_boundary >= $mp_buffer_i) {
- return $mp_buffer_boundary - $mp_buffer_i;
- } elsif ($mp_buffer_i + $more <= length($mp_buffer) - $boundary_len) {
- return $more;
- } else {
- if ($mp_buffer_i) {
- $mp_buffer = substr($mp_buffer, $mp_buffer_i);
- $mp_buffer_i = 0;
- }
- while ($mp_buffer_i + $more > length($mp_buffer) - $boundary_len) {
- last if $mp_eof;
- my $data;
- my $n = read(STDIN, $data, 2048);
- if ($n > 0) {
- $mp_buffer .= $data;
- } else {
- $mp_eof = 1;
- }
- }
- $mp_buffer_boundary = index($mp_buffer, $boundary, $mp_buffer_i);
- if ($mp_buffer_boundary >= 0) {
- return $mp_buffer_boundary;
- } elsif ($mp_eof) {
- return length($mp_buffer);
- } else {
- return length($mp_buffer) - $boundary_len;
- }
- }
-}
-
-sub get_mp_line($) {
- my ($allow_empty) = @_;
- my $n = refill_mp_data(1024);
- my $i = index($mp_buffer, "\r\n", $mp_buffer_i);
- if ($i >= $mp_buffer_i && $i < $mp_buffer_i + $n - 1) {
- my $s = substr($mp_buffer, $mp_buffer_i, $i - $mp_buffer_i);
- $mp_buffer_i = $i + 2;
- return $s;
- } elsif ($allow_empty) {
- if ($n) { # An incomplete line
- my $s = substr($mp_buffer, $mp_buffer_i, $n);
- $mp_buffer_i += $n;
- return $s;
- } else { # No more lines
- return undef;
- }
- } else {
- die "Premature end of multipart POST data";
- }
-}
-
-sub skip_mp_boundary() {
- if ($mp_buffer_boundary != $mp_buffer_i) {
- die "Premature end of multipart POST data";
- }
- $mp_buffer_boundary = -1;
- $mp_buffer_i += 2;
- my $b = get_mp_line(0);
- print STDERR "SEP $b\n" if $debug;
- $mp_buffer_boundary = index($mp_buffer, $boundary, $mp_buffer_i);
- if ("\r\n$b" =~ /^$boundary--/) {
- return 0;
- } else {
- return 1;
- }
-}
-
-sub parse_mp_header() {
- my $h = {};
- my $last;
- while ((my $l = get_mp_line(0)) ne "") {
- print STDERR "HH $l\n" if $debug;
- if (my ($name, $value) = ($l =~ /([A-Za-z0-9-]+)\s*:\s*(.*)/)) {
- $name =~ tr/A-Z/a-z/;
- $h->{$name} = $value;
- $last = $name;
- } elsif ($l =~ /^\s+/ && $last) {
- $h->{$last} .= $l;
- } else {
- $last = undef;
- }
- }
- foreach my $n (keys %$h) {
- $h->{$n} = rfc822_prepare($h->{$n});
- print STDERR "H $n: $h->{$n}\n" if $debug;
- }
- return (keys %$h) ? $h : undef;
-}
-
-sub parse_multipart_form_data() {
- # First of all, find the boundary string
- my $ct = rfc822_prepare($ENV{"CONTENT_TYPE"});
- if (!(($boundary) = ($ct =~ /^.*;boundary=([^; ]+)/))) {
- die "Multipart content with no boundary string received";
- }
- $boundary = rfc822_deescape($boundary);
- print STDERR "BOUNDARY IS $boundary\n" if $debug;
-
- # BUG: IE 3.01 on Macintosh forgets to add the "--" at the start of the boundary string
- # as the MIME specs preach. Workaround borrowed from CGI.pm in Perl distribution.
- my $agent = http_get("User-agent") || "";
- $boundary = "--$boundary" unless $agent =~ /MSIE\s+3\.0[12];\s*Mac/;
- $boundary = "\r\n$boundary";
- $boundary_len = length($boundary) + 2;
-
- # Check upload size in advance
- if (my $size = http_get("Content-Length")) {
- my $max_allowed = 0;
- foreach my $a (values %$arg_table) {
- $max_allowed += $a->{"maxsize"} || 65536;
- }
- if ($size > $max_allowed) {
- die "Maximum form data length exceeded";
- }
- }
-
- # Initialize our buffering mechanism and part splitter
- $mp_buffer = "\r\n";
- $mp_buffer_i = 0;
- $mp_buffer_boundary = -1;
- $mp_eof = 0;
-
- # Skip garbage before the 1st part
- while (my $i = refill_mp_data(256)) { $mp_buffer_i += $i; }
- skip_mp_boundary() || return;
-
- # Process individual parts
- do { PART: {
- print STDERR "NEXT PART\n" if $debug;
- my $h = parse_mp_header();
- my ($field, $cdisp, $a);
- if ($h &&
- ($cdisp = $h->{"content-disposition"}) &&
- $cdisp =~ /^form-data/ &&
- (($field) = ($cdisp =~ /;name=([^;]+)/)) &&
- ($a = $arg_table->{"$field"})) {
- print STDERR "FIELD $field\n" if $debug;
- if (defined $h->{"content-transfer-encoding"}) { die "Unexpected Content-Transfer-Encoding"; }
- if (defined $a->{"var"}) {
- while (defined (my $l = get_mp_line(1))) {
- print STDERR "VALUE $l\n" if $debug;
- parse_arg_string("$field=$l");
- }
- next PART;
- } elsif (defined $a->{"file"}) {
- require File::Temp;
- require IO::Handle;
- my $max_size = $a->{"maxsize"} || 1048576;
- my @tmpargs = (undef, UNLINK => 1);
- push @tmpargs, DIR => $a->{"tmpdir"} if defined $a->{"tmpdir"};
- my ($fh, $fn) = File::Temp::tempfile(@tmpargs);
- print STDERR "FILE UPLOAD to $fn\n" if $debug;
- ${$a->{"file"}} = $fn;
- ${$a->{"fh"}} = $fh if defined $a->{"fh"};
- my $total_size = 0;
- while (my $i = refill_mp_data(4096)) {
- print $fh substr($mp_buffer, $mp_buffer_i, $i);
- $mp_buffer_i += $i;
- $total_size += $i;
- if ($total_size > $max_size) { die "Uploaded file too long"; }
- }
- $fh->flush(); # Don't close the handle, the file would disappear otherwise
- next PART;
- }
- }
- print STDERR "SKIPPING\n" if $debug;
- while (my $i = refill_mp_data(256)) { $mp_buffer_i += $i; }
- } } while (skip_mp_boundary());
-}
-
-### Generating Self-ref URL's ###
-
-sub make_out_args($) {
- my ($overrides) = @_;
- my $out = {};
- foreach my $name (keys %$arg_table) {
- my $arg = $arg_table->{$name};
- defined($arg->{'var'}) || next;
- defined($arg->{'pass'}) && !$arg->{'pass'} && !exists $overrides->{$name} && next;
- my $value;
- if (!defined($value = $overrides->{$name})) {
- if (exists $overrides->{$name}) {
- $value = $arg->{'default'};
- } else {
- $value = ${$arg->{'var'}};
- }
- }
- if ($value ne $arg->{'default'}) {
- $out->{$name} = $value;
- }
- }
- return $out;
-}
-
-sub self_ref(@) {
- my %h = @_;
- my $out = make_out_args(\%h);
- return "?" . join(':', map { "$_=" . url_param_escape($out->{$_}) } sort keys %$out);
-}
-
-# Return a string of hidden <input> elements, one line per argument, carrying
-# the arguments selected by make_out_args() for the given overrides
-# (a list of name => value pairs). The elements are sorted by argument name.
-sub self_form(@) {
-	my %h = @_;
-	my $out = make_out_args(\%h);
-	return join('', map {
-		my $v = html_escape($out->{$_});
-		# The attribute is delimited by apostrophes, so an apostrophe
-		# inside the value must not be emitted verbatim.
-		$v =~ s/'/&#39;/g;
-		"<input type=hidden name=$_ value='" . $v . "'>\n"
-	} sort keys %$out);
-}
-
-### Cookies
-
-sub cookie_esc($) {
- my $x = shift @_;
- if ($x !~ /^[a-zA-Z0-9%]+$/) {
- $x =~ s/([\\\"])/\\$1/g;
- $x = "\"$x\"";
- }
- return $x;
-}
-
-sub set_cookie($$@) {
- my $key = shift @_;
- my $value = shift @_;
- my %other = @_;
- $other{'version'} = 1 unless defined $other{'version'};
- print "Set-Cookie: $key=", cookie_esc($value);
- foreach my $k (keys %other) {
- print ";$k=", cookie_esc($other{$k});
- }
- print "\n";
-}
-
-sub parse_cookies() {
- my $h = http_get("Cookie") or return ();
- my @cook = ();
- while (my ($padding,$name,$val,$xx,$rest) = ($h =~ /\s*([,;]\s*)*([^ =]+)=([^ =,;\"]*|\"([^\"\\]|\\.)*\")(\s.*|;.*|$)/)) {
- if ($val =~ /^\"/) {
- $val =~ s/^\"//;
- $val =~ s/\"$//;
- $val =~ s/\\(.)/$1/g;
- }
- push @cook, $name, $val;
- $h = $rest;
- }
- return @cook;
-}
-
-1; # OK
+++ /dev/null
-# Perl module for parsing Sherlock configuration files (using the config utility)
-#
-# (c) 2002--2005 Martin Mares <mj@ucw.cz>
-#
-# This software may be freely distributed and used according to the terms
-# of the GNU Lesser General Public License.
-
-package UCW::Config;
-
-use strict;
-use warnings;
-use Getopt::Long;
-
-our %Sections = ();
-
-our $DefaultConfigFile = "";
-our $Usage = "-C, --config filename Override the default configuration file
--S, --set sec.item=val Manual setting of a configuration item";
-
-
-# Parse the command line and load all registered configuration sections.
-#
-# The arguments form a Getopt::Long option specification which is passed to
-# GetOptions(); the -C/--config and -S/--set options are added automatically
-# and forwarded to bin/config. If no -C was given and $DefaultConfigFile is
-# set, it is passed to bin/config as the configuration file.
-#
-# For every section in %Sections, bin/config is run and each reported item is
-# stored through the reference registered for it: a scalar reference gets the
-# value assigned, an array reference gets it appended. Trailing '#' and '$'
-# characters of the registered item names are stripped before the names
-# reported by bin/config are looked up.
-#
-# Returns 0 when option parsing fails and 1 on success. Exits with status 1
-# when bin/config fails and dies on output which cannot be parsed.
-sub Parse(@) {
-	my @options = @_;
-	my $defargs = "";
-	my $override_config = 0;
-	push @options, "config|C=s" => sub { my ($o,$a)=@_; $defargs .= " -C'$a'"; $override_config=1; };
-	push @options, "set|S=s" => sub { my ($o,$a)=@_; $defargs .= " -S'$a'"; };
-	Getopt::Long::Configure("bundling");
-	Getopt::Long::GetOptions(@options) or return 0;
-	if (!$override_config && $DefaultConfigFile) {
-		$defargs = "-C'$DefaultConfigFile' $defargs";
-	}
-	foreach my $section (keys %Sections) {
-		my $opts = $Sections{$section};
-		my $optlist = join(";", keys %$opts);
-		my %filtered_opts = map { my $t=$_; $t=~s/[#\$]+$//; $t => $$opts{$_} } keys %$opts;
-		my @l = `bin/config $defargs "$section\{$optlist\}"`;
-		$? && exit 1;
-		foreach my $o (@l) {
-			# Report the offending line itself; $_ is not set by this loop
-			$o =~ /^CF_.*_([^=]+)='(.*)'\n$/ or die "Cannot parse bin/config output: $o";
-			my $var = $filtered_opts{$1};
-			my $val = $2;
-			if (ref $var eq "SCALAR") {
-				$$var = $val;
-			} elsif (ref $var eq "ARRAY") {
-				push @$var, $val;
-			} elsif (ref $var) {
-				die ("UCW::Config::Parse: don't know how to set $o");
-			}
-		}
-	}
-	1;
-}
-
-1; # OK
+++ /dev/null
-# Perl module for UCW Configure Scripts
-#
-# (c) 2005 Martin Mares <mj@ucw.cz>
-#
-# This software may be freely distributed and used according to the terms
-# of the GNU Lesser General Public License.
-
-package UCW::Configure;
-
-use strict;
-use warnings;
-
-BEGIN {
- # The somewhat hairy Perl export mechanism
- use Exporter();
- our ($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);
- $VERSION = 1.0;
- @ISA = qw(Exporter);
- @EXPORT = qw(&Init &Log &Notice &Warn &Fail &IsSet &IsGiven &Set &UnSet &Append &Override &Get &Test &Include &Finish &FindFile &TryFindFile &TryCmd &PkgConfig &TrivConfig);
- @EXPORT_OK = qw();
- %EXPORT_TAGS = ();
-}
-
-our %vars = ();
-our %overriden = ();
-
-sub Log($) {
- print @_;
-}
-
-sub Notice($) {
- print @_ if $vars{"VERBOSE"};
-}
-
-sub Warn($) {
- print "WARNING: ", @_;
-}
-
-sub Fail($) {
- Log("ERROR: " . (shift @_) . "\n");
- exit 1;
-}
-
-sub IsSet($) {
- my ($x) = @_;
- return exists $vars{$x};
-}
-
-sub IsGiven($) {
- my ($x) = @_;
- return exists $overriden{$x};
-}
-
-sub Get($) {
- my ($x) = @_;
- return $vars{$x};
-}
-
-sub Set($;$) {
- my ($x,$y) = @_;
- $y=1 unless defined $y;
- $vars{$x}=$y unless $overriden{$x};
-}
-
-sub UnSet($) {
- my ($x) = @_;
- delete $vars{$x} unless $overriden{$x};
-}
-
-sub Append($$) {
- my ($x,$y) = @_;
- Set($x, (IsSet($x) ? (Get($x) . " $y") : $y));
-}
-
-sub Override($;$) {
- my ($x,$y) = @_;
- $y=1 unless defined $y;
- $vars{$x}=$y;
- $overriden{$x} = 1;
-}
-
-sub Test($$$) {
- my ($var,$msg,$sub) = @_;
- Log "$msg ... ";
- if (!IsSet($var)) {
- Set $var, &$sub();
- }
- Log Get($var) . "\n";
-}
-
-sub TryFindFile($) {
- my ($f) = @_;
- if (-f $f) {
- return $f;
- } elsif ($f !~ /^\// && -f (Get("SRCDIR")."/$f")) {
- return Get("SRCDIR")."/$f";
- } else {
- return undef;
- }
-}
-
-sub FindFile($) {
- my ($f) = @_;
- my $F;
- defined ($F = TryFindFile($f)) or Fail "Cannot find file $f";
- return $F;
-}
-
-sub Init($$) {
- my ($srcdir,$defconfig) = @_;
- sub usage($) {
- my ($dc) = @_;
- print STDERR "Usage: [<srcdir>/]configure " . (defined $dc ? "[" : "") . "<config-name>" . (defined $dc ? "]" : "") .
- " [<option>[=<value>] | -<option>] ...\n";
- exit 1;
- }
- Set('CONFIG' => $defconfig) if defined $defconfig;
- if (@ARGV) {
- usage($defconfig) if $ARGV[0] eq "--help";
- if (!defined($defconfig) || $ARGV[0] !~ /^-?[A-Z][A-Z0-9_]*(=|$)/) {
- # This does not look like an option, so read it as a file name
- Set('CONFIG' => shift @ARGV);
- }
- }
- Set("SRCDIR", $srcdir);
-
- foreach my $x (@ARGV) {
- if ($x =~ /^(\w+)=(.*)/) {
- Override($1 => $2);
- } elsif ($x =~ /^-(\w+)$/) {
- Override($1 => 0);
- delete $vars{$1};
- } elsif ($x =~ /^(\w+)$/) {
- Override($1 => 1);
- } else {
- print STDERR "Invalid option $x\n";
- exit 1;
- }
- }
-
- defined Get("CONFIG") or usage($defconfig);
- if (!TryFindFile(Get("CONFIG"))) {
- TryFindFile(Get("CONFIG")."/config") or Fail "Cannot find configuration " . Get("CONFIG");
- Override("CONFIG" => Get("CONFIG")."/config");
- }
-}
-
-sub Include($) {
- my ($f) = @_;
- $f = FindFile($f);
- Notice "Loading configuration $f\n";
- require $f;
-}
-
-# Finish the configuration run: prepare the build tree and write the results.
-#
-# When SRCDIR is not the current directory, a "src" symlink to it and
-# a "Makefile" symlink to src/Makefile are created and SRCDIR is overridden
-# to "src". The obj directory is then recreated from scratch and two files
-# are generated from %vars:
-#   obj/autoconf.h	#define's for all variables whose name contains an underscore
-#   obj/config.mk	assignments of all variables, plus the s and o shortcuts
-# Any failure is reported through Fail(), which terminates the script.
-sub Finish() {
-	print "\n";
-
-	if (Get("SRCDIR") ne ".") {
-		Log "Preparing for compilation from directory " . Get("SRCDIR") . " to obj/ ... ";
-		-l "src" and unlink "src";
-		symlink Get("SRCDIR"), "src" or Fail "Cannot link source directory to src: $!";
-		Override("SRCDIR" => "src");
-		-l "Makefile" and unlink "Makefile";
-		-f "Makefile" and Fail "Makefile already exists";
-		symlink "src/Makefile", "Makefile" or Fail "Cannot link Makefile: $!";
-	} else {
-		Log "Preparing for compilation from current directory to obj/ ... ";
-	}
-	# Look at the exit status only when rm was really run, otherwise $?
-	# still holds the status of some earlier, unrelated command.
-	if (-d "obj") {
-		`rm -rf obj`;
-		Fail "Cannot delete old obj directory" if $?;
-	}
-	-d "obj" or mkdir("obj", 0777) or Fail "Cannot create obj directory: $!";
-	-d "obj/lib" or mkdir("obj/lib", 0777) or Fail "Cannot create obj/lib directory: $!";
-	Log "done\n";
-
-	Log "Generating autoconf.h ... ";
-	open X, ">obj/autoconf.h" or Fail $!;
-	print X "/* Generated automatically by $0, please don't touch manually. */\n";
-	foreach my $x (sort keys %vars) {
-		# Don't export variables which contain no underscores
-		next unless $x =~ /_/;
-		my $v = $vars{$x};
-		# Try to add quotes if necessary
-		$v = '"' . $v . '"' unless ($v =~ /^"/ || $v =~ /^\d*$/);
-		print X "#define $x $v\n";
-	}
-	close X;
-	Log "done\n";
-
-	Log "Generating config.mk ... ";
-	open X, ">obj/config.mk" or Fail $!;
-	print X "# Generated automatically by $0, please don't touch manually.\n";
-	foreach my $x (sort keys %vars) {
-		print X "$x=$vars{$x}\n";
-	}
-	print X "s=\${SRCDIR}\n";
-	print X "o=obj\n";
-	close X;
-	Log "done\n";
-}
-
-sub TryCmd($) {
- my ($cmd) = @_;
- my $res = `$cmd`;
- chomp $res;
- return $res unless $?;
- return;
-}
-
-sub maybe_manually($) {
- my ($n) = @_;
- if (IsGiven($n)) {
- if (Get("$n")) { Log "YES (set manually)\n"; }
- else { Log "NO (set manually)\n"; }
- return 1;
- }
- return 0;
-}
-
-sub PkgConfig($@) {
- my $pkg = shift @_;
- my %opts = @_;
- my $upper = $pkg; $upper =~ tr/a-z/A-Z/; $upper =~ s/[^0-9A-Z]+/_/g;
- Log "Checking for package $pkg ... ";
- maybe_manually("CONFIG_HAVE_$upper") and return Get("CONFIG_HAVE_$upper");
- my $ver = TryCmd("pkg-config --modversion $pkg 2>/dev/null");
- if (!defined $ver) {
- Log("NONE\n");
- return 0;
- }
- if (defined($opts{minversion})) {
- my $min = $opts{minversion};
- if (!defined TryCmd("pkg-config --atleast-version=$min $pkg")) {
- Log("NO: version $ver is too old (need >= $min)\n");
- return 0;
- }
- }
- Log("YES: version $ver\n");
- Set("CONFIG_HAVE_$upper" => 1);
- Set("CONFIG_VER_$upper" => $ver);
- my $cf = TryCmd("pkg-config --cflags $pkg");
- Set("CFLAGS_$upper" => $cf) if defined $cf;
- my $lf = TryCmd("pkg-config --libs $pkg");
- Set("LIBS_$upper" => $lf) if defined $lf;
- return 1;
-}
-
-# Normalize a dotted version string for comparison as a string: every
-# dot-separated component is right-justified to at least 5 characters
-# and padded with zeros.
-sub ver_norm($) {
-	my ($version) = @_;
-	my @padded = ();
-	foreach my $component (split(/\./, $version)) {
-		push @padded, sprintf("%05s", $component);
-	}
-	return join(".", @padded);
-}
-
-sub TrivConfig($@) {
- my $pkg = shift @_;
- my %opts = @_;
- my $upper = $pkg; $upper =~ tr/a-z/A-Z/; $upper =~ s/[^0-9A-Z]+/_/g;
- Log "Checking for package $pkg ... ";
- maybe_manually("CONFIG_HAVE_$upper") and return Get("CONFIG_HAVE_$upper");
- my $pc = $opts{script};
- my $ver = TryCmd("$pc --version 2>/dev/null");
- if (!defined $ver) {
- Log("NONE\n");
- return 0;
- }
- if (defined($opts{minversion})) {
- my $min = $opts{minversion};
- if (ver_norm($ver) lt ver_norm($min)) {
- Log("NO: version $ver is too old (need >= $min)\n");
- return 0;
- }
- }
- Log("YES: version $ver\n");
- Set("CONFIG_HAVE_$upper" => 1);
- Set("CONFIG_VER_$upper" => $ver);
- my $cf = TryCmd("$pc --cflags");
- Set("CFLAGS_$upper" => $cf) if defined $cf;
- my $lf = TryCmd("$pc --libs");
- Set("LIBS_$upper" => $lf) if defined $lf;
- return 1;
-}
-
-1; # OK
+++ /dev/null
-# Perl module for managing file locks
-#
-# (c) 2007 Pavel Charvat <pchar@ucw.cz>
-#
-# This software may be freely distributed and used according to the terms
-# of the GNU Lesser General Public License.
-#
-#
-#
-# Interface:
-# UCW::Filelock::fcntl_lock($fd, $cmd, $type, $whence, $start, $len)
-#
-
-package UCW::Filelock;
-
-use 5.006;
-use strict;
-use warnings;
-
-require DynaLoader;
-
-our @ISA = qw(DynaLoader);
-unshift @DynaLoader::dl_library_path, "lib";
-
-our $VERSION = '0.01';
-
-bootstrap UCW::Filelock $VERSION;
-
-# Preloaded methods go here.
-
-1;
-__END__
+++ /dev/null
-/*
- * PerlXS module for managing file locks
- *
- * (c) 2007 Pavel Charvat <pchar@ucw.cz>
- */
-
-#include "EXTERN.h"
-#include "perl.h"
-#include "XSUB.h"
-
-#include <unistd.h>
-#include <fcntl.h>
-
-
-MODULE = UCW::Filelock PACKAGE = UCW::Filelock
-
-PROTOTYPES: ENABLED
-
-int
-fcntl_lock(IN int fd, IN int cmd, IN int type, IN int whence, IN int start, IN int len)
-CODE:
- struct flock fl;
- fl.l_type = type;
- fl.l_whence = whence;
- fl.l_start = start;
- fl.l_len = len;
-
- RETVAL = fcntl(fd, cmd, &fl);
-OUTPUT:
- RETVAL
+++ /dev/null
-Makefile
-Makefile.PL
-MANIFEST
-Fcntllock.pm
-Fcntllock.xs
-lib/Sherlock/.exists
+++ /dev/null
-# Makefile for the Filelock Perl module (c) 2007 Pavel Charvat <pchar@ucw.cz>
-
-# Source/object subdirectory of the module and the shared object built in it
-FILELOCK_DIR=lib/perl/Filelock
-FILELOCK_SO=$(o)/$(FILELOCK_DIR)/arch/auto/UCW/Filelock/Filelock.$(SOEXT)
-
-DIRS+=$(FILELOCK_DIR)/arch/auto/UCW/Filelock
-PROGS+=$(o)/$(FILELOCK_DIR)/Filelock.pm
-
-extras:: $(FILELOCK_SO)
-
-# Build the module by the MakeMaker-generated Makefile in the object tree
-# and copy the resulting shared object to the run tree
-$(FILELOCK_SO): $(addprefix $(o)/$(FILELOCK_DIR)/,Filelock.xs Filelock.pm Makefile)
-	$(M)MAKE $@
-	$(Q)cd $(o)/$(FILELOCK_DIR) && $(MAKE) -f Makefile $(MAKESILENT)
-	$(Q)touch $@
-	$(Q)cp $@ run/$(DATADIR)/
-
-# Let MakeMaker generate the Makefile
-$(o)/$(FILELOCK_DIR)/Makefile: $(o)/$(FILELOCK_DIR)/Makefile.PL
-	$(M)PREPARE $@
-	$(Q)cd $(o)/$(FILELOCK_DIR) && perl Makefile.PL
-
-# Copy the sources to the object tree, where the module is built
-$(addprefix $(o)/$(FILELOCK_DIR)/,Filelock.xs Makefile.PL): $(o)/%: $(s)/%
-	$(Q)cp $^ $@
+++ /dev/null
-# Makefile for Perl MakeMaker (c) 2007 Pavel Charvat <pchar@ucw.cz>
-
-use ExtUtils::MakeMaker;
-WriteMakefile(
- 'NAME' => 'UCW::Filelock',
- 'VERSION_FROM' => 'Filelock.pm',
- 'INST_LIB' => 'lib',
- 'INST_ARCHLIB' => 'arch',
-);
+++ /dev/null
-#
-# Perl module for Logging
-#
-# (c) 2007 Pavel Charvat <pchar@ucw.cz>
-#
-
-package UCW::Log;
-
-use lib 'lib/perl5';
-use strict;
-use warnings;
-use POSIX;
-use Exporter;
-
-our $version = 1.0;
-our @ISA = qw(Exporter);
-our @EXPORT = ();
-our %EXPORT_TAGS = ( all => [qw(&Log &Die)]);
-our @EXPORT_OK = (@{$EXPORT_TAGS{'all'}});
-
-my $Prog = (reverse split(/\//, $0))[0];
-
-sub Log {
- my $level = shift;
- my $text = join(' ', @_);
- print STDERR $level, strftime(" %Y-%m-%d %H:%M:%S ", localtime()), "[$Prog] ", $text, "\n";
-}
-
-sub Die {
- Log('!', @_);
- exit 1;
-}
-
-1;
+++ /dev/null
-# Perl modules
-
-DIRS+=lib/perl
-EXTRA_RUNDIRS+=lib/perl5/UCW
-PROGS+=$(addprefix $(o)/lib/perl/,Config.pm Log.pm CGI.pm)
-
-ifdef CONFIG_UCW_PERL_MODULES
-include $(s)/lib/perl/Ulimit/Makefile
-include $(s)/lib/perl/Filelock/Makefile
-endif
+++ /dev/null
-Makefile
-Makefile.PL
-MANIFEST
-Ulimit.pm
-Ulimit.xs
-lib/Sherlock/.exists
+++ /dev/null
-# Makefile for the Ulimit Perl module (c) 2003 Tomas Valla <tom@ucw.cz>
-
-# Source/object subdirectory of the module and the shared object built in it
-ULIMIT_DIR=lib/perl/Ulimit
-ULIMIT_SO=$(o)/$(ULIMIT_DIR)/arch/auto/UCW/Ulimit/Ulimit.$(SOEXT)
-
-DIRS+=$(ULIMIT_DIR)/arch/auto/UCW/Ulimit
-PROGS+=$(o)/$(ULIMIT_DIR)/Ulimit.pm
-
-extras:: $(ULIMIT_SO)
-
-# Build the module by the MakeMaker-generated Makefile in the object tree
-# and copy the resulting shared object to the run tree
-$(ULIMIT_SO): $(addprefix $(o)/$(ULIMIT_DIR)/,Ulimit.xs Ulimit.pm Makefile)
-	$(M)MAKE $@
-	$(Q)cd $(o)/$(ULIMIT_DIR) && $(MAKE) -f Makefile $(MAKESILENT)
-	$(Q)touch $@
-	$(Q)cp $@ run/$(DATADIR)/
-
-# Let MakeMaker generate the Makefile
-$(o)/$(ULIMIT_DIR)/Makefile: $(o)/$(ULIMIT_DIR)/Makefile.PL
-	$(M)PREPARE $@
-	$(Q)cd $(o)/$(ULIMIT_DIR) && perl Makefile.PL
-
-# Copy the sources to the object tree, where the module is built
-$(addprefix $(o)/$(ULIMIT_DIR)/,Ulimit.xs Makefile.PL): $(o)/%: $(s)/%
-	$(Q)cp $^ $@
+++ /dev/null
-# Makefile for Perl MakeMaker (c) 2003 Tomas Valla <tom@ucw.cz>
-
-use ExtUtils::MakeMaker;
-WriteMakefile(
- 'NAME' => 'UCW::Ulimit',
- 'VERSION_FROM' => 'Ulimit.pm',
- 'INST_LIB' => 'lib',
- 'INST_ARCHLIB' => 'arch',
-);
+++ /dev/null
-# Perl module for setting process limits
-#
-# (c) 2003 Tomas Valla <tom@ucw.cz>
-#
-# This software may be freely distributed and used according to the terms
-# of the GNU Lesser General Public License.
-#
-#
-#
-# Interface:
-# UCW::Ulimit::setlimit( $resource, $softlimit, $hardlimit)
-# UCW::Ulimit::getlimit( $resource, $softlimit, $hardlimit)
-#
-# setlimit sets limit to values supplied in softlimit and hardlimit
-# getlimit reads limits into softlimit and hardlimit
-# $resource constants are defined below
-#
-
-package UCW::Ulimit;
-
-use 5.006;
-use strict;
-use warnings;
-
-require DynaLoader;
-
-our @ISA = qw(DynaLoader);
-unshift @DynaLoader::dl_library_path, "lib";
-
-our $CPU = 0;
-our $FSIZE = 1;
-our $DATA = 2;
-our $STACK = 3;
-our $CORE = 4;
-our $RSS = 5;
-our $NPROC = 6;
-our $NOFILE = 7;
-our $MEMLOCK = 8;
-our $AS = 9;
-
-our $VERSION = '0.01';
-
-bootstrap UCW::Ulimit $VERSION;
-
-# Preloaded methods go here.
-
-1;
-__END__
+++ /dev/null
-/*
- * PerlXS module for managing process limits
- *
- * (c) 2003 Tomas Valla <tom@ucw.cz>
- */
-
-#include "EXTERN.h"
-#include "perl.h"
-#include "XSUB.h"
-
-#include <sys/resource.h>
-#include <unistd.h>
-
-
-MODULE = UCW::Ulimit PACKAGE = UCW::Ulimit
-
-PROTOTYPES: ENABLED
-
-int
-setlimit(IN int resource, IN int soft, IN int hard)
-CODE:
-	/*
-	 * Set the soft and hard limit of the given resource. The resource is
-	 * one of the module's own numbers 0..9, which are translated to the
-	 * system's RLIMIT_xxx constants here. Returns the result of setrlimit(),
-	 * or -1 without calling it when the resource number is unknown.
-	 */
-	struct rlimit rl;
-	int r = 0;
-	int known = 1;
-
-	switch(resource) {
-	case 0:
-		r = RLIMIT_CPU; break;
-	case 1:
-		r = RLIMIT_FSIZE; break;
-	case 2:
-		r = RLIMIT_DATA; break;
-	case 3:
-		r = RLIMIT_STACK; break;
-	case 4:
-		r = RLIMIT_CORE; break;
-	case 5:
-		r = RLIMIT_RSS; break;
-	case 6:
-		r = RLIMIT_NPROC; break;
-	case 7:
-		r = RLIMIT_NOFILE; break;
-	case 8:
-		r = RLIMIT_MEMLOCK; break;
-	case 9:
-		r = RLIMIT_AS; break;
-	default:
-		/* Never hand an uninitialized resource number to the kernel */
-		known = 0; break;
-	}
-	if (known) {
-		rl.rlim_cur = soft;
-		rl.rlim_max = hard;
-		RETVAL = setrlimit(r, &rl);
-	} else
-		RETVAL = -1;
-OUTPUT:
-	RETVAL
-
-
-int
-getlimit(IN int resource, OUT int soft, OUT int hard)
-CODE:
-	/*
-	 * Read the soft and hard limit of the given resource into soft and
-	 * hard. The resource is one of the module's own numbers 0..9, which
-	 * are translated to the system's RLIMIT_xxx constants here. Returns
-	 * the result of getrlimit(), or -1 without calling it when the
-	 * resource number is unknown. Both outputs are 0 if nothing was read.
-	 */
-	struct rlimit rl;
-	int r = 0;
-	int known = 1;
-
-	switch(resource) {
-	case 0:
-		r = RLIMIT_CPU; break;
-	case 1:
-		r = RLIMIT_FSIZE; break;
-	case 2:
-		r = RLIMIT_DATA; break;
-	case 3:
-		r = RLIMIT_STACK; break;
-	case 4:
-		r = RLIMIT_CORE; break;
-	case 5:
-		r = RLIMIT_RSS; break;
-	case 6:
-		r = RLIMIT_NPROC; break;
-	case 7:
-		r = RLIMIT_NOFILE; break;
-	case 8:
-		r = RLIMIT_MEMLOCK; break;
-	case 9:
-		r = RLIMIT_AS; break;
-	default:
-		/* Never hand an uninitialized resource number to the kernel */
-		known = 0; break;
-	}
-
-	/* Make sure the outputs are defined even if getrlimit() fails */
-	rl.rlim_cur = 0;
-	rl.rlim_max = 0;
-	RETVAL = known ? getrlimit(r, &rl) : -1;
-	soft = rl.rlim_cur;
-	hard = rl.rlim_max;
-OUTPUT:
-	RETVAL
+++ /dev/null
-/*
- * UCW Library -- Prefetch
- *
- * (c) 1997--2006 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#ifndef _UCW_PREFETCH_H
-#define _UCW_PREFETCH_H
-
-#if defined(__k6)
- /* K6 doesn't have prefetches */
-
-#elif defined(__athlon) || defined(__k8) || \
- defined(__i686) || \
- defined(__pentium4) || defined(__prescott) || defined(__nocona)
-
-#define HAVE_PREFETCH
-static inline void prefetch(void *addr)
-{
- asm volatile ("prefetcht0 %0" : : "m" (*(byte*)addr));
-}
-
-#else
-#warning "Don't know how to prefetch on your CPU. Please fix lib/prefetch.h."
-#endif
-
-#ifndef HAVE_PREFETCH
-static inline void prefetch(void *addr UNUSED)
-{
-}
-#endif
-
-#endif
+++ /dev/null
-/*
- * UCW Library -- Prime Number Tests
- *
- * (c) 1997 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-
-/*
- * Primality test by sequential trial division. The candidate divisors
- * start at 5 and advance alternately by 2 and by 4.
- * The caller guarantees that x != 2 && x != 3.
- */
-static int
-__isprime(uns x)
-{
-  uns divisor = 5;
-  uns step = 2;
-
-  if (x == 5)
-    return 1;
-  for(;;)
-    {
-      if (x % divisor == 0)
-        return 0;
-      if (x / divisor <= divisor)
-        return 1;
-      divisor += step;
-      step = 6 - step;		/* Alternate between +2 and +4 */
-    }
-}
-
-/*
- * Return non-zero if x is a prime. Numbers below 5 are decided directly,
- * numbers whose remainder modulo 6 is neither 1 nor 5 are rejected and the
- * rest is passed to __isprime().
- */
-int
-isprime(uns x)
-{
-  uns rem;
-
-  if (x < 5)
-    return (x == 2 || x == 3);
-  rem = x % 6;
-  if (rem == 1 || rem == 5)
-    return __isprime(x);
-  return 0;
-}
-
-/*
- * Return some prime greater than x (not necessarily the nearest one).
- * x is first moved to a number of the form 6k-1 and then advanced
- * alternately by 2 and by 4 until __isprime() accepts it.
- */
-uns
-nextprime(uns x)
-{
-  uns step = 2;
-
-  x += 5 - (x % 6);		/* x is 6k-1 */
-  for(;;)
-    {
-      x += step;		/* 6k+1 after +2, 6k-1 after +4 */
-      if (__isprime(x))
-        return x;
-      step = 6 - step;
-    }
-}
-
-#ifdef TEST
-
-#include <stdio.h>
-#include <stdlib.h>
-
-/*
- * Test driver: takes a single number on the command line and prints
- * whether it is a prime and which prime nextprime() finds after it.
- */
-int
-main(int argc, char **argv)
-{
-  uns k;
-
-  /* Without this check, a missing argument means dereferencing NULL */
-  if (argc != 2)
-    {
-      fprintf(stderr, "Usage: %s <number>\n", argc ? argv[0] : "prime");
-      return 1;
-    }
-  k = strtoul(argv[1], NULL, 10);
-  /* NOTE(review): %u assumes that uns is unsigned int -- confirm against lib/lib.h */
-  printf("%u is%s prime\n", k, isprime(k) ? "" : "n't");
-  printf("Next prime is %u\n", nextprime(k));
-  return 0;
-}
-
-#endif
+++ /dev/null
-/*
- * UCW Library -- Prime Number Table
- *
- * (c) 2005 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-#include "lib/binsearch.h"
-
-/* A table of odd primes, each is about 1.2 times the previous one */
-static uns prime_table[] = {
- 3,
- 7,
- 13,
- 19,
- 29,
- 37,
- 53,
- 67,
- 89,
- 109,
- 137,
- 173,
- 211,
- 263,
- 331,
- 409,
- 499,
- 601,
- 727,
- 877,
- 1061,
- 1279,
- 1543,
- 1861,
- 2239,
- 2689,
- 3229,
- 3877,
- 4657,
- 5623,
- 6761,
- 8123,
- 9767,
- 11731,
- 14083,
- 16903,
- 20287,
- 24359,
- 29243,
- 35099,
- 42131,
- 50581,
- 60703,
- 72859,
- 87433,
- 104933,
- 125927,
- 151121,
- 181361,
- 217643,
- 261223,
- 313471,
- 376171,
- 451411,
- 541699,
- 650059,
- 780119,
- 936151,
- 1123391,
- 1348111,
- 1617739,
- 1941293,
- 2329559,
- 2795477,
- 3354581,
- 4025507,
- 4830619,
- 5796797,
- 6956203,
- 8347483,
- 10017011,
- 12020431,
- 14424539,
- 17309471,
- 20771371,
- 24925661,
- 29910821,
- 35892991,
- 43071601,
- 51685939,
- 62023139,
- 74427803,
- 89313379,
- 107176057,
- 128611313,
- 154333591,
- 185200339,
- 222240413,
- 266688509,
- 320026249,
- 384031507,
- 460837813,
- 553005391,
- 663606499,
- 796327811,
- 955593439,
- 1146712139,
- 1376054569,
- 1651265507,
- 1981518631,
- 2377822387,
- 2853386881,
- 3424064269,
- 4108877153,
- 4294967291
-};
-
-#define NPRIMES ARRAY_SIZE(prime_table)
-
-/*
- * Return the smallest prime in prime_table which is greater than x,
- * or 0 if x is not smaller than the last prime in the table.
- */
-uns
-next_table_prime(uns x)
-{
-  int pos;
-
-  if (x >= prime_table[NPRIMES-1])
-    return 0;
-  pos = BIN_SEARCH_FIRST_GE(prime_table, NPRIMES, x+1);
-  return prime_table[pos];
-}
-
-/*
- * Return the prime stored in prime_table just before the position found by
- * BIN_SEARCH_FIRST_GE for x, or 0 if that position is the first one.
- */
-uns
-prev_table_prime(uns x)
-{
-  int pos = BIN_SEARCH_FIRST_GE(prime_table, NPRIMES, x);
-
-  if (!pos)
-    return 0;
-  return prime_table[pos-1];
-}
-
-#ifdef TEST
-
-#include <stdio.h>
-
-int main(void)
-{
-#if 0 /* Generate the table */
- uns x = 3, xx;
- do
- {
- printf(" %u,\n", x);
- xx = x;
- x = nextprime(1.2*x);
- }
- while (x > xx);
-#else
- for (int i=1; i<=100; i++)
- printf("%d\t%d\t%d\n", i, next_table_prime(i), prev_table_prime(i));
- for (uns i=0xfffffff0; i; i++)
- printf("%u\t%u\t%u\n", i, next_table_prime(i), prev_table_prime(i));
- return 0;
-#endif
-}
-
-#endif
+++ /dev/null
-/*
- * UCW Library -- Setting of Process Title
- *
- * (c) 2001--2006 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <stdarg.h>
-#include <unistd.h>
-
-static char **spt_argv;
-static char *spt_start, *spt_end;
-
-void
-setproctitle_init(int argc, char **argv)
-{
-#ifdef CONFIG_LINUX
- int i, len;
- char **env, **oldenv, *t;
-
- spt_argv = argv;
-
- /* Create a backup copy of environment */
- oldenv = __environ;
- len = 0;
- for (i=0; oldenv[i]; i++)
- len += strlen(oldenv[i]) + 1;
- __environ = env = xmalloc(sizeof(char *)*(i+1));
- t = xmalloc(len);
- for (i=0; oldenv[i]; i++)
- {
- env[i] = t;
- len = strlen(oldenv[i]) + 1;
- memcpy(t, oldenv[i], len);
- t += len;
- }
- env[i] = NULL;
-
- /* Scan for consecutive free space */
- spt_start = spt_end = argv[0];
- for (i=0; i<argc; i++)
- if (!i || spt_end+1 == argv[i])
- spt_end = argv[i] + strlen(argv[i]);
- for (i=0; oldenv[i]; i++)
- if (spt_end+1 == oldenv[i])
- spt_end = oldenv[i] + strlen(oldenv[i]);
-#endif
-}
-
-void
-setproctitle(const char *msg, ...)
-{
- va_list args;
- byte buf[256];
- int n;
-
- va_start(args, msg);
- if (spt_end > spt_start)
- {
- n = vsnprintf(buf, sizeof(buf), msg, args);
- if (n >= (int) sizeof(buf) || n < 0)
- sprintf(buf, "<too-long>");
- n = spt_end - spt_start;
- strncpy(spt_start, buf, n);
- spt_start[n] = 0;
- spt_argv[0] = spt_start;
- spt_argv[1] = NULL;
- }
- va_end(args);
-}
-
-char *
-getproctitle(void)
-{
- return (spt_start < spt_end) ? spt_start : NULL;
-}
+++ /dev/null
-/*
- * UCW Library -- Poor Man's Profiler
- *
- * (c) 2001 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-#include "lib/profile.h"
-
-#include <stdio.h>
-
-/* PROFILE_TOD */
-
-#include <sys/time.h>
-
-void
-prof_tod_init(struct prof_tod *c)
-{
- c->sec = c->usec = 0;
-}
-
-/*
- * Take one gettimeofday() sample, record it as the start time of counter n
- * (if non-NULL) and add the time elapsed since o's start to counter o
- * (if non-NULL).
- */
-void
-prof_tod_switch(struct prof_tod *o, struct prof_tod *n)
-{
-  struct timeval now;
-
-  gettimeofday(&now, NULL);
-  if (n)
-    {
-      n->start_usec = now.tv_usec;
-      n->start_sec = now.tv_sec;
-    }
-  if (!o)
-    return;
-
-  o->sec += now.tv_sec - o->start_sec;
-  o->usec += now.tv_usec - o->start_usec;
-
-  /* Carry between the microsecond and second parts */
-  if (o->usec < 0)
-    {
-      o->sec--;
-      o->usec += 1000000;
-    }
-  else
-    for (; o->usec >= 1000000; o->usec -= 1000000)
-      o->sec++;
-}
-
-/* Print the counter as "<sec>.<usec, 6 digits>" into buf; returns what sprintf() returned. */
-int
-prof_tod_format(char *buf, struct prof_tod *c)
-{
-  int len = sprintf(buf, "%d.%06d", c->sec, c->usec);
-  return len;
-}
-
-/* PROFILE_TSC */
-
-#ifdef CPU_I386
-
-/* Reset the accumulated tick count of a TSC counter. */
-void
-prof_tsc_init(struct prof_tsc *c)
-{
-  struct prof_tsc *counter = c;
-  counter->ticks = 0;
-}
-
-/*
- * Print the accumulated tick count into buf; returns what sprintf() returned.
- * The explicit cast makes the argument match the %lld conversion regardless
- * of how u64 is defined (the same approach prof_ktsc_format() takes).
- */
-int
-prof_tsc_format(char *buf, struct prof_tsc *c)
-{
-  return sprintf(buf, "%lld", (long long) c->ticks);
-}
-
-#endif
-
-/* PROFILE_KTSC */
-
-#ifdef CONFIG_LINUX
-
-#include <fcntl.h>
-#include <unistd.h>
-static int self_prof_fd = -1;
-
-/*
- * Reset a kernel-TSC counter. The first call also opens /proc/self/profile
- * and keeps the descriptor for all later samples; failure to open it is fatal.
- */
-void
-prof_ktsc_init(struct prof_ktsc *c)
-{
-  c->ticks_user = 0;
-  c->ticks_sys = 0;
-  if (self_prof_fd >= 0)
-    return;
-
-  self_prof_fd = open("/proc/self/profile", O_RDONLY, 0);
-  if (self_prof_fd < 0)
-    die("Unable to open /proc/self/profile: %m");
-}
-
-/*
- * Read the current user and system tick counts from /proc/self/profile,
- * record them as the start values of counter n (if non-NULL) and add the
- * ticks elapsed since o's start values to counter o (if non-NULL).
- */
-void
-prof_ktsc_switch(struct prof_ktsc *o, struct prof_ktsc *n)
-{
-  unsigned long long u, s;
-  byte buf[256];
-
-  /* One pread() from offset 0 must deliver the whole, short text record */
-  int l = pread(self_prof_fd, buf, sizeof(buf)-1, 0);
-  ASSERT(l > 0 && l < (int)sizeof(buf)-1);
-  buf[l] = 0;
-  /* The destinations are unsigned, so the conversions must be %llu, not %lld */
-  l = sscanf(buf, "%llu%llu", &u, &s);
-  ASSERT(l == 2);
-
-  if (n)
-    {
-      n->start_user = u;
-      n->start_sys = s;
-    }
-  if (o)
-    {
-      u -= o->start_user;
-      o->ticks_user += u;
-      s -= o->start_sys;
-      o->ticks_sys += s;
-    }
-}
-
-/* Print the counter as "<user ticks>+<system ticks>" into buf; returns what sprintf() returned. */
-int
-prof_ktsc_format(char *buf, struct prof_ktsc *c)
-{
-  long long user = (long long) c->ticks_user;
-  long long sys = (long long) c->ticks_sys;
-  return sprintf(buf, "%lld+%lld", user, sys);
-}
-
-#endif
+++ /dev/null
-/*
- * UCW Library -- Poor Man's Profiler
- *
- * (c) 2001 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-/*
- * Usage:
- * #define PROFILE_xxx
- * #include "lib/profile.h"
- * prof_t cnt;
- * prof_init(&cnt);
- * ...
- * prof_start(&cnt);
- * ...
- * prof_stop(&cnt);
- * printf("%s\n", PROF_STR(cnt));
- */
-
-/* PROFILE_TOD: gettimeofday() profiler */
-
-/* Counter state of the gettimeofday() profiler */
-struct prof_tod {
- u32 start_sec, start_usec; /* Time stamp stored when the counter was last started */
- s32 sec, usec; /* Accumulated time, split into seconds and microseconds */
-};
-
-void prof_tod_init(struct prof_tod *);
-void prof_tod_switch(struct prof_tod *, struct prof_tod *);
-int prof_tod_format(char *, struct prof_tod *);
-
-/* PROFILE_TSC: i386 TSC profiler */
-
-#ifdef CPU_I386
-
-/* Counter state of the TSC profiler */
-struct prof_tsc {
- u64 start_tsc; /* TSC value stored when the counter was last started */
- u64 ticks; /* Accumulated ticks */
-};
-
-void prof_tsc_init(struct prof_tsc *);
-int prof_tsc_format(char *, struct prof_tsc *);
-
-#endif
-
-/* PROFILE_KTSC: Linux kernel TSC profiler */
-
-#ifdef CONFIG_LINUX
-
-/* Counter state of the kernel TSC profiler; user and system ticks are tracked separately */
-struct prof_ktsc {
- u64 start_user, start_sys; /* Values stored when the counter was last started */
- u64 ticks_user, ticks_sys; /* Accumulated ticks */
-};
-
-void prof_ktsc_init(struct prof_ktsc *);
-void prof_ktsc_switch(struct prof_ktsc *, struct prof_ktsc *);
-int prof_ktsc_format(char *, struct prof_ktsc *);
-
-#endif
-
-/* Select the right profiler */
-
-#if defined(PROFILE_TOD)
-
-#define PROFILER
-#define PROF_STR_SIZE 21
-typedef struct prof_tod prof_t;
-#define prof_init prof_tod_init
-#define prof_switch prof_tod_switch
-#define prof_format prof_tod_format
-
-#elif defined(PROFILE_TSC)
-
-#define PROFILER
-#define PROFILER_INLINE
-#define PROF_STR_SIZE 24
-
-typedef struct prof_tsc prof_t;
-#define prof_init prof_tsc_init
-#define prof_format prof_tsc_format
-
-#define rdtscll(val) __asm__ __volatile__("rdtsc" : "=A" (val))
-
-/* Start a TSC counter: remember the current time stamp counter value. */
-static inline void prof_start(prof_t *c)
-{
-  u64 now;
-  rdtscll(now);
-  c->start_tsc = now;
-}
-
-/* Stop a TSC counter: add the ticks elapsed since prof_start() to the total. */
-static inline void prof_stop(prof_t *c)
-{
-  u64 now;
-  rdtscll(now);
-  c->ticks += now - c->start_tsc;
-}
-
-/*
- * Stop counter o and start counter n using a single TSC reading.
- * Both pointers are dereferenced unconditionally.
- */
-static inline void prof_switch(prof_t *o, prof_t *n)
-{
-  u64 now;
-  rdtscll(now);
-  n->start_tsc = now;
-  o->ticks += now - o->start_tsc;
-}
-
-#elif defined(PROFILE_KTSC)
-
-#define PROFILER
-#define PROF_STR_SIZE 50
-typedef struct prof_ktsc prof_t;
-#define prof_init prof_ktsc_init
-#define prof_switch prof_ktsc_switch
-#define prof_format prof_ktsc_format
-
-#endif
-
-#ifdef PROFILER
-
-/* Stuff common for all profilers */
-#ifndef PROFILER_INLINE
-/* Start a counter: a switch from "no counter" to c */
-static inline void prof_start(prof_t *c)
-{
-  prof_switch(NULL, c);
-}
-/* Stop a counter: a switch from c to "no counter" */
-static inline void prof_stop(prof_t *c)
-{
-  prof_switch(c, NULL);
-}
-#endif
-#define PROF_STR(C) ({ static char _x[PROF_STR_SIZE]; prof_format(_x, &(C)); _x; })
-
-#else
-
-/* Dummy profiler with no output */
-/* No profiler selected: the counter is an empty structure and all operations are no-ops */
-typedef struct { } prof_t;
-static inline void prof_init(prof_t *c UNUSED) { }
-static inline void prof_start(prof_t *c UNUSED) { }
-static inline void prof_stop(prof_t *c UNUSED) { }
-static inline void prof_switch(prof_t *c UNUSED, prof_t *d UNUSED) { }
-/*
- * Dummy formatter: always produces "?". Returns the number of characters
- * written (1) so that it has the same int-returning shape as the
- * sprintf()-based formatters of the real profilers.
- */
-static inline int prof_format(char *b, prof_t *c UNUSED) { b[0]='?'; b[1]=0; return 1; }
-#define PROF_STR_SIZE 2
-#define PROF_STR(C) "?"
-
-#endif
+++ /dev/null
-/*
- * Simple and Quick Shared Memory Cache
- *
- * (c) 2005 Martin Mares <mj@ucw.cz>
- */
-
-#undef LOCAL_DEBUG
-
-#include "lib/lib.h"
-#include "lib/bitops.h"
-#include "lib/fastbuf.h"
-#include "lib/ff-binary.h"
-#include "lib/qache.h"
-
-#include <stdio.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <sys/stat.h>
-#include <sys/mman.h>
-
-/*
- * The cache lives in a mmapped file of the following format:
- * qache_header
- * qache_entry[max_entries] table of entries and their keys
- * u32 qache_hash[hash_size] hash table pointing to keys
- * u32 block_next[num_blocks] next block pointers
- * padding to a multiple of block size
- * blocks[] data blocks
- */
-
-/* Header stored at offset 0 of the cache file; all *_start fields are byte offsets from the start of the file */
-struct qache_header {
- u32 magic; /* QACHE_MAGIC */
- u32 block_size; /* Parameters as in qache_params */
- u32 block_shift; /* block_size = 1 << block_shift */
- u32 num_blocks; /* Number of blocks in the whole file, including those holding the tables */
- u32 format_id; /* Copied from qache_params; a mismatch makes an existing cache unusable */
- u32 entry_table_start; /* Array of qache_entry's */
- u32 max_entries; /* Number of slots in the entry table, including entry #0 */
- u32 hash_table_start; /* Hash table containing all keys */
- u32 hash_size; /* Number of hash buckets */
- u32 next_table_start; /* Array of next pointers */
- u32 first_data_block; /* Number of the first block available for data */
-};
-
-#define QACHE_MAGIC 0xb79f6d12
-
-struct qache_entry {
- u32 lru_prev, lru_next; /* Entry #0: head of the cyclic LRU list */
- u32 data_len; /* Entry #0: number of free blocks, Free entries: ~0U */
- u32 first_data_block; /* Entry #0: first free block */
- qache_key_t key;
- u32 hash_next; /* Entry #0: first free entry, Free entries: next free */
-};
-
-/* Per-process handle of an open cache; the pointers below point into the mapping at mmap_data */
-struct qache {
- struct qache_header *hdr; /* Header at the start of the mapping */
- struct qache_entry *entry_table; /* mmap_data + hdr->entry_table_start */
- u32 *hash_table; /* mmap_data + hdr->hash_table_start */
- u32 *next_table; /* mmap_data + hdr->next_table_start */
- int fd; /* Descriptor of the cache file */
- byte *mmap_data; /* Start of the shared mapping of the whole file */
- uns file_size; /* Length of the mapping in bytes */
- char *file_name; /* Private copy of the file name */
- uns locked; /* Non-zero between qache_lock() and qache_unlock() */
-};
-
-#define first_free_entry entry_table[0].hash_next
-#define first_free_block entry_table[0].first_data_block
-#define num_free_blocks entry_table[0].data_len
-
-/*
- * Render a key as two hex digits per byte. The result lives in a static
- * buffer which is overwritten by the next call.
- */
-static inline char *
-format_key(qache_key_t *key)
-{
-  static char keybuf[2*sizeof(qache_key_t)+1];
-  char *out = keybuf;
-
-  for (uns i=0; i<sizeof(qache_key_t); i++, out += 2)
-    sprintf(out, "%02x", (*key)[i]);
-  return keybuf;
-}
-
-/*
- * Schedule a part of the mapping (len bytes at offset start) for writing
- * back. The range is widened to whole pages first. Compiled to an empty
- * function when CONFIG_LINUX is defined.
- */
-static void
-qache_msync(struct qache *q UNUSED, uns start UNUSED, uns len UNUSED)
-{
-#ifndef CONFIG_LINUX
-  /* We don't need msyncing on Linux, since the mappings are guaranteed to be coherent */
-  uns misalign = start % CPU_PAGE_SIZE;
-
-  start -= misalign;
-  len += misalign;
-  len = ALIGN_TO(len, CPU_PAGE_SIZE);
-  int res = msync(q->mmap_data + start, len, MS_ASYNC | MS_INVALIDATE);
-  if (res < 0)
-    msg(L_ERROR, "Cache %s: msync failed: %m", q->file_name);
-#endif
-}
-
-/* Sync the single block number blk. */
-static void
-qache_msync_block(struct qache *q, uns blk)
-{
-  uns offset = blk << q->hdr->block_shift;
-
-  DBG("\tSyncing block %d", blk);
-  qache_msync(q, offset, q->hdr->block_size);
-}
-
-/*
- * Take an exclusive fcntl() lock covering the header of the cache file,
- * waiting if necessary. Failure is fatal.
- */
-static void
-qache_lock(struct qache *q)
-{
-  /* We cannot use flock() since it happily permits locking a shared fd (e.g., after fork()) multiple times */
-  ASSERT(!q->locked);
-
-  struct flock fl = {
-    .l_type = F_WRLCK,
-    .l_whence = SEEK_SET,
-    .l_start = 0,
-    .l_len = sizeof(struct qache_header)
-  };
-  int res = fcntl(q->fd, F_SETLKW, &fl);
-  if (res < 0)
-    die("fcntl lock on %s: %m", q->file_name);
-
-  q->locked = 1;
-  DBG("Locked cache %s", q->file_name);
-}
-
-/*
- * Release the lock taken by qache_lock(). If dirty is non-zero, everything
- * in front of the first data block is synced first.
- */
-static void
-qache_unlock(struct qache *q, uns dirty)
-{
-  ASSERT(q->locked);
-
-  if (dirty)
-    {
-      /* Sync header, entry table and hash table */
-      uns meta_len = q->hdr->first_data_block << q->hdr->block_shift;
-      qache_msync(q, 0, meta_len);
-    }
-
-  struct flock fl = {
-    .l_type = F_UNLCK,
-    .l_whence = SEEK_SET,
-    .l_start = 0,
-    .l_len = sizeof(struct qache_header)
-  };
-  int res = fcntl(q->fd, F_SETLKW, &fl);
-  if (res < 0)
-    die("fcntl unlock on %s: %m", q->file_name);
-
-  q->locked = 0;
-  DBG("Unlocked cache %s (dirty=%d)", q->file_name, dirty);
-}
-
-/* Bits set in the audit's per-entry map: on which lists the entry has been seen */
-enum entry_audit_flags {
- ET_FREE_LIST = 1, /* On the list of free entries */
- ET_LRU = 2, /* On the LRU list */
- ET_HASH = 4 /* On some hash chain */
-};
-
-/*
- * Check consistency of the entry table: walk the free list, all hash
- * chains and the LRU list, marking every visited entry in entrymap (one
- * byte of entry_audit_flags per entry, zeroed by the caller). Returns NULL
- * if everything is consistent, otherwise a static error message.
- */
-static char *
-audit_entries(struct qache *q, byte *entrymap)
-{
- uns i, j;
-
- DBG("Auditing entries");
-
- /* Check the free list */
- i = q->first_free_entry;
- while (i)
- {
- /* An entry seen twice means a cycle; free entries must have data_len == ~0U */
- if (i >= q->hdr->max_entries || (entrymap[i] & ET_FREE_LIST) || q->entry_table[i].data_len != ~0U)
- return "inconsistent free entry list";
- entrymap[i] |= ET_FREE_LIST;
- i = q->entry_table[i].hash_next;
- }
-
- /* Check the hash table */
- for (i=0; i<q->hdr->hash_size; i++)
- {
- j = q->hash_table[i];
- while (j)
- {
- /* A hashed entry must be neither free nor already present on a chain */
- if (j >= q->hdr->max_entries || (entrymap[j] & (ET_HASH | ET_FREE_LIST)))
- return "inconsistent hash chains";
- entrymap[j] |= ET_HASH;
- j = q->entry_table[j].hash_next;
- }
- }
-
- /* Check the LRU */
- i = 0;
- do
- {
- /* Start at the list head (entry #0) and verify the back link of each successor */
- j = q->entry_table[i].lru_next;
- if ((entrymap[i] & (ET_LRU | ET_FREE_LIST)) || j >= q->hdr->max_entries || q->entry_table[j].lru_prev != i)
- return "inconsistent LRU list";
- entrymap[i] |= ET_LRU;
- i = j;
- }
- while (i);
-
- /* Check if all non-free items are in all lists */
- for (i=1; i<q->hdr->max_entries; i++)
- {
- if (entrymap[i] != ((q->entry_table[i].data_len == ~0U) ? ET_FREE_LIST : (ET_LRU | ET_HASH)))
- return "inconsistent lists";
- }
- return NULL;
-}
-
-/* Bits set in the audit's per-block map: where the block has been seen */
-enum block_audit_flags {
- BT_FREE_LIST = 1, /* On the list of free blocks */
- BT_ALLOC = 2 /* In the block chain of some entry */
-};
-
-/*
- * Check consistency of block allocation: walk the free block list and the
- * block chain of every non-free entry, marking each visited block in
- * blockmap (one byte of block_audit_flags per block, zeroed by the caller).
- * entrymap must have been filled in by audit_entries(). Returns NULL if
- * everything is consistent, otherwise a static error message.
- */
-static char *
-audit_blocks(struct qache *q, byte *entrymap, byte *blockmap)
-{
-  uns i, j;
-
-  DBG("Auditing blocks");
-
-  /* Check the free list */
-  for (i=q->first_free_block; i; i=q->next_table[i])
-    {
-      if (i < q->hdr->first_data_block || i >= q->hdr->num_blocks || (blockmap[i] & BT_FREE_LIST))
-        return "inconsistent free block list";
-      blockmap[i] |= BT_FREE_LIST;
-    }
-
-  /* Check allocation lists of entries */
-  for (i=1; i<q->hdr->max_entries; i++)
-    if (!(entrymap[i] & ET_FREE_LIST))
-      {
-        uns blocks = 0;
-        for (j=q->entry_table[i].first_data_block; j; j=q->next_table[j])
-          {
-            /*
-             * The block number comes from the file, so it has to be range-checked
-             * before it is used as an index to blockmap[] and next_table[].
-             */
-            if (j < q->hdr->first_data_block || j >= q->hdr->num_blocks || blockmap[j])
-              return "inconsistent entry block list";
-            blockmap[j] |= BT_ALLOC;
-            blocks++;
-          }
-        /* The chain must be exactly as long as the data length requires */
-        if (((q->entry_table[i].data_len + q->hdr->block_size - 1) >> q->hdr->block_shift) != blocks)
-          return "inconsistent entry data length";
-      }
-
-  /* Check if all blocks belong somewhere */
-  for (i=q->hdr->first_data_block; i < q->hdr->num_blocks; i++)
-    if (!blockmap[i])
-      {
-        DBG("Block %d unreferenced", i);
-        return "unreferenced blocks found";
-      }
-
-  return NULL;
-}
-
-/*
- * Run the complete audit (entries first, blocks only if the entries are
- * fine) on temporary zeroed maps. Returns NULL or a static error message.
- */
-static char *
-do_audit(struct qache *q)
-{
-  byte *entry_map = xmalloc_zero(q->hdr->max_entries);
-  byte *block_map = xmalloc_zero(q->hdr->num_blocks);
-  /* Both audit functions return char *, so the result is kept in a char * as well */
-  char *err = audit_entries(q, entry_map);
-  if (!err)
-    err = audit_blocks(q, entry_map, block_map);
-  xfree(block_map);
-  xfree(entry_map);
-  return err;
-}
-
-/* Derive the table pointers from the offsets stored in the mapped header. */
-static void
-qache_setup_pointers(struct qache *q)
-{
-  byte *base = q->mmap_data;
-  struct qache_header *h = (struct qache_header *) base;
-
-  q->hdr = h;
-  q->entry_table = (struct qache_entry *) (base + h->entry_table_start);
-  q->hash_table = (u32 *) (base + h->hash_table_start);
-  q->next_table = (u32 *) (base + h->next_table_start);
-}
-
-/*
- * Try to attach to an already existing cache file. Returns 1 on success
- * (the file is mapped and the pointers in q are set up). Returns 0 if the
- * file cannot be opened or its contents cannot be used; in the latter case
- * the reason is logged and the file is closed again.
- */
-static int
-qache_open_existing(struct qache *q, struct qache_params *par)
-{
-  if ((q->fd = open(q->file_name, O_RDWR, 0)) < 0)
-    return 0;
-
-  struct stat st;
-  char *err = "stat failed";
-  if (fstat(q->fd, &st) < 0)
-    goto close_and_fail;
-
-  err = "invalid file size";
-  if (st.st_size < (int)sizeof(struct qache_header) || (st.st_size % par->block_size))
-    goto close_and_fail;
-
-  /* Compare in full width: a huge file must not match after truncation to 32 bits */
-  err = "requested size change";
-  if (st.st_size != (off_t) par->cache_size)
-    goto close_and_fail;
-  q->file_size = par->cache_size;
-
-  err = "cannot mmap";
-  if ((q->mmap_data = mmap(NULL, q->file_size, PROT_READ | PROT_WRITE, MAP_SHARED, q->fd, 0)) == MAP_FAILED)
-    goto close_and_fail;
-  struct qache_header *h = (struct qache_header *) q->mmap_data;
-
-  /* This only computes pointers; nothing behind them is touched before the checks below */
-  qache_setup_pointers(q);
-  qache_lock(q);
-
-  err = "incompatible format";
-  if (h->magic != QACHE_MAGIC ||
-      h->block_size != par->block_size ||
-      h->max_entries != par->max_entries ||
-      h->format_id != par->format_id)
-    goto unlock_and_fail;
-
-  err = "incomplete file";
-  if ((u64) h->num_blocks * h->block_size != q->file_size)
-    goto unlock_and_fail;
-
-  /*
-   * The audit indexes the tables by values read from the header, so make
-   * sure that the geometry is sane and that all tables lie in front of the
-   * first data block, in the order in which qache_create() writes them.
-   */
-  err = "corrupted header";
-  if (h->block_shift >= 32 ||
-      (1U << h->block_shift) != h->block_size ||
-      !h->hash_size ||
-      h->first_data_block >= h->num_blocks)
-    goto unlock_and_fail;
-  u64 entries_end = (u64) h->entry_table_start + (u64) h->max_entries * sizeof(struct qache_entry);
-  u64 hash_end = (u64) h->hash_table_start + (u64) h->hash_size * sizeof(u32);
-  u64 next_end = (u64) h->next_table_start + (u64) h->num_blocks * sizeof(u32);
-  u64 data_start = (u64) h->first_data_block << h->block_shift;
-  if (h->entry_table_start < sizeof(struct qache_header) ||
-      entries_end > h->hash_table_start ||
-      hash_end > h->next_table_start ||
-      next_end > data_start)
-    goto unlock_and_fail;
-
-  if ((err = do_audit(q)))
-    goto unlock_and_fail;
-
-  qache_unlock(q, 0);
-  msg(L_INFO, "Cache %s: using existing data", q->file_name);
-  return 1;
-
- unlock_and_fail:
-  qache_unlock(q, 0);
-  munmap(q->mmap_data, q->file_size);
- close_and_fail:
-  msg(L_INFO, "Cache %s: ignoring old contents (%s)", q->file_name, err);
-  close(q->fd);
-  return 0;
-}
-
-/*
- * Create a new, empty cache file according to par (truncating any old
- * one), write all its structures through a fastbuf and finally map it.
- * Every failure is fatal.
- */
-static void
-qache_create(struct qache *q, struct qache_params *par)
-{
- q->fd = open(q->file_name, O_RDWR | O_CREAT | O_TRUNC, 0666);
- if (q->fd < 0)
- die("Cache %s: unable to create (%m)", q->file_name);
- struct fastbuf *fb = bfdopen_shared(q->fd, 16384);
-
- /* Compute the layout: header, entry table, hash table, next pointers, then data blocks */
- struct qache_header h;
- bzero(&h, sizeof(h));
- h.magic = QACHE_MAGIC;
- h.block_size = par->block_size;
- h.block_shift = bit_fls(h.block_size);
- h.num_blocks = par->cache_size >> h.block_shift;
- h.format_id = par->format_id;
- h.entry_table_start = sizeof(h);
- h.max_entries = par->max_entries;
- h.hash_table_start = h.entry_table_start + h.max_entries * sizeof(struct qache_entry);
- /* The number of hash buckets is the smallest power of two >= max_entries */
- h.hash_size = 1;
- while (h.hash_size < h.max_entries)
- h.hash_size *= 2;
- h.next_table_start = h.hash_table_start + h.hash_size * 4;
- /* Data starts at the first block boundary behind the next pointers */
- h.first_data_block = (h.next_table_start + 4*h.num_blocks + h.block_size - 1) >> h.block_shift;
- if (h.first_data_block >= h.num_blocks)
- die("Cache %s: Requested size is too small even to hold the maintenance structures", q->file_name);
- bwrite(fb, &h, sizeof(h));
-
- /* Entry #0: heads of all lists */
- ASSERT(btell(fb) == (sh_off_t)h.entry_table_start);
- struct qache_entry ent;
- bzero(&ent, sizeof(ent));
- ent.first_data_block = h.first_data_block;
- ent.data_len = h.num_blocks - h.first_data_block;
- ent.hash_next = 1;
- bwrite(fb, &ent, sizeof(ent));
-
- /* Other entries */
- bzero(&ent, sizeof(ent));
- ent.data_len = ~0U;
- /* All of them are free and chained through hash_next; the last one terminates the list */
- for (uns i=1; i<h.max_entries; i++)
- {
- ent.hash_next = (i == h.max_entries-1 ? 0 : i+1);
- bwrite(fb, &ent, sizeof(ent));
- }
-
- /* The hash table */
- ASSERT(btell(fb) == (sh_off_t)h.hash_table_start);
- for (uns i=0; i<h.hash_size; i++)
- bputl(fb, 0);
-
- /* The next pointers */
- ASSERT(btell(fb) == (sh_off_t)h.next_table_start);
- /* All data blocks form a single free chain; blocks in front of the data get 0 */
- for (uns i=0; i<h.num_blocks; i++)
- bputl(fb, (i < h.first_data_block || i == h.num_blocks-1) ? 0 : i+1);
-
- /* Padding */
- ASSERT(btell(fb) <= (sh_off_t)(h.first_data_block << h.block_shift));
- while (btell(fb) < (sh_off_t)(h.first_data_block << h.block_shift))
- bputc(fb, 0);
-
- /* Data blocks */
- for (uns i=h.first_data_block; i<h.num_blocks; i++)
- for (uns j=0; j<h.block_size; j+=4)
- bputl(fb, 0);
-
- ASSERT(btell(fb) == (sh_off_t)par->cache_size);
- bclose(fb);
- msg(L_INFO, "Cache %s: created (%d bytes, %d slots, %d buckets)", q->file_name, par->cache_size, h.max_entries, h.hash_size);
-
- /* q->fd is used again here, after the fastbuf has been closed */
- if ((q->mmap_data = mmap(NULL, par->cache_size, PROT_READ | PROT_WRITE, MAP_SHARED, q->fd, 0)) == MAP_FAILED)
- die("Cache %s: mmap failed (%m)", par->file_name);
- q->file_size = par->cache_size;
- qache_setup_pointers(q);
-}
-
-/*
- * Open a cache described by par. An existing file is reused unless
- * force_reset is positive; if it cannot be reused, a new cache is created,
- * except that a negative force_reset makes this a fatal error instead.
- * par->cache_size is rounded up to a multiple of the block size.
- */
-struct qache *
-qache_open(struct qache_params *par)
-{
-  struct qache *q = xmalloc_zero(sizeof(*q));
-  q->file_name = xstrdup(par->file_name);
-
-  ASSERT(par->block_size >= 8 && !(par->block_size & (par->block_size-1)));
-  par->cache_size = ALIGN_TO(par->cache_size, par->block_size);
-
-  if (par->force_reset > 0 || !qache_open_existing(q, par))
-    {
-      if (par->force_reset < 0)
-        die("Cache %s: read-only access requested, but no data available", q->file_name);
-      else
-        qache_create(q, par);
-    }
-  return q;
-}
-
-/*
- * Unmap and close the cache and free the handle. Unless retain_data is
- * set, the cache file is removed as well.
- */
-void
-qache_close(struct qache *q, uns retain_data)
-{
-  munmap(q->mmap_data, q->file_size);
-  close(q->fd);
-  if (!retain_data)
-    {
-      if (unlink(q->file_name) < 0)
-        msg(L_ERROR, "Cache %s: unlink failed (%m)", q->file_name);
-    }
-  xfree(q->file_name);
-  xfree(q);
-}
-
-/*
- * Hash a key to a bucket number: the first four bytes of the key taken as
- * a big-endian number, modulo the number of buckets.
- */
-static uns
-qache_hash(struct qache *q, qache_key_t *key)
-{
-  const byte *k = *key;
-  /* Widen to unsigned before shifting, so that a byte >= 0x80 cannot overflow a signed int */
-  uns h = ((uns) k[0] << 24) | ((uns) k[1] << 16) | ((uns) k[2] << 8) | (uns) k[3];
-  return h % q->hdr->hash_size;
-}
-
-/*
- * Find the entry with the given key. The slot pos_hint (if non-zero and in
- * range) is examined first, then the key's hash chain is searched. Returns
- * the entry number or 0 if there is no such entry. The cache must be locked.
- */
-static uns
-qache_hash_find(struct qache *q, qache_key_t *key, uns pos_hint)
-{
-  ASSERT(q->locked);
-
-  if (pos_hint && pos_hint < q->hdr->max_entries)
-    {
-      struct qache_entry *hinted = &q->entry_table[pos_hint];
-      if (hinted->data_len != ~0U && !memcmp(hinted->key, key, sizeof(*key)))
-        return pos_hint;
-    }
-
-  uns e = q->hash_table[qache_hash(q, key)];
-  while (e && memcmp(q->entry_table[e].key, key, sizeof(*key)))
-    e = q->entry_table[e].hash_next;
-  return e;
-}
-
-/* Put entry e at the head of the hash chain its key belongs to. */
-static void
-qache_hash_insert(struct qache *q, uns e)
-{
-  struct qache_entry *entry = &q->entry_table[e];
-  uns *bucket = &q->hash_table[qache_hash(q, &entry->key)];
-
-  entry->hash_next = *bucket;
-  *bucket = e;
-}
-
-/*
- * Unlink entry e from its hash chain. The chain is searched for the first
- * entry with an equal key; not finding one is an assertion failure.
- */
-static void
-qache_hash_remove(struct qache *q, uns e)
-{
-  struct qache_entry *entry = &q->entry_table[e];
-  uns *link = &q->hash_table[qache_hash(q, &entry->key)];
-
-  while (*link)
-    {
-      struct qache_entry *cand = &q->entry_table[*link];
-      if (!memcmp(cand->key, entry->key, sizeof(qache_key_t)))
-        {
-          *link = entry->hash_next;
-          return;
-        }
-      link = &cand->hash_next;
-    }
-  ASSERT(0);
-}
-
-/*
- * Take the first entry off the free list, mark it as used (data_len = 0)
- * and return its number. The cache must be locked and a free entry must exist.
- */
-static uns
-qache_alloc_entry(struct qache *q)
-{
-  uns slot = q->first_free_entry;
-  ASSERT(q->locked && slot);
-
-  struct qache_entry *entry = &q->entry_table[slot];
-  ASSERT(entry->data_len == ~0U);
-  entry->data_len = 0;
-  q->first_free_entry = entry->hash_next;
-  return slot;
-}
-
-/* Mark entry e as free (data_len = ~0U) and push it onto the free list. */
-static void
-qache_free_entry(struct qache *q, uns e)
-{
-  struct qache_entry *entry = &q->entry_table[e];
-
-  ASSERT(q->locked && entry->data_len != ~0U);
-  entry->hash_next = q->first_free_entry;
-  entry->data_len = ~0U;
-  q->first_free_entry = e;
-}
-
-/* Return the address of the given block inside the mapping; block 0 is never valid here. */
-static inline void *
-get_block_start(struct qache *q, uns block)
-{
-  ASSERT(block && block < q->hdr->num_blocks);
-  uns offset = block << q->hdr->block_shift;
-  return q->mmap_data + offset;
-}
-
-/*
- * Take the first block off the free list and return its number.
- * The cache must be locked and at least one block must be free.
- */
-static uns
-qache_alloc_block(struct qache *q)
-{
-  ASSERT(q->locked && q->num_free_blocks);
-
-  uns taken = q->first_free_block;
-  q->num_free_blocks--;
-  q->first_free_block = q->next_table[taken];
-  DBG("\tAllocated block %d", taken);
-  return taken;
-}
-
-/* Push block blk onto the free list. The cache must be locked. */
-static void
-qache_free_block(struct qache *q, uns blk)
-{
-  ASSERT(q->locked);
-
-  uns old_head = q->first_free_block;
-  q->next_table[blk] = old_head;
-  q->first_free_block = blk;
-  q->num_free_blocks++;
-  DBG("\tFreed block %d", blk);
-}
-
-/*
- * Insert entry e into the cyclic LRU list right behind its head (entry #0).
- * The entry must not be linked anywhere (both its LRU links are zero).
- */
-static void
-qache_lru_insert(struct qache *q, uns e)
-{
-  struct qache_entry *table = q->entry_table;
-
-  ASSERT(q->locked && !table[e].lru_prev && !table[e].lru_next);
-  uns old_first = table[0].lru_next;
-  table[e].lru_next = old_first;
-  table[e].lru_prev = 0;
-  table[old_first].lru_prev = e;
-  table[0].lru_next = e;
-}
-
-/* Unlink entry e from the LRU list and clear its links. */
-static void
-qache_lru_remove(struct qache *q, uns e)
-{
-  ASSERT(q->locked);
-
-  struct qache_entry *table = q->entry_table;
-  uns before = table[e].lru_prev;
-  uns after = table[e].lru_next;
-
-  table[before].lru_next = after;
-  table[after].lru_prev = before;
-  table[e].lru_next = 0;
-  table[e].lru_prev = 0;
-}
-
-/* Return the entry at the tail of the LRU list, i.e. the predecessor of the head. */
-static uns
-qache_lru_get(struct qache *q)
-{
-  struct qache_entry *head = &q->entry_table[0];
-  return head->lru_prev;
-}
-
-/*
- * Low-level deletion of entry e: free all its data blocks, then remove it
- * from the LRU list and the hash chain and return it to the free list.
- */
-static void
-qache_ll_delete(struct qache *q, uns e)
-{
-  struct qache_entry *entry = &q->entry_table[e];
-  uns block_size = q->hdr->block_size;
-
-  /* One block is freed per block_size bytes of data (the last one may be partial) */
-  for (uns blk = entry->first_data_block; entry->data_len; )
-    {
-      uns following = q->next_table[blk];
-      qache_free_block(q, blk);
-      blk = following;
-      entry->data_len -= MIN(entry->data_len, block_size);
-    }
-  qache_lru_remove(q, e);
-  qache_hash_remove(q, e);
-  qache_free_entry(q, e);
-}
-
-/*
- * Insert data of the given size under the given key, replacing any entry
- * with the same key (pos_hint works as in qache_hash_find()). Entries are
- * evicted from the tail of the LRU list until there is enough room.
- * Returns the number of the new entry, or 0 if the data is larger than the
- * whole data area; an old entry with the same key is deleted even then.
- */
-uns
-qache_insert(struct qache *q, qache_key_t *key, uns pos_hint, void *data, uns size)
-{
-  qache_lock(q);
-
-  uns dirty = 0;
-  uns e = qache_hash_find(q, key, pos_hint);
-  if (e)
-    {
-      DBG("Insert <%s>: deleting old entry %d", format_key(key), e);
-      qache_ll_delete(q, e);
-      dirty = 1;
-    }
-
-  uns blocks = (size + q->hdr->block_size - 1) >> q->hdr->block_shift;
-  if (blocks > q->hdr->num_blocks - q->hdr->first_data_block)
-    {
-      /* If the old entry has been deleted above, the tables are modified and must be synced */
-      qache_unlock(q, dirty);
-      return 0;
-    }
-  while (q->num_free_blocks < blocks || !q->first_free_entry)
-    {
-      e = qache_lru_get(q);
-      DBG("Insert <%s>: evicting entry %d to make room for %d blocks", format_key(key), e, blocks);
-      ASSERT(e);
-      qache_ll_delete(q, e);
-    }
-  e = qache_alloc_entry(q);
-  struct qache_entry *entry = &q->entry_table[e];
-  entry->data_len = size;
-  memcpy(entry->key, key, sizeof(*key));
-  DBG("Insert <%s>: created entry %d with %d data blocks", format_key(key), e, blocks);
-
-  /*
-   * The data is stored from its end: every new block is put in front of the
-   * chain, so that the chain finally lists the blocks in data order. Only
-   * the first chunk processed (the tail of the data) can be a partial block.
-   */
-  entry->first_data_block = 0;
-  while (size)
-    {
-      uns tail = size & (q->hdr->block_size-1);
-      uns chunk = tail ? tail : q->hdr->block_size;
-      uns blk = qache_alloc_block(q);
-      q->next_table[blk] = entry->first_data_block;
-      /* Pointer arithmetic needs a complete type, so go through byte * instead of void * */
-      memcpy(get_block_start(q, blk), (byte *) data + size - chunk, chunk);
-      qache_msync_block(q, blk);
-      entry->first_data_block = blk;
-      size -= chunk;
-    }
-
-  qache_lru_insert(q, e);
-  qache_hash_insert(q, e);
-  qache_unlock(q, 1);
-  return e;
-}
-
-/*
- * Copy data of an entry to the caller. On entry, *sizep is the maximum
- * number of bytes wanted; on return it is the number of bytes available
- * from offset start to the end of the data. If datap is non-NULL, the
- * smaller of the two amounts is copied to *datap, which is allocated by
- * xmalloc() first if it is NULL. If sizep is NULL, nothing is done and
- * datap must be NULL as well.
- */
-static void
-copy_out(struct qache *q, struct qache_entry *entry, byte **datap, uns *sizep, uns start)
-{
-  if (!sizep)
-    {
-      ASSERT(!datap);
-      return;
-    }
-
-  uns size = *sizep;
-  uns avail = (start > entry->data_len) ? 0 : entry->data_len - start;
-  uns xfer = MIN(size, avail);
-  *sizep = avail;
-  if (!datap)
-    return;
-
-  if (!*datap)
-    *datap = xmalloc(xfer);
-  /* Nothing to copy: do not walk the block chain at all */
-  if (!xfer)
-    return;
-
-  /* Skip the blocks lying completely in front of the starting offset */
-  uns blk = entry->first_data_block;
-  while (start >= q->hdr->block_size)
-    {
-      blk = q->next_table[blk];
-      start -= q->hdr->block_size;
-    }
-
-  byte *data = *datap;
-  while (xfer)
-    {
-      uns len = MIN(xfer, q->hdr->block_size - start);
-      /* start is now the offset inside the first copied block; it is zero for all further blocks */
-      memcpy(data, (byte *) get_block_start(q, blk) + start, len);
-      blk = q->next_table[blk];
-      data += len;
-      xfer -= len;
-      start = 0;
-    }
-}
-
-/*
- * Look up a key. If it is found, the entry is moved to the front of the
- * LRU list and its data are handed over by copy_out(). Returns the entry
- * number or 0 if the key is not present.
- */
-uns
-qache_lookup(struct qache *q, qache_key_t *key, uns pos_hint, byte **datap, uns *sizep, uns start)
-{
-  qache_lock(q);
-  uns e = qache_hash_find(q, key, pos_hint);
-  if (!e)
-    {
-      DBG("Lookup <%s>: not found", format_key(key));
-      qache_unlock(q, 0);
-      return e;
-    }
-
-  struct qache_entry *entry = &q->entry_table[e];
-  DBG("Lookup <%s>: found entry %d", format_key(key), e);
-  qache_lru_remove(q, e);
-  qache_lru_insert(q, e);
-  copy_out(q, entry, datap, sizep, start);
-  qache_unlock(q, 1);			/* Yes, modified -- we update the LRU */
-  return e;
-}
-
-/*
- * Inspect the entry at position pos without touching the LRU list. If the
- * slot is occupied, its key is copied to *key (if key is non-NULL), its data
- * are handed over by copy_out() and pos is returned. Returns 0 for an empty
- * slot and ~0U for a position out of range.
- */
-uns
-qache_probe(struct qache *q, qache_key_t *key, uns pos, byte **datap, uns *sizep, uns start)
-{
-  if (!pos || pos >= q->hdr->max_entries)
-    {
-      DBG("Probe %d: Out of range", pos);
-      return ~0U;
-    }
-
-  qache_lock(q);
-  uns ret = 0;
-  struct qache_entry *entry = &q->entry_table[pos];
-  if (entry->data_len != ~0U)
-    {
-      /* The format needs both the position and the key; format_key() takes a pointer to the key */
-      DBG("Probe %d: Found key <%s>", pos, format_key(&entry->key));
-      if (key)
-        memcpy(key, entry->key, sizeof(qache_key_t));
-      copy_out(q, entry, datap, sizep, start);
-      ret = pos;
-    }
-  else
-    DBG("Probe %d: Empty", pos);
-  qache_unlock(q, 0);
-  return ret;
-}
-
-/*
- * Delete the entry with the given key (pos_hint works as in
- * qache_hash_find()). Returns the number of the deleted entry or 0 if the
- * key has not been found.
- */
-uns
-qache_delete(struct qache *q, qache_key_t *key, uns pos_hint)
-{
-  qache_lock(q);
-  uns e = qache_hash_find(q, key, pos_hint);
-  if (e)
-    {
-      DBG("Delete <%s>: deleting entry %d", format_key(key), e);
-      qache_ll_delete(q, e);
-    }
-  else
-    DBG("Delete <%s>: No match", format_key(key));
-  /* The tables are modified only if an entry has really been deleted */
-  qache_unlock(q, e != 0);
-  return e;
-}
-
-/*
- * Dump the header parameters, the whole entry table, the hash table and
- * the next pointers of all data blocks to the log at the L_DEBUG level.
- * The cache is not locked while doing so.
- */
-void
-qache_debug(struct qache *q)
-{
-  struct qache_header *hdr = q->hdr;
-
-  msg(L_DEBUG, "Cache %s: block_size=%d (%d data), num_blocks=%d (%d first data), %d slots, %d hash buckets",
-      q->file_name, hdr->block_size, hdr->block_size, hdr->num_blocks, hdr->first_data_block,
-      hdr->max_entries, hdr->hash_size);
-
-  msg(L_DEBUG, "Table of cache entries:");
-  msg(L_DEBUG, "\tEntry\tLruPrev\tLruNext\tDataLen\tDataBlk\tHashNxt\tKey");
-  for (uns slot=0; slot<hdr->max_entries; slot++)
-    {
-      struct qache_entry *ent = q->entry_table + slot;
-      msg(L_DEBUG, "\t%d\t%d\t%d\t%d\t%d\t%d\t%s", slot, ent->lru_prev, ent->lru_next, ent->data_len,
-          ent->first_data_block, ent->hash_next, format_key(&ent->key));
-    }
-
-  msg(L_DEBUG, "Hash table:");
-  for (uns bucket=0; bucket<hdr->hash_size; bucket++)
-    msg(L_DEBUG, "\t%04x\t%d", bucket, q->hash_table[bucket]);
-
-  msg(L_DEBUG, "Next pointers:");
-  for (uns b=hdr->first_data_block; b<hdr->num_blocks; b++)
-    msg(L_DEBUG, "\t%d\t%d", b, q->next_table[b]);
-}
-
-/* Lock the cache and check its consistency; any inconsistency is fatal. */
-void
-qache_audit(struct qache *q)
-{
-  qache_lock(q);
-  char *err = do_audit(q);
-  if (err)
-    die("Cache %s: %s", q->file_name, err);
-  qache_unlock(q, 0);
-}
-
-#ifdef TEST
-
-/*
- * Self-test: insert N entries of growing size, dump and audit the cache,
- * then look all keys up again and verify the data of those still present.
- */
-int main(int argc UNUSED, char **argv UNUSED)
-{
-  struct qache_params par = {
-    .file_name = "tmp/test",
-    .block_size = 256,
-    .cache_size = 65536,
-    .max_entries = 123,
-    .force_reset = 0,
-    .format_id = 0xfeedcafe
-  };
-  struct qache *q = qache_open(&par);
-
-  qache_key_t key = { 0x12, 0x34, 0x56, 0x78, 0x9a, 0xbc, 0xde, 0xf0, 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef };
-#define N 100
-  uns i, j;
-  byte data[11*N];
-  for (i=0; i<N; i++)
-    {
-      /* Entry i carries 11*i bytes; the key differs in bytes 3 and 15 */
-      key[3] = i / 16; key[15] = i % 16;
-      for (j=0; j<11*i; j++)
-        data[j] = 0x33 + i*j;
-      qache_insert(q, &key, 0, data, 11*i);
-    }
-  qache_debug(q);
-  qache_audit(q);
-
-  uns found = 0;
-  /* Walk exactly the keys inserted above (N of them, not a hard-coded count) */
-  for (i=0; i<N; i++)
-    {
-      key[3] = i / 16; key[15] = i % 16;
-      byte *dptr = data;
-      uns sz = sizeof(data);
-      uns e = qache_lookup(q, &key, 0, &dptr, &sz, 0);
-      if (e)
-        {
-          ASSERT(sz == 11*i);
-          for (j=0; j<sz; j++)
-            ASSERT(data[j] == (byte)(0x33 + i*j));
-          found++;
-        }
-    }
-  msg(L_INFO, "Found %d of %d entries", found, N);
-
-  qache_close(q, 1);
-  return 0;
-}
-
-#endif
+++ /dev/null
-/*
- * Simple and Quick Shared Memory Cache
- *
- * (c) 2005 Martin Mares <mj@ucw.cz>
- */
-
-#ifndef _UCW_QACHE_H
-#define _UCW_QACHE_H
-
-struct qache_params {
- char *file_name;
- uns block_size; /* Cache block size (a power of two) */
- uns cache_size; /* Size of the whole cache */
- uns max_entries; /* Maximum number of cached entries */
- int force_reset; /* Force creation of a new cache even if the old one seems usable, -1 if reset should never be done */
- uns format_id; /* Data format ID (old cache not used if formats differ) */
-};
-
-typedef byte qache_key_t[16];
-
-struct qache;
-
-/* Create and destroy a cache */
-struct qache *qache_open(struct qache_params *p);
-void qache_close(struct qache *q, uns retain_data);
-
-/* Insert a new item to the cache with a given key and data. If pos_hint is non-zero, it serves
- * as a hint about the position of the entry (if it's known that an entry with the particular key
- * was located there a moment ago). Returns the position of the new entry, or 0 if the data are
- * too large to fit in the cache.
- */
-uns qache_insert(struct qache *q, qache_key_t *key, uns pos_hint, void *data, uns size);
-
-/* Look up data in the cache, given a key and a position hint (as above). If datap is non-NULL, data
- * from the cache entry are copied to *datap; if *datap is NULL, new memory is allocated by calling
- * xmalloc and *datap is set to point to that memory. On entry, *sizep contains the maximum number
- * of bytes to be copied (~0U if unlimited); on return, it is replaced by the number of bytes available
- * (so it can be greater than the original value requested). The start argument is the starting offset
- * inside the entry's data. Returns the position of the entry, or 0 if the key was not found.
- */
-uns qache_lookup(struct qache *q, qache_key_t *key, uns pos_hint, byte **datap, uns *sizep, uns start);
-
-/* Inspect data in the cache (but don't modify LRU nor anything else), given a position.
- * If key is non-NULL, it's filled with the cache key. The rest works as in qache_lookup.
- * Returns 0 if the entry is empty, ~0 for position out of range, entry number otherwise.
- */
-uns qache_probe(struct qache *q, qache_key_t *key, uns pos, byte **datap, uns *sizep, uns start);
-
-/* Delete data from the cache, given a key and a position hint. */
-uns qache_delete(struct qache *q, qache_key_t *key, uns pos_hint);
-
-/* Debugging dump (beware, doesn't lock the cache!) */
-void qache_debug(struct qache *q);
-
-/* Check consistency of the cache structure */
-void qache_audit(struct qache *q);
-
-#endif
+++ /dev/null
-/*
- * UCW Library -- Unbiased Random Numbers
- *
- * (c) 1998--2006 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-
-#include <stdlib.h>
-
-/* We expect the random generator in libc to give at least 30 bits of randomness */
-COMPILE_ASSERT(RAND_MAX_RANGE_TEST, RAND_MAX >= (1 << 30)-1);
-
-/* Return 32 random bits composed of the low 16 bits of two random() results. */
-uns
-random_u32(void)
-{
-  /* Convert to unsigned before shifting, so that the shift cannot overflow a 32-bit long */
-  uns lo = random() & 0xffff;
-  uns hi = random() & 0xffff;
-  return lo | (hi << 16);
-}
-
-/*
- * Return a random number in the range 0..max-1 without modulo bias: results
- * of random() above the largest multiple of max are thrown away and drawn
- * again. max must be between 1 and 2^30.
- */
-uns
-random_max(uns max)
-{
-  uns r, l;
-
-  /* max == 0 would divide by zero below */
-  ASSERT(max && max <= (1U << 30));
-  l = (RAND_MAX + 1U) - ((RAND_MAX + 1U) % max);
-  do
-    r = random();
-  while (r >= l);
-  return r % max;
-}
-
-/* Return 64 random bits composed of 16 + 24 + 24 bits of three random() results. */
-u64
-random_u64(void)
-{
-  u64 top = (u64)(random() & 0xffff) << 48;
-  u64 middle = (u64)(random() & 0xffffff) << 24;
-  u64 bottom = random() & 0xffffff;
-  return top | middle | bottom;
-}
-
-/*
- * 64-bit variant of random_max(): limits below 2^30 are passed to
- * random_max(), larger ones use rejection sampling over random_u64().
- */
-u64
-random_max_u64(u64 max)
-{
-  if (max < (1 << 30))
-    return random_max(max);
-
-  const u64 top = 0xffffffffffffffff;
-  u64 limit = top - (top % max);
-  u64 r;
-  for (;;)
-    {
-      r = random_u64();
-      if (r < limit)
-        break;
-    }
-  return r % max;
-}
+++ /dev/null
-/*
- * UCW Library -- Cryptographically Safe Random Key Generator
- *
- * (c) 2002 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-
-#include <fcntl.h>
-#include <unistd.h>
-
-/*
- * Fill buf with size random bytes read from /dev/urandom.
- * Any failure is fatal.
- */
-void
-randomkey(byte *buf, uns size)
-{
-  int fd;
-
-  if ((fd = open("/dev/urandom", O_RDONLY, 0)) < 0)
-    die("Unable to open /dev/urandom: %m");
-  /* read() is allowed to return less than requested, so continue until the buffer is full */
-  while (size)
-    {
-      int n = read(fd, buf, size);
-      if (n < 0)
-        die("Error reading /dev/urandom: %m");
-      if (!n)
-        die("Error reading /dev/urandom: unexpected end of file");
-      buf += n;
-      size -= n;
-    }
-  close(fd);
-}
+++ /dev/null
-/*
- * UCW Library -- Memory Re-allocation
- *
- * (c) 1997 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-
-#include <stdlib.h>
-
-#ifndef DEBUG_DMALLOC
-
-/* realloc() which never returns NULL: running out of memory is fatal. */
-void *
-xrealloc(void *old, uns size)
-{
-  /* We assume that realloc(NULL, x) works like malloc(x), which is true with the glibc. */
-  void *x = realloc(old, size);
-  if (!x)
-    /* size is unsigned, so it has to be printed by %u */
-    die("Cannot reallocate %u bytes of memory", size);
-  return x;
-}
-
-#endif
+++ /dev/null
-/*
- * Test of red-black trees
- *
- * (c) 2002, Robert Spalek <robert@ucw.cz>
- */
-
-#include "lib/lib.h"
-#include "lib/getopt.h"
-#include "lib/fastbuf.h"
-#include <stdio.h>
-#include <stdlib.h>
-
-struct my1_node
-{
- int key;
- int x;
-};
-
-/* Dump callback of the first tree: writes "key=<number> " to the fastbuf. */
-static void my_dump_key(struct fastbuf *fb, struct my1_node *n)
-{
-  char text[20];
-  snprintf(text, sizeof(text), "key=%d ", n->key);
-  bputs(fb, text);
-}
-
-/* Dump callback of the first tree: writes "x=<number> " to the fastbuf. */
-static void my_dump_data(struct fastbuf *fb, struct my1_node *n)
-{
-  char text[20];
-  snprintf(text, sizeof(text), "x=%d ", n->x);
-  bputs(fb, text);
-}
-
-#define TREE_NODE struct my1_node
-#define TREE_PREFIX(x) my_##x
-#define TREE_KEY_ATOMIC key
-#define TREE_WANT_CLEANUP
-#define TREE_WANT_LOOKUP
-#define TREE_WANT_DELETE
-#define TREE_WANT_ITERATOR
-#define TREE_WANT_DUMP
-#define TREE_CONSERVE_SPACE
-#include "redblack.h"
-
-/*
- * Iterate over the whole tree and assert that the keys never decrease.
- * If fb is non-NULL, every node is also printed to it as "key -> x" and
- * the fastbuf is flushed at the end.
- */
-static void my_check_order(struct fastbuf *fb, struct my_tree *t)
-{
-  int previous = 0x80000000;
-  TREE_FOR_ALL(my, t, n)
-  {
-    ASSERT(n->key >= previous);
-    previous = n->key;
-    if (!fb)
-      continue;
-    char line[30];
-    sprintf(line, "%d -> %d\n", n->key, n->x);
-    bputs(fb, line);
-  }
-  TREE_END_FOR;
-  if (fb)
-    bflush(fb);
-}
-
-struct my2_node
-{
- char key[1];
-};
-
-/* Dump callback of the second tree: writes "key=<string> " to the fastbuf. */
-static void my2_dump_key(struct fastbuf *fb, struct my2_node *n)
-{
- bputs(fb, "key=");
- bputs(fb, n->key);
- bputc(fb, ' ');
-}
-
-/* Dump callback of the second tree: its nodes have no data besides the key, so nothing is printed. */
-static void my2_dump_data(struct fastbuf *fb UNUSED, struct my2_node *n UNUSED)
-{
-}
-
-#define TREE_NODE struct my2_node
-#define TREE_PREFIX(x) my2_##x
-#define TREE_KEY_ENDSTRING key
-#define TREE_NOCASE
-#define TREE_WANT_CLEANUP
-#define TREE_WANT_NEW
-#define TREE_WANT_SEARCH
-#define TREE_WANT_REMOVE
-#define TREE_WANT_FIND_NEXT
-#define TREE_WANT_ITERATOR
-#define TREE_WANT_DUMP
-#define TREE_STATIC
-#define TREE_CONSERVE_SPACE
-#include "redblack.h"
-
-/*
- * Fill txt with a zero-terminated random string shorter than max_len
- * characters, each character code being between 32 and 127.
- */
-static void random_string(char *txt, uns max_len)
-{
-  uns len = random() % max_len;
-  char *end = txt + len;
-
-  while (txt < end)
-    *txt++ = random() % 96 + 32;
-  *end = 0;
-}
-
-/* Short options accepted by main(): the CF_SHORT_OPTS set plus -v, -n <num> and -a. */
-static char *options = CF_SHORT_OPTS "vn:a";
-
-/* Usage text printed by usage(); CF_USAGE is spliced in between the string pieces. */
-static char *help = "\
-Usage: test1.bin <options>\n\
-Options:\n"
-CF_USAGE
-"-v\tSet verbose mode\n\
--n num\tNumber of inserted nodes\n\
--a\tProbe some ASSERTs\n\
-";
-
-/* Print the help text to stderr and terminate the process with exit status 1. */
-static void NONRET
-usage(void)
-{
- fputs(help, stderr);
- exit(1);
-}
-
-/*
- * Test driver.  Options: -v (repeatable) raises verbosity, -n <num> sets
- * the node count of the randomized phases, -a additionally inserts a
- * duplicate key into the first tree to probe the ASSERTs.  Any
- * non-option argument prints the usage and exits.  Runs four phases
- * (random load, sequential insert/delete, complete tree, string keys),
- * writing progress to file descriptor 1, and returns 0.
- */
-int
-main(int argc, char **argv)
-{
- int verbose = 0, number = 1000, asserts = 0;
- int opt;
- struct fastbuf *fb, *dump_fb;
- struct my_tree t;
- struct my2_tree t2;
- int i;
- cf_def_file = NULL;
- log_init(argv[0]);
- while ((opt = cf_getopt(argc, argv, options, CF_NO_LONG_OPTS, NULL)) >= 0)
- switch (opt)
- {
- case 'v':
- verbose++;
- break;
- case 'n':
- number = atoi(optarg);
- break;
- case 'a':
- asserts++;
- break;
- default:
- usage();
- break;
- }
- if (optind < argc)
- usage();
- fb = bfdopen(1, 4096);
- /* Tree dumps are printed only at verbosity 2 and above; with a NULL
- * fastbuf the dump functions still run their consistency ASSERTs. */
- if (verbose > 1)
- dump_fb = fb;
- else
- dump_fb = NULL;
-
- /* Phase 1: load test -- `number' lookups of random keys below 1000000. */
- my_init(&t);
- for (i=0; i<number; i++)
- my_lookup(&t, random() % 1000000)->x = i;
- my_dump(dump_fb, &t);
- my_check_order(dump_fb, &t);
- if (asserts)
- {
- /* The same key is inserted twice into a tree built without
- * TREE_DUPLICATES; the second my_new() is meant to hit an ASSERT. */
- my_new(&t, 1);
- my_new(&t, 1);
- }
- my_cleanup(&t);
- if (verbose > 0)
- bputs(fb, "Load test passed\n");
-
- /* Phase 2: insert keys 0..99 in order, then delete them all in a
- * shuffled order, checking the tree after every step. */
- my_init(&t);
- for (i=0; i<100; i++)
- {
- my_new(&t, i)->x = i;
- my_dump(dump_fb, &t);
- }
- for (i=0; i<100; i++)
- {
- int a = i/10, b = i%10, j = a*10 + (b + a) % 10;
- int res UNUSED = my_delete(&t, j);
- ASSERT(res);
- my_dump(dump_fb, &t);
- }
- my_cleanup(&t);
- if (verbose > 0)
- bputs(fb, "Sequential adding and deleting passed\n");
-
- /* Phase 3: insert 997 keys in a scrambled order; the ASSERTs below
- * expect the iterator to yield exactly 0..996.  Then delete every key
- * in another scrambled order. */
- my_init(&t);
- for (i=0; i<997; i++)
- {
- my_new(&t, i*238 % 997)->x = i;
- my_dump(NULL, &t);
- }
- my_dump(dump_fb, &t);
- i = 0;
- TREE_FOR_ALL(my, &t, n)
- {
- ASSERT(n->key == i);
- i++;
- }
- TREE_END_FOR;
- ASSERT(i == 997);
- for (i=0; i<997; i++)
- {
- int res UNUSED = my_delete(&t, i*111 % 997);
- ASSERT(res);
- my_dump(NULL, &t);
- }
- my_dump(dump_fb, &t);
- my_cleanup(&t);
- if (verbose > 0)
- bputs(fb, "Complete tree passed\n");
-
- /* Phase 4: string keys -- insert `number' random strings. */
- my2_init(&t2);
- for (i=0; i<number; i++)
- {
- char txt[30];
- random_string(txt, 30);
- my2_new(&t2, txt);
- }
- my2_dump(dump_fb, &t2);
- /* For every node, count the nodes reachable through my2_find_next(),
- * i.e. the node itself plus the following nodes with an equal key. */
- TREE_FOR_ALL(my2, &t2, n)
- {
- my2_node *tmp;
- int count = 0;
- for (tmp=n; tmp; tmp = my2_find_next(tmp))
- count++;
- if (dump_fb)
- {
- char txt[20];
- bputs(dump_fb, n->key);
- sprintf(txt, ": %d\n", count);
- bputs(dump_fb, txt);
- }
- }
- TREE_END_FOR;
- /* Empty the tree by repeatedly removing whichever node my2_search()
- * returns for a fresh random string. */
- while (t2.count > 0)
- {
- char txt[30];
- my2_node *n;
- random_string(txt, 30);
- n = my2_search(&t2, txt);
- ASSERT(n);
- my2_remove(&t2, n);
- }
- my2_dump(dump_fb, &t2);
- my2_cleanup(&t2);
- if (verbose > 0)
- bputs(fb, "String test passed\n");
-
- bclose(fb);
- return 0;
-}
+++ /dev/null
-/*
- * UCW Library -- Red-black trees
- *
- * (c) 2002--2005, Robert Spalek <robert@ucw.cz>
- *
- * Skeleton based on hash-tables by:
- *
- * (c) 2002, Martin Mares <mj@ucw.cz>
- *
- */
-
-/*
- * Data structure description:
- *
- * A red-black tree is a binary search tree, where records are stored
- * in nodes (may be also leaves). Every node has a colour. The
- * following restrictions hold:
- *
- * - a parent of a red node is black
- * - every path from the root to a node with less than 2 children
- * contains the same number of black nodes
- *
- * A usual interpretation is, that leaves are intervals between records
- * and contain no data. Every leaf is black. This is equivalent, but
- * saves the space.
- */
-
-/*
- * This is not a normal header file, it's a generator of red-black trees.
- * Each time you include it with parameters set in the corresponding
- * preprocessor macros, it generates a tree structure with the parameters
- * given.
- *
- * You need to specify:
- *
- * TREE_NODE data type where a node dwells (usually a struct).
- * TREE_PREFIX(x) macro to add a name prefix (used on all global names
- * defined by the tree generator).
- *
- * Then decide on type of keys:
- *
- * TREE_KEY_ATOMIC=f use node->f as a key of an atomic type (i.e.,
- * a type which can be compared using '>', `==', and '<')
- * & TREE_ATOMIC_TYPE (defaults to int).
- * | TREE_KEY_STRING=f use node->f as a string key, allocated
- * separately from the rest of the node.
- * | TREE_KEY_ENDSTRING=f use node->f as a string key, allocated
- * automatically at the end of the node struct
- * (to be declared as "char f[1]" at the end).
- * | TREE_KEY_COMPLEX use a multi-component key; as the name suggests,
- * the passing of parameters is a bit complex then.
- * The TREE_KEY_COMPLEX(x) macro should expand to
- * `x k1, x k2, ... x kn' and you should also define:
- * & TREE_KEY_DECL declaration of function parameters in which key
- * should be passed to all tree operations.
- * That is, `type1 k1, type2 k2, ... typen kn'.
- * With complex keys, TREE_GIVE_CMP is mandatory.
- *
- * Then specify what operations you request (all names are automatically
- * prefixed by calling TREE_PREFIX):
- *
- * <always defined> init() -- initialize the tree.
- * TREE_WANT_CLEANUP cleanup() -- deallocate the tree.
- * TREE_WANT_FIND node *find(key) -- find first node with the specified
- * key, return NULL if no such node exists.
- * TREE_WANT_FIND_NEXT node *find_next(node *start) -- find next node with the
- * specified key, return NULL if no such node exists.
- * Implies TREE_DUPLICATES.
- * TREE_WANT_SEARCH node *search(key) -- find the node with the specified
- * key or, if it does not exist, the last node visited while
- * descending towards it (NULL if the tree is empty).
- * TREE_WANT_SEARCH_DOWN node *search_down(key) -- find either the node with
- * specified value, or if it does not exist, the node
- * with nearest smaller value.
- * TREE_WANT_BOUNDARY node *boundary(uns direction) -- finds smallest
- * (direction==0) or largest (direction==1) node.
- * TREE_WANT_ADJACENT node *adjacent(node *, uns direction) -- finds next
- * (direction==1) or previous (direction==0) node.
- * TREE_WANT_NEW node *new(key) -- create new node with given key.
- * If it already exists, it is created as the last one.
- * TREE_WANT_LOOKUP node *lookup(key) -- find node with given key,
- * if it doesn't exist, create it. Defining
- * TREE_GIVE_INIT_DATA is strongly recommended.
- * TREE_WANT_DELETE int delete(key) -- delete and deallocate node
- * with a given key. Returns success.
- * TREE_WANT_REMOVE remove(node *) -- delete and deallocate given node.
- *
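- * TREE_WANT_DUMP dump(fb, tree) -- checks the tree invariants and, if fb
- * is not NULL, dumps the whole tree to that fastbuf.
- * You must supply dump_key(fb, node) and dump_data(fb, node).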
- *
- * You can also supply several functions:
- *
- * TREE_GIVE_CMP int cmp(key1, key2) -- return -1, 0, and 1 according to
- * the relation of keys. By default, we use <, ==, > for
- * atomic types and either strcmp or strcasecmp for
- * strings.
- * TREE_GIVE_EXTRA_SIZE int extra_size(key) -- returns how many bytes after the
- * node should be allocated for dynamic data. Default=0
- * or length of the string with TREE_KEY_ENDSTRING.
- * TREE_GIVE_INIT_KEY void init_key(node *,key) -- initialize key in a newly
- * created node. Defaults: assignment for atomic keys
- * and static strings, strcpy for end-allocated strings.
- * TREE_GIVE_INIT_DATA void init_data(node *) -- initialize data fields in a
- * newly created node. Very useful for lookup operations.
- * TREE_GIVE_ALLOC void *alloc(unsigned int size) -- allocate space for
- * a node. Default is either normal or pooled allocation
- * depending on whether we want deletions.
- * void free(void *) -- the converse.
- *
- * ... and a couple of extra parameters:
- *
- * TREE_NOCASE string comparisons should be case-insensitive.
- * TREE_ATOMIC_TYPE=t Atomic values are of type `t' instead of int.
- * TREE_USE_POOL=pool Allocate all nodes from given mempool.
- * Collides with delete/remove functions.
- * TREE_GLOBAL Functions are exported (i.e., not static).
- * TREE_CONSERVE_SPACE Use as little space as possible at the price of a
- * little slowdown.
- * TREE_DUPLICATES Records with duplicate keys are allowed.
- * TREE_MAX_DEPTH Maximal depth of a tree (for stack allocation).
- *
- * If you set TREE_WANT_ITERATOR, you also get a iterator macro at no
- * extra charge:
- *
- * TREE_FOR_ALL(tree_prefix, tree_pointer, variable)
- * {
- * // node *variable gets declared automatically
- * do_something_with_node(variable);
- * // use TREE_BREAK and TREE_CONTINUE instead of break and continue
- * // you must not alter contents of the tree here
- * }
- * TREE_END_FOR;
- *
- * Then include "lib/redblack.h" and voila, you have a tree suiting all your
- * needs (at least those which you've revealed :) ).
- *
- * After including this file, all parameter macros are automatically
- * undef'd.
- */
-
-#include <stdio.h>
-#include <string.h>
-
-#if !defined(TREE_NODE) || !defined(TREE_PREFIX)
-#error Some of the mandatory configuration macros are missing.
-#endif
-
-#define P(x) TREE_PREFIX(x)
-
-/* Declare buckets and the tree. */
-
-typedef TREE_NODE P(node);
-
-#if defined(TREE_WANT_FIND_NEXT) || defined(TREE_WANT_ADJACENT) || defined(TREE_WANT_ITERATOR) || defined(TREE_WANT_REMOVE)
-# define TREE_STORE_PARENT
-#endif
-
-/*
- * Internal tree node wrapping the user's node.  The colour bit is a
- * separate field unless TREE_CONSERVE_SPACE is set (the accessors then
- * keep it in the lowest bit of son[0]).  When the node can have extra
- * bytes allocated behind it (TREE_GIVE_EXTRA_SIZE or TREE_KEY_ENDSTRING),
- * the flag is placed before `n' so that `n' stays the last member.
- */
-typedef struct P(bucket) {
- struct P(bucket) *son[2]; /* [0] = left (smaller keys), [1] = right */
-#ifdef TREE_STORE_PARENT
- struct P(bucket) *parent; /* NULL in the root */
-#endif
-#if !defined(TREE_CONSERVE_SPACE) && (defined(TREE_GIVE_EXTRA_SIZE) || defined(TREE_KEY_ENDSTRING))
- uns red_flag:1;
-#endif
- P(node) n;
-#if !defined(TREE_CONSERVE_SPACE) && !defined(TREE_GIVE_EXTRA_SIZE) && !defined(TREE_KEY_ENDSTRING)
- uns red_flag:1;
-#endif
-} P(bucket);
-
-/* The tree itself; initialize it with init() before use. */
-struct P(tree) {
- uns count; /* number of nodes currently stored */
- uns height; /* of black nodes */
- P(bucket) *root; /* NULL for an empty tree */
-};
-
-/* One step of a root-to-node path as recorded by the tree operations. */
-typedef struct P(stack_entry) {
- P(bucket) *buck; /* bucket at this depth (NULL marks an empty position) */
- uns son; /* which son of `buck' the path continues to (0 or 1); 10 is used as a "none" marker */
-} P(stack_entry);
-
-#define T struct P(tree)
-
-/* Preset parameters */
-
-/*
- * For each key type define TREE_KEY(x) (the key expression with prefix x;
- * with an empty prefix it names the key parameter), TREE_KEY_DECL (the
- * parameter declaration) and default cmp/init_key/extra_size functions
- * unless the user has supplied their own.
- */
-#if defined(TREE_KEY_ATOMIC)
-
-#define TREE_KEY(x) x TREE_KEY_ATOMIC
-
-#ifndef TREE_ATOMIC_TYPE
-# define TREE_ATOMIC_TYPE int
-#endif
-#define TREE_KEY_DECL TREE_ATOMIC_TYPE TREE_KEY()
-
-#ifndef TREE_GIVE_CMP
-# define TREE_GIVE_CMP
- /* Default three-way comparison of atomic keys: -1, 0 or 1. */
- static inline int P(cmp) (TREE_ATOMIC_TYPE x, TREE_ATOMIC_TYPE y)
- {
- if (x < y)
- return -1;
- else if (x > y)
- return 1;
- else
- return 0;
- }
-#endif
-
-#ifndef TREE_GIVE_INIT_KEY
-# define TREE_GIVE_INIT_KEY
- /* Default key initializer for atomic keys: plain assignment. */
- static inline void P(init_key) (P(node) *n, TREE_ATOMIC_TYPE k)
- { TREE_KEY(n->) = k; }
-#endif
-
-#elif defined(TREE_KEY_STRING) || defined(TREE_KEY_ENDSTRING)
-
-#ifdef TREE_KEY_STRING
-# define TREE_KEY(x) x TREE_KEY_STRING
-# ifndef TREE_GIVE_INIT_KEY
-# define TREE_GIVE_INIT_KEY
- /* Separately allocated string key: only the pointer is stored, the string is not copied. */
- static inline void P(init_key) (P(node) *n, char *k)
- { TREE_KEY(n->) = k; }
-# endif
-#else
-# define TREE_KEY(x) x TREE_KEY_ENDSTRING
-# define TREE_GIVE_EXTRA_SIZE
- /* Extra bytes to allocate behind the node: the key length (the key field itself is expected to hold the terminator, see "char f[1]" above). */
- static inline int P(extra_size) (char *k)
- { return strlen(k); }
-# ifndef TREE_GIVE_INIT_KEY
-# define TREE_GIVE_INIT_KEY
- /* End-allocated string key: copy the string into the node's tail. */
- static inline void P(init_key) (P(node) *n, char *k)
- { strcpy(TREE_KEY(n->), k); }
-# endif
-#endif
-#define TREE_KEY_DECL char *TREE_KEY()
-
-#ifndef TREE_GIVE_CMP
-# define TREE_GIVE_CMP
- /* Default string comparison; case-insensitive when TREE_NOCASE is set. */
- static inline int P(cmp) (char *x, char *y)
- {
-# ifdef TREE_NOCASE
- return strcasecmp(x,y);
-# else
- return strcmp(x,y);
-# endif
- }
-#endif
-
-#elif defined(TREE_KEY_COMPLEX)
-
-#define TREE_KEY(x) TREE_KEY_COMPLEX(x)
-
-#else
-#error You forgot to set the tree key type.
-#endif
-
-/*
- * Accessors for the colour bit and the son pointers.  All code that may
- * run with TREE_CONSERVE_SPACE must read sons through tree_son(), because
- * in that mode the raw son[0] carries the colour in its lowest bit.
- * A raw store to son[] therefore overwrites the colour stored there.
- */
-#ifndef TREE_CONSERVE_SPACE
- static inline uns P(red_flag) (P(bucket) *node)
- { return node->red_flag; }
- static inline void P(set_red_flag) (P(bucket) *node, uns flag)
- { node->red_flag = flag; }
- static inline P(bucket) * P(tree_son) (P(bucket) *node, uns id)
- { return node->son[id]; }
- static inline void P(set_tree_son) (P(bucket) *node, uns id, P(bucket) *son)
- { node->son[id] = son; }
-#else
- /* Pointers are aligned, hence we can use lower bits. */
- static inline uns P(red_flag) (P(bucket) *node)
- { return ((uintptr_t) node->son[0]) & 1L; }
- /* Replace bit 0 of son[0] by the flag, keeping the pointer bits. */
- static inline void P(set_red_flag) (P(bucket) *node, uns flag)
- { node->son[0] = (void*) ( (((uintptr_t) node->son[0]) & ~1L) | (flag & 1L) ); }
- /* Return the son pointer with the tag bit masked out. */
- static inline P(bucket) * P(tree_son) (P(bucket) *node, uns id)
- { return (void *) (((uintptr_t) node->son[id]) & ~1L); }
- /* Store a new son pointer while preserving the tag bit already present in the slot. */
- static inline void P(set_tree_son) (P(bucket) *node, uns id, P(bucket) *son)
- { node->son[id] = (void *) ((uintptr_t) son | (((uintptr_t) node->son[id]) & 1L) ); }
-#endif
-
-/* Defaults for missing parameters. */
-
-#ifndef TREE_GIVE_CMP
-#error Unable to determine how to compare two keys.
-#endif
-
-#ifdef TREE_GIVE_EXTRA_SIZE
-/* This trickery is needed to avoid `unused parameter' warnings */
-# define TREE_EXTRA_SIZE P(extra_size)
-#else
-/*
- * Beware, C macros are expanded iteratively, not recursively,
- * hence we get only a _single_ argument, although the expansion
- * of TREE_KEY contains commas.
- */
-# define TREE_EXTRA_SIZE(x) 0
-#endif
-
-#ifndef TREE_GIVE_INIT_KEY
-# error Unable to determine how to initialize keys.
-#endif
-
-#ifndef TREE_GIVE_INIT_DATA
-static inline void P(init_data) (P(node) *n UNUSED)
-{
-}
-#endif
-
-#include <stdlib.h>
-
-/*
- * Default node allocator, used unless the user supplies alloc()/free()
- * via TREE_GIVE_ALLOC.  With TREE_USE_POOL nodes come from the given
- * mempool, no free() function is generated and TREE_SAFE_FREE() expands
- * to nothing; otherwise xmalloc()/xfree() are used.
- */
-#ifndef TREE_GIVE_ALLOC
-# ifdef TREE_USE_POOL
- static inline void * P(alloc) (unsigned int size)
- { return mp_alloc_fast(TREE_USE_POOL, size); }
-# define TREE_SAFE_FREE(x)
-# else
- static inline void * P(alloc) (unsigned int size)
- { return xmalloc(size); }
-
- static inline void P(free) (void *x)
- { xfree(x); }
-# endif
-#endif
-
-#ifndef TREE_SAFE_FREE
-# define TREE_SAFE_FREE(x) P(free) (x)
-#endif
-
-#ifdef TREE_GLOBAL
-# define STATIC
-#else
-# define STATIC static
-#endif
-
-#ifndef TREE_MAX_DEPTH
-# define TREE_MAX_DEPTH 64
-#endif
-
-#if defined(TREE_WANT_FIND_NEXT) && !defined(TREE_DUPLICATES)
-# define TREE_DUPLICATES
-#endif
-
-#ifdef TREE_WANT_LOOKUP
-#ifndef TREE_WANT_FIND
-# define TREE_WANT_FIND
-#endif
-#ifndef TREE_WANT_NEW
-# define TREE_WANT_NEW
-#endif
-#endif
-
-/* Now the operations */
-
-/* Make *t an empty tree.  Nothing is freed; any previous contents are simply forgotten. */
-STATIC void P(init) (T *t)
-{
-  t->root = NULL;
-  t->height = 0;
-  t->count = 0;
-}
-
-#ifdef TREE_WANT_CLEANUP
-/* Release the subtree rooted at `node' in post-order, decrementing t->count for every node. */
-static void P(cleanup_subtree) (T *t, P(bucket) *node)
-{
-  if (!node)
-    return;
-  P(cleanup_subtree) (t, P(tree_son) (node, 0));
-  P(cleanup_subtree) (t, P(tree_son) (node, 1));
-  /*
-   * Go through TREE_SAFE_FREE like the removal code does: P(free) is not
-   * generated when the nodes live in a mempool (TREE_USE_POOL), so
-   * calling it directly would not even compile in that configuration.
-   */
-  TREE_SAFE_FREE(node);
-  t->count--;
-}
-
-/* Deallocate all nodes and leave *t as a valid empty tree. */
-STATIC void P(cleanup) (T *t)
-{
-  P(cleanup_subtree) (t, t->root);
-  ASSERT(!t->count);
-  /* Do not leave a dangling pointer to the freed nodes behind. */
-  t->root = NULL;
-  t->height = 0;
-}
-#endif
-
-/*
- * Descend from `node' looking for the given key and record the path in
- * stack[0..].  Returns the index of the last entry: its buck is either
- * the bucket holding the key or NULL (the empty position where the key
- * would belong).  For all earlier entries, `son' tells which way the path
- * continues; the last entry gets the marker value 10.  The stack has room
- * for max_depth entries.  With TREE_DUPLICATES, son_id selects which of
- * several equal keys is returned: 0 the first, 1 the last; otherwise
- * son_id is unused.
- */
-static uns P(fill_stack) (P(stack_entry) *stack, uns max_depth, P(bucket) *node, TREE_KEY_DECL, uns son_id UNUSED)
-{
- uns i;
- stack[0].buck = node;
- for (i=0; stack[i].buck; i++)
- {
- int cmp;
- cmp = P(cmp) (TREE_KEY(), TREE_KEY(stack[i].buck->n.));
- if (cmp == 0)
- break;
- else if (cmp < 0)
- stack[i].son = 0;
- else
- stack[i].son = 1;
- ASSERT(i+1 < max_depth);
- stack[i+1].buck = P(tree_son) (stack[i].buck, stack[i].son);
- }
-#ifdef TREE_DUPLICATES
- if (stack[i].buck)
- {
- uns idx;
- /* Find first/last of equal keys according to son_id. */
- idx = P(fill_stack) (stack+i+1, max_depth-i-1,
- P(tree_son) (stack[i].buck, son_id), TREE_KEY(), son_id);
- /* Only move on if the subtree really contained another equal key. */
- if (stack[i+1+idx].buck)
- {
- stack[i].son = son_id;
- i = i+1+idx;
- }
- }
-#endif
- stack[i].son = 10;
- return i;
-}
-
-#ifdef TREE_WANT_FIND
-/* Return the (first) node with the given key, or NULL if the tree contains no such key. */
-STATIC P(node) * P(find) (T *t, TREE_KEY_DECL)
-{
- P(stack_entry) stack[TREE_MAX_DEPTH];
- uns depth;
- depth = P(fill_stack) (stack, TREE_MAX_DEPTH, t->root, TREE_KEY(), 0);
- return stack[depth].buck ? &stack[depth].buck->n : NULL;
-}
-#endif
-
-#ifdef TREE_WANT_SEARCH_DOWN
-/*
- * Return a node with the given key, or, if there is none, the node with
- * the nearest smaller key.  Returns NULL when every key in the tree is
- * greater (or the tree is empty).
- */
-STATIC P(node) * P(search_down) (T *t, TREE_KEY_DECL)
-{
- P(node) *last_right=NULL;
- P(bucket) *node=t->root;
- while(node)
- {
- int cmp;
- cmp = P(cmp) (TREE_KEY(), TREE_KEY(node->n.));
- if (cmp == 0)
- return &node->n;
- else if (cmp < 0)
- node=P(tree_son) (node, 0);
- else
- {
- /* This node is smaller than the key: remember it as the best candidate so far. */
- last_right=&node->n;
- node=P(tree_son) (node, 1);
- }
- }
- return last_right;
-}
-#endif
-
-#ifdef TREE_WANT_BOUNDARY
-/*
- * Return the smallest (direction == 0) or the largest (direction != 0)
- * node of the tree, or NULL if the tree is empty.
- */
-STATIC P(node) * P(boundary) (T *t, uns direction)
-{
-  P(bucket) *cur = t->root;
-  uns side = direction ? 1 : 0;
-
-  if (!cur)
-    return NULL;
-  /* Keep following the chosen side until there is nothing below. */
-  for (;;)
-    {
-      P(bucket) *below = P(tree_son) (cur, side);
-      if (!below)
-        break;
-      cur = below;
-    }
-  return &cur->n;
-}
-#endif
-
-#ifdef TREE_STORE_PARENT
-/*
- * Return the in-order neighbour of `start': the next node for
- * direction == 1, the previous one for direction == 0.  Returns NULL when
- * `start' is the last (resp. first) node.  direction is used directly as
- * a son index, so it must be 0 or 1.  Relies on the parent pointers.
- */
-STATIC P(node) * P(adjacent) (P(node) *start, uns direction)
-{
- P(bucket) *node = SKIP_BACK(P(bucket), n, start);
- P(bucket) *next = P(tree_son) (node, direction);
- if (next)
- {
- /* There is a subtree on that side: take its extreme node on the opposite side. */
- while (1)
- {
- node = P(tree_son) (next, 1 - direction);
- if (!node)
- break;
- next = node;
- }
- }
- else
- {
- /* Otherwise climb up until we leave a subtree through its opposite side. */
- next = node->parent;
- while (next && node == P(tree_son) (next, direction))
- {
- node = next;
- next = node->parent;
- }
- if (!next)
- return NULL;
- ASSERT(node == P(tree_son) (next, 1 - direction));
- }
- return &next->n;
-}
-#endif
-
-#if defined(TREE_DUPLICATES) || defined(TREE_WANT_DELETE) || defined(TREE_WANT_REMOVE)
-/*
- * Extend a path ending in stack[0]: take one step to the `direction' son
- * and then keep going to the opposite side until an empty position is
- * reached.  Returns the number of steps taken, so that stack[result].buck
- * is NULL and stack[result-1].buck is the last bucket on the way.
- * Returns 0 and changes nothing when stack[0].buck is NULL.
- */
-static int P(find_next_node) (P(stack_entry) *stack, uns max_depth, uns direction)
-{
- uns depth = 0;
- if (stack[0].buck)
- {
- ASSERT(depth+1 < max_depth);
- stack[depth].son = direction;
- stack[depth+1].buck = P(tree_son) (stack[depth].buck, direction);
- depth++;
- while (stack[depth].buck)
- {
- ASSERT(depth+1 < max_depth);
- stack[depth].son = 1 - direction;
- stack[depth+1].buck = P(tree_son) (stack[depth].buck, 1 - direction);
- depth++;
- }
- }
- return depth;
-}
-#endif
-
-#ifdef TREE_WANT_FIND_NEXT
-/*
- * Return the in-order successor of `start' if it carries the same key
- * as `start'; otherwise (or when there is no successor) return NULL.
- */
-STATIC P(node) * P(find_next) (P(node) *start)
-{
-  P(node) *succ = P(adjacent) (start, 1);
-
-  if (!succ)
-    return NULL;
-  return P(cmp) (TREE_KEY(start->), TREE_KEY(succ->)) ? NULL : succ;
-}
-#endif
-
-#ifdef TREE_WANT_SEARCH
-/*
- * Return the node with the given key.  If there is none, return the last
- * node visited on the way down to where the key would be; NULL only for
- * an empty tree.
- */
-STATIC P(node) * P(search) (T *t, TREE_KEY_DECL)
-{
- P(stack_entry) stack[TREE_MAX_DEPTH];
- uns depth;
- depth = P(fill_stack) (stack, TREE_MAX_DEPTH, t->root, TREE_KEY(), 0);
- if (!stack[depth].buck)
- {
- /* Not found: step back from the empty position to its parent, if any. */
- if (depth > 0)
- depth--;
- else
- return NULL;
- }
- return &stack[depth].buck->n;
-}
-#endif
-
-#if 0
-#define TREE_TRACE(txt...) do { printf(txt); fflush(stdout); } while (0)
-#else
-#define TREE_TRACE(txt...)
-#endif
-
-/*
- * Rotate the edge between `node' and its son number son_id, so that the
- * son becomes the root of this subtree and `node' its (1-son_id) son.
- * Parent pointers of the moved buckets are updated, but the link from
- * the bucket above is NOT: the caller must store the returned root there.
- * The son slots are written raw, hence the colours have to be set again
- * by the caller.
- */
-static inline P(bucket) * P(rotation) (P(bucket) *node, uns son_id)
-{
- /* Destroys red_flag's in node, son. Returns new root. */
- P(bucket) *son = P(tree_son) (node, son_id);
- TREE_TRACE("Rotation (node %d, son %d), direction %d\n", node->n.key, son->n.key, son_id);
- node->son[son_id] = P(tree_son) (son, 1-son_id);
- son->son[1-son_id] = node;
-#ifdef TREE_STORE_PARENT
- if (node->son[son_id])
- node->son[son_id]->parent = node;
- son->parent = node->parent;
- node->parent = son;
-#endif
- return son;
-}
-
-/*
- * Restore the colouring rules after stack[depth].buck has become red.
- * stack[0..depth] is the path from the root to that bucket.  The function
- * either stops immediately (black parent), blackens a red root (which
- * increases the tree height), recolours and continues two levels higher
- * (red uncle), or finishes with one or two rotations (black uncle).
- */
-static void P(rotate_after_insert) (T *t, P(stack_entry) *stack, uns depth)
-{
- P(bucket) *node;
- P(bucket) *parent, *grand, *uncle;
- int s1, s2;
-try_it_again:
- node = stack[depth].buck;
- ASSERT(P(red_flag) (node));
- /* At this moment, node became red. The paths sum have
- * been preserved, but we have to check the parental
- * condition. */
- if (depth == 0)
- {
- ASSERT(t->root == node);
- return;
- }
- parent = stack[depth-1].buck;
- if (!P(red_flag) (parent))
- return;
- if (depth == 1)
- {
- /* The red parent is the root: painting it black fixes everything. */
- ASSERT(t->root == parent);
- P(set_red_flag) (parent, 0);
- t->height++;
- return;
- }
- grand = stack[depth-2].buck;
- ASSERT(!P(red_flag) (grand));
- /* The parent is also red, the grandparent exists and it
- * is black. */
- s1 = stack[depth-1].son;
- s2 = stack[depth-2].son;
- uncle = P(tree_son) (grand, 1-s2);
- if (uncle && P(red_flag) (uncle))
- {
- /* Red parent and uncle, black grandparent.
- * Exchange and try another iteration. */
- P(set_red_flag) (parent, 0);
- P(set_red_flag) (uncle, 0);
- P(set_red_flag) (grand, 1);
- depth -= 2;
- TREE_TRACE("Swapping colours (parent %d, uncle %d, grand %d), passing thru\n", parent->n.key, uncle->n.key, grand->n.key);
- goto try_it_again;
- }
- /* Black uncle and grandparent, we need to rotate. Test
- * the direction. */
- if (s1 == s2)
- {
- /* Node, parent and grandparent lie in a line: a single rotation suffices. */
- node = P(rotation) (grand, s2);
- P(set_red_flag) (parent, 0);
- P(set_red_flag) (grand, 1);
- }
- else
- {
- /* Zig-zag: rotate the parent first, then the grandparent. */
- grand->son[s2] = P(rotation) (parent, s1);
- node = P(rotation) (grand, s2);
- P(set_red_flag) (grand, 1);
- P(set_red_flag) (parent, 1);
- P(set_red_flag) (node, 0);
- }
- /* Hook the new subtree root into the bucket above (or make it the tree root). */
- if (depth >= 3)
- P(set_tree_son) (stack[depth-3].buck, stack[depth-3].son, node);
- else
- t->root = node;
-}
-
-#ifdef TREE_WANT_NEW
-/*
- * Allocate a node with the given key, link it into the tree as a red
- * leaf and rebalance.  With TREE_DUPLICATES the node is placed after all
- * nodes with the same key; without it, the key must not be present yet
- * (checked by an ASSERT).  Returns the new user node with the key set by
- * init_key() and the data by init_data().
- */
-STATIC P(node) * P(new) (T *t, TREE_KEY_DECL)
-{
- P(stack_entry) stack[TREE_MAX_DEPTH];
- P(bucket) *added;
- uns depth;
- depth = P(fill_stack) (stack, TREE_MAX_DEPTH, t->root, TREE_KEY(), 1);
-#ifdef TREE_DUPLICATES
- /* It is the last found value, hence everything in the right subtree is
- * strongly _bigger_. */
- depth += P(find_next_node) (stack+depth, TREE_MAX_DEPTH-depth, 1);
-#endif
- ASSERT(!stack[depth].buck);
- /* We are in a leaf, hence we can easily append a new leaf to it. */
- added = P(alloc) (sizeof(struct P(bucket)) + TREE_EXTRA_SIZE(TREE_KEY()) );
- added->son[0] = added->son[1] = NULL;
- stack[depth].buck = added;
- if (depth > 0)
- {
-#ifdef TREE_STORE_PARENT
- added->parent = stack[depth-1].buck;
-#endif
- P(set_tree_son) (stack[depth-1].buck, stack[depth-1].son, added);
- }
- else
- {
-#ifdef TREE_STORE_PARENT
- added->parent = NULL;
-#endif
- t->root = added;
- }
- P(set_red_flag) (added, 1); /* Set it red to not disturb the path sum. */
- P(init_key) (&added->n, TREE_KEY());
- P(init_data) (&added->n);
- t->count++;
- /* Let us reorganize the red_flag's and the structure of the tree. */
- P(rotate_after_insert) (t, stack, depth);
- return &added->n;
-}
-#endif
-
-#ifdef TREE_WANT_LOOKUP
-/* Return the node with the given key, creating it by new() if it does not exist yet. */
-STATIC P(node) * P(lookup) (T *t, TREE_KEY_DECL)
-{
- P(node) *node;
- node = P(find) (t, TREE_KEY());
- if (node)
- return node;
- return P(new) (t, TREE_KEY());
-}
-#endif
-
-#if defined(TREE_WANT_REMOVE) || defined(TREE_WANT_DELETE)
-/*
- * Rebalance after a black bucket has been removed below
- * stack[depth].buck on the side stack[depth].son, i.e. the paths through
- * that position are one black node short.  depth < 0 means the shortage
- * has reached the root, in which case the height of the whole tree
- * decreases.  The cases are distinguished by the colour of the sibling
- * and of its sons; a recolouring may move the shortage one level up, a
- * rotation always terminates the repair.
- */
-static void P(rotate_after_delete) (T *t, P(stack_entry) *stack, int depth)
-{
- uns iteration = 0;
- P(bucket) *parent, *sibling, *instead;
- uns parent_red, del_son, sibl_red;
-missing_black:
- if (depth < 0)
- {
- t->height--;
- return;
- }
- parent = stack[depth].buck;
- parent_red = P(red_flag) (parent);
- del_son = stack[depth].son;
- /* For the 1st iteration: we have deleted parent->son[del_son], which
- * was a black node with no son. Hence there is one missing black
- * vertex in that path, which we are going to fix now.
- *
- * For other iterations: in that path, there is also missing a black
- * node. */
- if (!iteration)
- ASSERT(!P(tree_son) (parent, del_son));
- sibling = P(tree_son) (parent, 1-del_son);
- ASSERT(sibling);
- sibl_red = P(red_flag) (sibling);
- instead = NULL;
- if (!sibl_red)
- {
- P(bucket) *son[2];
- uns red[2];
- son[0] = P(tree_son) (sibling, 0);
- son[1] = P(tree_son) (sibling, 1);
- red[0] = son[0] ? P(red_flag) (son[0]) : 0;
- red[1] = son[1] ? P(red_flag) (son[1]) : 0;
- if (!red[0] && !red[1])
- {
- /* Black sibling with no red son: recolour; a black parent passes the shortage upwards. */
- P(set_red_flag) (sibling, 1);
- P(set_red_flag) (parent, 0);
- if (parent_red)
- return;
- else
- {
- depth--;
- iteration++;
- TREE_TRACE("Swapping colours (parent %d, sibling %d), passing thru\n", parent->n.key, sibling->n.key);
- goto missing_black;
- }
- } else if (!red[del_son])
- {
- /* Only the outer son of the sibling is red: single rotation. */
- instead = P(rotation) (parent, 1-del_son);
- P(set_red_flag) (instead, parent_red);
- P(set_red_flag) (parent, 0);
- P(set_red_flag) (son[1-del_son], 0);
- } else /* red[del_son] */
- {
- /* The inner son of the sibling is red: double rotation. */
- parent->son[1-del_son] = P(rotation) (sibling, del_son);
- instead = P(rotation) (parent, 1-del_son);
- P(set_red_flag) (instead, parent_red);
- P(set_red_flag) (parent, 0);
- P(set_red_flag) (sibling, 0);
- }
- } else /* sibl_red */
- {
- P(bucket) *grand[2], *son;
- uns red[2];
- ASSERT(!parent_red);
- son = P(tree_son) (sibling, del_son);
- ASSERT(son && !P(red_flag) (son));
- grand[0] = P(tree_son) (son, 0);
- grand[1] = P(tree_son) (son, 1);
- red[0] = grand[0] ? P(red_flag) (grand[0]) : 0;
- red[1] = grand[1] ? P(red_flag) (grand[1]) : 0;
- if (!red[0] && !red[1])
- {
- instead = P(rotation) (parent, 1-del_son);
- P(set_red_flag) (instead, 0);
- P(set_red_flag) (parent, 0);
- P(set_red_flag) (son, 1);
- }
- else if (!red[del_son])
- {
- parent->son[1-del_son] = P(rotation) (sibling, del_son);
- instead = P(rotation) (parent, 1-del_son);
- P(set_red_flag) (instead, 0);
- P(set_red_flag) (parent, 0);
- P(set_red_flag) (sibling, 1);
- P(set_red_flag) (grand[1-del_son], 0);
- } else /* red[del_son] */
- {
- sibling->son[del_son] = P(rotation) (son, del_son);
- parent->son[1-del_son] = P(rotation) (sibling, del_son);
- instead = P(rotation) (parent, 1-del_son);
- P(set_red_flag) (instead, 0);
- P(set_red_flag) (parent, 0);
- P(set_red_flag) (sibling, 1);
- P(set_red_flag) (son, 0);
- }
- }
- /* We have performed all desired rotations and need to store the new
- * pointer to the subtree. */
- ASSERT(instead);
- if (depth > 0)
- P(set_tree_son) (stack[depth-1].buck, stack[depth-1].son, instead);
- else
- t->root = instead;
-}
-
-/*
- * Unlink and free the bucket stack[depth].buck, where stack[0..depth] is
- * the complete path from the root to it (every entry's `son' leading to
- * the next entry).  A bucket with two sons is first exchanged with its
- * in-order successor, which has at most one son, so that the actual
- * unlinking always happens at a bucket with at most one son.  If a black
- * bucket without a son has been removed, the tree is rebalanced by
- * rotate_after_delete().
- */
-static void P(remove_by_stack) (T *t, P(stack_entry) *stack, uns depth)
-{
-  P(bucket) *node = stack[depth].buck;
-  P(bucket) *son;
-  uns i;
-  for (i=0; i<depth; i++)
-    ASSERT(P(tree_son) (stack[i].buck, stack[i].son) == stack[i+1].buck);
-  if (P(tree_son) (node, 0) && P(tree_son) (node, 1))
-    {
-      P(bucket) *xchg;
-      uns flag_node, flag_xchg;
-      /* Extend the path to the empty position just behind the successor. */
-      uns d = P(find_next_node) (stack+depth, TREE_MAX_DEPTH-depth, 1);
-
-      ASSERT(d >= 2);
-      d--;
-      xchg = stack[depth+d].buck;
-      flag_node = P(red_flag) (node);
-      flag_xchg = P(red_flag) (xchg);
-      ASSERT(!P(tree_son) (xchg, 0));
-      son = P(tree_son) (xchg, 1);
-      /* Exchange the positions (and colours) of node and xchg, both in the tree and in the recorded path. */
-      stack[depth].buck = xchg;	/* Magic iff d == 1. */
-      stack[depth+d].buck = node;
-      xchg->son[0] = P(tree_son) (node, 0);
-      xchg->son[1] = P(tree_son) (node, 1);
-      if (depth > 0)
-        P(set_tree_son) (stack[depth-1].buck, stack[depth-1].son, xchg);
-      else
-        t->root = xchg;
-      node->son[0] = NULL;
-      node->son[1] = son;
-      P(set_tree_son) (stack[depth+d-1].buck, stack[depth+d-1].son, node);
-#ifdef TREE_STORE_PARENT
-      xchg->parent = depth > 0 ? stack[depth-1].buck : NULL;
-      xchg->son[0]->parent = xchg;
-      xchg->son[1]->parent = xchg;
-      node->parent = stack[depth+d-1].buck;
-      if (son)
-        son->parent = node;
-#endif
-      /* The son slots were written raw above, so set the colours last. */
-      P(set_red_flag) (xchg, flag_node);
-      P(set_red_flag) (node, flag_xchg);
-      depth += d;
-    }
-  else if (P(tree_son) (node, 0))
-    son = P(tree_son) (node, 0);
-  else
-    son = P(tree_son) (node, 1);
-  /* At this moment, stack[depth].buck == node and it has at most one son
-   * and it is stored in the variable son. */
-  t->count--;
-  if (depth > 0)
-    {
-      P(set_tree_son) (stack[depth-1].buck, stack[depth-1].son, son);
-#ifdef TREE_STORE_PARENT
-      if (son)
-        son->parent = stack[depth-1].buck;
-#endif
-    }
-  else
-    {
-      t->root = son;
-#ifdef TREE_STORE_PARENT
-      if (son)
-        son->parent = NULL;
-#endif
-    }
-  if (P(red_flag) (node))
-    {
-      /*
-       * Removing a red bucket does not change any black height, so no
-       * rebalancing is needed -- but the bucket still has to be
-       * released.  (It used to be leaked here.)
-       */
-      ASSERT(!son);
-      TREE_SAFE_FREE(node);
-      return;
-    }
-  /* The colour has been read above; the bucket is not touched after this point. */
-  TREE_SAFE_FREE(node);
-  /* We have deleted a black node. */
-  if (son)
-    {
-      /* Its only son must be red; painting it black restores the path sums. */
-      ASSERT(P(red_flag) (son));
-      P(set_red_flag) (son, 0);
-      return;
-    }
-  P(rotate_after_delete) (t, stack, (int) depth - 1);
-}
-#endif
-
-#ifdef TREE_WANT_REMOVE
-/*
- * Delete and deallocate the given node.  The path from the root is
- * reconstructed by following the parent pointers upwards (giving it in
- * leaf-to-root order) and then reversed in place, because
- * remove_by_stack() expects it root-first.
- */
-STATIC void P(remove) (T *t, P(node) *Node)
-{
- P(stack_entry) stack[TREE_MAX_DEPTH];
- P(bucket) *node = SKIP_BACK(P(bucket), n, Node);
- uns depth = 0, i;
- stack[0].buck = node;
- stack[0].son = 10;
- while (node->parent)
- {
- depth++;
- ASSERT(depth < TREE_MAX_DEPTH);
- stack[depth].buck = node->parent;
- stack[depth].son = P(tree_son) (node->parent, 0) == node ? 0 : 1;
- node = node->parent;
- }
- /* Reverse stack[0..depth]. */
- for (i=0; i<(depth+1)/2; i++)
- {
- P(stack_entry) tmp = stack[i];
- stack[i] = stack[depth-i];
- stack[depth-i] = tmp;
- }
- P(remove_by_stack) (t, stack, depth);
-}
-#endif
-
-#ifdef TREE_WANT_DELETE
-/*
- * Delete and deallocate a node with the given key (with duplicate keys,
- * the last of them).  Returns 1 if a node has been deleted, 0 if the key
- * was not found.
- */
-STATIC int P(delete) (T *t, TREE_KEY_DECL)
-{
- P(stack_entry) stack[TREE_MAX_DEPTH];
- uns depth;
- depth = P(fill_stack) (stack, TREE_MAX_DEPTH, t->root, TREE_KEY(), 1);
- if (stack[depth].buck)
- {
- P(remove_by_stack) (t, stack, depth);
- return 1;
- }
- else
- return 0;
-}
-#endif
-
-#ifdef TREE_WANT_DUMP
-/*
- * Recursively verify and optionally print the subtree rooted at `node'.
- * parent is the bucket above (NULL for the root), cmp_res is -1 / +1
- * when node is the left / right son, level is the depth of node and
- * black the number of black buckets above it.  The ASSERTs check the
- * parent pointers, that no red bucket has a red parent, the key order
- * with respect to the parent, and that every empty position is reached
- * with exactly t->height black buckets.  Nothing is printed when fb is
- * NULL.  Uses the user-supplied dump_key() and dump_data().
- */
-static void P(dump_subtree) (struct fastbuf *fb, T *t, P(bucket) *node, P(bucket) *parent, int cmp_res, int level, uns black)
-{
- uns flag;
- int i;
- if (!node)
- {
- ASSERT(black == t->height);
- return;
- }
- flag = P(red_flag) (node);
-#ifdef TREE_STORE_PARENT
- ASSERT(node->parent == parent);
-#endif
- if (parent)
- {
- ASSERT(!flag || !P(red_flag) (parent));
- /* Multiplying by the side makes the product non-negative exactly when the order is right. */
- cmp_res *= P(cmp) (TREE_KEY(node->n.), TREE_KEY(parent->n.));
-#ifdef TREE_DUPLICATES
- ASSERT(cmp_res >= 0);
-#else
- ASSERT(cmp_res > 0);
-#endif
- }
- P(dump_subtree) (fb, t, P(tree_son) (node, 0), node, -1, level+1, black + (1-flag));
- if (fb)
- {
- char tmp[20];
- for (i=0; i<level; i++)
- bputs(fb, " ");
- sprintf(tmp, "L%d %c\t", level, flag ? 'R' : 'B');
- bputs(fb, tmp);
- P(dump_key) (fb, &node->n);
- P(dump_data) (fb, &node->n);
- bputs(fb, "\n");
- }
- P(dump_subtree) (fb, t, P(tree_son) (node, 1), node, +1, level+1, black + (1-flag));
-}
-
-/*
- * Verify the invariants of the whole tree (by ASSERTs) and, if fb is not
- * NULL, print a header line followed by all nodes in key order and flush
- * the fastbuf.
- */
-STATIC void P(dump) (struct fastbuf *fb, T *t)
-{
-  if (fb)
-    {
-      char tmp[50];
-      /* count and height are unsigned, so print them with %u rather than %d */
-      sprintf(tmp, "Tree of %u nodes and height %u\n", t->count, t->height);
-      bputs(fb, tmp);
-    }
-  P(dump_subtree) (fb, t, t->root, NULL, 0, 0, 0);
-  if (fb)
-    {
-      bputs(fb, "\n");
-      bflush(fb);
-    }
-}
-#endif
-
-/* And the iterator */
-
-#ifdef TREE_WANT_ITERATOR
-/*
- * Return the node reached from the root by always taking the `direction'
- * son (0 gives the smallest node), or NULL if the tree is empty.
- */
-static P(node) * P(first_node) (T *t, uns direction)
-{
-  P(bucket) *cur = t->root;
-
-  if (!cur)
-    return NULL;
-  for (;;)
-    {
-      P(bucket) *below = P(tree_son) (cur, direction);
-      if (!below)
-        return &cur->n;
-      cur = below;
-    }
-}
-
-#ifndef TREE_FOR_ALL
-
-/*
- * TREE_FOR_ALL(prefix, tree, var) ... TREE_END_FOR visits all nodes in
- * ascending order: it declares `var', starts at first_node(tree, 0) and
- * advances by adjacent(var, 1).  The body runs inside a real for loop
- * wrapped in do { } while(0), so TREE_BREAK and TREE_CONTINUE are plain
- * break and continue.
- */
-#define TREE_FOR_ALL(t_px, t_ptr, t_var) \
-do \
-{ \
- GLUE_(t_px,node) *t_var = GLUE_(t_px,first_node)(t_ptr, 0); \
- for (; t_var; t_var = GLUE_(t_px,adjacent)(t_var, 1)) \
- {
-#define TREE_END_FOR } } while(0)
-#define TREE_BREAK break
-#define TREE_CONTINUE continue
-
-#endif
-#endif
-
-/* Finally, undefine all the parameters */
-
-/*
- * Every parameter and internal helper macro is undefined here, so that
- * the generator can be included again with a different configuration.
- * TREE_GLOBAL is on the list as well: otherwise a tree instantiated after
- * a global one would silently get exported functions, too.
- */
-#undef P
-#undef T
-
-#undef TREE_NODE
-#undef TREE_PREFIX
-#undef TREE_KEY_ATOMIC
-#undef TREE_KEY_STRING
-#undef TREE_KEY_ENDSTRING
-#undef TREE_KEY_COMPLEX
-#undef TREE_KEY_DECL
-#undef TREE_WANT_CLEANUP
-#undef TREE_WANT_FIND
-#undef TREE_WANT_FIND_NEXT
-#undef TREE_WANT_SEARCH
-#undef TREE_WANT_SEARCH_DOWN
-#undef TREE_WANT_BOUNDARY
-#undef TREE_WANT_ADJACENT
-#undef TREE_WANT_NEW
-#undef TREE_WANT_LOOKUP
-#undef TREE_WANT_DELETE
-#undef TREE_WANT_REMOVE
-#undef TREE_WANT_DUMP
-#undef TREE_WANT_ITERATOR
-#undef TREE_GIVE_CMP
-#undef TREE_GIVE_EXTRA_SIZE
-#undef TREE_GIVE_INIT_KEY
-#undef TREE_GIVE_INIT_DATA
-#undef TREE_GIVE_ALLOC
-#undef TREE_NOCASE
-#undef TREE_ATOMIC_TYPE
-#undef TREE_USE_POOL
-#undef TREE_GLOBAL
-#undef TREE_STATIC
-#undef TREE_CONSERVE_SPACE
-#undef TREE_DUPLICATES
-#undef TREE_MAX_DEPTH
-#undef TREE_STORE_PARENT
-#undef TREE_KEY
-#undef TREE_EXTRA_SIZE
-#undef TREE_SAFE_FREE
-#undef TREE_TRACE
-#undef STATIC
+++ /dev/null
-/*
- * UCW Library -- Interface to Regular Expression Libraries
- *
- * (c) 1997--2004 Martin Mares <mj@ucw.cz>
- * (c) 2001 Robert Spalek <robert@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-#include "lib/chartype.h"
-#include "lib/hashfunc.h"
-
-#include <stdio.h>
-#include <string.h>
-
-#if defined(CONFIG_OWN_REGEX) || defined(CONFIG_POSIX_REGEX)
-
-/* POSIX regular expression library */
-
-#ifdef CONFIG_OWN_REGEX
-#include "lib/regex/regex-sh.h"
-#else
-#include <regex.h>
-#endif
-
-/* Compiled expression (POSIX backend) together with the sub-match positions of the last rx_match(). */
-struct regex {
- regex_t rx;
- regmatch_t matches[10]; /* [0] is the whole match, [1..9] the sub-expressions */
-};
-
-/*
- * Compile the extended regular expression p, case-insensitively if icase
- * is non-zero.  Returns a newly allocated regex to be released by
- * rx_free(); calls die() with the library's message on a syntax error.
- */
-regex *
-rx_compile(const char *p, int icase)
-{
-  int cflags = REG_EXTENDED;
-  regex *r;
-  int err;
-
-  if (icase)
-    cflags |= REG_ICASE;
-  r = xmalloc_zero(sizeof(regex));
-  err = regcomp(&r->rx, p, cflags);
-  if (err)
-    {
-      char msg[256];
-      regerror(err, &r->rx, msg, sizeof(msg)-1);
-      /* regfree(&r->rx) not needed */
-      die("Error parsing regular expression `%s': %s", p, msg);
-    }
-  return r;
-}
-
-/* Release a regex returned by rx_compile(): first the compiled pattern, then the structure itself. */
-void
-rx_free(regex *r)
-{
- regfree(&r->rx);
- xfree(r);
-}
-
-/*
- * Match the whole string s against r.  Returns 1 if the expression
- * matches s from its first to its last character, 0 otherwise; the
- * sub-match positions are left in r->matches for rx_subst().  Calls
- * die() if the matcher reports an error.
- */
-int
-rx_match(regex *r, const char *s)
-{
-  int err = regexec(&r->rx, s, 10, r->matches, 0);
-
-  if (err == REG_NOMATCH)
-    return 0;
-  if (err == REG_ESPACE)
-    die("Regex matching ran out of memory");
-  if (err)
-    die("Regex matching failed with unknown error %d", err);
-
-  /* regexec doesn't support anchored expressions, so we have to check ourselves that the full string is matched */
-  if (r->matches[0].rm_so)
-    return 0;
-  return !s[r->matches[0].rm_eo];
-}
-
-/*
- * If src matches r as a whole, write the replacement `by' to dest, with
- * every \0..\9 replaced by the corresponding sub-match (\0 is the whole
- * match) and a backslash before any other character standing for that
- * character itself.  A reference to a group which does not exist or did
- * not take part in the match expands to nothing; a lone backslash at the
- * end of `by' is dropped.
- *
- * dest has room for destlen bytes including the terminating zero.
- * Returns 1 on success, 0 if src does not match (dest is left untouched)
- * and -1 if the result does not fit (dest contents are then undefined).
- */
-int
-rx_subst(regex *r, const char *by, const char *src, char *dest, uns destlen)
-{
-  char *end;
-
-  if (!rx_match(r, src))
-    return 0;
-  /* Without room for the terminator we must not write anything at all. */
-  if (!destlen)
-    return -1;
-  end = dest + destlen - 1;
-
-  while (*by)
-    {
-      if (*by == '\\')
-        {
-          by++;
-          /* Do not step over the terminator after a trailing backslash. */
-          if (!*by)
-            break;
-          if (*by >= '0' && *by <= '9')	/* \0 gets replaced by entire pattern */
-            {
-              uns j = *by++ - '0';
-              if (j <= r->rx.re_nsub && r->matches[j].rm_so >= 0)
-                {
-                  const char *s = src + r->matches[j].rm_so;
-                  uns i = r->matches[j].rm_eo - r->matches[j].rm_so;
-                  /* Compare against the remaining room; never form a pointer beyond the buffer. */
-                  if (i > (uns)(end - dest))
-                    return -1;
-                  memcpy(dest, s, i);
-                  dest += i;
-                }
-              /*
-               * The reference has been consumed in either case.  Go on
-               * with the next character through the normal path, so that
-               * it is checked for the terminator and for a backslash.
-               */
-              continue;
-            }
-        }
-      if (dest < end)
-        *dest++ = *by++;
-      else
-        return -1;
-    }
-  *dest = 0;
-  return 1;
-}
-
-#elif defined(CONFIG_PCRE)
-
-/* PCRE library */
-
-#include <pcre.h>
-
-struct regex { /* State of one compiled pattern (PCRE backend) */
- pcre *rx; /* result of pcre_compile() */
- pcre_extra *extra; /* result of pcre_study() */
- uns match_array_size; /* number of ints in matches[], passed to pcre_exec() */
- uns real_matches; /* return value of the last successful pcre_exec() */
- int matches[0]; /* (max_matches+1) pairs (start,end) of offsets plus some workspace */
-};
-
-/*
- * Compile P with PCRE (anchored; caseless when ICASE is non-zero), size
- * the offset vector from the pattern's capture count and study the
- * pattern.  Any failure is reported through die().
- */
-regex *
-rx_compile(const char *p, int icase)
-{
-  const char *errmsg;
-  int erroffset, captures;
-  int options = PCRE_ANCHORED | PCRE_EXTRA;
-
-  if (icase)
-    options |= PCRE_CASELESS;
-
-  pcre *code = pcre_compile(p, options, &errmsg, &erroffset, NULL);
-  if (!code)
-    die("Error parsing regular expression `%s': %s at position %d", p, errmsg, erroffset);
-
-  int rc = pcre_fullinfo(code, NULL, PCRE_INFO_CAPTURECOUNT, &captures);
-  if (rc)
-    die("Internal error: pcre_fullinfo() failed with error %d", rc);
-
-  /* three ints per group, counting the whole match as one more group */
-  int slots = 3*(captures+1);
-  regex *r = xmalloc_zero(sizeof(regex) + slots * sizeof(int));
-  r->rx = code;
-  r->match_array_size = slots;
-  r->extra = pcre_study(code, 0, &errmsg);
-  if (errmsg)
-    die("Error studying regular expression `%s': %s", p, errmsg);
-  return r;
-}
-
-/*
- * Dispose of a regex obtained from rx_compile().
- *
- * The compiled pattern and the study data come from the PCRE library, so
- * they are returned through pcre_free, the library's own release hook,
- * rather than through our allocator.  The wrapper structure was obtained
- * with xmalloc_zero() and is released with xfree().
- */
-void
-rx_free(regex *r)
-{
-  pcre_free(r->rx);
-  if (r->extra)                 /* pcre_study() may have returned NULL */
-    pcre_free(r->extra);
-  xfree(r);
-}
-
-/*
- * Return 1 if the whole string S matches R, 0 otherwise.  The offsets and
- * the number of filled pairs are remembered in R for rx_subst().
- * Errors other than "no match" are reported through die().
- */
-int
-rx_match(regex *r, const char *s)
-{
-  int rc = pcre_exec(r->rx, r->extra, s, str_len(s), 0, 0, r->matches, r->match_array_size);
-
-  if (rc == PCRE_ERROR_NOMATCH)
-    return 0;
-  if (rc == PCRE_ERROR_NOMEMORY)
-    die("Regex matching ran out of memory");
-  if (rc < 0)
-    die("Regex matching failed with unknown error %d", rc);
-
-  r->real_matches = rc;
-  /* need to check that the full string matches */
-  if (r->matches[0])
-    return 0;
-  return !s[r->matches[1]];
-}
-
-/*
- * Match SRC against R and, on success, expand the template BY into DEST.
- *
- * In BY, a backslash followed by a digit N inserts the text of group N
- * (\0 is the entire match); a group that does not exist or did not take
- * part in the match inserts nothing.  A backslash followed by any other
- * character inserts that character.  A backslash that is the last
- * character of BY is copied literally.
- *
- * DEST has room for DESTLEN bytes including the terminating zero.
- * Returns 1 on success, 0 if SRC does not match and -1 if the result
- * does not fit into DEST.
- */
-int
-rx_subst(regex *r, const char *by, const char *src, char *dest, uns destlen)
-{
-  if (!rx_match(r, src))
-    return 0;
-  if (!destlen)                 /* not even room for the terminating zero */
-    return -1;
-
-  char *end = dest + destlen - 1;       /* last byte, reserved for the zero */
-
-  while (*by)
-    {
-      /* Only treat the backslash as an escape when something follows it,
-       * otherwise we would step over the end of the template. */
-      if (*by == '\\' && by[1])
-        {
-          by++;
-          if (*by >= '0' && *by <= '9') /* \0 gets replaced by entire pattern */
-            {
-              uns j = *by++ - '0';
-              if (j < r->real_matches && r->matches[2*j] >= 0)
-                {
-                  const char *s = src + r->matches[2*j];
-                  uns i = r->matches[2*j+1] - r->matches[2*j];
-                  if (i > (uns)(end - dest))
-                    return -1;
-                  memcpy(dest, s, i);
-                  dest += i;
-                }
-              /* The reference is consumed even when it expands to nothing;
-               * the next character is parsed normally on the next turn. */
-              continue;
-            }
-        }
-      if (dest < end)
-        *dest++ = *by++;
-      else
-        return -1;
-    }
-  *dest = 0;
-  return 1;
-}
-
-#else
-
-/* BSD regular expression library */
-
-#ifdef CONFIG_OWN_BSD_REGEX
-#include "lib/regex/regex-sh.h"
-#else
-#include <regex.h>
-#endif
-
-#define INITIAL_MEM 1024 /* Initial space allocated for each pattern */
-#define CHAR_SET_SIZE 256 /* How many characters in the character set. */
-
-struct regex { /* State of one compiled pattern (GNU re_compile_pattern/re_match backend) */
- struct re_pattern_buffer buf; /* pattern buffer handed to re_compile_pattern() and re_match() */
- struct re_registers regs; /* Must not change between re_match() calls */
- int len_cache; /* strlen() of the string last passed to rx_match() */
-};
-
-/*
- * Compile P in POSIX extended syntax into a freshly allocated regex.
- * The pattern buffer starts with INITIAL_MEM bytes; for a non-zero ICASE
- * it also gets a CHAR_SET_SIZE-entry translation table.
- * A pattern that re_compile_pattern() rejects is reported through die().
- */
-regex *
-rx_compile(const char *p, int icase)
-{
-  regex *r = xmalloc_zero(sizeof(regex));
-
-  r->buf.allocated = INITIAL_MEM;
-  r->buf.buffer = xmalloc(INITIAL_MEM);
-  r->buf.translate = NULL;
-  if (icase)
-    {
-      unsigned c;
-      r->buf.translate = xmalloc (CHAR_SET_SIZE);
-      /* Every byte value is translated through Cupcase().
-       * NOTE(review): the previous comment spoke of mapping to lowercase,
-       * which does not agree with the macro's name -- confirm. */
-      for (c = 0; c < CHAR_SET_SIZE; c++)
-        r->buf.translate[c] = Cupcase(c);
-    }
-
-  re_set_syntax(RE_SYNTAX_POSIX_EXTENDED);
-  const char *complaint = re_compile_pattern(p, strlen(p), &r->buf);
-  if (complaint)
-    die("Error parsing pattern `%s': %s", p, complaint);
-  return r;
-}
-
-void
-rx_free(regex *r) /* Dispose of a regex obtained from rx_compile() */
-{
- xfree(r->buf.buffer); /* the pattern buffer allocated in rx_compile() */
- if (r->buf.translate) /* the table exists only for case-insensitive patterns */
- xfree(r->buf.translate);
- xfree(r);
-}
-
-/*
- * Return 1 if the whole string S matches R, 0 otherwise.
- * The length of S is remembered in r->len_cache and the registers in
- * r->regs for a following rx_subst().
- */
-int
-rx_match(regex *r, const char *s)
-{
-  int n = strlen(s);
-
-  r->len_cache = n;
-  if (re_match(&r->buf, s, n, 0, &r->regs) < 0)
-    return 0;
-  /* XXX: Why regex doesn't enforce implicit "^...$" ? */
-  return !r->regs.start[0] && r->regs.end[0] == n;
-}
-
-/*
- * Match SRC against R and, on success, expand the template BY into DEST.
- *
- * In BY, a backslash followed by a digit N inserts the text of register N
- * (\0 is the entire match); a register that does not exist or is unset
- * inserts nothing.  A backslash followed by any other character inserts
- * that character.  A backslash that is the last character of BY is
- * copied literally.
- *
- * DEST has room for DESTLEN bytes including the terminating zero.
- * Returns 1 on success, 0 if SRC does not match and -1 if the result
- * does not fit into DEST or a register points beyond the matched string.
- */
-int
-rx_subst(regex *r, const char *by, const char *src, char *dest, uns destlen)
-{
-  if (!rx_match(r, src))
-    return 0;
-  if (!destlen)                 /* not even room for the terminating zero */
-    return -1;
-
-  char *end = dest + destlen - 1;       /* last byte, reserved for the zero */
-
-  while (*by)
-    {
-      /* Only treat the backslash as an escape when something follows it,
-       * otherwise we would step over the end of the template. */
-      if (*by == '\\' && by[1])
-        {
-          by++;
-          if (*by >= '0' && *by <= '9') /* \0 gets replaced by entire pattern */
-            {
-              uns j = *by++ - '0';
-              /* A negative start would make us form a pointer in front
-               * of SRC, so such a register expands to nothing. */
-              if (j < r->regs.num_regs && r->regs.start[j] >= 0)
-                {
-                  if (r->regs.start[j] > r->len_cache || r->regs.end[j] > r->len_cache)
-                    return -1;
-                  const char *s = src + r->regs.start[j];
-                  uns i = r->regs.end[j] - r->regs.start[j];
-                  if (i > (uns)(end - dest))
-                    return -1;
-                  memcpy(dest, s, i);
-                  dest += i;
-                }
-              /* The reference is consumed even when it expands to nothing;
-               * the next character is parsed normally on the next turn. */
-              continue;
-            }
-        }
-      if (dest < end)
-        *dest++ = *by++;
-      else
-        return -1;
-    }
-  *dest = 0;
-  return 1;
-}
-
-#endif
-
-#ifdef TEST
-
-/*
- * Test driver: regex-t [-i] <regex> [<substitution>]
- *
- * Reads lines from standard input.  With only a regex it prints MATCH or
- * NO MATCH for each line; with a substitution template it prints the
- * result of rx_subst(), NO MATCH, or OVERFLOW when the result does not
- * fit.  -i compiles the regex case-insensitively.
- */
-int main(int argc, char **argv)
-{
-  regex *r;
-  char buf1[4096], buf2[4096];
-  int opt_i = 0;
-
-  /* argv[1] is only valid when at least one argument was given */
-  if (argc > 1 && !strcmp(argv[1], "-i"))
-    {
-      opt_i = 1;
-      argv++;
-      argc--;
-    }
-  if (argc < 2)
-    {
-      fputs("Usage: regex-t [-i] <regex> [<substitution>]\n", stderr);
-      return 1;
-    }
-  r = rx_compile(argv[1], opt_i);
-  while (fgets(buf1, sizeof(buf1), stdin))
-    {
-      char *p = strchr(buf1, '\n');
-      if (p)
-        *p = 0;
-      if (argc == 2)
-        {
-          if (rx_match(r, buf1))
-            puts("MATCH");
-          else
-            puts("NO MATCH");
-        }
-      else
-        {
-          int i = rx_subst(r, argv[2], buf1, buf2, sizeof(buf2));
-          if (i < 0)
-            puts("OVERFLOW");
-          else if (!i)
-            puts("NO MATCH");
-          else
-            puts(buf2);
-        }
-    }
-  rx_free(r);
-  return 0;
-}
-
-#endif
+++ /dev/null
-# Tests for the regex module
-
-Run: ../obj/lib/regex-t 'a.*b.*c'
-In: abc
- ajkhkbbbbbc
- Aabc
-Out: MATCH
- MATCH
- NO MATCH
-
-Run: ../obj/lib/regex-t -i 'a.*b.*c'
-In: aBc
- ajkhkbBBBBC
- Aabc
-Out: MATCH
- MATCH
- MATCH
-
-Run: ../obj/lib/regex-t -i '(ahoj|nebo)'
-In: Ahoj
- nEBo
- ahoja
- (ahoj|nebo)
-Out: MATCH
- MATCH
- NO MATCH
- NO MATCH
-
-Run: ../obj/lib/regex-t '\(ahoj\)'
-In: (ahoj)
- ahoj
-Out: MATCH
- NO MATCH
-
-Run: ../obj/lib/regex-t '(.*b)*'
-In: ababababab
- ababababababababababababababababababababababababababababa
-Out: MATCH
- NO MATCH
-
-Run: ../obj/lib/regex-t '(.*)((aabb)|cc)(b.*)' '\1<\3>\4'
-In: aaabbb
- aabbccb
- abcabc
- aaccbb
-Out: a<aabb>b
- aabb<>b
- NO MATCH
- aa<>bb
-
-Run: ../obj/lib/regex-t '.*\?(.*&)*([a-z_]*sess[a-z_]*|random|sid|S_ID|rnd|timestamp|referer)=.*'
-In: /nemecky/ubytovani/hotel.php?sort=&cislo=26&mena=EUR&typ=Hotel&luz1=ANO&luz2=ANO&luz3=&luz4=&luz5=&maxp1=99999&maxp2=99999&maxp3=99999&maxp4=99999&maxp5=99999&apart=&rada=8,9,10,11,19,22,26,27,28,29,3&cislo=26&mena=EUR&typ=Hotel&luz1=ANO&luz2=ANO&luz3=&luz4=&luz5=&maxp1=99999&maxp2=99999&maxp3=99999&maxp4=99999&maxp5=99999&apart=&rada=8,9,10,11,19,22,26,27,28,29,3&cislo=26&mena=EUR&typ=Hotel&luz1=ANO&luz2=ANO&luz3=&luz4=&luz5=&maxp1=99999&maxp2=99999&maxp3=99999&maxp4=99999&maxp5=99999&apart=&rada=8,9,10,11,19,22,26,27,28,29,3
- /test...?f=1&s=3&sid=123&q=3&
-Out: NO MATCH
- MATCH
-
-Run: ../obj/lib/regex-t '.*[0-9a-f]{8,16}.*'
-In: abcdabcdabcd
- aaaaaaaaaaaaaaaaaaaaaaaaaaaa
- asddajlkdkajlqwepoiequwiouio
- 000001111p101010101010q12032
-Out: MATCH
- MATCH
- NO MATCH
- MATCH
+++ /dev/null
-# Makefile for the UCW Regex Library (c) 2004 Martin Mares <mj@ucw.cz>
-# Add lib/regex to DIRS, the list from which the build system derives the object directories it creates
-DIRS+=lib/regex
-# Add the regex wrapper to the list of libucw modules
-LIBUCW_MODS+=regex/regex
-# Target-specific override: both regex objects are built with CWARNS empty (presumably to silence warnings in the imported glibc code -- confirm)
-$(o)/lib/regex/regex.o $(o)/lib/regex/regex.oo: CWARNS=
+++ /dev/null
-This directory contains regular expression routines from the GNU libc 2.3.2
-which are significantly faster than the default regex libraries on most systems.
-
-They are distributed under the GNU LGPL.
-
-All files are exact copies of the original distribution; the only files
-I provided myself are regex.c, regex-sh.h and the Makefile.
-
- Martin Mares, March 2004
+++ /dev/null
-/* Extended regular expression matching and search library.
- Copyright (C) 2002, 2003 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
-
-static reg_errcode_t re_compile_internal (regex_t *preg, const char * pattern,
- int length, reg_syntax_t syntax);
-static void re_compile_fastmap_iter (regex_t *bufp,
- const re_dfastate_t *init_state,
- char *fastmap);
-static reg_errcode_t init_dfa (re_dfa_t *dfa, int pat_len);
-static reg_errcode_t init_word_char (re_dfa_t *dfa);
-#ifdef RE_ENABLE_I18N
-static void free_charset (re_charset_t *cset);
-#endif /* RE_ENABLE_I18N */
-static void free_workarea_compile (regex_t *preg);
-static reg_errcode_t create_initial_state (re_dfa_t *dfa);
-static reg_errcode_t analyze (re_dfa_t *dfa);
-static reg_errcode_t analyze_tree (re_dfa_t *dfa, bin_tree_t *node);
-static void calc_first (re_dfa_t *dfa, bin_tree_t *node);
-static void calc_next (re_dfa_t *dfa, bin_tree_t *node);
-static void calc_epsdest (re_dfa_t *dfa, bin_tree_t *node);
-static reg_errcode_t duplicate_node_closure (re_dfa_t *dfa, int top_org_node,
- int top_clone_node, int root_node,
- unsigned int constraint);
-static reg_errcode_t duplicate_node (int *new_idx, re_dfa_t *dfa, int org_idx,
- unsigned int constraint);
-static int search_duplicated_node (re_dfa_t *dfa, int org_node,
- unsigned int constraint);
-static reg_errcode_t calc_eclosure (re_dfa_t *dfa);
-static reg_errcode_t calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa,
- int node, int root);
-static void calc_inveclosure (re_dfa_t *dfa);
-static int fetch_number (re_string_t *input, re_token_t *token,
- reg_syntax_t syntax);
-static re_token_t fetch_token (re_string_t *input, reg_syntax_t syntax);
-static int peek_token (re_token_t *token, re_string_t *input,
- reg_syntax_t syntax);
-static int peek_token_bracket (re_token_t *token, re_string_t *input,
- reg_syntax_t syntax);
-static bin_tree_t *parse (re_string_t *regexp, regex_t *preg,
- reg_syntax_t syntax, reg_errcode_t *err);
-static bin_tree_t *parse_reg_exp (re_string_t *regexp, regex_t *preg,
- re_token_t *token, reg_syntax_t syntax,
- int nest, reg_errcode_t *err);
-static bin_tree_t *parse_branch (re_string_t *regexp, regex_t *preg,
- re_token_t *token, reg_syntax_t syntax,
- int nest, reg_errcode_t *err);
-static bin_tree_t *parse_expression (re_string_t *regexp, regex_t *preg,
- re_token_t *token, reg_syntax_t syntax,
- int nest, reg_errcode_t *err);
-static bin_tree_t *parse_sub_exp (re_string_t *regexp, regex_t *preg,
- re_token_t *token, reg_syntax_t syntax,
- int nest, reg_errcode_t *err);
-static bin_tree_t *parse_dup_op (bin_tree_t *dup_elem, re_string_t *regexp,
- re_dfa_t *dfa, re_token_t *token,
- reg_syntax_t syntax, reg_errcode_t *err);
-static bin_tree_t *parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa,
- re_token_t *token, reg_syntax_t syntax,
- reg_errcode_t *err);
-static reg_errcode_t parse_bracket_element (bracket_elem_t *elem,
- re_string_t *regexp,
- re_token_t *token, int token_len,
- re_dfa_t *dfa,
- reg_syntax_t syntax);
-static reg_errcode_t parse_bracket_symbol (bracket_elem_t *elem,
- re_string_t *regexp,
- re_token_t *token);
-#ifndef _LIBC
-# ifdef RE_ENABLE_I18N
-static reg_errcode_t build_range_exp (re_bitset_ptr_t sbcset,
- re_charset_t *mbcset, int *range_alloc,
- bracket_elem_t *start_elem,
- bracket_elem_t *end_elem);
-static reg_errcode_t build_collating_symbol (re_bitset_ptr_t sbcset,
- re_charset_t *mbcset,
- int *coll_sym_alloc,
- const unsigned char *name);
-# else /* not RE_ENABLE_I18N */
-static reg_errcode_t build_range_exp (re_bitset_ptr_t sbcset,
- bracket_elem_t *start_elem,
- bracket_elem_t *end_elem);
-static reg_errcode_t build_collating_symbol (re_bitset_ptr_t sbcset,
- const unsigned char *name);
-# endif /* not RE_ENABLE_I18N */
-#endif /* not _LIBC */
-#ifdef RE_ENABLE_I18N
-static reg_errcode_t build_equiv_class (re_bitset_ptr_t sbcset,
- re_charset_t *mbcset,
- int *equiv_class_alloc,
- const unsigned char *name);
-static reg_errcode_t build_charclass (re_bitset_ptr_t sbcset,
- re_charset_t *mbcset,
- int *char_class_alloc,
- const unsigned char *class_name,
- reg_syntax_t syntax);
-#else /* not RE_ENABLE_I18N */
-static reg_errcode_t build_equiv_class (re_bitset_ptr_t sbcset,
- const unsigned char *name);
-static reg_errcode_t build_charclass (re_bitset_ptr_t sbcset,
- const unsigned char *class_name,
- reg_syntax_t syntax);
-#endif /* not RE_ENABLE_I18N */
-static bin_tree_t *build_word_op (re_dfa_t *dfa, int not, reg_errcode_t *err);
-static void free_bin_tree (bin_tree_t *tree);
-static bin_tree_t *create_tree (bin_tree_t *left, bin_tree_t *right,
- re_token_type_t type, int index);
-static bin_tree_t *duplicate_tree (const bin_tree_t *src, re_dfa_t *dfa);
-\f
-/* This table gives an error message for each of the error codes listed
- in regex.h. Obviously the order here has to be same as there.
- POSIX doesn't require that we do anything for REG_NOERROR,
- but why not be nice?
-
- All messages are stored in one character array, each followed by an
- explicit "\0". Every REG_xxx_IDX macro is the offset of its message in
- that array: the offset of the previous message plus the sizeof of the
- previous message literal, which counts that literal's terminating zero. */
-
-const char __re_error_msgid[] attribute_hidden =
- {
-#define REG_NOERROR_IDX 0
- gettext_noop ("Success") /* REG_NOERROR */
- "\0"
-#define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success")
- gettext_noop ("No match") /* REG_NOMATCH */
- "\0"
-#define REG_BADPAT_IDX (REG_NOMATCH_IDX + sizeof "No match")
- gettext_noop ("Invalid regular expression") /* REG_BADPAT */
- "\0"
-#define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression")
- gettext_noop ("Invalid collation character") /* REG_ECOLLATE */
- "\0"
-#define REG_ECTYPE_IDX (REG_ECOLLATE_IDX + sizeof "Invalid collation character")
- gettext_noop ("Invalid character class name") /* REG_ECTYPE */
- "\0"
-#define REG_EESCAPE_IDX (REG_ECTYPE_IDX + sizeof "Invalid character class name")
- gettext_noop ("Trailing backslash") /* REG_EESCAPE */
- "\0"
-#define REG_ESUBREG_IDX (REG_EESCAPE_IDX + sizeof "Trailing backslash")
- gettext_noop ("Invalid back reference") /* REG_ESUBREG */
- "\0"
-#define REG_EBRACK_IDX (REG_ESUBREG_IDX + sizeof "Invalid back reference")
- gettext_noop ("Unmatched [ or [^") /* REG_EBRACK */
- "\0"
-#define REG_EPAREN_IDX (REG_EBRACK_IDX + sizeof "Unmatched [ or [^")
- gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */
- "\0"
-#define REG_EBRACE_IDX (REG_EPAREN_IDX + sizeof "Unmatched ( or \\(")
- gettext_noop ("Unmatched \\{") /* REG_EBRACE */
- "\0"
-#define REG_BADBR_IDX (REG_EBRACE_IDX + sizeof "Unmatched \\{")
- gettext_noop ("Invalid content of \\{\\}") /* REG_BADBR */
- "\0"
-#define REG_ERANGE_IDX (REG_BADBR_IDX + sizeof "Invalid content of \\{\\}")
- gettext_noop ("Invalid range end") /* REG_ERANGE */
- "\0"
-#define REG_ESPACE_IDX (REG_ERANGE_IDX + sizeof "Invalid range end")
- gettext_noop ("Memory exhausted") /* REG_ESPACE */
- "\0"
-#define REG_BADRPT_IDX (REG_ESPACE_IDX + sizeof "Memory exhausted")
- gettext_noop ("Invalid preceding regular expression") /* REG_BADRPT */
- "\0"
-#define REG_EEND_IDX (REG_BADRPT_IDX + sizeof "Invalid preceding regular expression")
- gettext_noop ("Premature end of regular expression") /* REG_EEND */
- "\0"
-#define REG_ESIZE_IDX (REG_EEND_IDX + sizeof "Premature end of regular expression")
- gettext_noop ("Regular expression too big") /* REG_ESIZE */
- "\0"
-#define REG_ERPAREN_IDX (REG_ESIZE_IDX + sizeof "Regular expression too big")
- gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */
- };
-
-const size_t __re_error_msgid_idx[] attribute_hidden = /* indexed by error code: offset of that code's message in __re_error_msgid */
- {
- REG_NOERROR_IDX,
- REG_NOMATCH_IDX,
- REG_BADPAT_IDX,
- REG_ECOLLATE_IDX,
- REG_ECTYPE_IDX,
- REG_EESCAPE_IDX,
- REG_ESUBREG_IDX,
- REG_EBRACK_IDX,
- REG_EPAREN_IDX,
- REG_EBRACE_IDX,
- REG_BADBR_IDX,
- REG_ERANGE_IDX,
- REG_ESPACE_IDX,
- REG_BADRPT_IDX,
- REG_EEND_IDX,
- REG_ESIZE_IDX,
- REG_ERPAREN_IDX
- };
-\f
-/* Entry points for GNU code. */
-
-/* GNU entry point of the compiler: compile PATTERN, which is LENGTH bytes
-   long, into BUFP using the syntax currently stored in re_syntax_options.
-   Returns NULL when the pattern was valid and an error message otherwise.
-
-   The caller is expected to have set the `allocated' (and perhaps
-   `buffer') and `translate' fields of BUFP.  */
-
-const char *
-re_compile_pattern (pattern, length, bufp)
-     const char *pattern;
-     size_t length;
-     struct re_pattern_buffer *bufp;
-{
-  reg_errcode_t status;
-
-  /* Match anchors at newline.  */
-  bufp->newline_anchor = 1;
-
-  /* GNU callers ask for register information by passing a non-null REGS
-     to re_match and friends, never through no_sub.  */
-  bufp->no_sub = 0;
-
-  status = re_compile_internal (bufp, pattern, length, re_syntax_options);
-  return status
-         ? gettext (__re_error_msgid + __re_error_msgid_idx[(int) status])
-         : NULL;
-}
-#ifdef _LIBC
-weak_alias (__re_compile_pattern, re_compile_pattern)
-#endif
-
-/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can
- also be assigned to arbitrarily: each pattern buffer stores its own
- syntax, so it can be changed between regex compilations. */
-/* This has no initializer because initialized variables in Emacs
- become read-only after dumping. */
-reg_syntax_t re_syntax_options;
-
-
-/* Select the regexp syntax used by subsequent compilations.  Different
-   utilities have historically used different, incompatible syntaxes;
-   SYNTAX is a bit mask built from the bits defined in regex.h.
-
-   Returns the syntax that was in effect before the call.  */
-
-reg_syntax_t
-re_set_syntax (syntax)
-     reg_syntax_t syntax;
-{
-  reg_syntax_t previous = re_syntax_options;
-
-  re_syntax_options = syntax;
-  return previous;
-}
-#ifdef _LIBC
-weak_alias (__re_set_syntax, re_set_syntax)
-#endif
-
-/* Build the fastmap of BUFP: clear it, then let re_compile_fastmap_iter
-   fill it from the plain initial state and from each of the three
-   context-specific initial states that is a different state object.
-   Marks the fastmap accurate and always returns 0.  */
-
-int
-re_compile_fastmap (bufp)
-     struct re_pattern_buffer *bufp;
-{
-  re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
-  char *map = bufp->fastmap;
-  const re_dfastate_t *contextual[3];
-  int k;
-
-  contextual[0] = dfa->init_state_word;
-  contextual[1] = dfa->init_state_nl;
-  contextual[2] = dfa->init_state_begbuf;
-
-  memset (map, '\0', sizeof (char) * SBC_MAX);
-  re_compile_fastmap_iter (bufp, dfa->init_state, map);
-  for (k = 0; k < 3; ++k)
-    if (contextual[k] != dfa->init_state)
-      re_compile_fastmap_iter (bufp, contextual[k], map);
-  bufp->fastmap_accurate = 1;
-  return 0;
-}
-#ifdef _LIBC
-weak_alias (__re_compile_fastmap, re_compile_fastmap)
-#endif
-
-/* Set the FASTMAP entry of CH and, when ICASE is non-zero, also the
-   entry of tolower (CH).  */
-
-static inline void
-re_set_fastmap (char *fastmap, int icase, int ch)
-{
-  fastmap[ch] = 1;
-  if (!icase)
-    return;
-  fastmap[tolower (ch)] = 1;
-}
-
-/* Helper function for re_compile_fastmap.
-   Compile fastmap for the initial_state INIT_STATE.
-
-   Each node of INIT_STATE contributes to FASTMAP according to its type:
-   a CHARACTER sets the entry of its byte, a SIMPLE_BRACKET the entry of
-   every byte present in its bitset, a COMPLEX_BRACKET (i18n builds only)
-   the first byte of each multibyte character it lists plus some extra
-   bytes for the non-trivial cases.  END_OF_RE or OP_PERIOD set the whole
-   map and end the scan; END_OF_RE also sets BUFP->can_be_null.  */
-
-static void
-re_compile_fastmap_iter (bufp, init_state, fastmap)
-     regex_t *bufp;
-     const re_dfastate_t *init_state;
-     char *fastmap;
-{
-  re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
-  int node_cnt;
-  int icase = (MB_CUR_MAX == 1 && (bufp->syntax & RE_ICASE));
-  for (node_cnt = 0; node_cnt < init_state->nodes.nelem; ++node_cnt)
-    {
-      int node = init_state->nodes.elems[node_cnt];
-      re_token_type_t type = dfa->nodes[node].type;
-
-      if (type == CHARACTER)
-        re_set_fastmap (fastmap, icase, dfa->nodes[node].opr.c);
-      else if (type == SIMPLE_BRACKET)
-        {
-          int i, j, ch;
-          for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
-            for (j = 0; j < UINT_BITS; ++j, ++ch)
-              /* Unsigned one: shifting a signed 1 into the top bit is
-                 undefined behaviour.  */
-              if (dfa->nodes[node].opr.sbcset[i] & (1u << j))
-                re_set_fastmap (fastmap, icase, ch);
-        }
-#ifdef RE_ENABLE_I18N
-      else if (type == COMPLEX_BRACKET)
-        {
-          int i;
-          re_charset_t *cset = dfa->nodes[node].opr.mbcset;
-          if (cset->non_match || cset->ncoll_syms || cset->nequiv_classes
-              || cset->nranges || cset->nchar_classes)
-            {
-# ifdef _LIBC
-              if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0)
-                {
-                  /* In this case we want to catch the bytes which are
-                     the first byte of any collation elements.
-                     e.g. In da_DK, we want to catch 'a' since "aa"
-                          is a valid collation element, and don't catch
-                          'b' since 'b' is the only collation element
-                          which starts from 'b'.  */
-                  int j, ch;
-                  const int32_t *table = (const int32_t *)
-                    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
-                  for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
-                    for (j = 0; j < UINT_BITS; ++j, ++ch)
-                      if (table[ch] < 0)
-                        re_set_fastmap (fastmap, icase, ch);
-                }
-# else
-              if (MB_CUR_MAX > 1)
-                for (i = 0; i < SBC_MAX; ++i)
-                  if (__btowc (i) == WEOF)
-                    re_set_fastmap (fastmap, icase, i);
-# endif /* not _LIBC */
-            }
-          for (i = 0; i < cset->nmbchars; ++i)
-            {
-              char buf[256];
-              mbstate_t state;
-              memset (&state, '\0', sizeof (state));
-              __wcrtomb (buf, cset->mbchars[i], &state);
-              re_set_fastmap (fastmap, icase, *(unsigned char *) buf);
-            }
-        }
-#endif /* RE_ENABLE_I18N */
-      else if (type == END_OF_RE || type == OP_PERIOD)
-        {
-          memset (fastmap, '\1', sizeof (char) * SBC_MAX);
-          if (type == END_OF_RE)
-            bufp->can_be_null = 1;
-          return;
-        }
-    }
-}
-\f
-/* Entry point for POSIX code. */
-/* regcomp takes a regular expression as a string and compiles it.
-
- PREG is a regex_t *. We do not expect any fields to be initialized,
- since POSIX says we shouldn't. Thus, we set
-
- `buffer' to the compiled pattern;
- `used' to the length of the compiled pattern;
- `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
- REG_EXTENDED bit in CFLAGS is set; otherwise, to
- RE_SYNTAX_POSIX_BASIC;
- `newline_anchor' to REG_NEWLINE being set in CFLAGS;
- `fastmap' to an allocated space for the fastmap;
- `fastmap_accurate' to one, because the fastmap is computed right
- away when compilation succeeds (on failure the fastmap is freed
- and `fastmap' is reset to NULL);
- `re_nsub' to the number of subexpressions in PATTERN.
-
- PATTERN is the address of the pattern string.
-
- CFLAGS is a series of bits which affect compilation.
-
- If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
- use POSIX basic syntax.
-
- If REG_NEWLINE is set, then . and [^...] don't match newline.
- Also, regexec will try a match beginning after every newline.
-
- If REG_ICASE is set, then we consider upper- and lowercase
- versions of letters to be equivalent when matching.
-
- If REG_NOSUB is set, then when PREG is passed to regexec, that
- routine will report only success or failure, and nothing about the
- registers.
-
- It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for
- the return codes and their meanings.) */
-
-int
-regcomp (preg, pattern, cflags)
- regex_t *__restrict preg;
- const char *__restrict pattern;
- int cflags;
-{
- reg_errcode_t ret;
- reg_syntax_t syntax = ((cflags & REG_EXTENDED) ? RE_SYNTAX_POSIX_EXTENDED
- : RE_SYNTAX_POSIX_BASIC);
-
- preg->buffer = NULL;
- preg->allocated = 0;
- preg->used = 0;
-
- /* Try to allocate space for the fastmap. */
- preg->fastmap = re_malloc (char, SBC_MAX);
- if (BE (preg->fastmap == NULL, 0))
- return REG_ESPACE;
-
- syntax |= (cflags & REG_ICASE) ? RE_ICASE : 0;
-
- /* If REG_NEWLINE is set, newlines are treated differently. */
- if (cflags & REG_NEWLINE)
- { /* REG_NEWLINE implies neither . nor [^...] match newline. */
- syntax &= ~RE_DOT_NEWLINE;
- syntax |= RE_HAT_LISTS_NOT_NEWLINE;
- /* It also changes the matching behavior. */
- preg->newline_anchor = 1;
- }
- else
- preg->newline_anchor = 0;
- preg->no_sub = !!(cflags & REG_NOSUB);
- preg->translate = NULL;
-
- ret = re_compile_internal (preg, pattern, strlen (pattern), syntax);
-
- /* POSIX doesn't distinguish between an unmatched open-group and an
- unmatched close-group: both are REG_EPAREN. */
- if (ret == REG_ERPAREN)
- ret = REG_EPAREN;
-
- /* We have already checked preg->fastmap != NULL. */
- if (BE (ret == REG_NOERROR, 1))
- /* Compute the fastmap now, since regexec cannot modify the pattern
- buffer. This function never fails in this implementation. */
- (void) re_compile_fastmap (preg);
- else
- {
- /* Some error occurred while compiling the expression. */
- re_free (preg->fastmap);
- preg->fastmap = NULL;
- }
-
- return (int) ret;
-}
-#ifdef _LIBC
-weak_alias (__regcomp, regcomp)
-#endif
-
-/* Returns a message corresponding to an error code, ERRCODE, returned
- from either regcomp or regexec. We don't use PREG here.
-
- The message is copied into ERRBUF; when it does not fit, the first
- ERRBUF_SIZE - 1 bytes are copied and a terminating zero is appended.
- Nothing is written when ERRBUF_SIZE is zero. The return value is the
- size of the complete message including its terminating zero.
- An ERRCODE outside the message table makes the function abort (). */
-
-size_t
-regerror (errcode, preg, errbuf, errbuf_size)
- int errcode;
- const regex_t *preg;
- char *errbuf;
- size_t errbuf_size;
-{
- const char *msg;
- size_t msg_size;
-
- if (BE (errcode < 0
- || errcode >= (int) (sizeof (__re_error_msgid_idx)
- / sizeof (__re_error_msgid_idx[0])), 0))
- /* Only error codes returned by the rest of the code should be passed
- to this routine. If we are given anything else, or if other regex
- code generates an invalid error code, then the program has a bug.
- Dump core so we can fix it. */
- abort ();
-
- msg = gettext (__re_error_msgid + __re_error_msgid_idx[errcode]);
-
- msg_size = strlen (msg) + 1; /* Includes the null. */
-
- if (BE (errbuf_size != 0, 1))
- {
- if (BE (msg_size > errbuf_size, 0))
- {
-#if defined HAVE_MEMPCPY || defined _LIBC
- *((char *) __mempcpy (errbuf, msg, errbuf_size - 1)) = '\0';
-#else
- memcpy (errbuf, msg, errbuf_size - 1);
- errbuf[errbuf_size - 1] = 0;
-#endif
- }
- else
- memcpy (errbuf, msg, msg_size);
- }
-
- return msg_size;
-}
-#ifdef _LIBC
-weak_alias (__regerror, regerror)
-#endif
-
-
-static void
-free_dfa_content (re_dfa_t *dfa) /* Free every array that DFA owns and finally DFA itself */
-{
- int i, j;
-
- re_free (dfa->subexps);
-
- for (i = 0; i < dfa->nodes_len; ++i) /* bracket sets attached to the nodes */
- {
- re_token_t *node = dfa->nodes + i;
-#ifdef RE_ENABLE_I18N
- if (node->type == COMPLEX_BRACKET && node->duplicated == 0) /* presumably a duplicated node shares its set with the node it was copied from -- confirm */
- free_charset (node->opr.mbcset);
- else
-#endif /* RE_ENABLE_I18N */
- if (node->type == SIMPLE_BRACKET && node->duplicated == 0)
- re_free (node->opr.sbcset);
- }
- re_free (dfa->nexts);
- for (i = 0; i < dfa->nodes_len; ++i) /* per-node sets; each array may still be missing */
- {
- if (dfa->eclosures != NULL)
- re_node_set_free (dfa->eclosures + i);
- if (dfa->inveclosures != NULL)
- re_node_set_free (dfa->inveclosures + i);
- if (dfa->edests != NULL)
- re_node_set_free (dfa->edests + i);
- }
- re_free (dfa->edests);
- re_free (dfa->eclosures);
- re_free (dfa->inveclosures);
- re_free (dfa->nodes);
-
- for (i = 0; i <= dfa->state_hash_mask; ++i) /* every slot of the state hash table */
- {
- struct re_state_table_entry *entry = dfa->state_table + i;
- for (j = 0; j < entry->num; ++j)
- {
- re_dfastate_t *state = entry->array[j];
- free_state (state);
- }
- re_free (entry->array);
- }
- re_free (dfa->state_table);
-
- if (dfa->word_char != NULL)
- re_free (dfa->word_char);
-#ifdef DEBUG
- re_free (dfa->re_str);
-#endif
-
- re_free (dfa);
-}
-
-
-/* Free dynamically allocated space used by PREG.
-
-   The DFA stored in PREG->buffer and the fastmap are released and the
-   corresponding fields are cleared, so that PREG keeps no dangling
-   pointers and a repeated regfree on the same PREG frees nothing twice.  */
-
-void
-regfree (preg)
-     regex_t *preg;
-{
-  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
-  if (BE (dfa != NULL, 1))
-    free_dfa_content (dfa);
-  preg->buffer = NULL;
-  preg->allocated = 0;
-
-  re_free (preg->fastmap);
-  preg->fastmap = NULL;
-}
-#ifdef _LIBC
-weak_alias (__regfree, regfree)
-#endif
-\f
-/* Entry points compatible with 4.2 BSD regex library. We don't define
- them unless specifically requested. */
-
-#if defined _REGEX_RE_COMP || defined _LIBC
-
-/* BSD has one and only one pattern buffer.
-
- re_comp compiles S into that buffer with the syntax in
- re_syntax_options and returns NULL on success or an error message
- otherwise. With a null S nothing is compiled; the call only reports
- whether a previously compiled pattern exists. The fastmap storage is
- allocated on first use and carried over from one pattern to the next. */
-static struct re_pattern_buffer re_comp_buf;
-
-char *
-# ifdef _LIBC
-/* Make these definitions weak in libc, so POSIX programs can redefine
- these names if they don't use our functions, and still use
- regcomp/regexec above without link errors. */
-weak_function
-# endif
-re_comp (s)
- const char *s;
-{
- reg_errcode_t ret;
- char *fastmap;
-
- if (!s)
- {
- if (!re_comp_buf.buffer)
- return gettext ("No previous regular expression");
- return 0;
- }
-
- if (re_comp_buf.buffer)
- {
- /* Detach the fastmap so that __regfree does not free it, wipe the
- buffer and attach the fastmap again. */
- fastmap = re_comp_buf.fastmap;
- re_comp_buf.fastmap = NULL;
- __regfree (&re_comp_buf);
- memset (&re_comp_buf, '\0', sizeof (re_comp_buf));
- re_comp_buf.fastmap = fastmap;
- }
-
- if (re_comp_buf.fastmap == NULL)
- {
- re_comp_buf.fastmap = (char *) malloc (SBC_MAX);
- if (re_comp_buf.fastmap == NULL)
- return (char *) gettext (__re_error_msgid
- + __re_error_msgid_idx[(int) REG_ESPACE]);
- }
-
- /* Since `re_exec' always passes NULL for the `regs' argument, we
- don't need to initialize the pattern buffer fields which affect it. */
-
- /* Match anchors at newlines. */
- re_comp_buf.newline_anchor = 1;
-
- ret = re_compile_internal (&re_comp_buf, s, strlen (s), re_syntax_options);
-
- if (!ret)
- return NULL;
-
- /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */
- return (char *) gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]);
-}
-
-#ifdef _LIBC
-libc_freeres_fn (free_mem) /* releases what the single re_comp pattern buffer holds */
-{
- __regfree (&re_comp_buf);
-}
-#endif
-
-#endif /* _REGEX_RE_COMP */
-\f
-/* Internal entry point.
-   Compile the regular expression PATTERN, whose length is LENGTH.
-   SYNTAX indicate regular expression's syntax.
-
-   On success PREG->buffer holds the compiled re_dfa_t and REG_NOERROR is
-   returned.  On failure after the DFA has been allocated, everything
-   allocated here is released, PREG->buffer is reset to NULL and
-   PREG->allocated to 0, and the error code is returned.  */
-
-static reg_errcode_t
-re_compile_internal (preg, pattern, length, syntax)
-     regex_t *preg;
-     const char * pattern;
-     int length;
-     reg_syntax_t syntax;
-{
-  reg_errcode_t err = REG_NOERROR;
-  re_dfa_t *dfa;
-  re_string_t regexp;
-
-  /* Initialize the pattern buffer.  */
-  preg->fastmap_accurate = 0;
-  preg->syntax = syntax;
-  preg->not_bol = preg->not_eol = 0;
-  preg->used = 0;
-  preg->re_nsub = 0;
-  preg->can_be_null = 0;
-  preg->regs_allocated = REGS_UNALLOCATED;
-
-  /* Initialize the dfa.  */
-  dfa = (re_dfa_t *) preg->buffer;
-  if (preg->allocated < sizeof (re_dfa_t))
-    {
-      /* If zero allocated, but buffer is non-null, try to realloc
-         enough space.  This loses if buffer's address is bogus, but
-         that is the user's responsibility.  If ->buffer is NULL this
-         is a simple allocation.  */
-      dfa = re_realloc (preg->buffer, re_dfa_t, 1);
-      if (dfa == NULL)
-        return REG_ESPACE;
-      preg->allocated = sizeof (re_dfa_t);
-    }
-  preg->buffer = (unsigned char *) dfa;
-  preg->used = sizeof (re_dfa_t);
-
-  err = init_dfa (dfa, length);
-  if (BE (err != REG_NOERROR, 0))
-    {
-      /* A failed init_dfa leaves all its array pointers NULL, so only
-         the DFA structure itself is left to free.  */
-      re_free (dfa);
-      preg->buffer = NULL;
-      preg->allocated = 0;
-      return err;
-    }
-#ifdef DEBUG
-  dfa->re_str = re_malloc (char, length + 1);
-  if (dfa->re_str != NULL)
-    strncpy (dfa->re_str, pattern, length + 1);
-#endif
-
-  err = re_string_construct (&regexp, pattern, length, preg->translate,
-                             syntax & RE_ICASE);
-  if (BE (err != REG_NOERROR, 0))
-    {
-      /* init_dfa succeeded, so the DFA already owns arrays; release them
-         together with the DFA instead of freeing the bare structure.  */
-      free_dfa_content (dfa);
-      preg->buffer = NULL;
-      preg->allocated = 0;
-      return err;
-    }
-
-  /* Parse the regular expression, and build a structure tree.  */
-  preg->re_nsub = 0;
-  dfa->str_tree = parse (&regexp, preg, syntax, &err);
-  if (BE (dfa->str_tree == NULL, 0))
-    goto re_compile_internal_free_return;
-
-  /* Analyze the tree and collect information which is necessary to
-     create the dfa.  */
-  err = analyze (dfa);
-  if (BE (err != REG_NOERROR, 0))
-    goto re_compile_internal_free_return;
-
-  /* Then create the initial state of the dfa.  */
-  err = create_initial_state (dfa);
-
-  /* Release work areas.  */
-  free_workarea_compile (preg);
-  re_string_destruct (&regexp);
-
-  if (BE (err != REG_NOERROR, 0))
-    {
-      free_dfa_content (dfa);
-      preg->buffer = NULL;
-      preg->allocated = 0;
-    }
-
-  return err;
-
- re_compile_internal_free_return:
-  /* Parsing or analysis failed: the work areas and the input string are
-     still allocated and have to go as well.  Without a tree there is no
-     compile-time work area to release.  */
-  if (dfa->str_tree != NULL)
-    free_workarea_compile (preg);
-  re_string_destruct (&regexp);
-  free_dfa_content (dfa);
-  preg->buffer = NULL;
-  preg->allocated = 0;
-  return err;
-}
-
-/* Initialize DFA.  We use the length of the regular expression PAT_LEN
-   as the initial length of some arrays.
-
-   DFA is zeroed, then the node array, the state hash table (its size is
-   the smallest power of two greater than PAT_LEN) and the subexpression
-   array are allocated.  Returns REG_NOERROR on success.  If any of the
-   allocations fails, the ones that succeeded are freed again, the three
-   pointers are left NULL and REG_ESPACE is returned.  */
-
-static reg_errcode_t
-init_dfa (dfa, pat_len)
-     re_dfa_t *dfa;
-     int pat_len;
-{
-  int table_size;
-
-  memset (dfa, '\0', sizeof (re_dfa_t));
-
-  dfa->nodes_alloc = pat_len + 1;
-  dfa->nodes = re_malloc (re_token_t, dfa->nodes_alloc);
-
-  dfa->states_alloc = pat_len + 1;
-
-  /* table_size = 2 ^ ceil(log pat_len) */
-  for (table_size = 1; table_size > 0; table_size <<= 1)
-    if (table_size > pat_len)
-      break;
-
-  dfa->state_table = calloc (table_size, sizeof (struct re_state_table_entry));
-  dfa->state_hash_mask = table_size - 1;
-
-  dfa->subexps_alloc = 1;
-  dfa->subexps = re_malloc (re_subexp_t, dfa->subexps_alloc);
-  dfa->word_char = NULL;
-
-  if (BE (dfa->nodes == NULL || dfa->state_table == NULL
-          || dfa->subexps == NULL, 0))
-    {
-      /* Give back whatever was obtained; the caller frees only the DFA
-         structure itself after a failure.  */
-      re_free (dfa->subexps);
-      re_free (dfa->state_table);
-      re_free (dfa->nodes);
-      dfa->subexps = NULL;
-      dfa->state_table = NULL;
-      dfa->nodes = NULL;
-      return REG_ESPACE;
-    }
-  return REG_NOERROR;
-}
-
-/* Initialize WORD_CHAR table, which indicate which character is
- "word". In this case "word" means that it is the word construction
- character used by some operators like "\<", "\>", etc. */
-
-static reg_errcode_t
-init_word_char (dfa)
- re_dfa_t *dfa;
-{
- int i, j, ch;
- dfa->word_char = (re_bitset_ptr_t) calloc (sizeof (bitset), 1);
- if (BE (dfa->word_char == NULL, 0))
- return REG_ESPACE;
- for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
- for (j = 0; j < UINT_BITS; ++j, ++ch)
- if (isalnum (ch) || ch == '_')
- dfa->word_char[i] |= 1 << j;
- return REG_NOERROR;
-}
-
-/* Free the work area which are only used while compiling. */
-
-static void
-free_workarea_compile (preg)
- regex_t *preg;
-{
- re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
- free_bin_tree (dfa->str_tree);
- dfa->str_tree = NULL;
- re_free (dfa->org_indices);
- dfa->org_indices = NULL;
-}
-
-/* Create initial states for all contexts. */
-
-static reg_errcode_t
-create_initial_state (dfa)
- re_dfa_t *dfa;
-{
- int first, i;
- reg_errcode_t err;
- re_node_set init_nodes;
-
- /* Initial states have the epsilon closure of the node which is
- the first node of the regular expression. */
- first = dfa->str_tree->first;
- dfa->init_node = first;
- err = re_node_set_init_copy (&init_nodes, dfa->eclosures + first);
- if (BE (err != REG_NOERROR, 0))
- return err;
-
- /* The back-references which are in initial states can epsilon transit,
- since in this case all of the subexpressions can be null.
- Then we add epsilon closures of the nodes which are the next nodes of
- the back-references. */
- if (dfa->nbackref > 0)
- for (i = 0; i < init_nodes.nelem; ++i)
- {
- int node_idx = init_nodes.elems[i];
- re_token_type_t type = dfa->nodes[node_idx].type;
-
- int clexp_idx;
- if (type != OP_BACK_REF)
- continue;
- for (clexp_idx = 0; clexp_idx < init_nodes.nelem; ++clexp_idx)
- {
- re_token_t *clexp_node;
- clexp_node = dfa->nodes + init_nodes.elems[clexp_idx];
- if (clexp_node->type == OP_CLOSE_SUBEXP
- && clexp_node->opr.idx + 1 == dfa->nodes[node_idx].opr.idx)
- break;
- }
- if (clexp_idx == init_nodes.nelem)
- continue;
-
- if (type == OP_BACK_REF)
- {
- int dest_idx = dfa->edests[node_idx].elems[0];
- if (!re_node_set_contains (&init_nodes, dest_idx))
- {
- re_node_set_merge (&init_nodes, dfa->eclosures + dest_idx);
- i = 0;
- }
- }
- }
-
- /* It must be the first time to invoke acquire_state. */
- dfa->init_state = re_acquire_state_context (&err, dfa, &init_nodes, 0);
- /* We don't check ERR here, since the initial state must not be NULL. */
- if (BE (dfa->init_state == NULL, 0))
- return err;
- if (dfa->init_state->has_constraint)
- {
- dfa->init_state_word = re_acquire_state_context (&err, dfa, &init_nodes,
- CONTEXT_WORD);
- dfa->init_state_nl = re_acquire_state_context (&err, dfa, &init_nodes,
- CONTEXT_NEWLINE);
- dfa->init_state_begbuf = re_acquire_state_context (&err, dfa,
- &init_nodes,
- CONTEXT_NEWLINE
- | CONTEXT_BEGBUF);
- if (BE (dfa->init_state_word == NULL || dfa->init_state_nl == NULL
- || dfa->init_state_begbuf == NULL, 0))
- return err;
- }
- else
- dfa->init_state_word = dfa->init_state_nl
- = dfa->init_state_begbuf = dfa->init_state;
-
- re_node_set_free (&init_nodes);
- return REG_NOERROR;
-}
-\f
-/* Analyze the structure tree, and calculate "first", "next", "edest",
- "eclosure", and "inveclosure". */
-
-static reg_errcode_t
-analyze (dfa)
- re_dfa_t *dfa;
-{
- int i;
- reg_errcode_t ret;
-
- /* Allocate arrays. */
- dfa->nexts = re_malloc (int, dfa->nodes_alloc);
- dfa->org_indices = re_malloc (int, dfa->nodes_alloc);
- dfa->edests = re_malloc (re_node_set, dfa->nodes_alloc);
- dfa->eclosures = re_malloc (re_node_set, dfa->nodes_alloc);
- dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_alloc);
- if (BE (dfa->nexts == NULL || dfa->org_indices == NULL || dfa->edests == NULL
- || dfa->eclosures == NULL || dfa->inveclosures == NULL, 0))
- return REG_ESPACE;
- /* Initialize them. */
- for (i = 0; i < dfa->nodes_len; ++i)
- {
- dfa->nexts[i] = -1;
- re_node_set_init_empty (dfa->edests + i);
- re_node_set_init_empty (dfa->eclosures + i);
- re_node_set_init_empty (dfa->inveclosures + i);
- }
-
- ret = analyze_tree (dfa, dfa->str_tree);
- if (BE (ret == REG_NOERROR, 1))
- {
- ret = calc_eclosure (dfa);
- if (ret == REG_NOERROR)
- calc_inveclosure (dfa);
- }
- return ret;
-}
-
-/* Helper functions for analyze.
- This function calculate "first", "next", and "edest" for the subtree
- whose root is NODE. */
-
-static reg_errcode_t
-analyze_tree (dfa, node)
- re_dfa_t *dfa;
- bin_tree_t *node;
-{
- reg_errcode_t ret;
- if (node->first == -1)
- calc_first (dfa, node);
- if (node->next == -1)
- calc_next (dfa, node);
- if (node->eclosure.nelem == 0)
- calc_epsdest (dfa, node);
- /* Calculate "first" etc. for the left child. */
- if (node->left != NULL)
- {
- ret = analyze_tree (dfa, node->left);
- if (BE (ret != REG_NOERROR, 0))
- return ret;
- }
- /* Calculate "first" etc. for the right child. */
- if (node->right != NULL)
- {
- ret = analyze_tree (dfa, node->right);
- if (BE (ret != REG_NOERROR, 0))
- return ret;
- }
- return REG_NOERROR;
-}
-
-/* Calculate "first" for the node NODE. */
-static void
-calc_first (dfa, node)
- re_dfa_t *dfa;
- bin_tree_t *node;
-{
- int idx, type;
- idx = node->node_idx;
- type = (node->type == 0) ? dfa->nodes[idx].type : node->type;
-
- switch (type)
- {
-#ifdef DEBUG
- case OP_OPEN_BRACKET:
- case OP_CLOSE_BRACKET:
- case OP_OPEN_DUP_NUM:
- case OP_CLOSE_DUP_NUM:
- case OP_NON_MATCH_LIST:
- case OP_OPEN_COLL_ELEM:
- case OP_CLOSE_COLL_ELEM:
- case OP_OPEN_EQUIV_CLASS:
- case OP_CLOSE_EQUIV_CLASS:
- case OP_OPEN_CHAR_CLASS:
- case OP_CLOSE_CHAR_CLASS:
- /* These must not be appeared here. */
- assert (0);
-#endif
- case END_OF_RE:
- case CHARACTER:
- case OP_PERIOD:
- case OP_DUP_ASTERISK:
- case OP_DUP_QUESTION:
-#ifdef RE_ENABLE_I18N
- case COMPLEX_BRACKET:
-#endif /* RE_ENABLE_I18N */
- case SIMPLE_BRACKET:
- case OP_BACK_REF:
- case ANCHOR:
- case OP_OPEN_SUBEXP:
- case OP_CLOSE_SUBEXP:
- node->first = idx;
- break;
- case OP_DUP_PLUS:
-#ifdef DEBUG
- assert (node->left != NULL);
-#endif
- if (node->left->first == -1)
- calc_first (dfa, node->left);
- node->first = node->left->first;
- break;
- case OP_ALT:
- node->first = idx;
- break;
- /* else fall through */
- default:
-#ifdef DEBUG
- assert (node->left != NULL);
-#endif
- if (node->left->first == -1)
- calc_first (dfa, node->left);
- node->first = node->left->first;
- break;
- }
-}
-
-/* Calculate "next" for the node NODE. */
-
-static void
-calc_next (dfa, node)
- re_dfa_t *dfa;
- bin_tree_t *node;
-{
- int idx, type;
- bin_tree_t *parent = node->parent;
- if (parent == NULL)
- {
- node->next = -1;
- idx = node->node_idx;
- if (node->type == 0)
- dfa->nexts[idx] = node->next;
- return;
- }
-
- idx = parent->node_idx;
- type = (parent->type == 0) ? dfa->nodes[idx].type : parent->type;
-
- switch (type)
- {
- case OP_DUP_ASTERISK:
- case OP_DUP_PLUS:
- node->next = idx;
- break;
- case CONCAT:
- if (parent->left == node)
- {
- if (parent->right->first == -1)
- calc_first (dfa, parent->right);
- node->next = parent->right->first;
- break;
- }
- /* else fall through */
- default:
- if (parent->next == -1)
- calc_next (dfa, parent);
- node->next = parent->next;
- break;
- }
- idx = node->node_idx;
- if (node->type == 0)
- dfa->nexts[idx] = node->next;
-}
-
-/* Calculate "edest" for the node NODE. */
-
-static void
-calc_epsdest (dfa, node)
- re_dfa_t *dfa;
- bin_tree_t *node;
-{
- int idx;
- idx = node->node_idx;
- if (node->type == 0)
- {
- if (dfa->nodes[idx].type == OP_DUP_ASTERISK
- || dfa->nodes[idx].type == OP_DUP_PLUS
- || dfa->nodes[idx].type == OP_DUP_QUESTION)
- {
- if (node->left->first == -1)
- calc_first (dfa, node->left);
- if (node->next == -1)
- calc_next (dfa, node);
- re_node_set_init_2 (dfa->edests + idx, node->left->first,
- node->next);
- }
- else if (dfa->nodes[idx].type == OP_ALT)
- {
- int left, right;
- if (node->left != NULL)
- {
- if (node->left->first == -1)
- calc_first (dfa, node->left);
- left = node->left->first;
- }
- else
- {
- if (node->next == -1)
- calc_next (dfa, node);
- left = node->next;
- }
- if (node->right != NULL)
- {
- if (node->right->first == -1)
- calc_first (dfa, node->right);
- right = node->right->first;
- }
- else
- {
- if (node->next == -1)
- calc_next (dfa, node);
- right = node->next;
- }
- re_node_set_init_2 (dfa->edests + idx, left, right);
- }
- else if (dfa->nodes[idx].type == ANCHOR
- || dfa->nodes[idx].type == OP_OPEN_SUBEXP
- || dfa->nodes[idx].type == OP_CLOSE_SUBEXP
- || dfa->nodes[idx].type == OP_BACK_REF)
- re_node_set_init_1 (dfa->edests + idx, node->next);
- }
-}
-
-/* Duplicate the epsilon closure of the node ROOT_NODE.
- Note that duplicated nodes have constraint INIT_CONSTRAINT in addition
- to their own constraint. */
-
-static reg_errcode_t
-duplicate_node_closure (dfa, top_org_node, top_clone_node, root_node,
- init_constraint)
- re_dfa_t *dfa;
- int top_org_node, top_clone_node, root_node;
- unsigned int init_constraint;
-{
- reg_errcode_t err;
- int org_node, clone_node, ret;
- unsigned int constraint = init_constraint;
- for (org_node = top_org_node, clone_node = top_clone_node;;)
- {
- int org_dest, clone_dest;
- if (dfa->nodes[org_node].type == OP_BACK_REF)
- {
- /* If the back reference epsilon-transit, its destination must
- also have the constraint. Then duplicate the epsilon closure
- of the destination of the back reference, and store it in
- edests of the back reference. */
- org_dest = dfa->nexts[org_node];
- re_node_set_empty (dfa->edests + clone_node);
- err = duplicate_node (&clone_dest, dfa, org_dest, constraint);
- if (BE (err != REG_NOERROR, 0))
- return err;
- dfa->nexts[clone_node] = dfa->nexts[org_node];
- ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
- if (BE (ret < 0, 0))
- return REG_ESPACE;
- }
- else if (dfa->edests[org_node].nelem == 0)
- {
- /* In case of the node can't epsilon-transit, don't duplicate the
- destination and store the original destination as the
- destination of the node. */
- dfa->nexts[clone_node] = dfa->nexts[org_node];
- break;
- }
- else if (dfa->edests[org_node].nelem == 1)
- {
- /* In case of the node can epsilon-transit, and it has only one
- destination. */
- org_dest = dfa->edests[org_node].elems[0];
- re_node_set_empty (dfa->edests + clone_node);
- if (dfa->nodes[org_node].type == ANCHOR)
- {
- /* In case of the node has another constraint, append it. */
- if (org_node == root_node && clone_node != org_node)
- {
- /* ...but if the node is root_node itself, it means the
- epsilon closure have a loop, then tie it to the
- destination of the root_node. */
- ret = re_node_set_insert (dfa->edests + clone_node,
- org_dest);
- if (BE (ret < 0, 0))
- return REG_ESPACE;
- break;
- }
- constraint |= dfa->nodes[org_node].opr.ctx_type;
- }
- err = duplicate_node (&clone_dest, dfa, org_dest, constraint);
- if (BE (err != REG_NOERROR, 0))
- return err;
- ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
- if (BE (ret < 0, 0))
- return REG_ESPACE;
- }
- else /* dfa->edests[org_node].nelem == 2 */
- {
- /* In case of the node can epsilon-transit, and it has two
- destinations. E.g. '|', '*', '+', '?'. */
- org_dest = dfa->edests[org_node].elems[0];
- re_node_set_empty (dfa->edests + clone_node);
- /* Search for a duplicated node which satisfies the constraint. */
- clone_dest = search_duplicated_node (dfa, org_dest, constraint);
- if (clone_dest == -1)
- {
- /* There are no such a duplicated node, create a new one. */
- err = duplicate_node (&clone_dest, dfa, org_dest, constraint);
- if (BE (err != REG_NOERROR, 0))
- return err;
- ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
- if (BE (ret < 0, 0))
- return REG_ESPACE;
- err = duplicate_node_closure (dfa, org_dest, clone_dest,
- root_node, constraint);
- if (BE (err != REG_NOERROR, 0))
- return err;
- }
- else
- {
- /* There are a duplicated node which satisfy the constraint,
- use it to avoid infinite loop. */
- ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
- if (BE (ret < 0, 0))
- return REG_ESPACE;
- }
-
- org_dest = dfa->edests[org_node].elems[1];
- err = duplicate_node (&clone_dest, dfa, org_dest, constraint);
- if (BE (err != REG_NOERROR, 0))
- return err;
- ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
- if (BE (ret < 0, 0))
- return REG_ESPACE;
- }
- org_node = org_dest;
- clone_node = clone_dest;
- }
- return REG_NOERROR;
-}
-
-/* Search for a node which is duplicated from the node ORG_NODE, and
- satisfies the constraint CONSTRAINT. */
-
-static int
-search_duplicated_node (dfa, org_node, constraint)
- re_dfa_t *dfa;
- int org_node;
- unsigned int constraint;
-{
- int idx;
- for (idx = dfa->nodes_len - 1; dfa->nodes[idx].duplicated && idx > 0; --idx)
- {
- if (org_node == dfa->org_indices[idx]
- && constraint == dfa->nodes[idx].constraint)
- return idx; /* Found. */
- }
- return -1; /* Not found. */
-}
-
-/* Duplicate the node whose index is ORG_IDX and set the constraint CONSTRAINT.
- The new index will be stored in NEW_IDX and return REG_NOERROR if succeeded,
- otherwise return the error code. */
-
-static reg_errcode_t
-duplicate_node (new_idx, dfa, org_idx, constraint)
- re_dfa_t *dfa;
- int *new_idx, org_idx;
- unsigned int constraint;
-{
- re_token_t dup;
- int dup_idx;
-
- dup = dfa->nodes[org_idx];
- dup_idx = re_dfa_add_node (dfa, dup, 1);
- if (BE (dup_idx == -1, 0))
- return REG_ESPACE;
- dfa->nodes[dup_idx].constraint = constraint;
- if (dfa->nodes[org_idx].type == ANCHOR)
- dfa->nodes[dup_idx].constraint |= dfa->nodes[org_idx].opr.ctx_type;
- dfa->nodes[dup_idx].duplicated = 1;
- re_node_set_init_empty (dfa->edests + dup_idx);
- re_node_set_init_empty (dfa->eclosures + dup_idx);
- re_node_set_init_empty (dfa->inveclosures + dup_idx);
-
- /* Store the index of the original node. */
- dfa->org_indices[dup_idx] = org_idx;
- *new_idx = dup_idx;
- return REG_NOERROR;
-}
-
-static void
-calc_inveclosure (dfa)
- re_dfa_t *dfa;
-{
- int src, idx, dest;
- for (src = 0; src < dfa->nodes_len; ++src)
- {
- for (idx = 0; idx < dfa->eclosures[src].nelem; ++idx)
- {
- dest = dfa->eclosures[src].elems[idx];
- re_node_set_insert (dfa->inveclosures + dest, src);
- }
- }
-}
-
-/* Calculate "eclosure" for all the node in DFA. */
-
-static reg_errcode_t
-calc_eclosure (dfa)
- re_dfa_t *dfa;
-{
- int node_idx, incomplete;
-#ifdef DEBUG
- assert (dfa->nodes_len > 0);
-#endif
- incomplete = 0;
- /* For each nodes, calculate epsilon closure. */
- for (node_idx = 0; ; ++node_idx)
- {
- reg_errcode_t err;
- re_node_set eclosure_elem;
- if (node_idx == dfa->nodes_len)
- {
- if (!incomplete)
- break;
- incomplete = 0;
- node_idx = 0;
- }
-
-#ifdef DEBUG
- assert (dfa->eclosures[node_idx].nelem != -1);
-#endif
- /* If we have already calculated, skip it. */
- if (dfa->eclosures[node_idx].nelem != 0)
- continue;
- /* Calculate epsilon closure of `node_idx'. */
- err = calc_eclosure_iter (&eclosure_elem, dfa, node_idx, 1);
- if (BE (err != REG_NOERROR, 0))
- return err;
-
- if (dfa->eclosures[node_idx].nelem == 0)
- {
- incomplete = 1;
- re_node_set_free (&eclosure_elem);
- }
- }
- return REG_NOERROR;
-}
-
-/* Calculate epsilon closure of NODE. */
-
-static reg_errcode_t
-calc_eclosure_iter (new_set, dfa, node, root)
- re_node_set *new_set;
- re_dfa_t *dfa;
- int node, root;
-{
- reg_errcode_t err;
- unsigned int constraint;
- int i, incomplete;
- re_node_set eclosure;
- incomplete = 0;
- err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1);
- if (BE (err != REG_NOERROR, 0))
- return err;
-
- /* This indicates that we are calculating this node now.
- We reference this value to avoid infinite loop. */
- dfa->eclosures[node].nelem = -1;
-
- constraint = ((dfa->nodes[node].type == ANCHOR)
- ? dfa->nodes[node].opr.ctx_type : 0);
- /* If the current node has constraints, duplicate all nodes.
- Since they must inherit the constraints. */
- if (constraint && !dfa->nodes[dfa->edests[node].elems[0]].duplicated)
- {
- int org_node, cur_node;
- org_node = cur_node = node;
- err = duplicate_node_closure (dfa, node, node, node, constraint);
- if (BE (err != REG_NOERROR, 0))
- return err;
- }
-
- /* Expand each epsilon destination nodes. */
- if (IS_EPSILON_NODE(dfa->nodes[node].type))
- for (i = 0; i < dfa->edests[node].nelem; ++i)
- {
- re_node_set eclosure_elem;
- int edest = dfa->edests[node].elems[i];
- /* If calculating the epsilon closure of `edest' is in progress,
- return intermediate result. */
- if (dfa->eclosures[edest].nelem == -1)
- {
- incomplete = 1;
- continue;
- }
- /* If we haven't calculated the epsilon closure of `edest' yet,
- calculate now. Otherwise use calculated epsilon closure. */
- if (dfa->eclosures[edest].nelem == 0)
- {
- err = calc_eclosure_iter (&eclosure_elem, dfa, edest, 0);
- if (BE (err != REG_NOERROR, 0))
- return err;
- }
- else
- eclosure_elem = dfa->eclosures[edest];
- /* Merge the epsilon closure of `edest'. */
- re_node_set_merge (&eclosure, &eclosure_elem);
- /* If the epsilon closure of `edest' is incomplete,
- the epsilon closure of this node is also incomplete. */
- if (dfa->eclosures[edest].nelem == 0)
- {
- incomplete = 1;
- re_node_set_free (&eclosure_elem);
- }
- }
-
- /* Epsilon closures include itself. */
- re_node_set_insert (&eclosure, node);
- if (incomplete && !root)
- dfa->eclosures[node].nelem = 0;
- else
- dfa->eclosures[node] = eclosure;
- *new_set = eclosure;
- return REG_NOERROR;
-}
-\f
-/* Functions for token which are used in the parser. */
-
-/* Fetch a token from INPUT.
- We must not use this function inside bracket expressions. */
-
-static re_token_t
-fetch_token (input, syntax)
- re_string_t *input;
- reg_syntax_t syntax;
-{
- re_token_t token;
- int consumed_byte;
- consumed_byte = peek_token (&token, input, syntax);
- re_string_skip_bytes (input, consumed_byte);
- return token;
-}
-
-/* Peek a token from INPUT, and return the length of the token.
- We must not use this function inside bracket expressions. */
-
-static int
-peek_token (token, input, syntax)
- re_token_t *token;
- re_string_t *input;
- reg_syntax_t syntax;
-{
- unsigned char c;
-
- if (re_string_eoi (input))
- {
- token->type = END_OF_RE;
- return 0;
- }
-
- c = re_string_peek_byte (input, 0);
- token->opr.c = c;
-
-#ifdef RE_ENABLE_I18N
- token->mb_partial = 0;
- if (MB_CUR_MAX > 1 &&
- !re_string_first_byte (input, re_string_cur_idx (input)))
- {
- token->type = CHARACTER;
- token->mb_partial = 1;
- return 1;
- }
-#endif
- if (c == '\\')
- {
- unsigned char c2;
- if (re_string_cur_idx (input) + 1 >= re_string_length (input))
- {
- token->type = BACK_SLASH;
- return 1;
- }
-
- c2 = re_string_peek_byte_case (input, 1);
- token->opr.c = c2;
- token->type = CHARACTER;
- switch (c2)
- {
- case '|':
- if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_NO_BK_VBAR))
- token->type = OP_ALT;
- break;
- case '1': case '2': case '3': case '4': case '5':
- case '6': case '7': case '8': case '9':
- if (!(syntax & RE_NO_BK_REFS))
- {
- token->type = OP_BACK_REF;
- token->opr.idx = c2 - '0';
- }
- break;
- case '<':
- if (!(syntax & RE_NO_GNU_OPS))
- {
- token->type = ANCHOR;
- token->opr.idx = WORD_FIRST;
- }
- break;
- case '>':
- if (!(syntax & RE_NO_GNU_OPS))
- {
- token->type = ANCHOR;
- token->opr.idx = WORD_LAST;
- }
- break;
- case 'b':
- if (!(syntax & RE_NO_GNU_OPS))
- {
- token->type = ANCHOR;
- token->opr.idx = WORD_DELIM;
- }
- break;
- case 'B':
- if (!(syntax & RE_NO_GNU_OPS))
- {
- token->type = ANCHOR;
- token->opr.idx = INSIDE_WORD;
- }
- break;
- case 'w':
- if (!(syntax & RE_NO_GNU_OPS))
- token->type = OP_WORD;
- break;
- case 'W':
- if (!(syntax & RE_NO_GNU_OPS))
- token->type = OP_NOTWORD;
- break;
- case '`':
- if (!(syntax & RE_NO_GNU_OPS))
- {
- token->type = ANCHOR;
- token->opr.idx = BUF_FIRST;
- }
- break;
- case '\'':
- if (!(syntax & RE_NO_GNU_OPS))
- {
- token->type = ANCHOR;
- token->opr.idx = BUF_LAST;
- }
- break;
- case '(':
- if (!(syntax & RE_NO_BK_PARENS))
- token->type = OP_OPEN_SUBEXP;
- break;
- case ')':
- if (!(syntax & RE_NO_BK_PARENS))
- token->type = OP_CLOSE_SUBEXP;
- break;
- case '+':
- if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM))
- token->type = OP_DUP_PLUS;
- break;
- case '?':
- if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM))
- token->type = OP_DUP_QUESTION;
- break;
- case '{':
- if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES)))
- token->type = OP_OPEN_DUP_NUM;
- break;
- case '}':
- if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES)))
- token->type = OP_CLOSE_DUP_NUM;
- break;
- default:
- break;
- }
- return 2;
- }
-
- token->type = CHARACTER;
- switch (c)
- {
- case '\n':
- if (syntax & RE_NEWLINE_ALT)
- token->type = OP_ALT;
- break;
- case '|':
- if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_NO_BK_VBAR))
- token->type = OP_ALT;
- break;
- case '*':
- token->type = OP_DUP_ASTERISK;
- break;
- case '+':
- if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM))
- token->type = OP_DUP_PLUS;
- break;
- case '?':
- if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM))
- token->type = OP_DUP_QUESTION;
- break;
- case '{':
- if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
- token->type = OP_OPEN_DUP_NUM;
- break;
- case '}':
- if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
- token->type = OP_CLOSE_DUP_NUM;
- break;
- case '(':
- if (syntax & RE_NO_BK_PARENS)
- token->type = OP_OPEN_SUBEXP;
- break;
- case ')':
- if (syntax & RE_NO_BK_PARENS)
- token->type = OP_CLOSE_SUBEXP;
- break;
- case '[':
- token->type = OP_OPEN_BRACKET;
- break;
- case '.':
- token->type = OP_PERIOD;
- break;
- case '^':
- if (!(syntax & RE_CONTEXT_INDEP_ANCHORS) &&
- re_string_cur_idx (input) != 0)
- {
- char prev = re_string_peek_byte (input, -1);
- if (prev != '|' && prev != '(' &&
- (!(syntax & RE_NEWLINE_ALT) || prev != '\n'))
- break;
- }
- token->type = ANCHOR;
- token->opr.idx = LINE_FIRST;
- break;
- case '$':
- if (!(syntax & RE_CONTEXT_INDEP_ANCHORS) &&
- re_string_cur_idx (input) + 1 != re_string_length (input))
- {
- re_token_t next;
- re_string_skip_bytes (input, 1);
- peek_token (&next, input, syntax);
- re_string_skip_bytes (input, -1);
- if (next.type != OP_ALT && next.type != OP_CLOSE_SUBEXP)
- break;
- }
- token->type = ANCHOR;
- token->opr.idx = LINE_LAST;
- break;
- default:
- break;
- }
- return 1;
-}
-
-/* Peek a token from INPUT, and return the length of the token.
- We must not use this function out of bracket expressions. */
-
-static int
-peek_token_bracket (token, input, syntax)
- re_token_t *token;
- re_string_t *input;
- reg_syntax_t syntax;
-{
- unsigned char c;
- if (re_string_eoi (input))
- {
- token->type = END_OF_RE;
- return 0;
- }
- c = re_string_peek_byte (input, 0);
- token->opr.c = c;
-
-#ifdef RE_ENABLE_I18N
- if (MB_CUR_MAX > 1 &&
- !re_string_first_byte (input, re_string_cur_idx (input)))
- {
- token->type = CHARACTER;
- return 1;
- }
-#endif /* RE_ENABLE_I18N */
-
- if (c == '\\' && (syntax & RE_BACKSLASH_ESCAPE_IN_LISTS))
- {
- /* In this case, '\' escape a character. */
- unsigned char c2;
- re_string_skip_bytes (input, 1);
- c2 = re_string_peek_byte (input, 0);
- token->opr.c = c2;
- token->type = CHARACTER;
- return 1;
- }
- if (c == '[') /* '[' is a special char in a bracket exps. */
- {
- unsigned char c2;
- int token_len;
- c2 = re_string_peek_byte (input, 1);
- token->opr.c = c2;
- token_len = 2;
- switch (c2)
- {
- case '.':
- token->type = OP_OPEN_COLL_ELEM;
- break;
- case '=':
- token->type = OP_OPEN_EQUIV_CLASS;
- break;
- case ':':
- if (syntax & RE_CHAR_CLASSES)
- {
- token->type = OP_OPEN_CHAR_CLASS;
- break;
- }
- /* else fall through. */
- default:
- token->type = CHARACTER;
- token->opr.c = c;
- token_len = 1;
- break;
- }
- return token_len;
- }
- switch (c)
- {
- case '-':
- token->type = OP_CHARSET_RANGE;
- break;
- case ']':
- token->type = OP_CLOSE_BRACKET;
- break;
- case '^':
- token->type = OP_NON_MATCH_LIST;
- break;
- default:
- token->type = CHARACTER;
- }
- return 1;
-}
-\f
-/* Functions for parser. */
-
-/* Entry point of the parser.
- Parse the regular expression REGEXP and return the structure tree.
- If an error is occured, ERR is set by error code, and return NULL.
- This function build the following tree, from regular expression <reg_exp>:
- CAT
- / \
- / \
- <reg_exp> EOR
-
- CAT means concatenation.
- EOR means end of regular expression. */
-
-static bin_tree_t *
-parse (regexp, preg, syntax, err)
- re_string_t *regexp;
- regex_t *preg;
- reg_syntax_t syntax;
- reg_errcode_t *err;
-{
- re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
- bin_tree_t *tree, *eor, *root;
- re_token_t current_token;
- int new_idx;
- current_token = fetch_token (regexp, syntax);
- tree = parse_reg_exp (regexp, preg, &current_token, syntax, 0, err);
- if (BE (*err != REG_NOERROR && tree == NULL, 0))
- return NULL;
- new_idx = re_dfa_add_node (dfa, current_token, 0);
- eor = create_tree (NULL, NULL, 0, new_idx);
- if (tree != NULL)
- root = create_tree (tree, eor, CONCAT, 0);
- else
- root = eor;
- if (BE (new_idx == -1 || eor == NULL || root == NULL, 0))
- {
- *err = REG_ESPACE;
- return NULL;
- }
- return root;
-}
-
-/* This function build the following tree, from regular expression
- <branch1>|<branch2>:
- ALT
- / \
- / \
- <branch1> <branch2>
-
- ALT means alternative, which represents the operator `|'. */
-
-static bin_tree_t *
-parse_reg_exp (regexp, preg, token, syntax, nest, err)
- re_string_t *regexp;
- regex_t *preg;
- re_token_t *token;
- reg_syntax_t syntax;
- int nest;
- reg_errcode_t *err;
-{
- re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
- bin_tree_t *tree, *branch = NULL;
- int new_idx;
- tree = parse_branch (regexp, preg, token, syntax, nest, err);
- if (BE (*err != REG_NOERROR && tree == NULL, 0))
- return NULL;
-
- while (token->type == OP_ALT)
- {
- re_token_t alt_token = *token;
- new_idx = re_dfa_add_node (dfa, alt_token, 0);
- *token = fetch_token (regexp, syntax);
- if (token->type != OP_ALT && token->type != END_OF_RE
- && (nest == 0 || token->type != OP_CLOSE_SUBEXP))
- {
- branch = parse_branch (regexp, preg, token, syntax, nest, err);
- if (BE (*err != REG_NOERROR && branch == NULL, 0))
- {
- free_bin_tree (tree);
- return NULL;
- }
- }
- else
- branch = NULL;
- tree = create_tree (tree, branch, 0, new_idx);
- if (BE (new_idx == -1 || tree == NULL, 0))
- {
- *err = REG_ESPACE;
- return NULL;
- }
- dfa->has_plural_match = 1;
- }
- return tree;
-}
-
-/* This function build the following tree, from regular expression
- <exp1><exp2>:
- CAT
- / \
- / \
- <exp1> <exp2>
-
- CAT means concatenation. */
-
-static bin_tree_t *
-parse_branch (regexp, preg, token, syntax, nest, err)
- re_string_t *regexp;
- regex_t *preg;
- re_token_t *token;
- reg_syntax_t syntax;
- int nest;
- reg_errcode_t *err;
-{
- bin_tree_t *tree, *exp;
- tree = parse_expression (regexp, preg, token, syntax, nest, err);
- if (BE (*err != REG_NOERROR && tree == NULL, 0))
- return NULL;
-
- while (token->type != OP_ALT && token->type != END_OF_RE
- && (nest == 0 || token->type != OP_CLOSE_SUBEXP))
- {
- exp = parse_expression (regexp, preg, token, syntax, nest, err);
- if (BE (*err != REG_NOERROR && exp == NULL, 0))
- {
- free_bin_tree (tree);
- return NULL;
- }
- if (tree != NULL && exp != NULL)
- {
- tree = create_tree (tree, exp, CONCAT, 0);
- if (tree == NULL)
- {
- *err = REG_ESPACE;
- return NULL;
- }
- }
- else if (tree == NULL)
- tree = exp;
- /* Otherwise exp == NULL, we don't need to create new tree. */
- }
- return tree;
-}
-
-/* This function build the following tree, from regular expression a*:
- *
- |
- a
-*/
-
-static bin_tree_t *
-parse_expression (regexp, preg, token, syntax, nest, err)
- re_string_t *regexp;
- regex_t *preg;
- re_token_t *token;
- reg_syntax_t syntax;
- int nest;
- reg_errcode_t *err;
-{
- re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
- bin_tree_t *tree;
- int new_idx;
- switch (token->type)
- {
- case CHARACTER:
- new_idx = re_dfa_add_node (dfa, *token, 0);
- tree = create_tree (NULL, NULL, 0, new_idx);
- if (BE (new_idx == -1 || tree == NULL, 0))
- {
- *err = REG_ESPACE;
- return NULL;
- }
-#ifdef RE_ENABLE_I18N
- if (MB_CUR_MAX > 1)
- {
- while (!re_string_eoi (regexp)
- && !re_string_first_byte (regexp, re_string_cur_idx (regexp)))
- {
- bin_tree_t *mbc_remain;
- *token = fetch_token (regexp, syntax);
- new_idx = re_dfa_add_node (dfa, *token, 0);
- mbc_remain = create_tree (NULL, NULL, 0, new_idx);
- tree = create_tree (tree, mbc_remain, CONCAT, 0);
- if (BE (new_idx == -1 || mbc_remain == NULL || tree == NULL, 0))
- {
- *err = REG_ESPACE;
- return NULL;
- }
- }
- }
-#endif
- break;
- case OP_OPEN_SUBEXP:
- tree = parse_sub_exp (regexp, preg, token, syntax, nest + 1, err);
- if (BE (*err != REG_NOERROR && tree == NULL, 0))
- return NULL;
- break;
- case OP_OPEN_BRACKET:
- tree = parse_bracket_exp (regexp, dfa, token, syntax, err);
- if (BE (*err != REG_NOERROR && tree == NULL, 0))
- return NULL;
- break;
- case OP_BACK_REF:
- if (BE (preg->re_nsub < token->opr.idx
- || dfa->subexps[token->opr.idx - 1].end == -1, 0))
- {
- *err = REG_ESUBREG;
- return NULL;
- }
- dfa->used_bkref_map |= 1 << (token->opr.idx - 1);
- new_idx = re_dfa_add_node (dfa, *token, 0);
- tree = create_tree (NULL, NULL, 0, new_idx);
- if (BE (new_idx == -1 || tree == NULL, 0))
- {
- *err = REG_ESPACE;
- return NULL;
- }
- ++dfa->nbackref;
- dfa->has_mb_node = 1;
- break;
- case OP_DUP_ASTERISK:
- case OP_DUP_PLUS:
- case OP_DUP_QUESTION:
- case OP_OPEN_DUP_NUM:
- if (syntax & RE_CONTEXT_INVALID_OPS)
- {
- *err = REG_BADRPT;
- return NULL;
- }
- else if (syntax & RE_CONTEXT_INDEP_OPS)
- {
- *token = fetch_token (regexp, syntax);
- return parse_expression (regexp, preg, token, syntax, nest, err);
- }
- /* else fall through */
- case OP_CLOSE_SUBEXP:
- if ((token->type == OP_CLOSE_SUBEXP) &&
- !(syntax & RE_UNMATCHED_RIGHT_PAREN_ORD))
- {
- *err = REG_ERPAREN;
- return NULL;
- }
- /* else fall through */
- case OP_CLOSE_DUP_NUM:
- /* We treat it as a normal character. */
-
- /* Then we can these characters as normal characters. */
- token->type = CHARACTER;
- new_idx = re_dfa_add_node (dfa, *token, 0);
- tree = create_tree (NULL, NULL, 0, new_idx);
- if (BE (new_idx == -1 || tree == NULL, 0))
- {
- *err = REG_ESPACE;
- return NULL;
- }
- break;
- case ANCHOR:
- if (dfa->word_char == NULL)
- {
- *err = init_word_char (dfa);
- if (BE (*err != REG_NOERROR, 0))
- return NULL;
- }
- if (token->opr.ctx_type == WORD_DELIM)
- {
- bin_tree_t *tree_first, *tree_last;
- int idx_first, idx_last;
- token->opr.ctx_type = WORD_FIRST;
- idx_first = re_dfa_add_node (dfa, *token, 0);
- tree_first = create_tree (NULL, NULL, 0, idx_first);
- token->opr.ctx_type = WORD_LAST;
- idx_last = re_dfa_add_node (dfa, *token, 0);
- tree_last = create_tree (NULL, NULL, 0, idx_last);
- token->type = OP_ALT;
- new_idx = re_dfa_add_node (dfa, *token, 0);
- tree = create_tree (tree_first, tree_last, 0, new_idx);
- if (BE (idx_first == -1 || idx_last == -1 || new_idx == -1
- || tree_first == NULL || tree_last == NULL
- || tree == NULL, 0))
- {
- *err = REG_ESPACE;
- return NULL;
- }
- }
- else
- {
- new_idx = re_dfa_add_node (dfa, *token, 0);
- tree = create_tree (NULL, NULL, 0, new_idx);
- if (BE (new_idx == -1 || tree == NULL, 0))
- {
- *err = REG_ESPACE;
- return NULL;
- }
- }
- /* We must return here, since ANCHORs can't be followed
- by repetition operators.
- eg. RE"^*" is invalid or "<ANCHOR(^)><CHAR(*)>",
- it must not be "<ANCHOR(^)><REPEAT(*)>". */
- *token = fetch_token (regexp, syntax);
- return tree;
- case OP_PERIOD:
- new_idx = re_dfa_add_node (dfa, *token, 0);
- tree = create_tree (NULL, NULL, 0, new_idx);
- if (BE (new_idx == -1 || tree == NULL, 0))
- {
- *err = REG_ESPACE;
- return NULL;
- }
- if (MB_CUR_MAX > 1)
- dfa->has_mb_node = 1;
- break;
- case OP_WORD:
- tree = build_word_op (dfa, 0, err);
- if (BE (*err != REG_NOERROR && tree == NULL, 0))
- return NULL;
- break;
- case OP_NOTWORD:
- tree = build_word_op (dfa, 1, err);
- if (BE (*err != REG_NOERROR && tree == NULL, 0))
- return NULL;
- break;
- case OP_ALT:
- case END_OF_RE:
- return NULL;
- case BACK_SLASH:
- *err = REG_EESCAPE;
- return NULL;
- default:
- /* Must not happen? */
-#ifdef DEBUG
- assert (0);
-#endif
- return NULL;
- }
- *token = fetch_token (regexp, syntax);
-
- while (token->type == OP_DUP_ASTERISK || token->type == OP_DUP_PLUS
- || token->type == OP_DUP_QUESTION || token->type == OP_OPEN_DUP_NUM)
- {
- tree = parse_dup_op (tree, regexp, dfa, token, syntax, err);
- if (BE (*err != REG_NOERROR && tree == NULL, 0))
- return NULL;
- dfa->has_plural_match = 1;
- }
-
- return tree;
-}
-
-/* This function build the following tree, from regular expression
- (<reg_exp>):
- SUBEXP
- |
- <reg_exp>
-*/
-
-static bin_tree_t *
-parse_sub_exp (regexp, preg, token, syntax, nest, err)
- re_string_t *regexp;
- regex_t *preg;
- re_token_t *token;
- reg_syntax_t syntax;
- int nest;
- reg_errcode_t *err;
-{
- re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
- bin_tree_t *tree, *left_par, *right_par;
- size_t cur_nsub;
- int new_idx;
- cur_nsub = preg->re_nsub++;
- if (dfa->subexps_alloc < preg->re_nsub)
- {
- re_subexp_t *new_array;
- dfa->subexps_alloc *= 2;
- new_array = re_realloc (dfa->subexps, re_subexp_t, dfa->subexps_alloc);
- if (BE (new_array == NULL, 0))
- {
- dfa->subexps_alloc /= 2;
- *err = REG_ESPACE;
- return NULL;
- }
- dfa->subexps = new_array;
- }
- dfa->subexps[cur_nsub].start = dfa->nodes_len;
- dfa->subexps[cur_nsub].end = -1;
-
- new_idx = re_dfa_add_node (dfa, *token, 0);
- left_par = create_tree (NULL, NULL, 0, new_idx);
- if (BE (new_idx == -1 || left_par == NULL, 0))
- {
- *err = REG_ESPACE;
- return NULL;
- }
- dfa->nodes[new_idx].opr.idx = cur_nsub;
- *token = fetch_token (regexp, syntax);
-
- /* The subexpression may be a null string. */
- if (token->type == OP_CLOSE_SUBEXP)
- tree = NULL;
- else
- {
- tree = parse_reg_exp (regexp, preg, token, syntax, nest, err);
- if (BE (*err != REG_NOERROR && tree == NULL, 0))
- return NULL;
- }
- if (BE (token->type != OP_CLOSE_SUBEXP, 0))
- {
- free_bin_tree (tree);
- *err = REG_BADPAT;
- return NULL;
- }
- new_idx = re_dfa_add_node (dfa, *token, 0);
- dfa->subexps[cur_nsub].end = dfa->nodes_len;
- right_par = create_tree (NULL, NULL, 0, new_idx);
- tree = ((tree == NULL) ? right_par
- : create_tree (tree, right_par, CONCAT, 0));
- tree = create_tree (left_par, tree, CONCAT, 0);
- if (BE (new_idx == -1 || right_par == NULL || tree == NULL, 0))
- {
- *err = REG_ESPACE;
- return NULL;
- }
- dfa->nodes[new_idx].opr.idx = cur_nsub;
-
- return tree;
-}
-
-/* This function parse repetition operators like "*", "+", "{1,3}" etc. */
-
-static bin_tree_t *
-parse_dup_op (dup_elem, regexp, dfa, token, syntax, err)
- bin_tree_t *dup_elem;
- re_string_t *regexp;
- re_dfa_t *dfa;
- re_token_t *token;
- reg_syntax_t syntax;
- reg_errcode_t *err;
-{
- re_token_t dup_token;
- bin_tree_t *tree = dup_elem, *work_tree;
- int new_idx, start_idx = re_string_cur_idx (regexp);
- re_token_t start_token = *token;
- if (token->type == OP_OPEN_DUP_NUM)
- {
- int i;
- int end = 0;
- int start = fetch_number (regexp, token, syntax);
- bin_tree_t *elem;
- if (start == -1)
- {
- if (token->type == CHARACTER && token->opr.c == ',')
- start = 0; /* We treat "{,m}" as "{0,m}". */
- else
- {
- *err = REG_BADBR; /* <re>{} is invalid. */
- return NULL;
- }
- }
- if (BE (start != -2, 1))
- {
- /* We treat "{n}" as "{n,n}". */
- end = ((token->type == OP_CLOSE_DUP_NUM) ? start
- : ((token->type == CHARACTER && token->opr.c == ',')
- ? fetch_number (regexp, token, syntax) : -2));
- }
- if (BE (start == -2 || end == -2, 0))
- {
- /* Invalid sequence. */
- if (token->type == OP_CLOSE_DUP_NUM)
- goto parse_dup_op_invalid_interval;
- else
- goto parse_dup_op_ebrace;
- }
- if (BE (start == 0 && end == 0, 0))
- {
- /* We treat "<re>{0}" and "<re>{0,0}" as null string. */
- *token = fetch_token (regexp, syntax);
- free_bin_tree (dup_elem);
- return NULL;
- }
-
- /* Extract "<re>{n,m}" to "<re><re>...<re><re>{0,<m-n>}". */
- elem = tree;
- for (i = 0; i < start; ++i)
- if (i != 0)
- {
- work_tree = duplicate_tree (elem, dfa);
- tree = create_tree (tree, work_tree, CONCAT, 0);
- if (BE (work_tree == NULL || tree == NULL, 0))
- goto parse_dup_op_espace;
- }
-
- if (end == -1)
- {
- /* We treat "<re>{0,}" as "<re>*". */
- dup_token.type = OP_DUP_ASTERISK;
- if (start > 0)
- {
- elem = duplicate_tree (elem, dfa);
- new_idx = re_dfa_add_node (dfa, dup_token, 0);
- work_tree = create_tree (elem, NULL, 0, new_idx);
- tree = create_tree (tree, work_tree, CONCAT, 0);
- if (BE (elem == NULL || new_idx == -1 || work_tree == NULL
- || tree == NULL, 0))
- goto parse_dup_op_espace;
- }
- else
- {
- new_idx = re_dfa_add_node (dfa, dup_token, 0);
- tree = create_tree (elem, NULL, 0, new_idx);
- if (BE (new_idx == -1 || tree == NULL, 0))
- goto parse_dup_op_espace;
- }
- }
- else if (end - start > 0)
- {
- /* Then extract "<re>{0,m}" to "<re>?<re>?...<re>?". */
- dup_token.type = OP_DUP_QUESTION;
- if (start > 0)
- {
- elem = duplicate_tree (elem, dfa);
- new_idx = re_dfa_add_node (dfa, dup_token, 0);
- elem = create_tree (elem, NULL, 0, new_idx);
- tree = create_tree (tree, elem, CONCAT, 0);
- if (BE (elem == NULL || new_idx == -1 || tree == NULL, 0))
- goto parse_dup_op_espace;
- }
- else
- {
- new_idx = re_dfa_add_node (dfa, dup_token, 0);
- tree = elem = create_tree (elem, NULL, 0, new_idx);
- if (BE (new_idx == -1 || tree == NULL, 0))
- goto parse_dup_op_espace;
- }
- for (i = 1; i < end - start; ++i)
- {
- work_tree = duplicate_tree (elem, dfa);
- tree = create_tree (tree, work_tree, CONCAT, 0);
- if (BE (work_tree == NULL || tree == NULL, 0))
- {
- *err = REG_ESPACE;
- return NULL;
- }
- }
- }
- }
- else
- {
- new_idx = re_dfa_add_node (dfa, *token, 0);
- tree = create_tree (tree, NULL, 0, new_idx);
- if (BE (new_idx == -1 || tree == NULL, 0))
- {
- *err = REG_ESPACE;
- return NULL;
- }
- }
- *token = fetch_token (regexp, syntax);
- return tree;
-
- parse_dup_op_espace:
- free_bin_tree (tree);
- *err = REG_ESPACE;
- return NULL;
-
- parse_dup_op_ebrace:
- if (BE (!(syntax & RE_INVALID_INTERVAL_ORD), 0))
- {
- *err = REG_EBRACE;
- return NULL;
- }
- goto parse_dup_op_rollback;
- parse_dup_op_invalid_interval:
- if (BE (!(syntax & RE_INVALID_INTERVAL_ORD), 0))
- {
- *err = REG_BADBR;
- return NULL;
- }
- parse_dup_op_rollback:
- re_string_set_index (regexp, start_idx);
- *token = start_token;
- token->type = CHARACTER;
- return dup_elem;
-}
-
-/* Size of the names for collating symbol/equivalence_class/character_class.
- I'm not sure, but maybe enough. */
-#define BRACKET_NAME_BUF_SIZE 32
-
-#ifndef _LIBC
- /* Local function for parse_bracket_exp only used in case of NOT _LIBC.
- Build the range expression which starts from START_ELEM, and ends
- at END_ELEM. The result are written to MBCSET and SBCSET.
- RANGE_ALLOC is the allocated size of mbcset->range_starts, and
- mbcset->range_ends, is a pointer argument sinse we may
- update it. */
-
-static reg_errcode_t
-# ifdef RE_ENABLE_I18N
-build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem)
- re_charset_t *mbcset;
- int *range_alloc;
-# else /* not RE_ENABLE_I18N */
-build_range_exp (sbcset, start_elem, end_elem)
-# endif /* not RE_ENABLE_I18N */
- re_bitset_ptr_t sbcset;
- bracket_elem_t *start_elem, *end_elem;
-{
- unsigned int start_ch, end_ch;
- /* Equivalence Classes and Character Classes can't be a range start/end. */
- if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS
- || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS,
- 0))
- return REG_ERANGE;
-
- /* We can handle no multi character collating elements without libc
- support. */
- if (BE ((start_elem->type == COLL_SYM
- && strlen ((char *) start_elem->opr.name) > 1)
- || (end_elem->type == COLL_SYM
- && strlen ((char *) end_elem->opr.name) > 1), 0))
- return REG_ECOLLATE;
-
-# ifdef RE_ENABLE_I18N
- {
- wchar_t wc, start_wc, end_wc;
- wchar_t cmp_buf[6] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
-
- start_ch = ((start_elem->type == SB_CHAR) ? start_elem->opr.ch
- : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
- : 0));
- end_ch = ((end_elem->type == SB_CHAR) ? end_elem->opr.ch
- : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]
- : 0));
- start_wc = ((start_elem->type == SB_CHAR || start_elem->type == COLL_SYM)
- ? __btowc (start_ch) : start_elem->opr.wch);
- end_wc = ((end_elem->type == SB_CHAR || end_elem->type == COLL_SYM)
- ? __btowc (end_ch) : end_elem->opr.wch);
- cmp_buf[0] = start_wc;
- cmp_buf[4] = end_wc;
- if (wcscoll (cmp_buf, cmp_buf + 4) > 0)
- return REG_ERANGE;
-
- /* Check the space of the arrays. */
- if (*range_alloc == mbcset->nranges)
- {
- /* There are not enough space, need realloc. */
- wchar_t *new_array_start, *new_array_end;
- int new_nranges;
-
- /* +1 in case of mbcset->nranges is 0. */
- new_nranges = 2 * mbcset->nranges + 1;
- /* Use realloc since mbcset->range_starts and mbcset->range_ends
- are NULL if *range_alloc == 0. */
- new_array_start = re_realloc (mbcset->range_starts, wchar_t,
- new_nranges);
- new_array_end = re_realloc (mbcset->range_ends, wchar_t,
- new_nranges);
-
- if (BE (new_array_start == NULL || new_array_end == NULL, 0))
- return REG_ESPACE;
-
- mbcset->range_starts = new_array_start;
- mbcset->range_ends = new_array_end;
- *range_alloc = new_nranges;
- }
-
- mbcset->range_starts[mbcset->nranges] = start_wc;
- mbcset->range_ends[mbcset->nranges++] = end_wc;
-
- /* Build the table for single byte characters. */
- for (wc = 0; wc <= SBC_MAX; ++wc)
- {
- cmp_buf[2] = wc;
- if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
- && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
- bitset_set (sbcset, wc);
- }
- }
-# else /* not RE_ENABLE_I18N */
- {
- unsigned int ch;
- start_ch = ((start_elem->type == SB_CHAR ) ? start_elem->opr.ch
- : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
- : 0));
- end_ch = ((end_elem->type == SB_CHAR ) ? end_elem->opr.ch
- : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]
- : 0));
- if (start_ch > end_ch)
- return REG_ERANGE;
- /* Build the table for single byte characters. */
- for (ch = 0; ch <= SBC_MAX; ++ch)
- if (start_ch <= ch && ch <= end_ch)
- bitset_set (sbcset, ch);
- }
-# endif /* not RE_ENABLE_I18N */
- return REG_NOERROR;
-}
-#endif /* not _LIBC */
-
-#ifndef _LIBC
-/* Helper function for parse_bracket_exp only used in case of NOT _LIBC..
- Build the collating element which is represented by NAME.
- The result are written to MBCSET and SBCSET.
- COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
- pointer argument since we may update it. */
-
-static reg_errcode_t
-# ifdef RE_ENABLE_I18N
-build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name)
- re_charset_t *mbcset;
- int *coll_sym_alloc;
-# else /* not RE_ENABLE_I18N */
-build_collating_symbol (sbcset, name)
-# endif /* not RE_ENABLE_I18N */
- re_bitset_ptr_t sbcset;
- const unsigned char *name;
-{
- size_t name_len = strlen ((const char *) name);
- if (BE (name_len != 1, 0))
- return REG_ECOLLATE;
- else
- {
- bitset_set (sbcset, name[0]);
- return REG_NOERROR;
- }
-}
-#endif /* not _LIBC */
-
-/* This function parse bracket expression like "[abc]", "[a-c]",
- "[[.a-a.]]" etc. */
-
-static bin_tree_t *
-parse_bracket_exp (regexp, dfa, token, syntax, err)
- re_string_t *regexp;
- re_dfa_t *dfa;
- re_token_t *token;
- reg_syntax_t syntax;
- reg_errcode_t *err;
-{
-#ifdef _LIBC
- const unsigned char *collseqmb;
- const char *collseqwc;
- uint32_t nrules;
- int32_t table_size;
- const int32_t *symb_table;
- const unsigned char *extra;
-
- /* Local function for parse_bracket_exp used in _LIBC environement.
- Seek the collating symbol entry correspondings to NAME.
- Return the index of the symbol in the SYMB_TABLE. */
-
- static inline int32_t
- seek_collating_symbol_entry (name, name_len)
- const unsigned char *name;
- size_t name_len;
- {
- int32_t hash = elem_hash ((const char *) name, name_len);
- int32_t elem = hash % table_size;
- int32_t second = hash % (table_size - 2);
- while (symb_table[2 * elem] != 0)
- {
- /* First compare the hashing value. */
- if (symb_table[2 * elem] == hash
- /* Compare the length of the name. */
- && name_len == extra[symb_table[2 * elem + 1]]
- /* Compare the name. */
- && memcmp (name, &extra[symb_table[2 * elem + 1] + 1],
- name_len) == 0)
- {
- /* Yep, this is the entry. */
- break;
- }
-
- /* Next entry. */
- elem += second;
- }
- return elem;
- }
-
- /* Local function for parse_bracket_exp used in _LIBC environement.
- Look up the collation sequence value of BR_ELEM.
- Return the value if succeeded, UINT_MAX otherwise. */
-
- static inline unsigned int
- lookup_collation_sequence_value (br_elem)
- bracket_elem_t *br_elem;
- {
- if (br_elem->type == SB_CHAR)
- {
- /*
- if (MB_CUR_MAX == 1)
- */
- if (nrules == 0)
- return collseqmb[br_elem->opr.ch];
- else
- {
- wint_t wc = __btowc (br_elem->opr.ch);
- return collseq_table_lookup (collseqwc, wc);
- }
- }
- else if (br_elem->type == MB_CHAR)
- {
- return collseq_table_lookup (collseqwc, br_elem->opr.wch);
- }
- else if (br_elem->type == COLL_SYM)
- {
- size_t sym_name_len = strlen ((char *) br_elem->opr.name);
- if (nrules != 0)
- {
- int32_t elem, idx;
- elem = seek_collating_symbol_entry (br_elem->opr.name,
- sym_name_len);
- if (symb_table[2 * elem] != 0)
- {
- /* We found the entry. */
- idx = symb_table[2 * elem + 1];
- /* Skip the name of collating element name. */
- idx += 1 + extra[idx];
- /* Skip the byte sequence of the collating element. */
- idx += 1 + extra[idx];
- /* Adjust for the alignment. */
- idx = (idx + 3) & ~3;
- /* Skip the multibyte collation sequence value. */
- idx += sizeof (unsigned int);
- /* Skip the wide char sequence of the collating element. */
- idx += sizeof (unsigned int) *
- (1 + *(unsigned int *) (extra + idx));
- /* Return the collation sequence value. */
- return *(unsigned int *) (extra + idx);
- }
- else if (symb_table[2 * elem] == 0 && sym_name_len == 1)
- {
- /* No valid character. Match it as a single byte
- character. */
- return collseqmb[br_elem->opr.name[0]];
- }
- }
- else if (sym_name_len == 1)
- return collseqmb[br_elem->opr.name[0]];
- }
- return UINT_MAX;
- }
-
- /* Local function for parse_bracket_exp used in _LIBC environement.
- Build the range expression which starts from START_ELEM, and ends
- at END_ELEM. The result are written to MBCSET and SBCSET.
- RANGE_ALLOC is the allocated size of mbcset->range_starts, and
- mbcset->range_ends, is a pointer argument sinse we may
- update it. */
-
- static inline reg_errcode_t
-# ifdef RE_ENABLE_I18N
- build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem)
- re_charset_t *mbcset;
- int *range_alloc;
-# else /* not RE_ENABLE_I18N */
- build_range_exp (sbcset, start_elem, end_elem)
-# endif /* not RE_ENABLE_I18N */
- re_bitset_ptr_t sbcset;
- bracket_elem_t *start_elem, *end_elem;
- {
- unsigned int ch;
- uint32_t start_collseq;
- uint32_t end_collseq;
-
-# ifdef RE_ENABLE_I18N
- /* Check the space of the arrays. */
- if (*range_alloc == mbcset->nranges)
- {
- /* There are not enough space, need realloc. */
- uint32_t *new_array_start;
- uint32_t *new_array_end;
- int new_nranges;
-
- /* +1 in case of mbcset->nranges is 0. */
- new_nranges = 2 * mbcset->nranges + 1;
- /* Use realloc since mbcset->range_starts and mbcset->range_ends
- are NULL if *range_alloc == 0. */
- new_array_start = re_realloc (mbcset->range_starts, uint32_t,
- new_nranges);
- new_array_end = re_realloc (mbcset->range_ends, uint32_t,
- new_nranges);
-
- if (BE (new_array_start == NULL || new_array_end == NULL, 0))
- return REG_ESPACE;
-
- mbcset->range_starts = new_array_start;
- mbcset->range_ends = new_array_end;
- *range_alloc = new_nranges;
- }
-# endif /* RE_ENABLE_I18N */
-
- /* Equivalence Classes and Character Classes can't be a range
- start/end. */
- if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS
- || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS,
- 0))
- return REG_ERANGE;
-
- start_collseq = lookup_collation_sequence_value (start_elem);
- end_collseq = lookup_collation_sequence_value (end_elem);
- /* Check start/end collation sequence values. */
- if (BE (start_collseq == UINT_MAX || end_collseq == UINT_MAX, 0))
- return REG_ECOLLATE;
- if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_collseq > end_collseq, 0))
- return REG_ERANGE;
-
-# ifdef RE_ENABLE_I18N
- /* Got valid collation sequence values, add them as a new entry. */
- mbcset->range_starts[mbcset->nranges] = start_collseq;
- mbcset->range_ends[mbcset->nranges++] = end_collseq;
-# endif /* RE_ENABLE_I18N */
-
- /* Build the table for single byte characters. */
- for (ch = 0; ch <= SBC_MAX; ch++)
- {
- uint32_t ch_collseq;
- /*
- if (MB_CUR_MAX == 1)
- */
- if (nrules == 0)
- ch_collseq = collseqmb[ch];
- else
- ch_collseq = collseq_table_lookup (collseqwc, __btowc (ch));
- if (start_collseq <= ch_collseq && ch_collseq <= end_collseq)
- bitset_set (sbcset, ch);
- }
- return REG_NOERROR;
- }
-
- /* Local function for parse_bracket_exp used in _LIBC environement.
- Build the collating element which is represented by NAME.
- The result are written to MBCSET and SBCSET.
- COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
- pointer argument sinse we may update it. */
-
- static inline reg_errcode_t
-# ifdef RE_ENABLE_I18N
- build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name)
- re_charset_t *mbcset;
- int *coll_sym_alloc;
-# else /* not RE_ENABLE_I18N */
- build_collating_symbol (sbcset, name)
-# endif /* not RE_ENABLE_I18N */
- re_bitset_ptr_t sbcset;
- const unsigned char *name;
- {
- int32_t elem, idx;
- size_t name_len = strlen ((const char *) name);
- if (nrules != 0)
- {
- elem = seek_collating_symbol_entry (name, name_len);
- if (symb_table[2 * elem] != 0)
- {
- /* We found the entry. */
- idx = symb_table[2 * elem + 1];
- /* Skip the name of collating element name. */
- idx += 1 + extra[idx];
- }
- else if (symb_table[2 * elem] == 0 && name_len == 1)
- {
- /* No valid character, treat it as a normal
- character. */
- bitset_set (sbcset, name[0]);
- return REG_NOERROR;
- }
- else
- return REG_ECOLLATE;
-
-# ifdef RE_ENABLE_I18N
- /* Got valid collation sequence, add it as a new entry. */
- /* Check the space of the arrays. */
- if (*coll_sym_alloc == mbcset->ncoll_syms)
- {
- /* Not enough, realloc it. */
- /* +1 in case of mbcset->ncoll_syms is 0. */
- *coll_sym_alloc = 2 * mbcset->ncoll_syms + 1;
- /* Use realloc since mbcset->coll_syms is NULL
- if *alloc == 0. */
- mbcset->coll_syms = re_realloc (mbcset->coll_syms, int32_t,
- *coll_sym_alloc);
- if (BE (mbcset->coll_syms == NULL, 0))
- return REG_ESPACE;
- }
- mbcset->coll_syms[mbcset->ncoll_syms++] = idx;
-# endif /* RE_ENABLE_I18N */
- return REG_NOERROR;
- }
- else
- {
- if (BE (name_len != 1, 0))
- return REG_ECOLLATE;
- else
- {
- bitset_set (sbcset, name[0]);
- return REG_NOERROR;
- }
- }
- }
-#endif
-
- re_token_t br_token;
- re_bitset_ptr_t sbcset;
-#ifdef RE_ENABLE_I18N
- re_charset_t *mbcset;
- int coll_sym_alloc = 0, range_alloc = 0, mbchar_alloc = 0;
- int equiv_class_alloc = 0, char_class_alloc = 0;
-#else /* not RE_ENABLE_I18N */
- int non_match = 0;
-#endif /* not RE_ENABLE_I18N */
- bin_tree_t *work_tree;
- int token_len, new_idx;
-#ifdef _LIBC
- collseqmb = (const unsigned char *)
- _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
- nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
- if (nrules)
- {
- /*
- if (MB_CUR_MAX > 1)
- */
- collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
- table_size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_SYMB_HASH_SIZEMB);
- symb_table = (const int32_t *) _NL_CURRENT (LC_COLLATE,
- _NL_COLLATE_SYMB_TABLEMB);
- extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
- _NL_COLLATE_SYMB_EXTRAMB);
- }
-#endif
- sbcset = (re_bitset_ptr_t) calloc (sizeof (unsigned int), BITSET_UINTS);
-#ifdef RE_ENABLE_I18N
- mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
-#endif /* RE_ENABLE_I18N */
-#ifdef RE_ENABLE_I18N
- if (BE (sbcset == NULL || mbcset == NULL, 0))
-#else
- if (BE (sbcset == NULL, 0))
-#endif /* RE_ENABLE_I18N */
- {
- *err = REG_ESPACE;
- return NULL;
- }
-
- token_len = peek_token_bracket (token, regexp, syntax);
- if (BE (token->type == END_OF_RE, 0))
- {
- *err = REG_BADPAT;
- goto parse_bracket_exp_free_return;
- }
- if (token->type == OP_NON_MATCH_LIST)
- {
-#ifdef RE_ENABLE_I18N
- int i;
- mbcset->non_match = 1;
-#else /* not RE_ENABLE_I18N */
- non_match = 1;
-#endif /* not RE_ENABLE_I18N */
- if (syntax & RE_HAT_LISTS_NOT_NEWLINE)
- bitset_set (sbcset, '\0');
- re_string_skip_bytes (regexp, token_len); /* Skip a token. */
- token_len = peek_token_bracket (token, regexp, syntax);
- if (BE (token->type == END_OF_RE, 0))
- {
- *err = REG_BADPAT;
- goto parse_bracket_exp_free_return;
- }
-#ifdef RE_ENABLE_I18N
- if (MB_CUR_MAX > 1)
- for (i = 0; i < SBC_MAX; ++i)
- if (__btowc (i) == WEOF)
- bitset_set (sbcset, i);
-#endif /* RE_ENABLE_I18N */
- }
-
- /* We treat the first ']' as a normal character. */
- if (token->type == OP_CLOSE_BRACKET)
- token->type = CHARACTER;
-
- while (1)
- {
- bracket_elem_t start_elem, end_elem;
- unsigned char start_name_buf[BRACKET_NAME_BUF_SIZE];
- unsigned char end_name_buf[BRACKET_NAME_BUF_SIZE];
- reg_errcode_t ret;
- int token_len2 = 0, is_range_exp = 0;
- re_token_t token2;
-
- start_elem.opr.name = start_name_buf;
- ret = parse_bracket_element (&start_elem, regexp, token, token_len, dfa,
- syntax);
- if (BE (ret != REG_NOERROR, 0))
- {
- *err = ret;
- goto parse_bracket_exp_free_return;
- }
-
- token_len = peek_token_bracket (token, regexp, syntax);
- if (BE (token->type == END_OF_RE, 0))
- {
- *err = REG_BADPAT;
- goto parse_bracket_exp_free_return;
- }
- if (token->type == OP_CHARSET_RANGE)
- {
- re_string_skip_bytes (regexp, token_len); /* Skip '-'. */
- token_len2 = peek_token_bracket (&token2, regexp, syntax);
- if (BE (token->type == END_OF_RE, 0))
- {
- *err = REG_BADPAT;
- goto parse_bracket_exp_free_return;
- }
- if (token2.type == OP_CLOSE_BRACKET)
- {
- /* We treat the last '-' as a normal character. */
- re_string_skip_bytes (regexp, -token_len);
- token->type = CHARACTER;
- }
- else
- is_range_exp = 1;
- }
-
- if (is_range_exp == 1)
- {
- end_elem.opr.name = end_name_buf;
- ret = parse_bracket_element (&end_elem, regexp, &token2, token_len2,
- dfa, syntax);
- if (BE (ret != REG_NOERROR, 0))
- {
- *err = ret;
- goto parse_bracket_exp_free_return;
- }
-
- token_len = peek_token_bracket (token, regexp, syntax);
- if (BE (token->type == END_OF_RE, 0))
- {
- *err = REG_BADPAT;
- goto parse_bracket_exp_free_return;
- }
- *err = build_range_exp (sbcset,
-#ifdef RE_ENABLE_I18N
- mbcset, &range_alloc,
-#endif /* RE_ENABLE_I18N */
- &start_elem, &end_elem);
- if (BE (*err != REG_NOERROR, 0))
- goto parse_bracket_exp_free_return;
- }
- else
- {
- switch (start_elem.type)
- {
- case SB_CHAR:
- bitset_set (sbcset, start_elem.opr.ch);
- break;
-#ifdef RE_ENABLE_I18N
- case MB_CHAR:
- /* Check whether the array has enough space. */
- if (mbchar_alloc == mbcset->nmbchars)
- {
- /* Not enough, realloc it. */
- /* +1 in case of mbcset->nmbchars is 0. */
- mbchar_alloc = 2 * mbcset->nmbchars + 1;
- /* Use realloc since array is NULL if *alloc == 0. */
- mbcset->mbchars = re_realloc (mbcset->mbchars, wchar_t,
- mbchar_alloc);
- if (BE (mbcset->mbchars == NULL, 0))
- goto parse_bracket_exp_espace;
- }
- mbcset->mbchars[mbcset->nmbchars++] = start_elem.opr.wch;
- break;
-#endif /* RE_ENABLE_I18N */
- case EQUIV_CLASS:
- *err = build_equiv_class (sbcset,
-#ifdef RE_ENABLE_I18N
- mbcset, &equiv_class_alloc,
-#endif /* RE_ENABLE_I18N */
- start_elem.opr.name);
- if (BE (*err != REG_NOERROR, 0))
- goto parse_bracket_exp_free_return;
- break;
- case COLL_SYM:
- *err = build_collating_symbol (sbcset,
-#ifdef RE_ENABLE_I18N
- mbcset, &coll_sym_alloc,
-#endif /* RE_ENABLE_I18N */
- start_elem.opr.name);
- if (BE (*err != REG_NOERROR, 0))
- goto parse_bracket_exp_free_return;
- break;
- case CHAR_CLASS:
- *err = build_charclass (sbcset,
-#ifdef RE_ENABLE_I18N
- mbcset, &char_class_alloc,
-#endif /* RE_ENABLE_I18N */
- start_elem.opr.name, syntax);
- if (BE (*err != REG_NOERROR, 0))
- goto parse_bracket_exp_free_return;
- break;
- default:
- assert (0);
- break;
- }
- }
- if (token->type == OP_CLOSE_BRACKET)
- break;
- }
-
- re_string_skip_bytes (regexp, token_len); /* Skip a token. */
-
- /* If it is non-matching list. */
-#ifdef RE_ENABLE_I18N
- if (mbcset->non_match)
-#else /* not RE_ENABLE_I18N */
- if (non_match)
-#endif /* not RE_ENABLE_I18N */
- bitset_not (sbcset);
-
- /* Build a tree for simple bracket. */
- br_token.type = SIMPLE_BRACKET;
- br_token.opr.sbcset = sbcset;
- new_idx = re_dfa_add_node (dfa, br_token, 0);
- work_tree = create_tree (NULL, NULL, 0, new_idx);
- if (BE (new_idx == -1 || work_tree == NULL, 0))
- goto parse_bracket_exp_espace;
-
-#ifdef RE_ENABLE_I18N
- if (mbcset->nmbchars || mbcset->ncoll_syms || mbcset->nequiv_classes
- || mbcset->nranges || (MB_CUR_MAX > 1 && (mbcset->nchar_classes
- || mbcset->non_match)))
- {
- re_token_t alt_token;
- bin_tree_t *mbc_tree;
- /* Build a tree for complex bracket. */
- br_token.type = COMPLEX_BRACKET;
- br_token.opr.mbcset = mbcset;
- dfa->has_mb_node = 1;
- new_idx = re_dfa_add_node (dfa, br_token, 0);
- mbc_tree = create_tree (NULL, NULL, 0, new_idx);
- if (BE (new_idx == -1 || mbc_tree == NULL, 0))
- goto parse_bracket_exp_espace;
- /* Then join them by ALT node. */
- dfa->has_plural_match = 1;
- alt_token.type = OP_ALT;
- new_idx = re_dfa_add_node (dfa, alt_token, 0);
- work_tree = create_tree (work_tree, mbc_tree, 0, new_idx);
- if (BE (new_idx != -1 && mbc_tree != NULL, 1))
- return work_tree;
- }
- else
- {
- free_charset (mbcset);
- return work_tree;
- }
-#else /* not RE_ENABLE_I18N */
- return work_tree;
-#endif /* not RE_ENABLE_I18N */
-
- parse_bracket_exp_espace:
- *err = REG_ESPACE;
- parse_bracket_exp_free_return:
- re_free (sbcset);
-#ifdef RE_ENABLE_I18N
- free_charset (mbcset);
-#endif /* RE_ENABLE_I18N */
- return NULL;
-}
-
-/* Parse an element in the bracket expression. */
-
-static reg_errcode_t
-parse_bracket_element (elem, regexp, token, token_len, dfa, syntax)
- bracket_elem_t *elem;
- re_string_t *regexp;
- re_token_t *token;
- int token_len;
- re_dfa_t *dfa;
- reg_syntax_t syntax;
-{
-#ifdef RE_ENABLE_I18N
- int cur_char_size;
- cur_char_size = re_string_char_size_at (regexp, re_string_cur_idx (regexp));
- if (cur_char_size > 1)
- {
- elem->type = MB_CHAR;
- elem->opr.wch = re_string_wchar_at (regexp, re_string_cur_idx (regexp));
- re_string_skip_bytes (regexp, cur_char_size);
- return REG_NOERROR;
- }
-#endif /* RE_ENABLE_I18N */
- re_string_skip_bytes (regexp, token_len); /* Skip a token. */
- if (token->type == OP_OPEN_COLL_ELEM || token->type == OP_OPEN_CHAR_CLASS
- || token->type == OP_OPEN_EQUIV_CLASS)
- return parse_bracket_symbol (elem, regexp, token);
- elem->type = SB_CHAR;
- elem->opr.ch = token->opr.c;
- return REG_NOERROR;
-}
-
-/* Parse a bracket symbol in the bracket expression. Bracket symbols are
- such as [:<character_class>:], [.<collating_element>.], and
- [=<equivalent_class>=]. */
-
-static reg_errcode_t
-parse_bracket_symbol (elem, regexp, token)
- bracket_elem_t *elem;
- re_string_t *regexp;
- re_token_t *token;
-{
- unsigned char ch, delim = token->opr.c;
- int i = 0;
- for (;; ++i)
- {
- if (re_string_eoi(regexp) || i >= BRACKET_NAME_BUF_SIZE)
- return REG_EBRACK;
- if (token->type == OP_OPEN_CHAR_CLASS)
- ch = re_string_fetch_byte_case (regexp);
- else
- ch = re_string_fetch_byte (regexp);
- if (ch == delim && re_string_peek_byte (regexp, 0) == ']')
- break;
- elem->opr.name[i] = ch;
- }
- re_string_skip_bytes (regexp, 1);
- elem->opr.name[i] = '\0';
- switch (token->type)
- {
- case OP_OPEN_COLL_ELEM:
- elem->type = COLL_SYM;
- break;
- case OP_OPEN_EQUIV_CLASS:
- elem->type = EQUIV_CLASS;
- break;
- case OP_OPEN_CHAR_CLASS:
- elem->type = CHAR_CLASS;
- break;
- default:
- break;
- }
- return REG_NOERROR;
-}
-
- /* Helper function for parse_bracket_exp.
- Build the equivalence class which is represented by NAME.
- The result are written to MBCSET and SBCSET.
- EQUIV_CLASS_ALLOC is the allocated size of mbcset->equiv_classes,
- is a pointer argument sinse we may update it. */
-
-static reg_errcode_t
-#ifdef RE_ENABLE_I18N
-build_equiv_class (sbcset, mbcset, equiv_class_alloc, name)
- re_charset_t *mbcset;
- int *equiv_class_alloc;
-#else /* not RE_ENABLE_I18N */
-build_equiv_class (sbcset, name)
-#endif /* not RE_ENABLE_I18N */
- re_bitset_ptr_t sbcset;
- const unsigned char *name;
-{
-#if defined _LIBC && defined RE_ENABLE_I18N
- uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
- if (nrules != 0)
- {
- const int32_t *table, *indirect;
- const unsigned char *weights, *extra, *cp;
- unsigned char char_buf[2];
- int32_t idx1, idx2;
- unsigned int ch;
- size_t len;
- /* This #include defines a local function! */
-# include <locale/weight.h>
- /* Calculate the index for equivalence class. */
- cp = name;
- table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
- weights = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
- _NL_COLLATE_WEIGHTMB);
- extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
- _NL_COLLATE_EXTRAMB);
- indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
- _NL_COLLATE_INDIRECTMB);
- idx1 = findidx (&cp);
- if (BE (idx1 == 0 || cp < name + strlen ((const char *) name), 0))
- /* This isn't a valid character. */
- return REG_ECOLLATE;
-
- /* Build single byte matcing table for this equivalence class. */
- char_buf[1] = (unsigned char) '\0';
- len = weights[idx1];
- for (ch = 0; ch < SBC_MAX; ++ch)
- {
- char_buf[0] = ch;
- cp = char_buf;
- idx2 = findidx (&cp);
-/*
- idx2 = table[ch];
-*/
- if (idx2 == 0)
- /* This isn't a valid character. */
- continue;
- if (len == weights[idx2])
- {
- int cnt = 0;
- while (cnt <= len &&
- weights[idx1 + 1 + cnt] == weights[idx2 + 1 + cnt])
- ++cnt;
-
- if (cnt > len)
- bitset_set (sbcset, ch);
- }
- }
- /* Check whether the array has enough space. */
- if (*equiv_class_alloc == mbcset->nequiv_classes)
- {
- /* Not enough, realloc it. */
- /* +1 in case of mbcset->nequiv_classes is 0. */
- *equiv_class_alloc = 2 * mbcset->nequiv_classes + 1;
- /* Use realloc since the array is NULL if *alloc == 0. */
- mbcset->equiv_classes = re_realloc (mbcset->equiv_classes, int32_t,
- *equiv_class_alloc);
- if (BE (mbcset->equiv_classes == NULL, 0))
- return REG_ESPACE;
- }
- mbcset->equiv_classes[mbcset->nequiv_classes++] = idx1;
- }
- else
-#endif /* _LIBC && RE_ENABLE_I18N */
- {
- if (BE (strlen ((const char *) name) != 1, 0))
- return REG_ECOLLATE;
- bitset_set (sbcset, *name);
- }
- return REG_NOERROR;
-}
-
- /* Helper function for parse_bracket_exp.
- Build the character class which is represented by NAME.
- The result are written to MBCSET and SBCSET.
- CHAR_CLASS_ALLOC is the allocated size of mbcset->char_classes,
- is a pointer argument sinse we may update it. */
-
-static reg_errcode_t
-#ifdef RE_ENABLE_I18N
-build_charclass (sbcset, mbcset, char_class_alloc, class_name, syntax)
- re_charset_t *mbcset;
- int *char_class_alloc;
-#else /* not RE_ENABLE_I18N */
-build_charclass (sbcset, class_name, syntax)
-#endif /* not RE_ENABLE_I18N */
- re_bitset_ptr_t sbcset;
- const unsigned char *class_name;
- reg_syntax_t syntax;
-{
- int i;
- const char *name = (const char *) class_name;
-
- /* In case of REG_ICASE "upper" and "lower" match the both of
- upper and lower cases. */
- if ((syntax & RE_ICASE)
- && (strcmp (name, "upper") == 0 || strcmp (name, "lower") == 0))
- name = "alpha";
-
-#ifdef RE_ENABLE_I18N
- /* Check the space of the arrays. */
- if (*char_class_alloc == mbcset->nchar_classes)
- {
- /* Not enough, realloc it. */
- /* +1 in case of mbcset->nchar_classes is 0. */
- *char_class_alloc = 2 * mbcset->nchar_classes + 1;
- /* Use realloc since array is NULL if *alloc == 0. */
- mbcset->char_classes = re_realloc (mbcset->char_classes, wctype_t,
- *char_class_alloc);
- if (BE (mbcset->char_classes == NULL, 0))
- return REG_ESPACE;
- }
- mbcset->char_classes[mbcset->nchar_classes++] = __wctype (name);
-#endif /* RE_ENABLE_I18N */
-
-#define BUILD_CHARCLASS_LOOP(ctype_func)\
- for (i = 0; i < SBC_MAX; ++i) \
- { \
- if (ctype_func (i)) \
- bitset_set (sbcset, i); \
- }
-
- if (strcmp (name, "alnum") == 0)
- BUILD_CHARCLASS_LOOP (isalnum)
- else if (strcmp (name, "cntrl") == 0)
- BUILD_CHARCLASS_LOOP (iscntrl)
- else if (strcmp (name, "lower") == 0)
- BUILD_CHARCLASS_LOOP (islower)
- else if (strcmp (name, "space") == 0)
- BUILD_CHARCLASS_LOOP (isspace)
- else if (strcmp (name, "alpha") == 0)
- BUILD_CHARCLASS_LOOP (isalpha)
- else if (strcmp (name, "digit") == 0)
- BUILD_CHARCLASS_LOOP (isdigit)
- else if (strcmp (name, "print") == 0)
- BUILD_CHARCLASS_LOOP (isprint)
- else if (strcmp (name, "upper") == 0)
- BUILD_CHARCLASS_LOOP (isupper)
- else if (strcmp (name, "blank") == 0)
- BUILD_CHARCLASS_LOOP (isblank)
- else if (strcmp (name, "graph") == 0)
- BUILD_CHARCLASS_LOOP (isgraph)
- else if (strcmp (name, "punct") == 0)
- BUILD_CHARCLASS_LOOP (ispunct)
- else if (strcmp (name, "xdigit") == 0)
- BUILD_CHARCLASS_LOOP (isxdigit)
- else
- return REG_ECTYPE;
-
- return REG_NOERROR;
-}
-
-static bin_tree_t *
-build_word_op (dfa, not, err)
- re_dfa_t *dfa;
- int not;
- reg_errcode_t *err;
-{
- re_bitset_ptr_t sbcset;
-#ifdef RE_ENABLE_I18N
- re_charset_t *mbcset;
- int alloc = 0;
-#else /* not RE_ENABLE_I18N */
- int non_match = 0;
-#endif /* not RE_ENABLE_I18N */
- reg_errcode_t ret;
- re_token_t br_token;
- bin_tree_t *tree;
- int new_idx;
-
- sbcset = (re_bitset_ptr_t) calloc (sizeof (unsigned int), BITSET_UINTS);
-#ifdef RE_ENABLE_I18N
- mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
-#endif /* RE_ENABLE_I18N */
-
-#ifdef RE_ENABLE_I18N
- if (BE (sbcset == NULL || mbcset == NULL, 0))
-#else /* not RE_ENABLE_I18N */
- if (BE (sbcset == NULL, 0))
-#endif /* not RE_ENABLE_I18N */
- {
- *err = REG_ESPACE;
- return NULL;
- }
-
- if (not)
- {
-#ifdef RE_ENABLE_I18N
- int i;
- /*
- if (syntax & RE_HAT_LISTS_NOT_NEWLINE)
- bitset_set(cset->sbcset, '\0');
- */
- mbcset->non_match = 1;
- if (MB_CUR_MAX > 1)
- for (i = 0; i < SBC_MAX; ++i)
- if (__btowc (i) == WEOF)
- bitset_set (sbcset, i);
-#else /* not RE_ENABLE_I18N */
- non_match = 1;
-#endif /* not RE_ENABLE_I18N */
- }
-
- /* We don't care the syntax in this case. */
- ret = build_charclass (sbcset,
-#ifdef RE_ENABLE_I18N
- mbcset, &alloc,
-#endif /* RE_ENABLE_I18N */
- (const unsigned char *) "alpha", 0);
-
- if (BE (ret != REG_NOERROR, 0))
- {
- re_free (sbcset);
-#ifdef RE_ENABLE_I18N
- free_charset (mbcset);
-#endif /* RE_ENABLE_I18N */
- *err = ret;
- return NULL;
- }
- /* \w match '_' also. */
- bitset_set (sbcset, '_');
-
- /* If it is non-matching list. */
-#ifdef RE_ENABLE_I18N
- if (mbcset->non_match)
-#else /* not RE_ENABLE_I18N */
- if (non_match)
-#endif /* not RE_ENABLE_I18N */
- bitset_not (sbcset);
-
- /* Build a tree for simple bracket. */
- br_token.type = SIMPLE_BRACKET;
- br_token.opr.sbcset = sbcset;
- new_idx = re_dfa_add_node (dfa, br_token, 0);
- tree = create_tree (NULL, NULL, 0, new_idx);
- if (BE (new_idx == -1 || tree == NULL, 0))
- goto build_word_op_espace;
-
-#ifdef RE_ENABLE_I18N
- if (MB_CUR_MAX > 1)
- {
- re_token_t alt_token;
- bin_tree_t *mbc_tree;
- /* Build a tree for complex bracket. */
- br_token.type = COMPLEX_BRACKET;
- br_token.opr.mbcset = mbcset;
- dfa->has_mb_node = 1;
- new_idx = re_dfa_add_node (dfa, br_token, 0);
- mbc_tree = create_tree (NULL, NULL, 0, new_idx);
- if (BE (new_idx == -1 || mbc_tree == NULL, 0))
- goto build_word_op_espace;
- /* Then join them by ALT node. */
- alt_token.type = OP_ALT;
- new_idx = re_dfa_add_node (dfa, alt_token, 0);
- tree = create_tree (tree, mbc_tree, 0, new_idx);
- if (BE (new_idx != -1 && mbc_tree != NULL, 1))
- return tree;
- }
- else
- {
- free_charset (mbcset);
- return tree;
- }
-#else /* not RE_ENABLE_I18N */
- return tree;
-#endif /* not RE_ENABLE_I18N */
-
- build_word_op_espace:
- re_free (sbcset);
-#ifdef RE_ENABLE_I18N
- free_charset (mbcset);
-#endif /* RE_ENABLE_I18N */
- *err = REG_ESPACE;
- return NULL;
-}
-
-/* This is intended for the expressions like "a{1,3}".
- Fetch a number from `input', and return the number.
- Return -1, if the number field is empty like "{,1}".
- Return -2, If an error is occured. */
-
-static int
-fetch_number (input, token, syntax)
- re_string_t *input;
- re_token_t *token;
- reg_syntax_t syntax;
-{
- int num = -1;
- unsigned char c;
- while (1)
- {
- *token = fetch_token (input, syntax);
- c = token->opr.c;
- if (BE (token->type == END_OF_RE, 0))
- return -2;
- if (token->type == OP_CLOSE_DUP_NUM || c == ',')
- break;
- num = ((token->type != CHARACTER || c < '0' || '9' < c || num == -2)
- ? -2 : ((num == -1) ? c - '0' : num * 10 + c - '0'));
- num = (num > RE_DUP_MAX) ? -2 : num;
- }
- return num;
-}
-\f
-#ifdef RE_ENABLE_I18N
-static void
-free_charset (re_charset_t *cset)
-{
- re_free (cset->mbchars);
-# ifdef _LIBC
- re_free (cset->coll_syms);
- re_free (cset->equiv_classes);
- re_free (cset->range_starts);
- re_free (cset->range_ends);
-# endif
- re_free (cset->char_classes);
- re_free (cset);
-}
-#endif /* RE_ENABLE_I18N */
-\f
-/* Functions for binary tree operation. */
-
-/* Create a node of tree.
- Note: This function automatically free left and right if malloc fails. */
-
-static bin_tree_t *
-create_tree (left, right, type, index)
- bin_tree_t *left;
- bin_tree_t *right;
- re_token_type_t type;
- int index;
-{
- bin_tree_t *tree;
- tree = re_malloc (bin_tree_t, 1);
- if (BE (tree == NULL, 0))
- {
- free_bin_tree (left);
- free_bin_tree (right);
- return NULL;
- }
- tree->parent = NULL;
- tree->left = left;
- tree->right = right;
- tree->type = type;
- tree->node_idx = index;
- tree->first = -1;
- tree->next = -1;
- re_node_set_init_empty (&tree->eclosure);
-
- if (left != NULL)
- left->parent = tree;
- if (right != NULL)
- right->parent = tree;
- return tree;
-}
-
-/* Free the sub tree pointed by TREE. */
-
-static void
-free_bin_tree (tree)
- bin_tree_t *tree;
-{
- if (tree == NULL)
- return;
- /*re_node_set_free (&tree->eclosure);*/
- free_bin_tree (tree->left);
- free_bin_tree (tree->right);
- re_free (tree);
-}
-
-/* Duplicate the node SRC, and return new node. */
-
-static bin_tree_t *
-duplicate_tree (src, dfa)
- const bin_tree_t *src;
- re_dfa_t *dfa;
-{
- bin_tree_t *left = NULL, *right = NULL, *new_tree;
- int new_node_idx;
- /* Since node indies must be according to Post-order of the tree,
- we must duplicate the left at first. */
- if (src->left != NULL)
- {
- left = duplicate_tree (src->left, dfa);
- if (left == NULL)
- return NULL;
- }
-
- /* Secondaly, duplicate the right. */
- if (src->right != NULL)
- {
- right = duplicate_tree (src->right, dfa);
- if (right == NULL)
- {
- free_bin_tree (left);
- return NULL;
- }
- }
-
- /* At last, duplicate itself. */
- if (src->type == NON_TYPE)
- {
- new_node_idx = re_dfa_add_node (dfa, dfa->nodes[src->node_idx], 0);
- dfa->nodes[new_node_idx].duplicated = 1;
- if (BE (new_node_idx == -1, 0))
- {
- free_bin_tree (left);
- free_bin_tree (right);
- return NULL;
- }
- }
- else
- new_node_idx = src->type;
-
- new_tree = create_tree (left, right, src->type, new_node_idx);
- if (BE (new_tree == NULL, 0))
- {
- free_bin_tree (left);
- free_bin_tree (right);
- }
- return new_tree;
-}
+++ /dev/null
-/*
- * Regular Expression Functions from glibc 2.3.2
- * (renamed to sh_* to avoid clashes with the system libraries)
- */
-
-#ifndef _UCW_REGEX_H
-#define _UCW_REGEX_H
-
-#define regfree sh_regfree
-#define regexec sh_regexec
-#define regcomp sh_regcomp
-#define regerror sh_regerror
-#define re_set_registers sh_re_set_registers
-#define re_match_2 sh_re_match2
-#define re_match sh_re_match
-#define re_search sh_re_search
-#define re_compile_pattern sh_re_compile_pattern
-#define re_set_syntax sh_re_set_syntax
-#define re_search_2 sh_re_search_2
-#define re_compile_fastmap sh_re_compile_fastmap
-
-#include "lib/regex/regex.h"
-
-#endif
+++ /dev/null
-/*
- * Regular Expression Functions from glibc 2.3.2
- */
-
-#include <sys/types.h>
-#include "regex-sh.h"
-#include "regex_internal.h"
-#include "regex_internal.c"
-#include "regcomp.c"
-#include "regexec.c"
+++ /dev/null
-/* Definitions for data structures and routines for the regular
- expression library.
- Copyright (C) 1985,1989-93,1995-98,2000,2001,2002
- Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
-
-#ifndef _REGEX_H
-#define _REGEX_H 1
-
-/* Allow the use in C++ code. */
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* POSIX says that <sys/types.h> must be included (by the caller) before
- <regex.h>. */
-
-#if !defined _POSIX_C_SOURCE && !defined _POSIX_SOURCE && defined VMS
-/* VMS doesn't have `size_t' in <sys/types.h>, even though POSIX says it
- should be there. */
-# include <stddef.h>
-#endif
-
-/* The following two types have to be signed and unsigned integer type
- wide enough to hold a value of a pointer. For most ANSI compilers
- ptrdiff_t and size_t should be likely OK. Still size of these two
- types is 2 for Microsoft C. Ugh... */
-typedef long int s_reg_t;
-typedef unsigned long int active_reg_t;
-
-/* The following bits are used to determine the regexp syntax we
- recognize. The set/not-set meanings are chosen so that Emacs syntax
- remains the value 0. The bits are given in alphabetical order, and
- the definitions shifted by one from the previous bit; thus, when we
- add or remove a bit, only one other definition need change. */
-typedef unsigned long int reg_syntax_t;
-
-/* If this bit is not set, then \ inside a bracket expression is literal.
- If set, then such a \ quotes the following character. */
-#define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1)
-
-/* If this bit is not set, then + and ? are operators, and \+ and \? are
- literals.
- If set, then \+ and \? are operators and + and ? are literals. */
-#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1)
-
-/* If this bit is set, then character classes are supported. They are:
- [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:],
- [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
- If not set, then character classes are not supported. */
-#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1)
-
-/* If this bit is set, then ^ and $ are always anchors (outside bracket
- expressions, of course).
- If this bit is not set, then it depends:
- ^ is an anchor if it is at the beginning of a regular
- expression or after an open-group or an alternation operator;
- $ is an anchor if it is at the end of a regular expression, or
- before a close-group or an alternation operator.
-
- This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
- POSIX draft 11.2 says that * etc. in leading positions is undefined.
- We already implemented a previous draft which made those constructs
- invalid, though, so we haven't changed the code back. */
-#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1)
-
-/* If this bit is set, then special characters are always special
- regardless of where they are in the pattern.
- If this bit is not set, then special characters are special only in
- some contexts; otherwise they are ordinary. Specifically,
- * + ? and intervals are only special when not after the beginning,
- open-group, or alternation operator. */
-#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)
-
-/* If this bit is set, then *, +, ?, and { cannot be first in an re or
- immediately after an alternation or begin-group operator. */
-#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1)
-
-/* If this bit is set, then . matches newline.
- If not set, then it doesn't. */
-#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1)
-
-/* If this bit is set, then . doesn't match NUL.
- If not set, then it does. */
-#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1)
-
-/* If this bit is set, nonmatching lists [^...] do not match newline.
- If not set, they do. */
-#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1)
-
-/* If this bit is set, either \{...\} or {...} defines an
- interval, depending on RE_NO_BK_BRACES.
- If not set, \{, \}, {, and } are literals. */
-#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)
-
-/* If this bit is set, +, ? and | aren't recognized as operators.
- If not set, they are. */
-#define RE_LIMITED_OPS (RE_INTERVALS << 1)
-
-/* If this bit is set, newline is an alternation operator.
- If not set, newline is literal. */
-#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)
-
-/* If this bit is set, then `{...}' defines an interval, and \{ and \}
- are literals.
- If not set, then `\{...\}' defines an interval. */
-#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)
-
-/* If this bit is set, (...) defines a group, and \( and \) are literals.
- If not set, \(...\) defines a group, and ( and ) are literals. */
-#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1)
-
-/* If this bit is set, then \<digit> matches <digit>.
- If not set, then \<digit> is a back-reference. */
-#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)
-
-/* If this bit is set, then | is an alternation operator, and \| is literal.
- If not set, then \| is an alternation operator, and | is literal. */
-#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1)
-
-/* If this bit is set, then an ending range point collating higher
- than the starting range point, as in [z-a], is invalid.
- If not set, then when ending range point collates higher than the
- starting range point, the range is ignored. */
-#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)
-
-/* If this bit is set, then an unmatched ) is ordinary.
- If not set, then an unmatched ) is invalid. */
-#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)
-
-/* If this bit is set, succeed as soon as we match the whole pattern,
- without further backtracking. */
-#define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1)
-
-/* If this bit is set, do not process the GNU regex operators.
- If not set, then the GNU regex operators are recognized. */
-#define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1)
-
-/* If this bit is set, turn on internal regex debugging.
- If not set, and debugging was on, turn it off.
- This only works if regex.c is compiled -DDEBUG.
- We define this bit always, so that all that's needed to turn on
- debugging is to recompile regex.c; the calling code can always have
- this bit set, and it won't affect anything in the normal case. */
-#define RE_DEBUG (RE_NO_GNU_OPS << 1)
-
-/* If this bit is set, a syntactically invalid interval is treated as
- a string of ordinary characters. For example, the ERE 'a{1' is
- treated as 'a\{1'. */
-#define RE_INVALID_INTERVAL_ORD (RE_DEBUG << 1)
-
-/* If this bit is set, then ignore case when matching.
- If not set, then case is significant. */
-#define RE_ICASE (RE_INVALID_INTERVAL_ORD << 1)
-
-/* This global variable defines the particular regexp syntax to use (for
- some interfaces). When a regexp is compiled, the syntax used is
- stored in the pattern buffer, so changing this does not affect
- already-compiled regexps. */
-extern reg_syntax_t re_syntax_options;
-\f
-/* Define combinations of the above bits for the standard possibilities.
- (The [[[ comments delimit what gets put into the Texinfo file, so
- don't delete them!) */
-/* [[[begin syntaxes]]] */
-#define RE_SYNTAX_EMACS 0
-
-#define RE_SYNTAX_AWK \
- (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \
- | RE_NO_BK_PARENS | RE_NO_BK_REFS \
- | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \
- | RE_DOT_NEWLINE | RE_CONTEXT_INDEP_ANCHORS \
- | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS)
-
-#define RE_SYNTAX_GNU_AWK \
- ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DEBUG) \
- & ~(RE_DOT_NOT_NULL | RE_INTERVALS | RE_CONTEXT_INDEP_OPS \
- | RE_CONTEXT_INVALID_OPS ))
-
-#define RE_SYNTAX_POSIX_AWK \
- (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS \
- | RE_INTERVALS | RE_NO_GNU_OPS)
-
-#define RE_SYNTAX_GREP \
- (RE_BK_PLUS_QM | RE_CHAR_CLASSES \
- | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \
- | RE_NEWLINE_ALT)
-
-#define RE_SYNTAX_EGREP \
- (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \
- | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \
- | RE_NEWLINE_ALT | RE_NO_BK_PARENS \
- | RE_NO_BK_VBAR)
-
-#define RE_SYNTAX_POSIX_EGREP \
- (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES \
- | RE_INVALID_INTERVAL_ORD)
-
-/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */
-#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC
-
-#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC
-
-/* Syntax bits common to both basic and extended POSIX regex syntax. */
-#define _RE_SYNTAX_POSIX_COMMON \
- (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \
- | RE_INTERVALS | RE_NO_EMPTY_RANGES)
-
-#define RE_SYNTAX_POSIX_BASIC \
- (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM)
-
-/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes
- RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. Actually, this
- isn't minimal, since other operators, such as \`, aren't disabled. */
-#define RE_SYNTAX_POSIX_MINIMAL_BASIC \
- (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)
-
-#define RE_SYNTAX_POSIX_EXTENDED \
- (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
- | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \
- | RE_NO_BK_PARENS | RE_NO_BK_VBAR \
- | RE_CONTEXT_INVALID_OPS | RE_UNMATCHED_RIGHT_PAREN_ORD)
-
-/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INDEP_OPS is
- removed and RE_NO_BK_REFS is added. */
-#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \
- (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
- | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \
- | RE_NO_BK_PARENS | RE_NO_BK_REFS \
- | RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD)
-/* [[[end syntaxes]]] */
-\f
-/* Maximum number of duplicates an interval can allow. Some systems
- (erroneously) define this in other header files, but we want our
- value, so remove any previous define. */
-#ifdef RE_DUP_MAX
-# undef RE_DUP_MAX
-#endif
-/* If sizeof(int) == 2, then ((1 << 15) - 1) overflows. */
-#define RE_DUP_MAX (0x7fff)
-
-
-/* POSIX `cflags' bits (i.e., information for `regcomp'). */
-
-/* If this bit is set, then use extended regular expression syntax.
- If not set, then use basic regular expression syntax. */
-#define REG_EXTENDED 1
-
-/* If this bit is set, then ignore case when matching.
- If not set, then case is significant. */
-#define REG_ICASE (REG_EXTENDED << 1)
-
-/* If this bit is set, then anchors do not match at newline
- characters in the string.
- If not set, then anchors do match at newlines. */
-#define REG_NEWLINE (REG_ICASE << 1)
-
-/* If this bit is set, then report only success or fail in regexec.
- If not set, then returns differ between not matching and errors. */
-#define REG_NOSUB (REG_NEWLINE << 1)
-
-
-/* POSIX `eflags' bits (i.e., information for regexec). */
-
-/* If this bit is set, then the beginning-of-line operator doesn't match
- the beginning of the string (presumably because it's not the
- beginning of a line).
- If not set, then the beginning-of-line operator does match the
- beginning of the string. */
-#define REG_NOTBOL 1
-
-/* Like REG_NOTBOL, except for the end-of-line. */
-#define REG_NOTEOL (1 << 1)
-
-
-/* If any error codes are removed, changed, or added, update the
- `re_error_msg' table in regex.c. */
-typedef enum
-{
-#ifdef _XOPEN_SOURCE
- REG_ENOSYS = -1, /* This will never happen for this implementation. */
-#endif
-
- REG_NOERROR = 0, /* Success. */
- REG_NOMATCH, /* Didn't find a match (for regexec). */
-
- /* POSIX regcomp return error codes. (In the order listed in the
- standard.) */
- REG_BADPAT, /* Invalid pattern. */
- REG_ECOLLATE, /* Not implemented. */
- REG_ECTYPE, /* Invalid character class name. */
- REG_EESCAPE, /* Trailing backslash. */
- REG_ESUBREG, /* Invalid back reference. */
- REG_EBRACK, /* Unmatched left bracket. */
- REG_EPAREN, /* Parenthesis imbalance. */
- REG_EBRACE, /* Unmatched \{. */
- REG_BADBR, /* Invalid contents of \{\}. */
- REG_ERANGE, /* Invalid range end. */
- REG_ESPACE, /* Ran out of memory. */
- REG_BADRPT, /* No preceding re for repetition op. */
-
- /* Error codes we've added. */
- REG_EEND, /* Premature end. */
- REG_ESIZE, /* Compiled pattern bigger than 2^16 bytes. */
- REG_ERPAREN /* Unmatched ) or \); not returned from regcomp. */
-} reg_errcode_t;
-\f
-/* This data structure represents a compiled pattern. Before calling
- the pattern compiler, the fields `buffer', `allocated', `fastmap',
- `translate', and `no_sub' can be set. After the pattern has been
- compiled, the `re_nsub' field is available. All other fields are
- private to the regex routines. */
-
-#ifndef RE_TRANSLATE_TYPE
-# define RE_TRANSLATE_TYPE char *
-#endif
-
-struct re_pattern_buffer
-{
-/* [[[begin pattern_buffer]]] */
- /* Space that holds the compiled pattern. It is declared as
- `unsigned char *' because its elements are
- sometimes used as array indexes. */
- unsigned char *buffer;
-
- /* Number of bytes to which `buffer' points. */
- unsigned long int allocated;
-
- /* Number of bytes actually used in `buffer'. */
- unsigned long int used;
-
- /* Syntax setting with which the pattern was compiled. */
- reg_syntax_t syntax;
-
- /* Pointer to a fastmap, if any, otherwise zero. re_search uses
- the fastmap, if there is one, to skip over impossible
- starting points for matches. */
- char *fastmap;
-
- /* Either a translate table to apply to all characters before
- comparing them, or zero for no translation. The translation
- is applied to a pattern when it is compiled and to a string
- when it is matched. */
- RE_TRANSLATE_TYPE translate;
-
- /* Number of subexpressions found by the compiler. */
- size_t re_nsub;
-
- /* Zero if this pattern cannot match the empty string, one else.
- Well, in truth it's used only in `re_search_2', to see
- whether or not we should use the fastmap, so we don't set
- this absolutely perfectly; see `re_compile_fastmap' (the
- `duplicate' case). */
- unsigned can_be_null : 1;
-
- /* If REGS_UNALLOCATED, allocate space in the `regs' structure
- for `max (RE_NREGS, re_nsub + 1)' groups.
- If REGS_REALLOCATE, reallocate space if necessary.
- If REGS_FIXED, use what's there. */
-#define REGS_UNALLOCATED 0
-#define REGS_REALLOCATE 1
-#define REGS_FIXED 2
- unsigned regs_allocated : 2;
-
- /* Set to zero when `regex_compile' compiles a pattern; set to one
- by `re_compile_fastmap' if it updates the fastmap. */
- unsigned fastmap_accurate : 1;
-
- /* If set, `re_match_2' does not return information about
- subexpressions. */
- unsigned no_sub : 1;
-
- /* If set, a beginning-of-line anchor doesn't match at the
- beginning of the string. */
- unsigned not_bol : 1;
-
- /* Similarly for an end-of-line anchor. */
- unsigned not_eol : 1;
-
- /* If true, an anchor at a newline matches. */
- unsigned newline_anchor : 1;
-
-/* [[[end pattern_buffer]]] */
-};
-
-typedef struct re_pattern_buffer regex_t;
-\f
-/* Type for byte offsets within the string. POSIX mandates this. */
-typedef int regoff_t;
-
-
-/* This is the structure we store register match data in. See
- regex.texinfo for a full description of what registers match. */
-struct re_registers
-{
- unsigned num_regs;
- regoff_t *start;
- regoff_t *end;
-};
-
-
-/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer,
- `re_match_2' returns information about at least this many registers
- the first time a `regs' structure is passed. */
-#ifndef RE_NREGS
-# define RE_NREGS 30
-#endif
-
-
-/* POSIX specification for registers. Aside from the different names than
- `re_registers', POSIX uses an array of structures, instead of a
- structure of arrays. */
-typedef struct
-{
- regoff_t rm_so; /* Byte offset from string's start to substring's start. */
- regoff_t rm_eo; /* Byte offset from string's start to substring's end. */
-} regmatch_t;
-\f
-/* Declarations for routines. */
-
-/* To avoid duplicating every routine declaration -- once with a
- prototype (if we are ANSI), and once without (if we aren't) -- we
- use the following macro to declare argument types. This
- unfortunately clutters up the declarations a bit, but I think it's
- worth it. */
-
-#if __STDC__
-
-# define _RE_ARGS(args) args
-
-#else /* not __STDC__ */
-
-# define _RE_ARGS(args) ()
-
-#endif /* not __STDC__ */
-
-/* Sets the current default syntax to SYNTAX, and return the old syntax.
- You can also simply assign to the `re_syntax_options' variable. */
-extern reg_syntax_t re_set_syntax _RE_ARGS ((reg_syntax_t syntax));
-
-/* Compile the regular expression PATTERN, with length LENGTH
- and syntax given by the global `re_syntax_options', into the buffer
- BUFFER. Return NULL if successful, and an error string if not. */
-extern const char *re_compile_pattern
- _RE_ARGS ((const char *pattern, size_t length,
- struct re_pattern_buffer *buffer));
-
-
-/* Compile a fastmap for the compiled pattern in BUFFER; used to
- accelerate searches. Return 0 if successful and -2 if was an
- internal error. */
-extern int re_compile_fastmap _RE_ARGS ((struct re_pattern_buffer *buffer));
-
-
-/* Search in the string STRING (with length LENGTH) for the pattern
- compiled into BUFFER. Start searching at position START, for RANGE
- characters. Return the starting position of the match, -1 for no
- match, or -2 for an internal error. Also return register
- information in REGS (if REGS and BUFFER->no_sub are nonzero). */
-extern int re_search
- _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
- int length, int start, int range, struct re_registers *regs));
-
-
-/* Like `re_search', but search in the concatenation of STRING1 and
- STRING2. Also, stop searching at index START + STOP. */
-extern int re_search_2
- _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
- int length1, const char *string2, int length2,
- int start, int range, struct re_registers *regs, int stop));
-
-
-/* Like `re_search', but return how many characters in STRING the regexp
- in BUFFER matched, starting at position START. */
-extern int re_match
- _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
- int length, int start, struct re_registers *regs));
-
-
-/* Relates to `re_match' as `re_search_2' relates to `re_search'. */
-extern int re_match_2
- _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
- int length1, const char *string2, int length2,
- int start, struct re_registers *regs, int stop));
-
-
-/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
- ENDS. Subsequent matches using BUFFER and REGS will use this memory
- for recording register information. STARTS and ENDS must be
- allocated with malloc, and must each be at least `NUM_REGS * sizeof
- (regoff_t)' bytes long.
-
- If NUM_REGS == 0, then subsequent matches should allocate their own
- register data.
-
- Unless this function is called, the first search or match using
- PATTERN_BUFFER will allocate its own register data, without
- freeing the old data. */
-extern void re_set_registers
- _RE_ARGS ((struct re_pattern_buffer *buffer, struct re_registers *regs,
- unsigned num_regs, regoff_t *starts, regoff_t *ends));
-
-#if defined _REGEX_RE_COMP || defined _LIBC
-# ifndef _CRAY
-/* 4.2 bsd compatibility. */
-extern char *re_comp _RE_ARGS ((const char *));
-extern int re_exec _RE_ARGS ((const char *));
-# endif
-#endif
-
-/* GCC 2.95 and later have "__restrict"; C99 compilers have
- "restrict", and "configure" may have defined "restrict". */
-#ifndef __restrict
-# if ! (2 < __GNUC__ || (2 == __GNUC__ && 95 <= __GNUC_MINOR__))
-# if defined restrict || 199901L <= __STDC_VERSION__
-# define __restrict restrict
-# else
-# define __restrict
-# endif
-# endif
-#endif
-/* gcc 3.1 and up support the [restrict] syntax. */
-#ifndef __restrict_arr
-# if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)
-# define __restrict_arr __restrict
-# else
-# define __restrict_arr
-# endif
-#endif
-
-/* POSIX compatibility. */
-extern int regcomp _RE_ARGS ((regex_t *__restrict __preg,
- const char *__restrict __pattern,
- int __cflags));
-
-extern int regexec _RE_ARGS ((const regex_t *__restrict __preg,
- const char *__restrict __string, size_t __nmatch,
- regmatch_t __pmatch[__restrict_arr],
- int __eflags));
-
-extern size_t regerror _RE_ARGS ((int __errcode, const regex_t *__preg,
- char *__errbuf, size_t __errbuf_size));
-
-extern void regfree _RE_ARGS ((regex_t *__preg));
-
-
-#ifdef __cplusplus
-}
-#endif /* C++ */
-
-#endif /* regex.h */
-\f
-/*
-Local variables:
-make-backup-files: t
-version-control: t
-trim-versions-without-asking: nil
-End:
-*/
+++ /dev/null
-/* Extended regular expression matching and search library.
- Copyright (C) 2002, 2003 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
-
-static void re_string_construct_common (const char *str, int len,
- re_string_t *pstr,
- RE_TRANSLATE_TYPE trans, int icase);
-#ifdef RE_ENABLE_I18N
-static int re_string_skip_chars (re_string_t *pstr, int new_raw_idx,
- wint_t *last_wc);
-#endif /* RE_ENABLE_I18N */
-static re_dfastate_t *create_newstate_common (re_dfa_t *dfa,
- const re_node_set *nodes,
- unsigned int hash);
-static reg_errcode_t register_state (re_dfa_t *dfa, re_dfastate_t *newstate,
- unsigned int hash);
-static re_dfastate_t *create_ci_newstate (re_dfa_t *dfa,
- const re_node_set *nodes,
- unsigned int hash);
-static re_dfastate_t *create_cd_newstate (re_dfa_t *dfa,
- const re_node_set *nodes,
- unsigned int context,
- unsigned int hash);
-static unsigned int inline calc_state_hash (const re_node_set *nodes,
- unsigned int context);
-\f
-/* Functions for string operation. */
-
-/* This function allocate the buffers. It is necessary to call
- re_string_reconstruct before using the object. */
-
-static reg_errcode_t
-re_string_allocate (pstr, str, len, init_len, trans, icase)
- re_string_t *pstr;
- const char *str;
- int len, init_len, icase;
- RE_TRANSLATE_TYPE trans;
-{
- reg_errcode_t ret;
- int init_buf_len = (len + 1 < init_len) ? len + 1: init_len;
- re_string_construct_common (str, len, pstr, trans, icase);
- pstr->stop = pstr->len;
-
- ret = re_string_realloc_buffers (pstr, init_buf_len);
- if (BE (ret != REG_NOERROR, 0))
- return ret;
-
- pstr->mbs_case = (MBS_CASE_ALLOCATED (pstr) ? pstr->mbs_case
- : (unsigned char *) str);
- pstr->mbs = MBS_ALLOCATED (pstr) ? pstr->mbs : pstr->mbs_case;
- pstr->valid_len = (MBS_CASE_ALLOCATED (pstr) || MBS_ALLOCATED (pstr)
- || MB_CUR_MAX > 1) ? pstr->valid_len : len;
- return REG_NOERROR;
-}
-
-/* This function allocate the buffers, and initialize them. */
-
-static reg_errcode_t
-re_string_construct (pstr, str, len, trans, icase)
- re_string_t *pstr;
- const char *str;
- int len, icase;
- RE_TRANSLATE_TYPE trans;
-{
- reg_errcode_t ret;
- re_string_construct_common (str, len, pstr, trans, icase);
- pstr->stop = pstr->len;
- /* Set 0 so that this function can initialize whole buffers. */
- pstr->valid_len = 0;
-
- if (len > 0)
- {
- ret = re_string_realloc_buffers (pstr, len + 1);
- if (BE (ret != REG_NOERROR, 0))
- return ret;
- }
- pstr->mbs_case = (MBS_CASE_ALLOCATED (pstr) ? pstr->mbs_case
- : (unsigned char *) str);
- pstr->mbs = MBS_ALLOCATED (pstr) ? pstr->mbs : pstr->mbs_case;
-
- if (icase)
- {
-#ifdef RE_ENABLE_I18N
- if (MB_CUR_MAX > 1)
- build_wcs_upper_buffer (pstr);
- else
-#endif /* RE_ENABLE_I18N */
- build_upper_buffer (pstr);
- }
- else
- {
-#ifdef RE_ENABLE_I18N
- if (MB_CUR_MAX > 1)
- build_wcs_buffer (pstr);
- else
-#endif /* RE_ENABLE_I18N */
- {
- if (trans != NULL)
- re_string_translate_buffer (pstr);
- else
- pstr->valid_len = len;
- }
- }
-
- /* Initialized whole buffers, then valid_len == bufs_len. */
- pstr->valid_len = pstr->bufs_len;
- return REG_NOERROR;
-}
-
-/* Helper functions for re_string_allocate, and re_string_construct. */
-
-static reg_errcode_t
-re_string_realloc_buffers (pstr, new_buf_len)
- re_string_t *pstr;
- int new_buf_len;
-{
-#ifdef RE_ENABLE_I18N
- if (MB_CUR_MAX > 1)
- {
- wint_t *new_array = re_realloc (pstr->wcs, wint_t, new_buf_len);
- if (BE (new_array == NULL, 0))
- return REG_ESPACE;
- pstr->wcs = new_array;
- }
-#endif /* RE_ENABLE_I18N */
- if (MBS_ALLOCATED (pstr))
- {
- unsigned char *new_array = re_realloc (pstr->mbs, unsigned char,
- new_buf_len);
- if (BE (new_array == NULL, 0))
- return REG_ESPACE;
- pstr->mbs = new_array;
- }
- if (MBS_CASE_ALLOCATED (pstr))
- {
- unsigned char *new_array = re_realloc (pstr->mbs_case, unsigned char,
- new_buf_len);
- if (BE (new_array == NULL, 0))
- return REG_ESPACE;
- pstr->mbs_case = new_array;
- if (!MBS_ALLOCATED (pstr))
- pstr->mbs = pstr->mbs_case;
- }
- pstr->bufs_len = new_buf_len;
- return REG_NOERROR;
-}
-
-
-static void
-re_string_construct_common (str, len, pstr, trans, icase)
- const char *str;
- int len;
- re_string_t *pstr;
- RE_TRANSLATE_TYPE trans;
- int icase;
-{
- memset (pstr, '\0', sizeof (re_string_t));
- pstr->raw_mbs = (const unsigned char *) str;
- pstr->len = len;
- pstr->trans = trans;
- pstr->icase = icase ? 1 : 0;
-}
-
-#ifdef RE_ENABLE_I18N
-
-/* Build wide character buffer PSTR->WCS.
- If the byte sequence of the string are:
- <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
- Then wide character buffer will be:
- <wc1> , WEOF , <wc2> , WEOF , <wc3>
- We use WEOF for padding, they indicate that the position isn't
- a first byte of a multibyte character.
-
- Note that this function assumes PSTR->VALID_LEN elements are already
- built and starts from PSTR->VALID_LEN. */
-
-static void
-build_wcs_buffer (pstr)
- re_string_t *pstr;
-{
- mbstate_t prev_st;
- int byte_idx, end_idx, mbclen, remain_len;
- /* Build the buffers from pstr->valid_len to either pstr->len or
- pstr->bufs_len. */
- end_idx = (pstr->bufs_len > pstr->len)? pstr->len : pstr->bufs_len;
- for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
- {
- wchar_t wc;
- remain_len = end_idx - byte_idx;
- prev_st = pstr->cur_state;
- mbclen = mbrtowc (&wc, ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
- + byte_idx), remain_len, &pstr->cur_state);
- if (BE (mbclen == (size_t) -2, 0))
- {
- /* The buffer doesn't have enough space, finish to build. */
- pstr->cur_state = prev_st;
- break;
- }
- else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0))
- {
- /* We treat these cases as a singlebyte character. */
- mbclen = 1;
- wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
- pstr->cur_state = prev_st;
- }
-
- /* Apply the translateion if we need. */
- if (pstr->trans != NULL && mbclen == 1)
- {
- int ch = pstr->trans[pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]];
- pstr->mbs_case[byte_idx] = ch;
- }
- /* Write wide character and padding. */
- pstr->wcs[byte_idx++] = wc;
- /* Write paddings. */
- for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
- pstr->wcs[byte_idx++] = WEOF;
- }
- pstr->valid_len = byte_idx;
-}
-
-/* Build wide character buffer PSTR->WCS like build_wcs_buffer,
- but for REG_ICASE. */
-
-static void
-build_wcs_upper_buffer (pstr)
- re_string_t *pstr;
-{
- mbstate_t prev_st;
- int byte_idx, end_idx, mbclen, remain_len;
- /* Build the buffers from pstr->valid_len to either pstr->len or
- pstr->bufs_len. */
- end_idx = (pstr->bufs_len > pstr->len)? pstr->len : pstr->bufs_len;
- for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
- {
- wchar_t wc;
- remain_len = end_idx - byte_idx;
- prev_st = pstr->cur_state;
- mbclen = mbrtowc (&wc, ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
- + byte_idx), remain_len, &pstr->cur_state);
- if (BE (mbclen == (size_t) -2, 0))
- {
- /* The buffer doesn't have enough space, finish to build. */
- pstr->cur_state = prev_st;
- break;
- }
- else if (mbclen == 1 || mbclen == (size_t) -1 || mbclen == 0)
- {
- /* In case of a singlebyte character. */
- int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
- /* Apply the translateion if we need. */
- if (pstr->trans != NULL && mbclen == 1)
- {
- ch = pstr->trans[ch];
- pstr->mbs_case[byte_idx] = ch;
- }
- pstr->wcs[byte_idx] = iswlower (wc) ? toupper (wc) : wc;
- pstr->mbs[byte_idx++] = islower (ch) ? toupper (ch) : ch;
- if (BE (mbclen == (size_t) -1, 0))
- pstr->cur_state = prev_st;
- }
- else /* mbclen > 1 */
- {
- if (iswlower (wc))
- wcrtomb ((char *) pstr->mbs + byte_idx, towupper (wc), &prev_st);
- else
- memcpy (pstr->mbs + byte_idx,
- pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
- pstr->wcs[byte_idx++] = iswlower (wc) ? toupper (wc) : wc;
- /* Write paddings. */
- for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
- pstr->wcs[byte_idx++] = WEOF;
- }
- }
- pstr->valid_len = byte_idx;
-}
-
-/* Skip characters until the index becomes greater than NEW_RAW_IDX.
- Return the index. */
-
-static int
-re_string_skip_chars (pstr, new_raw_idx, last_wc)
- re_string_t *pstr;
- int new_raw_idx;
- wint_t *last_wc;
-{
- mbstate_t prev_st;
- int rawbuf_idx, mbclen;
- wchar_t wc = 0;
-
- /* Skip the characters which are not necessary to check. */
- for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_len;
- rawbuf_idx < new_raw_idx;)
- {
- int remain_len;
- remain_len = pstr->len - rawbuf_idx;
- prev_st = pstr->cur_state;
- mbclen = mbrtowc (&wc, (const char *) pstr->raw_mbs + rawbuf_idx,
- remain_len, &pstr->cur_state);
- if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0))
- {
- /* We treat these cases as a singlebyte character. */
- mbclen = 1;
- pstr->cur_state = prev_st;
- }
- /* Then proceed the next character. */
- rawbuf_idx += mbclen;
- }
- *last_wc = (wint_t) wc;
- return rawbuf_idx;
-}
-#endif /* RE_ENABLE_I18N */
-
-/* Build the buffer PSTR->MBS, and apply the translation if we need.
- This function is used in case of REG_ICASE. */
-
-static void
-build_upper_buffer (pstr)
- re_string_t *pstr;
-{
- int char_idx, end_idx;
- end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
-
- for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx)
- {
- int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
- if (pstr->trans != NULL)
- {
- ch = pstr->trans[ch];
- pstr->mbs_case[char_idx] = ch;
- }
- if (islower (ch))
- pstr->mbs[char_idx] = toupper (ch);
- else
- pstr->mbs[char_idx] = ch;
- }
- pstr->valid_len = char_idx;
-}
-
-/* Apply TRANS to the buffer in PSTR. */
-
-static void
-re_string_translate_buffer (pstr)
- re_string_t *pstr;
-{
- int buf_idx, end_idx;
- end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
-
- for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx)
- {
- int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx];
- pstr->mbs_case[buf_idx] = pstr->trans[ch];
- }
-
- pstr->valid_len = buf_idx;
-}
-
-/* This function re-construct the buffers.
- Concretely, convert to wide character in case of MB_CUR_MAX > 1,
- convert to upper case in case of REG_ICASE, apply translation. */
-
-static reg_errcode_t
-re_string_reconstruct (pstr, idx, eflags, newline)
- re_string_t *pstr;
- int idx, eflags, newline;
-{
- int offset = idx - pstr->raw_mbs_idx;
- if (offset < 0)
- {
- /* Reset buffer. */
-#ifdef RE_ENABLE_I18N
- if (MB_CUR_MAX > 1)
- memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
-#endif /* RE_ENABLE_I18N */
- pstr->len += pstr->raw_mbs_idx;
- pstr->stop += pstr->raw_mbs_idx;
- pstr->valid_len = pstr->raw_mbs_idx = 0;
- pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
- : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
- if (!MBS_CASE_ALLOCATED (pstr))
- pstr->mbs_case = (unsigned char *) pstr->raw_mbs;
- if (!MBS_ALLOCATED (pstr) && !MBS_CASE_ALLOCATED (pstr))
- pstr->mbs = (unsigned char *) pstr->raw_mbs;
- offset = idx;
- }
-
- if (offset != 0)
- {
- /* Are the characters which are already checked remain? */
- if (offset < pstr->valid_len)
- {
- /* Yes, move them to the front of the buffer. */
- pstr->tip_context = re_string_context_at (pstr, offset - 1, eflags,
- newline);
-#ifdef RE_ENABLE_I18N
- if (MB_CUR_MAX > 1)
- memmove (pstr->wcs, pstr->wcs + offset,
- (pstr->valid_len - offset) * sizeof (wint_t));
-#endif /* RE_ENABLE_I18N */
- if (MBS_ALLOCATED (pstr))
- memmove (pstr->mbs, pstr->mbs + offset,
- pstr->valid_len - offset);
- if (MBS_CASE_ALLOCATED (pstr))
- memmove (pstr->mbs_case, pstr->mbs_case + offset,
- pstr->valid_len - offset);
- pstr->valid_len -= offset;
-#if DEBUG
- assert (pstr->valid_len > 0);
-#endif
- }
- else
- {
- /* No, skip all characters until IDX. */
- pstr->valid_len = 0;
-#ifdef RE_ENABLE_I18N
- if (MB_CUR_MAX > 1)
- {
- int wcs_idx;
- wint_t wc;
- pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
- for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
- pstr->wcs[wcs_idx] = WEOF;
- if (pstr->trans && wc <= 0xff)
- wc = pstr->trans[wc];
- pstr->tip_context = (IS_WIDE_WORD_CHAR (wc) ? CONTEXT_WORD
- : ((newline && IS_WIDE_NEWLINE (wc))
- ? CONTEXT_NEWLINE : 0));
- }
- else
-#endif /* RE_ENABLE_I18N */
- {
- int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1];
- if (pstr->trans)
- c = pstr->trans[c];
- pstr->tip_context = (IS_WORD_CHAR (c) ? CONTEXT_WORD
- : ((newline && IS_NEWLINE (c))
- ? CONTEXT_NEWLINE : 0));
- }
- }
- if (!MBS_CASE_ALLOCATED (pstr))
- {
- pstr->mbs_case += offset;
- /* In case of !MBS_ALLOCATED && !MBS_CASE_ALLOCATED. */
- if (!MBS_ALLOCATED (pstr))
- pstr->mbs += offset;
- }
- }
- pstr->raw_mbs_idx = idx;
- pstr->len -= offset;
- pstr->stop -= offset;
-
- /* Then build the buffers. */
-#ifdef RE_ENABLE_I18N
- if (MB_CUR_MAX > 1)
- {
- if (pstr->icase)
- build_wcs_upper_buffer (pstr);
- else
- build_wcs_buffer (pstr);
- }
- else
-#endif /* RE_ENABLE_I18N */
- {
- if (pstr->icase)
- build_upper_buffer (pstr);
- else if (pstr->trans != NULL)
- re_string_translate_buffer (pstr);
- }
- pstr->cur_idx = 0;
-
- return REG_NOERROR;
-}
-
-static void
-re_string_destruct (pstr)
- re_string_t *pstr;
-{
-#ifdef RE_ENABLE_I18N
- re_free (pstr->wcs);
-#endif /* RE_ENABLE_I18N */
- if (MBS_ALLOCATED (pstr))
- re_free (pstr->mbs);
- if (MBS_CASE_ALLOCATED (pstr))
- re_free (pstr->mbs_case);
-}
-
-/* Return the context at IDX in INPUT. */
-
-static unsigned int
-re_string_context_at (input, idx, eflags, newline_anchor)
- const re_string_t *input;
- int idx, eflags, newline_anchor;
-{
- int c;
- if (idx < 0 || idx == input->len)
- {
- if (idx < 0)
- /* In this case, we use the value stored in input->tip_context,
- since we can't know the character in input->mbs[-1] here. */
- return input->tip_context;
- else /* (idx == input->len) */
- return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF
- : CONTEXT_NEWLINE | CONTEXT_ENDBUF);
- }
-#ifdef RE_ENABLE_I18N
- if (MB_CUR_MAX > 1)
- {
- wint_t wc;
- int wc_idx = idx;
- while(input->wcs[wc_idx] == WEOF)
- {
-#ifdef DEBUG
- /* It must not happen. */
- assert (wc_idx >= 0);
-#endif
- --wc_idx;
- if (wc_idx < 0)
- return input->tip_context;
- }
- wc = input->wcs[wc_idx];
- if (IS_WIDE_WORD_CHAR (wc))
- return CONTEXT_WORD;
- return (newline_anchor && IS_WIDE_NEWLINE (wc)) ? CONTEXT_NEWLINE : 0;
- }
- else
-#endif
- {
- c = re_string_byte_at (input, idx);
- if (IS_WORD_CHAR (c))
- return CONTEXT_WORD;
- return (newline_anchor && IS_NEWLINE (c)) ? CONTEXT_NEWLINE : 0;
- }
-}
-\f
-/* Functions for set operation. */
-
-static reg_errcode_t
-re_node_set_alloc (set, size)
- re_node_set *set;
- int size;
-{
- set->alloc = size;
- set->nelem = 0;
- set->elems = re_malloc (int, size);
- if (BE (set->elems == NULL, 0))
- return REG_ESPACE;
- return REG_NOERROR;
-}
-
-static reg_errcode_t
-re_node_set_init_1 (set, elem)
- re_node_set *set;
- int elem;
-{
- set->alloc = 1;
- set->nelem = 1;
- set->elems = re_malloc (int, 1);
- if (BE (set->elems == NULL, 0))
- {
- set->alloc = set->nelem = 0;
- return REG_ESPACE;
- }
- set->elems[0] = elem;
- return REG_NOERROR;
-}
-
-static reg_errcode_t
-re_node_set_init_2 (set, elem1, elem2)
- re_node_set *set;
- int elem1, elem2;
-{
- set->alloc = 2;
- set->elems = re_malloc (int, 2);
- if (BE (set->elems == NULL, 0))
- return REG_ESPACE;
- if (elem1 == elem2)
- {
- set->nelem = 1;
- set->elems[0] = elem1;
- }
- else
- {
- set->nelem = 2;
- if (elem1 < elem2)
- {
- set->elems[0] = elem1;
- set->elems[1] = elem2;
- }
- else
- {
- set->elems[0] = elem2;
- set->elems[1] = elem1;
- }
- }
- return REG_NOERROR;
-}
-
-static reg_errcode_t
-re_node_set_init_copy (dest, src)
- re_node_set *dest;
- const re_node_set *src;
-{
- dest->nelem = src->nelem;
- if (src->nelem > 0)
- {
- dest->alloc = dest->nelem;
- dest->elems = re_malloc (int, dest->alloc);
- if (BE (dest->elems == NULL, 0))
- {
- dest->alloc = dest->nelem = 0;
- return REG_ESPACE;
- }
- memcpy (dest->elems, src->elems, src->nelem * sizeof (int));
- }
- else
- re_node_set_init_empty (dest);
- return REG_NOERROR;
-}
-
-/* Calculate the intersection of the sets SRC1 and SRC2. And merge it to
- DEST. Return value indicate the error code or REG_NOERROR if succeeded.
- Note: We assume dest->elems is NULL, when dest->alloc is 0. */
-
-static reg_errcode_t
-re_node_set_add_intersect (dest, src1, src2)
- re_node_set *dest;
- const re_node_set *src1, *src2;
-{
- int i1, i2, id;
- if (src1->nelem > 0 && src2->nelem > 0)
- {
- if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
- {
- dest->alloc = src1->nelem + src2->nelem + dest->nelem;
- dest->elems = re_realloc (dest->elems, int, dest->alloc);
- if (BE (dest->elems == NULL, 0))
- return REG_ESPACE;
- }
- }
- else
- return REG_NOERROR;
-
- for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
- {
- if (src1->elems[i1] > src2->elems[i2])
- {
- ++i2;
- continue;
- }
- if (src1->elems[i1] == src2->elems[i2])
- {
- while (id < dest->nelem && dest->elems[id] < src2->elems[i2])
- ++id;
- if (id < dest->nelem && dest->elems[id] == src2->elems[i2])
- ++id;
- else
- {
- memmove (dest->elems + id + 1, dest->elems + id,
- sizeof (int) * (dest->nelem - id));
- dest->elems[id++] = src2->elems[i2++];
- ++dest->nelem;
- }
- }
- ++i1;
- }
- return REG_NOERROR;
-}
-
-/* Calculate the union set of the sets SRC1 and SRC2. And store it to
- DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
-
-static reg_errcode_t
-re_node_set_init_union (dest, src1, src2)
- re_node_set *dest;
- const re_node_set *src1, *src2;
-{
- int i1, i2, id;
- if (src1 != NULL && src1->nelem > 0 && src2 != NULL && src2->nelem > 0)
- {
- dest->alloc = src1->nelem + src2->nelem;
- dest->elems = re_malloc (int, dest->alloc);
- if (BE (dest->elems == NULL, 0))
- return REG_ESPACE;
- }
- else
- {
- if (src1 != NULL && src1->nelem > 0)
- return re_node_set_init_copy (dest, src1);
- else if (src2 != NULL && src2->nelem > 0)
- return re_node_set_init_copy (dest, src2);
- else
- re_node_set_init_empty (dest);
- return REG_NOERROR;
- }
- for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
- {
- if (src1->elems[i1] > src2->elems[i2])
- {
- dest->elems[id++] = src2->elems[i2++];
- continue;
- }
- if (src1->elems[i1] == src2->elems[i2])
- ++i2;
- dest->elems[id++] = src1->elems[i1++];
- }
- if (i1 < src1->nelem)
- {
- memcpy (dest->elems + id, src1->elems + i1,
- (src1->nelem - i1) * sizeof (int));
- id += src1->nelem - i1;
- }
- else if (i2 < src2->nelem)
- {
- memcpy (dest->elems + id, src2->elems + i2,
- (src2->nelem - i2) * sizeof (int));
- id += src2->nelem - i2;
- }
- dest->nelem = id;
- return REG_NOERROR;
-}
-
-/* Calculate the union set of the sets DEST and SRC. And store it to
- DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
-
-static reg_errcode_t
-re_node_set_merge (dest, src)
- re_node_set *dest;
- const re_node_set *src;
-{
- int si, di;
- if (src == NULL || src->nelem == 0)
- return REG_NOERROR;
- if (dest->alloc < src->nelem + dest->nelem)
- {
- int *new_buffer;
- dest->alloc = 2 * (src->nelem + dest->alloc);
- new_buffer = re_realloc (dest->elems, int, dest->alloc);
- if (BE (new_buffer == NULL, 0))
- return REG_ESPACE;
- dest->elems = new_buffer;
- }
-
- for (si = 0, di = 0 ; si < src->nelem && di < dest->nelem ;)
- {
- int cp_from, ncp, mid, right, src_elem = src->elems[si];
- /* Binary search the spot we will add the new element. */
- right = dest->nelem;
- while (di < right)
- {
- mid = (di + right) / 2;
- if (dest->elems[mid] < src_elem)
- di = mid + 1;
- else
- right = mid;
- }
- if (di >= dest->nelem)
- break;
-
- if (dest->elems[di] == src_elem)
- {
- /* Skip since, DEST already has the element. */
- ++di;
- ++si;
- continue;
- }
-
- /* Skip the src elements which are less than dest->elems[di]. */
- cp_from = si;
- while (si < src->nelem && src->elems[si] < dest->elems[di])
- ++si;
- /* Copy these src elements. */
- ncp = si - cp_from;
- memmove (dest->elems + di + ncp, dest->elems + di,
- sizeof (int) * (dest->nelem - di));
- memcpy (dest->elems + di, src->elems + cp_from,
- sizeof (int) * ncp);
- /* Update counters. */
- di += ncp;
- dest->nelem += ncp;
- }
-
- /* Copy remaining src elements. */
- if (si < src->nelem)
- {
- memcpy (dest->elems + di, src->elems + si,
- sizeof (int) * (src->nelem - si));
- dest->nelem += src->nelem - si;
- }
- return REG_NOERROR;
-}
-
-/* Insert the new element ELEM to the re_node_set* SET.
- return 0 if SET already has ELEM,
- return -1 if an error is occured, return 1 otherwise. */
-
-static int
-re_node_set_insert (set, elem)
- re_node_set *set;
- int elem;
-{
- int idx, right, mid;
- /* In case of the set is empty. */
- if (set->elems == NULL || set->alloc == 0)
- {
- if (BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1))
- return 1;
- else
- return -1;
- }
-
- /* Binary search the spot we will add the new element. */
- idx = 0;
- right = set->nelem;
- while (idx < right)
- {
- mid = (idx + right) / 2;
- if (set->elems[mid] < elem)
- idx = mid + 1;
- else
- right = mid;
- }
-
- /* Realloc if we need. */
- if (set->alloc < set->nelem + 1)
- {
- int *new_array;
- set->alloc = set->alloc * 2;
- new_array = re_malloc (int, set->alloc);
- if (BE (new_array == NULL, 0))
- return -1;
- /* Copy the elements they are followed by the new element. */
- if (idx > 0)
- memcpy (new_array, set->elems, sizeof (int) * (idx));
- /* Copy the elements which follows the new element. */
- if (set->nelem - idx > 0)
- memcpy (new_array + idx + 1, set->elems + idx,
- sizeof (int) * (set->nelem - idx));
- re_free (set->elems);
- set->elems = new_array;
- }
- else
- {
- /* Move the elements which follows the new element. */
- if (set->nelem - idx > 0)
- memmove (set->elems + idx + 1, set->elems + idx,
- sizeof (int) * (set->nelem - idx));
- }
- /* Insert the new element. */
- set->elems[idx] = elem;
- ++set->nelem;
- return 1;
-}
-
-/* Compare two node sets SET1 and SET2.
- return 1 if SET1 and SET2 are equivalent, retrun 0 otherwise. */
-
-static int
-re_node_set_compare (set1, set2)
- const re_node_set *set1, *set2;
-{
- int i;
- if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem)
- return 0;
- for (i = 0 ; i < set1->nelem ; i++)
- if (set1->elems[i] != set2->elems[i])
- return 0;
- return 1;
-}
-
-/* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise. */
-
-static int
-re_node_set_contains (set, elem)
- const re_node_set *set;
- int elem;
-{
- int idx, right, mid;
- if (set->nelem <= 0)
- return 0;
-
- /* Binary search the element. */
- idx = 0;
- right = set->nelem - 1;
- while (idx < right)
- {
- mid = (idx + right) / 2;
- if (set->elems[mid] < elem)
- idx = mid + 1;
- else
- right = mid;
- }
- return set->elems[idx] == elem ? idx + 1 : 0;
-}
-
-static void
-re_node_set_remove_at (set, idx)
- re_node_set *set;
- int idx;
-{
- if (idx < 0 || idx >= set->nelem)
- return;
- if (idx < set->nelem - 1)
- memmove (set->elems + idx, set->elems + idx + 1,
- sizeof (int) * (set->nelem - idx - 1));
- --set->nelem;
-}
-\f
-
-/* Add the token TOKEN to dfa->nodes, and return the index of the token.
- Or return -1, if an error will be occured. */
-
-static int
-re_dfa_add_node (dfa, token, mode)
- re_dfa_t *dfa;
- re_token_t token;
- int mode;
-{
- if (dfa->nodes_len >= dfa->nodes_alloc)
- {
- re_token_t *new_array;
- dfa->nodes_alloc *= 2;
- new_array = re_realloc (dfa->nodes, re_token_t, dfa->nodes_alloc);
- if (BE (new_array == NULL, 0))
- return -1;
- else
- dfa->nodes = new_array;
- if (mode)
- {
- int *new_nexts, *new_indices;
- re_node_set *new_edests, *new_eclosures, *new_inveclosures;
-
- new_nexts = re_realloc (dfa->nexts, int, dfa->nodes_alloc);
- new_indices = re_realloc (dfa->org_indices, int, dfa->nodes_alloc);
- new_edests = re_realloc (dfa->edests, re_node_set, dfa->nodes_alloc);
- new_eclosures = re_realloc (dfa->eclosures, re_node_set,
- dfa->nodes_alloc);
- new_inveclosures = re_realloc (dfa->inveclosures, re_node_set,
- dfa->nodes_alloc);
- if (BE (new_nexts == NULL || new_indices == NULL
- || new_edests == NULL || new_eclosures == NULL
- || new_inveclosures == NULL, 0))
- return -1;
- dfa->nexts = new_nexts;
- dfa->org_indices = new_indices;
- dfa->edests = new_edests;
- dfa->eclosures = new_eclosures;
- dfa->inveclosures = new_inveclosures;
- }
- }
- dfa->nodes[dfa->nodes_len] = token;
- dfa->nodes[dfa->nodes_len].duplicated = 0;
- dfa->nodes[dfa->nodes_len].constraint = 0;
- return dfa->nodes_len++;
-}
-
-static unsigned int inline
-calc_state_hash (nodes, context)
- const re_node_set *nodes;
- unsigned int context;
-{
- unsigned int hash = nodes->nelem + context;
- int i;
- for (i = 0 ; i < nodes->nelem ; i++)
- hash += nodes->elems[i];
- return hash;
-}
-
-/* Search for the state whose node_set is equivalent to NODES.
- Return the pointer to the state, if we found it in the DFA.
- Otherwise create the new one and return it. In case of an error
- return NULL and set the error code in ERR.
- Note: - We assume NULL as the invalid state, then it is possible that
- return value is NULL and ERR is REG_NOERROR.
- - We never return non-NULL value in case of any errors, it is for
- optimization. */
-
-static re_dfastate_t*
-re_acquire_state (err, dfa, nodes)
- reg_errcode_t *err;
- re_dfa_t *dfa;
- const re_node_set *nodes;
-{
- unsigned int hash;
- re_dfastate_t *new_state;
- struct re_state_table_entry *spot;
- int i;
- if (BE (nodes->nelem == 0, 0))
- {
- *err = REG_NOERROR;
- return NULL;
- }
- hash = calc_state_hash (nodes, 0);
- spot = dfa->state_table + (hash & dfa->state_hash_mask);
-
- for (i = 0 ; i < spot->num ; i++)
- {
- re_dfastate_t *state = spot->array[i];
- if (hash != state->hash)
- continue;
- if (re_node_set_compare (&state->nodes, nodes))
- return state;
- }
-
- /* There are no appropriate state in the dfa, create the new one. */
- new_state = create_ci_newstate (dfa, nodes, hash);
- if (BE (new_state != NULL, 1))
- return new_state;
- else
- {
- *err = REG_ESPACE;
- return NULL;
- }
-}
-
-/* Search for the state whose node_set is equivalent to NODES and
- whose context is equivalent to CONTEXT.
- Return the pointer to the state, if we found it in the DFA.
- Otherwise create the new one and return it. In case of an error
- return NULL and set the error code in ERR.
- Note: - We assume NULL as the invalid state, then it is possible that
- return value is NULL and ERR is REG_NOERROR.
- - We never return non-NULL value in case of any errors, it is for
- optimization. */
-
-static re_dfastate_t*
-re_acquire_state_context (err, dfa, nodes, context)
- reg_errcode_t *err;
- re_dfa_t *dfa;
- const re_node_set *nodes;
- unsigned int context;
-{
- unsigned int hash;
- re_dfastate_t *new_state;
- struct re_state_table_entry *spot;
- int i;
- if (nodes->nelem == 0)
- {
- *err = REG_NOERROR;
- return NULL;
- }
- hash = calc_state_hash (nodes, context);
- spot = dfa->state_table + (hash & dfa->state_hash_mask);
-
- for (i = 0 ; i < spot->num ; i++)
- {
- re_dfastate_t *state = spot->array[i];
- if (hash != state->hash)
- continue;
- if (re_node_set_compare (state->entrance_nodes, nodes)
- && state->context == context)
- return state;
- }
- /* There are no appropriate state in `dfa', create the new one. */
- new_state = create_cd_newstate (dfa, nodes, context, hash);
- if (BE (new_state != NULL, 1))
- return new_state;
- else
- {
- *err = REG_ESPACE;
- return NULL;
- }
-}
-
-/* Allocate memory for DFA state and initialize common properties.
- Return the new state if succeeded, otherwise return NULL. */
-
-static re_dfastate_t *
-create_newstate_common (dfa, nodes, hash)
- re_dfa_t *dfa;
- const re_node_set *nodes;
- unsigned int hash;
-{
- re_dfastate_t *newstate;
- reg_errcode_t err;
- newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
- if (BE (newstate == NULL, 0))
- return NULL;
- err = re_node_set_init_copy (&newstate->nodes, nodes);
- if (BE (err != REG_NOERROR, 0))
- {
- re_free (newstate);
- return NULL;
- }
- newstate->trtable = NULL;
- newstate->trtable_search = NULL;
- newstate->hash = hash;
- return newstate;
-}
-
-/* Store the new state NEWSTATE whose hash value is HASH in appropriate
- position. Return value indicate the error code if failed. */
-
-static reg_errcode_t
-register_state (dfa, newstate, hash)
- re_dfa_t *dfa;
- re_dfastate_t *newstate;
- unsigned int hash;
-{
- struct re_state_table_entry *spot;
- spot = dfa->state_table + (hash & dfa->state_hash_mask);
-
- if (spot->alloc <= spot->num)
- {
- re_dfastate_t **new_array;
- spot->alloc = 2 * spot->num + 2;
- new_array = re_realloc (spot->array, re_dfastate_t *, spot->alloc);
- if (BE (new_array == NULL, 0))
- return REG_ESPACE;
- spot->array = new_array;
- }
- spot->array[spot->num++] = newstate;
- return REG_NOERROR;
-}
-
-/* Create the new state which is independ of contexts.
- Return the new state if succeeded, otherwise return NULL. */
-
-static re_dfastate_t *
-create_ci_newstate (dfa, nodes, hash)
- re_dfa_t *dfa;
- const re_node_set *nodes;
- unsigned int hash;
-{
- int i;
- reg_errcode_t err;
- re_dfastate_t *newstate;
- newstate = create_newstate_common (dfa, nodes, hash);
- if (BE (newstate == NULL, 0))
- return NULL;
- newstate->entrance_nodes = &newstate->nodes;
-
- for (i = 0 ; i < nodes->nelem ; i++)
- {
- re_token_t *node = dfa->nodes + nodes->elems[i];
- re_token_type_t type = node->type;
- if (type == CHARACTER && !node->constraint)
- continue;
-
- /* If the state has the halt node, the state is a halt state. */
- else if (type == END_OF_RE)
- newstate->halt = 1;
-#ifdef RE_ENABLE_I18N
- else if (type == COMPLEX_BRACKET
- || (type == OP_PERIOD && MB_CUR_MAX > 1))
- newstate->accept_mb = 1;
-#endif /* RE_ENABLE_I18N */
- else if (type == OP_BACK_REF)
- newstate->has_backref = 1;
- else if (type == ANCHOR || node->constraint)
- newstate->has_constraint = 1;
- }
- err = register_state (dfa, newstate, hash);
- if (BE (err != REG_NOERROR, 0))
- {
- free_state (newstate);
- newstate = NULL;
- }
- return newstate;
-}
-
-/* Create the new state which is depend on the context CONTEXT.
- Return the new state if succeeded, otherwise return NULL. */
-
-static re_dfastate_t *
-create_cd_newstate (dfa, nodes, context, hash)
- re_dfa_t *dfa;
- const re_node_set *nodes;
- unsigned int context, hash;
-{
- int i, nctx_nodes = 0;
- reg_errcode_t err;
- re_dfastate_t *newstate;
-
- newstate = create_newstate_common (dfa, nodes, hash);
- if (BE (newstate == NULL, 0))
- return NULL;
- newstate->context = context;
- newstate->entrance_nodes = &newstate->nodes;
-
- for (i = 0 ; i < nodes->nelem ; i++)
- {
- unsigned int constraint = 0;
- re_token_t *node = dfa->nodes + nodes->elems[i];
- re_token_type_t type = node->type;
- if (node->constraint)
- constraint = node->constraint;
-
- if (type == CHARACTER && !constraint)
- continue;
- /* If the state has the halt node, the state is a halt state. */
- else if (type == END_OF_RE)
- newstate->halt = 1;
-#ifdef RE_ENABLE_I18N
- else if (type == COMPLEX_BRACKET
- || (type == OP_PERIOD && MB_CUR_MAX > 1))
- newstate->accept_mb = 1;
-#endif /* RE_ENABLE_I18N */
- else if (type == OP_BACK_REF)
- newstate->has_backref = 1;
- else if (type == ANCHOR)
- constraint = node->opr.ctx_type;
-
- if (constraint)
- {
- if (newstate->entrance_nodes == &newstate->nodes)
- {
- newstate->entrance_nodes = re_malloc (re_node_set, 1);
- if (BE (newstate->entrance_nodes == NULL, 0))
- {
- free_state (newstate);
- return NULL;
- }
- re_node_set_init_copy (newstate->entrance_nodes, nodes);
- nctx_nodes = 0;
- newstate->has_constraint = 1;
- }
-
- if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context))
- {
- re_node_set_remove_at (&newstate->nodes, i - nctx_nodes);
- ++nctx_nodes;
- }
- }
- }
- err = register_state (dfa, newstate, hash);
- if (BE (err != REG_NOERROR, 0))
- {
- free_state (newstate);
- newstate = NULL;
- }
- return newstate;
-}
-
-static void
-free_state (state)
- re_dfastate_t *state;
-{
- if (state->entrance_nodes != &state->nodes)
- {
- re_node_set_free (state->entrance_nodes);
- re_free (state->entrance_nodes);
- }
- re_node_set_free (&state->nodes);
- re_free (state->trtable);
- re_free (state->trtable_search);
- re_free (state);
-}
+++ /dev/null
-/* Extended regular expression matching and search library.
- Copyright (C) 2002, 2003 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
-
-#ifndef _REGEX_INTERNAL_H
-#define _REGEX_INTERNAL_H 1
-
-#ifdef HAVE_CONFIG_H
-#include "config.h"
-#endif
-
-#include <assert.h>
-#include <ctype.h>
-#include <limits.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#if defined HAVE_LOCALE_H || defined _LIBC
-# include <locale.h>
-#endif
-#if defined HAVE_WCHAR_H || defined _LIBC
-# include <wchar.h>
-#endif /* HAVE_WCHAR_H || _LIBC */
-#if defined HAVE_WCTYPE_H || defined _LIBC
-# include <wctype.h>
-#endif /* HAVE_WCTYPE_H || _LIBC */
-
-/* In case that the system doesn't have isblank(). */
-#if !defined _LIBC && !defined HAVE_ISBLANK && !defined isblank
-# define isblank(ch) ((ch) == ' ' || (ch) == '\t')
-#endif
-
-#ifdef _LIBC
-# ifndef _RE_DEFINE_LOCALE_FUNCTIONS
-# define _RE_DEFINE_LOCALE_FUNCTIONS 1
-# include <locale/localeinfo.h>
-# include <locale/elem-hash.h>
-# include <locale/coll-lookup.h>
-# endif
-#endif
-
-/* This is for other GNU distributions with internationalized messages. */
-#if HAVE_LIBINTL_H || defined _LIBC
-# include <libintl.h>
-# ifdef _LIBC
-# undef gettext
-# define gettext(msgid) \
- INTUSE(__dcgettext) (INTUSE(_libc_intl_domainname), msgid, LC_MESSAGES)
-# endif
-#else
-# define gettext(msgid) (msgid)
-#endif
-
-#ifndef gettext_noop
-/* This define is so xgettext can find the internationalizable
- strings. */
-# define gettext_noop(String) String
-#endif
-
-#if (defined MB_CUR_MAX && HAVE_LOCALE_H && HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_WCRTOMB && HAVE_MBRTOWC && HAVE_WCSCOLL) || _LIBC
-# define RE_ENABLE_I18N
-#endif
-
-#if __GNUC__ >= 3
-# define BE(expr, val) __builtin_expect (expr, val)
-#else
-# define BE(expr, val) (expr)
-# define inline
-#endif
-
-/* Number of bits in a byte. */
-#define BYTE_BITS 8
-/* Number of single byte character. */
-#define SBC_MAX 256
-
-#define COLL_ELEM_LEN_MAX 8
-
-/* The character which represents newline. */
-#define NEWLINE_CHAR '\n'
-#define WIDE_NEWLINE_CHAR L'\n'
-
-/* Rename to standard API for using out of glibc. */
-#ifndef _LIBC
-# define __wctype wctype
-# define __iswctype iswctype
-# define __btowc btowc
-# define __mempcpy mempcpy
-# define __wcrtomb wcrtomb
-# define attribute_hidden
-#endif /* not _LIBC */
-
-extern const char __re_error_msgid[] attribute_hidden;
-extern const size_t __re_error_msgid_idx[] attribute_hidden;
-
-/* Number of bits in an unsinged int. */
-#define UINT_BITS (sizeof (unsigned int) * BYTE_BITS)
-/* Number of unsigned int in an bit_set. */
-#define BITSET_UINTS ((SBC_MAX + UINT_BITS - 1) / UINT_BITS)
-typedef unsigned int bitset[BITSET_UINTS];
-typedef unsigned int *re_bitset_ptr_t;
-
-#define bitset_set(set,i) (set[i / UINT_BITS] |= 1 << i % UINT_BITS)
-#define bitset_clear(set,i) (set[i / UINT_BITS] &= ~(1 << i % UINT_BITS))
-#define bitset_contain(set,i) (set[i / UINT_BITS] & (1 << i % UINT_BITS))
-#define bitset_empty(set) memset (set, 0, sizeof (unsigned int) * BITSET_UINTS)
-#define bitset_set_all(set) \
- memset (set, 255, sizeof (unsigned int) * BITSET_UINTS)
-#define bitset_copy(dest,src) \
- memcpy (dest, src, sizeof (unsigned int) * BITSET_UINTS)
-static inline void bitset_not (bitset set);
-static inline void bitset_merge (bitset dest, const bitset src);
-static inline void bitset_not_merge (bitset dest, const bitset src);
-
-#define PREV_WORD_CONSTRAINT 0x0001
-#define PREV_NOTWORD_CONSTRAINT 0x0002
-#define NEXT_WORD_CONSTRAINT 0x0004
-#define NEXT_NOTWORD_CONSTRAINT 0x0008
-#define PREV_NEWLINE_CONSTRAINT 0x0010
-#define NEXT_NEWLINE_CONSTRAINT 0x0020
-#define PREV_BEGBUF_CONSTRAINT 0x0040
-#define NEXT_ENDBUF_CONSTRAINT 0x0080
-#define DUMMY_CONSTRAINT 0x0100
-
-typedef enum
-{
- INSIDE_WORD = PREV_WORD_CONSTRAINT | NEXT_WORD_CONSTRAINT,
- WORD_FIRST = PREV_NOTWORD_CONSTRAINT | NEXT_WORD_CONSTRAINT,
- WORD_LAST = PREV_WORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT,
- LINE_FIRST = PREV_NEWLINE_CONSTRAINT,
- LINE_LAST = NEXT_NEWLINE_CONSTRAINT,
- BUF_FIRST = PREV_BEGBUF_CONSTRAINT,
- BUF_LAST = NEXT_ENDBUF_CONSTRAINT,
- WORD_DELIM = DUMMY_CONSTRAINT
-} re_context_type;
-
-typedef struct
-{
- int alloc;
- int nelem;
- int *elems;
-} re_node_set;
-
-typedef enum
-{
- NON_TYPE = 0,
-
- /* Token type, these are used only by token. */
- OP_OPEN_BRACKET,
- OP_CLOSE_BRACKET,
- OP_CHARSET_RANGE,
- OP_OPEN_DUP_NUM,
- OP_CLOSE_DUP_NUM,
- OP_NON_MATCH_LIST,
- OP_OPEN_COLL_ELEM,
- OP_CLOSE_COLL_ELEM,
- OP_OPEN_EQUIV_CLASS,
- OP_CLOSE_EQUIV_CLASS,
- OP_OPEN_CHAR_CLASS,
- OP_CLOSE_CHAR_CLASS,
- OP_WORD,
- OP_NOTWORD,
- BACK_SLASH,
-
- /* Tree type, these are used only by tree. */
- CONCAT,
- ALT,
- SUBEXP,
- SIMPLE_BRACKET,
-#ifdef RE_ENABLE_I18N
- COMPLEX_BRACKET,
-#endif /* RE_ENABLE_I18N */
-
- /* Node type, These are used by token, node, tree. */
- OP_OPEN_SUBEXP,
- OP_CLOSE_SUBEXP,
- OP_PERIOD,
- CHARACTER,
- END_OF_RE,
- OP_ALT,
- OP_DUP_ASTERISK,
- OP_DUP_PLUS,
- OP_DUP_QUESTION,
- OP_BACK_REF,
- ANCHOR,
-
- /* Dummy marker. */
- END_OF_RE_TOKEN_T
-} re_token_type_t;
-
-#ifdef RE_ENABLE_I18N
-typedef struct
-{
- /* Multibyte characters. */
- wchar_t *mbchars;
-
- /* Collating symbols. */
-# ifdef _LIBC
- int32_t *coll_syms;
-# endif
-
- /* Equivalence classes. */
-# ifdef _LIBC
- int32_t *equiv_classes;
-# endif
-
- /* Range expressions. */
-# ifdef _LIBC
- uint32_t *range_starts;
- uint32_t *range_ends;
-# else /* not _LIBC */
- wchar_t *range_starts;
- wchar_t *range_ends;
-# endif /* not _LIBC */
-
- /* Character classes. */
- wctype_t *char_classes;
-
- /* If this character set is the non-matching list. */
- unsigned int non_match : 1;
-
- /* # of multibyte characters. */
- int nmbchars;
-
- /* # of collating symbols. */
- int ncoll_syms;
-
- /* # of equivalence classes. */
- int nequiv_classes;
-
- /* # of range expressions. */
- int nranges;
-
- /* # of character classes. */
- int nchar_classes;
-} re_charset_t;
-#endif /* RE_ENABLE_I18N */
-
-typedef struct
-{
- union
- {
- unsigned char c; /* for CHARACTER */
- re_bitset_ptr_t sbcset; /* for SIMPLE_BRACKET */
-#ifdef RE_ENABLE_I18N
- re_charset_t *mbcset; /* for COMPLEX_BRACKET */
-#endif /* RE_ENABLE_I18N */
- int idx; /* for BACK_REF */
- re_context_type ctx_type; /* for ANCHOR */
- } opr;
-#if __GNUC__ >= 2
- re_token_type_t type : 8;
-#else
- re_token_type_t type;
-#endif
- unsigned int constraint : 10; /* context constraint */
- unsigned int duplicated : 1;
-#ifdef RE_ENABLE_I18N
- unsigned int mb_partial : 1;
-#endif
-} re_token_t;
-
-#define IS_EPSILON_NODE(type) \
- ((type) == OP_ALT || (type) == OP_DUP_ASTERISK || (type) == OP_DUP_PLUS \
- || (type) == OP_DUP_QUESTION || (type) == ANCHOR \
- || (type) == OP_OPEN_SUBEXP || (type) == OP_CLOSE_SUBEXP)
-
-#define ACCEPT_MB_NODE(type) \
- ((type) == COMPLEX_BRACKET || (type) == OP_PERIOD)
-
-struct re_string_t
-{
- /* Indicate the raw buffer which is the original string passed as an
- argument of regexec(), re_search(), etc.. */
- const unsigned char *raw_mbs;
- /* Store the multibyte string. In case of "case insensitive mode" like
- REG_ICASE, upper cases of the string are stored, otherwise MBS points
- the same address that RAW_MBS points. */
- unsigned char *mbs;
- /* Store the case sensitive multibyte string. In case of
- "case insensitive mode", the original string are stored,
- otherwise MBS_CASE points the same address that MBS points. */
- unsigned char *mbs_case;
-#ifdef RE_ENABLE_I18N
- /* Store the wide character string which is corresponding to MBS. */
- wint_t *wcs;
- mbstate_t cur_state;
-#endif
- /* Index in RAW_MBS. Each character mbs[i] corresponds to
- raw_mbs[raw_mbs_idx + i]. */
- int raw_mbs_idx;
- /* The length of the valid characters in the buffers. */
- int valid_len;
- /* The length of the buffers MBS, MBS_CASE, and WCS. */
- int bufs_len;
- /* The index in MBS, which is updated by re_string_fetch_byte. */
- int cur_idx;
- /* This is length_of_RAW_MBS - RAW_MBS_IDX. */
- int len;
- /* End of the buffer may be shorter than its length in the cases such
- as re_match_2, re_search_2. Then, we use STOP for end of the buffer
- instead of LEN. */
- int stop;
-
- /* The context of mbs[0]. We store the context independently, since
- the context of mbs[0] may be different from raw_mbs[0], which is
- the beginning of the input string. */
- unsigned int tip_context;
- /* The translation passed as a part of an argument of re_compile_pattern. */
- RE_TRANSLATE_TYPE trans;
- /* 1 if REG_ICASE. */
- unsigned int icase : 1;
-};
-typedef struct re_string_t re_string_t;
-/* In case of REG_ICASE, we allocate the buffer dynamically for mbs. */
-#define MBS_ALLOCATED(pstr) (pstr->icase)
-/* In case that we need translation, we allocate the buffer dynamically
- for mbs_case. Note that mbs == mbs_case if not REG_ICASE. */
-#define MBS_CASE_ALLOCATED(pstr) (pstr->trans != NULL)
-
-
-static reg_errcode_t re_string_allocate (re_string_t *pstr, const char *str,
- int len, int init_len,
- RE_TRANSLATE_TYPE trans, int icase);
-static reg_errcode_t re_string_construct (re_string_t *pstr, const char *str,
- int len, RE_TRANSLATE_TYPE trans,
- int icase);
-static reg_errcode_t re_string_reconstruct (re_string_t *pstr, int idx,
- int eflags, int newline);
-static reg_errcode_t re_string_realloc_buffers (re_string_t *pstr,
- int new_buf_len);
-#ifdef RE_ENABLE_I18N
-static void build_wcs_buffer (re_string_t *pstr);
-static void build_wcs_upper_buffer (re_string_t *pstr);
-#endif /* RE_ENABLE_I18N */
-static void build_upper_buffer (re_string_t *pstr);
-static void re_string_translate_buffer (re_string_t *pstr);
-static void re_string_destruct (re_string_t *pstr);
-#ifdef RE_ENABLE_I18N
-static int re_string_elem_size_at (const re_string_t *pstr, int idx);
-static inline int re_string_char_size_at (const re_string_t *pstr, int idx);
-static inline wint_t re_string_wchar_at (const re_string_t *pstr, int idx);
-#endif /* RE_ENABLE_I18N */
-static unsigned int re_string_context_at (const re_string_t *input, int idx,
- int eflags, int newline_anchor);
-#define re_string_peek_byte(pstr, offset) \
- ((pstr)->mbs[(pstr)->cur_idx + offset])
-#define re_string_peek_byte_case(pstr, offset) \
- ((pstr)->mbs_case[(pstr)->cur_idx + offset])
-#define re_string_fetch_byte(pstr) \
- ((pstr)->mbs[(pstr)->cur_idx++])
-#define re_string_fetch_byte_case(pstr) \
- ((pstr)->mbs_case[(pstr)->cur_idx++])
-#define re_string_first_byte(pstr, idx) \
- ((idx) == (pstr)->len || (pstr)->wcs[idx] != WEOF)
-#define re_string_is_single_byte_char(pstr, idx) \
- ((pstr)->wcs[idx] != WEOF && ((pstr)->len == (idx) \
- || (pstr)->wcs[(idx) + 1] != WEOF))
-#define re_string_eoi(pstr) ((pstr)->stop <= (pstr)->cur_idx)
-#define re_string_cur_idx(pstr) ((pstr)->cur_idx)
-#define re_string_get_buffer(pstr) ((pstr)->mbs)
-#define re_string_length(pstr) ((pstr)->len)
-#define re_string_byte_at(pstr,idx) ((pstr)->mbs[idx])
-#define re_string_skip_bytes(pstr,idx) ((pstr)->cur_idx += (idx))
-#define re_string_set_index(pstr,idx) ((pstr)->cur_idx = (idx))
-
-#define re_malloc(t,n) ((t *) malloc ((n) * sizeof (t)))
-#define re_realloc(p,t,n) ((t *) realloc (p, (n) * sizeof (t)))
-#define re_free(p) free (p)
-
-struct bin_tree_t
-{
- struct bin_tree_t *parent;
- struct bin_tree_t *left;
- struct bin_tree_t *right;
-
- /* `node_idx' is the index in dfa->nodes, if `type' == 0.
- Otherwise `type' indicate the type of this node. */
- re_token_type_t type;
- int node_idx;
-
- int first;
- int next;
- re_node_set eclosure;
-};
-typedef struct bin_tree_t bin_tree_t;
-
-
-#define CONTEXT_WORD 1
-#define CONTEXT_NEWLINE (CONTEXT_WORD << 1)
-#define CONTEXT_BEGBUF (CONTEXT_NEWLINE << 1)
-#define CONTEXT_ENDBUF (CONTEXT_BEGBUF << 1)
-
-#define IS_WORD_CONTEXT(c) ((c) & CONTEXT_WORD)
-#define IS_NEWLINE_CONTEXT(c) ((c) & CONTEXT_NEWLINE)
-#define IS_BEGBUF_CONTEXT(c) ((c) & CONTEXT_BEGBUF)
-#define IS_ENDBUF_CONTEXT(c) ((c) & CONTEXT_ENDBUF)
-#define IS_ORDINARY_CONTEXT(c) ((c) == 0)
-
-#define IS_WORD_CHAR(ch) (isalnum (ch) || (ch) == '_')
-#define IS_NEWLINE(ch) ((ch) == NEWLINE_CHAR)
-#define IS_WIDE_WORD_CHAR(ch) (iswalnum (ch) || (ch) == L'_')
-#define IS_WIDE_NEWLINE(ch) ((ch) == WIDE_NEWLINE_CHAR)
-
-#define NOT_SATISFY_PREV_CONSTRAINT(constraint,context) \
- ((((constraint) & PREV_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \
- || ((constraint & PREV_NOTWORD_CONSTRAINT) && IS_WORD_CONTEXT (context)) \
- || ((constraint & PREV_NEWLINE_CONSTRAINT) && !IS_NEWLINE_CONTEXT (context))\
- || ((constraint & PREV_BEGBUF_CONSTRAINT) && !IS_BEGBUF_CONTEXT (context)))
-
-#define NOT_SATISFY_NEXT_CONSTRAINT(constraint,context) \
- ((((constraint) & NEXT_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \
- || (((constraint) & NEXT_NOTWORD_CONSTRAINT) && IS_WORD_CONTEXT (context)) \
- || (((constraint) & NEXT_NEWLINE_CONSTRAINT) && !IS_NEWLINE_CONTEXT (context)) \
- || (((constraint) & NEXT_ENDBUF_CONSTRAINT) && !IS_ENDBUF_CONTEXT (context)))
-
-struct re_dfastate_t
-{
- unsigned int hash;
- re_node_set nodes;
- re_node_set *entrance_nodes;
- struct re_dfastate_t **trtable;
- struct re_dfastate_t **trtable_search;
- /* If this state is a special state.
- A state is a special state if the state is the halt state, or
- a anchor. */
- unsigned int context : 2;
- unsigned int halt : 1;
- /* If this state can accept `multi byte'.
- Note that we refer to multibyte characters, and multi character
- collating elements as `multi byte'. */
- unsigned int accept_mb : 1;
- /* If this state has backreference node(s). */
- unsigned int has_backref : 1;
- unsigned int has_constraint : 1;
-};
-typedef struct re_dfastate_t re_dfastate_t;
-
-typedef struct
-{
- /* start <= node < end */
- int start;
- int end;
-} re_subexp_t;
-
-struct re_state_table_entry
-{
- int num;
- int alloc;
- re_dfastate_t **array;
-};
-
-/* Array type used in re_sub_match_last_t and re_sub_match_top_t. */
-
-typedef struct
-{
- int next_idx;
- int alloc;
- re_dfastate_t **array;
-} state_array_t;
-
-/* Store information about the node NODE whose type is OP_CLOSE_SUBEXP. */
-
-typedef struct
-{
- int node;
- int str_idx; /* The position NODE match at. */
- state_array_t path;
-} re_sub_match_last_t;
-
-/* Store information about the node NODE whose type is OP_OPEN_SUBEXP.
- And information about the node, whose type is OP_CLOSE_SUBEXP,
- corresponding to NODE is stored in LASTS. */
-
-typedef struct
-{
- int str_idx;
- int node;
- int next_last_offset;
- state_array_t *path;
- int alasts; /* Allocation size of LASTS. */
- int nlasts; /* The number of LASTS. */
- re_sub_match_last_t **lasts;
-} re_sub_match_top_t;
-
-struct re_backref_cache_entry
-{
- int node;
- int str_idx;
- int subexp_from;
- int subexp_to;
- int flag;
-};
-
-typedef struct
-{
- /* EFLAGS of the argument of regexec. */
- int eflags;
- /* Where the matching ends. */
- int match_last;
- int last_node;
- /* The string object corresponding to the input string. */
- re_string_t *input;
- /* The state log used by the matcher. */
- re_dfastate_t **state_log;
- int state_log_top;
- /* Back reference cache. */
- int nbkref_ents;
- int abkref_ents;
- struct re_backref_cache_entry *bkref_ents;
- int max_mb_elem_len;
- int nsub_tops;
- int asub_tops;
- re_sub_match_top_t **sub_tops;
-} re_match_context_t;
-
-typedef struct
-{
- int cur_bkref;
- int cls_subexp_idx;
-
- re_dfastate_t **sifted_states;
- re_dfastate_t **limited_states;
-
- re_node_set limits;
-
- int last_node;
- int last_str_idx;
- int check_subexp;
-} re_sift_context_t;
-
-struct re_fail_stack_ent_t
-{
- int idx;
- int node;
- regmatch_t *regs;
- re_node_set eps_via_nodes;
-};
-
-struct re_fail_stack_t
-{
- int num;
- int alloc;
- struct re_fail_stack_ent_t *stack;
-};
-
-struct re_dfa_t
-{
- re_bitset_ptr_t word_char;
-
- /* number of subexpressions `re_nsub' is in regex_t. */
- int subexps_alloc;
- re_subexp_t *subexps;
-
- re_token_t *nodes;
- int nodes_alloc;
- int nodes_len;
- bin_tree_t *str_tree;
- int *nexts;
- int *org_indices;
- re_node_set *edests;
- re_node_set *eclosures;
- re_node_set *inveclosures;
- struct re_state_table_entry *state_table;
- unsigned int state_hash_mask;
- re_dfastate_t *init_state;
- re_dfastate_t *init_state_word;
- re_dfastate_t *init_state_nl;
- re_dfastate_t *init_state_begbuf;
- int states_alloc;
- int init_node;
- int nbackref; /* The number of backreference in this dfa. */
- /* Bitmap expressing which backreference is used. */
- unsigned int used_bkref_map;
-#ifdef DEBUG
- char* re_str;
-#endif
- unsigned int has_plural_match : 1;
- /* If this dfa has "multibyte node", which is a backreference or
- a node which can accept multibyte character or multi character
- collating element. */
- unsigned int has_mb_node : 1;
-};
-typedef struct re_dfa_t re_dfa_t;
-
-static reg_errcode_t re_node_set_alloc (re_node_set *set, int size);
-static reg_errcode_t re_node_set_init_1 (re_node_set *set, int elem);
-static reg_errcode_t re_node_set_init_2 (re_node_set *set, int elem1,
- int elem2);
-#define re_node_set_init_empty(set) memset (set, '\0', sizeof (re_node_set))
-static reg_errcode_t re_node_set_init_copy (re_node_set *dest,
- const re_node_set *src);
-static reg_errcode_t re_node_set_add_intersect (re_node_set *dest,
- const re_node_set *src1,
- const re_node_set *src2);
-static reg_errcode_t re_node_set_init_union (re_node_set *dest,
- const re_node_set *src1,
- const re_node_set *src2);
-static reg_errcode_t re_node_set_merge (re_node_set *dest,
- const re_node_set *src);
-static int re_node_set_insert (re_node_set *set, int elem);
-static int re_node_set_compare (const re_node_set *set1,
- const re_node_set *set2);
-static int re_node_set_contains (const re_node_set *set, int elem);
-static void re_node_set_remove_at (re_node_set *set, int idx);
-#define re_node_set_remove(set,id) \
- (re_node_set_remove_at (set, re_node_set_contains (set, id) - 1))
-#define re_node_set_empty(p) ((p)->nelem = 0)
-#define re_node_set_free(set) re_free ((set)->elems)
-static int re_dfa_add_node (re_dfa_t *dfa, re_token_t token, int mode);
-static re_dfastate_t *re_acquire_state (reg_errcode_t *err, re_dfa_t *dfa,
- const re_node_set *nodes);
-static re_dfastate_t *re_acquire_state_context (reg_errcode_t *err,
- re_dfa_t *dfa,
- const re_node_set *nodes,
- unsigned int context);
-static void free_state (re_dfastate_t *state);
-\f
-
-typedef enum
-{
- SB_CHAR,
- MB_CHAR,
- EQUIV_CLASS,
- COLL_SYM,
- CHAR_CLASS
-} bracket_elem_type;
-
-typedef struct
-{
- bracket_elem_type type;
- union
- {
- unsigned char ch;
- unsigned char *name;
- wchar_t wch;
- } opr;
-} bracket_elem_t;
-
-
-/* Inline functions for bitset operation. */
-static inline void
-bitset_not (set)
- bitset set;
-{
- int bitset_i;
- for (bitset_i = 0; bitset_i < BITSET_UINTS; ++bitset_i)
- set[bitset_i] = ~set[bitset_i];
-}
-
-static inline void
-bitset_merge (dest, src)
- bitset dest;
- const bitset src;
-{
- int bitset_i;
- for (bitset_i = 0; bitset_i < BITSET_UINTS; ++bitset_i)
- dest[bitset_i] |= src[bitset_i];
-}
-
-static inline void
-bitset_not_merge (dest, src)
- bitset dest;
- const bitset src;
-{
- int i;
- for (i = 0; i < BITSET_UINTS; ++i)
- dest[i] |= ~src[i];
-}
-
-#ifdef RE_ENABLE_I18N
-/* Inline functions for re_string. */
-static inline int
-re_string_char_size_at (pstr, idx)
- const re_string_t *pstr;
- int idx;
-{
- int byte_idx;
- if (MB_CUR_MAX == 1)
- return 1;
- for (byte_idx = 1; idx + byte_idx < pstr->len; ++byte_idx)
- if (pstr->wcs[idx + byte_idx] != WEOF)
- break;
- return byte_idx;
-}
-
-static inline wint_t
-re_string_wchar_at (pstr, idx)
- const re_string_t *pstr;
- int idx;
-{
- if (MB_CUR_MAX == 1)
- return (wint_t) pstr->mbs[idx];
- return (wint_t) pstr->wcs[idx];
-}
-
-static int
-re_string_elem_size_at (pstr, idx)
- const re_string_t *pstr;
- int idx;
-{
-#ifdef _LIBC
- const unsigned char *p, *extra;
- const int32_t *table, *indirect;
- int32_t tmp;
-# include <locale/weight.h>
- uint_fast32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
-
- if (nrules != 0)
- {
- table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
- extra = (const unsigned char *)
- _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
- indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
- _NL_COLLATE_INDIRECTMB);
- p = pstr->mbs + idx;
- tmp = findidx (&p);
- return p - pstr->mbs - idx;
- }
- else
-#endif /* _LIBC */
- return 1;
-}
-#endif /* RE_ENABLE_I18N */
-
-#endif /* _REGEX_INTERNAL_H */
+++ /dev/null
-/* Extended regular expression matching and search library.
- Copyright (C) 2002, 2003 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
-
-static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
- re_string_t *input, int n);
-static void match_ctx_clean (re_match_context_t *mctx);
-static void match_ctx_free (re_match_context_t *cache);
-static void match_ctx_free_subtops (re_match_context_t *mctx);
-static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node,
- int str_idx, int from, int to);
-static int search_cur_bkref_entry (re_match_context_t *mctx, int str_idx);
-static void match_ctx_clear_flag (re_match_context_t *mctx);
-static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, int node,
- int str_idx);
-static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
- int node, int str_idx);
-static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
- re_dfastate_t **limited_sts, int last_node,
- int last_str_idx, int check_subexp);
-static reg_errcode_t re_search_internal (const regex_t *preg,
- const char *string, int length,
- int start, int range, int stop,
- size_t nmatch, regmatch_t pmatch[],
- int eflags);
-static int re_search_2_stub (struct re_pattern_buffer *bufp,
- const char *string1, int length1,
- const char *string2, int length2,
- int start, int range, struct re_registers *regs,
- int stop, int ret_len);
-static int re_search_stub (struct re_pattern_buffer *bufp,
- const char *string, int length, int start,
- int range, int stop, struct re_registers *regs,
- int ret_len);
-static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
- int nregs, int regs_allocated);
-static inline re_dfastate_t *acquire_init_state_context (reg_errcode_t *err,
- const regex_t *preg,
- const re_match_context_t *mctx,
- int idx);
-static reg_errcode_t prune_impossible_nodes (const regex_t *preg,
- re_match_context_t *mctx);
-static int check_matching (const regex_t *preg, re_match_context_t *mctx,
- int fl_search, int fl_longest_match);
-static int check_halt_node_context (const re_dfa_t *dfa, int node,
- unsigned int context);
-static int check_halt_state_context (const regex_t *preg,
- const re_dfastate_t *state,
- const re_match_context_t *mctx, int idx);
-static void update_regs (re_dfa_t *dfa, regmatch_t *pmatch, int cur_node,
- int cur_idx, int nmatch);
-static int proceed_next_node (const regex_t *preg, int nregs, regmatch_t *regs,
- const re_match_context_t *mctx,
- int *pidx, int node, re_node_set *eps_via_nodes,
- struct re_fail_stack_t *fs);
-static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
- int str_idx, int *dests, int nregs,
- regmatch_t *regs,
- re_node_set *eps_via_nodes);
-static int pop_fail_stack (struct re_fail_stack_t *fs, int *pidx, int nregs,
- regmatch_t *regs, re_node_set *eps_via_nodes);
-static reg_errcode_t set_regs (const regex_t *preg,
- const re_match_context_t *mctx,
- size_t nmatch, regmatch_t *pmatch,
- int fl_backtrack);
-static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs);
-
-#ifdef RE_ENABLE_I18N
-static int sift_states_iter_mb (const regex_t *preg,
- const re_match_context_t *mctx,
- re_sift_context_t *sctx,
- int node_idx, int str_idx, int max_str_idx);
-#endif /* RE_ENABLE_I18N */
-static reg_errcode_t sift_states_backward (const regex_t *preg,
- re_match_context_t *mctx,
- re_sift_context_t *sctx);
-static reg_errcode_t update_cur_sifted_state (const regex_t *preg,
- re_match_context_t *mctx,
- re_sift_context_t *sctx,
- int str_idx,
- re_node_set *dest_nodes);
-static reg_errcode_t add_epsilon_src_nodes (re_dfa_t *dfa,
- re_node_set *dest_nodes,
- const re_node_set *candidates);
-static reg_errcode_t sub_epsilon_src_nodes (re_dfa_t *dfa, int node,
- re_node_set *dest_nodes,
- const re_node_set *and_nodes);
-static int check_dst_limits (re_dfa_t *dfa, re_node_set *limits,
- re_match_context_t *mctx, int dst_node,
- int dst_idx, int src_node, int src_idx);
-static int check_dst_limits_calc_pos (re_dfa_t *dfa, re_match_context_t *mctx,
- int limit, re_node_set *eclosures,
- int subexp_idx, int node, int str_idx);
-static reg_errcode_t check_subexp_limits (re_dfa_t *dfa,
- re_node_set *dest_nodes,
- const re_node_set *candidates,
- re_node_set *limits,
- struct re_backref_cache_entry *bkref_ents,
- int str_idx);
-static reg_errcode_t sift_states_bkref (const regex_t *preg,
- re_match_context_t *mctx,
- re_sift_context_t *sctx,
- int str_idx, re_node_set *dest_nodes);
-static reg_errcode_t clean_state_log_if_need (re_match_context_t *mctx,
- int next_state_log_idx);
-static reg_errcode_t merge_state_array (re_dfa_t *dfa, re_dfastate_t **dst,
- re_dfastate_t **src, int num);
-static re_dfastate_t *transit_state (reg_errcode_t *err, const regex_t *preg,
- re_match_context_t *mctx,
- re_dfastate_t *state, int fl_search);
-static reg_errcode_t check_subexp_matching_top (re_dfa_t *dfa,
- re_match_context_t *mctx,
- re_node_set *cur_nodes,
- int str_idx);
-static re_dfastate_t *transit_state_sb (reg_errcode_t *err, const regex_t *preg,
- re_dfastate_t *pstate,
- int fl_search,
- re_match_context_t *mctx);
-#ifdef RE_ENABLE_I18N
-static reg_errcode_t transit_state_mb (const regex_t *preg,
- re_dfastate_t *pstate,
- re_match_context_t *mctx);
-#endif /* RE_ENABLE_I18N */
-static reg_errcode_t transit_state_bkref (const regex_t *preg,
- re_node_set *nodes,
- re_match_context_t *mctx);
-static reg_errcode_t get_subexp (const regex_t *preg, re_match_context_t *mctx,
- int bkref_node, int bkref_str_idx);
-static reg_errcode_t get_subexp_sub (const regex_t *preg,
- re_match_context_t *mctx,
- re_sub_match_top_t *sub_top,
- re_sub_match_last_t *sub_last,
- int bkref_node, int bkref_str);
-static int find_subexp_node (re_dfa_t *dfa, re_node_set *nodes,
- int subexp_idx, int fl_open);
-static reg_errcode_t check_arrival (const regex_t *preg,
- re_match_context_t *mctx,
- state_array_t *path, int top_node,
- int top_str, int last_node, int last_str,
- int fl_open);
-static reg_errcode_t check_arrival_add_next_nodes (const regex_t *preg,
- re_dfa_t *dfa,
- re_match_context_t *mctx,
- int str_idx,
- re_node_set *cur_nodes,
- re_node_set *next_nodes);
-static reg_errcode_t check_arrival_expand_ecl (re_dfa_t *dfa,
- re_node_set *cur_nodes,
- int ex_subexp, int fl_open);
-static reg_errcode_t check_arrival_expand_ecl_sub (re_dfa_t *dfa,
- re_node_set *dst_nodes,
- int target, int ex_subexp,
- int fl_open);
-static reg_errcode_t expand_bkref_cache (const regex_t *preg,
- re_match_context_t *mctx,
- re_node_set *cur_nodes, int cur_str,
- int last_str, int subexp_num,
- int fl_open);
-static re_dfastate_t **build_trtable (const regex_t *dfa,
- const re_dfastate_t *state,
- int fl_search);
-#ifdef RE_ENABLE_I18N
-static int check_node_accept_bytes (const regex_t *preg, int node_idx,
- const re_string_t *input, int idx);
-# ifdef _LIBC
-static unsigned int find_collation_sequence_value (const unsigned char *mbs,
- size_t name_len);
-# endif /* _LIBC */
-#endif /* RE_ENABLE_I18N */
-static int group_nodes_into_DFAstates (const regex_t *dfa,
- const re_dfastate_t *state,
- re_node_set *states_node,
- bitset *states_ch);
-static int check_node_accept (const regex_t *preg, const re_token_t *node,
- const re_match_context_t *mctx, int idx);
-static reg_errcode_t extend_buffers (re_match_context_t *mctx);
-\f
-/* Entry point for POSIX code. */
-
-/* regexec searches for a given pattern, specified by PREG, in the
- string STRING.
-
- If NMATCH is zero or REG_NOSUB was set in the cflags argument to
- `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
- least NMATCH elements, and we set them to the offsets of the
- corresponding matched substrings.
-
- EFLAGS specifies `execution flags' which affect matching: if
- REG_NOTBOL is set, then ^ does not match at the beginning of the
- string; if REG_NOTEOL is set, then $ does not match at the end.
-
- We return 0 if we find a match and REG_NOMATCH if not. */
-
-int
-regexec (preg, string, nmatch, pmatch, eflags)
- const regex_t *__restrict preg;
- const char *__restrict string;
- size_t nmatch;
- regmatch_t pmatch[];
- int eflags;
-{
- reg_errcode_t err;
- int length = strlen (string);
- if (preg->no_sub)
- err = re_search_internal (preg, string, length, 0, length, length, 0,
- NULL, eflags);
- else
- err = re_search_internal (preg, string, length, 0, length, length, nmatch,
- pmatch, eflags);
- return err != REG_NOERROR;
-}
-#ifdef _LIBC
-weak_alias (__regexec, regexec)
-#endif
-
-/* Entry points for GNU code. */
-
-/* re_match, re_search, re_match_2, re_search_2
-
- The former two functions operate on STRING with length LENGTH,
- while the later two operate on concatenation of STRING1 and STRING2
- with lengths LENGTH1 and LENGTH2, respectively.
-
- re_match() matches the compiled pattern in BUFP against the string,
- starting at index START.
-
- re_search() first tries matching at index START, then it tries to match
- starting from index START + 1, and so on. The last start position tried
- is START + RANGE. (Thus RANGE = 0 forces re_search to operate the same
- way as re_match().)
-
- The parameter STOP of re_{match,search}_2 specifies that no match exceeding
- the first STOP characters of the concatenation of the strings should be
- concerned.
-
- If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
- and all groups is stroed in REGS. (For the "_2" variants, the offsets are
- computed relative to the concatenation, not relative to the individual
- strings.)
-
- On success, re_match* functions return the length of the match, re_search*
- return the position of the start of the match. Return value -1 means no
- match was found and -2 indicates an internal error. */
-
-int
-re_match (bufp, string, length, start, regs)
- struct re_pattern_buffer *bufp;
- const char *string;
- int length, start;
- struct re_registers *regs;
-{
- return re_search_stub (bufp, string, length, start, 0, length, regs, 1);
-}
-#ifdef _LIBC
-weak_alias (__re_match, re_match)
-#endif
-
-int
-re_search (bufp, string, length, start, range, regs)
- struct re_pattern_buffer *bufp;
- const char *string;
- int length, start, range;
- struct re_registers *regs;
-{
- return re_search_stub (bufp, string, length, start, range, length, regs, 0);
-}
-#ifdef _LIBC
-weak_alias (__re_search, re_search)
-#endif
-
-int
-re_match_2 (bufp, string1, length1, string2, length2, start, regs, stop)
- struct re_pattern_buffer *bufp;
- const char *string1, *string2;
- int length1, length2, start, stop;
- struct re_registers *regs;
-{
- return re_search_2_stub (bufp, string1, length1, string2, length2,
- start, 0, regs, stop, 1);
-}
-#ifdef _LIBC
-weak_alias (__re_match_2, re_match_2)
-#endif
-
-int
-re_search_2 (bufp, string1, length1, string2, length2, start, range, regs, stop)
- struct re_pattern_buffer *bufp;
- const char *string1, *string2;
- int length1, length2, start, range, stop;
- struct re_registers *regs;
-{
- return re_search_2_stub (bufp, string1, length1, string2, length2,
- start, range, regs, stop, 0);
-}
-#ifdef _LIBC
-weak_alias (__re_search_2, re_search_2)
-#endif
-
-static int
-re_search_2_stub (bufp, string1, length1, string2, length2, start, range, regs,
- stop, ret_len)
- struct re_pattern_buffer *bufp;
- const char *string1, *string2;
- int length1, length2, start, range, stop, ret_len;
- struct re_registers *regs;
-{
- const char *str;
- int rval;
- int len = length1 + length2;
- int free_str = 0;
-
- if (BE (length1 < 0 || length2 < 0 || stop < 0, 0))
- return -2;
-
- /* Concatenate the strings. */
- if (length2 > 0)
- if (length1 > 0)
- {
- char *s = re_malloc (char, len);
-
- if (BE (s == NULL, 0))
- return -2;
- memcpy (s, string1, length1);
- memcpy (s + length1, string2, length2);
- str = s;
- free_str = 1;
- }
- else
- str = string2;
- else
- str = string1;
-
- rval = re_search_stub (bufp, str, len, start, range, stop, regs,
- ret_len);
- if (free_str)
- re_free ((char *) str);
- return rval;
-}
-
-/* The parameters have the same meaning as those of re_search.
- Additional parameters:
- If RET_LEN is nonzero the length of the match is returned (re_match style);
- otherwise the position of the match is returned. */
-
-static int
-re_search_stub (bufp, string, length, start, range, stop, regs, ret_len)
- struct re_pattern_buffer *bufp;
- const char *string;
- int length, start, range, stop, ret_len;
- struct re_registers *regs;
-{
- reg_errcode_t result;
- regmatch_t *pmatch;
- int nregs, rval;
- int eflags = 0;
-
- /* Check for out-of-range. */
- if (BE (start < 0 || start > length, 0))
- return -1;
- if (BE (start + range > length, 0))
- range = length - start;
- else if (BE (start + range < 0, 0))
- range = -start;
-
- eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
- eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
-
- /* Compile fastmap if we haven't yet. */
- if (range > 0 && bufp->fastmap != NULL && !bufp->fastmap_accurate)
- re_compile_fastmap (bufp);
-
- if (BE (bufp->no_sub, 0))
- regs = NULL;
-
- /* We need at least 1 register. */
- if (regs == NULL)
- nregs = 1;
- else if (BE (bufp->regs_allocated == REGS_FIXED &&
- regs->num_regs < bufp->re_nsub + 1, 0))
- {
- nregs = regs->num_regs;
- if (BE (nregs < 1, 0))
- {
- /* Nothing can be copied to regs. */
- regs = NULL;
- nregs = 1;
- }
- }
- else
- nregs = bufp->re_nsub + 1;
- pmatch = re_malloc (regmatch_t, nregs);
- if (BE (pmatch == NULL, 0))
- return -2;
-
- result = re_search_internal (bufp, string, length, start, range, stop,
- nregs, pmatch, eflags);
-
- rval = 0;
-
- /* I hope we needn't fill ther regs with -1's when no match was found. */
- if (result != REG_NOERROR)
- rval = -1;
- else if (regs != NULL)
- {
- /* If caller wants register contents data back, copy them. */
- bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
- bufp->regs_allocated);
- if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0))
- rval = -2;
- }
-
- if (BE (rval == 0, 1))
- {
- if (ret_len)
- {
- assert (pmatch[0].rm_so == start);
- rval = pmatch[0].rm_eo - start;
- }
- else
- rval = pmatch[0].rm_so;
- }
- re_free (pmatch);
- return rval;
-}
-
-static unsigned
-re_copy_regs (regs, pmatch, nregs, regs_allocated)
- struct re_registers *regs;
- regmatch_t *pmatch;
- int nregs, regs_allocated;
-{
- int rval = REGS_REALLOCATE;
- int i;
- int need_regs = nregs + 1;
- /* We need one extra element beyond `num_regs' for the `-1' marker GNU code
- uses. */
-
- /* Have the register data arrays been allocated? */
- if (regs_allocated == REGS_UNALLOCATED)
- { /* No. So allocate them with malloc. */
- regs->start = re_malloc (regoff_t, need_regs);
- if (BE (regs->start == NULL, 0))
- return REGS_UNALLOCATED;
- regs->end = re_malloc (regoff_t, need_regs);
- if (BE (regs->end == NULL, 0))
- {
- re_free (regs->start);
- return REGS_UNALLOCATED;
- }
- regs->num_regs = need_regs;
- }
- else if (regs_allocated == REGS_REALLOCATE)
- { /* Yes. If we need more elements than were already
- allocated, reallocate them. If we need fewer, just
- leave it alone. */
- if (need_regs > regs->num_regs)
- {
- regs->start = re_realloc (regs->start, regoff_t, need_regs);
- if (BE (regs->start == NULL, 0))
- {
- if (regs->end != NULL)
- re_free (regs->end);
- return REGS_UNALLOCATED;
- }
- regs->end = re_realloc (regs->end, regoff_t, need_regs);
- if (BE (regs->end == NULL, 0))
- {
- re_free (regs->start);
- return REGS_UNALLOCATED;
- }
- regs->num_regs = need_regs;
- }
- }
- else
- {
- assert (regs_allocated == REGS_FIXED);
- /* This function may not be called with REGS_FIXED and nregs too big. */
- assert (regs->num_regs >= nregs);
- rval = REGS_FIXED;
- }
-
- /* Copy the regs. */
- for (i = 0; i < nregs; ++i)
- {
- regs->start[i] = pmatch[i].rm_so;
- regs->end[i] = pmatch[i].rm_eo;
- }
- for ( ; i < regs->num_regs; ++i)
- regs->start[i] = regs->end[i] = -1;
-
- return rval;
-}
-
-/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
- ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use
- this memory for recording register information. STARTS and ENDS
- must be allocated using the malloc library routine, and must each
- be at least NUM_REGS * sizeof (regoff_t) bytes long.
-
- If NUM_REGS == 0, then subsequent matches should allocate their own
- register data.
-
- Unless this function is called, the first search or match using
- PATTERN_BUFFER will allocate its own register data, without
- freeing the old data. */
-
-void
-re_set_registers (bufp, regs, num_regs, starts, ends)
- struct re_pattern_buffer *bufp;
- struct re_registers *regs;
- unsigned num_regs;
- regoff_t *starts, *ends;
-{
- if (num_regs)
- {
- bufp->regs_allocated = REGS_REALLOCATE;
- regs->num_regs = num_regs;
- regs->start = starts;
- regs->end = ends;
- }
- else
- {
- bufp->regs_allocated = REGS_UNALLOCATED;
- regs->num_regs = 0;
- regs->start = regs->end = (regoff_t *) 0;
- }
-}
-#ifdef _LIBC
-weak_alias (__re_set_registers, re_set_registers)
-#endif
-\f
-/* Entry points compatible with 4.2 BSD regex library. We don't define
- them unless specifically requested. */
-
-#if defined _REGEX_RE_COMP || defined _LIBC
-int
-# ifdef _LIBC
-weak_function
-# endif
-re_exec (s)
- const char *s;
-{
- return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
-}
-#endif /* _REGEX_RE_COMP */
-\f
-static re_node_set empty_set;
-
-/* Internal entry point. */
-
-/* Searches for a compiled pattern PREG in the string STRING, whose
- length is LENGTH. NMATCH, PMATCH, and EFLAGS have the same
- mingings with regexec. START, and RANGE have the same meanings
- with re_search.
- Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
- otherwise return the error code.
- Note: We assume front end functions already check ranges.
- (START + RANGE >= 0 && START + RANGE <= LENGTH) */
-
-static reg_errcode_t
-re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
- eflags)
- const regex_t *preg;
- const char *string;
- int length, start, range, stop, eflags;
- size_t nmatch;
- regmatch_t pmatch[];
-{
- reg_errcode_t err;
- re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
- re_string_t input;
- int left_lim, right_lim, incr;
- int fl_longest_match, match_first, match_last = -1;
- int fast_translate, sb;
- re_match_context_t mctx;
- char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate
- && range && !preg->can_be_null) ? preg->fastmap : NULL);
-
- /* Check if the DFA haven't been compiled. */
- if (BE (preg->used == 0 || dfa->init_state == NULL
- || dfa->init_state_word == NULL || dfa->init_state_nl == NULL
- || dfa->init_state_begbuf == NULL, 0))
- return REG_NOMATCH;
-
- re_node_set_init_empty (&empty_set);
- memset (&mctx, '\0', sizeof (re_match_context_t));
-
- /* We must check the longest matching, if nmatch > 0. */
- fl_longest_match = (nmatch != 0 || dfa->nbackref);
-
- err = re_string_allocate (&input, string, length, dfa->nodes_len + 1,
- preg->translate, preg->syntax & RE_ICASE);
- if (BE (err != REG_NOERROR, 0))
- goto free_return;
- input.stop = stop;
-
- err = match_ctx_init (&mctx, eflags, &input, dfa->nbackref * 2);
- if (BE (err != REG_NOERROR, 0))
- goto free_return;
-
- /* We will log all the DFA states through which the dfa pass,
- if nmatch > 1, or this dfa has "multibyte node", which is a
- back-reference or a node which can accept multibyte character or
- multi character collating element. */
- if (nmatch > 1 || dfa->has_mb_node)
- {
- mctx.state_log = re_malloc (re_dfastate_t *, dfa->nodes_len + 1);
- if (BE (mctx.state_log == NULL, 0))
- {
- err = REG_ESPACE;
- goto free_return;
- }
- }
- else
- mctx.state_log = NULL;
-
-#ifdef DEBUG
- /* We assume front-end functions already check them. */
- assert (start + range >= 0 && start + range <= length);
-#endif
-
- match_first = start;
- input.tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
- : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
-
- /* Check incrementally whether of not the input string match. */
- incr = (range < 0) ? -1 : 1;
- left_lim = (range < 0) ? start + range : start;
- right_lim = (range < 0) ? start : start + range;
- sb = MB_CUR_MAX == 1;
- fast_translate = sb || !(preg->syntax & RE_ICASE || preg->translate);
-
- for (;;)
- {
- /* At first get the current byte from input string. */
- if (fastmap)
- {
- if (BE (fast_translate, 1))
- {
- unsigned RE_TRANSLATE_TYPE t
- = (unsigned RE_TRANSLATE_TYPE) preg->translate;
- if (BE (range >= 0, 1))
- {
- if (BE (t != NULL, 0))
- {
- while (BE (match_first < right_lim, 1)
- && !fastmap[t[(unsigned char) string[match_first]]])
- ++match_first;
- }
- else
- {
- while (BE (match_first < right_lim, 1)
- && !fastmap[(unsigned char) string[match_first]])
- ++match_first;
- }
- if (BE (match_first == right_lim, 0))
- {
- int ch = match_first >= length
- ? 0 : (unsigned char) string[match_first];
- if (!fastmap[t ? t[ch] : ch])
- break;
- }
- }
- else
- {
- while (match_first >= left_lim)
- {
- int ch = match_first >= length
- ? 0 : (unsigned char) string[match_first];
- if (fastmap[t ? t[ch] : ch])
- break;
- --match_first;
- }
- if (match_first < left_lim)
- break;
- }
- }
- else
- {
- int ch;
-
- do
- {
- /* In this case, we can't determine easily the current byte,
- since it might be a component byte of a multibyte
- character. Then we use the constructed buffer
- instead. */
- /* If MATCH_FIRST is out of the valid range, reconstruct the
- buffers. */
- if (input.raw_mbs_idx + input.valid_len <= match_first
- || match_first < input.raw_mbs_idx)
- {
- err = re_string_reconstruct (&input, match_first, eflags,
- preg->newline_anchor);
- if (BE (err != REG_NOERROR, 0))
- goto free_return;
- }
- /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
- Note that MATCH_FIRST must not be smaller than 0. */
- ch = ((match_first >= length) ? 0
- : re_string_byte_at (&input,
- match_first - input.raw_mbs_idx));
- if (fastmap[ch])
- break;
- match_first += incr;
- }
- while (match_first >= left_lim && match_first <= right_lim);
- if (! fastmap[ch])
- break;
- }
- }
-
- /* Reconstruct the buffers so that the matcher can assume that
- the matching starts from the begining of the buffer. */
- err = re_string_reconstruct (&input, match_first, eflags,
- preg->newline_anchor);
- if (BE (err != REG_NOERROR, 0))
- goto free_return;
-#ifdef RE_ENABLE_I18N
- /* Eliminate it when it is a component of a multibyte character
- and isn't the head of a multibyte character. */
- if (sb || re_string_first_byte (&input, 0))
-#endif
- {
- /* It seems to be appropriate one, then use the matcher. */
- /* We assume that the matching starts from 0. */
- mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
- match_last = check_matching (preg, &mctx, 0, fl_longest_match);
- if (match_last != -1)
- {
- if (BE (match_last == -2, 0))
- {
- err = REG_ESPACE;
- goto free_return;
- }
- else
- {
- mctx.match_last = match_last;
- if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
- {
- re_dfastate_t *pstate = mctx.state_log[match_last];
- mctx.last_node = check_halt_state_context (preg, pstate,
- &mctx, match_last);
- }
- if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
- || dfa->nbackref)
- {
- err = prune_impossible_nodes (preg, &mctx);
- if (err == REG_NOERROR)
- break;
- if (BE (err != REG_NOMATCH, 0))
- goto free_return;
- }
- else
- break; /* We found a matching. */
- }
- }
- match_ctx_clean (&mctx);
- }
- /* Update counter. */
- match_first += incr;
- if (match_first < left_lim || right_lim < match_first)
- break;
- }
-
- /* Set pmatch[] if we need. */
- if (match_last != -1 && nmatch > 0)
- {
- int reg_idx;
-
- /* Initialize registers. */
- for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
- pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
-
- /* Set the points where matching start/end. */
- pmatch[0].rm_so = 0;
- pmatch[0].rm_eo = mctx.match_last;
-
- if (!preg->no_sub && nmatch > 1)
- {
- err = set_regs (preg, &mctx, nmatch, pmatch,
- dfa->has_plural_match && dfa->nbackref > 0);
- if (BE (err != REG_NOERROR, 0))
- goto free_return;
- }
-
- /* At last, add the offset to the each registers, since we slided
- the buffers so that We can assume that the matching starts from 0. */
- for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
- if (pmatch[reg_idx].rm_so != -1)
- {
- pmatch[reg_idx].rm_so += match_first;
- pmatch[reg_idx].rm_eo += match_first;
- }
- }
- err = (match_last == -1) ? REG_NOMATCH : REG_NOERROR;
- free_return:
- re_free (mctx.state_log);
- if (dfa->nbackref)
- match_ctx_free (&mctx);
- re_string_destruct (&input);
- return err;
-}
-
-static reg_errcode_t
-prune_impossible_nodes (preg, mctx)
- const regex_t *preg;
- re_match_context_t *mctx;
-{
- int halt_node, match_last;
- reg_errcode_t ret;
- re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
- re_dfastate_t **sifted_states;
- re_dfastate_t **lim_states = NULL;
- re_sift_context_t sctx;
-#ifdef DEBUG
- assert (mctx->state_log != NULL);
-#endif
- match_last = mctx->match_last;
- halt_node = mctx->last_node;
- sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
- if (BE (sifted_states == NULL, 0))
- {
- ret = REG_ESPACE;
- goto free_return;
- }
- if (dfa->nbackref)
- {
- lim_states = re_malloc (re_dfastate_t *, match_last + 1);
- if (BE (lim_states == NULL, 0))
- {
- ret = REG_ESPACE;
- goto free_return;
- }
- while (1)
- {
- memset (lim_states, '\0',
- sizeof (re_dfastate_t *) * (match_last + 1));
- match_ctx_clear_flag (mctx);
- sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
- match_last, 0);
- ret = sift_states_backward (preg, mctx, &sctx);
- re_node_set_free (&sctx.limits);
- if (BE (ret != REG_NOERROR, 0))
- goto free_return;
- if (sifted_states[0] != NULL || lim_states[0] != NULL)
- break;
- do
- {
- --match_last;
- if (match_last < 0)
- {
- ret = REG_NOMATCH;
- goto free_return;
- }
- } while (!mctx->state_log[match_last]->halt);
- halt_node = check_halt_state_context (preg,
- mctx->state_log[match_last],
- mctx, match_last);
- }
- ret = merge_state_array (dfa, sifted_states, lim_states,
- match_last + 1);
- re_free (lim_states);
- lim_states = NULL;
- if (BE (ret != REG_NOERROR, 0))
- goto free_return;
- }
- else
- {
- sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
- match_last, 0);
- ret = sift_states_backward (preg, mctx, &sctx);
- re_node_set_free (&sctx.limits);
- if (BE (ret != REG_NOERROR, 0))
- goto free_return;
- }
- re_free (mctx->state_log);
- mctx->state_log = sifted_states;
- sifted_states = NULL;
- mctx->last_node = halt_node;
- mctx->match_last = match_last;
- ret = REG_NOERROR;
- free_return:
- re_free (sifted_states);
- re_free (lim_states);
- return ret;
-}
-
-/* Acquire an initial state and return it.
- We must select appropriate initial state depending on the context,
- since initial states may have constraints like "\<", "^", etc.. */
-
-static inline re_dfastate_t *
-acquire_init_state_context (err, preg, mctx, idx)
- reg_errcode_t *err;
- const regex_t *preg;
- const re_match_context_t *mctx;
- int idx;
-{
- re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
-
- *err = REG_NOERROR;
- if (dfa->init_state->has_constraint)
- {
- unsigned int context;
- context = re_string_context_at (mctx->input, idx - 1, mctx->eflags,
- preg->newline_anchor);
- if (IS_WORD_CONTEXT (context))
- return dfa->init_state_word;
- else if (IS_ORDINARY_CONTEXT (context))
- return dfa->init_state;
- else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
- return dfa->init_state_begbuf;
- else if (IS_NEWLINE_CONTEXT (context))
- return dfa->init_state_nl;
- else if (IS_BEGBUF_CONTEXT (context))
- {
- /* It is relatively rare case, then calculate on demand. */
- return re_acquire_state_context (err, dfa,
- dfa->init_state->entrance_nodes,
- context);
- }
- else
- /* Must not happen? */
- return dfa->init_state;
- }
- else
- return dfa->init_state;
-}
-
-/* Check whether the regular expression match input string INPUT or not,
- and return the index where the matching end, return -1 if not match,
- or return -2 in case of an error.
- FL_SEARCH means we must search where the matching starts,
- FL_LONGEST_MATCH means we want the POSIX longest matching.
- Note that the matcher assume that the maching starts from the current
- index of the buffer. */
-
-static int
-check_matching (preg, mctx, fl_search, fl_longest_match)
- const regex_t *preg;
- re_match_context_t *mctx;
- int fl_search, fl_longest_match;
-{
- re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
- reg_errcode_t err;
- int match = 0;
- int match_last = -1;
- int cur_str_idx = re_string_cur_idx (mctx->input);
- re_dfastate_t *cur_state;
-
- cur_state = acquire_init_state_context (&err, preg, mctx, cur_str_idx);
- /* An initial state must not be NULL(invalid state). */
- if (BE (cur_state == NULL, 0))
- return -2;
- if (mctx->state_log != NULL)
- mctx->state_log[cur_str_idx] = cur_state;
-
- /* Check OP_OPEN_SUBEXP in the initial state in case that we use them
- later. E.g. Processing back references. */
- if (dfa->nbackref)
- {
- err = check_subexp_matching_top (dfa, mctx, &cur_state->nodes, 0);
- if (BE (err != REG_NOERROR, 0))
- return err;
- }
-
- if (cur_state->has_backref)
- {
- err = transit_state_bkref (preg, &cur_state->nodes, mctx);
- if (BE (err != REG_NOERROR, 0))
- return err;
- }
-
- /* If the RE accepts NULL string. */
- if (cur_state->halt)
- {
- if (!cur_state->has_constraint
- || check_halt_state_context (preg, cur_state, mctx, cur_str_idx))
- {
- if (!fl_longest_match)
- return cur_str_idx;
- else
- {
- match_last = cur_str_idx;
- match = 1;
- }
- }
- }
-
- while (!re_string_eoi (mctx->input))
- {
- cur_state = transit_state (&err, preg, mctx, cur_state,
- fl_search && !match);
- if (cur_state == NULL) /* Reached at the invalid state or an error. */
- {
- cur_str_idx = re_string_cur_idx (mctx->input);
- if (BE (err != REG_NOERROR, 0))
- return -2;
- if (fl_search && !match)
- {
- /* Restart from initial state, since we are searching
- the point from where matching start. */
-#ifdef RE_ENABLE_I18N
- if (MB_CUR_MAX == 1
- || re_string_first_byte (mctx->input, cur_str_idx))
-#endif /* RE_ENABLE_I18N */
- cur_state = acquire_init_state_context (&err, preg, mctx,
- cur_str_idx);
- if (BE (cur_state == NULL && err != REG_NOERROR, 0))
- return -2;
- if (mctx->state_log != NULL)
- mctx->state_log[cur_str_idx] = cur_state;
- }
- else if (!fl_longest_match && match)
- break;
- else /* (fl_longest_match && match) || (!fl_search && !match) */
- {
- if (mctx->state_log == NULL)
- break;
- else
- {
- int max = mctx->state_log_top;
- for (; cur_str_idx <= max; ++cur_str_idx)
- if (mctx->state_log[cur_str_idx] != NULL)
- break;
- if (cur_str_idx > max)
- break;
- }
- }
- }
-
- if (cur_state != NULL && cur_state->halt)
- {
- /* Reached at a halt state.
- Check the halt state can satisfy the current context. */
- if (!cur_state->has_constraint
- || check_halt_state_context (preg, cur_state, mctx,
- re_string_cur_idx (mctx->input)))
- {
- /* We found an appropriate halt state. */
- match_last = re_string_cur_idx (mctx->input);
- match = 1;
- if (!fl_longest_match)
- break;
- }
- }
- }
- return match_last;
-}
-
-/* Check NODE match the current context. */
-
-static int check_halt_node_context (dfa, node, context)
- const re_dfa_t *dfa;
- int node;
- unsigned int context;
-{
- re_token_type_t type = dfa->nodes[node].type;
- unsigned int constraint = dfa->nodes[node].constraint;
- if (type != END_OF_RE)
- return 0;
- if (!constraint)
- return 1;
- if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
- return 0;
- return 1;
-}
-
-/* Check the halt state STATE match the current context.
- Return 0 if not match, if the node, STATE has, is a halt node and
- match the context, return the node. */
-
-static int
-check_halt_state_context (preg, state, mctx, idx)
- const regex_t *preg;
- const re_dfastate_t *state;
- const re_match_context_t *mctx;
- int idx;
-{
- re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
- int i;
- unsigned int context;
-#ifdef DEBUG
- assert (state->halt);
-#endif
- context = re_string_context_at (mctx->input, idx, mctx->eflags,
- preg->newline_anchor);
- for (i = 0; i < state->nodes.nelem; ++i)
- if (check_halt_node_context (dfa, state->nodes.elems[i], context))
- return state->nodes.elems[i];
- return 0;
-}
-
-/* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
- corresponding to the DFA).
- Return the destination node, and update EPS_VIA_NODES, return -1 in case
- of errors. */
-
-static int
-proceed_next_node (preg, nregs, regs, mctx, pidx, node, eps_via_nodes, fs)
- const regex_t *preg;
- regmatch_t *regs;
- const re_match_context_t *mctx;
- int nregs, *pidx, node;
- re_node_set *eps_via_nodes;
- struct re_fail_stack_t *fs;
-{
- re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
- int i, err, dest_node;
- dest_node = -1;
- if (IS_EPSILON_NODE (dfa->nodes[node].type))
- {
- re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
- int ndest, dest_nodes[2];
- err = re_node_set_insert (eps_via_nodes, node);
- if (BE (err < 0, 0))
- return -1;
- /* Pick up valid destinations. */
- for (ndest = 0, i = 0; i < dfa->edests[node].nelem; ++i)
- {
- int candidate = dfa->edests[node].elems[i];
- if (!re_node_set_contains (cur_nodes, candidate))
- continue;
- dest_nodes[0] = (ndest == 0) ? candidate : dest_nodes[0];
- dest_nodes[1] = (ndest == 1) ? candidate : dest_nodes[1];
- ++ndest;
- }
- if (ndest <= 1)
- return ndest == 0 ? -1 : (ndest == 1 ? dest_nodes[0] : 0);
- /* In order to avoid infinite loop like "(a*)*". */
- if (re_node_set_contains (eps_via_nodes, dest_nodes[0]))
- return dest_nodes[1];
- if (fs != NULL)
- push_fail_stack (fs, *pidx, dest_nodes, nregs, regs, eps_via_nodes);
- return dest_nodes[0];
- }
- else
- {
- int naccepted = 0;
- re_token_type_t type = dfa->nodes[node].type;
-
-#ifdef RE_ENABLE_I18N
- if (ACCEPT_MB_NODE (type))
- naccepted = check_node_accept_bytes (preg, node, mctx->input, *pidx);
- else
-#endif /* RE_ENABLE_I18N */
- if (type == OP_BACK_REF)
- {
- int subexp_idx = dfa->nodes[node].opr.idx;
- naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
- if (fs != NULL)
- {
- if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1)
- return -1;
- else if (naccepted)
- {
- char *buf = (char *) re_string_get_buffer (mctx->input);
- if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
- naccepted) != 0)
- return -1;
- }
- }
-
- if (naccepted == 0)
- {
- err = re_node_set_insert (eps_via_nodes, node);
- if (BE (err < 0, 0))
- return -2;
- dest_node = dfa->edests[node].elems[0];
- if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
- dest_node))
- return dest_node;
- }
- }
-
- if (naccepted != 0
- || check_node_accept (preg, dfa->nodes + node, mctx, *pidx))
- {
- dest_node = dfa->nexts[node];
- *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
- if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
- || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
- dest_node)))
- return -1;
- re_node_set_empty (eps_via_nodes);
- return dest_node;
- }
- }
- return -1;
-}
-
-static reg_errcode_t
-push_fail_stack (fs, str_idx, dests, nregs, regs, eps_via_nodes)
- struct re_fail_stack_t *fs;
- int str_idx, *dests, nregs;
- regmatch_t *regs;
- re_node_set *eps_via_nodes;
-{
- reg_errcode_t err;
- int num = fs->num++;
- if (fs->num == fs->alloc)
- {
- struct re_fail_stack_ent_t *new_array;
- fs->alloc *= 2;
- new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t)
- * fs->alloc));
- if (new_array == NULL)
- return REG_ESPACE;
- fs->stack = new_array;
- }
- fs->stack[num].idx = str_idx;
- fs->stack[num].node = dests[1];
- fs->stack[num].regs = re_malloc (regmatch_t, nregs);
- memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
- err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
- return err;
-}
-
-static int
-pop_fail_stack (fs, pidx, nregs, regs, eps_via_nodes)
- struct re_fail_stack_t *fs;
- int *pidx, nregs;
- regmatch_t *regs;
- re_node_set *eps_via_nodes;
-{
- int num = --fs->num;
- assert (num >= 0);
- *pidx = fs->stack[num].idx;
- memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
- re_node_set_free (eps_via_nodes);
- re_free (fs->stack[num].regs);
- *eps_via_nodes = fs->stack[num].eps_via_nodes;
- return fs->stack[num].node;
-}
-
-/* Set the positions where the subexpressions are starts/ends to registers
- PMATCH.
- Note: We assume that pmatch[0] is already set, and
- pmatch[i].rm_so == pmatch[i].rm_eo == -1 (i > 1). */
-
-static reg_errcode_t
-set_regs (preg, mctx, nmatch, pmatch, fl_backtrack)
- const regex_t *preg;
- const re_match_context_t *mctx;
- size_t nmatch;
- regmatch_t *pmatch;
- int fl_backtrack;
-{
- re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
- int idx, cur_node, real_nmatch;
- re_node_set eps_via_nodes;
- struct re_fail_stack_t *fs;
- struct re_fail_stack_t fs_body = {0, 2, NULL};
-#ifdef DEBUG
- assert (nmatch > 1);
- assert (mctx->state_log != NULL);
-#endif
- if (fl_backtrack)
- {
- fs = &fs_body;
- fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
- }
- else
- fs = NULL;
- cur_node = dfa->init_node;
- real_nmatch = (nmatch <= preg->re_nsub) ? nmatch : preg->re_nsub + 1;
- re_node_set_init_empty (&eps_via_nodes);
- for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
- {
- update_regs (dfa, pmatch, cur_node, idx, real_nmatch);
- if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
- {
- int reg_idx;
- if (fs)
- {
- for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
- if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
- break;
- if (reg_idx == nmatch)
- {
- re_node_set_free (&eps_via_nodes);
- return free_fail_stack_return (fs);
- }
- cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
- &eps_via_nodes);
- }
- else
- {
- re_node_set_free (&eps_via_nodes);
- return REG_NOERROR;
- }
- }
-
- /* Proceed to next node. */
- cur_node = proceed_next_node (preg, nmatch, pmatch, mctx, &idx, cur_node,
- &eps_via_nodes, fs);
-
- if (BE (cur_node < 0, 0))
- {
- if (cur_node == -2)
- return REG_ESPACE;
- if (fs)
- cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
- &eps_via_nodes);
- else
- {
- re_node_set_free (&eps_via_nodes);
- return REG_NOMATCH;
- }
- }
- }
- re_node_set_free (&eps_via_nodes);
- return free_fail_stack_return (fs);
-}
-
-static reg_errcode_t
-free_fail_stack_return (fs)
- struct re_fail_stack_t *fs;
-{
- if (fs)
- {
- int fs_idx;
- for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)
- {
- re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
- re_free (fs->stack[fs_idx].regs);
- }
- re_free (fs->stack);
- }
- return REG_NOERROR;
-}
-
-static void
-update_regs (dfa, pmatch, cur_node, cur_idx, nmatch)
- re_dfa_t *dfa;
- regmatch_t *pmatch;
- int cur_node, cur_idx, nmatch;
-{
- int type = dfa->nodes[cur_node].type;
- int reg_num;
- if (type != OP_OPEN_SUBEXP && type != OP_CLOSE_SUBEXP)
- return;
- reg_num = dfa->nodes[cur_node].opr.idx + 1;
- if (reg_num >= nmatch)
- return;
- if (type == OP_OPEN_SUBEXP)
- {
- /* We are at the first node of this sub expression. */
- pmatch[reg_num].rm_so = cur_idx;
- pmatch[reg_num].rm_eo = -1;
- }
- else if (type == OP_CLOSE_SUBEXP)
- /* We are at the first node of this sub expression. */
- pmatch[reg_num].rm_eo = cur_idx;
-}
-
-#define NUMBER_OF_STATE 1
-
-/* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
- and sift the nodes in each states according to the following rules.
- Updated state_log will be wrote to STATE_LOG.
-
- Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if...
- 1. When STR_IDX == MATCH_LAST(the last index in the state_log):
- If `a' isn't the LAST_NODE and `a' can't epsilon transit to
- the LAST_NODE, we throw away the node `a'.
- 2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts
- string `s' and transit to `b':
- i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
- away the node `a'.
- ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
- throwed away, we throw away the node `a'.
- 3. When 0 <= STR_IDX < n and 'a' epsilon transit to 'b':
- i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
- node `a'.
- ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is throwed away,
- we throw away the node `a'. */
-
-#define STATE_NODE_CONTAINS(state,node) \
- ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
-
-static reg_errcode_t
-sift_states_backward (preg, mctx, sctx)
- const regex_t *preg;
- re_match_context_t *mctx;
- re_sift_context_t *sctx;
-{
- reg_errcode_t err;
- re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
- int null_cnt = 0;
- int str_idx = sctx->last_str_idx;
- re_node_set cur_dest;
- re_node_set *cur_src; /* Points the state_log[str_idx]->nodes */
-
-#ifdef DEBUG
- assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
-#endif
- cur_src = &mctx->state_log[str_idx]->nodes;
-
- /* Build sifted state_log[str_idx]. It has the nodes which can epsilon
- transit to the last_node and the last_node itself. */
- err = re_node_set_init_1 (&cur_dest, sctx->last_node);
- if (BE (err != REG_NOERROR, 0))
- return err;
- err = update_cur_sifted_state (preg, mctx, sctx, str_idx, &cur_dest);
- if (BE (err != REG_NOERROR, 0))
- goto free_return;
-
- /* Then check each states in the state_log. */
- while (str_idx > 0)
- {
- int i, ret;
- /* Update counters. */
- null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
- if (null_cnt > mctx->max_mb_elem_len)
- {
- memset (sctx->sifted_states, '\0',
- sizeof (re_dfastate_t *) * str_idx);
- re_node_set_free (&cur_dest);
- return REG_NOERROR;
- }
- re_node_set_empty (&cur_dest);
- --str_idx;
- cur_src = ((mctx->state_log[str_idx] == NULL) ? &empty_set
- : &mctx->state_log[str_idx]->nodes);
-
- /* Then build the next sifted state.
- We build the next sifted state on `cur_dest', and update
- `sifted_states[str_idx]' with `cur_dest'.
- Note:
- `cur_dest' is the sifted state from `state_log[str_idx + 1]'.
- `cur_src' points the node_set of the old `state_log[str_idx]'. */
- for (i = 0; i < cur_src->nelem; i++)
- {
- int prev_node = cur_src->elems[i];
- int naccepted = 0;
- re_token_type_t type = dfa->nodes[prev_node].type;
-
- if (IS_EPSILON_NODE(type))
- continue;
-#ifdef RE_ENABLE_I18N
- /* If the node may accept `multi byte'. */
- if (ACCEPT_MB_NODE (type))
- naccepted = sift_states_iter_mb (preg, mctx, sctx, prev_node,
- str_idx, sctx->last_str_idx);
-
-#endif /* RE_ENABLE_I18N */
- /* We don't check backreferences here.
- See update_cur_sifted_state(). */
-
- if (!naccepted
- && check_node_accept (preg, dfa->nodes + prev_node, mctx,
- str_idx)
- && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
- dfa->nexts[prev_node]))
- naccepted = 1;
-
- if (naccepted == 0)
- continue;
-
- if (sctx->limits.nelem)
- {
- int to_idx = str_idx + naccepted;
- if (check_dst_limits (dfa, &sctx->limits, mctx,
- dfa->nexts[prev_node], to_idx,
- prev_node, str_idx))
- continue;
- }
- ret = re_node_set_insert (&cur_dest, prev_node);
- if (BE (ret == -1, 0))
- {
- err = REG_ESPACE;
- goto free_return;
- }
- }
-
- /* Add all the nodes which satisfy the following conditions:
- - It can epsilon transit to a node in CUR_DEST.
- - It is in CUR_SRC.
- And update state_log. */
- err = update_cur_sifted_state (preg, mctx, sctx, str_idx, &cur_dest);
- if (BE (err != REG_NOERROR, 0))
- goto free_return;
- }
- err = REG_NOERROR;
- free_return:
- re_node_set_free (&cur_dest);
- return err;
-}
-
-/* Helper functions. */
-
-static inline reg_errcode_t
-clean_state_log_if_need (mctx, next_state_log_idx)
- re_match_context_t *mctx;
- int next_state_log_idx;
-{
- int top = mctx->state_log_top;
-
- if (next_state_log_idx >= mctx->input->bufs_len
- || (next_state_log_idx >= mctx->input->valid_len
- && mctx->input->valid_len < mctx->input->len))
- {
- reg_errcode_t err;
- err = extend_buffers (mctx);
- if (BE (err != REG_NOERROR, 0))
- return err;
- }
-
- if (top < next_state_log_idx)
- {
- memset (mctx->state_log + top + 1, '\0',
- sizeof (re_dfastate_t *) * (next_state_log_idx - top));
- mctx->state_log_top = next_state_log_idx;
- }
- return REG_NOERROR;
-}
-
-static reg_errcode_t
-merge_state_array (dfa, dst, src, num)
- re_dfa_t *dfa;
- re_dfastate_t **dst;
- re_dfastate_t **src;
- int num;
-{
- int st_idx;
- reg_errcode_t err;
- for (st_idx = 0; st_idx < num; ++st_idx)
- {
- if (dst[st_idx] == NULL)
- dst[st_idx] = src[st_idx];
- else if (src[st_idx] != NULL)
- {
- re_node_set merged_set;
- err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
- &src[st_idx]->nodes);
- if (BE (err != REG_NOERROR, 0))
- return err;
- dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
- re_node_set_free (&merged_set);
- if (BE (err != REG_NOERROR, 0))
- return err;
- }
- }
- return REG_NOERROR;
-}
-
-static reg_errcode_t
-update_cur_sifted_state (preg, mctx, sctx, str_idx, dest_nodes)
- const regex_t *preg;
- re_match_context_t *mctx;
- re_sift_context_t *sctx;
- int str_idx;
- re_node_set *dest_nodes;
-{
- reg_errcode_t err;
- re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
- const re_node_set *candidates;
- candidates = ((mctx->state_log[str_idx] == NULL) ? &empty_set
- : &mctx->state_log[str_idx]->nodes);
-
- /* At first, add the nodes which can epsilon transit to a node in
- DEST_NODE. */
- if (dest_nodes->nelem)
- {
- err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
- if (BE (err != REG_NOERROR, 0))
- return err;
- }
-
- /* Then, check the limitations in the current sift_context. */
- if (dest_nodes->nelem && sctx->limits.nelem)
- {
- err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
- mctx->bkref_ents, str_idx);
- if (BE (err != REG_NOERROR, 0))
- return err;
- }
-
- /* Update state_log. */
- sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
- if (BE (sctx->sifted_states[str_idx] == NULL && err != REG_NOERROR, 0))
- return err;
-
- if ((mctx->state_log[str_idx] != NULL
- && mctx->state_log[str_idx]->has_backref))
- {
- err = sift_states_bkref (preg, mctx, sctx, str_idx, dest_nodes);
- if (BE (err != REG_NOERROR, 0))
- return err;
- }
- return REG_NOERROR;
-}
-
-static reg_errcode_t
-add_epsilon_src_nodes (dfa, dest_nodes, candidates)
- re_dfa_t *dfa;
- re_node_set *dest_nodes;
- const re_node_set *candidates;
-{
- reg_errcode_t err;
- int src_idx;
- re_node_set src_copy;
-
- err = re_node_set_init_copy (&src_copy, dest_nodes);
- if (BE (err != REG_NOERROR, 0))
- return err;
- for (src_idx = 0; src_idx < src_copy.nelem; ++src_idx)
- {
- err = re_node_set_add_intersect (dest_nodes, candidates,
- dfa->inveclosures
- + src_copy.elems[src_idx]);
- if (BE (err != REG_NOERROR, 0))
- {
- re_node_set_free (&src_copy);
- return err;
- }
- }
- re_node_set_free (&src_copy);
- return REG_NOERROR;
-}
-
-static reg_errcode_t
-sub_epsilon_src_nodes (dfa, node, dest_nodes, candidates)
- re_dfa_t *dfa;
- int node;
- re_node_set *dest_nodes;
- const re_node_set *candidates;
-{
- int ecl_idx;
- reg_errcode_t err;
- re_node_set *inv_eclosure = dfa->inveclosures + node;
- re_node_set except_nodes;
- re_node_set_init_empty (&except_nodes);
- for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
- {
- int cur_node = inv_eclosure->elems[ecl_idx];
- if (cur_node == node)
- continue;
- if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
- {
- int edst1 = dfa->edests[cur_node].elems[0];
- int edst2 = ((dfa->edests[cur_node].nelem > 1)
- ? dfa->edests[cur_node].elems[1] : -1);
- if ((!re_node_set_contains (inv_eclosure, edst1)
- && re_node_set_contains (dest_nodes, edst1))
- || (edst2 > 0
- && !re_node_set_contains (inv_eclosure, edst2)
- && re_node_set_contains (dest_nodes, edst2)))
- {
- err = re_node_set_add_intersect (&except_nodes, candidates,
- dfa->inveclosures + cur_node);
- if (BE (err != REG_NOERROR, 0))
- {
- re_node_set_free (&except_nodes);
- return err;
- }
- }
- }
- }
- for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
- {
- int cur_node = inv_eclosure->elems[ecl_idx];
- if (!re_node_set_contains (&except_nodes, cur_node))
- {
- int idx = re_node_set_contains (dest_nodes, cur_node) - 1;
- re_node_set_remove_at (dest_nodes, idx);
- }
- }
- re_node_set_free (&except_nodes);
- return REG_NOERROR;
-}
-
-static int
-check_dst_limits (dfa, limits, mctx, dst_node, dst_idx, src_node, src_idx)
- re_dfa_t *dfa;
- re_node_set *limits;
- re_match_context_t *mctx;
- int dst_node, dst_idx, src_node, src_idx;
-{
- int lim_idx, src_pos, dst_pos;
-
- for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
- {
- int subexp_idx;
- struct re_backref_cache_entry *ent;
- ent = mctx->bkref_ents + limits->elems[lim_idx];
- subexp_idx = dfa->nodes[ent->node].opr.idx - 1;
-
- dst_pos = check_dst_limits_calc_pos (dfa, mctx, limits->elems[lim_idx],
- dfa->eclosures + dst_node,
- subexp_idx, dst_node, dst_idx);
- src_pos = check_dst_limits_calc_pos (dfa, mctx, limits->elems[lim_idx],
- dfa->eclosures + src_node,
- subexp_idx, src_node, src_idx);
-
- /* In case of:
- <src> <dst> ( <subexp> )
- ( <subexp> ) <src> <dst>
- ( <subexp1> <src> <subexp2> <dst> <subexp3> ) */
- if (src_pos == dst_pos)
- continue; /* This is unrelated limitation. */
- else
- return 1;
- }
- return 0;
-}
-
-static int
-check_dst_limits_calc_pos (dfa, mctx, limit, eclosures, subexp_idx, node,
- str_idx)
- re_dfa_t *dfa;
- re_match_context_t *mctx;
- re_node_set *eclosures;
- int limit, subexp_idx, node, str_idx;
-{
- struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
- int pos = (str_idx < lim->subexp_from ? -1
- : (lim->subexp_to < str_idx ? 1 : 0));
- if (pos == 0
- && (str_idx == lim->subexp_from || str_idx == lim->subexp_to))
- {
- int node_idx;
- for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
- {
- int node = eclosures->elems[node_idx];
- re_token_type_t type= dfa->nodes[node].type;
- if (type == OP_BACK_REF)
- {
- int bi = search_cur_bkref_entry (mctx, str_idx);
- for (; bi < mctx->nbkref_ents; ++bi)
- {
- struct re_backref_cache_entry *ent = mctx->bkref_ents + bi;
- if (ent->str_idx > str_idx)
- break;
- if (ent->node == node && ent->subexp_from == ent->subexp_to)
- {
- int cpos, dst;
- dst = dfa->edests[node].elems[0];
- cpos = check_dst_limits_calc_pos (dfa, mctx, limit,
- dfa->eclosures + dst,
- subexp_idx, dst,
- str_idx);
- if ((str_idx == lim->subexp_from && cpos == -1)
- || (str_idx == lim->subexp_to && cpos == 0))
- return cpos;
- }
- }
- }
- if (type == OP_OPEN_SUBEXP && subexp_idx == dfa->nodes[node].opr.idx
- && str_idx == lim->subexp_from)
- {
- pos = -1;
- break;
- }
- if (type == OP_CLOSE_SUBEXP && subexp_idx == dfa->nodes[node].opr.idx
- && str_idx == lim->subexp_to)
- break;
- }
- if (node_idx == eclosures->nelem && str_idx == lim->subexp_to)
- pos = 1;
- }
- return pos;
-}
-
-/* Check the limitations of sub expressions LIMITS, and remove the nodes
- which are against limitations from DEST_NODES. */
-
-static reg_errcode_t
-check_subexp_limits (dfa, dest_nodes, candidates, limits, bkref_ents, str_idx)
- re_dfa_t *dfa;
- re_node_set *dest_nodes;
- const re_node_set *candidates;
- re_node_set *limits;
- struct re_backref_cache_entry *bkref_ents;
- int str_idx;
-{
- reg_errcode_t err;
- int node_idx, lim_idx;
-
- for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
- {
- int subexp_idx;
- struct re_backref_cache_entry *ent;
- ent = bkref_ents + limits->elems[lim_idx];
-
- if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
- continue; /* This is unrelated limitation. */
-
- subexp_idx = dfa->nodes[ent->node].opr.idx - 1;
- if (ent->subexp_to == str_idx)
- {
- int ops_node = -1;
- int cls_node = -1;
- for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
- {
- int node = dest_nodes->elems[node_idx];
- re_token_type_t type= dfa->nodes[node].type;
- if (type == OP_OPEN_SUBEXP
- && subexp_idx == dfa->nodes[node].opr.idx)
- ops_node = node;
- else if (type == OP_CLOSE_SUBEXP
- && subexp_idx == dfa->nodes[node].opr.idx)
- cls_node = node;
- }
-
- /* Check the limitation of the open subexpression. */
- /* Note that (ent->subexp_to = str_idx != ent->subexp_from). */
- if (ops_node >= 0)
- {
- err = sub_epsilon_src_nodes(dfa, ops_node, dest_nodes,
- candidates);
- if (BE (err != REG_NOERROR, 0))
- return err;
- }
- /* Check the limitation of the close subexpression. */
- for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
- {
- int node = dest_nodes->elems[node_idx];
- if (!re_node_set_contains (dfa->inveclosures + node, cls_node)
- && !re_node_set_contains (dfa->eclosures + node, cls_node))
- {
- /* It is against this limitation.
- Remove it form the current sifted state. */
- err = sub_epsilon_src_nodes(dfa, node, dest_nodes,
- candidates);
- if (BE (err != REG_NOERROR, 0))
- return err;
- --node_idx;
- }
- }
- }
- else /* (ent->subexp_to != str_idx) */
- {
- for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
- {
- int node = dest_nodes->elems[node_idx];
- re_token_type_t type= dfa->nodes[node].type;
- if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
- {
- if (subexp_idx != dfa->nodes[node].opr.idx)
- continue;
- if ((type == OP_CLOSE_SUBEXP && ent->subexp_to != str_idx)
- || (type == OP_OPEN_SUBEXP))
- {
- /* It is against this limitation.
- Remove it form the current sifted state. */
- err = sub_epsilon_src_nodes(dfa, node, dest_nodes,
- candidates);
- if (BE (err != REG_NOERROR, 0))
- return err;
- }
- }
- }
- }
- }
- return REG_NOERROR;
-}
-
-static reg_errcode_t
-sift_states_bkref (preg, mctx, sctx, str_idx, dest_nodes)
- const regex_t *preg;
- re_match_context_t *mctx;
- re_sift_context_t *sctx;
- int str_idx;
- re_node_set *dest_nodes;
-{
- reg_errcode_t err;
- re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
- int node_idx, node;
- re_sift_context_t local_sctx;
- const re_node_set *candidates;
- candidates = ((mctx->state_log[str_idx] == NULL) ? &empty_set
- : &mctx->state_log[str_idx]->nodes);
- local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized. */
-
- for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
- {
- int cur_bkref_idx = re_string_cur_idx (mctx->input);
- re_token_type_t type;
- node = candidates->elems[node_idx];
- type = dfa->nodes[node].type;
- if (node == sctx->cur_bkref && str_idx == cur_bkref_idx)
- continue;
- /* Avoid infinite loop for the REs like "()\1+". */
- if (node == sctx->last_node && str_idx == sctx->last_str_idx)
- continue;
- if (type == OP_BACK_REF)
- {
- int enabled_idx = search_cur_bkref_entry (mctx, str_idx);
- for (; enabled_idx < mctx->nbkref_ents; ++enabled_idx)
- {
- int disabled_idx, subexp_len, to_idx, dst_node;
- struct re_backref_cache_entry *entry;
- entry = mctx->bkref_ents + enabled_idx;
- if (entry->str_idx > str_idx)
- break;
- if (entry->node != node)
- continue;
- subexp_len = entry->subexp_to - entry->subexp_from;
- to_idx = str_idx + subexp_len;
- dst_node = (subexp_len ? dfa->nexts[node]
- : dfa->edests[node].elems[0]);
-
- if (to_idx > sctx->last_str_idx
- || sctx->sifted_states[to_idx] == NULL
- || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx],
- dst_node)
- || check_dst_limits (dfa, &sctx->limits, mctx, node,
- str_idx, dst_node, to_idx))
- continue;
- {
- re_dfastate_t *cur_state;
- entry->flag = 0;
- for (disabled_idx = enabled_idx + 1;
- disabled_idx < mctx->nbkref_ents; ++disabled_idx)
- {
- struct re_backref_cache_entry *entry2;
- entry2 = mctx->bkref_ents + disabled_idx;
- if (entry2->str_idx > str_idx)
- break;
- entry2->flag = (entry2->node == node) ? 1 : entry2->flag;
- }
-
- if (local_sctx.sifted_states == NULL)
- {
- local_sctx = *sctx;
- err = re_node_set_init_copy (&local_sctx.limits,
- &sctx->limits);
- if (BE (err != REG_NOERROR, 0))
- goto free_return;
- }
- local_sctx.last_node = node;
- local_sctx.last_str_idx = str_idx;
- err = re_node_set_insert (&local_sctx.limits, enabled_idx);
- if (BE (err < 0, 0))
- {
- err = REG_ESPACE;
- goto free_return;
- }
- cur_state = local_sctx.sifted_states[str_idx];
- err = sift_states_backward (preg, mctx, &local_sctx);
- if (BE (err != REG_NOERROR, 0))
- goto free_return;
- if (sctx->limited_states != NULL)
- {
- err = merge_state_array (dfa, sctx->limited_states,
- local_sctx.sifted_states,
- str_idx + 1);
- if (BE (err != REG_NOERROR, 0))
- goto free_return;
- }
- local_sctx.sifted_states[str_idx] = cur_state;
- re_node_set_remove (&local_sctx.limits, enabled_idx);
- /* We must not use the variable entry here, since
- mctx->bkref_ents might be realloced. */
- mctx->bkref_ents[enabled_idx].flag = 1;
- }
- }
- enabled_idx = search_cur_bkref_entry (mctx, str_idx);
- for (; enabled_idx < mctx->nbkref_ents; ++enabled_idx)
- {
- struct re_backref_cache_entry *entry;
- entry = mctx->bkref_ents + enabled_idx;
- if (entry->str_idx > str_idx)
- break;
- if (entry->node == node)
- entry->flag = 0;
- }
- }
- }
- err = REG_NOERROR;
- free_return:
- if (local_sctx.sifted_states != NULL)
- {
- re_node_set_free (&local_sctx.limits);
- }
-
- return err;
-}
-
-
-#ifdef RE_ENABLE_I18N
-static int
-sift_states_iter_mb (preg, mctx, sctx, node_idx, str_idx, max_str_idx)
- const regex_t *preg;
- const re_match_context_t *mctx;
- re_sift_context_t *sctx;
- int node_idx, str_idx, max_str_idx;
-{
- re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
- int naccepted;
- /* Check the node can accept `multi byte'. */
- naccepted = check_node_accept_bytes (preg, node_idx, mctx->input, str_idx);
- if (naccepted > 0 && str_idx + naccepted <= max_str_idx &&
- !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
- dfa->nexts[node_idx]))
- /* The node can't accept the `multi byte', or the
- destination was already throwed away, then the node
- could't accept the current input `multi byte'. */
- naccepted = 0;
- /* Otherwise, it is sure that the node could accept
- `naccepted' bytes input. */
- return naccepted;
-}
-#endif /* RE_ENABLE_I18N */
-
-\f
-/* Functions for state transition. */
-
-/* Return the next state to which the current state STATE will transit by
- accepting the current input byte, and update STATE_LOG if necessary.
- If STATE can accept a multibyte char/collating element/back reference
- update the destination of STATE_LOG. */
-
-static re_dfastate_t *
-transit_state (err, preg, mctx, state, fl_search)
- reg_errcode_t *err;
- const regex_t *preg;
- re_match_context_t *mctx;
- re_dfastate_t *state;
- int fl_search;
-{
- re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
- re_dfastate_t **trtable, *next_state;
- unsigned char ch;
- int cur_idx;
-
- if (re_string_cur_idx (mctx->input) + 1 >= mctx->input->bufs_len
- || (re_string_cur_idx (mctx->input) + 1 >= mctx->input->valid_len
- && mctx->input->valid_len < mctx->input->len))
- {
- *err = extend_buffers (mctx);
- if (BE (*err != REG_NOERROR, 0))
- return NULL;
- }
-
- *err = REG_NOERROR;
- if (state == NULL)
- {
- next_state = state;
- re_string_skip_bytes (mctx->input, 1);
- }
- else
- {
-#ifdef RE_ENABLE_I18N
- /* If the current state can accept multibyte. */
- if (state->accept_mb)
- {
- *err = transit_state_mb (preg, state, mctx);
- if (BE (*err != REG_NOERROR, 0))
- return NULL;
- }
-#endif /* RE_ENABLE_I18N */
-
- /* Then decide the next state with the single byte. */
- if (1)
- {
- /* Use transition table */
- ch = re_string_fetch_byte (mctx->input);
- trtable = fl_search ? state->trtable_search : state->trtable;
- if (trtable == NULL)
- {
- trtable = build_trtable (preg, state, fl_search);
- if (fl_search)
- state->trtable_search = trtable;
- else
- state->trtable = trtable;
- }
- next_state = trtable[ch];
- }
- else
- {
- /* don't use transition table */
- next_state = transit_state_sb (err, preg, state, fl_search, mctx);
- if (BE (next_state == NULL && err != REG_NOERROR, 0))
- return NULL;
- }
- }
-
- cur_idx = re_string_cur_idx (mctx->input);
- /* Update the state_log if we need. */
- if (mctx->state_log != NULL)
- {
- if (cur_idx > mctx->state_log_top)
- {
- mctx->state_log[cur_idx] = next_state;
- mctx->state_log_top = cur_idx;
- }
- else if (mctx->state_log[cur_idx] == 0)
- {
- mctx->state_log[cur_idx] = next_state;
- }
- else
- {
- re_dfastate_t *pstate;
- unsigned int context;
- re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
- /* If (state_log[cur_idx] != 0), it implies that cur_idx is
- the destination of a multibyte char/collating element/
- back reference. Then the next state is the union set of
- these destinations and the results of the transition table. */
- pstate = mctx->state_log[cur_idx];
- log_nodes = pstate->entrance_nodes;
- if (next_state != NULL)
- {
- table_nodes = next_state->entrance_nodes;
- *err = re_node_set_init_union (&next_nodes, table_nodes,
- log_nodes);
- if (BE (*err != REG_NOERROR, 0))
- return NULL;
- }
- else
- next_nodes = *log_nodes;
- /* Note: We already add the nodes of the initial state,
- then we don't need to add them here. */
-
- context = re_string_context_at (mctx->input,
- re_string_cur_idx (mctx->input) - 1,
- mctx->eflags, preg->newline_anchor);
- next_state = mctx->state_log[cur_idx]
- = re_acquire_state_context (err, dfa, &next_nodes, context);
- /* We don't need to check errors here, since the return value of
- this function is next_state and ERR is already set. */
-
- if (table_nodes != NULL)
- re_node_set_free (&next_nodes);
- }
- }
-
- /* Check OP_OPEN_SUBEXP in the current state in case that we use them
- later. We must check them here, since the back references in the
- next state might use them. */
- if (dfa->nbackref && next_state/* && fl_process_bkref */)
- {
- *err = check_subexp_matching_top (dfa, mctx, &next_state->nodes,
- cur_idx);
- if (BE (*err != REG_NOERROR, 0))
- return NULL;
- }
-
- /* If the next state has back references. */
- if (next_state != NULL && next_state->has_backref)
- {
- *err = transit_state_bkref (preg, &next_state->nodes, mctx);
- if (BE (*err != REG_NOERROR, 0))
- return NULL;
- next_state = mctx->state_log[cur_idx];
- }
- return next_state;
-}
-
-/* Helper functions for transit_state. */
-
-/* From the node set CUR_NODES, pick up the nodes whose types are
- OP_OPEN_SUBEXP and which have corresponding back references in the regular
- expression. And register them to use them later for evaluating the
- correspoding back references. */
-
-static reg_errcode_t
-check_subexp_matching_top (dfa, mctx, cur_nodes, str_idx)
- re_dfa_t *dfa;
- re_match_context_t *mctx;
- re_node_set *cur_nodes;
- int str_idx;
-{
- int node_idx;
- reg_errcode_t err;
-
- /* TODO: This isn't efficient.
- Because there might be more than one nodes whose types are
- OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
- nodes.
- E.g. RE: (a){2} */
- for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)
- {
- int node = cur_nodes->elems[node_idx];
- if (dfa->nodes[node].type == OP_OPEN_SUBEXP
- && dfa->used_bkref_map & (1 << dfa->nodes[node].opr.idx))
- {
- err = match_ctx_add_subtop (mctx, node, str_idx);
- if (BE (err != REG_NOERROR, 0))
- return err;
- }
- }
- return REG_NOERROR;
-}
-
-/* Return the next state to which the current state STATE will transit by
- accepting the current input byte. */
-
-static re_dfastate_t *
-transit_state_sb (err, preg, state, fl_search, mctx)
- reg_errcode_t *err;
- const regex_t *preg;
- re_dfastate_t *state;
- int fl_search;
- re_match_context_t *mctx;
-{
- re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
- re_node_set next_nodes;
- re_dfastate_t *next_state;
- int node_cnt, cur_str_idx = re_string_cur_idx (mctx->input);
- unsigned int context;
-
- *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
- if (BE (*err != REG_NOERROR, 0))
- return NULL;
- for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
- {
- int cur_node = state->nodes.elems[node_cnt];
- if (check_node_accept (preg, dfa->nodes + cur_node, mctx, cur_str_idx))
- {
- *err = re_node_set_merge (&next_nodes,
- dfa->eclosures + dfa->nexts[cur_node]);
- if (BE (*err != REG_NOERROR, 0))
- {
- re_node_set_free (&next_nodes);
- return NULL;
- }
- }
- }
- if (fl_search)
- {
-#ifdef RE_ENABLE_I18N
- int not_initial = 0;
- if (MB_CUR_MAX > 1)
- for (node_cnt = 0; node_cnt < next_nodes.nelem; ++node_cnt)
- if (dfa->nodes[next_nodes.elems[node_cnt]].type == CHARACTER)
- {
- not_initial = dfa->nodes[next_nodes.elems[node_cnt]].mb_partial;
- break;
- }
- if (!not_initial)
-#endif
- {
- *err = re_node_set_merge (&next_nodes,
- dfa->init_state->entrance_nodes);
- if (BE (*err != REG_NOERROR, 0))
- {
- re_node_set_free (&next_nodes);
- return NULL;
- }
- }
- }
- context = re_string_context_at (mctx->input, cur_str_idx, mctx->eflags,
- preg->newline_anchor);
- next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
- /* We don't need to check errors here, since the return value of
- this function is next_state and ERR is already set. */
-
- re_node_set_free (&next_nodes);
- re_string_skip_bytes (mctx->input, 1);
- return next_state;
-}
-
-#ifdef RE_ENABLE_I18N
-static reg_errcode_t
-transit_state_mb (preg, pstate, mctx)
- const regex_t *preg;
- re_dfastate_t *pstate;
- re_match_context_t *mctx;
-{
- reg_errcode_t err;
- re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
- int i;
-
- for (i = 0; i < pstate->nodes.nelem; ++i)
- {
- re_node_set dest_nodes, *new_nodes;
- int cur_node_idx = pstate->nodes.elems[i];
- int naccepted = 0, dest_idx;
- unsigned int context;
- re_dfastate_t *dest_state;
-
- if (dfa->nodes[cur_node_idx].constraint)
- {
- context = re_string_context_at (mctx->input,
- re_string_cur_idx (mctx->input),
- mctx->eflags, preg->newline_anchor);
- if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
- context))
- continue;
- }
-
- /* How many bytes the node can accepts? */
- if (ACCEPT_MB_NODE (dfa->nodes[cur_node_idx].type))
- naccepted = check_node_accept_bytes (preg, cur_node_idx, mctx->input,
- re_string_cur_idx (mctx->input));
- if (naccepted == 0)
- continue;
-
- /* The node can accepts `naccepted' bytes. */
- dest_idx = re_string_cur_idx (mctx->input) + naccepted;
- mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
- : mctx->max_mb_elem_len);
- err = clean_state_log_if_need (mctx, dest_idx);
- if (BE (err != REG_NOERROR, 0))
- return err;
-#ifdef DEBUG
- assert (dfa->nexts[cur_node_idx] != -1);
-#endif
- /* `cur_node_idx' may point the entity of the OP_CONTEXT_NODE,
- then we use pstate->nodes.elems[i] instead. */
- new_nodes = dfa->eclosures + dfa->nexts[pstate->nodes.elems[i]];
-
- dest_state = mctx->state_log[dest_idx];
- if (dest_state == NULL)
- dest_nodes = *new_nodes;
- else
- {
- err = re_node_set_init_union (&dest_nodes,
- dest_state->entrance_nodes, new_nodes);
- if (BE (err != REG_NOERROR, 0))
- return err;
- }
- context = re_string_context_at (mctx->input, dest_idx - 1, mctx->eflags,
- preg->newline_anchor);
- mctx->state_log[dest_idx]
- = re_acquire_state_context (&err, dfa, &dest_nodes, context);
- if (dest_state != NULL)
- re_node_set_free (&dest_nodes);
- if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0))
- return err;
- }
- return REG_NOERROR;
-}
-#endif /* RE_ENABLE_I18N */
-
-static reg_errcode_t
-transit_state_bkref (preg, nodes, mctx)
- const regex_t *preg;
- re_node_set *nodes;
- re_match_context_t *mctx;
-{
- reg_errcode_t err;
- re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
- int i;
- int cur_str_idx = re_string_cur_idx (mctx->input);
-
- for (i = 0; i < nodes->nelem; ++i)
- {
- int dest_str_idx, prev_nelem, bkc_idx;
- int node_idx = nodes->elems[i];
- unsigned int context;
- re_token_t *node = dfa->nodes + node_idx;
- re_node_set *new_dest_nodes;
-
- /* Check whether `node' is a backreference or not. */
- if (node->type != OP_BACK_REF)
- continue;
-
- if (node->constraint)
- {
- context = re_string_context_at (mctx->input, cur_str_idx,
- mctx->eflags, preg->newline_anchor);
- if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
- continue;
- }
-
- /* `node' is a backreference.
- Check the substring which the substring matched. */
- bkc_idx = mctx->nbkref_ents;
- err = get_subexp (preg, mctx, node_idx, cur_str_idx);
- if (BE (err != REG_NOERROR, 0))
- goto free_return;
-
- /* And add the epsilon closures (which is `new_dest_nodes') of
- the backreference to appropriate state_log. */
-#ifdef DEBUG
- assert (dfa->nexts[node_idx] != -1);
-#endif
- for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
- {
- int subexp_len;
- re_dfastate_t *dest_state;
- struct re_backref_cache_entry *bkref_ent;
- bkref_ent = mctx->bkref_ents + bkc_idx;
- if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
- continue;
- subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
- new_dest_nodes = (subexp_len == 0
- ? dfa->eclosures + dfa->edests[node_idx].elems[0]
- : dfa->eclosures + dfa->nexts[node_idx]);
- dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
- - bkref_ent->subexp_from);
- context = re_string_context_at (mctx->input, dest_str_idx - 1,
- mctx->eflags, preg->newline_anchor);
- dest_state = mctx->state_log[dest_str_idx];
- prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
- : mctx->state_log[cur_str_idx]->nodes.nelem);
- /* Add `new_dest_node' to state_log. */
- if (dest_state == NULL)
- {
- mctx->state_log[dest_str_idx]
- = re_acquire_state_context (&err, dfa, new_dest_nodes,
- context);
- if (BE (mctx->state_log[dest_str_idx] == NULL
- && err != REG_NOERROR, 0))
- goto free_return;
- }
- else
- {
- re_node_set dest_nodes;
- err = re_node_set_init_union (&dest_nodes,
- dest_state->entrance_nodes,
- new_dest_nodes);
- if (BE (err != REG_NOERROR, 0))
- {
- re_node_set_free (&dest_nodes);
- goto free_return;
- }
- mctx->state_log[dest_str_idx]
- = re_acquire_state_context (&err, dfa, &dest_nodes, context);
- re_node_set_free (&dest_nodes);
- if (BE (mctx->state_log[dest_str_idx] == NULL
- && err != REG_NOERROR, 0))
- goto free_return;
- }
- /* We need to check recursively if the backreference can epsilon
- transit. */
- if (subexp_len == 0
- && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
- {
- err = check_subexp_matching_top (dfa, mctx, new_dest_nodes,
- cur_str_idx);
- if (BE (err != REG_NOERROR, 0))
- goto free_return;
- err = transit_state_bkref (preg, new_dest_nodes, mctx);
- if (BE (err != REG_NOERROR, 0))
- goto free_return;
- }
- }
- }
- err = REG_NOERROR;
- free_return:
- return err;
-}
-
-/* Enumerate all the candidates which the backreference BKREF_NODE can match
- at BKREF_STR_IDX, and register them by match_ctx_add_entry().
- Note that we might collect inappropriate candidates here.
- However, the cost of checking them strictly here is too high, then we
- delay these checking for prune_impossible_nodes(). */
-
-static reg_errcode_t
-get_subexp (preg, mctx, bkref_node, bkref_str_idx)
- const regex_t *preg;
- re_match_context_t *mctx;
- int bkref_node, bkref_str_idx;
-{
- int subexp_num, sub_top_idx;
- re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
- char *buf = (char *) re_string_get_buffer (mctx->input);
- /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX. */
- int cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
- for (; cache_idx < mctx->nbkref_ents; ++cache_idx)
- {
- struct re_backref_cache_entry *entry = mctx->bkref_ents + cache_idx;
- if (entry->str_idx > bkref_str_idx)
- break;
- if (entry->node == bkref_node)
- return REG_NOERROR; /* We already checked it. */
- }
- subexp_num = dfa->nodes[bkref_node].opr.idx - 1;
-
- /* For each sub expression */
- for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
- {
- reg_errcode_t err;
- re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
- re_sub_match_last_t *sub_last;
- int sub_last_idx, sl_str;
- char *bkref_str;
-
- if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
- continue; /* It isn't related. */
-
- sl_str = sub_top->str_idx;
- bkref_str = buf + bkref_str_idx;
- /* At first, check the last node of sub expressions we already
- evaluated. */
- for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
- {
- int sl_str_diff;
- sub_last = sub_top->lasts[sub_last_idx];
- sl_str_diff = sub_last->str_idx - sl_str;
- /* The matched string by the sub expression match with the substring
- at the back reference? */
- if (sl_str_diff > 0
- && memcmp (bkref_str, buf + sl_str, sl_str_diff) != 0)
- break; /* We don't need to search this sub expression any more. */
- bkref_str += sl_str_diff;
- sl_str += sl_str_diff;
- err = get_subexp_sub (preg, mctx, sub_top, sub_last, bkref_node,
- bkref_str_idx);
- if (err == REG_NOMATCH)
- continue;
- if (BE (err != REG_NOERROR, 0))
- return err;
- }
- if (sub_last_idx < sub_top->nlasts)
- continue;
- if (sub_last_idx > 0)
- ++sl_str;
- /* Then, search for the other last nodes of the sub expression. */
- for (; sl_str <= bkref_str_idx; ++sl_str)
- {
- int cls_node, sl_str_off;
- re_node_set *nodes;
- sl_str_off = sl_str - sub_top->str_idx;
- /* The matched string by the sub expression match with the substring
- at the back reference? */
- if (sl_str_off > 0
- && memcmp (bkref_str++, buf + sl_str - 1, 1) != 0)
- break; /* We don't need to search this sub expression any more. */
- if (mctx->state_log[sl_str] == NULL)
- continue;
- /* Does this state have a ')' of the sub expression? */
- nodes = &mctx->state_log[sl_str]->nodes;
- cls_node = find_subexp_node (dfa, nodes, subexp_num, 0);
- if (cls_node == -1)
- continue; /* No. */
- if (sub_top->path == NULL)
- {
- sub_top->path = calloc (sizeof (state_array_t),
- sl_str - sub_top->str_idx + 1);
- if (sub_top->path == NULL)
- return REG_ESPACE;
- }
- /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
- in the current context? */
- err = check_arrival (preg, mctx, sub_top->path, sub_top->node,
- sub_top->str_idx, cls_node, sl_str, 0);
- if (err == REG_NOMATCH)
- continue;
- if (BE (err != REG_NOERROR, 0))
- return err;
- sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
- if (BE (sub_last == NULL, 0))
- return REG_ESPACE;
- err = get_subexp_sub (preg, mctx, sub_top, sub_last, bkref_node,
- bkref_str_idx);
- if (err == REG_NOMATCH)
- continue;
- }
- }
- return REG_NOERROR;
-}
-
-/* Helper functions for get_subexp(). */
-
-/* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.
- If it can arrive, register the sub expression expressed with SUB_TOP
- and SUB_LAST. */
-
-static reg_errcode_t
-get_subexp_sub (preg, mctx, sub_top, sub_last, bkref_node, bkref_str)
- const regex_t *preg;
- re_match_context_t *mctx;
- re_sub_match_top_t *sub_top;
- re_sub_match_last_t *sub_last;
- int bkref_node, bkref_str;
-{
- reg_errcode_t err;
- int to_idx;
- /* Can the subexpression arrive the back reference? */
- err = check_arrival (preg, mctx, &sub_last->path, sub_last->node,
- sub_last->str_idx, bkref_node, bkref_str, 1);
- if (err != REG_NOERROR)
- return err;
- err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
- sub_last->str_idx);
- if (BE (err != REG_NOERROR, 0))
- return err;
- to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
- clean_state_log_if_need (mctx, to_idx);
- return REG_NOERROR;
-}
-
-/* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.
- Search '(' if FL_OPEN, or search ')' otherwise.
- TODO: This function isn't efficient...
- Because there might be more than one nodes whose types are
- OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
- nodes.
- E.g. RE: (a){2} */
-
-static int
-find_subexp_node (dfa, nodes, subexp_idx, fl_open)
- re_dfa_t *dfa;
- re_node_set *nodes;
- int subexp_idx, fl_open;
-{
- int cls_idx;
- for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)
- {
- int cls_node = nodes->elems[cls_idx];
- re_token_t *node = dfa->nodes + cls_node;
- if (((fl_open && node->type == OP_OPEN_SUBEXP)
- || (!fl_open && node->type == OP_CLOSE_SUBEXP))
- && node->opr.idx == subexp_idx)
- return cls_node;
- }
- return -1;
-}
-
-/* Check whether the node TOP_NODE at TOP_STR can arrive to the node
- LAST_NODE at LAST_STR. We record the path onto PATH since it will be
- heavily reused.
- Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise. */
-
-static reg_errcode_t
-check_arrival (preg, mctx, path, top_node, top_str, last_node, last_str,
- fl_open)
- const regex_t *preg;
- re_match_context_t *mctx;
- state_array_t *path;
- int top_node, top_str, last_node, last_str, fl_open;
-{
- re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
- reg_errcode_t err;
- int subexp_num, backup_cur_idx, str_idx, null_cnt;
- re_dfastate_t *cur_state = NULL;
- re_node_set *cur_nodes, next_nodes;
- re_dfastate_t **backup_state_log;
- unsigned int context;
-
- subexp_num = dfa->nodes[top_node].opr.idx;
- /* Extend the buffer if we need. */
- if (path->alloc < last_str + mctx->max_mb_elem_len + 1)
- {
- re_dfastate_t **new_array;
- int old_alloc = path->alloc;
- path->alloc += last_str + mctx->max_mb_elem_len + 1;
- new_array = re_realloc (path->array, re_dfastate_t *, path->alloc);
- if (new_array == NULL)
- return REG_ESPACE;
- path->array = new_array;
- memset (new_array + old_alloc, '\0',
- sizeof (re_dfastate_t *) * (path->alloc - old_alloc));
- }
-
- str_idx = path->next_idx == 0 ? top_str : path->next_idx;
-
- /* Temporary modify MCTX. */
- backup_state_log = mctx->state_log;
- backup_cur_idx = mctx->input->cur_idx;
- mctx->state_log = path->array;
- mctx->input->cur_idx = str_idx;
-
- /* Setup initial node set. */
- context = re_string_context_at (mctx->input, str_idx - 1, mctx->eflags,
- preg->newline_anchor);
- if (str_idx == top_str)
- {
- err = re_node_set_init_1 (&next_nodes, top_node);
- if (BE (err != REG_NOERROR, 0))
- return err;
- err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, fl_open);
- if (BE (err != REG_NOERROR, 0))
- {
- re_node_set_free (&next_nodes);
- return err;
- }
- }
- else
- {
- cur_state = mctx->state_log[str_idx];
- if (cur_state && cur_state->has_backref)
- {
- err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
- if (BE ( err != REG_NOERROR, 0))
- return err;
- }
- else
- re_node_set_init_empty (&next_nodes);
- }
- if (str_idx == top_str || (cur_state && cur_state->has_backref))
- {
- if (next_nodes.nelem)
- {
- err = expand_bkref_cache (preg, mctx, &next_nodes, str_idx, last_str,
- subexp_num, fl_open);
- if (BE ( err != REG_NOERROR, 0))
- {
- re_node_set_free (&next_nodes);
- return err;
- }
- }
- cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
- if (BE (cur_state == NULL && err != REG_NOERROR, 0))
- {
- re_node_set_free (&next_nodes);
- return err;
- }
- mctx->state_log[str_idx] = cur_state;
- }
-
- for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
- {
- re_node_set_empty (&next_nodes);
- if (mctx->state_log[str_idx + 1])
- {
- err = re_node_set_merge (&next_nodes,
- &mctx->state_log[str_idx + 1]->nodes);
- if (BE (err != REG_NOERROR, 0))
- {
- re_node_set_free (&next_nodes);
- return err;
- }
- }
- if (cur_state)
- {
- err = check_arrival_add_next_nodes(preg, dfa, mctx, str_idx,
- &cur_state->nodes, &next_nodes);
- if (BE (err != REG_NOERROR, 0))
- {
- re_node_set_free (&next_nodes);
- return err;
- }
- }
- ++str_idx;
- if (next_nodes.nelem)
- {
- err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num,
- fl_open);
- if (BE (err != REG_NOERROR, 0))
- {
- re_node_set_free (&next_nodes);
- return err;
- }
- err = expand_bkref_cache (preg, mctx, &next_nodes, str_idx, last_str,
- subexp_num, fl_open);
- if (BE ( err != REG_NOERROR, 0))
- {
- re_node_set_free (&next_nodes);
- return err;
- }
- }
- context = re_string_context_at (mctx->input, str_idx - 1, mctx->eflags,
- preg->newline_anchor);
- cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
- if (BE (cur_state == NULL && err != REG_NOERROR, 0))
- {
- re_node_set_free (&next_nodes);
- return err;
- }
- mctx->state_log[str_idx] = cur_state;
- null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
- }
- re_node_set_free (&next_nodes);
- cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
- : &mctx->state_log[last_str]->nodes);
- path->next_idx = str_idx;
-
- /* Fix MCTX. */
- mctx->state_log = backup_state_log;
- mctx->input->cur_idx = backup_cur_idx;
-
- if (cur_nodes == NULL)
- return REG_NOMATCH;
- /* Then check the current node set has the node LAST_NODE. */
- return (re_node_set_contains (cur_nodes, last_node)
- || re_node_set_contains (cur_nodes, last_node) ? REG_NOERROR
- : REG_NOMATCH);
-}
-
-/* Helper functions for check_arrival. */
-
-/* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them
- to NEXT_NODES.
- TODO: This function is similar to the functions transit_state*(),
- however this function has many additional works.
- Can't we unify them? */
-
-static reg_errcode_t
-check_arrival_add_next_nodes (preg, dfa, mctx, str_idx, cur_nodes, next_nodes)
- const regex_t *preg;
- re_dfa_t *dfa;
- re_match_context_t *mctx;
- int str_idx;
- re_node_set *cur_nodes, *next_nodes;
-{
- int cur_idx;
- reg_errcode_t err;
- re_node_set union_set;
- re_node_set_init_empty (&union_set);
- for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
- {
- int naccepted = 0;
- int cur_node = cur_nodes->elems[cur_idx];
- re_token_type_t type = dfa->nodes[cur_node].type;
- if (IS_EPSILON_NODE(type))
- continue;
-#ifdef RE_ENABLE_I18N
- /* If the node may accept `multi byte'. */
- if (ACCEPT_MB_NODE (type))
- {
- naccepted = check_node_accept_bytes (preg, cur_node, mctx->input,
- str_idx);
- if (naccepted > 1)
- {
- re_dfastate_t *dest_state;
- int next_node = dfa->nexts[cur_node];
- int next_idx = str_idx + naccepted;
- dest_state = mctx->state_log[next_idx];
- re_node_set_empty (&union_set);
- if (dest_state)
- {
- err = re_node_set_merge (&union_set, &dest_state->nodes);
- if (BE (err != REG_NOERROR, 0))
- {
- re_node_set_free (&union_set);
- return err;
- }
- err = re_node_set_insert (&union_set, next_node);
- if (BE (err < 0, 0))
- {
- re_node_set_free (&union_set);
- return REG_ESPACE;
- }
- }
- else
- {
- err = re_node_set_insert (&union_set, next_node);
- if (BE (err < 0, 0))
- {
- re_node_set_free (&union_set);
- return REG_ESPACE;
- }
- }
- mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
- &union_set);
- if (BE (mctx->state_log[next_idx] == NULL
- && err != REG_NOERROR, 0))
- {
- re_node_set_free (&union_set);
- return err;
- }
- }
- }
-#endif /* RE_ENABLE_I18N */
- if (naccepted
- || check_node_accept (preg, dfa->nodes + cur_node, mctx,
- str_idx))
- {
- err = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
- if (BE (err < 0, 0))
- {
- re_node_set_free (&union_set);
- return REG_ESPACE;
- }
- }
- }
- re_node_set_free (&union_set);
- return REG_NOERROR;
-}
-
-/* For all the nodes in CUR_NODES, add the epsilon closures of them to
- CUR_NODES, however exclude the nodes which are:
- - inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
- - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
-*/
-
-static reg_errcode_t
-check_arrival_expand_ecl (dfa, cur_nodes, ex_subexp, fl_open)
- re_dfa_t *dfa;
- re_node_set *cur_nodes;
- int ex_subexp, fl_open;
-{
- reg_errcode_t err;
- int idx, outside_node;
- re_node_set new_nodes;
-#ifdef DEBUG
- assert (cur_nodes->nelem);
-#endif
- err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
- if (BE (err != REG_NOERROR, 0))
- return err;
- /* Create a new node set NEW_NODES with the nodes which are epsilon
- closures of the node in CUR_NODES. */
-
- for (idx = 0; idx < cur_nodes->nelem; ++idx)
- {
- int cur_node = cur_nodes->elems[idx];
- re_node_set *eclosure = dfa->eclosures + cur_node;
- outside_node = find_subexp_node (dfa, eclosure, ex_subexp, fl_open);
- if (outside_node == -1)
- {
- /* There are no problematic nodes, just merge them. */
- err = re_node_set_merge (&new_nodes, eclosure);
- if (BE (err != REG_NOERROR, 0))
- {
- re_node_set_free (&new_nodes);
- return err;
- }
- }
- else
- {
- /* There are problematic nodes, re-calculate incrementally. */
- err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
- ex_subexp, fl_open);
- if (BE (err != REG_NOERROR, 0))
- {
- re_node_set_free (&new_nodes);
- return err;
- }
- }
- }
- re_node_set_free (cur_nodes);
- *cur_nodes = new_nodes;
- return REG_NOERROR;
-}
-
-/* Helper function for check_arrival_expand_ecl.
- Check incrementally the epsilon closure of TARGET, and if it isn't
- problematic append it to DST_NODES. */
-
-static reg_errcode_t
-check_arrival_expand_ecl_sub (dfa, dst_nodes, target, ex_subexp, fl_open)
- re_dfa_t *dfa;
- int target, ex_subexp, fl_open;
- re_node_set *dst_nodes;
-{
- int cur_node, type;
- for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
- {
- int err;
- type = dfa->nodes[cur_node].type;
-
- if (((type == OP_OPEN_SUBEXP && fl_open)
- || (type == OP_CLOSE_SUBEXP && !fl_open))
- && dfa->nodes[cur_node].opr.idx == ex_subexp)
- {
- if (!fl_open)
- {
- err = re_node_set_insert (dst_nodes, cur_node);
- if (BE (err == -1, 0))
- return REG_ESPACE;
- }
- break;
- }
- err = re_node_set_insert (dst_nodes, cur_node);
- if (BE (err == -1, 0))
- return REG_ESPACE;
- if (dfa->edests[cur_node].nelem == 0)
- break;
- if (dfa->edests[cur_node].nelem == 2)
- {
- err = check_arrival_expand_ecl_sub (dfa, dst_nodes,
- dfa->edests[cur_node].elems[1],
- ex_subexp, fl_open);
- if (BE (err != REG_NOERROR, 0))
- return err;
- }
- cur_node = dfa->edests[cur_node].elems[0];
- }
- return REG_NOERROR;
-}
-
-
-/* For all the back references in the current state, calculate the
- destination of the back references by the appropriate entry
- in MCTX->BKREF_ENTS. */
-
-static reg_errcode_t
-expand_bkref_cache (preg, mctx, cur_nodes, cur_str, last_str, subexp_num,
- fl_open)
- const regex_t *preg;
- re_match_context_t *mctx;
- int cur_str, last_str, subexp_num, fl_open;
- re_node_set *cur_nodes;
-{
- reg_errcode_t err;
- re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
- int cache_idx, cache_idx_start;
- /* The current state. */
-
- cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
- for (cache_idx = cache_idx_start; cache_idx < mctx->nbkref_ents; ++cache_idx)
- {
- int to_idx, next_node;
- struct re_backref_cache_entry *ent = mctx->bkref_ents + cache_idx;
- if (ent->str_idx > cur_str)
- break;
- /* Is this entry ENT is appropriate? */
- if (!re_node_set_contains (cur_nodes, ent->node))
- continue; /* No. */
-
- to_idx = cur_str + ent->subexp_to - ent->subexp_from;
- /* Calculate the destination of the back reference, and append it
- to MCTX->STATE_LOG. */
- if (to_idx == cur_str)
- {
- /* The backreference did epsilon transit, we must re-check all the
- node in the current state. */
- re_node_set new_dests;
- reg_errcode_t err2, err3;
- next_node = dfa->edests[ent->node].elems[0];
- if (re_node_set_contains (cur_nodes, next_node))
- continue;
- err = re_node_set_init_1 (&new_dests, next_node);
- err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num,
- fl_open);
- err3 = re_node_set_merge (cur_nodes, &new_dests);
- re_node_set_free (&new_dests);
- if (BE (err != REG_NOERROR || err2 != REG_NOERROR
- || err3 != REG_NOERROR, 0))
- {
- err = (err != REG_NOERROR ? err
- : (err2 != REG_NOERROR ? err2 : err3));
- return err;
- }
- /* TODO: It is still inefficient... */
- cache_idx = cache_idx_start - 1;
- continue;
- }
- else
- {
- re_node_set union_set;
- next_node = dfa->nexts[ent->node];
- if (mctx->state_log[to_idx])
- {
- int ret;
- if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
- next_node))
- continue;
- err = re_node_set_init_copy (&union_set,
- &mctx->state_log[to_idx]->nodes);
- ret = re_node_set_insert (&union_set, next_node);
- if (BE (err != REG_NOERROR || ret < 0, 0))
- {
- re_node_set_free (&union_set);
- err = err != REG_NOERROR ? err : REG_ESPACE;
- return err;
- }
- }
- else
- {
- err = re_node_set_init_1 (&union_set, next_node);
- if (BE (err != REG_NOERROR, 0))
- return err;
- }
- mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
- re_node_set_free (&union_set);
- if (BE (mctx->state_log[to_idx] == NULL
- && err != REG_NOERROR, 0))
- return err;
- }
- }
- return REG_NOERROR;
-}
-
-/* Build transition table for the state.
- Return the new table if succeeded, otherwise return NULL. */
-
-static re_dfastate_t **
-build_trtable (preg, state, fl_search)
- const regex_t *preg;
- const re_dfastate_t *state;
- int fl_search;
-{
- reg_errcode_t err;
- re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
- int i, j, k, ch;
- int dests_node_malloced = 0, dest_states_malloced = 0;
- int ndests; /* Number of the destination states from `state'. */
- re_dfastate_t **trtable;
- re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
- re_node_set follows, *dests_node;
- bitset *dests_ch;
- bitset acceptable;
-
- /* We build DFA states which corresponds to the destination nodes
- from `state'. `dests_node[i]' represents the nodes which i-th
- destination state contains, and `dests_ch[i]' represents the
- characters which i-th destination state accepts. */
-#ifdef _LIBC
- if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX))
- dests_node = (re_node_set *)
- alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX);
- else
-#endif
- {
- dests_node = (re_node_set *)
- malloc ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX);
- if (BE (dests_node == NULL, 0))
- return NULL;
- dests_node_malloced = 1;
- }
- dests_ch = (bitset *) (dests_node + SBC_MAX);
-
- /* Initialize transiton table. */
- trtable = (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
- if (BE (trtable == NULL, 0))
- {
- if (dests_node_malloced)
- free (dests_node);
- return NULL;
- }
-
- /* At first, group all nodes belonging to `state' into several
- destinations. */
- ndests = group_nodes_into_DFAstates (preg, state, dests_node, dests_ch);
- if (BE (ndests <= 0, 0))
- {
- if (dests_node_malloced)
- free (dests_node);
- /* Return NULL in case of an error, trtable otherwise. */
- if (ndests == 0)
- return trtable;
- free (trtable);
- return NULL;
- }
-
- err = re_node_set_alloc (&follows, ndests + 1);
- if (BE (err != REG_NOERROR, 0))
- goto out_free;
-
-#ifdef _LIBC
- if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX
- + ndests * 3 * sizeof (re_dfastate_t *)))
- dest_states = (re_dfastate_t **)
- alloca (ndests * 3 * sizeof (re_dfastate_t *));
- else
-#endif
- {
- dest_states = (re_dfastate_t **)
- malloc (ndests * 3 * sizeof (re_dfastate_t *));
- if (BE (dest_states == NULL, 0))
- {
-out_free:
- if (dest_states_malloced)
- free (dest_states);
- re_node_set_free (&follows);
- for (i = 0; i < ndests; ++i)
- re_node_set_free (dests_node + i);
- free (trtable);
- if (dests_node_malloced)
- free (dests_node);
- return NULL;
- }
- dest_states_malloced = 1;
- }
- dest_states_word = dest_states + ndests;
- dest_states_nl = dest_states_word + ndests;
- bitset_empty (acceptable);
-
- /* Then build the states for all destinations. */
- for (i = 0; i < ndests; ++i)
- {
- int next_node;
- re_node_set_empty (&follows);
- /* Merge the follows of this destination states. */
- for (j = 0; j < dests_node[i].nelem; ++j)
- {
- next_node = dfa->nexts[dests_node[i].elems[j]];
- if (next_node != -1)
- {
- err = re_node_set_merge (&follows, dfa->eclosures + next_node);
- if (BE (err != REG_NOERROR, 0))
- goto out_free;
- }
- }
- /* If search flag is set, merge the initial state. */
- if (fl_search)
- {
-#ifdef RE_ENABLE_I18N
- int not_initial = 0;
- for (j = 0; j < follows.nelem; ++j)
- if (dfa->nodes[follows.elems[j]].type == CHARACTER)
- {
- not_initial = dfa->nodes[follows.elems[j]].mb_partial;
- break;
- }
- if (!not_initial)
-#endif
- {
- err = re_node_set_merge (&follows,
- dfa->init_state->entrance_nodes);
- if (BE (err != REG_NOERROR, 0))
- goto out_free;
- }
- }
- dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
- if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0))
- goto out_free;
- /* If the new state has context constraint,
- build appropriate states for these contexts. */
- if (dest_states[i]->has_constraint)
- {
- dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
- CONTEXT_WORD);
- if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))
- goto out_free;
- dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
- CONTEXT_NEWLINE);
- if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0))
- goto out_free;
- }
- else
- {
- dest_states_word[i] = dest_states[i];
- dest_states_nl[i] = dest_states[i];
- }
- bitset_merge (acceptable, dests_ch[i]);
- }
-
- /* Update the transition table. */
- /* For all characters ch...: */
- for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
- for (j = 0; j < UINT_BITS; ++j, ++ch)
- if ((acceptable[i] >> j) & 1)
- {
- /* The current state accepts the character ch. */
- if (IS_WORD_CHAR (ch))
- {
- for (k = 0; k < ndests; ++k)
- if ((dests_ch[k][i] >> j) & 1)
- {
- /* k-th destination accepts the word character ch. */
- trtable[ch] = dest_states_word[k];
- /* There must be only one destination which accepts
- character ch. See group_nodes_into_DFAstates. */
- break;
- }
- }
- else /* not WORD_CHAR */
- {
- for (k = 0; k < ndests; ++k)
- if ((dests_ch[k][i] >> j) & 1)
- {
- /* k-th destination accepts the non-word character ch. */
- trtable[ch] = dest_states[k];
- /* There must be only one destination which accepts
- character ch. See group_nodes_into_DFAstates. */
- break;
- }
- }
- }
- /* new line */
- if (bitset_contain (acceptable, NEWLINE_CHAR))
- {
- /* The current state accepts newline character. */
- for (k = 0; k < ndests; ++k)
- if (bitset_contain (dests_ch[k], NEWLINE_CHAR))
- {
- /* k-th destination accepts newline character. */
- trtable[NEWLINE_CHAR] = dest_states_nl[k];
- /* There must be only one destination which accepts
- newline. See group_nodes_into_DFAstates. */
- break;
- }
- }
-
- if (dest_states_malloced)
- free (dest_states);
-
- re_node_set_free (&follows);
- for (i = 0; i < ndests; ++i)
- re_node_set_free (dests_node + i);
-
- if (dests_node_malloced)
- free (dests_node);
-
- return trtable;
-}
-
-/* Group all nodes belonging to STATE into several destinations.
- Then for all destinations, set the nodes belonging to the destination
- to DESTS_NODE[i] and set the characters accepted by the destination
- to DEST_CH[i]. This function return the number of destinations. */
-
-static int
-group_nodes_into_DFAstates (preg, state, dests_node, dests_ch)
- const regex_t *preg;
- const re_dfastate_t *state;
- re_node_set *dests_node;
- bitset *dests_ch;
-{
- reg_errcode_t err;
- const re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
- int i, j, k;
- int ndests; /* Number of the destinations from `state'. */
- bitset accepts; /* Characters a node can accept. */
- const re_node_set *cur_nodes = &state->nodes;
- bitset_empty (accepts);
- ndests = 0;
-
- /* For all the nodes belonging to `state', */
- for (i = 0; i < cur_nodes->nelem; ++i)
- {
- re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
- re_token_type_t type = node->type;
- unsigned int constraint = node->constraint;
-
- /* Enumerate all single byte character this node can accept. */
- if (type == CHARACTER)
- bitset_set (accepts, node->opr.c);
- else if (type == SIMPLE_BRACKET)
- {
- bitset_merge (accepts, node->opr.sbcset);
- }
- else if (type == OP_PERIOD)
- {
- bitset_set_all (accepts);
- if (!(preg->syntax & RE_DOT_NEWLINE))
- bitset_clear (accepts, '\n');
- if (preg->syntax & RE_DOT_NOT_NULL)
- bitset_clear (accepts, '\0');
- }
- else
- continue;
-
- /* Check the `accepts' and sift the characters which are not
- match it the context. */
- if (constraint)
- {
- if (constraint & NEXT_WORD_CONSTRAINT)
- for (j = 0; j < BITSET_UINTS; ++j)
- accepts[j] &= dfa->word_char[j];
- if (constraint & NEXT_NOTWORD_CONSTRAINT)
- for (j = 0; j < BITSET_UINTS; ++j)
- accepts[j] &= ~dfa->word_char[j];
- if (constraint & NEXT_NEWLINE_CONSTRAINT)
- {
- int accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
- bitset_empty (accepts);
- if (accepts_newline)
- bitset_set (accepts, NEWLINE_CHAR);
- else
- continue;
- }
- }
-
- /* Then divide `accepts' into DFA states, or create a new
- state. */
- for (j = 0; j < ndests; ++j)
- {
- bitset intersec; /* Intersection sets, see below. */
- bitset remains;
- /* Flags, see below. */
- int has_intersec, not_subset, not_consumed;
-
- /* Optimization, skip if this state doesn't accept the character. */
- if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
- continue;
-
- /* Enumerate the intersection set of this state and `accepts'. */
- has_intersec = 0;
- for (k = 0; k < BITSET_UINTS; ++k)
- has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
- /* And skip if the intersection set is empty. */
- if (!has_intersec)
- continue;
-
- /* Then check if this state is a subset of `accepts'. */
- not_subset = not_consumed = 0;
- for (k = 0; k < BITSET_UINTS; ++k)
- {
- not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
- not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
- }
-
- /* If this state isn't a subset of `accepts', create a
- new group state, which has the `remains'. */
- if (not_subset)
- {
- bitset_copy (dests_ch[ndests], remains);
- bitset_copy (dests_ch[j], intersec);
- err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
- if (BE (err != REG_NOERROR, 0))
- goto error_return;
- ++ndests;
- }
-
- /* Put the position in the current group. */
- err = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
- if (BE (err < 0, 0))
- goto error_return;
-
- /* If all characters are consumed, go to next node. */
- if (!not_consumed)
- break;
- }
- /* Some characters remain, create a new group. */
- if (j == ndests)
- {
- bitset_copy (dests_ch[ndests], accepts);
- err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
- if (BE (err != REG_NOERROR, 0))
- goto error_return;
- ++ndests;
- bitset_empty (accepts);
- }
- }
- return ndests;
- error_return:
- for (j = 0; j < ndests; ++j)
- re_node_set_free (dests_node + j);
- return -1;
-}
-
-#ifdef RE_ENABLE_I18N
-/* Check how many bytes the node `dfa->nodes[node_idx]' accepts.
- Return the number of the bytes the node accepts.
- STR_IDX is the current index of the input string.
-
- This function handles the nodes which can accept one character, or
- one collating element like '.', '[a-z]', opposite to the other nodes
- can only accept one byte. */
-
-static int
-check_node_accept_bytes (preg, node_idx, input, str_idx)
- const regex_t *preg;
- int node_idx, str_idx;
- const re_string_t *input;
-{
- const re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
- const re_token_t *node = dfa->nodes + node_idx;
- int elem_len = re_string_elem_size_at (input, str_idx);
- int char_len = re_string_char_size_at (input, str_idx);
- int i;
-# ifdef _LIBC
- int j;
- uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
-# endif /* _LIBC */
- if (elem_len <= 1 && char_len <= 1)
- return 0;
- if (node->type == OP_PERIOD)
- {
- /* '.' accepts any one character except the following two cases. */
- if ((!(preg->syntax & RE_DOT_NEWLINE) &&
- re_string_byte_at (input, str_idx) == '\n') ||
- ((preg->syntax & RE_DOT_NOT_NULL) &&
- re_string_byte_at (input, str_idx) == '\0'))
- return 0;
- return char_len;
- }
- else if (node->type == COMPLEX_BRACKET)
- {
- const re_charset_t *cset = node->opr.mbcset;
-# ifdef _LIBC
- const unsigned char *pin = ((char *) re_string_get_buffer (input)
- + str_idx);
-# endif /* _LIBC */
- int match_len = 0;
- wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
- ? re_string_wchar_at (input, str_idx) : 0);
-
- /* match with multibyte character? */
- for (i = 0; i < cset->nmbchars; ++i)
- if (wc == cset->mbchars[i])
- {
- match_len = char_len;
- goto check_node_accept_bytes_match;
- }
- /* match with character_class? */
- for (i = 0; i < cset->nchar_classes; ++i)
- {
- wctype_t wt = cset->char_classes[i];
- if (__iswctype (wc, wt))
- {
- match_len = char_len;
- goto check_node_accept_bytes_match;
- }
- }
-
-# ifdef _LIBC
- if (nrules != 0)
- {
- unsigned int in_collseq = 0;
- const int32_t *table, *indirect;
- const unsigned char *weights, *extra;
- const char *collseqwc;
- int32_t idx;
- /* This #include defines a local function! */
-# include <locale/weight.h>
-
- /* match with collating_symbol? */
- if (cset->ncoll_syms)
- extra = (const unsigned char *)
- _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
- for (i = 0; i < cset->ncoll_syms; ++i)
- {
- const unsigned char *coll_sym = extra + cset->coll_syms[i];
- /* Compare the length of input collating element and
- the length of current collating element. */
- if (*coll_sym != elem_len)
- continue;
- /* Compare each bytes. */
- for (j = 0; j < *coll_sym; j++)
- if (pin[j] != coll_sym[1 + j])
- break;
- if (j == *coll_sym)
- {
- /* Match if every bytes is equal. */
- match_len = j;
- goto check_node_accept_bytes_match;
- }
- }
-
- if (cset->nranges)
- {
- if (elem_len <= char_len)
- {
- collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
- in_collseq = collseq_table_lookup (collseqwc, wc);
- }
- else
- in_collseq = find_collation_sequence_value (pin, elem_len);
- }
- /* match with range expression? */
- for (i = 0; i < cset->nranges; ++i)
- if (cset->range_starts[i] <= in_collseq
- && in_collseq <= cset->range_ends[i])
- {
- match_len = elem_len;
- goto check_node_accept_bytes_match;
- }
-
- /* match with equivalence_class? */
- if (cset->nequiv_classes)
- {
- const unsigned char *cp = pin;
- table = (const int32_t *)
- _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
- weights = (const unsigned char *)
- _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
- extra = (const unsigned char *)
- _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
- indirect = (const int32_t *)
- _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
- idx = findidx (&cp);
- if (idx > 0)
- for (i = 0; i < cset->nequiv_classes; ++i)
- {
- int32_t equiv_class_idx = cset->equiv_classes[i];
- size_t weight_len = weights[idx];
- if (weight_len == weights[equiv_class_idx])
- {
- int cnt = 0;
- while (cnt <= weight_len
- && (weights[equiv_class_idx + 1 + cnt]
- == weights[idx + 1 + cnt]))
- ++cnt;
- if (cnt > weight_len)
- {
- match_len = elem_len;
- goto check_node_accept_bytes_match;
- }
- }
- }
- }
- }
- else
-# endif /* _LIBC */
- {
- /* match with range expression? */
-#if __GNUC__ >= 2
- wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'};
-#else
- wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
- cmp_buf[2] = wc;
-#endif
- for (i = 0; i < cset->nranges; ++i)
- {
- cmp_buf[0] = cset->range_starts[i];
- cmp_buf[4] = cset->range_ends[i];
- if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
- && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
- {
- match_len = char_len;
- goto check_node_accept_bytes_match;
- }
- }
- }
- check_node_accept_bytes_match:
- if (!cset->non_match)
- return match_len;
- else
- {
- if (match_len > 0)
- return 0;
- else
- return (elem_len > char_len) ? elem_len : char_len;
- }
- }
- return 0;
-}
-
-# ifdef _LIBC
-static unsigned int
-find_collation_sequence_value (mbs, mbs_len)
- const unsigned char *mbs;
- size_t mbs_len;
-{
- uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
- if (nrules == 0)
- {
- if (mbs_len == 1)
- {
- /* No valid character. Match it as a single byte character. */
- const unsigned char *collseq = (const unsigned char *)
- _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
- return collseq[mbs[0]];
- }
- return UINT_MAX;
- }
- else
- {
- int32_t idx;
- const unsigned char *extra = (const unsigned char *)
- _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
-
- for (idx = 0; ;)
- {
- int mbs_cnt, found = 0;
- int32_t elem_mbs_len;
- /* Skip the name of collating element name. */
- idx = idx + extra[idx] + 1;
- elem_mbs_len = extra[idx++];
- if (mbs_len == elem_mbs_len)
- {
- for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
- if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
- break;
- if (mbs_cnt == elem_mbs_len)
- /* Found the entry. */
- found = 1;
- }
- /* Skip the byte sequence of the collating element. */
- idx += elem_mbs_len;
- /* Adjust for the alignment. */
- idx = (idx + 3) & ~3;
- /* Skip the collation sequence value. */
- idx += sizeof (uint32_t);
- /* Skip the wide char sequence of the collating element. */
- idx = idx + sizeof (uint32_t) * (extra[idx] + 1);
- /* If we found the entry, return the sequence value. */
- if (found)
- return *(uint32_t *) (extra + idx);
- /* Skip the collation sequence value. */
- idx += sizeof (uint32_t);
- }
- }
-}
-# endif /* _LIBC */
-#endif /* RE_ENABLE_I18N */
-
-/* Check whether the node accepts the byte which is IDX-th
- byte of the INPUT. */
-
-static int
-check_node_accept (preg, node, mctx, idx)
- const regex_t *preg;
- const re_token_t *node;
- const re_match_context_t *mctx;
- int idx;
-{
- unsigned char ch;
- if (node->constraint)
- {
- /* The node has constraints. Check whether the current context
- satisfies the constraints. */
- unsigned int context = re_string_context_at (mctx->input, idx,
- mctx->eflags,
- preg->newline_anchor);
- if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
- return 0;
- }
- ch = re_string_byte_at (mctx->input, idx);
- if (node->type == CHARACTER)
- return node->opr.c == ch;
- else if (node->type == SIMPLE_BRACKET)
- return bitset_contain (node->opr.sbcset, ch);
- else if (node->type == OP_PERIOD)
- return !((ch == '\n' && !(preg->syntax & RE_DOT_NEWLINE))
- || (ch == '\0' && (preg->syntax & RE_DOT_NOT_NULL)));
- else
- return 0;
-}
-
-/* Extend the buffers, if the buffers have run out. */
-
-static reg_errcode_t
-extend_buffers (mctx)
- re_match_context_t *mctx;
-{
- reg_errcode_t ret;
- re_string_t *pstr = mctx->input;
-
- /* Double the lengthes of the buffers. */
- ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
- if (BE (ret != REG_NOERROR, 0))
- return ret;
-
- if (mctx->state_log != NULL)
- {
- /* And double the length of state_log. */
- re_dfastate_t **new_array;
- new_array = re_realloc (mctx->state_log, re_dfastate_t *,
- pstr->bufs_len * 2);
- if (BE (new_array == NULL, 0))
- return REG_ESPACE;
- mctx->state_log = new_array;
- }
-
- /* Then reconstruct the buffers. */
- if (pstr->icase)
- {
-#ifdef RE_ENABLE_I18N
- if (MB_CUR_MAX > 1)
- build_wcs_upper_buffer (pstr);
- else
-#endif /* RE_ENABLE_I18N */
- build_upper_buffer (pstr);
- }
- else
- {
-#ifdef RE_ENABLE_I18N
- if (MB_CUR_MAX > 1)
- build_wcs_buffer (pstr);
- else
-#endif /* RE_ENABLE_I18N */
- {
- if (pstr->trans != NULL)
- re_string_translate_buffer (pstr);
- else
- pstr->valid_len = pstr->bufs_len;
- }
- }
- return REG_NOERROR;
-}
-
-\f
-/* Functions for matching context. */
-
-/* Initialize MCTX. */
-
-static reg_errcode_t
-match_ctx_init (mctx, eflags, input, n)
- re_match_context_t *mctx;
- int eflags, n;
- re_string_t *input;
-{
- mctx->eflags = eflags;
- mctx->input = input;
- mctx->match_last = -1;
- if (n > 0)
- {
- mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
- mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
- if (BE (mctx->bkref_ents == NULL || mctx->sub_tops == NULL, 0))
- return REG_ESPACE;
- }
- else
- mctx->bkref_ents = NULL;
- mctx->nbkref_ents = 0;
- mctx->abkref_ents = n;
- mctx->max_mb_elem_len = 1;
- mctx->nsub_tops = 0;
- mctx->asub_tops = n;
- return REG_NOERROR;
-}
-
-/* Clean the entries which depend on the current input in MCTX.
- This function must be invoked when the matcher changes the start index
- of the input, or changes the input string. */
-
-static void
-match_ctx_clean (mctx)
- re_match_context_t *mctx;
-{
- match_ctx_free_subtops (mctx);
- mctx->nsub_tops = 0;
- mctx->nbkref_ents = 0;
-}
-
-/* Free all the memory associated with MCTX. */
-
-static void
-match_ctx_free (mctx)
- re_match_context_t *mctx;
-{
- match_ctx_free_subtops (mctx);
- re_free (mctx->sub_tops);
- re_free (mctx->bkref_ents);
-}
-
-/* Free all the memory associated with MCTX->SUB_TOPS. */
-
-static void
-match_ctx_free_subtops (mctx)
- re_match_context_t *mctx;
-{
- int st_idx;
- for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)
- {
- int sl_idx;
- re_sub_match_top_t *top = mctx->sub_tops[st_idx];
- for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx)
- {
- re_sub_match_last_t *last = top->lasts[sl_idx];
- re_free (last->path.array);
- re_free (last);
- }
- re_free (top->lasts);
- if (top->path)
- {
- re_free (top->path->array);
- re_free (top->path);
- }
- free (top);
- }
-}
-
-/* Add a new backreference entry to MCTX.
- Note that we assume that caller never call this function with duplicate
- entry, and call with STR_IDX which isn't smaller than any existing entry.
-*/
-
-static reg_errcode_t
-match_ctx_add_entry (mctx, node, str_idx, from, to)
- re_match_context_t *mctx;
- int node, str_idx, from, to;
-{
- if (mctx->nbkref_ents >= mctx->abkref_ents)
- {
- struct re_backref_cache_entry* new_entry;
- new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
- mctx->abkref_ents * 2);
- if (BE (new_entry == NULL, 0))
- {
- re_free (mctx->bkref_ents);
- return REG_ESPACE;
- }
- mctx->bkref_ents = new_entry;
- memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
- sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
- mctx->abkref_ents *= 2;
- }
- mctx->bkref_ents[mctx->nbkref_ents].node = node;
- mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
- mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
- mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
- mctx->bkref_ents[mctx->nbkref_ents++].flag = 0;
- if (mctx->max_mb_elem_len < to - from)
- mctx->max_mb_elem_len = to - from;
- return REG_NOERROR;
-}
-
-/* Search for the first entry which has the same str_idx.
- Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX. */
-
-static int
-search_cur_bkref_entry (mctx, str_idx)
- re_match_context_t *mctx;
- int str_idx;
-{
- int left, right, mid;
- right = mctx->nbkref_ents;
- for (left = 0; left < right;)
- {
- mid = (left + right) / 2;
- if (mctx->bkref_ents[mid].str_idx < str_idx)
- left = mid + 1;
- else
- right = mid;
- }
- return left;
-}
-
-static void
-match_ctx_clear_flag (mctx)
- re_match_context_t *mctx;
-{
- int i;
- for (i = 0; i < mctx->nbkref_ents; ++i)
- {
- mctx->bkref_ents[i].flag = 0;
- }
-}
-
-/* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches
- at STR_IDX. */
-
-static reg_errcode_t
-match_ctx_add_subtop (mctx, node, str_idx)
- re_match_context_t *mctx;
- int node, str_idx;
-{
-#ifdef DEBUG
- assert (mctx->sub_tops != NULL);
- assert (mctx->asub_tops > 0);
-#endif
- if (mctx->nsub_tops == mctx->asub_tops)
- {
- re_sub_match_top_t **new_array;
- mctx->asub_tops *= 2;
- new_array = re_realloc (mctx->sub_tops, re_sub_match_top_t *,
- mctx->asub_tops);
- if (BE (new_array == NULL, 0))
- return REG_ESPACE;
- mctx->sub_tops = new_array;
- }
- mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t));
- if (mctx->sub_tops[mctx->nsub_tops] == NULL)
- return REG_ESPACE;
- mctx->sub_tops[mctx->nsub_tops]->node = node;
- mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
- return REG_NOERROR;
-}
-
-/* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
- at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP. */
-
-static re_sub_match_last_t *
-match_ctx_add_sublast (subtop, node, str_idx)
- re_sub_match_top_t *subtop;
- int node, str_idx;
-{
- re_sub_match_last_t *new_entry;
- if (subtop->nlasts == subtop->alasts)
- {
- re_sub_match_last_t **new_array;
- subtop->alasts = 2 * subtop->alasts + 1;
- new_array = re_realloc (subtop->lasts, re_sub_match_last_t *,
- subtop->alasts);
- if (BE (new_array == NULL, 0))
- return NULL;
- subtop->lasts = new_array;
- }
- new_entry = calloc (1, sizeof (re_sub_match_last_t));
- if (BE (new_entry == NULL, 0))
- return NULL;
- subtop->lasts[subtop->nlasts] = new_entry;
- new_entry->node = node;
- new_entry->str_idx = str_idx;
- ++subtop->nlasts;
- return new_entry;
-}
-
-static void
-sift_ctx_init (sctx, sifted_sts, limited_sts, last_node, last_str_idx,
- check_subexp)
- re_sift_context_t *sctx;
- re_dfastate_t **sifted_sts, **limited_sts;
- int last_node, last_str_idx, check_subexp;
-{
- sctx->sifted_states = sifted_sts;
- sctx->limited_states = limited_sts;
- sctx->last_node = last_node;
- sctx->last_str_idx = last_str_idx;
- sctx->check_subexp = check_subexp;
- sctx->cur_bkref = -1;
- sctx->cls_subexp_idx = -1;
- re_node_set_init_empty (&sctx->limits);
-}
+++ /dev/null
-/*
- * UCW Library -- Running of Commands
- *
- * (c) 2004 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-
-#include <stdlib.h>
-#include <string.h>
-#include <alloca.h>
-#include <unistd.h>
-#include <sys/wait.h>
-
-void NONRET
-exec_command_v(const char *cmd, va_list args)
-{
- va_list cargs;
- va_copy(cargs, args);
- int cnt = 2;
- char *arg;
- while (arg = va_arg(cargs, char *))
- cnt++;
- va_end(cargs);
- char **argv = alloca(sizeof(char *) * cnt);
- argv[0] = (char *)cmd;
- cnt = 1;
- va_copy(cargs, args);
- while (arg = va_arg(cargs, char *))
- argv[cnt++] = arg;
- va_end(cargs);
- argv[cnt] = NULL;
- execv(cmd, argv);
- char echo[256];
- echo_command_v(echo, sizeof(echo), cmd, args);
- msg(L_ERROR, "Cannot execute %s: %m", echo);
- exit(255);
-}
-
-int
-run_command_v(const char *cmd, va_list args)
-{
- pid_t p = fork();
- if (p < 0)
- {
- msg(L_ERROR, "fork() failed: %m");
- return 0;
- }
- else if (!p)
- exec_command_v(cmd, args);
- else
- {
- int stat;
- char status_msg[EXIT_STATUS_MSG_SIZE];
- p = waitpid(p, &stat, 0);
- if (p < 0)
- die("waitpid() failed: %m");
- if (format_exit_status(status_msg, stat))
- {
- char echo[256];
- echo_command_v(echo, sizeof(echo), cmd, args);
- msg(L_ERROR, "`%s' failed: %s", echo, status_msg);
- return 0;
- }
- return 1;
- }
-}
-
-void
-echo_command_v(char *buf, int size, const char *cmd, va_list args)
-{
- char *limit = buf + size - 4;
- char *p = buf;
- const char *arg = cmd;
- do
- {
- int l = strlen(arg);
- if (p != buf && p < limit)
- *p++ = ' ';
- if (p+l > limit)
- {
- memcpy(p, arg, limit-p);
- strcpy(limit, "...");
- return;
- }
- memcpy(p, arg, l);
- p += l;
- }
- while (arg = va_arg(args, char *));
- *p = 0;
-}
-
-int
-run_command(const char *cmd, ...)
-{
- va_list args;
- va_start(args, cmd);
- int e = run_command_v(cmd, args);
- va_end(args);
- return e;
-}
-
-void NONRET
-exec_command(const char *cmd, ...)
-{
- va_list args;
- va_start(args, cmd);
- exec_command_v(cmd, args);
-}
-
-void
-echo_command(char *buf, int len, const char *cmd, ...)
-{
- va_list args;
- va_start(args, cmd);
- echo_command_v(buf, len, cmd, args);
- va_end(args);
-}
-
-#ifdef TEST
-
-int main(void)
-{
- char msg[1024];
- echo_command(msg, sizeof(msg), "/bin/echo", "datel", "strakapoud", NULL);
- log(L_INFO, "Running <%s>", msg);
- run_command("/bin/echo", "datel", "strakapoud", NULL);
- return 0;
-}
-
-#endif
+++ /dev/null
-/*
- * The UCW Library -- POSIX semaphores wrapper
- *
- * (c) 2006 Robert Spalek <robert@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#ifndef _UCW_SEMAPHORE_H
-#define _UCW_SEMAPHORE_H
-
-#include <semaphore.h>
-
-#ifdef CONFIG_DARWIN
-
-#include <unistd.h>
-#include <stdio.h>
-
-/* In Darwin, sem_init() is unfortunately not implemented and the guide
- * recommends emulating it using sem_open(). */
-
-static inline sem_t *
-sem_alloc(void)
-{
- static uns cnt = 0;
- char buf[20];
- sprintf(buf, "tmp/sem-%d-%d", getpid(), cnt++);
- sem_t *sem = sem_open(buf, O_CREAT, 0777, 0);
- ASSERT(sem != (sem_t*) SEM_FAILED);
- return sem;
-}
-
-static inline void
-sem_free(sem_t *sem)
-{
- sem_close(sem);
-}
-
-#else
-
-static inline sem_t *
-sem_alloc(void)
-{
- sem_t *sem = xmalloc(sizeof(sem_t));
- int res = sem_init(sem, 0, 0);
- ASSERT(!res);
- return sem;
-}
-
-static inline void
-sem_free(sem_t *sem)
-{
- sem_destroy(sem);
- xfree(sem);
-}
-
-#endif
-
-#endif
+++ /dev/null
-# Support routines for shell scripts
-
-DIRS+=lib/shell
-PROGS+=$(o)/lib/shell/config $(o)/lib/shell/logger
-DATAFILES+=$(o)/lib/shell/libucw.sh
-
-$(o)/lib/shell/config: $(o)/lib/shell/config.o $(LIBUCW)
-$(o)/lib/shell/logger: $(o)/lib/shell/logger.o $(LIBUCW)
-
-TESTS+=$(addprefix $(o)/lib/shell/,config.test)
-
-$(o)/lib/shell/config.test: $(o)/lib/shell/config
+++ /dev/null
-/*
- * UCW Library -- Shell Interface to Configuration Files
- *
- * (c) 2002--2005 Martin Mares <mj@ucw.cz>
- * (c) 2006 Robert Spalek <robert@ucw.cz>
- * (c) 2006 Pavel Charvat <pchar@ucw.cz>
- *
- * Once we were using this beautiful Shell version, but it turned out
- * that it doesn't work with nested config files:
- *
- * eval `sed <cf/sherlock '/^#/d;/^ *$/d;s/ \+$//;
- * h;s@[^ ]*@@;x;s@[ ].*@@;y/abcdefghijklmnopqrstuvwxyz/ABCDEFGHIJKLMNOPQRSTUVWXYZ/;G;s/\n//;
- * /^\[SECTION\]/,/^\[/ {; /^[A-Z]/ { s/^\([^ ]\+\)[ ]*\(.*\)$/SH_\1="\2"/; p; }; };
- * d;'`
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-#include "lib/conf.h"
-#include "lib/getopt.h"
-#include "lib/conf-internal.h"
-#include "lib/clists.h"
-#include "lib/mempool.h"
-#include "lib/chartype.h"
-#include "lib/bbuf.h"
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <alloca.h>
-
-static void
-help(void)
-{
- fputs("\n\
-Usage: config [-C<configfile>] [-S<section>.<option>=<value>] <sections>\n\
-\n\
-<sections>\t<section>[;<sections>]\n\
-<section>\t[!]<name>{[<items>]}\n\
-<items>\t\t[-]<item>[;<items>]\n\
-<item>\t\t<static> | <array> | <list>\n\
-<static>\t<type><name>[=<value>]\n\
-<list>\t\t@<name>{[<items>]}\n\
-<array>\t\t<type><name><left-bracket>[<number>]<right-bracket>\n\
-<value>\t\t[a-zA-Z0-9.-/]* | 'string without single quotes'<value> | \"c-like string\"<value>\n\
-\n\
-Types:\n\
-<empty>\t\tString\n\
-#\t\t32-bit integer\n\
-##\t\t64-bit integer\n\
-$\t\tFloating point number\n\
-\n\
-Modifiers:\n\
-!\t\tReport unknown items as errors\n\
--\t\tDo not dump item's value\n\
-", stderr);
- exit(1);
-}
-
-union value {
- void *v_ptr;
- int v_int;
- u64 v_u64;
- double v_double;
- clist list;
-};
-
-#define FLAG_HIDE 0x1
-#define FLAG_NO_UNKNOWN 0x2
-
-struct item {
- cnode node;
- uns flags;
- struct cf_item cf;
- union value value;
- uns index;
-};
-
-struct section {
- struct item item;
- clist list;
- uns count;
- uns size;
-};
-
-static struct mempool *pool;
-static clist sections;
-static byte *pos;
-
-static void
-parse_white(void)
-{
- while (Cspace(*pos))
- pos++;
-}
-
-static void
-parse_char(byte c)
-{
- if (*pos++ != c)
- die("Missing '%c'", c);
-}
-
-static byte *
-parse_name(void)
-{
- byte *name = pos;
- while (Cword(*pos))
- pos++;
- uns len = pos - name;
- if (!len)
- die("Expected item/section name");
- byte *buf = mp_alloc(pool, len + 1);
- memcpy(buf, name, len);
- buf[len] = 0;
- return buf;
-}
-
-static void
-parse_section(struct section *section)
-{
-#define TRY(x) do{byte *_err=(x); if (_err) die(_err); }while(0)
- for (uns sep = 0; ; sep = 1)
- {
- parse_white();
- if (!*pos || *pos == '}')
- break;
- if (sep)
- parse_char(';');
- parse_white();
-
- struct item *item;
-
- if (*pos == '@')
- {
- pos++;
- struct section *sec = mp_alloc_zero(pool, sizeof(*sec));
- sec->size = sizeof(cnode);
- clist_init(&sec->list);
- item = &sec->item;
- item->cf.name = parse_name();
- item->cf.cls = CC_LIST;
- item->cf.number = 1;
- parse_white();
- parse_char('{');
- parse_section(sec);
- parse_char('}');
- }
- else
- {
- item = mp_alloc_zero(pool, sizeof(*item));
- if (*pos == '-')
- {
- item->flags |= FLAG_HIDE;
- pos++;
- }
- item->cf.cls = CC_STATIC;
- item->cf.number = 1;
- switch (*pos)
- {
- case '#':
- if (*++pos == '#')
- {
- pos++;
- item->cf.type = CT_U64;
- }
- else
- item->cf.type = CT_INT;
- break;
- case '$':
- pos++;
- item->cf.type = CT_DOUBLE;
- break;
- default:
- if (!Cword(*pos))
- die("Invalid type syntax");
- item->cf.type = CT_STRING;
- break;
- }
- parse_white();
- item->cf.name = parse_name();
- parse_white();
- if (*pos == '[')
- {
- pos++;
- parse_white();
- item->cf.cls = CC_DYNAMIC;
- byte *num = pos;
- while (*pos && *pos != ']')
- pos++;
- if (!*pos)
- die("Missing ']'");
- *pos++ = 0;
- if (!*num)
- item->cf.number = CF_ANY_NUM;
- else
- {
- int inum;
- TRY(cf_parse_int(num, &inum));
- if (!inum)
- die("Invalid array length");
- item->cf.number = inum;
- }
- parse_white();
- }
- if (*pos == '=')
- {
- pos++;
- parse_white();
- if (section->item.cf.cls == CC_LIST)
- die("List items can not have default values");
- if (item->cf.cls == CC_DYNAMIC)
- die("Arrays can not have default values");
- byte *def = pos, *d = def;
- while (*pos != ';' && *pos != '}' && !Cspace(*pos))
- {
- if (*pos == '\'')
- {
- pos++;
- while (*pos != '\'')
- {
- if (!*pos)
- die("Unterminated string");
- *d++ = *pos++;
- }
- pos++;
- }
- else if (*pos == '"')
- {
- pos++;
- byte *start = d;
- uns esc = 0;
- while (*pos != '"' || esc)
- {
- if (!*pos)
- die("Unterminated string");
- if (*pos == '\\')
- esc ^= 1;
- else
- esc = 0;
- *d++ = *pos++;
- }
- pos++;
- *d = 0;
- d = str_unesc(start, start);
- }
- else
- *d++ = *pos++;
- }
- uns len = d - def;
- byte *buf = mp_alloc(pool, len + 1);
- memcpy(buf, def, len);
- buf[len] = 0;
- switch (item->cf.type)
- {
- case CT_STRING:
- item->value.v_ptr = buf;
- break;
- case CT_INT:
- TRY(cf_parse_int(buf, &item->value.v_int));
- break;
- case CT_U64:
- TRY(cf_parse_u64(buf, &item->value.v_u64));
- break;
- case CT_DOUBLE:
- TRY(cf_parse_double(buf, &item->value.v_double));
- break;
- default:
- ASSERT(0);
- }
- }
- }
- if (section->item.cf.cls == CC_LIST)
- {
- item->cf.ptr = (void *)(uintptr_t)section->size;
- section->size += sizeof(union value);
- }
- else
- item->cf.ptr = &item->value;
- clist_add_tail(§ion->list, &item->node);
- section->count++;
- }
-#undef TRY
-}
-
-static void
-parse_outer(void)
-{
- for (uns sep = 0; ; sep = 1)
- {
- parse_white();
- if (!*pos)
- break;
- if (sep)
- parse_char(';');
- parse_white();
- struct section *sec = mp_alloc_zero(pool, sizeof(*sec));
- if (*pos == '!')
- {
- pos++;
- sec->item.flags |= FLAG_NO_UNKNOWN;
- }
- sec->item.cf.name = parse_name();
- parse_white();
- parse_char('{');
- clist_add_tail(§ions, &sec->item.node);
- clist_init(&sec->list);
- parse_section(sec);
- parse_char('}');
- }
-}
-
-static struct cf_section *
-generate_section(struct section *section)
-{
- struct cf_section *sec = mp_alloc_zero(pool, sizeof(*sec));
- if (section->item.cf.cls == CC_LIST)
- sec->size = section->size;
- struct cf_item *c = sec->cfg = mp_alloc_zero(pool, sizeof(struct cf_item) * (section->count + 1));
- CLIST_FOR_EACH(struct item *, item, section->list)
- {
- *c = item->cf;
- if (c->cls == CC_LIST)
- c->u.sec = generate_section((struct section *)item);
- c++;
- }
- c->cls = CC_END;
- return sec;
-}
-
-static bb_t path;
-
-static void
-dump_value(uns array, struct item *item, void *v)
-{
- byte buf[128], *value = buf;
- if (!array)
- printf("CF_%s_%s='", path.ptr, item->cf.name);
- else
- printf("CF_%s_%s[%u]='", path.ptr, item->cf.name, ++item->index);
- switch (item->cf.type)
- {
- case CT_INT:
- sprintf(buf, "%d", *(int *)v);
- break;
- case CT_U64:
- sprintf(buf, "%llu", (long long) *(u64 *)v);
- break;
- case CT_DOUBLE:
- sprintf(buf, "%g", *(double *)v);
- break;
- case CT_STRING:
- if (*(byte **)v)
- value = *(byte **)v;
- else
- *value = 0;
- break;
- default:
- ASSERT(0);
- }
- while (*value) {
- if (*value == '\'')
- printf("'\\''");
- else
- putchar(*value);
- value++;
- }
- printf("'\n");
-}
-
-static void
-dump_item(struct item *item, void *ptr, uns path_len)
-{
- if (item->flags & FLAG_HIDE)
- return;
- byte *val = (byte *)((uintptr_t)ptr + (uintptr_t)item->cf.ptr);
- if (item->cf.cls == CC_LIST)
- {
- uns len = strlen(item->cf.name);
- bb_grow(&path, path_len + len + 1);
- path.ptr[path_len] = '_';
- memcpy(path.ptr + path_len + 1, item->cf.name, len);
- CLIST_FOR_EACH(cnode *, ptr2, *(clist *)val)
- CLIST_FOR_EACH(struct item *, item2, ((struct section *)item)->list)
- dump_item(item2, ptr2, path_len + len + 1);
- }
- else
- {
- bb_grow(&path, path_len + 1)[path_len] = 0;
- if (item->cf.cls == CC_STATIC)
- dump_value(!!ptr, item, val);
- else
- {
- val = *(void **)val;
- uns len = DARY_LEN(val);
- uns size = cf_type_size(item->cf.type, NULL);
- for (uns i = 0; i < len; i++, val += size)
- dump_value(1, item, val);
- }
- }
-}
-
-int main(int argc, char **argv)
-{
- log_init("config");
- if (argc < 2)
- help();
- pos = argv[argc - 1];
- argv[argc - 1] = NULL;
-
- pool = mp_new(0x1000);
- clist_init(§ions);
- parse_outer();
- CLIST_FOR_EACH(struct section *, sec, sections)
- cf_declare_section(sec->item.cf.name, generate_section(sec), !(sec->item.flags & FLAG_NO_UNKNOWN));
-
- if (cf_getopt(argc - 1, argv, CF_SHORT_OPTS, CF_NO_LONG_OPTS, NULL) != -1)
- help();
-
- bb_init(&path);
- CLIST_FOR_EACH(struct section *, section, sections)
- {
- uns len = strlen(section->item.cf.name);
- memcpy(bb_grow(&path, len), section->item.cf.name, len);
- CLIST_FOR_EACH(struct item *, item, section->list)
- dump_item(item, NULL, len);
- }
- bb_done(&path);
-
- return 0;
-}
-
+++ /dev/null
-# Tests for configuration parser
-
-Run: ../obj/lib/shell/config -C/dev/null -S 'sec1{int1=23; long1=1234567812345678; long2=4321; str1="s1"; str2="s2"}' 'sec1 {#int1; ##long1; -str1; str2; #int2=123; ##long2=1234; #int3=0x10; #int4; $dbl1=001.100; $dbl2}; sec2{str3}'
-Out: CF_sec1_int1='23'
- CF_sec1_long1='1234567812345678'
- CF_sec1_str2='s2'
- CF_sec1_int2='123'
- CF_sec1_long2='4321'
- CF_sec1_int3='16'
- CF_sec1_int4='0'
- CF_sec1_dbl1='1.1'
- CF_sec1_dbl2='0'
- CF_sec2_str3=''
-
-Run: ../obj/lib/shell/config -C/dev/null -S 'sec1{list1 1 a1 b1; list1:clear; list1 2 a2 b2 3 a3 b3}' 'sec1 {@list1 {#int1; str1; -str2}}'
-Out: CF_sec1_list1_int1[1]='2'
- CF_sec1_list1_str1[1]='a2'
- CF_sec1_list1_int1[2]='3'
- CF_sec1_list1_str1[2]='a3'
-
-Run: ../obj/lib/shell/config -C/dev/null -S 'sec1{ar1 a b c d; ar1 a b c; ar2 1 2; ar3 1.1}' 'sec1 {ar1[]; #ar2[2]; $ar3[-2]}'
-Out: CF_sec1_ar1[1]='a'
- CF_sec1_ar1[2]='b'
- CF_sec1_ar1[3]='c'
- CF_sec1_ar2[1]='1'
- CF_sec1_ar2[2]='2'
- CF_sec1_ar3[1]='1.1'
-
-Run: ../obj/lib/shell/config -C/dev/null -S 'sec1{list1 {str1=1; list2=a b c}; list1 {str1=2; list2=d e}}' 'sec1 {@list1 {str1; @list2{str2}}}'
-Out: CF_sec1_list1_str1[1]='1'
- CF_sec1_list1_list2_str2[1]='a'
- CF_sec1_list1_list2_str2[2]='b'
- CF_sec1_list1_list2_str2[3]='c'
- CF_sec1_list1_str1[2]='2'
- CF_sec1_list1_list2_str2[4]='d'
- CF_sec1_list1_list2_str2[5]='e'
-
-Run: ../obj/lib/shell/config -C/dev/null 'sec{str=a'\''b"c'\''d"\\e'\''f"g}'
-Out: CF_sec_str='ab"cd\e'\''fg'
+++ /dev/null
-# The UCW Library -- Shell Functions
-# (c) 2005 Martin Mares <mj@ucw.cz>
-#
-# This software may be freely distributed and used according to the terms
-# of the GNU Lesser General Public License.
-
-UCW_CF=
-while [ "${1:0:2}" = "-C" -o "${1:0:2}" = "-S" ] ; do
- if [ -z "${1:2:1}" ] ; then
- UCW_CF="$UCW_CF $1 $2"
- shift 2
- else
- UCW_CF="$UCW_CF $1"
- shift 1
- fi
-done
-
-function log # msg
-{
- bin/logger $UCW_PROGNAME I "$1"
-}
-
-function errlog # msg
-{
- bin/logger $UCW_PROGNAME E "$1"
-}
-
-function warnlog # msg
-{
- bin/logger $UCW_PROGNAME E "$1"
-}
-
-function die # msg
-{
- bin/logger $UCW_PROGNAME ! "$1"
- exit 1
-}
-
-function parse-config # section vars...
-{
- eval `bin/config$UCW_CF "$@"`
-}
+++ /dev/null
-/*
- * UCW Library Utilities -- A Simple Logger for use in shell scripts
- *
- * (c) 2001 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-
-#include <stdio.h>
-#include <string.h>
-
-int
-main(int argc, char **argv)
-{
- byte buf[1024], *c;
-
- log_init("logger");
- if (argc < 3 || argc > 4 || strlen(argv[2]) != 1)
- die("Usage: logger [<logname>:]<progname> <level> [<text>]");
- if (c = strchr(argv[1], ':'))
- {
- *c++ = 0;
- log_init(c);
- log_file(argv[1]);
- }
- else
- log_init(argv[1]);
- if (argc > 3)
- msg(argv[2][0], argv[3]);
- else
- while (fgets(buf, sizeof(buf), stdin))
- {
- c = strchr(buf, '\n');
- if (c)
- *c = 0;
- msg(argv[2][0], buf);
- }
- return 0;
-}
+++ /dev/null
-/*
- * UCW Library -- Catching of signals and calling callback functions
- *
- * (c) 2004, Robert Spalek <robert@ucw.cz>
- * (c) 2006 Martin Mares <mj@ucw.cz>
- */
-
-#include "lib/lib.h"
-#include "lib/threads.h"
-
-#include <stdlib.h>
-#include <string.h>
-#include <signal.h>
-
-static int sig_handler_nest[NSIG];
-static struct sigaction sig_handler_old[NSIG];
-
-static void
-signal_handler_internal(int sig)
-{
- struct ucwlib_context *ctx = ucwlib_thread_context();
- if (!ctx->signal_handlers || !ctx->signal_handlers[sig] || ctx->signal_handlers[sig](sig))
- abort();
-}
-
-void
-handle_signal(int signum)
-{
- ucwlib_lock();
- if (!sig_handler_nest[signum]++)
- {
- struct sigaction act;
- bzero(&act, sizeof(act));
- act.sa_handler = signal_handler_internal;
- act.sa_flags = SA_NODEFER;
- if (sigaction(signum, &act, &sig_handler_old[signum]) < 0)
- die("sigaction: %m");
- }
- ucwlib_unlock();
-}
-
-void
-unhandle_signal(int signum)
-{
- ucwlib_lock();
- ASSERT(sig_handler_nest[signum]);
- if (!--sig_handler_nest[signum])
- {
- if (sigaction(signum, &sig_handler_old[signum], NULL) < 0)
- die("sigaction: %m");
- }
- ucwlib_unlock();
-}
-
-sh_sighandler_t
-set_signal_handler(int signum, sh_sighandler_t new)
-{
- struct ucwlib_context *ctx = ucwlib_thread_context();
- if (!ctx->signal_handlers)
- ctx->signal_handlers = xmalloc_zero(NSIG * sizeof(sh_sighandler_t));
- sh_sighandler_t old = ctx->signal_handlers[signum];
- ctx->signal_handlers[signum] = new;
- return old;
-}
+++ /dev/null
-/*
- * UCW Library -- Linked Lists of Simple Items
- *
- * (c) 2006 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-#include "lib/mempool.h"
-#include "lib/conf.h"
-#include "lib/simple-lists.h"
-
-simp_node *
-simp_append(struct mempool *mp, clist *l)
-{
- simp_node *n = mp_alloc_fast(mp, sizeof(*n));
- clist_add_tail(l, &n->n);
- return n;
-}
-
-simp2_node *
-simp2_append(struct mempool *mp, clist *l)
-{
- simp2_node *n = mp_alloc_fast(mp, sizeof(*n));
- clist_add_tail(l, &n->n);
- return n;
-}
-
-/* Configuration sections for common lists */
-
-struct cf_section cf_string_list_config = {
- CF_TYPE(simp_node),
- CF_ITEMS {
- CF_STRING("String", PTR_TO(simp_node, s)),
- CF_END
- }
-};
-
-struct cf_section cf_2string_list_config = {
- CF_TYPE(simp2_node),
- CF_ITEMS {
- CF_STRING("Src", PTR_TO(simp2_node, s1)),
- CF_STRING("Dest", PTR_TO(simp2_node, s2)),
- CF_END
- }
-};
+++ /dev/null
-/*
- * UCW Library -- Linked Lists of Simple Items
- *
- * (c) 2006 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#ifndef _UCW_SIMPLE_LISTS_H
-#define _UCW_SIMPLE_LISTS_H
-
-#include "lib/clists.h"
-
-typedef struct simp_node {
- cnode n;
- union {
- char *s;
- void *p;
- int i;
- uns u;
- };
-} simp_node;
-
-typedef struct simp2_node {
- cnode n;
- union {
- char *s1;
- void *p1;
- int i1;
- uns u1;
- };
- union {
- char *s2;
- void *p2;
- int i2;
- uns u2;
- };
-} simp2_node;
-
-struct mempool;
-simp_node *simp_append(struct mempool *mp, clist *l);
-simp2_node *simp2_append(struct mempool *mp, clist *l);
-
-/* Configuration sections */
-extern struct cf_section cf_string_list_config;
-extern struct cf_section cf_2string_list_config;
-
-#endif
+++ /dev/null
-/*
- * UCW Library -- Single-Linked Lists
- *
- * (c) 2005 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-#include "lib/slists.h"
-
-static inline snode *
-slist_raw_prev(slist *l, snode *n)
-{
- snode *m = &l->head;
- while (m)
- {
- if (n == m->next)
- return m;
- m = m->next;
- }
- ASSERT(0);
-}
-
-void *
-slist_prev(slist *l, snode *n)
-{
- snode *p = slist_raw_prev(l, n);
- return (p == &l->head) ? NULL : p;
-}
-
-void
-slist_insert_before(slist *l, snode *what, snode *before)
-{
- what->next = before;
- slist_raw_prev(l, before)->next = what;
-}
-
-void
-slist_remove(slist *l, snode *n)
-{
- snode *p = slist_raw_prev(l, n);
- slist_remove_after(l, p);
-}
-
-#ifdef TEST
-
-#include <stdio.h>
-#include <alloca.h>
-
-int main(void)
-{
- slist l;
-
- struct x {
- snode n;
- int val;
- };
-
- slist_init(&l);
- for (int i=1; i<=10; i++)
- {
- struct x *x = alloca(sizeof(*x));
- x->val = i;
- if (i % 2)
- slist_add_head(&l, &x->n);
- else
- slist_add_tail(&l, &x->n);
- }
-
- struct x *x, *prev;
- SLIST_WALK_DELSAFE(x, l, prev)
- if (x->val == 5)
- slist_remove_after(&l, &prev->n);
- else if (x->val == 6)
- slist_remove(&l, &x->n);
- SLIST_FOR_EACH(struct x *, x, l)
- printf("%d/", x->val);
- putchar('\n');
-}
-
-#endif
+++ /dev/null
-/*
- * UCW Library -- Single-Linked Lists
- *
- * (c) 2005 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#ifndef _UCW_SLISTS_H
-#define _UCW_SLISTS_H
-
-typedef struct snode {
- struct snode *next;
-} snode;
-
-typedef struct slist {
- struct snode head, *last;
-} slist;
-
-static inline void *slist_head(slist *l)
-{
- return l->head.next;
-}
-
-static inline void *slist_tail(slist *l)
-{
- return l->last;
-}
-
-static inline void *slist_next(snode *n)
-{
- return n->next;
-}
-
-static inline int slist_empty(slist *l)
-{
- return !l->head.next;
-}
-
-#define SLIST_WALK(n,list) for(n=(void*)(list).head.next; (n); (n)=(void*)((snode*)(n))->next)
-#define SLIST_WALK_DELSAFE(n,list,prev) for((prev)=(void*)&(list).head; (n)=(void*)((snode*)prev)->next; (prev)=(((snode*)(prev))->next==(snode*)(n) ? (void*)(n) : (void*)(prev)))
-#define SLIST_FOR_EACH(type,n,list) for(type n=(void*)(list).head.next; n; n=(void*)((snode*)(n))->next)
-
-static inline void slist_insert_after(slist *l, snode *what, snode *after)
-{
- what->next = after->next;
- after->next = what;
- if (!what->next)
- l->last = what;
-}
-
-static inline void slist_add_head(slist *l, snode *n)
-{
- n->next = l->head.next;
- l->head.next = n;
- if (!l->last)
- l->last = n;
-}
-
-static inline void slist_add_tail(slist *l, snode *n)
-{
- if (l->last)
- l->last->next = n;
- else
- l->head.next = n;
- n->next = NULL;
- l->last = n;
-}
-
-static inline void slist_init(slist *l)
-{
- l->head.next = l->last = NULL;
-}
-
-static inline void slist_remove_after(slist *l, snode *after)
-{
- snode *n = after->next;
- after->next = n->next;
- if (l->last == n)
- l->last = (after == &l->head) ? NULL : after;
-}
-
-/* Non-trivial functions */
-
-void *slist_prev(slist *l, snode *n);
-void slist_insert_before(slist *l, snode *what, snode *before);
-void slist_remove(slist *l, snode *n);
-
-#endif
+++ /dev/null
-# Test for slists module
-
-Run: ../obj/lib/slists-t
-Out: 9/7/3/1/2/4/8/10/
+++ /dev/null
-# Makefile for the UCW Sorter (c) 2007 Martin Mares <mj@ucw.cz>
-
-DIRS+=lib/sorter
-
-LIBUCW_MODS+=$(addprefix sorter/, config govern sbuck array)
-LIBUCW_INCLUDES+=$(addprefix sorter/, array.h common.h s-fixint.h \
- s-internal.h s-multiway.h s-radix.h s-twoway.h sorter.h)
-
-ifdef CONFIG_DEBUG_TOOLS
-PROGS+=$(o)/lib/sorter/sort-test
-endif
-
-$(o)/lib/sorter/sort-test: $(o)/lib/sorter/sort-test.o $(LIBUCW)
+++ /dev/null
-Cleanups:
-o Log messages should show both original and new size of the data. The speed
- should be probably calculated from the former.
-o Buffer sizing in shep-export.
-
-Improvements:
-o When quicksorting a large input (especially in threaded case), invest more
- time to picking a good pivot.
-o Overlay presorter I/O with internal sorting.
-
-Users of lib/sorter/array.h which might use radix-sorting:
-indexer/chewer.c
-indexer/lexfreq.c
-indexer/mkgraph.c
-indexer/reftexts.c
+++ /dev/null
-/*
- * UCW Library -- Optimized Array Sorter
- *
- * (c) 2003--2007 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#undef LOCAL_DEBUG
-
-#include "lib/lib.h"
-#include "lib/sorter/common.h"
-
-#include <string.h>
-#include <alloca.h>
-
-#define ASORT_MIN_SHIFT 2
-
-#define ASORT_TRACE(x...) ASORT_XTRACE(1, x)
-#define ASORT_XTRACE(level, x...) do { if (sorter_trace_array >= level) msg(L_DEBUG, x); } while(0)
-
-static void
-asort_radix(struct asort_context *ctx, void *array, void *buffer, uns num_elts, uns hash_bits, uns swapped_output)
-{
- // swap_output == 0 if result should be returned in `array', otherwise in `buffer'
- uns buckets = (1 << ctx->radix_bits);
- uns shift = (hash_bits > ctx->radix_bits) ? (hash_bits - ctx->radix_bits) : 0;
- uns cnt[buckets];
-
-#if 0
- static int reported[64];
- if (!reported[hash_bits]++)
-#endif
- DBG(">>> n=%u h=%d s=%d sw=%d", num_elts, hash_bits, shift, swapped_output);
-
- bzero(cnt, sizeof(cnt));
- ctx->radix_count(array, num_elts, cnt, shift);
-
- uns pos = 0;
- for (uns i=0; i<buckets; i++)
- {
- uns j = cnt[i];
- cnt[i] = pos;
- pos += j;
- }
- ASSERT(pos == num_elts);
-
- ctx->radix_split(array, buffer, num_elts, cnt, shift);
- pos = 0;
- for (uns i=0; i<buckets; i++)
- {
- uns n = cnt[i] - pos;
- if (n < ctx->radix_threshold || shift < ASORT_MIN_SHIFT)
- {
- ctx->quicksort(buffer, n);
- if (!swapped_output)
- memcpy(array, buffer, n * ctx->elt_size);
- }
- else
- asort_radix(ctx, buffer, array, n, shift, !swapped_output);
- array += n * ctx->elt_size;
- buffer += n * ctx->elt_size;
- pos = cnt[i];
- }
-}
-
-#ifdef CONFIG_UCW_THREADS
-
-#include "lib/threads.h"
-#include "lib/workqueue.h"
-#include "lib/eltpool.h"
-
-static uns asort_threads_use_count;
-static uns asort_threads_ready;
-static struct worker_pool asort_thread_pool;
-
-static uns
-rs_estimate_stack(void)
-{
- // Stack space needed by the recursive radix-sorter
- uns ctrsize = sizeof(uns) * (1 << CONFIG_UCW_RADIX_SORTER_BITS);
- uns maxdepth = (64 / CONFIG_UCW_RADIX_SORTER_BITS) + 1;
- return ctrsize * maxdepth;
-}
-
-void
-asort_start_threads(uns run)
-{
- ucwlib_lock();
- asort_threads_use_count++;
- if (run && !asort_threads_ready)
- {
- // XXX: If somebody overrides the radix-sorter parameters to insane values,
- // he also should override the stack size to insane values.
- asort_thread_pool.stack_size = default_thread_stack_size + rs_estimate_stack();
- asort_thread_pool.num_threads = sorter_threads;
- ASORT_TRACE("Initializing thread pool (%d threads, %dK stack)", sorter_threads, asort_thread_pool.stack_size >> 10);
- worker_pool_init(&asort_thread_pool);
- asort_threads_ready = 1;
- }
- ucwlib_unlock();
-}
-
-void
-asort_stop_threads(void)
-{
- ucwlib_lock();
- if (!--asort_threads_use_count && asort_threads_ready)
- {
- ASORT_TRACE("Shutting down thread pool");
- worker_pool_cleanup(&asort_thread_pool);
- asort_threads_ready = 0;
- }
- ucwlib_unlock();
-}
-
-struct qs_work {
- struct work w;
- struct asort_context *ctx;
- void *array;
- uns num_elts;
- int left, right;
-#define LR_UNDEF -100
-};
-
-static void
-qs_handle_work(struct worker_thread *thr UNUSED, struct work *ww)
-{
- struct qs_work *w = (struct qs_work *) ww;
- struct asort_context *ctx = w->ctx;
-
- DBG("Thread %d: got %u elts", thr->id, w->num_elts);
- if (w->num_elts < ctx->thread_threshold)
- {
- ctx->quicksort(w->array, w->num_elts);
- w->left = w->right = LR_UNDEF;
- }
- else
- ctx->quicksplit(w->array, w->num_elts, &w->left, &w->right);
- DBG("Thread %d: returning l=%u r=%u", thr->id, w->left, w->right);
-}
-
-static struct qs_work *
-qs_alloc_work(struct asort_context *ctx)
-{
- struct qs_work *w = ep_alloc(ctx->eltpool);
- w->w.priority = 0;
- w->w.go = qs_handle_work;
- w->ctx = ctx;
- return w;
-}
-
-/*
- * Parallel quicksort of ctx->array.
- *
- * The whole array is submitted as a single qs_work. Whenever a finished work
- * comes back with a valid split (left != LR_UNDEF), new works are submitted
- * for the two parts, each with priority one higher than its parent.
- * The loop ends when work_wait() returns NULL.
- */
-static void
-threaded_quicksort(struct asort_context *ctx)
-{
- struct work_queue q;
- struct qs_work *v, *w;
-
- asort_start_threads(1);
- work_queue_init(&asort_thread_pool, &q);
- ctx->eltpool = ep_new(sizeof(struct qs_work), 1000);
-
- // Seed the queue with the whole array
- w = qs_alloc_work(ctx);
- w->array = ctx->array;
- w->num_elts = ctx->num_elts;
- work_submit(&q, &w->w);
-
- while (v = (struct qs_work *) work_wait(&q))
- {
- if (v->left != LR_UNDEF)
- {
- // Left part: elements 0..right (skipped if it has at most one element)
- if (v->right > 0)
- {
- w = qs_alloc_work(ctx);
- w->array = v->array;
- w->num_elts = v->right + 1;
- w->w.priority = v->w.priority + 1;
- work_submit(&q, &w->w);
- }
- // Right part: elements left..num_elts-1 (skipped if it has at most one element)
- if (v->left < (int)v->num_elts - 1)
- {
- w = qs_alloc_work(ctx);
- w->array = v->array + v->left * ctx->elt_size;
- w->num_elts = v->num_elts - v->left;
- w->w.priority = v->w.priority + 1;
- work_submit(&q, &w->w);
- }
- }
- ep_free(ctx->eltpool, v);
- }
-
- ep_delete(ctx->eltpool);
- work_queue_cleanup(&q);
- asort_stop_threads();
-}
-
-// One unit of work of the threaded radix-sort, handled by rs_count(), rs_split() or rs_finish()
-struct rs_work {
- struct work w; // Kept first: the handlers cast a struct work * back to struct rs_work *
- struct asort_context *ctx;
- void *array, *buffer; // Like asort_radix().
- uns num_elts; // Number of elements to process
- uns shift; // Shift passed to ctx->radix_count() / ctx->radix_split()
- uns swap_output; // Passed to asort_radix() by rs_finish(); if set, a quicksorted block is copied to the buffer
- uns cnt[0]; // Per-bucket counters; storage is allocated behind the structure by threaded_radixsort()
-};
-
-// Work handler: run ctx->radix_count() on the block described by the work,
-// accumulating the results in its cnt[] array.
-static void
-rs_count(struct worker_thread *thr UNUSED, struct work *ww)
-{
-  struct rs_work *job = (struct rs_work *) ww;
-  struct asort_context *c = job->ctx;
-
-  DBG("Thread %d: Counting %u items, shift=%d", thr->id, job->num_elts, job->shift);
-  c->radix_count(job->array, job->num_elts, job->cnt, job->shift);
-  DBG("Thread %d: Counting done", thr->id);
-}
-
-// Work handler: run ctx->radix_split() on the block described by the work,
-// moving its elements from the array to the buffer at the positions given by cnt[].
-static void
-rs_split(struct worker_thread *thr UNUSED, struct work *ww)
-{
-  struct rs_work *job = (struct rs_work *) ww;
-  struct asort_context *c = job->ctx;
-
-  DBG("Thread %d: Splitting %u items, shift=%d", thr->id, job->num_elts, job->shift);
-  c->radix_split(job->array, job->buffer, job->num_elts, job->cnt, job->shift);
-  DBG("Thread %d: Splitting done", thr->id);
-}
-
-/*
- * Finish sorting of a single block, either as a work handler or called
- * directly with thr == NULL (in which case nothing is logged).
- *
- * If there are at least ASORT_MIN_SHIFT bits left and the block has at least
- * ctx->radix_threshold elements, the block is passed to asort_radix().
- * Otherwise it is quicksorted in place and, if swap_output is set, copied
- * to the buffer.
- */
-static void
-rs_finish(struct worker_thread *thr UNUSED, struct work *ww)
-{
-  struct rs_work *job = (struct rs_work *) ww;
-  struct asort_context *c = job->ctx;
-
-  if (thr)
-    DBG("Thread %d: Finishing %u items, shift=%d", thr->id, job->num_elts, job->shift);
-  if (job->shift >= ASORT_MIN_SHIFT && job->num_elts >= c->radix_threshold)
-    asort_radix(c, job->array, job->buffer, job->num_elts, job->shift, job->swap_output);
-  else
-    {
-      c->quicksort(job->array, job->num_elts);
-      if (job->swap_output)
-        memcpy(job->buffer, job->array, job->num_elts * c->elt_size);
-    }
-  if (thr)
-    DBG("Thread %d: Finishing done", thr->id);
-}
-
-// Collect works from ctx->rs_work_queue until work_wait() returns NULL,
-// returning each of them to ctx->eltpool.
-static void
-rs_wait_small(struct asort_context *ctx)
-{
-  for (;;)
-    {
-      struct rs_work *done = (struct rs_work *) work_wait(ctx->rs_work_queue);
-      if (!done)
-        break;
-      DBG("Reaping small chunk of %u items", done->num_elts);
-      ep_free(ctx->eltpool, done);
-    }
-}
-
-/*
- * One level of the parallel radix-sort.
- *
- * The input in `array' is cut to sorter_threads blocks, which are counted in
- * parallel by rs_count() and then distributed in parallel to `buffer' by
- * rs_split(). Each non-empty bucket is then either handed over to rs_finish()
- * (inline if it has less than ctx->thread_chunk elements, otherwise via the
- * work queue), or processed recursively with the roles of array and buffer
- * exchanged.
- *
- * `hash_bits' is the number of hash bits not consumed yet, `swapped_output'
- * is passed (negated) to the next level.
- */
-static void
-rs_radix(struct asort_context *ctx, void *array, void *buffer, uns num_elts, uns hash_bits, uns swapped_output)
-{
- uns buckets = (1 << ctx->radix_bits);
- uns shift = (hash_bits > ctx->radix_bits) ? (hash_bits - ctx->radix_bits) : 0;
- uns cnt[buckets];
- uns blksize = num_elts / sorter_threads;
- DBG(">>> n=%u h=%d s=%d blk=%u sw=%d", num_elts, hash_bits, shift, blksize, swapped_output);
-
- // If there are any small chunks in progress, wait for them to finish
- rs_wait_small(ctx);
-
- // Start parallel counting
- void *iptr = array;
- for (uns i=0; i<sorter_threads; i++)
- {
- struct rs_work *w = ctx->rs_works[i];
- w->w.priority = 0;
- w->w.go = rs_count;
- w->ctx = ctx;
- w->array = iptr;
- w->buffer = buffer;
- w->num_elts = blksize;
- // The last block also takes the remainder of the division
- if (i == sorter_threads-1)
- w->num_elts += num_elts % sorter_threads;
- w->shift = shift;
- iptr += w->num_elts * ctx->elt_size;
- bzero(w->cnt, sizeof(uns) * buckets);
- work_submit(ctx->rs_work_queue, &w->w);
- }
-
- // Get bucket sizes from the counts
- bzero(cnt, sizeof(cnt));
- for (uns i=0; i<sorter_threads; i++)
- {
- struct rs_work *w = (struct rs_work *) work_wait(ctx->rs_work_queue);
- ASSERT(w);
- for (uns j=0; j<buckets; j++)
- cnt[j] += w->cnt[j];
- }
-
- // Calculate bucket starts
- uns pos = 0;
- for (uns i=0; i<buckets; i++)
- {
- uns j = cnt[i];
- cnt[i] = pos;
- pos += j;
- }
- ASSERT(pos == num_elts);
-
- // Start parallel splitting
- // Each block gets its own starting position inside every bucket; after the
- // loop, cnt[j] holds the end position of bucket j.
- for (uns i=0; i<sorter_threads; i++)
- {
- struct rs_work *w = ctx->rs_works[i];
- w->w.go = rs_split;
- for (uns j=0; j<buckets; j++)
- {
- uns k = w->cnt[j];
- w->cnt[j] = cnt[j];
- cnt[j] += k;
- }
- work_submit(ctx->rs_work_queue, &w->w);
- }
- ASSERT(cnt[buckets-1] == num_elts);
-
- // Wait for splits to finish
- while (work_wait(ctx->rs_work_queue))
- ;
-
- // Recurse on buckets
- // From now on, `buffer' holds the data and `array' serves as the scratch space;
- // both pointers are advanced bucket by bucket.
- pos = 0;
- for (uns i=0; i<buckets; i++)
- {
- uns n = cnt[i] - pos;
- if (!n)
- continue;
- if (n < ctx->thread_threshold || shift < ASORT_MIN_SHIFT)
- {
- struct rs_work *w = ep_alloc(ctx->eltpool);
- w->w.priority = 0;
- w->w.go = rs_finish;
- w->ctx = ctx;
- w->array = buffer;
- w->buffer = array;
- w->num_elts = n;
- w->shift = shift;
- w->swap_output = !swapped_output;
- if (n < ctx->thread_chunk)
- {
- DBG("Sorting block %u+%u inline", pos, n);
- rs_finish(NULL, &w->w);
- ep_free(ctx->eltpool, w);
- }
- else
- {
- DBG("Scheduling block %u+%u", pos, n);
- work_submit(ctx->rs_work_queue, &w->w);
- }
- }
- else
- rs_radix(ctx, buffer, array, n, shift, !swapped_output);
- pos = cnt[i];
- array += n * ctx->elt_size;
- buffer += n * ctx->elt_size;
- }
-}
-
-/*
- * Parallel radix-sort of ctx->array with ctx->buffer as the auxiliary space.
- * `swap' is passed to rs_radix() as its swapped_output argument.
- *
- * Sets up the work queue, one rs_work with a counter table per sorter thread
- * and a pool for works created by rs_radix(), runs rs_radix() on the whole
- * array, waits for the remaining queued works and tears everything down.
- */
-static void
-threaded_radixsort(struct asort_context *ctx, uns swap)
-{
- struct work_queue q;
-
- asort_start_threads(1);
- work_queue_init(&asort_thread_pool, &q);
-
- // Prepare work structures for counting and splitting.
- // We use big_alloc(), because we want to avoid cacheline aliasing between threads.
- ctx->rs_work_queue = &q;
- ctx->rs_works = alloca(sizeof(struct rs_work *) * sorter_threads);
- for (uns i=0; i<sorter_threads; i++)
- ctx->rs_works[i] = big_alloc(sizeof(struct rs_work) + sizeof(uns) * (1 << ctx->radix_bits));
-
- // Prepare a pool for all remaining small bits which will be sorted on background.
- ctx->eltpool = ep_new(sizeof(struct rs_work), 1000);
-
- // Do the big splitting
- rs_radix(ctx, ctx->array, ctx->buffer, ctx->num_elts, ctx->hash_bits, swap);
- // The per-thread works are used only by the counting and splitting passes inside rs_radix()
- for (uns i=0; i<sorter_threads; i++)
- big_free(ctx->rs_works[i], sizeof(struct rs_work) + sizeof(uns) * (1 << ctx->radix_bits));
-
- // Finish the small blocks
- rs_wait_small(ctx);
-
- ASSERT(!ctx->eltpool->num_allocated);
- ep_delete(ctx->eltpool);
- work_queue_cleanup(&q);
- asort_stop_threads();
-}
-
-#else
-
-// Variant of the preprocessor conditional without threads: both functions do nothing
-void asort_start_threads(uns run UNUSED) { }
-void asort_stop_threads(void) { }
-
-#endif
-
-/*
- * Predict the `swap' flag handed over to the radix-sorter by asort_run().
- *
- * Simulates the passes: starting with ctx->radix_bits bits and ctx->num_elts
- * elements, each pass flips the flag, divides the number of elements by
- * 2^radix_bits and takes radix_bits bits away (stopping at zero). Passes
- * continue while there are at least ctx->radix_threshold elements and at
- * least ASORT_MIN_SHIFT bits.
- */
-static uns
-predict_swap(struct asort_context *ctx)
-{
-  uns flipped = 0;
-  uns bits = ctx->radix_bits;
-
-  for (uns elts = ctx->num_elts;
-       elts >= ctx->radix_threshold && bits >= ASORT_MIN_SHIFT;
-       elts >>= ctx->radix_bits)
-    {
-      DBG("Predicting pass: %u elts, %d bits", elts, bits);
-      flipped = !flipped;
-      bits = MAX(bits, ctx->radix_bits) - ctx->radix_bits;
-    }
-  return flipped;
-}
-
-/*
- * Entry point of the array sorter: sort the array described by `ctx'.
- *
- * First, the configured byte limits are converted to element counts. Then one
- * of four strategies is chosen:
- *
- *   - quicksort, if the array has less than radix_threshold elements, there
- *     are at most ASORT_MIN_SHIFT hash bits, no radix_split callback is
- *     available or radix-sorting is disabled by SORT_DEBUG_ASORT_NO_RADIX;
- *   - radix-sort otherwise; if predict_swap() returns non-zero, ctx->array
- *     is redirected to ctx->buffer afterwards.
- *
- * Each of them runs in parallel if the library is compiled with
- * CONFIG_UCW_THREADS, more than one sorter thread is configured, the array
- * has at least thread_threshold elements and SORT_DEBUG_ASORT_NO_THREADS
- * is not set.
- */
-void
-asort_run(struct asort_context *ctx)
-{
- // Translate limits from bytes to elements, clamping them to the range of uns
- ctx->thread_threshold = MIN(sorter_thread_threshold / ctx->elt_size, ~0U);
- ctx->thread_chunk = MIN(sorter_thread_chunk / ctx->elt_size, ~0U);
- ctx->radix_threshold = MIN(sorter_radix_threshold / ctx->elt_size, ~0U);
-
- ASORT_TRACE("Array-sorting %u items per %u bytes, hash_bits=%d", ctx->num_elts, ctx->elt_size, ctx->hash_bits);
- ASORT_XTRACE(2, "Limits: thread_threshold=%u, thread_chunk=%u, radix_threshold=%u",
- ctx->thread_threshold, ctx->thread_chunk, ctx->radix_threshold);
- // UNUSED, because the variable is referenced only when CONFIG_UCW_THREADS is defined
- uns allow_threads UNUSED = (sorter_threads > 1 &&
- ctx->num_elts >= ctx->thread_threshold &&
- !(sorter_debug & SORT_DEBUG_ASORT_NO_THREADS));
-
- if (ctx->num_elts < ctx->radix_threshold ||
- ctx->hash_bits <= ASORT_MIN_SHIFT ||
- !ctx->radix_split ||
- (sorter_debug & SORT_DEBUG_ASORT_NO_RADIX))
- {
-#ifdef CONFIG_UCW_THREADS
- if (allow_threads)
- {
- ASORT_XTRACE(2, "Decided to use parallel quicksort");
- threaded_quicksort(ctx);
- }
- else
-#endif
- {
- ASORT_XTRACE(2, "Decided to use sequential quicksort");
- ctx->quicksort(ctx->array, ctx->num_elts);
- }
- }
- else
- {
- uns swap = predict_swap(ctx);
-#ifdef CONFIG_UCW_THREADS
- if (allow_threads)
- {
- ASORT_XTRACE(2, "Decided to use parallel radix-sort (swap=%d)", swap);
- threaded_radixsort(ctx, swap);
- }
- else
-#endif
- {
- ASORT_XTRACE(2, "Decided to use sequential radix-sort (swap=%d)", swap);
- asort_radix(ctx, ctx->array, ctx->buffer, ctx->num_elts, ctx->hash_bits, swap);
- }
- // With swap set, ctx->array is pointed at the buffer for the caller
- if (swap)
- ctx->array = ctx->buffer;
- }
-
- ASORT_XTRACE(2, "Array-sort finished");
-}
+++ /dev/null
-/*
- * UCW Library -- Optimized Array Sorter
- *
- * (c) 2003--2007 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-/*
- * This is a generator of routines for sorting huge arrays, similar to the one
- * in lib/arraysort.h. It cannot handle discontiguous arrays, but it is able
- * to employ radix-sorting if a monotone hash function is available and also
- * use several threads in parallel on SMP systems (this assumes that all
- * callbacks you provide are thread-safe).
- *
- * It is usually called internally by the generic sorter machinery, but
- * you are free to use it explicitly if you need.
- *
- * So much for advocacy, here are the parameters (those marked with [*]
- * are mandatory):
- *
- * ASORT_PREFIX(x) [*] add a name prefix (used on all global names
- * defined by the sorter)
- * ASORT_KEY_TYPE [*] data type of a single array entry key
- * ASORT_LT(x,y) x < y for ASORT_KEY_TYPE (default: "x<y")
- * ASORT_HASH(x) a monotone hash function (satisfying hash(x) < hash(y) => x<y)
- * ASORT_LONG_HASH hashes are 64-bit numbers (default is 32 bits)
- *
- * Fine-tuning parameters: (if you really insist)
- *
- * ASORT_THRESHOLD threshold for switching between quicksort and insertsort
- * ASORT_RADIX_BITS how many bits of the hash functions are to be used at once for
- * radix-sorting.
- *
- * After including this file, a function
- * ASORT_KEY_TYPE *ASORT_PREFIX(sort)(ASORT_KEY_TYPE *array, uns num_elts [, ASORT_KEY_TYPE *buf, uns hash_bits])
- * is declared and all parameter macros are automatically undef'd. Here `buf' is an
- * auxiliary buffer of the same size as the input array, required whenever radix
- * sorting should be used, and `hash_bits' is the number of significant bits returned
- * by the hash function. If the buffer is specified, the sorting function returns either
- * a pointer to the input array or to the buffer, depending on where the result is stored.
- * If you do not use hashing, these parameters should be omitted.
- */
-
-#include "lib/sorter/common.h"
-
-// Shorthand for prefixed names of everything generated by this file
-#define Q(x) ASORT_PREFIX(x)
-
-typedef ASORT_KEY_TYPE Q(key);
-
-// Default comparison: the built-in "<" operator
-#ifndef ASORT_LT
-#define ASORT_LT(x,y) ((x) < (y))
-#endif
-
-// Default swap of two elements of a local variable called `array', going through a temporary key
-#ifndef ASORT_SWAP
-#define ASORT_SWAP(i,j) do { Q(key) tmp = array[i]; array[i]=array[j]; array[j]=tmp; } while (0)
-#endif
-
-#ifndef ASORT_THRESHOLD
-#define ASORT_THRESHOLD 8 /* Guesswork and experimentation */
-#endif
-
-// Default number of hash bits processed by a single radix-sort pass
-#ifndef ASORT_RADIX_BITS
-#define ASORT_RADIX_BITS CONFIG_UCW_RADIX_SORTER_BITS
-#endif
-// Mask selecting the lowest ASORT_RADIX_BITS bits
-#define ASORT_RADIX_MASK ((1 << (ASORT_RADIX_BITS)) - 1)
-
-/* QuickSort with optimizations a'la Sedgewick, inspired by qsort() from GNU libc. */
-
-/*
- * Sort `num_elts' keys stored at `array_ptr' in place.
- *
- * Works in two phases: an iterative quicksort with median-of-three pivot
- * selection, which leaves partitions shorter than ASORT_THRESHOLD alone,
- * followed by a single insertion sort over the whole array. Postponed
- * partitions are kept on an explicit stack of 8*sizeof(uns) entries;
- * of two large partitions, the larger one is pushed and the smaller one
- * is processed first.
- */
-static void Q(quicksort)(void *array_ptr, uns num_elts)
-{
- Q(key) *array = array_ptr;
- struct stk { int l, r; } stack[8*sizeof(uns)];
- int l, r, left, right, m;
- uns sp = 0;
- Q(key) pivot;
-
- if (num_elts <= 1)
- return;
-
- left = 0;
- right = num_elts - 1;
- for(;;)
- {
- l = left;
- r = right;
- m = (l+r)/2;
- // Order array[l], array[m], array[r], so that the median ends up at m
- if (ASORT_LT(array[m], array[l]))
- ASORT_SWAP(l,m);
- if (ASORT_LT(array[r], array[m]))
- {
- ASORT_SWAP(m,r);
- if (ASORT_LT(array[m], array[l]))
- ASORT_SWAP(l,m);
- }
- pivot = array[m];
- // Partition around the pivot until l and r cross
- do
- {
- while (ASORT_LT(array[l], pivot))
- l++;
- while (ASORT_LT(pivot, array[r]))
- r--;
- if (l < r)
- {
- ASORT_SWAP(l,r);
- l++;
- r--;
- }
- else if (l == r)
- {
- l++;
- r--;
- }
- }
- while (l <= r);
- if ((r - left) >= ASORT_THRESHOLD && (right - l) >= ASORT_THRESHOLD)
- {
- /* Both partitions ok => push the larger one */
- if ((r - left) > (right - l))
- {
- stack[sp].l = left;
- stack[sp].r = r;
- left = l;
- }
- else
- {
- stack[sp].l = l;
- stack[sp].r = right;
- right = r;
- }
- sp++;
- }
- else if ((r - left) >= ASORT_THRESHOLD)
- {
- /* Left partition OK, right undersize */
- right = r;
- }
- else if ((right - l) >= ASORT_THRESHOLD)
- {
- /* Right partition OK, left undersize */
- left = l;
- }
- else
- {
- /* Both partitions undersize => pop */
- if (!sp)
- break;
- sp--;
- left = stack[sp].l;
- right = stack[sp].r;
- }
- }
-
- /*
- * We have a partially sorted array, finish by insertsort. Inspired
- * by qsort() in GNU libc.
- */
-
- /* Find minimal element which will serve as a barrier */
- r = MIN(num_elts, ASORT_THRESHOLD);
- m = 0;
- for (l=1; l<r; l++)
- if (ASORT_LT(array[l], array[m]))
- m = l;
- ASORT_SWAP(0,m);
-
- /* Insertion sort */
- for (m=1; m<(int)num_elts; m++)
- {
- l=m;
- // No lower bound check here: the scan relies on the barrier placed at array[0]
- while (ASORT_LT(array[m], array[l-1]))
- l--;
- while (l < m)
- {
- ASORT_SWAP(l,m);
- l++;
- }
- }
-}
-
-/* Just the splitting part of QuickSort */
-
-/*
- * Perform a single quicksort partitioning step on the whole array:
- * select the pivot as a median of the first, middle and last element and
- * exchange elements until the scanning indices cross. The final values of
- * the indices are stored to *leftp (l) and *rightp (r), where r < l.
- *
- * Unlike Q(quicksort), there is no check for num_elts <= 1 here.
- */
-static void Q(quicksplit)(void *array_ptr, uns num_elts, int *leftp, int *rightp)
-{
- Q(key) *array = array_ptr;
- int l, r, m;
- Q(key) pivot;
-
- l = 0;
- r = num_elts - 1;
- m = (l+r)/2;
- // Order array[l], array[m], array[r], so that the median ends up at m
- if (ASORT_LT(array[m], array[l]))
- ASORT_SWAP(l,m);
- if (ASORT_LT(array[r], array[m]))
- {
- ASORT_SWAP(m,r);
- if (ASORT_LT(array[m], array[l]))
- ASORT_SWAP(l,m);
- }
- pivot = array[m];
- do
- {
- while (ASORT_LT(array[l], pivot))
- l++;
- while (ASORT_LT(pivot, array[r]))
- r--;
- if (l < r)
- {
- ASORT_SWAP(l,r);
- l++;
- r--;
- }
- else if (l == r)
- {
- l++;
- r--;
- }
- }
- while (l <= r);
- *leftp = l;
- *rightp = r;
-}
-
-#ifdef ASORT_HASH
-
-/*
- * For each of the `num_elts' keys at `src_ptr', take ASORT_RADIX_BITS bits of
- * its hash starting at bit `shift' and increment the corresponding item of
- * cnt[]. The counters are not cleared here.
- *
- * The switch contains a separate copy of the loop for every supported shift
- * (0..31, or 0..63 with ASORT_LONG_HASH), so that `s' is a constant in each
- * of them. Any other shift trips the ASSERT.
- */
-static void Q(radix_count)(void *src_ptr, uns num_elts, uns *cnt, uns shift)
-{
- Q(key) *src = src_ptr;
- uns i;
-
- switch (shift)
- {
-#define RC(s) \
- case s: \
- for (i=0; i<num_elts; i++) \
- cnt[ (ASORT_HASH(src[i]) >> s) & ASORT_RADIX_MASK ] ++; \
- break; \
-
-#ifdef ASORT_LONG_HASH
- RC(63); RC(62); RC(61); RC(60); RC(59); RC(58); RC(57); RC(56);
- RC(55); RC(54); RC(53); RC(52); RC(51); RC(50); RC(49); RC(48);
- RC(47); RC(46); RC(45); RC(44); RC(43); RC(42); RC(41); RC(40);
- RC(39); RC(38); RC(37); RC(36); RC(35); RC(34); RC(33); RC(32);
-#endif
- RC(31); RC(30); RC(29); RC(28); RC(27); RC(26); RC(25); RC(24);
- RC(23); RC(22); RC(21); RC(20); RC(19); RC(18); RC(17); RC(16);
- RC(15); RC(14); RC(13); RC(12); RC(11); RC(10); RC(9); RC(8);
- RC(7); RC(6); RC(5); RC(4); RC(3); RC(2); RC(1); RC(0);
- default:
- ASSERT(0);
- }
-#undef RC
-}
-
-/*
- * Distribute the `num_elts' keys at `src_ptr' to `dest_ptr': each key is
- * stored at the position given by the item of ptrs[] selected by
- * ASORT_RADIX_BITS bits of its hash starting at bit `shift', and that item
- * is then incremented.
- *
- * As in Q(radix_count), the loop is replicated for every supported shift
- * and any other value trips the ASSERT.
- */
-static void Q(radix_split)(void *src_ptr, void *dest_ptr, uns num_elts, uns *ptrs, uns shift)
-{
- Q(key) *src = src_ptr, *dest = dest_ptr;
- uns i;
-
- switch (shift)
- {
-#define RS(s) \
- case s: \
- for (i=0; i<num_elts; i++) \
- dest[ ptrs[ (ASORT_HASH(src[i]) >> s) & ASORT_RADIX_MASK ]++ ] = src[i]; \
- break;
-
-#ifdef ASORT_LONG_HASH
- RS(63); RS(62); RS(61); RS(60); RS(59); RS(58); RS(57); RS(56);
- RS(55); RS(54); RS(53); RS(52); RS(51); RS(50); RS(49); RS(48);
- RS(47); RS(46); RS(45); RS(44); RS(43); RS(42); RS(41); RS(40);
- RS(39); RS(38); RS(37); RS(36); RS(35); RS(34); RS(33); RS(32);
-#endif
- RS(31); RS(30); RS(29); RS(28); RS(27); RS(26); RS(25); RS(24);
- RS(23); RS(22); RS(21); RS(20); RS(19); RS(18); RS(17); RS(16);
- RS(15); RS(14); RS(13); RS(12); RS(11); RS(10); RS(9); RS(8);
- RS(7); RS(6); RS(5); RS(4); RS(3); RS(2); RS(1); RS(0);
- default:
- ASSERT(0);
- }
-#undef RS
-}
-
-#endif
-
-/*
- * The generated entry point: fill in an asort_context with the array, the
- * element size and the generated callbacks (plus the buffer, hash_bits and
- * radix callbacks if ASORT_HASH is defined), call asort_run() and return
- * ctx.array, which asort_run() may have redirected to the buffer.
- */
-static Q(key) *Q(sort)(Q(key) *array, uns num_elts
-#ifdef ASORT_HASH
- , Q(key) *buffer, uns hash_bits
-#endif
- )
-{
- // Fields not listed in the initializer are zero, so without ASORT_HASH the radix callbacks are NULL
- struct asort_context ctx = {
- .array = array,
- .num_elts = num_elts,
- .elt_size = sizeof(Q(key)),
- .quicksort = Q(quicksort),
- .quicksplit = Q(quicksplit),
-#ifdef ASORT_HASH
- .buffer = buffer,
- .hash_bits = hash_bits,
- .radix_count = Q(radix_count),
- .radix_split = Q(radix_split),
- .radix_bits = ASORT_RADIX_BITS,
-#endif
- };
- asort_run(&ctx);
- return ctx.array;
-}
-
-#undef ASORT_HASH
-#undef ASORT_KEY_TYPE
-#undef ASORT_LONG_HASH
-#undef ASORT_LT
-#undef ASORT_PAGE_ALIGNED
-#undef ASORT_PREFIX
-#undef ASORT_RADIX_BITS
-#undef ASORT_RADIX_MASK
-#undef ASORT_SWAP
-#undef ASORT_THRESHOLD
-#undef Q
+++ /dev/null
-/*
- * UCW Library -- Universal Sorter: Common Declarations
- *
- * (c) 2007 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#ifndef _UCW_SORTER_COMMON_H
-#define _UCW_SORTER_COMMON_H
-
-#include "lib/clists.h"
-
-/* Configuration variables */
-extern uns sorter_trace, sorter_trace_array, sorter_stream_bufsize;
-extern uns sorter_debug, sorter_min_radix_bits, sorter_max_radix_bits, sorter_add_radix_bits;
-extern uns sorter_min_multiway_bits, sorter_max_multiway_bits;
-extern uns sorter_threads;
-extern u64 sorter_bufsize, sorter_small_input;
-extern u64 sorter_thread_threshold, sorter_thread_chunk, sorter_radix_threshold;
-extern struct fb_params sorter_fb_params, sorter_small_fb_params;
-
-// Log a debug message if sorter_trace is non-zero
-#define SORT_TRACE(x...) do { if (sorter_trace) msg(L_DEBUG, x); } while(0)
-// Log a debug message if sorter_trace is at least `level'
-#define SORT_XTRACE(level, x...) do { if (sorter_trace >= level) msg(L_DEBUG, x); } while(0)
-
-// Bits of the sorter_debug configuration variable
-enum sort_debug {
- SORT_DEBUG_NO_PRESORT = 1, // sorter_twoway() skips presorting, sorter_decide() rules out multi-way merges
- SORT_DEBUG_NO_JOIN = 2, // sbuck_join_to() never offers a bucket to join to
- SORT_DEBUG_KEEP_BUCKETS = 4, // NOTE(review): presumably keeps bucket files for inspection; not used in this part of the code
- SORT_DEBUG_NO_RADIX = 8, // sorter_decide() rules out radix splits
- SORT_DEBUG_NO_MULTIWAY = 16, // sorter_decide() rules out multi-way merges
- SORT_DEBUG_ASORT_NO_RADIX = 32, // asort_run() always uses quicksort
- SORT_DEBUG_ASORT_NO_THREADS = 64 // asort_run() never uses threads
-};
-
-struct sort_bucket;
-
-/*
- * State of a single run of the universal sorter, passed to sorter_run().
- * The function pointers are callbacks used by the governing routines;
- * sorter_decide() checks radix_split and multiway_merge for NULL before
- * considering the respective strategy.
- */
-struct sort_context {
- struct fastbuf *in_fb;
- struct fastbuf *out_fb;
- uns hash_bits;
- u64 in_size;
- struct fb_params *fb_params;
-
- struct mempool *pool;
- clist bucket_list;
- void *big_buf; // Passed together with big_buf_size to custom_presort()
- size_t big_buf_size;
-
- // Called by sorter_presort() for buckets marked with SBF_CUSTOM_PRESORT; the return value is passed on
- int (*custom_presort)(struct fastbuf *dest, void *buf, size_t bufsize);
-
- // Take as much as possible from the source bucket, sort it in memory and dump to destination bucket.
- // Return 1 if there is more data available in the source bucket.
- int (*internal_sort)(struct sort_context *ctx, struct sort_bucket *in, struct sort_bucket *out, struct sort_bucket *out_only);
-
- // Estimate how much input data from `b' will fit in the internal sorting buffer.
- u64 (*internal_estimate)(struct sort_context *ctx, struct sort_bucket *b);
-
- // Two-way split/merge: merge up to 2 source buckets to up to 2 destination buckets.
- // Bucket arrays are NULL-terminated.
- void (*twoway_merge)(struct sort_context *ctx, struct sort_bucket **ins, struct sort_bucket **outs);
-
- // Multi-way merge: merge an arbitrary number of source buckets to a single destination bucket.
- void (*multiway_merge)(struct sort_context *ctx, struct sort_bucket **ins, struct sort_bucket *out);
-
- // Radix split according to hash function
- void (*radix_split)(struct sort_context *ctx, struct sort_bucket *in, struct sort_bucket **outs, uns bitpos, uns numbits);
-
- // State variables of internal_sort
- void *key_buf;
- int more_keys;
-
- // Timing
- timestamp_t start_time; // Set by sorter_start_timer()
- uns last_pass_time; // Set by sorter_stop_timer() to the value of get_timer()
- uns total_int_time, total_pre_time, total_ext_time; // Accumulated pass times
-};
-
-void sorter_run(struct sort_context *ctx);
-
-/* Buffers */
-
-void *sorter_alloc(struct sort_context *ctx, uns size);
-void sorter_prepare_buf(struct sort_context *ctx);
-void sorter_alloc_buf(struct sort_context *ctx);
-void sorter_free_buf(struct sort_context *ctx);
-
-/* Buckets */
-
-// A bucket of data handled by the sorter; buckets are linked to lists by their cnode
-struct sort_bucket {
- cnode n; // Kept first: the governing routines cast neighbouring cnodes to struct sort_bucket *
- struct sort_context *ctx;
- uns flags; // Combination of SBF_xxx (see enum sort_bucket_flags)
- struct fastbuf *fb;
- byte *filename;
- u64 size; // Size in bytes (not valid when writing)
- uns runs; // Number of runs, 0 if not sorted
- uns hash_bits; // Remaining bits of the hash function
- byte *ident; // Identifier used in debug messages
-};
-
-// Bits of sort_bucket->flags
-enum sort_bucket_flags {
- SBF_FINAL = 1, // This bucket corresponds to the final output file (always 1 run)
- SBF_SOURCE = 2, // Contains the source file (always 0 runs)
- SBF_CUSTOM_PRESORT = 4, // Contains source to read via custom presorter
- SBF_OPEN_WRITE = 256, // We are currently writing to the fastbuf
- SBF_OPEN_READ = 512, // We are reading from the fastbuf
- SBF_DESTROYED = 1024, // Already done with, no further references allowed
- SBF_SWAPPED_OUT = 2048, // Swapped out to a named file
-};
-
-struct sort_bucket *sbuck_new(struct sort_context *ctx);
-void sbuck_drop(struct sort_bucket *b);
-int sbuck_have(struct sort_bucket *b);
-int sbuck_has_file(struct sort_bucket *b);
-sh_off_t sbuck_size(struct sort_bucket *b);
-struct fastbuf *sbuck_read(struct sort_bucket *b);
-struct fastbuf *sbuck_write(struct sort_bucket *b);
-void sbuck_swap_out(struct sort_bucket *b);
-
-/* Contexts and helper functions for the array sorter */
-
-// Description of a single array-sorting job, passed to asort_run()
-struct asort_context {
- // Interface between generic code in array.c and functions generated by array.h
- void *array; // Array to sort
- void *buffer; // Auxiliary buffer (required when radix-sorting)
- uns num_elts; // Number of elements in the array
- uns elt_size; // Bytes per element
- uns hash_bits; // Remaining bits of the hash function
- uns radix_bits; // How many bits to process in a single radix-sort pass
- void (*quicksort)(void *array_ptr, uns num_elts); // Sort the array completely
- void (*quicksplit)(void *array_ptr, uns num_elts, int *leftp, int *rightp); // A single partitioning step
- void (*radix_count)(void *src_ptr, uns num_elts, uns *cnt, uns shift); // Count elements per bucket
- void (*radix_split)(void *src_ptr, void *dest_ptr, uns num_elts, uns *ptrs, uns shift); // Distribute elements to buckets; NULL disables radix-sorting
-
- // Used internally by array.c
- struct rs_work **rs_works;
- struct work_queue *rs_work_queue;
- struct eltpool *eltpool;
-
- // Configured limits translated from bytes to elements
- uns thread_threshold;
- uns thread_chunk;
- uns radix_threshold;
-};
-
-void asort_run(struct asort_context *ctx);
-void asort_start_threads(uns run);
-void asort_stop_threads(void);
-
-#endif
+++ /dev/null
-/*
- * UCW Library -- Universal Sorter: Configuration
- *
- * (c) 2007 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-#include "lib/conf.h"
-#include "lib/fastbuf.h"
-#include "lib/sorter/common.h"
-
-// Configuration variables of the sorter with their built-in defaults.
-// They are bound to items of the "Sorter" configuration section by sorter_config.
-uns sorter_trace;
-uns sorter_trace_array;
-u64 sorter_bufsize = 65536;
-uns sorter_debug;
-uns sorter_min_radix_bits;
-uns sorter_max_radix_bits;
-uns sorter_add_radix_bits;
-uns sorter_min_multiway_bits;
-uns sorter_max_multiway_bits;
-uns sorter_threads;
-u64 sorter_thread_threshold = 1048576;
-u64 sorter_thread_chunk = 4096;
-u64 sorter_radix_threshold = 4096;
-struct fb_params sorter_fb_params;
-struct fb_params sorter_small_fb_params;
-u64 sorter_small_input;
-
-/*
- * Definition of the "Sorter" configuration section: binds item names to the
- * sorter_xxx configuration variables.
- *
- * "FileAccess" and "SmallFileAccess" are sub-sections described by fbpar_cf;
- * each of them fills its own fb_params structure (sorter_fb_params and
- * sorter_small_fb_params respectively).
- */
-static struct cf_section sorter_config = {
-  CF_ITEMS {
-    CF_UNS("Trace", &sorter_trace),
-    CF_UNS("TraceArray", &sorter_trace_array),
-    CF_SECTION("FileAccess", &sorter_fb_params, &fbpar_cf),
-    CF_SECTION("SmallFileAccess", &sorter_small_fb_params, &fbpar_cf),
-    CF_U64("SmallInput", &sorter_small_input),
-    CF_U64("SortBuffer", &sorter_bufsize),
-    CF_UNS("Debug", &sorter_debug),
-    CF_UNS("MinRadixBits", &sorter_min_radix_bits),
-    CF_UNS("MaxRadixBits", &sorter_max_radix_bits),
-    CF_UNS("AddRadixBits", &sorter_add_radix_bits),
-    CF_UNS("MinMultiwayBits", &sorter_min_multiway_bits),
-    CF_UNS("MaxMultiwayBits", &sorter_max_multiway_bits),
-    CF_UNS("Threads", &sorter_threads),
-    CF_U64("ThreadThreshold", &sorter_thread_threshold),
-    CF_U64("ThreadChunk", &sorter_thread_chunk),
-    CF_U64("RadixThreshold", &sorter_radix_threshold),
-    CF_END
-  }
-};
-
-// Declare the "Sorter" configuration section described by sorter_config.
-// Marked as CONSTRUCTOR, so it is not called explicitly from this file.
-static void CONSTRUCTOR sorter_init_config(void)
-{
- cf_declare_section("Sorter", &sorter_config, 0);
-}
+++ /dev/null
-/*
- * UCW Library -- Universal Sorter: Governing Routines
- *
- * (c) 2007 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-#include "lib/fastbuf.h"
-#include "lib/mempool.h"
-#include "lib/stkstring.h"
-#include "lib/sorter/common.h"
-
-#include <string.h>
-#include <sys/time.h>
-#include <time.h>
-
-#define F_BSIZE(b) stk_fsize(sbuck_size(b))
-
-// Start timing of a pass: initialize ctx->start_time by init_timer()
-static void
-sorter_start_timer(struct sort_context *ctx)
-{
- init_timer(&ctx->start_time);
-}
-
-// Finish timing of a pass: remember the value of get_timer() as
-// ctx->last_pass_time and add it to the counter `account_to' points to.
-static void
-sorter_stop_timer(struct sort_context *ctx, uns *account_to)
-{
-  uns elapsed = get_timer(&ctx->start_time);
-  ctx->last_pass_time = elapsed;
-  *account_to += elapsed;
-}
-
-/*
- * Calculate the speed of the last pass for trace messages (printed as MB/s):
- * `size' bytes converted to megabytes, times 1000, divided by
- * ctx->last_pass_time. Returns 0 if either the size or the time is zero.
- * NOTE(review): the factor 1000 suggests the time is in milliseconds -- confirm against get_timer().
- */
-static uns
-sorter_speed(struct sort_context *ctx, u64 size)
-{
-  if (!size || !ctx->last_pass_time)
-    return 0;
-
-  double megabytes = (double)size / (1<<20);
-  return (uns)(megabytes * 1000 / ctx->last_pass_time);
-}
-
-/*
- * Presort one batch of data from `in' to `out'.
- *
- * The sorting buffer is allocated first. Ordinary buckets are handed over to
- * ctx->internal_sort(); buckets marked with SBF_CUSTOM_PRESORT are processed
- * by ctx->custom_presort(), which writes a new run to `out'. The return value
- * of the respective callback is passed on to the caller.
- */
-static int
-sorter_presort(struct sort_context *ctx, struct sort_bucket *in, struct sort_bucket *out, struct sort_bucket *out_only)
-{
-  sorter_alloc_buf(ctx);
-  if (!(in->flags & SBF_CUSTOM_PRESORT))
-    return ctx->internal_sort(ctx, in, out, out_only);
-
-  /*
-   * The trick with automatic joining, which we use for the normal presorter,
-   * is not necessary with the custom presorter, because the custom presorter
-   * is never called in the middle of the sorted data.
-   */
-  struct fastbuf *dest = sbuck_write(out);
-  out->runs++;
-  return ctx->custom_presort(dest, ctx->big_buf, ctx->big_buf_size);
-}
-
-/*
- * Check whether the output of sorting `b' can be appended directly to the
- * bucket preceding `b' in the list. This is possible only if that bucket is
- * marked SBF_FINAL and joining is not disabled by SORT_DEBUG_NO_JOIN.
- *
- * Returns the bucket to join to and stores its current size to *sizep,
- * or returns NULL (leaving *sizep untouched) if joining is impossible.
- */
-static struct sort_bucket *
-sbuck_join_to(struct sort_bucket *b, sh_off_t *sizep)
-{
-  struct sort_bucket *prev;
-
-  if (sorter_debug & SORT_DEBUG_NO_JOIN)
-    return NULL;
-
-  prev = (struct sort_bucket *) b->n.prev;	// Such bucket is guaranteed to exist
-  if (prev->flags & SBF_FINAL)
-    {
-      ASSERT(prev->runs == 1);
-      *sizep = sbuck_size(prev);
-      return prev;
-    }
-  return NULL;
-}
-
-/*
- * Finish production of a sorted bucket and return the size of the new data.
- *
- *   - If `join' exists and has got a second run, the data were appended to it:
- *     `b' (if any) is dropped, the runs are merged to one and the growth of
- *     `join' over `join_size' is returned.
- *   - Otherwise, if `b' exists, it is inserted to the list after `list_pos'
- *     and its size is returned.
- *   - Otherwise, 0 is returned.
- */
-static sh_off_t
-sbuck_ins_or_join(struct sort_bucket *b, cnode *list_pos, struct sort_bucket *join, sh_off_t join_size)
-{
-  if (join && join->runs >= 2)
-    {
-      if (b)
-        sbuck_drop(b);
-      ASSERT(join->runs == 2);
-      join->runs--;
-      return sbuck_size(join) - join_size;
-    }
-
-  if (!b)
-    return 0;
-
-  clist_insert_after(&b->n, list_pos);
-  return sbuck_size(b);
-}
-
-/*
- * Join a single-run bucket `b' to the final bucket, which must be its
- * predecessor in the list. If the final bucket already has a file, the data
- * of `b' are copied to it and `b' is dropped; otherwise `b' itself becomes
- * the final bucket and the old one is dropped.
- */
-static void
-sorter_join(struct sort_bucket *b)
-{
-  struct sort_bucket *final = (struct sort_bucket *) b->n.prev;
-  ASSERT(final->flags & SBF_FINAL);
-  ASSERT(b->runs == 1);
-
-  if (sbuck_has_file(final))
-    {
-      SORT_TRACE("Copying to output file: %s", F_BSIZE(b));
-      struct fastbuf *src = sbuck_read(b);
-      struct fastbuf *dest = sbuck_write(final);
-      bbcopy(src, dest, ~0U);
-      sbuck_drop(b);
-    }
-  else
-    {
-      // The final bucket doesn't have any file associated yet, so replace
-      // it with the new bucket.
-      SORT_XTRACE(3, "Replaced final bucket");
-      b->flags |= SBF_FINAL;
-      sbuck_drop(final);
-    }
-}
-
-/*
- * Sort bucket `b' by the two-way strategy.
- *
- * Unless presorting is disabled by SORT_DEBUG_NO_PRESORT (a custom presorter
- * is used regardless of that), the input is presorted to runs, which are
- * written alternately to two buckets. If the first call of the presorter
- * consumes the whole input, the result is inserted or joined and we are done.
- *
- * The main phase then repeats two-way merge passes from `ins' to `outs' until
- * the second bucket is empty. When both inputs have at most one run and there
- * is a bucket to join to, the final pass writes directly to that bucket.
- *
- * With presorting disabled, there is only a single unsorted input bucket and
- * ins[1] stays NULL during the first pass. This case never takes the joining
- * shortcut and the NULL bucket is not dropped.
- */
-static void
-sorter_twoway(struct sort_context *ctx, struct sort_bucket *b)
-{
-  struct sort_bucket *ins[3] = { NULL }, *outs[3] = { NULL };
-  cnode *list_pos = b->n.prev;
-  sh_off_t join_size;
-  struct sort_bucket *join = sbuck_join_to(b, &join_size);
-
-  if (!(sorter_debug & SORT_DEBUG_NO_PRESORT) || (b->flags & SBF_CUSTOM_PRESORT))
-    {
-      SORT_XTRACE(3, "%s", ((b->flags & SBF_CUSTOM_PRESORT) ? "Custom presorting" : "Presorting"));
-      sorter_start_timer(ctx);
-      ins[0] = sbuck_new(ctx);
-      if (!sorter_presort(ctx, b, ins[0], join ? : ins[0]))
-        {
-          // Everything was sorted by a single call of the presorter
-          sorter_stop_timer(ctx, &ctx->total_pre_time);
-          sh_off_t size = sbuck_ins_or_join(ins[0], list_pos, join, join_size);
-          SORT_XTRACE(((b->flags & SBF_SOURCE) ? 1 : 3), "Sorted in memory (%s, %dMB/s)", stk_fsize(size), sorter_speed(ctx, size));
-          sbuck_drop(b);
-          return;
-        }
-
-      // Distribute the remaining runs alternately to the two buckets
-      ins[1] = sbuck_new(ctx);
-      int i = 1;
-      while (sorter_presort(ctx, b, ins[i], ins[i]))
-        i = 1-i;
-      sbuck_drop(b);
-      sorter_stop_timer(ctx, &ctx->total_pre_time);
-      SORT_TRACE("Presorting pass (%d+%d runs, %s+%s, %dMB/s)",
-                 ins[0]->runs, ins[1]->runs,
-                 F_BSIZE(ins[0]), F_BSIZE(ins[1]),
-                 sorter_speed(ctx, sbuck_size(ins[0]) + sbuck_size(ins[1])));
-    }
-  else
-    {
-      SORT_XTRACE(2, "Presorting disabled");
-      ins[0] = b;
-    }
-
-  SORT_XTRACE(3, "Main sorting");
-  uns pass = 0;
-  do {
-    ++pass;
-    sorter_start_timer(ctx);
-    // ins[1] is NULL in the first pass if presorting was disabled; such an
-    // unsorted input must go through a regular merge pass.
-    if (ins[0]->runs <= 1 && ins[1] && ins[1]->runs <= 1 && join)
-      {
-        // This is guaranteed to produce a single run, so join if possible
-        outs[0] = join;
-        outs[1] = NULL;
-        ctx->twoway_merge(ctx, ins, outs);
-        sh_off_t size = sbuck_ins_or_join(NULL, NULL, join, join_size);
-        sorter_stop_timer(ctx, &ctx->total_ext_time);
-        SORT_TRACE("Mergesort pass %d (final run, %s, %dMB/s)", pass, stk_fsize(size), sorter_speed(ctx, size));
-        sbuck_drop(ins[0]);
-        sbuck_drop(ins[1]);
-        return;
-      }
-    outs[0] = sbuck_new(ctx);
-    outs[1] = sbuck_new(ctx);
-    outs[2] = NULL;
-    ctx->twoway_merge(ctx, ins, outs);
-    sorter_stop_timer(ctx, &ctx->total_ext_time);
-    SORT_TRACE("Mergesort pass %d (%d+%d runs, %s+%s, %dMB/s)", pass,
-               outs[0]->runs, outs[1]->runs,
-               F_BSIZE(outs[0]), F_BSIZE(outs[1]),
-               sorter_speed(ctx, sbuck_size(outs[0]) + sbuck_size(outs[1])));
-    sbuck_drop(ins[0]);
-    if (ins[1])
-      sbuck_drop(ins[1]);
-    memcpy(ins, outs, 3*sizeof(struct sort_bucket *));
-  } while (sbuck_have(ins[1]));
-
-  sbuck_drop(ins[1]);
-  clist_insert_after(&ins[0]->n, list_pos);
-}
-
-/*
- * Sort bucket `b' by the multi-way strategy.
- *
- * The input is presorted to parts, which are collected in a local list and
- * swapped out. If at most one part is produced, it is inserted or joined
- * directly. Otherwise, the parts are merged in groups of up to
- * 2^sorter_max_multiway_bits ways; the result of each merge is appended to
- * the list of parts until a merge consumes the whole list. The last merge
- * writes directly to the bucket to join to, if there is one.
- */
-static void
-sorter_multiway(struct sort_context *ctx, struct sort_bucket *b)
-{
- clist parts;
- cnode *list_pos = b->n.prev;
- sh_off_t join_size;
- struct sort_bucket *join = sbuck_join_to(b, &join_size);
- uns trace_level = (b->flags & SBF_SOURCE) ? 1 : 3;
-
- clist_init(&parts);
- ASSERT(!(sorter_debug & SORT_DEBUG_NO_PRESORT));
- SORT_XTRACE(3, "%s", ((b->flags & SBF_CUSTOM_PRESORT) ? "Custom presorting" : "Presorting"));
- uns cont;
- uns part_cnt = 0;
- u64 total_size = 0;
- sorter_start_timer(ctx);
- do
- {
- struct sort_bucket *p = sbuck_new(ctx);
- // Only the very first part may be written directly to the bucket we join to
- cont = sorter_presort(ctx, b, p, (!part_cnt && join) ? join : p);
- if (sbuck_have(p))
- {
- part_cnt++;
- clist_add_tail(&parts, &p->n);
- total_size += sbuck_size(p);
- sbuck_swap_out(p);
- }
- else
- sbuck_drop(p);
- }
- while (cont);
- sorter_stop_timer(ctx, &ctx->total_pre_time);
- sorter_free_buf(ctx);
- sbuck_drop(b);
-
- if (part_cnt <= 1)
- {
- sh_off_t size = sbuck_ins_or_join(clist_head(&parts), list_pos, (part_cnt ? NULL : join), join_size);
- SORT_XTRACE(trace_level, "Sorted in memory (%s, %dMB/s)", stk_fsize(size), sorter_speed(ctx, size));
- return;
- }
-
- SORT_TRACE("Multi-way presorting pass (%d parts, %s, %dMB/s)", part_cnt, stk_fsize(total_size), sorter_speed(ctx, total_size));
-
- uns max_ways = 1 << sorter_max_multiway_bits;
- struct sort_bucket *ways[max_ways+1];
- SORT_XTRACE(3, "Starting up to %d-way merge", max_ways);
- for (;;)
- {
- // Take up to max_ways parts from the head of the list
- uns n = 0;
- struct sort_bucket *p;
- while (n < max_ways && (p = clist_head(&parts)))
- {
- clist_remove(&p->n);
- ways[n++] = p;
- }
- ways[n] = NULL;
- ASSERT(n > 1);
-
- // If no parts are left, this is the last merge and it can go directly to `join'
- struct sort_bucket *out;
- if (clist_empty(&parts) && join)
- out = join;
- else
- out = sbuck_new(ctx);
- sorter_start_timer(ctx);
- ctx->multiway_merge(ctx, ways, out);
- sorter_stop_timer(ctx, &ctx->total_ext_time);
-
- for (uns i=0; i<n; i++)
- sbuck_drop(ways[i]);
-
- if (clist_empty(&parts))
- {
- sh_off_t size = sbuck_ins_or_join((join ? NULL : out), list_pos, join, join_size);
- SORT_TRACE("Multi-way merge completed (%d ways, %s, %dMB/s)", n, stk_fsize(size), sorter_speed(ctx, size));
- return;
- }
- else
- {
- // Queue the merged bucket as another part
- sbuck_swap_out(out);
- clist_add_tail(&parts, &out->n);
- SORT_TRACE("Multi-way merge pass (%d ways, %s, %dMB/s)", n, F_BSIZE(out), sorter_speed(ctx, sbuck_size(out)));
- }
- }
-}
-
-/*
- * Split bucket `b' to 2^bits buckets according to the top `bits' of the
- * remaining hash bits, using ctx->radix_split().
- *
- * The requested number of bits is increased by sorter_add_radix_bits, but it
- * never exceeds sorter_max_radix_bits, nor the number of hash bits left in
- * `b' (the latter keeps b->hash_bits - bits from underflowing). The new
- * buckets are inserted to the list right after `b' in increasing order,
- * each with the remaining number of hash bits; if there are more than 4 of
- * them, they are swapped out. Finally, `b' is dropped.
- */
-static void
-sorter_radix(struct sort_context *ctx, struct sort_bucket *b, uns bits)
-{
-  // Add more bits if requested and allowed.
-  bits = MIN(bits + sorter_add_radix_bits, sorter_max_radix_bits);
-  // We cannot split by more bits than the hash function has left.
-  bits = MIN(bits, b->hash_bits);
-
-  uns nbuck = 1 << bits;
-  SORT_XTRACE(3, "Running radix split on %s with hash %d bits of %d (expecting %s buckets)",
-              F_BSIZE(b), bits, b->hash_bits, stk_fsize(sbuck_size(b) / nbuck));
-  sorter_free_buf(ctx);
-  sorter_start_timer(ctx);
-
-  // Inserting in reverse order right after `b' leaves the buckets in increasing order
-  struct sort_bucket **outs = alloca(nbuck * sizeof(struct sort_bucket *));
-  for (uns i=nbuck; i--; )
-    {
-      outs[i] = sbuck_new(ctx);
-      outs[i]->hash_bits = b->hash_bits - bits;
-      clist_insert_after(&outs[i]->n, &b->n);
-    }
-
-  ctx->radix_split(ctx, b, outs, b->hash_bits - bits, bits);
-
-  // Gather statistics on bucket sizes for the trace message
-  u64 min = ~(u64)0, max = 0, sum = 0;
-  for (uns i=0; i<nbuck; i++)
-    {
-      u64 s = sbuck_size(outs[i]);
-      min = MIN(min, s);
-      max = MAX(max, s);
-      sum += s;
-      if (nbuck > 4)
-        sbuck_swap_out(outs[i]);
-    }
-
-  sorter_stop_timer(ctx, &ctx->total_ext_time);
-  SORT_TRACE("Radix split (%d buckets, %s min, %s max, %s avg, %dMB/s)", nbuck,
-             stk_fsize(min), stk_fsize(max), stk_fsize(sum / nbuck), sorter_speed(ctx, sum));
-  sbuck_drop(b);
-}
-
-static void
-sorter_decide(struct sort_context *ctx, struct sort_bucket *b)
-{
- // Drop empty buckets
- if (!sbuck_have(b))
- {
- SORT_XTRACE(4, "Dropping empty bucket");
- sbuck_drop(b);
- return;
- }
-
- // How many bits of bucket size we have to reduce before it fits in the RAM?
- // (this is insanely large if the input size is unknown, but it serves our purpose)
- u64 insize = sbuck_size(b);
- u64 mem = ctx->internal_estimate(ctx, b) * 0.8; // Magical factor accounting for various non-uniformities
- uns bits = 0;
- while ((insize >> bits) > mem)
- bits++;
-
- // Calculate the possibilities of radix splits
- uns radix_bits;
- if (!ctx->radix_split ||
- (b->flags & SBF_CUSTOM_PRESORT) ||
- (sorter_debug & SORT_DEBUG_NO_RADIX))
- radix_bits = 0;
- else
- {
- radix_bits = MIN(bits, b->hash_bits);
- radix_bits = MIN(radix_bits, sorter_max_radix_bits);
- if (radix_bits < sorter_min_radix_bits)
- radix_bits = 0;
- }
-
- // The same for multi-way merges
- uns multiway_bits;
- if (!ctx->multiway_merge ||
- (sorter_debug & SORT_DEBUG_NO_MULTIWAY) ||
- (sorter_debug & SORT_DEBUG_NO_PRESORT))
- multiway_bits = 0;
- else
- {
- multiway_bits = MIN(bits, sorter_max_multiway_bits);
- if (multiway_bits < sorter_min_multiway_bits)
- multiway_bits = 0;
- }
-
- SORT_XTRACE(3, "Decisions: size=%s max=%s runs=%d bits=%d hash=%d -> radix=%d multi=%d",
- stk_fsize(insize), stk_fsize(mem), b->runs, bits, b->hash_bits,
- radix_bits, multiway_bits);
-
- // If the input already consists of a single run, just join it
- if (b->runs)
- return sorter_join(b);
-
- // If everything fits in memory, the 2-way strategy will sort it in memory
- if (!bits)
- return sorter_twoway(ctx, b);
-
- // If we can reduce everything in one pass, do so and prefer radix splits
- if (radix_bits == bits)
- return sorter_radix(ctx, b, radix_bits);
- if (multiway_bits == bits)
- return sorter_multiway(ctx, b);
-
- // Otherwise, reduce as much as possible and again prefer radix splits
- if (radix_bits)
- return sorter_radix(ctx, b, radix_bits);
- if (multiway_bits)
- return sorter_multiway(ctx, b);
-
- // Fall back to 2-way strategy if nothing else applies
- return sorter_twoway(ctx, b);
-}
-
-void
-sorter_run(struct sort_context *ctx)
-{
- ctx->pool = mp_new(4096);
- clist_init(&ctx->bucket_list);
- sorter_prepare_buf(ctx);
- asort_start_threads(0);
-
- // Create bucket containing the source
- struct sort_bucket *bin = sbuck_new(ctx);
- bin->flags = SBF_SOURCE | SBF_OPEN_READ;
- if (ctx->custom_presort)
- bin->flags |= SBF_CUSTOM_PRESORT;
- else
- bin->fb = ctx->in_fb;
- bin->ident = "in";
- bin->size = ctx->in_size;
- bin->hash_bits = ctx->hash_bits;
- clist_add_tail(&ctx->bucket_list, &bin->n);
- SORT_XTRACE(2, "Input size: %s, %d hash bits", F_BSIZE(bin), bin->hash_bits);
- ctx->fb_params = (bin->size < sorter_small_input) ? &sorter_small_fb_params : &sorter_fb_params;
-
- // Create bucket for the output
- struct sort_bucket *bout = sbuck_new(ctx);
- bout->flags = SBF_FINAL;
- if (bout->fb = ctx->out_fb)
- bout->flags |= SBF_OPEN_WRITE;
- bout->ident = "out";
- bout->runs = 1;
- clist_add_head(&ctx->bucket_list, &bout->n);
-
- // Repeatedly sort buckets
- struct sort_bucket *b;
- while (bout = clist_head(&ctx->bucket_list), b = clist_next(&ctx->bucket_list, &bout->n))
- sorter_decide(ctx, b);
-
- asort_stop_threads();
- sorter_free_buf(ctx);
- sbuck_write(bout); // Force empty bucket to a file
- SORT_XTRACE(2, "Final size: %s", F_BSIZE(bout));
- SORT_XTRACE(2, "Final timings: %.3fs external sorting, %.3fs presorting, %.3fs internal sorting",
- ctx->total_ext_time/1000., ctx->total_pre_time/1000., ctx->total_int_time/1000.);
- ctx->out_fb = sbuck_read(bout);
- mp_delete(ctx->pool);
-}
+++ /dev/null
-/*
- * UCW Library -- Universal Sorter: Fixed-Size Internal Sorting Module
- *
- * (c) 2007 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/stkstring.h"
-
-#define ASORT_PREFIX(x) SORT_PREFIX(array_##x)
-#define ASORT_KEY_TYPE P(key)
-#define ASORT_LT(x,y) (P(compare)(&(x), &(y)) < 0)
-#ifdef SORT_INTERNAL_RADIX
-# define ASORT_HASH(x) P(hash)(&(x))
-# ifdef SORT_LONG_HASH
-# define ASORT_LONG_HASH
-# endif
-#endif
-#include "lib/sorter/array.h"
-
-/*
- * This is a more efficient implementation of the internal sorter,
- * which runs under the following assumptions:
- *
- * - the keys have fixed (and small) size
- * - no data are present after the key
- * - unification does not require any workspace
- */
-
-static size_t P(internal_workspace)(void)
-{
- size_t workspace = 0;
-#ifdef SORT_UNIFY
- workspace = sizeof(P(key) *);
-#endif
-#ifdef SORT_INTERNAL_RADIX
- workspace = MAX(workspace, sizeof(P(key)));
-#endif
- return workspace;
-}
-
-static uns P(internal_num_keys)(struct sort_context *ctx)
-{
- size_t bufsize = ctx->big_buf_size;
- size_t workspace = P(internal_workspace)();
- if (workspace)
- bufsize -= CPU_PAGE_SIZE;
- u64 maxkeys = bufsize / (sizeof(P(key)) + workspace);
- return MIN(maxkeys, ~0U); // The number of records must fit in uns
-}
-
-static int P(internal)(struct sort_context *ctx, struct sort_bucket *bin, struct sort_bucket *bout, struct sort_bucket *bout_only)
-{
- sorter_alloc_buf(ctx);
- struct fastbuf *in = sbuck_read(bin);
- P(key) *buf = ctx->big_buf;
- uns maxkeys = P(internal_num_keys)(ctx);
-
- SORT_XTRACE(5, "s-fixint: Reading (maxkeys=%u, hash_bits=%d)", maxkeys, bin->hash_bits);
- uns n = 0;
- while (n < maxkeys && P(read_key)(in, &buf[n]))
- n++;
- if (!n)
- return 0;
- void *workspace UNUSED = ALIGN_PTR(&buf[n], CPU_PAGE_SIZE);
-
- SORT_XTRACE(4, "s-fixint: Sorting %u items (%s items, %s workspace)",
- n,
- stk_fsize(n * sizeof(P(key))),
- stk_fsize(n * P(internal_workspace)()));
- timestamp_t timer;
- init_timer(&timer);
- buf = P(array_sort)(buf, n
-#ifdef SORT_INTERNAL_RADIX
- , workspace, bin->hash_bits
-#endif
- );
- if ((void *)buf != ctx->big_buf)
- workspace = ctx->big_buf;
- ctx->total_int_time += get_timer(&timer);
-
- SORT_XTRACE(5, "s-fixint: Writing");
- if (n < maxkeys)
- bout = bout_only;
- struct fastbuf *out = sbuck_write(bout);
- bout->runs++;
- uns merged UNUSED = 0;
- for (uns i=0; i<n; i++)
- {
-#ifdef SORT_UNIFY
- if (i < n-1 && !P(compare)(&buf[i], &buf[i+1]))
- {
- P(key) **keys = workspace;
- uns n = 2;
- keys[0] = &buf[i];
- keys[1] = &buf[i+1];
- while (!P(compare)(&buf[i], &buf[i+n]))
- {
- keys[n] = &buf[i+n];
- n++;
- }
- P(write_merged)(out, keys, NULL, n, NULL);
- merged += n - 1;
- i += n - 1;
- continue;
- }
-#endif
-#ifdef SORT_ASSERT_UNIQUE
- ASSERT(i == n-1 || P(compare)(&buf[i], &buf[i+1]) < 0);
-#endif
- P(write_key)(out, &buf[i]);
- }
-#ifdef SORT_UNIFY
- SORT_XTRACE(4, "Merging reduced %u records", merged);
-#endif
-
- return (n == maxkeys);
-}
-
-static u64
-P(internal_estimate)(struct sort_context *ctx, struct sort_bucket *b UNUSED)
-{
- return P(internal_num_keys)(ctx) * sizeof(P(key)) - 1; // -1 since if the buffer is full, we don't recognize EOF
-}
+++ /dev/null
-/*
- * UCW Library -- Universal Sorter: Internal Sorting Module
- *
- * (c) 2007 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/stkstring.h"
-
-#ifdef SORT_INTERNAL_RADIX
-/* Keep copies of the items' hashes to save cache misses */
-#define SORT_COPY_HASH
-#endif
-
-typedef struct {
- P(key) *key;
-#ifdef SORT_COPY_HASH
- P(hash_t) hash;
-#endif
-} P(internal_item_t);
-
-#define ASORT_PREFIX(x) SORT_PREFIX(array_##x)
-#define ASORT_KEY_TYPE P(internal_item_t)
-#ifdef SORT_COPY_HASH
-# ifdef SORT_INT
-# define ASORT_LT(x,y) ((x).hash < (y).hash) // In this mode, the hash is the value
-# else
-# define ASORT_LT(x,y) ((x).hash < (y).hash || (x).hash == (y).hash && P(compare)((x).key, (y).key) < 0)
-# endif
-#else
-# define ASORT_LT(x,y) (P(compare)((x).key, (y).key) < 0)
-#endif
-#ifdef SORT_INTERNAL_RADIX
-# ifdef SORT_COPY_HASH
-# define ASORT_HASH(x) (x).hash
-# else
-# define ASORT_HASH(x) P(hash)((x).key)
-# endif
-# ifdef SORT_LONG_HASH
-# define ASORT_LONG_HASH
-# endif
-#endif
-#include "lib/sorter/array.h"
-
-/*
- * The big_buf has the following layout:
- *
- * +-------------------------------------------------------------------------------+
- * | array of internal_item's |
- * +-------------------------------------------------------------------------------+
- * | padding to make the following part page-aligned |
- * +--------------------------------+----------------------------------------------+
- * | shadow copy of item array | array of pointers to data for write_merged() |
- * | used if radix-sorting +----------------------------------------------+
- * | | workspace for write_merged() |
- * +--------------------------------+----------------------------------------------+
- * | +---------+ |
- * | | key | |
- * | +---------+ |
- * | sequence of | padding | |
- * | items +---------+ |
- * | | data | |
- * | +---------+ |
- * | | padding | |
- * | +---------+ |
- * +-------------------------------------------------------------------------------+
- *
- * (the data which are in different columns are never accessed simultaneously,
- * so we use a single buffer for both)
- */
-
-static inline void *P(internal_get_data)(P(key) *key)
-{
- uns ksize = SORT_KEY_SIZE(*key);
-#ifdef SORT_UNIFY
- ksize = ALIGN_TO(ksize, CPU_STRUCT_ALIGN);
-#endif
- return (byte *) key + ksize;
-}
-
-static inline size_t P(internal_workspace)(P(key) *key UNUSED)
-{
- size_t ws = 0;
-#ifdef SORT_UNIFY
- ws += sizeof(void *);
-#endif
-#ifdef SORT_UNIFY_WORKSPACE
- ws += SORT_UNIFY_WORKSPACE(*key);
-#endif
-#ifdef SORT_INTERNAL_RADIX
- ws = MAX(ws, sizeof(P(internal_item_t)));
-#endif
- return ws;
-}
-
-static int P(internal)(struct sort_context *ctx, struct sort_bucket *bin, struct sort_bucket *bout, struct sort_bucket *bout_only)
-{
- sorter_alloc_buf(ctx);
- struct fastbuf *in = sbuck_read(bin);
-
- P(key) key, *keybuf = ctx->key_buf;
- if (!keybuf)
- keybuf = ctx->key_buf = sorter_alloc(ctx, sizeof(key));
- if (ctx->more_keys)
- {
- key = *keybuf;
- ctx->more_keys = 0;
- }
- else if (!P(read_key)(in, &key))
- return 0;
-
- size_t bufsize = ctx->big_buf_size;
-#ifdef SORT_VAR_DATA
- if (sizeof(key) + 2*CPU_PAGE_SIZE + SORT_DATA_SIZE(key) + P(internal_workspace)(&key) > bufsize)
- {
- SORT_XTRACE(4, "s-internal: Generating a giant run");
- struct fastbuf *out = sbuck_write(bout);
- P(copy_data)(&key, in, out);
- bout->runs++;
- return 1; // We don't know, but 1 is always safe
- }
-#endif
-
- SORT_XTRACE(5, "s-internal: Reading");
- P(internal_item_t) *item_array = ctx->big_buf, *item = item_array, *last_item;
- byte *end = (byte *) ctx->big_buf + bufsize;
- size_t remains = bufsize - CPU_PAGE_SIZE;
- do
- {
- uns ksize = SORT_KEY_SIZE(key);
-#ifdef SORT_UNIFY
- uns ksize_aligned = ALIGN_TO(ksize, CPU_STRUCT_ALIGN);
-#else
- uns ksize_aligned = ksize;
-#endif
- uns dsize = SORT_DATA_SIZE(key);
- uns recsize = ALIGN_TO(ksize_aligned + dsize, CPU_STRUCT_ALIGN);
- size_t totalsize = recsize + sizeof(P(internal_item_t)) + P(internal_workspace)(&key);
- if (unlikely(totalsize > remains
-#ifdef CPU_64BIT_POINTERS
- || item >= item_array + ~0U // The number of items must fit in an uns
-#endif
- ))
- {
- ctx->more_keys = 1;
- *keybuf = key;
- break;
- }
- remains -= totalsize;
- end -= recsize;
- memcpy(end, &key, ksize);
-#ifdef SORT_VAR_DATA
- breadb(in, end + ksize_aligned, dsize);
-#endif
- item->key = (P(key)*) end;
-#ifdef SORT_COPY_HASH
- item->hash = P(hash)(item->key);
-#endif
- item++;
- }
- while (P(read_key)(in, &key));
- last_item = item;
-
- uns count = last_item - item_array;
- void *workspace UNUSED = ALIGN_PTR(last_item, CPU_PAGE_SIZE);
- SORT_XTRACE(4, "s-internal: Read %u items (%s items, %s workspace, %s data)",
- count,
- stk_fsize((byte*)last_item - (byte*)item_array),
- stk_fsize(end - (byte*)last_item - remains),
- stk_fsize((byte*)ctx->big_buf + bufsize - end));
- timestamp_t timer;
- init_timer(&timer);
- item_array = P(array_sort)(item_array, count
-#ifdef SORT_INTERNAL_RADIX
- , workspace, bin->hash_bits
-#endif
- );
- if ((void *)item_array != ctx->big_buf)
- workspace = ctx->big_buf;
- last_item = item_array + count;
- ctx->total_int_time += get_timer(&timer);
-
- SORT_XTRACE(5, "s-internal: Writing");
- if (!ctx->more_keys)
- bout = bout_only;
- struct fastbuf *out = sbuck_write(bout);
- bout->runs++;
- uns merged UNUSED = 0;
- for (item = item_array; item < last_item; item++)
- {
-#ifdef SORT_UNIFY
- if (item < last_item - 1 && !P(compare)(item->key, item[1].key))
- {
- // Rewrite the item structures with just pointers to keys and place
- // pointers to data in the workspace.
- P(key) **key_array = (void *) item;
- void **data_array = workspace;
- key_array[0] = item[0].key;
- data_array[0] = P(internal_get_data)(key_array[0]);
- uns cnt;
- for (cnt=1; item+cnt < last_item && !P(compare)(key_array[0], item[cnt].key); cnt++)
- {
- key_array[cnt] = item[cnt].key;
- data_array[cnt] = P(internal_get_data)(key_array[cnt]);
- }
- P(write_merged)(out, key_array, data_array, cnt, data_array+cnt);
- item += cnt - 1;
- merged += cnt - 1;
- continue;
- }
-#endif
-#ifdef SORT_ASSERT_UNIQUE
- ASSERT(item == last_item-1 || P(compare)(item->key, item[1].key) < 0);
-#endif
- P(write_key)(out, item->key);
-#ifdef SORT_VAR_DATA
- bwrite(out, P(internal_get_data)(item->key), SORT_DATA_SIZE(*item->key));
-#endif
- }
-#ifdef SORT_UNIFY
- SORT_XTRACE(4, "Merging reduced %u records", merged);
-#endif
-
- return ctx->more_keys;
-}
-
-static u64
-P(internal_estimate)(struct sort_context *ctx, struct sort_bucket *b UNUSED)
-{
- // Most of this is just wild guesses
-#ifdef SORT_VAR_KEY
- uns avg = ALIGN_TO(sizeof(P(key))/4, CPU_STRUCT_ALIGN);
-#else
- uns avg = ALIGN_TO(sizeof(P(key)), CPU_STRUCT_ALIGN);
-#endif
- uns ws = 0;
-#ifdef SORT_UNIFY
- ws += sizeof(void *);
-#endif
-#ifdef SORT_UNIFY_WORKSPACE
- ws += avg;
-#endif
-#ifdef SORT_INTERNAL_RADIX
- ws = MAX(ws, sizeof(P(internal_item_t)));
-#endif
- // We ignore the data part of records, it probably won't make the estimate much worse
- return (ctx->big_buf_size / (avg + ws + sizeof(P(internal_item_t))) * avg);
-}
-
-#undef SORT_COPY_HASH
+++ /dev/null
-/*
- * UCW Library -- Universal Sorter: Multi-Way Merge Module
- *
- * (c) 2007 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-/*
- * We use a binary tree to keep track of the current minimum. The tree is
- * represented by an array (in the same way as binary heaps usually are),
- * leaves correspond to input streams and each internal vertex remembers
- * the leaf in its subtree, which has the lowest key.
- */
-
-typedef struct P(mwt) {
- int i; // Minimum of the subtree
-#ifdef SORT_UNIFY
- int eq; // Did we encounter equality anywhere in the subtree?
-#endif
-} P(mwt);
-
-static inline void P(update_tree)(P(key) *keys, P(mwt) *tree, uns i)
-{
- while (i /= 2)
- {
- if (tree[2*i].i < 0)
- tree[i] = tree[2*i+1];
- else if (tree[2*i+1].i < 0)
- tree[i] = tree[2*i];
- else
- {
- int cmp = P(compare)(&keys[tree[2*i].i], &keys[tree[2*i+1].i]);
- tree[i] = (cmp <= 0) ? tree[2*i] : tree[2*i+1];
-#ifdef SORT_UNIFY
- if (!cmp)
- tree[i].eq = 1;
-#endif
- }
- /*
- * It is very tempting to stop as soon as the current node does not
- * change, but it is wrong, because even if the stream index stored in
- * the tree is the same, the actual key value can differ.
- */
- }
- /*
- * This function sometimes triggers optimizer bugs in GCC versions up to 4.2.1,
- * leading to an assumption that tree[1] does not change during this function.
- * We add an explicit memory barrier as a work-around. Ugh. See GCC Bug #33262.
- */
- asm volatile ("" : : : "memory");
-}
-
-static inline void P(set_tree)(P(key) *keys, P(mwt) *tree, uns i, int val)
-{
- tree[i].i = val;
- P(update_tree)(keys, tree, i);
-}
-
-static void P(multiway_merge)(struct sort_context *ctx UNUSED, struct sort_bucket **ins, struct sort_bucket *out)
-{
- uns num_ins = 0;
- while (ins[num_ins])
- num_ins++;
-
- uns n2 = 1;
- while (n2 < num_ins)
- n2 *= 2;
-
- struct fastbuf *fout = sbuck_write(out);
- struct fastbuf *fins[num_ins];
- P(key) keys[num_ins];
- P(mwt) tree[2*n2];
- for (uns i=1; i<2*n2; i++)
- tree[i] = (P(mwt)) { .i = -1 };
-
- for (uns i=0; i<num_ins; i++)
- {
- fins[i] = sbuck_read(ins[i]);
- if (P(read_key)(fins[i], &keys[i]))
- P(set_tree)(keys, tree, n2+i, i);
- }
-
-#ifdef SORT_UNIFY
-
- uns hits[num_ins];
- P(key) *mkeys[num_ins], *key;
- struct fastbuf *mfb[num_ins];
-
- while (likely(tree[1].i >= 0))
- {
- int i = tree[1].i;
- if (!tree[1].eq)
- {
- /* The key is unique, so let's go through the fast path */
- P(copy_data)(&keys[i], fins[i], fout);
- if (unlikely(!P(read_key)(fins[i], &keys[i])))
- tree[n2+i].i = -1;
- P(update_tree)(keys, tree, n2+i);
- continue;
- }
-
- uns m = 0;
- key = &keys[i];
- do
- {
- hits[m] = i;
- mkeys[m] = &keys[i];
- mfb[m] = fins[i];
- m++;
- P(set_tree)(keys, tree, n2+i, -1);
- i = tree[1].i;
- if (unlikely(i < 0))
- break;
- }
- while (!P(compare)(key, &keys[i]));
-
- P(copy_merged)(mkeys, mfb, m, fout);
-
- for (uns j=0; j<m; j++)
- {
- i = hits[j];
- if (likely(P(read_key)(fins[i], &keys[i])))
- P(set_tree)(keys, tree, n2+i, i);
- }
- }
-
-#else
-
- /* Simplified version which does not support any unification */
- while (likely(tree[1].i >= 0))
- {
- uns i = tree[1].i;
- P(key) UNUSED key = keys[i];
- P(copy_data)(&keys[i], fins[i], fout);
- if (unlikely(!P(read_key)(fins[i], &keys[i])))
- tree[n2+i].i = -1;
- P(update_tree)(keys, tree, n2+i);
-#ifdef SORT_ASSERT_UNIQUE
- ASSERT(tree[1].i < 0 || P(compare)(&key, &keys[tree[1].i]) < 0);
-#endif
- }
-
-#endif
-
- out->runs++;
-}
+++ /dev/null
-/*
- * UCW Library -- Universal Sorter: Radix-Split Module
- *
- * (c) 2007 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include <string.h>
-
-static void P(radix_split)(struct sort_context *ctx UNUSED, struct sort_bucket *bin, struct sort_bucket **bouts, uns bitpos, uns numbits)
-{
- uns nbucks = 1 << numbits;
- uns mask = nbucks - 1;
- struct fastbuf *in = sbuck_read(bin);
- P(key) k;
-
- struct fastbuf *outs[nbucks];
- bzero(outs, sizeof(outs));
-
- while (P(read_key)(in, &k))
- {
- P(hash_t) h = P(hash)(&k);
- uns i = (h >> bitpos) & mask;
- if (unlikely(!outs[i]))
- outs[i] = sbuck_write(bouts[i]);
- P(copy_data)(&k, in, outs[i]);
- }
-}
+++ /dev/null
-/*
- * UCW Library -- Universal Sorter: Two-Way Merge Module
- *
- * (c) 2007 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-static void P(twoway_merge)(struct sort_context *ctx UNUSED, struct sort_bucket **ins, struct sort_bucket **outs)
-{
- struct fastbuf *fin1, *fin2, *fout1, *fout2, *ftmp;
- P(key) kbuf1, kbuf2, kbuf3, kbuf4;
- P(key) *kin1 = &kbuf1, *kprev1 = &kbuf2, *kin2 = &kbuf3, *kprev2 = &kbuf4;
- P(key) *kout = NULL, *ktmp;
- int next1, next2, run1, run2;
- int comp;
- uns run_count = 0;
-
- fin1 = sbuck_read(ins[0]);
- next1 = P(read_key)(fin1, kin1);
- if (sbuck_have(ins[1]))
- {
- fin2 = sbuck_read(ins[1]);
- next2 = P(read_key)(fin2, kin2);
- }
- else
- {
- fin2 = NULL;
- next2 = 0;
- }
- fout1 = fout2 = NULL;
-
- run1 = next1, run2 = next2;
- while (next1 || next2)
- {
- if (!run1)
- comp = 1;
- else if (!run2)
- comp = -1;
- else
- comp = P(compare)(kin1, kin2);
- ktmp = (comp <= 0) ? kin1 : kin2;
- if (!kout || !(P(compare)(kout, ktmp) LESS 0))
- {
- SWAP(fout1, fout2, ftmp);
- if (unlikely(!fout1))
- {
- if (!fout2)
- fout1 = sbuck_write(outs[0]);
- else if (outs[1])
- fout1 = sbuck_write(outs[1]);
- else
- fout1 = fout2;
- }
- run_count++;
- }
-#ifdef SORT_ASSERT_UNIQUE
- ASSERT(comp != 0);
-#endif
- if (comp LESS 0)
- {
- P(copy_data)(kin1, fin1, fout1);
- SWAP(kin1, kprev1, ktmp);
- next1 = P(read_key)(fin1, kin1);
- run1 = next1 && (P(compare)(kprev1, kin1) LESS 0);
- kout = kprev1;
- }
-#ifdef SORT_UNIFY
- else if (comp == 0)
- {
- P(key) *mkeys[] = { kin1, kin2 };
- struct fastbuf *mfb[] = { fin1, fin2 };
- P(copy_merged)(mkeys, mfb, 2, fout1);
- SWAP(kin1, kprev1, ktmp);
- next1 = P(read_key)(fin1, kin1);
- run1 = next1 && (P(compare)(kprev1, kin1) LESS 0);
- SWAP(kin2, kprev2, ktmp);
- next2 = P(read_key)(fin2, kin2);
- run2 = next2 && (P(compare)(kprev2, kin2) LESS 0);
- kout = kprev2;
- }
-#endif
- else
- {
- P(copy_data)(kin2, fin2, fout1);
- SWAP(kin2, kprev2, ktmp);
- next2 = P(read_key)(fin2, kin2);
- run2 = next2 && (P(compare)(kprev2, kin2) LESS 0);
- kout = kprev2;
- }
- if (!run1 && !run2)
- {
- run1 = next1;
- run2 = next2;
- }
- }
-
- if (fout2 && fout2 != fout1)
- outs[1]->runs += run_count / 2;
- if (fout1)
- outs[0]->runs += (run_count+1) / 2;
-}
+++ /dev/null
-/*
- * UCW Library -- Universal Sorter: Operations on Contexts, Buffers and Buckets
- *
- * (c) 2007 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-#include "lib/fastbuf.h"
-#include "lib/mempool.h"
-#include "lib/stkstring.h"
-#include "lib/sorter/common.h"
-
-#include <fcntl.h>
-
-void *
-sorter_alloc(struct sort_context *ctx, uns size)
-{
- return mp_alloc_zero(ctx->pool, size);
-}
-
-struct sort_bucket *
-sbuck_new(struct sort_context *ctx)
-{
- struct sort_bucket *b = sorter_alloc(ctx, sizeof(struct sort_bucket));
- b->ctx = ctx;
- return b;
-}
-
-void
-sbuck_drop(struct sort_bucket *b)
-{
- if (b)
- {
- ASSERT(!(b->flags & SBF_DESTROYED));
- if (b->n.prev)
- clist_remove(&b->n);
- bclose(b->fb);
- bzero(b, sizeof(*b));
- b->flags = SBF_DESTROYED;
- }
-}
-
-sh_off_t
-sbuck_size(struct sort_bucket *b)
-{
- if ((b->flags & SBF_OPEN_WRITE) && !(b->flags & SBF_SWAPPED_OUT))
- return btell(b->fb);
- else
- return b->size;
-}
-
-int
-sbuck_have(struct sort_bucket *b)
-{
- return b && sbuck_size(b);
-}
-
-int
-sbuck_has_file(struct sort_bucket *b)
-{
- return (b->fb || (b->flags & SBF_SWAPPED_OUT));
-}
-
-static void
-sbuck_swap_in(struct sort_bucket *b)
-{
- if (b->flags & SBF_SWAPPED_OUT)
- {
- b->fb = bopen_file(b->filename, O_RDWR, b->ctx->fb_params);
- if (b->flags & SBF_OPEN_WRITE)
- bseek(b->fb, 0, SEEK_END);
- if (!(sorter_debug & SORT_DEBUG_KEEP_BUCKETS))
- bconfig(b->fb, BCONFIG_IS_TEMP_FILE, 1);
- b->flags &= ~SBF_SWAPPED_OUT;
- SORT_XTRACE(3, "Swapped in %s", b->filename);
- }
-}
-
-struct fastbuf *
-sbuck_read(struct sort_bucket *b)
-{
- sbuck_swap_in(b);
- if (b->flags & SBF_OPEN_READ)
- return b->fb;
- else if (b->flags & SBF_OPEN_WRITE)
- {
- b->size = btell(b->fb);
- b->flags = (b->flags & ~SBF_OPEN_WRITE) | SBF_OPEN_READ;
- brewind(b->fb);
- return b->fb;
- }
- else
- ASSERT(0);
-}
-
-struct fastbuf *
-sbuck_write(struct sort_bucket *b)
-{
- sbuck_swap_in(b);
- if (b->flags & SBF_OPEN_WRITE)
- ASSERT(b->fb);
- else
- {
- ASSERT(!(b->flags & (SBF_OPEN_READ | SBF_DESTROYED)));
- b->fb = bopen_tmp_file(b->ctx->fb_params);
- if (sorter_debug & SORT_DEBUG_KEEP_BUCKETS)
- bconfig(b->fb, BCONFIG_IS_TEMP_FILE, 0);
- b->flags |= SBF_OPEN_WRITE;
- b->filename = mp_strdup(b->ctx->pool, b->fb->name);
- }
- return b->fb;
-}
-
-void
-sbuck_swap_out(struct sort_bucket *b)
-{
- if ((b->flags & (SBF_OPEN_READ | SBF_OPEN_WRITE)) && b->fb && !(b->flags & SBF_SOURCE))
- {
- if (b->flags & SBF_OPEN_WRITE)
- b->size = btell(b->fb);
- bconfig(b->fb, BCONFIG_IS_TEMP_FILE, 0);
- bclose(b->fb);
- b->fb = NULL;
- b->flags |= SBF_SWAPPED_OUT;
- SORT_XTRACE(3, "Swapped out %s", b->filename);
- }
-}
-
-void
-sorter_prepare_buf(struct sort_context *ctx)
-{
- u64 bs = sorter_bufsize;
- bs = ALIGN_TO(bs, (u64)CPU_PAGE_SIZE);
- bs = MAX(bs, 2*(u64)CPU_PAGE_SIZE);
- ctx->big_buf_size = bs;
-}
-
-void
-sorter_alloc_buf(struct sort_context *ctx)
-{
- if (ctx->big_buf)
- return;
- ctx->big_buf = big_alloc(ctx->big_buf_size);
- SORT_XTRACE(3, "Allocated sorting buffer (%s)", stk_fsize(ctx->big_buf_size));
-}
-
-void
-sorter_free_buf(struct sort_context *ctx)
-{
- if (!ctx->big_buf)
- return;
- big_free(ctx->big_buf, ctx->big_buf_size);
- ctx->big_buf = NULL;
- SORT_XTRACE(3, "Freed sorting buffer");
-}
+++ /dev/null
-/*
- * UCW Library -- Testing the Sorter
- *
- * (c) 2007 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-#include "lib/getopt.h"
-#include "lib/conf.h"
-#include "lib/fastbuf.h"
-#include "lib/ff-binary.h"
-#include "lib/hashfunc.h"
-#include "lib/md5.h"
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <fcntl.h>
-#include <unistd.h>
-
-/*** A hack for overriding radix-sorter configuration ***/
-
-#ifdef FORCE_RADIX_BITS
-#undef CONFIG_UCW_RADIX_SORTER_BITS
-#define CONFIG_UCW_RADIX_SORTER_BITS FORCE_RADIX_BITS
-#endif
-
-/*** Time measurement ***/
-
-static timestamp_t timer;
-static uns test_id;
-
-static void
-start(void)
-{
- sync();
- init_timer(&timer);
-}
-
-static void
-stop(void)
-{
- sync();
- msg(L_INFO, "Test %d took %.3fs", test_id, get_timer(&timer) / 1000.);
-}
-
-/*** Simple 4-byte integer keys ***/
-
-struct key1 {
- u32 x;
-};
-
-#define SORT_KEY_REGULAR struct key1
-#define SORT_PREFIX(x) s1_##x
-#define SORT_INPUT_FB
-#define SORT_OUTPUT_FB
-#define SORT_UNIQUE
-#define SORT_INT(k) (k).x
-#define SORT_DELETE_INPUT 0
-
-#include "lib/sorter/sorter.h"
-
-static void
-test_int(int mode, u64 size)
-{
- uns N = size ? nextprime(MIN(size/4, 0xffff0000)) : 0;
- uns K = N/4*3;
- msg(L_INFO, ">>> Integers (%s, N=%u)", ((char *[]) { "increasing", "decreasing", "random" })[mode], N);
-
- struct fastbuf *f = bopen_tmp(65536);
- for (uns i=0; i<N; i++)
- bputl(f, (mode==0) ? i : (mode==1) ? N-1-i : ((u64)i * K + 17) % N);
- brewind(f);
-
- start();
- f = s1_sort(f, NULL, N-1);
- stop();
-
- SORT_XTRACE(2, "Verifying");
- for (uns i=0; i<N; i++)
- {
- uns j = bgetl(f);
- if (i != j)
- die("Discrepancy: %u instead of %u", j, i);
- }
- bclose(f);
-}
-
-/*** Integers with merging, but no data ***/
-
-struct key2 {
- u32 x;
- u32 cnt;
-};
-
-static inline void s2_write_merged(struct fastbuf *f, struct key2 **k, void **d UNUSED, uns n, void *buf UNUSED)
-{
- for (uns i=1; i<n; i++)
- k[0]->cnt += k[i]->cnt;
- bwrite(f, k[0], sizeof(struct key2));
-}
-
-#define SORT_KEY_REGULAR struct key2
-#define SORT_PREFIX(x) s2_##x
-#define SORT_INPUT_FB
-#define SORT_OUTPUT_FB
-#define SORT_UNIFY
-#define SORT_INT(k) (k).x
-
-#include "lib/sorter/sorter.h"
-
-static void
-test_counted(int mode, u64 size)
-{
- u64 items = size / sizeof(struct key2);
- uns mult = 2;
- while (items/(2*mult) > 0xffff0000)
- mult++;
- uns N = items ? nextprime(items/(2*mult)) : 0;
- uns K = N/4*3;
- msg(L_INFO, ">>> Counted integers (%s, N=%u, mult=%u)", ((char *[]) { "increasing", "decreasing", "random" })[mode], N, mult);
-
- struct fastbuf *f = bopen_tmp(65536);
- for (uns m=0; m<mult; m++)
- for (uns i=0; i<N; i++)
- for (uns j=0; j<2; j++)
- {
- bputl(f, (mode==0) ? (i%N) : (mode==1) ? N-1-(i%N) : ((u64)i * K + 17) % N);
- bputl(f, 1);
- }
- brewind(f);
-
- start();
- f = s2_sort(f, NULL, N-1);
- stop();
-
- SORT_XTRACE(2, "Verifying");
- for (uns i=0; i<N; i++)
- {
- uns j = bgetl(f);
- if (i != j)
- die("Discrepancy: %u instead of %u", j, i);
- uns k = bgetl(f);
- if (k != 2*mult)
- die("Discrepancy: %u has count %u instead of %u", j, k, 2*mult);
- }
- bclose(f);
-}
-
-/*** Longer records with hashes (similar to Shepherd's index records) ***/
-
-struct key3 {
- u32 hash[4];
- u32 i;
- u32 payload[3];
-};
-
-static inline int s3_compare(struct key3 *x, struct key3 *y)
-{
- COMPARE(x->hash[0], y->hash[0]);
- COMPARE(x->hash[1], y->hash[1]);
- COMPARE(x->hash[2], y->hash[2]);
- COMPARE(x->hash[3], y->hash[3]);
- return 0;
-}
-
-static inline uns s3_hash(struct key3 *x)
-{
- return x->hash[0];
-}
-
-#define SORT_KEY_REGULAR struct key3
-#define SORT_PREFIX(x) s3_##x
-#define SORT_INPUT_FB
-#define SORT_OUTPUT_FB
-#define SORT_HASH_BITS 32
-
-#include "lib/sorter/sorter.h"
-
-static void
-gen_hash_key(int mode, struct key3 *k, uns i)
-{
- k->i = i;
- k->payload[0] = 7*i + 13;
- k->payload[1] = 13*i + 19;
- k->payload[2] = 19*i + 7;
- switch (mode)
- {
- case 0:
- k->hash[0] = i;
- k->hash[1] = k->payload[0];
- k->hash[2] = k->payload[1];
- k->hash[3] = k->payload[2];
- break;
- case 1:
- k->hash[0] = ~i;
- k->hash[1] = k->payload[0];
- k->hash[2] = k->payload[1];
- k->hash[3] = k->payload[2];
- break;
- default: ;
- struct MD5Context ctx;
- MD5Init(&ctx);
- MD5Update(&ctx, (byte*) &k->i, 4);
- MD5Final((byte*) &k->hash, &ctx);
- break;
- }
-}
-
-static void
-test_hashes(int mode, u64 size)
-{
- uns N = MIN(size / sizeof(struct key3), 0xffffffff);
- msg(L_INFO, ">>> Hashes (%s, N=%u)", ((char *[]) { "increasing", "decreasing", "random" })[mode], N);
- struct key3 k, lastk;
-
- struct fastbuf *f = bopen_tmp(65536);
- uns hash_sum = 0;
- for (uns i=0; i<N; i++)
- {
- gen_hash_key(mode, &k, i);
- hash_sum += k.hash[3];
- bwrite(f, &k, sizeof(k));
- }
- brewind(f);
-
- start();
- f = s3_sort(f, NULL);
- stop();
-
- SORT_XTRACE(2, "Verifying");
- for (uns i=0; i<N; i++)
- {
- int ok = breadb(f, &k, sizeof(k));
- ASSERT(ok);
- if (i && s3_compare(&k, &lastk) <= 0)
- ASSERT(0);
- gen_hash_key(mode, &lastk, k.i);
- if (memcmp(&k, &lastk, sizeof(k)))
- ASSERT(0);
- hash_sum -= k.hash[3];
- }
- ASSERT(!hash_sum);
- bclose(f);
-}
-
-/*** Variable-length records (strings) with and without var-length data ***/
-
-#define KEY4_MAX 256
-
-struct key4 {
- uns len;
- byte s[KEY4_MAX];
-};
-
-static inline int s4_compare(struct key4 *x, struct key4 *y)
-{
- uns l = MIN(x->len, y->len);
- int c = memcmp(x->s, y->s, l);
- if (c)
- return c;
- COMPARE(x->len, y->len);
- return 0;
-}
-
-static inline int s4_read_key(struct fastbuf *f, struct key4 *x)
-{
- x->len = bgetl(f);
- if (x->len == 0xffffffff)
- return 0;
- ASSERT(x->len < KEY4_MAX);
- breadb(f, x->s, x->len);
- return 1;
-}
-
-static inline void s4_write_key(struct fastbuf *f, struct key4 *x)
-{
- ASSERT(x->len < KEY4_MAX);
- bputl(f, x->len);
- bwrite(f, x->s, x->len);
-}
-
-#define SORT_KEY struct key4
-#define SORT_PREFIX(x) s4_##x
-#define SORT_KEY_SIZE(x) (sizeof(struct key4) - KEY4_MAX + (x).len)
-#define SORT_INPUT_FB
-#define SORT_OUTPUT_FB
-
-#include "lib/sorter/sorter.h"
-
-#define s4b_compare s4_compare
-#define s4b_read_key s4_read_key
-#define s4b_write_key s4_write_key
-
-static inline uns s4_data_size(struct key4 *x)
-{
- return x->len ? (x->s[0] ^ 0xad) : 0;
-}
-
-#define SORT_KEY struct key4
-#define SORT_PREFIX(x) s4b_##x
-#define SORT_KEY_SIZE(x) (sizeof(struct key4) - KEY4_MAX + (x).len)
-#define SORT_DATA_SIZE(x) s4_data_size(&(x))
-#define SORT_INPUT_FB
-#define SORT_OUTPUT_FB
-
-#include "lib/sorter/sorter.h"
-
-static void
-gen_key4(struct key4 *k)
-{
- k->len = random_max(KEY4_MAX);
- for (uns i=0; i<k->len; i++)
- k->s[i] = random();
-}
-
-static void
-gen_data4(byte *buf, uns len, uns h)
-{
- while (len--)
- {
- *buf++ = h >> 24;
- h = h*259309 + 17;
- }
-}
-
-/*
- * Sorter test on variable-length string keys.
- *
- * mode == 0: keys only (s4_sort); mode != 0: every key is followed by
- * s4_data_size() bytes generated from the hash of the key (s4b_sort).
- * `size' is the approximate amount of input to generate, in bytes.
- *
- * Verification checks that the output is non-decreasing, that the data
- * following each key still matches what the key's hash generates, and that
- * the sum of key hashes over the output equals the sum over the input.
- */
-static void
-test_strings(uns mode, u64 size)
-{
-  uns avg_item_size = KEY4_MAX/2 + 4 + (mode ? 128 : 0);
-  uns N = MIN(size / avg_item_size, 0xffffffff);
-  msg(L_INFO, ">>> Strings %s(N=%u)", (mode ? "with data " : ""), N);
-  /*
-   * The keys are drawn from random(), whose seed is set by srandom().
-   * srand() seeds rand(), which is only guaranteed to be the same
-   * generator on some C libraries.
-   */
-  srandom(1);
-
-  struct key4 k, lastk;
-  byte buf[256], buf2[256];
-  uns sum = 0;
-
-  /* Generate the input */
-  struct fastbuf *f = bopen_tmp(65536);
-  for (uns i=0; i<N; i++)
-    {
-      gen_key4(&k);
-      s4_write_key(f, &k);
-      uns h = hash_block(k.s, k.len);
-      sum += h;
-      if (mode)
-        {
-          gen_data4(buf, s4_data_size(&k), h);
-          bwrite(f, buf, s4_data_size(&k));
-        }
-    }
-  brewind(f);
-
-  start();
-  f = (mode ? s4b_sort : s4_sort)(f, NULL);
-  stop();
-
-  SORT_XTRACE(2, "Verifying");
-  for (uns i=0; i<N; i++)
-    {
-      int ok = s4_read_key(f, &k);
-      ASSERT(ok);
-      uns h = hash_block(k.s, k.len);
-      if (mode && s4_data_size(&k))
-        {
-          ok = breadb(f, buf, s4_data_size(&k));
-          ASSERT(ok);
-          gen_data4(buf2, s4_data_size(&k), h);
-          ASSERT(!memcmp(buf, buf2, s4_data_size(&k)));
-        }
-      /* lastk is valid only from the second record on */
-      if (i && s4_compare(&k, &lastk) < 0)
-        ASSERT(0);
-      sum -= h;
-      lastk = k;
-    }
-  ASSERT(!sum);
-  bclose(f);
-}
-
-/*** Graph-like structure with custom presorting ***/
-
-/* On-disk record header of the graph test: `cnt' 32-bit values follow the key (see SORT_DATA_SIZE below). */
-struct key5 {
- u32 x;
- u32 cnt;
-};
-
-/* Generator state: s5_N/s5_K/s5_L are set up by test_graph(), s5_i/s5_j are advanced by s5_gen(). */
-static uns s5_N, s5_K, s5_L, s5_i, s5_j;
-
-/* One generated (x,y) pair before it is packed into key5 records. */
-struct s5_pair {
- uns x, y;
-};
-
-/*
- * Produce the next (x,y) pair of the test graph into `p'.
- * The counters walk j = 0..s5_N-1 for each i = 0..s5_N-1.
- * Returns 1 when a pair was produced, 0 when the sequence is exhausted
- * (or s5_N is zero).
- */
-static int s5_gen(struct s5_pair *p)
-{
-  if (s5_j >= s5_N)
-    {
-      /* End of the current row: stop after the last one, else start the next */
-      if (s5_N == 0 || s5_i >= s5_N-1)
-        return 0;
-      s5_i++;
-      s5_j = 0;
-    }
-  uns col = s5_j++;
-  p->x = ((u64)col * s5_K) % s5_N;
-  p->y = ((u64)(s5_i + col) * s5_L) % s5_N;
-  return 1;
-}
-
-/* Instantiate s5m_sort(n, ary): array sorter over plain u32 values, used by s5_write_merged(). */
-#define ASORT_PREFIX(x) s5m_##x
-#define ASORT_KEY_TYPE u32
-#define ASORT_ELT(i) ary[i]
-#define ASORT_EXTRA_ARGS , u32 *ary
-#include "lib/arraysort.h"
-
-/*
- * In-memory unification callback: concatenate the value lists of `n'
- * records with equal keys into the workspace `buf', sort them and write
- * a single record (keys[0] with the summed count, then the values) to `f'.
- */
-static void s5_write_merged(struct fastbuf *f, struct key5 **keys, void **data, uns n, void *buf)
-{
-  u32 *merged = buf;
-  uns total = 0;
-  for (uns i=0; i<n; i++)
-    {
-      uns c = keys[i]->cnt;
-      memcpy(merged + total, data[i], 4*c);
-      total += c;
-    }
-  s5m_sort(total, merged);
-  keys[0]->cnt = total;
-  bwrite(f, keys[0], sizeof(struct key5));
-  bwrite(f, merged, 4*total);
-}
-
-/*
- * Streaming unification callback: merge `n' records whose value lists are
- * still in the fastbufs data[0..n-1] into a single record written to `dest'.
- * k[i] holds the next unread value of stream i; keys[i]->cnt is used as the
- * number of values remaining in stream i and is decremented as values are
- * consumed.
- */
-static void s5_copy_merged(struct key5 **keys, struct fastbuf **data, uns n, struct fastbuf *dest)
-{
- u32 k[n];
- uns m = 0;
- /* Fetch the first value of every stream and sum up the counts */
- for (uns i=0; i<n; i++)
- {
- k[i] = bgetl(data[i]);
- m += keys[i]->cnt;
- }
- struct key5 key = { .x = keys[0]->x, .cnt = m };
- bwrite(dest, &key, sizeof(key));
- while (key.cnt--)
- {
- /* Pick the stream with the smallest pending value (the lowest index wins ties) */
- uns b = 0;
- for (uns i=1; i<n; i++)
- if (k[i] < k[b])
- b = i;
- bputl(dest, k[b]);
- if (--keys[b]->cnt)
- k[b] = bgetl(data[b]);
- else
- k[b] = ~0U; /* stream exhausted: park it at the maximum value */
- }
-}
-
-/*
- * "Less than" on pairs, by x and then by y.
- * NOTE(review): COMPARE_LT is defined outside this view; presumably it
- * returns from the function as soon as the two fields differ -- confirm.
- */
-static inline int s5p_lt(struct s5_pair x, struct s5_pair y)
-{
- COMPARE_LT(x.x, y.x);
- COMPARE_LT(x.y, y.y);
- return 0;
-}
-
-/* Instantiate s5p_sort(array, n): array sorter over struct s5_pair ordered by s5p_lt(). */
-#define ASORT_PREFIX(x) s5p_##x
-#define ASORT_KEY_TYPE struct s5_pair
-#define ASORT_LT(x,y) s5p_lt(x,y)
-#include "lib/sorter/array.h"
-
-/*
- * Custom presorter (SORT_INPUT_PRESORT): fill `buf' with as many generated
- * pairs as fit, sort them, and write them to `dest' as key5 records -- one
- * record per run of equal x, followed by the y values of that run.
- * Returns 0 when the generator produced nothing, 1 otherwise.
- */
-static int s5_presort(struct fastbuf *dest, void *buf, size_t bufsize)
-{
- uns max = MIN(bufsize/sizeof(struct s5_pair), 0xffffffff);
- struct s5_pair *a = buf;
- uns n = 0;
- while (n<max && s5_gen(&a[n]))
- n++;
- if (!n)
- return 0;
- s5p_sort(a, n);
- uns i = 0;
- while (i < n)
- {
- /* [j,i) becomes the run of pairs sharing a[j].x */
- uns j = i;
- while (i < n && a[i].x == a[j].x)
- i++;
- struct key5 k = { .x = a[j].x, .cnt = i-j };
- bwrite(dest, &k, sizeof(k));
- while (j < i)
- bputl(dest, a[j++].y);
- }
- return 1;
-}
-
-/*
- * Instantiate s5_sort(): integer keys (k.x) with 4*cnt bytes of data,
- * unification of equal keys, input supplied by s5_presort(), output
- * appended to a caller-supplied fastbuf.
- */
-#define SORT_KEY_REGULAR struct key5
-#define SORT_PREFIX(x) s5_##x
-#define SORT_DATA_SIZE(k) (4*(k).cnt)
-#define SORT_UNIFY
-#define SORT_UNIFY_WORKSPACE(k) SORT_DATA_SIZE(k)
-#define SORT_INPUT_PRESORT
-#define SORT_OUTPUT_THIS_FB
-#define SORT_INT(k) (k).x
-
-#include "lib/sorter/sorter.h"
-
-/* Instantiate s5b_sort(): the same, but reading the input from a fastbuf; the merge callbacks are shared. */
-#define SORT_KEY_REGULAR struct key5
-#define SORT_PREFIX(x) s5b_##x
-#define SORT_DATA_SIZE(k) (4*(k).cnt)
-#define SORT_UNIFY
-#define SORT_UNIFY_WORKSPACE(k) SORT_DATA_SIZE(k)
-#define SORT_INPUT_FB
-#define SORT_OUTPUT_THIS_FB
-#define SORT_INT(k) (k).x
-#define s5b_write_merged s5_write_merged
-#define s5b_copy_merged s5_copy_merged
-
-#include "lib/sorter/sorter.h"
-
-/*
- * Sorter test with unification of equal keys.
- *
- * mode == 0: the input is produced on the fly by the custom presorter
- * (s5_sort); mode != 0: the pairs are first written to a temporary
- * fastbuf as single-value records (s5b_sort).
- * N is chosen via nextprime() so that (u64)N*(N+2)*4 reaches `size'
- * (0 for size == 0).
- *
- * The output is appended to a fastbuf which already contains a marker
- * word; verification expects the marker followed by N records with
- * x = 0..N-1, each carrying the values 0..N-1 in order.
- */
-static void
-test_graph(uns mode, u64 size)
-{
- uns N = 3;
- while ((u64)N*(N+2)*4 < size)
- N = nextprime(N);
- if (!size)
- N = 0;
- msg(L_INFO, ">>> Graph%s (N=%u)", (mode ? "" : " with custom presorting"), N);
- s5_N = N;
- s5_K = N/4*3;
- s5_L = N/3*2;
- s5_i = s5_j = 0;
-
- struct fastbuf *in = NULL;
- if (mode)
- {
- struct s5_pair p;
- in = bopen_tmp(65536);
- while (s5_gen(&p))
- {
- struct key5 k = { .x = p.x, .cnt = 1 };
- bwrite(in, &k, sizeof(k));
- bputl(in, p.y);
- }
- brewind(in);
- }
-
- start();
- struct fastbuf *f = bopen_tmp(65536);
- /* Marker written before sorting, to check that existing output data are preserved */
- bputl(f, 0xfeedcafe);
- struct fastbuf *g = (mode ? s5b_sort(in, f, s5_N-1) : s5_sort(NULL, f, s5_N-1));
- ASSERT(f == g);
- stop();
-
- SORT_XTRACE(2, "Verifying");
- uns c = bgetl(f);
- ASSERT(c == 0xfeedcafe);
- for (uns i=0; i<N; i++)
- {
- struct key5 k;
- int ok = breadb(f, &k, sizeof(k));
- ASSERT(ok);
- ASSERT(k.x == i);
- ASSERT(k.cnt == N);
- for (uns j=0; j<N; j++)
- {
- uns y = bgetl(f);
- ASSERT(y == j);
- }
- }
- bclose(f);
-}
-
-/*** Simple 8-byte integer keys ***/
-
-/* A record consisting of a single 64-bit integer key. */
-struct key6 {
- u64 x;
-};
-
-/* Instantiate s6_sort(): 64-bit integer keys declared unique, fastbuf in, fastbuf out. */
-#define SORT_KEY_REGULAR struct key6
-#define SORT_PREFIX(x) s6_##x
-#define SORT_INPUT_FB
-#define SORT_OUTPUT_FB
-#define SORT_UNIQUE
-#define SORT_INT64(k) (k).x
-
-#include "lib/sorter/sorter.h"
-
-/*
- * Sorter test on 64-bit integer keys.
- *
- * mode selects the input order: 0 = increasing, 1 = decreasing,
- * 2 = pseudo-random permutation ((i*K + 17) % N). Callers must pass
- * 0..2, the value indexes a three-element name table.
- * The keys are the multiples 777777*i for i < N; verification expects
- * exactly these, in increasing order.
- */
-static void
-test_int64(int mode, u64 size)
-{
-  u64 N = size ? nextprime(MIN(size/8, 0xffff0000)) : 0;
-  u64 K = N/4*3;
-  /* %llu takes an unsigned long long, so cast to exactly that type */
-  msg(L_INFO, ">>> 64-bit integers (%s, N=%llu)", ((char *[]) { "increasing", "decreasing", "random" })[mode], (unsigned long long)N);
-
-  struct fastbuf *f = bopen_tmp(65536);
-  for (u64 i=0; i<N; i++)
-    bputq(f, 777777*((mode==0) ? i : (mode==1) ? N-1-i : ((u64)i * K + 17) % N));
-  brewind(f);
-
-  start();
-  f = s6_sort(f, NULL, 777777*(N-1));
-  stop();
-
-  SORT_XTRACE(2, "Verifying");
-  for (u64 i=0; i<N; i++)
-    {
-      u64 j = bgetq(f);
-      if (777777*i != j)
-        die("Discrepancy: %llu instead of %llu", (unsigned long long)j, (unsigned long long)(777777*i));
-    }
-  bclose(f);
-}
-
-/*** Main ***/
-
-/* Number of tests known to run_test(); valid test numbers are 0..TMAX-1 */
-#define TMAX 16
-
-/*
- * Run test number `i' on roughly `size' bytes of input.
- * Tests are grouped by kind, the offset within the group is passed
- * as the mode; numbers outside 0..15 do nothing.
- */
-static void
-run_test(uns i, u64 size)
-{
-  test_id = i;
-  if (i < 3)
-    test_int(i, size);
-  else if (i < 6)
-    test_counted(i-3, size);
-  else if (i < 9)
-    test_hashes(i-6, size);
-  else if (i < 11)
-    test_strings(i-9, size);
-  else if (i < 13)
-    test_graph(i-11, size);
-  else if (i < 16)
-    test_int64(i-13, size);
-}
-
-/*
- * sort-test [-v] [-d <debug>] [-s <size>] [-t <test>]
- *
- *   -d  set sorter_debug
- *   -s  approximate input size in bytes (default 10000000)
- *   -t  comma-separated list of test numbers (0..TMAX-1) to run;
- *       all tests are run if the option is not given
- *   -v  increase sorter_trace
- *
- * Exits with status 1 and a usage message on invalid arguments.
- */
-int
-main(int argc, char **argv)
-{
-  log_init(NULL);
-  int c;
-  u64 size = 10000000;
-  uns t = ~0;			/* Bit mask of tests to run */
-
-  while ((c = cf_getopt(argc, argv, CF_SHORT_OPTS "d:s:t:v", CF_NO_LONG_OPTS, NULL)) >= 0)
-    switch (c)
-      {
-      case 'd':
-        sorter_debug = atol(optarg);
-        break;
-      case 's':
-        if (cf_parse_u64(optarg, &size))
-          goto usage;
-        break;
-      case 't':
-        {
-          char *w[32];
-          int f = sepsplit(optarg, ',', w, ARRAY_SIZE(w));
-          if (f < 0)
-            goto usage;
-          t = 0;
-          for (int i=0; i<f; i++)
-            {
-              int j = atol(w[i]);
-              /* A negative index would make the shift below undefined */
-              if (j < 0 || j >= TMAX)
-                goto usage;
-              t |= 1U << j;
-            }
-        }
-        break;
-      case 'v':
-        sorter_trace++;
-        break;
-      default:
-      usage:
-        fputs("Usage: sort-test [-v] [-d <debug>] [-s <size>] [-t <test>]\n", stderr);
-        exit(1);
-      }
-  if (optind != argc)
-    goto usage;
-
-  for (uns i=0; i<TMAX; i++)
-    if (t & (1U << i))
-      run_test(i, size);
-
-  return 0;
-}
+++ /dev/null
-/*
- * UCW Library -- Universal Sorter
- *
- * (c) 2001--2007 Martin Mares <mj@ucw.cz>
- * (c) 2004 Robert Spalek <robert@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-/*
- * This is not a normal header file, but a generator of sorting
- * routines. Each time you include it with parameters set in the
- * corresponding preprocessor macros, it generates a file sorter
- * with the parameters given.
- *
- * The sorter operates on fastbufs containing sequences of items. Each item
- * consists of a key, optionally followed by data. The keys are represented
- * by fixed-size structures of type SORT_KEY internally, if this format differs
- * from the on-disk format, explicit reading and writing routines can be provided.
- * The data are always copied verbatim, unless the sorter is in the merging
- * mode in which it calls callbacks for merging of items with equal keys.
- *
- * All callbacks must be thread-safe.
- *
- * Basic parameters and callbacks:
- *
- * SORT_PREFIX(x) add a name prefix (used on all global names defined by the sorter)
- *
- * SORT_KEY data type capable of holding a single key in memory (the on-disk
- * representation can be different). Alternatively, you can use:
- * SORT_KEY_REGULAR data type holding a single key both in memory and on disk;
- * in this case, bread() and bwrite() is used to read/write keys
- * and it's also assumed that the keys are not very long.
- * int PREFIX_compare(SORT_KEY *a, SORT_KEY *b)
- * compares two keys, returns result like strcmp(). Mandatory.
- * int PREFIX_read_key(struct fastbuf *f, SORT_KEY *k)
- * reads a key from a fastbuf, returns nonzero=ok, 0=EOF.
- * Mandatory unless SORT_KEY_REGULAR is defined.
- * void PREFIX_write_key(struct fastbuf *f, SORT_KEY *k)
- * writes a key to a fastbuf. Mandatory unless SORT_KEY_REGULAR.
- *
- * SORT_KEY_SIZE(key) returns the real size of a key (a SORT_KEY type in memory
- * can be truncated to this number of bytes without any harm;
- * used to save memory when the keys have variable sizes).
- * Default: always store the whole SORT_KEY.
- * SORT_DATA_SIZE(key) gets a key and returns the amount of data following it.
- * Default: records consist of keys only.
- *
- * Integer sorting:
- *
- * SORT_INT(key) we are sorting by an integer value returned by this macro.
- * In this mode, PREFIX_compare is supplied automatically and the sorting
- * function gets an extra parameter specifying the range of the integers.
- * The better the range fits, the faster we sort.
- * Sets up SORT_HASH_xxx automatically.
- * SORT_INT64(key) the same for 64-bit integers.
- *
- * Hashing (optional, but it can speed sorting up):
- *
- * SORT_HASH_BITS signals that a monotone hashing function returning a given number of
- * bits is available. A monotone hash is a function f from keys to integers
- * such that f(x) < f(y) implies x < y, which is approximately uniformly
- * distributed. It should be declared as:
- * uns PREFIX_hash(SORT_KEY *a)
- *
- * Unification:
- *
- * SORT_UNIFY merge items with identical keys. It requires the following functions:
- * void PREFIX_write_merged(struct fastbuf *f, SORT_KEY **keys, void **data, uns n, void *buf)
- * takes n records in memory with keys which compare equal and writes
- * a single record to the given fastbuf. `buf' points to a buffer which
- * is guaranteed to hold the sum of workspace requirements (see below)
- * over all given records. The function is allowed to modify all its inputs.
- * void PREFIX_copy_merged(SORT_KEY **keys, struct fastbuf **data, uns n, struct fastbuf *dest)
- * takes n records with keys in memory and data in fastbufs and writes
- * a single record. Used only if SORT_DATA_SIZE or SORT_UNIFY_WORKSPACE
- * is defined.
- * SORT_UNIFY_WORKSPACE(key)
- * gets a key and returns the amount of workspace required when merging
- * the given record. Defaults to 0.
- *
- * Input (choose one of these):
- *
- * SORT_INPUT_FILE file of a given name
- * SORT_INPUT_FB seekable fastbuf stream
- * SORT_INPUT_PIPE non-seekable fastbuf stream
- * SORT_INPUT_PRESORT custom presorter. Calls function
- * int PREFIX_presort(struct fastbuf *dest, void *buf, size_t bufsize)
- * to get successive batches of pre-sorted data.
- * The function is passed a page-aligned presorting buffer.
- * It returns 1 on success or 0 on EOF.
- * SORT_DELETE_INPUT A C expression, if true, then the input files are deleted
- * as soon as possible.
- *
- * Output (choose one of these):
- *
- * SORT_OUTPUT_FILE file of a given name
- * SORT_OUTPUT_FB temporary fastbuf stream
- * SORT_OUTPUT_THIS_FB a given fastbuf stream which can already contain some data
- *
- * Other switches:
- *
- * SORT_UNIQUE all items have distinct keys (checked in debug mode)
- *
- * The function generated:
- *
- * <outfb> PREFIX_sort(<in>, <out> [,<range>]), where:
- * <in> = input file name/fastbuf or NULL
- * <out> = output file name/fastbuf or NULL
- * <range> = maximum integer value for the SORT_INT mode
- *
- * After including this file, all parameter macros are automatically
- * undef'd.
- */
-
-#include "lib/sorter/common.h"
-#include "lib/fastbuf.h"
-
-#include <fcntl.h>
-
-#define P(x) SORT_PREFIX(x)
-
-/*
- * Define the key type P(key). For SORT_KEY_REGULAR, the key is stored on
- * disk as it is in memory, so read_key/write_key are generated here as
- * plain breadb()/bwrite() of the whole structure. For SORT_KEY, the user
- * supplies them.
- */
-#ifdef SORT_KEY_REGULAR
-typedef SORT_KEY_REGULAR P(key);
-static inline int P(read_key) (struct fastbuf *f, P(key) *k)
-{
- return breadb(f, k, sizeof(P(key)));
-}
-static inline void P(write_key) (struct fastbuf *f, P(key) *k)
-{
- bwrite(f, k, sizeof(P(key)));
-}
-#elif defined(SORT_KEY)
-typedef SORT_KEY P(key);
-#else
-#error Missing definition of sorting key.
-#endif
-
-/* Type of hash values: u64 in SORT_INT64 mode (which is then handled as SORT_INT), uns otherwise */
-#ifdef SORT_INT64
-typedef u64 P(hash_t);
-#define SORT_INT SORT_INT64
-#define SORT_LONG_HASH
-#else
-typedef uns P(hash_t);
-#endif
-
-/* Integer mode: the comparison function is generated from SORT_INT */
-#ifdef SORT_INT
-static inline int P(compare) (P(key) *x, P(key) *y)
-{
- if (SORT_INT(*x) < SORT_INT(*y))
- return -1;
- if (SORT_INT(*x) > SORT_INT(*y))
- return 1;
- return 0;
-}
-
-/* Unless the user has declared a hash function of their own, the integer itself serves as the hash */
-#ifndef SORT_HASH_BITS
-static inline P(hash_t) P(hash) (P(key) *x)
-{
- return SORT_INT((*x));
-}
-#endif
-#endif
-
-/* Comparison operator for the included sorter parts: strict when unifying, non-strict otherwise */
-#ifdef SORT_UNIFY
-#define LESS <
-#else
-#define LESS <=
-#endif
-/* Swap x and y using z as a temporary */
-#define SWAP(x,y,z) do { z=x; x=y; y=z; } while(0)
-
-/* Uniqueness of keys is asserted only when debugging assertions are compiled in */
-#if defined(SORT_UNIQUE) && defined(DEBUG_ASSERTS)
-#define SORT_ASSERT_UNIQUE
-#endif
-
-/* Supply defaults for the size macros and remember whether the user has defined them */
-#ifdef SORT_KEY_SIZE
-#define SORT_VAR_KEY
-#else
-#define SORT_KEY_SIZE(key) sizeof(key)
-#endif
-
-#ifdef SORT_DATA_SIZE
-#define SORT_VAR_DATA
-#else
-#define SORT_DATA_SIZE(key) 0
-#endif
-
-/* Write `key' to `out' and, if records carry data, copy SORT_DATA_SIZE(*key) bytes of it from `in' */
-static inline void P(copy_data)(P(key) *key, struct fastbuf *in, struct fastbuf *out)
-{
- P(write_key)(out, key);
-#ifdef SORT_VAR_DATA
- bbcopy(in, out, SORT_DATA_SIZE(*key));
-#else
- (void) in;
-#endif
-}
-
-/*
- * When unifying records which have neither data nor workspace, the
- * streaming merge callback is generated automatically: it forwards to
- * the user's write_merged() with no data and no buffer.
- */
-#if defined(SORT_UNIFY) && !defined(SORT_VAR_DATA) && !defined(SORT_UNIFY_WORKSPACE)
-static inline void P(copy_merged)(P(key) **keys, struct fastbuf **data UNUSED, uns n, struct fastbuf *dest)
-{
- P(write_merged)(dest, keys, NULL, n, NULL);
-}
-#endif
-
-#if defined(SORT_HASH_BITS) || defined(SORT_INT)
-#define SORT_INTERNAL_RADIX
-#include "lib/sorter/s-radix.h"
-#endif
-
-#if defined(SORT_VAR_KEY) || defined(SORT_VAR_DATA) || defined(SORT_UNIFY_WORKSPACE)
-#include "lib/sorter/s-internal.h"
-#else
-#include "lib/sorter/s-fixint.h"
-#endif
-
-#include "lib/sorter/s-twoway.h"
-#include "lib/sorter/s-multiway.h"
-
-/*
- * The generated entry point: PREFIX_sort(<in>, <out> [, <range>]).
- *
- * It fills in a struct sort_context according to the SORT_xxx parameters
- * (input, output, hashing and the generated callbacks), passes it to
- * sorter_run() and returns the output fastbuf. In the SORT_OUTPUT_FILE
- * mode, the result is moved to the file named `out' and NULL is returned.
- */
-static struct fastbuf *P(sort)(
-#ifdef SORT_INPUT_FILE
- byte *in,
-#else
- struct fastbuf *in,
-#endif
-#ifdef SORT_OUTPUT_FILE
- byte *out
-#else
- struct fastbuf *out
-#endif
-#ifdef SORT_INT
- , u64 int_range
-#endif
- )
-{
- struct sort_context ctx;
- bzero(&ctx, sizeof(ctx));
-
- /* Input: the size is set to ~0 when it cannot be determined in advance */
-#ifdef SORT_INPUT_FILE
- ctx.in_fb = bopen_file(in, O_RDONLY, &sorter_fb_params);
- ctx.in_size = bfilesize(ctx.in_fb);
-#elif defined(SORT_INPUT_FB)
- ctx.in_fb = in;
- ctx.in_size = bfilesize(in);
-#elif defined(SORT_INPUT_PIPE)
- ctx.in_fb = in;
- ctx.in_size = ~(u64)0;
-#elif defined(SORT_INPUT_PRESORT)
- ASSERT(!in);
- ctx.custom_presort = P(presort);
- ctx.in_size = ~(u64)0;
-#else
-#error No input given.
-#endif
-#ifdef SORT_DELETE_INPUT
- /* NOTE(review): ctx.in_fb is not set in the SORT_INPUT_PRESORT mode -- confirm the two are never combined */
- if (SORT_DELETE_INPUT)
- bconfig(ctx.in_fb, BCONFIG_IS_TEMP_FILE, 1);
-#endif
-
- /* Output */
-#ifdef SORT_OUTPUT_FB
- ASSERT(!out);
-#elif defined(SORT_OUTPUT_THIS_FB)
- ctx.out_fb = out;
-#elif defined(SORT_OUTPUT_FILE)
- /* Just assume fastbuf output and rename the fastbuf later */
-#else
-#error No output given.
-#endif
-
- /* Hashing: in integer mode, hash_bits is the number of bits needed to represent int_range */
-#ifdef SORT_HASH_BITS
- ctx.hash_bits = SORT_HASH_BITS;
- ctx.radix_split = P(radix_split);
-#elif defined(SORT_INT)
- ctx.hash_bits = 0;
- while (ctx.hash_bits < 64 && (int_range >> ctx.hash_bits))
- ctx.hash_bits++;
- ctx.radix_split = P(radix_split);
-#endif
-
- /* Callbacks generated by the s-*.h parts included above */
- ctx.internal_sort = P(internal);
- ctx.internal_estimate = P(internal_estimate);
- ctx.twoway_merge = P(twoway_merge);
- ctx.multiway_merge = P(multiway_merge);
-
- sorter_run(&ctx);
-
-#ifdef SORT_OUTPUT_FILE
- bfix_tmp_file(ctx.out_fb, out);
- ctx.out_fb = NULL;
-#endif
- return ctx.out_fb;
-}
-
-#undef SORT_ASSERT_UNIQUE
-#undef SORT_DATA_SIZE
-#undef SORT_DELETE_INPUT
-#undef SORT_HASH_BITS
-#undef SORT_INPUT_FB
-#undef SORT_INPUT_FILE
-#undef SORT_INPUT_PIPE
-#undef SORT_INPUT_PRESORT
-#undef SORT_INT
-#undef SORT_INT64
-#undef SORT_INTERNAL_RADIX
-#undef SORT_KEY
-#undef SORT_KEY_REGULAR
-#undef SORT_KEY_SIZE
-#undef SORT_LONG_HASH
-#undef SORT_OUTPUT_FB
-#undef SORT_OUTPUT_FILE
-#undef SORT_OUTPUT_THIS_FB
-#undef SORT_PREFIX
-#undef SORT_UNIFY
-#undef SORT_UNIFY_WORKSPACE
-#undef SORT_UNIQUE
-#undef SORT_VAR_DATA
-#undef SORT_VAR_KEY
-#undef SWAP
-#undef LESS
-#undef P
+++ /dev/null
-#include "lib/lib.h"
-#include "lib/stkstring.h"
-
-#include <stdio.h>
-
-/* Return the number of bytes needed to concatenate the `cnt' strings in `s', including the terminating zero. */
-uns
-stk_array_len(char **s, uns cnt)
-{
-  uns total = 1;
-  for (uns i = 0; i < cnt; i++)
-    total += strlen(s[i]);
-  return total;
-}
-
-/*
- * Concatenate the `cnt' strings in `s' into `x' and terminate the result
- * with a zero byte. If `sep' is non-zero, it is inserted between
- * consecutive strings (not after the last one).
- */
-void
-stk_array_join(char *x, char **s, uns cnt, uns sep)
-{
-  for (uns i = 0; i < cnt; i++)
-    {
-      uns l = strlen(s[i]);
-      memcpy(x, s[i], l);
-      x += l;
-      if (sep && i+1 < cnt)
-        *x++ = sep;
-    }
-  *x = 0;
-}
-
-/*
- * Helper of stk_printf(): format into a scratch buffer and return the
- * number of bytes the caller has to allocate for the result (formatted
- * length plus one). If vsnprintf() reports a failure by a negative value,
- * the attempt is repeated with a scratch buffer twice as large.
- */
-uns
-stk_printf_internal(const char *fmt, ...)
-{
-  uns len = 256;
-  char *buf = alloca(len);
-  va_list ap;
-  int need;
-
-  va_start(ap, fmt);
-  for (;;)
-    {
-      va_list probe;
-      va_copy(probe, ap);
-      need = vsnprintf(buf, len, fmt, probe);
-      va_end(probe);
-      if (need >= 0)
-        break;
-      len *= 2;
-      buf = alloca(len);
-    }
-  va_end(ap);
-  return need+1;
-}
-
-/*
- * Helper of stk_vprintf(): format into a scratch buffer and return the
- * number of bytes the caller has to allocate for the result (formatted
- * length plus one). If vsnprintf() reports a failure by a negative value,
- * the attempt is repeated with a scratch buffer twice as large.
- *
- * `args' belongs to the caller: it is only ever copied here and is left
- * untouched, because the stk_vprintf() macro passes the very same list
- * to vsprintf() afterwards. In particular, va_end() must not be called
- * on it in this function.
- */
-uns
-stk_vprintf_internal(const char *fmt, va_list args)
-{
-  uns len = 256;
-  char *buf = alloca(len);
-  va_list args2;
-  for (;;)
-    {
-      va_copy(args2, args);
-      int l = vsnprintf(buf, len, fmt, args2);
-      va_end(args2);
-      if (l < 0)
-        len *= 2;
-      else
-        return l+1;
-      buf = alloca(len);
-    }
-}
-
-/*
- * Write `n' bytes of `src' to `dst' as two-digit lowercase hexadecimal
- * numbers separated by single spaces and terminate the string.
- */
-void
-stk_hexdump_internal(char *dst, const byte *src, uns n)
-{
-  const byte *end = src + n;
-  for (const byte *p = src; p < end; p++)
-    {
-      if (p != src)
-        *dst++ = ' ';
-      dst += sprintf(dst, "%02x", *p);
-    }
-  *dst = 0;
-}
-
-/*
- * Format the size `x' (in bytes) into `buf' in a human-readable form:
- * plain bytes below 1K, then K/M/G with one decimal place below 10 units
- * and whole units above. The value ~0 is printed as "unknown".
- * The stk_fsize() macro supplies a 16-byte buffer; the longest output
- * produced here fits in it.
- */
-void
-stk_fsize_internal(char *buf, u64 x)
-{
-  if (x < 1<<10)
-    sprintf(buf, "%dB", (int)x);
-  else if (x < 10<<10)
-    sprintf(buf, "%.1fK", (double)x/(1<<10));
-  else if (x < 1<<20)
-    sprintf(buf, "%dK", (int)(x/(1<<10)));
-  else if (x < 10<<20)
-    sprintf(buf, "%.1fM", (double)x/(1<<20));
-  else if (x < 1<<30)
-    sprintf(buf, "%dM", (int)(x/(1<<20)));
-  else if (x < (u64)10<<30)
-    sprintf(buf, "%.1fG", (double)x/(1<<30));
-  else if (x != ~(u64)0)
-    /* The number of gigabytes does not fit in an int for x >= 2^61, so print it in full width */
-    sprintf(buf, "%lluG", (unsigned long long)(x >> 30));
-  else
-    strcpy(buf, "unknown");
-}
-
-#ifdef TEST
-
-/* Self-test: exercises the stk_* macros and prints the results for comparison with the expected output. */
-int main(void)
-{
- char *a = stk_strndup("are!",3);
- a = stk_strcat(a, " the ");
- a = stk_strmulticat(a, stk_strdup("Jabberwock, "), "my", NULL);
- char *arr[] = { a, " son" };
- a = stk_strarraycat(arr, 2);
- a = stk_printf("Bew%s!", a);
- puts(a);
- puts(stk_hexdump(a, 3));
- char *ary[] = { "The", "jaws", "that", "bite" };
- puts(stk_strjoin(ary, 4, ' '));
- puts(stk_fsize(1234567));
- return 0;
-}
-
-#endif
+++ /dev/null
-/*
- * UCW Library -- Strings Allocated on the Stack
- *
- * (c) 2005--2007 Martin Mares <mj@ucw.cz>
- * (c) 2005 Tomas Valla <tom@ucw.cz>
- * (c) 2008 Pavel Charvat <pchar@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#ifndef _UCW_STKSTRING_H
-#define _UCW_STKSTRING_H
-
-#include <alloca.h>
-#include <string.h>
-#include <stdio.h>
-
-/*
- * All of these allocate the result by alloca() in the stack frame of the
- * calling function, so it must not be used after that function returns.
- * stk_printf() and stk_vprintf() expand their arguments twice.
- */
-#define stk_strdup(s) ({ const char *_s=(s); uns _l=strlen(_s)+1; char *_x=alloca(_l); memcpy(_x, _s, _l); _x; })
-#define stk_strndup(s,n) ({ const char *_s=(s); uns _l=strnlen(_s,(n)); char *_x=alloca(_l+1); memcpy(_x, _s, _l); _x[_l]=0; _x; })
-#define stk_strcat(s1,s2) ({ const char *_s1=(s1); const char *_s2=(s2); uns _l1=strlen(_s1); uns _l2=strlen(_s2); char *_x=alloca(_l1+_l2+1); memcpy(_x,_s1,_l1); memcpy(_x+_l1,_s2,_l2+1); _x; })
-/* The list of strings must end with NULL; the last array element is not counted as a string */
-#define stk_strmulticat(s...) ({ char *_s[]={s}; char *_x=alloca(stk_array_len(_s, ARRAY_SIZE(_s)-1)); stk_array_join(_x, _s, ARRAY_SIZE(_s)-1, 0); _x; })
-#define stk_strarraycat(s,n) ({ char **_s=(s); int _n=(n); char *_x=alloca(stk_array_len(_s,_n)); stk_array_join(_x, _s, _n, 0); _x; })
-/* Room for n-1 separators, but never less than the terminating zero (n can be 0) */
-#define stk_strjoin(s,n,sep) ({ char **_s=(s); int _n=(n); char *_x=alloca(stk_array_len(_s,_n)+(_n>0 ? _n-1 : 0)); stk_array_join(_x, _s, _n, (sep)); _x; })
-#define stk_printf(f...) ({ uns _l=stk_printf_internal(f); char *_x=alloca(_l); sprintf(_x, f); _x; })
-#define stk_vprintf(f, args) ({ uns _l=stk_vprintf_internal(f, args); char *_x=alloca(_l); vsprintf(_x, f, args); _x; })
-/* The cast matches the `const byte *' parameter of stk_hexdump_internal() */
-#define stk_hexdump(s,n) ({ uns _n=(n); char *_x=alloca(3*_n+1); stk_hexdump_internal(_x,(const byte*)(s),_n); _x; })
-#define stk_str_unesc(s) ({ const char *_s=(s); char *_d=alloca(strlen(_s)+1); str_unesc(_d, _s); _d; })
-#define stk_fsize(n) ({ char *_s=alloca(16); stk_fsize_internal(_s, n); _s; })
-
-uns stk_array_len(char **s, uns cnt);
-void stk_array_join(char *x, char **s, uns cnt, uns sep);
-uns stk_printf_internal(const char *x, ...) FORMAT_CHECK(printf,1,2);
-uns stk_vprintf_internal(const char *x, va_list args);
-void stk_hexdump_internal(char *dst, const byte *src, uns n);
-void stk_fsize_internal(char *dst, u64 size);
-
-#endif
+++ /dev/null
-# Tests for stkstring modules
-
-Run: ../obj/lib/stkstring-t
-Out: Beware the Jabberwock, my son!
- 42 65 77
- The jaws that bite
- 1.2M
+++ /dev/null
-/*
- * Checks the correctness of str_len() and hash_*() and measures whether
- * they are faster than the classical versions ;-)
- */
-
-#include "lib/hashfunc.h"
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <sys/time.h>
-
-/* It will be divided by (10 + strlen()). */
-#define TEST_TIME 1000000
-
-/* The shift of the string according to the alignment. */
-static uns alignment = 0;
-
-/* Fill `str' with `len' random non-zero bytes followed by a terminating zero. */
-static void
-random_string(byte *str, int len)
-{
-  int pos = 0;
-  while (pos < len)
-    str[pos++] = random() % 255 + 1;
-  str[len] = 0;
-}
-
-/* Return the number of microseconds elapsed since the previous call (the first call measures from a zeroed timestamp). */
-static uns
-elapsed_time(void)
-{
-  static struct timeval prev;
-  struct timeval now;
-  gettimeofday(&now, NULL);
-  uns usec = (now.tv_sec - prev.tv_sec) * 1000000 + (now.tv_usec - prev.tv_usec);
-  prev = now;
-  return usec;
-}
-
-/*
- * Usage: <program> [<alignment>]
- *
- * First checks str_len() against strlen() and hash_string() against
- * hash_block() on a fixed set of strings, then times strlen(), str_len(),
- * hash_string() and hash_string_nocase() on random strings of the lengths
- * listed in lengths[], each string placed `alignment' bytes into its buffer.
- */
-int
-main(int argc, char **argv)
-{
- byte *strings[] = {
- "",
- "a",
- "aa",
- "aaa",
- "aaaa",
- "aaaaa",
- "aaaaaa",
- "aaaaaaa",
- "aaaaaaaa",
- "aaaaaaaaa",
- "aaaaaaaaaa",
- "AHOJ",
- "\200aaaa",
- "\200",
- "\200\200",
- "\200\200\200",
- "\200\200\200\200",
- "\200\200\200\200\200",
- "kelapS treboR",
- "Robert Spalek",
- "uuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuu",
- "********************************",
- "****************************************************************",
- NULL
- };
- /* String lengths to benchmark, terminated by -1 */
- int lengths[] = {
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
- 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
- 30, 40, 50, 60, 70, 80, 90, 100,
- 200, 300, 400, 500, 600, 700, 800, 900, 1000,
- 2000, 4000, 8000, 16000, 32000, 64000,
- -1
- };
- int i;
- if (argc > 1)
- alignment = atoi(argv[1]);
- printf("Alignment set to %d\n", alignment);
- /* Correctness of str_len() */
- for (i=0; strings[i]; i++)
- if (strlen(strings[i]) != str_len(strings[i]))
- die("Internal str_len() error on string %d", i);
- printf("%d strings tested OK\n", i);
- /* Consistency of hash_string() with hash_block() */
- for (i=0; strings[i]; i++)
- {
- uns h1, h2;
- h1 = hash_string(strings[i]);
- h2 = hash_string_nocase(strings[i]);
- if (h1 != hash_block(strings[i], str_len(strings[i])))
- die("Internal hash_string() error on string %d", i);
- printf("hash %2d = %08x %08x", i, h1, h2);
- if (h1 == h2)
- printf(" upper case?");
- printf("\n");
- }
- /* Timing; the number of passes is TEST_TIME divided by (length + 10) */
- for (i=0; lengths[i] >= 0; i++)
- {
- byte str[lengths[i] + 1 + alignment];
- uns count = TEST_TIME / (lengths[i] + 10);
- uns el1 = 0, el2 = 0, elh = 0, elhn = 0;
- uns tot1 = 0, tot2 = 0, hash = 0, hashn = 0;
- uns j;
- for (j=0; j<count; j++)
- {
- random_string(str + alignment, lengths[i]);
- elapsed_time();
- /* Avoid "optimizing" by gcc, since the functions are
- * attributed PURE. */
- tot1 += strlen(str + alignment);
- el1 += elapsed_time();
- tot2 += str_len(str + alignment);
- el2 += elapsed_time();
- hash ^= hash_string(str + alignment);
- elh += elapsed_time();
- hashn ^= hash_string_nocase(str + alignment);
- elhn += elapsed_time();
- }
- if (tot1 != tot2)
- die("Internal error during test %d", i);
- printf("Test %d: strlen = %d, passes = %d, classical = %d usec, speedup = %.4f\n",
- i, lengths[i], count, el1, (el1 + 0.) / el2);
- printf("\t\t total hash = %08x/%08x, hash time = %d/%d usec\n", hash, hashn, elh, elhn);
- }
-/*
- printf("test1: %d\n", hash_modify(10000000, 10000000, 99777555));
- printf("test1: %d, %d\n", i, hash_modify(i, lengths[i-2], 99777333));
- printf("test1: %d, %d\n", i, hash_modify(lengths[i-2], i, 99777333));
- printf("test1: %d,%d,%d->%d\n", i, i*3-2, i*i, hash_modify(4587, i*3-2, i*i));
- printf("test1: %d\n", hash_modify(lengths[5], 345, i));
-*/
- return 0;
-}
+++ /dev/null
-/*
- * UCW Library -- Character Classes
- *
- * (c) 1998--2004 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/chartype.h"
-
-/* Table indexed by character code: the `cat' column of every CHAR() entry in charmap.h */
-const unsigned char _c_cat[256] = {
-#define CHAR(code,upper,lower,cat) cat,
-#include "lib/charmap.h"
-#undef CHAR
-};
+++ /dev/null
-/*
- * UCW Library -- Lowercase Map
- *
- * (c) 1997--2004 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/chartype.h"
-
-/* Table indexed by character code: the `lower' column of every CHAR() entry in charmap.h */
-const unsigned char _c_lower[256] = {
-#define CHAR(code,upper,lower,cat) lower,
-#include "lib/charmap.h"
-#undef CHAR
-};
+++ /dev/null
-/*
- * UCW Library -- Uppercase Map
- *
- * (c) 1997--2004 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/chartype.h"
-
-/* Table indexed by character code: the `upper' column of every CHAR() entry in charmap.h */
-const unsigned char _c_upper[256] = {
-#define CHAR(code,upper,lower,cat) upper,
-#include "lib/charmap.h"
-#undef CHAR
-};
+++ /dev/null
-/*
- * UCW Library -- String Routines
- *
- * (c) 2006 Pavel Charvat <pchar@ucw.cz>
- * (c) 2007 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#undef LOCAL_DEBUG
-
-#include "lib/lib.h"
-#include "lib/chartype.h"
-#include <stdlib.h>
-
-/*
- * Expands C99-like escape sequences in `s' and stores the result in `d'.
- * Returns a pointer to the terminating zero of the output.
- * It is safe to use the same buffer for both input and output.
- *
- * Recognized are the single-character escapes (\a \b \f \n \r \t \v \? \' \" \\),
- * \x followed by hexadecimal digits and \ followed by up to 3 octal digits.
- * Numeric escapes with a value above 255 produce no output. A backslash
- * followed by anything else is copied verbatim.
- */
-char *
-str_unesc(char *d, const char *s)
-{
-  while (*s)
-    {
-      if (*s == '\\')
-        switch (s[1])
-          {
-          case 'a': *d++ = '\a'; s += 2; break;
-          case 'b': *d++ = '\b'; s += 2; break;
-          case 'f': *d++ = '\f'; s += 2; break;
-          case 'n': *d++ = '\n'; s += 2; break;
-          case 'r': *d++ = '\r'; s += 2; break;
-          case 't': *d++ = '\t'; s += 2; break;
-          case 'v': *d++ = '\v'; s += 2; break;
-          case '\?': *d++ = '\?'; s += 2; break;
-          case '\'': *d++ = '\''; s += 2; break;
-          case '\"': *d++ = '\"'; s += 2; break;
-          case '\\': *d++ = '\\'; s += 2; break;
-          case 'x':
-            if (!Cxdigit(s[2]))
-              {
-                /* Drop the backslash only, the `x' is copied by the next iteration */
-                s++;
-                DBG("\\x used with no following hex digits");
-              }
-            else
-              {
-                char *p;
-                uns v = strtoul(s + 2, &p, 16);
-                if (v <= 255)
-                  *d++ = v;
-                else
-                  DBG("hex escape sequence out of range");
-                s = (char *)p;
-              }
-            break;
-          default:
-            if (s[1] >= '0' && s[1] <= '7')
-              {
-                uns v = s[1] - '0';
-                s += 2;
-                for (uns i = 0; i < 2 && *s >= '0' && *s <= '7'; s++, i++)
-                  v = (v << 3) + *s - '0';
-                if (v <= 255)
-                  *d++ = v;
-                else
-                  DBG("octal escape sequence out of range");
-              }
-            else
-              /*
-               * Unknown escape: copy the backslash as it is. This must not
-               * happen after an octal escape, since `s' then points to the
-               * next unprocessed character, possibly the terminating zero.
-               */
-              *d++ = *s++;
-            break;
-          }
-      else
-        *d++ = *s++;
-    }
-  *d = 0;
-  return d;
-}
-
-/*
- * Render a bit mask as a string: for every character of `fmt', the i-th
- * character is copied to `dest' if bit i of `flags' is set and replaced
- * by '-' otherwise. `dest' must have room for strlen(fmt)+1 bytes.
- * Returns `dest'.
- */
-char *
-str_format_flags(char *dest, const char *fmt, uns flags)
-{
-  char *start = dest;
-  for (uns i=0; fmt[i]; i++)
-    {
-      /* Positions beyond the width of `flags' have no bit; shift an unsigned 1 so that the top bit is usable */
-      if (i < 8*sizeof(flags) && (flags & (1U << i)))
-        *dest++ = fmt[i];
-      else
-        *dest++ = '-';
-    }
-  *dest = 0;
-  return start;
-}
+++ /dev/null
-/*
- * UCW Library -- Syncing Directories
- *
- * (c) 2004--2005 Martin Mares <mj@ucw.cz>
- */
-
-#include "lib/lib.h"
-
-#include <fcntl.h>
-#include <unistd.h>
-
-/*
- * Call fsync() on the directory `name'. Failure to open or sync the
- * directory is only logged, nothing is reported to the caller.
- */
-void
-sync_dir(const char *name)
-{
- int fd = open(name, O_RDONLY
-#ifdef CONFIG_LINUX
- | O_DIRECTORY
-#endif
-);
- if (fd < 0)
- goto err;
- int err = fsync(fd);
- /* NOTE(review): close() runs before the %m below is expanded -- confirm it cannot disturb errno left by fsync() */
- close(fd);
- if (err >= 0)
- return;
- err:
- msg(L_ERROR, "Unable to sync directory %s: %m", name);
-}
+++ /dev/null
-/*
- * The UCW Library -- Threading Helpers
- *
- * (c) 2006 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-#include "lib/threads.h"
-#include "lib/conf.h"
-
-/* Configurable as Threads.DefaultStackSize. NOTE(review): 65556 looks like a typo for 65536 -- confirm. */
-uns default_thread_stack_size = 65556;
-
-static struct cf_section threads_config = {
- CF_ITEMS {
- CF_UNS("DefaultStackSize", &default_thread_stack_size),
- CF_END
- }
-};
-
-/* Registers the "Threads" configuration section; marked CONSTRUCTOR. */
-static void CONSTRUCTOR
-ucwlib_threads_conf_init(void)
-{
- cf_declare_section("Threads", &threads_config, 0);
-}
+++ /dev/null
-/*
- * The UCW Library -- Threading Helpers
- *
- * (c) 2006 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-#include "lib/threads.h"
-
-#ifdef CONFIG_UCW_THREADS
-
-#include <pthread.h>
-
-/*
- * On Linux, if the gettid system call number is known at compile time,
- * provide a wrapper around the raw system call and announce it by
- * defining CONFIG_USE_GETTID.
- */
-#ifdef CONFIG_LINUX
-#include <sys/types.h>
-#include <sys/syscall.h>
-#include <unistd.h>
-#ifdef __NR_gettid
-static pid_t
-gettid(void)
-{
- return syscall(__NR_gettid);
-}
-#define CONFIG_USE_GETTID
-#endif
-#endif
-
-static pthread_key_t ucwlib_context_key;
-static pthread_mutex_t ucwlib_master_mutex;
-
-/* Destructor registered with the pthread key: frees the per-thread context. */
-static void
-ucwlib_free_thread_context(void *p)
-{
- xfree(p);
-}
-
-/*
- * Creates the key holding per-thread contexts and initializes the
- * master mutex; marked CONSTRUCTOR. Dies if the key cannot be created.
- */
-static void CONSTRUCTOR
-ucwlib_threads_init(void)
-{
-  /* pthread functions return a positive error number instead of setting errno */
-  int err = pthread_key_create(&ucwlib_context_key, ucwlib_free_thread_context);
-  if (err)
-    die("Cannot create pthread_key: error %d", err);
-  pthread_mutex_init(&ucwlib_master_mutex, NULL);
-}
-
-/*
- * Return an ID for the calling thread: the kernel thread ID where
- * gettid() is available and returns a positive value, otherwise the next
- * value of a process-wide counter guarded by the master lock.
- */
-static int
-ucwlib_tid(void)
-{
-  static int tid_counter;
-
-#ifdef CONFIG_USE_GETTID
-  /* The syscall might be unimplemented, fall back to the counter then */
-  int kernel_tid = gettid();
-  if (kernel_tid > 0)
-    return kernel_tid;
-#endif
-
-  ucwlib_lock();
-  int fallback = ++tid_counter;
-  ucwlib_unlock();
-  return fallback;
-}
-
-/*
- * Return the context of the calling thread. It is allocated (zeroed,
- * with thread_id filled in) and attached to the thread on first use.
- */
-struct ucwlib_context *
-ucwlib_thread_context(void)
-{
-  struct ucwlib_context *ctx = pthread_getspecific(ucwlib_context_key);
-  if (ctx)
-    return ctx;
-
-  ctx = xmalloc_zero(sizeof(*ctx));
-  ctx->thread_id = ucwlib_tid();
-  pthread_setspecific(ucwlib_context_key, ctx);
-  return ctx;
-}
-
-/* Acquire the library-wide master mutex. */
-void
-ucwlib_lock(void)
-{
- pthread_mutex_lock(&ucwlib_master_mutex);
-}
-
-/* Release the library-wide master mutex. */
-void
-ucwlib_unlock(void)
-{
- pthread_mutex_unlock(&ucwlib_master_mutex);
-}
-
-#else
-
-/* Without CONFIG_UCW_THREADS: a single static context is returned to every caller. */
-struct ucwlib_context *
-ucwlib_thread_context(void)
-{
- static struct ucwlib_context ucwlib_context;
- return &ucwlib_context;
-}
-
-/* Without CONFIG_UCW_THREADS: locking is a no-op. */
-void
-ucwlib_lock(void)
-{
-}
-
-/* Without CONFIG_UCW_THREADS: unlocking is a no-op. */
-void
-ucwlib_unlock(void)
-{
-}
-
-#endif
-
-#ifdef TEST
-
-/* Self-test: take and release the master lock, then log the ID of the main thread. */
-int main(void)
-{
- ucwlib_lock();
- ucwlib_unlock();
- /* NOTE(review): other files in this library log through msg(); confirm that log() is still available here */
- log(L_INFO, "tid=%d", ucwlib_thread_context()->thread_id);
- return 0;
-}
-
-#endif
+++ /dev/null
-/*
- * The UCW Library -- Threading Helpers
- *
- * (c) 2006 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#ifndef _UCW_THREAD_H
-#define _UCW_THREAD_H
-
-/* This structure holds per-thread data */
-
-/* Obtained by ucwlib_thread_context(). */
-struct ucwlib_context {
- int thread_id; // Thread ID (either kernel tid or a counter)
- int temp_counter; // Counter for fb-temp.c
- struct asio_queue *io_queue; // Async I/O queue for fb-direct.c
- sh_sighandler_t *signal_handlers; // Signal handlers for sighandler.c
-};
-
-struct ucwlib_context *ucwlib_thread_context(void);
-
-/* Global lock used for initialization, cleanup and other not so frequently accessed global state */
-
-void ucwlib_lock(void);
-void ucwlib_unlock(void);
-
-#ifdef CONFIG_UCW_THREADS
-
-extern uns default_thread_stack_size;
-
-#endif
-
-#endif
+++ /dev/null
-/*
- * UCW Library -- A Simple Millisecond Timer
- *
- * (c) 2007 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/time.h>
-
-/* Return the current time of gettimeofday() converted to milliseconds. */
-timestamp_t
-get_timestamp(void)
-{
-  struct timeval now;
-  gettimeofday(&now, NULL);
-  timestamp_t ms = (timestamp_t)now.tv_sec * 1000;
-  ms += now.tv_usec / 1000;
-  return ms;
-}
-
-void
-init_timer(timestamp_t *timer)
-{
- *timer = get_timestamp();
-}
-
-uns
-get_timer(timestamp_t *timer)
-{
- timestamp_t t = *timer;
- *timer = get_timestamp();
- return MIN(*timer-t, ~0U);
-}
-
-uns
-switch_timer(timestamp_t *old, timestamp_t *new)
-{
- *new = get_timestamp();
- return MIN(*new-*old, ~0U);
-}
+++ /dev/null
-/*
- * UCW Library -- Fast Access to Unaligned Data
- *
- * (c) 1997--2007 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#ifndef _UCW_UNALIGNED_H
-#define _UCW_UNALIGNED_H
-
-/* Big endian format */
-
-#if defined(CPU_ALLOW_UNALIGNED) && defined(CPU_BIG_ENDIAN)
-static inline uns get_u16_be(const void *p) { return *(u16 *)p; }
-static inline u32 get_u32_be(const void *p) { return *(u32 *)p; }
-static inline u64 get_u64_be(const void *p) { return *(u64 *)p; }
-static inline void put_u16_be(void *p, uns x) { *(u16 *)p = x; }
-static inline void put_u32_be(void *p, u32 x) { *(u32 *)p = x; }
-static inline void put_u64_be(void *p, u64 x) { *(u64 *)p = x; }
-#else
-static inline uns get_u16_be(const void *p)
-{
- const byte *c = p;
- return (c[0] << 8) | c[1];
-}
-static inline u32 get_u32_be(const void *p)
-{
- const byte *c = p;
- return (c[0] << 24) | (c[1] << 16) | (c[2] << 8) | c[3];
-}
-static inline u64 get_u64_be(const void *p)
-{
- return ((u64) get_u32_be(p) << 32) | get_u32_be((const byte *)p+4);
-}
-static inline void put_u16_be(void *p, uns x)
-{
- byte *c = p;
- c[0] = x >> 8;
- c[1] = x;
-}
-static inline void put_u32_be(void *p, u32 x)
-{
- byte *c = p;
- c[0] = x >> 24;
- c[1] = x >> 16;
- c[2] = x >> 8;
- c[3] = x;
-}
-static inline void put_u64_be(void *p, u64 x)
-{
- put_u32_be(p, x >> 32);
- put_u32_be((byte *)p+4, x);
-}
-#endif
-
-/* Little-endian format */
-
-#if defined(CPU_ALLOW_UNALIGNED) && !defined(CPU_BIG_ENDIAN)
-static inline uns get_u16_le(const void *p) { return *(u16 *)p; }
-static inline u32 get_u32_le(const void *p) { return *(u32 *)p; }
-static inline u64 get_u64_le(const void *p) { return *(u64 *)p; }
-static inline void put_u16_le(void *p, uns x) { *(u16 *)p = x; }
-static inline void put_u32_le(void *p, u32 x) { *(u32 *)p = x; }
-static inline void put_u64_le(void *p, u64 x) { *(u64 *)p = x; }
-#else
-static inline uns get_u16_le(const void *p)
-{
- const byte *c = p;
- return c[0] | (c[1] << 8);
-}
-static inline u32 get_u32_le(const void *p)
-{
- const byte *c = p;
- return c[0] | (c[1] << 8) | (c[2] << 16) | (c[3] << 24);
-}
-static inline u64 get_u64_le(const void *p)
-{
- return get_u32_le(p) | ((u64) get_u32_le((const byte *)p+4) << 32);
-}
-static inline void put_u16_le(void *p, uns x)
-{
- byte *c = p;
- c[0] = x;
- c[1] = x >> 8;
-}
-static inline void put_u32_le(void *p, u32 x)
-{
- byte *c = p;
- c[0] = x;
- c[1] = x >> 8;
- c[2] = x >> 16;
- c[3] = x >> 24;
-}
-static inline void put_u64_le(void *p, u64 x)
-{
- put_u32_le(p, x);
- put_u32_le((byte *)p+4, x >> 32);
-}
-#endif
-
-static inline u64 get_u40_be(const void *p)
-{
- const byte *c = p;
- return ((u64)c[0] << 32) | get_u32_be(c+1);
-}
-
-static inline void put_u40_be(void *p, u64 x)
-{
- byte *c = p;
- c[0] = x >> 32;
- put_u32_be(c+1, x);
-}
-
-static inline u64 get_u40_le(const void *p)
-{
- const byte *c = p;
- return get_u32_le(c) | ((u64) c[4] << 32);
-}
-
-static inline void put_u40_le(void *p, u64 x)
-{
- byte *c = p;
- put_u32_le(c, x);
- c[4] = x >> 32;
-}
-
-/* The native format */
-
-#ifdef CPU_BIG_ENDIAN
-
-static inline uns get_u16(const void *p) { return get_u16_be(p); }
-static inline u32 get_u32(const void *p) { return get_u32_be(p); }
-static inline u64 get_u64(const void *p) { return get_u64_be(p); }
-static inline u64 get_u40(const void *p) { return get_u40_be(p); }
-static inline void put_u16(void *p, uns x) { return put_u16_be(p, x); }
-static inline void put_u32(void *p, u32 x) { return put_u32_be(p, x); }
-static inline void put_u64(void *p, u64 x) { return put_u64_be(p, x); }
-static inline void put_u40(void *p, u64 x) { return put_u40_be(p, x); }
-
-#else
-
-static inline uns get_u16(const void *p) { return get_u16_le(p); }
-static inline u32 get_u32(const void *p) { return get_u32_le(p); }
-static inline u64 get_u64(const void *p) { return get_u64_le(p); }
-static inline u64 get_u40(const void *p) { return get_u40_le(p); }
-static inline void put_u16(void *p, uns x) { return put_u16_le(p, x); }
-static inline void put_u32(void *p, u32 x) { return put_u32_le(p, x); }
-static inline void put_u64(void *p, u64 x) { return put_u64_le(p, x); }
-static inline void put_u40(void *p, u64 x) { return put_u40_le(p, x); }
-
-#endif
-
-/* Just for completeness */
-
-static inline uns get_u8(const void *p) { return *(const byte *)p; }
-static inline void put_u8(void *p, uns x) { *(byte *)p = x; }
-
-/* Backward compatibility macros */
-
-#define GET_U8(p) get_u8(p)
-#define GET_U16(p) get_u16(p)
-#define GET_U32(p) get_u32(p)
-#define GET_U64(p) get_u64(p)
-#define GET_U40(p) get_u40(p)
-
-#define PUT_U8(p,x) put_u8(p,x);
-#define PUT_U16(p,x) put_u16(p,x)
-#define PUT_U32(p,x) put_u32(p,x)
-#define PUT_U64(p,x) put_u64(p,x)
-#define PUT_U40(p,x) put_u40(p,x)
-
-#endif
+++ /dev/null
-/*
- * UCW Library -- UTF-8 Functions
- *
- * (c) 1997--2004 Martin Mares <mj@ucw.cz>
- * (c) 2003 Robert Spalek <robert@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-#include "lib/unicode.h"
-
-uns
-utf8_strlen(const byte *str)
-{
- uns len = 0;
- while (*str)
- {
- UTF8_SKIP(str);
- len++;
- }
- return len;
-}
-
-uns
-utf8_strnlen(const byte *str, uns n)
-{
- uns len = 0;
- const byte *end = str + n;
- while (str < end)
- {
- UTF8_SKIP(str);
- len++;
- }
- return len;
-}
-
-#ifdef TEST
-
-#include <string.h>
-#include <stdio.h>
-
-int main(int argc, char **argv)
-{
- byte buf[256];
-
-#define FUNCS \
- F(UTF8_GET) F(UTF8_32_GET) F(UTF16_BE_GET) F(UTF16_LE_GET) \
- F(UTF8_PUT) F(UTF8_32_PUT) F(UTF16_BE_PUT) F(UTF16_LE_PUT)
-
- enum {
-#define F(x) FUNC_##x,
- FUNCS
-#undef F
- };
- char *names[] = {
-#define F(x) [FUNC_##x] = #x,
- FUNCS
-#undef F
- };
-
- uns func = ~0U;
- if (argc > 1)
- for (uns i = 0; i < ARRAY_SIZE(names); i++)
- if (!strcasecmp(names[i], argv[1]))
- func = i;
- if (!~func)
- {
- fprintf(stderr, "Invalid usage!\n");
- return 1;
- }
-
- if (func < FUNC_UTF8_PUT)
- {
- byte *p = buf, *q = buf, *last;
- uns u;
- bzero(buf, sizeof(buf));
- while (scanf("%x", &u) == 1)
- *q++ = u;
- while (p < q)
- {
- last = p;
- if (p != buf)
- putchar(' ');
- switch (func)
- {
- case FUNC_UTF8_GET:
- p = utf8_get(p, &u);
- break;
- case FUNC_UTF8_32_GET:
- p = utf8_32_get(p, &u);
- break;
- case FUNC_UTF16_BE_GET:
- p = utf16_be_get(p, &u);
- break;
- case FUNC_UTF16_LE_GET:
- p = utf16_le_get(p, &u);
- break;
- default:
- ASSERT(0);
- }
- printf("%04x", u);
- ASSERT(last < p && p <= q);
- }
- putchar('\n');
- }
- else
- {
- uns u, i=0;
- while (scanf("%x", &u) == 1)
- {
- byte *p = buf, *q = buf;
- switch (func)
- {
- case FUNC_UTF8_PUT:
- p = utf8_put(p, u);
- break;
- case FUNC_UTF8_32_PUT:
- p = utf8_32_put(p, u);
- break;
- case FUNC_UTF16_BE_PUT:
- p = utf16_be_put(p, u);
- break;
- case FUNC_UTF16_LE_PUT:
- p = utf16_le_put(p, u);
- break;
- default:
- ASSERT(0);
- }
- while (q < p)
- {
- if (i++)
- putchar(' ');
- printf("%02x", *q++);
- }
- }
- putchar('\n');
- }
- return 0;
-}
-
-#endif
+++ /dev/null
-/*
- * UCW Library -- Unicode Characters
- *
- * (c) 1997--2004 Martin Mares <mj@ucw.cz>
- * (c) 2004 Robert Spalek <robert@ucw.cz>
- * (c) 2007 Pavel Charvat <pchar@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#ifndef _UCW_UNICODE_H
-#define _UCW_UNICODE_H
-
-#include "lib/unaligned.h"
-
-/* Macros for handling UTF-8 */
-
-#define UNI_REPLACEMENT 0xfffc
-
-/* Encode a character from the basic multilingual plane [0, 0xFFFF]
- * (subset of Unicode 4.0); up to 3 bytes needed (RFC2279) */
-static inline byte *
-utf8_put(byte *p, uns u)
-{
- if (u < 0x80)
- *p++ = u;
- else if (u < 0x800)
- {
- *p++ = 0xc0 | (u >> 6);
- *p++ = 0x80 | (u & 0x3f);
- }
- else
- {
- ASSERT(u < 0x10000);
- *p++ = 0xe0 | (u >> 12);
- *p++ = 0x80 | ((u >> 6) & 0x3f);
- *p++ = 0x80 | (u & 0x3f);
- }
- return p;
-}
-
-/* Encode a value from the range [0, 0x7FFFFFFF];
- * (superset of Unicode 4.0) up to 6 bytes needed (RFC2279) */
-static inline byte *
-utf8_32_put(byte *p, uns u)
-{
- if (u < 0x80)
- *p++ = u;
- else if (u < 0x800)
- {
- *p++ = 0xc0 | (u >> 6);
- goto put1;
- }
- else if (u < (1<<16))
- {
- *p++ = 0xe0 | (u >> 12);
- goto put2;
- }
- else if (u < (1<<21))
- {
- *p++ = 0xf0 | (u >> 18);
- goto put3;
- }
- else if (u < (1<<26))
- {
- *p++ = 0xf8 | (u >> 24);
- goto put4;
- }
- else if (u < (1U<<31))
- {
- *p++ = 0xfc | (u >> 30);
- *p++ = 0x80 | ((u >> 24) & 0x3f);
-put4: *p++ = 0x80 | ((u >> 18) & 0x3f);
-put3: *p++ = 0x80 | ((u >> 12) & 0x3f);
-put2: *p++ = 0x80 | ((u >> 6) & 0x3f);
-put1: *p++ = 0x80 | (u & 0x3f);
- }
- else
- ASSERT(0);
- return p;
-}
-
-#define UTF8_GET_NEXT if (unlikely((*p & 0xc0) != 0x80)) goto bad; u = (u << 6) | (*p++ & 0x3f)
-
-/* Decode a character from the basic multilingual plane [0, 0xFFFF]
- * or return 'repl' if the encoding has been corrupted */
-static inline byte *
-utf8_get_repl(const byte *p, uns *uu, uns repl)
-{
- uns u = *p++;
- if (u < 0x80)
- ;
- else if (unlikely(u < 0xc0))
- {
- /* Incorrect byte sequence */
- bad:
- u = repl;
- }
- else if (u < 0xe0)
- {
- u &= 0x1f;
- UTF8_GET_NEXT;
- }
- else if (likely(u < 0xf0))
- {
- u &= 0x0f;
- UTF8_GET_NEXT;
- UTF8_GET_NEXT;
- }
- else
- goto bad;
- *uu = u;
- return (byte *)p;
-}
-
-/* Decode a value from the range [0, 0x7FFFFFFF]
- * or return 'repl' if the encoding has been corrupted */
-static inline byte *
-utf8_32_get_repl(const byte *p, uns *uu, uns repl)
-{
- uns u = *p++;
- if (u < 0x80)
- ;
- else if (unlikely(u < 0xc0))
- {
- /* Incorrect byte sequence */
- bad:
- u = repl;
- }
- else if (u < 0xe0)
- {
- u &= 0x1f;
- goto get1;
- }
- else if (u < 0xf0)
- {
- u &= 0x0f;
- goto get2;
- }
- else if (u < 0xf8)
- {
- u &= 0x07;
- goto get3;
- }
- else if (u < 0xfc)
- {
- u &= 0x03;
- goto get4;
- }
- else if (u < 0xfe)
- {
- u &= 0x01;
- UTF8_GET_NEXT;
-get4: UTF8_GET_NEXT;
-get3: UTF8_GET_NEXT;
-get2: UTF8_GET_NEXT;
-get1: UTF8_GET_NEXT;
- }
- else
- goto bad;
- *uu = u;
- return (byte *)p;
-}
-
-/* Decode a character from the basic multilingual plane [0, 0xFFFF]
- * or return UNI_REPLACEMENT if the encoding has been corrupted */
-static inline byte *
-utf8_get(const byte *p, uns *uu)
-{
- return utf8_get_repl(p, uu, UNI_REPLACEMENT);
-}
-
-/* Decode a value from the range [0, 0x7FFFFFFF]
- * or return UNI_REPLACEMENT if the encoding has been corrupted */
-static inline byte *
-utf8_32_get(const byte *p, uns *uu)
-{
- return utf8_32_get_repl(p, uu, UNI_REPLACEMENT);
-}
-
-#define PUT_UTF8(p,u) p = utf8_put(p, u)
-#define GET_UTF8(p,u) p = (byte*)utf8_get(p, &(u))
-
-#define PUT_UTF8_32(p,u) p = utf8_32_put(p, u)
-#define GET_UTF8_32(p,u) p = (byte*)utf8_32_get(p, &(u))
-
-#define UTF8_SKIP(p) do { \
- uns c = *p++; \
- if (c >= 0xc0) \
- while (c & 0x40 && *p >= 0x80 && *p < 0xc0) \
- p++, c <<= 1; \
- } while (0)
-
-#define UTF8_SKIP_BWD(p) while ((*--(p) & 0xc0) == 0x80)
-
-static inline uns
-utf8_space(uns u)
-{
- if (u < 0x80)
- return 1;
- if (u < 0x800)
- return 2;
- if (u < (1<<16))
- return 3;
- if (u < (1<<21))
- return 4;
- if (u < (1<<26))
- return 5;
- return 6;
-}
-
-static inline uns
-utf8_encoding_len(uns c)
-{
- if (c < 0x80)
- return 1;
- ASSERT(c >= 0xc0 && c < 0xfe);
- if (c < 0xe0)
- return 2;
- if (c < 0xf0)
- return 3;
- if (c < 0xf8)
- return 4;
- if (c < 0xfc)
- return 5;
- return 6;
-}
-
-/* Encode a character from the range [0, 0xD7FF] or [0xE000,0x11FFFF];
- * up to 4 bytes needed */
-static inline void *
-utf16_le_put(void *p, uns u)
-{
- if (u < 0xd800 || (u < 0x10000 && u >= 0xe000))
- {
- put_u16_le(p, u);
- return p + 2;
- }
- else if ((u -= 0x10000) < 0x100000)
- {
- put_u16_le(p, 0xd800 | (u >> 10));
- put_u16_le(p + 2, 0xdc00 | (u & 0x3ff));
- return p + 4;
- }
- else
- ASSERT(0);
-}
-
-static inline void *
-utf16_be_put(void *p, uns u)
-{
- if (u < 0xd800 || (u < 0x10000 && u >= 0xe000))
- {
- put_u16_be(p, u);
- return p + 2;
- }
- else if ((u -= 0x10000) < 0x100000)
- {
- put_u16_be(p, 0xd800 | (u >> 10));
- put_u16_be(p + 2, 0xdc00 | (u & 0x3ff));
- return p + 4;
- }
- else
- ASSERT(0);
-}
-
-/* Decode a character from the range [0, 0xD7FF] or [0xE000,11FFFF]
- * or return `repl' if the encoding has been corrupted */
-static inline void *
-utf16_le_get_repl(const void *p, uns *uu, uns repl)
-{
- uns u = get_u16_le(p), x, y;
- x = u - 0xd800;
- if (x < 0x800)
- if (x < 0x400 && (y = get_u16_le(p + 2) - 0xdc00) < 0x400)
- {
- u = 0x10000 + (x << 10) + y;
- p += 2;
- }
- else
- u = repl;
- *uu = u;
- return (void *)(p + 2);
-}
-
-static inline void *
-utf16_be_get_repl(const void *p, uns *uu, uns repl)
-{
- uns u = get_u16_be(p), x, y;
- x = u - 0xd800;
- if (x < 0x800)
- if (x < 0x400 && (y = get_u16_be(p + 2) - 0xdc00) < 0x400)
- {
- u = 0x10000 + (x << 10) + y;
- p += 2;
- }
- else
- u = repl;
- *uu = u;
- return (void *)(p + 2);
-}
-
-/* Decode a character from the range [0, 0xD7FF] or [0xE000,11FFFF]
- * or return UNI_REPLACEMENT if the encoding has been corrupted */
-static inline void *
-utf16_le_get(const void *p, uns *uu)
-{
- return utf16_le_get_repl(p, uu, UNI_REPLACEMENT);
-}
-
-static inline void *
-utf16_be_get(const void *p, uns *uu)
-{
- return utf16_be_get_repl(p, uu, UNI_REPLACEMENT);
-}
-
-static inline uns
-unicode_sanitize_char(uns u)
-{
- if (u >= 0x10000 || // We don't accept anything outside the basic plane
- u >= 0xd800 && u < 0xf900 || // neither we do surrogates
- u >= 0x80 && u < 0xa0 || // nor latin-1 control chars
- u < 0x20 && u != '\t')
- return UNI_REPLACEMENT;
- return u;
-}
-
-/* unicode-utf8.c */
-
-uns utf8_strlen(const byte *str);
-uns utf8_strnlen(const byte *str, uns n);
-
-#endif
+++ /dev/null
-# Tests for the Unicode module
-
-Name: utf8_put (1)
-Run: ../obj/lib/unicode-t utf8_put
-In: 0041 0048 004f 004a
-Out: 41 48 4f 4a
-
-Name: utf8_put (2)
-Run: ../obj/lib/unicode-t utf8_put
-In: 00aa 01aa 02a5 05a5 0a5a 15a5 2a5a 5a5a a5a5
-Out: c2 aa c6 aa ca a5 d6 a5 e0 a9 9a e1 96 a5 e2 a9 9a e5 a9 9a ea 96 a5
-
-Name: utf8_get (1)
-Run: ../obj/lib/unicode-t utf8_get
-In: 41 48 4f 4a
-Out: 0041 0048 004f 004a
-
-Name: utf8_get (2)
-Run: ../obj/lib/unicode-t utf8_get
-In: c2 aa c6 aa ca a5 d6 a5 e0 a9 9a e1 96 a5 e2 a9 9a e5 a9 9a ea 96 a5
-Out: 00aa 01aa 02a5 05a5 0a5a 15a5 2a5a 5a5a a5a5
-
-Name: utf8_get (3)
-Run: ../obj/lib/unicode-t utf8_get
-In: 84 ff f9 f8 c2 aa 41
-Out: fffc fffc fffc fffc 00aa 0041
-
-Name: utf8_32_put
-Run: ../obj/lib/unicode-t utf8_32_put
-In: 15a5a 2a5a5 5a5a5 a5a5a 15a5a5 2a5a5a 5a5a5a a5a5a5 15a5a5a 2a5a5a5 5a5a5a5 a5a5a5a 15a5a5a5 2a5a5a5a 5a5a5a5a
-Out: f0 95 a9 9a f0 aa 96 a5 f1 9a 96 a5 f2 a5 a9 9a f5 9a 96 a5 f8 8a a5 a9 9a f8 96 a5 a9 9a f8 a9 9a 96 a5 f9 96 a5 a9 9a fa a9 9a 96 a5 fc 85 a9 9a 96 a5 fc 8a 96 a5 a9 9a fc 95 a9 9a 96 a5 fc aa 96 a5 a9 9a fd 9a 96 a5 a9 9a
-
-Name: utf8_32_get (1)
-Run: ../obj/lib/unicode-t utf8_32_get
-In: f0 95 a9 9a f0 aa 96 a5 f1 9a 96 a5 f2 a5 a9 9a f5 9a 96 a5 f8 8a a5 a9 9a f8 96 a5 a9 9a f8 a9 9a 96 a5 f9 96 a5 a9 9a fa a9 9a 96 a5 fc 85 a9 9a 96 a5 fc 8a 96 a5 a9 9a fc 95 a9 9a 96 a5 fc aa 96 a5 a9 9a fd 9a 96 a5 a9 9a
-Out: 15a5a 2a5a5 5a5a5 a5a5a 15a5a5 2a5a5a 5a5a5a a5a5a5 15a5a5a 2a5a5a5 5a5a5a5 a5a5a5a 15a5a5a5 2a5a5a5a 5a5a5a5a
-
-Name: utf8_32_get (2)
-Run: ../obj/lib/unicode-t utf8_32_get
-In: fe 83 81
-Out: fffc fffc fffc
-
-Name: utf16_be_put
-Run: ../obj/lib/unicode-t utf16_be_put
-In: 0041 004a 2a5f feff 0000 10ffff ffff 10000
-Out: 00 41 00 4a 2a 5f fe ff 00 00 db ff df ff ff ff d8 00 dc 00
-
-Name: utf16_le_put
-Run: ../obj/lib/unicode-t utf16_le_put
-In: 0041 004a 2a5f feff 0000 10ffff ffff 10000
-Out: 41 00 4a 00 5f 2a ff fe 00 00 ff db ff df ff ff 00 d8 00 dc
-
-Name: utf16_be_get (1)
-Run: ../obj/lib/unicode-t utf16_be_get
-In: 00 41 00 4a 2a 5f fe ff 00 00 db ff df ff ff ff d8 00 dc 00
-Out: 0041 004a 2a5f feff 0000 10ffff ffff 10000
-
-Name: utf16_be_get (2)
-Run: ../obj/lib/unicode-t utf16_be_get
-In: dc 1a 2a 5f d8 01 d8 01 2a 5f d8 01
-Out: fffc 2a5f fffc fffc 2a5f fffc
-
-Name: utf16_le_get (1)
-Run: ../obj/lib/unicode-t utf16_le_get
-In: 41 00 4a 00 5f 2a ff fe 00 00 ff db ff df ff ff 00 d8 00 dc
-Out: 0041 004a 2a5f feff 0000 10ffff ffff 10000
-
-Name: utf16_le_get (2)
-Run: ../obj/lib/unicode-t utf16_le_get
-In: 1a dc 5f 2a 01 d8 01 d8 5f 2a 01 d8
-Out: fffc 2a5f fffc fffc 2a5f fffc
+++ /dev/null
-/*
- * UCW Library -- URL Functions
- *
- * (c) 1997--2004 Martin Mares <mj@ucw.cz>
- * (c) 2001--2005 Robert Spalek <robert@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- *
- * The URL syntax corresponds to RFC 2396 with several exceptions:
- *
- * o Escaping of special characters still follows RFC 1738.
- * o Interpretation of path parameters follows RFC 1808.
- *
- * XXX: The buffer handling in this module is really horrible, but it works.
- */
-
-#include "lib/lib.h"
-#include "lib/url.h"
-#include "lib/chartype.h"
-#include "lib/conf.h"
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <alloca.h>
-
-/* Configuration */
-
-static uns url_ignore_spaces;
-static uns url_ignore_underflow;
-static char *url_component_separators = "";
-static uns url_min_repeat_count = 0x7fffffff;
-static uns url_max_repeat_length = 0;
-static uns url_max_occurences = ~0U;
-
-static struct cf_section url_config = {
- CF_ITEMS {
- CF_UNS("IgnoreSpaces", &url_ignore_spaces),
- CF_UNS("IgnoreUnderflow", &url_ignore_underflow),
- CF_STRING("ComponentSeparators", &url_component_separators),
- CF_UNS("MinRepeatCount", &url_min_repeat_count),
- CF_UNS("MaxRepeatLength", &url_max_repeat_length),
- CF_UNS("MaxOccurences", &url_max_occurences),
- CF_END
- }
-};
-
-static void CONSTRUCTOR url_init_config(void)
-{
- cf_declare_section("URL", &url_config, 0);
-}
-
-/* Escaping and de-escaping */
-
-static uns
-enhex(uns x)
-{
- return (x<10) ? (x + '0') : (x - 10 + 'A');
-}
-
-int
-url_deescape(const byte *s, byte *d)
-{
- byte *dstart = d;
- byte *end = d + MAX_URL_SIZE - 10;
- while (*s)
- {
- if (d >= end)
- return URL_ERR_TOO_LONG;
- if (*s == '%')
- {
- unsigned int val;
- if (!Cxdigit(s[1]) || !Cxdigit(s[2]))
- return URL_ERR_INVALID_ESCAPE;
- val = Cxvalue(s[1])*16 + Cxvalue(s[2]);
- if (val < 0x20)
- return URL_ERR_INVALID_ESCAPED_CHAR;
- switch (val)
- {
- case ';':
- val = NCC_SEMICOLON; break;
- case '/':
- val = NCC_SLASH; break;
- case '?':
- val = NCC_QUEST; break;
- case ':':
- val = NCC_COLON; break;
- case '@':
- val = NCC_AT; break;
- case '=':
- val = NCC_EQUAL; break;
- case '&':
- val = NCC_AND; break;
- case '#':
- val = NCC_HASH; break;
- }
- *d++ = val;
- s += 3;
- }
- else if (*s > 0x20)
- *d++ = *s++;
- else if (Cspace(*s))
- {
- const byte *s0 = s;
- while (Cspace(*s))
- s++;
- if (!url_ignore_spaces || !(!*s || d == dstart))
- {
- while (Cspace(*s0))
- {
- if (d >= end)
- return URL_ERR_TOO_LONG;
- *d++ = *s0++;
- }
- }
- }
- else
- return URL_ERR_INVALID_CHAR;
- }
- *d = 0;
- return 0;
-}
-
-int
-url_enescape(const byte *s, byte *d)
-{
- byte *end = d + MAX_URL_SIZE - 10;
- unsigned int c;
-
- while (c = *s)
- {
- if (d >= end)
- return URL_ERR_TOO_LONG;
- if (Calnum(c) || /* RFC 1738(2.2): Only alphanumerics ... */
- c == '$' || c == '-' || c == '_' || c == '.' || c == '+' || /* ... and several other exceptions ... */
- c == '!' || c == '*' || c == '\'' || c == '(' || c == ')' ||
- c == ',' ||
- c == '/' || c == '?' || c == ':' || c == '@' || /* ... and reserved chars used for reserved purpose */
- c == '=' || c == '&' || c == '#' || c == ';')
- *d++ = *s++;
- else
- {
- uns val = (*s < NCC_MAX) ? NCC_CHARS[*s] : *s;
- *d++ = '%';
- *d++ = enhex(val >> 4);
- *d++ = enhex(val & 0x0f);
- s++;
- }
- }
- *d = 0;
- return 0;
-}
-
-int
-url_enescape_friendly(const byte *src, byte *dest)
-{
- byte *end = dest + MAX_URL_SIZE - 10;
- while (*src)
- {
- if (dest >= end)
- return URL_ERR_TOO_LONG;
- if (*src < NCC_MAX)
- *dest++ = NCC_CHARS[*src++];
- else if (*src >= 0x20 && *src < 0x7f)
- *dest++ = *src++;
- else
- {
- *dest++ = '%';
- *dest++ = enhex(*src >> 4);
- *dest++ = enhex(*src++ & 0x0f);
- }
- }
- *dest = 0;
- return 0;
-}
-
-/* Split an URL (several parts may be copied to the destination buffer) */
-
-byte *url_proto_names[URL_PROTO_MAX] = URL_PNAMES;
-static int url_proto_path_flags[URL_PROTO_MAX] = URL_PATH_FLAGS;
-
-uns
-identify_protocol(const byte *p)
-{
- uns i;
-
- for(i=1; i<URL_PROTO_MAX; i++)
- if (!strcasecmp(p, url_proto_names[i]))
- return i;
- return URL_PROTO_UNKNOWN;
-}
-
-int
-url_split(byte *s, struct url *u, byte *d)
-{
- bzero(u, sizeof(struct url));
- u->port = ~0;
- u->bufend = d + MAX_URL_SIZE - 10;
-
- if (s[0] != '/') /* Seek for "protocol:" */
- {
- byte *p = s;
- while (*p && Calnum(*p))
- p++;
- if (p != s && *p == ':')
- {
- u->protocol = d;
- while (s < p)
- *d++ = *s++;
- *d++ = 0;
- u->protoid = identify_protocol(u->protocol);
- s++;
- if (url_proto_path_flags[u->protoid] && (s[0] != '/' || s[1] != '/'))
- {
- /* The protocol requires complete host spec, but it's missing -> treat as a relative path instead */
- int len = d - u->protocol;
- d -= len;
- s -= len;
- u->protocol = NULL;
- u->protoid = 0;
- }
- }
- }
-
- if (s[0] == '/') /* Host spec or absolute path */
- {
- if (s[1] == '/') /* Host spec */
- {
- byte *q, *e;
- byte *at = NULL;
- char *ep;
-
- s += 2;
- q = d;
- while (*s && *s != '/' && *s != '?') /* Copy user:passwd@host:port */
- {
- if (*s != '@')
- *d++ = *s;
- else if (!at)
- {
- *d++ = 0;
- at = d;
- }
- else /* This shouldn't happen with sane URL's, but we need to be sure */
- *d++ = NCC_AT;
- s++;
- }
- *d++ = 0;
- if (at) /* user:passwd present */
- {
- u->user = q;
- if (e = strchr(q, ':'))
- {
- *e++ = 0;
- u->pass = e;
- }
- }
- else
- at = q;
- e = strchr(at, ':');
- if (e) /* host:port present */
- {
- uns p;
- *e++ = 0;
- p = strtoul(e, &ep, 10);
- if (ep && *ep || p > 65535)
- return URL_ERR_INVALID_PORT;
- else if (p) /* Port 0 (e.g. in :/) is treated as default port */
- u->port = p;
- }
- u->host = at;
- }
- }
-
- u->rest = s;
- u->buf = d;
- return 0;
-}
-
-/* Normalization according to given base URL */
-
-static uns std_ports[] = URL_DEFPORTS; /* Default port numbers */
-
-static int
-relpath_merge(struct url *u, struct url *b)
-{
- byte *a = u->rest;
- byte *o = b->rest;
- byte *d = u->buf;
- byte *e = u->bufend;
- byte *p;
-
- if (a[0] == '/') /* Absolute path => OK */
- return 0;
- if (o[0] != '/' && o[0] != '?')
- return URL_PATH_UNDERFLOW;
-
- if (!a[0]) /* Empty URL -> inherit everything */
- {
- u->rest = b->rest;
- return 0;
- }
-
- u->rest = d; /* We know we'll need to copy the path somewhere else */
-
- if (a[0] == '#') /* Another fragment */
- {
- for(p=o; *p && *p != '#'; p++)
- ;
- goto copy;
- }
- if (a[0] == '?') /* New query */
- {
- for(p=o; *p && *p != '#' && *p != '?'; p++)
- ;
- goto copy;
- }
- if (a[0] == ';') /* Change parameters */
- {
- for(p=o; *p && *p != ';' && *p != '?' && *p != '#'; p++)
- ;
- goto copy;
- }
-
- p = NULL; /* Copy original path and find the last slash */
- while (*o && *o != ';' && *o != '?' && *o != '#')
- {
- if (d >= e)
- return URL_ERR_TOO_LONG;
- if ((*d++ = *o++) == '/')
- p = d;
- }
- if (!p)
- return URL_ERR_REL_NOTHING;
- d = p;
-
- while (*a)
- {
- if (a[0] == '.')
- {
- if (a[1] == '/' || !a[1]) /* Skip "./" and ".$" */
- {
- a++;
- if (a[0])
- a++;
- continue;
- }
- else if (a[1] == '.' && (a[2] == '/' || !a[2])) /* "../" */
- {
- a += 2;
- if (a[0])
- a++;
- if (d <= u->buf + 1)
- {
- /*
- * RFC 1808 says we should leave ".." as a path segment, but
- * we intentionally break the rule and refuse the URL.
- */
- if (!url_ignore_underflow)
- return URL_PATH_UNDERFLOW;
- }
- else
- {
- d--; /* Discard trailing slash */
- while (d[-1] != '/')
- d--;
- }
- continue;
- }
- }
- while (a[0] && a[0] != '/')
- {
- if (d >= e)
- return URL_ERR_TOO_LONG;
- *d++ = *a++;
- }
- if (a[0])
- *d++ = *a++;
- }
-
-okay:
- *d++ = 0;
- u->buf = d;
- return 0;
-
-copy: /* Combine part of old URL with the new one */
- while (o < p)
- if (d < e)
- *d++ = *o++;
- else
- return URL_ERR_TOO_LONG;
- while (*a)
- if (d < e)
- *d++ = *a++;
- else
- return URL_ERR_TOO_LONG;
- goto okay;
-}
-
-int
-url_normalize(struct url *u, struct url *b)
-{
- int err;
-
- /* Basic checks */
- if (url_proto_path_flags[u->protoid] && (!u->host || !*u->host) ||
- !u->host && u->user ||
- !u->user && u->pass ||
- !u->rest)
- return URL_SYNTAX_ERROR;
-
- if (!u->protocol)
- {
- /* Now we know it's a relative URL. Do we have any base? */
- if (!b || !url_proto_path_flags[b->protoid])
- return URL_ERR_REL_NOTHING;
- u->protocol = b->protocol;
- u->protoid = b->protoid;
-
- /* Reference to the same host */
- if (!u->host)
- {
- u->host = b->host;
- u->user = b->user;
- u->pass = b->pass;
- u->port = b->port;
- if (err = relpath_merge(u, b))
- return err;
- }
- }
-
- /* Change path "?" to "/?" because it's the true meaning */
- if (u->rest[0] == '?')
- {
- int l = strlen(u->rest);
- if (u->bufend - u->buf < l+1)
- return URL_ERR_TOO_LONG;
- u->buf[0] = '/';
- memcpy(u->buf+1, u->rest, l+1);
- u->rest = u->buf;
- u->buf += l+2;
- }
-
- /* Fill in missing info */
- if (u->port == ~0U)
- u->port = std_ports[u->protoid];
-
- return 0;
-}
-
-/* Name canonicalization */
-
-static void
-lowercase(byte *b)
-{
- if (b)
- while (*b)
- {
- if (*b >= 'A' && *b <= 'Z')
- *b = *b + 0x20;
- b++;
- }
-}
-
-static void
-kill_end_dot(byte *b)
-{
- byte *k;
-
- if (b)
- {
- k = b + strlen(b) - 1;
- while (k > b && *k == '.')
- *k-- = 0;
- }
-}
-
-int
-url_canonicalize(struct url *u)
-{
- char *c;
-
- lowercase(u->protocol);
- lowercase(u->host);
- kill_end_dot(u->host);
- if ((!u->rest || !*u->rest) && url_proto_path_flags[u->protoid])
- u->rest = "/";
- if (u->rest && (c = strchr(u->rest, '#'))) /* Kill fragment reference */
- *c = 0;
- return 0;
-}
-
-/* Pack a broken-down URL */
-
-static byte *
-append(byte *d, const byte *s, byte *e)
-{
- if (d)
- while (*s)
- {
- if (d >= e)
- return NULL;
- *d++ = *s++;
- }
- return d;
-}
-
-int
-url_pack(struct url *u, byte *d)
-{
- byte *e = d + MAX_URL_SIZE - 10;
-
- if (u->protocol)
- {
- d = append(d, u->protocol, e);
- d = append(d, ":", e);
- u->protoid = identify_protocol(u->protocol);
- }
- if (u->host)
- {
- d = append(d, "//", e);
- if (u->user)
- {
- d = append(d, u->user, e);
- if (u->pass)
- {
- d = append(d, ":", e);
- d = append(d, u->pass, e);
- }
- d = append(d, "@", e);
- }
- d = append(d, u->host, e);
- if (u->port != std_ports[u->protoid] && u->port != ~0U)
- {
- char z[10];
- sprintf(z, "%d", u->port);
- d = append(d, ":", e);
- d = append(d, z, e);
- }
- }
- if (u->rest)
- d = append(d, u->rest, e);
- if (!d)
- return URL_ERR_TOO_LONG;
- *d = 0;
- return 0;
-}
-
-/* Error messages */
-
-static char *errmsg[] = {
- "Something is wrong",
- "Too long",
- "Invalid character",
- "Invalid escape",
- "Invalid escaped character",
- "Invalid port number",
- "Relative URL not allowed",
- "Unknown protocol",
- "Syntax error",
- "Path underflow"
-};
-
-char *
-url_error(uns err)
-{
- if (err >= sizeof(errmsg) / sizeof(char *))
- err = 0;
- return errmsg[err];
-}
-
-/* Standard cookbook recipes */
-
-int
-url_canon_split_rel(const byte *u, byte *buf1, byte *buf2, struct url *url, struct url *base)
-{
- int err;
-
- if (err = url_deescape(u, buf1))
- return err;
- if (err = url_split(buf1, url, buf2))
- return err;
- if (err = url_normalize(url, base))
- return err;
- return url_canonicalize(url);
-}
-
-int
-url_auto_canonicalize_rel(const byte *src, byte *dst, struct url *base)
-{
- byte buf1[MAX_URL_SIZE], buf2[MAX_URL_SIZE], buf3[MAX_URL_SIZE];
- int err;
- struct url ur;
-
- (void)((err = url_canon_split_rel(src, buf1, buf2, &ur, base)) ||
- (err = url_pack(&ur, buf3)) ||
- (err = url_enescape(buf3, dst)));
- return err;
-}
-
-/* Testing */
-
-#ifdef TEST
-
-int main(int argc, char **argv)
-{
- char buf1[MAX_URL_SIZE], buf2[MAX_URL_SIZE], buf3[MAX_URL_SIZE], buf4[MAX_URL_SIZE];
- int err;
- struct url url, url0;
- char *base = "http://mj@www.hell.org/123/sub_dir/index.html;param?query&zzz/subquery#fragment";
-
- if (argc != 2 && argc != 3)
- return 1;
- if (argc == 3)
- base = argv[2];
- if (err = url_deescape(argv[1], buf1))
- {
- printf("deesc: error %d\n", err);
- return 1;
- }
- printf("deesc: %s\n", buf1);
- if (err = url_split(buf1, &url, buf2))
- {
- printf("split: error %d\n", err);
- return 1;
- }
- printf("split: @%s@%s@%s@%s@%d@%s\n", url.protocol, url.user, url.pass, url.host, url.port, url.rest);
- if (err = url_split(base, &url0, buf3))
- {
- printf("split base: error %d\n", err);
- return 1;
- }
- if (err = url_normalize(&url0, NULL))
- {
- printf("normalize base: error %d\n", err);
- return 1;
- }
- printf("base: @%s@%s@%s@%s@%d@%s\n", url0.protocol, url0.user, url0.pass, url0.host, url0.port, url0.rest);
- if (err = url_normalize(&url, &url0))
- {
- printf("normalize: error %d\n", err);
- return 1;
- }
- printf("normalize: @%s@%s@%s@%s@%d@%s\n", url.protocol, url.user, url.pass, url.host, url.port, url.rest);
- if (err = url_canonicalize(&url))
- {
- printf("canonicalize: error %d\n", err);
- return 1;
- }
- printf("canonicalize: @%s@%s@%s@%s@%d@%s\n", url.protocol, url.user, url.pass, url.host, url.port, url.rest);
- if (err = url_pack(&url, buf4))
- {
- printf("pack: error %d\n", err);
- return 1;
- }
- printf("pack: %s\n", buf4);
- if (err = url_enescape(buf4, buf2))
- {
- printf("enesc: error %d\n", err);
- return 1;
- }
- printf("enesc: %s\n", buf2);
- return 0;
-}
-
-#endif
-
-struct component {
- const byte *start;
- int length;
- uns count;
- u32 hash;
-};
-
-static inline u32
-hashf(const byte *start, int length)
-{
- u32 hf = length;
- while (length-- > 0)
- hf = (hf << 8 | hf >> 24) ^ *start++;
- return hf;
-}
-
-static inline uns
-repeat_count(struct component *comp, uns count, uns len)
-{
- struct component *orig_comp = comp;
- uns found = 0;
- while (1)
- {
- uns i;
- comp += len;
- count -= len;
- found++;
- if (count < len)
- return found;
- for (i=0; i<len; i++)
- if (comp[i].hash != orig_comp[i].hash
- || comp[i].length != orig_comp[i].length
- || memcmp(comp[i].start, orig_comp[i].start, comp[i].length))
- return found;
- }
-}
-
-int
-url_has_repeated_component(const byte *url)
-{
- struct component *comp;
- uns comps, comp_len, rep_prefix, hash_size, *hash, *next;
- const byte *c;
- uns i, j, k;
-
- for (comps=0, c=url; c; comps++)
- {
- c = strpbrk(c, url_component_separators);
- if (c)
- c++;
- }
- if (comps < url_min_repeat_count && comps <= url_max_occurences)
- return 0;
- comp = alloca(comps * sizeof(*comp));
- for (i=0, c=url; c; i++)
- {
- comp[i].start = c;
- c = strpbrk(c, url_component_separators);
- if (c)
- {
- comp[i].length = c - comp[i].start;
- c++;
- }
- else
- comp[i].length = strlen(comp[i].start);
- }
- ASSERT(i == comps);
- for (i=0; i<comps; i++)
- comp[i].hash = hashf(comp[i].start, comp[i].length);
- if (comps > url_max_occurences)
- {
- hash_size = next_table_prime(comps);
- hash = alloca(hash_size * sizeof(*hash));
- next = alloca(comps * sizeof(*next));
- memset(hash, 255, hash_size * sizeof(*hash));
- for (i=0; i<comps; i++)
- {
- j = comp[i].hash % hash_size;
- for (k = hash[j]; ~k && (comp[i].hash != comp[k].hash || comp[i].length != comp[k].length ||
- memcmp(comp[k].start, comp[i].start, comp[i].length)); k = next[k]);
- if (!~k)
- {
- next[i] = hash[j];
- hash[j] = i;
- comp[i].count = 1;
- }
- else
- {
- if (comp[k].count++ >= url_max_occurences)
- return 1;
- }
- }
- }
- for (comp_len = 1; comp_len <= url_max_repeat_length && comp_len <= comps; comp_len++)
- for (rep_prefix = 0; rep_prefix <= comps - comp_len; rep_prefix++)
- if (repeat_count(comp + rep_prefix, comps - rep_prefix, comp_len) >= url_min_repeat_count)
- return comp_len;
- return 0;
-}
+++ /dev/null
-/*
- * UCW Library -- URL Functions
- *
- * (c) 1997--2004 Martin Mares <mj@ucw.cz>
- * (c) 2001 Robert Spalek <robert@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#ifndef _UCW_URL_H
-#define _UCW_URL_H
-
-#define MAX_URL_SIZE 1024
-
-/* Non-control meanings of control characters */
-
-#define NCC_SEMICOLON 1
-#define NCC_SLASH 2
-#define NCC_QUEST 3
-#define NCC_COLON 4
-#define NCC_AT 5
-#define NCC_EQUAL 6
-#define NCC_AND 7
-#define NCC_HASH 8
-#define NCC_MAX 9
-
-#define NCC_CHARS " ;/?:@=&#"
-
-/* Remove/Introduce '%' escapes */
-
-int url_deescape(const byte *s, byte *d);
-int url_enescape(const byte *s, byte *d);
-int url_enescape_friendly(const byte *src, byte *dest); // for cards.c only
-
-/* URL splitting and normalization */
-
-struct url {
- byte *protocol;
- uns protoid;
- byte *user;
- byte *pass;
- byte *host;
- uns port; /* ~0 if unspec */
- byte *rest;
- byte *buf, *bufend;
-};
-
-int url_split(byte *s, struct url *u, byte *d);
-int url_normalize(struct url *u, struct url *b);
-int url_canonicalize(struct url *u);
-int url_pack(struct url *u, byte *d);
-int url_canon_split_rel(const byte *url, byte *buf1, byte *buf2, struct url *u, struct url *base);
-int url_auto_canonicalize_rel(const byte *src, byte *dst, struct url *base);
-uns identify_protocol(const byte *p);
-int url_has_repeated_component(const byte *url);
-
-static inline int url_canon_split(const byte *url, byte *buf1, byte *buf2, struct url *u)
-{ return url_canon_split_rel(url, buf1, buf2, u, NULL); }
-
-static inline int url_auto_canonicalize(const byte *src, byte *dst)
-{ return url_auto_canonicalize_rel(src, dst, NULL); }
-
-/* Error codes */
-
-char *url_error(uns);
-
-#define URL_ERR_TOO_LONG 1
-#define URL_ERR_INVALID_CHAR 2
-#define URL_ERR_INVALID_ESCAPE 3
-#define URL_ERR_INVALID_ESCAPED_CHAR 4
-#define URL_ERR_INVALID_PORT 5
-#define URL_ERR_REL_NOTHING 6
-#define URL_ERR_UNKNOWN_PROTOCOL 7
-#define URL_SYNTAX_ERROR 8
-#define URL_PATH_UNDERFLOW 9
-
-#define URL_PROTO_UNKNOWN 0
-#define URL_PROTO_HTTP 1
-#define URL_PROTO_FTP 2
-#define URL_PROTO_FILE 3
-#define URL_PROTO_MAX 4
-
-#define URL_PNAMES { "unknown", "http", "ftp", "file" }
-#define URL_DEFPORTS { ~0, 80, 21, 0 }
-#define URL_PATH_FLAGS { 0, 1, 1, 1 }
-
-extern byte *url_proto_names[];
-
-#endif
+++ /dev/null
-/*
- * UCW Library -- Fast Pattern Matcher for Short Wildcard Patterns (only `?' and `*' supported)
- *
- * Traditional NFA -> DFA method with on-the-fly DFA construction.
- *
- * (c) 1999 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-#include "lib/mempool.h"
-#include "lib/wildmatch.h"
-
-#include <stdio.h>
-#include <string.h>
-
-#define MAX_STATES 32 /* Must be <= 32, state 0 is reserved, state 1 is initial */
-#define MAX_CACHED 256 /* Maximum number of cached DFA states */
-#define HASH_SIZE 512 /* Number of entries in DFA hash table (at least MAX_CACHED+MAX_STATES) */
-#define HASH_SKIP 137
-
-struct nfa_state {
- byte ch; /* 0 for non-matching state */
- byte final; /* Accepting state */
- u32 match_states; /* States to go to when input character == ch */
- u32 default_states; /* States to go to whatever the input is */
-};
-
-struct dfa_state {
- uintptr_t edge[256]; /* Outgoing DFA edges. Bit 0 is set for incomplete edges which
- * contain just state set and clear for complete ones which point
- * to other states. NULL means `no match'.
- */
- u32 nfa_set; /* A set of NFA states this DFA state represents */
- int final; /* This is an accepting state */
- struct dfa_state *next; /* Next in the chain of free states */
-};
-
-struct wildpatt {
- struct nfa_state nfa[MAX_STATES];
- struct dfa_state *hash[HASH_SIZE];
- struct dfa_state *dfa_start;
- uns nfa_states;
- uns dfa_cache_counter;
- struct mempool *pool;
- struct dfa_state *free_states;
-};
-
-static inline unsigned
-wp_hash(u32 set)
-{
- set ^= set >> 16;
- set ^= set >> 8;
- return set % HASH_SIZE;
-}
-
-static struct dfa_state *
-wp_new_state(struct wildpatt *w, u32 set)
-{
- unsigned h = wp_hash(set);
- struct dfa_state *d;
- unsigned bit;
- u32 def_set;
-
- while (d = w->hash[h])
- {
- if (d->nfa_set == set)
- return d;
- h = (h + HASH_SKIP) % HASH_SIZE;
- }
- if (d = w->free_states)
- w->free_states = d->next;
- else
- d = mp_alloc(w->pool, sizeof(*d));
- w->hash[h] = d;
- bzero(d, sizeof(*d));
- d->nfa_set = set;
- def_set = 0;
- for(bit=1; bit <= w->nfa_states; bit++)
- if (set & (1 << bit))
- {
- struct nfa_state *n = &w->nfa[bit];
- if (n->ch)
- d->edge[n->ch] |= n->match_states | 1;
- d->final |= n->final;
- def_set |= n->default_states;
- }
- if (def_set)
- {
- unsigned i;
- def_set |= 1;
- for(i=0; i<256; i++)
- d->edge[i] |= def_set;
- }
- w->dfa_cache_counter++;
- return d;
-}
-
-struct wildpatt *
-wp_compile(const byte *p, struct mempool *pool)
-{
- struct wildpatt *w;
- uns i;
-
- if (strlen(p) >= MAX_STATES) /* Too long */
- return NULL;
- w = mp_alloc_zero(pool, sizeof(*w));
- w->pool = pool;
- for(i=1; *p; p++)
- {
- struct nfa_state *n = w->nfa + i;
- if (*p == '?')
- n->default_states |= 1 << (++i);/* Default edge to a new state */
- else if (*p == '*')
- n->default_states |= 1 << i; /* Default edge to the same state */
- else
- {
- n->ch = *p; /* Edge to new state labelled with 'c' */
- n->match_states = 1 << (++i);
- }
- }
- w->nfa[i].final = 1;
- w->nfa_states = i;
- w->dfa_start = wp_new_state(w, 1 << 1);
- return w;
-}
-
-static void
-wp_prune_cache(struct wildpatt *w)
-{
- /*
- * I was unable to trigger cache overflow on my large set of
- * test cases, so I decided to handle it in an extremely dumb
- * way. --mj
- */
- int i;
- for(i=0; i<HASH_SIZE; i++)
- if (w->hash[i] && w->hash[i]->nfa_set != (1 << 1))
- {
- struct dfa_state *d = w->hash[i];
- w->hash[i] = NULL;
- d->next = w->free_states;
- w->free_states = d;
- }
- w->dfa_cache_counter = 1; /* Only the initial state remains */
-}
-
-int
-wp_match(struct wildpatt *w, const byte *s)
-{
- struct dfa_state *d;
-
- if (w->dfa_cache_counter >= MAX_CACHED)
- wp_prune_cache(w);
- d = w->dfa_start;
- while (*s)
- {
- uintptr_t next = d->edge[*s];
- if (next & 1)
- {
- /* Need to lookup/create the destination state */
- struct dfa_state *new = wp_new_state(w, next & ~1);
- d->edge[*s] = (uintptr_t) new;
- d = new;
- }
- else if (!next)
- return 0;
- else
- d = (struct dfa_state *) next;
- s++;
- }
- return d->final;
-}
-
-int
-wp_min_size(const byte *p)
-{
- int s = 0;
-
- while (*p)
- if (*p++ != '*')
- s++;
- return s;
-}
-
-#ifdef TEST
-
-void
-wp_dump(struct wildpatt *w)
-{
- int i;
-
- puts("NFA:");
- for(i=1; i<=w->nfa_states; i++)
- {
- struct nfa_state *n = w->nfa + i;
- printf("%2d: %d %02x %08x %08x\n", i, n->final, n->ch, n->match_states, n->default_states);
- }
- puts("DFA:");
- for(i=0; i<HASH_SIZE; i++)
- if (w->hash[i])
- printf("%3d: %08x\n", i, w->hash[i]->nfa_set);
- printf("%d DFA states cached.\n", w->dfa_cache_counter);
-}
-
-int main(int argc, char **argv)
-{
- struct wildpatt *w;
- char buf[1024];
-
- if (argc != 2) return 1;
- w = wp_compile(argv[1], mp_new(65536));
- if (!w)
- {
- puts("Compile error");
- return 1;
- }
- wp_dump(w);
- while (fgets(buf, sizeof(buf)-1, stdin))
- {
- char *c = strchr(buf, '\n');
- if (!c) break;
- *c = 0;
-#if 0
- printf("%d\n", wp_match(w, buf));
-#else
- if (wp_match(w, buf))
- puts(buf);
-#endif
- }
- wp_dump(w);
- return 0;
-}
-
-#endif
+++ /dev/null
-/*
- * UCW Library -- Fast Wildcard Pattern Matcher (only `?' and `*' supported)
- *
- * (c) 1999 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-struct wildpatt;
-struct mempool;
-
-struct wildpatt *wp_compile(const byte *, struct mempool *);
-int wp_match(struct wildpatt *, const byte *);
-int wp_min_size(const byte *);
+++ /dev/null
-/*
- * UCW Library -- Word Splitting
- *
- * (c) 1997 Martin Mares <mj@ucw.cz>
- * (c) 2004 Robert Spalek <robert@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-#include "lib/chartype.h"
-
-#include <string.h>
-
-int
-sepsplit(char *str, uns sep, char **rec, uns max)
-{
- uns cnt = 0;
- while (1)
- {
- rec[cnt++] = str;
- str = strchr(str, sep);
- if (!str)
- return cnt;
- if (cnt >= max)
- return -1;
- *str++ = 0;
- }
-}
-
-int
-wordsplit(char *src, char **dst, uns max)
-{
- uns cnt = 0;
-
- for(;;)
- {
- while (Cspace(*src))
- *src++ = 0;
- if (!*src)
- break;
- if (cnt >= max)
- return -1;
- if (*src == '"')
- {
- src++;
- dst[cnt++] = src;
- while (*src && *src != '"')
- src++;
- if (*src)
- *src++ = 0;
- }
- else
- {
- dst[cnt++] = src;
- while (*src && !Cspace(*src))
- src++;
- }
- }
- return cnt;
-}
+++ /dev/null
-/*
- * UCW Library -- Thread Pools and Work Queues
- *
- * (c) 2006 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-#include "lib/threads.h"
-#include "lib/workqueue.h"
-#include "lib/heap.h"
-
-static void *
-worker_thread_init(void *arg)
-{
- struct worker_thread *t = arg;
- struct worker_pool *pool = t->pool;
-
- if (pool->init_thread)
- pool->init_thread(t);
- sem_post(pool->init_cleanup_sem);
-
- for (;;)
- {
- struct work *w = raw_queue_get(&pool->requests);
- w->go(t, w);
- raw_queue_put(&w->reply_to->finished, w);
- }
-
- return NULL;
-}
-
-static void
-worker_thread_signal_finish(struct worker_thread *t, struct work *w UNUSED)
-{
- if (t->pool->cleanup_thread)
- t->pool->cleanup_thread(t);
- sem_post(t->pool->init_cleanup_sem);
- pthread_exit(NULL);
-}
-
-void
-worker_pool_init(struct worker_pool *p)
-{
- clist_init(&p->worker_threads);
- raw_queue_init(&p->requests);
- p->init_cleanup_sem = sem_alloc();
-
- pthread_attr_t attr;
- if (pthread_attr_init(&attr) < 0 ||
- pthread_attr_setstacksize(&attr, p->stack_size ? : default_thread_stack_size) < 0)
- ASSERT(0);
-
- for (uns i=0; i < p->num_threads; i++)
- {
- struct worker_thread *t = (p->new_thread ? p->new_thread() : xmalloc(sizeof(*t)));
- t->pool = p;
- t->id = i;
- int err = pthread_create(&t->thread, &attr, worker_thread_init, t);
- if (err)
- die("Unable to create thread: %m");
- clist_add_tail(&p->worker_threads, &t->n);
- sem_wait(p->init_cleanup_sem);
- }
-
- pthread_attr_destroy(&attr);
-}
-
-void
-worker_pool_cleanup(struct worker_pool *p)
-{
- for (uns i=0; i < p->num_threads; i++)
- {
- struct work w = {
- .go = worker_thread_signal_finish
- };
- raw_queue_put(&p->requests, &w);
- sem_wait(p->init_cleanup_sem);
- }
-
- struct worker_thread *tmp;
- CLIST_FOR_EACH_DELSAFE(struct worker_thread *, t, p->worker_threads, tmp)
- {
- int err = pthread_join(t->thread, NULL);
- ASSERT(!err);
- if (p->free_thread)
- p->free_thread(t);
- else
- xfree(t);
- }
- raw_queue_cleanup(&p->requests);
- sem_free(p->init_cleanup_sem);
-}
-
-void
-raw_queue_init(struct raw_queue *q)
-{
- pthread_mutex_init(&q->queue_mutex, NULL);
- clist_init(&q->pri0_queue);
- q->queue_sem = sem_alloc();
- q->pri_heap = NULL;
- q->heap_cnt = q->heap_max = 0;
-}
-
-void
-raw_queue_cleanup(struct raw_queue *q)
-{
- ASSERT(clist_empty(&q->pri0_queue));
- ASSERT(!q->heap_cnt);
- xfree(q->pri_heap);
- sem_free(q->queue_sem);
- pthread_mutex_destroy(&q->queue_mutex);
-}
-
-#define PRI_LESS(x,y) ((x)->priority > (y)->priority)
-
-void
-raw_queue_put(struct raw_queue *q, struct work *w)
-{
- pthread_mutex_lock(&q->queue_mutex);
- if (!w->priority)
- clist_add_tail(&q->pri0_queue, &w->n);
- else
- {
- if (unlikely(q->heap_cnt >= q->heap_max))
- {
- struct work **old_heap = q->pri_heap;
- q->heap_max = (q->heap_max ? 2*q->heap_max : 16);
- q->pri_heap = xrealloc(old_heap, (q->heap_max + 1) * sizeof(struct work *));
- }
- struct work **heap = q->pri_heap;
- heap[++q->heap_cnt] = w;
- HEAP_INSERT(struct work *, heap, q->heap_cnt, PRI_LESS, HEAP_SWAP);
- }
- pthread_mutex_unlock(&q->queue_mutex);
- sem_post(q->queue_sem);
-}
-
-static inline struct work *
-raw_queue_do_get(struct raw_queue *q)
-{
- pthread_mutex_lock(&q->queue_mutex);
- struct work *w;
- if (!q->heap_cnt)
- {
- w = clist_head(&q->pri0_queue);
- ASSERT(w);
- clist_remove(&w->n);
- }
- else
- {
- struct work **heap = q->pri_heap;
- w = heap[1];
- HEAP_DELMIN(struct work *, heap, q->heap_cnt, PRI_LESS, HEAP_SWAP);
- }
- pthread_mutex_unlock(&q->queue_mutex);
- return w;
-}
-
-struct work *
-raw_queue_get(struct raw_queue *q)
-{
- sem_wait(q->queue_sem);
- return raw_queue_do_get(q);
-}
-
-struct work *
-raw_queue_try_get(struct raw_queue *q)
-{
- if (!sem_trywait(q->queue_sem))
- return raw_queue_do_get(q);
- else
- return NULL;
-}
-
-void
-work_queue_init(struct worker_pool *p, struct work_queue *q)
-{
- q->pool = p;
- q->nr_running = 0;
- raw_queue_init(&q->finished);
-}
-
-void
-work_queue_cleanup(struct work_queue *q)
-{
- ASSERT(!q->nr_running);
- raw_queue_cleanup(&q->finished);
-}
-
-void
-work_submit(struct work_queue *q, struct work *w)
-{
- ASSERT(w->go);
- w->reply_to = q;
- raw_queue_put(&q->pool->requests, w);
- q->nr_running++;
-}
-
-static struct work *
-work_do_wait(struct work_queue *q, int try)
-{
- if (!q->nr_running)
- return NULL;
- struct work *w = (try ? raw_queue_try_get : raw_queue_get)(&q->finished);
- if (!w)
- return NULL;
- q->nr_running--;
- return w;
-}
-
-struct work *
-work_wait(struct work_queue *q)
-{
- return work_do_wait(q, 0);
-}
-
-struct work *
-work_try_wait(struct work_queue *q)
-{
- return work_do_wait(q, 1);
-}
-
-#ifdef TEST
-
-#include <unistd.h>
-
-static void wt_init(struct worker_thread *t)
-{
- log(L_INFO, "INIT %d", t->id);
-}
-
-static void wt_cleanup(struct worker_thread *t)
-{
- log(L_INFO, "CLEANUP %d", t->id);
-}
-
-struct w {
- struct work w;
- uns id;
-};
-
-static void go(struct worker_thread *t, struct work *w)
-{
- log(L_INFO, "GO %d: request %d (pri %d)", t->id, ((struct w *)w)->id, w->priority);
- usleep(1);
-}
-
-int main(void)
-{
- struct worker_pool pool = {
- .num_threads = 10,
- .stack_size = 65536,
- .init_thread = wt_init,
- .cleanup_thread = wt_cleanup
- };
- worker_pool_init(&pool);
-
- struct work_queue q;
- work_queue_init(&pool, &q);
- for (uns i=0; i<500; i++)
- {
- struct w *w = xmalloc_zero(sizeof(*w));
- w->w.go = go;
- w->w.priority = (i < 250 ? i : 0);
- w->id = i;
- work_submit(&q, &w->w);
- log(L_INFO, "Submitted request %d (pri %d)", w->id, w->w.priority);
- }
-
- struct w *w;
- while (w = (struct w *) work_wait(&q))
- log(L_INFO, "Finished request %d", w->id);
-
- work_queue_cleanup(&q);
- worker_pool_cleanup(&pool);
- return 0;
-}
-
-#endif
+++ /dev/null
-/*
- * UCW Library -- Thread Pools and Work Queues
- *
- * (c) 2006 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#ifndef _UCW_WORKQUEUE_H
-#define _UCW_WORKQUEUE_H
-
-/*
- * A thread pool is a set of threads receiving work requests from a common queue,
- * each work request contains a pointer to a function inside the thread.
- *
- * A work queue is an interface for submitting work requests. It's bound to a single
- * thread pool, it remembers running requests and gathers replies. A single work queue
- * should not be used by multiple threads simultaneously.
- *
- * Requests can have priorities. Requests with the highest priority are served first.
- * Requests of priority 0 are guaranteed to be served on first-come-first-served
- * basis, requests of higher priorities are unordered.
- *
- * When a thread pool is initialized, new_thread() is called for every thread first,
- * allocating struct worker_thread (and user-defined thread context following it) for
- * each thread. Then the threads are fired and each of them executes the init_thread()
- * callback. These callbacks are serialized and worker_pool_init() function waits
- * until all of them finish.
- */
-
-#include "lib/semaphore.h"
-#include "lib/clists.h"
-
-#include <pthread.h>
-
-struct worker_thread { // One of threads serving requests
- cnode n;
- pthread_t thread;
- struct worker_pool *pool;
- int id; // Inside the pool
- /* user-defined data can follow */
-};
-
-struct raw_queue { // Generic queue with locking
- pthread_mutex_t queue_mutex;
- clist pri0_queue; // Ordinary queue for requests with priority=0
- struct work **pri_heap; // A heap for request with priority>0
- uns heap_cnt, heap_max;
- sem_t *queue_sem; // Number of requests queued
-};
-
-struct worker_pool {
- struct raw_queue requests;
- uns num_threads;
- uns stack_size; // 0 for default
- struct worker_thread *(*new_thread)(void); // default: xmalloc the struct
- void (*free_thread)(struct worker_thread *t); // default: xfree
- void (*init_thread)(struct worker_thread *t); // default: empty
- void (*cleanup_thread)(struct worker_thread *t); // default: empty
- clist worker_threads;
- sem_t *init_cleanup_sem;
-};
-
-struct work_queue {
- struct worker_pool *pool;
- uns nr_running; // Number of requests in service
- struct raw_queue finished; // Finished requests queue up here
-};
-
-struct work { // A single request
- cnode n;
- uns priority;
- struct work_queue *reply_to; // Where to queue the request when it's finished
- void (*go)(struct worker_thread *t, struct work *w); // Called inside the worker thread
-};
-
-void worker_pool_init(struct worker_pool *p);
-void worker_pool_cleanup(struct worker_pool *p);
-
-void raw_queue_init(struct raw_queue *q);
-void raw_queue_cleanup(struct raw_queue *q);
-void raw_queue_put(struct raw_queue *q, struct work *w);
-struct work *raw_queue_get(struct raw_queue *q);
-struct work *raw_queue_try_get(struct raw_queue *q);
-
-void work_queue_init(struct worker_pool *p, struct work_queue *q);
-void work_queue_cleanup(struct work_queue *q);
-void work_submit(struct work_queue *q, struct work *w);
-struct work *work_wait(struct work_queue *q);
-struct work *work_try_wait(struct work_queue *q);
-
-#endif /* !_UCW_WORKQUEUE_H */
#include "sherlock/sherlock.h"
#include "sherlock/object.h"
#include "sherlock/attrset.h"
-#include "lib/clists.h"
+#include "ucw/clists.h"
#include "sherlock/conf.h"
struct attr_node {
#ifndef _SHERLOCK_ATTRSET_H
#define _SHERLOCK_ATTRSET_H
-#include "lib/bitarray.h"
+#include "ucw/bitarray.h"
#include "sherlock/object.h"
COMPILE_ASSERT(son_value, OBJ_ATTR_SON == 256);
#undef LOCAL_DEBUG
#include "sherlock/sherlock.h"
-#include "lib/unaligned.h"
-#include "lib/mempool.h"
-#include "lib/fastbuf.h"
-#include "lib/unicode.h"
+#include "ucw/unaligned.h"
+#include "ucw/mempool.h"
+#include "ucw/fastbuf.h"
+#include "ucw/unicode.h"
#include "sherlock/object.h"
#include "sherlock/objread.h"
-#include "lib/lizard.h"
-#include "lib/bbuf.h"
-#include "lib/ff-unicode.h"
+#include "ucw/lizard.h"
+#include "ucw/bbuf.h"
+#include "ucw/ff-unicode.h"
#include <errno.h>
#include <unistd.h>
else
{
uns len;
- GET_UTF8_32(ptr, len);
+ ptr = utf8_32_get(ptr, &len);
if (!len--)
{
*pos = ptr;
while (ptr < end)
{
uns len;
- GET_UTF8_32(ptr, len);
+ ptr = utf8_32_get(ptr, &len);
if (!len--)
break;
byte type = ptr[len];
while (ptr < end)
{
uns len;
- GET_UTF8_32(ptr, len);
+ ptr = utf8_32_get(ptr, &len);
if (!len--)
break;
byte type = ptr[len];
obj_read_start(&st, o_hdr);
byte *b;
// ignore empty lines and read until the end of the bucket
- sh_off_t end = btell(body) + buck_len;
+ ucw_off_t end = btell(body) + buck_len;
while (btell(body) < end && bgets_bb(body, &buf->bb, ~0U))
if ((b = buf->bb.ptr)[0])
obj_read_attr(&st, b[0], b+1);
}
else if (buck_type == BUCKET_TYPE_V30)
{
- sh_off_t start = btell(body);
- sh_off_t end = start + buck_len;
+ ucw_off_t start = btell(body);
+ ucw_off_t end = start + buck_len;
byte *b;
struct obj_read_state st;
obj_read_start(&st, o_hdr);
/* Avoid reading the whole bucket if only its header is needed. */
if (body_start)
{
- sh_off_t start = btell(body);
- sh_off_t end = start + buck_len;
+ ucw_off_t start = btell(body);
+ ucw_off_t end = start + buck_len;
obj_read_start(&st, o_hdr);
while (btell(body) < end)
{
#include "sherlock/sherlock.h"
#include "sherlock/object.h"
-#include "lib/chartype.h"
-#include "lib/fastbuf.h"
-#include "lib/ff-unicode.h"
-#include "lib/unicode.h"
+#include "ucw/chartype.h"
+#include "ucw/fastbuf.h"
+#include "ucw/ff-unicode.h"
+#include "ucw/unicode.h"
#include "sherlock/conf.h"
/*** Attribute names ***/
#ifndef _SHERLOCK_CONF_H
#define _SHERLOCK_CONF_H
-#include "lib/conf.h"
+#include "ucw/conf.h"
/* All of the following objects are defined in conf-parse.c
*
#include "sherlock/sherlock.h"
#include "sherlock/object.h"
-#include "lib/stkstring.h"
+#include "ucw/stkstring.h"
#include <stdio.h>
*/
#include "sherlock/sherlock.h"
-#include "lib/fastbuf.h"
-#include "lib/ff-unicode.h"
+#include "ucw/fastbuf.h"
+#include "ucw/ff-unicode.h"
#include "sherlock/object.h"
#include <stdio.h>
{
if (use_v33)
{
- PUT_UTF8_32(ptr, len+1);
+ ptr = utf8_32_put(ptr, len+1);
memcpy(ptr, val, len);
ptr += len;
*ptr++ = type;
if (len >= 127)
{
byte tmp[6], *tmp_end = tmp;
- PUT_UTF8_32(tmp_end, len+1);
+ tmp_end = utf8_32_put(tmp_end, len+1);
uns l = tmp_end - tmp;
memmove(ptr+l, ptr+1, len);
memcpy(ptr, tmp, l);
*/
#include "sherlock/sherlock.h"
-#include "lib/mempool.h"
-#include "lib/fastbuf.h"
+#include "ucw/mempool.h"
+#include "ucw/fastbuf.h"
#include "sherlock/object.h"
#include <string.h>
my $read_something = 0;
my $obj = $self;
my $raw;
+ my $read = $opts{read} ? $opts{read} : sub { my $fh = shift; return $_ = <$fh>; };
if ($opts{raw}) {
$raw = $obj->{"RAW"} = [];
}
- while (<$fh>) {
+ while ($read->($fh)) {
chomp;
/^$/ && last;
my ($a, $v) = /^(.)(.*)$/ or return undef;
#ifndef _SHERLOCK_LIB_H
#define _SHERLOCK_LIB_H
-#include "lib/lib.h"
+#include "ucw/lib.h"
#ifdef CONFIG_MAX_CONTEXTS
#define CONFIG_CONTEXTS
--- /dev/null
+# Makefile for the UCW Library (c) 1997--2009 Martin Mares <mj@ucw.cz>
+
+# Register the ucw directory with the build system.
+DIRS+=ucw
+# LIBUCW is what the test programs below list as a prerequisite when they
+# need the library; it names the library's pkg-config file in the object tree.
+LIBUCW=$(o)/ucw/libucw.pc
+
+# The utilities are optional and have a makefile of their own.
+ifdef CONFIG_UCW_UTILS
+include $(s)/ucw/utils/Makefile
+endif
+
+# Base names of the modules that make up libucw. The library rules further
+# down turn every name into $(o)/ucw/<name>.o (for libucw.a) and
+# $(o)/ucw/<name>.oo (for libucw-pic.a and libucw.so).
+LIBUCW_MODS= \
+ threads \
+ alloc alloc_str realloc bigalloc mempool mempool-str mempool-fmt eltpool \
+ mmap partmap hashfunc \
+ slists simple-lists bitsig \
+ log log-stream log-file log-syslog log-conf proctitle tbf \
+ conf-alloc conf-dump conf-input conf-intr conf-journal conf-parse conf-section \
+ ipaccess \
+ profile \
+ fastbuf ff-binary ff-string ff-printf ff-unicode ff-stkstring \
+ fb-file carefulio fb-mem fb-temp tempfile fb-mmap fb-limfd fb-buffer fb-grow fb-pool fb-atomic fb-param fb-socket \
+ char-cat char-upper char-lower unicode stkstring \
+ wildmatch regex \
+ prime primetable random timer randomkey \
+ bit-ffs bit-fls \
+ url \
+ mainloop exitstatus runcmd sighandler \
+ lizard lizard-safe adler32 \
+ md5 sha1 sha1-hmac \
+ base64 base224 \
+ sync \
+ qache \
+ string str-esc str-split str-match str-imatch str-hex \
+ bbuf \
+ getopt
+
+# Public header files of the library (names relative to ucw/). They seed
+# LIBUCW_INCLUDES, which the .include-stamp rule exports to run/include/ucw,
+# and they are the headers copied by the install-libucw target.
+LIBUCW_MAIN_INCLUDES= \
+ lib.h threads.h \
+ mempool.h \
+ clists.h slists.h simple-lists.h \
+ string.h stkstring.h unicode.h chartype.h regex.h \
+ wildmatch.h \
+ unaligned.h prefetch.h \
+ bbuf.h gbuf.h bitarray.h bitsig.h \
+ hashfunc.h hashtable.h \
+ heap.h binheap.h binheap-node.h \
+ redblack.h \
+ prime.h \
+ bitops.h \
+ conf.h getopt.h ipaccess.h \
+ profile.h \
+ fastbuf.h lfs.h ff-unicode.h ff-binary.h \
+ url.h \
+ mainloop.h \
+ lizard.h \
+ md5.h \
+ base64.h base224.h \
+ qache.h \
+ kmp.h kmp-search.h binsearch.h \
+ partmap.h
+
+ifdef CONFIG_UCW_THREADS
+# Some modules require threading
+LIBUCW_MODS+=threads-conf workqueue asio
+LIBUCW_MAIN_INCLUDES+=workqueue.h semaphore.h asio.h
+endif
+
+# The fb-direct module is compiled in only when it has been configured.
+ifdef CONFIG_UCW_FB_DIRECT
+LIBUCW_MODS+=fb-direct
+endif
+
+# Use the bundled getopt makefile when the configuration asks for our own getopt.
+ifdef CONFIG_OWN_GETOPT
+include $(s)/ucw/getopt/Makefile
+endif
+
+# Complete list of exported headers; it starts out as the main header list.
+# NOTE(review): the sub-makefiles included below presumably append their own
+# headers to LIBUCW_INCLUDES -- confirm against ucw/sorter/Makefile.
+LIBUCW_INCLUDES=$(LIBUCW_MAIN_INCLUDES)
+
+include $(s)/ucw/sorter/Makefile
+include $(s)/ucw/doc/Makefile
+
+# Paths of all library modules in the object tree, without a file suffix.
+LIBUCW_MOD_PATHS=$(addprefix $(o)/ucw/,$(LIBUCW_MODS))
+
+# The static library is made of .o objects, while the PIC archive and the
+# shared library are made of .oo objects. The recipes come from the build system.
+$(o)/ucw/libucw.a: $(addsuffix .o,$(LIBUCW_MOD_PATHS))
+$(o)/ucw/libucw-pic.a: $(addsuffix .oo,$(LIBUCW_MOD_PATHS))
+$(o)/ucw/libucw.so: $(addsuffix .oo,$(LIBUCW_MOD_PATHS))
+
+# Per-module optimization flags. Both object flavours are listed so that
+# libucw.a, libucw-pic.a and libucw.so are all built from equally optimized code.
+$(o)/ucw/hashfunc.o $(o)/ucw/hashfunc.oo: CFLAGS += -funroll-loops
+$(o)/ucw/lizard.o $(o)/ucw/lizard.oo: CFLAGS += $(COPT2) -funroll-loops
+
+# Stand-alone test programs: each one is built from its own object file and
+# depends on the library through $(LIBUCW).
+$(o)/ucw/conf-test: $(o)/ucw/conf-test.o $(LIBUCW)
+$(o)/ucw/lfs-test: $(o)/ucw/lfs-test.o $(LIBUCW)
+$(o)/ucw/hash-test: $(o)/ucw/hash-test.o $(LIBUCW)
+$(o)/ucw/hashfunc-test: $(o)/ucw/hashfunc-test.o $(LIBUCW)
+$(o)/ucw/asort-test: $(o)/ucw/asort-test.o $(LIBUCW)
+$(o)/ucw/redblack-test: $(o)/ucw/redblack-test.o $(LIBUCW)
+$(o)/ucw/binheap-test: $(o)/ucw/binheap-test.o $(LIBUCW)
+$(o)/ucw/lizard-test: $(o)/ucw/lizard-test.o $(LIBUCW)
+$(o)/ucw/kmp-test: $(o)/ucw/kmp-test.o $(LIBUCW)
+# kmp-test additionally depends on the charset library when it is configured.
+ifdef CONFIG_CHARSET
+$(o)/ucw/kmp-test: $(LIBCHARSET)
+endif
+$(o)/ucw/ipaccess-test: $(o)/ucw/ipaccess-test.o $(LIBUCW)
+$(o)/ucw/trie-test: $(o)/ucw/trie-test.o $(LIBUCW)
+
+# Test stamps of the library which are added to the global TESTS list.
+TESTS+=$(addprefix $(o)/ucw/,regex.test unicode.test hash-test.test mempool.test stkstring.test \
+ slists.test bbuf.test kmp-test.test getopt.test ff-unicode.test eltpool.test \
+ fb-socket.test trie-test.test string.test sha1.test asort-test.test binheap-test.test \
+ redblack-test.test fb-file.test fb-grow.test fb-pool.test fb-atomic.test \
+ fb-limfd.test fb-temp.test fb-mem.test fb-buffer.test fb-mmap.test url.test)
+
+# Programs each test needs to have built before it can run.
+$(o)/ucw/regex.test: $(o)/ucw/regex-t
+$(o)/ucw/unicode.test: $(o)/ucw/unicode-t
+$(o)/ucw/hash-test.test: $(o)/ucw/hash-test
+$(o)/ucw/mempool.test: $(o)/ucw/mempool-t $(o)/ucw/mempool-fmt-t $(o)/ucw/mempool-str-t
+$(o)/ucw/stkstring.test: $(o)/ucw/stkstring-t
+# NOTE(review): bitops.test has prerequisites here but is not listed in TESTS
+# above, so nothing in this makefile requests it -- confirm whether that is intended.
+$(o)/ucw/bitops.test: $(o)/ucw/bit-ffs-t $(o)/ucw/bit-fls-t
+$(o)/ucw/slists.test: $(o)/ucw/slists-t
+$(o)/ucw/kmp-test.test: $(o)/ucw/kmp-test
+$(o)/ucw/bbuf.test: $(o)/ucw/bbuf-t
+$(o)/ucw/getopt.test: $(o)/ucw/getopt-t
+$(o)/ucw/ff-unicode.test: $(o)/ucw/ff-unicode-t
+$(o)/ucw/eltpool.test: $(o)/ucw/eltpool-t
+$(o)/ucw/string.test: $(o)/ucw/str-hex-t $(o)/ucw/str-esc-t
+$(o)/ucw/sha1.test: $(o)/ucw/sha1-t $(o)/ucw/sha1-hmac-t
+$(o)/ucw/trie-test.test: $(o)/ucw/trie-test
+$(o)/ucw/asort-test.test: $(o)/ucw/asort-test
+$(o)/ucw/binheap-test.test: $(o)/ucw/binheap-test
+$(o)/ucw/redblack-test.test: $(o)/ucw/redblack-test
+# Static pattern rule: every fb-<name>.test depends on the fb-<name>-t program.
+$(addprefix $(o)/ucw/fb-,file.test grow.test pool.test socket.test atomic.test \
+ limfd.test temp.test mem.test buffer.test mmap.test): %.test: %-t
+$(o)/ucw/url.test: $(o)/ucw/url-t
+
+# The asio test exists only in threaded builds, as does the asio module itself.
+ifdef CONFIG_UCW_THREADS
+TESTS+=$(addprefix $(o)/ucw/,asio.test)
+$(o)/ucw/asio.test: $(o)/ucw/asio-t
+endif
+
+# The version of autoconf.h that is a part of the public API needs to have
+# the internal symbols filtered out, so we generate ucw/autoconf.h in the
+# configure script and let the public config.h refer to <ucw/autoconf.h>
+# instead of plain "autoconf.h".
+
+# Register libucw and its header stamp with the API lists of the build system.
+API_LIBS+=libucw
+API_INCLUDES+=$(o)/ucw/.include-stamp
+# Stamp rule exporting the public headers to run/include/ucw:
+#  1. the headers from LIBUCW_INCLUDES, taken from $(<D), i.e. the directory
+#     of the first prerequisite (the ucw source directory),
+#  2. the generated autoconf.h from the object tree,
+#  3. config.h, rewritten by sed to include <ucw/autoconf.h> instead of "autoconf.h".
+# Finally the stamp itself is touched.
+$(o)/ucw/.include-stamp: $(addprefix $(s)/ucw/,$(LIBUCW_INCLUDES)) $(o)/ucw/autoconf.h
+ $(Q)$(BUILDSYS)/install-includes $(<D) run/include/ucw $(LIBUCW_INCLUDES)
+ $(Q)$(BUILDSYS)/install-includes $(o)/ucw run/include/ucw autoconf.h
+ $(Q)sed -e 's/^#include "autoconf\.h"/#include <ucw\/autoconf.h>/' <$(s)/ucw/config.h >run/include/ucw/config.h
+ $(Q)touch $@
+# NOTE(review): no recipe here; presumably a rule in the build system copies
+# the .pc file into the run tree -- confirm.
+run/lib/pkgconfig/libucw.pc: $(o)/ucw/libucw.pc
+
+ifdef CONFIG_UCW_PERL
+include $(s)/ucw/perl/Makefile
+endif
+
+ifdef CONFIG_UCW_SHELL_UTILS
+include $(s)/ucw/shell/Makefile
+endif
+
+# The library ships a configuration file called libucw.
+CONFIGS+=libucw
+
+# Installation of the library: create the destination directories under
+# $(DESTDIR), then copy the public headers (plus autoconf.h and config.h),
+# the pkg-config file, the library itself and the configuration file out of
+# the run tree. The target has no prerequisites, so the run tree must have
+# been built already.
+INSTALL_TARGETS+=install-libucw
+install-libucw:
+ install -d -m 755 $(DESTDIR)$(INSTALL_LIB_DIR) $(DESTDIR)$(INSTALL_INCLUDE_DIR)/ucw/ $(DESTDIR)$(INSTALL_PKGCONFIG_DIR) $(DESTDIR)$(INSTALL_CONFIG_DIR)
+ install -m 644 $(addprefix run/include/ucw/,$(LIBUCW_MAIN_INCLUDES) autoconf.h config.h) $(DESTDIR)$(INSTALL_INCLUDE_DIR)/ucw/
+ install -m 644 run/lib/pkgconfig/libucw.pc $(DESTDIR)$(INSTALL_PKGCONFIG_DIR)
+ install -m 644 run/lib/libucw.$(LS) $(DESTDIR)$(INSTALL_LIB_DIR)
+ install -m 644 run/$(CONFIG_DIR)/libucw $(DESTDIR)$(INSTALL_CONFIG_DIR)
+
+.PHONY: install-libucw
--- /dev/null
+Generally, functions in the UCW library are reentrant as long as you call them
+on different data. Calling them on the same object is not reentrant unless
+stated otherwise; the same holds for functions acting on any kind of global state.
+
+There are some exceptions:
+
+- setproctitle() is not safe, it modifies global state
--- /dev/null
+/*
+ * adler32.c -- compute the Adler-32 checksum of a data stream
+ *
+ * Copyright (C) 1995--2003 Mark Adler
+ *
+ * Taken from zlib-1.2.1 and adjusted by Robert Spalek. For conditions of
+ * distribution and use, see copyright notice in zlib.h.
+ */
+
+#include "ucw/lib.h"
+#include "ucw/lizard.h"
+
+#define BASE 65521UL /* largest prime smaller than 65536 */
+#define NMAX 5552 /* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */
+
+#define DO1(buf,i) {s1 += buf[i]; s2 += s1;}
+#define DO2(buf,i) DO1(buf,i); DO1(buf,i+1);
+#define DO4(buf,i) DO2(buf,i); DO2(buf,i+2);
+#define DO8(buf,i) DO4(buf,i); DO4(buf,i+4);
+#define DO16(buf) DO8(buf,0); DO8(buf,8);
+#define MOD(a) a %= BASE
+
+/*
+ * Continue the Adler-32 checksum @adler over @len bytes starting at @buf
+ * and return the updated checksum. When @buf is NULL, 1 is returned.
+ */
+uns
+adler32_update(uns adler, const byte *buf, uns len)
+{
+  if (!buf)
+    return 1L;
+
+  /* The two 16-bit halves of the checksum; the DO16() macro updates s1 and s2 by name. */
+  uns s1 = adler & 0xffff;
+  uns s2 = (adler >> 16) & 0xffff;
+
+  while (len > 0)
+    {
+      /* Add up at most NMAX bytes before the sums are reduced modulo BASE. */
+      int chunk = (len < NMAX) ? (int) len : NMAX;
+      len -= chunk;
+
+      /* Full groups of 16 bytes go through the unrolled macro ... */
+      for (; chunk >= 16; chunk -= 16)
+        {
+          DO16(buf);
+          buf += 16;
+        }
+      /* ... and the remaining bytes are added one at a time. */
+      for (; chunk != 0; chunk--)
+        {
+          s1 += *buf++;
+          s2 += s1;
+        }
+
+      MOD(s1);
+      MOD(s2);
+    }
+  return (s2 << 16) | s1;
+}
--- /dev/null
+/*
+ * UCW Library -- Memory Allocation
+ *
+ * (c) 2000 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+/*
+ * Allocate @size bytes by malloc(). When the allocation fails, die() is
+ * called with a message stating the requested size.
+ */
+void *
+xmalloc(uns size)
+{
+  void *x = malloc(size);
+  if (!x)
+    /* size is unsigned, so it has to be printed by %u; %d showed large requests as negative */
+    die("Cannot allocate %u bytes of memory", size);
+  return x;
+}
+
+/* Like xmalloc(), but the returned block of @size bytes is filled with zeroes. */
+void *
+xmalloc_zero(uns size)
+{
+  void *block = xmalloc(size);
+  memset(block, 0, size);
+  return block;
+}
+
+/* Counterpart of xmalloc(): hands @ptr over to free(). */
+void
+xfree(void *ptr)
+{
+ /*
+ * Maybe it is a little waste of resources to make this a function instead
+ * of a macro, but xmalloc() is not used for anything critical anyway,
+ * so let's prefer simplicity.
+ */
+ free(ptr);
+}
--- /dev/null
+/*
+ * UCW Library -- String Allocation
+ *
+ * (c) 1997 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+
+#include <string.h>
+
+/*
+ * Return a copy of the zero-terminated string @s (including the terminator)
+ * in memory obtained from xmalloc().
+ */
+char *
+xstrdup(const char *s)
+{
+  uns size = strlen(s) + 1;	/* +1 for the terminating zero */
+  char *copy = xmalloc(size);
+  memcpy(copy, s, size);
+  return copy;
+}
--- /dev/null
+/*
+ * UCW Library -- Asynchronous I/O
+ *
+ * (c) 2006 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#undef LOCAL_DEBUG
+
+#include "ucw/lib.h"
+#include "ucw/asio.h"
+#include "ucw/threads.h"
+
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+
+static uns asio_num_users;
+static struct worker_pool asio_wpool;
+
+/*
+ * Count one more user of the ASIO subsystem. The first user also starts
+ * the shared worker pool, which is given a single thread. The public entry
+ * point calls this with the library lock held.
+ */
+static void
+asio_init_unlocked(void)
+{
+  asio_num_users++;
+  if (asio_num_users != 1)
+    return;			/* Somebody has already initialized the pool */
+
+  DBG("ASIO: INIT");
+  asio_wpool.num_threads = 1;
+  worker_pool_init(&asio_wpool);
+}
+
+/*
+ * Drop one user of the ASIO subsystem. When the last user is gone, the
+ * shared worker pool is shut down. The public entry point calls this with
+ * the library lock held.
+ */
+static void
+asio_cleanup_unlocked(void)
+{
+  asio_num_users--;
+  if (asio_num_users != 0)
+    return;			/* The pool is still in use */
+
+  DBG("ASIO: CLEANUP");
+  worker_pool_cleanup(&asio_wpool);
+}
+
+/*
+ * Initialize the queue @q. The caller must have set q->buffer_size to
+ * a non-zero value; all counters and lists are reset here and the queue is
+ * attached to the shared worker pool.
+ */
+void
+asio_init_queue(struct asio_queue *q)
+{
+  /* Register as a user of the subsystem (this starts the pool if we are the first one) */
+  ucwlib_lock();
+  asio_init_unlocked();
+  ucwlib_unlock();
+
+  DBG("ASIO: New queue %p", q);
+  ASSERT(q->buffer_size);
+
+  /* No requests exist yet */
+  q->use_count = 0;
+  q->allocated_requests = 0;
+  q->running_requests = 0;
+  q->running_writebacks = 0;
+
+  /* Both request lists start empty */
+  clist_init(&q->idle_list);
+  clist_init(&q->done_list);
+
+  work_queue_init(&asio_wpool, &q->queue);
+}
+
+/*
+ * Tear down the queue @q. The queue must be completely quiescent: no
+ * running requests or writebacks, no requests held by the caller and an
+ * empty done list (all of this is ASSERTed). Recycled requests on the idle
+ * list are freed together with their buffers.
+ */
+void
+asio_cleanup_queue(struct asio_queue *q)
+{
+ DBG("ASIO: Removing queue %p", q);
+ ASSERT(!q->running_requests);
+ ASSERT(!q->running_writebacks);
+ ASSERT(!q->allocated_requests);
+ ASSERT(clist_empty(&q->done_list));
+
+ /* Free every recycled request; its buffer came from big_alloc(q->buffer_size) in asio_get() */
+ struct asio_request *r;
+ while (r = clist_remove_head(&q->idle_list))
+ {
+ big_free(r->buffer, q->buffer_size);
+ xfree(r);
+ }
+
+ work_queue_cleanup(&q->queue);
+
+ /* Unregister from the shared worker pool under the library lock */
+ ucwlib_lock();
+ asio_cleanup_unlocked();
+ ucwlib_unlock();
+}
+
+/*
+ * Hand out an empty request belonging to the queue @q. A recycled request
+ * from the idle list is preferred; if there is none, a new one is allocated
+ * together with a buffer of q->buffer_size bytes. In both cases the request
+ * fields are reset (op = ASIO_FREE, fd = -1, len = 0, not submitted), so
+ * the caller has to fill in op, fd and len before asio_submit().
+ */
+struct asio_request *
+asio_get(struct asio_queue *q)
+{
+ q->allocated_requests++;
+ struct asio_request *r = clist_head(&q->idle_list);
+ if (!r)
+ {
+ /* Nothing to recycle: create a fresh request with its own buffer */
+ r = xmalloc_zero(sizeof(*r));
+ r->queue = q;
+ r->buffer = big_alloc(q->buffer_size);
+ DBG("ASIO: Got %p (new)", r);
+ }
+ else
+ {
+ /* Reuse an idle request; queue and buffer are kept from its previous life */
+ clist_remove(&r->work.n);
+ DBG("ASIO: Got %p", r);
+ }
+ r->op = ASIO_FREE;
+ r->fd = -1;
+ r->len = 0;
+ r->status = -1;
+ r->returned_errno = -1;
+ r->submitted = 0;
+ return r;
+}
+
+/*
+ * Take one finished request from the work queue of @q (via work_wait())
+ * and account for it. Returns 0 if work_wait() yields NULL, 1 otherwise.
+ *
+ * Finished write-backs are checked here: a failed or short write is fatal
+ * (die()), a successful one is recycled immediately with asio_put().
+ * All other requests are appended to q->done_list for asio_wait().
+ */
+static int
+asio_raw_wait(struct asio_queue *q)
+{
+ struct asio_request *r = (struct asio_request *) work_wait(&q->queue);
+ if (!r)
+ return 0;
+ r->submitted = 0;
+ q->running_requests--;
+ if (r->op == ASIO_WRITE_BACK)
+ {
+ DBG("ASIO: Finished writeback %p", r);
+ /* Nobody is notified about write-backs, so errors must be handled right here */
+ if (r->status < 0)
+ die("Asynchronous write to fd %d failed: %s", r->fd, strerror(r->returned_errno));
+ if (r->status != (int)r->len)
+ die("Asynchronous write to fd %d wrote only %d bytes out of %d", r->fd, r->status, r->len);
+ q->running_writebacks--;
+ asio_put(r);
+ }
+ else
+ clist_add_tail(&q->done_list, &r->work.n);
+ return 1;
+}
+
+/*
+ * Work handler installed by asio_submit() as r->work.go: performs the
+ * actual read() or write() for the request @w and stores the syscall's
+ * return value in r->status and the resulting errno in r->returned_errno.
+ * Any operation other than READ, WRITE and WRITE_BACK is fatal.
+ */
+static void
+asio_handler(struct worker_thread *t UNUSED, struct work *w)
+{
+ /* The cast relies on `work' being the first member of struct asio_request */
+ struct asio_request *r = (struct asio_request *) w;
+
+ DBG("ASIO: Servicing %p (%s on fd=%d, len=%d)", r,
+ (char*[]) { "?", "READ", "WRITE", "WRITEBACK" }[r->op], r->fd, r->len);
+ /* Clear errno first, so that returned_errno is 0 when the call sets no error */
+ errno = 0;
+ switch (r->op)
+ {
+ case ASIO_READ:
+ r->status = read(r->fd, r->buffer, r->len);
+ break;
+ case ASIO_WRITE:
+ case ASIO_WRITE_BACK:
+ r->status = write(r->fd, r->buffer, r->len);
+ break;
+ default:
+ die("ASIO: Got unknown request type %d", r->op);
+ }
+ r->returned_errno = errno;
+ DBG("ASIO: Finished %p (status=%d, errno=%d)", r, r->status, r->returned_errno);
+}
+
+/*
+ * Submit the request @r to the work queue of its asio_queue. The request
+ * must have its operation set (not ASIO_FREE) and must not be submitted
+ * already.
+ *
+ * For ASIO_WRITE_BACK requests, this call first waits for older requests
+ * to finish until fewer than q->max_writebacks write-backs are running,
+ * so it can block.
+ */
+void
+asio_submit(struct asio_request *r)
+{
+ struct asio_queue *q = r->queue;
+ DBG("ASIO: Submitting %p on queue %p", r, q);
+ ASSERT(r->op != ASIO_FREE);
+ ASSERT(!r->submitted);
+ if (r->op == ASIO_WRITE_BACK)
+ {
+ while (q->running_writebacks >= q->max_writebacks)
+ {
+ DBG("ASIO: Waiting for free writebacks");
+ /* The limit is reached, so asio_raw_wait() is expected to find a finished request */
+ if (!asio_raw_wait(q))
+ ASSERT(0);
+ }
+ q->running_writebacks++;
+ }
+ q->running_requests++;
+ r->submitted = 1;
+ r->work.go = asio_handler;
+ r->work.priority = 0;
+ work_submit(&q->queue, &r->work);
+}
+
+/*
+ * Return the first finished request of the queue @q, removed from the done
+ * list. While the done list is empty, finished work is collected with
+ * asio_raw_wait(); when that reports nothing more to collect, NULL is
+ * returned.
+ */
+struct asio_request *
+asio_wait(struct asio_queue *q)
+{
+  for (;;)
+    {
+      struct asio_request *done = clist_head(&q->done_list);
+      if (done)
+        {
+          clist_remove(&done->work.n);
+          DBG("ASIO: Done %p", done);
+          return done;
+        }
+      DBG("ASIO: Waiting on queue %p", q);
+      if (!asio_raw_wait(q))
+        return NULL;
+    }
+}
+
+/*
+ * Give the request @r back to its queue for recycling: it is appended to
+ * the idle list and no longer counted as allocated. The request must not
+ * be submitted at this moment.
+ */
+void
+asio_put(struct asio_request *r)
+{
+  struct asio_queue *owner = r->queue;
+
+  DBG("ASIO: Put %p", r);
+  ASSERT(!r->submitted);
+  ASSERT(owner->allocated_requests);
+  owner->allocated_requests--;
+  clist_add_tail(&owner->idle_list, &r->work.n);
+}
+
+/*
+ * Keep collecting finished requests until no request of the queue @q is
+ * running any more. Running out of finished work while requests are still
+ * counted as running is treated as an internal error.
+ */
+void
+asio_sync(struct asio_queue *q)
+{
+  DBG("ASIO: Syncing queue %p", q);
+  while (q->running_requests)
+    {
+      if (asio_raw_wait(q))
+        continue;
+      ASSERT(0);
+    }
+}
+
+#ifdef TEST
+
+int main(void)
+{
+ struct asio_queue q;
+ struct asio_request *r;
+
+ q.buffer_size = 4096;
+ q.max_writebacks = 2;
+ asio_init_queue(&q);
+
+#if 0
+
+ for (;;)
+ {
+ r = asio_get(&q);
+ r->op = ASIO_READ;
+ r->fd = 0;
+ r->len = q.buffer_size;
+ asio_submit(r);
+ r = asio_wait(&q);
+ ASSERT(r);
+ if (r->status <= 0)
+ {
+ asio_put(r);
+ break;
+ }
+ r->op = ASIO_WRITE_BACK;
+ r->fd = 1;
+ r->len = r->status;
+ asio_submit(r);
+ }
+ asio_sync(&q);
+
+#else
+
+ r = asio_get(&q);
+ r->op = ASIO_READ;
+ r->fd = 0;
+ r->len = 1;
+ asio_submit(r);
+ r = asio_wait(&q);
+ ASSERT(r);
+ asio_put(r);
+
+ for (uns i=0; i<10; i++)
+ {
+ r = asio_get(&q);
+ r->op = ASIO_WRITE_BACK;
+ r->fd = 1;
+ r->len = 1;
+ r->buffer[0] = 'A' + i;
+ asio_submit(r);
+ }
+ asio_sync(&q);
+
+ r = asio_get(&q);
+ r->op = ASIO_WRITE;
+ r->fd = 1;
+ r->len = 1;
+ r->buffer[0] = '\n';
+ asio_submit(r);
+ r = asio_wait(&q);
+ ASSERT(r);
+ asio_put(r);
+
+#endif
+
+ asio_cleanup_queue(&q);
+ return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Asynchronous I/O
+ *
+ * (c) 2006 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_ASIO_H
+#define _UCW_ASIO_H
+
+#include "ucw/workqueue.h"
+#include "ucw/clists.h"
+
+/*
+ * This module takes care of scheduling and executing asynchronous I/O requests
+ * on files opened with O_DIRECT. It is primarily used by the fb-direct fastbuf
+ * back-end, but you can use it explicitly, too.
+ *
+ * You can define several I/O queues, each for use by a single thread. Requests
+ * on a single queue are always processed in order of their submits, requests
+ * from different queues may be interleaved (although the current implementation
+ * does not do so). Normal read and write requests are returned to their queue
+ * when they are completed. Write-back requests are automatically freed when
+ * done, but the number of such requests in flight is limited in order to avoid
+ * consuming all memory, so a submit of a write-back request can block.
+ */
+
+/*
+ * An I/O queue. Set buffer_size and max_writebacks, then call
+ * asio_init_queue(), which resets all remaining fields.
+ */
+struct asio_queue {
+ uns buffer_size; // How large buffers do we use [user-settable]; must be non-zero
+ uns max_writebacks; // Maximum number of writeback requests active [user-settable]
+ uns allocated_requests; // Requests handed out by asio_get() and not yet returned by asio_put()
+ uns running_requests; // Total number of running requests
+ uns running_writebacks; // How many of them are writebacks
+ clist idle_list; // Recycled requests waiting for get
+ clist done_list; // Finished requests
+ struct work_queue queue; // Work queue the requests are submitted to
+ uns use_count; // For use by the caller
+};
+
+enum asio_op {
+ ASIO_FREE,
+ ASIO_READ,
+ ASIO_WRITE,
+ ASIO_WRITE_BACK, // Background write with no success notification
+};
+
+/*
+ * A single I/O request. Obtain it by asio_get(), fill in op, fd, len
+ * (and the buffer contents for writes) and pass it to asio_submit().
+ */
+struct asio_request {
+ struct work work; // asio_requests are internally just work nodes
+ struct asio_queue *queue; // The queue this request belongs to
+ byte *buffer; // Data buffer of queue->buffer_size bytes, allocated by asio_get()
+ int fd; // File descriptor to read from / write to
+ enum asio_op op; // Requested operation
+ uns len; // Number of bytes to transfer
+ int status; // Return value of the read() or write() call
+ int returned_errno; // Value of errno after the call
+ int submitted; // Non-zero between asio_submit() and completion
+ void *user_data; // For use by the caller
+};
+
+void asio_init_queue(struct asio_queue *q); // Initialize a new queue
+void asio_cleanup_queue(struct asio_queue *q);
+struct asio_request *asio_get(struct asio_queue *q); // Get an empty request
+void asio_submit(struct asio_request *r); // Submit the request (can block if too many writebacks)
+struct asio_request *asio_wait(struct asio_queue *q); // Wait for the first finished request, NULL if no more
+void asio_put(struct asio_request *r); // Return a finished request for recycling
+void asio_sync(struct asio_queue *q); // Wait until all requests are finished
+
+#endif /* !_UCW_ASIO_H */
--- /dev/null
+# Tests for asynchronous I/O
+
+Run: echo y | ../obj/ucw/asio-t
+Out: ABCDEFGHIJ
--- /dev/null
+/*
+ * UCW Library -- Universal Array Sorter Test and Benchmark
+ *
+ * (c) 2003 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#define N 4000037 /* a prime */
+
+struct elt {
+ u32 key;
+ u32 x, y;
+};
+
+static struct elt array[N];
+
+#define ASORT_KEY_TYPE u32
+#define ASORT_ELT(i) array[i].key
+#define ASORT_SWAP(i,j) do { struct elt e=array[j]; array[j]=array[i]; array[i]=e; } while(0)
+
+static void generate(void)
+{
+ uns i;
+ for (i=0; i<N; i++)
+#if 0
+ ASORT_ELT(i) = N-i-1;
+#elif 0
+ ASORT_ELT(i) = i;
+#else
+ ASORT_ELT(i) = (i ? ASORT_ELT(i-1)+1944833754 : 3141592) % N;
+#endif
+}
+
+static int errors = 0;
+
+static void check(void)
+{
+ uns i;
+ for (i=0; i<N; i++)
+ if (ASORT_ELT(i) != i)
+ {
+ printf("error at pos %d: %08x != %08x\n", i, ASORT_ELT(i), i);
+ errors = 1;
+ }
+}
+
+/* Three-way comparison of two elements by key for qsort(): -1, 0 or 1. */
+static int qs_comp(const struct elt *X, const struct elt *Y)
+{
+  return (X->key > Y->key) - (X->key < Y->key);
+}
+
+#define ASORT_PREFIX(x) as_##x
+#include "ucw/sorter/array-simple.h"
+
+int main(void)
+{
+ timestamp_t timer;
+
+ generate();
+ init_timer(&timer);
+ qsort(array, N, sizeof(array[0]), (int (*)(const void *, const void *)) qs_comp);
+ printf("qsort: %d ms\n", get_timer(&timer));
+ check();
+ generate();
+ init_timer(&timer);
+ as_sort(N);
+ printf("asort: %d ms\n", get_timer(&timer));
+ check();
+ return errors;
+}
--- /dev/null
+# Test for the arraysort module
+
+Run: ../obj/ucw/asort-test
--- /dev/null
+/*
+ * UCW Library -- Base 224 Encoding & Decoding
+ *
+ * (c) 2002 Martin Mares <mj@ucw.cz>
+ *
+ * The `base-224' encoding transforms general sequences of bytes
+ * to sequences of non-control 8-bit characters (0x20-0xff). Since
+ * 224 and 256 are incompatible bases (there is no k,l: 224^k=256^l)
+ * and we want to avoid lengthy calculations, we cheat a bit:
+ *
+ * Each base-224 digit can be represented as a (base-7 digit, base-32 digit)
+ * pair, so we pass the lower 5 bits directly and use a base-7 encoder
+ * for the upper part. We process blocks of 39 bits and encode them
+ * to 5 base-224 digits: we take 5x5 bits as the lower halves and convert
+ * the remaining 14 bits in base-7 (2^14 = 16384 < 16807 = 7^5) to get
+ * the 7 upper parts we need (with a little redundancy). Little endian
+ * ordering is used to make handling of partial blocks easy.
+ *
+ * We transform 39 source bits to 40 destination bits, stretching the data
+ * by 1/39 = approx. 2.56%.
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#undef LOCAL_DEBUG
+
+#include "ucw/lib.h"
+#include "ucw/base224.h"
+
+/*
+ * Encode one 39-bit block, given as the low 32 bits @lo and the remaining
+ * bits in @hi, to exactly 5 output bytes w[0..4]. Each output byte is
+ * 0x20 + (5 directly copied bits) + 32 * (one base-7 digit), which keeps it
+ * in the range 0x20-0xff.
+ */
+static void
+encode_block(byte *w, u32 hi, u32 lo)
+{
+ uns x, y;
+
+ /*
+ * Splitting of the 39-bit block: [a-e][0-5] are the base-32 digits, *'s are used for base-7.
+ * +----------------+----------------+----------------+----------------+----------------+
+ * +00******e4e3e2e1|e0******d4d3d2d1|d0******c4c3c2c1|c0******b4b3b2b1|b0****a4a3a2a1a0|
+ * +----------------+----------------+----------------+----------------+----------------+
+ */
+
+ /* The five 5-bit groups which are passed through unchanged */
+ w[0] = lo & 0x1f;
+ w[1] = (lo >> 7) & 0x1f;
+ w[2] = (lo >> 15) & 0x1f;
+ w[3] = (lo >> 23) & 0x1f;
+ w[4] = (lo >> 31) | ((hi << 1) & 0x1e);
+ /* Gather the remaining 14 bits (the *'s above) into a single number */
+ x = (lo >> 5) & 0x0003
+ | (lo >> 10) & 0x001c
+ | (lo >> 15) & 0x00e0
+ | (lo >> 20) & 0x0700
+ | (hi << 7) & 0x3800;
+ DBG("<<< h=%08x l=%08x x=%d", hi, lo, x);
+ /* Spread x over the output bytes as five base-7 digits, least significant first */
+ for (y=0; y<5; y++)
+ {
+ w[y] += 0x20 + ((x % 7) << 5);
+ x /= 7;
+ }
+}
+
+/*
+ * Encode @len bytes from @src to @dest and return the number of output
+ * bytes. Input bits are collected in the (hi,lo) pair, least significant
+ * first, and every complete group of 39 bits is turned into 5 output bytes
+ * by encode_block().
+ *
+ * Note that a trailing partial block is still written by encode_block() as
+ * 5 bytes, although fewer of them are counted in the returned length, so
+ * @dest needs room for whole 5-byte blocks (see BASE224_ENC_LENGTH).
+ */
+uns
+base224_encode(byte *dest, const byte *src, uns len)
+{
+ u32 lo=0, hi=0; /* 64-bit buffer accumulating input bits */
+ uns i=0; /* How many source bits do we have buffered */
+ u32 x;
+ byte *w=dest;
+
+ while (len--)
+ {
+ x = *src++;
+ /* Append the new byte at bit position i; it may straddle the lo/hi boundary */
+ if (i < 32)
+ {
+ lo |= x << i;
+ if (i > 24)
+ hi |= x >> (32-i);
+ }
+ else
+ hi |= x << (i-32);
+ i += 8;
+ if (i >= 39)
+ {
+ encode_block(w, hi, lo);
+ w += 5;
+ /* Keep the bits above the 39 just consumed (bit 39 is bit 7 of hi) */
+ lo = hi >> 7;
+ hi = 0;
+ i -= 39;
+ }
+ }
+ if (i) /* Partial block */
+ {
+ encode_block(w, hi, lo);
+ w += (i+8)/8; /* Just check logarithms if you want to understand */
+ }
+ return w - dest;
+}
+
+/*
+ * Decode @len bytes of base-224 data from @src to @dest and return the
+ * number of bytes produced. The input is processed in blocks of up to
+ * 5 bytes; a shorter block is accepted only as the last one. Every input
+ * byte must be at least 0x20, which is checked by ASSERT().
+ */
+uns
+base224_decode(byte *dest, const byte *src, uns len)
+{
+  u32 hi=0, lo=0;			/* 64-bit buffer accumulating output bits */
+  uns i=0;				/* How many bits do we have accumulated */
+  u32 h, l;				/* Decoding of the current block */
+  uns x;				/* base-7 part of the current block */
+  uns len0;				/* Length of the current block in input bytes */
+  byte *start = dest;
+
+  do
+    {
+      if (!len)
+        break;
+      len0 = len;
+
+      ASSERT(*src >= 0x20);		/* byte 0 */
+      h = 0;
+      l = *src & 0x1f;
+      x = (*src++ >> 5) - 1;
+      if (!--len)
+        goto blockend;
+
+      ASSERT(*src >= 0x20);		/* byte 1 */
+      l |= (*src & 0x1f) << 7;
+      x += ((*src++ >> 5) - 1) * 7;
+      if (!--len)
+        goto blockend;
+
+      ASSERT(*src >= 0x20);		/* byte 2 */
+      l |= (*src & 0x1f) << 15;
+      x += ((*src++ >> 5) - 1) * 7*7;
+      if (!--len)
+        goto blockend;
+
+      ASSERT(*src >= 0x20);		/* byte 3 */
+      l |= (*src & 0x1f) << 23;
+      x += ((*src++ >> 5) - 1) * 7*7*7;
+      if (!--len)
+        goto blockend;
+
+      ASSERT(*src >= 0x20);		/* byte 4 */
+      /*
+       * Only bit 0 of this byte belongs to bit 31 of l. Shift as u32:
+       * shifting the int-promoted byte left by 31 would overflow a signed
+       * int, which is undefined behaviour.
+       */
+      l |= (u32) *src << 31;
+      h = (*src & 0x1f) >> 1;
+      x += ((*src++ >> 5) - 1) * 7*7*7*7;
+      --len;
+
+    blockend:
+      len0 -= len;
+      l |= ((x & 0x0003) << 5)		/* Decode base-7 */
+        | ((x & 0x001c) << 10)
+        | ((x & 0x00e0) << 15)
+        | ((x & 0x0700) << 20);
+      h |= (x & 0x3800) >> 7;
+
+      DBG("<<< i=%d h=%08x l=%08x x=%d len0=%d", i, h, l, x, len0);
+      /* Append the decoded block above the i bits left over from the previous one */
+      lo |= l << i;
+      hi |= h << i;
+      if (i)
+        hi |= l >> (32-i);
+      i += len0*8 - 1;
+
+      /* Flush all complete bytes */
+      while (i >= 8)
+        {
+          *dest++ = lo;
+          lo = (lo >> 8U) | (hi << 24);
+          hi >>= 8;
+          i -= 8;
+        }
+    }
+  while (len0 == 5);
+  return dest-start;
+}
--- /dev/null
+/*
+ * UCW Library -- Base 224 Encoding & Decoding
+ *
+ * (c) 2002 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+/**
+ * Encodes @len bytes of data pointed to by @src by base224 encoding.
+ * Stores them in @dest and returns the number of bytes the output
+ * takes. The encoder always writes whole 5-byte blocks, so up to 4 bytes
+ * beyond the returned length can be overwritten; size @dest by
+ * @BASE224_ENC_LENGTH().
+ */
+uns base224_encode(byte *dest, const byte *src, uns len);
+/**
+ * Decodes @len bytes of data pointed to by @src from base224 encoding.
+ * The input must consist of bytes in the range 0x20-0xff only; the decoder
+ * checks this by ASSERT() and does not skip invalid characters.
+ * The result is stored into @dest and length of the result is returned.
+ */
+uns base224_decode(byte *dest, const byte *src, uns len);
+
+/**
+ * Use this macro to calculate @base224_encode() output buffer size.
+ * The result is rounded up to whole 5-byte blocks, which covers the up to
+ * 4 extra bytes written after the end of the encoded data.
+ */
+#define BASE224_ENC_LENGTH(x) (((x)*8+38)/39*5)
+
+/*
+ * When called for BASE224_IN_CHUNK-byte chunks, the result will be
+ * always BASE224_OUT_CHUNK bytes long. If a longer block is split
+ * to such chunks, the result will be identical.
+ */
+#define BASE224_IN_CHUNK 39 /** Chunk size on the un-encoded side. **/
+#define BASE224_OUT_CHUNK 40 /** Chunk size on the encoded side. **/
--- /dev/null
+/*
+ * UCW Library -- Base 64 Encoding & Decoding
+ *
+ * (c) 2002, Robert Spalek <robert@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#undef LOCAL_DEBUG
+
+#include "ucw/lib.h"
+#include "ucw/base64.h"
+
+#include <string.h>
+
+static const byte base64_table[] =
+ { 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
+ 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
+ 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
+ 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
+ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/', '\0'
+ };
+static const byte base64_pad = '=';
+
+/*
+ * Encode @len bytes from @src as base64 text in @dest (no terminating
+ * '\0' is written) and return the number of characters produced, which is
+ * always a multiple of 4 thanks to '=' padding.
+ */
+uns
+base64_encode(byte *dest, const byte *src, uns len)
+{
+  byte *out = dest;
+
+  /* Every complete group of 3 octets (24 bits) becomes 4 characters */
+  for (; len > 2; src += 3, len -= 3)
+    {
+      *out++ = base64_table[src[0] >> 2];
+      *out++ = base64_table[((src[0] & 0x03) << 4) + (src[1] >> 4)];
+      *out++ = base64_table[((src[1] & 0x0f) << 2) + (src[2] >> 6)];
+      *out++ = base64_table[src[2] & 0x3f];
+    }
+
+  /* One or two octets left over: encode what we have and pad to 4 characters */
+  if (len == 2)
+    {
+      *out++ = base64_table[src[0] >> 2];
+      *out++ = base64_table[((src[0] & 0x03) << 4) + (src[1] >> 4)];
+      *out++ = base64_table[(src[1] & 0x0f) << 2];
+      *out++ = base64_pad;
+    }
+  else if (len == 1)
+    {
+      *out++ = base64_table[src[0] >> 2];
+      *out++ = base64_table[(src[0] & 0x03) << 4];
+      *out++ = base64_pad;
+      *out++ = base64_pad;
+    }
+  return out - dest;
+}
+
+/*
+ * Decode @len bytes of base64 text from @src into @dest and return the
+ * number of bytes decoded. Decoding stops at the first '=' padding
+ * character; characters outside the base64 alphabet are skipped.
+ *
+ * Note that when the input ends in the middle of a 4-character group, the
+ * partially assembled byte is stored at dest[return value], so @dest needs
+ * room for one byte more than the decoded data.
+ */
+uns
+base64_decode(byte *dest, const byte *src, uns len)
+{
+  const byte *current = src;
+  uns ch;
+  uns i = 0, j = 0;
+  static byte reverse_table[256];
+  static uns table_built = 0;
+
+  if (table_built == 0) {
+    /*
+     * Build the character -> 6-bit value map. Only the 64 real alphabet
+     * entries are searched: the table also ends with a '\0', which strchr()
+     * used to match, so that a zero byte in the input was decoded as the
+     * bogus value 64 instead of being skipped.
+     */
+    for(ch = 0; ch < 256; ch++) {
+      const byte *chp = memchr(base64_table, ch, 64);
+      reverse_table[ch] = chp ? chp - base64_table : 0xff;
+    }
+    /* Mark the table as usable only after it has been filled in */
+    table_built = 1;
+  }
+
+  /* run through the whole string, converting as we go */
+  ch = 0;
+  while (len > 0) {
+    len--;
+    ch = *current++;
+    if (ch == base64_pad) break;
+
+    /* When Base64 gets POSTed, all pluses are interpreted as spaces.
+       This line changes them back.  It's not exactly the Base64 spec,
+       but it is completely compatible with it (the spec says that
+       spaces are invalid).  This will also save many people considerable
+       headache.  - Turadg Aleahmad <turadg@wise.berkeley.edu>
+    */
+
+    if (ch == ' ') ch = '+';
+
+    ch = reverse_table[ch];
+    if (ch == 0xff) continue;		/* not a base64 character */
+
+    /* Each character carries 6 bits; i counts the valid characters seen so far */
+    switch(i % 4) {
+    case 0:
+      dest[j] = ch << 2;
+      break;
+    case 1:
+      dest[j++] |= ch >> 4;
+      dest[j] = (ch & 0x0f) << 4;
+      break;
+    case 2:
+      dest[j++] |= ch >>2;
+      dest[j] = (ch & 0x03) << 6;
+      break;
+    case 3:
+      dest[j++] |= ch;
+      break;
+    }
+    i++;
+  }
+  return j;
+}
--- /dev/null
+/*
+ * UCW Library -- Base 64 Encoding & Decoding
+ *
+ * (c) 2002, Robert Spalek <robert@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+/**
+ * Encodes @len bytes of data pointed to by @src by base64 encoding.
+ * Stores them in @dest and returns the number of bytes the output
+ * takes.
+ */
+uns base64_encode(byte *dest, const byte *src, uns len);
+/**
+ * Decodes @len bytes of data pointed to by @src from base64 encoding.
+ * All invalid characters are ignored. The result is stored into @dest
+ * and length of the result is returned.
+ */
+uns base64_decode(byte *dest, const byte *src, uns len);
+
+/**
+ * Use this macro to calculate @base64_encode() output buffer size.
+ */
+#define BASE64_ENC_LENGTH(x) (((x)+2)/3 *4)
+
+/*
+ * When called for BASE64_IN_CHUNK-byte chunks, the result will be
+ * always BASE64_OUT_CHUNK bytes long. If a longer block is split
+ * to such chunks, the result will be identical.
+ */
+#define BASE64_IN_CHUNK 3 /** Size of chunk on the un-encoded side. **/
+#define BASE64_OUT_CHUNK 4 /** Size of chunk on the encoded side. **/
+
--- /dev/null
+# Tests for base64 and base224 modules
+
+Name: Base64 encode
+Run: ../obj/ucw/basecode -e
+Input: Here are some test data
+Output: SGVyZSBhcmUgc29tZSB0ZXN0IGRhdGEK
+
+Name: Base64 decode
+Run: ../obj/ucw/basecode -d
+Input: SGVyZSBhcmUgc29tZSB0ZXN0IGRhdGEK
+Output: Here are some test data
+
+Name: Base224 encode & decode
+Run: ../obj/ucw/basecode -E | ../obj/ucw/basecode -D
+Input: Some more test data for 224 encoding
+Output: Some more test data for 224 encoding
--- /dev/null
+/*
+ * UCW Library -- A simple growing buffer for byte-sized items
+ *
+ * (c) 2006 Pavel Charvat <pchar@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+#include "ucw/bbuf.h"
+
+#include <stdio.h>
+
+/*
+ * Format @fmt with @args into the growing buffer @bb, starting @ofs bytes
+ * from its beginning, and enlarge the buffer until the whole result fits.
+ * Returns a pointer to the formatted string inside the buffer.
+ *
+ * @args is never consumed directly: each vsnprintf() attempt works on a
+ * fresh va_copy(), because the formatting may have to be repeated.
+ */
+char *
+bb_vprintf_at(bb_t *bb, uns ofs, const char *fmt, va_list args)
+{
+ /* Make sure there is room for at least one byte at @ofs */
+ bb_grow(bb, ofs + 1);
+ va_list args2;
+ va_copy(args2, args);
+ int cnt = vsnprintf(bb->ptr + ofs, bb->len - ofs, fmt, args2);
+ va_end(args2);
+ if (cnt < 0)
+ {
+ /* Our C library doesn't support C99 return value of vsnprintf, so we need to iterate */
+ do
+ {
+ bb_do_grow(bb, bb->len + 1);
+ va_copy(args2, args);
+ cnt = vsnprintf(bb->ptr + ofs, bb->len - ofs, fmt, args2);
+ va_end(args2);
+ }
+ while (cnt < 0);
+ }
+ else if ((uns)cnt >= bb->len - ofs)
+ {
+ /* The output was truncated, but we know the needed size now: grow once and redo */
+ bb_do_grow(bb, ofs + cnt + 1);
+ va_copy(args2, args);
+ int cnt2 = vsnprintf(bb->ptr + ofs, bb->len - ofs, fmt, args2);
+ va_end(args2);
+ ASSERT(cnt2 == cnt);
+ }
+ return bb->ptr + ofs;
+}
+
+char *
+bb_printf_at(bb_t *bb, uns ofs, const char *fmt, ...)
+{
+ va_list args;
+ va_start(args, fmt);
+ char *res = bb_vprintf_at(bb, ofs, fmt, args);
+ va_end(args);
+ return res;
+}
+
+char *
+bb_vprintf(bb_t *bb, const char *fmt, va_list args)
+{
+ return bb_vprintf_at(bb, 0, fmt, args);
+}
+
+char *
+bb_printf(bb_t *bb, const char *fmt, ...)
+{
+ va_list args;
+ va_start(args, fmt);
+ char *res = bb_vprintf_at(bb, 0, fmt, args);
+ va_end(args);
+ return res;
+}
+
+#ifdef TEST
+
+int main(void)
+{
+ bb_t bb;
+ bb_init(&bb);
+ char *x = bb_printf(&bb, "<Hello, %s!>", "World");
+ fputs(x, stdout);
+ x = bb_printf_at(&bb, 5, "<Hello, %50s!>\n", "World");
+ fputs(x, stdout);
+ bb_done(&bb);
+ return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- A simple growing buffer for byte-sized items.
+ *
+ * (c) 2004 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_BBUF_H
+#define _UCW_BBUF_H
+
+#define GBUF_TYPE byte
+#define GBUF_PREFIX(x) bb_##x
+#include "ucw/gbuf.h"
+
+/**
+ * printf() into a growing buffer with `va_list` arguments.
+ * Generates a `'\0'`-terminated string at the beginning of the buffer
+ * and returns pointer to it.
+ *
+ * See @bb_printf().
+ **/
+char *bb_vprintf(bb_t *bb, const char *fmt, va_list args);
+/**
+ * printf() into a growing buffer.
+ * Generates a `'\0'`-terminated string at the beginning of the buffer
+ * and returns pointer to it.
+ *
+ * See @bb_vprintf().
+ **/
+char *bb_printf(bb_t *bb, const char *fmt, ...);
+/**
+ * Like @bb_vprintf(), but it does not start at the beginning of the
+ * buffer, but @ofs bytes further.
+ *
+ * Returns pointer to the new string (eg. @ofs bytes after the
+ * beginning of buffer).
+ **/
+char *bb_vprintf_at(bb_t *bb, uns ofs, const char *fmt, va_list args);
+/**
+ * Like @bb_vprintf_at(), but it takes individual arguments.
+ **/
+char *bb_printf_at(bb_t *bb, uns ofs, const char *fmt, ...);
+
+#endif
--- /dev/null
+# Tests for growing buffers
+
+Run: ../obj/ucw/bbuf-t
+Out: <Hello, World!><Hello, World!>
--- /dev/null
+/*
+ * UCW Library -- Allocation of Large Aligned Buffers
+ *
+ * (c) 2006--2007 Martin Mares <mj@ucw.cz>
+ * (c) 2007 Pavel Charvat <char@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+
+#include <sys/mman.h>
+#include <string.h>
+#include <limits.h>
+
+void *
+page_alloc(u64 len)
+{
+ if (!len)
+ return NULL;
+ if (len > SIZE_MAX)
+ die("page_alloc: Size %llu is too large for the current architecture", (long long) len);
+ ASSERT(!(len & (CPU_PAGE_SIZE-1)));
+ byte *p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+ if (p == (byte*) MAP_FAILED)
+ die("Cannot mmap %llu bytes of memory: %m", (long long)len);
+ return p;
+}
+
+void *
+page_alloc_zero(u64 len)
+{
+ void *p = page_alloc(len);
+ bzero(p, len);
+ return p;
+}
+
+void
+page_free(void *start, u64 len)
+{
+ ASSERT(!(len & (CPU_PAGE_SIZE-1)));
+ ASSERT(!((uintptr_t) start & (CPU_PAGE_SIZE-1)));
+ munmap(start, len);
+}
+
+void *
+page_realloc(void *start, u64 old_len, u64 new_len)
+{
+ void *p = page_alloc(new_len);
+ memcpy(p, start, MIN(old_len, new_len));
+ page_free(start, old_len);
+ return p;
+}
+
+static u64
+big_round(u64 len)
+{
+ return ALIGN_TO(len, (u64)CPU_PAGE_SIZE);
+}
+
+/*
+ * Allocate a buffer of at least @len bytes by page_alloc(); the size is
+ * rounded up to a multiple of CPU_PAGE_SIZE. Free it by big_free() with
+ * the same @len.
+ *
+ * With CONFIG_DEBUG, one extra page is added on each side of the buffer
+ * and made inaccessible (PROT_NONE); the leading page also remembers @len,
+ * which big_free() verifies.
+ */
+void *
+big_alloc(u64 len)
+{
+ u64 l = big_round(len);
+ if (l > SIZE_MAX - 2*CPU_PAGE_SIZE)
+ die("big_alloc: Size %llu is too large for the current architecture", (long long) len);
+#ifdef CONFIG_DEBUG
+ l += 2*CPU_PAGE_SIZE;
+#endif
+ byte *p = page_alloc(l);
+#ifdef CONFIG_DEBUG
+ /* Store the requested length first, the page becomes inaccessible right after that */
+ *(u64*)p = len;
+ mprotect(p, CPU_PAGE_SIZE, PROT_NONE);
+ mprotect(p+l-CPU_PAGE_SIZE, CPU_PAGE_SIZE, PROT_NONE);
+ /* The caller gets the address just behind the leading guard page */
+ p += CPU_PAGE_SIZE;
+#endif
+ return p;
+}
+
+void *
+big_alloc_zero(u64 len)
+{
+ void *p = big_alloc(len);
+ bzero(p, big_round(len));
+ return p;
+}
+
+/*
+ * Free a buffer obtained from big_alloc(). @len must be the length which
+ * was passed to big_alloc(); with CONFIG_DEBUG this is checked against the
+ * value stored in the leading guard page.
+ */
+void
+big_free(void *start, u64 len)
+{
+ byte *p = start;
+ u64 l = big_round(len);
+#ifdef CONFIG_DEBUG
+ /* Step back to the leading guard page and make it readable to get the stored length */
+ p -= CPU_PAGE_SIZE;
+ mprotect(p, CPU_PAGE_SIZE, PROT_READ);
+ ASSERT(*(u64*)p == len);
+ l += 2*CPU_PAGE_SIZE;
+#endif
+ page_free(p, l);
+}
+
+#ifdef TEST
+
+int main(void)
+{
+ byte *p = big_alloc(123456);
+ // p[-1] = 1;
+ big_free(p, 123456);
+ return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Binomial Heaps: Declarations
+ *
+ * (c) 2003 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_BINHEAP_NODE_H
+#define _UCW_BINHEAP_NODE_H
+
+/***
+ * [[common]]
+ * Common definitions
+ * ------------------
+ ***/
+
+/**
+ * Common header of binomial heap nodes.
+ **/
+struct bh_node {
+ struct bh_node *first_son;
+ struct bh_node *last_son;
+ struct bh_node *next_sibling;
+ byte order;
+};
+
+/**
+ * A binomial heap.
+ **/
+struct bh_heap {
+ struct bh_node root;
+};
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Binomial Heaps: Testing
+ *
+ * (c) 2003 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+
+#include <stdio.h>
+#include <string.h>
+
+#define BH_PREFIX(x) bht_##x
+#define BH_WANT_INSERT
+#define BH_WANT_FINDMIN
+#define BH_WANT_DELETEMIN
+#include "ucw/binheap-node.h"
+
+struct item {
+ struct bh_node n;
+ uns key;
+};
+
+static inline uns bht_key(struct bh_node *n)
+{
+ return ((struct item *)n)->key;
+}
+
+static inline uns bht_less(struct bh_node *a, struct bh_node *b)
+{
+ return bht_key(a) < bht_key(b);
+}
+
+static void
+bht_do_dump(struct bh_node *a, struct bh_node *expected_last, uns offset)
+{
+ if (!a)
+ return;
+ printf("%*s", offset, "");
+ printf("[%d](%d)%s\n", a->order, bht_key(a), a == expected_last ? " L" : "");
+ for (struct bh_node *b=a->first_son; b; b=b->next_sibling)
+ bht_do_dump(b, a->last_son, offset+1);
+}
+
+static void
+bht_dump(struct bh_heap *h)
+{
+ printf("root\n");
+ for (struct bh_node *b=h->root.first_son; b; b=b->next_sibling)
+ bht_do_dump(b, b->last_son, 1);
+}
+
+#include "ucw/binheap.h"
+
+int main(void)
+{
+ uns i;
+ struct bh_heap h;
+#define N 1048576
+#define K(i) ((259309*i+1009)%N)
+
+ bht_init(&h);
+
+ for (i=0; i<N; i++)
+ {
+ struct item *a = xmalloc_zero(sizeof(*a));
+ a->key = K(i);
+ // printf("Insert %d\n", a->key);
+ bht_insert(&h, &a->n);
+ // bht_dump(&h);
+ }
+ // bht_dump(&h);
+ ASSERT(bht_key(bht_findmin(&h)) == 0);
+ uns cnt = 0;
+ BH_FOR_ALL(bht_, &h, a)
+ {
+ cnt++;
+ }
+ BH_END_FOR;
+ printf("cnt=%d\n", cnt);
+ ASSERT(cnt == N);
+ for (i=0; i<N; i++)
+ {
+ struct item *a = (struct item *) bht_deletemin(&h);
+ // printf("\nDeleted %d:\n", a->key);
+ ASSERT(a->key == i);
+ // bht_dump(&h);
+ }
+ bht_dump(&h);
+
+ return 0;
+}
--- /dev/null
+# Test for the binheap module
+
+Run: ../obj/ucw/binheap-test
+Out: cnt=1048576
+ root
--- /dev/null
+/*
+ * UCW Library -- Binomial Heaps
+ *
+ * (c) 2003 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+/*
+ * This is a generic implementation of Binomial Heaps. Each time you include
+ * this file with parameters set in the corresponding preprocessor macros
+ * as described below, it generates functions for manipulating the particular
+ * version of the binomial heap.
+ */
+
+/***
+ * [[generator]]
+ * Interface to the generator
+ * --------------------------
+ *
+ * To use the binomial heaps, you need to specify:
+ *
+ * - `BH_PREFIX(x)` -- macro to add a name prefix (used on all global names
+ * defined by the generator). All further names mentioned
+ * here except for macro names will be implicitly prefixed.
+ *
+ * Then you continue by including `ucw/binheap-node.h` which defines <<struct_bh_node,struct bh_node>>
+ * and <<struct_bh_heap,struct bh_heap>> (both without prefix). The heap elements are always allocated by
+ * you and they must include `struct bh_node` which serves as a handle used for all
+ * the heap functions and it contains all information needed for heap-keeping.
+ * The heap itself is also allocated by you and it's represented by `struct bh_heap`.
+ *
+ * When you have the declaration of heap nodes, you continue with defining:
+ *
+ * - `less(p,q)` -- returns `1` if the key corresponding to `bh_node *p`
+ * is less than the one corresponding to `*q`.
+ *
+ * Then specify what operations you request:
+ *
+ * - `init(heap\*)` -- initialize the heap (always defined).
+ * - `insert(heap\*, node\*)` -- insert the node to the heap (`BH_WANT_INSERT`).
+ * - `node\* findmin(heap\*)` -- find node with minimum key (`BH_WANT_FINDMIN`).
+ * - `node\* deletemin(heap\*)` -- findmin and delete the node (`BH_WANT_DELETEMIN`).
+ *
+ * Then include `ucw/binheap.h` and voila, you have a binomial heap
+ * suiting all your needs (at least those which you've revealed :) ).
+ *
+ * You also get an iterator macro at no extra charge:
+ *
+ * BH_FOR_ALL(bh_prefix, heap*, variable)
+ * {
+ * // node* variable gets declared automatically
+ * do_something_with_node(variable);
+ * // use BH_BREAK and BH_CONTINUE instead of break and continue
+ * // you must not alter contents of the binomial heap here
+ * }
+ * BH_END_FOR;
+ *
+ * After including this file, all parameter macros are automatically undef'd.
+ ***/
+
+#define BH_NODE struct bh_node
+#define BH_HEAP struct bh_heap
+
+static void
+BH_PREFIX(merge)(BH_NODE *a, BH_NODE *b)
+{
+ BH_NODE **pp = &a->first_son;
+ BH_NODE *q = b->first_son;
+ BH_NODE *p, *r, *s;
+
+ while ((p = *pp) && q)
+ {
+ /* p,q are the next nodes of a,b; pp points to where p is linked */
+ if (p->order < q->order) /* p is smaller => skip it */
+ pp = &p->next_sibling;
+ else if (p->order > q->order) /* q is smaller => insert it before p */
+ {
+ r = q;
+ q = q->next_sibling;
+ r->next_sibling = p;
+ *pp = r;
+ pp = &r->next_sibling;
+ }
+ else /* p and q are of the same order => need to merge them */
+ {
+ if (BH_PREFIX(less)(p, q)) /* we'll hang r below s */
+ {
+ r = q;
+ s = p;
+ }
+ else
+ {
+ r = p;
+ s = q;
+ }
+ *pp = p->next_sibling; /* unlink p,q from their lists */
+ q = q->next_sibling;
+
+ if (s->last_son) /* merge r to s, increasing order */
+ s->last_son->next_sibling = r;
+ else
+ s->first_son = r;
+ s->last_son = r;
+ s->order++;
+ r->next_sibling = NULL;
+
+ if (!q || q->order > s->order) /* put the result into the b's list if possible */
+ {
+ s->next_sibling = q;
+ q = s;
+ }
+ else /* otherwise put the result to the a's list */
+ {
+ p = s->next_sibling = *pp;
+ *pp = s;
+ if (p && p->order == s->order) /* 3-collision */
+ pp = &s->next_sibling;
+ }
+ }
+ }
+ if (!p)
+ *pp = q;
+}
+
+#ifdef BH_WANT_INSERT
+/*
+ * Insert the node @a to the heap. The node is turned into a single-node
+ * tree (no sons, no siblings, order 0) and merged into the root list by
+ * means of a temporary list head.
+ */
+static void
+BH_PREFIX(insert)(BH_HEAP *heap, BH_NODE *a)
+{
+  BH_NODE sh;				/* fake list head; merge() reads only its first_son */
+
+  sh.first_son = a;
+  a->first_son = a->last_son = a->next_sibling = NULL;
+  a->order = 0;				/* do not trust the caller: a node recycled after deletemin keeps its old order */
+  BH_PREFIX(merge)(&heap->root, &sh);
+}
+#endif
+
+#ifdef BH_WANT_FINDMIN
+/*
+ * Return the node with the minimum key without removing it, or NULL if
+ * the heap is empty. Only the roots of the trees need to be examined.
+ */
+static BH_NODE *
+BH_PREFIX(findmin)(BH_HEAP *heap)
+{
+  BH_NODE *winner = heap->root.first_son;
+
+  if (!winner)
+    return NULL;
+  for (BH_NODE *t = winner->next_sibling; t; t = t->next_sibling)
+    if (BH_PREFIX(less)(t, winner))
+      winner = t;
+  return winner;
+}
+#endif
+
+#ifdef BH_WANT_DELETEMIN
+/*
+ * Remove the node with the minimum key from the heap and return it, or
+ * return NULL if the heap is empty. The sons of the removed node are
+ * merged back to the root list. The link fields of the returned node are
+ * left as they were.
+ */
+static BH_NODE *
+BH_PREFIX(deletemin)(BH_HEAP *heap)
+{
+ BH_NODE *p, **pp, **bestp;
+
+ /* Find the minimum among the tree roots; bestp points to the link leading to it */
+ bestp = NULL;
+ for (pp=&heap->root.first_son; p=*pp; pp=&p->next_sibling)
+ if (!bestp || BH_PREFIX(less)(p, *bestp))
+ bestp = pp;
+ if (!bestp)
+ return NULL;
+
+ /* Unlink it from the root list and merge its sons back */
+ p = *bestp;
+ *bestp = p->next_sibling;
+ BH_PREFIX(merge)(&heap->root, p);
+ return p;
+}
+#endif
+
+static inline void
+BH_PREFIX(init)(BH_HEAP *heap)
+{
+ bzero(heap, sizeof(*heap));
+}
+
+#ifndef BH_FOR_ALL
+
+/*
+ * Iteration over all nodes reachable from the heap's root:
+ *
+ *	BH_FOR_ALL(prefix, heap, var)
+ *	  ... statements using var ...
+ *	BH_END_FOR;
+ *
+ * BH_FOR_ALL opens a `do { ... while (bh_sp) {` construct which BH_END_FOR
+ * closes, so the two must always be used as a pair. The traversal keeps an
+ * explicit stack of 32 node pointers: each step pops a node into @bh_var
+ * (declared by the macro as `struct bh_node *`), pushes its next sibling and
+ * its first son (if present) and then runs the user's statements.
+ * The stack has no overflow check. The @bh_px argument is not used by the
+ * expansion.
+ */
+#define BH_FOR_ALL(bh_px, bh_heap, bh_var) \
+do { \
+ struct bh_node *bh_stack[32]; \
+ uns bh_sp = 0; \
+ if (bh_stack[0] = (bh_heap)->root.first_son) \
+ bh_sp++; \
+ while (bh_sp) { \
+ struct bh_node *bh_var = bh_stack[--bh_sp]; \
+ if (bh_var->next_sibling) \
+ bh_stack[bh_sp++] = bh_var->next_sibling; \
+ if (bh_var->first_son) \
+ bh_stack[bh_sp++] = bh_var->first_son;
+/* Closes the loop and the block opened by BH_FOR_ALL. */
+#define BH_END_FOR \
+ } \
+} while (0)
+
+/* BH_BREAK empties the traversal stack and leaves the loop; BH_CONTINUE proceeds to the next node. */
+#define BH_BREAK { bh_sp=0; break; }
+#define BH_CONTINUE continue
+
+#endif
+
+#undef BH_PREFIX
+#undef BH_NODE
+#undef BH_HEAP
+#undef BH_WANT_INSERT
+#undef BH_WANT_FINDMIN
+#undef BH_WANT_DELETEMIN
--- /dev/null
+/*
+ * UCW Library -- Generic Binary Search
+ *
+ * (c) 2005 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+/***
+ * [[defs]]
+ * Definitions
+ * -----------
+ ***/
+
+/**
+ * Find the first element not lower than @x in the sorted array @ary of @N elements (non-decreasing order).
+ * Returns the index of the found element or @N if no such element exists. Uses `ary_lt_x(ary,i,x)` to compare the @i'th element with @x.
+ * The time complexity is `O(log(N))`.
+ *
+ * @N is evaluated once; @ary and @x are passed to `ary_lt_x` in every step of the search.
+ * The midpoint is computed as `lo + (hi-lo)/2`, which cannot overflow even for very large @N.
+ * The internal variables have distinctive names (`bs_lo_`, `bs_hi_`, `bs_mid_`), so that arguments
+ * mentioning variables like `l`, `r` or `m` are not captured by the macro.
+ **/
+#define BIN_SEARCH_FIRST_GE_CMP(ary,N,x,ary_lt_x) ({ \
+  uns bs_lo_ = 0, bs_hi_ = (N); \
+  while (bs_lo_ < bs_hi_) \
+    { \
+      uns bs_mid_ = bs_lo_ + (bs_hi_ - bs_lo_) / 2; \
+      if (ary_lt_x(ary,bs_mid_,x)) \
+        bs_lo_ = bs_mid_ + 1; \
+      else \
+        bs_hi_ = bs_mid_; \
+    } \
+  bs_lo_; \
+})
+
+/**
+ * The default comparison macro for @BIN_SEARCH_FIRST_GE_CMP().
+ * Yields a non-zero value iff the @i'th element of @ary is lower than @x.
+ * The expansion is fully parenthesized, so it stays correct when used inside a larger expression (e.g. negated).
+ **/
+#define ARY_LT_NUM(ary,i,x) ((ary)[i] < (x))
+
+/**
+ * Same as @BIN_SEARCH_FIRST_GE_CMP(), but uses the default `<` operator (via @ARY_LT_NUM()) for comparisons.
+ **/
+#define BIN_SEARCH_FIRST_GE(ary,N,x) BIN_SEARCH_FIRST_GE_CMP(ary,N,x,ARY_LT_NUM)
+
+/**
+ * Search the sorted array @ary of @N elements (non-decreasing) for the first occurrence of @x.
+ * Returns the index or -1 if no such element exists. Uses the `<` operator for comparisons
+ * and `!=` for the final equality check.
+ *
+ * The arguments are evaluated more than once. The internal index variable is called `bs_idx_`
+ * (not `i`), so that calls like `BIN_SEARCH_EQ(a, n, key[i])` refer to the caller's `i`.
+ **/
+#define BIN_SEARCH_EQ(ary,N,x) ({ \
+  int bs_idx_ = BIN_SEARCH_FIRST_GE(ary,N,x); \
+  if (bs_idx_ >= (N) || (ary)[bs_idx_] != (x)) \
+    bs_idx_ = -1; \
+  bs_idx_; \
+})
--- /dev/null
+/*
+ * UCW Library -- Find Lowest Set Bit
+ *
+ * (c) 2005 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+#include "ucw/bitops.h"
+
+/*
+ * Just a table, the rest is in bitops.h.
+ *
+ * ffs_table[b] is the index (0..7) of the lowest set bit of the byte value b.
+ * The entry for b == 0 is 0; bitops.h declares bit_ffs() undefined for a zero
+ * argument. Each row below covers 16 consecutive byte values.
+ */
+
+const byte ffs_table[] = {
+ 0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
+};
+
+#ifdef TEST
+
+#include <stdio.h>
+
+/* Test driver: read hexadecimal numbers from stdin and print bit_ffs() of each on its own line. */
+int main(void)
+{
+  uns x;
+
+  for (;;)
+    {
+      if (scanf("%x", &x) != 1)
+        break;
+      printf("%d\n", bit_ffs(x));
+    }
+  return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Find Highest Set Bit
+ *
+ * (c) 1997-2005 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+#include "ucw/bitops.h"
+
+/*
+ * Return the index of the highest set bit of @x (0 for the least significant
+ * bit), or -1 when @x is zero.
+ *
+ * Works as a binary search over bit positions: each step tests whether any
+ * bit survives in the "upper half" mask of the current granularity; if so,
+ * the position is advanced and @x is restricted to those upper halves.
+ */
+int
+bit_fls(u32 x)
+{
+  uns pos = 0;
+
+  if (!x)
+    return -1;
+
+  if (x & 0xffff0000)
+    {
+      x &= 0xffff0000;
+      pos += 16;
+    }
+  if (x & 0xff00ff00)
+    {
+      x &= 0xff00ff00;
+      pos += 8;
+    }
+  if (x & 0xf0f0f0f0)
+    {
+      x &= 0xf0f0f0f0;
+      pos += 4;
+    }
+  if (x & 0xcccccccc)
+    {
+      x &= 0xcccccccc;
+      pos += 2;
+    }
+  if (x & 0xaaaaaaaa)
+    pos++;
+  return pos;
+}
+
+#ifdef TEST
+
+#include <stdio.h>
+
+/* Test driver: read hexadecimal numbers from stdin and print bit_fls() of each on its own line. */
+int main(void)
+{
+  uns x;
+
+  for (;;)
+    {
+      if (scanf("%x", &x) != 1)
+        break;
+      printf("%d\n", bit_fls(x));
+    }
+  return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Bit Array Operations
+ *
+ * (c) 2003--2006 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_BITARRAY_H
+#define _UCW_BITARRAY_H
+
+#include <string.h>
+
+/*
+ * A bit array is a plain array of 32-bit words; the functions below keep
+ * bit i in word i/32 at bit position i%32.
+ */
+typedef u32 *bitarray_t;
+/* Number of 32-bit words needed to hold n bits (rounded up). */
+#define BIT_ARRAY_WORDS(n) (((n)+31)/32)
+/* Number of bytes occupied by those words. */
+#define BIT_ARRAY_BYTES(n) (4*BIT_ARRAY_WORDS(n))
+/* Declare an array variable `name` large enough for `size` bits. */
+#define BIT_ARRAY(name,size) u32 name[BIT_ARRAY_WORDS(size)]
+
+/* Allocate storage for a bit array of @n bits using xmalloc(). */
+static inline bitarray_t
+bit_array_xmalloc(uns n)
+{
+  uns bytes = BIT_ARRAY_BYTES(n);
+
+  return xmalloc(bytes);
+}
+
+/* Allocate storage for a bit array of @n bits using xmalloc_zero(). */
+static inline bitarray_t
+bit_array_xmalloc_zero(uns n)
+{
+  uns bytes = BIT_ARRAY_BYTES(n);
+
+  return xmalloc_zero(bytes);
+}
+
+/* Clear all bits of the @n-bit array @a (whole words are zeroed). */
+static inline void
+bit_array_zero(bitarray_t a, uns n)
+{
+  uns bytes = BIT_ARRAY_BYTES(n);
+
+  bzero(a, bytes);
+}
+
+/* Set all bits of the @n-bit array @a, including the padding bits of the last word. */
+static inline void
+bit_array_set_all(bitarray_t a, uns n)
+{
+  uns bytes = BIT_ARRAY_BYTES(n);
+
+  memset(a, 0xff, bytes);
+}
+
+/*
+ * Set bit @i of the array @a to one.
+ * The mask is built from an unsigned 1: shifting a signed 1 into bit 31
+ * would be undefined behaviour.
+ */
+static inline void
+bit_array_set(bitarray_t a, uns i)
+{
+  a[i/32] |= (1U << (i%32));
+}
+
+/*
+ * Clear bit @i of the array @a.
+ * The mask is built from an unsigned 1: shifting a signed 1 into bit 31
+ * would be undefined behaviour.
+ */
+static inline void
+bit_array_clear(bitarray_t a, uns i)
+{
+  a[i/32] &= ~(1U << (i%32));
+}
+
+/* Set bit @i of the array @a when @x is non-zero, clear it otherwise. */
+static inline void
+bit_array_assign(bitarray_t a, uns i, uns x)
+{
+  if (!x)
+    bit_array_clear(a, i);
+  else
+    bit_array_set(a, i);
+}
+
+/*
+ * Test bit @i of the array @a. Returns a non-zero value (the masked word,
+ * not necessarily 1) iff the bit is set; see bit_array_get() for a 0/1 result.
+ * The mask is built from an unsigned 1: shifting a signed 1 into bit 31
+ * would be undefined behaviour.
+ */
+static inline uns
+bit_array_isset(bitarray_t a, uns i)
+{
+  return a[i/32] & (1U << (i%32));
+}
+
+/* Return the value of bit @i of the array @a as exactly 0 or 1. */
+static inline uns
+bit_array_get(bitarray_t a, uns i)
+{
+  return bit_array_isset(a, i) ? 1 : 0;
+}
+
+/* Set bit @i of the array @a and return its previous state as reported by bit_array_isset(). */
+static inline uns
+bit_array_test_and_set(bitarray_t a, uns i)
+{
+  uns previous = bit_array_isset(a, i);
+
+  bit_array_set(a, i);
+  return previous;
+}
+
+/* Clear bit @i of the array @a and return its previous state as reported by bit_array_isset(). */
+static inline uns
+bit_array_test_and_clear(bitarray_t a, uns i)
+{
+  uns previous = bit_array_isset(a, i);
+
+  bit_array_clear(a, i);
+  return previous;
+}
+
+/*
+ * Iterate over all set bits, possibly destructively:
+ *
+ *	BIT_ARRAY_FISH_BITS_BEGIN(var, ary, size)
+ *	  { ... body using var ... }
+ *	BIT_ARRAY_FISH_BITS_END
+ *
+ * For every word of the @size-bit array @ary, the inner loop runs until the
+ * word becomes zero; each set bit found is cleared in the array and the body
+ * is executed with `var` holding the bit's index. The helper variables
+ * `var_hi` and `var_lo` are declared by the macro. Bit masks are built from
+ * an unsigned 1 (a signed `1 << 31` is undefined behaviour) and @ary is
+ * parenthesized, so it may be an arbitrary expression.
+ */
+#define BIT_ARRAY_FISH_BITS_BEGIN(var,ary,size) \
+  for (uns var##_hi=0; var##_hi < BIT_ARRAY_WORDS(size); var##_hi++) \
+    for (uns var##_lo=0; (ary)[var##_hi]; var##_lo++) \
+      if ((ary)[var##_hi] & (1U << var##_lo)) \
+        { \
+          uns var = 32*var##_hi + var##_lo; \
+          (ary)[var##_hi] &= ~(1U << var##_lo); \
+          do
+
+#define BIT_ARRAY_FISH_BITS_END \
+          while (0); \
+        }
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Bit Operations
+ *
+ * (c) 2005 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_BITOPS_H
+#define _UCW_BITOPS_H
+
+/* Find highest bit set (i.e., the floor of the binary logarithm) (bit-fls.c) */
+
+int bit_fls(u32 x); /* bit_fls(0)=-1 */
+
+/* Find lowest bit set, undefined for zero argument (bit-ffs.c) */
+
+extern const byte ffs_table[256];
+
+#ifdef __pentium4 /* On other ia32 machines, the C version is faster */
+
+/*
+ * Variant used when __pentium4 is defined: a single `bsfl` instruction
+ * computes the result, reading @w from a register or memory and writing
+ * the result back to @w.
+ */
+static inline uns bit_ffs(uns w)
+{
+ asm("bsfl %1,%0" :"=r" (w) :"rm" (w));
+ return w;
+}
+
+#else
+
+/*
+ * Portable variant: locate the lowest non-zero byte of @w in two steps
+ * (16-bit half, then byte within it) and look up the lowest set bit of
+ * that byte in ffs_table.
+ */
+static inline uns bit_ffs(uns w)
+{
+  uns shift = 0;
+
+  if (!(w & 0xffff))
+    shift = 16;
+  if (!((w >> shift) & 0xff))
+    shift += 8;
+  return shift + ffs_table[(w >> shift) & 0xff];
+}
+
+#endif
+
+#endif
--- /dev/null
+# Tests for bitops modules
+
+Run: ../obj/ucw/bit-ffs-t
+In: 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 12345678
+ 23030300
+ 23030000
+ 23000000
+ 40000000
+ 80000000
+Out: 0
+ 1
+ 0
+ 2
+ 0
+ 1
+ 3
+ 8
+ 16
+ 24
+ 30
+ 31
+
+Run: ../obj/ucw/bit-fls-t
+In: 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 12345678
+ 23030303
+ 03030303
+ 00030303
+ 00000303
+ 0fedcba9
+Out: 0
+ 1
+ 1
+ 2
+ 2
+ 2
+ 28
+ 29
+ 25
+ 17
+ 9
+ 27
--- /dev/null
+/*
+ * UCW Library -- Bit Array Signatures -- A Dubious Detector of Duplicates
+ *
+ * (c) 2002 Martin Mares <mj@ucw.cz>
+ *
+ * Greatly inspired by: Faloutsos, C. and Christodoulakis, S.: Signature files
+ * (An access method for documents and its analytical performance evaluation),
+ * ACM Trans. Office Inf. Syst., 2(4):267--288, Oct. 1984.
+ *
+ * This data structure provides a very compact representation
+ * of a set of strings with insertion and membership search,
+ * but with a certain low probability it cheats by incidentally
+ * reporting a non-member as a member. Generally the larger you
+ * create the structure, the lower this probability is.
+ *
+ * How does it work: the structure is just an array of M bits
+ * and each possible element is hashed to a set of (at most) L
+ * bit positions. For each element of the represented set, we
+ * set its L bits to ones and we report as present all elements
+ * whose all L bits are set.
+ *
+ * Analysis: Let's assume N items have already been stored and let A
+ * denote L/M (density of the hash function). The probability that
+ * a fixed bit of the array is set by any of the N items is
+ * 1 - (1-1/M)^(NL) = 1 - ((1-1/M)^M)^NA = approx. 1 - e^-NA.
+ * This is minimized by setting A=(ln 2)/N (try taking derivative).
+ * Given a non-present item, the probability that all of the bits
+ * corresponding to this item are set by the other items (that is,
+ * the structure gives a false answer) is (1-e^-NA)^L = 2^-L.
+ * Hence, if we want to give false answers with probability less
+ * than epsilon, we take L := -log_2 epsilon, M := 1.45*N*L.
+ *
+ * Example: For a set of 10^7 items with P[error] < 10^-6, we set
+ * L := 20 and M := 290*10^6 bits = cca 34.5 MB (29 bits per item).
+ *
+ * We leave L and an upper bound for N as parameters set during
+ * creation of the structure. Currently, the structure is limited
+ * to 4 Gb = 512 MB.
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+#include "ucw/bitsig.h"
+#include "ucw/md5.h"
+
+#include <string.h>
+
+/*
+ * The signature itself; allocated by bitsig_init() as a single block with
+ * the bit array following the header.
+ */
+struct bitsig {
+ /*
+  * l          -- number of bit positions derived from each item
+  * m          -- size of the bit array in bits
+  * n          -- number of insertions which have set at least one new bit
+  * maxn       -- number of items the structure was dimensioned for
+  * max_m_mult -- (0xffffffff / m) * m; hash words at or above this limit
+  *               are skipped by bitsig_hash_bit() before reducing modulo m
+  */
+ uns l, m, n, maxn, max_m_mult;
+ /* MD5 digest of the current item, used as four 32-bit hash words */
+ u32 hash[4];
+ /* index of the hash word to be used next */
+ uns hindex;
+ /* the bit array, bit number `bit` lives in array[bit >> 3] at position (bit & 7) */
+ byte array[0];
+};
+
+/*
+ * Create an empty bit signature.
+ *
+ * @perrlog is the number of bits set per item (the L of the analysis at the
+ * top of this file, i.e. -log_2 of the desired error probability), @maxn is
+ * the expected maximum number of items. The array gets 1.45 * @maxn * @perrlog
+ * bits (rounded up); the function dies if that would need 2^32 bits or more.
+ *
+ * A degenerate request (@maxn or @perrlog equal to zero) would yield an array
+ * of zero bits and a division by zero below, so the size is clamped to at
+ * least one bit.
+ *
+ * Returns a structure allocated by xmalloc(); release it by bitsig_free().
+ */
+struct bitsig *
+bitsig_init(uns perrlog, uns maxn)
+{
+  struct bitsig *b;
+  u64 m;
+  uns mbytes;
+
+  m = ((u64) maxn * perrlog * 145 + 99) / 100;
+  if (m >= (u64) 1 << 32)
+    die("bitsig_init: bitsig array too large (maximum is 4 Gb)");
+  if (!m)
+    m = 1;			/* keep `0xffffffff / m` and `h % m` well-defined */
+  mbytes = (m + 7) >> 3U;
+  b = xmalloc(sizeof(struct bitsig) + mbytes);
+  b->l = perrlog;
+  b->m = m;
+  b->n = 0;
+  b->maxn = maxn;
+  b->max_m_mult = (0xffffffff / m) * m;
+  bzero(b->array, mbytes);
+  /* All arguments are unsigned, hence %u */
+  msg(L_DEBUG, "Initialized bitsig array with l=%u, m=%u (%u KB), expecting %u items", b->l, b->m, (mbytes+1023)/1024, maxn);
+  return b;
+}
+
+/* Release a signature created by bitsig_init() (header and bit array are a single xmalloc'ed block). */
+void
+bitsig_free(struct bitsig *b)
+{
+ xfree(b);
+}
+
+/*
+ * Start hashing of the zero-terminated string @item: store its MD5 digest
+ * in b->hash and rewind the hash word index.
+ */
+static void
+bitsig_hash_init(struct bitsig *b, byte *item)
+{
+  b->hindex = 0;
+  md5_hash_buffer((byte *) b->hash, item, strlen(item));
+}
+
+/*
+ * Produce the next bit position (0 .. m-1) for the item set up by
+ * bitsig_hash_init().
+ *
+ * The four hash words are used in round-robin order; each word is multiplied
+ * by a constant after use, so that it yields a different value next time.
+ * Words not below max_m_mult are skipped before the reduction modulo m.
+ */
+static inline uns
+bitsig_hash_bit(struct bitsig *b)
+{
+  for (;;)
+    {
+      uns idx = b->hindex;
+      u32 h = b->hash[idx];
+
+      b->hash[idx] = h * 3006477127U;
+      b->hindex = (idx + 1) % 4;
+      if (h < b->max_m_mult)
+        return h % b->m;
+    }
+}
+
+/*
+ * Test whether the zero-terminated string @item is (probably) present.
+ * Returns 0 as soon as one of the item's bits is found clear, 1 if all
+ * b->l bits are set. The hashing state inside @b is overwritten.
+ */
+int
+bitsig_member(struct bitsig *b, byte *item)
+{
+  uns done = 0;
+
+  bitsig_hash_init(b, item);
+  while (done < b->l)
+    {
+      uns bit = bitsig_hash_bit(b);
+      uns mask = 1 << (bit & 7);
+
+      if (!(b->array[bit >> 3] & mask))
+        return 0;
+      done++;
+    }
+  return 1;
+}
+
+/*
+ * Insert the zero-terminated string @item.
+ *
+ * Sets all b->l bits belonging to the item. Returns 1 if all of them were
+ * already set (the item was probably present before), 0 if at least one
+ * bit was newly set. In the latter case the item counter is increased and
+ * an error is logged at the moment the counter first exceeds b->maxn,
+ * i.e. by the (maxn+1)-th new item. Comparing the old counter with maxn
+ * itself also avoids the wrap-around of `maxn+1` for maxn == UINT_MAX.
+ */
+int
+bitsig_insert(struct bitsig *b, byte *item)
+{
+  uns i, bit, was;
+
+  bitsig_hash_init(b, item);
+  was = 1;
+  for (i=0; i<b->l; i++)
+    {
+      bit = bitsig_hash_bit(b);
+      if (!(b->array[bit >> 3] & (1 << (bit & 7))))
+        {
+          was = 0;
+          b->array[bit >> 3] |= (1 << (bit & 7));
+        }
+    }
+  if (!was && b->n++ == b->maxn)
+    msg(L_ERROR, "bitsig: Too many items inserted, error rate will be higher than estimated!");
+  return was;
+}
+
+#ifdef TEST
+
+#include <stdio.h>
+#include <stdlib.h>
+
+/*
+ * Test driver: bitsig-test <perrlog> <maxn>
+ *
+ * Creates a signature with the given parameters, then inserts every line of
+ * stdin (including its trailing newline) and prints the result of each
+ * bitsig_insert() call. Exits with status 1 when the parameters are missing.
+ */
+int main(int argc, char **argv)
+{
+  struct bitsig *b;
+  byte buf[1024];
+
+  if (argc < 3)
+    {
+      fprintf(stderr, "Usage: %s <perrlog> <maxn>\n", (argc > 0) ? argv[0] : "bitsig-test");
+      return 1;
+    }
+  b = bitsig_init(atol(argv[1]), atol(argv[2]));
+
+  while (fgets(buf, 1024, stdin))
+    printf("%d\n", bitsig_insert(b, buf));
+
+  return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Bit Array Signatures -- A Dubious Detector of Duplicates
+ *
+ * (c) 2002 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_BITSIG_H
+#define _UCW_BITSIG_H
+
+/* Opaque handle; the layout is private to bitsig.c. */
+struct bitsig;
+
+/* Create a signature setting @perrlog bits per item, dimensioned for @maxn items. */
+struct bitsig *bitsig_init(uns perrlog, uns maxn);
+/* Release a signature created by bitsig_init(). */
+void bitsig_free(struct bitsig *b);
+/* Return 1 if all bits of the zero-terminated string @item are set (probable member), 0 otherwise. */
+int bitsig_member(struct bitsig *b, byte *item);
+/* Set all bits of the zero-terminated string @item; return 1 if all of them were set already, 0 otherwise. */
+int bitsig_insert(struct bitsig *b, byte *item);
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Careful Read/Write
+ *
+ * (c) 2004 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+
+#include <unistd.h>
+
+/*
+ * Reads and writes on sockets and pipes can return partial results,
+ * so we implement an iterated read/write call.
+ */
+
+/*
+ * Read exactly @len bytes from @fd to @buf, repeating read() as long as it
+ * delivers partial results.
+ *
+ * Returns 1 when everything has been read, 0 when read() reported end of
+ * file before that (even if some data were already read), and -1 when
+ * read() failed.
+ */
+int
+careful_read(int fd, void *buf, int len)
+{
+  byte *dest = buf;
+  int got;
+
+  for (; len; dest += got, len -= got)
+    {
+      got = read(fd, dest, len);
+      if (got < 0)
+        return -1;
+      if (got == 0)
+        return 0;
+    }
+  return 1;
+}
+
+/*
+ * Write exactly @len bytes from @buf to @fd, repeating write() as long as
+ * it accepts only a part of the data.
+ *
+ * Returns 1 when everything has been written, 0 when write() returned zero
+ * before that, and -1 when write() failed.
+ */
+int
+careful_write(int fd, const void *buf, int len)
+{
+  const byte *src = buf;
+  int sent;
+
+  for (; len; src += sent, len -= sent)
+    {
+      sent = write(fd, src, len);
+      if (sent < 0)
+        return -1;
+      if (sent == 0)
+        return 0;
+    }
+  return 1;
+}
--- /dev/null
+/*
+ * UCW Library -- Character Classes
+ *
+ * (c) 1998--2004 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+#include "ucw/chartype.h"
+
+/*
+ * Category flags of all 256 byte values. The initializer is generated from
+ * ucw/char-map.h by defining CHAR() to emit only its fourth field.
+ */
+const byte _c_cat[256] = {
+#define CHAR(code,upper,lower,cat) cat,
+#include "ucw/char-map.h"
+#undef CHAR
+};
--- /dev/null
+/*
+ * UCW Library -- Lowercase Map
+ *
+ * (c) 1997--2004 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+#include "ucw/chartype.h"
+
+/*
+ * Lower-case equivalents of all 256 byte values. The initializer is generated
+ * from ucw/char-map.h by defining CHAR() to emit only its third field.
+ */
+const byte _c_lower[256] = {
+#define CHAR(code,upper,lower,cat) lower,
+#include "ucw/char-map.h"
+#undef CHAR
+};
--- /dev/null
+/*
+ * UCW Library -- Character Code Map (UTF-8 Version)
+ *
+ * (c) 1998--2004 Martin Mares <mj@ucw.cz>
+ * (c) 2004 Robert Spalek <robert@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+/* Syntax: CHAR(code, uppercase, lowercase, category) */
+
+CHAR(0x00,0x00,0x00,_C_CTRL) // <control>
+CHAR(0x01,0x01,0x01,_C_CTRL) // <control>
+CHAR(0x02,0x02,0x02,_C_CTRL) // <control>
+CHAR(0x03,0x03,0x03,_C_CTRL) // <control>
+CHAR(0x04,0x04,0x04,_C_CTRL) // <control>
+CHAR(0x05,0x05,0x05,_C_CTRL) // <control>
+CHAR(0x06,0x06,0x06,_C_CTRL) // <control>
+CHAR(0x07,0x07,0x07,_C_CTRL) // <control>
+CHAR(0x08,0x08,0x08,_C_CTRL | _C_BLANK) // <control>
+CHAR(0x09,0x09,0x09,_C_CTRL | _C_BLANK | _C_PRINT) // <control>
+CHAR(0x0A,0x0A,0x0A,_C_CTRL | _C_BLANK) // <control>
+CHAR(0x0B,0x0B,0x0B,_C_CTRL) // <control>
+CHAR(0x0C,0x0C,0x0C,_C_CTRL | _C_BLANK) // <control>
+CHAR(0x0D,0x0D,0x0D,_C_CTRL | _C_BLANK) // <control>
+CHAR(0x0E,0x0E,0x0E,_C_CTRL) // <control>
+CHAR(0x0F,0x0F,0x0F,_C_CTRL) // <control>
+CHAR(0x10,0x10,0x10,_C_CTRL) // <control>
+CHAR(0x11,0x11,0x11,_C_CTRL) // <control>
+CHAR(0x12,0x12,0x12,_C_CTRL) // <control>
+CHAR(0x13,0x13,0x13,_C_CTRL) // <control>
+CHAR(0x14,0x14,0x14,_C_CTRL) // <control>
+CHAR(0x15,0x15,0x15,_C_CTRL) // <control>
+CHAR(0x16,0x16,0x16,_C_CTRL) // <control>
+CHAR(0x17,0x17,0x17,_C_CTRL) // <control>
+CHAR(0x18,0x18,0x18,_C_CTRL) // <control>
+CHAR(0x19,0x19,0x19,_C_CTRL) // <control>
+CHAR(0x1A,0x1A,0x1A,_C_CTRL) // <control>
+CHAR(0x1B,0x1B,0x1B,_C_CTRL) // <control>
+CHAR(0x1C,0x1C,0x1C,_C_CTRL) // <control>
+CHAR(0x1D,0x1D,0x1D,_C_CTRL) // <control>
+CHAR(0x1E,0x1E,0x1E,_C_CTRL) // <control>
+CHAR(0x1F,0x1F,0x1F,_C_CTRL) // <control>
+CHAR(0x20,0x20,0x20,_C_BLANK | _C_PRINT) // SPACE
+CHAR(0x21,0x21,0x21,_C_PRINT) // EXCLAMATION MARK
+CHAR(0x22,0x22,0x22,_C_PRINT) // QUOTATION MARK
+CHAR(0x23,0x23,0x23,_C_PRINT) // NUMBER SIGN
+CHAR(0x24,0x24,0x24,_C_PRINT) // DOLLAR SIGN
+CHAR(0x25,0x25,0x25,_C_PRINT) // PERCENT SIGN
+CHAR(0x26,0x26,0x26,_C_PRINT) // AMPERSAND
+CHAR(0x27,0x27,0x27,_C_PRINT) // APOSTROPHE
+CHAR(0x28,0x28,0x28,_C_PRINT) // LEFT PARENTHESIS
+CHAR(0x29,0x29,0x29,_C_PRINT) // RIGHT PARENTHESIS
+CHAR(0x2A,0x2A,0x2A,_C_PRINT) // ASTERISK
+CHAR(0x2B,0x2B,0x2B,_C_PRINT) // PLUS SIGN
+CHAR(0x2C,0x2C,0x2C,_C_PRINT) // COMMA
+CHAR(0x2D,0x2D,0x2D,_C_PRINT) // HYPHEN-MINUS
+CHAR(0x2E,0x2E,0x2E,_C_PRINT) // FULL STOP
+CHAR(0x2F,0x2F,0x2F,_C_PRINT) // SOLIDUS
+CHAR(0x30,0x30,0x30,_C_DIGIT | _C_XDIGIT | _C_PRINT) // DIGIT ZERO
+CHAR(0x31,0x31,0x31,_C_DIGIT | _C_XDIGIT | _C_PRINT) // DIGIT ONE
+CHAR(0x32,0x32,0x32,_C_DIGIT | _C_XDIGIT | _C_PRINT) // DIGIT TWO
+CHAR(0x33,0x33,0x33,_C_DIGIT | _C_XDIGIT | _C_PRINT) // DIGIT THREE
+CHAR(0x34,0x34,0x34,_C_DIGIT | _C_XDIGIT | _C_PRINT) // DIGIT FOUR
+CHAR(0x35,0x35,0x35,_C_DIGIT | _C_XDIGIT | _C_PRINT) // DIGIT FIVE
+CHAR(0x36,0x36,0x36,_C_DIGIT | _C_XDIGIT | _C_PRINT) // DIGIT SIX
+CHAR(0x37,0x37,0x37,_C_DIGIT | _C_XDIGIT | _C_PRINT) // DIGIT SEVEN
+CHAR(0x38,0x38,0x38,_C_DIGIT | _C_XDIGIT | _C_PRINT) // DIGIT EIGHT
+CHAR(0x39,0x39,0x39,_C_DIGIT | _C_XDIGIT | _C_PRINT) // DIGIT NINE
+CHAR(0x3A,0x3A,0x3A,_C_PRINT) // COLON
+CHAR(0x3B,0x3B,0x3B,_C_PRINT) // SEMICOLON
+CHAR(0x3C,0x3C,0x3C,_C_PRINT) // LESS-THAN SIGN
+CHAR(0x3D,0x3D,0x3D,_C_PRINT) // EQUALS SIGN
+CHAR(0x3E,0x3E,0x3E,_C_PRINT) // GREATER-THAN SIGN
+CHAR(0x3F,0x3F,0x3F,_C_PRINT) // QUESTION MARK
+CHAR(0x40,0x40,0x40,_C_PRINT) // COMMERCIAL AT
+CHAR(0x41,0x41,0x61,_C_UPPER | _C_XDIGIT | _C_PRINT) // LATIN CAPITAL LETTER A
+CHAR(0x42,0x42,0x62,_C_UPPER | _C_XDIGIT | _C_PRINT) // LATIN CAPITAL LETTER B
+CHAR(0x43,0x43,0x63,_C_UPPER | _C_XDIGIT | _C_PRINT) // LATIN CAPITAL LETTER C
+CHAR(0x44,0x44,0x64,_C_UPPER | _C_XDIGIT | _C_PRINT) // LATIN CAPITAL LETTER D
+CHAR(0x45,0x45,0x65,_C_UPPER | _C_XDIGIT | _C_PRINT) // LATIN CAPITAL LETTER E
+CHAR(0x46,0x46,0x66,_C_UPPER | _C_XDIGIT | _C_PRINT) // LATIN CAPITAL LETTER F
+CHAR(0x47,0x47,0x67,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER G
+CHAR(0x48,0x48,0x68,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER H
+CHAR(0x49,0x49,0x69,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER I
+CHAR(0x4A,0x4A,0x6A,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER J
+CHAR(0x4B,0x4B,0x6B,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER K
+CHAR(0x4C,0x4C,0x6C,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER L
+CHAR(0x4D,0x4D,0x6D,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER M
+CHAR(0x4E,0x4E,0x6E,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER N
+CHAR(0x4F,0x4F,0x6F,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER O
+CHAR(0x50,0x50,0x70,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER P
+CHAR(0x51,0x51,0x71,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER Q
+CHAR(0x52,0x52,0x72,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER R
+CHAR(0x53,0x53,0x73,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER S
+CHAR(0x54,0x54,0x74,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER T
+CHAR(0x55,0x55,0x75,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER U
+CHAR(0x56,0x56,0x76,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER V
+CHAR(0x57,0x57,0x77,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER W
+CHAR(0x58,0x58,0x78,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER X
+CHAR(0x59,0x59,0x79,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER Y
+CHAR(0x5A,0x5A,0x7A,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER Z
+CHAR(0x5B,0x5B,0x5B,_C_PRINT) // LEFT SQUARE BRACKET
+CHAR(0x5C,0x5C,0x5C,_C_PRINT) // REVERSE SOLIDUS
+CHAR(0x5D,0x5D,0x5D,_C_PRINT) // RIGHT SQUARE BRACKET
+CHAR(0x5E,0x5E,0x5E,_C_PRINT) // CIRCUMFLEX ACCENT
+CHAR(0x5F,0x5F,0x5F,_C_INNER | _C_PRINT) // LOW LINE
+CHAR(0x60,0x60,0x60,_C_PRINT) // GRAVE ACCENT
+CHAR(0x61,0x41,0x61,_C_LOWER | _C_XDIGIT | _C_PRINT) // LATIN SMALL LETTER A
+CHAR(0x62,0x42,0x62,_C_LOWER | _C_XDIGIT | _C_PRINT) // LATIN SMALL LETTER B
+CHAR(0x63,0x43,0x63,_C_LOWER | _C_XDIGIT | _C_PRINT) // LATIN SMALL LETTER C
+CHAR(0x64,0x44,0x64,_C_LOWER | _C_XDIGIT | _C_PRINT) // LATIN SMALL LETTER D
+CHAR(0x65,0x45,0x65,_C_LOWER | _C_XDIGIT | _C_PRINT) // LATIN SMALL LETTER E
+CHAR(0x66,0x46,0x66,_C_LOWER | _C_XDIGIT | _C_PRINT) // LATIN SMALL LETTER F
+CHAR(0x67,0x47,0x67,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER G
+CHAR(0x68,0x48,0x68,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER H
+CHAR(0x69,0x49,0x69,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER I
+CHAR(0x6A,0x4A,0x6A,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER J
+CHAR(0x6B,0x4B,0x6B,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER K
+CHAR(0x6C,0x4C,0x6C,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER L
+CHAR(0x6D,0x4D,0x6D,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER M
+CHAR(0x6E,0x4E,0x6E,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER N
+CHAR(0x6F,0x4F,0x6F,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER O
+CHAR(0x70,0x50,0x70,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER P
+CHAR(0x71,0x51,0x71,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER Q
+CHAR(0x72,0x52,0x72,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER R
+CHAR(0x73,0x53,0x73,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER S
+CHAR(0x74,0x54,0x74,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER T
+CHAR(0x75,0x55,0x75,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER U
+CHAR(0x76,0x56,0x76,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER V
+CHAR(0x77,0x57,0x77,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER W
+CHAR(0x78,0x58,0x78,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER X
+CHAR(0x79,0x59,0x79,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER Y
+CHAR(0x7A,0x5A,0x7A,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER Z
+CHAR(0x7B,0x7B,0x7B,_C_PRINT) // LEFT CURLY BRACKET
+CHAR(0x7C,0x7C,0x7C,_C_PRINT) // VERTICAL LINE
+CHAR(0x7D,0x7D,0x7D,_C_PRINT) // RIGHT CURLY BRACKET
+CHAR(0x7E,0x7E,0x7E,_C_PRINT) // TILDE
+CHAR(0x7F,0x7F,0x7F,_C_CTRL) // <control>
+CHAR(0x80,0x80,0x80,_C_PRINT) // UTF-8
+CHAR(0x81,0x81,0x81,_C_PRINT) // UTF-8
+CHAR(0x82,0x82,0x82,_C_PRINT) // UTF-8
+CHAR(0x83,0x83,0x83,_C_PRINT) // UTF-8
+CHAR(0x84,0x84,0x84,_C_PRINT) // UTF-8
+CHAR(0x85,0x85,0x85,_C_PRINT) // UTF-8
+CHAR(0x86,0x86,0x86,_C_PRINT) // UTF-8
+CHAR(0x87,0x87,0x87,_C_PRINT) // UTF-8
+CHAR(0x88,0x88,0x88,_C_PRINT) // UTF-8
+CHAR(0x89,0x89,0x89,_C_PRINT) // UTF-8
+CHAR(0x8A,0x8A,0x8A,_C_PRINT) // UTF-8
+CHAR(0x8B,0x8B,0x8B,_C_PRINT) // UTF-8
+CHAR(0x8C,0x8C,0x8C,_C_PRINT) // UTF-8
+CHAR(0x8D,0x8D,0x8D,_C_PRINT) // UTF-8
+CHAR(0x8E,0x8E,0x8E,_C_PRINT) // UTF-8
+CHAR(0x8F,0x8F,0x8F,_C_PRINT) // UTF-8
+CHAR(0x90,0x90,0x90,_C_PRINT) // UTF-8
+CHAR(0x91,0x91,0x91,_C_PRINT) // UTF-8
+CHAR(0x92,0x92,0x92,_C_PRINT) // UTF-8
+CHAR(0x93,0x93,0x93,_C_PRINT) // UTF-8
+CHAR(0x94,0x94,0x94,_C_PRINT) // UTF-8
+CHAR(0x95,0x95,0x95,_C_PRINT) // UTF-8
+CHAR(0x96,0x96,0x96,_C_PRINT) // UTF-8
+CHAR(0x97,0x97,0x97,_C_PRINT) // UTF-8
+CHAR(0x98,0x98,0x98,_C_PRINT) // UTF-8
+CHAR(0x99,0x99,0x99,_C_PRINT) // UTF-8
+CHAR(0x9A,0x9A,0x9A,_C_PRINT) // UTF-8
+CHAR(0x9B,0x9B,0x9B,_C_PRINT) // UTF-8
+CHAR(0x9C,0x9C,0x9C,_C_PRINT) // UTF-8
+CHAR(0x9D,0x9D,0x9D,_C_PRINT) // UTF-8
+CHAR(0x9E,0x9E,0x9E,_C_PRINT) // UTF-8
+CHAR(0x9F,0x9F,0x9F,_C_PRINT) // UTF-8
+CHAR(0xA0,0xA0,0xA0,_C_PRINT) // UTF-8
+CHAR(0xA1,0xA1,0xA1,_C_PRINT) // UTF-8
+CHAR(0xA2,0xA2,0xA2,_C_PRINT) // UTF-8
+CHAR(0xA3,0xA3,0xA3,_C_PRINT) // UTF-8
+CHAR(0xA4,0xA4,0xA4,_C_PRINT) // UTF-8
+CHAR(0xA5,0xA5,0xA5,_C_PRINT) // UTF-8
+CHAR(0xA6,0xA6,0xA6,_C_PRINT) // UTF-8
+CHAR(0xA7,0xA7,0xA7,_C_PRINT) // UTF-8
+CHAR(0xA8,0xA8,0xA8,_C_PRINT) // UTF-8
+CHAR(0xA9,0xA9,0xA9,_C_PRINT) // UTF-8
+CHAR(0xAA,0xAA,0xAA,_C_PRINT) // UTF-8
+CHAR(0xAB,0xAB,0xAB,_C_PRINT) // UTF-8
+CHAR(0xAC,0xAC,0xAC,_C_PRINT) // UTF-8
+CHAR(0xAD,0xAD,0xAD,_C_PRINT) // UTF-8
+CHAR(0xAE,0xAE,0xAE,_C_PRINT) // UTF-8
+CHAR(0xAF,0xAF,0xAF,_C_PRINT) // UTF-8
+CHAR(0xB0,0xB0,0xB0,_C_PRINT) // UTF-8
+CHAR(0xB1,0xB1,0xB1,_C_PRINT) // UTF-8
+CHAR(0xB2,0xB2,0xB2,_C_PRINT) // UTF-8
+CHAR(0xB3,0xB3,0xB3,_C_PRINT) // UTF-8
+CHAR(0xB4,0xB4,0xB4,_C_PRINT) // UTF-8
+CHAR(0xB5,0xB5,0xB5,_C_PRINT) // UTF-8
+CHAR(0xB6,0xB6,0xB6,_C_PRINT) // UTF-8
+CHAR(0xB7,0xB7,0xB7,_C_PRINT) // UTF-8
+CHAR(0xB8,0xB8,0xB8,_C_PRINT) // UTF-8
+CHAR(0xB9,0xB9,0xB9,_C_PRINT) // UTF-8
+CHAR(0xBA,0xBA,0xBA,_C_PRINT) // UTF-8
+CHAR(0xBB,0xBB,0xBB,_C_PRINT) // UTF-8
+CHAR(0xBC,0xBC,0xBC,_C_PRINT) // UTF-8
+CHAR(0xBD,0xBD,0xBD,_C_PRINT) // UTF-8
+CHAR(0xBE,0xBE,0xBE,_C_PRINT) // UTF-8
+CHAR(0xBF,0xBF,0xBF,_C_PRINT) // UTF-8
+CHAR(0xC0,0xC0,0xC0,_C_PRINT) // UTF-8
+CHAR(0xC1,0xC1,0xC1,_C_PRINT) // UTF-8
+CHAR(0xC2,0xC2,0xC2,_C_PRINT) // UTF-8
+CHAR(0xC3,0xC3,0xC3,_C_PRINT) // UTF-8
+CHAR(0xC4,0xC4,0xC4,_C_PRINT) // UTF-8
+CHAR(0xC5,0xC5,0xC5,_C_PRINT) // UTF-8
+CHAR(0xC6,0xC6,0xC6,_C_PRINT) // UTF-8
+CHAR(0xC7,0xC7,0xC7,_C_PRINT) // UTF-8
+CHAR(0xC8,0xC8,0xC8,_C_PRINT) // UTF-8
+CHAR(0xC9,0xC9,0xC9,_C_PRINT) // UTF-8
+CHAR(0xCA,0xCA,0xCA,_C_PRINT) // UTF-8
+CHAR(0xCB,0xCB,0xCB,_C_PRINT) // UTF-8
+CHAR(0xCC,0xCC,0xCC,_C_PRINT) // UTF-8
+CHAR(0xCD,0xCD,0xCD,_C_PRINT) // UTF-8
+CHAR(0xCE,0xCE,0xCE,_C_PRINT) // UTF-8
+CHAR(0xCF,0xCF,0xCF,_C_PRINT) // UTF-8
+CHAR(0xD0,0xD0,0xD0,_C_PRINT) // UTF-8
+CHAR(0xD1,0xD1,0xD1,_C_PRINT) // UTF-8
+CHAR(0xD2,0xD2,0xD2,_C_PRINT) // UTF-8
+CHAR(0xD3,0xD3,0xD3,_C_PRINT) // UTF-8
+CHAR(0xD4,0xD4,0xD4,_C_PRINT) // UTF-8
+CHAR(0xD5,0xD5,0xD5,_C_PRINT) // UTF-8
+CHAR(0xD6,0xD6,0xD6,_C_PRINT) // UTF-8
+CHAR(0xD7,0xD7,0xD7,_C_PRINT) // UTF-8
+CHAR(0xD8,0xD8,0xD8,_C_PRINT) // UTF-8
+CHAR(0xD9,0xD9,0xD9,_C_PRINT) // UTF-8
+CHAR(0xDA,0xDA,0xDA,_C_PRINT) // UTF-8
+CHAR(0xDB,0xDB,0xDB,_C_PRINT) // UTF-8
+CHAR(0xDC,0xDC,0xDC,_C_PRINT) // UTF-8
+CHAR(0xDD,0xDD,0xDD,_C_PRINT) // UTF-8
+CHAR(0xDE,0xDE,0xDE,_C_PRINT) // UTF-8
+CHAR(0xDF,0xDF,0xDF,_C_PRINT) // UTF-8
+CHAR(0xE0,0xE0,0xE0,_C_PRINT) // UTF-8
+CHAR(0xE1,0xE1,0xE1,_C_PRINT) // UTF-8
+CHAR(0xE2,0xE2,0xE2,_C_PRINT) // UTF-8
+CHAR(0xE3,0xE3,0xE3,_C_PRINT) // UTF-8
+CHAR(0xE4,0xE4,0xE4,_C_PRINT) // UTF-8
+CHAR(0xE5,0xE5,0xE5,_C_PRINT) // UTF-8
+CHAR(0xE6,0xE6,0xE6,_C_PRINT) // UTF-8
+CHAR(0xE7,0xE7,0xE7,_C_PRINT) // UTF-8
+CHAR(0xE8,0xE8,0xE8,_C_PRINT) // UTF-8
+CHAR(0xE9,0xE9,0xE9,_C_PRINT) // UTF-8
+CHAR(0xEA,0xEA,0xEA,_C_PRINT) // UTF-8
+CHAR(0xEB,0xEB,0xEB,_C_PRINT) // UTF-8
+CHAR(0xEC,0xEC,0xEC,_C_PRINT) // UTF-8
+CHAR(0xED,0xED,0xED,_C_PRINT) // UTF-8
+CHAR(0xEE,0xEE,0xEE,_C_PRINT) // UTF-8
+CHAR(0xEF,0xEF,0xEF,_C_PRINT) // UTF-8
+CHAR(0xF0,0xF0,0xF0,_C_PRINT) // UTF-8
+CHAR(0xF1,0xF1,0xF1,_C_PRINT) // UTF-8
+CHAR(0xF2,0xF2,0xF2,_C_PRINT) // UTF-8
+CHAR(0xF3,0xF3,0xF3,_C_PRINT) // UTF-8
+CHAR(0xF4,0xF4,0xF4,_C_PRINT) // UTF-8
+CHAR(0xF5,0xF5,0xF5,_C_PRINT) // UTF-8
+CHAR(0xF6,0xF6,0xF6,_C_PRINT) // UTF-8
+CHAR(0xF7,0xF7,0xF7,_C_PRINT) // UTF-8
+CHAR(0xF8,0xF8,0xF8,_C_PRINT) // UTF-8
+CHAR(0xF9,0xF9,0xF9,_C_PRINT) // UTF-8
+CHAR(0xFA,0xFA,0xFA,_C_PRINT) // UTF-8
+CHAR(0xFB,0xFB,0xFB,_C_PRINT) // UTF-8
+CHAR(0xFC,0xFC,0xFC,_C_PRINT) // UTF-8
+CHAR(0xFD,0xFD,0xFD,_C_PRINT) // UTF-8
+CHAR(0xFE,0xFE,0xFE,_C_PRINT) // UTF-8
+CHAR(0xFF,0xFF,0xFF,_C_PRINT) // UTF-8
--- /dev/null
+/*
+ * UCW Library -- Uppercase Map
+ *
+ * (c) 1997--2004 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+#include "ucw/chartype.h"
+
+/*
+ * Upper-case equivalents of all 256 byte values. The initializer is generated
+ * from ucw/char-map.h by defining CHAR() to emit only its second field.
+ */
+const byte _c_upper[256] = {
+#define CHAR(code,upper,lower,cat) upper,
+#include "ucw/char-map.h"
+#undef CHAR
+};
--- /dev/null
+/*
+ * UCW Library -- Character Types
+ *
+ * (c) 1997--2004 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_CHARTYPE_H
+#define _UCW_CHARTYPE_H
+
+/***
+ * We define our own routines to classify 8-bit characters (based on US-ASCII charset).
+ * This way we bypass most possible problems with different compilation environments.
+ *
+ * All functions and macros accept any numbers and if it is necessary, they simply ignore higher bits.
+ * It does not matter whether a parameter is signed or unsigned. Parameters are evaluated exactly once,
+ * so they can have side-effects.
+ ***/
+
+/* Single-bit category flags, one bit per class; they are stored in the byte-sized entries of _c_cat[] */
+#define _C_UPPER 0x01 /* Upper-case letters */
+#define _C_LOWER 0x02 /* Lower-case letters */
+#define _C_PRINT 0x04 /* Printable */
+#define _C_DIGIT 0x08 /* Digits */
+#define _C_CTRL 0x10 /* Control characters */
+#define _C_XDIGIT 0x20 /* Hexadecimal digits */
+#define _C_BLANK 0x40 /* White spaces (spaces, tabs, newlines) */
+#define _C_INNER 0x80 /* `inner punctuation' -- underscore etc. */
+
+/* Combined masks built from the flags above */
+#define _C_ALPHA (_C_UPPER | _C_LOWER)
+#define _C_ALNUM (_C_ALPHA | _C_DIGIT)
+#define _C_WORD (_C_ALNUM | _C_INNER)
+#define _C_WSTART (_C_ALPHA | _C_INNER)
+
+extern const byte _c_cat[256], _c_upper[256], _c_lower[256];
+
+/*
+ * Category(x) yields the category flags of the character @x (only the lowest 8 bits of @x are used).
+ * Ccat(x,y) masks them by @y; the mask is parenthesized, so it may be an arbitrary expression
+ * like `_C_UPPER | _C_DIGIT`. The result is non-zero iff @x belongs to at least one of the categories.
+ */
+#define Category(x) (_c_cat[(byte)(x)])
+#define Ccat(x,y) (Category(x) & (y))
+
+#define Cupper(x) Ccat(x, _C_UPPER) /** Checks for an upper-case character (`A-Z`). **/
+#define Clower(x) Ccat(x, _C_LOWER) /** Checks for a lower-case character (`a-z`). **/
+#define Calpha(x) Ccat(x, _C_ALPHA) /** Checks for an alphabetic character (`a-z`, `A-Z`). **/
+#define Calnum(x) Ccat(x, _C_ALNUM) /** Checks for an alpha-numeric character (`a-z`, `A-Z`, `0-9`). **/
+#define Cprint(x) Ccat(x, _C_PRINT) /** Checks for printable characters, including 8-bit values (`\t`, `0x20-0x7E`, `0x80-0xFF`). **/
+#define Cdigit(x) Ccat(x, _C_DIGIT) /** Checks for a digit (`0-9`). **/
+#define Cxdigit(x) Ccat(x, _C_XDIGIT) /** Checks for a hexadecimal digit (`0-9`, `a-f`, `A-F`). **/
+#define Cword(x) Ccat(x, _C_WORD) /** Checks for an alpha-numeric character or an inner punctuation (`a-z`, `A-Z`, `0-9`, `_`). **/
+#define Cblank(x) Ccat(x, _C_BLANK) /** Checks for a white space (`0x20`, `\t`, `\n`, `\r`, `0x8`, `0xC`). **/
+#define Cctrl(x) Ccat(x, _C_CTRL) /** Checks for control characters (`0x0-0x1F`, `0x7F`). **/
+#define Cspace(x) Cblank(x)
+
+#define Cupcase(x) (_c_upper[(byte)(x)]) /** Convert a letter to upper case, leave non-letter characters unchanged. **/
+#define Clocase(x) (_c_lower[(byte)(x)]) /** Convert a letter to lower case, leave non-letter characters unchanged. **/
+
+/**
+ * Compute the numeric value (0 to 15) of a valid hexadecimal character (i.e., one which has passed the @Cxdigit() check).
+ * Characters below `A` are taken as decimal digits; for the others, the case bit is masked out, so both `a-f` and `A-F` work.
+ **/
+static inline uns Cxvalue(byte x)
+{
+  if (x < (uns)'A')
+    return x - '0';
+  return (x & 0xdf) - 'A' + 10;
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Circular Linked Lists
+ *
+ * (c) 2003--2007 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_CLISTS_H
+#define _UCW_CLISTS_H
+
+/**
+ * Common header for list nodes: links to the next and the previous node on the list.
+ **/
+typedef struct cnode {
+ struct cnode *next, *prev;
+} cnode;
+
+/**
+ * Circular linked list. It consists of a single head node, which links to itself when the list is empty
+ * (see @clist_init()) and which marks both ends of the list otherwise.
+ **/
+typedef struct clist {
+ struct cnode head;
+} clist;
+
+/**
+ * Initialize a new circular linked list by linking its head node to itself. Must be called before any other function.
+ **/
+static inline void clist_init(clist *l)
+{
+  l->head.prev = &l->head;
+  l->head.next = &l->head;
+}
+
+/**
+ * Return the first node on @l or NULL if @l is empty.
+ **/
+static inline void *clist_head(clist *l)
+{
+  cnode *first = l->head.next;
+
+  if (first == &l->head)
+    return NULL;
+  return first;
+}
+
+/**
+ * Return the last node on @l or NULL if @l is empty.
+ **/
+static inline void *clist_tail(clist *l)
+{
+  cnode *last = l->head.prev;
+
+  if (last == &l->head)
+    return NULL;
+  return last;
+}
+
+/**
+ * Find the node following @n on the list @l or return NULL if @n is the last one.
+ **/
+static inline void *clist_next(clist *l, cnode *n)
+{
+  cnode *succ = n->next;
+
+  if (succ == &l->head)
+    return NULL;
+  return succ;
+}
+
+/**
+ * Find the node preceding @n on the list @l or return NULL if @n is the first one.
+ **/
+static inline void *clist_prev(clist *l, cnode *n)
+{
+  cnode *pred = n->prev;
+
+  if (pred == &l->head)
+    return NULL;
+  return pred;
+}
+
+/**
+ * Return a non-zero value iff @l is empty, that is if its head node links to itself.
+ **/
+static inline int clist_empty(clist *l)
+{
+  cnode *head = &l->head;
+
+  return head->next == head;
+}
+
+/**
+ * Loop over all nodes in the @list and perform the next C statement on them. The current node is stored in @n which must be defined before as pointer to any type.
+ * The list should not be changed during this loop command.
+ * The @list is the list itself, not a pointer to it. As @n is cast directly to `cnode *`, the `cnode` has to be placed at the very beginning of the node structure.
+ **/
+#define CLIST_WALK(n,list) for(n=(void*)(list).head.next; (cnode*)(n) != &(list).head; n=(void*)((cnode*)(n))->next)
+
+/**
+ * Same as @CLIST_WALK(), but allows removal of the current node. This macro requires one more variable to store some temporary pointers:
+ * @tmp receives the pointer to the next node before the loop body is executed.
+ **/
+#define CLIST_WALK_DELSAFE(n,list,tmp) for(n=(void*)(list).head.next; tmp=(void*)((cnode*)(n))->next, (cnode*)(n) != &(list).head; n=(void*)tmp)
+
+/**
+ * Same as @CLIST_WALK(), but it defines the variable for the current node in place. @type should be a pointer type.
+ **/
+#define CLIST_FOR_EACH(type,n,list) for(type n=(void*)(list).head.next; (cnode*)(n) != &(list).head; n=(void*)((cnode*)(n))->next)
+
+/**
+ * Same as @CLIST_WALK_DELSAFE(), but it defines the variable for the current node in place. @type should be a pointer type. The temporary variable @tmp must still be defined beforehand.
+ **/
+#define CLIST_FOR_EACH_DELSAFE(type,n,list,tmp) for(type n=(void*)(list).head.next; tmp=(void*)((cnode*)(n))->next, (cnode*)(n) != &(list).head; n=(void*)tmp)
+
+/**
+ * Reversed version of @CLIST_FOR_EACH(): walks from the last node to the first one using the `prev` links.
+ **/
+#define CLIST_FOR_EACH_BACKWARDS(type,n,list) for(type n=(void*)(list).head.prev; (cnode*)(n) != &(list).head; n=(void*)((cnode*)(n))->prev)
+
+/**
+ * Link the node @what into a list right behind the node @after.
+ * For insertion at the head of a list, @clist_add_head() is the usual entry point.
+ **/
+static inline void clist_insert_after(cnode *what, cnode *after)
+{
+  cnode *succ = after->next;	/* the node that will follow @what */
+
+  what->next = succ;
+  what->prev = after;
+  succ->prev = what;
+  after->next = what;
+}
+
+/**
+ * Link the node @what into a list right in front of the node @before.
+ * For insertion at the tail of a list, @clist_add_tail() is the usual entry point.
+ **/
+static inline void clist_insert_before(cnode *what, cnode *before)
+{
+  cnode *pred = before->prev;	/* the node that will precede @what */
+
+  what->next = before;
+  what->prev = pred;
+  before->prev = what;
+  pred->next = what;
+}
+
+/**
+ * Put the node @n at the very beginning of @l, i.e. right behind the sentinel head.
+ **/
+static inline void clist_add_head(clist *l, cnode *n)
+{
+  cnode *sentinel = &l->head;
+  clist_insert_after(n, sentinel);
+}
+
+/**
+ * Put the node @n at the very end of @l, i.e. right in front of the sentinel head.
+ **/
+static inline void clist_add_tail(clist *l, cnode *n)
+{
+  cnode *sentinel = &l->head;
+  clist_insert_before(n, sentinel);
+}
+
+/**
+ * Unlink the node @n from its list by connecting its neighbours to each other.
+ * The links stored in @n itself are left untouched.
+ **/
+static inline void clist_remove(cnode *n)
+{
+  cnode *pred = n->prev;
+  cnode *succ = n->next;
+
+  pred->next = succ;
+  succ->prev = pred;
+}
+
+/**
+ * Unlink and return the first node of @l. For an empty list nothing is
+ * removed and NULL is returned.
+ **/
+static inline void *clist_remove_head(clist *l)
+{
+  cnode *first = clist_head(l);
+  if (!first)
+    return NULL;
+  clist_remove(first);
+  return first;
+}
+
+/**
+ * Unlink and return the last node of @l. For an empty list nothing is
+ * removed and NULL is returned.
+ **/
+static inline void *clist_remove_tail(clist *l)
+{
+  cnode *last = clist_tail(l);
+  if (!last)
+    return NULL;
+  clist_remove(last);
+  return last;
+}
+
+/**
+ * Splice all nodes of the list @what into another list, right behind the
+ * node @after. Afterwards @what is re-initialized to an empty list.
+ * An empty @what is left as it is.
+ **/
+static inline void clist_insert_list_after(clist *what, cnode *after)
+{
+  if (clist_empty(what))
+    return;
+
+  cnode *first = what->head.next;
+  cnode *last = what->head.prev;
+  cnode *succ = after->next;
+
+  /* Hook the tail of the spliced chain to the node that used to follow @after */
+  last->next = succ;
+  succ->prev = last;
+  /* ... and its beginning to @after */
+  first->prev = after;
+  after->next = first;
+  clist_init(what);
+}
+
+/**
+ * Transfer all nodes from the list @from to the list @to. Whatever @to
+ * contained before is forgotten (it is re-initialized first) and @from
+ * ends up empty.
+ **/
+static inline void clist_move(clist *to, clist *from)
+{
+  cnode *dest = &to->head;
+
+  clist_init(to);
+  clist_insert_list_after(from, dest);
+  clist_init(from);
+}
+
+/**
+ * Count the nodes of @l by walking the whole list, so the running time
+ * is linear in the list length.
+ **/
+static inline uns clist_size(clist *l)
+{
+  uns count = 0;
+  for (cnode *n = l->head.next; n != &l->head; n = n->next)
+    count++;
+  return count;
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Configuration files: memory allocation
+ *
+ * (c) 2001--2006 Robert Spalek <robert@ucw.cz>
+ * (c) 2003--2006 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+#include "ucw/conf.h"
+#include "ucw/mempool.h"
+
+struct mempool *cf_pool; // current pool for loading new configuration
+
+/* Allocate @size bytes from the current configuration pool (cf_pool). */
+void *
+cf_malloc(uns size)
+{
+  void *block = mp_alloc(cf_pool, size);
+  return block;
+}
+
+/* Like cf_malloc(), but the block is obtained through mp_alloc_zero(). */
+void *
+cf_malloc_zero(uns size)
+{
+  void *block = mp_alloc_zero(cf_pool, size);
+  return block;
+}
+
+/* Duplicate the string @s into the current configuration pool (cf_pool). */
+char *
+cf_strdup(const char *s)
+{
+  char *copy = mp_strdup(cf_pool, s);
+  return copy;
+}
+
+/*
+ * printf-like formatting whose result is produced by mp_vprintf() in the
+ * current configuration pool (cf_pool).
+ */
+char *
+cf_printf(const char *fmt, ...)
+{
+  va_list ap;
+  char *text;
+
+  va_start(ap, fmt);
+  text = mp_vprintf(cf_pool, fmt, ap);
+  va_end(ap);
+  return text;
+}
--- /dev/null
+/*
+ * UCW Library -- Configuration files: dumping
+ *
+ * (c) 2001--2006 Robert Spalek <robert@ucw.cz>
+ * (c) 2003--2006 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+#include "ucw/conf.h"
+#include "ucw/getopt.h"
+#include "ucw/conf-internal.h"
+#include "ucw/clists.h"
+#include "ucw/fastbuf.h"
+
+/* Write @nr space characters to @fb (used for indenting the dump). */
+static void
+spaces(struct fastbuf *fb, uns nr)
+{
+  while (nr--)
+    bputs(fb, " ");
+}
+
+/*
+ * Print a single value of a basic type to @fb, followed by a space.
+ *  @ptr   points to the stored value
+ *  @type  selects how the value is interpreted
+ *  @u     supplies the lookup table (CT_LOOKUP) or the user type (CT_USER)
+ *
+ * The printf arguments are cast to exactly the types the format
+ * specifiers expect (%d <-> int, %llu <-> unsigned long long).
+ */
+static void
+dump_basic(struct fastbuf *fb, void *ptr, enum cf_type type, union cf_union *u)
+{
+  switch (type) {
+    case CT_INT:	bprintf(fb, "%d ", *(int*)ptr); break;
+    case CT_U64:	bprintf(fb, "%llu ", (unsigned long long) *(u64*)ptr); break;
+    case CT_DOUBLE:	bprintf(fb, "%lg ", *(double*)ptr); break;
+    case CT_IP:		bprintf(fb, "%08x ", *(uns*)ptr); break;
+    case CT_STRING:
+      if (*(char**)ptr)
+        bprintf(fb, "'%s' ", *(char**)ptr);
+      else
+        bprintf(fb, "NULL ");
+      break;
+    // A negative index (stored by a failed lookup parse) is shown as "???"
+    case CT_LOOKUP:	bprintf(fb, "%s ", *(int*)ptr >= 0 ? u->lookup[ *(int*)ptr ] : "???"); break;
+    case CT_USER:
+      if (u->utype->dumper)
+        u->utype->dumper(fb, ptr);
+      else
+        bprintf(fb, "??? ");
+      break;
+  }
+}
+
+static void dump_section(struct fastbuf *fb, struct cf_section *sec, int level, void *ptr);
+
+static char *class_names[] = { "end", "static", "dynamic", "parser", "section", "list", "bitmap" };
+
+/*
+ * Dump one configuration item: a header line with its name, class, number
+ * and (for value-carrying classes) type, followed by the values themselves.
+ * Sections and lists are dumped recursively with increased indentation.
+ *  @ptr is the base address of the enclosing section; the item's own
+ *  offset (item->ptr) is added to it here.
+ */
+static void
+dump_item(struct fastbuf *fb, struct cf_item *item, int level, void *ptr)
+{
+  ptr += (uintptr_t) item->ptr;
+  enum cf_type type = item->type;
+  uns size = cf_type_size(item->type, item->u.utype);
+  int i;
+  spaces(fb, level);
+  bprintf(fb, "%s: C%s #", item->name, class_names[item->cls]);
+  if (item->number == CF_ANY_NUM)
+    bputs(fb, "any ");
+  else
+    bprintf(fb, "%d ", item->number);
+  if (item->cls == CC_STATIC || item->cls == CC_DYNAMIC || item->cls == CC_BITMAP) {
+    bprintf(fb, "T%s ", cf_type_names[type]);
+    if (item->type == CT_USER)
+      bprintf(fb, "U%s S%d ", item->u.utype->name, size);
+  }
+  if (item->cls == CC_STATIC) {
+    for (i=0; i<item->number; i++)
+      dump_basic(fb, ptr + i * size, type, &item->u);
+  } else if (item->cls == CC_DYNAMIC) {
+    // The item stores a pointer to the array; its length is kept in the array header
+    ptr = * (void**) ptr;
+    if (ptr) {
+      int real_nr = DARY_LEN(ptr);
+      bprintf(fb, "N%d ", real_nr);
+      for (i=0; i<real_nr; i++)
+        dump_basic(fb, ptr + i * size, type, &item->u);
+    } else
+      bprintf(fb, "NULL ");
+  } else if (item->cls == CC_BITMAP) {
+    u32 mask = * (u32*) ptr;
+    for (i=0; i<32; i++) {
+      // For lookup bitmaps, stop at the NULL terminator of the name table
+      if (item->type == CT_LOOKUP && !item->u.lookup[i])
+        break;
+      // Unsigned constant: shifting a signed 1 by 31 would overflow
+      if (mask & (1U<<i)) {
+        if (item->type == CT_INT)
+          bprintf(fb, "%d ", i);
+        else if (item->type == CT_LOOKUP)
+          bprintf(fb, "%s ", item->u.lookup[i]);
+      }
+    }
+  }
+  bputc(fb, '\n');
+  if (item->cls == CC_SECTION)
+    dump_section(fb, item->u.sec, level+1, ptr);
+  else if (item->cls == CC_LIST) {
+    uns idx = 0;
+    CLIST_FOR_EACH(cnode *, n, * (clist*) ptr) {
+      spaces(fb, level+1);
+      bprintf(fb, "item %d\n", ++idx);
+      dump_section(fb, item->u.sec, level+2, n);
+    }
+  }
+}
+
+/*
+ * Dump a section: one header line with its size and flags, then every
+ * item of its cfg array up to the terminating entry (the one with cls == 0).
+ */
+static void
+dump_section(struct fastbuf *fb, struct cf_section *sec, int level, void *ptr)
+{
+  spaces(fb, level);
+  bprintf(fb, "S%d F%x:\n", sec->size, sec->flags);
+  struct cf_item *cur = sec->cfg;
+  while (cur->cls) {
+    dump_item(fb, cur, level, ptr);
+    cur++;
+  }
+}
+
+/* Dump the whole configuration tree rooted at cf_sections to @fb. */
+void
+cf_dump_sections(struct fastbuf *fb)
+{
+  struct cf_section *root = &cf_sections;
+  dump_section(fb, root, 0, NULL);
+}
+
--- /dev/null
+/*
+ * UCW Library -- Configuration files: parsing input streams
+ *
+ * (c) 2001--2006 Robert Spalek <robert@ucw.cz>
+ * (c) 2003--2009 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+#include "ucw/conf.h"
+#include "ucw/getopt.h"
+#include "ucw/conf-internal.h"
+#include "ucw/clists.h"
+#include "ucw/mempool.h"
+#include "ucw/fastbuf.h"
+#include "ucw/chartype.h"
+#include "ucw/string.h"
+#include "ucw/stkstring.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+
+/* Text file parser */
+
+/* Input currently being parsed: its name (NULL for strings, used in messages), the stream and the line counter */
+static const char *name_parse_fb;
+static struct fastbuf *parse_fb;
+static uns line_num;
+
+/* Buffer for the current input line; `line` is the read cursor inside it */
+#define MAX_LINE 4096
+static char line_buf[MAX_LINE];
+static char *line = line_buf;
+
+/* Growing buffer with zero-terminated copies of the words of the current command; `copied` bytes are in use */
+#include "ucw/bbuf.h"
+static bb_t copy_buf;
+static uns copied;
+
+/* word_buf[i] is the offset of the i-th word inside copy_buf; `words` is their count */
+#define GBUF_TYPE uns
+#define GBUF_PREFIX(x) split_##x
+#include "ucw/gbuf.h"
+static split_t word_buf;
+static uns words;
+static uns ends_by_brace; // the line is ended by "{"
+
+/*
+ * Read the next input line into line_buf, bump the line counter and place
+ * the cursor `line` on the first non-blank character.
+ * Returns 1 on success. Returns 0 otherwise, with *msg set to
+ * "Line too long" when bgets_nodie() reported a negative result and to
+ * NULL when it returned zero.
+ */
+static int
+get_line(char **msg)
+{
+  int res = bgets_nodie(parse_fb, line_buf, MAX_LINE);
+  line_num++;
+  if (res > 0) {
+    for (line = line_buf; Cblank(*line); line++)
+      ;
+    return 1;
+  }
+  *msg = (res < 0) ? "Line too long" : NULL;
+  return 0;
+}
+
+/*
+ * Append the characters [start, end) to copy_buf as a zero-terminated
+ * chunk, growing the buffer as needed. `copied` afterwards points just
+ * behind the terminator.
+ */
+static void
+append(char *start, char *end)
+{
+  uns len = end - start;
+  bb_grow(&copy_buf, copied + len + 1);
+  memcpy(copy_buf.ptr + copied, start, len);
+  copied += len + 1;
+  copy_buf.ptr[copied-1] = 0;
+}
+
+/*
+ * Copy one word starting at the cursor `line` into copy_buf and skip the
+ * blanks that follow it. Three forms are recognized:
+ *   'apostrophes'  -- taken verbatim, may continue on the following lines
+ *   "quotes"       -- like above, but escape sequences are expanded and a
+ *                     backslash at the end of a line joins it with the next one
+ *   bare word      -- ends at a blank, brace or semicolon; when
+ *                     @is_command_name is set, also at '=' (which is turned
+ *                     into a space)
+ * Returns NULL on success or an error message.
+ */
+static char *
+get_word(uns is_command_name)
+{
+  char *msg;
+  if (*line == '\'') {
+    line++;
+    while (1) {
+      char *start = line;
+      while (*line && *line != '\'')
+        line++;
+      append(start, line);
+      if (*line)
+        break;
+      // The line ended inside the apostrophes: replace the terminator by a newline and go on
+      copy_buf.ptr[copied-1] = '\n';
+      if (!get_line(&msg))
+        return msg ? : "Unterminated apostrophe word at the end";
+    }
+    line++;
+
+  } else if (*line == '"') {
+    line++;
+    uns start_copy = copied;
+    while (1) {
+      char *start = line;
+      uns escape = 0;
+      while (*line) {
+        if (*line == '"' && !escape)
+          break;
+        else if (*line == '\\')
+          escape ^= 1;
+        else
+          escape = 0;
+        line++;
+      }
+      append(start, line);
+      if (*line)
+        break;
+      if (!escape)
+        copy_buf.ptr[copied-1] = '\n';
+      else // merge two lines
+        copied -= 2;
+      if (!get_line(&msg))
+        return msg ? : "Unterminated quoted word at the end";
+    }
+    line++;
+
+    // Expand escape sequences and store the result back in place of the raw text
+    char *tmp = stk_str_unesc(copy_buf.ptr + start_copy);
+    uns l = strlen(tmp);
+    bb_grow(&copy_buf, start_copy + l + 1);
+    strcpy(copy_buf.ptr + start_copy, tmp);
+    copied = start_copy + l + 1;
+
+  } else {
+    // promised that *line is non-null and non-blank
+    char *start = line;
+    while (*line && !Cblank(*line)
+        && *line != '{' && *line != '}' && *line != ';'
+        && (*line != '=' || !is_command_name))
+      line++;
+    if (*line == '=') {				// nice for setting from a command-line
+      if (line == start)
+        return "Assignment without a variable";
+      *line = ' ';
+    }
+    if (line == start)				// already the first char is control
+      line++;
+    append(start, line);
+  }
+  while (Cblank(*line))
+    line++;
+  return NULL;
+}
+
+/*
+ * Fetch the next word of the current command.
+ * Returns a pointer to the word inside copy_buf (its offset is also
+ * recorded in word_buf), or NULL with *err holding either an error
+ * message or NULL when there simply is no further word.
+ * With @is_command_name set, empty lines, comments and stray semicolons
+ * are skipped; otherwise the end of a line, a comment or a semicolon
+ * ends the command.
+ */
+static char *
+get_token(uns is_command_name, char **err)
+{
+ *err = NULL;
+ while (1) {
+ if (!*line || *line == '#') {
+ if (!is_command_name || !get_line(err))
+ return NULL;
+ } else if (*line == ';') {
+ // get_word() consumes the semicolon; its copy is not recorded in word_buf
+ *err = get_word(0);
+ if (!is_command_name || *err)
+ return NULL;
+ } else if (*line == '\\' && !line[1]) {
+ // A backslash as the last character continues the command on the next line
+ if (!get_line(err)) {
+ if (!*err)
+ *err = "Last line ends by a backslash";
+ return NULL;
+ }
+ if (!*line || *line == '#')
+ msg(L_WARN, "The line %s:%d following a backslash is empty", name_parse_fb ? : "", line_num);
+ } else {
+ split_grow(&word_buf, words+1);
+ uns start = copied;
+ word_buf.ptr[words++] = copied;
+ *err = get_word(is_command_name);
+ return *err ? NULL : copy_buf.ptr + start;
+ }
+ }
+}
+
+/*
+ * Split the next command into words: resets the word buffers, reads the
+ * command name and then its parameters until the command ends or a
+ * closing brace is met. An opening brace at the end is not stored as a
+ * word; it is signalled by ends_by_brace instead.
+ * Returns NULL on success (words == 0 then means that no command was
+ * found) or an error message.
+ */
+static char *
+split_command(void)
+{
+ words = copied = ends_by_brace = 0;
+ char *msg, *start_word;
+ if (!(start_word = get_token(1, &msg)))
+ return msg;
+ if (*start_word == '{') // only one opening brace
+ return "Unexpected opening brace";
+ while (*line != '}') // stays for the next time
+ {
+ if (!(start_word = get_token(0, &msg)))
+ return msg;
+ if (*start_word == '{') {
+ words--; // discard the brace
+ ends_by_brace = 1;
+ break;
+ }
+ }
+ return NULL;
+}
+
+/* Parsing multiple files */
+
+/*
+ * Parse and interpret all commands from the stream @fb.
+ *  @name_fb  name of the input for error messages, NULL for a string
+ *            passed to cf_set()
+ *  @depth    nesting level of include commands (limited to guard against
+ *            include loops)
+ * Returns NULL on success. On failure the error is logged here and a
+ * non-NULL string is returned, which the including file then reports for
+ * its own include line.
+ */
+static char *
+parse_fastbuf(const char *name_fb, struct fastbuf *fb, uns depth)
+{
+  char *err;
+  name_parse_fb = name_fb;
+  parse_fb = fb;
+  line_num = 0;
+  line = line_buf;
+  *line = 0;
+  while (1)
+    {
+      err = split_command();
+      if (err)
+        goto error;
+      if (!words)
+        return NULL;
+      char *name = copy_buf.ptr + word_buf.ptr[0];
+      char *pars[words-1];
+      for (uns i=1; i<words; i++)
+        pars[i-1] = copy_buf.ptr + word_buf.ptr[i];
+      if (!strcasecmp(name, "include"))
+        {
+          if (words != 2)
+            err = "Expecting one filename";
+          else if (depth > 8)
+            err = "Too many nested files";
+          else if (*line && *line != '#')	// because the contents of line_buf is not re-entrant and will be cleared
+            err = "The include command must be the last one on a line";
+          if (err)
+            goto error;
+          struct fastbuf *new_fb = bopen_try(pars[0], O_RDONLY, 1<<14);
+          if (!new_fb) {
+            err = cf_printf("Cannot open file %s: %m", pars[0]);
+            goto error;
+          }
+          // The nested call overwrites the shared parser state (including
+          // copy_buf, hence the stack copy of the name); restore ours afterwards.
+          uns ll = line_num;
+          err = parse_fastbuf(stk_strdup(pars[0]), new_fb, depth+1);
+          line_num = ll;
+          bclose(new_fb);
+          if (err)
+            goto error;
+          parse_fb = fb;
+          name_parse_fb = name_fb;	// otherwise later warnings would name the included file
+          continue;
+        }
+      // Decode the optional ":operation" suffix of the command name
+      enum cf_operation op;
+      char *c = strchr(name, ':');
+      if (!c)
+        op = strcmp(name, "}") ? OP_SET : OP_CLOSE;
+      else {
+        *c++ = 0;
+        // Guess the operation from its first letters, then verify the full name below
+        switch (Clocase(*c)) {
+          case 's': op = OP_SET; break;
+          case 'c': op = Clocase(c[1]) == 'l' ? OP_CLEAR: OP_COPY; break;
+          case 'a': switch (Clocase(c[1])) {
+                      case 'p': op = OP_APPEND; break;
+                      case 'f': op = OP_AFTER; break;
+                      default: op = OP_ALL;
+                    }; break;
+          case 'p': op = OP_PREPEND; break;
+          case 'r': op = (c[1] && Clocase(c[2]) == 'm') ? OP_REMOVE : OP_RESET; break;
+          case 'e': op = OP_EDIT; break;
+          case 'b': op = OP_BEFORE; break;
+          default: op = OP_SET; break;
+        }
+        if (strcasecmp(c, cf_op_names[op])) {
+          err = cf_printf("Unknown operation %s", c);
+          goto error;
+        }
+      }
+      if (ends_by_brace)
+        op |= OP_OPEN;
+      err = cf_interpret_line(name, op, words-1, pars);
+      if (err)
+        goto error;
+    }
+error:
+  if (name_fb)
+    msg(L_ERROR, "File %s, line %d: %s", name_fb, line_num, err);
+  else if (line_num == 1)
+    msg(L_ERROR, "Manual setting of configuration: %s", err);
+  else
+    msg(L_ERROR, "Manual setting of configuration, line %d: %s", line_num, err);
+  return "included from here";
+}
+
+#ifndef DEFAULT_CONFIG
+#define DEFAULT_CONFIG NULL
+#endif
+char *cf_def_file = DEFAULT_CONFIG;
+static int cf_def_loaded;
+
+#ifndef ENV_VAR_CONFIG
+#define ENV_VAR_CONFIG NULL
+#endif
+char *cf_env_file = ENV_VAR_CONFIG;
+
+static uns postpone_commit; // only for cf_getopt()
+static uns everything_committed; // after the 1st load, this flag is set on
+
+/*
+ * Finish interpretation of one input: check the section stack and run
+ * the commit hooks in the mode given by the current state (none while
+ * commits are postponed, all of them on the first real commit).
+ * Returns 0 on success, 1 on failure.
+ */
+static int
+done_stack(void)
+{
+  if (cf_check_stack())
+    return 1;
+
+  enum cf_commit_mode mode;
+  if (postpone_commit)
+    mode = CF_NO_COMMIT;
+  else if (everything_committed)
+    mode = CF_COMMIT;
+  else
+    mode = CF_COMMIT_ALL;
+
+  if (cf_commit_all(mode))
+    return 1;
+  if (!postpone_commit)
+    everything_committed = 1;
+  return 0;
+}
+
+/*
+ * Parse the configuration file @file and finish the stack afterwards.
+ * Returns 0 on success, 1 if the file cannot be opened, parsed or committed.
+ */
+static int
+load_file(const char *file)
+{
+  cf_init_stack();
+  struct fastbuf *fb = bopen_try(file, O_RDONLY, 1<<14);
+  if (fb) {
+    char *err_msg = parse_fastbuf(file, fb, 0);
+    bclose(fb);
+    if (err_msg)
+      return 1;
+    return done_stack() != 0;
+  }
+  msg(L_ERROR, "Cannot open %s: %m", file);
+  return 1;
+}
+
+/*
+ * Parse configuration commands given directly in @string (read through
+ * an in-memory fastbuf) and finish the stack afterwards.
+ * Returns 0 on success, 1 on failure.
+ */
+static int
+load_string(const char *string)
+{
+  struct fastbuf fb;
+
+  cf_init_stack();
+  fbbuf_init_read(&fb, (byte *)string, strlen(string), 0);
+  if (parse_fastbuf(NULL, &fb, 0))
+    return 1;
+  return done_stack() != 0;
+}
+
+/* Safe loading and reloading */
+
+struct conf_entry { /* We remember a list of actions to apply upon reload */
+ cnode n; /* link in the conf_entries list */
+ enum {
+ CE_FILE = 1, /* arg is a file name, replayed by load_file() */
+ CE_STRING = 2, /* arg is a configuration string, replayed by load_string() */
+ } type;
+ char *arg;
+};
+
+static clist conf_entries;
+
+/*
+ * Record a successfully applied action (a file or a string) at the end of
+ * conf_entries, so that cf_reload() can replay it. Nothing is recorded
+ * unless journaling is requested (cf_need_journal) and commits are
+ * currently postponed.
+ */
+static void
+cf_remember_entry(uns type, const char *arg)
+{
+  if (!cf_need_journal || !postpone_commit)
+    return;
+
+  struct conf_entry *entry = cf_malloc(sizeof(*entry));
+  entry->type = type;
+  entry->arg = cf_strdup(arg);
+  clist_add_tail(&conf_entries, &entry->n);
+}
+
+/*
+ * Reload the configuration inside a journal transaction.
+ * With @file given, that file is loaded; with @file == NULL, all
+ * remembered entries (see cf_remember_entry()) are replayed in order.
+ * On failure, the journal is rolled back and the previous list of
+ * entries and the everything_committed flag are restored.
+ * Returns 0 on success, non-zero on failure.
+ */
+int
+cf_reload(const char *file)
+{
+ cf_journal_swap();
+ struct cf_journal_item *oldj = cf_journal_new_transaction(1);
+ uns ec = everything_committed;
+ everything_committed = 0;
+
+ // conf_entries may never have been initialized (it is a zeroed static)
+ if (!conf_entries.head.next)
+ clist_init(&conf_entries);
+ clist old_entries;
+ clist_move(&old_entries, &conf_entries);
+ // Commits are postponed until everything has been loaded
+ postpone_commit = 1;
+
+ int err = 0;
+ if (file)
+ err = load_file(file);
+ else
+ CLIST_FOR_EACH(struct conf_entry *, ce, old_entries) {
+ if (ce->type == CE_FILE)
+ err |= load_file(ce->arg);
+ else
+ err |= load_string(ce->arg);
+ if (err)
+ break;
+ cf_remember_entry(ce->type, ce->arg);
+ }
+
+ postpone_commit = 0;
+ if (!err)
+ err |= done_stack();
+
+ if (!err) {
+ cf_journal_delete();
+ cf_journal_commit_transaction(1, NULL);
+ } else {
+ everything_committed = ec;
+ cf_journal_rollback_transaction(1, oldj);
+ cf_journal_swap();
+ clist_move(&conf_entries, &old_entries);
+ }
+ return err;
+}
+
+/*
+ * Load the configuration file @file inside a journal transaction.
+ * On success the transaction is committed, the file is remembered for
+ * reloading and the default config is marked as loaded; on failure the
+ * transaction is rolled back. Returns 0 on success, non-zero on failure.
+ */
+int
+cf_load(const char *file)
+{
+  struct cf_journal_item *oldj = cf_journal_new_transaction(1);
+  int err = load_file(file);
+  if (err) {
+    cf_journal_rollback_transaction(1, oldj);
+    return err;
+  }
+  cf_journal_commit_transaction(1, oldj);
+  cf_remember_entry(CE_FILE, file);
+  cf_def_loaded = 1;
+  return err;
+}
+
+/*
+ * Apply the configuration commands in @string inside a journal
+ * transaction. On success the transaction is committed and the string is
+ * remembered for reloading; on failure the transaction is rolled back.
+ * Returns 0 on success, non-zero on failure.
+ */
+int
+cf_set(const char *string)
+{
+  struct cf_journal_item *oldj = cf_journal_new_transaction(0);
+  int err = load_string(string);
+  if (err) {
+    cf_journal_rollback_transaction(0, oldj);
+    return err;
+  }
+  cf_journal_commit_transaction(0, oldj);
+  cf_remember_entry(CE_STRING, string);
+  return err;
+}
+
+/* Command-line parser */
+
+/*
+ * Load the default configuration unless some configuration has been
+ * loaded already (cf_def_loaded). If a default file is set, the file
+ * named by the environment variable cf_env_file takes precedence over
+ * cf_def_file; failure to load either one is fatal. Without a default
+ * file, an empty transaction is run so that all items get initialized.
+ */
+static void
+load_default(void)
+{
+ if (cf_def_loaded++)
+ return;
+ if (cf_def_file)
+ {
+ char *env;
+ if (cf_env_file && (env = getenv(cf_env_file)))
+ {
+ if (cf_load(env))
+ die("Cannot load config file %s", env);
+ }
+ else if (cf_load(cf_def_file))
+ die("Cannot load default config %s", cf_def_file);
+ }
+ else
+ {
+ // We need to create an empty pool and initialize all configuration items
+ struct cf_journal_item *oldj = cf_journal_new_transaction(1);
+ cf_init_stack();
+ // NOTE(review): the result of done_stack() is ignored here -- confirm this is intended
+ done_stack();
+ cf_journal_commit_transaction(1, oldj);
+ }
+}
+
+/*
+ * If commits have been postponed, stop postponing them and commit now.
+ * A failure of the commit is fatal.
+ */
+static void
+final_commit(void)
+{
+  if (!postpone_commit)
+    return;
+  postpone_commit = 0;
+  if (done_stack())
+    die("Cannot commit after the initialization");
+}
+
+/*
+ * Wrapper around getopt_long() which handles the configuration options
+ * itself: -S (set items from a string), -C (load a config file) and, with
+ * CONFIG_DEBUG, the option with code 0x64436667 (dump the configuration
+ * and exit). These options must precede all other ones. Any other result
+ * of getopt_long() is returned to the caller; before that, the default
+ * configuration is loaded and committed (unless getopt_long() reported
+ * an error by ':' or '?').
+ *
+ * NOTE(review): conf_entries is re-initialized and postpone_commit set
+ * again on every call, so entries remembered by an earlier call are
+ * dropped from the list when the caller asks for the next option --
+ * confirm whether this is intended.
+ */
+int
+cf_getopt(int argc, char * const argv[], const char *short_opts, const struct option *long_opts, int *long_index)
+{
+ clist_init(&conf_entries);
+ postpone_commit = 1;
+
+ static int other_options = 0;
+ while (1) {
+ int res = getopt_long (argc, argv, short_opts, long_opts, long_index);
+ if (res == 'S' || res == 'C' || res == 0x64436667)
+ {
+ if (other_options)
+ die("The -S and -C options must precede all other arguments");
+ if (res == 'S') {
+ load_default();
+ if (cf_set(optarg))
+ die("Cannot set %s", optarg);
+ } else if (res == 'C') {
+ if (cf_load(optarg))
+ die("Cannot load config file %s", optarg);
+ }
+#ifdef CONFIG_DEBUG
+ else { /* --dumpconfig */
+ load_default();
+ final_commit();
+ struct fastbuf *b = bfdopen(1, 4096);
+ cf_dump_sections(b);
+ bclose(b);
+ exit(0);
+ }
+#endif
+ } else {
+ /* unhandled option or end of options */
+ if (res != ':' && res != '?') {
+ load_default();
+ final_commit();
+ }
+ other_options++;
+ return res;
+ }
+ }
+}
--- /dev/null
+/*
+ * UCW Library -- Configuration files: only for internal use of conf-*.c
+ *
+ * (c) 2001--2006 Robert Spalek <robert@ucw.cz>
+ * (c) 2003--2006 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_CONF_INTERNAL_H
+#define _UCW_CONF_INTERNAL_H
+
+/* conf-intr.c */
+#define OP_MASK 0xff // only get the operation
+#define OP_OPEN 0x100 // here we only get an opening brace instead of parameters
+#define OP_1ST 0x200 // in the 1st phase selectors are recorded into the mask
+#define OP_2ND 0x400 // in the 2nd phase real data are entered
+enum cf_operation;
+extern char *cf_op_names[];
+extern char *cf_type_names[];
+
+uns cf_type_size(enum cf_type type, struct cf_user_type *utype);
+char *cf_interpret_line(char *name, enum cf_operation op, int number, char **pars);
+void cf_init_stack(void);
+int cf_check_stack(void);
+
+/* conf-journal.c */
+void cf_journal_swap(void);
+void cf_journal_delete(void);
+
+/* conf-section.c */
+/* Bits of cf_section.flags: three flag bits on top, the rest counts the entries */
+#define SEC_FLAG_DYNAMIC 0x80000000 // contains a dynamic attribute
+#define SEC_FLAG_UNKNOWN 0x40000000 // ignore unknown entries
+#define SEC_FLAG_CANT_COPY 0x20000000 // contains lists or parsers
+#define SEC_FLAG_NUMBER 0x0fffffff // number of entries
+enum cf_commit_mode { CF_NO_COMMIT, CF_COMMIT, CF_COMMIT_ALL };
+extern struct cf_section cf_sections;
+
+struct cf_item *cf_find_subitem(struct cf_section *sec, const char *name);
+int cf_commit_all(enum cf_commit_mode cm);
+void cf_add_dirty(struct cf_section *sec, void *ptr);
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Configuration files: interpreter
+ *
+ * (c) 2001--2006 Robert Spalek <robert@ucw.cz>
+ * (c) 2003--2006 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+#include "ucw/conf.h"
+#include "ucw/getopt.h"
+#include "ucw/conf-internal.h"
+#include "ucw/clists.h"
+
+#include <string.h>
+#include <stdio.h>
+
+#define TRY(f) do { char *_msg = f; if (_msg) return _msg; } while (0)
+
+/* Register size of and parser for each basic type */
+
+/*
+ * Parser for string items: stores a copy of @str (made by cf_strdup())
+ * to *ptr. Never fails, so it always returns NULL.
+ */
+static char *
+cf_parse_string(char *str, char **ptr)
+{
+  char *copy = cf_strdup(str);
+  *ptr = copy;
+  return NULL;
+}
+
+/* Signature shared by parsers of basic types: parse @str, store the value to @ptr, return NULL or an error message */
+typedef char *cf_basic_parser(char *str, void *ptr);
+/*
+ * Size and parser of each type. The table is indexed by enum cf_type
+ * (see cf_type_size() and cf_parse_ary()), so the order of entries has
+ * to follow the order of the enum -- presumably the same order as in
+ * cf_type_names[].
+ */
+static struct {
+ uns size;
+ void *parser;
+} parsers[] = {
+ { sizeof(int), cf_parse_int },
+ { sizeof(u64), cf_parse_u64 },
+ { sizeof(double), cf_parse_double },
+ { sizeof(u32), cf_parse_ip },
+ { sizeof(char*), cf_parse_string },
+ { sizeof(int), NULL }, // lookups are parsed extra
+ { 0, NULL }, // user-defined types are parsed extra
+};
+
+/*
+ * Return the storage size of one value of the given type: taken from the
+ * parsers[] table for types below CT_USER, from the user type description
+ * @utype otherwise.
+ */
+inline uns
+cf_type_size(enum cf_type type, struct cf_user_type *utype)
+{
+  return (type < CT_USER) ? parsers[type].size : utype->size;
+}
+
+/*
+ * Parser for lookup items: find @str (case-insensitively) in the
+ * NULL-terminated table @t and store its index to *ptr.
+ * Returns NULL on success. When no entry matches, *ptr is set to -1 and
+ * an error message listing all possible values is returned.
+ */
+static char *
+cf_parse_lookup(char *str, int *ptr, const char * const *t)
+{
+  uns names_len = 0;			// space needed for "name, " of every table entry
+  for (const char * const *n = t; *n; n++) {
+    if (!strcasecmp(*n, str)) {
+      *ptr = n - t;
+      return NULL;
+    }
+    names_len += strlen(*n) + 2;
+  }
+
+  char *err = cf_malloc(names_len + strlen(str) + 60);
+  char *end = err + sprintf(err, "Invalid value %s, possible values are: ", str);
+  for (const char * const *n = t; *n; n++)
+    end += sprintf(end, "%s, ", *n);
+  if (*t)				// cut off the trailing ", "
+    end[-2] = 0;
+  *ptr = -1;
+  return err;
+}
+
+/*
+ * Parse @number parameters @pars into consecutive elements of the array
+ * at @ptr. The parser is chosen by @type: the parsers[] table for basic
+ * types, cf_parse_lookup() for lookups and the user type's own parser
+ * for CT_USER.
+ * Returns NULL on success or the error message of the first failing
+ * element (prefixed by its position if there are several parameters).
+ */
+static char *
+cf_parse_ary(uns number, char **pars, void *ptr, enum cf_type type, union cf_union *u)
+{
+  for (uns i=0; i<number; i++)
+    {
+      uns size = cf_type_size(type, u->utype);
+      void *slot = ptr + i * size;
+      char *msg;
+      if (type < CT_LOOKUP)
+        {
+          cf_basic_parser *parse = (cf_basic_parser*) parsers[type].parser;
+          msg = parse(pars[i], slot);
+        }
+      else if (type == CT_LOOKUP)
+        msg = cf_parse_lookup(pars[i], slot, u->lookup);
+      else if (type == CT_USER)
+        msg = u->utype->parser(pars[i], slot);
+      else
+        ASSERT(0);
+      if (!msg)
+        continue;
+      if (number > 1)
+        return cf_printf("Item %d: %s", i+1, msg);
+      return msg;
+    }
+  return NULL;
+}
+
+/* Interpreter */
+
+#define T(x) #x,
+char *cf_op_names[] = { CF_OPERATIONS };
+#undef T
+char *cf_type_names[] = { "int", "u64", "double", "ip", "string", "lookup", "user" };
+
+#define DARY_HDR_SIZE ALIGN_TO(sizeof(uns), CPU_STRUCT_ALIGN)
+
+/*
+ * Replace the dynamic array referenced by *ptr with a newly allocated one
+ * holding @number elements parsed from @pars. The old pointer is
+ * journaled first. The element count is stored in the header in front
+ * of the array data.
+ */
+static char *
+interpret_set_dynamic(struct cf_item *item, int number, char **pars, void **ptr)
+{
+  // boundary checks done by the caller
+  uns elem_size = cf_type_size(item->type, item->u.utype);
+  cf_journal_block(ptr, sizeof(void*));
+  void *ary = cf_malloc(DARY_HDR_SIZE + number * elem_size) + DARY_HDR_SIZE;
+  *ptr = ary;
+  DARY_LEN(ary) = number;
+  return cf_parse_ary(number, pars, ary, item->type, &item->u);
+}
+
+/*
+ * Append or prepend values to the dynamic array referenced by *ptr.
+ *  @number     number of available parameters in @pars
+ *  @processed  receives the number of parameters consumed, limited by
+ *              the free room left in the array (ABS(item->number) minus
+ *              the current length)
+ *  @op         OP_APPEND or OP_PREPEND; anything else is refused
+ * A new array is allocated, the old pointer is journaled and replaced,
+ * the old contents are copied over and the new values parsed in place.
+ * Returns NULL on success or an error message.
+ */
+static char *
+interpret_add_dynamic(struct cf_item *item, int number, char **pars, int *processed, void **ptr, enum cf_operation op)
+{
+  enum cf_type type = item->type;
+  void *old_p = *ptr;
+  uns size = cf_type_size(item->type, item->u.utype);
+  ASSERT(size >= sizeof(uns));
+  int old_nr = old_p ? DARY_LEN(old_p) : 0;
+  int taken = MIN(number, ABS(item->number)-old_nr);
+  *processed = taken;
+  // Refuse unsupported operations before the array pointer gets replaced
+  // by a block with uninitialized contents
+  if (op != OP_APPEND && op != OP_PREPEND)
+    return cf_printf("Dynamic arrays do not support operation %s", cf_op_names[op]);
+  // stretch the dynamic array
+  void *new_p = cf_malloc(DARY_HDR_SIZE + (old_nr + taken) * size) + DARY_HDR_SIZE;
+  DARY_LEN(new_p) = old_nr + taken;
+  cf_journal_block(ptr, sizeof(void*));
+  *ptr = new_p;
+  if (op == OP_APPEND) {
+    if (old_nr)				// old_p may be NULL when the array is empty
+      memcpy(new_p, old_p, old_nr * size);
+    return cf_parse_ary(taken, pars, new_p + old_nr * size, type, &item->u);
+  } else {
+    if (old_nr)
+      memcpy(new_p + taken * size, old_p, old_nr * size);
+    return cf_parse_ary(taken, pars, new_p, type, &item->u);
+  }
+}
+
+static char *interpret_set_item(struct cf_item *item, int number, char **pars, int *processed, void *ptr, uns allow_dynamic);
+
+/*
+ * Fill the items of the section @sec (located at @ptr) one after another
+ * from the parameters @pars, until either the items or the parameters run
+ * out. The section is marked dirty first. Only the last item of the
+ * section may be set dynamically, and only if @allow_dynamic is set.
+ * *processed receives the total number of parameters consumed.
+ * Returns NULL on success or an error message naming the failing item.
+ */
+static char *
+interpret_section(struct cf_section *sec, int number, char **pars, int *processed, void *ptr, uns allow_dynamic)
+{
+  cf_add_dirty(sec, ptr);
+  *processed = 0;
+  struct cf_item *ci = sec->cfg;
+  while (ci->cls)
+    {
+      int taken;
+      uns dyn_ok = allow_dynamic && !ci[1].cls;	// true for the last item only
+      char *msg = interpret_set_item(ci, number, pars, &taken, ptr + (uintptr_t) ci->ptr, dyn_ok);
+      if (msg)
+        return cf_printf("Item %s: %s", ci->name, msg);
+      *processed += taken;
+      number -= taken;
+      pars += taken;
+      if (!number)		// stop parsing, because many parsers would otherwise complain that number==0
+        break;
+      ci++;
+    }
+  return NULL;
+}
+
+/*
+ * Perform the list operation @op with the node @new_node relative to the
+ * node @where (which may be the list head itself): insert @new_node
+ * behind or in front of @where, or remove @where from its list.
+ * Every link which is about to change is journaled beforehand.
+ */
+static void
+add_to_list(cnode *where, cnode *new_node, enum cf_operation op)
+{
+ switch (op)
+ {
+ case OP_EDIT: // editation has been done in-place
+ break;
+ case OP_REMOVE:
+ CF_JOURNAL_VAR(where->prev->next);
+ CF_JOURNAL_VAR(where->next->prev);
+ clist_remove(where);
+ break;
+ case OP_AFTER: // implementation dependent (prepend_head = after(list)), and where==list, see clists.h:74
+ case OP_PREPEND:
+ case OP_COPY:
+ CF_JOURNAL_VAR(where->next->prev);
+ CF_JOURNAL_VAR(where->next);
+ clist_insert_after(new_node, where);
+ break;
+ case OP_BEFORE: // implementation dependent (append_tail = before(list))
+ case OP_APPEND:
+ case OP_SET:
+ CF_JOURNAL_VAR(where->prev->next);
+ CF_JOURNAL_VAR(where->prev);
+ clist_insert_before(new_node, where);
+ break;
+ default:
+ ASSERT(0);
+ }
+}
+
+/*
+ * Add nodes to the list at @ptr from an inline (brace-less) list of
+ * parameters. For each new node, a section is allocated, initialized,
+ * linked into the list according to @op and filled from @pars; this
+ * repeats until all parameters are consumed. Operations from OP_REMOVE
+ * on are refused here, as they need a block.
+ * *processed receives the number of parameters consumed.
+ * Returns NULL on success or an error message.
+ */
+static char *
+interpret_add_list(struct cf_item *item, int number, char **pars, int *processed, void *ptr, enum cf_operation op)
+{
+ if (op >= OP_REMOVE)
+ return cf_printf("You have to open a block for operation %s", cf_op_names[op]);
+ if (!number)
+ return "Nothing to add to the list";
+ struct cf_section *sec = item->u.sec;
+ *processed = 0;
+ uns index = 0;
+ while (number > 0)
+ {
+ void *node = cf_malloc(sec->size);
+ cf_init_section(item->name, sec, node, 1);
+ add_to_list(ptr, node, op);
+ int taken;
+ /* If the node contains any dynamic attribute at the end, we suppress
+ * auto-repetition here and pass the flag inside instead. */
+ index++;
+ char *msg = interpret_section(sec, number, pars, &taken, node, sec->flags & SEC_FLAG_DYNAMIC);
+ if (msg)
+ return sec->flags & SEC_FLAG_DYNAMIC ? msg : cf_printf("Node %d of list %s: %s", index, item->name, msg);
+ *processed += taken;
+ number -= taken;
+ pars += taken;
+ if (sec->flags & SEC_FLAG_DYNAMIC)
+ break;
+ }
+ return NULL;
+}
+
+/*
+ * Set or clear bits of the 32-bit bitmap at @ptr.
+ *  @pars  bit numbers (CT_INT) or names from the lookup table (CT_LOOKUP)
+ *  @op    OP_SET (OP_APPEND and OP_PREPEND are treated the same) sets the
+ *         bits, OP_REMOVE clears them; other operations are refused
+ * The bitmap is journaled before the first change. *processed is set to
+ * @number on success.
+ * Returns NULL on success or an error message.
+ */
+static char *
+interpret_add_bitmap(struct cf_item *item, int number, char **pars, int *processed, u32 *ptr, enum cf_operation op)
+{
+  if (op == OP_PREPEND || op == OP_APPEND)
+    op = OP_SET;
+  if (op != OP_SET && op != OP_REMOVE)
+    return cf_printf("Cannot apply operation %s on a bitmap", cf_op_names[op]);
+  else if (item->type != CT_INT && item->type != CT_LOOKUP)
+    return cf_printf("Type %s cannot be used with bitmaps", cf_type_names[item->type]);
+  cf_journal_block(ptr, sizeof(u32));
+  for (int i=0; i<number; i++) {
+    // Both parsers store an int, so parse into an int and check the range explicitly
+    int idx;
+    if (item->type == CT_INT)
+      TRY( cf_parse_int(pars[i], &idx) );
+    else
+      TRY( cf_parse_lookup(pars[i], &idx, item->u.lookup) );
+    if (idx < 0 || idx >= 32)
+      return "Bitmaps only have 32 bits";
+    // Unsigned constant: shifting a signed 1 by 31 would overflow
+    if (op == OP_SET)
+      *ptr |= 1U<<idx;
+    else
+      *ptr &= ~(1U<<idx);
+  }
+  *processed = number;
+  return NULL;
+}
+
+/*
+ * Set a single item at @ptr from the parameters @pars, dispatching on the
+ * class of the item. *processed receives the number of parameters
+ * consumed. Items taking a variable number of parameters (dynamic
+ * arrays, variable parsers, lists, bitmaps) are accepted only when
+ * @allow_dynamic is set.
+ * Returns NULL on success or an error message.
+ */
+static char *
+interpret_set_item(struct cf_item *item, int number, char **pars, int *processed, void *ptr, uns allow_dynamic)
+{
+ int taken;
+ switch (item->cls)
+ {
+ case CC_STATIC:
+ if (!number)
+ return "Missing value";
+ taken = MIN(number, item->number);
+ *processed = taken;
+ uns size = cf_type_size(item->type, item->u.utype);
+ cf_journal_block(ptr, taken * size);
+ return cf_parse_ary(taken, pars, ptr, item->type, &item->u);
+ case CC_DYNAMIC:
+ if (!allow_dynamic)
+ return "Dynamic array cannot be used here";
+ taken = MIN(number, ABS(item->number));
+ *processed = taken;
+ return interpret_set_dynamic(item, taken, pars, ptr);
+ case CC_PARSER:
+ // A negative item->number marks a parser taking a variable number of parameters (up to its absolute value)
+ if (item->number < 0 && !allow_dynamic)
+ return "Parsers with variable number of parameters cannot be used here";
+ if (item->number > 0 && number < item->number)
+ return "Not enough parameters available for the parser";
+ taken = MIN(number, ABS(item->number));
+ *processed = taken;
+ // The parser gets copies made by cf_strdup() instead of pointers into the parse buffer
+ for (int i=0; i<taken; i++)
+ pars[i] = cf_strdup(pars[i]);
+ return item->u.par(taken, pars, ptr);
+ case CC_SECTION:
+ return interpret_section(item->u.sec, number, pars, processed, ptr, allow_dynamic);
+ case CC_LIST:
+ if (!allow_dynamic)
+ return "Lists cannot be used here";
+ return interpret_add_list(item, number, pars, processed, ptr, OP_SET);
+ case CC_BITMAP:
+ if (!allow_dynamic)
+ return "Bitmaps cannot be used here";
+ return interpret_add_bitmap(item, number, pars, processed, ptr, OP_SET);
+ default:
+ ASSERT(0);
+ }
+}
+
+/*
+ * Implement the operations which affect a whole item at once.
+ * For bitmaps, OP_CLEAR resets all bits and any other operation sets
+ * all of them (for lookup bitmaps only as many bits as the lookup table
+ * has names). For other classes only OP_CLEAR is accepted: it empties a
+ * list or a dynamic array and zeroes static strings.
+ * Every change is journaled. Returns NULL on success or an error message.
+ */
+static char *
+interpret_set_all(struct cf_item *item, void *ptr, enum cf_operation op)
+{
+  if (item->cls == CC_BITMAP) {
+    cf_journal_block(ptr, sizeof(u32));
+    if (op == OP_CLEAR)
+      * (u32*) ptr = 0;
+    else
+      if (item->type == CT_INT)
+        * (u32*) ptr = ~0u;
+      else {
+        uns nr = 0;
+        while (item->u.lookup[nr])
+          nr++;
+        // Shift counts of 32 or more are undefined, so handle an empty
+        // table and an over-long table separately
+        if (!nr)
+          * (u32*) ptr = 0;
+        else if (nr >= 32)
+          * (u32*) ptr = ~0u;
+        else
+          * (u32*) ptr = ~0u >> (32-nr);
+      }
+    return NULL;
+  } else if (op != OP_CLEAR)
+    return "The item is not a bitmap";
+
+  if (item->cls == CC_LIST) {
+    cf_journal_block(ptr, sizeof(clist));
+    clist_init(ptr);
+  } else if (item->cls == CC_DYNAMIC) {
+    cf_journal_block(ptr, sizeof(void *));
+    // Point the item just behind a static zero, which presumably serves
+    // as the length header of an empty array -- TODO confirm against DARY_LEN
+    static uns zero = 0;
+    * (void**) ptr = (&zero) + 1;
+  } else if (item->cls == CC_STATIC && item->type == CT_STRING) {
+    cf_journal_block(ptr, item->number * sizeof(char*));
+    bzero(ptr, item->number * sizeof(char*));
+  } else
+    return "The item is not a list, dynamic array, bitmap, or string";
+  return NULL;
+}
+
+/*
+ * Compare the static item @item of two section instances @i1 and @i2.
+ * Strings are compared by strcmp(), all other types bytewise.
+ * Returns zero iff the values are equal.
+ */
+static int
+cmp_items(void *i1, void *i2, struct cf_item *item)
+{
+  ASSERT(item->cls == CC_STATIC);
+  void *a = i1 + (uintptr_t) item->ptr;
+  void *b = i2 + (uintptr_t) item->ptr;
+  if (item->type != CT_STRING)		// all numeric types
+    return memcmp(a, b, cf_type_size(item->type, item->u.utype));
+  return strcmp(* (char**) a, * (char**) b);
+}
+
+/*
+ * Find the first node of @list which agrees with the node @query in all
+ * attributes selected by @mask (bit i selects the i-th item of @sec).
+ * Returns the node or NULL if there is none.
+ */
+static void *
+find_list_node(clist *list, void *query, struct cf_section *sec, u32 mask)
+{
+  CLIST_FOR_EACH(cnode *, n, *list)
+    {
+      uns found = 1;
+      for (uns i=0; i<32; i++)
+        // Unsigned constant: shifting a signed 1 by 31 would overflow
+        if (mask & (1U<<i))
+          if (cmp_items(n, query, sec->cfg+i))
+            {
+              found = 0;
+              break;
+            }
+      if (found)
+        return n;
+    }
+  return NULL;
+}
+
+/*
+ * Remember in *mask that the attribute @item has been set inside a list
+ * node query. Only items belonging directly to @sec are recorded; they
+ * have to be among the first 32 items of the section and of the static
+ * class.
+ * Returns NULL on success or an error message.
+ */
+static char *
+record_selector(struct cf_item *item, struct cf_section *sec, u32 *mask)
+{
+  uns nr = sec->flags & SEC_FLAG_NUMBER;
+  if (item >= sec->cfg && item < sec->cfg + nr)	// setting an attribute relative to this section
+    {
+      uns i = item - sec->cfg;
+      if (i >= 32)
+        return "Cannot select list nodes by this attribute";
+      if (sec->cfg[i].cls != CC_STATIC)
+        return "Selection can only be done based on basic attributes";
+      // Unsigned constant: shifting a signed 1 by 31 would overflow
+      *mask |= 1U << i;
+    }
+  return NULL;
+}
+
+/*
+ * Stack of currently open blocks. stack[level] describes the innermost
+ * one; opening_brace() pushes a new entry for every opening brace.
+ */
+#define MAX_STACK_SIZE 10
+static struct item_stack {
+ struct cf_section *sec; // nested section
+ void *base_ptr; // because original pointers are often relative
+ enum cf_operation op; // it is performed when a closing brace is encountered
+ void *list; // list the operations should be done on
+ u32 mask; // bit array of selectors searching in a list
+ struct cf_item *item; // cf_item of the list
+} stack[MAX_STACK_SIZE];
+static uns level;
+
+/*
+ * Handle a line ending with an opening brace: push a new entry on the block
+ * stack for @item (NULL for an ignored unknown item) located at @ptr.
+ * Sections accept only OP_SET.  For lists, a fresh node is allocated and
+ * initialized; operations below OP_REMOVE link it to the list immediately,
+ * the others are marked OP_1ST and resolved in closing_brace().
+ * Returns an error message or NULL.  Note that the new stack entry stays
+ * pushed even when an error is returned after the push.
+ */
+static char *
+opening_brace(struct cf_item *item, void *ptr, enum cf_operation op)
+{
+ if (level >= MAX_STACK_SIZE-1)
+ return "Too many nested sections";
+ enum cf_operation pure_op = op & OP_MASK;
+ stack[++level] = (struct item_stack) {
+ .sec = NULL,
+ .base_ptr = NULL,
+ .op = pure_op,
+ .list = NULL,
+ .mask = 0,
+ .item = NULL,
+ };
+ if (!item) // unknown is ignored; we just need to trace recursion
+ return NULL;
+ stack[level].sec = item->u.sec;
+ if (item->cls == CC_SECTION)
+ {
+ if (pure_op != OP_SET)
+ return "Only SET operation can be used with a section";
+ stack[level].base_ptr = ptr;
+ stack[level].op = OP_EDIT | OP_2ND; // this list operation does nothing
+ }
+ else if (item->cls == CC_LIST)
+ {
+ // The new node (or the query describing an existing node) is built here
+ stack[level].base_ptr = cf_malloc(item->u.sec->size);
+ cf_init_section(item->name, item->u.sec, stack[level].base_ptr, 1);
+ stack[level].list = ptr;
+ stack[level].item = item;
+ if (pure_op == OP_ALL)
+ return "Operation ALL cannot be applied on lists";
+ else if (pure_op < OP_REMOVE) {
+ // relies on the ordering of enum cf_operation values (declared outside this chunk)
+ add_to_list(ptr, stack[level].base_ptr, pure_op);
+ stack[level].op |= OP_2ND;
+ } else
+ stack[level].op |= OP_1ST;
+ }
+ else
+ return "Opening brace can only be used on sections and lists";
+ return NULL;
+}
+
+/*
+ * Handle a closing brace for the stack entry @st.  @op carries OP_OPEN if the
+ * brace is immediately followed by another opening brace; @number and @pars
+ * are the parameters remaining on the line.
+ *
+ * For entries marked OP_1ST, the node built so far is used as a query: the
+ * matching list node is looked up and then removed, edited, copied, or used
+ * as the position for inserting a new node, depending on the operation.
+ * Returns an error message or NULL.
+ * NOTE(review): TRY() presumably returns from this function with the error
+ * message when its argument is non-NULL -- confirm against its definition.
+ */
+static char *
+closing_brace(struct item_stack *st, enum cf_operation op, int number, char **pars)
+{
+ if (st->op == OP_CLOSE) // top-level
+ return "Unmatched } parenthesis";
+ if (!st->sec) { // dummy run on unknown section
+ if (!(op & OP_OPEN))
+ level--;
+ return NULL;
+ }
+ enum cf_operation pure_op = st->op & OP_MASK;
+ if (st->op & OP_1ST)
+ {
+ // 1st phase: from now on st->list points to the matched node, not to the list
+ st->list = find_list_node(st->list, st->base_ptr, st->sec, st->mask);
+ if (!st->list)
+ return "Cannot find a node matching the query";
+ if (pure_op != OP_REMOVE)
+ {
+ if (pure_op == OP_EDIT)
+ st->base_ptr = st->list;
+ else if (pure_op == OP_AFTER || pure_op == OP_BEFORE)
+ cf_init_section(st->item->name, st->sec, st->base_ptr, 1);
+ else if (pure_op == OP_COPY) {
+ if (st->sec->flags & SEC_FLAG_CANT_COPY)
+ return cf_printf("Item %s cannot be copied", st->item->name);
+ memcpy(st->base_ptr, st->list, st->sec->size); // strings and dynamic arrays are shared
+ if (st->sec->copy)
+ TRY( st->sec->copy(st->base_ptr, st->list) );
+ } else
+ ASSERT(0);
+ if (op & OP_OPEN) { // stay at the same recursion level
+ st->op = (st->op | OP_2ND) & ~OP_1ST;
+ add_to_list(st->list, st->base_ptr, pure_op);
+ return NULL;
+ }
+ int taken; // parse parameters on 1 line immediately
+ TRY( interpret_section(st->sec, number, pars, &taken, st->base_ptr, 1) );
+ number -= taken;
+ pars += taken;
+ // and fall-thru to the 2nd phase
+ }
+ add_to_list(st->list, st->base_ptr, pure_op);
+ }
+ // 2nd phase: leave the block; nothing else may follow on the line
+ level--;
+ if (number)
+ return "No parameters expected after the }";
+ else if (op & OP_OPEN)
+ return "No { is expected";
+ else
+ return NULL;
+}
+
+/*
+ * Resolve a dotted item @name relative to @curr_sec.  A leading '^' makes the
+ * name absolute: the search starts in the root section and *@ptr is reset.
+ * While descending, every visited non-root section is marked dirty and the
+ * offset of each component is added to *@ptr, so that on success *@ptr is
+ * the address of the item.
+ *
+ * Returns the item found, or NULL with *@msg set to an error message.  NULL
+ * with *@msg == NULL means the item is unknown but silently ignored.
+ * Caveat: the dots in @name are overwritten by zero bytes, although the
+ * parameter is declared const.
+ */
+static struct cf_item *
+find_item(struct cf_section *curr_sec, const char *name, char **msg, void **ptr)
+{
+ *msg = NULL;
+ if (name[0] == '^') // absolute name instead of relative
+ name++, curr_sec = &cf_sections, *ptr = NULL;
+ if (!curr_sec) // don't even search in an unknown section
+ return NULL;
+ while (1)
+ {
+ if (curr_sec != &cf_sections)
+ cf_add_dirty(curr_sec, *ptr);
+ // Cut off the first component of the name; c points to the rest (if any)
+ char *c = strchr(name, '.');
+ if (c)
+ *c++ = 0;
+ struct cf_item *ci = cf_find_subitem(curr_sec, name);
+ if (!ci->cls)
+ {
+ if (!(curr_sec->flags & SEC_FLAG_UNKNOWN)) // ignore silently unknown top-level sections and unknown attributes in flagged sections
+ *msg = cf_printf("Unknown item %s", name);
+ return NULL;
+ }
+ *ptr += (uintptr_t) ci->ptr;
+ if (!c)
+ return ci;
+ if (ci->cls != CC_SECTION)
+ {
+ *msg = cf_printf("Item %s is not a section", name);
+ return NULL;
+ }
+ curr_sec = ci->u.sec;
+ name = c;
+ }
+}
+
+/*
+ * Dispatch an adding operation @op on @item to the handler of its class
+ * (dynamic array, list, or bitmap).  Other classes do not support adding
+ * and yield an error message mentioning @name.
+ */
+static char *
+interpret_add(char *name, struct cf_item *item, int number, char **pars, int *takenp, void *ptr, enum cf_operation op)
+{
+  if (item->cls == CC_DYNAMIC)
+    return interpret_add_dynamic(item, number, pars, takenp, ptr, op);
+  if (item->cls == CC_LIST)
+    return interpret_add_list(item, number, pars, takenp, ptr, op);
+  if (item->cls == CC_BITMAP)
+    return interpret_add_bitmap(item, number, pars, takenp, ptr, op);
+  return cf_printf("Operation %s not supported on attribute %s", cf_op_names[op], name);
+}
+
+/*
+ * Interpret one parsed configuration line: perform operation @op on the item
+ * called @name (relative to the innermost open block) with @number
+ * parameters stored in @pars.  Closing braces are passed to closing_brace(),
+ * lines carrying OP_OPEN to opening_brace().  Returns an error message or
+ * NULL if everything went well.
+ */
+char *
+cf_interpret_line(char *name, enum cf_operation op, int number, char **pars)
+{
+ char *msg;
+ if ((op & OP_MASK) == OP_CLOSE)
+ return closing_brace(stack+level, op, number, pars);
+ void *ptr = stack[level].base_ptr;
+ struct cf_item *item = find_item(stack[level].sec, name, &msg, &ptr);
+ if (msg)
+ return msg;
+ // Inside a list-node query, remember which attributes take part in the search
+ if (stack[level].op & OP_1ST)
+ TRY( record_selector(item, stack[level].sec, &stack[level].mask) );
+ if (op & OP_OPEN) { // the operation will be performed after the closing brace
+ if (number)
+ return "Cannot open a block after a parameter has been passed on a line";
+ return opening_brace(item, ptr, op);
+ }
+ if (!item) // ignored item in an unknown section
+ return NULL;
+ op &= OP_MASK;
+
+ int taken = 0; // process as many parameters as possible
+ switch (op) {
+ case OP_CLEAR:
+ case OP_ALL:
+ msg = interpret_set_all(item, ptr, op);
+ break;
+ case OP_SET:
+ msg = interpret_set_item(item, number, pars, &taken, ptr, 1);
+ break;
+ case OP_RESET:
+ // reset = clear the item, then append the given values
+ msg = interpret_set_all(item, ptr, OP_CLEAR);
+ if (!msg)
+ msg = interpret_add(name, item, number, pars, &taken, ptr, OP_APPEND);
+ break;
+ default:
+ msg = interpret_add(name, item, number, pars, &taken, ptr, op);
+ }
+ if (msg)
+ return msg;
+ // Every parameter on the line must have been consumed
+ if (taken < number)
+ return cf_printf("Too many parameters: %d>%d", number, taken);
+
+ return NULL;
+}
+
+/*
+ * Look up the item called @name, starting in the root section, and store a
+ * copy of its description to @item with @item->ptr replaced by the address
+ * computed during the look-up.  If the item is unknown but ignored, @item is
+ * zeroed.  Returns an error message or NULL.
+ */
+char *
+cf_find_item(const char *name, struct cf_item *item)
+{
+  char *err;
+  void *addr = NULL;
+  struct cf_item *found = find_item(&cf_sections, name, &err, &addr);
+  if (err)
+    return err;
+  if (!found)
+    {
+      bzero(item, sizeof(struct cf_item));
+      return NULL;
+    }
+  *item = *found;
+  item->ptr = addr;
+  return NULL;
+}
+
+/*
+ * Perform operation @op with @number parameters @pars directly on @item,
+ * whose @item->ptr is used as the address of the data.  Supported are
+ * OP_SET, OP_CLEAR, OP_ALL, OP_APPEND and OP_PREPEND (dynamic arrays, lists
+ * and bitmaps) and OP_REMOVE (bitmaps only).  Returns an error message or
+ * NULL.
+ */
+char *
+cf_modify_item(struct cf_item *item, enum cf_operation op, int number, char **pars)
+{
+  char *err;
+  int taken = 0;
+
+  if (op == OP_SET)
+    err = interpret_set_item(item, number, pars, &taken, item->ptr, 1);
+  else if (op == OP_CLEAR || op == OP_ALL)
+    err = interpret_set_all(item, item->ptr, op);
+  else if (op == OP_APPEND || op == OP_PREPEND)
+    {
+      if (item->cls == CC_DYNAMIC)
+	err = interpret_add_dynamic(item, number, pars, &taken, item->ptr, op);
+      else if (item->cls == CC_LIST)
+	err = interpret_add_list(item, number, pars, &taken, item->ptr, op);
+      else if (item->cls == CC_BITMAP)
+	err = interpret_add_bitmap(item, number, pars, &taken, item->ptr, op);
+      else
+	return "The attribute does not support append/prepend";
+    }
+  else if (op == OP_REMOVE)
+    {
+      if (item->cls != CC_BITMAP)
+	return "Only applicable on bitmaps";
+      err = interpret_add_bitmap(item, number, pars, &taken, item->ptr, op);
+    }
+  else
+    return "Unsupported operation";
+
+  if (err)
+    return err;
+  return (taken < number) ? "Too many parameters" : NULL;
+}
+
+/*
+ * Reset the block stack to the top level.  On the first call, the root
+ * section is also prepared: it is flagged to accept unknown items, its size
+ * is zeroed and it is initialized.
+ */
+void
+cf_init_stack(void)
+{
+  static uns initialized = 0;
+  if (initialized++ == 0)
+    {
+      cf_sections.flags |= SEC_FLAG_UNKNOWN;
+      cf_sections.size = 0;		// size of allocated array used to be stored here
+      cf_init_section(NULL, &cf_sections, NULL, 0);
+    }
+
+  // The bottom entry stands for the root section; OP_CLOSE marks the top level
+  struct item_stack *bottom = &stack[0];
+  bottom->sec = &cf_sections;
+  bottom->base_ptr = NULL;
+  bottom->op = OP_CLOSE;
+  bottom->list = NULL;
+  bottom->mask = 0;
+  bottom->item = NULL;
+  level = 0;
+}
+
+/*
+ * Check that all blocks have been closed.  Returns 0 if we are at the top
+ * level; otherwise logs an error and returns 1.
+ */
+int
+cf_check_stack(void)
+{
+  if (!level)
+    return 0;
+  msg(L_ERROR, "Unterminated block");
+  return 1;
+}
+
--- /dev/null
+/*
+ * UCW Library -- Configuration files: journaling
+ *
+ * (c) 2001--2006 Robert Spalek <robert@ucw.cz>
+ * (c) 2003--2006 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+#include "ucw/conf.h"
+#include "ucw/getopt.h"
+#include "ucw/conf-internal.h"
+#include "ucw/mempool.h"
+
+#include <string.h>
+
+// Memory pools of committed transactions; released by cf_journal_delete()
+static struct old_pools {
+ struct old_pools *prev;
+ struct mempool *pool;
+} *pools; // link-list of older cf_pool's
+
+uns cf_need_journal = 1; // some programs do not need journal
+// One journal record: a saved copy of len bytes located at ptr.
+// The records form a singly linked list headed by `journal'.
+static struct cf_journal_item {
+ struct cf_journal_item *prev;
+ byte *ptr;
+ uns len;
+ byte copy[0]; // the saved bytes are stored right behind the header
+} *journal;
+
+/*
+ * Save the current contents of the @len bytes at @ptr to the journal, so
+ * that a later cf_journal_swap() can restore them.  Does nothing when
+ * journaling is switched off.
+ */
+void
+cf_journal_block(void *ptr, uns len)
+{
+  if (cf_need_journal)
+    {
+      struct cf_journal_item *rec = cf_malloc(sizeof(*rec) + len);
+      rec->ptr = ptr;
+      rec->len = len;
+      memcpy(rec->copy, ptr, len);
+      // Link the record to the head of the journal
+      rec->prev = journal;
+      journal = rec;
+    }
+}
+
+/*
+ * Swap the contents of the memory and of the journal: every record exchanges
+ * its saved bytes with the bytes currently stored at its address.  The list
+ * of records is reversed along the way.
+ */
+void
+cf_journal_swap(void)
+{
+  struct cf_journal_item *reversed = NULL;
+  struct cf_journal_item *ji = journal;
+  while (ji)
+    {
+      struct cf_journal_item *older = ji->prev;
+      ji->prev = reversed;
+      byte *saved = ji->copy;
+      byte *live = ji->ptr;
+      for (uns k=0; k<ji->len; k++)
+	{
+	  byte t = live[k];
+	  live[k] = saved[k];
+	  saved[k] = t;
+	}
+      reversed = ji;
+      ji = older;
+    }
+  journal = reversed;
+}
+
+/*
+ * Start a new transaction with an empty journal.  If @new_pool is set, a new
+ * memory pool becomes cf_pool.  Returns the previous journal, which should
+ * be passed to the commit or rollback function later.
+ */
+struct cf_journal_item *
+cf_journal_new_transaction(uns new_pool)
+{
+  struct cf_journal_item *previous = journal;
+  if (new_pool)
+    cf_pool = mp_new(1<<10);
+  journal = NULL;
+  return previous;
+}
+
+/*
+ * Commit the current transaction.  If @new_pool is set, the current cf_pool
+ * is recorded in the list of old pools.  The previous journal @oldj (if any)
+ * is attached behind the oldest record of the current journal.
+ */
+void
+cf_journal_commit_transaction(uns new_pool, struct cf_journal_item *oldj)
+{
+  if (new_pool)
+    {
+      struct old_pools *rec = cf_malloc(sizeof(struct old_pools));
+      rec->pool = cf_pool;
+      rec->prev = pools;
+      pools = rec;
+    }
+  if (!oldj)
+    return;
+  if (!journal)
+    {
+      journal = oldj;
+      return;
+    }
+  struct cf_journal_item *tail = journal;
+  while (tail->prev)
+    tail = tail->prev;
+  tail->prev = oldj;
+}
+
+/*
+ * Roll back the current transaction: the memory contents saved in the
+ * journal are swapped back and the previous journal @oldj is reinstated.
+ * If @new_pool is set, the current cf_pool is deleted and cf_pool returns to
+ * the most recently recorded old pool (or NULL).  Dies when journaling is
+ * disabled.
+ */
+void
+cf_journal_rollback_transaction(uns new_pool, struct cf_journal_item *oldj)
+{
+  if (!cf_need_journal)
+    die("Cannot rollback the configuration, because the journal is disabled.");
+  cf_journal_swap();
+  journal = oldj;
+  if (!new_pool)
+    return;
+  mp_delete(cf_pool);
+  if (pools)
+    cf_pool = pools->pool;
+  else
+    cf_pool = NULL;
+}
+
+/*
+ * Delete all memory pools recorded in the list of old pools.
+ */
+void
+cf_journal_delete(void)
+{
+  while (pools)
+    {
+      struct old_pools *p = pools;
+      pools = p->prev;		// unlink first, p is not touched after the pool is gone
+      mp_delete(p->pool);
+    }
+}
+
+/* TODO: more space efficient journal */
--- /dev/null
+/*
+ * UCW Library -- Configuration files: parsers for basic types
+ *
+ * (c) 2001--2006 Robert Spalek <robert@ucw.cz>
+ * (c) 2003--2006 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+#include "ucw/conf.h"
+#include "ucw/chartype.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <errno.h>
+
+// A one-letter suffix multiplying the parsed number by the fraction num/den
+struct unit {
+ uns name; // one-letter name of the unit
+ uns num, den; // fraction
+};
+
+// Table of known units, terminated by an entry with a zero name.
+// Lower-case k/m/g are powers of 1000, upper-case K/M/G powers of 1024;
+// d and h are presumably seconds per day and per hour.
+static const struct unit units[] = {
+ { 'd', 86400, 1 },
+ { 'h', 3600, 1 },
+ { 'k', 1000, 1 },
+ { 'm', 1000000, 1 },
+ { 'g', 1000000000, 1 },
+ { 'K', 1024, 1 },
+ { 'M', 1048576, 1 },
+ { 'G', 1073741824, 1 },
+ { '%', 1, 100 },
+ { 0, 0, 0 }
+};
+
+/*
+ * Interpret the text following a number.  @value is the start of the whole
+ * string, @end points behind the parsed number.  Returns the unit found, or
+ * NULL if there is no suffix or if it is invalid; in the latter case, *@msg
+ * is set to an error message.
+ */
+static const struct unit *
+lookup_unit(const char *value, const char *end, char **msg)
+{
+  if (!end || !*end)
+    return NULL;
+  // The suffix must be a single non-digit character following a non-empty number
+  if (end == value || end[1] || (*end >= '0' && *end <= '9'))
+    {
+      *msg = "Invalid number";
+      return NULL;
+    }
+  const struct unit *u = units;
+  while (u->name)
+    {
+      if ((char)u->name == *end)
+	return u;
+      u++;
+    }
+  *msg = "Invalid unit";
+  return NULL;
+}
+
+static char cf_rngerr[] = "Number out of range";
+
+/*
+ * Parse an integer with an optional one-letter unit (see units[]) from @str
+ * and store it to *@ptr.  The base is auto-detected by strtoul().  Returns an
+ * error message or NULL.  When an error is reported, *@ptr is left untouched.
+ */
+char *
+cf_parse_int(const char *str, int *ptr)
+{
+  char *msg = NULL;
+  if (!*str)
+    msg = "Missing number";
+  else {
+    const struct unit *u;
+    char *end;
+    errno = 0;
+    uns x = strtoul(str, &end, 0);
+    if (errno == ERANGE)
+      msg = cf_rngerr;
+    else if ((u = lookup_unit(str, end, &msg))) {
+      u64 y = (u64)x * u->num;	// cannot overflow: both factors fit in 32 bits
+      if (y % u->den)
+	msg = "Number is not an integer";
+      else {
+	y /= u->den;
+	if (y > 0xffffffff)
+	  msg = cf_rngerr;
+	else			// do not store a truncated value on range error
+	  *ptr = y;
+      }
+    } else if (!msg)		// lookup_unit() may have reported a bad suffix
+      *ptr = x;
+  }
+  return msg;
+}
+
+/*
+ * Parse an unsigned 64-bit integer with an optional one-letter unit (see
+ * units[]) from @str and store it to *@ptr.  Returns an error message or
+ * NULL.  When an error is reported, *@ptr is left untouched.
+ */
+char *
+cf_parse_u64(const char *str, u64 *ptr)
+{
+  char *msg = NULL;
+  if (!*str)
+    msg = "Missing number";
+  else {
+    const struct unit *u;
+    char *end;
+    errno = 0;
+    u64 x = strtoull(str, &end, 0);
+    if (errno == ERANGE)
+      msg = cf_rngerr;
+    else if ((u = lookup_unit(str, end, &msg))) {
+      if (x > ~(u64)0 / u->num)	// the multiplication would overflow
+	msg = cf_rngerr;
+      else {
+	x *= u->num;
+	if (x % u->den)
+	  msg = "Number is not an integer";
+	else
+	  *ptr = x / u->den;
+      }
+    } else if (!msg)		// lookup_unit() may have reported a bad suffix
+      *ptr = x;
+  }
+  return msg;
+}
+
+/*
+ * Parse a floating-point number with an optional one-letter unit (see
+ * units[]) from @str and store it to *@ptr.  Returns an error message or
+ * NULL.  When an error is reported, *@ptr is left untouched.
+ */
+char *
+cf_parse_double(const char *str, double *ptr)
+{
+  char *msg = NULL;
+  if (!*str)
+    msg = "Missing number";
+  else {
+    const struct unit *u;
+    double x;
+    int read_chars;		// %n requires a pointer to (signed) int
+    if (sscanf(str, "%lf%n", &x, &read_chars) != 1)
+      msg = "Invalid number";
+    else if ((u = lookup_unit(str, str + read_chars, &msg)))
+      *ptr = x * u->num / u->den;
+    else if (!msg)		// lookup_unit() may have reported a bad suffix
+      *ptr = x;
+  }
+  return msg;
+}
+
+/*
+ * Parse an IPv4 address from @p and store it to *@varp as a 32-bit number
+ * with the first octet in the most significant byte.  Accepted forms are a
+ * single hexadecimal number with the 0x prefix, or four decimal octets
+ * separated by dots.  Returns an error message or NULL.
+ */
+char *
+cf_parse_ip(const char *p, u32 *varp)
+{
+  if (!*p)
+    return "Missing IP address";
+  uns x = 0;
+  char *p2;
+  if (*p == '0' && (p[1] | 32) == 'x' && Cxdigit(p[2])) {
+    errno = 0;
+    // Keep the full width of strtoul()'s result, so that the range check
+    // also works where unsigned long is wider than 32 bits
+    unsigned long v = strtoul(p, &p2, 16);
+    if (errno == ERANGE || v > 0xffffffff)
+      goto error;
+    x = v;
+    p = p2;
+  }
+  else
+    for (uns i = 0; i < 4; i++) {
+      if (i) {
+	if (*p++ != '.')
+	  goto error;
+      }
+      if (!Cdigit(*p))
+	goto error;
+      errno = 0;
+      unsigned long y = strtoul(p, &p2, 10);	// not truncated before the check
+      if (errno == ERANGE || p2 == (char*) p || y > 255)
+	goto error;
+      p = p2;
+      x = (x << 8) + y;
+    }
+  *varp = x;
+  return *p ? "Trailing characters" : NULL;
+error:
+  return "Invalid IP address";
+}
+
--- /dev/null
+/*
+ * UCW Library -- Configuration files: sections
+ *
+ * (c) 2001--2006 Robert Spalek <robert@ucw.cz>
+ * (c) 2003--2006 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+#include "ucw/conf.h"
+#include "ucw/conf-internal.h"
+#include "ucw/clists.h"
+#include "ucw/binsearch.h"
+
+#include <string.h>
+
+/* Dirty sections */
+
+// A section instance recorded by cf_add_dirty(): the section and its base pointer
+struct dirty_section {
+ struct cf_section *sec;
+ void *ptr;
+};
+// Instantiate the gbuf.h template for struct dirty_section with the dirtsec_ prefix
+#define GBUF_TYPE struct dirty_section
+#define GBUF_PREFIX(x) dirtsec_##x
+#include "ucw/gbuf.h"
+static dirtsec_t dirty; // growing array of dirty sections
+static uns dirties; // number of used entries in `dirty'
+
+/*
+ * Record the section @sec with base pointer @ptr as dirty.  Nothing is added
+ * if the most recently recorded entry is the same one.
+ */
+void
+cf_add_dirty(struct cf_section *sec, void *ptr)
+{
+  dirtsec_grow(&dirty, dirties+1);
+  struct dirty_section *slot = &dirty.ptr[dirties];
+  if (dirties)
+    {
+      struct dirty_section *last = slot - 1;
+      if (last->sec == sec && last->ptr == ptr)
+	return;
+    }
+  slot->sec = sec;
+  slot->ptr = ptr;
+  dirties++;
+}
+
+// Instantiate the simple array sorter as dirtsec_sort(): it sorts dirty.ptr[]
+// by the section pointer first and by the instance pointer second.
+// The comparison is fully parenthesized, so that it expands safely inside
+// any expression of the sorter template.
+#define ASORT_PREFIX(x) dirtsec_##x
+#define ASORT_KEY_TYPE struct dirty_section
+#define ASORT_ELT(i) dirty.ptr[i]
+#define ASORT_LT(x,y) ((x).sec < (y).sec || ((x).sec == (y).sec && (x).ptr < (y).ptr))
+#include "ucw/sorter/array-simple.h"
+
+/*
+ * Sort the array of dirty sections and remove duplicate entries.
+ */
+static void
+sort_dirty(void)
+{
+  if (dirties <= 1)
+    return;
+  dirtsec_sort(dirties);
+  // and compress the list: keep an entry only if it differs from its predecessor
+  uns kept = 1;
+  for (uns i=1; i<dirties; i++)
+    {
+      struct dirty_section *cur = &dirty.ptr[i];
+      if (cur->sec == cur[-1].sec && cur->ptr == cur[-1].ptr)
+	continue;
+      if (i != kept)
+	dirty.ptr[kept] = *cur;
+      kept++;
+    }
+  dirties = kept;
+}
+
+/* Initialization */
+
+struct cf_section cf_sections;	// root section
+
+/*
+ * Find the item called @name (case-insensitively) among the items of @sec.
+ * If there is none, the terminating entry (with zero class) is returned.
+ */
+struct cf_item *
+cf_find_subitem(struct cf_section *sec, const char *name)
+{
+  struct cf_item *ci = sec->cfg;
+  while (ci->cls && strcasecmp(ci->name, name))
+    ci++;
+  return ci;
+}
+
+/*
+ * Recompute @sec->flags (recursively for sub-sections and lists): find out
+ * whether the section contains dynamically allocated data and whether it can
+ * be copied, and record the number of its items in the low bits.
+ */
+static void
+inspect_section(struct cf_section *sec)
+{
+  sec->flags = 0;
+  struct cf_item *ci = sec->cfg;
+  for (; ci->cls; ci++)
+    switch (ci->cls)
+      {
+      case CC_SECTION:
+	inspect_section(ci->u.sec);
+	sec->flags |= ci->u.sec->flags & (SEC_FLAG_DYNAMIC | SEC_FLAG_CANT_COPY);
+	break;
+      case CC_LIST:
+	inspect_section(ci->u.sec);
+	sec->flags |= SEC_FLAG_DYNAMIC | SEC_FLAG_CANT_COPY;
+	break;
+      case CC_DYNAMIC:
+      case CC_BITMAP:
+	sec->flags |= SEC_FLAG_DYNAMIC;
+	break;
+      case CC_PARSER:
+	sec->flags |= SEC_FLAG_CANT_COPY;
+	if (ci->number < 0)
+	  sec->flags |= SEC_FLAG_DYNAMIC;
+	break;
+      default:
+	break;
+      }
+  // A user-supplied copy function makes the section copyable again
+  if (sec->copy)
+    sec->flags &= ~SEC_FLAG_CANT_COPY;
+  sec->flags |= ci - sec->cfg;		// record the number of entries
+}
+
+/*
+ * Register the top-level section @sec under @name in the root section.
+ * Dies if the name is already taken.  If @allow_unknown is set, the section
+ * is flagged with SEC_FLAG_UNKNOWN.  The array of root items is allocated on
+ * the first call and doubled whenever it becomes full, so that a zeroed
+ * terminating entry is always present.
+ */
+void
+cf_declare_section(const char *name, struct cf_section *sec, uns allow_unknown)
+{
+  if (!cf_sections.cfg)
+    {
+      cf_sections.size = 50;
+      cf_sections.cfg = xmalloc_zero(cf_sections.size * sizeof(struct cf_item));
+    }
+
+  struct cf_item *slot = cf_find_subitem(&cf_sections, name);
+  if (slot->cls)
+    die("Cannot register section %s twice", name);
+  slot->cls = CC_SECTION;
+  slot->name = name;
+  slot->number = 1;
+  slot->ptr = NULL;
+  slot->u.sec = sec;
+
+  inspect_section(sec);
+  if (allow_unknown)
+    sec->flags |= SEC_FLAG_UNKNOWN;
+
+  // Grow the array if the entry behind the new one falls out of it
+  if (slot + 1 - cf_sections.cfg >= (int) cf_sections.size)
+    {
+      uns old_size = cf_sections.size;
+      cf_sections.cfg = xrealloc(cf_sections.cfg, 2*old_size * sizeof(struct cf_item));
+      bzero(cf_sections.cfg + old_size, old_size * sizeof(struct cf_item));
+      cf_sections.size = 2*old_size;
+    }
+}
+
+/*
+ * Initialize an instance of the section @sec located at @ptr.  If @do_bzero
+ * is set, the instance is zeroed first.  Sub-sections are initialized
+ * recursively, lists are made empty, and NULL pointers of dynamic arrays are
+ * replaced by an empty array.  Finally, the init-hook of the section is
+ * called; if it fails, we die with a message mentioning @name.
+ */
+void
+cf_init_section(const char *name, struct cf_section *sec, void *ptr, uns do_bzero)
+{
+  if (do_bzero)
+    {
+      ASSERT(sec->size);
+      bzero(ptr, sec->size);
+    }
+
+  for (struct cf_item *ci=sec->cfg; ci->cls; ci++)
+    switch (ci->cls)
+      {
+      case CC_SECTION:
+	cf_init_section(ci->name, ci->u.sec, ptr + (uintptr_t) ci->ptr, 0);
+	break;
+      case CC_LIST:
+	clist_init(ptr + (uintptr_t) ci->ptr);
+	break;
+      case CC_DYNAMIC:
+	{
+	  void **dyn = ptr + (uintptr_t) ci->ptr;
+	  if (!*dyn)			// replace NULL by an empty array
+	    {
+	      static uns zero = 0;
+	      *dyn = (&zero) + 1;
+	    }
+	  break;
+	}
+      default:
+	break;
+      }
+
+  if (!sec->init)
+    return;
+  char *msg = sec->init(ptr);
+  if (msg)
+    die("Cannot initialize section %s: %s", name, msg);
+}
+
+/*
+ * Call the commit-hooks of the section @sec (instance at @ptr) and of all
+ * its sub-sections and list nodes; the nested ones are committed first.
+ * The hook of a section is called only if the instance is recorded as dirty
+ * or if @commit_all is set.  Errors of nested commits are logged here.
+ * Returns an error message or NULL.
+ * The array of dirty sections is searched by bisection, so it must have been
+ * sorted before the call.
+ */
+static char *
+commit_section(struct cf_section *sec, void *ptr, uns commit_all)
+{
+ char *err;
+ for (struct cf_item *ci=sec->cfg; ci->cls; ci++)
+ if (ci->cls == CC_SECTION) {
+ if ((err = commit_section(ci->u.sec, ptr + (uintptr_t) ci->ptr, commit_all))) {
+ msg(L_ERROR, "Cannot commit section %s: %s", ci->name, err);
+ return "commit of a subsection failed";
+ }
+ } else if (ci->cls == CC_LIST) {
+ uns idx = 0; // 1-based number of the node, used in the error message only
+ CLIST_FOR_EACH(cnode *, n, * (clist*) (ptr + (uintptr_t) ci->ptr))
+ if (idx++, err = commit_section(ci->u.sec, n, commit_all)) {
+ msg(L_ERROR, "Cannot commit node #%d of list %s: %s", idx, ci->name, err);
+ return "commit of a list failed";
+ }
+ }
+ if (sec->commit) {
+ /* We have to process the whole tree of sections even if just a few changes
+ * have been made, because there are dependencies between commit-hooks and
+ * hence we need to call them in a fixed order. */
+ // Same ordering as the one used for sorting the dirty sections
+#define ARY_LT_X(ary,i,x) ary[i].sec < x.sec || ary[i].sec == x.sec && ary[i].ptr < x.ptr
+ struct dirty_section comp = { sec, ptr };
+ uns pos = BIN_SEARCH_FIRST_GE_CMP(dirty.ptr, dirties, comp, ARY_LT_X);
+
+ if (commit_all
+ || (pos < dirties && dirty.ptr[pos].sec == sec && dirty.ptr[pos].ptr == ptr))
+ return sec->commit(ptr);
+ }
+ return 0;
+}
+
+/*
+ * Sort the list of dirty sections and, unless @cm is CF_NO_COMMIT, call the
+ * commit-hooks of the whole tree of sections (of all sections for
+ * CF_COMMIT_ALL, of the dirty ones otherwise).  Returns 1 if a commit
+ * failed, 0 otherwise.  After a successful commit, the list of dirty
+ * sections is emptied.
+ */
+int
+cf_commit_all(enum cf_commit_mode cm)
+{
+  sort_dirty();
+  if (cm != CF_NO_COMMIT)
+    {
+      char *err = commit_section(&cf_sections, NULL, cm == CF_COMMIT_ALL);
+      if (err)
+	return 1;
+      dirties = 0;
+    }
+  return 0;
+}
--- /dev/null
+/*
+ * Insane tester of reading configuration files
+ *
+ * (c) 2006 Robert Spalek <robert@ucw.cz>
+ */
+
+#include "ucw/lib.h"
+#include "ucw/conf.h"
+#include "ucw/getopt.h"
+#include "ucw/clists.h"
+#include "ucw/fastbuf.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <time.h>
+
+static int verbose;
+
+// A test section; it is used both as the static sub-section `master' and as
+// the node type of the list `slaves' (see cf_top)
+struct sub_sect_1 {
+ cnode n;
+ char *name;
+ time_t t;
+ char *level;
+ int confidence[2];
+ double *list;
+};
+
+// Static instance with non-default values; init_sec_1() leaves it alone
+static struct sub_sect_1 sec1 = { {}, "Charlie", 0, "WBAFC", { 0, -1}, DARY_ALLOC(double, 3, 1e4, -1e-4, 8) };
+
+/*
+ * Init-hook of the test section: fill in default values, except for the
+ * static instance sec1, which keeps its initializer.
+ */
+static char *
+init_sec_1(struct sub_sect_1 *s)
+{
+  if (s != &sec1)		// sec1 is a static variable; skip clearing
+    {
+      s->name = "unknown";
+      s->level = "default";
+      s->confidence[0] = 5;
+      s->confidence[1] = 6;
+      // leave s->list==NULL
+    }
+  return NULL;
+}
+
+/*
+ * Commit-hook of the test section: the first confidence value must lie
+ * between 0 and 10.
+ */
+static char *
+commit_sec_1(struct sub_sect_1 *s)
+{
+  int c = s->confidence[0];
+  return (c < 0 || c > 10) ? "Well, this can't be" : NULL;
+}
+
+/*
+ * A test parser: store the first parameter converted by atoi(), or the
+ * current time if no parameter has been given.
+ */
+static char *
+time_parser(uns number, char **pars, time_t *ptr)
+{
+  if (number)
+    *ptr = atoi(pars[0]);
+  else
+    *ptr = time(NULL);
+  return NULL;
+}
+
+// Description of struct sub_sect_1; F(x) gives the position of member x
+// within the structure
+static struct cf_section cf_sec_1 = {
+ CF_TYPE(struct sub_sect_1),
+ CF_INIT(init_sec_1),
+ CF_COMMIT(commit_sec_1),
+#define F(x) PTR_TO(struct sub_sect_1, x)
+ CF_ITEMS {
+ CF_STRING("name", F(name)),
+ //CF_PARSER("t", F(t), time_parser, 0),
+ CF_STRING("level", F(level)),
+ CF_INT_ARY("confidence", F(confidence[0]), 2), // XXX: the [0] is needed for the sake of type checking
+ CF_DOUBLE_DYN("list", F(list), 100),
+ CF_END
+ }
+#undef F
+};
+
+// Global variables configured by the items of the section cf_top
+static uns nr1 = 15; // commit_top() insists on this value
+static int *nrs1 = DARY_ALLOC(int, 5, 5, 4, 3, 2, 1);
+static int nrs2[5];
+static char *str1 = "no worries";
+static char **str2 = DARY_ALLOC(char *, 2, "Alice", "Bob");
+static u64 u1 = 0xCafeBeefDeadC00ll;
+static double d1 = -1.1;
+static clist secs; // list of struct sub_sect_1, filled by init_top()
+static time_t t1, t2;
+static u32 ip;
+static int *look = DARY_ALLOC(int, 2, 2, 1);
+static u16 numbers[10] = { 2, 100, 1, 5 };
+static u32 bitmap1 = 0xff;
+static u32 bitmap2 = 3;
+
+/*
+ * Parser of the user-defined type u16: parse an integer by cf_parse_int()
+ * and accept it only if it fits in 16 bits.  Returns an error message or
+ * NULL.
+ */
+static char *
+parse_u16(char *string, u16 *ptr)
+{
+  int a;			// cf_parse_int() expects a pointer to int
+  char *msg = cf_parse_int(string, &a);
+  if (msg)
+    return msg;
+  if (a < 0 || a >= (1<<16))
+    return "Come on, man, this doesn't fit to 16 bits";
+  *ptr = a;
+  return NULL;
+}
+
+/*
+ * Dumper of the user-defined type u16: print the value in decimal, followed
+ * by a space.
+ */
+static void
+dump_u16(struct fastbuf *fb, u16 *ptr)
+{
+  u16 value = *ptr;
+  bprintf(fb, "%d ", value);
+}
+
+// Description of the user-defined type u16, used by the item `numbers'
+static struct cf_user_type u16_type = {
+ .size = sizeof(u16),
+ .name = "u16",
+ .parser = (cf_parser1*) parse_u16,
+ .dumper = (cf_dumper1*) dump_u16
+};
+
+/*
+ * Init-hook of the top-level section: put five default nodes on the list
+ * `secs', the second confidence value of each being its index.
+ */
+static char *
+init_top(void *ptr UNUSED)
+{
+  uns i = 0;
+  while (i < 5)
+    {
+      // XXX: cannot be cf_malloc(), because it's deleted when cf_reload()'ed
+      struct sub_sect_1 *node = xmalloc(sizeof(struct sub_sect_1));
+      cf_init_section("slaves", &cf_sec_1, node, 1);
+      node->confidence[1] = i;
+      clist_add_tail(&secs, &node->n);
+      i++;
+    }
+  return NULL;
+}
+
+/*
+ * Commit-hook of the top-level section: refuse any change of nr1.
+ */
+static char *
+commit_top(void *ptr UNUSED)
+{
+  return (nr1 == 15) ? NULL : "Don't touch my variable!";
+}
+
+// Table of names for the look-up item `look'
+static const char * const alphabet[] = { "alpha", "beta", "gamma", "delta", NULL };
+// The top-level test section, registered as "top" in main(); it has no
+// CF_TYPE, all items point to global variables
+static struct cf_section cf_top = {
+ CF_INIT(init_top),
+ CF_COMMIT(commit_top),
+ CF_ITEMS {
+ CF_UNS("nr1", &nr1),
+ CF_INT_DYN("nrs1", &nrs1, 1000),
+ CF_INT_ARY("nrs2", nrs2, 5),
+ CF_STRING("str1", &str1),
+ CF_STRING_DYN("str2", &str2, 20),
+ CF_U64("u1", &u1),
+ CF_DOUBLE("d1", &d1),
+ CF_PARSER("FirstTime", &t1, time_parser, -1),
+ CF_PARSER("SecondTime", &t2, time_parser, 1),
+ CF_SECTION("master", &sec1, &cf_sec_1),
+ CF_LIST("slaves", &secs, &cf_sec_1),
+ CF_IP("ip", &ip),
+ CF_LOOKUP_DYN("look", &look, alphabet, 1000),
+ CF_USER_ARY("numbers", numbers, &u16_type, 10),
+ CF_BITMAP_INT("bitmap1", &bitmap1),
+ CF_BITMAP_LOOKUP("bitmap2", &bitmap2, ((const char* const[]) {
+ "one", "two", "three", "four", "five", "six", "seven", "eight",
+ "nine", "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen", "seventeen",
+ "eighteen", "nineteen", "twenty", NULL // hidden joke here
+ })),
+ CF_END
+ }
+};
+
+// Command-line options: the ones provided by the CF_* macros plus -v
+static char short_opts[] = CF_SHORT_OPTS "v";
+static struct option long_opts[] = {
+ CF_LONG_OPTS
+ {"verbose", 0, 0, 'v'},
+ {NULL, 0, 0, 0}
+};
+
+// Help text printed by usage()
+static char *help = "\
+Usage: conf-test <options>\n\
+\n\
+Options:\n"
+CF_USAGE
+"-v\t\t\tBe verbose\n\
+";
+
+/*
+ * Print an optional printf-like error message @msg followed by the help
+ * text to stderr and exit with status 1.
+ */
+static void NONRET
+usage(char *msg, ...)
+{
+  if (msg)
+    {
+      va_list va;
+      va_start(va, msg);
+      vfprintf(stderr, msg, va);
+      va_end(va);		// every va_start() needs a matching va_end()
+    }
+  fputs(help, stderr);
+  exit(1);
+}
+
+/*
+ * The tester: register the section "top", set the default configuration
+ * file, process the command-line options, and dump all sections to the
+ * standard output.
+ */
+int
+main(int argc, char *argv[])
+{
+  log_init(argv[0]);
+  cf_declare_section("top", &cf_top, 0);
+  cf_def_file = "ucw/conf-test.cf";
+
+  for (;;)
+    {
+      int opt = cf_getopt(argc, argv, short_opts, long_opts, NULL);
+      if (opt < 0)
+	break;
+      if (opt == 'v')
+	verbose++;
+      else
+	usage("unknown option %c\n", opt);
+    }
+  if (optind < argc)
+    usage("too many parameters (%d more)\n", argc-optind);
+
+  /*
+  cf_load("non-existent file");
+  //cf_reload("non-existent file");
+  cf_load("non-existent file");
+  cf_set("top.d1 -1.1; top.master b");
+  cf_reload(NULL);
+  cf_reload(NULL);
+  */
+
+  // File descriptor 1 is the standard output
+  struct fastbuf *out = bfdopen(1, 1<<14);
+  cf_dump_sections(out);
+  bclose(out);
+
+  return 0;
+}
--- /dev/null
+# test config file
+#include ucw/conf-test.t ; top.xa=1
+#include 'non-existent file'; #top.xa=1
+Top { \
+
+ nr1=16 #!!!
+ nrs1 2 3 5 \
+ 7 11 13 \
+ \
+ 17M
+ nrs2 3 3k 3 3 3 ; \
+ str1 "hello,\t\x2bworld%%\n"
+ str2 'Hagenuk,
+ the best' "\
+ " qu'est-ce que c'est?
+ u1 0xbadcafebadbeefc0
+ str2:prepend prepended
+ str2:append appended
+ d1 7%
+ d1 -1.14e-25
+ firsttime ; secondtime 56
+ ^top.master:set alice HB8+
+ slaves:clear
+ ip 0xa
+ ip 195.113.31.123
+ look Alpha
+ look:prepend Beta GAMMA
+ numbers 11000 65535
+ bitmap1 31
+ bitmap1:remove 3 3
+ bitmap2:all
+ bitmap2:remove eleven twelve one
+};;;;;;
+
+unknown.ignored :-)
+
+top.slaves:reset cairns gpua 7 7 -10% +10%
+top.slaves daintree rafc 4 5 -171%
+top.slaves coogee pum 9 8
+top.slaves:prepend {name=bondi; level=\
+ "PUG"; confidence 10 10}
+top.slaves:remove {name daintree}
+top.slaveS:edit {level PUG} Bondi PUG!
+top.slaveS:before {level pum}{
+ confidence 2
+ list 123 456 789
+}
+top.slaves:copy {name coogee} Coogee2 PUM
+
+topp.a=15
+top.nr1= ' 15'
+a { ;-D }
--- /dev/null
+/*
+ * UCW Library -- Configuration files
+ *
+ * (c) 2001--2006 Robert Spalek <robert@ucw.cz>
+ * (c) 2003--2006 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_CONF_H
+#define _UCW_CONF_H
+
+/*** === Data types [[conf_types]] ***/
+
+enum cf_class { /** Class of the configuration item. **/
+ CC_END, // end of list
+ CC_STATIC, // single variable or static array
+ CC_DYNAMIC, // dynamically allocated array
+ CC_PARSER, // arbitrary parser function
+ CC_SECTION, // section appears exactly once
+ CC_LIST, // list with 0..many nodes
+ CC_BITMAP // of up to 32 items
+};
+
+enum cf_type { /** Type of a single value. **/
+ CT_INT, CT_U64, CT_DOUBLE, // number types
+ CT_IP, // IP address
+ CT_STRING, // string type
+ CT_LOOKUP, // in a string table
+ CT_USER // user-defined type
+};
+
+struct fastbuf;
+
+/**
+ * A parser function gets an array of (strdup'ed) strings and a pointer with
+ * the customized information (most likely the target address). It can store
+ * the parsed value anywhere in any way it likes, however it must first call
+ * @cf_journal_block() on the overwritten memory block. It returns an error
+ * message or NULL if everything is all right.
+ **/
+typedef char *cf_parser(uns number, char **pars, void *ptr);
+/**
+ * A parser function for user-defined types gets a string and a pointer to
+ * the destination variable. It must store the value within [ptr,ptr+size),
+ * where size is fixed for each type. It should not call @cf_journal_block().
+ **/
+typedef char *cf_parser1(char *string, void *ptr);
+/**
+ * An init- or commit-hook gets a pointer to the section or NULL if this
+ * is the global section. It returns an error message or NULL if everything
+ * is all right. The init-hook should fill in default values (needed for
+ * dynamically allocated nodes of link lists or for filling global variables
+ * that are run-time dependent). The commit-hook should perform sanity
+ * checks and postprocess the parsed values. Commit-hooks must call
+ * @cf_journal_block() too. Caveat! init-hooks for static sections must not
+ * use @cf_malloc() but normal <<memory:xmalloc()>>.
+ **/
+typedef char *cf_hook(void *ptr);
+/**
+ * Dumps the contents of a variable of a user-defined type.
+ **/
+typedef void cf_dumper1(struct fastbuf *fb, void *ptr);
+/**
+ * Similar to init-hook, but it copies attributes from another list node
+ * instead of setting the attributes to default values. You have to provide
+ * it if your node contains parsed values and/or sub-lists.
+ **/
+typedef char *cf_copier(void *dest, void *src);
+
+struct cf_user_type { /** Structure to store information about user-defined variable type. **/
+ uns size; // of the parsed attribute
+ char *name; // name of the type (for dumping)
+ cf_parser1 *parser; // how to parse it
+ cf_dumper1 *dumper; // how to dump the type
+};
+
+struct cf_section;
+struct cf_item { /** Single configuration item. **/
+ const char *name; // case insensitive
+ int number; // length of an array or #parameters of a parser (negative means at most)
+ void *ptr; // pointer to a global variable or an offset in a section
+ union cf_union {
+ struct cf_section *sec; // declaration of a section or a list
+ cf_parser *par; // parser function
+ const char * const *lookup; // NULL-terminated sequence of allowed strings for lookups
+ struct cf_user_type *utype; // specification of the user-defined type
+ } u;
+ enum cf_class cls:16; // attribute class
+ enum cf_type type:16; // type of a static or dynamic attribute
+};
+
+struct cf_section { /** A section. **/
+ uns size; // 0 for a global block, sizeof(struct) for a section
+ cf_hook *init; // fills in default values (no need to bzero)
+ cf_hook *commit; // verifies parsed data (optional)
+ cf_copier *copy; // copies values from another instance (optional, no need to copy basic attributes)
+ struct cf_item *cfg; // CC_END-terminated array of items
+ uns flags; // for internal use only
+};
+
+/***
+ * [[conf_macros]]
+ * Convenience macros
+ * ~~~~~~~~~~~~~~~~~~
+ *
+ * You could create the structures manually, but you can use these macros to
+ * save some typing.
+ */
+
+/***
+ * Declaration of <<struct_cf_section,`cf_section`>>
+ * ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ *
+ * These macros can be used to configure the <<struct_cf_section,`cf_section`>>
+ * structure.
+ ***/
+
+/**
+ * Data type of a section.
+ * If you store the section into a structure, use this macro.
+ *
+ * Storing a section into a structure is useful mostly when you may have multiple instances of the
+ * section (eg. <<conf_multi,array or list>>).
+ *
+ * Example:
+ *
+ * struct list_node {
+ * cnode n; // This one is for the list itself
+ * char *name;
+ * uns value;
+ * };
+ *
+ * static struct clist nodes;
+ *
+ * static struct cf_section node = {
+ * CF_TYPE(struct list_node),
+ * CF_ITEMS {
+ * CF_STRING("name", PTR_TO(struct list_node, name)),
+ * CF_UNS("value", PTR_TO(struct list_node, value)),
+ * CF_END
+ * }
+ * };
+ *
+ * static struct cf_section section = {
+ *   CF_ITEMS {
+ *     CF_LIST("node", &nodes, &node),
+ *     CF_END
+ *   }
+ * };
+ *
+ * You could use <<def_CF_STATIC,`CF_STATIC`>> or <<def_CF_DYNAMIC,`CF_DYNAMIC`>>
+ * macros to create arrays.
+ */
+#define CF_TYPE(s) .size = sizeof(s)
+/**
+ * An init <<hooks,hook>>.
+ * You can use this to initialize dynamically allocated items (for a dynamic array or list).
+ * The hook returns an error message or NULL if everything was OK.
+ */
+#define CF_INIT(f) .init = (cf_hook*) f
+/**
+ * A commit <<hooks,hook>>.
+ * You can use this one to check sanity of loaded data and postprocess them.
+ * You must call @cf_journal_block() if you change anything.
+ *
+ * Return error message or NULL if everything went OK.
+ **/
+#define CF_COMMIT(f) .commit = (cf_hook*) f
+/**
+ * A <<hooks,copy function>>.
+ * You need to provide one for sections that are too complicated to be copied
+ * properly by a plain memcpy. This happens, for example, when you have a dynamically
+ * allocated section containing a list of other sections.
+ *
+ * You return an error message or NULL if you succeed.
+ **/
+#define CF_COPY(f) .copy = (cf_copier*) f /** **/
+#define CF_ITEMS .cfg = ( struct cf_item[] ) /** List of sub-items. **/
+#define CF_END { .cls = CC_END } /** End of the structure. **/
+/***
+ * Declaration of a configuration item
+ * ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ *
+ * Each of these describes a single <<struct_cf_item,configuration item>>. They are mostly
+ * for internal use; do not use them directly unless you really know what you are doing.
+ ***/
+
+/**
+ * Static array of items.
+ * Expects you to allocate the memory and provide pointer to it.
+ **/
+#define CF_STATIC(n,p,T,t,c) { .cls = CC_STATIC, .type = CT_##T, .name = n, .number = c, .ptr = CHECK_PTR_TYPE(p,t*) }
+/**
+ * Dynamic array of items.
+ * Expects you to provide pointer to your pointer to data and it will allocate new memory for it
+ * and set your pointer to it.
+ **/
+#define CF_DYNAMIC(n,p,T,t,c) { .cls = CC_DYNAMIC, .type = CT_##T, .name = n, .number = c, .ptr = CHECK_PTR_TYPE(p,t**) }
+#define CF_PARSER(n,p,f,c) { .cls = CC_PARSER, .name = n, .number = c, .ptr = p, .u.par = (cf_parser*) f } /** A low-level parser. **/
+#define CF_SECTION(n,p,s) { .cls = CC_SECTION, .name = n, .number = 1, .ptr = p, .u.sec = s } /** A sub-section. **/
+#define CF_LIST(n,p,s) { .cls = CC_LIST, .name = n, .number = 1, .ptr = CHECK_PTR_TYPE(p,clist*), .u.sec = s } /** A list with sub-items. **/
+#define CF_BITMAP_INT(n,p) { .cls = CC_BITMAP, .type = CT_INT, .name = n, .number = 1, .ptr = CHECK_PTR_TYPE(p,u32*) } /** A bitmap. **/
+#define CF_BITMAP_LOOKUP(n,p,t) { .cls = CC_BITMAP, .type = CT_LOOKUP, .name = n, .number = 1, .ptr = CHECK_PTR_TYPE(p,u32*), .u.lookup = t } /** A bitmap with named bits. **/
+/***
+ * Basic configuration items
+ * ^^^^^^^^^^^^^^^^^^^^^^^^^
+ *
+ * They describe basic data types used in the configuration. This should be enough for
+ * most real-life purposes.
+ *
+ * The parameters are as follows:
+ *
+ * * @n -- name of the item.
+ * * @p -- pointer to the variable where it shall be stored.
+ * * @c -- count.
+ **/
+#define CF_INT(n,p) CF_STATIC(n,p,INT,int,1) /** Single `int` value. **/
+#define CF_INT_ARY(n,p,c) CF_STATIC(n,p,INT,int,c) /** Static array of integers. **/
+#define CF_INT_DYN(n,p,c) CF_DYNAMIC(n,p,INT,int,c) /** Dynamic array of integers. **/
+#define CF_UNS(n,p) CF_STATIC(n,p,INT,uns,1) /** Single `uns` (`unsigned`) value. **/
+#define CF_UNS_ARY(n,p,c) CF_STATIC(n,p,INT,uns,c) /** Static array of unsigned integers. **/
+#define CF_UNS_DYN(n,p,c) CF_DYNAMIC(n,p,INT,uns,c) /** Dynamic array of unsigned integers. **/
+#define CF_U64(n,p) CF_STATIC(n,p,U64,u64,1) /** Single unsigned 64bit integer (`u64`). **/
+#define CF_U64_ARY(n,p,c) CF_STATIC(n,p,U64,u64,c) /** Static array of u64s. **/
+#define CF_U64_DYN(n,p,c) CF_DYNAMIC(n,p,U64,u64,c) /** Dynamic array of u64s. **/
+#define CF_DOUBLE(n,p) CF_STATIC(n,p,DOUBLE,double,1) /** Single instance of `double`. **/
+#define CF_DOUBLE_ARY(n,p,c) CF_STATIC(n,p,DOUBLE,double,c) /** Static array of doubles. **/
+#define CF_DOUBLE_DYN(n,p,c) CF_DYNAMIC(n,p,DOUBLE,double,c) /** Dynamic array of doubles. **/
+#define CF_IP(n,p) CF_STATIC(n,p,IP,u32,1) /** Single IPv4 address. **/
+#define CF_IP_ARY(n,p,c) CF_STATIC(n,p,IP,u32,c) /** Static array of IP addresses. **/
+#define CF_IP_DYN(n,p,c) CF_DYNAMIC(n,p,IP,u32,c) /** Dynamic array of IP addresses. **/
+/**
+ * A string.
+ * You provide a pointer to a `char *` variable and it will fill it with
+ * dynamically allocated string. For example:
+ *
+ * static char *string = "Default string";
+ *
+ * static struct cf_section section = {
+ * CF_ITEMS {
+ * CF_STRING("string", &string),
+ * CF_END
+ * }
+ * };
+ **/
+#define CF_STRING(n,p) CF_STATIC(n,p,STRING,char*,1)
+#define CF_STRING_ARY(n,p,c) CF_STATIC(n,p,STRING,char*,c) /** Static array of strings. **/
+#define CF_STRING_DYN(n,p,c) CF_DYNAMIC(n,p,STRING,char*,c) /** Dynamic array of strings. **/
+/**
+ * One string out of a predefined set.
+ * You provide the set as an array of strings terminated by NULL (similar to @argv argument
+ * of main()) as the @t parameter.
+ *
+ * The configured variable (pointer to `int`) is set to index of the string.
+ * So, it works this way:
+ *
+ * static char *strings[] = { "First", "Second", "Third", NULL };
+ *
+ * static int variable;
+ *
+ * static struct cf_section section = {
+ * CF_ITEMS {
+ * CF_LOOKUP("choice", &variable, strings),
+ * CF_END
+ * }
+ * };
+ *
+ * Now, if the configuration contains `choice "Second"`, `variable` will be set to 1.
+ **/
+#define CF_LOOKUP(n,p,t) { .cls = CC_STATIC, .type = CT_LOOKUP, .name = n, .number = 1, .ptr = CHECK_PTR_TYPE(p,int*), .u.lookup = t }
+/**
+ * Static array of strings out of predefined set.
+ **/
+#define CF_LOOKUP_ARY(n,p,t,c) { .cls = CC_STATIC, .type = CT_LOOKUP, .name = n, .number = c, .ptr = CHECK_PTR_TYPE(p,int*), .u.lookup = t }
+/**
+ * Dynamic array of strings out of predefined set.
+ **/
+#define CF_LOOKUP_DYN(n,p,t,c) { .cls = CC_DYNAMIC, .type = CT_LOOKUP, .name = n, .number = c, .ptr = CHECK_PTR_TYPE(p,int**), .u.lookup = t }
+/**
+ * A user-defined type.
+ * See <<custom_parser,creating custom parsers>> section if you want to know more.
+ **/
+#define CF_USER(n,p,t) { .cls = CC_STATIC, .type = CT_USER, .name = n, .number = 1, .ptr = p, .u.utype = t }
+/**
+ * Static array of user-defined types (all of the same type).
+ * See <<custom_parser,creating custom parsers>> section.
+ **/
+#define CF_USER_ARY(n,p,t,c) { .cls = CC_STATIC, .type = CT_USER, .name = n, .number = c, .ptr = p, .u.utype = t }
+/**
+ * Dynamic array of user-defined types.
+ * See <<custom_parser,creating custom parsers>> section.
+ **/
+#define CF_USER_DYN(n,p,t,c) { .cls = CC_DYNAMIC, .type = CT_USER, .name = n, .number = c, .ptr = p, .u.utype = t }
+
+/**
+ * Any number of dynamic array elements
+ **/
+#define CF_ANY_NUM -0x7fffffff
+
+#define DARY_LEN(a) ((uns*)a)[-1] /** Length of a dynamic array. **/
+#define DARY_ALLOC(type,len,val...) ((struct { uns l; type a[len]; }) { .l = len, .a = { val } }).a
+ // creates a static instance of a dynamic array
+
+/***
+ * [[alloc]]
+ * Memory allocation
+ * ~~~~~~~~~~~~~~~~~
+ *
+ * Uses <<mempool:,memory pools>> for efficiency and journal recovery.
+ * You should use these routines when implementing custom parsers.
+ ***/
+struct mempool;
+/**
+ * A <<mempool:type_mempool,memory pool>> for configuration parser needs.
+ * Memory allocated from here is valid as long as the current config is loaded
+ * (if you allocate some memory and rollback the transaction or you load some
+ * other configuration, it gets lost).
+ **/
+extern struct mempool *cf_pool;
+void *cf_malloc(uns size); /** Returns @size bytes of memory. Allocates from <<var_cf_pool,`cf_pool`>>. **/
+void *cf_malloc_zero(uns size); /** Like @cf_malloc(), but zeroes the memory. **/
+char *cf_strdup(const char *s); /** Copy a string into @cf_malloc()ed memory. **/
+char *cf_printf(const char *fmt, ...) FORMAT_CHECK(printf,1,2); /** printf() into @cf_malloc()ed memory. **/
+
+/***
+ * [[journal]]
+ * Undo journal
+ * ~~~~~~~~~~~~
+ *
+ * For error recovery when <<reload,reloading configuration>>.
+ ***/
+extern uns cf_need_journal; /** Is the journal needed? If you do not reload configuration, you set this to 0 and gain a little more performance and free memory. **/
+/**
+ * When a block of memory is about to be changed, put the old value
+ * into journal with this function. You need to call it from a <<hooks,commit hook>>
+ * if you change anything. It is used internally by low-level parsers.
+ * <<custom_parser,Custom parsers>> do not need to call it, it is called
+ * before them.
+ **/
+void cf_journal_block(void *ptr, uns len);
+#define CF_JOURNAL_VAR(var) cf_journal_block(&(var), sizeof(var)) // Store single value into journal.
+
+/***
+ * [[declare]]
+ * Section declaration
+ * ~~~~~~~~~~~~~~~~~~~
+ **/
+
+/**
+ * Plug another top-level section into the configuration system.
+ * @name is the name in the configuration file,
+ * @sec is pointer to the section description.
+ * If @allow_unknown is set to 0 and a variable not described in @sec
+ * is found in the configuration file, it produces an error.
+ * If you set it to 1, all such variables are ignored.
+ **/
+void cf_declare_section(const char *name, struct cf_section *sec, uns allow_unknown);
+/**
+ * If you have a section in a structure and you want to initialize it
+ * (eg. if you want a copy of default values outside the configuration),
+ * you can use this. It initializes it recursively.
+ *
+ * This is used mostly internally. You probably do not need it.
+ **/
+void cf_init_section(const char *name, struct cf_section *sec, void *ptr, uns do_bzero);
+
+/***
+ * [[bparser]]
+ * Parsers for basic types
+ * ~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * Each of them gets a string to parse and pointer to store the value.
+ * It returns either NULL or error message.
+ *
+ * The parsers support units. See <<config:units,their list>>.
+ ***/
+char *cf_parse_int(const char *str, int *ptr); /** Parser for integers. **/
+char *cf_parse_u64(const char *str, u64 *ptr); /** Parser for 64-bit unsigned integers. **/
+char *cf_parse_double(const char *str, double *ptr); /** Parser for doubles. **/
+char *cf_parse_ip(const char *p, u32 *varp); /** Parser for IP addresses. **/
+
+#endif
+
--- /dev/null
+/*
+ * UCW Library -- Configuration-Dependent Definitions
+ *
+ * (c) 1997--2009 Martin Mares <mj@ucw.cz>
+ * (c) 2006 Robert Spalek <robert@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_CONFIG_H
+#define _UCW_CONFIG_H
+
+/* Configuration switches */
+
+#include "autoconf.h"
+
+/* Tell libc we're going to use all extensions available */
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
+/* Types (based on standard C99 integers) */
+
+#include <stddef.h>
+#include <stdint.h>
+
+typedef uint8_t byte; /** Exactly 8 bits, unsigned **/
+typedef uint8_t u8; /** Exactly 8 bits, unsigned **/
+typedef int8_t s8; /** Exactly 8 bits, signed **/
+typedef uint16_t u16; /** Exactly 16 bits, unsigned **/
+typedef int16_t s16; /** Exactly 16 bits, signed **/
+typedef uint32_t u32; /** Exactly 32 bits, unsigned **/
+typedef int32_t s32; /** Exactly 32 bits, signed **/
+typedef uint64_t u64; /** Exactly 64 bits, unsigned **/
+typedef int64_t s64; /** Exactly 64 bits, signed **/
+
+typedef unsigned int uns; /** A better pronounceable alias for `unsigned int` **/
+typedef u32 ucw_time_t; /** Seconds since UNIX epoch **/
+typedef s64 timestamp_t; /** Milliseconds since UNIX epoch **/
+
+#ifdef CONFIG_LARGE_FILES
+typedef s64 ucw_off_t; /** File position (either 32- or 64-bit, depending on `CONFIG_LARGE_FILES`). **/
+#else
+typedef s32 ucw_off_t;
+#endif
+
+#endif
--- /dev/null
+# Configuration variables of the UCW library and their default values
+# (c) 2005--2009 Martin Mares <mj@ucw.cz>
+
+# Version of the whole package
+Set("SHERLOCK_VERSION" => "3.99.2");
+Set("SHERLOCK_VERSION_CODE" => 3099002);
+Set("UCW_VERSION" => Get("SHERLOCK_VERSION"));
+Set("UCW_VERSION_CODE" => Get("SHERLOCK_VERSION_CODE"));
+
+# Compile everything with debug information and ASSERT's
+UnSet("CONFIG_DEBUG");
+
+# Enable aggressive optimizations depending on exact CPU type (don't use for portable packages)
+UnSet("CONFIG_EXACT_CPU");
+
+# Support files >2GB
+Set("CONFIG_LARGE_FILES");
+
+# Use shared libraries
+UnSet("CONFIG_SHARED");
+
+# If your system can't reset getopt with 'optind = 0', you need to compile our internal copy
+# of GNU libc's getopt. This should not be necessary on GNU libc.
+UnSet("CONFIG_OWN_GETOPT");
+
+# Install libraries and their API includes
+UnSet("CONFIG_INSTALL_API");
+
+# Build with support for multi-threaded programs
+Set("CONFIG_UCW_THREADS" => 1);
+
+# Include Perl modules
+Set("CONFIG_UCW_PERL" => 1);
+
+# Include Perl modules written in C
+UnSet("CONFIG_UCW_PERL_MODULES");
+
+# Include support utilities for shell scripts
+Set("CONFIG_UCW_SHELL_UTILS" => 1);
+
+# Include utilities
+Set("CONFIG_UCW_UTILS" => 1);
+
+# Default configuration file
+UnSet("DEFAULT_CONFIG");
+
+# Environment variable with configuration file
+UnSet("ENV_VAR_CONFIG");
+
+# Use obsolete URL escaping rules (if you need behavior identical to the older versions of libucw)
+UnSet("CONFIG_URL_ESCAPE_COMPAT");
+
+# Allow use of direct IO on files
+Set("CONFIG_DIRECT_IO");
+Set("CONFIG_UCW_FB_DIRECT");
+
+# Return success
+1;
--- /dev/null
+# Makefile for the UCW documentation, (c) 2008 Michal Vaner <vorner@ucw.cz>
+
+# Register the documentation directory in DIRS, so that the `runtree' target
+# depends on its .dir-stamp inside the object tree $(o)
+DIRS+=ucw/doc
+
+# Base names of the documentation pages; each one yields $(o)/ucw/doc/<name>.html
+UCW_DOCS=basics log fastbuf index config configure install basecode hash docsys conf mempool eltpool mainloop generic growbuf unaligned lists chartype unicode prime binsearch heap binheap compress sort hashtable
+UCW_INDEX=$(o)/ucw/doc/def_index.html
+UCW_DOCS_HTML=$(addprefix $(o)/ucw/doc/,$(addsuffix .html,$(UCW_DOCS)))
+
+# Target-specific variables; presumably consumed by the generic documentation
+# rules of the build system (those rules are not part of this file)
+$(UCW_INDEX): DOC_HEAD=$(s)/ucw/doc/def_index.txt
+$(UCW_INDEX): DOC_LIST=$(patsubst %,$(o)/ucw/doc/%.deflist,$(UCW_DOCS))
+$(UCW_INDEX) $(UCW_DOCS_HTML): DOC_MODULE=ucw
+
+# Append the pages, the index and the module name to the global lists
+# used by the `docs' and `runtree' targets
+DOCS+=$(UCW_DOCS_HTML)
+DOC_INDICES+=$(UCW_INDEX)
+DOC_MODULES+=ucw
+
+ifdef CONFIG_DOC
+INSTALL_TARGETS+=install-libucw-docs
+endif
+
+.PHONY: install-libucw-docs
+install-libucw-docs: $(UCW_INDEX) $(UCW_DOCS_HTML)
+ install -d -m 755 $(DESTDIR)$(INSTALL_DOC_DIR)/ucw/
+ install -m 644 $^ $(DESTDIR)$(INSTALL_DOC_DIR)/ucw/
--- /dev/null
+Base64 and Base224 encodings
+============================
+
+These modules can be used to encode and decode data to and from
+base64 (described in RFC 3548) and base224 (not described in any
+standard, uses all non-control characters, briefly described in
+a comment at the beginning of `ucw/base224.c`).
+
+- <<base64,Base64>>
+- <<base224,Base224>>
+- <<usage,Usage>>
+- <<basecode,The basecode utility>>
+
+[[base64]]
+ucw/base64.h
+------------
+!!ucw/base64.h
+
+[[base224]]
+ucw/base224.h
+-------------
+!!ucw/base224.h
+
+[[usage]]
+Usage
+-----
+
+- You may want to encode a small block of known size. Just allocate the
+ output buffer and feed the data to the function.
+
+ byte output[BASE64_ENC_LENGTH(input_size)];
+ uns output_size = base64_encode(output, input, input_size);
+
+- Decoding can be done in a similar way. It is enough to have an output
+ buffer of the same size as the input one.
+
+- Encoding of a stream of unknown or large size can be split into
+ chunks. The input chunk size must be multiple of `BASE64_IN_CHUNK`.
+ The output will be corresponding multiple of `BASE64_OUT_CHUNK`.
+
+ uns input_size;
+ byte input[BASE64_IN_CHUNK * 10];
+ while(input_size = read_chunk(input, BASE64_IN_CHUNK * 10)) {
+ byte output[BASE64_OUT_CHUNK * 10];
+ uns output_size = base64_encode(output, input, input_size);
+ use_chunk(output, output_size);
+ }
+
+- Decoding of a stream is done in the same way, just swap
+ `BASE64_IN_CHUNK` and `BASE64_OUT_CHUNK` (you feed the decode
+ function with `BASE64_OUT_CHUNK` multiple and get `BASE64_IN_CHUNK`
+ multiple).
+
+The base224 has similar interface, therefore you can use it the same
+way as base64.
+
+[[basecode]]
+The basecode utility
+--------------------
+You can use the encoding/decoding routines from command line, through
+`basecode` command. You have to specify the operation by a command
+line argument and give it the data on standard input. The arguments
+are:
+
+- `-e`: Encode to base64.
+- `-d`: Decode from base64.
+- `-E`: Encode to base224.
+- `-D`: Decode from base224.
+
+Furthermore, you can provide `--prefix` argument. If you do, the
+output (when encoding) will be split to lines by default number of
+chunks and the value of prefix will be prepended to each of them.
+When decoding, it removes the prefix from the beginning of line.
+
+You can override the default number of blocks for line-splitting by
+`--blocks` argument.
--- /dev/null
+LibUCW Basics
+=============
+
+Every program using LibUCW should start with `#include <ucw/lib.h>` which
+brings in the most frequently used library functions, macros and types.
+This should be done before you include any of the system headers, since
+`lib.h` defines the feature macros of the system C library.
+
+Portability
+-----------
+
+LibUCW is written in C99 with a couple of GNU extensions mixed in where needed.
+It currently requires the GNU C compiler version 4.0 or newer, but most modules
+should be very easy to adapt to a different C99 compiler. (A notable exception
+is `stkstring.h`, which is heavily tied to GNU extensions.)
+
+The library has been developed on Linux with the GNU libc and it is known to run
+on Darwin, too. The authors did not try using it on other systems, but most of
+the code is written for a generic POSIX system, so porting to any UNIX-like system
+should be a piece of cake.
+
+ucw/lib.h
+---------
+*Only partially documented.*
+
+!!ucw/lib.h
+
+ucw/config.h
+------------
+This header contains the standard set of types used by LibUCW. It is automatically
+included by `ucw/lib.h`.
+
+!!ucw/config.h
--- /dev/null
+Binomial heaps
+==============
+
+* <<intro,Introduction>>
+* <<common,Common definitions>>
+* <<generator,Interface to the generator>>
+
+[[intro]]
+Introduction
+------------
+
+Binomial heap is a data structure that supports for example efficient merge of two heaps, insertions, deletions or access to the minimum element.
+All these operations are logarithmic in the worst case. If the merge is not significant, it is usually better to use simpler <<heap:,binary heaps>>.
+
+They are defined in `ucw/binheap.h` as <<generic:,generics generated by preprocessor>>, some common definitions are also in `ucw/binheap-node.h`.
+
+!!ucw/binheap-node.h
+
+!!ucw/binheap.h
--- /dev/null
+Binary search
+=============
+
+* <<defs,Definitions>>
+* <<examples,Examples>>
+
+!!ucw/binsearch.h
+
+[[examples]]
+Examples
+--------
+
+You can find a few examples of binary search usage. Although we define only a few macros, they can be used
+for several different cases, for example to find lower elements in a (non-)decreasing array or even to find
+elements in a (non-)increasing array.
+
+ static int inc[10] = { 1, 4, 4, 5, 6, 10, 11, 20, 25, 50 };
+ static const char *str[5] = { "aaa", "abc", "bflmpsvz", "rep", "rep" };
+ static int dec[3] = { 5, 2, 1 };
+
+ // find the first equal element
+ printf("%d\n", BIN_SEARCH_EQ(inc, 10, 4)); // prints 1
+ printf("%d\n", BIN_SEARCH_EQ(inc, 10, 15)); // prints -1 (not found)
+
+ // find the first greater or equal element
+ printf("%d\n", BIN_SEARCH_GE(inc, 10, 9)); // prints 5
+ printf("%d\n", BIN_SEARCH_GE(inc, 10, 10)); // prints 5
+ printf("%d\n", BIN_SEARCH_GE(inc, 10, 4)); // prints 1
+ printf("%d\n", BIN_SEARCH_GE(inc, 10, 99)); // prints 10 (not found)
+
+ // find the last equal element (or -1 if does not exist)
+ #define CMP_LE(ary, i, x) ((ary[i]) <= (x))
+ int i = BIN_SEARCH_FIRST_GE_CMP(inc, 10, 4, CMP_LE);
+ printf("%d\n", (i && inc[i - 1] == 4) ? i - 1 : -1); // prints 2
+
+ // find the first greater element
+ printf("%d\n", BIN_SEARCH_FIRST_GE_CMP(inc, 10, 25, CMP_LE)); // prints 9
+
+ // find the last lower or equal element (or -1 if does not exist)
+ printf("%d\n", BIN_SEARCH_FIRST_GE_CMP(inc, 10, 25, CMP_LE) - 1); // prints 8
+
+ // find the last lower element (or -1 if does not exist)
+ printf("%d\n", BIN_SEARCH_FIRST_GE(inc, 10, 25) - 1); // prints 7
+
+ // find the first greater or equal string
+ #define CMP_STR(ary, i, x) (strcmp((ary[i]), (x)) < 0)
+ printf("%d\n", BIN_SEARCH_GE_CMP(str, 5, "bfl", CMP_STR)); // prints 2
+
+ // find the first lower or equal element in the non-increasing array
+ #define CMP_GT(ary, i, x) ((ary[i]) > (x))
+ printf("%d\n", BIN_SEARCH_FIRST_GE_CMP(dec, 3, 4, CMP_GT)); // prints 1
--- /dev/null
+Single-byte characters
+======================
+
+!!ucw/chartype.h
--- /dev/null
+Compression
+===========
+
+The library contains a compression routine, called LiZaRd. It is
+a modified Lempel-Ziv 77 method with slightly worse compression ratio,
+but with faster compression and decompression (compression is a few times
+faster than zlib, decompression is slightly slower than memcpy()).
+
+The data format and inspiration for code comes from the LZO project
+(which couldn't be used due to licence problems). They might be
+compatible, but no-one tested that.
+
+- <<basic,Basic application>>
+- <<safe,Safe decompression>>
+- <<adler,Adler-32 checksum>>
+
+!!ucw/lizard.h
--- /dev/null
+Configuration and command line parser
+=====================================
+
+Libucw contains a parser for configuration files described in
+<<config:>>.
+
+The principle is you specify the structure of the configuration file,
+the section names, variable names and types and your C variables that
+are assigned to them. Then you run the parser and it fills your
+variables with the values from the configuration file.
+
+It is modular. It means you do not have to write all configuration at
+the same place, you just declare the parts you need locally and do not
+care about the other parts.
+
+The command line parser has the same interface as unix getopt_long(),
+but handles setting of configuration files and configuration values
+from command line.
+
+- <<example,Example>>
+ * <<ex_structure,The structure>>
+ * <<ex_load,Loading>>
+- <<deep,Getting deeper>>
+ * <<conf_multi,Arrays and lists>>
+ * <<reload,Reloading configuration>>
+ * <<custom_parser,Creating custom parsers>>
+ * <<hooks,Hooks>>
+- <<conf_h,ucw/conf.h>>
+ * <<conf_types,Data types>>
+ * <<conf_macros,Convenience macros>>
+ * <<alloc,Memory allocation>>
+ * <<journal,Undo journal>>
+ * <<declare,Section declaration>>
+ * <<bparser,Parsers for basic types>>
+- <<getopt_h,ucw/getopt.h>>
+ * <<conf_load,Safe configuration loading>>
+ * <<conf_direct,Direct access>>
+ * <<conf_dump,Debug dumping>>
+ * <<conf_journal,Journaling control>>
+ * <<conf_getopt,Loading by cf_getopt()>>
+
+[[example]]
+Example
+-------
+If you want to just load simple configuration, this is the part you
+want to read. This simple example should give you the overview. Look
+into the <<conf_macros,convenience macros>> section to see list of
+supported data types, sections, etc.
+
+[[ex_cfile]]
+Let's say you have configuration file with this content and want to
+load it:
+
+ HelloWorld {
+ Text "Hello planet"
+ Count 3
+ }
+
+[[ex_structure]]
+The structure
+~~~~~~~~~~~~~
+First, you declare the structure and let the configuration parser know
+it exists.
+
+ #include <ucw/lib.h>
+ #include <ucw/conf.h>
+
+ static char *hw_text = "Hello world";
+ static int hw_count = 1;
+ static int hw_wait_answer = 0;
+
+ static struct cf_section hw_config = {
+ CF_ITEMS {
+ CF_STRING("Text", &hw_text),
+ CF_INT("Count", &hw_count),
+ CF_INT("WaitAnswer", &hw_wait_answer),
+ CF_END
+ }
+ };
+
+ static void CONSTRUCTOR hw_init(void) {
+ cf_declare_section("HelloWorld", &hw_config, 0);
+ }
+
+The variables are used to store the loaded values. Their initial
+values work as default, if nothing else is loaded. The `hw_config`
+structure assigns the variables to configuration names. The hw_init()
+function (because of the `CONSTRUCTOR` macro) is run before main()
+is called and it plugs in the whole section to the parser (alternatively,
+you can call @cf_declare_section() at the start of your main()).
+
+You can plug in as many configuration sections as you like, from
+various places across your code.
+
+[[ex_load]]
+Loading of the values
+~~~~~~~~~~~~~~~~~~~~~
+Suppose you need to parse the command line arguments and load the
+configuration. Then @cf_getopt() is there for you: it works like
+the traditional @getopt() from the C library, but it also handles
+configuration files.
+
+ #include <ucw/lib.h>
+ #include <ucw/conf.h>
+ #include <ucw/getopt.h>
+
+ static char short_opts[] = CF_SHORT_OPTS "v";
+ static struct option long_opts[] = {
+ CF_LONG_OPTS
+ { "verbose", 0, 0, 'v' },
+ { NULL, 0, 0, 0 }
+ };
+
+ static int verbose;
+
+ int main(int argc, char *argv[]) {
+ cf_def_file = "default.cf";
+ int opt;
+ while((opt = cf_getopt(argc, argv, short_opts, long_opts, NULL)) >= 0)
+ switch(opt) {
+ case 'v': verbose = 1; break;
+ default: fprintf(stderr, "Unknown option %c\n", opt); return 1;
+ }
+ }
+
+The `short_opts` and `long_opts` variables describe the command line
+arguments. Notice the `CF_SHORT_OPTS` and `CF_LONG_OPTS` macros. They
+add the `-S` and `-C` options for the configuration parser as described
+in <<config:>>. These options are handled internally by @cf_getopt().
+
+You can rely on the configuration files having been loaded before the
+first of your program's options is parsed.
+
+[[deep]]
+Getting deeper
+--------------
+
+Since the configuration system is somewhat complicated, this part gives
+you a little overview of what you can find and where.
+
+[[conf_multi]]
+Arrays and lists
+~~~~~~~~~~~~~~~~
+
+It is sometimes needed to have multiple items of the same type. There
+are three ways to do that:
+
+*Static arrays*::
+ An array with fixed maximum length. You provide
+ the length and already allocated array which is filled with items.
+ The configuration may contain less than the maximum length items.
++
+For example, you can have a static array of five unsigned integers:
++
+ static uns array[] = { 1, 2, 3, 4, 5 };
++
+ static struct cf_section section = {
+ CF_ITEMS {
+ CF_UNS_ARY("array", array, 5),
+ CF_END
+ }
+ };
+
+*Dynamic arrays*::
+ Similar to static array, but you provide pointer
+ to pointer to the given item (eg. if you want dynamic array of
+ integers, you give `int **`). The parser allocates an array of needed
+ size. You can use the <<def_DARY_LEN,`DARY_LEN`>> macro to find out
+ the number of elements actually loaded.
++
+If you want dynamic array of strings, you would use:
++
+ static char **array;
++
+ static struct cf_section section = {
+ CF_ITEMS {
+ CF_STRING_DYN("array", &array, CF_ANY_NUM),
+ CF_END
+ }
+ };
+
+*Lists*::
+ Linked lists based on <<clist:>>. You provide description
+ of single node and pointer to the
+ <<clist:struct_clist,`struct clist`>> variable. All the nodes will
+ be created dynamically and put there.
++
+First element of your structure must be <<clist:type_cnode,`cnode`>>.
++
+The first example is list of strings and uses <<clist:simple,simple
+lists>>:
++
+ static struct clist list;
++
+ static struct cf_section section = {
+ CF_ITEMS {
+ CF_LIST("list", &list, &cf_string_list_config),
+ CF_END
+ }
+ };
++
+Another example, describing how to create more complicated list node
+than just a string can be found at the <<def_CF_TYPE,`CF_TYPE`>> macro.
+
+[[reload]]
+Reloading configuration
+~~~~~~~~~~~~~~~~~~~~~~~
+
+The configuration system allows you to reload configuration at
+runtime. The new config changes the values against the default values.
+It means, if the default value for variable `A` is `10`, the currently
+loaded config sets it to `42` and the new config does not talk about
+this variable, `A` will have a value of `10` after a successful load.
+
+Furthermore, if the loading of a new configuration fails, the current
+configuration is preserved.
+
+All this is done with <<journal,config journaling>>. The load of the
+first config creates a journal entry. If you try to load some new
+configuration, it is partially rolled back to defaults (the rollback
+happens, but instead of removing the journal entry, another journal
+entry is added for the rollback). If the loading succeeds, the two
+journal entries are removed and a new one, for the new configuration,
+is added. If it fails, the first one is replayed and the rollback
+entry is removed.
+
+See <<cf_reload()>>.
+
+[[custom_parser]]
+Creating custom parsers
+~~~~~~~~~~~~~~~~~~~~~~~
+
+If you need to parse some data type the configuration system can't
+handle, you can write your own parser. But before you start, you
+should know a few things.
+
+The parser needs to support <<journal,journaling>>. To accomplish that,
+you have to use the <<alloc,configuration mempool>> for memory allocation.
+
+Now, you need a function with the same signature as
+<<type_cf_parser1,`cf_parser1`>>. Parse the first parameter (the
+string) and store the data in the second parameter. You may want to
+write a dumper function, with signature of
+<<type_cf_dumper1,`cf_dumper1`>> (needed for debug dumps).
+
+Fill in a structure <<struct_cf_user_type,cf_user_type>> and use the
+new data type in your configuration description with
+<<def_CF_USER,`CF_USER`>> macro as its @t parameter.
+
+You do not need to call @cf_journal_block() on the variable you store
+the result. It is true you change it, but it was stored to journal
+before your parser function was called.
+
+[[hooks]]
+Hooks
+~~~~~
+
+The configuration system supports hooks. They are used to initialize the
+configuration (if simple default value of variable is not enough) and
+to check the sanity of loaded data.
+
+Each hook is of type <<type_cf_hook,`cf_hook`>> and you can include
+them in configuration description using <<def_CF_INIT,`CF_INIT`>> and
+<<def_CF_COMMIT,`CF_COMMIT`>> macros.
+
+The hooks should follow similar guidelines as custom parsers (well,
+init hooks do not need to call @cf_journal_block()) to support
+journaling. If you change nothing in the commit hook, you do not need
+to care about the journaling either.
+
+You may use the return value to inform about errors. Just return the
+error message, or NULL if everything went well.
+
+Another similar function is a copy function. It is very similar to a
+hook and is used when the item is copied and is too complicated to use
+simple memcpy(). Its type is <<type_cf_copier,`cf_copier`>> and is
+specified by the <<def_CF_COPY,`CF_COPY`>> macro. Its return value is
+the same as the one of a hook.
+
+[[conf_h]]
+ucw/conf.h
+----------
+
+Use this file if you want to define a configuration section, request
+loading of some variables or create new item type.
+
+!!ucw/conf.h
+
+[[getopt_h]]
+ucw/getopt.h
+------------
+
+This header contains routines for parsing command line arguments and
+loading the configuration.
+
+!!ucw/getopt.h
--- /dev/null
+Configuration files
+===================
+
+This document describes run-time configuration of libucw-based
+programs using config files. For compile-time configuration,
+see <<configure:>>.
+
+[[terminology]]
+Terminology
+-----------
+
+Configuration items of all modules are organized into sections.
+The sections form a tree structure with top-level sections corresponding
+to program modules.
+
+Each configuration item belongs to one of the following classes:
+
+ 1. single value or a fixed-length array of values
+ 2. variable-length array of values
+ 3. subsection with several nested attributes
+ 4. list of nodes, each being an instance of a subsection
+ 5. bitmap of small integers (0..31) or fixed list of strings
+ 6. exceptions (items with irregular syntax; however, they always
+ appear as a sequence of strings, only the semantics differ)
+
+Both fixed- and variable-length arrays consist of items of the same
+type. The basic types supported by the configuration mechanism are:
+
+ 1. 32-bit integer
+ 2. 64-bit integer
+ 3. floating point number
+ 4. IP address
+ 5. string
+ 6. choice (one of a fixed list of strings)
+
+Program modules can define their own special types (such as network
+masks or attribute names) and decide how they are parsed.
+
+[[format]]
+Format of configuration files
+-----------------------------
+
+Configuration files are text files that usually set one attribute per
+line, though it is possible to split one assignment into multiple lines
+and/or assign several attributes in one line. The basic format of an
+assignment command is
+
+ name value1 value2 ... valueN
+
+or
+
+ name=value1 value2 ... valueN
+
+The end of line means also end of a command unless it is preceded by a
+backslash. On the other hand, a semicolon terminates the command and
+another command can start after the semicolon. A hash starts a comment
+that lasts until the end of the line. A value can be enclosed in
+apostrophes or quotation marks and then it can contain spaces and/or
+control characters, otherwise the first space or control character
+denotes the end of the value. Values enclosed in quotation marks are
+interpreted as C-strings. For example, the following are valid
+assignment commands:
+
+ Database "main db\x2b"; Directory='index/'; Weights 100 20 30 \
+ 40 50 80 # a comment that is ignored
+
+Numerical values can be followed by a unit. The following units are
+supported:
+
+[[units]]
+
+ d=86400 k=1000 K=1024
+ h=3600 m=1000000 M=1048576
+ %=0.01 g=1000000000 G=1073741824
+
+Attributes of a section or a list node can be set in two ways. First,
+you can write the name of the section or list, open a bracket, and then
+set the attributes inside the section. For example,
+
+ Section1 {
+ Attr1 value1
+ Attr2 value2
+ ListNode { #creates a list and adds its first node
+ Attr3 value3
+ Attr4 value4
+ }
+ ListNode { Attr3=value5; Attr4=value6 }
+ #appends a new node; this is still the same syntax
+ }
+
+The second possibility is using a shorter syntax when all attributes of a
+section are set on one line in a fixed order. The above example could
+be as well written as
+
+ Section1 {
+ Attr1 value1
+ Attr2 value2
+ ListNode value3 value4
+ ListNode value5 value6
+ }
+
+Of course, you cannot use the latter syntax when the attributes allow
+variable numbers of parameters. The parser of the configuration files
+checks this possibility.
+
+If you want to set a single attribute in some section, you can also
+refer to the attribute as Section.Attribute.
+
+Lists support several operations besides adding a new node. You just
+have to write a colon immediately after the attribute name, followed by
+the name of the operation. The following operations are supported:
+
+[[operations]]
+
+ List:clear # removes all nodes
+ List:append { attr1=value1; ... } # adds a new node at the end
+ List:prepend { attr1=value1; ... } # adds a new node at the beginning
+ List:remove { attr1=search1 } # find a node and delete it
+ List:edit { attr1=search1 } { attr1=value1; ... }
+ # find a node and edit it
+ List:after { attr1=search1 } { ... } # insert a node after a found node
+ List:before { attr1=search1 } { ... } # insert a node before a found node
+ List:copy { attr1=search1 } { ... } # duplicate a node and edit the copy
+ List:reset { attr=value1; ... } # equivalent to :clear and :append
+
+You can specify several attributes in the search condition and the nodes
+are tested for equality in all these attributes. In the editing
+commands, you can either open a second block with overridden attributes,
+or specify the new values using the shorter one-line syntax.
+
+The commands :clear, :append, and :prepend are also supported by var-length
+arrays. The command :clear can also be used on string values. The following
+operations can be used on bitmaps: :set (which is equal to :append and :prepend),
+:remove, :clear, and :all (set all bits).
+
+[[include]]
+Including other files
+---------------------
+
+To include another file, use the command
+
+ include another/file
+
+(Beware that this command has to be the last one on the line.)
+
+[[command_line]]
+Command-line parameters
+-----------------------
+
+The default configuration file (cf_def_file possibly overridden
+by environment variable cf_env_file) is read before the program is started.
+You can use a -C option to override the name of the configuration file.
+If you use this parameter several times, then all those files are loaded
+consecutively. A parameter -S can be used to execute a configuration
+command directly (after loading the default or specified configuration
+file). Example:
+
+ bin/program -Ccf/my-config -S'module.trace=2;module.logfile:clear' ...
+
+If the program is compiled with debugging information, then one more
+parameter `--dumpconfig` is supported. It prints all parsed configuration
+items and exits.
+
+All these switches must be used before any other parameters of the
+program.
+
+[[preprocess]]
+Preprocessing
+-------------
+
+During compilation, all configuration files are pre-processed by a simple
+C-like preprocessor, which supports `#ifdef`, `#ifndef`, `#if`,
+`#elsif`, `#else` and `#endif` directives referring to compile-time
+configuration variables (the ones detected by the `configure` script; you
+can see a list of them in `obj/autoconf.h`). `#if` and `#elsif` can contain
+any Perl expression where each `CONFIG_xyz` configuration variable is
+substituted to 0 or 1 depending on its value.
+
+The preprocessor also substitutes `@VARIABLE@` by the value of the variable,
+which must be defined.
+
+[[caveats]]
+Caveats
+-------
+
+Trying to access an unknown attribute causes an error, but unrecognized
+top-level sections are ignored. The reason is that a common config file
+is used for a lot of programs which recognize only their own sections.
+
+Names of sections, attributes and choices are case-insensitive. Units are
+case-sensitive.
--- /dev/null
+How to Configure Sherlock libraries
+===================================
+
+What can be configured
+----------------------
+There are two different levels of configuring/customizing programs
+based on sherlock libraries:
+
+ - runtime configuration in configuration files (see <<config:>>)
+
+ - compile-time configuration of the libraries: config switches set
+ before compiling, selecting optional features.
+
+Where to build
+--------------
+If you run configure in the source directory, it prepares for compilation inside
+the source tree. In this case, an `obj` subdirectory is created to hold all generated
+files (object files, binaries, generated source files etc.) and all final files
+are linked to the `run` subdirectory. No other parts of the source tree are written into.
+
+Alternatively, you can compile in a separate object tree (which is useful when you
+want to build several different configurations from a single source tree). In order
+to do that, switch to the destination directory and issue `<source-dir>/configure ...`.
+This way, configure will create the `obj` and `run` directories locally and set up
+a Makefile which refers to the original source tree.
+
+How to configure
+----------------
+To set up compilation, possibly overriding default compile-time
+options, just run:
+
+ ./configure [<option> | -<option> | <option>=<value> ...]
+
+The default values of feature options are taken from `default.cfg`.
+Compiler flags and options dependent on compiler, OS and CPU type
+are set in `ucw/perl/UCW/Configure/C.pm`. Everything can be overridden by
+options specified on the configure's command line, which have the highest
+priority.
+
+If you want to see the resulting set of options, take a look at
+`obj/config.mk`.
+
+Options specifying compiler/linker/debugger options can be also overridden
+during compilation by `make <option>=<value>`. While it's also possible
+to specify the other options in this way, it probably won't have the desired
+effect, because configure also generates C include files containing the
+options.
+
+Installation options
+--------------------
+By default, the package is compiled to be installed and it chooses a
+place where it will install. If you want to place it somewhere else,
+set the `PREFIX` option:
+
+ ./configure PREFIX=/where/to/install
+
+[[local_build]]
+You can create a local compilation, which does not need to be
+installed. All paths are set relative to a `run` directory and
+programs expect to have current working directory set to it. However,
+you can move the `run` directory around your filesystem as you like.
+To compile it that way, turn on the `CONFIG_LOCAL` option (and do not
+set `PREFIX`):
+
+ ./configure CONFIG_LOCAL
+
+Examples
+--------
+ - `./configure -CONFIG_SHARED` will build the libraries statically.
+ - `./configure -CONFIG_IMAGES` avoids building the image library.
+ - `./configure CONFIG_UCW_ONLY -CONFIG_UCW_UTILS` compiles only the
+ libucw library without its util programs.
+ - `./configure CONFIG_XML PREFIX=$HOME/sherlock` includes an XML
+ library and installs into your home directory.
--- /dev/null
+All definitions in libucw
+=========================
+
+- <<enum,Enumerations>>
+- <<struct,Structures>>
+- <<type,Types>>
+- <<fun,Functions>>
+- <<var,Variables>>
+- <<def,Preprocessor definitions>>
--- /dev/null
+The documentation system
+========================
+
+////
+Warning: if you read this file in plain-text, keep in mind the markup
+had to be escaped to show in the result as the thing you should type.
+You should ignore all backslashes here, or better, read the html
+version.
+////
+
+The ucw documentation system is based on the
+http://www.methods.co.nz/asciidoc/[ASCIIDOC] documentation formatter.
+It supports all its markup, but it was extended slightly.
+
+- <<markup,Markup extensions>>
+ * <<xrefs,Cross referencing>>
+ * <<symbols,Symbol formatting>>
+- <<extract,Header extraction>>
+ * <<scomm,Stand-alone comments>>
+ * <<defcomm,Definition comments>>
+- <<generics,Macro generics>>
+ * <<geext,Generics extraction>>
+ * <<gelink,Links to generics>>
+
+[[markup]]
+Markup extensions
+-----------------
+
+[[xrefs]]
+Cross referencing
+~~~~~~~~~~~~~~~~~
+ASCIIDOC supports creating anchors with `\[[anchor-name]]` and links
+to them with `\<<anchor,caption>>`. The extension supports links to
+anchor in other files (`\\<<filename:anchor,caption>>`) or to other
+files (`\\<<filename:,caption>>`). The `filename` is without any suffix
+like `.txt` or `.html`, it is added to the link automatically. The caption
+is optional, if you omit it, some reasonable one will be guessed from
+the anchor name.
+
+The links support linking to function descriptions (the anchors for
+them are generated automatically by <<extract,header extraction>>).
+Just write `\<<function(),caption>>` or
+`\\<<filename:function(),caption>>`.
+
+[[symbols]]
+Symbol formatting
+~~~~~~~~~~~~~~~~~
+If you talk about a function parameter, prefix its name with `@`. It
+will be typeset in monospace italic font to mark it visually, like
+this: @parameter.
+
+If a word is suffixed by parentheses without a space (e.g. word()), it
+is considered to be a function name and is typeset in monospace font.
+
+You can prefix a function name by `@`, which makes it a link to that
+function (`\@function()` is equivalent to `\<<function()>>`).
+
+If you write NULL anywhere, it is recognized and typeset in monospace.
+
+[[extract]]
+Header extraction
+-----------------
+A line starting with two exclamation marks, followed by a filename, is a
+command to process a source file. Special comments and commented
+declarations are extracted and included in the place of the command.
+
+The command looks like this:
+
+ !!filename
+
+[[scomm]]
+Stand-alone comments
+~~~~~~~~~~~~~~~~~~~~
+C comments with tripled asterisks are extracted, the left side
+asterisk decoration is removed and the rest is put through verbatim.
+
+Single-line version looks like
+
+ /*** This will be put into documentation. ***/
+
+If you need more than one line, use the same type of comment.
+
+ /***
+ * This is part of documentation too.
+ * But the asterisks on the left side aren't.
+ ***/
+
+[[defcomm]]
+Definition comments
+~~~~~~~~~~~~~~~~~~~
+You can write comments documenting a specific definition. A definition
+comment has the asterisks doubled. The multi-line version must be
+directly above the definition, the single-line on the same line right
+of the definition or on a line before.
+
+ void function(int parameter); /** This is a function. **/
+
+or
+
+ /** This is a function. **/
+ void function(int parameter);
+
+or
+
+ /**
+ * This is a complicated function.
+ * It takes multiple lines to describe how useful it is.
+ **/
+ void function(int parameter);
+
+Each such commented definition is taken and formatted, with the
+description attached. It also generates an anchor for the symbol name.
+The anchors look like `symboltype_symbolname`. The symbol types are
+these:
+
+- `fun` for functions
+- `def` for preprocessor macro
+- `var` for variable
+- `struct` for structure
+- `enum` for enumeration
+- `type` for a type definition
+
+There is a support for building a page with list of all symbols with
+links to them. Look into `ucw/doc/Makefile` to see how to request that.
+
+[[generics]]
+Support for macro generics
+--------------------------
+
+Some of the headers contain <<generic:,macro generics>>. Since the
+preprocessor macros look somewhat weird, the documentation extractor
+needs some help to understand them.
+
+[[geext]]
+Extraction of generics
+~~~~~~~~~~~~~~~~~~~~~~
+
+When extracting info from some header, the extractor needs to know,
+which prefix macros are used in the source file, to distinguish them
+from function calls. To inform it about them, append a comma separated
+list of such macros to the extraction command, like this:
+
+ !!filename PREFIX_MACRO_ONE,PREFIX_MACRO_TWO
+
+Then this will be correctly identified as a structure:
+
+ struct PREFIX_MACRO_ONE(struct_name) {
+ content;
+ };
+
+[[gelink]]
+Links to generics
+~~~~~~~~~~~~~~~~~
+
+Since the anchors for them are generated in a complicated manner and
+typing them in plain-text would convert them back to the original,
+with real parentheses, there is a special pattern to create the
+symbolname part of the anchor. It looks like:
+
+ _GENERIC_LINK_|PREFIX_MACRO|symbol_name|
+
+So link to the above structure would look like:
+
+ <<struct__GENERIC_LINK_|PREFIX_MACRO_ONE|struct_name|,link text>>
+
+However, this is implemented only in the included header files (if it
+is needed, it will be implemented in the top-level documentation files
+too).
--- /dev/null
+Fixed-sized allocators
+======================
+
+You can use them for efficient allocation of large amount of small
+fixed-sized memory blocks and to free them all at once.
+If you need more features, see more complex <<mempool:,memory pools>>.
+
+* <<defs,Definitions>>
+* <<basic,Basic manipulation>>
+* <<alloc,Allocation routines>>
+
+!!ucw/eltpool.h
--- /dev/null
+Fastbufs
+========
+
+A *fastbuf* is a stream (or file) abstraction optimized for both speed
+and flexibility.
+
+Fastbufs can represent many different kinds of objects: regular files, network
+sockets, file descriptors in general, or various memory buffers. These objects
+are handled by different fastbuf *back-ends.*
+
+Once you have a fastbuf, you can access it by functions similar to those of
+`stdio.h`, or you can use a variety of fastbuf *front-ends* providing various
+formatted operations.
+
+Please keep in mind that fastbufs do not allow arbitrary mixing of reads and
+writes on the same stream. If you need to mix them, you have to call @bflush()
+in between and remember that the file position reported by @btell() points after
+the flushed buffer, which is not necessarily the same as after the data you've
+really read.
+
+.Back-ends:
+- <<fbparam,Files (parametrized)>>
+- <<fbfile,Regular files>>
+- <<fbtemp,Temporary files>>
+- <<fblim,File fragments>>
+- <<fbmem,In-memory streams>>
+- <<fbbuf,Buffers>>
+- <<fbgrow,Growing buffers>>
+- <<fbpool,Memory pools>>
+- <<fbatomic,Atomic files>>
+
+.Front-ends:
+- <<ffbasic,Basic functions>>
+
+.Other reading:
+- <<internal,Internal structure>>
+- <<bconfig,Configuring streams>>
+
+ucw/fastbuf.h
+-------------
+
+!!ucw/fastbuf.h
+
+ucw/fb-socket.h
+---------------
+
+Fastbufs on network sockets with timeouts.
+
+!!ucw/fb-socket.h
+
+ucw/ff-unicode.h
+----------------
+
+Reading and writing of unicode characters.
+
+Invalid codes are replaced by `UNI_REPLACEMENT` when reading.
+
+!!ucw/ff-unicode.h
+
+ucw/ff-binary.h
+---------------
+
+!!ucw/ff-binary.h
--- /dev/null
+Generic data structures and algorithms
+======================================
+
+The C preprocessor is a very powerful tool. One handy way to use it
+can be generating generic data structures and algorithms. Here you can
+find some conventions that are used in all such generic structures in
+libUCW, and also hints for use of these structures.
+
+- <<idea,General idea>>
+- <<use,How to use them>>
+- <<implement,How it is implemented>>
+- Modules with generics
+ * <<growbuf:gbuf,`gbuf.h`>>
+ * <<sort:,Sorting>>
+ * <<binheap:,`binheap.h`>>
+ * <<hashtable:,`hashtable.h`>>
+
+// TODO The module list
+
+[[idea]]
+General idea
+------------
+
+The idea is simple. If you have some code, you can customize it a
+little by preprocessor macros. You can change constants, data types it
+operates on, whole expressions, or you can compile parts of the code
+conditionally. You can generate new function names using macros.
+
+So if you provide a few macros for data types, function names and
+parameters and include some code using them, it gets modified by it
+and a code for a specific data type is created. Then you can provide
+new macros and include it again, to get another version of the code,
+with different function names and types.
+
+[[use]]
+How to use them
+---------------
+
+The use is best explained with an example, so we will suppose there
+is a header file `array.h`, which contains a generic array data type
+and an indexing function, which returns a pointer to the n-th element.
+
+To get an array of integers, we need to provide macro for used data
+type and macro that will provide prefixes for identifier names. Then
+we include the file. Then we could get another array with unsigned
+integers, so we will do the same:
+
+ #define ARRAY_TYPE int
+ #define ARRAY_PREFIX(name) intarray_##name
+ #include <array.h>
+
+ #define ARRAY_TYPE uns
+ #define ARRAY_PREFIX(name) unsarray_##name
+ #include <array.h>
+
+This will generate the data types (presumably `intarray_t` and
+`unsarray_t`) and the index functions (`intarray_index` and
+`unsarray_index`). We can use them like anything else.
+
+Maybe the `ARRAY_PREFIX` deserves some attention. When the header file
+wants to generate an identifier, it uses this macro with
+some name. Then the macro takes the name, adds a prefix to it and
+returns the new identifier, so `ARRAY_PREFIX(t)` will generate
+`intarray_t` in the first case and `unsarray_t` in the second. This
+allows having more than one instance of the same data structure or
+algorithm, because it generates different identifiers for them.
+
+A similar macro is needed for every generic header in libUCW.
+
+[[implement]]
+How it is implemented
+---------------------
+
+For those who want to write their own or are just interested, how it
+works, here is the `array.h` header and some description to it.
+
+ #define ARRAY_A_TYPE ARRAY_PREFIX(t)
+ typedef ARRAY_TYPE *ARRAY_A_TYPE;
+
+ static ARRAY_TYPE *ARRAY_PREFIX(index)(ARRAY_A_TYPE array, uns index)
+ {
+ return array + index;
+ }
+
+ #undef ARRAY_A_TYPE
+ #undef ARRAY_TYPE
+ #undef ARRAY_PREFIX
+
+There are a few things that are worth noticing. The first two lines
+define the data type. The macro (`ARRAY_A_TYPE`) is only for
+convenience inside the header, since such type names can be used quite
+often inside the header (if it is large).
+
+Then there is the function with its name generated (do not get scared
+by the double parentheses: the first pair will be eaten by the macro, the
+second pair are the real function parameters). The function is static, since
+more than one `.c` file might want to use the same header with the same
+prefix -- each one generates its own instance.
+
+And the end just undefines all the macros, so user may define them
+again and get another instance of the data structure.
+
+Also note it is not protected against multiple inclusion in the usual
+way (eg. `#ifndef ARRAY_H` ...), since multiple inclusion is desired
+-- it generates multiple versions of the data structure.
--- /dev/null
+Growing buffers
+===============
+
+It is quite a usual situation that you need an array of items and you
+do not know how large it will be at the time you allocate it. Then
+you need some kind of dynamically growing buffer.
+
+You can either use <<mempool:gbuf,mempools>>, which has similar
+functionality, or this module.
+
+- <<gbuf,Generic growing buffers>>
+- <<bbuf,Growing buffers for byte-sized items>>
+
+[[gbuf]]
+Generic growing buffers
+-----------------------
+
+The generic buffers are in `ucw/gbuf.h`. They are <<generic:,generics
+generated by preprocessor>>. To use them, you need to define:
+
+- `GBUF_PREFIX(name)` -- the identifier generating macro.
+- `GBUF_TYPE` -- the data type they operate with.
+
+You may define `GBUF_TRACE(\...)` macro. If you do, it will be used to
+log the growing of the buffer. The macro should act like printf() --
+the first parameter will be format, the rest variables for it.
+
+!!ucw/gbuf.h GBUF_PREFIX
+
+[[bbuf]]
+Growing buffers for byte-sized items
+------------------------------------
+
+It is often needed to allocate the buffer by bytes (if you handle some
+anonymous data) or characters (strings of unknown length).
+
+With the `ucw/bbuf.h` header, you get an instance of growing buffers
+with type `byte` and prefix `bb_`. Aside from that, you get a few
+functions to manipulate strings in the buffers.
+
+!!ucw/bbuf.h
--- /dev/null
+Hashing routines
+================
+
+Libucw contains two cryptographic hash algorithms: MD5 (RFC 1321) and SHA1 (RFC
+3174). A SHA1-HMAC (RFC 2104) message authentication is available.
+
+There are non-cryptographic hashes as well.
+
+<<crypto,Cryptographic ones>>:
+
+- <<md5,MD5>>
+- <<sha1,SHA1>>
+- <<sha1:sha1_hmac(),SHA1_HMAC>>
+- <<usage,Common usage>>
+
+<<checksum,Checksums>>:
+- <<compress:adler,Adler-32>>
+
+<<nocrypto,Non-cryptographic ones>>:
+
+- <<strhash,String & block hashes>>
+- <<inthash,Integer hashes>>
+
+[[crypto]]
+Cryptographic hashes
+--------------------
+
+[[md5]]
+MD5
+~~~
+!!ucw/md5.h
+
+[[sha1]]
+SHA1
+~~~~
+!!ucw/sha1.h
+
+[[usage]]
+Common usage
+~~~~~~~~~~~~
+
+There are two ways you can use the hashing routines.
+
+- Single-shot interface. If you have an in-memory buffer of the whole
+ message you want to hash, you can use this.
+
+ char *message = "Hello world";
+ byte output[MD5_SIZE];
+ md5_hash_buffer(output, message, strlen(message));
+
+- Multi-shot interface. If you have the message scattered in many
+ buffers or you get it by parts, you do not need to concatenate the
+ parts together.
+
+ byte buffer[MAX_BUFFER];
+ uns buffer_len;
+ md5_context c;
+ md5_init(&c);
+ while(buffer_len = get_chunk(buffer, MAX_BUFFER)) {
+ md5_update(&c, buffer, buffer_len);
+ }
+ byte output[MD5_SIZE];
+ memcpy(output, md5_final(&c), MD5_SIZE);
+
+SHA1 has the same interface, so the same two ways apply.
+
+See also <<string:mem_to_hex()>>.
+
+[[checksum]]
+Checksums
+---------
+
+Their purpose is checking against random data changes, hardware
+failures and the like. They are not to be used against targeted attacks.
+
+The <<compress:adler,Adler-32 checksum>> is documented in the
+<<compress:,compression chapter>>.
+
+[[nocrypto]]
+Non-cryptographic hashes
+------------------------
+
+They are usually used to identify values in hash tables.
+
+The result of each of these functions is expected to be taken modulo the
+size of a hash table. The size should be a prime number (it gives better distribution).
+
+!!ucw/hashfunc.h
--- /dev/null
+Hash tables
+===========
+
+A hash table is a very universal data structure. It does most of its
+operations in O(1) average time. The library contains a header to
+generate hash tables suiting your needs.
+
+They are <<generic:,generic data structures>>.
+
+- <<mandatory,Mandatory macros>>
+- <<functions,Optional function switches>>
+- <<params,Optional parameters>>
+- <<wants,Functionality switches>>
+- <<generated,Generated functions>>
+- <<iterator,Iterator>>
+
+[[mandatory]]
+Mandatory macros
+----------------
+
+- `HASH_NODE` -- a data type where a node dwells. It is usually a
+ structure.
+- `HASH_PREFIX(x)` -- the name generating macro.
+- Key type and name. Must be one of following.
+ [[key_atomic]]
+ * `HASH_KEY_ATOMIC` -- the key (`node\->HASH_KEY_ATOMIC`) is an
+ atomic type which can be compared using `==`.
+ [[key_string]]
+ * `HASH_KEY_STRING` -- the key is a zero-terminated string,
+ allocated separately from the rest of the node.
+ [[key_endstring]]
+ * `HASH_KEY_ENDSTRING` -- a zero-terminated string which lives at
+ the end of node (it is allocated together with the node). It
+ should be declared as `char key[1]`.
+ * `HASH_KEY_MEMORY` -- the `node\->HASH_KEY_MEMORY` is to be compared
+ using memcmp() function. In this case, you need to provide
+ `HASH_KEY_SIZE` macro as well, to specify the length of the key.
+ [[key_complex]]
+ * `HASH_KEY_COMPLEX(x)` -- the key is compound of more than one
+ component. The macro should expand to `x key1, x key2, ..., x kn`.
+ Furthermore, you need to provide a `HASH_KEY_DECL` macro. It is
+ used to define function parameters. Therefore it should expand to
+ `type1 key1, type2 key2, ..., typen keyn`. And
+ <<give_hashfn,`HASH_GIVE_HASHFN`>> and <<give_eq,`HASH_GIVE_EQ`>>
+ are mandatory for this key type.
+
+[[functions]]
+Optional function switches
+--------------------------
+
+You can define any of these macros and provide corresponding functions
+to customize the behaviour. The macros are:
+
+[[give_hashfn]]
+- `HASH_GIVE_HASHFN` -- the table will use `uns
+ HASH_PREFIX(hash)(key)` to calculate hash of `key`.
+ There is a sensible default for integers and strings.
+ In the case of <<key_complex,`HASH_KEY_COMPLEX`>>, it is mandatory
+ to provide this macro and function.
+[[give_eq]]
+- `HASH_GIVE_EQ` -- tells the table to use `int HASH_PREFIX(eq)(key1,
+ key2)` function to decide if `key1` and `key2` are equal. Default
+ for atomic types is `==` and strcmp() or strcasecmp() for strings
+ (depends on <<nocase,`HASH_NOCASE`>> switch).
+ It is mandatory when you use <<key_complex,`HASH_KEY_COMPLEX`>>.
+- `HASH_GIVE_EXTRA_SIZE` -- function `int HASH_PREFIX(extra_size)(key)`
+ returns how many bytes after the node should be allocated. It
+ defaults to `0` or to length of key in case of
+ <<key_endstring,`HASH_KEY_ENDSTRING`>>.
+- `HASH_GIVE_INIT_KEY` -- function
+ `void HASH_PREFIX(init_key)(node *, key)` is used to initialize key
+ in newly created node. The default is assignment for atomic keys and
+ static strings (<<key_atomic,`HASH_KEY_ATOMIC`>>,
+ <<key_string,`HASH_KEY_STRING`>>) and strcpy() for
+ <<key_endstring,`HASH_KEY_ENDSTRING`>>.
+[[give_init_data]]
+- `HASH_GIVE_INIT_DATA` -- function `void HASH_PREFIX(init_data)(node
+ *)` is used to initialize the rest of node. Useful if you use
+ <<fun_HASH_PREFIX_OPEN_PAREN_lookup_CLOSE_PAREN_,`HASH_PREFIX(lookup())`>>
+- `HASH_GIVE_ALLOC` -- you need to provide `void
+ \*HASH_PREFIX(alloc)(uns size)` and `void HASH_PREFIX(free)(void \*)`
+ to allocate and deallocate the nodes. Default uses
+ <<memory:xmalloc()>> and <<memory:xfree()>> or <<mempool:,mempool
+ routines>>, depending on <<use_pool,`HASH_USE_POOL`>> and
+ <<auto_pool,`HASH_AUTO_POOL`>> switches.
+
+[[params]]
+Optional parameters
+-------------------
+
+You can customize the hash table a little more by these macros:
+
+[[nocase]]
+- `HASH_NOCASE` -- use case-insensitive comparison for strings.
+- `HASH_DEFAULT_SIZE` -- use approximately this many elements when
+ creating the hash table.
+- `HASH_CONSERVE_SPACE` -- use as little space as possible.
+- `HASH_FN_BITS` -- the hash function only provides this many
+ significant bits.
+- `HASH_ATOMIC_TYPE` -- the type of atomic key
+ (<<key_atomic,`HASH_KEY_ATOMIC`>>) is not `int`, but this type.
+[[use_pool]]
+- `HASH_USE_POOL` -- tells to use <<mempool:,mempool allocation>> to
+ allocate the nodes. You should define it to the name of mempool
+ variable to be used for this purpose.
+[[auto_pool]]
+- `HASH_AUTO_POOL` -- like above, but it creates its own mempool.
+ Define it to the block size of the pool.
+- `HASH_ZERO_FILL` -- initialize new nodes to all zeroes.
+- `HASH_TABLE_ALLOC` -- allocate the table the same way as nodes. If
+ not provided, <<memory:xmalloc()>> is used.
+[[table_dynamic]]
+- `HASH_TABLE_DYNAMIC` -- By default, only one global hash table is
+ used. With this macro defined, all functions gain new first
+ parameter of type `HASH_PREFIX(table) *` to allow them work with
+ multiple hash tables.
+
+[[wants]]
+Functionality switches
+----------------------
+
+Each of these macros enables some of the functionality the table has.
+
+[[want_cleanup]]
+- `HASH_WANT_CLEANUP` --
+ <<fun_HASH_PREFIX_OPEN_PAREN_cleanup_CLOSE_PAREN_,`HASH_PREFIX((cleanup()`>>
+[[want_find]]
+- `HASH_WANT_FIND` --
+ <<fun_HASH_PREFIX_OPEN_PAREN_find_CLOSE_PAREN_,`HASH_PREFIX((find()`>>
+[[want_find_next]]
+- `HASH_WANT_FIND_NEXT` --
+ <<fun_HASH_PREFIX_OPEN_PAREN_find_next_CLOSE_PAREN_,`HASH_PREFIX((find_next()`>>
+[[want_new]]
+- `HASH_WANT_NEW` --
+ <<fun_HASH_PREFIX_OPEN_PAREN_new_CLOSE_PAREN_,`HASH_PREFIX((new()`>>
+[[want_lookup]]
+- `HASH_WANT_LOOKUP` --
+ <<fun_HASH_PREFIX_OPEN_PAREN_lookup_CLOSE_PAREN_,`HASH_PREFIX((lookup()`>>
+[[want_delete]]
+- `HASH_WANT_DELETE` --
+ <<fun_HASH_PREFIX_OPEN_PAREN_delete_CLOSE_PAREN_,`HASH_PREFIX((delete()`>>
+[[want_remove]]
+- `HASH_WANT_REMOVE` --
+ <<fun_HASH_PREFIX_OPEN_PAREN_remove_CLOSE_PAREN_,`HASH_PREFIX((remove()`>>
+
+[[generated]]
+Generated functions
+-------------------
+
+These are the functions that the header file generates for you. The
+strange first parameter of each function is a place where the
+`HASH_PREFIX(table) *` resides when you define
+<<table_dynamic,`HASH_TABLE_DYNAMIC`>>. If you do not, the parameter
+is empty.
+
+!!ucw/hashtable.h HASH_PREFIX
+
+[[iterator]]
+Iterator
+--------
+
+You can use the `HASH_FOR_ALL` iterator macro to run through all the
+nodes. Let's say your `HASH_PREFIX(x)` macro is defined as
+`prefix_##x`. Then you would do something like:
+
+ HASH_FOR_ALL(prefix, node_variable)
+ {
+ do_something_with_node(node_variable);
+ }
+ HASH_END_FOR;
+
+If you use <<table_dynamic,`HASH_TABLE_DYNAMIC`>>, use
+`HASH_FOR_ALL_DYNAMIC(prefix, table, node_variable)` instead.
+
+You may not modify the table inside the block. Use `HASH_BREAK` and
+`HASH_CONTINUE` instead of `break` and `continue` statements.
--- /dev/null
+Binary heaps
+============
+
+* <<intro,Introduction>>
+* <<macros,Macros>>
+* <<example,Example>>
+
+!!ucw/heap.h
+
+[[example]]
+Example
+-------
+
+ static uns n;
+ static int heap[4];
+
+ // Create an empty heap
+ n = 0;
+ #define MY_CMP(x, y) ((x) < (y))
+
+ // Insert 20, 10, 30
+ heap[n + 1] = 20;
+ HEAP_INSERT(int, heap, n, MY_CMP, HEAP_SWAP);
+ heap[n + 1] = 10;
+ HEAP_INSERT(int, heap, n, MY_CMP, HEAP_SWAP);
+ heap[n + 1] = 30;
+ HEAP_INSERT(int, heap, n, MY_CMP, HEAP_SWAP);
+
+ // Remove the minimum (10)
+ HEAP_DELMIN(int, heap, n, MY_CMP, HEAP_SWAP);
+
+ // Print the new minimum (20)
+ printf("%d", heap[1]);
+
+ // Increase the minimum by 20 to 40
+ heap[1] += 20;
+ HEAP_INCREASE(int, heap, n, MY_CMP, HEAP_SWAP, 1);
+
+ // Print the new minimum (30)
+ printf("%d", heap[1]);
--- /dev/null
+The UCW library
+===============
+
+The UCW library aims to provide a set of general-purpose tools for programming
+in the C language. It contains generic data structures (lists, trees, hash
+tables etc.), fast memory allocators optimized for various usage patterns,
+abstract I/O streams and optimized implementations of some common algorithms
+(e.g., sorting of arrays and files).
+
+Please note that this documentation is not yet complete. Many modules are
+not described, so you might need to look into the source code.
+
+You can see the index of <<def_index:,documented definitions>>.
+
+Modules
+-------
+- <<basics:,Basics>>
+- <<log:,Logging>>
+- <<fastbuf:,Fastbufs>>
+- <<basecode:,Base64 and Base224 encoding>>
+- <<hash:,Hashing routines>>
+- <<conf:,Configuration and command line parser>>
+- <<mempool:,Memory pools>>
+- <<eltpool:,Fixed-sized allocators>>
+- <<mainloop:,Mainloop>>
+- <<unaligned:,Unaligned data>>
+- <<lists:,Linked lists>>
+- <<heap:,Binary heaps>>
+- <<binheap:,Binomial heaps>>
+- <<hashtable:,Hash tables>>
+- <<growbuf:,Growing buffers>>
+- <<chartype:,Single-byte characters>>
+- <<unicode:,Multi-byte characters>>
+- <<prime:,Prime numbers>>
+- <<sort:,Sorting>>
+- <<binsearch:,Binary search>>
+- <<compress:,Compression>>
+
+Other features
+--------------
+- <<configure:,Compile time configuration>>
+- <<config:,Configuration file syntax>>
+- <<docsys:,Documentation system>>
+- <<generic:,Macro-generated generics>>
+
+Yet undocumented modules
+------------------------
+- Trie
+ * `trie.h`
+- Red-black trees
+ * `redblack.h`
+- Bit manipulation
+ * `bitarray.h`
+ * `bitops.h`
+ * `bitsig.h`
+- String manipulation
+ * `kmp.h`
+ * `kmp-search.h`
+ * `regex.h`
+ * `stkstring.h`
+ * `string.h`
+ * `str-match.h`
+ * `wildmatch.h`
+- File manipulation
+ * `asio.h`
+ * `lfs.h`
+ * `partmap.h`
+- Address manipulation
+ * `url.h`
+ * `ipaccess.h`
+- Prefetching of memory
+ * `prefetch.h`
+- Caches
+ * `qache.h`
+- Threads
+ * `semaphore.h`
+ * `threads.h`
+ * `workqueue.h`
+- Profiling support
+ * `profile.h`
+
+License
+-------
+The UCW library is copyrighted by its authors:
+
+- Pavel Charvát <mailto:pchar\@ucw.cz[]>
+- Martin Mareš <mailto:mj\@ucw.cz[]>
+- Robert Špalek <mailto:robert\@ucw.cz[]>
+- Michal Vaner <mailto:vorner\@ucw.cz[]>
+
+It can be freely distributed and used according to the terms of
+the GNU Lesser General Public License.
--- /dev/null
+Installation of libucw
+======================
+
+Prerequisites
+-------------
+To build and run, you need:
+
+ - The GNU toolchain (gcc 4.0 or newer is required)
+ - GNU bash 2.0 or newer
+ - Perl (any reasonably new version; 5.6.1 works for me)
+ - pkg-config
+ - Linux (porting to other systems should be easy)
+ - Libjpeg, libpng and libungif to support all image formats
+ (jpeg, png and gif). These libraries can be replaced by libgif (gif)
+ and GraphicsMagick >= 1.1 (jpeg, png, gif and possibly more formats).
+ See sherlock/default.cfg for related configuration switches.
+
+Compiling
+---------
+First of all, you need to run the `configure` script to set up compile-time
+options. If you want to compile the library with the default feature set,
+just use:
+
+ ./configure
+
+If you need to set anything unusual, please consult <<configure:>> for details.
+
+The defaults determined by the configure script should be correct on Linux/i386,
+on other architectures you will probably need to tweak the CPU detection section
+in `ucw/perl/UCW/Configure/C.pm` and possibly also the typedefs in `ucw/config.h`.
+
+Then run:
+
+ make
+
+It compiles the package. You need to install it (unless you specified
+a <<configure:local_build,local build>>). To do so, execute:
+
+ make install
--- /dev/null
+Linked lists
+============
+
+Ucwlib defines two basic linked list structures: single-linked lists and circular linked lists.
+Both of them support insertion of any number of nodes, removal of nodes and various searches.
+Single-linked lists are a bit simpler (in particular, they require smaller nodes),
+but some operations need asymptotically more time.
+
+Linked lists can be used very simply. We define a structure as list's handle and
+a common header in all inserted nodes. All routines then accept and return pointers
+to this handle and node headers.
+
+Single-linked lists
+-------------------
+
+!!ucw/slists.h
+
+Circular linked lists
+---------------------
+
+!!ucw/clists.h
+
+Circular linked lists of simple items
+-------------------------------------
+
+!!ucw/simple-lists.h
--- /dev/null
+Logging
+=======
+
+LibUCW contains a powerful system for logging of messages. Depending on your
+needs, it can be used either as a very simple logger which writes all messages
+to stderr or to a single file, or as a multi-stream logger in which different
+messages can be directed to different streams and the streams can be combined
+in various ways.
+
+Simple logging
+--------------
+The basic logging functions are defined in <<basics:logging,lib.h>>.
+
+To log a message, call `msg(L_xxx,@fmt,@args)`, where `L_xxx` is a category of the log
+message (`L_INFO`, `L_WARN`, `L_ERR` etc.), @fmt is a format string as for printf,
+and @args are additional arguments to be substituted to the format string.
+A newline character is automatically appended; the message should not contain
+any control characters.
+
+The first argument of `msg` can be OR'ed with additional flags. Most notably, you can
+add `L_SIGHANDLER` if you wish to log a message from a signal handler (see below
+for discussion on signals and reentrancy in general).
+
+By default, all messages are logged to stderr. If you wish to use a log file,
+call `log_file(@name)`. All subsequent logging will use this file and stderr
+will be redirected there, too.
+
+Names of log files can contain strftime() escapes, which are expanded on the fly.
+This makes it easy to start a new log file every day.
+
+Example
+~~~~~~~
+ #include <ucw/lib.h>
+
+ int main(int argc, char **argv)
+ {
+ log_init(argv[0]);
+ log_file("/var/log/utterances");
+ msg(L_INFO, "This program does nothing, but successfully.");
+ return 0;
+ }
+
+Log streams
+-----------
+More generally, the logger can use multiple log streams. Each stream can be directed
+to a logging back-end (log file, syslog, ...) and equipped with a filter which
+selects a subset of the messages received. A stream can also have substreams
+attached, which are passed a copy of all log messages sent to the parent stream.
+
+Streams are identified by <<struct_log_stream,struct log_stream>> and also by
+their registration number. Messages can be directed to a stream by OR'ing the
+registration number to the first argument of msg().
+
+When a log stream receives a message, it is processed as follows:
+
+ 1. If the log level of the message does not match the set of accepted
+ levels of the stream (@levels), the message is dropped.
+ 2. The filter hook of the stream is consulted and if it returns a non-zero
+ value, the message is dropped.
+ 3. The message is passed to all substreams of the stream.
+ 4. The message is formatted according to the formatting flags (@msgfmt) of the stream.
+ 5. The handler hook of the stream is called (if it exists).
+
+When no stream is explicitly selected, msg() uses the default stream, which
+has registration number 0 and which is also returned by log_default_stream().
+This stream has no explicit destination, but it can have substreams. (When
+a program starts, the default stream is connected to stderr; a call to log_file()
+establishes a file logging stream and links it as the only substream of the
+default stream.)
+
+Streams are reference-counted. When a stream is created, it gets reference count 1.
+When it is linked as a substream of another stream, its reference count is incremented.
+Closing the stream by log_close_stream(), unlinking it or closing a parent stream
+(which causes an unlink) decrements the reference count and when it drops to zero,
+the stream is removed and all its substreams unlinked.
+
+Example
+~~~~~~~
+ #include <ucw/lib.h>
+ #include <ucw/log.h>
+
+ int main(int argc, char **argv)
+ {
+ log_init(argv[0]);
+ struct log_stream *ls = log_new_file("/var/log/utterances", 0);
+ msg(L_INFO | ls->regnum, "Aye captain, we have a log file");
+ msg(L_INFO, "Alas, stderr still works");
+ return 0;
+ }
+
+Message types
+-------------
+Messages can also have types, which can be used for further filtering inside streams.
+By default, there is only the default message type. To obtain an identifier of a new
+type (again to be OR'ed to the log level when calling <<msg()>>), use <<log_register_type()>>.
+The number of types is currently limited to 32.
+
+If you want non-default types to be visible, enable the `LSFMT_TYPE` format flag.
+
+Processes, threads and signals
+------------------------------
+When you fork a new process, it automatically inherits all currently configured log
+streams. You should however call <<log_fork()>> to update the logger's notion
+of the current PID (at least when you use PID's in your log messages). Also, if you
+plan to exec() a process after fork(), do not forget to call <<log_close_all()>>,
+so that all file descriptors used for log files (except for stderr) are closed.
+
+The <<basics:msg()>> function itself can be called from multiple threads in parallel
+and it is atomic by design. The functions for setting up the logging machinery
+are however not reentrant (they follow our general rule about functions that
+affect global state).
+
+Logging from signal handlers is problematic, as is doing almost anything in signal
+handlers, because almost all libc functions are not signal-safe. Most importantly,
+functions for converting time to a human-readable representation aren't safe.
+LibUCW therefore offers only limited logging services in such situations and
+you must use the `L_SIGHANDLER` flag to request it. Otherwise, deadlocks get
+ready to happen.
+
+Messages logged with `L_SIGHANDLER` set are written directly to stderr (which
+is usually an alias for the main log file, at least if you use <<log_file()>>)
+and they do not carry a timestamp. Logging of sighandler messages to general
+log streams or to syslog is therefore not supported.
+
+ucw/log.h
+---------
+!!ucw/log.h
+
+Limiting rate: ucw/tbf.h
+------------------------
+
+LibUCW also offers simple means of limiting the rate of log messages (or of any other
+events) by means of a so called 'Token Bucket Filter.' The idea behind this filter is
+simple: To log a message, we need a token. The available tokens are accumulated in
+a bucket which has a fixed 'filling rate' (the number of tokens arriving in the bucket
+per second, which may be a fractional number) and fixed 'maximum capacity.' The
+bucket receives the tokens continuously with the given rate and when it reaches
+the maximum capacity, the extra tokens are dropped on the floor. When a message
+has to be sent, we take a single token from the bucket and if there wasn't any,
+we drop the message.
+
+The filling rate therefore describes the maximum sustained rate of messages,
+while the bucket capacity tells the filter the maximum length of a short burst,
+which can temporarily exceed the rate.
+
+A general bucket filter is available in `ucw/tbf.h`. The usual way of using it
+to limit logging is to set up a filter hook of a stream which asks the TBF for
+every message. (Remember, though, that if your program is multithreaded, the
+filter hook can be run in multiple threads in parallel, so it has to guard the
+TBF by a lock.) The configuration interface for log streams described above
+is able to attach rate limiters to streams per user's request, so you usually
+need not take any extra care.
+
+!!ucw/tbf.h
--- /dev/null
+Mainloop
+========
+
+Not every program is strictly sequential. Sometimes, an event-driven
+model is much easier to grasp. A fine example of such a program could
+be a railway server. It has a separate connection to each station
+and also to each train, so that it knows where each of them is (and
+that neither a train nor a station has gone missing). So it has to wait
+for events coming from these connections and handle them appropriately.
+It also processes other events that it has itself generated -- for
+example various timers telling that a train is scheduled to depart
+from some station.
+
+The mainloop module takes care of the low-level part of event-driven
+programs: it calls the OS to monitor file activity, to interrupt
+the program at the right moment to serve a timer, and so on. The
+programmer only defines hooks that should be called to handle
+the events and calls mainloop functions to schedule them.
+
+// TODO Example?
+
+- <<conventions,Conventions>>
+- <<time,Time and timers>>
+- <<file,Activity on file descriptors>>
+- <<hooks,Loop hooks>>
+- <<process,Child processes>>
+- <<control,Control of the mainloop>>
+
+!!ucw/mainloop.h
--- /dev/null
+Memory pools
+============
+
+You can use them for efficient allocation of a large number of small
+memory blocks. You can use them to allocate many blocks and free them
+all at once. They allow storing and restoring state of what is
+allocated, growing and shrinking the last block and other tricks.
+
+* <<defs,Definitions>>
+* <<basic,Basic manipulation>>
+* <<alloc,Allocation routines>>
+* <<gbuf,Growing buffers>>
+* <<store,Storing and restoring state>>
+* <<string,String operations>>
+* <<format,Formatted output>>
+* <<examples,Examples>>
+ - <<ex_trie,String trie>>
+ - <<ex_try,Action which may fail>>
+ - <<ex_stdin,Load all data from stdin>>
+
+!!ucw/mempool.h
+
+[[examples]]
+Examples
+--------
+
+Here are a few examples of mempool usage. Their actual use is
+limited only by your imagination.
+
+[[ex_trie]]
+String trie
+~~~~~~~~~~~
+
+There are two advantages for a trie to use a mempool. First, it has less
+overhead than malloc (at the cost that you cannot free the blocks one by
+one as you allocated them). Second, freeing the whole trie is easy: you do
+not need to walk through it and free each node, you just
+<<mp_flush(),flush>> the whole mempool.
+
+ struct trie_node {
+ struct trie_node *subs[256];
+ bool present;
+ };
+
+ struct trie {
+ struct trie_node root;
+ struct mempool *pool;
+ };
+
+ struct trie *trie_new(void) {
+ struct mempool *pool = mp_new(4096);
+ struct trie *result = mp_alloc_zero(pool, sizeof(*result));
+ result->pool = pool;
+ return result;
+ }
+
+ void trie_insert_internal(struct trie_node *where, struct mempool *pool, const char *string) {
+ if(*string) {
+ if(!where->subs[*string])
+ where->subs[*string] = mp_alloc_zero(pool, sizeof(*where->subs[*string]));
+ trie_insert_internal(where->subs[*string], pool, string + 1);
+ } else {
+ where->present = 1;
+ }
+ }
+
+ void trie_insert(struct trie *trie, const char *string) {
+ trie_insert_internal(&trie->root, trie->pool, string);
+ }
+
+ void trie_delete(struct trie *trie) {
+ mp_delete(trie->pool); //Free everything, including the trie structure
+ }
+
+[[ex_try]]
+Action which may fail
+~~~~~~~~~~~~~~~~~~~~~
+
+Imagine a situation where you want to load information from a few files.
+Loading of each file consists of a list of actions, each of which can allocate
+some memory and each can fail. If an action fails, the whole file is
+considered invalid, you want to ignore that file and keep loading the
+others.
+
+The problem with memory is you want to return the already allocated
+amount for the file which failed. You can use <<store,storing>> of
+mempool state.
+
+ void load_file(struct mempool *pool, const char *file) {
+ struct mempool_state state;
+ mp_save(pool, &state); // Store the current state
+ struct file_data *data = mp_alloc_zero(pool, sizeof(*data));
+ if(!(
+ file_open(file, data, pool) && // Load the file
+ header_load(data, pool) &&
+ part1_load(data, pool) &&
+ part2_load(data, pool) &&
+ file_close(data) &&
+ data_link(data, pool))) // Link the loaded data into global state
+ mp_restore(pool, &state); // Failed -> return all used memory
+ }
+
+[[ex_stdin]]
+Load all data from stdin
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+You may want to load all data from stdin into a memory buffer. But
+the problem is that you do not know in advance how much data there is. You
+may use a mempool and its <<gbuf,growing buffer>> feature.
+
+This example uses libucw's own IO system, <<fastbuf:,fastbufs>>.
+
+ void *stdin_data(struct mempool *pool) {
+ struct fastbuf *fb = bopen_fd(0, NULL); // Read from stdin
+ uns amount;
+ char *ptr = mp_start(pool, 1024);
+ while(amount = bread(fb, ptr, 1024)) { // Read a block
+ ptr += amount; // Move after it
+ ptr = mp_spread(pool, ptr, 1024); // Get space for the next block
+ }
+ bclose(fb);
+ return mp_end(pool, ptr);
+ }
--- /dev/null
+Prime numbers
+=============
+
+The library defines some simple functions to generate prime numbers.
+They are useful for example in hash tables.
+
+!!ucw/prime.h
--- /dev/null
+Sorting
+=======
+
+A very common need is sorting data. Therefore libUCW contains a few
+routines to accomplish that task. They are much more universal than
+qsort(), since they allow you to sort structures indexed by a macro,
+sort data externally, if they do not fit into memory, merge data with
+the same keys and sort data of variable length.
+
+All routines described below are <<generic:,generic algorithms>>.
+
+- <<array-simple,Simple array sorting>>
+ * <<mandatory-simple,Mandatory macros>>
+ * <<optional-simple,Optional macros>>
+ * <<example-simple,Example>>
+- <<array,Huge array sorting>>
+ * <<mandatory-array,Mandatory macros>>
+ * <<optional-array,Optional macros>>
+- <<external,External sorting>>
+ * <<basic-external,Basic macros>>
+ * <<callback-external,Callbacks>>
+ * <<integer-external,Integer sorting>>
+ * <<hash-external,Hashing>>
+ * <<merge-external,Merging>>
+ * <<input-external,Input>>
+ * <<output-external,Output>>
+ * <<other-external,Other switches>>
+ * <<function-external,Generated function>>
+
+[[array-simple]]
+Simple array sorting
+--------------------
+
+If you want to sort some data in memory and you aren't too picky about
+setting how, you just use the routine defined in
+`sorter/array-simple.h`. It is an optimised hybrid
+quick-sort/insert-sort algorithm (quick-sort is used to split the
+input into small parts, each is then sorted by insert-sort). It is
+more than 2 times faster than stdlib's qsort(), mostly because of
+inlining.
+
+You need to define a few macros and include the header. You get a
+sorting function in return. It will be called
+<<fun__GENERIC_LINK_|ASORT_PREFIX|sort|,`ASORT_PREFIX(sort)`>>.
+
+[[mandatory-simple]]
+Mandatory macros
+~~~~~~~~~~~~~~~~
+- `ASORT_PREFIX(name)` -- The identifier generating macro.
+- `ASORT_KEY_TYPE` -- Data type of a single array entry key.
+
+[[optional-simple]]
+Optional macros
+~~~~~~~~~~~~~~~
+- `ASORT_ELT(i)` -- Indexing macro. Returns the key of the
+ corresponding entry. If not provided, usual array with sequential
+ indexing is assumed.
+- `ASORT_LT(x,y)` -- Comparing macro. If not provided, compares by the
+ `<` operator.
+- `ASORT_SWAP(i,j)` -- Swap elements with indices `i` and `j`. If not
+ provided, it assumes `ASORT_ELT` is l-value and it just swaps keys.
+- `ASORT_TRESHOLD` -- Sequences of at least this amount of elements are
+ sorted by quick-sort, smaller are sorted by insert-sort. Defaults to
+ `8` (result of experimentation).
+- `ASORT_EXTRA_ARGS` -- Pass some extra arguments to the function.
+ They are visible from all the macros. Must start with a comma.
+
+!!ucw/sorter/array-simple.h ASORT_PREFIX
+
+[[example-simple]]
+Example
+~~~~~~~
+
+Let's sort an array of integers, in the usual way.
+
+ #define ASORT_PREFIX(X) intarr_##X
+ #define ASORT_KEY_TYPE int
+ #include <ucw/sorter/array-simple.h>
+
+This generates an intarr_sort(int *array, uns array_size) function that
+can be used the obvious way.
+
+A more complicated example could be sorting a structure, where items
+with odd indices are stored in one array, even in another. Each item
+could be a structure containing a string and an integer. We would like
+to sort them by the strings.
+
+ struct elem {
+ char *string;
+ int integer;
+ };
+
+ #include <string.h> // Because of strcmp
+ #define ASORT_PREFIX(X) complicated_##X
+ #define ASORT_KEY_TYPE struct elem
+ #define ASORT_ELT(i) ((i % 2 ? even_array : odd_array)[i / 2])
+ #define ASORT_LT(x, y) (strcmp((x).string, (y).string) < 0)
+ #define ASORT_EXTRA_ARGS , struct elem *odd_array, struct elem *even_array
+ #include <ucw/sorter/array-simple.h>
+
+Now we have a complicated_sort(uns array_size, struct elem *odd_array,
+struct elem *even_array) function to perform our sorting.
+
+[[array]]
+Huge array sorting
+------------------
+
+This one is very similar to the simple array sorter, but it is
+optimised for huge arrays. It is used mostly by the
+<<external,external sorter>> machinery described below, but you can
+use it directly.
+
+It is in the `sorter/array.h` header.
+
+It differs in a few details:
+- It supports only continuous arrays, no indexing macro can be
+ provided.
+- It is able to sort in parallel on SMP systems. It assumes all
+ callbacks you provide are thread-safe.
+- If you provide a monotone hash function (if `hash(x) < hash(y)`, then
+ `x < y`, but `x` and `y` may differ when `hash(x) == hash(y)`), it
+ will use it to gain some more speed by radix-sort.
+
+[[mandatory-array]]
+Mandatory macros
+~~~~~~~~~~~~~~~~
+
+- `ASORT_PREFIX(x)` -- The identifier generating macro.
+- `ASORT_KEY_TYPE` -- Type of elements in the array.
+
+[[optional-array]]
+Optional macros
+~~~~~~~~~~~~~~~
+
+- `ASORT_LT(x,y)` -- Comparing macro. Uses the `<` operator if not
+ provided.
+- `ASORT_HASH(x)` -- A monotone hash function (or macro). Should
+ return `uns`.
+- `ASORT_LONG_HASH(x)` -- Like `ASORT_HASH(x)`, but returns 64-bit
+ number instead of 32-bit.
+- `ASORT_TRESHOLD` -- How small should a chunk of data be to be sorted
+ by insert-sort? Defaults to `8` elements.
+- `ASORT_RADIX_BITS` -- How many bits of the hash function should be
+ used at once for radix-sort? The default is guessed from your
+ architecture.
+
+!!ucw/sorter/array.h ASORT_PREFIX
+
+[[external]]
+External sorting
+----------------
+
+If you have too much data to fit into memory, you need to employ
+external sorting. This external sorter operates on
+<<fastbuf:,fastbufs>> containing sequences of items. Each item
+consists of a key, optionally followed by data. Both the keys and data
+may be of variable length, but the keys must be represented by
+fixed-size type in memory. The length of data must be computable from
+the key. Data are just copied verbatim, unless you use the merging
+mode, in which data with the same keys get merged together.
+
+All callbacks must be thread safe.
+
+The sorter resides in the `sorter/sorter.h` header file.
+
+[[basic-external]]
+Basic macros
+~~~~~~~~~~~~
+
+You need to provide some basic macros. Some of them are optional.
+
+- `SORT_PREFIX(x)` -- Identifier generating macro. This one is
+ mandatory.
+- `SORT_KEY` -- Data structure holding the key of item in memory. The
+ representation on disk may be different. Either this one or
+ `SORT_KEY_REGULAR` must be provided.
+- `SORT_KEY_REGULAR` -- You may use this instead of `SORT_KEY`, when
+ the keys have the same representation both in memory and on disk.
+ Then the sorter uses <<fastbuf:bread()>> and <<fastbuf:bwrite()>> to
+ load and store them. It also assumes the keys are not very long.
+- `SORT_KEY_SIZE(key)` -- Returns the real size of the key. The sorter
+ can use this to save space and truncate the key to the given number
+ of bytes, when the keys have variable lengths. If the keys have
+ fixed sizes, there is no need for this macro.
+- `SORT_DATA_SIZE(key)` -- Returns the amount of data following this
+ key. If you do not provide this one, the sorter assumes there are
+ only keys and no data.
+
+[[callback-external]]
+Callbacks
+~~~~~~~~~
+
+Furthermore, you need to provide these callback functions (make sure
+they are thread safe):
+
+- `int SORT_PREFIX(compare)(SORT_KEY *a, SORT_KEY *b)` -- Comparing
+ function. It should act like strcmp(). Mandatory unless provided by
+ <<integer-external,integer sorting>>.
+- `int SORT_PREFIX(read_key)(struct fastbuf *f, SORT_KEY *k)` --
+ Should read a key from the provided <<fastbuf:,fastbuf>> @f and
+ store it into @k. Returns nonzero when ok and zero when an `EOF` was
+ met. Mandatory unless `SORT_KEY_REGULAR` is defined.
+- `void SORT_PREFIX(write_key)(struct fastbuf *f, SORT_KEY *k)` --
+ Should store key @k into @f. Mandatory unless `SORT_KEY_REGULAR` is
+ defined.
+
+[[integer-external]]
+Integer sorting
+~~~~~~~~~~~~~~~
+
+If you sort by an integer value (either computed or available from
+the key), you can use this to save yourself some functions. It also
+activates the <<hash-external,hashing>> automatically.
+
+- `SORT_INT(key)` -- This macro returns the integer to sort by. When
+ you provide it, the compare function is automatically provided for
+ you and the sorting function gets another parameter specifying the
+ range of the integers. The better the range fits, the faster the
+ sorting runs.
+- `SORT_INT64(key)` -- The same, but with 64-bit integers.
+
+[[hash-external]]
+Hashing
+~~~~~~~
+
+If you have a monotone hash function for your keys, you may speed the
+sorting up by providing it. A monotone hash function must satisfy: if
+`hash(x) < hash(y)`, then `x < y`. It should be approximately
+uniformly distributed.
+
+When you want to use it, define `SORT_HASH_BITS` and set it to the
+number of significant bits the hashing function provides. Then provide
+a callback function `uns SORT_PREFIX(hash)(SORT_KEY *key)`.
+
+[[merge-external]]
+Merging items with identical keys
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The sorter is able to merge items with the same keys (the compare
+function returns `0` for them). To use it, define `SORT_UNIFY` macro
+and provide these functions:
+
+- `void SORT_PREFIX(write_merged)(struct fastbuf \*dest, SORT_KEY
+ \*\*keys, void \*\*data, uns n, void *buf)`
+ -- This function takes @n records in memory and writes a single
+ record into the @dest <<fastbuf:,fastbuf>>. The @keys and @data are
+ just the records. The @buf parameter points to a workspace memory.
+ It is guaranteed to hold at least the sum of `SORT_UNIFY_WORKSPACE()`
+ macro over all the keys. The function is allowed to modify all its
+ parameters.
+- `void SORT_PREFIX(copy_merged)(SORT_KEY \*\*keys, struct fastbuf
+\*\*data, uns n, struct fastbuf \*dest)`
+ -- This one is similar to the above one, but the data are still in
+ the <<fastbuf:,fastbufs>> @data and no workspace is provided. This
+ is only used when `SORT_DATA_SIZE` or `SORT_UNIFY_WORKSPACE` is
+ provided.
+- `SORT_UNIFY_WORKSPACE(key)` -- Returns the amount of workspace
+ needed when merging this record. Defaults to `0`.
+
+[[input-external]]
+Specifying input
+~~~~~~~~~~~~~~~~
+
+To tell the sorter where the input is, you specify one of these
+macros:
+
+- `SORT_INPUT_FILE` -- The function takes a filename.
+- `SORT_INPUT_FB` -- The input is a seekable fastbuf stream.
+- `SORT_INPUT_PIPE` -- The input is a non-seekable fastbuf stream.
+- `SORT_INPUT_PRESORT` -- The input is a custom presorter. In this
+ case, you need to write a presorting function `int
+ SORT_PREFIX(presort)(struct fastbuf *dest, void *buf, size_t
+ bufsize)`. The function gets a buffer @buf of size @bufsize to
+ presort in and is supposed to write a presorted bunch of data into the
+ @dest buffer. It should return `1` on success or `0` on `EOF` (all the
+ data have already been written, there is nothing more). In this case, you can
+ safely pass NULL as the input parameter. The function may be used to
+ generate the data on the fly. The function does not have to be
+ thread safe (it can access global variables).
+
+If you define `SORT_DELETE_INPUT` and it evaluates to true (nonzero),
+the input files are deleted as soon as possible.
+
+[[output-external]]
+Specifying output
+~~~~~~~~~~~~~~~~~
+
+You can configure the output in a similar way. Define one of these macros:
+
+- `SORT_OUTPUT_FILE` -- The function takes a filename.
+- `SORT_OUTPUT_FB` -- The function should be provided with NULL and
+ the fastbuf with data is returned.
+- `SORT_THIS_FB` -- A fastbuf is provided to the function and it
+ writes into it. It can already contain some data.
+
+[[other-external]]
+Other switches
+~~~~~~~~~~~~~~
+
+You may define the `SORT_UNIQUE` macro if all keys are distinct. It is
+checked in debug mode.
+
+[[function-external]]
+The generated function
+~~~~~~~~~~~~~~~~~~~~~~
+
+A `SORT_PREFIX(sort)()` function is generated after you include the
+`sorter/sorter.h` header. It has up to three parameters:
+
+- Input. It is either a string (a filename) if you use
+ `SORT_INPUT_FILE` or a fastbuf (otherwise). It should be set to NULL
+ if you use the `SORT_INPUT_PRESORT` input.
+- Output. It is either a string (a filename) if you defined the
+ `SORT_OUTPUT_FILE` or a fastbuf. It must be NULL if you defined
+ `SORT_OUTPUT_FB`.
+- Integer range. The maximum value of integers that are used in the
+ <<integer-external,integer sorting>>. This parameter is here only
+ if you defined `SORT_INT` or `SORT_INT64`.
+
+The function returns a fastbuf you can read the data from.
--- /dev/null
+Fast access to unaligned data
+=============================
+
+Sometimes it is useful to access values which are not correctly aligned.
+To avoid slow copying to aligned buffers, we define several optimized read/write
+functions for accessing such integer values.
+
+!!ucw/unaligned.h
--- /dev/null
+Multi-byte characters
+=====================
+
+!!ucw/unicode.h
--- /dev/null
+/*
+ * UCW Library -- Fast Allocator for Fixed-Size Elements
+ *
+ * (c) 2007 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+/*
+ * This allocator is optimized for intensive allocation and freeing of small
+ * blocks of identical sizes. System memory is allocated by multiples of the
+ * page size and it is returned back only when the whole eltpool is deleted.
+ *
+ * In the future, we can add returning of memory to the system and also cache
+ * coloring like in the SLAB allocator used in the Linux kernel.
+ */
+
+#undef LOCAL_DEBUG
+
+#include "ucw/lib.h"
+#include "ucw/eltpool.h"
+
+/*
+ * Create a pool of fixed-size elements. The requested @elt_size is rounded up
+ * so that a free-list link fits into every element and the result is a multiple
+ * of CPU_STRUCT_ALIGN. The chunk size starts at one page and is doubled until
+ * a chunk header plus @elts_per_chunk elements fit; the real number of elements
+ * per chunk is then recomputed from the chosen chunk size.
+ */
+struct eltpool *
+ep_new(uns elt_size, uns elts_per_chunk)
+{
+  struct eltpool *ep = xmalloc_zero(sizeof(*ep));
+  uns chunk;
+
+  ep->elt_size = ALIGN_TO(MAX(elt_size, sizeof(struct eltpool_free)), CPU_STRUCT_ALIGN);
+
+  /* Find the smallest page-size * 2^k chunk which holds the header and all requested elements */
+  for (chunk = CPU_PAGE_SIZE;
+       ep->elt_size * elts_per_chunk + sizeof(struct eltpool_chunk) > chunk;
+       chunk *= 2)
+    ;
+  ep->chunk_size = chunk;
+
+  /* Whatever remains after the header is filled with elements */
+  ep->elts_per_chunk = (ep->chunk_size - sizeof(struct eltpool_chunk)) / ep->elt_size;
+  DBG("ep_new(): got elt_size=%d, epc=%d; used chunk_size=%d, epc=%d", elt_size, elts_per_chunk, ep->chunk_size, ep->elts_per_chunk);
+  return ep;
+}
+
+/*
+ * Destroy a pool: return every chunk to the page allocator and then free
+ * the pool structure itself.
+ */
+void
+ep_delete(struct eltpool *pool)
+{
+  struct eltpool_chunk *chunk = pool->first_chunk;
+
+  while (chunk)
+    {
+      struct eltpool_chunk *next = chunk->next;
+      /* Unlink the chunk before releasing it, so that the list never points to freed memory */
+      pool->first_chunk = next;
+      page_free(chunk, pool->chunk_size);
+      chunk = next;
+    }
+  xfree(pool);
+}
+
+/*
+ * Slow path of ep_alloc(), used when the free list is empty: allocate
+ * a new chunk, put all of its elements except the last one on the free list
+ * and return the last one to the caller.
+ */
+void *
+ep_alloc_slow(struct eltpool *pool)
+{
+ struct eltpool_chunk *ch = page_alloc(pool->chunk_size);
+ // Elements are stored right behind the chunk header
+ void *p = (void *)(ch+1);
+ // The loop starts at 1, so it links only elts_per_chunk-1 elements ...
+ for (uns i=1; i<pool->elts_per_chunk; i++)
+ {
+ struct eltpool_free *f = p;
+ f->next = pool->first_free;
+ pool->first_free = f;
+ // Arithmetic on a void pointer: relies on the GNU C extension (byte steps)
+ p += pool->elt_size;
+ }
+ // Prepend the new chunk to the list of chunks walked by ep_delete()
+ ch->next = pool->first_chunk;
+ pool->first_chunk = ch;
+ pool->num_chunks++;
+ // ... and the remaining (last) element of the chunk is the result
+ return p;
+}
+
+/*
+ * Report the memory footprint of a pool: all chunks allocated so far
+ * plus the pool structure itself.
+ */
+u64
+ep_total_size(struct eltpool *pool)
+{
+  u64 chunk_bytes = (u64)pool->num_chunks * pool->chunk_size;
+  return chunk_bytes + sizeof(*pool);
+}
+
+#ifdef TEST
+
+#include <stdio.h>
+#include "ucw/clists.h"
+
+/* Element type used by the self-test: a circular-list node followed by one payload byte */
+struct argh {
+ cnode n;
+ byte x[1];
+} PACKED;
+
+/*
+ * Self-test: allocate 65536 elements from a pool, keep them on a circular
+ * list and free the current list head on every fifth round, so that the
+ * free list of the pool gets exercised as well. Prints "OK" at the end.
+ */
+int main(void)
+{
+  clist list;
+  struct eltpool *pool = ep_new(sizeof(struct argh), 64);
+
+  clist_init(&list);
+  for (uns round = 0; round < 65536; round++)
+    {
+      struct argh *elt = ep_alloc(pool);
+
+      /* Every third element goes to the head, all others to the tail */
+      if (round % 3 == 0)
+	clist_add_head(&list, &elt->n);
+      else
+	clist_add_tail(&list, &elt->n);
+
+      /* Every fifth round, return the current head to the pool */
+      if (round % 5 == 0)
+	{
+	  struct argh *victim = clist_head(&list);
+	  clist_remove(&victim->n);
+	  ep_free(pool, victim);
+	}
+    }
+  ep_delete(pool);
+  puts("OK");
+  return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Fast Allocator for Fixed-Size Elements
+ *
+ * (c) 2007 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_ELTPOOL_H
+#define _UCW_ELTPOOL_H
+
+/***
+ * [[defs]]
+ * Definitions
+ * -----------
+ ***/
+
+/**
+ * Memory pool of fixed-sized elements.
+ * You should use this one as an opaque handle only, the insides are internal.
+ **/
+struct eltpool {
+ struct eltpool_chunk *first_chunk; // Singly-linked list of all chunks owned by the pool
+ struct eltpool_free *first_free; // Singly-linked list of elements ready for reuse
+ uns elt_size; // Size of a single element in bytes
+ uns chunk_size; // Size of a single chunk in bytes
+ uns elts_per_chunk; // Number of elements stored in one chunk
+ uns num_allocated; // Just for debugging
+ uns num_chunks; // Number of chunks allocated so far
+};
+
+/* Header placed at the start of every chunk; it links the chunks of a pool together. */
+struct eltpool_chunk {
+ struct eltpool_chunk *next;
+ /* Chunk data continue here */
+};
+
+/* Overlay for an unused element: its memory holds the link to the next free element. */
+struct eltpool_free {
+ struct eltpool_free *next;
+};
+
+/***
+ * [[basic]]
+ * Basic manipulation
+ * ------------------
+ ***/
+
+/**
+ * Create a new memory pool for elements of @elt_size bytes.
+ * The pool will allocate chunks of at least @elts_per_chunk elements.
+ * Higher numbers lead to better allocation times but also to bigger
+ * unused memory blocks. Call @ep_delete() to free all pool's resources.
+ **/
+struct eltpool *ep_new(uns elt_size, uns elts_per_chunk);
+
+/**
+ * Release a memory pool created by @ep_new() including all
+ * elements allocated from that pool.
+ **/
+void ep_delete(struct eltpool *pool);
+
+/**
+ * Return the total number of bytes allocated by a given
+ * memory pool including all internals.
+ **/
+u64 ep_total_size(struct eltpool *pool);
+
+/***
+ * [[alloc]]
+ * Allocation routines
+ * -------------------
+ ***/
+
+void *ep_alloc_slow(struct eltpool *pool); /* Internal. Do not call directly. */
+/**
+ * Allocate a new element on a given memory pool.
+ * The result is always aligned to a multiple of the element's size.
+ **/
+static inline void *ep_alloc(struct eltpool *pool)
+{
+  pool->num_allocated++;
+#ifdef CONFIG_FAKE_ELTPOOL
+  /* Debugging mode: every element is a separate xmalloc() block. */
+  return xmalloc(pool->elt_size);
+#else
+  /* Fast path: reuse the element at the head of the free list, if there is one. */
+  struct eltpool_free *head = pool->first_free;
+  if (!head)
+    return ep_alloc_slow(pool);
+  pool->first_free = head->next;
+  return head;
+#endif
+}
+
+/**
+ * Release an element previously allocated by @ep_alloc().
+ * Note that the memory is not really freed (until @ep_delete()),
+ * but it can be reused by future @ep_alloc()'s.
+ **/
+static inline void ep_free(struct eltpool *pool, void *p)
+{
+  pool->num_allocated--;
+#ifdef CONFIG_FAKE_ELTPOOL
+  (void) pool;
+  xfree(p);
+#else
+  /* Push the element onto the head of the free list so that ep_alloc() can reuse it. */
+  struct eltpool_free *released = p;
+  released->next = pool->first_free;
+  pool->first_free = released;
+#endif
+}
+
+#endif
--- /dev/null
+# Tests for eltpools
+
+Run: ../obj/ucw/eltpool-t
+Out: OK
--- /dev/null
+/*
+ * UCW Library -- Formatting of Process Exit Status
+ *
+ * (c) 2004 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+
+#include <stdio.h>
+#include <sys/wait.h>
+#include <errno.h>
+
+/*
+ * Describe the wait()-style status @stat in @msg.
+ * Returns 0 (and stores an empty string) if the process exited with code 0,
+ * otherwise stores a description of the failure and returns 1.
+ * The caller must supply a buffer large enough for the message.
+ */
+int
+format_exit_status(char *msg, int stat)
+{
+  if (stat < 0)
+    {
+      sprintf(msg, "failed to fork (err=%d)", errno);
+      return 1;
+    }
+  if (WIFEXITED(stat) && WEXITSTATUS(stat) < 256)
+    {
+      if (!WEXITSTATUS(stat))
+        {
+          /* Clean exit: nothing to report */
+          msg[0] = 0;
+          return 0;
+        }
+      sprintf(msg, "died with exit code %d", WEXITSTATUS(stat));
+      return 1;
+    }
+  if (WIFSIGNALED(stat))
+    sprintf(msg, "died on signal %d", WTERMSIG(stat));
+  else
+    sprintf(msg, "died with status %x", stat);
+  return 1;
+}
--- /dev/null
+/*
+ * UCW Library -- Fast Buffered I/O
+ *
+ * (c) 1997--2007 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+#include "ucw/fastbuf.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+/* Flush @f and call its close callback (if it has one). A NULL @f is silently ignored. */
+void bclose(struct fastbuf *f)
+{
+  if (!f)
+    return;
+  bflush(f);
+  if (f->close)
+    f->close(f);
+}
+
+/*
+ * Bring the buffer to the flushed state: if the front-end cursor is past
+ * bstop, the spout callback is asked to write the data out; otherwise,
+ * if bstop is past the buffer start, both cursors are reset to the start.
+ */
+void bflush(struct fastbuf *f)
+{
+  if (f->bptr > f->bstop)
+    {
+      f->spout(f);
+      return;
+    }
+  if (f->bstop > f->buffer)
+    {
+      f->bstop = f->buffer;
+      f->bptr = f->buffer;
+    }
+}
+
+/*
+ * Move the cursor of @f to the absolute position @pos.
+ * Dies if a real seek is needed and the back-end cannot perform it.
+ */
+inline void bsetpos(struct fastbuf *f, ucw_off_t pos)
+{
+  /* We can optimize seeks only when reading: the target lies inside the cached data */
+  if (pos >= f->pos - (f->bstop - f->buffer) && pos <= f->pos)
+    {
+      f->bptr = f->bstop + (pos - f->pos);
+      return;
+    }
+
+  /* Otherwise flush the buffer and let the back-end do the seek */
+  bflush(f);
+  if (!f->seek || !f->seek(f, pos, SEEK_SET))
+    die("bsetpos: stream not seekable");
+}
+
+/*
+ * Seek in @f to @pos, interpreted according to @whence (SEEK_SET, SEEK_CUR
+ * or SEEK_END). SEEK_SET and SEEK_CUR are converted to an absolute position
+ * and passed to bsetpos(); SEEK_END flushes the buffer and asks the back-end's
+ * seek callback directly. Dies if a back-end seek is needed but missing or
+ * failing, or if @whence is none of the three values.
+ */
+void bseek(struct fastbuf *f, ucw_off_t pos, int whence)
+{
+  switch (whence)
+    {
+    case SEEK_SET:
+      /* Plain call + break: ISO C forbids `return expr;` in a void function */
+      bsetpos(f, pos);
+      break;
+    case SEEK_CUR:
+      bsetpos(f, btell(f) + pos);
+      break;
+    case SEEK_END:
+      bflush(f);
+      if (!f->seek || !f->seek(f, pos, SEEK_END))
+        die("bseek: stream not seekable");
+      break;
+    default:
+      die("bseek: invalid whence=%d", whence);
+    }
+}
+
+/* Slow path of bgetc(): refill the buffer if it is empty; returns -1 when refill reports EOF. */
+int bgetc_slow(struct fastbuf *f)
+{
+  /* The refill callback is invoked only when no cached data are left */
+  if (f->bptr >= f->bstop && !f->refill(f))
+    return -1;
+  return *f->bptr++;
+}
+
+/* Slow path of bpeekc(): like bgetc_slow(), but the cursor is not advanced. */
+int bpeekc_slow(struct fastbuf *f)
+{
+  /* The refill callback is invoked only when no cached data are left */
+  if (f->bptr >= f->bstop && !f->refill(f))
+    return -1;
+  return *f->bptr;
+}
+
+/* Slow path of bputc(): call spout when the buffer is full, then store the character @c. */
+void bputc_slow(struct fastbuf *f, uns c)
+{
+  if (f->bptr >= f->bufend)
+    f->spout(f);
+  byte *slot = f->bptr++;
+  *slot = c;
+}
+
+/*
+ * Slow path of bread() and breadb(): copy up to @l bytes from @f to @b,
+ * refilling the buffer as needed. Returns the number of bytes copied.
+ * If @check is non-zero, a read which got some data but fewer than @l bytes dies.
+ */
+uns bread_slow(struct fastbuf *f, void *b, uns l, uns check)
+{
+  byte *dest = b;
+  uns remaining = l;
+  uns done = 0;
+
+  while (remaining)
+    {
+      uns avail = f->bstop - f->bptr;
+      if (!avail)
+        {
+          f->refill(f);
+          avail = f->bstop - f->bptr;
+          if (!avail)		/* Still nothing cached: end of file */
+            break;
+        }
+      uns chunk = (avail > remaining) ? remaining : avail;
+      memcpy(dest, f->bptr, chunk);
+      f->bptr += chunk;
+      dest += chunk;
+      remaining -= chunk;
+      done += chunk;
+    }
+
+  if (check && done && remaining)
+    die("breadb: short read");
+  return done;
+}
+
+/* Slow path of bwrite(): copy @l bytes from @b to @f, calling spout whenever the buffer is full. */
+void bwrite_slow(struct fastbuf *f, const void *b, uns l)
+{
+  const byte *src = b;
+  uns left = l;
+
+  while (left)
+    {
+      uns room = f->bufend - f->bptr;
+      if (!room)
+        {
+          f->spout(f);
+          room = f->bufend - f->bptr;
+        }
+      uns chunk = (room > left) ? left : room;
+      memcpy(f->bptr, src, chunk);
+      f->bptr += chunk;
+      src += chunk;
+      left -= chunk;
+    }
+}
+
+/*
+ * Slow path of bbcopy(): copy @l bytes from @f to @t through the direct
+ * buffer interface. The length ~0U means "copy until the source ends";
+ * for any other length, running out of source data is fatal.
+ */
+void
+bbcopy_slow(struct fastbuf *f, struct fastbuf *t, uns l)
+{
+  /* The special length never changes during the loop, so test it once */
+  const int copy_all = (l == ~0U);
+
+  while (l)
+    {
+      byte *src, *dst;
+      uns src_len = bdirect_read_prepare(f, &src);
+      if (!src_len)
+        {
+          if (copy_all)
+            return;
+          die("bbcopy: source exhausted");
+        }
+      uns dst_len = bdirect_write_prepare(t, &dst);
+      uns n = MIN(l, src_len);
+      n = MIN(n, dst_len);
+      memcpy(dst, src, n);
+      bdirect_read_commit(f, src + n);
+      bdirect_write_commit(t, dst + n);
+      if (!copy_all)
+        l -= n;
+    }
+}
+
+/* Pass a configuration request to the back-end; returns -1 if the back-end has no config callback. */
+int
+bconfig(struct fastbuf *f, uns item, int value)
+{
+  if (!f->config)
+    return -1;
+  return f->config(f, item, value);
+}
+
+/* Go back to the beginning of the stream: flush the buffer, then set the position to 0. */
+void
+brewind(struct fastbuf *f)
+{
+ bflush(f);
+ bsetpos(f, 0);
+}
+
+/*
+ * Slow path of bskip(): skip @len bytes of input using the direct read interface.
+ * Returns 1 on success and 0 if the input ended before @len bytes were skipped.
+ */
+int
+bskip_slow(struct fastbuf *f, uns len)
+{
+  uns todo = len;
+
+  while (todo)
+    {
+      byte *chunk;
+      uns avail = bdirect_read_prepare(f, &chunk);
+      if (!avail)
+        return 0;
+      uns step = MIN(avail, todo);
+      bdirect_read_commit(f, chunk + step);
+      todo -= step;
+    }
+  return 1;
+}
+
+/*
+ * Return the size of the stream behind @f, or -1 if it is not seekable.
+ * A NULL @f yields 0. The size is determined by seeking to the end of the
+ * stream; the original position is restored afterwards.
+ */
+ucw_off_t
+bfilesize(struct fastbuf *f)
+{
+  if (!f)
+    return 0;
+  /*
+   * Back-ends may leave the seek callback NULL. Report "not seekable"
+   * before touching the buffer instead of calling through a NULL pointer.
+   */
+  if (!f->seek)
+    return -1;
+  ucw_off_t pos = btell(f);
+  bflush(f);
+  if (!f->seek(f, 0, SEEK_END))
+    return -1;
+  ucw_off_t len = btell(f);
+  bsetpos(f, pos);
+  return len;
+}
--- /dev/null
+/*
+ * UCW Library -- Fast Buffered I/O
+ *
+ * (c) 1997--2008 Martin Mares <mj@ucw.cz>
+ * (c) 2004 Robert Spalek <robert@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_FASTBUF_H
+#define _UCW_FASTBUF_H
+
+#include <string.h>
+#include <alloca.h>
+
+/***
+ * === Internal structure [[internal]]
+ *
+ * Generally speaking, a fastbuf consists of a buffer and a set of callbacks.
+ * All front-end functions operate on the buffer and if the buffer becomes
+ * empty or fills up, they ask the corresponding callback to handle the
+ * situation. Back-ends then differ just in the definition of the callbacks.
+ *
+ * The state of the fastbuf is represented by a <<struct_fastbuf,`struct fastbuf`>>,
+ * which is a simple structure describing the state of the buffer (the pointers
+ * `buffer`, `bufend`), the front-end cursor (`bptr`), the back-end cursor (`bstop`),
+ * position of the back-end cursor in the file (`pos`), some flags (`flags`)
+ * and pointers to the callback functions.
+ *
+ * The buffer can be in one of the following states:
+ *
+ * 1. Flushed:
+ *
+ * +------------------------------------+---------------------------+
+ * | unused | free space |
+ * +------------------------------------+---------------------------+
+ * ^ ^ ^ ^
+ * buffer <= bstop (BE pos) <= bptr (FE pos) <= bufend
+ *
+ * * This schema describes a fastbuf after its initialization or bflush().
+ * * There is no cached data and we are ready for any read or write operation
+ * (well, only if the back-end supports it).
+ * * The interval `[bptr, bufend]` can be used by front-ends
+ * for writing. If it is empty, the `spout` callback gets called
+ * upon the first write attempt to allocate a new buffer. Otherwise
+ * the fastbuf silently comes to the writing mode.
+ * * When a front-end needs to read something, it calls the `refill` callback.
+ * * The pointers can be either all non-`NULL` or all NULL.
+ * * `bstop == bptr` in most back-ends, but it is not necessary. Some
+ * in-memory streams take advantage of this.
+ *
+ * 2. Reading:
+ *
+ * +------------------------------------+---------------------------+
+ * | read data | unused |
+ * +------------------------------------+---------------------------+
+ * ^ ^ ^ ^
+ * buffer <= bptr (FE pos) <= bstop (BE pos) <= bufend
+ *
+ * * If we try to read something, we get to the reading mode.
+ * * No writing is allowed until a flush operation. But note that @bflush()
+ * will simply set `bptr` to `bstop` before `spout`
+ * and it breaks the position of the front-end's cursor,
+ * so the user should seek afterwards.
+ * * The interval `[buffer, bstop]` contains a block of data read by the back-end.
+ * `bptr` is the front-end's cursor which points to the next character to be read.
+ * After the last character is read, `bptr == bstop` and the `refill` callback
+ * gets called upon the next read attempt to bring further data.
+ * This gives us an easy way how to implement @bungetc().
+ *
+ * 3. Writing:
+ *
+ * +-----------------------+----------------+-----------------------+
+ * | unused | written data | free space |
+ * +-----------------------+----------------+-----------------------+
+ * ^ ^ ^ ^
+ * buffer <= bstop (BE pos) < bptr (FE pos) <= bufend
+ *
+ * * This schema corresponds to the situation after a write attempt.
+ * * No reading is allowed until a flush operation.
+ * * The `bptr` points at the position where the next character
+ * will be written to. When we want to write, but `bptr == bufend`, we call
+ * the `spout` hook to flush the written data and get an empty buffer.
+ * * `bstop` usually points at the beginning of the written data,
+ * but it is not necessary.
+ *
+ *
+ * Rules for back-ends:
+ *
+ * - Front-ends are only allowed to change the value of `bptr`, some flags
+ * and if a fatal error occurs, then also `bstop`. Back-ends can rely on it.
+ * - `buffer <= bstop <= bufend` and `buffer <= bptr <= bufend`.
+ * - `pos` should be the real position in the file corresponding to the location of `bstop` in the buffer.
+ * It can be modified by any back-end's callback, but the position of `bptr` (`pos + (bptr - bstop)`)
+ * must stay unchanged after `refill` or `spout`.
+ * - Failed callbacks (except `close`) should use @bthrow().
+ * - Any callback pointer may be NULL in case the callback is not implemented.
+ * - Callbacks can change not only `bptr` and `bstop`, but also the location and size of the buffer;
+ * the fb-mem back-end takes advantage of it.
+ *
+ * - Initialization:
+ * * out: `buffer <= bstop <= bptr <= bufend` (flushed).
+ *
+ * - `refill`:
+ * * in: `buffer <= bstop <= bptr <= bufend` (reading or flushed).
+ * * out: `buffer <= bptr <= bstop <= bufend` (reading).
+ * * Resulting `bptr == bstop` signals the end of file.
+ * The next reading attempt will again call `refill` which can succeed this time.
+ * * The callback must also return zero on EOF (iff `bptr == bstop`).
+ *
+ * - `spout`:
+ * * in: `buffer <= bstop <= bptr <= bufend` (writing or flushed).
+ * * out: `buffer <= bstop <= bptr < bufend` (flushed).
+ *
+ * - `seek`:
+ * * in: `buffer <= bstop <= bptr <= bufend` (flushed).
+ * * in: `(ofs >= 0 && whence == SEEK_SET) || (ofs <= 0 && whence == SEEK_END)`.
+ * * out: `buffer <= bstop <= bptr <= bufend` (flushed).
+ *
+ * - `close`:
+ * * in: `buffer <= bstop <= bptr <= bufend` (flushed or after @bthrow()).
+ * * `close` must always free all internal structures, even when it throws an exception.
+ ***/
+
+/**
+ * This structure contains the state of the fastbuf. See the discussion above
+ * for how it works.
+ **/
+struct fastbuf {
+ byte is_fastbuf[0]; /* Dummy field for checking of type casts */
+ byte *bptr, *bstop; /* State of the buffer */
+ byte *buffer, *bufend; /* Start and end of the buffer */
+ char *name; /* File name (used for error messages) */
+ ucw_off_t pos; /* Position of bstop in the file */
+ int (*refill)(struct fastbuf *); /* Get a buffer with new data, returns 0 on EOF */
+ void (*spout)(struct fastbuf *); /* Write buffer data to the file */
+ int (*seek)(struct fastbuf *, ucw_off_t, int);/* Slow path for @bseek(), buffer already flushed; returns success */
+ void (*close)(struct fastbuf *); /* Close the stream */
+ int (*config)(struct fastbuf *, uns, int); /* Configure the stream */
+ int can_overwrite_buffer; /* Can the buffer be altered? 0=never, 1=temporarily, 2=permanently */
+};
+
+/***
+ * === Fastbuf on files [[fbparam]]
+ *
+ * If you want to use fastbufs to access files, you can choose one of several
+ * back-ends and set their parameters.
+ ***/
+
+/**
+ * Back-end types
+ */
+enum fb_type {
+ FB_STD, /* Standard buffered I/O */
+ FB_DIRECT, /* Direct I/O bypassing system caches (see fb-direct.c for a description) */
+ FB_MMAP /* Memory mapped files */
+};
+
+/**
+ * When you open a file fastbuf, you can use this structure to select a back-end
+ * and set its parameters. If you want just an "ordinary" file stream, you can
+ * happily pass NULL instead and the defaults from the configuration file (or
+ * hard-wired defaults if no config file has been read) will be used.
+ */
+struct fb_params {
+ enum fb_type type; /* The chosen back-end */
+ uns buffer_size; /* 0 for default size */
+ uns keep_back_buf; /* FB_STD: optimize for bi-directional access */
+ uns read_ahead; /* FB_DIRECT options */
+ uns write_back; /* FB_DIRECT option (cf. the write_back argument of fbdir_open_fd_internal()) */
+ struct asio_queue *asio; /* FB_DIRECT option; NOTE(review): presumably the I/O queue handed to fbdir_open_fd_internal() -- confirm */
+};
+
+struct cf_section;
+extern struct cf_section fbpar_cf; /** Configuration section with which you can fill the `fb_params` **/
+extern struct fb_params fbpar_def; /** The default `fb_params` **/
+
+/**
+ * Opens a file with file mode @mode (see the man page of open()).
+ * Use @params to select the fastbuf back-end and its parameters or
+ * pass NULL if you are fine with defaults.
+ *
+ * Dies if the file does not exist.
+ **/
+struct fastbuf *bopen_file(const char *name, int mode, struct fb_params *params);
+struct fastbuf *bopen_file_try(const char *name, int mode, struct fb_params *params); /** Like bopen_file(), but returns NULL on failure. **/
+
+/**
+ * Opens a temporary file.
+ * It is placed with other temp files and it is deleted when closed.
+ * Again, use NULL for @params if you want the defaults.
+ **/
+struct fastbuf *bopen_tmp_file(struct fb_params *params);
+
+/**
+ * Creates a fastbuf from a file descriptor @fd and sets its filename
+ * to @name (the name is used only in error messages).
+ * When the fastbuf is closed, the fd is closed as well. You can override
+ * this behavior by calling @bconfig().
+ */
+struct fastbuf *bopen_fd_name(int fd, struct fb_params *params, const char *name);
+static inline struct fastbuf *bopen_fd(int fd, struct fb_params *params) /** Same as above, but with an auto-generated filename. **/
+{
+ return bopen_fd_name(fd, params, NULL);
+}
+
+/**
+ * Flushes all buffers and makes sure that they are written to the disk.
+ **/
+void bfilesync(struct fastbuf *b);
+
+/***
+ * === Fastbufs on regular files [[fbfile]]
+ *
+ * If you want to use the `FB_STD` back-end and not worry about setting
+ * up any parameters, there is a couple of shortcuts.
+ ***/
+
+struct fastbuf *bopen(const char *name, uns mode, uns buflen); /** Equivalent to @bopen_file() with `FB_STD` back-end. **/
+struct fastbuf *bopen_try(const char *name, uns mode, uns buflen); /** Equivalent to @bopen_file_try() with `FB_STD` back-end. **/
+struct fastbuf *bopen_tmp(uns buflen); /** Equivalent to @bopen_tmp_file() with `FB_STD` back-end. **/
+struct fastbuf *bfdopen(int fd, uns buflen); /** Equivalent to @bopen_fd() with `FB_STD` back-end. **/
+struct fastbuf *bfdopen_shared(int fd, uns buflen); /** Like @bfdopen(), but it does not close the @fd on @bclose(). **/
+
+/***
+ * === Temporary files [[fbtemp]]
+ *
+ * Usually, @bopen_tmp_file() is the best way how to come to a temporary file.
+ * However, in some specific cases you can need more, so there is also a set
+ * of more general functions.
+ ***/
+
+#define TEMP_FILE_NAME_LEN 256
+
+/**
+ * Generates a temporary filename and stores it to the @name_buf (of size
+ * at least `TEMP_FILE_NAME_LEN`). If @open_flags are not NULL, flags that
+ * should be OR-ed with other flags to open() will be stored there.
+ *
+ * The location and style of temporary files is controlled by the configuration.
+ * By default, the system temp directory (`$TMPDIR` or `/tmp`) is used.
+ *
+ * If the location is a publicly writeable directory (like `/tmp`), the
+ * generated filename cannot be guaranteed to be unique, so @open_flags
+ * will include `O_EXCL` and you have to check the result of open() and
+ * iterate if needed.
+ *
+ * This function is not specific to fastbufs, it can be used separately.
+ **/
+void temp_file_name(char *name_buf, int *open_flags);
+
+/**
+ * Opens a temporary file and returns its file descriptor.
+ * You specify the file @mode and @open_flags passed to open().
+ *
+ * If the @name_buf (of at least `TEMP_FILE_NAME_LEN` chars) is not NULL,
+ * the filename is also stored in it.
+ *
+ * This function is not specific to fastbufs, it can be used separately.
+ */
+int open_tmp(char *name_buf, int open_flags, int mode);
+
+/**
+ * Sometimes, a file is created as temporary and then moved to a stable
+ * location. This function takes a fastbuf created by @bopen_tmp_file()
+ * or @bopen_tmp(), marks it as permanent, closes it and renames it to
+ * @name.
+ *
+ * Please note that it assumes that the temporary file and the @name
+ * are on the same volume (otherwise, rename() fails), so you might
+ * want to configure a special location for the temporary files
+ * beforehand.
+ */
+void bfix_tmp_file(struct fastbuf *fb, const char *name);
+
+/* Internal functions of some file back-ends */
+
+struct fastbuf *bfdopen_internal(int fd, const char *name, uns buflen);
+struct fastbuf *bfmmopen_internal(int fd, const char *name, uns mode);
+
+#ifdef CONFIG_UCW_FB_DIRECT
+extern uns fbdir_cheat;
+struct asio_queue;
+struct fastbuf *fbdir_open_fd_internal(int fd, const char *name, struct asio_queue *io_queue, uns buffer_size, uns read_ahead, uns write_back);
+#endif
+
+void bclose_file_helper(struct fastbuf *f, int fd, int is_temp_file);
+
+/***
+ * === Fastbufs on file fragments [[fblim]]
+ *
+ * The `fblim` back-end reads from a file handle, but at most a given
+ * number of bytes. This is frequently used for reading from sockets.
+ ***/
+
+struct fastbuf *bopen_limited_fd(int fd, uns bufsize, uns limit); /** Create a fastbuf which reads at most @limit bytes from @fd. **/
+
+/***
+ * === Fastbufs on in-memory streams [[fbmem]]
+ *
+ * The `fbmem` back-end keeps the whole contents of the stream
+ * in memory (as a linked list of memory blocks, so address space
+ * fragmentation is avoided).
+ *
+ * First, you use @fbmem_create() to create the stream and the fastbuf
+ * used for writing to it. Then you can call @fbmem_clone_read() to get
+ * an arbitrary number of fastbuf for reading from the stream.
+ ***/
+
+struct fastbuf *fbmem_create(uns blocksize); /** Create stream and return its writing fastbuf. **/
+struct fastbuf *fbmem_clone_read(struct fastbuf *f); /** Given a writing fastbuf, create a new reading fastbuf. **/
+
+/***
+ * === Fastbufs on static buffers [[fbbuf]]
+ *
+ * The `fbbuf` back-end stores the stream in a given block of memory.
+ * This is useful for parsing and generating of complex data structures.
+ ***/
+
+/**
+ * Creates a read-only fastbuf that takes its data from a given buffer.
+ * The fastbuf structure is allocated by the caller and pointed to by @f.
+ * The @buffer and @size specify the location and size of the buffer.
+ *
+ * In some cases, the front-ends can take advantage of rewriting the contents
+ * of the buffer temporarily. In this case, set @can_overwrite as described
+ * in <<internal,Internals>>. If you do not care, keep @can_overwrite zero.
+ *
+ * It is not possible to close this fastbuf.
+ */
+void fbbuf_init_read(struct fastbuf *f, byte *buffer, uns size, uns can_overwrite);
+
+/**
+ * Creates a write-only fastbuf which writes into a provided memory buffer.
+ * The fastbuf structure is allocated by the caller and pointed to by @f.
+ * An attempt to write behind the end of the buffer dies.
+ *
+ * Data are written directly into the buffer, so it is not necessary to call @bflush()
+ * at any moment.
+ *
+ * It is not possible to close this fastbuf.
+ */
+void fbbuf_init_write(struct fastbuf *f, byte *buffer, uns size);
+
+static inline uns fbbuf_count_written(struct fastbuf *f) /** Calculates, how many bytes were already written into the buffer. **/
+{
+ return f->bptr - f->bstop;
+}
+
+/***
+ * === Fastbuf on recyclable growing buffers [[fbgrow]]
+ *
+ * The `fbgrow` back-end keeps the stream in a contiguous buffer stored in the
+ * main memory, but unlike <<fbmem,`fbmem`>>, the buffer does not have a fixed
+ * size and it is expanded to accommodate all data.
+ *
+ * At every moment, you can use `fastbuf->buffer` to gain access to the stream.
+ ***/
+
+struct fastbuf *fbgrow_create(unsigned basic_size); /** Create the growing buffer pre-allocated to @basic_size bytes. **/
+void fbgrow_reset(struct fastbuf *b); /** Reset stream and prepare for writing. **/
+void fbgrow_rewind(struct fastbuf *b); /** Prepare for reading (of already written data). **/
+
+/***
+ * === Fastbuf on memory pools [[fbpool]]
+ *
+ * The write-only `fbpool` back-end also keeps the stream in a contiguous
+ * buffer, but this time the buffer is allocated from within a memory pool.
+ ***/
+
+struct mempool;
+struct fbpool { /** Structure for fastbufs & mempools. **/
+ struct fastbuf fb;
+ struct mempool *mp;
+};
+
+/**
+ * Initialize a new `fbpool`. The structure is allocated by the caller.
+ **/
+void fbpool_init(struct fbpool *fb); /** Initialize a new mempool fastbuf. **/
+/**
+ * Start a new continuous block and prepare for writing (see <<mempool:mp_start()>>).
+ * Provide the memory pool you want to use for this block as @mp.
+ **/
+void fbpool_start(struct fbpool *fb, struct mempool *mp, uns init_size);
+/**
+ * Close the block and return the address of its start (see <<mempool:mp_end()>>).
+ * The length can be determined by calling <<mempool:mp_size(mp, ptr)>>.
+ **/
+void *fbpool_end(struct fbpool *fb);
+
+/***
+ * === Atomic files for multi-threaded programs [[fbatomic]]
+ *
+ * This fastbuf backend is designed for cases when several threads
+ * of a single program append records to a common file and while the
+ * record can mix in an arbitrary way, the bytes inside a single
+ * record must remain uninterrupted.
+ *
+ * In case of files with fixed record size, we just allocate the
+ * buffer to hold a whole number of records and take advantage
+ * of the atomicity of the write() system call.
+ *
+ * With variable-sized records, we need another solution: when
+ * writing a record, we keep the fastbuf in a locked state, which
+ * prevents buffer flushing (and if the buffer becomes full, we extend it),
+ * and we wait for an explicit commit operation which write()s the buffer
+ * if the free space in the buffer falls below the expected maximum record
+ * length.
+ *
+ * Please note that initialization of the clones is not thread-safe,
+ * so you have to serialize it yourself.
+ ***/
+
+struct fb_atomic {
+ struct fastbuf fb;
+ struct fb_atomic_file *af;
+ byte *expected_max_bptr;
+ uns slack_size;
+};
+#define FB_ATOMIC(f) ((struct fb_atomic *)(f)->is_fastbuf)
+
+/**
+ * Open an atomic fastbuf.
+ * If @master is NULL, the file @name is opened. If it is non-null,
+ * a new clone of an existing atomic fastbuf is created.
+ *
+ * If the file has fixed record length, just set @record_len to it.
+ * Otherwise set @record_len to the expected maximum record length
+ * with a negative sign (you need not fit in this length, but as long
+ * as you do, the fastbuf is more efficient) and call @fbatomic_commit()
+ * after each record.
+ *
+ * You can specify @record_len, if it is known (for optimisations).
+ *
+ * The file is closed when all fastbufs using it are closed.
+ **/
+struct fastbuf *fbatomic_open(const char *name, struct fastbuf *master, uns bufsize, int record_len);
+void fbatomic_internal_write(struct fastbuf *b);
+
+/**
+ * Declare that you have finished writing a record. This is required only
+ * if a fixed record size was not specified.
+ **/
+static inline void fbatomic_commit(struct fastbuf *b)
+{
+  struct fb_atomic *atomic = (struct fb_atomic *) b;
+
+  /* Nothing to do until the cursor reaches the expected_max_bptr threshold */
+  if (b->bptr < atomic->expected_max_bptr)
+    return;
+  fbatomic_internal_write(b);
+}
+
+/*** === Configuring stream parameters [[bconfig]] ***/
+
+enum bconfig_type { /** Parameters that could be configured. **/
+ BCONFIG_IS_TEMP_FILE, /* 0=normal file, 1=temporary file, 2=shared fd */
+ BCONFIG_KEEP_BACK_BUF, /* Optimize for bi-directional access */
+};
+
+int bconfig(struct fastbuf *f, uns type, int data); /** Configure a fastbuf. Returns previous value. **/
+
+/*** === Universal functions working on all fastbuf's [[ffbasic]] ***/
+
+/**
+ * Close and free fastbuf.
+ * Can not be used for fastbufs not returned from function (initialized in a parameter, for example the one from `fbbuf_init_read`).
+ */
+void bclose(struct fastbuf *f);
+void bflush(struct fastbuf *f); /** Write data (if it makes any sense, do not use for in-memory buffers). **/
+void bseek(struct fastbuf *f, ucw_off_t pos, int whence); /** Seek in the buffer. See `man fseek` for description of @whence. Only for seekable fastbufs. **/
+void bsetpos(struct fastbuf *f, ucw_off_t pos); /** Set position to @pos bytes from beginning. Only for seekable fastbufs. **/
+void brewind(struct fastbuf *f); /** Go to the beginning of the fastbuf. Only for seekable ones. **/
+ucw_off_t bfilesize(struct fastbuf *f); /** How large is the file? -1 if not seekable. **/
+
+static inline ucw_off_t btell(struct fastbuf *f) /** Where am I (from the beginning)? **/
+{
+ return f->pos + (f->bptr - f->bstop);
+}
+
+int bgetc_slow(struct fastbuf *f);
+static inline int bgetc(struct fastbuf *f) /** Return next character from the buffer. **/
+{
+  if (f->bptr < f->bstop)
+    return *f->bptr++;
+  return bgetc_slow(f);
+}
+
+int bpeekc_slow(struct fastbuf *f);
+static inline int bpeekc(struct fastbuf *f) /** Return next character from the buffer, but keep the current position. **/
+{
+  if (f->bptr < f->bstop)
+    return *f->bptr;
+  return bpeekc_slow(f);
+}
+
+static inline void bungetc(struct fastbuf *f) /** Return last read character back. Only one back is guaranteed to work. **/
+{
+ f->bptr--;
+}
+
+void bputc_slow(struct fastbuf *f, uns c);
+static inline void bputc(struct fastbuf *f, uns c) /** Write a single character. **/
+{
+  if (f->bptr >= f->bufend)
+    {
+      /* Buffer full: the slow path calls spout first */
+      bputc_slow(f, c);
+      return;
+    }
+  *f->bptr++ = c;
+}
+
+static inline uns bavailr(struct fastbuf *f) /** Return the length of the cached data to be read. Do not use directly. **/
+{
+ return f->bstop - f->bptr;
+}
+
+static inline uns bavailw(struct fastbuf *f) /** Return the length of the buffer available for writing. Do not use directly. **/
+{
+ return f->bufend - f->bptr;
+}
+
+uns bread_slow(struct fastbuf *f, void *b, uns l, uns check);
+/**
+ * Read at most @l bytes of data into @b.
+ * Returns number of bytes read.
+ * 0 means end of file.
+ */
+static inline uns bread(struct fastbuf *f, void *b, uns l)
+{
+  /* Not enough cached data: let the slow path refill the buffer (no short-read check) */
+  if (bavailr(f) < l)
+    return bread_slow(f, b, l, 0);
+  memcpy(b, f->bptr, l);
+  f->bptr += l;
+  return l;
+}
+
+/**
+ * Reads exactly @l bytes of data into @b.
+ * If at the end of file, it returns 0.
+ * If there are data, but less than @l, it dies.
+ */
+static inline uns breadb(struct fastbuf *f, void *b, uns l)
+{
+  /* Not enough cached data: let the slow path refill the buffer and check for short reads */
+  if (bavailr(f) < l)
+    return bread_slow(f, b, l, 1);
+  memcpy(b, f->bptr, l);
+  f->bptr += l;
+  return l;
+}
+
+void bwrite_slow(struct fastbuf *f, const void *b, uns l);
+static inline void bwrite(struct fastbuf *f, const void *b, uns l) /** Writes buffer @b of length @l into fastbuf. **/
+{
+  if (bavailw(f) < l)
+    {
+      /* Does not fit into the free space: the slow path writes it piece by piece */
+      bwrite_slow(f, b, l);
+      return;
+    }
+  memcpy(f->bptr, b, l);
+  f->bptr += l;
+}
+
+/**
+ * Reads a line into @b and strips trailing `\n`.
+ * Returns pointer to the terminating 0 or NULL on `EOF`.
+ * Dies if the line is longer than @l.
+ **/
+char *bgets(struct fastbuf *f, char *b, uns l);
+char *bgets0(struct fastbuf *f, char *b, uns l); /** The same as @bgets(), but for 0-terminated strings. **/
+/**
+ * Returns either length of read string (excluding the terminator) or -1 if it is too long.
+ * In such cases exactly @l bytes are read.
+ */
+int bgets_nodie(struct fastbuf *f, char *b, uns l);
+
+struct mempool;
+struct bb_t;
+/**
+ * Read a string, strip the trailing `\n` and store it into growing buffer @b.
+ * Dies if the line is longer than @limit.
+ **/
+uns bgets_bb(struct fastbuf *f, struct bb_t *b, uns limit);
+/**
+ * Read a string, strip the trailing `\n` and store it into buffer allocated from a memory pool.
+ **/
+char *bgets_mp(struct fastbuf *f, struct mempool *mp);
+
+struct bgets_stk_struct {
+ struct fastbuf *f;
+ byte *old_buf, *cur_buf, *src;
+ uns old_len, cur_len, src_len;
+};
+void bgets_stk_init(struct bgets_stk_struct *s);
+void bgets_stk_step(struct bgets_stk_struct *s);
+
+/**
+ * Read a string, strip the trailing `\n` and store it on the stack (allocated using alloca()).
+ **/
+#define bgets_stk(fb) \
+ ({ struct bgets_stk_struct _s; _s.f = (fb); for (bgets_stk_init(&_s); _s.cur_len; _s.cur_buf = alloca(_s.cur_len), bgets_stk_step(&_s)); _s.cur_buf; })
+
+/**
+ * Write a string, without 0 or `\n` at the end.
+ **/
+static inline void bputs(struct fastbuf *f, const char *b)
+{
+ bwrite(f, b, strlen(b));
+}
+
+/**
+ * Write string, including terminating 0.
+ **/
+static inline void bputs0(struct fastbuf *f, const char *b)
+{
+ bwrite(f, b, strlen(b)+1);
+}
+
+/**
+ * Write string and append a newline to the end.
+ **/
+static inline void bputsn(struct fastbuf *f, const char *b)
+{
+ bputs(f, b);
+ bputc(f, '\n');
+}
+
+void bbcopy_slow(struct fastbuf *f, struct fastbuf *t, uns l);
+/**
+ * Copy @l bytes of data from fastbuf @f to fastbuf @t.
+ * `UINT_MAX` (`~0U`) means all data, even if more than `UINT_MAX` bytes remain.
+ **/
+static inline void bbcopy(struct fastbuf *f, struct fastbuf *t, uns l)
+{
+  /* The fast path needs the whole block cached in @f and enough free space in @t */
+  if (bavailr(f) < l || bavailw(t) < l)
+    {
+      bbcopy_slow(f, t, l);
+      return;
+    }
+  memcpy(t->bptr, f->bptr, l);
+  t->bptr += l;
+  f->bptr += l;
+}
+
+int bskip_slow(struct fastbuf *f, uns len);
+static inline int bskip(struct fastbuf *f, uns len) /** Skip @len bytes without reading them. **/
+{
+  if (bavailr(f) < len)
+    return bskip_slow(f, len);
+  f->bptr += len;
+  return 1;
+}
+
+/*** === Direct I/O on buffers ***/
+
+/**
+ * Begin direct reading from fastbuf's internal buffer to avoid unnecessary copying.
+ * The function returns a buffer @buf together with its length in bytes (zero means EOF)
+ * with cached data to be read.
+ *
+ * Some back-ends allow the user to modify the data in the returned buffer to avoid unnecessary copying.
+ * If the back-end allows such modifications, it can set `f->can_overwrite_buffer` accordingly:
+ *
+ * - 0 if no modification is allowed,
+ * - 1 if the user can modify the buffer on the condition that
+ * the modifications will be undone before calling the next
+ * fastbuf operation
+ * - 2 if the user is allowed to overwrite the data in the buffer
+ * if @bdirect_read_commit_modified() is called afterwards.
+ * In this case, the back-end must be prepared for trimming
+ * of the buffer which is done by the commit function.
+ *
+ * The reading must be ended by @bdirect_read_commit() or @bdirect_read_commit_modified(),
+ * unless the user did not read or modify anything.
+ **/
+static inline uns bdirect_read_prepare(struct fastbuf *f, byte **buf)
+{
+  /* The refill callback is called only when no cached data are left */
+  if (f->bptr != f->bstop || f->refill(f))
+    {
+      *buf = f->bptr;
+      return bavailr(f);
+    }
+  *buf = NULL;		// This is not needed, but it helps to get rid of spurious warnings
+  return 0;
+}
+
+/**
+ * End direct reading started by @bdirect_read_prepare() and move the cursor at @pos.
+ * Data in the returned buffer must be same as after @bdirect_read_prepare() and
+ * @pos must point somewhere inside the buffer.
+ **/
+static inline void bdirect_read_commit(struct fastbuf *f, byte *pos)
+{
+ f->bptr = pos;
+}
+
+/**
+ * Similar to @bdirect_read_commit(), but accepts also modified data before @pos.
+ * Note that such modifications are supported only if `f->can_overwrite_buffer == 2`.
+ **/
+static inline void bdirect_read_commit_modified(struct fastbuf *f, byte *pos)
+{
+ f->bptr = pos;
+ f->buffer = pos; /* Avoid seeking backwards in the buffer */
+}
+
+/**
+ * Start direct writing to fastbuf's internal buffer to avoid copy overhead.
+ * The function returns the length of the buffer in @buf (at least one byte)
+ * where we can write to. The operation must be ended by @bdirect_write_commit(),
+ * unless nothing is written.
+ *
+ * If the buffer is completely full (the cursor has reached `f->bufend`),
+ * the back-end's spout callback is invoked first.
+ **/
+static inline uns bdirect_write_prepare(struct fastbuf *f, byte **buf)
+{
+ if (f->bptr == f->bufend)
+ f->spout(f);
+ *buf = f->bptr;
+ return bavailw(f);
+}
+
+/**
+ * Commit the data written to the buffer returned by @bdirect_write_prepare().
+ * The length is specified by @pos which must point just after the written data.
+ * Also moves the cursor to @pos. Only the cursor is updated by this call;
+ * the back-end is not invoked.
+ **/
+static inline void bdirect_write_commit(struct fastbuf *f, byte *pos)
+{
+ f->bptr = pos;
+}
+
+/*** === Formatted output ***/
+
+/**
+ * printf into a fastbuf.
+ **/
+int bprintf(struct fastbuf *b, const char *msg, ...)
+ FORMAT_CHECK(printf,2,3);
+int vbprintf(struct fastbuf *b, const char *msg, va_list args); /** vprintf into a fastbuf. **/
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Atomic Buffered Write to Files
+ *
+ * (c) 2006 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+#include "ucw/fastbuf.h"
+#include "ucw/lfs.h"
+#include "ucw/conf.h"
+
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+static uns trace;
+
+#ifndef TEST
+
+/*
+ * Configuration section of this module (declared under the name "FBAtomic"
+ * by fbatomic_init_config()). Its only item, "Trace", sets the `trace`
+ * switch consulted by the TRACE() macro.
+ */
+static struct cf_section fbatomic_config = {
+  CF_ITEMS {
+    CF_UNS("Trace", &trace),
+    CF_END	/* Terminate the item list, as the other config sections of the library do */
+  }
+};
+
+/* Constructor hook: declares the "FBAtomic" configuration section. */
+static void CONSTRUCTOR fbatomic_init_config(void)
+{
+ cf_declare_section("FBAtomic", &fbatomic_config, 1);
+}
+
+#endif
+
+/* Emit a debugging message prefixed with "FB_ATOMIC: ", but only when tracing is switched on. */
+#define TRACE(m...) do { if(trace) msg(L_DEBUG, "FB_ATOMIC: " m); } while(0)
+
+/* State of one output file, shared by all fastbufs attached to it (see fbatomic_open()). */
+struct fb_atomic_file {
+ int fd; /* Descriptor of the output file */
+ int use_count; /* Number of fastbufs attached; the file is closed when it drops to zero */
+ int record_len; /* Record length as passed to fbatomic_open() */
+ uns locked; /* Set for negative record_len; a full buffer is then grown instead of written */
+ byte name[1]; /* File name; the structure is allocated with room for the whole string */
+};
+
+/*
+ * Write everything accumulated in the buffer of @f by a single write() call
+ * and rewind the buffer to its start. Dies on an I/O error or a partial write.
+ * Does nothing when the buffer is empty.
+ */
+void
+fbatomic_internal_write(struct fastbuf *f)
+{
+  struct fb_atomic_file *af = FB_ATOMIC(f)->af;
+  int size = f->bptr - f->buffer;
+
+  if (!size)
+    return;				/* Nothing buffered */
+
+  /* With a non-negative record length, only whole records may be written */
+  ASSERT(af->record_len < 0 || !(size % af->record_len));
+
+  int written = write(af->fd, f->buffer, size);
+  if (written < 0)
+    die("Error writing %s: %m", f->name);
+  if (written != size)
+    die("Unexpected partial write to %s: written only %d bytes of %d", f->name, written, size);
+  f->bptr = f->buffer;
+}
+
+/*
+ * Spout callback. It acts only when the buffer is completely full.
+ * If the file is locked, the buffer is enlarged by the current slack size
+ * (and the slack is doubled for the next time), keeping the data in memory;
+ * otherwise the buffer is written to the file.
+ */
+static void
+fbatomic_spout(struct fastbuf *f)
+{
+ if (f->bptr < f->bufend) /* Explicit flushes should be ignored */
+ return;
+
+ struct fb_atomic *F = FB_ATOMIC(f);
+ if (F->af->locked)
+ {
+ uns written = f->bptr - f->buffer;
+ uns size = f->bufend - f->buffer + F->slack_size;
+ F->slack_size *= 2;
+ TRACE("Reallocating buffer for atomic file %s with slack %d", f->name, F->slack_size);
+ f->buffer = xrealloc(f->buffer, size);
+ /* The buffer may have moved, so recompute all pointers into it */
+ f->bufend = f->buffer + size;
+ f->bptr = f->buffer + written;
+ F->expected_max_bptr = f->bufend - F->slack_size;
+ }
+ else
+ fbatomic_internal_write(f);
+}
+
+/*
+ * Close callback: flush whatever is still buffered, drop our reference to
+ * the shared file (closing the descriptor and freeing the file structure
+ * when we were its last user) and release the memory owned by the fastbuf.
+ */
+static void
+fbatomic_close(struct fastbuf *f)
+{
+  struct fb_atomic_file *af = FB_ATOMIC(f)->af;
+  fbatomic_internal_write(f);	/* Need to flush explicitly, because the file can be locked */
+  if (!--af->use_count)
+    {
+      close(af->fd);
+      xfree(af);
+    }
+  xfree(f->buffer);		/* Allocated separately in fbatomic_open(); would leak otherwise */
+  xfree(f);
+}
+
+/*
+ * Create a fastbuf for atomic writes.
+ *
+ * If @master is NULL, the file @name is created (truncated, opened for
+ * appending) and a new shared file structure is set up; the function dies
+ * when the file cannot be created. Otherwise the new fastbuf is attached to
+ * the file already opened by @master, whose record length must equal
+ * @record_len.
+ *
+ * For a positive @record_len, @bufsize is rounded up to a multiple of it.
+ * For a negative @record_len, the file is marked as locked and -@record_len
+ * becomes the initial slack size, which must be smaller than @bufsize.
+ */
+struct fastbuf *
+fbatomic_open(const char *name, struct fastbuf *master, uns bufsize, int record_len)
+{
+ struct fb_atomic *F = xmalloc_zero(sizeof(*F));
+ struct fastbuf *f = &F->fb;
+ struct fb_atomic_file *af;
+ if (master)
+ {
+ /* Share the master's file and take one more reference to it */
+ af = FB_ATOMIC(master)->af;
+ af->use_count++;
+ ASSERT(af->record_len == record_len);
+ }
+ else
+ {
+ /* name[] already has room for one byte, so strlen(name) more bytes fit the terminator too */
+ af = xmalloc_zero(sizeof(*af) + strlen(name));
+ if ((af->fd = ucw_open(name, O_WRONLY | O_CREAT | O_TRUNC | O_APPEND, 0666)) < 0)
+ die("Cannot create %s: %m", name);
+ af->use_count = 1;
+ af->record_len = record_len;
+ af->locked = (record_len < 0);
+ strcpy(af->name, name);
+ }
+ F->af = af;
+ if (record_len > 0 && bufsize % record_len)
+ bufsize += record_len - (bufsize % record_len);
+ f->buffer = xmalloc(bufsize);
+ f->bufend = f->buffer + bufsize;
+ F->slack_size = (record_len < 0) ? -record_len : 0;
+ ASSERT(bufsize > F->slack_size);
+ F->expected_max_bptr = f->bufend - F->slack_size;
+ f->bptr = f->bstop = f->buffer;
+ f->name = af->name;
+ f->spout = fbatomic_spout;
+ f->close = fbatomic_close;
+ return f;
+}
+
+#ifdef TEST
+
+/*
+ * Self-test: first writes 17 four-byte records to "test" through a 16-byte
+ * buffer, then interleaves variable-size writes from two fastbufs sharing
+ * the file "test2", committing after each of them.
+ */
+int main(int argc UNUSED, char **argv UNUSED)
+{
+ struct fastbuf *f, *g;
+
+ // Always trace in the test
+ trace = 1;
+
+ msg(L_INFO, "Testing block writes");
+ f = fbatomic_open("test", NULL, 16, 4);
+ for (u32 i=0; i<17; i++)
+ bwrite(f, &i, 4);
+ bclose(f);
+
+ msg(L_INFO, "Testing interleaved var-size writes");
+ f = fbatomic_open("test2", NULL, 23, -5);
+ g = fbatomic_open("test2", f, 23, -5);
+ for (int i=0; i<100; i++)
+ {
+ /* Alternate between the two fastbufs attached to the same file */
+ struct fastbuf *x = (i%2) ? g : f;
+ bprintf(x, "%c<%d>\n", "fg"[i%2], ((259309*i) % 1000000) >> (i % 8));
+ fbatomic_commit(x);
+ }
+ bclose(f);
+ bclose(g);
+
+ return 0;
+}
+
+#endif
--- /dev/null
+# Test for fb-atomic
+
+Run: ../obj/ucw/fb-atomic-t 2>&1 | sed -e 's/^\(.\) [^ ]* [^ ]* /\1 /'
+Out: I Testing block writes
+ I Testing interleaved var-size writes
+ D FB_ATOMIC: Reallocating buffer for atomic file test2 with slack 10
+ D FB_ATOMIC: Reallocating buffer for atomic file test2 with slack 10
--- /dev/null
+/*
+ * UCW Library -- Fast Buffered I/O on Static Buffers
+ *
+ * (c) 2003--2006 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+#include "ucw/fastbuf.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+/* Refill callback of the read-only buffer: there is never more data, so it always returns 0. */
+static int
+fbbuf_refill(struct fastbuf *f UNUSED)
+{
+ return 0;
+}
+
+/*
+ * Seek callback of the read-only buffer. @pos is taken relative to the start
+ * of the buffer, or to its end for SEEK_END, and must land inside the buffer
+ * (the end itself is allowed). Always returns 1.
+ */
+static int
+fbbuf_seek(struct fastbuf *f, ucw_off_t pos, int whence)
+{
+  ucw_off_t len = f->bufend - f->buffer;
+
+  /* Somebody might want to seek to the end of buffer, try to be nice to him. */
+  pos += (whence == SEEK_END) ? len : 0;
+  ASSERT(pos >= 0 && pos <= len);
+
+  /* The whole buffer stays readable; only the cursor moves */
+  f->bstop = f->bufend;
+  f->pos = len;
+  f->bptr = f->buffer + pos;
+  return 1;
+}
+
+/*
+ * Initialize @f as a read-only fastbuf over the @size bytes at @buf.
+ * @can_overwrite is stored as the fastbuf's `can_overwrite_buffer` setting.
+ */
+void
+fbbuf_init_read(struct fastbuf *f, byte *buf, uns size, uns can_overwrite)
+{
+  byte *end = buf + size;
+
+  /* All data are available right away, with the cursor at the beginning */
+  f->buffer = buf;
+  f->bptr = buf;
+  f->bstop = end;
+  f->bufend = end;
+  f->pos = size;
+  f->name = "fbbuf-read";
+  f->can_overwrite_buffer = can_overwrite;
+
+  /* Only refilling and seeking have callbacks */
+  f->refill = fbbuf_refill;
+  f->seek = fbbuf_seek;
+  f->spout = NULL;
+  f->close = NULL;
+  f->config = NULL;
+}
+
+/* Spout callback of the write buffer: the buffer cannot be emptied anywhere, so this is fatal. */
+static void
+fbbuf_spout(struct fastbuf *f UNUSED)
+{
+ die("fbbuf: buffer overflow on write");
+}
+
+/*
+ * Initialize @f as a write-only fastbuf storing data into the @size bytes
+ * at @buf. The spout callback of such a fastbuf dies with a buffer overflow
+ * message.
+ */
+void
+fbbuf_init_write(struct fastbuf *f, byte *buf, uns size)
+{
+  /* Empty buffer: start, cursor and read limit all coincide */
+  f->bptr = buf;
+  f->bstop = buf;
+  f->buffer = buf;
+  f->bufend = buf + size;
+  f->pos = size;
+  f->name = "fbbuf-write";
+  f->can_overwrite_buffer = 0;
+
+  /* Only spouting has a callback */
+  f->spout = fbbuf_spout;
+  f->refill = NULL;
+  f->seek = NULL;
+  f->close = NULL;
+  f->config = NULL;
+}
+
+#ifdef TEST
+
+/*
+ * Self-test selected by the first character of argv[1]:
+ * r -- read lines from a static string and print them,
+ * w -- write a string which fits in the buffer and print the buffer,
+ * o -- write more than the 4-byte buffer can hold.
+ * Returns 1 when no test is given, 0 otherwise.
+ */
+int main(int argc, char *argv[])
+{
+ if (argc < 2)
+ {
+ fprintf(stderr, "You must specify a test (r, w, o)\n");
+ return 1;
+ }
+ switch (*argv[1])
+ {
+ case 'r':
+ {
+ struct fastbuf fb;
+ char *data = "Two\nlines\n";
+ fbbuf_init_read(&fb, data, strlen(data), 0);
+ char buffer[10];
+ while (bgets(&fb, buffer, 10))
+ puts(buffer);
+ bclose(&fb);
+ break;
+ }
+ case 'w':
+ {
+ struct fastbuf fb;
+ char buff[20];
+ fbbuf_init_write(&fb, buff, 20);
+ bputs(&fb, "Hello world\n");
+ bputc(&fb, 0); /* Terminate the string so that it can be printed by fputs() */
+ fputs(buff, stdout);
+ break;
+ }
+ case 'o':
+ {
+ struct fastbuf fb;
+ char buff[4];
+ fbbuf_init_write(&fb, buff, 4);
+ bputs(&fb, "Hello"); /* Five characters into a four-byte buffer */
+ bputc(&fb, 0);
+ fputs(buff, stdout);
+ break;
+ }
+ }
+ return 0;
+}
+
+#endif
--- /dev/null
+# Tests for fb-buffer.c
+
+Name: Read
+Run: ../obj/ucw/fb-buffer-t r
+Out: Two
+ lines
+
+Name: Write
+Run: ../obj/ucw/fb-buffer-t w
+Out: Hello world
+
+Name: Overflow
+Run: ../obj/ucw/fb-buffer-t o 2>&1 | grep 'buffer overflow on write'
--- /dev/null
+/*
+ * UCW Library -- Fast Buffered I/O on O_DIRECT Files
+ *
+ * (c) 2006--2007 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+/*
+ * This is a fastbuf backend for fast streaming I/O using O_DIRECT and
+ * the asynchronous I/O module. It's designed for use on large files
+ * which don't fit in the disk cache.
+ *
+ * CAVEATS:
+ *
+ * - All operations with a single fbdirect handle must be done
+ * within a single thread, unless you provide a custom I/O queue
+ * and take care of locking.
+ *
+ * FIXME: what if the OS doesn't support O_DIRECT?
+ * FIXME: unaligned seeks and partial writes?
+ * FIXME: append to unaligned file
+ */
+
+#undef LOCAL_DEBUG
+
+#include "ucw/lib.h"
+#include "ucw/fastbuf.h"
+#include "ucw/lfs.h"
+#include "ucw/asio.h"
+#include "ucw/conf.h"
+#include "ucw/threads.h"
+
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdio.h>
+
+#define FBDIR_ALIGN 512
+
+enum fbdir_mode { // Current operating mode
+ M_NULL, // No operation in progress (entered by seek and close)
+ M_READ, // Reading (entered by refill)
+ M_WRITE // Writing (entered by spout)
+};
+
+/* Private state of a fastbuf backed by the asynchronous I/O queue. */
+struct fb_direct {
+ struct fastbuf fb;
+ int fd; // File descriptor
+ int is_temp_file; // Value set through BCONFIG_IS_TEMP_FILE, passed to bclose_file_helper()
+ struct asio_queue *io_queue; // I/O queue to use
+ struct asio_queue *user_queue; // If io_queue was supplied by the user
+ struct asio_request *pending_read; // Read request submitted, but not completed yet
+ struct asio_request *done_read; // Completed read request not yet consumed by refill
+ struct asio_request *active_buffer; // Request whose buffer the fastbuf currently works with
+ enum fbdir_mode mode;
+ byte name[0]; // File name, stored right after the structure
+};
+/* Convert a fastbuf pointer to the enclosing struct fb_direct. */
+#define FB_DIRECT(f) ((struct fb_direct *)(f)->is_fastbuf)
+
+#ifndef TEST
+/*
+ * When set to non-zero, fbdir_spout() neither insists on aligned file
+ * positions nor pads and truncates writes of unaligned length.
+ */
+uns fbdir_cheat;
+
+/* Configuration section "FBDirect" with the single item "Cheat". */
+static struct cf_section fbdir_cf = {
+ CF_ITEMS {
+ CF_UNS("Cheat", &fbdir_cheat),
+ CF_END
+ }
+};
+
+/* Constructor hook: declares the "FBDirect" configuration section. */
+static void CONSTRUCTOR
+fbdir_global_init(void)
+{
+ cf_declare_section("FBDirect", &fbdir_cf, 0);
+}
+#endif
+
+/*
+ * Wait until the pending read of @F (if any) completes. Each request
+ * returned by the queue is recorded as done in the handle found in its
+ * user_data, which need not be @F itself.
+ */
+static void
+fbdir_read_sync(struct fb_direct *F)
+{
+ while (F->pending_read)
+ {
+ struct asio_request *r = asio_wait(F->io_queue);
+ ASSERT(r);
+ struct fb_direct *G = r->user_data; /* Owner of the completed request */
+ ASSERT(G);
+ ASSERT(G->pending_read == r && !G->done_read);
+ G->pending_read = NULL;
+ G->done_read = r;
+ }
+}
+
+/*
+ * Switch @F to operating mode @mode. Leaving the read mode waits for the
+ * read-ahead and returns its request; leaving the write mode waits for all
+ * write-backs. The active buffer is given back to the queue in any case.
+ * Does nothing if @F already is in the requested mode.
+ */
+static void
+fbdir_change_mode(struct fb_direct *F, enum fbdir_mode mode)
+{
+ if (F->mode == mode)
+ return;
+ DBG("FB-DIRECT: Switching mode to %d", mode);
+ switch (F->mode) /* Clean up after the mode we are leaving */
+ {
+ case M_NULL:
+ break;
+ case M_READ:
+ fbdir_read_sync(F); // Wait for read-ahead requests to finish
+ if (F->done_read) // Return read-ahead requests if any
+ {
+ asio_put(F->done_read);
+ F->done_read = NULL;
+ }
+ break;
+ case M_WRITE:
+ asio_sync(F->io_queue); // Wait for pending writebacks
+ break;
+ }
+ if (F->active_buffer)
+ {
+ asio_put(F->active_buffer);
+ F->active_buffer = NULL;
+ }
+ F->mode = mode;
+}
+
+/*
+ * Submit an asynchronous read of one queue buffer from the file of @F
+ * and remember the request as the pending read of @F.
+ */
+static void
+fbdir_submit_read(struct fb_direct *F)
+{
+  struct asio_queue *queue = F->io_queue;
+  struct asio_request *req = asio_get(queue);
+
+  req->op = ASIO_READ;
+  req->fd = F->fd;
+  req->len = queue->buffer_size;
+  req->user_data = F;		/* Lets the completion be attributed back to this handle */
+  asio_submit(req);
+  F->pending_read = req;
+}
+
+/*
+ * Refill callback: make the next block of the file the active buffer.
+ * A finished read-ahead is used if there is one; otherwise a read is
+ * submitted (switching to the read mode if none is pending) and waited for.
+ * Returns the number of bytes read, or 0 when the read returned no data.
+ * Dies on read errors. After a successful read, the next block is requested
+ * in advance.
+ */
+static int
+fbdir_refill(struct fastbuf *f)
+{
+ struct fb_direct *F = FB_DIRECT(f);
+
+ DBG("FB-DIRECT: Refill");
+
+ if (!F->done_read)
+ {
+ if (!F->pending_read)
+ {
+ fbdir_change_mode(F, M_READ);
+ fbdir_submit_read(F);
+ }
+ fbdir_read_sync(F);
+ ASSERT(F->done_read);
+ }
+
+ /* Replace the active buffer by the one which has just been read */
+ struct asio_request *r = F->done_read;
+ F->done_read = NULL;
+ if (F->active_buffer)
+ asio_put(F->active_buffer);
+ F->active_buffer = r;
+ if (!r->status)
+ return 0;
+ if (r->status < 0)
+ die("Error reading %s: %s", f->name, strerror(r->returned_errno));
+ f->bptr = f->buffer = r->buffer;
+ f->bstop = f->bufend = f->buffer + r->status;
+ f->pos += r->status;
+
+ fbdir_submit_read(F); // Read-ahead the next block
+
+ return r->status;
+}
+
+/*
+ * Spout callback: switch to the write mode, submit the data written to the
+ * active buffer (if any) as a write-back request and set up a fresh buffer
+ * for further writing.
+ *
+ * Unless fbdir_cheat is set, a write of a length which is not a multiple of
+ * FBDIR_ALIGN is padded to the alignment, waited for, and the file is then
+ * truncated to the real position.
+ */
+static void
+fbdir_spout(struct fastbuf *f)
+{
+ struct fb_direct *F = FB_DIRECT(f);
+ struct asio_request *r;
+
+ DBG("FB-DIRECT: Spout");
+
+ fbdir_change_mode(F, M_WRITE);
+ r = F->active_buffer;
+ if (r && f->bptr > f->bstop)
+ {
+ r->op = ASIO_WRITE_BACK;
+ r->fd = F->fd;
+ r->len = f->bptr - f->bstop;
+ ASSERT(!(f->pos % FBDIR_ALIGN) || fbdir_cheat);
+ f->pos += r->len;
+ if (!fbdir_cheat && r->len % FBDIR_ALIGN) // Have to simulate incomplete writes
+ {
+ r->len = ALIGN_TO(r->len, FBDIR_ALIGN);
+ asio_submit(r);
+ asio_sync(F->io_queue);
+ DBG("FB-DIRECT: Truncating at %llu", (long long)f->pos);
+ if (ucw_ftruncate(F->fd, f->pos) < 0)
+ die("Error truncating %s: %m", f->name);
+ }
+ else
+ asio_submit(r);
+ r = NULL; /* The request has been submitted, we need another buffer */
+ }
+ if (!r)
+ r = asio_get(F->io_queue);
+ f->bstop = f->bptr = f->buffer = r->buffer;
+ f->bufend = f->buffer + F->io_queue->buffer_size;
+ F->active_buffer = r;
+}
+
+/*
+ * Seek callback. A SEEK_SET to the current position is a no-op; otherwise
+ * all asynchronous operations are finished first and the descriptor is
+ * repositioned. Returns 1 on success, 0 if the underlying seek fails.
+ */
+static int
+fbdir_seek(struct fastbuf *f, ucw_off_t pos, int whence)
+{
+ DBG("FB-DIRECT: Seek %llu %d", (long long)pos, whence);
+
+ if (whence == SEEK_SET && pos == f->pos)
+ return 1;
+
+ fbdir_change_mode(FB_DIRECT(f), M_NULL); // Wait for all async requests to finish
+ ucw_off_t l = ucw_seek(FB_DIRECT(f)->fd, pos, whence);
+ if (l < 0)
+ return 0;
+ f->pos = l;
+ return 1;
+}
+
+/*
+ * Get a reference to the I/O queue kept in the context of the current
+ * thread. The queue is created on first use with the given @buffer_size and
+ * @write_back limit; if it already exists, these parameters are not used.
+ */
+static struct asio_queue *
+fbdir_get_io_queue(uns buffer_size, uns write_back)
+{
+ struct ucwlib_context *ctx = ucwlib_thread_context();
+ struct asio_queue *q = ctx->io_queue;
+ if (!q)
+ {
+ q = xmalloc_zero(sizeof(struct asio_queue));
+ q->buffer_size = buffer_size;
+ q->max_writebacks = write_back;
+ asio_init_queue(q);
+ ctx->io_queue = q;
+ }
+ q->use_count++;
+ DBG("FB-DIRECT: Got I/O queue, uc=%d", q->use_count);
+ return q;
+}
+
+/*
+ * Drop one reference to the I/O queue of the current thread and destroy
+ * the queue when this was the last reference.
+ */
+static void
+fbdir_put_io_queue(void)
+{
+ struct ucwlib_context *ctx = ucwlib_thread_context();
+ struct asio_queue *q = ctx->io_queue;
+ ASSERT(q);
+ DBG("FB-DIRECT: Put I/O queue, uc=%d", q->use_count);
+ if (!--q->use_count)
+ {
+ asio_cleanup_queue(q);
+ xfree(q);
+ ctx->io_queue = NULL;
+ }
+}
+
+/*
+ * Close callback: finish all asynchronous operations, release the
+ * per-thread I/O queue unless the queue was supplied by the user, let
+ * bclose_file_helper() deal with the file and free the fastbuf.
+ */
+static void
+fbdir_close(struct fastbuf *f)
+{
+ struct fb_direct *F = FB_DIRECT(f);
+
+ DBG("FB-DIRECT: Close");
+
+ fbdir_change_mode(F, M_NULL);
+ if (!F->user_queue)
+ fbdir_put_io_queue();
+
+ bclose_file_helper(f, F->fd, F->is_temp_file);
+ xfree(f);
+}
+
+/*
+ * Config callback. The only item supported is BCONFIG_IS_TEMP_FILE, which is
+ * set to @value; its previous value is returned. Returns -1 for any other
+ * item.
+ */
+static int
+fbdir_config(struct fastbuf *f, uns item, int value)
+{
+  struct fb_direct *F = FB_DIRECT(f);
+
+  if (item != BCONFIG_IS_TEMP_FILE)
+    return -1;
+
+  int previous = F->is_temp_file;
+  F->is_temp_file = value;
+  return previous;
+}
+
+/*
+ * Create a fastbuf on top of the descriptor @fd, with @name used in
+ * messages. If @q is given, it is used as the I/O queue; otherwise the
+ * queue of the current thread is used (created with @buffer_size and
+ * @write_back when needed). @read_ahead is currently unused.
+ */
+struct fastbuf *
+fbdir_open_fd_internal(int fd, const char *name, struct asio_queue *q, uns buffer_size, uns read_ahead UNUSED, uns write_back)
+{
+ int namelen = strlen(name) + 1;
+ struct fb_direct *F = xmalloc(sizeof(struct fb_direct) + namelen);
+ struct fastbuf *f = &F->fb;
+
+ DBG("FB-DIRECT: Open");
+ bzero(F, sizeof(*F));
+ f->name = F->name; /* The name is stored right after the structure */
+ memcpy(f->name, name, namelen);
+ F->fd = fd;
+ if (q)
+ F->io_queue = F->user_queue = q;
+ else
+ F->io_queue = fbdir_get_io_queue(buffer_size, write_back);
+ f->refill = fbdir_refill;
+ f->spout = fbdir_spout;
+ f->seek = fbdir_seek;
+ f->close = fbdir_close;
+ f->config = fbdir_config;
+ f->can_overwrite_buffer = 2;
+ return f;
+}
+
+#ifdef TEST
+
+#include "ucw/getopt.h"
+
+/*
+ * Self-test: copy the input (first non-option argument, or descriptor 0)
+ * to the output (second argument, or descriptor 1) using fastbufs of type
+ * FB_DIRECT, check that both ended at the same position, then rewind the
+ * output and read one character back.
+ */
+int main(int argc, char **argv)
+{
+ struct fb_params par = { .type = FB_DIRECT };
+ struct fastbuf *f, *t;
+
+ log_init(NULL);
+ if (cf_getopt(argc, argv, CF_SHORT_OPTS, CF_NO_LONG_OPTS, NULL) >= 0)
+ die("Hey, whaddya want?");
+ f = (optind < argc) ? bopen_file(argv[optind++], O_RDONLY, &par) : bopen_fd(0, &par);
+ t = (optind < argc) ? bopen_file(argv[optind++], O_RDWR | O_CREAT | O_TRUNC, &par) : bopen_fd(1, &par);
+
+ bbcopy(f, t, ~0U);
+ ASSERT(btell(f) == btell(t));
+
+#if 0 // This triggers unaligned write
+ bflush(t);
+ bputc(t, '\n');
+#endif
+
+ brewind(t);
+ bgetc(t);
+ ASSERT(btell(t) == 1);
+
+ bclose(f);
+ bclose(t);
+ return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Fast Buffered I/O on Files
+ *
+ * (c) 1997--2007 Martin Mares <mj@ucw.cz>
+ * (c) 2007 Pavel Charvat <pchar@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+#include "ucw/fastbuf.h"
+#include "ucw/lfs.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+/* Private state of a fastbuf on a file descriptor; the data buffer follows the structure. */
+struct fb_file {
+ struct fastbuf fb;
+ int fd; /* File descriptor */
+ int is_temp_file; /* Value set through BCONFIG_IS_TEMP_FILE, passed to bclose_file_helper() */
+ int keep_back_buf; /* Optimize for backwards reading */
+ ucw_off_t wpos; /* Real file position */
+ uns wlen; /* Window size */
+};
+/* Convert a fastbuf pointer to the enclosing struct fb_file. */
+#define FB_FILE(f) ((struct fb_file *)(f)->is_fastbuf)
+/* Start of the data buffer, located right after the structure. */
+#define FB_BUFFER(f) (byte *)(FB_FILE(f) + 1)
+
+/*
+ * Refill callback: load a new window of the file so that reading can
+ * continue at the logical position f->pos. As bfd_seek() only records the
+ * requested position, this is also the place where delayed seeks are
+ * carried out: depending on the distance and direction between the real
+ * file position (F->wpos) and f->pos, the data are skipped by reading,
+ * the descriptor is repositioned by ucw_seek(), or a part of the previous
+ * window is reused. If keep_back_buf is set, a part of the buffer is
+ * reserved for data preceding the requested position.
+ *
+ * Returns the number of bytes available for reading (1 when the position
+ * was found in the previous window), or 0 at the end of file. Dies on I/O
+ * errors.
+ */
+static int
+bfd_refill(struct fastbuf *f)
+{
+ struct fb_file *F = FB_FILE(f);
+ byte *read_ptr = (f->buffer = FB_BUFFER(f));
+ uns blen = f->bufend - f->buffer, back = F->keep_back_buf ? blen >> 2 : 0, read_len = blen;
+ /* Forward or no seek */
+ if (F->wpos <= f->pos)
+ {
+ ucw_off_t diff = f->pos - F->wpos;
+ /* Formula for long forward seeks (prefer lseek()) */
+ if (diff > ((ucw_off_t)blen << 2))
+ {
+long_seek:
+ f->bptr = f->buffer + back;
+ f->bstop = f->buffer + blen;
+ goto seek;
+ }
+ /* Short forward seek (prefer read() to skip data )*/
+ else if ((uns)diff >= back)
+ {
+ uns skip = diff - back;
+ F->wpos += skip;
+ while (skip)
+ {
+ int l = read(F->fd, f->buffer, MIN(skip, blen));
+ if (unlikely(l <= 0))
+ if (l < 0)
+ die("Error reading %s: %m", f->name);
+ else
+ {
+ F->wpos -= skip;
+ goto eof;
+ }
+ skip -= l;
+ }
+ }
+ /* Reuse part of the previous window and append new data (also F->wpos == f->pos) */
+ else
+ {
+ uns keep = back - (uns)diff;
+ if (keep >= F->wlen)
+ back = diff + (keep = F->wlen);
+ else
+ memmove(f->buffer, f->buffer + F->wlen - keep, keep);
+ read_len -= keep;
+ read_ptr += keep;
+ }
+ f->bptr = f->buffer + back;
+ f->bstop = f->buffer + blen;
+ }
+ /* Backwards seek */
+ else
+ {
+ ucw_off_t diff = F->wpos - f->pos;
+ /* Formula for long backwards seeks (keep smaller backbuffer than for shorter seeks ) */
+ if (diff > ((ucw_off_t)blen << 1))
+ {
+ if ((ucw_off_t)back > f->pos)
+ back = f->pos;
+ goto long_seek;
+ }
+ /* Seek into previous window (do nothing... for example brewind) */
+ else if ((uns)diff <= F->wlen)
+ {
+ f->bstop = f->buffer + F->wlen;
+ f->bptr = f->bstop - diff;
+ f->pos = F->wpos;
+ return 1;
+ }
+ back *= 3;
+ if ((ucw_off_t)back > f->pos)
+ back = f->pos;
+ f->bptr = f->buffer + back;
+ read_len = blen;
+ f->bstop = f->buffer + read_len;
+ /* Reuse part of previous window */
+ if (F->wlen && read_len <= back + diff && read_len > back + diff - F->wlen)
+ {
+ uns keep = read_len + F->wlen - back - diff;
+ memmove(f->buffer + read_len - keep, f->buffer, keep);
+ }
+seek:
+ /* Do lseek() */
+ F->wpos = f->pos + (f->buffer - f->bptr);
+ if (ucw_seek(F->fd, F->wpos, SEEK_SET) < 0)
+ die("Error seeking %s: %m", f->name);
+ }
+ /* Read (part of) buffer */
+ do
+ {
+ int l = read(F->fd, read_ptr, read_len);
+ if (unlikely(l < 0))
+ die("Error reading %s: %m", f->name);
+ if (!l)
+ if (unlikely(read_ptr < f->bptr))
+ goto eof;
+ else
+ break; /* Incomplete read because of EOF */
+ read_ptr += l;
+ read_len -= l;
+ F->wpos += l;
+ }
+ while (read_ptr <= f->bptr);
+ if (read_len)
+ f->bstop = read_ptr;
+ f->pos += f->bstop - f->bptr;
+ F->wlen = f->bstop - f->buffer;
+ return f->bstop - f->bptr;
+eof:
+ /* Seeked behind EOF */
+ f->bptr = f->bstop = f->buffer;
+ F->wlen = 0;
+ return 0;
+}
+
+/*
+ * Spout callback: write the contents of the buffer to the file at the
+ * position f->pos, repositioning the descriptor first if the real file
+ * position differs. Partial writes are retried until everything is written;
+ * any failure is fatal. The read window is invalidated.
+ */
+static void
+bfd_spout(struct fastbuf *f)
+{
+ /* Do delayed lseek() if needed */
+ if (FB_FILE(f)->wpos != f->pos && ucw_seek(FB_FILE(f)->fd, f->pos, SEEK_SET) < 0)
+ die("Error seeking %s: %m", f->name);
+
+ int l = f->bptr - f->buffer;
+ byte *c = f->buffer;
+
+ /* Write the buffer */
+ FB_FILE(f)->wpos = (f->pos += l);
+ FB_FILE(f)->wlen = 0;
+ while (l)
+ {
+ int z = write(FB_FILE(f)->fd, c, l);
+ if (z <= 0)
+ die("Error writing %s: %m", f->name);
+ l -= z;
+ c += z;
+ }
+ f->bptr = f->buffer = FB_BUFFER(f);
+}
+
+/*
+ * Seek callback. For SEEK_SET and SEEK_CUR only the logical position is
+ * updated; for SEEK_END the descriptor is repositioned immediately and the
+ * read window is invalidated. Returns 1 on success and 0 on failure (a
+ * SEEK_CUR result which moved the wrong way, or a failed seek to the end).
+ */
+static int
+bfd_seek(struct fastbuf *f, ucw_off_t pos, int whence)
+{
+ /* Delay the seek for the next refill() or spout() call (if whence != SEEK_END). */
+ ucw_off_t l;
+ switch (whence)
+ {
+ case SEEK_SET:
+ f->pos = pos;
+ return 1;
+ case SEEK_CUR:
+ l = f->pos + pos;
+ /* The position must have moved in the direction given by the sign of pos */
+ if ((pos > 0) ^ (l > f->pos))
+ return 0;
+ f->pos = l;
+ return 1;
+ case SEEK_END:
+ l = ucw_seek(FB_FILE(f)->fd, pos, SEEK_END);
+ if (l < 0)
+ return 0;
+ FB_FILE(f)->wpos = f->pos = l;
+ FB_FILE(f)->wlen = 0;
+ return 1;
+ default:
+ ASSERT(0);
+ }
+}
+
+/* Close callback: let bclose_file_helper() deal with the file, then free the fastbuf. */
+static void
+bfd_close(struct fastbuf *f)
+{
+ bclose_file_helper(f, FB_FILE(f)->fd, FB_FILE(f)->is_temp_file);
+ xfree(f);
+}
+
+/*
+ * Config callback. Sets BCONFIG_IS_TEMP_FILE or BCONFIG_KEEP_BACK_BUF to
+ * @value and returns the previous value of the item. Returns -1 for
+ * unknown items.
+ */
+static int
+bfd_config(struct fastbuf *f, uns item, int value)
+{
+  struct fb_file *F = FB_FILE(f);
+  int *slot;
+
+  /* Find the field the item is stored in */
+  switch (item)
+    {
+    case BCONFIG_IS_TEMP_FILE:
+      slot = &F->is_temp_file;
+      break;
+    case BCONFIG_KEEP_BACK_BUF:
+      slot = &F->keep_back_buf;
+      break;
+    default:
+      return -1;
+    }
+
+  int previous = *slot;
+  *slot = value;
+  return previous;
+}
+
+/*
+ * Create a fastbuf on top of the descriptor @fd with a buffer of @buflen
+ * bytes (which must be non-zero). The structure, the buffer and a copy of
+ * @name (used in messages) are placed in a single memory block, in this
+ * order.
+ */
+struct fastbuf *
+bfdopen_internal(int fd, const char *name, uns buflen)
+{
+  ASSERT(buflen);
+  int namelen = strlen(name) + 1;
+  /* xmalloc_zero() hands back cleared memory, so fields not set below stay zero */
+  struct fb_file *F = xmalloc_zero(sizeof(struct fb_file) + buflen + namelen);
+  struct fastbuf *f = &F->fb;
+
+  f->buffer = (byte *)(F+1);
+  f->bptr = f->bstop = f->buffer;
+  f->bufend = f->buffer + buflen;
+  f->name = f->bufend;			/* The name lives just behind the buffer */
+  memcpy(f->name, name, namelen);
+  F->fd = fd;
+  f->refill = bfd_refill;
+  f->spout = bfd_spout;
+  f->seek = bfd_seek;
+  f->close = bfd_close;
+  f->config = bfd_config;
+  f->can_overwrite_buffer = 2;
+  return f;
+}
+
+/*
+ * Flush the fastbuf @b and call fsync() on its descriptor.
+ * A failing fsync() is only logged as an error.
+ */
+void
+bfilesync(struct fastbuf *b)
+{
+ bflush(b);
+ if (fsync(FB_FILE(b)->fd) < 0)
+ msg(L_ERROR, "fsync(%s) failed: %m", b->name);
+}
+
+#ifdef TEST
+
+/*
+ * Self-test: write "<hello>" 16 times to a temporary file through a small
+ * buffer, then rewind it and copy it to descriptor 1, printing the
+ * positions reported by btell() along the way.
+ */
+int main(void)
+{
+ struct fastbuf *f, *t;
+ f = bopen_tmp(16);
+ t = bfdopen_shared(1, 13);
+ for (uns i = 0; i < 16; i++)
+ bwrite(f, "<hello>", 7);
+ bprintf(t, "%d\n", (int)btell(f));
+ brewind(f);
+ bbcopy(f, t, ~0U);
+ bprintf(t, "\n%d %d\n", (int)btell(f), (int)btell(t));
+ bclose(f);
+ bclose(t);
+ return 0;
+}
+
+#endif
--- /dev/null
+#Tests for fb-file
+
+Run: ../obj/ucw/fb-file-t
+Out: 112
+ <hello><hello><hello><hello><hello><hello><hello><hello><hello><hello><hello><hello><hello><hello><hello><hello>
+ 112 116
+
--- /dev/null
+/*
+ * UCW Library -- Fast Buffered I/O on Growing Buffers
+ *
+ * (c) 2006 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+#include "ucw/fastbuf.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+/* Private state of a fastbuf on a growing memory buffer. */
+struct fb_gbuf {
+ struct fastbuf fb;
+ byte *last_written; /* End of the written data as recorded by fbgrow_rewind(); NULL after fbgrow_reset() */
+};
+/* Convert a fastbuf pointer to the enclosing struct fb_gbuf. */
+#define FB_GBUF(f) ((struct fb_gbuf *)(f)->is_fastbuf)
+
+/*
+ * Refill callback: if the end of the readable data does not match the
+ * recorded end of the written data, make all written data readable and
+ * return 1. Otherwise everything has been read and 0 is returned.
+ */
+static int
+fbgrow_refill(struct fastbuf *b)
+{
+  byte *written_end = FB_GBUF(b)->last_written;
+
+  if (b->bstop == written_end)
+    return 0;			/* We are at the end */
+
+  /* There was an intervening flush */
+  b->bstop = written_end;
+  b->pos = b->bstop - b->buffer;
+  return 1;
+}
+
+/*
+ * Spout callback: when the buffer is completely full, reallocate it to
+ * twice its size, keeping the data written so far. Otherwise nothing
+ * needs to be done, as the data stay in memory.
+ */
+static void
+fbgrow_spout(struct fastbuf *b)
+{
+ if (b->bptr >= b->bufend)
+ {
+ uns len = b->bufend - b->buffer;
+ b->buffer = xrealloc(b->buffer, 2*len);
+ /* The buffer may have moved, so recompute all pointers into it */
+ b->bufend = b->buffer + 2*len;
+ b->bstop = b->buffer;
+ b->bptr = b->buffer + len;
+ }
+}
+
+/*
+ * Seek callback. @pos is taken relative to the start of the written data,
+ * or to their end for SEEK_END, and must not fall outside them. Always
+ * returns 1.
+ */
+static int
+fbgrow_seek(struct fastbuf *b, ucw_off_t pos, int whence)
+{
+  ASSERT(FB_GBUF(b)->last_written);	/* Seeks allowed only in read mode */
+  byte *written_end = FB_GBUF(b)->last_written;
+  ucw_off_t len = written_end - b->buffer;
+
+  pos += (whence == SEEK_END) ? len : 0;
+  ASSERT(pos >= 0 && pos <= len);
+
+  /* All written data are readable; only the cursor moves */
+  b->bstop = written_end;
+  b->pos = len;
+  b->bptr = b->buffer + pos;
+  return 1;
+}
+
+/* Close callback: free both the data buffer and the fastbuf structure. */
+static void
+fbgrow_close(struct fastbuf *b)
+{
+ xfree(b->buffer);
+ xfree(b);
+}
+
+/*
+ * Create a fastbuf on a growing memory buffer whose initial size is
+ * @basic_size bytes. The buffer is doubled by the spout callback whenever
+ * it fills up.
+ */
+struct fastbuf *
+fbgrow_create(unsigned basic_size)
+{
+ struct fastbuf *b = xmalloc_zero(sizeof(struct fb_gbuf));
+ b->buffer = xmalloc(basic_size);
+ b->bufend = b->buffer + basic_size;
+ b->bptr = b->bstop = b->buffer;
+ b->name = "<fbgbuf>";
+ b->refill = fbgrow_refill;
+ b->spout = fbgrow_spout;
+ b->seek = fbgrow_seek;
+ b->close = fbgrow_close;
+ b->can_overwrite_buffer = 1;
+ return b;
+}
+
+/*
+ * Discard the contents of the buffer and start over at position 0.
+ * The recorded end of the written data is forgotten.
+ */
+void
+fbgrow_reset(struct fastbuf *b)
+{
+ b->bptr = b->bstop = b->buffer;
+ b->pos = 0;
+ FB_GBUF(b)->last_written = NULL;
+}
+
+/*
+ * Move the cursor to the beginning of the buffer and make all written data
+ * readable. If no end of the written data is recorded yet, the current
+ * cursor position is recorded as that end.
+ */
+void
+fbgrow_rewind(struct fastbuf *b)
+{
+  struct fb_gbuf *g = FB_GBUF(b);
+
+  /* Last operation was a write, so remember the end position */
+  if (!g->last_written)
+    g->last_written = b->bptr;
+
+  byte *written_end = g->last_written;
+  b->bptr = b->buffer;
+  b->bstop = written_end;
+  b->pos = written_end - b->buffer;
+}
+
+#ifdef TEST
+
+/*
+ * Self-test: five times in a row, reset the buffer, write ten characters
+ * (forcing the 3-byte buffer to grow), then read them back both from the
+ * start and from one byte before the end, printing btell() at each step.
+ */
+int main(void)
+{
+ struct fastbuf *f;
+ uns t;
+
+ f = fbgrow_create(3);
+ for (uns i=0; i<5; i++)
+ {
+ fbgrow_reset(f);
+ bwrite(f, "12345", 5);
+ bwrite(f, "12345", 5);
+ printf("<%d>", (int)btell(f));
+ bflush(f);
+ printf("<%d>", (int)btell(f));
+ fbgrow_rewind(f);
+ printf("<%d>", (int)btell(f));
+ while ((t = bgetc(f)) != ~0U)
+ putchar(t);
+ printf("<%d>", (int)btell(f));
+ fbgrow_rewind(f);
+ bseek(f, -1, SEEK_END);
+ printf("<%d>", (int)btell(f));
+ while ((t = bgetc(f)) != ~0U)
+ putchar(t);
+ printf("<%d>\n", (int)btell(f));
+ }
+ bclose(f);
+ return 0;
+}
+
+#endif
--- /dev/null
+# Tests for fb-grow.c
+
+Run: ../obj/ucw/fb-grow-t
+Out: <10><10><0>1234512345<10><9>5<10>
+ <10><10><0>1234512345<10><9>5<10>
+ <10><10><0>1234512345<10><9>5<10>
+ <10><10><0>1234512345<10><9>5<10>
+ <10><10><0>1234512345<10><9>5<10>
+
--- /dev/null
+/*
+ * UCW Library -- Fast Buffered Input on Limited File Descriptors
+ *
+ * (c) 2003--2004 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+#include "ucw/fastbuf.h"
+
+#include <unistd.h>
+
+/* Private state of a fastbuf reading a limited number of bytes from a descriptor. */
+struct fb_limfd {
+ struct fastbuf fb;
+ int fd; /* File descriptor */
+ int limit; /* Reads stop once the position reaches this value */
+};
+/* Convert a fastbuf pointer to the enclosing struct fb_limfd. */
+#define FB_LIMFD(f) ((struct fb_limfd *)(f)->is_fastbuf)
+/* Start of the data buffer, located right after the structure. */
+#define FB_BUFFER(f) (byte *)(FB_LIMFD(f) + 1)
+
+/*
+ * Refill callback: read the next block from the descriptor, asking for no
+ * more than the buffer size and no more than what remains to the limit.
+ * Returns the number of bytes read; dies on read errors.
+ */
+static int
+bfl_refill(struct fastbuf *f)
+{
+ f->bptr = f->buffer = FB_BUFFER(f);
+ int max = MIN(FB_LIMFD(f)->limit - f->pos, f->bufend - f->buffer);
+ int l = read(FB_LIMFD(f)->fd, f->buffer, max);
+ if (l < 0)
+ die("Error reading %s: %m", f->name);
+ f->bstop = f->buffer + l;
+ f->pos += l;
+ return l;
+}
+
+/* Close callback: free the fastbuf. The descriptor itself is not closed here. */
+static void
+bfl_close(struct fastbuf *f)
+{
+ xfree(f);
+}
+
+/*
+ * Create a read-only fastbuf on the descriptor @fd with a buffer of @buflen
+ * bytes, which delivers no data beyond the position @limit. The buffer is
+ * allocated together with the structure, right behind it.
+ */
+struct fastbuf *
+bopen_limited_fd(int fd, uns buflen, uns limit)
+{
+  struct fb_limfd *F = xmalloc(sizeof(struct fb_limfd) + buflen);
+  struct fastbuf *f = &F->fb;
+
+  bzero(F, sizeof(*F));			/* Clear the structure only, not the buffer */
+  f->buffer = (byte *)(F+1);		/* Fastbuf buffers are byte pointers, not char pointers */
+  f->bptr = f->bstop = f->buffer;
+  f->bufend = f->buffer + buflen;
+  f->name = "limited-fd";
+  F->fd = fd;
+  F->limit = limit;
+  f->refill = bfl_refill;
+  f->close = bfl_close;
+  f->can_overwrite_buffer = 2;
+  return f;
+}
+
+#ifdef TEST
+
+/*
+ * Self-test: copy descriptor 0 to descriptor 1 through a fastbuf limited
+ * to 13 bytes with a 3-byte buffer.
+ */
+int main(int UNUSED argc, char UNUSED **argv)
+{
+ struct fastbuf *f = bopen_limited_fd(0, 3, 13);
+ struct fastbuf *o = bfdopen_shared(1, 16);
+ int c;
+ while ((c = bgetc(f)) >= 0)
+ bputc(o, c);
+ bclose(o);
+ bclose(f);
+ return 0;
+}
+
+#endif
--- /dev/null
+# Tests for fb-limfd
+
+Run: ../obj/ucw/fb-limfd-t
+In: 123456789012
+ 4567890
+Out: 123456789012
--- /dev/null
+/*
+ * UCW Library -- Fast Buffered I/O on Memory Streams
+ *
+ * (c) 1997--2002 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+#include "ucw/fastbuf.h"
+
+#include <stdio.h>
+
+/* A memory stream: a chain of data blocks shared by the writer and its read clones. */
+struct memstream {
+ unsigned blocksize; /* Capacity of each block, as passed to fbmem_create() */
+ unsigned uc; /* Number of fastbufs using the stream; it is freed when this drops to zero */
+ struct msblock *first; /* First block of the chain, NULL if nothing has been allocated yet */
+};
+
+/* One block of a memory stream. */
+struct msblock {
+ struct msblock *next; /* Next block of the chain or NULL */
+ ucw_off_t pos; /* Position of the first byte of the block within the stream */
+ unsigned size; /* Number of bytes of the block recorded as written */
+ byte data[0]; /* The data, allocated together with the block */
+};
+
+/* Private state of a fastbuf attached to a memory stream. */
+struct fb_mem {
+ struct fastbuf fb;
+ struct memstream *stream; /* The stream we read from or write to */
+ struct msblock *block; /* Block the fastbuf currently works with, NULL before the first one */
+};
+/* Convert a fastbuf pointer to the enclosing struct fb_mem. */
+#define FB_MEM(f) ((struct fb_mem *)(f)->is_fastbuf)
+
+/*
+ * Refill callback of a read clone. If the current block has grown since
+ * the last refill, the new data are made available; otherwise the reader
+ * moves on to the next block (or to the first one when it has none yet).
+ * Returns 1 if some data are available and 0 if there are none.
+ */
+static int
+fbmem_refill(struct fastbuf *f)
+{
+ struct memstream *s = FB_MEM(f)->stream;
+ struct msblock *b = FB_MEM(f)->block;
+
+ if (!b)
+ {
+ b = s->first;
+ if (!b)
+ return 0;
+ }
+ else if (f->buffer == b->data && f->bstop < b->data + b->size)
+ {
+ /* More data have been recorded in the block we are reading */
+ f->bstop = b->data + b->size;
+ f->pos = b->pos + b->size;
+ return 1;
+ }
+ else if (!b->next)
+ return 0;
+ else
+ b = b->next;
+ if (!b->size)
+ return 0;
+ f->buffer = f->bptr = b->data;
+ f->bufend = f->bstop = b->data + b->size;
+ f->pos = b->pos + b->size;
+ FB_MEM(f)->block = b;
+ return 1;
+}
+
+/*
+ * Spout callback of the writer: record how much of the current block has
+ * been written. If the block is not full yet, writing continues in it;
+ * otherwise (or if there is no block at all) a new block is allocated,
+ * linked to the end of the chain and made the current one.
+ */
+static void
+fbmem_spout(struct fastbuf *f)
+{
+ struct memstream *s = FB_MEM(f)->stream;
+ struct msblock *b = FB_MEM(f)->block;
+ struct msblock *bb;
+
+ if (b)
+ {
+ b->size = f->bptr - b->data;
+ if (b->size < s->blocksize)
+ return;
+ }
+ bb = xmalloc(sizeof(struct msblock) + s->blocksize);
+ if (b)
+ {
+ b->next = bb;
+ bb->pos = b->pos + b->size;
+ }
+ else
+ {
+ s->first = bb;
+ bb->pos = 0;
+ }
+ bb->next = NULL;
+ bb->size = 0;
+ f->buffer = f->bptr = f->bstop = bb->data;
+ f->bufend = bb->data + s->blocksize;
+ f->pos = bb->pos;
+ FB_MEM(f)->block = bb;
+}
+
+/*
+ * Seek callback of a read clone. Only SEEK_SET and SEEK_END are supported;
+ * for SEEK_END, @pos is relative to the total size of all blocks. The
+ * target must lie between the start and the end of the stream (both
+ * inclusive), otherwise the function dies. Returns 1 on success.
+ */
+static int
+fbmem_seek(struct fastbuf *f, ucw_off_t pos, int whence)
+{
+  struct memstream *m = FB_MEM(f)->stream;
+  struct msblock *b;
+
+  ASSERT(whence == SEEK_SET || whence == SEEK_END);
+  if (whence == SEEK_END)
+    {
+      for (b=m->first; b; b=b->next)
+        pos += b->size;
+    }
+
+  /*
+   * A negative offset would match the first block below and leave the
+   * cursor pointing before the block's data, so reject it explicitly.
+   */
+  if (pos < 0)
+    die("fbmem_seek to invalid offset");
+
+  /* Yes, this is linear. But considering the average number of buckets, it doesn't matter. */
+  for (b=m->first; b; b=b->next)
+    {
+      if (pos <= b->pos + (ucw_off_t)b->size) /* <=, because we need to be able to seek just after file end */
+        {
+          f->buffer = b->data;
+          f->bptr = b->data + (pos - b->pos);
+          f->bufend = f->bstop = b->data + b->size;
+          f->pos = b->pos + b->size;
+          FB_MEM(f)->block = b;
+          return 1;
+        }
+    }
+  if (!m->first && !pos)
+    {
+      /* Seeking to offset 0 in an empty file needs an exception */
+      f->buffer = f->bptr = f->bstop = f->bufend = NULL;	/* Keep bstop consistent with the other pointers */
+      f->pos = 0;
+      FB_MEM(f)->block = NULL;
+      return 1;
+    }
+  die("fbmem_seek to invalid offset");
+}
+
+/*
+ * Close callback: drop our reference to the stream and free the fastbuf.
+ * When the last user of the stream goes away, all its blocks and the
+ * stream itself are freed as well.
+ */
+static void
+fbmem_close(struct fastbuf *f)
+{
+  struct memstream *m = FB_MEM(f)->stream;
+
+  m->uc--;
+  if (!m->uc)
+    {
+      /* We were the last user: release the whole chain of blocks */
+      struct msblock *b = m->first;
+      while (b)
+        {
+          struct msblock *next = b->next;
+          xfree(b);
+          b = next;
+        }
+      xfree(m);
+    }
+  xfree(f);
+}
+
+/*
+ * Create a new memory stream whose blocks hold @blocksize bytes each and
+ * return a write-only fastbuf attached to it.
+ */
+struct fastbuf *
+fbmem_create(unsigned blocksize)
+{
+ struct fastbuf *f = xmalloc_zero(sizeof(struct fb_mem));
+ struct memstream *s = xmalloc_zero(sizeof(struct memstream));
+
+ s->blocksize = blocksize;
+ s->uc = 1;
+
+ FB_MEM(f)->stream = s;
+ f->name = "<fbmem-write>";
+ f->spout = fbmem_spout;
+ f->close = fbmem_close;
+ return f;
+}
+
+/*
+ * Create a read-only fastbuf attached to the memory stream of the fastbuf
+ * @b. @b is flushed first and the stream gains one more user.
+ */
+struct fastbuf *
+fbmem_clone_read(struct fastbuf *b)
+{
+ struct fastbuf *f = xmalloc_zero(sizeof(struct fb_mem));
+ struct memstream *s = FB_MEM(b)->stream;
+
+ bflush(b);
+ s->uc++;
+
+ FB_MEM(f)->stream = s;
+ f->name = "<fbmem-read>";
+ f->refill = fbmem_refill;
+ f->seek = fbmem_seek;
+ f->close = fbmem_close;
+ f->can_overwrite_buffer = 1;
+ return f;
+}
+
+#ifdef TEST
+
+/*
+ * Self-test: write to a memory stream with 7-byte blocks and read it back
+ * through a read clone, both while the writer is open and after it has
+ * been closed, including seeks to positions 0 and 3.
+ */
+int main(void)
+{
+ struct fastbuf *w, *r;
+ int t;
+
+ w = fbmem_create(7);
+ r = fbmem_clone_read(w);
+ bwrite(w, "12345", 5);
+ bwrite(w, "12345", 5);
+ printf("<%d>", (int)btell(w));
+ bflush(w);
+ printf("<%d>", (int)btell(w));
+ printf("<%d>", (int)btell(r));
+ while ((t = bgetc(r)) >= 0)
+ putchar(t);
+ printf("<%d>", (int)btell(r));
+ bwrite(w, "12345", 5);
+ bwrite(w, "12345", 5);
+ printf("<%d>", (int)btell(w));
+ bclose(w); /* The reader still holds a reference to the stream */
+ bsetpos(r, 0);
+ printf("<!%d>", (int)btell(r));
+ while ((t = bgetc(r)) >= 0)
+ putchar(t);
+ bsetpos(r, 3);
+ printf("<!%d>", (int)btell(r));
+ while ((t = bgetc(r)) >= 0)
+ putchar(t);
+ putchar('\n');
+ fflush(stdout);
+ bclose(r);
+ return 0;
+}
+
+#endif
--- /dev/null
+# Test for fb-mem.c
+
+Run: ../obj/ucw/fb-mem-t
+Out: <10><10><0>1234512345<10><20><!0>12345123451234512345<!3>45123451234512345
--- /dev/null
+/*
+ * UCW Library -- Fast Buffered I/O on Memory-Mapped Files
+ *
+ * (c) 2002 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#undef LOCAL_DEBUG
+
+#include "ucw/lib.h"
+#include "ucw/fastbuf.h"
+#include "ucw/lfs.h"
+#include "ucw/conf.h"
+
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/mman.h>
+
+/* Upper bound on the size of the mapped window (configurable as "WindowSize") */
+static uns mmap_window_size = 16*CPU_PAGE_SIZE;
+/* Amount added to the file extent when it is enlarged during writing (configurable as "ExtendSize") */
+static uns mmap_extend_size = 4*CPU_PAGE_SIZE;
+
+#ifndef TEST
+/* Configuration section "FBMMap" with the items "WindowSize" and "ExtendSize". */
+static struct cf_section fbmm_config = {
+ CF_ITEMS {
+ CF_UNS("WindowSize", &mmap_window_size),
+ CF_UNS("ExtendSize", &mmap_extend_size),
+ CF_END
+ }
+};
+
+/* Constructor hook: declares the "FBMMap" configuration section. */
+static void CONSTRUCTOR fbmm_init_config(void)
+{
+ cf_declare_section("FBMMap", &fbmm_config, 0);
+}
+#endif
+
+/* Private state of a fastbuf on a memory-mapped file. */
+struct fb_mmap {
+ struct fastbuf fb;
+ int fd; /* Descriptor of the mapped file */
+ int is_temp_file;
+ ucw_off_t file_size; /* Size of the data in the file; reading stops here */
+ ucw_off_t file_extend; /* Upper bound on where a mapped window may end; enlarged by spout when writing reaches it */
+ ucw_off_t window_pos; /* File offset of the start of the mapped window */
+ uns window_size; /* Length of the current mapping (page-aligned) */
+ int mode; /* Open mode flags; the access mode decides the protection of the mapping */
+};
+/* Convert a fastbuf pointer to the enclosing struct fb_mmap. */
+#define FB_MMAP(f) ((struct fb_mmap *)(f)->is_fastbuf)
+
+/*
+ * Map the window of the file that contains f->pos.
+ *
+ * The window starts at f->pos rounded down to a page boundary and covers at
+ * most mmap_window_size bytes, but never reaches beyond file_extend. An
+ * existing mapping of the same length is replaced in place with MAP_FIXED,
+ * a mapping of a different length is unmapped first. Dies if mmap fails.
+ * On return bptr points at f->pos inside the window and bufend at the end
+ * of the usable part of the window; bstop is left to the caller.
+ */
+static void
+bfmm_map_window(struct fastbuf *f)
+{
+ struct fb_mmap *F = FB_MMAP(f);
+ ucw_off_t pos0 = f->pos & ~(ucw_off_t)(CPU_PAGE_SIZE-1);
+ int l = MIN((ucw_off_t)mmap_window_size, F->file_extend - pos0);
+ uns ll = ALIGN_TO(l, CPU_PAGE_SIZE);
+ int prot = ((F->mode & O_ACCMODE) == O_RDONLY) ? PROT_READ : (PROT_READ | PROT_WRITE);
+
+ DBG(" ... Mapping %x(%x)+%x(%x) len=%x extend=%x", (int)pos0, (int)f->pos, ll, l, (int)F->file_size, (int)F->file_extend);
+ /* A window of different length cannot be reused, so drop it */
+ if (ll != F->window_size && f->buffer)
+ {
+ munmap(f->buffer, F->window_size);
+ f->buffer = NULL;
+ }
+ F->window_size = ll;
+ if (!f->buffer)
+ f->buffer = ucw_mmap(NULL, ll, prot, MAP_SHARED, F->fd, pos0);
+ else
+ f->buffer = ucw_mmap(f->buffer, ll, prot, MAP_SHARED | MAP_FIXED, F->fd, pos0);
+ if (f->buffer == (byte *) MAP_FAILED)
+ die("mmap(%s): %m", f->name);
+#ifdef MADV_SEQUENTIAL
+ if (ll > CPU_PAGE_SIZE)
+ madvise(f->buffer, ll, MADV_SEQUENTIAL);
+#endif
+ /* bufend uses the unaligned length l, not the page-aligned mapping length ll */
+ f->bufend = f->buffer + l;
+ f->bptr = f->buffer + (f->pos - pos0);
+ F->window_pos = pos0;
+}
+
+/*
+ * Refill callback: make more of the file available for reading.
+ *
+ * Returns 0 when f->pos has reached file_size, 1 otherwise. A new window is
+ * mapped only when bstop has reached bufend. bstop is then set to the end of
+ * the window or to the end of the file data, whichever comes first, and
+ * f->pos to the file offset corresponding to bstop.
+ */
+static int
+bfmm_refill(struct fastbuf *f)
+{
+ struct fb_mmap *F = FB_MMAP(f);
+
+ DBG("Refill <- %p %p %p %p", f->buffer, f->bptr, f->bstop, f->bufend);
+ if (f->pos >= F->file_size)
+ return 0;
+ if (f->bstop >= f->bufend)
+ bfmm_map_window(f);
+ /* The window may extend past the data (up to file_extend); do not expose that part */
+ if (F->window_pos + (f->bufend - f->buffer) > F->file_size)
+ f->bstop = f->buffer + (F->file_size - F->window_pos);
+ else
+ f->bstop = f->bufend;
+ f->pos = F->window_pos + (f->bstop - f->buffer);
+ DBG(" -> %p %p %p(%x) %p", f->buffer, f->bptr, f->bstop, (int)f->pos, f->bufend);
+ return 1;
+}
+
+/*
+ * Spout callback: account for data written into the window.
+ *
+ * Raises file_size to the end of the written data if needed. While the
+ * window still has room nothing else happens. Once the window is full, the
+ * position moves to the end of the written data, the file is grown with
+ * ftruncate when that position reaches file_extend (dies on failure), and
+ * the next window is mapped.
+ */
+static void
+bfmm_spout(struct fastbuf *f)
+{
+ struct fb_mmap *F = FB_MMAP(f);
+ ucw_off_t end = f->pos + (f->bptr - f->bstop);
+
+ DBG("Spout <- %p %p %p %p", f->buffer, f->bptr, f->bstop, f->bufend);
+ if (end > F->file_size)
+ F->file_size = end;
+ if (f->bptr < f->bufend)
+ return;
+ f->pos = end;
+ if (f->pos >= F->file_extend)
+ {
+ F->file_extend = ALIGN_TO(F->file_extend + mmap_extend_size, (ucw_off_t)CPU_PAGE_SIZE);
+ if (ucw_ftruncate(F->fd, F->file_extend))
+ die("ftruncate(%s): %m", f->name);
+ }
+ bfmm_map_window(f);
+ f->bstop = f->bptr;
+ DBG(" -> %p %p %p(%x) %p", f->buffer, f->bptr, f->bstop, (int)f->pos, f->bufend);
+}
+
+/*
+ * Seek callback. Accepts SEEK_SET and SEEK_END only (anything else trips an
+ * ASSERT) and requires the resulting position to lie within 0..file_size.
+ * No mapping is done here: the buffer pointers are collapsed so that the
+ * next read or write goes through refill/spout. Always returns 1.
+ */
+static int
+bfmm_seek(struct fastbuf *f, ucw_off_t pos, int whence)
+{
+ if (whence == SEEK_END)
+ pos += FB_MMAP(f)->file_size;
+ else
+ ASSERT(whence == SEEK_SET);
+ ASSERT(pos >= 0 && pos <= FB_MMAP(f)->file_size);
+ f->pos = pos;
+ f->bptr = f->bstop = f->bufend = f->buffer; /* force refill/spout call */
+ DBG("Seek -> %p %p %p(%x) %p", f->buffer, f->bptr, f->bstop, (int)f->pos, f->bufend);
+ return 1;
+}
+
+/*
+ * Close callback: drop the current mapping, cut the file back to the end of
+ * its data if it was grown beyond that, pass the descriptor on to
+ * bclose_file_helper() and free the fastbuf. Dies if the truncation fails.
+ */
+static void
+bfmm_close(struct fastbuf *f)
+{
+  struct fb_mmap *mm = FB_MMAP(f);
+
+  if (f->buffer)
+    munmap(f->buffer, mm->window_size);
+
+  /* Writing grows the file in steps, so it may be longer than the data */
+  if (mm->file_extend > mm->file_size)
+    {
+      if (ucw_ftruncate(mm->fd, mm->file_size))
+        die("ftruncate(%s): %m", f->name);
+    }
+
+  bclose_file_helper(f, mm->fd, mm->is_temp_file);
+  xfree(f);
+}
+
+/*
+ * Config callback. The only supported item is BCONFIG_IS_TEMP_FILE, which
+ * stores the new value and returns the previous one; every other item
+ * yields -1.
+ */
+static int
+bfmm_config(struct fastbuf *f, uns item, int value)
+{
+  if (item != BCONFIG_IS_TEMP_FILE)
+    return -1;
+
+  struct fb_mmap *mm = FB_MMAP(f);
+  int previous = mm->is_temp_file;
+
+  mm->is_temp_file = value;
+  return previous;
+}
+
+/*
+ * Create a memory-mapped fastbuf on an already opened descriptor.
+ *
+ * fd:   descriptor of the file; its size is determined by seeking to the end
+ * name: file name, copied into the same allocation right behind the structure
+ * mode: open flags; O_APPEND starts the position at the end of the file and
+ *       the access mode later selects the mapping protection
+ *
+ * Dies if the seek fails. No window is mapped yet, that happens on the
+ * first refill or spout.
+ */
+struct fastbuf *
+bfmmopen_internal(int fd, const char *name, uns mode)
+{
+ int namelen = strlen(name) + 1;
+ struct fb_mmap *F = xmalloc(sizeof(struct fb_mmap) + namelen);
+ struct fastbuf *f = &F->fb;
+
+ bzero(F, sizeof(*F));
+ f->name = (byte *)(F+1);
+ memcpy(f->name, name, namelen);
+ F->fd = fd;
+ F->file_extend = F->file_size = ucw_seek(fd, 0, SEEK_END);
+ if (F->file_size < 0)
+ die("seek(%s): %m", name);
+ if (mode & O_APPEND)
+ f->pos = F->file_size;
+ F->mode = mode;
+
+ f->refill = bfmm_refill;
+ f->spout = bfmm_spout;
+ f->seek = bfmm_seek;
+ f->close = bfmm_close;
+ f->config = bfmm_config;
+ return f;
+}
+
+#ifdef TEST
+
+/*
+ * Self-test: copies the file named by argv[1] to the file named by argv[2]
+ * through two mmap fastbufs and then exercises seeks, writes, flushes and
+ * reads at several positions of the output. The arguments are not checked.
+ */
+int main(int UNUSED argc, char **argv)
+{
+ /* FB_MMAP without an argument list is not the FB_MMAP(f) macro above, so the plain identifier is used here */
+ struct fb_params par = { .type = FB_MMAP };
+ struct fastbuf *f = bopen_file(argv[1], O_RDONLY, &par);
+ struct fastbuf *g = bopen_file(argv[2], O_RDWR | O_CREAT | O_TRUNC, &par);
+ int c;
+
+ DBG("Copying");
+ while ((c = bgetc(f)) >= 0)
+ bputc(g, c);
+ bclose(f);
+ DBG("Seek inside last block");
+ bsetpos(g, btell(g)-1333);
+ bputc(g, 13);
+ DBG("Seek to the beginning & write");
+ bsetpos(g, 1333);
+ bputc(g, 13);
+ DBG("flush");
+ bflush(g);
+ bputc(g, 13);
+ bflush(g);
+ DBG("Seek nearby & read");
+ bsetpos(g, 133);
+ bgetc(g);
+ DBG("Seek far & read");
+ bsetpos(g, 133333);
+ bgetc(g);
+ DBG("Closing");
+ bclose(g);
+
+ return 0;
+}
+
+#endif
--- /dev/null
+# Test for fb-mmap.c
+
+Run: dd bs=1024 count=1024 if=/dev/zero of=mmap.in 2>/dev/null && ../obj/ucw/fb-mmap-t mmap.in mmap.out && rm mmap.in mmap.out
--- /dev/null
+/*
+ * UCW Library -- FastIO on files with run-time parametrization
+ *
+ * (c) 2007 Pavel Charvat <pchar@ucw.cz>
+ * (c) 2007 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+#include "ucw/conf.h"
+#include "ucw/lfs.h"
+#include "ucw/fastbuf.h"
+
+#include <fcntl.h>
+#include <stdio.h>
+
+/*
+ * Default parameters. They are used when a caller passes no parameters at
+ * all and as the fallback for buffer_size, read_ahead and write_back fields
+ * left at zero; the "Defaults" config section below can override them.
+ */
+struct fb_params fbpar_def = {
+ .buffer_size = 65536,
+ .read_ahead = 1,
+ .write_back = 1,
+};
+
+/*
+ * Commit hook of the fb_params config section: refuses the FB_DIRECT type
+ * when the build lacks what direct I/O needs. Returns an error message, or
+ * NULL if the parameters are acceptable. The first preprocessor branch that
+ * is compiled in decides which message is returned.
+ */
+static char *
+fbpar_cf_commit(struct fb_params *p UNUSED)
+{
+ if (p->type == FB_DIRECT)
+ {
+#ifndef CONFIG_UCW_THREADS
+ return "Direct I/O is supported only with CONFIG_UCW_THREADS";
+#endif
+#ifdef CONFIG_DARWIN
+ return "Direct I/O is not supported on darwin";
+#endif
+#ifndef CONFIG_DIRECT_IO
+ return "Direct I/O disabled by configure switch -CONFIG_DIRECT_IO";
+#endif
+#ifndef CONFIG_UCW_FB_DIRECT
+ return "Direct I/O disabled by configure switch -CONFIG_UCW_FB_DIRECT";
+#endif
+ }
+ return NULL;
+}
+
+/*
+ * Config section describing one struct fb_params. "Type" is looked up among
+ * the names "std", "direct" and "mmap"; the remaining items are plain
+ * unsigned numbers. fbpar_cf_commit() validates the result.
+ */
+struct cf_section fbpar_cf = {
+# define F(x) PTR_TO(struct fb_params, x)
+ CF_TYPE(struct fb_params),
+ CF_COMMIT(fbpar_cf_commit),
+ CF_ITEMS {
+ CF_LOOKUP("Type", (int *)F(type), ((const char * const []){"std", "direct", "mmap", NULL})),
+ CF_UNS("BufSize", F(buffer_size)),
+ CF_UNS("KeepBackBuf", F(keep_back_buf)),
+ CF_UNS("ReadAhead", F(read_ahead)),
+ CF_UNS("WriteBack", F(write_back)),
+ CF_END
+ }
+# undef F
+};
+
+/* Top-level section whose "Defaults" subsection configures fbpar_def */
+static struct cf_section fbpar_global_cf = {
+ CF_ITEMS {
+ CF_SECTION("Defaults", &fbpar_def, &fbpar_cf),
+ CF_END
+ }
+};
+
+/* Registers the top-level section under the name "FBParam" */
+static void CONSTRUCTOR
+fbpar_global_init(void)
+{
+ cf_declare_section("FBParam", &fbpar_global_cf, 0);
+}
+
+/*
+ * Create a fastbuf of the type selected by params->type on top of an
+ * already opened descriptor.
+ *
+ * fd:     descriptor to wrap
+ * params: back-end type and tuning; buffer_size, read_ahead and write_back
+ *         left at zero fall back to the values in fbpar_def
+ * mode:   the flags the descriptor was opened with, or ~0U when the caller
+ *         does not know them; they are then queried by fcntl(F_GETFL) where
+ *         the back-end needs them
+ * name:   name of the fastbuf, or NULL to use "fd<number>"
+ *
+ * An unknown type trips an ASSERT.
+ */
+static struct fastbuf *
+bopen_fd_internal(int fd, struct fb_params *params, uns mode, const char *name)
+{
+  /* NOTE(review): a synthesized name lives in this local array, so the
+   * back-ends are presumably expected to copy it -- confirm for each of them */
+  char buf[32];
+  if (!name)
+    {
+      sprintf(buf, "fd%d", fd);
+      name = buf;
+    }
+  struct fastbuf *fb;
+  switch (params->type)
+    {
+#ifdef CONFIG_UCW_FB_DIRECT
+      case FB_DIRECT:
+        fb = fbdir_open_fd_internal(fd, name, params->asio,
+            params->buffer_size ? : fbpar_def.buffer_size,
+            params->read_ahead ? : fbpar_def.read_ahead,
+            params->write_back ? : fbpar_def.write_back);
+        /*
+         * With unknown flags, try to switch the descriptor to O_DIRECT and
+         * warn if either fcntl call fails. Each call is compared with zero
+         * on its own: comparing the result of the whole `||' expression
+         * would never be true, as that result is always 0 or 1.
+         */
+        if (!~mode && !fbdir_cheat &&
+            ((int)(mode = fcntl(fd, F_GETFL)) < 0 ||
+             fcntl(fd, F_SETFL, mode | O_DIRECT) < 0))
+          msg(L_WARN, "Cannot set O_DIRECT on fd %d: %m", fd);
+        return fb;
+#endif
+      case FB_STD:
+        fb = bfdopen_internal(fd, name,
+            params->buffer_size ? : fbpar_def.buffer_size);
+        if (params->keep_back_buf)
+          bconfig(fb, BCONFIG_KEEP_BACK_BUF, 1);
+        return fb;
+      case FB_MMAP:
+        /* The mmap back-end needs the real flags to choose the protection */
+        if (!~mode && (int)(mode = fcntl(fd, F_GETFL)) < 0)
+          die("Cannot get flags of fd %d: %m", fd);
+        return bfmmopen_internal(fd, name, mode);
+      default:
+        ASSERT(0);
+    }
+}
+
+/*
+ * Open a file and wrap it in a fastbuf of the requested type.
+ *
+ * name:   path of the file
+ * mode:   open flags; adjusted below to suit the selected back-end
+ * params: fastbuf parameters, NULL means fbpar_def
+ * try:    if non-zero, a failed open returns NULL instead of dying
+ *
+ * With O_APPEND the new fastbuf is positioned at the end of the file.
+ */
+static struct fastbuf *
+bopen_file_internal(const char *name, int mode, struct fb_params *params, int try)
+{
+  struct fb_params *par = params ? params : &fbpar_def;
+
+#ifdef CONFIG_UCW_FB_DIRECT
+  if (par->type == FB_DIRECT && !fbdir_cheat)
+    mode |= O_DIRECT;
+#endif
+  /* Write-only opens are turned into read-write ones for the mmap back-end,
+   * presumably because its windows are mapped readable as well as writable */
+  if (par->type == FB_MMAP && (mode & O_ACCMODE) == O_WRONLY)
+    mode = (mode & ~O_ACCMODE) | O_RDWR;
+
+  int fd = ucw_open(name, mode, 0666);
+  if (fd < 0)
+    {
+      if (try)
+        return NULL;
+      die("Unable to %s file %s: %m", (mode & O_CREAT) ? "create" : "open", name);
+    }
+
+  struct fastbuf *fb = bopen_fd_internal(fd, par, mode, name);
+  ASSERT(fb);
+  if (mode & O_APPEND)
+    bseek(fb, 0, SEEK_END);
+  return fb;
+}
+
+/* Open a file as a fastbuf; dies if the file cannot be opened. NULL params select fbpar_def. */
+struct fastbuf *
+bopen_file(const char *name, int mode, struct fb_params *params)
+{
+ return bopen_file_internal(name, mode, params, 0);
+}
+
+/* Same as bopen_file(), but returns NULL if the file cannot be opened. */
+struct fastbuf *
+bopen_file_try(const char *name, int mode, struct fb_params *params)
+{
+ return bopen_file_internal(name, mode, params, 1);
+}
+
+/*
+ * Wrap an already opened descriptor. The open flags are passed as unknown
+ * (~0U); NULL params select fbpar_def.
+ */
+struct fastbuf *
+bopen_fd_name(int fd, struct fb_params *params, const char *name)
+{
+ return bopen_fd_internal(fd, params ? : &fbpar_def, ~0U, name);
+}
+
+/* Function for use by individual file back-ends */
+
+/*
+ * Final step of closing a file-backed fastbuf, driven by is_temp_file:
+ *   1     -- unlink the file (a failure is only logged), then close fd
+ *   0     -- close fd; dies if close fails
+ *   other -- neither unlink nor close
+ */
+void
+bclose_file_helper(struct fastbuf *f, int fd, int is_temp_file)
+{
+ switch (is_temp_file)
+ {
+ case 1:
+ if (unlink(f->name) < 0)
+ msg(L_ERROR, "unlink(%s): %m", f->name);
+ /* no break: falls through to close the descriptor as well */
+ case 0:
+ if (close(fd))
+ die("close(%s): %m", f->name);
+ }
+}
+
+/* Compatibility wrappers */
+
+/* bopen_file_try() with a standard (FB_STD) fastbuf of the given buffer size */
+struct fastbuf *
+bopen_try(const char *name, uns mode, uns buflen)
+{
+ return bopen_file_try(name, mode, &(struct fb_params){ .type = FB_STD, .buffer_size = buflen });
+}
+
+/* bopen_file() with a standard (FB_STD) fastbuf of the given buffer size */
+struct fastbuf *
+bopen(const char *name, uns mode, uns buflen)
+{
+ return bopen_file(name, mode, &(struct fb_params){ .type = FB_STD, .buffer_size = buflen });
+}
+
+/* bopen_fd() with a standard (FB_STD) fastbuf of the given buffer size */
+struct fastbuf *
+bfdopen(int fd, uns buflen)
+{
+ return bopen_fd(fd, &(struct fb_params){ .type = FB_STD, .buffer_size = buflen });
+}
+
+/*
+ * Like bfdopen(), but sets BCONFIG_IS_TEMP_FILE to 2 on the new fastbuf.
+ * NOTE(review): with that value bclose_file_helper() neither unlinks nor
+ * closes the descriptor -- confirm that the FB_STD back-end passes it there.
+ */
+struct fastbuf *
+bfdopen_shared(int fd, uns buflen)
+{
+ struct fastbuf *f = bfdopen(fd, buflen);
+ bconfig(f, BCONFIG_IS_TEMP_FILE, 2);
+ return f;
+}
--- /dev/null
+/*
+ * UCW Library -- Fast Buffered I/O on Memory Pools
+ *
+ * (c) 2007 Pavel Charvat <pchar@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+#include "ucw/mempool.h"
+#include "ucw/fastbuf.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#define FB_POOL(f) ((struct fbpool *)(f)->is_fastbuf)
+
+/*
+ * Spout callback: once the write pointer has reached the end of the buffer,
+ * ask the pool to expand the growing block and re-point the fastbuf at it,
+ * keeping the write position at the same distance from the buffer start.
+ */
+static void
+fbpool_spout(struct fastbuf *b)
+{
+  if (b->bptr < b->bufend)
+    return;
+
+  struct mempool *pool = FB_POOL(b)->mp;
+  uns used = b->bufend - b->buffer;
+
+  b->buffer = mp_expand(pool);
+  b->bufend = b->buffer + mp_avail(pool);
+  b->bstop = b->buffer;
+  b->bptr = b->buffer + used;
+}
+
+/*
+ * Start writing into a pool: begin a growing block of init_size bytes in mp
+ * and point all buffer pointers of the fastbuf at it.
+ */
+void
+fbpool_start(struct fbpool *b, struct mempool *mp, uns init_size)
+{
+  struct fastbuf *fb = &b->fb;
+
+  b->mp = mp;
+  fb->bptr = mp_start(mp, init_size);
+  fb->bstop = fb->bptr;
+  fb->buffer = fb->bptr;
+  fb->bufend = fb->buffer + mp_avail(mp);
+}
+
+/* Finish the block at the current write position and return what mp_end() returns for it */
+void *
+fbpool_end(struct fbpool *b)
+{
+ return mp_end(b->mp, b->fb.bptr);
+}
+
+/*
+ * Initialize a pool fastbuf: clear the whole structure and set its name,
+ * spout callback and the can_overwrite_buffer flag. No pool is attached
+ * yet, that is done by fbpool_start().
+ */
+void
+fbpool_init(struct fbpool *b)
+{
+  struct fastbuf *fb = &b->fb;
+
+  bzero(b, sizeof(*b));
+  fb->can_overwrite_buffer = 1;
+  fb->spout = fbpool_spout;
+  fb->name = "<fbpool>";
+}
+
+#ifdef TEST
+
+/*
+ * Self-test: prints "<hello>" 1024 times into a pool fastbuf and checks
+ * both the size of the resulting block and its contents.
+ */
+int main(void)
+{
+ struct mempool *mp;
+ struct fbpool fb;
+ byte *p;
+ uns l;
+
+ mp = mp_new(64);
+ fbpool_init(&fb);
+ fbpool_start(&fb, mp, 16);
+ for (uns i = 0; i < 1024; i++)
+ bprintf(&fb.fb, "<hello>");
+ p = fbpool_end(&fb);
+ l = mp_size(mp, p);
+ /* 7 is the length of "<hello>" */
+ if (l != 1024 * 7)
+ ASSERT(0);
+ for (uns i = 0; i < 1024; i++)
+ if (memcmp(p + i * 7, "<hello>", 7))
+ ASSERT(0);
+ mp_delete(mp);
+
+ return 0;
+}
+
+#endif
--- /dev/null
+# Test for fb-pool.c
+
+Run: ../obj/ucw/fb-pool-t
--- /dev/null
+/*
+ * UCW Library -- Fast Buffered I/O on Sockets with Timeouts
+ *
+ * (c) 2008 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+#include "ucw/fastbuf.h"
+#include "ucw/fb-socket.h"
+
+#include <stdio.h>
+#include <unistd.h>
+#include <poll.h>
+#include <errno.h>
+
+/* Private state of a socket fastbuf */
+struct fb_sock {
+ struct fastbuf fb; /* the generic fastbuf handed out to callers */
+ struct fbsock_params par; /* copy of the parameters given to fbsock_create() */
+ byte buf[0]; /* I/O buffer of par.bufsize bytes, allocated together with the structure */
+};
+
+/* Converts a fastbuf pointer to its fb_sock -- NOTE(review): relies on the is_fastbuf member declared in fastbuf.h, confirm there */
+#define FB_SOCK(f) ((struct fb_sock *)(f)->is_fastbuf)
+
+/*
+ * Refill callback: wait until the socket is readable and read into the
+ * buffer.
+ *
+ * Returns the number of bytes read (0 at end of file). On a poll/read
+ * failure or when timeout_ms expires, the err callback is called with
+ * FBSOCK_READ (plus FBSOCK_TIMEOUT for a timeout) and 0 is returned.
+ * An interruption by a signal (EINTR) or EAGAIN is not an error: the wait
+ * is simply restarted, each time with the full timeout.
+ */
+static int
+fbs_refill(struct fastbuf *f)
+{
+  struct fbsock_params *p = &FB_SOCK(f)->par;
+  struct pollfd pf = {
+    .fd = p->fd,
+    .events = POLLIN
+  };
+
+  for (;;)
+    {
+      int e = poll(&pf, 1, p->timeout_ms);
+      if (e < 0)
+        {
+          /* poll() is never restarted automatically after a signal handler ran */
+          if (errno == EINTR)
+            continue;
+          p->err(p->data, FBSOCK_READ, "read error");
+          return 0;
+        }
+      if (!e)
+        {
+          p->err(p->data, FBSOCK_READ | FBSOCK_TIMEOUT, "read timeout");
+          return 0;
+        }
+
+      f->bptr = f->buffer;
+      int l = read(p->fd, f->buffer, f->bufend-f->buffer);
+      if (l < 0)
+        {
+          if (errno == EINTR || errno == EAGAIN)
+            continue;
+          p->err(p->data, FBSOCK_READ, "read error");
+          return 0;
+        }
+      f->bstop = f->buffer + l;
+      f->pos += l;
+      return l;
+    }
+}
+
+/*
+ * Spout callback: write out everything between the start of the buffer and
+ * the write pointer, waiting for the socket to become writable before each
+ * write.
+ *
+ * On a poll/write failure or when timeout_ms expires, the err callback is
+ * called with FBSOCK_WRITE (plus FBSOCK_TIMEOUT for a timeout) and the
+ * remaining data are dropped. An interruption by a signal (EINTR) or EAGAIN
+ * is not an error: the wait is restarted, each time with the full timeout.
+ */
+static void
+fbs_spout(struct fastbuf *f)
+{
+  struct fbsock_params *p = &FB_SOCK(f)->par;
+  struct pollfd pf = {
+    .fd = p->fd,
+    .events = POLLOUT,
+  };
+
+  int l = f->bptr - f->buffer;
+  f->bptr = f->buffer;
+  byte *buf = f->buffer;
+
+  while (l)
+    {
+      int e = poll(&pf, 1, p->timeout_ms);
+      if (e < 0)
+        {
+          /* poll() is never restarted automatically after a signal handler ran */
+          if (errno == EINTR)
+            continue;
+          p->err(p->data, FBSOCK_WRITE, "write error");
+          return;
+        }
+      if (!e)
+        {
+          p->err(p->data, FBSOCK_WRITE | FBSOCK_TIMEOUT, "write timeout");
+          return;
+        }
+
+      e = write(p->fd, buf, l);
+      if (e < 0)
+        {
+          if (errno == EINTR || errno == EAGAIN)
+            continue;
+          p->err(p->data, FBSOCK_WRITE, "write error");
+          return;
+        }
+      /* A short write leaves the rest for the next round */
+      buf += e;
+      l -= e;
+    }
+}
+
+/* Close callback: closes the socket descriptor (the result of close() is ignored) and frees the fastbuf */
+static void
+fbs_close(struct fastbuf *f)
+{
+ close(FB_SOCK(f)->par.fd);
+ xfree(f);
+}
+
+/*
+ * Allocate a socket fastbuf together with its buffer of p->bufsize bytes.
+ * The parameters are copied, so *p need not outlive the call.
+ */
+struct fastbuf *
+fbsock_create(struct fbsock_params *p)
+{
+  struct fb_sock *sock = xmalloc(sizeof(*sock) + p->bufsize);
+  struct fastbuf *fb = &sock->fb;
+
+  bzero(sock, sizeof(*sock));
+  sock->par = *p;
+
+  /* The buffer follows the structure; it starts out empty */
+  fb->buffer = sock->buf;
+  fb->bufend = fb->buffer + p->bufsize;
+  fb->bstop = fb->buffer;
+  fb->bptr = fb->buffer;
+
+  fb->name = "<socket>";
+  fb->refill = fbs_refill;
+  fb->spout = fbs_spout;
+  fb->close = fbs_close;
+  fb->can_overwrite_buffer = 1;
+  return fb;
+}
+
+#ifdef TEST
+
+#include <stdlib.h>
+
+/* Error callback of the self-test: prints which operation failed and how, then exits with status 0 */
+static void test_err(void *x UNUSED, uns flags, char *msg UNUSED)
+{
+ if (flags & FBSOCK_READ)
+ printf("READ");
+ else if (flags & FBSOCK_WRITE)
+ printf("WRITE");
+ if (flags & FBSOCK_TIMEOUT)
+ printf(" TIMEOUT\n");
+ else
+ printf(" ERROR\n");
+ exit(0);
+}
+
+/*
+ * Self-test: builds a socket fastbuf on fd[0] of a pipe with a 100 ms
+ * timeout and writes a short string to it. The accompanying .t file
+ * expects the run to end in test_err() printing "WRITE TIMEOUT", so the
+ * statements after bflush() are presumably never reached.
+ */
+int main(void)
+{
+ int fd[2];
+ if (pipe(fd) < 0)
+ ASSERT(0);
+
+ struct fbsock_params p = {
+ .fd = fd[0],
+ .bufsize = 16,
+ .timeout_ms = 100,
+ .err = test_err
+ };
+ struct fastbuf *f = fbsock_create(&p);
+
+ bputsn(f, "Oook!"); // This fits in PIPE_BUF
+ bflush(f);
+
+ char buf[256];
+ if (!bgets(f, buf, sizeof(buf)))
+ die("bgets failed");
+ if (strcmp(buf, "Oook!"))
+ die("Misread input");
+
+ bgets(f, buf, sizeof(buf));
+ puts("WRONG");
+ exit(0);
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Fast Buffered I/O on Sockets with Timeouts
+ *
+ * (c) 2008 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_FB_SOCKET_H
+#define _UCW_FB_SOCKET_H
+
+#include "ucw/fastbuf.h"
+
+struct fbsock_params { /** Configuration of socket fastbuf. **/
+ int fd; // Descriptor to poll, read from and write to; closed together with the fastbuf
+ uns bufsize; // Size of the I/O buffer in bytes
+ uns timeout_ms; // Timeout handed to poll() before each read or write
+ void (*err)(void *data, uns flags, char *msg); // Called on errors and timeouts; flags are fbsock_err_flags
+ void *data; // Passed to the err callback
+};
+
+// The values are bit flags: a timeout is reported as FBSOCK_READ or FBSOCK_WRITE or-ed with FBSOCK_TIMEOUT
+enum fbsock_err_flags { /** Description of a socket error **/
+ FBSOCK_READ = 1, // Happened during read
+ FBSOCK_WRITE = 2, // Happened during write
+ FBSOCK_TIMEOUT = 4, // The error is a timeout
+};
+
+/**
+ * Create a new socket fastbuf.
+ * All information is passed by @par.
+ **/
+struct fastbuf *fbsock_create(struct fbsock_params *par);
+
+#endif
--- /dev/null
+# Tests for the fb-socket module
+
+Name: fb-socket
+Run: ../obj/ucw/fb-socket-t
+Out: WRITE TIMEOUT
--- /dev/null
+/*
+ * UCW Library -- Temporary Fastbufs
+ *
+ * (c) 2002--2008 Martin Mares <mj@ucw.cz>
+ * (c) 2008 Michal Vaner <vorner@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+#include "ucw/fastbuf.h"
+
+#include <stdio.h>
+#include <fcntl.h>
+
+/*
+ * Create a temporary file with open_tmp() and wrap it in a fastbuf with the
+ * given parameters. The fastbuf is marked with BCONFIG_IS_TEMP_FILE = 1.
+ */
+struct fastbuf *
+bopen_tmp_file(struct fb_params *params)
+{
+ char name[TEMP_FILE_NAME_LEN];
+ int fd = open_tmp(name, O_RDWR | O_CREAT | O_TRUNC, 0600);
+ struct fastbuf *fb = bopen_fd_name(fd, params, name);
+ bconfig(fb, BCONFIG_IS_TEMP_FILE, 1);
+ return fb;
+}
+
+/* bopen_tmp_file() with a standard (FB_STD) fastbuf of the given buffer size */
+struct fastbuf *
+bopen_tmp(uns buflen)
+{
+ return bopen_tmp_file(&(struct fb_params){ .type = FB_STD, .buffer_size = buflen });
+}
+
+/*
+ * Turn a temporary fastbuf into a permanent file: clear its temporary-file
+ * flag (which must have been 1), rename the file to the given name and close
+ * the fastbuf. Dies if the rename fails.
+ */
+void bfix_tmp_file(struct fastbuf *fb, const char *name)
+{
+ int was_temp = bconfig(fb, BCONFIG_IS_TEMP_FILE, 0);
+ ASSERT(was_temp == 1);
+ if (rename(fb->name, name))
+ die("Cannot rename %s to %s: %m", fb->name, name);
+ bclose(fb);
+}
+
+#ifdef TEST
+
+#include "ucw/getopt.h"
+
+/* Self-test: opens a temporary fastbuf, writes one line to it and closes it again */
+int main(int argc, char **argv)
+{
+ log_init(NULL);
+ /* Only configuration options are accepted; any other option is refused */
+ if (cf_getopt(argc, argv, CF_SHORT_OPTS, CF_NO_LONG_OPTS, NULL) >= 0)
+ die("Hey, whaddya want?");
+
+ struct fastbuf *f = bopen_tmp(65536);
+ ASSERT(f && f->name);
+ bputsn(f, "Hello, world!");
+ bclose(f);
+ return 0;
+}
+
+#endif
--- /dev/null
+# Test for fb-temp.c
+
+Run: ../obj/ucw/fb-temp-t
--- /dev/null
+/*
+ * UCW Library -- Fast Buffered I/O: Binary Numbers
+ *
+ * (c) 1997--2006 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+#include "ucw/fastbuf.h"
+#include "ucw/ff-binary.h"
+
+/*
+ * GEN(type, name, size, endian) defines the out-of-line functions
+ * bget<name>_<endian>_slow() and bput<name>_<endian>_slow() for a value of
+ * `size' bits. The getter goes through bread() and returns ~(type)0 when
+ * fewer than size/8 bytes could be read; the putter goes through
+ * bwrite_slow(). FF_ALL instantiates both byte orders.
+ */
+#define GEN(type, name, size, endian) \
+type bget##name##_##endian##_slow(struct fastbuf *f) \
+{ \
+ byte buf[size/8]; \
+ if (bread(f, buf, sizeof(buf)) != sizeof(buf)) \
+ return ~(type)0; \
+ return get_u##size##_##endian(buf); \
+} \
+void bput##name##_##endian##_##slow(struct fastbuf *f, type x) \
+{ \
+ byte buf[size/8]; \
+ put_u##size##_##endian(buf, x); \
+ bwrite_slow(f, buf, sizeof(buf)); \
+}
+
+#define FF_ALL(type, name, size) GEN(type,name,size,be) GEN(type,name,size,le)
+
+FF_ALL(int, w, 16)
+FF_ALL(uns, l, 32)
+FF_ALL(u64, q, 64)
+FF_ALL(u64, 5, 40)
--- /dev/null
+/*
+ * UCW Library -- Fast Buffered I/O on Binary Values
+ *
+ * (c) 1997--2007 Martin Mares <mj@ucw.cz>
+ * (c) 2004 Robert Spalek <robert@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_FF_BINARY_H
+#define _UCW_FF_BINARY_H
+
+#include "ucw/fastbuf.h"
+#include "ucw/unaligned.h"
+
+#ifdef CPU_BIG_ENDIAN
+#define FF_ENDIAN be
+#else
+#define FF_ENDIAN le
+#endif
+
+/***
+ *
+ * We define several functions to read or write binary integer values.
+ *
+ * The name patterns for such routines are:
+ *
+ * - `TYPE bget \#\# NAME \#\# ENDIAN(struct fastbuf *f);`
+ * - `void bput \#\# NAME \#\# ENDIAN(struct fastbuf *f, TYPE value);`
+ *
+ * where `NAME` together with `TYPE` can be:
+ *
+ * - `w` for 16-bit unsigned integers stored in sequences of 2 bytes, the `TYPE` is int
+ * - `l` for 32-bit unsigned integers stored in sequences of 4 bytes, the `TYPE` is uns
+ * - `5` for 40-bit unsigned integers stored in sequences of 5 bytes, the `TYPE` is u64
+ * - `q` for 64-bit unsigned integers stored in sequences of 8 bytes, the `TYPE` is u64
+ *
+ * and supported `ENDIAN` suffixes are:
+ *
+ * - empty for the default order of bytes (defined by CPU)
+ * - `_le` for little-endian
+ * - `_be` for big-endian
+ *
+ * If we fail to read enough bytes because of EOF, the reading function returns `(TYPE)-1`.
+ *
+ ***/
+
+/*
+ * GET_FUNC declares bget<name>_<endian>_slow() and defines the inline
+ * bget<name>_<endian>(): when at least bits/8 bytes are available in the
+ * buffer (bavailr), the value is decoded in place and the read pointer is
+ * advanced, otherwise the slow function is called.
+ */
+#define GET_FUNC(type, name, bits, endian) \
+ type bget##name##_##endian##_slow(struct fastbuf *f); \
+ static inline type bget##name##_##endian(struct fastbuf *f) \
+ { \
+ if (bavailr(f) >= bits/8) \
+ { \
+ type w = get_u##bits##_##endian(f->bptr); \
+ f->bptr += bits/8; \
+ return w; \
+ } \
+ else \
+ return bget##name##_##endian##_slow(f); \
+ }
+
+/*
+ * PUT_FUNC declares bput<name>_<endian>_slow() and defines the inline
+ * bput<name>_<endian>(): when at least bits/8 bytes of space are available
+ * in the buffer (bavailw), the value is encoded in place and the write
+ * pointer is advanced, otherwise the slow function is called. The slow
+ * function is called as a plain statement, because ISO C does not allow
+ * `return' with an expression in a function returning void.
+ */
+#define PUT_FUNC(type, name, bits, endian) \
+ void bput##name##_##endian##_slow(struct fastbuf *f, type x); \
+ static inline void bput##name##_##endian(struct fastbuf *f, type x) \
+ { \
+ if (bavailw(f) >= bits/8) \
+ { \
+ put_u##bits##_##endian(f->bptr, x); \
+ f->bptr += bits/8; \
+ } \
+ else \
+ bput##name##_##endian##_slow(f, x); \
+ }
+
+/*
+ * FF_ALL_X generates the big-endian and little-endian getters and putters
+ * for one type and the suffix-less bget<name>()/bput<name>() that forward
+ * to the default byte order. FF_ALL adds one level of macro expansion so
+ * that FF_ENDIAN is replaced by `be' or `le' before it is pasted into names.
+ */
+#define FF_ALL_X(type, name, bits, defendian) \
+ GET_FUNC(type, name, bits, be) \
+ GET_FUNC(type, name, bits, le) \
+ PUT_FUNC(type, name, bits, be) \
+ PUT_FUNC(type, name, bits, le) \
+ static inline type bget##name(struct fastbuf *f) { return bget##name##_##defendian(f); } \
+ static inline void bput##name(struct fastbuf *f, type x) { bput##name##_##defendian(f, x); }
+
+#define FF_ALL(type, name, bits, defendian) FF_ALL_X(type, name, bits, defendian)
+
+FF_ALL(int, w, 16, FF_ENDIAN)
+FF_ALL(uns, l, 32, FF_ENDIAN)
+FF_ALL(u64, q, 64, FF_ENDIAN)
+FF_ALL(u64, 5, 40, FF_ENDIAN)
+
+/* The generator macros are private to this header */
+#undef GET_FUNC
+#undef PUT_FUNC
+#undef FF_ENDIAN
+#undef FF_ALL_X
+#undef FF_ALL
+
+/* I/O on uintptr_t (only native endianity) */
+
+/* Pointer-sized values use the 64-bit routines when CPU_64BIT_POINTERS is defined, the 32-bit ones otherwise */
+#ifdef CPU_64BIT_POINTERS
+#define bputa(x,p) bputq(x,p)
+#define bgeta(x) bgetq(x)
+#else
+#define bputa(x,p) bputl(x,p)
+#define bgeta(x) bgetl(x)
+#endif
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Printf on Fastbuf Streams
+ *
+ * (c) 2002--2005 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+#include "ucw/fastbuf.h"
+
+#include <stdio.h>
+#include <alloca.h>
+
+/*
+ * Formatted output to a fastbuf, taking a va_list.
+ *
+ * If at least 16 bytes can be written directly into the fastbuf buffer, the
+ * text is formatted there and committed when it fits. Otherwise it is
+ * formatted into a temporary buffer allocated by alloca() and copied by
+ * bwrite(); the temporary buffer is enlarged until vsnprintf() succeeds.
+ * The argument list is copied before each attempt, since it may have to be
+ * walked several times.
+ *
+ * Returns the number of characters written.
+ */
+int
+vbprintf(struct fastbuf *b, const char *msg, va_list args)
+{
+ byte *buf;
+ int len, r;
+ va_list args2;
+
+ len = bdirect_write_prepare(b, &buf);
+ if (len >= 16)
+ {
+ va_copy(args2, args);
+ r = vsnprintf(buf, len, msg, args2);
+ va_end(args2);
+ if (r < 0)
+ len = 256;
+ else if (r < len)
+ {
+ bdirect_write_commit(b, buf+r);
+ return r;
+ }
+ else
+ /* vsnprintf reported the length needed; add room for the terminating zero */
+ len = r+1;
+ }
+ else
+ len = 256;
+
+ while (1)
+ {
+ buf = alloca(len);
+ va_copy(args2, args);
+ r = vsnprintf(buf, len, msg, args2);
+ va_end(args2);
+ if (r < 0)
+ /* No length reported, so just double the buffer and retry */
+ len += len;
+ else if (r < len)
+ {
+ bwrite(b, buf, r);
+ return r;
+ }
+ else
+ len = r+1;
+ }
+}
+
+/* Formatted output to a fastbuf; a variadic front-end to vbprintf() returning its result */
+int
+bprintf(struct fastbuf *b, const char *msg, ...)
+{
+  va_list ap;
+
+  va_start(ap, msg);
+  int written = vbprintf(b, msg, ap);
+  va_end(ap);
+
+  return written;
+}
+
+#ifdef TEST
+
+/* Self-test: prints the same formatted line 10000 times to a fastbuf opened on descriptor 1 */
+int main(void)
+{
+ struct fastbuf *b = bfdopen_shared(1, 65536);
+ for (int i=0; i<10000; i++)
+ bprintf(b, "13=%d str=<%s> msg=%m\n", 13, "str");
+ bclose(b);
+ return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Fast Buffered I/O: Strings on stack
+ *
+ * (c) 2008 Michal Vaner <vorner@ucw.cz>
+ *
+ * Code taken from ff-string.c by:
+ *
+ * (c) 1997--2006 Martin Mares <mj@ucw.cz>
+ * (c) 2006 Pavel Charvat <pchar@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+#include "ucw/fastbuf.h"
+
+/*
+ * Start reading a line into a caller-provided buffer that grows in steps.
+ * If no input is available, cur_buf is set to NULL and cur_len to 0.
+ * Otherwise cur_len is set to 256, the size of the first buffer that
+ * bgets_stk_step() expects to find in cur_buf.
+ */
+void
+bgets_stk_init(struct bgets_stk_struct *s)
+{
+ s->src_len = bdirect_read_prepare(s->f, &s->src);
+ if (!s->src_len)
+ {
+ s->cur_buf = NULL;
+ s->cur_len = 0;
+ }
+ else
+ {
+ s->old_buf = NULL;
+ s->cur_len = 256;
+ }
+}
+
+/*
+ * One step of reading a line: cur_buf must point to a buffer of cur_len
+ * bytes. The contents of the previous, smaller buffer (if any) are copied
+ * over first, then input is appended up to a newline or the end of input.
+ *
+ * When the buffer fills up, the function returns with cur_len doubled and
+ * the full buffer remembered in old_buf/old_len, asking for another step
+ * with a bigger buffer. When the line is complete, it is zero-terminated
+ * (the newline is not stored) and cur_len is set to 0.
+ */
+void
+bgets_stk_step(struct bgets_stk_struct *s)
+{
+ byte *buf = s->cur_buf;
+ uns buf_len = s->cur_len;
+ if (s->old_buf)
+ {
+ memcpy( s->cur_buf, s->old_buf, s->old_len);
+ buf += s->old_len;
+ buf_len -= s->old_len;
+ }
+ do
+ {
+ uns cnt = MIN(s->src_len, buf_len);
+ for (uns i = cnt; i--;)
+ {
+ byte v = *s->src++;
+ if (v == '\n')
+ {
+ bdirect_read_commit(s->f, s->src);
+ goto exit;
+ }
+ *buf++ = v;
+ }
+ /* Source chunk used up: commit it and fetch the next one */
+ if (cnt == s->src_len)
+ {
+ bdirect_read_commit(s->f, s->src);
+ s->src_len = bdirect_read_prepare(s->f, &s->src);
+ }
+ else
+ s->src_len -= cnt;
+ /* Destination full: return so that the caller can supply a bigger buffer */
+ if (cnt == buf_len)
+ {
+ s->old_len = s->cur_len;
+ s->old_buf = s->cur_buf;
+ s->cur_len *= 2;
+ return;
+ }
+ else
+ buf_len -= cnt;
+ }
+ while (s->src_len);
+exit:
+ *buf = 0;
+ s->cur_len = 0;
+}
+
--- /dev/null
+/*
+ * UCW Library -- Fast Buffered I/O: Strings
+ *
+ * (c) 1997--2006 Martin Mares <mj@ucw.cz>
+ * (c) 2006 Pavel Charvat <pchar@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+#include "ucw/fastbuf.h"
+#include "ucw/mempool.h"
+#include "ucw/bbuf.h"
+
+/*
+ * Read one line into a buffer of l bytes. The newline is consumed but not
+ * stored and the result is zero-terminated.
+ *
+ * Returns NULL if no input is available at all. Otherwise, unlike the
+ * standard fgets(), it returns a pointer to the terminating zero, not to
+ * the start of the buffer. Dies if the line does not fit.
+ */
+char * /* Non-standard */
+bgets(struct fastbuf *f, char *b, uns l)
+{
+ ASSERT(l);
+ byte *src;
+ uns src_len = bdirect_read_prepare(f, &src);
+ if (!src_len)
+ return NULL;
+ do
+ {
+ uns cnt = MIN(l, src_len);
+ for (uns i = cnt; i--;)
+ {
+ byte v = *src++;
+ if (v == '\n')
+ {
+ bdirect_read_commit(f, src);
+ goto exit;
+ }
+ *b++ = v;
+ }
+ /* All l bytes used without seeing a newline: no room left for the terminator */
+ if (unlikely(cnt == l))
+ die("%s: Line too long", f->name);
+ l -= cnt;
+ bdirect_read_commit(f, src);
+ src_len = bdirect_read_prepare(f, &src);
+ }
+ while (src_len);
+exit:
+ *b = 0;
+ return b;
+}
+
+/*
+ * Like bgets(), but reports a too long line instead of dying.
+ *
+ * Returns 0 if no input is available, -1 if the line does not fit into
+ * l bytes (the part read so far stays consumed), otherwise the number of
+ * bytes stored including the terminating zero.
+ */
+int
+bgets_nodie(struct fastbuf *f, char *b, uns l)
+{
+ ASSERT(l);
+ byte *src, *start = b;
+ uns src_len = bdirect_read_prepare(f, &src);
+ if (!src_len)
+ return 0;
+ do
+ {
+ uns cnt = MIN(l, src_len);
+ for (uns i = cnt; i--;)
+ {
+ byte v = *src++;
+ if (v == '\n')
+ {
+ bdirect_read_commit(f, src);
+ goto exit;
+ }
+ *b++ = v;
+ }
+ bdirect_read_commit(f, src);
+ if (cnt == l)
+ return -1;
+ l -= cnt;
+ src_len = bdirect_read_prepare(f, &src);
+ }
+ while (src_len);
+exit:
+ *b++ = 0;
+ return b - (char *)start;
+}
+
+/*
+ * Read one line into a growing buffer, enlarging it as needed up to `limit'
+ * bytes. The newline is consumed but not stored and the result is
+ * zero-terminated.
+ *
+ * Returns 0 if no input is available, otherwise the number of bytes stored
+ * including the terminating zero. Dies once the line reaches the limit.
+ */
+uns
+bgets_bb(struct fastbuf *f, struct bb_t *bb, uns limit)
+{
+ ASSERT(limit);
+ byte *src;
+ uns src_len = bdirect_read_prepare(f, &src);
+ if (!src_len)
+ return 0;
+ bb_grow(bb, 1);
+ byte *buf = bb->ptr;
+ uns len = 0, buf_len = MIN(bb->len, limit);
+ do
+ {
+ uns cnt = MIN(src_len, buf_len);
+ for (uns i = cnt; i--;)
+ {
+ byte v = *src++;
+ if (v == '\n')
+ {
+ bdirect_read_commit(f, src);
+ goto exit;
+ }
+ *buf++ = v;
+ }
+ len += cnt;
+ /* Source chunk used up: commit it and fetch the next one */
+ if (cnt == src_len)
+ {
+ bdirect_read_commit(f, src);
+ src_len = bdirect_read_prepare(f, &src);
+ }
+ else
+ src_len -= cnt;
+ /* Destination full: grow it, unless the limit has been reached */
+ if (cnt == buf_len)
+ {
+ if (unlikely(len == limit))
+ die("%s: Line too long", f->name);
+ bb_do_grow(bb, len + 1);
+ /* The buffer may have moved, so recompute the write pointer */
+ buf = bb->ptr + len;
+ buf_len = MIN(bb->len, limit) - len;
+ }
+ else
+ buf_len -= cnt;
+ }
+ while (src_len);
+exit:
+ *buf++ = 0;
+ return buf - bb->ptr;
+}
+
+/*
+ * Read one line of arbitrary length and return it as a zero-terminated
+ * string allocated from the memory pool. The newline is consumed but not
+ * stored. Returns NULL if no input is available.
+ *
+ * The line is first gathered in fixed-size blocks living on the stack (the
+ * first one is a local variable, further ones come from alloca()), linked
+ * from the newest to the oldest. Once the total length is known, a single
+ * pool allocation is made and filled from its end towards its start.
+ */
+char *
+bgets_mp(struct fastbuf *f, struct mempool *mp)
+{
+ byte *src;
+ uns src_len = bdirect_read_prepare(f, &src);
+ if (!src_len)
+ return NULL;
+#define BLOCK_SIZE (4096 - sizeof(void *))
+ struct block {
+ struct block *prev;
+ byte data[BLOCK_SIZE];
+ } *blocks = NULL;
+ uns sum = 0, buf_len = BLOCK_SIZE, cnt;
+ struct block first_block, *new_block = &first_block;
+ byte *buf = new_block->data;
+ do
+ {
+ cnt = MIN(src_len, buf_len);
+ for (uns i = cnt; i--;)
+ {
+ byte v = *src++;
+ if (v == '\n')
+ {
+ bdirect_read_commit(f, src);
+ goto exit;
+ }
+ *buf++ = v;
+ }
+ if (cnt == src_len)
+ {
+ bdirect_read_commit(f, src);
+ src_len = bdirect_read_prepare(f, &src);
+ }
+ else
+ src_len -= cnt;
+ /* Current block full: put it on the list of full blocks and start a new one */
+ if (cnt == buf_len)
+ {
+ new_block->prev = blocks;
+ blocks = new_block;
+ sum += buf_len = BLOCK_SIZE;
+ new_block = alloca(sizeof(struct block));
+ buf = new_block->data;
+ }
+ else
+ buf_len -= cnt;
+ }
+ while (src_len);
+exit: ;
+ /* sum = bytes in the full blocks, len = bytes in the last, partially filled one */
+ uns len = buf - new_block->data;
+ byte *result = mp_alloc(mp, sum + len + 1) + sum;
+ result[len] = 0;
+ memcpy(result, new_block->data, len);
+ /* The list runs from the newest block to the oldest, so copy backwards */
+ while (blocks)
+ {
+ result -= BLOCK_SIZE;
+ memcpy(result, blocks->data, BLOCK_SIZE);
+ blocks = blocks->prev;
+ }
+ return result;
+#undef BLOCK_SIZE
+}
+
+/*
+ * Like bgets(), but the string in the input is terminated by a zero byte
+ * instead of a newline. Returns NULL if no input is available, otherwise a
+ * pointer to the terminating zero in the buffer. Dies if the string does
+ * not fit into l bytes.
+ */
+char *
+bgets0(struct fastbuf *f, char *b, uns l)
+{
+ ASSERT(l);
+ byte *src;
+ uns src_len = bdirect_read_prepare(f, &src);
+ if (!src_len)
+ return NULL;
+ do
+ {
+ uns cnt = MIN(l, src_len);
+ for (uns i = cnt; i--;)
+ {
+ /* The terminator is copied as well, so no separate store is needed on this path */
+ *b = *src++;
+ if (!*b)
+ {
+ bdirect_read_commit(f, src);
+ return b;
+ }
+ b++;
+ }
+ if (unlikely(cnt == l))
+ die("%s: Line too long", f->name);
+ l -= cnt;
+ bdirect_read_commit(f, src);
+ src_len = bdirect_read_prepare(f, &src);
+ }
+ while (src_len);
+ *b = 0;
+ return b;
+}
--- /dev/null
+/*
+ * UCW Library: Reading and writing of UTF-8 and UTF-16 on Fastbuf Streams
+ *
+ * (c) 2001--2004 Martin Mares <mj@ucw.cz>
+ * (c) 2004 Robert Spalek <robert@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+#include "ucw/fastbuf.h"
+#include "ucw/unicode.h"
+#include "ucw/ff-unicode.h"
+#include "ucw/ff-binary.h"
+
+/*** UTF-8 ***/
+
+/*
+ * Read one UTF-8 encoded character of at most 3 bytes.
+ *
+ * Returns the decoded code, the negative value of bgetc() at end of input,
+ * or `repl' for a malformed sequence. A lead byte of 0xf0 or above is
+ * rejected and the continuation bytes following it are skipped. The byte
+ * that ended a malformed sequence is pushed back.
+ */
+int
+bget_utf8_slow(struct fastbuf *b, uns repl)
+{
+ int c = bgetc(b);
+ int code;
+
+ if (c < 0x80) /* Includes EOF */
+ return c;
+ if (c < 0xc0) /* Incorrect combination */
+ return repl;
+ if (c >= 0xf0) /* Too large, skip it */
+ {
+ while ((c = bgetc(b)) >= 0x80 && c < 0xc0)
+ ;
+ goto wrong;
+ }
+ if (c >= 0xe0) /* 3 bytes */
+ {
+ code = c & 0x0f;
+ if ((c = bgetc(b)) < 0x80 || c >= 0xc0)
+ goto wrong;
+ code = (code << 6) | (c & 0x3f);
+ if ((c = bgetc(b)) < 0x80 || c >= 0xc0)
+ goto wrong;
+ code = (code << 6) | (c & 0x3f);
+ }
+ else /* 2 bytes */
+ {
+ code = c & 0x1f;
+ if ((c = bgetc(b)) < 0x80 || c >= 0xc0)
+ goto wrong;
+ code = (code << 6) | (c & 0x3f);
+ }
+ return code;
+
+ wrong:
+ /* Unless the offending read hit the end of input, return the byte to the stream */
+ if (c >= 0)
+ bungetc(b);
+ return repl;
+}
+
+/*
+ * Read one UTF-8 encoded character of at most 6 bytes.
+ *
+ * Returns the decoded code, the negative value of bgetc() at end of input,
+ * or `repl' for a malformed sequence. Lead bytes 0xfe and 0xff are rejected
+ * and the continuation bytes following them are skipped. The byte that
+ * ended a malformed sequence is pushed back.
+ */
+int
+bget_utf8_32_slow(struct fastbuf *b, uns repl)
+{
+ int c = bgetc(b);
+ int code;
+ int nr; /* number of continuation bytes still expected */
+
+ if (c < 0x80) /* Includes EOF */
+ return c;
+ if (c < 0xc0) /* Incorrect combination */
+ return repl;
+ if (c < 0xe0)
+ {
+ code = c & 0x1f;
+ nr = 1;
+ }
+ else if (c < 0xf0)
+ {
+ code = c & 0x0f;
+ nr = 2;
+ }
+ else if (c < 0xf8)
+ {
+ code = c & 0x07;
+ nr = 3;
+ }
+ else if (c < 0xfc)
+ {
+ code = c & 0x03;
+ nr = 4;
+ }
+ else if (c < 0xfe)
+ {
+ code = c & 0x01;
+ nr = 5;
+ }
+ else /* Too large, skip it */
+ {
+ while ((c = bgetc(b)) >= 0x80 && c < 0xc0)
+ ;
+ goto wrong;
+ }
+ while (nr-- > 0)
+ {
+ if ((c = bgetc(b)) < 0x80 || c >= 0xc0)
+ goto wrong;
+ code = (code << 6) | (c & 0x3f);
+ }
+ return code;
+
+ wrong:
+ /* Unless the offending read hit the end of input, return the byte to the stream */
+ if (c >= 0)
+ bungetc(b);
+ return repl;
+}
+
+/*
+ * Write one character below 65536 in UTF-8, using 1 to 3 bytes.
+ * Larger values trip an ASSERT.
+ */
+void
+bput_utf8_slow(struct fastbuf *b, uns u)
+{
+  ASSERT(u < 65536);
+
+  /* Plain ASCII is stored as is */
+  if (u < 0x80)
+    {
+      bputc(b, u);
+      return;
+    }
+
+  /* Lead byte, plus the middle byte of a 3-byte sequence */
+  if (u >= 0x800)
+    {
+      bputc(b, 0xe0 | (u >> 12));
+      bputc(b, 0x80 | ((u >> 6) & 0x3f));
+    }
+  else
+    bputc(b, 0xc0 | (u >> 6));
+
+  /* The last byte always carries the lowest 6 bits */
+  bputc(b, 0x80 | (u & 0x3f));
+}
+
+/*
+ * Write one character below 2^31 in UTF-8, using 1 to 6 bytes.
+ * Larger values trip an ASSERT.
+ */
+void
+bput_utf8_32_slow(struct fastbuf *b, uns u)
+{
+  ASSERT(u < (1U<<31));
+
+  /* Plain ASCII is stored as is */
+  if (u < 0x80)
+    {
+      bputc(b, u);
+      return;
+    }
+
+  /* Emit the lead byte and remember how many continuation bytes follow it */
+  uns tail;
+  if (u < 0x800)
+    {
+      bputc(b, 0xc0 | (u >> 6));
+      tail = 1;
+    }
+  else if (u < (1<<16))
+    {
+      bputc(b, 0xe0 | (u >> 12));
+      tail = 2;
+    }
+  else if (u < (1<<21))
+    {
+      bputc(b, 0xf0 | (u >> 18));
+      tail = 3;
+    }
+  else if (u < (1<<26))
+    {
+      bputc(b, 0xf8 | (u >> 24));
+      tail = 4;
+    }
+  else
+    {
+      bputc(b, 0xfc | (u >> 30));
+      tail = 5;
+    }
+
+  /* Continuation bytes carry 6 bits each, most significant group first */
+  while (tail--)
+    bputc(b, 0x80 | ((u >> (6 * tail)) & 0x3f));
+}
+
+/*** UTF-16 ***/
+
+/*
+ * Read one character encoded in big-endian UTF-16.
+ *
+ * Returns -1 at end of input, the decoded code otherwise, or `repl' when
+ * the 16-bit unit could not be read completely, when a second half of a
+ * surrogate pair comes first, or when a first half is not followed by a
+ * valid second half.
+ */
+int
+bget_utf16_be_slow(struct fastbuf *b, uns repl)
+{
+ if (bpeekc(b) < 0)
+ return -1;
+ uns u = bgetw_be(b), x, y;
+ if ((int)u < 0)
+ return repl;
+ /* Anything outside 0xd800..0xdfff is a complete character */
+ if ((x = u - 0xd800) >= 0x800)
+ return u;
+ if (x >= 0x400 || bpeekc(b) < 0 || (y = bgetw_be(b) - 0xdc00) >= 0x400)
+ return repl;
+ return 0x10000 + (x << 10) + y;
+}
+
+/*
+ * Read one character encoded in little-endian UTF-16.
+ *
+ * Returns -1 at end of input, the decoded code otherwise, or `repl' when
+ * the 16-bit unit could not be read completely, when a second half of a
+ * surrogate pair comes first, or when a first half is not followed by a
+ * valid second half.
+ */
+int
+bget_utf16_le_slow(struct fastbuf *b, uns repl)
+{
+ if (bpeekc(b) < 0)
+ return -1;
+ uns u = bgetw_le(b), x, y;
+ if ((int)u < 0)
+ return repl;
+ /* Anything outside 0xd800..0xdfff is a complete character */
+ if ((x = u - 0xd800) >= 0x800)
+ return u;
+ if (x >= 0x400 || bpeekc(b) < 0 || (y = bgetw_le(b) - 0xdc00) >= 0x400)
+ return repl;
+ return 0x10000 + (x << 10) + y;
+}
+
+/*
+ * Write one character in big-endian UTF-16: a single 16-bit unit for codes
+ * below 0x10000 outside the surrogate range, a surrogate pair for codes
+ * 0x10000..0x10ffff. Any other value trips an ASSERT.
+ */
+void
+bput_utf16_be_slow(struct fastbuf *b, uns u)
+{
+ if (u < 0xd800 || (u < 0x10000 && u >= 0xe000))
+ {
+ bputc(b, u >> 8);
+ bputc(b, u & 0xff);
+ }
+ /* u is unsigned, so values in the surrogate range wrap around here and fail the test */
+ else if ((u -= 0x10000) < 0x100000)
+ {
+ bputc(b, 0xd8 | (u >> 18));
+ bputc(b, (u >> 10) & 0xff);
+ bputc(b, 0xdc | ((u >> 8) & 0x3));
+ bputc(b, u & 0xff);
+ }
+ else
+ ASSERT(0);
+}
+
+/*
+ * Write one character in little-endian UTF-16: a single 16-bit unit for
+ * codes below 0x10000 outside the surrogate range, a surrogate pair for
+ * codes 0x10000..0x10ffff. Any other value trips an ASSERT.
+ */
+void
+bput_utf16_le_slow(struct fastbuf *b, uns u)
+{
+ if (u < 0xd800 || (u < 0x10000 && u >= 0xe000))
+ {
+ bputc(b, u & 0xff);
+ bputc(b, u >> 8);
+ }
+ /* u is unsigned, so values in the surrogate range wrap around here and fail the test */
+ else if ((u -= 0x10000) < 0x100000)
+ {
+ bputc(b, (u >> 10) & 0xff);
+ bputc(b, 0xd8 | (u >> 18));
+ bputc(b, u & 0xff);
+ bputc(b, 0xdc | ((u >> 8) & 0x3));
+ }
+ else
+ ASSERT(0);
+}
+
+#ifdef TEST
+
+#include <stdlib.h>
+#include <stdio.h>
+
+/*
+ * Test driver.  argv[1] selects (case-insensitively) one of the functions
+ * listed in FUNCS.  Hexadecimal numbers are read from stdin: for the BGET_*
+ * functions they are raw bytes to decode, for the BPUT_* functions they are
+ * characters to encode.  The result is printed in hex on a single line.
+ * Returns 1 when no known function name was given, 0 otherwise.
+ */
+int main(int argc, char **argv)
+{
+  /* X-macro list; all decoders must precede all encoders (see below) */
+#define FUNCS \
+    F(BGET_UTF8) F(BGET_UTF8_32) F(BGET_UTF16_BE) F(BGET_UTF16_LE) \
+    F(BPUT_UTF8) F(BPUT_UTF8_32) F(BPUT_UTF16_BE) F(BPUT_UTF16_LE)
+
+  enum {
+#define F(x) FUNC_##x,
+    FUNCS
+#undef F
+  };
+  char *names[] = {
+#define F(x) [FUNC_##x] = #x,
+    FUNCS
+#undef F
+  };
+
+  /* ~0U marks "no function selected" */
+  uns func = ~0U;
+  if (argc > 1)
+    for (uns i = 0; i < ARRAY_SIZE(names); i++)
+      if (!strcasecmp(names[i], argv[1]))
+        func = i;
+  if (!~func)
+    {
+      fprintf(stderr, "Invalid usage!\n");
+      return 1;
+    }
+
+  struct fastbuf *b = fbgrow_create(8);
+  if (func < FUNC_BPUT_UTF8)
+    {
+      /* Decoder test: store all input bytes first, then decode them */
+      uns u;
+      while (scanf("%x", &u) == 1)
+        bputc(b, u);
+      fbgrow_rewind(b);
+      while (bpeekc(b) >= 0)
+        {
+          /* Separate the values by spaces, but do not start with one */
+          if (btell(b))
+            putchar(' ');
+          switch (func)
+            {
+            case FUNC_BGET_UTF8:
+              u = bget_utf8_slow(b, UNI_REPLACEMENT);
+              break;
+            case FUNC_BGET_UTF8_32:
+              u = bget_utf8_32_slow(b, UNI_REPLACEMENT);
+              break;
+            case FUNC_BGET_UTF16_BE:
+              u = bget_utf16_be_slow(b, UNI_REPLACEMENT);
+              break;
+            case FUNC_BGET_UTF16_LE:
+              u = bget_utf16_le_slow(b, UNI_REPLACEMENT);
+              break;
+            default:
+              ASSERT(0);
+            }
+          printf("%04x", u);
+        }
+      putchar('\n');
+    }
+  else
+    {
+      /* Encoder test: encode every character and dump the produced bytes */
+      uns u, i = 0;
+      while (scanf("%x", &u) == 1)
+        {
+          switch (func)
+            {
+            case FUNC_BPUT_UTF8:
+              bput_utf8_slow(b, u);
+              break;
+            case FUNC_BPUT_UTF8_32:
+              bput_utf8_32_slow(b, u);
+              break;
+            case FUNC_BPUT_UTF16_BE:
+              bput_utf16_be_slow(b, u);
+              break;
+            case FUNC_BPUT_UTF16_LE:
+              bput_utf16_le_slow(b, u);
+              break;
+            default:
+              ASSERT(0);
+            }
+          fbgrow_rewind(b);
+          /* i counts bytes printed so far, so the separator spans characters */
+          while (bpeekc(b) >= 0)
+            {
+              if (i++)
+                putchar(' ');
+              printf("%02x", bgetc(b));
+            }
+          /* Empty the buffer for the next character */
+          fbgrow_reset(b);
+        }
+      putchar('\n');
+    }
+  bclose(b);
+
+  return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library: Reading and writing of UTF-8 and UTF-16 on Fastbuf Streams
+ *
+ * (c) 2001--2004 Martin Mares <mj@ucw.cz>
+ * (c) 2004 Robert Spalek <robert@ucw.cz>
+ * (c) 2007--2008 Pavel Charvat <pchar@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_FF_UNICODE_H
+#define _UCW_FF_UNICODE_H
+
+#include "ucw/fastbuf.h"
+#include "ucw/unicode.h"
+
+/* ** UTF-8 ** */
+
+int bget_utf8_slow(struct fastbuf *b, uns repl);
+int bget_utf8_32_slow(struct fastbuf *b, uns repl);
+void bput_utf8_slow(struct fastbuf *b, uns u);
+void bput_utf8_32_slow(struct fastbuf *b, uns u);
+
+/*
+ * Read one UTF-8 character from @b.  @repl is handed on to the decoder
+ * (presumably the value substituted for malformed input -- confirm in
+ * utf8_get_repl()).
+ */
+static inline int
+bget_utf8_repl(struct fastbuf *b, uns repl)
+{
+  uns code;
+
+  /* With fewer than 3 bytes reported by bavailr(), take the slow path */
+  if (bavailr(b) < 3)
+    return bget_utf8_slow(b, repl);
+
+  /* Otherwise decode directly from the buffer and advance its pointer */
+  b->bptr = utf8_get_repl(b->bptr, &code, repl);
+  return code;
+}
+
+/*
+ * Read one UTF-8 character from @b using the 32-bit decoder.  @repl is
+ * handed on to the decoder (presumably the value substituted for malformed
+ * input -- confirm in utf8_32_get_repl()).
+ */
+static inline int
+bget_utf8_32_repl(struct fastbuf *b, uns repl)
+{
+  uns code;
+
+  /* With fewer than 6 bytes reported by bavailr(), take the slow path */
+  if (bavailr(b) < 6)
+    return bget_utf8_32_slow(b, repl);
+
+  /* Otherwise decode directly from the buffer and advance its pointer */
+  b->bptr = utf8_32_get_repl(b->bptr, &code, repl);
+  return code;
+}
+
+static inline int bget_utf8(struct fastbuf *b) /** Read a single UTF-8 character from range [0, 0xffff]; UNI_REPLACEMENT is passed as the replacement value. **/
+{
+ return bget_utf8_repl(b, UNI_REPLACEMENT);
+}
+
+static inline int bget_utf8_32(struct fastbuf *b) /** Read a single UTF-8 character (from the whole Unicode range); UNI_REPLACEMENT is passed as the replacement value. **/
+{
+ return bget_utf8_32_repl(b, UNI_REPLACEMENT);
+}
+
+static inline void bput_utf8(struct fastbuf *b, uns u) /** Write a single UTF-8 character from range [0, 0xffff]. **/
+{
+  /* With fewer than 3 bytes reported by bavailw(), take the slow path */
+  if (bavailw(b) < 3)
+    {
+      bput_utf8_slow(b, u);
+      return;
+    }
+  /* Otherwise encode directly into the buffer and advance its pointer */
+  b->bptr = utf8_put(b->bptr, u);
+}
+
+static inline void bput_utf8_32(struct fastbuf *b, uns u) /** Write a single UTF-8 character (from the whole Unicode range). **/
+{
+  /* With fewer than 6 bytes reported by bavailw(), take the slow path */
+  if (bavailw(b) < 6)
+    {
+      bput_utf8_32_slow(b, u);
+      return;
+    }
+  /* Otherwise encode directly into the buffer and advance its pointer */
+  b->bptr = utf8_32_put(b->bptr, u);
+}
+
+/* ** UTF-16 ** */
+
+int bget_utf16_be_slow(struct fastbuf *b, uns repl);
+int bget_utf16_le_slow(struct fastbuf *b, uns repl);
+void bput_utf16_be_slow(struct fastbuf *b, uns u);
+void bput_utf16_le_slow(struct fastbuf *b, uns u);
+
+/*
+ * Read one big-endian UTF-16 character from @b; @repl is handed on to
+ * the decoder as the replacement value.
+ */
+static inline int
+bget_utf16_be_repl(struct fastbuf *b, uns repl)
+{
+  uns code;
+
+  /* With fewer than 4 bytes reported by bavailr(), take the slow path */
+  if (bavailr(b) < 4)
+    return bget_utf16_be_slow(b, repl);
+
+  /* Otherwise decode directly from the buffer and advance its pointer */
+  b->bptr = utf16_be_get_repl(b->bptr, &code, repl);
+  return code;
+}
+
+/*
+ * Read one little-endian UTF-16 character from @b; @repl is handed on to
+ * the decoder as the replacement value.
+ */
+static inline int
+bget_utf16_le_repl(struct fastbuf *b, uns repl)
+{
+  uns code;
+
+  /* With fewer than 4 bytes reported by bavailr(), take the slow path */
+  if (bavailr(b) < 4)
+    return bget_utf16_le_slow(b, repl);
+
+  /* Otherwise decode directly from the buffer and advance its pointer */
+  b->bptr = utf16_le_get_repl(b->bptr, &code, repl);
+  return code;
+}
+
+/**
+ * Read a UTF-16 character from fastbuf, passing UNI_REPLACEMENT as the replacement value.
+ * Big endian version.
+ **/
+static inline int bget_utf16_be(struct fastbuf *b)
+{
+ return bget_utf16_be_repl(b, UNI_REPLACEMENT);
+}
+
+/**
+ * Read a UTF-16 character from fastbuf, passing UNI_REPLACEMENT as the replacement value.
+ * Little endian version.
+ **/
+static inline int bget_utf16_le(struct fastbuf *b)
+{
+ return bget_utf16_le_repl(b, UNI_REPLACEMENT);
+}
+
+/**
+ * Write a UTF-16 character to fastbuf.
+ * Big endian version.
+ **/
+static inline void bput_utf16_be(struct fastbuf *b, uns u)
+{
+  /* With fewer than 4 bytes reported by bavailw(), take the slow path */
+  if (bavailw(b) < 4)
+    {
+      bput_utf16_be_slow(b, u);
+      return;
+    }
+  /* Otherwise encode directly into the buffer and advance its pointer */
+  b->bptr = utf16_be_put(b->bptr, u);
+}
+
+/**
+ * Write a UTF-16 character to fastbuf.
+ * Little endian version.
+ **/
+static inline void bput_utf16_le(struct fastbuf *b, uns u)
+{
+  /* With fewer than 4 bytes reported by bavailw(), take the slow path */
+  if (bavailw(b) < 4)
+    {
+      bput_utf16_le_slow(b, u);
+      return;
+    }
+  /* Otherwise encode directly into the buffer and advance its pointer */
+  b->bptr = utf16_le_put(b->bptr, u);
+}
+
+#endif
--- /dev/null
+# Tests for the Unicode module
+
+Name: bput_utf8
+Run: ../obj/ucw/ff-unicode-t bput_utf8
+In: 0041 0048 004f 004a
+Out: 41 48 4f 4a
+
+Name: bget_utf8_32
+Run: ../obj/ucw/ff-unicode-t bget_utf8_32
+In: fe 83 81
+Out: fffc
+
+Name: bput_utf16_be
+Run: ../obj/ucw/ff-unicode-t bput_utf16_be
+In: 0041 004a 2a5f feff 0000 10ffff ffff 10000
+Out: 00 41 00 4a 2a 5f fe ff 00 00 db ff df ff ff ff d8 00 dc 00
+
+Name: bput_utf16_le
+Run: ../obj/ucw/ff-unicode-t bput_utf16_le
+In: 0041 004a 2a5f feff 0000 10ffff ffff 10000
+Out: 41 00 4a 00 5f 2a ff fe 00 00 ff db ff df ff ff 00 d8 00 dc
+
+Name: bget_utf16_be (1)
+Run: ../obj/ucw/ff-unicode-t bget_utf16_be
+In: 00 41 00 4a 2a 5f fe ff 00 00 db ff df ff ff ff d8 00 dc 00
+Out: 0041 004a 2a5f feff 0000 10ffff ffff 10000
+
+Name: bget_utf16_be (2)
+Run: ../obj/ucw/ff-unicode-t bget_utf16_be
+In: dc 1a 2a 5f d8 01 d8 01 2a 5f d8 01
+Out: fffc 2a5f fffc 2a5f fffc
+
+Name: bget_utf16_le (1)
+Run: ../obj/ucw/ff-unicode-t bget_utf16_le
+In: 41 00 4a 00 5f 2a ff fe 00 00 ff db ff df ff ff 00 d8 00 dc
+Out: 0041 004a 2a5f feff 0000 10ffff ffff 10000
+
+Name: bget_utf16_le (2)
+Run: ../obj/ucw/ff-unicode-t bget_utf16_le
+In: 1a dc 5f 2a 01 d8 01 d8 5f 2a 01 d8
+Out: fffc 2a5f fffc 2a5f fffc
--- /dev/null
+/*
+ * UCW Library -- A simple growing buffer
+ *
+ * (c) 2004, Robert Spalek <robert@ucw.cz>
+ * (c) 2005, Martin Mares <mj@ucw.cz>
+ *
+ * Define the following macros:
+ *
+ * GBUF_TYPE data type of records stored in the buffer
+ * GBUF_PREFIX(x) add a name prefix to all global symbols
+ * GBUF_TRACE(msg...) log growing of buffer [optional]
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+/**
+ * Type identifier of the buffer.
+ * The macro is not available outside the header file,
+ * but it is used in the definitions of functions.
+ **/
+#define BUF_T GBUF_PREFIX(t)
+
+/**
+ * The growing buffer.
+ * `ptr` holds the memory and `len` is the current
+ * length of available memory.
+ **/
+typedef struct BUF_T {
+ uns len; /* current capacity, counted in items of GBUF_TYPE (not bytes) */
+ GBUF_TYPE *ptr; /* the allocated items; NULL while the buffer is empty */
+} BUF_T;
+
+/**
+ * Initializes an empty growing buffer in @b.
+ **/
+static inline void GBUF_PREFIX(init)(BUF_T *b)
+{
+  /* No memory is held yet, so the capacity is zero */
+  b->len = 0;
+  b->ptr = NULL;
+}
+
+/**
+ * Frees all memory in the buffer and returns it
+ * to an empty state.
+ **/
+static void UNUSED GBUF_PREFIX(done)(BUF_T *b)
+{
+  /* Release the storage only if some has ever been allocated */
+  if (b->ptr != NULL)
+    xfree(b->ptr);
+  /* Leave the buffer in the same state as a freshly initialized one */
+  GBUF_PREFIX(init)(b);
+}
+
+/**
+ * Sets the length of the buffer @b to exactly @len.
+ * Do not use it for growing, it would be slow (you can use it at the end,
+ * when you know the exact size).
+ *
+ * Use <<fun__GENERIC_LINK_|GBUF_PREFIX|grow|,`GBUF_PREFIX(grow)()`>>
+ * for growing.
+ **/
+static void UNUSED GBUF_PREFIX(set_size)(BUF_T *b, uns len)
+{
+ b->len = len; /* capacity is counted in items, not in bytes */
+ b->ptr = xrealloc(b->ptr, len * sizeof(GBUF_TYPE)); /* resize to exactly len items; NOTE(review): the result is not checked -- presumably xrealloc() does not return on failure, confirm */
+#ifdef GBUF_TRACE
+ GBUF_TRACE(STRINGIFY_EXPANDED(BUF_T) " growing to %u items", len); /* optional logging hook defined by the including file */
+#endif
+}
+
+/* Out-of-line part of grow(): resize to @len, but at least double the buffer. */
+static void UNUSED GBUF_PREFIX(do_grow)(BUF_T *b, uns len)
+{
+  /* Doubling ensures logarithmic cost */
+  uns doubled = 2*b->len;
+
+  GBUF_PREFIX(set_size)(b, (len < doubled) ? doubled : len);
+}
+
+/**
+ * Sets the size of the buffer @b to at least @len.
+ * It grows in exponential manner, to ensure the total cost
+ * of reallocs is linear with the final size.
+ *
+ * You can tweak the final size (when you do not need to grow
+ * any more) by
+ * <<fun__GENERIC_LINK_|GBUF_PREFIX|set_size|,`GBUF_PREFIX(set_size)()`>>.
+ **/
+static inline GBUF_TYPE *GBUF_PREFIX(grow)(BUF_T *b, uns len)
+{
+  /* Common case: the buffer is already large enough */
+  if (!unlikely(len > b->len))
+    return b->ptr;
+
+  /* Rare case: enlarge it; the storage pointer may change */
+  GBUF_PREFIX(do_grow)(b, len);
+  return b->ptr;
+}
+
+#undef GBUF_TYPE
+#undef GBUF_PREFIX
+#undef GBUF_TRACE
+#undef BUF_T
--- /dev/null
+#include "ucw/lib.h"
+#include "ucw/getopt.h"
+
+void
+reset_getopt(void)
+{
+ // Zero asks getopt to reinitialize itself on its next call; should work on GNU libc
+ optind = 0;
+}
+
+#ifdef TEST
+#include <stdio.h>
+
+/*
+ * Run getopt_long() over the whole command line and print one line per
+ * recognized or unknown option, followed by the count of the remaining
+ * non-option arguments (if there are any).
+ */
+static void
+parse(int argc, char **argv)
+{
+  static struct option longopts[] = {
+    { "longa", 0, 0, 'a' },
+    { "longb", 0, 0, 'b' },
+    { "longc", 1, 0, 'c' },
+    { "longd", 1, 0, 'd' },
+    { 0, 0, 0, 0 }
+  };
+
+  for (;;)
+    {
+      int opt = getopt_long(argc, argv, "abc:d:", longopts, NULL);
+      /* A negative value ends the option scan */
+      if (opt < 0)
+        break;
+
+      if (opt == 'a' || opt == 'b')
+        printf("option %c\n", opt);
+      else if (opt == 'c' || opt == 'd')
+        printf("option %c with value `%s'\n", opt, optarg);
+      else if (opt == '?')
+        printf("unknown option\n");
+      else
+        printf("getopt returned unexpected char 0x%02x\n", opt);
+    }
+
+  if (optind != argc)
+    printf("%d nonoption arguments\n", argc - optind);
+}
+
+/*
+ * Parse the command line twice with a reset_getopt() in between, so that
+ * both passes can be compared in the test output.
+ */
+int
+main(int argc, char **argv)
+{
+  /* Suppress getopt's own error messages */
+  opterr = 0;
+  for (int pass = 0; pass < 2; pass++)
+    {
+      if (pass)
+        {
+          printf("reset\n");
+          reset_getopt();
+        }
+      parse(argc, argv);
+    }
+  return 0;
+}
+#endif
--- /dev/null
+/*
+ * UCW Library -- Parsing of configuration and command-line options
+ *
+ * (c) 2001--2006 Robert Spalek <robert@ucw.cz>
+ * (c) 2003--2006 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_GETOPT_H
+#define _UCW_GETOPT_H
+
+#ifdef CONFIG_OWN_GETOPT
+#include "ucw/getopt/getopt-sh.h"
+#else
+#include <getopt.h>
+#endif
+
+void reset_getopt(void); /** If you want to start parsing of the arguments from the first one again. **/
+
+/***
+ * [[conf_load]]
+ * Safe configuration loading
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * These functions can be used to safely load or reload configuration.
+ */
+
+/**
+ * The default config (DEFAULT_CONFIG config option) or NULL if already loaded.
+ * You can set it to something else manually.
+ */
+extern char *cf_def_file;
+/**
+ * Name of environment variable that can override what configuration
+ * is loaded.
+ **/
+extern char *cf_env_file;
+int cf_reload(const char *file); /** Reload configuration from @file, replace the old one. **/
+int cf_load(const char *file); /** Load configuration from @file. If @file is NULL, reload all loaded configuration files. **/
+/**
+ * Parse some part of configuration passed in @string.
+ * The syntax is the same as in the <<config:,configuration file>>.
+ **/
+int cf_set(const char *string);
+
+/***
+ * [[conf_direct]]
+ * Direct access
+ * ~~~~~~~~~~~~~
+ *
+ * Direct access to configuration items.
+ * You probably should not need this.
+ ***/
+
+/**
+ * List of operations used on items.
+ * This macro is used to generate internal source code,
+ * but you may be interested in the list of operations it creates.
+ *
+ * Each operation corresponds to the same-named operation
+ * described in <<config:operations,configuration syntax>>.
+ **/
+#define CF_OPERATIONS T(CLOSE) T(SET) T(CLEAR) T(ALL) \
+ T(APPEND) T(PREPEND) T(REMOVE) T(EDIT) T(AFTER) T(BEFORE) T(COPY) T(RESET)
+ /* Closing brace finishes previous block.
+ * Basic attributes (static, dynamic, parsed) can be used with SET.
+ * Dynamic arrays can be used with SET, APPEND, PREPEND.
+ * Sections can be used with SET.
+ * Lists can be used with everything. */
+#define T(x) OP_##x,
+enum cf_operation { CF_OPERATIONS }; /** Allowed operations on items. See <<def_CF_OPERATIONS,`CF_OPERATIONS`>> for list (they have an `OP_` prefix -- it means you use `OP_SET` instead of just `SET`). **/
+#undef T
+
+struct cf_item;
+/**
+ * Searches for a configuration item called @name.
+ * If it is found, it is copied into @item and NULL is returned.
+ * Otherwise, an error is returned and @item is zeroed.
+ **/
+char *cf_find_item(const char *name, struct cf_item *item);
+/**
+ * Performs a single operation on a given item.
+ **/
+char *cf_modify_item(struct cf_item *item, enum cf_operation op, int number, char **pars);
+
+/***
+ * [[conf_dump]]
+ * Debug dumping
+ * ~~~~~~~~~~~~~
+ ***/
+
+struct fastbuf;
+/**
+ * Take everything and write it into @fb.
+ **/
+void cf_dump_sections(struct fastbuf *fb);
+
+/***
+ * [[conf_journal]]
+ * Journaling control
+ * ~~~~~~~~~~~~~~~~~~
+ *
+ * The configuration system uses journaling to safely reload
+ * configuration. It begins a transaction and tries to load the
+ * configuration. If it fails, it restores the original state.
+ *
+ * The behaviour of journal is described in <<reload,reloading configuration>>.
+ ***/
+
+struct cf_journal_item; /** Opaque identifier of the journal state. **/
+/**
+ * Starts a new transaction. It returns the current state so you can
+ * get back to it. The @new_pool parameter tells if a new memory pool
+ * should be created and used from now.
+ **/
+struct cf_journal_item *cf_journal_new_transaction(uns new_pool);
+/**
+ * Marks current state as a complete transaction. The @new_pool
+ * parameter tells if the transaction was created with new memory pool
+ * (the parameter must be the same as the one with
+ * @cf_journal_new_transaction() was called with). The @oldj parameter
+ * is the journal state returned from last
+ * @cf_journal_new_transaction() call.
+ **/
+void cf_journal_commit_transaction(uns new_pool, struct cf_journal_item *oldj);
+/**
+ * Returns to an old journal state, reverting anything the current
+ * transaction did. The @new_pool parameter must be the same as the
+ * one you used when you created the transaction. The @oldj parameter
+ * is the journal state you got from @cf_journal_new_transaction() --
+ * it is the state to return to.
+ **/
+void cf_journal_rollback_transaction(uns new_pool, struct cf_journal_item *oldj);
+
+/***
+ * [[conf_getopt]]
+ * Loading by @cf_getopt()
+ * ~~~~~~~~~~~~~~~~~~~~~~~
+ ***/
+
+/**
+ * Short options for loading configuration by @cf_getopt().
+ * Prepend to your own options.
+ **/
+#define CF_SHORT_OPTS "C:S:"
+/**
+ * Long options for loading configuration by @cf_getopt().
+ * Prepend to your own options.
+ **/
+#define CF_LONG_OPTS {"config", 1, 0, 'C'}, {"set", 1, 0, 'S'}, CF_LONG_OPTS_DEBUG
+/**
+ * Use this constant as @long_opts parameter of @cf_getopt() if you do
+ * not have any long options in your program.
+ **/
+#define CF_NO_LONG_OPTS (const struct option []) { CF_LONG_OPTS { NULL, 0, 0, 0 } }
+#ifndef CF_USAGE_TAB
+#define CF_USAGE_TAB ""
+#endif
+/**
+ * This macro provides text describing usage of the configuration
+ * loading options. Concatenate with description of your options and
+ * write to the user, if he/she provides invalid options.
+ **/
+#define CF_USAGE \
+"-C, --config filename\t" CF_USAGE_TAB "Override the default configuration file\n\
+-S, --set sec.item=val\t" CF_USAGE_TAB "Manual setting of a configuration item\n" CF_USAGE_DEBUG
+
+#ifdef CONFIG_DEBUG
+#define CF_LONG_OPTS_DEBUG { "dumpconfig", 0, 0, 0x64436667 } ,
+#define CF_USAGE_DEBUG " --dumpconfig\t" CF_USAGE_TAB "Dump program configuration\n"
+#else
+#define CF_LONG_OPTS_DEBUG
+#define CF_USAGE_DEBUG
+#endif
+
+/**
+ * Takes care of parsing the command-line arguments, loading the
+ * default configuration file (<<var_cf_def_file,`cf_def_file`>>) and processing
+ * configuration options. The calling convention is the same as with GNU getopt_long(),
+ * but you must prefix your own short/long options by the
+ * <<def_CF_LONG_OPTS,`CF_LONG_OPTS`>> or <<def_CF_SHORT_OPTS,`CF_SHORT_OPTS`>> or
+ * pass <<def_CF_NO_LONG_OPTS,`CF_NO_LONG_OPTS`>> if there are no long options.
+ *
+ * The default configuration file can be overridden by the --config option,
+ * which must come first. During parsing of all other options, the configuration
+ * is already available.
+ **/
+int cf_getopt(int argc, char * const argv[], const char *short_opts, const struct option *long_opts, int *long_index);
+
+#endif
--- /dev/null
+# Tests for getopt
+
+Run: ../obj/ucw/getopt-t -a -b --longc 2819 -d -a 1 2 3
+Out: option a
+ option b
+ option c with value `2819'
+ option d with value `-a'
+ 3 nonoption arguments
+ reset
+ option a
+ option b
+ option c with value `2819'
+ option d with value `-a'
+ 3 nonoption arguments
+
+Run: ../obj/ucw/getopt-t -a -x
+Out: option a
+ unknown option
+ reset
+ option a
+ unknown option
--- /dev/null
+# Makefile for the UCW GetOpt Library (c) 2007 Pavel Charvat <pchar@ucw.cz>
+
+DIRS+=ucw/getopt
+
+LIBUCW_MODS+=getopt/getopt-sh
--- /dev/null
+This directory contains getopt routines from the GNU libc 2.5.
+We need this as a fallback for our reset_getopt(), because there is
+no standardized interface for such an operation.
+
+They are distributed under the GNU LGPL.
+
+All files are exact copies of the original distribution with very
+few exceptions commented with `// SHERLOCK' prefix.
+I only provided my own getopt-sh.c, getopt-sh.h and Makefile.
+
+ Pavel Charvat, 2007
+
--- /dev/null
+#include "getopt-sh.h"
+#include "getopt_int.h"
+#include "getopt.c"
+#include "getopt1.c"
--- /dev/null
+#ifndef _UCW_GETOPT_GETOPT_SH_H
+#define _UCW_GETOPT_GETOPT_SH_H
+
+#define getopt ucw_getopt
+#define getopt_long ucw_getopt_long
+#define getopt_long_only ucw_getopt_longonly
+#define optarg ucw_optarg
+#define optind ucw_optind
+#define opterr ucw_opterr
+#define optopt ucw_optopt
+
+#include "ucw/getopt/getopt.h"
+
+#endif
--- /dev/null
+/* Getopt for GNU.
+ NOTE: getopt is now part of the C library, so if you don't know what
+ "Keep this file name-space clean" means, talk to drepper@gnu.org
+ before changing it!
+ Copyright (C) 1987,88,89,90,91,92,93,94,95,96,98,99,2000,2001,2002,2003,2004
+ Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+\f
+/* This tells Alpha OSF/1 not to define a getopt prototype in <stdio.h>.
+ Ditto for AIX 3.2 and <stdlib.h>. */
+#ifndef _NO_PROTO
+# define _NO_PROTO
+#endif
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <stdio.h>
+
+/* Comment out all this code if we are using the GNU C Library, and are not
+ actually compiling the library itself. This code is part of the GNU C
+ Library, but also included in many other GNU distributions. Compiling
+ and linking in this code is a waste when using the GNU C library
+ (especially if it is a shared library). Rather than having every GNU
+ program understand `configure --with-gnu-libc' and omit the object files,
+ it is simpler to just do this in the source for each such file. */
+
+#define GETOPT_INTERFACE_VERSION 2
+#if !defined _LIBC && defined __GLIBC__ && __GLIBC__ >= 2
+# include <gnu-versions.h>
+# if _GNU_GETOPT_INTERFACE_VERSION == GETOPT_INTERFACE_VERSION
+//# define ELIDE_CODE // SHERLOCK: disabled
+# endif
+#endif
+
+#ifndef ELIDE_CODE
+
+
+/* This needs to come after some library #include
+ to get __GNU_LIBRARY__ defined. */
+#ifdef __GNU_LIBRARY__
+/* Don't include stdlib.h for non-GNU C libraries because some of them
+ contain conflicting prototypes for getopt. */
+# include <stdlib.h>
+# include <unistd.h>
+#endif /* GNU C library. */
+
+#include <string.h>
+
+#ifdef VMS
+# include <unixlib.h>
+#endif
+
+#ifdef _LIBC
+# include <libintl.h>
+#else
+//# include "gettext.h" // SHERLOCK: replaced by <libintl.h>
+# include <libintl.h>
+# define _(msgid) gettext (msgid)
+#endif
+
+#if defined _LIBC && defined USE_IN_LIBIO
+# include <wchar.h>
+#endif
+
+#ifndef attribute_hidden
+# define attribute_hidden
+#endif
+
+/* This version of `getopt' appears to the caller like standard Unix `getopt'
+ but it behaves differently for the user, since it allows the user
+ to intersperse the options with the other arguments.
+
+ As `getopt' works, it permutes the elements of ARGV so that,
+ when it is done, all the options precede everything else. Thus
+ all application programs are extended to handle flexible argument order.
+
+ Setting the environment variable POSIXLY_CORRECT disables permutation.
+ Then the behavior is completely standard.
+
+ GNU application programs can use a third alternative mode in which
+ they can distinguish the relative order of options and other arguments. */
+
+#include "getopt.h"
+#include "getopt_int.h"
+
+/* For communication from `getopt' to the caller.
+ When `getopt' finds an option that takes an argument,
+ the argument value is returned here.
+ Also, when `ordering' is RETURN_IN_ORDER,
+ each non-option ARGV-element is returned here. */
+
+char *optarg;
+
+/* Index in ARGV of the next element to be scanned.
+ This is used for communication to and from the caller
+ and for communication between successive calls to `getopt'.
+
+ On entry to `getopt', zero means this is the first call; initialize.
+
+ When `getopt' returns -1, this is the index of the first of the
+ non-option elements that the caller should itself scan.
+
+ Otherwise, `optind' communicates from one call to the next
+ how much of ARGV has been scanned so far. */
+
+/* 1003.2 says this must be 1 before any call. */
+int optind = 1;
+
+/* Callers store zero here to inhibit the error message
+ for unrecognized options. */
+
+int opterr = 1;
+
+/* Set to an option character which was unrecognized.
+ This must be initialized on some systems to avoid linking in the
+ system's own getopt implementation. */
+
+int optopt = '?';
+
+/* Keep a global copy of all internal members of getopt_data. */
+
+static struct _getopt_data getopt_data;
+
+\f
+#ifndef __GNU_LIBRARY__
+
+/* Avoid depending on library functions or files
+ whose names are inconsistent. */
+
+#ifndef getenv
+extern char *getenv ();
+#endif
+
+#endif /* not __GNU_LIBRARY__ */
+\f
+#ifdef _LIBC
+/* Stored original parameters.
+ XXX This is no good solution. We should rather copy the args so
+ that we can compare them later. But we must not use malloc(3). */
+extern int __libc_argc;
+extern char **__libc_argv;
+
+/* Bash 2.0 gives us an environment variable containing flags
+ indicating ARGV elements that should not be considered arguments. */
+
+# ifdef USE_NONOPTION_FLAGS
+/* Defined in getopt_init.c */
+extern char *__getopt_nonoption_flags;
+# endif
+
+# ifdef USE_NONOPTION_FLAGS
+# define SWAP_FLAGS(ch1, ch2) \
+ if (d->__nonoption_flags_len > 0) \
+ { \
+ char __tmp = __getopt_nonoption_flags[ch1]; \
+ __getopt_nonoption_flags[ch1] = __getopt_nonoption_flags[ch2]; \
+ __getopt_nonoption_flags[ch2] = __tmp; \
+ }
+# else
+# define SWAP_FLAGS(ch1, ch2)
+# endif
+#else /* !_LIBC */
+# define SWAP_FLAGS(ch1, ch2)
+#endif /* _LIBC */
+
+/* Exchange two adjacent subsequences of ARGV.
+ One subsequence is elements [first_nonopt,last_nonopt)
+ which contains all the non-options that have been skipped so far.
+ The other is elements [last_nonopt,optind), which contains all
+ the options processed since those non-options were skipped.
+
+ `first_nonopt' and `last_nonopt' are relocated so that they describe
+ the new indices of the non-options in ARGV after they are moved. */
+
+static void
+exchange (char **argv, struct _getopt_data *d)
+{
+ int bottom = d->__first_nonopt;
+ int middle = d->__last_nonopt;
+ int top = d->optind;
+ char *tem;
+
+ /* Exchange the shorter segment with the far end of the longer segment.
+ That puts the shorter segment into the right place.
+ It leaves the longer segment in the right place overall,
+ but it consists of two parts that need to be swapped next. */
+
+#if defined _LIBC && defined USE_NONOPTION_FLAGS
+ /* First make sure the handling of the `__getopt_nonoption_flags'
+ string can work normally. Our top argument must be in the range
+ of the string. */
+ if (d->__nonoption_flags_len > 0 && top >= d->__nonoption_flags_max_len)
+ {
+ /* We must extend the array. The user plays games with us and
+ presents new arguments. */
+ char *new_str = malloc (top + 1);
+ if (new_str == NULL)
+ d->__nonoption_flags_len = d->__nonoption_flags_max_len = 0;
+ else
+ {
+ memset (__mempcpy (new_str, __getopt_nonoption_flags,
+ d->__nonoption_flags_max_len),
+ '\0', top + 1 - d->__nonoption_flags_max_len);
+ d->__nonoption_flags_max_len = top + 1;
+ __getopt_nonoption_flags = new_str;
+ }
+ }
+#endif
+
+ while (top > middle && middle > bottom)
+ {
+ if (top - middle > middle - bottom)
+ {
+ /* Bottom segment is the short one. */
+ int len = middle - bottom;
+ register int i;
+
+ /* Swap it with the top part of the top segment. */
+ for (i = 0; i < len; i++)
+ {
+ tem = argv[bottom + i];
+ argv[bottom + i] = argv[top - (middle - bottom) + i];
+ argv[top - (middle - bottom) + i] = tem;
+ SWAP_FLAGS (bottom + i, top - (middle - bottom) + i);
+ }
+ /* Exclude the moved bottom segment from further swapping. */
+ top -= len;
+ }
+ else
+ {
+ /* Top segment is the short one. */
+ int len = top - middle;
+ register int i;
+
+ /* Swap it with the bottom part of the bottom segment. */
+ for (i = 0; i < len; i++)
+ {
+ tem = argv[bottom + i];
+ argv[bottom + i] = argv[middle + i];
+ argv[middle + i] = tem;
+ SWAP_FLAGS (bottom + i, middle + i);
+ }
+ /* Exclude the moved top segment from further swapping. */
+ bottom += len;
+ }
+ }
+
+ /* Update records for the slots the non-options now occupy. */
+
+ d->__first_nonopt += (d->optind - d->__last_nonopt);
+ d->__last_nonopt = d->optind;
+}
+
+/* Initialize the internal data when the first call is made. */
+
+static const char *
+_getopt_initialize (int argc, char *const *argv, const char *optstring,
+ struct _getopt_data *d)
+{
+ /* Start processing options with ARGV-element 1 (since ARGV-element 0
+ is the program name); the sequence of previously skipped
+ non-option ARGV-elements is empty. */
+
+ d->__first_nonopt = d->__last_nonopt = d->optind;
+
+ d->__nextchar = NULL;
+
+ d->__posixly_correct = !!getenv ("POSIXLY_CORRECT");
+
+ /* Determine how to handle the ordering of options and nonoptions. */
+
+ if (optstring[0] == '-')
+ {
+ d->__ordering = RETURN_IN_ORDER;
+ ++optstring;
+ }
+ else if (optstring[0] == '+')
+ {
+ d->__ordering = REQUIRE_ORDER;
+ ++optstring;
+ }
+ else if (d->__posixly_correct)
+ d->__ordering = REQUIRE_ORDER;
+ else
+ d->__ordering = PERMUTE;
+
+#if defined _LIBC && defined USE_NONOPTION_FLAGS
+ if (!d->__posixly_correct
+ && argc == __libc_argc && argv == __libc_argv)
+ {
+ if (d->__nonoption_flags_max_len == 0)
+ {
+ if (__getopt_nonoption_flags == NULL
+ || __getopt_nonoption_flags[0] == '\0')
+ d->__nonoption_flags_max_len = -1;
+ else
+ {
+ const char *orig_str = __getopt_nonoption_flags;
+ int len = d->__nonoption_flags_max_len = strlen (orig_str);
+ if (d->__nonoption_flags_max_len < argc)
+ d->__nonoption_flags_max_len = argc;
+ __getopt_nonoption_flags =
+ (char *) malloc (d->__nonoption_flags_max_len);
+ if (__getopt_nonoption_flags == NULL)
+ d->__nonoption_flags_max_len = -1;
+ else
+ memset (__mempcpy (__getopt_nonoption_flags, orig_str, len),
+ '\0', d->__nonoption_flags_max_len - len);
+ }
+ }
+ d->__nonoption_flags_len = d->__nonoption_flags_max_len;
+ }
+ else
+ d->__nonoption_flags_len = 0;
+#endif
+
+ return optstring;
+}
+\f
+/* Scan elements of ARGV (whose length is ARGC) for option characters
+ given in OPTSTRING.
+
+ If an element of ARGV starts with '-', and is not exactly "-" or "--",
+ then it is an option element. The characters of this element
+ (aside from the initial '-') are option characters. If `getopt'
+ is called repeatedly, it returns successively each of the option characters
+ from each of the option elements.
+
+ If `getopt' finds another option character, it returns that character,
+ updating `optind' and `nextchar' so that the next call to `getopt' can
+ resume the scan with the following option character or ARGV-element.
+
+ If there are no more option characters, `getopt' returns -1.
+ Then `optind' is the index in ARGV of the first ARGV-element
+ that is not an option. (The ARGV-elements have been permuted
+ so that those that are not options now come last.)
+
+ OPTSTRING is a string containing the legitimate option characters.
+ If an option character is seen that is not listed in OPTSTRING,
+ return '?' after printing an error message. If you set `opterr' to
+ zero, the error message is suppressed but we still return '?'.
+
+ If a char in OPTSTRING is followed by a colon, that means it wants an arg,
+ so the following text in the same ARGV-element, or the text of the following
+ ARGV-element, is returned in `optarg'. Two colons mean an option that
+ wants an optional arg; if there is text in the current ARGV-element,
+ it is returned in `optarg', otherwise `optarg' is set to zero.
+
+ If OPTSTRING starts with `-' or `+', it requests different methods of
+ handling the non-option ARGV-elements.
+ See the comments about RETURN_IN_ORDER and REQUIRE_ORDER, above.
+
+ Long-named options begin with `--' instead of `-'.
+ Their names may be abbreviated as long as the abbreviation is unique
+ or is an exact match for some defined option. If they have an
+ argument, it follows the option name in the same ARGV-element, separated
+ from the option name by a `=', or else the in next ARGV-element.
+ When `getopt' finds a long-named option, it returns 0 if that option's
+ `flag' field is nonzero, the value of the option's `val' field
+ if the `flag' field is zero.
+
+ The elements of ARGV aren't really const, because we permute them.
+ But we pretend they're const in the prototype to be compatible
+ with other systems.
+
+ LONGOPTS is a vector of `struct option' terminated by an
+ element containing a name which is zero.
+
+ LONGIND returns the index in LONGOPT of the long-named option found.
+ It is only valid when a long-named option has been found by the most
+ recent call.
+
+ If LONG_ONLY is nonzero, '-' as well as '--' can introduce
+ long-named options. */
+
+int
+_getopt_internal_r (int argc, char *const *argv, const char *optstring,
+ const struct option *longopts, int *longind,
+ int long_only, struct _getopt_data *d)
+{
+ /* Reentrant core of getopt/getopt_long/getopt_long_only: scans one
+ option per call, keeping all scan state in *D. Returns the option
+ character (or `val' of a long option), 0 for a long option that
+ stored through its `flag', 1 for a non-option in RETURN_IN_ORDER
+ mode, '?' or ':' on error, and -1 when the scan is finished. */
+ int print_errors = d->opterr;
+ if (optstring[0] == ':')
+ print_errors = 0;
+
+ if (argc < 1)
+ return -1;
+
+ d->optarg = NULL;
+
+ if (d->optind == 0 || !d->__initialized)
+ {
+ if (d->optind == 0)
+ d->optind = 1; /* Don't scan ARGV[0], the program name. */
+ optstring = _getopt_initialize (argc, argv, optstring, d);
+ d->__initialized = 1;
+ }
+
+ /* Test whether ARGV[optind] points to a non-option argument.
+ Either it does not have option syntax, or there is an environment flag
+ from the shell indicating it is not an option. The latter information
+ is only used when this code is built as part of the GNU libc. */
+#if defined _LIBC && defined USE_NONOPTION_FLAGS
+# define NONOPTION_P (argv[d->optind][0] != '-' || argv[d->optind][1] == '\0' \
+ || (d->optind < d->__nonoption_flags_len \
+ && __getopt_nonoption_flags[d->optind] == '1'))
+#else
+# define NONOPTION_P (argv[d->optind][0] != '-' || argv[d->optind][1] == '\0')
+#endif
+
+ if (d->__nextchar == NULL || *d->__nextchar == '\0')
+ {
+ /* Advance to the next ARGV-element. */
+
+ /* Give FIRST_NONOPT & LAST_NONOPT rational values if OPTIND has been
+ moved back by the user (who may also have changed the arguments). */
+ if (d->__last_nonopt > d->optind)
+ d->__last_nonopt = d->optind;
+ if (d->__first_nonopt > d->optind)
+ d->__first_nonopt = d->optind;
+
+ if (d->__ordering == PERMUTE)
+ {
+ /* If we have just processed some options following some non-options,
+ exchange them so that the options come first. */
+
+ if (d->__first_nonopt != d->__last_nonopt
+ && d->__last_nonopt != d->optind)
+ exchange ((char **) argv, d);
+ else if (d->__last_nonopt != d->optind)
+ d->__first_nonopt = d->optind;
+
+ /* Skip any additional non-options
+ and extend the range of non-options previously skipped. */
+
+ while (d->optind < argc && NONOPTION_P)
+ d->optind++;
+ d->__last_nonopt = d->optind;
+ }
+
+ /* The special ARGV-element `--' means premature end of options.
+ Skip it like a null option,
+ then exchange with previous non-options as if it were an option,
+ then skip everything else like a non-option. */
+
+ if (d->optind != argc && !strcmp (argv[d->optind], "--"))
+ {
+ d->optind++;
+
+ if (d->__first_nonopt != d->__last_nonopt
+ && d->__last_nonopt != d->optind)
+ exchange ((char **) argv, d);
+ else if (d->__first_nonopt == d->__last_nonopt)
+ d->__first_nonopt = d->optind;
+ d->__last_nonopt = argc;
+
+ d->optind = argc;
+ }
+
+ /* If we have done all the ARGV-elements, stop the scan
+ and back over any non-options that we skipped and permuted. */
+
+ if (d->optind == argc)
+ {
+ /* Set the next-arg-index to point at the non-options
+ that we previously skipped, so the caller will digest them. */
+ if (d->__first_nonopt != d->__last_nonopt)
+ d->optind = d->__first_nonopt;
+ return -1;
+ }
+
+ /* If we have come to a non-option and did not permute it,
+ either stop the scan or describe it to the caller and pass it by. */
+
+ if (NONOPTION_P)
+ {
+ if (d->__ordering == REQUIRE_ORDER)
+ return -1;
+ d->optarg = argv[d->optind++];
+ return 1;
+ }
+
+ /* We have found another option-ARGV-element.
+ Skip the initial punctuation. */
+
+ d->__nextchar = (argv[d->optind] + 1
+ + (longopts != NULL && argv[d->optind][1] == '-'));
+ }
+
+ /* Decode the current option-ARGV-element. */
+
+ /* Check whether the ARGV-element is a long option.
+
+ If long_only and the ARGV-element has the form "-f", where f is
+ a valid short option, don't consider it an abbreviated form of
+ a long option that starts with f. Otherwise there would be no
+ way to give the -f short option.
+
+ On the other hand, if there's a long option "fubar" and
+ the ARGV-element is "-fu", do consider that an abbreviation of
+ the long option, just like "--fu", and not "-f" with arg "u".
+
+ This distinction seems to be the most useful approach. */
+
+ if (longopts != NULL
+ && (argv[d->optind][1] == '-'
+ || (long_only && (argv[d->optind][2]
+ || !strchr (optstring, argv[d->optind][1])))))
+ {
+ char *nameend;
+ const struct option *p;
+ const struct option *pfound = NULL;
+ int exact = 0;
+ int ambig = 0;
+ int indfound = -1;
+ int option_index;
+
+ for (nameend = d->__nextchar; *nameend && *nameend != '='; nameend++)
+ /* Do nothing. */ ;
+
+ /* Test all long options for either exact match
+ or abbreviated matches. */
+ for (p = longopts, option_index = 0; p->name; p++, option_index++)
+ if (!strncmp (p->name, d->__nextchar, nameend - d->__nextchar))
+ {
+ if ((unsigned int) (nameend - d->__nextchar)
+ == (unsigned int) strlen (p->name))
+ {
+ /* Exact match found. */
+ pfound = p;
+ indfound = option_index;
+ exact = 1;
+ break;
+ }
+ else if (pfound == NULL)
+ {
+ /* First nonexact match found. */
+ pfound = p;
+ indfound = option_index;
+ }
+ else if (long_only
+ || pfound->has_arg != p->has_arg
+ || pfound->flag != p->flag
+ || pfound->val != p->val)
+ /* Second or later nonexact match found. */
+ ambig = 1;
+ }
+
+ if (ambig && !exact)
+ {
+ if (print_errors)
+ {
+#if defined _LIBC && defined USE_IN_LIBIO
+ char *buf;
+
+ if (__asprintf (&buf, _("%s: option `%s' is ambiguous\n"),
+ argv[0], argv[d->optind]) >= 0)
+ {
+ _IO_flockfile (stderr);
+
+ int old_flags2 = ((_IO_FILE *) stderr)->_flags2;
+ ((_IO_FILE *) stderr)->_flags2 |= _IO_FLAGS2_NOTCANCEL;
+
+ __fxprintf (NULL, "%s", buf);
+
+ ((_IO_FILE *) stderr)->_flags2 = old_flags2;
+ _IO_funlockfile (stderr);
+
+ free (buf);
+ }
+#else
+ fprintf (stderr, _("%s: option `%s' is ambiguous\n"),
+ argv[0], argv[d->optind]);
+#endif
+ }
+ d->__nextchar += strlen (d->__nextchar);
+ d->optind++;
+ d->optopt = 0;
+ return '?';
+ }
+
+ if (pfound != NULL)
+ {
+ option_index = indfound;
+ d->optind++;
+ if (*nameend)
+ {
+ /* Don't test has_arg with >, because some C compilers don't
+ allow it to be used on enums. */
+ if (pfound->has_arg)
+ d->optarg = nameend + 1;
+ else
+ {
+ if (print_errors)
+ {
+#if defined _LIBC && defined USE_IN_LIBIO
+ char *buf;
+ int n;
+#endif
+
+ if (argv[d->optind - 1][1] == '-')
+ {
+ /* --option */
+#if defined _LIBC && defined USE_IN_LIBIO
+ n = __asprintf (&buf, _("\
+%s: option `--%s' doesn't allow an argument\n"),
+ argv[0], pfound->name);
+#else
+ fprintf (stderr, _("\
+%s: option `--%s' doesn't allow an argument\n"),
+ argv[0], pfound->name);
+#endif
+ }
+ else
+ {
+ /* +option or -option */
+#if defined _LIBC && defined USE_IN_LIBIO
+ n = __asprintf (&buf, _("\
+%s: option `%c%s' doesn't allow an argument\n"),
+ argv[0], argv[d->optind - 1][0],
+ pfound->name);
+#else
+ fprintf (stderr, _("\
+%s: option `%c%s' doesn't allow an argument\n"),
+ argv[0], argv[d->optind - 1][0],
+ pfound->name);
+#endif
+ }
+
+#if defined _LIBC && defined USE_IN_LIBIO
+ if (n >= 0)
+ {
+ _IO_flockfile (stderr);
+
+ int old_flags2 = ((_IO_FILE *) stderr)->_flags2;
+ ((_IO_FILE *) stderr)->_flags2
+ |= _IO_FLAGS2_NOTCANCEL;
+
+ __fxprintf (NULL, "%s", buf);
+
+ ((_IO_FILE *) stderr)->_flags2 = old_flags2;
+ _IO_funlockfile (stderr);
+
+ free (buf);
+ }
+#endif
+ }
+
+ d->__nextchar += strlen (d->__nextchar);
+
+ d->optopt = pfound->val;
+ return '?';
+ }
+ }
+ else if (pfound->has_arg == 1)
+ {
+ if (d->optind < argc)
+ d->optarg = argv[d->optind++];
+ else
+ {
+ if (print_errors)
+ {
+#if defined _LIBC && defined USE_IN_LIBIO
+ char *buf;
+
+ if (__asprintf (&buf, _("\
+%s: option `%s' requires an argument\n"),
+ argv[0], argv[d->optind - 1]) >= 0)
+ {
+ _IO_flockfile (stderr);
+
+ int old_flags2 = ((_IO_FILE *) stderr)->_flags2;
+ ((_IO_FILE *) stderr)->_flags2
+ |= _IO_FLAGS2_NOTCANCEL;
+
+ __fxprintf (NULL, "%s", buf);
+
+ ((_IO_FILE *) stderr)->_flags2 = old_flags2;
+ _IO_funlockfile (stderr);
+
+ free (buf);
+ }
+#else
+ fprintf (stderr,
+ _("%s: option `%s' requires an argument\n"),
+ argv[0], argv[d->optind - 1]);
+#endif
+ }
+ d->__nextchar += strlen (d->__nextchar);
+ d->optopt = pfound->val;
+ return optstring[0] == ':' ? ':' : '?';
+ }
+ }
+ d->__nextchar += strlen (d->__nextchar);
+ if (longind != NULL)
+ *longind = option_index;
+ if (pfound->flag)
+ {
+ *(pfound->flag) = pfound->val;
+ return 0;
+ }
+ return pfound->val;
+ }
+
+ /* Can't find it as a long option. If this is not getopt_long_only,
+ or the option starts with '--' or is not a valid short
+ option, then it's an error.
+ Otherwise interpret it as a short option. */
+ if (!long_only || argv[d->optind][1] == '-'
+ || strchr (optstring, *d->__nextchar) == NULL)
+ {
+ if (print_errors)
+ {
+#if defined _LIBC && defined USE_IN_LIBIO
+ char *buf;
+ int n;
+#endif
+
+ if (argv[d->optind][1] == '-')
+ {
+ /* --option */
+#if defined _LIBC && defined USE_IN_LIBIO
+ n = __asprintf (&buf, _("%s: unrecognized option `--%s'\n"),
+ argv[0], d->__nextchar);
+#else
+ fprintf (stderr, _("%s: unrecognized option `--%s'\n"),
+ argv[0], d->__nextchar);
+#endif
+ }
+ else
+ {
+ /* +option or -option */
+#if defined _LIBC && defined USE_IN_LIBIO
+ n = __asprintf (&buf, _("%s: unrecognized option `%c%s'\n"),
+ argv[0], argv[d->optind][0], d->__nextchar);
+#else
+ fprintf (stderr, _("%s: unrecognized option `%c%s'\n"),
+ argv[0], argv[d->optind][0], d->__nextchar);
+#endif
+ }
+
+#if defined _LIBC && defined USE_IN_LIBIO
+ if (n >= 0)
+ {
+ _IO_flockfile (stderr);
+
+ int old_flags2 = ((_IO_FILE *) stderr)->_flags2;
+ ((_IO_FILE *) stderr)->_flags2 |= _IO_FLAGS2_NOTCANCEL;
+
+ __fxprintf (NULL, "%s", buf);
+
+ ((_IO_FILE *) stderr)->_flags2 = old_flags2;
+ _IO_funlockfile (stderr);
+
+ free (buf);
+ }
+#endif
+ }
+ d->__nextchar = (char *) "";
+ d->optind++;
+ d->optopt = 0;
+ return '?';
+ }
+ }
+
+ /* Look at and handle the next short option-character. */
+
+ {
+ char c = *d->__nextchar++;
+ char *temp = strchr (optstring, c);
+
+ /* Increment `optind' when we start to process its last character. */
+ if (*d->__nextchar == '\0')
+ ++d->optind;
+
+ if (temp == NULL || c == ':')
+ {
+ if (print_errors)
+ {
+#if defined _LIBC && defined USE_IN_LIBIO
+ char *buf;
+ int n;
+#endif
+
+ if (d->__posixly_correct)
+ {
+ /* 1003.2 specifies the format of this message. */
+#if defined _LIBC && defined USE_IN_LIBIO
+ n = __asprintf (&buf, _("%s: illegal option -- %c\n"),
+ argv[0], c);
+#else
+ fprintf (stderr, _("%s: illegal option -- %c\n"), argv[0], c);
+#endif
+ }
+ else
+ {
+#if defined _LIBC && defined USE_IN_LIBIO
+ n = __asprintf (&buf, _("%s: invalid option -- %c\n"),
+ argv[0], c);
+#else
+ fprintf (stderr, _("%s: invalid option -- %c\n"), argv[0], c);
+#endif
+ }
+
+#if defined _LIBC && defined USE_IN_LIBIO
+ if (n >= 0)
+ {
+ _IO_flockfile (stderr);
+
+ int old_flags2 = ((_IO_FILE *) stderr)->_flags2;
+ ((_IO_FILE *) stderr)->_flags2 |= _IO_FLAGS2_NOTCANCEL;
+
+ __fxprintf (NULL, "%s", buf);
+
+ ((_IO_FILE *) stderr)->_flags2 = old_flags2;
+ _IO_funlockfile (stderr);
+
+ free (buf);
+ }
+#endif
+ }
+ d->optopt = c;
+ return '?';
+ }
+ /* Convenience. Treat POSIX -W foo same as long option --foo */
+ if (temp[0] == 'W' && temp[1] == ';')
+ {
+ char *nameend;
+ const struct option *p;
+ const struct option *pfound = NULL;
+ int exact = 0;
+ int ambig = 0;
+ int indfound = 0;
+ int option_index;
+
+ /* This is an option that requires an argument. */
+ if (*d->__nextchar != '\0')
+ {
+ d->optarg = d->__nextchar;
+ /* If we end this ARGV-element by taking the rest as an arg,
+ we must advance to the next element now. */
+ d->optind++;
+ }
+ else if (d->optind == argc)
+ {
+ if (print_errors)
+ {
+ /* 1003.2 specifies the format of this message. */
+#if defined _LIBC && defined USE_IN_LIBIO
+ char *buf;
+
+ if (__asprintf (&buf,
+ _("%s: option requires an argument -- %c\n"),
+ argv[0], c) >= 0)
+ {
+ _IO_flockfile (stderr);
+
+ int old_flags2 = ((_IO_FILE *) stderr)->_flags2;
+ ((_IO_FILE *) stderr)->_flags2 |= _IO_FLAGS2_NOTCANCEL;
+
+ __fxprintf (NULL, "%s", buf);
+
+ ((_IO_FILE *) stderr)->_flags2 = old_flags2;
+ _IO_funlockfile (stderr);
+
+ free (buf);
+ }
+#else
+ fprintf (stderr, _("%s: option requires an argument -- %c\n"),
+ argv[0], c);
+#endif
+ }
+ d->optopt = c;
+ if (optstring[0] == ':')
+ c = ':';
+ else
+ c = '?';
+ return c;
+ }
+ else
+ /* We already incremented `d->optind' once;
+ increment it again when taking next ARGV-elt as argument. */
+ d->optarg = argv[d->optind++];
+
+ /* optarg is now the argument, see if it's in the
+ table of longopts. */
+
+ for (d->__nextchar = nameend = d->optarg; *nameend && *nameend != '=';
+ nameend++)
+ /* Do nothing. */ ;
+
+ /* Test all long options for either exact match
+ or abbreviated matches. Plain getopt() passes LONGOPTS == NULL;
+ there is then no table to search, so fall through and hand `W'
+ (with its argument in optarg) back to the application. */
+ if (longopts != NULL)
+ {
+ for (p = longopts, option_index = 0; p->name; p++, option_index++)
+ if (!strncmp (p->name, d->__nextchar, nameend - d->__nextchar))
+ {
+ if ((unsigned int) (nameend - d->__nextchar) == strlen (p->name))
+ {
+ /* Exact match found. */
+ pfound = p;
+ indfound = option_index;
+ exact = 1;
+ break;
+ }
+ else if (pfound == NULL)
+ {
+ /* First nonexact match found. */
+ pfound = p;
+ indfound = option_index;
+ }
+ else
+ /* Second or later nonexact match found. */
+ ambig = 1;
+ }
+ }
+ if (ambig && !exact)
+ {
+ if (print_errors)
+ {
+ /* Report the -W argument itself (d->optarg). OPTIND has
+ already moved past it, so argv[d->optind] would be the
+ following element, or the NULL terminator argv[argc]. */
+#if defined _LIBC && defined USE_IN_LIBIO
+ char *buf;
+
+ if (__asprintf (&buf, _("%s: option `-W %s' is ambiguous\n"),
+ argv[0], d->optarg) >= 0)
+ {
+ _IO_flockfile (stderr);
+
+ int old_flags2 = ((_IO_FILE *) stderr)->_flags2;
+ ((_IO_FILE *) stderr)->_flags2 |= _IO_FLAGS2_NOTCANCEL;
+
+ __fxprintf (NULL, "%s", buf);
+
+ ((_IO_FILE *) stderr)->_flags2 = old_flags2;
+ _IO_funlockfile (stderr);
+
+ free (buf);
+ }
+#else
+ fprintf (stderr, _("%s: option `-W %s' is ambiguous\n"),
+ argv[0], d->optarg);
+#endif
+ }
+ d->__nextchar += strlen (d->__nextchar);
+ /* Do not advance OPTIND here: both branches above that fetched
+ the -W argument already stepped past it. Stepping again would
+ skip an unrelated element and could leave OPTIND beyond ARGC. */
+ return '?';
+ }
+ if (pfound != NULL)
+ {
+ option_index = indfound;
+ if (*nameend)
+ {
+ /* Don't test has_arg with >, because some C compilers don't
+ allow it to be used on enums. */
+ if (pfound->has_arg)
+ d->optarg = nameend + 1;
+ else
+ {
+ if (print_errors)
+ {
+#if defined _LIBC && defined USE_IN_LIBIO
+ char *buf;
+
+ if (__asprintf (&buf, _("\
+%s: option `-W %s' doesn't allow an argument\n"),
+ argv[0], pfound->name) >= 0)
+ {
+ _IO_flockfile (stderr);
+
+ int old_flags2 = ((_IO_FILE *) stderr)->_flags2;
+ ((_IO_FILE *) stderr)->_flags2
+ |= _IO_FLAGS2_NOTCANCEL;
+
+ __fxprintf (NULL, "%s", buf);
+
+ ((_IO_FILE *) stderr)->_flags2 = old_flags2;
+ _IO_funlockfile (stderr);
+
+ free (buf);
+ }
+#else
+ fprintf (stderr, _("\
+%s: option `-W %s' doesn't allow an argument\n"),
+ argv[0], pfound->name);
+#endif
+ }
+
+ d->__nextchar += strlen (d->__nextchar);
+ return '?';
+ }
+ }
+ else if (pfound->has_arg == 1)
+ {
+ if (d->optind < argc)
+ d->optarg = argv[d->optind++];
+ else
+ {
+ if (print_errors)
+ {
+#if defined _LIBC && defined USE_IN_LIBIO
+ char *buf;
+
+ if (__asprintf (&buf, _("\
+%s: option `%s' requires an argument\n"),
+ argv[0], argv[d->optind - 1]) >= 0)
+ {
+ _IO_flockfile (stderr);
+
+ int old_flags2 = ((_IO_FILE *) stderr)->_flags2;
+ ((_IO_FILE *) stderr)->_flags2
+ |= _IO_FLAGS2_NOTCANCEL;
+
+ __fxprintf (NULL, "%s", buf);
+
+ ((_IO_FILE *) stderr)->_flags2 = old_flags2;
+ _IO_funlockfile (stderr);
+
+ free (buf);
+ }
+#else
+ fprintf (stderr,
+ _("%s: option `%s' requires an argument\n"),
+ argv[0], argv[d->optind - 1]);
+#endif
+ }
+ d->__nextchar += strlen (d->__nextchar);
+ return optstring[0] == ':' ? ':' : '?';
+ }
+ }
+ d->__nextchar += strlen (d->__nextchar);
+ if (longind != NULL)
+ *longind = option_index;
+ if (pfound->flag)
+ {
+ *(pfound->flag) = pfound->val;
+ return 0;
+ }
+ return pfound->val;
+ }
+ d->__nextchar = NULL;
+ return 'W'; /* Let the application handle it. */
+ }
+ if (temp[1] == ':')
+ {
+ if (temp[2] == ':')
+ {
+ /* This is an option that accepts an argument optionally. */
+ if (*d->__nextchar != '\0')
+ {
+ d->optarg = d->__nextchar;
+ d->optind++;
+ }
+ else
+ d->optarg = NULL;
+ d->__nextchar = NULL;
+ }
+ else
+ {
+ /* This is an option that requires an argument. */
+ if (*d->__nextchar != '\0')
+ {
+ d->optarg = d->__nextchar;
+ /* If we end this ARGV-element by taking the rest as an arg,
+ we must advance to the next element now. */
+ d->optind++;
+ }
+ else if (d->optind == argc)
+ {
+ if (print_errors)
+ {
+ /* 1003.2 specifies the format of this message. */
+#if defined _LIBC && defined USE_IN_LIBIO
+ char *buf;
+
+ if (__asprintf (&buf, _("\
+%s: option requires an argument -- %c\n"),
+ argv[0], c) >= 0)
+ {
+ _IO_flockfile (stderr);
+
+ int old_flags2 = ((_IO_FILE *) stderr)->_flags2;
+ ((_IO_FILE *) stderr)->_flags2 |= _IO_FLAGS2_NOTCANCEL;
+
+ __fxprintf (NULL, "%s", buf);
+
+ ((_IO_FILE *) stderr)->_flags2 = old_flags2;
+ _IO_funlockfile (stderr);
+
+ free (buf);
+ }
+#else
+ fprintf (stderr,
+ _("%s: option requires an argument -- %c\n"),
+ argv[0], c);
+#endif
+ }
+ d->optopt = c;
+ if (optstring[0] == ':')
+ c = ':';
+ else
+ c = '?';
+ }
+ else
+ /* We already incremented `optind' once;
+ increment it again when taking next ARGV-elt as argument. */
+ d->optarg = argv[d->optind++];
+ d->__nextchar = NULL;
+ }
+ }
+ return c;
+ }
+}
+
+int
+_getopt_internal (int argc, char *const *argv, const char *optstring,
+ const struct option *longopts, int *longind, int long_only)
+{
+ int rc;
+
+ /* Load the caller-visible globals into the shared scan state. */
+ getopt_data.opterr = opterr;
+ getopt_data.optind = optind;
+
+ rc = _getopt_internal_r (argc, argv, optstring, longopts,
+ longind, long_only, &getopt_data);
+
+ /* Copy the results of this step back out to the globals. */
+ optopt = getopt_data.optopt;
+ optarg = getopt_data.optarg;
+ optind = getopt_data.optind;
+
+ return rc;
+}
+
+int
+getopt (int argc, char *const *argv, const char *optstring)
+{
+ /* Short options only: no long-option table, nowhere to store a
+ long-option index, and LONG_ONLY switched off. */
+ const struct option *no_longopts = 0;
+ int *no_longind = 0;
+
+ return _getopt_internal (argc, argv, optstring, no_longopts, no_longind, 0);
+}
+
+#endif /* Not ELIDE_CODE. */
+\f
+#ifdef TEST
+
+/* Compile with -DTEST to make an executable for use in testing
+ the above definition of `getopt'. */
+
+/* Test driver: parse ARGV with the short options "abc:d:0123456789",
+ print each option as it is recognized, then list whatever
+ non-option ARGV-elements remain. Always exits with status 0. */
+int
+main (int argc, char **argv)
+{
+ int c;
+ int digit_optind = 0;
+
+ while (1)
+ {
+ /* Remember which ARGV-element this option came from, so digits
+ spread over several elements can be reported. */
+ int this_option_optind = optind ? optind : 1;
+
+ c = getopt (argc, argv, "abc:d:0123456789");
+ if (c == -1)
+ break;
+
+ switch (c)
+ {
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ if (digit_optind != 0 && digit_optind != this_option_optind)
+ printf ("digits occur in two different argv-elements.\n");
+ digit_optind = this_option_optind;
+ printf ("option %c\n", c);
+ break;
+
+ case 'a':
+ printf ("option a\n");
+ break;
+
+ case 'b':
+ printf ("option b\n");
+ break;
+
+ case 'c':
+ printf ("option c with value `%s'\n", optarg);
+ break;
+
+ /* "d:" is declared in the option string above, so handle it here
+ instead of letting it fall into the `default' complaint. */
+ case 'd':
+ printf ("option d with value `%s'\n", optarg);
+ break;
+
+ case '?':
+ break;
+
+ default:
+ printf ("?? getopt returned character code 0%o ??\n", c);
+ }
+ }
+
+ if (optind < argc)
+ {
+ printf ("non-option ARGV-elements: ");
+ while (optind < argc)
+ printf ("%s ", argv[optind++]);
+ printf ("\n");
+ }
+
+ exit (0);
+}
+
+#endif /* TEST */
--- /dev/null
+/* Declarations for getopt.
+ Copyright (C) 1989-1994,1996-1999,2001,2003,2004
+ Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#ifndef _GETOPT_H
+
+#ifndef __need_getopt
+# define _GETOPT_H 1
+#endif
+
+/* If __GNU_LIBRARY__ is not already defined, either we are being used
+ standalone, or this is the first header included in the source file.
+ If we are being used with glibc, we need to include <features.h>, but
+ that does not exist if we are standalone. So: if __GNU_LIBRARY__ is
+ not defined, include <ctype.h>, which will pull in <features.h> for us
+ if it's from glibc. (Why ctype.h? It's guaranteed to exist and it
+ doesn't flood the namespace with stuff the way some other headers do.) */
+#if !defined __GNU_LIBRARY__
+# include <ctype.h>
+#endif
+
+#ifndef __THROW
+# ifndef __GNUC_PREREQ
+# define __GNUC_PREREQ(maj, min) (0)
+# endif
+# if defined __cplusplus && __GNUC_PREREQ (2,8)
+# define __THROW throw ()
+# else
+# define __THROW
+# endif
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* For communication from `getopt' to the caller.
+ When `getopt' finds an option that takes an argument,
+ the argument value is returned here.
+ Also, when `ordering' is RETURN_IN_ORDER,
+ each non-option ARGV-element is returned here. */
+
+extern char *optarg;
+
+/* Index in ARGV of the next element to be scanned.
+ This is used for communication to and from the caller
+ and for communication between successive calls to `getopt'.
+
+ On entry to `getopt', zero means this is the first call; initialize.
+
+ When `getopt' returns -1, this is the index of the first of the
+ non-option elements that the caller should itself scan.
+
+ Otherwise, `optind' communicates from one call to the next
+ how much of ARGV has been scanned so far. */
+
+extern int optind;
+
+/* Callers store zero here to inhibit the error message `getopt' prints
+ for unrecognized options. */
+
+extern int opterr;
+
+/* Set to an option character which was unrecognized. */
+
+extern int optopt;
+
+#ifndef __need_getopt
+/* Describe the long-named options requested by the application.
+ The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector
+ of `struct option' terminated by an element containing a name which is
+ zero.
+
+ The field `has_arg' is:
+ no_argument (or 0) if the option does not take an argument,
+ required_argument (or 1) if the option requires an argument,
+ optional_argument (or 2) if the option takes an optional argument.
+
+ If the field `flag' is not NULL, it points to a variable that is set
+ to the value given in the field `val' when the option is found, but
+ left unchanged if the option is not found.
+
+ To have a long-named option do something other than set an `int' to
+ a compiled-in constant, such as set a value from `optarg', set the
+ option's `flag' field to zero and its `val' field to a nonzero
+ value (the equivalent single-letter option character, if there is
+ one). For long options that have a zero `flag' field, `getopt'
+ returns the contents of the `val' field. */
+
+/* One entry of the long-option table passed to getopt_long and
+ getopt_long_only; the table ends with an entry whose `name' is zero. */
+struct option
+{
+ /* Option name, without the leading dashes. */
+ const char *name;
+ /* has_arg can't be an enum because some compilers complain about
+ type mismatches in all the code that assumes it is an int. */
+ /* One of no_argument (0), required_argument (1), optional_argument (2). */
+ int has_arg;
+ /* If not NULL, `val' is stored through this pointer when the option
+ is found and getopt returns 0 instead of `val'. */
+ int *flag;
+ /* Value returned for this option, or stored through `flag'. */
+ int val;
+};
+
+/* Names for the values of the `has_arg' field of `struct option'. */
+
+# define no_argument 0
+# define required_argument 1
+# define optional_argument 2
+#endif /* need getopt */
+
+
+/* Get definitions and prototypes for functions to process the
+ arguments in ARGV (ARGC of them, minus the program name) for
+ options given in OPTS.
+
+ Return the option character from OPTS just read. Return -1 when
+ there are no more options. For unrecognized options, or options
+ missing arguments, `optopt' is set to the option letter, and '?' is
+ returned.
+
+ The OPTS string is a list of characters which are recognized option
+ letters, optionally followed by colons, specifying that that letter
+ takes an argument, to be placed in `optarg'.
+
+ If a letter in OPTS is followed by two colons, its argument is
+ optional. This behavior is specific to the GNU `getopt'.
+
+ The argument `--' causes premature termination of argument
+ scanning, explicitly telling `getopt' that there are no more
+ options.
+
+ If OPTS begins with `-', then non-option arguments are treated as
+ arguments to the option '\1'. This behavior is specific to the GNU
+ `getopt'. */
+
+#ifdef __GNU_LIBRARY__
+/* Many other libraries have conflicting prototypes for getopt, with
+ differences in the consts, in stdlib.h. To avoid compilation
+ errors, only prototype getopt for the GNU C library. */
+extern int getopt (int ___argc, char *const *___argv, const char *__shortopts)
+ __THROW;
+#else /* not __GNU_LIBRARY__ */
+extern int getopt ();
+#endif /* __GNU_LIBRARY__ */
+
+#ifndef __need_getopt
+extern int getopt_long (int ___argc, char *const *___argv,
+ const char *__shortopts,
+ const struct option *__longopts, int *__longind)
+ __THROW;
+extern int getopt_long_only (int ___argc, char *const *___argv,
+ const char *__shortopts,
+ const struct option *__longopts, int *__longind)
+ __THROW;
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+/* Make sure we later can get all the definitions and declarations. */
+#undef __need_getopt
+
+#endif /* getopt.h */
--- /dev/null
+/* getopt_long and getopt_long_only entry points for GNU getopt.
+ Copyright (C) 1987,88,89,90,91,92,93,94,96,97,98,2004
+ Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+\f
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#ifdef _LIBC
+# include <getopt.h>
+#else
+# include "getopt.h"
+#endif
+#include "getopt_int.h"
+
+#include <stdio.h>
+
+/* Comment out all this code if we are using the GNU C Library, and are not
+ actually compiling the library itself. This code is part of the GNU C
+ Library, but also included in many other GNU distributions. Compiling
+ and linking in this code is a waste when using the GNU C library
+ (especially if it is a shared library). Rather than having every GNU
+ program understand `configure --with-gnu-libc' and omit the object files,
+ it is simpler to just do this in the source for each such file. */
+
+#define GETOPT_INTERFACE_VERSION 2
+#if !defined _LIBC && defined __GLIBC__ && __GLIBC__ >= 2
+#include <gnu-versions.h>
+#if _GNU_GETOPT_INTERFACE_VERSION == GETOPT_INTERFACE_VERSION
+//#define ELIDE_CODE // SHERLOCK: disabled
+#endif
+#endif
+
+#ifndef ELIDE_CODE
+
+
+/* This needs to come after some library #include
+ to get __GNU_LIBRARY__ defined. */
+#ifdef __GNU_LIBRARY__
+#include <stdlib.h>
+#endif
+
+#ifndef NULL
+#define NULL 0
+#endif
+
+int
+getopt_long (int argc, char *const *argv, const char *options,
+ const struct option *long_options, int *opt_index)
+{
+ /* Zero: a single `-' does not introduce a long option here. */
+ const int long_only = 0;
+
+ return _getopt_internal (argc, argv, options, long_options, opt_index,
+ long_only);
+}
+
+int
+_getopt_long_r (int argc, char *const *argv, const char *options,
+ const struct option *long_options, int *opt_index,
+ struct _getopt_data *d)
+{
+ /* Reentrant form of getopt_long: the scan state is kept in *D. */
+ const int long_only = 0;
+
+ return _getopt_internal_r (argc, argv, options, long_options, opt_index,
+ long_only, d);
+}
+
+/* Like getopt_long, but '-' as well as '--' can indicate a long option.
+ If an option that starts with '-' (not '--') doesn't match a long option,
+ but does match a short option, it is parsed as a short option
+ instead. */
+
+int
+getopt_long_only (int argc, char *const *argv, const char *options,
+ const struct option *long_options, int *opt_index)
+{
+ /* Nonzero: `-' as well as `--' may introduce a long option. */
+ const int long_only = 1;
+
+ return _getopt_internal (argc, argv, options, long_options, opt_index,
+ long_only);
+}
+
+int
+_getopt_long_only_r (int argc, char *const *argv, const char *options,
+ const struct option *long_options, int *opt_index,
+ struct _getopt_data *d)
+{
+ /* Reentrant form of getopt_long_only: the scan state is kept in *D. */
+ const int long_only = 1;
+
+ return _getopt_internal_r (argc, argv, options, long_options, opt_index,
+ long_only, d);
+}
+
+#endif /* Not ELIDE_CODE. */
+\f
+#ifdef TEST
+
+#include <stdio.h>
+
+/* Test driver for getopt_long: print every short or long option as it
+ is recognized, then list the remaining non-option ARGV-elements. */
+int
+main (int argc, char **argv)
+{
+ /* Every entry has a zero `flag' and a zero `val', so all long options
+ come back from getopt_long as 0 and are told apart by index. */
+ static struct option long_options[] =
+ {
+ {"add", 1, 0, 0},
+ {"append", 0, 0, 0},
+ {"delete", 1, 0, 0},
+ {"verbose", 0, 0, 0},
+ {"create", 0, 0, 0},
+ {"file", 1, 0, 0},
+ {0, 0, 0, 0}
+ };
+ int digit_optind = 0;
+ int c;
+
+ for (;;)
+ {
+ int option_index = 0;
+ /* ARGV-element the upcoming option is taken from. */
+ int this_option_optind = optind ? optind : 1;
+
+ c = getopt_long (argc, argv, "abc:d:0123456789",
+ long_options, &option_index);
+ if (c == -1)
+ break;
+
+ switch (c)
+ {
+ case 0:
+ /* A long option; OPTION_INDEX says which one. */
+ printf ("option %s", long_options[option_index].name);
+ if (optarg)
+ printf (" with arg %s", optarg);
+ printf ("\n");
+ break;
+
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ if (digit_optind != 0 && digit_optind != this_option_optind)
+ printf ("digits occur in two different argv-elements.\n");
+ digit_optind = this_option_optind;
+ printf ("option %c\n", c);
+ break;
+
+ case 'a':
+ printf ("option a\n");
+ break;
+
+ case 'b':
+ printf ("option b\n");
+ break;
+
+ case 'c':
+ printf ("option c with value `%s'\n", optarg);
+ break;
+
+ case 'd':
+ printf ("option d with value `%s'\n", optarg);
+ break;
+
+ case '?':
+ /* getopt_long has already dealt with the error. */
+ break;
+
+ default:
+ printf ("?? getopt returned character code 0%o ??\n", c);
+ }
+ }
+
+ if (optind < argc)
+ {
+ printf ("non-option ARGV-elements: ");
+ for (; optind < argc; optind++)
+ printf ("%s ", argv[optind]);
+ printf ("\n");
+ }
+
+ exit (0);
+}
+
+#endif /* TEST */
--- /dev/null
+/* Perform additional initialization for getopt functions in GNU libc.
+ Copyright (C) 1997, 1998, 2001 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#ifdef USE_NONOPTION_FLAGS
+/* Attention: this file is *not* necessary when the GNU getopt functions
+ are used outside the GNU libc. Some additional functionality of the
+ getopt functions in GNU libc require this additional work. */
+
+#include <getopt.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+
+#include <stdio-common/_itoa.h>
+
+/* Variable to synchronize work. */
+char *__getopt_nonoption_flags;
+
+
+/* Remove the environment variable "_<PID>_GNU_nonoption_argv_flags_" if
+ it is still available. If the getopt functions are also used in the
+ application it does not exist anymore since it was saved for the use
+ in getopt. */
+/* Scan ENV for the "_<PID>_GNU_nonoption_argv_flags_=" entry of the
+ current process. For each match, point __getopt_nonoption_flags at
+ the entry's value and remove the entry from ENV by shifting the
+ following pointers (including the terminating NULL) down one slot. */
+void
+__getopt_clean_environment (char **env)
+{
+ /* Bash 2.0 puts a special variable in the environment for each
+ command it runs, specifying which ARGV elements are the results
+ of file name wildcard expansion and therefore should not be
+ considered as options. */
+ static const char envvar_tail[] = "_GNU_nonoption_argv_flags_=";
+ char var[50];
+ char *cp, **ep;
+ size_t len;
+
+ /* Construct the "_<PID>_GNU_nonoption_argv_flags_=" string. We must
+ not use `sprintf'. The tail (with its NUL) is placed at the end of
+ VAR and the PID digits are written in front of it. */
+ cp = memcpy (&var[sizeof (var) - sizeof (envvar_tail)], envvar_tail,
+ sizeof (envvar_tail));
+ cp = _itoa_word (__getpid (), cp, 10, 0);
+ /* Note: we omit adding the leading '_' since we explicitly test for
+ it before calling strncmp. */
+ len = (var + sizeof (var) - 1) - cp;
+
+ for (ep = env; *ep != NULL; ++ep)
+ if ((*ep)[0] == '_'
+ && __builtin_expect (strncmp (*ep + 1, cp, len) == 0, 0))
+ {
+ /* Found it. Store this pointer and move later ones back. */
+ char **dp = ep;
+ /* LEN counts "<PID>_GNU_nonoption_argv_flags_=" without the
+ leading '_', so the '=' sits at index LEN of the entry and the
+ value starts one character later. */
+ __getopt_nonoption_flags = &(*ep)[len + 1];
+ do
+ dp[0] = dp[1];
+ while (*dp++);
+ /* Continue the loop in case the name appears again. */
+ }
+}
+#endif /* USE_NONOPTION_FLAGS */
--- /dev/null
+/* Internal declarations for getopt.
+ Copyright (C) 1989-1994,1996-1999,2001,2003,2004
+ Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#ifndef _GETOPT_INT_H
+#define _GETOPT_INT_H 1
+
+extern int _getopt_internal (int ___argc, char *const *___argv,
+ const char *__shortopts,
+ const struct option *__longopts, int *__longind,
+ int __long_only);
+
+\f
+/* Reentrant versions which can handle parsing multiple argument
+ vectors at the same time. */
+
+/* Data type for reentrant functions. */
+struct _getopt_data
+{
+ /* These have exactly the same meaning as the corresponding global
+ variables, except that they are used for the reentrant
+ versions of getopt. */
+ int optind;
+ int opterr;
+ int optopt;
+ char *optarg;
+
+ /* Internal members. */
+
+ /* True if the internal members have been initialized. */
+ int __initialized;
+
+ /* The next char to be scanned in the option-element
+ in which the last option character we returned was found.
+ This allows us to pick up the scan where we left off.
+
+ If this is zero, or a null string, it means resume the scan
+ by advancing to the next ARGV-element. */
+ char *__nextchar;
+
+ /* Describe how to deal with options that follow non-option ARGV-elements.
+
+ If the caller did not specify anything,
+ the default is REQUIRE_ORDER if the environment variable
+ POSIXLY_CORRECT is defined, PERMUTE otherwise.
+
+ REQUIRE_ORDER means don't recognize them as options;
+ stop option processing when the first non-option is seen.
+ This is what Unix does.
+ This mode of operation is selected by either setting the environment
+ variable POSIXLY_CORRECT, or using `+' as the first character
+ of the list of option characters.
+
+ PERMUTE is the default. We permute the contents of ARGV as we
+ scan, so that eventually all the non-options are at the end.
+ This allows options to be given in any order, even with programs
+ that were not written to expect this.
+
+ RETURN_IN_ORDER is an option available to programs that were
+ written to expect options and other ARGV-elements in any order
+ and that care about the ordering of the two. We describe each
+ non-option ARGV-element as if it were the argument of an option
+ with character code 1. Using `-' as the first character of the
+ list of option characters selects this mode of operation.
+
+ The special argument `--' forces an end of option-scanning regardless
+ of the value of `ordering'. In the case of RETURN_IN_ORDER, only
+ `--' can cause `getopt' to return -1 with `optind' != ARGC. */
+
+ enum
+ {
+ REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER
+ } __ordering;
+
+ /* If the POSIXLY_CORRECT environment variable is set. */
+ int __posixly_correct;
+
+
+ /* Handle permutation of arguments. */
+
+ /* Describe the part of ARGV that contains non-options that have
+ been skipped. `first_nonopt' is the index in ARGV of the first
+ of them; `last_nonopt' is the index after the last of them. */
+
+ int __first_nonopt;
+ int __last_nonopt;
+
+#if defined _LIBC && defined USE_NONOPTION_FLAGS
+ int __nonoption_flags_max_len;
+ int __nonoption_flags_len;
+# endif
+};
+
+/* The initializer is necessary to set OPTIND and OPTERR to their
+ default values and to clear the initialization flag. */
+#define _GETOPT_DATA_INITIALIZER { 1, 1 }
+
+extern int _getopt_internal_r (int ___argc, char *const *___argv,
+ const char *__shortopts,
+ const struct option *__longopts, int *__longind,
+ int __long_only, struct _getopt_data *__data);
+
+extern int _getopt_long_r (int ___argc, char *const *___argv,
+ const char *__shortopts,
+ const struct option *__longopts, int *__longind,
+ struct _getopt_data *__data);
+
+extern int _getopt_long_only_r (int ___argc, char *const *___argv,
+ const char *__shortopts,
+ const struct option *__longopts,
+ int *__longind,
+ struct _getopt_data *__data);
+
+#endif /* getopt_int.h */
--- /dev/null
+/* Tests for hash table routines */
+
+#include "ucw/lib.h"
+#include "ucw/mempool.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+
+/* TEST 1: integers */
+
+struct node1 {
+ int key;
+ int data;
+};
+
+#define HASH_NODE struct node1
+#define HASH_PREFIX(x) test1_##x
+#define HASH_KEY_ATOMIC key
+#define HASH_ATOMIC_TYPE int
+#define HASH_ZERO_FILL
+
+#define HASH_GIVE_INIT_DATA
+/*
+ * Data initialisation hook of the test1 table (requested by the
+ * HASH_GIVE_INIT_DATA define just above): derives the data field of
+ * a node from its key, so that test1() can verify that the hook has
+ * been run on nodes it did not fill in itself.
+ */
+static inline void test1_init_data(struct node1 *n)
+{
+ n->data = n->key + 123;
+}
+
+#define HASH_WANT_FIND
+#define HASH_WANT_LOOKUP
+#define HASH_WANT_REMOVE
+
+#include "ucw/hashtable.h"
+
+/*
+ * Test 1: a static table with integer keys.
+ *
+ * Creates keys 0..1023 through lookup(), removes the odd ones again,
+ * checks that exactly the even keys are left with the data written by
+ * test1_init_data() and finally walks the whole table, verifying the
+ * sum of (key + 1) over all remaining nodes.  Prints "OK" on success,
+ * dies or fails an assertion otherwise.
+ */
+static void test1(void)
+{
+  int key;
+  int checksum = 0;
+
+  test1_init();
+
+  /* lookup() has to create every node and to initialise its data */
+  for (key = 0; key < 1024; key++)
+    {
+      struct node1 *created = test1_lookup(key);
+      ASSERT(created->data == key+123);
+    }
+
+  /* Get rid of all odd keys */
+  for (key = 1; key < 1024; key += 2)
+    {
+      struct node1 *victim = test1_lookup(key);
+      test1_remove(victim);
+    }
+
+  /* Odd keys must be gone, even keys must be intact */
+  for (key = 0; key < 1024; key++)
+    {
+      struct node1 *found = test1_find(key);
+      int is_odd = key & 1;
+      int broken;
+
+      if (is_odd)
+        broken = (found != NULL);
+      else
+        broken = (found == NULL) || (found->data != key+123);
+      if (broken)
+        die("Inconsistency at i=%d", key);
+    }
+
+  /* The iterator must visit every remaining node exactly once */
+  HASH_FOR_ALL(test1, node)
+    {
+      checksum += 1 + node->key;
+    }
+  HASH_END_FOR;
+  ASSERT(checksum == 262144);
+  puts("OK");
+}
+
+/* TEST 2: external strings */
+
+struct node2 {
+ char *key;
+ int data;
+};
+
+#define HASH_NODE struct node2
+#define HASH_PREFIX(x) test2_##x
+#define HASH_KEY_STRING key
+#define HASH_NOCASE
+#define HASH_AUTO_POOL 4096
+
+#define HASH_WANT_FIND
+#define HASH_WANT_NEW
+
+#include "ucw/hashtable.h"
+
+/*
+ * Test 2: case-insensitive string keys stored outside of the node.
+ *
+ * Inserts "abc<i>" for all even i below 1024 (each key is a private
+ * xstrdup() copy, because the node only keeps the pointer) and then
+ * searches for the upper-case spelling "ABC<i>" of every i: even ones
+ * must be found, odd ones must not.  Prints "OK" on success.
+ */
+static void test2(void)
+{
+  char name[32];
+  int num;
+
+  test2_init();
+
+  for (num = 0; num < 1024; num += 2)
+    {
+      sprintf(name, "abc%d", num);
+      test2_new(xstrdup(name));
+    }
+
+  for (num = 0; num < 1024; num++)
+    {
+      int missing;
+
+      sprintf(name, "ABC%d", num);
+      missing = (test2_find(name) == NULL);
+      if (missing != (num & 1))
+        die("Inconsistency at i=%d", num);
+    }
+
+  puts("OK");
+}
+
+/* TEST 3: internal strings + pools */
+
+static struct mempool *pool3;
+
+struct node3 {
+ int data;
+ char key[1];
+};
+
+#define HASH_NODE struct node3
+#define HASH_PREFIX(x) test3_##x
+#define HASH_KEY_ENDSTRING key
+
+#define HASH_WANT_FIND
+#define HASH_WANT_NEW
+
+#define HASH_USE_POOL pool3
+
+#include "ucw/hashtable.h"
+
+/*
+ * Test 3: string keys stored at the end of the node, nodes allocated
+ * from the caller-supplied mempool pool3.
+ *
+ * Inserts "abc<i>" for all even i below 2^20 and then checks for every
+ * i below 2^20 that the key is present exactly when i is even.
+ * Prints "OK" on success.
+ */
+static void test3(void)
+{
+  enum { LIMIT = 1048576 };
+  char name[32];
+  int num;
+
+  pool3 = mp_new(16384);
+  test3_init();
+
+  for (num = 0; num < LIMIT; num += 2)
+    {
+      sprintf(name, "abc%d", num);
+      test3_new(name);
+    }
+
+  for (num = 0; num < LIMIT; num++)
+    {
+      int missing;
+
+      sprintf(name, "abc%d", num);
+      missing = (test3_find(name) == NULL);
+      if (missing != (num & 1))
+        die("Inconsistency at i=%d", num);
+    }
+
+  puts("OK");
+}
+
+/* TEST 4: complex keys */
+
+#include "ucw/hashfunc.h"
+
+struct node4 {
+ int port;
+ int data;
+ char host[1];
+};
+
+#define HASH_NODE struct node4
+#define HASH_PREFIX(x) test4_##x
+#define HASH_KEY_COMPLEX(x) x host, x port
+#define HASH_KEY_DECL char *host, int port
+
+#define HASH_WANT_CLEANUP
+#define HASH_WANT_FIND
+#define HASH_WANT_NEW
+#define HASH_WANT_LOOKUP
+#define HASH_WANT_DELETE
+#define HASH_WANT_REMOVE
+
+#define HASH_GIVE_HASHFN
+/*
+ * Hash function of the test4 table: combines the case-insensitive hash
+ * of the host name with the integer hash of the port by XOR, so that
+ * host names differing only in case hash equally (matching test4_eq()).
+ */
+static uns test4_hash(char *host, int port)
+{
+ return hash_string_nocase(host) ^ hash_u32(port);
+}
+
+#define HASH_GIVE_EQ
+/*
+ * Key comparison of the test4 table: two keys are equal (non-zero is
+ * returned) iff the host names match ignoring case and the ports are
+ * identical.
+ */
+static inline int test4_eq(char *host1, int port1, char *host2, int port2)
+{
+ return !strcasecmp(host1,host2) && port1 == port2;
+}
+
+#define HASH_GIVE_EXTRA_SIZE
+/*
+ * Number of extra bytes to allocate behind struct node4 for a given key:
+ * the length of the host name.  The terminating NUL fits into the one
+ * byte already declared as host[1] in the structure.  The port does not
+ * influence the size.
+ */
+static inline uns test4_extra_size(char *host, int port UNUSED)
+{
+ return strlen(host);
+}
+
+#define HASH_GIVE_INIT_KEY
+/*
+ * Stores the key (host, port) into a freshly allocated node.  The host
+ * name is copied into the space at the end of the node whose size was
+ * reported by test4_extra_size().
+ */
+static inline void test4_init_key(struct node4 *n, char *host, int port)
+{
+ strcpy(n->host, host);
+ n->port = port;
+}
+
+#include "ucw/hashtable.h"
+
+/*
+ * Test 4: complex (host, port) keys with user-supplied hash, equality,
+ * extra-size and key-initialisation hooks.
+ *
+ * Phase 1 creates every third key with new(), phase 2 visits all keys
+ * with lookup() (creating the missing ones) and stores the index as
+ * data, phase 3 drops all odd keys -- some with find() + remove(), the
+ * others with delete() -- using a different letter case of the host
+ * name, and phase 4 verifies that exactly the even keys survived with
+ * their data intact.  Finally the table is cleaned up and "OK" printed.
+ */
+static void test4(void)
+{
+  struct node4 *node;
+  char host[32];
+  int num;
+
+  test4_init();
+
+  /* Phase 1: explicit creation of every third key */
+  for (num = 0; num < 1024; num++)
+    {
+      if (num % 3)
+        continue;
+      sprintf(host, "abc%d", num);
+      node = test4_new(host, num%10);
+      node->data = num;
+    }
+
+  /* Phase 2: lookup finds the existing nodes and creates the rest */
+  for (num = 0; num < 1024; num++)
+    {
+      sprintf(host, "abc%d", num);
+      node = test4_lookup(host, num%10);
+      node->data = num;
+    }
+
+  /* Phase 3: remove the odd keys, alternating between both interfaces */
+  for (num = 1; num < 1024; num += 2)
+    {
+      sprintf(host, "aBc%d", num);
+      if ((num % 7) >= 3)
+        test4_delete(host, num%10);
+      else
+        {
+          node = test4_find(host, num%10);
+          ASSERT(node);
+          test4_remove(node);
+        }
+    }
+
+  /* Phase 4: only the even keys may be left */
+  for (num = 0; num < 1024; num++)
+    {
+      int broken;
+
+      sprintf(host, "ABC%d", num);
+      node = test4_find(host, num%10);
+      if (num & 1)
+        broken = (node != NULL);
+      else
+        broken = (node == NULL) || (node->data != num);
+      if (broken)
+        die("Inconsistency at i=%d", num);
+    }
+
+  test4_cleanup();
+  puts("OK");
+}
+
+/* TEST 5: integers again, but this time dynamically */
+
+struct node5 {
+ int key;
+ int data;
+};
+
+#define HASH_NODE struct node5
+#define HASH_PREFIX(x) test5_##x
+#define HASH_KEY_ATOMIC key
+#define HASH_ATOMIC_TYPE int
+#define HASH_TABLE_DYNAMIC
+
+struct test5_table;
+
+#define HASH_GIVE_INIT_DATA
+/*
+ * Data initialisation hook of the test5 table.  As the table is
+ * HASH_TABLE_DYNAMIC, the hook receives the table as its first argument
+ * (unused here); the data field is derived from the key exactly as in
+ * test1_init_data().
+ */
+static inline void test5_init_data(struct test5_table *table UNUSED, struct node5 *n)
+{
+ n->data = n->key + 123;
+}
+
+#define HASH_WANT_FIND
+#define HASH_WANT_NEW
+#define HASH_WANT_DELETE
+
+#include "ucw/hashtable.h"
+
+/*
+ * Test 5: the integer-key scenario of test 1 once more, this time on
+ * a dynamic table which lives on the stack and is passed explicitly to
+ * every operation.
+ *
+ * Keys 0..1023 are created with new(), the odd ones are deleted, the
+ * survivors are verified and the table is iterated to check the sum of
+ * (key + 1).  Prints "OK" on success.
+ */
+static void test5(void)
+{
+  struct test5_table tab;
+  int key;
+  int checksum = 0;
+
+  test5_init(&tab);
+
+  for (key = 0; key < 1024; key++)
+    {
+      struct node5 *created = test5_new(&tab, key);
+      ASSERT(created->data == key+123);
+    }
+
+  for (key = 1; key < 1024; key += 2)
+    test5_delete(&tab, key);
+
+  for (key = 0; key < 1024; key++)
+    {
+      struct node5 *found = test5_find(&tab, key);
+      int broken;
+
+      if (key & 1)
+        broken = (found != NULL);
+      else
+        broken = (found == NULL) || (found->data != key+123);
+      if (broken)
+        die("Inconsistency at i=%d", key);
+    }
+
+  HASH_FOR_ALL_DYNAMIC(test5, &tab, node)
+    checksum += 1 + node->key;
+  HASH_END_FOR;
+  ASSERT(checksum == 262144);
+  puts("OK");
+}
+
+/*
+ * Entry point of the hash table tester.
+ *
+ * Without arguments all tests are run.  Otherwise each argument is the
+ * number of a test to run (1 to 5); numbers which do not correspond to
+ * any test are accepted and select nothing.  Arguments which cannot be
+ * used as a bit number at all (negative or not smaller than the width
+ * of the mask) are rejected, because shifting by such a count is
+ * undefined.
+ */
+int
+main(int argc, char **argv)
+{
+  uns m = ~0U;
+  if (argc > 1)
+    {
+      m = 0;
+      for (int i=1; i<argc; i++)
+        {
+          long test = atol(argv[i]);
+          if (test < 0 || test >= (long)(8 * sizeof(m)))
+            die("Invalid test number: %s", argv[i]);
+          /* Unsigned shift, so that even the top bit is well defined */
+          m |= 1U << test;
+        }
+    }
+  if (m & (1 << 1))
+    test1();
+  if (m & (1 << 2))
+    test2();
+  if (m & (1 << 3))
+    test3();
+  if (m & (1 << 4))
+    test4();
+  if (m & (1 << 5))
+    test5();
+  return 0;
+}
--- /dev/null
+# Tests for the hash table modules
+# Each case runs one numbered test of hash-test and expects it to print OK.
+
+Run: ../obj/ucw/hash-test 1
+Out: OK
+
+Run: ../obj/ucw/hash-test 2
+Out: OK
+
+Run: ../obj/ucw/hash-test 3
+Out: OK
+
+Run: ../obj/ucw/hash-test 4
+Out: OK
+
+Run: ../obj/ucw/hash-test 5
+Out: OK
--- /dev/null
+/*
+ * Checking the correctness of str_len() and hash_*() and proving, that
+ * it is faster than the classical version ;-)
+ */
+
+#include "ucw/hashfunc.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/time.h>
+
+/* It will be divided by (10 + strlen()). */
+#define TEST_TIME 1000000
+
+/* The shift of the string according to the alignment. */
+static uns alignment = 0;
+
+/*
+ * Fills str[0..len-1] with pseudo-random non-zero bytes (values 1..255
+ * drawn from random()) and terminates the string at str[len].
+ * The buffer must therefore hold at least len+1 bytes.
+ */
+static void
+random_string(byte *str, int len)
+{
+  byte *end = str + len;
+
+  while (str < end)
+    *str++ = random() % 255 + 1;
+  *end = 0;
+}
+
+/*
+ * Stop-watch helper: returns the number of microseconds which have
+ * passed since the previous call and restarts the measurement.
+ * The result of the very first call is measured from a zeroed
+ * time stamp and is therefore not meaningful.
+ */
+static uns
+elapsed_time(void)
+{
+  static struct timeval previous, now;
+  uns usec;
+
+  gettimeofday(&now, NULL);
+  usec = (now.tv_sec - previous.tv_sec) * 1000000 + (now.tv_usec - previous.tv_usec);
+  previous = now;
+  return usec;
+}
+
+/*
+ * Self-test and micro-benchmark of str_len() and the string hashes.
+ *
+ * An optional first argument sets the byte offset at which the random
+ * benchmark strings start inside their buffer (see `alignment').
+ *
+ * 1. str_len() is compared with strlen() on a table of fixed strings.
+ * 2. hash_string() must agree with hash_block() on the same strings;
+ *    both hashes are printed, together with a note when the
+ *    case-insensitive hash equals the plain one.
+ * 3. For a range of lengths, random strings are generated and the time
+ *    spent in strlen(), str_len(), hash_string() and
+ *    hash_string_nocase() is accumulated and reported.
+ *
+ * Unsigned quantities are printed with %u.
+ */
+int
+main(int argc, char **argv)
+{
+  byte *strings[] = {
+    "",
+    "a",
+    "aa",
+    "aaa",
+    "aaaa",
+    "aaaaa",
+    "aaaaaa",
+    "aaaaaaa",
+    "aaaaaaaa",
+    "aaaaaaaaa",
+    "aaaaaaaaaa",
+    "AHOJ",
+    "\200aaaa",
+    "\200",
+    "\200\200",
+    "\200\200\200",
+    "\200\200\200\200",
+    "\200\200\200\200\200",
+    "kelapS treboR",
+    "Robert Spalek",
+    "uuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuu",
+    "********************************",
+    "****************************************************************",
+    NULL
+  };
+  int lengths[] = {
+    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
+    11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
+    30, 40, 50, 60, 70, 80, 90, 100,
+    200, 300, 400, 500, 600, 700, 800, 900, 1000,
+    2000, 4000, 8000, 16000, 32000, 64000,
+    -1
+  };
+  int i;
+  if (argc > 1)
+    alignment = atoi(argv[1]);
+  printf("Alignment set to %u\n", alignment);
+  /* Part 1: correctness of str_len() */
+  for (i=0; strings[i]; i++)
+    if (strlen(strings[i]) != str_len(strings[i]))
+      die("Internal str_len() error on string %d", i);
+  printf("%d strings tested OK\n", i);
+  /* Part 2: hash_string() versus hash_block() */
+  for (i=0; strings[i]; i++)
+    {
+      uns h1, h2;
+      h1 = hash_string(strings[i]);
+      h2 = hash_string_nocase(strings[i]);
+      if (h1 != hash_block(strings[i], str_len(strings[i])))
+        die("Internal hash_string() error on string %d", i);
+      printf("hash %2d = %08x %08x", i, h1, h2);
+      if (h1 == h2)
+        printf(" upper case?");
+      printf("\n");
+    }
+  /* Part 3: timing; lengths[] is terminated by a negative value */
+  for (i=0; lengths[i] >= 0; i++)
+    {
+      byte str[lengths[i] + 1 + alignment];
+      /* Fewer passes for longer strings, so each test takes similar time */
+      uns count = TEST_TIME / (lengths[i] + 10);
+      uns el1 = 0, el2 = 0, elh = 0, elhn = 0;
+      uns tot1 = 0, tot2 = 0, hash = 0, hashn = 0;
+      uns j;
+      for (j=0; j<count; j++)
+        {
+          random_string(str + alignment, lengths[i]);
+          /* Restart the stop-watch; the returned value is dropped */
+          elapsed_time();
+          /* Avoid "optimizing" by gcc, since the functions are
+           * attributed PURE. */
+          tot1 += strlen(str + alignment);
+          el1 += elapsed_time();
+          tot2 += str_len(str + alignment);
+          el2 += elapsed_time();
+          hash ^= hash_string(str + alignment);
+          elh += elapsed_time();
+          hashn ^= hash_string_nocase(str + alignment);
+          elhn += elapsed_time();
+        }
+      if (tot1 != tot2)
+        die("Internal error during test %d", i);
+      printf("Test %d: strlen = %d, passes = %u, classical = %u usec, speedup = %.4f\n",
+        i, lengths[i], count, el1, (el1 + 0.) / el2);
+      printf("\t\t total hash = %08x/%08x, hash time = %u/%u usec\n", hash, hashn, elh, elhn);
+    }
+/*
+  printf("test1: %d\n", hash_modify(10000000, 10000000, 99777555));
+  printf("test1: %d, %d\n", i, hash_modify(i, lengths[i-2], 99777333));
+  printf("test1: %d, %d\n", i, hash_modify(lengths[i-2], i, 99777333));
+  printf("test1: %d,%d,%d->%d\n", i, i*3-2, i*i, hash_modify(4587, i*3-2, i*i));
+  printf("test1: %d\n", hash_modify(lengths[5], 345, i));
+*/
+  return 0;
+}
--- /dev/null
+/*
+ * UCW Library -- Hyper-super-meta-alt-control-shift extra fast
+ * str_len() and hash_*() routines
+ *
+ * It is always at least as fast as the classical strlen() routine and for
+ * strings longer than 100 characters, it is substantially faster.
+ *
+ * (c) 2002, Robert Spalek <robert@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+#include "ucw/hashfunc.h"
+#include "ucw/chartype.h"
+
+/* The number of bits the hash in the function hash_*() is rotated by after
+ * every pass. It should be prime with the word size. */
+#define SHIFT_BITS 7
+
+/* A bit-mask which clears higher bytes than a given threshold. */
+static uns mask_higher_bits[sizeof(uns)];
+
+/*
+ * Fills in the mask_higher_bits[] table: entry number i gets its first
+ * i bytes (in memory order) set to 0xff and all remaining bytes cleared.
+ * Working byte by byte makes the table independent of the endianity of
+ * the machine.  Marked CONSTRUCTOR, so no explicit call is needed.
+ */
+static void CONSTRUCTOR
+hashfunc_init(void)
+{
+  uns keep, pos;
+
+  for (keep = 0; keep < sizeof(uns); keep++)
+    {
+      byte *mask = (byte *) &mask_higher_bits[keep];
+      for (pos = 0; pos < sizeof(uns); pos++)
+        mask[pos] = (pos < keep) ? -1 : 0;
+    }
+}
+
+/*
+ * Returns the number of bytes of the word x (taken in memory order)
+ * which precede its first zero byte, or sizeof(uns) if the word
+ * contains no zero byte at all.
+ */
+static inline uns CONST
+str_len_uns(uns x)
+{
+  const uns low_ones = ~0U / 0xff;		/* 0x01 in every byte */
+  const uns high_bits = low_ones * 0x80;	/* 0x80 in every byte */
+  const byte *mem = (const byte *) &x;
+  uns pos;
+
+  /*
+   *	x_2 = x - 0x01010101;
+   *	x_3 = ~x & x_2;
+   *	a = x_3 & 0x80808080;
+   *
+   * If all bytes of x are nonzero, then the highest bit of each byte of
+   * x_2 is lower or equal to the corresponding bit of x.  Hence x_3 has
+   * all these highest bits cleared (the target bit is set iff the source
+   * bit has changed from 0 to 1).  If a == 0, then we are sure there is
+   * no zero byte in x.
+   */
+  if (!(~x & (x - low_ones) & high_bits))
+    return sizeof(uns);
+
+  /* The quick test only says "maybe", so find the zero byte by hand */
+  pos = 0;
+  while (pos < sizeof(uns) && mem[pos])
+    pos++;
+  return pos;
+}
+
+/*
+ * Length of a zero-terminated string whose start is aligned to
+ * sizeof(uns).  The string is scanned a whole word at a time, so up to
+ * sizeof(uns)-1 bytes behind the terminating zero may be read.
+ */
+inline uns
+str_len_aligned(const char *str)
+{
+  const uns *word = (const uns *) str;
+  uns total = 0;
+  uns chunk;
+
+  do
+    {
+      chunk = str_len_uns(*word++);
+      total += chunk;
+    }
+  while (chunk >= sizeof(uns));	/* a short chunk contains the terminator */
+  return total;
+}
+
+/*
+ * Hash of a zero-terminated string whose start is aligned to
+ * sizeof(uns).  For every word of the string the hash is rotated left
+ * by SHIFT_BITS and XORed with the word; in the last word only the
+ * bytes in front of the terminating zero take part (the rest is masked
+ * out by mask_higher_bits[]).
+ */
+inline uns
+hash_string_aligned(const char *str)
+{
+  const uns *word = (const uns *) str;
+  uns hash = 0;
+  uns used;
+
+  for (;;)
+    {
+      hash = ROL(hash, SHIFT_BITS);
+      used = str_len_uns(*word);
+      if (used < sizeof(uns))
+        break;
+      hash ^= *word++;
+    }
+  /* The final, incomplete word */
+  return hash ^ (*word & mask_higher_bits[used]);
+}
+
+/*
+ * Hash of a block of len bytes starting at buf, which must be aligned
+ * to sizeof(uns).  Complete words are mixed in one by one (rotate left
+ * by SHIFT_BITS, then XOR); a final rotation is always performed and
+ * the remaining len % sizeof(uns) bytes, if any, are XORed in through
+ * mask_higher_bits[].
+ *
+ * When no partial word is left, the memory behind the block is not
+ * touched at all.  Loading it would only be masked to zero (entry 0 of
+ * the mask table is zero), so skipping the load does not change the
+ * result, but it avoids reading a whole word past the end of the buffer.
+ */
+inline uns
+hash_block_aligned(const byte *buf, uns len)
+{
+  const uns *u = (const uns *) buf;
+  uns hash = 0;
+  while (len >= sizeof(uns))
+    {
+      hash = ROL(hash, SHIFT_BITS) ^ *u++;
+      len -= sizeof(uns);
+    }
+  hash = ROL(hash, SHIFT_BITS);
+  if (len)
+    hash ^= *u & mask_higher_bits[len];
+  return hash;
+}
+
+#ifndef CPU_ALLOW_UNALIGNED
+/*
+ * Length of a zero-terminated string without any alignment requirement.
+ * The bytes in front of the first word boundary are checked one by one
+ * and the rest is left to str_len_aligned().
+ * (UNALIGNED_PART presumably yields the offset of the pointer within
+ * an uns-sized word -- it is defined elsewhere.)
+ */
+uns
+str_len(const char *str)
+{
+  uns misaligned = UNALIGNED_PART(str, uns);
+  uns head, pos;
+
+  if (!misaligned)
+    return str_len_aligned(str);
+
+  /* Number of bytes up to the next word boundary */
+  head = sizeof(uns) - misaligned;
+  for (pos = 0; pos < head; pos++)
+    if (!str[pos])
+      return pos;
+  return head + str_len_aligned(str + head);
+}
+
+/*
+ * Hash of a zero-terminated string without any alignment requirement.
+ *
+ * Aligned strings are handed over to hash_string_aligned().  Otherwise
+ * the string is processed byte by byte: at the start of every group of
+ * sizeof(uns) bytes the hash is rotated left by SHIFT_BITS and each
+ * byte is XORed in at the bit position chosen by the endianity of the
+ * CPU, i.e., where a whole-word load would have put it.
+ *
+ * The byte is converted to uns before shifting: shifting the promoted
+ * (signed) int by 24 bits overflows for bytes >= 0x80.
+ */
+uns
+hash_string(const char *str)
+{
+  const byte *s = (const byte *) str;
+  uns hash = 0;
+  uns i;
+
+  if (!UNALIGNED_PART(s, uns))
+    return hash_string_aligned(str);
+
+  for (i=0; ; i++)
+    {
+      uns modulo = i % sizeof(uns);
+      uns pos;			/* index of the byte within the word */
+#ifdef CPU_LITTLE_ENDIAN
+      pos = modulo;
+#else
+      pos = sizeof(uns) - 1 - modulo;
+#endif
+      /* The rotation also happens for the (possibly empty) last group */
+      if (!modulo)
+        hash = ROL(hash, SHIFT_BITS);
+      if (!s[i])
+        break;
+      hash ^= (uns) s[i] << (pos * 8);
+    }
+  return hash;
+}
+
+/*
+ * Hash of a block of len bytes without any alignment requirement.
+ *
+ * Aligned blocks are handed over to hash_block_aligned().  Otherwise
+ * the block is processed byte by byte: at the start of every group of
+ * sizeof(uns) bytes the hash is rotated left by SHIFT_BITS and each
+ * byte is XORed in at the bit position chosen by the endianity of the
+ * CPU, i.e., where a whole-word load would have put it.
+ *
+ * The byte is converted to uns before shifting: shifting the promoted
+ * (signed) int by 24 bits overflows for bytes >= 0x80.
+ */
+uns
+hash_block(const byte *buf, uns len)
+{
+  uns hash = 0;
+  uns i;
+
+  if (!UNALIGNED_PART(buf, uns))
+    return hash_block_aligned(buf, len);
+
+  for (i=0; ; i++)
+    {
+      uns modulo = i % sizeof(uns);
+      uns pos;			/* index of the byte within the word */
+#ifdef CPU_LITTLE_ENDIAN
+      pos = modulo;
+#else
+      pos = sizeof(uns) - 1 - modulo;
+#endif
+      /* The rotation also happens for the (possibly empty) last group */
+      if (!modulo)
+        hash = ROL(hash, SHIFT_BITS);
+      if (i >= len)
+        break;
+      hash ^= (uns) buf[i] << (pos * 8);
+    }
+  return hash;
+}
+#endif
+
+/*
+ * Case-insensitive hash of a zero-terminated string; no alignment is
+ * required.  Every byte is passed through Cupcase() and then mixed in
+ * the same way as in the byte-wise branch of hash_string(): the hash
+ * is rotated left by SHIFT_BITS at the start of every group of
+ * sizeof(uns) bytes and the byte is XORed in at the position chosen by
+ * the endianity of the CPU.
+ *
+ * The converted byte is cast to uns before shifting, because shifting
+ * a promoted (signed) int by 24 bits overflows for values >= 0x80.
+ */
+uns
+hash_string_nocase(const char *str)
+{
+  const byte *s = (const byte *) str;
+  uns hash = 0;
+  uns i;
+  for (i=0; ; i++)
+    {
+      uns modulo = i % sizeof(uns);
+      uns shift;
+#ifdef CPU_LITTLE_ENDIAN
+      shift = modulo;
+#else
+      shift = sizeof(uns) - 1 - modulo;
+#endif
+      /* The rotation also happens for the (possibly empty) last group */
+      if (!modulo)
+        hash = ROL(hash, SHIFT_BITS);
+      if (!s[i])
+        break;
+      hash ^= (uns) Cupcase(s[i]) << (shift * 8);
+    }
+  return hash;
+}
--- /dev/null
+/*
+ * UCW Library -- Hyper-super-meta-alt-control-shift extra fast
+ * str_len() and hash_*() routines
+ *
+ * (c) 2002, Robert Spalek <robert@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_HASHFUNC_H
+#define _UCW_HASHFUNC_H
+
+#include "ucw/lib.h"
+
+/*** === String hashes [[strhash]] ***/
+
+/* The following functions need str to be aligned to sizeof(uns). */
+uns str_len_aligned(const char *str) PURE; /** Get the string length (not a really useful hash function, but there is no better place for it). The string must be aligned to sizeof(uns). For unaligned see @str_len(). **/
+uns hash_string_aligned(const char *str) PURE; /** Hash the string. The string must be aligned to sizeof(uns). For unaligned see @hash_string(). **/
+uns hash_block_aligned(const byte *buf, uns len) PURE; /** Hash arbitrary data. They must be aligned to sizeof(uns). For unaligned see @hash_block(). **/
+
+#ifdef CPU_ALLOW_UNALIGNED
+#define str_len(str) str_len_aligned(str)
+#define hash_string(str) hash_string_aligned(str)
+#define hash_block(str, len) hash_block_aligned(str, len)
+#else
+uns str_len(const char *str) PURE; /** Get the string length. If you know it is aligned to sizeof(uns), you can use faster @str_len_aligned(). **/
+uns hash_string(const char *str) PURE; /** Hash the string. If it is aligned to sizeof(uns), you can use faster @hash_string_aligned(). **/
+uns hash_block(const byte *buf, uns len) PURE; /** Hash arbitrary data. If they are aligned to sizeof(uns), use faster @hash_block_aligned(). **/
+#endif
+
+uns hash_string_nocase(const char *str) PURE; /** Hash the string in a case insensitive way. Works only with ASCII characters. **/
+
+/*** === Integer hashes [[inthash]] ***/
+
+/***
+ * We hash integers by multiplying by a reasonably large prime with
+ * few ones in its binary form (to give the compiler the possibility
+ * of using shifts and adds on architectures where multiplication
+ * instructions are slow).
+ */
+static inline uns CONST hash_u32(uns x) { return 0x01008041*x; } /** Hash a 32 bit unsigned integer. **/
+static inline uns CONST hash_u64(u64 x) { return hash_u32((uns)x ^ (uns)(x >> 32)); } /** Hash a 64 bit unsigned integer. **/
+static inline uns CONST hash_pointer(void *x) { return ((sizeof(x) <= 4) ? hash_u32((uns)(uintptr_t)x) : hash_u64((u64)(uintptr_t)x)); } /** Hash a pointer. **/
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Universal Hash Table
+ *
+ * (c) 2002--2004 Martin Mares <mj@ucw.cz>
+ * (c) 2002--2005 Robert Spalek <robert@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+/*
+ * This is not a normal header file, it's a generator of hash tables.
+ * Each time you include it with parameters set in the corresponding
+ * preprocessor macros, it generates a hash table with the parameters
+ * given.
+ *
+ * You need to specify:
+ *
+ * HASH_NODE data type where a node dwells (usually a struct).
+ * HASH_PREFIX(x) macro to add a name prefix (used on all global names
+ * defined by the hash table generator).
+ *
+ * Then decide on type of keys:
+ *
+ * HASH_KEY_ATOMIC=f use node->f as a key of an atomic type (i.e.,
+ * a type which can be compared using `==')
+ * HASH_ATOMIC_TYPE (defaults to int).
+ * | HASH_KEY_STRING=f use node->f as a string key, allocated
+ * separately from the rest of the node.
+ * | HASH_KEY_ENDSTRING=f use node->f as a string key, allocated
+ * automatically at the end of the node struct
+ * (to be declared as "char f[1]" at the end).
+ * | HASH_KEY_COMPLEX use a multi-component key; as the name suggests,
+ * the passing of parameters is a bit complex then.
+ * The HASH_KEY_COMPLEX(x) macro should expand to
+ * `x k1, x k2, ... x kn' and you should also define:
+ * HASH_KEY_DECL declaration of function parameters in which key
+ * should be passed to all hash table operations.
+ * That is, `type1 k1, type2 k2, ... typen kn'.
+ * With complex keys, HASH_GIVE_HASHFN and HASH_GIVE_EQ
+ * are mandatory.
+ * | HASH_KEY_MEMORY=f use node->f as a raw data key, compared using
+ * memcmp
+ * HASH_KEY_SIZE the length of the key block
+ *
+ * Then specify what operations you request (all names are automatically
+ * prefixed by calling HASH_PREFIX):
+ *
+ * <always defined> init() -- initialize the hash table.
+ * HASH_WANT_CLEANUP cleanup() -- deallocate the hash table.
+ * HASH_WANT_FIND node *find(key) -- find first node with the specified
+ * key, return NULL if no such node exists.
+ * HASH_WANT_FIND_NEXT node *find_next(node *start) -- find next node with the
+ * specified key, return NULL if no such node exists.
+ * HASH_WANT_NEW node *new(key) -- create new node with given key.
+ * Doesn't check whether it already exists.
+ * HASH_WANT_LOOKUP node *lookup(key) -- find node with given key,
+ * if it doesn't exist, create it. Defining
+ * HASH_GIVE_INIT_DATA is strongly recommended.
+ * HASH_WANT_DELETE int delete(key) -- delete and deallocate node
+ * with given key. Returns success.
+ * HASH_WANT_REMOVE remove(node *) -- delete and deallocate given node.
+ *
+ * You can also supply several functions:
+ *
+ * HASH_GIVE_HASHFN unsigned int hash(key) -- calculate hash value of key.
+ * We have sensible default hash functions for strings
+ * and integers.
+ * HASH_GIVE_EQ int eq(key1, key2) -- return whether keys are equal.
+ * By default, we use == for atomic types and either
+ * strcmp or strcasecmp for strings.
+ * HASH_GIVE_EXTRA_SIZE int extra_size(key) -- returns how many bytes after the
+ * node should be allocated for dynamic data. Default=0
+ * or length of the string with HASH_KEY_ENDSTRING.
+ * HASH_GIVE_INIT_KEY void init_key(node *,key) -- initialize key in a newly
+ * created node. Defaults: assignment for atomic keys
+ * and static strings, strcpy for end-allocated strings.
+ * HASH_GIVE_INIT_DATA void init_data(node *) -- initialize data fields in a
+ * newly created node. Very useful for lookup operations.
+ * HASH_GIVE_ALLOC void *alloc(unsigned int size) -- allocate space for
+ * a node. Default is xmalloc() or pooled allocation, depending
+ * on HASH_USE_POOL and HASH_AUTO_POOL switches.
+ * void free(void *) -- the converse.
+ *
+ * ... and a couple of extra parameters:
+ *
+ * HASH_NOCASE String comparisons should be case-insensitive.
+ * HASH_DEFAULT_SIZE=n Initially, use hash table of approx. `n' entries.
+ * HASH_CONSERVE_SPACE Use as little space as possible.
+ * HASH_FN_BITS=n The hash function gives only `n' significant bits.
+ * HASH_ATOMIC_TYPE=t Atomic values are of type `t' instead of int.
+ * HASH_USE_POOL=pool Allocate all nodes from given mempool. Note, however, that
+ * deallocation is not supported by mempools, so delete/remove
+ * will leak pool memory.
+ * HASH_AUTO_POOL=size Create a pool of the given block size automatically.
+ * HASH_ZERO_FILL New entries should be initialized to all zeroes.
+ * HASH_TABLE_ALLOC The hash table itself will be allocated and freed using
+ * the same allocation functions as the nodes instead of
+ * the default xmalloc().
+ * HASH_TABLE_DYNAMIC Support multiple hash tables; the first parameter of all
+ * hash table operations is struct HASH_PREFIX(table) *.
+ *
+ * You also get an iterator macro at no extra charge:
+ *
+ * HASH_FOR_ALL(hash_prefix, variable)
+ * {
+ * // node *variable gets declared automatically
+ * do_something_with_node(variable);
+ * // use HASH_BREAK and HASH_CONTINUE instead of break and continue
+ * // you must not alter contents of the hash table here
+ * }
+ * HASH_END_FOR;
+ *
+ * (For dynamic tables, use HASH_FOR_ALL_DYNAMIC(hash_prefix, hash_table, variable) instead.)
+ *
+ * Then include "ucw/hashtable.h" and voila, you have a hash table
+ * suiting all your needs (at least those which you've revealed :) ).
+ *
+ * After including this file, all parameter macros are automatically
+ * undef'd.
+ */
+
+#ifndef _UCW_HASHFUNC_H
+#include "ucw/hashfunc.h"
+#endif
+
+#include "ucw/prime.h"
+
+#include <string.h>
+
+/* Initial setup of parameters */
+
+#if !defined(HASH_NODE) || !defined(HASH_PREFIX)
+#error Some of the mandatory configuration macros are missing.
+#endif
+
+#if defined(HASH_KEY_ATOMIC) && !defined(HASH_CONSERVE_SPACE)
+#define HASH_CONSERVE_SPACE
+#endif
+
+#define P(x) HASH_PREFIX(x)
+
+/* Declare buckets and the hash table */
+
+typedef HASH_NODE P(node);
+
+/*
+ * A bucket wraps one user node for storage in the table: buckets
+ * falling into the same slot are chained through `next'.  Unless
+ * HASH_CONSERVE_SPACE is set, the hash value of the key is stored in
+ * the bucket, so that P(bucket_hash)() does not have to recompute it.
+ */
+typedef struct P(bucket) {
+ struct P(bucket) *next;
+#ifndef HASH_CONSERVE_SPACE
+ uns hash;
+#endif
+ P(node) n;
+} P(bucket);
+
+/*
+ * State of one hash table.
+ *
+ * hash_size      number of slots in the ht[] array
+ * hash_count     set to 0 by init(); presumably the number of stored
+ *                nodes, maintained by the operations further below
+ * hash_max,
+ * hash_min       thresholds derived from hash_size by P(alloc_table)();
+ *                presumably the counts at which the table is grown or
+ *                shrunk -- they are consulted outside this part of the file
+ * hash_hard_max  upper limit on the table size, set by init()
+ * ht             the array of slots, each the head of a bucket chain
+ * pool           with HASH_AUTO_POOL only: the mempool created for the nodes
+ */
+struct P(table) {
+ uns hash_size;
+ uns hash_count, hash_max, hash_min, hash_hard_max;
+ P(bucket) **ht;
+#ifdef HASH_AUTO_POOL
+ struct mempool *pool;
+#endif
+};
+
+/*
+ * Helper macros which hide the difference between a single static table
+ * and HASH_TABLE_DYNAMIC tables passed around by pointer:
+ *
+ * T      the table structure itself
+ * TA     parameter list of a function taking nothing but the table
+ * TAC    table parameter followed by a comma (more parameters follow)
+ * TAU    like TA, with the parameter marked UNUSED
+ * TAUC   like TAC, with the parameter marked UNUSED
+ * TT     argument passing the table on to another function
+ * TTC    the same, followed by a comma (more arguments follow)
+ *
+ * In the static case, a single global table P(table) is defined here,
+ * TA and TAU expand to `void' and the remaining argument macros to nothing.
+ */
+#ifdef HASH_TABLE_DYNAMIC
+#define T (*table)
+#define TA struct P(table) *table
+#define TAC TA,
+#define TAU TA UNUSED
+#define TAUC TA UNUSED,
+#define TT table
+#define TTC table,
+#else
+struct P(table) P(table);
+#define T P(table)
+#define TA void
+#define TAC
+#define TAU void
+#define TAUC
+#define TT
+#define TTC
+#endif
+
+/* Preset parameters */
+
+#if defined(HASH_KEY_ATOMIC)
+
+#define HASH_KEY(x) x HASH_KEY_ATOMIC
+
+#ifndef HASH_ATOMIC_TYPE
+# define HASH_ATOMIC_TYPE int
+#endif
+#define HASH_KEY_DECL HASH_ATOMIC_TYPE HASH_KEY( )
+
+/*
+ * Default hash function for atomic keys: keys of at most 4 bytes are
+ * hashed by hash_u32(), wider ones by hash_u64().  The result is
+ * returned as uns, the type produced by both hash functions, instead of
+ * being squeezed through a signed int.
+ */
+#ifndef HASH_GIVE_HASHFN
+# define HASH_GIVE_HASHFN
+  static inline uns P(hash) (TAUC HASH_ATOMIC_TYPE x)
+  { return ((sizeof(x) <= 4) ? hash_u32(x) : hash_u64(x)); }
+#endif
+
+#ifndef HASH_GIVE_EQ
+# define HASH_GIVE_EQ
+ static inline int P(eq) (TAUC HASH_ATOMIC_TYPE x, HASH_ATOMIC_TYPE y)
+ { return x == y; }
+#endif
+
+#ifndef HASH_GIVE_INIT_KEY
+# define HASH_GIVE_INIT_KEY
+ static inline void P(init_key) (TAUC P(node) *n, HASH_ATOMIC_TYPE k)
+ { HASH_KEY(n->) = k; }
+#endif
+
+#elif defined(HASH_KEY_MEMORY)
+
+#define HASH_KEY(x) x HASH_KEY_MEMORY
+
+#define HASH_KEY_DECL byte HASH_KEY( )[HASH_KEY_SIZE]
+
+/*
+ * Default hash function for raw memory keys: hashes the HASH_KEY_SIZE
+ * bytes of the key by hash_block().  The result is returned as uns, the
+ * type produced by hash_block(), instead of being squeezed through
+ * a signed int.
+ */
+#ifndef HASH_GIVE_HASHFN
+# define HASH_GIVE_HASHFN
+  static inline uns P(hash) (TAUC byte *x)
+  { return hash_block(x, HASH_KEY_SIZE); }
+#endif
+
+#ifndef HASH_GIVE_EQ
+# define HASH_GIVE_EQ
+ static inline int P(eq) (TAUC byte *x, byte *y)
+ { return !memcmp(x, y, HASH_KEY_SIZE); }
+#endif
+
+#ifndef HASH_GIVE_INIT_KEY
+# define HASH_GIVE_INIT_KEY
+ static inline void P(init_key) (TAUC P(node) *n, byte *k)
+ { memcpy(HASH_KEY(n->), k, HASH_KEY_SIZE); }
+#endif
+
+#elif defined(HASH_KEY_STRING) || defined(HASH_KEY_ENDSTRING)
+
+#ifdef HASH_KEY_STRING
+# define HASH_KEY(x) x HASH_KEY_STRING
+# ifndef HASH_GIVE_INIT_KEY
+# define HASH_GIVE_INIT_KEY
+ static inline void P(init_key) (TAUC P(node) *n, char *k)
+ { HASH_KEY(n->) = k; }
+# endif
+#else
+# define HASH_KEY(x) x HASH_KEY_ENDSTRING
+# define HASH_GIVE_EXTRA_SIZE
+ static inline int P(extra_size) (TAUC char *k)
+ { return strlen(k); }
+# ifndef HASH_GIVE_INIT_KEY
+# define HASH_GIVE_INIT_KEY
+ static inline void P(init_key) (TAUC P(node) *n, char *k)
+ { strcpy(HASH_KEY(n->), k); }
+# endif
+#endif
+#define HASH_KEY_DECL char *HASH_KEY( )
+
+#ifndef HASH_GIVE_HASHFN
+#define HASH_GIVE_HASHFN
+ static inline uns P(hash) (TAUC char *k)
+ {
+# ifdef HASH_NOCASE
+ return hash_string_nocase(k);
+# else
+ return hash_string(k);
+# endif
+ }
+#endif
+
+#ifndef HASH_GIVE_EQ
+# define HASH_GIVE_EQ
+ static inline int P(eq) (TAUC char *x, char *y)
+ {
+# ifdef HASH_NOCASE
+ return !strcasecmp(x,y);
+# else
+ return !strcmp(x,y);
+# endif
+ }
+#endif
+
+#elif defined(HASH_KEY_COMPLEX)
+
+#define HASH_KEY(x) HASH_KEY_COMPLEX(x)
+
+#else
+#error You forgot to set the hash key type.
+#endif
+
+/* Defaults for missing parameters */
+
+#ifndef HASH_GIVE_HASHFN
+#error Unable to determine which hash function to use.
+#endif
+
+#ifndef HASH_GIVE_EQ
+#error Unable to determine how to compare two keys.
+#endif
+
+#ifdef HASH_GIVE_EXTRA_SIZE
+/* This trickery is needed to avoid `unused parameter' warnings */
+#define HASH_EXTRA_SIZE(x) P(extra_size)(TTC x)
+#else
+/*
+ * Beware, C macros are expanded iteratively, not recursively,
+ * hence we get only a _single_ argument, although the expansion
+ * of HASH_KEY contains commas.
+ */
+#define HASH_EXTRA_SIZE(x) 0
+#endif
+
+#ifndef HASH_GIVE_INIT_KEY
+#error Unable to determine how to initialize keys.
+#endif
+
+#ifndef HASH_GIVE_INIT_DATA
+static inline void P(init_data) (TAUC P(node) *n UNUSED)
+{
+}
+#endif
+
+#ifdef HASH_GIVE_ALLOC
+/* If the caller has requested to use his own allocation functions, do so */
+static inline void P(init_alloc) (TAU) { }
+static inline void P(cleanup_alloc) (TAU) { }
+
+#elif defined(HASH_USE_POOL)
+/* If the caller has requested to use his mempool, do so */
+#include "ucw/mempool.h"
+static inline void * P(alloc) (TAUC unsigned int size) { return mp_alloc_fast(HASH_USE_POOL, size); }
+static inline void P(free) (TAUC void *x UNUSED) { }
+static inline void P(init_alloc) (TAU) { }
+static inline void P(cleanup_alloc) (TAU) { }
+
+#elif defined(HASH_AUTO_POOL)
+/* Use our own pools */
+#include "ucw/mempool.h"
+static inline void * P(alloc) (TAUC unsigned int size) { return mp_alloc_fast(T.pool, size); }
+static inline void P(free) (TAUC void *x UNUSED) { }
+static inline void P(init_alloc) (TAU) { T.pool = mp_new(HASH_AUTO_POOL); }
+static inline void P(cleanup_alloc) (TAU) { mp_delete(T.pool); }
+#define HASH_USE_POOL
+
+#else
+/* The default allocation method */
+static inline void * P(alloc) (TAUC unsigned int size) { return xmalloc(size); }
+static inline void P(free) (TAUC void *x) { xfree(x); }
+static inline void P(init_alloc) (TAU) { }
+static inline void P(cleanup_alloc) (TAU) { }
+
+#endif
+
+#ifdef HASH_TABLE_ALLOC
+static inline void * P(table_alloc) (TAUC unsigned int size) { return P(alloc)(TTC size); }
+static inline void P(table_free) (TAUC void *x) { P(free)(TTC x); }
+#else
+static inline void * P(table_alloc) (TAUC unsigned int size) { return xmalloc(size); }
+static inline void P(table_free) (TAUC void *x) { xfree(x); }
+#endif
+
+#ifndef HASH_DEFAULT_SIZE
+#define HASH_DEFAULT_SIZE 32
+#endif
+
+#ifndef HASH_FN_BITS
+#define HASH_FN_BITS 32
+#endif
+
+/*
+ * Allocates memory for a bucket of the given size by the node allocator.
+ * With HASH_ZERO_FILL the memory is cleared first, otherwise it is
+ * returned as the allocator has delivered it.
+ */
+#ifndef HASH_ZERO_FILL
+static inline void * P(new_bucket)(TAUC uns size) { return P(alloc)(TTC size); }
+#else
+static inline void * P(new_bucket)(TAUC uns size)
+{
+  void *mem = P(alloc)(TTC size);
+  memset(mem, 0, size);
+  return mem;
+}
+#endif
+
+/* Now the operations */
+
+/*
+ * Allocate a zeroed slot array for the size requested in T.hash_size and recompute
+ * the resize thresholds. The requested size is first adjusted by next_table_prime().
+ */
+static void P(alloc_table) (TAU)
+{
+ T.hash_size = next_table_prime(T.hash_size);
+ T.ht = P(table_alloc)(TTC sizeof(void *) * T.hash_size);
+ bzero(T.ht, sizeof(void *) * T.hash_size);
+ /* Growth threshold: twice the slot count, or ~0U once doubling would reach hash_hard_max. */
+ if (2*T.hash_size < T.hash_hard_max)
+ T.hash_max = 2*T.hash_size;
+ else
+ T.hash_max = ~0U;
+ /* Shrink threshold: a quarter of the slot count; 0 for small tables (presumably so they never shrink). */
+ if (T.hash_size/2 > HASH_DEFAULT_SIZE)
+ T.hash_min = T.hash_size/4;
+ else
+ T.hash_min = 0;
+}
+
+/**
+ * Initializes the hash table.
+ * This one is available no matter what `HASH_WANT_` macros you defined or not.
+ **/
+static void HASH_PREFIX(init)(TA)
+{
+ T.hash_count = 0;
+ T.hash_size = HASH_DEFAULT_SIZE;
+ /* hash_hard_max bounds the growth threshold computed in alloc_table(): 2^HASH_FN_BITS, but at most 2^28. */
+#if HASH_FN_BITS < 28
+ T.hash_hard_max = 1 << HASH_FN_BITS;
+#else
+ T.hash_hard_max = 1 << 28;
+#endif
+ /* Set up the allocator before the first table allocation, which may use it (HASH_TABLE_ALLOC). */
+ P(init_alloc)(TT);
+ P(alloc_table)(TT);
+}
+
+#ifdef HASH_WANT_CLEANUP
+/**
+ * Deallocates the hash table, including the nodes.
+ * It is available if you defined <<want_cleanup,`HASH_WANT_CLEANUP`>>.
+ **/
+static void HASH_PREFIX(cleanup)(TA)
+{
+ /* Without a pool, every bucket in every chain is freed individually; with a pool this loop is compiled out. */
+#ifndef HASH_USE_POOL
+ uns i;
+ P(bucket) *b, *bb;
+
+ for (i=0; i<T.hash_size; i++)
+ for (b=T.ht[i]; b; b=bb)
+ {
+ /* Remember the successor before the bucket is freed. */
+ bb = b->next;
+ P(free)(TTC b);
+ }
+#endif
+ /* Tear down the allocator (deletes the pool for HASH_AUTO_POOL), then release the slot array. */
+ P(cleanup_alloc)(TT);
+ P(table_free)(TTC T.ht);
+}
+#endif
+
+/*
+ * Return the full (unreduced) hash value of a bucket: recomputed from the key
+ * when HASH_CONSERVE_SPACE is set, otherwise read from the value cached in the bucket.
+ */
+static inline uns P(bucket_hash) (TAUC P(bucket) *b)
+{
+#ifdef HASH_CONSERVE_SPACE
+ return P(hash)(TTC HASH_KEY(b->n.));
+#else
+ return b->hash;
+#endif
+}
+
+/*
+ * Resize the table to (approximately) @size slots: allocate a new slot array,
+ * relink every existing bucket into it and free the old array. Buckets themselves
+ * are reused, not reallocated.
+ */
+static void P(rehash) (TAC uns size)
+{
+ P(bucket) *b, *nb;
+ P(bucket) **oldt = T.ht, **newt;
+ uns oldsize = T.hash_size;
+ uns i, h;
+
+ DBG("Rehashing %d->%d at count %d", oldsize, size, T.hash_count);
+ /* alloc_table() reads the requested size from T.hash_size and overwrites T.ht. */
+ T.hash_size = size;
+ P(alloc_table)(TT);
+ newt = T.ht;
+ for (i=0; i<oldsize; i++)
+ {
+ b = oldt[i];
+ while (b)
+ {
+ /* Save the old successor, then push the bucket onto the head of its new chain. */
+ nb = b->next;
+ h = P(bucket_hash)(TTC b) % T.hash_size;
+ b->next = newt[h];
+ newt[h] = b;
+ b = nb;
+ }
+ }
+ P(table_free)(TTC oldt);
+}
+
+#ifdef HASH_WANT_FIND
+/**
+ * Finds a node with given key (specified in the @HASH_KEY_DECL parameter).
+ * If it does not exist, NULL is returned.
+ *
+ * Enabled by the <<want_find,`HASH_WANT_FIND`>> macro.
+ **/
+static HASH_NODE* HASH_PREFIX(find)(TAC HASH_KEY_DECL)
+{
+ uns h0 = P(hash) (TTC HASH_KEY( ));
+ uns h = h0 % T.hash_size;
+ P(bucket) *b;
+
+ /* Walk the chain of slot h; when hashes are cached, compare them first and call eq only on a hash match. */
+ for (b=T.ht[h]; b; b=b->next)
+ {
+ if (
+#ifndef HASH_CONSERVE_SPACE
+ b->hash == h0 &&
+#endif
+ P(eq)(TTC HASH_KEY( ), HASH_KEY(b->n.)))
+ return &b->n;
+ }
+ return NULL;
+}
+#endif
+
+#ifdef HASH_WANT_FIND_NEXT
+/**
+ * Finds next node with the same key. Returns NULL if it does not exist.
+ *
+ * Enabled by the <<want_find_next,`HASH_WANT_FIND_NEXT`>> macro.
+ **/
+static HASH_NODE* HASH_PREFIX(find_next)(TAC P(node) *start)
+{
+#ifndef HASH_CONSERVE_SPACE
+ uns h0 = P(hash) (TTC HASH_KEY(start->));
+#endif
+ /* Recover the bucket that embeds @start (as member n) so its chain can be followed. */
+ P(bucket) *b = SKIP_BACK(P(bucket), n, start);
+
+ /* Only the rest of the same chain is scanned, beginning just after @start. */
+ for (b=b->next; b; b=b->next)
+ {
+ if (
+#ifndef HASH_CONSERVE_SPACE
+ b->hash == h0 &&
+#endif
+ P(eq)(TTC HASH_KEY(start->), HASH_KEY(b->n.)))
+ return &b->n;
+ }
+ return NULL;
+}
+#endif
+
+#ifdef HASH_WANT_NEW
+/**
+ * Generates a new node with a given key.
+ *
+ * Enabled by the <<want_new,`HASH_WANT_NEW`>> macro.
+ **/
+static HASH_NODE * HASH_PREFIX(new)(TAC HASH_KEY_DECL)
+{
+ uns h0, h;
+ P(bucket) *b;
+
+ h0 = P(hash) (TTC HASH_KEY( ));
+ h = h0 % T.hash_size;
+ /* No lookup is done here: the bucket is inserted at the head of its chain even if an equal key is already present. */
+ b = P(new_bucket) (TTC sizeof(struct P(bucket)) + HASH_EXTRA_SIZE(HASH_KEY( )));
+ b->next = T.ht[h];
+ T.ht[h] = b;
+#ifndef HASH_CONSERVE_SPACE
+ b->hash = h0;
+#endif
+ P(init_key)(TTC &b->n, HASH_KEY( ));
+ P(init_data)(TTC &b->n);
+ /* Grow to double size when the count before this insertion had already reached hash_max. */
+ if (T.hash_count++ >= T.hash_max)
+ P(rehash)(TTC 2*T.hash_size);
+ return &b->n;
+}
+#endif
+
+#ifdef HASH_WANT_LOOKUP
+/**
+ * Finds a node with a given key. If it does not exist, a new one is created.
+ * It is strongly recommended to use <<give_init_data,`HASH_GIVE_INIT_DATA`>>.
+ *
+ * This one is enabled by the <<want_lookup,`HASH_WANT_LOOKUP`>> macro.
+ **/
+static HASH_NODE* HASH_PREFIX(lookup)(TAC HASH_KEY_DECL)
+{
+ uns h0 = P(hash) (TTC HASH_KEY( ));
+ uns h = h0 % T.hash_size;
+ P(bucket) *b;
+
+ /* Phase 1: return the existing node if the key is already present. */
+ for (b=T.ht[h]; b; b=b->next)
+ {
+ if (
+#ifndef HASH_CONSERVE_SPACE
+ b->hash == h0 &&
+#endif
+ P(eq)(TTC HASH_KEY( ), HASH_KEY(b->n.)))
+ return &b->n;
+ }
+
+ /* Phase 2: not found -- create a bucket, link it at the head of the chain and initialize it. */
+ b = P(new_bucket) (TTC sizeof(struct P(bucket)) + HASH_EXTRA_SIZE(HASH_KEY( )));
+ b->next = T.ht[h];
+ T.ht[h] = b;
+#ifndef HASH_CONSERVE_SPACE
+ b->hash = h0;
+#endif
+ P(init_key)(TTC &b->n, HASH_KEY( ));
+ P(init_data)(TTC &b->n);
+ /* Grow to double size when the count before this insertion had already reached hash_max. */
+ if (T.hash_count++ >= T.hash_max)
+ P(rehash)(TTC 2*T.hash_size);
+ return &b->n;
+}
+#endif
+
+#ifdef HASH_WANT_DELETE
+/**
+ * Removes a node with the given key from hash table and deallocates it.
+ *
+ * Success is returned.
+ *
+ * This one is enabled by <<want_delete,`HASH_WANT_DELETE`>> macro.
+ **/
+static int HASH_PREFIX(delete)(TAC HASH_KEY_DECL)
+{
+	uns h0 = P(hash) (TTC HASH_KEY( ));
+	uns h = h0 % T.hash_size;
+	P(bucket) *b, **bb;
+
+	/*
+	 * bb always points at the link that references b, so unlinking is a single store.
+	 * The assignment in the loop condition is intentional; it is parenthesized
+	 * (as in the remove operation) to keep -Wparentheses quiet.
+	 */
+	for (bb=&T.ht[h]; (b=*bb); bb=&b->next)
+	{
+		if (
+#ifndef HASH_CONSERVE_SPACE
+		    b->hash == h0 &&
+#endif
+		    P(eq)(TTC HASH_KEY( ), HASH_KEY(b->n.)))
+		{
+			*bb = b->next;
+			P(free)(TTC b);
+			/* Shrink to half size once the count drops below the lower threshold. */
+			if (--T.hash_count < T.hash_min)
+				P(rehash)(TTC T.hash_size/2);
+			return 1;
+		}
+	}
+	/* Key not present: nothing was removed. */
+	return 0;
+}
+#endif
+
+#ifdef HASH_WANT_REMOVE
+/**
+ * Removes a given node and deallocates it.
+ * It differs from <<fun__GENERIC_LINK|HASH_PREFIX|delete,`HASH_PREFIX(delete)()`>>
+ * in its type of parameter -- this one deletes a specific node, that one looks for it by a key.
+ *
+ * Enabled by <<want_remove,`HASH_WANT_REMOVE`>> macro.
+ **/
+static void HASH_PREFIX(remove)(TAC HASH_NODE *n)
+{
+ /* Recover the bucket that embeds the node, then locate its chain from its hash. */
+ P(bucket) *x = SKIP_BACK(struct P(bucket), n, n);
+ uns h0 = P(bucket_hash)(TTC x);
+ uns h = h0 % T.hash_size;
+ P(bucket) *b, **bb;
+
+ /* Find the link pointing at x; the search is by bucket identity, not by key. */
+ for (bb=&T.ht[h]; (b=*bb) && b != x; bb=&b->next)
+ ;
+ /* The node must be present in the table. */
+ ASSERT(b);
+ *bb = b->next;
+ P(free)(TTC b);
+ /* Shrink to half size once the count drops below the lower threshold. */
+ if (--T.hash_count < T.hash_min)
+ P(rehash)(TTC T.hash_size/2);
+}
+#endif
+
+/* And the iterator */
+
+/* The iterator macros are defined only once, no matter how many times this generator is included. */
+#ifndef HASH_FOR_ALL
+
+/*
+ * HASH_FOR_ALL_DYNAMIC(prefix, table pointer, variable) opens a block that visits every
+ * node of the table, slot by slot and chain by chain; the block must be closed by HASH_END_FOR.
+ * Inside the block, the variable points to the current node.
+ */
+#define HASH_FOR_ALL_DYNAMIC(h_px, h_table, h_var) \
+do { \
+ uns h_slot; \
+ struct GLUE_(h_px,bucket) *h_buck; \
+ for (h_slot=0; h_slot < (h_table)->hash_size; h_slot++) \
+ for (h_buck = (h_table)->ht[h_slot]; h_buck; h_buck = h_buck->next) \
+ { \
+ GLUE_(h_px,node) *h_var = &h_buck->n;
+/* Same iteration over the table variable named by GLUE_(h_px,table). */
+#define HASH_FOR_ALL(h_px, h_var) HASH_FOR_ALL_DYNAMIC(h_px, &GLUE_(h_px,table), h_var)
+#define HASH_END_FOR } } while(0)
+/* NOTE(review): HASH_BREAK expands to nothing, so it does not leave the iteration -- confirm whether this is intended. */
+#define HASH_BREAK
+/* Proceeds to the next bucket of the inner loop. */
+#define HASH_CONTINUE continue
+
+#endif
+
+/* Finally, undefine all the parameters */
+
+#undef P
+#undef T
+#undef TA
+#undef TAC
+#undef TAU
+#undef TAUC
+#undef TT
+#undef TTC
+
+#undef HASH_ATOMIC_TYPE
+#undef HASH_CONSERVE_SPACE
+#undef HASH_DEFAULT_SIZE
+#undef HASH_EXTRA_SIZE
+#undef HASH_FN_BITS
+#undef HASH_GIVE_ALLOC
+#undef HASH_GIVE_EQ
+#undef HASH_GIVE_EXTRA_SIZE
+#undef HASH_GIVE_HASHFN
+#undef HASH_GIVE_INIT_DATA
+#undef HASH_GIVE_INIT_KEY
+#undef HASH_KEY
+#undef HASH_KEY_ATOMIC
+#undef HASH_KEY_COMPLEX
+#undef HASH_KEY_DECL
+#undef HASH_KEY_ENDSTRING
+#undef HASH_KEY_STRING
+#undef HASH_KEY_MEMORY
+#undef HASH_KEY_SIZE
+#undef HASH_NOCASE
+#undef HASH_NODE
+#undef HASH_PREFIX
+#undef HASH_USE_POOL
+#undef HASH_AUTO_POOL
+#undef HASH_WANT_CLEANUP
+#undef HASH_WANT_DELETE
+#undef HASH_WANT_FIND
+#undef HASH_WANT_FIND_NEXT
+#undef HASH_WANT_LOOKUP
+#undef HASH_WANT_NEW
+#undef HASH_WANT_REMOVE
+#undef HASH_TABLE_ALLOC
+#undef HASH_TABLE_DYNAMIC
+#undef HASH_ZERO_FILL
--- /dev/null
+/*
+ * UCW Library -- Universal Heap Macros
+ *
+ * (c) 2001 Martin Mares <mj@ucw.cz>
+ * (c) 2005 Tomas Valla <tom@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+/***
+ * [[intro]]
+ * Introduction
+ * ------------
+ *
+ * Binary heap is a simple data structure, which for example supports efficient insertions, deletions
+ * and access to the minimal inserted item. We define several macros for such operations.
+ * Note that because of simplicity of heaps, we have decided to define direct macros instead
+ * of a <<generic:,macro generator>> as for several other data structures in the Libucw.
+ *
+ * A heap is represented by a number of elements and by an array of values. Beware that we
+ * index this array from one, not from zero as do the standard C arrays.
+ *
+ * Most macros use these parameters:
+ *
+ * - @type - the type of elements
+ * - @num - a variable (signed or unsigned integer) with the number of elements
+ * - @heap - a C array of type @type; the heap is stored in `heap[1] .. heap[num]`; `heap[0]` is unused
+ * - @less - a callback to compare two element values; `less(x, y)` shall return a non-zero value iff @x is lower than @y
+ * - @swap - a callback to swap two array elements; `swap(heap, i, j, t)` must swap `heap[i]` with `heap[j]` with possible help of temporary variable @t (type @type).
+ *
+ * A valid heap must follow these rules:
+ *
+ * - `num >= 0`
+ * - `heap[i] >= heap[i / 2]` for each `i` in `[2, num]`
+ *
+ * The first element `heap[1]` is always lower or equal to all other elements.
+ *
+ * [[macros]]
+ * Macros
+ * ------
+ ***/
+
+/*
+ * For internal usage: sift the element at index `_j` down towards the leaves.
+ * The loop stops when `_j` has no child or is lower than each of its children;
+ * otherwise the element is swapped with its lower child and the loop continues there.
+ * The expanding macro must declare `_j`, `_l` and the temporary `x` passed to @swap.
+ */
+#define HEAP_BUBBLE_DOWN_J(heap,num,less,swap) \
+ for (;;) \
+ { \
+ _l = 2*_j; \
+ if (_l > num) \
+ break; \
+ if (less(heap[_j],heap[_l]) && (_l == num || less(heap[_j],heap[_l+1]))) \
+ break; \
+ if (_l != num && less(heap[_l+1],heap[_l])) \
+ _l++; \
+ swap(heap,_j,_l,x); \
+ _j = _l; \
+ }
+
+/*
+ * For internal usage: sift the element at index `_j` up towards the root.
+ * The loop stops at the root or as soon as the parent is lower than the element;
+ * otherwise the element is swapped with its parent.
+ * The expanding macro must declare `_j`, `_u` and the temporary `x` passed to @swap.
+ */
+#define HEAP_BUBBLE_UP_J(heap,num,less,swap) \
+ while (_j > 1) \
+ { \
+ _u = _j/2; \
+ if (less(heap[_u], heap[_j])) \
+ break; \
+ swap(heap,_u,_j,x); \
+ _j = _u; \
+ }
+
+/**
+ * Shuffle the unordered array @heap of @num elements to become a valid heap. The time complexity is linear.
+ * Implemented by sifting down from every index, going from @num back to 1.
+ **/
+#define HEAP_INIT(type,heap,num,less,swap) \
+ do { \
+ uns _i = num; \
+ uns _j, _l; \
+ type x; \
+ while (_i >= 1) \
+ { \
+ _j = _i; \
+ HEAP_BUBBLE_DOWN_J(heap,num,less,swap) \
+ _i--; \
+ } \
+ } while(0)
+
+/**
+ * Delete the minimum element `heap[1]` in `O(log(n))` time.
+ * The removed value is moved just after the resulting heap (`heap[num + 1]`).
+ * The macro swaps the first and last elements, decrements @num itself and sifts the new root down.
+ **/
+#define HEAP_DELMIN(type,heap,num,less,swap) \
+ do { \
+ uns _j, _l; \
+ type x; \
+ swap(heap,1,num,x); \
+ num--; \
+ _j = 1; \
+ HEAP_BUBBLE_DOWN_J(heap,num,less,swap); \
+ } while(0)
+
+/**
+ * Insert `heap[num]` in `O(log(n))` time. The value of @num must be increased before.
+ * The macro does not modify @num; it only sifts the last element up to its place.
+ **/
+#define HEAP_INSERT(type,heap,num,less,swap) \
+ do { \
+ uns _j, _u; \
+ type x; \
+ _j = num; \
+ HEAP_BUBBLE_UP_J(heap,num,less,swap); \
+ } while(0)
+
+/**
+ * If you need to increase the value of `heap[pos]`, just do it and then call this macro to rebuild the heap.
+ * Only `heap[pos]` can be changed, the rest of the array must form a valid heap.
+ * The time complexity is `O(log(n))`.
+ * An increased element can only move towards the leaves, so it is sifted down from @pos.
+ **/
+#define HEAP_INCREASE(type,heap,num,less,swap,pos) \
+ do { \
+ uns _j, _l; \
+ type x; \
+ _j = pos; \
+ HEAP_BUBBLE_DOWN_J(heap,num,less,swap); \
+ } while(0)
+
+/**
+ * If you need to decrease the value of `heap[pos]`, just do it and then call this macro to rebuild the heap.
+ * Only `heap[pos]` can be changed, the rest of the array must form a valid heap.
+ * The time complexity is `O(log(n))`.
+ * A decreased element can only move towards the root, so it is sifted up from @pos.
+ **/
+#define HEAP_DECREASE(type,heap,num,less,swap,pos) \
+ do { \
+ uns _j, _u; \
+ type x; \
+ _j = pos; \
+ HEAP_BUBBLE_UP_J(heap,num,less,swap); \
+ } while(0)
+
+/**
+ * Delete `heap[pos]` in `O(log(n))` time.
+ * The element is swapped with the last one and @num is decremented, so the removed value
+ * ends up in `heap[num + 1]`. The element moved to @pos is then compared with the removed
+ * one: if it is lower it is sifted up, otherwise it is sifted down.
+ **/
+#define HEAP_DELETE(type,heap,num,less,swap,pos) \
+ do { \
+ uns _j, _l, _u; \
+ type x; \
+ _j = pos; \
+ swap(heap,_j,num,x); \
+ num--; \
+ if (less(heap[_j], heap[num+1])) \
+ HEAP_BUBBLE_UP_J(heap,num,less,swap) \
+ else \
+ HEAP_BUBBLE_DOWN_J(heap,num,less,swap); \
+ } while(0)
+
+/**
+ * Default swapping macro.
+ * Exchanges `heap[a]` and `heap[b]` by plain assignment, using @t as the temporary.
+ **/
+#define HEAP_SWAP(heap,a,b,t) (t=heap[a], heap[a]=heap[b], heap[b]=t)
--- /dev/null
+/*
+ * UCW Library -- IP address access lists
+ *
+ * (c) 1997--2007 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+#include "ucw/clists.h"
+#include "ucw/conf.h"
+#include "ucw/getopt.h"
+#include "ucw/fastbuf.h"
+#include "ucw/ipaccess.h"
+
+#include <string.h>
+
+/* One rule of an access list, stored as a node of a clist. */
+struct ipaccess_entry {
+ cnode n; /* list linkage */
+ int allow; /* configured via the "Mode" item: index into { "deny", "allow" } */
+ struct ip_addrmask addr; /* address and mask the rule applies to */
+};
+
+/*
+ * Configuration parser for struct ip_addrmask. Accepts an address optionally followed
+ * by `/` and either a prefix length (0 to 32) or a netmask in address notation.
+ * The input string @c is modified in place (the `/` is overwritten by a terminator).
+ * Returns NULL on success or an error message.
+ */
+static char *
+addrmask_parser(char *c, void *ptr)
+{
+ /*
+ * This is tricky: addrmasks will be compared by memcmp(), so we must ensure
+ * that even the padding between structure members is zeroed out.
+ */
+ struct ip_addrmask *am = ptr;
+ bzero(am, sizeof(*am));
+
+ /* Split "addr/suffix" into two strings; p stays NULL when there is no suffix. */
+ char *p = strchr(c, '/');
+ if (p)
+ *p++ = 0;
+ char *err = cf_parse_ip(c, &am->addr);
+ if (err)
+ return err;
+ if (p)
+ {
+ uns len;
+ /* Try a prefix length first: set the top `len` bits. len == 32 is special-cased, presumably to avoid shifting by the full width. */
+ if (!cf_parse_int(p, &len) && len <= 32)
+ am->mask = ~(len == 32 ? 0 : ~0U >> len);
+ /* Otherwise the suffix has to parse as a netmask written like an address. */
+ else if (cf_parse_ip(p, &am->mask))
+ return "Invalid prefix length or netmask";
+ }
+ else
+ /* No suffix: match the single address exactly. */
+ am->mask = ~0U;
+ return NULL;
+}
+
+/* Configuration dumper for struct ip_addrmask: writes address and mask as two 8-digit hexadecimal numbers separated by a slash. */
+static void
+addrmask_dumper(struct fastbuf *fb, void *ptr)
+{
+ struct ip_addrmask *am = ptr;
+ bprintf(fb, "%08x/%08x ", am->addr, am->mask);
+}
+
+/* User-defined configuration type describing struct ip_addrmask; usable with CF_USER as in ipaccess_cf below. */
+struct cf_user_type ip_addrmask_type = {
+ .size = sizeof(struct ip_addrmask),
+ .name = "ip_addrmask",
+ .parser = addrmask_parser,
+ .dumper = addrmask_dumper
+};
+
+/*
+ * Configuration section describing one struct ipaccess_entry:
+ * "Mode" selects deny (0) or allow (1), "IP" is parsed by ip_addrmask_type.
+ */
+struct cf_section ipaccess_cf = {
+ CF_TYPE(struct ipaccess_entry),
+ CF_ITEMS {
+ CF_LOOKUP("Mode", PTR_TO(struct ipaccess_entry, allow), ((const char* const []) { "deny", "allow", NULL })),
+ CF_USER("IP", PTR_TO(struct ipaccess_entry, addr), &ip_addrmask_type),
+ CF_END
+ }
+};
+
+/* Return 1 if @ip agrees with am->addr in every bit selected by am->mask, 0 otherwise. */
+int ip_addrmask_match(struct ip_addrmask *am, u32 ip)
+{
+	u32 differing_bits = ip ^ am->addr;
+
+	return (differing_bits & am->mask) == 0;
+}
+
+/*
+ * Walk the access list @l in order and return the `allow` value of the first
+ * entry matching @ip. If no entry matches, 0 is returned.
+ */
+int
+ipaccess_check(clist *l, u32 ip)
+{
+	CLIST_FOR_EACH(struct ipaccess_entry *, entry, *l)
+	{
+		if (!ip_addrmask_match(&entry->addr, ip))
+			continue;
+		return entry->allow;
+	}
+	return 0;
+}
+
+#ifdef TEST
+
+#include <stdio.h>
+
+/* Access list filled from the configuration and queried by the test program. */
+static clist t;
+
+/* Test configuration section: item "A" is a list of ipaccess_cf entries stored in `t`. */
+static struct cf_section test_cf = {
+ CF_ITEMS {
+ CF_LIST("A", &t, &ipaccess_cf),
+ CF_END
+ }
+};
+
+/*
+ * Test driver: registers section "T", processes configuration options from the
+ * command line and then reads one IP address per line from stdin, printing
+ * whether the access list allows it.
+ */
+int main(int argc, char **argv)
+{
+	cf_declare_section("T", &test_cf, 0);
+	if (cf_getopt(argc, argv, CF_SHORT_OPTS, CF_NO_LONG_OPTS, NULL) != -1)
+		die("Invalid arguments");
+
+	/* Plain char buffer: fgets(), strchr() and cf_parse_ip() all take char pointers. */
+	char buf[256];
+	while (fgets(buf, sizeof(buf), stdin))
+	{
+		/* Strip the trailing newline, if any. */
+		char *c = strchr(buf, '\n');
+		if (c)
+			*c = 0;
+		u32 ip;
+		if (cf_parse_ip(buf, &ip))
+			puts("Invalid IP address");
+		else if (ipaccess_check(&t, ip))
+			puts("Allowed");
+		else
+			puts("Denied");
+	}
+	return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- IP address access lists
+ *
+ * (c) 1997--2007 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_IPACCESS_H
+#define _UCW_IPACCESS_H
+
+#include "ucw/clists.h"
+
+/* Configuration section describing a single access list entry (mode + address/mask). */
+extern struct cf_section ipaccess_cf;
+/* Returns the `allow` value of the first entry of @l matching @ip, or 0 if none matches. */
+int ipaccess_check(clist *l, u32 ip);
+
+/* Low-level handling of addresses and masks */
+
+struct ip_addrmask {
+ u32 addr;
+ u32 mask; /* only bits set here are compared by ip_addrmask_match() */
+};
+
+/* User-defined configuration type for struct ip_addrmask. */
+extern struct cf_user_type ip_addrmask_type;
+/* Non-zero iff @ip equals am->addr in all bits selected by am->mask. */
+int ip_addrmask_match(struct ip_addrmask *am, u32 ip);
+
+#endif
--- /dev/null
+/*
+ * Knuth-Morris-Pratt's Substring Search for N given strings
+ *
+ * (c) 1999--2005, Robert Spalek <robert@ucw.cz>
+ * (c) 2006, Pavel Charvat <pchar@ucw.cz>
+ *
+ * (In fact, the algorithm is usually referred to as Aho-Corasick,
+ * but that's just an extension of KMP to multiple strings.)
+ */
+
+/*
+ * This is not a normal header file, it's a generator of KMP algorithm.
+ * Each time you include it with parameters set in the corresponding
+ * preprocessor macros, it generates KMP structures and functions
+ * with the parameters given. See ucw/kmp.h before reading this description.
+ *
+ * This file defines:
+ *
+ * struct search structure with both the internal and the user-defined variables
+ * used during the search and accessible from all macros
+ *
+ * void search(kmp,search,src) executes the search; search structure is allocated by the caller (possible input/output)
+ *
+ * void run(kmp,src) the same, but automatically allocates the search structure on the stack
+ *
+ *
+ * Parameters to the generator (these marked with [*] are mandatory):
+ *
+ * [*] KMPS_PREFIX(x) macro to add a name prefix (used on all global names
+ * defined by the KMP search generator)
+ * [*] KMPS_KMP_PREFIX(x) prefix used for ucw/kmp.h
+ *
+ * KMPS_SOURCE user-defined text source (together with KMPS_GET_CHAR);
+ * if unset, the one from ucw/kmp.h is taken
+ * KMPS_GET_CHAR(kmp,src,search) analogy to KMP_GET_CHAR, but it must store the next character to search->c
+ *
+ * KMPS_ADD_CONTROLS add control characters (see KMP_CONTROL_CHAR in kmp.h) at both ends of the input string
+ * KMPS_MERGE_CONTROLS merge adjacent control characters to a single one
+ *
+ * KMPS_VARS user-defined variables in struct search (in .u substructure to avoid collisions)
+ *
+ * KMPS_INIT(kmp,src,search) statement executed at the beginning of search()
+ * KMPS_EXIT(kmp,src,search) ... at the end
+ * KMPS_STEP(kmp,src,search) ... after each step (read of next character + current state update)
+ * of the algorithm, but before KMPS_FOUND[_CHAIN]
+ * KMPS_FOUND_CHAIN(kmp,src,search) ... for each state representing locally longest match
+ * (stored in search->out - NOT necessarily search->s!);
+ * all matches form a NULL-terminated linked list (search->out, search->out->next, ...)
+ * in order of decreasing length
+ * KMPS_FOUND(kmp,src,search) ... called for every match (in search->out)
+ * KMPS_WANT_BEST algorithm computes globally longest match, which is available
+ * in search->best in KMPS_EXIT; if there is no match, it points to the null state
+ */
+
+/* P(x) names symbols of this search variant, KP(x) names symbols of the underlying KMP automaton. */
+#define P(x) KMPS_PREFIX(x)
+#define KP(x) KMPS_KMP_PREFIX(x)
+
+/* Type of the text source: user-defined, or the automaton's own source type by default. */
+#ifdef KMPS_SOURCE
+typedef KMPS_SOURCE P(search_source_t);
+#else
+typedef KP(source_t) P(search_source_t);
+#endif
+
+/* Default reader: fetch the next character into s->c using the automaton's get_char(). */
+#ifndef KMPS_GET_CHAR
+#define KMPS_GET_CHAR(kmp,src,s) (KP(get_char)(kmp, &src, &s->c))
+#endif
+
+/* State of one running search; allocated by the caller of search() or on the stack by run(). */
+struct P(search) {
+ struct KP(state) *s; /* current state */
+ struct KP(state) *out; /* output state */
+# ifdef KMPS_WANT_BEST
+ struct KP(state) *best; /* longest match */
+# endif
+ KP(char_t) c; /* last character */
+# ifdef KMPS_ADD_CONTROLS
+ uns eof; /* set once the trailing control character has been generated */
+# endif
+# ifdef KMPS_VARS
+ struct {
+ KMPS_VARS
+ } u; /* user-defined */
+# endif
+};
+
+/*
+ * Run the automaton @kmp over the text @src, keeping all search state in the caller-provided @s.
+ * Each pass of the main loop performs one transition on s->c, invokes the configured
+ * KMPS_STEP / KMPS_FOUND_CHAIN / KMPS_FOUND hooks and then reads the next character.
+ */
+static void
+P(search) (struct KP(struct) *kmp, struct P(search) *s, P(search_source_t) src)
+{
+ s->s = &kmp->null;
+# ifdef KMPS_WANT_BEST
+ s->best = &kmp->null;
+# endif
+# ifdef KMPS_ADD_CONTROLS
+ /* The first transition is made on a control character, as if one preceded the input. */
+ s->c = KP(control)();
+ s->eof = 0;
+# else
+ s->c = 0;
+# endif
+# ifdef KMPS_INIT
+ { KMPS_INIT(kmp, src, s); }
+# endif
+# ifndef KMPS_ADD_CONTROLS
+ /* There is no leading control character to process, so enter the loop at the point where input is read. */
+ goto start_read;
+# endif
+ for (;;)
+ {
+ /* Transition: follow back edges until a state with an edge for s->c is found; fall back to the null state. */
+ for (struct KP(state) *t = s->s; t && !(s->s = KP(hash_find)(&kmp->hash, t, s->c)); t = t->back);
+ s->s = s->s ? : &kmp->null;
+
+# ifdef KMPS_STEP
+ { KMPS_STEP(kmp, src, s); }
+# endif
+
+# if defined(KMPS_FOUND) || defined(KMPS_FOUND_CHAIN) || defined(KMPS_WANT_BEST)
+ /* First state to report: the current one if its len is non-zero, otherwise its `next` link. */
+ s->out = s->s->len ? s->s : s->s->next;
+ if (s->out)
+ {
+# ifdef KMPS_WANT_BEST
+ if (s->out->len > s->best->len)
+ s->best = s->out;
+# endif
+# ifdef KMPS_FOUND_CHAIN
+ { KMPS_FOUND_CHAIN(kmp, src, s); }
+# endif
+# ifdef KMPS_FOUND
+ /* Report every match on the chain; the assignment in the condition advances s->out. */
+ do
+ { KMPS_FOUND(kmp, src, s); }
+ while (s->out = s->out->next);
+# endif
+ }
+# endif
+
+# ifdef KMPS_ADD_CONTROLS
+ /* The trailing control character has just been processed, so the search is finished. */
+ if (s->eof)
+ break;
+# endif
+
+# ifndef KMPS_ADD_CONTROLS
+start_read: ;
+# endif
+# ifdef KMPS_MERGE_CONTROLS
+ KP(char_t) last_c = s->c;
+# endif
+
+ /* Read the next character; with KMPS_MERGE_CONTROLS keep reading while both the previous and the new character are controls. */
+ do
+ {
+ if (!KMPS_GET_CHAR(kmp, src, s))
+ {
+# ifdef KMPS_ADD_CONTROLS
+ /* End of input: process one more (synthetic) control character unless the last character was already a control. */
+ if (!KP(is_control)(kmp, s->c))
+ {
+ s->c = KP(control)();
+ s->eof = 1;
+ break;
+ }
+# endif
+ goto exit;
+ }
+ }
+ while (0
+# ifdef KMPS_MERGE_CONTROLS
+ || (KP(is_control)(kmp, last_c) && KP(is_control)(kmp, s->c))
+# endif
+ );
+ }
+exit: ;
+# ifdef KMPS_EXIT
+ { KMPS_EXIT(kmp, src, s); }
+# endif
+}
+
+/* Convenience wrapper around search(): uses a search structure on the stack, which is discarded on return. */
+static inline void
+P(run) (struct KP(struct) *kmp, P(search_source_t) src)
+{
+ struct P(search) search;
+ P(search)(kmp, &search, src);
+}
+
+/* Undefine the helper macros and all generator parameters, so the file can be included again. */
+#undef P
+#undef KP
+#undef KMPS_PREFIX
+#undef KMPS_KMP_PREFIX
+#undef KMPS_SOURCE
+#undef KMPS_GET_CHAR
+#undef KMPS_ADD_CONTROLS
+#undef KMPS_MERGE_CONTROLS
+#undef KMPS_VARS
+#undef KMPS_INIT
+#undef KMPS_EXIT
+#undef KMPS_FOUND
+#undef KMPS_FOUND_CHAIN
+#undef KMPS_WANT_BEST
+#undef KMPS_STEP
--- /dev/null
+/*
+ * Test of KMP search
+ *
+ * (c) 2006, Pavel Charvat <pchar@ucw.cz>
+ */
+
+#include "ucw/lib.h"
+#include "ucw/mempool.h"
+#include <string.h>
+
+/* Tracing is compiled out by default; change the condition to 1 to log via msg(L_DEBUG, ...). */
+#if 0
+#define TRACE(x...) do{msg(L_DEBUG, x);}while(0)
+#else
+#define TRACE(x...) do{}while(0)
+#endif
+
+/* TEST1 - multiple searches */
+
+/* One automaton (prefix kmp1_) ... */
+#define KMP_PREFIX(x) kmp1_##x
+#define KMP_WANT_CLEANUP
+#include "ucw/kmp.h"
+/* ... with a first search variant (kmp1s1_) that tracks the longest match and traces its length on exit ... */
+#define KMPS_PREFIX(x) kmp1s1_##x
+#define KMPS_KMP_PREFIX(x) kmp1_##x
+#define KMPS_WANT_BEST
+#define KMPS_EXIT(kmp,src,s) TRACE("Best match has %d characters", s->best->len)
+#include "ucw/kmp-search.h"
+/* ... and a second variant (kmp1s2_) over the same automaton that counts all matches in a user variable. */
+#define KMPS_PREFIX(x) kmp1s2_##x
+#define KMPS_KMP_PREFIX(x) kmp1_##x
+#define KMPS_VARS uns count;
+#define KMPS_INIT(kmp,src,s) s->u.count = 0
+#define KMPS_FOUND(kmp,src,s) s->u.count++
+#include "ucw/kmp-search.h"
+
+/* Builds one automaton from three overlapping keys and checks both search variants against known results. */
+static void
+test1(void)
+{
+ TRACE("Running test1");
+ struct kmp1_struct kmp;
+ kmp1_init(&kmp);
+ kmp1_add(&kmp, "ahoj");
+ kmp1_add(&kmp, "hoj");
+ kmp1_add(&kmp, "aho");
+ kmp1_build(&kmp);
+ /* The text contains "aho" and "hoj" but never "ahoj", so the longest match has 3 characters. */
+ struct kmp1s1_search s1;
+ kmp1s1_search(&kmp, &s1, "asjlahslhalahosjkjhojsas");
+ ASSERT(s1.best->len == 3);
+ /* Here "ahoj" occurs once (matching "aho", "ahoj" and "hoj") and "hoj" once more: 4 matches. */
+ struct kmp1s2_search s2;
+ kmp1s2_search(&kmp, &s2, "asjlahslhalahojsjkjhojsas");
+ ASSERT(s2.u.count == 4);
+ kmp1_cleanup(&kmp);
+}
+
+#ifdef CONFIG_CHARSET /* This one depends on libcharset */
+
+/* TEST2 - various tracing */
+
+/*
+ * Automaton kmp2_: UTF-8 input, lowercased, non-alphabetic characters turned into control
+ * characters. Every key carries an extra `id` argument; the hooks below only trace what happens.
+ */
+#define KMP_PREFIX(x) kmp2_##x
+#define KMP_USE_UTF8
+#define KMP_TOLOWER
+#define KMP_ONLYALPHA
+#define KMP_STATE_VARS char *str; uns id;
+#define KMP_ADD_EXTRA_ARGS uns id
+#define KMP_VARS char *start;
+#define KMP_ADD_INIT(kmp,src) kmp->u.start = src
+#define KMP_ADD_NEW(kmp,src,s) do{ TRACE("Inserting string %s with id %d", kmp->u.start, id); \
+ s->u.str = kmp->u.start; s->u.id = id; }while(0)
+#define KMP_ADD_DUP(kmp,src,s) TRACE("String %s already inserted", kmp->u.start)
+#define KMP_WANT_CLEANUP
+#define KMP_WANT_SEARCH
+#define KMPS_ADD_CONTROLS
+#define KMPS_MERGE_CONTROLS
+#define KMPS_FOUND(kmp,src,s) TRACE("String %s with id %d found", s->out->u.str, s->out->u.id)
+#define KMPS_STEP(kmp,src,s) TRACE("Got to state %p after reading %d", s->s, s->c)
+#include "ucw/kmp.h"
+
+/* Smoke test of the UTF-8 variant: inserts keys (including duplicates) and runs a search; results are only traced, not asserted. */
+static void
+test2(void)
+{
+ TRACE("Running test2");
+ struct kmp2_struct kmp;
+ kmp2_init(&kmp);
+ kmp2_add(&kmp, "ahoj", 1);
+ kmp2_add(&kmp, "ahoj", 2);
+ kmp2_add(&kmp, "hoj", 3);
+ kmp2_add(&kmp, "aho", 4);
+ kmp2_add(&kmp, "aba", 5);
+ kmp2_add(&kmp, "aba", 5);
+ kmp2_add(&kmp, "pěl", 5);
+ kmp2_build(&kmp);
+ kmp2_run(&kmp, "Šíleně žluťoučký kůň úpěl ďábelské ódy labababaks sdahojdhsaladsjhla");
+ kmp2_cleanup(&kmp);
+}
+
+#endif
+
+/* TEST3 - random tests */
+
+/*
+ * Automaton kmp3_: every key remembers its index. A duplicate key is blanked in place
+ * (its first character is overwritten by 0) so that test3 skips it when counting.
+ * The search hook decrements the expected per-key counter and the running sum for each match.
+ */
+#define KMP_PREFIX(x) kmp3_##x
+#define KMP_STATE_VARS uns index;
+#define KMP_ADD_EXTRA_ARGS uns index
+#define KMP_VARS char *start;
+#define KMP_ADD_INIT(kmp,src) kmp->u.start = src
+#define KMP_ADD_NEW(kmp,src,s) s->u.index = index
+#define KMP_ADD_DUP(kmp,src,s) *(kmp->u.start) = 0
+#define KMP_WANT_CLEANUP
+#define KMP_WANT_SEARCH
+#define KMPS_VARS uns sum, *cnt;
+#define KMPS_FOUND(kmp,src,s) do{ ASSERT(s->u.cnt[s->out->u.index]); s->u.cnt[s->out->u.index]--; s->u.sum--; }while(0)
+#include "ucw/kmp.h"
+
+/*
+ * Randomized cross-check: for random key sets and random texts, count the occurrences
+ * of every key by brute force and verify that the automaton reports exactly the same
+ * matches (each report decrements a counter; everything must end at zero).
+ */
+static void
+test3(void)
+{
+ TRACE("Running test3");
+ struct mempool *pool = mp_new(1024);
+ for (uns testn = 0; testn < 100; testn++)
+ {
+ mp_flush(pool);
+ /* NOTE(review): if random_max(100) can return 0, `s` (and `cnt` below) become zero-length VLAs -- confirm. */
+ uns n = random_max(100);
+ char *s[n];
+ struct kmp3_struct kmp;
+ kmp3_init(&kmp);
+ /* Generate the keys: short random strings over the letters 'a'..'c'. */
+ for (uns i = 0; i < n; i++)
+ {
+ uns m = random_max(10);
+ s[i] = mp_alloc(pool, m + 1);
+ for (uns j = 0; j < m; j++)
+ s[i][j] = 'a' + random_max(3);
+ s[i][m] = 0;
+ kmp3_add(&kmp, s[i], i);
+ }
+ kmp3_build(&kmp);
+ for (uns i = 0; i < 10; i++)
+ {
+ /* Generate a text over 'a'..'d'. */
+ uns m = random_max(100);
+ byte b[m + 1];
+ for (uns j = 0; j < m; j++)
+ b[j] = 'a' + random_max(4);
+ b[m] = 0;
+ uns cnt[n];
+ struct kmp3_search search;
+ search.u.sum = 0;
+ search.u.cnt = cnt;
+ /* Brute-force reference counts; empty strings (including blanked duplicates) are skipped. */
+ for (uns j = 0; j < n; j++)
+ {
+ cnt[j] = 0;
+ if (*s[j])
+ for (uns k = 0; k < m; k++)
+ if (!strncmp(b + k, s[j], strlen(s[j])))
+ cnt[j]++, search.u.sum++;
+ }
+ kmp3_search(&kmp, &search, b);
+ ASSERT(search.u.sum == 0);
+ }
+ kmp3_cleanup(&kmp);
+ }
+ mp_delete(pool);
+}
+
+/* TEST4 - user-defined character type */
+
+struct kmp4_struct;
+struct kmp4_state;
+
+/*
+ * Character comparison for TEST4, where a "character" is a pointer to a byte.
+ * Two characters are equal if they are the same pointer (this covers two NULLs)
+ * or if both are non-NULL and point to equal bytes.
+ */
+static inline int
+kmp4_eq(struct kmp4_struct *kmp UNUSED, byte *a, byte *b)
+{
+	if (a == b)
+		return 1;
+	if (!a || !b)
+		return 0;
+	return *a == *b;
+}
+
+/*
+ * Hash function for TEST4: combines the pointed-to byte (shifted left by 16 bits,
+ * or 0 for a NULL character) with the address of the source state.
+ */
+static inline uns
+kmp4_hash(struct kmp4_struct *kmp UNUSED, struct kmp4_state *s, byte *c)
+{
+	uns char_part = 0;
+
+	if (c)
+		char_part = *c << 16;
+	return char_part + (uns)(uintptr_t)s;
+}
+
+/*
+ * Automaton kmp4_: the character type is a pointer into the source string and the control
+ * character is NULL, so custom hash and equality functions (kmp4_hash, kmp4_eq) are supplied.
+ * KMP_GET_CHAR stores the current position in c, advances src and reports whether the byte was non-zero.
+ */
+#define KMP_PREFIX(x) kmp4_##x
+#define KMP_CHAR byte *
+#define KMP_CONTROL_CHAR NULL
+#define KMP_GET_CHAR(kmp,src,c) ({ c = src++; !!*c; })
+#define KMP_GIVE_HASHFN
+#define KMP_GIVE_EQ
+#define KMP_WANT_CLEANUP
+#define KMP_WANT_SEARCH
+#define KMPS_FOUND(kmp,src,s) TRACE("found")
+#define KMPS_ADD_CONTROLS
+#define KMPS_MERGE_CONTROLS
+#include "ucw/kmp.h"
+
+/* Smoke test of the user-defined character type: one key, one search; matches are only traced. */
+static void
+test4(void)
+{
+ TRACE("Running test4");
+ struct kmp4_struct kmp;
+ kmp4_init(&kmp);
+ kmp4_add(&kmp, "ahoj");
+ kmp4_build(&kmp);
+ kmp4_run(&kmp, "djdhaskjdahoahaahojojshdaksjahdahojskj");
+ kmp4_cleanup(&kmp);
+}
+
+/* Runs all tests in sequence; test2 is compiled only when CONFIG_CHARSET is defined. */
+int
+main(void)
+{
+ test1();
+#ifdef CONFIG_CHARSET
+ test2();
+#endif
+ test3();
+ test4();
+ return 0;
+}
--- /dev/null
+# Tests for the kmp module
+
+Run: ../obj/ucw/kmp-test
--- /dev/null
+/*
+ * Knuth-Morris-Pratt's Substring Search for N given strings
+ *
+ * (c) 1999--2005, Robert Spalek <robert@ucw.cz>
+ * (c) 2006, Pavel Charvat <pchar@ucw.cz>
+ *
+ * (In fact, the algorithm is usually referred to as Aho-Corasick,
+ * but that's just an extension of KMP to multiple strings.)
+ */
+
+/*
+ * This is not a normal header file, it's a generator of KMP algorithm.
+ * Each time you include it with parameters set in the corresponding
+ * preprocessor macros, it generates KMP structures and functions
+ * with the parameters given.
+ *
+ * This file contains only construction of the automaton. The search
+ * itself can be generated by inclusion of file ucw/kmp-search.h.
+ * Separate headers allow the user to define multiple search
+ * routines for one common set of key strings.
+ *
+ * Example:
+ *
+ * #define KMP_PREFIX(x) kmp_##x
+ * #define KMP_WANT_CLEANUP
+ * #define KMP_WANT_SEARCH // includes ucw/kmp-search.h automatically
+ * #define KMPS_FOUND(kmp,src,s) printf("found\n")
+ * #include "ucw/kmp.h"
+ *
+ * [...]
+ *
+ * struct kmp_struct kmp; // a structure describing the whole automaton
+ * kmp_init(&kmp); // initialization (must be called before all other functions)
+ *
+ * // add key strings we want to search
+ * kmp_add(&kmp, "aaa");
+ * kmp_add(&kmp, "abc");
+ *
+ * // complete the automaton, no more strings can be added later
+ * kmp_build(&kmp);
+ *
+ * // example of search, should print single "found" to stdout
+ * kmp_run(&kmp, "aabaabca");
+ *
+ * // destroy all internal structures
+ * kmp_cleanup(&kmp);
+ *
+ *
+ * Brief description of all parameters:
+ *
+ * Basic parameters:
+ * KMP_PREFIX(x) macro to add a name prefix (used on all global names
+ * defined by the KMP generator); mandatory;
+ * we abbreviate this to P(x) below
+ *
+ * KMP_CHAR alphabet type, the default is u16
+ *
+ * KMP_SOURCE user-defined text source; KMP_GET_CHAR must
+ * KMP_GET_CHAR(kmp,src,c) return zero at the end or nonzero together with the next character in c otherwise;
+ * if not defined, zero-terminated array of bytes is used as the input
+ *
+ * KMP_VARS user-defined variables in 'struct P(struct)'
+ * -- a structure describing the whole automaton;
+ * these variables are stored in .u substructure to avoid collisions
+ * KMP_STATE_VARS user-defined variables in 'struct P(state)'
+ * -- created for each state of the automaton;
+ * these variables are stored in .u substructure to avoid collisions
+ *
+ * Parameters which select how the input is interpreted (if KMP_SOURCE is unset):
+ * KMP_USE_ASCII reads single bytes from the input (default)
+ * KMP_USE_UTF8 reads UTF-8 characters from the input (valid UTF-8 needed)
+ * KMP_TOLOWER converts all to lowercase
+ * KMP_UNACCENT removes accents
+ * KMP_ONLYALPHA converts non-alphas to KMP_CONTROL_CHAR (see below)
+ *
+ * Parameters controlling add(kmp, src):
+ * KMP_ADD_EXTRA_ARGS extra arguments, should be used carefully because of possible collisions
+ * KMP_ADD_INIT(kmp,src) called in the beginning of add(), src points to the start of the key string
+ * KMP_INIT_STATE(kmp,s) initialization of a new state s (called before KMP_ADD_{NEW,DUP});
+ * null state is not included and should be handled after init() if necessary;
+ * all user-defined data are filled by zeros before call to KMP_INIT_STATE
+ * KMP_ADD_NEW(kmp,src,s) initialize last state of every new key string (called after KMP_INIT_STATE);
+ * the string must be parsed before so src is after the last string's character
+ * KMP_ADD_DUP(kmp,src,s) analogy of KMP_ADD_NEW called for duplicates
+ *
+ * Parameters to build():
+ * KMP_BUILD_STATE(kmp,s) called for all states (including null) in order of non-decreasing tree depth
+ *
+ * Other parameters:
+ * KMP_WANT_CLEANUP define cleanup()
+ * KMP_WANT_SEARCH includes ucw/kmp-search.h with the same prefix;
+ * there can be multiple search variants for a single KMP automaton
+ * KMP_USE_POOL allocates in a given pool
+ * KMP_CONTROL_CHAR special control character (default is ':')
+ * KMP_GIVE_ALLOC if set, you must supply custom allocation functions:
+ * void *alloc(unsigned int size) -- allocate space for
+ * a state. Default is pooled allocation from a local pool or HASH_USE_POOL.
+ * void free(void *) -- the converse.
+ * KMP_GIVE_HASHFN if set, you must supply custom hash function:
+ * unsigned int hash(struct P(struct) *kmp, struct P(state) *state, KMP_CHAR c);
+ * default hash function works only for integer character types
+ * KMP_GIVE_EQ if set, you must supply custom compare function of two characters:
+ * int eq(struct P(struct) *kmp, KMP_CHAR a, KMP_CHAR b);
+ * default is 'a == b'
+ */
+
+/* KMP_PREFIX is the only mandatory parameter. */
+#ifndef KMP_PREFIX
+#error Missing KMP_PREFIX
+#endif
+
+#include "ucw/mempool.h"
+#include <alloca.h>
+#include <string.h>
+
+#define P(x) KMP_PREFIX(x)
+
+/* Alphabet type: KMP_CHAR if given, u16 otherwise. */
+#ifdef KMP_CHAR
+typedef KMP_CHAR P(char_t);
+#else
+typedef u16 P(char_t);
+#endif
+
+/* Type of the `len` field of a state. */
+typedef u32 P(len_t);
+
+/* NOTE(review): KMP_NODE is not listed among the documented parameters above -- confirm its purpose. */
+#ifdef KMP_NODE
+typedef KMP_NODE P(node_t);
+#else
+typedef struct {} P(node_t);
+#endif
+
+struct P(struct);
+
+/* One state of the automaton; states also serve as nodes of the transition hash table, keyed by (from, c). */
+struct P(state) {
+ struct P(state) *from; /* state with the previous character (forms a tree with null state in the root) */
+ struct P(state) *back; /* backwards edge to the longest shorter state with same suffix */
+ struct P(state) *next; /* the longest of shorter matches (or NULL) */
+ P(len_t) len; /* state depth if it represents a key string, zero otherwise */
+ P(char_t) c; /* last character of the represented string */
+ struct {
+# ifdef KMP_STATE_VARS
+ KMP_STATE_VARS
+# endif
+ } u; /* user-defined data*/
+};
+
+/* Control char: returns KMP_CONTROL_CHAR if the user defined it, ':' otherwise. */
+static inline P(char_t)
+P(control) (void)
+{
+# ifdef KMP_CONTROL_CHAR
+ return KMP_CONTROL_CHAR;
+# else
+ return ':';
+# endif
+}
+
+/* Forward declaration of the transition hash table generated by ucw/hashtable.h further below. */
+struct P(hash_table);
+
+/*
+ * Hash function of the transition table; the key is the pair (source state, character).
+ * With KMP_GIVE_HASHFN the user's hash() is called, with the table pointer cast to the
+ * automaton structure (the table is its first member). The default mixes the character,
+ * shifted left by 16 bits, with the address of the source state.
+ */
+#define HASH_GIVE_HASHFN
+#ifdef KMP_GIVE_HASHFN
+static inline uns
+P(hash_hash) (struct P(hash_table) *t, struct P(state) *f, P(char_t) c)
+{
+ return P(hash) ((struct P(struct) *) t, f, c);
+}
+#else
+static inline uns
+P(hash_hash) (struct P(hash_table) *t UNUSED, struct P(state) *f, P(char_t) c)
+{
+ return (((uns)c) << 16) + (uns)(uintptr_t)f;
+}
+#endif
+
+/* Default character comparison (plain ==), used unless the user supplies eq() via KMP_GIVE_EQ. */
+#ifndef KMP_GIVE_EQ
+static inline int
+P(eq) (struct P(struct) *kmp UNUSED, P(char_t) c1, P(char_t) c2)
+{
+ return c1 == c2;
+}
+#endif
+
+/* Tests whether @c equals the control character, using the configured eq(). */
+static inline int
+P(is_control) (struct P(struct) *kmp, P(char_t) c)
+{
+ return P(eq) (kmp, c, P(control)());
+}
+
+#define HASH_GIVE_EQ
+/* Key comparison for the hash table: same parent state and equal characters */
+static inline int
+P(hash_eq) (struct P(hash_table) *t, struct P(state) *f1, P(char_t) c1, struct P(state) *f2, P(char_t) c2)
+{
+  if (f1 != f2)
+    return 0;
+  return P(eq)((struct P(struct) *) t, c1, c2) != 0;
+}
+
+#ifdef KMP_GIVE_ALLOC
+#define HASH_GIVE_ALLOC
+/* Forward allocation requests of the hash table to the user's alloc() */
+static inline void *
+P(hash_alloc) (struct P(hash_table) *t, uns size)
+{
+  struct P(struct) *kmp = (struct P(struct) *) t;
+  return P(alloc) (kmp, size);
+}
+
+/* Forward releases of hash table memory to the user's free() */
+static inline void
+P(hash_free) (struct P(hash_table) *t, void *ptr)
+{
+  struct P(struct) *kmp = (struct P(struct) *) t;
+  P(free) (kmp, ptr);
+}
+#endif
+
+#define HASH_GIVE_INIT_KEY
+/*
+ * Initialize a freshly created state @s reached from the state @f by the
+ * character @c: clear it, let the user initialize his part and hook it
+ * to the front of the list of sons of @f.
+ */
+static inline void
+P(hash_init_key) (struct P(hash_table) *t UNUSED, struct P(state) *s, struct P(state) *f, P(char_t) c)
+{
+  memset(s, 0, sizeof(*s));
+#ifdef KMP_INIT_STATE
+  UNUSED struct P(struct) *kmp = (struct P(struct) *)t;
+  { KMP_INIT_STATE(kmp, s); }
+#endif
+  s->c = c;
+  s->from = f;
+  /* Until build() is called, f->back is the head of the list of sons and s->next links the siblings */
+  s->next = f->back;
+  f->back = s;
+}
+
+/*
+ * Instantiate the generic hash table from ucw/hashtable.h for the state
+ * transitions. Our P() is dropped before the include and defined again
+ * right after it -- presumably because hashtable.h uses a P() macro of
+ * its own (confirm against hashtable.h).
+ */
+#undef P
+#define HASH_PREFIX(x) KMP_PREFIX(hash_##x)
+#define HASH_NODE struct KMP_PREFIX(state)
+/* The key is the pair (parent state, character) */
+#define HASH_KEY_COMPLEX(x) x from, x c
+#define HASH_KEY_DECL struct KMP_PREFIX(state) *from, KMP_PREFIX(char_t) c
+#define HASH_WANT_NEW
+#define HASH_WANT_FIND
+#ifdef KMP_WANT_CLEANUP
+#define HASH_WANT_CLEANUP
+#endif
+/* Use the pool given by the user, or let the hash table create one of its own */
+#if defined(KMP_USE_POOL)
+#define HASH_USE_POOL KMP_USE_POOL
+#else
+#define HASH_AUTO_POOL 4096
+#endif
+#define HASH_CONSERVE_SPACE
+#define HASH_TABLE_DYNAMIC
+#include "ucw/hashtable.h"
+#define P(x) KMP_PREFIX(x)
+
+/*
+ * The whole automaton. The hash table must stay the first member: the
+ * hash table callbacks above cast their struct P(hash_table) pointer
+ * directly to a pointer to this structure.
+ */
+struct P(struct) {
+ struct P(hash_table) hash; /* hash table of state transitions */
+ struct P(state) null; /* null state */
+ struct {
+# ifdef KMP_VARS
+ KMP_VARS
+# endif
+ } u; /* user-defined data */
+};
+
+#ifdef KMP_SOURCE
+typedef KMP_SOURCE P(source_t);
+#else
+typedef char *P(source_t);
+#endif
+
+/*
+ * Read the next character from the source @src, advance the source and
+ * store the (possibly transformed) character to @c. Returns zero at the
+ * end of input, non-zero otherwise.
+ *
+ * With KMP_GET_CHAR, the user-supplied macro does all the work. Otherwise
+ * the source is decoded either as UTF-8 (KMP_USE_UTF8) or as plain
+ * characters; KMP_ONLYALPHA replaces non-alphabetic characters by the
+ * control character, KMP_TOLOWER and KMP_UNACCENT normalize the rest.
+ */
+#ifdef KMP_GET_CHAR
+static inline int
+P(get_char) (struct P(struct) *kmp UNUSED, P(source_t) *src UNUSED, P(char_t) *c UNUSED)
+{
+  return KMP_GET_CHAR(kmp, (*src), (*c));
+}
+#else
+#  if defined(KMP_USE_UTF8)
+#    include "ucw/unicode.h"
+#    if defined(KMP_ONLYALPHA) || defined(KMP_TOLOWER) || defined(KMP_UNACCENT)
+#      include "charset/unicat.h"
+#    endif
+#  elif defined(KMP_USE_ASCII)
+#    if defined(KMP_ONLYALPHA) || defined(KMP_TOLOWER)
+#      include "ucw/chartype.h"
+#    endif
+#  endif
+static inline int
+P(get_char) (struct P(struct) *kmp UNUSED, P(source_t) *src, P(char_t) *c)
+{
+#  ifdef KMP_USE_UTF8
+  uns cc;
+  *src = utf8_get(*src, &cc);
+#    ifdef KMP_ONLYALPHA
+  if (!cc) {}
+  else if (!Ualpha(cc))
+    cc = P(control)();
+  else
+#    endif
+    {
+#    ifdef KMP_TOLOWER
+      cc = Utolower(cc);
+#    endif
+#    ifdef KMP_UNACCENT
+      cc = Uunaccent(cc);
+#    endif
+    }
+#  else
+  uns cc = *(*src)++;
+#    ifdef KMP_ONLYALPHA
+  if (!cc) {}
+  else if (!Calpha(cc))
+    cc = P(control)();
+  else
+#    endif
+    {
+      /*
+       * The braces are essential: without them and without KMP_TOLOWER,
+       * the dangling `else' above would swallow the store to *c below.
+       */
+#    ifdef KMP_TOLOWER
+      cc = Clocase(cc);
+#    endif
+#    ifdef KMP_UNACCENT
+#      error Do not know how to unaccent ASCII characters
+#    endif
+    }
+#  endif
+  *c = cc;
+  return !!cc;
+}
+#endif
+
+/*
+ * Add the string read from @src to the automaton and return the state
+ * representing it. Returns NULL if the string is empty. If the string
+ * is already present (its state has non-zero len), KMP_ADD_DUP is invoked
+ * and the existing state is returned; otherwise the state's len is set
+ * to the number of characters read and KMP_ADD_NEW is invoked.
+ */
+static struct P(state) *
+P(add) (struct P(struct) *kmp, P(source_t) src
+# ifdef KMP_ADD_EXTRA_ARGS
+ , KMP_ADD_EXTRA_ARGS
+# endif
+)
+{
+# ifdef KMP_ADD_INIT
+ { KMP_ADD_INIT(kmp, src); }
+# endif
+
+ P(char_t) c;
+ if (!P(get_char)(kmp, &src, &c))
+ return NULL;
+ struct P(state) *p = &kmp->null, *s;
+ uns len = 0;
+ /* Follow the already existing transitions as long as possible */
+ do
+ {
+ s = P(hash_find)(&kmp->hash, p, c);
+ if (!s)
+ /* No such transition yet: create new states for all remaining characters */
+ for (;;)
+ {
+ s = P(hash_new)(&kmp->hash, p, c);
+ len++;
+ if (!(P(get_char)(kmp, &src, &c)))
+ goto enter_new;
+ p = s;
+ }
+ p = s;
+ len++;
+ }
+ while (P(get_char)(kmp, &src, &c));
+ /* The whole string led through existing states; non-zero len means it was already added as a key */
+ if (s->len)
+ {
+# ifdef KMP_ADD_DUP
+ { KMP_ADD_DUP(kmp, src, s); }
+# endif
+ return s;
+ }
+enter_new:
+ s->len = len;
+# ifdef KMP_ADD_NEW
+ { KMP_ADD_NEW(kmp, src, s); }
+# endif
+ return s;
+}
+
+/* Initialize an empty automaton: clear the null state and set up the hash table */
+static void
+P(init) (struct P(struct) *kmp)
+{
+  memset(&kmp->null, 0, sizeof(kmp->null));
+  P(hash_init)(&kmp->hash);
+}
+
+#ifdef KMP_WANT_CLEANUP
+/* Release the automaton by cleaning up its hash table (available only with KMP_WANT_CLEANUP) */
+static inline void
+P(cleanup) (struct P(struct) *kmp)
+{
+ P(hash_cleanup)(&kmp->hash);
+}
+#endif
+
+/* Return non-zero if the hash table of transitions contains no states */
+static inline int
+P(empty) (struct P(struct) *kmp)
+{
+  return kmp->hash.hash_count == 0;
+}
+
+/*
+ * Return the first state of the chain of matches belonging to @s:
+ * @s itself if it represents a key (non-zero len), otherwise s->next.
+ */
+static inline struct P(state) *
+P(chain_start) (struct P(state) *s)
+{
+  if (s->len)
+    return s;
+  return s->next;
+}
+
+/*
+ * Finish the automaton after all keys have been added: process the states
+ * in breadth-first order (using a FIFO sized by the number of states) and
+ * compute the back and next pointers of each of them. Up to now, these
+ * pointers held the lists of sons created by hash_init_key().
+ * KMP_BUILD_STATE, if defined, is invoked for the null state and then for
+ * every state once its pointers are final.
+ */
+static void
+P(build) (struct P(struct) *kmp)
+{
+ if (P(empty)(kmp))
+ return;
+ uns read = 0, write = 0;
+ struct P(state) *fifo[kmp->hash.hash_count], *null = &kmp->null;
+ /* Start with the sons of the null state */
+ for (struct P(state) *s = null->back; s; s = s->next)
+ fifo[write++] = s;
+ null->back = NULL;
+# ifdef KMP_BUILD_STATE
+ { KMP_BUILD_STATE(kmp, null); }
+# endif
+ while (read != write)
+ {
+ struct P(state) *s = fifo[read++], *t;
+ /* Enqueue the sons of s before s->back and s->next get overwritten below */
+ for (t = s->back; t; t = t->next)
+ fifo[write++] = t;
+ /* Walk the back edges starting at the parent until a state with a transition on s->c is found */
+ for (t = s->from->back; 1; t = t->back)
+ {
+ if (!t)
+ {
+ s->back = null;
+ s->next = NULL;
+ break;
+ }
+ s->back = P(hash_find)(&kmp->hash, t, s->c);
+ if (s->back)
+ {
+ /* Same expression as in chain_start(s->back) */
+ s->next = s->back->len ? s->back : s->back->next;
+ break;
+ }
+ }
+# ifdef KMP_BUILD_STATE
+ { KMP_BUILD_STATE(kmp, s); }
+# endif
+ }
+}
+
+/*
+ * Drop our helper macro and the configuration macros -- presumably so that
+ * this header can be included again with different parameters.
+ * NOTE(review): KMP_NODE and KMP_WANT_CLEANUP are tested above, but they are
+ * not undefined here, while KMP_CONTEXT is undefined although this part
+ * of the file never uses it -- confirm whether that is intentional.
+ */
+#undef P
+#undef KMP_CHAR
+#undef KMP_SOURCE
+#undef KMP_GET_CHAR
+#undef KMP_VARS
+#undef KMP_STATE_VARS
+#undef KMP_CONTEXT
+#undef KMP_USE_ASCII
+#undef KMP_USE_UTF8
+#undef KMP_TOLOWER
+#undef KMP_UNACCENT
+#undef KMP_ONLYALPHA
+#undef KMP_CONTROL_CHAR
+#undef KMP_ADD_EXTRA_ARGS
+#undef KMP_ADD_INIT
+#undef KMP_ADD_NEW
+#undef KMP_ADD_DUP
+#undef KMP_INIT_STATE
+#undef KMP_BUILD_STATE
+#undef KMP_USE_POOL
+#undef KMP_GIVE_ALLOC
+#undef KMP_GIVE_HASHFN
+#undef KMP_GIVE_EQ
+
+/* Optionally generate the searching routines with the same prefix (KMP_PREFIX is still defined here) */
+#ifdef KMP_WANT_SEARCH
+# undef KMP_WANT_SEARCH
+# define KMPS_PREFIX(x) KMP_PREFIX(x)
+# define KMPS_KMP_PREFIX(x) KMP_PREFIX(x)
+# include "ucw/kmp-search.h"
+#endif
+
+#undef KMP_PREFIX
--- /dev/null
+/* Test of large files */
+
+#include "ucw/lib.h"
+#include "ucw/fastbuf.h"
+
+#include <stdlib.h>
+#include <time.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#define BLOCK (1<<10)
+#define COUNT (5<<20)
+#define TESTS (1<<20)
+
+/*
+ * Test of large files: read TESTS randomly chosen blocks of a file
+ * consisting of COUNT blocks of BLOCK bytes and check that each block
+ * contains the low 8 bits of its index. The code creating the file is
+ * currently disabled by #if 0.
+ */
+int main(void)
+{
+  struct fastbuf *b;
+  byte block[BLOCK];
+  uns i;
+
+  /* The indices are drawn by random(), so the seed has to be set by srandom(), not srand() */
+  srandom(time(NULL));
+#if 0
+  b = bopen("/big/robert/large-file", O_CREAT | O_TRUNC | O_RDWR, 1<<20);
+  if (!b)
+    die("Cannot create large-file");
+
+  msg(L_DEBUG, "Writing %d blocks of size %d", COUNT, BLOCK);
+  for (i=0; i<COUNT; i++)
+    {
+      memset(block, i & 0xff, BLOCK);
+      bwrite(b, block, BLOCK);
+      if ( i%1024 == 0 )
+	{
+	  printf("\r%10d", i);
+	  fflush(stdout);
+	}
+    }
+#else
+  b = bopen("/big/robert/large-file", O_RDWR, 1<<20);
+  if (!b)
+    die("Cannot open large-file");
+#endif
+  msg(L_DEBUG, "Checking the file contents in %d tests", TESTS);
+  for (i=0; i<TESTS; i++)
+    {
+      uns idx = random()%COUNT;
+      /*
+       * Widen before multiplying: COUNT*BLOCK is 5 GB, so a 32-bit product
+       * would wrap around. As 4 GB is a multiple of 256 blocks, the wrapped
+       * offset would even hit a block with the expected contents and the
+       * test would pass without ever reading beyond 4 GB.
+       */
+      ucw_off_t ofs = (ucw_off_t) idx * BLOCK;
+      bseek(b, ofs, SEEK_SET);
+      bread(b, block, BLOCK);
+      if (block[17] != (idx & 0xff))
+	die("Invalid block %d in test %d: %x != %x", idx, i, block[17], idx & 0xff);
+      if ( i%16 == 0 )
+	{
+	  printf("\r%10d", i);
+	  fflush(stdout);
+	}
+    }
+  msg(L_DEBUG, "Done");
+
+  bclose(b);
+  return 0;
+}
--- /dev/null
+/*
+ * UCW Library -- Large File Support
+ *
+ * (c) 1999--2002 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_LFS_H
+#define _UCW_LFS_H
+
+#include <fcntl.h>
+#include <unistd.h>
+
+/*
+ * Map the ucw_* file operations either to the explicit 64-bit variants
+ * (when CONFIG_LFS is defined) or to the plain ones. Both branches define
+ * the same set of names.
+ */
+#ifdef CONFIG_LFS
+
+#define ucw_open open64
+#define ucw_seek lseek64
+#define ucw_ftruncate ftruncate64
+#define ucw_mmap(a,l,p,f,d,o) mmap64(a,l,p,f,d,o)
+#define ucw_pread pread64
+#define ucw_pwrite pwrite64
+#define ucw_stat stat64
+#define ucw_fstat fstat64
+typedef struct stat64 ucw_stat_t;
+
+#else	/* !CONFIG_LFS */
+
+#define ucw_open open
+#define ucw_seek(f,o,w) lseek(f,o,w)
+#define ucw_ftruncate(f,o) ftruncate(f,o)
+#define ucw_mmap(a,l,p,f,d,o) mmap(a,l,p,f,d,o)
+#define ucw_pread pread
+#define ucw_pwrite pwrite
+#define ucw_stat stat
+#define ucw_fstat fstat
+typedef struct stat ucw_stat_t;
+
+#endif	/* !CONFIG_LFS */
+
+#if defined(_POSIX_SYNCHRONIZED_IO) && (_POSIX_SYNCHRONIZED_IO > 0)
+#define ucw_fdatasync fdatasync
+#else
+#define ucw_fdatasync fsync
+#endif
+
+#define HAVE_PREAD
+
+/*
+ * Return the size of the file @name, determined by seeking to its end.
+ * Dies if the file cannot be opened or if seeking fails (the check
+ * assumes that ucw_off_t is a signed type like off_t -- confirm).
+ */
+static inline ucw_off_t
+ucw_file_size(const char *name)
+{
+  int fd = ucw_open(name, O_RDONLY);
+  if (fd < 0)
+    die("Cannot open %s: %m", name);
+  ucw_off_t len = ucw_seek(fd, 0, SEEK_END);
+  /* Report the failure before close() gets a chance to overwrite errno */
+  if (len < 0)
+    die("Cannot seek in %s: %m", name);
+  close(fd);
+  return len;
+}
+
+#endif /* !_UCW_LFS_H */
--- /dev/null
+/*
+ * The UCW Library -- Miscellaneous Functions
+ *
+ * (c) 1997--2009 Martin Mares <mj@ucw.cz>
+ * (c) 2005 Tomas Valla <tom@ucw.cz>
+ * (c) 2006 Robert Spalek <robert@ucw.cz>
+ * (c) 2007 Pavel Charvat <pchar@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_LIB_H
+#define _UCW_LIB_H
+
+#include "ucw/config.h"
+#include <stdarg.h>
+
+/*** === Macros for handling structures, offsets and alignment ***/
+
+#define CHECK_PTR_TYPE(x, type) ((x)-(type)(x) + (type)(x)) /** Check that a pointer @x is of type @type. Fail compilation if not. **/
+#define PTR_TO(s, i) &((s*)0)->i /** Return OFFSETOF() in form of a pointer. **/
+#define OFFSETOF(s, i) ((unsigned int) (uintptr_t) PTR_TO(s, i)) /** Offset of item @i from the start of structure @s **/
+#define SKIP_BACK(s, i, p) ((s *)((char *)(p) - OFFSETOF(s, i))) /** Given a pointer @p to item @i of structure @s, return a pointer to the start of the struct. **/
+
+/** Align an integer @s to the nearest higher multiple of @a (which should be a power of two) **/
+#define ALIGN_TO(s, a) (((s)+(a)-1)&~((a)-1))
+
+/** Align a pointer @p to the nearest higher multiple of @s. **/
+#define ALIGN_PTR(p, s) ((uintptr_t)(p) % (s) ? (typeof(p))((uintptr_t)(p) + (s) - (uintptr_t)(p) % (s)) : (p))
+
+#define UNALIGNED_PART(ptr, type) (((uintptr_t) (ptr)) % sizeof(type)) /** Remainder of the address @ptr modulo the size of @type **/
+
+/*** === Other utility macros ***/
+
+#define MIN(a,b) (((a)<(b))?(a):(b)) /** Minimum of two numbers **/
+#define MAX(a,b) (((a)>(b))?(a):(b)) /** Maximum of two numbers **/
+#define CLAMP(x,min,max) ({ int _t=(x); (_t < (min)) ? (min) : (_t > (max)) ? (max) : _t; }) /** Clip a number @x to interval [@min,@max] **/
+#define ABS(x) ((x) < 0 ? -(x) : (x)) /** Absolute value **/
+#define ARRAY_SIZE(a) (sizeof(a)/sizeof(*(a))) /** The number of elements of an array **/
+#define STRINGIFY(x) #x /** Convert macro parameter to a string **/
+#define STRINGIFY_EXPANDED(x) STRINGIFY(x) /** Convert an expanded macro parameter to a string **/
+#define GLUE(x,y) x##y /** Glue two tokens together **/
+#define GLUE_(x,y) x##_##y /** Glue two tokens together, separating them by an underscore **/
+
+#define COMPARE(x,y) do { if ((x)<(y)) return -1; if ((x)>(y)) return 1; } while(0) /** Numeric comparison function for qsort() **/
+#define REV_COMPARE(x,y) COMPARE(y,x) /** Reverse numeric comparison **/
+#define COMPARE_LT(x,y) do { if ((x)<(y)) return 1; if ((x)>(y)) return 0; } while(0) /** Return 1 if @x is less than @y, 0 if greater; fall through if they are equal **/
+#define COMPARE_GT(x,y) COMPARE_LT(y,x) /** Like COMPARE_LT(), with the order reversed **/
+
+#define ROL(x, bits) (((x) << (bits)) | ((uns)(x) >> (sizeof(uns)*8 - (bits)))) /** Bitwise rotation of an unsigned int to the left **/
+#define ROR(x, bits) (((uns)(x) >> (bits)) | ((x) << (sizeof(uns)*8 - (bits)))) /** Bitwise rotation of an unsigned int to the right **/
+
+/*** === Shortcuts for GCC Extensions ***/
+
+#ifdef __GNUC__
+
+#undef inline
+#define NONRET __attribute__((noreturn)) /** Function does not return **/
+#define UNUSED __attribute__((unused)) /** Variable/parameter is knowingly unused **/
+#define CONSTRUCTOR __attribute__((constructor)) /** Call function upon start of program **/
+#define PACKED __attribute__((packed)) /** Structure should be packed **/
+#define CONST __attribute__((const)) /** Function depends only on arguments **/
+#define PURE __attribute__((pure)) /** Function depends only on arguments and global vars **/
+#define FORMAT_CHECK(x,y,z) __attribute__((format(x,y,z))) /** Checking of printf-like format strings **/
+#define likely(x) __builtin_expect((x),1) /** Use `if (likely(@x))` if @x is almost always true **/
+#define unlikely(x) __builtin_expect((x),0) /** Use `if (unlikely(@x))` to hint that @x is almost always false **/
+
+#if __GNUC__ >= 4 || __GNUC__ == 3 && __GNUC_MINOR__ >= 3
+#define ALWAYS_INLINE inline __attribute__((always_inline)) /** Forcibly inline **/
+#define NO_INLINE __attribute__((noinline)) /** Forcibly uninline **/
+#else
+#define ALWAYS_INLINE inline
+/* Keep NO_INLINE defined (as a no-op) on old compilers, so that code using it still compiles */
+#define NO_INLINE
+#endif
+
+#if __GNUC__ >= 4
+#define LIKE_MALLOC __attribute__((malloc)) /** Function returns a "new" pointer **/
+#define SENTINEL_CHECK __attribute__((sentinel)) /** The last argument must be NULL **/
+#else
+#define LIKE_MALLOC
+#define SENTINEL_CHECK
+#endif
+
+#else
+#error This program requires the GNU C compiler.
+#endif
+
+/***
+ * [[logging]]
+ *
+ * === Basic logging functions (see <<log:,Logging>> and <ucw/log.h> for more)
+ ***/
+
+enum log_levels { /** The available log levels to pass to msg() and friends. **/
+ L_DEBUG=0, // 'D' - Debugging
+ L_INFO, // 'I' - Informational
+ L_WARN, // 'W' - Warning
+ L_ERROR, // 'E' - Error, but non-critical
+ L_INFO_R, // 'i' - An alternative set of levels for messages caused by remote events
+ L_WARN_R, // 'w' (e.g., a packet received via network)
+ L_ERROR_R, // 'e'
+ L_FATAL, // '!' - Fatal error
+ L_MAX
+};
+
+/*
+ * All level names, each wrapped in P(). This header does not define P()
+ * in the part visible here -- presumably the user defines it as needed
+ * before expanding this list (TODO confirm against the users).
+ */
+#define LOG_LEVEL_NAMES P(DEBUG) P(INFO) P(WARN) P(ERROR) P(INFO_R) P(WARN_R) P(ERROR_R) P(FATAL)
+
+// Return the letter associated with a given severity level
+// (the letters are listed in the order of enum log_levels, '#' fills the indices from L_MAX up)
+#define LS_LEVEL_LETTER(level) ("DIWEiwe!###"[( level )])
+
+#define L_SIGHANDLER 0x80000000 /** Avoid operations that are unsafe in signal handlers **/
+#define L_LOGGER_ERR 0x40000000 /** Used internally to avoid infinite reporting of logging errors **/
+
+/**
+ * This is the basic printf-like function for logging a message.
+ * The @flags contain the log level and possibly other flag bits (like `L_SIGHANDLER`).
+ **/
+void msg(uns flags, const char *fmt, ...) FORMAT_CHECK(printf,2,3);
+void vmsg(uns flags, const char *fmt, va_list args); /** A vararg version of msg(). **/
+void die(const char *, ...) NONRET FORMAT_CHECK(printf,1,2); /** Log a fatal error message and exit the program. **/
+
+extern char *log_title; /** An optional log message title. Set to program name by log_init(). **/
+extern int log_pid; /** An optional PID printed in each log message. Set to 0 if it shouldn't be logged. **/
+extern void (*log_die_hook)(void); /** An optional function called just before die() exits. **/
+
+void log_init(const char *argv0); /** Set @log_title to the program name extracted from @argv[0]. **/
+void log_fork(void); /** Call after fork() to update @log_pid. **/
+void log_file(const char *name); /** Establish logging to the named file. Also redirect stderr there. **/
+
+void assert_failed(const char *assertion, const char *file, int line) NONRET;
+void assert_failed_noinfo(void) NONRET;
+
+#ifdef DEBUG_ASSERTS
+/**
+ * Check an assertion. If the condition @x is false, stop the program with a fatal error.
+ * Assertion checks are compiled only when `DEBUG_ASSERTS` is defined.
+ **/
+#define ASSERT(x) ({ if (unlikely(!(x))) assert_failed(#x, __FILE__, __LINE__); 1; })
+#else
+/* Without DEBUG_ASSERTS, only conditions known to be false at compile time call assert_failed_noinfo() */
+#define ASSERT(x) ({ if (__builtin_constant_p(x) && !(x)) assert_failed_noinfo(); 1; })
+#endif
+
+/* Compile-time assertion: if @x is false, the array size becomes -1 and the compilation fails */
+#define COMPILE_ASSERT(name,x) typedef char _COMPILE_ASSERT_##name[!!(x)-1]
+
+#ifdef LOCAL_DEBUG
+#define DBG(x,y...) msg(L_DEBUG, x,##y) /** If `LOCAL_DEBUG` is defined before including <ucw/lib.h>, log a debug message. Otherwise do nothing. **/
+#else
+#define DBG(x,y...) do { } while(0)
+#endif
+
+/*** === Memory allocation ***/
+
+/*
+ * Unfortunately, several libraries we might want to link to define
+ * their own xmalloc and we don't want to interfere with them, hence
+ * the renaming.
+ */
+#define xmalloc ucw_xmalloc
+#define xrealloc ucw_xrealloc
+#define xfree ucw_xfree
+
+void *xmalloc(uns) LIKE_MALLOC; /** Allocate memory and die() if there is none. **/
+void *xrealloc(void *, uns); /** Reallocate memory and die() if there is none. **/
+void xfree(void *); /** Free memory allocated by xmalloc() or xrealloc(). **/
+
+void *xmalloc_zero(uns) LIKE_MALLOC; /** Allocate memory and fill it by zeroes. **/
+char *xstrdup(const char *) LIKE_MALLOC; /** Make a xmalloc()'ed copy of a string. **/
+
+/*** === Trivial timers (timer.c) ***/
+
+timestamp_t get_timestamp(void); /** Get current time as a millisecond timestamp. **/
+
+void init_timer(timestamp_t *timer); /** Initialize a timer. **/
+uns get_timer(timestamp_t *timer); /** Get the number of milliseconds since last init/get of a timer. **/
+uns switch_timer(timestamp_t *oldt, timestamp_t *newt); /** Stop ticking of one timer and resume another. **/
+
+/*** === Random numbers (random.c) ***/
+
+uns random_u32(void); /** Return a pseudorandom 32-bit number. **/
+uns random_max(uns max); /** Return a pseudorandom 32-bit number in range [0,@max). **/
+u64 random_u64(void); /** Return a pseudorandom 64-bit number. **/
+u64 random_max_u64(u64 max); /** Return a pseudorandom 64-bit number in range [0,@max). **/
+
+/* mmap.c */
+
+void *mmap_file(const char *name, unsigned *len, int writeable);
+void munmap_file(void *start, unsigned len);
+
+/* proctitle.c */
+
+void setproctitle_init(int argc, char **argv);
+void setproctitle(const char *msg, ...) FORMAT_CHECK(printf,1,2);
+char *getproctitle(void);
+
+/* randomkey.c */
+
+void randomkey(byte *buf, uns size);
+
+/* exitstatus.c */
+
+#define EXIT_STATUS_MSG_SIZE 32
+int format_exit_status(char *msg, int stat);
+
+/* runcmd.c */
+
+int run_command(const char *cmd, ...);
+void NONRET exec_command(const char *cmd, ...);
+void echo_command(char *buf, int size, const char *cmd, ...);
+int run_command_v(const char *cmd, va_list args);
+void NONRET exec_command_v(const char *cmd, va_list args);
+void echo_command_v(char *buf, int size, const char *cmd, va_list args);
+
+/* carefulio.c */
+
+int careful_read(int fd, void *buf, int len);
+int careful_write(int fd, const void *buf, int len);
+
+/* sync.c */
+
+void sync_dir(const char *name);
+
+/* sighandler.c */
+
+typedef int (*ucw_sighandler_t)(int); // gets signum, returns nonzero if abort() should be called
+
+void handle_signal(int signum);
+void unhandle_signal(int signum);
+ucw_sighandler_t set_signal_handler(int signum, ucw_sighandler_t newh);
+
+/* bigalloc.c */
+
+void *page_alloc(u64 len) LIKE_MALLOC; // allocates a multiple of CPU_PAGE_SIZE bytes with mmap
+void *page_alloc_zero(u64 len) LIKE_MALLOC;
+void page_free(void *start, u64 len);
+void *page_realloc(void *start, u64 old_len, u64 new_len);
+
+void *big_alloc(u64 len) LIKE_MALLOC; // allocate a large memory block in the most efficient way available
+void *big_alloc_zero(u64 len) LIKE_MALLOC;
+void big_free(void *start, u64 len);
+
+#endif
--- /dev/null
+# pkg-config metadata for libucw
+
+libdir=@LIBDIR@
+incdir=.
+
+#ifdef CONFIG_UCW_THREADS
+threads=-lpthread
+#else
+threads=
+#endif
+
+#ifdef CONFIG_PCRE
+regex=-lpcre
+#else
+regex=
+#endif
+
+perl_modules_dir=@INSTALL_PERL_DIR@
+perl_module_flags=-I${perl_modules_dir}
+build_system=@INSTALL_SHARE_DIR@/ucw/build
+
+Name: libucw
+Description: A library of utility functions and data structures
+Version: @SHERLOCK_VERSION@
+Cflags: -I${incdir}
+Libs: -L${libdir} -lucw ${threads} ${regex}
--- /dev/null
+/*
+ * LiZaRd -- Fast compression method based on Lempel-Ziv 77
+ *
+ * (c) 2004, Robert Spalek <robert@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+#include "ucw/threads.h"
+#include "ucw/lizard.h"
+
+#include <sys/mman.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <setjmp.h>
+#include <errno.h>
+
+/*
+ * Buffer for safe decompression. The mapping at ptr is len + CPU_PAGE_SIZE
+ * bytes long; lizard_realloc() makes its last page inaccessible.
+ */
+struct lizard_buffer {
+ uns len; /* usable size in bytes, not counting the protected page; 0 if nothing is mapped */
+ void *ptr; /* start of the mapping or NULL */
+};
+
+/*
+ * Create an empty decompression buffer (nothing is mapped yet) and call
+ * handle_signal() for SIGSEGV.
+ */
+struct lizard_buffer *
+lizard_alloc(void)
+{
+  struct lizard_buffer *fresh = xmalloc(sizeof(*fresh));
+  fresh->ptr = NULL;
+  fresh->len = 0;
+  handle_signal(SIGSEGV);
+  return fresh;
+}
+
+/*
+ * Destroy a buffer created by lizard_alloc(): call unhandle_signal() for
+ * SIGSEGV, unmap the memory (if any) and free the structure.
+ */
+void
+lizard_free(struct lizard_buffer *buf)
+{
+  unhandle_signal(SIGSEGV);
+  void *area = buf->ptr;
+  if (area)
+    munmap(area, buf->len + CPU_PAGE_SIZE);
+  xfree(buf);
+}
+
+/*
+ * Make sure that the buffer has at least @max_len usable bytes followed by
+ * one inaccessible page. If the buffer is too small, it is unmapped and
+ * mapped again (at least twice as large), so the old contents are lost.
+ * Dies when the mapping cannot be created or protected.
+ */
+static void
+lizard_realloc(struct lizard_buffer *buf, uns max_len)
+  /* max_len needs to be aligned to CPU_PAGE_SIZE */
+{
+  if (max_len <= buf->len)
+    return;
+  if (max_len < 2*buf->len)				// to ensure logarithmic cost
+    max_len = 2*buf->len;
+
+  if (buf->ptr)
+    munmap(buf->ptr, buf->len + CPU_PAGE_SIZE);
+  buf->len = max_len;
+  buf->ptr = mmap(NULL, buf->len + CPU_PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0);
+  if (buf->ptr == MAP_FAILED)
+    die("mmap(anonymous, %u bytes): %m", (uns)(buf->len + CPU_PAGE_SIZE));	/* the size is unsigned, hence %u */
+  /* Forbid access to the page behind the usable part, so that overruns raise SIGSEGV */
+  if (mprotect((byte *) buf->ptr + buf->len, CPU_PAGE_SIZE, PROT_NONE) < 0)
+    die("mprotect: %m");
+}
+
+/* Place to jump to when SIGSEGV arrives; set by setjmp() in lizard_decompress_safe() */
+static jmp_buf safe_decompress_jump;
+/* SIGSEGV handler installed during safe decompression: jump back to lizard_decompress_safe() */
+static int
+sigsegv_handler(int signal UNUSED)
+{
+ longjmp(safe_decompress_jump, 1);
+ return 1; /* not reached, longjmp() does not return */
+}
+
+byte *
+lizard_decompress_safe(const byte *in, struct lizard_buffer *buf, uns expected_length)
+ /* Decompresses in into buf and returns a pointer to the decompressed data,
+ * which are stored inside the buffer. If an error has occurred, NULL is
+ * returned and errno is set: EINVAL if the decompressed length differs from
+ * expected_length, EFAULT if SIGSEGV has been caught. The buffer buf is
+ * automatically reallocated. SIGSEGV is caught in
+ * case of buffer-overflow. The function is not re-entrant because of a
+ * static longjmp handler. */
+{
+ uns lock_offset = ALIGN_TO(expected_length + 3, CPU_PAGE_SIZE); // +3 due to the unaligned access
+ if (lock_offset > buf->len)
+ lizard_realloc(buf, lock_offset);
+ volatile ucw_sighandler_t old_handler = set_signal_handler(SIGSEGV, sigsegv_handler);
+ byte *ptr;
+ if (!setjmp(safe_decompress_jump))
+ {
+ /* Decompress to the place lock_offset bytes before the inaccessible page */
+ ptr = buf->ptr + buf->len - lock_offset;
+ int len = lizard_decompress(in, ptr);
+ if (len != (int) expected_length)
+ {
+ ptr = NULL;
+ errno = EINVAL;
+ }
+ }
+ else
+ {
+ /* We got here by longjmp() from sigsegv_handler() */
+ msg(L_ERROR, "SIGSEGV caught in lizard_decompress()");
+ ptr = NULL;
+ errno = EFAULT;
+ }
+ set_signal_handler(SIGSEGV, old_handler);
+ return ptr;
+}
--- /dev/null
+#include "ucw/lib.h"
+#include "ucw/getopt.h"
+#include "ucw/fastbuf.h"
+#include "ucw/ff-binary.h"
+#include "ucw/lizard.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+static char *options = CF_SHORT_OPTS "cdtx";
+static char *help = "\
+Usage: lizard-test <options> input-file [output-file]\n\
+\n\
+Options:\n"
+CF_USAGE
+"-c\t\tCompress\n\
+-d\t\tDecompress\n\
+-t\t\tCompress, decompress, and compare (in memory only, default)\n\
+-x\t\tLet the test crash by shrinking the output buffer\n\
+";
+
+/* Print the help text to stderr and exit with status 1 */
+static void NONRET
+usage(void)
+{
+ fputs(help, stderr);
+ exit(1);
+}
+
+/*
+ * Test driver of the compressor. According to the options, it compresses
+ * the input file to the output file (-c), decompresses it (-d), or
+ * compresses and decompresses it in memory and compares the result with
+ * the original (-t, default). A compressed file starts with the original
+ * length and the Adler32 checksum of the original data.
+ */
+int
+main(int argc, char **argv)
+{
+  int opt;
+  uns action = 't';
+  uns crash = 0;
+  log_init(argv[0]);
+  while ((opt = cf_getopt(argc, argv, options, CF_NO_LONG_OPTS, NULL)) >= 0)
+    switch (opt)
+    {
+      case 'c':
+      case 'd':
+      case 't':
+	action = opt;
+	break;
+      case 'x':
+	crash++;
+	break;
+      default:
+	usage();
+    }
+  if (action == 't' && argc != optind+1
+  || action != 't' && argc != optind+2)
+    usage();
+
+  void *mi, *mo;
+  int li, lo;
+  uns adler = 0;
+
+  struct stat st;
+  /* Without this check, a missing input file would leave st uninitialized */
+  if (stat(argv[optind], &st) < 0)
+    die("Cannot stat %s: %m", argv[optind]);
+  li = st.st_size;
+  struct fastbuf *fi = bopen(argv[optind], O_RDONLY, 1<<16);
+  if (action != 'd')
+    {
+      lo = li * LIZARD_MAX_MULTIPLY + LIZARD_MAX_ADD;
+      li += LIZARD_NEEDS_CHARS;
+    }
+  else
+    {
+      /* The compressed data are preceded by two 32-bit words (length and checksum) */
+      if (li < 8)
+	die("File %s is too short to be a compressed file", argv[optind]);
+      lo = bgetl(fi);
+      adler = bgetl(fi);
+      li -= 8;
+    }
+  mi = xmalloc(li);
+  mo = xmalloc(lo);
+  li = bread(fi, mi, li);
+  bclose(fi);
+
+  printf("%d ", li);
+  if (action == 'd')
+    printf("->expected %d (%08x) ", lo, adler);
+  fflush(stdout);
+  if (action != 'd')
+    lo = lizard_compress(mi, li, mo);
+  else
+    {
+      lo = lizard_decompress(mi, mo);
+      if (adler32(mo, lo) != adler)
+	printf("wrong Adler32 ");
+    }
+  printf("-> %d ", lo);
+  fflush(stdout);
+
+  if (action != 't')
+    {
+      struct fastbuf *fo = bopen(argv[optind+1], O_CREAT | O_TRUNC | O_WRONLY, 1<<16);
+      if (action == 'c')
+	{
+	  bputl(fo, li);
+	  bputl(fo, adler32(mi, li));
+	}
+      bwrite(fo, mo, lo);
+      bclose(fo);
+    }
+  else
+    {
+      /* With -x, announce a length smaller by one page (or zero) to provoke a failure */
+      int smaller_li;
+      if (li >= (int) CPU_PAGE_SIZE)
+	smaller_li = li - CPU_PAGE_SIZE;
+      else
+	smaller_li = 0;
+      struct lizard_buffer *buf = lizard_alloc();
+      byte *ptr = lizard_decompress_safe(mo, buf, crash ? smaller_li : li);
+      if (!ptr)
+	printf("err: %m");
+      else if (memcmp(mi, ptr, li))
+	printf("WRONG");
+      else
+	printf("OK");
+      lizard_free(buf);
+    }
+  printf("\n");
+  return 0;
+}
--- /dev/null
+/*
+ * LiZaRd -- Fast compression method based on Lempel-Ziv 77
+ *
+ * (c) 2004, Robert Spalek <robert@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ *
+ * The file format is based on LZO1X and
+ * the compression method is based on zlib.
+ */
+
+#include "ucw/lib.h"
+#include "ucw/lizard.h"
+
+#include <string.h>
+
+typedef u16 hash_ptr_t;
+struct hash_record {
+ /* the position in the original text is implicit; it is computed by locate_string() */
+ hash_ptr_t next; // 0=end
+ hash_ptr_t prev; // high bit: 0=record in array, 1=head in hash-table (i.e. value of hashf)
+};
+
+#define HASH_SIZE (1<<14) // size of hash-table
+#define HASH_RECORDS (1<<15) // maximum number of records in hash-table, 0 is unused ==> subtract 1
+#define CHAIN_MAX_TESTS 8 // crop longer collision chains
+#define CHAIN_GOOD_MATCH 32 // we already have a good match => end
+
+static inline uns
+hashf(const byte *string)
+  /* Hash of the first 3 bytes of the string, in range 0..HASH_SIZE-1 */
+{
+  uns h = string[0];
+  h ^= string[1] << 3;
+  h ^= string[2] << 6;
+  return h;
+}
+
+static inline byte *
+locate_string(const byte *string, int record_id, int head)
+  /* The strings are recorded into the hash-table regularly, hence there is no
+   * need to store the pointer there: the position is computed from the
+   * distance of record_id from head, wrapped around by HASH_RECORDS-1. */
+{
+  int offset = record_id - head;
+  if (record_id >= head)
+    offset -= HASH_RECORDS-1;
+  return (byte *)(string + offset);
+}
+
+static inline uns
+find_match(uns record_id, struct hash_record *hash_rec, const byte *string, const byte *string_end, byte **best_ptr, uns head)
+ /* hash_tab[hash] == record_id points to the head of the double-linked
+ * link-list of strings with the same hash. The records are statically
+ * stored in circular array hash_rec (with the 1st entry unused), and the
+ * pointers are just 16-bit indices. The strings in every collision chain
+ * are ordered by age.
+ *
+ * Returns the length of the longest match found and stores its start
+ * to *best_ptr; returns 0 and leaves *best_ptr untouched if no candidate
+ * matches. At most CHAIN_MAX_TESTS records of the chain are examined. */
+{
+ uns count = CHAIN_MAX_TESTS;
+ uns best_len = 0;
+ while (record_id && count-- > 0)
+ {
+ byte *record_string = locate_string(string, record_id, head);
+ byte *cmp = record_string;
+ if (cmp[0] == string[0] && cmp[2] == string[2])
+ /* implies cmp[1] == string[1] */
+ {
+ if (cmp[3] == string[3])
+ {
+ cmp += 4;
+ if (*cmp++ == string[4] && *cmp++ == string[5]
+ && *cmp++ == string[6] && *cmp++ == string[7])
+ {
+ /* The first 8 bytes are equal, compare the rest byte by byte (the body of the loop is empty) */
+ const byte *str = string + 8;
+ while (str <= string_end && *cmp++ == *str++);
+ }
+ }
+ else
+ cmp += 4;
+ uns len = cmp - record_string - 1; /* cmp points 2 characters after the last match */
+ if (len > best_len)
+ {
+ best_len = len;
+ *best_ptr = record_string;
+ if (best_len >= CHAIN_GOOD_MATCH) /* optimization */
+ break;
+ }
+ }
+ record_id = hash_rec[record_id].next;
+ }
+ return best_len;
+}
+
+static uns
+hash_string(hash_ptr_t *hash_tab, uns hash, struct hash_record *hash_rec, /*byte *string,*/ uns head, uns *to_delete)
+ /* We reuse hash-records stored in a circular array. First, delete the old
+ * one and then add the new one in front of the link-list.
+ *
+ * head is the index of the record to be used now; the index to be used
+ * next time is returned. *to_delete becomes non-zero as soon as the array
+ * has wrapped around for the first time, i.e., when the records start
+ * being reused. */
+{
+ struct hash_record *rec = hash_rec + head;
+ if (*to_delete) /* unlink the original record */
+ {
+ uns prev_id = rec->prev & ((1<<15)-1);
+ if (rec->prev & (1<<15)) /* was a head */
+ hash_tab[prev_id] = 0;
+ else /* thanks to the ordering, this was a tail */
+ hash_rec[prev_id].next = 0;
+ }
+ rec->next = hash_tab[hash];
+ rec->prev = (1<<15) | hash;
+ /* If the list was empty, rec->next is 0 and this writes to the unused record 0 */
+ hash_rec[rec->next].prev = head;
+ hash_tab[hash] = head; /* add the new record before the link-list */
+
+ if (++head >= HASH_RECORDS) /* circular buffer, reuse old records, 0 is unused */
+ {
+ head = 1;
+ *to_delete = 1;
+ }
+ return head;
+}
+
+/*
+ * Store the number @l to @out as a sequence of zero bytes, each standing
+ * for 255, followed by a single byte with the rest. Returns the position
+ * just after the stored bytes.
+ */
+static inline byte *
+dump_unary_value(byte *out, uns l)
+{
+  for (; l > 255; l -= 255)
+    *out++ = 0;
+  *out++ = l;
+  return out;
+}
+
+/*
+ * Write a command copying @len literal bytes from @start to the output and
+ * return the new output position. The length is encoded as follows:
+ * - at the beginning of the file (@bof) and for len <= 238: a single byte len+17,
+ * - for len < 4: in the lowest 2 bits of the byte 2 positions back,
+ * - for len <= 18: a single byte len-3,
+ * - otherwise: a zero byte followed by len-18 stored by dump_unary_value().
+ */
+static byte *
+flush_copy_command(uns bof, byte *out, const byte *start, uns len)
+{
+ if (bof && len <= 238)
+ *out++ = len + 17;
+ else if (len < 4)
+ {
+ /* cannot happen when !!bof */
+ out[-2] |= len; /* invariant: lowest 2 bits 2 bytes back */
+#ifdef CPU_ALLOW_UNALIGNED
+ /* Always copies 4 bytes at once, but only len of them are accounted for */
+ * (u32*) out = * (u32*) start;
+ return out + len;
+#else
+ while (len-- > 0)
+ *out++ = *start++;
+ return out;
+#endif
+ }
+ else
+ {
+ /* leave 2 least significant bits of out[-2] set to 0 */
+ if (len <= 18)
+ *out++ = len - 3;
+ else
+ {
+ *out++ = 0;
+ out = dump_unary_value(out, len - 18);
+ }
+ }
+ memcpy(out, start, len);
+ return out + len;
+}
+
+int
+lizard_compress(const byte *in, uns in_len, byte *out)
+ /* Requires out being allocated for at least in_len * LIZARD_MAX_MULTIPLY +
+ * LIZARD_MAX_ADD. There must be at least LIZARD_NEEDS_CHARS characters
+ * allocated after in. Returns the actual compressed length. */
+{
+ /* Both tables live on the stack */
+ hash_ptr_t hash_tab[HASH_SIZE];
+ struct hash_record hash_rec[HASH_RECORDS];
+ const byte *in_end = in + in_len;
+ byte *out_start = out;
+ const byte *copy_start = in; /* start of the literals which have not been written yet */
+ uns head = 1; /* 0 is unused */
+ uns to_delete = 0, bof = 1;
+ bzero(hash_tab, sizeof(hash_tab)); /* init the hash-table */
+ while (in < in_end)
+ {
+ uns hash = hashf(in);
+ byte *best = NULL;
+ uns len = find_match(hash_tab[hash], hash_rec, in, in_end, &best, head);
+ if (len < 3)
+#if 0 // TODO: now, our routine does not detect matches of length 2
+ if (len == 2 && (in - best->string - 1) < (1<<10))
+ { /* pass-thru */ }
+ else
+#endif
+ {
+literal:
+ head = hash_string(hash_tab, hash, hash_rec, head, &to_delete);
+ in++; /* add a literal */
+ continue;
+ }
+
+ if (in + len > in_end) /* crop EOF */
+ {
+ len = in_end - in;
+ if (len < 3)
+ goto literal;
+ }
+ /* Record the match. */
+ uns copy_len = in - copy_start;
+ uns is_in_copy_mode = bof || copy_len >= 4;
+ uns shift = in - best - 1;
+ /* Try to use a 2-byte sequence. */
+#if 0
+ if (len == 2)
+ {
+ if (is_in_copy_mode || !copy_len) /* cannot use with 0 copied characters, because this bit pattern is reserved for copy mode */
+ goto literal;
+ else
+ goto dump_2sequence;
+ } else
+#endif
+ /* now, len >= 3 */
+ if (shift < (1<<11) && len <= 8)
+ {
+ shift |= (len-3 + 2)<<11;
+dump_2sequence:
+ if (copy_len)
+ out = flush_copy_command(bof, out, copy_start, copy_len);
+ *out++ = (shift>>6) & ~3; /* shift fits into 10 bits */
+ *out++ = shift & 0xff;
+ }
+ else if (len == 3 && is_in_copy_mode)
+ {
+ if (shift < (1<<11) + (1<<10)) /* optimisation for length-3 matches after a copy command */
+ {
+ shift -= 1<<11;
+ goto dump_2sequence; /* shift has 11 bits and contains also len */
+ }
+ else /* avoid 3-sequence compressed to 3 sequence if it can simply be appended */
+ goto literal;
+ }
+ /* We have to use a 3-byte sequence. */
+ else
+ {
+ if (copy_len)
+ out = flush_copy_command(bof, out, copy_start, copy_len);
+ if (shift < (1<<14))
+ {
+ if (len <= 33)
+ *out++ = (1<<5) | (len-2);
+ else
+ {
+ *out++ = 1<<5;
+ out = dump_unary_value(out, len - 33);
+ }
+ }
+ else /* shift < (1<<15)-1 because of HASH_RECORDS */
+ {
+ shift++; /* because shift==0 is reserved for EOF */
+ byte pos_bit = ((shift>>11) & (1<<3)) | (1<<4);
+ if (len <= 9)
+ *out++ = pos_bit | (len-2);
+ else
+ {
+ *out++ = pos_bit;
+ out = dump_unary_value(out, len - 9);
+ }
+ }
+ *out++ = (shift>>6) & ~3; /* rest of shift fits into 14 bits */
+ *out++ = shift & 0xff;
+ }
+ /* Update the hash-table. */
+ head = hash_string(hash_tab, hash, hash_rec, head, &to_delete);
+ for (uns i=1; i<len; i++)
+ head = hash_string(hash_tab, hashf(in+i), hash_rec, head, &to_delete);
+ in += len;
+ copy_start = in;
+ bof = 0;
+ }
+ /* Flush the literals remaining at the end of the input */
+ uns copy_len = in - copy_start;
+ if (copy_len)
+ out = flush_copy_command(bof, out, copy_start, copy_len);
+ *out++ = 17; /* add EOF */
+ *out++ = 0;
+ *out++ = 0;
+ return out - out_start;
+}
+
+/*
+ * Read a number stored by dump_unary_value(): every zero byte counts for
+ * 255, the first non-zero byte is added and terminates the number.
+ * The number is stored to *val, the position after it is returned.
+ */
+static inline byte *
+read_unary_value(const byte *in, uns *val)
+{
+  uns total = 0;
+  byte last;
+  while (!(last = *in++))
+    total += 255;
+  *val = total + last;
+  return (byte *)in;
+}
+
+int
+lizard_decompress(const byte *in, byte *out)
+ /* Decompress the stream at @in into @out.
+ *
+ * The @out buffer must already be allocated, so the decompressed length
+ * must be known beforehand. It is desirable to lock the following memory
+ * page for read-only access to prevent buffer overflow. Returns the actual
+ * decompressed length.
+ *
+ * NOTE(review): this comment used to promise a negative number when an error
+ * has occurred, but the code below performs no validation and its only
+ * return statement yields out - out_start -- confirm the intended contract. */
+{
+ byte *out_start = out;
+ /* 1 = a copy command may follow, 2 = we have just performed a copy command,
+ * 0 = the previous sequence carried 1..3 literal bytes itself */
+ uns expect_copy_command = 1;
+ uns len;
+ if (*in > 17) /* short copy command at BOF */
+ {
+ len = *in++ - 17;
+ goto perform_copy_command;
+ }
+ while (1)
+ {
+ uns c = *in++;
+ uns pos;
+ if (c < 0x10)
+ if (expect_copy_command == 1)
+ {
+ /* 0000LLLL: copy command, a zero nibble means an extended length */
+ if (!c)
+ {
+ in = read_unary_value(in, &len);
+ len += 18;
+ }
+ else
+ len = c + 3;
+ goto perform_copy_command;
+ }
+ else
+ {
+ /* 0000ppCC pppppppp: short match; its meaning depends on whether
+ * we have just left the copy mode */
+ pos = ((c&0xc)<<6) | *in++;
+ if (expect_copy_command == 2)
+ {
+ pos += 1<<11;
+ len = 3;
+ }
+ else
+ len = 2;
+ pos++;
+ }
+ else if (c < 0x20)
+ {
+ /* 0001pLLL: the top bit of the position is stored in the first byte */
+ pos = (c&0x8)<<11;
+ len = c&0x7;
+ if (!len)
+ {
+ in = read_unary_value(in, &len);
+ len += 9;
+ }
+ else
+ len += 2;
+ pos |= (*in++ & 0xfc)<<6;
+ pos |= *in++;
+ if (!pos) /* EOF */
+ break;
+ /* do NOT pos++ */
+ }
+ else if (c < 0x40)
+ {
+ /* 001LLLLL: length in the first byte, position in the next two */
+ len = c&0x1f;
+ if (!len)
+ {
+ in = read_unary_value(in, &len);
+ len += 33;
+ }
+ else
+ len += 2;
+ pos = (*in++ & 0xfc)<<6;
+ pos |= *in++;
+ pos++;
+ }
+ else /* high bits encode the length */
+ {
+ len = ((c&0xe0)>>5) -2 +3;
+ pos = (c&0x1c)<<6;
+ pos |= *in++;
+ pos++;
+ }
+ /* take from the sliding window */
+ if (len <= pos)
+ {
+ memcpy(out, out-pos, len);
+ out += len;
+ }
+ else
+ { /* overlapping */
+ for (; len-- > 0; out++)
+ *out = *(out-pos);
+ /* It's tempting to use out[-pos] above, but unfortunately it's not the same */
+ }
+ /* extract the copy-bits */
+ len = in[-2] & 0x3;
+ if (len)
+ {
+ expect_copy_command = 0;
+#ifdef CPU_ALLOW_UNALIGNED
+ /* Always moves 4 bytes at once, but advances only by len (1..3),
+ * so up to 3 bytes beyond the copied ones are read and written. */
+ * (u32*) out = * (u32*) in;
+ out += len;
+ in += len;
+#else
+ while (len-- > 0)
+ *out++ = *in++;
+#endif
+ }
+ else
+ expect_copy_command = 1;
+ continue;
+
+perform_copy_command:
+ /* Copy len literal bytes from the compressed stream to the output */
+ expect_copy_command = 2;
+ memcpy(out, in, len);
+ out += len;
+ in += len;
+ }
+
+ return out - out_start;
+}
+
+/*
+
+Description of the LZO1X format :
+=================================
+
+The meaning of the commands depends on the current mode. It can be either
+the compressed mode or the copy mode. In some cases, the compressed mode
+also distinguishes whether we just left the copy mode or not.
+
+Beginning of file:
+------------------
+
+Start in copy mode. If the first byte is 00010001, it probably means EOF (an empty
+file), so switch to the compressed mode. If it is bigger, subtract 17, copy that many
+of the following characters to the output and switch to the compressed mode.
+If it is smaller, interpret it as a regular copy mode command.
+
+Compressed mode:
+----------------
+
+Read the first byte of the sequence and determine the type of bit encoding by
+looking at the most significant bits. The sequence is always at least 2 bytes
+long. Decode sequences of these types until the EOF or END marker is read.
+
+ length L = length of the text taken from the sliding window
+
+ If L=0, then count the number Z of the following zero bytes and add Z*255
+ to the value of the following non-zero byte. This allows setting L
+ arbitrarily high.
+
+ position p = relative position of the beginning of the text
+
+ Exception: 00010001 00000000 00000000 means EOF
+
+ copying C = length 1..3 of copied characters or END=0
+
+ C following characters will be copied from the compressed text to the
+ output. The number C is always stored in the 2 least significant bits
+ (marked CC in the patterns below) of the second-last byte of the sequence.
+
+ If END is read, the algorithm switches to the copy mode.
+
+pattern length position
+
+0000ppCC pppppppp 2 10 bits [default interpretation]
+0000ppCC pppppppp 3 10 bits + 2048 [just after return from copy mode]
+0001pLLL L* ppppppCC pppppppp 3..9 + extend 15 bits [pos 0 interpreted as EOF]
+001LLLLL L* ppppppCC pppppppp 3..33 + extend 14 bits
+LLLpppCC pppppppp 3..8 11 bits [LLL >= 010]
+
+Copy mode:
+----------
+
+Read the first byte and, if the most significant bits are 0000, perform the
+following command, otherwise switch to the compressed mode (and evaluate the
+command there).
+
+pattern length position
+
+0000LLLL L* 4..18 + extend N/A
+
+ Copy L characters from the compressed text to the output. The overhead for
+ incompressible strings is only roughly 1/256 + epsilon.
+
+*/
--- /dev/null
+/*
+ * LiZaRd -- Fast compression method based on Lempel-Ziv 77
+ *
+ * (c) 2004, Robert Spalek <robert@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_LIZARD_H
+#define _UCW_LIZARD_H
+
+/***
+ * [[basic]]
+ * Basic application
+ * -----------------
+ **/
+
+/**
+ * The compression routine needs input buffer 8 characters longer, because it
+ * does not check the input bounds all the time.
+ **/
+#define LIZARD_NEEDS_CHARS 8
+
+#define LIZARD_MAX_MULTIPLY 23./22
+#define LIZARD_MAX_ADD 4
+ /* In the worst case, the compressed file will not be longer than its
+ * original length * 23/22 + 4.
+ *
+ * The additive constant is for EOF and the header of the file.
+ *
+ * The multiplicative constant comes from 19-byte incompressible string
+ * followed by a 3-sequence that can be compressed into 2-byte link. This
+ * breaks the copy-mode and it needs to be restarted with a new header. The
+ * total length is 2(header) + 19(string) + 2(link) = 23.
+ */
+
+/**
+ * The compressed data will not be longer than `LIZARD_MAX_LEN(input_length)`.
+ * Note that `LIZARD_MAX_LEN(length) > length` (this is not a problem of the algorithm,
+ * every lossless compression algorithm must have an input for which it produces a larger
+ * output).
+ *
+ * Use this to compute the size of @out parameter of @lizard_compress().
+ **/
+#define LIZARD_MAX_LEN(LENGTH) ((LENGTH) * LIZARD_MAX_MULTIPLY + LIZARD_MAX_ADD)
+
+/* lizard.c */
+
+/**
+ * Compress data provided in @in.
+ * The input buffer must be at least `@in_len + <<def_LIZARD_NEEDS_CHARS,LIZARD_NEEDS_CHARS>>`
+ * long (the compression algorithm does not check the bounds all the time).
+ *
+ * The output will be stored in @out. The @out buffer must be at least <<def_LIZARD_MAX_LEN,`LIZARD_MAX_LEN(@in_len)`>>
+ * bytes long for the output to fit in for sure.
+ *
+ * The function returns number of bytes actually needed (the size of output).
+ *
+ * Use @lizard_decompress() to get the original data.
+ **/
+int lizard_compress(const byte *in, uns in_len, byte *out);
+
+/**
+ * Decompress data previously compressed by @lizard_compress().
+ * Input is taken from @in and the result stored in @out.
+ * The size of output is returned.
+ *
+ * Note that you need to know the maximal possible size of the output to
+ * allocate enough memory.
+ *
+ * See also <<safe,safe decompression>>.
+ **/
+int lizard_decompress(const byte *in, byte *out);
+
+/* lizard-safe.c */
+
+/***
+ * [[safe]]
+ * Safe decompression
+ * ------------------
+ *
+ * You can use safe decompression, when you want to make sure you got the
+ * length right and when you want to reuse the buffer for output.
+ ***/
+
+struct lizard_buffer; /** Type of the output buffer for @lizard_decompress_safe(). **/
+
+struct lizard_buffer *lizard_alloc(void); /** Get me a new <<struct_lizard_buffer,`lizard_buffer`>>. **/
+/**
+ * Return memory used by a <<struct_lizard_buffer,`lizard_buffer`>>.
+ * It frees even the data stored in it (the result of
+ * @lizard_decompress_safe() call that used this buffer).
+ **/
+void lizard_free(struct lizard_buffer *buf);
+
+/**
+ * This one acts much like @lizard_decompress(). The difference is it
+ * checks the data to be of correct length (therefore it will not
+ * crash on invalid data).
+ *
+ * It decompresses data provided by @in. The @buf is used to get the
+ * memory for output (you get one by @lizard_alloc()).
+ *
+ * The pointer to decompressed data is returned. To free it, free the
+ * buffer by @lizard_free().
+ *
+ * In the case of error, NULL is returned. In that case, `errno` is
+ * set either to `EINVAL` (expected_length does not match) or to
+ * `EFAULT` (a segfault has been caught while decompressing -- it
+ * probably means expected_length was set way too low). Both cases
+ * suggest either wrongly computed length or data corruption.
+ *
+ * The @buf argument may be reused for multiple decompresses. However,
+ * the data will be overwritten by the next call.
+ *
+ * Beware this function is not thread-safe and is not even reentrant
+ * (because of internal segfault handling).
+ **/
+byte *lizard_decompress_safe(const byte *in, struct lizard_buffer *buf, uns expected_length);
+
+/* adler32.c */
+
+/***
+ * [[adler]]
+ * Adler-32 checksum
+ * -----------------
+ *
+ * This is here because it is commonly used to check data compressed by LiZaRd.
+ * However, it could also belong to <<hash,hashing routines>>.
+ ***/
+
+/**
+ * Update the Adler-32 checksum with more data.
+ * @adler is the old value, @ptr points to @len bytes of data to update with.
+ * Result is returned.
+ **/
+uns adler32_update(uns adler, const byte *ptr, uns len);
+
+/**
+ * Compute the Adler-32 checksum of a block of data.
+ * @buf points to @len bytes; the computation is a single call to
+ * @adler32_update() started from the initial value 1.
+ **/
+static inline uns adler32(const byte *buf, uns len)
+{
+ return adler32_update(1, buf, len);
+}
+
+#endif
--- /dev/null
+Include cf/libucw
+
+Logging {
+
+Stream {
+ Name logfile
+ FileName log/test
+ StdErrFollows 1
+ Limit { Rate 100 }
+}
+
+Stream {
+ Name logfile2
+ FileName log/test2
+ Microseconds 1
+ Levels:reset info warn error
+ Types:reset default foo
+ ErrorsFatal 1
+ ShowTypes 1
+}
+
+Stream {
+ Name syslog
+ SyslogFacility user
+ SyslogPID 1
+}
+
+Stream {
+ Name combined
+ Substream logfile
+ Substream logfile2
+ Substream syslog
+}
+
+}
--- /dev/null
+/*
+ * UCW Library -- Logging: Configuration of Log Streams
+ *
+ * (c) 2009 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+#include "ucw/log.h"
+#include "ucw/log-internal.h"
+#include "ucw/conf.h"
+#include "ucw/simple-lists.h"
+#include "ucw/tbf.h"
+#include "ucw/threads.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <syslog.h>
+#include <sys/time.h>
+
+/*** Configuration of streams ***/
+
+/* One "Stream" section of the configuration, kept as a node of log_stream_confs. */
+struct stream_config {
+ cnode n;
+ char *name; // "Name": used to look the stream up and to refer to it as a substream
+ char *file_name; // "FileName": set for file-based streams
+ char *syslog_facility; // "SyslogFacility": set for syslog streams
+ u32 levels; // "Levels" bitmap, initialized to all ones
+ clist types; // simple_list of names
+ clist substreams; // simple_list of names
+ clist limits; // of struct limit_config's
+ int microseconds; // Enable logging of precise timestamps
+ int show_types; // "ShowTypes": sets LSFMT_TYPE on the stream
+ int syslog_pids; // "SyslogPID": passes LOG_PID to log_new_syslog()
+ int errors_fatal; // "ErrorsFatal": sets LSFLAG_ERR_IS_FATAL on the stream
+ int stderr_follows; // "StdErrFollows": passes FF_FD2_FOLLOWS to log_new_file()
+ struct log_stream *ls; // The stream created from this section, NULL until first use
+ int mark; // Used temporarily in log_config_commit()
+};
+
+/* One "Limit" subsection of a stream: a rate limit for a set of message types. */
+struct limit_config {
+ cnode n;
+ clist types; // simple_list of names
+ double rate; // "Rate": copied to the token bucket filter's rate
+ uns burst; // "Burst": copied to the token bucket filter's burst
+};
+
+/* Init hook of a Stream section: a fresh stream accepts all levels. */
+static char *
+stream_init(void *ptr)
+{
+  ((struct stream_config *) ptr)->levels = ~0U;
+  return NULL;
+}
+
+/*
+ * Commit hook of a single Stream section: checks that the combination of
+ * options makes sense.  Returns NULL on success or an error message.
+ */
+static char *
+stream_commit(void *ptr)
+{
+  struct stream_config *c = ptr;
+
+  // Streams are looked up by name (the name is handed to strcmp()),
+  // so a stream without a Name must be rejected before that happens.
+  if (!c->name)
+    return "Log stream has no Name";
+
+  if (c->syslog_facility)
+    {
+      if (!log_syslog_facility_exists(c->syslog_facility))
+        return cf_printf("SyslogFacility `%s' is not recognized", c->syslog_facility);
+      if (c->file_name)
+        return "Both FileName and SyslogFacility selected";
+      if (c->microseconds)
+        return "Syslog streams do not support microsecond precision";
+    }
+  if (c->stderr_follows && !c->file_name)
+    return "StdErrFollows requires a file-based stream";
+  return NULL;
+}
+
+/* NULL-terminated table of level names, used as the lookup table of the "Levels" bitmap */
+static const char * const level_names[] = {
+#define P(x) #x,
+ LOG_LEVEL_NAMES
+#undef P
+ NULL
+};
+
+/* Layout of a "Limit" subsection; each one is stored as a struct limit_config */
+static struct cf_section limit_config = {
+ CF_TYPE(struct limit_config),
+ CF_ITEMS {
+#define P(x) PTR_TO(struct limit_config, x)
+ CF_LIST("Types", P(types), &cf_string_list_config),
+ CF_DOUBLE("Rate", P(rate)),
+ CF_UNS("Burst", P(burst)),
+#undef P
+ CF_END
+ }
+};
+
+/* Layout of a "Stream" section; each one is stored as a struct stream_config */
+static struct cf_section stream_config = {
+ CF_TYPE(struct stream_config),
+ CF_INIT(stream_init),
+ CF_COMMIT(stream_commit),
+ CF_ITEMS {
+#define P(x) PTR_TO(struct stream_config, x)
+ CF_STRING("Name", P(name)),
+ CF_STRING("FileName", P(file_name)),
+ CF_STRING("SyslogFacility", P(syslog_facility)),
+ CF_BITMAP_LOOKUP("Levels", P(levels), level_names),
+ CF_LIST("Types", P(types), &cf_string_list_config),
+ CF_LIST("Substream", P(substreams), &cf_string_list_config),
+ CF_LIST("Limit", P(limits), &limit_config),
+ CF_INT("Microseconds", P(microseconds)),
+ CF_INT("ShowTypes", P(show_types)),
+ CF_INT("SyslogPID", P(syslog_pids)),
+ CF_INT("ErrorsFatal", P(errors_fatal)),
+ CF_INT("StdErrFollows", P(stderr_follows)),
+#undef P
+ CF_END
+ }
+};
+
+static clist log_stream_confs;
+
+/* Find the first configured stream called @name; returns NULL if there is none. */
+static struct stream_config *
+stream_find(const char *name)
+{
+  CLIST_FOR_EACH(struct stream_config *, cand, log_stream_confs)
+    {
+      if (strcmp(cand->name, name) == 0)
+        return cand;
+    }
+  return NULL;
+}
+
+/*
+ * Depth-first walk over the substreams of @c.  The mark field tracks the
+ * state of each stream: 0 = not visited, 1 = being visited, 2 = finished.
+ * Returns NULL on success, or an error message for an unknown substream
+ * or a cycle.
+ */
+static char *
+stream_resolve(struct stream_config *c)
+{
+  switch (c->mark)
+    {
+    case 2:
+      return NULL;
+    case 1:
+      return cf_printf("Log stream `%s' has substreams which refer to itself", c->name);
+    }
+
+  c->mark = 1;
+  CLIST_FOR_EACH(simp_node *, sub, c->substreams)
+    {
+      struct stream_config *target = stream_find(sub->s);
+      if (!target)
+        return cf_printf("Log stream `%s' refers to unknown substream `%s'", c->name, sub->s);
+      char *err = stream_resolve(target);
+      if (err)
+        return err;
+    }
+  c->mark = 2;
+  return NULL;
+}
+
+/*
+ * Commit hook of the whole Logging section: stream names must be unique
+ * and the substream references must resolve without cycles.
+ */
+static char *
+log_config_commit(void *ptr UNUSED)
+{
+  // A name is unique iff looking it up yields the very same node
+  CLIST_FOR_EACH(struct stream_config *, c, log_stream_confs)
+    {
+      if (stream_find(c->name) != c)
+        return cf_printf("Log stream `%s' defined twice", c->name);
+    }
+
+  // Resolve substreams of every stream, detecting unknown names and cycles
+  CLIST_FOR_EACH(struct stream_config *, c, log_stream_confs)
+    {
+      char *err = stream_resolve(c);
+      if (err)
+        return err;
+    }
+
+  return NULL;
+}
+
+/* The top-level section: a list of Stream subsections collected in log_stream_confs */
+static struct cf_section log_config = {
+ CF_COMMIT(log_config_commit),
+ CF_ITEMS {
+ CF_LIST("Stream", &log_stream_confs, &stream_config),
+ CF_END
+ }
+};
+
+/* Declare the section under the name "Logging" (marked CONSTRUCTOR) */
+static void CONSTRUCTOR
+log_config_init(void)
+{
+ cf_declare_section("Logging", &log_config, 0);
+}
+
+/*** Type sets ***/
+
+/*
+ * Translate a list of type names to a bitmask of types.  An empty list
+ * or the name "all" selects every type.
+ */
+static uns
+log_type_mask(clist *l)
+{
+  uns mask = 0;
+
+  if (clist_empty(l))
+    return ~0U;
+
+  CLIST_FOR_EACH(simp_node *, node, *l)
+    {
+      if (strcmp(node->s, "all") == 0)
+        return ~0U;
+
+      /*
+       *  Unknown types are skipped on purpose: programs sharing one
+       *  configuration file need not know all the types.  For the same
+       *  reason, Types is a list of names and not a bitmap.
+       */
+      int type = log_find_type(node->s);
+      if (type >= 0)
+        mask |= 1 << LS_GET_TYPE(type);
+    }
+  return mask;
+}
+
+/*** Generating limiters ***/
+
+/*
+ * When limiting is enabled, we let log_stream->filter point to this function
+ * and log_stream->user_data point to an array of pointers to token bucket
+ * filters for individual message types.
+ *
+ * Returns 1 when tbf_limit() reported a negative result and 0 otherwise.
+ * NOTE(review): presumably a non-zero return makes the caller drop the
+ * message -- confirm against the code which invokes ls->filter.
+ */
+static int
+log_limiter(struct log_stream *ls, struct log_msg *m)
+{
+ struct token_bucket_filter **limits = ls->user_data;
+ if (!limits)
+ return 0;
+ struct token_bucket_filter *tbf = limits[LS_GET_TYPE(m->flags)];
+ if (!tbf)
+ return 0;
+
+ ASSERT(!(m->flags & L_SIGHANDLER));
+ // Messages flagged L_LOGGER_ERR (like the notice generated below) are never limited
+ if (m->flags & L_LOGGER_ERR)
+ return 0;
+
+ // Time of the message in milliseconds
+ timestamp_t now = ((timestamp_t) m->tv->tv_sec * 1000) + (m->tv->tv_usec / 1000);
+ ucwlib_lock();
+ int res = tbf_limit(tbf, now);
+ ucwlib_unlock();
+
+ if (res < 0)
+ {
+ // Only the result -1 produces the notice; other negative results are silent
+ if (res == -1)
+ {
+ struct log_msg mm = *m;
+ mm.flags |= L_LOGGER_ERR;
+ mm.raw_msg = "(maximum logging rate exceeded, some messages will be suppressed)";
+ log_pass_msg(0, ls, &mm);
+ }
+ return 1;
+ }
+ else
+ return 0;
+}
+
+/*
+ * Attach the rate limit described by @lim to the stream @ls.
+ *
+ * On first use, an array of LS_NUM_TYPES filter pointers is allocated as
+ * ls->user_data and log_limiter() is installed as the filter of the stream.
+ * A single token bucket filter is then shared by all types selected by @lim.
+ */
+static void
+log_apply_limits(struct log_stream *ls, struct limit_config *lim)
+{
+  uns mask = log_type_mask(&lim->types);
+  if (!mask)
+    return;
+
+  if (!ls->user_data)
+    {
+      ls->user_data = cf_malloc_zero(LS_NUM_TYPES * sizeof(struct token_bucket_filter *));
+      ls->filter = log_limiter;
+    }
+  struct token_bucket_filter **limits = ls->user_data;
+
+  // Size the allocation by the filter itself, not by the config structure
+  struct token_bucket_filter *tbf = cf_malloc_zero(sizeof(*tbf));
+  tbf->rate = lim->rate;
+  tbf->burst = lim->burst;
+  tbf_init(tbf);
+
+  for (uns i=0; i < LS_NUM_TYPES; i++)
+    if (mask & (1 << i))
+      limits[i] = tbf;
+}
+
+/*** Generating streams ***/
+
+char *
+log_check_configured(const char *name)
+{
+ if (stream_find(name))
+ return NULL;
+ else
+ return cf_printf("Log stream `%s' not found", name);
+}
+
+/*
+ * Create the log stream described by @c, recursively creating its substreams.
+ * The result is cached in c->ls, so each configured stream is built only once;
+ * a cached stream is returned as it is, without taking another reference.
+ */
+static struct log_stream *
+do_new_configured(struct stream_config *c)
+{
+ struct log_stream *ls;
+ ASSERT(c);
+
+ if (c->ls)
+ return c->ls;
+
+ // Pick the kind of stream: file, syslog, or a plain stream with no handler set here
+ if (c->file_name)
+ ls = log_new_file(c->file_name, (c->stderr_follows ? FF_FD2_FOLLOWS : 0));
+ else if (c->syslog_facility)
+ ls = log_new_syslog(c->syslog_facility, (c->syslog_pids ? LOG_PID : 0));
+ else
+ ls = log_new_stream(sizeof(*ls));
+
+ // The result of stream_find() is passed on unchecked; do_new_configured() asserts it is non-NULL
+ CLIST_FOR_EACH(simp_node *, s, c->substreams)
+ log_add_substream(ls, do_new_configured(stream_find(s->s)));
+
+ ls->levels = c->levels;
+ if (c->microseconds)
+ ls->msgfmt |= LSFMT_USEC;
+ if (c->show_types)
+ ls->msgfmt |= LSFMT_TYPE;
+ if (c->errors_fatal)
+ ls->stream_flags |= LSFLAG_ERR_IS_FATAL;
+ ls->types = log_type_mask(&c->types);
+
+ CLIST_FOR_EACH(struct limit_config *, lim, c->limits)
+ log_apply_limits(ls, lim);
+
+ c->ls = ls;
+ return ls;
+}
+
+/*
+ * Get the log stream configured under @name, die()ing if there is no such
+ * stream.  A stream which has been created already is returned with a new
+ * reference taken; otherwise it is created now.
+ */
+struct log_stream *
+log_new_configured(const char *name)
+{
+  struct stream_config *conf = stream_find(name);
+
+  if (!conf)
+    die("Unable to find log stream %s", name);
+  return conf->ls ? log_ref_stream(conf->ls) : do_new_configured(conf);
+}
+
+/*
+ * Make the configured stream @name the only substream of the stream
+ * returned by log_stream_by_flags(0).
+ */
+void
+log_configured(const char *name)
+{
+ struct log_stream *ls = log_new_configured(name);
+ struct log_stream *def = log_stream_by_flags(0);
+ log_rm_substream(def, NULL); // NULL removes all current substreams
+ log_add_substream(def, ls);
+ log_close_stream(ls); // Drop our own reference; log_add_substream() took one for def
+}
+
+#ifdef TEST
+
+#include <unistd.h>
+#include "ucw/getopt.h"
+
+/* Test program: logs ten messages of type "foo" to the configured stream "combined" */
+int main(int argc, char **argv)
+{
+ log_init(argv[0]);
+ int c;
+ // Only the options handled by cf_getopt() itself are accepted
+ while ((c = cf_getopt(argc, argv, CF_SHORT_OPTS, CF_NO_LONG_OPTS, NULL)) >= 0)
+ die("No options here.");
+
+ int type = log_register_type("foo");
+ struct log_stream *ls = log_new_configured("combined");
+ for (uns i=0; i<10; i++)
+ {
+ msg(L_INFO | ls->regnum | type, "Hello, universe!");
+ usleep(200000);
+ }
+ fprintf(stderr, "Alas, this was printed to stderr.\n");
+
+ log_close_all();
+ return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Logging to Files
+ *
+ * (c) 1997--2009 Martin Mares <mj@ucw.cz>
+ * (c) 2008 Tomas Gavenciak <gavento@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+#include "ucw/log.h"
+#include "ucw/log-internal.h"
+#include "ucw/lfs.h"
+#include "ucw/threads.h"
+#include "ucw/simple-lists.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <time.h>
+#include <errno.h>
+
+struct file_stream {
+ struct log_stream ls; // ls.name is the current name of the log file
+ int fd;
+ uns flags; // FF_xxx
+ char *orig_name; // Original name with strftime escapes
+};
+
+#define MAX_EXPAND 64 // Maximum size of expansion of strftime escapes
+
+static int log_switch_nest;
+
+/*
+ * Open the log file @name for appending and make it the current file of @fs.
+ * die()s when the file cannot be opened. The previous descriptor (if any)
+ * is closed and fs->ls.name is replaced by a copy of @name.
+ */
+static void
+do_log_reopen(struct file_stream *fs, const char *name)
+{
+ int fd = ucw_open(name, O_WRONLY | O_CREAT | O_APPEND, 0666);
+ if (fd < 0)
+ die("Unable to open log file %s: %m", name);
+ if (fs->fd >= 0)
+ close(fs->fd);
+ fs->fd = fd;
+ // Redirect stderr (fd 2) to the log file as well if requested
+ if (fs->flags & FF_FD2_FOLLOWS)
+ dup2(fd, 2);
+ if (fs->ls.name)
+ {
+ xfree(fs->ls.name);
+ fs->ls.name = NULL; // We have to keep the stream consistent -- die() below can invoke logging
+ }
+ fs->ls.name = xstrdup(name);
+}
+
+/*
+ * Make sure that @fs writes to the file which corresponds to the time @tm.
+ *
+ * Streams without strftime escapes in their name are just opened on first use
+ * and 1 is returned.  For the others, the name is expanded and the file is
+ * reopened when the name differs from the current one; the return value is 1
+ * if the file has been switched and 0 otherwise (also when switching is
+ * currently disabled by log_switch_nest).
+ */
+static int
+do_log_switch(struct file_stream *fs, struct tm *tm)
+{
+  if (!(fs->flags & FF_FORMAT_NAME))
+    {
+      if (fs->fd >= 0)
+        return 1;
+      else
+        {
+          do_log_reopen(fs, fs->orig_name);
+          return 1;
+        }
+    }
+
+  int buflen = strlen(fs->orig_name) + MAX_EXPAND;
+  char name[buflen];
+  int switched = 0;
+
+  ucwlib_lock();
+  if (!log_switch_nest)		// Avoid infinite loops if we die when switching logs
+    {
+      log_switch_nest++;
+      // strftime() reports a result which does not fit by returning 0 (the
+      // buffer contents are undefined then); it never returns a negative
+      // number nor a value >= buflen.
+      size_t len = strftime(name, buflen, fs->orig_name, tm);
+      if (!len)
+        die("Error formatting log file name: %m");
+      if (!fs->ls.name || strcmp(name, fs->ls.name))
+        {
+          do_log_reopen(fs, name);
+          switched = 1;
+        }
+      log_switch_nest--;
+    }
+  ucwlib_unlock();
+  return switched;
+}
+
+/* Close hook of file streams: closes the descriptor if we own it and frees both names. */
+static void
+file_close(struct log_stream *ls)
+{
+  struct file_stream *fs = (struct file_stream *) ls;
+  int owns_fd = fs->flags & FF_CLOSE_FD;
+
+  if (owns_fd && fs->fd >= 0)
+    close(fs->fd);
+  xfree(fs->ls.name);
+  xfree(fs->orig_name);
+}
+
+/*
+ * Handler of file streams: switches to a new file if the name contains
+ * strftime escapes, then writes the formatted message.  Returns 0 on
+ * success or the errno of a failed write().
+ */
+static int
+file_handler(struct log_stream *ls, struct log_msg *m)
+{
+  struct file_stream *fs = (struct file_stream *) ls;
+
+  if (m->tm && (fs->flags & FF_FORMAT_NAME))
+    do_log_switch(fs, m->tm);
+
+  if (write(fs->fd, m->m, m->m_len) < 0)
+    return errno;
+  return 0;
+}
+
+/*
+ * Create a log stream which writes to the already open file descriptor @fd.
+ * @flags are FF_xxx flags; the stream is called "fd<number>".
+ */
+struct log_stream *
+log_new_fd(int fd, uns flags)
+{
+  struct log_stream *ls = log_new_stream(sizeof(struct file_stream));
+  struct file_stream *fs = (struct file_stream *) ls;
+  fs->fd = fd;
+  fs->flags = flags;
+  // file_close() frees orig_name unconditionally, so it must not be left
+  // uninitialized for streams which have no file name.
+  fs->orig_name = NULL;
+  ls->msgfmt = LSFMT_DEFAULT;
+  ls->handler = file_handler;
+  ls->close = file_close;
+  ls->name = xmalloc(16);
+  snprintf(ls->name, 16, "fd%d", fd);
+  return ls;
+}
+
+/*
+ * Create a log stream which writes to the file @path.  If the path contains
+ * a `%', it is treated as a strftime() format and the file is switched as the
+ * expanded name changes.  @flags are FF_xxx flags.  die()s if the file cannot
+ * be opened.
+ */
+struct log_stream *
+log_new_file(const char *path, uns flags)
+{
+  struct log_stream *ls = log_new_stream(sizeof(struct file_stream));
+  struct file_stream *fs = (struct file_stream *) ls;
+  fs->fd = -1;
+  fs->orig_name = xstrdup(path);
+  // Assign the flags first instead of or-ing into a field nobody has set yet
+  fs->flags = FF_CLOSE_FD | flags;
+  if (strchr(path, '%'))
+    fs->flags |= FF_FORMAT_NAME;
+  ls->msgfmt = LSFMT_DEFAULT;
+  ls->handler = file_handler;
+  ls->close = file_close;
+
+  time_t now = time(NULL);
+  struct tm *tm = localtime(&now);
+  ASSERT(tm);
+  do_log_switch(fs, tm);		// die()'s on errors
+  return ls;
+}
+
+/*
+ * Switch all file streams to the files which correspond to the current time.
+ * Returns non-zero if do_log_switch() reported a switch for any of them.
+ */
+int
+log_switch(void)
+{
+  time_t now = time(NULL);
+  struct tm *tm = localtime(&now);
+  ASSERT(tm);
+
+  int switched = 0;
+  for (int i=0; i < log_streams_after; i++)
+    {
+      struct log_stream *ls = log_streams.ptr[i];
+      // Closed streams stay in the array (with regnum set to -1) until they
+      // are recycled and they still carry their old handler pointer, but
+      // their names have been freed already, so they must be skipped.
+      if (ls->regnum >= 0 && ls->handler == file_handler)
+        switched |= do_log_switch((struct file_stream *) ls, tm);
+    }
+  return switched;
+}
+
+/*
+ * Disable switching of log files: do_log_switch() skips the switch while
+ * log_switch_nest is non-zero. Calls can be nested.
+ */
+void
+log_switch_disable(void)
+{
+ log_switch_nest++;
+}
+
+/* Undo one call to log_switch_disable(); asserts that there is one to undo */
+void
+log_switch_enable(void)
+{
+ ASSERT(log_switch_nest);
+ log_switch_nest--;
+}
+
+/*
+ * Replace all substreams of the stream returned by log_stream_by_flags(0)
+ * by a new file stream writing to @name, with stderr following the file.
+ * Does nothing when @name is NULL.
+ */
+void
+log_file(const char *name)
+{
+ if (!name)
+ return;
+
+ struct log_stream *ls = log_new_file(name, FF_FD2_FOLLOWS);
+ struct log_stream *def = log_stream_by_flags(0);
+ log_rm_substream(def, NULL); // NULL removes all current substreams
+ log_add_substream(def, ls);
+ log_close_stream(ls); // Drop our own reference; log_add_substream() took one for def
+}
+
+#ifdef TEST
+
+/* Test program: logs each command-line argument as a separate message */
+int main(int argc, char **argv)
+{
+  log_init(argv[0]);
+  log_file("/proc/self/fd/1");
+  // struct log_stream *ls = log_new_fd(1, 0);
+  // struct log_stream *ls = log_new_file("/tmp/quork-%Y%m%d-%H%M%S", 0);
+  for (int i=1; i<argc; i++)
+    msg(L_INFO, "%s", argv[i]);	// Never use the arguments as a format string
+  log_close_all();
+  return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Internals of Logging
+ *
+ * (c) 1997--2009 Martin Mares <mj@ucw.cz>
+ * (c) 2008 Tomas Gavenciak <gavento@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_LOG_INTERNAL_H_
+#define _UCW_LOG_INTERNAL_H_
+
+/*
+ * Pass a message to a stream.
+ * @depth prevents loops.
+ * Returns 1 in case of loop detection or other fatal error,
+ * 0 otherwise
+ */
+int log_pass_msg(int depth, struct log_stream *ls, struct log_msg *m);
+
+/* Define an array (growing buffer) for pointers to log_streams. */
+#define GBUF_TYPE struct log_stream*
+#define GBUF_PREFIX(x) lsbuf_##x
+#include "ucw/gbuf.h"
+
+extern struct lsbuf_t log_streams;
+extern int log_streams_after;
+
+extern struct log_stream log_stream_default;
+
+extern char **log_type_names;
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Logging: Management of Log Streams
+ *
+ * (c) 2008 Tomas Gavenciak <gavento@ucw.cz>
+ * (c) 2009 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+#include "ucw/log.h"
+#include "ucw/log-internal.h"
+#include "ucw/simple-lists.h"
+
+#include <string.h>
+
+/* Initial number of streams to allocate (must be >=2) */
+#define LS_INIT_STREAMS 8
+
+/* Flag indicating initialization of the module */
+static int log_initialized = 0;
+
+/* The head of the list of freed log_streams indexes in log_streams.ptr (~0U if none free).
+ * Freed positions in log_streams.ptr are connected into a linked list in the following way:
+ * log_streams.ptr[log_streams_free].levels is the index of next freed position (or ~0U) */
+static uns log_streams_free = ~0U;
+
+/* Initialize the logstream module.
+ * It is not necessary to call this explicitly as it is called by
+ * the first log_new_stream() (for backward compatibility and ease of use). */
+static void
+log_init_module(void)
+{
+ if (log_initialized)
+ return;
+
+ /* Create the growing array */
+ lsbuf_init(&log_streams);
+ lsbuf_set_size(&log_streams, LS_INIT_STREAMS);
+
+ bzero(log_streams.ptr, sizeof(struct log_stream*) * (log_streams.len));
+ log_streams_free = ~0U;
+
+ /* Must be set before log_new_stream() below, which calls us again */
+ log_initialized = 1;
+
+ /* init the default stream (0) as forwarder to fd2 */
+ struct log_stream *ls = log_new_stream(sizeof(*ls));
+ ASSERT(ls == log_streams.ptr[0]);
+ ASSERT(ls->regnum == 0);
+ ls->name = "default";
+ log_add_substream(ls, &log_stream_default);
+}
+
+/*
+ * Close all streams, free their structures and return the module to its
+ * uninitialized state. Does nothing if the module has not been initialized.
+ */
+void
+log_close_all(void)
+{
+ if (!log_initialized)
+ return;
+
+ // Remove substreams of all streams
+ for (int i=0; i < log_streams_after; i++)
+ if (log_streams.ptr[i]->regnum >= 0)
+ log_rm_substream(log_streams.ptr[i], NULL);
+
+ // Close all streams that remain and free all cached structures
+ for (int i=0; i < log_streams_after; i++)
+ {
+ struct log_stream *ls = log_streams.ptr[i];
+ if (ls->regnum >= 0)
+ log_close_stream(ls);
+ // A stream which is still registered here must have no references left
+ ASSERT(ls->regnum < 0 || !ls->use_count);
+ xfree(ls);
+ }
+
+ /* Back to the default state */
+ lsbuf_done(&log_streams);
+ log_streams_after = 0;
+ log_streams_free = ~0U;
+ log_initialized = 0;
+}
+
+/*
+ * Append @what to the list of substreams of @where.  The list holds its own
+ * reference to @what, obtained by log_ref_stream().
+ */
+void
+log_add_substream(struct log_stream *where, struct log_stream *what)
+{
+  ASSERT(where);
+  ASSERT(what);
+
+  simp_node *link = xmalloc(sizeof(*link));
+  link->p = log_ref_stream(what);
+  clist_add_tail(&where->substreams, &link->n);
+}
+
+/*
+ * Remove all occurrences of @what from the substreams of @where, or all
+ * substreams if @what is NULL. Each removed substream is passed to
+ * log_close_stream(). Returns the number of removed list entries.
+ */
+int
+log_rm_substream(struct log_stream *where, struct log_stream *what)
+{
+ void *tmp;
+ int cnt = 0;
+ ASSERT(where);
+
+ // The DELSAFE variant is needed as we remove nodes during the walk
+ CLIST_FOR_EACH_DELSAFE(simp_node *, i, where->substreams, tmp)
+ if (i->p == what || !what)
+ {
+ clist_remove(&i->n);
+ log_close_stream(i->p);
+ xfree(i);
+ cnt++;
+ }
+ return cnt;
+}
+
+/*
+ * Allocate a new log stream structure of @size bytes (at least
+ * sizeof(struct log_stream); bigger sizes are used by stream types which
+ * embed struct log_stream at the beginning of their own structure) and
+ * register it.
+ *
+ * The whole structure is zeroed, then all levels and types are enabled and
+ * the list of substreams is initialized.  The result comes from
+ * log_ref_stream(), i.e., with a reference taken.
+ */
+struct log_stream *
+log_new_stream(size_t size)
+{
+  struct log_stream *l;
+  int index;
+
+  ASSERT(size >= sizeof(*l));
+
+  /* Initialize the data structures if needed */
+  log_init_module();
+
+  /* Get a free stream, possibly recycling a closed one */
+  if (log_streams_free == ~0U)
+    {
+      lsbuf_grow(&log_streams, log_streams_after+1);
+      index = log_streams_after++;
+      l = log_streams.ptr[index] = xmalloc(size);
+    }
+  else
+    {
+      index = log_streams_free;
+      l = xrealloc(log_streams.ptr[index], size);
+      log_streams.ptr[index] = l;
+      /* The levels field of a freed stream links the free-list */
+      log_streams_free = l->levels;
+    }
+
+  /* Initialize the stream, including the type-specific part beyond
+   * struct log_stream, so that no field starts with a random value */
+  bzero(l, size);
+  l->levels = ~0U;
+  l->types = ~0U;
+  l->regnum = LS_SET_STRNUM(index);
+  clist_init(&l->substreams);
+  return log_ref_stream(l);
+}
+
+/*
+ * Drop one reference to @ls. When the last reference is gone, the substreams
+ * are unlinked, the close hook of the stream is called and the stream is
+ * put on the free-list for recycling. Returns 1 if the stream has been
+ * really closed, 0 if it is still referenced.
+ */
+int
+log_close_stream(struct log_stream *ls)
+{
+ ASSERT(ls);
+ ASSERT(ls->use_count);
+ if (--ls->use_count)
+ return 0;
+
+ /* Unlink all substreams */
+ log_rm_substream(ls, NULL);
+
+ /* Close the stream and add it to the free-list */
+ if (ls->close)
+ ls->close(ls);
+ /* The levels field is reused as the link to the next free position */
+ ls->levels = log_streams_free;
+ log_streams_free = LS_GET_STRNUM(ls->regnum);
+ ls->regnum = -1;
+ return 1;
+}
+
+/*
+ * Change the message format of @ls and, recursively, of all its substreams:
+ * the format bits are and-ed with @mask and then or-ed with @data.
+ */
+void
+log_set_format(struct log_stream *ls, uns mask, uns data)
+{
+ ls->msgfmt = (ls->msgfmt & mask) | data;
+ CLIST_FOR_EACH(simp_node *, i, ls->substreams)
+ log_set_format(i->p, mask, data);
+}
+
+/*** Registry of type names ***/
+
+/*
+ * Register a message type called @name and return its ID encoded by
+ * LS_SET_TYPE. If the name is registered already, the existing ID is
+ * returned. The table is created on first use with slot 0 set to "default";
+ * the function asserts that the table has a free slot.
+ */
+int log_register_type(const char *name)
+{
+ if (!log_type_names)
+ {
+ log_type_names = xmalloc_zero(LS_NUM_TYPES * sizeof(char *));
+ log_type_names[0] = "default";
+ }
+ uns id;
+ // Used slots are contiguous, so the first NULL entry is the first free one
+ for (id=0; id < LS_NUM_TYPES && log_type_names[id]; id++)
+ if (!strcmp(log_type_names[id], name))
+ return LS_SET_TYPE(id);
+ ASSERT(id < LS_NUM_TYPES);
+ log_type_names[id] = xstrdup(name);
+ return LS_SET_TYPE(id);
+}
+
+/**
+ * Look up a message type by its name.  Returns the ID of the type encoded by
+ * `LS_SET_TYPE`, or -1 if no such type is known.  The type "default" always
+ * exists and its ID is 0.
+ **/
+int log_find_type(const char *name)
+{
+  if (strcmp(name, "default") == 0)
+    return 0;
+
+  if (log_type_names)
+    {
+      uns id = 0;
+      while (id < LS_NUM_TYPES && log_type_names[id])
+        {
+          if (strcmp(log_type_names[id], name) == 0)
+            return LS_SET_TYPE(id);
+          id++;
+        }
+    }
+  return -1;
+}
--- /dev/null
+/*
+ * UCW Library -- Logging to Syslog
+ *
+ * (c) 2009 Martin Mares <mj@ucw.cz>
+ * (c) 2008 Tomas Gavenciak <gavento@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+#include "ucw/log.h"
+
+#include <string.h>
+#include <syslog.h>
+
+struct syslog_stream {
+ struct log_stream ls;
+ int facility;
+};
+
+static int syslog_open_count;
+
+/* Close hook of syslog streams: closelog() is called when the last syslog stream goes away */
+static void
+syslog_close(struct log_stream *ls UNUSED)
+{
+ if (!--syslog_open_count)
+ closelog();
+}
+
+/* Convert the name of a syslog facility to its LOG_xxx identifier.
+ * The comparison is exact (case-sensitive); -1 is returned for unknown names. */
+static int
+syslog_facility(const char *name)
+{
+ // Unfortunately, there is no standard way how to get at the list of facility names
+ static const struct {
+ const char *name;
+ int id;
+ } facilities[] = {
+ { "auth", LOG_AUTH },
+ { "authpriv", LOG_AUTHPRIV },
+ { "cron", LOG_CRON },
+ { "daemon", LOG_DAEMON },
+ { "ftp", LOG_FTP },
+ { "kern", LOG_KERN },
+ { "lpr", LOG_LPR },
+ { "mail", LOG_MAIL },
+ { "news", LOG_NEWS },
+ { "syslog", LOG_SYSLOG },
+ { "user", LOG_USER },
+ { "uucp", LOG_UUCP },
+ { "local0", LOG_LOCAL0 },
+ { "local1", LOG_LOCAL1 },
+ { "local2", LOG_LOCAL2 },
+ { "local3", LOG_LOCAL3 },
+ { "local4", LOG_LOCAL4 },
+ { "local5", LOG_LOCAL5 },
+ { "local6", LOG_LOCAL6 },
+ { "local7", LOG_LOCAL7 },
+ };
+
+ for (uns i=0; i < ARRAY_SIZE(facilities); i++)
+ if (!strcmp(facilities[i].name, name))
+ return facilities[i].id;
+ return -1;
+}
+
+/* Convert severity level to syslog constants.
+ * Levels beyond the end of the table are mapped to LOG_NOTICE. */
+static int
+syslog_level(int level)
+{
+ static const int levels[] = {
+ [L_DEBUG] = LOG_DEBUG,
+ [L_INFO] = LOG_INFO,
+ [L_INFO_R] = LOG_INFO,
+ [L_WARN] = LOG_WARNING,
+ [L_WARN_R] = LOG_WARNING,
+ [L_ERROR] = LOG_ERR,
+ [L_ERROR_R] = LOG_ERR,
+ [L_FATAL] = LOG_CRIT,
+ };
+ // NOTE(review): only the upper bound is checked; assumes callers never pass a negative level
+ return ((level < (int)ARRAY_SIZE(levels)) ? levels[level] : LOG_NOTICE);
+}
+
+/*
+ * Handler of syslog streams: passes the formatted message to syslog() with
+ * a priority combined from the level of the message and the facility of
+ * the stream.  Always returns 0.
+ */
+static int
+syslog_handler(struct log_stream *ls, struct log_msg *m)
+{
+  ASSERT(ls);
+  ASSERT(m);
+
+  struct syslog_stream *ss = (struct syslog_stream *) ls;
+  int level = syslog_level(LS_GET_LEVEL(m->flags));
+  syslog(level | ss->facility, "%s", m->m);
+  return 0;
+}
+
+/*
+ * Create a log stream which sends messages to syslog with the given
+ * @facility (a name like "daemon"; die()s if it is not known).  @options are
+ * passed to openlog() when the first syslog stream is created.
+ */
+struct log_stream *
+log_new_syslog(const char *facility, int options)
+{
+  int fac = syslog_facility(facility);
+  if (fac < 0)
+    die("No such syslog facility: %s", facility);
+
+  struct log_stream *ls = log_new_stream(sizeof(struct syslog_stream));
+  struct syslog_stream *ss = (struct syslog_stream *) ls;
+  ls->name = "syslog";
+  ls->msgfmt = 0;
+  ls->handler = syslog_handler;
+  ls->close = syslog_close;
+  ss->facility = fac;
+
+  // The third argument of openlog() is the default facility, not a level
+  // (LOG_INFO used to be passed here); every message carries the facility
+  // of its stream anyway.
+  if (!syslog_open_count++)
+    openlog(log_title, options, fac);
+  return ls;
+}
+
+/* Returns non-zero iff @facility is one of the names known to syslog_facility() */
+int
+log_syslog_facility_exists(const char *facility)
+{
+ return (syslog_facility(facility) >= 0);
+}
--- /dev/null
+/*
+ * UCW Library -- Logging
+ *
+ * (c) 1997--2009 Martin Mares <mj@ucw.cz>
+ * (c) 2008 Tomas Gavenciak <gavento@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+#include "ucw/log.h"
+#include "ucw/log-internal.h"
+#include "ucw/simple-lists.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include <time.h>
+#include <alloca.h>
+#include <errno.h>
+
+char *log_title;
+int log_pid;
+void (*log_die_hook)(void);
+
+static void NONRET do_die(void);
+
+/*** The default log stream, which logs to stderr ***/
+
+/* Handler of the default stream: dumps the formatted message to stderr. */
+static int default_log_handler(struct log_stream *ls UNUSED, struct log_msg *m)
+{
+  // A bare-bones cousin of the log-file module; write errors are deliberately ignored.
+  write(STDERR_FILENO, m->m, m->m_len);
+  return 0;
+}
+
+/*
+ * The statically allocated default stream. It accepts all severity levels
+ * and all message types, uses the default message format and hands the
+ * result to default_log_handler().
+ */
+struct log_stream log_stream_default = {
+ .name = "stderr",
+ // presumably large enough that the reference count never drops to zero -- TODO confirm
+ .use_count = 1000000,
+ .handler = default_log_handler,
+ .levels = ~0U,
+ .types = ~0U,
+ .msgfmt = LSFMT_DEFAULT,
+ // an empty clist
+ .substreams.head.next = (cnode *) &log_stream_default.substreams.head,
+ .substreams.head.prev = (cnode *) &log_stream_default.substreams.head,
+};
+
+/*** Registry of streams and their identifiers ***/
+
+struct lsbuf_t log_streams; /* A growing array of pointers to log_streams */
+int log_streams_after = 0; /* The first never-used index in log_streams.ptr */
+
+/*
+ * Find a stream by its identifier given as LS_SET_STRNUM(flags).
+ * Returns NULL if the stream doesn't exist or it's invalid.
+ *
+ * If the log-stream machinery has not been initialized (which is normal for programs
+ * with no fancy logging), the log_streams gbuf is empty and this function only
+ * translates stream #0 to the static log_stream_default.
+ */
+
+struct log_stream *
+log_stream_by_flags(uns flags)
+{
+  int idx = LS_GET_STRNUM(flags);
+
+  /* A registered and still open stream? */
+  if (idx >= 0 && idx < log_streams_after && log_streams.ptr[idx]->regnum != -1)
+    return log_streams.ptr[idx];
+
+  /* Stream #0 falls back to the static default stream, anything else is unknown */
+  if (!idx)
+    return &log_stream_default;
+  return NULL;
+}
+
+/*** Known message types ***/
+
+char **log_type_names;
+
+/* Return the name of the message type encoded in @flags, or "default" if the type has no name. */
+char *
+log_type_name(uns flags)
+{
+  uns type = LS_GET_TYPE(flags);
+
+  if (log_type_names && log_type_names[type])
+    return log_type_names[type];
+  return "default";
+}
+
+/*** Logging ***/
+
+/*
+ * Format a message and pass it to the log stream selected by @cat.
+ *
+ * @cat:  message flags (severity level, stream number, message type, control bits)
+ * @fmt:  printf-like format string
+ * @args: arguments of the format string
+ *
+ * If L_SIGHANDLER is set in @cat, the message always goes to the default
+ * stream, no time stamp is generated and no memory is allocated, so messages
+ * which do not fit in the internal buffer are truncated.
+ */
+void
+vmsg(uns cat, const char *fmt, va_list args)
+{
+  struct timeval tv;
+  struct tm tm;
+  va_list args2;
+  char stime[24];
+  char sutime[12];
+  char msgbuf[256];
+  char *p;
+  int len;
+  uns sighandler = cat & L_SIGHANDLER;
+  struct log_stream *ls;
+  struct log_msg m = { .flags = cat };
+
+  /* Find the destination stream */
+  if (sighandler)
+    ls = &log_stream_default;
+  else if (!(ls = log_stream_by_flags(cat)))
+    {
+      msg((LS_CTRL_MASK&cat)|L_WARN, "No log_stream with number %d! Logging to the default log.", LS_GET_STRNUM(cat));
+      ls = &log_stream_default;
+    }
+
+  /* Get the current time */
+  if (!sighandler)
+    {
+      /* CAVEAT: These calls are not safe in signal handlers. */
+      gettimeofday(&tv, NULL);
+      m.tv = &tv;
+      if (localtime_r(&tv.tv_sec, &tm))
+        m.tm = &tm;
+    }
+
+  /* Generate time strings */
+  if (m.tm)
+    {
+      strftime(stime, sizeof(stime), "%Y-%m-%d %H:%M:%S", &tm);
+      snprintf(sutime, sizeof(sutime), ".%06d", (int)tv.tv_usec);
+      m.stime = stime;
+      m.sutime = sutime;
+    }
+  else
+    {
+      m.stime = "\?\?\?\?-\?\?-\?\? \?\?:\?\?:\?\?";
+      m.sutime = ".\?\?\?\?\?\?";
+    }
+
+  /* Generate the message string; @args is kept intact for a possible second pass */
+  va_copy(args2, args);
+  len = vsnprintf(msgbuf, sizeof(msgbuf), fmt, args2);
+  va_end(args2);
+  if (len < (int) sizeof(msgbuf) || sighandler)
+    m.raw_msg = msgbuf;
+  else
+    {
+      /* Too long for the on-stack buffer: format again to a buffer of the exact size */
+      m.raw_msg = xmalloc(len+1);
+      vsnprintf(m.raw_msg, len+1, fmt, args);
+    }
+
+  /* Remove non-printable characters and newlines */
+  p = m.raw_msg;
+  while (*p)
+    {
+      /*
+       * Compare as unsigned char: plain char can be signed and bytes >= 0x80
+       * (e.g., parts of UTF-8 sequences) must not be treated as control characters.
+       */
+      if ((unsigned char) *p < 0x20 && *p != '\t')
+        *p = 0x7f;
+      p++;
+    }
+
+  /* Pass the message to the log_stream */
+  if (log_pass_msg(0, ls, &m))
+    {
+      /* Error (such as infinite loop) occurred */
+      log_pass_msg(0, &log_stream_default, &m);
+    }
+
+  if (m.raw_msg != msgbuf)
+    xfree(m.raw_msg);
+}
+
+/*
+ * Report that stream @ls has failed to log message @m. @err is an errno-style
+ * code; EDEADLK stands for exceeded nesting of log streams. The report is sent
+ * to the default stream, at most once per stream, and failures of the reports
+ * themselves are not reported. If the stream has LSFLAG_ERR_IS_FATAL set,
+ * the program is terminated afterwards.
+ */
+static void
+log_report_err(struct log_stream *ls, struct log_msg *m, int err)
+{
+ // Do not report errors which occurred when logging an error report
+ if (m->flags & L_LOGGER_ERR)
+ return;
+ // Report only the first error on each stream
+ if (ls->stream_flags & LSFLAG_ERR_REPORTED)
+ return;
+ ls->stream_flags |= LSFLAG_ERR_REPORTED;
+
+ // The report starts as a copy of the failed message, only flags and text are replaced
+ struct log_msg errm = *m;
+ char errbuf[128];
+ char *name = (ls->name ? : "<unnamed>");
+
+ errm.flags = ((ls->stream_flags & LSFLAG_ERR_IS_FATAL) ? L_FATAL : L_ERROR);
+ errm.flags |= L_LOGGER_ERR | (m->flags & LS_CTRL_MASK);
+ errm.raw_msg = errbuf;
+ if (err == EDEADLK)
+ snprintf(errbuf, sizeof(errbuf), "Error logging to %s: Maximum nesting level of log streams exceeded", name);
+ else
+ {
+ // %m expands to the description of errno, so set errno first
+ errno = err;
+ snprintf(errbuf, sizeof(errbuf), "Error logging to %s: %m", name);
+ }
+ log_pass_msg(0, &log_stream_default, &errm);
+
+ if (ls->stream_flags & LSFLAG_ERR_IS_FATAL)
+ do_die();
+}
+
+/* Maximal depth of log_pass_msg recursion */
+#define LS_MAX_DEPTH 64
+
+/*
+ * Pass message @m to stream @ls: filter it, pass it recursively to all
+ * substreams, format it according to ls->msgfmt and call the handler of
+ * the stream. @depth is the current depth of recursion through substreams.
+ *
+ * Returns 1 if the maximum depth was exceeded (here or in a substream),
+ * 0 otherwise. Errors returned by the handler are reported by
+ * log_report_err() and do not influence the return value.
+ */
+int
+log_pass_msg(int depth, struct log_stream *ls, struct log_msg *m)
+{
+ ASSERT(ls);
+
+ /* Check recursion depth */
+ if (depth > LS_MAX_DEPTH)
+ {
+ log_report_err(ls, m, EDEADLK);
+ return 1;
+ }
+
+ /* Filter by level, type and hook function */
+ /* NOTE(review): shifting the int 1 by 31 or more bits is undefined; presumably levels and types stay below that -- confirm */
+ if (!((1 << LS_GET_LEVEL(m->flags)) & ls->levels) ||
+ !((1 << LS_GET_TYPE(m->flags)) & ls->types) ||
+ ls->filter && ls->filter(ls, m))
+ return 0;
+
+ /* Pass the message to substreams */
+ CLIST_FOR_EACH(simp_node *, s, ls->substreams)
+ if (log_pass_msg(depth+1, s->p, m))
+ return 1;
+
+ /* Will pass to the handler of this stream... is there any? */
+ if (!ls->handler)
+ return 0;
+
+ /* Will print a message type? */
+ char *type = NULL;
+ if ((ls->msgfmt & LSFMT_TYPE) && LS_GET_TYPE(m->flags))
+ type = log_type_name(m->flags);
+
+ /* Upper bound on message length */
+ /* The constant 32 covers the fixed-size parts itemized in the comments below */
+ int len = strlen(m->raw_msg) + strlen(m->stime) + strlen(m->sutime) + 32;
+ if (log_title)
+ len += strlen(log_title);
+ if (ls->name)
+ len += strlen(ls->name);
+ if (type)
+ len += strlen(type) + 3;
+
+ /* Get a buffer and format the message */
+ /* Short messages and messages logged with L_SIGHANDLER use the stack, others the heap */
+ char *free_buf = NULL;
+ if (len <= 256 || (m->flags & L_SIGHANDLER))
+ m->m = alloca(len);
+ else
+ m->m = free_buf = xmalloc(len);
+ char *p = m->m;
+
+ /* Level (2 chars) */
+ if (ls->msgfmt & LSFMT_LEVEL)
+ {
+ *p++ = LS_LEVEL_LETTER(LS_GET_LEVEL(m->flags));
+ *p++ = ' ';
+ }
+
+ /* Time (|stime| + |sutime| + 1 chars) */
+ if (ls->msgfmt & LSFMT_TIME)
+ {
+ const char *q = m->stime;
+ while (*q)
+ *p++ = *q++;
+ if (ls->msgfmt & LSFMT_USEC)
+ {
+ q = m->sutime;
+ while (*q)
+ *p++ = *q++;
+ }
+ *p++ = ' ';
+ }
+
+ /* Process name, PID ( |log_title| + 6 + (|PID|<=10) chars ) */
+ if ((ls->msgfmt & LSFMT_TITLE) && log_title)
+ {
+ if ((ls->msgfmt & LSFMT_PID) && log_pid)
+ p += sprintf(p, "[%s (%d)] ", log_title, log_pid);
+ else
+ p += sprintf(p, "[%s] ", log_title);
+ }
+ else
+ {
+ if ((ls->msgfmt & LSFMT_PID) && log_pid)
+ p += sprintf(p, "[%d] ", log_pid);
+ }
+
+ /* log_stream name ( |ls->name| + 4 chars ) */
+ if (ls->msgfmt & LSFMT_LOGNAME)
+ {
+ if (ls->name)
+ p += sprintf(p, "<%s> ", ls->name);
+ else
+ p += sprintf(p, "<?> ");
+ }
+
+ /* Message type ( |type| + 3 chars ) */
+ if (type)
+ p += sprintf(p, "{%s} ", type);
+
+ /* The message itself ( |m| + 1 chars ) */
+ {
+ const char *q = m->raw_msg;
+ while (*q)
+ *p++ = *q++;
+ *p++ = '\n';
+ *p = '\0';
+ m->m_len = p - m->m;
+ int err = ls->handler(ls, m);
+ if (err)
+ log_report_err(ls, m, err);
+ }
+
+ /* m->m is not reset here, so it must not be used after we return */
+ if (free_buf)
+ xfree(free_buf);
+ return 0;
+}
+
+/*** Utility functions ***/
+
+/* Log a message of category @cat; the variadic front-end of vmsg(). */
+void
+msg(unsigned int cat, const char *fmt, ...)
+{
+  va_list ap;
+  va_start(ap, fmt);
+
+  vmsg(cat, fmt, ap);
+
+  va_end(ap);
+}
+
+/* Terminate the program: exit with status 1, or abort() if DEBUG_DIE_BY_ABORT is defined. */
+static void NONRET
+do_die(void)
+{
+#ifndef DEBUG_DIE_BY_ABORT
+  exit(1);
+#else
+  abort();
+#endif
+}
+
+/* Log a fatal error message, call log_die_hook (if there is one) and terminate the program. */
+void
+die(const char *fmt, ...)
+{
+  va_list ap;
+  va_start(ap, fmt);
+  vmsg(L_FATAL, fmt, ap);
+  va_end(ap);
+
+  void (*hook)(void) = log_die_hook;
+  if (hook)
+    hook();
+
+  do_die();
+}
+
+/* Log a failed assertion together with its source location and abort(). */
+void
+assert_failed(const char *assertion, const char *file, int line)
+{
+  msg(L_FATAL,
+      "Assertion `%s' failed at %s:%d",
+      assertion, file, line);
+  abort();
+}
+
+/* Variant of assert_failed() carrying no information on the assertion; terminates by die(). */
+void
+assert_failed_noinfo(void)
+{
+  static const char complaint[] = "Internal error: Assertion failed.";
+  die(complaint);
+}
+
+/* Return a pointer to the part of path @n following the last slash (all of @n if there is none). */
+static const char *
+log_basename(const char *n)
+{
+  const char *base = n;
+
+  for (const char *c = n; *c; c++)
+    if (*c == '/')
+      base = c + 1;
+  return base;
+}
+
+/*
+ * Set log_title to the base name of @argv0, truncated to fit a static buffer.
+ * A NULL @argv0 leaves log_title untouched.
+ */
+void
+log_init(const char *argv0)
+{
+  static char log_progname[32];
+
+  if (!argv0)
+    return;
+
+  const char *base = log_basename(argv0);
+  size_t room = sizeof(log_progname) - 1;
+
+  strncpy(log_progname, base, room);
+  log_progname[room] = 0;
+  log_title = log_progname;
+}
+
+/* Record the PID of the current process in log_pid. */
+void
+log_fork(void)
+{
+  pid_t self = getpid();
+  log_pid = self;
+}
+
+#ifdef TEST
+
+#include <syslog.h>
+
+/*
+ * Self-test: registers a message type, opens a syslog stream and logs
+ * messages padded by %300s, which do not fit in the 256-byte buffer of
+ * vmsg() and therefore exercise the dynamically allocated path.
+ */
+int main(void)
+{
+ // The type must not exist before it is registered
+ int type = log_find_type("foo");
+ ASSERT(type < 0);
+ type = log_register_type("foo");
+
+ struct log_stream *ls = log_new_syslog("local3", 0);
+#if 0
+ // Disabled: makes the stream its own substream to provoke the nesting-limit error
+ log_add_substream(ls, ls);
+ ls->stream_flags |= LSFLAG_ERR_IS_FATAL;
+#endif
+ msg(L_INFO | ls->regnum, "Brum <%300s>", ":-)");
+ // Add microseconds and message type to the format of the default stream
+ log_set_format(log_default_stream(), ~0U, LSFMT_USEC | LSFMT_TYPE);
+ msg(L_INFO | type, "Brum <%300s>", ":-)");
+ log_close_all();
+ return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Logging
+ *
+ * (c) 1997--2009 Martin Mares <mj@ucw.cz>
+ * (c) 2008 Tomas Gavenciak <gavento@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_LOG_H_
+#define _UCW_LOG_H_
+
+#include "ucw/clists.h"
+
+/*** === Messages and streams ***/
+
+/**
+ * Inside the logging system, a log message is always represented by this structure.
+ **/
+struct log_msg {
+ char *m; // The formatted message itself, ending with \n\0
+ int m_len; // Length without the \0
+ struct tm *tm; // Current time broken down to fields (NULL if not available)
+ struct timeval *tv; // Current time including microseconds (NULL if not available)
+ uns flags; // Category and other flags as passed to msg()
+ char *raw_msg; // Unformatted parts: the text of the message
+ char *stime; // Unformatted parts: date and time as a string
+ char *sutime; // Unformatted parts: fraction of a second as a string, starting with a dot
+};
+
+/**
+ * Each stream is represented by an instance of this structure.
+ **/
+struct log_stream {
+ char *name; // Optional name, allocated by the user (or constructor)
+ int regnum; // Stream number, already encoded by LS_SET_STRNUM(); -1 if closed
+ uns levels; // Bitmask of accepted severity levels (default: all)
+ uns types; // Bitmask of accepted message types (default: all)
+ uns msgfmt; // Formatting flags (LSFMT_xxx)
+ uns use_count; // Number of references to the stream
+ uns stream_flags; // Various other flags (LSFLAG_xxx)
+ int (*filter)(struct log_stream* ls, struct log_msg *m); // Filter function, return non-zero to discard the message
+ clist substreams; // Pass the message to these streams (simple_list of pointers)
+ int (*handler)(struct log_stream *ls, struct log_msg *m); // Called to commit the message, return 0 for success, errno on error
+ void (*close)(struct log_stream* ls); // Called upon log_close_stream()
+ void *user_data; // Not used by the logging system
+ // Private data of the handler follow
+};
+
+/**
+ * Formatting flags specifying the format of the message passed to the handler.
+ **/
+enum ls_fmt {
+ LSFMT_LEVEL = 1, // severity level (one letter)
+ LSFMT_TIME = 2, // date and time (YYYY-mm-dd HH:MM:SS)
+ LSFMT_USEC = 4, // also micro-seconds
+ LSFMT_TITLE = 8, // program title (log_title)
+ LSFMT_PID = 16, // program PID (log_pid)
+ LSFMT_LOGNAME = 32, // name of the log_stream
+ LSFMT_TYPE = 64, // message type
+};
+
+#define LSFMT_DEFAULT (LSFMT_LEVEL | LSFMT_TIME | LSFMT_TITLE | LSFMT_PID) /** Default format **/
+
+/**
+ * General stream flags.
+ **/
+enum ls_flag {
+ LSFLAG_ERR_IS_FATAL = 1, // When a logging error occurs, die() immediately
+ LSFLAG_ERR_REPORTED = 2, // A logging error has been already reported on this stream
+};
+
+/***
+ * === Message flags
+ *
+ * The @flags parameter of msg() is divided to several groups of bits (from the LSB):
+ * message severity level (`L_xxx`), destination stream, message type
+ * and control bits (e.g., `L_SIGHANDLER`).
+ ***/
+
+enum ls_flagbits { // Bit widths of groups
+ LS_LEVEL_BITS = 8,
+ LS_STRNUM_BITS = 16,
+ LS_TYPE_BITS = 5,
+ LS_CTRL_BITS = 3,
+};
+
+enum ls_flagpos { // Bit positions of groups
+ LS_LEVEL_POS = 0,
+ LS_STRNUM_POS = LS_LEVEL_POS + LS_LEVEL_BITS,
+ LS_TYPE_POS = LS_STRNUM_POS + LS_STRNUM_BITS,
+ LS_CTRL_POS = LS_TYPE_POS + LS_TYPE_BITS,
+};
+
+// NOTE(review): with the widths in ls_flagbits, LS_CTRL_POS is 8+16+5 = 29, so LS_CTRL_MASK
+// is 7 << 29, which does not fit in a signed 32-bit int -- confirm the compilers in use accept it
+enum ls_flagmasks { // Bit masks of groups
+ LS_LEVEL_MASK = ((1 << LS_LEVEL_BITS) - 1) << LS_LEVEL_POS,
+ LS_STRNUM_MASK = ((1 << LS_STRNUM_BITS) - 1) << LS_STRNUM_POS,
+ LS_TYPE_MASK = ((1 << LS_TYPE_BITS) - 1) << LS_TYPE_POS,
+ LS_CTRL_MASK = ((1 << LS_CTRL_BITS) - 1) << LS_CTRL_POS,
+};
+
+// "Get" macros (break flags to parts)
+#define LS_GET_LEVEL(flags) (((flags) & LS_LEVEL_MASK) >> LS_LEVEL_POS) /** Extract severity level **/
+#define LS_GET_STRNUM(flags) (((flags) & LS_STRNUM_MASK) >> LS_STRNUM_POS) /** Extract stream number **/
+#define LS_GET_TYPE(flags) (((flags) & LS_TYPE_MASK) >> LS_TYPE_POS) /** Extract message type **/
+#define LS_GET_CTRL(flags) (((flags) & LS_CTRL_MASK) >> LS_CTRL_POS) /** Extract control bits **/
+
+// "Set" macros (parts to flags)
+#define LS_SET_LEVEL(level) ((level) << LS_LEVEL_POS) /** Convert severity level to flags **/
+#define LS_SET_STRNUM(strnum) ((strnum) << LS_STRNUM_POS) /** Convert stream number to flags **/
+#define LS_SET_TYPE(type) ((type) << LS_TYPE_POS) /** Convert message type to flags **/
+#define LS_SET_CTRL(ctrl) ((ctrl) << LS_CTRL_POS) /** Convert control bits to flags **/
+
+#define LS_NUM_TYPES (1 << LS_TYPE_BITS)
+
+/** Register a new message type and return the corresponding flag set (encoded by `LS_SET_TYPE`). **/
+int log_register_type(const char *name);
+
+/** Find a message type by name and return the corresponding flag set. Returns -1 if no such type found. **/
+int log_find_type(const char *name);
+
+/** Given a flag set, extract the message type ID and return its name. **/
+char *log_type_name(uns flags);
+
+/*** === Operations on streams ***/
+
+/**
+ * Allocate a new log stream with no handler and an empty substream list.
+ * Since struct log_stream is followed by private data, @size bytes of memory are allocated
+ * for the whole structure. See below for functions creating specific stream types.
+ **/
+struct log_stream *log_new_stream(size_t size);
+
+/**
+ * Decrement the use count of a stream. If it becomes zero, close the stream,
+ * free its memory, and unlink all its substreams.
+ **/
+int log_close_stream(struct log_stream *ls);
+
+/**
+ * Get a new reference on an existing stream. For convenience, the return value is
+ * equal to the argument @ls.
+ **/
+static inline struct log_stream *log_ref_stream(struct log_stream *ls)
+{
+  ls->use_count += 1;
+  return ls;
+}
+
+/**
+ * Link a substream to a stream. The substream gains a reference, preventing
+ * it from being freed until it is unlinked.
+ **/
+void log_add_substream(struct log_stream *where, struct log_stream *what);
+
+/**
+ * Unlink all occurrences of a substream @what from stream @where. Each
+ * occurrence loses a reference. If @what is NULL, all substreams are unlinked.
+ * Returns the number of unlinked substreams.
+ **/
+int log_rm_substream(struct log_stream *where, struct log_stream *what);
+
+/**
+ * Set formatting flags of a given stream and all its substreams. The flags are
+ * AND'ed with @mask and OR'ed with @data.
+ **/
+void log_set_format(struct log_stream *ls, uns mask, uns data);
+
+/**
+ * Find a stream by its registration number (in the format of logging flags).
+ * Returns NULL if there is no such stream.
+ **/
+struct log_stream *log_stream_by_flags(uns flags);
+
+/** Return a pointer to the default stream (stream #0). **/
+static inline struct log_stream *log_default_stream(void)
+{
+  struct log_stream *stream0 = log_stream_by_flags(0);
+  return stream0;
+}
+
+/**
+ * Close all open streams, un-initialize the module, free all memory and
+ * reset the logging mechanism to use stderr only.
+ **/
+void log_close_all(void);
+
+/***
+ * === Logging to files
+ *
+ * All log files are open in append mode, which guarantees atomicity of write()
+ * even in multi-threaded programs.
+ ***/
+
+struct log_stream *log_new_file(const char *path, uns flags); /** Create a stream bound to a log file. See `FF_xxx` for @flags. **/
+struct log_stream *log_new_fd(int fd, uns flags); /** Create a stream bound to a file descriptor. See `FF_xxx` for @flags. **/
+
+enum log_file_flag { /** Flags used for file-based logging **/
+ FF_FORMAT_NAME = 1, // Internal: Name contains strftime escapes
+ FF_CLOSE_FD = 2, // Close the fd with the stream (use with log_new_fd())
+ FF_FD2_FOLLOWS = 4, // Maintain stderr as a clone of this stream
+};
+
+/**
+ * When a time-based name of the log file changes, the logger switches to a new
+ * log file automatically. This can be sometimes inconvenient, so you can use
+ * this function to disable the automatic switches. The calls to this function
+ * can be nested.
+ **/
+void log_switch_disable(void);
+void log_switch_enable(void); /** Negate the effect of log_switch_disable(). **/
+int log_switch(void); /** Switch log files manually. **/
+
+/***
+ * === Logging to syslog
+ *
+ * This log stream uses the libc interface to the system logging daemon (`syslogd`).
+ * This interface has several limitations:
+ *
+ * * Syslog severity levels are poorer than our scheme, so they are translated with a slight
+ * loss of information (most importantly, the distinction between local and
+ * remote messages is lost). If you are interested in details, search the
+ * source for syslog_level().
+ * * Syslog options (especially logging of PID with each message) must be fixed
+ * during initialization of the logger
+ * * Syslog provides its own formatting, so we turn off all formatting flags
+ * of the LibUCW logger. You can override this manually by setting the @msgfmt
+ * field of the log stream, but the result won't be nice.
+ * * Syslog does not support timestamps with sub-second precision.
+ ***/
+
+/**
+ * Create a log stream for logging to a selected syslog facility.
+ * The @options are passed to openlog(). (Beware, due to limitations of the
+ * syslog interface in libc, the @options are shared for all syslog streams
+ * and they are applied when the first stream is created.)
+ **/
+struct log_stream *log_new_syslog(const char *facility, int options);
+
+/**
+ * Verify that a facility of the given name exists. Return 1 if it does, 0 otherwise.
+ **/
+int log_syslog_facility_exists(const char *facility);
+
+/***
+ * === Configuring log streams
+ *
+ * If you use the LibUCW mechanism for parsing config files, you can let your
+ * user configure arbitrary log streams in the Logging section of the config file
+ * (see examples in the default config file). LibUCW automatically verifies that
+ * the configuration is consistent (this is performed in the commit hook of the
+ * config section), but it opens the streams only upon request. The following
+ * functions can be used to control that.
+ ***/
+
+/** Open a log stream configured under the specified name and increase its use count. **/
+struct log_stream *log_new_configured(const char *name);
+
+/** Open a log stream configured under the specified name and use it as the default destination. **/
+void log_configured(const char *name);
+
+/**
+ * Verify that a stream called @name was configured. If it wasn't, return an error
+ * message. This is intended to be used in configuration commit hooks.
+ **/
+char *log_check_configured(const char *name);
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Main Loop
+ *
+ * (c) 2004--2006 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#undef LOCAL_DEBUG
+
+#include "ucw/lib.h"
+#include "ucw/mainloop.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <signal.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <time.h>
+#include <sys/poll.h>
+#include <sys/wait.h>
+#include <sys/time.h>
+
+timestamp_t main_now;
+ucw_time_t main_now_seconds;
+timestamp_t main_idle_time;
+uns main_shutdown;
+
+clist main_timer_list, main_file_list, main_hook_list, main_process_list;
+static uns main_file_cnt;
+static uns main_poll_table_obsolete, main_poll_table_size;
+static struct pollfd *main_poll_table;
+static uns main_sigchld_set_up;
+
+/* Refresh main_now (milliseconds) and main_now_seconds from the system clock. */
+void
+main_get_time(void)
+{
+  struct timeval now;
+
+  gettimeofday(&now, NULL);
+  main_now = (timestamp_t)now.tv_sec * 1000 + now.tv_usec / 1000;
+  main_now_seconds = now.tv_sec;
+  // DBG("It's %lld o'clock", (long long) main_now);
+}
+
+/* Initialize the main loop: empty all lists, mark the poll table as obsolete, read the clock. */
+void
+main_init(void)
+{
+  DBG("MAIN: Initializing");
+
+  clist_init(&main_timer_list);
+  clist_init(&main_file_list);
+  clist_init(&main_hook_list);
+  clist_init(&main_process_list);
+
+  main_file_cnt = 0;
+  main_poll_table_obsolete = 1;
+
+  main_get_time();
+}
+
+/*
+ * (Re)schedule timer @tm to expire at absolute time @expires; zero removes
+ * the timer. The list of timers is kept sorted by expiration time and the
+ * timer is put after all timers which expire at the same time.
+ */
+void
+timer_add(struct main_timer *tm, timestamp_t expires)
+{
+  if (!expires)
+    DBG("MAIN: Clearing timer %p", tm);
+  else
+    DBG("MAIN: Setting timer %p (expire at now+%lld)", tm, (long long)(expires-main_now));
+
+  /* A non-zero expiration time means that the timer is linked in the list */
+  if (tm->expires)
+    clist_remove(&tm->n);
+  tm->expires = expires;
+  if (!expires)
+    return;
+
+  /* Find the first timer which does not expire before us */
+  cnode *pos;
+  for (pos = main_timer_list.head.next; pos != &main_timer_list.head; pos = pos->next)
+    if (((struct main_timer *) pos)->expires >= expires)
+      break;
+  clist_insert_before(&tm->n, pos);
+}
+
+/* Deactivate timer @tm; equivalent to scheduling it with zero expiration time. */
+void
+timer_del(struct main_timer *tm)
+{
+  const timestamp_t never = 0;
+  timer_add(tm, never);
+}
+
+/* Handler of file timeout timers: stops the timer and reports MFERR_TIMEOUT to the error handler. */
+static void
+file_timer_expired(struct main_timer *tm)
+{
+  struct main_file *owner = tm->data;
+
+  timer_del(&owner->timer);
+  if (!owner->error_handler)
+    return;
+  owner->error_handler(owner, MFERR_TIMEOUT);
+}
+
+/*
+ * Start watching file @fi in the main loop: link it to main_file_list,
+ * prepare its timeout timer and switch the descriptor to non-blocking mode.
+ * A failure to set non-blocking mode is only logged.
+ */
+void
+file_add(struct main_file *fi)
+{
+  DBG("MAIN: Adding file %p (fd=%d)", fi, fi->fd);
+  ASSERT(!fi->n.next);
+  clist_add_tail(&main_file_list, &fi->n);
+  fi->timer.handler = file_timer_expired;
+  fi->timer.data = fi;
+  main_file_cnt++;
+  main_poll_table_obsolete = 1;
+
+  /* Add O_NONBLOCK to the current status flags instead of overwriting them (O_APPEND etc.) */
+  int fl = fcntl(fi->fd, F_GETFL);
+  if (fl < 0 || fcntl(fi->fd, F_SETFL, fl | O_NONBLOCK) < 0)
+    msg(L_ERROR, "Error setting fd %d to non-blocking mode: %m. Keep fingers crossed.", fi->fd);
+}
+
+/*
+ * Recalculate the poll events of file @fi according to which handlers are set.
+ * Does nothing if the file has no entry in the poll table yet.
+ */
+void
+file_chg(struct main_file *fi)
+{
+  struct pollfd *pfd = fi->pollfd;
+  if (!pfd)
+    return;
+
+  pfd->events = 0;
+  if (fi->write_handler)
+    pfd->events |= POLLOUT | POLLERR;
+  if (fi->read_handler)
+    pfd->events |= POLLIN | POLLHUP | POLLERR;
+}
+
+/* Stop watching file @fi: cancel its timeout, unlink it and request a rebuild of the poll table. */
+void
+file_del(struct main_file *fi)
+{
+  DBG("MAIN: Deleting file %p (fd=%d)", fi, fi->fd);
+  ASSERT(fi->n.next);
+
+  timer_del(&fi->timer);
+
+  clist_remove(&fi->n);
+  fi->n.prev = NULL;
+  fi->n.next = NULL;
+
+  main_file_cnt--;
+  main_poll_table_obsolete = 1;
+}
+
+/*
+ * Read handler used by file_read(): keeps filling the buffer until it is
+ * full or the end of file is reached, then calls read_done(). Returns 0
+ * when a read failed (including the case that no data are available yet),
+ * 1 after read_done() has been called.
+ */
+static int
+file_read_handler(struct main_file *fi)
+{
+  for (;;)
+    {
+      if (fi->rpos >= fi->rlen)
+        break;
+      int got = read(fi->fd, fi->rbuf + fi->rpos, fi->rlen - fi->rpos);
+      DBG("MAIN: FD %d: read %d", fi->fd, got);
+      if (got < 0)
+        {
+          /* Interrupted and would-block reads are retried later, other errors are reported */
+          if (errno != EINTR && errno != EAGAIN && fi->error_handler)
+            fi->error_handler(fi, MFERR_READ);
+          return 0;
+        }
+      if (got == 0)
+        break;
+      fi->rpos += got;
+    }
+
+  DBG("MAIN: FD %d done read %d of %d", fi->fd, fi->rpos, fi->rlen);
+  fi->read_handler = NULL;
+  file_chg(fi);
+  fi->read_done(fi);
+  return 1;
+}
+
+/*
+ * Write handler used by file_write(): keeps writing until the whole buffer
+ * is out, then calls write_done(). Returns 0 when a write failed (including
+ * the case that the descriptor is not ready), 1 after write_done() has been called.
+ */
+static int
+file_write_handler(struct main_file *fi)
+{
+  for (;;)
+    {
+      if (fi->wpos >= fi->wlen)
+        break;
+      int sent = write(fi->fd, fi->wbuf + fi->wpos, fi->wlen - fi->wpos);
+      DBG("MAIN: FD %d: write %d", fi->fd, sent);
+      if (sent < 0)
+        {
+          /* Interrupted and would-block writes are retried later, other errors are reported */
+          if (errno != EINTR && errno != EAGAIN && fi->error_handler)
+            fi->error_handler(fi, MFERR_WRITE);
+          return 0;
+        }
+      fi->wpos += sent;
+    }
+
+  DBG("MAIN: FD %d done write %d", fi->fd, fi->wpos);
+  fi->write_handler = NULL;
+  file_chg(fi);
+  fi->write_done(fi);
+  return 1;
+}
+
+/*
+ * Request reading of @len bytes from file @fi to @buf; read_done() is called
+ * when finished. Zero @len cancels the pending read request.
+ */
+void
+file_read(struct main_file *fi, void *buf, uns len)
+{
+  ASSERT(fi->n.next);
+
+  if (!len)
+    {
+      fi->read_handler = NULL;
+      fi->rbuf = NULL;
+      fi->rpos = fi->rlen = 0;
+    }
+  else
+    {
+      fi->read_handler = file_read_handler;
+      fi->rbuf = buf;
+      fi->rpos = 0;
+      fi->rlen = len;
+    }
+  file_chg(fi);
+}
+
+/*
+ * Request writing of @len bytes from @buf to file @fi; write_done() is called
+ * when finished. Zero @len cancels the pending write request.
+ */
+void
+file_write(struct main_file *fi, void *buf, uns len)
+{
+  ASSERT(fi->n.next);
+
+  if (!len)
+    {
+      fi->write_handler = NULL;
+      fi->wbuf = NULL;
+      fi->wpos = fi->wlen = 0;
+    }
+  else
+    {
+      fi->write_handler = file_write_handler;
+      fi->wbuf = buf;
+      fi->wpos = 0;
+      fi->wlen = len;
+    }
+  file_chg(fi);
+}
+
+/* Set the timeout of file @fi to absolute time @expires; the file must be linked in the main loop. */
+void
+file_set_timeout(struct main_file *fi, timestamp_t expires)
+{
+  ASSERT(fi->n.next);
+
+  struct main_timer *timeout = &fi->timer;
+  timer_add(timeout, expires);
+}
+
+/* Close the descriptors of all files known to the main loop; the file structures stay linked. */
+void
+file_close_all(void)
+{
+  CLIST_FOR_EACH(struct main_file *, fi, main_file_list)
+    {
+      close(fi->fd);
+    }
+}
+
+/* Append hook @ho to the list of hooks; the hook must not be linked yet. */
+void
+hook_add(struct main_hook *ho)
+{
+  DBG("MAIN: Adding hook %p", ho);
+
+  ASSERT(!ho->n.next);
+  clist_add_tail(&main_hook_list, &ho->n);
+}
+
+/* Unlink hook @ho from the list of hooks and mark it as unlinked. */
+void
+hook_del(struct main_hook *ho)
+{
+  DBG("MAIN: Deleting hook %p", ho);
+
+  ASSERT(ho->n.next);
+  clist_remove(&ho->n);
+  ho->n.prev = NULL;
+  ho->n.next = NULL;
+}
+
+/* Handler of SIGCHLD installed by process_add(); it does nothing except for debug output. */
+static void
+main_sigchld_handler(int signum UNUSED)
+{
+  DBG("SIGCHLD received");
+}
+
+/*
+ * Start watching process @mp, which must have its handler set. The first
+ * call also installs a handler of SIGCHLD.
+ */
+void
+process_add(struct main_process *mp)
+{
+  DBG("MAIN: Adding process %p (pid=%d)", mp, mp->pid);
+  ASSERT(!mp->n.next);
+  ASSERT(mp->handler);
+  clist_add_tail(&main_process_list, &mp->n);
+
+  if (main_sigchld_set_up)
+    return;
+
+  struct sigaction act;
+  bzero(&act, sizeof(act));
+  act.sa_handler = main_sigchld_handler;
+  act.sa_flags = SA_NOCLDSTOP | SA_RESTART;
+  sigaction(SIGCHLD, &act, NULL);
+  main_sigchld_set_up = 1;
+}
+
+/* Stop watching process @mp: unlink it from the list of processes and mark it as unlinked. */
+void
+process_del(struct main_process *mp)
+{
+  DBG("MAIN: Deleting process %p (pid=%d)", mp, mp->pid);
+  ASSERT(mp->n.next);
+  clist_remove(&mp->n);
+  /* Clear both links, the same way file_del() and hook_del() do */
+  mp->n.next = mp->n.prev = NULL;
+}
+
+/*
+ * Fork a new process and let the main loop watch it. Returns 0 in the child
+ * and 1 in the parent. If fork() fails, the handler is called immediately
+ * with status -1 and 1 is returned.
+ */
+int
+process_fork(struct main_process *mp)
+{
+  pid_t child = fork();
+
+  if (!child)
+    return 0;
+
+  if (child > 0)
+    {
+      DBG("MAIN: Forked process %d", (int) child);
+      mp->pid = child;
+      process_add(mp);
+      return 1;
+    }
+
+  DBG("MAIN: Fork failed");
+  mp->status = -1;
+  format_exit_status(mp->status_msg, -1);
+  mp->handler(mp);
+  return 1;
+}
+
+/*
+ * Log the state of the main loop (active timers, files, hooks and processes)
+ * at level L_DEBUG. Does nothing unless compiled with CONFIG_DEBUG.
+ */
+void
+main_debug(void)
+{
+#ifdef CONFIG_DEBUG
+ msg(L_DEBUG, "### Main loop status on %lld", (long long)main_now);
+ msg(L_DEBUG, "\tActive timers:");
+ struct main_timer *tm;
+ // Expiration times are printed relative to main_now; 999999 stands for no expiration time
+ CLIST_WALK(tm, main_timer_list)
+ msg(L_DEBUG, "\t\t%p (expires %lld, data %p)", tm, (long long)(tm->expires ? tm->expires-main_now : 999999), tm->data);
+ struct main_file *fi;
+ msg(L_DEBUG, "\tActive files:");
+ CLIST_WALK(fi, main_file_list)
+ msg(L_DEBUG, "\t\t%p (fd %d, rh %p, wh %p, eh %p, expires %lld, data %p)",
+ fi, fi->fd, fi->read_handler, fi->write_handler, fi->error_handler,
+ (long long)(fi->timer.expires ? fi->timer.expires-main_now : 999999), fi->data);
+ msg(L_DEBUG, "\tActive hooks:");
+ struct main_hook *ho;
+ CLIST_WALK(ho, main_hook_list)
+ msg(L_DEBUG, "\t\t%p (func %p, data %p)", ho, ho->handler, ho->data);
+ msg(L_DEBUG, "\tActive processes:");
+ struct main_process *pr;
+ CLIST_WALK(pr, main_process_list)
+ msg(L_DEBUG, "\t\t%p (pid %d, data %p)", pr, pr->pid, pr->data);
+#endif
+}
+
+/*
+ * Rebuild the table passed to poll() from the list of files. The table
+ * is reallocated if it is too small; its size grows by doubling.
+ */
+static void
+main_rebuild_poll_table(void)
+{
+  struct main_file *fi;
+
+  if (main_poll_table_size < main_file_cnt)
+    {
+      if (!main_poll_table)
+        main_poll_table_size = 1;
+      else
+        xfree(main_poll_table);
+      while (main_poll_table_size < main_file_cnt)
+        main_poll_table_size *= 2;
+      main_poll_table = xmalloc(sizeof(struct pollfd) * main_poll_table_size);
+    }
+
+  uns slot = 0;
+  DBG("MAIN: Rebuilding poll table: %d of %d entries set", main_file_cnt, main_poll_table_size);
+  CLIST_WALK(fi, main_file_list)
+    {
+      /* Entries follow the order of files in the list */
+      struct pollfd *pfd = &main_poll_table[slot++];
+      pfd->fd = fi->fd;
+      fi->pollfd = pfd;
+      file_chg(fi);
+    }
+  main_poll_table_obsolete = 0;
+}
+
+/*
+ * The main loop itself. Each iteration runs the handlers of expired timers,
+ * calls all hooks, collects exited child processes and then waits in poll()
+ * for activity on the watched files, calling their read/write handlers.
+ *
+ * Returns when main_shutdown is set, when the smallest value returned by
+ * the hooks is HOOK_SHUTDOWN, or when both the smallest and the largest
+ * value are HOOK_DONE.
+ */
+void
+main_loop(void)
+{
+  DBG("MAIN: Entering main_loop");
+  ASSERT(main_timer_list.head.next);	/* The lists must be initialized, see main_init() */
+
+  struct main_file *fi;
+  struct main_hook *ho;
+  struct main_timer *tm;
+  struct main_process *pr;
+  cnode *tmp;
+
+  main_get_time();
+  for (;;)
+    {
+      /* Time of the next wake-up; zero means that we should not sleep at all */
+      timestamp_t wake = main_now + 1000000000;
+
+      /* Timers */
+      while ((tm = clist_head(&main_timer_list)) && tm->expires <= main_now)
+        {
+          DBG("MAIN: Timer %p expired at now-%lld", tm, (long long)(main_now - tm->expires));
+          tm->handler(tm);
+        }
+
+      /* Hooks: remember the smallest and the largest return value */
+      int hook_min = HOOK_RETRY;
+      int hook_max = HOOK_SHUTDOWN;
+      CLIST_WALK_DELSAFE(ho, main_hook_list, tmp)
+        {
+          DBG("MAIN: Hook %p", ho);
+          int ret = ho->handler(ho);
+          hook_min = MIN(hook_min, ret);
+          hook_max = MAX(hook_max, ret);
+        }
+      if (hook_min == HOOK_SHUTDOWN ||
+          hook_min == HOOK_DONE && hook_max == HOOK_DONE ||
+          main_shutdown)
+        {
+          DBG("MAIN: Shut down by %s", main_shutdown ? "main_shutdown" : "a hook");
+          return;
+        }
+      if (hook_max == HOOK_RETRY)
+        wake = 0;
+
+      if (main_poll_table_obsolete)
+        main_rebuild_poll_table();
+
+      /* Child processes */
+      if (!clist_empty(&main_process_list))
+        {
+          int stat;
+          pid_t pid;
+          wake = MIN(wake, main_now + 10000);
+          while ((pid = waitpid(-1, &stat, WNOHANG)) > 0)
+            {
+              DBG("MAIN: Child %d exited with status %x", pid, stat);
+              CLIST_WALK(pr, main_process_list)
+                if (pr->pid == pid)
+                  {
+                    pr->status = stat;
+                    process_del(pr);
+                    format_exit_status(pr->status_msg, pr->status);
+                    DBG("MAIN: Calling process exit handler");
+                    pr->handler(pr);
+                    break;
+                  }
+              wake = 0;
+            }
+        }
+
+      /* FIXME: Here is a small race window where SIGCHLD can come unnoticed. */
+      if ((tm = clist_head(&main_timer_list)) && tm->expires < wake)
+        wake = tm->expires;
+      main_get_time();
+
+      /*
+       * The clock has been refreshed, so the wake-up time can already lie in
+       * the past. Never pass a negative timeout to poll(), since that means
+       * waiting with no time limit.
+       */
+      int timeout = ((wake > main_now) ? wake - main_now : 0);
+      DBG("MAIN: Poll for %d fds and timeout %d ms", main_file_cnt, timeout);
+      int ready = poll(main_poll_table, main_file_cnt, timeout);
+      timestamp_t old_now = main_now;
+      main_get_time();
+      main_idle_time += main_now - old_now;
+
+      /* Files: the poll table entries follow the order of main_file_list */
+      if (ready > 0)
+        {
+          struct pollfd *p = main_poll_table;
+          CLIST_WALK(fi, main_file_list)
+            {
+              if (p->revents & (POLLIN | POLLHUP | POLLERR))
+                {
+                  do
+                    DBG("MAIN: Read event on fd %d", p->fd);
+                  while (fi->read_handler && fi->read_handler(fi) && !main_poll_table_obsolete);
+                  if (main_poll_table_obsolete)	/* File entries have been inserted or deleted => better not risk continuing to nowhere */
+                    break;
+                }
+              if (p->revents & (POLLOUT | POLLERR))
+                {
+                  do
+                    DBG("MAIN: Write event on fd %d", p->fd);
+                  while (fi->write_handler && fi->write_handler(fi) && !main_poll_table_obsolete);
+                  if (main_poll_table_obsolete)
+                    break;
+                }
+              p++;
+            }
+        }
+    }
+}
+
+#ifdef TEST
+
+/* Objects used by the self-test; being static, they start zeroed */
+static struct main_process mp;
+static struct main_file fin, fout;
+static struct main_hook hook;
+static struct main_timer tm;
+
+/* Buffer for data read from the standard input */
+static byte rb[16];
+
+/* read_done callback: an incompletely filled buffer means EOF, otherwise ask for another buffer */
+static void dread(struct main_file *fi)
+{
+ if (fi->rpos < fi->rlen)
+ {
+ msg(L_INFO, "Read EOF");
+ file_del(fi);
+ }
+ else
+ {
+ msg(L_INFO, "Read done");
+ file_read(fi, rb, sizeof(rb));
+ }
+}
+
+/* error_handler callback: log the error and stop watching the file */
+static void derror(struct main_file *fi, int cause)
+{
+ msg(L_INFO, "Error: %m !!! (cause %d)", cause);
+ file_del(fi);
+}
+
+/* write_done callback */
+static void dwrite(struct main_file *fi UNUSED)
+{
+ msg(L_INFO, "Write done");
+}
+
+/* Hook called in every iteration of the main loop */
+static int dhook(struct main_hook *ho UNUSED)
+{
+ msg(L_INFO, "Hook called");
+ return 0;
+}
+
+/* Timer handler: re-arms the timer to tick again after 10000 ms */
+static void dtimer(struct main_timer *tm)
+{
+ msg(L_INFO, "Timer tick");
+ timer_add(tm, main_now + 10000);
+}
+
+/* Body of the child process */
+static void dentry(void)
+{
+ msg(L_INFO, "*** SUBPROCESS START ***");
+ sleep(2);
+ msg(L_INFO, "*** SUBPROCESS FINISH ***");
+ exit(0);
+}
+
+/* Handler called when the child process exits */
+static void dexit(struct main_process *pr)
+{
+ msg(L_INFO, "Subprocess %d exited with status %x", pr->pid, pr->status);
+}
+
+/* Self-test: watches stdin and stdout, registers a hook and a timer, forks a child and runs the loop */
+int
+main(void)
+{
+ log_init(NULL);
+ main_init();
+
+ fin.fd = 0;
+ fin.read_done = dread;
+ fin.error_handler = derror;
+ file_add(&fin);
+ file_read(&fin, rb, sizeof(rb));
+
+ fout.fd = 1;
+ fout.write_done = dwrite;
+ fout.error_handler = derror;
+ file_add(&fout);
+ file_write(&fout, "Hello, world!\n", 14);
+
+ hook.handler = dhook;
+ hook_add(&hook);
+
+ tm.handler = dtimer;
+ timer_add(&tm, main_now + 1000);
+
+ mp.handler = dexit;
+ // process_fork() returns 0 in the child
+ if (!process_fork(&mp))
+ dentry();
+
+ main_debug();
+
+ main_loop();
+ msg(L_INFO, "Finished.");
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Main Loop
+ *
+ * (c) 2004--2005 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_MAINLOOP_H
+#define _UCW_MAINLOOP_H
+
+#include "ucw/clists.h"
+
+/***
+ * [[conventions]]
+ * Conventions
+ * -----------
+ *
+ * The descriptions of structures contain some fields marked with `[*]`.
+ * These are the only ones that are intended to be manipulated by the user.
+ * The remaining fields serve for internal use only and you must initialize them
+ * to zeroes.
+ ***/
+
+/***
+ * [[time]]
+ * Time manipulation
+ * -----------------
+ *
+ * This part allows you to get the current time and request
+ * to have your function called when the time comes.
+ ***/
+
+extern timestamp_t main_now; /** Current time in milliseconds since the UNIX epoch. See @main_get_time(). **/
+extern ucw_time_t main_now_seconds; /** Current time in seconds since the epoch. **/
+extern timestamp_t main_idle_time; /** Total time in milliseconds spent in the poll() call. **/
+extern clist main_timer_list, main_file_list, main_hook_list, main_process_list;
+
+/**
+ * This is a description of a timer.
+ * You fill in a handler function, any user-defined data you wish to pass
+ * to the handler, and then you invoke @timer_add().
+ *
+ * The handler() function must either call @timer_del() to delete the timer,
+ * or call @timer_add() with a different expiration time.
+ **/
+struct main_timer {
+ cnode n; /* Internal: list node (presumably links the timer into main_timer_list) */
+ timestamp_t expires; /* Internal: expiration time; do not set directly, pass it to timer_add() */
+ void (*handler)(struct main_timer *tm); /* [*] Function to be called when the timer expires. */
+ void *data; /* [*] Data for use by the handler */
+};
+
+/**
+ * Adds a new timer into the mainloop to be watched and called
+ * when it expires. It can also be used to modify an already running
+ * timer. It is permitted (and usual) to call this function from the
+ * timer's handler itself if you want the timer to trigger again.
+ *
+ * The @expires parameter is absolute, just add <<var_main_now,`main_now`>> if you need a relative timer.
+ **/
+void timer_add(struct main_timer *tm, timestamp_t expires);
+/**
+ * Removes a timer from the active ones. It is permitted (and usual) to call
+ * this function from the timer's handler itself if you want to deactivate
+ * the timer.
+ **/
+void timer_del(struct main_timer *tm);
+
+/**
+ * Forces refresh of <<var_main_now,`main_now`>>. You do not usually
+ * need to call this, since it is called every time the loop polls for
+ * changes. It is here if you need extra precision or if some of the
+ * hooks take a long time.
+ **/
+void main_get_time(void);
+
+/***
+ * [[file]]
+ * Activity on file descriptors
+ * ----------------------------
+ *
+ * You can let the mainloop watch over a set of file descriptors
+ * for changes.
+ *
+ * It supports two ways of use. With the first one, you provide
+ * low-level handlers for reading and writing (`read_handler` and
+ * `write_handler`). They will be called every time the file descriptor
+ * is ready to be read from or written to.
+ *
+ * Return non-zero if you want to get the handler called again right now (you
+ * handled a block of data and expect more). If you return `0`, the hook will
+ * be called again in the next iteration, if it is still ready to be read/written.
+ *
+ * This way is suitable for listening sockets, interactive connections, where
+ * you need to parse everything that comes right away and similar cases.
+ *
+ * The second way is to ask mainloop to read or write a buffer of data. You
+ * provide a `read_done` or `write_done` handler respectively and call @file_read()
+ * or @file_write(). This is handy for data connections where you need to transfer
+ * data between two endpoints or for binary connections where the size of message
+ * is known in advance.
+ *
+ * It is possible to combine both methods, but it may be tricky to do it right.
+ *
+ * Both ways use `error_handler` to notify you about errors.
+ ***/
+
+/**
+ * If you want mainloop to watch a file descriptor, fill at least `fd` into this
+ * structure. To get any useful information from the mainloop, provide some handlers
+ * too.
+ *
+ * After that, insert it into the mainloop by calling @file_add().
+ **/
+struct main_file {
+ cnode n; /* Internal: list node (presumably links the file into main_file_list) */
+ int fd; /* [*] File descriptor */
+ int (*read_handler)(struct main_file *fi); /* [*] Called when the fd is ready for reading; call file_chg() after changing it */
+ int (*write_handler)(struct main_file *fi); /* [*] Called when the fd is ready for writing; call file_chg() after changing it */
+ void (*error_handler)(struct main_file *fi, int cause); /* [*] Handler to call on errors */
+ void *data; /* [*] Data for use by the handlers */
+ byte *rbuf; /* Read/write pointers for use by file_read/write */
+ uns rpos, rlen; /* Read position and requested length; rpos < rlen in read_done means EOF */
+ byte *wbuf;
+ uns wpos, wlen; /* Write position and requested length */
+ void (*read_done)(struct main_file *fi); /* [*] Called when file_read is finished; rpos < rlen if EOF */
+ void (*write_done)(struct main_file *fi); /* [*] Called when file_write is finished */
+ struct main_timer timer; /* Internal: presumably the timer behind file_set_timeout() */
+ struct pollfd *pollfd; /* Internal */
+};
+
+/**
+ * Specifies when or why an error happened. This is passed to the error handler.
+ * `errno` is still set to the original source of error. The only exception
+ * is `MFERR_TIMEOUT`, in which case `errno` is not set and the only possible
+ * cause of it is timeout on the file descriptor (see @file_set_timeout).
+ **/
+enum main_file_err_cause {
+ MFERR_READ, /* Presumably: the error happened while reading; errno is set */
+ MFERR_WRITE, /* Presumably: the error happened while writing; errno is set */
+ MFERR_TIMEOUT /* The timeout set by file_set_timeout() expired; errno is not set */
+};
+
+/**
+ * Inserts a <<struct_main_file,`main_file`>> structure into the mainloop to be
+ * watched for activity. You can call this at any time, even inside a handler
+ * (of course for a different file descriptor than the one of the handler).
+ **/
+void file_add(struct main_file *fi);
+/**
+ * Tells the mainloop the file has changed its state. Call it whenever you
+ * change any of the handlers.
+ *
+ * Can be called only on active files (only the ones added by @file_add()).
+ **/
+void file_chg(struct main_file *fi);
+/**
+ * Removes a file from the watched set. You have to call this on closed files
+ * too, since the mainloop does not handle close in any way.
+ *
+ * Can be called from a handler.
+ **/
+void file_del(struct main_file *fi);
+/**
+ * Asks the mainloop to read @len bytes of data from @fi into @buf.
+ * It cancels any previous unfinished read requested this way and overwrites
+ * `read_handler`.
+ *
+ * When the read is done, read_done() handler is called. If an EOF occurred,
+ * `rpos < rlen` (i.e. not all data were read).
+ *
+ * Can be called from a handler.
+ *
+ * You can use a call with zero @len to cancel current read, but all read data
+ * will be thrown away.
+ **/
+void file_read(struct main_file *fi, void *buf, uns len);
+/**
+ * Requests that the mainloop writes @len bytes of data from @buf to @fi.
+ * Cancels any previous unfinished write and overwrites `write_handler`.
+ *
+ * When it is written, write_done() handler is called.
+ *
+ * Can be called from a handler.
+ *
+ * If you call it with zero @len, it will cancel the previous write, but note
+ * some data may already be written.
+ **/
+void file_write(struct main_file *fi, void *buf, uns len);
+/**
+ * Sets a timer for a file @fi. If the timer is not overwritten or disabled
+ * until @expires, the file timeouts and error_handler() is called with
+ * <<enum_main_file_err_cause,`MFERR_TIMEOUT`>>.
+ *
+ * The mainloop does not disable or reset it, when something happens, it just
+ * bundles a timer with the file. If you want to watch for inactivity, it is
+ * your task to reset it whenever your handler is called.
+ *
+ * The @expires parameter is absolute (add <<var_main_now,`main_now`>> if you
+ * need relative). The call overwrites any previously set timeout. Value of `0`
+ * disables the timeout (the <<enum_main_file_err_cause,`MFERR_TIMEOUT`>> will
+ * not trigger).
+ *
+ * The use-cases for this are mainly sockets or pipes, when:
+ *
+ * - You want to drop inactive connections (no data come or go for a given time, not
+ * incomplete messages).
+ * - You want to enforce answer in a given time (for example authentication).
+ * - You give maximum time for a whole connection.
+ **/
+void file_set_timeout(struct main_file *fi, timestamp_t expires);
+/**
+ * Closes all file descriptors known to mainloop. Often used between fork()
+ * and exec().
+ **/
+void file_close_all(void);
+
+/***
+ * [[hooks]]
+ * Loop hooks
+ * ----------
+ *
+ * The hooks are called whenever the mainloop performs an iteration.
+ * You can shutdown the mainloop from within them or request an iteration
+ * to happen without sleeping (just poll, no waiting for events).
+ ***/
+
+/**
+ * A hook. It contains the function to call and some user data.
+ *
+ * The handler() must return one value from
+ * <<enum_main_hook_return,`main_hook_return`>>.
+ *
+ * Fill in the handler and data and pass it to @hook_add().
+ **/
+struct main_hook {
+ cnode n; /* Internal: list node (presumably links the hook into main_hook_list) */
+ int (*handler)(struct main_hook *ho); /* [*] Hook function; returns HOOK_xxx */
+ void *data; /* [*] For use by the handler */
+};
+
+/**
+ * Return value of the hook handler().
+ * Specifies what should happen next.
+ *
+ * - `HOOK_IDLE` -- Let the loop sleep until something happens, call after that.
+ * - `HOOK_RETRY` -- Force the loop to perform another iteration without sleeping.
+ * This will cause calling of all the hooks again soon.
+ * - `HOOK_DONE` -- The loop will terminate if all hooks return this.
+ * - `HOOK_SHUTDOWN` -- Shuts down the loop.
+ **/
+enum main_hook_return {
+ HOOK_IDLE, /* = 0: sleep until something happens */
+ HOOK_RETRY, /* = 1: iterate again without sleeping */
+ HOOK_DONE = -1, /* The loop terminates if all hooks return this */
+ HOOK_SHUTDOWN = -2 /* Shut the loop down */
+};
+
+/**
+ * Inserts a new hook into the loop.
+ * May be called from inside a hook handler too.
+ **/
+void hook_add(struct main_hook *ho);
+/**
+ * Removes an existing hook from the loop.
+ * May be called from inside a hook handler (to delete itself or other hook).
+ **/
+void hook_del(struct main_hook *ho);
+
+/***
+ * [[process]]
+ * Child processes
+ * ---------------
+ *
+ * The main loop can watch child processes and notify you
+ * when any of them terminates.
+ ***/
+
+/**
+ * Description of a watched process.
+ * You fill in the handler() and `data`.
+ * The rest is set with @process_fork().
+ **/
+struct main_process {
+ cnode n; /* Internal: list node (presumably links the process into main_process_list) */
+ int pid; /* Process id (0=not running) */
+ int status; /* Exit status (-1=fork failed) */
+ char status_msg[EXIT_STATUS_MSG_SIZE]; /* Textual status; filled in by process_fork() when the fork fails */
+ void (*handler)(struct main_process *mp); /* [*] Called when the process exits; process_del done automatically */
+ void *data; /* [*] For use by the handler */
+};
+
+/**
+ * Asks the mainloop to watch this process.
+ * As it is done automatically in @process_fork(), you need this only
+ * if you removed the process previously by @process_del().
+ **/
+void process_add(struct main_process *mp);
+/**
+ * Removes the process from the watched set. This is done
+ * automatically, when the process terminates, so you need it only
+ * when you do not want to watch a running process any more.
+ **/
+void process_del(struct main_process *mp);
+/**
+ * Forks and fills the @mp with information about the new process.
+ *
+ * If the fork() succeeds, it:
+ *
+ * - Returns 0 in the child.
+ * - Returns 1 in the parent and calls @process_add() on it.
+ *
+ * In the case of unsuccessful fork(), it:
+ *
+ * - Fills in the `status_msg` and sets `status` to -1.
+ * - Calls the handler() as if the process terminated.
+ * - Returns 1.
+ **/
+int process_fork(struct main_process *mp);
+
+/***
+ * [[control]]
+ * Control of the mainloop
+ * -----------------------
+ *
+ * These functions control the mainloop as a whole.
+ ***/
+
+extern uns main_shutdown; /** Setting this to nonzero forces the @main_loop() function to terminate. **/
+void main_init(void); /** Initializes the mainloop structures. Call before any `*_add` function. **/
+/**
+ * Start the mainloop.
+ * It will watch the provided objects and call callbacks.
+ * Terminates when someone sets <<var_main_shutdown,`main_shutdown`>>
+ * to nonzero, when all <<hook,hooks>> return
+ * <<enum_main_hook_return,`HOOK_DONE`>> or at least one <<hook,hook>>
+ * returns <<enum_main_hook_return,`HOOK_SHUTDOWN`>>.
+ **/
+void main_loop(void);
+void main_debug(void); /** Prints a lot of debug information about current status of the mainloop. **/
+
+#endif
--- /dev/null
+/*
+ * This code implements the MD5 message-digest algorithm.
+ * The algorithm is due to Ron Rivest. This code was
+ * written by Colin Plumb in 1993, no copyright is claimed.
+ * This code is in the public domain; do with it what you wish.
+ *
+ * Equivalent code is available from RSA Data Security, Inc.
+ * This code has been tested against that, and is equivalent,
+ * except that you don't need to include two pages of legalese
+ * with every copy.
+ *
+ * To compute the message digest of a chunk of bytes, declare an
+ * md5_context structure, pass it to md5_init, call md5_update as
+ * needed on buffers full of bytes, and then call md5_final, which
+ * will fill a supplied 16-byte array with the digest.
+ */
+
+#include "ucw/lib.h"
+#include "ucw/md5.h"
+
+#include <string.h> /* for memcpy() */
+
+#ifdef CPU_LITTLE_ENDIAN
+#define byteReverse(buf, len) /* Nothing */
+#else
+void byteReverse(byte *buf, uns longs);
+
+/*
+ * Rewrite @longs (which must be non-zero) 32-bit words stored at @buf as
+ * little-endian byte sequences into the host's native u32 representation,
+ * in place. Note: this code is harmless on little-endian machines.
+ */
+void byteReverse(byte *buf, uns longs)
+{
+	do {
+		u32 low = (uns) buf[1] << 8 | buf[0];
+		u32 high = (uns) buf[3] << 8 | buf[2];
+		*(u32 *) buf = high << 16 | low;
+		buf += 4;
+	} while (--longs);
+}
+#endif
+
+/*
+ * Begin a new MD5 computation: load the four state words with the
+ * algorithm's fixed initialization constants and zero the bit counter.
+ */
+void md5_init(md5_context *ctx)
+{
+	static const u32 initial_state[4] = {
+		0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476
+	};
+	uns i;
+
+	for (i = 0; i < 4; i++)
+		ctx->buf[i] = initial_state[i];
+
+	ctx->bits[0] = ctx->bits[1] = 0;
+}
+
+/*
+ * Feed another @len bytes from @buf into the hash. Complete 64-byte blocks
+ * are transformed immediately; an incomplete tail is kept in ctx->in until
+ * more data (or md5_final()) arrives.
+ */
+void md5_update(md5_context *ctx, const byte *buf, uns len)
+{
+	u32 old_low = ctx->bits[0];
+	u32 buffered = (old_low >> 3) & 0x3f;	/* Bytes already waiting in ctx->in */
+
+	/* Advance the 64-bit bit counter kept in two 32-bit halves */
+	ctx->bits[0] = old_low + ((u32) len << 3);
+	if (ctx->bits[0] < old_low)
+		ctx->bits[1]++;		/* Carry from low to high */
+	ctx->bits[1] += len >> 29;
+
+	/* First top up a partially filled block, if there is one */
+	if (buffered) {
+		byte *tail = (byte *) ctx->in + buffered;
+		u32 room = 64 - buffered;
+
+		if (len < room) {
+			/* Still not a whole block, just remember the data */
+			memcpy(tail, buf, len);
+			return;
+		}
+		memcpy(tail, buf, room);
+		byteReverse(ctx->in, 16);
+		md5_transform(ctx->buf, (u32 *) ctx->in);
+		buf += room;
+		len -= room;
+	}
+
+	/* Then consume all whole 64-byte blocks */
+	for (; len >= 64; buf += 64, len -= 64) {
+		memcpy(ctx->in, buf, 64);
+		byteReverse(ctx->in, 16);
+		md5_transform(ctx->buf, (u32 *) ctx->in);
+	}
+
+	/* Whatever remains waits for the next call */
+	memcpy(ctx->in, buf, len);
+}
+
+/*
+ * Finish the computation: append the padding (a 0x80 byte followed by
+ * zeroes up to 56 mod 64) and the 64-bit count of processed bits, run the
+ * last transform and return a pointer to the 16-byte digest, which lives
+ * inside @ctx.
+ */
+byte *md5_final(md5_context *ctx)
+{
+	/* Number of bytes currently buffered (length mod 64) */
+	uns used = (ctx->bits[0] >> 3) & 0x3F;
+	byte *tail = ctx->in + used;
+	uns room;
+
+	/* There is always at least one free byte for the padding marker */
+	*tail++ = 0x80;
+
+	/* Free bytes left in the current block after the marker */
+	room = 64 - 1 - used;
+
+	if (room >= 8) {
+		/* The length fits: zero-pad this block up to 56 bytes */
+		memset(tail, 0, room - 8);
+	} else {
+		/* No space for the length: flush this block zero-padded to 64 bytes... */
+		memset(tail, 0, room);
+		byteReverse(ctx->in, 16);
+		md5_transform(ctx->buf, (u32 *) ctx->in);
+
+		/* ...and start another one holding 56 zero bytes */
+		memset(ctx->in, 0, 56);
+	}
+	byteReverse(ctx->in, 14);
+
+	/* The last two words of the block carry the length in bits */
+	((u32 *) ctx->in)[14] = ctx->bits[0];
+	((u32 *) ctx->in)[15] = ctx->bits[1];
+
+	md5_transform(ctx->buf, (u32 *) ctx->in);
+	byteReverse((byte *) ctx->buf, 4);
+	return (byte *) ctx->buf;
+}
+
+/* The four core functions - F1 is optimized somewhat */
+
+/* #define F1(x, y, z) (x & y | ~x & z) */
+#define F1(x, y, z) (z ^ (x & (y ^ z)))
+#define F2(x, y, z) F1(z, x, y)
+#define F3(x, y, z) (x ^ y ^ z)
+#define F4(x, y, z) (y ^ (x | ~z))
+
+/* This is the central step in the MD5 algorithm. */
+#define MD5STEP(f, w, x, y, z, data, s) \
+ ( w += f(x, y, z) + data, w = w<<s | w>>(32-s), w += x )
+
+/*
+ * The core of the MD5 algorithm, this alters an existing MD5 hash to
+ * reflect the addition of 16 longwords of new data. md5_update() blocks
+ * the data and converts bytes into longwords for this routine.
+ *
+ * @buf holds the four state words and is updated in place; @in is one
+ * 64-byte block as 16 words and is only read. The 64 steps below come in
+ * four rounds of 16, one round per mixing function F1..F4.
+ */
+void md5_transform(u32 buf[4], u32 const in[16])
+{
+ u32 a, b, c, d;
+
+ /* Work on a copy of the state; it is added back at the end */
+ a = buf[0];
+ b = buf[1];
+ c = buf[2];
+ d = buf[3];
+
+ /* Round 1 (F1) */
+ MD5STEP(F1, a, b, c, d, in[0] + 0xd76aa478, 7);
+ MD5STEP(F1, d, a, b, c, in[1] + 0xe8c7b756, 12);
+ MD5STEP(F1, c, d, a, b, in[2] + 0x242070db, 17);
+ MD5STEP(F1, b, c, d, a, in[3] + 0xc1bdceee, 22);
+ MD5STEP(F1, a, b, c, d, in[4] + 0xf57c0faf, 7);
+ MD5STEP(F1, d, a, b, c, in[5] + 0x4787c62a, 12);
+ MD5STEP(F1, c, d, a, b, in[6] + 0xa8304613, 17);
+ MD5STEP(F1, b, c, d, a, in[7] + 0xfd469501, 22);
+ MD5STEP(F1, a, b, c, d, in[8] + 0x698098d8, 7);
+ MD5STEP(F1, d, a, b, c, in[9] + 0x8b44f7af, 12);
+ MD5STEP(F1, c, d, a, b, in[10] + 0xffff5bb1, 17);
+ MD5STEP(F1, b, c, d, a, in[11] + 0x895cd7be, 22);
+ MD5STEP(F1, a, b, c, d, in[12] + 0x6b901122, 7);
+ MD5STEP(F1, d, a, b, c, in[13] + 0xfd987193, 12);
+ MD5STEP(F1, c, d, a, b, in[14] + 0xa679438e, 17);
+ MD5STEP(F1, b, c, d, a, in[15] + 0x49b40821, 22);
+
+ /* Round 2 (F2) */
+ MD5STEP(F2, a, b, c, d, in[1] + 0xf61e2562, 5);
+ MD5STEP(F2, d, a, b, c, in[6] + 0xc040b340, 9);
+ MD5STEP(F2, c, d, a, b, in[11] + 0x265e5a51, 14);
+ MD5STEP(F2, b, c, d, a, in[0] + 0xe9b6c7aa, 20);
+ MD5STEP(F2, a, b, c, d, in[5] + 0xd62f105d, 5);
+ MD5STEP(F2, d, a, b, c, in[10] + 0x02441453, 9);
+ MD5STEP(F2, c, d, a, b, in[15] + 0xd8a1e681, 14);
+ MD5STEP(F2, b, c, d, a, in[4] + 0xe7d3fbc8, 20);
+ MD5STEP(F2, a, b, c, d, in[9] + 0x21e1cde6, 5);
+ MD5STEP(F2, d, a, b, c, in[14] + 0xc33707d6, 9);
+ MD5STEP(F2, c, d, a, b, in[3] + 0xf4d50d87, 14);
+ MD5STEP(F2, b, c, d, a, in[8] + 0x455a14ed, 20);
+ MD5STEP(F2, a, b, c, d, in[13] + 0xa9e3e905, 5);
+ MD5STEP(F2, d, a, b, c, in[2] + 0xfcefa3f8, 9);
+ MD5STEP(F2, c, d, a, b, in[7] + 0x676f02d9, 14);
+ MD5STEP(F2, b, c, d, a, in[12] + 0x8d2a4c8a, 20);
+
+ /* Round 3 (F3) */
+ MD5STEP(F3, a, b, c, d, in[5] + 0xfffa3942, 4);
+ MD5STEP(F3, d, a, b, c, in[8] + 0x8771f681, 11);
+ MD5STEP(F3, c, d, a, b, in[11] + 0x6d9d6122, 16);
+ MD5STEP(F3, b, c, d, a, in[14] + 0xfde5380c, 23);
+ MD5STEP(F3, a, b, c, d, in[1] + 0xa4beea44, 4);
+ MD5STEP(F3, d, a, b, c, in[4] + 0x4bdecfa9, 11);
+ MD5STEP(F3, c, d, a, b, in[7] + 0xf6bb4b60, 16);
+ MD5STEP(F3, b, c, d, a, in[10] + 0xbebfbc70, 23);
+ MD5STEP(F3, a, b, c, d, in[13] + 0x289b7ec6, 4);
+ MD5STEP(F3, d, a, b, c, in[0] + 0xeaa127fa, 11);
+ MD5STEP(F3, c, d, a, b, in[3] + 0xd4ef3085, 16);
+ MD5STEP(F3, b, c, d, a, in[6] + 0x04881d05, 23);
+ MD5STEP(F3, a, b, c, d, in[9] + 0xd9d4d039, 4);
+ MD5STEP(F3, d, a, b, c, in[12] + 0xe6db99e5, 11);
+ MD5STEP(F3, c, d, a, b, in[15] + 0x1fa27cf8, 16);
+ MD5STEP(F3, b, c, d, a, in[2] + 0xc4ac5665, 23);
+
+ /* Round 4 (F4) */
+ MD5STEP(F4, a, b, c, d, in[0] + 0xf4292244, 6);
+ MD5STEP(F4, d, a, b, c, in[7] + 0x432aff97, 10);
+ MD5STEP(F4, c, d, a, b, in[14] + 0xab9423a7, 15);
+ MD5STEP(F4, b, c, d, a, in[5] + 0xfc93a039, 21);
+ MD5STEP(F4, a, b, c, d, in[12] + 0x655b59c3, 6);
+ MD5STEP(F4, d, a, b, c, in[3] + 0x8f0ccc92, 10);
+ MD5STEP(F4, c, d, a, b, in[10] + 0xffeff47d, 15);
+ MD5STEP(F4, b, c, d, a, in[1] + 0x85845dd1, 21);
+ MD5STEP(F4, a, b, c, d, in[8] + 0x6fa87e4f, 6);
+ MD5STEP(F4, d, a, b, c, in[15] + 0xfe2ce6e0, 10);
+ MD5STEP(F4, c, d, a, b, in[6] + 0xa3014314, 15);
+ MD5STEP(F4, b, c, d, a, in[13] + 0x4e0811a1, 21);
+ MD5STEP(F4, a, b, c, d, in[4] + 0xf7537e82, 6);
+ MD5STEP(F4, d, a, b, c, in[11] + 0xbd3af235, 10);
+ MD5STEP(F4, c, d, a, b, in[2] + 0x2ad7d2bb, 15);
+ MD5STEP(F4, b, c, d, a, in[9] + 0xeb86d391, 21);
+
+ /* Fold the result of this block into the running state */
+ buf[0] += a;
+ buf[1] += b;
+ buf[2] += c;
+ buf[3] += d;
+}
+
+/* One-shot helper: hash @length bytes at @buffer and store the MD5_SIZE-byte digest to @outbuf. */
+void md5_hash_buffer(byte *outbuf, const byte *buffer, uns length)
+{
+	md5_context ctx;
+	byte *digest;
+
+	md5_init(&ctx);
+	md5_update(&ctx, buffer, length);
+	/* The digest lives inside ctx, so copy it out before ctx goes away */
+	digest = md5_final(&ctx);
+	memcpy(outbuf, digest, MD5_SIZE);
+}
--- /dev/null
+/*
+ * UCW Library -- MD5 Message Digest
+ *
+ * This file is in public domain (see ucw/md5.c).
+ */
+
+#ifndef _UCW_MD5_H
+#define _UCW_MD5_H
+
+/**
+ * Internal MD5 hash state.
+ * You should use it just as an opaque handle only.
+ */
+typedef struct {
+ u32 buf[4]; /* Current hash state; holds the digest after md5_final() */
+ u32 bits[2]; /* Number of bits processed so far: low word, high word */
+ byte in[64]; /* Input block being assembled */
+} md5_context;
+
+void md5_init(md5_context *context); /** Initialize the MD5 hashing algorithm in @context. **/
+/**
+ * Push another @len bytes of data from @buf to the MD5 hash
+ * represented by @context. You can call it multiple times on the same
+ * @context without reinitializing it and the result will be the same
+ * as if you concatenated all the data together and fed them here all at
+ * once.
+ */
+void md5_update(md5_context *context, const byte *buf, uns len);
+/**
+ * Call this after the last @md5_update(). It will terminate the
+ * algorithm and return a pointer to the result.
+ *
+ * Note that the data it points to are stored inside the @context, so
+ * if you use it to compute another hash or it ceases to exist, the
+ * pointer becomes invalid.
+ *
+ * To convert the hash to its usual hexadecimal representation, see
+ * <<string:mem_to_hex()>>.
+ */
+byte *md5_final(md5_context *context);
+
+/**
+ * This is the core routine of the MD5 algorithm. It takes 16 longwords of
+ * data in @in and transforms the hash in @buf according to them.
+ *
+ * You probably do not want to call this one directly.
+ */
+void md5_transform(u32 buf[4], const u32 in[16]);
+
+/**
+ * MD5 one-shot convenience method. It takes @length bytes from
+ * @buffer, creates the hash from them and returns it in @outbuf.
+ *
+ * It is equivalent to this code:
+ *
+ * md5_context c;
+ * md5_init(&c);
+ * md5_update(&c, buffer, length);
+ * memcpy(outbuf, md5_final(&c), MD5_SIZE);
+ */
+void md5_hash_buffer(byte *outbuf, const byte *buffer, uns length);
+
+#define MD5_HEX_SIZE 33 /** How many bytes a string buffer for MD5 in hexadecimal format should have. **/
+#define MD5_SIZE 16 /** Number of bytes the MD5 hash takes in the binary form. **/
+
+#endif /* !_UCW_MD5_H */
--- /dev/null
+/*
+ * UCW Library -- Memory Pools (Formatting)
+ *
+ * (c) 2005 Martin Mares <mj@ucw.cz>
+ * (c) 2007 Pavel Charvat <pchar@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+#include "ucw/mempool.h"
+
+#include <alloca.h>
+#include <stdio.h>
+#include <string.h>
+
+/*
+ * Format @fmt with @args into the growing buffer of @mp, starting @ofs
+ * bytes after the buffer's beginning, then close the buffer right after
+ * the terminating zero. Returns the beginning of the buffer.
+ */
+static char *
+mp_vprintf_at(struct mempool *mp, uns ofs, const char *fmt, va_list args)
+{
+  va_list ap;
+  char *out = mp_grow(mp, ofs + 1) + ofs;
+
+  /* First attempt: print into whatever space is available right now */
+  va_copy(ap, args);
+  int len = vsnprintf(out, mp_avail(mp) - ofs, fmt, ap);
+  va_end(ap);
+
+  if (len >= 0)
+    {
+      if ((uns)len >= mp_avail(mp) - ofs)
+	{
+	  /* Truncated, but the needed size is known: grow once and repeat */
+	  out = mp_grow(mp, ofs + len + 1) + ofs;
+	  va_copy(ap, args);
+	  int len2 = vsnprintf(out, len + 1, fmt, ap);
+	  va_end(ap);
+	  ASSERT(len2 == len);
+	}
+    }
+  else
+    {
+      /* Our C library doesn't support C99 return value of vsnprintf, so we need to iterate */
+      while (len < 0)
+	{
+	  out = mp_expand(mp) + ofs;
+	  va_copy(ap, args);
+	  len = vsnprintf(out, mp_avail(mp) - ofs, fmt, ap);
+	  va_end(ap);
+	}
+    }
+
+  mp_end(mp, out + len + 1);
+  return out - ofs;
+}
+
+/* Format a string into a new block of @mp; va_list flavour of mp_printf(). */
+char *
+mp_vprintf(struct mempool *mp, const char *fmt, va_list args)
+{
+ /* Open a growing buffer with room for at least the terminating zero */
+ mp_start(mp, 1);
+ return mp_vprintf_at(mp, 0, fmt, args);
+}
+
+/* printf()-like formatting whose result is stored in the mempool @p. */
+char *
+mp_printf(struct mempool *p, const char *fmt, ...)
+{
+ va_list args;
+ va_start(args, fmt);
+ char *res = mp_vprintf(p, fmt, args);
+ va_end(args);
+ return res;
+}
+
+/*
+ * Append formatted text to the zero-terminated string @ptr, which is
+ * reopened with mp_open(). Returns the beginning of the resulting string.
+ */
+char *
+mp_vprintf_append(struct mempool *mp, char *ptr, const char *fmt, va_list args)
+{
+ uns ofs = mp_open(mp, ptr);
+ /* The reopened block must be non-empty and end with a zero byte */
+ ASSERT(ofs && !ptr[ofs - 1]);
+ /* Start printing over the old terminating zero */
+ return mp_vprintf_at(mp, ofs - 1, fmt, args);
+}
+
+/* Variadic front-end of mp_vprintf_append(). */
+char *
+mp_printf_append(struct mempool *mp, char *ptr, const char *fmt, ...)
+{
+ va_list args;
+ va_start(args, fmt);
+ char *res = mp_vprintf_append(mp, ptr, fmt, args);
+ va_end(args);
+ return res;
+}
+
+#ifdef TEST
+
+/* Test: format, append and format again from a pool created with 64-byte chunks; all results go to stdout. */
+int main(void)
+{
+ struct mempool *mp = mp_new(64);
+ char *x = mp_printf(mp, "<Hello, %s!>", "World");
+ fputs(x, stdout);
+ x = mp_printf_append(mp, x, "<Appended>");
+ fputs(x, stdout);
+ /* The %50s field makes the output wider than 50 characters */
+ x = mp_printf(mp, "<Hello, %50s!>\n", "World");
+ fputs(x, stdout);
+ return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Memory Pools (String Operations)
+ *
+ * (c) 2004 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+#include "ucw/mempool.h"
+
+#include <alloca.h>
+#include <string.h>
+
+/* Copy the zero-terminated string @s, including the terminator, into unaligned memory from pool @p. */
+char *
+mp_strdup(struct mempool *p, const char *s)
+{
+  uns bytes = strlen(s) + 1;
+  return memcpy(mp_alloc_fast_noalign(p, bytes), s, bytes);
+}
+
+/* Copy @len bytes starting at @s into memory allocated from pool @p. */
+void *
+mp_memdup(struct mempool *p, const void *s, uns len)
+{
+  return memcpy(mp_alloc_fast(p, len), s, len);
+}
+
+/*
+ * Concatenate all strings passed after @p (the list is terminated by
+ * a NULL pointer) into a single string allocated from the pool.
+ */
+char *
+mp_multicat(struct mempool *p, ...)
+{
+  va_list args, scan;
+  char *s;
+  uns cnt = 0;
+
+  va_start(args, p);
+
+  /* Pass 1: count the strings */
+  va_copy(scan, args);
+  while ((s = va_arg(scan, char *)) != NULL)
+    cnt++;
+  va_end(scan);
+
+  /* Pass 2: measure them; the total starts at 1 for the terminating zero */
+  uns *sizes = alloca(cnt * sizeof(uns));
+  uns len = 1;
+  va_copy(scan, args);
+  for (uns i = 0; (s = va_arg(scan, char *)) != NULL; i++)
+    {
+      sizes[i] = strlen(s);
+      len += sizes[i];
+    }
+  va_end(scan);
+
+  /* Pass 3: copy them one after another */
+  char *buf = mp_alloc_fast_noalign(p, len);
+  char *dst = buf;
+  for (uns i = 0; (s = va_arg(args, char *)) != NULL; i++)
+    {
+      memcpy(dst, s, sizes[i]);
+      dst += sizes[i];
+    }
+  *dst = 0;
+  va_end(args);
+  return buf;
+}
+
+/*
+ * Join the @n strings of the array @a into one string allocated from pool
+ * @p. If @sep is non-zero, it is inserted as a separator character between
+ * consecutive items. With @n equal to 0 the result is an empty string.
+ */
+char *
+mp_strjoin(struct mempool *p, char **a, uns n, uns sep)
+{
+  /*
+   * The lengths are kept on the stack. alloca() is used instead of
+   * a variable-length array, because an array of size 0 (n == 0) is not
+   * valid in ISO C.
+   */
+  uns *sizes = alloca(n * sizeof(uns));
+  uns len = 1;				/* The terminating zero */
+  for (uns i=0; i<n; i++)
+    len += sizes[i] = strlen(a[i]);
+  if (sep && n)
+    len += n-1;				/* One separator between each pair */
+  char *dest = mp_alloc_fast_noalign(p, len);
+  char *d = dest;
+  for (uns i=0; i<n; i++)
+    {
+      if (sep && i)
+	*d++ = sep;
+      memcpy(d, a[i], sizes[i]);
+      d += sizes[i];
+    }
+  *d = 0;
+  return dest;
+}
+
+#ifdef TEST
+
+#include <stdio.h>
+
+/* Test of the string helpers: duplicates, concatenates and joins a few strings and prints the results. */
+int main(void)
+{
+ struct mempool *p = mp_new(64);
+ char *s = mp_strdup(p, "12345");
+ char *c = mp_multicat(p, "<<", s, ">>", NULL);
+ puts(c);
+ char *a[] = { "bugs", "gnats", "insects" };
+ /* Once with '.' as the separator and once with none */
+ puts(mp_strjoin(p, a, 3, '.'));
+ puts(mp_strjoin(p, a, 3, 0));
+ return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Memory Pools (One-Time Allocation)
+ *
+ * (c) 1997--2001 Martin Mares <mj@ucw.cz>
+ * (c) 2007 Pavel Charvat <pchar@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#undef LOCAL_DEBUG
+
+#include "ucw/lib.h"
+#include "ucw/mempool.h"
+
+#include <string.h>
+
+#define MP_CHUNK_TAIL ALIGN_TO(sizeof(struct mempool_chunk), CPU_STRUCT_ALIGN)
+#define MP_SIZE_MAX (~0U - MP_CHUNK_TAIL - CPU_PAGE_SIZE)
+
+/*
+ * Header of a chunk. It is stored at the END of the chunk's memory: the
+ * data area of @size bytes precedes it, so the data starts at
+ * (void *)chunk - chunk->size.
+ */
+struct mempool_chunk {
+ struct mempool_chunk *next; /* Next chunk in the same chain */
+ uns size; /* Size of the data area preceding this header */
+};
+
+/*
+ * Round a requested chunk size up: with mmap-backed pools so that the data
+ * plus the chunk header fill whole pages, otherwise to CPU_STRUCT_ALIGN.
+ */
+static uns
+mp_align_size(uns size)
+{
+  uns aligned;
+#ifdef CONFIG_UCW_POOL_IS_MMAP
+  aligned = ALIGN_TO(size + MP_CHUNK_TAIL, CPU_PAGE_SIZE) - MP_CHUNK_TAIL;
+#else
+  aligned = ALIGN_TO(size, CPU_STRUCT_ALIGN);
+#endif
+  return aligned;
+}
+
+/*
+ * Initialize a caller-provided pool structure. No chunk is allocated here;
+ * all fields not listed below start as zero.
+ */
+void
+mp_init(struct mempool *pool, uns chunk_size)
+{
+  uns aligned = mp_align_size(MAX(sizeof(struct mempool), chunk_size));
+  struct mempool fresh = {
+    .chunk_size = aligned,
+    .threshold = aligned >> 1,		/* Larger requests get a chunk of their own */
+    .last_big = &pool->last_big,
+  };
+  *pool = fresh;
+}
+
+/* Allocate a chunk with @size bytes of data by xmalloc() and return its header, which sits right behind the data. */
+static void *
+mp_new_big_chunk(uns size)
+{
+  void *mem = xmalloc(size + MP_CHUNK_TAIL);
+  struct mempool_chunk *chunk = mem + size;
+  chunk->size = size;
+  return chunk;
+}
+
+/* Release a chunk obtained from mp_new_big_chunk(). */
+static void
+mp_free_big_chunk(struct mempool_chunk *chunk)
+{
+  /* The allocation starts chunk->size bytes before the header */
+  void *mem = (void *)chunk - chunk->size;
+  xfree(mem);
+}
+
+/*
+ * Allocate a regular chunk with @size bytes of data: by page_alloc() when
+ * the pools are mmap-backed, otherwise the same way as a big chunk.
+ * Returns the chunk header.
+ */
+static void *
+mp_new_chunk(uns size)
+{
+#ifdef CONFIG_UCW_POOL_IS_MMAP
+  void *mem = page_alloc(size + MP_CHUNK_TAIL);
+  struct mempool_chunk *chunk = mem + size;
+  chunk->size = size;
+  return chunk;
+#else
+  return mp_new_big_chunk(size);
+#endif
+}
+
+/* Release a chunk obtained from mp_new_chunk(). */
+static void
+mp_free_chunk(struct mempool_chunk *chunk)
+{
+#ifdef CONFIG_UCW_POOL_IS_MMAP
+  uns data_size = chunk->size;
+  page_free((void *)chunk - data_size, data_size + MP_CHUNK_TAIL);
+#else
+  mp_free_big_chunk(chunk);
+#endif
+}
+
+/*
+ * Create a new pool. The pool structure itself is placed at the beginning
+ * of the pool's first chunk, so no separate allocation is needed for it.
+ */
+struct mempool *
+mp_new(uns chunk_size)
+{
+  uns aligned = mp_align_size(MAX(sizeof(struct mempool), chunk_size));
+  struct mempool_chunk *chunk = mp_new_chunk(aligned);
+  struct mempool *pool = (void *)chunk - aligned;
+  DBG("Creating mempool %p with %u bytes long chunks", pool, aligned);
+  chunk->next = NULL;
+  struct mempool fresh = {
+    /* The first chunk is already partially occupied by the pool structure */
+    .state = { .free = { aligned - sizeof(*pool) }, .last = { chunk } },
+    .chunk_size = aligned,
+    .threshold = aligned >> 1,
+    .last_big = &pool->last_big,
+  };
+  *pool = fresh;
+  return pool;
+}
+
+/* Free a whole chain of regular chunks starting with @chunk. */
+static void
+mp_free_chain(struct mempool_chunk *chunk)
+{
+  struct mempool_chunk *next;
+  for (; chunk; chunk = next)
+    {
+      /* Read the link before the chunk is released */
+      next = chunk->next;
+      mp_free_chunk(chunk);
+    }
+}
+
+/* Free a whole chain of big chunks starting with @chunk. */
+static void
+mp_free_big_chain(struct mempool_chunk *chunk)
+{
+  struct mempool_chunk *next;
+  for (; chunk; chunk = next)
+    {
+      /* Read the link before the chunk is released */
+      next = chunk->next;
+      mp_free_big_chunk(chunk);
+    }
+}
+
+/* Destroy the pool and release all memory belonging to it. */
+void
+mp_delete(struct mempool *pool)
+{
+  DBG("Deleting mempool %p", pool);
+  struct mempool_chunk *big = pool->state.last[1];
+  struct mempool_chunk *spare = pool->unused;
+  struct mempool_chunk *small = pool->state.last[0];
+  mp_free_big_chain(big);
+  mp_free_chain(spare);
+  /* This chain goes last, because it can contain the mempool structure */
+  mp_free_chain(small);
+}
+
+/*
+ * Forget everything allocated from the pool. Big chunks are freed; regular
+ * chunks are moved to the list of unused chunks for later reuse, except
+ * for the chunk that hosts the pool structure itself, which stays active.
+ */
+void
+mp_flush(struct mempool *pool)
+{
+  mp_free_big_chain(pool->state.last[1]);
+
+  /* Recycle regular chunks until we reach the one starting with the pool */
+  struct mempool_chunk *chunk = pool->state.last[0];
+  while (chunk && (void *)chunk - chunk->size != pool)
+    {
+      struct mempool_chunk *next = chunk->next;
+      chunk->next = pool->unused;
+      pool->unused = chunk;
+      chunk = next;
+    }
+
+  /* chunk is now either the pool's own chunk or NULL */
+  pool->state.last[0] = chunk;
+  pool->state.free[0] = chunk ? chunk->size - sizeof(*pool) : 0;
+  pool->state.last[1] = NULL;
+  pool->state.free[1] = 0;
+  pool->state.next = NULL;
+  pool->last_big = &pool->last_big;
+}
+
+/* Account all chunks of one chain into slot @idx of @stats and add the slot's size to the grand total. */
+static void
+mp_stats_chain(struct mempool_chunk *chunk, struct mempool_stats *stats, uns idx)
+{
+  for (; chunk; chunk = chunk->next)
+    {
+      stats->chain_size[idx] += chunk->size + sizeof(*chunk);
+      stats->chain_count[idx]++;
+    }
+  stats->total_size += stats->chain_size[idx];
+}
+
+/* Compute statistics of the pool: slot 0 = regular chunks in use, 1 = big chunks, 2 = unused chunks. */
+void
+mp_stats(struct mempool *pool, struct mempool_stats *stats)
+{
+  struct mempool_chunk *chains[3] = {
+    pool->state.last[0],
+    pool->state.last[1],
+    pool->unused,
+  };
+  bzero(stats, sizeof(*stats));
+  for (uns i = 0; i < 3; i++)
+    mp_stats_chain(chains[i], stats, i);
+}
+
+/* Return the total_size figure computed by mp_stats() for @pool. */
+u64
+mp_total_size(struct mempool *pool)
+{
+ struct mempool_stats stats;
+ mp_stats(pool, &stats);
+ return stats.total_size;
+}
+
+/*
+ * Start a new chunk holding an allocation of @size bytes. Requests up to
+ * the pool's threshold open a regular chunk (reusing an unused one if
+ * possible), larger ones get a big chunk of their own. Requests above
+ * MP_SIZE_MAX are fatal. pool->idx records which kind was used.
+ */
+void *
+mp_alloc_internal(struct mempool *pool, uns size)
+{
+  struct mempool_chunk *chunk;
+
+  if (size > pool->threshold)
+    {
+      if (unlikely(size > MP_SIZE_MAX))
+	die("Cannot allocate %u bytes from a mempool", size);
+      pool->idx = 1;
+      uns aligned = ALIGN_TO(size, CPU_STRUCT_ALIGN);
+      chunk = mp_new_big_chunk(aligned);
+      chunk->next = pool->state.last[1];
+      pool->state.last[1] = chunk;
+      pool->state.free[1] = aligned - size;
+      pool->last_big = (void *)chunk - aligned;
+      return pool->last_big;
+    }
+
+  pool->idx = 0;
+  chunk = pool->unused;
+  if (chunk)
+    pool->unused = chunk->next;
+  else
+    chunk = mp_new_chunk(pool->chunk_size);
+  chunk->next = pool->state.last[0];
+  pool->state.last[0] = chunk;
+  pool->state.free[0] = pool->chunk_size - size;
+  /* The data area begins chunk_size bytes before the chunk header */
+  return (void *)chunk - pool->chunk_size;
+}
+
+/* Function wrapper around mp_alloc_fast() (presumably the inline fast path from ucw/mempool.h). */
+void *
+mp_alloc(struct mempool *pool, uns size)
+{
+ return mp_alloc_fast(pool, size);
+}
+
+/* Function wrapper around mp_alloc_fast_noalign() (presumably the inline fast path from ucw/mempool.h). */
+void *
+mp_alloc_noalign(struct mempool *pool, uns size)
+{
+ return mp_alloc_fast_noalign(pool, size);
+}
+
+/* Allocate @size bytes from the pool and clear them. */
+void *
+mp_alloc_zero(struct mempool *pool, uns size)
+{
+  return memset(mp_alloc_fast(pool, size), 0, size);
+}
+
+/*
+ * Start a growing buffer of at least @size bytes in a new chunk. The
+ * space is obtained like an ordinary allocation, but then returned to the
+ * chunk's free counter, so the buffer is not yet committed.
+ */
+void *
+mp_start_internal(struct mempool *pool, uns size)
+{
+  void *start = mp_alloc_internal(pool, size);
+  pool->state.free[pool->idx] += size;
+  return start;
+}
+
+/* Function wrapper around mp_start_fast() (presumably the inline fast path from ucw/mempool.h). */
+void *
+mp_start(struct mempool *pool, uns size)
+{
+ return mp_start_fast(pool, size);
+}
+
+/* Function wrapper around mp_start_fast_noalign() (presumably the inline fast path from ucw/mempool.h). */
+void *
+mp_start_noalign(struct mempool *pool, uns size)
+{
+ return mp_start_fast_noalign(pool, size);
+}
+
+/*
+ * Make the growing buffer at least @size bytes long. A buffer living in
+ * a regular chunk is copied to a freshly started one; a buffer in a big
+ * chunk is reallocated, at least doubling its size so that repeated
+ * growing stays cheap. Returns the (possibly moved) buffer.
+ */
+void *
+mp_grow_internal(struct mempool *pool, uns size)
+{
+  if (unlikely(size > MP_SIZE_MAX))
+    die("Cannot allocate %u bytes of memory", size);
+  uns avail = mp_avail(pool);
+  void *ptr = mp_ptr(pool);
+
+  if (!pool->idx)
+    {
+      /* Regular chunk: move the buffer's current contents to a new start */
+      void *moved = mp_start_internal(pool, size);
+      memcpy(moved, ptr, avail);
+      return moved;
+    }
+
+  /* Big chunk: choose the new size, guarding the doubling against overflow */
+  uns amortized = likely(avail <= MP_SIZE_MAX / 2) ? avail * 2 : MP_SIZE_MAX;
+  amortized = MAX(amortized, size);
+  amortized = ALIGN_TO(amortized, CPU_STRUCT_ALIGN);
+
+  /* Remember the link first, the old header is gone after the realloc */
+  struct mempool_chunk *old_chunk = pool->state.last[1];
+  struct mempool_chunk *next = old_chunk->next;
+  ptr = xrealloc(ptr, amortized + MP_CHUNK_TAIL);
+
+  /* Rebuild the header at the end of the resized block */
+  struct mempool_chunk *chunk = ptr + amortized;
+  chunk->next = next;
+  chunk->size = amortized;
+  pool->state.last[1] = chunk;
+  pool->state.free[1] = amortized;
+  pool->last_big = ptr;
+  return ptr;
+}
+
+/* Out-of-line entry point for reopening the last block; forwards to mp_open_fast(). */
+uns
+mp_open(struct mempool *pool, void *ptr)
+{
+ return mp_open_fast(pool, ptr);
+}
+
+/* Out-of-line entry point for reallocating the last block; forwards to mp_realloc_fast(). */
+void *
+mp_realloc(struct mempool *pool, void *ptr, uns size)
+{
+ return mp_realloc_fast(pool, ptr, size);
+}
+
+/*
+ * Reallocate the block @ptr to @size bytes like mp_realloc(), and clear
+ * every byte beyond the block's previous size to zero.
+ */
+void *
+mp_realloc_zero(struct mempool *pool, void *ptr, uns size)
+{
+  uns prev_size = mp_open_fast(pool, ptr);
+  void *block = mp_grow(pool, size);
+  if (prev_size < size)
+    bzero(block + prev_size, size - prev_size);
+  mp_end(pool, block + size);
+  return block;
+}
+
+/*
+ * Slow path of mp_spread(): grow the open buffer so that @size bytes fit
+ * behind position @p, and return @p translated into the (possibly moved) buffer.
+ */
+void *
+mp_spread_internal(struct mempool *pool, void *p, uns size)
+{
+ void *old = mp_ptr(pool);
+ /* p-old is the offset of @p within the buffer; the buffer must hold that plus @size. */
+ void *new = mp_grow_internal(pool, p-old+size);
+ return p-old+new;
+}
+
+/*
+ * Roll the pool back to the saved @state, releasing every chunk that was
+ * added to either chain after the state was taken.
+ */
+void
+mp_restore(struct mempool *pool, struct mempool_state *state)
+{
+ struct mempool_chunk *chunk, *next;
+ /* Work on a private copy of *state, taken before any chunk is released. */
+ struct mempool_state s = *state;
+ /* Small chunks newer than the saved state are moved to the pool->unused list. */
+ for (chunk = pool->state.last[0]; chunk != s.last[0]; chunk = next)
+ {
+ next = chunk->next;
+ chunk->next = pool->unused;
+ pool->unused = chunk;
+ }
+ /* Big chunks newer than the saved state are released with mp_free_big_chunk(). */
+ for (chunk = pool->state.last[1]; chunk != s.last[1]; chunk = next)
+ {
+ next = chunk->next;
+ mp_free_big_chunk(chunk);
+ }
+ pool->state = s;
+ /* Point last_big at the field itself; mp_idx() compares block pointers with it. */
+ pool->last_big = &pool->last_big;
+}
+
+/*
+ * Save the current pool state into a structure allocated on the pool itself,
+ * link it as pool->state.next and return it.
+ */
+struct mempool_state *
+mp_push(struct mempool *pool)
+{
+ /* Snapshot first, so the saved state predates the allocation of the state structure. */
+ struct mempool_state state = pool->state;
+ struct mempool_state *p = mp_alloc_fast(pool, sizeof(*p));
+ *p = state;
+ pool->state.next = p;
+ return p;
+}
+
+/*
+ * Undo the most recent mp_push() (or mp_save()): restore the state linked
+ * through pool->state.next and release everything allocated since then.
+ * It is an error (ASSERT) to call this with no saved state.
+ */
+void
+mp_pop(struct mempool *pool)
+{
+  ASSERT(pool->state.next);
+  /* Restoring a copy of the *current* state would release nothing and
+   * leave the same state on top of the stack. mp_restore() copies *state
+   * before touching any chunk, so passing a state structure that lives
+   * inside the pool being rolled back is safe. */
+  mp_restore(pool, pool->state.next);
+}
+
+#ifdef TEST
+
+#include "ucw/getopt.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+
+/* Write the byte pattern magic, magic+1, ... (each taken modulo 256) into ptr[0..len-1]. */
+static void
+fill(byte *ptr, uns len, uns magic)
+{
+  for (uns i = 0; i < len; i++)
+    ptr[i] = (magic + i) & 255;
+}
+
+/*
+ * Verify that @ptr is aligned to @align (a power of two is assumed by the
+ * mask test) and that ptr[0..len-1] still holds the pattern written by
+ * fill() with the same @magic; any mismatch trips an ASSERT.
+ */
+static void
+check(byte *ptr, uns len, uns magic, uns align)
+{
+  ASSERT(!((uintptr_t)ptr & (align - 1)));
+  for (uns i = 0; i < len; i++)
+    if (ptr[i] != ((magic + i) & 255))
+      ASSERT(0);
+}
+
+/*
+ * Randomized stress test of the mempool: performs 5000 randomly chosen pool
+ * operations and, before each of them, re-verifies the fill pattern and the
+ * alignment of every block that is still tracked as live.
+ */
+int main(int argc, char **argv)
+{
+ srand(time(NULL));
+ log_init(argv[0]);
+ cf_def_file = NULL;
+ if (cf_getopt(argc, argv, CF_SHORT_OPTS, CF_NO_LONG_OPTS, NULL) >= 0 || argc != optind)
+ die("Invalid usage");
+
+ /* n = number of tracked blocks, m = number of saved states;
+ * can_realloc tells whether the newest block may be reopened
+ * (set after alloc/start, cleared after push/pop/restore). */
+ uns max = 1000, n = 0, m = 0, can_realloc = 0;
+ void *ptr[max];
+ struct mempool_state *state[max];
+ uns len[max], num[max], align[max];
+ struct mempool *mp = mp_new(128), mp_static;
+
+ for (uns i = 0; i < 5000; i++)
+ {
+ for (uns j = 0; j < n; j++)
+ check(ptr[j], len[j], j, align[j]);
+#if 0
+ DBG("free_small=%u free_big=%u idx=%u chunk_size=%u last_big=%p", mp->state.free[0], mp->state.free[1], mp->idx, mp->chunk_size, mp->last_big);
+ for (struct mempool_chunk *ch = mp->state.last[0]; ch; ch = ch->next)
+ DBG("small %p %p %p %d", (byte *)ch - ch->size, ch, ch + 1, ch->size);
+ for (struct mempool_chunk *ch = mp->state.last[1]; ch; ch = ch->next)
+ DBG("big %p %p %p %d", (byte *)ch - ch->size, ch, ch + 1, ch->size);
+#endif
+ /* Pick one operation; each branch subtracts its weight from r and is taken once r drops below zero. */
+ int r = random_max(100);
+ if ((r -= 1) < 0)
+ {
+ DBG("flush");
+ mp_flush(mp);
+ n = m = 0;
+ }
+ else if ((r -= 1) < 0)
+ {
+ DBG("delete & new");
+ mp_delete(mp);
+ /* Exercise both a heap-allocated pool (mp_new) and a caller-provided one (mp_init). */
+ if (random_max(2))
+ mp = mp_new(random_max(0x1000) + 1);
+ else
+ mp = &mp_static, mp_init(mp, random_max(512) + 1);
+ n = m = 0;
+ }
+ else if (n < max && (r -= 30) < 0)
+ {
+ len[n] = random_max(0x2000);
+ DBG("alloc(%u)", len[n]);
+ align[n] = random_max(2) ? CPU_STRUCT_ALIGN : 1;
+ ptr[n] = (align[n] == 1) ? mp_alloc_fast_noalign(mp, len[n]) : mp_alloc_fast(mp, len[n]);
+ DBG(" -> (%p)", ptr[n]);
+ fill(ptr[n], len[n], n);
+ n++;
+ can_realloc = 1;
+ }
+ else if (n < max && (r -= 20) < 0)
+ {
+ len[n] = random_max(0x2000);
+ DBG("start(%u)", len[n]);
+ align[n] = random_max(2) ? CPU_STRUCT_ALIGN : 1;
+ ptr[n] = (align[n] == 1) ? mp_start_fast_noalign(mp, len[n]) : mp_start_fast(mp, len[n]);
+ DBG(" -> (%p)", ptr[n]);
+ fill(ptr[n], len[n], n);
+ n++;
+ can_realloc = 1;
+ /* The buffer is already open, so jump straight to the shared grow-and-close code. */
+ goto grow;
+ }
+ else if (can_realloc && n && (r -= 10) < 0)
+ {
+ /* Reopen the newest block; mp_open() must report the length recorded for it. */
+ if (mp_open(mp, ptr[n - 1]) != len[n - 1])
+ ASSERT(0);
+grow:
+ {
+ uns k = n - 1;
+ /* Resize the open buffer 0 to 3 times, checking that the common prefix survives each time. */
+ for (uns i = random_max(4); i--; )
+ {
+ uns l = len[k];
+ len[k] = random_max(0x2000);
+ DBG("grow(%u)", len[k]);
+ ptr[k] = mp_grow(mp, len[k]);
+ DBG(" -> (%p)", ptr[k]);
+ check(ptr[k], MIN(l, len[k]), k, align[k]);
+ fill(ptr[k], len[k], k);
+ }
+ mp_end(mp, ptr[k] + len[k]);
+ }
+ }
+ else if (can_realloc && n && (r -= 20) < 0)
+ {
+ uns i = n - 1, l = len[i];
+ DBG("realloc(%p, %u)", ptr[i], len[i]);
+ ptr[i] = mp_realloc(mp, ptr[i], len[i] = random_max(0x2000));
+ DBG(" -> (%p, %u)", ptr[i], len[i]);
+ check(ptr[i], MIN(len[i], l), i, align[i]);
+ fill(ptr[i], len[i], i);
+ }
+ else if (m < max && (r -= 5) < 0)
+ {
+ DBG("push(%u)", m);
+ /* Remember how many blocks existed at this point, so pop/restore can shrink n back. */
+ num[m] = n;
+ state[m++] = mp_push(mp);
+ can_realloc = 0;
+ }
+ else if (m && (r -= 2) < 0)
+ {
+ m--;
+ DBG("pop(%u)", m);
+ mp_pop(mp);
+ n = num[m];
+ can_realloc = 0;
+ }
+ else if (m && (r -= 1) < 0)
+ {
+ /* Restore a randomly chosen saved state, dropping all states pushed after it. */
+ uns i = random_max(m);
+ DBG("restore(%u)", i);
+ mp_restore(mp, state[i]);
+ n = num[m = i];
+ can_realloc = 0;
+ }
+ else if (can_realloc && n && (r -= 5) < 0)
+ ASSERT(mp_size(mp, ptr[n - 1]) == len[n - 1]);
+ }
+
+ mp_delete(mp);
+ return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Memory Pools
+ *
+ * (c) 1997--2005 Martin Mares <mj@ucw.cz>
+ * (c) 2007 Pavel Charvat <pchar@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_POOLS_H
+#define _UCW_POOLS_H
+
+/***
+ * [[defs]]
+ * Definitions
+ * -----------
+ ***/
+
+/**
+ * Memory pool state (see @mp_push(), ...).
+ * You should use this one as an opaque handle only, the insides are internal.
+ **/
+struct mempool_state {
+ uns free[2]; /* Free bytes at the end of the newest chunk of each chain (index 0 and 1) */
+ void *last[2]; /* Newest chunk of each chain; block addresses are computed backwards from it */
+ struct mempool_state *next; /* Previously saved state (set by mp_save() and mp_push()) */
+};
+
+/**
+ * Memory pool.
+ * You should use this one as an opaque handle only, the insides are internal.
+ **/
+struct mempool {
+ struct mempool_state state; /* Current allocation state, saved and restored as a unit */
+ /* unused: list of spare chunks (mp_restore() puts released small chunks here);
+ * last_big: start of the most recent big-chunk block, compared against by mp_idx() */
+ void *unused, *last_big;
+ /* idx selects which element of state.free[]/state.last[] the open growing buffer uses.
+ * NOTE(review): the exact meaning of chunk_size and threshold is defined in mempool.c -- confirm there. */
+ uns chunk_size, threshold, idx;
+};
+
+struct mempool_stats { /** Mempool statistics. See @mp_stats(). **/
+ u64 total_size; /* Real allocated size in bytes */
+ uns chain_count[3]; /* Number of allocated chunks in small/big/unused chains */
+ uns chain_size[3]; /* Size of allocated chunks in small/big/unused chains */
+};
+
+/***
+ * [[basic]]
+ * Basic manipulation
+ * ------------------
+ ***/
+
+/**
+ * Initialize a given mempool structure.
+ * @chunk_size must be in the interval `[1, UINT_MAX / 2]`.
+ * The pool obtains memory in chunks of this size and
+ * satisfies allocation requests from them.
+ **/
+void mp_init(struct mempool *pool, uns chunk_size);
+
+/**
+ * Allocate and initialize a new memory pool.
+ * See @mp_init() for @chunk_size limitations.
+ *
+ * The new mempool structure is allocated on the new mempool.
+ **/
+struct mempool *mp_new(uns chunk_size);
+
+/**
+ * Cleanup mempool initialized by mp_init or mp_new.
+ * Frees all the memory allocated by this mempool and,
+ * if created by @mp_new(), the @pool itself.
+ **/
+void mp_delete(struct mempool *pool);
+
+/**
+ * Frees all data on a memory pool, but leaves it working.
+ * It can keep some of the chunks allocated to serve
+ * further allocation requests. Leaves the @pool alive,
+ * even if it was created with @mp_new().
+ **/
+void mp_flush(struct mempool *pool);
+
+/**
+ * Compute some statistics for debug purposes.
+ * See the definition of the <<struct_mempool_stats,mempool_stats structure>>.
+ **/
+void mp_stats(struct mempool *pool, struct mempool_stats *stats);
+u64 mp_total_size(struct mempool *pool); /** How many bytes were allocated by the pool. **/
+
+
+/***
+ * [[alloc]]
+ * Allocation routines
+ * -------------------
+ ***/
+
+/* For internal use only, do not call directly */
+void *mp_alloc_internal(struct mempool *pool, uns size) LIKE_MALLOC;
+
+/**
+ * The function allocates new @size bytes on a given memory pool.
+ * If the @size is zero, the resulting pointer is undefined,
+ * but it may be safely reallocated or used as the parameter
+ * to other functions below.
+ *
+ * The resulting pointer is always aligned to a multiple of
+ * `CPU_STRUCT_ALIGN` bytes and this condition remains true also
+ * after future reallocations.
+ **/
+void *mp_alloc(struct mempool *pool, uns size);
+
+/**
+ * The same as @mp_alloc(), but the result may be unaligned.
+ **/
+void *mp_alloc_noalign(struct mempool *pool, uns size);
+
+/**
+ * The same as @mp_alloc(), but fills the newly allocated memory with zeroes.
+ **/
+void *mp_alloc_zero(struct mempool *pool, uns size);
+
+/**
+ * Inlined version of @mp_alloc().
+ **/
+static inline void *mp_alloc_fast(struct mempool *pool, uns size)
+{
+  /* Usable room in the current small chunk, rounded down to a multiple of CPU_STRUCT_ALIGN. */
+  uns room = pool->state.free[0] & ~(CPU_STRUCT_ALIGN - 1);
+  if (size > room)
+    return mp_alloc_internal(pool, size);
+  pool->state.free[0] = room - size;
+  return (byte *)pool->state.last[0] - room;
+}
+
+/**
+ * Inlined version of @mp_alloc_noalign().
+ **/
+static inline void *mp_alloc_fast_noalign(struct mempool *pool, uns size)
+{
+  /* Take the block directly from the free space of the current small chunk if it fits. */
+  uns room = pool->state.free[0];
+  if (size > room)
+    return mp_alloc_internal(pool, size);
+  pool->state.free[0] = room - size;
+  return (byte *)pool->state.last[0] - room;
+}
+
+/***
+ * [[gbuf]]
+ * Growing buffers
+ * ---------------
+ *
+ * You do not need to know in advance how large a buffer will have to be;
+ * you can grow it incrementally to the needed size. You can grow only
+ * one buffer at a time on a given mempool.
+ *
+ * Similar functionality is provided by the <<growbuf:,growing buffers>> module.
+ ***/
+
+/* For internal use only, do not call directly */
+void *mp_start_internal(struct mempool *pool, uns size) LIKE_MALLOC;
+void *mp_grow_internal(struct mempool *pool, uns size);
+void *mp_spread_internal(struct mempool *pool, void *p, uns size);
+
+/*
+ * For internal use: return 1 if @ptr equals pool->last_big and 0 otherwise.
+ * Callers use the result as an index into pool->state.free[] and pool->state.last[].
+ */
+static inline uns
+mp_idx(struct mempool *pool, void *ptr)
+{
+ return ptr == pool->last_big;
+}
+
+/**
+ * Open a new growing buffer (at least @size bytes long).
+ * If the @size is zero, the resulting pointer is undefined,
+ * but it may be safely reallocated or used as the parameter
+ * to other functions below.
+ *
+ * The resulting pointer is always aligned to a multiple of
+ * `CPU_STRUCT_ALIGN` bytes and this condition remains true also
+ * after future reallocations. There is an unaligned version as well.
+ *
+ * Keep in mind that you can't make any other pool allocations
+ * before you "close" the growing buffer with @mp_end().
+ **/
+void *mp_start(struct mempool *pool, uns size);
+void *mp_start_noalign(struct mempool *pool, uns size);
+
+/**
+ * Inlined version of @mp_start().
+ **/
+static inline void *mp_start_fast(struct mempool *pool, uns size)
+{
+  /* Usable room in the current small chunk, rounded down to a multiple of CPU_STRUCT_ALIGN. */
+  uns room = pool->state.free[0] & ~(CPU_STRUCT_ALIGN - 1);
+  if (size > room)
+    return mp_start_internal(pool, size);
+  /* The buffer stays open: keep all of the room counted as free. */
+  pool->idx = 0;
+  pool->state.free[0] = room;
+  return (byte *)pool->state.last[0] - room;
+}
+
+/**
+ * Inlined version of @mp_start_noalign().
+ **/
+static inline void *mp_start_fast_noalign(struct mempool *pool, uns size)
+{
+  uns room = pool->state.free[0];
+  if (size > room)
+    return mp_start_internal(pool, size);
+  /* The buffer stays open, so the free counter is left untouched. */
+  pool->idx = 0;
+  return (byte *)pool->state.last[0] - room;
+}
+
+/**
+ * Return start pointer of the growing buffer allocated by latest @mp_start() or a similar function.
+ **/
+static inline void *mp_ptr(struct mempool *pool)
+{
+  uns chain = pool->idx;
+  byte *chunk_end = (byte *)pool->state.last[chain];
+  return chunk_end - pool->state.free[chain];
+}
+
+/**
+ * Return the number of bytes available for extending the growing buffer.
+ * (Before a reallocation will be needed).
+ **/
+static inline uns mp_avail(struct mempool *pool)
+{
+  uns chain = pool->idx;
+  return pool->state.free[chain];
+}
+
+/**
+ * Grow the buffer allocated by @mp_start() to be at least @size bytes long
+ * (@size may be less than @mp_avail(), even zero). Reallocated buffer may
+ * change its starting position. The content will be unchanged to the minimum
+ * of the old and new sizes; newly allocated memory will be uninitialized.
+ * Multiple calls to mp_grow() have amortized linear cost wrt. the maximum value of @size.
+ **/
+static inline void *mp_grow(struct mempool *pool, uns size)
+{
+  /* Only fall back to the out-of-line path when the buffer really has to get bigger. */
+  if (size > mp_avail(pool))
+    return mp_grow_internal(pool, size);
+  return mp_ptr(pool);
+}
+
+/**
+ * Grow the buffer by at least one byte -- equivalent to <<mp_grow(),`mp_grow`>>`(@pool, @mp_avail(pool) + 1)`.
+ **/
+static inline void *mp_expand(struct mempool *pool)
+{
+  uns wanted = mp_avail(pool) + 1;
+  return mp_grow_internal(pool, wanted);
+}
+
+/**
+ * Ensure that there is at least @size bytes free after @p,
+ * if not, reallocate and adjust @p.
+ **/
+static inline void *mp_spread(struct mempool *pool, void *p, uns size)
+{
+  /* Bytes between @p and the end of the chunk holding the open buffer. */
+  uns room = (uns)((byte *)pool->state.last[pool->idx] - (byte *)p);
+  if (room >= size)
+    return p;
+  return mp_spread_internal(pool, p, size);
+}
+
+/**
+ * Close the growing buffer. The @end must point just behind the data, you want to keep
+ * allocated (so it can be in the interval `[@mp_ptr(@pool), @mp_ptr(@pool) + @mp_avail(@pool)]`).
+ * Returns a pointer to the beginning of the just closed block.
+ **/
+static inline void *mp_end(struct mempool *pool, void *end)
+{
+  uns chain = pool->idx;
+  void *start = mp_ptr(pool);
+  /* Everything from @end up to the end of the chunk becomes free again. */
+  pool->state.free[chain] = (byte *)pool->state.last[chain] - (byte *)end;
+  return start;
+}
+
+/**
+ * Return size in bytes of the last allocated memory block (with @mp_alloc() or @mp_end()).
+ **/
+static inline uns mp_size(struct mempool *pool, void *ptr)
+{
+  uns chain = mp_idx(pool, ptr);
+  byte *chunk_end = (byte *)pool->state.last[chain];
+  /* Distance from @ptr to the end of the chunk, minus the part that is still free. */
+  return (chunk_end - (byte *)ptr) - pool->state.free[chain];
+}
+
+/**
+ * Open the last memory block (allocated with @mp_alloc() or @mp_end())
+ * for growing and return its size in bytes. The contents and the start pointer
+ * remain unchanged. Do not forget to call @mp_end() to close it.
+ **/
+uns mp_open(struct mempool *pool, void *ptr);
+
+/**
+ * Inlined version of mp_open().
+ **/
+static inline uns mp_open_fast(struct mempool *pool, void *ptr)
+{
+  uns chain = mp_idx(pool, ptr);
+  pool->idx = chain;
+  uns size = ((byte *)pool->state.last[chain] - (byte *)ptr) - pool->state.free[chain];
+  /* Count the block's own bytes as free again, which makes it the open buffer. */
+  pool->state.free[chain] += size;
+  return size;
+}
+
+/**
+ * Reallocate the last memory block (allocated with @mp_alloc() or @mp_end())
+ * to the new @size. Behavior is similar to @mp_grow(), but the resulting
+ * block is closed.
+ **/
+void *mp_realloc(struct mempool *pool, void *ptr, uns size);
+
+/**
+ * The same as @mp_realloc(), but fills the additional bytes (if any) with zeroes.
+ **/
+void *mp_realloc_zero(struct mempool *pool, void *ptr, uns size);
+
+/**
+ * Inlined version of mp_realloc().
+ **/
+static inline void *mp_realloc_fast(struct mempool *pool, void *ptr, uns size)
+{
+  /* Reopen the block, resize it, and close it again at exactly @size bytes. */
+  mp_open_fast(pool, ptr);
+  void *block = mp_grow(pool, size);
+  mp_end(pool, (byte *)block + size);
+  return block;
+}
+
+/***
+ * [[store]]
+ * Storing and restoring state
+ * ---------------------------
+ *
+ * Mempools can remember history of what was allocated and return back
+ * in time.
+ ***/
+
+/**
+ * Save the current state of a memory pool.
+ * Do not call this function with an opened growing buffer.
+ **/
+static inline void mp_save(struct mempool *pool, struct mempool_state *state)
+{
+ /* Copy the current state (including its old `next' link) into the caller's structure ... */
+ *state = pool->state;
+ /* ... and link that structure in as the most recently saved state. */
+ pool->state.next = state;
+}
+
+/**
+ * Save the current state to a newly allocated mempool_state structure.
+ * Do not call this function with an opened growing buffer.
+ **/
+struct mempool_state *mp_push(struct mempool *pool);
+
+/**
+ * Restore the state saved by @mp_save() or @mp_push() and free all
+ * data allocated after that point (including the state structure itself).
+ * You can't reallocate the last memory block from the saved state.
+ **/
+void mp_restore(struct mempool *pool, struct mempool_state *state);
+
+/**
+ * Inlined version of @mp_restore().
+ **/
+static inline void mp_restore_fast(struct mempool *pool, struct mempool_state *state)
+{
+  /* When no chunk was added to either chain since the state was saved,
+   * there is nothing to release and the state can be copied back directly. */
+  int same_chunks = pool->state.last[0] == state->last[0] && pool->state.last[1] == state->last[1];
+  if (same_chunks)
+    {
+      pool->state = *state;
+      pool->last_big = &pool->last_big;
+    }
+  else
+    mp_restore(pool, state);
+}
+
+/**
+ * Restore the state saved by the last call to @mp_push().
+ * @mp_pop() and @mp_push() work as a stack, so you can push more states safely.
+ **/
+void mp_pop(struct mempool *pool);
+
+
+/***
+ * [[string]]
+ * String operations
+ * -----------------
+ ***/
+
+char *mp_strdup(struct mempool *, const char *) LIKE_MALLOC; /** Makes a copy of a string on a mempool. **/
+void *mp_memdup(struct mempool *, const void *, uns) LIKE_MALLOC; /** Makes a copy of a memory block on a mempool. **/
+/**
+ * Concatenates all passed strings. The last parameter must be NULL.
+ * This will concatenate two strings:
+ *
+ * char *message = mp_multicat(pool, "hello ", "world", NULL);
+ **/
+char *mp_multicat(struct mempool *, ...) LIKE_MALLOC SENTINEL_CHECK;
+/**
+ * Concatenates two strings and stores result on @mp.
+ **/
+static inline char *LIKE_MALLOC mp_strcat(struct mempool *mp, const char *x, const char *y)
+{
+ /* The trailing NULL terminates the variable argument list of mp_multicat(). */
+ return mp_multicat(mp, x, y, NULL);
+}
+/**
+ * Join strings and place @sep between each two neighboring ones.
+ * @p is the mempool to provide memory, @a is an array of strings and @n
+ * tells how many of them there are.
+ **/
+char *mp_strjoin(struct mempool *p, char **a, uns n, uns sep) LIKE_MALLOC;
+
+
+/***
+ * [[format]]
+ * Formatted output
+ * ---------------
+ ***/
+
+/**
+ * printf() into an in-memory string, allocated on the memory pool.
+ **/
+char *mp_printf(struct mempool *mp, const char *fmt, ...) FORMAT_CHECK(printf,2,3) LIKE_MALLOC;
+/**
+ * Like @mp_printf(), but uses `va_list` for parameters.
+ **/
+char *mp_vprintf(struct mempool *mp, const char *fmt, va_list args) LIKE_MALLOC;
+/**
+ * Like @mp_printf(), but it appends the data at the end of the string
+ * pointed to by @ptr. The string is @mp_open()ed, so @ptr must be
+ * a block that @mp_open() accepts.
+ *
+ * Returns pointer to the beginning of the string (the pointer may have
+ * changed due to reallocation).
+ **/
+char *mp_printf_append(struct mempool *mp, char *ptr, const char *fmt, ...) FORMAT_CHECK(printf,3,4);
+/**
+ * Like @mp_printf_append(), but uses `va_list` for parameters.
+ **/
+char *mp_vprintf_append(struct mempool *mp, char *ptr, const char *fmt, va_list args);
+
+#endif
--- /dev/null
+# Tests for mempool modules
+
+Run: ../obj/ucw/mempool-t
+
+Run: ../obj/ucw/mempool-fmt-t
+Out: <Hello, World!><Hello, World!><Appended><Hello, World!>
+
+Run: ../obj/ucw/mempool-str-t
+Out: <<12345>>
+ bugs.gnats.insects
+ bugsgnatsinsects
--- /dev/null
+/*
+ * UCW Library -- Mapping of Files
+ *
+ * (c) 1999--2002 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+
+#include <stdio.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+
+/*
+ * Map the whole file @name into memory (shared mapping; read-write when
+ * @writeable is non-zero, read-only otherwise) and return its address.
+ * If @len is not NULL, the file size is stored there. Every failure is
+ * fatal (die()). The descriptor is closed before returning.
+ */
+void *
+mmap_file(const char *name, unsigned *len, int writeable)
+{
+  int prot = writeable ? (PROT_READ | PROT_WRITE) : PROT_READ;
+  int fd = open(name, writeable ? O_RDWR : O_RDONLY);
+  if (fd < 0)
+    die("open(%s): %m", name);
+
+  struct stat st;
+  if (fstat(fd, &st) < 0)
+    die("fstat(%s): %m", name);
+  if (len)
+    *len = st.st_size;
+
+  void *map;
+  if (!st.st_size)
+    map = "";		/* For empty file, we can return any non-zero address */
+  else
+    {
+      map = mmap(NULL, st.st_size, prot, MAP_SHARED, fd, 0);
+      if (map == MAP_FAILED)
+        die("mmap(%s): %m", name);
+    }
+  close(fd);
+  return map;
+}
+
+/* Unmap @len bytes starting at @start; the return value of munmap() is ignored. */
+void
+munmap_file(void *start, unsigned len)
+{
+ munmap(start, len);
+}
--- /dev/null
+/*
+ * UCW Library -- Mapping of File Parts
+ *
+ * (c) 2003--2006 Martin Mares <mj@ucw.cz>
+ * (c) 2003--2009 Robert Spalek <robert@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+#include "ucw/lfs.h"
+#include "ucw/partmap.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+
+#ifdef CONFIG_UCW_PARTMAP_IS_MMAP
+#define PARTMAP_WINDOW ~(size_t)0
+#else
+#ifdef TEST
+#define PARTMAP_WINDOW 4096
+#else
+#define PARTMAP_WINDOW 16777216
+#endif
+#endif
+
+/*
+ * Open the file @name (read-write if @writeable, read-only otherwise) and
+ * return a newly allocated partmap describing it. Failures are fatal (die()).
+ */
+struct partmap *
+partmap_open(char *name, int writeable)
+{
+ struct partmap *p = xmalloc_zero(sizeof(struct partmap));
+
+ p->fd = ucw_open(name, writeable ? O_RDWR : O_RDONLY);
+ if (p->fd < 0)
+ die("open(%s): %m", name);
+ /* Seeking to the end of the file yields its size. */
+ if ((p->file_size = ucw_seek(p->fd, 0, SEEK_END)) < 0)
+ die("lseek(%s): %m", name);
+ p->writeable = writeable;
+#ifdef CONFIG_UCW_PARTMAP_IS_MMAP
+ /* In this configuration the whole file is mapped once, right at open time. */
+ partmap_load(p, 0, p->file_size);
+#endif
+ return p;
+}
+
+/* Return the file size recorded by partmap_open(). */
+ucw_off_t
+partmap_size(struct partmap *p)
+{
+ return p->file_size;
+}
+
+/* Unmap the current window (if any), close the file and free the partmap structure. */
+void
+partmap_close(struct partmap *p)
+{
+ if (p->start_map)
+ munmap(p->start_map, p->end_off - p->start_off);
+ close(p->fd);
+ xfree(p);
+}
+
+/*
+ * Replace the current mapping window by one that covers @size bytes of the
+ * file starting at offset @start.
+ *
+ * The window begins at @start rounded down to a page boundary and is
+ * PARTMAP_WINDOW bytes long, clipped to the (page-aligned) rest of the file.
+ * Dies if the requested range does not fit into the window or if mmap fails.
+ * When the window is empty, p->start_map is set to NULL.
+ */
+void
+partmap_load(struct partmap *p, ucw_off_t start, uns size)
+{
+  if (p->start_map)
+    munmap(p->start_map, p->end_off - p->start_off);
+  ucw_off_t end = start + size;
+  ucw_off_t win_start = start/CPU_PAGE_SIZE * CPU_PAGE_SIZE;
+  size_t win_len = PARTMAP_WINDOW;
+  if (win_len > (size_t) (p->file_size - win_start))
+    win_len = ALIGN_TO(p->file_size - win_start, CPU_PAGE_SIZE);
+  if ((ucw_off_t) (win_start+win_len) < end)
+    die("partmap_map: Window is too small for mapping %u bytes", size);	/* size is unsigned, hence %u */
+  if (win_len)
+    {
+      p->start_map = ucw_mmap(NULL, win_len, p->writeable ? (PROT_READ | PROT_WRITE) : PROT_READ, MAP_SHARED, p->fd, win_start);
+      if (p->start_map == MAP_FAILED)
+        die("mmap failed at position %lld: %m", (long long)win_start);
+      /* Advise the kernel only when something was really mapped;
+       * the hint is best-effort, so its result is ignored. */
+      madvise(p->start_map, win_len, MADV_SEQUENTIAL);
+    }
+  else
+    p->start_map = NULL;
+  p->start_off = win_start;
+  p->end_off = win_start+win_len;
+}
+
+#ifdef TEST
+/* Test program: copy the file named by argv[1] to stdout, one byte at a time via partmap_map(). */
+int main(int argc, char **argv)
+{
+ /* NOTE(review): argc is not checked, so argv[1] may be NULL when no file name is given. */
+ struct partmap *p = partmap_open(argv[1], 0);
+ uns l = partmap_size(p);
+ uns i;
+ for (i=0; i<l; i++)
+ putchar(*(char *)partmap_map(p, i, 1));
+ partmap_close(p);
+ return 0;
+}
+#endif
--- /dev/null
+/*
+ * UCW Library -- Mapping of File Parts
+ *
+ * (c) 2003--2006 Martin Mares <mj@ucw.cz>
+ * (c) 2003--2005 Robert Spalek <robert@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_PARTMAP_H
+#define _UCW_PARTMAP_H
+
+struct partmap {
+ int fd; /* Descriptor of the open file */
+ ucw_off_t file_size; /* Size of the file as determined by partmap_open() */
+ ucw_off_t start_off, end_off; /* File offsets of the start and end of the current window */
+ byte *start_map; /* Address of the current window, NULL if nothing is mapped */
+ int writeable; /* Non-zero if the file is opened and mapped read-write */
+};
+
+struct partmap *partmap_open(char *name, int writeable);
+void partmap_close(struct partmap *p);
+ucw_off_t partmap_size(struct partmap *p);
+void partmap_load(struct partmap *p, ucw_off_t start, uns size);
+
+/*
+ * Return a pointer to the data at file offset @start, reloading the window
+ * first if [@start, @start+@size) is not fully inside the current one.
+ * With CONFIG_UCW_PARTMAP_IS_MMAP no reload is ever done here, which is
+ * why @size is marked UNUSED.
+ */
+static inline void *
+partmap_map(struct partmap *p, ucw_off_t start, uns size UNUSED)
+{
+#ifndef CONFIG_UCW_PARTMAP_IS_MMAP
+ if (unlikely(!p->start_map || start < p->start_off || (ucw_off_t) (start+size) > p->end_off))
+ partmap_load(p, start, size);
+#endif
+ return p->start_map + (start - p->start_off);
+}
+
+/*
+ * Like partmap_map(), but reloads the window only when the requested range
+ * ends behind the current window; the lower bound and a missing mapping
+ * are not checked. NOTE(review): presumably meant for callers that only
+ * move forward through the file -- confirm against the callers.
+ */
+static inline void *
+partmap_map_forward(struct partmap *p, ucw_off_t start, uns size UNUSED)
+{
+#ifndef CONFIG_UCW_PARTMAP_IS_MMAP
+ if (unlikely((ucw_off_t) (start+size) > p->end_off))
+ partmap_load(p, start, size);
+#endif
+ return p->start_map + (start - p->start_off);
+}
+
+#endif
--- /dev/null
+# Perl module for managing file locks
+#
+# (c) 2007 Pavel Charvat <pchar@ucw.cz>
+#
+# This software may be freely distributed and used according to the terms
+# of the GNU Lesser General Public License.
+#
+#
+#
+# Interface:
+# UCW::Filelock::fcntl_lock($fd, $cmd, $type, $whence, $start, $len)
+#
+
+package UCW::Filelock;
+
+use 5.006;
+use strict;
+use warnings;
+
+require DynaLoader;
+
+# Inherit from DynaLoader and put the "lib" directory at the front of
+# the path where it searches for shared libraries.
+our @ISA = qw(DynaLoader);
+unshift @DynaLoader::dl_library_path, "lib";
+
+our $VERSION = '0.01';
+
+# Load the compiled (XS) part of the module.
+bootstrap UCW::Filelock $VERSION;
+
+# Preloaded methods go here.
+
+1;
+__END__
--- /dev/null
+/*
+ * PerlXS module for managing file locks
+ *
+ * (c) 2007 Pavel Charvat <pchar@ucw.cz>
+ */
+
+#include "EXTERN.h"
+#include "perl.h"
+#include "XSUB.h"
+
+#include <unistd.h>
+#include <fcntl.h>
+
+
+MODULE = UCW::Filelock PACKAGE = UCW::Filelock
+
+PROTOTYPES: ENABLED
+
+int
+fcntl_lock(IN int fd, IN int cmd, IN int type, IN int whence, IN int start, IN int len)
+CODE:
+ /* Pack the arguments into a struct flock and hand it to fcntl();
+ * the return value of fcntl() is passed back to Perl unchanged.
+ * Only the four members assigned below are initialized. */
+ struct flock fl;
+ fl.l_type = type;
+ fl.l_whence = whence;
+ fl.l_start = start;
+ fl.l_len = len;
+
+ RETVAL = fcntl(fd, cmd, &fl);
+OUTPUT:
+ RETVAL
--- /dev/null
+Makefile
+Makefile.PL
+MANIFEST
+Fcntllock.pm
+Fcntllock.xs
+lib/Sherlock/.exists
--- /dev/null
+# Makefile for the Filelock Perl module (c) 2007 Pavel Charvat <pchar@ucw.cz>
+
+# Directory in the object tree that receives the compiled module
+DIRS+=ucw/perl/Filelock/arch/auto/UCW/Filelock
+FILELOCK_DIR=ucw/perl/Filelock
+
+PROGS+=$(o)/ucw/perl/Filelock/Filelock.pm
+
+extras:: $(o)/ucw/perl/Filelock/arch/auto/UCW/Filelock/Filelock.$(SOEXT)
+
+# Build the shared object by running the Makefile generated from Makefile.PL
+# inside the object tree, then copy the result to the run tree.
+$(o)/ucw/perl/Filelock/arch/auto/UCW/Filelock/Filelock.$(SOEXT): $(o)/$(FILELOCK_DIR)/Filelock.xs $(o)/$(FILELOCK_DIR)/Filelock.pm $(o)/$(FILELOCK_DIR)/Makefile
+ $(M)MAKE $@
+ $(Q)cd $(o)/$(FILELOCK_DIR) && $(MAKE) -f Makefile $(MAKESILENT)
+ $(Q)touch $@
+ $(Q)cp $@ run/$(DATADIR)/
+
+# Generate the module's own Makefile from Makefile.PL
+$(o)/$(FILELOCK_DIR)/Makefile: $(o)/$(FILELOCK_DIR)/Makefile.PL
+ $(M)PREPARE $@
+ $(Q)cd $(o)/$(FILELOCK_DIR) && perl Makefile.PL
+
+# The module is built inside the object tree, so copy its sources there first
+$(o)/$(FILELOCK_DIR)/Filelock.xs: $(s)/$(FILELOCK_DIR)/Filelock.xs
+ $(Q)cp $^ $@
+
+$(o)/$(FILELOCK_DIR)/Makefile.PL: $(s)/$(FILELOCK_DIR)/Makefile.PL
+ $(Q)cp $^ $@
+
+# Installation: the .pm file goes to the Perl directory, the shared object to the library directory
+INSTALL_TARGETS+=install-perl-ucw-filelock
+install-perl-ucw-filelock:
+ install -d -m 755 $(DESTDIR)$(INSTALL_PERL_DIR)/UCW/ $(DESTDIR)$(INSTALL_LIB_DIR)
+ install -m 644 $(s)/$(FILELOCK_DIR)/Filelock.pm $(DESTDIR)$(INSTALL_PERL_DIR)/UCW/
+ install -m 644 $(o)/ucw/perl/Filelock/arch/auto/UCW/Filelock/Filelock.$(SOEXT) $(DESTDIR)$(INSTALL_LIB_DIR)
+.PHONY: install-perl-ucw-filelock
--- /dev/null
+# Makefile for Perl MakeMaker (c) 2007 Pavel Charvat <pchar@ucw.cz>
+
+use ExtUtils::MakeMaker;
+# The module version is read from Filelock.pm; build products are placed
+# in the local "lib" and "arch" directories.
+WriteMakefile(
+ 'NAME' => 'UCW::Filelock',
+ 'VERSION_FROM' => 'Filelock.pm',
+ 'INST_LIB' => 'lib',
+ 'INST_ARCHLIB' => 'arch',
+);
--- /dev/null
+# Perl modules
+
+DIRS+=ucw/perl
+
+# This makefile is always included
+include $(s)/ucw/perl/UCW/Makefile
+
+# These modules are built only when CONFIG_UCW_PERL_MODULES is set
+ifdef CONFIG_UCW_PERL_MODULES
+include $(s)/ucw/perl/Ulimit/Makefile
+include $(s)/ucw/perl/Filelock/Makefile
+endif
--- /dev/null
+# Poor Man's CGI Module for Perl
+#
+# (c) 2002--2007 Martin Mares <mj@ucw.cz>
+# Slightly modified by Tomas Valla <tom@ucw.cz>
+#
+# This software may be freely distributed and used according to the terms
+# of the GNU Lesser General Public License.
+
+# FIXME:
+# - respond with proper HTTP error codes
+# - if we get invalid parameters, generate HTTP error or redirect
+
+package UCW::CGI;
+
+# First of all, set up error handling, so that even errors during parsing
+# will be reported properly.
+
+# Variables to be set by the calling module:
+# $UCW::CGI::error_mail mail address of the script admin (optional)
+# (this one has to be set in the BEGIN block!)
+# $UCW::CGI::error_hook function to be called for reporting errors
+
+my $error_reported;
+my $exit_code;
+my $debug = 0;
+
+# Report an internal error: the first time it is called, the message is
+# printed to STDERR and then either passed to $UCW::CGI::error_hook or
+# printed as a plain-text response. In every case the function ends by die-ing.
+sub report_bug($)
+{
+ # Only the first error is reported; later calls just die.
+ if (!defined $error_reported) {
+ $error_reported = 1;
+ print STDERR $_[0];
+ if (defined($UCW::CGI::error_hook)) {
+ &$UCW::CGI::error_hook($_[0]);
+ } else {
+ print "Content-type: text/plain\n\n";
+ print "Internal bug:\n";
+ print $_[0], "\n";
+ print "Please notify $UCW::CGI::error_mail\n" if defined $UCW::CGI::error_mail;
+ }
+ }
+ die;
+}
+
+# Install the handlers at compile time, so that both die() and warn()
+# (warnings are treated as errors) end up in report_bug().
+BEGIN {
+ $SIG{__DIE__} = sub { report_bug($_[0]); };
+ $SIG{__WARN__} = sub { report_bug("WARNING: " . $_[0]); };
+ $exit_code = 0;
+}
+
+# Force the process exit status to $exit_code, whatever it was before.
+END {
+ $? = $exit_code;
+}
+
+use strict;
+use warnings;
+
+require Exporter;
+our $VERSION = 1.0;
+our @ISA = qw(Exporter);
+our @EXPORT = qw(&html_escape &url_escape &url_deescape &url_param_escape &url_param_deescape &self_ref &self_form &http_get);
+our @EXPORT_OK = qw();
+
+### Escaping ###
+
+# Replace every character outside the safe set (letters, digits,
+# -$_.!*'(), and bytes 0x80-0xff) by a %xx escape with lowercase hex digits.
+sub url_escape($) {
+	my ($str) = @_;
+	$str =~ s/([^-\$_.!*'(),0-9A-Za-z\x80-\xff])/"%".unpack('H2',$1)/ge;
+	return $str;
+}
+
+# Replace every "%" followed by two characters by the byte that those
+# two characters denote when read as hexadecimal digits.
+sub url_deescape($) {
+	my ($str) = @_;
+	$str =~ s/%(..)/pack("H2",$1)/ge;
+	return $str;
+}
+
+# Escape a string like url_escape() does, but encode spaces as "+"
+# instead of "%20".
+sub url_param_escape($) {
+	my ($str) = @_;
+	(my $escaped = url_escape($str)) =~ s/%20/+/g;
+	return $escaped;
+}
+
+# Inverse of url_param_escape(): turn every "+" into a space and then
+# decode the %xx escapes with url_deescape().
+sub url_param_deescape($) {
+	my ($str) = @_;
+	$str =~ tr/+/ /;
+	return url_deescape($str);
+}
+
+# Escape the characters which are special in HTML text and in
+# double-quoted attribute values (& < > ") by the corresponding entities.
+sub html_escape($) {
+	my $x = shift @_;
+	# The ampersand has to be handled first, otherwise it would re-escape
+	# the entities produced by the following substitutions.
+	$x =~ s/&/&amp;/g;
+	$x =~ s/</&lt;/g;
+	$x =~ s/>/&gt;/g;
+	$x =~ s/"/&quot;/g;
+	return $x;
+}
+
+### Analysing RFC 822 Style Headers ###
+
+# Normalize an RFC 822 style header value: backslash escapes and the
+# contents of quoted strings are turned into %xx escapes, comments in
+# parentheses are removed and superfluous whitespace is dropped.
+# Use rfc822_deescape() to decode the %xx escapes in the result.
+sub rfc822_prepare($) {
+ my $x = shift @_;
+ # Convert all %'s and backslash escapes to %xx escapes
+ $x =~ s/%/%25/g;
+ $x =~ s/\\(.)/"%".unpack("H2",$1)/ge;
+ # Remove all comments, beware, they can be nested (unterminated comments are closed at EOL automatically)
+ while ($x =~ s/^(("[^"]*"|[^"(])*(\([^)]*)*)(\([^()]*(\)|$))/$1 /) { }
+ # Remove quotes and escape dangerous characters inside (again closing at the end automatically)
+ $x =~ s{"([^"]*)("|$)}{my $z=$1; $z =~ s/([^0-9a-zA-Z%_-])/"%".unpack("H2",$1)/ge; $z;}ge;
+ # All control characters are properly escaped, tokens are clearly visible.
+ # Finally remove all unnecessary spaces.
+ $x =~ s/\s+/ /g;
+ $x =~ s/(^ | $)//g;
+ $x =~ s{\s*([()<>@,;:\\"/\[\]?=])\s*}{$1}g;
+ return $x;
+}
+
+# Decode the %xx escapes in a string; this is just url_deescape() under another name.
+sub rfc822_deescape($) {
+ my $x = shift @_;
+ return url_deescape($x);
+}
+
+### Reading of HTTP headers ###
+
+# Look up a request header in the environment: the name is upper-cased
+# with "-" turned into "_", then HTTP_<name> is tried first and the bare
+# <name> second (a false value of the former also falls through to the latter).
+sub http_get($) {
+	my ($name) = @_;
+	$name =~ tr/a-z-/A-Z_/;
+	return $ENV{"HTTP_$name"} || $ENV{"$name"};
+}
+
+### Parsing of Arguments ###
+
+my $arg_table;
+
+# Parse a string of name=value pairs separated by "&" or ":" and store
+# the values of all names known to $arg_table into the variables the
+# table refers to. Unknown names and items without "=" are skipped.
+sub parse_arg_string($) {
+ my ($s) = @_;
+ # NOTE(review): without /g this removes only the first run of whitespace -- confirm that this is intended.
+ $s =~ s/\s+//;
+ foreach $_ (split /[&:]/,$s) {
+ (/^([^=]+)=(.*)$/) or next;
+ my $arg = $arg_table->{$1} or next;
+ $_ = $2;
+ # Decode the value: "+" means space, %xx is a byte given in hex
+ s/\+/ /g;
+ s/%(..)/pack("H2",$1)/eg;
+ # Normalize line endings to \n
+ s/\r\n/\n/g;
+ s/\r/\n/g;
+ # Unless the argument is declared as multiline, newlines and tabs become spaces
+ $arg->{'multiline'} || s/(\n|\t)/ /g;
+ s/^\s+//;
+ s/\s+$//;
+ # A value which does not match the 'check' regex is replaced by the default
+ if (my $rx = $arg->{'check'}) {
+ if (!/^$rx$/) { $_ = $arg->{'default'}; }
+ }
+
+ # Scalar targets are overwritten, array targets collect all values
+ my $r = ref($arg->{'var'});
+ if ($r eq 'SCALAR') {
+ ${$arg->{'var'}} = $_;
+ } elsif ($r eq 'ARRAY') {
+ push @{$arg->{'var'}}, $_;
+ }
+ }
+}
+
+sub parse_multipart_form_data();
+
+# Main entry point of argument parsing. The parameter is a reference to
+# a hash which maps argument names to descriptions (keys used here and in
+# parse_arg_string(): 'var', 'default', 'check', 'multiline').
+# All variables are first set to their defaults, then the query string
+# and (for POST requests) the request body are parsed.
+sub parse_args($) {
+ $arg_table = shift @_;
+ if (!defined $ENV{"GATEWAY_INTERFACE"}) {
+ print STDERR "Must be called as a CGI script.\n";
+ $exit_code = 1;
+ exit;
+ }
+ # Initialize all variables: scalars get the default ("" if none is given), arrays are emptied
+ foreach my $a (values %$arg_table) {
+ my $r = ref($a->{'var'});
+ defined($a->{'default'}) or $a->{'default'}="";
+ if ($r eq 'SCALAR') {
+ ${$a->{'var'}} = $a->{'default'};
+ } elsif ($r eq 'ARRAY') {
+ @{$a->{'var'}} = ();
+ }
+ }
+ my $method = $ENV{"REQUEST_METHOD"};
+ my $qs = $ENV{"QUERY_STRING"};
+ # The query string is parsed for every request method
+ parse_arg_string($qs) if defined($qs);
+ if ($method eq "GET") {
+ # Nothing more to do, GET requests carry no body to parse
+ } elsif ($method eq "POST") {
+ if ($ENV{"CONTENT_TYPE"} =~ /^application\/x-www-form-urlencoded\b/i) {
+ # URL-encoded body: every line of input is parsed like a query string
+ while (<STDIN>) {
+ chomp;
+ parse_arg_string($_);
+ }
+ } elsif ($ENV{"CONTENT_TYPE"} =~ /^multipart\/form-data\b/i) {
+ parse_multipart_form_data();
+ } else {
+ die "Unknown content type for POST data";
+ }
+ } else {
+ die "Unknown request method";
+ }
+}
+
+### Parsing Multipart Form Data ###
+
+my $boundary;
+my $boundary_len;
+my $mp_buffer;
+my $mp_buffer_i;
+my $mp_buffer_boundary;
+my $mp_eof;
+
+# Make sure that enough multipart data is buffered and return the number
+# of bytes available at $mp_buffer_i before the next boundary (or, when
+# no boundary is in sight, before the tail of the buffer which could
+# contain the beginning of one). $more is the number of bytes the caller
+# would like to have; data are read from STDIN in blocks of 2048 bytes.
+sub refill_mp_data($) {
+ my ($more) = @_;
+ if ($mp_buffer_boundary >= $mp_buffer_i) {
+ # A boundary is already known to lie ahead: everything up to it is available
+ return $mp_buffer_boundary - $mp_buffer_i;
+ } elsif ($mp_buffer_i + $more <= length($mp_buffer) - $boundary_len) {
+ # Enough data buffered, not counting the last $boundary_len bytes
+ return $more;
+ } else {
+ # Drop the consumed part of the buffer, so that the position is 0 again
+ if ($mp_buffer_i) {
+ $mp_buffer = substr($mp_buffer, $mp_buffer_i);
+ $mp_buffer_i = 0;
+ }
+ # Read more data until there is enough of it or the input ends
+ while ($mp_buffer_i + $more > length($mp_buffer) - $boundary_len) {
+ last if $mp_eof;
+ my $data;
+ my $n = read(STDIN, $data, 2048);
+ if ($n > 0) {
+ $mp_buffer .= $data;
+ } else {
+ $mp_eof = 1;
+ }
+ }
+ # The position is 0 here, so the offsets below are also byte counts
+ $mp_buffer_boundary = index($mp_buffer, $boundary, $mp_buffer_i);
+ if ($mp_buffer_boundary >= 0) {
+ return $mp_buffer_boundary;
+ } elsif ($mp_eof) {
+ return length($mp_buffer);
+ } else {
+ return length($mp_buffer) - $boundary_len;
+ }
+ }
+}
+
+# Read one line (terminated by CR LF) of the multipart data and return it
+# without the terminator. If no complete line is available and
+# $allow_empty is true, the remaining data are returned as an incomplete
+# line, or undef when there are none; if $allow_empty is false, it dies.
+sub get_mp_line($) {
+ my ($allow_empty) = @_;
+ my $n = refill_mp_data(1024);
+ my $i = index($mp_buffer, "\r\n", $mp_buffer_i);
+ # The whole CR LF pair must lie within the $n available bytes
+ if ($i >= $mp_buffer_i && $i < $mp_buffer_i + $n - 1) {
+ my $s = substr($mp_buffer, $mp_buffer_i, $i - $mp_buffer_i);
+ $mp_buffer_i = $i + 2;
+ return $s;
+ } elsif ($allow_empty) {
+ if ($n) { # An incomplete line
+ my $s = substr($mp_buffer, $mp_buffer_i, $n);
+ $mp_buffer_i += $n;
+ return $s;
+ } else { # No more lines
+ return undef;
+ }
+ } else {
+ die "Premature end of multipart POST data";
+ }
+}
+
+# Skip the boundary line at the current position.
+#
+# Dies unless the current position is exactly at a known boundary.
+# Returns 0 if the boundary was the closing one (followed by "--"),
+# 1 if another part follows.
+sub skip_mp_boundary() {
+	if ($mp_buffer_boundary != $mp_buffer_i) {
+		die "Premature end of multipart POST data";
+	}
+	$mp_buffer_boundary = -1;
+	# Step over the CR LF which starts the boundary, then read the rest of it
+	$mp_buffer_i += 2;
+	my $b = get_mp_line(0);
+	print STDERR "SEP $b\n" if $debug;
+	# Look for the boundary which ends the part we are about to enter
+	$mp_buffer_boundary = index($mp_buffer, $boundary, $mp_buffer_i);
+	# The boundary is arbitrary client-supplied text and may legally contain
+	# regex metacharacters such as "+", "(" or "?", so match it literally.
+	if ("\r\n$b" =~ /^\Q$boundary\E--/) {
+		return 0;
+	} else {
+		return 1;
+	}
+}
+
+# Parse the header of one part.
+#
+# Reads lines up to the first empty one. Returns a reference to a hash mapping
+# lower-cased header names to values processed by rfc822_prepare(), or undef
+# if no header field has been found.
+sub parse_mp_header() {
+	my $h = {};
+	my $last;
+	while ((my $l = get_mp_line(0)) ne "") {
+		print STDERR "HH $l\n" if $debug;
+		# A header field starts at the beginning of the line. The pattern is
+		# anchored so that a folded continuation line which happens to contain
+		# "word:" (e.g. filename="C:\x") is not taken for a new field.
+		if (my ($name, $value) = ($l =~ /^([A-Za-z0-9-]+)\s*:\s*(.*)/)) {
+			$name =~ tr/A-Z/a-z/;
+			$h->{$name} = $value;
+			$last = $name;
+		} elsif ($l =~ /^\s+/ && $last) {
+			# Continuation of the previous field
+			$h->{$last} .= $l;
+		} else {
+			$last = undef;
+		}
+	}
+	foreach my $n (keys %$h) {
+		$h->{$n} = rfc822_prepare($h->{$n});
+		print STDERR "H $n: $h->{$n}\n" if $debug;
+	}
+	return (keys %$h) ? $h : undef;
+}
+
+# Parse a request body of type multipart/form-data read from STDIN.
+#
+# Parts whose name is found in $arg_table are either passed to
+# parse_arg_string() line by line (arguments with "var") or stored to
+# a temporary file (arguments with "file"); all other parts are skipped.
+# Dies on malformed or oversized input.
+sub parse_multipart_form_data() {
+ # First of all, find the boundary string
+ my $ct = rfc822_prepare($ENV{"CONTENT_TYPE"});
+ if (!(($boundary) = ($ct =~ /^.*;boundary=([^; ]+)/))) {
+ die "Multipart content with no boundary string received";
+ }
+ $boundary = rfc822_deescape($boundary);
+ print STDERR "BOUNDARY IS $boundary\n" if $debug;
+
+ # BUG: IE 3.01 on Macintosh forgets to add the "--" at the start of the boundary string
+ # as the MIME specs preach. Workaround borrowed from CGI.pm in Perl distribution.
+ my $agent = http_get("User-agent") || "";
+ $boundary = "--$boundary" unless $agent =~ /MSIE\s+3\.0[12];\s*Mac/;
+ $boundary = "\r\n$boundary";
+ $boundary_len = length($boundary) + 2;
+
+ # Check upload size in advance
+ # NOTE(review): the default per-argument limit here is 65536, while the file
+ # upload branch below defaults to 1048576 -- confirm this is intended.
+ if (my $size = http_get("Content-Length")) {
+ my $max_allowed = 0;
+ foreach my $a (values %$arg_table) {
+ $max_allowed += $a->{"maxsize"} || 65536;
+ }
+ if ($size > $max_allowed) {
+ die "Maximum form data length exceeded";
+ }
+ }
+
+ # Initialize our buffering mechanism and part splitter
+ # (the buffer starts with CR LF, so a boundary at the very start of the body is matched, too)
+ $mp_buffer = "\r\n";
+ $mp_buffer_i = 0;
+ $mp_buffer_boundary = -1;
+ $mp_eof = 0;
+
+ # Skip garbage before the 1st part
+ while (my $i = refill_mp_data(256)) { $mp_buffer_i += $i; }
+ skip_mp_boundary() || return;
+
+ # Process individual parts
+ do { PART: {
+ print STDERR "NEXT PART\n" if $debug;
+ my $h = parse_mp_header();
+ my ($field, $cdisp, $a);
+ if ($h &&
+ ($cdisp = $h->{"content-disposition"}) &&
+ $cdisp =~ /^form-data/ &&
+ (($field) = ($cdisp =~ /;name=([^;]+)/)) &&
+ ($a = $arg_table->{"$field"})) {
+ print STDERR "FIELD $field\n" if $debug;
+ if (defined $h->{"content-transfer-encoding"}) { die "Unexpected Content-Transfer-Encoding"; }
+ if (defined $a->{"var"}) {
+ # Ordinary argument: hand over each line of the part as "field=line"
+ while (defined (my $l = get_mp_line(1))) {
+ print STDERR "VALUE $l\n" if $debug;
+ parse_arg_string("$field=$l");
+ }
+ next PART;
+ } elsif (defined $a->{"file"}) {
+ # File upload: copy the part to a temporary file
+ require File::Temp;
+ require IO::Handle;
+ my $max_size = $a->{"maxsize"} || 1048576;
+ my @tmpargs = (undef, UNLINK => 1);
+ push @tmpargs, DIR => $a->{"tmpdir"} if defined $a->{"tmpdir"};
+ my ($fh, $fn) = File::Temp::tempfile(@tmpargs);
+ print STDERR "FILE UPLOAD to $fn\n" if $debug;
+ ${$a->{"file"}} = $fn;
+ ${$a->{"fh"}} = $fh if defined $a->{"fh"};
+ my $total_size = 0;
+ while (my $i = refill_mp_data(4096)) {
+ print $fh substr($mp_buffer, $mp_buffer_i, $i);
+ $mp_buffer_i += $i;
+ $total_size += $i;
+ if ($total_size > $max_size) { die "Uploaded file too long"; }
+ }
+ $fh->flush(); # Don't close the handle, the file would disappear otherwise
+ next PART;
+ }
+ }
+ # Unknown or unwanted part: advance to the next boundary
+ print STDERR "SKIPPING\n" if $debug;
+ while (my $i = refill_mp_data(256)) { $mp_buffer_i += $i; }
+ } } while (skip_mp_boundary());
+}
+
+### Generating Self-ref URL's ###
+
+# Build the set of arguments to be passed on in a self-referencing URL or form.
+#
+# Argument: reference to a hash of overrides (name => value; an undefined value
+# means "use the default"). Returns a reference to a hash containing only those
+# arguments whose resulting value differs from their default.
+sub make_out_args($) {
+	my ($overrides) = @_;
+	my %result;
+	for my $name (keys %$arg_table) {
+		my $spec = $arg_table->{$name};
+		next unless defined($spec->{'var'});
+		# Arguments marked as pass => 0 are propagated only when overridden
+		next if defined($spec->{'pass'}) && !$spec->{'pass'} && !exists $overrides->{$name};
+		my $val = $overrides->{$name};
+		unless (defined $val) {
+			$val = (exists $overrides->{$name})
+				? $spec->{'default'}
+				: ${$spec->{'var'}};
+		}
+		$result{$name} = $val if $val ne $spec->{'default'};
+	}
+	return \%result;
+}
+
+# Return a query string ("?name=value:name=value...") referring back to this
+# script, with the given name => value overrides applied.
+sub self_ref(@) {
+	my %overrides = @_;
+	my $args = make_out_args(\%overrides);
+	my @pairs;
+	foreach my $name (sort keys %$args) {
+		push @pairs, "$name=" . url_param_escape($args->{$name});
+	}
+	return "?" . join(':', @pairs);
+}
+
+# Return hidden <input> fields carrying the current arguments, with the given
+# name => value overrides applied, one field per line.
+sub self_form(@) {
+	my %overrides = @_;
+	my $args = make_out_args(\%overrides);
+	my $html = '';
+	foreach my $name (sort keys %$args) {
+		$html .= "<input type=hidden name=$name value='" . html_escape($args->{$name}) . "'>\n";
+	}
+	return $html;
+}
+
+### Cookies
+
+sub set_cookie($$@) {
+	#
+	# Print a Set-Cookie header. Browser support for the newer cookie
+	# standard (RFC 2965) is still rare, so the old Netscape format is used.
+	#
+	# Usage: set_cookie(name, value, option => value...), where options are:
+	#
+	#	max-age		maximal age in seconds
+	#	domain		domain name scope
+	#	path		path name scope
+	#	secure		if present, cookie applies only to SSL connections
+	#			(in this case, the value should be undefined)
+	#	discard		if present with any value, the cookie is discarded
+	#
+
+	my ($key, $value, %other) = @_;
+
+	# "discard" is expressed as a cookie with zero maximal age
+	if (exists $other{'discard'}) {
+		delete $other{'discard'};
+		$other{'max-age'} = 0;
+	}
+
+	# Translate max-age to an absolute "expires" time stamp
+	my $age = $other{'max-age'};
+	if (defined $age) {
+		delete $other{'max-age'};
+		my $expires_at = $age ? (time + $age) : 0;
+		# Day and month names are spelled out here to avoid problems with locales
+		my @day_names = ( 'Sun', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat' );
+		my @month_names = ( 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec' );
+		my ($sec, $min, $hour, $mday, $mon, $year, $wday) = gmtime $expires_at;
+		$other{'expires'} = sprintf("%s, %02d-%s-%d %02d:%02d:%02d GMT",
+			$day_names[$wday], $mday, $month_names[$mon], $year+1900, $hour, $min, $sec);
+	}
+
+	print "Set-Cookie: $key=", url_escape($value);
+	for my $opt (keys %other) {
+		print "; $opt";
+		print "=", $other{$opt} if defined $other{$opt};
+	}
+	print "\n";
+}
+
+# Parse the Cookie header of the request.
+#
+# Returns a flat list of name => value pairs (values are URL-deescaped; a cookie
+# without "=" gets an undefined value), or an empty list if no cookies were sent.
+sub parse_cookies() {
+	my $h = http_get("Cookie") or return ();
+	my @cook = ();
+	foreach my $x (split /;\s*/, $h) {
+		# Split at the first "=" only: the value itself may contain "="
+		# (base64 padding, for example) and must not be cut short.
+		my ($k,$v) = split /=/, $x, 2;
+		$v = url_deescape($v) if defined $v;
+		push @cook, $k => $v;
+	}
+	return @cook;
+}
+
+1; # OK
--- /dev/null
+# Perl module for parsing Sherlock configuration files (using the config utility)
+#
+# (c) 2002--2005 Martin Mares <mj@ucw.cz>
+#
+# This software may be freely distributed and used according to the terms
+# of the GNU Lesser General Public License.
+
+package UCW::Config;
+
+use strict;
+use warnings;
+use Getopt::Long;
+
+our %Sections = ();
+
+our $DefaultConfigFile = "";
+our $Usage = "-C, --config filename Override the default configuration file
+-S, --set sec.item=val Manual setting of a configuration item";
+
+
+# Parse command-line options and load configuration items.
+#
+# Arguments: a Getopt::Long option specification; the -C/--config and -S/--set
+# options are added automatically and handed over to bin/config.
+# For every section in %Sections, bin/config is run and its output is stored
+# to the scalar or array references registered there.
+# Returns 1 on success, 0 when option parsing fails; exits with status 1 when
+# bin/config fails.
+sub Parse(@) {
+	my @options = @_;
+	my $defargs = "";
+	my $override_config = 0;
+	# NOTE(review): the values are put inside single quotes of a shell command,
+	# so a value containing "'" will break the command line.
+	push @options, "config|C=s" => sub { my ($o,$a)=@_; $defargs .= " -C'$a'"; $override_config=1; };
+	push @options, "set|S=s" => sub { my ($o,$a)=@_; $defargs .= " -S'$a'"; };
+	Getopt::Long::Configure("bundling");
+	Getopt::Long::GetOptions(@options) or return 0;
+	if (!$override_config && $DefaultConfigFile) {
+		$defargs = "-C'$DefaultConfigFile' $defargs";
+	}
+	foreach my $section (keys %Sections) {
+		my $opts = $Sections{$section};
+		my $optlist = join(";", keys %$opts);
+		# Item names may carry trailing type marks ("#", "$"), which do not
+		# appear in the names printed by bin/config
+		my %filtered_opts = map { my $t=$_; $t=~s/[#\$]+$//; $t => $$opts{$_} } keys %$opts;
+		my @l = `bin/config $defargs "$section\{$optlist\}"`;
+		$? && exit 1;
+		foreach my $o (@l) {
+			# Report the offending line itself ($_ is not set in this loop)
+			$o =~ /^CF_.*_([^=]+)='(.*)'\n$/ or die "Cannot parse bin/config output: $o";
+			my $var = $filtered_opts{$1};
+			my $val = $2;
+			if (ref $var eq "SCALAR") {
+				$$var = $val;
+			} elsif (ref $var eq "ARRAY") {
+				push @$var, $val;
+			} elsif (ref $var) {
+				die ("UCW::Config::Parse: don't know how to set $o");
+			}
+		}
+	}
+	1;
+}
+
+1; # OK
--- /dev/null
+# Perl module for UCW Configure Scripts
+#
+# (c) 2005--2008 Martin Mares <mj@ucw.cz>
+#
+# This software may be freely distributed and used according to the terms
+# of the GNU Lesser General Public License.
+
+package UCW::Configure;
+
+use strict;
+use warnings;
+
+# Set up the Exporter: all functions listed in @EXPORT are imported by every
+# module which says "use UCW::Configure".
+BEGIN {
+ # The somewhat hairy Perl export mechanism
+ use Exporter();
+ our ($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);
+ $VERSION = 1.0;
+ @ISA = qw(Exporter);
+ @EXPORT = qw(&Init &Log &Notice &Warn &Fail &IsSet &IsGiven &Set &UnSet &Append &Override &Get &Test &Include &Finish &FindFile &TryFindFile &DebugDump &PostConfig &AtWrite);
+ @EXPORT_OK = qw();
+ %EXPORT_TAGS = ();
+}
+
+# Current values of all configuration variables
+our %vars;
+# Names of variables set by Override(); Set() and UnSet() leave these alone
+our %overriden;
+# Hooks run at the start of Finish()
+our @postconfigs;
+# Hooks run at the end of Finish(), after obj/config.mk has been written
+our @atwrites;
+
+# Print all configuration variables with their values to standard output.
+sub DebugDump() {
+	print "VARS:\n";
+	for my $name (keys %vars) {
+		print "$name: $vars{$name}\n";
+	}
+}
+
+# Print a message to standard output (no newline is added).
+sub Log($) {
+ print @_;
+}
+
+# Print a message to standard output, but only if VERBOSE is set to a true value.
+sub Notice($) {
+	$vars{"VERBOSE"} and print @_;
+}
+
+# Print a message prefixed by "WARNING: " to standard output.
+sub Warn($) {
+ print "WARNING: ", @_;
+}
+
+# Log an error message (a newline is appended) and exit with status 1.
+sub Fail($) {
+	my $msg = shift @_;
+	Log("ERROR: " . $msg . "\n");
+	exit 1;
+}
+
+# Return true if the given variable exists (regardless of its value).
+sub IsSet($) {
+	return exists $vars{$_[0]};
+}
+
+# Return true if the given variable has been set by Override().
+sub IsGiven($) {
+	return exists $overriden{$_[0]};
+}
+
+# Return the value of the given variable (undef if it does not exist).
+sub Get($) {
+	return $vars{$_[0]};
+}
+
+# Set a variable to the given value (1 if no value is given).
+# Variables which have been overridden keep their value.
+sub Set($;$) {
+	my ($name, $value) = @_;
+	defined $value or $value = 1;
+	$vars{$name} = $value unless $overriden{$name};
+}
+
+# Remove a variable, unless it has been overridden.
+sub UnSet($) {
+	my $name = $_[0];
+	delete $vars{$name} unless $overriden{$name};
+}
+
+# Append a word to a variable: the new value is joined to the old one by
+# a space; if the variable does not exist yet, it is simply set.
+sub Append($$) {
+	my ($name, $word) = @_;
+	if (IsSet($name)) {
+		Set($name, Get($name) . " $word");
+	} else {
+		Set($name, $word);
+	}
+}
+
+# Set a variable unconditionally (to 1 if no value is given) and mark it
+# as overridden, so that later Set() and UnSet() calls do not change it.
+sub Override($;$) {
+	my ($name, $value) = @_;
+	defined $value or $value = 1;
+	$vars{$name} = $value;
+	$overriden{$name} = 1;
+}
+
+# Run a configuration test: log "$msg ... ", compute the variable by calling
+# $sub unless it is already set, and log the resulting value.
+sub Test($$$) {
+	my ($name, $title, $probe) = @_;
+	Log("$title ... ");
+	Set($name, $probe->()) unless IsSet($name);
+	Log(Get($name) . "\n");
+}
+
+# Look for a file, first as given and then (for relative names) under SRCDIR.
+# Returns the path found or undef.
+sub TryFindFile($) {
+	my ($name) = @_;
+	return $name if -f $name;
+	if ($name !~ /^\// && -f (Get("SRCDIR")."/$name")) {
+		return Get("SRCDIR")."/$name";
+	}
+	return undef;
+}
+
+# Like TryFindFile(), but fail if the file cannot be found.
+sub FindFile($) {
+	my ($f) = @_;
+	my $found = TryFindFile($f);
+	Fail "Cannot find file $f" unless defined $found;
+	return $found;
+}
+
+# Initialize the configuration from the command line (@ARGV).
+#
+# Arguments: $srcdir -- source directory (stored as SRCDIR),
+# $defconfig -- name of the default configuration or undef.
+# Sets CONFIG, processes all option arguments and locates the configuration
+# file; prints usage or an error and exits with status 1 on failure.
+sub Init($$) {
+ my ($srcdir,$defconfig) = @_;
+ # Print the usage message (the config name is shown as optional when a default exists) and exit
+ sub usage($) {
+ my ($dc) = @_;
+ print STDERR "Usage: [<srcdir>/]configure " . (defined $dc ? "[" : "") . "<config-name>" . (defined $dc ? "]" : "") .
+ " [<option>[=<value>] | -<option>] ...\n";
+ exit 1;
+ }
+ Set('CONFIG' => $defconfig) if defined $defconfig;
+ if (@ARGV) {
+ usage($defconfig) if $ARGV[0] eq "--help";
+ if (!defined($defconfig) || $ARGV[0] !~ /^-?[A-Z][A-Z0-9_]*(=|$)/) {
+ # This does not look like an option, so read it as a file name
+ Set('CONFIG' => shift @ARGV);
+ }
+ }
+ Set("SRCDIR", $srcdir);
+
+ # Options: NAME=value sets a value, -NAME removes the variable, NAME sets it to 1.
+ # All of them are recorded as overridden.
+ foreach my $x (@ARGV) {
+ if ($x =~ /^(\w+)=(.*)/) {
+ Override($1 => $2);
+ } elsif ($x =~ /^-(\w+)$/) {
+ Override($1 => 0);
+ delete $vars{$1};
+ } elsif ($x =~ /^(\w+)$/) {
+ Override($1 => 1);
+ } else {
+ print STDERR "Invalid option $x\n";
+ exit 1;
+ }
+ }
+
+ # CONFIG may name either a file or a directory containing a file called "config"
+ defined Get("CONFIG") or usage($defconfig);
+ if (!TryFindFile(Get("CONFIG"))) {
+ TryFindFile(Get("CONFIG")."/config") or Fail "Cannot find configuration " . Get("CONFIG");
+ Override("CONFIG" => Get("CONFIG")."/config");
+ }
+}
+
+# Find a configuration file (see FindFile) and load it by require.
+sub Include($) {
+	my $path = FindFile($_[0]);
+	Notice("Loading configuration $path\n");
+	require $path;
+}
+
+# Register a hook to be run at the start of Finish(). The hook is put to the
+# front of the list, so hooks run in reverse order of registration.
+sub PostConfig(&) {
+	my ($hook) = @_;
+	unshift @postconfigs, $hook;
+}
+
+# Register a hook to be run at the end of Finish(). The hook is put to the
+# front of the list, so hooks run in reverse order of registration.
+sub AtWrite(&) {
+	my ($hook) = @_;
+	unshift @atwrites, $hook;
+}
+
+# Finish the configuration: run the PostConfig hooks, prepare the build
+# directory, write obj/config.mk and run the AtWrite hooks.
+sub Finish() {
+ for my $post (@postconfigs) {
+ &$post();
+ }
+
+ print "\n";
+
+ # When building outside the source tree, make the sources and the top-level
+ # Makefile reachable through symlinks in the current directory
+ if (Get("SRCDIR") ne ".") {
+ Log "Preparing for compilation from directory " . Get("SRCDIR") . " to obj/ ... ";
+ -l "src" and unlink "src";
+ symlink Get("SRCDIR"), "src" or Fail "Cannot link source directory to src: $!";
+ Override("SRCDIR" => "src");
+ -l "Makefile" and unlink "Makefile";
+ -f "Makefile" and Fail "Makefile already exists";
+ symlink "src/Makefile", "Makefile" or Fail "Cannot link Makefile: $!";
+ } else {
+ Log "Preparing for compilation from current directory to obj/ ... ";
+ }
+ # Always start with a fresh obj directory
+ if (-d "obj") {
+ `rm -rf obj`; Fail "Cannot delete old obj directory" if $?;
+ }
+ -d "obj" or mkdir("obj", 0777) or Fail "Cannot create obj directory: $!";
+ -d "obj/ucw" or mkdir("obj/ucw", 0777) or Fail "Cannot create obj/ucw directory: $!";
+ Log "done\n";
+
+ # Dump all variables in make syntax, followed by the s and o shortcuts
+ Log "Generating config.mk ... ";
+ open X, ">obj/config.mk" or Fail $!;
+ print X "# Generated automatically by $0, please don't touch manually.\n";
+ foreach my $x (sort keys %vars) {
+ print X "$x=$vars{$x}\n";
+ }
+ print X "s=\${SRCDIR}\n";
+ print X "o=obj\n";
+ close X;
+ Log "done\n";
+
+ for my $wr (@atwrites) {
+ &$wr();
+ }
+}
+
+1; # OK
--- /dev/null
+# UCW Library configuration system: find UCW build system
+# (c) 2008 Michal Vaner <vorner@ucw.cz>
+
+# This module asks pkg-config for the path to the UCW build system
+# and sets the proper variable for it (or fails, since the build
+# system is considered crucial).
+
+package UCW::Configure::Build;
+use UCW::Configure;
+
+use strict;
+use warnings;
+
+# Unless BUILDSYS has been given on the command line, ask pkg-config where
+# the build system of libucw lives. Configuration fails if it cannot be found.
+if (!IsGiven("BUILDSYS")) {
+	Test("BUILDSYS", "Looking for UCW build system", sub {
+		my $path=`pkg-config libucw --variable=build_system`;
+		if($? || not defined $path) {
+			Fail("Not found (is libUCW installed and PKG_CONFIG_PATH set?)");
+		}
+		chomp $path;
+		# pkg-config succeeds with empty output when the package does not
+		# define the variable; an empty BUILDSYS would only break the
+		# makefiles later, so refuse it right here.
+		if ($path eq "") {
+			Fail("Not found (libucw does not define build_system in its pkg-config file)");
+		}
+		return $path;
+	});
+}
+
+# We succeeded
+1;
--- /dev/null
+# UCW Library configuration system: OS and C compiler
+# (c) 2005--2008 Martin Mares <mj@ucw.cz>
+# (c) 2006 Robert Spalek <robert@ucw.cz>
+# (c) 2008 Michal Vaner <vorner@ucw.cz>
+
+### OS ###
+
+package UCW::Configure::C;
+use UCW::Configure;
+
+use strict;
+use warnings;
+
+# Detect the operating system by running uname (unless OS is already set)
+Test("OS", "Checking on which OS we run", sub {
+ my $os = `uname`;
+ chomp $os;
+ Fail "Unable to determine OS type" if $? || $os eq "";
+ return $os;
+});
+
+# Only Linux and Darwin are supported; anything else stops the configuration
+if (Get("OS") eq "Linux") {
+ Set("CONFIG_LINUX");
+} elsif (Get("OS") eq "Darwin") {
+ Set("CONFIG_DARWIN");
+} else {
+ Fail "Don't know how to run on this operating system.";
+}
+
+### Compiler ###
+
+# Default compiler
+Test("CC", "Checking for C compiler", sub { return "gcc"; });
+
+# GCC version
+# (sed keeps only the first line of the output and extracts a "major.minor" number from it)
+Test("GCCVER", "Checking for GCC version", sub {
+ my $gcc = Get("CC");
+ my $ver = `$gcc --version | sed '2,\$d; s/^\\(.* \\)*\\([0-9]*\\.[0-9]*\\).*/\\2/'`;
+ chomp $ver;
+ Fail "Unable to determine GCC version" if $? || $ver eq "";
+ return $ver;
+});
+# $gccver encodes the version as 1000*major + minor, e.g. 3004 for GCC 3.4
+my ($gccmaj, $gccmin) = split(/\./, Get("GCCVER"));
+my $gccver = 1000*$gccmaj + $gccmin;
+$gccver >= 3000 or Fail "GCC older than 3.0 doesn't support C99 well enough.";
+
+### CPU ###
+
+# Detect the target architecture; the result is "i386", "amd64" or "unknown (<machine>)"
+Test("ARCH", "Checking for machine architecture", sub {
+ #
+ # We have to ask GCC for the target architecture, because it may
+ # differ from what uname tells us. This can happen even if we are
+ # not cross-compiling, for example on Linux with amd64 kernel, but
+ # i386 userspace.
+ #
+ my $gcc = Get("CC");
+ my $mach = `$gcc -dumpmachine 2>/dev/null`;
+ if (!$? && $mach ne "") {
+ # Keep only the first component of the target triplet
+ $mach =~ s/-.*//;
+ } else {
+ $mach = `uname -m`;
+ Fail "Unable to determine machine type" if $? || $mach eq "";
+ }
+ chomp $mach;
+ if ($mach =~ /^i[0-9]86$/) {
+ return "i386";
+ } elsif ($mach =~ /^(x86[_-]|amd)64$/) {
+ return "amd64";
+ } else {
+ return "unknown ($mach)";
+ }
+});
+
+# Read vendor, family and model of the first CPU from /proc/cpuinfo.
+#
+# Returns a list (vendor_id, cpu family, model); items which are not found
+# (or all of them, if the file cannot be opened) are undefined.
+sub parse_cpuinfo_linux() {
+	my %pc = ();
+	# The original "open X, ... || undef" applied "||" to the file name, so
+	# a failed open went unnoticed and the code read from a closed handle.
+	if (open(my $fh, "<", "/proc/cpuinfo")) {
+		local $_;	# do not clobber the caller's $_
+		while (<$fh>) {
+			chomp;
+			# The description of the first CPU ends at the first empty line
+			/^$/ && last;
+			/^([^\t]+)\t+:\s*(.*)$/ and $pc{$1}=$2;
+		}
+		close $fh;
+	}
+	return ($pc{'vendor_id'},
+		$pc{'cpu family'},
+		$pc{'model'});
+}
+
+# Ask sysctl for vendor, family and model of the CPU (in this order)
+# and return the answers with trailing newlines removed.
+sub parse_cpuinfo_darwin() {
+	my @answers = map { `sysctl -n machdep.cpu.$_` } qw(vendor family model);
+	chomp @answers;
+	return @answers;
+}
+
+# Return (vendor, family, model) of the CPU using the method appropriate for
+# the current OS. Missing items are replaced by "", 0 and 0 respectively.
+sub parse_cpuinfo() {
+	my @info = IsSet("CONFIG_LINUX") ? parse_cpuinfo_linux()
+		: IsSet("CONFIG_DARWIN") ? parse_cpuinfo_darwin()
+		: ();
+	my @fallback = ("", 0, 0);
+	for my $i (0 .. 2) {
+		$info[$i] = $fallback[$i] unless defined $info[$i];
+	}
+	return @info;
+}
+
+# Set the CPU_* properties according to ARCH and choose CPU_ARCH: "default",
+# "unknown", or (when CONFIG_EXACT_CPU is set) a CPU type name guessed from the
+# vendor, family and model of the processor; the name is empty if the guess
+# fails. A non-default name is later passed to the compiler as -march.
+Test("CPU_ARCH", "Checking for CPU architecture", sub {
+ my $mach = Get("ARCH");
+ my $arch = "";
+ if ($mach eq "i386") {
+ Set("CPU_I386");
+ UnSet("CPU_64BIT_POINTERS");
+ Set("CPU_LITTLE_ENDIAN");
+ UnSet("CPU_BIG_ENDIAN");
+ Set("CPU_ALLOW_UNALIGNED");
+ Set("CPU_STRUCT_ALIGN" => 4);
+ if (IsSet("CONFIG_EXACT_CPU")) {
+ my ($vendor, $family, $model) = parse_cpuinfo();
+ # Try to understand CPU vendor, family and model [inspired by MPlayer's configure script]
+ # (some CPU types are used only if $gccver is high enough)
+ if ($vendor eq "AuthenticAMD") {
+ if ($family >= 6) {
+ if ($model >= 31 && $gccver >= 3004) { $arch = "athlon64"; }
+ elsif ($model >= 6 && $gccver >= 3003) { $arch = "athlon-xp"; }
+ else { $arch = "athlon"; }
+ }
+ } elsif ($vendor eq "GenuineIntel") {
+ if ($family >= 15 && $gccver >= 3003) {
+ if ($model >= 4) { $arch = "nocona"; }
+ elsif ($model >= 3) { $arch = "prescott"; }
+ else { $arch = "pentium4"; }
+ } elsif ($family == 6 && $gccver >= 3003) {
+ if ($model == 23) { $arch = "nocona"; }
+ elsif ($model == 15) { $arch = "prescott"; }
+ elsif (($model == 9 || $model == 13) && $gccver >= 3004) { $arch = "pentium-m"; }
+ elsif ($model >= 7) { $arch = "pentium3"; }
+ elsif ($model >= 3) { $arch = "pentium2"; }
+ }
+ }
+
+ # No match on vendor, try the family
+ if ($arch eq "") {
+ if ($family >= 6) {
+ $arch = "i686";
+ } elsif ($family >= 3) {
+ $arch = "i${family}86";
+ }
+ }
+ # NOTE(review): the message mentions /proc/cpuinfo even on Darwin, where sysctl is used
+ Log (($arch ne "") ? "(using /proc/cpuinfo) " : "(don't understand /proc/cpuinfo) ");
+ return $arch;
+ } else {
+ return "default";
+ }
+ } elsif ($mach eq "amd64") {
+ Set("CPU_AMD64");
+ Set("CPU_64BIT_POINTERS");
+ Set("CPU_LITTLE_ENDIAN");
+ UnSet("CPU_BIG_ENDIAN");
+ Set("CPU_ALLOW_UNALIGNED");
+ Set("CPU_STRUCT_ALIGN" => 8);
+ if (IsSet("CONFIG_EXACT_CPU")) {
+ # In x86-64 world, the detection is somewhat easier so far...
+ my ($vendor, $family, $model) = parse_cpuinfo();
+ if ($vendor eq "AuthenticAMD") {
+ $arch = "athlon64";
+ } elsif ($vendor eq "GenuineIntel") {
+ $arch = "nocona";
+ }
+ Log (($arch ne "") ? "(using /proc/cpuinfo) " : "(don't understand /proc/cpuinfo) ");
+ return $arch;
+ } else {
+ return "default";
+ }
+ } else {
+ return "unknown";
+ }
+});
+
+if (Get("CPU_ARCH") eq "unknown") {
+ Warn "CPU architecture not recognized, using defaults, keep fingers crossed.\n";
+}
+
+### Compiler and its Options ###
+
+# C flags: tell the compiler we're speaking C99, and disable common symbols
+Set("CLANG" => "-std=gnu99 -fno-common");
+
+# C optimizations
+# (-march is added only when a specific CPU type has been detected; the
+# reference to $(CPU_ARCH) is left for make to expand)
+Set("COPT" => '-O2');
+if (Get("CPU_ARCH") ne "unknown" && Get("CPU_ARCH") ne "default") {
+ Append("COPT", '-march=$(CPU_ARCH)');
+}
+
+# C optimizations for highly exposed code
+Set("COPT2" => '-O3');
+
+# Warnings
+Set("CWARNS" => '-Wall -W -Wno-parentheses -Wstrict-prototypes -Wmissing-prototypes -Winline');
+Set("CWARNS_OFF" => '');
+
+# Linker flags
+Set("LOPT" => "");
+
+# Extra libraries
+Set("LIBS" => "");
+
+# Extra flags for compiling and linking shared libraries
+Set("CSHARED" => '-fPIC');
+if (IsSet("CONFIG_LOCAL")) {
+ Set("SONAME_PREFIX" => "lib/");
+} else {
+ Set("SONAME_PREFIX" => "");
+}
+if (IsSet("CONFIG_DARWIN")) {
+ Set("LSHARED" => '-dynamiclib -install_name $(SONAME_PREFIX)$(@F)$(SONAME_SUFFIX) -undefined dynamic_lookup');
+} else {
+ Set("LSHARED" => '-shared -Wl,-soname,$(SONAME_PREFIX)$(@F)$(SONAME_SUFFIX)');
+}
+
+# Extra switches depending on GCC version:
+# (only 3.0, 3.3, 3.4 and 4.x are known; other versions get a warning and no extra switches)
+if ($gccver == 3000) {
+ Append("COPT" => "-fstrict-aliasing");
+} elsif ($gccver == 3003) {
+ Append("CWARNS" => "-Wundef -Wredundant-decls");
+ Append("COPT" => "-finline-limit=20000 --param max-inline-insns-auto=1000");
+} elsif ($gccver == 3004) {
+ Append("CWARNS" => "-Wundef -Wredundant-decls");
+ Append("COPT" => "-finline-limit=2000 --param large-function-insns=5000 --param inline-unit-growth=200 --param large-function-growth=400");
+} elsif ($gccver >= 4000) {
+ Append("CWARNS" => "-Wundef -Wredundant-decls -Wno-pointer-sign -Wdisabled-optimization -Wno-missing-field-initializers");
+ Append("CWARNS_OFF" => "-Wno-pointer-sign");
+ Append("COPT" => "-finline-limit=5000 --param large-function-insns=5000 --param inline-unit-growth=200 --param large-function-growth=400");
+ if ($gccver >= 4002) {
+ Append("COPT" => "-fgnu89-inline");
+ }
+} else {
+ Warn "Don't know anything about this GCC version, using default switches.\n";
+}
+
+if (IsSet("CONFIG_DEBUG")) {
+ # If debugging:
+ Set("DEBUG_ASSERTS");
+ Set("DEBUG_DIE_BY_ABORT") if Get("CONFIG_DEBUG") > 1;
+ Set("CDEBUG" => "-ggdb");
+} else {
+ # If building a release version:
+ Append("COPT" => "-fomit-frame-pointer");
+ Append("LOPT" => "-s");
+}
+
+if (IsSet("CONFIG_DARWIN")) {
+ # gcc-4.0 on Darwin doesn't set this in the gnu99 mode
+ Append("CLANG" => "-fnested-functions");
+ # Directory hierarchy of the fink project
+ Append("LIBS" => "-L/sw/lib");
+ Append("COPT" => "-I/sw/include");
+ # Fill in some constants not found in the system header files
+ Set("SOL_TCP" => 6); # missing in /usr/include/netinet/tcp.h
+ # Direct I/O: fail if it has been requested explicitly, otherwise switch it off
+ if (IsGiven("CONFIG_DIRECT_IO") && IsSet("CONFIG_DIRECT_IO")) {
+ Fail("Direct I/O is not available on darwin");
+ } else {
+ UnSet("CONFIG_DIRECT_IO");
+ }
+ if (!IsSet("CONFIG_POSIX_REGEX") && !IsSet("CONFIG_PCRE")) {
+ Set("CONFIG_POSIX_REGEX" => 1);
+ Warn "BSD regex library on Darwin isn't compatible, using POSIX regex.\n";
+ }
+}
+
+### Writing C headers with configuration ###
+
+# Write a C header with selected configuration variables to obj/$hdr.
+#
+# Arguments: $hdr -- name of the header relative to obj/,
+# $rules -- reference to a flat list of (regex => flag) pairs. The first
+# rule whose regex matches the name of a variable decides: a true flag
+# exports the variable as a #define, a false one skips it. Variables
+# matching no rule are skipped.
+sub ConfigHeader($$) {
+	my ($hdr, $rules) = @_;
+	Log "Generating $hdr ... ";
+	open X, ">obj/$hdr" or Fail $!;
+	print X "/* Generated automatically by $0, please don't touch manually. */\n";
+
+	# Return the flag of the first rule matching $name, 0 if there is none
+	sub match_rules($$) {
+		my ($rules, $name) = @_;
+		# The list consists of pairs, so advance by two. Stepping by one
+		# would treat the flags (0 and 1) as patterns, too, and match any
+		# name which happens to contain such a digit.
+		for (my $i=0; $i < scalar @$rules; $i += 2) {
+			my ($r, $v) = ($rules->[$i], $rules->[$i+1]);
+			return $v if $name =~ $r;
+		}
+		return 0;
+	}
+
+	foreach my $x (sort keys %UCW::Configure::vars) {
+		next unless match_rules($rules, $x);
+		my $v = $UCW::Configure::vars{$x};
+		# Try to add quotes if necessary
+		# (values which are empty, purely numeric or already quoted are left alone)
+		$v = '"' . $v . '"' unless ($v =~ /^"/ || $v =~ /^\d*$/);
+		print X "#define $x $v\n";
+	}
+	close X;
+	Log "done\n";
+}
+
+# When the configuration is written, generate obj/autoconf.h as well
+AtWrite {
+ ConfigHeader("autoconf.h", [
+ # Symbols with "_" anywhere in their name are exported
+ "_" => 1
+ ]);
+};
+
+# Return success
+1;
--- /dev/null
+# UCW Library configuration system: documentation requirements
+# (c) 2008 Michal Vaner <vorner@ucw.cz>
+
+package UCW::Configure::Doc;
+use UCW::Configure;
+
+use strict;
+use warnings;
+
+# Unless documentation has been switched off explicitly, check that AsciiDoc
+# version 7 or newer is available and set CONFIG_DOC accordingly. If the user
+# asked for documentation and AsciiDoc is unusable, the configuration fails.
+if (!IsGiven("CONFIG_DOC") || IsSet("CONFIG_DOC")) {
+ Test("HAVE_ASCII_DOC", "Checking for AsciiDoc", sub {
+ my $version = `asciidoc --version 2>&1`;
+ return "none" if !defined $version || $version eq "";
+ # The version number is expected to be the last word of the output
+ my( $vnum ) = $version =~ / (\d+\.\S*)$/;
+ return $vnum;
+ });
+
+ my( $major ) = Get("HAVE_ASCII_DOC") =~ /^(\d+)/;
+ if (defined $major && $major >= 7) {
+ Set("CONFIG_DOC");
+ } else {
+ if (IsGiven("CONFIG_DOC")) {
+ Fail("Need asciidoc >= 7");
+ } else {
+ Warn("Need asciidoc >= 7 to build documentation");
+ UnSet("CONFIG_DOC");
+ }
+ }
+}
+
+# We succeeded
+1;
--- /dev/null
+# UCW Library configuration system: parameters of the library
+# (c) 2005--2008 Martin Mares <mj@ucw.cz>
+# (c) 2006 Robert Spalek <robert@ucw.cz>
+# (c) 2008 Michal Vaner <vorner@ucw.cz>
+
+package UCW::Configure::LibUCW;
+use UCW::Configure;
+
+use strict;
+use warnings;
+
+# Determine page size
+# (asks sysctl on Darwin and getconf on Linux)
+Test("CPU_PAGE_SIZE", "Determining page size", sub {
+ my $p;
+ if (IsSet("CONFIG_DARWIN")) {
+ $p = `sysctl -n hw.pagesize`;
+ defined $p or Fail "sysctl hw.pagesize failed";
+ } elsif (IsSet("CONFIG_LINUX")) {
+ $p = `getconf PAGE_SIZE`;
+ defined $p or Fail "getconf PAGE_SIZE failed";
+ }
+ chomp $p;
+ return $p;
+});
+
+if (IsSet("CONFIG_LARGE_FILES") && IsSet("CONFIG_LINUX")) {
+ # Use 64-bit versions of file functions
+ Set("CONFIG_LFS");
+}
+
+# Decide how will ucw/partmap.c work
+Set("CONFIG_UCW_PARTMAP_IS_MMAP") if IsSet("CPU_64BIT_POINTERS");
+
+# Option for ucw/mempool.c
+Set("CONFIG_UCW_POOL_IS_MMAP");
+
+# Guess optimal bit width of the radix-sorter
+if (Get("CPU_ARCH") eq "default" || Get("CPU_ARCH") =~ /^i[345]86$/) {
+ # This should be safe everywhere
+ Set("CONFIG_UCW_RADIX_SORTER_BITS" => 10);
+} else {
+ # Use this on modern CPU's
+ Set("CONFIG_UCW_RADIX_SORTER_BITS" => 12);
+}
+
+# Decisions postponed until the whole configuration has been read
+PostConfig {
+ # If the API is going to be installed, generate ucw/autoconf.h, too
+ AtWrite {
+ UCW::Configure::C::ConfigHeader("ucw/autoconf.h", [
+ # Excluded symbols (danger of collision)
+ '^CONFIG_DEBUG$' => 0,
+
+ # Included symbols
+ '^CONFIG_' => 1,
+ '^CPU_' => 1,
+ '^(SHERLOCK|UCW)_VERSION(_|$)' => 1,
+
+ ]);
+ } if Get("CONFIG_INSTALL_API");
+
+ # Include direct FB?
+ # (it needs both threads and direct I/O; fail if it has been requested
+ # explicitly without them, otherwise switch it off)
+ if (!IsSet("CONFIG_UCW_THREADS") || !IsSet("CONFIG_DIRECT_IO")) {
+ if (IsGiven("CONFIG_UCW_FB_DIRECT") && IsSet("CONFIG_UCW_FB_DIRECT")) {
+ if (!IsSet("CONFIG_UCW_THREADS")) {
+ Fail("CONFIG_UCW_FB_DIRECT needs CONFIG_UCW_THREADS");
+ } else {
+ Fail("CONFIG_UCW_FB_DIRECT needs CONFIG_DIRECT_IO");
+ }
+ }
+ UnSet("CONFIG_UCW_FB_DIRECT");
+ }
+};
+
+# We succeeded
+1;
--- /dev/null
+# Perl modules for Configure
+
+# Object and run-tree directories used by the modules
+DIRS+=ucw/perl/UCW/Configure
+EXTRA_RUNDIRS+=lib/perl5/UCW/Configure
+# List of modules built and installed from this directory
+# NOTE(review): LibUCW.pm is not listed here -- confirm that this is intentional
+UCW_CONFIGURE_PERL_MODULES=$(addsuffix .pm, C Doc Paths Pkg Build)
+CONFIGURE_MODULES=$(addprefix $(o)/ucw/perl/UCW/Configure/,$(UCW_CONFIGURE_PERL_MODULES))
+PROGS+=$(CONFIGURE_MODULES)
+
+# Target-specific variable; presumably read by the generic rule which builds Perl modules -- TODO confirm
+$(CONFIGURE_MODULES) : PERL_MODULE_DIR=UCW/Configure
+
+# Installation: copy the module sources to $(INSTALL_PERL_DIR)/UCW/Configure under $(DESTDIR)
+INSTALL_TARGETS+=install-perl-ucw-configure
+install-perl-ucw-configure:
+ install -d -m 755 $(DESTDIR)$(INSTALL_PERL_DIR)/UCW/Configure
+ install -m 644 $(addprefix $(s)/ucw/perl/UCW/Configure/,$(UCW_CONFIGURE_PERL_MODULES)) $(DESTDIR)$(INSTALL_PERL_DIR)/UCW/Configure
+.PHONY: install-perl-ucw-configure
--- /dev/null
+# UCW Library configuration system: installation paths
+# (c) 2005--2009 Martin Mares <mj@ucw.cz>
+# (c) 2006 Robert Spalek <robert@ucw.cz>
+# (c) 2008 Michal Vaner <vorner@ucw.cz>
+
+package UCW::Configure::Paths;
+use UCW::Configure;
+
+use strict;
+use warnings;
+
+# Derive the three installation prefixes. For a local build (CONFIG_LOCAL), all
+# of them are empty. Otherwise they are based on PREFIX (default /usr/local):
+# an empty PREFIX maps the "usr" prefix to /usr/, and the "var" prefix is the
+# "usr" prefix with a leading /usr replaced by /var.
+Log "Determining installation prefix ... ";
+if (IsSet("CONFIG_LOCAL")) {
+ Log("local build\n");
+ Set("INSTALL_PREFIX", "");
+ Set("INSTALL_USR_PREFIX", "");
+ Set("INSTALL_VAR_PREFIX", "");
+} else {
+ Set("PREFIX", "/usr/local") unless IsSet("PREFIX");
+ my $ipx = Get("PREFIX");
+ # Strip a single trailing slash, so that all prefixes end with exactly one
+ $ipx =~ s{/$}{};
+ Set("INSTALL_PREFIX", "$ipx/");
+ my $upx = ($ipx eq "" ? "/usr/" : "$ipx/");
+ Set("INSTALL_USR_PREFIX", $upx);
+ $upx =~ s{^/usr\b}{/var};
+ Set("INSTALL_VAR_PREFIX", $upx);
+ Log(Get("PREFIX") . "\n");
+}
+
+# Individual directories; every prefix already ends with a slash (or is empty)
+Set('CONFIG_DIR', 'etc') unless IsSet('CONFIG_DIR');
+Set('INSTALL_CONFIG_DIR', Get('INSTALL_PREFIX') . Get('CONFIG_DIR'));
+Set('INSTALL_BIN_DIR', Get('INSTALL_USR_PREFIX') . 'bin');
+Set('INSTALL_SBIN_DIR', Get('INSTALL_USR_PREFIX') . 'sbin');
+Set('INSTALL_LIB_DIR', Get('INSTALL_USR_PREFIX') . 'lib');
+Set('INSTALL_INCLUDE_DIR', Get('INSTALL_USR_PREFIX') . 'include');
+Set('INSTALL_PKGCONFIG_DIR', Get('INSTALL_USR_PREFIX') . 'lib/pkgconfig');
+Set('INSTALL_SHARE_DIR', Get('INSTALL_USR_PREFIX') . 'share');
+Set('INSTALL_MAN_DIR', Get('INSTALL_USR_PREFIX') . 'share/man');
+Set('INSTALL_LOG_DIR', Get('INSTALL_VAR_PREFIX') . 'log');
+Set('INSTALL_STATE_DIR', Get('INSTALL_VAR_PREFIX') . 'lib');
+Set('INSTALL_RUN_DIR', Get('INSTALL_VAR_PREFIX') . 'run');
+Set('INSTALL_DOC_DIR', Get('INSTALL_USR_PREFIX') . 'share/doc');
+Set('INSTALL_PERL_DIR', Get('INSTALL_LIB_DIR') . '/perl5');
+
+# Remember PKG_CONFIG_PATH used for building, so that it will be propagated to
+# pkg-config's run locally in the makefiles.
+Set("PKG_CONFIG_PATH", $ENV{"PKG_CONFIG_PATH"}) if defined $ENV{"PKG_CONFIG_PATH"};
+
+# We succeeded
+1;
--- /dev/null
+# UCW Library configuration system: pkg-config and friends
+# (c) 2008 Martin Mares <mj@ucw.cz>
+
+package UCW::Configure::Pkg;
+use UCW::Configure;
+
+use strict;
+use warnings;
+
+require Exporter;
+our @ISA = qw(Exporter);
+our @EXPORT = qw(&TryCmd &PkgConfig &TrivConfig);
+
+# Run a shell command and return its output with the trailing newline removed.
+# Returns nothing (undef in scalar context) if the command could not be run
+# or exited with a non-zero status.
+sub TryCmd($) {
+	my ($command) = @_;
+	my $output = `$command`;
+	return unless defined $output;
+	chomp $output;
+	return if $?;
+	return $output;
+}
+
+# If the given variable has been set on the command line, log its state
+# as YES/NO "(set manually)" and return 1; otherwise return 0.
+sub maybe_manually($) {
+	my ($name) = @_;
+	return 0 unless IsGiven($name);
+	Log(Get("$name") ? "YES (set manually)\n" : "NO (set manually)\n");
+	return 1;
+}
+
+# Check whether the pkg-config tool is available, unless the answer has been
+# given manually. Sets CONFIG_HAVE_PKGCONFIG (and CONFIG_VER_PKGCONFIG when
+# found) and returns the value of CONFIG_HAVE_PKGCONFIG.
+sub PkgConfigTool() {
+	Log("Checking for pkg-config ... ");
+	unless (maybe_manually("CONFIG_HAVE_PKGCONFIG")) {
+		my $version = TryCmd("pkg-config --version 2>/dev/null");
+		if (defined $version) {
+			Log("YES: version $version\n");
+			Set("CONFIG_HAVE_PKGCONFIG", 1);
+			Set("CONFIG_VER_PKGCONFIG", $version);
+		} else {
+			Log("NONE\n");
+			Set("CONFIG_HAVE_PKGCONFIG", 0);
+		}
+	}
+	return Get("CONFIG_HAVE_PKGCONFIG");
+}
+
+# Check for a package known to pkg-config.
+#
+# Usage: PkgConfig(name, option => value...); the only option read here is
+# minversion (minimum acceptable version of the package).
+# On success, sets CONFIG_HAVE_<PKG>, CONFIG_VER_<PKG>, <PKG>_CFLAGS and
+# <PKG>_LIBS and returns 1; returns 0 if the package is not usable. If
+# CONFIG_HAVE_<PKG> has been set manually, its value is returned directly.
+sub PkgConfig($@) {
+ my $pkg = shift @_;
+ my %opts = @_;
+ # Variable names use the package name in upper case with runs of other characters replaced by "_"
+ my $upper = $pkg; $upper =~ tr/a-z/A-Z/; $upper =~ s/[^0-9A-Z]+/_/g;
+ PkgConfigTool() unless IsSet("CONFIG_HAVE_PKGCONFIG");
+ Log "Checking for package $pkg ... ";
+ maybe_manually("CONFIG_HAVE_$upper") and return Get("CONFIG_HAVE_$upper");
+ if (!Get("CONFIG_HAVE_PKGCONFIG")) {
+ Log("NONE: pkg-config missing\n");
+ return 0;
+ }
+ my $ver = TryCmd("pkg-config --modversion $pkg 2>/dev/null");
+ if (!defined $ver) {
+ Log("NONE\n");
+ return 0;
+ }
+ if (defined($opts{minversion})) {
+ my $min = $opts{minversion};
+ # The version comparison is left to pkg-config itself
+ if (!defined TryCmd("pkg-config --atleast-version=$min $pkg")) {
+ Log("NO: version $ver is too old (need >= $min)\n");
+ return 0;
+ }
+ }
+ Log("YES: version $ver\n");
+ Set("CONFIG_HAVE_$upper" => 1);
+ Set("CONFIG_VER_$upper" => $ver);
+ my $cf = TryCmd("pkg-config --cflags $pkg");
+ Set("${upper}_CFLAGS" => $cf) if defined $cf;
+ my $lf = TryCmd("pkg-config --libs $pkg");
+ Set("${upper}_LIBS" => $lf) if defined $lf;
+ return 1;
+}
+
+# Normalize a dotted version number for string comparison: every component
+# is padded from the left by zeroes to at least 5 characters.
+sub ver_norm($) {
+	my ($version) = @_;
+	my @padded;
+	foreach my $part (split(/\./, $version)) {
+		push @padded, sprintf("%05s", $part);
+	}
+	return join(".", @padded);
+}
+
+# Check for a package which comes with its own configuration script.
+#
+# Usage: TrivConfig(name, option => value...), where options are:
+# script -- the script to run, minversion -- minimum acceptable version,
+# want -- reference to a list of script options to query (default: cflags, libs).
+# On success, sets CONFIG_HAVE_<PKG>, CONFIG_VER_<PKG> and <PKG>_<OPTION>
+# for every wanted option and returns 1; returns 0 if the package is not
+# usable. If CONFIG_HAVE_<PKG> has been set manually, its value is returned.
+sub TrivConfig($@) {
+ my $pkg = shift @_;
+ my %opts = @_;
+ my $upper = $pkg; $upper =~ tr/a-z/A-Z/; $upper =~ s/[^0-9A-Z]+/_/g;
+ Log "Checking for package $pkg ... ";
+ maybe_manually("CONFIG_HAVE_$upper") and return Get("CONFIG_HAVE_$upper");
+ my $pc = $opts{script};
+ my $ver = TryCmd("$pc --version 2>/dev/null");
+ if (!defined $ver) {
+ Log("NONE\n");
+ return 0;
+ }
+ if (defined($opts{minversion})) {
+ my $min = $opts{minversion};
+ # Versions are compared as strings after normalization by ver_norm()
+ if (ver_norm($ver) lt ver_norm($min)) {
+ Log("NO: version $ver is too old (need >= $min)\n");
+ return 0;
+ }
+ }
+ Log("YES: version $ver\n");
+ Set("CONFIG_HAVE_$upper" => 1);
+ Set("CONFIG_VER_$upper" => $ver);
+
+ my $want = $opts{want};
+ defined $want or $want = ["cflags", "libs"];
+ for my $w (@$want) {
+ # The variable suffix is the option name in upper case with "-" turned to "_"
+ my $uw = $w; $uw =~ tr/a-z-/A-Z_/;
+ my $cf = TryCmd("$pc --$w");
+ Set("${upper}_${uw}" => $cf) if defined $cf;
+ }
+ return 1;
+}
+# We succeeded
+1;
--- /dev/null
+#
+# Perl module for Logging
+#
+# (c) 2007 Pavel Charvat <pchar@ucw.cz>
+#
+
+package UCW::Log;
+
+use lib 'lib/perl5';
+use strict;
+use warnings;
+use POSIX;
+use Exporter;
+
+our $version = 1.0;
+our @ISA = qw(Exporter);
+our @EXPORT = ();
+our %EXPORT_TAGS = ( all => [qw(&Log &Die)]);
+our @EXPORT_OK = (@{$EXPORT_TAGS{'all'}});
+
+# Name of the running program: the last component of $0
+my $Prog = (reverse split(/\//, $0))[0];
+
+# Write a log message to STDERR. The first argument is the level mark, the
+# remaining ones are joined by spaces to form the text. The line consists of
+# the level, the local time, the program name in brackets and the text.
+sub Log {
+	my ($level, @words) = @_;
+	my $stamp = strftime(" %Y-%m-%d %H:%M:%S ", localtime());
+	print STDERR $level, $stamp, "[$Prog] ", join(' ', @words), "\n";
+}
+
+# Log the arguments as a message of level "!" and exit with status 1.
+sub Die {
+ Log('!', @_);
+ exit 1;
+}
+
+1;
--- /dev/null
+# More Perl modules
+
+# Object and run-tree directories used by the modules
+DIRS+=ucw/perl/UCW
+EXTRA_RUNDIRS+=lib/perl5/UCW
+# List of modules built and installed from this directory
+UCW_PERL_MODULES=$(addsuffix .pm,Config Log CGI Configure)
+PROGS+=$(addprefix $(o)/ucw/perl/UCW/,$(UCW_PERL_MODULES))
+
+# Modules of the UCW::Configure:: namespace have a makefile of their own
+include $(s)/ucw/perl/UCW/Configure/Makefile
+
+# Installation: copy the module sources to $(INSTALL_PERL_DIR)/UCW under $(DESTDIR)
+INSTALL_TARGETS+=install-perl-ucw
+install-perl-ucw:
+ install -d -m 755 $(DESTDIR)$(INSTALL_PERL_DIR)/UCW
+ install -m 644 $(addprefix $(s)/ucw/perl/UCW/,$(UCW_PERL_MODULES)) $(DESTDIR)$(INSTALL_PERL_DIR)/UCW
+.PHONY: install-perl-ucw
--- /dev/null
+Makefile
+Makefile.PL
+MANIFEST
+Ulimit.pm
+Ulimit.xs
+lib/Sherlock/.exists
--- /dev/null
+# Makefile for the Ulimit Perl module (c) 2003 Tomas Valla <tom@ucw.cz>
+
+DIRS+=ucw/perl/Ulimit/arch/auto/UCW/Ulimit
+ULIMIT_DIR=ucw/perl/Ulimit
+
+PROGS+=$(o)/ucw/perl/Ulimit/Ulimit.pm
+
+# The shared object is built as a part of the "extras" target
+extras:: $(o)/ucw/perl/Ulimit/arch/auto/UCW/Ulimit/Ulimit.$(SOEXT)
+
+# Build the XS extension by running the MakeMaker-generated Makefile in the
+# object directory, then copy the result to the run tree
+$(o)/ucw/perl/Ulimit/arch/auto/UCW/Ulimit/Ulimit.$(SOEXT): $(o)/$(ULIMIT_DIR)/Ulimit.xs $(o)/$(ULIMIT_DIR)/Ulimit.pm $(o)/$(ULIMIT_DIR)/Makefile
+ $(M)MAKE $@
+ $(Q)cd $(o)/$(ULIMIT_DIR) && $(MAKE) -f Makefile $(MAKESILENT)
+ $(Q)touch $@
+ $(Q)cp $@ run/$(DATADIR)/
+
+# Let MakeMaker generate the Makefile from Makefile.PL
+$(o)/$(ULIMIT_DIR)/Makefile: $(o)/$(ULIMIT_DIR)/Makefile.PL
+ $(M)PREPARE $@
+ $(Q)cd $(o)/$(ULIMIT_DIR) && perl Makefile.PL
+
+# The extension is built inside the object tree, so copy its sources there
+$(o)/$(ULIMIT_DIR)/Ulimit.xs: $(s)/$(ULIMIT_DIR)/Ulimit.xs
+ $(Q)cp $^ $@
+
+$(o)/$(ULIMIT_DIR)/Makefile.PL: $(s)/$(ULIMIT_DIR)/Makefile.PL
+ $(Q)cp $^ $@
+
+# Installation: the module goes to $(INSTALL_PERL_DIR)/UCW, the shared object to $(INSTALL_LIB_DIR)
+INSTALL_TARGETS+=install-perl-ucw-ulimit
+install-perl-ucw-ulimit:
+ install -d -m 755 $(DESTDIR)$(INSTALL_PERL_DIR)/UCW/ $(DESTDIR)$(INSTALL_LIB_DIR)
+ install -m 644 $(s)/$(ULIMIT_DIR)/Ulimit.pm $(DESTDIR)$(INSTALL_PERL_DIR)/UCW/
+ install -m 644 $(o)/ucw/perl/Ulimit/arch/auto/UCW/Ulimit/Ulimit.$(SOEXT) $(DESTDIR)$(INSTALL_LIB_DIR)
+.PHONY: install-perl-ucw-ulimit
--- /dev/null
+# Makefile for Perl MakeMaker (c) 2003 Tomas Valla <tom@ucw.cz>
+
+# Generates the Makefile which builds the UCW::Ulimit XS module.
+# The module version is read from Ulimit.pm. The INST_LIB and INST_ARCHLIB
+# settings name the local directories "lib" and "arch"; the surrounding build
+# system expects the shared object under arch/auto/UCW/Ulimit/.
+use ExtUtils::MakeMaker;
+WriteMakefile(
+ 'NAME' => 'UCW::Ulimit',
+ 'VERSION_FROM' => 'Ulimit.pm',
+ 'INST_LIB' => 'lib',
+ 'INST_ARCHLIB' => 'arch',
+);
--- /dev/null
+# Perl module for setting process limits
+#
+# (c) 2003 Tomas Valla <tom@ucw.cz>
+#
+# This software may be freely distributed and used according to the terms
+# of the GNU Lesser General Public License.
+#
+#
+#
+# Interface:
+# UCW::Ulimit::setlimit( $resource, $softlimit, $hardlimit)
+# UCW::Ulimit::getlimit( $resource, $softlimit, $hardlimit)
+#
+# setlimit sets limit to values supplied in softlimit and hardlimit
+# getlimit reads limits into softlimit and hardlimit
+# $resource constants are defined below
+#
+
+package UCW::Ulimit;
+
+use 5.006;
+use strict;
+use warnings;
+
+require DynaLoader;
+
+our @ISA = qw(DynaLoader);
+# Put "lib" at the front of DynaLoader's library search path
+unshift @DynaLoader::dl_library_path, "lib";
+
+# Resource codes accepted by setlimit() and getlimit(). The XS code maps them
+# to the RLIMIT_* constants of the C library (0 => RLIMIT_CPU, ..., 9 => RLIMIT_AS).
+our $CPU = 0;
+our $FSIZE = 1;
+our $DATA = 2;
+our $STACK = 3;
+our $CORE = 4;
+our $RSS = 5;
+our $NPROC = 6;
+our $NOFILE = 7;
+our $MEMLOCK = 8;
+our $AS = 9;
+
+our $VERSION = '0.01';
+
+# Load the compiled part of the module (Ulimit.xs)
+bootstrap UCW::Ulimit $VERSION;
+
+# Preloaded methods go here.
+
+1;
+__END__
--- /dev/null
+/*
+ * PerlXS module for managing process limits
+ *
+ * (c) 2003 Tomas Valla <tom@ucw.cz>
+ */
+
+#include "EXTERN.h"
+#include "perl.h"
+#include "XSUB.h"
+
+#include <sys/resource.h>
+#include <unistd.h>
+
+
+MODULE = UCW::Ulimit PACKAGE = UCW::Ulimit
+
+PROTOTYPES: ENABLED
+
+int
+setlimit(IN int resource, IN int soft, IN int hard)
+CODE:
+	/*
+	 * Set the soft and hard limit of the given resource.
+	 * The resource is one of the numeric codes 0..9 declared in Ulimit.pm.
+	 * Returns the result of setrlimit(), or -1 without calling it when
+	 * the resource code is unknown.
+	 */
+	struct rlimit rl;
+	int r = 0;
+	int known = 1;
+
+	switch(resource) {
+	case 0:
+		r = RLIMIT_CPU; break;
+	case 1:
+		r = RLIMIT_FSIZE; break;
+	case 2:
+		r = RLIMIT_DATA; break;
+	case 3:
+		r = RLIMIT_STACK; break;
+	case 4:
+		r = RLIMIT_CORE; break;
+	case 5:
+		r = RLIMIT_RSS; break;
+	case 6:
+		r = RLIMIT_NPROC; break;
+	case 7:
+		r = RLIMIT_NOFILE; break;
+	case 8:
+		r = RLIMIT_MEMLOCK; break;
+	case 9:
+		r = RLIMIT_AS; break;
+	default:
+		/* Never hand an indeterminate resource number to the kernel */
+		known = 0; break;
+	}
+	if (known) {
+		rl.rlim_cur = soft;
+		rl.rlim_max = hard;
+		RETVAL = setrlimit(r, &rl);
+	} else
+		RETVAL = -1;
+OUTPUT:
+	RETVAL
+
+
+int
+getlimit(IN int resource, OUT int soft, OUT int hard)
+CODE:
+	/*
+	 * Read the soft and hard limit of the given resource into soft and hard.
+	 * The resource is one of the numeric codes 0..9 declared in Ulimit.pm.
+	 * Returns the result of getrlimit(), or -1 without calling it when
+	 * the resource code is unknown. On failure, both outputs are zero.
+	 */
+	struct rlimit rl;
+	int r = 0;
+	int known = 1;
+
+	/* Defined values for the outputs even if nothing gets read */
+	rl.rlim_cur = 0;
+	rl.rlim_max = 0;
+
+	switch(resource) {
+	case 0:
+		r = RLIMIT_CPU; break;
+	case 1:
+		r = RLIMIT_FSIZE; break;
+	case 2:
+		r = RLIMIT_DATA; break;
+	case 3:
+		r = RLIMIT_STACK; break;
+	case 4:
+		r = RLIMIT_CORE; break;
+	case 5:
+		r = RLIMIT_RSS; break;
+	case 6:
+		r = RLIMIT_NPROC; break;
+	case 7:
+		r = RLIMIT_NOFILE; break;
+	case 8:
+		r = RLIMIT_MEMLOCK; break;
+	case 9:
+		r = RLIMIT_AS; break;
+	default:
+		/* Never hand an indeterminate resource number to the kernel */
+		known = 0; break;
+	}
+
+	RETVAL = known ? getrlimit(r, &rl) : -1;
+	soft = rl.rlim_cur;
+	hard = rl.rlim_max;
+OUTPUT:
+	RETVAL
--- /dev/null
+/*
+ * UCW Library -- Prefetch
+ *
+ * (c) 1997--2006 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_PREFETCH_H
+#define _UCW_PREFETCH_H
+
+/*
+ * prefetch(addr) is always available after including this header: either as
+ * a real prefetch instruction (HAVE_PREFETCH is then defined), or as an empty
+ * inline function. The CPU type is recognized by compiler-defined macros.
+ */
+
+#if defined(__k6)
+ /* K6 doesn't have prefetches */
+
+#elif defined(__athlon) || defined(__k8) || \
+ defined(__i686) || \
+ defined(__pentium4) || defined(__prescott) || defined(__nocona)
+
+#define HAVE_PREFETCH
+/* Issue a "prefetcht0" instruction for the memory @addr points to */
+static inline void prefetch(void *addr)
+{
+ asm volatile ("prefetcht0 %0" : : "m" (*(byte*)addr));
+}
+
+#else
+#warning "Don't know how to prefetch on your CPU. Please fix ucw/prefetch.h."
+#endif
+
+#ifndef HAVE_PREFETCH
+/* Fallback which does nothing */
+static inline void prefetch(void *addr UNUSED)
+{
+}
+#endif
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Prime Number Tests
+ *
+ * (c) 1997 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+#include "ucw/prime.h"
+
+/*
+ * Primality test by sequential trial division. The caller guarantees
+ * that x is divisible neither by 2 nor by 3, so only divisors of the
+ * forms 6k-1 and 6k+1 are tried (5, 7, 11, 13, ...).
+ */
+static int
+__isprime(uns x)
+{
+  uns divisor = 5;
+  uns step = 2;			/* Alternates between 2 and 4 */
+
+  if (x == 5)
+    return 1;
+  for(;;)
+    {
+      if (x % divisor == 0)
+	return 0;
+      /* No divisor found up to the square root => prime */
+      if (x / divisor <= divisor)
+	return 1;
+      divisor += step;
+      step = 6 - step;
+    }
+}
+
+/*
+ * Return non-zero iff x is a prime number. Numbers below 5 are decided
+ * directly, multiples of 2 and 3 are rejected by their remainder modulo 6
+ * and the rest is passed to the trial division.
+ */
+int
+isprime(uns x)
+{
+  uns rem;
+
+  if (x < 5)
+    return (x == 2 || x == 3);
+  rem = x % 6;
+  if (rem != 1 && rem != 5)
+    return 0;
+  return __isprime(x);
+}
+
+/*
+ * Return some prime greater than x. The search starts at the nearest
+ * number of the form 6k-1 which is not smaller than x and then tests
+ * the following candidates of the forms 6k+1 and 6k-1 in turn.
+ */
+uns
+nextprime(uns x)
+{
+  uns cand = x + (5 - (x % 6));	/* cand is 6k-1 */
+
+  for(;;)
+    {
+      cand += 2;			/* 6k+1 */
+      if (__isprime(cand))
+	break;
+      cand += 4;			/* 6k-1 */
+      if (__isprime(cand))
+	break;
+    }
+  return cand;
+}
+
+#ifdef TEST
+
+#include <stdio.h>
+#include <stdlib.h>
+
+/*
+ * Test program: takes a single number on the command line and reports
+ * whether it is a prime and which prime nextprime() finds after it.
+ * Returns 1 when called with a wrong number of arguments.
+ */
+int
+main(int argc, char **argv)
+{
+  if (argc != 2)
+    {
+      fprintf(stderr, "Usage: %s <number>\n", argc ? argv[0] : "prime");
+      return 1;
+    }
+  /* strtoul() covers the whole unsigned range, %u matches the unsigned arguments */
+  uns k = strtoul(argv[1], NULL, 10);
+  printf("%u is%s prime\n", k, isprime(k) ? "" : "n't");
+  printf("Next prime is %u\n", nextprime(k));
+  return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * The UCW Library -- Prime numbers
+ *
+ * (c) 2008 Michal Vaner <vorner@ucw.cz>
+ *
+ * Code taken from ucw/lib.h by:
+ *
+ * (c) 1997--2008 Martin Mares <mj@ucw.cz>
+ * (c) 2005 Tomas Valla <tom@ucw.cz>
+ * (c) 2006 Robert Spalek <robert@ucw.cz>
+ * (c) 2007 Pavel Charvat <pchar@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_PRIME_H
+#define _UCW_PRIME_H
+
+#include "ucw/lib.h"
+
+/* prime.c */
+
+/**
+ * Return a non-zero value iff @x is a prime number.
+ * The time complexity is `O(sqrt(x))`.
+ **/
+int isprime(uns x);
+
+/**
+ * Return some prime greater than @x. The function does not check for overflows, but it should
+ * be safe at least for @x lower than `1U << 31`.
+ * If Cramer's conjecture is true, it should have complexity `O(sqrt(x) * log(x)^2)`.
+ **/
+uns nextprime(uns x);
+
+/* primetable.c */
+
+/**
+ * Quickly look up a precomputed table to return a prime number greater than @x.
+ * Returns zero if there is no such prime (we guarantee the existence of at
+ * least one prime greater than `1U << 31` in the table).
+ **/
+uns next_table_prime(uns x);
+
+/**
+ * Quickly look up a precomputed table to return a prime number smaller than @x.
+ * Returns zero if the table contains no such prime (its smallest entry is `3`).
+ **/
+uns prev_table_prime(uns x);
+
+#endif // _UCW_PRIME_H
--- /dev/null
+/*
+ * UCW Library -- Prime Number Table
+ *
+ * (c) 2005 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+#include "ucw/prime.h"
+#include "ucw/binsearch.h"
+
+/*
+ * A table of odd primes, each is about 1.2 times the previous one.
+ * The entries are in increasing order, which the binary searches in
+ * next_table_prime() and prev_table_prime() rely on. The table can be
+ * regenerated by the disabled branch of the test program at the end of this file.
+ */
+static uns prime_table[] = {
+ 3,
+ 7,
+ 13,
+ 19,
+ 29,
+ 37,
+ 53,
+ 67,
+ 89,
+ 109,
+ 137,
+ 173,
+ 211,
+ 263,
+ 331,
+ 409,
+ 499,
+ 601,
+ 727,
+ 877,
+ 1061,
+ 1279,
+ 1543,
+ 1861,
+ 2239,
+ 2689,
+ 3229,
+ 3877,
+ 4657,
+ 5623,
+ 6761,
+ 8123,
+ 9767,
+ 11731,
+ 14083,
+ 16903,
+ 20287,
+ 24359,
+ 29243,
+ 35099,
+ 42131,
+ 50581,
+ 60703,
+ 72859,
+ 87433,
+ 104933,
+ 125927,
+ 151121,
+ 181361,
+ 217643,
+ 261223,
+ 313471,
+ 376171,
+ 451411,
+ 541699,
+ 650059,
+ 780119,
+ 936151,
+ 1123391,
+ 1348111,
+ 1617739,
+ 1941293,
+ 2329559,
+ 2795477,
+ 3354581,
+ 4025507,
+ 4830619,
+ 5796797,
+ 6956203,
+ 8347483,
+ 10017011,
+ 12020431,
+ 14424539,
+ 17309471,
+ 20771371,
+ 24925661,
+ 29910821,
+ 35892991,
+ 43071601,
+ 51685939,
+ 62023139,
+ 74427803,
+ 89313379,
+ 107176057,
+ 128611313,
+ 154333591,
+ 185200339,
+ 222240413,
+ 266688509,
+ 320026249,
+ 384031507,
+ 460837813,
+ 553005391,
+ 663606499,
+ 796327811,
+ 955593439,
+ 1146712139,
+ 1376054569,
+ 1651265507,
+ 1981518631,
+ 2377822387,
+ 2853386881,
+ 3424064269,
+ 4108877153,
+ 4294967291
+};
+
+/* Number of entries of prime_table[] */
+#define NPRIMES ARRAY_SIZE(prime_table)
+
+/*
+ * Return the first prime of the table which is greater than x,
+ * or zero if x is not smaller than the last entry of the table.
+ */
+uns
+next_table_prime(uns x)
+{
+  uns largest = prime_table[NPRIMES-1];
+
+  if (x < largest)
+    return prime_table[BIN_SEARCH_FIRST_GE(prime_table, NPRIMES, x+1)];
+  return 0;
+}
+
+/*
+ * Return the table entry immediately preceding the position found by
+ * BIN_SEARCH_FIRST_GE() for x, or zero if that position is the start of the table.
+ */
+uns
+prev_table_prime(uns x)
+{
+  int pos = BIN_SEARCH_FIRST_GE(prime_table, NPRIMES, x);
+
+  if (!pos)
+    return 0;
+  return prime_table[pos-1];
+}
+
+#ifdef TEST
+
+#include <stdio.h>
+
+/*
+ * Test program. The disabled branch generates the contents of prime_table[],
+ * the enabled one prints the next and previous table prime for small numbers
+ * and for numbers close to the upper end of the unsigned range.
+ */
+int main(void)
+{
+#if 0 /* Generate the table */
+  uns x = 3, xx;
+  do
+    {
+      printf(" %u,\n", x);
+      xx = x;
+      x = nextprime(1.2*x);
+    }
+  while (x > xx);		/* Stop as soon as the computation overflows */
+#else
+  /* All printed values are unsigned, so they are formatted by %u */
+  for (uns i=1; i<=100; i++)
+    printf("%u\t%u\t%u\n", i, next_table_prime(i), prev_table_prime(i));
+  /* This loop ends when i wraps around to zero */
+  for (uns i=0xfffffff0; i; i++)
+    printf("%u\t%u\t%u\n", i, next_table_prime(i), prev_table_prime(i));
+#endif
+  return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Setting of Process Title
+ *
+ * (c) 2001--2006 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <stdarg.h>
+#include <unistd.h>
+
+static char **spt_argv;
+static char *spt_start, *spt_end;
+
+/*
+ * Prepare for later calls of setproctitle(). When CONFIG_LINUX is defined,
+ * the environment strings are copied to newly allocated memory and the
+ * area occupied by the argument strings and by the original environment
+ * strings directly following them is recorded in spt_start and spt_end.
+ * Otherwise, the function does nothing, which leaves spt_start and spt_end
+ * unset and turns setproctitle() into a no-op.
+ */
+void
+setproctitle_init(int argc, char **argv)
+{
+#ifdef CONFIG_LINUX
+ int i, len;
+ char **env, **oldenv, *t;
+
+ spt_argv = argv;
+
+ /* Create a backup copy of environment */
+ oldenv = __environ;
+ len = 0;
+ /* First pass: total size of all strings including their terminators */
+ for (i=0; oldenv[i]; i++)
+ len += strlen(oldenv[i]) + 1;
+ __environ = env = xmalloc(sizeof(char *)*(i+1));
+ t = xmalloc(len);
+ /* Second pass: copy the strings to a single block and point env[] at the copies */
+ for (i=0; oldenv[i]; i++)
+ {
+ env[i] = t;
+ len = strlen(oldenv[i]) + 1;
+ memcpy(t, oldenv[i], len);
+ t += len;
+ }
+ env[i] = NULL;
+
+ /* Scan for consecutive free space */
+ spt_start = spt_end = argv[0];
+ /* A string extends the area only if it starts right after the terminator of the previous one */
+ for (i=0; i<argc; i++)
+ if (!i || spt_end+1 == argv[i])
+ spt_end = argv[i] + strlen(argv[i]);
+ for (i=0; oldenv[i]; i++)
+ if (spt_end+1 == oldenv[i])
+ spt_end = oldenv[i] + strlen(oldenv[i]);
+#endif
+}
+
+/*
+ * Set the process title to a printf-like formatted message. The title is
+ * written to the area found by setproctitle_init(); if no such area is
+ * known, nothing happens. Messages which do not fit into the internal
+ * buffer are replaced by "<too-long>", messages longer than the area
+ * are truncated.
+ */
+void
+setproctitle(const char *msg, ...)
+{
+  va_list args;
+  char buf[256];		/* Plain chars as expected by the string functions below */
+  int n;
+
+  va_start(args, msg);
+  if (spt_end > spt_start)
+    {
+      n = vsnprintf(buf, sizeof(buf), msg, args);
+      if (n >= (int) sizeof(buf) || n < 0)
+	strcpy(buf, "<too-long>");
+      n = spt_end - spt_start;
+      /* spt_end points to the terminator of the last string of the area, so spt_start[n] is writable */
+      strncpy(spt_start, buf, n);
+      spt_start[n] = 0;
+      spt_argv[0] = spt_start;
+      spt_argv[1] = NULL;
+    }
+  va_end(args);
+}
+
+/*
+ * Return a pointer to the title area, or NULL if no such area is known.
+ */
+char *
+getproctitle(void)
+{
+  if (spt_start < spt_end)
+    return spt_start;
+  return NULL;
+}
--- /dev/null
+/*
+ * UCW Library -- Poor Man's Profiler
+ *
+ * (c) 2001 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+#include "ucw/profile.h"
+
+#include <stdio.h>
+
+/* PROFILE_TOD */
+
+#include <sys/time.h>
+
+/* Reset the accumulated time of a gettimeofday() profiler counter */
+void
+prof_tod_init(struct prof_tod *c)
+{
+  c->usec = 0;
+  c->sec = 0;
+}
+
+/*
+ * Stop counter o and start counter n at the same point of time.
+ * Either of them can be NULL. The time elapsed since the start of o
+ * is added to its accumulated time, with the microseconds normalized
+ * to the range 0..999999.
+ */
+void
+prof_tod_switch(struct prof_tod *o, struct prof_tod *n)
+{
+  struct timeval now;
+
+  gettimeofday(&now, NULL);
+  if (n)
+    {
+      n->start_sec = now.tv_sec;
+      n->start_usec = now.tv_usec;
+    }
+  if (!o)
+    return;
+
+  o->sec += now.tv_sec - o->start_sec;
+  o->usec += now.tv_usec - o->start_usec;
+  if (o->usec < 0)
+    {
+      /* Borrow a second */
+      o->usec += 1000000;
+      o->sec--;
+    }
+  else
+    {
+      /* Carry whole seconds */
+      while (o->usec >= 1000000)
+	{
+	  o->usec -= 1000000;
+	  o->sec++;
+	}
+    }
+}
+
+/*
+ * Format the accumulated time as seconds with six decimal places.
+ * Returns the number of characters written to buf.
+ */
+int
+prof_tod_format(char *buf, struct prof_tod *c)
+{
+  int written = sprintf(buf, "%d.%06d", c->sec, c->usec);
+  return written;
+}
+
+/* PROFILE_TSC */
+
+#ifdef CPU_I386
+
+/* Reset the accumulated number of ticks of a TSC profiler counter */
+void
+prof_tsc_init(struct prof_tsc *c)
+{
+  struct prof_tsc *counter = c;
+  counter->ticks = 0;
+}
+
+/*
+ * Format the accumulated number of ticks as a decimal number.
+ * Returns the number of characters written to buf.
+ */
+int
+prof_tsc_format(char *buf, struct prof_tsc *c)
+{
+  /* The counter is unsigned, so print it as such with a matching argument type */
+  return sprintf(buf, "%llu", (unsigned long long) c->ticks);
+}
+
+#endif
+
+/* PROFILE_KTSC */
+
+#ifdef CONFIG_LINUX
+
+#include <fcntl.h>
+#include <unistd.h>
+/* Descriptor of /proc/self/profile shared by all counters, -1 if not opened yet */
+static int self_prof_fd = -1;
+
+/*
+ * Reset a kernel TSC profiler counter. The first call also opens
+ * /proc/self/profile and dies if that fails.
+ */
+void
+prof_ktsc_init(struct prof_ktsc *c)
+{
+  int need_open = (self_prof_fd < 0);
+
+  if (need_open)
+    {
+      self_prof_fd = open("/proc/self/profile", O_RDONLY, 0);
+      if (self_prof_fd < 0)
+	die("Unable to open /proc/self/profile: %m");
+    }
+  c->ticks_sys = 0;
+  c->ticks_user = 0;
+}
+
+/*
+ * Stop counter o and start counter n at the same point of time.
+ * Either of them can be NULL. The current user and system tick counts
+ * are read as two numbers from the start of /proc/self/profile.
+ */
+void
+prof_ktsc_switch(struct prof_ktsc *o, struct prof_ktsc *n)
+{
+  unsigned long long u, s;
+  char buf[256];		/* Plain chars as expected by sscanf() */
+
+  int l = pread(self_prof_fd, buf, sizeof(buf)-1, 0);
+  /* A completely filled buffer would mean that the contents could have been truncated */
+  ASSERT(l > 0 && l < (int)sizeof(buf)-1);
+  buf[l] = 0;
+  /* Both variables are unsigned, so they are read by %llu */
+  l = sscanf(buf, "%llu%llu", &u, &s);
+  ASSERT(l == 2);
+
+  if (n)
+    {
+      n->start_user = u;
+      n->start_sys = s;
+    }
+  if (o)
+    {
+      u -= o->start_user;
+      o->ticks_user += u;
+      s -= o->start_sys;
+      o->ticks_sys += s;
+    }
+}
+
+/*
+ * Format the accumulated ticks as "<user>+<system>".
+ * Returns the number of characters written to buf.
+ */
+int
+prof_ktsc_format(char *buf, struct prof_ktsc *c)
+{
+  /* The counters are unsigned, so print them as such */
+  return sprintf(buf, "%llu+%llu", (unsigned long long) c->ticks_user, (unsigned long long) c->ticks_sys);
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Poor Man's Profiler
+ *
+ * (c) 2001 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+/*
+ * Usage:
+ * #define PROFILE_xxx
+ * #include "ucw/profile.h"
+ * prof_t cnt;
+ * prof_init(&cnt);
+ * ...
+ * prof_start(&cnt);
+ * ...
+ * prof_stop(&cnt);
+ * printf("%s\n", PROF_STR(cnt));
+ */
+
+#ifndef _UCW_PROFILE_H
+#define _UCW_PROFILE_H
+
+/* PROFILE_TOD: gettimeofday() profiler */
+
+/* Counter of the gettimeofday() profiler */
+struct prof_tod {
+ u32 start_sec, start_usec; /* Time of the last start */
+ s32 sec, usec; /* Accumulated time */
+};
+
+void prof_tod_init(struct prof_tod *); /* Reset the accumulated time */
+void prof_tod_switch(struct prof_tod *, struct prof_tod *); /* Stop the first counter, start the second one; either can be NULL */
+int prof_tod_format(char *, struct prof_tod *); /* Print the accumulated time to a buffer */
+
+/* PROFILE_TSC: i386 TSC profiler */
+
+#ifdef CPU_I386
+
+/* Counter of the TSC profiler */
+struct prof_tsc {
+ u64 start_tsc; /* TSC value at the last start */
+ u64 ticks; /* Accumulated number of ticks */
+};
+
+void prof_tsc_init(struct prof_tsc *); /* Reset the accumulated ticks */
+int prof_tsc_format(char *, struct prof_tsc *); /* Print the accumulated ticks to a buffer */
+
+#endif
+
+/* PROFILE_KTSC: Linux kernel TSC profiler */
+
+#ifdef CONFIG_LINUX
+
+/* Counter of the kernel TSC profiler, keeping user and system ticks separately */
+struct prof_ktsc {
+ u64 start_user, start_sys; /* Values at the last start */
+ u64 ticks_user, ticks_sys; /* Accumulated ticks */
+};
+
+void prof_ktsc_init(struct prof_ktsc *); /* Reset the accumulated ticks */
+void prof_ktsc_switch(struct prof_ktsc *, struct prof_ktsc *); /* Stop the first counter, start the second one; either can be NULL */
+int prof_ktsc_format(char *, struct prof_ktsc *); /* Print the accumulated ticks to a buffer */
+
+#endif
+
+/* Select the right profiler */
+
+/*
+ * Exactly one profiler is selected by the PROFILE_xxx macro defined by the
+ * includer. Each branch defines PROFILER, the type prof_t, the buffer size
+ * PROF_STR_SIZE and maps the generic prof_* names to the chosen implementation.
+ */
+#if defined(PROFILE_TOD)
+
+#define PROFILER
+#define PROF_STR_SIZE 21
+typedef struct prof_tod prof_t;
+#define prof_init prof_tod_init
+#define prof_switch prof_tod_switch
+#define prof_format prof_tod_format
+
+#elif defined(PROFILE_TSC)
+
+#define PROFILER
+/* This profiler brings its own inline prof_start() and prof_stop() */
+#define PROFILER_INLINE
+#define PROF_STR_SIZE 24
+
+typedef struct prof_tsc prof_t;
+#define prof_init prof_tsc_init
+#define prof_format prof_tsc_format
+
+/* Read the time stamp counter to val. NOTE(review): the "=A" constraint presumably means the edx:eax pair of 32-bit x86 -- confirm before using elsewhere. */
+#define rdtscll(val) __asm__ __volatile__("rdtsc" : "=A" (val))
+
+/* Remember the current TSC as the start of measurement */
+static inline void prof_start(prof_t *c)
+{
+ rdtscll(c->start_tsc);
+}
+
+/* Add the ticks elapsed since the start to the counter */
+static inline void prof_stop(prof_t *c)
+{
+ u64 tsc;
+ rdtscll(tsc);
+ tsc -= c->start_tsc;
+ c->ticks += tsc;
+}
+
+/* Stop counter o and start counter n using a single TSC reading; both pointers are dereferenced unconditionally */
+static inline void prof_switch(prof_t *o, prof_t *n)
+{
+ u64 tsc;
+ rdtscll(tsc);
+ n->start_tsc = tsc;
+ tsc -= o->start_tsc;
+ o->ticks += tsc;
+}
+
+#elif defined(PROFILE_KTSC)
+
+#define PROFILER
+#define PROF_STR_SIZE 50
+typedef struct prof_ktsc prof_t;
+#define prof_init prof_ktsc_init
+#define prof_switch prof_ktsc_switch
+#define prof_format prof_ktsc_format
+
+#endif
+
+#ifdef PROFILER
+
+/* Stuff common for all profilers */
+#ifndef PROFILER_INLINE
+/* Starting and stopping is a switch from or to no counter at all */
+static inline void prof_start(prof_t *c) { prof_switch(NULL, c); }
+static inline void prof_stop(prof_t *c) { prof_switch(c, NULL); }
+#endif
+/* Format counter C to a static buffer and yield a pointer to that buffer */
+#define PROF_STR(C) ({ static char _x[PROF_STR_SIZE]; prof_format(_x, &(C)); _x; })
+
+#else
+
+/* Dummy profiler with no output */
+typedef struct { } prof_t;
+static inline void prof_init(prof_t *c UNUSED) { }
+static inline void prof_start(prof_t *c UNUSED) { }
+static inline void prof_stop(prof_t *c UNUSED) { }
+static inline void prof_switch(prof_t *c UNUSED, prof_t *d UNUSED) { }
+/* Returns the number of characters written like the real prof_*_format() functions do */
+static inline int prof_format(char *b, prof_t *c UNUSED) { b[0]='?'; b[1]=0; return 1; }
+#define PROF_STR_SIZE 2
+#define PROF_STR(C) "?"
+
+#endif
+
+#endif
--- /dev/null
+/*
+ * Simple and Quick Shared Memory Cache
+ *
+ * (c) 2005 Martin Mares <mj@ucw.cz>
+ */
+
+#undef LOCAL_DEBUG
+
+#include "ucw/lib.h"
+#include "ucw/bitops.h"
+#include "ucw/fastbuf.h"
+#include "ucw/ff-binary.h"
+#include "ucw/qache.h"
+
+#include <stdio.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+
+/*
+ * The cache lives in a mmapped file of the following format:
+ * qache_header
+ * qache_entry[max_entries] table of entries and their keys
+ * u32 qache_hash[hash_size] hash table pointing to keys
+ * u32 block_next[num_blocks] next block pointers
+ * padding to a multiple of block size
+ * blocks[] data blocks
+ */
+
+/* Header stored at the very beginning of the cache file */
+struct qache_header {
+ u32 magic; /* QACHE_MAGIC */
+ u32 block_size; /* Parameters as in qache_params */
+ u32 block_shift; /* block_size = 1 << block_shift */
+ u32 num_blocks;
+ u32 format_id;
+ u32 entry_table_start; /* Array of qache_entry's */
+ u32 max_entries;
+ u32 hash_table_start; /* Hash table containing all keys */
+ u32 hash_size;
+ u32 next_table_start; /* Array of next pointers */
+ u32 first_data_block;
+};
+
+/* Expected value of qache_header.magic */
+#define QACHE_MAGIC 0xb79f6d12
+
+/* One slot of the entry table; entry #0 is never a real entry, it holds the heads of all lists */
+struct qache_entry {
+ u32 lru_prev, lru_next; /* Entry #0: head of the cyclic LRU list */
+ u32 data_len; /* Entry #0: number of free blocks, Free entries: ~0U */
+ u32 first_data_block; /* Entry #0: first free block */
+ qache_key_t key;
+ u32 hash_next; /* Entry #0: first free entry, Free entries: next free */
+};
+
+/* In-memory handle of an open cache */
+struct qache {
+ struct qache_header *hdr; /* The following four pointers point inside mmap_data */
+ struct qache_entry *entry_table;
+ u32 *hash_table;
+ u32 *next_table;
+ int fd; /* Descriptor of the cache file */
+ byte *mmap_data; /* Start of the mapping of the whole file */
+ uns file_size; /* Size of the mapping */
+ char *file_name; /* Private copy of the file name */
+ uns locked; /* Non-zero while we hold the lock */
+};
+
+/* Heads of the free lists as stored in entry #0; to be used as q->first_free_entry etc. */
+#define first_free_entry entry_table[0].hash_next
+#define first_free_block entry_table[0].first_data_block
+#define num_free_blocks entry_table[0].data_len
+
+/*
+ * Format a key as a string of hexadecimal digits for debugging messages.
+ * The result lives in a static buffer, which is overwritten by the next call.
+ */
+static inline char *
+format_key(qache_key_t *key)
+{
+  static char keybuf[2*sizeof(qache_key_t)+1];
+  char *out = keybuf;
+
+  for (uns i=0; i<sizeof(qache_key_t); i++, out += 2)
+    sprintf(out, "%02x", (*key)[i]);
+  return keybuf;
+}
+
+/*
+ * Synchronize len bytes of the mapping starting at offset start by an
+ * asynchronous msync(). The range is extended to whole pages first.
+ * A failure is only logged. When CONFIG_LINUX is defined, the function is empty.
+ */
+static void
+qache_msync(struct qache *q UNUSED, uns start UNUSED, uns len UNUSED)
+{
+#ifndef CONFIG_LINUX
+ /* We don't need msyncing on Linux, since the mappings are guaranteed to be coherent */
+ /* Move the start down to a page boundary and round the length up */
+ len += (start % CPU_PAGE_SIZE);
+ start -= start % CPU_PAGE_SIZE;
+ len = ALIGN_TO(len, CPU_PAGE_SIZE);
+ if (msync(q->mmap_data + start, len, MS_ASYNC | MS_INVALIDATE) < 0)
+ msg(L_ERROR, "Cache %s: msync failed: %m", q->file_name);
+#endif
+}
+
+/* Synchronize a single block given by its number */
+static void
+qache_msync_block(struct qache *q, uns blk)
+{
+  uns offset = blk << q->hdr->block_shift;
+
+  DBG("\tSyncing block %d", blk);
+  qache_msync(q, offset, q->hdr->block_size);
+}
+
+/*
+ * Lock the cache: wait for a write lock on the header part of the file.
+ * Must not be called when we already hold the lock. Dies on errors.
+ */
+static void
+qache_lock(struct qache *q)
+{
+ /* We cannot use flock() since it happily permits locking a shared fd (e.g., after fork()) multiple times */
+ ASSERT(!q->locked);
+ struct flock fl = { .l_type = F_WRLCK, .l_whence = SEEK_SET, .l_start = 0, .l_len = sizeof(struct qache_header) };
+ if (fcntl(q->fd, F_SETLKW, &fl) < 0)
+ die("fcntl lock on %s: %m", q->file_name);
+ q->locked = 1;
+ DBG("Locked cache %s", q->file_name);
+}
+
+/*
+ * Unlock the cache locked by qache_lock(). If dirty is non-zero, the part
+ * of the file preceding the first data block is synchronized before the
+ * lock is released. Dies on errors.
+ */
+static void
+qache_unlock(struct qache *q, uns dirty)
+{
+ ASSERT(q->locked);
+ if (dirty) /* Sync header, entry table and hash table */
+ qache_msync(q, 0, q->hdr->first_data_block << q->hdr->block_shift);
+ struct flock fl = { .l_type = F_UNLCK, .l_whence = SEEK_SET, .l_start = 0, .l_len = sizeof(struct qache_header) };
+ if (fcntl(q->fd, F_SETLKW, &fl) < 0)
+ die("fcntl unlock on %s: %m", q->file_name);
+ q->locked = 0;
+ DBG("Unlocked cache %s (dirty=%d)", q->file_name, dirty);
+}
+
+/* Bits of the per-entry map used by the audit: which lists the entry was found in */
+enum entry_audit_flags {
+ ET_FREE_LIST = 1, /* List of free entries */
+ ET_LRU = 2, /* The LRU list */
+ ET_HASH = 4 /* A chain of the hash table */
+};
+
+/*
+ * Check the consistency of the entry table: walk the list of free entries,
+ * all hash chains and the LRU list, marking every visited entry in entrymap
+ * (one byte per entry, zeroed by the caller). Finally, every entry must be
+ * either only in the free list, or in both the LRU list and a hash chain.
+ * Returns NULL if everything is all right, otherwise a description of the problem.
+ */
+static char *
+audit_entries(struct qache *q, byte *entrymap)
+{
+ uns i, j;
+
+ DBG("Auditing entries");
+
+ /* Check the free list */
+ i = q->first_free_entry;
+ while (i)
+ {
+ /* An entry seen twice means a cycle; free entries are marked by data_len == ~0U */
+ if (i >= q->hdr->max_entries || (entrymap[i] & ET_FREE_LIST) || q->entry_table[i].data_len != ~0U)
+ return "inconsistent free entry list";
+ entrymap[i] |= ET_FREE_LIST;
+ i = q->entry_table[i].hash_next;
+ }
+
+ /* Check the hash table */
+ for (i=0; i<q->hdr->hash_size; i++)
+ {
+ j = q->hash_table[i];
+ while (j)
+ {
+ /* Each entry may be in a single chain only and never in the free list */
+ if (j >= q->hdr->max_entries || (entrymap[j] & (ET_HASH | ET_FREE_LIST)))
+ return "inconsistent hash chains";
+ entrymap[j] |= ET_HASH;
+ j = q->entry_table[j].hash_next;
+ }
+ }
+
+ /* Check the LRU */
+ i = 0;
+ do
+ {
+ /* The list is cyclic and starts in entry #0; back links must match the forward ones */
+ j = q->entry_table[i].lru_next;
+ if ((entrymap[i] & (ET_LRU | ET_FREE_LIST)) || j >= q->hdr->max_entries || q->entry_table[j].lru_prev != i)
+ return "inconsistent LRU list";
+ entrymap[i] |= ET_LRU;
+ i = j;
+ }
+ while (i);
+
+ /* Check if all non-free items are in all lists */
+ for (i=1; i<q->hdr->max_entries; i++)
+ {
+ if (entrymap[i] != ((q->entry_table[i].data_len == ~0U) ? ET_FREE_LIST : (ET_LRU | ET_HASH)))
+ return "inconsistent lists";
+ }
+ return NULL;
+}
+
+/* Bits of the per-block map used by the audit: where the block was found */
+enum block_audit_flags {
+ BT_FREE_LIST = 1, /* List of free blocks */
+ BT_ALLOC = 2 /* Block list of some entry */
+};
+
+/*
+ * Check the consistency of block lists: walk the list of free blocks and
+ * the block lists of all entries not marked as free in entrymap (as filled
+ * in by audit_entries()), marking every visited block in blockmap (one byte
+ * per block, zeroed by the caller). Every data block must belong to exactly
+ * one list and the number of blocks of each entry must match its data length.
+ * Returns NULL if everything is all right, otherwise a description of the problem.
+ */
+static char *
+audit_blocks(struct qache *q, byte *entrymap, byte *blockmap)
+{
+  uns i, j;
+
+  DBG("Auditing blocks");
+
+  /* Check the free list */
+  for (i=q->first_free_block; i; i=q->next_table[i])
+    {
+      if (i < q->hdr->first_data_block || i >= q->hdr->num_blocks || (blockmap[i] & BT_FREE_LIST))
+	return "inconsistent free block list";
+      blockmap[i] |= BT_FREE_LIST;
+    }
+
+  /* Check allocation lists of entries */
+  for (i=1; i<q->hdr->max_entries; i++)
+    if (!(entrymap[i] & ET_FREE_LIST))
+      {
+	uns blocks = 0;
+	for (j=q->entry_table[i].first_data_block; j; j=q->next_table[j])
+	  {
+	    /* Check the range before touching blockmap[] and next_table[], the file can contain anything */
+	    if (j < q->hdr->first_data_block || j >= q->hdr->num_blocks || blockmap[j])
+	      return "inconsistent entry block list";
+	    blockmap[j] |= BT_ALLOC;
+	    blocks++;
+	  }
+	if (((q->entry_table[i].data_len + q->hdr->block_size - 1) >> q->hdr->block_shift) != blocks)
+	  return "inconsistent entry data length";
+      }
+
+  /* Check if all blocks belong somewhere */
+  for (i=q->hdr->first_data_block; i < q->hdr->num_blocks; i++)
+    if (!blockmap[i])
+      {
+	DBG("Block %d unreferenced", i);
+	return "unreferenced blocks found";
+      }
+
+  return NULL;
+}
+
+/*
+ * Audit the whole cache: first the entry table, then the block lists.
+ * Returns NULL if the cache is consistent, otherwise a description
+ * of the first problem found.
+ */
+static char *
+do_audit(struct qache *q)
+{
+  byte *entry_map = xmalloc_zero(q->hdr->max_entries);
+  byte *block_map = xmalloc_zero(q->hdr->num_blocks);
+  /* The audit functions return strings, so keep the result in a char pointer */
+  char *err = audit_entries(q, entry_map);
+  if (!err)
+    err = audit_blocks(q, entry_map, block_map);
+  xfree(block_map);
+  xfree(entry_map);
+  return err;
+}
+
+/*
+ * Set up pointers to the header and to all tables according to
+ * the offsets recorded in the header of the mapped file.
+ */
+static void
+qache_setup_pointers(struct qache *q)
+{
+  byte *base = q->mmap_data;
+  struct qache_header *hdr = (struct qache_header *) base;
+
+  q->hdr = hdr;
+  q->entry_table = (struct qache_entry *) (base + hdr->entry_table_start);
+  q->hash_table = (u32 *) (base + hdr->hash_table_start);
+  q->next_table = (u32 *) (base + hdr->next_table_start);
+}
+
+/*
+ * Try to open an existing cache file and to use its contents. The file is
+ * accepted only if its size and the parameters recorded in its header match
+ * the requested ones and if it passes the audit. Returns 1 on success.
+ * Returns 0 if the file cannot be opened or used; in the latter case, the
+ * reason is logged and the file is unmapped and closed again.
+ */
+static int
+qache_open_existing(struct qache *q, struct qache_params *par)
+{
+ if ((q->fd = open(q->file_name, O_RDWR, 0)) < 0)
+ return 0;
+
+ /* err always describes the check which is going to be performed next */
+ struct stat st;
+ char *err = "stat failed";
+ if (fstat(q->fd, &st) < 0)
+ goto close_and_fail;
+
+ err = "invalid file size";
+ if (st.st_size < (int)sizeof(struct qache_header) || (st.st_size % par->block_size))
+ goto close_and_fail;
+ q->file_size = st.st_size;
+
+ err = "requested size change";
+ if (q->file_size != par->cache_size)
+ goto close_and_fail;
+
+ err = "cannot mmap";
+ if ((q->mmap_data = mmap(NULL, q->file_size, PROT_READ | PROT_WRITE, MAP_SHARED, q->fd, 0)) == MAP_FAILED)
+ goto close_and_fail;
+ struct qache_header *h = (struct qache_header *) q->mmap_data;
+
+ /* The header is examined with the cache locked */
+ qache_setup_pointers(q);
+ qache_lock(q);
+
+ err = "incompatible format";
+ if (h->magic != QACHE_MAGIC ||
+ h->block_size != par->block_size ||
+ h->max_entries != par->max_entries ||
+ h->format_id != par->format_id)
+ goto unlock_and_fail;
+
+ err = "incomplete file";
+ if (h->num_blocks*h->block_size != q->file_size)
+ goto unlock_and_fail;
+
+ /* The assignment is intentional: the audit supplies its own error message */
+ if (err = do_audit(q))
+ goto unlock_and_fail;
+
+ qache_unlock(q, 0);
+ msg(L_INFO, "Cache %s: using existing data", q->file_name);
+ return 1;
+
+ unlock_and_fail:
+ qache_unlock(q, 0);
+ munmap(q->mmap_data, q->file_size);
+ close_and_fail:
+ msg(L_INFO, "Cache %s: ignoring old contents (%s)", q->file_name, err);
+ close(q->fd);
+ return 0;
+}
+
+/*
+ * Create a new empty cache file according to the given parameters,
+ * replacing the previous contents of the file if there were any. The file
+ * is written sequentially (header, entry table, hash table, next pointers,
+ * padding, zeroed data blocks) and mapped afterwards. Dies on errors.
+ */
+static void
+qache_create(struct qache *q, struct qache_params *par)
+{
+ q->fd = open(q->file_name, O_RDWR | O_CREAT | O_TRUNC, 0666);
+ if (q->fd < 0)
+ die("Cache %s: unable to create (%m)", q->file_name);
+ struct fastbuf *fb = bfdopen_shared(q->fd, 16384);
+
+ /* Calculate the layout of the file and write the header */
+ struct qache_header h;
+ bzero(&h, sizeof(h));
+ h.magic = QACHE_MAGIC;
+ h.block_size = par->block_size;
+ h.block_shift = bit_fls(h.block_size);
+ h.num_blocks = par->cache_size >> h.block_shift;
+ h.format_id = par->format_id;
+ h.entry_table_start = sizeof(h);
+ h.max_entries = par->max_entries;
+ h.hash_table_start = h.entry_table_start + h.max_entries * sizeof(struct qache_entry);
+ /* The hash table size is the smallest power of two which is not less than max_entries */
+ h.hash_size = 1;
+ while (h.hash_size < h.max_entries)
+ h.hash_size *= 2;
+ h.next_table_start = h.hash_table_start + h.hash_size * 4;
+ /* The data blocks start at the first block boundary after all the tables */
+ h.first_data_block = (h.next_table_start + 4*h.num_blocks + h.block_size - 1) >> h.block_shift;
+ if (h.first_data_block >= h.num_blocks)
+ die("Cache %s: Requested size is too small even to hold the maintenance structures", q->file_name);
+ bwrite(fb, &h, sizeof(h));
+
+ /* Entry #0: heads of all lists */
+ ASSERT(btell(fb) == (ucw_off_t)h.entry_table_start);
+ struct qache_entry ent;
+ bzero(&ent, sizeof(ent));
+ ent.first_data_block = h.first_data_block;
+ ent.data_len = h.num_blocks - h.first_data_block;
+ ent.hash_next = 1;
+ bwrite(fb, &ent, sizeof(ent));
+
+ /* Other entries */
+ /* All of them are free and chained to a single list by hash_next */
+ bzero(&ent, sizeof(ent));
+ ent.data_len = ~0U;
+ for (uns i=1; i<h.max_entries; i++)
+ {
+ ent.hash_next = (i == h.max_entries-1 ? 0 : i+1);
+ bwrite(fb, &ent, sizeof(ent));
+ }
+
+ /* The hash table */
+ ASSERT(btell(fb) == (ucw_off_t)h.hash_table_start);
+ for (uns i=0; i<h.hash_size; i++)
+ bputl(fb, 0);
+
+ /* The next pointers */
+ /* All data blocks form a single chain of free blocks */
+ ASSERT(btell(fb) == (ucw_off_t)h.next_table_start);
+ for (uns i=0; i<h.num_blocks; i++)
+ bputl(fb, (i < h.first_data_block || i == h.num_blocks-1) ? 0 : i+1);
+
+ /* Padding */
+ ASSERT(btell(fb) <= (ucw_off_t)(h.first_data_block << h.block_shift));
+ while (btell(fb) < (ucw_off_t)(h.first_data_block << h.block_shift))
+ bputc(fb, 0);
+
+ /* Data blocks */
+ for (uns i=h.first_data_block; i<h.num_blocks; i++)
+ for (uns j=0; j<h.block_size; j+=4)
+ bputl(fb, 0);
+
+ ASSERT(btell(fb) == (ucw_off_t)par->cache_size);
+ bclose(fb);
+ msg(L_INFO, "Cache %s: created (%d bytes, %d slots, %d buckets)", q->file_name, par->cache_size, h.max_entries, h.hash_size);
+
+ if ((q->mmap_data = mmap(NULL, par->cache_size, PROT_READ | PROT_WRITE, MAP_SHARED, q->fd, 0)) == MAP_FAILED)
+ die("Cache %s: mmap failed (%m)", par->file_name);
+ q->file_size = par->cache_size;
+ qache_setup_pointers(q);
+}
+
+/*
+ * Open a cache described by the given parameters. The requested size is
+ * rounded up to a multiple of the block size. Depending on force_reset:
+ *	> 0	always create a new cache
+ *	= 0	use the existing file if possible, otherwise create a new one
+ *	< 0	use the existing file or die
+ */
+struct qache *
+qache_open(struct qache_params *par)
+{
+  struct qache *q = xmalloc_zero(sizeof(*q));
+  q->file_name = xstrdup(par->file_name);
+
+  ASSERT(par->block_size >= 8 && !(par->block_size & (par->block_size-1)));
+  par->cache_size = ALIGN_TO(par->cache_size, par->block_size);
+
+  int may_reuse = (par->force_reset <= 0);
+  if (!may_reuse || !qache_open_existing(q, par))
+    {
+      if (par->force_reset < 0)
+	die("Cache %s: read-only access requested, but no data available", q->file_name);
+      else
+	qache_create(q, par);
+    }
+  return q;
+}
+
+/*
+ * Close the cache and free its handle. Unless retain_data is set,
+ * the cache file is deleted; a failure to do so is only logged.
+ */
+void
+qache_close(struct qache *q, uns retain_data)
+{
+  munmap(q->mmap_data, q->file_size);
+  close(q->fd);
+  if (!retain_data)
+    {
+      if (unlink(q->file_name) < 0)
+	msg(L_ERROR, "Cache %s: unlink failed (%m)", q->file_name);
+    }
+  xfree(q->file_name);
+  xfree(q);
+}
+
+/*
+ * Calculate the hash bucket for a key: the first four elements of the key
+ * combined to a single number in big-endian order, modulo the hash table size.
+ */
+static uns
+qache_hash(struct qache *q, qache_key_t *key)
+{
+  /* Convert to unsigned before shifting, so that a large first element cannot overflow a signed int */
+  uns h = ((uns)(*key)[0] << 24) | ((uns)(*key)[1] << 16) | ((uns)(*key)[2] << 8) | (uns)(*key)[3];
+  return h % q->hdr->hash_size;
+}
+
+/*
+ * Find the entry with the given key. If pos_hint is a valid number of
+ * a non-free entry with this key, it is returned without searching.
+ * Otherwise, the hash chain of the key is searched. Returns the number
+ * of the entry or 0 if there is none. The cache must be locked.
+ */
+static uns
+qache_hash_find(struct qache *q, qache_key_t *key, uns pos_hint)
+{
+  ASSERT(q->locked);
+
+  if (pos_hint && pos_hint < q->hdr->max_entries)
+    {
+      struct qache_entry *hinted = &q->entry_table[pos_hint];
+      if (hinted->data_len != ~0U && !memcmp(hinted->key, key, sizeof(*key)))
+	return pos_hint;
+    }
+
+  uns e = q->hash_table[qache_hash(q, key)];
+  while (e)
+    {
+      if (!memcmp(q->entry_table[e].key, key, sizeof(*key)))
+	return e;
+      e = q->entry_table[e].hash_next;
+    }
+  return 0;
+}
+
+/* Insert entry e at the beginning of the hash chain belonging to its key */
+static void
+qache_hash_insert(struct qache *q, uns e)
+{
+  struct qache_entry *entry = &q->entry_table[e];
+  u32 *bucket = &q->hash_table[qache_hash(q, &entry->key)];
+
+  entry->hash_next = *bucket;
+  *bucket = e;
+}
+
+/*
+ * Remove entry e from its hash chain. The chain is searched for the first
+ * entry with the same key, which is then unlinked. It is a fatal error
+ * if no such entry exists.
+ */
+static void
+qache_hash_remove(struct qache *q, uns e)
+{
+  struct qache_entry *entry = &q->entry_table[e];
+  uns *link = &q->hash_table[qache_hash(q, &entry->key)];
+
+  while (*link)
+    {
+      uns cur = *link;
+      if (!memcmp(q->entry_table[cur].key, entry->key, sizeof(qache_key_t)))
+	{
+	  *link = entry->hash_next;
+	  return;
+	}
+      link = &(q->entry_table[cur].hash_next);
+    }
+  ASSERT(0);
+}
+
+/*
+ * Take the first entry from the list of free entries, mark it as used
+ * with no data and return its number. The cache must be locked and
+ * the free list must not be empty.
+ */
+static uns
+qache_alloc_entry(struct qache *q)
+{
+  uns slot = q->first_free_entry;
+  ASSERT(q->locked && slot);
+
+  struct qache_entry *taken = &q->entry_table[slot];
+  ASSERT(taken->data_len == ~0U);
+  q->first_free_entry = taken->hash_next;
+  taken->data_len = 0;
+  return slot;
+}
+
+/*
+ * Mark entry e as free and put it at the beginning of the list
+ * of free entries. The cache must be locked.
+ */
+static void
+qache_free_entry(struct qache *q, uns e)
+{
+  struct qache_entry *released = &q->entry_table[e];
+
+  ASSERT(q->locked && released->data_len != ~0U);
+  released->hash_next = q->first_free_entry;
+  released->data_len = ~0U;
+  q->first_free_entry = e;
+}
+
+/* Return the address of a block with the given non-zero number inside the mapping */
+static inline void *
+get_block_start(struct qache *q, uns block)
+{
+  ASSERT(block && block < q->hdr->num_blocks);
+  byte *base = q->mmap_data;
+  return base + (block << q->hdr->block_shift);
+}
+
+/*
+ * Take the first block from the list of free blocks and return its number.
+ * The cache must be locked and there must be a free block.
+ */
+static uns
+qache_alloc_block(struct qache *q)
+{
+  ASSERT(q->locked && q->num_free_blocks);
+
+  uns taken = q->first_free_block;
+  q->num_free_blocks--;
+  q->first_free_block = q->next_table[taken];
+  DBG("\tAllocated block %d", taken);
+  return taken;
+}
+
+/* Put block blk at the beginning of the list of free blocks. The cache must be locked. */
+static void
+qache_free_block(struct qache *q, uns blk)
+{
+  ASSERT(q->locked);
+
+  uns old_head = q->first_free_block;
+  q->next_table[blk] = old_head;
+  q->first_free_block = blk;
+  q->num_free_blocks++;
+  DBG("\tFreed block %d", blk);
+}
+
+/*
+ * Insert entry e to the LRU list right after its head (entry #0).
+ * The cache must be locked and the entry must not be linked anywhere.
+ */
+static void
+qache_lru_insert(struct qache *q, uns e)
+{
+  struct qache_entry *head = &q->entry_table[0];
+  struct qache_entry *entry = &q->entry_table[e];
+
+  ASSERT(q->locked && !entry->lru_prev && !entry->lru_next);
+  uns old_first = head->lru_next;
+  entry->lru_prev = 0;
+  entry->lru_next = old_first;
+  q->entry_table[old_first].lru_prev = e;
+  head->lru_next = e;
+}
+
+/* Unlink entry e from the LRU list and clear its links. The cache must be locked. */
+static void
+qache_lru_remove(struct qache *q, uns e)
+{
+  ASSERT(q->locked);
+
+  struct qache_entry *entry = &q->entry_table[e];
+  uns before = entry->lru_prev;
+  uns after = entry->lru_next;
+  q->entry_table[before].lru_next = after;
+  q->entry_table[after].lru_prev = before;
+  entry->lru_next = 0;
+  entry->lru_prev = 0;
+}
+
+/*
+ * Return the entry at the end of the LRU list, i.e., the predecessor of
+ * the list head. The result is 0 if the head is linked to itself.
+ */
+static uns
+qache_lru_get(struct qache *q)
+{
+  struct qache_entry *head = &q->entry_table[0];
+  return head->lru_prev;
+}
+
+/*
+ * Delete entry e: free all its data blocks, remove it from the LRU list
+ * and from the hash table and return it to the list of free entries.
+ */
+static void
+qache_ll_delete(struct qache *q, uns e)
+{
+  struct qache_entry *entry = &q->entry_table[e];
+  uns blk = entry->first_data_block;
+
+  /* One block is freed for each block_size bytes of data (the last one can be incomplete) */
+  while (entry->data_len)
+    {
+      uns following = q->next_table[blk];
+      uns bsize = q->hdr->block_size;
+      qache_free_block(q, blk);
+      blk = following;
+      entry->data_len = (entry->data_len >= bsize) ? entry->data_len - bsize : 0;
+    }
+  qache_lru_remove(q, e);
+  qache_hash_remove(q, e);
+  qache_free_entry(q, e);
+}
+
+/*
+ * Insert data of the given size under the given key. If an entry with
+ * this key already exists (pos_hint can contain its expected number, see
+ * qache_hash_find()), it is deleted first. If there is not enough room,
+ * the entries at the end of the LRU list are evicted until the data fit.
+ * Returns the number of the new entry, or 0 if the data are bigger than
+ * all data blocks of the cache together (the old entry stays deleted then).
+ */
+uns
+qache_insert(struct qache *q, qache_key_t *key, uns pos_hint, void *data, uns size)
+{
+  uns dirty = 0;
+
+  qache_lock(q);
+
+  uns e = qache_hash_find(q, key, pos_hint);
+  if (e)
+    {
+      DBG("Insert <%s>: deleting old entry %d", format_key(key), e);
+      qache_ll_delete(q, e);
+      dirty = 1;
+    }
+
+  uns blocks = (size + q->hdr->block_size - 1) >> q->hdr->block_shift;
+  if (blocks > q->hdr->num_blocks - q->hdr->first_data_block)
+    {
+      /* If we have deleted the old entry, the tables were modified and must be synced */
+      qache_unlock(q, dirty);
+      return 0;
+    }
+  while (q->num_free_blocks < blocks || !q->first_free_entry)
+    {
+      e = qache_lru_get(q);
+      DBG("Insert <%s>: evicting entry %d to make room for %d blocks", format_key(key), e, blocks);
+      ASSERT(e);
+      qache_ll_delete(q, e);
+    }
+  e = qache_alloc_entry(q);
+  struct qache_entry *entry = &q->entry_table[e];
+  entry->data_len = size;
+  memcpy(entry->key, key, sizeof(*key));
+  DBG("Insert <%s>: created entry %d with %d data blocks", format_key(key), e, blocks);
+
+  /* The data are stored from the end, each block is put in front of the list built so far */
+  entry->first_data_block = 0;
+  while (size)
+    {
+      /* Only the chunk stored first (the end of the data) can be shorter than a block */
+      uns chunk = (size & (q->hdr->block_size-1)) ? : q->hdr->block_size;
+      uns blk = qache_alloc_block(q);
+      q->next_table[blk] = entry->first_data_block;
+      memcpy(get_block_start(q, blk), (byte *) data + size - chunk, chunk);
+      qache_msync_block(q, blk);
+      entry->first_data_block = blk;
+      size -= chunk;
+    }
+
+  qache_lru_insert(q, e);
+  qache_hash_insert(q, e);
+  qache_unlock(q, 1);
+  return e;
+}
+
+/*
+ * Copy data of a cache entry to the caller.
+ *
+ * sizep: on input the maximum number of bytes wanted, on output the number
+ *        of bytes available from offset `start' to the end of the entry.
+ *        If NULL, nothing is copied and datap must be NULL as well.
+ * datap: if non-NULL, receives the data; when *datap is NULL, a buffer is
+ *        allocated by xmalloc and stored there.
+ * start: offset inside the entry's data where the copy begins.
+ */
+static void
+copy_out(struct qache *q, struct qache_entry *entry, byte **datap, uns *sizep, uns start)
+{
+  if (sizep)
+    {
+      uns size = *sizep;
+      uns avail = (start > entry->data_len) ? 0 : entry->data_len - start;
+      uns xfer = MIN(size, avail);
+      *sizep = avail;
+      if (datap)
+	{
+	  if (!*datap)
+	    *datap = xmalloc(xfer);
+	  /* Skip the blocks lying completely before the requested offset. */
+	  uns blk = entry->first_data_block;
+	  while (start >= q->hdr->block_size)
+	    {
+	      blk = q->next_table[blk];
+	      start -= q->hdr->block_size;
+	    }
+	  byte *data = *datap;
+	  while (xfer)
+	    {
+	      uns len = MIN(xfer, q->hdr->block_size - start);
+	      /*
+	       * The first block is read from the remaining in-block offset;
+	       * all following blocks are read from their beginning.
+	       */
+	      memcpy(data, (byte *) get_block_start(q, blk) + start, len);
+	      blk = q->next_table[blk];
+	      data += len;
+	      xfer -= len;
+	      start = 0;
+	    }
+	}
+    }
+  else
+    ASSERT(!datap);
+}
+
+/*
+ * Find an entry by key (with an optional position hint), move it to the
+ * head of the LRU list and hand its data over via copy_out().
+ * Returns the entry number or 0 if the key is not cached.
+ */
+uns
+qache_lookup(struct qache *q, qache_key_t *key, uns pos_hint, byte **datap, uns *sizep, uns start)
+{
+  qache_lock(q);
+  uns e = qache_hash_find(q, key, pos_hint);
+  if (!e)
+    {
+      DBG("Lookup <%s>: not found", format_key(key));
+      qache_unlock(q, 0);
+      return e;
+    }
+
+  DBG("Lookup <%s>: found entry %d", format_key(key), e);
+  qache_lru_remove(q, e);
+  qache_lru_insert(q, e);
+  copy_out(q, &q->entry_table[e], datap, sizep, start);
+  qache_unlock(q, 1);			/* Yes, modified -- we update the LRU */
+  return e;
+}
+
+/*
+ * Inspect the entry at position `pos' without touching the LRU list.
+ * If the slot is in use, optionally copy its key to *key and its data via
+ * copy_out(). Returns ~0U for a position out of range, 0 for an empty
+ * slot and `pos' otherwise.
+ */
+uns
+qache_probe(struct qache *q, qache_key_t *key, uns pos, byte **datap, uns *sizep, uns start)
+{
+  if (!pos || pos >= q->hdr->max_entries)
+    {
+      DBG("Probe %d: Out of range", pos);
+      return ~0U;
+    }
+
+  qache_lock(q);
+  uns ret = 0;
+  struct qache_entry *entry = &q->entry_table[pos];
+  if (entry->data_len != ~0U)		/* data_len of ~0U marks an unused slot */
+    {
+      /* Both conversions need an argument: the position and the formatted key. */
+      DBG("Probe %d: Found key <%s>", pos, format_key(&entry->key));
+      if (key)
+	memcpy(key, entry->key, sizeof(qache_key_t));
+      copy_out(q, entry, datap, sizep, start);
+      ret = pos;
+    }
+  else
+    DBG("Probe %d: Empty", pos);
+  qache_unlock(q, 0);
+  return ret;
+}
+
+/*
+ * Delete the entry with the given key (position hint as in qache_lookup).
+ * Returns the number of the deleted entry or 0 if there was no match.
+ */
+uns
+qache_delete(struct qache *q, qache_key_t *key, uns pos_hint)
+{
+  qache_lock(q);
+  uns e = qache_hash_find(q, key, pos_hint);
+  if (e)
+    {
+      DBG("Delete <%s>: deleting entry %d", format_key(key), e);
+      qache_ll_delete(q, e);
+    }
+  else
+    DBG("Delete <%s>: No match", format_key(key));
+  /* Report the cache as modified only if something was really deleted. */
+  qache_unlock(q, e ? 1 : 0);
+  return e;
+}
+
+/*
+ * Dump the cache header parameters, the table of entries, the hash table
+ * and the next-block pointers to the log at L_DEBUG level.
+ * No lock is taken here.
+ */
+void
+qache_debug(struct qache *q)
+{
+ /* NOTE(review): block_size is printed twice, also as the "(%d data)" value -- confirm this is intended. */
+ msg(L_DEBUG, "Cache %s: block_size=%d (%d data), num_blocks=%d (%d first data), %d slots, %d hash buckets",
+ q->file_name, q->hdr->block_size, q->hdr->block_size, q->hdr->num_blocks, q->hdr->first_data_block,
+ q->hdr->max_entries, q->hdr->hash_size);
+
+ /* All entry slots including slot 0, which is used as the LRU list head */
+ msg(L_DEBUG, "Table of cache entries:");
+ msg(L_DEBUG, "\tEntry\tLruPrev\tLruNext\tDataLen\tDataBlk\tHashNxt\tKey");
+ for (uns e=0; e<q->hdr->max_entries; e++)
+ {
+ struct qache_entry *ent = &q->entry_table[e];
+ msg(L_DEBUG, "\t%d\t%d\t%d\t%d\t%d\t%d\t%s", e, ent->lru_prev, ent->lru_next, ent->data_len,
+ ent->first_data_block, ent->hash_next, format_key(&ent->key));
+ }
+
+ /* One line per hash bucket */
+ msg(L_DEBUG, "Hash table:");
+ for (uns h=0; h<q->hdr->hash_size; h++)
+ msg(L_DEBUG, "\t%04x\t%d", h, q->hash_table[h]);
+
+ /* Successor of each data block in its chain */
+ msg(L_DEBUG, "Next pointers:");
+ for (uns blk=q->hdr->first_data_block; blk<q->hdr->num_blocks; blk++)
+ msg(L_DEBUG, "\t%d\t%d", blk, q->next_table[blk]);
+}
+
+void
+qache_audit(struct qache *q)
+{
+  /* Run the consistency check under the lock; any reported problem is fatal. */
+  qache_lock(q);
+  char *err = do_audit(q);
+  if (err)
+    die("Cache %s: %s", q->file_name, err);
+  qache_unlock(q, 0);
+}
+
+#ifdef TEST
+
+/*
+ * Self-test: insert N entries of growing size (entry i carries 11*i bytes
+ * of a key-dependent pattern), dump and audit the cache, then look all of
+ * them up again and verify the contents of those that are still cached.
+ */
+int main(int argc UNUSED, char **argv UNUSED)
+{
+  struct qache_params par = {
+    .file_name = "tmp/test",
+    .block_size = 256,
+    .cache_size = 65536,
+    .max_entries = 123,
+    .force_reset = 0,
+    .format_id = 0xfeedcafe
+  };
+  struct qache *q = qache_open(&par);
+
+  qache_key_t key = { 0x12, 0x34, 0x56, 0x78, 0x9a, 0xbc, 0xde, 0xf0, 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef };
+#define N 100
+  uns i, j;
+  byte data[11*N];
+  for (i=0; i<N; i++)
+    {
+      key[3] = i / 16; key[15] = i % 16;
+      for (j=0; j<11*i; j++)
+	data[j] = 0x33 + i*j;
+      qache_insert(q, &key, 0, data, 11*i);
+    }
+  qache_debug(q);
+  qache_audit(q);
+
+  /* The lookup loop must cover exactly the keys inserted above, hence N and not a literal. */
+  uns found = 0;
+  for (i=0; i<N; i++)
+    {
+      key[3] = i / 16; key[15] = i % 16;
+      byte *dptr = data;
+      uns sz = sizeof(data);
+      uns e = qache_lookup(q, &key, 0, &dptr, &sz, 0);
+      if (e)
+	{
+	  ASSERT(sz == 11*i);
+	  for (j=0; j<sz; j++)
+	    ASSERT(data[j] == (byte)(0x33 + i*j));
+	  found++;
+	}
+    }
+  msg(L_INFO, "Found %d of %d entries", found, N);
+
+  qache_close(q, 1);
+  return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * Simple and Quick Shared Memory Cache
+ *
+ * (c) 2005 Martin Mares <mj@ucw.cz>
+ */
+
+#ifndef _UCW_QACHE_H
+#define _UCW_QACHE_H
+
+/* Parameters passed to qache_open() */
+struct qache_params {
+ char *file_name; /* Name of the cache file (presumably the file backing the shared memory -- confirm in qache_open) */
+ uns block_size; /* Cache block size (a power of two) */
+ uns cache_size; /* Size of the whole cache */
+ uns max_entries; /* Maximum number of cached entries */
+ int force_reset; /* Force creation of a new cache even if the old one seems usable, -1 if reset should never be done */
+ uns format_id; /* Data format ID (old cache not used if formats differ) */
+};
+
+typedef byte qache_key_t[16];
+
+struct qache;
+
+/* Create and destroy a cache */
+struct qache *qache_open(struct qache_params *p);
+void qache_close(struct qache *q, uns retain_data);
+
+/* Insert a new item into the cache with a given key and data. If pos_hint is non-zero, it serves
+ * as a hint about the position of the entry (if it's known that an entry with the particular key
+ * was located there a moment ago). Returns the position of the new entry, or 0 if the data are
+ * too large to fit in the cache.
+ */
+uns qache_insert(struct qache *q, qache_key_t *key, uns pos_hint, void *data, uns size);
+
+/* Look up data in the cache, given a key and a position hint (as above). If datap is non-NULL, data
+ * from the cache entry are copied to the buffer *datap points to (if *datap is NULL, new memory is
+ * allocated by calling xmalloc and *datap is set to point to that memory). The *sizep contains the
+ * maximum number of bytes to be copied (~0U if unlimited) and it is replaced by the number of bytes
+ * available (so it can be greater than the original value requested). The start indicates the
+ * starting offset inside the entry's data. Returns the position of the entry, or 0 if not found.
+ */
+uns qache_lookup(struct qache *q, qache_key_t *key, uns pos_hint, byte **datap, uns *sizep, uns start);
+
+/* Inspect data in the cache (but don't modify LRU nor anything else), given a position.
+ * If key is non-NULL, it's filled with the cache key. The rest works as in qache_lookup.
+ * Returns 0 if the entry is empty, ~0 for position out of range, entry number otherwise.
+ */
+uns qache_probe(struct qache *q, qache_key_t *key, uns pos, byte **datap, uns *sizep, uns start);
+
+/* Delete data from the cache, given a key and a position hint. */
+uns qache_delete(struct qache *q, qache_key_t *key, uns pos_hint);
+
+/* Debugging dump (beware, doesn't lock the cache!) */
+void qache_debug(struct qache *q);
+
+/* Check consistency of the cache structure */
+void qache_audit(struct qache *q);
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Unbiased Random Numbers
+ *
+ * (c) 1998--2006 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+
+#include <stdlib.h>
+
+/* We expect the random generator in libc to give at least 30 bits of randomness */
+COMPILE_ASSERT(RAND_MAX_RANGE_TEST, RAND_MAX >= (1 << 30)-1);
+
+/*
+ * Return 32 random bits composed of the low 16 bits of two random() calls.
+ */
+uns
+random_u32(void)
+{
+  /*
+   * Take the two halves in separate statements, so that the order of the
+   * random() calls is defined, and keep them unsigned: shifting a signed
+   * long holding 0x8000..0xffff left by 16 overflows where long is 32-bit.
+   */
+  uns lo = random() & 0xffff;
+  uns hi = random() & 0xffff;
+  return lo | (hi << 16);
+}
+
+/*
+ * Return an unbiased random number in the range [0, max).
+ * max must be between 1 and 2^30 inclusive.
+ */
+uns
+random_max(uns max)
+{
+  uns r, l;
+
+  /* A zero max would divide by zero below, so reject it explicitly. */
+  ASSERT(max && max <= (1 << 30));
+  /* l is the largest multiple of max not exceeding RAND_MAX+1; values >= l would bias the result. */
+  l = (RAND_MAX + 1U) - ((RAND_MAX + 1U) % max);
+  do
+    r = random();
+  while (r >= l);
+  return r % max;
+}
+
+/*
+ * Return 64 random bits composed of 16 + 24 + 24 bits taken from three
+ * random() calls.
+ */
+u64
+random_u64(void)
+{
+  /*
+   * Separate statements fix the order of the random() calls; inside a
+   * single expression it is unspecified, which makes the output for a given
+   * seed depend on the compiler.
+   */
+  u64 top = random() & 0xffff;
+  u64 mid = random() & 0xffffff;
+  u64 low = random() & 0xffffff;
+  return (top << 48) | (mid << 24) | low;
+}
+
+/*
+ * 64-bit variant of random_max(): an unbiased random number in [0, max).
+ * Small ranges are delegated to random_max().
+ */
+u64
+random_max_u64(u64 max)
+{
+  if (max < (1 << 30))
+    return random_max(max);
+
+  /* Rejection sampling: accept only values below the largest multiple of max that fits. */
+  u64 all_ones = 0xffffffffffffffff;
+  u64 limit = all_ones - (all_ones % max);
+  u64 candidate;
+  for (;;)
+    {
+      candidate = random_u64();
+      if (candidate < limit)
+	break;
+    }
+  return candidate % max;
+}
--- /dev/null
+/*
+ * UCW Library -- Cryptographically Safe Random Key Generator
+ *
+ * (c) 2002 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+
+#include <fcntl.h>
+#include <unistd.h>
+
+/*
+ * Fill buf with `size' random bytes read from /dev/urandom.
+ * Dies if the device cannot be opened or read.
+ */
+void
+randomkey(byte *buf, uns size)
+{
+  int fd;
+
+  if ((fd = open("/dev/urandom", O_RDONLY, 0)) < 0)
+    die("Unable to open /dev/urandom: %m");
+  /*
+   * read() may legally return fewer bytes than requested, so keep reading
+   * until the buffer is full instead of treating a short read as an error.
+   */
+  while (size)
+    {
+      ssize_t got = read(fd, buf, size);
+      if (got < 0)
+	die("Error reading /dev/urandom: %m");
+      if (!got)
+	die("Error reading /dev/urandom: unexpected end of file");
+      buf += got;
+      size -= got;
+    }
+  close(fd);
+}
--- /dev/null
+/*
+ * UCW Library -- Memory Re-allocation
+ *
+ * (c) 1997 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+
+#include <stdlib.h>
+
+/*
+ * realloc() wrapper which dies when the memory cannot be obtained.
+ * Returns the (possibly moved) block.
+ */
+void *
+xrealloc(void *old, uns size)
+{
+  /* We assume that realloc(NULL, x) works like malloc(x), which is true with the glibc. */
+  void *x = realloc(old, size);
+  /* realloc(p, 0) is allowed to return NULL without being out of memory, so that is not an error. */
+  if (!x && size)
+    die("Cannot reallocate %u bytes of memory", size);
+  return x;
+}
--- /dev/null
+/*
+ * Test of red-black trees
+ *
+ * (c) 2002, Robert Spalek <robert@ucw.cz>
+ */
+
+#include "ucw/lib.h"
+#include "ucw/getopt.h"
+#include "ucw/fastbuf.h"
+#include <stdio.h>
+#include <stdlib.h>
+
+/* Node of the first test tree: an integer key with an integer payload */
+struct my1_node
+{
+ int key; /* Tree key (see TREE_KEY_ATOMIC below) */
+ int x; /* Payload set by the tests */
+};
+
+static void my_dump_key(struct fastbuf *fb, struct my1_node *n)
+{
+  /* Write "key=<number> " to fb; 20 bytes suffice as long as int has at most 32 bits. */
+  char text[20];
+  snprintf(text, sizeof(text), "key=%d ", n->key);
+  bputs(fb, text);
+}
+
+static void my_dump_data(struct fastbuf *fb, struct my1_node *n)
+{
+  /* Write "x=<number> " to fb; 20 bytes suffice as long as int has at most 32 bits. */
+  char text[20];
+  snprintf(text, sizeof(text), "x=%d ", n->x);
+  bputs(fb, text);
+}
+
+/* Generate the first tree type (functions my_*): atomic int key stored in my1_node.key */
+#define TREE_NODE struct my1_node
+#define TREE_PREFIX(x) my_##x
+#define TREE_KEY_ATOMIC key
+/* Requested operations: my_cleanup, my_lookup (which pulls in find and new), my_delete, TREE_FOR_ALL, my_dump */
+#define TREE_WANT_CLEANUP
+#define TREE_WANT_LOOKUP
+#define TREE_WANT_DELETE
+#define TREE_WANT_ITERATOR
+#define TREE_WANT_DUMP
+#define TREE_CONSERVE_SPACE
+#include "redblack.h"
+
+/*
+ * Walk the whole tree with the iterator and assert that the keys come in
+ * non-decreasing order. If fb is non-NULL, also print "key -> x" for
+ * every node and flush fb.
+ */
+static void my_check_order(struct fastbuf *fb, struct my_tree *t)
+{
+ /* Start below every possible key (the constant is meant as the smallest 32-bit int). */
+ int last_key = 0x80000000;
+ TREE_FOR_ALL(my, t, n)
+ {
+ ASSERT(n->key >= last_key);
+ last_key = n->key;
+ if (fb)
+ {
+ char tmp[30];
+ sprintf(tmp, "%d -> %d\n", n->key, n->x);
+ bputs(fb, tmp);
+ }
+ }
+ TREE_END_FOR;
+ if (fb)
+ bflush(fb);
+}
+
+/* Node of the second test tree: just a string key stored at the end of the node */
+struct my2_node
+{
+ char key[1]; /* Declared with one byte; the rest of the string lives in space allocated past the struct (TREE_KEY_ENDSTRING) */
+};
+
+/* Write "key=<string> " for the given node to fb. */
+static void my2_dump_key(struct fastbuf *fb, struct my2_node *n)
+{
+ bputs(fb, "key=");
+ bputs(fb, n->key);
+ bputc(fb, ' ');
+}
+
+/* Nodes of the second tree carry no data besides the key, so there is nothing to print. */
+static void my2_dump_data(struct fastbuf *fb UNUSED, struct my2_node *n UNUSED)
+{
+}
+
+/* Generate the second tree type (functions my2_*): case-insensitive string key stored at the end of the node */
+#define TREE_NODE struct my2_node
+#define TREE_PREFIX(x) my2_##x
+#define TREE_KEY_ENDSTRING key
+#define TREE_NOCASE
+/* Requested operations: my2_cleanup, my2_new, my2_search, my2_remove, my2_find_next, TREE_FOR_ALL, my2_dump */
+#define TREE_WANT_CLEANUP
+#define TREE_WANT_NEW
+#define TREE_WANT_SEARCH
+#define TREE_WANT_REMOVE
+#define TREE_WANT_FIND_NEXT
+#define TREE_WANT_ITERATOR
+#define TREE_WANT_DUMP
+/* NOTE(review): TREE_STATIC is not among the parameters documented in redblack.h (only TREE_GLOBAL is) -- confirm it has any effect. */
+#define TREE_STATIC
+#define TREE_CONSERVE_SPACE
+#include "redblack.h"
+
+static void random_string(char *txt, uns max_len)
+{
+  /*
+   * Produce a zero-terminated string of random length below max_len,
+   * consisting of characters with codes 32 to 127.
+   */
+  uns len = random() % max_len;
+  char *end = txt + len;
+  while (txt < end)
+    *txt++ = random() % 96 + 32;
+  *end = 0;
+}
+
+/* Short options accepted by the test: the standard configuration ones plus -v, -n <num> and -a */
+static char *options = CF_SHORT_OPTS "vn:a";
+
+/* Usage text printed by usage() */
+static char *help = "\
+Usage: test1.bin <options>\n\
+Options:\n"
+CF_USAGE
+"-v\tSet verbose mode\n\
+-n num\tNumber of inserted nodes\n\
+-a\tProbe some ASSERTs\n\
+";
+
+/* Print the usage text to stderr and exit with status 1. */
+static void NONRET
+usage(void)
+{
+ fputs(help, stderr);
+ exit(1);
+}
+
+/*
+ * Test driver for the red-black tree generator. Runs four scenarios:
+ * a load test with random integer keys, sequential insertion and deletion,
+ * a complete tree of 997 keys inserted and deleted in permuted order, and
+ * a test of string keys with duplicates.
+ *
+ * Options: -v raises verbosity (with -v -v the trees are also dumped to
+ * stdout), -n sets the number of nodes for the load and string tests, -a
+ * makes the load test insert the same key twice ("Probe some ASSERTs").
+ */
+int
+main(int argc, char **argv)
+{
+ int verbose = 0, number = 1000, asserts = 0;
+ int opt;
+ struct fastbuf *fb, *dump_fb;
+ struct my_tree t;
+ struct my2_tree t2;
+ int i;
+ cf_def_file = NULL;
+ log_init(argv[0]);
+ while ((opt = cf_getopt(argc, argv, options, CF_NO_LONG_OPTS, NULL)) >= 0)
+ switch (opt)
+ {
+ case 'v':
+ verbose++;
+ break;
+ case 'n':
+ number = atoi(optarg);
+ break;
+ case 'a':
+ asserts++;
+ break;
+ default:
+ usage();
+ break;
+ }
+ if (optind < argc)
+ usage();
+ /* Output goes to file descriptor 1; dumps are produced only when verbose > 1 */
+ fb = bfdopen(1, 4096);
+ if (verbose > 1)
+ dump_fb = fb;
+ else
+ dump_fb = NULL;
+
+ /* Test 1: `number' lookups of random keys, then check the ordering */
+ my_init(&t);
+ for (i=0; i<number; i++)
+ my_lookup(&t, random() % 1000000)->x = i;
+ my_dump(dump_fb, &t);
+ my_check_order(dump_fb, &t);
+ if (asserts)
+ {
+ /* Inserting the same key twice into a tree generated without TREE_DUPLICATES */
+ my_new(&t, 1);
+ my_new(&t, 1);
+ }
+ my_cleanup(&t);
+ if (verbose > 0)
+ bputs(fb, "Load test passed\n");
+
+ /* Test 2: insert 0..99 in order, then delete all of them in a shuffled order */
+ my_init(&t);
+ for (i=0; i<100; i++)
+ {
+ my_new(&t, i)->x = i;
+ my_dump(dump_fb, &t);
+ }
+ for (i=0; i<100; i++)
+ {
+ /* j runs through every key once: within each decade the digits are rotated by the decade number */
+ int a = i/10, b = i%10, j = a*10 + (b + a) % 10;
+ int res UNUSED = my_delete(&t, j);
+ ASSERT(res);
+ my_dump(dump_fb, &t);
+ }
+ my_cleanup(&t);
+ if (verbose > 0)
+ bputs(fb, "Sequential adding and deleting passed\n");
+
+ /* Test 3: keys 0..996 inserted in the order i*238 mod 997; 997 is prime, so each key occurs exactly once */
+ my_init(&t);
+ for (i=0; i<997; i++)
+ {
+ my_new(&t, i*238 % 997)->x = i;
+ my_dump(NULL, &t);
+ }
+ my_dump(dump_fb, &t);
+ /* The iterator has to deliver the keys as 0, 1, ..., 996 */
+ i = 0;
+ TREE_FOR_ALL(my, &t, n)
+ {
+ ASSERT(n->key == i);
+ i++;
+ }
+ TREE_END_FOR;
+ ASSERT(i == 997);
+ /* Delete everything again, in a different permutation */
+ for (i=0; i<997; i++)
+ {
+ int res UNUSED = my_delete(&t, i*111 % 997);
+ ASSERT(res);
+ my_dump(NULL, &t);
+ }
+ my_dump(dump_fb, &t);
+ my_cleanup(&t);
+ if (verbose > 0)
+ bputs(fb, "Complete tree passed\n");
+
+ /* Test 4: random strings (shorter than 30 characters) in the case-insensitive tree allowing duplicates */
+ my2_init(&t2);
+ for (i=0; i<number; i++)
+ {
+ char txt[30];
+ random_string(txt, 30);
+ my2_new(&t2, txt);
+ }
+ my2_dump(dump_fb, &t2);
+ /* For every node, count the nodes reached from it by repeated find_next (itself included) */
+ TREE_FOR_ALL(my2, &t2, n)
+ {
+ my2_node *tmp;
+ int count = 0;
+ for (tmp=n; tmp; tmp = my2_find_next(tmp))
+ count++;
+ if (dump_fb)
+ {
+ char txt[20];
+ bputs(dump_fb, n->key);
+ sprintf(txt, ": %d\n", count);
+ bputs(dump_fb, txt);
+ }
+ }
+ TREE_END_FOR;
+ /* Empty the tree by repeatedly removing the node nearest to a random string */
+ while (t2.count > 0)
+ {
+ char txt[30];
+ my2_node *n;
+ random_string(txt, 30);
+ n = my2_search(&t2, txt);
+ ASSERT(n);
+ my2_remove(&t2, n);
+ }
+ my2_dump(dump_fb, &t2);
+ my2_cleanup(&t2);
+ if (verbose > 0)
+ bputs(fb, "String test passed\n");
+
+ bclose(fb);
+ return 0;
+}
--- /dev/null
+# Test for the redblack module
+
+Run: ../obj/ucw/redblack-test
--- /dev/null
+/*
+ * UCW Library -- Red-black trees
+ *
+ * (c) 2002--2005, Robert Spalek <robert@ucw.cz>
+ *
+ * Skeleton based on hash-tables by:
+ *
+ * (c) 2002, Martin Mares <mj@ucw.cz>
+ *
+ */
+
+/*
+ * Data structure description:
+ *
+ * A red-black tree is a binary search tree, where records are stored
+ * in nodes (may be also leaves). Every node has a colour. The
+ * following restrictions hold:
+ *
+ * - a parent of a red node is black
+ * - every path from the root to a node with less than 2 children
+ * contains the same number of black nodes
+ *
+ * A usual interpretation is, that leaves are intervals between records
+ * and contain no data. Every leaf is black. This is equivalent, but
+ * saves the space.
+ */
+
+/*
+ * This is not a normal header file, it's a generator of red-black trees.
+ * Each time you include it with parameters set in the corresponding
+ * preprocessor macros, it generates a tree structure with the parameters
+ * given.
+ *
+ * You need to specify:
+ *
+ * TREE_NODE data type where a node dwells (usually a struct).
+ * TREE_PREFIX(x) macro to add a name prefix (used on all global names
+ * defined by the tree generator).
+ *
+ * Then decide on type of keys:
+ *
+ * TREE_KEY_ATOMIC=f use node->f as a key of an atomic type (i.e.,
+ * a type which can be compared using '>', `==', and '<')
+ * & TREE_ATOMIC_TYPE (defaults to int).
+ * | TREE_KEY_STRING=f use node->f as a string key, allocated
+ * separately from the rest of the node.
+ * | TREE_KEY_ENDSTRING=f use node->f as a string key, allocated
+ * automatically at the end of the node struct
+ * (to be declared as "char f[1]" at the end).
+ * | TREE_KEY_COMPLEX use a multi-component key; as the name suggests,
+ * the passing of parameters is a bit complex then.
+ * The TREE_KEY_COMPLEX(x) macro should expand to
+ * `x k1, x k2, ... x kn' and you should also define:
+ * & TREE_KEY_DECL declaration of function parameters in which key
+ * should be passed to all tree operations.
+ * That is, `type1 k1, type2 k2, ... typen kn'.
+ * With complex keys, TREE_GIVE_CMP is mandatory.
+ *
+ * Then specify what operations you request (all names are automatically
+ * prefixed by calling TREE_PREFIX):
+ *
+ * <always defined> init() -- initialize the tree.
+ * TREE_WANT_CLEANUP cleanup() -- deallocate the tree.
+ * TREE_WANT_FIND node *find(key) -- find first node with the specified
+ * key, return NULL if no such node exists.
+ * TREE_WANT_FIND_NEXT node *find_next(node *start) -- find next node with the
+ * specified key, return NULL if no such node exists.
+ * Implies TREE_DUPLICATES.
+ * TREE_WANT_SEARCH node *search(key) -- find the node with the specified
+ * key or, if it does not exist, the nearest one.
+ * TREE_WANT_SEARCH_DOWN node *search_down(key) -- find either the node with
+ * specified value, or if it does not exist, the node
+ * with nearest smaller value.
+ * TREE_WANT_BOUNDARY node *boundary(uns direction) -- finds smallest
+ * (direction==0) or largest (direction==1) node.
+ * TREE_WANT_ADJACENT node *adjacent(node *, uns direction) -- finds next
+ * (direction==1) or previous (direction==0) node.
+ * TREE_WANT_NEW node *new(key) -- create new node with given key.
+ * If it already exists, it is created as the last one.
+ * TREE_WANT_LOOKUP node *lookup(key) -- find node with given key,
+ * if it doesn't exist, create it. Defining
+ * TREE_GIVE_INIT_DATA is strongly recommended.
+ * TREE_WANT_DELETE int delete(key) -- delete and deallocate node
+ * with a given key. Returns success.
+ * TREE_WANT_REMOVE remove(node *) -- delete and deallocate given node.
+ *
+ * TREE_WANT_DUMP dump() -- dumps the whole tree to stdout
+ *
+ * You can also supply several functions:
+ *
+ * TREE_GIVE_CMP int cmp(key1, key2) -- return -1, 0, and 1 according to
+ * the relation of keys. By default, we use <, ==, > for
+ * atomic types and either strcmp or strcasecmp for
+ * strings.
+ * TREE_GIVE_EXTRA_SIZE int extra_size(key) -- returns how many bytes after the
+ * node should be allocated for dynamic data. Default=0
+ * or length of the string with TREE_KEY_ENDSTRING.
+ * TREE_GIVE_INIT_KEY void init_key(node *,key) -- initialize key in a newly
+ * created node. Defaults: assignment for atomic keys
+ * and static strings, strcpy for end-allocated strings.
+ * TREE_GIVE_INIT_DATA void init_data(node *) -- initialize data fields in a
+ * newly created node. Very useful for lookup operations.
+ * TREE_GIVE_ALLOC void *alloc(unsigned int size) -- allocate space for
+ * a node. Default is either normal or pooled allocation
+ * depending on whether we want deletions.
+ * void free(void *) -- the converse.
+ *
+ * ... and a couple of extra parameters:
+ *
+ * TREE_NOCASE string comparisons should be case-insensitive.
+ * TREE_ATOMIC_TYPE=t Atomic values are of type `t' instead of int.
+ * TREE_USE_POOL=pool Allocate all nodes from given mempool.
+ * Collides with delete/remove functions.
+ * TREE_GLOBAL Functions are exported (i.e., not static).
+ * TREE_CONSERVE_SPACE Use as little space as possible at the price of a
+ * little slowdown.
+ * TREE_DUPLICATES Records with duplicate keys are allowed.
+ * TREE_MAX_DEPTH Maximal depth of a tree (for stack allocation).
+ *
+ * If you set TREE_WANT_ITERATOR, you also get a iterator macro at no
+ * extra charge:
+ *
+ * TREE_FOR_ALL(tree_prefix, tree_pointer, variable)
+ * {
+ * // node *variable gets declared automatically
+ * do_something_with_node(variable);
+ * // use TREE_BREAK and TREE_CONTINUE instead of break and continue
+ * // you must not alter contents of the tree here
+ * }
+ * TREE_END_FOR;
+ *
+ * Then include "ucw/redblack.h" and voila, you have a tree suiting all your
+ * needs (at least those which you've revealed :) ).
+ *
+ * After including this file, all parameter macros are automatically
+ * undef'd.
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#if !defined(TREE_NODE) || !defined(TREE_PREFIX)
+#error Some of the mandatory configuration macros are missing.
+#endif
+
+#define P(x) TREE_PREFIX(x)
+
+/* Declare buckets and the tree. */
+
+typedef TREE_NODE P(node);
+
+#if defined(TREE_WANT_FIND_NEXT) || defined(TREE_WANT_ADJACENT) || defined(TREE_WANT_ITERATOR) || defined(TREE_WANT_REMOVE)
+# define TREE_STORE_PARENT
+#endif
+
+/*
+ * A bucket is the internal representation of one tree node: the links to
+ * both sons, optionally the link to the parent and the colour bit, and
+ * the user's TREE_NODE embedded as member n.
+ *
+ * With TREE_CONSERVE_SPACE there is no red_flag member at all. Otherwise
+ * red_flag is placed before n when extra bytes can be allocated after the
+ * node (TREE_GIVE_EXTRA_SIZE or TREE_KEY_ENDSTRING) and after n in the
+ * remaining case -- presumably to keep n last when it has a variable-sized
+ * tail.
+ */
+typedef struct P(bucket) {
+ struct P(bucket) *son[2];
+#ifdef TREE_STORE_PARENT
+ struct P(bucket) *parent;
+#endif
+#if !defined(TREE_CONSERVE_SPACE) && (defined(TREE_GIVE_EXTRA_SIZE) || defined(TREE_KEY_ENDSTRING))
+ uns red_flag:1;
+#endif
+ P(node) n;
+#if !defined(TREE_CONSERVE_SPACE) && !defined(TREE_GIVE_EXTRA_SIZE) && !defined(TREE_KEY_ENDSTRING)
+ uns red_flag:1;
+#endif
+} P(bucket);
+
+/* The tree itself; set up by init() */
+struct P(tree) {
+ uns count; /* Number of nodes in the tree */
+ uns height; /* of black nodes */
+ P(bucket) *root; /* NULL if the tree is empty */
+};
+
+/* One step of a path from the root downwards, as recorded by fill_stack() */
+typedef struct P(stack_entry) {
+ P(bucket) *buck; /* Bucket visited at this depth (NULL past the end of the path) */
+ uns son; /* Index of the son (0 or 1) through which the path continues */
+} P(stack_entry);
+
+#define T struct P(tree)
+
+/* Preset parameters */
+
+/*
+ * Depending on the selected kind of key, define TREE_KEY(x) (the key
+ * field(s) prefixed by x), TREE_KEY_DECL (declaration of the key as
+ * function parameters) and the default cmp, init_key and extra_size.
+ */
+#if defined(TREE_KEY_ATOMIC)
+
+/* Atomic key: a single field of type TREE_ATOMIC_TYPE */
+#define TREE_KEY(x) x TREE_KEY_ATOMIC
+
+#ifndef TREE_ATOMIC_TYPE
+# define TREE_ATOMIC_TYPE int
+#endif
+#define TREE_KEY_DECL TREE_ATOMIC_TYPE TREE_KEY()
+
+#ifndef TREE_GIVE_CMP
+# define TREE_GIVE_CMP
+ /* Default three-way comparison using < and > */
+ static inline int P(cmp) (TREE_ATOMIC_TYPE x, TREE_ATOMIC_TYPE y)
+ {
+ if (x < y)
+ return -1;
+ else if (x > y)
+ return 1;
+ else
+ return 0;
+ }
+#endif
+
+#ifndef TREE_GIVE_INIT_KEY
+# define TREE_GIVE_INIT_KEY
+ /* Default key initialization by plain assignment */
+ static inline void P(init_key) (P(node) *n, TREE_ATOMIC_TYPE k)
+ { TREE_KEY(n->) = k; }
+#endif
+
+#elif defined(TREE_KEY_STRING) || defined(TREE_KEY_ENDSTRING)
+
+#ifdef TREE_KEY_STRING
+/* String key referenced by a pointer: the node keeps the pointer it was given */
+# define TREE_KEY(x) x TREE_KEY_STRING
+# ifndef TREE_GIVE_INIT_KEY
+# define TREE_GIVE_INIT_KEY
+ static inline void P(init_key) (P(node) *n, char *k)
+ { TREE_KEY(n->) = k; }
+# endif
+#else
+/* String key stored at the end of the node: strlen(k) extra bytes are requested and the string is copied in */
+# define TREE_KEY(x) x TREE_KEY_ENDSTRING
+# define TREE_GIVE_EXTRA_SIZE
+ static inline int P(extra_size) (char *k)
+ { return strlen(k); }
+# ifndef TREE_GIVE_INIT_KEY
+# define TREE_GIVE_INIT_KEY
+ static inline void P(init_key) (P(node) *n, char *k)
+ { strcpy(TREE_KEY(n->), k); }
+# endif
+#endif
+#define TREE_KEY_DECL char *TREE_KEY()
+
+#ifndef TREE_GIVE_CMP
+# define TREE_GIVE_CMP
+ /* Default string comparison, case-insensitive if TREE_NOCASE is set */
+ static inline int P(cmp) (char *x, char *y)
+ {
+# ifdef TREE_NOCASE
+ return strcasecmp(x,y);
+# else
+ return strcmp(x,y);
+# endif
+ }
+#endif
+
+#elif defined(TREE_KEY_COMPLEX)
+
+/* Complex key: everything else (TREE_KEY_DECL, cmp, init_key) is supplied by the user */
+#define TREE_KEY(x) TREE_KEY_COMPLEX(x)
+
+#else
+#error You forgot to set the tree key type.
+#endif
+
+/*
+ * Accessors for the colour of a bucket and for its sons. Two variants:
+ * with a separate red_flag member, or (TREE_CONSERVE_SPACE) with the
+ * colour kept in the lowest bit of son[0].
+ */
+#ifndef TREE_CONSERVE_SPACE
+ static inline uns P(red_flag) (P(bucket) *node)
+ { return node->red_flag; }
+ static inline void P(set_red_flag) (P(bucket) *node, uns flag)
+ { node->red_flag = flag; }
+ static inline P(bucket) * P(tree_son) (P(bucket) *node, uns id)
+ { return node->son[id]; }
+ static inline void P(set_tree_son) (P(bucket) *node, uns id, P(bucket) *son)
+ { node->son[id] = son; }
+#else
+ /* Pointers are aligned, hence we can use lower bits. */
+ /* The colour is bit 0 of son[0] */
+ static inline uns P(red_flag) (P(bucket) *node)
+ { return ((uintptr_t) node->son[0]) & 1L; }
+ /* Replace bit 0 of son[0], keeping the pointer bits */
+ static inline void P(set_red_flag) (P(bucket) *node, uns flag)
+ { node->son[0] = (void*) ( (((uintptr_t) node->son[0]) & ~1L) | (flag & 1L) ); }
+ /* Return the son pointer with bit 0 masked out */
+ static inline P(bucket) * P(tree_son) (P(bucket) *node, uns id)
+ { return (void *) (((uintptr_t) node->son[id]) & ~1L); }
+ /* Store a new son pointer, carrying over bit 0 of the old value */
+ static inline void P(set_tree_son) (P(bucket) *node, uns id, P(bucket) *son)
+ { node->son[id] = (void *) ((uintptr_t) son | (((uintptr_t) node->son[id]) & 1L) ); }
+#endif
+
+/* Defaults for missing parameters. */
+
+#ifndef TREE_GIVE_CMP
+#error Unable to determine how to compare two keys.
+#endif
+
+/* TREE_EXTRA_SIZE(key) gives the number of bytes to allocate after the bucket */
+#ifdef TREE_GIVE_EXTRA_SIZE
+/* This trickery is needed to avoid `unused parameter' warnings */
+# define TREE_EXTRA_SIZE P(extra_size)
+#else
+/*
+ * Beware, C macros are expanded iteratively, not recursively,
+ * hence we get only a _single_ argument, although the expansion
+ * of TREE_KEY contains commas.
+ */
+# define TREE_EXTRA_SIZE(x) 0
+#endif
+
+#ifndef TREE_GIVE_INIT_KEY
+# error Unable to determine how to initialize keys.
+#endif
+
+/* By default, the data fields of a new node are left uninitialized */
+#ifndef TREE_GIVE_INIT_DATA
+static inline void P(init_data) (P(node) *n UNUSED)
+{
+}
+#endif
+
+#include <stdlib.h>
+
+/*
+ * Default allocator: either from the given mempool (then only alloc is
+ * defined here and TREE_SAFE_FREE expands to nothing), or xmalloc/xfree.
+ */
+#ifndef TREE_GIVE_ALLOC
+# ifdef TREE_USE_POOL
+ static inline void * P(alloc) (unsigned int size)
+ { return mp_alloc_fast(TREE_USE_POOL, size); }
+# define TREE_SAFE_FREE(x)
+# else
+ static inline void * P(alloc) (unsigned int size)
+ { return xmalloc(size); }
+
+ static inline void P(free) (void *x)
+ { xfree(x); }
+# endif
+#endif
+
+#ifndef TREE_SAFE_FREE
+# define TREE_SAFE_FREE(x) P(free) (x)
+#endif
+
+/* Linkage of the generated functions: static unless TREE_GLOBAL is set */
+#ifdef TREE_GLOBAL
+# define STATIC
+#else
+# define STATIC static
+#endif
+
+/* Size of the path stacks the operations allocate on the C stack */
+#ifndef TREE_MAX_DEPTH
+# define TREE_MAX_DEPTH 64
+#endif
+
+/* Implied options: find_next needs duplicates, lookup is built from find and new */
+#if defined(TREE_WANT_FIND_NEXT) && !defined(TREE_DUPLICATES)
+# define TREE_DUPLICATES
+#endif
+
+#ifdef TREE_WANT_LOOKUP
+#ifndef TREE_WANT_FIND
+# define TREE_WANT_FIND
+#endif
+#ifndef TREE_WANT_NEW
+# define TREE_WANT_NEW
+#endif
+#endif
+
+/* Now the operations */
+
+STATIC void P(init) (T *t)
+{
+  /* An empty tree: no root, no nodes, black height zero. */
+  t->root = NULL;
+  t->height = 0;
+  t->count = 0;
+}
+
+#ifdef TREE_WANT_CLEANUP
+/* Recursively free the subtree rooted at node, decrementing t->count for every node released. */
+static void P(cleanup_subtree) (T *t, P(bucket) *node)
+{
+  if (!node)
+    return;
+  P(cleanup_subtree) (t, P(tree_son) (node, 0));
+  P(cleanup_subtree) (t, P(tree_son) (node, 1));
+  /*
+   * Go through TREE_SAFE_FREE: with TREE_USE_POOL there is no free()
+   * function at all and the macro expands to nothing.
+   */
+  TREE_SAFE_FREE(node);
+  t->count--;
+}
+
+/* Deallocate all nodes and leave the tree empty. */
+STATIC void P(cleanup) (T *t)
+{
+  P(cleanup_subtree) (t, t->root);
+  ASSERT(!t->count);
+  /* Do not leave a dangling pointer to the freed root behind. */
+  t->root = NULL;
+  t->height = 0;
+}
+#endif
+
+/*
+ * Descend from node towards the given key and record the path in stack[]:
+ * stack[j].buck is the bucket at depth j and stack[j].son the direction
+ * taken from it. The descent stops at a bucket with an equal key or at a
+ * NULL son. Returns the index of the last stack entry; its buck is the
+ * bucket found or NULL if the key is not present.
+ *
+ * With TREE_DUPLICATES, son_id selects which of several equal keys is
+ * wanted: the search continues in the son_id subtree of each match as long
+ * as another equal key is found there.
+ */
+static uns P(fill_stack) (P(stack_entry) *stack, uns max_depth, P(bucket) *node, TREE_KEY_DECL, uns son_id UNUSED)
+{
+ uns i;
+ stack[0].buck = node;
+ for (i=0; stack[i].buck; i++)
+ {
+ int cmp;
+ cmp = P(cmp) (TREE_KEY(), TREE_KEY(stack[i].buck->n.));
+ if (cmp == 0)
+ break;
+ else if (cmp < 0)
+ stack[i].son = 0;
+ else
+ stack[i].son = 1;
+ ASSERT(i+1 < max_depth);
+ stack[i+1].buck = P(tree_son) (stack[i].buck, stack[i].son);
+ }
+#ifdef TREE_DUPLICATES
+ if (stack[i].buck)
+ {
+ uns idx;
+ /* Find first/last of equal keys according to son_id. */
+ idx = P(fill_stack) (stack+i+1, max_depth-i-1,
+ P(tree_son) (stack[i].buck, son_id), TREE_KEY(), son_id);
+ /* Extend the path only if the recursive search has really found an equal key */
+ if (stack[i+1+idx].buck)
+ {
+ stack[i].son = son_id;
+ i = i+1+idx;
+ }
+ }
+#endif
+ /* The last entry leads nowhere; 10 is not a valid son index (presumably a marker to catch misuse) */
+ stack[i].son = 10;
+ return i;
+}
+
+#ifdef TREE_WANT_FIND
+STATIC P(node) * P(find) (T *t, TREE_KEY_DECL)
+{
+  /* Trace the path to the key; the last stack entry is either the match or empty. */
+  P(stack_entry) stack[TREE_MAX_DEPTH];
+  uns depth = P(fill_stack) (stack, TREE_MAX_DEPTH, t->root, TREE_KEY(), 0);
+  if (!stack[depth].buck)
+    return NULL;
+  return &stack[depth].buck->n;
+}
+#endif
+
+#ifdef TREE_WANT_SEARCH_DOWN
+STATIC P(node) * P(search_down) (T *t, TREE_KEY_DECL)
+{
+  /*
+   * Ordinary descent; whenever we turn right, the node we leave is smaller
+   * than the key and becomes the best answer known so far.
+   */
+  P(node) *last_right = NULL;
+  P(bucket) *node;
+  for (node = t->root; node; )
+    {
+      int cmp = P(cmp) (TREE_KEY(), TREE_KEY(node->n.));
+      if (!cmp)
+	return &node->n;
+      if (cmp > 0)
+	last_right = &node->n;
+      node = P(tree_son) (node, cmp > 0);
+    }
+  return last_right;
+}
+#endif
+
+#ifdef TREE_WANT_BOUNDARY
+STATIC P(node) * P(boundary) (T *t, uns direction)
+{
+  /* Follow the left (direction==0) or right (direction!=0) sons as far as they go. */
+  P(bucket) *n = t->root, *ns;
+  uns son = direction ? 1 : 0;
+
+  if (!n)
+    return NULL;
+  for (ns = P(tree_son) (n, son); ns; ns = P(tree_son) (n, son))
+    n = ns;
+  return &n->n;
+}
+#endif
+
+#ifdef TREE_STORE_PARENT
+/*
+ * Return the neighbour of start in the key order: the next node for
+ * direction==1, the previous one for direction==0, or NULL if there is
+ * none. Uses the parent links, hence available with TREE_STORE_PARENT.
+ */
+STATIC P(node) * P(adjacent) (P(node) *start, uns direction)
+{
+ /* Get from the user's node back to the bucket which contains it */
+ P(bucket) *node = SKIP_BACK(P(bucket), n, start);
+ P(bucket) *next = P(tree_son) (node, direction);
+ if (next)
+ {
+ /* There is a subtree in the wanted direction: take its extreme node on the opposite side */
+ while (1)
+ {
+ node = P(tree_son) (next, 1 - direction);
+ if (!node)
+ break;
+ next = node;
+ }
+ }
+ else
+ {
+ /* Otherwise climb up until we arrive at a parent from its opposite-side son */
+ next = node->parent;
+ while (next && node == P(tree_son) (next, direction))
+ {
+ node = next;
+ next = node->parent;
+ }
+ if (!next)
+ return NULL;
+ ASSERT(node == P(tree_son) (next, 1 - direction));
+ }
+ return &next->n;
+}
+#endif
+
+#if defined(TREE_DUPLICATES) || defined(TREE_WANT_DELETE) || defined(TREE_WANT_REMOVE)
+/*
+ * Extend the path in stack[] starting at stack[0].buck: make one step to
+ * the son in the given direction and then keep going to the sons in the
+ * opposite direction until a NULL son is reached. Returns the number of
+ * steps made, i.e., the index of the stack entry holding the NULL
+ * (0 if stack[0].buck is NULL).
+ */
+static int P(find_next_node) (P(stack_entry) *stack, uns max_depth, uns direction)
+{
+ uns depth = 0;
+ if (stack[0].buck)
+ {
+ ASSERT(depth+1 < max_depth);
+ stack[depth].son = direction;
+ stack[depth+1].buck = P(tree_son) (stack[depth].buck, direction);
+ depth++;
+ while (stack[depth].buck)
+ {
+ ASSERT(depth+1 < max_depth);
+ stack[depth].son = 1 - direction;
+ stack[depth+1].buck = P(tree_son) (stack[depth].buck, 1 - direction);
+ depth++;
+ }
+ }
+ return depth;
+}
+#endif
+
+#ifdef TREE_WANT_FIND_NEXT
+STATIC P(node) * P(find_next) (P(node) *start)
+{
+  /* The next node with the same key, if any, is the immediate successor of start. */
+  P(node) *next = P(adjacent) (start, 1);
+  if (!next)
+    return NULL;
+  if (P(cmp) (TREE_KEY(start->), TREE_KEY(next->)) != 0)
+    return NULL;
+  return next;
+}
+#endif
+
+#ifdef TREE_WANT_SEARCH
+STATIC P(node) * P(search) (T *t, TREE_KEY_DECL)
+{
+  /*
+   * If the key is not present, fall back to the last node visited on the
+   * way down; only an empty tree yields NULL.
+   */
+  P(stack_entry) stack[TREE_MAX_DEPTH];
+  uns depth = P(fill_stack) (stack, TREE_MAX_DEPTH, t->root, TREE_KEY(), 0);
+
+  if (!stack[depth].buck && !depth)
+    return NULL;
+  if (!stack[depth].buck)
+    depth--;
+  return &stack[depth].buck->n;
+}
+#endif
+
+#if 0
+#define TREE_TRACE(txt...) do { printf(txt); fflush(stdout); } while (0)
+#else
+#define TREE_TRACE(txt...)
+#endif
+
+/*
+ * Rotate the subtree rooted at node so that its son number son_id becomes
+ * the new root of the subtree, which is returned. The caller is
+ * responsible for linking the returned bucket to the rest of the tree and
+ * for setting the colours of both buckets afterwards.
+ */
+static inline P(bucket) * P(rotation) (P(bucket) *node, uns son_id)
+{
+ /* Destroys red_flag's in node, son. Returns new root. */
+ P(bucket) *son = P(tree_son) (node, son_id);
+ TREE_TRACE("Rotation (node %d, son %d), direction %d\n", node->n.key, son->n.key, son_id);
+ /* The inner grandson changes sides and node becomes a son of its former son */
+ node->son[son_id] = P(tree_son) (son, 1-son_id);
+ son->son[1-son_id] = node;
+#ifdef TREE_STORE_PARENT
+ if (node->son[son_id])
+ node->son[son_id]->parent = node;
+ son->parent = node->parent;
+ node->parent = son;
+#endif
+ return son;
+}
+
+/*
+ * Restore the red-black conditions after stack[depth].buck has become red.
+ * stack[0..depth] is the path from the root to that bucket. Either the
+ * colours are exchanged and the problem moves two levels up, or one or two
+ * rotations settle it.
+ */
+static void P(rotate_after_insert) (T *t, P(stack_entry) *stack, uns depth)
+{
+ P(bucket) *node;
+ P(bucket) *parent, *grand, *uncle;
+ int s1, s2;
+try_it_again:
+ node = stack[depth].buck;
+ ASSERT(P(red_flag) (node));
+ /* At this moment, node became red. The path sums have
+ * been preserved, but we have to check the parental
+ * condition. */
+ if (depth == 0)
+ {
+ ASSERT(t->root == node);
+ return;
+ }
+ parent = stack[depth-1].buck;
+ if (!P(red_flag) (parent))
+ return;
+ if (depth == 1)
+ {
+ /* The red parent is the root: paint it black, which makes all paths one black node longer */
+ ASSERT(t->root == parent);
+ P(set_red_flag) (parent, 0);
+ t->height++;
+ return;
+ }
+ grand = stack[depth-2].buck;
+ ASSERT(!P(red_flag) (grand));
+ /* The parent is also red, the grandparent exists and it
+ * is black. */
+ s1 = stack[depth-1].son;
+ s2 = stack[depth-2].son;
+ uncle = P(tree_son) (grand, 1-s2);
+ if (uncle && P(red_flag) (uncle))
+ {
+ /* Red parent and uncle, black grandparent.
+ * Exchange and try another iteration. */
+ P(set_red_flag) (parent, 0);
+ P(set_red_flag) (uncle, 0);
+ P(set_red_flag) (grand, 1);
+ depth -= 2;
+ TREE_TRACE("Swapping colours (parent %d, uncle %d, grand %d), passing thru\n", parent->n.key, uncle->n.key, grand->n.key);
+ goto try_it_again;
+ }
+ /* Black uncle and grandparent, we need to rotate. Test
+ * the direction. */
+ if (s1 == s2)
+ {
+ /* Node, parent and grandparent in a line: a single rotation, the parent becomes the black top */
+ node = P(rotation) (grand, s2);
+ P(set_red_flag) (parent, 0);
+ P(set_red_flag) (grand, 1);
+ }
+ else
+ {
+ /* Zig-zag: a double rotation, the node itself becomes the black top with two red sons */
+ grand->son[s2] = P(rotation) (parent, s1);
+ node = P(rotation) (grand, s2);
+ P(set_red_flag) (grand, 1);
+ P(set_red_flag) (parent, 1);
+ P(set_red_flag) (node, 0);
+ }
+ /* Hook the new top of the rotated subtree to the great-grandparent or make it the root */
+ if (depth >= 3)
+ P(set_tree_son) (stack[depth-3].buck, stack[depth-3].son, node);
+ else
+ t->root = node;
+}
+
+#ifdef TREE_WANT_NEW
+/*
+ * Create a new node with the given key, link it to the tree as a red leaf
+ * and rebalance. Returns the new node with the key initialized by
+ * init_key() and the data by init_data().
+ *
+ * Without TREE_DUPLICATES, the key must not be present in the tree yet
+ * (this is checked by an ASSERT).
+ */
+STATIC P(node) * P(new) (T *t, TREE_KEY_DECL)
+{
+ P(stack_entry) stack[TREE_MAX_DEPTH];
+ P(bucket) *added;
+ uns depth;
+ depth = P(fill_stack) (stack, TREE_MAX_DEPTH, t->root, TREE_KEY(), 1);
+#ifdef TREE_DUPLICATES
+ /* It is the last found value, hence everything in the right subtree is
+ * strongly _bigger_. */
+ depth += P(find_next_node) (stack+depth, TREE_MAX_DEPTH-depth, 1);
+#endif
+ ASSERT(!stack[depth].buck);
+ /* We are in a leaf, hence we can easily append a new leaf to it. */
+ added = P(alloc) (sizeof(struct P(bucket)) + TREE_EXTRA_SIZE(TREE_KEY()) );
+ added->son[0] = added->son[1] = NULL;
+ stack[depth].buck = added;
+ if (depth > 0)
+ {
+#ifdef TREE_STORE_PARENT
+ added->parent = stack[depth-1].buck;
+#endif
+ P(set_tree_son) (stack[depth-1].buck, stack[depth-1].son, added);
+ }
+ else
+ {
+ /* The tree was empty, the new node becomes the root */
+#ifdef TREE_STORE_PARENT
+ added->parent = NULL;
+#endif
+ t->root = added;
+ }
+ P(set_red_flag) (added, 1); /* Set it red to not disturb the path sum. */
+ P(init_key) (&added->n, TREE_KEY());
+ P(init_data) (&added->n);
+ t->count++;
+ /* Let us reorganize the red_flag's and the structure of the tree. */
+ P(rotate_after_insert) (t, stack, depth);
+ return &added->n;
+}
+#endif
+
+#ifdef TREE_WANT_LOOKUP
+/* Return the node with the given key; if P(find) does not find one, create it by P(new). */
+STATIC P(node) * P(lookup) (T *t, TREE_KEY_DECL)
+{
+	P(node) *found = P(find) (t, TREE_KEY());
+	return found ? found : P(new) (t, TREE_KEY());
+}
+#endif
+
+#if defined(TREE_WANT_REMOVE) || defined(TREE_WANT_DELETE)
+/*
+ * Repair the tree after a black node without sons has been unlinked.
+ *
+ * stack[0..depth] is the path from the root to the parent of the removed
+ * node and stack[depth].son tells on which side of that parent one black
+ * node is missing. The deficit is either fixed locally by recolouring and
+ * rotations, or passed one level up (depth--). When depth becomes negative,
+ * the deficit has reached above the root and only t->height is decreased.
+ */
+static void P(rotate_after_delete) (T *t, P(stack_entry) *stack, int depth)
+{
+ uns iteration = 0;
+ P(bucket) *parent, *sibling, *instead;
+ uns parent_red, del_son, sibl_red;
+missing_black:
+ if (depth < 0)
+ {
+ t->height--;
+ return;
+ }
+ parent = stack[depth].buck;
+ parent_red = P(red_flag) (parent);
+ del_son = stack[depth].son;
+ /* For the 1st iteration: we have deleted parent->son[del_son], which
+ * was a black node with no son. Hence there is one missing black
+ * vertex in that path, which we are going to fix now.
+ *
+ * For other iterations: in that path, there is also missing a black
+ * node. */
+ if (!iteration)
+ ASSERT(!P(tree_son) (parent, del_son));
+ sibling = P(tree_son) (parent, 1-del_son);
+ ASSERT(sibling);
+ sibl_red = P(red_flag) (sibling);
+ /* `instead' will be the new root of the subtree currently rooted at parent. */
+ instead = NULL;
+ if (!sibl_red)
+ {
+ P(bucket) *son[2];
+ uns red[2];
+ son[0] = P(tree_son) (sibling, 0);
+ son[1] = P(tree_son) (sibling, 1);
+ red[0] = son[0] ? P(red_flag) (son[0]) : 0;
+ red[1] = son[1] ? P(red_flag) (son[1]) : 0;
+ if (!red[0] && !red[1])
+ {
+ /* Black sibling with no red son: recolour. If the parent was
+ * red, we are done, otherwise the deficit moves one level up. */
+ P(set_red_flag) (sibling, 1);
+ P(set_red_flag) (parent, 0);
+ if (parent_red)
+ return;
+ else
+ {
+ depth--;
+ iteration++;
+ TREE_TRACE("Swapping colours (parent %d, sibling %d), passing thru\n", parent->n.key, sibling->n.key);
+ goto missing_black;
+ }
+ } else if (!red[del_son])
+ {
+ /* Only the sibling's son farther from the deleted side is red:
+ * a single rotation at the parent suffices. */
+ instead = P(rotation) (parent, 1-del_son);
+ P(set_red_flag) (instead, parent_red);
+ P(set_red_flag) (parent, 0);
+ P(set_red_flag) (son[1-del_son], 0);
+ } else /* red[del_son] */
+ {
+ /* The sibling's son nearer to the deleted side is red: double rotation. */
+ parent->son[1-del_son] = P(rotation) (sibling, del_son);
+ instead = P(rotation) (parent, 1-del_son);
+ P(set_red_flag) (instead, parent_red);
+ P(set_red_flag) (parent, 0);
+ P(set_red_flag) (sibling, 0);
+ }
+ } else /* sibl_red */
+ {
+ /* Red sibling: the parent must be black and the sibling's son on the
+ * deleted side must exist and be black (both are asserted below).
+ * The cases differ by the colours of that son's own sons. */
+ P(bucket) *grand[2], *son;
+ uns red[2];
+ ASSERT(!parent_red);
+ son = P(tree_son) (sibling, del_son);
+ ASSERT(son && !P(red_flag) (son));
+ grand[0] = P(tree_son) (son, 0);
+ grand[1] = P(tree_son) (son, 1);
+ red[0] = grand[0] ? P(red_flag) (grand[0]) : 0;
+ red[1] = grand[1] ? P(red_flag) (grand[1]) : 0;
+ if (!red[0] && !red[1])
+ {
+ instead = P(rotation) (parent, 1-del_son);
+ P(set_red_flag) (instead, 0);
+ P(set_red_flag) (parent, 0);
+ P(set_red_flag) (son, 1);
+ }
+ else if (!red[del_son])
+ {
+ parent->son[1-del_son] = P(rotation) (sibling, del_son);
+ instead = P(rotation) (parent, 1-del_son);
+ P(set_red_flag) (instead, 0);
+ P(set_red_flag) (parent, 0);
+ P(set_red_flag) (sibling, 1);
+ P(set_red_flag) (grand[1-del_son], 0);
+ } else /* red[del_son] */
+ {
+ sibling->son[del_son] = P(rotation) (son, del_son);
+ parent->son[1-del_son] = P(rotation) (sibling, del_son);
+ instead = P(rotation) (parent, 1-del_son);
+ P(set_red_flag) (instead, 0);
+ P(set_red_flag) (parent, 0);
+ P(set_red_flag) (sibling, 1);
+ P(set_red_flag) (son, 0);
+ }
+ }
+ /* We have performed all desired rotations and need to store the new
+ * pointer to the subtree. */
+ ASSERT(instead);
+ if (depth > 0)
+ P(set_tree_son) (stack[depth-1].buck, stack[depth-1].son, instead);
+ else
+ t->root = instead;
+}
+
+/*
+ * Remove the node stack[depth].buck from the tree and release its memory.
+ *
+ * stack[0..depth] must be the path from the root to the node, i.e.
+ * stack[i].son is the direction leading from stack[i].buck to
+ * stack[i+1].buck (checked by the ASSERT loop). A node with two sons is
+ * first exchanged (position and colour) with the next node in order, which
+ * has no left son, so that the node actually unlinked has at most one son.
+ * If a black node without sons was unlinked, P(rotate_after_delete)
+ * rebalances the tree.
+ */
+static void P(remove_by_stack) (T *t, P(stack_entry) *stack, uns depth)
+{
+	P(bucket) *node = stack[depth].buck;
+	P(bucket) *son;
+	uns i, node_red;
+	for (i=0; i<depth; i++)
+		ASSERT(P(tree_son) (stack[i].buck, stack[i].son) == stack[i+1].buck);
+	if (P(tree_son) (node, 0) && P(tree_son) (node, 1))
+	{
+		P(bucket) *xchg;
+		uns flag_node, flag_xchg;
+		/* Extend the path to the in-order successor of the node. */
+		uns d = P(find_next_node) (stack+depth, TREE_MAX_DEPTH-depth, 1);
+
+		ASSERT(d >= 2);
+		d--;
+		xchg = stack[depth+d].buck;
+		flag_node = P(red_flag) (node);
+		flag_xchg = P(red_flag) (xchg);
+		ASSERT(!P(tree_son) (xchg, 0));
+		son = P(tree_son) (xchg, 1);
+		stack[depth].buck = xchg;	/* Magic iff d == 1.  */
+		stack[depth+d].buck = node;
+		xchg->son[0] = P(tree_son) (node, 0);
+		xchg->son[1] = P(tree_son) (node, 1);
+		if (depth > 0)
+			P(set_tree_son) (stack[depth-1].buck, stack[depth-1].son, xchg);
+		else
+			t->root = xchg;
+		node->son[0] = NULL;
+		node->son[1] = son;
+		P(set_tree_son) (stack[depth+d-1].buck, stack[depth+d-1].son, node);
+#ifdef TREE_STORE_PARENT
+		xchg->parent = depth > 0 ? stack[depth-1].buck : NULL;
+		xchg->son[0]->parent = xchg;
+		xchg->son[1]->parent = xchg;
+		node->parent = stack[depth+d-1].buck;
+		if (son)
+			son->parent = node;
+#endif
+		/* The colours stay with the positions, not with the buckets. */
+		P(set_red_flag) (xchg, flag_node);
+		P(set_red_flag) (node, flag_xchg);
+		depth += d;
+	}
+	else if (P(tree_son) (node, 0))
+		son = P(tree_son) (node, 0);
+	else
+		son = P(tree_son) (node, 1);
+	/* At this moment, stack[depth].buck == node and it has at most one son
+	 * and it is stored in the variable son.  */
+	t->count--;
+	if (depth > 0)
+	{
+		P(set_tree_son) (stack[depth-1].buck, stack[depth-1].son, son);
+#ifdef TREE_STORE_PARENT
+		if (son)
+			son->parent = stack[depth-1].buck;
+#endif
+	}
+	else
+	{
+		t->root = son;
+#ifdef TREE_STORE_PARENT
+		if (son)
+			son->parent = NULL;
+#endif
+	}
+	/* Remember the colour before the memory is released. The node is freed
+	 * regardless of its colour; the original code returned early for red
+	 * nodes and never freed them. */
+	node_red = P(red_flag) (node);
+	TREE_SAFE_FREE(node);
+	if (node_red)
+	{
+		/* Removing a red node does not change any black height. */
+		ASSERT(!son);
+		return;
+	}
+	/* We have deleted a black node.  */
+	if (son)
+	{
+		/* Its only son is red; repainting it black restores the balance. */
+		ASSERT(P(red_flag) (son));
+		P(set_red_flag) (son, 0);
+		return;
+	}
+	P(rotate_after_delete) (t, stack, (int) depth - 1);
+}
+#endif
+
+#ifdef TREE_WANT_REMOVE
+/*
+ * Remove the given node from the tree.
+ *
+ * The path to the node is reconstructed by following the parent pointers
+ * upwards and then reversed, so that it has the root-first layout expected
+ * by P(remove_by_stack).
+ * NOTE(review): node->parent is used unconditionally, so this presumably
+ * requires TREE_STORE_PARENT -- confirm in the part of the header above.
+ */
+STATIC void P(remove) (T *t, P(node) *Node)
+{
+ P(stack_entry) stack[TREE_MAX_DEPTH];
+ P(bucket) *node = SKIP_BACK(P(bucket), n, Node);
+ uns depth = 0, i;
+ stack[0].buck = node;
+ /* The removed node leads nowhere further; 10 is just a placeholder direction. */
+ stack[0].son = 10;
+ while (node->parent)
+ {
+ depth++;
+ ASSERT(depth < TREE_MAX_DEPTH);
+ stack[depth].buck = node->parent;
+ /* Which son of the parent we have come from. */
+ stack[depth].son = P(tree_son) (node->parent, 0) == node ? 0 : 1;
+ node = node->parent;
+ }
+ /* The path was collected bottom-up, reverse it in place. */
+ for (i=0; i<(depth+1)/2; i++)
+ {
+ P(stack_entry) tmp = stack[i];
+ stack[i] = stack[depth-i];
+ stack[depth-i] = tmp;
+ }
+ P(remove_by_stack) (t, stack, depth);
+}
+#endif
+
+#ifdef TREE_WANT_DELETE
+/* Delete the node with the given key. Returns 1 if a node was removed, 0 if the key was not found. */
+STATIC int P(delete) (T *t, TREE_KEY_DECL)
+{
+	P(stack_entry) path[TREE_MAX_DEPTH];
+	uns last = P(fill_stack) (path, TREE_MAX_DEPTH, t->root, TREE_KEY(), 1);
+
+	if (!path[last].buck)
+		return 0;
+	P(remove_by_stack) (t, path, last);
+	return 1;
+}
+#endif
+
+#ifdef TREE_WANT_DUMP
+/*
+ * Recursively check the subtree rooted at @node and optionally print it.
+ *
+ * The ASSERTs verify that every path to a missing son contains t->height
+ * black nodes, that a red node has no red parent, that the keys are ordered
+ * with respect to the parent and (with TREE_STORE_PARENT) that the parent
+ * pointers are right. If @fb is not NULL, one line per node is written in
+ * order (left subtree, node, right subtree).
+ *
+ * @cmp_res is -1 for a left son and +1 for a right son, @level is the depth
+ * of @node and @black is the number of black nodes on the path above it.
+ */
+static void P(dump_subtree) (struct fastbuf *fb, T *t, P(bucket) *node, P(bucket) *parent, int cmp_res, int level, uns black)
+{
+ uns flag;
+ int i;
+ if (!node)
+ {
+ ASSERT(black == t->height);
+ return;
+ }
+ flag = P(red_flag) (node);
+#ifdef TREE_STORE_PARENT
+ ASSERT(node->parent == parent);
+#endif
+ if (parent)
+ {
+ ASSERT(!flag || !P(red_flag) (parent));
+ /* Multiplying by the side makes the expected sign positive for both sons. */
+ cmp_res *= P(cmp) (TREE_KEY(node->n.), TREE_KEY(parent->n.));
+#ifdef TREE_DUPLICATES
+ ASSERT(cmp_res >= 0);
+#else
+ ASSERT(cmp_res > 0);
+#endif
+ }
+ /* 1-flag adds one to the black count iff this node is black. */
+ P(dump_subtree) (fb, t, P(tree_son) (node, 0), node, -1, level+1, black + (1-flag));
+ if (fb)
+ {
+ char tmp[20];
+ for (i=0; i<level; i++)
+ bputs(fb, " ");
+ sprintf(tmp, "L%d %c\t", level, flag ? 'R' : 'B');
+ bputs(fb, tmp);
+ P(dump_key) (fb, &node->n);
+ P(dump_data) (fb, &node->n);
+ bputs(fb, "\n");
+ }
+ P(dump_subtree) (fb, t, P(tree_son) (node, 1), node, +1, level+1, black + (1-flag));
+}
+
+/*
+ * Check the consistency of the whole tree and, if @fb is not NULL, dump it
+ * there: a header line, one line per node and a terminating empty line.
+ */
+STATIC void P(dump) (struct fastbuf *fb, T *t)
+{
+	char header[50];
+
+	if (fb)
+	{
+		sprintf(header, "Tree of %d nodes and height %d\n", t->count, t->height);
+		bputs(fb, header);
+	}
+	P(dump_subtree) (fb, t, t->root, NULL, 0, 0, 0);
+	if (!fb)
+		return;
+	bputs(fb, "\n");
+	bflush(fb);
+}
+#endif
+
+/* And the iterator */
+
+#ifdef TREE_WANT_ITERATOR
+/*
+ * Return the outermost node in the given direction (following son[direction]
+ * from the root as far as possible), or NULL if the tree is empty.
+ */
+static P(node) * P(first_node) (T *t, uns direction)
+{
+	P(bucket) *cur = t->root;
+
+	if (!cur)
+		return NULL;
+	for (;;)
+	{
+		P(bucket) *next = P(tree_son) (cur, direction);
+		if (!next)
+			return &cur->n;
+		cur = next;
+	}
+}
+
+#ifndef TREE_FOR_ALL
+
+/*
+ * Iteration over all nodes of a tree:
+ *
+ *	TREE_FOR_ALL(prefix, tree_pointer, variable)
+ *	  {
+ *	    ... body using `variable' (a pointer to the current node) ...
+ *	  }
+ *	TREE_END_FOR;
+ *
+ * The walk starts at <prefix>first_node(tree, 0) and advances by
+ * <prefix>adjacent(variable, 1). TREE_BREAK and TREE_CONTINUE expand to
+ * plain break and continue, which act on the for loop opened by the macro.
+ */
+#define TREE_FOR_ALL(t_px, t_ptr, t_var) \
+do \
+{ \
+ GLUE_(t_px,node) *t_var = GLUE_(t_px,first_node)(t_ptr, 0); \
+ for (; t_var; t_var = GLUE_(t_px,adjacent)(t_var, 1)) \
+ {
+#define TREE_END_FOR } } while(0)
+#define TREE_BREAK break
+#define TREE_CONTINUE continue
+
+#endif
+#endif
+
+/* Finally, undefine all the parameters */
+
+#undef P
+#undef T
+
+#undef TREE_NODE
+#undef TREE_PREFIX
+#undef TREE_KEY_ATOMIC
+#undef TREE_KEY_STRING
+#undef TREE_KEY_ENDSTRING
+#undef TREE_KEY_COMPLEX
+#undef TREE_KEY_DECL
+#undef TREE_WANT_CLEANUP
+#undef TREE_WANT_FIND
+#undef TREE_WANT_FIND_NEXT
+#undef TREE_WANT_SEARCH
+#undef TREE_WANT_SEARCH_DOWN
+#undef TREE_WANT_BOUNDARY
+#undef TREE_WANT_ADJACENT
+#undef TREE_WANT_NEW
+#undef TREE_WANT_LOOKUP
+#undef TREE_WANT_DELETE
+#undef TREE_WANT_REMOVE
+#undef TREE_WANT_DUMP
+#undef TREE_WANT_ITERATOR
+#undef TREE_GIVE_CMP
+#undef TREE_GIVE_EXTRA_SIZE
+#undef TREE_GIVE_INIT_KEY
+#undef TREE_GIVE_INIT_DATA
+#undef TREE_GIVE_ALLOC
+#undef TREE_NOCASE
+#undef TREE_ATOMIC_TYPE
+#undef TREE_USE_POOL
+#undef TREE_STATIC
+#undef TREE_CONSERVE_SPACE
+#undef TREE_DUPLICATES
+#undef TREE_MAX_DEPTH
+#undef TREE_STORE_PARENT
+#undef TREE_KEY
+#undef TREE_EXTRA_SIZE
+#undef TREE_SAFE_FREE
+#undef TREE_TRACE
+#undef STATIC
--- /dev/null
+/*
+ * UCW Library -- Interface to Regular Expression Libraries
+ *
+ * (c) 1997--2004 Martin Mares <mj@ucw.cz>
+ * (c) 2001 Robert Spalek <robert@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+#include "ucw/chartype.h"
+#include "ucw/hashfunc.h"
+#include "ucw/regex.h"
+
+#include <stdio.h>
+#include <string.h>
+
+#ifdef CONFIG_POSIX_REGEX
+
+/* POSIX regular expression library */
+
+#include <regex.h>
+
+/* A compiled expression together with the sub-matches of the last rx_match(). */
+struct regex {
+ regex_t rx; /* Pattern compiled by regcomp() */
+ regmatch_t matches[10]; /* Whole match and groups 1 to 9, filled in by regexec() */
+};
+
+/*
+ * Compile the extended regular expression @p (case-insensitively if @icase
+ * is non-zero). Dies with an error message if the pattern is invalid.
+ */
+regex *
+rx_compile(const char *p, int icase)
+{
+  int flags = REG_EXTENDED;
+  if (icase)
+    flags |= REG_ICASE;
+
+  regex *r = xmalloc_zero(sizeof(regex));
+  int err = regcomp(&r->rx, p, flags);
+  if (!err)
+    return r;
+
+  char msg[256];
+  regerror(err, &r->rx, msg, sizeof(msg)-1);
+  /* regfree(&r->rx) not needed */
+  die("Error parsing regular expression `%s': %s", p, msg);
+}
+
+/* Release the compiled pattern and the structure allocated by rx_compile(). */
+void
+rx_free(regex *r)
+{
+ regfree(&r->rx);
+ xfree(r);
+}
+
+/*
+ * Return 1 if the whole string @s matches @r, 0 otherwise. The sub-matches
+ * are stored in r->matches. Dies on errors reported by regexec().
+ */
+int
+rx_match(regex *r, const char *s)
+{
+  int err = regexec(&r->rx, s, 10, r->matches, 0);
+  switch (err)
+    {
+    case 0:
+      /* regexec doesn't support anchored expressions, so we have to check
+       * ourselves that the match starts at the beginning of the string and
+       * ends at its terminating zero. */
+      return r->matches[0].rm_so == 0 && s[r->matches[0].rm_eo] == 0;
+    case REG_NOMATCH:
+      return 0;
+    case REG_ESPACE:
+      die("Regex matching ran out of memory");
+    default:
+      die("Regex matching failed with unknown error %d", err);
+    }
+}
+
+/*
+ * Match @src against @r and expand the replacement string @by to @dest.
+ *
+ * In @by, `\0' to `\9' stand for the whole match and for the groups 1 to 9
+ * (a group which did not take part in the match expands to nothing); a
+ * backslash before any other character makes that character literal.
+ * A backslash or an unusable `\N' at the very end of @by just ends the
+ * replacement; the original code copied the terminating zero, stepped over
+ * it and kept reading past the end of @by.
+ *
+ * Returns 1 on success, 0 if @src does not match and -1 if the result
+ * (including the terminating zero) does not fit in @destlen bytes.
+ */
+int
+rx_subst(regex *r, const char *by, const char *src, char *dest, uns destlen)
+{
+  char *end;
+
+  if (!rx_match(r, src))
+    return 0;
+  if (!destlen)			/* No room even for the terminating zero */
+    return -1;
+  end = dest + destlen - 1;	/* The last byte is reserved for the zero */
+
+  while (*by)
+    {
+      if (*by == '\\')
+	{
+	  by++;
+	  if (*by >= '0' && *by <= '9')	/* \0 gets replaced by entire pattern */
+	    {
+	      uns j = *by++ - '0';
+	      if (j <= r->rx.re_nsub && r->matches[j].rm_so >= 0)
+		{
+		  const char *s = src + r->matches[j].rm_so;
+		  uns i = r->matches[j].rm_eo - r->matches[j].rm_so;
+		  if (dest + i >= end)
+		    return -1;
+		  memcpy(dest, s, i);
+		  dest += i;
+		  continue;
+		}
+	    }
+	  if (!*by)		/* The escape was the last thing in @by */
+	    break;
+	}
+      if (dest < end)
+	*dest++ = *by++;
+      else
+	return -1;
+    }
+  *dest = 0;
+  return 1;
+}
+
+#elif defined(CONFIG_PCRE)
+
+/* PCRE library */
+
+#include <pcre.h>
+
+/* A compiled expression together with the sub-matches of the last rx_match(). */
+struct regex {
+ pcre *rx; /* Pattern compiled by pcre_compile() */
+ pcre_extra *extra; /* Result of pcre_study() */
+ uns match_array_size; /* Number of ints in matches[] */
+ uns real_matches; /* Return value of the last successful pcre_exec() */
+ int matches[0]; /* 3*(number of groups + 1) ints: (start,end) offset pairs as used by rx_subst(), plus workspace for pcre_exec() */
+};
+
+/*
+ * Compile the regular expression @p (case-insensitively if @icase is
+ * non-zero) as an anchored PCRE pattern and study it. Dies with an error
+ * message if any of the PCRE calls fails.
+ */
+regex *
+rx_compile(const char *p, int icase)
+{
+ const char *err;
+ int errpos, match_array_size, eno;
+
+ pcre *rx = pcre_compile(p, PCRE_ANCHORED | PCRE_EXTRA | (icase ? PCRE_CASELESS : 0), &err, &errpos, NULL);
+ if (!rx)
+ die("Error parsing regular expression `%s': %s at position %d", p, err, errpos);
+ /* Ask for the number of capturing groups, it determines the size of the match array. */
+ eno = pcre_fullinfo(rx, NULL, PCRE_INFO_CAPTURECOUNT, &match_array_size);
+ if (eno)
+ die("Internal error: pcre_fullinfo() failed with error %d", eno);
+ /* Three ints per group and three more for the whole match. */
+ match_array_size = 3*(match_array_size+1);
+ regex *r = xmalloc_zero(sizeof(regex) + match_array_size * sizeof(int));
+ r->rx = rx;
+ r->match_array_size = match_array_size;
+ r->extra = pcre_study(r->rx, 0, &err);
+ /* Failure of pcre_study() is detected by the error message being set. */
+ if (err)
+ die("Error studying regular expression `%s': %s", p, err);
+ return r;
+}
+
+/*
+ * Release everything allocated by rx_compile(). The compiled pattern and
+ * the study data were obtained from PCRE, so they are returned through
+ * pcre_free(), which is the counterpart of the allocator PCRE uses; only
+ * the structure itself comes from our own allocator.
+ */
+void
+rx_free(regex *r)
+{
+  pcre_free(r->rx);
+  if (r->extra)			/* rx_compile() accepts a NULL result of pcre_study() */
+    pcre_free(r->extra);
+  xfree(r);
+}
+
+/*
+ * Return 1 if the whole string @s matches @r, 0 otherwise. The sub-matches
+ * are stored in r->matches and their count in r->real_matches. Dies on
+ * errors reported by pcre_exec().
+ */
+int
+rx_match(regex *r, const char *s)
+{
+  int len = str_len(s);
+  int err = pcre_exec(r->rx, r->extra, s, len, 0, 0, r->matches, r->match_array_size);
+
+  if (err == PCRE_ERROR_NOMATCH)
+    return 0;
+  if (err == PCRE_ERROR_NOMEMORY)
+    die("Regex matching ran out of memory");
+  if (err < 0)
+    die("Regex matching failed with unknown error %d", err);
+
+  r->real_matches = err;
+  /* need to check that the full string matches */
+  return !r->matches[0] && !s[r->matches[1]];
+}
+
+/*
+ * Match @src against @r and expand the replacement string @by to @dest.
+ *
+ * In @by, `\0' to `\9' stand for the whole match and for the groups 1 to 9
+ * (a group which did not take part in the match expands to nothing); a
+ * backslash before any other character makes that character literal.
+ * A backslash or an unusable `\N' at the very end of @by just ends the
+ * replacement; the original code copied the terminating zero, stepped over
+ * it and kept reading past the end of @by.
+ *
+ * Returns 1 on success, 0 if @src does not match and -1 if the result
+ * (including the terminating zero) does not fit in @destlen bytes.
+ */
+int
+rx_subst(regex *r, const char *by, const char *src, char *dest, uns destlen)
+{
+  char *end;
+
+  if (!rx_match(r, src))
+    return 0;
+  if (!destlen)			/* No room even for the terminating zero */
+    return -1;
+  end = dest + destlen - 1;	/* The last byte is reserved for the zero */
+
+  while (*by)
+    {
+      if (*by == '\\')
+	{
+	  by++;
+	  if (*by >= '0' && *by <= '9')	/* \0 gets replaced by entire pattern */
+	    {
+	      uns j = *by++ - '0';
+	      if (j < r->real_matches && r->matches[2*j] >= 0)
+		{
+		  const char *s = src + r->matches[2*j];	/* @src is const, so is the pointer into it */
+		  uns i = r->matches[2*j+1] - r->matches[2*j];
+		  if (dest + i >= end)
+		    return -1;
+		  memcpy(dest, s, i);
+		  dest += i;
+		  continue;
+		}
+	    }
+	  if (!*by)		/* The escape was the last thing in @by */
+	    break;
+	}
+      if (dest < end)
+	*dest++ = *by++;
+      else
+	return -1;
+    }
+  *dest = 0;
+  return 1;
+}
+
+#else
+
+/* BSD regular expression library */
+
+#include <regex.h>
+
+#define INITIAL_MEM 1024 /* Initial space allocated for each pattern */
+#define CHAR_SET_SIZE 256 /* How many characters in the character set. */
+
+/* A compiled expression together with the sub-matches of the last rx_match(). */
+struct regex {
+ struct re_pattern_buffer buf; /* Pattern compiled by re_compile_pattern() */
+ struct re_registers regs; /* Must not change between re_match() calls */
+ int len_cache; /* Length of the string passed to the last rx_match(), used by rx_subst() for range checks */
+};
+
+/*
+ * Compile the POSIX extended regular expression @p (case-insensitively if
+ * @icase is non-zero). Dies with an error message if the pattern is invalid.
+ */
+regex *
+rx_compile(const char *p, int icase)
+{
+ regex *r = xmalloc_zero(sizeof(regex));
+ const char *msg;
+
+ r->buf.buffer = xmalloc(INITIAL_MEM);
+ r->buf.allocated = INITIAL_MEM;
+ if (icase)
+ {
+ unsigned i;
+ r->buf.translate = xmalloc (CHAR_SET_SIZE);
+ /* Translate every character through Cupcase() (judging by its name, to
+ * upper case), so that letters differing only in case compare equal. */
+ for (i = 0; i < CHAR_SET_SIZE; i++)
+ r->buf.translate[i] = Cupcase(i);
+ }
+ else
+ r->buf.translate = NULL;
+ re_set_syntax(RE_SYNTAX_POSIX_EXTENDED);
+ /* re_compile_pattern() returns NULL on success and an error message otherwise. */
+ msg = re_compile_pattern(p, strlen(p), &r->buf);
+ if (!msg)
+ return r;
+ die("Error parsing pattern `%s': %s", p, msg);
+}
+
+/*
+ * Release the pattern buffer, the translation table (if any) and the
+ * structure allocated by rx_compile().
+ * NOTE(review): r->regs is not released here; confirm whether re_match()
+ * allocates regs.start and regs.end and whether they should be freed too.
+ */
+void
+rx_free(regex *r)
+{
+ xfree(r->buf.buffer);
+ if (r->buf.translate)
+ xfree(r->buf.translate);
+ xfree(r);
+}
+
+/*
+ * Return 1 if the whole string @s matches @r, 0 otherwise. The sub-matches
+ * are stored in r->regs and the length of @s in r->len_cache.
+ */
+int
+rx_match(regex *r, const char *s)
+{
+  int len = strlen(s);
+
+  r->len_cache = len;
+  if (re_match(&r->buf, s, len, 0, &r->regs) < 0)
+    return 0;
+  /* XXX: Why regex doesn't enforce implicit "^...$" ? */
+  return r->regs.start[0] == 0 && r->regs.end[0] == len;
+}
+
+/*
+ * Match @src against @r and expand the replacement string @by to @dest.
+ *
+ * In @by, `\0' to `\9' stand for the whole match and for the groups 1 to 9;
+ * a backslash before any other character makes that character literal.
+ * A backslash or an unusable `\N' at the very end of @by just ends the
+ * replacement; the original code copied the terminating zero, stepped over
+ * it and kept reading past the end of @by.
+ *
+ * Returns 1 on success, 0 if @src does not match and -1 if the result
+ * (including the terminating zero) does not fit in @destlen bytes or if
+ * the registers of a group point behind the end of @src.
+ */
+int
+rx_subst(regex *r, const char *by, const char *src, char *dest, uns destlen)
+{
+  char *end;
+
+  if (!rx_match(r, src))
+    return 0;
+  if (!destlen)			/* No room even for the terminating zero */
+    return -1;
+  end = dest + destlen - 1;	/* The last byte is reserved for the zero */
+
+  while (*by)
+    {
+      if (*by == '\\')
+	{
+	  by++;
+	  if (*by >= '0' && *by <= '9')	/* \0 gets replaced by entire pattern */
+	    {
+	      uns j = *by++ - '0';
+	      if (j < r->regs.num_regs)
+		{
+		  const char *s = src + r->regs.start[j];
+		  uns i = r->regs.end[j] - r->regs.start[j];
+		  if (r->regs.start[j] > r->len_cache || r->regs.end[j] > r->len_cache)
+		    return -1;
+		  if (dest + i >= end)
+		    return -1;
+		  memcpy(dest, s, i);
+		  dest += i;
+		  continue;
+		}
+	    }
+	  if (!*by)		/* The escape was the last thing in @by */
+	    break;
+	}
+      if (dest < end)
+	*dest++ = *by++;
+      else
+	return -1;
+    }
+  *dest = 0;
+  return 1;
+}
+
+#endif
+
+#ifdef TEST
+
+/*
+ * Test driver: regex-t [-i] <regex> [<replacement>]
+ *
+ * Every line of the standard input is matched against <regex> (with -i
+ * case-insensitively). Without <replacement>, MATCH or NO MATCH is printed;
+ * with it, the result of rx_subst(), NO MATCH or OVERFLOW is printed.
+ */
+int main(int argc, char **argv)
+{
+  regex *r;
+  char buf1[4096], buf2[4096];
+  int opt_i = 0;
+
+  /* Look at argv[1] only if it exists */
+  if (argc > 1 && !strcmp(argv[1], "-i"))
+    {
+      opt_i = 1;
+      argv++;
+      argc--;
+    }
+  if (argc < 2)
+    {
+      fputs("Usage: regex-t [-i] <regex> [<replacement>]\n", stderr);
+      return 1;
+    }
+  r = rx_compile(argv[1], opt_i);
+  while (fgets(buf1, sizeof(buf1), stdin))
+    {
+      /* Strip the newline, it is not a part of the tested string */
+      char *p = strchr(buf1, '\n');
+      if (p)
+	*p = 0;
+      if (argc == 2)
+	{
+	  if (rx_match(r, buf1))
+	    puts("MATCH");
+	  else
+	    puts("NO MATCH");
+	}
+      else
+	{
+	  int i = rx_subst(r, argv[2], buf1, buf2, sizeof(buf2));
+	  if (i < 0)
+	    puts("OVERFLOW");
+	  else if (!i)
+	    puts("NO MATCH");
+	  else
+	    puts(buf2);
+	}
+    }
+  rx_free(r);
+  return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Interface to Regular Expression Libraries
+ *
+ * (c) 1997--2004 Martin Mares <mj@ucw.cz>
+ * (c) 2001 Robert Spalek <robert@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_REGEX_H
+#define _UCW_REGEX_H
+
+/* Opaque compiled regular expression; its layout depends on the configured backend. */
+typedef struct regex regex;
+
+/* Compile the expression @r, case-insensitively if @icase is non-zero. Dies on invalid patterns. */
+regex *rx_compile(const char *r, int icase);
+/* Release an expression returned by rx_compile(). */
+void rx_free(regex *r);
+/* Return 1 if the whole string @s matches, 0 otherwise. */
+int rx_match(regex *r, const char *s);
+/*
+ * Match @src and expand @by (where \0 to \9 refer to the sub-matches) to
+ * @dest of size @destlen. Returns 1 on success, 0 if @src does not match
+ * and -1 if the result does not fit.
+ */
+int rx_subst(regex *r, const char *by, const char *src, char *dest, uns destlen);
+
+#endif
--- /dev/null
+# Tests for the regex module
+
+Run: ../obj/ucw/regex-t 'a.*b.*c'
+In: abc
+ ajkhkbbbbbc
+ Aabc
+Out: MATCH
+ MATCH
+ NO MATCH
+
+Run: ../obj/ucw/regex-t -i 'a.*b.*c'
+In: aBc
+ ajkhkbBBBBC
+ Aabc
+Out: MATCH
+ MATCH
+ MATCH
+
+Run: ../obj/ucw/regex-t -i '(ahoj|nebo)'
+In: Ahoj
+ nEBo
+ ahoja
+ (ahoj|nebo)
+Out: MATCH
+ MATCH
+ NO MATCH
+ NO MATCH
+
+Run: ../obj/ucw/regex-t '\(ahoj\)'
+In: (ahoj)
+ ahoj
+Out: MATCH
+ NO MATCH
+
+Run: ../obj/ucw/regex-t '(.*b)*'
+In: ababababab
+ ababababababababababababababababababababababababababababa
+Out: MATCH
+ NO MATCH
+
+Run: ../obj/ucw/regex-t '(.*)((aabb)|cc)(b.*)' '\1<\3>\4'
+In: aaabbb
+ aabbccb
+ abcabc
+ aaccbb
+Out: a<aabb>b
+ aabb<>b
+ NO MATCH
+ aa<>bb
+
+Run: ../obj/ucw/regex-t '.*\?(.*&)*([a-z_]*sess[a-z_]*|random|sid|S_ID|rnd|timestamp|referer)=.*'
+In: /nemecky/ubytovani/hotel.php?sort=&cislo=26&mena=EUR&typ=Hotel&luz1=ANO&luz2=ANO&luz3=&luz4=&luz5=&maxp1=99999&maxp2=99999&maxp3=99999&maxp4=99999&maxp5=99999&apart=&rada=8,9,10,11,19,22,26,27,28,29,3&cislo=26&mena=EUR&typ=Hotel&luz1=ANO&luz2=ANO&luz3=&luz4=&luz5=&maxp1=99999&maxp2=99999&maxp3=99999&maxp4=99999&maxp5=99999&apart=&rada=8,9,10,11,19,22,26,27,28,29,3&cislo=26&mena=EUR&typ=Hotel&luz1=ANO&luz2=ANO&luz3=&luz4=&luz5=&maxp1=99999&maxp2=99999&maxp3=99999&maxp4=99999&maxp5=99999&apart=&rada=8,9,10,11,19,22,26,27,28,29,3
+ /test...?f=1&s=3&sid=123&q=3&
+Out: NO MATCH
+ MATCH
+
+Run: ../obj/ucw/regex-t '.*[0-9a-f]{8,16}.*'
+In: abcdabcdabcd
+ aaaaaaaaaaaaaaaaaaaaaaaaaaaa
+ asddajlkdkajlqwepoiequwiouio
+ 000001111p101010101010q12032
+Out: MATCH
+ MATCH
+ NO MATCH
+ MATCH
--- /dev/null
+/*
+ * UCW Library -- Running of Commands
+ *
+ * (c) 2004 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <alloca.h>
+#include <unistd.h>
+#include <sys/wait.h>
+
+/*
+ * Replace the current process by the command @cmd, whose arguments are
+ * given as a NULL-terminated list of strings in @args. If execv() fails,
+ * an error is logged and the process exits with status 255, so the
+ * function never returns.
+ */
+void NONRET
+exec_command_v(const char *cmd, va_list args)
+{
+ /* First pass over a copy of the list: count the arguments. The initial 2
+ * accounts for argv[0] and for the terminating NULL. */
+ va_list cargs;
+ va_copy(cargs, args);
+ int cnt = 2;
+ char *arg;
+ while (arg = va_arg(cargs, char *))
+ cnt++;
+ va_end(cargs);
+ char **argv = alloca(sizeof(char *) * cnt);
+ argv[0] = (char *)cmd;
+ cnt = 1;
+ /* Second pass over another copy: fill in the argument vector. */
+ va_copy(cargs, args);
+ while (arg = va_arg(cargs, char *))
+ argv[cnt++] = arg;
+ va_end(cargs);
+ argv[cnt] = NULL;
+ execv(cmd, argv);
+ /* We get here only if execv() has failed. The original list has not been
+ * consumed yet, so it can be used for formatting the message. */
+ char echo[256];
+ echo_command_v(echo, sizeof(echo), cmd, args);
+ msg(L_ERROR, "Cannot execute %s: %m", echo);
+ exit(255);
+}
+
+/*
+ * Run the command @cmd with the NULL-terminated argument list @args in a
+ * child process and wait for it. Returns 1 if format_exit_status() reports
+ * success, 0 if fork() fails or the command fails (an error is logged in
+ * both cases). Dies if waitpid() fails.
+ */
+int
+run_command_v(const char *cmd, va_list args)
+{
+ pid_t p = fork();
+ if (p < 0)
+ {
+ msg(L_ERROR, "fork() failed: %m");
+ return 0;
+ }
+ else if (!p)
+ exec_command_v(cmd, args); /* Child: never returns */
+ else
+ {
+ int stat;
+ char status_msg[EXIT_STATUS_MSG_SIZE];
+ p = waitpid(p, &stat, 0);
+ if (p < 0)
+ die("waitpid() failed: %m");
+ /* A non-zero result means that status_msg describes a failure. */
+ if (format_exit_status(status_msg, stat))
+ {
+ char echo[256];
+ echo_command_v(echo, sizeof(echo), cmd, args);
+ msg(L_ERROR, "`%s' failed: %s", echo, status_msg);
+ return 0;
+ }
+ return 1;
+ }
+}
+
+/*
+ * Format the command @cmd and its NULL-terminated argument list @args as
+ * a space-separated string in @buf of @size bytes. If the text does not
+ * fit, it is cut and terminated by "...". The list @args is consumed.
+ * NOTE(review): @size is assumed to be at least 4, otherwise `limit'
+ * points before @buf -- confirm that all callers guarantee it.
+ */
+void
+echo_command_v(char *buf, int size, const char *cmd, va_list args)
+{
+ /* The last 4 bytes are reserved for "..." and the terminating zero. */
+ char *limit = buf + size - 4;
+ char *p = buf;
+ const char *arg = cmd;
+ do
+ {
+ int l = strlen(arg);
+ /* Separate the words by spaces (not before the first one). */
+ if (p != buf && p < limit)
+ *p++ = ' ';
+ if (p+l > limit)
+ {
+ /* Copy what fits and mark the truncation. */
+ memcpy(p, arg, limit-p);
+ strcpy(limit, "...");
+ return;
+ }
+ memcpy(p, arg, l);
+ p += l;
+ }
+ while (arg = va_arg(args, char *));
+ *p = 0;
+}
+
+/*
+ * Variadic front-end of run_command_v(): the arguments of the command
+ * follow @cmd and the list must be terminated by NULL.
+ */
+int
+run_command(const char *cmd, ...)
+{
+ va_list args;
+ va_start(args, cmd);
+ int e = run_command_v(cmd, args);
+ va_end(args);
+ return e;
+}
+
+/*
+ * Variadic front-end of exec_command_v(): the arguments of the command
+ * follow @cmd and the list must be terminated by NULL. There is no
+ * va_end(), because exec_command_v() is declared NONRET.
+ */
+void NONRET
+exec_command(const char *cmd, ...)
+{
+ va_list args;
+ va_start(args, cmd);
+ exec_command_v(cmd, args);
+}
+
+/*
+ * Variadic front-end of echo_command_v(): the arguments of the command
+ * follow @cmd and the list must be terminated by NULL.
+ */
+void
+echo_command(char *buf, int len, const char *cmd, ...)
+{
+ va_list args;
+ va_start(args, cmd);
+ echo_command_v(buf, len, cmd, args);
+ va_end(args);
+}
+
+#ifdef TEST
+
+/* Test: format a command line, log it and run the command. */
+int main(void)
+{
+  /* The buffer must not be called `msg', because that would hide the
+   * logging function msg() called below. */
+  char cmdline[1024];
+  echo_command(cmdline, sizeof(cmdline), "/bin/echo", "datel", "strakapoud", NULL);
+  msg(L_INFO, "Running <%s>", cmdline);
+  run_command("/bin/echo", "datel", "strakapoud", NULL);
+  return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * The UCW Library -- POSIX semaphores wrapper
+ *
+ * (c) 2006 Robert Spalek <robert@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_SEMAPHORE_H
+#define _UCW_SEMAPHORE_H
+
+#include <semaphore.h>
+
+#ifdef CONFIG_DARWIN
+
+#include <unistd.h>
+#include <stdio.h>
+#include <errno.h>
+
+#include "ucw/fastbuf.h" // For the temp_file_name
+
+/* In Darwin, sem_init() is unfortunately not implemented and the guide
+ * recommends emulating it using sem_open(). */
+
+/*
+ * Allocate a semaphore with initial value 0. It is emulated by a named
+ * semaphore created under a temporary name; when the name already exists,
+ * a new one is tried (the loop retries at most 10 times).
+ *
+ * The name is unlinked as soon as the semaphore is open: it is needed
+ * only for creation, and without the unlink it would remain in the system
+ * after sem_free(), which only closes the semaphore.
+ */
+static inline sem_t *
+sem_alloc(void)
+{
+  char buf[TEMP_FILE_NAME_LEN];
+  int mode, retry = 10;
+  sem_t *sem;
+  do
+    {
+      temp_file_name(buf, &mode);
+      sem = sem_open(buf, mode | O_CREAT, 0777, 0);
+    }
+  while (sem == (sem_t*) SEM_FAILED && errno == EEXIST && retry --);
+  ASSERT(sem != (sem_t*) SEM_FAILED);
+  /* Never unlink a name we have failed to create, it may belong to somebody else */
+  if (sem != (sem_t*) SEM_FAILED)
+    sem_unlink(buf);
+  return sem;
+}
+
+/* Release a semaphore returned by sem_alloc() by closing it. */
+static inline void
+sem_free(sem_t *sem)
+{
+ sem_close(sem);
+}
+
+#else
+
+/*
+ * Allocate a semaphore and initialize it by sem_init() with both the
+ * pshared flag and the initial value equal to 0. Failure of sem_init()
+ * is caught only by the ASSERT.
+ */
+static inline sem_t *
+sem_alloc(void)
+{
+ sem_t *sem = xmalloc(sizeof(sem_t));
+ int res = sem_init(sem, 0, 0);
+ ASSERT(!res);
+ return sem;
+}
+
+/* Destroy a semaphore returned by sem_alloc() and free its memory. */
+static inline void
+sem_free(sem_t *sem)
+{
+ sem_destroy(sem);
+ xfree(sem);
+}
+
+#endif
+
+#endif
--- /dev/null
+/*
+ * HMAC-SHA1 Message Authentication Code (RFC 2202)
+ *
+ * (c) 2008--2009 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+#include "ucw/sha1.h"
+
+#include <string.h>
+
+/*
+ * Start computation of HMAC-SHA1 with the given key: both the inner and
+ * the outer SHA1 context are initialized and fed with one block consisting
+ * of the padded key XOR'ed with 0x36 and 0x5c respectively.
+ */
+void
+sha1_hmac_init(sha1_hmac_context *hd, const byte *key, uns keylen)
+{
+  byte padded_key[SHA1_BLOCK_SIZE], ipad[SHA1_BLOCK_SIZE], opad[SHA1_BLOCK_SIZE];
+
+  // Bring the key to exactly one block: keys longer than a block are
+  // replaced by their hash, everything is padded by zeroes
+  bzero(padded_key, SHA1_BLOCK_SIZE);
+  if (keylen > SHA1_BLOCK_SIZE)
+    sha1_hash_buffer(padded_key, key, keylen);
+  else
+    memcpy(padded_key, key, keylen);
+
+  // Derive the first blocks of the inner and outer digests
+  for (int i=0; i < SHA1_BLOCK_SIZE; i++)
+    {
+      ipad[i] = padded_key[i] ^ 0x36;
+      opad[i] = padded_key[i] ^ 0x5c;
+    }
+
+  // Initialize the inner digest
+  sha1_init(&hd->ictx);
+  sha1_update(&hd->ictx, ipad, SHA1_BLOCK_SIZE);
+
+  // Initialize the outer digest
+  sha1_init(&hd->octx);
+  sha1_update(&hd->octx, opad, SHA1_BLOCK_SIZE);
+}
+
+/* Add @datalen bytes of the message at @data to the HMAC computation. */
+void
+sha1_hmac_update(sha1_hmac_context *hd, const byte *data, uns datalen)
+{
+ // Just update the inner digest
+ sha1_update(&hd->ictx, data, datalen);
+}
+
+/*
+ * Finish the HMAC computation and return a pointer to the SHA1_SIZE bytes
+ * of the result. It is the pointer returned by sha1_final() for the outer
+ * context, so it points inside @hd.
+ */
+byte *sha1_hmac_final(sha1_hmac_context *hd)
+{
+ // Finish the inner digest
+ byte *isha = sha1_final(&hd->ictx);
+
+ // Finish the outer digest
+ sha1_update(&hd->octx, isha, SHA1_SIZE);
+ return sha1_final(&hd->octx);
+}
+
+/*
+ * Compute HMAC-SHA1 of @datalen bytes at @data with the key @key of
+ * @keylen bytes at once and store the SHA1_SIZE bytes of the result
+ * to @outbuf.
+ */
+void
+sha1_hmac(byte *outbuf, const byte *key, uns keylen, const byte *data, uns datalen)
+{
+  sha1_hmac_context ctx;
+
+  sha1_hmac_init(&ctx, key, keylen);
+  sha1_hmac_update(&ctx, data, datalen);
+  memcpy(outbuf, sha1_hmac_final(&ctx), SHA1_SIZE);
+}
+
+#ifdef TEST
+
+#include <stdio.h>
+#include "ucw/string.h"
+
+/*
+ * Read one line of the test input to @dest (which has to provide 1024
+ * bytes) and return its length. A line starting with "0x" is decoded as
+ * a sequence of hexadecimal bytes, any other line is taken literally.
+ * Dies on a missing line; a line without the final newline (the last line
+ * of the input or an over-long line) is used as it is.
+ */
+static uns rd(char *dest)
+{
+  char buf[1024];
+  if (!fgets(buf, sizeof(buf), stdin))
+    die("Unexpected end of input");
+  char *nl = strchr(buf, '\n');
+  if (nl)
+    *nl = 0;
+  if (buf[0] == '0' && buf[1] == 'x')
+    {
+      const char *e = hex_to_mem(dest, buf+2, 1024, 0);
+      ASSERT(!*e);		/* The whole line must consist of hex digits */
+      return (e-buf-2)/2;	/* Two digits per byte */
+    }
+  else
+    {
+      strcpy(dest, buf);
+      return strlen(dest);
+    }
+}
+
+/*
+ * Test: read the key from the first line of the standard input and the
+ * data from the second one (see rd() for the format) and print their
+ * HMAC-SHA1 in hexadecimal.
+ */
+int main(void)
+{
+ char key[1024], data[1024];
+ byte hmac[SHA1_SIZE];
+ uns kl = rd(key);
+ uns dl = rd(data);
+ sha1_hmac(hmac, key, kl, data, dl);
+ /* The data buffer is no longer needed, reuse it for the hex output. */
+ mem_to_hex(data, hmac, SHA1_SIZE, 0);
+ puts(data);
+ return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * SHA-1 Hash Function (FIPS 180-1, RFC 3174)
+ *
+ * Based on the code from libgcrypt-1.2.3, which is
+ * Copyright (C) 1998, 2001, 2002, 2003 Free Software Foundation, Inc.
+ *
+ * Adaptation for libucw:
+ * (c) 2008 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+#include "ucw/sha1.h"
+#include "ucw/unaligned.h"
+
+#include <string.h>
+
+/*
+ * Prepare the context for a new computation: set the five chaining
+ * variables to their initial constants and reset both the number of
+ * processed blocks and the number of buffered bytes.
+ */
+void
+sha1_init(sha1_context *hd)
+{
+ hd->h0 = 0x67452301;
+ hd->h1 = 0xefcdab89;
+ hd->h2 = 0x98badcfe;
+ hd->h3 = 0x10325476;
+ hd->h4 = 0xc3d2e1f0;
+ hd->nblocks = 0;
+ hd->count = 0;
+}
+
+/*
+ * Process one 64-byte block of the message at @data (read as 16 big-endian
+ * 32-bit words) and update the chaining variables h0..h4 in @hd. The 80
+ * rounds are fully unrolled.
+ *
+ * Beware that the helper macros defined inside (K1..K4, F1..F4, M and R)
+ * are not undefined at the end of the function.
+ */
+static void
+transform(sha1_context *hd, const byte *data)
+{
+ u32 a,b,c,d,e,tm;
+ u32 x[16];
+
+ /* Get values from the chaining vars. */
+ a = hd->h0;
+ b = hd->h1;
+ c = hd->h2;
+ d = hd->h3;
+ e = hd->h4;
+
+ /* Load the block as 16 words in the native byte order. */
+#ifdef CPU_BIG_ENDIAN
+ memcpy( x, data, 64 );
+#else
+ {
+ for (int i=0; i<16; i++)
+ x[i] = get_u32_be(data+4*i);
+ }
+#endif
+
+
+ /* Constants and functions of the four groups of 20 rounds. */
+#define K1 0x5A827999L
+#define K2 0x6ED9EBA1L
+#define K3 0x8F1BBCDCL
+#define K4 0xCA62C1D6L
+#define F1(x,y,z) ( z ^ ( x & ( y ^ z ) ) )
+#define F2(x,y,z) ( x ^ y ^ z )
+#define F3(x,y,z) ( ( x & y ) | ( z & ( x | y ) ) )
+#define F4(x,y,z) ( x ^ y ^ z )
+
+
+ /* M(i) computes the message word for round i >= 16 from the words 3, 8, 14
+ * and 16 rounds back and stores it in place in the circular buffer x[]. */
+#define M(i) ( tm = x[i&0x0f] ^ x[(i-14)&0x0f] \
+ ^ x[(i-8)&0x0f] ^ x[(i-3)&0x0f] \
+ , (x[i&0x0f] = ROL(tm, 1)) )
+
+ /* R() is a single round. Instead of moving the values between the five
+ * variables, the calls below pass the variables in rotated order. */
+#define R(a,b,c,d,e,f,k,m) do { e += ROL( a, 5 ) \
+ + f( b, c, d ) \
+ + k \
+ + m; \
+ b = ROL( b, 30 ); \
+ } while(0)
+ R( a, b, c, d, e, F1, K1, x[ 0] );
+ R( e, a, b, c, d, F1, K1, x[ 1] );
+ R( d, e, a, b, c, F1, K1, x[ 2] );
+ R( c, d, e, a, b, F1, K1, x[ 3] );
+ R( b, c, d, e, a, F1, K1, x[ 4] );
+ R( a, b, c, d, e, F1, K1, x[ 5] );
+ R( e, a, b, c, d, F1, K1, x[ 6] );
+ R( d, e, a, b, c, F1, K1, x[ 7] );
+ R( c, d, e, a, b, F1, K1, x[ 8] );
+ R( b, c, d, e, a, F1, K1, x[ 9] );
+ R( a, b, c, d, e, F1, K1, x[10] );
+ R( e, a, b, c, d, F1, K1, x[11] );
+ R( d, e, a, b, c, F1, K1, x[12] );
+ R( c, d, e, a, b, F1, K1, x[13] );
+ R( b, c, d, e, a, F1, K1, x[14] );
+ R( a, b, c, d, e, F1, K1, x[15] );
+ R( e, a, b, c, d, F1, K1, M(16) );
+ R( d, e, a, b, c, F1, K1, M(17) );
+ R( c, d, e, a, b, F1, K1, M(18) );
+ R( b, c, d, e, a, F1, K1, M(19) );
+ R( a, b, c, d, e, F2, K2, M(20) );
+ R( e, a, b, c, d, F2, K2, M(21) );
+ R( d, e, a, b, c, F2, K2, M(22) );
+ R( c, d, e, a, b, F2, K2, M(23) );
+ R( b, c, d, e, a, F2, K2, M(24) );
+ R( a, b, c, d, e, F2, K2, M(25) );
+ R( e, a, b, c, d, F2, K2, M(26) );
+ R( d, e, a, b, c, F2, K2, M(27) );
+ R( c, d, e, a, b, F2, K2, M(28) );
+ R( b, c, d, e, a, F2, K2, M(29) );
+ R( a, b, c, d, e, F2, K2, M(30) );
+ R( e, a, b, c, d, F2, K2, M(31) );
+ R( d, e, a, b, c, F2, K2, M(32) );
+ R( c, d, e, a, b, F2, K2, M(33) );
+ R( b, c, d, e, a, F2, K2, M(34) );
+ R( a, b, c, d, e, F2, K2, M(35) );
+ R( e, a, b, c, d, F2, K2, M(36) );
+ R( d, e, a, b, c, F2, K2, M(37) );
+ R( c, d, e, a, b, F2, K2, M(38) );
+ R( b, c, d, e, a, F2, K2, M(39) );
+ R( a, b, c, d, e, F3, K3, M(40) );
+ R( e, a, b, c, d, F3, K3, M(41) );
+ R( d, e, a, b, c, F3, K3, M(42) );
+ R( c, d, e, a, b, F3, K3, M(43) );
+ R( b, c, d, e, a, F3, K3, M(44) );
+ R( a, b, c, d, e, F3, K3, M(45) );
+ R( e, a, b, c, d, F3, K3, M(46) );
+ R( d, e, a, b, c, F3, K3, M(47) );
+ R( c, d, e, a, b, F3, K3, M(48) );
+ R( b, c, d, e, a, F3, K3, M(49) );
+ R( a, b, c, d, e, F3, K3, M(50) );
+ R( e, a, b, c, d, F3, K3, M(51) );
+ R( d, e, a, b, c, F3, K3, M(52) );
+ R( c, d, e, a, b, F3, K3, M(53) );
+ R( b, c, d, e, a, F3, K3, M(54) );
+ R( a, b, c, d, e, F3, K3, M(55) );
+ R( e, a, b, c, d, F3, K3, M(56) );
+ R( d, e, a, b, c, F3, K3, M(57) );
+ R( c, d, e, a, b, F3, K3, M(58) );
+ R( b, c, d, e, a, F3, K3, M(59) );
+ R( a, b, c, d, e, F4, K4, M(60) );
+ R( e, a, b, c, d, F4, K4, M(61) );
+ R( d, e, a, b, c, F4, K4, M(62) );
+ R( c, d, e, a, b, F4, K4, M(63) );
+ R( b, c, d, e, a, F4, K4, M(64) );
+ R( a, b, c, d, e, F4, K4, M(65) );
+ R( e, a, b, c, d, F4, K4, M(66) );
+ R( d, e, a, b, c, F4, K4, M(67) );
+ R( c, d, e, a, b, F4, K4, M(68) );
+ R( b, c, d, e, a, F4, K4, M(69) );
+ R( a, b, c, d, e, F4, K4, M(70) );
+ R( e, a, b, c, d, F4, K4, M(71) );
+ R( d, e, a, b, c, F4, K4, M(72) );
+ R( c, d, e, a, b, F4, K4, M(73) );
+ R( b, c, d, e, a, F4, K4, M(74) );
+ R( a, b, c, d, e, F4, K4, M(75) );
+ R( e, a, b, c, d, F4, K4, M(76) );
+ R( d, e, a, b, c, F4, K4, M(77) );
+ R( c, d, e, a, b, F4, K4, M(78) );
+ R( b, c, d, e, a, F4, K4, M(79) );
+
+ /* Update chaining vars. */
+ hd->h0 += a;
+ hd->h1 += b;
+ hd->h2 += c;
+ hd->h3 += d;
+ hd->h4 += e;
+}
+
+
+/*
+ * Update the message digest with the contents
+ * of INBUF with length INLEN.
+ *
+ * Data which do not fill a whole 64-byte block are kept in hd->buf until
+ * the rest of the block arrives. Calling the function with INBUF == NULL
+ * only processes the buffer if it is full; this is also used internally.
+ */
+void
+sha1_update(sha1_context *hd, const byte *inbuf, uns inlen)
+{
+ if( hd->count == 64 ) /* flush the buffer */
+ {
+ transform( hd, hd->buf );
+ hd->count = 0;
+ hd->nblocks++;
+ }
+ if( !inbuf )
+ return;
+
+ /* Some bytes are waiting in the buffer: try to complete the block first. */
+ if( hd->count )
+ {
+ for( ; inlen && hd->count < 64; inlen-- )
+ hd->buf[hd->count++] = *inbuf++;
+ /* Processes the buffer only if it has just become full. */
+ sha1_update( hd, NULL, 0 );
+ if( !inlen )
+ return;
+ }
+
+ /* Whole blocks are processed directly from the input without copying. */
+ while( inlen >= 64 )
+ {
+ transform( hd, inbuf );
+ hd->count = 0;
+ hd->nblocks++;
+ inlen -= 64;
+ inbuf += 64;
+ }
+ /* Keep the remaining bytes (less than a block) for the next call. */
+ for( ; inlen && hd->count < 64; inlen-- )
+ hd->buf[hd->count++] = *inbuf++;
+}
+
+
+/*
+ * The routine final terminates the computation and
+ * returns the digest.
+ * The digest is stored at the beginning of hd->buf, so the returned
+ * pointer is valid only as long as the context exists and adding more
+ * bytes to the handle will destroy the returned buffer.
+ * The context is not re-initialized here; call sha1_init() before using
+ * it for another computation.
+ * Returns: 20 bytes representing the digest.
+ */
+
+byte *
+sha1_final(sha1_context *hd)
+{
+ u32 t, msb, lsb;
+ byte *p;
+
+ sha1_update(hd, NULL, 0); /* flush */;
+
+ /* Compute the length of the message in bits as a 64-bit number msb:lsb. */
+ t = hd->nblocks;
+ /* multiply by 64 to make a byte count */
+ lsb = t << 6;
+ msb = t >> 26;
+ /* add the count */
+ t = lsb;
+ if( (lsb += hd->count) < t )
+ msb++;
+ /* multiply by 8 to make a bit count */
+ t = lsb;
+ lsb <<= 3;
+ msb <<= 3;
+ msb |= t >> 29;
+
+ /* Pad by a 0x80 byte and zeroes, so that exactly 8 bytes remain free
+ * at the end of a block. */
+ if( hd->count < 56 ) /* enough room */
+ {
+ hd->buf[hd->count++] = 0x80; /* pad */
+ while( hd->count < 56 )
+ hd->buf[hd->count++] = 0; /* pad */
+ }
+ else /* need one extra block */
+ {
+ hd->buf[hd->count++] = 0x80; /* pad character */
+ while( hd->count < 64 )
+ hd->buf[hd->count++] = 0;
+ sha1_update(hd, NULL, 0); /* flush */;
+ memset(hd->buf, 0, 56 ); /* fill next block with zeroes */
+ }
+ /* append the 64 bit count */
+ hd->buf[56] = msb >> 24;
+ hd->buf[57] = msb >> 16;
+ hd->buf[58] = msb >> 8;
+ hd->buf[59] = msb ;
+ hd->buf[60] = lsb >> 24;
+ hd->buf[61] = lsb >> 16;
+ hd->buf[62] = lsb >> 8;
+ hd->buf[63] = lsb ;
+ transform( hd, hd->buf );
+
+ /* Store the chaining variables in big-endian order over the start of the buffer. */
+ p = hd->buf;
+#define X(a) do { put_u32_be(p, hd->h##a); p += 4; } while(0)
+ X(0);
+ X(1);
+ X(2);
+ X(3);
+ X(4);
+#undef X
+
+ return hd->buf;
+}
+
+/*
+ * One-shot helper: hashes @length bytes starting at @buffer and copies
+ * the resulting SHA1_SIZE-byte digest to @outbuf.
+ */
+void
+sha1_hash_buffer(byte *outbuf, const byte *buffer, uns length)
+{
+  sha1_context ctx;
+  const byte *digest;
+
+  sha1_init(&ctx);
+  sha1_update(&ctx, buffer, length);
+  /* The digest lives inside ctx, so copy it out before ctx goes out of scope. */
+  digest = sha1_final(&ctx);
+  memcpy(outbuf, digest, SHA1_SIZE);
+}
+
+#ifdef TEST
+
+#include <stdio.h>
+#include <unistd.h>
+#include "ucw/string.h"
+
+int main(void)
+{
+  sha1_context ctx;
+  byte chunk[3];
+  char hex[SHA1_HEX_SIZE];
+
+  /* Hash standard input, feeding it to the hash in reads of at most 3 bytes. */
+  sha1_init(&ctx);
+  for (;;)
+    {
+      int got = read(0, chunk, sizeof(chunk));
+      if (got <= 0)
+        break;
+      sha1_update(&ctx, chunk, got);
+    }
+
+  /* Print the digest in hexadecimal. */
+  mem_to_hex(hex, sha1_final(&ctx), SHA1_SIZE, 0);
+  puts(hex);
+
+  return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * SHA-1 Hash Function (FIPS 180-1, RFC 3174)
+ *
+ * (c) 2008--2009 Martin Mares <mj@ucw.cz>
+ *
+ * Based on the code from libgcrypt-1.2.3, which was:
+ *
+ * Copyright (C) 1998, 2001, 2002, 2003 Free Software Foundation, Inc.
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_SHA1_H
+#define _UCW_SHA1_H
+
+/**
+ * Internal SHA1 state.
+ * You should use it just as an opaque handle only.
+ */
+typedef struct {
+ u32 h0,h1,h2,h3,h4; /* Hash state; sha1_final() writes these five words out as the digest */
+ u32 nblocks; /* Number of 64-byte blocks hashed by sha1_update() so far */
+ byte buf[64]; /* Input bytes not yet hashed; sha1_final() also stores the digest here */
+ int count; /* Number of bytes currently held in buf */
+} sha1_context;
+
+void sha1_init(sha1_context *hd); /** Initialize new algorithm run in the @hd context. **/
+/**
+ * Push another @inlen bytes of data pointed to by @inbuf onto the
+ * SHA1 hash currently in @hd. You can call this as many times as you want on
+ * the same hash (and you do not need to reinitialize it by
+ * @sha1_init()). It has the same effect as concatenating all the data
+ * together and passing them at once.
+ */
+void sha1_update(sha1_context *hd, const byte *inbuf, uns inlen);
+/**
+ * No more @sha1_update() calls will be done. This terminates the hash
+ * and returns a pointer to it.
+ *
+ * Note that the pointer points into data in the @hd context. If it ceases
+ * to exist, the pointer becomes invalid.
+ *
+ * To convert the hash to its usual hexadecimal representation, see
+ * <<string:mem_to_hex()>>.
+ */
+byte *sha1_final(sha1_context *hd);
+
+/**
+ * A convenience one-shot function for SHA1 hash.
+ * It is equivalent to this snippet of code:
+ *
+ * sha1_context hd;
+ * sha1_init(&hd);
+ * sha1_update(&hd, buffer, length);
+ * memcpy(outbuf, sha1_final(&hd), SHA1_SIZE);
+ */
+void sha1_hash_buffer(byte *outbuf, const byte *buffer, uns length);
+
+/**
+ * SHA1 HMAC message authentication. If you provide @key and @data,
+ * the result will be stored in @outbuf.
+ */
+void sha1_hmac(byte *outbuf, const byte *key, uns keylen, const byte *data, uns datalen);
+
+/**
+ * The HMAC also exists in a stream version in a way analogous to the
+ * plain SHA1. Pass this as a context.
+ */
+typedef struct {
+ sha1_context ictx;
+ sha1_context octx;
+} sha1_hmac_context;
+
+void sha1_hmac_init(sha1_hmac_context *hd, const byte *key, uns keylen); /** Initialize HMAC with context @hd and the given key. See sha1_init(). */
+void sha1_hmac_update(sha1_hmac_context *hd, const byte *data, uns datalen); /** Hash another @datalen bytes of data. See sha1_update(). */
+byte *sha1_hmac_final(sha1_hmac_context *hd); /** Terminate the HMAC and return a pointer to the allocated hash. See sha1_final(). */
+
+#define SHA1_SIZE 20 /** Size of the SHA1 hash in its binary representation **/
+#define SHA1_HEX_SIZE 41 /** Buffer length for a string containing SHA1 in hexadecimal format. **/
+#define SHA1_BLOCK_SIZE 64 /** SHA1 splits input to blocks of this size. **/
+
+#endif
--- /dev/null
+# Tests of the SHA1 module
+
+Name: SHA1-1
+Run: echo -n "abc" | ../obj/ucw/sha1-t
+Out: a9993e364706816aba3e25717850c26c9cd0d89d
+
+Name: SHA1-2
+Run: echo -n "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq" | ../obj/ucw/sha1-t
+Out: 84983e441c3bd26ebaae4aa1f95129e5e54670f1
+
+# Tests of SHA-1 HMAC specified in RFC 2202
+
+Name: HMAC1
+Run: ../obj/ucw/sha1-hmac-t
+In: 0x0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b
+ Hi There
+Out: b617318655057264e28bc0b6fb378c8ef146be00
+
+Name: HMAC2
+Run: ../obj/ucw/sha1-hmac-t
+In: Jefe
+ what do ya want for nothing?
+Out: effcdf6ae5eb2fa2d27416d5f184df9c259a7c79
+
+Name: HMAC3
+Run: ../obj/ucw/sha1-hmac-t
+In: 0xaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+ 0xdddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd
+Out: 125d7342b9ac11cd91a39af48aa17b4f63f175d3
+
+Name: HMAC4
+Run: ../obj/ucw/sha1-hmac-t
+In: 0x0102030405060708090a0b0c0d0e0f10111213141516171819
+ 0xcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd
+Out: 4c9007f4026250c6bc8414f9bf50c86c2d7235da
+
+Name: HMAC5
+Run: ../obj/ucw/sha1-hmac-t
+In: 0x0c0c0c0c0c0c0c0c0c0c0c0c0c0c0c0c0c0c0c0c
+ Test With Truncation
+Out: 4c1a03424b55e07fe7f27be1d58bb9324a9a5a04
+
+Name: HMAC6
+Run: ../obj/ucw/sha1-hmac-t
+In: 0xaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+ Test Using Larger Than Block-Size Key - Hash Key First
+Out: aa4ae5e15272d00e95705637ce8a3b55ed402112
+
+Name: HMAC7
+Run: ../obj/ucw/sha1-hmac-t
+In: 0xaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+ Test Using Larger Than Block-Size Key and Larger Than One Block-Size Data
+Out: e8e99d0f45237d786d6bbaa7965c7808bbff1a91
+
+Name: HMAC8
+Run: ../obj/ucw/sha1-hmac-t
+In: 0xaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+ Test Using Larger Than Block-Size Key - Hash Key First
+Out: aa4ae5e15272d00e95705637ce8a3b55ed402112
+
+Name: HMAC9
+Run: ../obj/ucw/sha1-hmac-t
+In: 0xaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+ Test Using Larger Than Block-Size Key and Larger Than One Block-Size Data
+Out: e8e99d0f45237d786d6bbaa7965c7808bbff1a91
--- /dev/null
+# Support routines for shell scripts: the config and logger programs plus libucw.sh
+
+DIRS+=ucw/shell
+UCW_SHELL_PROGS=$(addprefix $(o)/ucw/shell/,config logger)
+PROGS+=$(UCW_SHELL_PROGS)
+DATAFILES+=$(o)/ucw/shell/libucw.sh
+
+$(o)/ucw/shell/config: $(o)/ucw/shell/config.o $(LIBUCW)
+$(o)/ucw/shell/logger: $(o)/ucw/shell/logger.o $(LIBUCW)
+
+TESTS+=$(addprefix $(o)/ucw/shell/,config.test)
+
+$(o)/ucw/shell/config.test: $(o)/ucw/shell/config
+
+INSTALL_TARGETS+=install-ucw-shell
+install-ucw-shell: # Copy the programs in UCW_SHELL_PROGS to $(DESTDIR)$(INSTALL_BIN_DIR)
+ install -d -m 755 $(DESTDIR)$(INSTALL_BIN_DIR)
+ install -m 755 $(UCW_SHELL_PROGS) $(DESTDIR)$(INSTALL_BIN_DIR)
+
+.PHONY: install-ucw-shell
--- /dev/null
+/*
+ * UCW Library -- Shell Interface to Configuration Files
+ *
+ * (c) 2002--2005 Martin Mares <mj@ucw.cz>
+ * (c) 2006 Robert Spalek <robert@ucw.cz>
+ * (c) 2006 Pavel Charvat <pchar@ucw.cz>
+ *
+ * Once we were using this beautiful Shell version, but it turned out
+ * that it doesn't work with nested config files:
+ *
+ * eval `sed <cf/sherlock '/^#/d;/^ *$/d;s/ \+$//;
+ * h;s@[^ ]*@@;x;s@[ ].*@@;y/abcdefghijklmnopqrstuvwxyz/ABCDEFGHIJKLMNOPQRSTUVWXYZ/;G;s/\n//;
+ * /^\[SECTION\]/,/^\[/ {; /^[A-Z]/ { s/^\([^ ]\+\)[ ]*\(.*\)$/SH_\1="\2"/; p; }; };
+ * d;'`
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+#include "ucw/conf.h"
+#include "ucw/getopt.h"
+#include "ucw/conf-internal.h"
+#include "ucw/clists.h"
+#include "ucw/mempool.h"
+#include "ucw/chartype.h"
+#include "ucw/bbuf.h"
+#include "ucw/string.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <alloca.h>
+
+static void
+help(void) /* Print the command-line syntax summary to stderr and terminate with exit status 1. */
+{
+ fputs("\n\
+Usage: config [-C<configfile>] [-S<section>.<option>=<value>] <sections>\n\
+\n\
+<sections>\t<section>[;<sections>]\n\
+<section>\t[!]<name>{[<items>]}\n\
+<items>\t\t[-]<item>[;<items>]\n\
+<item>\t\t<static> | <array> | <list>\n\
+<static>\t<type><name>[=<value>]\n\
+<list>\t\t@<name>{[<items>]}\n\
+<array>\t\t<type><name><left-bracket>[<number>]<right-bracket>\n\
+<value>\t\t[a-zA-Z0-9.-/]* | 'string without single quotes'<value> | \"c-like string\"<value>\n\
+\n\
+Types:\n\
+<empty>\t\tString\n\
+#\t\t32-bit integer\n\
+##\t\t64-bit integer\n\
+$\t\tFloating point number\n\
+\n\
+Modifiers:\n\
+!\t\tReport unknown items as errors\n\
+-\t\tDo not dump item's value\n\
+", stderr);
+ exit(1);
+}
+
+union value { /* Storage for one item's value; the member in use follows the item's cf.type / cf.cls */
+ void *v_ptr; /* CT_STRING: pointer to the string */
+ int v_int; /* CT_INT */
+ u64 v_u64; /* CT_U64 */
+ double v_double; /* CT_DOUBLE */
+ clist list; /* CC_LIST: dump_item() walks the value as a clist of nodes */
+};
+
+#define FLAG_HIDE 0x1 /* Set by the '-' item modifier; dump_item() skips such items */
+#define FLAG_NO_UNKNOWN 0x2 /* Set by the '!' section modifier */
+
+struct item {
+ cnode node; /* Links the item into its section's list (sections are linked into `sections') */
+ uns flags; /* FLAG_xxx bits */
+ struct cf_item cf; /* Descriptor that generate_section() copies into the cf_section */
+ union value value; /* Default value; cf.ptr points here for items outside of lists */
+ uns index; /* Counter incremented by dump_value() to number indexed output */
+};
+
+struct section {
+ struct item item; /* Kept first: generate_section() and dump_item() cast struct item * to struct section * */
+ clist list; /* Items declared inside this section or list */
+ uns count; /* Number of items in list */
+ uns size; /* For lists: starts at sizeof(cnode) and grows by sizeof(union value) per item */
+};
+
+static struct mempool *pool;
+static clist sections;
+static byte *pos;
+
+static void
+parse_white(void)
+{
+  /* Move the parsing cursor past any run of whitespace. */
+  for (; Cspace(*pos); pos++)
+    ;
+}
+
+static void
+parse_char(byte c)
+{
+  /* Consume one character and insist that it is @c. */
+  byte got = *pos;
+  pos++;
+  if (got != c)
+    die("Missing '%c'", c);
+}
+
+static byte *
+parse_name(void)
+{
+  /* Consume the longest run of word characters and return a zero-terminated copy allocated from the pool. */
+  byte *start = pos;
+  for (; Cword(*pos); pos++)
+    ;
+  uns n = pos - start;
+  if (n == 0)
+    die("Expected item/section name");
+  byte *copy = mp_alloc(pool, n + 1);
+  memcpy(copy, start, n);
+  copy[n] = 0;
+  return copy;
+}
+
+/*
+ * Parse the item declarations of one section or list body, starting at `pos',
+ * and append them to @section. Items are separated by ';'. Parsing stops in
+ * front of the closing '}' or at the end of the input; the caller verifies
+ * the brace. Any syntax error terminates the program via die().
+ */
+static void
+parse_section(struct section *section)
+{
+  /* The parser's message is data, so it must never be used as a format string. */
+#define TRY(x) do{byte *_err=(x); if (_err) die("%s", _err); }while(0)
+  for (uns sep = 0; ; sep = 1)
+    {
+      parse_white();
+      if (!*pos || *pos == '}')
+        break;
+      if (sep)
+        parse_char(';');
+      parse_white();
+
+      struct item *item;
+
+      if (*pos == '@')
+        {
+          /* Nested list: @<name>{<items>} */
+          pos++;
+          struct section *sec = mp_alloc_zero(pool, sizeof(*sec));
+          sec->size = sizeof(cnode);
+          clist_init(&sec->list);
+          item = &sec->item;
+          item->cf.name = parse_name();
+          item->cf.cls = CC_LIST;
+          item->cf.number = 1;
+          parse_white();
+          parse_char('{');
+          parse_section(sec);
+          parse_char('}');
+        }
+      else
+        {
+          /* Static item or array: [-]<type><name>[<array-spec>][=<value>] */
+          item = mp_alloc_zero(pool, sizeof(*item));
+          if (*pos == '-')
+            {
+              item->flags |= FLAG_HIDE;
+              pos++;
+            }
+          item->cf.cls = CC_STATIC;
+          item->cf.number = 1;
+          switch (*pos)
+            {
+            case '#':
+              if (*++pos == '#')
+                {
+                  pos++;
+                  item->cf.type = CT_U64;
+                }
+              else
+                item->cf.type = CT_INT;
+              break;
+            case '$':
+              pos++;
+              item->cf.type = CT_DOUBLE;
+              break;
+            default:
+              if (!Cword(*pos))
+                die("Invalid type syntax");
+              item->cf.type = CT_STRING;
+              break;
+            }
+          parse_white();
+          item->cf.name = parse_name();
+          parse_white();
+          if (*pos == '[')
+            {
+              pos++;
+              parse_white();
+              item->cf.cls = CC_DYNAMIC;
+              byte *num = pos;
+              while (*pos && *pos != ']')
+                pos++;
+              if (!*pos)
+                die("Missing ']'");
+              /* Terminate the number in place so that it can be parsed as a string. */
+              *pos++ = 0;
+              if (!*num)
+                item->cf.number = CF_ANY_NUM;
+              else
+                {
+                  int inum;
+                  TRY(cf_parse_int(num, &inum));
+                  if (!inum)
+                    die("Invalid array length");
+                  item->cf.number = inum;
+                }
+              parse_white();
+            }
+          if (*pos == '=')
+            {
+              pos++;
+              parse_white();
+              if (section->item.cf.cls == CC_LIST)
+                die("List items can not have default values");
+              if (item->cf.cls == CC_DYNAMIC)
+                die("Arrays can not have default values");
+              /* The value is unquoted in place: `d' writes behind the reading cursor `pos'. */
+              byte *def = pos, *d = def;
+              /* Stop at the end of the input as well, otherwise the scan runs off the argument. */
+              while (*pos && *pos != ';' && *pos != '}' && !Cspace(*pos))
+                {
+                  if (*pos == '\'')
+                    {
+                      /* Single quotes: everything up to the next quote is literal. */
+                      pos++;
+                      while (*pos != '\'')
+                        {
+                          if (!*pos)
+                            die("Unterminated string");
+                          *d++ = *pos++;
+                        }
+                      pos++;
+                    }
+                  else if (*pos == '"')
+                    {
+                      /* Double quotes: copy up to the first unescaped quote, then expand the escapes. */
+                      pos++;
+                      byte *start = d;
+                      uns esc = 0;
+                      while (*pos != '"' || esc)
+                        {
+                          if (!*pos)
+                            die("Unterminated string");
+                          if (*pos == '\\')
+                            esc ^= 1;
+                          else
+                            esc = 0;
+                          *d++ = *pos++;
+                        }
+                      pos++;
+                      *d = 0;
+                      d = str_unesc(start, start);
+                    }
+                  else
+                    *d++ = *pos++;
+                }
+              uns len = d - def;
+              byte *buf = mp_alloc(pool, len + 1);
+              memcpy(buf, def, len);
+              buf[len] = 0;
+              switch (item->cf.type)
+                {
+                case CT_STRING:
+                  item->value.v_ptr = buf;
+                  break;
+                case CT_INT:
+                  TRY(cf_parse_int(buf, &item->value.v_int));
+                  break;
+                case CT_U64:
+                  TRY(cf_parse_u64(buf, &item->value.v_u64));
+                  break;
+                case CT_DOUBLE:
+                  TRY(cf_parse_double(buf, &item->value.v_double));
+                  break;
+                default:
+                  ASSERT(0);
+                }
+            }
+        }
+      if (section->item.cf.cls == CC_LIST)
+        {
+          /* Inside a list, cf.ptr holds the offset of the value within a list node. */
+          item->cf.ptr = (void *)(uintptr_t)section->size;
+          section->size += sizeof(union value);
+        }
+      else
+        item->cf.ptr = &item->value;
+      clist_add_tail(&section->list, &item->node);
+      section->count++;
+    }
+#undef TRY
+}
+
+/*
+ * Parse the whole specification at `pos': a ';'-separated sequence of
+ * [!]<name>{<items>} sections. Every section is appended to the global
+ * `sections' list; a leading '!' sets FLAG_NO_UNKNOWN on it.
+ * Syntax errors terminate the program via die().
+ */
+static void
+parse_outer(void)
+{
+  for (uns sep = 0; ; sep = 1)
+    {
+      parse_white();
+      if (!*pos)
+        break;
+      if (sep)
+        parse_char(';');
+      parse_white();
+      struct section *sec = mp_alloc_zero(pool, sizeof(*sec));
+      if (*pos == '!')
+        {
+          pos++;
+          sec->item.flags |= FLAG_NO_UNKNOWN;
+        }
+      sec->item.cf.name = parse_name();
+      parse_white();
+      parse_char('{');
+      clist_add_tail(&sections, &sec->item.node);
+      clist_init(&sec->list);
+      parse_section(sec);
+      parse_char('}');
+    }
+}
+
+static struct cf_section *
+generate_section(struct section *section)
+{
+  /* Build a cf_section from the parsed items, descending into nested lists. */
+  struct cf_section *out = mp_alloc_zero(pool, sizeof(*out));
+  struct cf_item *slot;
+
+  if (section->item.cf.cls == CC_LIST)
+    out->size = section->size;
+
+  /* One descriptor per item plus the CC_END terminator. */
+  out->cfg = mp_alloc_zero(pool, sizeof(struct cf_item) * (section->count + 1));
+  slot = out->cfg;
+  CLIST_FOR_EACH(struct item *, item, section->list)
+    {
+      *slot = item->cf;
+      if (slot->cls == CC_LIST)
+        slot->u.sec = generate_section((struct section *)item);
+      slot++;
+    }
+  slot->cls = CC_END;
+  return out;
+}
+
+static bb_t path;
+
+/*
+ * Print one value to stdout as a shell assignment CF_<path>_<name>='<value>'.
+ * If @array is non-zero, the variable name gets an index taken from the
+ * item's counter, which is incremented first (so numbering starts at 1).
+ * @v points to the value; its type is given by item->cf.type. Single quotes
+ * inside the value are emitted as '\'' so that the result stays one
+ * single-quoted shell word.
+ */
+static void
+dump_value(uns array, struct item *item, void *v)
+{
+  byte buf[128], *value = buf;
+  if (!array)
+    printf("CF_%s_%s='", path.ptr, item->cf.name);
+  else
+    printf("CF_%s_%s[%u]='", path.ptr, item->cf.name, ++item->index);
+  switch (item->cf.type)
+    {
+    case CT_INT:
+      sprintf(buf, "%d", *(int *)v);
+      break;
+    case CT_U64:
+      /* %llu takes an unsigned long long; do not pass the value through a signed type. */
+      sprintf(buf, "%llu", (unsigned long long) *(u64 *)v);
+      break;
+    case CT_DOUBLE:
+      sprintf(buf, "%g", *(double *)v);
+      break;
+    case CT_STRING:
+      /* A NULL string is printed as an empty value. */
+      if (*(byte **)v)
+        value = *(byte **)v;
+      else
+        *value = 0;
+      break;
+    default:
+      ASSERT(0);
+    }
+  while (*value) {
+    if (*value == '\'')
+      printf("'\\''");
+    else
+      putchar(*value);
+    value++;
+  }
+  printf("'\n");
+}
+
+static void
+dump_item(struct item *item, void *ptr, uns path_len) /* Dump @item, recursing into lists; @ptr is the enclosing list node or NULL, @path_len the used length of `path' */
+{
+ if (item->flags & FLAG_HIDE)
+ return;
+ byte *val = (byte *)((uintptr_t)ptr + (uintptr_t)item->cf.ptr); /* cf.ptr is an offset inside a list node, or an absolute address when ptr is NULL */
+ if (item->cf.cls == CC_LIST)
+ {
+ uns len = strlen(item->cf.name);
+ bb_grow(&path, path_len + len + 1);
+ path.ptr[path_len] = '_'; /* extend the variable prefix by "_<listname>" */
+ memcpy(path.ptr + path_len + 1, item->cf.name, len);
+ CLIST_FOR_EACH(cnode *, ptr2, *(clist *)val) /* every node of the list ... */
+ CLIST_FOR_EACH(struct item *, item2, ((struct section *)item)->list) /* ... times every declared member */
+ dump_item(item2, ptr2, path_len + len + 1);
+ }
+ else
+ {
+ bb_grow(&path, path_len + 1)[path_len] = 0; /* terminate the prefix before dump_value() prints it */
+ if (item->cf.cls == CC_STATIC)
+ dump_value(!!ptr, item, val); /* values living inside a list node are printed with an index */
+ else
+ {
+ val = *(void **)val; /* the item stores a pointer to the array's elements */
+ uns len = DARY_LEN(val); /* presumably the element count of the dynamic array -- confirm against DARY_LEN */
+ uns size = cf_type_size(item->cf.type, NULL);
+ for (uns i = 0; i < len; i++, val += size)
+ dump_value(1, item, val);
+ }
+ }
+}
+
+/*
+ * Entry point. The last command-line argument is the specification of the
+ * sections and items to dump; it is parsed and declared to the configuration
+ * module. The remaining arguments go through cf_getopt(); any argument it
+ * does not consume leads to the usage message. Finally the value of every
+ * declared item is printed as a shell variable assignment.
+ */
+int main(int argc, char **argv)
+{
+  log_init("config");
+  if (argc < 2)
+    help();
+  /* Detach the specification from the argument vector. */
+  pos = argv[argc - 1];
+  argv[argc - 1] = NULL;
+
+  pool = mp_new(0x1000);
+  clist_init(&sections);
+  parse_outer();
+  CLIST_FOR_EACH(struct section *, sec, sections)
+    cf_declare_section(sec->item.cf.name, generate_section(sec), !(sec->item.flags & FLAG_NO_UNKNOWN));
+
+  if (cf_getopt(argc - 1, argv, CF_SHORT_OPTS, CF_NO_LONG_OPTS, NULL) != -1)
+    help();
+
+  bb_init(&path);
+  CLIST_FOR_EACH(struct section *, section, sections)
+    {
+      /* The variable prefix starts with the section name; dump_item() extends and terminates it. */
+      uns len = strlen(section->item.cf.name);
+      memcpy(bb_grow(&path, len), section->item.cf.name, len);
+      CLIST_FOR_EACH(struct item *, item, section->list)
+        dump_item(item, NULL, len);
+    }
+  bb_done(&path);
+
+  return 0;
+}
+
--- /dev/null
+# Tests for configuration parser
+
+Run: ../obj/ucw/shell/config -C/dev/null -S 'sec1{int1=23; long1=1234567812345678; long2=4321; str1="s1"; str2="s2"}' 'sec1 {#int1; ##long1; -str1; str2; #int2=123; ##long2=1234; #int3=0x10; #int4; $dbl1=001.100; $dbl2}; sec2{str3}'
+Out: CF_sec1_int1='23'
+ CF_sec1_long1='1234567812345678'
+ CF_sec1_str2='s2'
+ CF_sec1_int2='123'
+ CF_sec1_long2='4321'
+ CF_sec1_int3='16'
+ CF_sec1_int4='0'
+ CF_sec1_dbl1='1.1'
+ CF_sec1_dbl2='0'
+ CF_sec2_str3=''
+
+Run: ../obj/ucw/shell/config -C/dev/null -S 'sec1{list1 1 a1 b1; list1:clear; list1 2 a2 b2 3 a3 b3}' 'sec1 {@list1 {#int1; str1; -str2}}'
+Out: CF_sec1_list1_int1[1]='2'
+ CF_sec1_list1_str1[1]='a2'
+ CF_sec1_list1_int1[2]='3'
+ CF_sec1_list1_str1[2]='a3'
+
+Run: ../obj/ucw/shell/config -C/dev/null -S 'sec1{ar1 a b c d; ar1 a b c; ar2 1 2; ar3 1.1}' 'sec1 {ar1[]; #ar2[2]; $ar3[-2]}'
+Out: CF_sec1_ar1[1]='a'
+ CF_sec1_ar1[2]='b'
+ CF_sec1_ar1[3]='c'
+ CF_sec1_ar2[1]='1'
+ CF_sec1_ar2[2]='2'
+ CF_sec1_ar3[1]='1.1'
+
+Run: ../obj/ucw/shell/config -C/dev/null -S 'sec1{list1 {str1=1; list2=a b c}; list1 {str1=2; list2=d e}}' 'sec1 {@list1 {str1; @list2{str2}}}'
+Out: CF_sec1_list1_str1[1]='1'
+ CF_sec1_list1_list2_str2[1]='a'
+ CF_sec1_list1_list2_str2[2]='b'
+ CF_sec1_list1_list2_str2[3]='c'
+ CF_sec1_list1_str1[2]='2'
+ CF_sec1_list1_list2_str2[4]='d'
+ CF_sec1_list1_list2_str2[5]='e'
+
+Run: ../obj/ucw/shell/config -C/dev/null 'sec{str=a'\''b"c'\''d"\\e'\''f"g}'
+Out: CF_sec_str='ab"cd\e'\''fg'
--- /dev/null
+# The UCW Library -- Shell Functions
+# (c) 2005 Martin Mares <mj@ucw.cz>
+#
+# This software may be freely distributed and used according to the terms
+# of the GNU Lesser General Public License.
+
+UCW_CF=
+while [ "${1:0:2}" = "-C" -o "${1:0:2}" = "-S" ] ; do
+ if [ -z "${1:2:1}" ] ; then
+ UCW_CF="$UCW_CF $1 $2"
+ shift 2
+ else
+ UCW_CF="$UCW_CF $1"
+ shift 1
+ fi
+done
+
+function log # msg -- pass the message to bin/logger with level letter I, tagged with $UCW_PROGNAME
+{
+ bin/logger $UCW_PROGNAME I "$1"
+}
+
+function errlog # msg -- pass the message to bin/logger with level letter E, tagged with $UCW_PROGNAME
+{
+ bin/logger $UCW_PROGNAME E "$1"
+}
+
+function warnlog # msg -- pass the message to bin/logger as a warning (level letter W), tagged with $UCW_PROGNAME
+{
+	bin/logger $UCW_PROGNAME W "$1"
+}
+
+function die # msg -- pass the message to bin/logger with level letter !, then exit the script with status 1
+{
+ bin/logger $UCW_PROGNAME ! "$1"
+ exit 1
+}
+
+function parse-config # section vars... -- run bin/config with the collected $UCW_CF options and eval what it prints
+{
+ eval `bin/config$UCW_CF "$@"`
+}
--- /dev/null
+/*
+ * UCW Library Utilities -- A Simple Logger for use in shell scripts
+ *
+ * (c) 2001--2009 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+#include "ucw/log.h"
+
+#include <stdio.h>
+#include <string.h>
+
+/*
+ * Usage: logger [<logname>:]<progname> <level> [<text>]
+ *
+ * Logs <text> at the level whose letter is <level>. Without <text>, every
+ * line read from standard input is logged as a separate message. If the
+ * first argument has the form <logname>:<progname>, log_file() is called
+ * with <logname>.
+ */
+int
+main(int argc, char **argv)
+{
+  byte buf[1024], *c;
+
+  log_init("logger");
+  if (argc < 3 || argc > 4 || strlen(argv[2]) != 1)
+    die("Usage: logger [<logname>:]<progname> <level> [<text>]");
+  if ((c = strchr(argv[1], ':')))
+    {
+      *c++ = 0;
+      log_init(c);
+      log_file(argv[1]);
+    }
+  else
+    log_init(argv[1]);
+
+  /* Translate the level letter to the numeric level. */
+  uns level = 0;
+  while (level < L_MAX && LS_LEVEL_LETTER(level) != argv[2][0])
+    level++;
+  if (level >= L_MAX)
+    die("Unknown logging level `%s'", argv[2]);
+
+  /* The text comes from the user, so it is passed as data, never as the format string. */
+  if (argc > 3)
+    msg(level, "%s", argv[3]);
+  else
+    while (fgets(buf, sizeof(buf), stdin))
+      {
+        c = strchr(buf, '\n');
+        if (c)
+          *c = 0;
+        msg(level, "%s", buf);
+      }
+  return 0;
+}
--- /dev/null
+/*
+ * UCW Library -- Catching of signals and calling callback functions
+ *
+ * (c) 2004, Robert Spalek <robert@ucw.cz>
+ * (c) 2006 Martin Mares <mj@ucw.cz>
+ */
+
+#include "ucw/lib.h"
+#include "ucw/threads.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <signal.h>
+
+static int sig_handler_nest[NSIG];
+static struct sigaction sig_handler_old[NSIG];
+
+static void
+signal_handler_internal(int sig)
+{
+  /* Run this thread's callback for @sig; abort unless a callback exists and returns zero. */
+  struct ucwlib_context *ctx = ucwlib_thread_context();
+  if (ctx->signal_handlers && ctx->signal_handlers[sig] && !ctx->signal_handlers[sig](sig))
+    return;
+  abort();
+}
+
+void
+handle_signal(int signum)
+{
+  /* Calls nest: only the first one installs the internal handler and saves the old action. */
+  ucwlib_lock();
+  int nested = sig_handler_nest[signum];
+  sig_handler_nest[signum] = nested + 1;
+  if (nested == 0)
+    {
+      struct sigaction act;
+      bzero(&act, sizeof(act));
+      act.sa_flags = SA_NODEFER;
+      act.sa_handler = signal_handler_internal;
+      if (sigaction(signum, &act, &sig_handler_old[signum]) < 0)
+        die("sigaction: %m");
+    }
+  ucwlib_unlock();
+}
+
+void
+unhandle_signal(int signum)
+{
+  /* Undo one handle_signal(); the saved action is restored when the nesting count drops to zero. */
+  ucwlib_lock();
+  ASSERT(sig_handler_nest[signum]);
+  sig_handler_nest[signum]--;
+  if (sig_handler_nest[signum] == 0
+      && sigaction(signum, &sig_handler_old[signum], NULL) < 0)
+    die("sigaction: %m");
+  ucwlib_unlock();
+}
+
+ucw_sighandler_t
+set_signal_handler(int signum, ucw_sighandler_t newh)
+{
+  /* Replace this thread's callback for @signum and return the previous one. */
+  struct ucwlib_context *ctx = ucwlib_thread_context();
+  ucw_sighandler_t previous;
+
+  /* The per-thread table is allocated on first use. */
+  if (!ctx->signal_handlers)
+    ctx->signal_handlers = xmalloc_zero(NSIG * sizeof(ucw_sighandler_t));
+  previous = ctx->signal_handlers[signum];
+  ctx->signal_handlers[signum] = newh;
+  return previous;
+}
--- /dev/null
+/*
+ * UCW Library -- Linked Lists of Simple Items
+ *
+ * (c) 2006 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+#include "ucw/mempool.h"
+#include "ucw/conf.h"
+#include "ucw/simple-lists.h"
+
+simp_node *
+simp_append(struct mempool *mp, clist *l)
+{
+  /* Allocate a one-value node from @mp and link it at the tail of @l; the value is left unset. */
+  simp_node *fresh;
+
+  fresh = mp_alloc_fast(mp, sizeof(simp_node));
+  clist_add_tail(l, &fresh->n);
+  return fresh;
+}
+
+simp2_node *
+simp2_append(struct mempool *mp, clist *l)
+{
+  /* Allocate a two-value node from @mp and link it at the tail of @l; the values are left unset. */
+  simp2_node *fresh;
+
+  fresh = mp_alloc_fast(mp, sizeof(simp2_node));
+  clist_add_tail(l, &fresh->n);
+  return fresh;
+}
+
+/* Configuration sections for common lists */
+
+struct cf_section cf_string_list_config = { /* Configuration section describing a simp_node */
+ CF_TYPE(simp_node),
+ CF_ITEMS {
+ CF_STRING("String", PTR_TO(simp_node, s)), /* item "String" is bound to the member s */
+ CF_END
+ }
+};
+
+struct cf_section cf_2string_list_config = { /* Configuration section describing a simp2_node */
+ CF_TYPE(simp2_node),
+ CF_ITEMS {
+ CF_STRING("Src", PTR_TO(simp2_node, s1)), /* item "Src" is bound to the member s1 */
+ CF_STRING("Dest", PTR_TO(simp2_node, s2)), /* item "Dest" is bound to the member s2 */
+ CF_END
+ }
+};
--- /dev/null
+/*
+ * UCW Library -- Linked Lists of Simple Items
+ *
+ * (c) 2006 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_SIMPLE_LISTS_H
+#define _UCW_SIMPLE_LISTS_H
+
+#include "ucw/clists.h"
+
+/***
+ * To simplify very common usage of circular linked lists, whose nodes can hold only one or two trivial values,
+ * we define some generic node types, called the simple nodes.
+ *
+ * To avoid some type casts, values in simple nodes are defined as unions of most frequent types.
+ ***/
+
+/**
+ * Simple node with one value.
+ **/
+typedef struct simp_node {
+ cnode n;
+ union {
+ char *s;
+ void *p;
+ int i;
+ uns u;
+ };
+} simp_node;
+
+/**
+ * Simple node with two values.
+ **/
+typedef struct simp2_node {
+ cnode n;
+ union {
+ char *s1;
+ void *p1;
+ int i1;
+ uns u1;
+ };
+ union {
+ char *s2;
+ void *p2;
+ int i2;
+ uns u2;
+ };
+} simp2_node;
+
+struct mempool;
+
+/**
+ * Allocate a new one-value node on memory pool @mp and insert it to @l. The value is undefined and should be changed afterwards.
+ **/
+simp_node *simp_append(struct mempool *mp, clist *l);
+
+/**
+ * Allocate a new two-value node on memory pool @mp and insert it to @l. The values are undefined and should be changed afterwards.
+ **/
+simp2_node *simp2_append(struct mempool *mp, clist *l);
+
+/* Configuration sections */
+
+/**
+ * Default definition of the configuration section with one-value string node. Identifier of the value is `String`.
+ **/
+extern struct cf_section cf_string_list_config;
+
+/**
+ * Default definition of the configuration section with two-value string node. Identifiers of the values are `Src` and `Dest`.
+ **/
+extern struct cf_section cf_2string_list_config;
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Single-Linked Lists
+ *
+ * (c) 2005 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+#include "ucw/slists.h"
+
+static inline snode *
+slist_raw_prev(slist *l, snode *n)
+{
+  /* Linear search for the node whose successor is @n; the list head itself is a candidate. */
+  for (snode *m = &l->head; m; m = m->next)
+    if (m->next == n)
+      return m;
+  /* @n is not in the list. */
+  ASSERT(0);
+}
+
+void *
+slist_prev(slist *l, snode *n)
+{
+  /* The list head is not a real node, so the first node has no predecessor. */
+  snode *pred = slist_raw_prev(l, n);
+  if (pred == &l->head)
+    return NULL;
+  return pred;
+}
+
+void
+slist_insert_before(slist *l, snode *what, snode *before)
+{
+  /* Link @what in front of @before, then redirect @before's predecessor to it. */
+  what->next = before;
+  snode *pred = slist_raw_prev(l, before);
+  pred->next = what;
+}
+
+void
+slist_remove(slist *l, snode *n)
+{
+  /* A NULL node is silently ignored. */
+  if (!n)
+    return;
+  slist_remove_after(l, slist_raw_prev(l, n));
+}
+
+#ifdef TEST
+
+#include <stdio.h>
+#include <alloca.h>
+
+int main(void)
+{
+  struct x {
+    snode n;
+    int val;
+  };
+  slist l;
+  struct x *x, *prev;
+
+  /* Odd values are added at the head, even values at the tail. */
+  slist_init(&l);
+  for (int i=1; i<=10; i++)
+    {
+      struct x *node = alloca(sizeof(*node));
+      node->val = i;
+      if (i % 2 == 0)
+        slist_add_tail(&l, &node->n);
+      else
+        slist_add_head(&l, &node->n);
+    }
+
+  /* Remove 5 through its predecessor and 6 by a search, both while walking the list. */
+  SLIST_WALK_DELSAFE(x, l, prev)
+    {
+      if (x->val == 5)
+        slist_remove_after(&l, &prev->n);
+      else if (x->val == 6)
+        slist_remove(&l, &x->n);
+    }
+  SLIST_FOR_EACH(struct x *, x, l)
+    printf("%d/", x->val);
+  putchar('\n');
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Single-Linked Lists
+ *
+ * (c) 2005 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_SLISTS_H
+#define _UCW_SLISTS_H
+
+/**
+ * Common header for list nodes.
+ **/
+typedef struct snode {
+ struct snode *next;
+} snode;
+
+/**
+ * Single-linked list.
+ **/
+typedef struct slist {
+ struct snode head, *last;
+} slist;
+
+/**
+ * Initialize a new single-linked list. Must be called before any other function.
+ **/
+static inline void slist_init(slist *l)
+{
+  /* An empty list has neither a first nor a last node. */
+  l->last = NULL;
+  l->head.next = NULL;
+}
+
+/**
+ * Return the first node of @l or NULL if @l is empty.
+ **/
+static inline void *slist_head(slist *l)
+{
+  /* The first real node hangs off the head. */
+  snode *first = l->head.next;
+  return first;
+}
+
+/**
+ * Return the last node of @l or NULL if @l is empty.
+ **/
+static inline void *slist_tail(slist *l)
+{
+  /* The list keeps a pointer to its last node. */
+  snode *final = l->last;
+  return final;
+}
+
+/**
+ * Find the next node to @n or NULL if @n is the last one.
+ **/
+static inline void *slist_next(snode *n)
+{
+  /* The last node has a NULL successor. */
+  snode *succ = n->next;
+  return succ;
+}
+
+/**
+ * Return a non-zero value iff @l is empty.
+ **/
+static inline int slist_empty(slist *l)
+{
+  /* The list is empty exactly when the head has no successor. */
+  return l->head.next == NULL;
+}
+
+/**
+ * Insert a new node in front of all other nodes.
+ **/
+static inline void slist_add_head(slist *l, snode *n)
+{
+  snode *old_first = l->head.next;
+
+  n->next = old_first;
+  l->head.next = n;
+  /* The first node added to an empty list is also the last one. */
+  if (l->last == NULL)
+    l->last = n;
+}
+
+/**
+ * Insert a new node after all other nodes.
+ **/
+static inline void slist_add_tail(slist *l, snode *n)
+{
+  /* In an empty list the new node hangs off the head, otherwise off the current last node. */
+  if (l->last == NULL)
+    l->head.next = n;
+  else
+    l->last->next = n;
+  n->next = NULL;
+  l->last = n;
+}
+
+/**
+ * Insert a new node just after the node @after. To insert a new head, use @slist_add_head() instead.
+ **/
+static inline void slist_insert_after(slist *l, snode *what, snode *after)
+{
+  what->next = after->next;
+  after->next = what;
+  /* Inserting behind the last node makes @what the new last node. */
+  if (what->next == NULL)
+    l->last = what;
+}
+
+/**
+ * Quickly remove the node next to @after. The node may not exist.
+ **/
+static inline void slist_remove_after(slist *l, snode *after)
+{
+  snode *victim = after->next;
+
+  /* Nothing follows @after. */
+  if (!victim)
+    return;
+  after->next = victim->next;
+  if (l->last != victim)
+    return;
+  /* The last node is gone: @after becomes last, unless it is the head (the list is empty then). */
+  if (after == &l->head)
+    l->last = NULL;
+  else
+    l->last = after;
+}
+
+/**
+ * Remove the first node in @l. The list can be empty.
+ **/
+static inline void slist_remove_head(slist *l)
+{
+  /* The first node is the one right after the head. */
+  snode *anchor = &l->head;
+  slist_remove_after(l, anchor);
+}
+
+/* Loops */
+
+/**
+ * Loop over all nodes in the @list and perform the next C statement on them. The current node is stored in @n which must be defined before as pointer to any type.
+ * The list should not be changed during this loop command.
+ **/
+#define SLIST_WALK(n,list) for(n=(void*)(list).head.next; (n); (n)=(void*)((snode*)(n))->next)
+
+/**
+ * Same as @SLIST_WALK(), but allows removal of the current node. This macro requires one more variable to store the pointer to the previous node (useful for @slist_remove_after()).
+ **/
+#define SLIST_WALK_DELSAFE(n,list,prev) for((prev)=(void*)&(list).head; (n)=(void*)((snode*)prev)->next; (prev)=(((snode*)(prev))->next==(snode*)(n) ? (void*)(n) : (void*)(prev)))
+
+/**
+ * Same as @SLIST_WALK(), but it defines the variable for the current node in place. @type should be a pointer type.
+ **/
+#define SLIST_FOR_EACH(type,n,list) for(type n=(void*)(list).head.next; n; n=(void*)((snode*)(n))->next)
+
+/* Non-trivial functions */
+
+/**
+ * Find the previous node to @n or NULL if @n is the first one. Beware linear time complexity.
+ **/
+void *slist_prev(slist *l, snode *n);
+
+/**
+ * Insert a new node just before the node @before. To insert a new tail, use @slist_add_tail(). Beware linear time complexity.
+ **/
+void slist_insert_before(slist *l, snode *what, snode *before);
+
+/**
+ * Remove node @n. Beware linear time complexity.
+ **/
+void slist_remove(slist *l, snode *n);
+
+/**
+ * Remove the last node in @l. The list can be empty.
+ **/
+static inline void slist_remove_tail(slist *l)
+{
+  /* l->last is NULL for an empty list, and slist_remove() ignores NULL. */
+  snode *final = l->last;
+  slist_remove(l, final);
+}
+
+/**
+ * Compute the number of nodes in @l. Beware linear time complexity.
+ **/
+static inline uns slist_size(slist *l)
+{
+  /* Walk the whole list and count the nodes. */
+  uns total = 0;
+  for (snode *cur = l->head.next; cur; cur = cur->next)
+    total++;
+  return total;
+}
+
+#endif
--- /dev/null
+# Test for slists module
+
+Run: ../obj/ucw/slists-t
+Out: 9/7/3/1/2/4/8/10/
--- /dev/null
+# Makefile for the UCW Sorter (c) 2007 Martin Mares <mj@ucw.cz>
+
+DIRS+=ucw/sorter
+
+LIBUCW_MODS+=$(addprefix sorter/, config govern sbuck array)
+LIBUCW_SORTER_INCLUDES=$(addprefix sorter/, array.h array-simple.h common.h s-fixint.h \
+ s-internal.h s-multiway.h s-radix.h s-twoway.h sorter.h)
+LIBUCW_INCLUDES+=$(LIBUCW_SORTER_INCLUDES)
+
+ifdef CONFIG_DEBUG_TOOLS
+PROGS+=$(o)/ucw/sorter/sort-test
+endif
+
+$(o)/ucw/sorter/sort-test: $(o)/ucw/sorter/sort-test.o $(LIBUCW)
+
+INSTALL_TARGETS+=install-ucw-sorter
+install-ucw-sorter: # Copy the sorter headers from run/include/ucw/sorter to $(DESTDIR)$(INSTALL_INCLUDE_DIR)/ucw/sorter
+ install -d -m 755 $(DESTDIR)$(INSTALL_INCLUDE_DIR)/ucw/sorter
+ install -m 644 $(addprefix run/include/ucw/,$(LIBUCW_SORTER_INCLUDES)) $(DESTDIR)$(INSTALL_INCLUDE_DIR)/ucw/sorter/
+
+.PHONY: install-ucw-sorter
--- /dev/null
+Cleanups:
+o Log messages should show both original and new size of the data. The speed
+ should be probably calculated from the former.
+o Buffer sizing in shep-export.
+
+Improvements:
+o When quicksorting a large input (especially in threaded case), invest more
+ time to picking a good pivot.
+o Overlay presorter I/O with internal sorting.
+
+Users of ucw/sorter/array.h which might use radix-sorting:
+indexer/chewer.c
+indexer/lexfreq.c
+indexer/mkgraph.c
+indexer/reftexts.c
--- /dev/null
+/*
+ * UCW Library -- Universal Simple Array Sorter
+ *
+ * (c) 2003--2008 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+/*
+ * This is not a normal header file, it's a generator of sorting
+ * routines. Each time you include it with parameters set in the
+ * corresponding preprocessor macros, it generates an array sorter
+ * with the parameters given.
+ *
+ * You might wonder why the heck do we implement our own array sorter
+ * instead of using qsort(). The primary reason is that qsort handles
+ * only continuous arrays, but we need to sort array-like data structures
+ * where the only way to access elements is by using an indexing macro.
+ * Besides that, we are more than 2 times faster.
+ *
+ * So much for advocacy, here are the parameters (those marked with [*]
+ * are mandatory):
+ *
+ * ASORT_PREFIX(x) [*] add a name prefix (used on all global names
+ * defined by the sorter)
+ * ASORT_KEY_TYPE [*] data type of a single array entry key
+ * ASORT_ELT(i) returns the key of i-th element; if this macro is not
+ * defined, the function gets a pointer to an array to be sorted
+ * ASORT_LT(x,y) x < y for ASORT_KEY_TYPE (default: "x<y")
+ * ASORT_SWAP(i,j) swap i-th and j-th element (default: assume _ELT
+ * is an l-value and swap just the keys)
+ * ASORT_THRESHOLD threshold for switching between quicksort and insertsort
+ * ASORT_EXTRA_ARGS extra arguments for the sort function (they are always
+ * visible in all the macros supplied above), starts with comma
+ *
+ * After including this file, a function ASORT_PREFIX(sort)(uns array_size)
+ * or ASORT_PREFIX(sort)(ASORT_KEY_TYPE *array, uns array_size) [if ASORT_ELT
+ * is not defined] is declared and all parameter macros are automatically
+ * undef'd.
+ */
+
+#ifndef ASORT_LT
+#define ASORT_LT(x,y) ((x) < (y))
+#endif
+
+#ifndef ASORT_SWAP
+#define ASORT_SWAP(i,j) do { ASORT_KEY_TYPE tmp = ASORT_ELT(i); ASORT_ELT(i)=ASORT_ELT(j); ASORT_ELT(j)=tmp; } while (0)
+#endif
+
+#ifndef ASORT_THRESHOLD
+#define ASORT_THRESHOLD 8 /* Guesswork and experimentation */
+#endif
+
+#ifndef ASORT_EXTRA_ARGS
+#define ASORT_EXTRA_ARGS
+#endif
+
+#ifndef ASORT_ELT
+#define ASORT_ARRAY_ARG ASORT_KEY_TYPE *array,
+#define ASORT_ELT(i) array[i]
+#else
+#define ASORT_ARRAY_ARG
+#endif
+
+/**
+ * The generated sorting function. If `ASORT_ELT` macro is not provided, the
+ * @ASORT_ARRAY_ARG is equal to `ASORT_KEY_TYPE *array` and is the array to be
+ * sorted. If the macro is provided, this parameter is omitted. In that case,
+ * you can sort global variables or pass your structure by @ASORT_EXTRA_ARGS.
+ *
+ * The sort runs in two phases: an iterative quicksort which leaves every
+ * partition whose index span is below `ASORT_THRESHOLD` unsorted, followed by
+ * a single insertion-sort pass over the whole array. Inputs with at most one
+ * element are returned untouched.
+ **/
+static void ASORT_PREFIX(sort)(ASORT_ARRAY_ARG uns array_size ASORT_EXTRA_ARGS)
+{
+ /* Explicit stack of postponed partitions; l and r are inclusive indices */
+ struct stk { int l, r; } stack[8*sizeof(uns)];
+ int l, r, left, right, m;
+ uns sp = 0;
+ ASORT_KEY_TYPE pivot;
+
+ if (array_size <= 1)
+ return;
+
+ /* QuickSort with optimizations a'la Sedgewick, but stop at ASORT_THRESHOLD */
+
+ left = 0;
+ right = array_size - 1;
+ for(;;)
+ {
+ l = left;
+ r = right;
+ m = (l+r)/2;
+ /* Order the elements at l, m, r; the middle one becomes the pivot */
+ if (ASORT_LT(ASORT_ELT(m), ASORT_ELT(l)))
+ ASORT_SWAP(l,m);
+ if (ASORT_LT(ASORT_ELT(r), ASORT_ELT(m)))
+ {
+ ASORT_SWAP(m,r);
+ if (ASORT_LT(ASORT_ELT(m), ASORT_ELT(l)))
+ ASORT_SWAP(l,m);
+ }
+ pivot = ASORT_ELT(m);
+ /* Partition: l scans upwards, r scans downwards, until they cross */
+ do
+ {
+ while (ASORT_LT(ASORT_ELT(l), pivot))
+ l++;
+ while (ASORT_LT(pivot, ASORT_ELT(r)))
+ r--;
+ if (l < r)
+ {
+ ASORT_SWAP(l,r);
+ l++;
+ r--;
+ }
+ else if (l == r)
+ {
+ l++;
+ r--;
+ }
+ }
+ while (l <= r);
+ /* Now the left partition is [left, r] and the right one is [l, right] */
+ if ((r - left) >= ASORT_THRESHOLD && (right - l) >= ASORT_THRESHOLD)
+ {
+ /* Both partitions ok => push the larger one */
+ if ((r - left) > (right - l))
+ {
+ stack[sp].l = left;
+ stack[sp].r = r;
+ left = l;
+ }
+ else
+ {
+ stack[sp].l = l;
+ stack[sp].r = right;
+ right = r;
+ }
+ sp++;
+ }
+ else if ((r - left) >= ASORT_THRESHOLD)
+ {
+ /* Left partition OK, right undersize */
+ right = r;
+ }
+ else if ((right - l) >= ASORT_THRESHOLD)
+ {
+ /* Right partition OK, left undersize */
+ left = l;
+ }
+ else
+ {
+ /* Both partitions undersize => pop */
+ if (!sp)
+ break;
+ sp--;
+ left = stack[sp].l;
+ right = stack[sp].r;
+ }
+ }
+
+ /*
+ * We have a partially sorted array, finish by insertsort. Inspired
+ * by qsort() in GNU libc.
+ */
+
+ /* Find minimal element which will serve as a barrier */
+ r = MIN(array_size, ASORT_THRESHOLD);
+ m = 0;
+ for (l=1; l<r; l++)
+ if (ASORT_LT(ASORT_ELT(l),ASORT_ELT(m)))
+ m = l;
+ ASORT_SWAP(0,m);
+
+ /* Insertion sort */
+ for (m=1; m<(int)array_size; m++)
+ {
+ l=m;
+ /* No lower-bound check on l here: the loop relies on the barrier placed at index 0 */
+ while (ASORT_LT(ASORT_ELT(m),ASORT_ELT(l-1)))
+ l--;
+ /* Move element m down to position l by swapping it through l..m-1 */
+ while (l < m)
+ {
+ ASORT_SWAP(l,m);
+ l++;
+ }
+ }
+}
+
+#undef ASORT_PREFIX
+#undef ASORT_KEY_TYPE
+#undef ASORT_ELT
+#undef ASORT_LT
+#undef ASORT_SWAP
+#undef ASORT_THRESHOLD
+#undef ASORT_EXTRA_ARGS
+#undef ASORT_ARRAY_ARG
--- /dev/null
+/*
+ * UCW Library -- Optimized Array Sorter
+ *
+ * (c) 2003--2007 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#undef LOCAL_DEBUG
+
+#include "ucw/lib.h"
+#include "ucw/sorter/common.h"
+
+#include <string.h>
+#include <alloca.h>
+
+#define ASORT_MIN_SHIFT 2
+
+#define ASORT_TRACE(x...) ASORT_XTRACE(1, x)
+#define ASORT_XTRACE(level, x...) do { if (sorter_trace_array >= level) msg(L_DEBUG, x); } while(0)
+
+/*
+ * Sequential recursive radix-sort of `num_elts' elements stored in `array',
+ * using `buffer' as scratch space. One pass distributes the elements to
+ * 2^radix_bits buckets by the hash bits selected by `shift'; each bucket is
+ * then either quicksorted or radix-sorted recursively with the roles of
+ * `array' and `buffer' exchanged.
+ */
+static void
+asort_radix(struct asort_context *ctx, void *array, void *buffer, uns num_elts, uns hash_bits, uns swapped_output)
+{
+ // swapped_output == 0 if the result should be returned in `array', otherwise in `buffer'
+ uns buckets = (1 << ctx->radix_bits);
+ uns shift = (hash_bits > ctx->radix_bits) ? (hash_bits - ctx->radix_bits) : 0;
+ uns cnt[buckets];
+
+#if 0
+ static int reported[64];
+ if (!reported[hash_bits]++)
+#endif
+ DBG(">>> n=%u h=%d s=%d sw=%d", num_elts, hash_bits, shift, swapped_output);
+
+ // Count how many elements fall into each bucket
+ bzero(cnt, sizeof(cnt));
+ ctx->radix_count(array, num_elts, cnt, shift);
+
+ // Turn the counts into starting positions of the buckets (exclusive prefix sums)
+ uns pos = 0;
+ for (uns i=0; i<buckets; i++)
+ {
+ uns j = cnt[i];
+ cnt[i] = pos;
+ pos += j;
+ }
+ ASSERT(pos == num_elts);
+
+ // Distribute the elements to `buffer'. The loop below expects cnt[i] to hold
+ // the end of bucket i afterwards (the generated radix_split post-increments the pointers).
+ ctx->radix_split(array, buffer, num_elts, cnt, shift);
+ pos = 0;
+ for (uns i=0; i<buckets; i++)
+ {
+ uns n = cnt[i] - pos;
+ if (n < ctx->radix_threshold || shift < ASORT_MIN_SHIFT)
+ {
+ // Small bucket or too few hash bits left: sort in place in `buffer'
+ // and copy back if the caller wants the result in `array'
+ ctx->quicksort(buffer, n);
+ if (!swapped_output)
+ memcpy(array, buffer, n * ctx->elt_size);
+ }
+ else
+ asort_radix(ctx, buffer, array, n, shift, !swapped_output);
+ // Advance both pointers to the next bucket (byte arithmetic on void pointers)
+ array += n * ctx->elt_size;
+ buffer += n * ctx->elt_size;
+ pos = cnt[i];
+ }
+}
+
+#ifdef CONFIG_UCW_THREADS
+
+#include "ucw/threads.h"
+#include "ucw/workqueue.h"
+#include "ucw/eltpool.h"
+
+static uns asort_threads_use_count;
+static uns asort_threads_ready;
+static struct worker_pool asort_thread_pool;
+
+static uns
+rs_estimate_stack(void)
+{
+  /*
+   * Stack space needed by the recursive radix-sorter: one array of
+   * 2^CONFIG_UCW_RADIX_SORTER_BITS counters for each of at most
+   * 64/CONFIG_UCW_RADIX_SORTER_BITS + 1 levels.
+   */
+  uns levels = 1 + (64 / CONFIG_UCW_RADIX_SORTER_BITS);
+  uns per_level = (1 << CONFIG_UCW_RADIX_SORTER_BITS) * sizeof(uns);
+  return levels * per_level;
+}
+
+/*
+ * Register one more user of the sorter thread pool. If `run' is non-zero and
+ * the pool is not running yet, initialize it with `sorter_threads' threads.
+ * The use count is incremented even when `run' is zero. Everything happens
+ * under ucwlib_lock().
+ */
+void
+asort_start_threads(uns run)
+{
+ ucwlib_lock();
+ asort_threads_use_count++;
+ if (run && !asort_threads_ready)
+ {
+ // XXX: If somebody overrides the radix-sorter parameters to insane values,
+ // he also should override the stack size to insane values.
+ // The default thread stack is enlarged by the estimated needs of the recursive radix-sorter.
+ asort_thread_pool.stack_size = ucwlib_thread_stack_size + rs_estimate_stack();
+ asort_thread_pool.num_threads = sorter_threads;
+ ASORT_TRACE("Initializing thread pool (%d threads, %dK stack)", sorter_threads, asort_thread_pool.stack_size >> 10);
+ worker_pool_init(&asort_thread_pool);
+ asort_threads_ready = 1;
+ }
+ ucwlib_unlock();
+}
+
+/*
+ * Drop one user of the sorter thread pool; when the last user leaves and the
+ * pool is running, shut it down. Runs under ucwlib_lock().
+ */
+void
+asort_stop_threads(void)
+{
+  ucwlib_lock();
+  asort_threads_use_count--;
+  if (asort_threads_use_count == 0 && asort_threads_ready)
+    {
+      ASORT_TRACE("Shutting down thread pool");
+      worker_pool_cleanup(&asort_thread_pool);
+      asort_threads_ready = 0;
+    }
+  ucwlib_unlock();
+}
+
+/* One unit of work of the threaded quicksort: a sub-array to sort or to split. */
+struct qs_work {
+ struct work w; // Must stay first: the code casts between struct work * and struct qs_work *
+ struct asort_context *ctx;
+ void *array; // Start of the sub-array
+ uns num_elts; // Number of elements in the sub-array
+ int left, right; // Output of quicksplit, or LR_UNDEF if the sub-array was sorted completely
+#define LR_UNDEF -100
+};
+
+/*
+ * Worker callback of the threaded quicksort. Sub-arrays smaller than
+ * ctx->thread_threshold are sorted completely and marked with LR_UNDEF;
+ * larger ones are only split and the split points are returned in
+ * w->left and w->right for the submitter to schedule both halves.
+ */
+static void
+qs_handle_work(struct worker_thread *thr UNUSED, struct work *ww)
+{
+ struct qs_work *w = (struct qs_work *) ww;
+ struct asort_context *ctx = w->ctx;
+
+ DBG("Thread %d: got %u elts", thr->id, w->num_elts);
+ if (w->num_elts < ctx->thread_threshold)
+ {
+ ctx->quicksort(w->array, w->num_elts);
+ w->left = w->right = LR_UNDEF;
+ }
+ else
+ ctx->quicksplit(w->array, w->num_elts, &w->left, &w->right);
+ DBG("Thread %d: returning l=%u r=%u", thr->id, w->left, w->right);
+}
+
+/*
+ * Allocate a quicksort work item from ctx->eltpool and preset its callback,
+ * priority and context. The caller fills in array and num_elts.
+ */
+static struct qs_work *
+qs_alloc_work(struct asort_context *ctx)
+{
+  struct qs_work *item = ep_alloc(ctx->eltpool);
+
+  item->ctx = ctx;
+  item->w.go = qs_handle_work;
+  item->w.priority = 0;
+  return item;
+}
+
+/*
+ * Parallel quicksort of ctx->array. The whole array is submitted as a single
+ * work item; whenever a worker returns a split, both halves are submitted as
+ * new work items with priority one higher than their parent, until the pieces
+ * are small enough to be sorted directly by the workers.
+ */
+static void
+threaded_quicksort(struct asort_context *ctx)
+{
+ struct work_queue q;
+ struct qs_work *v, *w;
+
+ asort_start_threads(1);
+ work_queue_init(&asort_thread_pool, &q);
+ ctx->eltpool = ep_new(sizeof(struct qs_work), 1000);
+
+ // Seed the queue with the whole array
+ w = qs_alloc_work(ctx);
+ w->array = ctx->array;
+ w->num_elts = ctx->num_elts;
+ work_submit(&q, &w->w);
+
+ // Loop until work_wait() returns NULL
+ while (v = (struct qs_work *) work_wait(&q))
+ {
+ if (v->left != LR_UNDEF)
+ {
+ // Left half is [0, right]; schedule it only if it has more than one element
+ if (v->right > 0)
+ {
+ w = qs_alloc_work(ctx);
+ w->array = v->array;
+ w->num_elts = v->right + 1;
+ w->w.priority = v->w.priority + 1;
+ work_submit(&q, &w->w);
+ }
+ // Right half is [left, num_elts-1]; schedule it only if it has more than one element
+ if (v->left < (int)v->num_elts - 1)
+ {
+ w = qs_alloc_work(ctx);
+ w->array = v->array + v->left * ctx->elt_size;
+ w->num_elts = v->num_elts - v->left;
+ w->w.priority = v->w.priority + 1;
+ work_submit(&q, &w->w);
+ }
+ }
+ ep_free(ctx->eltpool, v);
+ }
+
+ ep_delete(ctx->eltpool);
+ work_queue_cleanup(&q);
+ asort_stop_threads();
+}
+
+/* One unit of work of the threaded radix-sort (counting, splitting or finishing). */
+struct rs_work {
+ struct work w; // Must stay first: the code casts between struct work * and struct rs_work *
+ struct asort_context *ctx;
+ void *array, *buffer; // Like asort_radix().
+ uns num_elts;
+ uns shift;
+ uns swap_output;
+ uns cnt[0]; // Per-bucket counters; only items allocated with extra room for them (see threaded_radixsort()) may use it
+};
+
+/* Worker callback: count the elements of one block into the block's own counters. */
+static void
+rs_count(struct worker_thread *thr UNUSED, struct work *ww)
+{
+  struct rs_work *job = (struct rs_work *) ww;
+  struct asort_context *ctx = job->ctx;
+
+  DBG("Thread %d: Counting %u items, shift=%d", thr->id, job->num_elts, job->shift);
+  ctx->radix_count(job->array, job->num_elts, job->cnt, job->shift);
+  DBG("Thread %d: Counting done", thr->id);
+}
+
+/* Worker callback: distribute the elements of one block to the output buffer. */
+static void
+rs_split(struct worker_thread *thr UNUSED, struct work *ww)
+{
+  struct rs_work *job = (struct rs_work *) ww;
+  struct asort_context *ctx = job->ctx;
+
+  DBG("Thread %d: Splitting %u items, shift=%d", thr->id, job->num_elts, job->shift);
+  ctx->radix_split(job->array, job->buffer, job->num_elts, job->cnt, job->shift);
+  DBG("Thread %d: Splitting done", thr->id);
+}
+
+/*
+ * Finish sorting of a single bucket, either by quicksort or by the sequential
+ * radix-sorter. It is used both as a worker callback and as a direct call from
+ * rs_radix(); in the latter case `thr' is NULL and the per-thread debug
+ * messages are skipped.
+ */
+static void
+rs_finish(struct worker_thread *thr UNUSED, struct work *ww)
+{
+ struct rs_work *w = (struct rs_work *) ww;
+
+ if (thr)
+ DBG("Thread %d: Finishing %u items, shift=%d", thr->id, w->num_elts, w->shift);
+ if (w->shift < ASORT_MIN_SHIFT || w->num_elts < w->ctx->radix_threshold)
+ {
+ // Sort in place in w->array and copy to w->buffer if the result is wanted there
+ w->ctx->quicksort(w->array, w->num_elts);
+ if (w->swap_output)
+ memcpy(w->buffer, w->array, w->num_elts * w->ctx->elt_size);
+ }
+ else
+ asort_radix(w->ctx, w->array, w->buffer, w->num_elts, w->shift, w->swap_output);
+ if (thr)
+ DBG("Thread %d: Finishing done", thr->id);
+}
+
+/*
+ * Collect work items from ctx->rs_work_queue until work_wait() returns NULL
+ * and return each of them to ctx->eltpool.
+ */
+static void
+rs_wait_small(struct asort_context *ctx)
+{
+  for (;;)
+    {
+      struct rs_work *done = (struct rs_work *) work_wait(ctx->rs_work_queue);
+      if (!done)
+        break;
+      DBG("Reaping small chunk of %u items", done->num_elts);
+      ep_free(ctx->eltpool, done);
+    }
+}
+
+/*
+ * One level of the parallel radix-sort. The input is cut to `sorter_threads'
+ * blocks which are counted and then split in parallel using the pre-allocated
+ * work items in ctx->rs_works. Each resulting bucket is then either finished
+ * by rs_finish() (inline when it is shorter than ctx->thread_chunk, otherwise
+ * on background) or processed by a recursive call with the roles of `array'
+ * and `buffer' exchanged. The meaning of `swapped_output' is the same as in
+ * asort_radix().
+ */
+static void
+rs_radix(struct asort_context *ctx, void *array, void *buffer, uns num_elts, uns hash_bits, uns swapped_output)
+{
+ uns buckets = (1 << ctx->radix_bits);
+ uns shift = (hash_bits > ctx->radix_bits) ? (hash_bits - ctx->radix_bits) : 0;
+ uns cnt[buckets];
+ uns blksize = num_elts / sorter_threads;
+ DBG(">>> n=%u h=%d s=%d blk=%u sw=%d", num_elts, hash_bits, shift, blksize, swapped_output);
+
+ // If there are any small chunks in progress, wait for them to finish
+ rs_wait_small(ctx);
+
+ // Start parallel counting
+ void *iptr = array;
+ for (uns i=0; i<sorter_threads; i++)
+ {
+ struct rs_work *w = ctx->rs_works[i];
+ w->w.priority = 0;
+ w->w.go = rs_count;
+ w->ctx = ctx;
+ w->array = iptr;
+ w->buffer = buffer;
+ w->num_elts = blksize;
+ // The last block also takes the remainder of the division
+ if (i == sorter_threads-1)
+ w->num_elts += num_elts % sorter_threads;
+ w->shift = shift;
+ iptr += w->num_elts * ctx->elt_size;
+ bzero(w->cnt, sizeof(uns) * buckets);
+ work_submit(ctx->rs_work_queue, &w->w);
+ }
+
+ // Get bucket sizes from the counts
+ bzero(cnt, sizeof(cnt));
+ for (uns i=0; i<sorter_threads; i++)
+ {
+ struct rs_work *w = (struct rs_work *) work_wait(ctx->rs_work_queue);
+ ASSERT(w);
+ for (uns j=0; j<buckets; j++)
+ cnt[j] += w->cnt[j];
+ }
+
+ // Calculate bucket starts
+ uns pos = 0;
+ for (uns i=0; i<buckets; i++)
+ {
+ uns j = cnt[i];
+ cnt[i] = pos;
+ pos += j;
+ }
+ ASSERT(pos == num_elts);
+
+ // Start parallel splitting
+ for (uns i=0; i<sorter_threads; i++)
+ {
+ struct rs_work *w = ctx->rs_works[i];
+ w->w.go = rs_split;
+ // Block i writes each bucket right after the parts written by blocks 0..i-1;
+ // after the last block, cnt[j] holds the end of bucket j
+ for (uns j=0; j<buckets; j++)
+ {
+ uns k = w->cnt[j];
+ w->cnt[j] = cnt[j];
+ cnt[j] += k;
+ }
+ work_submit(ctx->rs_work_queue, &w->w);
+ }
+ ASSERT(cnt[buckets-1] == num_elts);
+
+ // Wait for splits to finish
+ while (work_wait(ctx->rs_work_queue))
+ ;
+
+ // Recurse on buckets
+ pos = 0;
+ for (uns i=0; i<buckets; i++)
+ {
+ uns n = cnt[i] - pos;
+ if (!n)
+ continue;
+ if (n < ctx->thread_threshold || shift < ASORT_MIN_SHIFT)
+ {
+ // The data now live in `buffer', so the roles of the two areas are exchanged
+ struct rs_work *w = ep_alloc(ctx->eltpool);
+ w->w.priority = 0;
+ w->w.go = rs_finish;
+ w->ctx = ctx;
+ w->array = buffer;
+ w->buffer = array;
+ w->num_elts = n;
+ w->shift = shift;
+ w->swap_output = !swapped_output;
+ if (n < ctx->thread_chunk)
+ {
+ DBG("Sorting block %u+%u inline", pos, n);
+ rs_finish(NULL, &w->w);
+ ep_free(ctx->eltpool, w);
+ }
+ else
+ {
+ // Reaped later by rs_wait_small()
+ DBG("Scheduling block %u+%u", pos, n);
+ work_submit(ctx->rs_work_queue, &w->w);
+ }
+ }
+ else
+ rs_radix(ctx, buffer, array, n, shift, !swapped_output);
+ pos = cnt[i];
+ array += n * ctx->elt_size;
+ buffer += n * ctx->elt_size;
+ }
+}
+
+/*
+ * Parallel radix-sort of ctx->array with ctx->buffer as the auxiliary area.
+ * `swap' is passed to rs_radix() as swapped_output, i.e., non-zero asks for
+ * the result to be left in ctx->buffer.
+ */
+static void
+threaded_radixsort(struct asort_context *ctx, uns swap)
+{
+ struct work_queue q;
+
+ asort_start_threads(1);
+ work_queue_init(&asort_thread_pool, &q);
+
+ // Prepare work structures for counting and splitting.
+ // We use big_alloc(), because we want to avoid cacheline aliasing between threads.
+ // Each structure is followed by room for one counter per bucket (the cnt[] member).
+ ctx->rs_work_queue = &q;
+ ctx->rs_works = alloca(sizeof(struct rs_work *) * sorter_threads);
+ for (uns i=0; i<sorter_threads; i++)
+ ctx->rs_works[i] = big_alloc(sizeof(struct rs_work) + sizeof(uns) * (1 << ctx->radix_bits));
+
+ // Prepare a pool for all remaining small bits which will be sorted on background.
+ // These items have no room for cnt[]; rs_finish() does not touch it.
+ ctx->eltpool = ep_new(sizeof(struct rs_work), 1000);
+
+ // Do the big splitting
+ rs_radix(ctx, ctx->array, ctx->buffer, ctx->num_elts, ctx->hash_bits, swap);
+ for (uns i=0; i<sorter_threads; i++)
+ big_free(ctx->rs_works[i], sizeof(struct rs_work) + sizeof(uns) * (1 << ctx->radix_bits));
+
+ // Finish the small blocks
+ rs_wait_small(ctx);
+
+ ASSERT(!ctx->eltpool->num_allocated);
+ ep_delete(ctx->eltpool);
+ work_queue_cleanup(&q);
+ asort_stop_threads();
+}
+
+#else
+
+// Without CONFIG_UCW_THREADS, the thread pool management functions are empty stubs.
+void asort_start_threads(uns run UNUSED) { }
+void asort_stop_threads(void) { }
+
+#endif
+
+/*
+ * Guess whether the radix-sorter should be asked to leave its result in the
+ * buffer (return 1) or in the original array (return 0). The value is used by
+ * asort_run() as the `swapped_output' argument of the radix-sorters and also
+ * decides which of the two areas is reported as the result.
+ *
+ * NOTE(review): `bits' starts at ctx->radix_bits, so it drops to 0 after the
+ * first iteration and the loop body runs at most once. If the intention was
+ * to simulate all passes over the hash, ctx->hash_bits was probably meant --
+ * confirm with the author before changing.
+ */
+static uns
+predict_swap(struct asort_context *ctx)
+{
+ uns bits = ctx->radix_bits;
+ uns elts = ctx->num_elts;
+ uns swap = 0;
+
+ while (elts >= ctx->radix_threshold && bits >= ASORT_MIN_SHIFT)
+ {
+ DBG("Predicting pass: %u elts, %d bits", elts, bits);
+ swap = !swap;
+ // Expect each pass to divide the bucket size by the number of buckets
+ elts >>= ctx->radix_bits;
+ bits = MAX(bits, ctx->radix_bits) - ctx->radix_bits;
+ }
+ return swap;
+}
+
+/*
+ * Entry point of the array sorter: sort ctx->array using the callbacks stored
+ * in the context. Quicksort is used when the input is shorter than the radix
+ * threshold, when hash_bits <= ASORT_MIN_SHIFT, when no radix_split callback
+ * is available or when radix-sorting is disabled by sorter_debug; otherwise
+ * radix-sort is used. Both have a threaded variant which is chosen when
+ * threads are compiled in, sorter_threads > 1, the input has at least
+ * thread_threshold elements and threads are not disabled by sorter_debug.
+ *
+ * After a radix-sort which left its result in the buffer, ctx->array is
+ * redirected to ctx->buffer, so the caller finds the sorted data there.
+ */
+void
+asort_run(struct asort_context *ctx)
+{
+ // Translate the configured limits from bytes to elements
+ ctx->thread_threshold = MIN(sorter_thread_threshold / ctx->elt_size, ~0U);
+ ctx->thread_chunk = MIN(sorter_thread_chunk / ctx->elt_size, ~0U);
+ ctx->radix_threshold = MIN(sorter_radix_threshold / ctx->elt_size, ~0U);
+
+ ASORT_TRACE("Array-sorting %u items per %u bytes, hash_bits=%d", ctx->num_elts, ctx->elt_size, ctx->hash_bits);
+ ASORT_XTRACE(2, "Limits: thread_threshold=%u, thread_chunk=%u, radix_threshold=%u",
+ ctx->thread_threshold, ctx->thread_chunk, ctx->radix_threshold);
+ // Marked UNUSED, because it is referenced only when CONFIG_UCW_THREADS is defined
+ uns allow_threads UNUSED = (sorter_threads > 1 &&
+ ctx->num_elts >= ctx->thread_threshold &&
+ !(sorter_debug & SORT_DEBUG_ASORT_NO_THREADS));
+
+ if (ctx->num_elts < ctx->radix_threshold ||
+ ctx->hash_bits <= ASORT_MIN_SHIFT ||
+ !ctx->radix_split ||
+ (sorter_debug & SORT_DEBUG_ASORT_NO_RADIX))
+ {
+#ifdef CONFIG_UCW_THREADS
+ if (allow_threads)
+ {
+ ASORT_XTRACE(2, "Decided to use parallel quicksort");
+ threaded_quicksort(ctx);
+ }
+ else
+#endif
+ {
+ ASORT_XTRACE(2, "Decided to use sequential quicksort");
+ ctx->quicksort(ctx->array, ctx->num_elts);
+ }
+ }
+ else
+ {
+ uns swap = predict_swap(ctx);
+#ifdef CONFIG_UCW_THREADS
+ if (allow_threads)
+ {
+ ASORT_XTRACE(2, "Decided to use parallel radix-sort (swap=%d)", swap);
+ threaded_radixsort(ctx, swap);
+ }
+ else
+#endif
+ {
+ ASORT_XTRACE(2, "Decided to use sequential radix-sort (swap=%d)", swap);
+ asort_radix(ctx, ctx->array, ctx->buffer, ctx->num_elts, ctx->hash_bits, swap);
+ }
+ // The sorted data were left in the buffer, so report the buffer as the result
+ if (swap)
+ ctx->array = ctx->buffer;
+ }
+
+ ASORT_XTRACE(2, "Array-sort finished");
+}
--- /dev/null
+/*
+ * UCW Library -- Optimized Array Sorter
+ *
+ * (c) 2003--2007 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+/*
+ * This is a generator of routines for sorting huge arrays, similar to the one
+ * in ucw/sorter/array-simple.h. It cannot handle discontiguous arrays, but it is able
+ * to employ radix-sorting if a monotone hash function is available and also
+ * use several threads in parallel on SMP systems (this assumes that all
+ * callbacks you provide are thread-safe).
+ *
+ * It is usually called internally by the generic sorter machinery, but
+ * you are free to use it explicitly if you need.
+ *
+ * So much for advocacy, here are the parameters (those marked with [*]
+ * are mandatory):
+ *
+ * ASORT_PREFIX(x) [*] add a name prefix (used on all global names
+ * defined by the sorter)
+ * ASORT_KEY_TYPE [*] data type of a single array entry key
+ * ASORT_LT(x,y) x < y for ASORT_KEY_TYPE (default: "x<y")
+ * ASORT_HASH(x) a monotone hash function (satisfying hash(x) < hash(y) => x<y)
+ * ASORT_LONG_HASH hashes are 64-bit numbers (default is 32 bits)
+ *
+ * Fine-tuning parameters: (if you really insist)
+ *
+ * ASORT_THRESHOLD threshold for switching between quicksort and insertsort
+ * ASORT_RADIX_BITS how many bits of the hash functions are to be used at once for
+ * radix-sorting.
+ *
+ * After including this file, a function
+ * ASORT_KEY_TYPE *ASORT_PREFIX(sort)(ASORT_KEY_TYPE *array, uns num_elts [, ASORT_KEY_TYPE *buf, uns hash_bits])
+ * is declared and all parameter macros are automatically undef'd. Here `buf' is an
+ * auxiliary buffer of the same size as the input array, required whenever radix
+ * sorting should be used, and `hash_bits' is the number of significant bits returned
+ * by the hash function. If the buffer is specified, the sorting function returns either
+ * a pointer to the input array or to the buffer, depending on where the result is stored.
+ * If you do not use hashing, these parameters should be omitted.
+ */
+
+#include "ucw/sorter/common.h"
+
+#define Q(x) ASORT_PREFIX(x)
+
+typedef ASORT_KEY_TYPE Q(key);
+
+#ifndef ASORT_LT
+#define ASORT_LT(x,y) ((x) < (y))
+#endif
+
+#ifndef ASORT_SWAP
+#define ASORT_SWAP(i,j) do { Q(key) tmp = array[i]; array[i]=array[j]; array[j]=tmp; } while (0)
+#endif
+
+#ifndef ASORT_THRESHOLD
+#define ASORT_THRESHOLD 8 /* Guesswork and experimentation */
+#endif
+
+#ifndef ASORT_RADIX_BITS
+#define ASORT_RADIX_BITS CONFIG_UCW_RADIX_SORTER_BITS
+#endif
+#define ASORT_RADIX_MASK ((1 << (ASORT_RADIX_BITS)) - 1)
+
+/*
+ * QuickSort with optimizations a'la Sedgewick, inspired by qsort() from GNU libc.
+ *
+ * Sorts `num_elts' keys stored at `array_ptr' in place. The quicksort phase
+ * leaves partitions whose index span is below ASORT_THRESHOLD unsorted; a
+ * final insertion-sort pass over the whole array finishes the job.
+ */
+
+static void Q(quicksort)(void *array_ptr, uns num_elts)
+{
+ Q(key) *array = array_ptr;
+ /* Explicit stack of postponed partitions; l and r are inclusive indices */
+ struct stk { int l, r; } stack[8*sizeof(uns)];
+ int l, r, left, right, m;
+ uns sp = 0;
+ Q(key) pivot;
+
+ if (num_elts <= 1)
+ return;
+
+ left = 0;
+ right = num_elts - 1;
+ for(;;)
+ {
+ l = left;
+ r = right;
+ m = (l+r)/2;
+ /* Order the elements at l, m, r; the middle one becomes the pivot */
+ if (ASORT_LT(array[m], array[l]))
+ ASORT_SWAP(l,m);
+ if (ASORT_LT(array[r], array[m]))
+ {
+ ASORT_SWAP(m,r);
+ if (ASORT_LT(array[m], array[l]))
+ ASORT_SWAP(l,m);
+ }
+ pivot = array[m];
+ /* Partition: l scans upwards, r scans downwards, until they cross */
+ do
+ {
+ while (ASORT_LT(array[l], pivot))
+ l++;
+ while (ASORT_LT(pivot, array[r]))
+ r--;
+ if (l < r)
+ {
+ ASORT_SWAP(l,r);
+ l++;
+ r--;
+ }
+ else if (l == r)
+ {
+ l++;
+ r--;
+ }
+ }
+ while (l <= r);
+ /* Now the left partition is [left, r] and the right one is [l, right] */
+ if ((r - left) >= ASORT_THRESHOLD && (right - l) >= ASORT_THRESHOLD)
+ {
+ /* Both partitions ok => push the larger one */
+ if ((r - left) > (right - l))
+ {
+ stack[sp].l = left;
+ stack[sp].r = r;
+ left = l;
+ }
+ else
+ {
+ stack[sp].l = l;
+ stack[sp].r = right;
+ right = r;
+ }
+ sp++;
+ }
+ else if ((r - left) >= ASORT_THRESHOLD)
+ {
+ /* Left partition OK, right undersize */
+ right = r;
+ }
+ else if ((right - l) >= ASORT_THRESHOLD)
+ {
+ /* Right partition OK, left undersize */
+ left = l;
+ }
+ else
+ {
+ /* Both partitions undersize => pop */
+ if (!sp)
+ break;
+ sp--;
+ left = stack[sp].l;
+ right = stack[sp].r;
+ }
+ }
+
+ /*
+ * We have a partially sorted array, finish by insertsort. Inspired
+ * by qsort() in GNU libc.
+ */
+
+ /* Find minimal element which will serve as a barrier */
+ r = MIN(num_elts, ASORT_THRESHOLD);
+ m = 0;
+ for (l=1; l<r; l++)
+ if (ASORT_LT(array[l], array[m]))
+ m = l;
+ ASORT_SWAP(0,m);
+
+ /* Insertion sort */
+ for (m=1; m<(int)num_elts; m++)
+ {
+ l=m;
+ /* No lower-bound check on l here: the loop relies on the barrier placed at index 0 */
+ while (ASORT_LT(array[m], array[l-1]))
+ l--;
+ /* Move element m down to position l by swapping it through l..m-1 */
+ while (l < m)
+ {
+ ASORT_SWAP(l,m);
+ l++;
+ }
+ }
+}
+
+/*
+ * Just the splitting part of QuickSort
+ *
+ * Performs a single partitioning step on the whole array and returns the
+ * final scan positions: *rightp is the last index of the left part and
+ * *leftp is the first index of the right part. There is no guard for very
+ * short inputs; the caller visible in array.c invokes it only for arrays of
+ * at least thread_threshold elements.
+ */
+
+static void Q(quicksplit)(void *array_ptr, uns num_elts, int *leftp, int *rightp)
+{
+ Q(key) *array = array_ptr;
+ int l, r, m;
+ Q(key) pivot;
+
+ l = 0;
+ r = num_elts - 1;
+ m = (l+r)/2;
+ /* Order the elements at l, m, r; the middle one becomes the pivot */
+ if (ASORT_LT(array[m], array[l]))
+ ASORT_SWAP(l,m);
+ if (ASORT_LT(array[r], array[m]))
+ {
+ ASORT_SWAP(m,r);
+ if (ASORT_LT(array[m], array[l]))
+ ASORT_SWAP(l,m);
+ }
+ pivot = array[m];
+ /* Partition: l scans upwards, r scans downwards, until they cross */
+ do
+ {
+ while (ASORT_LT(array[l], pivot))
+ l++;
+ while (ASORT_LT(pivot, array[r]))
+ r--;
+ if (l < r)
+ {
+ ASORT_SWAP(l,r);
+ l++;
+ r--;
+ }
+ else if (l == r)
+ {
+ l++;
+ r--;
+ }
+ }
+ while (l <= r);
+ *leftp = l;
+ *rightp = r;
+}
+
+#ifdef ASORT_HASH
+
+/*
+ * Counting pass of the radix-sort: for every element of `src', increment the
+ * counter of the bucket selected by ASORT_RADIX_BITS bits of its hash starting
+ * at bit `shift'. The counters are only incremented, never cleared here.
+ *
+ * The switch instantiates a separate loop for every supported shift
+ * (presumably to let the compiler see the shift as a constant -- confirm).
+ * Shifts of 32 and more are available only with ASORT_LONG_HASH; any
+ * unsupported shift hits ASSERT(0).
+ */
+static void Q(radix_count)(void *src_ptr, uns num_elts, uns *cnt, uns shift)
+{
+ Q(key) *src = src_ptr;
+ uns i;
+
+ switch (shift)
+ {
+#define RC(s) \
+ case s: \
+ for (i=0; i<num_elts; i++) \
+ cnt[ (ASORT_HASH(src[i]) >> s) & ASORT_RADIX_MASK ] ++; \
+ break; \
+
+#ifdef ASORT_LONG_HASH
+ RC(63); RC(62); RC(61); RC(60); RC(59); RC(58); RC(57); RC(56);
+ RC(55); RC(54); RC(53); RC(52); RC(51); RC(50); RC(49); RC(48);
+ RC(47); RC(46); RC(45); RC(44); RC(43); RC(42); RC(41); RC(40);
+ RC(39); RC(38); RC(37); RC(36); RC(35); RC(34); RC(33); RC(32);
+#endif
+ RC(31); RC(30); RC(29); RC(28); RC(27); RC(26); RC(25); RC(24);
+ RC(23); RC(22); RC(21); RC(20); RC(19); RC(18); RC(17); RC(16);
+ RC(15); RC(14); RC(13); RC(12); RC(11); RC(10); RC(9); RC(8);
+ RC(7); RC(6); RC(5); RC(4); RC(3); RC(2); RC(1); RC(0);
+ default:
+ ASSERT(0);
+ }
+#undef RC
+}
+
+/*
+ * Splitting pass of the radix-sort: copy every element of `src' to `dest' at
+ * the position given by ptrs[bucket] and advance that pointer. On entry,
+ * ptrs[] holds the output position for each bucket; on return, each entry
+ * has been advanced by the number of elements written to that bucket.
+ *
+ * As in radix_count, there is one loop per supported shift and any
+ * unsupported shift hits ASSERT(0).
+ */
+static void Q(radix_split)(void *src_ptr, void *dest_ptr, uns num_elts, uns *ptrs, uns shift)
+{
+ Q(key) *src = src_ptr, *dest = dest_ptr;
+ uns i;
+
+ switch (shift)
+ {
+#define RS(s) \
+ case s: \
+ for (i=0; i<num_elts; i++) \
+ dest[ ptrs[ (ASORT_HASH(src[i]) >> s) & ASORT_RADIX_MASK ]++ ] = src[i]; \
+ break;
+
+#ifdef ASORT_LONG_HASH
+ RS(63); RS(62); RS(61); RS(60); RS(59); RS(58); RS(57); RS(56);
+ RS(55); RS(54); RS(53); RS(52); RS(51); RS(50); RS(49); RS(48);
+ RS(47); RS(46); RS(45); RS(44); RS(43); RS(42); RS(41); RS(40);
+ RS(39); RS(38); RS(37); RS(36); RS(35); RS(34); RS(33); RS(32);
+#endif
+ RS(31); RS(30); RS(29); RS(28); RS(27); RS(26); RS(25); RS(24);
+ RS(23); RS(22); RS(21); RS(20); RS(19); RS(18); RS(17); RS(16);
+ RS(15); RS(14); RS(13); RS(12); RS(11); RS(10); RS(9); RS(8);
+ RS(7); RS(6); RS(5); RS(4); RS(3); RS(2); RS(1); RS(0);
+ default:
+ ASSERT(0);
+ }
+#undef RS
+}
+
+#endif
+
+#ifdef ASORT_HASH
+#define ASORT_HASH_ARGS , Q(key) *buffer, uns hash_bits
+#else
+#define ASORT_HASH_ARGS
+#endif
+
+/**
+ * The generated function. The @array is the data to be sorted, @num_elts tells
+ * how many elements the array has. If you did not provide `ASORT_HASH`, then
+ * the `ASORT_HASH_ARGS` is empty (there are only the two parameters in that
+ * case). When you provide it, the function gains two more parameters in the
+ * `ASORT_HASH_ARGS` macro. They are `ASORT_KEY_TYPE *@buffer`, which must be a
+ * memory buffer of the same size as the input array, and `uns @hash_bits`,
+ * specifying how many significant bits the hash function returns.
+ *
+ * The function returns pointer to the sorted data, either the @array or the
+ * @buffer argument.
+ **/
+static ASORT_KEY_TYPE *ASORT_PREFIX(sort)(ASORT_KEY_TYPE *array, uns num_elts ASORT_HASH_ARGS)
+{
+ /* Fields not listed here (e.g., the radix callbacks without ASORT_HASH) are zero-initialized */
+ struct asort_context ctx = {
+ .array = array,
+ .num_elts = num_elts,
+ .elt_size = sizeof(Q(key)),
+ .quicksort = Q(quicksort),
+ .quicksplit = Q(quicksplit),
+#ifdef ASORT_HASH
+ .buffer = buffer,
+ .hash_bits = hash_bits,
+ .radix_count = Q(radix_count),
+ .radix_split = Q(radix_split),
+ .radix_bits = ASORT_RADIX_BITS,
+#endif
+ };
+ asort_run(&ctx);
+ /* asort_run() is declared to take the context by pointer, so ctx.array is re-read for the result */
+ return ctx.array;
+}
+
+#undef ASORT_HASH
+#undef ASORT_KEY_TYPE
+#undef ASORT_LONG_HASH
+#undef ASORT_LT
+#undef ASORT_PAGE_ALIGNED
+#undef ASORT_PREFIX
+#undef ASORT_RADIX_BITS
+#undef ASORT_RADIX_MASK
+#undef ASORT_SWAP
+#undef ASORT_THRESHOLD
+#undef ASORT_HASH_ARGS
+#undef Q
--- /dev/null
+/*
+ * UCW Library -- Universal Sorter: Common Declarations
+ *
+ * (c) 2007 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_SORTER_COMMON_H
+#define _UCW_SORTER_COMMON_H
+
+#include "ucw/clists.h"
+
+/* Configuration variables */
+extern uns sorter_trace, sorter_trace_array, sorter_stream_bufsize;
+extern uns sorter_debug, sorter_min_radix_bits, sorter_max_radix_bits, sorter_add_radix_bits;
+extern uns sorter_min_multiway_bits, sorter_max_multiway_bits;
+extern uns sorter_threads;
+extern u64 sorter_bufsize, sorter_small_input;
+extern u64 sorter_thread_threshold, sorter_thread_chunk, sorter_radix_threshold;
+extern struct fb_params sorter_fb_params, sorter_small_fb_params;
+
+#define SORT_TRACE(x...) do { if (sorter_trace) msg(L_DEBUG, x); } while(0)
+#define SORT_XTRACE(level, x...) do { if (sorter_trace >= level) msg(L_DEBUG, x); } while(0)
+
+/* Bit flags tested against the `sorter_debug' configuration variable. */
+enum sort_debug {
+ SORT_DEBUG_NO_PRESORT = 1,
+ SORT_DEBUG_NO_JOIN = 2,
+ SORT_DEBUG_KEEP_BUCKETS = 4,
+ SORT_DEBUG_NO_RADIX = 8,
+ SORT_DEBUG_NO_MULTIWAY = 16,
+ SORT_DEBUG_ASORT_NO_RADIX = 32, // Array sorter: never use radix-sort
+ SORT_DEBUG_ASORT_NO_THREADS = 64 // Array sorter: never use threads
+};
+
+struct sort_bucket;
+
+/* Context passed to sorter_run() and to all callbacks stored inside it. */
+struct sort_context {
+ struct fastbuf *in_fb; // presumably the input stream -- TODO confirm
+ struct fastbuf *out_fb; // presumably the output stream -- TODO confirm
+ uns hash_bits;
+ u64 in_size;
+ struct fb_params *fb_params;
+
+ struct mempool *pool;
+ clist bucket_list;
+ void *big_buf; // Sorting buffer handed to custom_presort as `buf'
+ size_t big_buf_size; // Its size, handed to custom_presort as `bufsize'
+
+ // Custom presorter, called for source buckets flagged SBF_CUSTOM_PRESORT
+ int (*custom_presort)(struct fastbuf *dest, void *buf, size_t bufsize);
+
+ // Take as much as possible from the source bucket, sort it in memory and dump to destination bucket.
+ // Return 1 if there is more data available in the source bucket.
+ int (*internal_sort)(struct sort_context *ctx, struct sort_bucket *in, struct sort_bucket *out, struct sort_bucket *out_only);
+
+ // Estimate how much input data from `b' will fit in the internal sorting buffer.
+ u64 (*internal_estimate)(struct sort_context *ctx, struct sort_bucket *b);
+
+ // Two-way split/merge: merge up to 2 source buckets to up to 2 destination buckets.
+ // Bucket arrays are NULL-terminated.
+ void (*twoway_merge)(struct sort_context *ctx, struct sort_bucket **ins, struct sort_bucket **outs);
+
+ // Multi-way merge: merge an arbitrary number of source buckets to a single destination bucket.
+ void (*multiway_merge)(struct sort_context *ctx, struct sort_bucket **ins, struct sort_bucket *out);
+
+ // Radix split according to hash function
+ void (*radix_split)(struct sort_context *ctx, struct sort_bucket *in, struct sort_bucket **outs, uns bitpos, uns numbits);
+
+ // State variables of internal_sort
+ void *key_buf;
+ int more_keys;
+
+ // Timing
+ timestamp_t start_time; // Timer state
+ uns last_pass_time; // Duration of the most recently timed pass, as returned by get_timer()
+ uns total_int_time, total_pre_time, total_ext_time;
+};
+
+void sorter_run(struct sort_context *ctx);
+
+/* Buffers */
+
+void *sorter_alloc(struct sort_context *ctx, uns size);
+void sorter_prepare_buf(struct sort_context *ctx);
+void sorter_alloc_buf(struct sort_context *ctx);
+void sorter_free_buf(struct sort_context *ctx);
+
+/* Buckets */
+
+/* A bucket of data handled by the sorter; buckets are linked together by a clist node. */
+struct sort_bucket {
+ cnode n; // List node; code casts n.prev back to struct sort_bucket *, so it must stay first
+ struct sort_context *ctx;
+ uns flags; // Combination of enum sort_bucket_flags
+ struct fastbuf *fb;
+ byte *filename;
+ u64 size; // Size in bytes (not valid when writing)
+ uns runs; // Number of runs, 0 if not sorted
+ uns hash_bits; // Remaining bits of the hash function
+ byte *ident; // Identifier used in debug messages
+};
+
+/* Bits of sort_bucket->flags. */
+enum sort_bucket_flags {
+ SBF_FINAL = 1, // This bucket corresponds to the final output file (always 1 run)
+ SBF_SOURCE = 2, // Contains the source file (always 0 runs)
+ SBF_CUSTOM_PRESORT = 4, // Contains source to read via custom presorter
+ SBF_OPEN_WRITE = 256, // We are currently writing to the fastbuf
+ SBF_OPEN_READ = 512, // We are reading from the fastbuf
+ SBF_DESTROYED = 1024, // Already done with, no further references allowed
+ SBF_SWAPPED_OUT = 2048, // Swapped out to a named file
+};
+
+struct sort_bucket *sbuck_new(struct sort_context *ctx);
+void sbuck_drop(struct sort_bucket *b);
+int sbuck_have(struct sort_bucket *b);
+int sbuck_has_file(struct sort_bucket *b);
+ucw_off_t sbuck_size(struct sort_bucket *b);
+struct fastbuf *sbuck_read(struct sort_bucket *b);
+struct fastbuf *sbuck_write(struct sort_bucket *b);
+void sbuck_swap_out(struct sort_bucket *b);
+
+/* Contexts and helper functions for the array sorter */
+
+/* Parameters and working state of a single asort_run() call. */
+struct asort_context {
+ // Interface between generic code in array.c and functions generated by array.h
+ void *array; // Array to sort
+ void *buffer; // Auxiliary buffer (required when radix-sorting)
+ uns num_elts; // Number of elements in the array
+ uns elt_size; // Bytes per element
+ uns hash_bits; // Remaining bits of the hash function
+ uns radix_bits; // How many bits to process in a single radix-sort pass
+ // Sort the whole array in place
+ void (*quicksort)(void *array_ptr, uns num_elts);
+ // Do a single quicksort partitioning step and return the split points
+ void (*quicksplit)(void *array_ptr, uns num_elts, int *leftp, int *rightp);
+ // Radix-sort passes: count elements per bucket / distribute elements to buckets
+ void (*radix_count)(void *src_ptr, uns num_elts, uns *cnt, uns shift);
+ void (*radix_split)(void *src_ptr, void *dest_ptr, uns num_elts, uns *ptrs, uns shift);
+
+ // Used internally by array.c
+ struct rs_work **rs_works;
+ struct work_queue *rs_work_queue;
+ struct eltpool *eltpool;
+
+ // Configured limits translated from bytes to elements
+ uns thread_threshold;
+ uns thread_chunk;
+ uns radix_threshold;
+};
+
+void asort_run(struct asort_context *ctx);
+void asort_start_threads(uns run);
+void asort_stop_threads(void);
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Universal Sorter: Configuration
+ *
+ * (c) 2007 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+#include "ucw/conf.h"
+#include "ucw/fastbuf.h"
+#include "ucw/sorter/common.h"
+
+uns sorter_trace;
+uns sorter_trace_array;
+u64 sorter_bufsize = 65536;
+uns sorter_debug;
+uns sorter_min_radix_bits;
+uns sorter_max_radix_bits;
+uns sorter_add_radix_bits;
+uns sorter_min_multiway_bits;
+uns sorter_max_multiway_bits;
+uns sorter_threads;
+u64 sorter_thread_threshold = 1048576;
+u64 sorter_thread_chunk = 4096;
+u64 sorter_radix_threshold = 4096;
+struct fb_params sorter_fb_params;
+struct fb_params sorter_small_fb_params;
+u64 sorter_small_input;
+
+/*
+ * Items of the "Sorter" configuration section. Each entry binds an item
+ * name to one of the sorter_* variables defined above.
+ */
+static struct cf_section sorter_config = {
+  CF_ITEMS {
+    CF_UNS("Trace", &sorter_trace),
+    CF_UNS("TraceArray", &sorter_trace_array),
+    CF_SECTION("FileAccess", &sorter_fb_params, &fbpar_cf),
+    // Must fill sorter_small_fb_params: binding it to sorter_fb_params would
+    // overwrite FileAccess and leave the small-input parameters unconfigurable.
+    CF_SECTION("SmallFileAccess", &sorter_small_fb_params, &fbpar_cf),
+    CF_U64("SmallInput", &sorter_small_input),
+    CF_U64("SortBuffer", &sorter_bufsize),
+    CF_UNS("Debug", &sorter_debug),
+    CF_UNS("MinRadixBits", &sorter_min_radix_bits),
+    CF_UNS("MaxRadixBits", &sorter_max_radix_bits),
+    CF_UNS("AddRadixBits", &sorter_add_radix_bits),
+    CF_UNS("MinMultiwayBits", &sorter_min_multiway_bits),
+    CF_UNS("MaxMultiwayBits", &sorter_max_multiway_bits),
+    CF_UNS("Threads", &sorter_threads),
+    CF_U64("ThreadThreshold", &sorter_thread_threshold),
+    CF_U64("ThreadChunk", &sorter_thread_chunk),
+    CF_U64("RadixThreshold", &sorter_radix_threshold),
+    CF_END
+  }
+};
+
+/*
+ * Declare sorter_config as the configuration section "Sorter".
+ * NOTE(review): CONSTRUCTOR presumably makes this run automatically at
+ * program start-up -- confirm against the macro's definition.
+ */
+static void CONSTRUCTOR sorter_init_config(void)
+{
+ cf_declare_section("Sorter", &sorter_config, 0);
+}
--- /dev/null
+/*
+ * UCW Library -- Universal Sorter: Governing Routines
+ *
+ * (c) 2007 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+#include "ucw/fastbuf.h"
+#include "ucw/mempool.h"
+#include "ucw/stkstring.h"
+#include "ucw/sorter/common.h"
+
+#include <string.h>
+#include <sys/time.h>
+#include <time.h>
+
+#define F_BSIZE(b) stk_fsize(sbuck_size(b))
+
+/* Start timing of a pass: (re)initialize the timer kept in ctx->start_time. */
+static void
+sorter_start_timer(struct sort_context *ctx)
+{
+ init_timer(&ctx->start_time);
+}
+
+/*
+ * Stop timing of a pass: remember the timer reading in ctx->last_pass_time
+ * and add it to the counter `account_to' points to.
+ */
+static void
+sorter_stop_timer(struct sort_context *ctx, uns *account_to)
+{
+  uns elapsed = get_timer(&ctx->start_time);
+
+  ctx->last_pass_time = elapsed;
+  *account_to += elapsed;
+}
+
+/*
+ * Throughput of the last pass: `size' bytes scaled to units of 2^20 bytes,
+ * multiplied by 1000 and divided by ctx->last_pass_time. Returns 0 when
+ * either the size or the measured time is zero.
+ */
+static uns
+sorter_speed(struct sort_context *ctx, u64 size)
+{
+  if (!size || !ctx->last_pass_time)
+    return 0;
+
+  double scaled = (double)size / (1<<20) * 1000;
+  return (uns)(scaled / ctx->last_pass_time);
+}
+
+/*
+ * Produces one presorted run from bucket in. Buckets flagged with
+ * SBF_CUSTOM_PRESORT are handled by the user-supplied presorter writing to
+ * out, all others by the internal sorter of the context. The result of the
+ * called sorter is passed through to the caller.
+ */
+static int
+sorter_presort(struct sort_context *ctx, struct sort_bucket *in, struct sort_bucket *out, struct sort_bucket *out_only)
+{
+  sorter_alloc_buf(ctx);
+
+  if (!(in->flags & SBF_CUSTOM_PRESORT))
+    return ctx->internal_sort(ctx, in, out, out_only);
+
+  /*
+   *  The normal presorter joins its output automatically (see out_only),
+   *  but the custom presorter does not need this trick, because it is
+   *  never called in the middle of the sorted data.
+   */
+  struct fastbuf *dest = sbuck_write(out);
+  out->runs++;
+  return ctx->custom_presort(dest, ctx->big_buf, ctx->big_buf_size);
+}
+
+/*
+ * Finds out whether the output for bucket b can be appended directly to the
+ * bucket preceding it in the list. Returns that bucket and stores its
+ * current size to *sizep if it is flagged SBF_FINAL; returns NULL (leaving
+ * *sizep untouched) otherwise or when joining is disabled by SORT_DEBUG_NO_JOIN.
+ */
+static struct sort_bucket *
+sbuck_join_to(struct sort_bucket *b, ucw_off_t *sizep)
+{
+  struct sort_bucket *prev;
+
+  if (sorter_debug & SORT_DEBUG_NO_JOIN)
+    return NULL;
+
+  prev = (struct sort_bucket *) b->n.prev;	// Such bucket is guaranteed to exist
+  if (prev->flags & SBF_FINAL)
+    {
+      ASSERT(prev->runs == 1);
+      *sizep = sbuck_size(prev);
+      return prev;
+    }
+  return NULL;
+}
+
+/*
+ * Finishes production of an output run. If the join bucket has received
+ * a second run, the runs are counted as one again, b (if any) is dropped
+ * and the number of bytes added to join since join_size was taken is returned.
+ * Otherwise b (if any) is inserted to the bucket list after list_pos and its
+ * size is returned. With neither, the result is 0.
+ */
+static ucw_off_t
+sbuck_ins_or_join(struct sort_bucket *b, cnode *list_pos, struct sort_bucket *join, ucw_off_t join_size)
+{
+  int joined = (join && join->runs >= 2);
+
+  if (joined)
+    {
+      if (b)
+        sbuck_drop(b);
+      ASSERT(join->runs == 2);
+      join->runs--;
+      return sbuck_size(join) - join_size;
+    }
+
+  if (!b)
+    return 0;
+
+  clist_insert_after(&b->n, list_pos);
+  return sbuck_size(b);
+}
+
+/*
+ * Joins the single-run bucket b to the final bucket preceding it in the list:
+ * either b takes over the role of the final bucket, or its contents are
+ * copied to the file of the final bucket and b is dropped.
+ */
+static void
+sorter_join(struct sort_bucket *b)
+{
+  struct sort_bucket *target = (struct sort_bucket *) b->n.prev;
+  ASSERT(target->flags & SBF_FINAL);
+  ASSERT(b->runs == 1);
+
+  if (sbuck_has_file(target))
+    {
+      SORT_TRACE("Copying to output file: %s", F_BSIZE(b));
+      struct fastbuf *from = sbuck_read(b);
+      struct fastbuf *to = sbuck_write(target);
+      bbcopy(from, to, ~0U);
+      sbuck_drop(b);
+      return;
+    }
+
+  // No file is attached to the final bucket yet, so the new bucket
+  // simply becomes the final one.
+  SORT_XTRACE(3, "Replaced final bucket");
+  b->flags |= SBF_FINAL;
+  sbuck_drop(target);
+}
+
+/*
+ * Sorts bucket b by the two-way merge strategy.
+ *
+ * Unless presorting is disabled by SORT_DEBUG_NO_PRESORT (custom presorters
+ * are always run), the input is first cut to presorted runs, which are
+ * distributed alternately to two buckets. If a single run suffices, it is
+ * inserted after list_pos (or joined to the final bucket) and we are done.
+ * Otherwise the two buckets are merged repeatedly until the second output
+ * bucket comes out empty; the remaining bucket is inserted to the bucket
+ * list at the place where b used to be.
+ *
+ * When each of the inputs is known to consist of at most one run, the merge
+ * writes directly to the join target. This shortcut requires ins[0] to have
+ * exactly one run: a bucket with zero runs is raw input used with presorting
+ * disabled, in which case ins[1] is NULL and the merge is not guaranteed to
+ * produce a single run, so the general path must be taken.
+ */
+static void
+sorter_twoway(struct sort_context *ctx, struct sort_bucket *b)
+{
+  struct sort_bucket *ins[3] = { NULL }, *outs[3] = { NULL };
+  cnode *list_pos = b->n.prev;
+  ucw_off_t join_size;
+  struct sort_bucket *join = sbuck_join_to(b, &join_size);
+
+  if (!(sorter_debug & SORT_DEBUG_NO_PRESORT) || (b->flags & SBF_CUSTOM_PRESORT))
+    {
+      SORT_XTRACE(3, "%s", ((b->flags & SBF_CUSTOM_PRESORT) ? "Custom presorting" : "Presorting"));
+      sorter_start_timer(ctx);
+      ins[0] = sbuck_new(ctx);
+      if (!sorter_presort(ctx, b, ins[0], join ? : ins[0]))
+        {
+          // The presorter has consumed the whole input in one go
+          sorter_stop_timer(ctx, &ctx->total_pre_time);
+          ucw_off_t size = sbuck_ins_or_join(ins[0], list_pos, join, join_size);
+          SORT_XTRACE(((b->flags & SBF_SOURCE) ? 1 : 3), "Sorted in memory (%s, %dMB/s)", stk_fsize(size), sorter_speed(ctx, size));
+          sbuck_drop(b);
+          return;
+        }
+
+      // More runs follow: distribute them alternately, starting with ins[1]
+      ins[1] = sbuck_new(ctx);
+      int i = 1;
+      while (sorter_presort(ctx, b, ins[i], ins[i]))
+        i = 1-i;
+      sbuck_drop(b);
+      sorter_stop_timer(ctx, &ctx->total_pre_time);
+      SORT_TRACE("Presorting pass (%d+%d runs, %s+%s, %dMB/s)",
+                 ins[0]->runs, ins[1]->runs,
+                 F_BSIZE(ins[0]), F_BSIZE(ins[1]),
+                 sorter_speed(ctx, sbuck_size(ins[0]) + sbuck_size(ins[1])));
+    }
+  else
+    {
+      // The raw input becomes the only input of the merge; ins[1] stays NULL
+      SORT_XTRACE(2, "Presorting disabled");
+      ins[0] = b;
+    }
+
+  SORT_XTRACE(3, "Main sorting");
+  uns pass = 0;
+  do {
+    ++pass;
+    sorter_start_timer(ctx);
+    if (join && ins[0]->runs == 1 && ins[1] && ins[1]->runs <= 1)
+      {
+        // This is guaranteed to produce a single run, so join if possible
+        outs[0] = join;
+        outs[1] = NULL;
+        ctx->twoway_merge(ctx, ins, outs);
+        ucw_off_t size = sbuck_ins_or_join(NULL, NULL, join, join_size);
+        sorter_stop_timer(ctx, &ctx->total_ext_time);
+        SORT_TRACE("Mergesort pass %d (final run, %s, %dMB/s)", pass, stk_fsize(size), sorter_speed(ctx, size));
+        sbuck_drop(ins[0]);
+        sbuck_drop(ins[1]);
+        return;
+      }
+    outs[0] = sbuck_new(ctx);
+    outs[1] = sbuck_new(ctx);
+    outs[2] = NULL;
+    ctx->twoway_merge(ctx, ins, outs);
+    sorter_stop_timer(ctx, &ctx->total_ext_time);
+    SORT_TRACE("Mergesort pass %d (%d+%d runs, %s+%s, %dMB/s)", pass,
+               outs[0]->runs, outs[1]->runs,
+               F_BSIZE(outs[0]), F_BSIZE(outs[1]),
+               sorter_speed(ctx, sbuck_size(outs[0]) + sbuck_size(outs[1])));
+    sbuck_drop(ins[0]);
+    sbuck_drop(ins[1]);		// sbuck_drop() ignores NULL
+    memcpy(ins, outs, 3*sizeof(struct sort_bucket *));
+  } while (sbuck_have(ins[1]));
+
+  // Everything has ended up in ins[0]; the empty ins[1] is of no use
+  sbuck_drop(ins[1]);
+  clist_insert_after(&ins[0]->n, list_pos);
+}
+
+/*
+ * Sorts bucket b by the multi-way merge strategy: the input is presorted to
+ * parts kept in a local list and the parts are then merged, up to
+ * (1 << sorter_max_multiway_bits) of them at a time, until the list is empty.
+ * The result is either inserted to the bucket list after list_pos or written
+ * to the join target found by sbuck_join_to().
+ */
+static void
+sorter_multiway(struct sort_context *ctx, struct sort_bucket *b)
+{
+ clist parts;
+ cnode *list_pos = b->n.prev;
+ ucw_off_t join_size;
+ struct sort_bucket *join = sbuck_join_to(b, &join_size);
+ uns trace_level = (b->flags & SBF_SOURCE) ? 1 : 3;
+
+ clist_init(&parts);
+ ASSERT(!(sorter_debug & SORT_DEBUG_NO_PRESORT));
+ SORT_XTRACE(3, "%s", ((b->flags & SBF_CUSTOM_PRESORT) ? "Custom presorting" : "Presorting"));
+ uns cont;
+ uns part_cnt = 0;
+ u64 total_size = 0;
+ sorter_start_timer(ctx);
+ do
+ {
+ struct sort_bucket *p = sbuck_new(ctx);
+ // Only the very first part is allowed to go straight to the join target (passed as out_only)
+ cont = sorter_presort(ctx, b, p, (!part_cnt && join) ? join : p);
+ if (sbuck_have(p))
+ {
+ part_cnt++;
+ clist_add_tail(&parts, &p->n);
+ total_size += sbuck_size(p);
+ sbuck_swap_out(p);
+ }
+ else
+ sbuck_drop(p);
+ }
+ while (cont);
+ sorter_stop_timer(ctx, &ctx->total_pre_time);
+ sorter_free_buf(ctx);
+ sbuck_drop(b);
+
+ // Zero parts (nothing stored in a part bucket) or a single part: no merging is needed
+ if (part_cnt <= 1)
+ {
+ ucw_off_t size = sbuck_ins_or_join(clist_head(&parts), list_pos, (part_cnt ? NULL : join), join_size);
+ SORT_XTRACE(trace_level, "Sorted in memory (%s, %dMB/s)", stk_fsize(size), sorter_speed(ctx, size));
+ return;
+ }
+
+ SORT_TRACE("Multi-way presorting pass (%d parts, %s, %dMB/s)", part_cnt, stk_fsize(total_size), sorter_speed(ctx, total_size));
+
+ uns max_ways = 1 << sorter_max_multiway_bits;
+ struct sort_bucket *ways[max_ways+1];
+ SORT_XTRACE(3, "Starting up to %d-way merge", max_ways);
+ for (;;)
+ {
+ // Take up to max_ways parts from the head of the list; the array is NULL-terminated
+ uns n = 0;
+ struct sort_bucket *p;
+ while (n < max_ways && (p = clist_head(&parts)))
+ {
+ clist_remove(&p->n);
+ ways[n++] = p;
+ }
+ ways[n] = NULL;
+ ASSERT(n > 1);
+
+ // The merge which empties the list is the last one and may write to the join target
+ struct sort_bucket *out;
+ if (clist_empty(&parts) && join)
+ out = join;
+ else
+ out = sbuck_new(ctx);
+ sorter_start_timer(ctx);
+ ctx->multiway_merge(ctx, ways, out);
+ sorter_stop_timer(ctx, &ctx->total_ext_time);
+
+ for (uns i=0; i<n; i++)
+ sbuck_drop(ways[i]);
+
+ if (clist_empty(&parts))
+ {
+ ucw_off_t size = sbuck_ins_or_join((join ? NULL : out), list_pos, join, join_size);
+ SORT_TRACE("Multi-way merge completed (%d ways, %s, %dMB/s)", n, stk_fsize(size), sorter_speed(ctx, size));
+ return;
+ }
+ else
+ {
+ // An intermediate result: queue it behind the remaining parts
+ sbuck_swap_out(out);
+ clist_add_tail(&parts, &out->n);
+ SORT_TRACE("Multi-way merge pass (%d ways, %s, %dMB/s)", n, F_BSIZE(out), sorter_speed(ctx, sbuck_size(out)));
+ }
+ }
+}
+
+/*
+ * Splits bucket b to 2^bits buckets by the topmost bits of the hash which
+ * have not been used yet. The new buckets are linked to the bucket list
+ * right after b in increasing order and b itself is dropped at the end.
+ */
+static void
+sorter_radix(struct sort_context *ctx, struct sort_bucket *b, uns bits)
+{
+  // Add more bits if requested and allowed.
+  uns wanted = bits + sorter_add_radix_bits;
+  bits = MIN(wanted, sorter_max_radix_bits);
+
+  uns nbuck = 1 << bits;
+  SORT_XTRACE(3, "Running radix split on %s with hash %d bits of %d (expecting %s buckets)",
+              F_BSIZE(b), bits, b->hash_bits, stk_fsize(sbuck_size(b) / nbuck));
+  sorter_free_buf(ctx);
+  sorter_start_timer(ctx);
+
+  // Going backwards and always inserting right after b yields increasing order
+  struct sort_bucket **outs = alloca(nbuck * sizeof(struct sort_bucket *));
+  uns idx = nbuck;
+  while (idx--)
+    {
+      struct sort_bucket *nb = sbuck_new(ctx);
+      outs[idx] = nb;
+      nb->hash_bits = b->hash_bits - bits;
+      clist_insert_after(&nb->n, &b->n);
+    }
+
+  ctx->radix_split(ctx, b, outs, b->hash_bits - bits, bits);
+
+  // Gather statistics for the trace message, swapping the buckets out if there are many of them
+  u64 min = ~(u64)0, max = 0, sum = 0;
+  for (idx = 0; idx < nbuck; idx++)
+    {
+      u64 s = sbuck_size(outs[idx]);
+      if (s < min)
+        min = s;
+      if (s > max)
+        max = s;
+      sum += s;
+      if (nbuck > 4)
+        sbuck_swap_out(outs[idx]);
+    }
+
+  sorter_stop_timer(ctx, &ctx->total_ext_time);
+  SORT_TRACE("Radix split (%d buckets, %s min, %s max, %s avg, %dMB/s)", nbuck,
+             stk_fsize(min), stk_fsize(max), stk_fsize(sum / nbuck), sorter_speed(ctx, sum));
+  sbuck_drop(b);
+}
+
+/*
+ * Chooses a sorting strategy for bucket b and runs it. Empty buckets are
+ * dropped, buckets with a non-zero run count are joined to the output,
+ * everything else is handed to the two-way, radix-split or multi-way code
+ * according to how many times the bucket has to be halved to fit the estimate
+ * returned by ctx->internal_estimate().
+ */
+static void
+sorter_decide(struct sort_context *ctx, struct sort_bucket *b)
+{
+ // Drop empty buckets
+ if (!sbuck_have(b))
+ {
+ SORT_XTRACE(4, "Dropping empty bucket");
+ sbuck_drop(b);
+ return;
+ }
+
+ // How many bits of bucket size we have to reduce before it fits in the RAM?
+ // (this is insanely large if the input size is unknown, but it serves our purpose)
+ u64 insize = sbuck_size(b);
+ u64 mem = ctx->internal_estimate(ctx, b) * 0.8; // Magical factor accounting for various non-uniformities
+ uns bits = 0;
+ while ((insize >> bits) > mem)
+ bits++;
+
+ // Calculate the possibilities of radix splits
+ // (radix_bits == 0 means that radix splitting cannot be used)
+ uns radix_bits;
+ if (!ctx->radix_split ||
+ (b->flags & SBF_CUSTOM_PRESORT) ||
+ (sorter_debug & SORT_DEBUG_NO_RADIX))
+ radix_bits = 0;
+ else
+ {
+ radix_bits = MIN(bits, b->hash_bits);
+ radix_bits = MIN(radix_bits, sorter_max_radix_bits);
+ if (radix_bits < sorter_min_radix_bits)
+ radix_bits = 0;
+ }
+
+ // The same for multi-way merges
+ // (sorter_multiway() asserts that presorting is enabled, hence the NO_PRESORT check)
+ uns multiway_bits;
+ if (!ctx->multiway_merge ||
+ (sorter_debug & SORT_DEBUG_NO_MULTIWAY) ||
+ (sorter_debug & SORT_DEBUG_NO_PRESORT))
+ multiway_bits = 0;
+ else
+ {
+ multiway_bits = MIN(bits, sorter_max_multiway_bits);
+ if (multiway_bits < sorter_min_multiway_bits)
+ multiway_bits = 0;
+ }
+
+ SORT_XTRACE(3, "Decisions: size=%s max=%s runs=%d bits=%d hash=%d -> radix=%d multi=%d",
+ stk_fsize(insize), stk_fsize(mem), b->runs, bits, b->hash_bits,
+ radix_bits, multiway_bits);
+
+ // If the input already consists of a single run, just join it
+ if (b->runs)
+ return sorter_join(b);
+
+ // If everything fits in memory, the 2-way strategy will sort it in memory
+ if (!bits)
+ return sorter_twoway(ctx, b);
+
+ // If we can reduce everything in one pass, do so and prefer radix splits
+ if (radix_bits == bits)
+ return sorter_radix(ctx, b, radix_bits);
+ if (multiway_bits == bits)
+ return sorter_multiway(ctx, b);
+
+ // Otherwise, reduce as much as possible and again prefer radix splits
+ if (radix_bits)
+ return sorter_radix(ctx, b, radix_bits);
+ if (multiway_bits)
+ return sorter_multiway(ctx, b);
+
+ // Fall back to 2-way strategy if nothing else applies
+ return sorter_twoway(ctx, b);
+}
+
+/*
+ * The main entry point of the sorter. Builds a bucket list consisting of
+ * a final (output) bucket followed by a bucket describing the input, lets
+ * sorter_decide() process the buckets behind the first one until none
+ * remain, and finally stores the output fastbuf, opened for reading, to ctx->out_fb.
+ */
+void
+sorter_run(struct sort_context *ctx)
+{
+ ctx->pool = mp_new(4096);
+ clist_init(&ctx->bucket_list);
+ sorter_prepare_buf(ctx);
+ asort_start_threads(0);
+
+ // Create bucket containing the source
+ struct sort_bucket *bin = sbuck_new(ctx);
+ bin->flags = SBF_SOURCE | SBF_OPEN_READ;
+ if (ctx->custom_presort)
+ bin->flags |= SBF_CUSTOM_PRESORT;
+ else
+ bin->fb = ctx->in_fb;
+ bin->ident = "in";
+ bin->size = ctx->in_size;
+ bin->hash_bits = ctx->hash_bits;
+ clist_add_tail(&ctx->bucket_list, &bin->n);
+ SORT_XTRACE(2, "Input size: %s, %d hash bits", F_BSIZE(bin), bin->hash_bits);
+ // Inputs smaller than sorter_small_input get their own set of file access parameters
+ ctx->fb_params = (bin->size < sorter_small_input) ? &sorter_small_fb_params : &sorter_fb_params;
+
+ // Create bucket for the output
+ struct sort_bucket *bout = sbuck_new(ctx);
+ bout->flags = SBF_FINAL;
+ // The assignment is intentional: a fastbuf supplied by the caller is already open for writing
+ if (bout->fb = ctx->out_fb)
+ bout->flags |= SBF_OPEN_WRITE;
+ bout->ident = "out";
+ bout->runs = 1;
+ clist_add_head(&ctx->bucket_list, &bout->n);
+
+ // Repeatedly sort buckets
+ // (the head is re-read in every iteration, because sorter_join() can replace the final bucket)
+ struct sort_bucket *b;
+ while (bout = clist_head(&ctx->bucket_list), b = clist_next(&ctx->bucket_list, &bout->n))
+ sorter_decide(ctx, b);
+
+ asort_stop_threads();
+ sorter_free_buf(ctx);
+ sbuck_write(bout); // Force empty bucket to a file
+ SORT_XTRACE(2, "Final size: %s", F_BSIZE(bout));
+ SORT_XTRACE(2, "Final timings: %.3fs external sorting, %.3fs presorting, %.3fs internal sorting",
+ ctx->total_ext_time/1000., ctx->total_pre_time/1000., ctx->total_int_time/1000.);
+ ctx->out_fb = sbuck_read(bout);
+ mp_delete(ctx->pool);
+}
--- /dev/null
+/*
+ * UCW Library -- Universal Sorter: Fixed-Size Internal Sorting Module
+ *
+ * (c) 2007 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/stkstring.h"
+
+// Parameters for ucw/sorter/array.h, included below: the array sorter gets
+// instantiated under the names array_* and works directly on the keys.
+#define ASORT_PREFIX(x) SORT_PREFIX(array_##x)
+#define ASORT_KEY_TYPE P(key)
+#define ASORT_LT(x,y) (P(compare)(&(x), &(y)) < 0)
+// Hash-related parameters are passed on only if internal radix-sorting has been requested
+#ifdef SORT_INTERNAL_RADIX
+# define ASORT_HASH(x) P(hash)(&(x))
+# ifdef SORT_LONG_HASH
+# define ASORT_LONG_HASH
+# endif
+#endif
+#include "ucw/sorter/array.h"
+
+/*
+ * This is a more efficient implementation of the internal sorter,
+ * which runs under the following assumptions:
+ *
+ * - the keys have fixed (and small) size
+ * - no data are present after the key
+ * - unification does not require any workspace
+ */
+
+/*
+ * Size of workspace needed per key: a pointer to the key when unifying,
+ * a copy of the key when radix-sorting, the larger of the two if both
+ * are enabled, zero if neither is.
+ */
+static size_t P(internal_workspace)(void)
+{
+  size_t per_key = 0;
+#ifdef SORT_UNIFY
+  per_key = sizeof(P(key) *);
+#endif
+#ifdef SORT_INTERNAL_RADIX
+  if (per_key < sizeof(P(key)))
+    per_key = sizeof(P(key));
+#endif
+  return per_key;
+}
+
+/*
+ * Maximum number of keys which can be sorted in the big buffer at once,
+ * counting the per-key workspace. One page is set aside when a workspace
+ * is needed (P(internal) aligns the workspace to a page boundary).
+ */
+static uns P(internal_num_keys)(struct sort_context *ctx)
+{
+  size_t avail = ctx->big_buf_size;
+  size_t per_key_ws = P(internal_workspace)();
+  if (per_key_ws)
+    avail -= CPU_PAGE_SIZE;
+  u64 maxkeys = avail / (sizeof(P(key)) + per_key_ws);
+  // The number of records must fit in uns
+  if (maxkeys > ~0U)
+    return ~0U;
+  return maxkeys;
+}
+
+/*
+ * Internal sorter for fixed-size keys: fills the big buffer with keys read
+ * from bin, sorts them by the array sorter and writes them as a single run.
+ *
+ * The run goes to bout, or to bout_only if fewer than the maximum number
+ * of keys were read (i.e., the input has been exhausted by this call).
+ * Returns 0 when nothing was read, otherwise non-zero iff the buffer was
+ * filled completely, which means that more data may follow.
+ *
+ * When unifying, groups of equal keys are passed to P(write_merged)()
+ * as an array of pointers built in the workspace.
+ */
+static int P(internal)(struct sort_context *ctx, struct sort_bucket *bin, struct sort_bucket *bout, struct sort_bucket *bout_only)
+{
+  sorter_alloc_buf(ctx);
+  struct fastbuf *in = sbuck_read(bin);
+  P(key) *buf = ctx->big_buf;
+  uns maxkeys = P(internal_num_keys)(ctx);
+
+  SORT_XTRACE(5, "s-fixint: Reading (maxkeys=%u, hash_bits=%d)", maxkeys, bin->hash_bits);
+  uns n = 0;
+  while (n < maxkeys && P(read_key)(in, &buf[n]))
+    n++;
+  if (!n)
+    return 0;
+  void *workspace UNUSED = ALIGN_PTR(&buf[n], CPU_PAGE_SIZE);
+
+  SORT_XTRACE(4, "s-fixint: Sorting %u items (%s items, %s workspace)",
+              n,
+              stk_fsize(n * sizeof(P(key))),
+              stk_fsize(n * P(internal_workspace)()));
+  timestamp_t timer;
+  init_timer(&timer);
+  buf = P(array_sort)(buf, n
+#ifdef SORT_INTERNAL_RADIX
+    , workspace, bin->hash_bits
+#endif
+    );
+  // If the sorter has left the result in the workspace, the original array becomes the workspace
+  if ((void *)buf != ctx->big_buf)
+    workspace = ctx->big_buf;
+  ctx->total_int_time += get_timer(&timer);
+
+  SORT_XTRACE(5, "s-fixint: Writing");
+  if (n < maxkeys)
+    bout = bout_only;
+  struct fastbuf *out = sbuck_write(bout);
+  bout->runs++;
+  uns merged UNUSED = 0;
+  for (uns i=0; i<n; i++)
+    {
+#ifdef SORT_UNIFY
+      if (i < n-1 && !P(compare)(&buf[i], &buf[i+1]))
+        {
+          P(key) **keys = workspace;
+          uns cnt = 2;
+          keys[0] = &buf[i];
+          keys[1] = &buf[i+1];
+          // Extend the group, but never look behind the last key read
+          // (cnt < n - i is i + cnt < n written so that it cannot overflow)
+          while (cnt < n - i && !P(compare)(&buf[i], &buf[i+cnt]))
+            {
+              keys[cnt] = &buf[i+cnt];
+              cnt++;
+            }
+          P(write_merged)(out, keys, NULL, cnt, NULL);
+          merged += cnt - 1;
+          i += cnt - 1;
+          continue;
+        }
+#endif
+#ifdef SORT_ASSERT_UNIQUE
+      ASSERT(i == n-1 || P(compare)(&buf[i], &buf[i+1]) < 0);
+#endif
+      P(write_key)(out, &buf[i]);
+    }
+#ifdef SORT_UNIFY
+  SORT_XTRACE(4, "Merging reduced %u records", merged);
+#endif
+
+  return (n == maxkeys);
+}
+
+/*
+ * Estimates how much input can be sorted internally in a single pass:
+ * the size of the maximum number of keys, minus one byte.
+ */
+static u64
+P(internal_estimate)(struct sort_context *ctx, struct sort_bucket *b UNUSED)
+{
+  size_t capacity = P(internal_num_keys)(ctx) * sizeof(P(key));
+  // -1 since if the buffer is full, we don't recognize EOF
+  return capacity - 1;
+}
--- /dev/null
+/*
+ * UCW Library -- Universal Sorter: Internal Sorting Module
+ *
+ * (c) 2007 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/stkstring.h"
+
+#ifdef SORT_INTERNAL_RADIX
+/* Keep copies of the items' hashes to save cache misses */
+#define SORT_COPY_HASH
+#endif
+
+/*
+ * An element of the array which is actually sorted: a pointer to the key
+ * stored elsewhere in the big buffer, accompanied by a copy of the key's
+ * hash if SORT_COPY_HASH is defined.
+ */
+typedef struct {
+ P(key) *key;
+#ifdef SORT_COPY_HASH
+ P(hash_t) hash;
+#endif
+} P(internal_item_t);
+
+// Parameters for ucw/sorter/array.h, included below: the array sorter gets
+// instantiated under the names array_* and works on internal_item_t's.
+#define ASORT_PREFIX(x) SORT_PREFIX(array_##x)
+#define ASORT_KEY_TYPE P(internal_item_t)
+// Ordering of items: by the copied hash first (if we keep one), then by the user's comparison function
+#ifdef SORT_COPY_HASH
+# ifdef SORT_INT
+# define ASORT_LT(x,y) ((x).hash < (y).hash) // In this mode, the hash is the value
+# else
+# define ASORT_LT(x,y) ((x).hash < (y).hash || (x).hash == (y).hash && P(compare)((x).key, (y).key) < 0)
+# endif
+#else
+# define ASORT_LT(x,y) (P(compare)((x).key, (y).key) < 0)
+#endif
+// Hash for the radix-sorter: use the stored copy when available, otherwise compute it from the key
+#ifdef SORT_INTERNAL_RADIX
+# ifdef SORT_COPY_HASH
+# define ASORT_HASH(x) (x).hash
+# else
+# define ASORT_HASH(x) P(hash)((x).key)
+# endif
+# ifdef SORT_LONG_HASH
+# define ASORT_LONG_HASH
+# endif
+#endif
+#include "ucw/sorter/array.h"
+
+/*
+ * The big_buf has the following layout:
+ *
+ * +-------------------------------------------------------------------------------+
+ * | array of internal_item's |
+ * +-------------------------------------------------------------------------------+
+ * | padding to make the following part page-aligned |
+ * +--------------------------------+----------------------------------------------+
+ * | shadow copy of item array | array of pointers to data for write_merged() |
+ * | used if radix-sorting +----------------------------------------------+
+ * | | workspace for write_merged() |
+ * +--------------------------------+----------------------------------------------+
+ * | +---------+ |
+ * | | key | |
+ * | +---------+ |
+ * | sequence of | padding | |
+ * | items +---------+ |
+ * | | data | |
+ * | +---------+ |
+ * | | padding | |
+ * | +---------+ |
+ * +-------------------------------------------------------------------------------+
+ *
+ * (the data which are in different columns are never accessed simultaneously,
+ * so we use a single buffer for both)
+ */
+
+/*
+ * Returns the address of the data stored behind the given key in the big
+ * buffer. When unifying, the key size is first rounded up to CPU_STRUCT_ALIGN.
+ */
+static inline void *P(internal_get_data)(P(key) *key)
+{
+  uns offset = SORT_KEY_SIZE(*key);
+#ifdef SORT_UNIFY
+  offset = ALIGN_TO(offset, CPU_STRUCT_ALIGN);
+#endif
+  byte *base = (byte *) key;
+  return base + offset;
+}
+
+/*
+ * Size of workspace to reserve for a single record with the given key:
+ * a data pointer plus the unification workspace when these are enabled,
+ * but at least one internal_item_t if radix-sorting is enabled.
+ */
+static inline size_t P(internal_workspace)(P(key) *key UNUSED)
+{
+  size_t needed = 0;
+#ifdef SORT_UNIFY
+  needed += sizeof(void *);
+#endif
+#ifdef SORT_UNIFY_WORKSPACE
+  needed += SORT_UNIFY_WORKSPACE(*key);
+#endif
+#ifdef SORT_INTERNAL_RADIX
+  if (needed < sizeof(P(internal_item_t)))
+    needed = sizeof(P(internal_item_t));
+#endif
+  return needed;
+}
+
+/*
+ * The general internal sorter: reads as many records from bin as fit in the
+ * big buffer (items grow from the start of the buffer, keys with data from
+ * its end), sorts the items and writes the records as a single run.
+ *
+ * The run goes to bout, or to bout_only when no keys remain unread. A key
+ * which has been read but did not fit is kept in ctx->key_buf for the next
+ * call, which is signalled by ctx->more_keys. Returns 0 if there was nothing
+ * to read, otherwise the value of ctx->more_keys (or 1 for a giant run).
+ */
+static int P(internal)(struct sort_context *ctx, struct sort_bucket *bin, struct sort_bucket *bout, struct sort_bucket *bout_only)
+{
+ sorter_alloc_buf(ctx);
+ struct fastbuf *in = sbuck_read(bin);
+
+ // Pick up the key left over from the previous call, or read a fresh one
+ P(key) key, *keybuf = ctx->key_buf;
+ if (!keybuf)
+ keybuf = ctx->key_buf = sorter_alloc(ctx, sizeof(key));
+ if (ctx->more_keys)
+ {
+ key = *keybuf;
+ ctx->more_keys = 0;
+ }
+ else if (!P(read_key)(in, &key))
+ return 0;
+
+ size_t bufsize = ctx->big_buf_size;
+#ifdef SORT_VAR_DATA
+ // A record which cannot fit in the buffer even alone is copied to the output as a run of its own
+ if (sizeof(key) + 2*CPU_PAGE_SIZE + SORT_DATA_SIZE(key) + P(internal_workspace)(&key) > bufsize)
+ {
+ SORT_XTRACE(4, "s-internal: Generating a giant run");
+ struct fastbuf *out = sbuck_write(bout);
+ P(copy_data)(&key, in, out);
+ bout->runs++;
+ return 1; // We don't know, but 1 is always safe
+ }
+#endif
+
+ SORT_XTRACE(5, "s-internal: Reading");
+ P(internal_item_t) *item_array = ctx->big_buf, *item = item_array, *last_item;
+ byte *end = (byte *) ctx->big_buf + bufsize;
+ // One page is left out of the budget (the workspace below starts at a page-aligned address)
+ size_t remains = bufsize - CPU_PAGE_SIZE;
+ do
+ {
+ uns ksize = SORT_KEY_SIZE(key);
+#ifdef SORT_UNIFY
+ uns ksize_aligned = ALIGN_TO(ksize, CPU_STRUCT_ALIGN);
+#else
+ uns ksize_aligned = ksize;
+#endif
+ uns dsize = SORT_DATA_SIZE(key);
+ uns recsize = ALIGN_TO(ksize_aligned + dsize, CPU_STRUCT_ALIGN);
+ size_t totalsize = recsize + sizeof(P(internal_item_t)) + P(internal_workspace)(&key);
+ if (unlikely(totalsize > remains
+#ifdef CPU_64BIT_POINTERS
+ || item >= item_array + ~0U // The number of items must fit in an uns
+#endif
+ ))
+ {
+ // The buffer is full: save the key for the next call
+ ctx->more_keys = 1;
+ *keybuf = key;
+ break;
+ }
+ remains -= totalsize;
+ // Records are stored downwards from the end of the buffer
+ end -= recsize;
+ memcpy(end, &key, ksize);
+#ifdef SORT_VAR_DATA
+ breadb(in, end + ksize_aligned, dsize);
+#endif
+ item->key = (P(key)*) end;
+#ifdef SORT_COPY_HASH
+ item->hash = P(hash)(item->key);
+#endif
+ item++;
+ }
+ while (P(read_key)(in, &key));
+ last_item = item;
+
+ uns count = last_item - item_array;
+ void *workspace UNUSED = ALIGN_PTR(last_item, CPU_PAGE_SIZE);
+ SORT_XTRACE(4, "s-internal: Read %u items (%s items, %s workspace, %s data)",
+ count,
+ stk_fsize((byte*)last_item - (byte*)item_array),
+ stk_fsize(end - (byte*)last_item - remains),
+ stk_fsize((byte*)ctx->big_buf + bufsize - end));
+ timestamp_t timer;
+ init_timer(&timer);
+ item_array = P(array_sort)(item_array, count
+#ifdef SORT_INTERNAL_RADIX
+ , workspace, bin->hash_bits
+#endif
+ );
+ // If the sorter has left the result in the workspace, the original array becomes the workspace
+ if ((void *)item_array != ctx->big_buf)
+ workspace = ctx->big_buf;
+ last_item = item_array + count;
+ ctx->total_int_time += get_timer(&timer);
+
+ SORT_XTRACE(5, "s-internal: Writing");
+ if (!ctx->more_keys)
+ bout = bout_only;
+ struct fastbuf *out = sbuck_write(bout);
+ bout->runs++;
+ uns merged UNUSED = 0;
+ for (item = item_array; item < last_item; item++)
+ {
+#ifdef SORT_UNIFY
+ if (item < last_item - 1 && !P(compare)(item->key, item[1].key))
+ {
+ // Rewrite the item structures with just pointers to keys and place
+ // pointers to data in the workspace.
+ // (key_array shares memory with the items being read; see the order of accesses in the loop)
+ P(key) **key_array = (void *) item;
+ void **data_array = workspace;
+ key_array[0] = item[0].key;
+ data_array[0] = P(internal_get_data)(key_array[0]);
+ uns cnt;
+ for (cnt=1; item+cnt < last_item && !P(compare)(key_array[0], item[cnt].key); cnt++)
+ {
+ key_array[cnt] = item[cnt].key;
+ data_array[cnt] = P(internal_get_data)(key_array[cnt]);
+ }
+ P(write_merged)(out, key_array, data_array, cnt, data_array+cnt);
+ item += cnt - 1;
+ merged += cnt - 1;
+ continue;
+ }
+#endif
+#ifdef SORT_ASSERT_UNIQUE
+ ASSERT(item == last_item-1 || P(compare)(item->key, item[1].key) < 0);
+#endif
+ P(write_key)(out, item->key);
+#ifdef SORT_VAR_DATA
+ bwrite(out, P(internal_get_data)(item->key), SORT_DATA_SIZE(*item->key));
+#endif
+ }
+#ifdef SORT_UNIFY
+ SORT_XTRACE(4, "Merging reduced %u records", merged);
+#endif
+
+ return ctx->more_keys;
+}
+
+/*
+ * Estimates how much input can be sorted internally in a single pass.
+ * The buffer size is divided by the guessed space needed per record
+ * (average key, workspace and one item structure) and the resulting
+ * number of records is multiplied by the average key size.
+ */
+static u64
+P(internal_estimate)(struct sort_context *ctx, struct sort_bucket *b UNUSED)
+{
+  // Most of this is just wild guesses
+#ifdef SORT_VAR_KEY
+  uns avg = ALIGN_TO(sizeof(P(key))/4, CPU_STRUCT_ALIGN);
+#else
+  uns avg = ALIGN_TO(sizeof(P(key)), CPU_STRUCT_ALIGN);
+#endif
+
+  uns ws = 0;
+#ifdef SORT_UNIFY
+  ws += sizeof(void *);
+#endif
+#ifdef SORT_UNIFY_WORKSPACE
+  ws += avg;
+#endif
+#ifdef SORT_INTERNAL_RADIX
+  ws = MAX(ws, sizeof(P(internal_item_t)));
+#endif
+
+  // We ignore the data part of records, it probably won't make the estimate much worse
+  return (ctx->big_buf_size / (avg + ws + sizeof(P(internal_item_t))) * avg);
+}
+
+#undef SORT_COPY_HASH
--- /dev/null
+/*
+ * UCW Library -- Universal Sorter: Multi-Way Merge Module
+ *
+ * (c) 2007 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+/*
+ * We use a binary tree to keep track of the current minimum. The tree is
+ * represented by an array (in the same way as binary heaps usually are),
+ * leaves correspond to input streams and each internal vertex remembers
+ * the leaf in its subtree, which has the lowest key.
+ */
+
+/*
+ * A vertex of the tree described above. The index i refers to an input
+ * stream; the merge code uses a negative value for a leaf without a key
+ * and for a subtree containing no keys.
+ */
+typedef struct P(mwt) {
+ int i; // Minimum of the subtree
+#ifdef SORT_UNIFY
+ int eq; // Did we encounter equality anywhere in the subtree?
+#endif
+} P(mwt);
+
+/*
+ * Recalculates all vertices on the path from vertex i (exclusive) to the
+ * root, which is tree[1]. Each vertex takes over the child with the smaller
+ * key; a child with negative index loses automatically and ties are won
+ * by the left child.
+ */
+static inline void P(update_tree)(P(key) *keys, P(mwt) *tree, uns i)
+{
+ while (i /= 2)
+ {
+ if (tree[2*i].i < 0)
+ tree[i] = tree[2*i+1];
+ else if (tree[2*i+1].i < 0)
+ tree[i] = tree[2*i];
+ else
+ {
+ int cmp = P(compare)(&keys[tree[2*i].i], &keys[tree[2*i+1].i]);
+ tree[i] = (cmp <= 0) ? tree[2*i] : tree[2*i+1];
+#ifdef SORT_UNIFY
+ if (!cmp)
+ tree[i].eq = 1;
+#endif
+ }
+ /*
+ * It is very tempting to stop as soon as the current node does not
+ * change, but it is wrong, because even if the stream index stored in
+ * the tree is the same, the actual key value can differ.
+ */
+ }
+ /*
+ * This function sometimes triggers optimizer bugs in GCC versions up to 4.2.1,
+ * leading to an assumption that tree[1] does not change during this function.
+ * We add an explicit memory barrier as a work-around. Ugh. See GCC Bug #33262.
+ */
+ asm volatile ("" : : : "memory");
+}
+
+/* Stores stream index val to vertex i and recalculates the path from it to the root. */
+static inline void P(set_tree)(P(key) *keys, P(mwt) *tree, uns i, int val)
+{
+  P(mwt) *vertex = &tree[i];
+  vertex->i = val;
+  P(update_tree)(keys, tree, i);
+}
+
+/*
+ * Merges all buckets of the NULL-terminated array ins to the bucket out,
+ * whose run counter is incremented by one. The current key of each input
+ * is kept in keys[] and the stream holding the minimum is always
+ * available as tree[1].i (negative if all inputs have been exhausted).
+ */
+static void P(multiway_merge)(struct sort_context *ctx UNUSED, struct sort_bucket **ins, struct sort_bucket *out)
+{
+ uns num_ins = 0;
+ while (ins[num_ins])
+ num_ins++;
+
+ // n2 is the number of inputs rounded up to a power of two; leaves of the tree are at tree[n2 ... 2*n2-1]
+ uns n2 = 1;
+ while (n2 < num_ins)
+ n2 *= 2;
+
+ struct fastbuf *fout = sbuck_write(out);
+ struct fastbuf *fins[num_ins];
+ P(key) keys[num_ins];
+ P(mwt) tree[2*n2];
+ for (uns i=1; i<2*n2; i++)
+ tree[i] = (P(mwt)) { .i = -1 };
+
+ // Read the first key of every input and enter it in the tree
+ for (uns i=0; i<num_ins; i++)
+ {
+ fins[i] = sbuck_read(ins[i]);
+ if (P(read_key)(fins[i], &keys[i]))
+ P(set_tree)(keys, tree, n2+i, i);
+ }
+
+#ifdef SORT_UNIFY
+
+ uns hits[num_ins];
+ P(key) *mkeys[num_ins], *key;
+ struct fastbuf *mfb[num_ins];
+
+ while (likely(tree[1].i >= 0))
+ {
+ int i = tree[1].i;
+ if (!tree[1].eq)
+ {
+ /* The key is unique, so let's go through the fast path */
+ P(copy_data)(&keys[i], fins[i], fout);
+ if (unlikely(!P(read_key)(fins[i], &keys[i])))
+ tree[n2+i].i = -1;
+ P(update_tree)(keys, tree, n2+i);
+ continue;
+ }
+
+ // Collect all streams whose current key equals the minimum, taking them out of the tree one by one
+ uns m = 0;
+ key = &keys[i];
+ do
+ {
+ hits[m] = i;
+ mkeys[m] = &keys[i];
+ mfb[m] = fins[i];
+ m++;
+ P(set_tree)(keys, tree, n2+i, -1);
+ i = tree[1].i;
+ if (unlikely(i < 0))
+ break;
+ }
+ while (!P(compare)(key, &keys[i]));
+
+ P(copy_merged)(mkeys, mfb, m, fout);
+
+ // Refill the streams we have consumed; those at EOF stay out of the tree
+ for (uns j=0; j<m; j++)
+ {
+ i = hits[j];
+ if (likely(P(read_key)(fins[i], &keys[i])))
+ P(set_tree)(keys, tree, n2+i, i);
+ }
+ }
+
+#else
+
+ /* Simplified version which does not support any unification */
+ while (likely(tree[1].i >= 0))
+ {
+ uns i = tree[1].i;
+ P(key) UNUSED key = keys[i];
+ P(copy_data)(&keys[i], fins[i], fout);
+ if (unlikely(!P(read_key)(fins[i], &keys[i])))
+ tree[n2+i].i = -1;
+ P(update_tree)(keys, tree, n2+i);
+#ifdef SORT_ASSERT_UNIQUE
+ ASSERT(tree[1].i < 0 || P(compare)(&key, &keys[tree[1].i]) < 0);
+#endif
+ }
+
+#endif
+
+ out->runs++;
+}
--- /dev/null
+/*
+ * UCW Library -- Universal Sorter: Radix-Split Module
+ *
+ * (c) 2007 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include <string.h>
+
+/*
+ * Distributes records of bucket bin to 2^numbits output buckets according
+ * to numbits bits of the hash starting at bit position bitpos. Output
+ * buckets are opened for writing lazily, when the first record arrives.
+ */
+static void P(radix_split)(struct sort_context *ctx UNUSED, struct sort_bucket *bin, struct sort_bucket **bouts, uns bitpos, uns numbits)
+{
+  uns nbucks = 1 << numbits;
+  uns mask = nbucks - 1;
+  struct fastbuf *src = sbuck_read(bin);
+  P(key) rec;
+
+  struct fastbuf *dests[nbucks];
+  bzero(dests, sizeof(dests));
+
+  while (P(read_key)(src, &rec))
+    {
+      P(hash_t) h = P(hash)(&rec);
+      uns which = (h >> bitpos) & mask;
+      struct fastbuf *dest = dests[which];
+      if (unlikely(!dest))
+        dest = dests[which] = sbuck_write(bouts[which]);
+      P(copy_data)(&rec, src, dest);
+    }
+}
--- /dev/null
+/*
+ * UCW Library -- Universal Sorter: Two-Way Merge Module
+ *
+ * (c) 2007 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+/*
+ * Merges runs from ins[0] and ins[1] (the latter may contain no data) and
+ * distributes the resulting runs alternately to outs[0] and outs[1]. If
+ * outs[1] is NULL, everything is written to outs[0]. The run counters of
+ * the output buckets are increased by the number of runs written to them.
+ *
+ * NOTE(review): LESS is defined outside this file; it is presumably
+ * a comparison operator chosen by the sorter configuration -- confirm.
+ */
+static void P(twoway_merge)(struct sort_context *ctx UNUSED, struct sort_bucket **ins, struct sort_bucket **outs)
+{
+ struct fastbuf *fin1, *fin2, *fout1, *fout2, *ftmp;
+ P(key) kbuf1, kbuf2, kbuf3, kbuf4;
+ // For each input we keep the current key (kin*) and the previous one (kprev*); the buffers are swapped after each read
+ P(key) *kin1 = &kbuf1, *kprev1 = &kbuf2, *kin2 = &kbuf3, *kprev2 = &kbuf4;
+ P(key) *kout = NULL, *ktmp;
+ // next*: a key is available in the input; run*: that key still belongs to the input run being merged
+ int next1, next2, run1, run2;
+ int comp;
+ uns run_count = 0;
+
+ fin1 = sbuck_read(ins[0]);
+ next1 = P(read_key)(fin1, kin1);
+ if (sbuck_have(ins[1]))
+ {
+ fin2 = sbuck_read(ins[1]);
+ next2 = P(read_key)(fin2, kin2);
+ }
+ else
+ {
+ fin2 = NULL;
+ next2 = 0;
+ }
+ fout1 = fout2 = NULL;
+
+ run1 = next1, run2 = next2;
+ while (next1 || next2)
+ {
+ // An input whose run has ended always loses
+ if (!run1)
+ comp = 1;
+ else if (!run2)
+ comp = -1;
+ else
+ comp = P(compare)(kin1, kin2);
+ ktmp = (comp <= 0) ? kin1 : kin2;
+ // If the key to be written does not continue the current output run, start a new run in the other output
+ if (!kout || !(P(compare)(kout, ktmp) LESS 0))
+ {
+ SWAP(fout1, fout2, ftmp);
+ if (unlikely(!fout1))
+ {
+ if (!fout2)
+ fout1 = sbuck_write(outs[0]);
+ else if (outs[1])
+ fout1 = sbuck_write(outs[1]);
+ else
+ fout1 = fout2;
+ }
+ run_count++;
+ }
+#ifdef SORT_ASSERT_UNIQUE
+ ASSERT(comp != 0);
+#endif
+ if (comp LESS 0)
+ {
+ P(copy_data)(kin1, fin1, fout1);
+ SWAP(kin1, kprev1, ktmp);
+ next1 = P(read_key)(fin1, kin1);
+ run1 = next1 && (P(compare)(kprev1, kin1) LESS 0);
+ kout = kprev1;
+ }
+#ifdef SORT_UNIFY
+ else if (comp == 0)
+ {
+ // Equal keys: merge the two records to one and advance both inputs
+ P(key) *mkeys[] = { kin1, kin2 };
+ struct fastbuf *mfb[] = { fin1, fin2 };
+ P(copy_merged)(mkeys, mfb, 2, fout1);
+ SWAP(kin1, kprev1, ktmp);
+ next1 = P(read_key)(fin1, kin1);
+ run1 = next1 && (P(compare)(kprev1, kin1) LESS 0);
+ SWAP(kin2, kprev2, ktmp);
+ next2 = P(read_key)(fin2, kin2);
+ run2 = next2 && (P(compare)(kprev2, kin2) LESS 0);
+ kout = kprev2;
+ }
+#endif
+ else
+ {
+ P(copy_data)(kin2, fin2, fout1);
+ SWAP(kin2, kprev2, ktmp);
+ next2 = P(read_key)(fin2, kin2);
+ run2 = next2 && (P(compare)(kprev2, kin2) LESS 0);
+ kout = kprev2;
+ }
+ // Both input runs have ended, so proceed to the next pair of runs
+ if (!run1 && !run2)
+ {
+ run1 = next1;
+ run2 = next2;
+ }
+ }
+
+ // Account the runs to the output buckets
+ if (fout2 && fout2 != fout1)
+ outs[1]->runs += run_count / 2;
+ if (fout1)
+ outs[0]->runs += (run_count+1) / 2;
+}
--- /dev/null
+/*
+ * UCW Library -- Universal Sorter: Operations on Contexts, Buffers and Buckets
+ *
+ * (c) 2007 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+#include "ucw/fastbuf.h"
+#include "ucw/mempool.h"
+#include "ucw/stkstring.h"
+#include "ucw/sorter/common.h"
+
+#include <fcntl.h>
+
+/* Allocates size bytes of zeroed memory from the memory pool of the context. */
+void *
+sorter_alloc(struct sort_context *ctx, uns size)
+{
+  void *mem = mp_alloc_zero(ctx->pool, size);
+  return mem;
+}
+
+/* Creates a new zero-initialized bucket belonging to the given context. */
+struct sort_bucket *
+sbuck_new(struct sort_context *ctx)
+{
+  struct sort_bucket *bucket = sorter_alloc(ctx, sizeof(struct sort_bucket));
+
+  bucket->ctx = ctx;
+  return bucket;
+}
+
+/*
+ * Destroys a bucket: unlinks it from its list (if it is in one), closes
+ * its fastbuf and wipes the structure, leaving only the SBF_DESTROYED flag
+ * set. Passing NULL is allowed and does nothing.
+ */
+void
+sbuck_drop(struct sort_bucket *b)
+{
+  if (!b)
+    return;
+
+  ASSERT(!(b->flags & SBF_DESTROYED));
+  if (b->n.prev)
+    clist_remove(&b->n);
+  bclose(b->fb);
+  bzero(b, sizeof(*b));
+  b->flags = SBF_DESTROYED;
+}
+
+/*
+ * Returns the size of a bucket: the current position in its fastbuf if the
+ * bucket is open for writing and not swapped out, the recorded size otherwise.
+ */
+ucw_off_t
+sbuck_size(struct sort_bucket *b)
+{
+  if (!(b->flags & SBF_OPEN_WRITE) || (b->flags & SBF_SWAPPED_OUT))
+    return b->size;
+  return btell(b->fb);
+}
+
+/* Returns 1 if the bucket exists (is not NULL) and has non-zero size, 0 otherwise. */
+int
+sbuck_have(struct sort_bucket *b)
+{
+  if (!b)
+    return 0;
+  return sbuck_size(b) ? 1 : 0;
+}
+
+/* Returns 1 if the bucket has a fastbuf attached or is swapped out, 0 otherwise. */
+int
+sbuck_has_file(struct sort_bucket *b)
+{
+  if (b->fb)
+    return 1;
+  return (b->flags & SBF_SWAPPED_OUT) ? 1 : 0;
+}
+
+/*
+ * Re-opens the file of a swapped-out bucket. Buckets open for writing are
+ * positioned at the end of the file. Unless SORT_DEBUG_KEEP_BUCKETS is set,
+ * the file is marked as temporary again. Does nothing for buckets which
+ * are not swapped out.
+ */
+static void
+sbuck_swap_in(struct sort_bucket *b)
+{
+  if (!(b->flags & SBF_SWAPPED_OUT))
+    return;
+
+  b->fb = bopen_file(b->filename, O_RDWR, b->ctx->fb_params);
+  if (b->flags & SBF_OPEN_WRITE)
+    bseek(b->fb, 0, SEEK_END);
+  if (!(sorter_debug & SORT_DEBUG_KEEP_BUCKETS))
+    bconfig(b->fb, BCONFIG_IS_TEMP_FILE, 1);
+  b->flags &= ~SBF_SWAPPED_OUT;
+  SORT_XTRACE(3, "Swapped in %s", b->filename);
+}
+
+/*
+ * Returns the fastbuf of a bucket prepared for reading. A bucket which was
+ * open for writing gets its size recorded, is switched to reading and rewound.
+ * Buckets which are open neither for reading nor for writing are not allowed.
+ */
+struct fastbuf *
+sbuck_read(struct sort_bucket *b)
+{
+  sbuck_swap_in(b);
+  if (!(b->flags & SBF_OPEN_READ))
+    {
+      if (!(b->flags & SBF_OPEN_WRITE))
+        ASSERT(0);
+      b->size = btell(b->fb);
+      b->flags = (b->flags & ~SBF_OPEN_WRITE) | SBF_OPEN_READ;
+      brewind(b->fb);
+    }
+  return b->fb;
+}
+
+/*
+ * Returns the fastbuf of a bucket prepared for writing. A bucket which is
+ * not open for writing yet gets a new temporary file (kept on disk if
+ * SORT_DEBUG_KEEP_BUCKETS is set), whose name is remembered in the bucket.
+ */
+struct fastbuf *
+sbuck_write(struct sort_bucket *b)
+{
+  sbuck_swap_in(b);
+  if (b->flags & SBF_OPEN_WRITE)
+    {
+      ASSERT(b->fb);
+      return b->fb;
+    }
+
+  ASSERT(!(b->flags & (SBF_OPEN_READ | SBF_DESTROYED)));
+  b->fb = bopen_tmp_file(b->ctx->fb_params);
+  if (sorter_debug & SORT_DEBUG_KEEP_BUCKETS)
+    bconfig(b->fb, BCONFIG_IS_TEMP_FILE, 0);
+  b->flags |= SBF_OPEN_WRITE;
+  b->filename = mp_strdup(b->ctx->pool, b->fb->name);
+  return b->fb;
+}
+
+/*
+ * Closes the fastbuf of an open bucket while keeping its file, so that
+ * sbuck_swap_in() can re-open it later. The size of buckets open for writing
+ * is recorded first. Source buckets and buckets without a fastbuf are left alone.
+ */
+void
+sbuck_swap_out(struct sort_bucket *b)
+{
+  if (!(b->flags & (SBF_OPEN_READ | SBF_OPEN_WRITE)) || !b->fb || (b->flags & SBF_SOURCE))
+    return;
+
+  if (b->flags & SBF_OPEN_WRITE)
+    b->size = btell(b->fb);
+  // Clear the temporary-file flag before closing
+  bconfig(b->fb, BCONFIG_IS_TEMP_FILE, 0);
+  bclose(b->fb);
+  b->fb = NULL;
+  b->flags |= SBF_SWAPPED_OUT;
+  SORT_XTRACE(3, "Swapped out %s", b->filename);
+}
+
+/*
+ * Calculates the size of the big sorting buffer: the configured
+ * sorter_bufsize rounded up to a multiple of the page size, but at least
+ * two pages. The buffer itself is allocated later by sorter_alloc_buf().
+ */
+void
+sorter_prepare_buf(struct sort_context *ctx)
+{
+  u64 page = CPU_PAGE_SIZE;
+  u64 bs = sorter_bufsize;
+
+  bs = ALIGN_TO(bs, page);
+  if (bs < 2*page)
+    bs = 2*page;
+  ctx->big_buf_size = bs;
+}
+
+/* Allocates the big sorting buffer unless it is already allocated. */
+void
+sorter_alloc_buf(struct sort_context *ctx)
+{
+  if (!ctx->big_buf)
+    {
+      ctx->big_buf = big_alloc(ctx->big_buf_size);
+      SORT_XTRACE(3, "Allocated sorting buffer (%s)", stk_fsize(ctx->big_buf_size));
+    }
+}
+
+/* Release the big sorting buffer if there is one. */
+void
+sorter_free_buf(struct sort_context *ctx)
+{
+  if (ctx->big_buf)
+    {
+      big_free(ctx->big_buf, ctx->big_buf_size);
+      ctx->big_buf = NULL;
+      SORT_XTRACE(3, "Freed sorting buffer");
+    }
+}
--- /dev/null
+/*
+ * UCW Library -- Testing the Sorter
+ *
+ * (c) 2007 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+#include "ucw/getopt.h"
+#include "ucw/conf.h"
+#include "ucw/fastbuf.h"
+#include "ucw/ff-binary.h"
+#include "ucw/hashfunc.h"
+#include "ucw/md5.h"
+#include "ucw/string.h"
+#include "ucw/prime.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+/*** A hack for overriding radix-sorter configuration ***/
+
+#ifdef FORCE_RADIX_BITS
+#undef CONFIG_UCW_RADIX_SORTER_BITS
+#define CONFIG_UCW_RADIX_SORTER_BITS FORCE_RADIX_BITS
+#endif
+
+/*** Time measurement ***/
+
+static timestamp_t timer;
+static uns test_id;
+
+/* Call sync() and (re)initialize the global timer; called right before the measured sort. */
+static void
+start(void)
+{
+ sync();
+ init_timer(&timer);
+}
+
+/* Call sync() and log the time elapsed since start() for the test given by test_id. */
+static void
+stop(void)
+{
+ sync();
+ /* get_timer() result is divided by 1000 to print seconds */
+ msg(L_INFO, "Test %d took %.3fs", test_id, get_timer(&timer) / 1000.);
+}
+
+/*** Simple 4-byte integer keys ***/
+
+/* Key of the s1 sorter: a single 32-bit integer. */
+struct key1 {
+ u32 x;
+};
+
+/*
+ * Instantiate s1_sort(): regular keys read from and written to fastbufs,
+ * declared unique, ordered by the integer x; the input is not deleted.
+ */
+#define SORT_KEY_REGULAR struct key1
+#define SORT_PREFIX(x) s1_##x
+#define SORT_INPUT_FB
+#define SORT_OUTPUT_FB
+#define SORT_UNIQUE
+#define SORT_INT(k) (k).x
+#define SORT_DELETE_INPUT 0
+
+#include "ucw/sorter/sorter.h"
+
+/*
+ * Sort the integers 0..N-1 written in increasing (mode 0), decreasing
+ * (mode 1) or scrambled (mode 2) order and verify that the output is
+ * exactly 0, 1, ..., N-1. N is derived from the requested data size
+ * in bytes (4 bytes per item).
+ */
+static void
+test_int(int mode, u64 size)
+{
+ uns N = size ? nextprime(MIN(size/4, 0xffff0000)) : 0;
+ uns K = N/4*3;
+ msg(L_INFO, ">>> Integers (%s, N=%u)", ((char *[]) { "increasing", "decreasing", "random" })[mode], N);
+
+ /* Generate the input */
+ struct fastbuf *f = bopen_tmp(65536);
+ for (uns i=0; i<N; i++)
+ bputl(f, (mode==0) ? i : (mode==1) ? N-1-i : ((u64)i * K + 17) % N);
+ brewind(f);
+
+ /* Sort it; N-1 is passed as the integer range */
+ start();
+ f = s1_sort(f, NULL, N-1);
+ stop();
+
+ SORT_XTRACE(2, "Verifying");
+ for (uns i=0; i<N; i++)
+ {
+ uns j = bgetl(f);
+ if (i != j)
+ die("Discrepancy: %u instead of %u", j, i);
+ }
+ bclose(f);
+}
+
+/*** Integers with merging, but no data ***/
+
+struct key2 {
+ u32 x;
+ u32 cnt;
+};
+
+/* Unification callback of the s2 sorter: sum the counters of all n keys into the first key and write it. */
+static inline void s2_write_merged(struct fastbuf *f, struct key2 **k, void **d UNUSED, uns n, void *buf UNUSED)
+{
+  struct key2 *first = k[0];
+  uns i = 1;
+  while (i < n)
+    first->cnt += k[i++]->cnt;
+  bwrite(f, first, sizeof(struct key2));
+}
+
+/*
+ * Instantiate s2_sort(): regular key2 records on fastbufs, ordered by the
+ * integer x, with records of equal keys unified by s2_write_merged().
+ */
+#define SORT_KEY_REGULAR struct key2
+#define SORT_PREFIX(x) s2_##x
+#define SORT_INPUT_FB
+#define SORT_OUTPUT_FB
+#define SORT_UNIFY
+#define SORT_INT(k) (k).x
+
+#include "ucw/sorter/sorter.h"
+
+/*
+ * Test unification: every key 0..N-1 is written 2*mult times with count 1
+ * (in increasing, decreasing or scrambled order according to mode) and the
+ * sorted output must contain each key exactly once with count 2*mult.
+ */
+static void
+test_counted(int mode, u64 size)
+{
+ u64 items = size / sizeof(struct key2);
+ /* Raise the multiplicity until the number of distinct keys fits the limit */
+ uns mult = 2;
+ while (items/(2*mult) > 0xffff0000)
+ mult++;
+ uns N = items ? nextprime(items/(2*mult)) : 0;
+ uns K = N/4*3;
+ msg(L_INFO, ">>> Counted integers (%s, N=%u, mult=%u)", ((char *[]) { "increasing", "decreasing", "random" })[mode], N, mult);
+
+ /* Generate the input: (key, 1) pairs */
+ struct fastbuf *f = bopen_tmp(65536);
+ for (uns m=0; m<mult; m++)
+ for (uns i=0; i<N; i++)
+ for (uns j=0; j<2; j++)
+ {
+ bputl(f, (mode==0) ? (i%N) : (mode==1) ? N-1-(i%N) : ((u64)i * K + 17) % N);
+ bputl(f, 1);
+ }
+ brewind(f);
+
+ start();
+ f = s2_sort(f, NULL, N-1);
+ stop();
+
+ SORT_XTRACE(2, "Verifying");
+ for (uns i=0; i<N; i++)
+ {
+ uns j = bgetl(f);
+ if (i != j)
+ die("Discrepancy: %u instead of %u", j, i);
+ uns k = bgetl(f);
+ if (k != 2*mult)
+ die("Discrepancy: %u has count %u instead of %u", j, k, 2*mult);
+ }
+ bclose(f);
+}
+
+/*** Longer records with hashes (similar to Shepherd's index records) ***/
+
+struct key3 {
+ u32 hash[4];
+ u32 i;
+ u32 payload[3];
+};
+
+/* Compare two key3 records by their four hash words, the first word being the most significant. */
+static inline int s3_compare(struct key3 *x, struct key3 *y)
+{
+  for (uns w=0; w<4; w++)
+    COMPARE(x->hash[w], y->hash[w]);
+  return 0;
+}
+
+/*
+ * Hash function for the s3 sorter: the first hash word. It is also the
+ * first word compared by s3_compare(), so a smaller hash implies a smaller key.
+ */
+static inline uns s3_hash(struct key3 *x)
+{
+ return x->hash[0];
+}
+
+/* Instantiate s3_sort(): regular key3 records on fastbufs with a 32-bit hash. */
+#define SORT_KEY_REGULAR struct key3
+#define SORT_PREFIX(x) s3_##x
+#define SORT_INPUT_FB
+#define SORT_OUTPUT_FB
+#define SORT_HASH_BITS 32
+
+#include "ucw/sorter/sorter.h"
+
+/*
+ * Fill in the i-th test record. The payload is a fixed function of i.
+ * The hash starts with i (mode 0) or ~i (mode 1) and continues with the
+ * payload; in all other modes it is the MD5 of the 4 bytes of i.
+ */
+static void
+gen_hash_key(int mode, struct key3 *k, uns i)
+{
+  k->i = i;
+  k->payload[0] = 7*i + 13;
+  k->payload[1] = 13*i + 19;
+  k->payload[2] = 19*i + 7;
+
+  if (mode == 0 || mode == 1)
+    {
+      k->hash[0] = mode ? ~i : i;
+      for (uns w=0; w<3; w++)
+        k->hash[w+1] = k->payload[w];
+    }
+  else
+    md5_hash_buffer((byte *) &k->hash, (byte *) &k->i, 4);
+}
+
+/*
+ * Sort N records with 128-bit hash keys and verify that the output is
+ * strictly increasing, that every record equals the one regenerated from
+ * its index and that the running sum of hash[3] words cancels out.
+ */
+static void
+test_hashes(int mode, u64 size)
+{
+ uns N = MIN(size / sizeof(struct key3), 0xffffffff);
+ msg(L_INFO, ">>> Hashes (%s, N=%u)", ((char *[]) { "increasing", "decreasing", "random" })[mode], N);
+ struct key3 k, lastk;
+
+ struct fastbuf *f = bopen_tmp(65536);
+ uns hash_sum = 0;
+ for (uns i=0; i<N; i++)
+ {
+ gen_hash_key(mode, &k, i);
+ hash_sum += k.hash[3];
+ bwrite(f, &k, sizeof(k));
+ }
+ brewind(f);
+
+ start();
+ f = s3_sort(f, NULL);
+ stop();
+
+ SORT_XTRACE(2, "Verifying");
+ for (uns i=0; i<N; i++)
+ {
+ int ok = breadb(f, &k, sizeof(k));
+ ASSERT(ok);
+ /* Keys must be strictly increasing */
+ if (i && s3_compare(&k, &lastk) <= 0)
+ ASSERT(0);
+ /* Regenerate the record from its index; lastk then also serves as the previous key */
+ gen_hash_key(mode, &lastk, k.i);
+ if (memcmp(&k, &lastk, sizeof(k)))
+ ASSERT(0);
+ hash_sum -= k.hash[3];
+ }
+ ASSERT(!hash_sum);
+ bclose(f);
+}
+
+/*** Variable-length records (strings) with and without var-length data ***/
+
+#define KEY4_MAX 256
+
+struct key4 {
+ uns len;
+ byte s[KEY4_MAX];
+};
+
+/* Compare two strings bytewise; if one is a prefix of the other, the shorter one is smaller. */
+static inline int s4_compare(struct key4 *x, struct key4 *y)
+{
+  uns common = MIN(x->len, y->len);
+  int diff = memcmp(x->s, y->s, common);
+  if (!diff)
+    COMPARE(x->len, y->len);
+  return diff;
+}
+
+/*
+ * Read a key stored as a 32-bit length followed by the string bytes.
+ * Returns 0 when the length reads as 0xffffffff (treated as end of input), 1 otherwise.
+ */
+static inline int s4_read_key(struct fastbuf *f, struct key4 *x)
+{
+  uns len = bgetl(f);
+  x->len = len;
+  if (len == 0xffffffff)
+    return 0;
+  ASSERT(len < KEY4_MAX);
+  breadb(f, x->s, len);
+  return 1;
+}
+
+/* Write a key as a 32-bit length followed by the string bytes (the format read by s4_read_key). */
+static inline void s4_write_key(struct fastbuf *f, struct key4 *x)
+{
+ ASSERT(x->len < KEY4_MAX);
+ bputl(f, x->len);
+ bwrite(f, x->s, x->len);
+}
+
+/*
+ * Instantiate s4_sort(): variable-length string keys with custom
+ * read/write/compare callbacks and no attached data. The in-memory size
+ * of a key is the structure without the unused tail of the string buffer.
+ */
+#define SORT_KEY struct key4
+#define SORT_PREFIX(x) s4_##x
+#define SORT_KEY_SIZE(x) (sizeof(struct key4) - KEY4_MAX + (x).len)
+#define SORT_INPUT_FB
+#define SORT_OUTPUT_FB
+
+#include "ucw/sorter/sorter.h"
+
+/* The s4b sorter shares the key callbacks of the s4 sorter */
+#define s4b_compare s4_compare
+#define s4b_read_key s4_read_key
+#define s4b_write_key s4_write_key
+
+/* Size of the data attached to a key: derived from the first byte of the string, 0 for empty strings. */
+static inline uns s4_data_size(struct key4 *x)
+{
+ return x->len ? (x->s[0] ^ 0xad) : 0;
+}
+
+/* Instantiate s4b_sort(): the same string keys, each followed by s4_data_size() bytes of data. */
+#define SORT_KEY struct key4
+#define SORT_PREFIX(x) s4b_##x
+#define SORT_KEY_SIZE(x) (sizeof(struct key4) - KEY4_MAX + (x).len)
+#define SORT_DATA_SIZE(x) s4_data_size(&(x))
+#define SORT_INPUT_FB
+#define SORT_OUTPUT_FB
+
+#include "ucw/sorter/sorter.h"
+
+/* Generate a string key: length from random_max(KEY4_MAX), bytes from random(). */
+static void
+gen_key4(struct key4 *k)
+{
+ k->len = random_max(KEY4_MAX);
+ for (uns i=0; i<k->len; i++)
+ k->s[i] = random();
+}
+
+/*
+ * Fill buf with len bytes determined by the seed h: each byte is the
+ * top byte of h, which is then advanced by a linear recurrence.
+ */
+static void
+gen_data4(byte *buf, uns len, uns h)
+{
+  for (uns i=0; i<len; i++)
+    {
+      buf[i] = h >> 24;
+      h = h*259309 + 17;
+    }
+}
+
+/*
+ * Sort N random variable-length strings, optionally (mode != 0) followed
+ * by variable-length data derived from the hash of the string, and verify
+ * that the output is non-decreasing, that the data match their keys and
+ * that the sum of key hashes cancels out.
+ */
+static void
+test_strings(uns mode, u64 size)
+{
+  uns avg_item_size = KEY4_MAX/2 + 4 + (mode ? 128 : 0);
+  uns N = MIN(size / avg_item_size, 0xffffffff);
+  msg(L_INFO, ">>> Strings %s(N=%u)", (mode ? "with data " : ""), N);
+  /* The keys are generated by random(), so it is srandom() which must be seeded, not srand() */
+  srandom(1);
+
+  struct key4 k, lastk;
+  byte buf[256], buf2[256];
+  uns sum = 0;
+
+  /* Generate the input */
+  struct fastbuf *f = bopen_tmp(65536);
+  for (uns i=0; i<N; i++)
+    {
+      gen_key4(&k);
+      s4_write_key(f, &k);
+      uns h = hash_block(k.s, k.len);
+      sum += h;
+      if (mode)
+        {
+          gen_data4(buf, s4_data_size(&k), h);
+          bwrite(f, buf, s4_data_size(&k));
+        }
+    }
+  brewind(f);
+
+  start();
+  f = (mode ? s4b_sort : s4_sort)(f, NULL);
+  stop();
+
+  SORT_XTRACE(2, "Verifying");
+  for (uns i=0; i<N; i++)
+    {
+      int ok = s4_read_key(f, &k);
+      ASSERT(ok);
+      uns h = hash_block(k.s, k.len);
+      if (mode && s4_data_size(&k))
+        {
+          /* The data must be exactly what gen_data4() produces for this key */
+          ok = breadb(f, buf, s4_data_size(&k));
+          ASSERT(ok);
+          gen_data4(buf2, s4_data_size(&k), h);
+          ASSERT(!memcmp(buf, buf2, s4_data_size(&k)));
+        }
+      /* Equal keys are allowed, the sorter is not unique */
+      if (i && s4_compare(&k, &lastk) < 0)
+        ASSERT(0);
+      sum -= h;
+      lastk = k;
+    }
+  ASSERT(!sum);
+  bclose(f);
+}
+
+/*** Graph-like structure with custom presorting ***/
+
+struct key5 {
+ u32 x;
+ u32 cnt;
+};
+
+static uns s5_N, s5_K, s5_L, s5_i, s5_j;
+
+struct s5_pair {
+ uns x, y;
+};
+
+/*
+ * Generator of graph edges driven by the global state s5_i, s5_j:
+ * for each i in 0..N-1 and j in 0..N-1 it produces the pair
+ * (j*K mod N, (i+j)*L mod N). Returns 0 when all pairs have been
+ * produced (or N is zero), 1 otherwise.
+ */
+static int s5_gen(struct s5_pair *p)
+{
+ if (s5_j >= s5_N)
+ {
+ if (!s5_N || s5_i >= s5_N-1)
+ return 0;
+ s5_j = 0;
+ s5_i++;
+ }
+ p->x = ((u64)s5_j * s5_K) % s5_N;
+ p->y = ((u64)(s5_i + s5_j) * s5_L) % s5_N;
+ s5_j++;
+ return 1;
+}
+
+/* Instantiate s5m_sort(): an array sorter of u32 values */
+#define ASORT_PREFIX(x) s5m_##x
+#define ASORT_KEY_TYPE u32
+#include "ucw/sorter/array-simple.h"
+
+/*
+ * In-memory unification callback: concatenate the u32 data arrays of all
+ * n records in the workspace buf, sort them and write a single record
+ * whose count is the total number of values.
+ */
+static void s5_write_merged(struct fastbuf *f, struct key5 **keys, void **data, uns n, void *buf)
+{
+ u32 *a = buf;
+ uns m = 0;
+ for (uns i=0; i<n; i++)
+ {
+ memcpy(&a[m], data[i], 4*keys[i]->cnt);
+ m += keys[i]->cnt;
+ }
+ s5m_sort(a, m);
+ keys[0]->cnt = m;
+ bwrite(f, keys[0], sizeof(struct key5));
+ bwrite(f, a, 4*m);
+}
+
+/*
+ * Streaming unification callback: merge the data of n records, read from
+ * n fastbufs, into one record written to dest. The head value of each
+ * stream is kept in k[] and the smallest head is emitted repeatedly.
+ * NOTE(review): the initial bgetl() assumes every record has cnt >= 1 -- confirm.
+ */
+static void s5_copy_merged(struct key5 **keys, struct fastbuf **data, uns n, struct fastbuf *dest)
+{
+ u32 k[n];
+ uns m = 0;
+ for (uns i=0; i<n; i++)
+ {
+ k[i] = bgetl(data[i]);
+ m += keys[i]->cnt;
+ }
+ struct key5 key = { .x = keys[0]->x, .cnt = m };
+ bwrite(dest, &key, sizeof(key));
+ while (key.cnt--)
+ {
+ /* Find the stream with the smallest head value */
+ uns b = 0;
+ for (uns i=1; i<n; i++)
+ if (k[i] < k[b])
+ b = i;
+ bputl(dest, k[b]);
+ /* Advance the stream; an exhausted stream gets the maximum value as its head */
+ if (--keys[b]->cnt)
+ k[b] = bgetl(data[b]);
+ else
+ k[b] = ~0U;
+ }
+}
+
+/* Ordering of pairs for the presorter: by x first, then by y. */
+static inline int s5p_lt(struct s5_pair x, struct s5_pair y)
+{
+ COMPARE_LT(x.x, y.x);
+ COMPARE_LT(x.y, y.y);
+ return 0;
+}
+
+/* Instantiate s5p_sort(): an array sorter of pairs ordered by s5p_lt() */
+#define ASORT_PREFIX(x) s5p_##x
+#define ASORT_KEY_TYPE struct s5_pair
+#define ASORT_LT(x,y) s5p_lt(x,y)
+#include "ucw/sorter/array.h"
+
+/*
+ * Custom presorter: fill the buffer with as many pairs from s5_gen() as
+ * fit, sort them and write them to dest grouped by x -- one key5 record
+ * per distinct x followed by the y values of the group.
+ * Returns 0 if the generator produced nothing, 1 otherwise.
+ */
+static int s5_presort(struct fastbuf *dest, void *buf, size_t bufsize)
+{
+ uns max = MIN(bufsize/sizeof(struct s5_pair), 0xffffffff);
+ struct s5_pair *a = buf;
+ uns n = 0;
+ while (n<max && s5_gen(&a[n]))
+ n++;
+ if (!n)
+ return 0;
+ s5p_sort(a, n);
+ uns i = 0;
+ while (i < n)
+ {
+ /* [j,i) becomes the run of pairs sharing the same x */
+ uns j = i;
+ while (i < n && a[i].x == a[j].x)
+ i++;
+ struct key5 k = { .x = a[j].x, .cnt = i-j };
+ bwrite(dest, &k, sizeof(k));
+ while (j < i)
+ bputl(dest, a[j++].y);
+ }
+ return 1;
+}
+
+/*
+ * Instantiate s5_sort(): key5 records followed by cnt 32-bit values,
+ * unified by s5_write_merged()/s5_copy_merged(), fed by the custom
+ * presorter s5_presort() and written to a fastbuf supplied by the caller.
+ */
+#define SORT_KEY_REGULAR struct key5
+#define SORT_PREFIX(x) s5_##x
+#define SORT_DATA_SIZE(k) (4*(k).cnt)
+#define SORT_UNIFY
+#define SORT_UNIFY_WORKSPACE(k) SORT_DATA_SIZE(k)
+#define SORT_INPUT_PRESORT
+#define SORT_OUTPUT_THIS_FB
+#define SORT_INT(k) (k).x
+
+#include "ucw/sorter/sorter.h"
+
+/* Instantiate s5b_sort(): the same records and merging callbacks, but with input read from a fastbuf. */
+#define SORT_KEY_REGULAR struct key5
+#define SORT_PREFIX(x) s5b_##x
+#define SORT_DATA_SIZE(k) (4*(k).cnt)
+#define SORT_UNIFY
+#define SORT_UNIFY_WORKSPACE(k) SORT_DATA_SIZE(k)
+#define SORT_INPUT_FB
+#define SORT_OUTPUT_THIS_FB
+#define SORT_INT(k) (k).x
+#define s5b_write_merged s5_write_merged
+#define s5b_copy_merged s5_copy_merged
+
+#include "ucw/sorter/sorter.h"
+
+/*
+ * Sort the N*N pairs produced by s5_gen(), either through the custom
+ * presorter (mode 0) or from a fastbuf with one pair per record (mode 1).
+ * The output is appended to a fastbuf which already contains a marker
+ * word and must consist of N records with keys 0..N-1, each carrying
+ * the values 0..N-1.
+ */
+static void
+test_graph(uns mode, u64 size)
+{
+ uns N = 3;
+ while ((u64)N*(N+2)*4 < size)
+ N = nextprime(N);
+ if (!size)
+ N = 0;
+ msg(L_INFO, ">>> Graph%s (N=%u)", (mode ? "" : " with custom presorting"), N);
+ /* Reset the generator state */
+ s5_N = N;
+ s5_K = N/4*3;
+ s5_L = N/3*2;
+ s5_i = s5_j = 0;
+
+ struct fastbuf *in = NULL;
+ if (mode)
+ {
+ /* Write every pair as a separate record with a single value */
+ struct s5_pair p;
+ in = bopen_tmp(65536);
+ while (s5_gen(&p))
+ {
+ struct key5 k = { .x = p.x, .cnt = 1 };
+ bwrite(in, &k, sizeof(k));
+ bputl(in, p.y);
+ }
+ brewind(in);
+ }
+
+ start();
+ struct fastbuf *f = bopen_tmp(65536);
+ /* Marker word: checked below to be still present in front of the sorted data */
+ bputl(f, 0xfeedcafe);
+ struct fastbuf *g = (mode ? s5b_sort(in, f, s5_N-1) : s5_sort(NULL, f, s5_N-1));
+ ASSERT(f == g);
+ stop();
+
+ SORT_XTRACE(2, "Verifying");
+ uns c = bgetl(f);
+ ASSERT(c == 0xfeedcafe);
+ for (uns i=0; i<N; i++)
+ {
+ struct key5 k;
+ int ok = breadb(f, &k, sizeof(k));
+ ASSERT(ok);
+ ASSERT(k.x == i);
+ ASSERT(k.cnt == N);
+ for (uns j=0; j<N; j++)
+ {
+ uns y = bgetl(f);
+ ASSERT(y == j);
+ }
+ }
+ bclose(f);
+}
+
+/*** Simple 8-byte integer keys ***/
+
+/* Key of the s6 sorter: a single 64-bit integer. */
+struct key6 {
+ u64 x;
+};
+
+/* Instantiate s6_sort(): regular keys on fastbufs, declared unique, ordered by the 64-bit integer x. */
+#define SORT_KEY_REGULAR struct key6
+#define SORT_PREFIX(x) s6_##x
+#define SORT_INPUT_FB
+#define SORT_OUTPUT_FB
+#define SORT_UNIQUE
+#define SORT_INT64(k) (k).x
+
+#include "ucw/sorter/sorter.h"
+
+/*
+ * 64-bit variant of test_int(): the values are 777777*i for i in 0..N-1,
+ * written in increasing, decreasing or scrambled order according to mode,
+ * and the sorted output must be 0, 777777, 2*777777, ...
+ */
+static void
+test_int64(int mode, u64 size)
+{
+ u64 N = size ? nextprime(MIN(size/8, 0xffff0000)) : 0;
+ u64 K = N/4*3;
+ msg(L_INFO, ">>> 64-bit integers (%s, N=%llu)", ((char *[]) { "increasing", "decreasing", "random" })[mode], (long long)N);
+
+ struct fastbuf *f = bopen_tmp(65536);
+ for (u64 i=0; i<N; i++)
+ bputq(f, 777777*((mode==0) ? i : (mode==1) ? N-1-i : ((u64)i * K + 17) % N));
+ brewind(f);
+
+ /* The largest value is passed as the integer range */
+ start();
+ f = s6_sort(f, NULL, 777777*(N-1));
+ stop();
+
+ SORT_XTRACE(2, "Verifying");
+ for (u64 i=0; i<N; i++)
+ {
+ u64 j = bgetq(f);
+ if (777777*i != j)
+ die("Discrepancy: %llu instead of %llu", (long long)j, 777777*(long long)i);
+ }
+ bclose(f);
+}
+
+/*** Main ***/
+
+/*
+ * Run the test number i on approximately `size' bytes of data and record
+ * the number in test_id for the timing message. Numbers outside the table
+ * below do nothing. TMAX (the number of tests) is defined at the end of
+ * the table so that it stays next to the last case.
+ */
+static void
+run_test(uns i, u64 size)
+{
+ test_id = i;
+ switch (i)
+ {
+ case 0:
+ test_int(0, size); break;
+ case 1:
+ test_int(1, size); break;
+ case 2:
+ test_int(2, size); break;
+ case 3:
+ test_counted(0, size); break;
+ case 4:
+ test_counted(1, size); break;
+ case 5:
+ test_counted(2, size); break;
+ case 6:
+ test_hashes(0, size); break;
+ case 7:
+ test_hashes(1, size); break;
+ case 8:
+ test_hashes(2, size); break;
+ case 9:
+ test_strings(0, size); break;
+ case 10:
+ test_strings(1, size); break;
+ case 11:
+ test_graph(0, size); break;
+ case 12:
+ test_graph(1, size); break;
+ case 13:
+ test_int64(0, size); break;
+ case 14:
+ test_int64(1, size); break;
+ case 15:
+ test_int64(2, size); break;
+#define TMAX 16
+ }
+}
+
+/*
+ * Entry point of the sorter test.
+ *
+ * Options (in addition to the standard configuration options):
+ *   -d <debug>   set sorter_debug
+ *   -s <size>    approximate amount of data per test (default 10000000)
+ *   -t <tests>   comma-separated list of test numbers to run (default: all)
+ *   -v           increase sorter_trace (can be repeated)
+ */
+int
+main(int argc, char **argv)
+{
+  log_init(NULL);
+  int c;
+  u64 size = 10000000;
+  uns t = ~0;			/* Bitmap of tests to run */
+
+  while ((c = cf_getopt(argc, argv, CF_SHORT_OPTS "d:s:t:v", CF_NO_LONG_OPTS, NULL)) >= 0)
+    switch (c)
+      {
+      case 'd':
+        sorter_debug = atol(optarg);
+        break;
+      case 's':
+        if (cf_parse_u64(optarg, &size))
+          goto usage;
+        break;
+      case 't':
+        {
+          char *w[32];
+          int f = str_sepsplit(optarg, ',', w, ARRAY_SIZE(w));
+          if (f < 0)
+            goto usage;
+          t = 0;
+          for (int i=0; i<f; i++)
+            {
+              int j = atol(w[i]);
+              /* Negative numbers must be rejected as well, shifting by them is undefined */
+              if (j < 0 || j >= TMAX)
+                goto usage;
+              t |= 1U << j;
+            }
+        }
+        break;
+      case 'v':
+        sorter_trace++;
+        break;
+      default:
+      usage:
+        fputs("Usage: sort-test [-v] [-d <debug>] [-s <size>] [-t <test>]\n", stderr);
+        exit(1);
+      }
+  /* No non-option arguments are accepted */
+  if (optind != argc)
+    goto usage;
+
+  for (uns i=0; i<TMAX; i++)
+    if (t & (1U << i))
+      run_test(i, size);
+
+  return 0;
+}
--- /dev/null
+/*
+ * UCW Library -- Universal Sorter
+ *
+ * (c) 2001--2007 Martin Mares <mj@ucw.cz>
+ * (c) 2004 Robert Spalek <robert@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+/*
+ * This is not a normal header file, but a generator of sorting
+ * routines. Each time you include it with parameters set in the
+ * corresponding preprocessor macros, it generates a file sorter
+ * with the parameters given.
+ *
+ * The sorter operates on fastbufs containing sequences of items. Each item
+ * consists of a key, optionally followed by data. The keys are represented
+ * by fixed-size structures of type SORT_KEY internally, if this format differs
+ * from the on-disk format, explicit reading and writing routines can be provided.
+ * The data are always copied verbatim, unless the sorter is in the merging
+ * mode in which it calls callbacks for merging of items with equal keys.
+ *
+ * All callbacks must be thread-safe.
+ *
+ * Basic parameters and callbacks:
+ *
+ * SORT_PREFIX(x) add a name prefix (used on all global names defined by the sorter)
+ *
+ * SORT_KEY data type capable of holding a single key in memory (the on-disk
+ * representation can be different). Alternatively, you can use:
+ * SORT_KEY_REGULAR data type holding a single key both in memory and on disk;
+ * in this case, bread() and bwrite() is used to read/write keys
+ * and it's also assumed that the keys are not very long.
+ * int PREFIX_compare(SORT_KEY *a, SORT_KEY *b)
+ * compares two keys, returns result like strcmp(). Mandatory.
+ * int PREFIX_read_key(struct fastbuf *f, SORT_KEY *k)
+ * reads a key from a fastbuf, returns nonzero=ok, 0=EOF.
+ * Mandatory unless SORT_KEY_REGULAR is defined.
+ * void PREFIX_write_key(struct fastbuf *f, SORT_KEY *k)
+ * writes a key to a fastbuf. Mandatory unless SORT_KEY_REGULAR.
+ *
+ * SORT_KEY_SIZE(key) returns the real size of a key (a SORT_KEY type in memory
+ * can be truncated to this number of bytes without any harm;
+ * used to save memory when the keys have variable sizes).
+ * Default: always store the whole SORT_KEY.
+ * SORT_DATA_SIZE(key) gets a key and returns the amount of data following it.
+ * Default: records consist of keys only.
+ *
+ * Integer sorting:
+ *
+ * SORT_INT(key) we are sorting by an integer value returned by this macro.
+ * In this mode, PREFIX_compare is supplied automatically and the sorting
+ * function gets an extra parameter specifying the range of the integers.
+ * The better the range fits, the faster we sort.
+ * Sets up SORT_HASH_xxx automatically.
+ * SORT_INT64(key) the same for 64-bit integers.
+ *
+ * Hashing (optional, but it can speed sorting up):
+ *
+ * SORT_HASH_BITS signals that a monotone hashing function returning a given number of
+ * bits is available. A monotone hash is a function f from keys to integers
+ * such that f(x) < f(y) implies x < y, which is approximately uniformly
+ * distributed. It should be declared as:
+ * uns PREFIX_hash(SORT_KEY *a)
+ *
+ * Unification:
+ *
+ * SORT_UNIFY merge items with identical keys. It requires the following functions:
+ * void PREFIX_write_merged(struct fastbuf *f, SORT_KEY **keys, void **data, uns n, void *buf)
+ * takes n records in memory with keys which compare equal and writes
+ * a single record to the given fastbuf. `buf' points to a buffer which
+ * is guaranteed to hold the sum of workspace requirements (see below)
+ * over all given records. The function is allowed to modify all its inputs.
+ * void PREFIX_copy_merged(SORT_KEY **keys, struct fastbuf **data, uns n, struct fastbuf *dest)
+ * takes n records with keys in memory and data in fastbufs and writes
+ * a single record. Used only if SORT_DATA_SIZE or SORT_UNIFY_WORKSPACE
+ * is defined.
+ * SORT_UNIFY_WORKSPACE(key)
+ * gets a key and returns the amount of workspace required when merging
+ * the given record. Defaults to 0.
+ *
+ * Input (choose one of these):
+ *
+ * SORT_INPUT_FILE file of a given name
+ * SORT_INPUT_FB seekable fastbuf stream
+ * SORT_INPUT_PIPE non-seekable fastbuf stream
+ * SORT_INPUT_PRESORT custom presorter. Calls function
+ * int PREFIX_presort(struct fastbuf *dest, void *buf, size_t bufsize)
+ * to get successive batches of pre-sorted data.
+ * The function is passed a page-aligned presorting buffer.
+ * It returns 1 on success or 0 on EOF.
+ * SORT_DELETE_INPUT A C expression, if true, then the input files are deleted
+ * as soon as possible.
+ *
+ * Output (choose one of these):
+ *
+ * SORT_OUTPUT_FILE file of a given name
+ * SORT_OUTPUT_FB temporary fastbuf stream
+ * SORT_OUTPUT_THIS_FB a given fastbuf stream which can already contain some data
+ *
+ * Other switches:
+ *
+ * SORT_UNIQUE all items have distinct keys (checked in debug mode)
+ *
+ * The function generated:
+ *
+ * <outfb> PREFIX_sort(<in>, <out> [,<range>]), where:
+ * <in> = input file name/fastbuf or NULL
+ * <out> = output file name/fastbuf or NULL
+ * <range> = maximum integer value for the SORT_INT mode
+ *
+ * After including this file, all parameter macros are automatically
+ * undef'd.
+ */
+
+#include "ucw/sorter/common.h"
+#include "ucw/fastbuf.h"
+
+#include <fcntl.h>
+
+#define P(x) SORT_PREFIX(x)
+
+/*
+ * Key type. For SORT_KEY_REGULAR, the key has the same representation in
+ * memory and on disk, so default read_key/write_key routines are generated
+ * which transfer the raw structure. For SORT_KEY, the user supplies them.
+ */
+#ifdef SORT_KEY_REGULAR
+typedef SORT_KEY_REGULAR P(key);
+/* Returns the result of breadb() on the whole key structure */
+static inline int P(read_key) (struct fastbuf *f, P(key) *k)
+{
+ return breadb(f, k, sizeof(P(key)));
+}
+static inline void P(write_key) (struct fastbuf *f, P(key) *k)
+{
+ bwrite(f, k, sizeof(P(key)));
+}
+#elif defined(SORT_KEY)
+typedef SORT_KEY P(key);
+#else
+#error Missing definition of sorting key.
+#endif
+
+#ifdef SORT_INT64
+typedef u64 P(hash_t);
+#define SORT_INT SORT_INT64
+#define SORT_LONG_HASH
+#else
+typedef uns P(hash_t);
+#endif
+
+/*
+ * Integer sorting mode: the comparison function is generated from SORT_INT
+ * and, unless the user has supplied a hash of their own (SORT_HASH_BITS),
+ * the integer itself serves as the hash.
+ */
+#ifdef SORT_INT
+static inline int P(compare) (P(key) *x, P(key) *y)
+{
+ if (SORT_INT(*x) < SORT_INT(*y))
+ return -1;
+ if (SORT_INT(*x) > SORT_INT(*y))
+ return 1;
+ return 0;
+}
+
+#ifndef SORT_HASH_BITS
+static inline P(hash_t) P(hash) (P(key) *x)
+{
+ return SORT_INT((*x));
+}
+#endif
+#endif
+
+#ifdef SORT_UNIFY
+#define LESS <
+#else
+#define LESS <=
+#endif
+#define SWAP(x,y,z) do { z=x; x=y; y=z; } while(0)
+
+#if defined(SORT_UNIQUE) && defined(DEBUG_ASSERTS)
+#define SORT_ASSERT_UNIQUE
+#endif
+
+#ifdef SORT_KEY_SIZE
+#define SORT_VAR_KEY
+#else
+#define SORT_KEY_SIZE(key) sizeof(key)
+#endif
+
+#ifdef SORT_DATA_SIZE
+#define SORT_VAR_DATA
+#else
+#define SORT_DATA_SIZE(key) 0
+#endif
+
+/*
+ * Copy one record to `out': the key is written from memory, the data
+ * following it (if the sorter has any) are copied from `in'.
+ */
+static inline void P(copy_data)(P(key) *key, struct fastbuf *in, struct fastbuf *out)
+{
+ P(write_key)(out, key);
+#ifdef SORT_VAR_DATA
+ bbcopy(in, out, SORT_DATA_SIZE(*key));
+#else
+ (void) in;
+#endif
+}
+
+/*
+ * When unifying records which have neither data nor workspace, copy_merged
+ * is generated automatically as a call to write_merged without data and buffer.
+ */
+#if defined(SORT_UNIFY) && !defined(SORT_VAR_DATA) && !defined(SORT_UNIFY_WORKSPACE)
+static inline void P(copy_merged)(P(key) **keys, struct fastbuf **data UNUSED, uns n, struct fastbuf *dest)
+{
+ P(write_merged)(dest, keys, NULL, n, NULL);
+}
+#endif
+
+#if defined(SORT_HASH_BITS) || defined(SORT_INT)
+#define SORT_INTERNAL_RADIX
+#include "ucw/sorter/s-radix.h"
+#endif
+
+#if defined(SORT_VAR_KEY) || defined(SORT_VAR_DATA) || defined(SORT_UNIFY_WORKSPACE)
+#include "ucw/sorter/s-internal.h"
+#else
+#include "ucw/sorter/s-fixint.h"
+#endif
+
+#include "ucw/sorter/s-twoway.h"
+#include "ucw/sorter/s-multiway.h"
+
+/*
+ * The generated sorting function. It fills in a sort_context according to
+ * the parameter macros (input, output, hashing and the generated callbacks)
+ * and hands it over to sorter_run().
+ *
+ * Returns the output fastbuf, or NULL in the SORT_OUTPUT_FILE mode, where
+ * the output is passed to bfix_tmp_file() together with the requested name.
+ */
+static struct fastbuf *P(sort)(
+#ifdef SORT_INPUT_FILE
+ byte *in,
+#else
+ struct fastbuf *in,
+#endif
+#ifdef SORT_OUTPUT_FILE
+ byte *out
+#else
+ struct fastbuf *out
+#endif
+#ifdef SORT_INT
+ , u64 int_range
+#endif
+ )
+{
+ struct sort_context ctx;
+ bzero(&ctx, sizeof(ctx));
+
+ /* Input; an input of unknown size is marked by in_size of all ones */
+#ifdef SORT_INPUT_FILE
+ ctx.in_fb = bopen_file(in, O_RDONLY, &sorter_fb_params);
+ ctx.in_size = bfilesize(ctx.in_fb);
+#elif defined(SORT_INPUT_FB)
+ ctx.in_fb = in;
+ ctx.in_size = bfilesize(in);
+#elif defined(SORT_INPUT_PIPE)
+ ctx.in_fb = in;
+ ctx.in_size = ~(u64)0;
+#elif defined(SORT_INPUT_PRESORT)
+ ASSERT(!in);
+ ctx.custom_presort = P(presort);
+ ctx.in_size = ~(u64)0;
+#else
+#error No input given.
+#endif
+ /* NOTE(review): with SORT_INPUT_PRESORT, ctx.in_fb is NULL here -- confirm that SORT_DELETE_INPUT is never true in that mode */
+#ifdef SORT_DELETE_INPUT
+ if (SORT_DELETE_INPUT)
+ bconfig(ctx.in_fb, BCONFIG_IS_TEMP_FILE, 1);
+#endif
+
+ /* Output */
+#ifdef SORT_OUTPUT_FB
+ ASSERT(!out);
+#elif defined(SORT_OUTPUT_THIS_FB)
+ ctx.out_fb = out;
+#elif defined(SORT_OUTPUT_FILE)
+ /* Just assume fastbuf output and rename the fastbuf later */
+#else
+#error No output given.
+#endif
+
+ /* Hashing: in integer mode, hash_bits is the number of bits needed to represent int_range */
+#ifdef SORT_HASH_BITS
+ ctx.hash_bits = SORT_HASH_BITS;
+ ctx.radix_split = P(radix_split);
+#elif defined(SORT_INT)
+ ctx.hash_bits = 0;
+ while (ctx.hash_bits < 64 && (int_range >> ctx.hash_bits))
+ ctx.hash_bits++;
+ ctx.radix_split = P(radix_split);
+#endif
+
+ ctx.internal_sort = P(internal);
+ ctx.internal_estimate = P(internal_estimate);
+ ctx.twoway_merge = P(twoway_merge);
+ ctx.multiway_merge = P(multiway_merge);
+
+ sorter_run(&ctx);
+
+#ifdef SORT_OUTPUT_FILE
+ bfix_tmp_file(ctx.out_fb, out);
+ ctx.out_fb = NULL;
+#endif
+ return ctx.out_fb;
+}
+
+#undef SORT_ASSERT_UNIQUE
+#undef SORT_DATA_SIZE
+#undef SORT_DELETE_INPUT
+#undef SORT_HASH_BITS
+#undef SORT_INPUT_FB
+#undef SORT_INPUT_FILE
+#undef SORT_INPUT_PIPE
+#undef SORT_INPUT_PRESORT
+#undef SORT_INT
+#undef SORT_INT64
+#undef SORT_INTERNAL_RADIX
+#undef SORT_KEY
+#undef SORT_KEY_REGULAR
+#undef SORT_KEY_SIZE
+#undef SORT_LONG_HASH
+#undef SORT_OUTPUT_FB
+#undef SORT_OUTPUT_FILE
+#undef SORT_OUTPUT_THIS_FB
+#undef SORT_PREFIX
+#undef SORT_UNIFY
+#undef SORT_UNIFY_WORKSPACE
+#undef SORT_UNIQUE
+#undef SORT_VAR_DATA
+#undef SORT_VAR_KEY
+#undef SWAP
+#undef LESS
+#undef P
--- /dev/null
+/*
+ * UCW Library -- Strings Allocated on the Stack
+ *
+ * (c) 2005--2007 Martin Mares <mj@ucw.cz>
+ * (c) 2005 Tomas Valla <tom@ucw.cz>
+ * (c) 2008 Pavel Charvat <pchar@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+#include "ucw/stkstring.h"
+#include "ucw/string.h"
+
+#include <stdio.h>
+
+/* Returns the total length of cnt strings plus one byte for the terminating zero. */
+uns
+stk_array_len(char **s, uns cnt)
+{
+  uns total = 1;
+  for (uns i=0; i<cnt; i++)
+    total += strlen(s[i]);
+  return total;
+}
+
+/*
+ * Concatenate cnt strings into x and terminate the result by a zero byte.
+ * If sep is non-zero, it is inserted between consecutive strings.
+ */
+void
+stk_array_join(char *x, char **s, uns cnt, uns sep)
+{
+  for (uns i=0; i<cnt; i++)
+    {
+      uns len = strlen(s[i]);
+      memcpy(x, s[i], len);
+      x += len;
+      /* No separator after the last string */
+      if (sep && i+1 < cnt)
+        *x++ = sep;
+    }
+  *x = 0;
+}
+
+/*
+ * Returns the number of bytes (including the terminating zero) needed
+ * for the result of formatting fmt with the given arguments. The output
+ * itself is only written to a scratch buffer and thrown away.
+ */
+uns
+stk_printf_internal(const char *fmt, ...)
+{
+ uns len = 256;
+ char *buf = alloca(len);
+ va_list args, args2;
+ va_start(args, fmt);
+ for (;;)
+ {
+ /* Each attempt consumes its own copy of the argument list */
+ va_copy(args2, args);
+ int l = vsnprintf(buf, len, fmt, args2);
+ va_end(args2);
+ /* NOTE(review): a negative result is retried with a doubled buffer; presumably meant for C libraries returning -1 on truncation -- a genuine formatting error would loop until the stack is exhausted */
+ if (l < 0)
+ len *= 2;
+ else
+ {
+ va_end(args);
+ return l+1;
+ }
+ buf = alloca(len);
+ }
+}
+
+/*
+ * The va_list variant of stk_printf_internal(): returns the number of bytes
+ * (including the terminating zero) needed for the formatted output.
+ *
+ * The caller's argument list is only copied, never consumed nor ended here:
+ * it has been started by the caller, who is also going to use it again
+ * (see stk_vprintf) and who is responsible for its va_end().
+ */
+uns
+stk_vprintf_internal(const char *fmt, va_list args)
+{
+  uns len = 256;
+  char *buf = alloca(len);
+  va_list args2;
+  for (;;)
+    {
+      va_copy(args2, args);
+      int l = vsnprintf(buf, len, fmt, args2);
+      va_end(args2);
+      if (l >= 0)
+        return l+1;
+      /* Negative result: retry with a larger scratch buffer */
+      len *= 2;
+      buf = alloca(len);
+    }
+}
+
+/* Write a hexadecimal dump of n bytes at src to dst, with the bytes separated by spaces. */
+void
+stk_hexdump_internal(char *dst, const byte *src, uns n)
+{
+ mem_to_hex(dst, src, n, ' ');
+}
+
+/*
+ * Format a size in bytes in a human-readable form (B, K, M or G with at
+ * most one decimal place) to buf; the value with all bits set is printed
+ * as "unknown". The longest possible output is 11 digits, the unit and
+ * the terminating zero, which fits the 16-byte buffer used by stk_fsize().
+ */
+void
+stk_fsize_internal(char *buf, u64 x)
+{
+  if (x < 1<<10)
+    sprintf(buf, "%dB", (int)x);
+  else if (x < 10<<10)
+    sprintf(buf, "%.1fK", (double)x/(1<<10));
+  else if (x < 1<<20)
+    sprintf(buf, "%dK", (int)(x/(1<<10)));
+  else if (x < 10<<20)
+    sprintf(buf, "%.1fM", (double)x/(1<<20));
+  else if (x < 1<<30)
+    sprintf(buf, "%dM", (int)(x/(1<<20)));
+  else if (x < (u64)10<<30)
+    sprintf(buf, "%.1fG", (double)x/(1<<30));
+  else if (x != ~(u64)0)
+    /* The number of gigabytes can be as large as 2^34, which does not fit in an int */
+    sprintf(buf, "%lluG", (unsigned long long)(x >> 30));
+  else
+    strcpy(buf, "unknown");
+}
+
+#ifdef TEST
+
+/* Self-test: builds a sentence using the stk_* macros and prints the results (compared with stkstring.t). */
+int main(void)
+{
+ char *a = stk_strndup("are!",3);
+ a = stk_strcat(a, " the ");
+ /* The trailing NULL terminates the argument list of stk_strmulticat */
+ a = stk_strmulticat(a, stk_strdup("Jabberwock, "), "my", NULL);
+ char *arr[] = { a, " son" };
+ a = stk_strarraycat(arr, 2);
+ a = stk_printf("Bew%s!", a);
+ puts(a);
+ puts(stk_hexdump(a, 3));
+ char *ary[] = { "The", "jaws", "that", "bite" };
+ puts(stk_strjoin(ary, 4, ' '));
+ puts(stk_fsize(1234567));
+ return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Strings Allocated on the Stack
+ *
+ * (c) 2005--2007 Martin Mares <mj@ucw.cz>
+ * (c) 2005 Tomas Valla <tom@ucw.cz>
+ * (c) 2008 Pavel Charvat <pchar@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_STKSTRING_H
+#define _UCW_STKSTRING_H
+
+#include <alloca.h>
+#include <string.h>
+#include <stdio.h>
+
+/*
+ * All the macros below allocate their result by alloca() in the stack
+ * frame of the calling function and evaluate to a pointer to it.
+ */
+
+/* Copy of a string / of at most n characters of a string */
+#define stk_strdup(s) ({ const char *_s=(s); uns _l=strlen(_s)+1; char *_x=alloca(_l); memcpy(_x, _s, _l); _x; })
+#define stk_strndup(s,n) ({ const char *_s=(s); uns _l=strnlen(_s,(n)); char *_x=alloca(_l+1); memcpy(_x, _s, _l); _x[_l]=0; _x; })
+/* Concatenation of two strings, of a NULL-terminated list of strings and of an array of n strings */
+#define stk_strcat(s1,s2) ({ const char *_s1=(s1); const char *_s2=(s2); uns _l1=strlen(_s1); uns _l2=strlen(_s2); char *_x=alloca(_l1+_l2+1); memcpy(_x,_s1,_l1); memcpy(_x+_l1,_s2,_l2+1); _x; })
+#define stk_strmulticat(s...) ({ char *_s[]={s}; char *_x=alloca(stk_array_len(_s, ARRAY_SIZE(_s)-1)); stk_array_join(_x, _s, ARRAY_SIZE(_s)-1, 0); _x; })
+#define stk_strarraycat(s,n) ({ char **_s=(s); int _n=(n); char *_x=alloca(stk_array_len(_s,_n)); stk_array_join(_x, _s, _n, 0); _x; })
+/* Array of n strings joined by a separator; the n-1 separators are accounted for only if n > 0, so that an empty array still gets room for the terminating zero */
+#define stk_strjoin(s,n,sep) ({ char **_s=(s); int _n=(n); char *_x=alloca(stk_array_len(_s,_n)+(_n>0 ? _n-1 : 0)); stk_array_join(_x, _s, _n, (sep)); _x; })
+/* Formatted output; the arguments are evaluated twice (once to measure, once to print) */
+#define stk_printf(f...) ({ uns _l=stk_printf_internal(f); char *_x=alloca(_l); sprintf(_x, f); _x; })
+#define stk_vprintf(f, args) ({ uns _l=stk_vprintf_internal(f, args); char *_x=alloca(_l); vsprintf(_x, f, args); _x; })
+/* Hexadecimal dump of n bytes: two digits per byte plus separators and the terminating zero */
+#define stk_hexdump(s,n) ({ uns _n=(n); char *_x=alloca(3*_n+1); stk_hexdump_internal(_x,(const byte*)(s),_n); _x; })
+/* String with escape sequences expanded by str_unesc(); the result is never longer than the input */
+#define stk_str_unesc(s) ({ const char *_s=(s); char *_d=alloca(strlen(_s)+1); str_unesc(_d, _s); _d; })
+/* Human-readable file size, see stk_fsize_internal() */
+#define stk_fsize(n) ({ char *_s=alloca(16); stk_fsize_internal(_s, n); _s; })
+
+uns stk_array_len(char **s, uns cnt);
+void stk_array_join(char *x, char **s, uns cnt, uns sep);
+uns stk_printf_internal(const char *x, ...) FORMAT_CHECK(printf,1,2);
+uns stk_vprintf_internal(const char *x, va_list args);
+void stk_hexdump_internal(char *dst, const byte *src, uns n);
+void stk_fsize_internal(char *dst, u64 size);
+
+#endif
--- /dev/null
+# Tests for stkstring modules
+
+Run: ../obj/ucw/stkstring-t
+Out: Beware the Jabberwock, my son!
+ 42 65 77
+ The jaws that bite
+ 1.2M
--- /dev/null
+/*
+ * UCW Library -- String Unescaping
+ *
+ * (c) 2006 Pavel Charvat <pchar@ucw.cz>
+ * (c) 2007 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#undef LOCAL_DEBUG
+
+#include "ucw/lib.h"
+#include "ucw/string.h"
+#include "ucw/chartype.h"
+#include <stdlib.h>
+
+/*
+ * Expands C99-like escape sequences in the string s, writes the result
+ * to d and returns a pointer to the terminating zero byte of the result.
+ * It is safe to use the same buffer for both input and output.
+ *
+ * Hexadecimal and octal escapes with values above 255 produce no output;
+ * a backslash followed by an unknown character is copied verbatim.
+ */
+char *
+str_unesc(char *d, const char *s)
+{
+  while (*s)
+    {
+      if (*s == '\\')
+        switch (s[1])
+          {
+          case 'a': *d++ = '\a'; s += 2; break;
+          case 'b': *d++ = '\b'; s += 2; break;
+          case 'f': *d++ = '\f'; s += 2; break;
+          case 'n': *d++ = '\n'; s += 2; break;
+          case 'r': *d++ = '\r'; s += 2; break;
+          case 't': *d++ = '\t'; s += 2; break;
+          case 'v': *d++ = '\v'; s += 2; break;
+          case '\?': *d++ = '\?'; s += 2; break;
+          case '\'': *d++ = '\''; s += 2; break;
+          case '\"': *d++ = '\"'; s += 2; break;
+          case '\\': *d++ = '\\'; s += 2; break;
+          case 'x':
+            if (!Cxdigit(s[2]))
+              {
+                /* Drop the backslash, the `x' is copied by the next iteration */
+                s++;
+                DBG("\\x used with no following hex digits");
+              }
+            else
+              {
+                char *p;
+                /* Keep the full width of the result, otherwise large values could wrap around into the valid range */
+                unsigned long v = strtoul(s + 2, &p, 16);
+                if (v <= 255)
+                  *d++ = v;
+                else
+                  DBG("hex escape sequence out of range");
+                s = (char *)p;
+              }
+            break;
+          default:
+            if (s[1] >= '0' && s[1] <= '7')
+              {
+                /* Octal escape: one to three digits */
+                uns v = s[1] - '0';
+                s += 2;
+                for (uns i = 0; i < 2 && *s >= '0' && *s <= '7'; s++, i++)
+                  v = (v << 3) + *s - '0';
+                if (v <= 255)
+                  *d++ = v;
+                else
+                  DBG("octal escape sequence out of range");
+              }
+            else
+              *d++ = *s++;
+            break;
+          }
+      else
+        *d++ = *s++;
+    }
+  *d = 0;
+  return d;
+}
+
+#ifdef TEST
+
+#include <stdio.h>
+#include <string.h>
+
+/* Self-test: unescape the first argument and print the result as a hex dump. */
+int main(int argc, char **argv)
+{
+  if (argc < 2)
+    return 1;
+
+  char tmp[strlen(argv[1]) + 1];
+  int len = str_unesc(tmp, argv[1]) - tmp;
+
+  /* With a separator, mem_to_hex() produces 3 characters per byte (the last separator being replaced by the terminating zero) */
+  char hex[3*len + 1];
+  mem_to_hex(hex, tmp, len, ' ');
+  puts(hex);
+
+  return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Hexdumping and Unhexdumping
+ *
+ * (c) 2008 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+#include "ucw/string.h"
+#include "ucw/chartype.h"
+
+/* Convert a value 0..15 to the corresponding lowercase hexadecimal digit. */
+static uns
+hex_make(uns x)
+{
+  if (x < 10)
+    return x + '0';
+  return x - 10 + 'a';
+}
+
+/*
+ * Write a zero-terminated hexadecimal dump of `bytes' bytes at src to dest.
+ * The lowest 8 bits of flags give a separator character inserted between
+ * consecutive bytes (0 for none), MEM_TO_HEX_UPCASE selects upper-case digits.
+ */
+void
+mem_to_hex(char *dest, const byte *src, uns bytes, uns flags)
+{
+  uns sep = flags & 0xff;
+
+  for (uns i=0; i<bytes; i++)
+    {
+      uns hi = hex_make(src[i] >> 4);
+      uns lo = hex_make(src[i] & 0x0f);
+      if (flags & MEM_TO_HEX_UPCASE)
+        {
+          hi = Cupcase(hi);
+          lo = Cupcase(lo);
+        }
+      *dest++ = hi;
+      *dest++ = lo;
+      /* No separator after the last byte */
+      if (sep && i+1 < bytes)
+        *dest++ = sep;
+    }
+  *dest = 0;
+}
+
+/* Convert a hexadecimal digit (in either case) to its value. */
+static uns
+hex_parse(uns c)
+{
+  uns v = Cupcase(c);
+  v -= '0';
+  if (v < 10)
+    return v;
+  /* Letters: 'A' - '0' is 17, which should map to 10 */
+  return v - 7;
+}
+
+/*
+ * Parse at most max_bytes bytes written as pairs of hexadecimal digits in
+ * src and store them to dest. The lowest 8 bits of flags give a separator
+ * character expected between consecutive bytes (0 for none).
+ * Returns a pointer to the first character which was not parsed.
+ */
+const char *
+hex_to_mem(byte *dest, const char *src, uns max_bytes, uns flags)
+{
+  uns sep = flags & 0xff;
+  uns left = max_bytes;
+
+  while (left && Cxdigit(src[0]) && Cxdigit(src[1]))
+    {
+      left--;
+      *dest++ = (hex_parse(src[0]) << 4) | hex_parse(src[1]);
+      src += 2;
+      /* A separator is required only if more bytes can follow */
+      if (sep && *src && left)
+        {
+          if (*src != (char)sep)
+            break;
+          src++;
+        }
+    }
+  return src;
+}
+
+#ifdef TEST
+
+#include <stdio.h>
+
+/*
+ * Self-test: dump four bytes in upper case without a separator, dump them
+ * again in lower case separated by ':', then parse the second dump back
+ * with hex_to_mem() and print the recovered bytes (or "BAD" if the parser
+ * stopped before the end of the string).
+ */
+int main(void)
+{
+ byte x[4] = { 0xfe, 0xed, 0xf0, 0x0d };
+ byte y[4];
+ /* The longest dump produced below is 4*2 digits + 3 separators + NUL. */
+ char a[16];
+
+ mem_to_hex(a, x, 4, MEM_TO_HEX_UPCASE);
+ puts(a);
+ mem_to_hex(a, x, 4, ':');
+ puts(a);
+ const char *z = hex_to_mem(y, a, 4, ':');
+ if (*z)
+ puts("BAD");
+ else
+ printf("%02x%02x%02x%02x\n", y[0], y[1], y[2], y[3]);
+
+ return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Shell-Like Case-Insensitive Pattern Matching (currently only '?' and '*')
+ *
+ * (c) 1997 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+#include "ucw/chartype.h"
+
+#define Convert(x) Cupcase(x)
+#define MATCH_FUNC_NAME str_match_pattern_nocase
+
+#include "ucw/str-match.h"
--- /dev/null
+/*
+ * UCW Library -- Shell-Like Pattern Matching (currently only '?' and '*')
+ *
+ * (c) 1997 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+
+#define Convert(x) (x)
+#define MATCH_FUNC_NAME str_match_pattern
+
+#include "ucw/str-match.h"
--- /dev/null
+/*
+ * UCW Library -- Generic Shell-Like Pattern Matching (currently only '?' and '*')
+ *
+ * (c) 1997 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/string.h"
+
+/*
+ * Match string `s` against shell-like pattern `p`.
+ *
+ * '?' matches any single character, '*' matches any (possibly empty)
+ * sequence of characters and '\\' followed by a character matches that
+ * character literally. Characters are compared after passing through
+ * Convert(), which is supplied by the file including this template.
+ *
+ * Returns 1 if the whole string matches the whole pattern, 0 otherwise.
+ */
+int
+MATCH_FUNC_NAME(const char *p, const char *s)
+{
+  while (*p)
+    {
+      if (*p == '?' && *s)
+        p++, s++;
+      else if (*p == '*')
+        {
+          int z = p[1];
+          int wild = 0;
+
+          /* A trailing '*' matches whatever is left. */
+          if (!z)
+            return 1;
+          if (z == '\\' && p[2])
+            z = p[2];
+          else if (z == '?' || z == '*')
+            {
+              /*
+               * The pattern continues with another wildcard, so there is no
+               * literal character to look for: every position of the string
+               * must be tried. (Formerly the wildcard was searched for as a
+               * literal, so e.g. "a*?b" failed to match "axyb".)
+               */
+              wild = 1;
+            }
+          z = Convert(z);
+          for(;;)
+            {
+              if (wild)
+                {
+                  if (MATCH_FUNC_NAME(p+1, s))
+                    return 1;
+                  if (!*s)
+                    return 0;
+                }
+              else
+                {
+                  /* Skip to the next occurrence of the literal following '*'. */
+                  while (*s && Convert(*s) != z)
+                    s++;
+                  if (!*s)
+                    return 0;
+                  if (MATCH_FUNC_NAME(p+1, s))
+                    return 1;
+                }
+              s++;
+            }
+        }
+      else
+        {
+          /* An escaped character is compared literally. */
+          if (*p == '\\' && p[1])
+            p++;
+          if (Convert(*p++) != Convert(*s++))
+            return 0;
+        }
+    }
+  return !*s;
+}
--- /dev/null
+/*
+ * UCW Library -- Word Splitting
+ *
+ * (c) 1997 Martin Mares <mj@ucw.cz>
+ * (c) 2004 Robert Spalek <robert@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+#include "ucw/chartype.h"
+#include "ucw/string.h"
+
+#include <string.h>
+
+/*
+ * Split `str` in place at every occurrence of the separator `sep`.
+ * Pointers to the individual fields are stored to rec[0..max-1] and the
+ * separators are overwritten by zero bytes.
+ *
+ * Returns the number of fields found, or -1 if there are more than `max`
+ * of them (including the case of max == 0, when nothing may be stored).
+ */
+int
+str_sepsplit(char *str, uns sep, char **rec, uns max)
+{
+  uns cnt = 0;
+
+  /* Every string has at least one field, so a zero-sized array never suffices. */
+  if (!max)
+    return -1;
+  while (1)
+    {
+      rec[cnt++] = str;
+      str = strchr(str, sep);
+      if (!str)
+        return cnt;
+      if (cnt >= max)
+        return -1;
+      *str++ = 0;
+    }
+}
+
+/*
+ * Split `src` in place to words separated by white space. A word starting
+ * with a double quote extends to the next double quote (or to the end of
+ * the string if there is none) and the quotes are not a part of the word.
+ * Pointers to the words are stored to dst[0..max-1].
+ *
+ * Returns the number of words, or -1 if there are more than `max` of them.
+ */
+int
+str_wordsplit(char *src, char **dst, uns max)
+{
+  uns words = 0;
+  char *p = src;
+
+  while (1)
+    {
+      /* Wipe out white space; this also terminates the previous word. */
+      for (; Cspace(*p); p++)
+        *p = 0;
+      if (!*p)
+        return words;
+      if (words >= max)
+        return -1;
+      if (*p != '"')
+        {
+          dst[words++] = p;
+          while (*p && !Cspace(*p))
+            p++;
+          continue;
+        }
+      /* Quoted word: starts just after the opening quote. */
+      dst[words++] = ++p;
+      while (*p && *p != '"')
+        p++;
+      if (*p)
+        *p++ = 0;
+    }
+}
--- /dev/null
+/*
+ * UCW Library -- String Routines
+ *
+ * (c) 2006 Pavel Charvat <pchar@ucw.cz>
+ * (c) 2007--2008 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#undef LOCAL_DEBUG
+
+#include "ucw/lib.h"
+#include "ucw/string.h"
+
+/*
+ * Format a set of flag bits as a string: for every character fmt[i], the
+ * output contains that character if bit i of `flags` is set and '-' if it
+ * is not. `dest` must have room for strlen(fmt) + 1 characters.
+ *
+ * Positions beyond the width of `flags` are always printed as '-'
+ * (the former `1 << i` was undefined for such positions).
+ *
+ * Returns `dest`.
+ */
+char *
+str_format_flags(char *dest, const char *fmt, uns flags)
+{
+  char *start = dest;
+  for (uns i=0; fmt[i]; i++)
+    {
+      if (i < 8*sizeof(flags) && ((flags >> i) & 1))
+        *dest++ = fmt[i];
+      else
+        *dest++ = '-';
+    }
+  *dest = 0;
+  return start;
+}
+
+/*
+ * Count occurrences of the character `chr` in the zero-terminated string
+ * `str`. Characters of the string are compared as unsigned bytes.
+ */
+uns
+str_count_char(const char *str, uns chr)
+{
+  uns found = 0;
+  for (const byte *p = (const byte *) str; *p; p++)
+    if (*p == chr)
+      found++;
+  return found;
+}
--- /dev/null
+/*
+ * UCW Library -- String Routines
+ *
+ * (c) 2006 Pavel Charvat <pchar@ucw.cz>
+ * (c) 2007--2008 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_STRING_H
+#define _UCW_STRING_H
+
+/* string.c */
+
+char *str_format_flags(char *dest, const char *fmt, uns flags);
+uns str_count_char(const char *str, uns chr);
+
+/* str-esc.c */
+
+char *str_unesc(char *dest, const char *src);
+
+/* str-split.c */
+
+int str_sepsplit(char *str, uns sep, char **rec, uns max);
+int str_wordsplit(char *str, char **rec, uns max);
+
+/* str-(i)match.c: Matching of shell patterns */
+
+int str_match_pattern(const char *patt, const char *str);
+int str_match_pattern_nocase(const char *patt, const char *str);
+
+/* str-hex.c */
+
+void mem_to_hex(char *dest, const byte *src, uns bytes, uns flags);
+const char *hex_to_mem(byte *dest, const char *src, uns max_bytes, uns flags);
+
+// Bottom 8 bits of flags are an optional separator of bytes, the rest is:
+#define MEM_TO_HEX_UPCASE 0x100
+
+#endif
--- /dev/null
+# Tests of string routines
+
+Run: ../obj/ucw/str-hex-t
+Out: FEEDF00D
+ fe:ed:f0:0d
+ feedf00d
+
+Run: ../obj/ucw/str-esc-t '12\r\n\000\\\xff'
+Out: 31 32 0d 0a 00 5c ff
+
+Run: ../obj/ucw/str-esc-t '\100\10a\1a'
+Out: 40 08 61 01 61
+
+Run: ../obj/ucw/str-esc-t '\a\b\f\r\n\t\v\?\"'"\\'"
+Out: 07 08 0c 0d 0a 09 0b 3f 22 27
--- /dev/null
+/*
+ * UCW Library -- Syncing Directories
+ *
+ * (c) 2004--2005 Martin Mares <mj@ucw.cz>
+ */
+
+#include "ucw/lib.h"
+
+#include <fcntl.h>
+#include <unistd.h>
+
+/*
+ * Open the directory `name` and fsync() it. A failure of either open() or
+ * fsync() is logged at the L_ERROR level; nothing is reported to the caller.
+ */
+void
+sync_dir(const char *name)
+{
+  int flags = O_RDONLY;
+#ifdef CONFIG_LINUX
+  flags |= O_DIRECTORY;
+#endif
+
+  int fd = open(name, flags);
+  if (fd >= 0)
+    {
+      int res = fsync(fd);
+      close(fd);
+      if (res >= 0)
+        return;
+    }
+  msg(L_ERROR, "Unable to sync directory %s: %m", name);
+}
--- /dev/null
+/*
+ * UCW Library -- Rate Limiting based on the Token Bucket Filter
+ *
+ * (c) 2009 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+#include "ucw/tbf.h"
+
+/*
+ * Initialize the internal state of the filter. The caller fills in
+ * f->rate and optionally f->burst beforehand; a zero burst is replaced
+ * by twice the rate (but at least 1). The bucket starts full and the
+ * counter of dropped events starts at zero.
+ */
+void
+tbf_init(struct token_bucket_filter *f)
+{
+  if (!f->burst)
+    f->burst = MAX(2*f->rate, 1);
+  f->last_hit = 0;
+  f->bucket = f->burst;
+  /* tbf_limit() reads and increments this, so it must not start as garbage. */
+  f->drop_count = 0;
+}
+
+/*
+ * Process a single event arriving at time `now` (the rate is per second
+ * and the elapsed time is divided by 1000, i.e. timestamps are taken as
+ * milliseconds).
+ *
+ * If the event passes, one token is consumed and the number of events
+ * dropped since the previous passed event is returned (non-negative).
+ * Otherwise the event is counted as dropped and the negated count of
+ * dropped events is returned.
+ */
+int
+tbf_limit(struct token_bucket_filter *f, timestamp_t now)
+{
+  timestamp_t elapsed = now - f->last_hit;
+  f->last_hit = now;
+
+  /* Refill the bucket, but never above its capacity. */
+  double tokens = f->bucket + f->rate * elapsed / 1000;
+  tokens = MIN(tokens, f->burst);
+
+  if (!(tokens >= 1))
+    {
+      f->bucket = tokens;
+      f->drop_count++;
+      return -f->drop_count;
+    }
+
+  uns dropped = f->drop_count;
+  f->bucket = tokens - 1;
+  f->drop_count = 0;
+  return dropped;
+}
+
+#ifdef TEST
+
+/*
+ * Self-test: feed the filter (rate 1 token per second, burst 2) with one
+ * event every 77 time units up to 3000 and log the result and the bucket
+ * level after each event.
+ */
+int main(void)
+{
+ struct token_bucket_filter t = { .rate = 1, .burst = 2 };
+ tbf_init(&t);
+ for (timestamp_t now = 0; now < 3000; now += 77)
+ {
+ int res = tbf_limit(&t, now);
+ msg(L_DEBUG, "t=%u result=%d bucket=%f", (uns) now, res, t.bucket);
+ }
+ return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Rate Limiting based on the Token Bucket Filter
+ *
+ * (c) 2009 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_TBF_H_
+#define _UCW_TBF_H_
+
+/** A data structure describing a single TBF. **/
+struct token_bucket_filter {
+ double rate; // Number of tokens received per second
+ uns burst; // Capacity of the bucket
+ timestamp_t last_hit; // Internal state: timestamp passed to the last tbf_limit() call
+ double bucket; // Internal state: number of tokens currently in the bucket
+ uns drop_count; // Internal state: events dropped since the last passed event
+};
+
+/** Initialize the bucket. **/
+void tbf_init(struct token_bucket_filter *f);
+
+/**
+ * Ask the filter to process a single event. Returns a negative number
+ * if the event exceeds the rate (and should be dropped) and a non-negative
+ * number if the event passes the filter.
+ * The absolute value of the result is the number of dropped events
+ * since the last passed event.
+ **/
+int tbf_limit(struct token_bucket_filter *f, timestamp_t now);
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Temporary Files
+ *
+ * (c) 2002--2008 Martin Mares <mj@ucw.cz>
+ * (c) 2008 Michal Vaner <vorner@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+#include "ucw/conf.h"
+#include "ucw/threads.h"
+#include "ucw/lfs.h"
+#include "ucw/fastbuf.h"
+
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/fcntl.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <errno.h>
+
+static char *temp_prefix = "temp";
+static char *temp_dir;
+static int public_dir = 1;
+
+static struct cf_section temp_config = {
+ CF_ITEMS {
+ CF_STRING("Dir", &temp_dir),
+ CF_STRING("Prefix", &temp_prefix),
+ CF_INT("PublicDir", &public_dir),
+ CF_END
+ }
+};
+
+/* Constructor: register the "Tempfiles" configuration section. */
+static void CONSTRUCTOR temp_global_init(void)
+{
+ cf_declare_section("Tempfiles", &temp_config, 0);
+}
+
+/*
+ * Generate a name for a temporary file and store it to `name_buf`, which
+ * must be at least TEMP_FILE_NAME_LEN bytes long (a longer name trips the
+ * ASSERT at the end).
+ *
+ * The directory is the configured Dir, or else $TMPDIR, or else "/tmp".
+ * If `open_flags` is not NULL, it receives extra flags which should be
+ * passed to open(): O_EXCL when the directory is considered public,
+ * 0 otherwise.
+ */
+void
+temp_file_name(char *name_buf, int *open_flags)
+{
+ char *dir = temp_dir;
+ if (!dir && !(dir = getenv("TMPDIR")))
+ dir = "/tmp";
+
+ int len;
+ if (public_dir)
+ {
+ /* Public directory: derive the name from the microsecond part of the
+ * current time and ask the caller to open it with O_EXCL. */
+ struct timeval tv;
+ if (gettimeofday(&tv, NULL))
+ die("gettimeofday() failed: %m");
+ len = snprintf(name_buf, TEMP_FILE_NAME_LEN, "%s/%s%u", dir, temp_prefix, (uns) tv.tv_usec);
+ if (open_flags)
+ *open_flags = O_EXCL;
+ }
+ else
+ {
+ /* Private directory: the name is built from the PID, the thread ID
+ * (when it differs from the PID) and a per-thread counter. */
+ struct ucwlib_context *ctx = ucwlib_thread_context();
+ int cnt = ++ctx->temp_counter;
+ int pid = getpid();
+ if (ctx->thread_id == pid)
+ len = snprintf(name_buf, TEMP_FILE_NAME_LEN, "%s/%s%d-%d", dir, temp_prefix, pid, cnt);
+ else
+ len = snprintf(name_buf, TEMP_FILE_NAME_LEN, "%s/%s%d-%d-%d", dir, temp_prefix, pid, ctx->thread_id, cnt);
+ if (open_flags)
+ *open_flags = 0;
+ }
+ ASSERT(len < TEMP_FILE_NAME_LEN);
+}
+
+/*
+ * Create and open a temporary file. The generated name is stored to
+ * `name_buf`; `open_flags` and `mode` are passed to ucw_open() together
+ * with the flags suggested by temp_file_name(). When the name already
+ * exists (EEXIST), a new name is tried, up to 10 more times. Any other
+ * failure, or running out of retries, is fatal.
+ *
+ * Returns the file descriptor.
+ */
+int
+open_tmp(char *name_buf, int open_flags, int mode)
+{
+  int attempts_left = 10;
+  int fd;
+
+  for (;;)
+    {
+      int extra_flags;
+      temp_file_name(name_buf, &extra_flags);
+      fd = ucw_open(name_buf, open_flags | extra_flags, mode);
+      if (fd >= 0)
+        break;
+      if (errno != EEXIST || !attempts_left--)
+        break;
+    }
+  if (fd < 0)
+    die("Unable to create temp file %s: %m", name_buf);
+  return fd;
+}
+
+#ifdef TEST
+
+#include "ucw/getopt.h"
+
+/*
+ * Self-test: parse the configuration options, create a temporary file,
+ * then close and remove it again.
+ */
+int main(int argc, char **argv)
+{
+ log_init(NULL);
+ if (cf_getopt(argc, argv, CF_SHORT_OPTS, CF_NO_LONG_OPTS, NULL) >= 0)
+ die("Hey, whaddya want?");
+
+ char buf[TEMP_FILE_NAME_LEN];
+ int fd = open_tmp(buf, O_RDWR | O_CREAT | O_TRUNC, 0666);
+ close(fd);
+ unlink(buf);
+ return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * The UCW Library -- Threading Helpers
+ *
+ * (c) 2006 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+#include "ucw/threads.h"
+#include "ucw/conf.h"
+
+uns ucwlib_thread_stack_size = 65556;
+
+static struct cf_section threads_config = {
+ CF_ITEMS {
+ CF_UNS("DefaultStackSize", &ucwlib_thread_stack_size),
+ CF_END
+ }
+};
+
+/* Constructor: register the "Threads" configuration section. */
+static void CONSTRUCTOR
+ucwlib_threads_conf_init(void)
+{
+ cf_declare_section("Threads", &threads_config, 0);
+}
--- /dev/null
+/*
+ * The UCW Library -- Threading Helpers
+ *
+ * (c) 2006 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+#include "ucw/threads.h"
+
+#ifdef CONFIG_UCW_THREADS
+
+#include <pthread.h>
+
+#ifdef CONFIG_LINUX
+#include <sys/types.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#ifdef __NR_gettid
+/*
+ * Obtain the kernel thread ID by calling the gettid syscall directly.
+ *
+ * The wrapper has a private name, because newer versions of glibc declare
+ * their own non-static gettid() in <unistd.h> and a static function of the
+ * same name would clash with that declaration. The macro below keeps the
+ * rest of this file calling gettid() as before.
+ */
+static pid_t
+ucwlib_gettid(void)
+{
+  return syscall(__NR_gettid);
+}
+#define gettid() ucwlib_gettid()
+#define CONFIG_USE_GETTID
+#endif
+#endif
+
+static pthread_key_t ucwlib_context_key;
+static pthread_mutex_t ucwlib_master_mutex;
+
+/* Destructor registered with the pthread key: frees a thread's context. */
+static void
+ucwlib_free_thread_context(void *p)
+{
+ xfree(p);
+}
+
+/*
+ * Constructor: create the pthread key holding per-thread contexts and
+ * initialize the master mutex.
+ */
+static void CONSTRUCTOR
+ucwlib_threads_init(void)
+{
+  /*
+   * pthread functions report failure by returning a positive error number
+   * and do not set errno, so the result is tested against zero and the
+   * code is printed directly instead of using %m.
+   */
+  int err = pthread_key_create(&ucwlib_context_key, ucwlib_free_thread_context);
+  if (err)
+    die("Cannot create pthread_key: error %d", err);
+  pthread_mutex_init(&ucwlib_master_mutex, NULL);
+}
+
+/*
+ * Return an ID for the calling thread: the kernel thread ID if gettid()
+ * is available and returns a positive value, otherwise the next value of
+ * a process-wide counter protected by the library lock.
+ */
+static int
+ucwlib_tid(void)
+{
+ static int tid_counter;
+ int tid;
+
+#ifdef CONFIG_USE_GETTID
+ tid = gettid();
+ if (tid > 0)
+ return tid;
+ /* The syscall might be unimplemented */
+#endif
+
+ ucwlib_lock();
+ tid = ++tid_counter;
+ ucwlib_unlock();
+ return tid;
+}
+
+/*
+ * Return the context of the calling thread. On the first call in a thread,
+ * a zeroed context is allocated, assigned a thread ID and attached to the
+ * thread via the pthread key.
+ */
+struct ucwlib_context *
+ucwlib_thread_context(void)
+{
+  struct ucwlib_context *ctx = pthread_getspecific(ucwlib_context_key);
+  if (ctx)
+    return ctx;
+
+  ctx = xmalloc_zero(sizeof(*ctx));
+  ctx->thread_id = ucwlib_tid();
+  pthread_setspecific(ucwlib_context_key, ctx);
+  return ctx;
+}
+
+/* Acquire the library-wide master mutex. */
+void
+ucwlib_lock(void)
+{
+ pthread_mutex_lock(&ucwlib_master_mutex);
+}
+
+/* Release the library-wide master mutex. */
+void
+ucwlib_unlock(void)
+{
+ pthread_mutex_unlock(&ucwlib_master_mutex);
+}
+
+#else
+
+/* Build without CONFIG_UCW_THREADS: a single static context is returned. */
+struct ucwlib_context *
+ucwlib_thread_context(void)
+{
+ static struct ucwlib_context ucwlib_context;
+ return &ucwlib_context;
+}
+
+/* Build without CONFIG_UCW_THREADS: locking is a no-op. */
+void
+ucwlib_lock(void)
+{
+}
+
+/* Build without CONFIG_UCW_THREADS: unlocking is a no-op. */
+void
+ucwlib_unlock(void)
+{
+}
+
+#endif
+
+#ifdef TEST
+
+/* Self-test: take and release the library lock and log the thread ID. */
+int main(void)
+{
+ ucwlib_lock();
+ ucwlib_unlock();
+ msg(L_INFO, "tid=%d", ucwlib_thread_context()->thread_id);
+ return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * The UCW Library -- Threading Helpers
+ *
+ * (c) 2006 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_THREAD_H
+#define _UCW_THREAD_H
+
+/*
+ * This structure holds per-thread data. It is obtained by calling
+ * ucwlib_thread_context().
+ */
+
+struct ucwlib_context {
+ int thread_id; // Thread ID (either kernel tid or a counter)
+ int temp_counter; // Counter for fb-temp.c
+ struct asio_queue *io_queue; // Async I/O queue for fb-direct.c
+ ucw_sighandler_t *signal_handlers; // Signal handlers for sighandler.c
+};
+
+struct ucwlib_context *ucwlib_thread_context(void);
+
+/* Global lock used for initialization, cleanup and other not so frequently accessed global state */
+
+void ucwlib_lock(void);
+void ucwlib_unlock(void);
+
+#ifdef CONFIG_UCW_THREADS
+
+extern uns ucwlib_thread_stack_size;
+
+#endif
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- A Simple Millisecond Timer
+ *
+ * (c) 2007 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/time.h>
+
+/* Return the current wall-clock time (gettimeofday) in milliseconds. */
+timestamp_t
+get_timestamp(void)
+{
+  struct timeval now;
+  gettimeofday(&now, NULL);
+
+  timestamp_t ms = (timestamp_t)now.tv_sec * 1000;
+  return ms + now.tv_usec / 1000;
+}
+
+/* Start a timer: store the current timestamp to `*timer`. */
+void
+init_timer(timestamp_t *timer)
+{
+ *timer = get_timestamp();
+}
+
+/*
+ * Return the number of milliseconds elapsed since the timer was started
+ * or read for the last time (capped at ~0U) and restart the timer.
+ */
+uns
+get_timer(timestamp_t *timer)
+{
+  timestamp_t previous = *timer;
+  timestamp_t current = get_timestamp();
+  *timer = current;
+  return MIN(current-previous, ~0U);
+}
+
+/*
+ * Start the timer `*newt` and return the number of milliseconds (capped
+ * at ~0U) which have elapsed on `*oldt`. The old timer is left unchanged.
+ */
+uns
+switch_timer(timestamp_t *oldt, timestamp_t *newt)
+{
+  timestamp_t current = get_timestamp();
+  *newt = current;
+  return MIN(current-*oldt, ~0U);
+}
--- /dev/null
+/*
+ * UCW Library -- Byte-based trie -- Testing utility
+ *
+ * (c) 2008 Pavel Charvat <pchar@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#undef LOCAL_DEBUG
+
+#include "ucw/lib.h"
+#include "ucw/getopt.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#define TRIE_PREFIX(x) basic_##x
+#define TRIE_NODE_TYPE char
+#define TRIE_WANT_CLEANUP
+#define TRIE_WANT_ADD
+#define TRIE_WANT_DELETE
+#define TRIE_WANT_FIND
+#define TRIE_WANT_AUDIT
+#ifdef LOCAL_DEBUG
+#define TRIE_TRACE
+#endif
+#include "ucw/trie.h"
+
+/*
+ * Basic test of the static trie: add two strings, check positive and
+ * negative lookups, delete one string and check the lookups again.
+ * The structure is audited after each modification phase.
+ */
+static void
+basic_test(void)
+{
+ basic_init();
+ basic_add("str1");
+ basic_add("str2");
+ if (!basic_find("str1") || !basic_find("str2") || basic_find("x") || basic_find("str123"))
+ ASSERT(0);
+ basic_audit();
+ basic_delete("str1");
+ if (basic_find("str1") || !basic_find("str2"))
+ ASSERT(0);
+ basic_audit();
+ basic_cleanup();
+}
+
+#define TRIE_PREFIX(x) dynamic_##x
+#define TRIE_NODE_TYPE char
+#define TRIE_REV
+#define TRIE_DYNAMIC
+#define TRIE_WANT_CLEANUP
+#define TRIE_WANT_ADD
+#define TRIE_WANT_FIND
+#define TRIE_WANT_AUDIT
+#ifdef LOCAL_DEBUG
+#define TRIE_TRACE
+#endif
+#include "ucw/trie.h"
+
+/*
+ * Test of dynamically allocated tries (TRIE_DYNAMIC with TRIE_REV):
+ * two independent tries each get one string and the lookups must not
+ * see the string stored in the other trie.
+ */
+static void
+dynamic_test(void)
+{
+ struct dynamic_trie trie1, trie2;
+ dynamic_init(&trie1);
+ dynamic_init(&trie2);
+ dynamic_add(&trie1, "str1");
+ dynamic_add(&trie2, "str2");
+ if (!dynamic_find(&trie1, "str1") || dynamic_find(&trie1, "str2") || !dynamic_find(&trie2, "str2"))
+ ASSERT(0);
+ dynamic_audit(&trie1);
+ dynamic_audit(&trie2);
+ dynamic_cleanup(&trie1);
+ dynamic_cleanup(&trie2);
+}
+
+
+#define TRIE_PREFIX(x) random_##x
+#define TRIE_NODE_TYPE char
+#define TRIE_LEN_TYPE u16
+#undef TRIE_REV
+#define TRIE_WANT_CLEANUP
+#define TRIE_WANT_FIND
+#define TRIE_WANT_ADD
+#define TRIE_WANT_REMOVE
+#define TRIE_WANT_AUDIT
+#ifdef LOCAL_DEBUG
+#define TRIE_TRACE
+#endif
+#include "ucw/trie.h"
+
+#define MAX_STRINGS 200
+
+static uns count;
+static char *str[MAX_STRINGS];
+
+/*
+ * Allocate a random string: the length is random_max(11) and each
+ * character is 'a' plus random_max('z' - 'a'). The caller frees the
+ * result by xfree().
+ */
+static char *
+gen_string(void)
+{
+  uns len = random_max(11);
+  char *buf = xmalloc(len + 1);
+  char *end = buf + len;
+
+  for (char *p = buf; p < end; p++)
+    *p = random_max('z' - 'a') + 'a';
+  *end = 0;
+  return buf;
+}
+
+/*
+ * Generate a random string which is different from all strings currently
+ * stored in str[0..count-1]. Candidates colliding with a stored string
+ * are freed and a new one is generated.
+ */
+static char *
+gen_unique_string(void)
+{
+  for (;;)
+    {
+      char *candidate = gen_string();
+      uns i;
+      for (i = 0; i < count; i++)
+        if (!strcmp(candidate, str[i]))
+          break;
+      if (i == count)
+        return candidate;
+      xfree(candidate);
+    }
+}
+
+/*
+ * Random test step: add a new unique string to both the reference array
+ * and the trie, unless the array is already full. The trie is audited
+ * afterwards.
+ */
+static void
+insert(void)
+{
+ if (count == MAX_STRINGS)
+ return;
+ char *s = gen_unique_string();
+ str[count++] = s;
+ DBG("add '%s'", s);
+ random_add(s);
+ random_audit();
+}
+
+/*
+ * Random test step: remove a randomly chosen stored string from the trie,
+ * audit the trie, free the string and fill the hole in the reference array
+ * with its last element. Does nothing when no strings are stored.
+ */
+static void
+delete(void)
+{
+ if (!count)
+ return;
+ uns i = random_max(count);
+ DBG("remove '%s'", str[i]);
+ random_remove(str[i]);
+ random_audit();
+ xfree(str[i]);
+ str[i] = str[--count];
+}
+
+/*
+ * Random test step: look up either a randomly chosen stored string (which
+ * must be found and must be the very same node) or a freshly generated
+ * string not present in the reference array (which must not be found).
+ * The negative variant is taken when nothing is stored or when
+ * random_max(4) returns zero.
+ */
+static void
+find(void)
+{
+  if (count && random_max(4))
+    {
+      uns i = random_max(count);
+      DBG("positive find '%s'", str[i]);
+      if (random_find(str[i]) != str[i])
+        ASSERT(0);
+      return;
+    }
+
+  char *s = gen_unique_string();
+  DBG("negative find '%s'", s);
+  if (random_find(s))
+    ASSERT(0);
+  xfree(s);
+}
+
+/*
+ * Random test step: destroy the whole trie, free all strings of the
+ * reference array and start again with an empty trie.
+ */
+static void
+reset(void)
+{
+ DBG("reset");
+ random_cleanup();
+ for (uns i = 0; i < count; i++)
+ xfree(str[i]);
+ count = 0;
+ random_init();
+ random_audit();
+}
+
+/*
+ * Randomized test: perform 10000 steps, each chosen by a random number r
+ * in [0, 1000): insert for r < 300, delete for the next 150 values, find
+ * for the next 300, reset for the single following value and nothing for
+ * the rest. The thresholds are consumed by successive subtraction from r,
+ * so the order of the branches matters.
+ */
+static void
+random_test(void)
+{
+ random_init();
+ for (uns i = 0; i < 10000; i++)
+ {
+ int r = random_max(1000);
+ if ((r -= 300) < 0)
+ insert();
+ else if ((r -= 150) < 0)
+ delete();
+ else if ((r -= 300) < 0)
+ find();
+ else if ((r -= 1) < 0)
+ reset();
+ }
+ random_cleanup();
+}
+
+/*
+ * Entry point of the testing utility. Exactly one non-option argument is
+ * expected and it selects the test to run: "basic", "dynamic" or "random".
+ * The random number generator is seeded by the current time.
+ */
+int main(int argc, char **argv)
+{
+ log_init(argv[0]);
+ if (cf_getopt(argc, argv, CF_SHORT_OPTS, CF_NO_LONG_OPTS, NULL) >= 0 || optind + 1 != argc)
+ die("Invalid usage, see the source code");
+ srandom(time(NULL));
+
+ char *test = argv[optind];
+ if (!strcmp(test, "basic"))
+ basic_test();
+ else if (!strcmp(test, "dynamic"))
+ dynamic_test();
+ else if (!strcmp(test, "random"))
+ random_test();
+ else
+ die("Unknown test case");
+
+ return 0;
+}
--- /dev/null
+# Tests for tries
+
+Run: ../obj/ucw/trie-test basic
+
+Run: ../obj/ucw/trie-test dynamic
+
+Run: ../obj/ucw/trie-test random
--- /dev/null
+/*
+ * UCW Library -- Byte-based trie
+ *
+ * (c) 2008 Pavel Charvat <pchar@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+/*
+ * This is not a normal header file, it's a generator of tries.
+ * Each time you include it with parameters set in the corresponding
+ * preprocessor macros, it generates a trie with the parameters given.
+ *
+ * You need to specify:
+ *
+ * [*] TRIE_PREFIX(x) macro to add a name prefix (used on all global names
+ * defined by the trie generator).
+ *
+ * [*] TRIE_NODE_TYPE data type where a node dwells (usually a struct).
+ * TRIE_NODE_KEY(node) macro to return the pointer to the key (default=&x)
+ * TRIE_NODE_LEN(node) macro to return the length of the key (default=str_len(TRIE_NODE_KEY(node)))
+ * TRIE_LEN_TYPE integer type large enough to hold length of any inserted string (default=u32).
+ * TRIE_REV work with reversed strings.
+ * TRIE_DYNAMIC
+ *
+ * TRIE_WANT_CLEANUP cleanup()
+ *
+ * TRIE_WANT_FIND node *find(char *str)
+ * TRIE_WANT_FIND_BUF node *find_buf(byte *ptr, uns len)
+ * TRIE_WANT_ADD add(*node)
+ * TRIE_WANT_REPLACE node *replace(*node)
+ * TRIE_WANT_DELETE delete(char *str)
+ * TRIE_WANT_DELETE_BUF delete_buf(byte *ptr, uns len)
+ * TRIE_WANT_REMOVE remove(*node)
+ *
+ * TRIE_WANT_AUDIT audit()
+ * TRIE_WANT_STATS
+ */
+
+/*** Define once ***/
+
+#ifndef _SHERLOCK_UCW_TRIE_H
+#define _SHERLOCK_UCW_TRIE_H
+
+#include "ucw/eltpool.h"
+#include "ucw/hashfunc.h"
+
+#include <string.h>
+
+#define TRIE_FLAG_DEG 0x01ff // mask for edge degree (0-256)
+#define TRIE_FLAG_HASH 0x0200 // sons are stored in a hash table
+#define TRIE_FLAG_NODE 0x0400 // edge contains inserted data
+
+#endif
+
+/*** Defaults ***/
+
+#ifndef TRIE_PREFIX
+#error Undefined mandatory macro TRIE_PREFIX
+#endif
+#define P(x) TRIE_PREFIX(x)
+
+#ifndef TRIE_NODE_TYPE
+#error Undefined mandatory macro TRIE_NODE_TYPE
+#endif
+typedef TRIE_NODE_TYPE P(node_t);
+
+#ifndef TRIE_NODE_KEY
+#define TRIE_NODE_KEY(node) ((char *)&(node))
+#endif
+
+#ifndef TRIE_NODE_LEN
+#define TRIE_NODE_LEN(node) (str_len(TRIE_NODE_KEY(node)))
+#endif
+
+#ifndef TRIE_LEN_TYPE
+#define TRIE_LEN_TYPE u32
+#endif
+typedef TRIE_LEN_TYPE P(len_t);
+
+#ifndef TRIE_ELTPOOL_SIZE
+#define TRIE_ELTPOOL_SIZE 1024
+#endif
+
+#ifndef TRIE_HASH_THRESHOLD
+#define TRIE_HASH_THRESHOLD (6 - sizeof(P(len_t)))
+#endif
+
+#ifndef TRIE_BUCKET_RANK
+#define TRIE_BUCKET_RANK (2U + (sizeof(void *) > 4))
+#endif
+#define TRIE_BUCKET_SIZE (1U << TRIE_BUCKET_RANK)
+#define TRIE_BUCKET_MASK (TRIE_BUCKET_SIZE - 1)
+enum { P(bucket_rank) = TRIE_BUCKET_RANK };
+
+#define TRIE_COMPILE_ASSERT(x, y) typedef char TRIE_PREFIX(x##_compile_assert)[!!(y)-1]
+TRIE_COMPILE_ASSERT(len_type, sizeof(P(len_t)) <= sizeof(uns));
+TRIE_COMPILE_ASSERT(hash_threshold, TRIE_HASH_THRESHOLD >= 2);
+TRIE_COMPILE_ASSERT(bucket_size, TRIE_BUCKET_RANK >= 1 && TRIE_BUCKET_MASK < sizeof(void *));
+
+#ifdef TRIE_TRACE
+#define TRIE_DBG(x...) msg(L_DEBUG, "TRIE: " x)
+#else
+#define TRIE_DBG(x...) do{}while(0)
+#endif
+
+/*** Solve dependencies ***/
+
+#if !defined(TRIE_WANT_DO_FIND) && (defined(TRIE_WANT_FIND) || defined(TRIE_WANT_FIND_BUF))
+#define TRIE_WANT_DO_FIND
+#endif
+
+#if !defined(TRIE_WANT_DO_LOOKUP) && (defined(TRIE_WANT_ADD) || defined(TRIE_WANT_REPLACE))
+#define TRIE_WANT_DO_LOOKUP
+#endif
+
+#if !defined(TRIE_WANT_DO_DELETE) && (defined(TRIE_WANT_DELETE) || defined(TRIE_WANT_DELETE_BUF) || defined(TRIE_WANT_REMOVE))
+#define TRIE_WANT_DO_DELETE
+#endif
+
+#if !defined(TRIE_WANT_DO_LOOKUP)
+#error You must request at least one method for inserting nodes
+#endif
+
+/*** Data structures ***/
+
+/*
+ * The trie itself: the root edge and element pools from which edges are
+ * allocated. epool[i] serves array edges with i sons, hpool[i] serves
+ * hash-table edges of hash rank i.
+ */
+struct P(trie) {
+ struct P(edge) *root; // root edge or NULL
+ struct eltpool *epool[TRIE_HASH_THRESHOLD + 1]; // eltpools for edges with array of sons
+ struct eltpool *hpool[9]; // eltpools for edges with hash table
+};
+
+/*
+ * A single edge of the trie. Depending on TRIE_FLAG_HASH in `flags`, the
+ * sons are kept either in a plain array indexed in parallel with `trans`,
+ * or in a hash table stored in `son`.
+ */
+struct P(edge) {
+ u16 flags; // TRIE_FLAG_x
+ union {
+ byte trans[TRIE_HASH_THRESHOLD]; // transition characters (!TRIE_FLAG_HASH)
+ struct {
+ byte hash_rank; // logarithmic hash size (TRIE_FLAG_HASH)
+ byte hash_deleted; // number of deleted items
+ };
+ };
+ P(len_t) len; // sum of all ancestor edges with their transition
+ // characters plus the length of the current edge
+ union {
+ P(node_t) *node; // inserted data (TRIE_FLAG_NODE)
+ struct P(edge) *leaf; // reference to a descendant with data (!TRIE_FLAG_NODE)
+ };
+ struct P(edge) *son[0]; // array of sons (!TRIE_FLAG_HASH)
+ // or hash table (TRIE_FLAG_HASH)
+};
+
+#ifdef TRIE_DYNAMIC
+#define T (*trie)
+#define TA struct P(trie) *trie
+#define TAC TA,
+#define TT trie
+#define TTC trie,
+#else
+static struct P(trie) P(trie);
+#define T P(trie)
+#define TA void
+#define TAC
+#define TT
+#define TTC
+#endif
+
+/*** Memory management ***/
+
+/*
+ * Initialize an empty trie: clear the structure and create one element
+ * pool for every possible size of an array edge and of a hash-table edge.
+ */
+static void
+P(init)(TA)
+{
+ TRIE_DBG("Initializing");
+ bzero(&T, sizeof(T));
+ /* Array edges: the edge header followed by i son pointers. */
+ for (uns i = 0; i < ARRAY_SIZE(T.epool); i++)
+ {
+ uns size = sizeof(struct P(edge)) + i * sizeof(void *);
+ T.epool[i] = ep_new(size, MAX(TRIE_ELTPOOL_SIZE / size, 1));
+ }
+ /* Hash edges: the edge header followed by TRIE_BUCKET_SIZE << i slots. */
+ for (uns i = 0; i < ARRAY_SIZE(T.hpool); i++)
+ {
+ uns size = sizeof(struct P(edge)) + ((sizeof(void *) << TRIE_BUCKET_RANK) << i);
+ T.hpool[i] = ep_new(size, MAX(TRIE_ELTPOOL_SIZE / size, 1));
+ }
+}
+
+#ifdef TRIE_WANT_CLEANUP
+/* Destroy the trie by deleting all of its element pools. */
+static void
+P(cleanup)(TA)
+{
+ TRIE_DBG("Cleaning up");
+ for (uns i = 0; i < ARRAY_SIZE(T.epool); i++)
+ ep_delete(T.epool[i]);
+ for (uns i = 0; i < ARRAY_SIZE(T.hpool); i++)
+ ep_delete(T.hpool[i]);
+}
+#endif
+
+/*
+ * Allocate an edge with the given flags (which include the degree in
+ * TRIE_FLAG_DEG). For hash edges, the smallest rank whose table is at
+ * least twice as large as the degree is chosen and the table is cleared.
+ * For array edges, the sons and all other fields except `flags` are left
+ * uninitialized.
+ */
+static struct P(edge) *
+P(edge_alloc)(TAC uns flags)
+{
+ struct P(edge) *edge;
+ if (flags & TRIE_FLAG_HASH)
+ {
+ uns rank = 0, deg = flags & TRIE_FLAG_DEG;
+ while ((TRIE_BUCKET_MASK << rank) < deg * 2) // 25-50% density
+ rank++;
+ ASSERT(rank < ARRAY_SIZE(T.hpool));
+ edge = ep_alloc(T.hpool[rank]);
+ edge->hash_rank = rank;
+ edge->hash_deleted = 0;
+ bzero(edge->son, (sizeof(void *) << TRIE_BUCKET_RANK) << rank);
+ }
+ else
+ edge = ep_alloc(T.epool[flags & TRIE_FLAG_DEG]);
+ edge->flags = flags;
+ TRIE_DBG("Allocated edge %p, flags=0x%x", edge, flags);
+ return edge;
+}
+
+/*
+ * Return an edge to the element pool it was allocated from. The pool is
+ * determined by the hash rank for hash edges and by the degree otherwise.
+ */
+static void
+P(edge_free)(TAC struct P(edge) *edge)
+{
+  TRIE_DBG("Freeing edge %p, flags=0x%x", edge, edge->flags);
+  if (!(edge->flags & TRIE_FLAG_HASH))
+    {
+      ep_free(T.epool[edge->flags & TRIE_FLAG_DEG], edge);
+      return;
+    }
+  ep_free(T.hpool[edge->hash_rank], edge);
+}
+
+/*** Manipulation with strings ***/
+
+/* Return a pointer to the key of the node (see TRIE_NODE_KEY). */
+static inline byte *
+P(str_get)(P(node_t) *node)
+{
+ return TRIE_NODE_KEY((*node));
+}
+
+/* Return the length of the key of the node (see TRIE_NODE_LEN). */
+static inline uns
+P(str_len)(P(node_t) *node)
+{
+ return TRIE_NODE_LEN((*node));
+}
+
+/*
+ * Return the character at logical position `pos` of the string. With
+ * TRIE_REV, positions are counted from the end of the string.
+ */
+static inline uns
+P(str_char)(byte *ptr, uns len UNUSED, uns pos)
+{
+#ifndef TRIE_REV
+ return ptr[pos];
+#else
+ return ptr[len - pos - 1];
+#endif
+}
+
+/*
+ * Return a pointer to the memory holding the logical prefix of length
+ * `prefix`. With TRIE_REV, this is the last `prefix` bytes of the string.
+ */
+static inline byte *
+P(str_prefix)(byte *ptr, uns len UNUSED, uns prefix UNUSED)
+{
+#ifndef TRIE_REV
+ return ptr;
+#else
+ return ptr + len - prefix;
+#endif
+}
+
+/*
+ * Return a pointer to the memory holding the logical suffix of length
+ * `suffix`. With TRIE_REV, this is the start of the string.
+ */
+static inline byte *
+P(str_suffix)(byte *ptr, uns len UNUSED, uns suffix UNUSED)
+{
+#ifndef TRIE_REV
+ return ptr + len - suffix;
+#else
+ return ptr;
+#endif
+}
+
+/*
+ * Return the length of the longest common logical prefix of two strings,
+ * comparing characters as seen by P(str_char).
+ */
+static inline uns
+P(common_prefix)(byte *ptr1, uns len1, byte *ptr2, uns len2)
+{
+  uns limit = MIN(len1, len2);
+  uns n = 0;
+
+  while (n < limit && P(str_char)(ptr1, len1, n) == P(str_char)(ptr2, len2, n))
+    n++;
+  return n;
+}
+
+/*** Sons ***/
+
+/* Hash a transition character: the upper 16 bits of hash_u32(c). */
+static inline uns
+P(hash_func)(uns c)
+{
+ return hash_u32(c) >> 16;
+}
+
+/*
+ * Find the son for transition character `c` in a hash-table edge.
+ *
+ * The table is probed linearly starting at the hash of `c`. Slots whose
+ * index is a multiple of TRIE_BUCKET_SIZE do not hold sons: their bytes
+ * store the transition characters of the other slots of the same bucket.
+ * A slot containing the value 1 marks a deleted son and is skipped,
+ * a NULL slot terminates the search.
+ *
+ * Returns a pointer to the son's slot or NULL if there is no such son.
+ * The inner `else` pairs with `if (!edge->son[i])`.
+ */
+static inline struct P(edge) **
+P(hash_find)(struct P(edge) *edge, uns c)
+{
+ uns mask = (TRIE_BUCKET_SIZE << edge->hash_rank) - 1;
+ for (uns i = P(hash_func)(c); ; i++)
+ if (((i &= mask) & TRIE_BUCKET_MASK) && (uintptr_t)edge->son[i] != 1)
+ if (!edge->son[i])
+ return NULL;
+ else if (((byte *)&edge->son[i & ~TRIE_BUCKET_MASK])[i & TRIE_BUCKET_MASK] == c)
+ return &edge->son[i];
+}
+
+/*
+ * Reserve a slot for transition character `c` in a hash-table edge.
+ *
+ * The first usable slot (empty or marked as deleted) in the probe sequence
+ * is taken: if it was a deleted one (value 1), hash_deleted is decreased.
+ * The slot is set to NULL, the character is recorded in the bucket's
+ * character slot and a pointer to the son's slot is returned for the
+ * caller to fill in. The degree in `flags` is not touched here.
+ */
+static inline struct P(edge) **
+P(hash_insert)(struct P(edge) *edge, uns c)
+{
+ uns mask = (TRIE_BUCKET_SIZE << edge->hash_rank) - 1;
+ for (uns i = P(hash_func)(c); ; i++)
+ if (((i &= mask) & TRIE_BUCKET_MASK) && (uintptr_t)edge->son[i] <= 1)
+ {
+ /* The slot holds either 0 (empty) or 1 (deleted), so this subtracts 0 or 1. */
+ edge->hash_deleted -= (uintptr_t)edge->son[i];
+ edge->son[i] = NULL;
+ ((byte *)&edge->son[i & ~TRIE_BUCKET_MASK])[i & TRIE_BUCKET_MASK] = c;
+ return &edge->son[i];
+ }
+}
+
+#ifdef TRIE_WANT_DO_DELETE
+/*
+ * Delete the son for transition character `c` from a hash-table edge by
+ * marking its slot with the value 1 and increasing hash_deleted.
+ * The loop has no exit for a missing son, so the son must be present.
+ */
+static inline void
+P(hash_delete)(struct P(edge) *edge, uns c)
+{
+ uns mask = (TRIE_BUCKET_SIZE << edge->hash_rank) - 1;
+ for (uns i = P(hash_func)(c); ; i++)
+ if (((i &= mask) & TRIE_BUCKET_MASK) && (uintptr_t)edge->son[i] > 1 &&
+ ((byte *)&edge->son[i & ~TRIE_BUCKET_MASK])[i & TRIE_BUCKET_MASK] == c)
+ {
+ edge->hash_deleted++;
+ edge->son[i] = (void *)1;
+ return;
+ }
+}
+#endif
+
+/*
+ * Iterate over all sons of the hash-table edge `xedge`. The body between
+ * TRIE_HASH_FOR_ALL and TRIE_HASH_END_FOR is executed once for every
+ * occupied slot (slots holding bucket characters, empty slots and deleted
+ * slots are skipped) with `xtrans` set to the transition character and
+ * `xson` to the son. Slots are visited from the highest index downwards.
+ */
+#define TRIE_HASH_FOR_ALL(xedge, xtrans, xson) do { \
+ struct P(edge) *_edge = (xedge); \
+ for (uns _i = (TRIE_BUCKET_SIZE << _edge->hash_rank); _i--; ) \
+ if ((_i & TRIE_BUCKET_MASK) && (uintptr_t)_edge->son[_i] > 1) { \
+ UNUSED uns xtrans = ((byte *)&_edge->son[_i & ~TRIE_BUCKET_MASK])[_i & TRIE_BUCKET_MASK]; \
+ UNUSED struct P(edge) *xson = _edge->son[_i]; \
+ do {
+#define TRIE_HASH_END_FOR }while(0);}}while(0)
+
+/*
+ * Rebuild the hash table of the edge `*ref`: a new edge sized according
+ * to the current degree is allocated, node/leaf and length are copied,
+ * all live sons are re-inserted (which drops the deleted markers) and the
+ * old edge is freed. `*ref` is redirected to the new edge.
+ */
+static void
+P(hash_realloc)(TAC struct P(edge) **ref)
+{
+ struct P(edge) *old = *ref, *edge = *ref = P(edge_alloc)(TTC old->flags);
+ TRIE_DBG("Reallocating hash table");
+ edge->node = old->node;
+ edge->len = old->len;
+ TRIE_HASH_FOR_ALL(old, trans, son)
+ *P(hash_insert)(edge, trans) = son;
+ TRIE_HASH_END_FOR;
+ P(edge_free)(TTC old);
+}
+
+/*** Finding/inserting/deleting sons ***/
+
+/*
+ * Find the son of `edge` reached by transition character `c`.
+ * Returns a pointer to the son's slot or NULL if there is no such son.
+ */
+static struct P(edge) **
+P(son_find)(struct P(edge) *edge, uns c)
+{
+  if (edge->flags & TRIE_FLAG_HASH)
+    return P(hash_find)(edge, c);
+
+  /* Plain array of sons: scan the transition characters from the end. */
+  uns i = edge->flags & TRIE_FLAG_DEG;
+  while (i--)
+    if (edge->trans[i] == c)
+      return &edge->son[i];
+  return NULL;
+}
+
+/*
+ * Add a son slot for transition character `c` to the edge `*ref`, which is
+ * not checked for already having such a son. The edge may be reallocated,
+ * in which case `*ref` is updated.
+ *
+ * Returns a pointer to the new (NULL) son slot.
+ */
+static struct P(edge) **
+P(son_insert)(TAC struct P(edge) **ref, uns c)
+{
+ struct P(edge) *old = *ref, *edge;
+ uns deg = old->flags & TRIE_FLAG_DEG;
+ if (old->flags & TRIE_FLAG_HASH)
+ {
+ /* The degree lives in the low bits of flags, so this increments it. */
+ old->flags++;
+ if ((deg + 1 + old->hash_deleted) * 4 > (TRIE_BUCKET_MASK << old->hash_rank) * 3) // >75% density
+ {
+ P(hash_realloc)(TTC ref);
+ edge = *ref;
+ }
+ else
+ edge = old;
+ }
+ else
+ {
+ if (deg < TRIE_HASH_THRESHOLD)
+ {
+ TRIE_DBG("Growing array");
+ edge = P(edge_alloc)(TTC old->flags + 1);
+ /* Copy everything except flags (already set by edge_alloc), including the existing sons. */
+ memcpy((byte *)edge + sizeof(edge->flags), (byte *)old + sizeof(edge->flags),
+ sizeof(*old) - sizeof(edge->flags) + deg * sizeof(*old->son));
+ edge->trans[deg] = c;
+ edge->son[deg] = NULL;
+ P(edge_free)(TTC old);
+ *ref = edge;
+ return &edge->son[deg];
+ }
+ else
+ {
+ TRIE_DBG("Growing array to hash table");
+ edge = P(edge_alloc)(TTC (old->flags + 1) | TRIE_FLAG_HASH);
+ edge->node = old->node;
+ edge->len = old->len;
+ for (uns i = 0; i < deg; i++)
+ *P(hash_insert)(edge, old->trans[i]) = old->son[i];
+ P(edge_free)(TTC old);
+ }
+ }
+ /* Only the hash-table cases get here. */
+ *ref = edge;
+ return P(hash_insert)(edge, c);
+}
+
+#ifdef TRIE_WANT_DO_DELETE
+/*
+ * Remove the son reached by transition character `c` from the edge `*ref`.
+ * The son itself is not freed. The edge may be reallocated (shrunk array,
+ * hash table turned back to an array, or rebuilt hash table), in which
+ * case `*ref` is updated.
+ */
+static void
+P(son_delete)(TAC struct P(edge) **ref, uns c)
+{
+ struct P(edge) *old = *ref, *edge;
+ uns deg = old->flags & TRIE_FLAG_DEG;
+ ASSERT(deg);
+ if (old->flags & TRIE_FLAG_HASH)
+ {
+ P(hash_delete)(old, c);
+ /* The degree lives in the low bits of flags, so this decrements it. */
+ old->flags--;
+ deg--;
+ if (deg <= TRIE_HASH_THRESHOLD / 2)
+ {
+ TRIE_DBG("Reducing hash table to array");
+ edge = P(edge_alloc)(TTC old->flags & ~TRIE_FLAG_HASH);
+ uns k = 0;
+ TRIE_HASH_FOR_ALL(old, trans, son)
+ edge->trans[k] = trans;
+ edge->son[k] = son;
+ k++;
+ TRIE_HASH_END_FOR;
+ ASSERT(k == deg);
+ }
+ else if (deg * 6 >= (TRIE_BUCKET_MASK << old->hash_rank)) // >= 16%
+ return;
+ else
+ {
+ /* Too sparse: rebuild the table; hash_realloc() copies node and len itself. */
+ P(hash_realloc)(TTC ref);
+ edge = *ref;
+ return;
+ }
+ }
+ else
+ {
+ TRIE_DBG("Reducing array");
+ edge = P(edge_alloc)(TTC old->flags - 1);
+ uns j = 0;
+ for (uns i = 0; i < deg; i++)
+ if (old->trans[i] != c)
+ {
+ edge->trans[j] = old->trans[i];
+ edge->son[j] = old->son[i];
+ j++;
+ }
+ ASSERT(j == deg - 1);
+ }
+ /* A new array edge has been built: finish it and replace the old one. */
+ edge->node = old->node;
+ edge->len = old->len;
+ P(edge_free)(TTC old);
+ *ref = edge;
+}
+#endif
+
+#ifdef TRIE_WANT_DO_DELETE
+/*
+ * Return an arbitrary son of an edge with non-zero degree: the first son
+ * of an array edge, or the first occupied slot of a hash table.
+ */
+static struct P(edge) *
+P(son_any)(struct P(edge) *edge)
+{
+ ASSERT(edge->flags & TRIE_FLAG_DEG);
+ if (!(edge->flags & TRIE_FLAG_HASH))
+ return edge->son[0];
+ else
+ for (uns i = 0; ; i++)
+ if ((i & TRIE_BUCKET_MASK) && (uintptr_t)edge->son[i] > 1)
+ return edge->son[i];
+}
+#endif
+
+/*** Find/insert/delete ***/
+
+#ifdef TRIE_WANT_DO_FIND
+/*
+ * Find the edge carrying a node whose key is exactly the `len` bytes at
+ * `ptr`. The trie is descended from the root by the character at position
+ * edge->len until an edge of length `len` is reached; since only the
+ * transition characters are checked on the way down, the complete key is
+ * verified by memcmp() at the end.
+ *
+ * Returns the edge or NULL if the key is not present.
+ */
+static struct P(edge) *
+P(do_find)(TAC byte *ptr, uns len)
+{
+ TRIE_DBG("do_find('%.*s')", len, ptr);
+ struct P(edge) **ref = &T.root, *edge;
+ do
+ {
+ if (!(edge = *ref) || edge->len > len)
+ return NULL;
+ else if (edge->len == len)
+ return ((edge->flags & TRIE_FLAG_NODE) && !memcmp(ptr, P(str_get)(edge->node), len)) ? edge : NULL;
+ }
+ while (ref = P(son_find)(edge, P(str_char)(ptr, len, edge->len)));
+ return NULL;
+}
+#endif
+
+/*
+ * Find the edge for the key given by `ptr` and `len`, creating it if it
+ * does not exist yet.
+ *
+ * Returns an edge with TRIE_FLAG_NODE set. If the key was already present,
+ * it is the existing edge with its node untouched; otherwise the edge is
+ * new (or newly marked as carrying a node) and its `node` is NULL, to be
+ * filled in by the caller.
+ *
+ * The function works in two passes: first it walks down as far as the
+ * transition characters allow and uses the key of some node below that
+ * point to compute the real length of the common prefix; then it walks
+ * down from the root again and either splits an edge at the end of the
+ * common prefix, marks an existing edge, or adds a new son.
+ */
+static struct P(edge) *
+P(do_lookup)(TAC byte *ptr, uns len)
+{
+ TRIE_DBG("do_lookup('%.*s')", len, ptr);
+ struct P(edge) **ref, *edge, *leaf, *newleaf;
+ uns prefix, elen, trans, pos;
+ byte *eptr;
+
+ if (!(edge = T.root))
+ {
+ TRIE_DBG("Creating first edge");
+ edge = T.root = P(edge_alloc)(TTC TRIE_FLAG_NODE);
+ edge->node = NULL;
+ edge->len = len;
+ return edge;
+ }
+ else
+ {
+ /* Pass 1: descend by transition characters and pick a node to compare with. */
+ while (edge->len < len && (ref = P(son_find)(edge, P(str_char)(ptr, len, edge->len))))
+ edge = *ref;
+ if (!(edge->flags & TRIE_FLAG_NODE))
+ edge = edge->leaf;
+ ASSERT(edge->flags & TRIE_FLAG_NODE);
+ eptr = P(str_get)(edge->node);
+ elen = edge->len;
+ prefix = P(common_prefix)(ptr, len, eptr, elen);
+ if (prefix == len && prefix == elen)
+ return edge;
+ TRIE_DBG("The longest common prefix is '%.*s'", prefix, P(str_prefix)(ptr, len, prefix));
+
+ /* A separate leaf is needed unless the key is a prefix of the compared key. */
+ if (prefix < len)
+ {
+ TRIE_DBG("Creating a new leaf");
+ newleaf = P(edge_alloc)(TTC TRIE_FLAG_NODE);
+ newleaf->node = NULL;
+ newleaf->len = len;
+ }
+ else
+ newleaf = NULL;
+
+ /* Pass 2: descend again from the root and modify the structure. */
+ ref = &T.root;
+ while (edge = *ref)
+ {
+ pos = edge->len;
+ if (prefix < pos)
+ {
+ /* The common prefix ends inside this edge: split it at `prefix`. */
+ leaf = (edge->flags & TRIE_FLAG_NODE) ? edge : edge->leaf;
+ TRIE_DBG("Splitting edge '%.*s'", leaf->len, P(str_get)(leaf->node));
+ trans = P(str_char)(P(str_get)(leaf->node), leaf->len, prefix);
+ if (len == prefix)
+ {
+ /* The key ends at the split point: the new inner edge carries the node. */
+ edge = P(edge_alloc)(TTC 1 | TRIE_FLAG_NODE);
+ edge->len = prefix;
+ edge->node = NULL;
+ edge->trans[0] = trans;
+ edge->son[0] = *ref;
+ return *ref = edge;
+ }
+ else
+ {
+ /* The key continues: a node-less edge with the old subtree and the new leaf. */
+ edge = P(edge_alloc)(TTC 2);
+ edge->len = prefix;
+ edge->leaf = leaf;
+ edge->trans[0] = trans;
+ edge->son[0] = *ref;
+ edge->trans[1] = P(str_char)(ptr, len, prefix);
+ *ref = edge;
+ return edge->son[1] = newleaf;
+ }
+ }
+ if (pos == len)
+ {
+ TRIE_DBG("Adding the node to an already existing edge");
+ edge->flags |= TRIE_FLAG_NODE;
+ edge->node = NULL;
+ return edge;
+ }
+ /* Node-less edges on the path get their leaf reference redirected to the new leaf. */
+ if (!(edge->flags & TRIE_FLAG_NODE) && newleaf)
+ edge->leaf = newleaf;
+ trans = P(str_char)(ptr, len, pos);
+ if (pos < prefix)
+ ref = P(son_find)(edge, trans);
+ else
+ ref = P(son_insert)(TTC ref, trans);
+ }
+ }
+ /* The loop ended on an empty son slot: hang the new leaf there. */
+ return *ref = newleaf;
+}
+
+#ifdef TRIE_WANT_DO_DELETE
+/*
+ * Delete the node stored under the key (@ptr, @len).
+ * Returns the node pointer that was stored there, or NULL when the key is not present.
+ */
+static P(node_t) *
+P(do_delete)(TAC byte *ptr, uns len)
+{
+ TRIE_DBG("do_delete('%.*s')", len, ptr);
+ struct P(edge) **ref = &T.root, **pref = NULL, *edge, *parent, *leaf, *pold = NULL;
+ /* Walk down to the edge with the key; pref follows one step behind ref */
+ while (1)
+ {
+ if (!(edge = *ref) || edge->len > len)
+ return NULL;
+ else if (edge->len == len)
+ if ((edge->flags & TRIE_FLAG_NODE) && !memcmp(ptr, P(str_get)(edge->node), len))
+ break;
+ else
+ return NULL;
+ pref = ref;
+ if (!(ref = P(son_find)(edge, P(str_char)(ptr, len, edge->len))))
+ return NULL;
+ }
+
+ P(node_t) *node = edge->node;
+ uns deg = edge->flags & TRIE_FLAG_DEG;
+
+ if (!deg)
+ {
+ /* The edge has no sons */
+ if (!pref)
+ {
+ TRIE_DBG("Deleting last edge");
+ T.root = NULL;
+ P(edge_free)(TTC edge);
+ return node;
+ }
+ else
+ {
+ TRIE_DBG("Deleting a leaf");
+ /* Remember the parent as it was before son_delete(), which may replace *pref */
+ pold = *pref;
+ P(son_delete)(TTC pref, P(str_char)(ptr, len, pold->len));
+ parent = *pref;
+ if ((parent->flags & (TRIE_FLAG_DEG | TRIE_FLAG_NODE)) <= 1)
+ {
+ /* A node-less parent left with a single son is replaced by that son */
+ ASSERT((parent->flags & (TRIE_FLAG_DEG | TRIE_FLAG_HASH)) == 1);
+ TRIE_DBG("... and its parent");
+ leaf = *pref = parent->son[0];
+ P(edge_free)(TTC parent);
+ }
+ else if (parent->flags & TRIE_FLAG_NODE)
+ leaf = parent;
+ else
+ leaf = P(son_any)(parent);
+ }
+ P(edge_free)(TTC edge);
+ }
+ else if (deg == 1)
+ {
+ /* A single son takes the place of the deleted edge */
+ TRIE_DBG("Deleting internal edge");
+ ASSERT(!(edge->flags & TRIE_FLAG_HASH));
+ leaf = *ref = edge->son[0];
+ P(edge_free)(TTC edge);
+ }
+ else
+ {
+ /* Two or more sons: the edge stays, it only loses its node */
+ TRIE_DBG("Deleting node, but leaving edge");
+ leaf = P(son_any)(edge);
+ if (!(leaf->flags & TRIE_FLAG_NODE))
+ leaf = leaf->leaf;
+ edge->leaf = leaf;
+ edge->flags &= ~TRIE_FLAG_NODE;
+ }
+
+ TRIE_DBG("Updating leaf pointers");
+ if (!(leaf->flags & TRIE_FLAG_NODE))
+ leaf = leaf->leaf;
+ ASSERT(leaf->flags & TRIE_FLAG_NODE);
+ /* Redirect leaf pointers on the path which still refer to the removed edge or the old parent */
+ for (ref = &T.root; ref && (*ref)->len < len; ref = P(son_find)(*ref, P(str_char)(ptr, len, (*ref)->len)))
+ if ((*ref)->leaf == edge || (*ref)->leaf == pold)
+ (*ref)->leaf = leaf;
+ return node;
+}
+#endif
+
+#ifdef TRIE_WANT_FIND
+/* Find the node stored under the string @str (measured by str_len()); NULL if there is none. */
+static inline P(node_t) *
+P(find)(TAC char *str)
+{
+  struct P(edge) *hit = P(do_find)(TTC str, str_len(str));
+  if (!hit)
+    return NULL;
+  return hit->node;
+}
+#endif
+
+#ifdef TRIE_WANT_FIND_BUF
+/* Find the node stored under the @len bytes at @ptr; NULL if there is none. */
+static inline P(node_t) *
+P(find_buf)(TAC byte *ptr, uns len)
+{
+  struct P(edge) *hit = P(do_find)(TTC ptr, len);
+  if (!hit)
+    return NULL;
+  return hit->node;
+}
+#endif
+
+#ifdef TRIE_WANT_ADD
+/* Insert @node under its own key; asserts that the key was not occupied before. */
+static inline void
+P(add)(TAC P(node_t) *node)
+{
+  struct P(edge) *slot = P(do_lookup)(TTC P(str_get)(node), P(str_len)(node));
+  ASSERT(!slot->node);
+  slot->node = node;
+}
+#endif
+
+#ifdef TRIE_WANT_REPLACE
+/* Store @node under its own key and return the node pointer it has replaced (NULL for a new key). */
+static inline P(node_t) *
+P(replace)(TAC P(node_t) *node)
+{
+  struct P(edge) *slot = P(do_lookup)(TTC P(str_get)(node), P(str_len)(node));
+  P(node_t) *previous = slot->node;
+  slot->node = node;
+  return previous;
+}
+#endif
+
+#ifdef TRIE_WANT_DELETE
+/* Delete the node stored under the string @str; returns it, or NULL if the key is not present. */
+static inline P(node_t) *
+P(delete)(TAC char *str)
+{
+  P(node_t) *gone = P(do_delete)(TTC str, str_len(str));
+  return gone;
+}
+#endif
+
+#ifdef TRIE_WANT_DELETE_BUF
+/* Delete the node stored under the @len bytes at @ptr; returns it, or NULL if the key is not present. */
+static inline P(node_t) *
+P(delete_buf)(TAC byte *ptr, uns len)
+{
+  P(node_t) *gone = P(do_delete)(TTC ptr, len);
+  return gone;
+}
+#endif
+
+#ifdef TRIE_WANT_REMOVE
+static inline void
+P(remove)(TAC P(node_t) *node)
+{
+ if (unlikely(P(do_delete)(TTC P(str_get)(node), P(str_len)(node)) != node))
+ ASSERT(0);
+}
+#endif
+
+/*** Traversing prefixes and subtrees ***/
+
+#ifndef TRIE_FOR_ALL
+
+// Iterate over all edges matching the key (xptr, xlen) on the way down from the root,
+// up to and including the first edge whose length is >= xlen.
+// px is the prefix of the trie instance, xtrie points to the trie and xedge names
+// the loop variable (declared by the macro). The body between the two macros runs
+// once per edge and the loop must be closed by TRIE_END_PREFIX_EDGES.
+// An empty trie yields no iterations.
+#define TRIE_FOR_PREFIX_EDGES(px, xtrie, xptr, xlen, xedge) \
+ do \
+ { \
+ byte *_ptr = (xptr); \
+ uns _len = (xlen); \
+ struct px##trie *_trie = (xtrie); \
+ struct px##edge *xedge, **_ref; \
+ if (!(xedge = _trie->root)) \
+ break; \
+ while (xedge->len < _len && (_ref = px##son_find(xedge, px##str_char(_ptr, _len, xedge->len)))) \
+ xedge = *_ref; \
+ if (!(xedge->flags & TRIE_FLAG_NODE)) \
+ xedge = xedge->leaf; \
+ uns _prefix = px##common_prefix(_ptr, _len, px##str_get(xedge->node), xedge->len); \
+ for (_ref = &_trie->root; _ref && ((xedge = *_ref)->len <= _prefix || _prefix == _len); \
+ _ref = (xedge->len < _prefix) ? px##son_find(xedge, px##str_char(_ptr, _len, xedge->len)) : NULL) \
+ {
+#define TRIE_END_PREFIX_EDGES \
+ } \
+ } \
+ while (0)
+
+// Iterate over all edges of the subtree rooted at the xstart edge (NULL yields no iterations).
+// px is the prefix of the trie instance and xedge names the loop variable (declared by the macro).
+// The body between the two macros runs once per edge; close the loop with TRIE_END_SUBTREE_EDGES.
+// The traversal keeps an explicit stack of (edge, number of son slots still to be visited)
+// allocated by alloca(); it starts with 16 entries and doubles whenever it fills up.
+#define TRIE_FOR_SUBTREE_EDGES(px, xstart, xedge) \
+  do \
+    { \
+      struct { struct px##edge *edge; uns pos; } \
+        *_sbuf = alloca(sizeof(*_sbuf) * 16), \
+        *_sptr = _sbuf, *_send = _sbuf + 16; \
+      struct px##edge *_next = (xstart), *xedge; \
+      while (xedge = _next) \
+        { \
+          if (xedge->flags & TRIE_FLAG_DEG) \
+            { \
+              if (_sptr == _send) \
+                { \
+                  /* Stack full: move it to a buffer twice as large and keep the top where it was */ \
+                  uns stack_size = _sptr - _sbuf; \
+                  _sptr = alloca(sizeof(*_sptr) * (stack_size * 2)); \
+                  memcpy(_sptr, _sbuf, sizeof(*_sptr) * stack_size); \
+                  _sbuf = _sptr; \
+                  _send = _sbuf + stack_size * 2; \
+                  _sptr = _sbuf + stack_size; \
+                } \
+              _sptr->edge = xedge; \
+              _sptr->pos = (xedge->flags & TRIE_FLAG_HASH) ? \
+                (1U << px##bucket_rank) << xedge->hash_rank : \
+                (xedge->flags & TRIE_FLAG_DEG); \
+              _sptr++; \
+            } \
+          while (1) \
+            { \
+              if (_sptr == _sbuf) \
+                { \
+                  _next = NULL; \
+                  break; \
+                } \
+              _next = (--_sptr)->edge; \
+              uns pos = --(_sptr->pos); \
+              uns flags = _next->flags; \
+              _next = _next->son[pos]; \
+              if (pos) \
+                _sptr++; \
+              /* In hashed edges, skip slots with zero in-bucket index and slots holding 0 or 1 */ \
+              if (!(flags & TRIE_FLAG_HASH) || \
+                  ((pos & ((1U << px##bucket_rank) - 1)) && \
+                   (uintptr_t)_next > 1)) \
+                break; \
+            }
+#define TRIE_END_SUBTREE_EDGES \
+        } \
+    } \
+  while (0)
+
+// Iterate over all nodes of the subtree rooted at the xstart edge: like
+// TRIE_FOR_SUBTREE_EDGES, but the body runs only for edges carrying TRIE_FLAG_NODE
+// and xnode (declared by the macro) points to the node. Close with TRIE_END_SUBTREE.
+#define TRIE_FOR_SUBTREE(px, xstart, xnode) \
+ TRIE_FOR_SUBTREE_EDGES(px, xstart, _edge) \
+ if (_edge->flags & TRIE_FLAG_NODE) \
+ { \
+ px##node_t *xnode = _edge->node;
+#define TRIE_END_SUBTREE \
+ } \
+ TRIE_END_SUBTREE_EDGES;
+
+// The same iterators applied to the whole trie, i.e., starting at (xtrie)->root
+#define TRIE_FOR_ALL_EDGES(px, xtrie, xedge) TRIE_FOR_SUBTREE_EDGES(px, (xtrie)->root, xedge)
+#define TRIE_END_ALL_EDGES TRIE_END_SUBTREE_EDGES
+
+#define TRIE_FOR_ALL(px, xtrie, xnode) TRIE_FOR_SUBTREE(px, (xtrie)->root, xnode)
+#define TRIE_END_ALL TRIE_END_SUBTREE
+
+#endif
+
+/*** Check consistency ***/
+
+#ifdef TRIE_WANT_AUDIT
+
+/*
+ * Walk all edges of the trie and ASSERT() its structural invariants:
+ * leaf pointers refer to deeper edges carrying a node, every edge either has
+ * a node or at least two sons, sons are longer than their parent, and the
+ * son/deleted-slot counts of hashed edges agree with the stored values.
+ */
+static void
+P(audit)(TA)
+{
+ uns count = 0;
+ TRIE_FOR_ALL_EDGES(TRIE_PREFIX(), &T, edge)
+ {
+ ASSERT(edge);
+ uns deg = edge->flags & TRIE_FLAG_DEG;
+ ASSERT(edge->node);
+ struct P(edge) * leaf = (edge->flags & TRIE_FLAG_NODE) ? edge : edge->leaf;
+ if (leaf != edge)
+ {
+ ASSERT(leaf->flags & TRIE_FLAG_NODE);
+ ASSERT(leaf->len > edge->len);
+ ASSERT(leaf->node);
+ }
+ TRIE_DBG("Checking edge %p, %s=%p, flags=0x%x, key='%.*s'",
+ edge, (edge->flags & TRIE_FLAG_NODE) ? "node" : "leaf", edge->node, edge->flags,
+ edge->len, P(str_prefix)(P(str_get)(leaf->node), leaf->len, edge->len));
+ ASSERT(deg >= 2 || (edge->flags & TRIE_FLAG_NODE));
+ if (edge->flags & TRIE_FLAG_HASH)
+ {
+ ASSERT(deg > 1 && deg <= 256);
+ /* This count is local to the hashed branch and shadows the edge counter above */
+ uns count = 0, deleted = 0;
+ for (uns i = TRIE_BUCKET_SIZE << edge->hash_rank; i--; )
+ if (i & TRIE_BUCKET_MASK)
+ /* A slot holding 1 is counted as deleted, any other non-NULL slot as a son */
+ if ((uintptr_t)edge->son[i] == 1)
+ deleted++;
+ else if (edge->son[i])
+ {
+ ASSERT(edge->son[i]->len > edge->len);
+ count++;
+ }
+ ASSERT(count == deg);
+ ASSERT(deleted == edge->hash_deleted);
+ }
+ else
+ {
+ ASSERT(deg <= TRIE_HASH_THRESHOLD);
+ for (uns i = 0; i < deg; i++)
+ ASSERT(edge->son[i]->len > edge->len);
+ }
+ count++;
+ }
+ TRIE_END_ALL_EDGES;
+ TRIE_DBG("Found %u edges", count);
+}
+
+#endif
+
+/*** Statistics ***/
+
+#ifdef TRIE_WANT_STATS
+
+/* Memory usage figures filled in by the stats() function of this trie instance */
+struct P(stats) {
+ u64 total_size; /* small_size + hash_size + size of the trie structure itself */
+ u64 small_size; /* sum of ep_total_size() over the epool array */
+ u64 hash_size; /* sum of ep_total_size() over the hpool array */
+};
+
+/* Fill @stats with the sizes reported by ep_total_size() for all pools of the trie. */
+static void
+P(stats)(TAC struct P(stats) *stats)
+{
+  uns idx;
+
+  bzero(stats, sizeof(*stats));
+  idx = 0;
+  while (idx < ARRAY_SIZE(T.epool))
+    {
+      stats->small_size += ep_total_size(T.epool[idx]);
+      idx++;
+    }
+  idx = 0;
+  while (idx < ARRAY_SIZE(T.hpool))
+    {
+      stats->hash_size += ep_total_size(T.hpool[idx]);
+      idx++;
+    }
+  /* The total also includes the trie structure itself */
+  stats->total_size = stats->small_size + stats->hash_size + sizeof(T);
+}
+
+/* Return the total_size figure computed by the stats() function. */
+static inline u64
+P(total_size)(TA)
+{
+  struct P(stats) summary;
+
+  P(stats)(TTC &summary);
+  return summary.total_size;
+}
+
+#endif
+
+/*** Clean up local macros ***/
+
+/* Internal shortcuts */
+#undef P
+#undef T
+#undef TA
+#undef TAC
+#undef TT
+#undef TTC
+
+/* Parameters of the instance */
+#undef TRIE_PREFIX
+#undef TRIE_NODE_TYPE
+#undef TRIE_NODE_KEY
+#undef TRIE_NODE_LEN
+#undef TRIE_LEN_TYPE
+#undef TRIE_REV
+#undef TRIE_DYNAMIC
+#undef TRIE_ELTPOOL_SIZE
+#undef TRIE_HASH_THRESHOLD
+#undef TRIE_BUCKET_RANK
+#undef TRIE_BUCKET_SIZE
+#undef TRIE_BUCKET_MASK
+#undef TRIE_TRACE
+#undef TRIE_DBG
+#undef TRIE_HASH_FOR_ALL
+#undef TRIE_HASH_END_FOR
+
+/*
+ * Feature switches. Every TRIE_WANT_xxx tested by this header has to be
+ * undefined here, otherwise it would leak into the next instantiation.
+ */
+#undef TRIE_WANT_CLEANUP
+
+#undef TRIE_WANT_DO_FIND
+#undef TRIE_WANT_DO_LOOKUP
+#undef TRIE_WANT_DO_DELETE
+
+#undef TRIE_WANT_FIND
+#undef TRIE_WANT_FIND_BUF
+#undef TRIE_WANT_ADD
+#undef TRIE_WANT_ADD_OVER
+#undef TRIE_WANT_REPLACE
+#undef TRIE_WANT_DELETE
+#undef TRIE_WANT_DELETE_BUF
+#undef TRIE_WANT_REMOVE
+
+#undef TRIE_WANT_AUDIT
+#undef TRIE_WANT_STATS
--- /dev/null
+/*
+ * UCW Library -- Fast Access to Unaligned Data
+ *
+ * (c) 1997--2007 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_UNALIGNED_H
+#define _UCW_UNALIGNED_H
+
+/* Big endian format */
+
+#if defined(CPU_ALLOW_UNALIGNED) && defined(CPU_BIG_ENDIAN)
+/* Big-endian CPU which allows unaligned access: go directly through a cast pointer */
+static inline uns get_u16_be(const void *p) { return *(u16 *)p; } /** Read 16-bit integer value from an unaligned sequence of 2 bytes (big-endian version). **/
+static inline u32 get_u32_be(const void *p) { return *(u32 *)p; } /** Read 32-bit integer value from an unaligned sequence of 4 bytes (big-endian version). **/
+static inline u64 get_u64_be(const void *p) { return *(u64 *)p; } /** Read 64-bit integer value from an unaligned sequence of 8 bytes (big-endian version). **/
+static inline void put_u16_be(void *p, uns x) { *(u16 *)p = x; } /** Write 16-bit integer value to an unaligned sequence of 2 bytes (big-endian version). **/
+static inline void put_u32_be(void *p, u32 x) { *(u32 *)p = x; } /** Write 32-bit integer value to an unaligned sequence of 4 bytes (big-endian version). **/
+static inline void put_u64_be(void *p, u64 x) { *(u64 *)p = x; } /** Write 64-bit integer value to an unaligned sequence of 8 bytes (big-endian version). **/
+#else
+/* Portable versions composing the value byte by byte, most significant byte first */
+static inline uns get_u16_be(const void *p)
+{
+  const byte *c = p;
+  return (c[0] << 8) | c[1];
+}
+static inline u32 get_u32_be(const void *p)
+{
+  const byte *c = p;
+  /* Widen before shifting: a byte promoted to int would overflow for c[0] >= 0x80 */
+  return ((u32)c[0] << 24) | ((u32)c[1] << 16) | ((u32)c[2] << 8) | c[3];
+}
+static inline u64 get_u64_be(const void *p)
+{
+  /* Upper 32 bits come first, lower 32 bits follow at offset 4 */
+  return ((u64) get_u32_be(p) << 32) | get_u32_be((const byte *)p+4);
+}
+static inline void put_u16_be(void *p, uns x)
+{
+  byte *c = p;
+  c[0] = x >> 8;
+  c[1] = x;
+}
+static inline void put_u32_be(void *p, u32 x)
+{
+  byte *c = p;
+  c[0] = x >> 24;
+  c[1] = x >> 16;
+  c[2] = x >> 8;
+  c[3] = x;
+}
+static inline void put_u64_be(void *p, u64 x)
+{
+  put_u32_be(p, x >> 32);
+  put_u32_be((byte *)p+4, x);
+}
+#endif
+
+static inline u64 get_u40_be(const void *p) /** Read 40-bit integer value from an unaligned sequence of 5 bytes (big-endian version). **/
+{
+  const byte *bytes = p;
+  u64 top = bytes[0];	/* the most significant byte comes first */
+  return (top << 32) | get_u32_be(bytes+1);
+}
+
+static inline void put_u40_be(void *p, u64 x) /** Write 40-bit integer value to an unaligned sequence of 5 bytes (big-endian version). **/
+{
+  byte *bytes = p;
+  bytes[0] = x >> 32;		/* bits 32..39 go first */
+  put_u32_be(bytes+1, x);	/* followed by the lower 32 bits */
+}
+
+/* Little-endian format */
+
+#if defined(CPU_ALLOW_UNALIGNED) && !defined(CPU_BIG_ENDIAN)
+/* Little-endian CPU which allows unaligned access: go directly through a cast pointer */
+static inline uns get_u16_le(const void *p) { return *(u16 *)p; } /** Read 16-bit integer value from an unaligned sequence of 2 bytes (little-endian version). **/
+static inline u32 get_u32_le(const void *p) { return *(u32 *)p; } /** Read 32-bit integer value from an unaligned sequence of 4 bytes (little-endian version). **/
+static inline u64 get_u64_le(const void *p) { return *(u64 *)p; } /** Read 64-bit integer value from an unaligned sequence of 8 bytes (little-endian version). **/
+static inline void put_u16_le(void *p, uns x) { *(u16 *)p = x; } /** Write 16-bit integer value to an unaligned sequence of 2 bytes (little-endian version). **/
+static inline void put_u32_le(void *p, u32 x) { *(u32 *)p = x; } /** Write 32-bit integer value to an unaligned sequence of 4 bytes (little-endian version). **/
+static inline void put_u64_le(void *p, u64 x) { *(u64 *)p = x; } /** Write 64-bit integer value to an unaligned sequence of 8 bytes (little-endian version). **/
+#else
+/* Portable versions composing the value byte by byte, least significant byte first */
+static inline uns get_u16_le(const void *p)
+{
+  const byte *c = p;
+  return c[0] | (c[1] << 8);
+}
+static inline u32 get_u32_le(const void *p)
+{
+  const byte *c = p;
+  /* Widen before shifting: a byte promoted to int would overflow for c[3] >= 0x80 */
+  return c[0] | ((u32)c[1] << 8) | ((u32)c[2] << 16) | ((u32)c[3] << 24);
+}
+static inline u64 get_u64_le(const void *p)
+{
+  /* Lower 32 bits come first, upper 32 bits follow at offset 4 */
+  return get_u32_le(p) | ((u64) get_u32_le((const byte *)p+4) << 32);
+}
+static inline void put_u16_le(void *p, uns x)
+{
+  byte *c = p;
+  c[0] = x;
+  c[1] = x >> 8;
+}
+static inline void put_u32_le(void *p, u32 x)
+{
+  byte *c = p;
+  c[0] = x;
+  c[1] = x >> 8;
+  c[2] = x >> 16;
+  c[3] = x >> 24;
+}
+static inline void put_u64_le(void *p, u64 x)
+{
+  put_u32_le(p, x);
+  put_u32_le((byte *)p+4, x >> 32);
+}
+#endif
+
+static inline u64 get_u40_le(const void *p) /** Read 40-bit integer value from an unaligned sequence of 5 bytes (little-endian version). **/
+{
+  const byte *bytes = p;
+  u64 top = bytes[4];	/* the most significant byte comes last */
+  return get_u32_le(bytes) | (top << 32);
+}
+
+static inline void put_u40_le(void *p, u64 x) /** Write 40-bit integer value to an unaligned sequence of 5 bytes (little-endian version). **/
+{
+  byte *bytes = p;
+  put_u32_le(bytes, x);		/* the lower 32 bits go first */
+  bytes[4] = x >> 32;		/* followed by bits 32..39 */
+}
+
+/* The native format */
+
+#ifdef CPU_BIG_ENDIAN
+
+static inline uns get_u16(const void *p) { return get_u16_be(p); } /** Read 16-bit integer value from an unaligned sequence of 2 bytes (native byte-order). **/
+static inline u32 get_u32(const void *p) { return get_u32_be(p); } /** Read 32-bit integer value from an unaligned sequence of 4 bytes (native byte-order). **/
+static inline u64 get_u64(const void *p) { return get_u64_be(p); } /** Read 64-bit integer value from an unaligned sequence of 8 bytes (native byte-order). **/
+static inline u64 get_u40(const void *p) { return get_u40_be(p); } /** Read 40-bit integer value from an unaligned sequence of 5 bytes (native byte-order). **/
+static inline void put_u16(void *p, uns x) { put_u16_be(p, x); } /** Write 16-bit integer value to an unaligned sequence of 2 bytes (native byte-order). **/
+static inline void put_u32(void *p, u32 x) { put_u32_be(p, x); } /** Write 32-bit integer value to an unaligned sequence of 4 bytes (native byte-order). **/
+static inline void put_u64(void *p, u64 x) { put_u64_be(p, x); } /** Write 64-bit integer value to an unaligned sequence of 8 bytes (native byte-order). **/
+static inline void put_u40(void *p, u64 x) { put_u40_be(p, x); } /** Write 40-bit integer value to an unaligned sequence of 5 bytes (native byte-order). **/
+
+#else
+
+/* The same interface on little-endian CPUs: forward to the little-endian versions */
+static inline uns get_u16(const void *p) { return get_u16_le(p); } /* Read 16-bit value (native byte-order). */
+static inline u32 get_u32(const void *p) { return get_u32_le(p); } /* Read 32-bit value (native byte-order). */
+static inline u64 get_u64(const void *p) { return get_u64_le(p); } /* Read 64-bit value (native byte-order). */
+static inline u64 get_u40(const void *p) { return get_u40_le(p); } /* Read 40-bit value (native byte-order). */
+static inline void put_u16(void *p, uns x) { put_u16_le(p, x); } /* Write 16-bit value (native byte-order). */
+static inline void put_u32(void *p, u32 x) { put_u32_le(p, x); } /* Write 32-bit value (native byte-order). */
+static inline void put_u64(void *p, u64 x) { put_u64_le(p, x); } /* Write 64-bit value (native byte-order). */
+static inline void put_u40(void *p, u64 x) { put_u40_le(p, x); } /* Write 40-bit value (native byte-order). */
+
+#endif
+
+/* Just for completeness */
+
+static inline uns get_u8(const void *p) /** Read 8-bit integer value. **/
+{
+  const byte *src = p;
+  return *src;
+}
+static inline void put_u8(void *p, uns x) /** Write 8-bit integer value. **/
+{
+  byte *dst = p;
+  *dst = x;
+}
+
+/* Backward compatibility macros */
+
+/* Upper-case aliases of the inline functions above; each one expands to a plain expression */
+#define GET_U8(p) get_u8(p)
+#define GET_U16(p) get_u16(p)
+#define GET_U32(p) get_u32(p)
+#define GET_U64(p) get_u64(p)
+#define GET_U40(p) get_u40(p)
+
+/* No trailing semicolons here, so that the macros can be used in if/else and in expressions */
+#define PUT_U8(p,x) put_u8(p,x)
+#define PUT_U16(p,x) put_u16(p,x)
+#define PUT_U32(p,x) put_u32(p,x)
+#define PUT_U64(p,x) put_u64(p,x)
+#define PUT_U40(p,x) put_u40(p,x)
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- UTF-8 Functions
+ *
+ * (c) 1997--2004 Martin Mares <mj@ucw.cz>
+ * (c) 2003 Robert Spalek <robert@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+#include "ucw/unicode.h"
+
+/* Count characters of a zero-terminated UTF-8 string: one per UTF8_SKIP() step. */
+uns
+utf8_strlen(const byte *str)
+{
+  uns chars;
+  for (chars = 0; *str; chars++)
+    UTF8_SKIP(str);
+  return chars;
+}
+
+/* Count characters of the UTF-8 text starting at @str, scanning until @str + @n is reached. */
+uns
+utf8_strnlen(const byte *str, uns n)
+{
+  const byte *stop = str + n;
+  uns chars;
+  for (chars = 0; str < stop; chars++)
+    UTF8_SKIP(str);
+  return chars;
+}
+
+#ifdef TEST
+
+#include <string.h>
+#include <stdio.h>
+
+/*
+ * Test driver: argv[1] selects one of the conversion functions by name
+ * (case-insensitively). The *_GET modes read hexadecimal byte values from
+ * stdin and print the decoded characters; the *_PUT modes read hexadecimal
+ * character codes and print the bytes of their encoding.
+ * Returns 1 on invalid usage or when the input does not fit into the buffer.
+ */
+int main(int argc, char **argv)
+{
+  byte buf[256];
+
+#define FUNCS \
+  F(UTF8_GET) F(UTF8_32_GET) F(UTF16_BE_GET) F(UTF16_LE_GET) \
+  F(UTF8_PUT) F(UTF8_32_PUT) F(UTF16_BE_PUT) F(UTF16_LE_PUT)
+
+  enum {
+#define F(x) FUNC_##x,
+    FUNCS
+#undef F
+  };
+  char *names[] = {
+#define F(x) [FUNC_##x] = #x,
+    FUNCS
+#undef F
+  };
+
+  uns func = ~0U;
+  if (argc > 1)
+    for (uns i = 0; i < ARRAY_SIZE(names); i++)
+      if (!strcasecmp(names[i], argv[1]))
+        func = i;
+  if (!~func)
+    {
+      fprintf(stderr, "Invalid usage!\n");
+      return 1;
+    }
+
+  if (func < FUNC_UTF8_PUT)
+    {
+      byte *p = buf, *q = buf, *last;
+      /* Keep zeroed slack behind the data: the decoders may look a few bytes past q */
+      byte *limit = buf + sizeof(buf) - 8;
+      uns u;
+      bzero(buf, sizeof(buf));
+      while (scanf("%x", &u) == 1)
+        {
+          if (q >= limit)
+            {
+              fprintf(stderr, "Input too long!\n");
+              return 1;
+            }
+          *q++ = u;
+        }
+      while (p < q)
+        {
+          last = p;
+          if (p != buf)
+            putchar(' ');
+          switch (func)
+            {
+            case FUNC_UTF8_GET:
+              p = utf8_get(p, &u);
+              break;
+            case FUNC_UTF8_32_GET:
+              p = utf8_32_get(p, &u);
+              break;
+            case FUNC_UTF16_BE_GET:
+              p = utf16_be_get(p, &u);
+              break;
+            case FUNC_UTF16_LE_GET:
+              p = utf16_le_get(p, &u);
+              break;
+            default:
+              ASSERT(0);
+            }
+          printf("%04x", u);
+          /* Every decoder must advance and must not run past the end of the input */
+          ASSERT(last < p && p <= q);
+        }
+      putchar('\n');
+    }
+  else
+    {
+      uns u, i=0;
+      while (scanf("%x", &u) == 1)
+        {
+          /* Each character is encoded at the start of buf and printed immediately */
+          byte *p = buf, *q = buf;
+          switch (func)
+            {
+            case FUNC_UTF8_PUT:
+              p = utf8_put(p, u);
+              break;
+            case FUNC_UTF8_32_PUT:
+              p = utf8_32_put(p, u);
+              break;
+            case FUNC_UTF16_BE_PUT:
+              p = utf16_be_put(p, u);
+              break;
+            case FUNC_UTF16_LE_PUT:
+              p = utf16_le_put(p, u);
+              break;
+            default:
+              ASSERT(0);
+            }
+          while (q < p)
+            {
+              if (i++)
+                putchar(' ');
+              printf("%02x", *q++);
+            }
+        }
+      putchar('\n');
+    }
+  return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Unicode Characters
+ *
+ * (c) 1997--2004 Martin Mares <mj@ucw.cz>
+ * (c) 2004 Robert Spalek <robert@ucw.cz>
+ * (c) 2007 Pavel Charvat <pchar@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_UNICODE_H
+#define _UCW_UNICODE_H
+
+#include "ucw/unaligned.h"
+
+/* Macros for handling UTF-8 */
+
+#define UNI_REPLACEMENT 0xfffc /** Unicode value used as a default replacement of invalid characters. **/
+
+/**
+ * Encode a value from the range `[0, 0xFFFF]`
+ * (basic multilingual plane); up to 3 bytes needed (RFC2279).
+ * Returns the pointer just behind the bytes written.
+ **/
+static inline byte *utf8_put(byte *p, uns u)
+{
+  if (u < 0x80)
+    {
+      /* Single byte */
+      *p++ = u;
+      return p;
+    }
+  if (u < 0x800)
+    {
+      /* Two bytes: 5 + 6 bits */
+      *p++ = 0xc0 | (u >> 6);
+      *p++ = 0x80 | (u & 0x3f);
+      return p;
+    }
+  /* Three bytes: 4 + 6 + 6 bits */
+  ASSERT(u < 0x10000);
+  *p++ = 0xe0 | (u >> 12);
+  *p++ = 0x80 | ((u >> 6) & 0x3f);
+  *p++ = 0x80 | (u & 0x3f);
+  return p;
+}
+
+/**
+ * Encode a value from the range `[0, 0x7FFFFFFF]`;
+ * (superset of Unicode 4.0) up to 6 bytes needed (RFC2279).
+ * Returns the pointer just behind the bytes written.
+ **/
+static inline byte *utf8_32_put(byte *p, uns u)
+{
+ /*
+ * Each branch writes the leading byte and then jumps to the label putN,
+ * which emits the remaining N continuation bytes (6 bits each) by falling
+ * through the tail of the longest (6-byte) case.
+ */
+ if (u < 0x80)
+ *p++ = u;
+ else if (u < 0x800)
+ {
+ *p++ = 0xc0 | (u >> 6);
+ goto put1;
+ }
+ else if (u < (1<<16))
+ {
+ *p++ = 0xe0 | (u >> 12);
+ goto put2;
+ }
+ else if (u < (1<<21))
+ {
+ *p++ = 0xf0 | (u >> 18);
+ goto put3;
+ }
+ else if (u < (1<<26))
+ {
+ *p++ = 0xf8 | (u >> 24);
+ goto put4;
+ }
+ else if (u < (1U<<31))
+ {
+ *p++ = 0xfc | (u >> 30);
+ *p++ = 0x80 | ((u >> 24) & 0x3f);
+put4: *p++ = 0x80 | ((u >> 18) & 0x3f);
+put3: *p++ = 0x80 | ((u >> 12) & 0x3f);
+put2: *p++ = 0x80 | ((u >> 6) & 0x3f);
+put1: *p++ = 0x80 | (u & 0x3f);
+ }
+ else
+ ASSERT(0);
+ return p;
+}
+
+/*
+ * Helper of the decoders below: consume one continuation byte (10xxxxxx) at p
+ * and append its 6 payload bits to u; any other byte jumps to the label `bad`.
+ * It expects the variables p and u and the label bad in the calling function.
+ */
+#define UTF8_GET_NEXT if (unlikely((*p & 0xc0) != 0x80)) goto bad; u = (u << 6) | (*p++ & 0x3f)
+
+/**
+ * Decode a value from the range `[0, 0xFFFF]` (basic multilingual plane)
+ * or return @repl if the encoding has been corrupted.
+ * The value is stored to @uu; the function returns the pointer behind the bytes consumed.
+ **/
+static inline byte *utf8_get_repl(const byte *p, uns *uu, uns repl)
+{
+ uns u = *p++;
+ if (u < 0x80)
+ ;
+ else if (unlikely(u < 0xc0))
+ {
+ /* Incorrect byte sequence */
+ /* (also the target of UTF8_GET_NEXT and of too long sequences; bytes consumed so far stay consumed) */
+ bad:
+ u = repl;
+ }
+ else if (u < 0xe0)
+ {
+ /* 2-byte sequence */
+ u &= 0x1f;
+ UTF8_GET_NEXT;
+ }
+ else if (likely(u < 0xf0))
+ {
+ /* 3-byte sequence */
+ u &= 0x0f;
+ UTF8_GET_NEXT;
+ UTF8_GET_NEXT;
+ }
+ else
+ goto bad;
+ *uu = u;
+ return (byte *)p;
+}
+
+/**
+ * Decode a value from the range `[0, 0x7FFFFFFF]`
+ * or return @repl if the encoding has been corrupted.
+ * The value is stored to @uu; the function returns the pointer behind the bytes consumed.
+ **/
+static inline byte *utf8_32_get_repl(const byte *p, uns *uu, uns repl)
+{
+ /*
+ * The leading byte determines the number of continuation bytes; each branch
+ * masks out its payload bits and jumps to the label getN, which reads the
+ * remaining N continuation bytes by falling through the tail of the 6-byte case.
+ */
+ uns u = *p++;
+ if (u < 0x80)
+ ;
+ else if (unlikely(u < 0xc0))
+ {
+ /* Incorrect byte sequence */
+ bad:
+ u = repl;
+ }
+ else if (u < 0xe0)
+ {
+ u &= 0x1f;
+ goto get1;
+ }
+ else if (u < 0xf0)
+ {
+ u &= 0x0f;
+ goto get2;
+ }
+ else if (u < 0xf8)
+ {
+ u &= 0x07;
+ goto get3;
+ }
+ else if (u < 0xfc)
+ {
+ u &= 0x03;
+ goto get4;
+ }
+ else if (u < 0xfe)
+ {
+ u &= 0x01;
+ UTF8_GET_NEXT;
+get4: UTF8_GET_NEXT;
+get3: UTF8_GET_NEXT;
+get2: UTF8_GET_NEXT;
+get1: UTF8_GET_NEXT;
+ }
+ else
+ goto bad;
+ *uu = u;
+ return (byte *)p;
+}
+
+/**
+ * Decode a value from the range `[0, 0xFFFF]` (basic multilingual plane)
+ * or return `UNI_REPLACEMENT` if the encoding has been corrupted.
+ **/
+static inline byte *utf8_get(const byte *p, uns *uu)
+{
+ return utf8_get_repl(p, uu, UNI_REPLACEMENT);
+}
+
+/**
+ * Decode a value from the range `[0, 0x7FFFFFFF]`
+ * or return `UNI_REPLACEMENT` if the encoding has been corrupted.
+ **/
+static inline byte *utf8_32_get(const byte *p, uns *uu)
+{
+  /* Thin wrapper supplying the default replacement character */
+  byte *next = utf8_32_get_repl(p, uu, UNI_REPLACEMENT);
+  return next;
+}
+
+/*
+ * Advance the pointer p (an lvalue, evaluated several times) over one UTF-8
+ * character: the leading byte is skipped and if it is >= 0xc0, continuation
+ * bytes (0x80..0xbf) are skipped too, at most as many as the leading byte announces.
+ */
+#define UTF8_SKIP(p) do { \
+    uns c = *(p)++; \
+    if (c >= 0xc0) \
+      while (c & 0x40 && *(p) >= 0x80 && *(p) < 0xc0) \
+        (p)++, c <<= 1; \
+  } while (0)
+
+/* Move the pointer p backwards to the nearest byte which is not a continuation byte */
+#define UTF8_SKIP_BWD(p) while ((*--(p) & 0xc0) == 0x80)
+
+/**
+ * Return the number of bytes needed to encode a given value from the range `[0, 0x7FFFFFFF]` to UTF-8.
+ **/
+static inline uns utf8_space(uns u)
+{
+  /* Thresholds are checked from the largest one down */
+  if (u >= (1<<26))
+    return 6;
+  else if (u >= (1<<21))
+    return 5;
+  else if (u >= (1<<16))
+    return 4;
+  else if (u >= 0x800)
+    return 3;
+  else if (u >= 0x80)
+    return 2;
+  else
+    return 1;
+}
+
+/**
+ * Compute the length of a single UTF-8 character from its first byte. The encoding must be valid.
+ **/
+static inline uns utf8_encoding_len(uns c)
+{
+  uns bytes = 2;
+
+  if (c < 0x80)
+    return 1;
+  ASSERT(c >= 0xc0 && c < 0xfe);
+  /* Every further threshold of the leading byte adds one more byte */
+  if (c >= 0xe0)
+    bytes++;
+  if (c >= 0xf0)
+    bytes++;
+  if (c >= 0xf8)
+    bytes++;
+  if (c >= 0xfc)
+    bytes++;
+  return bytes;
+}
+
+/**
+ * Encode a UTF-16LE character from the range `[0, 0xD7FF]` or `[0xE000,0x10FFFF]`;
+ * up to 4 bytes needed. Returns the pointer just behind the bytes written.
+ **/
+static inline void *utf16_le_put(void *p, uns u)
+{
+ if (u < 0xd800 || (u < 0x10000 && u >= 0xe000))
+ {
+ /* A single 16-bit unit */
+ put_u16_le(p, u);
+ return p + 2;
+ }
+ else if ((u -= 0x10000) < 0x100000)
+ {
+ /* Surrogate pair: the upper 10 bits of the offset go to the first unit, the lower 10 bits to the second one */
+ put_u16_le(p, 0xd800 | (u >> 10));
+ put_u16_le(p + 2, 0xdc00 | (u & 0x3ff));
+ return p + 4;
+ }
+ else
+ ASSERT(0);
+}
+
+/**
+ * Encode a UTF-16BE character from the range `[0, 0xD7FF]` or `[0xE000,0x10FFFF]`;
+ * up to 4 bytes needed. Returns the pointer just behind the bytes written.
+ **/
+static inline void *utf16_be_put(void *p, uns u)
+{
+ if (u < 0xd800 || (u < 0x10000 && u >= 0xe000))
+ {
+ /* A single 16-bit unit */
+ put_u16_be(p, u);
+ return p + 2;
+ }
+ else if ((u -= 0x10000) < 0x100000)
+ {
+ /* Surrogate pair: the upper 10 bits of the offset go to the first unit, the lower 10 bits to the second one */
+ put_u16_be(p, 0xd800 | (u >> 10));
+ put_u16_be(p + 2, 0xdc00 | (u & 0x3ff));
+ return p + 4;
+ }
+ else
+ ASSERT(0);
+}
+
+/**
+ * Decode a UTF-16LE character from the range `[0, 0xD7FF]` or `[0xE000,0x10FFFF]`
+ * or return @repl if the encoding has been corrupted.
+ * The value is stored to @uu; the function returns the pointer behind the bytes consumed.
+ **/
+static inline void *utf16_le_get_repl(const void *p, uns *uu, uns repl)
+{
+ uns u = get_u16_le(p), x, y;
+ x = u - 0xd800;
+ /* x < 0x800 means a surrogate (0xd800..0xdfff); only x < 0x400 may start a pair */
+ if (x < 0x800)
+ if (x < 0x400 && (y = get_u16_le(p + 2) - 0xdc00) < 0x400)
+ {
+ u = 0x10000 + (x << 10) + y;
+ p += 2;
+ }
+ else
+ /* A lone surrogate consumes a single 16-bit unit */
+ u = repl;
+ *uu = u;
+ return (void *)(p + 2);
+}
+
+/**
+ * Decode a UTF-16BE character from the range `[0, 0xD7FF]` or `[0xE000,0x10FFFF]`
+ * or return @repl if the encoding has been corrupted.
+ * The value is stored to @uu; the function returns the pointer behind the bytes consumed.
+ **/
+static inline void *utf16_be_get_repl(const void *p, uns *uu, uns repl)
+{
+ uns u = get_u16_be(p), x, y;
+ x = u - 0xd800;
+ /* x < 0x800 means a surrogate (0xd800..0xdfff); only x < 0x400 may start a pair */
+ if (x < 0x800)
+ if (x < 0x400 && (y = get_u16_be(p + 2) - 0xdc00) < 0x400)
+ {
+ u = 0x10000 + (x << 10) + y;
+ p += 2;
+ }
+ else
+ /* A lone surrogate consumes a single 16-bit unit */
+ u = repl;
+ *uu = u;
+ return (void *)(p + 2);
+}
+
+/**
+ * Decode a UTF-16LE character from the range `[0, 0xD7FF]` or `[0xE000,0x10FFFF]`
+ * or return `UNI_REPLACEMENT` if the encoding has been corrupted.
+ **/
+static inline void *utf16_le_get(const void *p, uns *uu)
+{
+ return utf16_le_get_repl(p, uu, UNI_REPLACEMENT);
+}
+
+/**
+ * Decode a UTF-16BE character from the range `[0, 0xD7FF]` or `[0xE000,0x10FFFF]`
+ * or return `UNI_REPLACEMENT` if the encoding has been corrupted.
+ **/
+static inline void *utf16_be_get(const void *p, uns *uu)
+{
+ return utf16_be_get_repl(p, uu, UNI_REPLACEMENT);
+}
+
+/**
+ * Check a Unicode value and if it seems to be useless (as defined by Ucwlib; this may change in the future), return `UNI_REPLACEMENT` instead.
+ **/
+static inline uns unicode_sanitize_char(uns u)
+{
+ if (u >= 0x10000 || // We don't accept anything outside the basic plane,
+ u >= 0xd800 && u < 0xf900 || // nor surrogates (the rejected range is 0xd800..0xf8ff),
+ u >= 0x80 && u < 0xa0 || // nor latin-1 control chars,
+ u < 0x20 && u != '\t') // nor ASCII control chars except for the tab
+ return UNI_REPLACEMENT;
+ return u;
+}
+
+/* unicode-utf8.c */
+
+/**
+ * Count the number of Unicode characters in a zero-terminated UTF-8 string.
+ * Returned value for corrupted encoding is undefined, but is never greater than strlen().
+ **/
+uns utf8_strlen(const byte *str);
+
+/**
+ * Same as @utf8_strlen(), but it scans the first @n bytes of @str instead of
+ * stopping at a zero byte, so it returns at most @n characters.
+ **/
+uns utf8_strnlen(const byte *str, uns n);
+
+#endif
--- /dev/null
+# Tests for the Unicode module
+
+Name: utf8_put (1)
+Run: ../obj/ucw/unicode-t utf8_put
+In: 0041 0048 004f 004a
+Out: 41 48 4f 4a
+
+Name: utf8_put (2)
+Run: ../obj/ucw/unicode-t utf8_put
+In: 00aa 01aa 02a5 05a5 0a5a 15a5 2a5a 5a5a a5a5
+Out: c2 aa c6 aa ca a5 d6 a5 e0 a9 9a e1 96 a5 e2 a9 9a e5 a9 9a ea 96 a5
+
+Name: utf8_get (1)
+Run: ../obj/ucw/unicode-t utf8_get
+In: 41 48 4f 4a
+Out: 0041 0048 004f 004a
+
+Name: utf8_get (2)
+Run: ../obj/ucw/unicode-t utf8_get
+In: c2 aa c6 aa ca a5 d6 a5 e0 a9 9a e1 96 a5 e2 a9 9a e5 a9 9a ea 96 a5
+Out: 00aa 01aa 02a5 05a5 0a5a 15a5 2a5a 5a5a a5a5
+
+Name: utf8_get (3)
+Run: ../obj/ucw/unicode-t utf8_get
+In: 84 ff f9 f8 c2 aa 41
+Out: fffc fffc fffc fffc 00aa 0041
+
+Name: utf8_32_put
+Run: ../obj/ucw/unicode-t utf8_32_put
+In: 15a5a 2a5a5 5a5a5 a5a5a 15a5a5 2a5a5a 5a5a5a a5a5a5 15a5a5a 2a5a5a5 5a5a5a5 a5a5a5a 15a5a5a5 2a5a5a5a 5a5a5a5a
+Out: f0 95 a9 9a f0 aa 96 a5 f1 9a 96 a5 f2 a5 a9 9a f5 9a 96 a5 f8 8a a5 a9 9a f8 96 a5 a9 9a f8 a9 9a 96 a5 f9 96 a5 a9 9a fa a9 9a 96 a5 fc 85 a9 9a 96 a5 fc 8a 96 a5 a9 9a fc 95 a9 9a 96 a5 fc aa 96 a5 a9 9a fd 9a 96 a5 a9 9a
+
+Name: utf8_32_get (1)
+Run: ../obj/ucw/unicode-t utf8_32_get
+In: f0 95 a9 9a f0 aa 96 a5 f1 9a 96 a5 f2 a5 a9 9a f5 9a 96 a5 f8 8a a5 a9 9a f8 96 a5 a9 9a f8 a9 9a 96 a5 f9 96 a5 a9 9a fa a9 9a 96 a5 fc 85 a9 9a 96 a5 fc 8a 96 a5 a9 9a fc 95 a9 9a 96 a5 fc aa 96 a5 a9 9a fd 9a 96 a5 a9 9a
+Out: 15a5a 2a5a5 5a5a5 a5a5a 15a5a5 2a5a5a 5a5a5a a5a5a5 15a5a5a 2a5a5a5 5a5a5a5 a5a5a5a 15a5a5a5 2a5a5a5a 5a5a5a5a
+
+Name: utf8_32_get (2)
+Run: ../obj/ucw/unicode-t utf8_32_get
+In: fe 83 81
+Out: fffc fffc fffc
+
+Name: utf16_be_put
+Run: ../obj/ucw/unicode-t utf16_be_put
+In: 0041 004a 2a5f feff 0000 10ffff ffff 10000
+Out: 00 41 00 4a 2a 5f fe ff 00 00 db ff df ff ff ff d8 00 dc 00
+
+Name: utf16_le_put
+Run: ../obj/ucw/unicode-t utf16_le_put
+In: 0041 004a 2a5f feff 0000 10ffff ffff 10000
+Out: 41 00 4a 00 5f 2a ff fe 00 00 ff db ff df ff ff 00 d8 00 dc
+
+Name: utf16_be_get (1)
+Run: ../obj/ucw/unicode-t utf16_be_get
+In: 00 41 00 4a 2a 5f fe ff 00 00 db ff df ff ff ff d8 00 dc 00
+Out: 0041 004a 2a5f feff 0000 10ffff ffff 10000
+
+Name: utf16_be_get (2)
+Run: ../obj/ucw/unicode-t utf16_be_get
+In: dc 1a 2a 5f d8 01 d8 01 2a 5f d8 01
+Out: fffc 2a5f fffc fffc 2a5f fffc
+
+Name: utf16_le_get (1)
+Run: ../obj/ucw/unicode-t utf16_le_get
+In: 41 00 4a 00 5f 2a ff fe 00 00 ff db ff df ff ff 00 d8 00 dc
+Out: 0041 004a 2a5f feff 0000 10ffff ffff 10000
+
+Name: utf16_le_get (2)
+Run: ../obj/ucw/unicode-t utf16_le_get
+In: 1a dc 5f 2a 01 d8 01 d8 5f 2a 01 d8
+Out: fffc 2a5f fffc fffc 2a5f fffc
--- /dev/null
+/*
+ * UCW Library -- URL Functions
+ *
+ * (c) 1997--2004 Martin Mares <mj@ucw.cz>
+ * (c) 2001--2005 Robert Spalek <robert@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ *
+ * XXX: The buffer handling in this module is really horrible, but it works.
+ */
+
+#include "ucw/lib.h"
+#include "ucw/url.h"
+#include "ucw/chartype.h"
+#include "ucw/conf.h"
+#include "ucw/prime.h"
+
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <alloca.h>
+
+/* Configuration */
+
+/* Tunables of this module with their built-in defaults */
+static uns url_ignore_spaces;
+static uns url_ignore_underflow;
+static char *url_component_separators = "";
+static uns url_min_repeat_count = 0x7fffffff;
+static uns url_max_repeat_length = 0;
+static uns url_max_occurences = ~0U;
+
+#ifndef TEST
+/* Items of the configuration section, each bound to one of the variables above */
+static struct cf_section url_config = {
+ CF_ITEMS {
+ CF_UNS("IgnoreSpaces", &url_ignore_spaces),
+ CF_UNS("IgnoreUnderflow", &url_ignore_underflow),
+ CF_STRING("ComponentSeparators", &url_component_separators),
+ CF_UNS("MinRepeatCount", &url_min_repeat_count),
+ CF_UNS("MaxRepeatLength", &url_max_repeat_length),
+ CF_UNS("MaxOccurences", &url_max_occurences),
+ CF_END
+ }
+};
+
+/* Declare the section under the name "URL"; not compiled in when TEST is defined */
+static void CONSTRUCTOR url_init_config(void)
+{
+ cf_declare_section("URL", &url_config, 0);
+}
+#endif
+
+/* Escaping and de-escaping */
+
+/* Convert a value from 0 to 15 to the corresponding upper-case hexadecimal digit. */
+static uns
+enhex(uns x)
+{
+  if (x < 10)
+    return x + '0';
+  return x - 10 + 'A';
+}
+
+/*
+ * Decode %XX escapes of the URL @s and store the zero-terminated result to @d.
+ * Reserved characters which were escaped are stored as the corresponding NCC_xxx
+ * codes instead of the characters themselves. Returns 0 on success or one of
+ * the URL_ERR_xxx codes. Writing stops with URL_ERR_TOO_LONG once the output
+ * reaches MAX_URL_SIZE - 10 bytes.
+ */
+int
+url_deescape(const char *s, char *d)
+{
+ char *dstart = d;
+ char *end = d + MAX_URL_SIZE - 10;
+ while (*s)
+ {
+ if (d >= end)
+ return URL_ERR_TOO_LONG;
+ if (*s == '%')
+ {
+ unsigned int val;
+ if (!Cxdigit(s[1]) || !Cxdigit(s[2]))
+ return URL_ERR_INVALID_ESCAPE;
+ val = Cxvalue(s[1])*16 + Cxvalue(s[2]);
+ /* Escaped control characters are rejected */
+ if (val < 0x20)
+ return URL_ERR_INVALID_ESCAPED_CHAR;
+ /* Escaped reserved characters are replaced by their NCC_xxx codes */
+ switch (val)
+ {
+ case ';':
+ val = NCC_SEMICOLON; break;
+ case '/':
+ val = NCC_SLASH; break;
+ case '?':
+ val = NCC_QUEST; break;
+ case ':':
+ val = NCC_COLON; break;
+ case '@':
+ val = NCC_AT; break;
+ case '=':
+ val = NCC_EQUAL; break;
+ case '&':
+ val = NCC_AND; break;
+ case '#':
+ val = NCC_HASH; break;
+#ifndef CONFIG_URL_ESCAPE_COMPAT
+ case '$':
+ val = NCC_DOLLAR; break;
+ case '+':
+ val = NCC_PLUS; break;
+ case ',':
+ val = NCC_COMMA; break;
+#endif
+ }
+ *d++ = val;
+ s += 3;
+ }
+ else if ((byte) *s > 0x20)
+ *d++ = *s++;
+ else if (Cspace(*s))
+ {
+ /*
+ * A run of whitespace is copied verbatim, unless url_ignore_spaces is set
+ * and the run is at the end of the input or nothing has been written yet;
+ * in that case it is dropped.
+ */
+ const char *s0 = s;
+ while (Cspace(*s))
+ s++;
+ if (!url_ignore_spaces || !(!*s || d == dstart))
+ {
+ while (Cspace(*s0))
+ {
+ if (d >= end)
+ return URL_ERR_TOO_LONG;
+ *d++ = *s0++;
+ }
+ }
+ }
+ else
+ return URL_ERR_INVALID_CHAR;
+ }
+ *d = 0;
+ return 0;
+}
+
+/*
+ * Introduce '%' escapes: copy @s to @d, keeping alphanumeric characters and
+ * the punctuation listed below as they are and writing everything else as %XX.
+ * NCC_xxx codes (values below NCC_MAX) are first translated back to the
+ * characters they stand for by the NCC_CHARS table, so they come out escaped.
+ *
+ * Returns 0 on success or URL_ERR_TOO_LONG. The limit is computed as
+ * d + MAX_URL_SIZE - 10, so @d is assumed to have room for MAX_URL_SIZE bytes.
+ */
+int
+url_enescape(const char *s, char *d)
+{
+ char *end = d + MAX_URL_SIZE - 10;
+ unsigned int c;
+
+ while (c = *s)
+ {
+ /* A single step writes at most 3 bytes, which fits in the 10-byte reserve behind `end' */
+ if (d >= end)
+ return URL_ERR_TOO_LONG;
+ if (Calnum(c) || /* RFC 2396 (2.1-2.3): Only alphanumerics ... */
+ c == '!' || c == '*' || c == '\'' || c == '(' || c == ')' || /* ... and some exceptions and reserved chars */
+ c == '$' || c == '-' || c == '_' || c == '.' || c == '+' ||
+ c == ',' || c == '=' || c == '&' || c == '#' || c == ';' ||
+ c == '/' || c == '?' || c == ':' || c == '@'
+#ifndef CONFIG_URL_ESCAPE_COMPAT
+ || c == '~'
+#endif
+ )
+ *d++ = *s++;
+ else
+ {
+ uns val = (byte)(((byte)*s < NCC_MAX) ? NCC_CHARS[(byte)*s] : *s);
+ *d++ = '%';
+ *d++ = enhex(val >> 4);
+ *d++ = enhex(val & 0x0f);
+ s++;
+ }
+ }
+ *d = 0;
+ return 0;
+}
+
+/*
+ * A variant of escaping which keeps the result readable: NCC_xxx codes
+ * (values below NCC_MAX) are replaced by the plain characters from NCC_CHARS,
+ * bytes in the range 0x20..0x7e are copied and all other bytes are written as %XX.
+ *
+ * Returns 0 on success or URL_ERR_TOO_LONG. The limit is computed as
+ * dest + MAX_URL_SIZE - 10, so @dest is assumed to have room for MAX_URL_SIZE bytes.
+ */
+int
+url_enescape_friendly(const char *src, char *dest)
+{
+ char *end = dest + MAX_URL_SIZE - 10;
+ const byte *srcb = src;
+ while (*srcb)
+ {
+ if (dest >= end)
+ return URL_ERR_TOO_LONG;
+ if ((byte)*srcb < NCC_MAX)
+ *dest++ = NCC_CHARS[*srcb++];
+ else if (*srcb >= 0x20 && *srcb < 0x7f)
+ *dest++ = *srcb++;
+ else
+ {
+ *dest++ = '%';
+ *dest++ = enhex((byte)*srcb >> 4);
+ *dest++ = enhex(*srcb++ & 0x0f);
+ }
+ }
+ *dest = 0;
+ return 0;
+}
+
+/* Split an URL (several parts may be copied to the destination buffer) */
+
+/* Tables indexed by protocol ID (URL_PROTO_xxx) */
+char *url_proto_names[URL_PROTO_MAX] = URL_PNAMES;
+static int url_proto_path_flags[URL_PROTO_MAX] = URL_PATH_FLAGS;
+
+/*
+ * Translate a protocol name to its ID, ignoring case.
+ * Names which are not in url_proto_names[] yield URL_PROTO_UNKNOWN.
+ */
+uns
+url_identify_protocol(const char *p)
+{
+  uns id = 1;                           /* Entry 0 is the "unknown" placeholder, never match it */
+
+  while (id < URL_PROTO_MAX)
+    {
+      if (strcasecmp(p, url_proto_names[id]) == 0)
+        return id;
+      id++;
+    }
+  return URL_PROTO_UNKNOWN;
+}
+
+/*
+ * Break the URL @s to its parts and describe them in @u.
+ *
+ * The protocol and the "user:passwd@host:port" part are copied to the buffer @d
+ * as zero-terminated strings; u->rest points to the remainder of @s itself.
+ * On return, u->buf points behind the used part of @d and u->bufend is set to
+ * d + MAX_URL_SIZE - 10, so @d is assumed to have room for MAX_URL_SIZE bytes.
+ * Parts which are not present stay NULL, a missing port is ~0.
+ *
+ * Returns 0 on success or URL_ERR_INVALID_PORT.
+ */
+int
+url_split(char *s, struct url *u, char *d)
+{
+ bzero(u, sizeof(struct url));
+ u->port = ~0;
+ u->bufend = d + MAX_URL_SIZE - 10;
+
+ if (s[0] != '/') /* Seek for "protocol:" */
+ {
+ char *p = s;
+ while (*p && Calnum(*p))
+ p++;
+ if (p != s && *p == ':')
+ {
+ u->protocol = d;
+ while (s < p)
+ *d++ = *s++;
+ *d++ = 0;
+ u->protoid = url_identify_protocol(u->protocol);
+ s++;
+ if (url_proto_path_flags[u->protoid] && (s[0] != '/' || s[1] != '/'))
+ {
+ /* The protocol requires complete host spec, but it's missing -> treat as a relative path instead */
+ /* Both s and d have advanced by the name plus one character (':' resp. the terminator), so rewind both by the same amount */
+ int len = d - u->protocol;
+ d -= len;
+ s -= len;
+ u->protocol = NULL;
+ u->protoid = 0;
+ }
+ }
+ }
+
+ if (s[0] == '/') /* Host spec or absolute path */
+ {
+ if (s[1] == '/') /* Host spec */
+ {
+ char *q, *e;
+ char *at = NULL;
+ char *ep;
+
+ s += 2;
+ q = d;
+ while (*s && *s != '/' && *s != '?') /* Copy user:passwd@host:port */
+ {
+ if (*s != '@')
+ *d++ = *s;
+ else if (!at)
+ {
+ /* The first '@' ends the user:passwd part; `at' marks the start of the host */
+ *d++ = 0;
+ at = d;
+ }
+ else /* This shouldn't happen with sane URL's, but we need to be sure */
+ *d++ = NCC_AT;
+ s++;
+ }
+ *d++ = 0;
+ if (at) /* user:passwd present */
+ {
+ u->user = q;
+ if (e = strchr(q, ':'))
+ {
+ *e++ = 0;
+ u->pass = e;
+ }
+ }
+ else
+ at = q;
+ e = strchr(at, ':');
+ if (e) /* host:port present */
+ {
+ uns p;
+ *e++ = 0;
+ p = strtoul(e, &ep, 10);
+ if (ep && *ep || p > 65535)
+ return URL_ERR_INVALID_PORT;
+ else if (p) /* Port 0 (e.g. in :/) is treated as default port */
+ u->port = p;
+ }
+ u->host = at;
+ }
+ }
+
+ u->rest = s;
+ u->buf = d;
+ return 0;
+}
+
+/* Normalization according to given base URL */
+
+static uns std_ports[] = URL_DEFPORTS; /* Default port numbers */
+
+/*
+ * Resolve the relative path of @u (u->rest) against the path of the base URL @b.
+ *
+ * An absolute path is left as it is and an empty one is replaced by the whole
+ * rest of the base. In all other cases the merged path is built in the free
+ * part of the buffer of @u (from u->buf up to u->bufend), u->rest is pointed
+ * to it and u->buf is moved behind it. "./" and "../" segments are resolved
+ * on the way.
+ *
+ * Returns 0 on success, URL_ERR_TOO_LONG when the result does not fit,
+ * URL_ERR_REL_NOTHING when the base path contains no slash and
+ * URL_PATH_UNDERFLOW when the base has no usable path or when ".." would
+ * climb above the root (unless url_ignore_underflow is set).
+ */
+static int
+relpath_merge(struct url *u, struct url *b)
+{
+  char *a = u->rest;
+  char *o = b->rest;
+  char *d = u->buf;
+  char *e = u->bufend;
+  char *p;
+
+  if (a[0] == '/')                      /* Absolute path => OK */
+    return 0;
+  if (o[0] != '/' && o[0] != '?')
+    return URL_PATH_UNDERFLOW;
+
+  if (!a[0])                            /* Empty URL -> inherit everything */
+    {
+      u->rest = b->rest;
+      return 0;
+    }
+
+  u->rest = d;                          /* We know we'll need to copy the path somewhere else */
+
+  if (a[0] == '#')                      /* Another fragment */
+    {
+      for(p=o; *p && *p != '#'; p++)
+        ;
+      goto copy;
+    }
+  if (a[0] == '?')                      /* New query */
+    {
+      for(p=o; *p && *p != '#' && *p != '?'; p++)
+        ;
+      goto copy;
+    }
+
+  p = NULL;                             /* Copy original path and find the last slash */
+  while (*o && *o != '?' && *o != '#')
+    {
+      if (d >= e)
+        return URL_ERR_TOO_LONG;
+      if ((*d++ = *o++) == '/')
+        p = d;
+    }
+  if (!p)
+    return URL_ERR_REL_NOTHING;
+  d = p;                                /* Continue just behind the last slash of the base */
+
+  while (*a)
+    {
+      if (a[0] == '.')
+        {
+          if (a[1] == '/' || !a[1])     /* Skip "./" and ".$" */
+            {
+              a++;
+              if (a[0])
+                a++;
+              continue;
+            }
+          else if (a[1] == '.' && (a[2] == '/' || !a[2]))       /* "../" */
+            {
+              a += 2;
+              if (a[0])
+                a++;
+              if (d <= u->buf + 1)
+                {
+                  /*
+                   * RFC 1808 says we should leave ".." as a path segment, but
+                   * we intentionally break the rule and refuse the URL.
+                   */
+                  if (!url_ignore_underflow)
+                    return URL_PATH_UNDERFLOW;
+                }
+              else
+                {
+                  d--;                  /* Discard trailing slash */
+                  while (d[-1] != '/')
+                    d--;
+                }
+              continue;
+            }
+        }
+      while (a[0] && a[0] != '/')
+        {
+          if (d >= e)
+            return URL_ERR_TOO_LONG;
+          *d++ = *a++;
+        }
+      if (a[0])
+        {
+          /*
+           * The separator must be checked as well: a path consisting of
+           * consecutive slashes never enters the loop above, so without
+           * this test it would be copied with no limit at all.
+           */
+          if (d >= e)
+            return URL_ERR_TOO_LONG;
+          *d++ = *a++;
+        }
+    }
+
+okay:
+  *d++ = 0;                             /* Always fits, bufend keeps a reserve behind it */
+  u->buf = d;
+  return 0;
+
+copy:                                   /* Combine part of old URL with the new one */
+  while (o < p)
+    if (d < e)
+      *d++ = *o++;
+    else
+      return URL_ERR_TOO_LONG;
+  while (*a)
+    if (d < e)
+      *d++ = *a++;
+    else
+      return URL_ERR_TOO_LONG;
+  goto okay;
+}
+
+/*
+ * Normalize the split URL @u, optionally relative to the base URL @b (may be NULL).
+ *
+ * A URL without a protocol inherits the protocol of the base and, when it has
+ * no host either, also the host, user, password and port; its path is then
+ * merged with the base path by relpath_merge(). A path starting with '?' is
+ * rewritten to "/?" and an unspecified port is replaced by the default port
+ * of the protocol.
+ *
+ * Returns 0 on success or an error code (URL_SYNTAX_ERROR, URL_ERR_REL_NOTHING,
+ * URL_ERR_TOO_LONG or whatever relpath_merge() returns).
+ */
+int
+url_normalize(struct url *u, struct url *b)
+{
+ int err;
+
+ /* Basic checks */
+ if (url_proto_path_flags[u->protoid] && (!u->host || !*u->host) ||
+ !u->host && u->user ||
+ !u->user && u->pass ||
+ !u->rest)
+ return URL_SYNTAX_ERROR;
+
+ if (!u->protocol)
+ {
+ /* Now we know it's a relative URL. Do we have any base? */
+ if (!b || !url_proto_path_flags[b->protoid])
+ return URL_ERR_REL_NOTHING;
+ u->protocol = b->protocol;
+ u->protoid = b->protoid;
+
+ /* Reference to the same host */
+ if (!u->host)
+ {
+ u->host = b->host;
+ u->user = b->user;
+ u->pass = b->pass;
+ u->port = b->port;
+ if (err = relpath_merge(u, b))
+ return err;
+ }
+ }
+
+ /* Change path "?" to "/?" because it's the true meaning */
+ if (u->rest[0] == '?')
+ {
+ int l = strlen(u->rest);
+ if (u->bufend - u->buf < l+1)
+ return URL_ERR_TOO_LONG;
+ /* The copy takes l+2 bytes ('/', the text and the terminator); the last one falls into the reserve behind bufend */
+ u->buf[0] = '/';
+ memcpy(u->buf+1, u->rest, l+1);
+ u->rest = u->buf;
+ u->buf += l+2;
+ }
+
+ /* Fill in missing info */
+ if (u->port == ~0U)
+ u->port = std_ports[u->protoid];
+
+ return 0;
+}
+
+/* Name canonicalization */
+
+/* Convert ASCII upper-case letters of the string @b to lower case in place; NULL is accepted and ignored */
+static void
+lowercase(char *b)
+{
+  if (!b)
+    return;
+  for (; *b; b++)
+    if (*b >= 'A' && *b <= 'Z')
+      *b += 0x20;
+}
+
+/*
+ * Cut off all dots at the end of the string @b in place. The first
+ * character is never removed, so "." stays as it is. NULL is accepted
+ * and ignored.
+ */
+static void
+kill_end_dot(char *b)
+{
+  size_t len;
+
+  if (!b)
+    return;
+  for (len = strlen(b); len > 1 && b[len-1] == '.'; len--)
+    b[len-1] = 0;
+}
+
+/*
+ * Bring the split URL @u to its canonical form: the protocol and the host are
+ * converted to lower case, dots at the end of the host name are removed, an
+ * empty path becomes "/" for the protocols flagged in url_proto_path_flags[]
+ * and the fragment (everything from the first '#') is cut off.
+ *
+ * The fragment is cut by writing a zero directly to the string u->rest points to.
+ * Always returns 0.
+ */
+int
+url_canonicalize(struct url *u)
+{
+ char *c;
+
+ lowercase(u->protocol);
+ lowercase(u->host);
+ kill_end_dot(u->host);
+ if ((!u->rest || !*u->rest) && url_proto_path_flags[u->protoid])
+ u->rest = "/";
+ if (u->rest && (c = strchr(u->rest, '#'))) /* Kill fragment reference */
+ *c = 0;
+ return 0;
+}
+
+/* Pack a broken-down URL */
+
+/*
+ * Copy the string @s (without its terminator) to @d, never writing at or
+ * behind @e. Returns the new end of the output, or NULL when the string did
+ * not fit. A NULL @d is passed through, so that a sequence of calls can be
+ * checked for overflow just once at its end.
+ */
+static char *
+append(char *d, const char *s, char *e)
+{
+  if (!d)
+    return d;
+  for (; *s; s++)
+    {
+      if (d >= e)
+        return NULL;
+      *d++ = *s;
+    }
+  return d;
+}
+
+/*
+ * Glue the parts of the split URL @u together and store the result to @d.
+ *
+ * When a protocol is present, u->protoid is recalculated from its name.
+ * The port is written only if it is specified (not ~0) and differs from the
+ * default port of the protocol.
+ *
+ * Returns 0 on success or URL_ERR_TOO_LONG. The limit is computed as
+ * d + MAX_URL_SIZE - 10, so @d is assumed to have room for MAX_URL_SIZE bytes.
+ */
+int
+url_pack(struct url *u, char *d)
+{
+  char *e = d + MAX_URL_SIZE - 10;
+
+  /* append() passes NULL through, so an overflow at any point is detected by the single check at the end */
+  if (u->protocol)
+    {
+      d = append(d, u->protocol, e);
+      d = append(d, ":", e);
+      u->protoid = url_identify_protocol(u->protocol);
+    }
+  if (u->host)
+    {
+      d = append(d, "//", e);
+      if (u->user)
+        {
+          d = append(d, u->user, e);
+          if (u->pass)
+            {
+              d = append(d, ":", e);
+              d = append(d, u->pass, e);
+            }
+          d = append(d, "@", e);
+        }
+      d = append(d, u->host, e);
+      if (u->port != std_ports[u->protoid] && u->port != ~0U)
+        {
+          /*
+           * The port is unsigned and the structure can be filled in by the
+           * caller with any value, so print it as unsigned to a buffer
+           * which holds every possible value.
+           */
+          char z[16];
+          snprintf(z, sizeof(z), "%u", u->port);
+          d = append(d, ":", e);
+          d = append(d, z, e);
+        }
+    }
+  if (u->rest)
+    d = append(d, u->rest, e);
+  if (!d)
+    return URL_ERR_TOO_LONG;
+  *d = 0;
+  return 0;
+}
+
+/* Error messages */
+
+/* Messages for the error codes; the order of the entries matches the values defined in url.h (1..9), entry 0 is the fallback */
+static char *errmsg[] = {
+ "Something is wrong",
+ "Too long",
+ "Invalid character",
+ "Invalid escape",
+ "Invalid escaped character",
+ "Invalid port number",
+ "Relative URL not allowed",
+ "Unknown protocol",
+ "Syntax error",
+ "Path underflow"
+};
+
+/* Return the message for the error code @err; codes outside the table yield the generic message of entry 0 */
+char *
+url_error(uns err)
+{
+ if (err >= sizeof(errmsg) / sizeof(char *))
+ err = 0;
+ return errmsg[err];
+}
+
+/* Standard cookbook recipes */
+
+/*
+ * De-escape the URL @u to @buf1, split it to @url (using @buf2 as the buffer
+ * for its parts), normalize it relative to @base and canonicalize it.
+ * The first step which fails stops the processing and its error code is
+ * returned; 0 means success.
+ */
+int
+url_canon_split_rel(const char *u, char *buf1, char *buf2, struct url *url, struct url *base)
+{
+  int err = url_deescape(u, buf1);
+
+  if (!err)
+    err = url_split(buf1, url, buf2);
+  if (!err)
+    err = url_normalize(url, base);
+  if (!err)
+    err = url_canonicalize(url);
+  return err;
+}
+
+/*
+ * The whole canonicalization in a single call: @src is de-escaped, split,
+ * normalized relative to @base, canonicalized, packed and escaped again
+ * to @dst. All intermediate buffers live on the stack. Returns 0 or the
+ * error code of the first step which has failed.
+ */
+int
+url_auto_canonicalize_rel(const char *src, char *dst, struct url *base)
+{
+  char deesc[MAX_URL_SIZE], parts[MAX_URL_SIZE], packed[MAX_URL_SIZE];
+  struct url ur;
+  int err;
+
+  err = url_canon_split_rel(src, deesc, parts, &ur, base);
+  if (err)
+    return err;
+  err = url_pack(&ur, packed);
+  if (err)
+    return err;
+  return url_enescape(packed, dst);
+}
+
+/* Testing */
+
+#ifdef TEST
+
+/*
+ * Test driver (compiled only when TEST is defined).
+ *
+ * Usage: <program> <URL> [<base URL>]
+ *
+ * Runs the URL through de-escaping, splitting, normalization against the base,
+ * canonicalization, packing and escaping and prints the result of every stage.
+ * When no base is given, a built-in one is used. Returns 1 on wrong usage or
+ * when any stage fails, 0 otherwise.
+ */
+int main(int argc, char **argv)
+{
+ char buf1[MAX_URL_SIZE], buf2[MAX_URL_SIZE], buf3[MAX_URL_SIZE], buf4[MAX_URL_SIZE];
+ int err;
+ struct url url, url0;
+ char *base = "http://mj@www.hell.org/123/sub_dir;param/index.html;param?query&zzz/sub;query+#fragment?";
+
+ if (argc != 2 && argc != 3)
+ return 1;
+ if (argc == 3)
+ base = argv[2];
+ if (err = url_deescape(argv[1], buf1))
+ {
+ printf("deesc: error %d\n", err);
+ return 1;
+ }
+ printf("deesc: %s\n", buf1);
+ if (err = url_split(buf1, &url, buf2))
+ {
+ printf("split: error %d\n", err);
+ return 1;
+ }
+ printf("split: @%s@%s@%s@%s@%d@%s\n", url.protocol, url.user, url.pass, url.host, url.port, url.rest);
+ /* The base is split and normalized without being de-escaped first */
+ if (err = url_split(base, &url0, buf3))
+ {
+ printf("split base: error %d\n", err);
+ return 1;
+ }
+ if (err = url_normalize(&url0, NULL))
+ {
+ printf("normalize base: error %d\n", err);
+ return 1;
+ }
+ printf("base: @%s@%s@%s@%s@%d@%s\n", url0.protocol, url0.user, url0.pass, url0.host, url0.port, url0.rest);
+ if (err = url_normalize(&url, &url0))
+ {
+ printf("normalize: error %d\n", err);
+ return 1;
+ }
+ printf("normalize: @%s@%s@%s@%s@%d@%s\n", url.protocol, url.user, url.pass, url.host, url.port, url.rest);
+ if (err = url_canonicalize(&url))
+ {
+ printf("canonicalize: error %d\n", err);
+ return 1;
+ }
+ printf("canonicalize: @%s@%s@%s@%s@%d@%s\n", url.protocol, url.user, url.pass, url.host, url.port, url.rest);
+ if (err = url_pack(&url, buf4))
+ {
+ printf("pack: error %d\n", err);
+ return 1;
+ }
+ printf("pack: %s\n", buf4);
+ /* buf2 is reused for the final output */
+ if (err = url_enescape(buf4, buf2))
+ {
+ printf("enesc: error %d\n", err);
+ return 1;
+ }
+ printf("enesc: %s\n", buf2);
+ return 0;
+}
+
+#endif
+
+/* A single component of a URL as seen by url_has_repeated_component() */
+struct component {
+ const char *start; /* First character of the component inside the URL (not zero-terminated) */
+ int length; /* Number of characters, the separator is not included */
+ uns count; /* Number of occurrences; kept only in the first component with this text */
+ u32 hash; /* hashf() of the text */
+};
+
+/*
+ * Hash @length characters starting at @start: the value starts at the
+ * length and for each character it is rotated left by 8 bits and XOR'ed
+ * with the character.
+ */
+static inline u32
+hashf(const char *start, int length)
+{
+  u32 hf = length;
+  int i;
+
+  for (i = 0; i < length; i++)
+    hf = (hf << 8 | hf >> 24) ^ start[i];
+  return hf;
+}
+
+/*
+ * Count how many times in a row the sequence formed by the first @len
+ * components of @comp occurs at the start of the array of @count components.
+ * The sequence itself is included, so the result is at least 1.
+ * The caller is expected to pass len <= count.
+ */
+static inline uns
+repeat_count(struct component *comp, uns count, uns len)
+{
+ struct component *orig_comp = comp;
+ uns found = 0;
+ while (1)
+ {
+ uns i;
+ /* Step over one copy of the sequence */
+ comp += len;
+ count -= len;
+ found++;
+ if (count < len)
+ return found;
+ /* Compare the following group of components with the first one; the cheap tests go first */
+ for (i=0; i<len; i++)
+ if (comp[i].hash != orig_comp[i].hash
+ || comp[i].length != orig_comp[i].length
+ || memcmp(comp[i].start, orig_comp[i].start, comp[i].length))
+ return found;
+ }
+}
+
+/*
+ * Check whether the URL contains suspiciously repeated components.
+ *
+ * The URL is cut to components at the characters of url_component_separators.
+ * Returns 1 if some component occurs more than url_max_occurences times.
+ * Otherwise it looks for a sequence of at most url_max_repeat_length components
+ * which is repeated at least url_min_repeat_count times in a row and returns
+ * the length of the sequence (in components). Returns 0 if nothing was found.
+ *
+ * All temporary arrays are allocated on the stack by alloca().
+ */
+int
+url_has_repeated_component(const char *url)
+{
+ struct component *comp;
+ uns comps, comp_len, rep_prefix, hash_size, *hash, *next;
+ const char *c;
+ uns i, j, k;
+
+ /* The first pass just counts the components */
+ for (comps=0, c=url; c; comps++)
+ {
+ c = strpbrk(c, url_component_separators);
+ if (c)
+ c++;
+ }
+ /* Too few components to reach either of the limits */
+ if (comps < url_min_repeat_count && comps <= url_max_occurences)
+ return 0;
+ /* The second pass records where every component starts and how long it is */
+ comp = alloca(comps * sizeof(*comp));
+ for (i=0, c=url; c; i++)
+ {
+ comp[i].start = c;
+ c = strpbrk(c, url_component_separators);
+ if (c)
+ {
+ comp[i].length = c - comp[i].start;
+ c++;
+ }
+ else
+ comp[i].length = strlen(comp[i].start);
+ }
+ ASSERT(i == comps);
+ for (i=0; i<comps; i++)
+ comp[i].hash = hashf(comp[i].start, comp[i].length);
+ if (comps > url_max_occurences)
+ {
+ /* Count occurrences using a chained hash table: hash[] holds the heads, next[] the links, ~0 ends a chain */
+ hash_size = next_table_prime(comps);
+ hash = alloca(hash_size * sizeof(*hash));
+ next = alloca(comps * sizeof(*next));
+ memset(hash, 255, hash_size * sizeof(*hash));
+ for (i=0; i<comps; i++)
+ {
+ j = comp[i].hash % hash_size;
+ for (k = hash[j]; ~k && (comp[i].hash != comp[k].hash || comp[i].length != comp[k].length ||
+ memcmp(comp[k].start, comp[i].start, comp[i].length)); k = next[k]);
+ if (!~k)
+ {
+ /* Seen for the first time: insert at the head of the chain */
+ next[i] = hash[j];
+ hash[j] = i;
+ comp[i].count = 1;
+ }
+ else
+ {
+ if (comp[k].count++ >= url_max_occurences)
+ return 1;
+ }
+ }
+ }
+ /* Try all sequence lengths and all starting positions */
+ for (comp_len = 1; comp_len <= url_max_repeat_length && comp_len <= comps; comp_len++)
+ for (rep_prefix = 0; rep_prefix <= comps - comp_len; rep_prefix++)
+ if (repeat_count(comp + rep_prefix, comps - rep_prefix, comp_len) >= url_min_repeat_count)
+ return comp_len;
+ return 0;
+}
--- /dev/null
+/*
+ * UCW Library -- URL Functions
+ *
+ * (c) 1997--2004 Martin Mares <mj@ucw.cz>
+ * (c) 2001 Robert Spalek <robert@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_URL_H
+#define _UCW_URL_H
+
+#define MAX_URL_SIZE 1024
+
+/* Non-control meanings of control characters */
+
+/*
+ * Codes which url_deescape() stores in place of reserved characters that were
+ * written as %XX escapes in the input. NCC_MAX is one more than the highest
+ * code in use.
+ */
+enum {
+ NCC_SEMICOLON = 1,
+ NCC_SLASH = 2,
+ NCC_QUEST = 3,
+ NCC_COLON = 4,
+ NCC_AT = 5,
+ NCC_EQUAL = 6,
+ NCC_AND = 7,
+ NCC_HASH = 8,
+#ifdef CONFIG_URL_ESCAPE_COMPAT
+ NCC_MAX = 9
+#else
+ // Avoid 9 (\t) and 10 (\n)
+ NCC_DOLLAR = 11,
+ NCC_PLUS = 12,
+ // Avoid 13 (\r)
+ NCC_COMMA = 14,
+ NCC_MAX = 15
+#endif
+};
+
+/* Translation of the codes back to characters: NCC_CHARS[code] is the character the code stands for */
+#ifdef CONFIG_URL_ESCAPE_COMPAT
+#define NCC_CHARS " ;/?:@=&#"
+#else
+#define NCC_CHARS " ;/?:@=&#\t\n$+\r,"
+#endif
+
+/* Remove/Introduce '%' escapes */
+
+int url_deescape(const char *s, char *d);
+int url_enescape(const char *s, char *d);
+int url_enescape_friendly(const char *src, char *dest);
+
+/* URL splitting and normalization */
+
+/* A URL broken down to its parts by url_split(); parts which are not present are NULL */
+struct url {
+ char *protocol; /* Protocol name */
+ uns protoid; /* Protocol ID (URL_PROTO_xxx) */
+ char *user;
+ char *pass;
+ char *host;
+ uns port; /* ~0 if unspec */
+ char *rest; /* Everything behind the host specification */
+ char *buf, *bufend; /* Free part of the buffer passed to url_split(), used by url_normalize() */
+};
+
+/* All the functions below return 0 on success or one of the error codes, unless stated otherwise */
+int url_split(char *s, struct url *u, char *d);
+int url_normalize(struct url *u, struct url *b);
+int url_canonicalize(struct url *u);
+int url_pack(struct url *u, char *d);
+int url_canon_split_rel(const char *url, char *buf1, char *buf2, struct url *u, struct url *base);
+int url_auto_canonicalize_rel(const char *src, char *dst, struct url *base);
+uns url_identify_protocol(const char *p); /* Returns URL_PROTO_xxx */
+int url_has_repeated_component(const char *url); /* Returns 0 when no repetition was found */
+
+/* Variants of the functions above which do not take any base URL */
+static inline int url_canon_split(const char *url, char *buf1, char *buf2, struct url *u)
+{ return url_canon_split_rel(url, buf1, buf2, u, NULL); }
+
+static inline int url_auto_canonicalize(const char *src, char *dst)
+{ return url_auto_canonicalize_rel(src, dst, NULL); }
+
+/* Error codes */
+
+/* Translate an error code to a human-readable message */
+char *url_error(uns);
+
+#define URL_ERR_TOO_LONG 1
+#define URL_ERR_INVALID_CHAR 2
+#define URL_ERR_INVALID_ESCAPE 3
+#define URL_ERR_INVALID_ESCAPED_CHAR 4
+#define URL_ERR_INVALID_PORT 5
+#define URL_ERR_REL_NOTHING 6
+#define URL_ERR_UNKNOWN_PROTOCOL 7
+#define URL_SYNTAX_ERROR 8
+#define URL_PATH_UNDERFLOW 9
+
+/* Protocol IDs; they serve as indices to the three tables below */
+#define URL_PROTO_UNKNOWN 0
+#define URL_PROTO_HTTP 1
+#define URL_PROTO_FTP 2
+#define URL_PROTO_FILE 3
+#define URL_PROTO_MAX 4
+
+/* Initializers of per-protocol tables: names, default ports and flags; url_split() and url_normalize() insist on a "//host" part where the flag is set */
+#define URL_PNAMES { "unknown", "http", "ftp", "file" }
+#define URL_DEFPORTS { ~0, 80, 21, 0 }
+#define URL_PATH_FLAGS { 0, 1, 1, 1 }
+
+extern char *url_proto_names[];
+
+#endif
--- /dev/null
+# Tests for url.c
+
+Name: Absolute
+Run: ../obj/ucw/url-t 'ftp://example.com/other'
+Out: deesc: ftp://example.com/other
+ split: @ftp@(null)@(null)@example.com@-1@/other
+ base: @http@mj@(null)@www.hell.org@80@/123/sub_dir;param/index.html;param?query&zzz/sub;query+#fragment?
+ normalize: @ftp@(null)@(null)@example.com@21@/other
+ canonicalize: @ftp@(null)@(null)@example.com@21@/other
+ pack: ftp://example.com/other
+ enesc: ftp://example.com/other
+
+Name: Simple
+Run: ../obj/ucw/url-t 'object'
+Out: deesc: object
+ split: @(null)@(null)@(null)@(null)@-1@object
+ base: @http@mj@(null)@www.hell.org@80@/123/sub_dir;param/index.html;param?query&zzz/sub;query+#fragment?
+ normalize: @http@mj@(null)@www.hell.org@80@/123/sub_dir;param/object
+ canonicalize: @http@mj@(null)@www.hell.org@80@/123/sub_dir;param/object
+ pack: http://mj@www.hell.org/123/sub_dir;param/object
+ enesc: http://mj@www.hell.org/123/sub_dir;param/object
+
+Name: Toplevel
+Run: ../obj/ucw/url-t '/object'
+Out: deesc: /object
+ split: @(null)@(null)@(null)@(null)@-1@/object
+ base: @http@mj@(null)@www.hell.org@80@/123/sub_dir;param/index.html;param?query&zzz/sub;query+#fragment?
+ normalize: @http@mj@(null)@www.hell.org@80@/object
+ canonicalize: @http@mj@(null)@www.hell.org@80@/object
+ pack: http://mj@www.hell.org/object
+ enesc: http://mj@www.hell.org/object
+
+Name: Domain
+Run: ../obj/ucw/url-t '//www.example.com'
+Out: deesc: //www.example.com
+ split: @(null)@(null)@(null)@www.example.com@-1@
+ base: @http@mj@(null)@www.hell.org@80@/123/sub_dir;param/index.html;param?query&zzz/sub;query+#fragment?
+ normalize: @http@(null)@(null)@www.example.com@80@
+ canonicalize: @http@(null)@(null)@www.example.com@80@/
+ pack: http://www.example.com/
+ enesc: http://www.example.com/
+
+Name: Levels
+Run: ../obj/ucw/url-t '../a/b;paramb/c/.././x?a#frag'
+Out: deesc: ../a/b;paramb/c/.././x?a#frag
+ split: @(null)@(null)@(null)@(null)@-1@../a/b;paramb/c/.././x?a#frag
+ base: @http@mj@(null)@www.hell.org@80@/123/sub_dir;param/index.html;param?query&zzz/sub;query+#fragment?
+ normalize: @http@mj@(null)@www.hell.org@80@/123/a/b;paramb/x?a#frag
+ canonicalize: @http@mj@(null)@www.hell.org@80@/123/a/b;paramb/x?a
+ pack: http://mj@www.hell.org/123/a/b;paramb/x?a
+ enesc: http://mj@www.hell.org/123/a/b;paramb/x?a
+
+Name: Query
+Run: ../obj/ucw/url-t '?query'
+Out: deesc: ?query
+ split: @(null)@(null)@(null)@(null)@-1@?query
+ base: @http@mj@(null)@www.hell.org@80@/123/sub_dir;param/index.html;param?query&zzz/sub;query+#fragment?
+ normalize: @http@mj@(null)@www.hell.org@80@/123/sub_dir;param/index.html;param?query
+ canonicalize: @http@mj@(null)@www.hell.org@80@/123/sub_dir;param/index.html;param?query
+ pack: http://mj@www.hell.org/123/sub_dir;param/index.html;param?query
+ enesc: http://mj@www.hell.org/123/sub_dir;param/index.html;param?query
+
+Name: Fragments
+Run: ../obj/ucw/url-t '#../?a' 'http://example.com/?query#@?/x'
+Out: deesc: #../?a
+ split: @(null)@(null)@(null)@(null)@-1@#../?a
+ base: @http@(null)@(null)@example.com@80@/?query#@?/x
+ normalize: @http@(null)@(null)@example.com@80@/?query#../?a
+ canonicalize: @http@(null)@(null)@example.com@80@/?query
+ pack: http://example.com/?query
+ enesc: http://example.com/?query
+
+Name: Deescape
+Run: ../obj/ucw/url-t '/%20%25'
+Out: deesc: / %
+ split: @(null)@(null)@(null)@(null)@-1@/ %
+ base: @http@mj@(null)@www.hell.org@80@/123/sub_dir;param/index.html;param?query&zzz/sub;query+#fragment?
+ normalize: @http@mj@(null)@www.hell.org@80@/ %
+ canonicalize: @http@mj@(null)@www.hell.org@80@/ %
+ pack: http://mj@www.hell.org/ %
+ enesc: http://mj@www.hell.org/%20%25
+
+Name: Dots
+Run: ../obj/ucw/url-t '..a/./x.?a/x'
+Out: deesc: ..a/./x.?a/x
+ split: @(null)@(null)@(null)@(null)@-1@..a/./x.?a/x
+ base: @http@mj@(null)@www.hell.org@80@/123/sub_dir;param/index.html;param?query&zzz/sub;query+#fragment?
+ normalize: @http@mj@(null)@www.hell.org@80@/123/sub_dir;param/..a/x.?a/x
+ canonicalize: @http@mj@(null)@www.hell.org@80@/123/sub_dir;param/..a/x.?a/x
+ pack: http://mj@www.hell.org/123/sub_dir;param/..a/x.?a/x
+ enesc: http://mj@www.hell.org/123/sub_dir;param/..a/x.?a/x
--- /dev/null
+# Makefile for the UCW utilities (c) 2008 Michal Vaner <vorner@ucw.cz>
+
+# All utilities of this directory, as paths in the object tree
+UCW_UTILS=$(addprefix $(o)/ucw/utils/,basecode daemon-helper rotate-log urltool)
+PROGS+=$(UCW_UTILS)
+DIRS+=ucw/utils
+
+# Prerequisites of the compiled utilities. There are no recipes and no entry
+# for rotate-log here; presumably the generic rules of the build system take
+# care of both -- confirm there.
+$(o)/ucw/utils/basecode: $(o)/ucw/utils/basecode.o $(LIBUCW)
+$(o)/ucw/utils/daemon-helper: $(o)/ucw/utils/daemon-helper.o $(LIBUCW)
+$(o)/ucw/utils/urltool: $(o)/ucw/utils/urltool.o $(LIBUCW)
+
+# The test of basecode needs the program itself
+TESTS+=$(o)/ucw/utils/basecode.test
+$(o)/ucw/utils/basecode.test: $(o)/ucw/utils/basecode
+
+# Installation: create the directory for binaries and copy all utilities there
+INSTALL_TARGETS+=install-ucw-utils
+install-ucw-utils:
+ install -d -m 755 $(DESTDIR)$(INSTALL_BIN_DIR)
+ install -m 755 $(UCW_UTILS) $(DESTDIR)$(INSTALL_BIN_DIR)
+
+.PHONY: install-ucw-utils
--- /dev/null
+/*
+ * UCW Library -- Line utility for encoding and decoding base64 & base224
+ *
+ * (c) 2008, Michal Vaner <vorner@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "ucw/lib.h"
+#include "ucw/base64.h"
+#include "ucw/base224.h"
+#include "ucw/fastbuf.h"
+#include "ucw/getopt.h"
+
+/* Long options; each of them is mapped to the short option in the last column */
+static struct option opts[] = {
+ { "encode64", 0, 0, 'e' },
+ { "decode64", 0, 0, 'd' },
+ { "encode224", 0, 0, 'E' },
+ { "decode224", 0, 0, 'D' },
+ { "prefix", 1, 0, 'p' },
+ { "blocks", 1, 0, 'b' },
+ { 0, 0, 0, 0 }
+};
+
+/*
+ * Description of the modes of operation, indexed by the mode number
+ * chosen in main(): 0 = encode64, 1 = decode64, 2 = encode224, 3 = decode224.
+ */
+static const struct {
+  uns (*function)(byte *, const byte *, uns);   /* The conversion function (destination, source, source length) */
+  uns in_block, out_block;                      /* Sizes of a single input and output block */
+  uns num_blocks;                               /* Default number of blocks processed at once */
+  uns add_prefix;                               /* 1 if the prefix is added to the output, 0 if it is stripped from the input */
+} actions[] = {
+  {
+    base64_encode,
+    BASE64_IN_CHUNK, BASE64_OUT_CHUNK, 20,
+    1
+  },
+  {
+    base64_decode,
+    BASE64_OUT_CHUNK, BASE64_IN_CHUNK, 20,
+    0
+  },
+  {
+    /* The output block must be the base224 one, otherwise the output buffer in main() is too small */
+    base224_encode,
+    BASE224_IN_CHUNK, BASE224_OUT_CHUNK, 6,
+    1
+  },
+  {
+    base224_decode,
+    BASE224_OUT_CHUNK, BASE224_IN_CHUNK, 6,
+    0
+  }
+};
+
+/*
+ * Recode the standard input to the standard output in the chosen mode.
+ *
+ * The input is processed in chunks of `blocks' input blocks. When encoding
+ * with a prefix, every chunk of output is preceded by the prefix and followed
+ * by a newline. When decoding with a prefix, every chunk of input is expected
+ * to start with the prefix and to end with a newline, both are stripped.
+ *
+ * Returns 0 on success, 1 on wrong usage; dies on malformed input lines.
+ */
+int main(int argc, char **argv)
+{
+  // Choose mode
+  int mode = -1;
+  char *prefix = NULL;
+  uns blocks = 0;
+  int opt;
+  while ((opt = getopt_long(argc, argv, "edEDp:b:", opts, NULL)) >= 0)
+    switch (opt)
+      {
+      case 'e': mode = 0; break;
+      case 'd': mode = 1; break;
+      case 'E': mode = 2; break;
+      case 'D': mode = 3; break;
+      case 'p': prefix = optarg; break;
+      case 'b':
+        {
+          // Check the sign before the conversion to unsigned, so that negative counts are refused
+          char *end;
+          long val = strtol(optarg, &end, 0);
+          if ((val > 0) && !*end)
+            {
+              blocks = val;
+              break;
+            }
+        }
+        // Invalid number of blocks: fall through to the usage message
+      default: goto usage;
+      }
+
+  if (mode == -1)
+    {
+      usage:
+      fprintf(stderr, "basecode mode [--prefix=prefix] [--blocks=number_of_blocks]\nMode is one of:\n\t--encode64 (-e)\n\t--decode64 (-d)\n\t--encode224 (-E)\n\t--decode224 (-D)\n");
+      return 1;
+    }
+  if (!blocks)
+    blocks = actions[mode].num_blocks;
+
+  // Prepare buffers
+  struct fastbuf *in = bfdopen_shared(0, 4096);
+  struct fastbuf *out = bfdopen_shared(1, 4096);
+  // When decoding with a prefix, each chunk carries the prefix and one trailing newline in addition to the data
+  int has_offset = !actions[mode].add_prefix && prefix;
+  uns offset = has_offset ? strlen(prefix) : 0;
+  uns read_size = actions[mode].in_block * blocks + offset + has_offset;
+  uns write_size = actions[mode].out_block * blocks;
+  byte in_buff[read_size], out_buff[write_size];
+  uns isize;
+
+  // Recode it
+  while (isize = bread(in, in_buff, read_size))
+    {
+      if (prefix)
+        {
+          if (actions[mode].add_prefix)
+            bputs(out, prefix);
+          else
+            // The chunk must hold at least the prefix and the newline, otherwise the length below would underflow
+            if ((isize < offset + has_offset) || (in_buff[isize-1] != '\n')
+                || (strncmp(prefix, in_buff, offset)))
+              die("Invalid line syntax");
+        }
+      uns osize = actions[mode].function(out_buff, in_buff + offset, isize - offset - has_offset);
+      bwrite(out, out_buff, osize);
+      if (actions[mode].add_prefix && prefix)
+        bputc(out, '\n');
+    }
+
+  bclose(in);
+  bclose(out);
+  return 0;
+}
--- /dev/null
+/*
+ * A Simple Wrapper for Starting and Stopping of Daemons
+ *
+ * (c) 2003 Martin Mares <mj@ucw.cz>
+ *
+ * It would seem that we are reinventing the wheel and the
+ * start-stop-daemon command present in most Linux distributions
+ * is just what we need, but the usual "does the process already
+ * exist?" strategies fail in presence of multiple running daemons.
+ *
+ * Return codes:
+ * 101 already running
+ * 102 not running
+ */
+
+#include "ucw/lib.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <errno.h>
+#include <alloca.h>
+
+/* What the helper was asked to do */
+enum action {
+ ACTION_NONE,
+ ACTION_START,
+ ACTION_STOP,
+ ACTION_FORCE_STOP,
+ ACTION_CHECK,
+ ACTION_RELOAD
+};
+
+/* The chosen action; written directly by getopt_long() through the flag pointers in options[] */
+static int action;
+
+/* The two options with an argument are returned as 'p' and 's', the action options only set `action' */
+static struct option options[] = {
+ { "pid-file", required_argument, NULL, 'p' },
+ { "status-file", required_argument, NULL, 's' },
+ { "start", no_argument, &action, ACTION_START },
+ { "stop", no_argument, &action, ACTION_STOP },
+ { "force-stop", no_argument, &action, ACTION_FORCE_STOP },
+ { "check", no_argument, &action, ACTION_CHECK },
+ { "reload", no_argument, &action, ACTION_RELOAD },
+ { NULL, no_argument, NULL, 0 }
+};
+
+/* Print the usage message to stderr and exit with status 1 */
+static void NONRET
+usage(void)
+{
+ fputs("\n\
+Usage: daemon-helper --start <options> -- <daemon> <args>\n\
+ or: daemon-helper --stop <options>\n\
+ or: daemon-helper --force-stop <options>\n\
+ or: daemon-helper --reload <options>\n\
+ or: daemon-helper --check <options>\n\
+\n\
+Options:\n\
+--pid-file <name> Name of PID file for this daemon (mandatory)\n\
+--status-file <name> Status file used by the daemon (deleted just before starting)\n\
+", stderr);
+ exit(1);
+}
+
+/*
+ * Start, stop, check or reload a daemon identified by its PID file.
+ *
+ * A running daemon is recognized by a write lock held on the PID file:
+ * --start takes the lock, writes the PID and executes the daemon with the
+ * file still open, the other actions try to take the lock to see whether
+ * anybody holds it.
+ *
+ * Return codes: 0 on success, 101 if the daemon is already running (--start),
+ * 102 if it is not running (other actions); usage() exits with 1.
+ */
+int
+main(int argc, char **argv)
+{
+  int c, fd;
+  char *pidfile = NULL;
+  char *statfile = NULL;
+  struct flock fl;
+  char buf[64];
+
+  while ((c = getopt_long(argc, argv, "", options, NULL)) >= 0)
+    switch (c)
+      {
+      case 0:                           /* An action option, already stored to `action' */
+        break;
+      case 'p':
+        pidfile = optarg;
+        break;
+      case 's':
+        statfile = optarg;
+        break;
+      default:
+        usage();
+      }
+  if (!pidfile)
+    usage();
+
+  /* A write lock on the whole file (start and length are left zero) */
+  bzero(&fl, sizeof(fl));
+  fl.l_type = F_WRLCK;
+  fl.l_whence = SEEK_SET;
+
+  switch (action)
+    {
+    case ACTION_START:
+      if (optind >= argc)
+        usage();
+      fd = open(pidfile, O_RDWR | O_CREAT, 0666);
+      if (fd < 0)
+        die("Unable to create %s: %m", pidfile);
+      if ((c = fcntl(fd, F_SETLK, &fl)) < 0)
+        {
+          if (errno == EAGAIN || errno == EACCES)
+            return 101;
+          else
+            die("fcntl lock on %s failed: %m", pidfile);
+        }
+      c = sprintf(buf, "%d\n", getpid());
+      if (write(fd, buf, c) != c)
+        die("write on %s failed: %m", pidfile);
+      if (ftruncate(fd, c) < 0)
+        die("truncate on %s failed: %m", pidfile);
+      if (statfile && unlink(statfile) < 0 && errno != ENOENT)
+        die("unlink(%s) failed: %m", statfile);
+      setsid();
+      /* Disconnect from stdin and stdout, leave stderr to the daemon. */
+      close(0);
+      open("/dev/null", O_RDWR, 0);
+      dup2(0, 1);
+      argv += optind;
+      argc -= optind;
+      char **a = alloca(sizeof(char *) * (argc+1));
+      memcpy(a, argv, sizeof(char *) * argc);
+      a[argc] = NULL;
+      /* The PID file stays open over the exec, so the lock is held by the daemon */
+      execv(a[0], a);
+      die("Cannot execute %s: %m", a[0]);
+    case ACTION_STOP:
+    case ACTION_FORCE_STOP:
+    case ACTION_CHECK:
+    case ACTION_RELOAD:
+      if (optind < argc)
+        usage();
+      fd = open(pidfile, O_RDWR);
+      if (fd < 0)
+        {
+          if (errno == ENOENT)
+            return 102;
+          else
+            die("Unable to open %s: %m", pidfile);
+        }
+      if ((c = fcntl(fd, F_SETLK, &fl)) >= 0)
+        {
+          /* Nobody holds the lock, so the PID file is a stale one */
+        nopid:
+          unlink(pidfile);
+          return 102;
+        }
+      if (errno != EAGAIN && errno != EACCES)
+        die("fcntl lock on %s failed: %m", pidfile);
+      if ((c = read(fd, buf, sizeof(buf))) < 0)
+        die("read on %s failed: %m", pidfile);
+      if (!c)
+        goto nopid;
+      if (c >= (int) sizeof(buf))
+        die("PID file syntax error");
+      /* read() does not terminate the string, sscanf() needs it */
+      buf[c] = 0;
+      /* Non-positive numbers have a special meaning for kill(), never pass them */
+      if (sscanf(buf, "%d", &c) != 1 || c <= 0)
+        die("PID file syntax error");
+      int sig = 0;
+      if (action == ACTION_CHECK || action == ACTION_RELOAD)
+        {
+          /* Signal 0 only checks that the process exists */
+          if (action == ACTION_RELOAD)
+            sig = SIGHUP;
+          if (kill(c, sig) < 0 && errno == ESRCH)
+            goto nopid;
+          return 0;
+        }
+      sig = (action == ACTION_STOP) ? SIGTERM : SIGQUIT;
+      if (kill(c, sig) < 0)
+        {
+          if (errno == ESRCH)
+            goto nopid;
+          die("Cannot kill process %d: %m", c);
+        }
+      /* Wait until the daemon releases the lock, that is, until it exits */
+      if ((c = fcntl(fd, F_SETLKW, &fl)) < 0)
+        die("Cannot lock %s: %m", pidfile);
+      if (statfile)
+        unlink(statfile);
+      if (unlink(pidfile) < 0)
+        die("Cannot unlink %s: %m", pidfile);
+      return 0;
+    default:
+      usage();
+    }
+}
--- /dev/null
+#!/usr/bin/perl
+
+# Rotate Sherlock logs
+# (c) 2001--2002 Martin Mares <mj@ucw.cz>
+
+use File::stat;
+
+# Both thresholds are given in days; a value which is not positive disables the action.
+@ARGV >= 3 or die "Usage: rotate-log <days-to-compress> <days-to-delete> <logs...>";
+
+$now = time;
+$cps = shift @ARGV;
+$del = shift @ARGV;
+
+# Files modified before these moments get compressed / deleted
+$compress_thr = $now - 86400 * $cps;
+$delete_thr = $now - 86400 * $del;
+foreach $f (@ARGV) {
+	# Silently skip everything which is not a regular file or cannot be examined
+	-f $f or next;
+	$st = stat $f or next;
+	if ($del > 0 && $st->mtime < $delete_thr) {
+		print "Deleting $f\n";
+		# `or' is needed here: `||' would bind to $f and the failure would go unnoticed
+		unlink($f) or die "Delete FAILED: $!";
+	} elsif ($cps > 0 && $st->mtime < $compress_thr && $f !~ /\.(gz|bz2)$/) {
+		print "Compressing $f\n";
+		# The list form of system() avoids the shell, so that unusual file names are safe
+		$rc = system("gzip", "-f", $f);
+		$rc == 0 or die "Compression FAILED: " . ($rc == -1 ? $! : "gzip exit status " . ($? >> 8));
+	}
+}
--- /dev/null
+/*
+ * Sherlock Utilities -- URL Handling Tool
+ *
+ * (c) 2004 Martin Mares <mj@ucw.cz>
+ */
+
+#include "ucw/lib.h"
+#include "ucw/getopt.h"
+#include "ucw/url.h"
+#include "ucw/fastbuf.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+static byte *base_url; /* Base URL as given by the user, NULL if there is none */
+static struct url base; /* The base URL split to parts */
+static uns opt_split = 0, opt_normalize = 0, opt_forgive = 0;
+static struct fastbuf *fout; /* Output stream */
+static uns err_count; /* Number of URL's which failed to process */
+
+/*
+ * Process a single URL: de-escape and split it, normalize it against the base
+ * if there is a base or normalization was requested, canonicalize it if
+ * normalization was requested and print either its parts (with --split) or
+ * the packed and escaped URL.
+ *
+ * Errors are logged together with the URL and counted in err_count.
+ */
+static void
+process_url(byte *url)
+{
+ byte buf1[MAX_URL_SIZE], buf2[MAX_URL_SIZE], buf3[MAX_URL_SIZE], buf4[MAX_URL_SIZE];
+ int e;
+ struct url ur;
+
+ if ((e = url_deescape(url, buf1)) || (e = url_split(buf1, &ur, buf2)))
+ goto error;
+ if ((base_url || opt_normalize) && (e = url_normalize(&ur, &base)))
+ goto error;
+ if (opt_normalize && (e = url_canonicalize(&ur)))
+ goto error;
+ if (opt_split)
+ {
+ /* One "name=value" line per part which is present, an empty line ends the record */
+ if (ur.protocol)
+ bprintf(fout, "protocol=%s\n", ur.protocol);
+ if (ur.user)
+ bprintf(fout, "user=%s\n", ur.user);
+ if (ur.pass)
+ bprintf(fout, "pass=%s\n", ur.pass);
+ if (ur.host)
+ bprintf(fout, "host=%s\n", ur.host);
+ if (ur.port != ~0U)
+ bprintf(fout, "port=%d\n", ur.port);
+ if (ur.rest)
+ bprintf(fout, "rest=%s\n", ur.rest);
+ bputc(fout, '\n');
+ }
+ else
+ {
+ if ((e = url_pack(&ur, buf3)) || (e = url_enescape(buf3, buf4)))
+ goto error;
+ bprintf(fout, "%s\n", buf4);
+ }
+ return;
+
+ error:
+ msg(L_ERROR, "%s: %s", url, url_error(e));
+ err_count++;
+}
+
+/* Command-line options: the configuration options supplied by CF_SHORT_OPTS / CF_LONG_OPTS plus our own */
+static char *shortopts = CF_SHORT_OPTS "b:fns";
+static struct option longopts[] =
+{
+ CF_LONG_OPTS
+ { "base", 1, 0, 'b' },
+ { "forgive", 0, 0, 'f' },
+ { "normalize", 0, 0, 'n' },
+ { "split", 0, 0, 's' },
+ { NULL, 0, 0, 0 }
+};
+
+static char *help = "\
+Usage: urltool [<options>] <operations> [<URL's>]\n\
+\n\
+Options:\n"
+CF_USAGE "\
+-b, --base <URL>\tInput URL's are relative to this base\n\
+-f, --forgive\t\tReturn exit status 0 even if there were errors\n\
+\n\
+Operations:\n\
+-s, --split\t\tSplit a given URL to components\n\
+-n, --normalize\t\tNormalize given URL\n\
+";
+
+/* Print the optional message @msg and the help text to stderr and exit with status 1 */
+static void NONRET
+usage(byte *msg)
+{
+ if (msg)
+ {
+ fputs(msg, stderr);
+ fputc('\n', stderr);
+ }
+ fputs(help, stderr);
+ exit(1);
+}
+
+/*
+ * Parse the options and process all URL's given on the command line,
+ * or all lines of the standard input if there are none.
+ *
+ * Returns 1 if processing of some URL has failed and --forgive was not
+ * given, 0 otherwise.
+ */
+int
+main(int argc, char **argv)
+{
+  int opt, err;
+  /* The split base URL points to these buffers, so they must live as long as `base' is used */
+  byte basebuf1[MAX_URL_SIZE], basebuf2[MAX_URL_SIZE];
+
+  log_init(argv[0]);
+  while ((opt = cf_getopt(argc, argv, shortopts, longopts, NULL)) >= 0)
+    switch (opt)
+      {
+      case 'b':
+        /*
+         * This must set the file-scope base_url which process_url() tests.
+         * There used to be a local variable of the same name shadowing it,
+         * which made --base without --normalize do nothing.
+         */
+        base_url = optarg;
+        err = url_canon_split(base_url, basebuf1, basebuf2, &base);
+        if (err)
+          die("Invalid base URL: %s", url_error(err));
+        break;
+      case 's':
+        opt_split = 1;
+        break;
+      case 'n':
+        opt_normalize = 1;
+        break;
+      case 'f':
+        opt_forgive = 1;
+        break;
+      default:
+        usage("Invalid option");
+      }
+
+  fout = bfdopen_shared(1, 4096);
+  if (optind >= argc)
+    {
+      /* No URL's on the command line: read them from stdin, one per line */
+      struct fastbuf *fin = bfdopen_shared(0, 4096);
+      byte url[MAX_URL_SIZE];
+      while (bgets(fin, url, sizeof(url)))
+        process_url(url);
+      bclose(fin);
+    }
+  else
+    while (optind < argc)
+      process_url(argv[optind++]);
+  bclose(fout);
+
+  return (err_count && !opt_forgive);
+}
--- /dev/null
+/*
+ * UCW Library -- Fast Pattern Matcher for Short Wildcard Patterns (only `?' and `*' supported)
+ *
+ * Traditional NFA -> DFA method with on-the-fly DFA construction.
+ *
+ * (c) 1999 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+#include "ucw/mempool.h"
+#include "ucw/wildmatch.h"
+
+#include <stdio.h>
+#include <string.h>
+
+#define MAX_STATES 32 /* Must be <= 32, state 0 is reserved, state 1 is initial */
+#define MAX_CACHED 256 /* Maximum number of cached DFA states; wp_match() checks it on entry and prunes the cache when it is reached */
+#define HASH_SIZE 512 /* Number of entries in DFA hash table (at least MAX_CACHED+MAX_STATES) */
+#define HASH_SKIP 137 /* Step (modulo HASH_SIZE) taken by wp_new_state() when probing past an occupied hash slot */
+
+struct nfa_state {  /* One state of the NFA built by wp_compile() */
+ char ch; /* 0 for non-matching state */
+ byte final; /* Accepting state */
+ u32 match_states; /* Bit mask of states to go to when input character == ch */
+ u32 default_states; /* Bit mask of states to go to whatever the input is */
+};
+
+struct dfa_state {  /* One DFA state, created on demand by wp_new_state() */
+ uintptr_t edge[256]; /* Outgoing DFA edges, indexed by input byte. Bit 0 is set for incomplete edges which
+ * contain just state set and clear for complete ones which point
+ * to other states. NULL means `no match'.
+ */
+ u32 nfa_set; /* A set of NFA states this DFA state represents */
+ int final; /* This is an accepting state */
+ struct dfa_state *next; /* Next in the chain of free states */
+};
+
+struct wildpatt {  /* A compiled pattern: the NFA plus a cache of DFA states built from it */
+ struct nfa_state nfa[MAX_STATES];  /* NFA states indexed by state number; wp_compile() starts at 1 */
+ struct dfa_state *hash[HASH_SIZE];  /* Hash table of cached DFA states, looked up by their nfa_set */
+ struct dfa_state *dfa_start;  /* DFA state for the NFA set containing only state 1; wp_match() starts here */
+ uns nfa_states;  /* Number of the last NFA state, which is the accepting one */
+ uns dfa_cache_counter;  /* Count of DFA states in the cache; incremented by wp_new_state(), reset by wp_prune_cache() */
+ struct mempool *pool;  /* Pool from which new DFA states are allocated */
+ struct dfa_state *free_states;  /* Pruned DFA states available for reuse, chained by their `next' field */
+};
+
+/*
+ * Hash a set of NFA states to an index into the DFA hash table:
+ * fold the upper bits down onto the lower ones and reduce modulo HASH_SIZE.
+ */
+static inline unsigned
+wp_hash(u32 set)
+{
+  u32 folded = set ^ (set >> 16);
+
+  folded ^= folded >> 8;
+  return folded % HASH_SIZE;
+}
+
+/*
+ * Return the DFA state representing the given set of NFA states, creating
+ * and caching it if it is not in the hash table yet.
+ *
+ * w:   compiled pattern owning the cache
+ * set: bit mask of NFA states (bit number == state number, bit 0 clear)
+ *
+ * A newly created state gets only incomplete edges (state sets tagged with
+ * bit 0); wp_match() turns them into pointers when it first follows them.
+ *
+ * NOTE(review): the probe loop relies on the table never becoming full;
+ * this function does not check for it.
+ */
+static struct dfa_state *
+wp_new_state(struct wildpatt *w, u32 set)
+{
+  unsigned h = wp_hash(set);
+  struct dfa_state *d;
+  unsigned bit;
+  u32 def_set;
+
+  /* Look the set up; an empty slot terminates the probe sequence. */
+  while ((d = w->hash[h]) != NULL)
+    {
+      if (d->nfa_set == set)
+        return d;
+      h = (h + HASH_SKIP) % HASH_SIZE;
+    }
+
+  /* Not cached: recycle a pruned state if there is one, else allocate. */
+  if ((d = w->free_states) != NULL)
+    w->free_states = d->next;
+  else
+    d = mp_alloc(w->pool, sizeof(*d));
+  w->hash[h] = d;
+  bzero(d, sizeof(*d));
+  d->nfa_set = set;
+
+  /* Merge the edges of all NFA states contained in the set. */
+  def_set = 0;
+  for(bit=1; bit <= w->nfa_states; bit++)
+    if (set & ((u32)1 << bit))  /* Unsigned shift: state 31 must not overflow a signed int */
+      {
+        struct nfa_state *n = &w->nfa[bit];
+        if (n->ch)
+          d->edge[(unsigned char)n->ch] |= n->match_states | 1;
+        d->final |= n->final;
+        def_set |= n->default_states;
+      }
+
+  /* Default transitions apply to every input byte. */
+  if (def_set)
+    {
+      unsigned i;
+      def_set |= 1;
+      for(i=0; i<256; i++)
+        d->edge[i] |= def_set;
+    }
+  w->dfa_cache_counter++;
+  return d;
+}
+
+/*
+ * Compile a wildcard pattern (`?' matches any single character, `*' any
+ * sequence of characters) to a NFA and create the initial DFA state.
+ *
+ * p:    the pattern
+ * pool: memory pool used for the compiled pattern and its DFA states
+ *
+ * Returns the compiled pattern, or NULL if the pattern is too long, i.e.
+ * it has MAX_STATES or more characters or needs a state number which
+ * does not fit below MAX_STATES.
+ */
+struct wildpatt *
+wp_compile(const char *p, struct mempool *pool)
+{
+  struct wildpatt *w;
+  const char *c;
+  uns i, last;
+
+  /*
+   * State 1 is the initial one and every character except `*' adds a state.
+   * The last state number must stay below MAX_STATES, because it indexes
+   * nfa[] and is used as a bit number in the 32-bit state sets.  Checking
+   * only strlen() would let a pattern of MAX_STATES-1 such characters through.
+   */
+  last = 1;
+  for(c=p; *c; c++)
+    if (*c != '*')
+      last++;
+  if (strlen(p) >= MAX_STATES || last >= MAX_STATES)  /* Too long */
+    return NULL;
+
+  w = mp_alloc_zero(pool, sizeof(*w));
+  w->pool = pool;
+  for(i=1; *p; p++)
+    {
+      struct nfa_state *n = w->nfa + i;
+      if (*p == '?')
+        n->default_states |= (u32)1 << (++i);  /* Default edge to a new state */
+      else if (*p == '*')
+        n->default_states |= (u32)1 << i;      /* Default edge to the same state */
+      else
+        {
+          n->ch = *p;                          /* Edge to new state labelled with 'c' */
+          n->match_states = (u32)1 << (++i);
+        }
+    }
+  w->nfa[i].final = 1;
+  w->nfa_states = i;
+  w->dfa_start = wp_new_state(w, 1 << 1);
+  return w;
+}
+
+/*
+ * Throw away all cached DFA states and start again with a fresh initial
+ * state.  Called by wp_match() when the cache has reached MAX_CACHED states.
+ *
+ * The original author notes that cache overflow never showed up on a large
+ * set of test cases, hence the extremely simple strategy.
+ *
+ * The initial state is rebuilt as well.  If it were kept, its completed
+ * edges would still point to states moved to the free list, and following
+ * them after wp_new_state() has recycled those states for other NFA sets
+ * would give wrong matches.
+ */
+static void
+wp_prune_cache(struct wildpatt *w)
+{
+  int i;
+
+  for(i=0; i<HASH_SIZE; i++)
+    if (w->hash[i])
+      {
+        struct dfa_state *d = w->hash[i];
+        w->hash[i] = NULL;
+        d->next = w->free_states;
+        w->free_states = d;
+      }
+  w->dfa_cache_counter = 0;
+  /* Recreates the initial state with incomplete edges only; the counter becomes 1 again. */
+  w->dfa_start = wp_new_state(w, 1 << 1);
+}
+
+/*
+ * Match a string against a compiled pattern.
+ *
+ * w: pattern compiled by wp_compile()
+ * s: zero-terminated string to match
+ *
+ * Returns non-zero if the whole string matches, zero otherwise.  DFA states
+ * and edges missing from the cache are constructed on the way, so the
+ * pattern structure is modified by this call.
+ */
+int
+wp_match(struct wildpatt *w, const char *s)
+{
+  struct dfa_state *cur;
+  const char *pos;
+
+  /* The cache limit is checked only here, never in the middle of a match. */
+  if (w->dfa_cache_counter >= MAX_CACHED)
+    wp_prune_cache(w);
+
+  cur = w->dfa_start;
+  for (pos = s; *pos; pos++)
+    {
+      unsigned char c = (unsigned char) *pos;
+      uintptr_t edge = cur->edge[c];
+
+      if (!edge)
+        return 0;                       /* No transition on this character */
+      if (edge & 1)
+        {
+          /* Incomplete edge: find or build its target and remember the pointer */
+          struct dfa_state *target = wp_new_state(w, edge & ~1);
+          cur->edge[c] = (uintptr_t) target;
+          cur = target;
+        }
+      else
+        cur = (struct dfa_state *) edge;
+    }
+  return cur->final;
+}
+
+/*
+ * Return the number of characters of the pattern other than `*', which is
+ * the length of the shortest string the pattern can match.
+ */
+int
+wp_min_size(const char *p)
+{
+  int len = 0;
+
+  for (; *p; p++)
+    if (*p != '*')
+      len++;
+  return len;
+}
+
+#ifdef TEST
+
+/*
+ * Debugging aid: print the NFA states, the nfa_set of every DFA state
+ * present in the hash table and the cache counter to standard output.
+ */
+void
+wp_dump(struct wildpatt *w)
+{
+  int idx;
+
+  puts("NFA:");
+  for(idx=1; idx<=w->nfa_states; idx++)
+    printf("%2d: %d %02x %08x %08x\n", idx, w->nfa[idx].final, w->nfa[idx].ch,
+           w->nfa[idx].match_states, w->nfa[idx].default_states);
+
+  puts("DFA:");
+  for(idx=0; idx<HASH_SIZE; idx++)
+    {
+      struct dfa_state *d = w->hash[idx];
+      if (d)
+        printf("%3d: %08x\n", idx, d->nfa_set);
+    }
+
+  printf("%d DFA states cached.\n", w->dfa_cache_counter);
+}
+
+/*
+ * Test driver: compile the pattern given as the only argument, dump it,
+ * echo every line of standard input which matches, and dump it again.
+ * Reading stops at end of input or at a line without a newline.
+ */
+int main(int argc, char **argv)
+{
+  struct wildpatt *w;
+  char buf[1024];
+  char *nl;
+
+  if (argc != 2)
+    return 1;
+  w = wp_compile(argv[1], mp_new(65536));
+  if (!w)
+    {
+      puts("Compile error");
+      return 1;
+    }
+
+  wp_dump(w);
+  while (fgets(buf, sizeof(buf)-1, stdin) && (nl = strchr(buf, '\n')))
+    {
+      *nl = 0;
+#if 0
+      printf("%d\n", wp_match(w, buf));
+#else
+      if (wp_match(w, buf))
+        puts(buf);
+#endif
+    }
+  wp_dump(w);
+  return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Fast Wildcard Pattern Matcher (only `?' and `*' supported)
+ *
+ * (c) 1999 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+struct wildpatt;  /* Compiled pattern; used only through pointers in this header */
+struct mempool;
+
+struct wildpatt *wp_compile(const char *, struct mempool *);  /* Compile a pattern using memory from the pool; NULL if the pattern is too long */
+int wp_match(struct wildpatt *, const char *);  /* Non-zero if the whole string matches the compiled pattern */
+int wp_min_size(const char *);  /* Number of pattern characters other than `*' */
--- /dev/null
+/*
+ * UCW Library -- Thread Pools and Work Queues
+ *
+ * (c) 2006 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+#include "ucw/threads.h"
+#include "ucw/workqueue.h"
+#include "ucw/heap.h"
+
+#include <string.h>
+
+/*
+ * Start routine of every worker thread.
+ *
+ * arg: the struct worker_thread describing this thread
+ *
+ * Runs the pool's init_thread() callback (if any), posts init_cleanup_sem
+ * to tell worker_pool_init() that initialization is over and then serves
+ * requests forever: each request is taken from the pool's queue, its go()
+ * function is called and the request is put to the `finished' queue of the
+ * work queue it came from.  The loop is left only when a go() function
+ * does not return.
+ */
+static void *
+worker_thread_init(void *arg)
+{
+  struct worker_thread *self = arg;
+  struct worker_pool *pool = self->pool;
+
+  if (pool->init_thread)
+    pool->init_thread(self);
+  sem_post(pool->init_cleanup_sem);
+
+  while (1)
+    {
+      struct work *req = raw_queue_get(&pool->requests);
+      req->go(self, req);
+      raw_queue_put(&req->reply_to->finished, req);
+    }
+
+  return NULL;
+}
+
+/*
+ * The go() function of the request which worker_pool_cleanup() queues to
+ * shut a worker thread down: run the pool's cleanup_thread() callback
+ * (if any), post init_cleanup_sem and exit the calling thread.
+ */
+static void
+worker_thread_signal_finish(struct worker_thread *t, struct work *w UNUSED)
+{
+  struct worker_pool *pool = t->pool;
+
+  if (pool->cleanup_thread)
+    pool->cleanup_thread(t);
+  sem_post(pool->init_cleanup_sem);
+  pthread_exit(NULL);
+}
+
+/*
+ * Initialize a worker pool and start its threads.
+ *
+ * p: pool with num_threads, stack_size and the callbacks filled in by the
+ *    caller; the remaining fields are set up here.
+ *
+ * The threads are created one by one and the function waits on
+ * init_cleanup_sem after each creation, i.e. until the new thread has
+ * finished its init_thread() callback.  Dies if a thread cannot be created.
+ */
+void
+worker_pool_init(struct worker_pool *p)
+{
+  clist_init(&p->worker_threads);
+  raw_queue_init(&p->requests);
+  p->init_cleanup_sem = sem_alloc();
+
+  /* pthread functions report failure by returning a non-zero error number, never a negative value. */
+  pthread_attr_t attr;
+  if (pthread_attr_init(&attr) ||
+      pthread_attr_setstacksize(&attr, p->stack_size ? : ucwlib_thread_stack_size))
+    ASSERT(0);
+
+  for (uns i=0; i < p->num_threads; i++)
+    {
+      struct worker_thread *t = (p->new_thread ? p->new_thread() : xmalloc(sizeof(*t)));
+      t->pool = p;
+      t->id = i;
+      int err = pthread_create(&t->thread, &attr, worker_thread_init, t);
+      if (err)
+        die("Unable to create thread: %s", strerror(err));  /* errno is not set by pthread_create(), so %m would mislead */
+      clist_add_tail(&p->worker_threads, &t->n);
+      sem_wait(p->init_cleanup_sem);
+    }
+
+  pthread_attr_destroy(&attr);
+}
+
+/*
+ * Stop all threads of a worker pool and release its resources.
+ *
+ * One finish request is queued per thread; each time the function waits on
+ * init_cleanup_sem until a thread has run its cleanup.  Then the threads
+ * are joined, their structures are freed by free_thread() or xfree() and
+ * the request queue and the semaphore are destroyed.
+ */
+void
+worker_pool_cleanup(struct worker_pool *p)
+{
+  uns i = 0;
+
+  while (i < p->num_threads)
+    {
+      /* The request lives on our stack; the wait below keeps it alive until a thread has taken it. */
+      struct work finish = {
+        .go = worker_thread_signal_finish
+      };
+      raw_queue_put(&p->requests, &finish);
+      sem_wait(p->init_cleanup_sem);
+      i++;
+    }
+
+  struct worker_thread *tmp;
+  CLIST_FOR_EACH_DELSAFE(struct worker_thread *, t, p->worker_threads, tmp)
+    {
+      int err = pthread_join(t->thread, NULL);
+      ASSERT(!err);
+      if (!p->free_thread)
+        xfree(t);
+      else
+        p->free_thread(t);
+    }
+
+  raw_queue_cleanup(&p->requests);
+  sem_free(p->init_cleanup_sem);
+}
+
+/*
+ * Initialize a raw queue: an empty priority-0 list, no heap allocated yet,
+ * a fresh mutex and a semaphore counting the queued requests.
+ */
+void
+raw_queue_init(struct raw_queue *q)
+{
+  /* The heap is allocated lazily by raw_queue_put() */
+  q->pri_heap = NULL;
+  q->heap_cnt = 0;
+  q->heap_max = 0;
+
+  clist_init(&q->pri0_queue);
+  pthread_mutex_init(&q->queue_mutex, NULL);
+  q->queue_sem = sem_alloc();
+}
+
+/*
+ * Release the resources of a raw queue.  The queue must be empty: both the
+ * priority-0 list and the heap are asserted to hold no requests.
+ */
+void
+raw_queue_cleanup(struct raw_queue *q)
+{
+  /* Nothing may be left queued */
+  ASSERT(clist_empty(&q->pri0_queue));
+  ASSERT(!q->heap_cnt);
+
+  xfree(q->pri_heap);
+  sem_free(q->queue_sem);
+  pthread_mutex_destroy(&q->queue_mutex);
+}
+
+#define PRI_LESS(x,y) ((x)->priority > (y)->priority)  /* Heap order for struct work pointers: the larger priority sorts first */
+
+/*
+ * Add a request to a raw queue and post the queue's semaphore.
+ *
+ * Requests of priority 0 are appended to the pri0_queue list, all others
+ * are inserted to the heap, which is grown (16 entries at first, doubled
+ * afterwards) when it is full.  The heap is indexed from 1, so one entry
+ * more than heap_max is allocated.
+ */
+void
+raw_queue_put(struct raw_queue *q, struct work *w)
+{
+  pthread_mutex_lock(&q->queue_mutex);
+  if (w->priority)
+    {
+      if (unlikely(q->heap_cnt >= q->heap_max))
+        {
+          struct work **prev_heap = q->pri_heap;
+          if (q->heap_max)
+            q->heap_max = 2*q->heap_max;
+          else
+            q->heap_max = 16;
+          q->pri_heap = xrealloc(prev_heap, (q->heap_max + 1) * sizeof(struct work *));
+        }
+      struct work **heap = q->pri_heap;
+      heap[++q->heap_cnt] = w;
+      HEAP_INSERT(struct work *, heap, q->heap_cnt, PRI_LESS, HEAP_SWAP);
+    }
+  else
+    clist_add_tail(&q->pri0_queue, &w->n);
+  pthread_mutex_unlock(&q->queue_mutex);
+
+  /* Posted after the lock is dropped */
+  sem_post(q->queue_sem);
+}
+
+/*
+ * Remove and return the next request of a raw queue: the top of the heap
+ * if the heap is non-empty, otherwise the head of the priority-0 list.
+ * The callers wait on the queue's semaphore first; an empty queue trips
+ * the assertion.
+ */
+static inline struct work *
+raw_queue_do_get(struct raw_queue *q)
+{
+  struct work *item;
+
+  pthread_mutex_lock(&q->queue_mutex);
+  if (q->heap_cnt)
+    {
+      struct work **heap = q->pri_heap;
+      item = heap[1];
+      HEAP_DELMIN(struct work *, heap, q->heap_cnt, PRI_LESS, HEAP_SWAP);
+    }
+  else
+    {
+      item = clist_head(&q->pri0_queue);
+      ASSERT(item);
+      clist_remove(&item->n);
+    }
+  pthread_mutex_unlock(&q->queue_mutex);
+  return item;
+}
+
+/*
+ * Wait on the queue's semaphore until a request is available, then
+ * remove and return it.
+ */
+struct work *
+raw_queue_get(struct raw_queue *q)
+{
+  struct work *w;
+
+  sem_wait(q->queue_sem);
+  w = raw_queue_do_get(q);
+  return w;
+}
+
+/*
+ * Non-blocking variant of raw_queue_get(): returns NULL if sem_trywait()
+ * on the queue's semaphore does not succeed.
+ */
+struct work *
+raw_queue_try_get(struct raw_queue *q)
+{
+  if (sem_trywait(q->queue_sem))
+    return NULL;
+  return raw_queue_do_get(q);
+}
+
+/*
+ * Initialize a work queue and bind it to the worker pool p.
+ */
+void
+work_queue_init(struct worker_pool *p, struct work_queue *q)
+{
+  raw_queue_init(&q->finished);
+  q->nr_running = 0;
+  q->pool = p;
+}
+
+/*
+ * Destroy a work queue.  No request may be in service any more.
+ */
+void
+work_queue_cleanup(struct work_queue *q)
+{
+  /* Every submitted request must have been collected by work_wait() or work_try_wait() */
+  ASSERT(!q->nr_running);
+
+  raw_queue_cleanup(&q->finished);
+}
+
+/*
+ * Submit a request to the worker pool the work queue is bound to.
+ *
+ * q: work queue which will receive the request back when it is finished
+ * w: the request; its go() function must be set
+ */
+void
+work_submit(struct work_queue *q, struct work *w)
+{
+  struct worker_pool *pool;
+
+  ASSERT(w->go);
+  w->reply_to = q;
+  pool = q->pool;
+  raw_queue_put(&pool->requests, w);
+  q->nr_running += 1;
+}
+
+/*
+ * Common part of work_wait() and work_try_wait().
+ *
+ * q:   the work queue
+ * try: non-zero to return immediately when no finished request is ready,
+ *      zero to block until one is
+ *
+ * Returns a finished request, or NULL if no requests are in service or
+ * (when trying) none has finished yet.
+ */
+static struct work *
+work_do_wait(struct work_queue *q, int try)
+{
+  struct work *done;
+
+  if (!q->nr_running)
+    return NULL;
+
+  if (try)
+    done = raw_queue_try_get(&q->finished);
+  else
+    done = raw_queue_get(&q->finished);
+
+  if (done)
+    q->nr_running--;
+  return done;
+}
+
+/*
+ * Wait for a request to finish and return it; returns NULL if no requests
+ * are in service.
+ */
+struct work *
+work_wait(struct work_queue *q)
+{
+  struct work *done = work_do_wait(q, 0);
+
+  return done;
+}
+
+/*
+ * Return a finished request if there is one, without blocking; returns
+ * NULL if none has finished yet or no requests are in service.
+ */
+struct work *
+work_try_wait(struct work_queue *q)
+{
+  struct work *done = work_do_wait(q, 1);
+
+  return done;
+}
+
+#ifdef TEST
+
+#include <unistd.h>
+
+/* Test init_thread() callback: log the ID of the thread. */
+static void
+wt_init(struct worker_thread *t)
+{
+  int id = t->id;
+
+  msg(L_INFO, "INIT %d", id);
+}
+
+/* Test cleanup_thread() callback: log the ID of the thread. */
+static void
+wt_cleanup(struct worker_thread *t)
+{
+  int id = t->id;
+
+  msg(L_INFO, "CLEANUP %d", id);
+}
+
+struct w {  /* Test request: a struct work followed by test-specific data */
+ struct work w;  /* Kept first: go() and main() cast between struct work * and struct w * */
+ uns id;  /* Request number assigned by main() */
+};
+
+/*
+ * Test go() function: log which thread serves which request and sleep
+ * for a microsecond.
+ */
+static void
+go(struct worker_thread *t, struct work *w)
+{
+  struct w *req = (struct w *) w;
+
+  msg(L_INFO, "GO %d: request %d (pri %d)", t->id, req->id, w->priority);
+  usleep(1);
+}
+
+/*
+ * Test driver: start a pool of 10 threads, submit 500 requests (the first
+ * 250 with priority equal to their number, the rest with priority 0),
+ * wait for all of them and shut everything down.
+ */
+int main(void)
+{
+  struct worker_pool pool = {
+    .num_threads = 10,
+    .stack_size = 65536,
+    .init_thread = wt_init,
+    .cleanup_thread = wt_cleanup
+  };
+  worker_pool_init(&pool);
+
+  struct work_queue q;
+  work_queue_init(&pool, &q);
+  for (uns i=0; i<500; i++)
+    {
+      struct w *w = xmalloc_zero(sizeof(*w));
+      w->w.go = go;
+      w->w.priority = (i < 250 ? i : 0);
+      w->id = i;
+      work_submit(&q, &w->w);
+      msg(L_INFO, "Submitted request %d (pri %d)", w->id, w->w.priority);
+    }
+
+  /* work_wait() returns NULL once no request is in service any more. */
+  struct w *w;
+  while ((w = (struct w *) work_wait(&q)))
+    {
+      msg(L_INFO, "Finished request %d", w->id);
+      xfree(w);  /* The request was allocated above and nobody refers to it any more */
+    }
+
+  work_queue_cleanup(&q);
+  worker_pool_cleanup(&pool);
+  return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Thread Pools and Work Queues
+ *
+ * (c) 2006 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_WORKQUEUE_H
+#define _UCW_WORKQUEUE_H
+
+/*
+ * A thread pool is a set of threads receiving work requests from a common queue;
+ * each work request contains a pointer to a function to be called inside the worker thread.
+ *
+ * A work queue is an interface for submitting work requests. It's bound to a single
+ * thread pool, it remembers running requests and gathers replies. A single work queue
+ * should not be used by multiple threads simultaneously.
+ *
+ * Requests can have priorities. Requests with the highest priority are served first.
+ * Requests of priority 0 are guaranteed to be served on first-come-first-served
+ * basis, requests of higher priorities are unordered.
+ *
+ * When a thread pool is initialized, its threads are set up one at a time: new_thread()
+ * is called to allocate struct worker_thread (and user-defined thread context following
+ * it), then the thread is started and it executes the init_thread() callback. The
+ * worker_pool_init() function waits for each callback to finish before it sets up the
+ * next thread, so the callbacks are serialized and all of them have finished when
+ * worker_pool_init() returns.
+ */
+
+#include "ucw/semaphore.h"
+#include "ucw/clists.h"
+
+#include <pthread.h>
+
+struct worker_thread { // One of threads serving requests
+ cnode n; // Node in the pool's worker_threads list
+ pthread_t thread; // Filled in by pthread_create() in worker_pool_init()
+ struct worker_pool *pool; // The pool this thread belongs to
+ int id; // Inside the pool
+ /* user-defined data can follow */
+};
+
+struct raw_queue { // Generic queue with locking
+ pthread_mutex_t queue_mutex; // Held while the list or the heap is being modified
+ clist pri0_queue; // Ordinary queue for requests with priority=0
+ struct work **pri_heap; // A heap for requests with priority>0
+ uns heap_cnt, heap_max; // Used and allocated heap entries; the heap is indexed from 1
+ sem_t *queue_sem; // Number of requests queued
+};
+
+struct worker_pool { // A set of threads serving a common request queue
+ struct raw_queue requests; // Requests waiting for a worker thread
+ uns num_threads; // Number of threads started by worker_pool_init()
+ uns stack_size; // 0 for default
+ struct worker_thread *(*new_thread)(void); // default: xmalloc the struct
+ void (*free_thread)(struct worker_thread *t); // default: xfree
+ void (*init_thread)(struct worker_thread *t); // default: empty
+ void (*cleanup_thread)(struct worker_thread *t); // default: empty
+ clist worker_threads; // List of struct worker_thread's of this pool
+ sem_t *init_cleanup_sem; // Posted by a thread when its init or cleanup is done
+};
+
+struct work_queue { // Interface for submitting requests to a pool and collecting them back
+ struct worker_pool *pool; // The pool requests are submitted to
+ uns nr_running; // Number of requests in service
+ struct raw_queue finished; // Finished requests queue up here
+};
+
+struct work { // A single request
+ cnode n; // Node in a raw_queue's pri0_queue
+ uns priority; // 0 goes to the FIFO list, other values to the heap (larger is served earlier)
+ struct work_queue *reply_to; // Where to queue the request when it's finished
+ void (*go)(struct worker_thread *t, struct work *w); // Called inside the worker thread
+};
+
+void worker_pool_init(struct worker_pool *p); // Start the threads; returns after all init_thread() callbacks have finished
+void worker_pool_cleanup(struct worker_pool *p); // Stop and join all threads, free the pool's resources
+
+void raw_queue_init(struct raw_queue *q);
+void raw_queue_cleanup(struct raw_queue *q); // The queue must be empty
+void raw_queue_put(struct raw_queue *q, struct work *w);
+struct work *raw_queue_get(struct raw_queue *q); // Blocks until a request is available
+struct work *raw_queue_try_get(struct raw_queue *q); // Returns NULL instead of blocking
+
+void work_queue_init(struct worker_pool *p, struct work_queue *q);
+void work_queue_cleanup(struct work_queue *q); // No requests may be in service
+void work_submit(struct work_queue *q, struct work *w); // w->go must be set
+struct work *work_wait(struct work_queue *q); // Next finished request; NULL if none is in service
+struct work *work_try_wait(struct work_queue *q); // Like work_wait(), but returns NULL instead of blocking
+
+#endif /* !_UCW_WORKQUEUE_H */