--- /dev/null
+# Bottom part of Makefile for the UCW Libraries
+# (c) 1997--2007 Martin Mares <mj@ucw.cz>
+
+# The run tree
+
+# "runtree" makes sure that the run/ tree exists and that every object
+# directory listed in DIRS has its .dir-stamp (see the %.dir-stamp rule).
+runtree: run/.tree-stamp $(addsuffix /.dir-stamp,$(addprefix $(o)/,$(DIRS)))
+
+# The stamp file records that the subdirectories of run/ have been created.
+# It is remade whenever $(o)/config.mk changes.
+run/.tree-stamp: $(o)/config.mk
+ $(M)Creating runtree
+ $(Q)mkdir -p run $(addprefix run/, cf $(EXTRA_RUNDIRS) $(INSTALL_RUNDIRS))
+ $(Q)touch run/.tree-stamp
+
+# Miscellaneous targets
+
+programs: $(PROGS)
+datafiles: $(DATAFILES)
+tests: $(TESTS)
+configs: $(addprefix run/cf/,$(CONFIGS))
+
+tags:
+ etags `find . -name "*.[ch]"`
+
+# Black magic with dependencies. It would be more correct to make "depend.new"
+# a prerequisite for "depend", but "depend.new" often has the same timestamp
+# as "depend" which would confuse make a lot and either force remaking anyway
+# or (as in current versions of GNU make) erroneously skipping the remaking.
+
+-include $(o)/depend
+
+$(o)/depend: force
+ $(Q)if [ -s $(o)/depend.new ] ; then $(s)/build/mergedeps $(o)/depend $(o)/depend.new ; >$(o)/depend.new ; fi
+
+force:
+
+# Rules for directories
+
+%.dir-stamp:
+ $(Q)mkdir -p $(@D) && touch $@
+
+# Rules for configuration files
+
+run/cf/%: custom/cf/% $(o)/config.mk $(s)/build/genconf
+ $(M)CF $<
+ $(Q)$(s)/build/genconf $< $@ $(o)/config.mk
+
+run/cf/%: $(s)/cf/% $(o)/config.mk $(s)/build/genconf
+ $(M)CF $<
+ $(Q)$(s)/build/genconf $< $@ $(o)/config.mk
+
+# Rules for libraries
+
+%.a:
+ $(M)AR $@
+ $(Q)rm -f $@
+ $(Q)ar rcs $@ $^
+ifdef CONFIG_INSTALL_API
+ $(Q)$(call symlink,$@,run/lib)
+endif
+
+%.so:
+ $(M)LD $@
+ $(Q)$(CC) $(LSHARED) $(LDFLAGS) -o $@ $^
+ $(Q)$(call symlink,$@,run/lib)
+
+# Generate a pkg-config file in the object tree from its template in $(s).
+# genconf is run with DEPS (the output of build/lib-deps on all prerequisites)
+# and LIBDIR (the directory of the target) in its environment. The result is
+# then symlinked to $(o)/pkgconfig.
+$(o)/%.pc: $(s)/%.pc $(o)/%.$(LS)
+ $(M)PC $<
+ $(Q)DEPS="$(shell $(s)/build/lib-deps $^)" LIBDIR=$(@D) $(s)/build/genconf $< $@ $(o)/config.mk
+ $(Q)mkdir -p $(o)/pkgconfig
+ $(Q)$(call symlink,$@,$(o)/pkgconfig)
+
+# Rules for public API
+
+ifdef CONFIG_INSTALL_API
+
+API_ROOT:=$(shell pwd)/run
+INSTALL_RUNDIRS+=include lib/pkgconfig
+api: $(API_INCLUDES) $(addprefix run/lib/pkgconfig/,$(addsuffix .pc,$(API_LIBS)))
+
+# Install public header files. The rule has no prerequisites of its own, so
+# they have to be added elsewhere, together with a target-specific IDST.
+# Only the prerequisites newer than the stamp ($?) are passed to
+# install-includes, which copies them from the directory of the first
+# prerequisite to run/include/$(IDST).
+$(o)/%/.include-stamp:
+ $(Q)$(s)/build/install-includes $(<D) run/include/$(IDST) $(?F)
+ $(Q)touch $@
+
+run/lib/pkgconfig/%.pc: # RHS supplied in the sub-makefile
+ $(M)PC-API $@
+ $(Q)sed <$< >$@ "s@^libdir=.*@libdir=$(API_ROOT)/lib@;s@^incdir=.*@incdir=$(API_ROOT)/include@"
+
+else
+api:
+endif
+
+# Rules for compiling C
+
+$(o)/%.o: $(s)/%.c $(o)/autoconf.h
+ $(M)CC $<
+ $(Q)DEPENDENCIES_OUTPUT="$(o)/depend.new $@" $(CC) $(CFLAGS) -c -o $@ $<
+
+$(o)/%.o: %.c $(o)/autoconf.h
+ $(M)CC $<
+ $(Q)DEPENDENCIES_OUTPUT="$(o)/depend.new $@" $(CC) $(CFLAGS) -c -o $@ $<
+
+%.o: %.c $(o)/autoconf.h
+ $(M)CC $<
+ $(Q)DEPENDENCIES_OUTPUT="$(o)/depend.new $@" $(CC) $(CFLAGS) -c -o $@ $<
+
+$(o)/%.oo: $(s)/%.c $(o)/autoconf.h
+ $(M)CC-SO $<
+ $(Q)DEPENDENCIES_OUTPUT="$(o)/depend.new $@" $(CC) $(CFLAGS) $(CSHARED) -c -o $@ $<
+
+$(o)/%.oo: %.c $(o)/autoconf.h
+ $(M)CC-SO $<
+ $(Q)DEPENDENCIES_OUTPUT="$(o)/depend.new $@" $(CC) $(CFLAGS) $(CSHARED) -c -o $@ $<
+
+%.oo: %.c $(o)/autoconf.h
+ $(M)CC-SO $<
+ $(Q)DEPENDENCIES_OUTPUT="$(o)/depend.new $@" $(CC) $(CFLAGS) $(CSHARED) -c -o $@ $<
+
+$(o)/%-tt.o: $(s)/%.c $(o)/autoconf.h
+ $(M)CC-TEST $<
+ $(Q)DEPENDENCIES_OUTPUT="$(o)/depend.new $@" $(CC) $(CFLAGS) -DTEST -c -o $@ $<
+
+# Rules for testing
+
+$(o)/%-t: $(o)/%-tt.o $(TESTING_DEPS)
+ $(M)LD-TEST $@
+ $(Q)$(CC) $(LDFLAGS) -o $@ $(shell $(s)/build/lib-flags $^) $(LIBS)
+
+$(o)/%.test: $(s)/%.t $(s)/build/tester
+ $(M)TEST $@
+ $(Q)$(s)/build/tester $< && touch $@
+
+# Rules for binaries
+
+BINDIR=bin
+
+$(o)/%: $(o)/%.o
+ $(M)LD $@
+ $(Q)$(CC) $(LDFLAGS) -o $@ $(shell $(s)/build/lib-flags $^) $(LIBS)
+ $(Q)$(call symlink,$@,run/$(BINDIR))
+
+$(o)/%: $(s)/%.sh $(o)/config.mk $(s)/build/genconf
+ $(M)PP $<
+ $(Q)$(s)/build/genconf $< $@ $(o)/config.mk
+ $(Q)chmod +x $@
+ $(Q)$(call symlink,$@,run/$(BINDIR))
+
+$(o)/%: %.sh $(o)/config.mk $(s)/build/genconf
+ $(M)PP $<
+ $(Q)$(s)/build/genconf $< $@ $(o)/config.mk
+ $(Q)chmod +x $@
+ $(Q)$(call symlink,$@,run/$(BINDIR))
+
+$(o)/%: $(s)/%.pl $(o)/config.mk $(s)/build/genconf
+ $(M)PP $<
+ $(Q)$(s)/build/genconf $< $@ $(o)/config.mk
+ $(Q)chmod +x $@
+ $(Q)$(call symlink,$@,run/$(BINDIR))
+
+$(o)/%: %.pl $(o)/config.mk $(s)/build/genconf
+ $(M)PP $<
+ $(Q)$(s)/build/genconf $< $@ $(o)/config.mk
+ $(Q)chmod +x $@
+ $(Q)$(call symlink,$@,run/$(BINDIR))
+
+PERL_MODULE_DIR=UCW
+
+$(o)/%.pm: $(s)/%.pm
+ $(M)"PM $< -> run/lib/perl5/$(PERL_MODULE_DIR)/$(@F)"
+ $(Q)cp $^ $@
+ $(Q)$(call symlink,$@,run/lib/perl5/$(PERL_MODULE_DIR))
+
+# Counterpart of the $(s)/%.pm rule for Perl modules whose source lives in the
+# current directory: copy the module to the object tree and symlink it to
+# run/lib/perl5/$(PERL_MODULE_DIR). The message names the real destination.
+$(o)/%.pm: %.pm
+	$(M)"PM $< -> run/lib/perl5/$(PERL_MODULE_DIR)/$(@F)"
+	$(Q)cp $^ $@
+	$(Q)$(call symlink,$@,run/lib/perl5/$(PERL_MODULE_DIR))
+
+# Rules for data files
+
+DATADIR=lib
+
+$(DATAFILES): $(o)/%: $(s)/%
+ $(M)DATA $<
+ $(Q)cp $^ $@
+ $(Q)$(call symlink,$@,run/$(DATADIR))
+
+# Default installation target
+
+default-install:
+ SH_EXTRA_RUNDIRS="$(sort $(EXTRA_RUNDIRS))" SH_INSTALL_RUNDIRS="$(sort $(INSTALL_RUNDIRS))" SH_CONFIGS="$(sort $(CONFIGS))" $(s)/build/installer $(INSTALL_DIR)
+
+# Don't delete intermediate targets. There shouldn't be any, but due to bugs
+# in GNU Make rules with targets in not-yet-existing directories are ignored
+# when searching for implicit rules and thence targets considered intermediate.
+.SECONDARY:
+
+.PHONY: all clean distclean runtree programs api datafiles force tags configs dust install default-install
--- /dev/null
+# Top part of Makefile for the UCW Libraries
+# (c) 1997--2007 Martin Mares <mj@ucw.cz>
+
+# Set to 1 if you want verbose output
+V=0
+
+# Disable all built-in rules and variables. Speeds up make and simplifies debugging.
+MAKEFLAGS+=-rR
+
+CFLAGS=$(CLANG) $(COPT) $(CDEBUG) $(CWARNS) $(CEXTRA) -I. -I$(o) -I$(s)
+LDFLAGS=$(LOPT) $(LEXTRA)
+
+DIRS=
+PROGS=
+CONFIGS=
+TESTS=
+EXTRA_RUNDIRS=tmp log
+INSTALL_RUNDIRS=bin lib
+API_INCLUDES=
+API_LIBS=
+
+# Various files whose type does not fit into PROGS
+DATAFILES=
+
+ifdef CONFIG_SHARED
+LS=so
+OS=oo
+else
+LS=a
+OS=o
+endif
+
+ifdef CONFIG_DARWIN
+SOEXT=bundle
+else
+SOEXT=so
+endif
+
+# Whenever "make -s" (silent) is run, turn on verbose mode (paradoxical, but gives the right result).
+# Only the first word of MAKEFLAGS carries the single-letter options. Long
+# options (e.g. --jobserver-fds) and command-line variable assignments can
+# contain the letter "s" as well, so they are excluded before looking for -s.
+ucw_short_flags:=$(filter-out --%,$(firstword -$(MAKEFLAGS)))
+ifneq ($(findstring s,$(if $(findstring =,$(ucw_short_flags)),,$(ucw_short_flags))),)
+V=1
+endif
+
+# Define M (message) and Q (quiet command prefix) macros and also MAKESILENT passed to sub-makes
+ifeq ($(V),1)
+M=@\#
+Q=
+MAKESILENT=
+else
+M=@echo #
+Q=@
+MAKESILENT=-s
+endif
+
+# Clean needs to be a double-colon rule since we want sub-makefiles to be able
+# to define their own cleanup actions.
+dust::
+ rm -f `find . -path "*~" -or -name "\#*\#" -or -name core`
+ rm -f allocs.tmp cscope.out TAGS
+
+# Remove everything in obj/ except the results of configuration (config.mk,
+# autoconf.h), plus the generated parts of the run tree. The run/ paths are
+# spelled out one by one, because brace expansion is not available in a plain
+# POSIX /bin/sh, which is what make uses to run recipes by default.
+clean:: dust
+	rm -rf `find obj -mindepth 1 -maxdepth 1 -not \( -name config.mk -o -name autoconf.h \)`
+	rm -rf tests run/bin run/lib run/include run/.tree-stamp
+
+distclean:: clean
+ rm -rf obj run
+
+testclean::
+ rm -f `find obj -name "*.test"`
+
+# Extra default rules (appended to by submakefiles)
+extras::
+
+# Relative symlinks and other pathname manipulation macros
+# An empty value, used only to build $(space)
+empty:=
+# A single space character (used as a word separator in backref)
+space:=$(empty) $(empty)
+# backref(path): replace every component of the path by "..",
+# e.g. run/lib -> ../..
+backref=$(subst $(space),/,$(patsubst %,..,$(subst /,$(space),$(1))))
+# tack-on(prefix,path): expands to prefix/path, unless path starts with "/",
+# in which case it expands to path alone
+tack-on=$(if $(patsubst /%,,$(2)),$(1)/$(2),$(2))
+# symlink(file,dir): shell command which creates a symlink to file inside dir.
+# A file name not starting with "/" is prefixed by backref(dir), so that the
+# link is relative to dir.
+symlink=ln -sf $(call tack-on,$(call backref,$(2)),$(1)) $(2)/
open CF, $ARGV[2] or die "Unable to open $ARGV[2]";
my %options = ();
my %vars = ();
+# opt($k, $v): remember the value $v of the variable $k in %vars. Names
+# starting with "CONFIG_" are additionally recorded in %options.
+sub opt {
+ my ($k,$v) = @_;
+ $vars{$k} = $v;
+ $options{$k} = 1 if ($k =~ /^CONFIG_/);
+}
+foreach my $k (keys %ENV) {
+ opt($k, $ENV{$k});
+}
while (<CF>) {
chomp;
if (my ($k,$v) = /^(\w+)=(.*)/) {
$v =~ s/\s+$//;
- $vars{$k} = $v;
- $options{$k} = 1 if ($k =~ /^CONFIG_/);
+ opt($k, $v);
}
}
close CF;
fprintf(fo, "{ \"%s\", %s },\n", ht[i]->w, ht[i]->extra);
else
fprintf(fo, "{ NULL },\n");
- fprintf(fo, "};\n\nconst %s *%s(register const char *x, register unsigned int len)\n\
+ fprintf(fo, "};\n\nconst %s *%s(const char *x, unsigned int len)\n\
{\n\
const char *c = x;\n\
unsigned int h = 0;\n\
#!/bin/sh
# A simple installer of include files
-# (c) 2005 Martin Mares <mj@ucw.cz>
+# (c) 2005--2007 Martin Mares <mj@ucw.cz>
set -e
SRC=$1
DEST=$2
shift 2
while [ -n "$1" ] ; do
- if [ "$SRC/$1" -nt "$DEST/$1" ] ; then
- echo "Copying $SRC/$1 to $DEST/$1"
+ if [ ! -f "$DEST/$1" -o "$SRC/$1" -nt "$DEST/$1" ] ; then
+ echo "INC $SRC/$1 -> $DEST/$1"
mkdir -p $DEST/`dirname $1`
cp $SRC/$1 $DEST/$1
fi
if [ ! -f $DEST/cf/$a ] ; then
echo "cf/$a: new, installed"
cp run/cf/$a $DEST/cf/$a
- elif [ $a == catalog-rules ] ; then
+ elif [ $a = catalog-rules ] ; then
echo "cf/$a: will be regenerated automatically"
elif diff -u $DEST/cf/$a run/cf/$a ; then
echo "cf/$a: no differences"
else
echo -n "cf/$a differs, replace it [Yn]? "
read x
- if [ -z "$x" -o "$x" == "y" -o "$x" == "Y" ] ; then
+ if [ -z "$x" -o "$x" = "y" -o "$x" = "Y" ] ; then
echo "cf/$a: replacing and keeping the old version as cf/$a.old"
mv $DEST/cf/$a $DEST/cf/$a.old
cp run/cf/$a $DEST/cf/$a
--- /dev/null
+#!/bin/bash
+#
+# A tool which builds a list of dependent libraries from the list
+# of pkg-config files.
+#
+# The first argument is skipped. For every following argument ending in .pc,
+# its base name without the suffix is printed; the names are separated by
+# ", " and no newline is written. All other arguments are ignored.
+#
+# (c) 2007 Martin Mares <mj@ucw.cz>, placed under GNU LGPL
+#
+
+set -e
+
+# Skip the first argument; a bare "shift" without any arguments would fail
+# and abort the script because of "set -e"
+if [ $# -gt 0 ] ; then
+	shift
+fi
+SEEN=
+while [ -n "$1" ] ; do
+	case "$1" in
+		*.pc)	if [ -n "$SEEN" ] ; then echo -n ", " ; fi
+			# Quoted, so that a path containing whitespace stays a single word
+			echo -n "`basename "$1" .pc`"
+			SEEN=1
+			;;
+		*)	;;
+	esac
+	shift
+done
--- /dev/null
+#!/bin/bash
+#
+# A preprocessor for linker arguments, which replaces references to .pc
+# files by results of the proper calls to pkg-config.
+#
+# Arguments not ending in .pc are printed as they are, each preceded by
+# a space. The base names of all .pc arguments are collected and passed to
+# a single "pkg-config --libs" call at the end.
+#
+# (c) 2007 Martin Mares <mj@ucw.cz>, placed under GNU LGPL
+#
+
+set -e
+
+# Space-separated list of package names gathered from the .pc arguments
+PC=
+while [ -n "$1" ] ; do
+ case "$1" in
+ *.pc) PC="$PC `basename $1 .pc`"
+ ;;
+ *) echo -n " $1"
+ ;;
+ esac
+ shift
+done
+if [ -n "$PC" ] ; then
+ echo -n " "
+ # obj/pkgconfig is appended to the search path for this call only
+ PKG_CONFIG_PATH="$PKG_CONFIG_PATH:obj/pkgconfig" pkg-config --libs $PC
+fi
#!/usr/bin/perl
# A simple unit testing script
# (c) 2004 Martin Mares <mj@ucw.cz>
+# (c) 2007 Pavel Charvat <pchar@ucw.cz>
my @tests = ();
my $tt;
$prev_run = $run;
my ($ifi, $ofi);
if (defined $tt->{'In'}) {
- $ifi = "run/tmp/test$i.in";
- open X, ">$ifi" or die "Unable to create $ifi";
+ $ifi = "tmp/test$i.in";
+ open X, ">run/$ifi" or die "Unable to create $ifi";
print X $tt->{'In'}, "\n";
close X;
$run .= " <$ifi";
$run .= " </dev/null";
}
if (defined $tt->{'Out'}) {
- $ofi = "run/tmp/test$i.out";
- unlink $ofi;
+ $ofi = "tmp/test$i.out";
+ unlink "run/$ofi";
$run .= " >$ofi";
} else {
$run .= " >/dev/null";
}
- `$run`;
+ system "cd run && ( $run )";
if ($?) {
print "FAILED with exit code $?\n";
$errors++;
next;
}
if (defined $tt->{'Out'}) {
- open X, "<$ofi" or die "Unable to read $ofi";
+ open X, "<run/$ofi" or die "Unable to read $ofi";
my $out;
{
local $/ = undef;
-# Makefile for the Sherlock Charset Library (c) 1997--2002 Martin Mares <mj@ucw.cz>
+# Makefile for the Sherlock Charset Library (c) 1997--2007 Martin Mares <mj@ucw.cz>
DIRS+=charset
$(o)/charset/libcharset.a: $(addsuffix .o,$(addprefix $(o)/charset/,$(LIBCHARSET_MODS)))
$(o)/charset/libcharset.so: $(addsuffix .oo,$(addprefix $(o)/charset/,$(LIBCHARSET_MODS)))
+$(o)/charset/libcharset.pc: $(LIBUCW)
-INCLUDES+=$(o)/charset/.include-stamp
+API_LIBS+=libcharset
+API_INCLUDES+=$(o)/charset/.include-stamp
$(o)/charset/.include-stamp: $(addprefix $(s)/charset/,$(LIBCHARSET_INCLUDES))
- $(s)/build/install-includes $(s)/charset run/include/charset $(?F)
- touch $(o)/charset/.include-stamp
+$(o)/charset/.include-stamp: IDST=charset
+run/lib/pkgconfig/libcharset.pc: $(o)/charset/libcharset.pc
build_charsets:
cd $(s)/charset && sh misc/generate
/* Charset names */
-int find_charset_by_name(char *);
+int find_charset_by_name(const char *);
char *charset_name(int);
#endif
--- /dev/null
+# pkg-config metadata for libcharset
+
+libdir=@LIBDIR@
+incdir=.
+
+Name: libcharset
+Description: Character set conversion library
+Version: @SHERLOCK_VERSION@
+Cflags: -I${incdir}
+Libs: -L${libdir} -lcharset
+Requires: @DEPS@
#include <alloca.h>
byte *
-mp_strconv(struct mempool *mp, byte *s, uns in_cs, uns out_cs)
+mp_strconv(struct mempool *mp, const byte *s, uns in_cs, uns out_cs)
{
if (in_cs == out_cs)
return mp_strdup(mp, s);
#include "lib/mempool.h"
#include "charset/charconv.h"
-byte *mp_strconv(struct mempool *mp, byte *s, uns cs_in, uns cs_out);
+byte *mp_strconv(struct mempool *mp, const byte *s, uns cs_in, uns cs_out);
static inline byte *
-mp_strconv_to_utf8(struct mempool *mp, byte *s, uns cs_in)
+mp_strconv_to_utf8(struct mempool *mp, const byte *s, uns cs_in)
{ return mp_strconv(mp, s, cs_in, CONV_CHARSET_UTF8); }
static inline byte *
-mp_strconv_from_utf8(struct mempool *mp, byte *s, uns cs_out)
+mp_strconv_from_utf8(struct mempool *mp, const byte *s, uns cs_out)
{ return mp_strconv(mp, s, CONV_CHARSET_UTF8, cs_out); }
#endif
/* Names according to RFC 1345 (see http://www.iana.org/assignments/character-sets) */
-static char *cs_names[] = {
+static const char *cs_names[] = {
"US-ASCII",
"ISO-8859-1",
"ISO-8859-2",
};
int
-find_charset_by_name(char *c)
+find_charset_by_name(const char *c)
{
unsigned int i;
if (i < 0 || i > CONV_NUM_CHARSETS)
return "x-unknown";
else
- return cs_names[i];
+ return (char *)cs_names[i];
}
#define INITIAL_SCALE 2
uns
-stk_strconv_init(struct conv_context *c, byte *s, uns in_cs, uns out_cs)
+stk_strconv_init(struct conv_context *c, const byte *s, uns in_cs, uns out_cs)
{
uns l = strlen(s);
if (in_cs == out_cs)
/* Internals */
-uns stk_strconv_init(struct conv_context *c, byte *s, uns cs_in, uns cs_out);
+uns stk_strconv_init(struct conv_context *c, const byte *s, uns cs_in, uns cs_out);
uns stk_strconv_step(struct conv_context *c, byte *buf, uns len);
#endif
--- /dev/null
+How retros runs on different hardware:
+
+# 32-bit Athlon 64, gcc-4.1
+mj@albireo:~/src/sh/dev-sorter/run$ bin/retros
+D 2006-11-23 23:17:36 [retros] memcpy: 212
+D 2006-11-23 23:17:44 [retros] qsort: 6947
+D 2006-11-23 23:17:48 [retros] arraysort: 3183
+D 2006-11-23 23:18:02 [retros] indirect qsort: 13116
+D 2006-11-23 23:18:24 [retros] indirect arraysort: 19176
+D 2006-11-23 23:18:30 [retros] radix1: 3755
+D 2006-11-23 23:18:34 [retros] radix1b: 3100
+D 2006-11-23 23:18:39 [retros] radix1c: 2777
+D 2006-11-23 23:18:43 [retros] radix1c-sse: 2602
+D 2006-11-23 23:18:47 [retros] radix1d: 2728
+D 2006-11-23 23:18:53 [retros] radix2: 4249
+D 2006-11-23 23:18:57 [retros] radix3: 2577
+D 2006-11-23 23:19:09 [retros] mergesort: 10399
+D 2006-11-23 23:19:16 [retros] samplesort: 5698
+D 2006-11-23 23:19:23 [retros] samplesort2: 5016
+
+# 32-bit P4 Xeon, gcc-3.4
+sherlock@sherlock3:~/sherlock-mj/run$ bin/retros
+D 2006-11-23 23:23:52 [retros] memcpy: 198
+D 2006-11-23 23:24:23 [retros] qsort: 30114
+D 2006-11-23 23:24:27 [retros] arraysort: 2882
+D 2006-11-23 23:24:43 [retros] indirect qsort: 15019
+D 2006-11-23 23:24:59 [retros] indirect arraysort: 13267
+D 2006-11-23 23:25:03 [retros] radix1: 1881
+D 2006-11-23 23:25:06 [retros] radix1b: 1442
+D 2006-11-23 23:25:08 [retros] radix1c: 1313
+D 2006-11-23 23:25:10 [retros] radix1c-sse: 1229
+D 2006-11-23 23:25:13 [retros] radix1d: 1324
+D 2006-11-23 23:25:17 [retros] radix2: 2598
+D 2006-11-23 23:25:19 [retros] radix3: 1419
+D 2006-11-23 23:25:25 [retros] mergesort: 4929
+D 2006-11-23 23:25:29 [retros] samplesort: 2742
+D 2006-11-23 23:25:33 [retros] samplesort2: 2350
+
+# 64-bit P4 Xeon, gcc-3.4
+sherlock@sherlock4:~/sherlock-3.10/run$ bin/retros
+D 2006-11-23 23:44:31 [retros] memcpy: 132
+D 2006-11-23 23:44:58 [retros] qsort: 26469
+D 2006-11-23 23:45:01 [retros] arraysort: 2307
+D 2006-11-23 23:45:12 [retros] indirect qsort: 10971
+D 2006-11-23 23:45:24 [retros] indirect arraysort: 10350
+D 2006-11-23 23:45:26 [retros] radix1: 1099
+D 2006-11-23 23:45:27 [retros] radix1b: 1052
+D 2006-11-23 23:45:29 [retros] radix1c: 1017
+D 2006-11-23 23:45:30 [retros] radix1c-sse: 1017
+D 2006-11-23 23:45:32 [retros] radix1d: 1016
+D 2006-11-23 23:45:34 [retros] radix2: 1661
+D 2006-11-23 23:45:36 [retros] radix3: 955
+D 2006-11-23 23:45:39 [retros] mergesort: 3302
+D 2006-11-23 23:45:42 [retros] samplesort: 2376
+D 2006-11-23 23:45:45 [retros] samplesort2: 1870
+
+# 64-bit Turion X2 TL52, gcc-4.1.1
+pchar@paja ~/prog/sherlock-dev-sorter/run $ bin/retros
+D 2006-11-24 00:32:38 [retros] memcpy: 93
+D 2006-11-24 00:32:46 [retros] qsort: 7530
+D 2006-11-24 00:32:50 [retros] arraysort: 2766
+D 2006-11-24 00:33:01 [retros] indirect qsort: 10543
+D 2006-11-24 00:33:13 [retros] indirect arraysort: 10169
+D 2006-11-24 00:33:16 [retros] radix1: 1319
+D 2006-11-24 00:33:18 [retros] radix1b: 1126
+D 2006-11-24 00:33:20 [retros] radix1c: 1084
+D 2006-11-24 00:33:22 [retros] radix1c-sse: 1126
+D 2006-11-24 00:33:24 [retros] radix1d: 1091
+D 2006-11-24 00:33:27 [retros] radix2: 2238
+D 2006-11-24 00:33:29 [retros] radix3: 1183
+D 2006-11-24 00:33:34 [retros] mergesort: 4036
+D 2006-11-24 00:33:37 [retros] samplesort: 2594
+D 2006-11-24 00:33:40 [retros] samplesort2: 2214
--- /dev/null
+# Tests related to the new sorter
+
+DIRS+=debug/sorter
+PROGS+=$(addprefix $(o)/debug/sorter/,radix-file-test radix-asio-test radix-tune-bits radix-tune-thresh)
+
+$(o)/debug/sorter/retros: $(o)/debug/sorter/retros.o $(LIBSH)
+$(o)/debug/sorter/radix-file-test: $(o)/debug/sorter/radix-file-test.o $(LIBSH)
+$(o)/debug/sorter/radix-asio-test: $(o)/debug/sorter/radix-asio-test.o $(LIBSH)
+$(o)/debug/sorter/radix-tune-bits: $(s)/debug/sorter/radix-tune-bits.sh
+$(o)/debug/sorter/radix-tune-thresh: $(s)/debug/sorter/radix-tune-thresh.sh
--- /dev/null
+/*
+ * An experiment with parallel reading and writing of files using ASIO.
+ *
+ * (c) 2007 Martin Mares <mj@ucw.cz>
+ */
+
+#include "lib/lib.h"
+#include "lib/conf.h"
+#include "lib/lfs.h"
+#include "lib/asio.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#define COPY
+#define DIRECT O_DIRECT
+
+static timestamp_t timer;
+
+/*
+ * Progress reporting helpers. They operate on the variables cnt (bytes
+ * processed), cnt_rep (byte count at which the next report is due), cnt_ms
+ * (accumulated milliseconds) and total_size of the enclosing function and on
+ * the global timer.
+ */
+
+/* Start a new measurement. cnt_ms starts at 1 -- presumably to avoid division by zero in the speed calculation. */
+#define P_INIT do { cnt = 0; cnt_rep = 0; cnt_ms = 1; } while(0)
+/* Account for cc more bytes; once cnt reaches cnt_rep, print a progress line and move cnt_rep forward by 1<<26 bytes. */
+#define P_UPDATE(cc) do { \
+ cnt += cc; \
+ if (cnt >= cnt_rep) { cnt_ms += get_timer(&timer); \
+ printf("%d of %d MB (%.2f MB/sec)\r", (int)(cnt >> 20), (int)(total_size >> 20), (double)cnt / 1048576 * 1000 / cnt_ms); \
+ fflush(stdout); cnt_rep += 1<<26; } } while(0)
+/* Finish the measurement and log the total time and the average speed. */
+#define P_FINAL do { \
+ cnt_ms += get_timer(&timer); \
+ msg(L_INFO, "Spent %.3f sec (%.2f MB/sec)", (double)cnt_ms/1000, (double)cnt / 1048576 * 1000 / cnt_ms); \
+} while(0)
+
+static struct asio_queue io_queue;
+
+/*
+ * Usage: asio-test <nr-files> <bufsize> <totalsize>
+ *
+ * When COPY is defined, <totalsize> bytes are first written to tmp/ft-in.
+ * Then the data are distributed round-robin to <nr-files> output files
+ * tmp/ft-<i> in blocks of <bufsize> bytes (read back from tmp/ft-in with
+ * COPY, generated otherwise) and finally the output files are read
+ * sequentially. All I/O goes through the ASIO queue; the time and speed of
+ * every phase are logged. Returns 1 on invalid arguments, 0 otherwise.
+ */
+int main(int argc, char **argv)
+{
+  uns files, bufsize;
+  u64 total_size;
+  /*
+   * Both counts are used as divisors and <nr-files> also as the size of
+   * on-stack arrays, so zero and negative values are rejected here.
+   */
+  if (argc != 4 ||
+      cf_parse_int(argv[1], (int*) &files) ||
+      cf_parse_int(argv[2], (int*) &bufsize) ||
+      cf_parse_u64(argv[3], &total_size) ||
+      (int)files <= 0 || (int)bufsize <= 0)
+    {
+      fprintf(stderr, "Usage: asio-test <nr-files> <bufsize> <totalsize>\n");
+      return 1;
+    }
+  u64 cnt, cnt_rep;		/* Used by the P_xxx macros */
+  uns cnt_ms;
+  int fd[files];
+  byte name[files][16];
+  struct asio_request *req[files];
+
+  init_timer(&timer);
+
+  io_queue.buffer_size = bufsize;
+  io_queue.max_writebacks = 2;
+  asio_init_queue(&io_queue);
+
+#ifdef COPY
+  /* Phase 0: create the input file filled with a simple byte pattern */
+  msg(L_INFO, "Creating input file");
+  int in_fd = sh_open("tmp/ft-in", O_RDWR | O_CREAT | O_TRUNC | DIRECT, 0666);
+  ASSERT(in_fd >= 0);
+  ASSERT(!(total_size % bufsize));
+  P_INIT;
+  for (uns i=0; i<total_size/bufsize; i++)
+    {
+      struct asio_request *r = asio_get(&io_queue);
+      r->op = ASIO_WRITE_BACK;
+      r->fd = in_fd;
+      r->len = bufsize;
+      byte *xbuf = r->buffer;
+      for (uns j=0; j<bufsize; j++)
+	xbuf[j] = i+j;
+      asio_submit(r);
+      P_UPDATE(bufsize);
+    }
+  asio_sync(&io_queue);
+  lseek(in_fd, 0, SEEK_SET);
+  sync();
+  P_FINAL;
+#endif
+
+  msg(L_INFO, "Initializing output files");
+  for (uns i=0; i<files; i++)
+    {
+      sprintf(name[i], "tmp/ft-%d", i);
+      fd[i] = sh_open(name[i], O_RDWR | O_CREAT | O_TRUNC | DIRECT, 0666);
+      if (fd[i] < 0)
+	die("Cannot create %s: %m", name[i]);
+    }
+  sync();
+  get_timer(&timer);
+
+  /* Phase 1: write the output files in parallel, one block per file in every round */
+  msg(L_INFO, "Writing %d MB to %d files in parallel with %d byte buffers", (int)(total_size >> 20), files, bufsize);
+  P_INIT;
+  for (uns i=0; i<files; i++)
+    req[i] = asio_get(&io_queue);
+  for (uns round=0; round<total_size/bufsize/files; round++)
+    {
+      for (uns i=0; i<files; i++)
+	{
+	  struct asio_request *r = req[i];
+#ifdef COPY
+	  /* Read the next block of the input file and wait for it */
+	  struct asio_request *rr, *rd = asio_get(&io_queue);
+	  rd->op = ASIO_READ;
+	  rd->fd = in_fd;
+	  rd->len = bufsize;
+	  asio_submit(rd);
+	  rr = asio_wait(&io_queue);
+	  ASSERT(rr == rd && rd->status == (int)rd->len);
+	  memcpy(r->buffer, rd->buffer, bufsize);
+	  asio_put(rr);
+#else
+	  for (uns j=0; j<bufsize; j++)
+	    r->buffer[j] = round+i+j;
+#endif
+	  r->op = ASIO_WRITE_BACK;
+	  r->fd = fd[i];
+	  r->len = bufsize;
+	  asio_submit(r);
+	  P_UPDATE(bufsize);
+	  /* The submitted request is replaced by a fresh one for the next round */
+	  req[i] = asio_get(&io_queue);
+	}
+    }
+  for (uns i=0; i<files; i++)
+    asio_put(req[i]);
+  asio_sync(&io_queue);
+#ifdef COPY
+  close(in_fd);
+#endif
+  msg(L_INFO, "Syncing");
+  sync();
+  P_FINAL;
+
+  /* Phase 2: read the output files back, one after another */
+  msg(L_INFO, "Reading the files sequentially");
+  P_INIT;
+  for (uns i=0; i<files; i++)
+    {
+      lseek(fd[i], 0, SEEK_SET);
+      for (uns round=0; round<total_size/bufsize/files; round++)
+	{
+	  struct asio_request *rr, *r = asio_get(&io_queue);
+	  r->op = ASIO_READ;
+	  r->fd = fd[i];
+	  r->len = bufsize;
+	  asio_submit(r);
+	  rr = asio_wait(&io_queue);
+	  ASSERT(rr == r && r->status == (int)bufsize);
+	  asio_put(r);
+	  P_UPDATE(bufsize);
+	}
+      close(fd[i]);
+    }
+  P_FINAL;
+
+  /* Clean up all temporary files */
+  for (uns i=0; i<files; i++)
+    unlink(name[i]);
+#ifdef COPY
+  unlink("tmp/ft-in");
+#endif
+
+  asio_cleanup_queue(&io_queue);
+  msg(L_INFO, "Done");
+  return 0;
+}
--- /dev/null
+/*
+ * An experiment with parallel reading and writing of files.
+ *
+ * (c) 2007 Martin Mares <mj@ucw.cz>
+ */
+
+#include "lib/lib.h"
+#include "lib/conf.h"
+#include "lib/lfs.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#define COPY
+#define DIRECT 0 // or O_DIRECT
+
+static timestamp_t timer;
+
+/*
+ * Progress reporting helpers. They operate on the variables cnt (bytes
+ * processed), cnt_rep (byte count at which the next report is due), cnt_ms
+ * (accumulated milliseconds) and total_size of the enclosing function and on
+ * the global timer.
+ */
+
+/* Start a new measurement. cnt_ms starts at 1 -- presumably to avoid division by zero in the speed calculation. */
+#define P_INIT do { cnt = 0; cnt_rep = 0; cnt_ms = 1; } while(0)
+/* Account for cc more bytes; once cnt reaches cnt_rep, print a progress line and move cnt_rep forward by 1<<26 bytes. */
+#define P_UPDATE(cc) do { \
+ cnt += cc; \
+ if (cnt >= cnt_rep) { cnt_ms += get_timer(&timer); \
+ printf("%d of %d MB (%.2f MB/sec)\r", (int)(cnt >> 20), (int)(total_size >> 20), (double)cnt / 1048576 * 1000 / cnt_ms); \
+ fflush(stdout); cnt_rep += 1<<26; } } while(0)
+/* Finish the measurement and log the total time and the average speed. */
+#define P_FINAL do { \
+ cnt_ms += get_timer(&timer); \
+ msg(L_INFO, "Spent %.3f sec (%.2f MB/sec)", (double)cnt_ms/1000, (double)cnt / 1048576 * 1000 / cnt_ms); \
+} while(0)
+
+/*
+ * Usage: file-test <nr-files> <bufsize> <totalsize>
+ *
+ * When COPY is defined, <totalsize> bytes are first written to tmp/ft-in.
+ * Then the data are distributed round-robin to <nr-files> output files
+ * tmp/ft-<i> in blocks of <bufsize> bytes (read back from tmp/ft-in with
+ * COPY, generated otherwise) and finally the output files are read
+ * sequentially. Plain read() and write() calls are used; the time and speed
+ * of every phase are logged. Returns 1 on invalid arguments, 0 otherwise.
+ */
+int main(int argc, char **argv)
+{
+  uns files, bufsize;
+  u64 total_size;
+  /*
+   * Both counts are used as divisors and <nr-files> also as the size of
+   * on-stack arrays, so zero and negative values are rejected here.
+   */
+  if (argc != 4 ||
+      cf_parse_int(argv[1], (int*) &files) ||
+      cf_parse_int(argv[2], (int*) &bufsize) ||
+      cf_parse_u64(argv[3], &total_size) ||
+      (int)files <= 0 || (int)bufsize <= 0)
+    {
+      fprintf(stderr, "Usage: file-test <nr-files> <bufsize> <totalsize>\n");
+      return 1;
+    }
+  u64 cnt, cnt_rep;		/* Used by the P_xxx macros */
+  uns cnt_ms;
+  int fd[files];
+  byte *buf[files], name[files][16];
+  uns xbufsize = bufsize;	// Used for single-file I/O
+  byte *xbuf = big_alloc(xbufsize);
+
+  init_timer(&timer);
+
+#ifdef COPY
+  /* Phase 0: create the input file filled with a simple byte pattern */
+  msg(L_INFO, "Creating input file");
+  int in_fd = sh_open("tmp/ft-in", O_RDWR | O_CREAT | O_TRUNC | DIRECT, 0666);
+  ASSERT(in_fd >= 0);
+  ASSERT(!(total_size % xbufsize));
+  P_INIT;
+  for (uns i=0; i<total_size/xbufsize; i++)
+    {
+      for (uns j=0; j<xbufsize; j++)
+	xbuf[j] = i+j;
+      uns c = write(in_fd, xbuf, xbufsize);
+      ASSERT(c == xbufsize);
+      P_UPDATE(c);
+    }
+  lseek(in_fd, 0, SEEK_SET);
+  sync();
+  P_FINAL;
+#endif
+
+  msg(L_INFO, "Initializing output files");
+  for (uns i=0; i<files; i++)
+    {
+      sprintf(name[i], "tmp/ft-%d", i);
+      fd[i] = sh_open(name[i], O_RDWR | O_CREAT | O_TRUNC | DIRECT, 0666);
+      if (fd[i] < 0)
+	die("Cannot create %s: %m", name[i]);
+      buf[i] = big_alloc(bufsize);
+    }
+  sync();
+  get_timer(&timer);
+
+  /* Phase 1: write the output files in parallel, one block per file in every round */
+  msg(L_INFO, "Writing %d MB to %d files in parallel with %d byte buffers", (int)(total_size >> 20), files, bufsize);
+  P_INIT;
+  for (uns r=0; r<total_size/bufsize/files; r++)
+    {
+      for (uns i=0; i<files; i++)
+	{
+#ifdef COPY
+	  uns ci = read(in_fd, buf[i], bufsize);
+	  ASSERT(ci == bufsize);
+#else
+	  for (uns j=0; j<bufsize; j++)
+	    buf[i][j] = r+i+j;
+#endif
+	  uns c = write(fd[i], buf[i], bufsize);
+	  ASSERT(c == bufsize);
+	  P_UPDATE(c);
+	}
+    }
+#ifdef COPY
+  close(in_fd);
+#endif
+  msg(L_INFO, "Syncing");
+  sync();
+  P_FINAL;
+
+  /* Phase 2: read the output files back, one after another */
+  msg(L_INFO, "Reading the files sequentially");
+  P_INIT;
+  for (uns i=0; i<files; i++)
+    {
+      lseek(fd[i], 0, SEEK_SET);
+      for (uns r=0; r<total_size/xbufsize/files; r++)
+	{
+	  uns c = read(fd[i], xbuf, xbufsize);
+	  ASSERT(c == xbufsize);
+	  P_UPDATE(c);
+	}
+      close(fd[i]);
+    }
+  P_FINAL;
+
+  /* Clean up all temporary files */
+  for (uns i=0; i<files; i++)
+    unlink(name[i]);
+#ifdef COPY
+  unlink("tmp/ft-in");
+#endif
+  msg(L_INFO, "Done");
+  return 0;
+}
--- /dev/null
+#!/bin/bash
+# A utility for tuning Sherlock's radix sorter
+# (c) 2007 Martin Mares <mj@ucw.cz>
+set -e
+UCW_PROGNAME="$0"
+. lib/libucw.sh
+
+# Path to Sherlock build directory
+[ -n "$BUILD" ] || BUILD=..
+[ -f "$BUILD/lib/sorter/sorter.h" ] || die "BUILD does not point to Sherlock build directory"
+
+# Find out sort buffer size
+parse-config 'Sorter{##SortBuffer}'
+SORTBUF=$CF_Sorter_SortBuffer
+[ "$SORTBUF" -gt 0 ] || die "Unable to determine SortBuffer"
+log "Detected sort buffer size $SORTBUF"
+
+# Size of the test -- should be slightly less than a half of SortBuffer
+SIZE=$(($SORTBUF/2 - 8192))
+log "Decided to benchmark sorting of $SIZE byte data"
+
+# Which bit widths we try
+WIDTHS="0 6 7 8 9 10 11 12 13 14"
+
+# Which RadixThresholds we try
+THRS="2000 4000 10000 20000 50000"
+
+# Which sort-test tests we try
+TESTS="2,5,8,15"
+
+# Check various bit widths of the radix sorter
+rm -f tmp/radix-*
+for W in $WIDTHS ; do
+ rm -f $BUILD/obj/lib/sorter/sort-test{,.o}
+ if [ $W = 0 ] ; then
+ log "Compiling with no radix splits"
+ ( cd $BUILD && make obj/lib/sorter/sort-test )
+ OPT="-d32"
+ else
+ log "Compiling with $W-bit radix splits"
+ ( cd $BUILD && make CEXTRA="-DFORCE_RADIX_BITS=$W" obj/lib/sorter/sort-test )
+ OPT=
+ fi
+ for THR in $THRS ; do
+ log "Testing with RadixThreshold=$THR"
+ $BUILD/obj/lib/sorter/sort-test -SThreads.DefaultStackSize=2M -SSorter.RadixThreshold=$THR -s$SIZE -t$TESTS $OPT -v 2>&1 | tee -a tmp/radix-$W
+ done
+done
+
+echo "thresh" >tmp/radix-thrs
+echo "test#" >tmp/radix-tests
+for THR in $THRS ; do
+ for TEST in `echo $TESTS | tr ',' ' '` ; do
+ echo $THR >>tmp/radix-thrs
+ echo $TEST >>tmp/radix-tests
+ done
+done
+
+FILES="tmp/radix-thrs tmp/radix-tests"
+for W in $WIDTHS ; do
+ a=tmp/radix-$W
+ echo >$a.out "$W bits"
+ sed 's/.* \([0-9.]\+\)s internal sorting.*/\1/;t;d' <$a >>$a.out
+ FILES="$FILES $a.out"
+done
+
+log "These are the results:"
+paste $FILES
--- /dev/null
+#!/bin/bash
+# A utility for tuning Sherlock's radix sorter threshold
+# (c) 2007 Martin Mares <mj@ucw.cz>
+set -e
+UCW_PROGNAME="$0"
+. lib/libucw.sh
+
+# Path to Sherlock build directory
+[ -n "$BUILD" ] || BUILD=..
+[ -f "$BUILD/lib/sorter/sorter.h" ] || die "BUILD does not point to Sherlock build directory"
+
+# Find out sort buffer size
+parse-config 'Sorter{##SortBuffer}'
+SORTBUF=$CF_Sorter_SortBuffer
+[ "$SORTBUF" -gt 0 ] || die "Unable to determine SortBuffer"
+log "Detected sort buffer size $SORTBUF"
+
+# Find out radix-sorter width
+[ -f "$BUILD/obj/config.mk" ] || die "Sherlock source not configured"
+WIDTH=`sed <$BUILD/obj/config.mk 's/^CONFIG_UCW_RADIX_SORTER_BITS=\(.*\)/\1/;t;d'`
+[ -n "$WIDTH" ] || die "CONFIG_UCW_RADIX_SORTER_BITS not set (!?)"
+log "Detected radix-sorter width $WIDTH"
+
+# Maximum size of the test -- should be slightly less than a half of SortBuffer
+SIZE=$(($SORTBUF/2 - 8192))
+
+# Which sort-test test we try
+TEST="2"
+
+# Which thresholds we try
+THRS="16"
+T=$SIZE
+while [ $T -gt 100 ] ; do
+ THRS="$THRS $T"
+ T=$(($T/2))
+done
+
+if true ; then
+
+rm -f tmp/radix-*
+echo "sizes" >tmp/radix-sizes
+while [ $SIZE -gt 262144 ] ; do
+ echo $SIZE >>tmp/radix-sizes
+ for T in $THRS ; do
+ log "Trying size $SIZE with threshold $T"
+ $BUILD/obj/lib/sorter/sort-test -SSorter.RadixThreshold=$T -s$SIZE -t$TEST -v 2>&1 | tee -a tmp/radix-$T
+ done
+ SIZE=$(($SIZE/2))
+done
+
+fi
+
+FILES=tmp/radix-sizes
+for T in $THRS ; do
+ a=tmp/radix-$T
+ echo >$a.out $T
+ sed 's/.* \([0-9.]\+\)s internal sorting.*/\1/;t;d' <$a >>$a.out
+ FILES="$FILES $a.out"
+done
+
+log "These are the results:"
+paste $FILES
--- /dev/null
+/*
+ * Experiments with various sorting algorithms
+ *
+ * (c) 2007 Martin Mares <mj@ucw.cz>
+ */
+
+#include "sherlock/sherlock.h"
+#include "lib/getopt.h"
+#include "lib/md5.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/user.h>
+
+struct elt {
+ u32 key;
+ u32 ballast[3];
+};
+
+static struct elt *ary, *alt, **ind, *array0, *array1;
+static uns n = 10000000;
+static u32 sum;
+
+static struct elt *alloc_elts(uns n)
+{
+ return big_alloc(n * sizeof(struct elt));
+}
+
+static void free_elts(struct elt *a, uns n)
+{
+ big_free(a, n * sizeof(struct elt));
+}
+
+/* qsort()-style comparison of two struct elt's by their keys: returns -1, 0 or 1. */
+static int comp(const void *x, const void *y)
+{
+  const struct elt *left = x;
+  const struct elt *right = y;
+
+  if (left->key < right->key)
+    return -1;
+  if (left->key > right->key)
+    return 1;
+  return 0;
+}
+
+static int comp_ind(const void *x, const void *y)
+{
+ const struct elt * const *xx = x, * const *yy = y;
+ return comp(*xx, *yy);
+}
+
+#define ASORT_PREFIX(x) as_##x
+#define ASORT_KEY_TYPE u32
+#define ASORT_ELT(i) a[i].key
+#define ASORT_SWAP(i,j) do { struct elt t=a[i]; a[i]=a[j]; a[j]=t; } while (0)
+#define ASORT_EXTRA_ARGS , struct elt *a
+#include "lib/arraysort.h"
+
+#define ASORT_PREFIX(x) asi_##x
+#define ASORT_KEY_TYPE u32
+#define ASORT_ELT(i) ind[i]->key
+#define ASORT_SWAP(i,j) do { struct elt *t=ind[i]; ind[i]=ind[j]; ind[j]=t; } while (0)
+#include "lib/arraysort.h"
+
+/*
+ * Radix sort of the n elements of ary by their 32-bit keys: one counting
+ * and one distribution pass for every BITS-bit digit, starting with the
+ * lowest one. The passes alternate between the buffers ary and alt; ary is
+ * finally set to the buffer holding the result.
+ */
+static void r1_sort(void)
+{
+ struct elt *from = ary, *to = alt, *tmp;
+#define BITS 8
+ uns cnt[1 << BITS];
+ for (uns sh=0; sh<32; sh+=BITS)
+ {
+ /* Count the occurrences of every value of the current digit */
+ bzero(cnt, sizeof(cnt));
+ for (uns i=0; i<n; i++)
+ cnt[(from[i].key >> sh) & ((1 << BITS) - 1)]++;
+ /* Turn the counts into starting positions of the buckets */
+ uns pos = 0;
+ for (uns i=0; i<(1<<BITS); i++)
+ {
+ uns c = cnt[i];
+ cnt[i] = pos;
+ pos += c;
+ }
+ ASSERT(pos == n);
+ /* Distribute the elements; cnt[d] advances to the next free slot of bucket d */
+ for (uns i=0; i<n; i++)
+ to[cnt[(from[i].key >> sh) & ((1 << BITS) - 1)]++] = from[i];
+ ASSERT(cnt[(1 << BITS)-1] == n);
+ tmp=from, from=to, to=tmp;
+ }
+ ary = from;
+#undef BITS
+}
+
+/*
+ * Radix sort of the n elements of ary by their 32-bit keys, BITS bits per
+ * pass starting with the lowest digit. Unlike r1_sort(), the counts for the
+ * next digit are gathered (in cnt2) while the elements are distributed
+ * according to the current one, so only the first pass needs a separate
+ * counting loop. The passes alternate between the buffers ary and alt; ary
+ * is finally set to the buffer holding the result.
+ */
+static void r1b_sort(void)
+{
+  struct elt *from = ary, *to = alt, *tmp;
+#define BITS 8
+  uns cnt[1 << BITS], cnt2[1 << BITS];
+  for (uns sh=0; sh<32; sh+=BITS)
+    {
+      if (sh)
+	memcpy(cnt, cnt2, sizeof(cnt));		/* Counted during the previous pass */
+      else
+	{
+	  bzero(cnt, sizeof(cnt));
+	  for (uns i=0; i<n; i++)
+	    cnt[(from[i].key >> sh) & ((1 << BITS) - 1)]++;
+	}
+      /* Turn the counts into starting positions of the buckets */
+      uns pos = 0;
+      for (uns i=0; i<(1<<BITS); i++)
+	{
+	  uns c = cnt[i];
+	  cnt[i] = pos;
+	  pos += c;
+	}
+      ASSERT(pos == n);
+      if (sh + BITS < 32)
+	{
+	  /* Distribute by the current digit and count the next one */
+	  bzero(cnt2, sizeof(cnt2));
+	  for (uns i=0; i<n; i++)
+	    {
+	      cnt2[(from[i].key >> (sh + BITS)) & ((1 << BITS) - 1)]++;
+	      to[cnt[(from[i].key >> sh) & ((1 << BITS) - 1)]++] = from[i];
+	    }
+	}
+      else
+	{
+	  /*
+	   * Last pass: there is no next digit. Counting it would shift
+	   * the 32-bit key by 32 bits, which is undefined in C.
+	   */
+	  for (uns i=0; i<n; i++)
+	    to[cnt[(from[i].key >> sh) & ((1 << BITS) - 1)]++] = from[i];
+	}
+      ASSERT(cnt[(1 << BITS)-1] == n);
+      tmp=from, from=to, to=tmp;
+    }
+  ary = from;
+#undef BITS
+}
+
+/*
+ * Radix sort of the n elements of ary by their 32-bit keys with the four
+ * 8-bit passes written out explicitly. Buckets are addressed by pointers
+ * (ptrs) and the counts for the next digit are gathered during the
+ * distribution by the current one. The data go ary -> alt -> ary -> alt -> ary,
+ * so the result ends up in ary.
+ */
+static void r1c_sort(void)
+{
+ uns cnt[256];
+ struct elt *ptrs[256], *x, *lim;
+
+ /* Count the lowest byte */
+ x = ary; lim = ary + n;
+ bzero(cnt, sizeof(cnt));
+ while (x < lim)
+ cnt[x++->key & 255]++;
+
+/* Set ptrs[i] to the start of bucket i in the buffer beginning at start, using the counts in cnt */
+#define PTRS(start) x=start; for (uns i=0; i<256; i++) { ptrs[i]=x; x+=cnt[i]; }
+
+ /* Pass 1: distribute by bits 0-7, count bits 8-15 */
+ PTRS(alt);
+ x = ary; lim = ary + n;
+ bzero(cnt, sizeof(cnt));
+ while (x < lim)
+ {
+ cnt[(x->key >> 8) & 255]++;
+ *ptrs[x->key & 255]++ = *x;
+ x++;
+ }
+
+ /* Pass 2: distribute by bits 8-15, count bits 16-23 */
+ PTRS(ary);
+ x = alt; lim = alt + n;
+ bzero(cnt, sizeof(cnt));
+ while (x < lim)
+ {
+ cnt[(x->key >> 16) & 255]++;
+ *ptrs[(x->key >> 8) & 255]++ = *x;
+ x++;
+ }
+
+ /* Pass 3: distribute by bits 16-23, count bits 24-31 */
+ PTRS(alt);
+ x = ary; lim = ary + n;
+ bzero(cnt, sizeof(cnt));
+ while (x < lim)
+ {
+ cnt[(x->key >> 24) & 255]++;
+ *ptrs[(x->key >> 16) & 255]++ = *x;
+ x++;
+ }
+
+ /* Pass 4: distribute by bits 24-31, nothing left to count */
+ PTRS(ary);
+ x = alt; lim = alt + n;
+ while (x < lim)
+ {
+ *ptrs[(x->key >> 24) & 255]++ = *x;
+ x++;
+ }
+#undef PTRS
+}
+
+#include <emmintrin.h>
+
+/*
+ * Copy one element from *from to *to with a single 128-bit SSE2 load and
+ * store. The aligned variants of the intrinsics are used, so both pointers
+ * have to be aligned to 16 bytes.
+ */
+static inline void sse_copy_elt(struct elt *to, struct elt *from)
+{
+ __m128i m = _mm_load_si128((__m128i *) from);
+ _mm_store_si128((__m128i *) to, m);
+}
+
+/*
+ * The same algorithm as r1c_sort(), but elements are moved by
+ * sse_copy_elt() instead of structure assignment. This requires
+ * struct elt to be exactly 16 bytes long and both buffers to be aligned
+ * to 16 bytes, which is checked by the assertions at the start.
+ */
+static void r1c_sse_sort(void)
+{
+ uns cnt[256];
+ struct elt *ptrs[256], *x, *lim;
+
+ ASSERT(sizeof(struct elt) == 16);
+ ASSERT(!((uintptr_t)alt & 15));
+ ASSERT(!((uintptr_t)ary & 15));
+
+ /* Count the lowest byte */
+ x = ary; lim = ary + n;
+ bzero(cnt, sizeof(cnt));
+ while (x < lim)
+ cnt[x++->key & 255]++;
+
+/* Set ptrs[i] to the start of bucket i in the buffer beginning at start, using the counts in cnt */
+#define PTRS(start) x=start; for (uns i=0; i<256; i++) { ptrs[i]=x; x+=cnt[i]; }
+
+ /* Pass 1: distribute by bits 0-7, count bits 8-15 */
+ PTRS(alt);
+ x = ary; lim = ary + n;
+ bzero(cnt, sizeof(cnt));
+ while (x < lim)
+ {
+ cnt[(x->key >> 8) & 255]++;
+ sse_copy_elt(ptrs[x->key & 255]++, x);
+ x++;
+ }
+
+ /* Pass 2: distribute by bits 8-15, count bits 16-23 */
+ PTRS(ary);
+ x = alt; lim = alt + n;
+ bzero(cnt, sizeof(cnt));
+ while (x < lim)
+ {
+ cnt[(x->key >> 16) & 255]++;
+ sse_copy_elt(ptrs[(x->key >> 8) & 255]++, x);
+ x++;
+ }
+
+ /* Pass 3: distribute by bits 16-23, count bits 24-31 */
+ PTRS(alt);
+ x = ary; lim = ary + n;
+ bzero(cnt, sizeof(cnt));
+ while (x < lim)
+ {
+ cnt[(x->key >> 24) & 255]++;
+ sse_copy_elt(ptrs[(x->key >> 16) & 255]++, x);
+ x++;
+ }
+
+ /* Pass 4: distribute by bits 24-31, nothing left to count */
+ PTRS(ary);
+ x = alt; lim = alt + n;
+ while (x < lim)
+ {
+ sse_copy_elt(ptrs[(x->key >> 24) & 255]++, x);
+ x++;
+ }
+#undef PTRS
+}
+
+static void r1d_sort(void) /* byte-wise radix sort like r1c_sort(), with manually unrolled loops */
+{
+ uns cnt[256];
+ struct elt *ptrs[256], *x, *y, *lim;
+
+ ASSERT(!(n % 4)); /* the unrolled loops below step by 4 resp. 2 elements without any tail handling */
+
+ x = ary; lim = ary + n;
+ bzero(cnt, sizeof(cnt));
+ while (x < lim) /* histogram of byte 0, four elements per iteration */
+ {
+ cnt[x++->key & 255]++;
+ cnt[x++->key & 255]++;
+ cnt[x++->key & 255]++;
+ cnt[x++->key & 255]++;
+ }
+
+#define PTRS(start) x=start; for (uns i=0; i<256; i++) { ptrs[i]=x; x+=cnt[i]; }
+
+ PTRS(alt); /* pass 1: ary -> alt by byte 0, counting byte 1 */
+ x = ary; y = ary+n/2; lim = ary + n/2; /* x walks the first half of ary, y the second half */
+ bzero(cnt, sizeof(cnt));
+ while (x < lim) /* two (x,y) pairs per iteration; n/2 is even because n % 4 == 0 */
+ {
+ cnt[(x->key >> 8) & 255]++;
+ cnt[(y->key >> 8) & 255]++;
+ *ptrs[x->key & 255]++ = *x;
+ *ptrs[y->key & 255]++ = *y;
+ x++, y++;
+ cnt[(x->key >> 8) & 255]++;
+ cnt[(y->key >> 8) & 255]++;
+ *ptrs[x->key & 255]++ = *x;
+ *ptrs[y->key & 255]++ = *y;
+ x++, y++;
+ }
+
+ PTRS(ary); /* pass 2: alt -> ary by byte 1, counting byte 2 */
+ x = alt; lim = alt + n;
+ bzero(cnt, sizeof(cnt));
+ while (x < lim) /* two elements per iteration, taken in input order */
+ {
+ cnt[(x->key >> 16) & 255]++;
+ *ptrs[(x->key >> 8) & 255]++ = *x;
+ x++;
+ cnt[(x->key >> 16) & 255]++;
+ *ptrs[(x->key >> 8) & 255]++ = *x;
+ x++;
+ }
+
+ PTRS(alt); /* pass 3: ary -> alt by byte 2, counting byte 3 */
+ x = ary; lim = ary + n;
+ bzero(cnt, sizeof(cnt));
+ while (x < lim)
+ {
+ cnt[(x->key >> 24) & 255]++;
+ *ptrs[(x->key >> 16) & 255]++ = *x;
+ x++;
+ cnt[(x->key >> 24) & 255]++;
+ *ptrs[(x->key >> 16) & 255]++ = *x;
+ x++;
+ }
+
+ PTRS(ary); /* pass 4: alt -> ary by byte 3; the sorted data end up in ary */
+ x = alt; lim = alt + n;
+ while (x < lim)
+ {
+ *ptrs[(x->key >> 24) & 255]++ = *x;
+ x++;
+ *ptrs[(x->key >> 24) & 255]++ = *x;
+ x++;
+ }
+#undef PTRS
+}
+
+/*
+ * A single radix pass on the top BITS bits of the key (ary -> alt),
+ * after which every bucket is finished in place by as_sort().
+ * The result lives in alt, so ary is redirected there.
+ */
+static void r2_sort(void)
+{
+#define BITS 14
+#define R2_TOP(k) (((k) >> (32 - BITS)) & ((1 << BITS) - 1))
+  uns cnt[1 << BITS];
+
+  /* Histogram of the top digit */
+  bzero(cnt, sizeof(cnt));
+  for (struct elt *e = ary; e < ary + n; e++)
+    cnt[R2_TOP(e->key)]++;
+
+  /* Counts -> bucket start offsets */
+  uns total = 0;
+  for (uns b=0; b<(1<<BITS); b++)
+    {
+      uns here = cnt[b];
+      cnt[b] = total;
+      total += here;
+    }
+  ASSERT(total == n);
+
+  /* Scatter; afterwards cnt[b] is the end offset of bucket b */
+  for (struct elt *e = ary; e < ary + n; e++)
+    alt[cnt[R2_TOP(e->key)]++] = *e;
+  ASSERT(cnt[(1 << BITS)-1] == n);
+
+  uns start = 0;
+  for (uns b=0; b<(1 << BITS); b++)
+    {
+      as_sort(cnt[b] - start, alt + start);
+      start = cnt[b];
+    }
+  ary = alt;
+#undef R2_TOP
+#undef BITS
+}
+
+static void r3_sort(void) /* recursive radix sort from the top bits down, finishing small buckets by as_sort() */
+{
+#define BITS 10
+#define LEVELS 2
+#define BUCKS (1 << BITS)
+#define THRESHOLD 5000
+#define ODDEVEN 0
+
+ auto void r3(struct elt *from, struct elt *to, uns n, uns lev); /* r3 is a nested function (GCC extension) */
+ void r3(struct elt *from, struct elt *to, uns n, uns lev) /* split from[0..n) into buckets in to[], then finish each bucket; lev counts from 1 */
+ {
+ uns sh = 32 - lev*BITS; /* level 1 uses bits 22..31 of the key, level 2 bits 12..21 */
+ uns cnt[BUCKS];
+ bzero(cnt, sizeof(cnt));
+ for (uns i=0; i<n; i++)
+ cnt[(from[i].key >> sh) & (BUCKS - 1)]++;
+ uns pos = 0;
+ for (uns i=0; i<BUCKS; i++) /* counts -> bucket start offsets */
+ {
+ uns c = cnt[i];
+ cnt[i] = pos;
+ pos += c;
+ }
+ ASSERT(pos == n);
+ for (uns i=0; i<n; i++) /* scatter; afterwards cnt[i] is the end offset of bucket i */
+#if 1
+ to[cnt[(from[i].key >> sh) & (BUCKS - 1)]++] = from[i];
+#else
+ sse_copy_elt(&to[cnt[(from[i].key >> sh) & (BUCKS - 1)]++], &from[i]);
+#endif
+ pos = 0;
+ for (uns i=0; i<BUCKS; i++)
+ {
+ uns l = cnt[i]-pos; /* size of bucket i */
+ if (lev >= LEVELS || l <= THRESHOLD) /* deepest level or small bucket: finish by as_sort() */
+ {
+ as_sort(l, to+pos);
+ if ((lev % 2) != ODDEVEN) /* with ODDEVEN == 0: on odd levels `to' lies in alt, so copy the sorted bucket back */
+ memcpy(from+pos, to+pos, l * sizeof(struct elt));
+ }
+ else
+ r3(to+pos, from+pos, l, lev+1); /* recurse with the roles of the two buffers swapped */
+ pos = cnt[i];
+ }
+ }
+
+ r3(ary, alt, n, 1);
+ if (ODDEVEN) /* ODDEVEN selects the buffer receiving the result: 0 keeps it in ary, 1 would leave it in alt */
+ ary = alt;
+
+#undef ODDEVEN
+#undef THRESHOLD
+#undef BUCKS
+#undef LEVELS
+#undef BITS
+}
+
+/*
+ * Merge the sorted runs [x,xl) and [y,yl) into z and return the position
+ * just past the last element written. When the keys are equal, the element
+ * from the x run goes first. Both runs must be non-empty, because the very
+ * first comparison reads *x and *y.
+ */
+static inline struct elt *mrg(struct elt *x, struct elt *xl, struct elt *y, struct elt *yl, struct elt *z)
+{
+  for (;;)
+    {
+      if (y->key < x->key)
+        {
+          *z++ = *y++;
+          if (y >= yl)
+            break;
+        }
+      else
+        {
+          *z++ = *x++;
+          if (x >= xl)
+            break;
+        }
+    }
+
+  /* Exactly one of the runs may still hold elements; flush it */
+  while (x < xl)
+    *z++ = *x++;
+  while (y < yl)
+    *z++ = *y++;
+  return z;
+}
+
+/*
+ * Bottom-up merge sort bouncing between ary and alt. Level 0 orders adjacent
+ * pairs while copying ary -> alt; level lev >= 1 merges runs of 1 << lev
+ * elements, reading alt on odd levels and ary on even ones. If the data
+ * finish in alt, ary is redirected there.
+ */
+static void mergesort(void)
+{
+  /* Level 0: order adjacent pairs on the way to alt */
+  struct elt *src = ary, *dst = alt, *pairs_end = ary + (n & ~1U);
+  for (; src < pairs_end; src += 2, dst += 2)
+    {
+      if (src[0].key < src[1].key)
+        dst[0] = src[0], dst[1] = src[1];
+      else
+        dst[0] = src[1], dst[1] = src[0];
+    }
+  if (n % 2)
+    *dst = *src;			/* unpaired last element */
+
+  uns lev;
+  for (lev = 1; (1U << lev) < n; lev++)
+    {
+      struct elt *from = (lev % 2) ? alt : ary;
+      struct elt *to = (lev % 2) ? ary : alt;
+      struct elt *end = from + n;
+      struct elt *x = from, *z = to;
+      uns run = 1 << lev;
+
+      /* Merge all complete pairs of runs */
+      for (; x + 2*run <= end; x += 2*run)
+        z = mrg(x, x+run, x+run, x+2*run, z);
+
+      /* The tail is either a full run plus a shorter one, or a single (possibly empty) run */
+      if (x + run < end)
+        mrg(x, x+run, x+run, end, z);
+      else
+        memcpy(z, x, (byte*)end - (byte*)x);
+    }
+  if (lev % 2)
+    ary = alt;
+}
+
+/*
+ * One level of sample sort.
+ *
+ *   n     number of elements (must be non-zero, it is used as a modulus)
+ *   ar    input elements; also receives the sorted buckets when al != dest
+ *   al    scratch buffer the elements are distributed into
+ *   dest  pointer compared with al to decide whether to copy back to ar
+ *   wbuf  at least n bytes, remembers the bucket number of each element
+ *
+ * WAYS randomly chosen elements are sorted and serve as splitters; each
+ * element then finds its bucket by a binary descent over the splitters.
+ * Buckets of 1000 or more elements are sorted recursively with the roles
+ * of the buffers swapped, smaller ones by as_sort(). wbuf can be shared
+ * with the recursion, because it is fully consumed before recursing.
+ *
+ * A bucket which received all n elements (e.g., when all keys are equal)
+ * is handed to as_sort() as well: recursing on it would make no progress
+ * and would never terminate.
+ */
+static void sampsort(uns n, struct elt *ar, struct elt *al, struct elt *dest, byte *wbuf)
+{
+#define WAYS 256
+  struct elt k[WAYS];
+  uns cnt[WAYS];
+  bzero(cnt, sizeof(cnt));
+
+  /* Pick and sort the splitters */
+  for (uns i=0; i<WAYS; i++)
+    k[i] = ar[random() % n];
+  as_sort(WAYS, k);
+
+  /* Classify: remember the bucket of each element and count bucket sizes */
+  for (uns i=0; i<n; i++)
+    {
+      uns w = 0;
+#define FW(delta) if (ar[i].key > k[w+delta].key) w += delta
+      FW(128);
+      FW(64);
+      FW(32);
+      FW(16);
+      FW(8);
+      FW(4);
+      FW(2);
+      FW(1);
+      wbuf[i] = w;
+      cnt[w]++;
+    }
+
+  /* Distribute the elements to their buckets in al */
+  struct elt *y = al, *way[WAYS], *z;
+  for (uns i=0; i<WAYS; i++)
+    {
+      way[i] = y;
+      y += cnt[i];
+    }
+  ASSERT(y == al+n);
+  for (uns i=0; i<n; i++)
+    {
+      uns w = wbuf[i];
+      *way[w]++ = ar[i];
+    }
+
+  /* Finish the buckets one by one */
+  y = al;
+  z = ar;
+  for (uns i=0; i<WAYS; i++)
+    {
+      if (cnt[i] >= 1000 && cnt[i] < n)
+        sampsort(cnt[i], y, z, dest, wbuf);
+      else
+        {
+          as_sort(cnt[i], y);
+          if (al != dest)
+            memcpy(z, y, cnt[i]*sizeof(struct elt));
+        }
+      y += cnt[i];
+      z += cnt[i];
+    }
+#undef FW
+#undef WAYS
+}
+
+/* Sample sort of ary with alt as scratch space; allocates the n-byte bucket map sampsort() needs */
+static void samplesort(void)
+{
+  byte *bucket_map = xmalloc(n);
+
+  sampsort(n, ary, alt, ary, bucket_map);
+  xfree(bucket_map);
+}
+
+/*
+ * One level of sample sort; a variant of sampsort() which classifies two
+ * elements per iteration with interleaved binary descents.
+ *
+ *   n     number of elements (must be non-zero, it is used as a modulus)
+ *   ar    input elements; also receives the sorted buckets when al != dest
+ *   al    scratch buffer the elements are distributed into
+ *   dest  pointer compared with al to decide whether to copy back to ar
+ *   wbuf  at least n bytes, remembers the bucket number of each element
+ *
+ * Buckets of 1000 or more elements are sorted recursively with the roles
+ * of the buffers swapped, smaller ones by as_sort(). A bucket which received
+ * all n elements (e.g., when all keys are equal) is handed to as_sort() as
+ * well: recursing on it would make no progress and would never terminate.
+ */
+static void sampsort2(uns n, struct elt *ar, struct elt *al, struct elt *dest, byte *wbuf)
+{
+#define WAYS 256
+  struct elt k[WAYS];
+  uns cnt[WAYS];
+  bzero(cnt, sizeof(cnt));
+
+  /* Pick and sort the splitters */
+  for (uns i=0; i<WAYS; i++)
+    k[i] = ar[random() % n];
+  as_sort(WAYS, k);
+
+  /* Classify the elements in pairs: k1 takes the even positions, k2 the odd ones */
+  struct elt *k1 = ar, *k2 = ar+1, *kend = ar+n;
+  byte *ww = wbuf;
+  while (k2 < kend)
+    {
+      uns w1 = 0, w2 = 0;
+#define FW1(delta) if (k1->key > k[w1+delta].key) w1 += delta
+#define FW2(delta) if (k2->key > k[w2+delta].key) w2 += delta
+      FW1(128); FW2(128);
+      FW1(64); FW2(64);
+      FW1(32); FW2(32);
+      FW1(16); FW2(16);
+      FW1(8); FW2(8);
+      FW1(4); FW2(4);
+      FW1(2); FW2(2);
+      FW1(1); FW2(1);
+      *ww++ = w1;
+      *ww++ = w2;
+      cnt[w1]++;
+      cnt[w2]++;
+      k1 += 2;
+      k2 += 2;
+    }
+  if (k1 < kend)
+    {
+      /* Odd n: one element is left over */
+      uns w1 = 0;
+      FW1(128); FW1(64); FW1(32); FW1(16);
+      FW1(8); FW1(4); FW1(2); FW1(1);
+      *ww++ = w1;
+      cnt[w1]++;
+    }
+
+  /* Distribute the elements to their buckets in al */
+  struct elt *y = al, *way[WAYS], *z;
+  for (uns i=0; i<WAYS; i++)
+    {
+      way[i] = y;
+      y += cnt[i];
+    }
+  ASSERT(y == al+n);
+  for (uns i=0; i<n; i++)
+    {
+      uns w = wbuf[i];
+      *way[w]++ = ar[i];
+    }
+
+  /* Finish the buckets one by one */
+  y = al;
+  z = ar;
+  for (uns i=0; i<WAYS; i++)
+    {
+      if (cnt[i] >= 1000 && cnt[i] < n)
+        sampsort2(cnt[i], y, z, dest, wbuf);
+      else
+        {
+          as_sort(cnt[i], y);
+          if (al != dest)
+            memcpy(z, y, cnt[i]*sizeof(struct elt));
+        }
+      y += cnt[i];
+      z += cnt[i];
+    }
+#undef FW1
+#undef FW2
+#undef WAYS
+}
+
+/* Sample sort of ary by sampsort2() with alt as scratch space; allocates the n-byte bucket map it needs */
+static void samplesort2(void)
+{
+  byte *bucket_map = xmalloc(n);
+
+  sampsort2(n, ary, alt, ary, bucket_map);
+  xfree(bucket_map);
+}
+
+/*
+ * Fill ary with n test elements and remember the XOR of their keys in sum.
+ * Resets ary/alt to array0/array1 first. The keys are taken from the MD5
+ * state, which is advanced once per four elements. The remaining 32-bit
+ * words of each element are rotated copies of the key.
+ */
+static void mk_ary(void)
+{
+  struct MD5Context ctx;
+  u32 block[16];
+
+  ary = array0;
+  alt = array1;
+  MD5Init(&ctx);
+  bzero(block, sizeof(block));
+  sum = 0;
+
+  for (uns i=0; i<n; i++)
+    {
+      struct elt *e = &ary[i];
+#if 1
+      if (i % 4 == 0)
+        {
+          /* Four keys are used up, stir the MD5 state again */
+          block[i%16] = i;
+          MD5Transform(ctx.buf, block);
+        }
+      e->key = ctx.buf[i%4];
+#else
+      e->key = i*(~0U/(n-1));
+#endif
+      u32 *words = (u32 *) e;
+      for (uns j=1; j<sizeof(struct elt)/4; j++)
+        words[j] = ROL(e->key, 3*j);
+      sum ^= e->key;
+    }
+}
+
+/* Verify that ary is sorted by key and that the XOR of all keys still equals sum; dies otherwise */
+static void chk_ary(void)
+{
+  u32 acc = ary[0].key;
+
+  for (uns i=1; i<n; i++)
+    {
+      if (ary[i-1].key > ary[i].key)
+        die("Missorted at %d", i);
+      else
+        acc ^= ary[i].key;
+    }
+  if (acc != sum)
+    die("Corrupted");
+}
+
+/* Generate the test data by mk_ary() and build ind[], an array of pointers to the elements in their original order */
+static void mk_ind(void)
+{
+  mk_ary();
+  ind = xmalloc(sizeof(struct elt *) * n);
+
+  struct elt **slot = ind;
+  for (struct elt *e = ary; e < ary + n; e++)
+    *slot++ = e;
+}
+
+/* Verify that ind[] is ordered by key and that the XOR of all keys still equals sum, then free ind[] */
+static void chk_ind(void)
+{
+  u32 acc = ind[0]->key;
+
+  for (uns i=1; i<n; i++)
+    {
+      if (ind[i-1]->key > ind[i]->key)
+        die("Missorted at %d", i);
+      else
+        acc ^= ind[i]->key;
+    }
+  if (acc != sum)
+    die("Corrupted");
+  xfree(ind);
+}
+
+int main(int argc, char **argv) /* benchmark driver: times plain copying and then the selected sorting routines */
+{
+ log_init(argv[0]);
+
+ int opt;
+ uns op = 0; /* bit mask of numeric options seen; not read anywhere else in this function */
+ while ((opt = cf_getopt(argc, argv, CF_SHORT_OPTS "1", CF_NO_LONG_OPTS, NULL)) >= 0)
+ switch (opt)
+ {
+ case '1':
+ op |= (1 << (opt - '0'));
+ break;
+ default:
+ die("usage?");
+ }
+
+ array0 = alloc_elts(n);
+ array1 = alloc_elts(n);
+ for (uns i=0; i<n; i++) /* write to every element of both arrays before anything is timed */
+ array0[i] = array1[i] = (struct elt) { 0 };
+
+ mk_ary();
+ timestamp_t timer;
+ init_timer(&timer);
+ for (uns i=0; i<5; i++) /* 5 rounds of 2 copies = 10 copies of the whole array */
+ {
+#if 1
+ memcpy(alt, ary, sizeof(struct elt) * n);
+ memcpy(ary, alt, sizeof(struct elt) * n);
+#else
+ for (uns j=0; j<n; j++)
+ alt[j] = ary[j];
+ for (uns j=0; j<n; j++)
+ ary[j] = alt[j];
+#endif
+ }
+ log(L_DEBUG, "memcpy: %d", get_timer(&timer)/10); /* divided by 10 to get the time of a single copy */
+
+#define BENCH(type, name, func) mk_##type(); init_timer(&timer); func; log(L_DEBUG, name ": %d", get_timer(&timer)); chk_##type()
+
+ //BENCH(ary, "qsort", qsort(ary, n, sizeof(struct elt), comp));
+ //BENCH(ary, "arraysort", as_sort(n, ary));
+ //BENCH(ind, "indirect qsort", qsort(ind, n, sizeof(struct elt *), comp_ind));
+ //BENCH(ind, "indirect arraysort", asi_sort(n));
+ //BENCH(ary, "radix1", r1_sort());
+ //BENCH(ary, "radix1b", r1b_sort());
+ BENCH(ary, "radix1c", r1c_sort()); /* each BENCH generates fresh data, times func and verifies the result */
+ //BENCH(ary, "radix1c-sse", r1c_sse_sort());
+ //BENCH(ary, "radix1d", r1d_sort());
+ //BENCH(ary, "radix2", r2_sort());
+ BENCH(ary, "radix3", r3_sort());
+ //BENCH(ary, "mergesort", mergesort());
+ //BENCH(ary, "samplesort", samplesort());
+ //BENCH(ary, "samplesort2", samplesort2());
+
+ free_elts(array0, n);
+ free_elts(array1, n);
+ return 0;
+}
--- /dev/null
+# Makefile for the stand-alone release of Sherlock libraries
+# (c) 2007 Martin Mares <mj@ucw.cz>
+
+# The default target. Neither "all" nor "libs" names a real file, so declare
+# them phony: a stray file of that name must not make them look up to date.
+.PHONY: all libs
+all: runtree libs api programs extras configs
+
+# Include configuration
+s=.
+-include obj/config.mk
+obj/config.mk:
+ @echo "You need to run configure first." && false
+
+# We will use the libucw build system
+include $(s)/build/Maketop
+
+# Install config files
+CONFIGS+=sherlock local
+
+# Set up names of common libraries (to avoid forward references in rules)
+LIBCHARSET=$(o)/charset/libcharset.pc
+LIBSH=$(o)/sherlock/libsh.pc
+
+# Include makefiles of libraries we wish to use
+include $(s)/lib/Makefile
+include $(s)/charset/Makefile
+include $(s)/sherlock/Makefile
+
+ifdef CONFIG_LANG
+LIBLANG=$(o)/lang/liblang.pc
+include $(s)/lang/Makefile
+endif
+
+ifdef CONFIG_IMAGES
+LIBIMAGES=$(o)/images/libimages.pc
+include $(s)/images/Makefile
+endif
+
+libs: $(LIBUCW) $(LIBSH) $(LIBIMAGES) $(LIBCHARSET) $(LIBLANG)
+
+# And finally the default rules of the build system
+include $(s)/build/Makebottom
--- /dev/null
+Sherlock Holmes Libraries 3.12
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+(c) 1997--2007 Martin Mares <mj@ucw.cz>
+(c) 2000--2007 Robert Spalek <robert@ucw.cz>
+
+This package contains all libraries from the freely distributable version
+of the Sherlock Holmes search engine (see http://www.ucw.cz/holmes/).
+
+See doc/using-libs for a description of the libraries.
+
+See doc/install for the build requirements (the rest of that document does not apply here).
+
+Use `./configure && make' to build the libraries.
+
+See doc/configure if you want to tweak what gets built (for example, you can
+call `./configure -CONFIG_SHARED' to build the libraries statically or
+call `./configure -CONFIG_IMAGES' to avoid building the image library).
--- /dev/null
+#!/usr/bin/perl
+# Configure Script for Stand-Alone Sherlock Libraries
+# (c) 2007 Martin Mares <mj@ucw.cz>
+
+use warnings;
+use strict;
+
+our $srcdir;
+BEGIN { # locate the source tree at compile time, so that Configure.pm can be loaded from it
+ my $pkgfile = "lib/wildmatch.c"; # a file whose presence identifies the top of the source tree
+ if (!defined ($srcdir = $ENV{"SRCDIR"})) { # an explicitly set SRCDIR always wins
+ if (-f $pkgfile) { # we were started inside the source directory
+ $srcdir=".";
+ } elsif ($0 =~ m@^(.*)/configure$@ && -f "$1/$pkgfile") { # started via a path: try the directory of the script
+ $srcdir=$1;
+ } else {
+ die "Don't know how to find myself. Please set SRCDIR manually.";
+ }
+ }
+ require "$srcdir/lib/perl/Configure.pm";
+ UCW::Configure::import UCW::Configure; # brings Init, Include, Get, Log, Finish... into scope
+}
+
+Init($srcdir, "default.cfg"); # NOTE(review): second argument looks like the default configuration name -- confirm in Configure.pm
+Include "lib/default.cfg";
+Log "### Configuring Sherlock Libraries " . Get("SHERLOCK_VERSION") . " with configuration " . Get("CONFIG") . "\n";
+Include Get("CONFIG"); # the configuration selected by the CONFIG setting
+Include "lib/autoconf.cfg";
+Finish();
+
+Log "\nConfigured, run `make' to build everything.\n";
--- /dev/null
+# Default configuration of Sherlock libraries (see sherlock/default.cfg for an explanation)
+
+# Build shared libraries (run `./configure -CONFIG_SHARED' to get static ones instead)
+Set("CONFIG_SHARED");
+
+# We want the public API
+Set("CONFIG_INSTALL_API");
+
+# Libucw should support files >2GB and threading
+Set("CONFIG_LARGE_FILES");
+Set("CONFIG_UCW_THREADS" => 1);
+
+# Libucw extensions
+Set("CONFIG_UCW_PERL" => 1);
+Set("CONFIG_UCW_PERL_MODULES" => 1);
+Set("CONFIG_UCW_SHELL_UTILS" => 1);
+
+# Libsh settings
+Set("CONFIG_BUCKET_SHIFT" => 6);
+
+# Liblang settings
+Set("CONFIG_LANG");
+Set("MAX_WORD_LEN" => 64);
+
+# Libimages settings
+Set("CONFIG_IMAGES");
+Set("CONFIG_IMAGES_LIBJPEG");
+Set("CONFIG_IMAGES_LIBPNG");
+Set("CONFIG_IMAGES_LIBUNGIF");
+UnSet("CONFIG_IMAGES_LIBGIF");
+UnSet("CONFIG_IMAGES_LIBMAGICK");
+
+# Return success
+1;
--- /dev/null
+# Example Makefile for a stand-alone program using libucw
+
+CFLAGS:=$(shell pkg-config --cflags libucw)
+LDLIBS:=$(shell pkg-config --libs libucw)
+
+# "all" names no file; declare it phony so a stray file called "all" cannot hide it
+.PHONY: all
+all: test
+
+# No recipe here: make's built-in rule for linking a program from a single
+# .c file is used, and it picks up CFLAGS and LDLIBS set above
+test: test.c
--- /dev/null
+#include "lib/lib.h"
+
+int main(void) /* minimal libucw client: set up logging and emit one message */
+{
+ log_init("test"); /* NOTE(review): the argument is presumably the program name shown in log messages -- confirm */
+ msg(L_INFO, "Hoooot!");
+ return 0;
+}
--- /dev/null
+# Example Makefile for a stand-alone program using the libucw build system
+# (c) 2007 Martin Mares <mj@ucw.cz>
+
+# The default target. "all" names no real file, so declare it phony:
+# a stray file of that name must not make it look up to date.
+.PHONY: all
+all: runtree programs
+
+# Include configuration
+s=.
+-include obj/config.mk
+obj/config.mk:
+ @echo "You need to run configure first." && false
+
+# We will use the libucw build system
+include $(s)/build/Maketop
+
+# Set up names of common libraries (to avoid forward references in rules)
+LIBLANG=$(o)/lang/liblang.pc
+LIBCHARSET=$(o)/charset/libcharset.pc
+LIBIMAGES=$(o)/images/libimages.pc
+
+# Include makefiles of libraries we wish to use
+include $(s)/lib/Makefile
+include $(s)/charset/Makefile
+include $(s)/lang/Makefile
+include $(s)/images/Makefile
+
+# Programs we want to compile
+PROGS+=$(o)/test
+$(o)/test: $(o)/test.o $(LIBUCW) $(LIBLANG) $(LIBCHARSET) $(LIBIMAGES)
+
+# All tests (%-t) get automatically linked with libucw
+TESTING_DEPS=$(LIBUCW)
+
+# And finally the default rules of the build system
+include $(s)/build/Makebottom
--- /dev/null
+#!/usr/bin/perl
+# Configure script for the libucw example
+# (c) 2007 Martin Mares <mj@ucw.cz>
+
+use warnings;
+use strict;
+
+our $srcdir;
+BEGIN { # locate the source tree at compile time, so that Configure.pm can be loaded from it
+ my $pkgfile = "lib/wildmatch.c"; # a file whose presence identifies the top of the source tree
+ if (!defined ($srcdir = $ENV{"SRCDIR"})) { # an explicitly set SRCDIR always wins
+ if (-f $pkgfile) { # we were started inside the source directory
+ $srcdir=".";
+ } elsif ($0 =~ m@^(.*)/configure$@ && -f "$1/$pkgfile") { # started via a path: try the directory of the script
+ $srcdir=$1;
+ } else {
+ die "Don't know how to find myself. Please set SRCDIR manually.";
+ }
+ }
+ require "$srcdir/lib/perl/Configure.pm";
+ UCW::Configure::import UCW::Configure; # brings Init, Include, Get, Log, Finish... into scope
+}
+
+Init($srcdir, "default.cfg"); # NOTE(review): second argument looks like the default configuration name -- confirm in Configure.pm
+Include "lib/default.cfg";
+Log "### Configuring TestApp ###\n\n";
+Include Get("CONFIG"); # the configuration selected by the CONFIG setting
+Include "lib/autoconf.cfg";
+Finish();
+
+Log "\nConfigured, run `make' to build everything.\n";
--- /dev/null
+# Default configuration file for our test application
+
+# We want to build all libraries shared
+Set("CONFIG_SHARED");
+
+# Liblang settings
+Set("CONFIG_LANG");
+Set("MAX_WORD_LEN" => 64);
+
+# Libimages settings
+Set("CONFIG_IMAGES");
+Set("CONFIG_IMAGES_LIBJPEG");
+Set("CONFIG_IMAGES_LIBPNG");
+Set("CONFIG_IMAGES_LIBUNGIF");
+UnSet("CONFIG_IMAGES_LIBGIF");
+UnSet("CONFIG_IMAGES_LIBMAGICK");
+
+# Return success
+1;
--- /dev/null
+#include "lib/lib.h"
+
+int main(void) /* minimal test program for the example build: set up logging and emit one message */
+{
+ log_init("test"); /* NOTE(review): the argument is presumably the program name shown in log messages -- confirm */
+ msg(L_INFO, "Hoooot!");
+ return 0;
+}
CONFIGS+=images
LIBIMAGES_MODS=math config context image scale color io-main
LIBIMAGES_INCLUDES=images.h error.h color.h math.h
+export LIBIMAGES_LIBS=
ifdef CONFIG_SHERLOCK
LIBIMAGES_MODS+=object
LIBIMAGES_INCLUDES+=signature.h
endif
-LIBIMAGES_LIBS=-lm -lpthread
-
ifdef CONFIG_IMAGES_LIBJPEG
LIBIMAGES_MODS+=io-libjpeg
LIBIMAGES_LIBS+=-ljpeg
$(o)/images/libimages.a: $(addsuffix .o,$(addprefix $(o)/images/,$(LIBIMAGES_MODS)))
$(o)/images/libimages.so: $(addsuffix .oo,$(addprefix $(o)/images/,$(LIBIMAGES_MODS)))
+$(o)/images/libimages.pc: $(LIBIMAGES_DEPS)
-$(o)/images/image-tool: $(o)/images/image-tool.o $(LIBIMAGES) $(LIBIMAGES_DEPS)
-$(o)/images/image-tool: LIBS+=$(LIBIMAGES_LIBS)
-
-$(o)/images/color-tool: $(o)/images/color-tool.o $(LIBIMAGES) $(LIBIMAGES_DEPS)
-$(o)/images/color-tool: LIBS+=$(LIBIMAGES_LIBS)
-
-$(o)/images/image-dup-test: $(o)/images/image-dup-test.o $(LIBIMAGES) $(LIBIMAGES_DEPS)
-$(o)/images/image-dup-test: LIBS+=$(LIBIMAGES_LIBS)
-
-$(o)/images/image-sim-test: $(o)/images/image-sim-test.o $(LIBIMAGES) $(LIBIMAGES_DEPS)
-$(o)/images/image-sim-test: LIBS+=$(LIBIMAGES_LIBS)
+$(o)/images/image-tool: $(o)/images/image-tool.o $(LIBIMAGES)
+$(o)/images/color-tool: $(o)/images/color-tool.o $(LIBIMAGES)
+$(o)/images/image-dup-test: $(o)/images/image-dup-test.o $(LIBIMAGES)
+$(o)/images/image-sim-test: $(o)/images/image-sim-test.o $(LIBIMAGES)
TESTS+=$(o)/images/image-test.test
-$(o)/images/image-test: $(o)/images/image-test.o $(LIBIMAGES) $(LIBIMAGES_DEPS)
-$(o)/images/image-test: LIBS+=$(LIBIMAGES_LIBS)
+$(o)/images/image-test: $(o)/images/image-test.o $(LIBIMAGES)
$(o)/images/image-test.test: $(o)/images/image-test
-TESTS+=$(o)/images/hilbert-test.test
-$(o)/images/hilbert-test: $(LIBIMAGES_DEPS)
-$(o)/images/hilbert-test: LIBS+=-lm
-$(o)/images/hilbert-test.test: $(o)/images/hilbert-test
-
TESTS+=$(o)/images/color.test
-$(o)/images/color-t: $(LIBIMAGES) $(LIBIMAGES_DEPS)
-$(o)/images/color-t: LIBS+=$(LIBIMAGES_LIBS) $(LIBIMAGES_DEPS)
+$(o)/images/color-t: $(LIBIMAGES)
$(o)/images/color.test: $(o)/images/color-t
-INCLUDES+=$(o)/images/.include-stamp
+API_LIBS+=libimages
+API_INCLUDES+=$(o)/images/.include-stamp
$(o)/images/.include-stamp: $(addprefix $(s)/images/,$(LIBIMAGES_INCLUDES))
- $(s)/build/install-includes $(s)/images run/include/images $(?F)
- touch $(o)/images/.include-stamp
+$(o)/images/.include-stamp: IDST=images
+run/lib/pkgconfig/libimages.pc: $(o)/images/libimages.pc
#undef LOCAL_DEBUG
-#include "sherlock/sherlock.h"
-#include "lib/math.h"
+#include "lib/lib.h"
#include "images/images.h"
#include "images/color.h"
#include "images/error.h"
#include "images/math.h"
#include <string.h>
+#include <math.h>
uns color_space_channels[COLOR_SPACE_MAX] = {
[COLOR_SPACE_UNKNOWN] = 0,
byte *a = xmalloc(3 * CNT), *b = xmalloc(3 * CNT);
for (uns i = 0; i < 3 * CNT; i++)
a[i] = random_max(256);
- init_timer();
+ timestamp_t timer;
+ init_timer(&timer);
for (uns i = 0; i < TESTS; i++)
memcpy(b, a, CNT * 3);
- DBG("memcpy time=%d", (uns)get_timer());
- init_timer();
+ DBG("memcpy time=%d", get_timer(&timer));
+ init_timer(&timer);
for (uns i = 0; i < TESTS; i++)
srgb_to_luv_pixels(b, a, CNT);
- DBG("direct time=%d", (uns)get_timer());
- init_timer();
+ DBG("direct time=%d", get_timer(&timer));
+ init_timer(&timer);
for (uns i = 0; i < TESTS; i++)
color_conv_pixels(b, a, CNT, srgb_to_luv_grid);
- DBG("grid time=%d", (uns)get_timer());
+ DBG("grid time=%d", get_timer(&timer));
#endif
return 0;
}
# Tests for color conversion module
-Run: obj/images/color-t
+Run: ../obj/images/color-t
CF_INT("BorderBonus", &image_sig_border_bonus),
CF_DOUBLE_ARY("InertiaScale", image_sig_inertia_scale, 3),
CF_DOUBLE("TexturedThreshold", &image_sig_textured_threshold),
- CF_LOOKUP("CompareMethod", &image_sig_compare_method, ((byte *[]){"integrated", "fuzzy", "average", NULL})),
+ CF_LOOKUP("CompareMethod", &image_sig_compare_method, ((char *[]){"integrated", "fuzzy", "average", NULL})),
CF_UNS_ARY("CompareFeaturesWeights", image_sig_cmp_features_weights, IMAGE_REG_F + IMAGE_REG_H),
CF_END
}
void
image_context_msg_default(struct image_context *ctx)
{
- log(ctx->msg_code >> 24, "%s", ctx->msg);
+ msg(ctx->msg_code >> 24, "%s", ctx->msg);
}
void
#undef LOCAL_DEBUG
-#include "sherlock/sherlock.h"
+#include "lib/lib.h"
#include "lib/mempool.h"
#include "lib/fastbuf.h"
#include "images/images.h"
#undef LOCAL_DEBUG
-#include "sherlock/sherlock.h"
+#include "lib/lib.h"
#include "lib/mempool.h"
#include "lib/fastbuf.h"
#include "images/images.h"
+++ /dev/null
-/* Tests for multidimensional Hilbert curves */
-
-#define LOCAL_DEBUG
-
-#include "lib/lib.h"
-#include "lib/mempool.h"
-#include "lib/math.h"
-#include <stdlib.h>
-#include <stdio.h>
-
-static struct mempool *pool;
-
-static uns dim;
-static uns order;
-
-static inline void
-rand_vec(uns *vec)
-{
- for (uns i = 0; i < dim; i++)
- vec[i] = (uns)rand() >> (32 - order);
-}
-
-static byte *
-print_vec(uns *vec)
-{
- byte *s = mp_alloc(pool, dim * 16), *res = s;
- *s++ = '(';
- for (uns i = 0; i < dim; i++)
- {
- if (i)
- *s++ = ' ';
- s += sprintf(s, "%x", vec[i]);
- }
- *s++ = ')';
- *s = 0;
- return res;
-}
-
-static inline int
-cmp_vec(uns *vec1, uns *vec2)
-{
- for (uns i = dim; i--; )
- if (vec1[i] < vec2[i])
- return -1;
- else if (vec1[i] > vec2[i])
- return 1;
- return 0;
-}
-
-#if 0
-static long double
-param_dist(uns *vec1, uns *vec2)
-{
- long double d1 = 0, d2 = 0;
- for (uns i = 0; i < dim; i++)
- {
- d1 = (d1 + vec1[i]) / ((u64)1 << order);
- d2 = (d2 + vec2[i]) / ((u64)1 << order);
- }
- return fabsl(d1 - d2);
-}
-
-static long double
-vec_dist(uns *vec1, uns *vec2)
-{
- long double d = 0;
- for (uns i = 0; i < dim; i++)
- {
- long double x = fabsl(vec1[i] - vec2[i]) / ((u64)1 << order);
- d += x * x;
- }
- return sqrtl(d);
-}
-#endif
-
-#define HILBERT_PREFIX(x) test1_##x
-#define HILBERT_DIM dim
-#define HILBERT_ORDER order
-#define HILBERT_WANT_DECODE
-#define HILBERT_WANT_ENCODE
-#include "images/hilbert.h"
-
-static void
-test1(void)
-{
- uns a[32], b[32], c[32];
- for (dim = 2; dim <= 8; dim++)
- for (order = 8; order <= 32; order++)
- for (uns i = 0; i < 1000; i++)
- {
- rand_vec(a);
- test1_encode(b, a);
- test1_decode(c, b);
- if (cmp_vec(a, c))
- die("Error... dim=%d order=%d testnum=%d ... %s -> %s -> %s",
- dim, order, i, print_vec(a), print_vec(b), print_vec(c));
- }
-}
-
-#if 0
-#include "images/hilbert-origin.h"
-static void
-test_origin(void)
-{
- Hcode code;
- Point pt, pt2;
- pt.hcode[0] = 0x12345678;
- pt.hcode[1] = 0x654321;
- pt.hcode[2] = 0x11122233;
- code = H_encode(pt);
- pt2 = H_decode(code);
- DBG("origin: [%08x, %08x, %08x] --> [%08x, %08x %08x] --> [%08x, %08x %08x]",
- pt.hcode[0], pt.hcode[1], pt.hcode[2], code.hcode[0], code.hcode[1], code.hcode[2], pt2.hcode[0], pt2.hcode[1], pt2.hcode[2]);
-}
-#endif
-
-int
-main(int argc UNUSED, char **argv UNUSED)
-{
- pool = mp_new(1 << 16);
- test1();
- //test_origin();
- return 0;
-}
+++ /dev/null
-# Tests for multidimensional Hilbert curves
-
-Run: obj/images/hilbert-test
+++ /dev/null
-/*
- * Image Library -- multidimensional Hilbert curves
- *
- * (c) 2006 Pavel Charvat <pchar@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- *
- *
- * References:
- * - http://www.dcs.bbk.ac.uk/~jkl/mapping.c
- * (c) 2002 J.K.Lawder
- * - J.K. Lawder. Calculation of Mappings between One and n-dimensional Values
- * Using the Hilbert Space-Filling Curve. Technical Report JL1/00, Birkbeck
- * College, University of London, 2000.
- *
- * FIXME:
- * - the algorithm fails for some combinations of HILBERT_DIM and HILBERT_ORDER,
- * but it should be safe for HILBERT_DIM = 2..8, HILBERT_ORDER = 8..32
- * - clean and optimize the code
- */
-
-#ifndef HILBERT_PREFIX
-# error Undefined HILBERT_PREFIX
-#endif
-
-#define P(x) HILBERT_PREFIX(x)
-
-/*
- * HILBERT_DIM is the number of dimensions in space through which the
- * Hilbert Curve passes.
- * Don't use this implementation with values for HILBERT_DIM of > 31!
- * Also, make sure you use a 32 bit compiler!
- */
-#ifndef HILBERT_DIM
-# define HILBERT_DIM 2
-#endif
-
-#ifndef HILBERT_TYPE
-# define HILBERT_TYPE u32
-#endif
-
-#ifndef HILBERT_ORDER
-# define HILBERT_ORDER (8 * sizeof(HILBERT_TYPE))
-#endif
-
-typedef HILBERT_TYPE P(t);
-
-/*
- * retained for historical reasons: the number of bits in an attribute value:
- * effectively the order of a curve
- */
-#define NUMBITS HILBERT_ORDER
-
-/*
- * the number of bits in a word used to store an hcode (or in an element of
- * an array that's used)
- */
-#define WORDBITS HILBERT_ORDER
-
-#ifdef HILBERT_WANT_ENCODE
-/*
- * given the coordinates of a point, it finds the sequence number of the point
- * on the Hilbert Curve
- */
-static void
-P(encode) (P(t) *dest, P(t) *src)
-{
- P(t) mask = (P(t))1 << WORDBITS - 1, element, temp1, temp2,
- A, W = 0, S, tS, T, tT, J, P = 0, xJ;
- uns i = NUMBITS * HILBERT_DIM - HILBERT_DIM, j;
-
- for (j = 0; j < HILBERT_DIM; j++)
- dest[j] = 0;
- for (j = A = 0; j < HILBERT_DIM; j++)
- if (src[j] & mask)
- A |= (1 << HILBERT_DIM - 1 - j);
-
- S = tS = A;
-
- P |= S & (1 << HILBERT_DIM - 1);
- for (j = 1; j < HILBERT_DIM; j++)
- if( S & (1 << HILBERT_DIM - 1 - j) ^ (P >> 1) & (1 << HILBERT_DIM - 1 - j))
- P |= (1 << HILBERT_DIM - 1 - j);
-
- /* add in HILBERT_DIM bits to hcode */
- element = i / WORDBITS;
- if (i % WORDBITS > WORDBITS - HILBERT_DIM)
- {
- dest[element] |= P << i % WORDBITS;
- dest[element + 1] |= P >> WORDBITS - i % WORDBITS;
- }
- else
- dest[element] |= P << i - element * WORDBITS;
-
- J = HILBERT_DIM;
- for (j = 1; j < HILBERT_DIM; j++)
- if ((P >> j & 1) == (P & 1))
- continue;
- else
- break;
- if (j != HILBERT_DIM)
- J -= j;
- xJ = J - 1;
-
- if (P < 3)
- T = 0;
- else
- if (P % 2)
- T = (P - 1) ^ (P - 1) / 2;
- else
- T = (P - 2) ^ (P - 2) / 2;
- tT = T;
-
- for (i -= HILBERT_DIM, mask >>= 1; (int)i >= 0; i -= HILBERT_DIM, mask >>= 1)
- {
- for (j = A = 0; j < HILBERT_DIM; j++)
- if (src[j] & mask)
- A |= (1 << HILBERT_DIM - 1 - j);
-
- W ^= tT;
- tS = A ^ W;
- if (xJ % HILBERT_DIM != 0)
- {
- temp1 = tS << xJ % HILBERT_DIM;
- temp2 = tS >> HILBERT_DIM - xJ % HILBERT_DIM;
- S = temp1 | temp2;
- S &= ((P(t))1 << HILBERT_DIM) - 1;
- }
- else
- S = tS;
-
- P = S & (1 << HILBERT_DIM - 1);
- for (j = 1; j < HILBERT_DIM; j++)
- if( S & (1 << HILBERT_DIM - 1 - j) ^ (P >> 1) & (1 << HILBERT_DIM - 1 - j))
- P |= (1 << HILBERT_DIM - 1 - j);
-
- /* add in HILBERT_DIM bits to hcode */
- element = i / WORDBITS;
- if (i % WORDBITS > WORDBITS - HILBERT_DIM)
- {
- dest[element] |= P << i % WORDBITS;
- dest[element + 1] |= P >> WORDBITS - i % WORDBITS;
- }
- else
- dest[element] |= P << i - element * WORDBITS;
-
- if (i > 0)
- {
- if (P < 3)
- T = 0;
- else
- if (P % 2)
- T = (P - 1) ^ (P - 1) / 2;
- else
- T = (P - 2) ^ (P - 2) / 2;
-
- if (xJ % HILBERT_DIM != 0)
- {
- temp1 = T >> xJ % HILBERT_DIM;
- temp2 = T << HILBERT_DIM - xJ % HILBERT_DIM;
- tT = temp1 | temp2;
- tT &= ((P(t))1 << HILBERT_DIM) - 1;
- }
- else
- tT = T;
-
- J = HILBERT_DIM;
- for (j = 1; j < HILBERT_DIM; j++)
- if ((P >> j & 1) == (P & 1))
- continue;
- else
- break;
- if (j != HILBERT_DIM)
- J -= j;
-
- xJ += J - 1;
- /* J %= HILBERT_DIM; */
- }
- }
- for (j = 0; j < HILBERT_DIM; j++)
- dest[j] &= ~(P(t))0 >> (8 * sizeof(P(t)) - WORDBITS);
-}
-#endif
-
-#ifdef HILBERT_WANT_DECODE
-/*
- * given the sequence number of a point, it finds the coordinates of the point
- * on the Hilbert Curve
- */
-static void
-P(decode) (P(t) *dest, P(t) *src)
-{
- P(t) mask = (P(t))1 << WORDBITS - 1, element, temp1, temp2,
- A, W = 0, S, tS, T, tT, J, P = 0, xJ;
- uns i = NUMBITS * HILBERT_DIM - HILBERT_DIM, j;
-
- for (j = 0; j < HILBERT_DIM; j++)
- dest[j] = 0;
-
- /*--- P ---*/
- element = i / WORDBITS;
- P = src[element];
- if (i % WORDBITS > WORDBITS - HILBERT_DIM)
- {
- temp1 = src[element + 1];
- P >>= i % WORDBITS;
- temp1 <<= WORDBITS - i % WORDBITS;
- P |= temp1;
- }
- else
- P >>= i % WORDBITS; /* P is a HILBERT_DIM bit hcode */
-
- /* the & masks out spurious highbit values */
- if (HILBERT_DIM < WORDBITS)
- P &= (1 << HILBERT_DIM) -1;
-
- /*--- xJ ---*/
- J = HILBERT_DIM;
- for (j = 1; j < HILBERT_DIM; j++)
- if ((P >> j & 1) == (P & 1))
- continue;
- else
- break;
- if (j != HILBERT_DIM)
- J -= j;
- xJ = J - 1;
-
- /*--- S, tS, A ---*/
- A = S = tS = P ^ P / 2;
-
-
- /*--- T ---*/
- if (P < 3)
- T = 0;
- else
- if (P % 2)
- T = (P - 1) ^ (P - 1) / 2;
- else
- T = (P - 2) ^ (P - 2) / 2;
-
- /*--- tT ---*/
- tT = T;
-
- /*--- distrib bits to coords ---*/
- for (j = HILBERT_DIM - 1; P > 0; P >>=1, j--)
- if (P & 1)
- dest[j] |= mask;
-
-
- for (i -= HILBERT_DIM, mask >>= 1; (int)i >= 0; i -= HILBERT_DIM, mask >>= 1)
- {
- /*--- P ---*/
- element = i / WORDBITS;
- P = src[element];
- if (i % WORDBITS > WORDBITS - HILBERT_DIM)
- {
- temp1 = src[element + 1];
- P >>= i % WORDBITS;
- temp1 <<= WORDBITS - i % WORDBITS;
- P |= temp1;
- }
- else
- P >>= i % WORDBITS; /* P is a HILBERT_DIM bit hcode */
-
- /* the & masks out spurious highbit values */
- if (HILBERT_DIM < WORDBITS)
- P &= (1 << HILBERT_DIM) -1;
-
- /*--- S ---*/
- S = P ^ P / 2;
-
- /*--- tS ---*/
- if (xJ % HILBERT_DIM != 0)
- {
- temp1 = S >> xJ % HILBERT_DIM;
- temp2 = S << HILBERT_DIM - xJ % HILBERT_DIM;
- tS = temp1 | temp2;
- tS &= ((P(t))1 << HILBERT_DIM) - 1;
- }
- else
- tS = S;
-
- /*--- W ---*/
- W ^= tT;
-
- /*--- A ---*/
- A = W ^ tS;
-
- /*--- distrib bits to coords ---*/
- for (j = HILBERT_DIM - 1; A > 0; A >>=1, j--)
- if (A & 1)
- dest[j] |= mask;
-
- if (i > 0)
- {
- /*--- T ---*/
- if (P < 3)
- T = 0;
- else
- if (P % 2)
- T = (P - 1) ^ (P - 1) / 2;
- else
- T = (P - 2) ^ (P - 2) / 2;
-
- /*--- tT ---*/
- if (xJ % HILBERT_DIM != 0)
- {
- temp1 = T >> xJ % HILBERT_DIM;
- temp2 = T << HILBERT_DIM - xJ % HILBERT_DIM;
- tT = temp1 | temp2;
- tT &= ((P(t))1 << HILBERT_DIM) - 1;
- }
- else
- tT = T;
-
- /*--- xJ ---*/
- J = HILBERT_DIM;
- for (j = 1; j < HILBERT_DIM; j++)
- if ((P >> j & 1) == (P & 1))
- continue;
- else
- break;
- if (j != HILBERT_DIM)
- J -= j;
- xJ += J - 1;
- }
- }
-}
-#endif
-
-#undef P
-#undef HILBERT_PREFIX
-#undef HILBERT_DIM
-#undef HILBERT_TYPE
-#undef HILBERT_ORDER
-#undef HILBERT_WANT_DECODE
-#undef HILBERT_WANT_ENCODE
-#undef NUMBITS
-#undef WORDBITS
{ "transormations", 0, 0, 't' },
{ NULL, 0, 0, 0 }
};
-
+
static uns verbose = 1;
static byte *file_name_1;
static byte *file_name_2;
static struct color background_color;
static uns transformations = IMAGE_DUP_TRANS_ALL;
-#define MSG(x...) do{ if (verbose) log(L_INFO, ##x); }while(0)
+#define MSG(x...) do{ if (verbose) msg(L_INFO, ##x); }while(0)
int
main(int argc, char **argv)
static uns display_base64;
static uns display_base224;
-#define MSG(x...) do{ if (verbose) log(L_INFO, ##x); }while(0)
+#define MSG(x...) do{ if (verbose) msg(L_INFO, ##x); }while(0)
#define TRY(x) do{ if (!(x)) exit(1); }while(0)
static void
mp_delete(pool);
}
+#ifdef CONFIG_UCW_THREADS
+
#define TEST_THREADS_COUNT 4
static void *
return NULL;
}
+#endif
+
static void
test_threads(void)
{
+#ifdef CONFIG_UCW_THREADS
pthread_t threads[TEST_THREADS_COUNT - 1];
pthread_attr_t attr;
if (pthread_attr_init(&attr) < 0 ||
for (uns i = 0; i < TEST_THREADS_COUNT - 1; i++)
if (pthread_join(threads[i], NULL) < 0)
die("Cannot join thread: %m");
+#else
+ msg(L_WARN, "Disabled CONFIG_UCW_THREADS, threaded tests skipped");
+#endif
}
int
color_make_rgb(color, (v >> 16) & 255, (v >> 8) & 255, v & 255);
}
-#define MSG(x...) do{ if (verbose) log(L_INFO, ##x); }while(0)
+#define MSG(x...) do{ if (verbose) msg(L_INFO, ##x); }while(0)
int
main(int argc, char **argv)
--- /dev/null
+# pkg-config metadata for libimages
+
+libdir=@LIBDIR@
+incdir=.
+
+Name: libimages
+Description: Sherlock image library
+Version: @SHERLOCK_VERSION@
+Cflags: -I${incdir}
+Libs: -L${libdir} -limages -lm -lpthread @LIBIMAGES_LIBS@
+Requires: @DEPS@
#undef LOCAL_DEBUG
#include "lib/lib.h"
-#include "lib/math.h"
#include "images/math.h"
#include "images/images.h"
#include "images/signature.h"
#undef LOCAL_DEBUG
-#include "sherlock/sherlock.h"
+#include "lib/lib.h"
#include "lib/fastbuf.h"
#include "lib/conf.h"
-#include "lib/math.h"
#include "images/images.h"
#include "images/math.h"
#include "images/error.h"
#include "images/signature.h"
#include <alloca.h>
+#include <math.h>
int
image_sig_init(struct image_context *ctx, struct image_sig_data *data, struct image *image)
#undef LOCAL_DEBUG
-#include "sherlock/sherlock.h"
+#include "lib/lib.h"
#include "lib/conf.h"
#include "lib/heap.h"
#include "images/images.h"
#undef LOCAL_DEBUG
-#include "sherlock/sherlock.h"
+#include "lib/lib.h"
#include "images/images.h"
#include "images/signature.h"
#include "images/math.h"
-# Makefile for the UCW Library (c) 1997--2006 Martin Mares <mj@ucw.cz>
+# Makefile for the UCW Library (c) 1997--2007 Martin Mares <mj@ucw.cz>
DIRS+=lib
+CONFIGS+=library
+LIBUCW=$(o)/lib/libucw.pc
ifdef CONFIG_UCW_DBTOOL
PROGS+=$(o)/lib/db-tool
LIBUCW_MODS= \
threads \
- alloc alloc_str realloc bigalloc mempool mempool-str mempool-fmt \
+ alloc alloc_str realloc bigalloc mempool mempool-str mempool-fmt eltpool \
mmap pagecache partmap hashfunc \
- lists slists simple-lists sorter bitsig \
+ lists slists simple-lists bitsig \
log log-file proctitle \
conf-alloc conf-dump conf-input conf-intr conf-journal conf-parse conf-section \
ipaccess \
profile \
fastbuf ff-binary ff-string ff-printf ff-utf8 \
- fb-file carefulio fb-mem fb-temp fb-mmap fb-limfd fb-buffer fb-grow fb-atomic \
+ fb-file carefulio fb-mem fb-temp fb-mmap fb-limfd fb-buffer fb-grow fb-pool fb-atomic fb-param \
str_ctype str_upper str_lower unicode-utf8 stkstring \
wildmatch wordsplit ctmatch patimatch patmatch regex \
prime primetable random timer randomkey \
getopt
+# Public headers of libucw.  The .include-stamp rule further down depends on
+# these files and passes the list to build/install-includes.  Keep every header
+# listed exactly once (binsearch.h is already at the end of the list).
LIBUCW_INCLUDES= \
- lib.h config.h threads.h math.h \
+ lib.h config.h threads.h \
mempool.h pagecache.h \
- sorter.h sorter-globals.h arraysort.h \
- lists.h clists.h \
+ arraysort.h \
+ lists.h clists.h slists.h simple-lists.h \
unaligned.h prefetch.h \
bbuf.h gbuf.h bitarray.h bitsig.h \
hashfunc.h hashtable.h \
heap.h binheap.h binheap-node.h \
redblack.h \
+ bitops.h \
conf.h getopt.h ipaccess.h \
profile.h \
- fastbuf.h lfs.h ff-utf8.h \
+ fastbuf.h lfs.h ff-utf8.h ff-binary.h \
chartype.h unicode.h stkstring.h \
wildmatch.h patmatch.h \
db.h \
md5.h \
base64.h base224.h \
qache.h \
- kmp.h kmp-search.h binsearch.h
+ kmp.h kmp-search.h binsearch.h \
+ partmap.h
ifdef CONFIG_UCW_THREADS
# Some modules require threading
-LIBS+=-lpthread
-LIBUCW_MODS+=threads-conf workqueue
-LIBUCW_INCLUDES+=workqueue.h
+LIBUCW_MODS+=threads-conf workqueue asio fb-direct
+LIBUCW_INCLUDES+=workqueue.h semaphore.h asio.h
endif
ifdef CONFIG_OWN_REGEX
include $(s)/lib/getopt/Makefile
endif
-LIBUCW=$(o)/lib/libucw.$(LS)
+include $(s)/lib/sorter/Makefile
+
LIBUCW_MOD_PATHS=$(addprefix $(o)/lib/,$(LIBUCW_MODS))
$(o)/lib/libucw.a: $(addsuffix .o,$(LIBUCW_MOD_PATHS))
$(o)/lib/db-test: $(o)/lib/db-test.o $(LIBUCW)
$(o)/lib/db-tool: $(o)/lib/db-tool.o $(LIBUCW)
$(o)/lib/conf-test: $(o)/lib/conf-test.o $(LIBUCW)
-$(o)/lib/sort-test: $(o)/lib/sort-test.o $(LIBUCW)
$(o)/lib/lfs-test: $(o)/lib/lfs-test.o $(LIBUCW)
$(o)/lib/hash-test: $(o)/lib/hash-test.o $(LIBUCW)
$(o)/lib/str-test: $(o)/lib/str-test.o $(LIBUCW)
$(o)/lib/ipaccess-test: $(o)/lib/ipaccess-test.o $(LIBUCW)
TESTS+=$(addprefix $(o)/lib/,regex.test unicode-utf8.test hash-test.test mempool.test stkstring.test \
- slists.test kmp-test.test bbuf.test getopt.test)
+ slists.test kmp-test.test bbuf.test getopt.test fastbuf.test eltpool.test)
$(o)/lib/regex.test: $(o)/lib/regex-t
$(o)/lib/unicode-utf8.test: $(o)/lib/unicode-utf8-t
$(o)/lib/kmp-test.test: $(o)/lib/kmp-test
$(o)/lib/bbuf.test: $(o)/lib/bbuf-t
$(o)/lib/getopt.test: $(o)/lib/getopt-t
+$(o)/lib/fastbuf.test: $(o)/lib/fb-file-t $(o)/lib/fb-grow-t $(o)/lib/fb-pool-t
+$(o)/lib/eltpool.test: $(o)/lib/eltpool-t
+
+ifdef CONFIG_UCW_THREADS
+TESTS+=$(addprefix $(o)/lib/,asio.test)
+$(o)/lib/asio.test: $(o)/lib/asio-t
+endif
-INCLUDES+=$(o)/lib/.include-stamp
-$(o)/lib/.include-stamp: $(addprefix $(s)/lib/,$(LIBUCW_INCLUDES))
- $(s)/build/install-includes $(s)/lib run/include/lib $(?F)
- $(s)/build/install-includes $(o)/lib run/include/lib autoconf.h
- touch $(o)/lib/.include-stamp
+API_LIBS+=libucw
+API_INCLUDES+=$(o)/lib/.include-stamp
+$(o)/lib/.include-stamp: $(addprefix $(s)/lib/,$(LIBUCW_INCLUDES)) obj/autoconf.h
+ $(Q)$(s)/build/install-includes $(<D) run/include/lib $(LIBUCW_INCLUDES)
+ $(Q)$(s)/build/install-includes obj run/include/lib autoconf.h
+ $(Q)touch $@
+run/lib/pkgconfig/libucw.pc: $(o)/lib/libucw.pc
ifdef CONFIG_UCW_PERL
include $(s)/lib/perl/Makefile
#define MOD(a) a %= BASE
uns
-update_adler32(uns adler, byte *buf, uns len)
+update_adler32(uns adler, const byte *buf, uns len)
{
uns s1 = adler & 0xffff;
uns s2 = (adler >> 16) & 0xffff;
#include <string.h>
-byte *
-xstrdup(byte *s)
+char *
+xstrdup(const char *s)
{
uns l = strlen(s) + 1;
return memcpy(xmalloc(l), s, l);
--- /dev/null
+/*
+ * UCW Library -- Asynchronous I/O
+ *
+ * (c) 2006 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#undef LOCAL_DEBUG
+
+#include "lib/lib.h"
+#include "lib/asio.h"
+#include "lib/threads.h"
+
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+
+static uns asio_num_users;
+static struct worker_pool asio_wpool;
+
+/*
+ * Register one more user of the shared ASIO worker pool.  Only the first
+ * user actually starts the pool, configured with a single worker thread.
+ * The caller in this file (asio_init_queue) holds ucwlib_lock() around the call.
+ */
+static void
+asio_init_unlocked(void)
+{
+  uns users_before = asio_num_users;
+
+  asio_num_users = users_before + 1;
+  if (!users_before)
+    {
+      DBG("ASIO: INIT");
+      asio_wpool.num_threads = 1;
+      worker_pool_init(&asio_wpool);
+    }
+}
+
+/*
+ * Drop one user of the shared ASIO worker pool and shut the pool down as
+ * soon as the last user is gone.  The caller in this file (asio_cleanup_queue)
+ * holds ucwlib_lock() around the call.
+ */
+static void
+asio_cleanup_unlocked(void)
+{
+  asio_num_users -= 1;
+  if (asio_num_users == 0)
+    {
+      DBG("ASIO: CLEANUP");
+      worker_pool_cleanup(&asio_wpool);
+    }
+}
+
+/*
+ * Initialize a new queue.  The caller must have set q->buffer_size to a
+ * non-zero value beforehand (this is asserted); all counters and both request
+ * lists are reset here and the queue is attached to the shared worker pool.
+ */
+void
+asio_init_queue(struct asio_queue *q)
+{
+  /* The worker pool is shared by all queues, its user count is changed under ucwlib_lock(). */
+  ucwlib_lock();
+  asio_init_unlocked();
+  ucwlib_unlock();
+
+  DBG("ASIO: New queue %p", q);
+  ASSERT(q->buffer_size);
+
+  /* A fresh queue has no requests of any kind. */
+  q->allocated_requests = q->running_requests = q->running_writebacks = 0;
+  q->use_count = 0;
+
+  clist_init(&q->done_list);
+  clist_init(&q->idle_list);
+  work_queue_init(&asio_wpool, &q->queue);
+}
+
+/*
+ * Destroy a queue.  All requests must have finished and must have been
+ * returned by asio_put() (enforced by the assertions).  The recycled requests
+ * waiting on the idle list are freed together with their buffers, then the
+ * work queue is cleaned up and the reference to the shared worker pool dropped.
+ */
+void
+asio_cleanup_queue(struct asio_queue *q)
+{
+  struct asio_request *idle;
+
+  DBG("ASIO: Removing queue %p", q);
+  ASSERT(!q->running_requests);
+  ASSERT(!q->running_writebacks);
+  ASSERT(!q->allocated_requests);
+  ASSERT(clist_empty(&q->done_list));
+
+  for (;;)
+    {
+      idle = clist_head(&q->idle_list);
+      if (!idle)
+        break;
+      clist_remove(&idle->work.n);
+      big_free(idle->buffer, q->buffer_size);
+      xfree(idle);
+    }
+
+  work_queue_cleanup(&q->queue);
+
+  /* Give up this queue's share of the worker pool. */
+  ucwlib_lock();
+  asio_cleanup_unlocked();
+  ucwlib_unlock();
+}
+
+/*
+ * Hand out an empty request.  A recycled request from the idle list is
+ * preferred; if there is none, a new one is allocated together with a buffer
+ * of q->buffer_size bytes.  The returned request has op == ASIO_FREE,
+ * fd == -1, len == 0, status == returned_errno == -1 and is not submitted.
+ */
+struct asio_request *
+asio_get(struct asio_queue *q)
+{
+  struct asio_request *req = clist_head(&q->idle_list);
+
+  q->allocated_requests++;
+  if (req)
+    {
+      clist_remove(&req->work.n);
+      DBG("ASIO: Got %p", req);
+    }
+  else
+    {
+      req = xmalloc_zero(sizeof(*req));
+      req->queue = q;
+      req->buffer = big_alloc(q->buffer_size);
+      DBG("ASIO: Got %p (new)", req);
+    }
+
+  /* Reset the per-use fields, so that a recycled request looks like a new one. */
+  req->submitted = 0;
+  req->returned_errno = -1;
+  req->status = -1;
+  req->len = 0;
+  req->fd = -1;
+  req->op = ASIO_FREE;
+  return req;
+}
+
+/*
+ * Wait for one request of the queue to finish and account for it.
+ * A finished write-back is checked (a failed or short write is fatal) and
+ * recycled immediately by asio_put(); any other request is appended to the
+ * done list, where asio_wait() picks it up.
+ *
+ * Returns 1 if a request has been processed, 0 if work_wait() returned nothing.
+ */
+static int
+asio_raw_wait(struct asio_queue *q)
+{
+  struct asio_request *r = (struct asio_request *) work_wait(&q->queue);
+  if (!r)
+    return 0;
+  r->submitted = 0;
+  q->running_requests--;
+  if (r->op == ASIO_WRITE_BACK)
+    {
+      DBG("ASIO: Finished writeback %p", r);
+      if (r->status < 0)
+        die("Asynchronous write to fd %d failed: %s", r->fd, strerror(r->returned_errno));
+      if (r->status != (int)r->len)
+        /* r->len is unsigned, so it is printed with %u */
+        die("Asynchronous write to fd %d wrote only %d bytes out of %u", r->fd, r->status, r->len);
+      q->running_writebacks--;
+      asio_put(r);
+    }
+  else
+    clist_add_tail(&q->done_list, &r->work.n);
+  return 1;
+}
+
+/*
+ * Work callback (asio_submit() installs it as work.go): carries out the
+ * read() or write() described by the request.  The return value of the
+ * system call is stored in status and the resulting errno in returned_errno.
+ * An unknown operation is fatal.
+ */
+static void
+asio_handler(struct worker_thread *t UNUSED, struct work *w)
+{
+  struct asio_request *req = (struct asio_request *) w;
+
+  DBG("ASIO: Servicing %p (%s on fd=%d, len=%d)", req,
+      (char*[]) { "?", "READ", "WRITE", "WRITEBACK" }[req->op], req->fd, req->len);
+
+  /* errno is cleared first; whatever it holds after the call is reported back. */
+  errno = 0;
+  if (req->op == ASIO_READ)
+    req->status = read(req->fd, req->buffer, req->len);
+  else if (req->op == ASIO_WRITE || req->op == ASIO_WRITE_BACK)
+    req->status = write(req->fd, req->buffer, req->len);
+  else
+    die("ASIO: Got unknown request type %d", req->op);
+  req->returned_errno = errno;
+
+  DBG("ASIO: Finished %p (status=%d, errno=%d)", req, req->status, req->returned_errno);
+}
+
+/*
+ * Pass a request to the worker pool.  The request must have its operation
+ * set (op != ASIO_FREE) and must not be submitted already.  A write-back is
+ * accepted only when fewer than q->max_writebacks of them are running, so
+ * the call first waits for older requests to finish if necessary.
+ */
+void
+asio_submit(struct asio_request *r)
+{
+  struct asio_queue *owner = r->queue;
+
+  DBG("ASIO: Submitting %p on queue %p", r, owner);
+  ASSERT(r->op != ASIO_FREE);
+  ASSERT(!r->submitted);
+
+  if (r->op == ASIO_WRITE_BACK)
+    {
+      /* Too many write-backs running: collect finished requests until there is room. */
+      for (; owner->running_writebacks >= owner->max_writebacks; )
+        {
+          DBG("ASIO: Waiting for free writebacks");
+          if (!asio_raw_wait(owner))
+            ASSERT(0);
+        }
+      owner->running_writebacks++;
+    }
+
+  r->work.priority = 0;
+  r->work.go = asio_handler;
+  r->submitted = 1;
+  owner->running_requests++;
+  work_submit(&owner->queue, &r->work);
+}
+
+/*
+ * Return the first finished request of the queue, waiting for running
+ * requests as long as the done list is empty.  Returns NULL when the done
+ * list is empty and asio_raw_wait() has nothing more to wait for.
+ */
+struct asio_request *
+asio_wait(struct asio_queue *q)
+{
+  struct asio_request *done;
+
+  for (;;)
+    {
+      done = clist_head(&q->done_list);
+      if (done)
+        break;
+      DBG("ASIO: Waiting on queue %p", q);
+      if (!asio_raw_wait(q))
+        return NULL;
+    }
+
+  clist_remove(&done->work.n);
+  DBG("ASIO: Done %p", done);
+  return done;
+}
+
+/*
+ * Return a request which is not submitted any more to its queue.  The
+ * request is not freed, it is parked on the idle list for reuse by asio_get().
+ */
+void
+asio_put(struct asio_request *r)
+{
+  struct asio_queue *owner = r->queue;
+
+  DBG("ASIO: Put %p", r);
+  ASSERT(!r->submitted);
+  ASSERT(owner->allocated_requests);
+
+  owner->allocated_requests--;
+  clist_add_tail(&owner->idle_list, &r->work.n);
+}
+
+/*
+ * Wait until no request of the queue is running.  Running out of requests
+ * to wait for while the counter is still non-zero trips an assertion.
+ */
+void
+asio_sync(struct asio_queue *q)
+{
+  DBG("ASIO: Syncing queue %p", q);
+  for (;;)
+    {
+      if (!q->running_requests)
+        break;
+      if (!asio_raw_wait(q))
+        ASSERT(0);
+    }
+}
+
+#ifdef TEST
+
+/*
+ * Self-test driver (compiled only inside the surrounding #ifdef TEST).
+ * Sets up a queue with 4096-byte buffers and at most two write-backs running,
+ * runs one of the two scenarios selected by the "#if 0" below and destroys
+ * the queue again.
+ */
+int main(void)
+{
+ struct asio_queue q;
+ struct asio_request *r;
+
+ q.buffer_size = 4096;
+ q.max_writebacks = 2;
+ asio_init_queue(&q);
+
+#if 0
+
+ /* Disabled scenario: copy fd 0 to fd 1, waiting for each read and writing in the background */
+ for (;;)
+ {
+ r = asio_get(&q);
+ r->op = ASIO_READ;
+ r->fd = 0;
+ r->len = q.buffer_size;
+ asio_submit(r);
+ r = asio_wait(&q);
+ ASSERT(r);
+ if (r->status <= 0)
+ {
+ asio_put(r);
+ break;
+ }
+ /* Reuse the request which has just been read as a write-back of the same data */
+ r->op = ASIO_WRITE_BACK;
+ r->fd = 1;
+ r->len = r->status;
+ asio_submit(r);
+ }
+ asio_sync(&q);
+
+#else
+
+ /* Active scenario, step 1: read a single byte from fd 0 and discard it */
+ r = asio_get(&q);
+ r->op = ASIO_READ;
+ r->fd = 0;
+ r->len = 1;
+ asio_submit(r);
+ r = asio_wait(&q);
+ ASSERT(r);
+ asio_put(r);
+
+ /* Step 2: ten one-byte write-backs ('A' to 'J') to fd 1; with max_writebacks == 2, asio_submit() has to wait */
+ for (uns i=0; i<10; i++)
+ {
+ r = asio_get(&q);
+ r->op = ASIO_WRITE_BACK;
+ r->fd = 1;
+ r->len = 1;
+ r->buffer[0] = 'A' + i;
+ asio_submit(r);
+ }
+ asio_sync(&q);
+
+ /* Step 3: terminate the line by an ordinary write and wait for its completion */
+ r = asio_get(&q);
+ r->op = ASIO_WRITE;
+ r->fd = 1;
+ r->len = 1;
+ r->buffer[0] = '\n';
+ asio_submit(r);
+ r = asio_wait(&q);
+ ASSERT(r);
+ asio_put(r);
+
+#endif
+
+ asio_cleanup_queue(&q);
+ return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Asynchronous I/O
+ *
+ * (c) 2006 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_ASIO_H
+#define _UCW_ASIO_H
+
+#include "lib/workqueue.h"
+#include "lib/clists.h"
+
+/*
+ * This module takes care of scheduling and executing asynchronous I/O requests
+ * on files opened with O_DIRECT. It is primarily used by the fb-direct fastbuf
+ * back-end, but you can use it explicitly, too.
+ *
+ * You can define several I/O queues, each for use by a single thread. Requests
+ * on a single queue are always processed in order of their submits, requests
+ * from different queues may be interleaved (although the current implementation
+ * does not do so). Normal read and write requests are returned to their queue
+ * when they are completed. Write-back requests are automatically freed when
+ * done, but the number of such requests in flight is limited in order to avoid
+ * consuming all memory, so a submit of a write-back request can block.
+ */
+
+/*
+ * State of one I/O queue.  The fields marked [user-settable] are read by the
+ * library (asio_init_queue() asserts that buffer_size is non-zero), the other
+ * fields are initialized by asio_init_queue().
+ */
+struct asio_queue {
+ uns buffer_size; // How large buffers do we use [user-settable]
+ uns max_writebacks; // Maximum number of writeback requests active [user-settable]
+ uns allocated_requests; // Requests handed out by asio_get() and not yet returned by asio_put()
+ uns running_requests; // Total number of running requests
+ uns running_writebacks; // How many of them are writebacks
+ clist idle_list; // Recycled requests waiting for get
+ clist done_list; // Finished requests
+ struct work_queue queue; // Work queue through which the requests are passed to the worker pool
+ uns use_count; // For use by the caller
+};
+
+/* Operation carried by a request. */
+enum asio_op {
+ ASIO_FREE, // No operation assigned (state after asio_get()); such a request must not be submitted
+ ASIO_READ, // read() from the file descriptor to the buffer
+ ASIO_WRITE, // write() of the buffer to the file descriptor, returned via asio_wait()
+ ASIO_WRITE_BACK, // Background write with no success notification
+};
+
+/* One I/O request, obtained from asio_get() and recycled by asio_put(). */
+struct asio_request {
+ struct work work; // asio_requests are internally just work nodes
+ struct asio_queue *queue; // Queue the request belongs to (set when the request is created)
+ byte *buffer; // Data buffer of queue->buffer_size bytes, allocated together with the request
+ int fd; // File descriptor to operate on [set by the caller]
+ enum asio_op op; // Requested operation [set by the caller]
+ uns len; // Number of bytes to read or write [set by the caller]
+ int status; // Return value of the read() or write(), -1 after asio_get()
+ int returned_errno; // Value of errno after the read() or write(), -1 after asio_get()
+ int submitted; // Non-zero from asio_submit() until the finished request is collected
+ void *user_data; // For use by the caller
+};
+
+void asio_init_queue(struct asio_queue *q); // Initialize a new queue
+void asio_cleanup_queue(struct asio_queue *q); // Destroy a queue; all its requests must be finished and returned by asio_put()
+struct asio_request *asio_get(struct asio_queue *q); // Get an empty request
+void asio_submit(struct asio_request *r); // Submit the request (can block if too many writebacks)
+struct asio_request *asio_wait(struct asio_queue *q); // Wait for the first finished request, NULL if no more
+void asio_put(struct asio_request *r); // Return a finished request for recycling
+void asio_sync(struct asio_queue *q); // Wait until all requests are finished
+
+#endif /* !_UCW_ASIO_H */
--- /dev/null
+# Tests for asynchronous I/O
+
+Run: echo y | ../obj/lib/asio-t
+Out: ABCDEFGHIJ
int main(void)
{
+ timestamp_t timer;
+
generate();
- init_timer();
+ init_timer(&timer);
qsort(array, N, sizeof(array[0]), (int (*)(const void *, const void *)) qs_comp);
- printf("qsort: %d ms\n", get_timer());
+ printf("qsort: %d ms\n", get_timer(&timer));
check();
generate();
- init_timer();
+ init_timer(&timer);
as_sort(N);
- printf("asort: %d ms\n", get_timer());
+ printf("asort: %d ms\n", get_timer(&timer));
check();
return 0;
}
Append("CWARNS" => "-Wundef -Wredundant-decls -Wno-pointer-sign -Wdisabled-optimization -Wno-missing-field-initializers");
Append("CWARNS_OFF" => "-Wno-pointer-sign");
Append("COPT" => "-finline-limit=5000 --param large-function-insns=5000 --param inline-unit-growth=200 --param large-function-growth=400");
+} elsif ($gccver == 4002) {
+ Append("CWARNS" => "-Wundef -Wredundant-decls -Wno-pointer-sign -Wdisabled-optimization -Wno-missing-field-initializers");
+ Append("CWARNS_OFF" => "-Wno-pointer-sign");
+ Append("COPT" => "-finline-limit=5000 --param large-function-insns=5000 --param inline-unit-growth=200 --param large-function-growth=400 -fgnu89-inline");
} else {
Warn "Don't know anything about this GCC version, using default switches.\n";
}
# Option for lib/mempool.c
Set("POOL_IS_MMAP");
+# Guess optimal bit width of the radix-sorter
+if (Get("CPU_ARCH") eq "default" || Get("CPU_ARCH") =~ /^i[345]86$/) {
+ # This should be safe everywhere
+ Set("CONFIG_UCW_RADIX_SORTER_BITS" => 10);
+} else {
+ # Use this on modern CPUs
+ Set("CONFIG_UCW_RADIX_SORTER_BITS" => 12);
+}
+
# If debugging memory allocations:
#LIBS+=-lefence
#CDEBUG+=-DDEBUG_DMALLOC
}
uns
-base224_encode(byte *dest, byte *src, uns len)
+base224_encode(byte *dest, const byte *src, uns len)
{
u32 lo=0, hi=0; /* 64-bit buffer accumulating input bits */
uns i=0; /* How many source bits do we have buffered */
}
uns
-base224_decode(byte *dest, byte *src, uns len)
+base224_decode(byte *dest, const byte *src, uns len)
{
u32 hi=0, lo=0; /* 64-bit buffer accumulating output bits */
uns i=0; /* How many bits do we have accumulated */
* of the GNU Lesser General Public License.
*/
-uns base224_encode(byte *dest, byte *src, uns len);
-uns base224_decode(byte *dest, byte *src, uns len);
+uns base224_encode(byte *dest, const byte *src, uns len);
+uns base224_decode(byte *dest, const byte *src, uns len);
/*
* Warning: when encoding, at least 4 bytes of extra space are needed.
#include <string.h>
-static byte base64_table[] =
+static const byte base64_table[] =
{ 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/', '\0'
};
-static byte base64_pad = '=';
+static const byte base64_pad = '=';
uns
-base64_encode(byte *dest, byte *src, uns len)
+base64_encode(byte *dest, const byte *src, uns len)
{
- byte *current = src;
+ const byte *current = src;
uns i = 0;
while (len > 2) { /* keep going until we have less than 24 bits */
/* as above, but backwards. :) */
uns
-base64_decode(byte *dest, byte *src, uns len)
+base64_decode(byte *dest, const byte *src, uns len)
{
- byte *current = src;
+ const byte *current = src;
uns ch;
uns i = 0, j = 0;
static byte reverse_table[256];
* of the GNU Lesser General Public License.
*/
-uns base64_encode(byte *dest, byte *src, uns len);
-uns base64_decode(byte *dest, byte *src, uns len);
+uns base64_encode(byte *dest, const byte *src, uns len);
+uns base64_decode(byte *dest, const byte *src, uns len);
/*
* Use this macro to calculate buffer size.
#include <stdio.h>
char *
-bb_vprintf_at(bb_t *bb, uns ofs, char *fmt, va_list args)
+bb_vprintf_at(bb_t *bb, uns ofs, const char *fmt, va_list args)
{
bb_grow(bb, ofs + 1);
va_list args2;
}
char *
-bb_printf_at(bb_t *bb, uns ofs, char *fmt, ...)
+bb_printf_at(bb_t *bb, uns ofs, const char *fmt, ...)
{
va_list args;
va_start(args, fmt);
}
char *
-bb_vprintf(bb_t *bb, char *fmt, va_list args)
+bb_vprintf(bb_t *bb, const char *fmt, va_list args)
{
return bb_vprintf_at(bb, 0, fmt, args);
}
char *
-bb_printf(bb_t *bb, char *fmt, ...)
+bb_printf(bb_t *bb, const char *fmt, ...)
{
va_list args;
va_start(args, fmt);
#define GBUF_PREFIX(x) bb_##x
#include "lib/gbuf.h"
-char *bb_vprintf(bb_t *bb, char *fmt, va_list args);
-char *bb_printf(bb_t *bb, char *fmt, ...);
-char *bb_vprintf_at(bb_t *bb, uns ofs, char *fmt, va_list args);
-char *bb_printf_at(bb_t *bb, uns ofs, char *fmt, ...);
+char *bb_vprintf(bb_t *bb, const char *fmt, va_list args);
+char *bb_printf(bb_t *bb, const char *fmt, ...);
+char *bb_vprintf_at(bb_t *bb, uns ofs, const char *fmt, va_list args);
+char *bb_printf_at(bb_t *bb, uns ofs, const char *fmt, ...);
#endif
# Tests for growing buffers
-Run: obj/lib/bbuf-t
+Run: ../obj/lib/bbuf-t
Out: <Hello, World!><Hello, World!>
/*
* UCW Library -- Allocation of Large Aligned Buffers
*
- * (c) 2006 Martin Mares <mj@ucw.cz>
+ * (c) 2006--2007 Martin Mares <mj@ucw.cz>
* (c) 2007 Pavel Charvat <char@ucw.cz>
*
* This software may be freely distributed and used according to the terms
#include <sys/mman.h>
#include <string.h>
+#include <limits.h>
void *
-page_alloc(unsigned int len)
+page_alloc(u64 len)
{
+ if (len > SIZE_MAX)
+ die("page_alloc: Size %llu is too large for the current architecture", (long long) len);
ASSERT(!(len & (CPU_PAGE_SIZE-1)));
byte *p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
if (p == (byte*) MAP_FAILED)
- die("Cannot mmap %d bytes of memory: %m", len);
+ die("Cannot mmap %llu bytes of memory: %m", (long long)len);
+ return p;
+}
+
+void *
+page_alloc_zero(u64 len)
+{
+ void *p = page_alloc(len);
+ bzero(p, len);
return p;
}
void
-page_free(void *start, unsigned int len)
+page_free(void *start, u64 len)
{
ASSERT(!(len & (CPU_PAGE_SIZE-1)));
ASSERT(!((uintptr_t) start & (CPU_PAGE_SIZE-1)));
}
void *
-page_realloc(void *start, unsigned int old_len, unsigned int new_len)
+page_realloc(void *start, u64 old_len, u64 new_len)
{
void *p = page_alloc(new_len);
memcpy(p, start, MIN(old_len, new_len));
return p;
}
-static unsigned int
-big_round(unsigned int len)
+static u64
+big_round(u64 len)
{
- return ALIGN_TO(len, CPU_PAGE_SIZE);
+ return ALIGN_TO(len, (u64)CPU_PAGE_SIZE);
}
void *
-big_alloc(unsigned int len)
+big_alloc(u64 len)
{
- len = big_round(len);
+ u64 l = big_round(len);
+ if (l > SIZE_MAX - 2*CPU_PAGE_SIZE)
+ die("big_alloc: Size %llu is too large for the current architecture", (long long) len);
#ifdef CONFIG_DEBUG
- len += 2*CPU_PAGE_SIZE;
+ l += 2*CPU_PAGE_SIZE;
#endif
- byte *p = page_alloc(len);
+ byte *p = page_alloc(l);
#ifdef CONFIG_DEBUG
+ *(u64*)p = len;
mprotect(p, CPU_PAGE_SIZE, PROT_NONE);
- mprotect(p+len-CPU_PAGE_SIZE, CPU_PAGE_SIZE, PROT_NONE);
+ mprotect(p+l-CPU_PAGE_SIZE, CPU_PAGE_SIZE, PROT_NONE);
p += CPU_PAGE_SIZE;
#endif
return p;
}
+void *
+big_alloc_zero(u64 len)
+{
+ void *p = big_alloc(len);
+ bzero(p, big_round(len));
+ return p;
+}
+
void
-big_free(void *start, unsigned int len)
+big_free(void *start, u64 len)
{
byte *p = start;
- len = big_round(len);
+ u64 l = big_round(len);
#ifdef CONFIG_DEBUG
p -= CPU_PAGE_SIZE;
- len += 2*CPU_PAGE_SIZE;
+ mprotect(p, CPU_PAGE_SIZE, PROT_READ);
+ ASSERT(*(u64*)p == len);
+ l += 2*CPU_PAGE_SIZE;
#endif
- page_free(p, len);
+ page_free(p, l);
}
#ifdef TEST
# Tests for bitops modules
-Run: obj/lib/bit-ffs-t
+Run: ../obj/lib/bit-ffs-t
In: 1
2
3
30
31
-Run: obj/lib/bit-fls-t
+Run: ../obj/lib/bit-fls-t
In: 1
2
3
b->maxn = maxn;
b->max_m_mult = (0xffffffff / m) * m;
bzero(b->array, mbytes);
- log(L_DEBUG, "Initialized bitsig array with l=%d, m=%u (%u KB), expecting %d items", b->l, b->m, (mbytes+1023)/1024, maxn);
+ msg(L_DEBUG, "Initialized bitsig array with l=%d, m=%u (%u KB), expecting %d items", b->l, b->m, (mbytes+1023)/1024, maxn);
return b;
}
}
}
if (!was && b->n++ == b->maxn+1)
- log(L_ERROR, "bitsig: Too many items inserted, error rate will be higher than estimated!");
+ msg(L_ERROR, "bitsig: Too many items inserted, error rate will be higher than estimated!");
return was;
}
}
int
-careful_write(int fd, void *buf, int len)
+careful_write(int fd, const void *buf, int len)
{
- byte *pos = buf;
+ const byte *pos = buf;
while (len)
{
int l = write(fd, pos, len);
return mp_alloc_zero(cf_pool, size);
}
-byte *
-cf_strdup(byte *s)
+char *
+cf_strdup(const char *s)
{
return mp_strdup(cf_pool, s);
}
-byte *
-cf_printf(char *fmt, ...)
+char *
+cf_printf(const char *fmt, ...)
{
va_list args;
va_start(args, fmt);
- byte *res = mp_vprintf(cf_pool, fmt, args);
+ char *res = mp_vprintf(cf_pool, fmt, args);
va_end(args);
return res;
}
case CT_DOUBLE: bprintf(fb, "%lg ", *(double*)ptr); break;
case CT_IP: bprintf(fb, "%08x ", *(uns*)ptr); break;
case CT_STRING:
- if (*(byte**)ptr)
- bprintf(fb, "'%s' ", *(byte**)ptr);
+ if (*(char**)ptr)
+ bprintf(fb, "'%s' ", *(char**)ptr);
else
bprintf(fb, "NULL ");
break;
- case CT_LOOKUP: bprintf(fb, "%s ", *(int*)ptr >= 0 ? u->lookup[ *(int*)ptr ] : (byte*) "???"); break;
+ case CT_LOOKUP: bprintf(fb, "%s ", *(int*)ptr >= 0 ? u->lookup[ *(int*)ptr ] : "???"); break;
case CT_USER:
if (u->utype->dumper)
u->utype->dumper(fb, ptr);
static void dump_section(struct fastbuf *fb, struct cf_section *sec, int level, void *ptr);
-static byte *class_names[] = { "end", "static", "dynamic", "parser", "section", "list", "bitmap" };
+static char *class_names[] = { "end", "static", "dynamic", "parser", "section", "list", "bitmap" };
static void
dump_item(struct fastbuf *fb, struct cf_item *item, int level, void *ptr)
/* Text file parser */
-static byte *name_parse_fb;
+static const char *name_parse_fb;
static struct fastbuf *parse_fb;
static uns line_num;
#define MAX_LINE 4096
-static byte line_buf[MAX_LINE];
-static byte *line = line_buf;
+static char line_buf[MAX_LINE];
+static char *line = line_buf;
#include "lib/bbuf.h"
static bb_t copy_buf;
static uns ends_by_brace; // the line is ended by "{"
static int
-get_line(byte **msg)
+get_line(char **msg)
{
int err = bgets_nodie(parse_fb, line_buf, MAX_LINE);
line_num++;
}
static void
-append(byte *start, byte *end)
+append(char *start, char *end)
{
uns len = end - start;
bb_grow(©_buf, copied + len + 1);
copy_buf.ptr[copied-1] = 0;
}
-static byte *
+static char *
get_word(uns is_command_name)
{
- byte *msg;
+ char *msg;
if (*line == '\'') {
line++;
while (1) {
- byte *start = line;
+ char *start = line;
while (*line && *line != '\'')
line++;
append(start, line);
break;
copy_buf.ptr[copied-1] = '\n';
if (!get_line(&msg))
- return msg ? : (byte*) "Unterminated apostrophe word at the end";
+ return msg ? : "Unterminated apostrophe word at the end";
}
line++;
line++;
uns start_copy = copied;
while (1) {
- byte *start = line;
+ char *start = line;
uns escape = 0;
while (*line) {
if (*line == '"' && !escape)
else // merge two lines
copied -= 2;
if (!get_line(&msg))
- return msg ? : (byte*) "Unterminated quoted word at the end";
+ return msg ? : "Unterminated quoted word at the end";
}
line++;
- byte *tmp = stk_str_unesc(copy_buf.ptr + start_copy);
+ char *tmp = stk_str_unesc(copy_buf.ptr + start_copy);
uns l = strlen(tmp);
bb_grow(©_buf, start_copy + l + 1);
strcpy(copy_buf.ptr + start_copy, tmp);
} else {
// promised that *line is non-null and non-blank
- byte *start = line;
+ char *start = line;
while (*line && !Cblank(*line)
&& *line != '{' && *line != '}' && *line != ';'
&& (*line != '=' || !is_command_name))
return NULL;
}
-static byte *
-get_token(uns is_command_name, byte **msg)
+static char *
+get_token(uns is_command_name, char **err)
{
- *msg = NULL;
+ *err = NULL;
while (1) {
if (!*line || *line == '#') {
- if (!is_command_name || !get_line(msg))
+ if (!is_command_name || !get_line(err))
return NULL;
} else if (*line == ';') {
- *msg = get_word(0);
- if (!is_command_name || *msg)
+ *err = get_word(0);
+ if (!is_command_name || *err)
return NULL;
} else if (*line == '\\' && !line[1]) {
- if (!get_line(msg)) {
- if (!*msg)
- *msg = "Last line ends by a backslash";
+ if (!get_line(err)) {
+ if (!*err)
+ *err = "Last line ends by a backslash";
return NULL;
}
if (!*line || *line == '#')
- log(L_WARN, "The line %s:%d following a backslash is empty", name_parse_fb ? : (byte*) "", line_num);
+ msg(L_WARN, "The line %s:%d following a backslash is empty", name_parse_fb ? : "", line_num);
} else {
split_grow(&word_buf, words+1);
uns start = copied;
word_buf.ptr[words++] = copied;
- *msg = get_word(is_command_name);
- return *msg ? NULL : copy_buf.ptr + start;
+ *err = get_word(is_command_name);
+ return *err ? NULL : copy_buf.ptr + start;
}
}
}
-static byte *
+static char *
split_command(void)
{
words = copied = ends_by_brace = 0;
- byte *msg, *start_word;
+ char *msg, *start_word;
if (!(start_word = get_token(1, &msg)))
return msg;
if (*start_word == '{') // only one opening brace
/* Parsing multiple files */
-static byte *
-parse_fastbuf(byte *name_fb, struct fastbuf *fb, uns depth)
+static char *
+parse_fastbuf(const char *name_fb, struct fastbuf *fb, uns depth)
{
- byte *msg;
+ char *err;
name_parse_fb = name_fb;
parse_fb = fb;
line_num = 0;
*line = 0;
while (1)
{
- msg = split_command();
- if (msg)
+ err = split_command();
+ if (err)
goto error;
if (!words)
return NULL;
- byte *name = copy_buf.ptr + word_buf.ptr[0];
- byte *pars[words-1];
+ char *name = copy_buf.ptr + word_buf.ptr[0];
+ char *pars[words-1];
for (uns i=1; i<words; i++)
pars[i-1] = copy_buf.ptr + word_buf.ptr[i];
if (!strcasecmp(name, "include"))
{
if (words != 2)
- msg = "Expecting one filename";
+ err = "Expecting one filename";
else if (depth > 8)
- msg = "Too many nested files";
+ err = "Too many nested files";
else if (*line && *line != '#') // because the contents of line_buf is not re-entrant and will be cleared
- msg = "The input command must be the last one on a line";
- if (msg)
+ err = "The input command must be the last one on a line";
+ if (err)
goto error;
struct fastbuf *new_fb = bopen_try(pars[0], O_RDONLY, 1<<14);
if (!new_fb) {
- msg = cf_printf("Cannot open file %s: %m", pars[0]);
+ err = cf_printf("Cannot open file %s: %m", pars[0]);
goto error;
}
uns ll = line_num;
- msg = parse_fastbuf(stk_strdup(pars[0]), new_fb, depth+1);
+ err = parse_fastbuf(stk_strdup(pars[0]), new_fb, depth+1);
line_num = ll;
bclose(new_fb);
- if (msg)
+ if (err)
goto error;
parse_fb = fb;
continue;
}
enum cf_operation op;
- byte *c = strchr(name, ':');
+ char *c = strchr(name, ':');
if (!c)
op = strcmp(name, "}") ? OP_SET : OP_CLOSE;
else {
default: op = OP_SET; break;
}
if (strcasecmp(c, cf_op_names[op])) {
- msg = cf_printf("Unknown operation %s", c);
+ err = cf_printf("Unknown operation %s", c);
goto error;
}
}
if (ends_by_brace)
op |= OP_OPEN;
- msg = cf_interpret_line(name, op, words-1, pars);
- if (msg)
+ err = cf_interpret_line(name, op, words-1, pars);
+ if (err)
goto error;
}
error:
if (name_fb)
- log(L_ERROR, "File %s, line %d: %s", name_fb, line_num, msg);
+ msg(L_ERROR, "File %s, line %d: %s", name_fb, line_num, err);
else if (line_num == 1)
- log(L_ERROR, "Manual setting of configuration: %s", msg);
+ msg(L_ERROR, "Manual setting of configuration: %s", err);
else
- log(L_ERROR, "Manual setting of configuration, line %d: %s", line_num, msg);
+ msg(L_ERROR, "Manual setting of configuration, line %d: %s", line_num, err);
return "included from here";
}
#ifndef DEFAULT_CONFIG
#define DEFAULT_CONFIG NULL
#endif
-byte *cf_def_file = DEFAULT_CONFIG;
+char *cf_def_file = DEFAULT_CONFIG;
+
+#ifndef ENV_VAR_CONFIG
+#define ENV_VAR_CONFIG NULL
+#endif
+char *cf_env_file = ENV_VAR_CONFIG;
static uns postpone_commit; // only for cf_getopt()
static uns everything_committed; // after the 1st load, this flag is set on
}
static int
-load_file(byte *file)
+load_file(const char *file)
{
cf_init_stack();
struct fastbuf *fb = bopen_try(file, O_RDONLY, 1<<14);
if (!fb) {
- log(L_ERROR, "Cannot open %s: %m", file);
+ msg(L_ERROR, "Cannot open %s: %m", file);
return 1;
}
- byte *msg = parse_fastbuf(file, fb, 0);
+ char *err_msg = parse_fastbuf(file, fb, 0);
bclose(fb);
- int err = !!msg || done_stack();
+ int err = !!err_msg || done_stack();
if (!err)
cf_def_file = NULL;
return err;
}
static int
-load_string(byte *string)
+load_string(const char *string)
{
cf_init_stack();
struct fastbuf fb;
- fbbuf_init_read(&fb, string, strlen(string), 0);
- byte *msg = parse_fastbuf(NULL, &fb, 0);
+ fbbuf_init_read(&fb, (byte *)string, strlen(string), 0);
+ char *msg = parse_fastbuf(NULL, &fb, 0);
return !!msg || done_stack();
}
/* Safe loading and reloading */
int
-cf_reload(byte *file)
+cf_reload(const char *file)
{
cf_journal_swap();
struct cf_journal_item *oldj = cf_journal_new_transaction(1);
}
int
-cf_load(byte *file)
+cf_load(const char *file)
{
struct cf_journal_item *oldj = cf_journal_new_transaction(1);
int err = load_file(file);
}
int
-cf_set(byte *string)
+cf_set(const char *string)
{
struct cf_journal_item *oldj = cf_journal_new_transaction(0);
int err = load_string(string);
load_default(void)
{
if (cf_def_file)
- if (cf_load(cf_def_file))
- die("Cannot load default config %s", cf_def_file);
+ {
+ char *env;
+ if (cf_env_file && (env = getenv(cf_env_file)))
+ {
+ if (cf_load(env))
+ die("Cannot load config file %s", env);
+ }
+ else if (cf_load(cf_def_file))
+ die("Cannot load default config %s", cf_def_file);
+ }
}
static void
#define OP_1ST 0x200 // in the 1st phase selectors are recorded into the mask
#define OP_2ND 0x400 // in the 2nd phase real data are entered
enum cf_operation;
-extern byte *cf_op_names[];
-extern byte *cf_type_names[];
+extern char *cf_op_names[];
+extern char *cf_type_names[];
uns cf_type_size(enum cf_type type, struct cf_user_type *utype);
-byte *cf_interpret_line(byte *name, enum cf_operation op, int number, byte **pars);
+char *cf_interpret_line(char *name, enum cf_operation op, int number, char **pars);
void cf_init_stack(void);
int cf_check_stack(void);
enum cf_commit_mode { CF_NO_COMMIT, CF_COMMIT, CF_COMMIT_ALL };
extern struct cf_section cf_sections;
-struct cf_item *cf_find_subitem(struct cf_section *sec, byte *name);
+struct cf_item *cf_find_subitem(struct cf_section *sec, const char *name);
int cf_commit_all(enum cf_commit_mode cm);
void cf_add_dirty(struct cf_section *sec, void *ptr);
#include <string.h>
#include <stdio.h>
-#define TRY(f) do { byte *_msg = f; if (_msg) return _msg; } while (0)
+#define TRY(f) do { char *_msg = f; if (_msg) return _msg; } while (0)
/* Register size of and parser for each basic type */
-static byte *
-cf_parse_string(byte *str, byte **ptr)
+static char *
+cf_parse_string(char *str, char **ptr)
{
*ptr = cf_strdup(str);
return NULL;
}
-typedef byte *cf_basic_parser(byte *str, void *ptr);
+typedef char *cf_basic_parser(char *str, void *ptr);
static struct {
uns size;
void *parser;
{ sizeof(u64), cf_parse_u64 },
{ sizeof(double), cf_parse_double },
{ sizeof(u32), cf_parse_ip },
- { sizeof(byte*), cf_parse_string },
+ { sizeof(char*), cf_parse_string },
{ sizeof(int), NULL }, // lookups are parsed extra
{ 0, NULL }, // user-defined types are parsed extra
};
return utype->size;
}
-static byte *
-cf_parse_lookup(byte *str, int *ptr, byte **t)
+static char *
+cf_parse_lookup(char *str, int *ptr, char **t)
{
- byte **n = t;
+ char **n = t;
uns total_len = 0;
while (*n && strcasecmp(*n, str)) {
total_len += strlen(*n) + 2;
*ptr = n - t;
return NULL;
}
- byte *err = cf_malloc(total_len + strlen(str) + 60), *c = err;
+ char *err = cf_malloc(total_len + strlen(str) + 60), *c = err;
c += sprintf(err, "Invalid value %s, possible values are: ", str);
for (n=t; *n; n++)
c+= sprintf(c, "%s, ", *n);
return err;
}
-static byte *
-cf_parse_ary(uns number, byte **pars, void *ptr, enum cf_type type, union cf_union *u)
+static char *
+cf_parse_ary(uns number, char **pars, void *ptr, enum cf_type type, union cf_union *u)
{
for (uns i=0; i<number; i++)
{
- byte *msg;
+ char *msg;
uns size = cf_type_size(type, u->utype);
if (type < CT_LOOKUP)
msg = ((cf_basic_parser*) parsers[type].parser) (pars[i], ptr + i * size);
/* Interpreter */
#define T(x) #x,
-byte *cf_op_names[] = { CF_OPERATIONS };
+char *cf_op_names[] = { CF_OPERATIONS };
#undef T
-byte *cf_type_names[] = { "int", "u64", "double", "ip", "string", "lookup", "user" };
+char *cf_type_names[] = { "int", "u64", "double", "ip", "string", "lookup", "user" };
#define DARY_HDR_SIZE ALIGN_TO(sizeof(uns), CPU_STRUCT_ALIGN)
-static byte *
-interpret_set_dynamic(struct cf_item *item, int number, byte **pars, void **ptr)
+static char *
+interpret_set_dynamic(struct cf_item *item, int number, char **pars, void **ptr)
{
enum cf_type type = item->type;
cf_journal_block(ptr, sizeof(void*));
return cf_parse_ary(number, pars, *ptr, type, &item->u);
}
-static byte *
-interpret_add_dynamic(struct cf_item *item, int number, byte **pars, int *processed, void **ptr, enum cf_operation op)
+static char *
+interpret_add_dynamic(struct cf_item *item, int number, char **pars, int *processed, void **ptr, enum cf_operation op)
{
enum cf_type type = item->type;
void *old_p = *ptr;
return cf_printf("Dynamic arrays do not support operation %s", cf_op_names[op]);
}
-static byte *interpret_set_item(struct cf_item *item, int number, byte **pars, int *processed, void *ptr, uns allow_dynamic);
+static char *interpret_set_item(struct cf_item *item, int number, char **pars, int *processed, void *ptr, uns allow_dynamic);
-static byte *
-interpret_section(struct cf_section *sec, int number, byte **pars, int *processed, void *ptr, uns allow_dynamic)
+static char *
+interpret_section(struct cf_section *sec, int number, char **pars, int *processed, void *ptr, uns allow_dynamic)
{
cf_add_dirty(sec, ptr);
*processed = 0;
for (struct cf_item *ci=sec->cfg; ci->cls; ci++)
{
int taken;
- byte *msg = interpret_set_item(ci, number, pars, &taken, ptr + (uintptr_t) ci->ptr, allow_dynamic && !ci[1].cls);
+ char *msg = interpret_set_item(ci, number, pars, &taken, ptr + (uintptr_t) ci->ptr, allow_dynamic && !ci[1].cls);
if (msg)
return cf_printf("Item %s: %s", ci->name, msg);
*processed += taken;
}
}
-static byte *
-interpret_add_list(struct cf_item *item, int number, byte **pars, int *processed, void *ptr, enum cf_operation op)
+static char *
+interpret_add_list(struct cf_item *item, int number, char **pars, int *processed, void *ptr, enum cf_operation op)
{
if (op >= OP_REMOVE)
return cf_printf("You have to open a block for operation %s", cf_op_names[op]);
/* If the node contains any dynamic attribute at the end, we suppress
* auto-repetition here and pass the flag inside instead. */
index++;
- byte *msg = interpret_section(sec, number, pars, &taken, node, sec->flags & SEC_FLAG_DYNAMIC);
+ char *msg = interpret_section(sec, number, pars, &taken, node, sec->flags & SEC_FLAG_DYNAMIC);
if (msg)
return sec->flags & SEC_FLAG_DYNAMIC ? msg : cf_printf("Node %d of list %s: %s", index, item->name, msg);
*processed += taken;
return NULL;
}
-static byte *
-interpret_add_bitmap(struct cf_item *item, int number, byte **pars, int *processed, u32 *ptr, enum cf_operation op)
+static char *
+interpret_add_bitmap(struct cf_item *item, int number, char **pars, int *processed, u32 *ptr, enum cf_operation op)
{
if (op != OP_SET && op != OP_REMOVE)
return cf_printf("Cannot apply operation %s on a bitmap", cf_op_names[op]);
return NULL;
}
-static byte *
-interpret_set_item(struct cf_item *item, int number, byte **pars, int *processed, void *ptr, uns allow_dynamic)
+static char *
+interpret_set_item(struct cf_item *item, int number, char **pars, int *processed, void *ptr, uns allow_dynamic)
{
int taken;
switch (item->cls)
}
}
-static byte *
+static char *
interpret_set_all(struct cf_item *item, void *ptr, enum cf_operation op)
{
if (item->cls == CC_BITMAP) {
static uns zero = 0;
* (void**) ptr = (&zero) + 1;
} else if (item->cls == CC_STATIC && item->type == CT_STRING) {
- cf_journal_block(ptr, item->number * sizeof(byte*));
- bzero(ptr, item->number * sizeof(byte*));
+ cf_journal_block(ptr, item->number * sizeof(char*));
+ bzero(ptr, item->number * sizeof(char*));
} else
return "The item is not a list, dynamic array, bitmap, or string";
return NULL;
i1 += (uintptr_t) item->ptr;
i2 += (uintptr_t) item->ptr;
if (item->type == CT_STRING)
- return strcmp(* (byte**) i1, * (byte**) i2);
+ return strcmp(* (char**) i1, * (char**) i2);
else // all numeric types
return memcmp(i1, i2, cf_type_size(item->type, item->u.utype));
}
return NULL;
}
-static byte *
+static char *
record_selector(struct cf_item *item, struct cf_section *sec, u32 *mask)
{
uns nr = sec->flags & SEC_FLAG_NUMBER;
} stack[MAX_STACK_SIZE];
static uns level;
-static byte *
+static char *
opening_brace(struct cf_item *item, void *ptr, enum cf_operation op)
{
if (level >= MAX_STACK_SIZE-1)
return NULL;
}
-static byte *
-closing_brace(struct item_stack *st, enum cf_operation op, int number, byte **pars)
+static char *
+closing_brace(struct item_stack *st, enum cf_operation op, int number, char **pars)
{
if (st->op == OP_CLOSE) // top-level
return "Unmatched } parenthesis";
}
static struct cf_item *
-find_item(struct cf_section *curr_sec, byte *name, byte **msg, void **ptr)
+find_item(struct cf_section *curr_sec, const char *name, char **msg, void **ptr)
{
*msg = NULL;
if (name[0] == '^') // absolute name instead of relative
{
if (curr_sec != &cf_sections)
cf_add_dirty(curr_sec, *ptr);
- byte *c = strchr(name, '.');
+ char *c = strchr(name, '.');
if (c)
*c++ = 0;
struct cf_item *ci = cf_find_subitem(curr_sec, name);
}
}
-byte *
-cf_interpret_line(byte *name, enum cf_operation op, int number, byte **pars)
+char *
+cf_interpret_line(char *name, enum cf_operation op, int number, char **pars)
{
- byte *msg;
+ char *msg;
if ((op & OP_MASK) == OP_CLOSE)
return closing_brace(stack+level, op, number, pars);
void *ptr = stack[level].base_ptr;
return NULL;
}
-byte *
-cf_find_item(byte *name, struct cf_item *item)
+char *
+cf_find_item(const char *name, struct cf_item *item)
{
- byte *msg;
+ char *msg;
void *ptr = NULL;
struct cf_item *ci = find_item(&cf_sections, name, &msg, &ptr);
if (msg)
return NULL;
}
-byte *
-cf_write_item(struct cf_item *item, enum cf_operation op, int number, byte **pars)
+char *
+cf_write_item(struct cf_item *item, enum cf_operation op, int number, char **pars)
{
- byte *msg;
+ char *msg;
int taken = 0;
switch (op) {
case OP_SET:
cf_check_stack(void)
{
if (level > 0) {
- log(L_ERROR, "Unterminated block");
+ msg(L_ERROR, "Unterminated block");
return 1;
}
return 0;
};
static const struct unit *
-lookup_unit(byte *value, byte *end, byte **msg)
+lookup_unit(const char *value, const char *end, char **msg)
{
if (end && *end) {
if (end == value || end[1] || *end >= '0' && *end <= '9')
*msg = "Invalid number";
else {
for (const struct unit *u=units; u->name; u++)
- if (u->name == *end)
+ if ((char)u->name == *end)
return u;
*msg = "Invalid unit";
}
static char cf_rngerr[] = "Number out of range";
-byte *
-cf_parse_int(byte *str, int *ptr)
+char *
+cf_parse_int(const char *str, int *ptr)
{
- byte *msg = NULL;
+ char *msg = NULL;
if (!*str)
msg = "Missing number";
else {
return msg;
}
-byte *
-cf_parse_u64(byte *str, u64 *ptr)
+char *
+cf_parse_u64(const char *str, u64 *ptr)
{
- byte *msg = NULL;
+ char *msg = NULL;
if (!*str)
msg = "Missing number";
else {
return msg;
}
-byte *
-cf_parse_double(byte *str, double *ptr)
+char *
+cf_parse_double(const char *str, double *ptr)
{
- byte *msg = NULL;
+ char *msg = NULL;
if (!*str)
msg = "Missing number";
else {
return msg;
}
-byte *
-cf_parse_ip(byte *p, u32 *varp)
+char *
+cf_parse_ip(const char *p, u32 *varp)
{
if (!*p)
return "Missing IP address";
struct cf_section cf_sections; // root section
struct cf_item *
-cf_find_subitem(struct cf_section *sec, byte *name)
+cf_find_subitem(struct cf_section *sec, const char *name)
{
struct cf_item *ci = sec->cfg;
for (; ci->cls; ci++)
}
void
-cf_declare_section(byte *name, struct cf_section *sec, uns allow_unknown)
+cf_declare_section(const char *name, struct cf_section *sec, uns allow_unknown)
{
if (!cf_sections.cfg)
{
}
void
-cf_init_section(byte *name, struct cf_section *sec, void *ptr, uns do_bzero)
+cf_init_section(const char *name, struct cf_section *sec, void *ptr, uns do_bzero)
{
if (do_bzero) {
ASSERT(sec->size);
}
}
if (sec->init) {
- byte *msg = sec->init(ptr);
+ char *msg = sec->init(ptr);
if (msg)
die("Cannot initialize section %s: %s", name, msg);
}
}
-static byte *
+static char *
commit_section(struct cf_section *sec, void *ptr, uns commit_all)
{
- byte *err;
+ char *err;
for (struct cf_item *ci=sec->cfg; ci->cls; ci++)
if (ci->cls == CC_SECTION) {
if ((err = commit_section(ci->u.sec, ptr + (uintptr_t) ci->ptr, commit_all))) {
- log(L_ERROR, "Cannot commit section %s: %s", ci->name, err);
+ msg(L_ERROR, "Cannot commit section %s: %s", ci->name, err);
return "commit of a subsection failed";
}
} else if (ci->cls == CC_LIST) {
uns idx = 0;
CLIST_FOR_EACH(cnode *, n, * (clist*) (ptr + (uintptr_t) ci->ptr))
if (idx++, err = commit_section(ci->u.sec, n, commit_all)) {
- log(L_ERROR, "Cannot commit node #%d of list %s: %s", idx, ci->name, err);
+ msg(L_ERROR, "Cannot commit node #%d of list %s: %s", idx, ci->name, err);
return "commit of a list failed";
}
}
struct sub_sect_1 {
cnode n;
- byte *name;
+ char *name;
time_t t;
- byte *level;
+ char *level;
int confidence[2];
double *list;
};
static struct sub_sect_1 sec1 = { {}, "Charlie", 0, "WBAFC", { 0, -1}, DARY_ALLOC(double, 3, 1e4, -1e-4, 8) };
-static byte *
+static char *
init_sec_1(struct sub_sect_1 *s)
{
if (s == &sec1) // this is a static variable; skip clearing
return NULL;
}
-static byte *
+static char *
commit_sec_1(struct sub_sect_1 *s)
{
if (s->confidence[0] < 0 || s->confidence[0] > 10)
return NULL;
}
-static byte *
-time_parser(uns number, byte **pars, time_t *ptr)
+static char *
+time_parser(uns number, char **pars, time_t *ptr)
{
*ptr = number ? atoi(pars[0]) : time(NULL);
return NULL;
static uns nr1 = 15;
static int *nrs1 = DARY_ALLOC(int, 5, 5, 4, 3, 2, 1);
static int nrs2[5];
-static byte *str1 = "no worries";
-static byte **str2 = DARY_ALLOC(byte *, 2, "Alice", "Bob");
+static char *str1 = "no worries";
+static char **str2 = DARY_ALLOC(char *, 2, "Alice", "Bob");
static u64 u1 = 0xCafeBeefDeadC00ll;
static double d1 = -1.1;
static clist secs;
static u32 bitmap1 = 0xff;
static u32 bitmap2 = 3;
-static byte *
-parse_u16(byte *string, u16 *ptr)
+static char *
+parse_u16(char *string, u16 *ptr)
{
uns a;
- byte *msg = cf_parse_int(string, &a);
+ char *msg = cf_parse_int(string, &a);
if (msg)
return msg;
if (a >= (1<<16))
.dumper = (cf_dumper1*) dump_u16
};
-static byte *
+static char *
init_top(void *ptr UNUSED)
{
for (uns i=0; i<5; i++)
return NULL;
}
-static byte *
+static char *
commit_top(void *ptr UNUSED)
{
if (nr1 != 15)
return NULL;
}
-static byte *alphabet[] = { "alpha", "beta", "gamma", "delta", NULL };
+static char *alphabet[] = { "alpha", "beta", "gamma", "delta", NULL };
static struct cf_section cf_top = {
CF_INIT(init_top),
CF_COMMIT(commit_top),
CF_LOOKUP_DYN("look", &look, alphabet, 1000),
CF_USER_ARY("numbers", numbers, &u16_type, 10),
CF_BITMAP_INT("bitmap1", &bitmap1),
- CF_BITMAP_LOOKUP("bitmap2", &bitmap2, ((byte*[]) {
+ CF_BITMAP_LOOKUP("bitmap2", &bitmap2, ((char*[]) {
"one", "two", "three", "four", "five", "six", "seven", "eight",
"nine", "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen", "seventeen",
"eighteen", "nineteen", "twenty", NULL // hidden joke here
";
static void NONRET
-usage(byte *msg, ...)
+usage(char *msg, ...)
{
va_list va;
va_start(va, msg);
};
struct fastbuf;
-typedef byte *cf_parser(uns number, byte **pars, void *ptr);
+typedef char *cf_parser(uns number, char **pars, void *ptr);
/* A parser function gets an array of (strdup'ed) strings and a pointer with
* the customized information (most likely the target address). It can store
* the parsed value anywhere in any way it likes, however it must first call
* cf_journal_block() on the overwritten memory block. It returns an error
* message or NULL if everything is all right. */
-typedef byte *cf_parser1(byte *string, void *ptr);
+typedef char *cf_parser1(char *string, void *ptr);
/* A parser function for user-defined types gets a string and a pointer to
* the destination variable. It must store the value within [ptr,ptr+size),
* where size is fixed for each type. It should not call cf_journal_block(). */
-typedef byte *cf_hook(void *ptr);
+typedef char *cf_hook(void *ptr);
/* An init- or commit-hook gets a pointer to the section or NULL if this
* is the global section. It returns an error message or NULL if everything
* is all right. The init-hook should fill in default values (needed for
* use cf_malloc() but normal xmalloc(). */
typedef void cf_dumper1(struct fastbuf *fb, void *ptr);
/* Dumps the contents of a variable of a user-defined type. */
-typedef byte *cf_copier(void *dest, void *src);
+typedef char *cf_copier(void *dest, void *src);
/* Similar to init-hook, but it copies attributes from another list node
* instead of setting the attributes to default values. You have to provide
* it if your node contains parsed values and/or sub-lists. */
struct cf_user_type {
uns size; // of the parsed attribute
- byte *name; // name of the type (for dumping)
+ char *name; // name of the type (for dumping)
cf_parser1 *parser; // how to parse it
cf_dumper1 *dumper; // how to dump the type
};
struct cf_section;
struct cf_item {
- byte *name; // case insensitive
+ const char *name; // case insensitive
int number; // length of an array or #parameters of a parser (negative means at most)
void *ptr; // pointer to a global variable or an offset in a section
union cf_union {
struct cf_section *sec; // declaration of a section or a list
cf_parser *par; // parser function
- byte **lookup; // NULL-terminated sequence of allowed strings for lookups
+ char **lookup; // NULL-terminated sequence of allowed strings for lookups
struct cf_user_type *utype; // specification of the user-defined type
} u;
enum cf_class cls:16; // attribute class
#define CF_IP(n,p) CF_STATIC(n,p,IP,u32,1)
#define CF_IP_ARY(n,p,c) CF_STATIC(n,p,IP,u32,c)
#define CF_IP_DYN(n,p,c) CF_DYNAMIC(n,p,IP,u32,c)
-#define CF_STRING(n,p) CF_STATIC(n,p,STRING,byte*,1)
-#define CF_STRING_ARY(n,p,c) CF_STATIC(n,p,STRING,byte*,c)
-#define CF_STRING_DYN(n,p,c) CF_DYNAMIC(n,p,STRING,byte*,c)
+#define CF_STRING(n,p) CF_STATIC(n,p,STRING,char*,1)
+#define CF_STRING_ARY(n,p,c) CF_STATIC(n,p,STRING,char*,c)
+#define CF_STRING_DYN(n,p,c) CF_DYNAMIC(n,p,STRING,char*,c)
#define CF_LOOKUP(n,p,t) { .cls = CC_STATIC, .type = CT_LOOKUP, .name = n, .number = 1, .ptr = CHECK_PTR_TYPE(p,int*), .u.lookup = t }
#define CF_LOOKUP_ARY(n,p,t,c) { .cls = CC_STATIC, .type = CT_LOOKUP, .name = n, .number = c, .ptr = CHECK_PTR_TYPE(p,int*), .u.lookup = t }
#define CF_LOOKUP_DYN(n,p,t,c) { .cls = CC_DYNAMIC, .type = CT_LOOKUP, .name = n, .number = c, .ptr = CHECK_PTR_TYPE(p,int**), .u.lookup = t }
extern struct mempool *cf_pool;
void *cf_malloc(uns size);
void *cf_malloc_zero(uns size);
-byte *cf_strdup(byte *s);
-byte *cf_printf(char *fmt, ...) FORMAT_CHECK(printf,1,2);
+char *cf_strdup(const char *s);
+char *cf_printf(const char *fmt, ...) FORMAT_CHECK(printf,1,2);
/* Undo journal for error recovery: conf-journal.c */
extern uns cf_need_journal;
#define CF_JOURNAL_VAR(var) cf_journal_block(&(var), sizeof(var))
/* Declaration: conf-section.c */
-void cf_declare_section(byte *name, struct cf_section *sec, uns allow_unknown);
-void cf_init_section(byte *name, struct cf_section *sec, void *ptr, uns do_bzero);
+void cf_declare_section(const char *name, struct cf_section *sec, uns allow_unknown);
+void cf_init_section(const char *name, struct cf_section *sec, void *ptr, uns do_bzero);
/* Parsers for basic types: conf-parse.c */
-byte *cf_parse_int(byte *str, int *ptr);
-byte *cf_parse_u64(byte *str, u64 *ptr);
-byte *cf_parse_double(byte *str, double *ptr);
-byte *cf_parse_ip(byte *p, u32 *varp);
+char *cf_parse_int(const char *str, int *ptr);
+char *cf_parse_u64(const char *str, u64 *ptr);
+char *cf_parse_double(const char *str, double *ptr);
+char *cf_parse_ip(const char *p, u32 *varp);
#endif
/* Configuration switches */
-#include "lib/autoconf.h"
+#include "autoconf.h"
/* Tell libc we're going to use all extensions available */
typedef int64_t s64; /* exactly 64 bits, signed */
typedef unsigned int uns; /* at least 32 bits */
-typedef u32 sh_time_t; /* Timestamp */
+typedef u32 sh_time_t; /* seconds since UNIX epoch */
+typedef s64 timestamp_t; /* milliseconds since UNIX epoch */
#ifdef CONFIG_LARGE_FILES /* File positions */
typedef s64 sh_off_t;
#include "lib/chartype.h"
int
-match_ct_patt(byte *p, byte *t)
+match_ct_patt(const char *p, const char *t)
{
if (*p == '*' && !p[1]) /* "*" matches everything */
return 1;
uns ks, vs, vs2, perc, cnt;
char *ch;
int dont_delete = 0;
+ timestamp_t timer;
log_init("dbtest");
setvbuf(stdout, NULL, _IONBF, 0);
while (optind < argc)
{
char *o = argv[optind++];
- init_timer();
+ init_timer(&timer);
switch (*o)
{
case 'c':
help();
}
sdbm_sync(d);
- printf("%d ms\n", get_timer());
+ printf("%d ms\n", get_timer(&timer));
}
verb("CLOSE\n");
--- /dev/null
+# Configuration variables of the UCW library and their default values
+# (c) 2005--2007 Martin Mares <mj@ucw.cz>
+
+# Version of the whole package
+Set("SHERLOCK_VERSION" => "3.12");
+
+# Compile everything with debug information and ASSERT's
+UnSet("CONFIG_DEBUG");
+
+# Enable aggressive optimizations depending on exact CPU type (don't use for portable packages)
+UnSet("CONFIG_EXACT_CPU");
+
+# Support files >2GB
+Set("CONFIG_LARGE_FILES");
+
+# Use shared libraries
+UnSet("CONFIG_SHARED");
+
+# If your system doesn't contain GNU libc 2.3 or newer, it's recommended to let Sherlock
+# use its own regex library (a copy of the glibc one), because the default regex library
+# is likely to be crappy.
+Set("CONFIG_OWN_REGEX");
+
+# If your system can't reset getopt with 'optind = 0', you need to compile our internal copy
+# of GNU libc's getopt. This should not be necessary on GNU libc.
+UnSet("CONFIG_OWN_GETOPT");
+
+# Install libraries and their API includes
+UnSet("CONFIG_INSTALL_API");
+
+# Build with support for multi-threaded programs
+Set("CONFIG_UCW_THREADS" => 1);
+
+# Include Perl modules
+Set("CONFIG_UCW_PERL" => 1);
+
+# Include Perl modules written in C
+UnSet("CONFIG_UCW_PERL_MODULES");
+
+# Include support utilities for shell scripts
+Set("CONFIG_UCW_SHELL_UTILS" => 1);
+
+# Default configuration file
+UnSet("DEFAULT_CONFIG");
+
+# Environment variable with configuration file
+UnSet("ENV_VAR_CONFIG");
+
+# Return success
+1;
--- /dev/null
+/*
+ * UCW Library -- Fast Allocator for Fixed-Size Elements
+ *
+ * (c) 2007 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+/*
+ * This allocator is optimized for intensive allocation and freeing of small
+ * blocks of identical sizes. System memory is allocated by multiples of the
+ * page size and it is returned back only when the whole eltpool is deleted.
+ *
+ * In the future, we can add returning of memory to the system and also cache
+ * coloring like in the SLAB allocator used in the Linux kernel.
+ */
+
+#undef LOCAL_DEBUG
+
+#include "lib/lib.h"
+#include "lib/eltpool.h"
+
+/*
+ * Create a new pool of fixed-size elements.
+ *
+ * elt_size is the requested size of one element; it is raised to at least
+ * sizeof(struct eltpool_free), because a free element stores the free-list
+ * link in its own memory, and then rounded up to CPU_STRUCT_ALIGN.
+ *
+ * elts_per_chunk is only a lower bound: the chunk size starts at
+ * CPU_PAGE_SIZE and is doubled until the requested number of elements plus
+ * the chunk header fit; the real number of elements per chunk is then
+ * recomputed from the final chunk size.
+ *
+ * Returns the pool descriptor obtained from xmalloc_zero(); release it with
+ * ep_delete().
+ */
+struct eltpool *
+ep_new(uns elt_size, uns elts_per_chunk)
+{
+  struct eltpool *pool = xmalloc_zero(sizeof(*pool));
+  pool->elt_size = ALIGN_TO(MAX(elt_size, sizeof(struct eltpool_free)), CPU_STRUCT_ALIGN);
+  pool->chunk_size = CPU_PAGE_SIZE;
+  /*
+   * Always make room for at least one element. ep_alloc_slow() hands out one
+   * element of every fresh chunk unconditionally, so a chunk that cannot hold
+   * a single element (elts_per_chunk == 0 combined with an element larger
+   * than a page) would be overrun by the caller.
+   */
+  uns min_elts = MAX(elts_per_chunk, 1U);
+  while (pool->elt_size * min_elts + sizeof(struct eltpool_chunk) > pool->chunk_size)
+    pool->chunk_size *= 2;
+  pool->elts_per_chunk = (pool->chunk_size - sizeof(struct eltpool_chunk)) / pool->elt_size;
+  DBG("ep_new(): got elt_size=%d, epc=%d; used chunk_size=%d, epc=%d", elt_size, elts_per_chunk, pool->chunk_size, pool->elts_per_chunk);
+  return pool;
+}
+
+/*
+ * Destroy the pool: walk the chunk list, return every chunk with
+ * page_free() and finally release the pool descriptor itself.
+ * Elements handed out from the pool live inside the chunks, so they
+ * must not be used afterwards.
+ */
+void
+ep_delete(struct eltpool *pool)
+{
+  struct eltpool_chunk *chunk = pool->first_chunk;
+  while (chunk)
+    {
+      /* Unlink the chunk before its memory goes away. */
+      struct eltpool_chunk *following = chunk->next;
+      pool->first_chunk = following;
+      page_free(chunk, pool->chunk_size);
+      chunk = following;
+    }
+  xfree(pool);
+}
+
+/*
+ * Slow path of ep_alloc(), called when the free list is empty.
+ * Allocates a new chunk with page_alloc(), links it to the pool's chunk
+ * list, pushes all of its elements except the last one onto the free list
+ * and returns that last element to the caller.
+ */
+void *
+ep_alloc_slow(struct eltpool *pool)
+{
+  struct eltpool_chunk *chunk = page_alloc(pool->chunk_size);
+  chunk->next = pool->first_chunk;
+  pool->first_chunk = chunk;
+
+  /* Element storage starts right behind the chunk header. */
+  void *slot = (void *)(chunk + 1);
+  uns left = pool->elts_per_chunk;
+  while (left > 1)
+    {
+      struct eltpool_free *spare = slot;
+      spare->next = pool->first_free;
+      pool->first_free = spare;
+      slot += pool->elt_size;
+      left--;
+    }
+  /* The one slot which did not go to the free list is the result. */
+  return slot;
+}
+
+#ifdef TEST
+
+#include <stdio.h>
+#include "lib/clists.h"
+
+/*
+ * Element type used by the self-test below: a circular-list node followed
+ * by a single byte of payload.
+ * NOTE(review): PACKED presumably keeps the size unaligned so that the
+ * rounding in ep_new() gets exercised -- confirm.
+ */
+struct argh {
+ cnode n;
+ byte x[1];
+} PACKED;
+
+/*
+ * Self-test: allocate 65536 elements from a pool, linking each one to the
+ * head (every third iteration) or the tail of a list, and on every fifth
+ * iteration unlink the current list head and give it back to the pool.
+ * Prints "OK" when the whole sequence has run.
+ */
+int main(void)
+{
+  clist l;
+  struct eltpool *ep = ep_new(sizeof(struct argh), 64);
+  clist_init(&l);
+  for (uns round = 0; round < 65536; round++)
+    {
+      struct argh *a = ep_alloc(ep);
+      if (round % 3 == 0)
+        clist_add_head(&l, &a->n);
+      else
+        clist_add_tail(&l, &a->n);
+      if (round % 5 != 0)
+        continue;
+      /* Recycle whatever element is currently first in the list. */
+      a = clist_head(&l);
+      clist_remove(&a->n);
+      ep_free(ep, a);
+    }
+  ep_delete(ep);
+  puts("OK");
+  return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Fast Allocator for Fixed-Size Elements
+ *
+ * (c) 2007 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_ELTPOOL_H
+#define _UCW_ELTPOOL_H
+
+/*
+ * Descriptor of a pool of fixed-size elements. It is created by ep_new()
+ * and destroyed, together with all its chunks, by ep_delete().
+ */
+struct eltpool {
+ struct eltpool_chunk *first_chunk; // Head of the list of all chunks owned by the pool
+ struct eltpool_free *first_free; // Head of the list of free elements (NULL if none)
+ uns elt_size; // Size of one element after rounding in ep_new()
+ uns chunk_size; // Size in bytes of every chunk
+ uns elts_per_chunk; // Number of elements carved out of one chunk
+ uns num_allocated; // Just for debugging
+};
+
+/* Header placed at the start of every chunk; the chunks of a pool form a singly linked list. */
+struct eltpool_chunk {
+ struct eltpool_chunk *next;
+ /* Chunk data continue here */
+};
+
+/* Overlay stored in the memory of a free element: the link to the next free element. */
+struct eltpool_free {
+ struct eltpool_free *next;
+};
+
+/* Create a pool of elements of the given size; elts_per_chunk is the minimum number of elements per chunk. */
+struct eltpool *ep_new(uns elt_size, uns elts_per_chunk);
+/* Release the pool and all memory it has allocated. */
+void ep_delete(struct eltpool *pool);
+/* Internal slow path of ep_alloc(), used when the free list is empty. */
+void *ep_alloc_slow(struct eltpool *pool);
+
+/*
+ * Allocate one element from the pool. The element is taken from the free
+ * list when possible; otherwise ep_alloc_slow() supplies it. With
+ * CONFIG_FAKE_ELTPOOL defined, every element comes straight from xmalloc().
+ */
+static inline void *
+ep_alloc(struct eltpool *pool)
+{
+  pool->num_allocated++;
+#ifdef CONFIG_FAKE_ELTPOOL
+  return xmalloc(pool->elt_size);
+#else
+  struct eltpool_free *head = pool->first_free;
+  if (!head)
+    return ep_alloc_slow(pool);
+  pool->first_free = head->next;
+  return head;
+#endif
+}
+
+/*
+ * Return an element to the pool by pushing it onto the head of the free
+ * list. With CONFIG_FAKE_ELTPOOL defined, the element is passed to xfree()
+ * instead.
+ */
+static inline void
+ep_free(struct eltpool *pool, void *p)
+{
+  pool->num_allocated--;
+#ifndef CONFIG_FAKE_ELTPOOL
+  struct eltpool_free *released = p;
+  released->next = pool->first_free;
+  pool->first_free = released;
+#else
+  (void) pool;
+  xfree(p);
+#endif
+}
+
+#endif
--- /dev/null
+# Tests for eltpools
+
+Run: ../obj/lib/eltpool-t
+Out: OK
#include <errno.h>
int
-format_exit_status(byte *msg, int stat)
+format_exit_status(char *msg, int stat)
{
if (stat < 0)
sprintf(msg, "failed to fork (err=%d)", errno);
return total;
}
-void bwrite_slow(struct fastbuf *f, void *b, uns l)
+void bwrite_slow(struct fastbuf *f, const void *b, uns l)
{
while (l)
{
byte is_fastbuf[0]; /* Dummy field for checking of type casts */
byte *bptr, *bstop; /* Access pointers */
byte *buffer, *bufend; /* Start and end of the buffer */
- byte *name; /* File name for error messages */
+ char *name; /* File name for error messages */
sh_off_t pos; /* Position of bstop in the file */
int (*refill)(struct fastbuf *); /* Get a buffer with new data */
void (*spout)(struct fastbuf *); /* Write buffer data to the file */
int can_overwrite_buffer; /* Can the buffer be altered? (see discussion above) 0=never, 1=temporarily, 2=permanently */
};
-/* FastIO on standard files (specify buffer size 0 to enable mmaping) */
+/* FastIO on files with several configurable back-ends */
-struct fastbuf *bopen(byte *name, uns mode, uns buflen);
-struct fastbuf *bopen_try(byte *name, uns mode, uns buflen);
+enum fb_type { /* Which back-end you want to use */
+ FB_STD, /* Standard buffered I/O */
+ FB_DIRECT, /* Direct I/O bypassing system caches (see fb-direct.c for a description) */
+ FB_MMAP /* Memory mapped files */
+};
+
+struct fb_params {
+ enum fb_type type;
+ uns buffer_size; /* 0 for default size */
+ uns keep_back_buf; /* FB_STD: optimize for bi-directional access */
+ uns read_ahead; /* FB_DIRECT options */
+ uns write_back;
+ struct asio_queue *asio;
+};
+
+struct cf_section;
+extern struct cf_section fbpar_cf;
+extern struct fb_params fbpar_def;
+
+struct fastbuf *bopen_file(const char *name, int mode, struct fb_params *params); /* Use params==NULL for defaults */
+struct fastbuf *bopen_file_try(const char *name, int mode, struct fb_params *params);
+struct fastbuf *bopen_tmp_file(struct fb_params *params);
+struct fastbuf *bopen_fd(int fd, struct fb_params *params);
+
+/* FastIO on standard files (shortcuts for FB_STD) */
+
+struct fastbuf *bopen(const char *name, uns mode, uns buflen);
+struct fastbuf *bopen_try(const char *name, uns mode, uns buflen);
struct fastbuf *bopen_tmp(uns buflen);
struct fastbuf *bfdopen(int fd, uns buflen);
struct fastbuf *bfdopen_shared(int fd, uns buflen);
void bfilesync(struct fastbuf *b);
+/* Temporary files */
+
#define TEMP_FILE_NAME_LEN 256
-void temp_file_name(byte *name);
+void temp_file_name(char *name);
+void bfix_tmp_file(struct fastbuf *fb, const char *name);
-/* FastIO on in-memory streams */
+/* Internal functions of some file back-ends */
-struct fastbuf *fbmem_create(unsigned blocksize); /* Create stream and return its writing fastbuf */
-struct fastbuf *fbmem_clone_read(struct fastbuf *); /* Create reading fastbuf */
+struct fastbuf *bfdopen_internal(int fd, const char *name, uns buflen);
+struct fastbuf *bfmmopen_internal(int fd, const char *name, uns mode);
+
+extern uns fbdir_cheat;
+struct asio_queue;
+struct fastbuf *fbdir_open_fd_internal(int fd, const char *name, struct asio_queue *io_queue, uns buffer_size, uns read_ahead, uns write_back);
-/* FastIO on memory mapped files */
+void bclose_file_helper(struct fastbuf *f, int fd, int is_temp_file);
-struct fastbuf *bopen_mm(byte *name, uns mode);
+/* FastIO on in-memory streams */
+
+struct fastbuf *fbmem_create(uns blocksize); /* Create stream and return its writing fastbuf */
+struct fastbuf *fbmem_clone_read(struct fastbuf *); /* Create reading fastbuf */
/* FastI on file descriptors with limit */
void fbgrow_reset(struct fastbuf *b); /* Reset stream and prepare for writing */
void fbgrow_rewind(struct fastbuf *b); /* Prepare for reading */
+/* FastO on memory pools */
+
+struct mempool;
+struct fbpool {
+ struct fastbuf fb;
+ struct mempool *mp;
+};
+
+void fbpool_init(struct fbpool *fb); /* Initialize a new fastbuf */
+void fbpool_start(struct fbpool *fb, struct mempool *mp, uns init_size);
+ /* Start a new continuous block and prepare for writing (see mp_start()) */
+void *fbpool_end(struct fbpool *fb); /* Close the block and return its address (see mp_end()).
+ The length can be determined with mp_size(mp, ptr). */
+
/* FastO with atomic writes for multi-threaded programs */
struct fb_atomic {
};
#define FB_ATOMIC(f) ((struct fb_atomic *)(f)->is_fastbuf)
-struct fastbuf *fbatomic_open(byte *name, struct fastbuf *master, uns bufsize, int record_len);
+struct fastbuf *fbatomic_open(const char *name, struct fastbuf *master, uns bufsize, int record_len);
void fbatomic_internal_write(struct fastbuf *b);
static inline void
/* Configuring stream parameters */
-int bconfig(struct fastbuf *f, uns type, int data);
+enum bconfig_type {
+ BCONFIG_IS_TEMP_FILE, /* 0=normal file, 1=temporary file, 2=shared fd */
+ BCONFIG_KEEP_BACK_BUF, /* Optimize for bi-directional access */
+};
-#define BCONFIG_IS_TEMP_FILE 0
+int bconfig(struct fastbuf *f, uns type, int data);
/* Universal functions working on all fastbuf's */
void bseek(struct fastbuf *f, sh_off_t pos, int whence);
void bsetpos(struct fastbuf *f, sh_off_t pos);
void brewind(struct fastbuf *f);
-sh_off_t bfilesize(struct fastbuf *f); // -1 if not seekable
+sh_off_t bfilesize(struct fastbuf *f); /* -1 if not seekable */
static inline sh_off_t btell(struct fastbuf *f)
{
return bread_slow(f, b, l, 1);
}
-void bwrite_slow(struct fastbuf *f, void *b, uns l);
-static inline void bwrite(struct fastbuf *f, void *b, uns l)
+void bwrite_slow(struct fastbuf *f, const void *b, uns l);
+static inline void bwrite(struct fastbuf *f, const void *b, uns l)
{
if (bavailw(f) >= l)
{
bwrite_slow(f, b, l);
}
-byte *bgets(struct fastbuf *f, byte *b, uns l); /* Non-std */
-int bgets_nodie(struct fastbuf *f, byte *b, uns l);
-byte *bgets0(struct fastbuf *f, byte *b, uns l);
+/*
+ * Functions for reading of strings:
+ *
+ * bgets() reads a line, strips the trailing '\n' and returns a pointer
+ * to the terminating 0 or NULL on EOF. Dies if the line is too long.
+ * bgets0() does the same for 0-terminated strings.
+ * bgets_nodie() a variant of bgets() which returns either the length of the
+ * string (excluding the terminator) or -1 if the line does not fit
+ * in the buffer. In such cases, it returns after reading exactly `l'
+ * bytes of input.
+ * bgets_bb() a variant of bgets() which allocates the string in a growing buffer
+ * bgets_mp() the same, but in a mempool
+ * bgets_stk() the same, but on the stack by alloca()
+ */
+
+char *bgets(struct fastbuf *f, char *b, uns l);
+char *bgets0(struct fastbuf *f, char *b, uns l);
+int bgets_nodie(struct fastbuf *f, char *b, uns l);
struct mempool;
struct bb_t;
uns bgets_bb(struct fastbuf *f, struct bb_t *b, uns limit);
-byte *bgets_mp(struct fastbuf *f, struct mempool *mp);
+char *bgets_mp(struct fastbuf *f, struct mempool *mp);
struct bgets_stk_struct {
struct fastbuf *f;
#define bgets_stk(fb) ({ struct bgets_stk_struct _s; _s.f = (fb); for (bgets_stk_init(&_s); _s.cur_len; _s.cur_buf = alloca(_s.cur_len), bgets_stk_step(&_s)); _s.cur_buf; })
static inline void
-bputs(struct fastbuf *f, byte *b)
+bputs(struct fastbuf *f, const char *b)
{
bwrite(f, b, strlen(b));
}
static inline void
-bputs0(struct fastbuf *f, byte *b)
+bputs0(struct fastbuf *f, const char *b)
{
bwrite(f, b, strlen(b)+1);
}
static inline void
-bputsn(struct fastbuf *f, byte *b)
+bputsn(struct fastbuf *f, const char *b)
{
bputs(f, b);
bputc(f, '\n');
/* Formatted output */
-int bprintf(struct fastbuf *b, char *msg, ...) FORMAT_CHECK(printf,2,3);
-int vbprintf(struct fastbuf *b, char *msg, va_list args);
+int bprintf(struct fastbuf *b, const char *msg, ...) FORMAT_CHECK(printf,2,3);
+int vbprintf(struct fastbuf *b, const char *msg, va_list args);
#endif
--- /dev/null
+# Tests for fastbufs
+
+Run: ../obj/lib/fb-file-t
+Out: 112
+ <hello><hello><hello><hello><hello><hello><hello><hello><hello><hello><hello><hello><hello><hello><hello><hello>
+ 112 116
+
+Run: ../obj/lib/fb-grow-t
+Out: <10><10><0>1234512345<10><9>5<10>
+ <10><10><0>1234512345<10><9>5<10>
+ <10><10><0>1234512345<10><9>5<10>
+ <10><10><0>1234512345<10><9>5<10>
+ <10><10><0>1234512345<10><9>5<10>
+
+Run: ../obj/lib/fb-pool-t
}
struct fastbuf *
-fbatomic_open(byte *name, struct fastbuf *master, uns bufsize, int record_len)
+fbatomic_open(const char *name, struct fastbuf *master, uns bufsize, int record_len)
{
struct fb_atomic *F = xmalloc_zero(sizeof(*F));
struct fastbuf *f = &F->fb;
--- /dev/null
+/*
+ * UCW Library -- Fast Buffered I/O on O_DIRECT Files
+ *
+ * (c) 2006--2007 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+/*
+ * This is a fastbuf backend for fast streaming I/O using O_DIRECT and
+ * the asynchronous I/O module. It's designed for use on large files
+ * which don't fit in the disk cache.
+ *
+ * CAVEATS:
+ *
+ * - All operations with a single fbdirect handle must be done
+ * within a single thread, unless you provide a custom I/O queue
+ * and take care of locking.
+ *
+ * FIXME: what if the OS doesn't support O_DIRECT?
+ * FIXME: unaligned seeks and partial writes?
+ * FIXME: append to unaligned file
+ */
+
+#undef LOCAL_DEBUG
+
+#include "lib/lib.h"
+#include "lib/fastbuf.h"
+#include "lib/lfs.h"
+#include "lib/asio.h"
+#include "lib/conf.h"
+#include "lib/threads.h"
+
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdio.h>
+
+uns fbdir_cheat;
+
+/*
+ * Configuration section of this back-end (declared as "FBDirect" by
+ * fbdir_global_init()). Its only item, "Cheat", is stored in fbdir_cheat;
+ * when that is non-zero, fbdir_spout() skips its alignment assertion and
+ * the padded-write-plus-truncate path.
+ */
+static struct cf_section fbdir_cf = {
+ CF_ITEMS {
+ CF_UNS("Cheat", &fbdir_cheat),
+ CF_END
+ }
+};
+
+#define FBDIR_ALIGN 512
+
+enum fbdir_mode { // Current operating mode
+ M_NULL, // Neither reading nor writing; first enumerator, so also the zeroed initial state
+ M_READ, // Entered by fbdir_refill()
+ M_WRITE // Entered by fbdir_spout()
+};
+
+/* Per-handle state of a direct-I/O fastbuf; the generic fastbuf is embedded as the first member. */
+struct fb_direct {
+ struct fastbuf fb;
+ int fd; // File descriptor
+ int is_temp_file; // Handed to bclose_file_helper() on close
+ struct asio_queue *io_queue; // I/O queue to use
+ struct asio_queue *user_queue; // If io_queue was supplied by the user
+ struct asio_request *pending_read; // Read submitted by fbdir_submit_read() and not yet waited for
+ struct asio_request *done_read; // Completed read not yet handed over to the fastbuf
+ struct asio_request *active_buffer; // Request whose buffer currently backs the fastbuf
+ enum fbdir_mode mode;
+ byte name[0]; // File name, allocated together with the structure
+};
+/* Convert a generic fastbuf pointer to the enclosing fb_direct. */
+#define FB_DIRECT(f) ((struct fb_direct *)(f)->is_fastbuf)
+
+/* Marked CONSTRUCTOR: declares the "FBDirect" configuration section. */
+static void CONSTRUCTOR
+fbdir_global_init(void)
+{
+  cf_declare_section("FBDirect", &fbdir_cf, 0);
+}
+
+/*
+ * Wait until F has no read in flight. Every request returned by asio_wait()
+ * is matched to its owner through user_data and moved from the owner's
+ * pending_read slot to its done_read slot.
+ */
+static void
+fbdir_read_sync(struct fb_direct *F)
+{
+  for (;;)
+    {
+      if (!F->pending_read)
+        return;
+
+      struct asio_request *r = asio_wait(F->io_queue);
+      ASSERT(r);
+
+      /* The finished request need not belong to F itself */
+      struct fb_direct *G = r->user_data;
+      ASSERT(G);
+      ASSERT(G->pending_read == r && !G->done_read);
+      G->done_read = r;
+      G->pending_read = NULL;
+    }
+}
+
+/*
+ * Switch the handle to another operating mode. Leaving M_READ waits for
+ * the outstanding read and returns a finished read-ahead request; leaving
+ * M_WRITE waits for the queue to sync. In all cases the active buffer is
+ * returned to the queue before the new mode is recorded.
+ */
+static void
+fbdir_change_mode(struct fb_direct *F, enum fbdir_mode mode)
+{
+  if (F->mode == mode)
+    return;
+  DBG("FB-DIRECT: Switching mode to %d", mode);
+
+  if (F->mode == M_READ)
+    {
+      /* Wait for read-ahead requests to finish and give back what they produced */
+      fbdir_read_sync(F);
+      if (F->done_read)
+        {
+          asio_put(F->done_read);
+          F->done_read = NULL;
+        }
+    }
+  else if (F->mode == M_WRITE)
+    {
+      /* Wait for pending writebacks */
+      asio_sync(F->io_queue);
+    }
+  /* M_NULL: nothing to wind down */
+
+  if (F->active_buffer)
+    {
+      asio_put(F->active_buffer);
+      F->active_buffer = NULL;
+    }
+  F->mode = mode;
+}
+
+/*
+ * Submit one asynchronous read of a full queue buffer on F's descriptor
+ * and remember the request in F->pending_read.
+ */
+static void
+fbdir_submit_read(struct fb_direct *F)
+{
+  struct asio_request *req = asio_get(F->io_queue);
+
+  req->user_data = F;		/* lets fbdir_read_sync() find the owner */
+  req->op = ASIO_READ;
+  req->fd = F->fd;
+  req->len = F->io_queue->buffer_size;
+  asio_submit(req);
+  F->pending_read = req;
+}
+
+/*
+ * Refill callback: make the next completed read the active buffer.
+ * Returns the number of bytes now available, or 0 when the read
+ * returned no data. Dies on a read error.
+ */
+static int
+fbdir_refill(struct fastbuf *f)
+{
+ struct fb_direct *F = FB_DIRECT(f);
+
+ DBG("FB-DIRECT: Refill");
+
+ /* No finished read at hand: start one if none is in flight, then wait for it */
+ if (!F->done_read)
+ {
+ if (!F->pending_read)
+ {
+ fbdir_change_mode(F, M_READ);
+ fbdir_submit_read(F);
+ }
+ fbdir_read_sync(F);
+ ASSERT(F->done_read);
+ }
+
+ /* The finished request replaces the previous active buffer, which goes back to the queue */
+ struct asio_request *r = F->done_read;
+ F->done_read = NULL;
+ if (F->active_buffer)
+ asio_put(F->active_buffer);
+ F->active_buffer = r;
+ if (!r->status)
+ return 0;
+ if (r->status < 0)
+ die("Error reading %s: %s", f->name, strerror(r->returned_errno));
+ /* Point the fastbuf at the request's buffer; r->status is the number of bytes read */
+ f->bptr = f->buffer = r->buffer;
+ f->bstop = f->bufend = f->buffer + r->status;
+ f->pos += r->status;
+
+ fbdir_submit_read(F); // Read-ahead the next block
+
+ return r->status;
+}
+
+/*
+ * Spout callback: submit the bytes between bstop and bptr of the active
+ * buffer as a write-back request and attach a buffer for further writing
+ * to the fastbuf.
+ */
+static void
+fbdir_spout(struct fastbuf *f)
+{
+ struct fb_direct *F = FB_DIRECT(f);
+ struct asio_request *r;
+
+ DBG("FB-DIRECT: Spout");
+
+ fbdir_change_mode(F, M_WRITE);
+ r = F->active_buffer;
+ if (r && f->bptr > f->bstop)
+ {
+ r->op = ASIO_WRITE_BACK;
+ r->fd = F->fd;
+ r->len = f->bptr - f->bstop;
+ /* Unless cheating, a write must start at a FBDIR_ALIGN boundary */
+ ASSERT(!(f->pos % FBDIR_ALIGN) || fbdir_cheat);
+ f->pos += r->len;
+ if (!fbdir_cheat && r->len % FBDIR_ALIGN) // Have to simulate incomplete writes
+ {
+ /* Write a length rounded up to FBDIR_ALIGN, wait for it, then cut the file back to f->pos */
+ r->len = ALIGN_TO(r->len, FBDIR_ALIGN);
+ asio_submit(r);
+ asio_sync(F->io_queue);
+ DBG("FB-DIRECT: Truncating at %llu", (long long)f->pos);
+ if (sh_ftruncate(F->fd, f->pos) < 0)
+ die("Error truncating %s: %m", f->name);
+ }
+ else
+ asio_submit(r);
+ /* The submitted request is no longer used here; a fresh one is fetched below */
+ r = NULL;
+ }
+ if (!r)
+ r = asio_get(F->io_queue);
+ /* Start with an empty buffer spanning the whole request buffer */
+ f->bstop = f->bptr = f->buffer = r->buffer;
+ f->bufend = f->buffer + F->io_queue->buffer_size;
+ F->active_buffer = r;
+}
+
+/*
+ * Seek callback. A SEEK_SET to the current position is a no-op; otherwise
+ * the handle is switched to M_NULL first and the descriptor is repositioned
+ * with sh_seek(). Returns 1 on success, 0 if the seek failed.
+ */
+static int
+fbdir_seek(struct fastbuf *f, sh_off_t pos, int whence)
+{
+  struct fb_direct *F = FB_DIRECT(f);
+
+  DBG("FB-DIRECT: Seek %llu %d", (long long)pos, whence);
+
+  if (whence == SEEK_SET && pos == f->pos)
+    return 1;
+
+  /* Wait for all async requests to finish */
+  fbdir_change_mode(F, M_NULL);
+
+  sh_off_t target = sh_seek(F->fd, pos, whence);
+  if (target >= 0)
+    {
+      f->pos = target;
+      return 1;
+    }
+  return 0;
+}
+
+/*
+ * Take a reference to the I/O queue stored in the current thread's context,
+ * creating it on first use. buffer_size and write_back are applied only
+ * when the queue is created here; an existing queue is returned as it is.
+ */
+static struct asio_queue *
+fbdir_get_io_queue(uns buffer_size, uns write_back)
+{
+  struct ucwlib_context *ctx = ucwlib_thread_context();
+
+  if (!ctx->io_queue)
+    {
+      struct asio_queue *fresh = xmalloc_zero(sizeof(struct asio_queue));
+      fresh->buffer_size = buffer_size;
+      fresh->max_writebacks = write_back;
+      asio_init_queue(fresh);
+      ctx->io_queue = fresh;
+    }
+
+  struct asio_queue *queue = ctx->io_queue;
+  queue->use_count++;
+  DBG("FB-DIRECT: Got I/O queue, uc=%d", queue->use_count);
+  return queue;
+}
+
+/*
+ * Drop one reference to the current thread's I/O queue; when the count
+ * reaches zero the queue is cleaned up, freed and removed from the context.
+ */
+static void
+fbdir_put_io_queue(void)
+{
+  struct ucwlib_context *ctx = ucwlib_thread_context();
+  struct asio_queue *q = ctx->io_queue;
+
+  ASSERT(q);
+  DBG("FB-DIRECT: Put I/O queue, uc=%d", q->use_count);
+
+  q->use_count--;
+  if (q->use_count)
+    return;
+
+  asio_cleanup_queue(q);
+  xfree(q);
+  ctx->io_queue = NULL;
+}
+
+/*
+ * Close callback: wind down the current mode, release the per-thread queue
+ * unless the caller supplied its own, close the file through
+ * bclose_file_helper() and free the handle.
+ */
+static void
+fbdir_close(struct fastbuf *f)
+{
+  struct fb_direct *F = FB_DIRECT(f);
+  int owns_thread_queue = !F->user_queue;
+
+  DBG("FB-DIRECT: Close");
+
+  fbdir_change_mode(F, M_NULL);
+  if (owns_thread_queue)
+    fbdir_put_io_queue();
+
+  bclose_file_helper(f, F->fd, F->is_temp_file);
+  xfree(f);
+}
+
+/*
+ * Config callback. BCONFIG_IS_TEMP_FILE stores the new flag and returns
+ * the previous one; any other item yields -1.
+ */
+static int
+fbdir_config(struct fastbuf *f, uns item, int value)
+{
+  if (item != BCONFIG_IS_TEMP_FILE)
+    return -1;
+
+  struct fb_direct *F = FB_DIRECT(f);
+  int previous = F->is_temp_file;
+  F->is_temp_file = value;
+  return previous;
+}
+
+/*
+ * Create a direct-I/O fastbuf on an open descriptor. The name is copied
+ * into the handle. If q is given it is used as the I/O queue (and recorded
+ * as user-supplied); otherwise the per-thread queue is obtained with
+ * buffer_size and write_back. read_ahead is currently unused.
+ */
+struct fastbuf *
+fbdir_open_fd_internal(int fd, const char *name, struct asio_queue *q, uns buffer_size, uns read_ahead UNUSED, uns write_back)
+{
+  int namelen = strlen(name) + 1;
+  struct fb_direct *F = xmalloc(sizeof(struct fb_direct) + namelen);
+  struct fastbuf *f = &F->fb;
+
+  DBG("FB-DIRECT: Open");
+
+  /* Only the fixed part is cleared; the name is stored right behind it */
+  bzero(F, sizeof(*F));
+  f->name = F->name;
+  memcpy(f->name, name, namelen);
+  F->fd = fd;
+
+  if (!q)
+    F->io_queue = fbdir_get_io_queue(buffer_size, write_back);
+  else
+    F->io_queue = F->user_queue = q;
+
+  f->can_overwrite_buffer = 2;
+  f->config = fbdir_config;
+  f->close = fbdir_close;
+  f->seek = fbdir_seek;
+  f->spout = fbdir_spout;
+  f->refill = fbdir_refill;
+  return f;
+}
+
+#ifdef TEST
+
+#include "lib/getopt.h"
+
+/*
+ * Test program: copies the first file argument (or fd 0) to the second
+ * file argument (or fd 1) through direct-I/O fastbufs, then rewinds the
+ * output and reads one byte back.
+ */
+int main(int argc, char **argv)
+{
+ struct fb_params par = { .type = FB_DIRECT };
+ struct fastbuf *f, *t;
+
+ log_init(NULL);
+ if (cf_getopt(argc, argv, CF_SHORT_OPTS, CF_NO_LONG_OPTS, NULL) >= 0)
+ die("Hey, whaddya want?");
+ f = (optind < argc) ? bopen_file(argv[optind++], O_RDONLY, &par) : bopen_fd(0, &par);
+ t = (optind < argc) ? bopen_file(argv[optind++], O_RDWR | O_CREAT | O_TRUNC, &par) : bopen_fd(1, &par);
+
+ /* Copy everything and check that both streams advanced equally */
+ bbcopy(f, t, ~0U);
+ ASSERT(btell(f) == btell(t));
+
+#if 0 // This triggers unaligned write
+ bflush(t);
+ bputc(t, '\n');
+#endif
+
+ /* Switch the output from writing to reading */
+ brewind(t);
+ bgetc(t);
+ ASSERT(btell(t) == 1);
+
+ bclose(f);
+ bclose(t);
+ return 0;
+}
+
+#endif
/*
* UCW Library -- Fast Buffered I/O on Files
*
- * (c) 1997--2004 Martin Mares <mj@ucw.cz>
+ * (c) 1997--2007 Martin Mares <mj@ucw.cz>
+ * (c) 2007 Pavel Charvat <pchar@ucw.cz>
*
* This software may be freely distributed and used according to the terms
* of the GNU Lesser General Public License.
struct fb_file {
struct fastbuf fb;
int fd; /* File descriptor */
- int is_temp_file; /* 0=normal file, 1=temporary file, delete on close, -1=shared FD */
+ int is_temp_file; /* Passed to bclose_file_helper(): 0=normal file, 1=temporary file (unlinked on close), other values leave the FD open */
+ int keep_back_buf; /* Optimize for backwards reading */
+ sh_off_t wpos; /* Real file position */
+ uns wlen; /* Window size */
};
#define FB_FILE(f) ((struct fb_file *)(f)->is_fastbuf)
#define FB_BUFFER(f) (byte *)(FB_FILE(f) + 1)
+/*
+ * Refill callback with delayed seeking. f->pos is the position the caller
+ * wants to read from, F->wpos the real file position and F->wlen the number
+ * of valid bytes currently held in the buffer. Depending on the distance
+ * between the two positions the function skips data by reading, reuses a
+ * part of the old window, or calls sh_seek(). Returns the number of bytes
+ * made available (1 when the position falls into the old window, 0 at EOF).
+ */
static int
bfd_refill(struct fastbuf *f)
{
- f->bptr = f->buffer = FB_BUFFER(f);
- int l = read(FB_FILE(f)->fd, f->buffer, f->bufend-f->buffer);
- if (l < 0)
- die("Error reading %s: %m", f->name);
- f->bstop = f->buffer + l;
- f->pos += l;
- return l;
+ struct fb_file *F = FB_FILE(f);
+ byte *read_ptr = (f->buffer = FB_BUFFER(f));
+ /* back = bytes to keep in front of the read position (a quarter of the buffer when keep_back_buf is set) */
+ uns blen = f->bufend - f->buffer, back = F->keep_back_buf ? blen >> 2 : 0, read_len = blen;
+ /* Forward or no seek */
+ if (F->wpos <= f->pos)
+ {
+ sh_off_t diff = f->pos - F->wpos;
+ /* Formula for long forward seeks (prefer lseek()) */
+ if (diff > ((sh_off_t)blen << 2))
+ {
+long_seek:
+ f->bptr = f->buffer + back;
+ f->bstop = f->buffer + blen;
+ goto seek;
+ }
+ /* Short forward seek (prefer read() to skip data) */
+ else if ((uns)diff >= back)
+ {
+ uns skip = diff - back;
+ F->wpos += skip;
+ while (skip)
+ {
+ int l = read(F->fd, f->buffer, MIN(skip, blen));
+ if (unlikely(l <= 0))
+ if (l < 0)
+ die("Error reading %s: %m", f->name);
+ else
+ {
+ /* EOF while skipping: take back the part of the skip that was not read */
+ F->wpos -= skip;
+ goto eof;
+ }
+ skip -= l;
+ }
+ }
+ /* Reuse part of the previous window and append new data (also F->wpos == f->pos) */
+ else
+ {
+ uns keep = back - (uns)diff;
+ if (keep >= F->wlen)
+ back = diff + (keep = F->wlen);
+ else
+ memmove(f->buffer, f->buffer + F->wlen - keep, keep);
+ read_len -= keep;
+ read_ptr += keep;
+ }
+ f->bptr = f->buffer + back;
+ f->bstop = f->buffer + blen;
+ }
+ /* Backwards seek */
+ else
+ {
+ sh_off_t diff = F->wpos - f->pos;
+ /* Formula for long backwards seeks (keep smaller backbuffer than for shorter seeks) */
+ if (diff > ((sh_off_t)blen << 1))
+ {
+ if ((sh_off_t)back > f->pos)
+ back = f->pos;
+ goto long_seek;
+ }
+ /* Seek into previous window (do nothing... for example brewind) */
+ else if ((uns)diff <= F->wlen)
+ {
+ f->bstop = f->buffer + F->wlen;
+ f->bptr = f->bstop - diff;
+ f->pos = F->wpos;
+ return 1;
+ }
+ back *= 3;
+ if ((sh_off_t)back > f->pos)
+ back = f->pos;
+ f->bptr = f->buffer + back;
+ read_len = blen;
+ f->bstop = f->buffer + read_len;
+ /* Reuse part of previous window */
+ if (F->wlen && read_len <= back + diff && read_len > back + diff - F->wlen)
+ {
+ uns keep = read_len + F->wlen - back - diff;
+ memmove(f->buffer + read_len - keep, f->buffer, keep);
+ }
+seek:
+ /* Do lseek() */
+ F->wpos = f->pos + (f->buffer - f->bptr);
+ if (sh_seek(F->fd, F->wpos, SEEK_SET) < 0)
+ die("Error seeking %s: %m", f->name);
+ }
+ /* Read (part of) buffer */
+ do
+ {
+ int l = read(F->fd, read_ptr, read_len);
+ if (unlikely(l < 0))
+ die("Error reading %s: %m", f->name);
+ if (!l)
+ if (unlikely(read_ptr < f->bptr))
+ goto eof;
+ else
+ break; /* Incomplete read because of EOF */
+ read_ptr += l;
+ read_len -= l;
+ F->wpos += l;
+ }
+ while (read_ptr <= f->bptr);
+ /* A short read leaves read_len non-zero; the valid data then ends at read_ptr */
+ if (read_len)
+ f->bstop = read_ptr;
+ f->pos += f->bstop - f->bptr;
+ F->wlen = f->bstop - f->buffer;
+ return f->bstop - f->bptr;
+eof:
+ /* The requested position lies beyond EOF */
+ f->bptr = f->bstop = f->buffer;
+ F->wlen = 0;
+ return 0;
}
static void
bfd_spout(struct fastbuf *f)
{
+ /* Do delayed lseek() if needed */
+ if (FB_FILE(f)->wpos != f->pos && sh_seek(FB_FILE(f)->fd, f->pos, SEEK_SET) < 0)
+ die("Error seeking %s: %m", f->name);
+
int l = f->bptr - f->buffer;
byte *c = f->buffer;
- f->pos += l;
+ /* Write the buffer */
+ FB_FILE(f)->wpos = (f->pos += l);
+ FB_FILE(f)->wlen = 0;
while (l)
{
int z = write(FB_FILE(f)->fd, c, l);
+/*
+ * Seek callback. SEEK_SET and SEEK_CUR only record the new position in
+ * f->pos; the descriptor itself is moved by the next refill or spout.
+ * SEEK_END has to ask the kernel immediately. Returns 1 on success, 0 on
+ * failure.
+ * NOTE(review): SEEK_SET stores pos unchecked, so a negative target is only
+ * detected by a later sh_seek() -- confirm that callers never pass one.
+ */
static int
bfd_seek(struct fastbuf *f, sh_off_t pos, int whence)
{
- sh_off_t l = sh_seek(FB_FILE(f)->fd, pos, whence);
- if (l < 0)
- return 0;
- f->pos = l;
- return 1;
+ /* Delay the seek for the next refill() or spout() call (if whence != SEEK_END). */
+ sh_off_t l;
+ switch (whence)
+ {
+ case SEEK_SET:
+ f->pos = pos;
+ return 1;
+ case SEEK_CUR:
+ l = f->pos + pos;
+ /* Reject the seek if the sum did not move in the direction given by the sign of pos (wrap-around) */
+ if ((pos > 0) ^ (l > f->pos))
+ return 0;
+ f->pos = l;
+ return 1;
+ case SEEK_END:
+ l = sh_seek(FB_FILE(f)->fd, pos, SEEK_END);
+ if (l < 0)
+ return 0;
+ /* The descriptor really moved, so the old window no longer applies */
+ FB_FILE(f)->wpos = f->pos = l;
+ FB_FILE(f)->wlen = 0;
+ return 1;
+ default:
+ ASSERT(0);
+ }
}
+/* Close callback: unlinking and closing are delegated to bclose_file_helper(), then the handle is freed. */
static void
bfd_close(struct fastbuf *f)
{
- switch (FB_FILE(f)->is_temp_file)
- {
- case 1:
- if (unlink(f->name) < 0)
- log(L_ERROR, "unlink(%s): %m", f->name);
- case 0:
- close(FB_FILE(f)->fd);
- }
+ bclose_file_helper(f, FB_FILE(f)->fd, FB_FILE(f)->is_temp_file);
xfree(f);
}
+/*
+ * Config callback. For BCONFIG_IS_TEMP_FILE and BCONFIG_KEEP_BACK_BUF the
+ * new value is stored and the previous one returned; unknown items yield -1.
+ */
static int
bfd_config(struct fastbuf *f, uns item, int value)
{
+ int orig;
+
switch (item)
{
- case BCONFIG_IS_TEMP_FILE:
- FB_FILE(f)->is_temp_file = value;
- return 0;
- default:
- return -1;
+ case BCONFIG_IS_TEMP_FILE:
+ orig = FB_FILE(f)->is_temp_file;
+ FB_FILE(f)->is_temp_file = value;
+ return orig;
+ case BCONFIG_KEEP_BACK_BUF:
+ orig = FB_FILE(f)->keep_back_buf;
+ FB_FILE(f)->keep_back_buf = value;
+ return orig;
+ default:
+ return -1;
}
}
-static struct fastbuf *
-bfdopen_internal(int fd, uns buflen, byte *name)
+struct fastbuf *
+bfdopen_internal(int fd, const char *name, uns buflen)
{
+ ASSERT(buflen);
int namelen = strlen(name) + 1;
- struct fb_file *F = xmalloc(sizeof(struct fb_file) + buflen + namelen);
+ struct fb_file *F = xmalloc_zero(sizeof(struct fb_file) + buflen + namelen);
struct fastbuf *f = &F->fb;
bzero(F, sizeof(*F));
return f;
}
-struct fastbuf *
-bopen_try(byte *name, uns mode, uns buflen)
-{
- int fd = sh_open(name, mode, 0666);
- if (fd < 0)
- return NULL;
- struct fastbuf *b = bfdopen_internal(fd, buflen, name);
- if (mode & O_APPEND)
- bfd_seek(b, 0, SEEK_END);
- return b;
-}
-
-struct fastbuf *
-bopen(byte *name, uns mode, uns buflen)
-{
- if (!buflen)
- return bopen_mm(name, mode);
- struct fastbuf *b = bopen_try(name, mode, buflen);
- if (!b)
- die("Unable to %s file %s: %m",
- (mode & O_CREAT) ? "create" : "open", name);
- return b;
-}
-
-struct fastbuf *
-bfdopen(int fd, uns buflen)
-{
- byte x[32];
-
- sprintf(x, "fd%d", fd);
- return bfdopen_internal(fd, buflen, x);
-}
-
-struct fastbuf *
-bfdopen_shared(int fd, uns buflen)
-{
- struct fastbuf *f = bfdopen(fd, buflen);
- FB_FILE(f)->is_temp_file = -1;
- return f;
-}
-
+/* Flush the fastbuf and fsync() its descriptor; an fsync() failure is only logged. */
void
bfilesync(struct fastbuf *b)
{
bflush(b);
if (fsync(FB_FILE(b)->fd) < 0)
- log(L_ERROR, "fsync(%s) failed: %m", b->name);
+ msg(L_ERROR, "fsync(%s) failed: %m", b->name);
}
#ifdef TEST
-int main(int argc UNUSED, char **argv UNUSED)
+int main(void)
{
struct fastbuf *f, *t;
-
- f = bopen("/etc/profile", O_RDONLY, 16);
- t = bfdopen(1, 13);
- bbcopy(f, t, 100);
- printf("%d %d\n", (int)btell(f), (int)btell(t));
+ f = bopen_tmp(16);
+ t = bfdopen_shared(1, 13);
+ for (uns i = 0; i < 16; i++)
+ bwrite(f, "<hello>", 7);
+ bprintf(t, "%d\n", (int)btell(f));
+ brewind(f);
+ bbcopy(f, t, ~0U);
+ bprintf(t, "\n%d %d\n", (int)btell(f), (int)btell(t));
bclose(f);
bclose(t);
return 0;
int main(void)
{
struct fastbuf *f;
- int t;
+ uns t;
f = fbgrow_create(3);
for (uns i=0; i<5; i++)
{
- fbgrow_write(f);
+ fbgrow_reset(f);
bwrite(f, "12345", 5);
bwrite(f, "12345", 5);
printf("<%d>", (int)btell(f));
printf("<%d>", (int)btell(f));
fbgrow_rewind(f);
printf("<%d>", (int)btell(f));
- while ((t = bgetc(f)) >= 0)
+ while ((t = bgetc(f)) != ~0U)
putchar(t);
printf("<%d>", (int)btell(f));
fbgrow_rewind(f);
bseek(f, -1, SEEK_END);
printf("<%d>", (int)btell(f));
- while ((t = bgetc(f)) >= 0)
+ while ((t = bgetc(f)) != ~0U)
putchar(t);
printf("<%d>\n", (int)btell(f));
}
* of the GNU Lesser General Public License.
*/
+#undef LOCAL_DEBUG
+
#include "lib/lib.h"
#include "lib/fastbuf.h"
#include "lib/lfs.h"
sh_off_t file_size;
sh_off_t file_extend;
sh_off_t window_pos;
+ uns window_size;
int mode;
};
#define FB_MMAP(f) ((struct fb_mmap *)(f)->is_fastbuf)
sh_off_t pos0 = f->pos & ~(sh_off_t)(CPU_PAGE_SIZE-1);
int l = MIN((sh_off_t)mmap_window_size, F->file_extend - pos0);
uns ll = ALIGN_TO(l, CPU_PAGE_SIZE);
- uns oll = ALIGN_TO(f->bufend - f->buffer, CPU_PAGE_SIZE);
int prot = ((F->mode & O_ACCMODE) == O_RDONLY) ? PROT_READ : (PROT_READ | PROT_WRITE);
DBG(" ... Mapping %x(%x)+%x(%x) len=%x extend=%x", (int)pos0, (int)f->pos, ll, l, (int)F->file_size, (int)F->file_extend);
- if (ll != oll && f->buffer)
+ if (ll != F->window_size && f->buffer)
{
- munmap(f->buffer, oll);
+ munmap(f->buffer, F->window_size);
f->buffer = NULL;
}
+ F->window_size = ll;
if (!f->buffer)
f->buffer = sh_mmap(NULL, ll, prot, MAP_SHARED, F->fd, pos0);
else
ASSERT(whence == SEEK_SET);
ASSERT(pos >= 0 && pos <= FB_MMAP(f)->file_size);
f->pos = pos;
- f->bptr = f->bstop = f->bufend; /* force refill/spout call */
+ f->bptr = f->bstop = f->bufend = f->buffer; /* force refill/spout call */
DBG("Seek -> %p %p %p(%x) %p", f->buffer, f->bptr, f->bstop, (int)f->pos, f->bufend);
return 1;
}
struct fb_mmap *F = FB_MMAP(f);
if (f->buffer)
- munmap(f->buffer, ALIGN_TO(f->bufend-f->buffer, CPU_PAGE_SIZE));
+ munmap(f->buffer, F->window_size);
if (F->file_extend > F->file_size &&
sh_ftruncate(F->fd, F->file_size))
die("ftruncate(%s): %m", f->name);
- switch (F->is_temp_file)
- {
- case 1:
- if (unlink(f->name) < 0)
- log(L_ERROR, "unlink(%s): %m", f->name);
- case 0:
- close(F->fd);
- }
+ bclose_file_helper(f, F->fd, F->is_temp_file);
xfree(f);
}
+/* Config callback: BCONFIG_IS_TEMP_FILE stores the new flag and returns the previous one; other items yield -1. */
static int
bfmm_config(struct fastbuf *f, uns item, int value)
{
+ int orig;
+
switch (item)
{
case BCONFIG_IS_TEMP_FILE:
+ orig = FB_MMAP(f)->is_temp_file;
FB_MMAP(f)->is_temp_file = value;
- return 0;
+ return orig;
default:
return -1;
}
}
-static struct fastbuf *
-bfmmopen_internal(int fd, byte *name, uns mode)
+struct fastbuf *
+bfmmopen_internal(int fd, const char *name, uns mode)
{
int namelen = strlen(name) + 1;
struct fb_mmap *F = xmalloc(sizeof(struct fb_mmap) + namelen);
memcpy(f->name, name, namelen);
F->fd = fd;
F->file_extend = F->file_size = sh_seek(fd, 0, SEEK_END);
+ if (F->file_size < 0)
+ die("seek(%s): %m", name);
if (mode & O_APPEND)
f->pos = F->file_size;
F->mode = mode;
return f;
}
-struct fastbuf *
-bopen_mm(byte *name, uns mode)
-{
- int fd;
-
- if ((mode & O_ACCMODE) == O_WRONLY)
- mode = (mode & ~O_ACCMODE) | O_RDWR;
- fd = sh_open(name, mode, 0666);
- if (fd < 0)
- die("Unable to %s file %s: %m",
- (mode & O_CREAT) ? "create" : "open", name);
- return bfmmopen_internal(fd, name, mode);
-}
-
#ifdef TEST
int main(int argc, char **argv)
{
- struct fastbuf *f = bopen_mm(argv[1], O_RDONLY);
- struct fastbuf *g = bopen_mm(argv[2], O_RDWR | O_CREAT | O_TRUNC);
+ struct fb_params par = { .type = FB_MMAP };
+ struct fastbuf *f = bopen_file(argv[1], O_RDONLY, &par);
+ struct fastbuf *g = bopen_file(argv[2], O_RDWR | O_CREAT | O_TRUNC, &par);
int c;
DBG("Copying");
--- /dev/null
+/*
+ * UCW Library -- FastIO on files with run-time parametrization
+ *
+ * (c) 2007 Pavel Charvat <pchar@ucw.cz>
+ * (c) 2007 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/conf.h"
+#include "lib/lfs.h"
+#include "lib/fastbuf.h"
+
+#include <fcntl.h>
+#include <stdio.h>
+
+/*
+ * Default parameters. They are used when a caller passes no params, and
+ * bopen_fd_internal() falls back to them for buffer_size, read_ahead and
+ * write_back fields left at zero.
+ */
+struct fb_params fbpar_def = {
+ .buffer_size = 65536,
+ .read_ahead = 1,
+ .write_back = 1,
+};
+
+/*
+ * Commit hook of the fb_params section. Returns an error message when
+ * direct I/O is requested in a build without CONFIG_UCW_THREADS, NULL
+ * otherwise.
+ */
+static char *
+fbpar_cf_commit(struct fb_params *p UNUSED)
+{
+  char *err = NULL;
+#ifndef CONFIG_UCW_THREADS
+  if (p->type == FB_DIRECT)
+    err = "Direct I/O is supported only with CONFIG_UCW_THREADS";
+#endif
+  return err;
+}
+
+/*
+ * Configuration section describing a struct fb_params. The "Type" lookup
+ * lists "std", "direct" and "mmap"; the result is stored in the type field
+ * through an int pointer.
+ */
+struct cf_section fbpar_cf = {
+# define F(x) PTR_TO(struct fb_params, x)
+ CF_TYPE(struct fb_params),
+ CF_COMMIT(fbpar_cf_commit),
+ CF_ITEMS {
+ CF_LOOKUP("Type", (int *)F(type), ((char *[]){"std", "direct", "mmap", NULL})),
+ CF_UNS("BufSize", F(buffer_size)),
+ CF_UNS("KeepBackBuf", F(keep_back_buf)),
+ CF_UNS("ReadAhead", F(read_ahead)),
+ CF_UNS("WriteBack", F(write_back)),
+ CF_END
+ }
+# undef F
+};
+
+/* Global section (declared as "FBParam") whose "Defaults" subsection fills fbpar_def. */
+static struct cf_section fbpar_global_cf = {
+ CF_ITEMS {
+ CF_SECTION("Defaults", &fbpar_def, &fbpar_cf),
+ CF_END
+ }
+};
+
+/* Marked CONSTRUCTOR: declares the "FBParam" configuration section. */
+static void CONSTRUCTOR
+fbpar_global_init(void)
+{
+  cf_declare_section("FBParam", &fbpar_global_cf, 0);
+}
+
+/*
+ * Wrap an already open descriptor in a fastbuf of the back-end selected by
+ * params->type.
+ *
+ *   fd      the descriptor
+ *   params  back-end parameters (must not be NULL); zero buffer_size,
+ *           read_ahead and write_back fall back to fbpar_def
+ *   mode    open flags of the descriptor, or ~0U if they are unknown and
+ *           have to be queried with fcntl(F_GETFL)
+ *   name    name recorded in the fastbuf; NULL yields "fd<number>"
+ *
+ * Returns the new fastbuf. An unsupported type trips ASSERT(0).
+ */
+static struct fastbuf *
+bopen_fd_internal(int fd, struct fb_params *params, uns mode, const char *name)
+{
+  char buf[32];
+  if (!name)
+    {
+      snprintf(buf, sizeof(buf), "fd%d", fd);
+      name = buf;
+    }
+  struct fastbuf *fb;
+  switch (params->type)
+    {
+#ifdef CONFIG_UCW_THREADS
+      case FB_DIRECT:
+        fb = fbdir_open_fd_internal(fd, name, params->asio,
+            params->buffer_size ? : fbpar_def.buffer_size,
+            params->read_ahead ? : fbpar_def.read_ahead,
+            params->write_back ? : fbpar_def.write_back);
+        /*
+         * The descriptor was opened by somebody else, so O_DIRECT has to be
+         * switched on here. Each fcntl() result is compared with 0 on its
+         * own; comparing the result of the whole `||' expression would never
+         * be true and the warning could never be printed.
+         */
+        if (!~mode && !fbdir_cheat &&
+            ((int)(mode = fcntl(fd, F_GETFL)) < 0 || fcntl(fd, F_SETFL, mode | O_DIRECT) < 0))
+          msg(L_WARN, "Cannot set O_DIRECT on fd %d: %m", fd);
+        return fb;
+#endif
+      case FB_STD:
+        fb = bfdopen_internal(fd, name,
+            params->buffer_size ? : fbpar_def.buffer_size);
+        if (params->keep_back_buf)
+          bconfig(fb, BCONFIG_KEEP_BACK_BUF, 1);
+        return fb;
+      case FB_MMAP:
+        /* The mmap back-end needs the access mode, so query it when unknown */
+        if (!~mode && (int)(mode = fcntl(fd, F_GETFL)) < 0)
+          die("Cannot get flags of fd %d: %m", fd);
+        return bfmmopen_internal(fd, name, mode);
+      default:
+        ASSERT(0);
+    }
+}
+
+/*
+ * Open a file by name and wrap it in a fastbuf. NULL params select
+ * fbpar_def. Direct I/O adds O_DIRECT (unless cheating) and the mmap
+ * back-end turns O_WRONLY into O_RDWR. If opening fails, NULL is returned
+ * when `try' is set and die() is called otherwise. With O_APPEND the
+ * fastbuf is positioned at the end of the file.
+ */
+static struct fastbuf *
+bopen_file_internal(const char *name, int mode, struct fb_params *params, int try)
+{
+  params = params ? params : &fbpar_def;
+
+#ifdef CONFIG_UCW_THREADS
+  if (params->type == FB_DIRECT && !fbdir_cheat)
+    mode |= O_DIRECT;
+#endif
+  if (params->type == FB_MMAP && (mode & O_ACCMODE) == O_WRONLY)
+    mode = (mode & ~O_ACCMODE) | O_RDWR;
+
+  int fd = sh_open(name, mode, 0666);
+  if (fd < 0)
+    {
+      if (try)
+        return NULL;
+      die("Unable to %s file %s: %m", (mode & O_CREAT) ? "create" : "open", name);
+    }
+
+  struct fastbuf *fb = bopen_fd_internal(fd, params, mode, name);
+  ASSERT(fb);
+  if (mode & O_APPEND)
+    bseek(fb, 0, SEEK_END);
+  return fb;
+}
+
+/* Open a file with the given parameters; opening failures are handled by die(). */
+struct fastbuf *
+bopen_file(const char *name, int mode, struct fb_params *params)
+{
+  const int try = 0;
+  return bopen_file_internal(name, mode, params, try);
+}
+
+/* Like bopen_file(), but returns NULL if the file cannot be opened. */
+struct fastbuf *
+bopen_file_try(const char *name, int mode, struct fb_params *params)
+{
+  const int try = 1;
+  return bopen_file_internal(name, mode, params, try);
+}
+
+/*
+ * Wrap an open descriptor in a fastbuf. NULL params select fbpar_def; the
+ * open flags are passed as unknown (~0U) and no name is supplied.
+ */
+struct fastbuf *
+bopen_fd(int fd, struct fb_params *params)
+{
+  struct fb_params *par = params ? params : &fbpar_def;
+  return bopen_fd_internal(fd, par, ~0U, NULL);
+}
+
+/* Function for use by individual file back-ends */
+
+/*
+ * Common closing logic of the file back-ends, driven by is_temp_file:
+ *   1     unlink the file (a failure is only logged), then close fd
+ *   0     close fd
+ *   else  leave both the file and fd alone
+ * A failing close() is fatal.
+ */
+void
+bclose_file_helper(struct fastbuf *f, int fd, int is_temp_file)
+{
+  if (is_temp_file != 0 && is_temp_file != 1)
+    return;
+
+  if (is_temp_file == 1 && unlink(f->name) < 0)
+    msg(L_ERROR, "unlink(%s): %m", f->name);
+
+  if (close(fd))
+    die("close(%s): %m", f->name);
+}
+
+/* Compatibility wrappers */
+
+/* Compatibility wrapper: bopen_file_try() with the standard back-end and the given buffer size. */
+struct fastbuf *
+bopen_try(const char *name, uns mode, uns buflen)
+{
+  struct fb_params par = { .type = FB_STD, .buffer_size = buflen };
+  return bopen_file_try(name, mode, &par);
+}
+
+/* Compatibility wrapper: bopen_file() with the standard back-end and the given buffer size. */
+struct fastbuf *
+bopen(const char *name, uns mode, uns buflen)
+{
+  struct fb_params par = { .type = FB_STD, .buffer_size = buflen };
+  return bopen_file(name, mode, &par);
+}
+
+/* Compatibility wrapper: bopen_fd() with the standard back-end and the given buffer size. */
+struct fastbuf *
+bfdopen(int fd, uns buflen)
+{
+  struct fb_params par = { .type = FB_STD, .buffer_size = buflen };
+  return bopen_fd(fd, &par);
+}
+
+/*
+ * Like bfdopen(), but the temporary-file flag is set to 2, a value for
+ * which bclose_file_helper() neither unlinks the file nor closes the
+ * descriptor.
+ */
+struct fastbuf *
+bfdopen_shared(int fd, uns buflen)
+{
+  struct fastbuf *shared = bfdopen(fd, buflen);
+
+  bconfig(shared, BCONFIG_IS_TEMP_FILE, 2);
+  return shared;
+}
--- /dev/null
+/*
+ * UCW Library -- Fast Buffered I/O on Memory Pools
+ *
+ * (c) 2007 Pavel Charvat <pchar@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/mempool.h"
+#include "lib/fastbuf.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#define FB_POOL(f) ((struct fbpool *)(f)->is_fastbuf)
+
+/*
+ * Spout callback: once the buffer is full, grow the memory pool block with
+ * mp_expand() and re-point the fastbuf at it, keeping the write offset.
+ */
+static void
+fbpool_spout(struct fastbuf *b)
+{
+  if (b->bptr < b->bufend)
+    return;			/* still room in the current buffer */
+
+  uns used = b->bufend - b->buffer;
+  b->buffer = mp_expand(FB_POOL(b)->mp);
+  b->bufend = b->buffer + mp_avail(FB_POOL(b)->mp);
+  b->bptr = b->buffer + used;
+  b->bstop = b->buffer;
+}
+
+/*
+ * Start writing into a new block of `mp' obtained with mp_start(mp,
+ * init_size); the buffer extends over everything mp_avail() reports.
+ */
+void
+fbpool_start(struct fbpool *b, struct mempool *mp, uns init_size)
+{
+  struct fastbuf *fb = &b->fb;
+
+  b->mp = mp;
+  fb->bptr = mp_start(mp, init_size);
+  fb->bstop = fb->bptr;
+  fb->buffer = fb->bstop;
+  fb->bufend = fb->buffer + mp_avail(mp);
+}
+
+/* Finish the block at the current write position; returns the result of mp_end(). */
+void *
+fbpool_end(struct fbpool *b)
+{
+  struct fastbuf *fb = &b->fb;
+  return mp_end(b->mp, fb->bptr);
+}
+
+/* Zero the structure and set the name, spout callback and can_overwrite_buffer of the embedded fastbuf. */
+void
+fbpool_init(struct fbpool *b)
+{
+  bzero(b, sizeof(*b));
+
+  struct fastbuf *fb = &b->fb;
+  fb->can_overwrite_buffer = 1;
+  fb->spout = fbpool_spout;
+  fb->name = "<fbpool>";
+}
+
+#ifdef TEST
+
+/*
+ * Test program: prints "<hello>" 1024 times into a pool-backed fastbuf and
+ * checks the size and the contents of the resulting block.
+ */
+int main(void)
+{
+  enum { COUNT = 1024, ITEM_LEN = 7 };
+  struct fbpool fb;
+  struct mempool *mp = mp_new(64);
+
+  fbpool_init(&fb);
+  fbpool_start(&fb, mp, 16);
+  for (uns i = 0; i < COUNT; i++)
+    bprintf(&fb.fb, "<hello>");
+
+  byte *p = fbpool_end(&fb);
+  uns l = mp_size(mp, p);
+  if (l != COUNT * ITEM_LEN)
+    ASSERT(0);
+  for (uns i = 0; i < COUNT; i++)
+    if (memcmp(p + i * ITEM_LEN, "<hello>", ITEM_LEN))
+      ASSERT(0);
+  mp_delete(mp);
+
+  return 0;
+}
+
+#endif
/*
* UCW Library -- Temporary Fastbufs
*
- * (c) 2002--2006 Martin Mares <mj@ucw.cz>
+ * (c) 2002--2007 Martin Mares <mj@ucw.cz>
*
* This software may be freely distributed and used according to the terms
* of the GNU Lesser General Public License.
#include <unistd.h>
#include <sys/fcntl.h>
-static byte *temp_prefix = "/tmp/temp";
+static char *temp_prefix = "/tmp/temp";
static struct cf_section temp_config = {
CF_ITEMS {
}
void
-temp_file_name(byte *buf)
+temp_file_name(char *buf)
{
struct ucwlib_context *ctx = ucwlib_thread_context();
int cnt = ++ctx->temp_counter;
sprintf(buf, "%s%d-%d-%d", temp_prefix, pid, ctx->thread_id, cnt);
}
+/*
+ * Create a temporary file under a name generated by temp_file_name(), open
+ * it read-write with the given parameters and flag it as temporary.
+ */
+struct fastbuf *
+bopen_tmp_file(struct fb_params *params)
+{
+  char name[TEMP_FILE_NAME_LEN];
+  struct fastbuf *fb;
+
+  temp_file_name(name);
+  fb = bopen_file(name, O_RDWR | O_CREAT | O_TRUNC, params);
+  bconfig(fb, BCONFIG_IS_TEMP_FILE, 1);
+  return fb;
+}
+
struct fastbuf *
bopen_tmp(uns buflen)
{
- byte buf[TEMP_FILE_NAME_LEN];
- struct fastbuf *f;
+ return bopen_tmp_file(&(struct fb_params){ .type = FB_STD, .buffer_size = buflen });
+}
- temp_file_name(buf);
- f = bopen(buf, O_RDWR | O_CREAT | O_TRUNC, buflen);
- bconfig(f, BCONFIG_IS_TEMP_FILE, 1);
- return f;
+/*
+ * Turn a temporary file into a permanent one: clear its temporary flag,
+ * rename it to `name' and close the fastbuf. Dies if the rename fails.
+ */
+void bfix_tmp_file(struct fastbuf *fb, const char *name)
+{
+ /* The file back-ends' config handlers return the previous flag; it must have been 1 (temporary) */
+ int was_temp = bconfig(fb, BCONFIG_IS_TEMP_FILE, 0);
+ ASSERT(was_temp == 1);
+ if (rename(fb->name, name))
+ die("Cannot rename %s to %s: %m", fb->name, name);
+ bclose(fb);
}
#ifdef TEST
#include <alloca.h>
int
-vbprintf(struct fastbuf *b, char *msg, va_list args)
+vbprintf(struct fastbuf *b, const char *msg, va_list args)
{
byte *buf;
int len, r;
}
int
-bprintf(struct fastbuf *b, char *msg, ...)
+bprintf(struct fastbuf *b, const char *msg, ...)
{
va_list args;
int res;
#include "lib/mempool.h"
#include "lib/bbuf.h"
-byte * /* Non-standard */
-bgets(struct fastbuf *f, byte *b, uns l)
+char * /* Non-standard */
+bgets(struct fastbuf *f, char *b, uns l)
{
ASSERT(l);
byte *src;
}
int
-bgets_nodie(struct fastbuf *f, byte *b, uns l)
+bgets_nodie(struct fastbuf *f, char *b, uns l)
{
ASSERT(l);
byte *src, *start = b;
while (src_len);
exit:
*b++ = 0;
- return b - start;
+ return b - (char *)start;
}
uns
return buf - bb->ptr;
}
-byte *
+char *
bgets_mp(struct fastbuf *f, struct mempool *mp)
{
byte *src;
s->cur_len = 0;
}
-byte *
-bgets0(struct fastbuf *f, byte *b, uns l)
+char *
+bgets0(struct fastbuf *f, char *b, uns l)
{
ASSERT(l);
byte *src;
/* Safe loading and reloading of configuration files: conf-input.c */
-extern byte *cf_def_file; /* DEFAULT_CONFIG; NULL if already loaded */
-int cf_reload(byte *file);
-int cf_load(byte *file);
-int cf_set(byte *string);
+extern char *cf_def_file; /* DEFAULT_CONFIG; NULL if already loaded */
+extern char *cf_env_file; /* ENV_VAR_CONFIG */
+int cf_reload(const char *file);
+int cf_load(const char *file);
+int cf_set(const char *string);
/* Direct access to configuration items: conf-intr.c */
#undef T
struct cf_item;
-byte *cf_find_item(byte *name, struct cf_item *item);
-byte *cf_write_item(struct cf_item *item, enum cf_operation op, int number, byte **pars);
+char *cf_find_item(const char *name, struct cf_item *item);
+char *cf_write_item(struct cf_item *item, enum cf_operation op, int number, char **pars);
/* Debug dumping: conf-dump.c */
# Tests for getopt
-Run: obj/lib/getopt-t -a -b --longc 2819 -d -a 1 2 3
+Run: ../obj/lib/getopt-t -a -b --longc 2819 -d -a 1 2 3
Out: option a
option b
option c with value `2819'
option d with value `-a'
3 nonoption arguments
-Run: obj/lib/getopt-t -a -x
+Run: ../obj/lib/getopt-t -a -x
Out: option a
unknown option
reset
# Tests for the hash table modules
-Run: obj/lib/hash-test 1
+Run: ../obj/lib/hash-test 1
Out: OK
-Run: obj/lib/hash-test 2
+Run: ../obj/lib/hash-test 2
Out: OK
-Run: obj/lib/hash-test 3
+Run: ../obj/lib/hash-test 3
Out: OK
-Run: obj/lib/hash-test 4
+Run: ../obj/lib/hash-test 4
Out: OK
}
inline uns
-str_len_aligned(const byte *str)
+str_len_aligned(const char *str)
{
const uns *u = (const uns *) str;
uns len = 0;
}
inline uns
-hash_string_aligned(const byte *str)
+hash_string_aligned(const char *str)
{
const uns *u = (const uns *) str;
uns hash = 0;
#ifndef CPU_ALLOW_UNALIGNED
uns
-str_len(const byte *str)
+str_len(const char *str)
{
uns shift = UNALIGNED_PART(str, uns);
if (!shift)
}
uns
-hash_string(const byte *str)
+hash_string(const char *str)
{
- uns shift = UNALIGNED_PART(str, uns);
+ const byte *s = str;
+ uns shift = UNALIGNED_PART(s, uns);
if (!shift)
- return hash_string_aligned(str);
+ return hash_string_aligned(s);
else
{
uns hash = 0;
#endif
if (!modulo)
hash = ROL(hash, SHIFT_BITS);
- if (!str[i])
+ if (!s[i])
break;
- hash ^= str[i] << (shift * 8);
+ hash ^= s[i] << (shift * 8);
}
return hash;
}
#endif
uns
-hash_string_nocase(const byte *str)
+hash_string_nocase(const char *str)
{
+ const byte *s = str;
uns hash = 0;
uns i;
for (i=0; ; i++)
#endif
if (!modulo)
hash = ROL(hash, SHIFT_BITS);
- if (!str[i])
+ if (!s[i])
break;
- hash ^= Cupcase(str[i]) << (shift * 8);
+ hash ^= Cupcase(s[i]) << (shift * 8);
}
return hash;
}
#include "lib/lib.h"
/* The following functions need str to be aligned to uns. */
-uns str_len_aligned(const byte *str) PURE;
-uns hash_string_aligned(const byte *str) PURE;
+uns str_len_aligned(const char *str) PURE;
+uns hash_string_aligned(const char *str) PURE;
uns hash_block_aligned(const byte *str, uns len) PURE;
#ifdef CPU_ALLOW_UNALIGNED
#define hash_string(str) hash_string_aligned(str)
#define hash_block(str, len) hash_block_aligned(str, len)
#else
-uns str_len(const byte *str) PURE;
-uns hash_string(const byte *str) PURE;
+uns str_len(const char *str) PURE;
+uns hash_string(const char *str) PURE;
uns hash_block(const byte *str, uns len) PURE;
#endif
-uns hash_string_nocase(const byte *str) PURE;
+uns hash_string_nocase(const char *str) PURE;
/*
* We hash integers by multiplying by a reasonably large prime with
/*
* UCW Library -- IP address access lists
*
- * (c) 1997--2006 Martin Mares <mj@ucw.cz>
+ * (c) 1997--2007 Martin Mares <mj@ucw.cz>
*
* This software may be freely distributed and used according to the terms
* of the GNU Lesser General Public License.
#include <string.h>
-struct addrmask {
- u32 addr;
- u32 mask;
-};
-
struct ipaccess_entry {
cnode n;
int allow;
- struct addrmask addr;
+ struct ip_addrmask addr;
};
-static byte *
-addrmask_parser(byte *c, void *ptr)
+static char *
+addrmask_parser(char *c, void *ptr)
{
/*
* This is tricky: addrmasks will be compared by memcmp(), so we must ensure
* that even the padding between structure members is zeroed out.
*/
- struct addrmask *am = ptr;
+ struct ip_addrmask *am = ptr;
bzero(am, sizeof(*am));
- byte *p = strchr(c, '/');
+ char *p = strchr(c, '/');
if (p)
*p++ = 0;
- byte *err = cf_parse_ip(c, &am->addr);
+ char *err = cf_parse_ip(c, &am->addr);
if (err)
return err;
if (p)
static void
addrmask_dumper(struct fastbuf *fb, void *ptr)
{
- struct addrmask *am = ptr;
+ struct ip_addrmask *am = ptr;
bprintf(fb, "%08x/%08x ", am->addr, am->mask);
}
-static struct cf_user_type addrmask_type = {
- .size = sizeof(struct addrmask),
- .name = "addrmask",
+struct cf_user_type ip_addrmask_type = {
+ .size = sizeof(struct ip_addrmask),
+ .name = "ip_addrmask",
.parser = addrmask_parser,
.dumper = addrmask_dumper
};
struct cf_section ipaccess_cf = {
CF_TYPE(struct ipaccess_entry),
CF_ITEMS {
- CF_LOOKUP("Mode", PTR_TO(struct ipaccess_entry, allow), ((byte*[]) { "deny", "allow", NULL })),
- CF_USER("IP", PTR_TO(struct ipaccess_entry, addr), &addrmask_type),
+ CF_LOOKUP("Mode", PTR_TO(struct ipaccess_entry, allow), ((char*[]) { "deny", "allow", NULL })),
+ CF_USER("IP", PTR_TO(struct ipaccess_entry, addr), &ip_addrmask_type),
CF_END
}
};
+/*
+ * Test an address against an address/mask pair.
+ * Returns 1 if ip agrees with am->addr in all bits set in am->mask, 0 otherwise.
+ */
+int ip_addrmask_match(struct ip_addrmask *am, u32 ip)
+{
+  u32 differing = ip ^ am->addr;	/* bits in which the two addresses differ */
+  return (differing & am->mask) == 0;
+}
+
int
ipaccess_check(clist *l, u32 ip)
{
CLIST_FOR_EACH(struct ipaccess_entry *, a, *l)
- if (! ((ip ^ a->addr.addr) & a->addr.mask))
+ if (ip_addrmask_match(&a->addr, ip))
return a->allow;
return 0;
}
byte buf[256];
while (fgets(buf, sizeof(buf), stdin))
{
- byte *c = strchr(buf, '\n');
+ char *c = strchr(buf, '\n');
if (c)
*c = 0;
u32 ip;
/*
* UCW Library -- IP address access lists
*
- * (c) 1997--2006 Martin Mares <mj@ucw.cz>
+ * (c) 1997--2007 Martin Mares <mj@ucw.cz>
*
* This software may be freely distributed and used according to the terms
* of the GNU Lesser General Public License.
extern struct cf_section ipaccess_cf;
int ipaccess_check(clist *l, u32 ip);
+/* Low-level handling of addresses and masks */
+
+/* An address together with a mask; tested by ip_addrmask_match() */
+struct ip_addrmask {
+ u32 addr;
+ u32 mask;
+};
+
+extern struct cf_user_type ip_addrmask_type;
+int ip_addrmask_match(struct ip_addrmask *am, u32 ip);
+
#endif
#define KMP_USE_UTF8
#define KMP_TOLOWER
#define KMP_ONLYALPHA
-#define KMP_STATE_VARS byte *str; uns id;
+#define KMP_STATE_VARS char *str; uns id;
#define KMP_ADD_EXTRA_ARGS uns id
-#define KMP_VARS byte *start;
+#define KMP_VARS char *start;
#define KMP_ADD_INIT(kmp,src) kmp->u.start = src
#define KMP_ADD_NEW(kmp,src,s) do{ TRACE("Inserting string %s with id %d", kmp->u.start, id); \
s->u.str = kmp->u.start; s->u.id = id; }while(0)
#define KMP_PREFIX(x) kmp3_##x
#define KMP_STATE_VARS uns index;
#define KMP_ADD_EXTRA_ARGS uns index
-#define KMP_VARS byte *start;
+#define KMP_VARS char *start;
#define KMP_ADD_INIT(kmp,src) kmp->u.start = src
#define KMP_ADD_NEW(kmp,src,s) s->u.index = index
#define KMP_ADD_DUP(kmp,src,s) *(kmp->u.start) = 0
{
mp_flush(pool);
uns n = random_max(100);
- byte *s[n];
+ char *s[n];
struct kmp3_struct kmp;
kmp3_init(&kmp);
for (uns i = 0; i < n; i++)
# Tests for the kmp module
-Run: obj/lib/kmp-test
+Run: ../obj/lib/kmp-test
#ifdef KMP_SOURCE
typedef KMP_SOURCE P(source_t);
#else
-typedef byte *P(source_t);
+typedef char *P(source_t);
#endif
#ifdef KMP_GET_CHAR
{
# ifdef KMP_USE_UTF8
uns cc;
- *src = (byte *)utf8_get(*src, &cc);
+ *src = utf8_get(*src, &cc);
# ifdef KMP_ONLYALPHA
if (!cc) {}
else if (!Ualpha(cc))
#define HAVE_PREAD
static inline sh_off_t
-sh_file_size(byte *name)
+sh_file_size(const char *name)
{
int fd = sh_open(name, O_RDONLY);
if (fd < 0)
struct tm;
extern void (*log_switch_hook)(struct tm *tm);
-void log_msg(unsigned int cat, const char *msg, ...) FORMAT_CHECK(printf,2,3);
-#define log log_msg
-void vlog_msg(unsigned int cat, const char *msg, va_list args);
+void msg(uns cat, const char *fmt, ...) FORMAT_CHECK(printf,2,3);
+void vmsg(uns cat, const char *fmt, va_list args);
void die(const char *, ...) NONRET FORMAT_CHECK(printf,1,2);
-void log_init(byte *argv0);
-void log_file(byte *name);
+void log_init(const char *argv0);
+void log_file(const char *name);
void log_fork(void);
int log_switch(void);
-void assert_failed(char *assertion, char *file, int line) NONRET;
+void assert_failed(const char *assertion, const char *file, int line) NONRET;
void assert_failed_noinfo(void) NONRET;
#ifdef DEBUG_ASSERTS
#define COMPILE_ASSERT(name,x) typedef char _COMPILE_ASSERT_##name[!!(x)-1]
#ifdef LOCAL_DEBUG
-#define DBG(x,y...) log(L_DEBUG, x,##y)
+#define DBG(x,y...) msg(L_DEBUG, x,##y)
#else
#define DBG(x,y...) do { } while(0)
#endif
* their own xmalloc and we don't want to interfere with them, hence
* the renaming.
*/
-void *xmalloc(unsigned) LIKE_MALLOC;
-void *xrealloc(void *, unsigned);
+void *xmalloc(uns) LIKE_MALLOC;
+void *xrealloc(void *, uns);
void xfree(void *);
#endif
-void *xmalloc_zero(unsigned) LIKE_MALLOC;
-byte *xstrdup(byte *) LIKE_MALLOC;
+void *xmalloc_zero(uns) LIKE_MALLOC;
+char *xstrdup(const char *) LIKE_MALLOC;
/* Content-Type pattern matching and filters */
-int match_ct_patt(byte *, byte *);
+int match_ct_patt(const char *, const char *);
/* wordsplit.c */
-int sepsplit(byte *str, byte sep, byte **rec, uns max);
-int wordsplit(byte *, byte **, uns);
+int sepsplit(char *str, uns sep, char **rec, uns max);
+int wordsplit(char *str, char **rec, uns max);
/* pat(i)match.c: Matching of shell patterns */
-int match_pattern(byte *, byte *);
-int match_pattern_nocase(byte *, byte *);
+int match_pattern(const char *patt, const char *str);
+int match_pattern_nocase(const char *patt, const char *str);
/* md5hex.c */
-void md5_to_hex(byte *, byte *);
-void hex_to_md5(byte *, byte *);
+void md5_to_hex(const byte *s, char *d);
+void hex_to_md5(const char *s, byte *d);
#define MD5_SIZE 16
#define MD5_HEX_SIZE 33
/* prime.c */
-int isprime(uns);
-uns nextprime(uns);
+int isprime(uns x);
+uns nextprime(uns x);
/* primetable.c */
/* timer.c */
-struct timeval;
+timestamp_t get_timestamp(void);
-void init_timer(void);
-uns get_timer(void);
-void get_last_timeval(struct timeval *tv);
+void init_timer(timestamp_t *timer);
+uns get_timer(timestamp_t *timer);
+uns switch_timer(timestamp_t *old, timestamp_t *new);
/* regex.c */
typedef struct regex regex;
-regex *rx_compile(byte *r, int icase);
+regex *rx_compile(const char *r, int icase);
void rx_free(regex *r);
-int rx_match(regex *r, byte *s);
-int rx_subst(regex *r, byte *by, byte *src, byte *dest, uns destlen);
+int rx_match(regex *r, const char *s);
+int rx_subst(regex *r, const char *by, const char *src, char *dest, uns destlen);
/* random.c */
/* mmap.c */
-void *mmap_file(byte *name, unsigned *len, int writeable);
+void *mmap_file(const char *name, unsigned *len, int writeable);
void munmap_file(void *start, unsigned len);
/* proctitle.c */
void setproctitle_init(int argc, char **argv);
-void setproctitle(char *msg, ...) FORMAT_CHECK(printf,1,2);
+void setproctitle(const char *msg, ...) FORMAT_CHECK(printf,1,2);
char *getproctitle(void);
/* randomkey.c */
/* exitstatus.c */
#define EXIT_STATUS_MSG_SIZE 32
-int format_exit_status(byte *msg, int stat);
+int format_exit_status(char *msg, int stat);
/* runcmd.c */
-int run_command(byte *cmd, ...);
-void NONRET exec_command(byte *cmd, ...);
-void echo_command(byte *buf, int size, byte *cmd, ...);
-int run_command_v(byte *cmd, va_list args);
-void NONRET exec_command_v(byte *cmd, va_list args);
-void echo_command_v(byte *buf, int size, byte *cmd, va_list args);
+int run_command(const char *cmd, ...);
+void NONRET exec_command(const char *cmd, ...);
+void echo_command(char *buf, int size, const char *cmd, ...);
+int run_command_v(const char *cmd, va_list args);
+void NONRET exec_command_v(const char *cmd, va_list args);
+void echo_command_v(char *buf, int size, const char *cmd, va_list args);
/* carefulio.c */
int careful_read(int fd, void *buf, int len);
-int careful_write(int fd, void *buf, int len);
+int careful_write(int fd, const void *buf, int len);
/* sync.c */
-void sync_dir(byte *name);
+void sync_dir(const char *name);
/* sighandler.c */
/* string.c */
-byte *str_unesc(byte *dest, byte *src);
-byte *str_format_flags(byte *dest, const byte *fmt, uns flags);
+char *str_unesc(char *dest, const char *src);
+char *str_format_flags(char *dest, const char *fmt, uns flags);
/* bigalloc.c */
-void *page_alloc(unsigned int len) LIKE_MALLOC; // allocates a multiple of CPU_PAGE_SIZE bytes with mmap
-void page_free(void *start, unsigned int len);
-void *page_realloc(void *start, unsigned int old_len, unsigned int new_len);
+void *page_alloc(u64 len) LIKE_MALLOC; // allocates a multiple of CPU_PAGE_SIZE bytes with mmap
+void *page_alloc_zero(u64 len) LIKE_MALLOC;
+void page_free(void *start, u64 len);
+void *page_realloc(void *start, u64 old_len, u64 new_len);
-void *big_alloc(unsigned int len) LIKE_MALLOC; // allocate a large memory block in the most efficient way available
-void big_free(void *start, unsigned int len);
+void *big_alloc(u64 len) LIKE_MALLOC; // allocate a large memory block in the most efficient way available
+void *big_alloc_zero(u64 len) LIKE_MALLOC;
+void big_free(void *start, u64 len);
#endif
--- /dev/null
+# pkg-config metadata for libucw
+
+libdir=@LIBDIR@
+incdir=.
+
+#ifdef CONFIG_UCW_THREADS
+threads=-lpthread
+#else
+threads=
+#endif
+
+Name: libucw
+Description: A library of utility functions and data structures
+Version: @SHERLOCK_VERSION@
+Cflags: -I${incdir}
+Libs: -L${libdir} -lucw ${threads}
}
byte *
-lizard_decompress_safe(byte *in, struct lizard_buffer *buf, uns expected_length)
+lizard_decompress_safe(const byte *in, struct lizard_buffer *buf, uns expected_length)
/* Decompresses in into buf, sets *ptr to the data, and returns the
* uncompressed length. If an error has occured, -1 is returned and errno is
* set. The buffer buf is automatically reallocated. SIGSEGV is caught in
}
else
{
- log(L_ERROR, "SIGSEGV caught in lizard_decompress()");
+ msg(L_ERROR, "SIGSEGV caught in lizard_decompress()");
ptr = NULL;
errno = EFAULT;
}
#define CHAIN_GOOD_MATCH 32 // we already have a good match => end
static inline uns
-hashf(byte *string)
+hashf(const byte *string)
/* 0..HASH_SIZE-1 */
{
return string[0] ^ (string[1]<<3) ^ (string[2]<<6);
}
static inline byte *
-locate_string(byte *string, int record_id, int head)
+locate_string(const byte *string, int record_id, int head)
/* The strings are recorded into the hash-table regularly, hence there is no
* need to store the pointer there. */
{
string += record_id - head;
if (record_id >= head)
string -= HASH_RECORDS-1;
- return string;
+ return (byte *)string;
}
static inline uns
-find_match(uns record_id, struct hash_record *hash_rec, byte *string, byte *string_end, byte **best_ptr, uns head)
+find_match(uns record_id, struct hash_record *hash_rec, const byte *string, const byte *string_end, byte **best_ptr, uns head)
/* hash_tab[hash] == record_id points to the head of the double-linked
* link-list of strings with the same hash. The records are statically
* stored in circular array hash_rec (with the 1st entry unused), and the
if (*cmp++ == string[4] && *cmp++ == string[5]
&& *cmp++ == string[6] && *cmp++ == string[7])
{
- byte *str = string + 8;
+ const byte *str = string + 8;
while (str <= string_end && *cmp++ == *str++);
}
}
}
static byte *
-flush_copy_command(uns bof, byte *out, byte *start, uns len)
+flush_copy_command(uns bof, byte *out, const byte *start, uns len)
{
if (bof && len <= 238)
*out++ = len + 17;
}
int
-lizard_compress(byte *in, uns in_len, byte *out)
+lizard_compress(const byte *in, uns in_len, byte *out)
/* Requires out being allocated for at least in_len * LIZARD_MAX_MULTIPLY +
* LIZARD_MAX_ADD. There must be at least LIZARD_NEEDS_CHARS characters
* allocated after in. Returns the actual compressed length. */
{
hash_ptr_t hash_tab[HASH_SIZE];
struct hash_record hash_rec[HASH_RECORDS];
- byte *in_end = in + in_len;
+ const byte *in_end = in + in_len;
byte *out_start = out;
- byte *copy_start = in;
+ const byte *copy_start = in;
uns head = 1; /* 0 in unused */
uns to_delete = 0, bof = 1;
bzero(hash_tab, sizeof(hash_tab)); /* init the hash-table */
}
static inline byte *
-read_unary_value(byte *in, uns *val)
+read_unary_value(const byte *in, uns *val)
{
uns l = 0;
while (!*in++)
l += 255;
l += in[-1];
*val = l;
- return in;
+ return (byte *)in;
}
int
-lizard_decompress(byte *in, byte *out)
+lizard_decompress(const byte *in, byte *out)
/* Requires out being allocated for the decompressed length must be known
* beforehand. It is desirable to lock the following memory page for
* read-only access to prevent buffer overflow. Returns the actual
*/
/* lizard.c */
-int lizard_compress(byte *in, uns in_len, byte *out);
-int lizard_decompress(byte *in, byte *out);
+int lizard_compress(const byte *in, uns in_len, byte *out);
+int lizard_decompress(const byte *in, byte *out);
/* lizard-safe.c */
struct lizard_buffer;
struct lizard_buffer *lizard_alloc(void);
void lizard_free(struct lizard_buffer *buf);
-byte *lizard_decompress_safe(byte *in, struct lizard_buffer *buf, uns expected_length);
+byte *lizard_decompress_safe(const byte *in, struct lizard_buffer *buf, uns expected_length);
/* adler32.c */
-uns update_adler32(uns adler, byte *ptr, uns len);
+uns update_adler32(uns adler, const byte *ptr, uns len);
static inline uns
-adler32(byte *buf, uns len)
+adler32(const byte *buf, uns len)
{
return update_adler32(1, buf, len);
}
fd = sh_open(name, O_WRONLY | O_CREAT | O_APPEND, 0666);
if (fd < 0)
die("Unable to open log file %s: %m", name);
- close(2);
- dup(fd);
+ dup2(fd, 2);
close(fd);
- close(1);
- dup(2);
switched = 1;
}
log_switch_nest--;
}
void
-log_file(byte *name)
+log_file(const char *name)
{
if (name)
{
void (*log_switch_hook)(struct tm *tm);
void
-vlog_msg(unsigned int cat, const char *msg, va_list args)
+vmsg(unsigned int cat, const char *fmt, va_list args)
{
struct timeval tv;
struct tm tm;
l0 = p - buf + 1;
r = buflen - l0;
va_copy(args2, args);
- l = vsnprintf(p, r, msg, args2);
+ l = vsnprintf(p, r, fmt, args2);
va_end(args2);
if (l < 0)
l = r;
}
void
-log_msg(unsigned int cat, const char *msg, ...)
+msg(unsigned int cat, const char *fmt, ...)
{
va_list args;
- va_start(args, msg);
- vlog_msg(cat, msg, args);
+ va_start(args, fmt);
+ vmsg(cat, fmt, args);
va_end(args);
}
void
-die(const char *msg, ...)
+die(const char *fmt, ...)
{
va_list args;
- va_start(args, msg);
- vlog_msg(L_FATAL, msg, args);
+ va_start(args, fmt);
+ vmsg(L_FATAL, fmt, args);
va_end(args);
if (log_die_hook)
log_die_hook();
}
void
-assert_failed(char *assertion, char *file, int line)
+assert_failed(const char *assertion, const char *file, int line)
{
- log(L_FATAL, "Assertion `%s' failed at %s:%d", assertion, file, line);
+ msg(L_FATAL, "Assertion `%s' failed at %s:%d", assertion, file, line);
abort();
}
die("Internal error: Assertion failed.");
}
-static byte *
-log_basename(byte *n)
+static const char *
+log_basename(const char *n)
{
- byte *p = n;
+ const char *p = n;
while (*n)
if (*n++ == '/')
}
void
-log_init(byte *argv0)
+log_init(const char *argv0)
{
if (argv0)
{
main_file_cnt++;
main_poll_table_obsolete = 1;
if (fcntl(fi->fd, F_SETFL, O_NONBLOCK) < 0)
- log(L_ERROR, "Error setting fd %d to non-blocking mode: %m. Keep fingers crossed.", fi->fd);
+ msg(L_ERROR, "Error setting fd %d to non-blocking mode: %m. Keep fingers crossed.", fi->fd);
}
void
main_debug(void)
{
#ifdef CONFIG_DEBUG
- log(L_DEBUG, "### Main loop status on %lld", (long long)main_now);
- log(L_DEBUG, "\tActive timers:");
+ msg(L_DEBUG, "### Main loop status on %lld", (long long)main_now);
+ msg(L_DEBUG, "\tActive timers:");
struct main_timer *tm;
CLIST_WALK(tm, main_timer_list)
- log(L_DEBUG, "\t\t%p (expires %lld, data %p)", tm, (long long)(tm->expires ? tm->expires-main_now : 999999), tm->data);
+ msg(L_DEBUG, "\t\t%p (expires %lld, data %p)", tm, (long long)(tm->expires ? tm->expires-main_now : 999999), tm->data);
struct main_file *fi;
- log(L_DEBUG, "\tActive files:");
+ msg(L_DEBUG, "\tActive files:");
CLIST_WALK(fi, main_file_list)
- log(L_DEBUG, "\t\t%p (fd %d, rh %p, wh %p, eh %p, expires %lld, data %p)",
+ msg(L_DEBUG, "\t\t%p (fd %d, rh %p, wh %p, eh %p, expires %lld, data %p)",
fi, fi->fd, fi->read_handler, fi->write_handler, fi->error_handler,
(long long)(fi->timer.expires ? fi->timer.expires-main_now : 999999), fi->data);
- log(L_DEBUG, "\tActive hooks:");
+ msg(L_DEBUG, "\tActive hooks:");
struct main_hook *ho;
CLIST_WALK(ho, main_hook_list)
- log(L_DEBUG, "\t\t%p (func %p, data %p)", ho, ho->handler, ho->data);
- log(L_DEBUG, "\tActive processes:");
+ msg(L_DEBUG, "\t\t%p (func %p, data %p)", ho, ho->handler, ho->data);
+ msg(L_DEBUG, "\tActive processes:");
struct main_process *pr;
CLIST_WALK(pr, main_process_list)
- log(L_DEBUG, "\t\t%p (pid %d, data %p)", pr, pr->pid, pr->data);
+ msg(L_DEBUG, "\t\t%p (pid %d, data %p)", pr, pr->pid, pr->data);
#endif
}
#include "lib/clists.h"
-typedef s64 timestamp_t; /* We measure time in milliseconds */
extern timestamp_t main_now; /* Current time in milliseconds since UNIX epoch */
extern sh_time_t main_now_seconds; /* Current time in seconds since the epoch */
extern uns main_shutdown;
cnode n;
int pid; /* Process id (0=not running) */
int status; /* Exit status (-1=fork failed) */
- byte status_msg[EXIT_STATUS_MSG_SIZE];
+ char status_msg[EXIT_STATUS_MSG_SIZE];
void (*handler)(struct main_process *mp); /* [*] Called when the process exits; process_del done automatically */
void *data; /* [*] For use by the handler */
};
+++ /dev/null
-/*
- * UCW Library -- Stub for including math.h, avoiding name collisions
- *
- * (c) 2003 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#undef log
-#define log libm_log
-#define exception math_exception
-#include <math.h>
-#undef log
-#define log log_msg
-#undef exception
-
-#ifdef CONFIG_LINUX
-float logf(float);
-#endif
*/
void MD5Transform(uint32 buf[4], uint32 const in[16])
{
- register uint32 a, b, c, d;
+ uint32 a, b, c, d;
a = buf[0];
b = buf[1];
#include <stdio.h>
void
-md5_to_hex(byte *s, byte *d)
+md5_to_hex(const byte *s, char *d)
{
int i;
for(i=0; i<MD5_SIZE; i++)
}
void
-hex_to_md5(byte *s, byte *d)
+hex_to_md5(const char *s, byte *d)
{
uns i, j;
for(i=0; i<MD5_SIZE; i++)
#include <string.h>
char *
-mp_strdup(struct mempool *p, char *s)
+mp_strdup(struct mempool *p, const char *s)
{
uns l = strlen(s) + 1;
char *t = mp_alloc_fast_noalign(p, l);
}
void *
-mp_memdup(struct mempool *p, void *s, uns len)
+mp_memdup(struct mempool *p, const void *s, uns len)
{
void *t = mp_alloc_fast(p, len);
memcpy(t, s, len);
/*** mempool-str.c ***/
-char *mp_strdup(struct mempool *, char *) LIKE_MALLOC;
-void *mp_memdup(struct mempool *, void *, uns) LIKE_MALLOC;
+char *mp_strdup(struct mempool *, const char *) LIKE_MALLOC;
+void *mp_memdup(struct mempool *, const void *, uns) LIKE_MALLOC;
char *mp_multicat(struct mempool *, ...) LIKE_MALLOC SENTINEL_CHECK;
static inline char * LIKE_MALLOC
-mp_strcat(struct mempool *mp, char *x, char *y)
+mp_strcat(struct mempool *mp, const char *x, const char *y)
{
return mp_multicat(mp, x, y, NULL);
}
# Tests for mempool modules
-Run: obj/lib/mempool-t
+Run: ../obj/lib/mempool-t
-Run: obj/lib/mempool-fmt-t
+Run: ../obj/lib/mempool-fmt-t
Out: <Hello, World!><Hello, World!><Appended><Hello, World!>
-Run: obj/lib/mempool-str-t
+Run: ../obj/lib/mempool-str-t
Out: <<12345>>
bugs.gnats.insects
bugsgnatsinsects
#include <sys/mman.h>
void *
-mmap_file(byte *name, unsigned *len, int writeable)
+mmap_file(const char *name, unsigned *len, int writeable)
{
int fd = open(name, writeable ? O_RDWR : O_RDONLY);
struct stat st;
#endif
struct partmap *
-partmap_open(byte *name, int writeable)
+partmap_open(char *name, int writeable)
{
struct partmap *p = xmalloc_zero(sizeof(struct partmap));
int writeable;
};
-struct partmap *partmap_open(byte *name, int writeable);
+struct partmap *partmap_open(char *name, int writeable);
void partmap_close(struct partmap *p);
sh_off_t partmap_size(struct partmap *p);
void partmap_load(struct partmap *p, sh_off_t start, uns size);
*/
int
-MATCH_FUNC_NAME(byte *p, byte *s)
+MATCH_FUNC_NAME(const char *p, const char *s)
{
while (*p)
{
--- /dev/null
+# Poor Man's CGI Module for Perl
+#
+# (c) 2002--2007 Martin Mares <mj@ucw.cz>
+# Slightly modified by Tomas Valla <tom@ucw.cz>
+#
+# This software may be freely distributed and used according to the terms
+# of the GNU Lesser General Public License.
+
+# FIXME:
+# - respond with proper HTTP error codes
+# - if we get invalid parameters, generate HTTP error or redirect
+
+package UCW::CGI;
+
+# First of all, set up error handling, so that even errors during parsing
+# will be reported properly.
+
+# Variables to be set by the calling module:
+# $UCW::CGI::error_mail mail address of the script admin (optional)
+# (this one has to be set in the BEGIN block!)
+# $UCW::CGI::error_hook function to be called for reporting errors
+
+my $error_reported;
+my $exit_code;
+my $debug = 0;
+
+# Report a fatal problem and abort.
+# Only the first call does the reporting: the message is written to STDERR
+# and then either passed to $UCW::CGI::error_hook (if defined) or printed
+# as a plain-text page on standard output. Every call ends by dying.
+sub report_bug($)
+{
+	unless (defined $error_reported) {
+		$error_reported = 1;
+		print STDERR $_[0];
+		if (!defined($UCW::CGI::error_hook)) {
+			print "Content-type: text/plain\n\n";
+			print "Internal bug:\n";
+			print $_[0], "\n";
+			if (defined $UCW::CGI::error_mail) {
+				print "Please notify $UCW::CGI::error_mail\n";
+			}
+		} else {
+			$UCW::CGI::error_hook->($_[0]);
+		}
+	}
+	die;
+}
+
+# Install the handlers at compile time, so that errors raised while the
+# rest of this file is being parsed are passed to report_bug(), too.
+BEGIN {
+ $SIG{__DIE__} = sub { report_bug($_[0]); };
+ # Warnings are reported in the same way, prefixed by "WARNING: "
+ $SIG{__WARN__} = sub { report_bug("WARNING: " . $_[0]); };
+ $exit_code = 0;
+}
+
+# Set the exit status of the script to $exit_code
+END {
+ $? = $exit_code;
+}
+
+use strict;
+use warnings;
+
+require Exporter;
+our $VERSION = 1.0;
+our @ISA = qw(Exporter);
+our @EXPORT = qw(&html_escape &url_escape &url_param_escape &self_ref &self_form &http_get);
+our @EXPORT_OK = qw();
+
+### Escaping ###
+
+# Return a copy of the argument with every character outside the set
+# -$_.!*'(), 0-9 A-Z a-z and \x80-\xff replaced by a %xx escape.
+sub url_escape($) {
+	my ($text) = @_;
+	$text =~ s/([^-\$_.!*'(),0-9A-Za-z\x80-\xff])/"%".unpack('H2',$1)/ge;
+	return $text;
+}
+
+# Escape a value of a URL parameter: like url_escape(), but the escaped
+# space (%20) is written as '+'.
+sub url_param_escape($) {
+	my ($param) = @_;
+	(my $escaped = url_escape($param)) =~ s/%20/+/g;
+	return $escaped;
+}
+
+# Return a copy of the argument which is safe to insert to HTML text
+# and to attribute values in either kind of quotes: the characters
+# & < > " ' are replaced by the corresponding entities.
+sub html_escape($) {
+	my $x = shift @_;
+	# The ampersand must go first, otherwise we would escape the entities produced below
+	$x =~ s/&/&amp;/g;
+	$x =~ s/</&lt;/g;
+	$x =~ s/>/&gt;/g;
+	$x =~ s/"/&quot;/g;
+	# self_form() puts values to single-quoted attributes
+	$x =~ s/'/&#39;/g;
+	return $x;
+}
+
+### Analysing RFC 822 Style Headers ###
+
+# Bring a RFC 822 style header value to a canonical form which is easy to
+# match by regular expressions: special characters are kept as %xx escapes,
+# comments and quotes are removed and so is unnecessary whitespace.
+# Returns the converted string; rfc822_deescape() decodes the %xx escapes.
+sub rfc822_prepare($) {
+ my $x = shift @_;
+ # Convert all %'s and backslash escapes to %xx escapes
+ $x =~ s/%/%25/g;
+ $x =~ s/\\(.)/"%".unpack("H2",$1)/ge;
+ # Remove all comments, beware, they can be nested (unterminated comments are closed at EOL automatically)
+ while ($x =~ s/^(("[^"]*"|[^"(])*(\([^)]*)*)(\([^()]*(\)|$))/$1 /) { }
+ # Remove quotes and escape dangerous characters inside (again closing at the end automatically)
+ $x =~ s{"([^"]*)("|$)}{my $z=$1; $z =~ s/([^0-9a-zA-Z%_-])/"%".unpack("H2",$1)/ge; $z;}ge;
+ # All control characters are properly escaped, tokens are clearly visible.
+ # Finally remove all unnecessary spaces.
+ $x =~ s/\s+/ /g;
+ $x =~ s/(^ | $)//g;
+ $x =~ s{\s*([()<>@,;:\\"/\[\]?=])\s*}{$1}g;
+ return $x;
+}
+
+# Decode %xx escapes: each '%' followed by two characters is replaced
+# by the byte whose hexadecimal code these characters are.
+sub rfc822_deescape($) {
+	my ($escaped) = @_;
+	$escaped =~ s/%(..)/pack("H2",$1)/ge;
+	return $escaped;
+}
+
+### Reading of HTTP headers ###
+
+# Look up a HTTP request header in the environment.
+# The name is converted to upper case with '-' replaced by '_'; the
+# variable HTTP_<name> is tried first, plain <name> is used when the
+# former is missing or false.
+sub http_get($) {
+	my ($key) = @_;
+	$key =~ tr/a-z-/A-Z_/;
+	my $prefixed = $ENV{"HTTP_$key"};
+	return $prefixed ? $prefixed : $ENV{"$key"};
+}
+
+### Parsing of Arguments ###
+
+my $arg_table;
+
+# Parse a string of "name=value" pairs separated by '&' or ':' and store
+# the values of the arguments listed in $arg_table to their variables.
+# Items without '=' and arguments not present in $arg_table are skipped.
+sub parse_arg_string($) {
+ my ($s) = @_;
+ # NOTE(review): without /g, this removes only the first run of whitespace -- confirm this is intended
+ $s =~ s/\s+//;
+ foreach $_ (split /[&:]/,$s) {
+ (/^([^=]+)=(.*)$/) or next;
+ my $arg = $arg_table->{$1} or next;
+ $_ = $2;
+ # Decode the value: '+' stands for a space, %xx for a byte given in hex
+ s/\+/ /g;
+ s/%(..)/pack("H2",$1)/eg;
+ # Convert CR LF and bare CR to LF
+ s/\r\n/\n/g;
+ s/\r/\n/g;
+ # Unless the argument is marked as multiline, newlines and tabs become spaces
+ $arg->{'multiline'} || s/(\n|\t)/ /g;
+ # Strip leading and trailing whitespace
+ s/^\s+//;
+ s/\s+$//;
+ # A value which does not match the 'check' regex as a whole is replaced by the default
+ if (my $rx = $arg->{'check'}) {
+ if (!/^$rx$/) { $_ = $arg->{'default'}; }
+ }
+
+ # Scalar variables are overwritten, array variables collect all values
+ my $r = ref($arg->{'var'});
+ if ($r eq 'SCALAR') {
+ ${$arg->{'var'}} = $_;
+ } elsif ($r eq 'ARRAY') {
+ push @{$arg->{'var'}}, $_;
+ }
+ }
+}
+
+sub parse_multipart_form_data();
+
+# Parse the arguments of the current CGI request.
+# The parameter is a reference to the argument table (a hash indexed by
+# argument names), which is remembered in $arg_table. All variables
+# referenced by the table are first reset to their defaults (scalars to
+# 'default' or "" if there is none, arrays to an empty list), then the
+# query string and for POST requests also the request body are parsed.
+# Dies on an unknown request method or content type; exits with code 1
+# when not run as a CGI script.
+sub parse_args($) {
+	$arg_table = shift @_;
+	if (!defined $ENV{"GATEWAY_INTERFACE"}) {
+		print STDERR "Must be called as a CGI script.\n";
+		$exit_code = 1;
+		exit;
+	}
+	foreach my $a (values %$arg_table) {
+		my $r = ref($a->{'var'});
+		defined($a->{'default'}) or $a->{'default'}="";
+		if ($r eq 'SCALAR') {
+			${$a->{'var'}} = $a->{'default'};
+		} elsif ($r eq 'ARRAY') {
+			@{$a->{'var'}} = ();
+		}
+	}
+	# Missing environment variables are treated as empty strings, so that
+	# they end up in the regular errors below. Comparing undef would produce
+	# an "uninitialized value" warning, which the __WARN__ handler of this
+	# module reports as an internal bug.
+	my $method = $ENV{"REQUEST_METHOD"};
+	$method = "" unless defined $method;
+	my $qs = $ENV{"QUERY_STRING"};
+	parse_arg_string($qs) if defined($qs);
+	if ($method eq "GET") {
+		# Everything has been passed in the query string
+	} elsif ($method eq "POST") {
+		my $ctype = $ENV{"CONTENT_TYPE"};
+		$ctype = "" unless defined $ctype;
+		if ($ctype =~ /^application\/x-www-form-urlencoded\b/i) {
+			# Read to a lexical variable, so that the caller's $_ is not clobbered
+			while (defined(my $line = <STDIN>)) {
+				chomp $line;
+				parse_arg_string($line);
+			}
+		} elsif ($ctype =~ /^multipart\/form-data\b/i) {
+			parse_multipart_form_data();
+		} else {
+			die "Unknown content type for POST data";
+		}
+	} else {
+		die "Unknown request method";
+	}
+}
+
+### Parsing Multipart Form Data ###
+
+my $boundary;
+my $boundary_len;
+my $mp_buffer;
+my $mp_buffer_i;
+my $mp_buffer_boundary;
+my $mp_eof;
+
+# Make data of the current part available in $mp_buffer and return how many
+# bytes starting at $mp_buffer_i can be consumed without crossing a part
+# boundary. $more is the number of bytes the caller would like to get; the
+# result can be both smaller and larger. Zero is returned when the position
+# is exactly at a boundary.
+sub refill_mp_data($) {
+ my ($more) = @_;
+ if ($mp_buffer_boundary >= $mp_buffer_i) {
+ # A boundary is known to be ahead: everything up to it is available
+ return $mp_buffer_boundary - $mp_buffer_i;
+ } elsif ($mp_buffer_i + $more <= length($mp_buffer) - $boundary_len) {
+ # Enough data buffered even if the last $boundary_len bytes are left alone
+ return $more;
+ } else {
+ # Throw away the data already consumed, so $mp_buffer_i is 0 from now on
+ if ($mp_buffer_i) {
+ $mp_buffer = substr($mp_buffer, $mp_buffer_i);
+ $mp_buffer_i = 0;
+ }
+ # Read from STDIN until we have enough data or hit the end of input
+ while ($mp_buffer_i + $more > length($mp_buffer) - $boundary_len) {
+ last if $mp_eof;
+ my $data;
+ my $n = read(STDIN, $data, 2048);
+ if ($n > 0) {
+ $mp_buffer .= $data;
+ } else {
+ $mp_eof = 1;
+ }
+ }
+ $mp_buffer_boundary = index($mp_buffer, $boundary, $mp_buffer_i);
+ if ($mp_buffer_boundary >= 0) {
+ return $mp_buffer_boundary;
+ } elsif ($mp_eof) {
+ return length($mp_buffer);
+ } else {
+ # Keep the last $boundary_len bytes back
+ return length($mp_buffer) - $boundary_len;
+ }
+ }
+}
+
+# Read a single line terminated by CR LF from the current part and return
+# it without the terminator. If no complete line is found in the available
+# data: when $allow_empty is true, the rest of the available data is returned
+# as an incomplete line, or undef if there are no data left; when it is
+# false, the function dies.
+sub get_mp_line($) {
+ my ($allow_empty) = @_;
+ my $n = refill_mp_data(1024);
+ my $i = index($mp_buffer, "\r\n", $mp_buffer_i);
+ # Both bytes of the terminator must lie inside the $n available bytes
+ if ($i >= $mp_buffer_i && $i < $mp_buffer_i + $n - 1) {
+ my $s = substr($mp_buffer, $mp_buffer_i, $i - $mp_buffer_i);
+ $mp_buffer_i = $i + 2;
+ return $s;
+ } elsif ($allow_empty) {
+ if ($n) { # An incomplete line
+ my $s = substr($mp_buffer, $mp_buffer_i, $n);
+ $mp_buffer_i += $n;
+ return $s;
+ } else { # No more lines
+ return undef;
+ }
+ } else {
+ die "Premature end of multipart POST data";
+ }
+}
+
+# Skip the boundary line the buffer is positioned at and look up the
+# next boundary in the data already buffered. Dies if the current position
+# is not at a boundary. Returns 0 if the skipped boundary was the closing
+# one (i.e., followed by "--"), 1 otherwise.
+sub skip_mp_boundary() {
+	if ($mp_buffer_boundary != $mp_buffer_i) {
+		die "Premature end of multipart POST data";
+	}
+	$mp_buffer_boundary = -1;
+	$mp_buffer_i += 2;		# Skip the CR LF the boundary string starts with
+	my $b = get_mp_line(0);
+	print STDERR "SEP $b\n" if $debug;
+	$mp_buffer_boundary = index($mp_buffer, $boundary, $mp_buffer_i);
+	# The boundary is an arbitrary string sent by the client and it can contain
+	# regex metacharacters like '+', '(' or '?', so it has to be quoted in order
+	# to be matched literally.
+	if ("\r\n$b" =~ /^\Q$boundary\E--/) {
+		return 0;
+	} else {
+		return 1;
+	}
+}
+
+# Parse the headers of the current part up to the empty line ending them.
+# Returns a reference to a hash mapping lower-cased header names to values
+# processed by rfc822_prepare(), or undef if there were no headers at all.
+sub parse_mp_header() {
+	my $h = {};
+	my $last;
+	while ((my $l = get_mp_line(0)) ne "") {
+		print STDERR "HH $l\n" if $debug;
+		# The name must start at the beginning of the line, otherwise a continuation
+		# line which happens to contain "word:" would be taken for a new header.
+		if (my ($name, $value) = ($l =~ /^([A-Za-z0-9-]+)\s*:\s*(.*)/)) {
+			$name =~ tr/A-Z/a-z/;
+			$h->{$name} = $value;
+			$last = $name;
+		} elsif ($l =~ /^\s+/ && $last) {
+			# A continuation of the previous header
+			$h->{$last} .= $l;
+		} else {
+			# Garbage: ignore it together with its possible continuation lines
+			$last = undef;
+		}
+	}
+	foreach my $n (keys %$h) {
+		$h->{$n} = rfc822_prepare($h->{$n});
+		print STDERR "H $n: $h->{$n}\n" if $debug;
+	}
+	return (keys %$h) ? $h : undef;
+}
+
+# Parse a request body of type multipart/form-data read from STDIN.
+# Parts whose field name is listed in $arg_table are processed: if the
+# argument has 'var', every line of the part is passed to parse_arg_string();
+# if it has 'file', the part is stored to a temporary file, whose name is put
+# to the variable referenced by 'file' (and its handle to the one referenced
+# by 'fh', if given). All other parts are skipped.
+# Dies on malformed input and when the size limits are exceeded.
+sub parse_multipart_form_data() {
+ # First of all, find the boundary string
+ my $ct = rfc822_prepare($ENV{"CONTENT_TYPE"});
+ if (!(($boundary) = ($ct =~ /^.*;boundary=([^; ]+)/))) {
+ die "Multipart content with no boundary string received";
+ }
+ $boundary = rfc822_deescape($boundary);
+ print STDERR "BOUNDARY IS $boundary\n" if $debug;
+
+ # BUG: IE 3.01 on Macintosh forgets to add the "--" at the start of the boundary string
+ # as the MIME specs preach. Workaround borrowed from CGI.pm in Perl distribution.
+ my $agent = http_get("User-agent") || "";
+ $boundary = "--$boundary" unless $agent =~ /MSIE\s+3\.0[12];\s*Mac/;
+ $boundary = "\r\n$boundary";
+ # The two extra bytes leave room for the "--" which follows the closing boundary
+ $boundary_len = length($boundary) + 2;
+
+ # Check upload size in advance
+ # (the limit is the sum of 'maxsize' of all arguments, counting 65536 for those without one)
+ if (my $size = http_get("Content-Length")) {
+ my $max_allowed = 0;
+ foreach my $a (values %$arg_table) {
+ $max_allowed += $a->{"maxsize"} || 65536;
+ }
+ if ($size > $max_allowed) {
+ die "Maximum form data length exceeded";
+ }
+ }
+
+ # Initialize our buffering mechanism and part splitter
+ # (the buffer starts with a CR LF, so that a boundary at the very start of the input matches, too)
+ $mp_buffer = "\r\n";
+ $mp_buffer_i = 0;
+ $mp_buffer_boundary = -1;
+ $mp_eof = 0;
+
+ # Skip garbage before the 1st part
+ while (my $i = refill_mp_data(256)) { $mp_buffer_i += $i; }
+ skip_mp_boundary() || return;
+
+ # Process individual parts
+ do { PART: {
+ print STDERR "NEXT PART\n" if $debug;
+ my $h = parse_mp_header();
+ my ($field, $cdisp, $a);
+ if ($h &&
+ ($cdisp = $h->{"content-disposition"}) &&
+ $cdisp =~ /^form-data/ &&
+ (($field) = ($cdisp =~ /;name=([^;]+)/)) &&
+ ($a = $arg_table->{"$field"})) {
+ print STDERR "FIELD $field\n" if $debug;
+ if (defined $h->{"content-transfer-encoding"}) { die "Unexpected Content-Transfer-Encoding"; }
+ if (defined $a->{"var"}) {
+ # An ordinary field: handle each line as if it were passed as "field=line"
+ while (defined (my $l = get_mp_line(1))) {
+ print STDERR "VALUE $l\n" if $debug;
+ parse_arg_string("$field=$l");
+ }
+ next PART;
+ } elsif (defined $a->{"file"}) {
+ # A file upload: copy the part to a temporary file
+ require File::Temp;
+ require IO::Handle;
+ my $max_size = $a->{"maxsize"} || 1048576;
+ my @tmpargs = (undef, UNLINK => 1);
+ push @tmpargs, DIR => $a->{"tmpdir"} if defined $a->{"tmpdir"};
+ my ($fh, $fn) = File::Temp::tempfile(@tmpargs);
+ print STDERR "FILE UPLOAD to $fn\n" if $debug;
+ ${$a->{"file"}} = $fn;
+ ${$a->{"fh"}} = $fh if defined $a->{"fh"};
+ my $total_size = 0;
+ while (my $i = refill_mp_data(4096)) {
+ print $fh substr($mp_buffer, $mp_buffer_i, $i);
+ $mp_buffer_i += $i;
+ $total_size += $i;
+ if ($total_size > $max_size) { die "Uploaded file too long"; }
+ }
+ $fh->flush(); # Don't close the handle, the file would disappear otherwise
+ next PART;
+ }
+ }
+ # Neither a known field nor a file: skip the data up to the next boundary
+ print STDERR "SKIPPING\n" if $debug;
+ while (my $i = refill_mp_data(256)) { $mp_buffer_i += $i; }
+ } } while (skip_mp_boundary());
+}
+
+### Generating Self-ref URL's ###
+
+# Build the set of arguments to be passed in a self-referencing URL or form.
+# $overrides is a reference to a hash of values replacing the current ones;
+# an override which exists, but is undefined, resets the argument to its
+# default. Arguments without 'var', arguments with 'pass' defined as false
+# (unless overridden) and arguments equal to their defaults are left out.
+# Returns a reference to a hash mapping names to values.
+sub make_out_args($) {
+	my ($overrides) = @_;
+	my $out = {};
+	foreach my $name (keys %$arg_table) {
+		my $arg = $arg_table->{$name};
+		next unless defined($arg->{'var'});
+		my $overridden = exists $overrides->{$name};
+		next if defined($arg->{'pass'}) && !$arg->{'pass'} && !$overridden;
+		my $value = $overrides->{$name};
+		if (!defined($value)) {
+			$value = $overridden ? $arg->{'default'} : ${$arg->{'var'}};
+		}
+		$out->{$name} = $value if $value ne $arg->{'default'};
+	}
+	return $out;
+}
+
+# Return a relative URL ("?name=value:name=value...") passing the current
+# arguments modified by the name => value overrides given as parameters.
+# The arguments are sorted by name.
+sub self_ref(@) {
+	my %overrides = @_;
+	my $args = make_out_args(\%overrides);
+	my @pairs = map { "$_=" . url_param_escape($args->{$_}) } sort keys %$args;
+	return "?" . join(':', @pairs);
+}
+
+# Return hidden <input> fields (one per line, sorted by name) passing the
+# current arguments modified by the name => value overrides given as parameters.
+sub self_form(@) {
+	my %overrides = @_;
+	my $args = make_out_args(\%overrides);
+	my @fields = map { "<input type=hidden name=$_ value='" . html_escape($args->{$_}) . "'>\n" } sort keys %$args;
+	return join('', @fields);
+}
+
+### Cookies
+
+# Format a value for the Set-Cookie header: values consisting only of
+# letters, digits and '%' are returned as they are, anything else is put
+# to double quotes with backslashes and double quotes inside escaped
+# by a backslash.
+sub cookie_esc($) {
+	my ($value) = @_;
+	return $value if $value =~ /^[a-zA-Z0-9%]+$/;
+	$value =~ s/([\\\"])/\\$1/g;
+	return "\"$value\"";
+}
+
+# Print a Set-Cookie header setting cookie $key to $value. The remaining
+# parameters are name => value pairs of cookie attributes; 'version'
+# is set to 1 unless given.
+sub set_cookie($$@) {
+	my ($key, $value, %other) = @_;
+	defined $other{'version'} or $other{'version'} = 1;
+	print "Set-Cookie: $key=", cookie_esc($value);
+	for my $attr (keys %other) {
+		print ";$attr=", cookie_esc($other{$attr});
+	}
+	print "\n";
+}
+
+# Parse the Cookie header of the request. Returns a flat list of
+# (name, value, name, value, ...), which is empty if there is no such header.
+sub parse_cookies() {
+ my $h = http_get("Cookie") or return ();
+ my @cook = ();
+ # Cut off one "name=value" item per iteration and continue with the rest of the header
+ while (my ($padding,$name,$val,$xx,$rest) = ($h =~ /\s*([,;]\s*)*([^ =]+)=([^ =,;\"]*|\"([^\"\\]|\\.)*\")(\s.*|;.*|$)/)) {
+ # A quoted value: remove the quotes and the backslashes of escaped characters
+ if ($val =~ /^\"/) {
+ $val =~ s/^\"//;
+ $val =~ s/\"$//;
+ $val =~ s/\\(.)/$1/g;
+ }
+ push @cook, $name, $val;
+ $h = $rest;
+ }
+ return @cook;
+}
+
+1; # OK
# This software may be freely distributed and used according to the terms
# of the GNU Lesser General Public License.
-package Sherlock::Config;
+package UCW::Config;
use strict;
use warnings;
} elsif (ref $var eq "ARRAY") {
push @$var, $val;
} elsif (ref $var) {
- die ("Sherlock::Config::Parse: don't know how to set $o");
+ die ("UCW::Config::Parse: don't know how to set $o");
}
}
}
Log "done\n";
Log "Generating autoconf.h ... ";
- open X, ">obj/lib/autoconf.h" or Fail $!;
+ open X, ">obj/autoconf.h" or Fail $!;
print X "/* Generated automatically by $0, please don't touch manually. */\n";
foreach my $x (sort keys %vars) {
# Don't export variables which contain no underscores
foreach my $x (sort keys %vars) {
print X "$x=$vars{$x}\n";
}
+ print X "s=\${SRCDIR}\n";
+ print X "o=obj\n";
close X;
Log "done\n";
}
#
#
# Interface:
-# Sherlock::Filelock::fcntl_lock($fd, $cmd, $type, $whence, $start, $len)
+# UCW::Filelock::fcntl_lock($fd, $cmd, $type, $whence, $start, $len)
#
-package Sherlock::Filelock;
+package UCW::Filelock;
use 5.006;
use strict;
our $VERSION = '0.01';
-bootstrap Sherlock::Filelock $VERSION;
+bootstrap UCW::Filelock $VERSION;
# Preloaded methods go here.
#include <fcntl.h>
-MODULE = Sherlock::Filelock PACKAGE = Sherlock::Filelock
+MODULE = UCW::Filelock PACKAGE = UCW::Filelock
PROTOTYPES: ENABLED
# Makefile for the Filelock Perl module (c) 2007 Pavel Chrvat <pchar@ucw.cz>
-DIRS+=lib/perl/Filelock/arch/auto/Sherlock/Filelock
+DIRS+=lib/perl/Filelock/arch/auto/UCW/Filelock
FILELOCK_DIR=lib/perl/Filelock
PROGS+=$(o)/lib/perl/Filelock/Filelock.pm
-extras:: $(o)/lib/perl/Filelock/arch/auto/Sherlock/Filelock/Filelock.$(SOEXT)
+extras:: $(o)/lib/perl/Filelock/arch/auto/UCW/Filelock/Filelock.$(SOEXT)
-$(o)/lib/perl/Filelock/arch/auto/Sherlock/Filelock/Filelock.$(SOEXT): $(o)/$(FILELOCK_DIR)/Filelock.xs $(o)/$(FILELOCK_DIR)/Filelock.pm $(o)/$(FILELOCK_DIR)/Makefile
- cd $(o)/$(FILELOCK_DIR) && $(MAKE) -f Makefile
- cp $@ run/$(DATADIR)/
+$(o)/lib/perl/Filelock/arch/auto/UCW/Filelock/Filelock.$(SOEXT): $(o)/$(FILELOCK_DIR)/Filelock.xs $(o)/$(FILELOCK_DIR)/Filelock.pm $(o)/$(FILELOCK_DIR)/Makefile
+ $(M)MAKE $@
+ $(Q)cd $(o)/$(FILELOCK_DIR) && $(MAKE) -f Makefile $(MAKESILENT)
+ $(Q)touch $@
+ $(Q)cp $@ run/$(DATADIR)/
$(o)/$(FILELOCK_DIR)/Makefile: $(o)/$(FILELOCK_DIR)/Makefile.PL
- cd $(o)/$(FILELOCK_DIR) && perl Makefile.PL
+ $(M)PREPARE $@
+ $(Q)cd $(o)/$(FILELOCK_DIR) && perl Makefile.PL
$(o)/$(FILELOCK_DIR)/Filelock.xs: $(s)/$(FILELOCK_DIR)/Filelock.xs
- cp $^ $@
+ $(Q)cp $^ $@
$(o)/$(FILELOCK_DIR)/Makefile.PL: $(s)/$(FILELOCK_DIR)/Makefile.PL
- cp $^ $@
+ $(Q)cp $^ $@
use ExtUtils::MakeMaker;
WriteMakefile(
- 'NAME' => 'Sherlock::Filelock',
+ 'NAME' => 'UCW::Filelock',
'VERSION_FROM' => 'Filelock.pm',
'INST_LIB' => 'lib',
'INST_ARCHLIB' => 'arch',
# (c) 2007 Pavel Charvat <pchar@ucw.cz>
#
-package Sherlock::Log;
+package UCW::Log;
use lib 'lib/perl5';
use strict;
# Perl modules
DIRS+=lib/perl
-PROGS+=$(addprefix $(o)/lib/perl/,Config.pm Log.pm)
+EXTRA_RUNDIRS+=lib/perl5/UCW
+PROGS+=$(addprefix $(o)/lib/perl/,Config.pm Log.pm CGI.pm)
ifdef CONFIG_UCW_PERL_MODULES
include $(s)/lib/perl/Ulimit/Makefile
# Makefile for the Ulimit Perl module (c) 2003 Tomas Valla <tom@ucw.cz>
-DIRS+=lib/perl/Ulimit/arch/auto/Sherlock/Ulimit
+DIRS+=lib/perl/Ulimit/arch/auto/UCW/Ulimit
ULIMIT_DIR=lib/perl/Ulimit
PROGS+=$(o)/lib/perl/Ulimit/Ulimit.pm
-extras:: $(o)/lib/perl/Ulimit/arch/auto/Sherlock/Ulimit/Ulimit.$(SOEXT)
+extras:: $(o)/lib/perl/Ulimit/arch/auto/UCW/Ulimit/Ulimit.$(SOEXT)
-$(o)/lib/perl/Ulimit/arch/auto/Sherlock/Ulimit/Ulimit.$(SOEXT): $(o)/$(ULIMIT_DIR)/Ulimit.xs $(o)/$(ULIMIT_DIR)/Ulimit.pm $(o)/$(ULIMIT_DIR)/Makefile
- cd $(o)/$(ULIMIT_DIR) && $(MAKE) -f Makefile
- cp $@ run/$(DATADIR)/
+$(o)/lib/perl/Ulimit/arch/auto/UCW/Ulimit/Ulimit.$(SOEXT): $(o)/$(ULIMIT_DIR)/Ulimit.xs $(o)/$(ULIMIT_DIR)/Ulimit.pm $(o)/$(ULIMIT_DIR)/Makefile
+ $(M)MAKE $@
+ $(Q)cd $(o)/$(ULIMIT_DIR) && $(MAKE) -f Makefile $(MAKESILENT)
+ $(Q)touch $@
+ $(Q)cp $@ run/$(DATADIR)/
$(o)/$(ULIMIT_DIR)/Makefile: $(o)/$(ULIMIT_DIR)/Makefile.PL
- cd $(o)/$(ULIMIT_DIR) && perl Makefile.PL
+ $(M)PREPARE $@
+ $(Q)cd $(o)/$(ULIMIT_DIR) && perl Makefile.PL
$(o)/$(ULIMIT_DIR)/Ulimit.xs: $(s)/$(ULIMIT_DIR)/Ulimit.xs
- cp $^ $@
+ $(Q)cp $^ $@
$(o)/$(ULIMIT_DIR)/Makefile.PL: $(s)/$(ULIMIT_DIR)/Makefile.PL
- cp $^ $@
+ $(Q)cp $^ $@
use ExtUtils::MakeMaker;
WriteMakefile(
- 'NAME' => 'Sherlock::Ulimit',
+ 'NAME' => 'UCW::Ulimit',
'VERSION_FROM' => 'Ulimit.pm',
'INST_LIB' => 'lib',
'INST_ARCHLIB' => 'arch',
#
#
# Interface:
-# Sherlock::Ulimit::setlimit( $resource, $softlimit, $hardlimit)
-# Sherlock::Ulimit::getlimit( $resource, $softlimit, $hardlimit)
+# UCW::Ulimit::setlimit( $resource, $softlimit, $hardlimit)
+# UCW::Ulimit::getlimit( $resource, $softlimit, $hardlimit)
#
# setlimit sets limit to values supplied in softlimit and hardlimit
# getlimit reads limits into softlimit and hardlimit
# $resource constants are defined below
#
-package Sherlock::Ulimit;
+package UCW::Ulimit;
use 5.006;
use strict;
our $VERSION = '0.01';
-bootstrap Sherlock::Ulimit $VERSION;
+bootstrap UCW::Ulimit $VERSION;
# Preloaded methods go here.
#include <unistd.h>
-MODULE = Sherlock::Ulimit PACKAGE = Sherlock::Ulimit
+MODULE = UCW::Ulimit PACKAGE = UCW::Ulimit
PROTOTYPES: ENABLED
}
void
-setproctitle(char *msg, ...)
+setproctitle(const char *msg, ...)
{
va_list args;
byte buf[256];
static inline void prof_start(prof_t *c) { prof_switch(NULL, c); }
static inline void prof_stop(prof_t *c) { prof_switch(c, NULL); }
#endif
-#define PROF_STR(C) ({ static byte _x[PROF_STR_SIZE]; prof_format(_x, &(C)); _x; })
+#define PROF_STR(C) ({ static char _x[PROF_STR_SIZE]; prof_format(_x, &(C)); _x; })
#else
int fd;
byte *mmap_data;
uns file_size;
- byte *file_name;
+ char *file_name;
uns locked;
};
#define first_free_block entry_table[0].first_data_block
#define num_free_blocks entry_table[0].data_len
-static inline byte *
+static inline char *
format_key(qache_key_t *key)
{
- static byte keybuf[2*sizeof(qache_key_t)+1];
+ static char keybuf[2*sizeof(qache_key_t)+1];
for (uns i=0; i<sizeof(qache_key_t); i++)
sprintf(keybuf+2*i, "%02x", (*key)[i]);
return keybuf;
start -= start % CPU_PAGE_SIZE;
len = ALIGN_TO(len, CPU_PAGE_SIZE);
if (msync(q->mmap_data + start, len, MS_ASYNC | MS_INVALIDATE) < 0)
- log(L_ERROR, "Cache %s: msync failed: %m", q->file_name);
+ msg(L_ERROR, "Cache %s: msync failed: %m", q->file_name);
#endif
}
ET_HASH = 4
};
-static byte *
+static char *
audit_entries(struct qache *q, byte *entrymap)
{
uns i, j;
BT_ALLOC = 2
};
-static byte *
+static char *
audit_blocks(struct qache *q, byte *entrymap, byte *blockmap)
{
uns i, j;
return NULL;
}
-static byte *
+static char *
do_audit(struct qache *q)
{
byte *entry_map = xmalloc_zero(q->hdr->max_entries);
return 0;
struct stat st;
- byte *err = "stat failed";
+ char *err = "stat failed";
if (fstat(q->fd, &st) < 0)
goto close_and_fail;
goto unlock_and_fail;
qache_unlock(q, 0);
- log(L_INFO, "Cache %s: using existing data", q->file_name);
+ msg(L_INFO, "Cache %s: using existing data", q->file_name);
return 1;
unlock_and_fail:
qache_unlock(q, 0);
munmap(q->mmap_data, q->file_size);
close_and_fail:
- log(L_INFO, "Cache %s: ignoring old contents (%s)", q->file_name, err);
+ msg(L_INFO, "Cache %s: ignoring old contents (%s)", q->file_name, err);
close(q->fd);
return 0;
}
bwrite(fb, &h, sizeof(h));
/* Entry #0: heads of all lists */
- ASSERT(btell(fb) == h.entry_table_start);
+ ASSERT(btell(fb) == (sh_off_t)h.entry_table_start);
struct qache_entry ent;
bzero(&ent, sizeof(ent));
ent.first_data_block = h.first_data_block;
}
/* The hash table */
- ASSERT(btell(fb) == h.hash_table_start);
+ ASSERT(btell(fb) == (sh_off_t)h.hash_table_start);
for (uns i=0; i<h.hash_size; i++)
bputl(fb, 0);
/* The next pointers */
- ASSERT(btell(fb) == h.next_table_start);
+ ASSERT(btell(fb) == (sh_off_t)h.next_table_start);
for (uns i=0; i<h.num_blocks; i++)
bputl(fb, (i < h.first_data_block || i == h.num_blocks-1) ? 0 : i+1);
/* Padding */
- ASSERT(btell(fb) <= h.first_data_block << h.block_shift);
- while (btell(fb) < h.first_data_block << h.block_shift)
+ ASSERT(btell(fb) <= (sh_off_t)(h.first_data_block << h.block_shift));
+ while (btell(fb) < (sh_off_t)(h.first_data_block << h.block_shift))
bputc(fb, 0);
/* Data blocks */
for (uns j=0; j<h.block_size; j+=4)
bputl(fb, 0);
- ASSERT(btell(fb) == par->cache_size);
+ ASSERT(btell(fb) == (sh_off_t)par->cache_size);
bclose(fb);
- log(L_INFO, "Cache %s: created (%d bytes, %d slots, %d buckets)", q->file_name, par->cache_size, h.max_entries, h.hash_size);
+ msg(L_INFO, "Cache %s: created (%d bytes, %d slots, %d buckets)", q->file_name, par->cache_size, h.max_entries, h.hash_size);
if ((q->mmap_data = mmap(NULL, par->cache_size, PROT_READ | PROT_WRITE, MAP_SHARED, q->fd, 0)) == MAP_FAILED)
die("Cache %s: mmap failed (%m)", par->file_name);
munmap(q->mmap_data, q->file_size);
close(q->fd);
if (!retain_data && unlink(q->file_name) < 0)
- log(L_ERROR, "Cache %s: unlink failed (%m)", q->file_name);
+ msg(L_ERROR, "Cache %s: unlink failed (%m)", q->file_name);
xfree(q->file_name);
xfree(q);
}
void
qache_debug(struct qache *q)
{
- log(L_DEBUG, "Cache %s: block_size=%d (%d data), num_blocks=%d (%d first data), %d slots, %d hash buckets",
+ msg(L_DEBUG, "Cache %s: block_size=%d (%d data), num_blocks=%d (%d first data), %d slots, %d hash buckets",
q->file_name, q->hdr->block_size, q->hdr->block_size, q->hdr->num_blocks, q->hdr->first_data_block,
q->hdr->max_entries, q->hdr->hash_size);
- log(L_DEBUG, "Table of cache entries:");
- log(L_DEBUG, "\tEntry\tLruPrev\tLruNext\tDataLen\tDataBlk\tHashNxt\tKey");
+ msg(L_DEBUG, "Table of cache entries:");
+ msg(L_DEBUG, "\tEntry\tLruPrev\tLruNext\tDataLen\tDataBlk\tHashNxt\tKey");
for (uns e=0; e<q->hdr->max_entries; e++)
{
struct qache_entry *ent = &q->entry_table[e];
- log(L_DEBUG, "\t%d\t%d\t%d\t%d\t%d\t%d\t%s", e, ent->lru_prev, ent->lru_next, ent->data_len,
+ msg(L_DEBUG, "\t%d\t%d\t%d\t%d\t%d\t%d\t%s", e, ent->lru_prev, ent->lru_next, ent->data_len,
ent->first_data_block, ent->hash_next, format_key(&ent->key));
}
- log(L_DEBUG, "Hash table:");
+ msg(L_DEBUG, "Hash table:");
for (uns h=0; h<q->hdr->hash_size; h++)
- log(L_DEBUG, "\t%04x\t%d", h, q->hash_table[h]);
+ msg(L_DEBUG, "\t%04x\t%d", h, q->hash_table[h]);
- log(L_DEBUG, "Next pointers:");
+ msg(L_DEBUG, "Next pointers:");
for (uns blk=q->hdr->first_data_block; blk<q->hdr->num_blocks; blk++)
- log(L_DEBUG, "\t%d\t%d", blk, q->next_table[blk]);
+ msg(L_DEBUG, "\t%d\t%d", blk, q->next_table[blk]);
}
void
qache_audit(struct qache *q)
{
- byte *err;
+ char *err;
qache_lock(q);
if (err = do_audit(q))
die("Cache %s: %s", q->file_name, err);
found++;
}
}
- log(L_INFO, "Found %d of %d entries", found, N);
+ msg(L_INFO, "Found %d of %d entries", found, N);
qache_close(q, 1);
return 0;
#define _UCW_QACHE_H
struct qache_params {
- byte *file_name;
+ char *file_name;
uns block_size; /* Cache block size (a power of two) */
uns cache_size; /* Size of the whole cache */
uns max_entries; /* Maximum number of cached entries */
};
regex *
-rx_compile(byte *p, int icase)
+rx_compile(const char *p, int icase)
{
regex *r = xmalloc_zero(sizeof(regex));
int err = regcomp(&r->rx, p, REG_EXTENDED | (icase ? REG_ICASE : 0));
if (err)
{
- byte msg[256];
+ char msg[256];
regerror(err, &r->rx, msg, sizeof(msg)-1);
/* regfree(&r->rx) not needed */
die("Error parsing regular expression `%s': %s", p, msg);
}
int
-rx_match(regex *r, byte *s)
+rx_match(regex *r, const char *s)
{
int err = regexec(&r->rx, s, 10, r->matches, 0);
if (!err)
}
int
-rx_subst(regex *r, byte *by, byte *src, byte *dest, uns destlen)
+rx_subst(regex *r, const char *by, const char *src, char *dest, uns destlen)
{
- byte *end = dest + destlen - 1;
+ char *end = dest + destlen - 1;
if (!rx_match(r, src))
return 0;
uns j = *by++ - '0';
if (j <= r->rx.re_nsub && r->matches[j].rm_so >= 0)
{
- byte *s = src + r->matches[j].rm_so;
+ const char *s = src + r->matches[j].rm_so;
uns i = r->matches[j].rm_eo - r->matches[j].rm_so;
if (dest + i >= end)
return -1;
};
regex *
-rx_compile(byte *p, int icase)
+rx_compile(const char *p, int icase)
{
const char *err;
int errpos, match_array_size, eno;
}
int
-rx_match(regex *r, byte *s)
+rx_match(regex *r, const char *s)
{
int len = str_len(s);
int err = pcre_exec(r->rx, r->extra, s, len, 0, 0, r->matches, r->match_array_size);
}
int
-rx_subst(regex *r, byte *by, byte *src, byte *dest, uns destlen)
+rx_subst(regex *r, const char *by, const char *src, char *dest, uns destlen)
{
- byte *end = dest + destlen - 1;
+ char *end = dest + destlen - 1;
if (!rx_match(r, src))
return 0;
uns j = *by++ - '0';
if (j < r->real_matches && r->matches[2*j] >= 0)
{
- byte *s = src + r->matches[2*j];
+		  const char *s = src + r->matches[2*j];	/* src is const char *; keep the qualifier */
uns i = r->matches[2*j+1] - r->matches[2*j];
if (dest + i >= end)
return -1;
};
regex *
-rx_compile(byte *p, int icase)
+rx_compile(const char *p, int icase)
{
regex *r = xmalloc_zero(sizeof(regex));
const char *msg;
}
int
-rx_match(regex *r, byte *s)
+rx_match(regex *r, const char *s)
{
int len = strlen(s);
}
int
-rx_subst(regex *r, byte *by, byte *src, byte *dest, uns destlen)
+rx_subst(regex *r, const char *by, const char *src, char *dest, uns destlen)
{
- byte *end = dest + destlen - 1;
+ char *end = dest + destlen - 1;
if (!rx_match(r, src))
return 0;
uns j = *by++ - '0';
if (j < r->regs.num_regs)
{
- byte *s = src + r->regs.start[j];
+ const char *s = src + r->regs.start[j];
uns i = r->regs.end[j] - r->regs.start[j];
if (r->regs.start[j] > r->len_cache || r->regs.end[j] > r->len_cache)
return -1;
int main(int argc, char **argv)
{
regex *r;
- byte buf1[4096], buf2[4096];
+ char buf1[4096], buf2[4096];
int opt_i = 0;
if (!strcmp(argv[1], "-i"))
# Tests for the regex module
-Run: obj/lib/regex-t 'a.*b.*c'
+Run: ../obj/lib/regex-t 'a.*b.*c'
In: abc
ajkhkbbbbbc
Aabc
MATCH
NO MATCH
-Run: obj/lib/regex-t -i 'a.*b.*c'
+Run: ../obj/lib/regex-t -i 'a.*b.*c'
In: aBc
ajkhkbBBBBC
Aabc
MATCH
MATCH
-Run: obj/lib/regex-t -i '(ahoj|nebo)'
+Run: ../obj/lib/regex-t -i '(ahoj|nebo)'
In: Ahoj
nEBo
ahoja
NO MATCH
NO MATCH
-Run: obj/lib/regex-t '\(ahoj\)'
+Run: ../obj/lib/regex-t '\(ahoj\)'
In: (ahoj)
ahoj
Out: MATCH
NO MATCH
-Run: obj/lib/regex-t '(.*b)*'
+Run: ../obj/lib/regex-t '(.*b)*'
In: ababababab
ababababababababababababababababababababababababababababa
Out: MATCH
NO MATCH
-Run: obj/lib/regex-t '(.*)((aabb)|cc)(b.*)' '\1<\3>\4'
+Run: ../obj/lib/regex-t '(.*)((aabb)|cc)(b.*)' '\1<\3>\4'
In: aaabbb
aabbccb
abcabc
NO MATCH
aa<>bb
-Run: obj/lib/regex-t '.*\?(.*&)*([a-z_]*sess[a-z_]*|random|sid|S_ID|rnd|timestamp|referer)=.*'
+Run: ../obj/lib/regex-t '.*\?(.*&)*([a-z_]*sess[a-z_]*|random|sid|S_ID|rnd|timestamp|referer)=.*'
In: /nemecky/ubytovani/hotel.php?sort=&cislo=26&mena=EUR&typ=Hotel&luz1=ANO&luz2=ANO&luz3=&luz4=&luz5=&maxp1=99999&maxp2=99999&maxp3=99999&maxp4=99999&maxp5=99999&apart=&rada=8,9,10,11,19,22,26,27,28,29,3&cislo=26&mena=EUR&typ=Hotel&luz1=ANO&luz2=ANO&luz3=&luz4=&luz5=&maxp1=99999&maxp2=99999&maxp3=99999&maxp4=99999&maxp5=99999&apart=&rada=8,9,10,11,19,22,26,27,28,29,3&cislo=26&mena=EUR&typ=Hotel&luz1=ANO&luz2=ANO&luz3=&luz4=&luz5=&maxp1=99999&maxp2=99999&maxp3=99999&maxp4=99999&maxp5=99999&apart=&rada=8,9,10,11,19,22,26,27,28,29,3
/test...?f=1&s=3&sid=123&q=3&
Out: NO MATCH
MATCH
-Run: obj/lib/regex-t '.*[0-9a-f]{8,16}.*'
+Run: ../obj/lib/regex-t '.*[0-9a-f]{8,16}.*'
In: abcdabcdabcd
aaaaaaaaaaaaaaaaaaaaaaaaaaaa
asddajlkdkajlqwepoiequwiouio
#include <sys/wait.h>
void NONRET
-exec_command_v(byte *cmd, va_list args)
+exec_command_v(const char *cmd, va_list args)
{
va_list cargs;
va_copy(cargs, args);
int cnt = 2;
- byte *arg;
- while (arg = va_arg(cargs, byte *))
+ char *arg;
+ while (arg = va_arg(cargs, char *))
cnt++;
va_end(cargs);
- char **argv = alloca(sizeof(byte *) * cnt);
- argv[0] = cmd;
+ char **argv = alloca(sizeof(char *) * cnt);
+ argv[0] = (char *)cmd;
cnt = 1;
va_copy(cargs, args);
- while (arg = va_arg(cargs, byte *))
+ while (arg = va_arg(cargs, char *))
argv[cnt++] = arg;
va_end(cargs);
argv[cnt] = NULL;
execv(cmd, argv);
- byte echo[256];
+ char echo[256];
echo_command_v(echo, sizeof(echo), cmd, args);
- log(L_ERROR, "Cannot execute %s: %m", echo);
+ msg(L_ERROR, "Cannot execute %s: %m", echo);
exit(255);
}
int
-run_command_v(byte *cmd, va_list args)
+run_command_v(const char *cmd, va_list args)
{
pid_t p = fork();
if (p < 0)
{
- log(L_ERROR, "fork() failed: %m");
+ msg(L_ERROR, "fork() failed: %m");
return 0;
}
else if (!p)
else
{
int stat;
- byte msg[EXIT_STATUS_MSG_SIZE];
+ char status_msg[EXIT_STATUS_MSG_SIZE];
p = waitpid(p, &stat, 0);
if (p < 0)
die("waitpid() failed: %m");
- if (format_exit_status(msg, stat))
+ if (format_exit_status(status_msg, stat))
{
- byte echo[256];
+ char echo[256];
echo_command_v(echo, sizeof(echo), cmd, args);
- log(L_ERROR, "`%s' failed: %s", echo, msg);
+ msg(L_ERROR, "`%s' failed: %s", echo, status_msg);
return 0;
}
return 1;
}
void
-echo_command_v(byte *buf, int size, byte *cmd, va_list args)
+echo_command_v(char *buf, int size, const char *cmd, va_list args)
{
- byte *limit = buf + size - 4;
- byte *p = buf;
- byte *arg = cmd;
+ char *limit = buf + size - 4;
+ char *p = buf;
+ const char *arg = cmd;
do
{
int l = strlen(arg);
memcpy(p, arg, l);
p += l;
}
- while (arg = va_arg(args, byte *));
+ while (arg = va_arg(args, char *));
*p = 0;
}
int
-run_command(byte *cmd, ...)
+run_command(const char *cmd, ...)
{
va_list args;
va_start(args, cmd);
}
void NONRET
-exec_command(byte *cmd, ...)
+exec_command(const char *cmd, ...)
{
va_list args;
va_start(args, cmd);
}
void
-echo_command(byte *buf, int len, byte *cmd, ...)
+echo_command(char *buf, int len, const char *cmd, ...)
{
va_list args;
va_start(args, cmd);
int main(void)
{
- byte msg[1024];
+ char msg[1024];
echo_command(msg, sizeof(msg), "/bin/echo", "datel", "strakapoud", NULL);
log(L_INFO, "Running <%s>", msg);
run_command("/bin/echo", "datel", "strakapoud", NULL);
sem_alloc(void)
{
static uns cnt = 0;
- byte buf[20];
+ char buf[20];
sprintf(buf, "tmp/sem-%d-%d", getpid(), cnt++);
sem_t *sem = sem_open(buf, O_CREAT, 0777, 0);
ASSERT(sem != (sem_t*) SEM_FAILED);
PROGS+=$(o)/lib/shell/config $(o)/lib/shell/logger
DATAFILES+=$(o)/lib/shell/libucw.sh
-$(o)/lib/shell/config: $(o)/lib/shell/config.o $(LIBSH)
-$(o)/lib/shell/logger: $(o)/lib/shell/logger.o $(LIBSH)
+$(o)/lib/shell/config: $(o)/lib/shell/config.o $(LIBUCW)
+$(o)/lib/shell/logger: $(o)/lib/shell/logger.o $(LIBUCW)
TESTS+=$(addprefix $(o)/lib/shell/,config.test)
Usage: config [-C<configfile>] [-S<section>.<option>=<value>] <sections>\n\
\n\
<sections>\t<section>[;<sections>]\n\
-<section>\t[*]<name>{[<items>]}\n\
+<section>\t[!]<name>{[<items>]}\n\
<items>\t\t[-]<item>[;<items>]\n\
<item>\t\t<static> | <array> | <list>\n\
<static>\t<type><name>[=<value>]\n\
# Tests for configuration parser
-Run: obj/lib/shell/config -C/dev/null -S 'sec1{int1=23; long1=1234567812345678; long2=4321; str1="s1"; str2="s2"}' 'sec1 {#int1; ##long1; -str1; str2; #int2=123; ##long2=1234; #int3=0x10; #int4; $dbl1=001.100; $dbl2}; sec2{str3}'
+Run: ../obj/lib/shell/config -C/dev/null -S 'sec1{int1=23; long1=1234567812345678; long2=4321; str1="s1"; str2="s2"}' 'sec1 {#int1; ##long1; -str1; str2; #int2=123; ##long2=1234; #int3=0x10; #int4; $dbl1=001.100; $dbl2}; sec2{str3}'
Out: CF_sec1_int1='23'
CF_sec1_long1='1234567812345678'
CF_sec1_str2='s2'
CF_sec1_dbl2='0'
CF_sec2_str3=''
-Run: obj/lib/shell/config -C/dev/null -S 'sec1{list1 1 a1 b1; list1:clear; list1 2 a2 b2 3 a3 b3}' 'sec1 {@list1 {#int1; str1; -str2}}'
+Run: ../obj/lib/shell/config -C/dev/null -S 'sec1{list1 1 a1 b1; list1:clear; list1 2 a2 b2 3 a3 b3}' 'sec1 {@list1 {#int1; str1; -str2}}'
Out: CF_sec1_list1_int1[1]='2'
CF_sec1_list1_str1[1]='a2'
CF_sec1_list1_int1[2]='3'
CF_sec1_list1_str1[2]='a3'
-Run: obj/lib/shell/config -C/dev/null -S 'sec1{ar1 a b c d; ar1 a b c; ar2 1 2; ar3 1.1}' 'sec1 {ar1[]; #ar2[2]; $ar3[-2]}'
+Run: ../obj/lib/shell/config -C/dev/null -S 'sec1{ar1 a b c d; ar1 a b c; ar2 1 2; ar3 1.1}' 'sec1 {ar1[]; #ar2[2]; $ar3[-2]}'
Out: CF_sec1_ar1[1]='a'
CF_sec1_ar1[2]='b'
CF_sec1_ar1[3]='c'
CF_sec1_ar2[2]='2'
CF_sec1_ar3[1]='1.1'
-Run: obj/lib/shell/config -C/dev/null -S 'sec1{list1 {str1=1; list2=a b c}; list1 {str1=2; list2=d e}}' 'sec1 {@list1 {str1; @list2{str2}}}'
+Run: ../obj/lib/shell/config -C/dev/null -S 'sec1{list1 {str1=1; list2=a b c}; list1 {str1=2; list2=d e}}' 'sec1 {@list1 {str1; @list2{str2}}}'
Out: CF_sec1_list1_str1[1]='1'
CF_sec1_list1_list2_str2[1]='a'
CF_sec1_list1_list2_str2[2]='b'
CF_sec1_list1_list2_str2[4]='d'
CF_sec1_list1_list2_str2[5]='e'
-Run: obj/lib/shell/config -C/dev/null 'sec{str=a'\''b"c'\''d"\\e'\''f"g}'
+Run: ../obj/lib/shell/config -C/dev/null 'sec{str=a'\''b"c'\''d"\\e'\''f"g}'
Out: CF_sec_str='ab"cd\e'\''fg'
else
log_init(argv[1]);
if (argc > 3)
- log(argv[2][0], argv[3]);
+    msg(argv[2][0], "%s", argv[3]);	/* the message is data, never a format string */
else
while (fgets(buf, sizeof(buf), stdin))
{
c = strchr(buf, '\n');
if (c)
*c = 0;
- log(argv[2][0], buf);
+	msg(argv[2][0], "%s", buf);	/* input lines are data, never a format string */
}
return 0;
}
typedef struct simp_node {
cnode n;
union {
- byte *s;
+ char *s;
void *p;
int i;
uns u;
typedef struct simp2_node {
cnode n;
union {
- byte *s1;
+ char *s1;
void *p1;
int i1;
uns u1;
};
union {
- byte *s2;
+ char *s2;
void *p2;
int i2;
uns u2;
# Test for slists module
-Run: obj/lib/slists-t
+Run: ../obj/lib/slists-t
Out: 9/7/3/1/2/4/8/10/
+++ /dev/null
-/* Test for sorting routines */
-
-#include "lib/lib.h"
-#include "lib/getopt.h"
-#include "lib/fastbuf.h"
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-
-struct key {
- char line[4096];
-};
-
-#define SORT_KEY struct key
-#define SORT_PREFIX(x) s_##x
-#define SORT_PRESORT
-#define SORT_INPUT_FILE
-#define SORT_OUTPUT_FILE
-#define SORT_UNIFY
-
-static inline int
-s_compare(struct key *a, struct key *b)
-{
- return strcmp(a->line, b->line);
-}
-
-static inline int
-s_fetch_key(struct fastbuf *f, struct key *a)
-{
- return !!bgets(f, a->line, sizeof(a->line));
-}
-
-static inline void
-s_copy_data(struct fastbuf *src UNUSED, struct fastbuf *dest, struct key *k)
-{
- bputsn(dest, k->line);
-}
-
-static inline byte *
-s_fetch_item(struct fastbuf *src UNUSED, struct key *k, byte *limit UNUSED)
-{
- byte *end = (byte *) k->line + strlen(k->line) + 1;
-#if 0 /* Testing splits */
- uns r = random_max(10000);
- if (end + r <= limit)
- return end + r;
- else
- return NULL;
-#else
- return end;
-#endif
-}
-
-static inline void
-s_store_item(struct fastbuf *f, struct key *k)
-{
- s_copy_data(NULL, f, k);
-}
-
-#ifdef SORT_UNIFY
-static inline void
-s_merge_data(struct fastbuf *src1 UNUSED, struct fastbuf *src2 UNUSED, struct fastbuf *dest, struct key *k1, struct key *k2 UNUSED)
-{
- s_copy_data(NULL, dest, k1);
-}
-
-static inline struct key *
-s_merge_items(struct key *a, struct key *b UNUSED)
-{
- return a;
-}
-#endif
-
-#include "lib/sorter.h"
-
-int
-main(int argc, char **argv)
-{
- log_init(NULL);
- if (cf_getopt(argc, argv, CF_SHORT_OPTS, CF_NO_LONG_OPTS, NULL) >= 0 ||
- optind != argc - 2)
- {
- fputs("This program supports only the following command-line arguments:\n" CF_USAGE, stderr);
- exit(1);
- }
-
- s_sort(argv[optind], argv[optind+1]);
- return 0;
-}
+++ /dev/null
-/*
- * UCW Library -- Universal Sorter: Global Variables
- *
- * (c) 2001--2004 Martin Mares <mj@ucw.cz>
- * (c) 2004 Robert Spalek <robert@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#ifndef _UCW_SORTER_GLOBALS_H
-#define _UCW_SORTER_GLOBALS_H
-
-extern uns sorter_trace;
-extern uns sorter_presort_bufsize;
-extern uns sorter_stream_bufsize;
-
-extern uns sorter_pass_counter;
-
-#endif
+++ /dev/null
-/*
- * UCW Library -- Universal Sorter
- *
- * (c) 2001--2002 Martin Mares <mj@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-#include "lib/lib.h"
-#include "lib/conf.h"
-#include "lib/fastbuf.h"
-#include "lib/sorter-globals.h"
-
-#include <unistd.h>
-#include <sys/fcntl.h>
-
-uns sorter_trace;
-uns sorter_presort_bufsize = 65536;
-uns sorter_stream_bufsize = 65536;
-
-static struct cf_section sorter_config = {
- CF_ITEMS {
- CF_UNS("Trace", &sorter_trace),
- CF_UNS("PresortBuffer", &sorter_presort_bufsize),
- CF_UNS("StreamBuffer", &sorter_stream_bufsize),
- CF_END
- }
-};
-
-static void CONSTRUCTOR sorter_init_config(void)
-{
- cf_declare_section("Sorter", &sorter_config, 0);
-}
-
-uns sorter_pass_counter;
+++ /dev/null
-/*
- * UCW Library -- Universal Sorter
- *
- * (c) 2001--2004 Martin Mares <mj@ucw.cz>
- * (c) 2004 Robert Spalek <robert@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- */
-
-/*
- * This is not a normal header file, it's a generator of sorting
- * routines. Each time you include it with parameters set in the
- * corresponding preprocessor macros, it generates a file sorter
- * with the parameters given.
- *
- * Recognized parameter macros: (those marked with [*] are mandatory)
- *
- * SORT_KEY [*] data type capable of storing a single key
- * SORT_PREFIX(x) [*] add a name prefix (used on all global names
- * defined by the sorter)
- * SORT_PRESORT include an in-core pre-sorting pass. Beware, when in
- * the pre-sorting mode, it's quite possible that the
- * comparison function will be called with both arguments
- * identical.
- * SORT_UP_TO a C expression, if defined, sorting is stopped after the
- * average run length in the file exceeds the value of this
- * expression (in bytes)
- * SORT_UNIFY merge items with identical keys
- * SORT_UNIQUE all items have distinct keys (checked in debug mode)
- * SORT_REGULAR all items are equally long and they don't contain
- * anything else than the key. In this case, the sorter
- * automatically supplies fetch_key, copy_data, fetch_item
- * and store_item functions. Item size is also expected
- * to be small.
- * SORT_DELETE_INPUT a C expression, if true, the input files are
- * deleted as soon as possible
- * SORT_INPUT_FILE input is a file with this name
- * SORT_INPUT_FB input is a fastbuf stream
- * (can be safely NULL if you want to treat original
- * input in a different way by file read functions)
- * SORT_INPUT_FBPAIR input is a pair of fastbuf streams
- * (not supported by the presorter)
- * SORT_OUTPUT_FILE output is a file with this name
- * SORT_OUTPUT_FB output is a temporary fastbuf stream
- *
- * You also need to define some (usually inline) functions which
- * are called by the sorter to process your data:
- *
- * int PREFIX_compare(SORT_KEY *a, *b)
- * compare two keys, result like strcmp
- * int PREFIX_fetch_key(struct fastbuf *f, SORT_KEY *k)
- * fetch next key, returns nonzero=ok, 0=eof
- * void PREFIX_copy_data(struct fastbuf *src, *dest, SORT_KEY *k)
- * write just fetched key k to dest and copy all data
- * belonging to this key from src to dest.
- * void PREFIX_merge_data(struct fastbuf *src1, *src2, *dest, SORT_KEY *k1, *k2)
- * [used only in case SORT_UNIFY is defined]
- * write just fetched key k to dest and merge data from
- * two records with the same key (k1 and k2 are key occurences
- * in the corresponding streams).
- * byte * PREFIX_fetch_item(struct fastbuf *f, SORT_KEY *k, byte *limit)
- * [used only with SORT_PRESORT]
- * fetch data belonging to a just fetched key and store
- * them to memory following the key, but not over limit.
- * Returns a pointer to first byte after the data
- * or NULL if the data don't fit. For variable-length
- * keys, it can use the rest of SORT_KEY and even return
- * pointer before end of the key.
- * Important: before PREFIX_fetch_item() succeeds, the key
- * must be position independent, the sorter can copy it.
- * void PREFIX_store_item(struct fastbuf *f, SORT_KEY *k)
- * [used only with SORT_PRESORT]
- * write key and all its data read with PREFIX_fetch_data
- * to the stream given.
- * SORT_KEY * PREFIX_merge_items(SORT_KEY *a, SORT_KEY *b)
- * [used only with SORT_PRESORT && SORT_UNIFY]
- * merge two items with the same key, returns pointer
- * to at most one of the items, the rest will be removed
- * from the list of items, but not deallocated, so
- * the remaining item can freely reference data of the
- * other one.
- *
- * After including this file, all parameter macros are automatically
- * undef'd.
- */
-
-#include "lib/sorter-globals.h"
-#include "lib/fastbuf.h"
-#include <unistd.h>
-#include <fcntl.h>
-#include <string.h>
-
-#if !defined(SORT_KEY) || !defined(SORT_PREFIX)
-#error Some of the mandatory configuration macros are missing.
-#endif
-
-#define P(x) SORT_PREFIX(x)
-#define SWAP(x,y,z) do { z=x; x=y; y=z; } while(0)
-
-#if defined(SORT_UNIFY) || defined(SORT_UNIQUE)
-#define LESS <
-#else
-#define LESS <=
-#endif
-
-#if defined(SORT_UNIQUE) && defined(DEBUG_ASSERTS)
-#define SORT_ASSERT_UNIQUE
-#endif
-
-#ifdef SORT_REGULAR
-
-static inline int
-P(fetch_key)(struct fastbuf *in, SORT_KEY *x)
-{
- return breadb(in, x, sizeof(*x));
-}
-
-static inline void
-P(copy_data)(struct fastbuf *in UNUSED, struct fastbuf *out, SORT_KEY *x)
-{
- bwrite(out, x, sizeof(*x));
-}
-
-static inline byte *
-P(fetch_item)(struct fastbuf *in UNUSED, SORT_KEY *x UNUSED, byte *limit UNUSED)
-{
- return (byte *)(x+1);
-}
-
-static inline void
-P(store_item)(struct fastbuf *out, SORT_KEY *x)
-{
- bwrite(out, x, sizeof(*x));
-}
-
-#endif
-
-static struct fastbuf *
-P(flush_out)(struct fastbuf *out)
-{
- if (out)
- brewind(out);
- return out;
-}
-
-static uns
-P(pass)(struct fastbuf **fb1, struct fastbuf **fb2
-#ifdef SORT_UP_TO
- , uns stop_sorting
-#endif
-)
-{
- struct fastbuf *in1 = *fb1;
- struct fastbuf *in2 = *fb2;
- struct fastbuf *out1 = NULL;
- struct fastbuf *out2 = NULL;
- SORT_KEY kbuf1, kbuf2, kbuf3, kbuf4;
- SORT_KEY *kin1 = &kbuf1;
- SORT_KEY *kprev1 = &kbuf2;
- SORT_KEY *kin2 = &kbuf3;
- SORT_KEY *kprev2 = &kbuf4;
- SORT_KEY *kout = NULL;
- SORT_KEY *ktmp;
- int next1, next2, comp;
- int run1, run2;
- uns run_count = 0;
-
- run1 = next1 = in1 ? P(fetch_key)(in1, kin1) : 0;
- run2 = next2 = in2 ? P(fetch_key)(in2, kin2) : 0;
- while (next1 || next2)
- {
- if (!run1)
- comp = 1;
- else if (!run2)
- comp = -1;
- else
- comp = P(compare)(kin1, kin2);
- ktmp = (comp <= 0) ? kin1 : kin2;
- if (!kout || !(P(compare)(kout, ktmp) LESS 0))
- {
- struct fastbuf *t;
-#ifdef SORT_UP_TO
- if (!stop_sorting)
-#endif
- SWAP(out1, out2, t);
- if (!out1)
- out1 = bopen_tmp(sorter_stream_bufsize);
- run_count++;
- }
- if (comp LESS 0)
- {
- P(copy_data)(in1, out1, kin1);
- SWAP(kin1, kprev1, ktmp);
- next1 = P(fetch_key)(in1, kin1);
- run1 = next1 && (P(compare)(kprev1, kin1) LESS 0);
- kout = kprev1;
- }
-#ifdef SORT_UNIFY
- else if (comp == 0)
- {
- P(merge_data)(in1, in2, out1, kin1, kin2);
- SWAP(kin1, kprev1, ktmp);
- next1 = P(fetch_key)(in1, kin1);
- run1 = next1 && (P(compare)(kprev1, kin1) LESS 0);
- SWAP(kin2, kprev2, ktmp);
- next2 = P(fetch_key)(in2, kin2);
- run2 = next2 && (P(compare)(kprev2, kin2) LESS 0);
- kout = kprev2;
- }
-#endif
-#ifdef SORT_ASSERT_UNIQUE
- else if (unlikely(comp == 0))
- ASSERT(0);
-#endif
- else
- {
- P(copy_data)(in2, out1, kin2);
- SWAP(kin2, kprev2, ktmp);
- next2 = P(fetch_key)(in2, kin2);
- run2 = next2 && (P(compare)(kprev2, kin2) LESS 0);
- kout = kprev2;
- }
- if (!run1 && !run2)
- {
- run1 = next1;
- run2 = next2;
- }
- }
- bclose(in1);
- bclose(in2);
- if (sorter_trace)
- log(L_INFO, "Pass %d: %d runs, %d+%d KB", sorter_pass_counter, run_count,
- (out1 ? (int)((btell(out1) + 1023) / 1024) : 0),
- (out2 ? (int)((btell(out2) + 1023) / 1024) : 0));
- *fb1 = P(flush_out)(out1);
- *fb2 = P(flush_out)(out2);
- sorter_pass_counter++;
- return run_count;
-}
-
-#ifdef SORT_PRESORT
-
-#if defined(SORT_REGULAR) && !defined(SORT_UNIFY)
-
-/* If we are doing a simple sort on a regular file, we can use a faster presorting strategy */
-
-static SORT_KEY *P(array);
-
-#define ASORT_PREFIX(x) SORT_PREFIX(x##_array)
-#define ASORT_KEY_TYPE SORT_KEY
-#define ASORT_ELT(i) P(array)[i]
-#define ASORT_LT(x,y) (P(compare)(&(x),&(y)) < 0)
-
-#include "lib/arraysort.h"
-
-static void
-P(presort)(struct fastbuf **fb1, struct fastbuf **fb2)
-{
- struct fastbuf *in = *fb1;
- struct fastbuf *out1 = NULL;
- struct fastbuf *out2 = NULL;
- struct fastbuf *tbuf;
- uns buf_items = sorter_presort_bufsize / sizeof(SORT_KEY);
- uns run_count = 0;
- SORT_KEY last_out = { }, *array;
-
- ASSERT(!*fb2);
- if (buf_items < 2)
- die("PresortBuffer set too low");
- P(array) = array = xmalloc(buf_items * sizeof(SORT_KEY));
-
- for(;;)
- {
- uns s = bread(in, array, buf_items * sizeof(SORT_KEY));
- uns n = s / sizeof(SORT_KEY);
- ASSERT(!(s % sizeof(SORT_KEY)));
- if (!n)
- break;
- P(sort_array)(n);
-#ifdef SORT_ASSERT_UNIQUE
- for (uns i=0; i<n-1; i++)
- if (unlikely(P(compare)(&array[i], &array[i+1]) >= 0))
- ASSERT(0);
- ASSERT(!run_count || P(compare)(&last_out, &array[0]));
-#endif
- if (!run_count || P(compare)(&last_out, &array[0]) > 0)
- {
- run_count++;
-#ifdef SORT_UP_TO
- if (sorter_presort_bufsize < (uns) SORT_UP_TO)
-#endif
- SWAP(out1, out2, tbuf);
- if (!out1)
- out1 = bopen_tmp(sorter_stream_bufsize);
- }
- last_out = array[n-1];
- bwrite(out1, array, n * sizeof(SORT_KEY));
- }
-
- bclose(in);
- if (sorter_trace)
- log(L_INFO, "Pass 0: %d runs, %d+%d KB",
- run_count,
- (out1 ? (int)((btell(out1) + 1023) / 1024) : 0),
- (out2 ? (int)((btell(out2) + 1023) / 1024) : 0));
- *fb1 = P(flush_out)(out1);
- *fb2 = P(flush_out)(out2);
- xfree(array);
-}
-
-#else
-
-#define SORT_NODE struct P(presort_node)
-
-SORT_NODE {
- SORT_NODE *next;
- SORT_KEY key;
-};
-
-static SORT_NODE *
-P(mergesort)(SORT_NODE *x)
-{
- SORT_NODE *f1, **l1, *f2, **l2, **l;
-
- l1 = &f1;
- l2 = &f2;
- while (x)
- {
- *l1 = x;
- l1 = &x->next;
- x = x->next;
- if (!x)
- break;
- *l2 = x;
- l2 = &x->next;
- x = x->next;
- }
- *l1 = *l2 = NULL;
-
- if (f1 && f1->next)
- f1 = P(mergesort)(f1);
- if (f2 && f2->next)
- f2 = P(mergesort)(f2);
- l = &x;
- while (f1 && f2)
- {
- if (P(compare)(&f1->key, &f2->key) <= 0)
- {
- *l = f1;
- l = &f1->next;
- f1 = f1->next;
- }
- else
- {
- *l = f2;
- l = &f2->next;
- f2 = f2->next;
- }
- }
- *l = f1 ? : f2;
- return x;
-}
-
-static void
-P(presort)(struct fastbuf **fb1, struct fastbuf **fb2)
-{
- struct fastbuf *in = *fb1;
- struct fastbuf *out1 = NULL;
- struct fastbuf *out2 = NULL;
- struct fastbuf *tbuf;
- byte *buffer, *bufend, *current;
- SORT_NODE *first, **last, *this, *leftover;
- int cont = 1;
- uns run_count = 0;
- uns giant_count = 0;
- uns split_count = 0;
-
- ASSERT(!*fb2);
- if (sorter_presort_bufsize < 2*sizeof(SORT_NODE))
- die("PresortBuffer set too low");
- buffer = xmalloc(sorter_presort_bufsize);
- bufend = buffer + sorter_presort_bufsize;
- leftover = NULL;
- while (cont)
- {
-#ifdef SORT_UP_TO
- if (sorter_presort_bufsize < SORT_UP_TO)
-#endif
- SWAP(out1, out2, tbuf);
- if (!out1)
- out1 = bopen_tmp(sorter_stream_bufsize);
- current = buffer;
- last = &first;
- if (leftover)
- {
- memmove(buffer, leftover, sizeof(SORT_NODE));
- this = leftover = (SORT_NODE *) buffer;
- split_count++;
- goto get_data;
- }
- for(;;)
- {
- current = (byte *) ALIGN_TO((uintptr_t) current, CPU_STRUCT_ALIGN);
- if (current + sizeof(*this) > bufend)
- break;
- this = (SORT_NODE *) current;
- cont = P(fetch_key)(in, &this->key);
- if (!cont)
- break;
- get_data:
- current = P(fetch_item)(in, &this->key, bufend);
- if (!current)
- {
- if (leftover) /* Single node too large */
- {
- P(copy_data)(in, out1, &leftover->key);
- leftover = NULL;
- run_count++;
- giant_count++;
- }
- else /* Node will be left over to the next phase */
- leftover = this;
- break;
- }
- *last = this;
- last = &this->next;
- leftover = NULL;
- }
- *last = NULL;
- if (!first)
- continue;
-
- first = P(mergesort)(first);
- run_count++;
- while (first)
- {
-#ifdef SORT_UNIFY
- SORT_NODE *second = first->next;
- if (second && !P(compare)(&first->key, &second->key))
- {
- SORT_KEY *n = P(merge_items)(&first->key, &second->key);
- if (n == &first->key)
- first->next = second->next;
- else if (n)
- first = first->next;
- else
- first = second->next;
- continue;
- }
-#endif
-#ifdef SORT_ASSERT_UNIQUE
- ASSERT(!first->next || P(compare)(&first->key, &first->next->key));
-#endif
- P(store_item)(out1, &first->key);
- first = first->next;
- }
- }
-
- bclose(in);
- if (sorter_trace)
- log(L_INFO, "Pass 0: %d runs (%d giants, %d splits), %d+%d KB",
- run_count, giant_count, split_count,
- (out1 ? (int)((btell(out1) + 1023) / 1024) : 0),
- (out2 ? (int)((btell(out2) + 1023) / 1024) : 0));
- *fb1 = P(flush_out)(out1);
- *fb2 = P(flush_out)(out2);
- xfree(buffer);
-}
-
-#endif /* SORT_REGULAR && !SORT_UNIFY */
-
-#endif /* SORT_PRESORT */
-
-static
-#ifdef SORT_OUTPUT_FB
-struct fastbuf *
-#elif defined(SORT_OUTPUT_FILE)
-void
-#else
-#error No output defined.
-#endif
-P(sort)(
-#ifdef SORT_INPUT_FILE
-byte *inname
-#elif defined(SORT_INPUT_FB)
-struct fastbuf *fb1
-#elif defined(SORT_INPUT_FBPAIR)
-struct fastbuf *fb1, struct fastbuf *fb2
-#else
-#error No input defined.
-#endif
-#ifdef SORT_OUTPUT_FILE
-,byte *outname
-#endif
-)
-{
-#ifdef SORT_INPUT_FILE
- struct fastbuf *fb1, *fb2;
- fb1 = bopen(inname, O_RDONLY, sorter_stream_bufsize);
- fb2 = NULL;
-#elif defined(SORT_INPUT_FB)
- struct fastbuf *fb2 = NULL;
-#endif
-
-#ifdef SORT_DELETE_INPUT
- bconfig(fb1, BCONFIG_IS_TEMP_FILE, SORT_DELETE_INPUT);
-#endif
- sorter_pass_counter = 1;
-#ifdef SORT_PRESORT
- P(presort)(&fb1, &fb2);
- if (fb2)
-#endif
-#ifndef SORT_UP_TO
- do P(pass)(&fb1, &fb2); while (fb1 && fb2);
-#else
- {
- sh_off_t run_count, max_run_count = 0;
- if (fb1)
- max_run_count += bfilesize(fb1);
- if (fb2)
- max_run_count += bfilesize(fb2);
-#ifdef SORT_PRESORT
- run_count = max_run_count / sorter_presort_bufsize;
-#else
- run_count = max_run_count;
-#endif
- if (SORT_UP_TO)
- max_run_count /= SORT_UP_TO;
- do
- run_count = P(pass)(&fb1, &fb2, (run_count+1)/2 <= max_run_count);
- while (fb1 && fb2);
- }
-#endif
- if (!fb1)
- fb1 = bopen_tmp(sorter_stream_bufsize);
-
-#ifdef SORT_OUTPUT_FB
- return fb1;
-#else
- bconfig(fb1, BCONFIG_IS_TEMP_FILE, 0);
- if (rename(fb1->name, outname) < 0)
- die("rename(%s,%s): %m", fb1->name, outname);
- bclose(fb1);
-#endif
-}
-
-#undef P
-#undef LESS
-#undef SWAP
-#undef SORT_NODE
-#undef SORT_KEY
-#undef SORT_PREFIX
-#undef SORT_UNIFY
-#undef SORT_UNIQUE
-#undef SORT_ASSERT_UNIQUE
-#undef SORT_REGULAR
-#undef SORT_DELETE_INPUT
-#undef SORT_INPUT_FILE
-#undef SORT_INPUT_FB
-#undef SORT_INPUT_FBPAIR
-#undef SORT_OUTPUT_FILE
-#undef SORT_OUTPUT_FB
-#undef SORT_PRESORT
-#undef SORT_UP_TO
--- /dev/null
+# Makefile for the UCW Sorter (c) 2007 Martin Mares <mj@ucw.cz>
+
+# Have the build create the object directory for the sorter sources.
+DIRS+=lib/sorter
+
+# Sorter object modules and headers appended to the LIBUCW_MODS and
+# LIBUCW_INCLUDES lists.
+LIBUCW_MODS+=sorter/config sorter/govern sorter/sbuck sorter/array
+LIBUCW_INCLUDES+=sorter/array.h sorter/common.h sorter/s-fixint.h \
+	sorter/s-internal.h sorter/s-multiway.h sorter/s-radix.h \
+	sorter/s-twoway.h sorter/sorter.h
+
+# The test program joins PROGS only when debugging tools are configured.
+ifdef CONFIG_DEBUG_TOOLS
+PROGS+=$(o)/lib/sorter/sort-test
+endif
+
+$(o)/lib/sorter/sort-test: $(o)/lib/sorter/sort-test.o $(LIBUCW)
--- /dev/null
+Cleanups:
+o Log messages should show both the original and the new size of the data. The
+ speed should probably be calculated from the former.
+o Buffer sizing in shep-export.
+
+Improvements:
+o When quicksorting a large input (especially in the threaded case), invest
+ more time in picking a good pivot.
+o Overlay presorter I/O with internal sorting.
+
+Users of lib/sorter/array.h which might use radix-sorting:
+indexer/chewer.c
+indexer/lexfreq.c
+indexer/mkgraph.c
+indexer/reftexts.c
--- /dev/null
+/*
+ * UCW Library -- Optimized Array Sorter
+ *
+ * (c) 2003--2007 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#undef LOCAL_DEBUG
+
+#include "lib/lib.h"
+#include "lib/sorter/common.h"
+
+#include <string.h>
+#include <alloca.h>
+
+#define ASORT_MIN_SHIFT 2 /* Buckets are finished by quicksort once the remaining shift drops below this */
+
+#define ASORT_TRACE(x...) ASORT_XTRACE(1, x) /* Trace message at level 1 */
+#define ASORT_XTRACE(level, x...) do { if (sorter_trace_array >= level) msg(L_DEBUG, x); } while(0) /* Log x via msg(L_DEBUG) when sorter_trace_array is at least `level' */
+
+/*
+ * Sequential recursive radix-sort of `num_elts' elements starting at `array',
+ * with `buffer' as an auxiliary area of the same size. `hash_bits' is the
+ * number of hash bits not yet consumed by previous passes.
+ *
+ * If swapped_output == 0, the sorted result is left in `array', otherwise
+ * in `buffer'.
+ */
+static void
+asort_radix(struct asort_context *ctx, void *array, void *buffer, uns num_elts, uns hash_bits, uns swapped_output)
+{
+  uns num_buckets = 1 << ctx->radix_bits;
+  uns shift = 0;
+  if (hash_bits > ctx->radix_bits)
+    shift = hash_bits - ctx->radix_bits;
+  uns cnt[num_buckets];
+
+#if 0
+  static int reported[64];
+  if (!reported[hash_bits]++)
+#endif
+  DBG(">>> n=%u h=%d s=%d sw=%d", num_elts, hash_bits, shift, swapped_output);
+
+  // Count how many elements fall into each bucket
+  memset(cnt, 0, sizeof(cnt));
+  ctx->radix_count(array, num_elts, cnt, shift);
+
+  // Turn the counts into starting offsets of the buckets
+  uns offset = 0;
+  for (uns b = 0; b < num_buckets; b++)
+    {
+      uns size = cnt[b];
+      cnt[b] = offset;
+      offset += size;
+    }
+  ASSERT(offset == num_elts);
+
+  // Distribute the elements to `buffer'. The loop below relies on the split
+  // callback leaving cnt[b] at the end of bucket b.
+  ctx->radix_split(array, buffer, num_elts, cnt, shift);
+
+  // Finish each bucket: recurse with the roles of array and buffer exchanged,
+  // or quicksort it in the buffer and copy it back if the caller wants the
+  // result in `array'.
+  uns bucket_start = 0;
+  for (uns b = 0; b < num_buckets; b++)
+    {
+      uns n = cnt[b] - bucket_start;
+      if (shift >= ASORT_MIN_SHIFT && n >= ctx->radix_threshold)
+        asort_radix(ctx, buffer, array, n, shift, !swapped_output);
+      else
+        {
+          ctx->quicksort(buffer, n);
+          if (!swapped_output)
+            memcpy(array, buffer, n * ctx->elt_size);
+        }
+      bucket_start = cnt[b];
+      array += n * ctx->elt_size;
+      buffer += n * ctx->elt_size;
+    }
+}
+
+#ifdef CONFIG_UCW_THREADS
+
+#include "lib/threads.h"
+#include "lib/workqueue.h"
+#include "lib/eltpool.h"
+
+static uns asort_threads_use_count; // Calls of asort_start_threads() not yet matched by asort_stop_threads()
+static uns asort_threads_ready; // Non-zero while asort_thread_pool is initialized
+static struct worker_pool asort_thread_pool; // Worker pool used by the threaded quicksort and radix-sort below
+
+/*
+ * Estimate the extra stack needed by the recursive radix-sorter: one array
+ * of 2^CONFIG_UCW_RADIX_SORTER_BITS counters per recursion level, assuming
+ * hashes of at most 64 bits.
+ */
+static uns
+rs_estimate_stack(void)
+{
+  uns per_level = sizeof(uns) * (1 << CONFIG_UCW_RADIX_SORTER_BITS);
+  uns levels = 64 / CONFIG_UCW_RADIX_SORTER_BITS + 1;
+  return per_level * levels;
+}
+
+/*
+ * Take a reference on the sorter thread pool. The pool itself is brought up
+ * only when `run' is non-zero and it is not running yet. Each call must be
+ * paired with asort_stop_threads().
+ */
+void
+asort_start_threads(uns run)
+{
+  ucwlib_lock();
+  asort_threads_use_count++;
+  if (!run || asort_threads_ready)
+    {
+      // Only a reference was requested, or the pool is already up
+      ucwlib_unlock();
+      return;
+    }
+
+  // XXX: If somebody overrides the radix-sorter parameters to insane values,
+  // he also should override the stack size to insane values.
+  asort_thread_pool.stack_size = default_thread_stack_size + rs_estimate_stack();
+  asort_thread_pool.num_threads = sorter_threads;
+  ASORT_TRACE("Initializing thread pool (%d threads, %dK stack)", sorter_threads, asort_thread_pool.stack_size >> 10);
+  worker_pool_init(&asort_thread_pool);
+  asort_threads_ready = 1;
+  ucwlib_unlock();
+}
+
+/*
+ * Drop a reference taken by asort_start_threads(). When the last reference
+ * goes away and the pool is running, the pool is shut down.
+ */
+void
+asort_stop_threads(void)
+{
+  ucwlib_lock();
+  asort_threads_use_count--;
+  if (asort_threads_use_count == 0 && asort_threads_ready)
+    {
+      ASORT_TRACE("Shutting down thread pool");
+      worker_pool_cleanup(&asort_thread_pool);
+      asort_threads_ready = 0;
+    }
+  ucwlib_unlock();
+}
+
+struct qs_work { // One chunk of work for the parallel quicksort
+ struct work w; // Must stay first: qs_handle_work() casts struct work * back to struct qs_work *
+ struct asort_context *ctx; // Sorting context the chunk belongs to
+ void *array; // Start of the chunk
+ uns num_elts; // Number of elements in the chunk
+ int left, right; // Split points stored by ctx->quicksplit, or LR_UNDEF if the chunk was sorted completely
+#define LR_UNDEF -100 /* Marker stored in left/right when there is nothing more to split */
+};
+
+/*
+ * Worker callback of the parallel quicksort. Chunks smaller than
+ * ctx->thread_threshold are sorted completely and marked with LR_UNDEF;
+ * larger ones are only partitioned and the split points are handed back
+ * in w->left and w->right, so that threaded_quicksort() can schedule
+ * both parts.
+ */
+static void
+qs_handle_work(struct worker_thread *thr UNUSED, struct work *ww)
+{
+  struct qs_work *w = (struct qs_work *) ww;
+  struct asort_context *ctx = w->ctx;
+
+  DBG("Thread %d: got %u elts", thr->id, w->num_elts);
+  if (w->num_elts < ctx->thread_threshold)
+    {
+      ctx->quicksort(w->array, w->num_elts);
+      w->left = w->right = LR_UNDEF;
+    }
+  else
+    ctx->quicksplit(w->array, w->num_elts, &w->left, &w->right);
+  // left and right are signed (LR_UNDEF is negative), hence %d and not %u
+  DBG("Thread %d: returning l=%d r=%d", thr->id, w->left, w->right);
+}
+
+/*
+ * Get a quicksort work item from ctx->eltpool with priority 0, the handler
+ * and the context filled in. The caller sets array and num_elts.
+ */
+static struct qs_work *
+qs_alloc_work(struct asort_context *ctx)
+{
+  struct qs_work *item = ep_alloc(ctx->eltpool);
+
+  item->ctx = ctx;
+  item->w.go = qs_handle_work;
+  item->w.priority = 0;
+  return item;
+}
+
+/*
+ * Allocate a work item covering `num_elts' elements at `array' and submit it
+ * to `q'. A child of `parent' gets the parent's priority plus one; the root
+ * item (parent == NULL) keeps the priority set by qs_alloc_work().
+ */
+static void
+qs_spawn(struct asort_context *ctx, struct work_queue *q, struct qs_work *parent, void *array, uns num_elts)
+{
+  struct qs_work *item = qs_alloc_work(ctx);
+
+  item->array = array;
+  item->num_elts = num_elts;
+  if (parent)
+    item->w.priority = parent->w.priority + 1;
+  work_submit(q, &item->w);
+}
+
+/*
+ * Quicksort ctx->array using the worker pool: every finished item which was
+ * only partitioned spawns up to two new items for its parts, until all
+ * chunks have been sorted completely.
+ */
+static void
+threaded_quicksort(struct asort_context *ctx)
+{
+  struct work_queue q;
+  struct qs_work *done;
+
+  asort_start_threads(1);
+  work_queue_init(&asort_thread_pool, &q);
+  ctx->eltpool = ep_new(sizeof(struct qs_work), 1000);
+
+  // Seed the queue with the whole array
+  qs_spawn(ctx, &q, NULL, ctx->array, ctx->num_elts);
+
+  for (;;)
+    {
+      done = (struct qs_work *) work_wait(&q);
+      if (!done)
+        break;
+      if (done->left != LR_UNDEF)
+        {
+          // Part [0, right], if it has more than one element
+          if (done->right > 0)
+            qs_spawn(ctx, &q, done, done->array, done->right + 1);
+          // Part [left, num_elts-1], if it has more than one element
+          if (done->left < (int)done->num_elts - 1)
+            qs_spawn(ctx, &q, done, done->array + done->left * ctx->elt_size, done->num_elts - done->left);
+        }
+      ep_free(ctx->eltpool, done);
+    }
+
+  ep_delete(ctx->eltpool);
+  work_queue_cleanup(&q);
+  asort_stop_threads();
+}
+
+struct rs_work { // One chunk of work for the parallel radix-sort
+ struct work w; // Must stay first: the rs_* callbacks cast struct work * back to struct rs_work *
+ struct asort_context *ctx; // Sorting context the chunk belongs to
+ void *array, *buffer; // Like asort_radix().
+ uns num_elts; // Number of elements in the chunk
+ uns shift; // Shift passed to radix_count/radix_split; rs_finish() passes it to asort_radix() as the remaining hash bits
+ uns swap_output; // Read by rs_finish() only: non-zero if the result belongs to `buffer'
+ uns cnt[0]; // Per-bucket counters; space exists only in the items allocated by threaded_radixsort()
+};
+
+/* Worker callback: count the radix digits of one block into the item's own cnt[]. */
+static void
+rs_count(struct worker_thread *thr UNUSED, struct work *ww)
+{
+  struct rs_work *job = (struct rs_work *) ww;
+  struct asort_context *ctx = job->ctx;
+
+  DBG("Thread %d: Counting %u items, shift=%d", thr->id, job->num_elts, job->shift);
+  ctx->radix_count(job->array, job->num_elts, job->cnt, job->shift);
+  DBG("Thread %d: Counting done", thr->id);
+}
+
+/* Worker callback: move one block to the buffer, using the item's cnt[] as per-bucket write positions. */
+static void
+rs_split(struct worker_thread *thr UNUSED, struct work *ww)
+{
+  struct rs_work *job = (struct rs_work *) ww;
+  struct asort_context *ctx = job->ctx;
+
+  DBG("Thread %d: Splitting %u items, shift=%d", thr->id, job->num_elts, job->shift);
+  ctx->radix_split(job->array, job->buffer, job->num_elts, job->cnt, job->shift);
+  DBG("Thread %d: Splitting done", thr->id);
+}
+
+/*
+ * Finish a single bucket sequentially, either by further radix passes or by
+ * quicksort. Runs as a worker callback, but rs_radix() also calls it directly
+ * with thr == NULL, in which case the debugging messages are skipped.
+ */
+static void
+rs_finish(struct worker_thread *thr UNUSED, struct work *ww)
+{
+  struct rs_work *job = (struct rs_work *) ww;
+  struct asort_context *ctx = job->ctx;
+
+  if (thr)
+    DBG("Thread %d: Finishing %u items, shift=%d", thr->id, job->num_elts, job->shift);
+  if (job->shift >= ASORT_MIN_SHIFT && job->num_elts >= ctx->radix_threshold)
+    asort_radix(ctx, job->array, job->buffer, job->num_elts, job->shift, job->swap_output);
+  else
+    {
+      // Sort where the data are and move them if the result belongs to the other area
+      ctx->quicksort(job->array, job->num_elts);
+      if (job->swap_output)
+        memcpy(job->buffer, job->array, job->num_elts * ctx->elt_size);
+    }
+  if (thr)
+    DBG("Thread %d: Finishing done", thr->id);
+}
+
+/* Collect finished items from ctx->rs_work_queue until work_wait() returns NULL, returning each to the element pool. */
+static void
+rs_wait_small(struct asort_context *ctx)
+{
+  for (;;)
+    {
+      struct rs_work *done = (struct rs_work *) work_wait(ctx->rs_work_queue);
+      if (!done)
+        break;
+      DBG("Reaping small chunk of %u items", done->num_elts);
+      ep_free(ctx->eltpool, done);
+    }
+}
+
+/*
+ * One pass of the parallel radix-sort. The array is cut to sorter_threads
+ * blocks which are counted and then split in parallel; the resulting buckets
+ * are either recursed on, sorted inline, or handed to the workers as small
+ * chunks. Parameters have the same meaning as in asort_radix().
+ */
+static void
+rs_radix(struct asort_context *ctx, void *array, void *buffer, uns num_elts, uns hash_bits, uns swapped_output)
+{
+  uns num_buckets = 1 << ctx->radix_bits;
+  uns shift = 0;
+  if (hash_bits > ctx->radix_bits)
+    shift = hash_bits - ctx->radix_bits;
+  uns cnt[num_buckets];
+  uns blksize = num_elts / sorter_threads;
+  DBG(">>> n=%u h=%d s=%d blk=%u sw=%d", num_elts, hash_bits, shift, blksize, swapped_output);
+
+  // If there are any small chunks in progress, wait for them to finish
+  rs_wait_small(ctx);
+
+  // Start parallel counting; the last block also takes the remainder
+  void *block = array;
+  for (uns t = 0; t < sorter_threads; t++)
+    {
+      struct rs_work *job = ctx->rs_works[t];
+      uns block_elts = blksize;
+      if (t + 1 == sorter_threads)
+        block_elts += num_elts % sorter_threads;
+      job->w.priority = 0;
+      job->w.go = rs_count;
+      job->ctx = ctx;
+      job->array = block;
+      job->buffer = buffer;
+      job->num_elts = block_elts;
+      job->shift = shift;
+      memset(job->cnt, 0, sizeof(uns) * num_buckets);
+      block += block_elts * ctx->elt_size;
+      work_submit(ctx->rs_work_queue, &job->w);
+    }
+
+  // Get bucket sizes from the counts
+  memset(cnt, 0, sizeof(cnt));
+  for (uns t = 0; t < sorter_threads; t++)
+    {
+      struct rs_work *done = (struct rs_work *) work_wait(ctx->rs_work_queue);
+      ASSERT(done);
+      for (uns b = 0; b < num_buckets; b++)
+        cnt[b] += done->cnt[b];
+    }
+
+  // Calculate bucket starts
+  uns offset = 0;
+  for (uns b = 0; b < num_buckets; b++)
+    {
+      uns size = cnt[b];
+      cnt[b] = offset;
+      offset += size;
+    }
+  ASSERT(offset == num_elts);
+
+  // Start parallel splitting: each block gets its own write position inside
+  // every bucket, and cnt[b] ends up at the end of bucket b.
+  for (uns t = 0; t < sorter_threads; t++)
+    {
+      struct rs_work *job = ctx->rs_works[t];
+      job->w.go = rs_split;
+      for (uns b = 0; b < num_buckets; b++)
+        {
+          uns mine = job->cnt[b];
+          job->cnt[b] = cnt[b];
+          cnt[b] += mine;
+        }
+      work_submit(ctx->rs_work_queue, &job->w);
+    }
+  ASSERT(cnt[num_buckets-1] == num_elts);
+
+  // Wait for splits to finish
+  while (work_wait(ctx->rs_work_queue))
+    ;
+
+  // Recurse on buckets
+  uns bucket_start = 0;
+  for (uns b = 0; b < num_buckets; b++)
+    {
+      uns n = cnt[b] - bucket_start;
+      if (!n)
+        continue;
+      if (shift >= ASORT_MIN_SHIFT && n >= ctx->thread_threshold)
+        rs_radix(ctx, buffer, array, n, shift, !swapped_output);
+      else
+        {
+          // The data are in `buffer' now, so the roles of the two areas are exchanged
+          struct rs_work *job = ep_alloc(ctx->eltpool);
+          job->w.priority = 0;
+          job->w.go = rs_finish;
+          job->ctx = ctx;
+          job->array = buffer;
+          job->buffer = array;
+          job->num_elts = n;
+          job->shift = shift;
+          job->swap_output = !swapped_output;
+          if (n >= ctx->thread_chunk)
+            {
+              DBG("Scheduling block %u+%u", bucket_start, n);
+              work_submit(ctx->rs_work_queue, &job->w);
+            }
+          else
+            {
+              DBG("Sorting block %u+%u inline", bucket_start, n);
+              rs_finish(NULL, &job->w);
+              ep_free(ctx->eltpool, job);
+            }
+        }
+      bucket_start = cnt[b];
+      array += n * ctx->elt_size;
+      buffer += n * ctx->elt_size;
+    }
+}
+
+/*
+ * Radix-sort ctx->array using the worker pool. `swap' has the meaning of
+ * swapped_output in asort_radix(): non-zero if the result should end up
+ * in ctx->buffer.
+ */
+static void
+threaded_radixsort(struct asort_context *ctx, uns swap)
+{
+  struct work_queue q;
+  // A work item followed by one counter per bucket
+  size_t work_size = sizeof(struct rs_work) + sizeof(uns) * (1 << ctx->radix_bits);
+
+  asort_start_threads(1);
+  work_queue_init(&asort_thread_pool, &q);
+
+  // Prepare work structures for counting and splitting.
+  // We use big_alloc(), because we want to avoid cacheline aliasing between threads.
+  ctx->rs_work_queue = &q;
+  ctx->rs_works = alloca(sizeof(struct rs_work *) * sorter_threads);
+  for (uns t = 0; t < sorter_threads; t++)
+    ctx->rs_works[t] = big_alloc(work_size);
+
+  // Prepare a pool for all remaining small bits which will be sorted on background.
+  ctx->eltpool = ep_new(sizeof(struct rs_work), 1000);
+
+  // Do the big splitting
+  rs_radix(ctx, ctx->array, ctx->buffer, ctx->num_elts, ctx->hash_bits, swap);
+  for (uns t = 0; t < sorter_threads; t++)
+    big_free(ctx->rs_works[t], work_size);
+
+  // Finish the small blocks
+  rs_wait_small(ctx);
+
+  ASSERT(!ctx->eltpool->num_allocated);
+  ep_delete(ctx->eltpool);
+  work_queue_cleanup(&q);
+  asort_stop_threads();
+}
+
+#else
+
+/* Without CONFIG_UCW_THREADS there is no thread pool, so both calls do nothing. */
+void
+asort_start_threads(uns run UNUSED)
+{
+}
+
+void
+asort_stop_threads(void)
+{
+}
+
+#endif
+
+/*
+ * Predict whether the radix-sorter should be asked to leave its output in
+ * the buffer (1) or in the original array (0).
+ *
+ * `bits' starts at ctx->radix_bits, so the loop body runs at most once.
+ * NOTE(review): it may have been meant to start at ctx->hash_bits -- confirm.
+ * asort_run() passes the returned value to the sorter and picks the output
+ * area according to it.
+ */
+static uns
+predict_swap(struct asort_context *ctx)
+{
+  uns bits = ctx->radix_bits;
+  uns elts = ctx->num_elts;
+  uns swap = 0;
+
+  for (;;)
+    {
+      if (elts < ctx->radix_threshold || bits < ASORT_MIN_SHIFT)
+        break;
+      DBG("Predicting pass: %u elts, %d bits", elts, bits);
+      swap ^= 1;
+      elts >>= ctx->radix_bits;
+      bits = (bits > ctx->radix_bits) ? bits - ctx->radix_bits : 0;
+    }
+  return swap;
+}
+
+/*
+ * Entry point of the array sorter: pick radix-sort or quicksort and the
+ * threaded or sequential variant, and sort ctx->array. When radix-sorting
+ * leaves the result in the auxiliary buffer, ctx->array is redirected to it.
+ */
+void
+asort_run(struct asort_context *ctx)
+{
+  // Translate the configured byte limits to numbers of elements
+  ctx->thread_threshold = MIN(sorter_thread_threshold / ctx->elt_size, ~0U);
+  ctx->thread_chunk = MIN(sorter_thread_chunk / ctx->elt_size, ~0U);
+  ctx->radix_threshold = MIN(sorter_radix_threshold / ctx->elt_size, ~0U);
+
+  ASORT_TRACE("Array-sorting %u items per %u bytes, hash_bits=%d", ctx->num_elts, ctx->elt_size, ctx->hash_bits);
+  ASORT_XTRACE(2, "Limits: thread_threshold=%u, thread_chunk=%u, radix_threshold=%u",
+    ctx->thread_threshold, ctx->thread_chunk, ctx->radix_threshold);
+
+  uns allow_threads UNUSED = (sorter_threads > 1 &&
+                              ctx->num_elts >= ctx->thread_threshold &&
+                              !(sorter_debug & SORT_DEBUG_ASORT_NO_THREADS));
+
+  // Radix-sort needs enough elements, enough hash bits and a split callback
+  uns use_radix = (ctx->num_elts >= ctx->radix_threshold &&
+                   ctx->hash_bits > ASORT_MIN_SHIFT &&
+                   ctx->radix_split &&
+                   !(sorter_debug & SORT_DEBUG_ASORT_NO_RADIX));
+
+  if (use_radix)
+    {
+      uns swap = predict_swap(ctx);
+#ifdef CONFIG_UCW_THREADS
+      if (allow_threads)
+        {
+          ASORT_XTRACE(2, "Decided to use parallel radix-sort (swap=%d)", swap);
+          threaded_radixsort(ctx, swap);
+        }
+      else
+#endif
+        {
+          ASORT_XTRACE(2, "Decided to use sequential radix-sort (swap=%d)", swap);
+          asort_radix(ctx, ctx->array, ctx->buffer, ctx->num_elts, ctx->hash_bits, swap);
+        }
+      if (swap)
+        ctx->array = ctx->buffer;
+    }
+  else
+    {
+#ifdef CONFIG_UCW_THREADS
+      if (allow_threads)
+        {
+          ASORT_XTRACE(2, "Decided to use parallel quicksort");
+          threaded_quicksort(ctx);
+        }
+      else
+#endif
+        {
+          ASORT_XTRACE(2, "Decided to use sequential quicksort");
+          ctx->quicksort(ctx->array, ctx->num_elts);
+        }
+    }
+
+  ASORT_XTRACE(2, "Array-sort finished");
+}
--- /dev/null
+/*
+ * UCW Library -- Optimized Array Sorter
+ *
+ * (c) 2003--2007 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+/*
+ * This is a generator of routines for sorting huge arrays, similar to the one
+ * in lib/arraysort.h. It cannot handle discontiguous arrays, but it is able
+ * to employ radix-sorting if a monotone hash function is available and also
+ * use several threads in parallel on SMP systems (this assumes that all
+ * callbacks you provide are thread-safe).
+ *
+ * It is usually called internally by the generic sorter machinery, but
+ * you are free to use it explicitly if you need to.
+ *
+ * So much for advocacy, here are the parameters (those marked with [*]
+ * are mandatory):
+ *
+ * ASORT_PREFIX(x) [*] add a name prefix (used on all global names
+ * defined by the sorter)
+ * ASORT_KEY_TYPE [*] data type of a single array entry key
+ * ASORT_LT(x,y) x < y for ASORT_KEY_TYPE (default: "x<y")
+ * ASORT_HASH(x) a monotone hash function (satisfying hash(x) < hash(y) => x<y)
+ * ASORT_LONG_HASH hashes are 64-bit numbers (default is 32 bits)
+ *
+ * Fine-tuning parameters: (if you really insist)
+ *
+ * ASORT_THRESHOLD threshold for switching between quicksort and insertsort
+ * ASORT_RADIX_BITS how many bits of the hash functions are to be used at once for
+ * radix-sorting.
+ *
+ * After including this file, a function
+ * ASORT_KEY_TYPE *ASORT_PREFIX(sort)(ASORT_KEY_TYPE *array, uns num_elts [, ASORT_KEY_TYPE *buf, uns hash_bits])
+ * is declared and all parameter macros are automatically undef'd. Here `buf' is an
+ * auxiliary buffer of the same size as the input array, required whenever radix
+ * sorting should be used, and `hash_bits' is the number of significant bits returned
+ * by the hash function. If the buffer is specified, the sorting function returns either
+ * a pointer to the input array or to the buffer, depending on where the result is stored.
+ * If you do not use hashing, these parameters should be omitted.
+ */
+
+#include "lib/sorter/common.h"
+
+#define Q(x) ASORT_PREFIX(x) /* Shorthand for prefixed names; undefined again at the end of this file */
+
+typedef ASORT_KEY_TYPE Q(key); /* Element type of the sorted array */
+
+#ifndef ASORT_LT
+#define ASORT_LT(x,y) ((x) < (y)) /* Default comparison */
+#endif
+
+#ifndef ASORT_SWAP
+#define ASORT_SWAP(i,j) do { Q(key) tmp = array[i]; array[i]=array[j]; array[j]=tmp; } while (0) /* Exchange two elements of the local `array' by index */
+#endif
+
+#ifndef ASORT_THRESHOLD
+#define ASORT_THRESHOLD 8 /* Guesswork and experimentation; partitions with at most this index span are left to the final insertsort */
+#endif
+
+#ifndef ASORT_RADIX_BITS
+#define ASORT_RADIX_BITS CONFIG_UCW_RADIX_SORTER_BITS /* Default number of hash bits per radix pass */
+#endif
+#define ASORT_RADIX_MASK ((1 << (ASORT_RADIX_BITS)) - 1) /* Mask selecting one radix digit */
+
+/* QuickSort with optimizations a'la Sedgewick, inspired by qsort() from GNU libc. */
+
+/*
+ * Sort `num_elts' keys starting at `array_ptr' in place.
+ *
+ * Median-of-three quicksort with an explicit stack is run until no
+ * partition spanning ASORT_THRESHOLD or more indices remains; the almost
+ * sorted array is then finished by a single insertsort pass. Arrays with
+ * at most one element are returned untouched.
+ */
+static void Q(quicksort)(void *array_ptr, uns num_elts)
+{
+  Q(key) *array = array_ptr;
+  /* The larger partition is always the one pushed, so the depth is logarithmic */
+  struct stk { int l, r; } stack[8*sizeof(uns)];
+  int l, r, left, right, m;
+  uns sp = 0;
+  Q(key) pivot;
+
+  if (num_elts <= 1)
+    return;
+
+  left = 0;
+  right = num_elts - 1;
+  for(;;)
+    {
+      l = left;
+      r = right;
+      /* Not (l+r)/2: that sum overflows int once the array has more than 2^30 elements */
+      m = l + (r - l) / 2;
+      /* Median of three: order array[l] <= array[m] <= array[r] */
+      if (ASORT_LT(array[m], array[l]))
+        ASORT_SWAP(l,m);
+      if (ASORT_LT(array[r], array[m]))
+        {
+          ASORT_SWAP(m,r);
+          if (ASORT_LT(array[m], array[l]))
+            ASORT_SWAP(l,m);
+        }
+      pivot = array[m];
+      do
+        {
+          while (ASORT_LT(array[l], pivot))
+            l++;
+          while (ASORT_LT(pivot, array[r]))
+            r--;
+          if (l < r)
+            {
+              ASORT_SWAP(l,r);
+              l++;
+              r--;
+            }
+          else if (l == r)
+            {
+              l++;
+              r--;
+            }
+        }
+      while (l <= r);
+      if ((r - left) >= ASORT_THRESHOLD && (right - l) >= ASORT_THRESHOLD)
+        {
+          /* Both partitions ok => push the larger one */
+          if ((r - left) > (right - l))
+            {
+              stack[sp].l = left;
+              stack[sp].r = r;
+              left = l;
+            }
+          else
+            {
+              stack[sp].l = l;
+              stack[sp].r = right;
+              right = r;
+            }
+          sp++;
+        }
+      else if ((r - left) >= ASORT_THRESHOLD)
+        {
+          /* Left partition OK, right undersize */
+          right = r;
+        }
+      else if ((right - l) >= ASORT_THRESHOLD)
+        {
+          /* Right partition OK, left undersize */
+          left = l;
+        }
+      else
+        {
+          /* Both partitions undersize => pop */
+          if (!sp)
+            break;
+          sp--;
+          left = stack[sp].l;
+          right = stack[sp].r;
+        }
+    }
+
+  /*
+   * We have a partially sorted array, finish by insertsort. Inspired
+   * by qsort() in GNU libc.
+   */
+
+  /* Find minimal element which will serve as a barrier */
+  r = MIN(num_elts, ASORT_THRESHOLD);
+  m = 0;
+  for (l=1; l<r; l++)
+    if (ASORT_LT(array[l], array[m]))
+      m = l;
+  ASORT_SWAP(0,m);
+
+  /* Insertion sort; the barrier at index 0 stops the backward scan */
+  for (m=1; m<(int)num_elts; m++)
+    {
+      l=m;
+      while (ASORT_LT(array[m], array[l-1]))
+        l--;
+      while (l < m)
+        {
+          ASORT_SWAP(l,m);
+          l++;
+        }
+    }
+}
+
+/* Just the splitting part of QuickSort */
+
+/*
+ * Perform a single median-of-three partitioning step on `num_elts' keys
+ * starting at `array_ptr'. On return, *rightp is the last index of the left
+ * part and *leftp the first index of the right part (so *rightp < *leftp);
+ * elements in between, if any, need no further sorting.
+ *
+ * Arrays with at most one element are not touched and are reported as
+ * *leftp = num_elts, *rightp = -1, i.e., with both parts empty.
+ */
+static void Q(quicksplit)(void *array_ptr, uns num_elts, int *leftp, int *rightp)
+{
+  Q(key) *array = array_ptr;
+  int l, r, m;
+  Q(key) pivot;
+
+  /* Nothing to split; for an empty array the code below would read array[0] and array[-1] */
+  if (num_elts <= 1)
+    {
+      *leftp = num_elts;
+      *rightp = -1;
+      return;
+    }
+
+  l = 0;
+  r = num_elts - 1;
+  /* Not (l+r)/2: that sum overflows int once the array has more than 2^30 elements */
+  m = l + (r - l) / 2;
+  /* Median of three: order array[l] <= array[m] <= array[r] */
+  if (ASORT_LT(array[m], array[l]))
+    ASORT_SWAP(l,m);
+  if (ASORT_LT(array[r], array[m]))
+    {
+      ASORT_SWAP(m,r);
+      if (ASORT_LT(array[m], array[l]))
+        ASORT_SWAP(l,m);
+    }
+  pivot = array[m];
+  do
+    {
+      while (ASORT_LT(array[l], pivot))
+        l++;
+      while (ASORT_LT(pivot, array[r]))
+        r--;
+      if (l < r)
+        {
+          ASORT_SWAP(l,r);
+          l++;
+          r--;
+        }
+      else if (l == r)
+        {
+          l++;
+          r--;
+        }
+    }
+  while (l <= r);
+  *leftp = l;
+  *rightp = r;
+}
+
+#ifdef ASORT_HASH
+
+static void Q(radix_count)(void *src_ptr, uns num_elts, uns *cnt, uns shift)
+{ /* For each of the num_elts keys, increment cnt[] at the digit (hash >> shift) & ASORT_RADIX_MASK; cnt is not cleared here */
+ Q(key) *src = src_ptr;
+ uns i;
+
+ switch (shift) /* One case per supported shift, so each loop shifts by a literal constant (presumably for speed) */
+ {
+#define RC(s) \
+ case s: \
+ for (i=0; i<num_elts; i++) \
+ cnt[ (ASORT_HASH(src[i]) >> s) & ASORT_RADIX_MASK ] ++; \
+ break; \
+
+#ifdef ASORT_LONG_HASH
+ RC(63); RC(62); RC(61); RC(60); RC(59); RC(58); RC(57); RC(56);
+ RC(55); RC(54); RC(53); RC(52); RC(51); RC(50); RC(49); RC(48);
+ RC(47); RC(46); RC(45); RC(44); RC(43); RC(42); RC(41); RC(40);
+ RC(39); RC(38); RC(37); RC(36); RC(35); RC(34); RC(33); RC(32);
+#endif
+ RC(31); RC(30); RC(29); RC(28); RC(27); RC(26); RC(25); RC(24);
+ RC(23); RC(22); RC(21); RC(20); RC(19); RC(18); RC(17); RC(16);
+ RC(15); RC(14); RC(13); RC(12); RC(11); RC(10); RC(9); RC(8);
+ RC(7); RC(6); RC(5); RC(4); RC(3); RC(2); RC(1); RC(0);
+ default: /* Shifts without a case (32 and above unless ASORT_LONG_HASH is defined) are rejected */
+ ASSERT(0);
+ }
+#undef RC
+}
+
+static void Q(radix_split)(void *src_ptr, void *dest_ptr, uns num_elts, uns *ptrs, uns shift)
+{ /* Copy each key to dest[ptrs[digit]] and advance ptrs[digit], where digit = (hash >> shift) & ASORT_RADIX_MASK */
+ Q(key) *src = src_ptr, *dest = dest_ptr;
+ uns i;
+
+ switch (shift) /* One case per supported shift, so each loop shifts by a literal constant (presumably for speed) */
+ {
+#define RS(s) \
+ case s: \
+ for (i=0; i<num_elts; i++) \
+ dest[ ptrs[ (ASORT_HASH(src[i]) >> s) & ASORT_RADIX_MASK ]++ ] = src[i]; \
+ break;
+
+#ifdef ASORT_LONG_HASH
+ RS(63); RS(62); RS(61); RS(60); RS(59); RS(58); RS(57); RS(56);
+ RS(55); RS(54); RS(53); RS(52); RS(51); RS(50); RS(49); RS(48);
+ RS(47); RS(46); RS(45); RS(44); RS(43); RS(42); RS(41); RS(40);
+ RS(39); RS(38); RS(37); RS(36); RS(35); RS(34); RS(33); RS(32);
+#endif
+ RS(31); RS(30); RS(29); RS(28); RS(27); RS(26); RS(25); RS(24);
+ RS(23); RS(22); RS(21); RS(20); RS(19); RS(18); RS(17); RS(16);
+ RS(15); RS(14); RS(13); RS(12); RS(11); RS(10); RS(9); RS(8);
+ RS(7); RS(6); RS(5); RS(4); RS(3); RS(2); RS(1); RS(0);
+ default: /* Shifts without a case (32 and above unless ASORT_LONG_HASH is defined) are rejected */
+ ASSERT(0);
+ }
+#undef RS
+}
+
+#endif
+
+/*
+ * The generated entry point: describe the array and the generated callbacks
+ * in a struct asort_context and hand it over to asort_run(). Returns the
+ * location of the sorted data, which is either `array' or (when radix-sorting)
+ * possibly `buffer'.
+ */
+static Q(key) *Q(sort)(Q(key) *array, uns num_elts
+#ifdef ASORT_HASH
+  , Q(key) *buffer, uns hash_bits
+#endif
+  )
+{
+  /* Members not set explicitly start as zero */
+  struct asort_context ctx = { .array = array };
+
+  ctx.num_elts = num_elts;
+  ctx.elt_size = sizeof(Q(key));
+  ctx.quicksort = Q(quicksort);
+  ctx.quicksplit = Q(quicksplit);
+#ifdef ASORT_HASH
+  ctx.buffer = buffer;
+  ctx.hash_bits = hash_bits;
+  ctx.radix_count = Q(radix_count);
+  ctx.radix_split = Q(radix_split);
+  ctx.radix_bits = ASORT_RADIX_BITS;
+#endif
+
+  asort_run(&ctx);
+  return ctx.array;
+}
+
+#undef ASORT_HASH /* Drop all parameter macros, so that the file can be included again with different settings */
+#undef ASORT_KEY_TYPE
+#undef ASORT_LONG_HASH
+#undef ASORT_LT
+#undef ASORT_PAGE_ALIGNED /* NOTE(review): not referenced anywhere in this header -- confirm whether it is still needed */
+#undef ASORT_PREFIX
+#undef ASORT_RADIX_BITS
+#undef ASORT_RADIX_MASK
+#undef ASORT_SWAP
+#undef ASORT_THRESHOLD
+#undef Q
--- /dev/null
+/*
+ * UCW Library -- Universal Sorter: Common Declarations
+ *
+ * (c) 2007 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_SORTER_COMMON_H
+#define _UCW_SORTER_COMMON_H
+
+#include "lib/clists.h"
+
+/* Configuration variables (NOTE(review): sorter_stream_bufsize is not among the globals defined at the top of config.c -- confirm it is defined elsewhere) */
+extern uns sorter_trace, sorter_trace_array, sorter_stream_bufsize;
+extern uns sorter_debug, sorter_min_radix_bits, sorter_max_radix_bits, sorter_add_radix_bits; // sorter_debug is a mask of enum sort_debug flags
+extern uns sorter_min_multiway_bits, sorter_max_multiway_bits;
+extern uns sorter_threads; // The array sorter uses threads only if this is greater than 1
+extern u64 sorter_bufsize, sorter_small_input;
+extern u64 sorter_thread_threshold, sorter_thread_chunk, sorter_radix_threshold; // In bytes; asort_run() translates them to elements
+extern struct fb_params sorter_fb_params, sorter_small_fb_params;
+
+#define SORT_TRACE(x...) do { if (sorter_trace) msg(L_DEBUG, x); } while(0) /* Log x when sorter_trace is non-zero */
+#define SORT_XTRACE(level, x...) do { if (sorter_trace >= level) msg(L_DEBUG, x); } while(0) /* Log x when sorter_trace is at least `level' */
+
+enum sort_debug { // Bit flags for sorter_debug (the ASORT_* ones are tested in asort_run())
+ SORT_DEBUG_NO_PRESORT = 1,
+ SORT_DEBUG_NO_JOIN = 2,
+ SORT_DEBUG_KEEP_BUCKETS = 4,
+ SORT_DEBUG_NO_RADIX = 8,
+ SORT_DEBUG_NO_MULTIWAY = 16,
+ SORT_DEBUG_ASORT_NO_RADIX = 32, // Array sorter: always quicksort, never radix-sort
+ SORT_DEBUG_ASORT_NO_THREADS = 64 // Array sorter: never use the threaded variants
+};
+
+struct sort_bucket;
+
+struct sort_context { // State of one sorter run, passed to sorter_run() and to all callbacks below
+ struct fastbuf *in_fb; // presumably the input stream -- TODO confirm
+ struct fastbuf *out_fb; // presumably the output stream -- TODO confirm
+ uns hash_bits;
+ u64 in_size;
+ struct fb_params *fb_params;
+
+ struct mempool *pool;
+ clist bucket_list;
+ void *big_buf; // presumably the buffer managed by sorter_alloc_buf()/sorter_free_buf() -- TODO confirm
+ size_t big_buf_size;
+
+ int (*custom_presort)(struct fastbuf *dest, void *buf, size_t bufsize);
+
+ // Take as much as possible from the source bucket, sort it in memory and dump to destination bucket.
+ // Return 1 if there is more data available in the source bucket.
+ int (*internal_sort)(struct sort_context *ctx, struct sort_bucket *in, struct sort_bucket *out, struct sort_bucket *out_only);
+
+ // Estimate how much input data from `b' will fit in the internal sorting buffer.
+ u64 (*internal_estimate)(struct sort_context *ctx, struct sort_bucket *b);
+
+ // Two-way split/merge: merge up to 2 source buckets to up to 2 destination buckets.
+ // Bucket arrays are NULL-terminated.
+ void (*twoway_merge)(struct sort_context *ctx, struct sort_bucket **ins, struct sort_bucket **outs);
+
+ // Multi-way merge: merge an arbitrary number of source buckets to a single destination bucket.
+ void (*multiway_merge)(struct sort_context *ctx, struct sort_bucket **ins, struct sort_bucket *out);
+
+ // Radix split according to hash function
+ void (*radix_split)(struct sort_context *ctx, struct sort_bucket *in, struct sort_bucket **outs, uns bitpos, uns numbits);
+
+ // State variables of internal_sort
+ void *key_buf;
+ int more_keys;
+
+ // Timing
+ timestamp_t start_time;
+ uns last_pass_time;
+ uns total_int_time, total_pre_time, total_ext_time;
+};
+
+void sorter_run(struct sort_context *ctx);
+
+/* Buffers */
+
+void *sorter_alloc(struct sort_context *ctx, uns size);
+void sorter_prepare_buf(struct sort_context *ctx);
+void sorter_alloc_buf(struct sort_context *ctx);
+void sorter_free_buf(struct sort_context *ctx);
+
+/* Buckets */
+
+struct sort_bucket {
+ cnode n; // List node (presumably linking the bucket to ctx->bucket_list -- TODO confirm)
+ struct sort_context *ctx; // Sorter context the bucket belongs to
+ uns flags; // presumably a mask of enum sort_bucket_flags -- TODO confirm
+ struct fastbuf *fb;
+ byte *filename;
+ u64 size; // Size in bytes (not valid when writing)
+ uns runs; // Number of runs, 0 if not sorted
+ uns hash_bits; // Remaining bits of the hash function
+ byte *ident; // Identifier used in debug messages
+};
+
+enum sort_bucket_flags { // Bit flags (presumably kept in sort_bucket.flags -- TODO confirm)
+ SBF_FINAL = 1, // This bucket corresponds to the final output file (always 1 run)
+ SBF_SOURCE = 2, // Contains the source file (always 0 runs)
+ SBF_CUSTOM_PRESORT = 4, // Contains source to read via custom presorter
+ SBF_OPEN_WRITE = 256, // We are currently writing to the fastbuf
+ SBF_OPEN_READ = 512, // We are reading from the fastbuf
+ SBF_DESTROYED = 1024, // Already done with, no further references allowed
+ SBF_SWAPPED_OUT = 2048, // Swapped out to a named file
+};
+
+struct sort_bucket *sbuck_new(struct sort_context *ctx);
+void sbuck_drop(struct sort_bucket *b);
+int sbuck_have(struct sort_bucket *b);
+int sbuck_has_file(struct sort_bucket *b);
+sh_off_t sbuck_size(struct sort_bucket *b);
+struct fastbuf *sbuck_read(struct sort_bucket *b);
+struct fastbuf *sbuck_write(struct sort_bucket *b);
+void sbuck_swap_out(struct sort_bucket *b);
+
+/* Contexts and helper functions for the array sorter */
+
+struct asort_context {
+ // Interface between generic code in array.c and functions generated by array.h
+ void *array; // Array to sort; after asort_run() it points to the sorted data, which may be in `buffer'
+ void *buffer; // Auxiliary buffer (required when radix-sorting)
+ uns num_elts; // Number of elements in the array
+ uns elt_size; // Bytes per element
+ uns hash_bits; // Remaining bits of the hash function
+ uns radix_bits; // How many bits to process in a single radix-sort pass
+ void (*quicksort)(void *array_ptr, uns num_elts); // Sort the array in place
+ void (*quicksplit)(void *array_ptr, uns num_elts, int *leftp, int *rightp); // Single partitioning step, reporting the split points
+ void (*radix_count)(void *src_ptr, uns num_elts, uns *cnt, uns shift); // Add per-bucket element counts to cnt[]
+ void (*radix_split)(void *src_ptr, void *dest_ptr, uns num_elts, uns *ptrs, uns shift); // Distribute elements to buckets; NULL makes asort_run() use quicksort
+
+ // Used internally by array.c
+ struct rs_work **rs_works; // One work item per thread for parallel counting and splitting
+ struct work_queue *rs_work_queue; // Queue used by the threaded radix-sort
+ struct eltpool *eltpool; // Pool of work items used by the threaded sorts
+
+ // Configured limits translated from bytes to elements
+ uns thread_threshold; // Smaller inputs are not sorted by the threaded variants
+ uns thread_chunk; // Buckets smaller than this are sorted inline by the threaded radix-sort instead of being queued
+ uns radix_threshold; // Smaller inputs and buckets are quicksorted instead of radix-sorted
+};
+
+// Array sorter entry points (defined outside this chunk). sorter_run() calls
+// asort_start_threads(0) before sorting and asort_stop_threads() afterwards.
+void asort_run(struct asort_context *ctx);
+void asort_start_threads(uns run);
+void asort_stop_threads(void);
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Universal Sorter: Configuration
+ *
+ * (c) 2007 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/conf.h"
+#include "lib/fastbuf.h"
+#include "lib/sorter/common.h"
+
+/*
+ * Global settings of the sorter. Most of them are bound to items of the
+ * "Sorter" configuration section by the sorter_config table; the
+ * initializers given here serve as defaults.
+ */
+uns sorter_trace; // Item "Trace"
+uns sorter_trace_array; // Item "TraceArray"
+u64 sorter_bufsize = 65536; // Item "SortBuffer"
+uns sorter_debug; // Item "Debug": mask of SORT_DEBUG_xxx bits
+uns sorter_min_radix_bits; // Item "MinRadixBits"
+uns sorter_max_radix_bits; // Item "MaxRadixBits"
+uns sorter_add_radix_bits; // Item "AddRadixBits"
+uns sorter_min_multiway_bits; // Item "MinMultiwayBits"
+uns sorter_max_multiway_bits; // Item "MaxMultiwayBits"
+uns sorter_threads; // Item "Threads"
+u64 sorter_thread_threshold = 1048576; // Item "ThreadThreshold"
+u64 sorter_thread_chunk = 4096; // Item "ThreadChunk"
+u64 sorter_radix_threshold = 4096; // Item "RadixThreshold"
+struct fb_params sorter_fb_params; // Fastbuf parameters used by default
+struct fb_params sorter_small_fb_params; // Fastbuf parameters picked by sorter_run() for inputs smaller than sorter_small_input
+u64 sorter_small_input; // Item "SmallInput"
+
+/*
+ * Items of the "Sorter" configuration section. Each entry binds a name
+ * used in the configuration file to one of the global variables above.
+ *
+ * "FileAccess" and "SmallFileAccess" are two independent sets of fastbuf
+ * parameters: sorter_run() uses the latter for inputs smaller than
+ * "SmallInput", so each name must be bound to its own variable.
+ */
+static struct cf_section sorter_config = {
+  CF_ITEMS {
+    CF_UNS("Trace", &sorter_trace),
+    CF_UNS("TraceArray", &sorter_trace_array),
+    CF_SECTION("FileAccess", &sorter_fb_params, &fbpar_cf),
+    CF_SECTION("SmallFileAccess", &sorter_small_fb_params, &fbpar_cf),
+    CF_U64("SmallInput", &sorter_small_input),
+    CF_U64("SortBuffer", &sorter_bufsize),
+    CF_UNS("Debug", &sorter_debug),
+    CF_UNS("MinRadixBits", &sorter_min_radix_bits),
+    CF_UNS("MaxRadixBits", &sorter_max_radix_bits),
+    CF_UNS("AddRadixBits", &sorter_add_radix_bits),
+    CF_UNS("MinMultiwayBits", &sorter_min_multiway_bits),
+    CF_UNS("MaxMultiwayBits", &sorter_max_multiway_bits),
+    CF_UNS("Threads", &sorter_threads),
+    CF_U64("ThreadThreshold", &sorter_thread_threshold),
+    CF_U64("ThreadChunk", &sorter_thread_chunk),
+    CF_U64("RadixThreshold", &sorter_radix_threshold),
+    CF_END
+  }
+};
+
+/*
+ * Declares the sorter_config table as the configuration section "Sorter".
+ * NOTE(review): CONSTRUCTOR presumably makes this run automatically at
+ * program start-up -- the macro is defined outside this chunk.
+ */
+static void CONSTRUCTOR
+sorter_init_config(void)
+{
+  cf_declare_section("Sorter", &sorter_config, 0);
+}
--- /dev/null
+/*
+ * UCW Library -- Universal Sorter: Governing Routines
+ *
+ * (c) 2007 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/fastbuf.h"
+#include "lib/mempool.h"
+#include "lib/stkstring.h"
+#include "lib/sorter/common.h"
+
+#include <string.h>
+#include <sys/time.h>
+#include <time.h>
+
+#define F_BSIZE(b) stk_fsize(sbuck_size(b))
+
+/* Start measuring the time of a pass; the timer lives in the context. */
+static void
+sorter_start_timer(struct sort_context *ctx)
+{
+  timestamp_t *timer = &ctx->start_time;
+  init_timer(timer);
+}
+
+/*
+ * Stop measuring the current pass: remember its duration in
+ * ctx->last_pass_time and add it to the counter pointed to by @account_to.
+ */
+static void
+sorter_stop_timer(struct sort_context *ctx, uns *account_to)
+{
+  uns elapsed = get_timer(&ctx->start_time);
+
+  ctx->last_pass_time = elapsed;
+  *account_to += ctx->last_pass_time;
+}
+
+/*
+ * Speed of the last pass for trace messages: @size bytes processed in
+ * ctx->last_pass_time, scaled by 2^20 and 1000 (the callers print the
+ * result as MB/s). Returns 0 when either the size or the time is zero.
+ */
+static uns
+sorter_speed(struct sort_context *ctx, u64 size)
+{
+  if (!size || !ctx->last_pass_time)
+    return 0;
+
+  return (uns)((double)size / (1<<20) * 1000 / ctx->last_pass_time);
+}
+
+/*
+ * Produce one presorted run from @in. Normally this is done by the internal
+ * sorter of the context; if the input bucket is marked SBF_CUSTOM_PRESORT,
+ * the custom presorter is asked to write a run to @out instead.
+ * The return value of the respective sorter is passed through; the callers
+ * treat non-zero as "call me again".
+ */
+static int
+sorter_presort(struct sort_context *ctx, struct sort_bucket *in, struct sort_bucket *out, struct sort_bucket *out_only)
+{
+  sorter_alloc_buf(ctx);
+
+  if (!(in->flags & SBF_CUSTOM_PRESORT))
+    return ctx->internal_sort(ctx, in, out, out_only);
+
+  /*
+   *  The trick with automatic joining, which we use for the normal presorter,
+   *  is not necessary with the custom presorter, because the custom presorter
+   *  is never called in the middle of the sorted data.
+   */
+  struct fastbuf *dest = sbuck_write(out);
+  out->runs++;
+  return ctx->custom_presort(dest, ctx->big_buf, ctx->big_buf_size);
+}
+
+/*
+ * Check whether the output of sorting @b can be appended directly to the
+ * bucket preceding it in the list. This is possible only if that bucket
+ * is the final one and joining has not been disabled by SORT_DEBUG_NO_JOIN.
+ * On success, the bucket to join to is returned and its current size
+ * is stored to @sizep; otherwise NULL is returned and @sizep is untouched.
+ */
+static struct sort_bucket *
+sbuck_join_to(struct sort_bucket *b, sh_off_t *sizep)
+{
+  if (sorter_debug & SORT_DEBUG_NO_JOIN)
+    return NULL;
+
+  // Such bucket is guaranteed to exist
+  struct sort_bucket *prev = (struct sort_bucket *) b->n.prev;
+  if (prev->flags & SBF_FINAL)
+    {
+      ASSERT(prev->runs == 1);
+      *sizep = sbuck_size(prev);
+      return prev;
+    }
+  return NULL;
+}
+
+/*
+ * Finish a sorting step whose output went either to the @join bucket
+ * or to a fresh bucket @b (which can be NULL).
+ *
+ * If @join has received a second run, @b is dropped, the run is accounted
+ * as merged into the first one and the number of bytes added to @join
+ * (relative to @join_size) is returned. Otherwise @b, if any, is inserted
+ * to the bucket list after @list_pos and its size is returned. When there
+ * is nothing to do, the result is 0.
+ */
+static sh_off_t
+sbuck_ins_or_join(struct sort_bucket *b, cnode *list_pos, struct sort_bucket *join, sh_off_t join_size)
+{
+  if (join && join->runs >= 2)
+    {
+      if (b)
+        sbuck_drop(b);
+      ASSERT(join->runs == 2);
+      join->runs--;
+      return sbuck_size(join) - join_size;
+    }
+
+  if (!b)
+    return 0;
+
+  clist_insert_after(&b->n, list_pos);
+  return sbuck_size(b);
+}
+
+/*
+ * Append the single-run bucket @b to the final bucket, which must
+ * immediately precede it in the bucket list.
+ */
+static void
+sorter_join(struct sort_bucket *b)
+{
+  struct sort_bucket *final = (struct sort_bucket *) b->n.prev;
+  ASSERT(final->flags & SBF_FINAL);
+  ASSERT(b->runs == 1);
+
+  if (sbuck_has_file(final))
+    {
+      // The final bucket already has a file, so the data must be copied there
+      SORT_TRACE("Copying to output file: %s", F_BSIZE(b));
+      struct fastbuf *from = sbuck_read(b);
+      struct fastbuf *to = sbuck_write(final);
+      bbcopy(from, to, ~0U);
+      sbuck_drop(b);
+      return;
+    }
+
+  // No file is associated with the final bucket yet, so the new bucket
+  // simply takes over its role.
+  SORT_XTRACE(3, "Replaced final bucket");
+  b->flags |= SBF_FINAL;
+  sbuck_drop(final);
+}
+
+/*
+ * Sort bucket @b by the two-way merge strategy.
+ *
+ * Unless presorting is disabled by SORT_DEBUG_NO_PRESORT (the custom
+ * presorter is always honoured), the input is first presorted to runs
+ * distributed alternately to two buckets. If the very first presorting
+ * call reports that no more data follow, everything has been sorted
+ * in memory and we are done. Otherwise the two buckets are repeatedly
+ * merged to two new ones until the second one becomes empty.
+ *
+ * Whenever the result is known to be a single run and the preceding bucket
+ * is the final one (see sbuck_join_to()), the result is written directly
+ * to the final bucket; otherwise the sorted bucket replaces @b in the list.
+ * In all cases @b is dropped.
+ */
+static void
+sorter_twoway(struct sort_context *ctx, struct sort_bucket *b)
+{
+  struct sort_bucket *ins[3] = { NULL }, *outs[3] = { NULL };
+  cnode *list_pos = b->n.prev;
+  sh_off_t join_size;
+  struct sort_bucket *join = sbuck_join_to(b, &join_size);
+
+  if (!(sorter_debug & SORT_DEBUG_NO_PRESORT) || (b->flags & SBF_CUSTOM_PRESORT))
+    {
+      SORT_XTRACE(3, "%s", ((b->flags & SBF_CUSTOM_PRESORT) ? "Custom presorting" : "Presorting"));
+      sorter_start_timer(ctx);
+      ins[0] = sbuck_new(ctx);
+      if (!sorter_presort(ctx, b, ins[0], join ? : ins[0]))
+        {
+          // The presorter has consumed the whole input in a single call
+          sorter_stop_timer(ctx, &ctx->total_pre_time);
+          sh_off_t size = sbuck_ins_or_join(ins[0], list_pos, join, join_size);
+          SORT_XTRACE(((b->flags & SBF_SOURCE) ? 1 : 3), "Sorted in memory (%s, %dMB/s)", stk_fsize(size), sorter_speed(ctx, size));
+          sbuck_drop(b);
+          return;
+        }
+
+      // Distribute the remaining runs alternately to ins[1] and ins[0]
+      ins[1] = sbuck_new(ctx);
+      int i = 1;
+      while (sorter_presort(ctx, b, ins[i], ins[i]))
+        i = 1-i;
+      sbuck_drop(b);
+      sorter_stop_timer(ctx, &ctx->total_pre_time);
+      SORT_TRACE("Presorting pass (%d+%d runs, %s+%s, %dMB/s)",
+                 ins[0]->runs, ins[1]->runs,
+                 F_BSIZE(ins[0]), F_BSIZE(ins[1]),
+                 sorter_speed(ctx, sbuck_size(ins[0]) + sbuck_size(ins[1])));
+    }
+  else
+    {
+      // The raw input becomes the only input of the first merging pass;
+      // ins[1] stays NULL, which the merging code understands as "no input".
+      SORT_XTRACE(2, "Presorting disabled");
+      ins[0] = b;
+    }
+
+  SORT_XTRACE(3, "Main sorting");
+  uns pass = 0;
+  do {
+    ++pass;
+    sorter_start_timer(ctx);
+    // ins[1] is NULL only in the first pass with presorting disabled. In that
+    // case ins[0] is the unsorted input (its run count of 0 means "not sorted",
+    // not "at most one run"), so the single-run shortcut must not be taken
+    // and ins[1] must not be dereferenced.
+    if (ins[1] && ins[0]->runs <= 1 && ins[1]->runs <= 1 && join)
+      {
+        // This is guaranteed to produce a single run, so join if possible
+        outs[0] = join;
+        outs[1] = NULL;
+        ctx->twoway_merge(ctx, ins, outs);
+        sh_off_t size = sbuck_ins_or_join(NULL, NULL, join, join_size);
+        sorter_stop_timer(ctx, &ctx->total_ext_time);
+        SORT_TRACE("Mergesort pass %d (final run, %s, %dMB/s)", pass, stk_fsize(size), sorter_speed(ctx, size));
+        sbuck_drop(ins[0]);
+        sbuck_drop(ins[1]);
+        return;
+      }
+    outs[0] = sbuck_new(ctx);
+    outs[1] = sbuck_new(ctx);
+    outs[2] = NULL;
+    ctx->twoway_merge(ctx, ins, outs);
+    sorter_stop_timer(ctx, &ctx->total_ext_time);
+    SORT_TRACE("Mergesort pass %d (%d+%d runs, %s+%s, %dMB/s)", pass,
+               outs[0]->runs, outs[1]->runs,
+               F_BSIZE(outs[0]), F_BSIZE(outs[1]),
+               sorter_speed(ctx, sbuck_size(outs[0]) + sbuck_size(outs[1])));
+    sbuck_drop(ins[0]);
+    sbuck_drop(ins[1]);                 // sbuck_drop() accepts NULL
+    memcpy(ins, outs, 3*sizeof(struct sort_bucket *));
+  } while (sbuck_have(ins[1]));
+
+  // Everything has ended up in ins[0], which takes the place of the input
+  sbuck_drop(ins[1]);
+  clist_insert_after(&ins[0]->n, list_pos);
+}
+
+/*
+ * Sort bucket @b by the multi-way merge strategy: presort the input to
+ * a list of parts and then repeatedly merge up to 2^sorter_max_multiway_bits
+ * parts at once until a single bucket remains. The bucket @b is dropped
+ * after presorting. If the preceding bucket is the final one (see
+ * sbuck_join_to()), the last step writes directly to it.
+ */
+static void
+sorter_multiway(struct sort_context *ctx, struct sort_bucket *b)
+{
+ clist parts;
+ cnode *list_pos = b->n.prev;
+ sh_off_t join_size;
+ struct sort_bucket *join = sbuck_join_to(b, &join_size);
+ uns trace_level = (b->flags & SBF_SOURCE) ? 1 : 3;
+
+ clist_init(&parts);
+ ASSERT(!(sorter_debug & SORT_DEBUG_NO_PRESORT));
+ SORT_XTRACE(3, "%s", ((b->flags & SBF_CUSTOM_PRESORT) ? "Custom presorting" : "Presorting"));
+ uns cont;
+ uns part_cnt = 0;
+ u64 total_size = 0;
+ sorter_start_timer(ctx);
+ // Presorting: every call of the presorter fills a fresh bucket, non-empty
+ // buckets are swapped out and collected in the list of parts. As long as
+ // no part exists, the presorter is offered the join bucket as the place
+ // to write to if no further call is going to be needed.
+ do
+ {
+ struct sort_bucket *p = sbuck_new(ctx);
+ cont = sorter_presort(ctx, b, p, (!part_cnt && join) ? join : p);
+ if (sbuck_have(p))
+ {
+ part_cnt++;
+ clist_add_tail(&parts, &p->n);
+ total_size += sbuck_size(p);
+ sbuck_swap_out(p);
+ }
+ else
+ sbuck_drop(p);
+ }
+ while (cont);
+ sorter_stop_timer(ctx, &ctx->total_pre_time);
+ sorter_free_buf(ctx);
+ sbuck_drop(b);
+
+ // With at most one part there is nothing to merge: either the only part
+ // replaces the input in the bucket list, or (no parts at all) the data
+ // may have been written to the join bucket, which is accounted for here.
+ if (part_cnt <= 1)
+ {
+ sh_off_t size = sbuck_ins_or_join(clist_head(&parts), list_pos, (part_cnt ? NULL : join), join_size);
+ SORT_XTRACE(trace_level, "Sorted in memory (%s, %dMB/s)", stk_fsize(size), sorter_speed(ctx, size));
+ return;
+ }
+
+ SORT_TRACE("Multi-way presorting pass (%d parts, %s, %dMB/s)", part_cnt, stk_fsize(total_size), sorter_speed(ctx, total_size));
+
+ uns max_ways = 1 << sorter_max_multiway_bits;
+ struct sort_bucket *ways[max_ways+1]; // One extra slot for the terminating NULL
+ SORT_XTRACE(3, "Starting up to %d-way merge", max_ways);
+ for (;;)
+ {
+ // Take up to max_ways parts from the head of the list
+ uns n = 0;
+ struct sort_bucket *p;
+ while (n < max_ways && (p = clist_head(&parts)))
+ {
+ clist_remove(&p->n);
+ ways[n++] = p;
+ }
+ ways[n] = NULL;
+ ASSERT(n > 1);
+
+ // If no parts are left in the list, this is the last merge, so it can
+ // go directly to the join bucket if there is one.
+ struct sort_bucket *out;
+ if (clist_empty(&parts) && join)
+ out = join;
+ else
+ out = sbuck_new(ctx);
+ sorter_start_timer(ctx);
+ ctx->multiway_merge(ctx, ways, out);
+ sorter_stop_timer(ctx, &ctx->total_ext_time);
+
+ for (uns i=0; i<n; i++)
+ sbuck_drop(ways[i]);
+
+ if (clist_empty(&parts))
+ {
+ sh_off_t size = sbuck_ins_or_join((join ? NULL : out), list_pos, join, join_size);
+ SORT_TRACE("Multi-way merge completed (%d ways, %s, %dMB/s)", n, stk_fsize(size), sorter_speed(ctx, size));
+ return;
+ }
+ else
+ {
+ // The merged bucket is queued at the tail to be merged again later
+ sbuck_swap_out(out);
+ clist_add_tail(&parts, &out->n);
+ SORT_TRACE("Multi-way merge pass (%d ways, %s, %dMB/s)", n, F_BSIZE(out), sorter_speed(ctx, sbuck_size(out)));
+ }
+ }
+}
+
+/*
+ * Split bucket @b to 2^bits buckets according to the topmost remaining
+ * bits of the hash function. The new buckets are linked to the bucket list
+ * right after @b (in increasing order of their index) and each of them
+ * remembers how many hash bits remain for further splitting. Finally,
+ * @b is dropped.
+ *
+ * @bits is the number of bits chosen by the caller; it can be increased
+ * by sorter_add_radix_bits up to sorter_max_radix_bits, but never beyond
+ * the number of hash bits the bucket still has.
+ */
+static void
+sorter_radix(struct sort_context *ctx, struct sort_bucket *b, uns bits)
+{
+  // Add more bits if requested and allowed.
+  bits = MIN(bits + sorter_add_radix_bits, sorter_max_radix_bits);
+  // The position of the split is b->hash_bits - bits, which is unsigned and
+  // would wrap around if we tried to use more bits than the hash has left.
+  bits = MIN(bits, b->hash_bits);
+
+  uns nbuck = 1 << bits;
+  SORT_XTRACE(3, "Running radix split on %s with hash %d bits of %d (expecting %s buckets)",
+              F_BSIZE(b), bits, b->hash_bits, stk_fsize(sbuck_size(b) / nbuck));
+  sorter_free_buf(ctx);
+  sorter_start_timer(ctx);
+
+  // Create the buckets from the last one to the first one: each of them is
+  // inserted immediately after @b, so the list ends up in increasing order.
+  struct sort_bucket **outs = alloca(nbuck * sizeof(struct sort_bucket *));
+  for (uns i=nbuck; i--; )
+    {
+      outs[i] = sbuck_new(ctx);
+      outs[i]->hash_bits = b->hash_bits - bits;
+      clist_insert_after(&outs[i]->n, &b->n);
+    }
+
+  ctx->radix_split(ctx, b, outs, b->hash_bits - bits, bits);
+
+  // Gather statistics for the trace message; when there are more than
+  // 4 buckets, they are swapped out.
+  u64 min = ~(u64)0, max = 0, sum = 0;
+  for (uns i=0; i<nbuck; i++)
+    {
+      u64 s = sbuck_size(outs[i]);
+      min = MIN(min, s);
+      max = MAX(max, s);
+      sum += s;
+      if (nbuck > 4)
+        sbuck_swap_out(outs[i]);
+    }
+
+  sorter_stop_timer(ctx, &ctx->total_ext_time);
+  SORT_TRACE("Radix split (%d buckets, %s min, %s max, %s avg, %dMB/s)", nbuck,
+             stk_fsize(min), stk_fsize(max), stk_fsize(sum / nbuck), sorter_speed(ctx, sum));
+  sbuck_drop(b);
+}
+
+/*
+ * Decide how to process bucket @b and do it: empty buckets are dropped,
+ * buckets which are already sorted are joined to the final bucket and
+ * for the rest one of the strategies (radix split, multi-way merge
+ * or two-way merge) is chosen according to the size of the bucket,
+ * the estimated capacity of the internal sorter, the capabilities
+ * of the context and the configured limits.
+ */
+static void
+sorter_decide(struct sort_context *ctx, struct sort_bucket *b)
+{
+ // Drop empty buckets
+ if (!sbuck_have(b))
+ {
+ SORT_XTRACE(4, "Dropping empty bucket");
+ sbuck_drop(b);
+ return;
+ }
+
+ // How many bits of bucket size we have to reduce before it fits in the RAM?
+ // (this is insanely large if the input size is unknown, but it serves our purpose)
+ u64 insize = sbuck_size(b);
+ u64 mem = ctx->internal_estimate(ctx, b) * 0.8; // Magical factor accounting for various non-uniformities
+ uns bits = 0;
+ while ((insize >> bits) > mem)
+ bits++;
+
+ // Calculate the possibilities of radix splits
+ // (0 means that the radix split cannot or should not be used)
+ uns radix_bits;
+ if (!ctx->radix_split ||
+ (b->flags & SBF_CUSTOM_PRESORT) ||
+ (sorter_debug & SORT_DEBUG_NO_RADIX))
+ radix_bits = 0;
+ else
+ {
+ radix_bits = MIN(bits, b->hash_bits);
+ radix_bits = MIN(radix_bits, sorter_max_radix_bits);
+ if (radix_bits < sorter_min_radix_bits)
+ radix_bits = 0;
+ }
+
+ // The same for multi-way merges
+ uns multiway_bits;
+ if (!ctx->multiway_merge ||
+ (sorter_debug & SORT_DEBUG_NO_MULTIWAY) ||
+ (sorter_debug & SORT_DEBUG_NO_PRESORT))
+ multiway_bits = 0;
+ else
+ {
+ multiway_bits = MIN(bits, sorter_max_multiway_bits);
+ if (multiway_bits < sorter_min_multiway_bits)
+ multiway_bits = 0;
+ }
+
+ SORT_XTRACE(3, "Decisions: size=%s max=%s runs=%d bits=%d hash=%d -> radix=%d multi=%d",
+ stk_fsize(insize), stk_fsize(mem), b->runs, bits, b->hash_bits,
+ radix_bits, multiway_bits);
+
+ // If the input already consists of a single run, just join it
+ if (b->runs)
+ return sorter_join(b);
+
+ // If everything fits in memory, the 2-way strategy will sort it in memory
+ if (!bits)
+ return sorter_twoway(ctx, b);
+
+ // If we can reduce everything in one pass, do so and prefer radix splits
+ if (radix_bits == bits)
+ return sorter_radix(ctx, b, radix_bits);
+ if (multiway_bits == bits)
+ return sorter_multiway(ctx, b);
+
+ // Otherwise, reduce as much as possible and again prefer radix splits
+ if (radix_bits)
+ return sorter_radix(ctx, b, radix_bits);
+ if (multiway_bits)
+ return sorter_multiway(ctx, b);
+
+ // Fall back to 2-way strategy if nothing else applies
+ return sorter_twoway(ctx, b);
+}
+
+/*
+ * Run the sorter on the given context.
+ *
+ * The bucket list is initialized to the final (output) bucket followed by
+ * a bucket with the source. Then the bucket following the final one is
+ * processed by sorter_decide() again and again until only the final bucket
+ * remains. On return, ctx->out_fb is a fastbuf for reading the result.
+ */
+void
+sorter_run(struct sort_context *ctx)
+{
+ ctx->pool = mp_new(4096);
+ clist_init(&ctx->bucket_list);
+ sorter_prepare_buf(ctx);
+ asort_start_threads(0);
+
+ // Create bucket containing the source
+ // (with a custom presorter, there is no input fastbuf to attach)
+ struct sort_bucket *bin = sbuck_new(ctx);
+ bin->flags = SBF_SOURCE | SBF_OPEN_READ;
+ if (ctx->custom_presort)
+ bin->flags |= SBF_CUSTOM_PRESORT;
+ else
+ bin->fb = ctx->in_fb;
+ bin->ident = "in";
+ bin->size = ctx->in_size;
+ bin->hash_bits = ctx->hash_bits;
+ clist_add_tail(&ctx->bucket_list, &bin->n);
+ SORT_XTRACE(2, "Input size: %s, %d hash bits", F_BSIZE(bin), bin->hash_bits);
+ // Small inputs use a separate set of fastbuf parameters
+ ctx->fb_params = (bin->size < sorter_small_input) ? &sorter_small_fb_params : &sorter_fb_params;
+
+ // Create bucket for the output
+ // (the assignment inside the condition is intentional: the bucket is marked
+ // as open for writing only if the caller has supplied an output fastbuf)
+ struct sort_bucket *bout = sbuck_new(ctx);
+ bout->flags = SBF_FINAL;
+ if (bout->fb = ctx->out_fb)
+ bout->flags |= SBF_OPEN_WRITE;
+ bout->ident = "out";
+ bout->runs = 1;
+ clist_add_head(&ctx->bucket_list, &bout->n);
+
+ // Repeatedly sort buckets
+ // (the head of the list is re-read in every iteration, because the final
+ // bucket can be replaced by another one during joining)
+ struct sort_bucket *b;
+ while (bout = clist_head(&ctx->bucket_list), b = clist_next(&ctx->bucket_list, &bout->n))
+ sorter_decide(ctx, b);
+
+ asort_stop_threads();
+ sorter_free_buf(ctx);
+ sbuck_write(bout); // Force empty bucket to a file
+ SORT_XTRACE(2, "Final size: %s", F_BSIZE(bout));
+ SORT_XTRACE(2, "Final timings: %.3fs external sorting, %.3fs presorting, %.3fs internal sorting",
+ ctx->total_ext_time/1000., ctx->total_pre_time/1000., ctx->total_int_time/1000.);
+ ctx->out_fb = sbuck_read(bout);
+ mp_delete(ctx->pool);
+}
--- /dev/null
+/*
+ * UCW Library -- Universal Sorter: Fixed-Size Internal Sorting Module
+ *
+ * (c) 2007 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/stkstring.h"
+
+#define ASORT_PREFIX(x) SORT_PREFIX(array_##x)
+#define ASORT_KEY_TYPE P(key)
+#define ASORT_LT(x,y) (P(compare)(&(x), &(y)) < 0)
+#ifdef SORT_INTERNAL_RADIX
+# define ASORT_HASH(x) P(hash)(&(x))
+# ifdef SORT_LONG_HASH
+# define ASORT_LONG_HASH
+# endif
+#endif
+#include "lib/sorter/array.h"
+
+/*
+ * This is a more efficient implementation of the internal sorter,
+ * which runs under the following assumptions:
+ *
+ * - the keys have fixed (and small) size
+ * - no data are present after the key
+ * - unification does not require any workspace
+ */
+
+/*
+ * Number of bytes of workspace needed per key: a pointer to the key when
+ * unifying, a copy of the key when radix-sorting, the larger of them
+ * if both are enabled and nothing otherwise.
+ */
+static size_t P(internal_workspace)(void)
+{
+#ifdef SORT_UNIFY
+  size_t per_key = sizeof(P(key) *);
+#else
+  size_t per_key = 0;
+#endif
+#ifdef SORT_INTERNAL_RADIX
+  per_key = MAX(per_key, sizeof(P(key)));
+#endif
+  return per_key;
+}
+
+/*
+ * How many keys fit in the sorting buffer together with their workspace.
+ * If any workspace is needed, one page of the buffer is set aside first
+ * (the workspace is aligned to a page boundary when it is used).
+ */
+static uns P(internal_num_keys)(struct sort_context *ctx)
+{
+  size_t per_key_ws = P(internal_workspace)();
+  size_t usable = ctx->big_buf_size;
+
+  if (per_key_ws)
+    usable -= CPU_PAGE_SIZE;
+
+  u64 fit = usable / (sizeof(P(key)) + per_key_ws);
+  return MIN(fit, ~0U);                 // The number of records must fit in uns
+}
+
+/*
+ * Internal sorter for fixed-size keys with no data attached.
+ *
+ * Reads keys from @bin to the sorting buffer until either the buffer is
+ * full or the input ends, sorts them as an array and writes them as a single
+ * run. If the buffer has not been filled completely (so the whole rest of
+ * the input has been read), the run goes to @bout_only, otherwise to @bout.
+ * When unification is enabled, sequences of equal keys are written by
+ * a single call to write_merged().
+ *
+ * Returns 0 if no keys have been read or the buffer has not been filled,
+ * 1 if the buffer has been filled completely, in which case more input
+ * can follow and the caller should call us again.
+ */
+static int P(internal)(struct sort_context *ctx, struct sort_bucket *bin, struct sort_bucket *bout, struct sort_bucket *bout_only)
+{
+  sorter_alloc_buf(ctx);
+  struct fastbuf *in = sbuck_read(bin);
+  P(key) *buf = ctx->big_buf;
+  uns maxkeys = P(internal_num_keys)(ctx);
+
+  SORT_XTRACE(5, "s-fixint: Reading (maxkeys=%u, hash_bits=%d)", maxkeys, bin->hash_bits);
+  uns n = 0;
+  while (n < maxkeys && P(read_key)(in, &buf[n]))
+    n++;
+  if (!n)
+    return 0;
+  // The workspace starts at the first page boundary behind the keys
+  void *workspace UNUSED = ALIGN_PTR(&buf[n], CPU_PAGE_SIZE);
+
+  SORT_XTRACE(4, "s-fixint: Sorting %u items (%s items, %s workspace)",
+              n,
+              stk_fsize(n * sizeof(P(key))),
+              stk_fsize(n * P(internal_workspace)()));
+  timestamp_t timer;
+  init_timer(&timer);
+  buf = P(array_sort)(buf, n
+#ifdef SORT_INTERNAL_RADIX
+    , workspace, bin->hash_bits
+#endif
+    );
+  // If the sorted array has ended up in the workspace, the original place
+  // of the array becomes the workspace instead.
+  if ((void *)buf != ctx->big_buf)
+    workspace = ctx->big_buf;
+  ctx->total_int_time += get_timer(&timer);
+
+  SORT_XTRACE(5, "s-fixint: Writing");
+  if (n < maxkeys)
+    bout = bout_only;
+  struct fastbuf *out = sbuck_write(bout);
+  bout->runs++;
+  uns merged UNUSED = 0;
+  for (uns i=0; i<n; i++)
+    {
+#ifdef SORT_UNIFY
+      if (i < n-1 && !P(compare)(&buf[i], &buf[i+1]))
+        {
+          // Collect pointers to all keys equal to buf[i]. The scan must stop
+          // at the end of the array, otherwise we would compare against
+          // (and possibly merge) memory behind the last key read.
+          P(key) **keys = workspace;
+          uns cnt = 2;
+          keys[0] = &buf[i];
+          keys[1] = &buf[i+1];
+          while (i+cnt < n && !P(compare)(&buf[i], &buf[i+cnt]))
+            {
+              keys[cnt] = &buf[i+cnt];
+              cnt++;
+            }
+          P(write_merged)(out, keys, NULL, cnt, NULL);
+          merged += cnt - 1;
+          i += cnt - 1;
+          continue;
+        }
+#endif
+#ifdef SORT_ASSERT_UNIQUE
+      ASSERT(i == n-1 || P(compare)(&buf[i], &buf[i+1]) < 0);
+#endif
+      P(write_key)(out, &buf[i]);
+    }
+#ifdef SORT_UNIFY
+  SORT_XTRACE(4, "Merging reduced %u records", merged);
+#endif
+
+  return (n == maxkeys);
+}
+
+/*
+ * Estimate how many bytes of input can be sorted internally in one go:
+ * the total size of the keys which fit in the buffer, minus one byte.
+ */
+static u64
+P(internal_estimate)(struct sort_context *ctx, struct sort_bucket *b UNUSED)
+{
+  uns capacity = P(internal_num_keys)(ctx);
+
+  // -1 since if the buffer is full, we don't recognize EOF
+  return capacity * sizeof(P(key)) - 1;
+}
--- /dev/null
+/*
+ * UCW Library -- Universal Sorter: Internal Sorting Module
+ *
+ * (c) 2007 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/stkstring.h"
+
+#ifdef SORT_INTERNAL_RADIX
+/* Keep copies of the items' hashes to save cache misses */
+#define SORT_COPY_HASH
+#endif
+
+/*
+ * An element of the array which is actually sorted by the internal sorter:
+ * a pointer to the key stored elsewhere in the buffer and optionally
+ * a copy of the hash value of the key.
+ */
+typedef struct {
+ P(key) *key;
+#ifdef SORT_COPY_HASH
+ P(hash_t) hash;
+#endif
+} P(internal_item_t);
+
+#define ASORT_PREFIX(x) SORT_PREFIX(array_##x)
+#define ASORT_KEY_TYPE P(internal_item_t)
+#ifdef SORT_COPY_HASH
+# ifdef SORT_INT
+# define ASORT_LT(x,y) ((x).hash < (y).hash) // In this mode, the hash is the value
+# else
+# define ASORT_LT(x,y) ((x).hash < (y).hash || (x).hash == (y).hash && P(compare)((x).key, (y).key) < 0)
+# endif
+#else
+# define ASORT_LT(x,y) (P(compare)((x).key, (y).key) < 0)
+#endif
+#ifdef SORT_INTERNAL_RADIX
+# ifdef SORT_COPY_HASH
+# define ASORT_HASH(x) (x).hash
+# else
+# define ASORT_HASH(x) P(hash)((x).key)
+# endif
+# ifdef SORT_LONG_HASH
+# define ASORT_LONG_HASH
+# endif
+#endif
+#include "lib/sorter/array.h"
+
+/*
+ * The big_buf has the following layout:
+ *
+ * +-------------------------------------------------------------------------------+
+ * | array of internal_item's |
+ * +-------------------------------------------------------------------------------+
+ * | padding to make the following part page-aligned |
+ * +--------------------------------+----------------------------------------------+
+ * | shadow copy of item array | array of pointers to data for write_merged() |
+ * | used if radix-sorting +----------------------------------------------+
+ * | | workspace for write_merged() |
+ * +--------------------------------+----------------------------------------------+
+ * | +---------+ |
+ * | | key | |
+ * | +---------+ |
+ * | sequence of | padding | |
+ * | items +---------+ |
+ * | | data | |
+ * | +---------+ |
+ * | | padding | |
+ * | +---------+ |
+ * +-------------------------------------------------------------------------------+
+ *
+ * (the data which are in different columns are never accessed simultaneously,
+ * so we use a single buffer for both)
+ */
+
+/*
+ * Returns a pointer to the data which follow the given key in the buffer.
+ * When unifying, the size of the key is rounded up to CPU_STRUCT_ALIGN first.
+ */
+static inline void *P(internal_get_data)(P(key) *key)
+{
+  uns skip = SORT_KEY_SIZE(*key);
+
+#ifdef SORT_UNIFY
+  skip = ALIGN_TO(skip, CPU_STRUCT_ALIGN);
+#endif
+  return (byte *) key + skip;
+}
+
+/*
+ * Number of bytes of workspace which have to be reserved for the given key:
+ * a data pointer plus the workspace requested by SORT_UNIFY_WORKSPACE when
+ * unifying, but at least the size of one array item when radix-sorting.
+ */
+static inline size_t P(internal_workspace)(P(key) *key UNUSED)
+{
+  size_t needed = 0;
+
+#ifdef SORT_UNIFY
+  needed += sizeof(void *);
+#endif
+#ifdef SORT_UNIFY_WORKSPACE
+  needed += SORT_UNIFY_WORKSPACE(*key);
+#endif
+#ifdef SORT_INTERNAL_RADIX
+  needed = MAX(needed, sizeof(P(internal_item_t)));
+#endif
+
+  return needed;
+}
+
+/*
+ * The general internal sorter.
+ *
+ * Reads records from @bin to the sorting buffer as long as they fit there
+ * (see the layout described above), sorts an array of items pointing to
+ * them and writes the records as a single run. If the whole rest of the
+ * input has been read, the run goes to @bout_only, otherwise to @bout.
+ *
+ * A key which has been read, but does not fit in the buffer any longer,
+ * is kept in ctx->key_buf with ctx->more_keys set, and the next call
+ * starts with it.
+ *
+ * Returns 0 if there was nothing to read or the input has been exhausted,
+ * non-zero if the caller should call us again.
+ */
+static int P(internal)(struct sort_context *ctx, struct sort_bucket *bin, struct sort_bucket *bout, struct sort_bucket *bout_only)
+{
+ sorter_alloc_buf(ctx);
+ struct fastbuf *in = sbuck_read(bin);
+
+ // Get the first key: either the one left over by the previous call or a fresh one
+ P(key) key, *keybuf = ctx->key_buf;
+ if (!keybuf)
+ keybuf = ctx->key_buf = sorter_alloc(ctx, sizeof(key));
+ if (ctx->more_keys)
+ {
+ key = *keybuf;
+ ctx->more_keys = 0;
+ }
+ else if (!P(read_key)(in, &key))
+ return 0;
+
+ size_t bufsize = ctx->big_buf_size;
+#ifdef SORT_VAR_DATA
+ // A record which cannot fit in the buffer even when alone is copied
+ // directly to the output as a run of its own.
+ if (sizeof(key) + 2*CPU_PAGE_SIZE + SORT_DATA_SIZE(key) + P(internal_workspace)(&key) > bufsize)
+ {
+ SORT_XTRACE(4, "s-internal: Generating a giant run");
+ struct fastbuf *out = sbuck_write(bout);
+ P(copy_data)(&key, in, out);
+ bout->runs++;
+ return 1; // We don't know, but 1 is always safe
+ }
+#endif
+
+ SORT_XTRACE(5, "s-internal: Reading");
+ // The items grow from the start of the buffer upwards, the records
+ // (keys and data) from its end downwards; `remains' tracks the free space
+ // between them, with one page set aside.
+ P(internal_item_t) *item_array = ctx->big_buf, *item = item_array, *last_item;
+ byte *end = (byte *) ctx->big_buf + bufsize;
+ size_t remains = bufsize - CPU_PAGE_SIZE;
+ do
+ {
+ uns ksize = SORT_KEY_SIZE(key);
+#ifdef SORT_UNIFY
+ uns ksize_aligned = ALIGN_TO(ksize, CPU_STRUCT_ALIGN);
+#else
+ uns ksize_aligned = ksize;
+#endif
+ uns dsize = SORT_DATA_SIZE(key);
+ uns recsize = ALIGN_TO(ksize_aligned + dsize, CPU_STRUCT_ALIGN);
+ size_t totalsize = recsize + sizeof(P(internal_item_t)) + P(internal_workspace)(&key);
+ if (unlikely(totalsize > remains
+#ifdef CPU_64BIT_POINTERS
+ || item >= item_array + ~0U // The number of items must fit in an uns
+#endif
+ ))
+ {
+ // No more space: save the key for the next call and stop reading
+ ctx->more_keys = 1;
+ *keybuf = key;
+ break;
+ }
+ remains -= totalsize;
+ end -= recsize;
+ memcpy(end, &key, ksize);
+#ifdef SORT_VAR_DATA
+ breadb(in, end + ksize_aligned, dsize);
+#endif
+ item->key = (P(key)*) end;
+#ifdef SORT_COPY_HASH
+ item->hash = P(hash)(item->key);
+#endif
+ item++;
+ }
+ while (P(read_key)(in, &key));
+ last_item = item;
+
+ uns count = last_item - item_array;
+ void *workspace UNUSED = ALIGN_PTR(last_item, CPU_PAGE_SIZE);
+ SORT_XTRACE(4, "s-internal: Read %u items (%s items, %s workspace, %s data)",
+ count,
+ stk_fsize((byte*)last_item - (byte*)item_array),
+ stk_fsize(end - (byte*)last_item - remains),
+ stk_fsize((byte*)ctx->big_buf + bufsize - end));
+ timestamp_t timer;
+ init_timer(&timer);
+ item_array = P(array_sort)(item_array, count
+#ifdef SORT_INTERNAL_RADIX
+ , workspace, bin->hash_bits
+#endif
+ );
+ // If the sorted array has ended up in the workspace, the original place
+ // of the array becomes the workspace instead.
+ if ((void *)item_array != ctx->big_buf)
+ workspace = ctx->big_buf;
+ last_item = item_array + count;
+ ctx->total_int_time += get_timer(&timer);
+
+ SORT_XTRACE(5, "s-internal: Writing");
+ if (!ctx->more_keys)
+ bout = bout_only;
+ struct fastbuf *out = sbuck_write(bout);
+ bout->runs++;
+ uns merged UNUSED = 0;
+ for (item = item_array; item < last_item; item++)
+ {
+#ifdef SORT_UNIFY
+ if (item < last_item - 1 && !P(compare)(item->key, item[1].key))
+ {
+ // Rewrite the item structures with just pointers to keys and place
+ // pointers to data in the workspace.
+ P(key) **key_array = (void *) item;
+ void **data_array = workspace;
+ key_array[0] = item[0].key;
+ data_array[0] = P(internal_get_data)(key_array[0]);
+ uns cnt;
+ for (cnt=1; item+cnt < last_item && !P(compare)(key_array[0], item[cnt].key); cnt++)
+ {
+ key_array[cnt] = item[cnt].key;
+ data_array[cnt] = P(internal_get_data)(key_array[cnt]);
+ }
+ // The rest of the workspace (behind the data pointers) is handed over to write_merged()
+ P(write_merged)(out, key_array, data_array, cnt, data_array+cnt);
+ item += cnt - 1;
+ merged += cnt - 1;
+ continue;
+ }
+#endif
+#ifdef SORT_ASSERT_UNIQUE
+ ASSERT(item == last_item-1 || P(compare)(item->key, item[1].key) < 0);
+#endif
+ P(write_key)(out, item->key);
+#ifdef SORT_VAR_DATA
+ bwrite(out, P(internal_get_data)(item->key), SORT_DATA_SIZE(*item->key));
+#endif
+ }
+#ifdef SORT_UNIFY
+ SORT_XTRACE(4, "Merging reduced %u records", merged);
+#endif
+
+ return ctx->more_keys;
+}
+
+/*
+ * Estimate how many bytes of input can be sorted internally in one go.
+ * The estimate is based on a guessed average size of a key and on the
+ * per-record overhead (array item and workspace).
+ */
+static u64
+P(internal_estimate)(struct sort_context *ctx, struct sort_bucket *b UNUSED)
+{
+  // Most of this is just wild guesses
+#ifdef SORT_VAR_KEY
+  uns key_guess = ALIGN_TO(sizeof(P(key))/4, CPU_STRUCT_ALIGN);
+#else
+  uns key_guess = ALIGN_TO(sizeof(P(key)), CPU_STRUCT_ALIGN);
+#endif
+
+  uns overhead = 0;
+#ifdef SORT_UNIFY
+  overhead += sizeof(void *);
+#endif
+#ifdef SORT_UNIFY_WORKSPACE
+  overhead += key_guess;
+#endif
+#ifdef SORT_INTERNAL_RADIX
+  overhead = MAX(overhead, sizeof(P(internal_item_t)));
+#endif
+
+  // We ignore the data part of records, it probably won't make the estimate much worse
+  return (ctx->big_buf_size / (key_guess + overhead + sizeof(P(internal_item_t))) * key_guess);
+}
+
+#undef SORT_COPY_HASH
--- /dev/null
+/*
+ * UCW Library -- Universal Sorter: Multi-Way Merge Module
+ *
+ * (c) 2007 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+/*
+ * We use a binary tree to keep track of the current minimum. The tree is
+ * represented by an array (in the same way as binary heaps usually are),
+ * leaves correspond to input streams and each internal vertex remembers
+ * the leaf in its subtree, which has the lowest key.
+ */
+
+/* A vertex of the tree of minima; a negative index means that the subtree contains no key */
+typedef struct P(mwt) {
+ int i; // Minimum of the subtree
+#ifdef SORT_UNIFY
+ int eq; // Did we encounter equality anywhere in the subtree?
+#endif
+} P(mwt);
+
+/*
+ * Recalculate the tree on the path from vertex @i (which is not touched
+ * itself) up to the root, i.e., vertex 1. Each vertex on the path receives
+ * a copy of the son which has the smaller key; a son with a negative index
+ * is empty and always loses. When unifying, equality of the two keys
+ * is recorded in the `eq' flag.
+ */
+static inline void P(update_tree)(P(key) *keys, P(mwt) *tree, uns i)
+{
+ while (i /= 2)
+ {
+ if (tree[2*i].i < 0)
+ tree[i] = tree[2*i+1];
+ else if (tree[2*i+1].i < 0)
+ tree[i] = tree[2*i];
+ else
+ {
+ int cmp = P(compare)(&keys[tree[2*i].i], &keys[tree[2*i+1].i]);
+ tree[i] = (cmp <= 0) ? tree[2*i] : tree[2*i+1];
+#ifdef SORT_UNIFY
+ if (!cmp)
+ tree[i].eq = 1;
+#endif
+ }
+ /*
+ * It is very tempting to stop as soon as the current node does not
+ * change, but it is wrong, because even if the stream index stored in
+ * the tree is the same, the actual key value can differ.
+ */
+ }
+ /*
+ * This function sometimes triggers optimizer bugs in GCC versions up to 4.2.1,
+ * leading to an assumption that tree[1] does not change during this function.
+ * We add an explicit memory barrier as a work-around. Ugh. See GCC Bug #33262.
+ */
+ asm volatile ("" : : : "memory");
+}
+
+/* Store stream index @val (negative for none) to vertex @i and fix up all vertices above it. */
+static inline void P(set_tree)(P(key) *keys, P(mwt) *tree, uns i, int val)
+{
+  P(mwt) *vertex = &tree[i];
+
+  vertex->i = val;
+  P(update_tree)(keys, tree, i);
+}
+
+/*
+ * Merge all buckets of the NULL-terminated array @ins to the bucket @out,
+ * whose run counter is increased by one.
+ *
+ * The current key of each input stream is kept in keys[] and the stream
+ * with the smallest key is found by the tree of minima described above.
+ * The leaves of the tree are the vertices n2 .. 2*n2-1, where n2 is
+ * the number of inputs rounded up to a power of two.
+ */
+static void P(multiway_merge)(struct sort_context *ctx UNUSED, struct sort_bucket **ins, struct sort_bucket *out)
+{
+ uns num_ins = 0;
+ while (ins[num_ins])
+ num_ins++;
+
+ uns n2 = 1;
+ while (n2 < num_ins)
+ n2 *= 2;
+
+ struct fastbuf *fout = sbuck_write(out);
+ struct fastbuf *fins[num_ins];
+ P(key) keys[num_ins];
+ P(mwt) tree[2*n2];
+ // Start with an empty tree (vertex 0 is never used)
+ for (uns i=1; i<2*n2; i++)
+ tree[i] = (P(mwt)) { .i = -1 };
+
+ // Read the first key of every stream; streams with no keys stay out of the tree
+ for (uns i=0; i<num_ins; i++)
+ {
+ fins[i] = sbuck_read(ins[i]);
+ if (P(read_key)(fins[i], &keys[i]))
+ P(set_tree)(keys, tree, n2+i, i);
+ }
+
+#ifdef SORT_UNIFY
+
+ uns hits[num_ins];
+ P(key) *mkeys[num_ins], *key;
+ struct fastbuf *mfb[num_ins];
+
+ while (likely(tree[1].i >= 0))
+ {
+ int i = tree[1].i;
+ if (!tree[1].eq)
+ {
+ /* The key is unique, so let's go through the fast path */
+ P(copy_data)(&keys[i], fins[i], fout);
+ if (unlikely(!P(read_key)(fins[i], &keys[i])))
+ tree[n2+i].i = -1;
+ P(update_tree)(keys, tree, n2+i);
+ continue;
+ }
+
+ // Slow path: take the streams out of the tree one by one as long as
+ // the minimum equals the first key taken, and remember them in hits[].
+ uns m = 0;
+ key = &keys[i];
+ do
+ {
+ hits[m] = i;
+ mkeys[m] = &keys[i];
+ mfb[m] = fins[i];
+ m++;
+ P(set_tree)(keys, tree, n2+i, -1);
+ i = tree[1].i;
+ if (unlikely(i < 0))
+ break;
+ }
+ while (!P(compare)(key, &keys[i]));
+
+ P(copy_merged)(mkeys, mfb, m, fout);
+
+ // Read the next key of every stream taken and put the stream back
+ // to the tree, unless it has been exhausted.
+ for (uns j=0; j<m; j++)
+ {
+ i = hits[j];
+ if (likely(P(read_key)(fins[i], &keys[i])))
+ P(set_tree)(keys, tree, n2+i, i);
+ }
+ }
+
+#else
+
+ /* Simplified version which does not support any unification */
+ while (likely(tree[1].i >= 0))
+ {
+ uns i = tree[1].i;
+ P(key) UNUSED key = keys[i]; // Copy kept for the uniqueness check below
+ P(copy_data)(&keys[i], fins[i], fout);
+ if (unlikely(!P(read_key)(fins[i], &keys[i])))
+ tree[n2+i].i = -1;
+ P(update_tree)(keys, tree, n2+i);
+#ifdef SORT_ASSERT_UNIQUE
+ ASSERT(tree[1].i < 0 || P(compare)(&key, &keys[tree[1].i]) < 0);
+#endif
+ }
+
+#endif
+
+ out->runs++;
+}
--- /dev/null
+/*
+ * UCW Library -- Universal Sorter: Radix-Split Module
+ *
+ * (c) 2007 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include <string.h>
+
+/*
+ * Distribute the records of @bin to 2^numbits buckets given by @bouts.
+ * The index of the target bucket is formed by @numbits bits of the hash
+ * of the key, starting at bit position @bitpos. A bucket is opened for
+ * writing only when its first record arrives.
+ */
+static void P(radix_split)(struct sort_context *ctx UNUSED, struct sort_bucket *bin, struct sort_bucket **bouts, uns bitpos, uns numbits)
+{
+  uns nbucks = 1 << numbits;
+  uns mask = nbucks - 1;
+  struct fastbuf *src = sbuck_read(bin);
+
+  // Output streams, opened lazily
+  struct fastbuf *dests[nbucks];
+  memset(dests, 0, sizeof(dests));
+
+  P(key) cur;
+  while (P(read_key)(src, &cur))
+    {
+      P(hash_t) h = P(hash)(&cur);
+      uns idx = (h >> bitpos) & mask;
+      if (unlikely(!dests[idx]))
+        dests[idx] = sbuck_write(bouts[idx]);
+      P(copy_data)(&cur, src, dests[idx]);
+    }
+}
--- /dev/null
+/*
+ * UCW Library -- Universal Sorter: Two-Way Merge Module
+ *
+ * (c) 2007 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+/*
+ * One pass of the two-way merge sort: pairs of runs, one from ins[0] and one
+ * from ins[1], are merged and the resulting runs are written alternately
+ * to outs[0] and outs[1]. ins[1] can be missing or empty. If outs[1] is
+ * NULL, everything is written to outs[0]. The run counters of the output
+ * buckets are increased by the number of runs written to them.
+ *
+ * NOTE(review): the LESS macro is defined outside this chunk; it is
+ * presumably the comparison operator (`<' or `<=') appropriate for the
+ * current mode of the sorter -- confirm.
+ */
+static void P(twoway_merge)(struct sort_context *ctx UNUSED, struct sort_bucket **ins, struct sort_bucket **outs)
+{
+ struct fastbuf *fin1, *fin2, *fout1, *fout2, *ftmp;
+ // For each input we keep the current key (kinX) and the previous one (kprevX);
+ // the buffers are swapped instead of copying the keys.
+ P(key) kbuf1, kbuf2, kbuf3, kbuf4;
+ P(key) *kin1 = &kbuf1, *kprev1 = &kbuf2, *kin2 = &kbuf3, *kprev2 = &kbuf4;
+ P(key) *kout = NULL, *ktmp; // kout is the last key written (NULL if none yet)
+ int next1, next2, run1, run2; // nextX: input X has a key, runX: its current run continues
+ int comp;
+ uns run_count = 0;
+
+ fin1 = sbuck_read(ins[0]);
+ next1 = P(read_key)(fin1, kin1);
+ if (sbuck_have(ins[1]))
+ {
+ fin2 = sbuck_read(ins[1]);
+ next2 = P(read_key)(fin2, kin2);
+ }
+ else
+ {
+ fin2 = NULL;
+ next2 = 0;
+ }
+ fout1 = fout2 = NULL;
+
+ run1 = next1, run2 = next2;
+ while (next1 || next2)
+ {
+ // Choose the input to take the next key from; an input whose run
+ // has ended has to wait for the other one.
+ if (!run1)
+ comp = 1;
+ else if (!run2)
+ comp = -1;
+ else
+ comp = P(compare)(kin1, kin2);
+ ktmp = (comp <= 0) ? kin1 : kin2;
+ // If the chosen key cannot continue the current output run, start
+ // a new run in the other output stream.
+ if (!kout || !(P(compare)(kout, ktmp) LESS 0))
+ {
+ SWAP(fout1, fout2, ftmp);
+ if (unlikely(!fout1))
+ {
+ // Open the output streams on first use: outs[0] for the first run,
+ // outs[1] for the second one (or the same stream if outs[1] is NULL).
+ if (!fout2)
+ fout1 = sbuck_write(outs[0]);
+ else if (outs[1])
+ fout1 = sbuck_write(outs[1]);
+ else
+ fout1 = fout2;
+ }
+ run_count++;
+ }
+#ifdef SORT_ASSERT_UNIQUE
+ ASSERT(comp != 0);
+#endif
+ if (comp LESS 0)
+ {
+ P(copy_data)(kin1, fin1, fout1);
+ SWAP(kin1, kprev1, ktmp);
+ next1 = P(read_key)(fin1, kin1);
+ run1 = next1 && (P(compare)(kprev1, kin1) LESS 0);
+ kout = kprev1;
+ }
+#ifdef SORT_UNIFY
+ else if (comp == 0)
+ {
+ // Equal keys in both inputs: write them merged and advance both inputs
+ P(key) *mkeys[] = { kin1, kin2 };
+ struct fastbuf *mfb[] = { fin1, fin2 };
+ P(copy_merged)(mkeys, mfb, 2, fout1);
+ SWAP(kin1, kprev1, ktmp);
+ next1 = P(read_key)(fin1, kin1);
+ run1 = next1 && (P(compare)(kprev1, kin1) LESS 0);
+ SWAP(kin2, kprev2, ktmp);
+ next2 = P(read_key)(fin2, kin2);
+ run2 = next2 && (P(compare)(kprev2, kin2) LESS 0);
+ kout = kprev2;
+ }
+#endif
+ else
+ {
+ P(copy_data)(kin2, fin2, fout1);
+ SWAP(kin2, kprev2, ktmp);
+ next2 = P(read_key)(fin2, kin2);
+ run2 = next2 && (P(compare)(kprev2, kin2) LESS 0);
+ kout = kprev2;
+ }
+ // Both runs have ended, so go on with the next pair of runs
+ if (!run1 && !run2)
+ {
+ run1 = next1;
+ run2 = next2;
+ }
+ }
+
+ // The runs were written alternately, starting with outs[0]
+ if (fout2 && fout2 != fout1)
+ outs[1]->runs += run_count / 2;
+ if (fout1)
+ outs[0]->runs += (run_count+1) / 2;
+}
--- /dev/null
+/*
+ * UCW Library -- Universal Sorter: Operations on Contexts, Buffers and Buckets
+ *
+ * (c) 2007 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/fastbuf.h"
+#include "lib/mempool.h"
+#include "lib/stkstring.h"
+#include "lib/sorter/common.h"
+
+#include <fcntl.h>
+
+/* Allocate `size' bytes for the sorter from ctx->pool using mp_alloc_zero(). */
+void *
+sorter_alloc(struct sort_context *ctx, uns size)
+{
+  void *mem = mp_alloc_zero(ctx->pool, size);
+  return mem;
+}
+
+/* Create a new bucket allocated by sorter_alloc() and attach it to the context `ctx'. */
+struct sort_bucket *
+sbuck_new(struct sort_context *ctx)
+{
+  struct sort_bucket *bucket = sorter_alloc(ctx, sizeof(*bucket));
+
+  bucket->ctx = ctx;
+  return bucket;
+}
+
+/*
+ * Dispose of a bucket: unlink it from its list (if linked), close its
+ * fastbuf and wipe the structure, leaving only the SBF_DESTROYED flag set.
+ * A NULL bucket is silently ignored.
+ */
+void
+sbuck_drop(struct sort_bucket *b)
+{
+  if (!b)
+    return;
+
+  ASSERT(!(b->flags & SBF_DESTROYED));
+  if (b->n.prev)
+    clist_remove(&b->n);
+  bclose(b->fb);
+  bzero(b, sizeof(*b));
+  b->flags = SBF_DESTROYED;
+}
+
+/*
+ * Return the size of the bucket: the recorded b->size, except for a bucket
+ * which is open for writing and not swapped out, where the current position
+ * of its fastbuf is used instead.
+ */
+sh_off_t
+sbuck_size(struct sort_bucket *b)
+{
+  if (!(b->flags & SBF_OPEN_WRITE) || (b->flags & SBF_SWAPPED_OUT))
+    return b->size;
+  return btell(b->fb);
+}
+
+/* Return 1 if `b' is a non-NULL bucket of non-zero size, 0 otherwise. */
+int
+sbuck_have(struct sort_bucket *b)
+{
+  if (!b)
+    return 0;
+  return sbuck_size(b) != 0;
+}
+
+/* Return 1 if the bucket has a fastbuf attached or is marked as swapped out, 0 otherwise. */
+int
+sbuck_has_file(struct sort_bucket *b)
+{
+  if (b->fb)
+    return 1;
+  return (b->flags & SBF_SWAPPED_OUT) ? 1 : 0;
+}
+
+/*
+ * Undo sbuck_swap_out(): reopen the bucket's file by name. A bucket which
+ * was being written is positioned at the end of the file again. Unless
+ * SORT_DEBUG_KEEP_BUCKETS is set, the file is re-marked as temporary.
+ * Does nothing for buckets which are not swapped out.
+ */
+static void
+sbuck_swap_in(struct sort_bucket *b)
+{
+  if (!(b->flags & SBF_SWAPPED_OUT))
+    return;
+
+  b->fb = bopen_file(b->filename, O_RDWR, b->ctx->fb_params);
+  if (b->flags & SBF_OPEN_WRITE)
+    bseek(b->fb, 0, SEEK_END);
+  if (!(sorter_debug & SORT_DEBUG_KEEP_BUCKETS))
+    bconfig(b->fb, BCONFIG_IS_TEMP_FILE, 1);
+  b->flags &= ~SBF_SWAPPED_OUT;
+  SORT_XTRACE(3, "Swapped in %s", b->filename);
+}
+
+/*
+ * Get the bucket's fastbuf for reading, swapping the bucket in if needed.
+ * A bucket which was open for writing is switched to reading: its size is
+ * recorded and the stream is rewound. Asking for a bucket which is open
+ * neither for reading nor for writing is an assertion failure.
+ */
+struct fastbuf *
+sbuck_read(struct sort_bucket *b)
+{
+  sbuck_swap_in(b);
+  if (!(b->flags & (SBF_OPEN_READ | SBF_OPEN_WRITE)))
+    ASSERT(0);
+  else
+    {
+      if (!(b->flags & SBF_OPEN_READ))
+	{
+	  /* Writer -> reader transition */
+	  b->size = btell(b->fb);
+	  b->flags = (b->flags & ~SBF_OPEN_WRITE) | SBF_OPEN_READ;
+	  brewind(b->fb);
+	}
+      return b->fb;
+    }
+}
+
+/*
+ * Get the bucket's fastbuf for writing, swapping the bucket in if needed.
+ * On first use, a temporary file is created (kept on disk when
+ * SORT_DEBUG_KEEP_BUCKETS is set) and its name is remembered in b->filename.
+ * The bucket must not be open for reading nor destroyed.
+ */
+struct fastbuf *
+sbuck_write(struct sort_bucket *b)
+{
+  sbuck_swap_in(b);
+  if (!(b->flags & SBF_OPEN_WRITE))
+    {
+      ASSERT(!(b->flags & (SBF_OPEN_READ | SBF_DESTROYED)));
+      b->fb = bopen_tmp_file(b->ctx->fb_params);
+      if (sorter_debug & SORT_DEBUG_KEEP_BUCKETS)
+	bconfig(b->fb, BCONFIG_IS_TEMP_FILE, 0);
+      b->flags |= SBF_OPEN_WRITE;
+      b->filename = mp_strdup(b->ctx->pool, b->fb->name);
+    }
+  else
+    ASSERT(b->fb);
+  return b->fb;
+}
+
+/*
+ * Close the fastbuf of an open bucket while keeping its file, so that
+ * sbuck_swap_in() can reopen it later. The temporary-file flag is cleared
+ * before closing. For a bucket open for writing, the current position is
+ * saved to b->size first. Buckets which are not open, have no fastbuf or
+ * carry SBF_SOURCE are left alone.
+ */
+void
+sbuck_swap_out(struct sort_bucket *b)
+{
+  if (!(b->flags & (SBF_OPEN_READ | SBF_OPEN_WRITE)) || !b->fb || (b->flags & SBF_SOURCE))
+    return;
+
+  if (b->flags & SBF_OPEN_WRITE)
+    b->size = btell(b->fb);
+  bconfig(b->fb, BCONFIG_IS_TEMP_FILE, 0);
+  bclose(b->fb);
+  b->fb = NULL;
+  b->flags |= SBF_SWAPPED_OUT;
+  SORT_XTRACE(3, "Swapped out %s", b->filename);
+}
+
+/*
+ * Compute the size of the big sorting buffer: sorter_bufsize aligned with
+ * ALIGN_TO() to CPU_PAGE_SIZE, but never less than two pages. The result is
+ * stored in ctx->big_buf_size; nothing is allocated here.
+ */
+void
+sorter_prepare_buf(struct sort_context *ctx)
+{
+  const u64 page = (u64)CPU_PAGE_SIZE;
+  u64 bytes = sorter_bufsize;
+
+  bytes = ALIGN_TO(bytes, page);
+  if (bytes < 2*page)
+    bytes = 2*page;
+  ctx->big_buf_size = bytes;
+}
+
+/* Allocate the big sorting buffer of ctx->big_buf_size bytes unless it already exists. */
+void
+sorter_alloc_buf(struct sort_context *ctx)
+{
+  if (!ctx->big_buf)
+    {
+      ctx->big_buf = big_alloc(ctx->big_buf_size);
+      SORT_XTRACE(3, "Allocated sorting buffer (%s)", stk_fsize(ctx->big_buf_size));
+    }
+}
+
+/* Release the big sorting buffer if there is one and forget the pointer. */
+void
+sorter_free_buf(struct sort_context *ctx)
+{
+  if (ctx->big_buf)
+    {
+      big_free(ctx->big_buf, ctx->big_buf_size);
+      ctx->big_buf = NULL;
+      SORT_XTRACE(3, "Freed sorting buffer");
+    }
+}
--- /dev/null
+/*
+ * UCW Library -- Testing the Sorter
+ *
+ * (c) 2007 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/getopt.h"
+#include "lib/conf.h"
+#include "lib/fastbuf.h"
+#include "lib/ff-binary.h"
+#include "lib/hashfunc.h"
+#include "lib/md5.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+/*** A hack for overriding radix-sorter configuration ***/
+
+#ifdef FORCE_RADIX_BITS
+#undef CONFIG_UCW_RADIX_SORTER_BITS
+#define CONFIG_UCW_RADIX_SORTER_BITS FORCE_RADIX_BITS
+#endif
+
+/*** Time measurement ***/
+
+static timestamp_t timer;
+static uns test_id;
+
+/* Begin a timed section: call sync() first, then (re)start the global `timer'. */
+static void
+start(void)
+{
+ sync();
+ init_timer(&timer);
+}
+
+/*
+ * End a timed section: call sync() and log the time measured by `timer'
+ * for the test identified by the global `test_id'. get_timer() is taken to
+ * return milliseconds (the value is divided by 1000 and printed as seconds).
+ */
+static void
+stop(void)
+{
+ sync();
+ msg(L_INFO, "Test %d took %.3fs", test_id, get_timer(&timer) / 1000.);
+}
+
+/*** Simple 4-byte integer keys ***/
+
+struct key1 {
+ u32 x;
+};
+
+#define SORT_KEY_REGULAR struct key1
+#define SORT_PREFIX(x) s1_##x
+#define SORT_INPUT_FB
+#define SORT_OUTPUT_FB
+#define SORT_UNIQUE
+#define SORT_INT(k) (k).x
+#define SORT_DELETE_INPUT 0
+
+#include "lib/sorter/sorter.h"
+
+/*
+ * Sort N 32-bit integers by the s1 sorter and verify that the output is
+ * exactly the sequence 0, 1, ..., N-1.
+ *
+ * mode: order of the input -- 0 = increasing, 1 = decreasing,
+ *       2 = scrambled as (i*K + 17) mod N. Must be 0..2.
+ * size: requested amount of input data in bytes, from which N is derived
+ *       (N = 0 for size 0).
+ */
+static void
+test_int(int mode, u64 size)
+{
+  uns N = 0;
+  if (size)
+    N = nextprime(MIN(size/4, 0xffff0000));
+  uns K = N/4*3;
+  char *order = ((char *[]) { "increasing", "decreasing", "random" })[mode];
+  msg(L_INFO, ">>> Integers (%s, N=%u)", order, N);
+
+  /* Generate the input */
+  struct fastbuf *f = bopen_tmp(65536);
+  for (uns i=0; i<N; i++)
+    {
+      uns v;
+      if (mode == 0)
+	v = i;
+      else if (mode == 1)
+	v = N-1-i;
+      else
+	v = ((u64)i * K + 17) % N;
+      bputl(f, v);
+    }
+  brewind(f);
+
+  /* Sort it, measuring the time */
+  start();
+  f = s1_sort(f, NULL, N-1);
+  stop();
+
+  /* The result must count up from zero */
+  SORT_XTRACE(2, "Verifying");
+  uns expected = 0;
+  while (expected < N)
+    {
+      uns got = bgetl(f);
+      if (got != expected)
+	die("Discrepancy: %u instead of %u", got, expected);
+      expected++;
+    }
+  bclose(f);
+}
+
+/*** Integers with merging, but no data ***/
+
+struct key2 {
+ u32 x;
+ u32 cnt;
+};
+
+/*
+ * Unification callback of the s2 sorter: adds the counters of k[1..n-1]
+ * to k[0]->cnt and writes k[0] as the single merged record. The data
+ * pointers and the workspace buffer are not used.
+ */
+static inline void s2_write_merged(struct fastbuf *f, struct key2 **k, void **d UNUSED, uns n, void *buf UNUSED)
+{
+  struct key2 *first = k[0];
+  uns i = 1;
+
+  while (i < n)
+    first->cnt += k[i++]->cnt;
+  bwrite(f, first, sizeof(struct key2));
+}
+
+#define SORT_KEY_REGULAR struct key2
+#define SORT_PREFIX(x) s2_##x
+#define SORT_INPUT_FB
+#define SORT_OUTPUT_FB
+#define SORT_UNIFY
+#define SORT_INT(k) (k).x
+
+#include "lib/sorter/sorter.h"
+
+/*
+ * Test unification of integer keys carrying a counter (the s2 sorter).
+ *
+ * The input consists of `mult' passes over N keys, each key written twice
+ * per pass with cnt = 1, so after sorting every key 0..N-1 must appear
+ * exactly once with cnt = 2*mult.
+ *
+ * mode: order of keys inside a pass -- 0 = increasing, 1 = decreasing,
+ *       2 = scrambled as (i*K + 17) mod N. Must be 0..2.
+ * size: requested amount of input data in bytes.
+ */
+static void
+test_counted(int mode, u64 size)
+{
+ u64 items = size / sizeof(struct key2);
+ /* Raise the multiplicity until the number of distinct keys fits below 0xffff0000. */
+ uns mult = 2;
+ while (items/(2*mult) > 0xffff0000)
+ mult++;
+ uns N = items ? nextprime(items/(2*mult)) : 0;
+ uns K = N/4*3;
+ msg(L_INFO, ">>> Counted integers (%s, N=%u, mult=%u)", ((char *[]) { "increasing", "decreasing", "random" })[mode], N, mult);
+
+ struct fastbuf *f = bopen_tmp(65536);
+ for (uns m=0; m<mult; m++)
+ for (uns i=0; i<N; i++)
+ for (uns j=0; j<2; j++)
+ {
+ /* Record = key value followed by a counter of 1. */
+ bputl(f, (mode==0) ? (i%N) : (mode==1) ? N-1-(i%N) : ((u64)i * K + 17) % N);
+ bputl(f, 1);
+ }
+ brewind(f);
+
+ start();
+ f = s2_sort(f, NULL, N-1);
+ stop();
+
+ SORT_XTRACE(2, "Verifying");
+ for (uns i=0; i<N; i++)
+ {
+ /* Keys must come out as 0..N-1, each with the summed counter. */
+ uns j = bgetl(f);
+ if (i != j)
+ die("Discrepancy: %u instead of %u", j, i);
+ uns k = bgetl(f);
+ if (k != 2*mult)
+ die("Discrepancy: %u has count %u instead of %u", j, k, 2*mult);
+ }
+ bclose(f);
+}
+
+/*** Longer records with hashes (similar to Shepherd's index records) ***/
+
+struct key3 {
+ u32 hash[4];
+ u32 i;
+ u32 payload[3];
+};
+
+/*
+ * Compare two key3 records by their four hash words, hash[0] being the
+ * most significant; the remaining fields do not take part. Returns 0 when
+ * all four words are equal, otherwise whatever COMPARE() returns for the
+ * first differing pair.
+ */
+static inline int s3_compare(struct key3 *x, struct key3 *y)
+{
+  for (uns w=0; w<4; w++)
+    COMPARE(x->hash[w], y->hash[w]);
+  return 0;
+}
+
+/* Hash of a key3 for the s3 sorter: the first hash word, which is also the first word compared by s3_compare(). */
+static inline uns s3_hash(struct key3 *x)
+{
+  uns first_word = x->hash[0];
+  return first_word;
+}
+
+#define SORT_KEY_REGULAR struct key3
+#define SORT_PREFIX(x) s3_##x
+#define SORT_INPUT_FB
+#define SORT_OUTPUT_FB
+#define SORT_HASH_BITS 32
+
+#include "lib/sorter/sorter.h"
+
+/*
+ * Fill in the i-th test record deterministically.
+ *
+ * The payload words are fixed linear functions of i. The hash depends on
+ * the mode: for modes 0 and 1, hash[0] is i resp. ~i and hash[1..3] copy
+ * the payload; for any other mode the hash is the MD5 digest of the four
+ * bytes of k->i.
+ */
+static void
+gen_hash_key(int mode, struct key3 *k, uns i)
+{
+  k->i = i;
+  k->payload[0] = 7*i + 13;
+  k->payload[1] = 13*i + 19;
+  k->payload[2] = 19*i + 7;
+
+  if (mode == 0 || mode == 1)
+    {
+      k->hash[0] = mode ? ~i : i;
+      for (uns w=0; w<3; w++)
+	k->hash[w+1] = k->payload[w];
+    }
+  else
+    {
+      struct MD5Context ctx;
+      MD5Init(&ctx);
+      MD5Update(&ctx, (byte*) &k->i, 4);
+      MD5Final((byte*) &k->hash, &ctx);
+    }
+}
+
+/*
+ * Test sorting of longer fixed-size records by a 128-bit hash (the s3 sorter).
+ *
+ * N records produced by gen_hash_key() are written, sorted and then checked:
+ * the output must be strictly increasing by s3_compare(), every record must
+ * be byte-identical to the one regenerated from its index field, and the sum
+ * of hash[3] over the output must match the sum over the input.
+ *
+ * mode: passed to gen_hash_key(); must be 0..2 (it also indexes the name table).
+ * size: requested amount of input data in bytes.
+ */
+static void
+test_hashes(int mode, u64 size)
+{
+ uns N = MIN(size / sizeof(struct key3), 0xffffffff);
+ msg(L_INFO, ">>> Hashes (%s, N=%u)", ((char *[]) { "increasing", "decreasing", "random" })[mode], N);
+ struct key3 k, lastk;
+
+ struct fastbuf *f = bopen_tmp(65536);
+ /* Wrapping checksum of hash[3]; it must return to zero after verification. */
+ uns hash_sum = 0;
+ for (uns i=0; i<N; i++)
+ {
+ gen_hash_key(mode, &k, i);
+ hash_sum += k.hash[3];
+ bwrite(f, &k, sizeof(k));
+ }
+ brewind(f);
+
+ start();
+ f = s3_sort(f, NULL);
+ stop();
+
+ SORT_XTRACE(2, "Verifying");
+ for (uns i=0; i<N; i++)
+ {
+ int ok = breadb(f, &k, sizeof(k));
+ ASSERT(ok);
+ /* lastk still holds the previous record here (it equals the regenerated copy checked below). */
+ if (i && s3_compare(&k, &lastk) <= 0)
+ ASSERT(0);
+ /* Regenerate the record from its index and require an exact match. */
+ gen_hash_key(mode, &lastk, k.i);
+ if (memcmp(&k, &lastk, sizeof(k)))
+ ASSERT(0);
+ hash_sum -= k.hash[3];
+ }
+ ASSERT(!hash_sum);
+ bclose(f);
+}
+
+/*** Variable-length records (strings) with and without var-length data ***/
+
+#define KEY4_MAX 256
+
+struct key4 {
+ uns len;
+ byte s[KEY4_MAX];
+};
+
+/*
+ * Compare two variable-length string keys: bytewise over the common
+ * prefix (the memcmp() result is returned as is when it is non-zero),
+ * ties being decided by the lengths via COMPARE(). Returns 0 for equal keys.
+ */
+static inline int s4_compare(struct key4 *x, struct key4 *y)
+{
+  uns common = MIN(x->len, y->len);
+  int diff = memcmp(x->s, y->s, common);
+
+  if (!diff)
+    {
+      COMPARE(x->len, y->len);
+      return 0;
+    }
+  return diff;
+}
+
+/*
+ * Read one string key: a length word followed by that many bytes.
+ * Returns 0 when the length word reads as 0xffffffff (treated as end of
+ * input), 1 after a key has been read. The length is always stored to x->len.
+ */
+static inline int s4_read_key(struct fastbuf *f, struct key4 *x)
+{
+  uns len = bgetl(f);
+
+  x->len = len;
+  if (len == 0xffffffff)
+    return 0;
+  ASSERT(x->len < KEY4_MAX);
+  breadb(f, x->s, len);
+  return 1;
+}
+
+/* Write one string key in the format read by s4_read_key(): the length word, then the bytes. */
+static inline void s4_write_key(struct fastbuf *f, struct key4 *x)
+{
+  ASSERT(x->len < KEY4_MAX);
+  uns len = x->len;
+  bputl(f, len);
+  bwrite(f, x->s, len);
+}
+
+#define SORT_KEY struct key4
+#define SORT_PREFIX(x) s4_##x
+#define SORT_KEY_SIZE(x) (sizeof(struct key4) - KEY4_MAX + (x).len)
+#define SORT_INPUT_FB
+#define SORT_OUTPUT_FB
+
+#include "lib/sorter/sorter.h"
+
+#define s4b_compare s4_compare
+#define s4b_read_key s4_read_key
+#define s4b_write_key s4_write_key
+
+/*
+ * Size of the data attached to a string key in the "with data" variant:
+ * the first byte of the string xor 0xad, or 0 for an empty string.
+ */
+static inline uns s4_data_size(struct key4 *x)
+{
+  if (!x->len)
+    return 0;
+  return x->s[0] ^ 0xad;
+}
+
+#define SORT_KEY struct key4
+#define SORT_PREFIX(x) s4b_##x
+#define SORT_KEY_SIZE(x) (sizeof(struct key4) - KEY4_MAX + (x).len)
+#define SORT_DATA_SIZE(x) s4_data_size(&(x))
+#define SORT_INPUT_FB
+#define SORT_OUTPUT_FB
+
+#include "lib/sorter/sorter.h"
+
+/* Generate a random string key: length from random_max(KEY4_MAX), every byte from random(). */
+static void
+gen_key4(struct key4 *k)
+{
+  k->len = random_max(KEY4_MAX);
+
+  uns pos = 0;
+  while (pos < k->len)
+    {
+      k->s[pos] = random();
+      pos++;
+    }
+}
+
+/*
+ * Fill buf[0..len-1] with bytes derived deterministically from the seed h:
+ * each byte is the top 8 bits of h, after which h advances as h*259309 + 17.
+ */
+static void
+gen_data4(byte *buf, uns len, uns h)
+{
+  for (uns i=0; i<len; i++)
+    {
+      buf[i] = h >> 24;
+      h = h*259309 + 17;
+    }
+}
+
+/*
+ * Test sorting of variable-length string keys, optionally followed by
+ * variable-length data (the s4 and s4b sorters).
+ *
+ * N random keys are written; in mode != 0, each key is followed by
+ * s4_data_size() bytes generated from the hash of the key. After sorting,
+ * the keys must be in non-decreasing order, the data (if any) must match
+ * what gen_data4() produces for the key, and the wrapping sum of the key
+ * hashes must be the same as for the input.
+ *
+ * mode: 0 = keys only, non-zero = keys with data.
+ * size: requested amount of input data in bytes.
+ */
+static void
+test_strings(uns mode, u64 size)
+{
+  uns avg_item_size = KEY4_MAX/2 + 4 + (mode ? 128 : 0);
+  uns N = MIN(size / avg_item_size, 0xffffffff);
+  msg(L_INFO, ">>> Strings %s(N=%u)", (mode ? "with data " : ""), N);
+  /*
+   * gen_key4() draws its bytes from random(), whose seeding function is
+   * srandom(); srand() is only guaranteed to seed rand().
+   */
+  srandom(1);
+
+  struct key4 k, lastk;
+  byte buf[256], buf2[256];
+  uns sum = 0;
+
+  struct fastbuf *f = bopen_tmp(65536);
+  for (uns i=0; i<N; i++)
+    {
+      gen_key4(&k);
+      s4_write_key(f, &k);
+      uns h = hash_block(k.s, k.len);
+      sum += h;
+      if (mode)
+	{
+	  gen_data4(buf, s4_data_size(&k), h);
+	  bwrite(f, buf, s4_data_size(&k));
+	}
+    }
+  brewind(f);
+
+  start();
+  f = (mode ? s4b_sort : s4_sort)(f, NULL);
+  stop();
+
+  SORT_XTRACE(2, "Verifying");
+  for (uns i=0; i<N; i++)
+    {
+      int ok = s4_read_key(f, &k);
+      ASSERT(ok);
+      uns h = hash_block(k.s, k.len);
+      if (mode && s4_data_size(&k))
+	{
+	  /* The data must be exactly what was generated for this key. */
+	  ok = breadb(f, buf, s4_data_size(&k));
+	  ASSERT(ok);
+	  gen_data4(buf2, s4_data_size(&k), h);
+	  ASSERT(!memcmp(buf, buf2, s4_data_size(&k)));
+	}
+      /* Equal keys are allowed, descending ones are not. */
+      if (i && s4_compare(&k, &lastk) < 0)
+	ASSERT(0);
+      sum -= h;
+      lastk = k;
+    }
+  ASSERT(!sum);
+  bclose(f);
+}
+
+/*** Graph-like structure with custom presorting ***/
+
+struct key5 {
+ u32 x;
+ u32 cnt;
+};
+
+static uns s5_N, s5_K, s5_L, s5_i, s5_j;
+
+struct s5_pair {
+ uns x, y;
+};
+
+/*
+ * Generator of the test graph, driven by the globals s5_N, s5_K, s5_L and
+ * the cursor (s5_i, s5_j). Each call stores the next pair to *p and returns
+ * 1, or returns 0 when all pairs have been produced. Starting from
+ * s5_i = s5_j = 0, it walks j = 0..N-1 for every i = 0..N-1, i.e. N*N pairs:
+ *   x = j*K mod N,  y = (i+j)*L mod N.
+ */
+static int s5_gen(struct s5_pair *p)
+{
+ if (s5_j >= s5_N)
+ {
+ /* End of a row: stop after the last row (or immediately for N = 0), otherwise start the next one. */
+ if (!s5_N || s5_i >= s5_N-1)
+ return 0;
+ s5_j = 0;
+ s5_i++;
+ }
+ /* 64-bit intermediate products avoid overflow before the modulo. */
+ p->x = ((u64)s5_j * s5_K) % s5_N;
+ p->y = ((u64)(s5_i + s5_j) * s5_L) % s5_N;
+ s5_j++;
+ return 1;
+}
+
+#define ASORT_PREFIX(x) s5m_##x
+#define ASORT_KEY_TYPE u32
+#define ASORT_ELT(i) ary[i]
+#define ASORT_EXTRA_ARGS , u32 *ary
+#include "lib/arraysort.h"
+
+/*
+ * In-memory unification callback of the s5 sorters: concatenates the 32-bit
+ * data items of all n records into the workspace `buf', sorts them by
+ * s5m_sort() and writes a single record -- keys[0] with cnt set to the
+ * total number of items, followed by the sorted items.
+ */
+static void s5_write_merged(struct fastbuf *f, struct key5 **keys, void **data, uns n, void *buf)
+{
+  u32 *merged = buf;
+  uns total = 0;
+
+  for (uns i=0; i<n; i++)
+    {
+      uns cnt = keys[i]->cnt;
+      memcpy(merged + total, data[i], 4*cnt);
+      total += cnt;
+    }
+  s5m_sort(total, merged);
+
+  keys[0]->cnt = total;
+  bwrite(f, keys[0], sizeof(struct key5));
+  bwrite(f, merged, 4*total);
+}
+
+/*
+ * Streaming unification callback of the s5 sorters: merges the data of n
+ * records with equal keys directly from the fastbufs data[0..n-1] to `dest'.
+ * Writes one key with cnt = sum of all counts, followed by the items chosen
+ * by repeatedly taking the smallest head item among the streams.
+ * NOTE(review): the result is sorted only if each input stream is already
+ * ascending and every record has cnt >= 1 (the first item of each stream is
+ * read unconditionally) -- confirm against the sorter's guarantees.
+ * The cnt fields of the input keys are consumed (decremented) in the process.
+ */
+static void s5_copy_merged(struct key5 **keys, struct fastbuf **data, uns n, struct fastbuf *dest)
+{
+ /* k[i] = head item of stream i */
+ u32 k[n];
+ uns m = 0;
+ for (uns i=0; i<n; i++)
+ {
+ k[i] = bgetl(data[i]);
+ m += keys[i]->cnt;
+ }
+ struct key5 key = { .x = keys[0]->x, .cnt = m };
+ bwrite(dest, &key, sizeof(key));
+ while (key.cnt--)
+ {
+ /* Pick the stream with the smallest head (the lowest index wins ties). */
+ uns b = 0;
+ for (uns i=1; i<n; i++)
+ if (k[i] < k[b])
+ b = i;
+ bputl(dest, k[b]);
+ /* Refill the head, or park an exhausted stream at the maximum value so it is not chosen again. */
+ if (--keys[b]->cnt)
+ k[b] = bgetl(data[b]);
+ else
+ k[b] = ~0U;
+ }
+}
+
+/*
+ * "Less than" predicate for pairs used by the s5p array sorter: orders by x
+ * first, then by y; returns 0 when both fields are equal.
+ * NOTE(review): relies on COMPARE_LT() returning from this function as soon
+ * as its two arguments differ -- the macro is defined elsewhere, confirm.
+ */
+static inline int s5p_lt(struct s5_pair x, struct s5_pair y)
+{
+ COMPARE_LT(x.x, y.x);
+ COMPARE_LT(x.y, y.y);
+ return 0;
+}
+
+#define ASORT_PREFIX(x) s5p_##x
+#define ASORT_KEY_TYPE struct s5_pair
+#define ASORT_LT(x,y) s5p_lt(x,y)
+#include "lib/sorter/array.h"
+
+/*
+ * Custom presorter of the s5 sorter (see SORT_INPUT_PRESORT): fills the
+ * buffer with as many pairs from s5_gen() as fit, sorts them by s5p_sort()
+ * and writes them to `dest' grouped by x -- for every distinct x one key5
+ * with cnt = size of the group, followed by the y values of the group.
+ * Returns 0 when the generator has no more pairs, 1 after writing a batch.
+ */
+static int s5_presort(struct fastbuf *dest, void *buf, size_t bufsize)
+{
+ uns max = MIN(bufsize/sizeof(struct s5_pair), 0xffffffff);
+ struct s5_pair *a = buf;
+ uns n = 0;
+ while (n<max && s5_gen(&a[n]))
+ n++;
+ if (!n)
+ return 0;
+ s5p_sort(a, n);
+ uns i = 0;
+ while (i < n)
+ {
+ /* [j, i) becomes the range of pairs sharing the same x. */
+ uns j = i;
+ while (i < n && a[i].x == a[j].x)
+ i++;
+ struct key5 k = { .x = a[j].x, .cnt = i-j };
+ bwrite(dest, &k, sizeof(k));
+ while (j < i)
+ bputl(dest, a[j++].y);
+ }
+ return 1;
+}
+
+#define SORT_KEY_REGULAR struct key5
+#define SORT_PREFIX(x) s5_##x
+#define SORT_DATA_SIZE(k) (4*(k).cnt)
+#define SORT_UNIFY
+#define SORT_UNIFY_WORKSPACE(k) SORT_DATA_SIZE(k)
+#define SORT_INPUT_PRESORT
+#define SORT_OUTPUT_THIS_FB
+#define SORT_INT(k) (k).x
+
+#include "lib/sorter/sorter.h"
+
+#define SORT_KEY_REGULAR struct key5
+#define SORT_PREFIX(x) s5b_##x
+#define SORT_DATA_SIZE(k) (4*(k).cnt)
+#define SORT_UNIFY
+#define SORT_UNIFY_WORKSPACE(k) SORT_DATA_SIZE(k)
+#define SORT_INPUT_FB
+#define SORT_OUTPUT_THIS_FB
+#define SORT_INT(k) (k).x
+#define s5b_write_merged s5_write_merged
+#define s5b_copy_merged s5_copy_merged
+
+#include "lib/sorter/sorter.h"
+
+/*
+ * Test unification of records with variable-length data on a graph-like
+ * data set (the s5 and s5b sorters).
+ *
+ * The pairs produced by s5_gen() are fed to the sorter either through the
+ * custom presorter (mode 0) or as an ordinary input file of single-item
+ * records (mode != 0). The sorter appends its output to a fastbuf which
+ * already contains a marker word. The expected result is the marker followed
+ * by N records with x = 0..N-1, each holding cnt = N and the data 0..N-1.
+ *
+ * size: requested amount of data in bytes; N is a prime chosen so that
+ *       N*(N+2)*4 is at least `size' (N = 0 for size 0).
+ */
+static void
+test_graph(uns mode, u64 size)
+{
+ uns N = 3;
+ while ((u64)N*(N+2)*4 < size)
+ N = nextprime(N);
+ if (!size)
+ N = 0;
+ msg(L_INFO, ">>> Graph%s (N=%u)", (mode ? "" : " with custom presorting"), N);
+ /* Reset the generator state used by s5_gen(). */
+ s5_N = N;
+ s5_K = N/4*3;
+ s5_L = N/3*2;
+ s5_i = s5_j = 0;
+
+ struct fastbuf *in = NULL;
+ if (mode)
+ {
+ /* Without presorting, every pair becomes a separate record with a single data item. */
+ struct s5_pair p;
+ in = bopen_tmp(65536);
+ while (s5_gen(&p))
+ {
+ struct key5 k = { .x = p.x, .cnt = 1 };
+ bwrite(in, &k, sizeof(k));
+ bputl(in, p.y);
+ }
+ brewind(in);
+ }
+
+ start();
+ struct fastbuf *f = bopen_tmp(65536);
+ /* Marker written before sorting; it must still precede the sorter's output afterwards. */
+ bputl(f, 0xfeedcafe);
+ struct fastbuf *g = (mode ? s5b_sort(in, f, s5_N-1) : s5_sort(NULL, f, s5_N-1));
+ ASSERT(f == g);
+ stop();
+
+ SORT_XTRACE(2, "Verifying");
+ uns c = bgetl(f);
+ ASSERT(c == 0xfeedcafe);
+ for (uns i=0; i<N; i++)
+ {
+ struct key5 k;
+ int ok = breadb(f, &k, sizeof(k));
+ ASSERT(ok);
+ ASSERT(k.x == i);
+ ASSERT(k.cnt == N);
+ for (uns j=0; j<N; j++)
+ {
+ uns y = bgetl(f);
+ ASSERT(y == j);
+ }
+ }
+ bclose(f);
+}
+
+/*** Simple 8-byte integer keys ***/
+
+struct key6 {
+ u64 x;
+};
+
+#define SORT_KEY_REGULAR struct key6
+#define SORT_PREFIX(x) s6_##x
+#define SORT_INPUT_FB
+#define SORT_OUTPUT_FB
+#define SORT_UNIQUE
+#define SORT_INT64(k) (k).x
+
+#include "lib/sorter/sorter.h"
+
+/*
+ * Sort N 64-bit integers by the s6 sorter and verify the output.
+ *
+ * The keys are the numbers 0..N-1 multiplied by 777777, so the expected
+ * output is 0, 777777, 2*777777, ...
+ *
+ * mode: order of the input -- 0 = increasing, 1 = decreasing,
+ *       2 = scrambled as (i*K + 17) mod N. Must be 0..2.
+ * size: requested amount of input data in bytes (N = 0 for size 0).
+ */
+static void
+test_int64(int mode, u64 size)
+{
+  u64 N = size ? nextprime(MIN(size/8, 0xffff0000)) : 0;
+  u64 K = N/4*3;
+  /* %llu takes an unsigned long long, so cast to exactly that type. */
+  msg(L_INFO, ">>> 64-bit integers (%s, N=%llu)", ((char *[]) { "increasing", "decreasing", "random" })[mode], (unsigned long long)N);
+
+  struct fastbuf *f = bopen_tmp(65536);
+  for (u64 i=0; i<N; i++)
+    bputq(f, 777777*((mode==0) ? i : (mode==1) ? N-1-i : ((u64)i * K + 17) % N));
+  brewind(f);
+
+  start();
+  f = s6_sort(f, NULL, 777777*(N-1));
+  stop();
+
+  SORT_XTRACE(2, "Verifying");
+  for (u64 i=0; i<N; i++)
+    {
+      u64 j = bgetq(f);
+      if (777777*i != j)
+	die("Discrepancy: %llu instead of %llu", (unsigned long long)j, (unsigned long long)(777777*i));
+    }
+  bclose(f);
+}
+
+/*** Main ***/
+
+/*
+ * Run the test number `i' on `size' bytes of data and remember the number
+ * in `test_id' for the timing message. Tests are numbered as follows:
+ *    0-2   test_int      (modes 0-2)
+ *    3-5   test_counted  (modes 0-2)
+ *    6-8   test_hashes   (modes 0-2)
+ *    9-10  test_strings  (modes 0-1)
+ *   11-12  test_graph    (modes 0-1)
+ *   13-15  test_int64    (modes 0-2)
+ * Numbers from TMAX up run nothing.
+ */
+static void
+run_test(uns i, u64 size)
+{
+#define TMAX 16
+  test_id = i;
+  if (i < 3)
+    test_int(i, size);
+  else if (i < 6)
+    test_counted(i-3, size);
+  else if (i < 9)
+    test_hashes(i-6, size);
+  else if (i < 11)
+    test_strings(i-9, size);
+  else if (i < 13)
+    test_graph(i-11, size);
+  else if (i < TMAX)
+    test_int64(i-13, size);
+}
+
+/*
+ * Entry point of the sorter test. Options (besides those handled by
+ * cf_getopt() itself):
+ *   -d <debug>  value stored to sorter_debug
+ *   -s <size>   amount of data per test, parsed by cf_parse_u64()
+ *               (default 10000000)
+ *   -t <test>   comma-separated list of test numbers 0..TMAX-1 to run
+ *               (default: all tests)
+ *   -v          increase sorter_trace; may be repeated
+ * Any other option, a test number out of range or a stray non-option
+ * argument prints the usage message and exits with status 1.
+ */
+int
+main(int argc, char **argv)
+{
+  log_init(NULL);
+  int c;
+  u64 size = 10000000;
+  uns t = ~0;			/* Bit mask of selected tests */
+
+  while ((c = cf_getopt(argc, argv, CF_SHORT_OPTS "d:s:t:v", CF_NO_LONG_OPTS, NULL)) >= 0)
+    switch (c)
+      {
+      case 'd':
+	sorter_debug = atol(optarg);
+	break;
+      case 's':
+	if (cf_parse_u64(optarg, &size))
+	  goto usage;
+	break;
+      case 't':
+	{
+	  char *w[32];
+	  int f = sepsplit(optarg, ',', w, ARRAY_SIZE(w));
+	  if (f < 0)
+	    goto usage;
+	  t = 0;
+	  for (int i=0; i<f; i++)
+	    {
+	      int j = atol(w[i]);
+	      /* A negative number must be rejected as well: shifting by it is undefined. */
+	      if (j < 0 || j >= TMAX)
+		goto usage;
+	      t |= 1 << j;
+	    }
+	}
+	break;
+      case 'v':
+	sorter_trace++;
+	break;
+      default:
+      usage:
+	fputs("Usage: sort-test [-v] [-d <debug>] [-s <size>] [-t <test>]\n", stderr);
+	exit(1);
+      }
+  if (optind != argc)
+    goto usage;
+
+  for (uns i=0; i<TMAX; i++)
+    if (t & (1 << i))
+      run_test(i, size);
+
+  return 0;
+}
--- /dev/null
+/*
+ * UCW Library -- Universal Sorter
+ *
+ * (c) 2001--2007 Martin Mares <mj@ucw.cz>
+ * (c) 2004 Robert Spalek <robert@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+/*
+ * This is not a normal header file, but a generator of sorting
+ * routines. Each time you include it with parameters set in the
+ * corresponding preprocessor macros, it generates a file sorter
+ * with the parameters given.
+ *
+ * The sorter operates on fastbufs containing sequences of items. Each item
+ * consists of a key, optionally followed by data. The keys are represented
+ * by fixed-size structures of type SORT_KEY internally, if this format differs
+ * from the on-disk format, explicit reading and writing routines can be provided.
+ * The data are always copied verbatim, unless the sorter is in the merging
+ * mode in which it calls callbacks for merging of items with equal keys.
+ *
+ * All callbacks must be thread-safe.
+ *
+ * Basic parameters and callbacks:
+ *
+ * SORT_PREFIX(x) add a name prefix (used on all global names defined by the sorter)
+ *
+ * SORT_KEY data type capable of holding a single key in memory (the on-disk
+ * representation can be different). Alternatively, you can use:
+ * SORT_KEY_REGULAR data type holding a single key both in memory and on disk;
+ * in this case, breadb() and bwrite() are used to read/write keys
+ * and it's also assumed that the keys are not very long.
+ * int PREFIX_compare(SORT_KEY *a, SORT_KEY *b)
+ * compares two keys, returns result like strcmp(). Mandatory.
+ * int PREFIX_read_key(struct fastbuf *f, SORT_KEY *k)
+ * reads a key from a fastbuf, returns nonzero=ok, 0=EOF.
+ * Mandatory unless SORT_KEY_REGULAR is defined.
+ * void PREFIX_write_key(struct fastbuf *f, SORT_KEY *k)
+ * writes a key to a fastbuf. Mandatory unless SORT_KEY_REGULAR.
+ *
+ * SORT_KEY_SIZE(key) returns the real size of a key (a SORT_KEY type in memory
+ * can be truncated to this number of bytes without any harm;
+ * used to save memory when the keys have variable sizes).
+ * Default: always store the whole SORT_KEY.
+ * SORT_DATA_SIZE(key) gets a key and returns the amount of data following it.
+ * Default: records consist of keys only.
+ *
+ * Integer sorting:
+ *
+ * SORT_INT(key) we are sorting by an integer value returned by this macro.
+ * In this mode, PREFIX_compare is supplied automatically and the sorting
+ * function gets an extra parameter specifying the range of the integers.
+ * The better the range fits, the faster we sort.
+ * Sets up SORT_HASH_xxx automatically.
+ * SORT_INT64(key) the same for 64-bit integers.
+ *
+ * Hashing (optional, but it can speed sorting up):
+ *
+ * SORT_HASH_BITS signals that a monotone hashing function returning a given number of
+ * bits is available. A monotone hash is a function f from keys to integers
+ * such that f(x) < f(y) implies x < y, which is approximately uniformly
+ * distributed. It should be declared as:
+ * uns PREFIX_hash(SORT_KEY *a)
+ *
+ * Unification:
+ *
+ * SORT_UNIFY merge items with identical keys. It requires the following functions:
+ * void PREFIX_write_merged(struct fastbuf *f, SORT_KEY **keys, void **data, uns n, void *buf)
+ * takes n records in memory with keys which compare equal and writes
+ * a single record to the given fastbuf. `buf' points to a buffer which
+ * is guaranteed to hold the sum of workspace requirements (see below)
+ * over all given records. The function is allowed to modify all its inputs.
+ * void PREFIX_copy_merged(SORT_KEY **keys, struct fastbuf **data, uns n, struct fastbuf *dest)
+ * takes n records with keys in memory and data in fastbufs and writes
+ * a single record. Used only if SORT_DATA_SIZE or SORT_UNIFY_WORKSPACE
+ * is defined.
+ * SORT_UNIFY_WORKSPACE(key)
+ * gets a key and returns the amount of workspace required when merging
+ * the given record. Defaults to 0.
+ *
+ * Input (choose one of these):
+ *
+ * SORT_INPUT_FILE file of a given name
+ * SORT_INPUT_FB seekable fastbuf stream
+ * SORT_INPUT_PIPE non-seekable fastbuf stream
+ * SORT_INPUT_PRESORT custom presorter. Calls function
+ * int PREFIX_presort(struct fastbuf *dest, void *buf, size_t bufsize)
+ * to get successive batches of pre-sorted data.
+ * The function is passed a page-aligned presorting buffer.
+ * It returns 1 on success or 0 on EOF.
+ * SORT_DELETE_INPUT A C expression, if true, then the input files are deleted
+ * as soon as possible.
+ *
+ * Output (choose one of these):
+ *
+ * SORT_OUTPUT_FILE file of a given name
+ * SORT_OUTPUT_FB temporary fastbuf stream
+ * SORT_OUTPUT_THIS_FB a given fastbuf stream which can already contain some data
+ *
+ * Other switches:
+ *
+ * SORT_UNIQUE all items have distinct keys (checked in debug mode)
+ *
+ * The function generated:
+ *
+ * <outfb> PREFIX_sort(<in>, <out> [,<range>]), where:
+ * <in> = input file name/fastbuf or NULL
+ * <out> = output file name/fastbuf or NULL
+ * <range> = maximum integer value for the SORT_INT mode
+ *
+ * After including this file, all parameter macros are automatically
+ * undef'd.
+ */
+
+#include "lib/sorter/common.h"
+#include "lib/fastbuf.h"
+
+#include <fcntl.h>
+
+#define P(x) SORT_PREFIX(x)
+
+/*
+ * The key type P(key). With SORT_KEY_REGULAR, the key has the same layout
+ * in memory and on disk, so the reading and writing functions are generated
+ * here; with plain SORT_KEY, only the type is defined and P(read_key) and
+ * P(write_key) are used as provided by the includer.
+ */
+#ifdef SORT_KEY_REGULAR
+typedef SORT_KEY_REGULAR P(key);
+/* Read a whole key structure; returns the result of breadb(). */
+static inline int P(read_key) (struct fastbuf *f, P(key) *k)
+{
+ return breadb(f, k, sizeof(P(key)));
+}
+/* Write a whole key structure verbatim. */
+static inline void P(write_key) (struct fastbuf *f, P(key) *k)
+{
+ bwrite(f, k, sizeof(P(key)));
+}
+#elif defined(SORT_KEY)
+typedef SORT_KEY P(key);
+#else
+#error Missing definition of sorting key.
+#endif
+
+#ifdef SORT_INT64
+typedef u64 P(hash_t);
+#define SORT_INT SORT_INT64
+#define SORT_LONG_HASH
+#else
+typedef uns P(hash_t);
+#endif
+
+/*
+ * Integer sorting mode: the comparison function is generated from SORT_INT
+ * and, unless the includer announced a hash function of its own by defining
+ * SORT_HASH_BITS, the integer itself serves as the hash.
+ */
+#ifdef SORT_INT
+/* Three-way comparison of the integer values of two keys: -1, 0 or 1. */
+static inline int P(compare) (P(key) *x, P(key) *y)
+{
+ if (SORT_INT(*x) < SORT_INT(*y))
+ return -1;
+ if (SORT_INT(*x) > SORT_INT(*y))
+ return 1;
+ return 0;
+}
+
+#ifndef SORT_HASH_BITS
+/* Default hash: the integer value of the key. */
+static inline P(hash_t) P(hash) (P(key) *x)
+{
+ return SORT_INT((*x));
+}
+#endif
+#endif
+
+#ifdef SORT_UNIFY
+#define LESS <
+#else
+#define LESS <=
+#endif
+#define SWAP(x,y,z) do { z=x; x=y; y=z; } while(0)
+
+#if defined(SORT_UNIQUE) && defined(DEBUG_ASSERTS)
+#define SORT_ASSERT_UNIQUE
+#endif
+
+#ifdef SORT_KEY_SIZE
+#define SORT_VAR_KEY
+#else
+#define SORT_KEY_SIZE(key) sizeof(key)
+#endif
+
+#ifdef SORT_DATA_SIZE
+#define SORT_VAR_DATA
+#else
+#define SORT_DATA_SIZE(key) 0
+#endif
+
+/*
+ * Copy one record to `out': the key is written from memory by P(write_key)
+ * and, when the records carry data (SORT_DATA_SIZE given by the includer),
+ * SORT_DATA_SIZE(*key) bytes of data are copied from `in' to `out'.
+ * Without data, `in' is not touched.
+ */
+static inline void P(copy_data)(P(key) *key, struct fastbuf *in, struct fastbuf *out)
+{
+ P(write_key)(out, key);
+#ifdef SORT_VAR_DATA
+ bbcopy(in, out, SORT_DATA_SIZE(*key));
+#else
+ (void) in;
+#endif
+}
+
+/*
+ * When unifying records which have neither data nor workspace, the streaming
+ * merge callback is not required from the includer: this default simply
+ * calls P(write_merged) with no data pointers and no workspace buffer.
+ */
+#if defined(SORT_UNIFY) && !defined(SORT_VAR_DATA) && !defined(SORT_UNIFY_WORKSPACE)
+static inline void P(copy_merged)(P(key) **keys, struct fastbuf **data UNUSED, uns n, struct fastbuf *dest)
+{
+ P(write_merged)(dest, keys, NULL, n, NULL);
+}
+#endif
+
+#if defined(SORT_HASH_BITS) || defined(SORT_INT)
+#define SORT_INTERNAL_RADIX
+#include "lib/sorter/s-radix.h"
+#endif
+
+#if defined(SORT_VAR_KEY) || defined(SORT_VAR_DATA) || defined(SORT_UNIFY_WORKSPACE)
+#include "lib/sorter/s-internal.h"
+#else
+#include "lib/sorter/s-fixint.h"
+#endif
+
+#include "lib/sorter/s-twoway.h"
+#include "lib/sorter/s-multiway.h"
+
+/*
+ * The generated sorting function. It fills in a struct sort_context
+ * according to the SORT_xxx parameters and passes it to sorter_run().
+ *
+ * in:        input file name (SORT_INPUT_FILE) or input fastbuf; must be NULL
+ *            with SORT_INPUT_PRESORT.
+ * out:       output file name (SORT_OUTPUT_FILE), the fastbuf to write to
+ *            (SORT_OUTPUT_THIS_FB), or NULL (SORT_OUTPUT_FB).
+ * int_range: present only in the SORT_INT mode; the maximum integer value,
+ *            used to compute the number of significant hash bits.
+ *
+ * Returns ctx.out_fb as left by sorter_run(), or NULL with SORT_OUTPUT_FILE
+ * (the output has been handed over to bfix_tmp_file() under the name `out').
+ */
+static struct fastbuf *P(sort)(
+#ifdef SORT_INPUT_FILE
+ byte *in,
+#else
+ struct fastbuf *in,
+#endif
+#ifdef SORT_OUTPUT_FILE
+ byte *out
+#else
+ struct fastbuf *out
+#endif
+#ifdef SORT_INT
+ , u64 int_range
+#endif
+ )
+{
+ struct sort_context ctx;
+ bzero(&ctx, sizeof(ctx));
+
+ /* Input: set up the source fastbuf and its size; ~0 is used when the size is not known in advance. */
+#ifdef SORT_INPUT_FILE
+ ctx.in_fb = bopen_file(in, O_RDONLY, &sorter_fb_params);
+ ctx.in_size = bfilesize(ctx.in_fb);
+#elif defined(SORT_INPUT_FB)
+ ctx.in_fb = in;
+ ctx.in_size = bfilesize(in);
+#elif defined(SORT_INPUT_PIPE)
+ ctx.in_fb = in;
+ ctx.in_size = ~(u64)0;
+#elif defined(SORT_INPUT_PRESORT)
+ ASSERT(!in);
+ ctx.custom_presort = P(presort);
+ ctx.in_size = ~(u64)0;
+#else
+#error No input given.
+#endif
+#ifdef SORT_DELETE_INPUT
+ /* NOTE(review): ctx.in_fb is NULL in the SORT_INPUT_PRESORT mode -- confirm the two are never combined. */
+ if (SORT_DELETE_INPUT)
+ bconfig(ctx.in_fb, BCONFIG_IS_TEMP_FILE, 1);
+#endif
+
+ /* Output */
+#ifdef SORT_OUTPUT_FB
+ ASSERT(!out);
+#elif defined(SORT_OUTPUT_THIS_FB)
+ ctx.out_fb = out;
+#elif defined(SORT_OUTPUT_FILE)
+ /* Just assume fastbuf output and rename the fastbuf later */
+#else
+#error No output given.
+#endif
+
+ /* Radix splitting is offered whenever a hash function exists. */
+#ifdef SORT_HASH_BITS
+ ctx.hash_bits = SORT_HASH_BITS;
+ ctx.radix_split = P(radix_split);
+#elif defined(SORT_INT)
+ /* hash_bits = number of bits needed to represent int_range (at most 64) */
+ ctx.hash_bits = 0;
+ while (ctx.hash_bits < 64 && (int_range >> ctx.hash_bits))
+ ctx.hash_bits++;
+ ctx.radix_split = P(radix_split);
+#endif
+
+ /* Callbacks generated by the modules included above */
+ ctx.internal_sort = P(internal);
+ ctx.internal_estimate = P(internal_estimate);
+ ctx.twoway_merge = P(twoway_merge);
+ ctx.multiway_merge = P(multiway_merge);
+
+ sorter_run(&ctx);
+
+#ifdef SORT_OUTPUT_FILE
+ bfix_tmp_file(ctx.out_fb, out);
+ ctx.out_fb = NULL;
+#endif
+ return ctx.out_fb;
+}
+
+#undef SORT_ASSERT_UNIQUE
+#undef SORT_DATA_SIZE
+#undef SORT_DELETE_INPUT
+#undef SORT_HASH_BITS
+#undef SORT_INPUT_FB
+#undef SORT_INPUT_FILE
+#undef SORT_INPUT_PIPE
+#undef SORT_INPUT_PRESORT
+#undef SORT_INT
+#undef SORT_INT64
+#undef SORT_INTERNAL_RADIX
+#undef SORT_KEY
+#undef SORT_KEY_REGULAR
+#undef SORT_KEY_SIZE
+#undef SORT_LONG_HASH
+#undef SORT_OUTPUT_FB
+#undef SORT_OUTPUT_FILE
+#undef SORT_OUTPUT_THIS_FB
+#undef SORT_PREFIX
+#undef SORT_UNIFY
+#undef SORT_UNIFY_WORKSPACE
+#undef SORT_UNIQUE
+#undef SORT_VAR_DATA
+#undef SORT_VAR_KEY
+#undef SWAP
+#undef LESS
+#undef P
}
void
-stk_hexdump_internal(char *dst, byte *src, uns n)
+stk_hexdump_internal(char *dst, const byte *src, uns n)
{
for (uns i=0; i<n; i++)
{
*dst = 0;
}
+/*
+ * Format a size in bytes as a short human-readable string with a B, K, M
+ * or G suffix: values below 10 units of K/M/G get one decimal place, larger
+ * ones are truncated to an integer. The special value ~0 yields "unknown".
+ *
+ * buf must have room for at least 16 bytes, which is what stk_fsize()
+ * allocates; the longest possible result ("17179869183G") needs 13.
+ */
+void
+stk_fsize_internal(char *buf, u64 x)
+{
+  if (x < 1<<10)
+    sprintf(buf, "%dB", (int)x);
+  else if (x < 10<<10)
+    sprintf(buf, "%.1fK", (double)x/(1<<10));
+  else if (x < 1<<20)
+    sprintf(buf, "%dK", (int)(x/(1<<10)));
+  else if (x < 10<<20)
+    sprintf(buf, "%.1fM", (double)x/(1<<20));
+  else if (x < 1<<30)
+    sprintf(buf, "%dM", (int)(x/(1<<20)));
+  else if (x < (u64)10<<30)
+    sprintf(buf, "%.1fG", (double)x/(1<<30));
+  else if (x != ~(u64)0)
+    /* The quotient can be as large as 2^34-1, which does not fit in an int. */
+    sprintf(buf, "%lluG", (unsigned long long)(x >> 30));
+  else
+    strcpy(buf, "unknown");
+}
+
#ifdef TEST
int main(void)
puts(stk_hexdump(a, 3));
char *ary[] = { "The", "jaws", "that", "bite" };
puts(stk_strjoin(ary, 4, ' '));
+ puts(stk_fsize(1234567));
return 0;
}
/*
* UCW Library -- Strings Allocated on the Stack
*
- * (c) 2005--2006 Martin Mares <mj@ucw.cz>
+ * (c) 2005--2007 Martin Mares <mj@ucw.cz>
* (c) 2005 Tomas Valla <tom@ucw.cz>
*
* This software may be freely distributed and used according to the terms
* of the GNU Lesser General Public License.
*/
+#ifndef _UCW_STKSTRING_H
+#define _UCW_STKSTRING_H
+
#include <alloca.h>
#include <string.h>
#include <stdio.h>
-#define stk_strdup(s) ({ char *_s=(s); uns _l=strlen(_s)+1; char *_x=alloca(_l); memcpy(_x, _s, _l); _x; })
-#define stk_strndup(s,n) ({ char *_s=(s); uns _l=strnlen(_s,(n)); char *_x=alloca(_l+1); memcpy(_x, _s, _l); _x[_l]=0; _x; })
-#define stk_strcat(s1,s2) ({ char *_s1=(s1); char *_s2=(s2); uns _l1=strlen(_s1); uns _l2=strlen(_s2); char *_x=alloca(_l1+_l2+1); memcpy(_x,_s1,_l1); memcpy(_x+_l1,_s2,_l2+1); _x; })
+#define stk_strdup(s) ({ const char *_s=(s); uns _l=strlen(_s)+1; char *_x=alloca(_l); memcpy(_x, _s, _l); _x; })
+#define stk_strndup(s,n) ({ const char *_s=(s); uns _l=strnlen(_s,(n)); char *_x=alloca(_l+1); memcpy(_x, _s, _l); _x[_l]=0; _x; })
+#define stk_strcat(s1,s2) ({ const char *_s1=(s1); const char *_s2=(s2); uns _l1=strlen(_s1); uns _l2=strlen(_s2); char *_x=alloca(_l1+_l2+1); memcpy(_x,_s1,_l1); memcpy(_x+_l1,_s2,_l2+1); _x; })
#define stk_strmulticat(s...) ({ char *_s[]={s}; char *_x=alloca(stk_array_len(_s, ARRAY_SIZE(_s)-1)); stk_array_join(_x, _s, ARRAY_SIZE(_s)-1, 0); _x; })
#define stk_strarraycat(s,n) ({ char **_s=(s); int _n=(n); char *_x=alloca(stk_array_len(_s,_n)); stk_array_join(_x, _s, _n, 0); _x; })
#define stk_strjoin(s,n,sep) ({ char **_s=(s); int _n=(n); char *_x=alloca(stk_array_len(_s,_n)+_n-1); stk_array_join(_x, _s, _n, (sep)); _x; })
#define stk_printf(f...) ({ uns _l=stk_printf_internal(f); char *_x=alloca(_l); sprintf(_x, f); _x; })
#define stk_vprintf(f, args) ({ uns _l=stk_vprintf_internal(f, args); char *_x=alloca(_l); vsprintf(_x, f, args); _x; })
-#define stk_hexdump(s,n) ({ uns _n=(n); char *_x=alloca(3*_n+1); stk_hexdump_internal(_x,(byte*)(s),_n); _x; })
-#define stk_str_unesc(s) ({ byte *_s=(s); byte *_d=alloca(strlen(_s)+1); str_unesc(_d, _s); _d; })
+/* Hex dump of n bytes at s in a buffer of 3*n+1 bytes on the stack; the cast matches the `const byte *' parameter of stk_hexdump_internal(). */
+#define stk_hexdump(s,n) ({ uns _n=(n); char *_x=alloca(3*_n+1); stk_hexdump_internal(_x,(const byte*)(s),_n); _x; })
+#define stk_str_unesc(s) ({ const char *_s=(s); char *_d=alloca(strlen(_s)+1); str_unesc(_d, _s); _d; })
+#define stk_fsize(n) ({ char *_s=alloca(16); stk_fsize_internal(_s, n); _s; })
uns stk_array_len(char **s, uns cnt);
void stk_array_join(char *x, char **s, uns cnt, uns sep);
uns stk_printf_internal(const char *x, ...) FORMAT_CHECK(printf,1,2);
uns stk_vprintf_internal(const char *x, va_list args);
-void stk_hexdump_internal(char *dst, byte *src, uns n);
+void stk_hexdump_internal(char *dst, const byte *src, uns n);
+void stk_fsize_internal(char *dst, u64 size);
+
+#endif
# Tests for stkstring modules
-Run: obj/lib/stkstring-t
+Run: ../obj/lib/stkstring-t
Out: Beware the Jabberwock, my son!
42 65 77
The jaws that bite
+ 1.2M
/* Expands C99-like escape sequences.
* It is safe to use the same buffer for both input and output. */
-byte *
-str_unesc(byte *d, byte *s)
+char *
+str_unesc(char *d, const char *s)
{
while (*s)
{
*d++ = v;
else
DBG("hex escape sequence out of range");
- s = (byte *)p;
+ s = (char *)p;
}
break;
default:
return d;
}
-byte *
-str_format_flags(byte *dest, const byte *fmt, uns flags)
+char *
+str_format_flags(char *dest, const char *fmt, uns flags)
{
- byte *start = dest;
+ char *start = dest;
for (uns i=0; fmt[i]; i++)
{
if (flags & (1 << i))
#include <unistd.h>
void
-sync_dir(byte *name)
+sync_dir(const char *name)
{
int fd = open(name, O_RDONLY
#ifdef CONFIG_LINUX
if (err >= 0)
return;
err:
- log(L_ERROR, "Unable to sync directory %s: %m", name);
+ msg(L_ERROR, "Unable to sync directory %s: %m", name);
}
#include <pthread.h>
+#ifdef CONFIG_LINUX
+#include <sys/types.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#ifdef __NR_gettid
+static pid_t
+gettid(void)
+{
+ return syscall(__NR_gettid);
+}
+#define CONFIG_USE_GETTID
+#endif
+#endif
+
static pthread_key_t ucwlib_context_key;
static pthread_mutex_t ucwlib_master_mutex;
ucwlib_tid(void)
{
static int tid_counter;
+ int tid;
+
+#ifdef CONFIG_USE_GETTID
+ tid = gettid();
+ if (tid > 0)
+ return tid;
+ /* The syscall might be unimplemented */
+#endif
ucwlib_lock();
- int tid = ++tid_counter;
+ tid = ++tid_counter;
ucwlib_unlock();
return tid;
}
/*
- * UCW Library -- Execution Timing
+ * UCW Library -- A Simple Millisecond Timer
*
- * (c) 1997 Martin Mares <mj@ucw.cz>
+ * (c) 2007 Martin Mares <mj@ucw.cz>
*
* This software may be freely distributed and used according to the terms
* of the GNU Lesser General Public License.
#include <stdlib.h>
#include <sys/time.h>
-static struct timeval last_tv;
-
-uns
-get_timer(void)
+timestamp_t
+get_timestamp(void)
{
struct timeval tv;
- uns diff;
-
gettimeofday(&tv, NULL);
- if (tv.tv_sec < last_tv.tv_sec
- || tv.tv_sec == last_tv.tv_sec && tv.tv_usec < last_tv.tv_usec)
- diff = 0;
- else
- {
- if (tv.tv_sec == last_tv.tv_sec)
- diff = (tv.tv_usec - last_tv.tv_usec + 500) / 1000;
- else
- {
- diff = 1000 * (tv.tv_sec - last_tv.tv_sec - 1);
- diff += (1000500 - last_tv.tv_usec + tv.tv_usec) / 1000;
- }
- }
- last_tv = tv;
- return diff;
+ return (timestamp_t)tv.tv_sec * 1000 + tv.tv_usec / 1000;
}
void
-init_timer(void)
+init_timer(timestamp_t *timer)
{
- gettimeofday(&last_tv, NULL);
+ *timer = get_timestamp();
}
-void
-get_last_timeval(struct timeval *tv)
+uns
+get_timer(timestamp_t *timer)
+{
+ timestamp_t t = *timer;
+ *timer = get_timestamp();
+ return MIN(*timer-t, ~0U);
+}
+
+uns
+switch_timer(timestamp_t *old, timestamp_t *new)
{
- *tv = last_tv;
+ *new = get_timestamp();
+ return MIN(*new-*old, ~0U);
}
/* Big endian format */
#if defined(CPU_ALLOW_UNALIGNED) && defined(CPU_BIG_ENDIAN)
-static inline uns get_u16_be(byte *p) { return *(u16 *)p; }
-static inline u32 get_u32_be(byte *p) { return *(u32 *)p; }
-static inline u64 get_u64_be(byte *p) { return *(u64 *)p; }
-static inline void put_u16_be(byte *p, uns x) { *(u16 *)p = x; }
-static inline void put_u32_be(byte *p, u32 x) { *(u32 *)p = x; }
-static inline void put_u64_be(byte *p, u64 x) { *(u64 *)p = x; }
+static inline uns get_u16_be(const void *p) { return *(u16 *)p; }
+static inline u32 get_u32_be(const void *p) { return *(u32 *)p; }
+static inline u64 get_u64_be(const void *p) { return *(u64 *)p; }
+static inline void put_u16_be(void *p, uns x) { *(u16 *)p = x; }
+static inline void put_u32_be(void *p, u32 x) { *(u32 *)p = x; }
+static inline void put_u64_be(void *p, u64 x) { *(u64 *)p = x; }
#else
-static inline uns get_u16_be(byte *p)
+static inline uns get_u16_be(const void *p)
{
- return (p[0] << 8) | p[1];
+ const byte *c = p;
+ return (c[0] << 8) | c[1];
}
-static inline u32 get_u32_be(byte *p)
+static inline u32 get_u32_be(const void *p)
{
- return (p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3];
+ const byte *c = p;
+ return (c[0] << 24) | (c[1] << 16) | (c[2] << 8) | c[3];
}
-static inline u64 get_u64_be(byte *p)
+static inline u64 get_u64_be(const void *p)
{
- return ((u64) get_u32_be(p) << 32) | get_u32_be(p+4);
+ return ((u64) get_u32_be(p) << 32) | get_u32_be((const byte *)p+4);
}
-static inline void put_u16_be(byte *p, uns x)
+static inline void put_u16_be(void *p, uns x)
{
- p[0] = x >> 8;
- p[1] = x;
+ byte *c = p;
+ c[0] = x >> 8;
+ c[1] = x;
}
-static inline void put_u32_be(byte *p, u32 x)
+static inline void put_u32_be(void *p, u32 x)
{
- p[0] = x >> 24;
- p[1] = x >> 16;
- p[2] = x >> 8;
- p[3] = x;
+ byte *c = p;
+ c[0] = x >> 24;
+ c[1] = x >> 16;
+ c[2] = x >> 8;
+ c[3] = x;
}
-static inline void put_u64_be(byte *p, u64 x)
+static inline void put_u64_be(void *p, u64 x)
{
put_u32_be(p, x >> 32);
- put_u32_be(p+4, x);
+ put_u32_be((byte *)p+4, x);
}
#endif
/* Little-endian format */
#if defined(CPU_ALLOW_UNALIGNED) && !defined(CPU_BIG_ENDIAN)
-static inline uns get_u16_le(byte *p) { return *(u16 *)p; }
-static inline u32 get_u32_le(byte *p) { return *(u32 *)p; }
-static inline u64 get_u64_le(byte *p) { return *(u64 *)p; }
-static inline void put_u16_le(byte *p, uns x) { *(u16 *)p = x; }
-static inline void put_u32_le(byte *p, u32 x) { *(u32 *)p = x; }
-static inline void put_u64_le(byte *p, u64 x) { *(u64 *)p = x; }
+static inline uns get_u16_le(const void *p) { return *(u16 *)p; }
+static inline u32 get_u32_le(const void *p) { return *(u32 *)p; }
+static inline u64 get_u64_le(const void *p) { return *(u64 *)p; }
+static inline void put_u16_le(void *p, uns x) { *(u16 *)p = x; }
+static inline void put_u32_le(void *p, u32 x) { *(u32 *)p = x; }
+static inline void put_u64_le(void *p, u64 x) { *(u64 *)p = x; }
#else
-static inline uns get_u16_le(byte *p)
+static inline uns get_u16_le(const void *p)
{
- return p[0] | (p[1] << 8);
+ const byte *c = p;
+ return c[0] | (c[1] << 8);
}
-static inline u32 get_u32_le(byte *p)
+static inline u32 get_u32_le(const void *p)
{
- return p[0] | (p[1] << 8) | (p[2] << 16) | (p[3] << 24);
+ const byte *c = p;
+ return c[0] | (c[1] << 8) | (c[2] << 16) | (c[3] << 24);
}
-static inline u64 get_u64_le(byte *p)
+static inline u64 get_u64_le(const void *p)
{
- return get_u32_le(p) | ((u64) get_u32_le(p+4) << 32);
+ return get_u32_le(p) | ((u64) get_u32_le((const byte *)p+4) << 32);
}
-static inline void put_u16_le(byte *p, uns x)
+static inline void put_u16_le(void *p, uns x)
{
- p[0] = x;
- p[1] = x >> 8;
+ byte *c = p;
+ c[0] = x;
+ c[1] = x >> 8;
}
-static inline void put_u32_le(byte *p, u32 x)
+static inline void put_u32_le(void *p, u32 x)
{
- p[0] = x;
- p[1] = x >> 8;
- p[2] = x >> 16;
- p[3] = x >> 24;
+ byte *c = p;
+ c[0] = x;
+ c[1] = x >> 8;
+ c[2] = x >> 16;
+ c[3] = x >> 24;
}
-static inline void put_u64_le(byte *p, u64 x)
+static inline void put_u64_le(void *p, u64 x)
{
put_u32_le(p, x);
- put_u32_le(p+4, x >> 32);
+ put_u32_le((byte *)p+4, x >> 32);
}
#endif
-static inline u64 get_u40_be(byte *p)
+static inline u64 get_u40_be(const void *p)
{
- return ((u64)p[0] << 32) | get_u32_be(p+1);
+ const byte *c = p;
+ return ((u64)c[0] << 32) | get_u32_be(c+1);
}
-static inline void put_u40_be(byte *p, u64 x)
+static inline void put_u40_be(void *p, u64 x)
{
- p[0] = x >> 32;
- put_u32_be(p+1, x);
+ byte *c = p;
+ c[0] = x >> 32;
+ put_u32_be(c+1, x);
}
-static inline u64 get_u40_le(byte *p)
+static inline u64 get_u40_le(const void *p)
{
- return get_u32_le(p) | ((u64) p[4] << 32);
+ const byte *c = p;
+ return get_u32_le(c) | ((u64) c[4] << 32);
}
-static inline void put_u40_le(byte *p, u64 x)
+static inline void put_u40_le(void *p, u64 x)
{
- put_u32_le(p, x);
- p[4] = x >> 32;
+ byte *c = p;
+ put_u32_le(c, x);
+ c[4] = x >> 32;
}
/* The native format */
#ifdef CPU_BIG_ENDIAN
-static inline uns get_u16(byte *p) { return get_u16_be(p); }
-static inline u32 get_u32(byte *p) { return get_u32_be(p); }
-static inline u64 get_u64(byte *p) { return get_u64_be(p); }
-static inline u64 get_u40(byte *p) { return get_u40_be(p); }
-static inline void put_u16(byte *p, uns x) { return put_u16_be(p, x); }
-static inline void put_u32(byte *p, u32 x) { return put_u32_be(p, x); }
-static inline void put_u64(byte *p, u64 x) { return put_u64_be(p, x); }
-static inline void put_u40(byte *p, u64 x) { return put_u40_be(p, x); }
+static inline uns get_u16(const void *p) { return get_u16_be(p); }
+static inline u32 get_u32(const void *p) { return get_u32_be(p); }
+static inline u64 get_u64(const void *p) { return get_u64_be(p); }
+static inline u64 get_u40(const void *p) { return get_u40_be(p); }
+static inline void put_u16(void *p, uns x) { return put_u16_be(p, x); }
+static inline void put_u32(void *p, u32 x) { return put_u32_be(p, x); }
+static inline void put_u64(void *p, u64 x) { return put_u64_be(p, x); }
+static inline void put_u40(void *p, u64 x) { return put_u40_be(p, x); }
#else
-static inline uns get_u16(byte *p) { return get_u16_le(p); }
-static inline u32 get_u32(byte *p) { return get_u32_le(p); }
-static inline u64 get_u64(byte *p) { return get_u64_le(p); }
-static inline u64 get_u40(byte *p) { return get_u40_le(p); }
-static inline void put_u16(byte *p, uns x) { return put_u16_le(p, x); }
-static inline void put_u32(byte *p, u32 x) { return put_u32_le(p, x); }
-static inline void put_u64(byte *p, u64 x) { return put_u64_le(p, x); }
-static inline void put_u40(byte *p, u64 x) { return put_u40_le(p, x); }
+static inline uns get_u16(const void *p) { return get_u16_le(p); }
+static inline u32 get_u32(const void *p) { return get_u32_le(p); }
+static inline u64 get_u64(const void *p) { return get_u64_le(p); }
+static inline u64 get_u40(const void *p) { return get_u40_le(p); }
+static inline void put_u16(void *p, uns x) { return put_u16_le(p, x); }
+static inline void put_u32(void *p, u32 x) { return put_u32_le(p, x); }
+static inline void put_u64(void *p, u64 x) { return put_u64_le(p, x); }
+static inline void put_u40(void *p, u64 x) { return put_u40_le(p, x); }
#endif
/* Just for completeness */
-static inline uns get_u8(byte *p) { return *p; }
-static inline void put_u8(byte *p, uns x) { *p = x; }
+static inline uns get_u8(const void *p) { return *(const byte *)p; }
+static inline void put_u8(void *p, uns x) { *(byte *)p = x; }
/* Backward compatibility macros */
-#define GET_U8(p) get_u8((byte*)(p))
-#define GET_U16(p) get_u16((byte*)(p))
-#define GET_U32(p) get_u32((byte*)(p))
-#define GET_U64(p) get_u64((byte*)(p))
-#define GET_U40(p) get_u40((byte*)(p))
-
-#define PUT_U8(p,x) put_u8((byte*)(p),x);
-#define PUT_U16(p,x) put_u16((byte*)(p),x)
-#define PUT_U32(p,x) put_u32((byte*)p,x)
-#define PUT_U64(p,x) put_u64((byte*)p,x)
-#define PUT_U40(p,x) put_u40((byte*)p,x)
+#define GET_U8(p) get_u8(p)
+#define GET_U16(p) get_u16(p)
+#define GET_U32(p) get_u32(p)
+#define GET_U64(p) get_u64(p)
+#define GET_U40(p) get_u40(p)
+
+#define PUT_U8(p,x) put_u8(p,x);
+#define PUT_U16(p,x) put_u16(p,x)
+#define PUT_U32(p,x) put_u32(p,x)
+#define PUT_U64(p,x) put_u64(p,x)
+#define PUT_U40(p,x) put_u40(p,x)
#endif
#include "lib/unicode.h"
uns
-utf8_strlen(byte *str)
+utf8_strlen(const byte *str)
{
uns len = 0;
while (*str)
}
uns
-utf8_strnlen(byte *str, uns n)
+utf8_strnlen(const byte *str, uns n)
{
uns len = 0;
- byte *end = str + n;
+ const byte *end = str + n;
while (str < end)
{
UTF8_SKIP(str);
return len;
}
-uns
-utf8_check(byte *s)
-{
-#define UTF8_CHECK_NEXT if (unlikely((*s & 0xc0) != 0x80)) goto bad; s++
- while (*s)
- {
- uns u = *s++;
- if (u < 0x80)
- ;
- else if (unlikely(u < 0xc0))
- {
-bad:
- return 0;
- }
- else if (u < 0xe0)
- {
- UTF8_CHECK_NEXT;
- }
- else if (likely(u < 0xf0))
- {
- UTF8_CHECK_NEXT;
- UTF8_CHECK_NEXT;
- }
- else
- goto bad;
- }
- return 1;
-}
-
#ifdef TEST
#include <string.h>
#include <stdio.h>
# Tests for the Unicode UTF-8 module
-Run: obj/lib/unicode-utf8-t put
+Run: ../obj/lib/unicode-utf8-t put
In: 0041 0048 004f 004a
Out: 41 48 4f 4a
-Run: obj/lib/unicode-utf8-t put
+Run: ../obj/lib/unicode-utf8-t put
In: 00aa 01aa 02a5 05a5 0a5a 15a5 2a5a 5a5a a5a5
Out: c2 aa c6 aa ca a5 d6 a5 e0 a9 9a e1 96 a5 e2 a9 9a e5 a9 9a ea 96 a5
-Run: obj/lib/unicode-utf8-t get
+Run: ../obj/lib/unicode-utf8-t get
In: 41 48 4f 4a
Out: 0041 0048 004f 004a
-Run: obj/lib/unicode-utf8-t get
+Run: ../obj/lib/unicode-utf8-t get
In: c2 aa c6 aa ca a5 d6 a5 e0 a9 9a e1 96 a5 e2 a9 9a e5 a9 9a ea 96 a5
Out: 00aa 01aa 02a5 05a5 0a5a 15a5 2a5a 5a5a a5a5
-Run: obj/lib/unicode-utf8-t get
+Run: ../obj/lib/unicode-utf8-t get
In: 84 ff f9 f8 c2 aa 41
Out: fffc fffc fffc fffc 00aa 0041
-Run: obj/lib/unicode-utf8-t put32
+Run: ../obj/lib/unicode-utf8-t put32
In: 15a5a 2a5a5 5a5a5 a5a5a 15a5a5 2a5a5a 5a5a5a a5a5a5 15a5a5a 2a5a5a5 5a5a5a5 a5a5a5a 15a5a5a5 2a5a5a5a 5a5a5a5a
Out: f0 95 a9 9a f0 aa 96 a5 f1 9a 96 a5 f2 a5 a9 9a f5 9a 96 a5 f8 8a a5 a9 9a f8 96 a5 a9 9a f8 a9 9a 96 a5 f9 96 a5 a9 9a fa a9 9a 96 a5 fc 85 a9 9a 96 a5 fc 8a 96 a5 a9 9a fc 95 a9 9a 96 a5 fc aa 96 a5 a9 9a fd 9a 96 a5 a9 9a
-Run: obj/lib/unicode-utf8-t get32
+Run: ../obj/lib/unicode-utf8-t get32
In: f0 95 a9 9a f0 aa 96 a5 f1 9a 96 a5 f2 a5 a9 9a f5 9a 96 a5 f8 8a a5 a9 9a f8 96 a5 a9 9a f8 a9 9a 96 a5 f9 96 a5 a9 9a fa a9 9a 96 a5 fc 85 a9 9a 96 a5 fc 8a 96 a5 a9 9a fc 95 a9 9a 96 a5 fc aa 96 a5 a9 9a fd 9a 96 a5 a9 9a
Out: 15a5a 2a5a5 5a5a5 a5a5a 15a5a5 2a5a5a 5a5a5a a5a5a5 15a5a5a 2a5a5a5 5a5a5a5 a5a5a5a 15a5a5a5 2a5a5a5a 5a5a5a5a
-Run: obj/lib/unicode-utf8-t get32
+Run: ../obj/lib/unicode-utf8-t get32
In: fe 83 81
Out: fffc fffc fffc
#define UNI_REPLACEMENT 0xfffc
+/* Encode a character from the basic multilingual plane [0, 0xFFFF]
+ * (subset of Unicode 4.0); up to 3 bytes needed (RFC2279) */
static inline byte *
utf8_put(byte *p, uns u)
{
return p;
}
+/* Encode a value from the range [0, 0x7FFFFFFF];
+ * (superset of Unicode 4.0) up to 6 bytes needed (RFC2279) */
static inline byte *
utf8_32_put(byte *p, uns u)
{
#define UTF8_GET_NEXT if (unlikely((*p & 0xc0) != 0x80)) goto bad; u = (u << 6) | (*p++ & 0x3f)
-static inline const byte *
+/* Decode a character from the basic multilingual plane [0, 0xFFFF]
+ * or return UNI_REPLACEMENT if the encoding has been corrupted */
+static inline byte *
utf8_get(const byte *p, uns *uu)
{
uns u = *p++;
else
goto bad;
*uu = u;
- return p;
+ return (byte *)p;
}
+/* Decode a value from the range [0, 0x7FFFFFFF]
+ * or return UNI_REPLACEMENT if the encoding has been corrupted */
static inline byte *
-utf8_32_get(byte *p, uns *uu)
+utf8_32_get(const byte *p, uns *uu)
{
uns u = *p++;
if (u < 0x80)
else
goto bad;
*uu = u;
- return p;
+ return (byte *)p;
}
#define PUT_UTF8(p,u) p = utf8_put(p, u)
/* unicode-utf8.c */
-uns utf8_strlen(byte *str);
-uns utf8_strnlen(byte *str, uns n);
-uns utf8_check(byte *str);
+uns utf8_strlen(const byte *str);
+uns utf8_strnlen(const byte *str, uns n);
#endif
static uns url_ignore_spaces;
static uns url_ignore_underflow;
-static byte *url_component_separators = "";
+static char *url_component_separators = "";
static uns url_min_repeat_count = 0x7fffffff;
static uns url_max_repeat_length = 0;
}
int
-url_deescape(byte *s, byte *d)
+url_deescape(const byte *s, byte *d)
{
byte *dstart = d;
byte *end = d + MAX_URL_SIZE - 10;
*d++ = *s++;
else if (Cspace(*s))
{
- byte *s0 = s;
+ const byte *s0 = s;
while (Cspace(*s))
s++;
if (!url_ignore_spaces || !(!*s || d == dstart))
}
int
-url_enescape(byte *s, byte *d)
+url_enescape(const byte *s, byte *d)
{
byte *end = d + MAX_URL_SIZE - 10;
unsigned int c;
}
int
-url_enescape_friendly(byte *src, byte *dest)
+url_enescape_friendly(const byte *src, byte *dest)
{
byte *end = dest + MAX_URL_SIZE - 10;
while (*src)
static int url_proto_path_flags[URL_PROTO_MAX] = URL_PATH_FLAGS;
uns
-identify_protocol(byte *p)
+identify_protocol(const byte *p)
{
uns i;
/* Pack a broken-down URL */
static byte *
-append(byte *d, byte *s, byte *e)
+append(byte *d, const byte *s, byte *e)
{
if (d)
while (*s)
/* Standard cookbook recipes */
int
-url_canon_split_rel(byte *u, byte *buf1, byte *buf2, struct url *url, struct url *base)
+url_canon_split_rel(const byte *u, byte *buf1, byte *buf2, struct url *url, struct url *base)
{
int err;
}
int
-url_auto_canonicalize_rel(byte *src, byte *dst, struct url *base)
+url_auto_canonicalize_rel(const byte *src, byte *dst, struct url *base)
{
byte buf1[MAX_URL_SIZE], buf2[MAX_URL_SIZE], buf3[MAX_URL_SIZE];
int err;
#endif
struct component {
- byte *start;
+ const byte *start;
int length;
u32 hash;
};
static inline u32
-hashf(byte *start, int length)
+hashf(const byte *start, int length)
{
u32 hf = length;
while (length-- > 0)
}
int
-url_has_repeated_component(byte *url)
+url_has_repeated_component(const byte *url)
{
struct component *comp;
uns comps, comp_len, rep_prefix;
- byte *c;
+ const byte *c;
uns i;
for (comps=0, c=url; c; comps++)
/* Remove/Introduce '%' escapes */
-int url_deescape(byte *s, byte *d);
-int url_enescape(byte *s, byte *d);
-int url_enescape_friendly(byte *src, byte *dest); // for cards.c only
+int url_deescape(const byte *s, byte *d);
+int url_enescape(const byte *s, byte *d);
+int url_enescape_friendly(const byte *src, byte *dest); // for cards.c only
/* URL splitting and normalization */
int url_normalize(struct url *u, struct url *b);
int url_canonicalize(struct url *u);
int url_pack(struct url *u, byte *d);
-int url_canon_split_rel(byte *url, byte *buf1, byte *buf2, struct url *u, struct url *base);
-int url_auto_canonicalize_rel(byte *src, byte *dst, struct url *base);
-uns identify_protocol(byte *p);
-int url_has_repeated_component(byte *url);
+int url_canon_split_rel(const byte *url, byte *buf1, byte *buf2, struct url *u, struct url *base);
+int url_auto_canonicalize_rel(const byte *src, byte *dst, struct url *base);
+uns identify_protocol(const byte *p);
+int url_has_repeated_component(const byte *url);
-static inline int url_canon_split(byte *url, byte *buf1, byte *buf2, struct url *u)
+static inline int url_canon_split(const byte *url, byte *buf1, byte *buf2, struct url *u)
{ return url_canon_split_rel(url, buf1, buf2, u, NULL); }
-static inline int url_auto_canonicalize(byte *src, byte *dst)
+static inline int url_auto_canonicalize(const byte *src, byte *dst)
{ return url_auto_canonicalize_rel(src, dst, NULL); }
/* Error codes */
}
struct wildpatt *
-wp_compile(byte *p, struct mempool *pool)
+wp_compile(const byte *p, struct mempool *pool)
{
struct wildpatt *w;
uns i;
}
int
-wp_match(struct wildpatt *w, byte *s)
+wp_match(struct wildpatt *w, const byte *s)
{
struct dfa_state *d;
}
int
-wp_min_size(byte *p)
+wp_min_size(const byte *p)
{
int s = 0;
struct wildpatt;
struct mempool;
-struct wildpatt *wp_compile(byte *, struct mempool *);
-int wp_match(struct wildpatt *, byte *);
-int wp_min_size(byte *);
+struct wildpatt *wp_compile(const byte *, struct mempool *);
+int wp_match(struct wildpatt *, const byte *);
+int wp_min_size(const byte *);
#include <string.h>
int
-sepsplit(byte *str, byte sep, byte **rec, uns max)
+sepsplit(char *str, uns sep, char **rec, uns max)
{
uns cnt = 0;
while (1)
}
int
-wordsplit(byte *src, byte **dst, uns max)
+wordsplit(char *src, char **dst, uns max)
{
uns cnt = 0;