include $(s)/lib/regex/Makefile
endif
+ ifdef CONFIG_OWN_GETOPT
+ include $(s)/lib/getopt/Makefile
+ endif
+
+include $(s)/lib/sorter/Makefile
+
+ LIBUCW=$(o)/lib/libucw.$(LS)
LIBUCW_MOD_PATHS=$(addprefix $(o)/lib/,$(LIBUCW_MODS))
$(o)/lib/libucw.a: $(addsuffix .o,$(LIBUCW_MOD_PATHS))
$(o)/lib/slists.test: $(o)/lib/slists-t
$(o)/lib/kmp-test.test: $(o)/lib/kmp-test
$(o)/lib/bbuf.test: $(o)/lib/bbuf-t
+ $(o)/lib/getopt.test: $(o)/lib/getopt-t
+ifdef CONFIG_UCW_THREADS
+TESTS+=$(addprefix $(o)/lib/,asio.test)
+$(o)/lib/asio.test: $(o)/lib/asio-t
+endif
+
INCLUDES+=$(o)/lib/.include-stamp
$(o)/lib/.include-stamp: $(addprefix $(s)/lib/,$(LIBUCW_INCLUDES))
$(s)/build/install-includes $(s)/lib run/include/lib $(?F)
/*
* UCW Library -- Allocation of Large Aligned Buffers
*
- * (c) 2006 Martin Mares <mj@ucw.cz>
+ * (c) 2006--2007 Martin Mares <mj@ucw.cz>
+ * (c) 2007 Pavel Charvat <char@ucw.cz>
*
* This software may be freely distributed and used according to the terms
* of the GNU Lesser General Public License.
#include "lib/lib.h"
#include <sys/mman.h>
+ #include <string.h>
+
+/*
+ * Map `len` bytes of anonymous private read/write memory.
+ * `len` must be a multiple of CPU_PAGE_SIZE; the process dies if mmap() fails.
+ */
+void *
+page_alloc(unsigned int len)
+{
+  ASSERT(!(len & (CPU_PAGE_SIZE-1)));
+  byte *p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+  if (p == (byte*) MAP_FAILED)
+    die("Cannot mmap %u bytes of memory: %m", len);	// len is unsigned, hence %u rather than %d
+  return p;
+}
+
+ void	// Unmap a region of `len` bytes starting at `start`
+ page_free(void *start, unsigned int len)
+ {
+ ASSERT(!(len & (CPU_PAGE_SIZE-1)));	// Length must be a whole number of pages
+ ASSERT(!((uintptr_t) start & (CPU_PAGE_SIZE-1)));	// Address must be page-aligned
+ munmap(start, len);	// The result of munmap() is not checked
+ }
+
+/*
+ * Resize a page_alloc()ed region: a fresh region of `new_len` bytes is
+ * allocated, the first MIN(old_len, new_len) bytes are copied over and the
+ * old region is released.  Returns the new region.
+ */
+void *
+page_realloc(void *start, unsigned int old_len, unsigned int new_len)
+{
+  unsigned int keep = MIN(old_len, new_len);
+  void *fresh = page_alloc(new_len);
+
+  memcpy(fresh, start, keep);
+  page_free(start, old_len);
+  return fresh;
+}
-static unsigned int
-big_round(unsigned int len)
+static u64
+big_round(u64 len)
{
- return ALIGN_TO(len, CPU_PAGE_SIZE);
+ return ALIGN_TO(len, (u64)CPU_PAGE_SIZE);
}
void *
}
void
-big_free(void *start, unsigned int len)
+big_free(void *start, u64 len)
{
byte *p = start;
- ASSERT(!((uintptr_t) p & (CPU_PAGE_SIZE-1)));
len = big_round(len);
#ifdef CONFIG_DEBUG
p -= CPU_PAGE_SIZE;
--- /dev/null
- static void
+/*
+ * UCW Library -- Fast Buffered I/O on O_DIRECT Files
+ *
+ * (c) 2006 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+/*
+ * This is a fastbuf backend for fast streaming I/O using O_DIRECT and
+ * the asynchronous I/O module. It's designed for use on large files
+ * which don't fit in the disk cache.
+ *
+ * CAVEATS:
+ *
+ * - All operations with a single fbdirect handle must be done
+ * within a single thread, unless you provide a custom I/O queue
+ * and take care of locking.
+ *
+ * FIXME: what if the OS doesn't support O_DIRECT?
+ * FIXME: unaligned seeks and partial writes?
+ * FIXME: merge with other file-oriented fastbufs
+ */
+
+#undef LOCAL_DEBUG
+
+#include "lib/lib.h"
+#include "lib/fastbuf.h"
+#include "lib/lfs.h"
+#include "lib/asio.h"
+#include "lib/conf.h"
+#include "lib/threads.h"
+
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
++#include <stdio.h>
+
+static uns fbdir_cheat;	// Non-zero: O_DIRECT is not requested and the alignment checks in fbdir_spout() are skipped
+static uns fbdir_buffer_size = 65536;	// Becomes buffer_size of the per-thread I/O queue
+static uns fbdir_read_ahead = 1;	// Configurable as "ReadAhead"; not read by any code visible in this chunk
+static uns fbdir_write_back = 1;	// Becomes max_writebacks of the per-thread I/O queue
+
+static struct cf_section fbdir_cf = {	// Items of the section registered as "FBDirect"
+ CF_ITEMS {
+ CF_UNS("Cheat", &fbdir_cheat),
+ CF_UNS("BufferSize", &fbdir_buffer_size),
+ CF_UNS("ReadAhead", &fbdir_read_ahead),
+ CF_UNS("WriteBack", &fbdir_write_back),
+ CF_END
+ }
+};
+
+#define FBDIR_ALIGN 512
+
+enum fbdir_mode { // Current operating mode
+ M_NULL,
+ M_READ,
+ M_WRITE
+};
+
+struct fb_direct {
+ struct fastbuf fb;	// Embedded fastbuf; fbdir_close() frees the whole structure through its address
+ int fd; // File descriptor
+ int is_temp_file; // 0=normal file, 1=temporary file, delete on close, -1=shared FD
+ struct asio_queue *io_queue; // I/O queue to use
+ struct asio_queue *user_queue; // If io_queue was supplied by the user
+ struct asio_request *pending_read;	// Read request submitted but not yet collected by fbdir_read_sync()
+ struct asio_request *done_read;	// Completed read request not yet consumed by fbdir_refill()
+ struct asio_request *active_buffer;	// Request whose buffer currently serves as the fastbuf buffer
+ enum fbdir_mode mode;	// Current operating mode, switched by fbdir_change_mode()
+ byte name[0];	// File name, stored in the same allocation right after the structure
+};
+#define FB_DIRECT(f) ((struct fb_direct *)(f)->is_fastbuf)
+
+static void CONSTRUCTOR	// Presumably run automatically at program start-up (CONSTRUCTOR attribute) -- confirm
+fbdir_global_init(void)
+{
+ cf_declare_section("FBDirect", &fbdir_cf, 0);	// Register the configuration items under the name "FBDirect"
+}
+
+/*
+ * Wait until F has no read request in flight.  Every request returned by
+ * asio_wait() is attributed to its owner via user_data (which may be a
+ * different fb_direct than F) and moved from the owner's pending_read slot
+ * to its done_read slot.
+ */
+static void
+fbdir_read_sync(struct fb_direct *F)
+{
+  for (;;)
+    {
+      if (!F->pending_read)
+        return;
+
+      struct asio_request *r = asio_wait(F->io_queue);
+      ASSERT(r);
+      struct fb_direct *G = r->user_data;
+      ASSERT(G);
+      ASSERT(G->pending_read == r && !G->done_read);
+      G->done_read = r;
+      G->pending_read = NULL;
+    }
+}
+
+/*
+ * Switch F to operating mode `mode`.  Leaving M_READ waits for the read in
+ * flight and returns an unused read-ahead request; leaving M_WRITE waits
+ * for pending write-backs.  In every case the active buffer is returned to
+ * the queue before the new mode is recorded.  A no-op when the mode does
+ * not change.
+ */
+static void
+fbdir_change_mode(struct fb_direct *F, enum fbdir_mode mode)
+{
+  if (F->mode == mode)
+    return;
+  DBG("FB-DIRECT: Switching mode to %d", mode);
+
+  if (F->mode == M_READ)
+    {
+      fbdir_read_sync(F);			// Wait for read-ahead requests to finish
+      struct asio_request *unused = F->done_read;
+      if (unused)				// Return read-ahead requests if any
+        {
+          asio_put(unused);
+          F->done_read = NULL;
+        }
+    }
+  else if (F->mode == M_WRITE)
+    asio_sync(F->io_queue);			// Wait for pending writebacks
+  /* M_NULL needs no clean-up */
+
+  struct asio_request *current = F->active_buffer;
+  if (current)
+    {
+      asio_put(current);
+      F->active_buffer = NULL;
+    }
+  F->mode = mode;
+}
+
+/*
+ * Take a request from F's queue, set it up as a read of one full queue
+ * buffer from F's file descriptor, submit it and remember it as the
+ * pending read.
+ */
+static void
+fbdir_submit_read(struct fb_direct *F)
+{
+  struct asio_queue *queue = F->io_queue;
+  struct asio_request *req = asio_get(queue);
+
+  req->user_data = F;		// Lets fbdir_read_sync() find the owner of the request
+  req->op = ASIO_READ;
+  req->fd = F->fd;
+  req->len = queue->buffer_size;
+  asio_submit(req);
+  F->pending_read = req;
+}
+
+static int	// Fastbuf refill callback: returns bytes made available, 0 when the read returned nothing
+fbdir_refill(struct fastbuf *f)
+{
+ struct fb_direct *F = FB_DIRECT(f);
+
+ DBG("FB-DIRECT: Refill");
+
+ if (!F->done_read)	// No completed read at hand
+ {
+ if (!F->pending_read)	// Nothing in flight either: enter read mode and start a read
+ {
+ fbdir_change_mode(F, M_READ);
+ fbdir_submit_read(F);
+ }
+ fbdir_read_sync(F);	// Wait for the read in flight
+ ASSERT(F->done_read);
+ }
+
+ struct asio_request *r = F->done_read;
+ F->done_read = NULL;
+ if (F->active_buffer)	// Return the previously exposed buffer to the queue
+ asio_put(F->active_buffer);
+ F->active_buffer = r;
+ if (!r->status)	// Zero status: no data; no further read-ahead is started
+ return 0;
+ if (r->status < 0)	// Negative status: the read failed
+ die("Error reading %s: %s", f->name, strerror(r->returned_errno));
+ f->bptr = f->buffer = r->buffer;	// Expose the request's buffer as the fastbuf buffer
+ f->bstop = f->bufend = f->buffer + r->status;
+ f->pos += r->status;
+
+ fbdir_submit_read(F); // Read-ahead the next block
+
+ return r->status;
+}
+
+static void	// Fastbuf spout callback: hand buffered data to the I/O queue and attach a fresh buffer
+fbdir_spout(struct fastbuf *f)
+{
+ struct fb_direct *F = FB_DIRECT(f);
+ struct asio_request *r;
+
+ DBG("FB-DIRECT: Spout");
+
+ fbdir_change_mode(F, M_WRITE);
+ r = F->active_buffer;
+ if (r && f->bptr > f->bstop)	// There is an active buffer holding unwritten bytes
+ {
+ r->op = ASIO_WRITE_BACK;
+ r->fd = F->fd;
+ r->len = f->bptr - f->bstop;
+ ASSERT(!(f->pos % FBDIR_ALIGN) || fbdir_cheat);	// The write must start at an aligned position unless cheating
+ f->pos += r->len;	// Advance by the real length, before r->len is rounded up below
+ if (!fbdir_cheat && r->len % FBDIR_ALIGN) // Have to simulate incomplete writes
+ {
+ r->len = ALIGN_TO(r->len, FBDIR_ALIGN);	// Write the padded length ...
+ asio_submit(r);
+ asio_sync(F->io_queue);	// ... wait for it to finish ...
+ DBG("FB-DIRECT: Truncating at %Ld", (long long)f->pos);
+ if (sh_ftruncate(F->fd, f->pos) < 0)	// ... and cut the file back to the real end
+ die("Error truncating %s: %m", f->name);
+ }
+ else
+ asio_submit(r);
+ r = NULL;	// The request now belongs to the queue
+ }
+ if (!r)	// Need a fresh request (none was active, or the old one was submitted)
+ r = asio_get(F->io_queue);
+ f->bstop = f->bptr = f->buffer = r->buffer;	// Start with an empty buffer of the queue's buffer size
+ f->bufend = f->buffer + F->io_queue->buffer_size;
+ F->active_buffer = r;
+}
+
- return;
++static int	// Fastbuf seek callback: returns 1 on success, 0 when sh_seek() fails
+fbdir_seek(struct fastbuf *f, sh_off_t pos, int whence)
+{
+ DBG("FB-DIRECT: Seek %Ld %d", (long long)pos, whence);
+
+ if (whence == SEEK_SET && pos == f->pos)	// Already at the requested absolute position
- die("lseek on %s: %m", f->name);
++ return 1;
+
+ fbdir_change_mode(FB_DIRECT(f), M_NULL); // Wait for all async requests to finish
+ sh_off_t l = sh_seek(FB_DIRECT(f)->fd, pos, whence);
+ if (l < 0)
++ return 0;
+ f->pos = l;	// Remember the resulting absolute offset
++ return 1;
+}
+
+/*
+ * Return the I/O queue kept in the calling thread's ucwlib context,
+ * creating and initializing it from the FBDirect settings on first use.
+ * Each call bumps the queue's use count.
+ */
+static struct asio_queue *
+fbdir_get_io_queue(void)
+{
+  struct ucwlib_context *ctx = ucwlib_thread_context();
+
+  if (!ctx->io_queue)
+    {
+      struct asio_queue *fresh = xmalloc_zero(sizeof(struct asio_queue));
+      fresh->buffer_size = fbdir_buffer_size;
+      fresh->max_writebacks = fbdir_write_back;
+      asio_init_queue(fresh);
+      ctx->io_queue = fresh;
+    }
+
+  struct asio_queue *q = ctx->io_queue;
+  q->use_count++;
+  DBG("FB-DIRECT: Got I/O queue, uc=%d", q->use_count);
+  return q;
+}
+
+/*
+ * Drop one reference to the calling thread's I/O queue; when the use count
+ * reaches zero the queue is cleaned up, freed and removed from the context.
+ */
+static void
+fbdir_put_io_queue(void)
+{
+  struct ucwlib_context *ctx = ucwlib_thread_context();
+  struct asio_queue *q = ctx->io_queue;
+
+  ASSERT(q);
+  DBG("FB-DIRECT: Put I/O queue, uc=%d", q->use_count);
+  q->use_count--;
+  if (q->use_count)
+    return;
+
+  asio_cleanup_queue(q);
+  xfree(q);
+  ctx->io_queue = NULL;
+}
+
+static void	// Fastbuf close callback: quiesce I/O, release the queue, close the file and free the structure
+fbdir_close(struct fastbuf *f)
+{
+ struct fb_direct *F = FB_DIRECT(f);
+
+ DBG("FB-DIRECT: Close");
+
+ fbdir_change_mode(F, M_NULL);	// Waits for outstanding requests of the current mode and returns the active buffer
+ if (!F->user_queue)	// Only the per-thread queue is reference-counted here
+ fbdir_put_io_queue();
+
+ switch (F->is_temp_file)	// Any other value (e.g. -1 = shared FD) leaves the descriptor open
+ {
+ case 1:
+ if (unlink(f->name) < 0)
+ log(L_ERROR, "unlink(%s): %m", f->name);	// Deliberate fall-through: a temporary file is closed as well
+ case 0:
+ close(F->fd);
+ }
+
+ xfree(f);
+}
+
+/*
+ * Fastbuf config callback.  Only BCONFIG_IS_TEMP_FILE is understood: it
+ * stores `value` in is_temp_file and returns 0.  Any other item yields -1.
+ */
+static int
+fbdir_config(struct fastbuf *f, uns item, int value)
+{
+  if (item != BCONFIG_IS_TEMP_FILE)
+    return -1;
+
+  FB_DIRECT(f)->is_temp_file = value;
+  return 0;
+}
+
+/*
+ * Build a fb_direct fastbuf around an already opened descriptor `fd`.
+ * The name is copied into the same allocation as the structure.  If `q` is
+ * given it is used as the I/O queue (and remembered as user-supplied),
+ * otherwise the per-thread queue is acquired.
+ */
+static struct fastbuf *
+fbdir_open_internal(byte *name, int fd, struct asio_queue *q)
+{
+  int namelen = strlen(name) + 1;
+  struct fb_direct *F = xmalloc(sizeof(struct fb_direct) + namelen);
+  struct fastbuf *f = &F->fb;
+
+  DBG("FB-DIRECT: Open");
+  memset(F, 0, sizeof(*F));		// memset() replaces the legacy bzero(); <strings.h> is not included here
+  f->name = F->name;
+  memcpy(f->name, name, namelen);
+  F->fd = fd;
+  if (q)
+    F->io_queue = F->user_queue = q;
+  else
+    F->io_queue = fbdir_get_io_queue();
+  f->refill = fbdir_refill;
+  f->spout = fbdir_spout;
+  f->seek = fbdir_seek;
+  f->close = fbdir_close;
+  f->config = fbdir_config;
+  f->can_overwrite_buffer = 2;
+  return f;
+}
+
+/*
+ * Open `name` with the given open(2) flags (O_DIRECT is added unless the
+ * Cheat option is set) and wrap it in a fb_direct fastbuf.  Returns NULL if
+ * the file cannot be opened.  With O_APPEND the stream is positioned at the
+ * end of the file.
+ */
+struct fastbuf *
+fbdir_open_try(byte *name, uns mode, struct asio_queue *q)
+{
+  uns flags = fbdir_cheat ? mode : (mode | O_DIRECT);
+  int fd = sh_open(name, flags, 0666);
+
+  if (fd < 0)
+    return NULL;
+
+  struct fastbuf *b = fbdir_open_internal(name, fd, q);
+  if (flags & O_APPEND)
+    fbdir_seek(b, 0, SEEK_END);
+  return b;
+}
+
+/* Like fbdir_open_try(), but the process dies with a message if the file cannot be opened. */
+struct fastbuf *
+fbdir_open(byte *name, uns mode, struct asio_queue *q)
+{
+  struct fastbuf *b = fbdir_open_try(name, mode, q);
+
+  if (!b)
+    {
+      if (mode & O_CREAT)
+        die("Unable to %s file %s: %m", "create", name);
+      else
+        die("Unable to %s file %s: %m", "open", name);
+    }
+  return b;
+}
+
+/*
+ * Wrap an existing descriptor `fd` in a fb_direct fastbuf named "fd<N>".
+ * Unless the Cheat option is set, O_DIRECT is switched on for the
+ * descriptor; a failure to do so is only logged as a warning.
+ */
+struct fastbuf *
+fbdir_open_fd(int fd, struct asio_queue *q)
+{
+  byte x[32];
+
+  snprintf(x, sizeof(x), "fd%d", fd);
+  if (!fbdir_cheat)
+    {
+      // Query the flags first: a failed F_GETFL (-1) must not be OR-ed with O_DIRECT and written back
+      int flags = fcntl(fd, F_GETFL);
+      if (flags < 0 || fcntl(fd, F_SETFL, flags | O_DIRECT) < 0)
+        log(L_WARN, "Cannot set O_DIRECT on fd %d: %m", fd);
+    }
+  return fbdir_open_internal(x, fd, q);
+}
+
+/*
+ * Create a temporary file (name obtained from temp_file_name()), open it
+ * read-write and flag it as temporary so that fbdir_close() unlinks it.
+ */
+struct fastbuf *
+fbdir_open_tmp(struct asio_queue *q)
+{
+  byte tmp_name[TEMP_FILE_NAME_LEN];
+
+  temp_file_name(tmp_name);
+  struct fastbuf *fb = fbdir_open(tmp_name, O_RDWR | O_CREAT | O_TRUNC, q);
+  bconfig(fb, BCONFIG_IS_TEMP_FILE, 1);
+  return fb;
+}
+
+#ifdef TEST
+
+#include "lib/getopt.h"
+
+int main(int argc, char **argv)	// Self-test: copy input to output through fb_direct streams
+{
+ struct fastbuf *f, *t;
+
+ log_init(NULL);
+ if (cf_getopt(argc, argv, CF_SHORT_OPTS, CF_NO_LONG_OPTS, NULL) >= 0)	// Any option left over for us is unexpected
+ die("Hey, whaddya want?");
+ f = (optind < argc) ? fbdir_open(argv[optind++], O_RDONLY, NULL) : fbdir_open_fd(0, NULL);	// Source: first argument, else fd 0
+ t = (optind < argc) ? fbdir_open(argv[optind++], O_RDWR | O_CREAT | O_TRUNC, NULL) : fbdir_open_fd(1, NULL);	// Target: second argument, else fd 1
+
+ bbcopy(f, t, ~0U);
+ ASSERT(btell(f) == btell(t));	// Both streams must have advanced by the same amount
+
+#if 0 // This triggers unaligned write
+ bflush(t);
+ bputc(t, '\n');
+#endif
+
+ brewind(t);	// Re-read the first byte of the output to exercise the write->read switch
+ bgetc(t);
+ ASSERT(btell(t) == 1);
+
+ bclose(f);
+ bclose(t);
+ return 0;
+}
+
+#endif
/* bigalloc.c */
- void *big_alloc(u64 len);
+ void *page_alloc(unsigned int len) LIKE_MALLOC; // allocates a multiple of CPU_PAGE_SIZE bytes with mmap
+ void page_free(void *start, unsigned int len);
+ void *page_realloc(void *start, unsigned int old_len, unsigned int new_len);
+
-void *big_alloc(unsigned int len) LIKE_MALLOC; // allocate a large memory block in the most efficient way available
-void big_free(void *start, unsigned int len);
++void *big_alloc(u64 len) LIKE_MALLOC; // allocate a large memory block in the most efficient way available
+void big_free(void *start, u64 len);
#endif
--- /dev/null
+/*
+ * UCW Library -- Testing the Old Sorter
+ *
+ * (c) 2007 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/getopt.h"
+#include "lib/conf.h"
+#include "lib/fastbuf.h"
++#include "lib/ff-binary.h"
+#include "lib/hashfunc.h"
+#include "lib/md5.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+/*** Time measurement ***/
+
+static timestamp_t timer;
+
+static void	// Begin a timed section
+start(void)
+{
+ sync();	// Called before the timer is started, so it is not part of the measured interval
+ init_timer(&timer);
+}
+
+static void	// End a timed section and log its duration
+stop(void)
+{
+ sync();	// Called before the timer is read, so its duration is included in the measurement
+ log(L_INFO, "Test took %.3fs", get_timer(&timer) / 1000.);	// The timer value is divided by 1000 and printed as seconds
+}
+
+/*** Simple 4-byte integer keys ***/
+
+struct key1 {
+ u32 x;
+};
+
+static inline int s1_compare(struct key1 *x, struct key1 *y)	// Order key1 records by their integer value
+{
+ COMPARE(x->x, y->x);	// Presumably returns from the function with the ordering when the operands differ -- see the COMPARE macro
+ return 0;	// Reached when the keys are equal
+}
+
+#define SORT_KEY struct key1
+#define SORT_PREFIX(x) s1_##x
+#define SORT_INPUT_FB
+#define SORT_OUTPUT_FB
+#define SORT_UNIQUE
+#define SORT_REGULAR
+#define SORT_PRESORT
+
+#include "lib/sorter.h"
+
+/*
+ * Sort N 4-byte integers covering 0..N-1 and check that the output is the
+ * sequence 0, 1, ..., N-1.  N is derived from `size` via nextprime();
+ * `mode` selects the input order (0=increasing, 1=decreasing, otherwise
+ * (i*K+17) mod N).
+ */
+static void
+test_int(int mode, u64 size)
+{
+  static char *mode_names[] = { "increasing", "decreasing", "random" };
+  uns N = 0;
+  if (size)
+    N = nextprime(MIN(size/4, 0xffff0000));
+  uns K = N/4*3;
+  log(L_INFO, ">>> Integers (%s, N=%u)", mode_names[mode], N);
+
+  struct fastbuf *f = bopen_tmp(65536);
+  for (uns i=0; i<N; i++)
+    {
+      u64 value;
+      if (mode == 0)
+        value = i;
+      else if (mode == 1)
+        value = N-1-i;
+      else
+        value = ((u64)i * K + 17) % N;
+      bputl(f, value);
+    }
+  brewind(f);
+
+  start();
+  f = s1_sort(f);
+  stop();
+
+  SORT_XTRACE(2, "Verifying");
+  for (uns expected=0; expected<N; expected++)
+    {
+      uns got = bgetl(f);
+      if (got != expected)
+        die("Discrepancy: %u instead of %u", got, expected);
+    }
+  bclose(f);
+}
+
+/*** Longer records with hashes (similar to Shepherd's index records) ***/
+
+struct key3 {
+ u32 hash[4];
+ u32 i;
+ u32 payload[3];
+};
+
+static inline int s3_compare(struct key3 *x, struct key3 *y)	// Order key3 records by their four hash words, most significant first
+{
+ /* FIXME: Maybe unroll manually? */
+ for (uns i=0; i<4; i++)
+ COMPARE(x->hash[i], y->hash[i]);	// Presumably returns as soon as a word differs -- see the COMPARE macro
+ return 0;	// All four words are equal
+}
+
+#define SORT_KEY struct key3
+#define SORT_PREFIX(x) s3_##x
+#define SORT_INPUT_FB
+#define SORT_OUTPUT_FB
+#define SORT_REGULAR
+#define SORT_PRESORT
+
+#include "lib/sorter.h"
+
+static void	// Fill *k deterministically from the index i; mode selects how the hash words are derived
+gen_hash_key(int mode, struct key3 *k, uns i)
+{
+ k->i = i;
+ k->payload[0] = 7*i + 13;
+ k->payload[1] = 13*i + 19;
+ k->payload[2] = 19*i + 7;
+ switch (mode)
+ {
+ case 0:	// First hash word grows with i
+ k->hash[0] = i;
+ k->hash[1] = k->payload[0];
+ k->hash[2] = k->payload[1];
+ k->hash[3] = k->payload[2];
+ break;
+ case 1:	// First hash word shrinks as i grows
+ k->hash[0] = ~i;
+ k->hash[1] = k->payload[0];
+ k->hash[2] = k->payload[1];
+ k->hash[3] = k->payload[2];
+ break;
+ default: ;	// Any other mode: hash = MD5 of the 4 bytes of k->i (empty statement allows the declaration below)
+ struct MD5Context ctx;
+ MD5Init(&ctx);
+ MD5Update(&ctx, (byte*) &k->i, 4);
+ MD5Final((byte*) &k->hash, &ctx);
+ break;
+ }
+}
+
+static void	// Sort N key3 records and verify order, record integrity and completeness
+test_hashes(int mode, u64 size)
+{
+ uns N = MIN(size / sizeof(struct key3), 0xffffffff);
+ log(L_INFO, ">>> Hashes (%s, N=%u)", ((char *[]) { "increasing", "decreasing", "random" })[mode], N);
+ struct key3 k, lastk;
+
+ struct fastbuf *f = bopen_tmp(65536);
+ uns hash_sum = 0;	// Running checksum of hash[3]; must return to zero after verification
+ for (uns i=0; i<N; i++)
+ {
+ gen_hash_key(mode, &k, i);
+ hash_sum += k.hash[3];
+ bwrite(f, &k, sizeof(k));
+ }
+ brewind(f);
+
+ start();
+ f = s3_sort(f);
+ stop();
+
+ SORT_XTRACE(2, "Verifying");
+ for (uns i=0; i<N; i++)
+ {
+ int ok = breadb(f, &k, sizeof(k));
+ ASSERT(ok);
+ if (i && s3_compare(&k, &lastk) <= 0)	// Output must be strictly increasing; lastk is only valid from the second record on
+ ASSERT(0);
+ gen_hash_key(mode, &lastk, k.i);	// Regenerate the record from its index ...
+ if (memcmp(&k, &lastk, sizeof(k)))	// ... and require a byte-exact match; lastk then doubles as the previous key
+ ASSERT(0);
+ hash_sum -= k.hash[3];
+ }
+ ASSERT(!hash_sum);
+ bclose(f);
+}
+
+/*** Variable-length records (strings) with and without var-length data ***/
+
+#define KEY4_MAX 256
+
+struct key4 {
+ uns len;
+ byte s[KEY4_MAX];
+};
+
+/*
+ * Read one key4 (32-bit length followed by that many bytes) from f into *x.
+ * Returns 0 when the length read is negative as an int (what bgetl()
+ * presumably yields at end of input), 1 otherwise.
+ */
+static inline int s4_fetch_key(struct fastbuf *f, struct key4 *x)
+{
+  int len = bgetl(f);
+  if (len < 0)
+    return 0;
+  ASSERT(len <= KEY4_MAX);		// x->s holds KEY4_MAX bytes; never read past it
+  x->len = len;
+  breadb(f, x->s, len);
+  return 1;
+}
+
+static inline void s4_copy_data(struct fastbuf *i UNUSED, struct fastbuf *f, struct key4 *x)	// Write key x to f; the input stream is not touched
+{
+ bputl(f, x->len);	// Length first ...
+ bwrite(f, x->s, x->len);	// ... then the key bytes
+}
+
+static inline int s4_compare(struct key4 *x, struct key4 *y)	// Byte-wise comparison; on a common prefix the lengths decide
+{
+ uns l = MIN(x->len, y->len);
+ int c = memcmp(x->s, y->s, l);	// Compare the common prefix
+ if (c)
+ return c;
+ COMPARE(x->len, y->len);	// Presumably returns the ordering when the lengths differ -- see the COMPARE macro
+ return 0;	// Same bytes and same length
+}
+
+static inline byte *s4_fetch_item(struct fastbuf *f UNUSED, struct key4 *x, byte *limit UNUSED)	// Nothing is read: these items carry no data beyond the key
+{
+ return &x->s[x->len];	// Address just past the key bytes
+}
+
+static inline void s4_store_item(struct fastbuf *f, struct key4 *x)	// Write item x to f
+{
+ s4_copy_data(NULL, f, x);	// Same output format as s4_copy_data(); its input stream argument is unused
+}
+
+#define SORT_KEY struct key4
+#define SORT_PREFIX(x) s4_##x
+#define SORT_INPUT_FB
+#define SORT_OUTPUT_FB
+#define SORT_PRESORT
+
+#include "lib/sorter.h"
+
+#define s4b_compare s4_compare
+#define s4b_fetch_key s4_fetch_key
+
+/* Size of the data block attached to key x: 0 for an empty key, otherwise the first key byte XORed with 0xad. */
+static inline uns s4_data_size(struct key4 *x)
+{
+  if (!x->len)
+    return 0;
+  return x->s[0] ^ 0xad;
+}
+
+static inline void s4b_copy_data(struct fastbuf *i, struct fastbuf *f, struct key4 *x)	// Write key x to f and copy its data block from i to f
+{
+ bputl(f, x->len);
+ bwrite(f, x->s, x->len);
+ bbcopy(i, f, s4_data_size(x));	// The data length is derived from the key itself
+}
+
+/*
+ * Read the data block of key x from f into the memory directly following
+ * the key bytes.  Returns the address just past the data, or NULL (without
+ * reading anything) if the data would extend beyond `limit`.
+ */
+static inline byte *s4b_fetch_item(struct fastbuf *f, struct key4 *x, byte *limit)
+{
+  uns data_len = s4_data_size(x);
+  byte *data = &x->s[x->len];
+  byte *end = data + data_len;
+
+  if (end > limit)
+    return NULL;
+  breadb(f, data, data_len);
+  return end;
+}
+
+static inline void s4b_store_item(struct fastbuf *f, struct key4 *x)	// Write key x together with its data block to f
+{
+ bputl(f, x->len);
+ bwrite(f, x->s, x->len + s4_data_size(x));	// Key bytes and data are written as one run starting at x->s
+}
+
+#define SORT_KEY struct key4
+#define SORT_PREFIX(x) s4b_##x
+#define SORT_INPUT_FB
+#define SORT_OUTPUT_FB
+#define SORT_PRESORT
+
+#include "lib/sorter.h"
+
+static void	// Fill *k with a random-length key of random bytes
+gen_key4(struct key4 *k)
+{
+ k->len = random_max(KEY4_MAX);	// Presumably a value below KEY4_MAX -- confirm against random_max()
+ for (uns i=0; i<k->len; i++)
+ k->s[i] = random();	// Only the low byte of each random() value is kept
+}
+
+/*
+ * Fill buf[0..len-1] with a byte sequence determined by the seed h: each
+ * byte is the top 8 bits of h, after which h is advanced as h*259309 + 17.
+ */
+static void
+gen_data4(byte *buf, uns len, uns h)
+{
+  for (uns pos=0; pos<len; pos++)
+    {
+      buf[pos] = h >> 24;
+      h = h*259309 + 17;
+    }
+}
+
+/*
+ * Sort N random variable-length string keys and verify the result.
+ * With mode != 0 every key is followed by a data block whose length and
+ * contents are derived from the key (s4_data_size(), gen_data4()) and the
+ * s4b sorter is used; otherwise the plain s4 sorter is used.  Verification
+ * checks that keys come out in non-decreasing order, that each data block
+ * matches its key, and that the sum of key hashes equals the sum recorded
+ * while generating the input.
+ */
+static void
+test_strings(uns mode, u64 size)
+{
+  uns avg_item_size = KEY4_MAX/2 + 4 + (mode ? 128 : 0);
+  uns N = MIN(size / avg_item_size, 0xffffffff);
+  log(L_INFO, ">>> Strings %s(N=%u)", (mode ? "with data " : ""), N);
+  srandom(1);				// gen_key4() draws from random(), which is seeded by srandom(), not srand()
+
+  struct key4 k, lastk;
+  byte buf[256], buf2[256];		// s4_data_size() never exceeds 255
+  uns sum = 0;
+
+  struct fastbuf *f = bopen_tmp(65536);
+  for (uns i=0; i<N; i++)
+    {
+      gen_key4(&k);
+      s4_copy_data(NULL, f, &k);
+      uns h = hash_block(k.s, k.len);
+      sum += h;
+      if (mode)
+        {
+          gen_data4(buf, s4_data_size(&k), h);
+          bwrite(f, buf, s4_data_size(&k));
+        }
+    }
+  brewind(f);
+
+  start();
+  f = (mode ? s4b_sort : s4_sort)(f);
+  stop();
+
+  SORT_XTRACE(2, "Verifying");
+  for (uns i=0; i<N; i++)
+    {
+      int ok = s4_fetch_key(f, &k);
+      ASSERT(ok);
+      uns h = hash_block(k.s, k.len);
+      if (mode && s4_data_size(&k))
+        {
+          ok = breadb(f, buf, s4_data_size(&k));
+          ASSERT(ok);
+          gen_data4(buf2, s4_data_size(&k), h);
+          ASSERT(!memcmp(buf, buf2, s4_data_size(&k)));
+        }
+      if (i && s4_compare(&k, &lastk) < 0)	// lastk is only valid from the second key on
+        ASSERT(0);
+      sum -= h;
+      lastk = k;
+    }
+  ASSERT(!sum);
+  bclose(f);
+}
+
+/*** Main ***/
+
+static void	// Run the test with number i on roughly `size` bytes of data
+run_test(uns i, u64 size)
+{
+ switch (i)
+ {
+ case 0:
+ test_int(0, size); break;
+ case 1:
+ test_int(1, size); break;
+ case 2:
+ test_int(2, size); break;
+ case 3:	// Numbers 3..5 run nothing in this program
+ case 4:
+ case 5:
+ break;
+ case 6:
+ test_hashes(0, size); break;
+ case 7:
+ test_hashes(1, size); break;
+ case 8:
+ test_hashes(2, size); break;
+ case 9:
+ test_strings(0, size); break;
+ case 10:
+ test_strings(1, size); break;
+#define TMAX 11	// One past the highest test number; used by main()
+ }
+}
+
+int	// Parse -s <size>, -t <test>, -v and run one test or all of them
+main(int argc, char **argv)
+{
+ log_init(NULL);
+ int c;
+ u64 size = 10000000;	// Default data size handed to every test
+ uns t = ~0;	// ~0 means "no test selected": run all
+
+ while ((c = cf_getopt(argc, argv, CF_SHORT_OPTS "s:t:v", CF_NO_LONG_OPTS, NULL)) >= 0)
+ switch (c)
+ {
+ case 's':
+ if (cf_parse_u64(optarg, &size))	// A non-zero result is treated as a parse error
+ goto usage;
+ break;
+ case 't':
+ t = atol(optarg);	// NOTE(review): atol() does not report malformed input; non-numeric text selects test 0
+ if (t >= TMAX)
+ goto usage;
+ break;
+ case 'v':
+ sorter_trace++;
+ break;
+ default:
+ usage:	// Also reached by goto from the option handlers and from below the loop
+ fputs("Usage: sort-test [-v] [-s <size>] [-t <test>]\n", stderr);
+ exit(1);
+ }
+ if (optind != argc)	// Positional arguments are not accepted
+ goto usage;
+
+ if (t != ~0U)
+ run_test(t, size);
+ else
+ for (uns i=0; i<TMAX; i++)
+ run_test(i, size);
+
+ return 0;
+}
--- /dev/null
+/*
+ * UCW Library -- Testing the Sorter
+ *
+ * (c) 2007 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/getopt.h"
+#include "lib/conf.h"
+#include "lib/fastbuf.h"
++#include "lib/ff-binary.h"
+#include "lib/hashfunc.h"
+#include "lib/md5.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+/*** Time measurement ***/
+
+static timestamp_t timer;
+
+static void	// Begin a timed section
+start(void)
+{
+ sync();	// Called before the timer is started, so it is not part of the measured interval
+ init_timer(&timer);
+}
+
+static void	// End a timed section and log its duration
+stop(void)
+{
+ sync();	// Called before the timer is read, so its duration is included in the measurement
+ log(L_INFO, "Test took %.3fs", get_timer(&timer) / 1000.);	// The timer value is divided by 1000 and printed as seconds
+}
+
+/*** Simple 4-byte integer keys ***/
+
+struct key1 {
+ u32 x;
+};
+
+#define SORT_KEY_REGULAR struct key1
+#define SORT_PREFIX(x) s1_##x
+#define SORT_INPUT_FB
+#define SORT_OUTPUT_FB
+#define SORT_UNIQUE
+#define SORT_INT(k) (k).x
+
+#include "lib/sorter/sorter.h"
+
+/*
+ * Sort N 4-byte integers covering 0..N-1 and check that the output is the
+ * sequence 0, 1, ..., N-1.  N is derived from `size` via nextprime();
+ * `mode` selects the input order (0=increasing, 1=decreasing, otherwise
+ * (i*K+17) mod N).  N-1 is passed to the sorter as its third argument.
+ */
+static void
+test_int(int mode, u64 size)
+{
+  static char *mode_names[] = { "increasing", "decreasing", "random" };
+  uns N = 0;
+  if (size)
+    N = nextprime(MIN(size/4, 0xffff0000));
+  uns K = N/4*3;
+  log(L_INFO, ">>> Integers (%s, N=%u)", mode_names[mode], N);
+
+  struct fastbuf *f = bopen_tmp(65536);
+  for (uns i=0; i<N; i++)
+    {
+      u64 value;
+      if (mode == 0)
+        value = i;
+      else if (mode == 1)
+        value = N-1-i;
+      else
+        value = ((u64)i * K + 17) % N;
+      bputl(f, value);
+    }
+  brewind(f);
+
+  start();
+  f = s1_sort(f, NULL, N-1);
+  stop();
+
+  SORT_XTRACE(2, "Verifying");
+  for (uns expected=0; expected<N; expected++)
+    {
+      uns got = bgetl(f);
+      if (got != expected)
+        die("Discrepancy: %u instead of %u", got, expected);
+    }
+  bclose(f);
+}
+
+/*** Integers with merging, but no data ***/
+
+struct key2 {
+ u32 x;	// The integer being sorted (SORT_INT key)
+ u32 cnt;	// Occurrence counter; summed when records are merged
+};
+
+/* Merge the n keys in k[] into k[0] by adding up their counters and write the merged key to f. */
+static inline void s2_write_merged(struct fastbuf *f, struct key2 **k, void **d UNUSED, uns n, void *buf UNUSED)
+{
+  struct key2 *merged = k[0];
+  uns idx = 1;
+
+  while (idx < n)
+    merged->cnt += k[idx++]->cnt;
+  bwrite(f, merged, sizeof(struct key2));
+}
+
+/* Merge the n keys in k[] into k[0] by adding up their counters and write the merged key to dest. */
+static inline void s2_copy_merged(struct key2 **k, struct fastbuf **d UNUSED, uns n, struct fastbuf *dest)
+{
+  struct key2 *merged = k[0];
+  uns idx = 1;
+
+  while (idx < n)
+    merged->cnt += k[idx++]->cnt;
+  bwrite(dest, merged, sizeof(struct key2));
+}
+
+#define SORT_KEY_REGULAR struct key2
+#define SORT_PREFIX(x) s2_##x
+#define SORT_INPUT_FB
+#define SORT_OUTPUT_FB
+#define SORT_UNIFY
+#define SORT_INT(k) (k).x
+
+#include "lib/sorter/sorter.h"
+
+/*
+ * Write every integer of an N-element sequence 2*mult times, each with
+ * count 1, sort with unification enabled and verify that the output lists
+ * 0..N-1 once each with count 2*mult.  `mode` selects the input order
+ * (0=increasing, 1=decreasing, otherwise (i*K+17) mod N).
+ */
+static void
+test_counted(int mode, u64 size)
+{
+  u64 items = size / sizeof(struct key2);
+  uns mult = 2;
+  while (items/(2*mult) > 0xffff0000)		// Raise the multiplicity until N fits the 32-bit range used here
+    mult++;
+  uns N = items ? nextprime(items/(2*mult)) : 0;
+  uns K = N/4*3;
+  log(L_INFO, ">>> Counted integers (%s, N=%u, mult=%u)", ((char *[]) { "increasing", "decreasing", "random" })[mode], N, mult);
+
+  struct fastbuf *f = bopen_tmp(65536);
+  for (uns m=0; m<mult; m++)
+    for (uns i=0; i<N; i++)
+      for (uns j=0; j<2; j++)
+        {
+          bputl(f, (mode==0) ? (i%N) : (mode==1) ? N-1-(i%N) : ((u64)i * K + 17) % N);
+          bputl(f, 1);
+        }
+  brewind(f);
+
+  start();
+  f = s2_sort(f, NULL, N-1);
+  stop();
+
+  SORT_XTRACE(2, "Verifying");
+  for (uns i=0; i<N; i++)
+    {
+      uns j = bgetl(f);
+      if (i != j)
+        die("Discrepancy: %u instead of %u", j, i);
+      uns k = bgetl(f);
+      if (k != 2*mult)
+        die("Discrepancy: %u has count %u instead of %u", j, k, 2*mult);	// Report the count that was actually expected
+    }
+  bclose(f);
+}
+
+/*** Longer records with hashes (similar to Shepherd's index records) ***/
+
+struct key3 {
+ u32 hash[4];
+ u32 i;
+ u32 payload[3];
+};
+
+static inline int s3_compare(struct key3 *x, struct key3 *y)	// Order key3 records by their four hash words, most significant first
+{
+ /* FIXME: Maybe unroll manually? */
+ for (uns i=0; i<4; i++)
+ COMPARE(x->hash[i], y->hash[i]);	// Presumably returns as soon as a word differs -- see the COMPARE macro
+ return 0;	// All four words are equal
+}
+
+static inline uns s3_hash(struct key3 *x)	// Hash value handed to the sorter (SORT_HASH_BITS is 32)
+{
+ return x->hash[0];	// The most significant word compared by s3_compare()
+}
+
+#define SORT_KEY_REGULAR struct key3
+#define SORT_PREFIX(x) s3_##x
+#define SORT_INPUT_FB
+#define SORT_OUTPUT_FB
+#define SORT_HASH_BITS 32
+
+#include "lib/sorter/sorter.h"
+
+static void	// Fill *k deterministically from the index i; mode selects how the hash words are derived
+gen_hash_key(int mode, struct key3 *k, uns i)
+{
+ k->i = i;
+ k->payload[0] = 7*i + 13;
+ k->payload[1] = 13*i + 19;
+ k->payload[2] = 19*i + 7;
+ switch (mode)
+ {
+ case 0:	// First hash word grows with i
+ k->hash[0] = i;
+ k->hash[1] = k->payload[0];
+ k->hash[2] = k->payload[1];
+ k->hash[3] = k->payload[2];
+ break;
+ case 1:	// First hash word shrinks as i grows
+ k->hash[0] = ~i;
+ k->hash[1] = k->payload[0];
+ k->hash[2] = k->payload[1];
+ k->hash[3] = k->payload[2];
+ break;
+ default: ;	// Any other mode: hash = MD5 of the 4 bytes of k->i (empty statement allows the declaration below)
+ struct MD5Context ctx;
+ MD5Init(&ctx);
+ MD5Update(&ctx, (byte*) &k->i, 4);
+ MD5Final((byte*) &k->hash, &ctx);
+ break;
+ }
+}
+
+static void	// Sort N key3 records and verify order, record integrity and completeness
+test_hashes(int mode, u64 size)
+{
+ uns N = MIN(size / sizeof(struct key3), 0xffffffff);
+ log(L_INFO, ">>> Hashes (%s, N=%u)", ((char *[]) { "increasing", "decreasing", "random" })[mode], N);
+ struct key3 k, lastk;
+
+ struct fastbuf *f = bopen_tmp(65536);
+ uns hash_sum = 0;	// Running checksum of hash[3]; must return to zero after verification
+ for (uns i=0; i<N; i++)
+ {
+ gen_hash_key(mode, &k, i);
+ hash_sum += k.hash[3];
+ bwrite(f, &k, sizeof(k));
+ }
+ brewind(f);
+
+ start();
+ f = s3_sort(f, NULL);
+ stop();
+
+ SORT_XTRACE(2, "Verifying");
+ for (uns i=0; i<N; i++)
+ {
+ int ok = breadb(f, &k, sizeof(k));
+ ASSERT(ok);
+ if (i && s3_compare(&k, &lastk) <= 0)	// Output must be strictly increasing; lastk is only valid from the second record on
+ ASSERT(0);
+ gen_hash_key(mode, &lastk, k.i);	// Regenerate the record from its index ...
+ if (memcmp(&k, &lastk, sizeof(k)))	// ... and require a byte-exact match; lastk then doubles as the previous key
+ ASSERT(0);
+ hash_sum -= k.hash[3];
+ }
+ ASSERT(!hash_sum);
+ bclose(f);
+}
+
+/*** Variable-length records (strings) with and without var-length data ***/
+
+#define KEY4_MAX 256
+
+struct key4 {
+ uns len;
+ byte s[KEY4_MAX];
+};
+
+static inline int s4_compare(struct key4 *x, struct key4 *y)	// Byte-wise comparison; on a common prefix the lengths decide
+{
+ uns l = MIN(x->len, y->len);
+ int c = memcmp(x->s, y->s, l);	// Compare the common prefix
+ if (c)
+ return c;
+ COMPARE(x->len, y->len);	// Presumably returns the ordering when the lengths differ -- see the COMPARE macro
+ return 0;	// Same bytes and same length
+}
+
+static inline int s4_read_key(struct fastbuf *f, struct key4 *x)	// Read one key from f; returns 0 at the end marker, 1 otherwise
+{
+ x->len = bgetl(f);
+ if (x->len == 0xffffffff)	// Presumably what bgetl() yields at end of input -- confirm
+ return 0;
+ ASSERT(x->len < KEY4_MAX);	// Guards the fixed-size buffer x->s
+ breadb(f, x->s, x->len);	// The result of breadb() is not checked
+ return 1;
+}
+
+static inline void s4_write_key(struct fastbuf *f, struct key4 *x)	// Write key x to f in the format read by s4_read_key()
+{
+ ASSERT(x->len < KEY4_MAX);
+ bputl(f, x->len);	// Length first ...
+ bwrite(f, x->s, x->len);	// ... then the key bytes
+}
+
+#define SORT_KEY struct key4
+#define SORT_PREFIX(x) s4_##x
+#define SORT_KEY_SIZE(x) (sizeof(struct key4) - KEY4_MAX + (x).len)
+#define SORT_INPUT_FB
+#define SORT_OUTPUT_FB
+
+#include "lib/sorter/sorter.h"
+
+#define s4b_compare s4_compare
+#define s4b_read_key s4_read_key
+#define s4b_write_key s4_write_key
+
+/* Size of the data block attached to key x: 0 for an empty key, otherwise the first key byte XORed with 0xad. */
+static inline uns s4_data_size(struct key4 *x)
+{
+  if (!x->len)
+    return 0;
+  return x->s[0] ^ 0xad;
+}
+
+#define SORT_KEY struct key4
+#define SORT_PREFIX(x) s4b_##x
+#define SORT_KEY_SIZE(x) (sizeof(struct key4) - KEY4_MAX + (x).len)
+#define SORT_DATA_SIZE(x) s4_data_size(&(x))
+#define SORT_INPUT_FB
+#define SORT_OUTPUT_FB
+
+#include "lib/sorter/sorter.h"
+
+static void	// Fill *k with a random-length key of random bytes
+gen_key4(struct key4 *k)
+{
+ k->len = random_max(KEY4_MAX);	// Presumably a value below KEY4_MAX, as s4_write_key() asserts -- confirm against random_max()
+ for (uns i=0; i<k->len; i++)
+ k->s[i] = random();	// Only the low byte of each random() value is kept
+}
+
+/*
+ * Fill buf[0..len-1] with a byte sequence determined by the seed h: each
+ * byte is the top 8 bits of h, after which h is advanced as h*259309 + 17.
+ */
+static void
+gen_data4(byte *buf, uns len, uns h)
+{
+  for (uns pos=0; pos<len; pos++)
+    {
+      buf[pos] = h >> 24;
+      h = h*259309 + 17;
+    }
+}
+
+/*
+ * Sort N random variable-length string keys and verify the result.
+ * With mode != 0 every key is followed by a data block whose length and
+ * contents are derived from the key (s4_data_size(), gen_data4()) and the
+ * s4b sorter is used; otherwise the plain s4 sorter is used.  Verification
+ * checks that keys come out in non-decreasing order, that each data block
+ * matches its key, and that the sum of key hashes equals the sum recorded
+ * while generating the input.
+ */
+static void
+test_strings(uns mode, u64 size)
+{
+  uns avg_item_size = KEY4_MAX/2 + 4 + (mode ? 128 : 0);
+  uns N = MIN(size / avg_item_size, 0xffffffff);
+  log(L_INFO, ">>> Strings %s(N=%u)", (mode ? "with data " : ""), N);
+  srandom(1);				// gen_key4() draws from random(), which is seeded by srandom(), not srand()
+
+  struct key4 k, lastk;
+  byte buf[256], buf2[256];		// s4_data_size() never exceeds 255
+  uns sum = 0;
+
+  struct fastbuf *f = bopen_tmp(65536);
+  for (uns i=0; i<N; i++)
+    {
+      gen_key4(&k);
+      s4_write_key(f, &k);
+      uns h = hash_block(k.s, k.len);
+      sum += h;
+      if (mode)
+        {
+          gen_data4(buf, s4_data_size(&k), h);
+          bwrite(f, buf, s4_data_size(&k));
+        }
+    }
+  brewind(f);
+
+  start();
+  f = (mode ? s4b_sort : s4_sort)(f, NULL);
+  stop();
+
+  SORT_XTRACE(2, "Verifying");
+  for (uns i=0; i<N; i++)
+    {
+      int ok = s4_read_key(f, &k);
+      ASSERT(ok);
+      uns h = hash_block(k.s, k.len);
+      if (mode && s4_data_size(&k))
+        {
+          ok = breadb(f, buf, s4_data_size(&k));
+          ASSERT(ok);
+          gen_data4(buf2, s4_data_size(&k), h);
+          ASSERT(!memcmp(buf, buf2, s4_data_size(&k)));
+        }
+      if (i && s4_compare(&k, &lastk) < 0)	// lastk is only valid from the second key on
+        ASSERT(0);
+      sum -= h;
+      lastk = k;
+    }
+  ASSERT(!sum);
+  bclose(f);
+}
+
+/*** Graph-like structure with custom presorting ***/
+
+struct key5 {
+ u32 x;	// The integer being sorted (SORT_INT key)
+ u32 cnt;	// Number of 4-byte values following the key (SORT_DATA_SIZE is 4*cnt)
+};
+
+static uns s5_N, s5_K, s5_L, s5_i, s5_j;
+
+struct s5_pair {
+ uns x, y;
+};
+
+static int s5_gen(struct s5_pair *p)	// Produce the next (x,y) pair of the test graph; returns 0 once all pairs were generated
+{
+ if (s5_j >= s5_N)	// Inner counter exhausted: advance the outer one
+ {
+ if (s5_i >= s5_N-1)	// Outer counter exhausted as well: done
+ return 0;
+ s5_j = 0;
+ s5_i++;
+ }
+ p->x = ((u64)s5_j * s5_K) % s5_N;	// 64-bit intermediate avoids overflow of the product
+ p->y = ((u64)(s5_i + s5_j) * s5_L) % s5_N;
+ s5_j++;
+ return 1;
+}
+
+#define ASORT_PREFIX(x) s5m_##x
+#define ASORT_KEY_TYPE u32
+#define ASORT_ELT(i) ary[i]
+#define ASORT_EXTRA_ARGS , u32 *ary
+#include "lib/arraysort.h"
+
+static void s5_write_merged(struct fastbuf *f, struct key5 **keys, void **data, uns n, void *buf)	// Merge n in-memory records into one and write it to f
+{
+ u32 *a = buf;	// Scratch area receiving the concatenated values
+ uns m = 0;	// Total number of values collected so far
+ for (uns i=0; i<n; i++)
+ {
+ memcpy(&a[m], data[i], 4*keys[i]->cnt);	// Append the values of record i
+ m += keys[i]->cnt;
+ }
+ s5m_sort(m, a);	// Sort the combined values
+ keys[0]->cnt = m;	// The first key is reused as the merged key
+ bwrite(f, keys[0], sizeof(struct key5));
+ bwrite(f, a, 4*m); /* FIXME: Might overflow here */
+}
+
+static void s5_copy_merged(struct key5 **keys, struct fastbuf **data, uns n, struct fastbuf *dest)	// Merge n records whose values are read from streams and write the result to dest
+{
+ u32 k[n];	// Current head value of every input stream
+ uns m = 0;	// Total number of values over all inputs
+ for (uns i=0; i<n; i++)
+ {
+ k[i] = bgetl(data[i]);
+ m += keys[i]->cnt;
+ }
+ struct key5 key = { .x = keys[0]->x, .cnt = m };
+ bwrite(dest, &key, sizeof(key));
+ while (key.cnt--)	// Emit one value per iteration, m in total
+ {
+ uns b = 0;	// Index of the stream with the smallest head value
+ for (uns i=1; i<n; i++)
+ if (k[i] < k[b])
+ b = i;
+ bputl(dest, k[b]);
+ if (--keys[b]->cnt)	// More values left in stream b: fetch the next one
+ k[b] = bgetl(data[b]);
+ else
+ k[b] = ~0U;	// Stream exhausted: park it at the maximum value so it is not picked before live streams
+ }
+}
+
+static inline int s5p_lt(struct s5_pair x, struct s5_pair y)	// "Less than" for pairs: by x first, then by y
+{
+ COMPARE_LT(x.x, y.x);	// Presumably returns from the function when the operands differ -- see the COMPARE_LT macro
+ COMPARE_LT(x.y, y.y);
+ return 0;	// Equal pairs are not "less than"
+}
+
+/* FIXME: Use smarter internal sorter when it's available */
+#define ASORT_PREFIX(x) s5p_##x
+#define ASORT_KEY_TYPE struct s5_pair
+#define ASORT_ELT(i) ary[i]
+#define ASORT_LT(x,y) s5p_lt(x,y)
+#define ASORT_EXTRA_ARGS , struct s5_pair *ary
+#include "lib/arraysort.h"
+
+static int s5_presort(struct fastbuf *dest, void *buf, size_t bufsize)	// Custom presorter: generate one buffer of pairs, sort it and write it grouped by x; returns 0 when no pairs are left
+{
+ uns max = MIN(bufsize/sizeof(struct s5_pair), 0xffffffff);	// Number of pairs that fit into the buffer
+ struct s5_pair *a = buf;
+ uns n = 0;
+ while (n<max && s5_gen(&a[n]))	// Fill the buffer from the generator
+ n++;
+ if (!n)
+ return 0;
+ s5p_sort(n, a);
+ uns i = 0;
+ while (i < n)
+ {
+ uns j = i;	// Start of the run of pairs sharing the same x
+ while (i < n && a[i].x == a[j].x)
+ i++;
+ struct key5 k = { .x = a[j].x, .cnt = i-j };
+ bwrite(dest, &k, sizeof(k));	// One key per run ...
+ while (j < i)
+ bputl(dest, a[j++].y);	// ... followed by the y values of the run
+ }
+ return 1;
+}
+
+#define SORT_KEY_REGULAR struct key5
+#define SORT_PREFIX(x) s5_##x
+#define SORT_DATA_SIZE(k) (4*(k).cnt)
+#define SORT_UNIFY
+#define SORT_INPUT_PRESORT
+#define SORT_OUTPUT_THIS_FB
+#define SORT_INT(k) (k).x
+
+#include "lib/sorter/sorter.h"
+
+#define SORT_KEY_REGULAR struct key5
+#define SORT_PREFIX(x) s5b_##x
+#define SORT_DATA_SIZE(k) (4*(k).cnt)
+#define SORT_UNIFY
+#define SORT_INPUT_FB
+#define SORT_OUTPUT_THIS_FB
+#define SORT_INT(k) (k).x
+#define s5b_write_merged s5_write_merged
+#define s5b_copy_merged s5_copy_merged
+
+#include "lib/sorter/sorter.h"
+
+static void	// Sort a generated graph (pairs grouped by x) and verify the merged output
+test_graph(uns mode, u64 size)
+{
+ uns N = 3;
+ while ((u64)N*(N+2)*4 < size)	// Grow N through primes until the estimated data volume reaches `size`
+ N = nextprime(N);
+ log(L_INFO, ">>> Graph%s (N=%u)", (mode ? "" : " with custom presorting"), N);
+ s5_N = N;	// Reset the state of the s5_gen() generator
+ s5_K = N/4*3;
+ s5_L = N/3*2;
+ s5_i = s5_j = 0;
+
+ struct fastbuf *in = NULL;
+ if (mode)	// Non-zero mode: materialize the pairs in an input file, one value per key
+ {
+ struct s5_pair p;
+ in = bopen_tmp(65536);
+ while (s5_gen(&p))
+ {
+ struct key5 k = { .x = p.x, .cnt = 1 };
+ bwrite(in, &k, sizeof(k));
+ bputl(in, p.y);
+ }
+ brewind(in);
+ }
+
+ start();
+ struct fastbuf *f = bopen_tmp(65536);
+ bputl(f, 0xfeedcafe);	// Marker written ahead of the sorter output; checked again below
+ struct fastbuf *g = (mode ? s5b_sort(in, f, s5_N-1) : s5_sort(NULL, f, s5_N-1));	// Mode 0 feeds the sorter through s5_presort()
+ ASSERT(f == g);	// The sorter must return the supplied output stream
+ stop();
+
+ SORT_XTRACE(2, "Verifying");
+ uns c = bgetl(f);
+ ASSERT(c == 0xfeedcafe);
+ for (uns i=0; i<N; i++)
+ {
+ struct key5 k;
+ int ok = breadb(f, &k, sizeof(k));
+ ASSERT(ok);
+ ASSERT(k.x == i);	// Keys appear in increasing order ...
+ ASSERT(k.cnt == N);	// ... each carrying N values ...
+ for (uns j=0; j<N; j++)
+ {
+ uns y = bgetl(f);
+ ASSERT(y == j);	// ... which are exactly 0..N-1 in order
+ }
+ }
+ bclose(f);
+}
+
+/*** Main ***/
+
+static void	// Run the test with number i on roughly `size` bytes of data
+run_test(uns i, u64 size)
+{
+ switch (i)
+ {
+ case 0:
+ test_int(0, size); break;
+ case 1:
+ test_int(1, size); break;
+ case 2:
+ test_int(2, size); break;
+ case 3:
+ test_counted(0, size); break;
+ case 4:
+ test_counted(1, size); break;
+ case 5:
+ test_counted(2, size); break;
+ case 6:
+ test_hashes(0, size); break;
+ case 7:
+ test_hashes(1, size); break;
+ case 8:
+ test_hashes(2, size); break;
+ case 9:
+ test_strings(0, size); break;
+ case 10:
+ test_strings(1, size); break;
+ case 11:
+ test_graph(0, size); break;
+ case 12:
+ test_graph(1, size); break;
+#define TMAX 13	// One past the highest test number; used by main()
+ }
+}
+
+int	// Parse -d <debug>, -s <size>, -t <test>, -v and run one test or all of them
+main(int argc, char **argv)
+{
+ log_init(NULL);
+ int c;
+ u64 size = 10000000;	// Default data size handed to every test
+ uns t = ~0;	// ~0 means "no test selected": run all
+
+ while ((c = cf_getopt(argc, argv, CF_SHORT_OPTS "d:s:t:v", CF_NO_LONG_OPTS, NULL)) >= 0)
+ switch (c)
+ {
+ case 'd':
+ sorter_debug = atol(optarg);	// NOTE(review): atol() does not report malformed input
+ break;
+ case 's':
+ if (cf_parse_u64(optarg, &size))	// A non-zero result is treated as a parse error
+ goto usage;
+ break;
+ case 't':
+ t = atol(optarg);	// NOTE(review): non-numeric text silently selects test 0
+ if (t >= TMAX)
+ goto usage;
+ break;
+ case 'v':
+ sorter_trace++;
+ break;
+ default:
+ usage:	// Also reached by goto from the option handlers and from below the loop
+ fputs("Usage: sort-test [-v] [-d <debug>] [-s <size>] [-t <test>]\n", stderr);
+ exit(1);
+ }
+ if (optind != argc)	// Positional arguments are not accepted
+ goto usage;
+
+ if (t != ~0U)
+ run_test(t, size);
+ else
+ for (uns i=0; i<TMAX; i++)
+ run_test(i, size);
+
+ return 0;
+}
#define stk_strarraycat(s,n) ({ char **_s=(s); int _n=(n); char *_x=alloca(stk_array_len(_s,_n)); stk_array_join(_x, _s, _n, 0); _x; })
#define stk_strjoin(s,n,sep) ({ char **_s=(s); int _n=(n); char *_x=alloca(stk_array_len(_s,_n)+_n-1); stk_array_join(_x, _s, _n, (sep)); _x; })
#define stk_printf(f...) ({ uns _l=stk_printf_internal(f); char *_x=alloca(_l); sprintf(_x, f); _x; })
+ #define stk_vprintf(f, args) ({ uns _l=stk_vprintf_internal(f, args); char *_x=alloca(_l); vsprintf(_x, f, args); _x; })
#define stk_hexdump(s,n) ({ uns _n=(n); char *_x=alloca(3*_n+1); stk_hexdump_internal(_x,(byte*)(s),_n); _x; })
#define stk_str_unesc(s) ({ byte *_s=(s); byte *_d=alloca(strlen(_s)+1); str_unesc(_d, _s); _d; })
+#define stk_fsize(n) ({ char *_s=alloca(16); stk_fsize_internal(_s, n); _s; })
uns stk_array_len(char **s, uns cnt);
void stk_array_join(char *x, char **s, uns cnt, uns sep);
- uns stk_printf_internal(char *x, ...) FORMAT_CHECK(printf,1,2);
+ uns stk_printf_internal(const char *x, ...) FORMAT_CHECK(printf,1,2);
+ uns stk_vprintf_internal(const char *x, va_list args);
void stk_hexdump_internal(char *dst, byte *src, uns n);
+void stk_fsize_internal(char *dst, u64 size);