From: Pavel Charvat Date: Fri, 18 May 2007 14:37:51 +0000 (+0200) Subject: Merged main branch to dev-sorter. X-Git-Tag: holmes-import~506^2~13^2~137 X-Git-Url: http://mj.ucw.cz/gitweb/?a=commitdiff_plain;h=f3625d622964cf99300ceb7e499b99ebbce9fe69;p=libucw.git Merged main branch to dev-sorter. --- f3625d622964cf99300ceb7e499b99ebbce9fe69 diff --cc lib/Makefile index 0d2b9acf,95938ed1..01692986 --- a/lib/Makefile +++ b/lib/Makefile @@@ -67,8 -68,11 +69,13 @@@ ifdef CONFIG_OWN_REGE include $(s)/lib/regex/Makefile endif + ifdef CONFIG_OWN_GETOPT + include $(s)/lib/getopt/Makefile + endif + +include $(s)/lib/sorter/Makefile + + LIBUCW=$(o)/lib/libucw.$(LS) LIBUCW_MOD_PATHS=$(addprefix $(o)/lib/,$(LIBUCW_MODS)) $(o)/lib/libucw.a: $(addsuffix .o,$(LIBUCW_MOD_PATHS)) @@@ -101,12 -107,8 +110,13 @@@ $(o)/lib/bitops.test: $(o)/lib/bit-ffs- $(o)/lib/slists.test: $(o)/lib/slists-t $(o)/lib/kmp-test.test: $(o)/lib/kmp-test $(o)/lib/bbuf.test: $(o)/lib/bbuf-t + $(o)/lib/getopt.test: $(o)/lib/getopt-t +ifdef CONFIG_UCW_THREADS +TESTS+=$(addprefix $(o)/lib/,asio.test) +$(o)/lib/asio.test: $(o)/lib/asio-t +endif + INCLUDES+=$(o)/lib/.include-stamp $(o)/lib/.include-stamp: $(addprefix $(s)/lib/,$(LIBUCW_INCLUDES)) $(s)/build/install-includes $(s)/lib run/include/lib $(?F) diff --cc lib/bigalloc.c index 32d54643,3749fc9c..e4b9faf1 --- a/lib/bigalloc.c +++ b/lib/bigalloc.c @@@ -1,7 -1,8 +1,8 @@@ /* * UCW Library -- Allocation of Large Aligned Buffers * - * (c) 2006 Martin Mares + * (c) 2006--2007 Martin Mares + * (c) 2007 Pavel Charvat * * This software may be freely distributed and used according to the terms * of the GNU Lesser General Public License. @@@ -10,11 -11,39 +11,39 @@@ #include "lib/lib.h" #include + #include + + void * + page_alloc(unsigned int len) + { + ASSERT(!(len & (CPU_PAGE_SIZE-1))); + byte *p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); + if (p == (byte*) MAP_FAILED) + die("Cannot mmap %d bytes of memory: %m", len); + return p; + } + + void + page_free(void *start, unsigned int len) + { + ASSERT(!(len & (CPU_PAGE_SIZE-1))); + ASSERT(!((uintptr_t) start & (CPU_PAGE_SIZE-1))); + munmap(start, len); + } + + void * + page_realloc(void *start, unsigned int old_len, unsigned int new_len) + { + void *p = page_alloc(new_len); + memcpy(p, start, MIN(old_len, new_len)); + page_free(start, old_len); + return p; + } -static unsigned int -big_round(unsigned int len) +static u64 +big_round(u64 len) { - return ALIGN_TO(len, CPU_PAGE_SIZE); + return ALIGN_TO(len, (u64)CPU_PAGE_SIZE); } void * @@@ -40,10 -63,9 +67,9 @@@ big_alloc(u64 len } void -big_free(void *start, unsigned int len) +big_free(void *start, u64 len) { byte *p = start; - ASSERT(!((uintptr_t) p & (CPU_PAGE_SIZE-1))); len = big_round(len); #ifdef CONFIG_DEBUG p -= CPU_PAGE_SIZE; diff --cc lib/fb-direct.c index c3b74e15,00000000..32f17be8 mode 100644,000000..100644 --- a/lib/fb-direct.c +++ b/lib/fb-direct.c @@@ -1,399 -1,0 +1,401 @@@ +/* + * UCW Library -- Fast Buffered I/O on O_DIRECT Files + * + * (c) 2006 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +/* + * This is a fastbuf backend for fast streaming I/O using O_DIRECT and + * the asynchronous I/O module. It's designed for use on large files + * which don't fit in the disk cache. + * + * CAVEATS: + * + * - All operations with a single fbdirect handle must be done + * within a single thread, unless you provide a custom I/O queue + * and take care of locking. + * + * FIXME: what if the OS doesn't support O_DIRECT? + * FIXME: unaligned seeks and partial writes? + * FIXME: merge with other file-oriented fastbufs + */ + +#undef LOCAL_DEBUG + +#include "lib/lib.h" +#include "lib/fastbuf.h" +#include "lib/lfs.h" +#include "lib/asio.h" +#include "lib/conf.h" +#include "lib/threads.h" + +#include +#include +#include ++#include + +static uns fbdir_cheat; +static uns fbdir_buffer_size = 65536; +static uns fbdir_read_ahead = 1; +static uns fbdir_write_back = 1; + +static struct cf_section fbdir_cf = { + CF_ITEMS { + CF_UNS("Cheat", &fbdir_cheat), + CF_UNS("BufferSize", &fbdir_buffer_size), + CF_UNS("ReadAhead", &fbdir_read_ahead), + CF_UNS("WriteBack", &fbdir_write_back), + CF_END + } +}; + +#define FBDIR_ALIGN 512 + +enum fbdir_mode { // Current operating mode + M_NULL, + M_READ, + M_WRITE +}; + +struct fb_direct { + struct fastbuf fb; + int fd; // File descriptor + int is_temp_file; // 0=normal file, 1=temporary file, delete on close, -1=shared FD + struct asio_queue *io_queue; // I/O queue to use + struct asio_queue *user_queue; // If io_queue was supplied by the user + struct asio_request *pending_read; + struct asio_request *done_read; + struct asio_request *active_buffer; + enum fbdir_mode mode; + byte name[0]; +}; +#define FB_DIRECT(f) ((struct fb_direct *)(f)->is_fastbuf) + +static void CONSTRUCTOR +fbdir_global_init(void) +{ + cf_declare_section("FBDirect", &fbdir_cf, 0); +} + +static void +fbdir_read_sync(struct fb_direct *F) +{ + while (F->pending_read) + { + struct asio_request *r = asio_wait(F->io_queue); + ASSERT(r); + struct fb_direct *G = r->user_data; + ASSERT(G); + ASSERT(G->pending_read == r && !G->done_read); + G->pending_read = NULL; + G->done_read = r; + } +} + +static void +fbdir_change_mode(struct fb_direct *F, enum fbdir_mode mode) +{ + if (F->mode == mode) + return; + DBG("FB-DIRECT: Switching mode to %d", mode); + switch (F->mode) + { + case M_NULL: + break; + case M_READ: + fbdir_read_sync(F); // Wait for read-ahead requests to finish + if (F->done_read) // Return read-ahead requests if any + { + asio_put(F->done_read); + F->done_read = NULL; + } + break; + case M_WRITE: + asio_sync(F->io_queue); // Wait for pending writebacks + break; + } + if (F->active_buffer) + { + asio_put(F->active_buffer); + F->active_buffer = NULL; + } + F->mode = mode; +} + +static void +fbdir_submit_read(struct fb_direct *F) +{ + struct asio_request *r = asio_get(F->io_queue); + r->fd = F->fd; + r->op = ASIO_READ; + r->len = F->io_queue->buffer_size; + r->user_data = F; + asio_submit(r); + F->pending_read = r; +} + +static int +fbdir_refill(struct fastbuf *f) +{ + struct fb_direct *F = FB_DIRECT(f); + + DBG("FB-DIRECT: Refill"); + + if (!F->done_read) + { + if (!F->pending_read) + { + fbdir_change_mode(F, M_READ); + fbdir_submit_read(F); + } + fbdir_read_sync(F); + ASSERT(F->done_read); + } + + struct asio_request *r = F->done_read; + F->done_read = NULL; + if (F->active_buffer) + asio_put(F->active_buffer); + F->active_buffer = r; + if (!r->status) + return 0; + if (r->status < 0) + die("Error reading %s: %s", f->name, strerror(r->returned_errno)); + f->bptr = f->buffer = r->buffer; + f->bstop = f->bufend = f->buffer + r->status; + f->pos += r->status; + + fbdir_submit_read(F); // Read-ahead the next block + + return r->status; +} + +static void +fbdir_spout(struct fastbuf *f) +{ + struct fb_direct *F = FB_DIRECT(f); + struct asio_request *r; + + DBG("FB-DIRECT: Spout"); + + fbdir_change_mode(F, M_WRITE); + r = F->active_buffer; + if (r && f->bptr > f->bstop) + { + r->op = ASIO_WRITE_BACK; + r->fd = F->fd; + r->len = f->bptr - f->bstop; + ASSERT(!(f->pos % FBDIR_ALIGN) || fbdir_cheat); + f->pos += r->len; + if (!fbdir_cheat && r->len % FBDIR_ALIGN) // Have to simulate incomplete writes + { + r->len = ALIGN_TO(r->len, FBDIR_ALIGN); + asio_submit(r); + asio_sync(F->io_queue); + DBG("FB-DIRECT: Truncating at %Ld", (long long)f->pos); + if (sh_ftruncate(F->fd, f->pos) < 0) + die("Error truncating %s: %m", f->name); + } + else + asio_submit(r); + r = NULL; + } + if (!r) + r = asio_get(F->io_queue); + f->bstop = f->bptr = f->buffer = r->buffer; + f->bufend = f->buffer + F->io_queue->buffer_size; + F->active_buffer = r; +} + - static void ++static int +fbdir_seek(struct fastbuf *f, sh_off_t pos, int whence) +{ + DBG("FB-DIRECT: Seek %Ld %d", (long long)pos, whence); + + if (whence == SEEK_SET && pos == f->pos) - return; ++ return 1; + + fbdir_change_mode(FB_DIRECT(f), M_NULL); // Wait for all async requests to finish + sh_off_t l = sh_seek(FB_DIRECT(f)->fd, pos, whence); + if (l < 0) - die("lseek on %s: %m", f->name); ++ return 0; + f->pos = l; ++ return 1; +} + +static struct asio_queue * +fbdir_get_io_queue(void) +{ + struct ucwlib_context *ctx = ucwlib_thread_context(); + struct asio_queue *q = ctx->io_queue; + if (!q) + { + q = xmalloc_zero(sizeof(struct asio_queue)); + q->buffer_size = fbdir_buffer_size; + q->max_writebacks = fbdir_write_back; + asio_init_queue(q); + ctx->io_queue = q; + } + q->use_count++; + DBG("FB-DIRECT: Got I/O queue, uc=%d", q->use_count); + return q; +} + +static void +fbdir_put_io_queue(void) +{ + struct ucwlib_context *ctx = ucwlib_thread_context(); + struct asio_queue *q = ctx->io_queue; + ASSERT(q); + DBG("FB-DIRECT: Put I/O queue, uc=%d", q->use_count); + if (!--q->use_count) + { + asio_cleanup_queue(q); + xfree(q); + ctx->io_queue = NULL; + } +} + +static void +fbdir_close(struct fastbuf *f) +{ + struct fb_direct *F = FB_DIRECT(f); + + DBG("FB-DIRECT: Close"); + + fbdir_change_mode(F, M_NULL); + if (!F->user_queue) + fbdir_put_io_queue(); + + switch (F->is_temp_file) + { + case 1: + if (unlink(f->name) < 0) + log(L_ERROR, "unlink(%s): %m", f->name); + case 0: + close(F->fd); + } + + xfree(f); +} + +static int +fbdir_config(struct fastbuf *f, uns item, int value) +{ + switch (item) + { + case BCONFIG_IS_TEMP_FILE: + FB_DIRECT(f)->is_temp_file = value; + return 0; + default: + return -1; + } +} + +static struct fastbuf * +fbdir_open_internal(byte *name, int fd, struct asio_queue *q) +{ + int namelen = strlen(name) + 1; + struct fb_direct *F = xmalloc(sizeof(struct fb_direct) + namelen); + struct fastbuf *f = &F->fb; + + DBG("FB-DIRECT: Open"); + bzero(F, sizeof(*F)); + f->name = F->name; + memcpy(f->name, name, namelen); + F->fd = fd; + if (q) + F->io_queue = F->user_queue = q; + else + F->io_queue = fbdir_get_io_queue(); + f->refill = fbdir_refill; + f->spout = fbdir_spout; + f->seek = fbdir_seek; + f->close = fbdir_close; + f->config = fbdir_config; + f->can_overwrite_buffer = 2; + return f; +} + +struct fastbuf * +fbdir_open_try(byte *name, uns mode, struct asio_queue *q) +{ + if (!fbdir_cheat) + mode |= O_DIRECT; + int fd = sh_open(name, mode, 0666); + if (fd < 0) + return NULL; + struct fastbuf *b = fbdir_open_internal(name, fd, q); + if (mode & O_APPEND) + fbdir_seek(b, 0, SEEK_END); + return b; +} + +struct fastbuf * +fbdir_open(byte *name, uns mode, struct asio_queue *q) +{ + struct fastbuf *b = fbdir_open_try(name, mode, q); + if (!b) + die("Unable to %s file %s: %m", + (mode & O_CREAT) ? "create" : "open", name); + return b; +} + +struct fastbuf * +fbdir_open_fd(int fd, struct asio_queue *q) +{ + byte x[32]; + + sprintf(x, "fd%d", fd); + if (!fbdir_cheat && fcntl(fd, F_SETFL, fcntl(fd, F_GETFL) | O_DIRECT) < 0) + log(L_WARN, "Cannot set O_DIRECT on fd %d: %m", fd); + return fbdir_open_internal(x, fd, q); +} + +struct fastbuf * +fbdir_open_tmp(struct asio_queue *q) +{ + byte buf[TEMP_FILE_NAME_LEN]; + struct fastbuf *f; + + temp_file_name(buf); + f = fbdir_open(buf, O_RDWR | O_CREAT | O_TRUNC, q); + bconfig(f, BCONFIG_IS_TEMP_FILE, 1); + return f; +} + +#ifdef TEST + +#include "lib/getopt.h" + +int main(int argc, char **argv) +{ + struct fastbuf *f, *t; + + log_init(NULL); + if (cf_getopt(argc, argv, CF_SHORT_OPTS, CF_NO_LONG_OPTS, NULL) >= 0) + die("Hey, whaddya want?"); + f = (optind < argc) ? fbdir_open(argv[optind++], O_RDONLY, NULL) : fbdir_open_fd(0, NULL); + t = (optind < argc) ? fbdir_open(argv[optind++], O_RDWR | O_CREAT | O_TRUNC, NULL) : fbdir_open_fd(1, NULL); + + bbcopy(f, t, ~0U); + ASSERT(btell(f) == btell(t)); + +#if 0 // This triggers unaligned write + bflush(t); + bputc(t, '\n'); +#endif + + brewind(t); + bgetc(t); + ASSERT(btell(t) == 1); + + bclose(f); + bclose(t); + return 0; +} + +#endif diff --cc lib/lib.h index d8a1ddff,bad9f30d..41a6283a --- a/lib/lib.h +++ b/lib/lib.h @@@ -265,7 -268,11 +268,11 @@@ byte *str_format_flags(byte *dest, cons /* bigalloc.c */ - void *big_alloc(u64 len); + void *page_alloc(unsigned int len) LIKE_MALLOC; // allocates a multiple of CPU_PAGE_SIZE bytes with mmap + void page_free(void *start, unsigned int len); + void *page_realloc(void *start, unsigned int old_len, unsigned int new_len); + -void *big_alloc(unsigned int len) LIKE_MALLOC; // allocate a large memory block in the most efficient way available -void big_free(void *start, unsigned int len); ++void *big_alloc(u64 len) LIKE_MALLOC; // allocate a large memory block in the most efficient way available +void big_free(void *start, u64 len); #endif diff --cc lib/sorter/old-test.c index 6c31c62c,00000000..8f2aacaa mode 100644,000000..100644 --- a/lib/sorter/old-test.c +++ b/lib/sorter/old-test.c @@@ -1,412 -1,0 +1,413 @@@ +/* + * UCW Library -- Testing the Old Sorter + * + * (c) 2007 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#include "lib/lib.h" +#include "lib/getopt.h" +#include "lib/conf.h" +#include "lib/fastbuf.h" ++#include "lib/ff-binary.h" +#include "lib/hashfunc.h" +#include "lib/md5.h" + +#include +#include +#include +#include +#include + +/*** Time measurement ***/ + +static timestamp_t timer; + +static void +start(void) +{ + sync(); + init_timer(&timer); +} + +static void +stop(void) +{ + sync(); + log(L_INFO, "Test took %.3fs", get_timer(&timer) / 1000.); +} + +/*** Simple 4-byte integer keys ***/ + +struct key1 { + u32 x; +}; + +static inline int s1_compare(struct key1 *x, struct key1 *y) +{ + COMPARE(x->x, y->x); + return 0; +} + +#define SORT_KEY struct key1 +#define SORT_PREFIX(x) s1_##x +#define SORT_INPUT_FB +#define SORT_OUTPUT_FB +#define SORT_UNIQUE +#define SORT_REGULAR +#define SORT_PRESORT + +#include "lib/sorter.h" + +static void +test_int(int mode, u64 size) +{ + uns N = size ? nextprime(MIN(size/4, 0xffff0000)) : 0; + uns K = N/4*3; + log(L_INFO, ">>> Integers (%s, N=%u)", ((char *[]) { "increasing", "decreasing", "random" })[mode], N); + + struct fastbuf *f = bopen_tmp(65536); + for (uns i=0; ihash[i], y->hash[i]); + return 0; +} + +#define SORT_KEY struct key3 +#define SORT_PREFIX(x) s3_##x +#define SORT_INPUT_FB +#define SORT_OUTPUT_FB +#define SORT_REGULAR +#define SORT_PRESORT + +#include "lib/sorter.h" + +static void +gen_hash_key(int mode, struct key3 *k, uns i) +{ + k->i = i; + k->payload[0] = 7*i + 13; + k->payload[1] = 13*i + 19; + k->payload[2] = 19*i + 7; + switch (mode) + { + case 0: + k->hash[0] = i; + k->hash[1] = k->payload[0]; + k->hash[2] = k->payload[1]; + k->hash[3] = k->payload[2]; + break; + case 1: + k->hash[0] = ~i; + k->hash[1] = k->payload[0]; + k->hash[2] = k->payload[1]; + k->hash[3] = k->payload[2]; + break; + default: ; + struct MD5Context ctx; + MD5Init(&ctx); + MD5Update(&ctx, (byte*) &k->i, 4); + MD5Final((byte*) &k->hash, &ctx); + break; + } +} + +static void +test_hashes(int mode, u64 size) +{ + uns N = MIN(size / sizeof(struct key3), 0xffffffff); + log(L_INFO, ">>> Hashes (%s, N=%u)", ((char *[]) { "increasing", "decreasing", "random" })[mode], N); + struct key3 k, lastk; + + struct fastbuf *f = bopen_tmp(65536); + uns hash_sum = 0; + for (uns i=0; ilen = len; + breadb(f, x->s, len); + return 1; +} + +static inline void s4_copy_data(struct fastbuf *i UNUSED, struct fastbuf *f, struct key4 *x) +{ + bputl(f, x->len); + bwrite(f, x->s, x->len); +} + +static inline int s4_compare(struct key4 *x, struct key4 *y) +{ + uns l = MIN(x->len, y->len); + int c = memcmp(x->s, y->s, l); + if (c) + return c; + COMPARE(x->len, y->len); + return 0; +} + +static inline byte *s4_fetch_item(struct fastbuf *f UNUSED, struct key4 *x, byte *limit UNUSED) +{ + return &x->s[x->len]; +} + +static inline void s4_store_item(struct fastbuf *f, struct key4 *x) +{ + s4_copy_data(NULL, f, x); +} + +#define SORT_KEY struct key4 +#define SORT_PREFIX(x) s4_##x +#define SORT_INPUT_FB +#define SORT_OUTPUT_FB +#define SORT_PRESORT + +#include "lib/sorter.h" + +#define s4b_compare s4_compare +#define s4b_fetch_key s4_fetch_key + +static inline uns s4_data_size(struct key4 *x) +{ + return x->len ? (x->s[0] ^ 0xad) : 0; +} + +static inline void s4b_copy_data(struct fastbuf *i, struct fastbuf *f, struct key4 *x) +{ + bputl(f, x->len); + bwrite(f, x->s, x->len); + bbcopy(i, f, s4_data_size(x)); +} + +static inline byte *s4b_fetch_item(struct fastbuf *f, struct key4 *x, byte *limit) +{ + byte *d = &x->s[x->len]; + if (d + s4_data_size(x) > limit) + return NULL; + breadb(f, d, s4_data_size(x)); + return d + s4_data_size(x); +} + +static inline void s4b_store_item(struct fastbuf *f, struct key4 *x) +{ + bputl(f, x->len); + bwrite(f, x->s, x->len + s4_data_size(x)); +} + +#define SORT_KEY struct key4 +#define SORT_PREFIX(x) s4b_##x +#define SORT_INPUT_FB +#define SORT_OUTPUT_FB +#define SORT_PRESORT + +#include "lib/sorter.h" + +static void +gen_key4(struct key4 *k) +{ + k->len = random_max(KEY4_MAX); + for (uns i=0; ilen; i++) + k->s[i] = random(); +} + +static void +gen_data4(byte *buf, uns len, uns h) +{ + while (len--) + { + *buf++ = h >> 24; + h = h*259309 + 17; + } +} + +static void +test_strings(uns mode, u64 size) +{ + uns avg_item_size = KEY4_MAX/2 + 4 + (mode ? 128 : 0); + uns N = MIN(size / avg_item_size, 0xffffffff); + log(L_INFO, ">>> Strings %s(N=%u)", (mode ? "with data " : ""), N); + srand(1); + + struct key4 k, lastk; + byte buf[256], buf2[256]; + uns sum = 0; + + struct fastbuf *f = bopen_tmp(65536); + for (uns i=0; i= 0) + switch (c) + { + case 's': + if (cf_parse_u64(optarg, &size)) + goto usage; + break; + case 't': + t = atol(optarg); + if (t >= TMAX) + goto usage; + break; + case 'v': + sorter_trace++; + break; + default: + usage: + fputs("Usage: sort-test [-v] [-s ] [-t ]\n", stderr); + exit(1); + } + if (optind != argc) + goto usage; + + if (t != ~0U) + run_test(t, size); + else + for (uns i=0; i + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#include "lib/lib.h" +#include "lib/getopt.h" +#include "lib/conf.h" +#include "lib/fastbuf.h" ++#include "lib/ff-binary.h" +#include "lib/hashfunc.h" +#include "lib/md5.h" + +#include +#include +#include +#include +#include + +/*** Time measurement ***/ + +static timestamp_t timer; + +static void +start(void) +{ + sync(); + init_timer(&timer); +} + +static void +stop(void) +{ + sync(); + log(L_INFO, "Test took %.3fs", get_timer(&timer) / 1000.); +} + +/*** Simple 4-byte integer keys ***/ + +struct key1 { + u32 x; +}; + +#define SORT_KEY_REGULAR struct key1 +#define SORT_PREFIX(x) s1_##x +#define SORT_INPUT_FB +#define SORT_OUTPUT_FB +#define SORT_UNIQUE +#define SORT_INT(k) (k).x + +#include "lib/sorter/sorter.h" + +static void +test_int(int mode, u64 size) +{ + uns N = size ? nextprime(MIN(size/4, 0xffff0000)) : 0; + uns K = N/4*3; + log(L_INFO, ">>> Integers (%s, N=%u)", ((char *[]) { "increasing", "decreasing", "random" })[mode], N); + + struct fastbuf *f = bopen_tmp(65536); + for (uns i=0; icnt += k[i]->cnt; + bwrite(f, k[0], sizeof(struct key2)); +} + +static inline void s2_copy_merged(struct key2 **k, struct fastbuf **d UNUSED, uns n, struct fastbuf *dest) +{ + for (uns i=1; icnt += k[i]->cnt; + bwrite(dest, k[0], sizeof(struct key2)); +} + +#define SORT_KEY_REGULAR struct key2 +#define SORT_PREFIX(x) s2_##x +#define SORT_INPUT_FB +#define SORT_OUTPUT_FB +#define SORT_UNIFY +#define SORT_INT(k) (k).x + +#include "lib/sorter/sorter.h" + +static void +test_counted(int mode, u64 size) +{ + u64 items = size / sizeof(struct key2); + uns mult = 2; + while (items/(2*mult) > 0xffff0000) + mult++; + uns N = items ? nextprime(items/(2*mult)) : 0; + uns K = N/4*3; + log(L_INFO, ">>> Counted integers (%s, N=%u, mult=%u)", ((char *[]) { "increasing", "decreasing", "random" })[mode], N, mult); + + struct fastbuf *f = bopen_tmp(65536); + for (uns m=0; mhash[i], y->hash[i]); + return 0; +} + +static inline uns s3_hash(struct key3 *x) +{ + return x->hash[0]; +} + +#define SORT_KEY_REGULAR struct key3 +#define SORT_PREFIX(x) s3_##x +#define SORT_INPUT_FB +#define SORT_OUTPUT_FB +#define SORT_HASH_BITS 32 + +#include "lib/sorter/sorter.h" + +static void +gen_hash_key(int mode, struct key3 *k, uns i) +{ + k->i = i; + k->payload[0] = 7*i + 13; + k->payload[1] = 13*i + 19; + k->payload[2] = 19*i + 7; + switch (mode) + { + case 0: + k->hash[0] = i; + k->hash[1] = k->payload[0]; + k->hash[2] = k->payload[1]; + k->hash[3] = k->payload[2]; + break; + case 1: + k->hash[0] = ~i; + k->hash[1] = k->payload[0]; + k->hash[2] = k->payload[1]; + k->hash[3] = k->payload[2]; + break; + default: ; + struct MD5Context ctx; + MD5Init(&ctx); + MD5Update(&ctx, (byte*) &k->i, 4); + MD5Final((byte*) &k->hash, &ctx); + break; + } +} + +static void +test_hashes(int mode, u64 size) +{ + uns N = MIN(size / sizeof(struct key3), 0xffffffff); + log(L_INFO, ">>> Hashes (%s, N=%u)", ((char *[]) { "increasing", "decreasing", "random" })[mode], N); + struct key3 k, lastk; + + struct fastbuf *f = bopen_tmp(65536); + uns hash_sum = 0; + for (uns i=0; ilen, y->len); + int c = memcmp(x->s, y->s, l); + if (c) + return c; + COMPARE(x->len, y->len); + return 0; +} + +static inline int s4_read_key(struct fastbuf *f, struct key4 *x) +{ + x->len = bgetl(f); + if (x->len == 0xffffffff) + return 0; + ASSERT(x->len < KEY4_MAX); + breadb(f, x->s, x->len); + return 1; +} + +static inline void s4_write_key(struct fastbuf *f, struct key4 *x) +{ + ASSERT(x->len < KEY4_MAX); + bputl(f, x->len); + bwrite(f, x->s, x->len); +} + +#define SORT_KEY struct key4 +#define SORT_PREFIX(x) s4_##x +#define SORT_KEY_SIZE(x) (sizeof(struct key4) - KEY4_MAX + (x).len) +#define SORT_INPUT_FB +#define SORT_OUTPUT_FB + +#include "lib/sorter/sorter.h" + +#define s4b_compare s4_compare +#define s4b_read_key s4_read_key +#define s4b_write_key s4_write_key + +static inline uns s4_data_size(struct key4 *x) +{ + return x->len ? (x->s[0] ^ 0xad) : 0; +} + +#define SORT_KEY struct key4 +#define SORT_PREFIX(x) s4b_##x +#define SORT_KEY_SIZE(x) (sizeof(struct key4) - KEY4_MAX + (x).len) +#define SORT_DATA_SIZE(x) s4_data_size(&(x)) +#define SORT_INPUT_FB +#define SORT_OUTPUT_FB + +#include "lib/sorter/sorter.h" + +static void +gen_key4(struct key4 *k) +{ + k->len = random_max(KEY4_MAX); + for (uns i=0; ilen; i++) + k->s[i] = random(); +} + +static void +gen_data4(byte *buf, uns len, uns h) +{ + while (len--) + { + *buf++ = h >> 24; + h = h*259309 + 17; + } +} + +static void +test_strings(uns mode, u64 size) +{ + uns avg_item_size = KEY4_MAX/2 + 4 + (mode ? 128 : 0); + uns N = MIN(size / avg_item_size, 0xffffffff); + log(L_INFO, ">>> Strings %s(N=%u)", (mode ? "with data " : ""), N); + srand(1); + + struct key4 k, lastk; + byte buf[256], buf2[256]; + uns sum = 0; + + struct fastbuf *f = bopen_tmp(65536); + for (uns i=0; i= s5_N) + { + if (s5_i >= s5_N-1) + return 0; + s5_j = 0; + s5_i++; + } + p->x = ((u64)s5_j * s5_K) % s5_N; + p->y = ((u64)(s5_i + s5_j) * s5_L) % s5_N; + s5_j++; + return 1; +} + +#define ASORT_PREFIX(x) s5m_##x +#define ASORT_KEY_TYPE u32 +#define ASORT_ELT(i) ary[i] +#define ASORT_EXTRA_ARGS , u32 *ary +#include "lib/arraysort.h" + +static void s5_write_merged(struct fastbuf *f, struct key5 **keys, void **data, uns n, void *buf) +{ + u32 *a = buf; + uns m = 0; + for (uns i=0; icnt); + m += keys[i]->cnt; + } + s5m_sort(m, a); + keys[0]->cnt = m; + bwrite(f, keys[0], sizeof(struct key5)); + bwrite(f, a, 4*m); /* FIXME: Might overflow here */ +} + +static void s5_copy_merged(struct key5 **keys, struct fastbuf **data, uns n, struct fastbuf *dest) +{ + u32 k[n]; + uns m = 0; + for (uns i=0; icnt; + } + struct key5 key = { .x = keys[0]->x, .cnt = m }; + bwrite(dest, &key, sizeof(key)); + while (key.cnt--) + { + uns b = 0; + for (uns i=1; icnt) + k[b] = bgetl(data[b]); + else + k[b] = ~0U; + } +} + +static inline int s5p_lt(struct s5_pair x, struct s5_pair y) +{ + COMPARE_LT(x.x, y.x); + COMPARE_LT(x.y, y.y); + return 0; +} + +/* FIXME: Use smarter internal sorter when it's available */ +#define ASORT_PREFIX(x) s5p_##x +#define ASORT_KEY_TYPE struct s5_pair +#define ASORT_ELT(i) ary[i] +#define ASORT_LT(x,y) s5p_lt(x,y) +#define ASORT_EXTRA_ARGS , struct s5_pair *ary +#include "lib/arraysort.h" + +static int s5_presort(struct fastbuf *dest, void *buf, size_t bufsize) +{ + uns max = MIN(bufsize/sizeof(struct s5_pair), 0xffffffff); + struct s5_pair *a = buf; + uns n = 0; + while (n>> Graph%s (N=%u)", (mode ? "" : " with custom presorting"), N); + s5_N = N; + s5_K = N/4*3; + s5_L = N/3*2; + s5_i = s5_j = 0; + + struct fastbuf *in = NULL; + if (mode) + { + struct s5_pair p; + in = bopen_tmp(65536); + while (s5_gen(&p)) + { + struct key5 k = { .x = p.x, .cnt = 1 }; + bwrite(in, &k, sizeof(k)); + bputl(in, p.y); + } + brewind(in); + } + + start(); + struct fastbuf *f = bopen_tmp(65536); + bputl(f, 0xfeedcafe); + struct fastbuf *g = (mode ? s5b_sort(in, f, s5_N-1) : s5_sort(NULL, f, s5_N-1)); + ASSERT(f == g); + stop(); + + SORT_XTRACE(2, "Verifying"); + uns c = bgetl(f); + ASSERT(c == 0xfeedcafe); + for (uns i=0; i= 0) + switch (c) + { + case 'd': + sorter_debug = atol(optarg); + break; + case 's': + if (cf_parse_u64(optarg, &size)) + goto usage; + break; + case 't': + t = atol(optarg); + if (t >= TMAX) + goto usage; + break; + case 'v': + sorter_trace++; + break; + default: + usage: + fputs("Usage: sort-test [-v] [-d ] [-s ] [-t ]\n", stderr); + exit(1); + } + if (optind != argc) + goto usage; + + if (t != ~0U) + run_test(t, size); + else + for (uns i=0; i