From: Pavel Charvat Date: Mon, 25 Jun 2007 08:38:49 +0000 (+0200) Subject: Merge with git+ssh://git.ucw.cz/projects/sherlock/GIT/sherlock.git X-Git-Tag: holmes-import~506^2~13^2~97 X-Git-Url: http://mj.ucw.cz/gitweb/?a=commitdiff_plain;h=69808c861b8df0745bd0bc93793f558e0055da7e;p=libucw.git Merge with git+ssh://git.ucw.cz/projects/sherlock/GIT/sherlock.git --- 69808c861b8df0745bd0bc93793f558e0055da7e diff --cc debug/sorter/asio-test.c index a7c61627,00000000..40df77b3 mode 100644,000000..100644 --- a/debug/sorter/asio-test.c +++ b/debug/sorter/asio-test.c @@@ -1,157 -1,0 +1,157 @@@ +/* + * An experiment with parallel reading and writing of files using ASIO. + */ + +#include "lib/lib.h" +#include "lib/lfs.h" +#include "lib/asio.h" + +#include +#include +#include +#include +#include + +#define COPY +#define DIRECT O_DIRECT + +static timestamp_t timer; + +#define P_INIT do { cnt = 0; cnt_rep = 0; cnt_ms = 1; } while(0) +#define P_UPDATE(cc) do { \ + cnt += cc; \ + if (cnt >= cnt_rep) { cnt_ms += get_timer(&timer); \ + printf("%d of %d MB (%.2f MB/sec)\r", (int)(cnt >> 20), (int)(total_size >> 20), (double)cnt / 1048576 * 1000 / cnt_ms); \ + fflush(stdout); cnt_rep += 1<<26; } } while(0) +#define P_FINAL do { \ + cnt_ms += get_timer(&timer); \ - log(L_INFO, "Spent %.3f sec (%.2f MB/sec)", (double)cnt_ms/1000, (double)cnt / 1048576 * 1000 / cnt_ms); \ ++ msg(L_INFO, "Spent %.3f sec (%.2f MB/sec)", (double)cnt_ms/1000, (double)cnt / 1048576 * 1000 / cnt_ms); \ +} while(0) + +static struct asio_queue io_queue; + +int main(int argc, char **argv) +{ + ASSERT(argc == 4); + uns files = atol(argv[1]); + uns bufsize = atol(argv[2]) * 1024; // Kbytes + u64 total_size = (u64)atol(argv[3]) * 1024*1024*1024; // Gbytes + u64 cnt, cnt_rep; + uns cnt_ms; + int fd[files]; + byte name[files][16]; + struct asio_request *req[files]; + + init_timer(&timer); + + io_queue.buffer_size = bufsize; + io_queue.max_writebacks = 2; + asio_init_queue(&io_queue); + +#ifdef COPY - log(L_INFO, "Creating input file"); ++ msg(L_INFO, "Creating input file"); + int in_fd = sh_open("tmp/ft-in", O_RDWR | O_CREAT | O_TRUNC | DIRECT, 0666); + ASSERT(in_fd >= 0); + ASSERT(!(total_size % bufsize)); + P_INIT; + for (uns i=0; iop = ASIO_WRITE_BACK; + r->fd = in_fd; + r->len = bufsize; + byte *xbuf = r->buffer; + for (uns j=0; j> 20), files, bufsize); ++ msg(L_INFO, "Writing %d MB to %d files in parallel with %d byte buffers", (int)(total_size >> 20), files, bufsize); + P_INIT; + for (uns i=0; iop = ASIO_READ; + rd->fd = in_fd; + rd->len = bufsize; + asio_submit(rd); + rr = asio_wait(&io_queue); + ASSERT(rr == rd && rd->status == (int)rd->len); + memcpy(r->buffer, rd->buffer, bufsize); + asio_put(rr); +#else + for (uns j=0; jbuffer[j] = round+i+j; +#endif + r->op = ASIO_WRITE_BACK; + r->fd = fd[i]; + r->len = bufsize; + asio_submit(r); + P_UPDATE(bufsize); + req[i] = asio_get(&io_queue); + } + } + for (uns i=0; iop = ASIO_READ; + r->fd = fd[i]; + r->len = bufsize; + asio_submit(r); + rr = asio_wait(&io_queue); + ASSERT(rr == r && r->status == (int)bufsize); + asio_put(r); + P_UPDATE(bufsize); + } + close(fd[i]); + } + P_FINAL; + + for (uns i=0; i +#include +#include +#include + +#define COPY +#define DIRECT O_DIRECT + +static timestamp_t timer; + +#define P_INIT do { cnt = 0; cnt_rep = 0; cnt_ms = 1; } while(0) +#define P_UPDATE(cc) do { \ + cnt += cc; \ + if (cnt >= cnt_rep) { cnt_ms += get_timer(&timer); \ + printf("%d of %d MB (%.2f MB/sec)\r", (int)(cnt >> 20), (int)(total_size >> 20), (double)cnt / 1048576 * 1000 / cnt_ms); \ + fflush(stdout); cnt_rep += 1<<26; } } while(0) +#define P_FINAL do { \ + cnt_ms += get_timer(&timer); \ - log(L_INFO, "Spent %.3f sec (%.2f MB/sec)", (double)cnt_ms/1000, (double)cnt / 1048576 * 1000 / cnt_ms); \ ++ msg(L_INFO, "Spent %.3f sec (%.2f MB/sec)", (double)cnt_ms/1000, (double)cnt / 1048576 * 1000 / cnt_ms); \ +} while(0) + +int main(int argc, char **argv) +{ + ASSERT(argc == 4); + uns files = atol(argv[1]); + uns bufsize = atol(argv[2]) * 1024; // Kbytes + u64 total_size = (u64)atol(argv[3]) * 1024*1024*1024; // Gbytes + u64 cnt, cnt_rep; + uns cnt_ms; + int fd[files]; + byte *buf[files], name[files][16]; + uns xbufsize = bufsize; // Used for single-file I/O + byte *xbuf = big_alloc(xbufsize); + + init_timer(&timer); + +#ifdef COPY - log(L_INFO, "Creating input file"); ++ msg(L_INFO, "Creating input file"); + int in_fd = sh_open("tmp/ft-in", O_RDWR | O_CREAT | O_TRUNC | DIRECT, 0666); + ASSERT(in_fd >= 0); + ASSERT(!(total_size % xbufsize)); + P_INIT; + for (uns i=0; i> 20), files, bufsize); ++ msg(L_INFO, "Writing %d MB to %d files in parallel with %d byte buffers", (int)(total_size >> 20), files, bufsize); + P_INIT; + for (uns r=0; r + 112 116 + - Run: obj/lib/fb-grow-t ++Run: ../obj/lib/fb-grow-t +Out: <10><10><0>1234512345<10><9>5<10> + <10><10><0>1234512345<10><9>5<10> + <10><10><0>1234512345<10><9>5<10> + <10><10><0>1234512345<10><9>5<10> + <10><10><0>1234512345<10><9>5<10> + - Run: obj/lib/fb-pool-t ++Run: ../obj/lib/fb-pool-t diff --cc lib/fb-direct.c index b82faa91,00000000..806064a9 mode 100644,000000..100644 --- a/lib/fb-direct.c +++ b/lib/fb-direct.c @@@ -1,348 -1,0 +1,348 @@@ +/* + * UCW Library -- Fast Buffered I/O on O_DIRECT Files + * + * (c) 2006 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +/* + * This is a fastbuf backend for fast streaming I/O using O_DIRECT and + * the asynchronous I/O module. It's designed for use on large files + * which don't fit in the disk cache. + * + * CAVEATS: + * + * - All operations with a single fbdirect handle must be done + * within a single thread, unless you provide a custom I/O queue + * and take care of locking. + * + * FIXME: what if the OS doesn't support O_DIRECT? + * FIXME: unaligned seeks and partial writes? + * FIXME: merge with other file-oriented fastbufs + */ + +#undef LOCAL_DEBUG + +#include "lib/lib.h" +#include "lib/fastbuf.h" +#include "lib/lfs.h" +#include "lib/asio.h" +#include "lib/conf.h" +#include "lib/threads.h" + +#include +#include +#include +#include + +uns fbdir_cheat; + +static struct cf_section fbdir_cf = { + CF_ITEMS { + CF_UNS("Cheat", &fbdir_cheat), + CF_END + } +}; + +#define FBDIR_ALIGN 512 + +enum fbdir_mode { // Current operating mode + M_NULL, + M_READ, + M_WRITE +}; + +struct fb_direct { + struct fastbuf fb; + int fd; // File descriptor + int is_temp_file; // 0=normal file, 1=temporary file, delete on close, -1=shared FD + struct asio_queue *io_queue; // I/O queue to use + struct asio_queue *user_queue; // If io_queue was supplied by the user + struct asio_request *pending_read; + struct asio_request *done_read; + struct asio_request *active_buffer; + enum fbdir_mode mode; + byte name[0]; +}; +#define FB_DIRECT(f) ((struct fb_direct *)(f)->is_fastbuf) + +static void CONSTRUCTOR +fbdir_global_init(void) +{ + cf_declare_section("FBDirect", &fbdir_cf, 0); +} + +static void +fbdir_read_sync(struct fb_direct *F) +{ + while (F->pending_read) + { + struct asio_request *r = asio_wait(F->io_queue); + ASSERT(r); + struct fb_direct *G = r->user_data; + ASSERT(G); + ASSERT(G->pending_read == r && !G->done_read); + G->pending_read = NULL; + G->done_read = r; + } +} + +static void +fbdir_change_mode(struct fb_direct *F, enum fbdir_mode mode) +{ + if (F->mode == mode) + return; + DBG("FB-DIRECT: Switching mode to %d", mode); + switch (F->mode) + { + case M_NULL: + break; + case M_READ: + fbdir_read_sync(F); // Wait for read-ahead requests to finish + if (F->done_read) // Return read-ahead requests if any + { + asio_put(F->done_read); + F->done_read = NULL; + } + break; + case M_WRITE: + asio_sync(F->io_queue); // Wait for pending writebacks + break; + } + if (F->active_buffer) + { + asio_put(F->active_buffer); + F->active_buffer = NULL; + } + F->mode = mode; +} + +static void +fbdir_submit_read(struct fb_direct *F) +{ + struct asio_request *r = asio_get(F->io_queue); + r->fd = F->fd; + r->op = ASIO_READ; + r->len = F->io_queue->buffer_size; + r->user_data = F; + asio_submit(r); + F->pending_read = r; +} + +static int +fbdir_refill(struct fastbuf *f) +{ + struct fb_direct *F = FB_DIRECT(f); + + DBG("FB-DIRECT: Refill"); + + if (!F->done_read) + { + if (!F->pending_read) + { + fbdir_change_mode(F, M_READ); + fbdir_submit_read(F); + } + fbdir_read_sync(F); + ASSERT(F->done_read); + } + + struct asio_request *r = F->done_read; + F->done_read = NULL; + if (F->active_buffer) + asio_put(F->active_buffer); + F->active_buffer = r; + if (!r->status) + return 0; + if (r->status < 0) + die("Error reading %s: %s", f->name, strerror(r->returned_errno)); + f->bptr = f->buffer = r->buffer; + f->bstop = f->bufend = f->buffer + r->status; + f->pos += r->status; + + fbdir_submit_read(F); // Read-ahead the next block + + return r->status; +} + +static void +fbdir_spout(struct fastbuf *f) +{ + struct fb_direct *F = FB_DIRECT(f); + struct asio_request *r; + + DBG("FB-DIRECT: Spout"); + + fbdir_change_mode(F, M_WRITE); + r = F->active_buffer; + if (r && f->bptr > f->bstop) + { + r->op = ASIO_WRITE_BACK; + r->fd = F->fd; + r->len = f->bptr - f->bstop; + ASSERT(!(f->pos % FBDIR_ALIGN) || fbdir_cheat); + f->pos += r->len; + if (!fbdir_cheat && r->len % FBDIR_ALIGN) // Have to simulate incomplete writes + { + r->len = ALIGN_TO(r->len, FBDIR_ALIGN); + asio_submit(r); + asio_sync(F->io_queue); + DBG("FB-DIRECT: Truncating at %llu", (long long)f->pos); + if (sh_ftruncate(F->fd, f->pos) < 0) + die("Error truncating %s: %m", f->name); + } + else + asio_submit(r); + r = NULL; + } + if (!r) + r = asio_get(F->io_queue); + f->bstop = f->bptr = f->buffer = r->buffer; + f->bufend = f->buffer + F->io_queue->buffer_size; + F->active_buffer = r; +} + +static int +fbdir_seek(struct fastbuf *f, sh_off_t pos, int whence) +{ + DBG("FB-DIRECT: Seek %llu %d", (long long)pos, whence); + + if (whence == SEEK_SET && pos == f->pos) + return 1; + + fbdir_change_mode(FB_DIRECT(f), M_NULL); // Wait for all async requests to finish + sh_off_t l = sh_seek(FB_DIRECT(f)->fd, pos, whence); + if (l < 0) + return 0; + f->pos = l; + return 1; +} + +static struct asio_queue * +fbdir_get_io_queue(uns buffer_size, uns write_back) +{ + struct ucwlib_context *ctx = ucwlib_thread_context(); + struct asio_queue *q = ctx->io_queue; + if (!q) + { + q = xmalloc_zero(sizeof(struct asio_queue)); + q->buffer_size = buffer_size; + q->max_writebacks = write_back; + asio_init_queue(q); + ctx->io_queue = q; + } + q->use_count++; + DBG("FB-DIRECT: Got I/O queue, uc=%d", q->use_count); + return q; +} + +static void +fbdir_put_io_queue(void) +{ + struct ucwlib_context *ctx = ucwlib_thread_context(); + struct asio_queue *q = ctx->io_queue; + ASSERT(q); + DBG("FB-DIRECT: Put I/O queue, uc=%d", q->use_count); + if (!--q->use_count) + { + asio_cleanup_queue(q); + xfree(q); + ctx->io_queue = NULL; + } +} + +static void +fbdir_close(struct fastbuf *f) +{ + struct fb_direct *F = FB_DIRECT(f); + + DBG("FB-DIRECT: Close"); + + fbdir_change_mode(F, M_NULL); + if (!F->user_queue) + fbdir_put_io_queue(); + + switch (F->is_temp_file) + { + case 1: + if (unlink(f->name) < 0) - log(L_ERROR, "unlink(%s): %m", f->name); ++ msg(L_ERROR, "unlink(%s): %m", f->name); + case 0: + close(F->fd); + } + + xfree(f); +} + +static int +fbdir_config(struct fastbuf *f, uns item, int value) +{ + switch (item) + { + case BCONFIG_IS_TEMP_FILE: + FB_DIRECT(f)->is_temp_file = value; + return 0; + default: + return -1; + } +} + +struct fastbuf * - fbdir_open_fd_internal(int fd, byte *name, struct asio_queue *q, uns buffer_size, uns read_ahead UNUSED, uns write_back) ++fbdir_open_fd_internal(int fd, const byte *name, struct asio_queue *q, uns buffer_size, uns read_ahead UNUSED, uns write_back) +{ + int namelen = strlen(name) + 1; + struct fb_direct *F = xmalloc(sizeof(struct fb_direct) + namelen); + struct fastbuf *f = &F->fb; + + DBG("FB-DIRECT: Open"); + bzero(F, sizeof(*F)); + f->name = F->name; + memcpy(f->name, name, namelen); + F->fd = fd; + if (q) + F->io_queue = F->user_queue = q; + else + F->io_queue = fbdir_get_io_queue(buffer_size, write_back); + f->refill = fbdir_refill; + f->spout = fbdir_spout; + f->seek = fbdir_seek; + f->close = fbdir_close; + f->config = fbdir_config; + f->can_overwrite_buffer = 2; + return f; +} + +#ifdef TEST + +#include "lib/getopt.h" + +int main(int argc, char **argv) +{ + struct fastbuf *f, *t; + + log_init(NULL); + if (cf_getopt(argc, argv, CF_SHORT_OPTS, CF_NO_LONG_OPTS, NULL) >= 0) + die("Hey, whaddya want?"); + f = (optind < argc) ? fbdir_open(argv[optind++], O_RDONLY, NULL) : fbdir_open_fd(0, NULL); + t = (optind < argc) ? fbdir_open(argv[optind++], O_RDWR | O_CREAT | O_TRUNC, NULL) : fbdir_open_fd(1, NULL); + + bbcopy(f, t, ~0U); + ASSERT(btell(f) == btell(t)); + +#if 0 // This triggers unaligned write + bflush(t); + bputc(t, '\n'); +#endif + + brewind(t); + bgetc(t); + ASSERT(btell(t) == 1); + + bclose(f); + bclose(t); + return 0; +} + +#endif diff --cc lib/fb-file.c index 87791c97,dd8d1488..ed66dfe2 --- a/lib/fb-file.c +++ b/lib/fb-file.c @@@ -205,10 -71,9 +205,10 @@@ bfd_close(struct fastbuf *f { case 1: if (unlink(f->name) < 0) - log(L_ERROR, "unlink(%s): %m", f->name); + msg(L_ERROR, "unlink(%s): %m", f->name); case 0: - close(FB_FILE(f)->fd); + if (close(FB_FILE(f)->fd)) + die("close(%s): %m", f->name); } xfree(f); } @@@ -229,12 -91,11 +229,12 @@@ bfd_config(struct fastbuf *f, uns item } } -static struct fastbuf * -bfdopen_internal(int fd, uns buflen, const byte *name) +struct fastbuf * - bfdopen_internal(int fd, byte *name, uns buflen) ++bfdopen_internal(int fd, const byte *name, uns buflen) { + ASSERT(buflen); int namelen = strlen(name) + 1; - struct fb_file *F = xmalloc(sizeof(struct fb_file) + buflen + namelen); + struct fb_file *F = xmalloc_zero(sizeof(struct fb_file) + buflen + namelen); struct fastbuf *f = &F->fb; bzero(F, sizeof(*F)); @@@ -254,15 -115,27 +254,15 @@@ } struct fastbuf * - bopen_try(byte *name, uns mode, uns buflen) + bopen_try(const byte *name, uns mode, uns buflen) { - int fd = sh_open(name, mode, 0666); - if (fd < 0) - return NULL; - struct fastbuf *b = bfdopen_internal(fd, buflen, name); - if (mode & O_APPEND) - bfd_seek(b, 0, SEEK_END); - return b; + return bopen_file_try(name, mode, &(struct fb_params){ .type = FB_STD, .buffer_size = buflen }); } struct fastbuf * - bopen(byte *name, uns mode, uns buflen) + bopen(const byte *name, uns mode, uns buflen) { - if (!buflen) - return bopen_mm(name, mode); - struct fastbuf *b = bopen_try(name, mode, buflen); - if (!b) - die("Unable to %s file %s: %m", - (mode & O_CREAT) ? "create" : "open", name); - return b; + return bopen_file(name, mode, &(struct fb_params){ .type = FB_STD, .buffer_size = buflen }); } struct fastbuf * diff --cc lib/fb-mmap.c index 15c542a0,28e90df4..d16458ef --- a/lib/fb-mmap.c +++ b/lib/fb-mmap.c @@@ -146,10 -145,9 +146,10 @@@ bfmm_close(struct fastbuf *f { case 1: if (unlink(f->name) < 0) - log(L_ERROR, "unlink(%s): %m", f->name); + msg(L_ERROR, "unlink(%s): %m", f->name); case 0: - close(F->fd); + if (close(F->fd)) + die("close(%s): %m", f->name); } xfree(f); } @@@ -167,8 -165,8 +167,8 @@@ bfmm_config(struct fastbuf *f, uns item } } -static struct fastbuf * +struct fastbuf * - bfmmopen_internal(int fd, byte *name, uns mode) + bfmmopen_internal(int fd, const byte *name, uns mode) { int namelen = strlen(name) + 1; struct fb_mmap *F = xmalloc(sizeof(struct fb_mmap) + namelen); diff --cc lib/fb-param.c index e487ced4,00000000..d9e7a221 mode 100644,000000..100644 --- a/lib/fb-param.c +++ b/lib/fb-param.c @@@ -1,129 -1,0 +1,132 @@@ +/* + * UCW Library -- FastIO on files with run-time parametrization + * + * (c) 2007 Pavel Charvat + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#include "lib/lib.h" +#include "lib/conf.h" +#include "lib/lfs.h" +#include "lib/fastbuf.h" + +#include +#include + +struct fb_params fbpar_def = { + .buffer_size = 65536, + .read_ahead = 1, + .write_back = 1, +}; + +struct cf_section fbpar_cf = { +# define F(x) PTR_TO(struct fb_params, x) + CF_TYPE(struct fb_params), + CF_ITEMS { + CF_LOOKUP("Type", (int *)F(type), ((byte *[]){"std", "direct", "mmap", NULL})), + CF_UNS("BufSize", F(buffer_size)), + CF_UNS("KeepBackBuf", F(keep_back_buf)), + CF_UNS("ReadAhead", F(read_ahead)), + CF_UNS("WriteBack", F(write_back)), + CF_END + } +# undef F +}; + +static struct cf_section fbpar_global_cf = { + CF_ITEMS { + CF_SECTION("Defaults", &fbpar_def, &fbpar_cf), + CF_END + } +}; + +static void CONSTRUCTOR +fbpar_global_init(void) +{ + cf_declare_section("FBParam", &fbpar_global_cf, 0); +} + +static struct fastbuf * - bopen_fd_internal(int fd, struct fb_params *params, uns mode, byte *name) ++bopen_fd_internal(int fd, struct fb_params *params, uns mode, const byte *name) +{ + byte buf[32]; + if (!name) - sprintf(name = buf, "fd%d", fd); ++ { ++ sprintf(buf, "fd%d", fd); ++ name = buf; ++ } + struct fastbuf *fb; + switch (params->type) + { + case FB_STD: + fb = bfdopen_internal(fd, name, + params->buffer_size ? : fbpar_def.buffer_size); + if (params->keep_back_buf) + bconfig(fb, BCONFIG_KEEP_BACK_BUF, 1); + return fb; + case FB_DIRECT: + fb = fbdir_open_fd_internal(fd, name, params->asio, + params->buffer_size ? : fbpar_def.buffer_size, + params->read_ahead ? : fbpar_def.read_ahead, + params->write_back ? : fbpar_def.write_back); + if (!~mode && !fbdir_cheat && ((int)(mode = fcntl(fd, F_GETFL)) < 0 || fcntl(fd, F_SETFL, mode | O_DIRECT)) < 0) - log(L_WARN, "Cannot set O_DIRECT on fd %d: %m", fd); ++ msg(L_WARN, "Cannot set O_DIRECT on fd %d: %m", fd); + return fb; + case FB_MMAP: + if (!~mode && (int)(mode = fcntl(fd, F_GETFL)) < 0) + die("Cannot get flags of fd %d: %m", fd); + return bfmmopen_internal(fd, name, mode); + default: + ASSERT(0); + } +} + +static struct fastbuf * - bopen_file_internal(byte *name, int mode, struct fb_params *params, int try) ++bopen_file_internal(const byte *name, int mode, struct fb_params *params, int try) +{ + if (params->type == FB_DIRECT && !fbdir_cheat) + mode |= O_DIRECT; + if (params->type == FB_MMAP && (mode & O_ACCMODE) == O_WRONLY) + mode = (mode & ~O_ACCMODE) | O_RDWR; + int fd = sh_open(name, mode, 0666); + if (fd < 0) + if (try) + return NULL; + else + die("Unable to %s file %s: %m", (mode & O_CREAT) ? "create" : "open", name); + struct fastbuf *fb = bopen_fd_internal(fd, params, mode, name); + ASSERT(fb); + if (mode & O_APPEND) + bseek(fb, 0, SEEK_END); + return fb; +} + +struct fastbuf * - bopen_file(byte *name, int mode, struct fb_params *params) ++bopen_file(const byte *name, int mode, struct fb_params *params) +{ + return bopen_file_internal(name, mode, params ? : &fbpar_def, 0); +} + +struct fastbuf * - bopen_file_try(byte *name, int mode, struct fb_params *params) ++bopen_file_try(const byte *name, int mode, struct fb_params *params) +{ + return bopen_file_internal(name, mode, params ? : &fbpar_def, 1); +} + +struct fastbuf * +bopen_fd(int fd, struct fb_params *params) +{ + return bopen_fd_internal(fd, params ? : &fbpar_def, ~0U, NULL); +} + +struct fastbuf * +bopen_tmp_file(struct fb_params *params) +{ + byte buf[TEMP_FILE_NAME_LEN]; + temp_file_name(buf); + struct fastbuf *fb = bopen_file_internal(buf, O_RDWR | O_CREAT | O_TRUNC, params, 0); + bconfig(fb, BCONFIG_IS_TEMP_FILE, 1); + return fb; +} diff --cc lib/sorter/common.h index 60c6db98,00000000..db414dd0 mode 100644,000000..100644 --- a/lib/sorter/common.h +++ b/lib/sorter/common.h @@@ -1,113 -1,0 +1,113 @@@ +/* + * UCW Library -- Universal Sorter: Common Declarations + * + * (c) 2007 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#ifndef _UCW_SORTER_COMMON_H +#define _UCW_SORTER_COMMON_H + +#include "lib/clists.h" + +/* Configuration, some of the variables are used by the old sorter, too. */ +extern uns sorter_trace, sorter_presort_bufsize, sorter_stream_bufsize; +extern uns sorter_debug, sorter_min_radix_bits, sorter_max_radix_bits; +extern u64 sorter_bufsize; +extern struct fb_params sorter_fb_params; + - #define SORT_TRACE(x...) do { if (sorter_trace) log(L_DEBUG, x); } while(0) - #define SORT_XTRACE(level, x...) do { if (sorter_trace >= level) log(L_DEBUG, x); } while(0) ++#define SORT_TRACE(x...) do { if (sorter_trace) msg(L_DEBUG, x); } while(0) ++#define SORT_XTRACE(level, x...) do { if (sorter_trace >= level) msg(L_DEBUG, x); } while(0) + +enum sort_debug { + SORT_DEBUG_NO_PRESORT = 1, + SORT_DEBUG_NO_JOIN = 2, + SORT_DEBUG_KEEP_BUCKETS = 4, + SORT_DEBUG_NO_RADIX = 8, +}; + +struct sort_bucket; + +struct sort_context { + struct fastbuf *in_fb; + struct fastbuf *out_fb; + uns hash_bits; + u64 in_size; + + struct mempool *pool; + clist bucket_list; + void *big_buf; + size_t big_buf_size; + + int (*custom_presort)(struct fastbuf *dest, void *buf, size_t bufsize); + + // Take as much as possible from the source bucket, sort it in memory and dump to destination bucket. + // Return 1 if there is more data available in the source bucket. + int (*internal_sort)(struct sort_context *ctx, struct sort_bucket *in, struct sort_bucket *out, struct sort_bucket *out_only); + + // Estimate how much input data from `b' will fit in the internal sorting buffer. + u64 (*internal_estimate)(struct sort_context *ctx, struct sort_bucket *b); + + // Two-way split/merge: merge up to 2 source buckets to up to 2 destination buckets. + // Bucket arrays are NULL-terminated. + void (*twoway_merge)(struct sort_context *ctx, struct sort_bucket **ins, struct sort_bucket **outs); + + // Radix split according to hash function + void (*radix_split)(struct sort_context *ctx, struct sort_bucket *in, struct sort_bucket **outs, uns bitpos, uns numbits); + + // State variables of internal_sort + void *key_buf; + int more_keys; + + // Timing + timestamp_t start_time; + uns last_pass_time; + uns total_int_time, total_pre_time, total_ext_time; +}; + +void sorter_run(struct sort_context *ctx); + +/* Buffers */ + +void *sorter_alloc(struct sort_context *ctx, uns size); +void sorter_prepare_buf(struct sort_context *ctx); +void sorter_alloc_buf(struct sort_context *ctx); +void sorter_free_buf(struct sort_context *ctx); + +/* Buckets */ + +struct sort_bucket { + cnode n; + struct sort_context *ctx; + uns flags; + struct fastbuf *fb; + byte *filename; + u64 size; // Size in bytes (not valid when writing) + uns runs; // Number of runs, 0 if not sorted + uns hash_bits; // Remaining bits of the hash function + byte *ident; // Identifier used in debug messages +}; + +enum sort_bucket_flags { + SBF_FINAL = 1, // This bucket corresponds to the final output file (always 1 run) + SBF_SOURCE = 2, // Contains the source file (always 0 runs) + SBF_CUSTOM_PRESORT = 4, // Contains source to read via custom presorter + SBF_OPEN_WRITE = 256, // We are currently writing to the fastbuf + SBF_OPEN_READ = 512, // We are reading from the fastbuf + SBF_DESTROYED = 1024, // Already done with, no further references allowed + SBF_SWAPPED_OUT = 2048, // Swapped out to a named file +}; + +struct sort_bucket *sbuck_new(struct sort_context *ctx); +void sbuck_drop(struct sort_bucket *b); +int sbuck_have(struct sort_bucket *b); +int sbuck_has_file(struct sort_bucket *b); +sh_off_t sbuck_size(struct sort_bucket *b); +struct fastbuf *sbuck_read(struct sort_bucket *b); +struct fastbuf *sbuck_write(struct sort_bucket *b); +void sbuck_swap_out(struct sort_bucket *b); +void format_size(byte *buf, u64 x); + +#endif diff --cc lib/sorter/old-test.c index 8f2aacaa,00000000..bc108f58 mode 100644,000000..100644 --- a/lib/sorter/old-test.c +++ b/lib/sorter/old-test.c @@@ -1,413 -1,0 +1,413 @@@ +/* + * UCW Library -- Testing the Old Sorter + * + * (c) 2007 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#include "lib/lib.h" +#include "lib/getopt.h" +#include "lib/conf.h" +#include "lib/fastbuf.h" +#include "lib/ff-binary.h" +#include "lib/hashfunc.h" +#include "lib/md5.h" + +#include +#include +#include +#include +#include + +/*** Time measurement ***/ + +static timestamp_t timer; + +static void +start(void) +{ + sync(); + init_timer(&timer); +} + +static void +stop(void) +{ + sync(); - log(L_INFO, "Test took %.3fs", get_timer(&timer) / 1000.); ++ msg(L_INFO, "Test took %.3fs", get_timer(&timer) / 1000.); +} + +/*** Simple 4-byte integer keys ***/ + +struct key1 { + u32 x; +}; + +static inline int s1_compare(struct key1 *x, struct key1 *y) +{ + COMPARE(x->x, y->x); + return 0; +} + +#define SORT_KEY struct key1 +#define SORT_PREFIX(x) s1_##x +#define SORT_INPUT_FB +#define SORT_OUTPUT_FB +#define SORT_UNIQUE +#define SORT_REGULAR +#define SORT_PRESORT + +#include "lib/sorter.h" + +static void +test_int(int mode, u64 size) +{ + uns N = size ? nextprime(MIN(size/4, 0xffff0000)) : 0; + uns K = N/4*3; - log(L_INFO, ">>> Integers (%s, N=%u)", ((char *[]) { "increasing", "decreasing", "random" })[mode], N); ++ msg(L_INFO, ">>> Integers (%s, N=%u)", ((char *[]) { "increasing", "decreasing", "random" })[mode], N); + + struct fastbuf *f = bopen_tmp(65536); + for (uns i=0; ihash[i], y->hash[i]); + return 0; +} + +#define SORT_KEY struct key3 +#define SORT_PREFIX(x) s3_##x +#define SORT_INPUT_FB +#define SORT_OUTPUT_FB +#define SORT_REGULAR +#define SORT_PRESORT + +#include "lib/sorter.h" + +static void +gen_hash_key(int mode, struct key3 *k, uns i) +{ + k->i = i; + k->payload[0] = 7*i + 13; + k->payload[1] = 13*i + 19; + k->payload[2] = 19*i + 7; + switch (mode) + { + case 0: + k->hash[0] = i; + k->hash[1] = k->payload[0]; + k->hash[2] = k->payload[1]; + k->hash[3] = k->payload[2]; + break; + case 1: + k->hash[0] = ~i; + k->hash[1] = k->payload[0]; + k->hash[2] = k->payload[1]; + k->hash[3] = k->payload[2]; + break; + default: ; + struct MD5Context ctx; + MD5Init(&ctx); + MD5Update(&ctx, (byte*) &k->i, 4); + MD5Final((byte*) &k->hash, &ctx); + break; + } +} + +static void +test_hashes(int mode, u64 size) +{ + uns N = MIN(size / sizeof(struct key3), 0xffffffff); - log(L_INFO, ">>> Hashes (%s, N=%u)", ((char *[]) { "increasing", "decreasing", "random" })[mode], N); ++ msg(L_INFO, ">>> Hashes (%s, N=%u)", ((char *[]) { "increasing", "decreasing", "random" })[mode], N); + struct key3 k, lastk; + + struct fastbuf *f = bopen_tmp(65536); + uns hash_sum = 0; + for (uns i=0; ilen = len; + breadb(f, x->s, len); + return 1; +} + +static inline void s4_copy_data(struct fastbuf *i UNUSED, struct fastbuf *f, struct key4 *x) +{ + bputl(f, x->len); + bwrite(f, x->s, x->len); +} + +static inline int s4_compare(struct key4 *x, struct key4 *y) +{ + uns l = MIN(x->len, y->len); + int c = memcmp(x->s, y->s, l); + if (c) + return c; + COMPARE(x->len, y->len); + return 0; +} + +static inline byte *s4_fetch_item(struct fastbuf *f UNUSED, struct key4 *x, byte *limit UNUSED) +{ + return &x->s[x->len]; +} + +static inline void s4_store_item(struct fastbuf *f, struct key4 *x) +{ + s4_copy_data(NULL, f, x); +} + +#define SORT_KEY struct key4 +#define SORT_PREFIX(x) s4_##x +#define SORT_INPUT_FB +#define SORT_OUTPUT_FB +#define SORT_PRESORT + +#include "lib/sorter.h" + +#define s4b_compare s4_compare +#define s4b_fetch_key s4_fetch_key + +static inline uns s4_data_size(struct key4 *x) +{ + return x->len ? (x->s[0] ^ 0xad) : 0; +} + +static inline void s4b_copy_data(struct fastbuf *i, struct fastbuf *f, struct key4 *x) +{ + bputl(f, x->len); + bwrite(f, x->s, x->len); + bbcopy(i, f, s4_data_size(x)); +} + +static inline byte *s4b_fetch_item(struct fastbuf *f, struct key4 *x, byte *limit) +{ + byte *d = &x->s[x->len]; + if (d + s4_data_size(x) > limit) + return NULL; + breadb(f, d, s4_data_size(x)); + return d + s4_data_size(x); +} + +static inline void s4b_store_item(struct fastbuf *f, struct key4 *x) +{ + bputl(f, x->len); + bwrite(f, x->s, x->len + s4_data_size(x)); +} + +#define SORT_KEY struct key4 +#define SORT_PREFIX(x) s4b_##x +#define SORT_INPUT_FB +#define SORT_OUTPUT_FB +#define SORT_PRESORT + +#include "lib/sorter.h" + +static void +gen_key4(struct key4 *k) +{ + k->len = random_max(KEY4_MAX); + for (uns i=0; ilen; i++) + k->s[i] = random(); +} + +static void +gen_data4(byte *buf, uns len, uns h) +{ + while (len--) + { + *buf++ = h >> 24; + h = h*259309 + 17; + } +} + +static void +test_strings(uns mode, u64 size) +{ + uns avg_item_size = KEY4_MAX/2 + 4 + (mode ? 128 : 0); + uns N = MIN(size / avg_item_size, 0xffffffff); - log(L_INFO, ">>> Strings %s(N=%u)", (mode ? "with data " : ""), N); ++ msg(L_INFO, ">>> Strings %s(N=%u)", (mode ? "with data " : ""), N); + srand(1); + + struct key4 k, lastk; + byte buf[256], buf2[256]; + uns sum = 0; + + struct fastbuf *f = bopen_tmp(65536); + for (uns i=0; i= 0) + switch (c) + { + case 's': + if (cf_parse_u64(optarg, &size)) + goto usage; + break; + case 't': + t = atol(optarg); + if (t >= TMAX) + goto usage; + break; + case 'v': + sorter_trace++; + break; + default: + usage: + fputs("Usage: sort-test [-v] [-s ] [-t ]\n", stderr); + exit(1); + } + if (optind != argc) + goto usage; + + if (t != ~0U) + run_test(t, size); + else + for (uns i=0; i + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#include "lib/lib.h" +#include "lib/getopt.h" +#include "lib/conf.h" +#include "lib/fastbuf.h" +#include "lib/ff-binary.h" +#include "lib/hashfunc.h" +#include "lib/md5.h" + +#include +#include +#include +#include +#include + +/*** Time measurement ***/ + +static timestamp_t timer; + +static void +start(void) +{ + sync(); + init_timer(&timer); +} + +static void +stop(void) +{ + sync(); - log(L_INFO, "Test took %.3fs", get_timer(&timer) / 1000.); ++ msg(L_INFO, "Test took %.3fs", get_timer(&timer) / 1000.); +} + +/*** Simple 4-byte integer keys ***/ + +struct key1 { + u32 x; +}; + +#define SORT_KEY_REGULAR struct key1 +#define SORT_PREFIX(x) s1_##x +#define SORT_INPUT_FB +#define SORT_OUTPUT_FB +#define SORT_UNIQUE +#define SORT_INT(k) (k).x +#define SORT_DELETE_INPUT 0 + +#include "lib/sorter/sorter.h" + +static void +test_int(int mode, u64 size) +{ + uns N = size ? nextprime(MIN(size/4, 0xffff0000)) : 0; + uns K = N/4*3; - log(L_INFO, ">>> Integers (%s, N=%u)", ((char *[]) { "increasing", "decreasing", "random" })[mode], N); ++ msg(L_INFO, ">>> Integers (%s, N=%u)", ((char *[]) { "increasing", "decreasing", "random" })[mode], N); + + struct fastbuf *f = bopen_tmp(65536); + for (uns i=0; icnt += k[i]->cnt; + bwrite(f, k[0], sizeof(struct key2)); +} + +#define SORT_KEY_REGULAR struct key2 +#define SORT_PREFIX(x) s2_##x +#define SORT_INPUT_FB +#define SORT_OUTPUT_FB +#define SORT_UNIFY +#define SORT_INT(k) (k).x + +#include "lib/sorter/sorter.h" + +static void +test_counted(int mode, u64 size) +{ + u64 items = size / sizeof(struct key2); + uns mult = 2; + while (items/(2*mult) > 0xffff0000) + mult++; + uns N = items ? nextprime(items/(2*mult)) : 0; + uns K = N/4*3; - log(L_INFO, ">>> Counted integers (%s, N=%u, mult=%u)", ((char *[]) { "increasing", "decreasing", "random" })[mode], N, mult); ++ msg(L_INFO, ">>> Counted integers (%s, N=%u, mult=%u)", ((char *[]) { "increasing", "decreasing", "random" })[mode], N, mult); + + struct fastbuf *f = bopen_tmp(65536); + for (uns m=0; mhash[i], y->hash[i]); + return 0; +} + +static inline uns s3_hash(struct key3 *x) +{ + return x->hash[0]; +} + +#define SORT_KEY_REGULAR struct key3 +#define SORT_PREFIX(x) s3_##x +#define SORT_INPUT_FB +#define SORT_OUTPUT_FB +#define SORT_HASH_BITS 32 + +#include "lib/sorter/sorter.h" + +static void +gen_hash_key(int mode, struct key3 *k, uns i) +{ + k->i = i; + k->payload[0] = 7*i + 13; + k->payload[1] = 13*i + 19; + k->payload[2] = 19*i + 7; + switch (mode) + { + case 0: + k->hash[0] = i; + k->hash[1] = k->payload[0]; + k->hash[2] = k->payload[1]; + k->hash[3] = k->payload[2]; + break; + case 1: + k->hash[0] = ~i; + k->hash[1] = k->payload[0]; + k->hash[2] = k->payload[1]; + k->hash[3] = k->payload[2]; + break; + default: ; + struct MD5Context ctx; + MD5Init(&ctx); + MD5Update(&ctx, (byte*) &k->i, 4); + MD5Final((byte*) &k->hash, &ctx); + break; + } +} + +static void +test_hashes(int mode, u64 size) +{ + uns N = MIN(size / sizeof(struct key3), 0xffffffff); - log(L_INFO, ">>> Hashes (%s, N=%u)", ((char *[]) { "increasing", "decreasing", "random" })[mode], N); ++ msg(L_INFO, ">>> Hashes (%s, N=%u)", ((char *[]) { "increasing", "decreasing", "random" })[mode], N); + struct key3 k, lastk; + + struct fastbuf *f = bopen_tmp(65536); + uns hash_sum = 0; + for (uns i=0; ilen, y->len); + int c = memcmp(x->s, y->s, l); + if (c) + return c; + COMPARE(x->len, y->len); + return 0; +} + +static inline int s4_read_key(struct fastbuf *f, struct key4 *x) +{ + x->len = bgetl(f); + if (x->len == 0xffffffff) + return 0; + ASSERT(x->len < KEY4_MAX); + breadb(f, x->s, x->len); + return 1; +} + +static inline void s4_write_key(struct fastbuf *f, struct key4 *x) +{ + ASSERT(x->len < KEY4_MAX); + bputl(f, x->len); + bwrite(f, x->s, x->len); +} + +#define SORT_KEY struct key4 +#define SORT_PREFIX(x) s4_##x +#define SORT_KEY_SIZE(x) (sizeof(struct key4) - KEY4_MAX + (x).len) +#define SORT_INPUT_FB +#define SORT_OUTPUT_FB + +#include "lib/sorter/sorter.h" + +#define s4b_compare s4_compare +#define s4b_read_key s4_read_key +#define s4b_write_key s4_write_key + +static inline uns s4_data_size(struct key4 *x) +{ + return x->len ? (x->s[0] ^ 0xad) : 0; +} + +#define SORT_KEY struct key4 +#define SORT_PREFIX(x) s4b_##x +#define SORT_KEY_SIZE(x) (sizeof(struct key4) - KEY4_MAX + (x).len) +#define SORT_DATA_SIZE(x) s4_data_size(&(x)) +#define SORT_INPUT_FB +#define SORT_OUTPUT_FB + +#include "lib/sorter/sorter.h" + +static void +gen_key4(struct key4 *k) +{ + k->len = random_max(KEY4_MAX); + for (uns i=0; ilen; i++) + k->s[i] = random(); +} + +static void +gen_data4(byte *buf, uns len, uns h) +{ + while (len--) + { + *buf++ = h >> 24; + h = h*259309 + 17; + } +} + +static void +test_strings(uns mode, u64 size) +{ + uns avg_item_size = KEY4_MAX/2 + 4 + (mode ? 128 : 0); + uns N = MIN(size / avg_item_size, 0xffffffff); - log(L_INFO, ">>> Strings %s(N=%u)", (mode ? "with data " : ""), N); ++ msg(L_INFO, ">>> Strings %s(N=%u)", (mode ? "with data " : ""), N); + srand(1); + + struct key4 k, lastk; + byte buf[256], buf2[256]; + uns sum = 0; + + struct fastbuf *f = bopen_tmp(65536); + for (uns i=0; i= s5_N) + { + if (s5_i >= s5_N-1) + return 0; + s5_j = 0; + s5_i++; + } + p->x = ((u64)s5_j * s5_K) % s5_N; + p->y = ((u64)(s5_i + s5_j) * s5_L) % s5_N; + s5_j++; + return 1; +} + +#define ASORT_PREFIX(x) s5m_##x +#define ASORT_KEY_TYPE u32 +#define ASORT_ELT(i) ary[i] +#define ASORT_EXTRA_ARGS , u32 *ary +#include "lib/arraysort.h" + +static void s5_write_merged(struct fastbuf *f, struct key5 **keys, void **data, uns n, void *buf) +{ + u32 *a = buf; + uns m = 0; + for (uns i=0; icnt); + m += keys[i]->cnt; + } + s5m_sort(m, a); + keys[0]->cnt = m; + bwrite(f, keys[0], sizeof(struct key5)); + bwrite(f, a, 4*m); /* FIXME: Might overflow here */ +} + +static void s5_copy_merged(struct key5 **keys, struct fastbuf **data, uns n, struct fastbuf *dest) +{ + u32 k[n]; + uns m = 0; + for (uns i=0; icnt; + } + struct key5 key = { .x = keys[0]->x, .cnt = m }; + bwrite(dest, &key, sizeof(key)); + while (key.cnt--) + { + uns b = 0; + for (uns i=1; icnt) + k[b] = bgetl(data[b]); + else + k[b] = ~0U; + } +} + +static inline int s5p_lt(struct s5_pair x, struct s5_pair y) +{ + COMPARE_LT(x.x, y.x); + COMPARE_LT(x.y, y.y); + return 0; +} + +/* FIXME: Use smarter internal sorter when it's available */ +#define ASORT_PREFIX(x) s5p_##x +#define ASORT_KEY_TYPE struct s5_pair +#define ASORT_ELT(i) ary[i] +#define ASORT_LT(x,y) s5p_lt(x,y) +#define ASORT_EXTRA_ARGS , struct s5_pair *ary +#include "lib/arraysort.h" + +static int s5_presort(struct fastbuf *dest, void *buf, size_t bufsize) +{ + uns max = MIN(bufsize/sizeof(struct s5_pair), 0xffffffff); + struct s5_pair *a = buf; + uns n = 0; + while (n>> Graph%s (N=%u)", (mode ? "" : " with custom presorting"), N); ++ msg(L_INFO, ">>> Graph%s (N=%u)", (mode ? "" : " with custom presorting"), N); + s5_N = N; + s5_K = N/4*3; + s5_L = N/3*2; + s5_i = s5_j = 0; + + struct fastbuf *in = NULL; + if (mode) + { + struct s5_pair p; + in = bopen_tmp(65536); + while (s5_gen(&p)) + { + struct key5 k = { .x = p.x, .cnt = 1 }; + bwrite(in, &k, sizeof(k)); + bputl(in, p.y); + } + brewind(in); + } + + start(); + struct fastbuf *f = bopen_tmp(65536); + bputl(f, 0xfeedcafe); + struct fastbuf *g = (mode ? s5b_sort(in, f, s5_N-1) : s5_sort(NULL, f, s5_N-1)); + ASSERT(f == g); + stop(); + + SORT_XTRACE(2, "Verifying"); + uns c = bgetl(f); + ASSERT(c == 0xfeedcafe); + for (uns i=0; i>> 64-bit integers (%s, N=%llu)", ((char *[]) { "increasing", "decreasing", "random" })[mode], (long long)N); ++ msg(L_INFO, ">>> 64-bit integers (%s, N=%llu)", ((char *[]) { "increasing", "decreasing", "random" })[mode], (long long)N); + + struct fastbuf *f = bopen_tmp(65536); + for (u64 i=0; i= 0) + switch (c) + { + case 'd': + sorter_debug = atol(optarg); + break; + case 's': + if (cf_parse_u64(optarg, &size)) + goto usage; + break; + case 't': + t = atol(optarg); + if (t >= TMAX) + goto usage; + break; + case 'v': + sorter_trace++; + break; + default: + usage: + fputs("Usage: sort-test [-v] [-d ] [-s ] [-t ]\n", stderr); + exit(1); + } + if (optind != argc) + goto usage; + + if (t != ~0U) + run_test(t, size); + else + for (uns i=0; i