From: Martin Mares Date: Sat, 10 Feb 2007 20:39:58 +0000 (+0100) Subject: Created a local TODO list. X-Git-Tag: holmes-import~506^2~13^2~144 X-Git-Url: http://mj.ucw.cz/gitweb/?a=commitdiff_plain;h=969ce845f3bf35bb7423bac705d46196bb863e72;p=libucw.git Created a local TODO list. --- diff --git a/lib/sorter/TODO b/lib/sorter/TODO new file mode 100644 index 00000000..49479aa5 --- /dev/null +++ b/lib/sorter/TODO @@ -0,0 +1,17 @@ +Testing: +o Giant runs. +o Records of odd lengths. +o Empty files. + +Improvements: +o Alignment? Use of SSE? +o Use radix-sort for internal sorting. +o Parallelization of internal sorting. +o Clean up data types and make sure they cannot overflow. (size_t vs. u64 vs. sh_off_t vs. uns) +o Buffer sizing in internal sorters. +o Switching between direct and normal I/O. +o When merging, choose the output file with less runs instead of always switching? +o Implement multi-way merge. +o Mode with only 2-way unification? +o Speed up 2-way merge. +o Speed up radix splitting. diff --git a/lib/sorter/govern.c b/lib/sorter/govern.c index ddf919e2..1211a81c 100644 --- a/lib/sorter/govern.c +++ b/lib/sorter/govern.c @@ -228,8 +228,6 @@ sorter_run(struct sort_context *ctx) clist_init(&ctx->bucket_list); sorter_prepare_buf(ctx); - /* FIXME: Remember to test sorting of empty files */ - // Create bucket containing the source struct sort_bucket *bin = sbuck_new(ctx); bin->flags = SBF_SOURCE | SBF_OPEN_READ; @@ -238,7 +236,7 @@ sorter_run(struct sort_context *ctx) else bin->fb = ctx->in_fb; bin->ident = "in"; - bin->size = ctx->in_size; /* FIXME: Sizes should be either sh_off_t or u64, not both; beware of ~0U */ + bin->size = ctx->in_size; bin->hash_bits = ctx->hash_bits; clist_add_tail(&ctx->bucket_list, &bin->n); SORT_XTRACE(2, "Input size: %s", F_BSIZE(bin)); diff --git a/lib/sorter/s-fixint.h b/lib/sorter/s-fixint.h index 91485093..3d82313e 100644 --- a/lib/sorter/s-fixint.h +++ b/lib/sorter/s-fixint.h @@ -19,7 +19,7 @@ static int P(internal)(struct sort_context *ctx, struct sort_bucket *bin, struct sorter_alloc_buf(ctx); struct fastbuf *in = sbuck_read(bin); P(key) *buf = ctx->big_buf; - size_t bufsize = ctx->big_buf_half_size; /* FIXME: In some cases, we can use the whole buffer */ + size_t bufsize = ctx->big_buf_half_size; #ifdef CPU_64BIT_POINTERS bufsize = MIN((u64)bufsize, (u64)~0U * sizeof(P(key))); // The number of records must fit in uns #endif diff --git a/lib/sorter/s-radix.h b/lib/sorter/s-radix.h index c83d6049..0b5f5e9f 100644 --- a/lib/sorter/s-radix.h +++ b/lib/sorter/s-radix.h @@ -7,8 +7,6 @@ * of the GNU Lesser General Public License. */ -/* FIXME: This is a very trivial implementation so far. Use fbdirect and such things to speed up. */ - #include static void P(radix_split)(struct sort_context *ctx UNUSED, struct sort_bucket *bin, struct sort_bucket **bouts, uns bitpos, uns numbits) diff --git a/lib/sorter/s-twoway.h b/lib/sorter/s-twoway.h index 0e6d6442..ef23a2e9 100644 --- a/lib/sorter/s-twoway.h +++ b/lib/sorter/s-twoway.h @@ -7,9 +7,6 @@ * of the GNU Lesser General Public License. */ -/* FIXME: There is a plenty of room for further optimization */ -/* FIXME: Swap outputs if there already are some runs? */ - static void P(twoway_merge)(struct sort_context *ctx UNUSED, struct sort_bucket **ins, struct sort_bucket **outs) { struct fastbuf *fin1, *fin2, *fout1, *fout2, *ftmp; diff --git a/lib/sorter/sort-test.c b/lib/sorter/sort-test.c index 5464f744..8cf1433f 100644 --- a/lib/sorter/sort-test.c +++ b/lib/sorter/sort-test.c @@ -410,7 +410,6 @@ static int s5_gen(struct s5_pair *p) static void s5_write_merged(struct fastbuf *f, struct key5 **keys, void **data, uns n, void *buf) { - /* FIXME: Allow mode where this function is not defined? */ u32 *a = buf; uns m = 0; for (uns i=0; i