From 969ce845f3bf35bb7423bac705d46196bb863e72 Mon Sep 17 00:00:00 2001 From: Martin Mares Date: Sat, 10 Feb 2007 21:39:58 +0100 Subject: [PATCH] Created a local TODO list. --- lib/sorter/TODO | 17 +++++++++++++++++ lib/sorter/govern.c | 4 +--- lib/sorter/s-fixint.h | 2 +- lib/sorter/s-radix.h | 2 -- lib/sorter/s-twoway.h | 3 --- lib/sorter/sort-test.c | 1 - 6 files changed, 19 insertions(+), 10 deletions(-) create mode 100644 lib/sorter/TODO diff --git a/lib/sorter/TODO b/lib/sorter/TODO new file mode 100644 index 00000000..49479aa5 --- /dev/null +++ b/lib/sorter/TODO @@ -0,0 +1,17 @@ +Testing: +o Giant runs. +o Records of odd lengths. +o Empty files. + +Improvements: +o Alignment? Use of SSE? +o Use radix-sort for internal sorting. +o Parallelization of internal sorting. +o Clean up data types and make sure they cannot overflow. (size_t vs. u64 vs. sh_off_t vs. uns) +o Buffer sizing in internal sorters. +o Switching between direct and normal I/O. +o When merging, choose the output file with less runs instead of always switching? +o Implement multi-way merge. +o Mode with only 2-way unification? +o Speed up 2-way merge. +o Speed up radix splitting. diff --git a/lib/sorter/govern.c b/lib/sorter/govern.c index ddf919e2..1211a81c 100644 --- a/lib/sorter/govern.c +++ b/lib/sorter/govern.c @@ -228,8 +228,6 @@ sorter_run(struct sort_context *ctx) clist_init(&ctx->bucket_list); sorter_prepare_buf(ctx); - /* FIXME: Remember to test sorting of empty files */ - // Create bucket containing the source struct sort_bucket *bin = sbuck_new(ctx); bin->flags = SBF_SOURCE | SBF_OPEN_READ; @@ -238,7 +236,7 @@ sorter_run(struct sort_context *ctx) else bin->fb = ctx->in_fb; bin->ident = "in"; - bin->size = ctx->in_size; /* FIXME: Sizes should be either sh_off_t or u64, not both; beware of ~0U */ + bin->size = ctx->in_size; bin->hash_bits = ctx->hash_bits; clist_add_tail(&ctx->bucket_list, &bin->n); SORT_XTRACE(2, "Input size: %s", F_BSIZE(bin)); diff --git a/lib/sorter/s-fixint.h b/lib/sorter/s-fixint.h index 91485093..3d82313e 100644 --- a/lib/sorter/s-fixint.h +++ b/lib/sorter/s-fixint.h @@ -19,7 +19,7 @@ static int P(internal)(struct sort_context *ctx, struct sort_bucket *bin, struct sorter_alloc_buf(ctx); struct fastbuf *in = sbuck_read(bin); P(key) *buf = ctx->big_buf; - size_t bufsize = ctx->big_buf_half_size; /* FIXME: In some cases, we can use the whole buffer */ + size_t bufsize = ctx->big_buf_half_size; #ifdef CPU_64BIT_POINTERS bufsize = MIN((u64)bufsize, (u64)~0U * sizeof(P(key))); // The number of records must fit in uns #endif diff --git a/lib/sorter/s-radix.h b/lib/sorter/s-radix.h index c83d6049..0b5f5e9f 100644 --- a/lib/sorter/s-radix.h +++ b/lib/sorter/s-radix.h @@ -7,8 +7,6 @@ * of the GNU Lesser General Public License. */ -/* FIXME: This is a very trivial implementation so far. Use fbdirect and such things to speed up. */ - #include static void P(radix_split)(struct sort_context *ctx UNUSED, struct sort_bucket *bin, struct sort_bucket **bouts, uns bitpos, uns numbits) diff --git a/lib/sorter/s-twoway.h b/lib/sorter/s-twoway.h index 0e6d6442..ef23a2e9 100644 --- a/lib/sorter/s-twoway.h +++ b/lib/sorter/s-twoway.h @@ -7,9 +7,6 @@ * of the GNU Lesser General Public License. */ -/* FIXME: There is a plenty of room for further optimization */ -/* FIXME: Swap outputs if there already are some runs? */ - static void P(twoway_merge)(struct sort_context *ctx UNUSED, struct sort_bucket **ins, struct sort_bucket **outs) { struct fastbuf *fin1, *fin2, *fout1, *fout2, *ftmp; diff --git a/lib/sorter/sort-test.c b/lib/sorter/sort-test.c index 5464f744..8cf1433f 100644 --- a/lib/sorter/sort-test.c +++ b/lib/sorter/sort-test.c @@ -410,7 +410,6 @@ static int s5_gen(struct s5_pair *p) static void s5_write_merged(struct fastbuf *f, struct key5 **keys, void **data, uns n, void *buf) { - /* FIXME: Allow mode where this function is not defined? */ u32 *a = buf; uns m = 0; for (uns i=0; i