--- /dev/null
+Testing:
+o Giant runs.
+o Records of odd lengths.
+o Empty files.
+
+Improvements:
+o Alignment? Use of SSE?
+o Use radix-sort for internal sorting.
+o Parallelization of internal sorting.
+o Clean up data types and make sure they cannot overflow. (size_t vs. u64 vs. sh_off_t vs. uns)
+o Buffer sizing in internal sorters.
+o Switching between direct and normal I/O.
+o When merging, choose the output file with fewer runs instead of always switching?
+o Implement multi-way merge.
+o Mode with only 2-way unification?
+o Speed up 2-way merge.
+o Speed up radix splitting.
clist_init(&ctx->bucket_list);
sorter_prepare_buf(ctx);
- /* FIXME: Remember to test sorting of empty files */
-
// Create bucket containing the source
struct sort_bucket *bin = sbuck_new(ctx);
bin->flags = SBF_SOURCE | SBF_OPEN_READ;
else
bin->fb = ctx->in_fb;
bin->ident = "in";
- bin->size = ctx->in_size; /* FIXME: Sizes should be either sh_off_t or u64, not both; beware of ~0U */
+ bin->size = ctx->in_size;
bin->hash_bits = ctx->hash_bits;
clist_add_tail(&ctx->bucket_list, &bin->n);
SORT_XTRACE(2, "Input size: %s", F_BSIZE(bin));
sorter_alloc_buf(ctx);
struct fastbuf *in = sbuck_read(bin);
P(key) *buf = ctx->big_buf;
- size_t bufsize = ctx->big_buf_half_size; /* FIXME: In some cases, we can use the whole buffer */
+ size_t bufsize = ctx->big_buf_half_size;
#ifdef CPU_64BIT_POINTERS
bufsize = MIN((u64)bufsize, (u64)~0U * sizeof(P(key))); // The number of records must fit in uns
#endif
* of the GNU Lesser General Public License.
*/
-/* FIXME: This is a very trivial implementation so far. Use fbdirect and such things to speed up. */
-
#include <string.h>
static void P(radix_split)(struct sort_context *ctx UNUSED, struct sort_bucket *bin, struct sort_bucket **bouts, uns bitpos, uns numbits)
* of the GNU Lesser General Public License.
*/
-/* FIXME: There is a plenty of room for further optimization */
-/* FIXME: Swap outputs if there already are some runs? */
-
static void P(twoway_merge)(struct sort_context *ctx UNUSED, struct sort_bucket **ins, struct sort_bucket **outs)
{
struct fastbuf *fin1, *fin2, *fout1, *fout2, *ftmp;
static void s5_write_merged(struct fastbuf *f, struct key5 **keys, void **data, uns n, void *buf)
{
- /* FIXME: Allow mode where this function is not defined? */
u32 *a = buf;
uns m = 0;
for (uns i=0; i<n; i++)