return ctx->internal_sort(ctx, in, out, out_only);
}
-static inline struct sort_bucket *
-sbuck_join_to(struct sort_bucket *b)
+static struct sort_bucket *
+sbuck_join_to(struct sort_bucket *b, sh_off_t *sizep)
{
if (sorter_debug & SORT_DEBUG_NO_JOIN)
return NULL;
if (!(out->flags & SBF_FINAL))
return NULL;
ASSERT(out->runs == 1);
+ *sizep = sbuck_size(out);
return out;
}
+/* Dispose of a freshly sorted bucket <b>: if <join> is non-NULL, the sorted
+ * data have already been appended to <join>, so drop <b> and account the
+ * extra run; otherwise insert <b> into the bucket list after <list_pos>.
+ * Returns the number of bytes contributed by this step (0 if nothing). */
+static sh_off_t
+sbuck_ins_or_join(struct sort_bucket *b, cnode *list_pos, struct sort_bucket *join, sh_off_t join_size)
+{
+ if (join)
+ {
+ if (b)
+ sbuck_drop(b);
+ ASSERT(join->runs == 2);
+ join->runs--;
+ return sbuck_size(join) - join_size;
+ }
+ else if (b)
+ {
+ clist_insert_after(&b->n, list_pos);
+ return sbuck_size(b);
+ }
+ else
+ return 0;
+}
+
static void
sorter_join(struct sort_bucket *b)
{
{
struct sort_bucket *ins[3] = { NULL }, *outs[3] = { NULL };
cnode *list_pos = b->n.prev;
- struct sort_bucket *join = sbuck_join_to(b);
+ sh_off_t join_size;
+ struct sort_bucket *join = sbuck_join_to(b, &join_size);
if (!(sorter_debug & SORT_DEBUG_NO_PRESORT) || (b->flags & SBF_CUSTOM_PRESORT))
{
if (!sorter_presort(ctx, b, ins[0], join ? : ins[0]))
{
sorter_stop_timer(ctx, &ctx->total_pre_time);
- SORT_XTRACE(((b->flags & SBF_SOURCE) ? 1 : 2), "Sorted in memory");
- if (join)
- {
- ASSERT(join->runs == 2);
- join->runs--;
- sbuck_drop(ins[0]);
- }
- else
- clist_insert_after(&ins[0]->n, list_pos);
+ sh_off_t size = sbuck_ins_or_join(ins[0], list_pos, join, join_size);
+ SORT_XTRACE(((b->flags & SBF_SOURCE) ? 1 : 2), "Sorted in memory (%s, %dMB/s)", stk_fsize(size), sorter_speed(ctx, size));
sbuck_drop(b);
return;
}
do {
++pass;
sorter_start_timer(ctx);
- if (ins[0]->runs == 1 && ins[1]->runs == 1 && join)
+ if (ins[0]->runs <= 1 && ins[1]->runs <= 1 && join)
{
// This is guaranteed to produce a single run, so join if possible
- sh_off_t join_size = sbuck_size(join);
outs[0] = join;
outs[1] = NULL;
ctx->twoway_merge(ctx, ins, outs);
- ASSERT(join->runs == 2);
- join->runs--;
- join_size = sbuck_size(join) - join_size;
+ sh_off_t size = sbuck_ins_or_join(NULL, NULL, join, join_size);
sorter_stop_timer(ctx, &ctx->total_ext_time);
- SORT_TRACE("Mergesort pass %d (final run, %s, %dMB/s)", pass, stk_fsize(join_size), sorter_speed(ctx, join_size));
+ SORT_TRACE("Mergesort pass %d (final run, %s, %dMB/s)", pass, stk_fsize(size), sorter_speed(ctx, size));
sbuck_drop(ins[0]);
sbuck_drop(ins[1]);
return;
{
clist parts;
cnode *list_pos = b->n.prev;
- struct sort_bucket *join = sbuck_join_to(b);
+ sh_off_t join_size;
+ struct sort_bucket *join = sbuck_join_to(b, &join_size);
+ uns trace_level = (b->flags & SBF_SOURCE) ? 1 : 2;
clist_init(&parts);
ASSERT(!(sorter_debug & SORT_DEBUG_NO_PRESORT));
- // FIXME: What if the parts will be too small?
SORT_XTRACE(3, "%s", ((b->flags & SBF_CUSTOM_PRESORT) ? "Custom presorting" : "Presorting"));
uns cont;
uns part_cnt = 0;
do
{
struct sort_bucket *p = sbuck_new(ctx);
- clist_add_tail(&parts, &p->n);
cont = sorter_presort(ctx, b, p, (!part_cnt && join) ? join : p);
- part_cnt++;
- total_size += sbuck_size(p);
- sbuck_swap_out(p);
+ if (sbuck_have(p))
+ {
+ part_cnt++;
+ clist_add_tail(&parts, &p->n);
+ total_size += sbuck_size(p);
+ sbuck_swap_out(p);
+ }
+ else
+ sbuck_drop(p);
}
while (cont);
sorter_stop_timer(ctx, &ctx->total_pre_time);
+ sorter_free_buf(ctx);
+ sbuck_drop(b);
- // FIXME: This is way too similar to the two-way case.
- if (part_cnt == 1)
+ if (part_cnt <= 1)
{
- struct sort_bucket *p = clist_head(&parts);
- SORT_XTRACE(((b->flags & SBF_SOURCE) ? 1 : 2), "Sorted in memory");
- if (join)
- {
- ASSERT(join->runs == 2);
- join->runs--;
- sbuck_drop(p);
- }
- else
- clist_insert_after(&p->n, list_pos);
- sbuck_drop(b);
+ sh_off_t size = sbuck_ins_or_join(clist_head(&parts), list_pos, (part_cnt ? NULL : join), join_size);
+ SORT_XTRACE(trace_level, "Sorted in memory (%s, %dMB/s)", stk_fsize(size), sorter_speed(ctx, size));
return;
}
SORT_TRACE("Multi-way presorting pass (%d parts, %s, %dMB/s)", part_cnt, stk_fsize(total_size), sorter_speed(ctx, total_size));
- sbuck_drop(b);
- uns max_ways = 16;
+ uns max_ways = 1 << sorter_max_multiway_bits;
struct sort_bucket *ways[max_ways+1];
SORT_XTRACE(2, "Starting up to %d-way merge", max_ways);
for (;;)
ASSERT(n > 1);
struct sort_bucket *out;
- out = sbuck_new(ctx); // FIXME: No joining so far
+ if (clist_empty(&parts) && join)
+ out = join;
+ else
+ out = sbuck_new(ctx);
sorter_start_timer(ctx);
ctx->multiway_merge(ctx, ways, out);
sorter_stop_timer(ctx, &ctx->total_ext_time);
if (clist_empty(&parts))
{
- clist_insert_after(&out->n, list_pos);
- SORT_TRACE("Multi-way merge completed (%s, %dMB/s)", F_BSIZE(out), sorter_speed(ctx, sbuck_size(out)));
+ sh_off_t size = sbuck_ins_or_join((join ? NULL : out), list_pos, join, join_size);
+ SORT_TRACE("Multi-way merge completed (%d ways, %s, %dMB/s)", n, stk_fsize(size), sorter_speed(ctx, size));
return;
}
else
{
+ sbuck_swap_out(out);
clist_add_tail(&parts, &out->n);
SORT_TRACE("Multi-way merge pass (%d ways, %s, %dMB/s)", n, F_BSIZE(out), sorter_speed(ctx, sbuck_size(out)));
}
}
}
-static uns
-sorter_radix_bits(struct sort_context *ctx, struct sort_bucket *b)
-{
- if (!b->hash_bits || b->hash_bits < sorter_min_radix_bits ||
- !ctx->radix_split ||
- (b->flags & SBF_CUSTOM_PRESORT) ||
- (sorter_debug & SORT_DEBUG_NO_RADIX))
- return 0;
-
- u64 in = sbuck_size(b);
- u64 mem = ctx->internal_estimate(ctx, b) * 0.8; // FIXME: Magical factor for hash non-uniformity
- if (in <= mem)
- return 0;
-
- uns n = sorter_min_radix_bits;
- while (n < sorter_max_radix_bits && n < b->hash_bits && (in >> n) > mem)
- n++;
- return n;
-}
-
static void
sorter_radix(struct sort_context *ctx, struct sort_bucket *b, uns bits)
{
sbuck_drop(b);
}
+/* Examine the bucket <b> and pick the cheapest applicable strategy:
+ * join a single-run bucket, radix-split, multi-way merge, or fall back
+ * to the two-way merge (which also covers the fits-in-memory case). */
+static void
+sorter_decide(struct sort_context *ctx, struct sort_bucket *b)
+{
+ // Drop empty buckets
+ if (!sbuck_have(b))
+ {
+ SORT_XTRACE(3, "Dropping empty bucket");
+ sbuck_drop(b);
+ return;
+ }
+
+ // How many bits of bucket size we have to reduce before it fits in the RAM?
+ // (this is insanely large if the input size is unknown, but it serves our purpose)
+ u64 insize = sbuck_size(b);
+ u64 mem = ctx->internal_estimate(ctx, b) * 0.8; // FIXME: Magical factor for various non-uniformities
+ uns bits = 0;
+ while ((insize >> bits) > mem)
+ bits++;
+
+ // Calculate the possibilities of radix splits
+ uns radix_bits;
+ if (!ctx->radix_split ||
+ (b->flags & SBF_CUSTOM_PRESORT) ||
+ (sorter_debug & SORT_DEBUG_NO_RADIX))
+ radix_bits = 0;
+ else
+ {
+ radix_bits = MIN(bits, b->hash_bits);
+ radix_bits = MIN(radix_bits, sorter_max_radix_bits);
+ if (radix_bits < sorter_min_radix_bits)
+ radix_bits = 0;
+ }
+
+ // The same for multi-way merges
+ uns multiway_bits;
+ if (!ctx->multiway_merge ||
+ (sorter_debug & SORT_DEBUG_NO_MULTIWAY) ||
+ (sorter_debug & SORT_DEBUG_NO_PRESORT))
+ multiway_bits = 0;
+ else
+ {
+ multiway_bits = MIN(bits, sorter_max_multiway_bits);
+ if (multiway_bits < sorter_min_multiway_bits)
+ multiway_bits = 0;
+ }
+
+ SORT_XTRACE(2, "Decisions: size=%s max=%s runs=%d bits=%d hash=%d -> radix=%d multi=%d",
+ stk_fsize(insize), stk_fsize(mem), b->runs, bits, b->hash_bits,
+ radix_bits, multiway_bits);
+
+ // If the input already consists of a single run, just join it
+ if (b->runs)
+ return sorter_join(b);
+
+ // If everything fits in memory, the 2-way strategy will sort it in memory
+ if (!bits)
+ return sorter_twoway(ctx, b);
+
+ // If we can reduce everything in one pass, do so and prefer radix splits
+ if (radix_bits == bits)
+ return sorter_radix(ctx, b, radix_bits);
+ if (multiway_bits == bits)
+ return sorter_multiway(ctx, b);
+
+ // Otherwise, reduce as much as possible and again prefer radix splits
+ if (radix_bits)
+ return sorter_radix(ctx, b, radix_bits);
+ if (multiway_bits)
+ return sorter_multiway(ctx, b);
+
+ // Fall back to 2-way strategy if nothing else applies
+ return sorter_twoway(ctx, b);
+}
+
void
sorter_run(struct sort_context *ctx)
{
bout->runs = 1;
clist_add_head(&ctx->bucket_list, &bout->n);
+ // Repeatedly sort buckets
struct sort_bucket *b;
- uns bits;
while (bout = clist_head(&ctx->bucket_list), b = clist_next(&ctx->bucket_list, &bout->n))
- {
- SORT_XTRACE(2, "Next block: %s, %d hash bits", F_BSIZE(b), b->hash_bits);
- if (!sbuck_have(b))
- sbuck_drop(b);
- else if (b->runs == 1)
- sorter_join(b);
- else if (ctx->multiway_merge && !(sorter_debug & (SORT_DEBUG_NO_MULTIWAY | SORT_DEBUG_NO_PRESORT))) // FIXME: Just kidding...
- sorter_multiway(ctx, b);
- else if (bits = sorter_radix_bits(ctx, b))
- sorter_radix(ctx, b, bits);
- else
- sorter_twoway(ctx, b);
- }
+ sorter_decide(ctx, b);
sorter_free_buf(ctx);
sbuck_write(bout); // Force empty bucket to a file