extern u64 sorter_bufsize;
#define SORT_TRACE(x...) do { if (sorter_trace) log(L_DEBUG, x); } while(0)
-#define SORT_XTRACE(x...) do { if (sorter_trace > 1) log(L_DEBUG, x); } while(0)
+/* Log a debug message only when sorter_trace is at least `level'. The level
+ * argument is parenthesized so that any expression can be passed safely. */
+#define SORT_XTRACE(level, x...) do { if (sorter_trace >= (level)) log(L_DEBUG, x); } while(0)
enum sort_debug {
SORT_DEBUG_NO_PRESORT = 1,
struct fastbuf *sbuck_write(struct sort_bucket *b);
void sbuck_swap_out(struct sort_bucket *b);
+/*
+ * F_SIZE(x) formats the byte count x via format_size() and evaluates to a
+ * pointer to the resulting string, for direct use as a "%s" argument.
+ *
+ * The 16-byte buffer is obtained with __builtin_alloca(), so it stays valid
+ * until the function that uses the macro returns. A plain array declared
+ * inside the statement expression would already be dead when the caller
+ * reads the string, and two uses in one call could share the same storage.
+ * Because the storage is released only on function return, avoid using the
+ * macro in loops with an unbounded number of iterations.
+ *
+ * F_BSIZE(b) is a shorthand for formatting sbuck_size(b).
+ */
+#define F_SIZE(x) ({ byte *fs_buf_ = __builtin_alloca(16); format_size(fs_buf_, (x)); fs_buf_; })
+#define F_BSIZE(b) F_SIZE(sbuck_size(b))
+void format_size(byte *buf, u64 x);
+
#endif
#include <string.h>
-static int sorter_presort(struct sort_context *ctx, struct sort_bucket *in, struct sort_bucket *out, struct sort_bucket *out_only)
+static int
+sorter_presort(struct sort_context *ctx, struct sort_bucket *in, struct sort_bucket *out, struct sort_bucket *out_only)
{
sorter_alloc_buf(ctx);
if (in->flags & SBF_CUSTOM_PRESORT)
{
// The final bucket doesn't have any file associated yet, so replace
// it with the new bucket.
- SORT_XTRACE("Replaced final bucket");
+ SORT_XTRACE(2, "Replaced final bucket");
b->flags |= SBF_FINAL;
sbuck_drop(join);
}
else
{
- SORT_TRACE("Copying %jd bytes to output file", (uintmax_t) sbuck_size(b));
+ SORT_TRACE("Copying to output file: %s", F_BSIZE(b));
struct fastbuf *src = sbuck_read(b);
struct fastbuf *dest = sbuck_write(join);
bbcopy(src, dest, ~0U);
if (!(sorter_debug & SORT_DEBUG_NO_PRESORT) || (b->flags & SBF_CUSTOM_PRESORT))
{
- SORT_TRACE("Presorting");
+ SORT_XTRACE(2, "Presorting");
ins[0] = sbuck_new(ctx);
if (!sorter_presort(ctx, b, ins[0], join ? : ins[0]))
{
- SORT_XTRACE("Sorted in memory");
+ SORT_TRACE("Sorted in memory");
if (join)
sbuck_drop(ins[0]);
else
}
else
{
- SORT_TRACE("Skipped presorting");
+ SORT_XTRACE(2, "Presorting disabled");
ins[0] = b;
}
- SORT_TRACE("Main sorting");
+ SORT_XTRACE(2, "Main sorting");
+ uns pass = 0;
do {
+ ++pass;
if (ins[0]->runs == 1 && ins[1]->runs == 1 && join)
{
// This is guaranteed to produce a single run, so join if possible
ctx->twoway_merge(ctx, ins, outs);
ASSERT(outs[0]->runs == 2);
outs[0]->runs--;
- SORT_TRACE("Pass done (joined final run)");
+ SORT_TRACE("Mergesort pass %d (final run, %s)", pass, F_BSIZE(outs[0]));
sbuck_drop(ins[0]);
sbuck_drop(ins[1]);
return;
outs[1] = sbuck_new(ctx);
outs[2] = NULL;
ctx->twoway_merge(ctx, ins, outs);
- SORT_TRACE("Pass done (%d+%d runs, %jd+%jd bytes)", outs[0]->runs, outs[1]->runs, (uintmax_t) sbuck_size(outs[0]), (uintmax_t) sbuck_size(outs[1]));
+ SORT_TRACE("Mergesort pass %d (%d+%d runs, %s+%s)", pass, outs[0]->runs, outs[1]->runs, F_BSIZE(outs[0]), F_BSIZE(outs[1]));
sbuck_drop(ins[0]);
sbuck_drop(ins[1]);
memcpy(ins, outs, 3*sizeof(struct sort_bucket *));
sorter_free_buf(ctx);
sbuck_write(bout); // Force empty bucket to a file
- SORT_XTRACE("Final size: %jd", (uintmax_t) sbuck_size(bout));
+ SORT_XTRACE(2, "Final size: %s", F_BSIZE(bout));
ctx->out_fb = sbuck_read(bout);
}
#ifdef SORT_VAR_DATA
if (sizeof(key) + 1024 + SORT_DATA_SIZE(key) > ctx->big_buf_half_size)
{
- SORT_XTRACE("s-internal: Generating a giant run");
+ SORT_XTRACE(3, "s-internal: Generating a giant run");
struct fastbuf *out = sbuck_write(bout); /* FIXME: Using a non-direct buffer would be nice here */
P(copy_data)(&key, in, out);
bout->runs++;
bufsize = MIN((u64)bufsize, (u64)~0U * sizeof(P(internal_item_t))); // The number of records must fit in uns
#endif
- SORT_XTRACE("s-internal: Reading (bufsize=%zd)", bufsize);
+ SORT_XTRACE(3, "s-internal: Reading (bufsize=%zd)", bufsize);
P(internal_item_t) *item_array = ctx->big_buf, *item = item_array, *last_item;
byte *end = (byte *) ctx->big_buf + bufsize;
do
last_item = item;
uns count = last_item - item_array;
- SORT_XTRACE("s-internal: Sorting %d items", count);
+ SORT_XTRACE(3, "s-internal: Sorting %d items", count);
P(array_sort)(count, item_array);
- SORT_XTRACE("s-internal: Writing");
+ SORT_XTRACE(3, "s-internal: Writing");
if (!ctx->more_keys)
bout = bout_only;
struct fastbuf *out = sbuck_write(bout);
#endif
}
#ifdef SORT_UNIFY
- SORT_XTRACE("Merging reduced %d records", merged);
+ SORT_XTRACE(3, "Merging reduced %d records", merged);
#endif
return ctx->more_keys;
bseek(b->fb, 0, SEEK_END);
bconfig(b->fb, BCONFIG_IS_TEMP_FILE, 1); /* FIXME: Was it always so? */
b->flags &= ~SBF_SWAPPED_OUT;
- SORT_XTRACE("Swapped in %s", b->filename);
+ SORT_XTRACE(2, "Swapped in %s", b->filename);
}
}
bclose(b->fb);
b->fb = NULL;
b->flags |= SBF_SWAPPED_OUT;
- SORT_XTRACE("Swapped out %s", b->filename);
+ SORT_XTRACE(2, "Swapped out %s", b->filename);
}
}
ctx->big_buf_size = 2*bs;
ctx->big_buf_half = ((byte*) ctx->big_buf) + bs;
ctx->big_buf_half_size = bs;
- SORT_XTRACE("Allocated sorting buffer (%jd bytes)", (uintmax_t) bs);
+ SORT_XTRACE(2, "Allocated sorting buffer (%jd bytes)", (uintmax_t) bs);
}
void
return;
big_free(ctx->big_buf, ctx->big_buf_size);
ctx->big_buf = NULL;
- SORT_XTRACE("Freed sorting buffer");
+ SORT_XTRACE(2, "Freed sorting buffer");
+}
+
+/*
+ * Render the byte count x into buf as a short human-readable string with a
+ * K, M or G suffix (units of 2^10, 2^20 and 2^30 bytes). Values below ten
+ * units are printed with one decimal place, larger ones as whole numbers.
+ *
+ * buf must have room for the longest possible result: 11 digits, the unit
+ * letter and the terminating NUL, i.e. 13 bytes (F_SIZE supplies 16).
+ *
+ * The gigabyte count is printed as an unsigned 64-bit quantity, because
+ * x / 2^30 does not fit in an int for very large x.
+ */
+void
+format_size(byte *buf, u64 x)
+{
+  char *out = (char *) buf;
+  const u64 kb = (u64)1 << 10;
+  const u64 mb = (u64)1 << 20;
+  const u64 gb = (u64)1 << 30;
+
+  if (x < 10*kb)
+    sprintf(out, "%.1fK", (double)x / kb);
+  else if (x < mb)
+    sprintf(out, "%dK", (int)(x / kb));
+  else if (x < 10*mb)
+    sprintf(out, "%.1fM", (double)x / mb);
+  else if (x < gb)
+    sprintf(out, "%dM", (int)(x / mb));
+  else if (x < 10*gb)
+    sprintf(out, "%.1fG", (double)x / gb);
+  else
+    sprintf(out, "%lluG", (unsigned long long)(x / gb));
}
#include "lib/lib.h"
#include "lib/getopt.h"
+#include "lib/conf.h"
#include "lib/fastbuf.h"
#include "lib/hashfunc.h"
#include "lib/md5.h"
#include <string.h>
#include <fcntl.h>
+/*** Time measurement ***/
+
+/* Begin timing a test: calls init_timer(); the matching stop() reports the
+ * value of get_timer() afterwards. */
+static void
+start(void)
+{
+  init_timer();
+}
+
+/* Finish timing a test and log the result at L_INFO level.
+ * NOTE(review): get_timer() is presumably in milliseconds (the value is
+ * divided by 1000 and printed as seconds) -- confirm against the timer API. */
+static void
+stop(void)
+{
+  double seconds = get_timer() / 1000.;
+
+  log(L_INFO, "Test took %.3fs", seconds);
+}
+
/*** Simple 4-byte integer keys ***/
struct key1 {
#include "lib/sorter/sorter.h"
static void
-test_int(int mode, uns N)
+test_int(int mode, u64 size)
{
- N = nextprime(N);
+ uns N = nextprime(MIN(size/4, 0xffff0000));
uns K = N/4*3;
- log(L_INFO, "Integers (%s, N=%d)", ((char *[]) { "increasing", "decreasing", "random" })[mode], N);
+ log(L_INFO, ">>> Integers (%s, N=%d)", ((char *[]) { "increasing", "decreasing", "random" })[mode], N);
struct fastbuf *f = bopen_tmp(65536);
for (uns i=0; i<N; i++)
bputl(f, (mode==0) ? i : (mode==1) ? N-1-i : ((u64)i * K + 17) % N);
brewind(f);
- log(L_INFO, "Sorting");
+ start();
f = s1_sort(f, NULL, N-1);
+ stop();
- log(L_INFO, "Verifying");
+ SORT_XTRACE(2, "Verifying");
for (uns i=0; i<N; i++)
{
uns j = bgetl(f);
#include "lib/sorter/sorter.h"
+/*
+ * Counted-integers test: writes every key 0..N-1 exactly 2*mult times, each
+ * record carrying a count of 1, sorts the file with s2_sort() and then
+ * checks that each key comes back once, in order, with a count of 2*mult.
+ * `mode' selects increasing (0), decreasing (1) or pseudo-random (2) input
+ * order; `size' is the requested amount of input data in bytes.
+ */
static void
-test_counted(int mode, uns N)
+test_counted(int mode, u64 size)
{
- N = nextprime(N/4);
+ u64 items = size / sizeof(struct key2);
+ uns mult = 2;
+ while (items/(2*mult) > 0xffff0000)
+ mult++;
+ uns N = nextprime(items/(2*mult));
uns K = N/4*3;
- log(L_INFO, "Counted integers (%s, N=%d)", ((char *[]) { "increasing", "decreasing", "random" })[mode], N);
+ log(L_INFO, ">>> Counted integers (%s, N=%d, mult=%d)", ((char *[]) { "increasing", "decreasing", "random" })[mode], N, mult);
struct fastbuf *f = bopen_tmp(65536);
- for (uns i=0; i<2*N; i++)
- for (uns j=0; j<2; j++)
- {
- bputl(f, (mode==0) ? (i%N) : (mode==1) ? N-1-(i%N) : ((u64)i * K + 17) % N);
- bputl(f, 1);
- }
+ for (uns m=0; m<mult; m++)
+ for (uns i=0; i<N; i++)
+ for (uns j=0; j<2; j++)
+ {
+ bputl(f, (mode==0) ? (i%N) : (mode==1) ? N-1-(i%N) : ((u64)i * K + 17) % N);
+ bputl(f, 1);
+ }
brewind(f);
- log(L_INFO, "Sorting");
+ start();
f = s2_sort(f, NULL, N-1);
+ stop();
- log(L_INFO, "Verifying");
+ SORT_XTRACE(2, "Verifying");
for (uns i=0; i<N; i++)
{
uns j = bgetl(f);
if (i != j)
die("Discrepancy: %d instead of %d", j, i);
uns k = bgetl(f);
- if (k != 4)
- die("Discrepancy: %d has count %d instead of 4", j, k);
+ /* Report the same expected value that the check compares against. */
+ if (k != 2*mult)
+ die("Discrepancy: %d has count %d instead of %d", j, k, 2*mult);
}
bclose(f);
}
}
static void
-test_hashes(int mode, uns N)
+test_hashes(int mode, u64 size)
{
- log(L_INFO, "Hashes (%s, N=%d)", ((char *[]) { "increasing", "decreasing", "random" })[mode], N);
+ uns N = MIN(size / sizeof(struct key3), 0xffffffff);
+ log(L_INFO, ">>> Hashes (%s, N=%d)", ((char *[]) { "increasing", "decreasing", "random" })[mode], N);
struct key3 k, lastk;
struct fastbuf *f = bopen_tmp(65536);
}
brewind(f);
- log(L_INFO, "Sorting");
+ start();
f = s3_sort(f, NULL);
+ stop();
- log(L_INFO, "Verifying");
+ SORT_XTRACE(2, "Verifying");
for (uns i=0; i<N; i++)
{
int ok = breadb(f, &k, sizeof(k));
}
static void
-test_strings(uns mode, uns N)
+test_strings(uns mode, u64 size)
{
- log(L_INFO, "Strings %s(N=%d)", (mode ? "with data " : ""), N);
+ uns avg_item_size = KEY4_MAX/2 + 4 + (mode ? 128 : 0);
+ uns N = MIN(size / avg_item_size, 0xffffffff);
+ log(L_INFO, ">>> Strings %s(N=%d)", (mode ? "with data " : ""), N);
srand(1);
struct key4 k, lastk;
}
brewind(f);
- log(L_INFO, "Sorting");
+ start();
f = (mode ? s4b_sort : s4_sort)(f, NULL);
+ stop();
- log(L_INFO, "Verifying");
+ SORT_XTRACE(2, "Verifying");
for (uns i=0; i<N; i++)
{
int ok = s4_read_key(f, &k);
bclose(f);
}
+/* Number of tests known to run_test(); valid indices are 0..TMAX-1. */
+#define TMAX 11
+
+/*
+ * Run test number i on roughly `size' bytes of input. Indices 0-8 form three
+ * groups of three (integers, counted integers, hashes), each run in modes
+ * 0, 1 and 2; indices 9 and 10 are the string tests with mode 0 and 1.
+ * Any other index does nothing.
+ */
+static void
+run_test(uns i, u64 size)
+{
+  if (i >= TMAX)
+    return;
+
+  if (i >= 9)
+    {
+      test_strings(i - 9, size);
+      return;
+    }
+
+  int mode = i % 3;
+  switch (i / 3)
+    {
+    case 0:
+      test_int(mode, size);
+      break;
+    case 1:
+      test_counted(mode, size);
+      break;
+    case 2:
+      test_hashes(mode, size);
+      break;
+    }
+}
+
+/*
+ * Entry point of the sorter test program.
+ *
+ * Options (in addition to the CF_SHORT_OPTS configuration switches):
+ *   -s <size>  amount of test data, parsed by cf_parse_u64()
+ *   -t <test>  run only the test with this index (0..TMAX-1)
+ *   -v         increase sorter_trace by one; may be repeated
+ *
+ * Without -t, all tests are run in order. Any malformed option or stray
+ * argument prints the usage line and exits with status 1.
+ */
int
main(int argc, char **argv)
{
log_init(NULL);
- if (cf_getopt(argc, argv, CF_SHORT_OPTS, CF_NO_LONG_OPTS, NULL) >= 0 ||
- optind != argc)
- {
- fputs("This program supports only the following command-line arguments:\n" CF_USAGE, stderr);
- exit(1);
- }
-
- uns N = 100000;
-#if 0
- test_int(0, N);
- test_int(1, N);
- test_int(2, N);
- test_counted(0, N);
- test_counted(1, N);
- test_counted(2, N);
- test_hashes(0, N);
- test_hashes(1, N);
- test_hashes(2, N);
- test_strings(0, N);
-#endif
- test_strings(1, N);
+  int c;
+  u64 size = 10000000;
+  uns t = ~0U;			/* ~0U means "no -t given": run every test */
+
+  while ((c = cf_getopt(argc, argv, CF_SHORT_OPTS "s:t:v", CF_NO_LONG_OPTS, NULL)) >= 0)
+    switch (c)
+      {
+      case 's':
+	if (cf_parse_u64(optarg, &size))
+	  goto usage;
+	break;
+      case 't':
+	{
+	  /* Accept only a complete decimal number that names an existing
+	   * test; atol() would silently turn garbage into test 0. */
+	  char *end;
+	  unsigned long idx = strtoul(optarg, &end, 10);
+	  if (end == optarg || *end || idx >= TMAX)
+	    goto usage;
+	  t = idx;
+	}
+	break;
+      case 'v':
+	sorter_trace++;
+	break;
+      default:
+      usage:
+	fputs("Usage: sort-test [-s <size>] [-t <test>] [-v]\n", stderr);
+	exit(1);
+      }
+  if (optind != argc)
+    goto usage;
+
+  if (t != ~0U)
+    run_test(t, size);
+  else
+    for (uns i=0; i<TMAX; i++)
+      run_test(i, size);
return 0;
}