o Mode with only 2-way unification?
o Speed up 2-way merge.
o Speed up radix splitting.
+o A debug switch for disabling the presorter.
#define ASORT_EXTRA_ARGS , P(key) *ary
#include "lib/arraysort.h"
+static uns P(internal_num_keys)(struct sort_context *ctx)
+{
+ size_t bufsize = ctx->big_buf_half_size;
+#ifdef SORT_UNIFY
+ // When we promise unification, we have to reduce the number of records
+ // to be sure that both pointers and merged records fit in the 2nd half
+// of the big_buf. So we eat as much memory as s-internal.h does, but we
+// are still faster.
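+// (For example, with 8-byte keys and 8-byte pointers this halves maxkeys
+// compared to the non-unifying case.)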
+ u64 maxkeys = bufsize / (sizeof(P(key)) + sizeof(void *));
+#else
+ u64 maxkeys = bufsize / sizeof(P(key));
+#endif
+ return MIN(maxkeys, ~0U); // The number of records must fit in uns
+}
+
static int P(internal)(struct sort_context *ctx, struct sort_bucket *bin, struct sort_bucket *bout, struct sort_bucket *bout_only)
{
sorter_alloc_buf(ctx);
struct fastbuf *in = sbuck_read(bin);
P(key) *buf = ctx->big_buf;
- size_t bufsize = ctx->big_buf_half_size;
-#ifdef CPU_64BIT_POINTERS
- bufsize = MIN((u64)bufsize, (u64)~0U * sizeof(P(key))); // The number of records must fit in uns
-#endif
- uns maxkeys = bufsize / sizeof(P(key));
+ uns maxkeys = P(internal_num_keys)(ctx);
SORT_XTRACE(3, "s-fixint: Reading (maxkeys=%u)", maxkeys);
uns n = 0;
while (n < maxkeys && P(read_key)(in, &buf[n]))
n++;
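+ // Nothing was read, so the input is empty and no run needs to be written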
+ if (!n)
+ return 0;
SORT_XTRACE(3, "s-fixint: Sorting %u items", n);
P(array_sort)(n, buf);
#ifdef SORT_UNIFY
if (i < n-1 && !P(compare)(&buf[i], &buf[i+1]))
{
- ASSERT(0); /* FIXME: Implement */
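+ // A run of equal keys starts here: collect pointers to all of them in the
+ // second half of big_buf (internal_num_keys() left room for this) and let
+ // write_merged() unify them into a single record.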
+ P(key) **keys = ctx->big_buf_half;
+ uns m = 2;
+ keys[0] = &buf[i];
+ keys[1] = &buf[i+1];
+ while (i+m < n && !P(compare)(&buf[i], &buf[i+m]))
+ {
+ keys[m] = &buf[i+m];
+ m++;
+ }
+ P(write_merged)(out, keys, NULL, m, keys+m);
+ merged += m - 1;
+ i += m - 1;
continue;
}
#endif
static u64
P(internal_estimate)(struct sort_context *ctx, struct sort_bucket *b UNUSED)
{
- return ctx->big_buf_half_size - 1; // -1 since if the buffer is full, we don't recognize EOF
+ return P(internal_num_keys)(ctx) * sizeof(P(key)) - 1; // -1 since if the buffer is full, we don't recognize EOF
}
return (byte *) key + ksize;
}
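+// Number of bytes of big_buf available to the internal pre-sorter; on 64-bit
+// machines it is capped so that the number of items still fits in an uns.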
+static size_t P(internal_buf_size)(struct sort_context *ctx)
+{
+ size_t bufsize = ctx->big_buf_half_size; /* FIXME: In some cases, we can use the whole buffer */
+#ifdef CPU_64BIT_POINTERS
+ bufsize = MIN((u64)bufsize, (u64)~0U * sizeof(P(internal_item_t))); // The number of records must fit in uns
+#endif
+ return bufsize;
+}
+
static int P(internal)(struct sort_context *ctx, struct sort_bucket *bin, struct sort_bucket *bout, struct sort_bucket *bout_only)
{
sorter_alloc_buf(ctx);
else if (!P(read_key)(in, &key))
return 0;
+ size_t bufsize = P(internal_buf_size)(ctx);
#ifdef SORT_VAR_DATA
- if (sizeof(key) + 1024 + SORT_DATA_SIZE(key) > ctx->big_buf_half_size)
+ if (sizeof(key) + 1024 + SORT_DATA_SIZE(key) > bufsize)
{
SORT_XTRACE(3, "s-internal: Generating a giant run");
- struct fastbuf *out = sbuck_write(bout); /* FIXME: Using a non-direct buffer would be nice here */
+ struct fastbuf *out = sbuck_write(bout);
P(copy_data)(&key, in, out);
bout->runs++;
return 1; // We don't know, but 1 is always safe
}
#endif
- size_t bufsize = ctx->big_buf_half_size; /* FIXME: In some cases, we can use the whole buffer */
-#ifdef CPU_64BIT_POINTERS
- bufsize = MIN((u64)bufsize, (u64)~0U * sizeof(P(internal_item_t))); // The number of records must fit in uns
-#endif
-
SORT_XTRACE(3, "s-internal: Reading (bufsize=%zd)", bufsize);
P(internal_item_t) *item_array = ctx->big_buf, *item = item_array, *last_item;
byte *end = (byte *) ctx->big_buf + bufsize;
static u64
P(internal_estimate)(struct sort_context *ctx, struct sort_bucket *b UNUSED)
{
- uns avg;
#ifdef SORT_VAR_KEY
- avg = ALIGN_TO(sizeof(P(key))/4, CPU_STRUCT_ALIGN); // Wild guess...
+ uns avg = ALIGN_TO(sizeof(P(key))/4, CPU_STRUCT_ALIGN); // Wild guess...
#else
- avg = ALIGN_TO(sizeof(P(key)), CPU_STRUCT_ALIGN);
+ uns avg = ALIGN_TO(sizeof(P(key)), CPU_STRUCT_ALIGN);
#endif
// We ignore the data part of records, it probably won't make the estimate much worse
- return (ctx->big_buf_half_size / (avg + sizeof(P(internal_item_t))) * avg);
+ return (P(internal_buf_size)(ctx) / (avg + sizeof(P(internal_item_t))) * avg);
}
#endif
}
-#if defined(SORT_VAR_KEY) || defined(SORT_VAR_DATA) || defined(SORT_UNIFY)
+#if defined(SORT_VAR_KEY) || defined(SORT_VAR_DATA)
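+// s-fixint.h handles fixed-size records (including unification); s-internal.h
+// is needed only for variable-size keys or data.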
#include "lib/sorter/s-internal.h"
#else
#include "lib/sorter/s-fixint.h"