* is supplied automatically and the sorting function gets an extra
* parameter specifying a range of the integers. The better the range
* fits, the faster we sort. Sets up SORT_HASH_xxx automatically.
+ * SORT_INT64(key) the same for 64-bit integers.
*
* Hashing (optional, but it can speed sorting up):
*
* SORT_HASH_BITS signals that a monotone hashing function returning a given number of
* bits is available. A monotone hash is a function f such that f(x) < f(y)
* implies x < y and whose values are approximately uniformly distributed.
- * uns PREFIX_hash(SORT_KEY *a, SORT_KEY *b)
+ * uns PREFIX_hash(SORT_KEY *a)
*
* Unification:
*
* SORT_UNIFY merge items with identical keys, needs the following functions:
- * void PREFIX_write_merged(struct fastbuf *f, SORT_KEY **keys, uns n, byte *buf)
+ * void PREFIX_write_merged(struct fastbuf *f, SORT_KEY **keys, void **data, uns n, void *buf)
* takes n records in memory with keys which compare equal and writes
- * a single record to the given fastbuf. Data for each key can
- * be accessed by the SORT_GET_DATA(*key) macro. `buf' points
- * to a buffer which is guaranteed to hold all given records.
+ * a single record to the given fastbuf. `buf' points to a buffer which
+ * is guaranteed to hold the sum of workspace requirements (see below)
+ * over all given records. The function is allowed to modify all its inputs.
* void PREFIX_copy_merged(SORT_KEY **keys, struct fastbuf **data, uns n, struct fastbuf *dest)
* takes n records with keys in memory and data in fastbufs and writes
- * a single record.
+ * a single record. Used only if SORT_DATA_SIZE or SORT_UNIFY_WORKSPACE is defined.
+ * SORT_UNIFY_WORKSPACE(key)
+ * gets a key and returns the amount of workspace required when merging
+ * the given record. Defaults to 0.
*
* Input (choose one of these):
*
* SORT_INPUT_FILE file of a given name
- * SORT_INPUT_FB fastbuf stream
+ * SORT_INPUT_FB seekable fastbuf stream
+ * SORT_INPUT_PIPE non-seekable fastbuf stream
* SORT_INPUT_PRESORT custom presorter. Calls function
- * int PREFIX_presort(struct fastbuf *dest, byte *buf, size_t bufsize);
+ * int PREFIX_presort(struct fastbuf *dest, void *buf, size_t bufsize)
* to get successive batches of pre-sorted data.
* The function is passed a page-aligned presorting buffer.
* It returns 1 on success or 0 on EOF.
+ * SORT_DELETE_INPUT A C expression; if it evaluates to true, the input files
+ * are deleted as soon as possible.
*
* Output (choose one of these):
*
*
* SORT_UNIQUE all items have distinct keys (checked in debug mode)
*
- * FIXME: Maybe implement these:
- * ??? SORT_DELETE_INPUT a C expression, if true, the input files are
- * deleted as soon as possible
- * ??? SORT_ALIGNED
- *
* The function generated:
*
- * <outfb> PREFIX_SORT(<in>, <out> [,<range>]), where:
+ * <outfb> PREFIX_sort(<in>, <out> [,<range>]), where:
* <in> = input file name/fastbuf or NULL
* <out> = output file name/fastbuf or NULL
* <range> = maximum integer value for the SORT_INT mode
#error Missing definition of sorting key.
#endif
/*
 * Type of the hash values: u64 when SORT_INT64 is defined, uns otherwise.
 * With SORT_INT64, SORT_INT is also defined as an alias for it, so the
 * SORT_INT code paths below apply to 64-bit integer keys as well.
 */
+#ifdef SORT_INT64
+typedef u64 P(hash_t);
+#define SORT_INT SORT_INT64
+#else
+typedef uns P(hash_t);
+#endif
+
#ifdef SORT_INT
static inline int P(compare) (P(key) *x, P(key) *y)
{
}
#ifndef SORT_HASH_BITS
/*
 * Hash for integer keys when the user supplies no SORT_HASH_BITS:
 * returns the value of SORT_INT applied to the key, typed as P(hash_t).
 */
-static inline int P(hash) (P(key) *x)
+static inline P(hash_t) P(hash) (P(key) *x)
{
return SORT_INT((*x));
}
/*
 * Emits one record to `out': the key is written first (through P(write_key)
 * in the new version, instead of a raw bwrite of sizeof(P(key)) bytes).
 * With SORT_VAR_DATA, SORT_DATA_SIZE(*key) is then passed to bbcopy(in, out, ...)
 * -- presumably copying that many bytes of record data from `in' to `out'.
 */
static inline void P(copy_data)(P(key) *key, struct fastbuf *in, struct fastbuf *out)
{
- bwrite(out, key, sizeof(P(key)));
+ P(write_key)(out, key);
#ifdef SORT_VAR_DATA
bbcopy(in, out, SORT_DATA_SIZE(*key));
#else
#endif
}
/*
 * Default P(copy_merged) for SORT_UNIFY when neither SORT_VAR_DATA nor
 * SORT_UNIFY_WORKSPACE is defined: the `data' fastbufs are not used and the
 * n keys are forwarded to P(write_merged) with NULL data and a NULL buffer.
 */
+#if defined(SORT_UNIFY) && !defined(SORT_VAR_DATA) && !defined(SORT_UNIFY_WORKSPACE)
+static inline void P(copy_merged)(P(key) **keys, struct fastbuf **data UNUSED, uns n, struct fastbuf *dest)
+{
+ P(write_merged)(dest, keys, NULL, n, NULL);
+}
+#endif
+
+#if defined(SORT_VAR_KEY) || defined(SORT_VAR_DATA) || defined(SORT_UNIFY_WORKSPACE)
#include "lib/sorter/s-internal.h"
+#else
+#include "lib/sorter/s-fixint.h"
+#endif
+
#include "lib/sorter/s-twoway.h"
+#if defined(SORT_HASH_BITS) || defined(SORT_INT)
+#include "lib/sorter/s-radix.h"
+#endif
+
/*
 * The generated sorting function: clears the sorter context, fills it in
 * according to the compile-time SORT_* configuration, calls sorter_run()
 * and returns ctx.out_fb. In SORT_INT mode an extra int_range argument
 * gives the range of the integer keys.
 */
static struct fastbuf *P(sort)(
#ifdef SORT_INPUT_FILE
byte *in,
struct fastbuf *out
#endif
#ifdef SORT_INT
- , uns int_range
+ , u64 int_range
#endif
)
{
bzero(&ctx, sizeof(ctx));
/*
 * Input selection. in_size is taken from bfilesize() for a named file or
 * a seekable fastbuf, and set to ~(u64)0 for a pipe or a custom presorter.
 */
#ifdef SORT_INPUT_FILE
- ctx.in_fb = bopen(in, O_RDONLY, sorter_stream_bufsize);
+ ctx.in_fb = bopen_file(in, O_RDONLY, &sorter_fb_params);
+ ctx.in_size = bfilesize(ctx.in_fb);
#elif defined(SORT_INPUT_FB)
ctx.in_fb = in;
+ ctx.in_size = bfilesize(in);
+#elif defined(SORT_INPUT_PIPE)
+ ctx.in_fb = in;
+ ctx.in_size = ~(u64)0;
#elif defined(SORT_INPUT_PRESORT)
ASSERT(!in);
ctx.custom_presort = P(presort);
+ ctx.in_size = ~(u64)0;
#else
#error No input given.
#endif
/*
 * NOTE(review): ctx.in_fb is not assigned in the SORT_INPUT_PRESORT branch
 * above, so SORT_DELETE_INPUT presumably must not evaluate to true in that
 * mode -- confirm that bconfig() is never reached with an unset in_fb.
 */
+#ifdef SORT_DELETE_INPUT
+ if (SORT_DELETE_INPUT)
+ bconfig(ctx.in_fb, BCONFIG_IS_TEMP_FILE, 1);
+#endif
#ifdef SORT_OUTPUT_FB
ASSERT(!out);
#ifdef SORT_HASH_BITS
ctx.hash_bits = SORT_HASH_BITS;
+ ctx.radix_split = P(radix_split);
#elif defined(SORT_INT)
/* hash_bits = number of significant bits of int_range (at most 64 in the new version). */
ctx.hash_bits = 0;
- while (ctx.hash_bits < 32 && (int_range >> ctx.hash_bits))
+ while (ctx.hash_bits < 64 && (int_range >> ctx.hash_bits))
ctx.hash_bits++;
+ ctx.radix_split = P(radix_split);
#endif
/* Hook up the generated sorting primitives and run the sorter. */
ctx.internal_sort = P(internal);
+ ctx.internal_estimate = P(internal_estimate);
ctx.twoway_merge = P(twoway_merge);
sorter_run(&ctx);
return ctx.out_fb;
}
/*
 * Undefine the configuration and helper macros used by this header.
 * SORT_INPUT_PIPE is undefined together with the other input selectors:
 * if the header is included again with a different input mode, a leftover
 * definition would be tested before SORT_INPUT_PRESORT and select the
 * pipe input by mistake.
 */
+#undef SORT_PREFIX
#undef SORT_KEY
#undef SORT_KEY_REGULAR
#undef SORT_KEY_SIZE
#undef SORT_VAR_KEY
#undef SORT_VAR_DATA
#undef SORT_INT
+#undef SORT_INT64
#undef SORT_HASH_BITS
#undef SORT_UNIFY
+#undef SORT_UNIFY_WORKSPACE
#undef SORT_INPUT_FILE
#undef SORT_INPUT_FB
+#undef SORT_INPUT_PIPE
#undef SORT_INPUT_PRESORT
#undef SORT_OUTPUT_THIS_FB
#undef SORT_UNIQUE
#undef SORT_ASSERT_UNIQUE
+#undef SORT_DELETE_INPUT
#undef SWAP
#undef LESS
#undef P