X-Git-Url: http://mj.ucw.cz/gitweb/?a=blobdiff_plain;f=lib%2Fsorter%2Fsorter.h;h=cd5e843f84bd0930d11e18ba7879452ca1fd83e6;hb=66b0f5843d272887ceb09b0d2d339f3cec10eca7;hp=fe6cefc11f28b67df6c4d92e9f95704441fccce6;hpb=5ff0a5b047624c8f215da9cd6c7182d609fcbd01;p=libucw.git diff --git a/lib/sorter/sorter.h b/lib/sorter/sorter.h index fe6cefc1..cd5e843f 100644 --- a/lib/sorter/sorter.h +++ b/lib/sorter/sorter.h @@ -52,28 +52,34 @@ * SORT_HASH_BITS signals that a monotone hashing function returning a given number of * bits is available. Monotone hash is a function f such that f(x) < f(y) * implies x < y and which is approximately uniformly distributed. - * uns PREFIX_hash(SORT_KEY *a, SORT_KEY *b) + * uns PREFIX_hash(SORT_KEY *a) * * Unification: * - * SORT_MERGE merge items with identical keys, needs the following functions: - * void PREFIX_write_merged(struct fastbuf *f, SORT_KEY **keys, uns n, byte *buf) + * SORT_UNIFY merge items with identical keys, needs the following functions: + * void PREFIX_write_merged(struct fastbuf *f, SORT_KEY **keys, void **data, uns n, void *buf) * takes n records in memory with keys which compare equal and writes - * a single record to the given fastbuf. Data for each key can - * be accessed by the SORT_GET_DATA(*key) macro. `buf' points - * to a buffer which is guaranteed to hold all given records. + * a single record to the given fastbuf. `buf' points to a buffer which + * is guaranteed to hold the sum of workspace requirements (see below) + * over all given records. * void PREFIX_copy_merged(SORT_KEY **keys, struct fastbuf **data, uns n, struct fastbuf *dest) * takes n records with keys in memory and data in fastbufs and writes - * a single record. + * a single record. Used only if SORT_DATA_SIZE or SORT_UNIFY_WORKSPACE is defined. + * SORT_UNIFY_WORKSPACE(key) gets a key and returns the amount of workspace required when merging + * the given record. Defaults to 0. 
* * Input (choose one of these): * * SORT_INPUT_FILE file of a given name - * SORT_INPUT_FB fastbuf stream - * SORT_INPUT_PRESORT custom presorter: call function PREFIX_presorter (see below) - * to get successive batches of pre-sorted data as temporary - * fastbuf streams or NULL if no more data is available. + * SORT_INPUT_FB seekable fastbuf stream + * SORT_INPUT_PIPE non-seekable fastbuf stream + * SORT_INPUT_PRESORT custom presorter. Calls function + * int PREFIX_presort(struct fastbuf *dest, void *buf, size_t bufsize); + * to get successive batches of pre-sorted data. * The function is passed a page-aligned presorting buffer. + * It returns 1 on success or 0 on EOF. + * SORT_DELETE_INPUT A C expression, if true, then the input files are deleted + * as soon as possible. * * Output (choose one of these): * @@ -85,14 +91,9 @@ * * SORT_UNIQUE all items have distinct keys (checked in debug mode) * - * FIXME: Maybe implement these: - * ??? SORT_DELETE_INPUT a C expression, if true, the input files are - * deleted as soon as possible - * ??? 
SORT_ALIGNED - * * The function generated: * - * PREFIX_SORT(<in>, <out> [,<range>]), where: + * PREFIX_sort(<in>, <out> [,<range>]), where: * <in> = input file name/fastbuf or NULL * <out> = output file name/fastbuf or NULL * <range> = maximum integer value for the SORT_INT mode @@ -143,7 +144,7 @@ static inline int P(hash) (P(key) *x) #endif #endif -#ifdef SORT_MERGE +#ifdef SORT_UNIFY #define LESS < #else #define LESS <= @@ -154,19 +155,47 @@ static inline int P(hash) (P(key) *x) #define SORT_ASSERT_UNIQUE #endif +#ifdef SORT_KEY_SIZE +#define SORT_VAR_KEY +#else +#define SORT_KEY_SIZE(key) sizeof(key) +#endif + +#ifdef SORT_DATA_SIZE +#define SORT_VAR_DATA +#else +#define SORT_DATA_SIZE(key) 0 +#endif + static inline void P(copy_data)(P(key) *key, struct fastbuf *in, struct fastbuf *out) { - bwrite(out, key, sizeof(P(key))); -#ifdef SORT_DATA_SIZE + P(write_key)(out, key); +#ifdef SORT_VAR_DATA bbcopy(in, out, SORT_DATA_SIZE(*key)); #else (void) in; #endif } +#if defined(SORT_UNIFY) && !defined(SORT_VAR_DATA) && !defined(SORT_UNIFY_WORKSPACE) +static inline void P(copy_merged)(P(key) **keys, struct fastbuf **data UNUSED, uns n, struct fastbuf *dest) +{ + P(write_merged)(dest, keys, NULL, n, NULL); +} +#endif + +#if defined(SORT_VAR_KEY) || defined(SORT_VAR_DATA) || defined(SORT_UNIFY_WORKSPACE) #include "lib/sorter/s-internal.h" +#else +#include "lib/sorter/s-fixint.h" +#endif + #include "lib/sorter/s-twoway.h" +#if defined(SORT_HASH_BITS) || defined(SORT_INT) +#include "lib/sorter/s-radix.h" +#endif + static struct fastbuf *P(sort)( #ifdef SORT_INPUT_FILE byte *in, @@ -188,14 +217,24 @@ static struct fastbuf *P(sort)( #ifdef SORT_INPUT_FILE ctx.in_fb = bopen(in, O_RDONLY, sorter_stream_bufsize); + ctx.in_size = bfilesize(ctx.in_fb); #elif defined(SORT_INPUT_FB) ctx.in_fb = in; + ctx.in_size = bfilesize(in); +#elif defined(SORT_INPUT_PIPE) + ctx.in_fb = in; + ctx.in_size = ~(u64)0; #elif defined(SORT_INPUT_PRESORT) ASSERT(!in); - ctx.custom_presort = P(presorter); + ctx.custom_presort = P(presort); + ctx.in_size 
= ~(u64)0; #else #error No input given. #endif +#ifdef SORT_DELETE_INPUT + if (SORT_DELETE_INPUT) + bconfig(ctx.in_fb, BCONFIG_IS_TEMP_FILE, 1); +#endif #ifdef SORT_OUTPUT_FB ASSERT(!out); @@ -209,13 +248,16 @@ static struct fastbuf *P(sort)( #ifdef SORT_HASH_BITS ctx.hash_bits = SORT_HASH_BITS; + ctx.radix_split = P(radix_split); #elif defined(SORT_INT) ctx.hash_bits = 0; while (ctx.hash_bits < 32 && (int_range >> ctx.hash_bits)) ctx.hash_bits++; + ctx.radix_split = P(radix_split); #endif ctx.internal_sort = P(internal); + ctx.internal_estimate = P(internal_estimate); ctx.twoway_merge = P(twoway_merge); sorter_run(&ctx); @@ -230,13 +272,17 @@ static struct fastbuf *P(sort)( return ctx.out_fb; } +#undef SORT_PREFIX #undef SORT_KEY #undef SORT_KEY_REGULAR #undef SORT_KEY_SIZE #undef SORT_DATA_SIZE +#undef SORT_VAR_KEY +#undef SORT_VAR_DATA #undef SORT_INT #undef SORT_HASH_BITS -#undef SORT_MERGE +#undef SORT_UNIFY +#undef SORT_UNIFY_WORKSPACE #undef SORT_INPUT_FILE #undef SORT_INPUT_FB #undef SORT_INPUT_PRESORT @@ -245,6 +291,7 @@ static struct fastbuf *P(sort)( #undef SORT_OUTPUT_THIS_FB #undef SORT_UNIQUE #undef SORT_ASSERT_UNIQUE +#undef SORT_DELETE_INPUT #undef SWAP #undef LESS #undef P