2 * UCW Library -- Universal Sorter
4 * (c) 2001--2007 Martin Mares <mj@ucw.cz>
5 * (c) 2004 Robert Spalek <robert@ucw.cz>
7 * This software may be freely distributed and used according to the terms
8 * of the GNU Lesser General Public License.
12 * This is not a normal header file, but a generator of sorting
13 * routines. Each time you include it with parameters set in the
14 * corresponding preprocessor macros, it generates a file sorter
15 * with the parameters given.
17 * The sorter operates on fastbufs containing sequences of items. Each item
18 * consists of a key, optionally followed by data. The keys are represented
19 * by fixed-size structures of type SORT_KEY internally, if this format differs
20 * from the on-disk format, explicit reading and writing routines can be provided.
21 * The data are always copied verbatim, unless the sorter is in the merging
22 * mode in which it calls callbacks for merging of items with equal keys.
24 * All callbacks must be thread-safe.
26 * Basic parameters and callbacks:
28 * SORT_PREFIX(x) add a name prefix (used on all global names defined by the sorter)
30 * SORT_KEY data type capable of holding a single key in memory (the on-disk
31 * representation can be different). Alternatively, you can use:
32 * SORT_KEY_REGULAR data type holding a single key both in memory and on disk;
33 * in this case, bread() and bwrite() is used to read/write keys
34 * and it's also assumed that the keys are not very long.
35 * int PREFIX_compare(SORT_KEY *a, SORT_KEY *b)
36 * compares two keys, returns result like strcmp(). Mandatory.
37 * int PREFIX_read_key(struct fastbuf *f, SORT_KEY *k)
38 * reads a key from a fastbuf, returns nonzero=ok, 0=EOF.
39 * Mandatory unless SORT_KEY_REGULAR is defined.
40 * void PREFIX_write_key(struct fastbuf *f, SORT_KEY *k)
41 * writes a key to a fastbuf. Mandatory unless SORT_KEY_REGULAR.
43 * SORT_KEY_SIZE(key) returns the real size of a key (a SORT_KEY type in memory
44 * can be truncated to this number of bytes without any harm;
45 * used to save memory when the keys have variable sizes).
46 * Default: always store the whole SORT_KEY.
47 * SORT_DATA_SIZE(key) gets a key and returns the amount of data following it.
48 * Default: records consist of keys only.
52 * SORT_INT(key) we are sorting by an integer value returned by this macro.
53 * In this mode, PREFIX_compare is supplied automatically and the sorting
54 * function gets an extra parameter specifying the range of the integers.
55 * The better the range fits, the faster we sort.
56 * Sets up SORT_HASH_xxx automatically.
57 * SORT_INT64(key) the same for 64-bit integers.
59 * Hashing (optional, but it can speed sorting up):
61 * SORT_HASH_BITS signals that a monotone hashing function returning a given number of
62 * bits is available. A monotone hash is a function f from keys to integers
63 * such that f(x) < f(y) implies x < y, which is approximately uniformly
64 * distributed. It should be declared as:
65 * uns PREFIX_hash(SORT_KEY *a)
69 * SORT_UNIFY merge items with identical keys. It requires the following functions:
70 * void PREFIX_write_merged(struct fastbuf *f, SORT_KEY **keys, void **data, uns n, void *buf)
71 * takes n records in memory with keys which compare equal and writes
72 * a single record to the given fastbuf. `buf' points to a buffer which
73 * is guaranteed to hold the sum of workspace requirements (see below)
74 * over all given records. The function is allowed to modify all its inputs.
75 * void PREFIX_copy_merged(SORT_KEY **keys, struct fastbuf **data, uns n, struct fastbuf *dest)
76 * takes n records with keys in memory and data in fastbufs and writes
77 * a single record. Used only if SORT_DATA_SIZE or SORT_UNIFY_WORKSPACE
79 * SORT_UNIFY_WORKSPACE(key)
80 * gets a key and returns the amount of workspace required when merging
81 * the given record. Defaults to 0.
83 * Input (choose one of these):
85 * SORT_INPUT_FILE file of a given name
86 * SORT_INPUT_FB seekable fastbuf stream
87 * SORT_INPUT_PIPE non-seekable fastbuf stream
88 * SORT_INPUT_PRESORT custom presorter. Calls function
89 * int PREFIX_presort(struct fastbuf *dest, void *buf, size_t bufsize)
90 * to get successive batches of pre-sorted data.
91 * The function is passed a page-aligned presorting buffer.
92 * It returns 1 on success or 0 on EOF.
93 * SORT_DELETE_INPUT A C expression, if true, then the input files are deleted
94 * as soon as possible.
96 * Output (chose one of these):
98 * SORT_OUTPUT_FILE file of a given name
99 * SORT_OUTPUT_FB temporary fastbuf stream
100 * SORT_OUTPUT_THIS_FB a given fastbuf stream which can already contain some data
104 * SORT_UNIQUE all items have distinct keys (checked in debug mode)
106 * The function generated:
108 * <outfb> PREFIX_sort(<in>, <out> [,<range>]), where:
109 * <in> = input file name/fastbuf or NULL
110 * <out> = output file name/fastbuf or NULL
111 * <range> = maximum integer value for the SORT_INT mode
113 * After including this file, all parameter macros are automatically
117 #include "ucw/sorter/common.h"
118 #include "ucw/fastbuf.h"
122 #define P(x) SORT_PREFIX(x)
124 #ifdef SORT_KEY_REGULAR
125 typedef SORT_KEY_REGULAR P(key);
126 static inline int P(read_key) (struct fastbuf *f, P(key) *k)
128 return breadb(f, k, sizeof(P(key)));
130 static inline void P(write_key) (struct fastbuf *f, P(key) *k)
132 bwrite(f, k, sizeof(P(key)));
134 #elif defined(SORT_KEY)
135 typedef SORT_KEY P(key);
137 #error Missing definition of sorting key.
141 typedef u64 P(hash_t);
142 #define SORT_INT SORT_INT64
143 #define SORT_LONG_HASH
145 typedef uns P(hash_t);
149 static inline int P(compare) (P(key) *x, P(key) *y)
151 if (SORT_INT(*x) < SORT_INT(*y))
153 if (SORT_INT(*x) > SORT_INT(*y))
158 #ifndef SORT_HASH_BITS
159 static inline P(hash_t) P(hash) (P(key) *x)
161 return SORT_INT((*x));
171 #define SWAP(x,y,z) do { z=x; x=y; y=z; } while(0)
173 #if defined(SORT_UNIQUE) && defined(DEBUG_ASSERTS)
174 #define SORT_ASSERT_UNIQUE
180 #define SORT_KEY_SIZE(key) sizeof(key)
183 #ifdef SORT_DATA_SIZE
184 #define SORT_VAR_DATA
186 #define SORT_DATA_SIZE(key) 0
189 static inline void P(copy_data)(P(key) *key, struct fastbuf *in, struct fastbuf *out)
191 P(write_key)(out, key);
193 bbcopy(in, out, SORT_DATA_SIZE(*key));
199 #if defined(SORT_UNIFY) && !defined(SORT_VAR_DATA) && !defined(SORT_UNIFY_WORKSPACE)
200 static inline void P(copy_merged)(P(key) **keys, struct fastbuf **data UNUSED, uns n, struct fastbuf *dest)
202 P(write_merged)(dest, keys, NULL, n, NULL);
206 #if defined(SORT_HASH_BITS) || defined(SORT_INT)
207 #define SORT_INTERNAL_RADIX
208 #include "ucw/sorter/s-radix.h"
211 #if defined(SORT_VAR_KEY) || defined(SORT_VAR_DATA) || defined(SORT_UNIFY_WORKSPACE)
212 #include "ucw/sorter/s-internal.h"
214 #include "ucw/sorter/s-fixint.h"
217 #include "ucw/sorter/s-twoway.h"
218 #include "ucw/sorter/s-multiway.h"
220 static struct fastbuf *P(sort)(
221 #ifdef SORT_INPUT_FILE
226 #ifdef SORT_OUTPUT_FILE
236 struct sort_context ctx;
237 bzero(&ctx, sizeof(ctx));
239 #ifdef SORT_INPUT_FILE
240 ctx.in_fb = bopen_file(in, O_RDONLY, &sorter_fb_params);
241 ctx.in_size = bfilesize(ctx.in_fb);
242 #elif defined(SORT_INPUT_FB)
244 ctx.in_size = bfilesize(in);
245 #elif defined(SORT_INPUT_PIPE)
247 ctx.in_size = ~(u64)0;
248 #elif defined(SORT_INPUT_PRESORT)
250 ctx.custom_presort = P(presort);
251 ctx.in_size = ~(u64)0;
253 #error No input given.
255 #ifdef SORT_DELETE_INPUT
256 if (SORT_DELETE_INPUT)
257 bconfig(ctx.in_fb, BCONFIG_IS_TEMP_FILE, 1);
260 #ifdef SORT_OUTPUT_FB
262 #elif defined(SORT_OUTPUT_THIS_FB)
264 #elif defined(SORT_OUTPUT_FILE)
265 /* Just assume fastbuf output and rename the fastbuf later */
267 #error No output given.
270 #ifdef SORT_HASH_BITS
271 ctx.hash_bits = SORT_HASH_BITS;
272 ctx.radix_split = P(radix_split);
273 #elif defined(SORT_INT)
275 while (ctx.hash_bits < 64 && (int_range >> ctx.hash_bits))
277 ctx.radix_split = P(radix_split);
280 ctx.internal_sort = P(internal);
281 ctx.internal_estimate = P(internal_estimate);
282 ctx.twoway_merge = P(twoway_merge);
283 ctx.multiway_merge = P(multiway_merge);
287 #ifdef SORT_OUTPUT_FILE
288 bfix_tmp_file(ctx.out_fb, out);
294 #undef SORT_ASSERT_UNIQUE
295 #undef SORT_DATA_SIZE
296 #undef SORT_DELETE_INPUT
297 #undef SORT_HASH_BITS
299 #undef SORT_INPUT_FILE
300 #undef SORT_INPUT_PIPE
301 #undef SORT_INPUT_PRESORT
304 #undef SORT_INTERNAL_RADIX
306 #undef SORT_KEY_REGULAR
308 #undef SORT_LONG_HASH
309 #undef SORT_OUTPUT_FB
310 #undef SORT_OUTPUT_FILE
311 #undef SORT_OUTPUT_THIS_FB
314 #undef SORT_UNIFY_WORKSPACE