2 * UCW Library -- Universal Sorter
4 * (c) 2001--2007 Martin Mares <mj@ucw.cz>
5 * (c) 2004 Robert Spalek <robert@ucw.cz>
7 * This software may be freely distributed and used according to the terms
8 * of the GNU Lesser General Public License.
12 * This is not a normal header file, but a generator of sorting
13 * routines. Each time you include it with parameters set in the
14 * corresponding preprocessor macros, it generates a file sorter
15 * with the parameters given.
17 * FIXME: explain the basics (keys and data, callbacks etc.)
19 * Basic parameters and callbacks:
21 * SORT_PREFIX(x) add a name prefix (used on all global names
22 * defined by the sorter)
24 * SORT_KEY data type capable of storing a single key.
25 * SORT_KEY_REGULAR data type holding a single key both in memory and on disk;
26 * in this case, bread() and bwrite() is used to read/write keys
27 * and it's also assumed that the keys are not very long.
28 * int PREFIX_compare(SORT_KEY *a, SORT_KEY *b)
29 * compares two keys, result like strcmp(). Mandatory.
30 * int PREFIX_read_key(struct fastbuf *f, SORT_KEY *k)
31 * reads a key from a fastbuf, returns nonzero=ok, 0=EOF.
32 * Mandatory unless SORT_KEY_REGULAR is defined.
33 * void PREFIX_write_key(struct fastbuf *f, SORT_KEY *k)
34 * writes a key to a fastbuf. Mandatory unless SORT_KEY_REGULAR.
36 * SORT_KEY_SIZE(key) returns the real size of a key (a SORT_KEY type in memory
37 * can be truncated to this number of bytes without any harm;
38 * used to save memory when the keys have variable sizes).
39 * Default: always store the whole SORT_KEY.
40 * SORT_DATA_SIZE(key) gets a key and returns the amount of data following it.
41 * Default: records consist of keys only.
45 * SORT_INT(key) We are sorting by an integer value. In this mode, PREFIX_compare
46 * is supplied automatically and the sorting function gets an extra
47 * parameter specifying a range of the integers. The better the range
48 * fits, the faster we sort. Sets up SORT_HASH_xxx automatically.
50 * Hashing (optional, but it can speed sorting up):
52 * SORT_HASH_BITS signals that a monotone hashing function returning a given number of
53 * bits is available. Monotone hash is a function f such that f(x) < f(y)
54 * implies x < y and which is approximately uniformly distributed.
55 * uns PREFIX_hash(SORT_KEY *a)
59 * SORT_UNIFY merge items with identical keys, needs the following functions:
60 * void PREFIX_write_merged(struct fastbuf *f, SORT_KEY **keys, void **data, uns n, void *buf)
61 * takes n records in memory with keys which compare equal and writes
62 * a single record to the given fastbuf. `buf' points to a buffer which
63 * is guaranteed to hold the sum of workspace requirements (see below)
64 * over all given records. The function is allowed to modify all its inputs.
65 * void PREFIX_copy_merged(SORT_KEY **keys, struct fastbuf **data, uns n, struct fastbuf *dest)
66 * takes n records with keys in memory and data in fastbufs and writes
67 * a single record. Used only if SORT_DATA_SIZE or SORT_UNIFY_WORKSPACE is defined.
68 * SORT_UNIFY_WORKSPACE(key)
69 * gets a key and returns the amount of workspace required when merging
70 * the given record. Defaults to 0.
72 * Input (choose one of these):
74 * SORT_INPUT_FILE file of a given name
75 * SORT_INPUT_FB seekable fastbuf stream
76 * SORT_INPUT_PIPE non-seekable fastbuf stream
77 * SORT_INPUT_PRESORT custom presorter. Calls function
78 * int PREFIX_presort(struct fastbuf *dest, void *buf, size_t bufsize)
79 * to get successive batches of pre-sorted data.
80 * The function is passed a page-aligned presorting buffer.
81 * It returns 1 on success or 0 on EOF.
82 * SORT_DELETE_INPUT A C expression, if true, then the input files are deleted
83 * as soon as possible.
85 * Output (chose one of these):
87 * SORT_OUTPUT_FILE file of a given name
88 * SORT_OUTPUT_FB temporary fastbuf stream
89 * SORT_OUTPUT_THIS_FB a given fastbuf stream which can already contain a header
93 * SORT_UNIQUE all items have distinct keys (checked in debug mode)
95 * The function generated:
97 * <outfb> PREFIX_sort(<in>, <out> [,<range>]), where:
98 * <in> = input file name/fastbuf or NULL
99 * <out> = output file name/fastbuf or NULL
100 * <range> = maximum integer value for the SORT_INT mode
101 * <outfb> = output fastbuf (in SORT_OUTPUT_FB mode)
103 * After including this file, all parameter macros are automatically
107 #include "lib/sorter/common.h"
108 #include "lib/fastbuf.h"
112 #define P(x) SORT_PREFIX(x)
114 #ifdef SORT_KEY_REGULAR
115 typedef SORT_KEY_REGULAR P(key);
116 static inline int P(read_key) (struct fastbuf *f, P(key) *k)
118 return breadb(f, k, sizeof(P(key)));
120 static inline void P(write_key) (struct fastbuf *f, P(key) *k)
122 bwrite(f, k, sizeof(P(key)));
124 #elif defined(SORT_KEY)
125 typedef SORT_KEY P(key);
127 #error Missing definition of sorting key.
131 static inline int P(compare) (P(key) *x, P(key) *y)
133 if (SORT_INT(*x) < SORT_INT(*y))
135 if (SORT_INT(*x) > SORT_INT(*y))
140 #ifndef SORT_HASH_BITS
141 static inline int P(hash) (P(key) *x)
143 return SORT_INT((*x));
153 #define SWAP(x,y,z) do { z=x; x=y; y=z; } while(0)
155 #if defined(SORT_UNIQUE) && defined(DEBUG_ASSERTS)
156 #define SORT_ASSERT_UNIQUE
162 #define SORT_KEY_SIZE(key) sizeof(key)
165 #ifdef SORT_DATA_SIZE
166 #define SORT_VAR_DATA
168 #define SORT_DATA_SIZE(key) 0
171 static inline void P(copy_data)(P(key) *key, struct fastbuf *in, struct fastbuf *out)
173 P(write_key)(out, key);
175 bbcopy(in, out, SORT_DATA_SIZE(*key));
181 #if defined(SORT_UNIFY) && !defined(SORT_VAR_DATA) && !defined(SORT_UNIFY_WORKSPACE)
182 static inline void P(copy_merged)(P(key) **keys, struct fastbuf **data UNUSED, uns n, struct fastbuf *dest)
184 P(write_merged)(dest, keys, NULL, n, NULL);
188 #if defined(SORT_VAR_KEY) || defined(SORT_VAR_DATA) || defined(SORT_UNIFY_WORKSPACE)
189 #include "lib/sorter/s-internal.h"
191 #include "lib/sorter/s-fixint.h"
194 #include "lib/sorter/s-twoway.h"
196 #if defined(SORT_HASH_BITS) || defined(SORT_INT)
197 #include "lib/sorter/s-radix.h"
200 static struct fastbuf *P(sort)(
201 #ifdef SORT_INPUT_FILE
206 #ifdef SORT_OUTPUT_FILE
216 struct sort_context ctx;
217 bzero(&ctx, sizeof(ctx));
219 #ifdef SORT_INPUT_FILE
220 ctx.in_fb = bopen(in, O_RDONLY, sorter_stream_bufsize);
221 ctx.in_size = bfilesize(ctx.in_fb);
222 #elif defined(SORT_INPUT_FB)
224 ctx.in_size = bfilesize(in);
225 #elif defined(SORT_INPUT_PIPE)
227 ctx.in_size = ~(u64)0;
228 #elif defined(SORT_INPUT_PRESORT)
230 ctx.custom_presort = P(presort);
231 ctx.in_size = ~(u64)0;
233 #error No input given.
235 #ifdef SORT_DELETE_INPUT
236 if (SORT_DELETE_INPUT)
237 bconfig(ctx.in_fb, BCONFIG_IS_TEMP_FILE, 1);
240 #ifdef SORT_OUTPUT_FB
242 #elif defined(SORT_OUTPUT_THIS_FB)
244 #elif defined(SORT_OUTPUT_FILE)
245 /* Just assume fastbuf output and rename the fastbuf later */
247 #error No output given.
250 #ifdef SORT_HASH_BITS
251 ctx.hash_bits = SORT_HASH_BITS;
252 ctx.radix_split = P(radix_split);
253 #elif defined(SORT_INT)
255 while (ctx.hash_bits < 32 && (int_range >> ctx.hash_bits))
257 ctx.radix_split = P(radix_split);
260 ctx.internal_sort = P(internal);
261 ctx.internal_estimate = P(internal_estimate);
262 ctx.twoway_merge = P(twoway_merge);
266 #ifdef SORT_OUTPUT_FILE
267 if (rename(ctx.out_fb->name, out) < 0)
268 die("Cannot rename %s to %s: %m", ctx.out_fb->name, out);
269 bconfig(ctx.out_fb, BCONFIG_IS_TEMP_FILE, 0);
278 #undef SORT_KEY_REGULAR
280 #undef SORT_DATA_SIZE
284 #undef SORT_HASH_BITS
286 #undef SORT_UNIFY_WORKSPACE
287 #undef SORT_INPUT_FILE
289 #undef SORT_INPUT_PRESORT
290 #undef SORT_OUTPUT_FILE
291 #undef SORT_OUTPUT_FB
292 #undef SORT_OUTPUT_THIS_FB
294 #undef SORT_ASSERT_UNIQUE
295 #undef SORT_DELETE_INPUT
299 /* FIXME: Check that we undef everything we should. */