2 * Sherlock Library -- Universal Sorter
4 * (c) 2001--2002 Martin Mares <mj@ucw.cz>
6 * This software may be freely distributed and used according to the terms
7 * of the GNU Lesser General Public License.
11 * This is not a normal header file, it's a generator of sorting
12 * routines. Each time you include it with parameters set in the
13 * corresponding preprocessor macros, it generates a file sorter
14 * with the parameters given.
16 * Recognized parameter macros: (those marked with [*] are mandatory)
18 * SORT_KEY [*] data type capable of storing a single key
19 * SORT_PREFIX(x) [*] add a name prefix (used on all global names
20 * defined by the sorter)
21 * SORT_PRESORT include an in-core presorting pass
22 * SORT_UNIFY merge items with identical keys
23 * SORT_DELETE_INPUT a C expression, if true, the input files are
24 * deleted as soon as possible
25 * SORT_INPUT_FILE input is a file with this name
26 * SORT_INPUT_FB input is a fastbuf stream
27 * (can be safely NULL if you want to treat original
28 * input in a different way by file read functions)
29 * SORT_INPUT_FBPAIR input is a pair of fastbuf streams
30 * (not supported by the presorter)
31 * SORT_OUTPUT_FILE output is a file with this name
32 * SORT_OUTPUT_FB output is a temporary fastbuf stream
34 * You also need to define some (usually inline) functions which
35 * are called by the sorter to process your data:
37 * int PREFIX_compare(SORT_KEY *a, *b)
38 * compare two keys, result like strcmp
39 * int PREFIX_fetch_key(struct fastbuf *f, SORT_KEY *k)
40 * fetch next key, returns 1=ok, 0=eof
41 * void PREFIX_copy_data(struct fastbuf *src, *dest, SORT_KEY *k)
42 * write just fetched key k to dest and copy all data
43 * belonging to this key from src to dest.
44 * void PREFIX_merge_data(struct fastbuf *src1, *src2, *dest, SORT_KEY *k1, *k2)
45 * [used only in case SORT_UNIFY is defined]
46 * write just fetched key k to dest and merge data from
47 * two records with the same key (k1 and k2 are key occurences
48 * in the corresponding streams).
49 * byte * PREFIX_fetch_item(struct fastbuf *f, SORT_KEY *k, byte *limit)
50 * [used only with SORT_PRESORT]
51 * fetch data belonging to a just fetched key and store
52 * them to memory following the key, but not over limit.
53 * Returns a pointer to first byte after the data
54 * or NULL if the data don't fit. For variable-length
55 * keys, it can use the rest of SORT_KEY and even return
56 * pointer before end of the key.
57 * Important: before PREFIX_fetch_item() succeeds, the key
58 * must be position independent, the sorter can copy it.
59 * void PREFIX_store_item(struct fastbuf *f, SORT_KEY *k)
60 * [used only with SORT_PRESORT]
61 * write key and all its data read with PREFIX_fetch_data
62 * to the stream given.
63 * SORT_KEY * PREFIX_merge_items(SORT_KEY *a, SORT_KEY *b)
64 * [used only with SORT_PRESORT && SORT_UNIFY]
65 * merge two items with the same key, returns pointer
66 * to at most one of the items, the rest will be removed
67 * from the list of items, but not deallocated, so
68 * the remaining item can freely reference data of the
71 * After including this file, all parameter macros are automatically
75 /* Declarations of externals from sorter.c */
77 #ifndef SORT_DECLS_READ
78 #define SORT_DECLS_READ
80 extern uns sorter_trace;
81 extern uns sorter_presort_bufsize;
82 extern uns sorter_stream_bufsize;
84 extern uns sorter_pass_counter;
86 #endif /* !SORT_DECLS_READ */
88 /* The sorter proper */
90 #ifndef SORT_DECLARE_ONLY
92 #include "lib/fastbuf.h"
97 #if !defined(SORT_KEY) || !defined(SORT_PREFIX)
98 #error Some of the mandatory configuration macros are missing.
101 #define P(x) SORT_PREFIX(x)
102 #define SWAP(x,y,z) do { z=x; x=y; y=z; } while(0)
104 #if defined(SORT_UNIFY) || defined(SORT_UNIQUE)
110 static struct fastbuf *
111 P(flush_out)(struct fastbuf *out)
122 P(pass)(struct fastbuf **fb1, struct fastbuf **fb2)
124 struct fastbuf *in1 = *fb1;
125 struct fastbuf *in2 = *fb2;
126 struct fastbuf *out1 = NULL;
127 struct fastbuf *out2 = NULL;
128 SORT_KEY kbuf1, kbuf2, kbuf3, kbuf4;
129 SORT_KEY *kin1 = &kbuf1;
130 SORT_KEY *kprev1 = &kbuf2;
131 SORT_KEY *kin2 = &kbuf3;
132 SORT_KEY *kprev2 = &kbuf4;
133 SORT_KEY *kout = NULL;
135 int next1, next2, comp;
139 run1 = next1 = in1 ? P(fetch_key)(in1, kin1) : 0;
140 run2 = next2 = in2 ? P(fetch_key)(in2, kin2) : 0;
141 while (next1 || next2)
148 comp = P(compare)(kin1, kin2);
149 ktmp = (comp <= 0) ? kin1 : kin2;
150 if (!kout || !(P(compare)(kout, ktmp) LESS 0))
155 out1 = bopen_tmp(sorter_stream_bufsize);
160 P(copy_data)(in1, out1, kin1);
161 SWAP(kin1, kprev1, ktmp);
162 next1 = P(fetch_key)(in1, kin1);
163 run1 = next1 && (P(compare)(kprev1, kin1) LESS 0);
169 P(merge_data)(in1, in2, out1, kin1, kin2);
170 SWAP(kin1, kprev1, ktmp);
171 next1 = P(fetch_key)(in1, kin1);
172 run1 = next1 && (P(compare)(kprev1, kin1) LESS 0);
173 SWAP(kin2, kprev2, ktmp);
174 next2 = P(fetch_key)(in2, kin2);
175 run2 = next2 && (P(compare)(kprev2, kin2) LESS 0);
181 P(copy_data)(in2, out1, kin2);
182 SWAP(kin2, kprev2, ktmp);
183 next2 = P(fetch_key)(in2, kin2);
184 run2 = next2 && (P(compare)(kprev2, kin2) LESS 0);
196 log(L_INFO, "Pass %d: %d runs, %d+%d KB", sorter_pass_counter, run_count,
197 (out1 ? (int)((btell(out1) + 1023) / 1024) : 0),
198 (out2 ? (int)((btell(out2) + 1023) / 1024) : 0));
199 *fb1 = P(flush_out)(out1);
200 *fb2 = P(flush_out)(out2);
201 sorter_pass_counter++;
206 #define SORT_NODE struct P(presort_node)
214 P(mergesort)(SORT_NODE *x)
216 SORT_NODE *f1, **l1, *f2, **l2, **l;
234 f1 = P(mergesort)(f1);
236 f2 = P(mergesort)(f2);
240 if (P(compare)(&f1->key, &f2->key) <= 0)
258 P(presort)(struct fastbuf **fb1, struct fastbuf **fb2)
260 struct fastbuf *in = *fb1;
261 struct fastbuf *out1 = NULL;
262 struct fastbuf *out2 = NULL;
263 struct fastbuf *tbuf;
264 byte *buffer, *bufend, *current;
265 SORT_NODE *first, **last, *this, *leftover;
272 if (sorter_presort_bufsize < 2*sizeof(SORT_NODE))
273 die("PresortBuffer set too low");
274 buffer = xmalloc(sorter_presort_bufsize);
275 bufend = buffer + sorter_presort_bufsize;
279 SWAP(out1, out2, tbuf);
281 out1 = bopen_tmp(sorter_stream_bufsize);
286 memmove(buffer, leftover, sizeof(SORT_NODE));
287 this = leftover = (SORT_NODE *) buffer;
293 current = (byte *) ALIGN((addr_int_t) current, CPU_STRUCT_ALIGN);
294 if (current + sizeof(*this) > bufend)
296 this = (SORT_NODE *) current;
297 cont = P(fetch_key)(in, &this->key);
301 current = P(fetch_item)(in, &this->key, bufend);
304 if (leftover) /* Single node too large */
306 P(copy_data)(in, out1, &leftover->key);
311 else /* Node will be left over to the next phase */
323 first = P(mergesort)(first);
328 SORT_NODE *second = first->next;
329 if (second && !P(compare)(&first->key, &second->key))
331 SORT_KEY *n = P(merge_items)(&first->key, &second->key);
332 if (n == &first->key)
333 first->next = second->next;
337 first = second->next;
341 P(store_item)(out1, &first->key);
348 log(L_INFO, "Pass 0: %d runs (%d giants, %d splits), %d+%d KB",
349 run_count, giant_count, split_count,
350 (out1 ? (int)((btell(out1) + 1023) / 1024) : 0),
351 (out2 ? (int)((btell(out2) + 1023) / 1024) : 0));
352 *fb1 = P(flush_out)(out1);
353 *fb2 = P(flush_out)(out2);
357 #endif /* SORT_PRESORT */
360 #ifdef SORT_OUTPUT_FB
362 #elif defined(SORT_OUTPUT_FILE)
365 #error No output defined.
368 #ifdef SORT_INPUT_FILE
370 #elif defined(SORT_INPUT_FB)
372 #elif defined(SORT_INPUT_FBPAIR)
373 struct fastbuf *fb1, struct fastbuf *fb2
375 #error No input defined.
377 #ifdef SORT_OUTPUT_FILE
382 #ifdef SORT_INPUT_FILE
383 struct fastbuf *fb1, *fb2;
384 fb1 = bopen(inname, O_RDONLY, sorter_stream_bufsize);
386 #elif defined(SORT_INPUT_FB)
387 struct fastbuf *fb2 = NULL;
390 #ifdef SORT_DELETE_INPUT
391 bconfig(fb1, BCONFIG_IS_TEMP_FILE, SORT_DELETE_INPUT);
393 sorter_pass_counter = 1;
395 P(presort)(&fb1, &fb2);
398 do P(pass)(&fb1, &fb2); while (fb1 && fb2);
400 fb1 = bopen_tmp(sorter_stream_bufsize);
402 #ifdef SORT_OUTPUT_FB
405 bconfig(fb1, BCONFIG_IS_TEMP_FILE, 0);
406 if (rename(fb1->name, outname) < 0)
407 die("rename(%s,%s): %m", fb1->name, outname);
419 #undef SORT_DELETE_INPUT
420 #undef SORT_INPUT_FILE
422 #undef SORT_INPUT_FBPAIR
423 #undef SORT_OUTPUT_FILE
424 #undef SORT_OUTPUT_FB
426 #endif /* !SORT_DECLARE_ONLY */