2 * UCW Library -- Universal Sorter
4 * (c) 2001--2004 Martin Mares <mj@ucw.cz>
5 * (c) 2004 Robert Spalek <robert@ucw.cz>
7 * This software may be freely distributed and used according to the terms
8 * of the GNU Lesser General Public License.
12 * This is not a normal header file, it's a generator of sorting
13 * routines. Each time you include it with parameters set in the
14 * corresponding preprocessor macros, it generates a file sorter
15 * with the parameters given.
17 * Recognized parameter macros: (those marked with [*] are mandatory)
19 * SORT_KEY [*] data type capable of storing a single key
20 * SORT_PREFIX(x) [*] add a name prefix (used on all global names
21 * defined by the sorter)
22 * SORT_PRESORT include an in-core pre-sorting pass. Beware, when in
23 * the pre-sorting mode, it's quite possible that the
24 * comparison function will be called with both arguments
26 * SORT_UP_TO a C expression, if defined, sorting is stopped after the
27 * average run length in the file exceeds the value of this
28 * expression (in bytes)
29 * SORT_UNIFY merge items with identical keys
30 * SORT_UNIQUE all items have distinct keys (checked in debug mode)
31 * SORT_REGULAR all items are equally long and they don't contain
32 * anything else than the key. In this case, the sorter
33 * automatically supplies fetch_key, copy_data, fetch_item
34 * and store_item functions. Item size is also expected
36 * SORT_DELETE_INPUT a C expression, if true, the input files are
37 * deleted as soon as possible
38 * SORT_INPUT_FILE input is a file with this name
39 * SORT_INPUT_FB input is a fastbuf stream
40 * (can be safely NULL if you want to treat original
41 * input in a different way by file read functions)
42 * SORT_INPUT_FBPAIR input is a pair of fastbuf streams
43 * (not supported by the presorter)
44 * SORT_OUTPUT_FILE output is a file with this name
45 * SORT_OUTPUT_FB output is a temporary fastbuf stream
47 * You also need to define some (usually inline) functions which
48 * are called by the sorter to process your data:
50 * int PREFIX_compare(SORT_KEY *a, *b)
51 * compare two keys, result like strcmp
52 * int PREFIX_fetch_key(struct fastbuf *f, SORT_KEY *k)
53 * fetch next key, returns 1=ok, 0=eof
54 * void PREFIX_copy_data(struct fastbuf *src, *dest, SORT_KEY *k)
55 * write just fetched key k to dest and copy all data
56 * belonging to this key from src to dest.
57 * void PREFIX_merge_data(struct fastbuf *src1, *src2, *dest, SORT_KEY *k1, *k2)
58 * [used only in case SORT_UNIFY is defined]
59 * write just fetched key k to dest and merge data from
60 * two records with the same key (k1 and k2 are key occurences
61 * in the corresponding streams).
62 * byte * PREFIX_fetch_item(struct fastbuf *f, SORT_KEY *k, byte *limit)
63 * [used only with SORT_PRESORT]
64 * fetch data belonging to a just fetched key and store
65 * them to memory following the key, but not over limit.
66 * Returns a pointer to first byte after the data
67 * or NULL if the data don't fit. For variable-length
68 * keys, it can use the rest of SORT_KEY and even return
69 * pointer before end of the key.
70 * Important: before PREFIX_fetch_item() succeeds, the key
71 * must be position independent, the sorter can copy it.
72 * void PREFIX_store_item(struct fastbuf *f, SORT_KEY *k)
73 * [used only with SORT_PRESORT]
74 * write key and all its data read with PREFIX_fetch_data
75 * to the stream given.
76 * SORT_KEY * PREFIX_merge_items(SORT_KEY *a, SORT_KEY *b)
77 * [used only with SORT_PRESORT && SORT_UNIFY]
78 * merge two items with the same key, returns pointer
79 * to at most one of the items, the rest will be removed
80 * from the list of items, but not deallocated, so
81 * the remaining item can freely reference data of the
84 * After including this file, all parameter macros are automatically
88 /* Declarations of externals from sorter.c */
90 #ifndef SORT_DECLS_READ
91 #define SORT_DECLS_READ
93 extern uns sorter_trace;
94 extern uns sorter_presort_bufsize;
95 extern uns sorter_stream_bufsize;
97 extern uns sorter_pass_counter;
99 #endif /* !SORT_DECLS_READ */
101 /* The sorter proper */
103 #ifndef SORT_DECLARE_ONLY
105 #include "lib/fastbuf.h"
110 #if !defined(SORT_KEY) || !defined(SORT_PREFIX)
111 #error Some of the mandatory configuration macros are missing.
114 #define P(x) SORT_PREFIX(x)
115 #define SWAP(x,y,z) do { z=x; x=y; y=z; } while(0)
117 #if defined(SORT_UNIFY) || defined(SORT_UNIQUE)
123 #if defined(SORT_UNIQUE) && defined(DEBUG_ASSERTS)
124 #define SORT_ASSERT_UNIQUE
130 P(fetch_key)(struct fastbuf *in, SORT_KEY *x)
132 return breadb(in, x, sizeof(*x));
136 P(copy_data)(struct fastbuf *in UNUSED, struct fastbuf *out, SORT_KEY *x)
138 bwrite(out, x, sizeof(*x));
142 P(fetch_item)(struct fastbuf *in UNUSED, SORT_KEY *x UNUSED, byte *limit UNUSED)
144 return (byte *)(x+1);
148 P(store_item)(struct fastbuf *out, SORT_KEY *x)
150 bwrite(out, x, sizeof(*x));
155 static struct fastbuf *
156 P(flush_out)(struct fastbuf *out)
164 P(pass)(struct fastbuf **fb1, struct fastbuf **fb2
170 struct fastbuf *in1 = *fb1;
171 struct fastbuf *in2 = *fb2;
172 struct fastbuf *out1 = NULL;
173 struct fastbuf *out2 = NULL;
174 SORT_KEY kbuf1, kbuf2, kbuf3, kbuf4;
175 SORT_KEY *kin1 = &kbuf1;
176 SORT_KEY *kprev1 = &kbuf2;
177 SORT_KEY *kin2 = &kbuf3;
178 SORT_KEY *kprev2 = &kbuf4;
179 SORT_KEY *kout = NULL;
181 int next1, next2, comp;
185 run1 = next1 = in1 ? P(fetch_key)(in1, kin1) : 0;
186 run2 = next2 = in2 ? P(fetch_key)(in2, kin2) : 0;
187 while (next1 || next2)
194 comp = P(compare)(kin1, kin2);
195 ktmp = (comp <= 0) ? kin1 : kin2;
196 if (!kout || !(P(compare)(kout, ktmp) LESS 0))
204 out1 = bopen_tmp(sorter_stream_bufsize);
209 P(copy_data)(in1, out1, kin1);
210 SWAP(kin1, kprev1, ktmp);
211 next1 = P(fetch_key)(in1, kin1);
212 run1 = next1 && (P(compare)(kprev1, kin1) LESS 0);
218 P(merge_data)(in1, in2, out1, kin1, kin2);
219 SWAP(kin1, kprev1, ktmp);
220 next1 = P(fetch_key)(in1, kin1);
221 run1 = next1 && (P(compare)(kprev1, kin1) LESS 0);
222 SWAP(kin2, kprev2, ktmp);
223 next2 = P(fetch_key)(in2, kin2);
224 run2 = next2 && (P(compare)(kprev2, kin2) LESS 0);
228 #ifdef SORT_ASSERT_UNIQUE
229 else if (unlikely(comp == 0))
234 P(copy_data)(in2, out1, kin2);
235 SWAP(kin2, kprev2, ktmp);
236 next2 = P(fetch_key)(in2, kin2);
237 run2 = next2 && (P(compare)(kprev2, kin2) LESS 0);
249 log(L_INFO, "Pass %d: %d runs, %d+%d KB", sorter_pass_counter, run_count,
250 (out1 ? (int)((btell(out1) + 1023) / 1024) : 0),
251 (out2 ? (int)((btell(out2) + 1023) / 1024) : 0));
252 *fb1 = P(flush_out)(out1);
253 *fb2 = P(flush_out)(out2);
254 sorter_pass_counter++;
260 #if defined(SORT_REGULAR) && !defined(SORT_UNIFY)
262 /* If we are doing a simple sort on a regular file, we can use a faster presorting strategy */
264 static SORT_KEY *P(array);
266 #define ASORT_PREFIX(x) SORT_PREFIX(x##_array)
267 #define ASORT_KEY_TYPE SORT_KEY
268 #define ASORT_ELT(i) P(array)[i]
269 #define ASORT_LT(x,y) (P(compare)(&(x),&(y)) < 0)
271 #include "lib/arraysort.h"
274 P(presort)(struct fastbuf **fb1, struct fastbuf **fb2)
276 struct fastbuf *in = *fb1;
277 struct fastbuf *out1 = NULL;
278 struct fastbuf *out2 = NULL;
279 struct fastbuf *tbuf;
280 uns buf_items = sorter_presort_bufsize / sizeof(SORT_KEY);
282 SORT_KEY last_out = { }, *array;
286 die("PresortBuffer set too low");
287 P(array) = array = xmalloc(buf_items * sizeof(SORT_KEY));
291 uns s = bread(in, array, buf_items * sizeof(SORT_KEY));
292 uns n = s / sizeof(SORT_KEY);
293 ASSERT(!(s % sizeof(SORT_KEY)));
297 #ifdef SORT_ASSERT_UNIQUE
298 for (uns i=0; i<n-1; i++)
299 if (unlikely(P(compare)(&array[i], &array[i+1]) >= 0))
301 ASSERT(!run_count || P(compare)(&last_out, &array[0]));
303 if (!run_count || P(compare)(&last_out, &array[0]) > 0)
307 if (sorter_presort_bufsize < (uns) SORT_UP_TO)
309 SWAP(out1, out2, tbuf);
311 out1 = bopen_tmp(sorter_stream_bufsize);
313 last_out = array[n-1];
314 bwrite(out1, array, n * sizeof(SORT_KEY));
319 log(L_INFO, "Pass 0: %d runs, %d+%d KB",
321 (out1 ? (int)((btell(out1) + 1023) / 1024) : 0),
322 (out2 ? (int)((btell(out2) + 1023) / 1024) : 0));
323 *fb1 = P(flush_out)(out1);
324 *fb2 = P(flush_out)(out2);
330 #define SORT_NODE struct P(presort_node)
338 P(mergesort)(SORT_NODE *x)
340 SORT_NODE *f1, **l1, *f2, **l2, **l;
358 f1 = P(mergesort)(f1);
360 f2 = P(mergesort)(f2);
364 if (P(compare)(&f1->key, &f2->key) <= 0)
382 P(presort)(struct fastbuf **fb1, struct fastbuf **fb2)
384 struct fastbuf *in = *fb1;
385 struct fastbuf *out1 = NULL;
386 struct fastbuf *out2 = NULL;
387 struct fastbuf *tbuf;
388 byte *buffer, *bufend, *current;
389 SORT_NODE *first, **last, *this, *leftover;
396 if (sorter_presort_bufsize < 2*sizeof(SORT_NODE))
397 die("PresortBuffer set too low");
398 buffer = xmalloc(sorter_presort_bufsize);
399 bufend = buffer + sorter_presort_bufsize;
404 if (sorter_presort_bufsize < SORT_UP_TO)
406 SWAP(out1, out2, tbuf);
408 out1 = bopen_tmp(sorter_stream_bufsize);
413 memmove(buffer, leftover, sizeof(SORT_NODE));
414 this = leftover = (SORT_NODE *) buffer;
420 current = (byte *) ALIGN((addr_int_t) current, CPU_STRUCT_ALIGN);
421 if (current + sizeof(*this) > bufend)
423 this = (SORT_NODE *) current;
424 cont = P(fetch_key)(in, &this->key);
428 current = P(fetch_item)(in, &this->key, bufend);
431 if (leftover) /* Single node too large */
433 P(copy_data)(in, out1, &leftover->key);
438 else /* Node will be left over to the next phase */
450 first = P(mergesort)(first);
455 SORT_NODE *second = first->next;
456 if (second && !P(compare)(&first->key, &second->key))
458 SORT_KEY *n = P(merge_items)(&first->key, &second->key);
459 if (n == &first->key)
460 first->next = second->next;
464 first = second->next;
468 #ifdef SORT_ASSERT_UNIQUE
469 ASSERT(!first->next || P(compare)(&first->key, &first->next->key));
471 P(store_item)(out1, &first->key);
478 log(L_INFO, "Pass 0: %d runs (%d giants, %d splits), %d+%d KB",
479 run_count, giant_count, split_count,
480 (out1 ? (int)((btell(out1) + 1023) / 1024) : 0),
481 (out2 ? (int)((btell(out2) + 1023) / 1024) : 0));
482 *fb1 = P(flush_out)(out1);
483 *fb2 = P(flush_out)(out2);
487 #endif /* SORT_REGULAR && !SORT_UNIFY */
489 #endif /* SORT_PRESORT */
492 #ifdef SORT_OUTPUT_FB
494 #elif defined(SORT_OUTPUT_FILE)
497 #error No output defined.
500 #ifdef SORT_INPUT_FILE
502 #elif defined(SORT_INPUT_FB)
504 #elif defined(SORT_INPUT_FBPAIR)
505 struct fastbuf *fb1, struct fastbuf *fb2
507 #error No input defined.
509 #ifdef SORT_OUTPUT_FILE
514 #ifdef SORT_INPUT_FILE
515 struct fastbuf *fb1, *fb2;
516 fb1 = bopen(inname, O_RDONLY, sorter_stream_bufsize);
518 #elif defined(SORT_INPUT_FB)
519 struct fastbuf *fb2 = NULL;
522 #ifdef SORT_DELETE_INPUT
523 bconfig(fb1, BCONFIG_IS_TEMP_FILE, SORT_DELETE_INPUT);
525 sorter_pass_counter = 1;
527 P(presort)(&fb1, &fb2);
531 do P(pass)(&fb1, &fb2); while (fb1 && fb2);
534 sh_off_t run_count, max_run_count = 0;
536 max_run_count += bfilesize(fb1);
538 max_run_count += bfilesize(fb2);
540 run_count = max_run_count / sorter_presort_bufsize;
542 run_count = max_run_count;
545 max_run_count /= SORT_UP_TO;
547 run_count = P(pass)(&fb1, &fb2, (run_count+1)/2 <= max_run_count);
552 fb1 = bopen_tmp(sorter_stream_bufsize);
554 #ifdef SORT_OUTPUT_FB
557 bconfig(fb1, BCONFIG_IS_TEMP_FILE, 0);
558 if (rename(fb1->name, outname) < 0)
559 die("rename(%s,%s): %m", fb1->name, outname);
572 #undef SORT_ASSERT_UNIQUE
574 #undef SORT_DELETE_INPUT
575 #undef SORT_INPUT_FILE
577 #undef SORT_INPUT_FBPAIR
578 #undef SORT_OUTPUT_FILE
579 #undef SORT_OUTPUT_FB
583 #endif /* !SORT_DECLARE_ONLY */