2 * Sherlock Library -- Universal Sorter
4 * (c) 2001 Martin Mares <mj@ucw.cz>
8 * This is not a normal header file, it's a generator of sorting
9 * routines. Each time you include it with parameters set in the
10 * corresponding preprocessor macros, it generates a file sorter
11 * with the parameters given.
13 * Recognized parameter macros: (those marked with [*] are mandatory)
15 * SORT_KEY [*] data type capable of storing a single key
16 * SORT_PREFIX(x) [*] add a name prefix (used on all global names
17 * defined by the sorter)
18 * SORT_PRESORT include an in-core presorting pass
19 * SORT_UNIFY merge items with identical keys
20 * SORT_DELETE_INPUT a C expression, if true, the input files are
21 * deleted as soon as possible
22 * SORT_INPUT_FILE input is a file with this name
23 * SORT_INPUT_FB input is a fastbuf stream
24 * SORT_INPUT_FBPAIR input is a pair of fastbuf streams
25 * (not supported by the presorter)
26 * SORT_OUTPUT_FILE output is a file with this name
27 * SORT_OUTPUT_FB output is a fastbuf stream
29 * You also need to define some (usually inline) functions which
30 * are called by the sorter to process your data:
32 * int PREFIX_compare(SORT_KEY *a, *b)
33 * compare two keys, result like strcmp
34 * int PREFIX_fetch_key(struct fastbuf *f, SORT_KEY *k)
35 * fetch next key, returns 1=ok, 0=eof
36 * void PREFIX_copy_data(struct fastbuf *src, *dest, SORT_KEY *k)
37 * write just fetched key k to dest and copy all data
38 * belonging to this key from src to dest.
39 * void PREFIX_merge_data(struct fastbuf *src1, *src2, *dest, SORT_KEY *k1, *k2)
40 * [used only in case SORT_UNIFY is defined]
41 * write just fetched key k to dest and merge data from
42 * two records with the same key (k1 and k2 are key occurences
43 * in the corresponding streams).
44 * char * PREFIX_fetch_item(struct fastbuf *f, SORT_KEY *k, char *limit)
45 * [used only with SORT_PRESORT]
46 * fetch data belonging to a just fetched key and store
47 * them to memory following the key, but not over limit.
48 * Returns a pointer to first byte after the data
49 * or NULL if the data don't fit.
50 * Important: keys carrying no data must be position
52 * void PREFIX_store_item(struct fastbuf *f, SORT_KEY *k)
53 * [used only with SORT_PRESORT]
54 * write key and all its data read with PREFIX_fetch_data
55 * to the stream given.
56 * SORT_KEY * PREFIX_merge_items(SORT_KEY *a, SORT_KEY *b)
57 * [used only with SORT_PRESORT && SORT_UNIFY]
58 * merge two items with the same key, returns pointer
59 * to at most one of the items, the rest will be removed
60 * from the list of items, but not deallocated, so
61 * the remaining item can freely reference data of the
65 /* Declarations of externals from sorter.c */
67 #ifndef SORT_DECLS_READ
68 #define SORT_DECLS_READ
70 extern uns sorter_trace;
71 extern uns sorter_presort_bufsize;
72 extern uns sorter_stream_bufsize;
74 extern uns sorter_pass_counter, sorter_file_counter;
75 struct fastbuf *sorter_open_tmp(void);
77 #endif /* !SORT_DECLS_READ */
79 /* The sorter proper */
81 #ifndef SORT_DECLARE_ONLY
83 #include "lib/fastbuf.h"
87 #if !defined(SORT_KEY) || !defined(SORT_PREFIX)
88 #error Some of the mandatory configuration macros are missing.
91 #define P(x) SORT_PREFIX(x)
92 #define SWAP(x,y,z) do { z=x; x=y; y=z; } while(0)
94 #if defined(SORT_UNIFY) || defined(SORT_UNIQUE)
101 P(pass)(struct fastbuf **fb1, struct fastbuf **fb2)
103 struct fastbuf *in1 = *fb1;
104 struct fastbuf *in2 = *fb2;
105 struct fastbuf *out1 = NULL;
106 struct fastbuf *out2 = NULL;
107 SORT_KEY kbuf1, kbuf2, kbuf3, kbuf4;
108 SORT_KEY *kin1 = &kbuf1;
109 SORT_KEY *kprev1 = &kbuf2;
110 SORT_KEY *kin2 = &kbuf3;
111 SORT_KEY *kprev2 = &kbuf4;
112 SORT_KEY *kout = NULL;
114 int next1, next2, comp;
118 run1 = next1 = in1 ? P(fetch_key)(in1, kin1) : 0;
119 run2 = next2 = in2 ? P(fetch_key)(in2, kin2) : 0;
120 while (next1 || next2)
127 comp = P(compare)(kin1, kin2);
128 ktmp = (comp <= 0) ? kin1 : kin2;
129 if (!kout || !(P(compare)(kout, ktmp) LESS 0))
134 out1 = sorter_open_tmp();
139 P(copy_data)(in1, out1, kin1);
140 SWAP(kin1, kprev1, ktmp);
141 next1 = P(fetch_key)(in1, kin1);
142 run1 = next1 && (P(compare)(kprev1, kin1) LESS 0);
148 P(merge_data)(in1, in2, out1, kin1, kin2);
149 SWAP(kin1, kprev1, ktmp);
150 next1 = P(fetch_key)(in1, kin1); /* FIXME: Re-use other code? */
151 run1 = next1 && (P(compare)(kprev1, kin1) LESS 0);
152 SWAP(kin2, kprev2, ktmp);
153 next2 = P(fetch_key)(in2, kin2);
154 run2 = next2 && (P(compare)(kprev2, kin2) LESS 0);
160 P(copy_data)(in2, out1, kin2);
161 SWAP(kin2, kprev2, ktmp);
162 next2 = P(fetch_key)(in2, kin2);
163 run2 = next2 && (P(compare)(kprev2, kin2) LESS 0);
175 log(L_INFO, "Pass %d: %d runs, %d+%d KB", sorter_pass_counter, run_count,
176 (out1 ? (int)((btell(out1) + 1023) / 1024) : 0),
177 (out2 ? (int)((btell(out2) + 1023) / 1024) : 0));
178 if (out1) /* FIXME: What about empty output? */
190 sorter_pass_counter++;
194 #ifdef SORT_OUTPUT_FB
196 #elif defined(SORT_OUTPUT_FILE)
199 #error No output defined.
202 #ifdef SORT_INPUT_FILE
204 #elif defined(SORT_INPUT_FB)
206 #elif defined(SORT_INPUT_FBPAIR)
207 struct fastbuf *fb1, struct fastbuf *fb2
209 #error No input defined.
211 #ifdef SORT_OUTPUT_FILE
216 #ifdef SORT_INPUT_FILE
217 struct fastbuf *fb1, *fb2;
218 fb1 = bopen(inname, O_RDONLY, sorter_stream_bufsize);
219 #ifdef SORT_DELETE_INPUT
220 fb1->is_temp_file = SORT_DELETE_INPUT;
223 #elif defined(SORT_INPUT_FB)
224 struct fastbuf *fb2 = NULL;
227 sorter_pass_counter = 1;
228 do P(pass)(&fb1, &fb2); while (fb1 && fb2);
231 fb1->is_temp_file = 0;
233 #ifdef SORT_OUTPUT_FB
236 if (rename(fb1->name, outname) < 0)
237 die("rename(%s,%s): %m", fb1->name, outname);
245 #endif /* !SORT_DECLARE_ONLY */