2 * UCW Library -- Universal Sorter: Multi-Way Merge Module
4 * (c) 2007 Martin Mares <mj@ucw.cz>
6 * This software may be freely distributed and used according to the terms
7 * of the GNU Lesser General Public License.
11 * We use a binary tree to keep track of the current minimum. The tree is
12 * represented by an array (in the same way as binary heaps usually are),
13 * leaves correspond to input streams and each internal vertex remembers
14 * the leaf in its subtree, which has the lowest key.
17 typedef struct P(mwt) {
18 int i; // Minimum of the subtree
20 int eq; // Did we encounter equality anywhere in the subtree?
24 static inline void P(update_tree)(P(key) *keys, P(mwt) *tree, uns i)
29 tree[i] = tree[2*i+1];
30 else if (tree[2*i+1].i < 0)
34 int cmp = P(compare)(&keys[tree[2*i].i], &keys[tree[2*i+1].i]);
35 tree[i] = (cmp <= 0) ? tree[2*i] : tree[2*i+1];
42 * It is very tempting to stop as soon as the current node does not
43 * change, but it is wrong, because even if the stream index stored in
44 * the tree is the same, the actual key value can differ.
47 #if defined(__GNUC__) && (__GNUC__ * 1000 + __GNUC_MINOR__ < 4002)
49 * This function sometimes triggers optimizer bugs in GCC 4.1.1 and
50 * possibly also earlier versions, leading to an assumption that tree[1]
51 * does not change during this function. We add an explicit memory barrier
52 * as a work-around. Ugh.
54 asm volatile ("" : : : "memory");
58 static inline void P(set_tree)(P(key) *keys, P(mwt) *tree, uns i, int val)
61 P(update_tree)(keys, tree, i);
64 static void P(multiway_merge)(struct sort_context *ctx UNUSED, struct sort_bucket **ins, struct sort_bucket *out)
74 struct fastbuf *fout = sbuck_write(out);
75 struct fastbuf *fins[num_ins];
78 for (uns i=1; i<2*n2; i++)
79 tree[i] = (P(mwt)) { .i = -1 };
81 for (uns i=0; i<num_ins; i++)
83 fins[i] = sbuck_read(ins[i]);
84 if (P(read_key)(fins[i], &keys[i]))
85 P(set_tree)(keys, tree, n2+i, i);
91 P(key) *mkeys[num_ins], *key;
92 struct fastbuf *mfb[num_ins];
94 while (likely(tree[1].i >= 0))
99 /* The key is unique, so let's go through the fast path */
100 P(copy_data)(&keys[i], fins[i], fout);
101 if (unlikely(!P(read_key)(fins[i], &keys[i])))
103 P(update_tree)(keys, tree, n2+i);
115 P(set_tree)(keys, tree, n2+i, -1);
120 while (!P(compare)(key, &keys[i]));
122 P(copy_merged)(mkeys, mfb, m, fout);
124 for (uns j=0; j<m; j++)
127 if (likely(P(read_key)(fins[i], &keys[i])))
128 P(set_tree)(keys, tree, n2+i, i);
134 /* Simplified version which does not support any unification */
135 while (likely(tree[1].i >= 0))
138 P(key) UNUSED key = keys[i];
139 P(copy_data)(&keys[i], fins[i], fout);
140 if (unlikely(!P(read_key)(fins[i], &keys[i])))
142 P(update_tree)(keys, tree, n2+i);
143 #ifdef SORT_ASSERT_UNIQUE
144 ASSERT(tree[1].i < 0 || P(compare)(&key, &keys[tree[1].i]) < 0);