2 * UCW Library -- Universal Sorter: Multi-Way Merge Module
4 * (c) 2007 Martin Mares <mj@ucw.cz>
6 * This software may be freely distributed and used according to the terms
7 * of the GNU Lesser General Public License.
11 * We use a binary tree to keep track of the current minimum. The tree is
12 * represented by an array (in the same way as binary heaps usually are),
13 * leaves correspond to input streams and each internal vertex remembers
14 * the leaf in its subtree, which has the lowest key.
17 typedef struct P(mwt) {
18 int i; // Minimum of the subtree
20 int eq; // Did we encounter equality anywhere in the subtree?
24 static inline void P(update_tree)(P(key) *keys, P(mwt) *tree, uns i)
29 tree[i] = tree[2*i+1];
30 else if (tree[2*i+1].i < 0)
34 int cmp = P(compare)(&keys[tree[2*i].i], &keys[tree[2*i+1].i]);
35 tree[i] = (cmp <= 0) ? tree[2*i] : tree[2*i+1];
42 * It is very tempting to stop as soon as the current node does not
43 * change, but it is wrong, because even if the stream index stored in
44 * the tree is the same, the actual key value can differ.
48 * This function sometimes triggers optimizer bugs in GCC versions up to 4.2.1,
49 * leading to an assumption that tree[1] does not change during this function.
50 * We add an explicit memory barrier as a work-around. Ugh. See GCC Bug #33262.
52 asm volatile ("" : : : "memory");
55 static inline void P(set_tree)(P(key) *keys, P(mwt) *tree, uns i, int val)
58 P(update_tree)(keys, tree, i);
61 static void P(multiway_merge)(struct sort_context *ctx UNUSED, struct sort_bucket **ins, struct sort_bucket *out)
71 struct fastbuf *fout = sbuck_write(out);
72 struct fastbuf *fins[num_ins];
75 for (uns i=1; i<2*n2; i++)
76 tree[i] = (P(mwt)) { .i = -1 };
78 for (uns i=0; i<num_ins; i++)
80 fins[i] = sbuck_read(ins[i]);
81 if (P(read_key)(fins[i], &keys[i]))
82 P(set_tree)(keys, tree, n2+i, i);
88 P(key) *mkeys[num_ins], *key;
89 struct fastbuf *mfb[num_ins];
91 while (likely(tree[1].i >= 0))
96 /* The key is unique, so let's go through the fast path */
97 P(copy_data)(&keys[i], fins[i], fout);
98 if (unlikely(!P(read_key)(fins[i], &keys[i])))
100 P(update_tree)(keys, tree, n2+i);
112 P(set_tree)(keys, tree, n2+i, -1);
117 while (!P(compare)(key, &keys[i]));
119 P(copy_merged)(mkeys, mfb, m, fout);
121 for (uns j=0; j<m; j++)
124 if (likely(P(read_key)(fins[i], &keys[i])))
125 P(set_tree)(keys, tree, n2+i, i);
131 /* Simplified version which does not support any unification */
132 while (likely(tree[1].i >= 0))
135 P(key) UNUSED key = keys[i];
136 P(copy_data)(&keys[i], fins[i], fout);
137 if (unlikely(!P(read_key)(fins[i], &keys[i])))
139 P(update_tree)(keys, tree, n2+i);
140 #ifdef SORT_ASSERT_UNIQUE
141 ASSERT(tree[1].i < 0 || P(compare)(&key, &keys[tree[1].i]) < 0);