* of the GNU Lesser General Public License.
*/
-typedef struct P(mwt_node) {
- int i;
+/*
+ * We use a binary tree to keep track of the current minimum. The tree is
+ * represented by an array (in the same way as binary heaps usually are),
+ * leaves correspond to input streams and each internal vertex remembers
+ * the leaf in its subtree, which has the lowest key.
+ */
+
+typedef struct P(mwt) {
+ int i; // Minimum of the subtree
#ifdef SORT_UNIFY
- int eq;
+ int eq; // Did we encounter equality anywhere in the subtree?
#endif
-} P(mwt_node);
+} P(mwt);
-static inline void P(update_tree)(P(key) *keys, P(mwt_node) *tree, uns i)
+static inline void P(update_tree)(P(key) *keys, P(mwt) *tree, uns i)
{
while (i /= 2)
{
- P(mwt_node) new;
if (tree[2*i].i < 0)
- new = tree[2*i+1];
+ tree[i] = tree[2*i+1];
else if (tree[2*i+1].i < 0)
- new = tree[2*i];
+ tree[i] = tree[2*i];
else
{
int cmp = P(compare)(&keys[tree[2*i].i], &keys[tree[2*i+1].i]);
- if (cmp <= 0)
- new = tree[2*i];
- else
- new = tree[2*i+1];
+ tree[i] = (cmp <= 0) ? tree[2*i] : tree[2*i+1];
#ifdef SORT_UNIFY
if (!cmp)
- new.eq = 1;
+ tree[i].eq = 1;
#endif
}
-#ifdef SORT_UNIFY_XXX // FIXME
- /* If we are not unifying, the keys are usually distinct, so this check is pointless */
- if (!memcmp(&tree[i], &new, sizeof(new)))
- break;
-#endif
- tree[i] = new;
+ /*
+ * It is very tempting to stop as soon as the current node does not
+ * change, but it is wrong, because even if the stream index stored in
+ * the tree is the same, the actual key value can differ.
+ */
}
+ /*
+ * This function sometimes triggers optimizer bugs in GCC versions up to 4.2.1,
+ * leading to an assumption that tree[1] does not change during this function.
+ * We add an explicit memory barrier as a work-around. Ugh. See GCC Bug #33262.
+ */
+ asm volatile ("" : : : "memory");
}
-static inline void P(set_tree)(P(key) *keys, P(mwt_node) *tree, uns i, int val)
+static inline void P(set_tree)(P(key) *keys, P(mwt) *tree, uns i, int val)
{
tree[i].i = val;
P(update_tree)(keys, tree, i);
struct fastbuf *fout = sbuck_write(out);
struct fastbuf *fins[num_ins];
P(key) keys[num_ins];
- P(mwt_node) tree[2*n2]; // A complete binary tree, leaves are input streams, each internal vertex contains a minimum of its sons
+ P(mwt) tree[2*n2];
for (uns i=1; i<2*n2; i++)
- {
- tree[i].i = -1;
-#ifdef SORT_UNIFY
- tree[i].eq = 0;
-#endif
- }
+ tree[i] = (P(mwt)) { .i = -1 };
for (uns i=0; i<num_ins; i++)
{
while (likely(tree[1].i >= 0))
{
int i = tree[1].i;
- if (!tree[1].eq && 0) // FIXME: This does not work for some reason
+ if (!tree[1].eq)
{
/* The key is unique, so let's go through the fast path */
P(copy_data)(&keys[i], fins[i], fout);