X-Git-Url: http://mj.ucw.cz/gitweb/?a=blobdiff_plain;f=lib%2Fsorter%2Fs-multiway.h;h=9295396f0c16ce3e428ac607c60b371684481b0f;hb=64a8f3287cdf40f4abe69a6e361b9a153961a348;hp=097679acf1613772299f13aad369f79c0d16b75e;hpb=0a0199533c874640d913370b62c8a11b8a68bfa0;p=libucw.git diff --git a/lib/sorter/s-multiway.h b/lib/sorter/s-multiway.h index 097679ac..9295396f 100644 --- a/lib/sorter/s-multiway.h +++ b/lib/sorter/s-multiway.h @@ -7,44 +7,55 @@ * of the GNU Lesser General Public License. */ -typedef struct P(mwt_node) { - int i; +/* + * We use a binary tree to keep track of the current minimum. The tree is + * represented by an array (in the same way as binary heaps usually are), + * leaves correspond to input streams and each internal vertex remembers + * the leaf in its subtree, which has the lowest key. + */ + +typedef struct P(mwt) { + int i; // Minimum of the subtree #ifdef SORT_UNIFY - int eq; + int eq; // Did we encounter equality anywhere in the subtree? #endif -} P(mwt_node); +} P(mwt); -static inline void P(update_tree)(P(key) *keys, P(mwt_node) *tree, uns i) +static inline void P(update_tree)(P(key) *keys, P(mwt) *tree, uns i) { while (i /= 2) { - P(mwt_node) new; if (tree[2*i].i < 0) - new = tree[2*i+1]; + tree[i] = tree[2*i+1]; else if (tree[2*i+1].i < 0) - new = tree[2*i]; + tree[i] = tree[2*i]; else { int cmp = P(compare)(&keys[tree[2*i].i], &keys[tree[2*i+1].i]); - if (cmp <= 0) - new = tree[2*i]; - else - new = tree[2*i+1]; + tree[i] = (cmp <= 0) ? tree[2*i] : tree[2*i+1]; #ifdef SORT_UNIFY if (!cmp) - new.eq = 1; + tree[i].eq = 1; #endif } -#ifdef SORT_UNIFY_XXX // FIXME - /* If we are not unifying, the keys are usually distinct, so this check is pointless */ - if (!memcmp(&tree[i], &new, sizeof(new))) - break; -#endif - tree[i] = new; + /* + * It is very tempting to stop as soon as the current node does not + * change, but it is wrong, because even if the stream index stored in + * the tree is the same, the actual key value can differ. + */ } +#if defined(__GNUC__) && (__GNUC__ * 1000 + __GNUC_MINOR__ < 4002) + /* + * This function sometimes triggers optimizer bugs in GCC 4.1.1 and + * possibly also earlier versions, leading to an assumption that tree[1] + * does not change during this function. We add an explicit memory barrier + * as a work-around. Ugh. + */ + asm volatile ("" : : : "memory"); +#endif } -static inline void P(set_tree)(P(key) *keys, P(mwt_node) *tree, uns i, int val) +static inline void P(set_tree)(P(key) *keys, P(mwt) *tree, uns i, int val) { tree[i].i = val; P(update_tree)(keys, tree, i); @@ -63,14 +74,9 @@ static void P(multiway_merge)(struct sort_context *ctx UNUSED, struct sort_bucke struct fastbuf *fout = sbuck_write(out); struct fastbuf *fins[num_ins]; P(key) keys[num_ins]; - P(mwt_node) tree[2*n2]; // A complete binary tree, leaves are input streams, each internal vertex contains a minimum of its sons + P(mwt) tree[2*n2]; for (uns i=1; i<2*n2; i++) - { - tree[i].i = -1; -#ifdef SORT_UNIFY - tree[i].eq = 0; -#endif - } + tree[i] = (P(mwt)) { .i = -1 }; for (uns i=0; i= 0)) { int i = tree[1].i; - if (!tree[1].eq && 0) // FIXME: This does not work for some reason + if (!tree[1].eq) { /* The key is unique, so let's go through the fast path */ P(copy_data)(&keys[i], fins[i], fout);