From: Martin Mares Date: Fri, 2 Feb 2007 11:34:24 +0000 (+0100) Subject: Merge with git+ssh://cvs.ucw.cz/projects/sherlock/GIT/sherlock.git X-Git-Tag: holmes-import~506^2~13^2~179 X-Git-Url: http://mj.ucw.cz/gitweb/?a=commitdiff_plain;h=5ff0a5b047624c8f215da9cd6c7182d609fcbd01;p=libucw.git Merge with git+ssh://cvs.ucw.cz/projects/sherlock/GIT/sherlock.git Changes in debug/sorter/retros.c merged manually. --- 5ff0a5b047624c8f215da9cd6c7182d609fcbd01 diff --cc debug/sorter/retros.c index 52d4690b,00000000..a97e5545 mode 100644,000000..100644 --- a/debug/sorter/retros.c +++ b/debug/sorter/retros.c @@@ -1,733 -1,0 +1,733 @@@ +/* + * An experiment with sorting algorithms + */ + +#include "sherlock/sherlock.h" +#include "lib/getopt.h" +#include "lib/md5.h" + +#include +#include +#include +#include +#include + +struct elt { + u32 key; + u32 ballast[3]; +}; + +static struct elt *ary, *alt, **ind, *array0, *array1; +static uns n = 10000000; +static u32 sum; + +static struct elt *alloc_elts(uns n) +{ + return big_alloc(n * sizeof(struct elt)); +} + +static void free_elts(struct elt *a, uns n) +{ + big_free(a, n * sizeof(struct elt)); +} + +static int comp(const void *x, const void *y) +{ + const struct elt *xx = x, *yy = y; + return (xx->key < yy->key) ? -1 : (xx->key > yy->key) ? 1 : 0; +} + +static int comp_ind(const void *x, const void *y) +{ + const struct elt * const *xx = x, * const *yy = y; + return comp(*xx, *yy); +} + +#define ASORT_PREFIX(x) as_##x +#define ASORT_KEY_TYPE u32 +#define ASORT_ELT(i) a[i].key +#define ASORT_SWAP(i,j) do { struct elt t=a[i]; a[i]=a[j]; a[j]=t; } while (0) +#define ASORT_EXTRA_ARGS , struct elt *a +#include "lib/arraysort.h" + +#define ASORT_PREFIX(x) asi_##x +#define ASORT_KEY_TYPE u32 +#define ASORT_ELT(i) ind[i]->key +#define ASORT_SWAP(i,j) do { struct elt *t=ind[i]; ind[i]=ind[j]; ind[j]=t; } while (0) +#include "lib/arraysort.h" + +static void r1_sort(void) +{ + struct elt *from = ary, *to = alt, *tmp; +#define BITS 8 + uns cnt[1 << BITS]; + for (uns sh=0; sh<32; sh+=BITS) + { + bzero(cnt, sizeof(cnt)); + for (uns i=0; i> sh) & ((1 << BITS) - 1)]++; + uns pos = 0; + for (uns i=0; i<(1<> sh) & ((1 << BITS) - 1)]++] = from[i]; + ASSERT(cnt[(1 << BITS)-1] == n); + tmp=from, from=to, to=tmp; + } + ary = from; +#undef BITS +} + +static void r1b_sort(void) +{ + struct elt *from = ary, *to = alt, *tmp; +#define BITS 8 + uns cnt[1 << BITS], cnt2[1 << BITS]; + for (uns sh=0; sh<32; sh+=BITS) + { + if (sh) + memcpy(cnt, cnt2, sizeof(cnt)); + else + { + bzero(cnt, sizeof(cnt)); + for (uns i=0; i> sh) & ((1 << BITS) - 1)]++; + } + uns pos = 0; + for (uns i=0; i<(1<> (sh + BITS)) & ((1 << BITS) - 1)]++; + to[cnt[(from[i].key >> sh) & ((1 << BITS) - 1)]++] = from[i]; + } + ASSERT(cnt[(1 << BITS)-1] == n); + tmp=from, from=to, to=tmp; + } + ary = from; +#undef BITS +} + +static void r1c_sort(void) +{ + uns cnt[256]; + struct elt *ptrs[256], *x, *lim; + + x = ary; lim = ary + n; + bzero(cnt, sizeof(cnt)); + while (x < lim) + cnt[x++->key & 255]++; + +#define PTRS(start) x=start; for (uns i=0; i<256; i++) { ptrs[i]=x; x+=cnt[i]; } + + PTRS(alt); + x = ary; lim = ary + n; + bzero(cnt, sizeof(cnt)); + while (x < lim) + { + cnt[(x->key >> 8) & 255]++; + *ptrs[x->key & 255]++ = *x; + x++; + } + + PTRS(ary); + x = alt; lim = alt + n; + bzero(cnt, sizeof(cnt)); + while (x < lim) + { + cnt[(x->key >> 16) & 255]++; + *ptrs[(x->key >> 8) & 255]++ = *x; + x++; + } + + PTRS(alt); + x = ary; lim = ary + n; + bzero(cnt, sizeof(cnt)); + while (x < lim) + { + cnt[(x->key >> 24) & 255]++; + *ptrs[(x->key >> 16) & 255]++ = *x; + x++; + } + + PTRS(ary); + x = alt; lim = alt + n; + while (x < lim) + { + *ptrs[(x->key >> 24) & 255]++ = *x; + x++; + } +#undef PTRS +} + +#include + +static inline void sse_copy_elt(struct elt *to, struct elt *from) +{ + __m128i m = _mm_load_si128((__m128i *) from); + _mm_store_si128((__m128i *) to, m); +} + +static void r1c_sse_sort(void) +{ + uns cnt[256]; + struct elt *ptrs[256], *x, *lim; + + ASSERT(sizeof(struct elt) == 16); - ASSERT(!((addr_int_t)alt & 15)); - ASSERT(!((addr_int_t)ary & 15)); ++ ASSERT(!((uintptr_t)alt & 15)); ++ ASSERT(!((uintptr_t)ary & 15)); + + x = ary; lim = ary + n; + bzero(cnt, sizeof(cnt)); + while (x < lim) + cnt[x++->key & 255]++; + +#define PTRS(start) x=start; for (uns i=0; i<256; i++) { ptrs[i]=x; x+=cnt[i]; } + + PTRS(alt); + x = ary; lim = ary + n; + bzero(cnt, sizeof(cnt)); + while (x < lim) + { + cnt[(x->key >> 8) & 255]++; + sse_copy_elt(ptrs[x->key & 255]++, x); + x++; + } + + PTRS(ary); + x = alt; lim = alt + n; + bzero(cnt, sizeof(cnt)); + while (x < lim) + { + cnt[(x->key >> 16) & 255]++; + sse_copy_elt(ptrs[(x->key >> 8) & 255]++, x); + x++; + } + + PTRS(alt); + x = ary; lim = ary + n; + bzero(cnt, sizeof(cnt)); + while (x < lim) + { + cnt[(x->key >> 24) & 255]++; + sse_copy_elt(ptrs[(x->key >> 16) & 255]++, x); + x++; + } + + PTRS(ary); + x = alt; lim = alt + n; + while (x < lim) + { + sse_copy_elt(ptrs[(x->key >> 24) & 255]++, x); + x++; + } +#undef PTRS +} + +static void r1d_sort(void) +{ + uns cnt[256]; + struct elt *ptrs[256], *x, *y, *lim; + + ASSERT(!(n % 4)); + + x = ary; lim = ary + n; + bzero(cnt, sizeof(cnt)); + while (x < lim) + { + cnt[x++->key & 255]++; + cnt[x++->key & 255]++; + cnt[x++->key & 255]++; + cnt[x++->key & 255]++; + } + +#define PTRS(start) x=start; for (uns i=0; i<256; i++) { ptrs[i]=x; x+=cnt[i]; } + + PTRS(alt); + x = ary; y = ary+n/2; lim = ary + n/2; + bzero(cnt, sizeof(cnt)); + while (x < lim) + { + cnt[(x->key >> 8) & 255]++; + cnt[(y->key >> 8) & 255]++; + *ptrs[x->key & 255]++ = *x; + *ptrs[y->key & 255]++ = *y; + x++, y++; + cnt[(x->key >> 8) & 255]++; + cnt[(y->key >> 8) & 255]++; + *ptrs[x->key & 255]++ = *x; + *ptrs[y->key & 255]++ = *y; + x++, y++; + } + + PTRS(ary); + x = alt; lim = alt + n; + bzero(cnt, sizeof(cnt)); + while (x < lim) + { + cnt[(x->key >> 16) & 255]++; + *ptrs[(x->key >> 8) & 255]++ = *x; + x++; + cnt[(x->key >> 16) & 255]++; + *ptrs[(x->key >> 8) & 255]++ = *x; + x++; + } + + PTRS(alt); + x = ary; lim = ary + n; + bzero(cnt, sizeof(cnt)); + while (x < lim) + { + cnt[(x->key >> 24) & 255]++; + *ptrs[(x->key >> 16) & 255]++ = *x; + x++; + cnt[(x->key >> 24) & 255]++; + *ptrs[(x->key >> 16) & 255]++ = *x; + x++; + } + + PTRS(ary); + x = alt; lim = alt + n; + while (x < lim) + { + *ptrs[(x->key >> 24) & 255]++ = *x; + x++; + *ptrs[(x->key >> 24) & 255]++ = *x; + x++; + } +#undef PTRS +} + +static void r2_sort(void) +{ + struct elt *from = ary, *to = alt; +#define BITS 14 + uns cnt[1 << BITS]; + bzero(cnt, sizeof(cnt)); + for (uns i=0; i> (32 - BITS)) & ((1 << BITS) - 1)]++; + uns pos = 0; + for (uns i=0; i<(1<> (32 - BITS)) & ((1 << BITS) - 1)]++] = from[i]; + ASSERT(cnt[(1 << BITS)-1] == n); + + pos = 0; + for (uns i=0; i<(1 << BITS); i++) + { + as_sort(cnt[i] - pos, alt+pos); + pos = cnt[i]; + } + ary = alt; +#undef BITS +} + +static void r3_sort(void) +{ +#define BITS 10 +#define LEVELS 2 +#define BUCKS (1 << BITS) +#define THRESHOLD 5000 +#define ODDEVEN 0 + + auto void r3(struct elt *from, struct elt *to, uns n, uns lev); + void r3(struct elt *from, struct elt *to, uns n, uns lev) + { + uns sh = 32 - lev*BITS; + uns cnt[BUCKS]; + bzero(cnt, sizeof(cnt)); + for (uns i=0; i> sh) & (BUCKS - 1)]++; + uns pos = 0; + for (uns i=0; i> sh) & (BUCKS - 1)]++] = from[i]; +#else + sse_copy_elt(&to[cnt[(from[i].key >> sh) & (BUCKS - 1)]++], &from[i]); +#endif + pos = 0; + for (uns i=0; i= LEVELS || l <= THRESHOLD) + { + as_sort(l, to+pos); + if ((lev % 2) != ODDEVEN) + memcpy(from+pos, to+pos, l * sizeof(struct elt)); + } + else + r3(to+pos, from+pos, l, lev+1); + pos = cnt[i]; + } + } + + r3(ary, alt, n, 1); + if (ODDEVEN) + ary = alt; + +#undef ODDEVEN +#undef THRESHOLD +#undef BUCKS +#undef LEVELS +#undef BITS +} + +static inline struct elt *mrg(struct elt *x, struct elt *xl, struct elt *y, struct elt *yl, struct elt *z) +{ + for (;;) + { + if (x->key <= y->key) + { + *z++ = *x++; + if (x >= xl) + goto xend; + } + else + { + *z++ = *y++; + if (y >= yl) + goto yend; + } + } + + xend: + while (y < yl) + *z++ = *y++; + return z; + + yend: + while (x < xl) + *z++ = *x++; + return z; +} + +static void mergesort(void) +{ + struct elt *from, *to; + uns lev = 0; + if (1) + { + struct elt *x = ary, *z = alt, *last = ary + (n & ~1U); + while (x < last) + { + if (x[0].key < x[1].key) + *z++ = *x++, *z++ = *x++; + else + { + *z++ = x[1]; + *z++ = x[0]; + x += 2; + } + } + if (n % 2) + *z = *x; + lev++; + } + for (; (1U << lev) < n; lev++) + { + if (lev % 2) + from = alt, to = ary; + else + from = ary, to = alt; + struct elt *x, *z, *last; + x = from; + z = to; + last = from + n; + uns step = 1 << lev; + while (x + 2*step <= last) + { + z = mrg(x, x+step, x+step, x+2*step, z); + x += 2*step; + } + if (x + step < last) + mrg(x, x+step, x+step, last, z); + else + memcpy(z, x, (byte*)last - (byte*)x); + } + if (lev % 2) + ary = alt; +} + +static void sampsort(uns n, struct elt *ar, struct elt *al, struct elt *dest, byte *wbuf) +{ +#define WAYS 256 + struct elt k[WAYS]; + uns cnt[WAYS]; + bzero(cnt, sizeof(cnt)); + for (uns i=0; i k[w+delta].key) w += delta + FW(128); + FW(64); + FW(32); + FW(16); + FW(8); + FW(4); + FW(2); + FW(1); + wbuf[i] = w; + cnt[w]++; + } + struct elt *y = al, *way[WAYS], *z; + for (uns i=0; i= 1000) + sampsort(cnt[i], y, z, dest, wbuf); + else + { + as_sort(cnt[i], y); + if (al != dest) + memcpy(z, y, cnt[i]*sizeof(struct elt)); + } + y += cnt[i]; + z += cnt[i]; + } +#undef FW +#undef WAYS +} + +static void samplesort(void) +{ + byte *aux = xmalloc(n); + sampsort(n, ary, alt, ary, aux); + xfree(aux); +} + +static void sampsort2(uns n, struct elt *ar, struct elt *al, struct elt *dest, byte *wbuf) +{ +#define WAYS 256 + struct elt k[WAYS]; + uns cnt[WAYS]; + bzero(cnt, sizeof(cnt)); + for (uns i=0; ikey > k[w1+delta].key) w1 += delta +#define FW2(delta) if (k2->key > k[w2+delta].key) w2 += delta + FW1(128); FW2(128); + FW1(64); FW2(64); + FW1(32); FW2(32); + FW1(16); FW2(16); + FW1(8); FW2(8); + FW1(4); FW2(4); + FW1(2); FW2(2); + FW1(1); FW2(1); + *ww++ = w1; + *ww++ = w2; + cnt[w1]++; + cnt[w2]++; + k1 += 2; + k2 += 2; + } + if (k1 < kend) + { + uns w1 = 0; + FW1(128); FW1(64); FW1(32); FW1(16); + FW1(8); FW1(4); FW1(2); FW1(1); + *ww++ = w1; + cnt[w1]++; + } + struct elt *y = al, *way[WAYS], *z; + for (uns i=0; i= 1000) + sampsort2(cnt[i], y, z, dest, wbuf); + else + { + as_sort(cnt[i], y); + if (al != dest) + memcpy(z, y, cnt[i]*sizeof(struct elt)); + } + y += cnt[i]; + z += cnt[i]; + } +#undef FW1 +#undef FW2 +#undef WAYS +} + +static void samplesort2(void) +{ + byte *aux = xmalloc(n); + sampsort2(n, ary, alt, ary, aux); + xfree(aux); +} + +static void mk_ary(void) +{ + ary = array0; + alt = array1; + struct MD5Context ctx; + MD5Init(&ctx); + u32 block[16]; + bzero(block, sizeof(block)); + + sum = 0; + for (uns i=0; ikey; + for (uns i=1; ikey < ind[i-1]->key) + die("Missorted at %d", i); + else + s ^= ind[i]->key; + if (s != sum) + die("Corrupted"); + xfree(ind); +} + +int main(int argc, char **argv) +{ + log_init(argv[0]); + + int opt; + uns op = 0; + while ((opt = cf_getopt(argc, argv, CF_SHORT_OPTS "1", CF_NO_LONG_OPTS, NULL)) >= 0) + switch (opt) + { + case '1': + op |= (1 << (opt - '0')); + break; + default: + die("usage?"); + } + + array0 = alloc_elts(n); + array1 = alloc_elts(n); + for (uns i=0; i