2 * UCW Library -- Testing the Sorter
4 * (c) 2007 Martin Mares <mj@ucw.cz>
6 * This software may be freely distributed and used according to the terms
7 * of the GNU Lesser General Public License.
11 #include <ucw/getopt.h>
13 #include <ucw/fastbuf.h>
14 #include <ucw/ff-binary.h>
15 #include <ucw/hashfunc.h>
17 #include <ucw/string.h>
18 #include <ucw/prime.h>
27 /*** A hack for overriding radix-sorter configuration ***/
/* If the build defines FORCE_RADIX_BITS, it replaces whatever value
 * CONFIG_UCW_RADIX_SORTER_BITS had before this point. */
29 #ifdef FORCE_RADIX_BITS
30 #undef CONFIG_UCW_RADIX_SORTER_BITS
31 #define CONFIG_UCW_RADIX_SORTER_BITS FORCE_RADIX_BITS
34 /*** Time measurement ***/
/* File-local timestamp handed to get_timer() when a test's duration is reported. */
36 static timestamp_t timer;
/* Logs the test id and the get_timer() value divided by 1000, with three decimals.
 * NOTE(review): the "s" suffix implies get_timer() yields milliseconds -- confirm. */
50 msg(L_INFO, "Test %d took %.3fs", test_id, get_timer(&timer) / 1000.);
53 /*** Simple 4-byte integer keys ***/
/* Configuration macros set before including <ucw/sorter/sorter.h>: generated
 * names get the s1_ prefix, keys are struct key1, and SORT_INT(k) expands to
 * the key's x member. SORT_DELETE_INPUT is defined as 0.
 * NOTE(review): the precise effect of each SORT_* switch is defined by
 * sorter.h -- confirm there. */
59 #define SORT_KEY_REGULAR struct key1
60 #define SORT_PREFIX(x) s1_##x
62 #define SORT_OUTPUT_FB
64 #define SORT_INT(k) (k).x
65 #define SORT_DELETE_INPUT 0
67 #include <ucw/sorter/sorter.h>
/* Integer sorting test. `mode` picks the input order fed to bputl() below
 * (0: increasing, 1: decreasing, otherwise a multiplicative scramble);
 * `size` is divided by 4 to derive the element count. */
70 test_int(int mode, u64 size)
/* N is 0 for size 0, otherwise nextprime() of size/4 capped at 0xffff0000. */
72 uint N = size ? nextprime(MIN(size/4, 0xffff0000)) : 0;
/* mode also indexes the three-entry name table, so it has to be 0, 1 or 2. */
74 msg(L_INFO, ">>> Integers (%s, N=%u)", ((char *[]) { "increasing", "decreasing", "random" })[mode], N);
76 struct fastbuf *f = bopen_tmp(65536);
77 for (uint i=0; i<N; i++)
/* The third branch computes (i*K + 17) % N with i widened to u64; K is defined elsewhere. */
78 bputl(f, (mode==0) ? i : (mode==1) ? N-1-i : ((u64)i * K + 17) % N);
/* N-1 is passed as the third argument (for N == 0 this is 0 - 1 on a uint).
 * NOTE(review): presumably the largest key value the sorter should expect -- confirm. */
82 f = s1_sort(f, NULL, N-1);
85 SORT_XTRACE(2, "Verifying");
/* Verification pass over the N expected positions; a mismatch is reported via die(). */
86 for (uint i=0; i<N; i++)
90 die("Discrepancy: %u instead of %u", j, i);
95 /*** Integers with merging, but no data ***/
/* Merge callback: adds the cnt fields of k[1] .. k[n-1] into k[0]->cnt and
 * then writes k[0] to f as a single struct key2 record.
 * The d and buf arguments are marked UNUSED. */
102 static inline void s2_write_merged(struct fastbuf *f, struct key2 **k, void **d UNUSED, uint n, void *buf UNUSED)
104 for (uint i=1; i<n; i++)
105 k[0]->cnt += k[i]->cnt;
106 bwrite(f, k[0], sizeof(struct key2));
/* Sorter instance with the s2_ prefix over struct key2, keyed by member x,
 * with fastbuf input and output selected.
 * NOTE(review): the precise effect of each SORT_* switch is defined by
 * sorter.h -- confirm there. */
109 #define SORT_KEY_REGULAR struct key2
110 #define SORT_PREFIX(x) s2_##x
111 #define SORT_INPUT_FB
112 #define SORT_OUTPUT_FB
114 #define SORT_INT(k) (k).x
116 #include <ucw/sorter/sorter.h>
/* Counted-integer test: each index i is emitted 2*mult times, and the
 * verification expects every value to come back with count 2*mult
 * (see the second die() message). `mode` selects the order as in the
 * three-entry name table below. */
119 test_counted(int mode, u64 size)
121 u64 items = size / sizeof(struct key2);
/* Loops while items/(2*mult) still exceeds 0xffff0000.
 * NOTE(review): the loop body presumably grows mult -- confirm. */
123 while (items/(2*mult) > 0xffff0000)
/* N is 0 when there are no items, otherwise nextprime() of items/(2*mult). */
125 uint N = items ? nextprime(items/(2*mult)) : 0;
127 msg(L_INFO, ">>> Counted integers (%s, N=%u, mult=%u)", ((char *[]) { "increasing", "decreasing", "random" })[mode], N, mult);
129 struct fastbuf *f = bopen_tmp(65536);
/* mult passes x N indices x 2 copies. */
130 for (uint m=0; m<mult; m++)
131 for (uint i=0; i<N; i++)
132 for (uint j=0; j<2; j++)
/* i < N inside this loop, so i%N equals i. */
134 bputl(f, (mode==0) ? (i%N) : (mode==1) ? N-1-(i%N) : ((u64)i * K + 17) % N);
/* N-1 is passed as the third argument.
 * NOTE(review): presumably the largest key value -- confirm. */
140 f = s2_sort(f, NULL, N-1);
143 SORT_XTRACE(2, "Verifying");
144 for (uint i=0; i<N; i++)
/* Wrong key at this position. */
148 die("Discrepancy: %u instead of %u", j, i);
/* Right key, but its merged count differs from 2*mult. */
151 die("Discrepancy: %u has count %u instead of %u", j, k, 2*mult);
156 /*** Longer records with hashes (similar to Shepherd's index records) ***/
/* Compares two key3 records by hash[0], then hash[1], hash[2] and hash[3].
 * NOTE(review): relies on COMPARE (defined elsewhere) to leave the function
 * as soon as the two operands differ -- confirm. */
164 static inline int s3_compare(struct key3 *x, struct key3 *y)
166 COMPARE(x->hash[0], y->hash[0]);
167 COMPARE(x->hash[1], y->hash[1]);
168 COMPARE(x->hash[2], y->hash[2]);
169 COMPARE(x->hash[3], y->hash[3]);
/* NOTE(review): presumably the hash callback picked up by the s3_ sorter
 * instance configured with SORT_HASH_BITS -- confirm. */
173 static inline uint s3_hash(struct key3 *x)
/* Sorter instance with the s3_ prefix over struct key3, fastbuf input and
 * output, and SORT_HASH_BITS set to 32. No SORT_INT is defined here.
 * NOTE(review): presumably ordering comes from s3_compare/s3_hash -- confirm
 * against sorter.h. */
178 #define SORT_KEY_REGULAR struct key3
179 #define SORT_PREFIX(x) s3_##x
180 #define SORT_INPUT_FB
181 #define SORT_OUTPUT_FB
182 #define SORT_HASH_BITS 32
184 #include <ucw/sorter/sorter.h>
/* Fills *k for index i; `mode` selects how the hash words are produced. */
187 gen_hash_key(int mode, struct key3 *k, uint i)
/* Payload words are fixed linear functions of i. */
190 k->payload[0] = 7*i + 13;
191 k->payload[1] = 13*i + 19;
192 k->payload[2] = 19*i + 7;
/* The same three payload-to-hash copies appear twice.
 * NOTE(review): presumably one group per ordered mode, differing only in
 * how hash[0] is set -- confirm. */
197 k->hash[1] = k->payload[0];
198 k->hash[2] = k->payload[1];
199 k->hash[3] = k->payload[2];
203 k->hash[1] = k->payload[0];
204 k->hash[2] = k->payload[1];
205 k->hash[3] = k->payload[2];
/* Hash derived from the 4 bytes at &k->i.
 * NOTE(review): assumes the first argument is the output buffer -- confirm
 * against the md5 API. */
208 md5_hash_buffer((byte *) &k->hash, (byte *) &k->i, 4);
/* Hashed-record test: writes N generated key3 records, sorts them with
 * s3_sort() and re-reads them for verification. */
214 test_hashes(int mode, u64 size)
/* Record count: size / sizeof(struct key3), capped at 0xffffffff. */
216 uint N = MIN(size / sizeof(struct key3), 0xffffffff);
217 msg(L_INFO, ">>> Hashes (%s, N=%u)", ((char *[]) { "increasing", "decreasing", "random" })[mode], N);
218 struct key3 k, lastk;
220 struct fastbuf *f = bopen_tmp(65536);
222 for (uint i=0; i<N; i++)
224 gen_hash_key(mode, &k, i);
/* hash_sum accumulates hash[3] of every record written ... */
225 hash_sum += k.hash[3];
226 bwrite(f, &k, sizeof(k));
231 f = s3_sort(f, NULL);
234 SORT_XTRACE(2, "Verifying");
235 for (uint i=0; i<N; i++)
237 int ok = breadb(f, &k, sizeof(k));
/* From the second record on, k must compare strictly greater than lastk;
 * equal keys also take this branch. */
239 if (i && s3_compare(&k, &lastk) <= 0)
/* lastk is reused as scratch: regenerate the record for index k.i and
 * compare it byte-for-byte with what was read back. */
241 gen_hash_key(mode, &lastk, k.i);
242 if (memcmp(&k, &lastk, sizeof(k)))
/* ... and is decremented by hash[3] of every record read back. */
244 hash_sum -= k.hash[3];
250 /*** Variable-length records (strings) with and without var-length data ***/
/* Compares two key4 strings: memcmp() over the shorter of the two lengths,
 * followed by a COMPARE of the lengths themselves. */
259 static inline int s4_compare(struct key4 *x, struct key4 *y)
261 uint l = MIN(x->len, y->len);
262 int c = memcmp(x->s, y->s, l);
265 COMPARE(x->len, y->len);
/* Reads one key4 from f: the length field is checked, then len bytes are
 * read into x->s. */
269 static inline int s4_read_key(struct fastbuf *f, struct key4 *x)
/* NOTE(review): 0xffffffff is presumably the end-of-input marker returned
 * when the length is fetched -- confirm. */
272 if (x->len == 0xffffffff)
274 ASSERT(x->len < KEY4_MAX);
/* The breadb() result is not checked on this line. */
275 breadb(f, x->s, x->len);
/* Writes one key4 to f; asserts len < KEY4_MAX before writing the len
 * string bytes from x->s. */
279 static inline void s4_write_key(struct fastbuf *f, struct key4 *x)
281 ASSERT(x->len < KEY4_MAX);
283 bwrite(f, x->s, x->len);
/* Sorter instance with the s4_ prefix over variable-length struct key4.
 * SORT_KEY_SIZE is the struct size minus the unused tail of the
 * KEY4_MAX-sized string area, i.e. sizeof(struct key4) - KEY4_MAX + len. */
286 #define SORT_KEY struct key4
287 #define SORT_PREFIX(x) s4_##x
288 #define SORT_KEY_SIZE(x) (sizeof(struct key4) - KEY4_MAX + (x).len)
289 #define SORT_INPUT_FB
290 #define SORT_OUTPUT_FB
292 #include <ucw/sorter/sorter.h>
/* The s4b_ sorter instance reuses the s4_ compare/read/write routines. */
294 #define s4b_compare s4_compare
295 #define s4b_read_key s4_read_key
296 #define s4b_write_key s4_write_key
/* Size of the data attached to a key: 0 for an empty string, otherwise the
 * first string byte XORed with 0xad. */
298 static inline uint s4_data_size(struct key4 *x)
300 return x->len ? (x->s[0] ^ 0xad) : 0;
/* Second key4 sorter instance (s4b_ prefix): same key layout as s4_, plus
 * SORT_DATA_SIZE, which is computed per key by s4_data_size(). */
303 #define SORT_KEY struct key4
304 #define SORT_PREFIX(x) s4b_##x
305 #define SORT_KEY_SIZE(x) (sizeof(struct key4) - KEY4_MAX + (x).len)
306 #define SORT_DATA_SIZE(x) s4_data_size(&(x))
307 #define SORT_INPUT_FB
308 #define SORT_OUTPUT_FB
310 #include <ucw/sorter/sorter.h>
/* Generates a key4: the length comes from random_max(KEY4_MAX), then the
 * loop visits each of the len string positions. */
313 gen_key4(struct key4 *k)
315 k->len = random_max(KEY4_MAX);
316 for (uint i=0; i<k->len; i++)
/* Produces len bytes of data into buf. Callers pass a hash of the key's
 * string as h and call it again with the same h to rebuild the expected
 * bytes for comparison. */
321 gen_data4(byte *buf, uint len, uint h)
/* String test. mode == 0 sorts bare keys with s4_sort(); a non-zero mode
 * attaches generated data to each key and uses s4b_sort(). */
331 test_strings(uint mode, u64 size)
/* Estimated bytes per item: KEY4_MAX/2 + 4, plus 128 when data is attached.
 * NOTE(review): presumably half the maximum string length plus a 4-byte
 * length field -- confirm. */
333 uint avg_item_size = KEY4_MAX/2 + 4 + (mode ? 128 : 0);
334 uint N = MIN(size / avg_item_size, 0xffffffff);
335 msg(L_INFO, ">>> Strings %s(N=%u)", (mode ? "with data " : ""), N);
338 struct key4 k, lastk;
339 byte buf[256], buf2[256];
342 struct fastbuf *f = bopen_tmp(65536);
343 for (uint i=0; i<N; i++)
/* The data bytes are derived from a hash of the key's string. */
347 uint h = hash_block(k.s, k.len);
351 gen_data4(buf, s4_data_size(&k), h);
352 bwrite(f, buf, s4_data_size(&k));
358 f = (mode ? s4b_sort : s4_sort)(f, NULL);
361 SORT_XTRACE(2, "Verifying");
362 for (uint i=0; i<N; i++)
364 int ok = s4_read_key(f, &k);
366 uint h = hash_block(k.s, k.len);
367 if (mode && s4_data_size(&k))
369 ok = breadb(f, buf, s4_data_size(&k));
/* Rebuild the expected data from the key and compare it with what was read. */
371 gen_data4(buf2, s4_data_size(&k), h);
372 ASSERT(!memcmp(buf, buf2, s4_data_size(&k)));
/* Only a strictly smaller key trips this check, so equal neighbours pass. */
374 if (i && s4_compare(&k, &lastk) < 0)
383 /*** Graph-like structure with custom presorting ***/
/* Generator state: s5_N is the modulus, s5_K and s5_L are multipliers,
 * s5_i and s5_j are counters (see their use in s5_gen() below). */
390 static uint s5_N, s5_K, s5_L, s5_i, s5_j;
/* Produces the next pair into *p. */
396 static int s5_gen(struct s5_pair *p)
/* Taken when s5_N is 0 or s5_i has reached s5_N-1.
 * NOTE(review): presumably the end-of-sequence case returning 0 -- confirm. */
400 if (!s5_N || s5_i >= s5_N-1)
/* Both coordinates are computed in 64-bit arithmetic and reduced modulo s5_N. */
405 p->x = ((u64)s5_j * s5_K) % s5_N;
406 p->y = ((u64)(s5_i + s5_j) * s5_L) % s5_N;
/* Instantiates <ucw/sorter/array-simple.h> with the s5m_ prefix over u32 elements. */
411 #define ASORT_PREFIX(x) s5m_##x
412 #define ASORT_KEY_TYPE u32
413 #include <ucw/sorter/array-simple.h>
/* Merge callback for records whose data fit in memory: the data of all n
 * keys are gathered into one array and keys[0] is written out as the key. */
415 static void s5_write_merged(struct fastbuf *f, struct key5 **keys, void **data, uint n, void *buf)
419 for (uint i=0; i<n; i++)
/* Each key contributes 4*cnt bytes, copied to position m of array a. */
421 memcpy(&a[m], data[i], 4*keys[i]->cnt);
426 bwrite(f, keys[0], sizeof(struct key5));
/* Merge callback taking the data as fastbuf streams: one 32-bit value is
 * pre-read from each stream, a combined key is written to dest, and the
 * streams are then consumed value by value. */
430 static void s5_copy_merged(struct key5 **keys, struct fastbuf **data, uint n, struct fastbuf *dest)
434 for (uint i=0; i<n; i++)
/* k[i] holds the value most recently read from stream i. */
436 k[i] = bgetl(data[i]);
/* The output key takes x from keys[0] and m as its count. */
439 struct key5 key = { .x = keys[0]->x, .cnt = m };
440 bwrite(dest, &key, sizeof(key));
/* NOTE(review): presumably scans streams 1..n-1 for the best candidate b -- confirm. */
444 for (uint i=1; i<n; i++)
/* Refill the slot of stream b with its next value. */
449 k[b] = bgetl(data[b]);
/* Less-than predicate for pairs: compares x first, then y, via COMPARE_LT
 * (defined elsewhere). Pairs are passed by value. */
455 static inline int s5p_lt(struct s5_pair x, struct s5_pair y)
457 COMPARE_LT(x.x, y.x);
458 COMPARE_LT(x.y, y.y);
/* Instantiates <ucw/sorter/array.h> with the s5p_ prefix over struct s5_pair,
 * using s5p_lt() as the ordering. */
462 #define ASORT_PREFIX(x) s5p_##x
463 #define ASORT_KEY_TYPE struct s5_pair
464 #define ASORT_LT(x,y) s5p_lt(x,y)
465 #include <ucw/sorter/array.h>
/* Custom presorter: fills buf with generated pairs and emits them to dest
 * grouped by x. */
467 static int s5_presort(struct fastbuf *dest, void *buf, size_t bufsize)
/* Capacity of buf in pairs, capped at 0xffffffff. */
469 uint max = MIN(bufsize/sizeof(struct s5_pair), 0xffffffff);
470 struct s5_pair *a = buf;
/* Pull pairs from s5_gen() until the buffer is full or the generator stops. */
472 while (n<max && s5_gen(&a[n]))
/* Advance i across the run of pairs sharing a[j].x. */
481 while (i < n && a[i].x == a[j].x)
/* One key per run: x of the run and the number of pairs in it ... */
483 struct key5 k = { .x = a[j].x, .cnt = i-j };
484 bwrite(dest, &k, sizeof(k));
/* ... with the y values written as 32-bit data. */
486 bputl(dest, a[j++].y);
/* Sorter instance with the s5_ prefix over struct key5, keyed by member x.
 * Each key carries 4*cnt bytes of data, and SORT_UNIFY_WORKSPACE is defined
 * as that same data size. SORT_INPUT_PRESORT and SORT_OUTPUT_THIS_FB are
 * selected.
 * NOTE(review): presumably input comes from s5_presort() and output goes to
 * a caller-supplied fastbuf -- confirm against sorter.h. */
491 #define SORT_KEY_REGULAR struct key5
492 #define SORT_PREFIX(x) s5_##x
493 #define SORT_DATA_SIZE(k) (4*(k).cnt)
495 #define SORT_UNIFY_WORKSPACE(k) SORT_DATA_SIZE(k)
496 #define SORT_INPUT_PRESORT
497 #define SORT_OUTPUT_THIS_FB
498 #define SORT_INT(k) (k).x
500 #include <ucw/sorter/sorter.h>
/* Second key5 sorter instance (s5b_ prefix): same key, data size and
 * workspace settings as s5_, but with SORT_INPUT_FB instead of
 * SORT_INPUT_PRESORT. Its merge callbacks are aliased to the s5_ ones. */
502 #define SORT_KEY_REGULAR struct key5
503 #define SORT_PREFIX(x) s5b_##x
504 #define SORT_DATA_SIZE(k) (4*(k).cnt)
506 #define SORT_UNIFY_WORKSPACE(k) SORT_DATA_SIZE(k)
507 #define SORT_INPUT_FB
508 #define SORT_OUTPUT_THIS_FB
509 #define SORT_INT(k) (k).x
510 #define s5b_write_merged s5_write_merged
511 #define s5b_copy_merged s5_copy_merged
513 #include <ucw/sorter/sorter.h>
/* Graph test. mode == 0 uses the presorting sorter (s5_sort, no input
 * fastbuf); a non-zero mode writes the input to a temporary fastbuf and
 * uses s5b_sort. */
516 test_graph(uint mode, u64 size)
/* Loops while N*(N+2)*4, computed in 64 bits, is still below size.
 * NOTE(review): the body presumably grows N -- confirm. */
519 while ((u64)N*(N+2)*4 < size)
523 msg(L_INFO, ">>> Graph%s (N=%u)", (mode ? "" : " with custom presorting"), N);
529 struct fastbuf *in = NULL;
533 in = bopen_tmp(65536);
/* Input records written here each carry x from a pair p and a count of 1. */
536 struct key5 k = { .x = p.x, .cnt = 1 };
537 bwrite(in, &k, sizeof(k));
544 struct fastbuf *f = bopen_tmp(65536);
/* A marker word is written to the output fastbuf before sorting; the
 * verification below asserts that it is still there. */
545 bputl(f, 0xfeedcafe);
546 struct fastbuf *g = (mode ? s5b_sort(in, f, s5_N-1) : s5_sort(NULL, f, s5_N-1));
550 SORT_XTRACE(2, "Verifying");
552 ASSERT(c == 0xfeedcafe);
/* Outer loop reads one key per i; the inner loop runs N times per key. */
553 for (uint i=0; i<N; i++)
556 int ok = breadb(f, &k, sizeof(k));
560 for (uint j=0; j<N; j++)
569 /*** Simple 8-byte integer keys ***/
/* Sorter instance with the s6_ prefix over struct key6, fastbuf input and
 * output; SORT_INT64(k) expands to the key's x member. */
575 #define SORT_KEY_REGULAR struct key6
576 #define SORT_PREFIX(x) s6_##x
577 #define SORT_INPUT_FB
578 #define SORT_OUTPUT_FB
580 #define SORT_INT64(k) (k).x
582 #include <ucw/sorter/sorter.h>
/* 64-bit integer test: like the 32-bit integer test, but every key is
 * multiplied by 777777 and written with bputq(). */
585 test_int64(int mode, u64 size)
/* N is 0 for size 0, otherwise nextprime() of size/8 capped at 0xffff0000. */
587 u64 N = size ? nextprime(MIN(size/8, 0xffff0000)) : 0;
/* NOTE(review): %llu is paired with a (long long) argument here and in the
 * die() call below; a cast to unsigned long long would match the format. */
589 msg(L_INFO, ">>> 64-bit integers (%s, N=%llu)", ((char *[]) { "increasing", "decreasing", "random" })[mode], (long long)N);
591 struct fastbuf *f = bopen_tmp(65536);
592 for (u64 i=0; i<N; i++)
593 bputq(f, 777777*((mode==0) ? i : (mode==1) ? N-1-i : ((u64)i * K + 17) % N));
/* The third argument is the scaled value of the last index, 777777*(N-1). */
597 f = s6_sort(f, NULL, 777777*(N-1));
600 SORT_XTRACE(2, "Verifying");
601 for (u64 i=0; i<N; i++)
/* Position i is expected to hold 777777*i. */
605 die("Discrepancy: %llu instead of %llu", (long long)j, 777777*(long long)i);
/* Runs test number i with the given size. Sixteen test calls are listed
 * here: three modes each for the int, counted, hashes and int64 tests, and
 * two each for the strings and graph tests. */
613 run_test(uint i, u64 size)
619 test_int(0, size); break;
621 test_int(1, size); break;
623 test_int(2, size); break;
625 test_counted(0, size); break;
627 test_counted(1, size); break;
629 test_counted(2, size); break;
631 test_hashes(0, size); break;
633 test_hashes(1, size); break;
635 test_hashes(2, size); break;
637 test_strings(0, size); break;
639 test_strings(1, size); break;
641 test_graph(0, size); break;
643 test_graph(1, size); break;
645 test_int64(0, size); break;
647 test_int64(1, size); break;
649 test_int64(2, size); break;
/* Entry point: parses the -d, -s, -t and -v options (on top of
 * CF_SHORT_OPTS) and then iterates over the test numbers up to TMAX. */
655 main(int argc, char **argv)
662 while ((c = cf_getopt(argc, argv, CF_SHORT_OPTS "d:s:t:v", CF_NO_LONG_OPTS, NULL)) >= 0)
/* The debug level is converted with atol(), so malformed input is not detected. */
666 sorter_debug = atol(optarg);
/* NOTE(review): a non-zero cf_parse_u64() result presumably signals a
 * parse error -- confirm. */
669 if (cf_parse_u64(optarg, &size))
/* The test list is a comma-separated string split into at most ARRAY_SIZE(w) words. */
675 int f = str_sepsplit(optarg, ',', w, ARRAY_SIZE(w));
679 for (int i=0; i<f; i++)
693 fputs("Usage: sort-test [-v] [-d <debug>] [-s <size>] [-t <test>]\n", stderr);
699 for (uint i=0; i<TMAX; i++)