From 7b52ebbcc1ef1faaeb666bf42d72604b6e84f84a Mon Sep 17 00:00:00 2001 From: Martin Mares Date: Fri, 2 Feb 2007 23:27:02 +0100 Subject: [PATCH] Implemented SORT_UNIFY, SORT_UNIQUE and debugged SORT_VAR_DATA. Now the new sorter is equivalent to the old sorter (i.e., the same algorithms), but with new interface and standing on new foundations. I will write a simple test suite and then start filling the gaps with something interesting. --- lib/sorter/common.h | 2 +- lib/sorter/govern.c | 1 - lib/sorter/s-internal.h | 46 ++++++++++++++++++++++++++++++++++------- lib/sorter/s-twoway.h | 9 ++++---- lib/sorter/sort-test.c | 30 +++++++++++++++++++++++++-- lib/sorter/sorter.h | 9 ++++---- 6 files changed, 76 insertions(+), 21 deletions(-) diff --git a/lib/sorter/common.h b/lib/sorter/common.h index a17a809a..c0d2e4b2 100644 --- a/lib/sorter/common.h +++ b/lib/sorter/common.h @@ -57,7 +57,7 @@ struct sort_context { void *big_buf, *big_buf_half; size_t big_buf_size, big_buf_half_size; - int (*custom_presort)(struct fastbuf *dest, byte *buf, size_t bufsize); + int (*custom_presort)(struct fastbuf *dest, void *buf, size_t bufsize); // Take as much as possible from the source bucket, sort it in memory and dump to destination bucket. // Return 1 if there is more data available in the source bucket. int (*internal_sort)(struct sort_context *ctx, struct sort_bucket *in, struct sort_bucket *out, struct sort_bucket *out_only); diff --git a/lib/sorter/govern.c b/lib/sorter/govern.c index 3606163c..d325a029 100644 --- a/lib/sorter/govern.c +++ b/lib/sorter/govern.c @@ -151,7 +151,6 @@ sorter_free_buf(struct sort_context *ctx) static int sorter_presort(struct sort_context *ctx, struct sort_bucket *in, struct sort_bucket *out, struct sort_bucket *out_only) { - /* FIXME: Mode with no presorting (mostly for debugging) */ sorter_alloc_buf(ctx); if (in->flags & SBF_CUSTOM_PRESORT) { diff --git a/lib/sorter/s-internal.h b/lib/sorter/s-internal.h index 329c3b4a..7fd84bc8 100644 --- a/lib/sorter/s-internal.h +++ b/lib/sorter/s-internal.h @@ -19,6 +19,15 @@ typedef struct { #define ASORT_EXTRA_ARGS , P(internal_item_t) *ary #include "lib/arraysort.h" +static inline void *P(internal_get_data)(P(key) *key) +{ + uns ksize = SORT_KEY_SIZE(*key); +#ifdef SORT_UNIFY + ksize = ALIGN_TO(ksize, CPU_STRUCT_ALIGN); +#endif + return (byte *) key + ksize; +} + static int P(internal)(struct sort_context *ctx, struct sort_bucket *bin, struct sort_bucket *bout, struct sort_bucket *bout_only) { sorter_alloc_buf(ctx); @@ -39,7 +48,7 @@ static int P(internal)(struct sort_context *ctx, struct sort_bucket *bin, struct if (sizeof(key) + 1024 + SORT_DATA_SIZE(key) > ctx->big_buf_half_size) { SORT_XTRACE("s-internal: Generating a giant run"); - struct fastbuf *out = sorter_open_write(bout); /* FIXME: Using a non-direct buffer would be nice here */ + struct fastbuf *out = sbuck_write(bout); /* FIXME: Using a non-direct buffer would be nice here */ P(copy_data)(&key, in, out); bout->runs++; return 1; // We don't know, but 1 is always safe @@ -88,18 +97,41 @@ static int P(internal)(struct sort_context *ctx, struct sort_bucket *bin, struct bout = bout_only; struct fastbuf *out = sbuck_write(bout); bout->runs++; - /* FIXME: No unification done yet */ + uns merged = 0; for (item = item_array; item < last_item; item++) { - P(write_key)(out, item->key); -#ifdef SORT_VAR_DATA - uns ksize = SORT_KEY_SIZE(*item->key); #ifdef SORT_UNIFY - ksize = ALIGN_TO(ksize, CPU_STRUCT_ALIGN); + if (item < last_item - 1 && !P(compare)(item->key, item[1].key)) + { + // Rewrite the item structures with just pointers to keys and place + // pointers to data in the secondary array. + P(key) **key_array = (void *) item; + void **data_array = (void **) ctx->big_buf_half; + key_array[0] = item[0].key; + data_array[0] = P(internal_get_data)(key_array[0]); + uns cnt; + for (cnt=1; item+cnt < last_item && !P(compare)(key_array[0], item[cnt].key); cnt++) + { + key_array[cnt] = item[cnt].key; + data_array[cnt] = P(internal_get_data)(key_array[cnt]); + } + P(write_merged)(out, key_array, data_array, cnt, data_array+cnt); + item += cnt - 1; + merged += cnt - 1; + continue; + } #endif - bwrite(out, (byte *) item->key + ksize, SORT_DATA_SIZE(*item->key)); +#ifdef SORT_ASSERT_UNIQUE + ASSERT(item == last_item-1 || P(compare)(item->key, item[1].key) < 0); +#endif + P(write_key)(out, item->key); +#ifdef SORT_VAR_DATA + bwrite(out, P(internal_get_data)(item->key), SORT_DATA_SIZE(*item->key)); #endif } +#ifdef SORT_UNIFY + SORT_XTRACE("Merging reduced %d records", merged); +#endif return ctx->more_keys; } diff --git a/lib/sorter/s-twoway.h b/lib/sorter/s-twoway.h index 51caf3d4..0e6d6442 100644 --- a/lib/sorter/s-twoway.h +++ b/lib/sorter/s-twoway.h @@ -58,6 +58,9 @@ static void P(twoway_merge)(struct sort_context *ctx UNUSED, struct sort_bucket } run_count++; } +#ifdef SORT_ASSERT_UNIQUE + ASSERT(comp != 0); +#endif if (comp LESS 0) { P(copy_data)(kin1, fin1, fout1); @@ -66,7 +69,7 @@ static void P(twoway_merge)(struct sort_context *ctx UNUSED, struct sort_bucket run1 = next1 && (P(compare)(kprev1, kin1) LESS 0); kout = kprev1; } -#ifdef SORT_MERGE +#ifdef SORT_UNIFY else if (comp == 0) { P(key) *mkeys[] = { kin1, kin2 }; @@ -80,10 +83,6 @@ static void P(twoway_merge)(struct sort_context *ctx UNUSED, struct sort_bucket run2 = next2 && (P(compare)(kprev2, kin2) LESS 0); kout = kprev2; } -#endif -#ifdef SORT_ASSERT_UNIQUE - else if (unlikely(comp == 0)) - ASSERT(0); #endif else { diff --git a/lib/sorter/sort-test.c b/lib/sorter/sort-test.c index c17333c3..9a4eb463 100644 --- a/lib/sorter/sort-test.c +++ b/lib/sorter/sort-test.c @@ -13,10 +13,28 @@ struct key { uns x; }; +static inline void s_write_merged(struct fastbuf *f, struct key **k, void **d, uns n, void *buf) +{ + bwrite(f, k[0], sizeof(struct key)); + bwrite(f, d[0], 5); +} + +static inline void s_copy_merged(struct key **keys, struct fastbuf **data, uns n, struct fastbuf *dest) +{ + bwrite(dest, keys[0], sizeof(struct key)); + bbcopy(data[0], dest, 5); + for (uns i=1; i