From 15a2b6d5ef43218fc66b95e4c24b152934feb77e Mon Sep 17 00:00:00 2001 From: Martin Mares Date: Thu, 6 Sep 2007 21:27:31 +0200 Subject: [PATCH] More bits of the array sorter: radix-sort implemented. --- lib/sorter/Makefile | 2 +- lib/sorter/array.c | 81 +++++++++++++++++++++++++++++++++++++++++ lib/sorter/array.h | 58 ++++++++++++++++++++++++----- lib/sorter/common.h | 18 +++++++++ lib/sorter/s-fixint.h | 7 +++- lib/sorter/s-internal.h | 4 +- lib/sorter/sorter.h | 10 +++-- 7 files changed, 161 insertions(+), 19 deletions(-) create mode 100644 lib/sorter/array.c diff --git a/lib/sorter/Makefile b/lib/sorter/Makefile index 6a0c4f94..7dfa34de 100644 --- a/lib/sorter/Makefile +++ b/lib/sorter/Makefile @@ -2,7 +2,7 @@ DIRS+=lib/sorter -LIBUCW_MODS+=sorter/config sorter/govern sorter/sbuck +LIBUCW_MODS+=$(addprefix sorter/, config govern sbuck array) PROGS+=$(o)/lib/sorter/sort-test $(o)/lib/sorter/old-test $(o)/lib/sorter/sort-test: $(o)/lib/sorter/sort-test.o $(LIBUCW) diff --git a/lib/sorter/array.c b/lib/sorter/array.c new file mode 100644 index 00000000..65a11921 --- /dev/null +++ b/lib/sorter/array.c @@ -0,0 +1,81 @@ +/* + * UCW Library -- Optimized Array Sorter + * + * (c) 2003--2007 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#define LOCAL_DEBUG + +#include "lib/lib.h" +#include "lib/sorter/common.h" + +#include + +#define ASORT_MIN_RADIX 5000 // FIXME: var? +#define ASORT_MIN_SHIFT 2 + +static void +asort_radix(struct asort_context *ctx, void *array, void *buffer, uns num_elts, uns hash_bits, uns swapped_output) +{ + uns buckets = (1 << ctx->radix_bits); + uns shift = (hash_bits > ctx->radix_bits) ? (hash_bits - ctx->radix_bits) : 0; + uns cnt[buckets]; + + DBG(">>> n=%d h=%d s=%d sw=%d", num_elts, hash_bits, shift, swapped_output); + + bzero(cnt, sizeof(cnt)); + ctx->radix_count(array, num_elts, cnt, shift); + + uns pos = 0; + for (uns i=0; iradix_split(array, buffer, num_elts, cnt, shift); + pos = 0; + for (uns i=0; iquicksort(buffer, n); + if (!swapped_output) + memcpy(array, buffer, n * ctx->elt_size); + } + else + asort_radix(ctx, buffer, array, n, shift, !swapped_output); + array += n * ctx->elt_size; + buffer += n * ctx->elt_size; + pos = cnt[i]; + } +} + +void +asort_run(struct asort_context *ctx) +{ + SORT_XTRACE(10, "Array-sorting %d items per %d bytes, hash_bits=%d", ctx->num_elts, ctx->elt_size, ctx->hash_bits); + + if (ctx->num_elts < ASORT_MIN_RADIX || + ctx->hash_bits <= ASORT_MIN_SHIFT || + !ctx->radix_split || + (sorter_debug & SORT_DEBUG_ASORT_NO_RADIX)) + { + SORT_XTRACE(12, "Decided to use direct quicksort"); + ctx->quicksort(ctx->array, ctx->num_elts); + } + else + { + SORT_XTRACE(12, "Decided to use radix-sort"); + // FIXME: select dest buffer + asort_radix(ctx, ctx->array, ctx->buffer, ctx->num_elts, ctx->hash_bits, 0); + } + + SORT_XTRACE(11, "Array-sort finished"); +} diff --git a/lib/sorter/array.h b/lib/sorter/array.h index 73da6908..1370baa5 100644 --- a/lib/sorter/array.h +++ b/lib/sorter/array.h @@ -22,8 +22,9 @@ * ASORT_KEY_TYPE [*] data type of a single array entry key * ASORT_LT(x,y) x < y for ASORT_TYPE (default: "x> shift) & ASORT_RADIX_MASK ] ++; +} + +static void Q(radix_split)(void *src_ptr, void *dest_ptr, uns num_elts, uns *ptrs, uns shift) +{ + Q(key) *src = src_ptr, *dest = dest_ptr; + for (uns i=0; i> shift) & ASORT_RADIX_MASK ]++ ] = src[i]; +} + +#endif + static Q(key) *Q(sort)(Q(key) *array, uns num_elts, Q(key) *buffer, uns hash_bits) { - (void) buffer; - (void) hash_bits; - Q(raw_sort)(array, num_elts); - return array; + struct asort_context ctx = { + .array = array, + .buffer = buffer, + .num_elts = num_elts, + .hash_bits = hash_bits, + .elt_size = sizeof(Q(key)), + .quicksort = Q(quicksort), +#ifdef ASORT_HASH + .radix_count = Q(radix_count), + .radix_split = Q(radix_split), + .radix_bits = ASORT_RADIX_BITS, +#endif + }; + asort_run(&ctx); + return ctx.array; } /* FIXME */ @@ -177,4 +213,6 @@ static Q(key) *Q(sort)(Q(key) *array, uns num_elts, Q(key) *buffer, uns hash_bit #undef ASORT_THRESHOLD #undef ASORT_PAGE_ALIGNED #undef ASORT_HASH +#undef ASORT_RADIX_BITS +#undef ASORT_RADIX_MASK #undef Q diff --git a/lib/sorter/common.h b/lib/sorter/common.h index 70e10c08..b8438ff7 100644 --- a/lib/sorter/common.h +++ b/lib/sorter/common.h @@ -28,6 +28,8 @@ enum sort_debug { SORT_DEBUG_KEEP_BUCKETS = 4, SORT_DEBUG_NO_RADIX = 8, SORT_DEBUG_NO_MULTIWAY = 16, + SORT_DEBUG_ASORT_NO_RADIX = 32, + SORT_DEBUG_ASORT_NO_THREADS = 64 }; struct sort_bucket; @@ -114,4 +116,20 @@ struct fastbuf *sbuck_read(struct sort_bucket *b); struct fastbuf *sbuck_write(struct sort_bucket *b); void sbuck_swap_out(struct sort_bucket *b); +/* Contexts and helper functions for the array sorter */ + +struct asort_context { + void *array; // Array to sort + void *buffer; // Auxiliary buffer (required when radix-sorting) + uns num_elts; // Number of elements in the array + uns elt_size; // Bytes per element + uns hash_bits; // Remaining bits of hash function + uns radix_bits; // How many bits to process in a single radix-sort pass + void (*quicksort)(void *array_ptr, uns num_elts); + void (*radix_count)(void *src_ptr, uns num_elts, uns *cnt, uns shift); + void (*radix_split)(void *src_ptr, void *dest_ptr, uns num_elts, uns *ptrs, uns shift); +}; + +void asort_run(struct asort_context *ctx); + #endif diff --git a/lib/sorter/s-fixint.h b/lib/sorter/s-fixint.h index 6eac13bb..7aafa7de 100644 --- a/lib/sorter/s-fixint.h +++ b/lib/sorter/s-fixint.h @@ -13,6 +13,9 @@ #define ASORT_KEY_TYPE P(key) #define ASORT_LT(x,y) (P(compare)(&(x), &(y)) < 0) #define ASORT_PAGE_ALIGNED +#ifdef SORT_INTERNAL_RADIX +#define ASORT_HASH(x) P(hash)(&(x)) +#endif #include "lib/sorter/array.h" /* @@ -30,7 +33,7 @@ static size_t P(internal_workspace)(void) #ifdef SORT_UNIFY workspace = sizeof(P(key) *); #endif -#ifdef SORT_HASH_BITS // FIXME: Another switch? +#ifdef SORT_INTERNAL_RADIX workspace = MAX(workspace, sizeof(P(key))); #endif return workspace; @@ -68,7 +71,7 @@ static int P(internal)(struct sort_context *ctx, struct sort_bucket *bin, struct timestamp_t timer; init_timer(&timer); buf = P(array_sort)(buf, n, -#ifdef SORT_HASH_BITS +#ifdef SORT_INTERNAL_RADIX workspace, bin->hash_bits #else NULL, 0 diff --git a/lib/sorter/s-internal.h b/lib/sorter/s-internal.h index 92a3ad64..85557f12 100644 --- a/lib/sorter/s-internal.h +++ b/lib/sorter/s-internal.h @@ -65,7 +65,7 @@ static inline size_t P(internal_workspace)(P(key) *key UNUSED) #ifdef SORT_UNIFY_WORKSPACE ws += SORT_UNIFY_WORKSPACE(*key); #endif -#ifdef SORT_HASH_BITS +#ifdef SORT_INTERNAL_RADIX ws = MAX(ws, sizeof(P(internal_item_t))); #endif return ws; @@ -146,7 +146,7 @@ static int P(internal)(struct sort_context *ctx, struct sort_bucket *bin, struct timestamp_t timer; init_timer(&timer); item_array = P(array_sort)(item_array, count, -#ifdef SORT_HASH_BITS +#ifdef SORT_INTERNAL_RADIX workspace, bin->hash_bits #else NULL, 0 diff --git a/lib/sorter/sorter.h b/lib/sorter/sorter.h index 675a3d5b..5a009fab 100644 --- a/lib/sorter/sorter.h +++ b/lib/sorter/sorter.h @@ -193,6 +193,11 @@ static inline void P(copy_merged)(P(key) **keys, struct fastbuf **data UNUSED, u } #endif +#if defined(SORT_HASH_BITS) || defined(SORT_INT) +#define SORT_INTERNAL_RADIX +#include "lib/sorter/s-radix.h" +#endif + #if defined(SORT_VAR_KEY) || defined(SORT_VAR_DATA) || defined(SORT_UNIFY_WORKSPACE) #include "lib/sorter/s-internal.h" #else @@ -202,10 +207,6 @@ static inline void P(copy_merged)(P(key) **keys, struct fastbuf **data UNUSED, u #include "lib/sorter/s-twoway.h" #include "lib/sorter/s-multiway.h" -#if defined(SORT_HASH_BITS) || defined(SORT_INT) -#include "lib/sorter/s-radix.h" -#endif - static struct fastbuf *P(sort)( #ifdef SORT_INPUT_FILE byte *in, @@ -301,6 +302,7 @@ static struct fastbuf *P(sort)( #undef SORT_UNIQUE #undef SORT_ASSERT_UNIQUE #undef SORT_DELETE_INPUT +#undef SORT_INTERNAL_RADIX #undef SWAP #undef LESS #undef P -- 2.39.2