From: Michal Vaner Date: Sun, 14 Dec 2008 13:29:29 +0000 (+0100) Subject: ucw docs: Array sorter X-Git-Tag: holmes-import~124 X-Git-Url: http://mj.ucw.cz/gitweb/?a=commitdiff_plain;h=c8c688e0b06fdacb20b28b66b9b0608dae8ca0dc;p=libucw.git ucw docs: Array sorter --- diff --git a/ucw/doc/sort.txt b/ucw/doc/sort.txt index 0fbc1d2b..92b03ee7 100644 --- a/ucw/doc/sort.txt +++ b/ucw/doc/sort.txt @@ -13,7 +13,9 @@ All routines described below are <>. * <> * <> * <> -- <> +- <> + * <> + * <> - <> [[array-simple]] @@ -24,19 +26,21 @@ If you want to sort some data in memory and you aren't too picky about setting how, you just use the routine defined in `sorter/array-simple.h`. It is an optimised hybrid quick-sort/insert-sort algorithm (quick-sort is used to split the -input into small parts, each is then sorted by insert-sort). +input into small parts, each is then sorted by insert-sort). It is +more than 2 times faster than stdlib's qsort(), mostly because of +inlining. You need to define few macros and include the header. You get a sorting function in return. It will be called <>. -[mandatory-simple] +[[mandatory-simple]] Mandatory macros ~~~~~~~~~~~~~~~~ - `ASORT_PREFIX(name)` -- The identifier generating macro. - `ASORT_KEY_TYPE` -- Data type of a single array entry key. -[optional-simple] +[[optional-simple]] Optional macros ~~~~~~~~~~~~~~~ - `ASORT_ELT(i)` -- Indexing macro. Returns the key of the @@ -54,7 +58,7 @@ Optional macros !!ucw/sorter/array-simple.h ASORT_PREFIX -[example-simple] +[[example-simple]] Example ~~~~~~~ @@ -87,3 +91,46 @@ to sort them by the strings. Now we got a complicated_sort(uns array_size, struct elem *odd_array, struct *even_array) function to perform our sorting. + +[[array]] +Huge array sorting +------------------ + +This one is very similar to the simple array sorter, but it is +optimised for huge arrays. It is used mostly by the +<> machinery described below, but you can +use it directly. 
+ +It differs in a few details: +- It supports only contiguous arrays, no indexing macro can be + provided. +- It is able to sort in parallel on SMP systems. It assumes all + callbacks you provide are thread-safe. +- If you provide a monotone hash function (if `hash(x) < hash(y)`, then + `x < y`, but `x` and `y` may differ when `hash(x) == hash(y)`), it + will use it to gain some more speed by radix-sort. + +[[mandatory-array]] +Mandatory macros +~~~~~~~~~~~~~~~~ + +- `ASORT_PREFIX(x)` -- The identifier generating macro. +- `ASORT_KEY_TYPE` -- Type of elements in the array. + +[[optional-array]] +Optional macros +~~~~~~~~~~~~~~~ + +- `ASORT_LT(x,y)` -- Comparing macro. Uses the `<` operator if not + provided. +- `ASORT_HASH(x)` -- A monotone hash function (or macro). Should + return `uns`. +- `ASORT_LONG_HASH(x)` -- Like `ASORT_HASH(x)`, but returns 64-bit + number instead of 32-bit. +- `ASORT_THRESHOLD` -- How small should a chunk of data be to be sorted + by insert-sort? Defaults to `8` elements. +- `ASORT_RADIX_BITS` -- How many bits of the hash function should be + used at once for radix-sort? The default is guessed from your + architecture. + +!!ucw/sorter/array.h ASORT_PREFIX diff --git a/ucw/sorter/array.h b/ucw/sorter/array.h index 4e64ace8..e6e0199d 100644 --- a/ucw/sorter/array.h +++ b/ucw/sorter/array.h @@ -284,11 +284,25 @@ static void Q(radix_split)(void *src_ptr, void *dest_ptr, uns num_elts, uns *ptr #endif -static Q(key) *Q(sort)(Q(key) *array, uns num_elts #ifdef ASORT_HASH - , Q(key) *buffer, uns hash_bits +#define ASORT_HASH_ARGS , Q(key) *buffer, uns hash_bits +#else +#define ASORT_HASH_ARGS #endif - ) + +/** + * The generated function. The @array is the data to be sorted, @num_elts tells + * how many elements the array has. If you did not provide `ASORT_HASH`, then + * the `ASORT_HASH_ARGS` is empty (there are only the two parameters in that + * case). When you provide it, the function gains two more parameters in the + * `ASORT_HASH_ARGS` macro. 
They are `ASORT_KEY_TYPE *@buffer`, which must be a + * memory buffer of the same size as the input array, and `uns @hash_bits`, + * specifying how many significant bits the hash function returns. + * + * The function returns a pointer to the sorted data, either the @array or the + * @buffer argument. + **/ +static ASORT_KEY_TYPE *ASORT_PREFIX(sort)(ASORT_KEY_TYPE *array, uns num_elts ASORT_HASH_ARGS) { struct asort_context ctx = { .array = array, @@ -318,4 +332,5 @@ static Q(key) *Q(sort)(Q(key) *array, uns num_elts #undef ASORT_RADIX_MASK #undef ASORT_SWAP #undef ASORT_THRESHOLD +#undef ASORT_HASH_ARGS #undef Q