2 * UCW Library -- Universal Sorter: Common Declarations
4 * (c) 2007 Martin Mares <mj@ucw.cz>
6 * This software may be freely distributed and used according to the terms
7 * of the GNU Lesser General Public License.
10 #ifndef _UCW_SORTER_COMMON_H
11 #define _UCW_SORTER_COMMON_H
13 #include "lib/clists.h"
15 /* Configuration variables; some of them are also used by the old sorter.
 * Only `extern' declarations appear here, the variables themselves are defined elsewhere. */
16 extern uns sorter_trace, sorter_presort_bufsize, sorter_stream_bufsize;
17 extern uns sorter_debug, sorter_min_radix_bits, sorter_max_radix_bits;
18 extern uns sorter_min_multiway_bits, sorter_max_multiway_bits;
19 extern uns sorter_threads, sorter_thread_threshold;
20 extern u64 sorter_bufsize;	// Declared as u64, unlike the other (uns) buffer sizes above
21 extern struct fb_params sorter_fb_params;	// NOTE(review): struct fb_params is not declared in the visible part of this header -- confirm which include provides it
/* Log a debug-level message via msg() whenever sorter_trace is non-zero. */
23 #define SORT_TRACE(x...) do { if (sorter_trace) msg(L_DEBUG, x); } while(0)
/* Log a debug-level message via msg() only when sorter_trace is at least `level'.
 * `level' is parenthesized so that a compound argument (e.g. `a ? b : c' or `x | y')
 * is compared as a whole instead of being split by operator precedence. */
24 #define SORT_XTRACE(level, x...) do { if (sorter_trace >= (level)) msg(L_DEBUG, x); } while(0)
/* NOTE(review): the header of the enclosing enum is not visible in this excerpt.
 * Every constant is a distinct power of two, so the values can be OR-ed together
 * into a single mask; presumably that mask is sorter_debug -- confirm. */
27 SORT_DEBUG_NO_PRESORT = 1,
28 SORT_DEBUG_NO_JOIN = 2,
29 SORT_DEBUG_KEEP_BUCKETS = 4,
30 SORT_DEBUG_NO_RADIX = 8,
31 SORT_DEBUG_NO_MULTIWAY = 16,
32 SORT_DEBUG_ASORT_NO_RADIX = 32,
33 SORT_DEBUG_ASORT_NO_THREADS = 64
/* NOTE(review): the header of the enclosing structure is not visible in this excerpt.
 * The callbacks below all take a `struct sort_context *', so these are presumably
 * the members of struct sort_context -- confirm. */
39 struct fastbuf *in_fb;	// presumably the input stream -- confirm
40 struct fastbuf *out_fb;	// presumably the output stream -- confirm
// Custom presorter callback: gets a destination fastbuf and a work buffer of `bufsize' bytes.
// NOTE(review): the meaning of the return value is not visible here.
49 int (*custom_presort)(struct fastbuf *dest, void *buf, size_t bufsize);
51 // Take as much as possible from the source bucket, sort it in memory and dump to destination bucket.
52 // Return 1 if there is more data available in the source bucket.
53 int (*internal_sort)(struct sort_context *ctx, struct sort_bucket *in, struct sort_bucket *out, struct sort_bucket *out_only);
55 // Estimate how much input data from `b' will fit in the internal sorting buffer.
56 u64 (*internal_estimate)(struct sort_context *ctx, struct sort_bucket *b);
58 // Two-way split/merge: merge up to 2 source buckets to up to 2 destination buckets.
59 // Bucket arrays are NULL-terminated.
60 void (*twoway_merge)(struct sort_context *ctx, struct sort_bucket **ins, struct sort_bucket **outs);
62 // Multi-way merge: merge an arbitrary number of source buckets to a single destination bucket.
63 void (*multiway_merge)(struct sort_context *ctx, struct sort_bucket **ins, struct sort_bucket *out);
65 // Radix split according to hash function
66 void (*radix_split)(struct sort_context *ctx, struct sort_bucket *in, struct sort_bucket **outs, uns bitpos, uns numbits);
68 // State variables of internal_sort
// NOTE(review): the members following the comment above are not visible in this excerpt;
// the two declarations below look like timing statistics (units not shown here) -- confirm.
73 timestamp_t start_time;
75 uns total_int_time, total_pre_time, total_ext_time;
/* Run the sorter on the given context (presumably the main entry point -- confirm). */
78 void sorter_run(struct sort_context *ctx);
/* Memory helpers, all operating on a sort context.
 * sorter_alloc() takes a size and returns a pointer; the buffer functions return nothing.
 * NOTE(review): ownership and lifetime of the returned memory are defined by the implementation, not visible here. */
82 void *sorter_alloc(struct sort_context *ctx, uns size);
83 void sorter_prepare_buf(struct sort_context *ctx);
84 void sorter_alloc_buf(struct sort_context *ctx);
85 void sorter_free_buf(struct sort_context *ctx);
/* NOTE(review): the header of the enclosing structure is not visible in this excerpt;
 * judging by the sbuck_*() prototypes these are presumably members of struct sort_bucket -- confirm. */
91 struct sort_context *ctx;	// Sort context associated with this bucket
95 u64 size; // Size in bytes (not valid when writing)
96 uns runs; // Number of runs, 0 if not sorted
97 uns hash_bits; // Remaining bits of the hash function
98 byte *ident; // Identifier used in debug messages
/* Flags describing a sort bucket. Each value is a distinct single bit, so several flags can be combined.
 * NOTE(review): the closing brace of this enum is not visible in this excerpt. */
101 enum sort_bucket_flags {
102 SBF_FINAL = 1, // This bucket corresponds to the final output file (always 1 run)
103 SBF_SOURCE = 2, // Contains the source file (always 0 runs)
104 SBF_CUSTOM_PRESORT = 4, // Contains source to read via custom presorter
105 SBF_OPEN_WRITE = 256, // We are currently writing to the fastbuf
106 SBF_OPEN_READ = 512, // We are reading from the fastbuf
107 SBF_DESTROYED = 1024, // Already done with, no further references allowed
108 SBF_SWAPPED_OUT = 2048, // Swapped out to a named file
/* Operations on sort buckets. sbuck_new() takes the sort context and returns a bucket pointer;
 * all the other functions take the bucket they operate on. */
111 struct sort_bucket *sbuck_new(struct sort_context *ctx);
112 void sbuck_drop(struct sort_bucket *b);
113 int sbuck_have(struct sort_bucket *b);
114 int sbuck_has_file(struct sort_bucket *b);
115 sh_off_t sbuck_size(struct sort_bucket *b);
/* Both accessors return a fastbuf; presumably they correspond to the SBF_OPEN_READ / SBF_OPEN_WRITE states -- confirm. */
116 struct fastbuf *sbuck_read(struct sort_bucket *b);
117 struct fastbuf *sbuck_write(struct sort_bucket *b);
118 void sbuck_swap_out(struct sort_bucket *b);	// presumably related to SBF_SWAPPED_OUT -- confirm
120 /* Contexts and helper functions for the array sorter */
/* Context of the array sorter: the array being sorted, its geometry and the
 * element-type-specific callbacks.
 * NOTE(review): the closing brace of this structure is not visible in this excerpt. */
122 struct asort_context {
123 void *array; // Array to sort
124 void *buffer; // Auxiliary buffer (required when radix-sorting)
125 uns num_elts; // Number of elements in the array
126 uns elt_size; // Bytes per element
127 uns hash_bits; // Remaining bits of hash function
128 uns radix_bits; // How many bits to process in a single radix-sort pass
// Callbacks working on untyped element arrays; each gets a pointer to the elements and their count.
// NOTE(review): the exact contract of leftp/rightp, cnt, ptrs and shift is defined by the implementation, not visible here.
129 void (*quicksort)(void *array_ptr, uns num_elts);
130 void (*quicksplit)(void *array_ptr, uns num_elts, int *leftp, int *rightp);
131 void (*radix_count)(void *src_ptr, uns num_elts, uns *cnt, uns shift);
132 void (*radix_split)(void *src_ptr, void *dest_ptr, uns num_elts, uns *ptrs, uns shift);
134 // Used internally by array.c
135 struct rs_work **rs_works;
136 struct work_queue *rs_work_queue;
138 struct eltpool *eltpool;
/* Run the array sorter on the given context. */
141 void asort_run(struct asort_context *ctx);
/* Start / stop the array sorter's threads.
 * NOTE(review): the meaning of the `run' argument is not visible in this header -- confirm against the implementation. */
142 void asort_start_threads(uns run);
143 void asort_stop_threads(void);