X-Git-Url: http://mj.ucw.cz/gitweb/?a=blobdiff_plain;ds=sidebyside;f=lib%2Fsorter.h;h=ed82eb34893fad81c80350a06975d264267b56da;hb=b7b9a3d3d56430caba2f41a2e9cae8ca87dce1d6;hp=c2da83113646d0d1762786e491e67c731f9e02c7;hpb=b3fa32609de798b55eb8b6aff0310cddeb9c0d19;p=libucw.git diff --git a/lib/sorter.h b/lib/sorter.h index c2da8311..ed82eb34 100644 --- a/lib/sorter.h +++ b/lib/sorter.h @@ -1,7 +1,8 @@ /* - * Sherlock Library -- Universal Sorter + * UCW Library -- Universal Sorter * * (c) 2001--2004 Martin Mares + * (c) 2004 Robert Spalek * * This software may be freely distributed and used according to the terms * of the GNU Lesser General Public License. @@ -18,10 +19,13 @@ * SORT_KEY [*] data type capable of storing a single key * SORT_PREFIX(x) [*] add a name prefix (used on all global names * defined by the sorter) - * SORT_PRESORT include an in-core presorting pass. Beware, when in + * SORT_PRESORT include an in-core pre-sorting pass. Beware, when in * the pre-sorting mode, it's quite possible that the * comparison function will be called with both arguments * identical. + * SORT_UP_TO a C expression, if defined, sorting is stopped after the + * average run length in the file exceeds the value of this + * expression (in bytes) * SORT_UNIFY merge items with identical keys * SORT_UNIQUE all items have distinct keys (checked in debug mode) * SORT_REGULAR all items are equally long and they don't contain @@ -46,7 +50,7 @@ * int PREFIX_compare(SORT_KEY *a, *b) * compare two keys, result like strcmp * int PREFIX_fetch_key(struct fastbuf *f, SORT_KEY *k) - * fetch next key, returns 1=ok, 0=eof + * fetch next key, returns nonzero=ok, 0=eof * void PREFIX_copy_data(struct fastbuf *src, *dest, SORT_KEY *k) * write just fetched key k to dest and copy all data * belonging to this key from src to dest. @@ -81,23 +85,7 @@ * undef'd. */ -/* Declarations of externals from sorter.c */ - -#ifndef SORT_DECLS_READ -#define SORT_DECLS_READ - -extern uns sorter_trace; -extern uns sorter_presort_bufsize; -extern uns sorter_stream_bufsize; - -extern uns sorter_pass_counter; - -#endif /* !SORT_DECLS_READ */ - -/* The sorter proper */ - -#ifndef SORT_DECLARE_ONLY - +#include "lib/sorter-globals.h" #include "lib/fastbuf.h" #include #include @@ -116,7 +104,7 @@ extern uns sorter_pass_counter; #define LESS <= #endif -#if defined(SORT_UNIQUE) && defined(DEBUG) +#if defined(SORT_UNIQUE) && defined(DEBUG_ASSERTS) #define SORT_ASSERT_UNIQUE #endif @@ -156,8 +144,12 @@ P(flush_out)(struct fastbuf *out) return out; } -static void -P(pass)(struct fastbuf **fb1, struct fastbuf **fb2) +static uns +P(pass)(struct fastbuf **fb1, struct fastbuf **fb2 +#ifdef SORT_UP_TO + , uns stop_sorting +#endif +) { struct fastbuf *in1 = *fb1; struct fastbuf *in2 = *fb2; @@ -188,7 +180,10 @@ P(pass)(struct fastbuf **fb1, struct fastbuf **fb2) if (!kout || !(P(compare)(kout, ktmp) LESS 0)) { struct fastbuf *t; - SWAP(out1, out2, t); +#ifdef SORT_UP_TO + if (!stop_sorting) +#endif + SWAP(out1, out2, t); if (!out1) out1 = bopen_tmp(sorter_stream_bufsize); run_count++; @@ -241,6 +236,7 @@ P(pass)(struct fastbuf **fb1, struct fastbuf **fb2) *fb1 = P(flush_out)(out1); *fb2 = P(flush_out)(out2); sorter_pass_counter++; + return run_count; } #ifdef SORT_PRESORT @@ -267,7 +263,7 @@ P(presort)(struct fastbuf **fb1, struct fastbuf **fb2) struct fastbuf *tbuf; uns buf_items = sorter_presort_bufsize / sizeof(SORT_KEY); uns run_count = 0; - SORT_KEY last_out, *array; + SORT_KEY last_out = { }, *array; ASSERT(!*fb2); if (buf_items < 2) @@ -291,7 +287,10 @@ P(presort)(struct fastbuf **fb1, struct fastbuf **fb2) if (!run_count || P(compare)(&last_out, &array[0]) > 0) { run_count++; - SWAP(out1, out2, tbuf); +#ifdef SORT_UP_TO + if (sorter_presort_bufsize < (uns) SORT_UP_TO) +#endif + SWAP(out1, out2, tbuf); if (!out1) out1 = bopen_tmp(sorter_stream_bufsize); } @@ -385,7 +384,10 @@ P(presort)(struct fastbuf **fb1, struct fastbuf **fb2) leftover = NULL; while (cont) { - SWAP(out1, out2, tbuf); +#ifdef SORT_UP_TO + if (sorter_presort_bufsize < SORT_UP_TO) +#endif + SWAP(out1, out2, tbuf); if (!out1) out1 = bopen_tmp(sorter_stream_bufsize); current = buffer; @@ -399,7 +401,7 @@ P(presort)(struct fastbuf **fb1, struct fastbuf **fb2) } for(;;) { - current = (byte *) ALIGN((addr_int_t) current, CPU_STRUCT_ALIGN); + current = (byte *) ALIGN_TO((addr_int_t) current, CPU_STRUCT_ALIGN); if (current + sizeof(*this) > bufend) break; this = (SORT_NODE *) current; @@ -509,7 +511,27 @@ struct fastbuf *fb1, struct fastbuf *fb2 P(presort)(&fb1, &fb2); if (fb2) #endif +#ifndef SORT_UP_TO do P(pass)(&fb1, &fb2); while (fb1 && fb2); +#else + { + sh_off_t run_count, max_run_count = 0; + if (fb1) + max_run_count += bfilesize(fb1); + if (fb2) + max_run_count += bfilesize(fb2); +#ifdef SORT_PRESORT + run_count = max_run_count / sorter_presort_bufsize; +#else + run_count = max_run_count; +#endif + if (SORT_UP_TO) + max_run_count /= SORT_UP_TO; + do + run_count = P(pass)(&fb1, &fb2, (run_count+1)/2 <= max_run_count); + while (fb1 && fb2); + } +#endif if (!fb1) fb1 = bopen_tmp(sorter_stream_bufsize); @@ -539,5 +561,5 @@ struct fastbuf *fb1, struct fastbuf *fb2 #undef SORT_INPUT_FBPAIR #undef SORT_OUTPUT_FILE #undef SORT_OUTPUT_FB - -#endif /* !SORT_DECLARE_ONLY */ +#undef SORT_PRESORT +#undef SORT_UP_TO