/*
- * Sherlock Library -- Universal Sorter
+ * UCW Library -- Universal Sorter
*
- * (c) 2001--2002 Martin Mares <mj@ucw.cz>
+ * (c) 2001--2004 Martin Mares <mj@ucw.cz>
+ * (c) 2004 Robert Spalek <robert@ucw.cz>
*
* This software may be freely distributed and used according to the terms
* of the GNU Lesser General Public License.
* SORT_KEY [*] data type capable of storing a single key
* SORT_PREFIX(x) [*] add a name prefix (used on all global names
* defined by the sorter)
- * SORT_PRESORT include an in-core presorting pass
+ * SORT_PRESORT include an in-core pre-sorting pass. Beware, when in
+ * the pre-sorting mode, it's quite possible that the
+ * comparison function will be called with both arguments
+ * identical.
+ * SORT_UP_TO a C expression, if defined, sorting is stopped after the
+ * average run length in the file exceeds the value of this
+ * expression (in bytes)
* SORT_UNIFY merge items with identical keys
+ * SORT_UNIQUE all items have distinct keys (checked in debug mode)
+ * SORT_REGULAR all items are equally long and they don't contain
+ * anything else than the key. In this case, the sorter
+ * automatically supplies fetch_key, copy_data, fetch_item
+ * and store_item functions. Item size is also expected
+ * to be small.
* SORT_DELETE_INPUT a C expression, if true, the input files are
* deleted as soon as possible
* SORT_INPUT_FILE input is a file with this name
* int PREFIX_compare(SORT_KEY *a, *b)
* compare two keys, result like strcmp
* int PREFIX_fetch_key(struct fastbuf *f, SORT_KEY *k)
- * fetch next key, returns 1=ok, 0=eof
+ * fetch next key, returns nonzero=ok, 0=eof
* void PREFIX_copy_data(struct fastbuf *src, *dest, SORT_KEY *k)
* write just fetched key k to dest and copy all data
* belonging to this key from src to dest.
* undef'd.
*/
-/* Declarations of externals from sorter.c */
-
-#ifndef SORT_DECLS_READ
-#define SORT_DECLS_READ
-
-extern uns sorter_trace;
-extern uns sorter_presort_bufsize;
-extern uns sorter_stream_bufsize;
-
-extern uns sorter_pass_counter;
-
-#endif /* !SORT_DECLS_READ */
-
-/* The sorter proper */
-
-#ifndef SORT_DECLARE_ONLY
-
+#include "lib/sorter-globals.h"
#include "lib/fastbuf.h"
#include <unistd.h>
#include <fcntl.h>
#define LESS <=
#endif
+#if defined(SORT_UNIQUE) && defined(DEBUG_ASSERTS)
+#define SORT_ASSERT_UNIQUE
+#endif
+
+#ifdef SORT_REGULAR
+
+/* In the regular mode every item is exactly one fixed-size SORT_KEY with no
+ * trailing data, so the sorter supplies the item access functions itself
+ * (see the SORT_REGULAR description in the header comment). */
+
+/* Read the next fixed-size key; returns nonzero=ok, 0=eof (breadb result). */
+static inline int
+P(fetch_key)(struct fastbuf *in, SORT_KEY *x)
+{
+ return breadb(in, x, sizeof(*x));
+}
+
+/* The key is the whole item, so copying the data is just writing the key. */
+static inline void
+P(copy_data)(struct fastbuf *in UNUSED, struct fastbuf *out, SORT_KEY *x)
+{
+ bwrite(out, x, sizeof(*x));
+}
+
+/* Nothing follows the key in memory; the item ends right after it. */
+static inline byte *
+P(fetch_item)(struct fastbuf *in UNUSED, SORT_KEY *x UNUSED, byte *limit UNUSED)
+{
+ return (byte *)(x+1);
+}
+
+/* Write out a single fixed-size item. */
+static inline void
+P(store_item)(struct fastbuf *out, SORT_KEY *x)
+{
+ bwrite(out, x, sizeof(*x));
+}
+
+#endif
+
+/* Rewind a finished output stream so it can serve as an input of the next
+ * pass; NULL streams (no data written) pass through unchanged. */
static struct fastbuf *
P(flush_out)(struct fastbuf *out)
{
if (out)
- {
- bflush(out);
- bsetpos(out, 0);
- }
+ brewind(out);
return out;
}
-static void
-P(pass)(struct fastbuf **fb1, struct fastbuf **fb2)
+static uns
+P(pass)(struct fastbuf **fb1, struct fastbuf **fb2
+#ifdef SORT_UP_TO
+ , uns stop_sorting
+#endif
+)
{
struct fastbuf *in1 = *fb1;
struct fastbuf *in2 = *fb2;
if (!kout || !(P(compare)(kout, ktmp) LESS 0))
{
struct fastbuf *t;
- SWAP(out1, out2, t);
+#ifdef SORT_UP_TO
+ if (!stop_sorting)
+#endif
+ SWAP(out1, out2, t);
if (!out1)
out1 = bopen_tmp(sorter_stream_bufsize);
run_count++;
run2 = next2 && (P(compare)(kprev2, kin2) LESS 0);
kout = kprev2;
}
+#endif
+#ifdef SORT_ASSERT_UNIQUE
+ else if (unlikely(comp == 0))
+ ASSERT(0);
#endif
else
{
bclose(in1);
bclose(in2);
if (sorter_trace)
- log(L_INFO, "Pass %d: %d runs, %d+%d KB", sorter_pass_counter, run_count,
+ msg(L_INFO, "Pass %d: %d runs, %d+%d KB", sorter_pass_counter, run_count,
(out1 ? (int)((btell(out1) + 1023) / 1024) : 0),
(out2 ? (int)((btell(out2) + 1023) / 1024) : 0));
*fb1 = P(flush_out)(out1);
*fb2 = P(flush_out)(out2);
sorter_pass_counter++;
+ return run_count;
}
#ifdef SORT_PRESORT
+#if defined(SORT_REGULAR) && !defined(SORT_UNIFY)
+
+/* If we are doing a simple sort on a regular file, we can use a faster presorting strategy */
+
+/* Presort buffer shared with the generated array sorter below. */
+static SORT_KEY *P(array);
+
+#define ASORT_PREFIX(x) SORT_PREFIX(x##_array)
+#define ASORT_KEY_TYPE SORT_KEY
+#define ASORT_ELT(i) P(array)[i]
+#define ASORT_LT(x,y) (P(compare)(&(x),&(y)) < 0)
+
+#include "lib/arraysort.h"
+
+/* Pre-sorting pass for the regular case: repeatedly fill the presort buffer
+ * with fixed-size items, sort it in core and emit it, alternating between the
+ * two output streams whenever a new run has to start.  A buffer that happens
+ * to continue the previous run is appended to it instead of opening a new
+ * run, which keeps the run count low. */
+static void
+P(presort)(struct fastbuf **fb1, struct fastbuf **fb2)
+{
+ struct fastbuf *in = *fb1;
+ struct fastbuf *out1 = NULL;
+ struct fastbuf *out2 = NULL;
+ struct fastbuf *tbuf;
+ uns buf_items = sorter_presort_bufsize / sizeof(SORT_KEY);
+ uns run_count = 0;
+ SORT_KEY last_out = { }, *array;
+
+ ASSERT(!*fb2);
+ /* Need at least two items per buffer for presorting to make sense. */
+ if (buf_items < 2)
+ die("PresortBuffer set too low");
+ P(array) = array = xmalloc(buf_items * sizeof(SORT_KEY));
+
+ for(;;)
+ {
+ /* A short read must still be a whole number of items. */
+ uns s = bread(in, array, buf_items * sizeof(SORT_KEY));
+ uns n = s / sizeof(SORT_KEY);
+ ASSERT(!(s % sizeof(SORT_KEY)));
+ if (!n)
+ break;
+ P(sort_array)(n);
+#ifdef SORT_ASSERT_UNIQUE
+ /* With SORT_UNIQUE in debug mode, verify all keys are strictly increasing. */
+ for (uns i=0; i<n-1; i++)
+ if (unlikely(P(compare)(&array[i], &array[i+1]) >= 0))
+ ASSERT(0);
+ ASSERT(!run_count || P(compare)(&last_out, &array[0]));
+#endif
+ /* Start a new run unless this buffer continues the previous one. */
+ if (!run_count || P(compare)(&last_out, &array[0]) > 0)
+ {
+ run_count++;
+#ifdef SORT_UP_TO
+ /* Keep everything in one stream once runs are long enough to stop early. */
+ if (sorter_presort_bufsize < (uns) SORT_UP_TO)
+#endif
+ SWAP(out1, out2, tbuf);
+ if (!out1)
+ out1 = bopen_tmp(sorter_stream_bufsize);
+ }
+ last_out = array[n-1];
+ bwrite(out1, array, n * sizeof(SORT_KEY));
+ }
+
+ bclose(in);
+ if (sorter_trace)
+ msg(L_INFO, "Pass 0: %d runs, %d+%d KB",
+ run_count,
+ (out1 ? (int)((btell(out1) + 1023) / 1024) : 0),
+ (out2 ? (int)((btell(out2) + 1023) / 1024) : 0));
+ *fb1 = P(flush_out)(out1);
+ *fb2 = P(flush_out)(out2);
+ xfree(array);
+}
+
+#else
+
#define SORT_NODE struct P(presort_node)
SORT_NODE {
leftover = NULL;
while (cont)
{
- SWAP(out1, out2, tbuf);
+#ifdef SORT_UP_TO
+ if (sorter_presort_bufsize < SORT_UP_TO)
+#endif
+ SWAP(out1, out2, tbuf);
if (!out1)
out1 = bopen_tmp(sorter_stream_bufsize);
current = buffer;
}
for(;;)
{
- current = (byte *) ALIGN((addr_int_t) current, CPU_STRUCT_ALIGN);
+ current = (byte *) ALIGN_TO((uintptr_t) current, CPU_STRUCT_ALIGN);
if (current + sizeof(*this) > bufend)
break;
this = (SORT_NODE *) current;
first = second->next;
continue;
}
+#endif
+#ifdef SORT_ASSERT_UNIQUE
+ ASSERT(!first->next || P(compare)(&first->key, &first->next->key));
#endif
P(store_item)(out1, &first->key);
first = first->next;
bclose(in);
if (sorter_trace)
- log(L_INFO, "Pass 0: %d runs (%d giants, %d splits), %d+%d KB",
+ msg(L_INFO, "Pass 0: %d runs (%d giants, %d splits), %d+%d KB",
run_count, giant_count, split_count,
(out1 ? (int)((btell(out1) + 1023) / 1024) : 0),
(out2 ? (int)((btell(out2) + 1023) / 1024) : 0));
xfree(buffer);
}
+#endif /* SORT_REGULAR && !SORT_UNIFY */
+
#endif /* SORT_PRESORT */
static
P(presort)(&fb1, &fb2);
if (fb2)
#endif
+#ifndef SORT_UP_TO
do P(pass)(&fb1, &fb2); while (fb1 && fb2);
+#else
+ {
+ sh_off_t run_count, max_run_count = 0;
+ if (fb1)
+ max_run_count += bfilesize(fb1);
+ if (fb2)
+ max_run_count += bfilesize(fb2);
+#ifdef SORT_PRESORT
+ run_count = max_run_count / sorter_presort_bufsize;
+#else
+ run_count = max_run_count;
+#endif
+ if (SORT_UP_TO)
+ max_run_count /= SORT_UP_TO;
+ do
+ run_count = P(pass)(&fb1, &fb2, (run_count+1)/2 <= max_run_count);
+ while (fb1 && fb2);
+ }
+#endif
if (!fb1)
fb1 = bopen_tmp(sorter_stream_bufsize);
#ifdef SORT_OUTPUT_FB
return fb1;
#else
- bconfig(fb1, BCONFIG_IS_TEMP_FILE, 0);
- if (rename(fb1->name, outname) < 0)
- die("rename(%s,%s): %m", fb1->name, outname);
- bclose(fb1);
+ bfix_tmp_file(fb1, outname);
#endif
}
#undef SORT_KEY
#undef SORT_PREFIX
#undef SORT_UNIFY
+#undef SORT_UNIQUE
+#undef SORT_ASSERT_UNIQUE
+#undef SORT_REGULAR
#undef SORT_DELETE_INPUT
#undef SORT_INPUT_FILE
#undef SORT_INPUT_FB
#undef SORT_INPUT_FBPAIR
#undef SORT_OUTPUT_FILE
#undef SORT_OUTPUT_FB
-
-#endif /* !SORT_DECLARE_ONLY */
+#undef SORT_PRESORT
+#undef SORT_UP_TO