X-Git-Url: http://mj.ucw.cz/gitweb/?a=blobdiff_plain;f=lib%2Fsorter%2Fs-internal.h;h=3a7107fe41767679e93d820f0656e568fcc2bfd8;hb=5c21b096ab39ece4c9ad80047144b2fde0c54758;hp=e921921e95241901544d48e88de817ae2befa55b;hpb=14e105850c5d3451a44fc50f2814683c0df3ae1b;p=libucw.git

diff --git a/lib/sorter/s-internal.h b/lib/sorter/s-internal.h
index e921921e..3a7107fe 100644
--- a/lib/sorter/s-internal.h
+++ b/lib/sorter/s-internal.h
@@ -9,17 +9,40 @@
 
 #include "lib/stkstring.h"
 
+#ifdef SORT_INTERNAL_RADIX
+/* Keep copies of the items' hashes to save cache misses */
+#define SORT_COPY_HASH
+#endif
+
 typedef struct {
   P(key) *key;
-  // FIXME: Add the hash here to save cache misses
+#ifdef SORT_COPY_HASH
+  P(hash_t) hash;		// Copy of P(hash)(key), stored when the item is read in
+#endif
 } P(internal_item_t);
 
 #define ASORT_PREFIX(x) SORT_PREFIX(array_##x)
 #define ASORT_KEY_TYPE P(internal_item_t)
-#define ASORT_ELT(i) ary[i]
-#define ASORT_LT(x,y) (P(compare)((x).key, (y).key) < 0)
-#define ASORT_EXTRA_ARGS , P(internal_item_t) *ary
-#include "lib/arraysort.h"
+#ifdef SORT_COPY_HASH
+#  ifdef SORT_INT
+#    define ASORT_LT(x,y) ((x).hash < (y).hash)		// In this mode, the hash is the value
+#  else
+#    define ASORT_LT(x,y) ((x).hash < (y).hash || (x).hash == (y).hash && P(compare)((x).key, (y).key) < 0)
+#  endif
+#else
+#  define ASORT_LT(x,y) (P(compare)((x).key, (y).key) < 0)
+#endif
+#ifdef SORT_INTERNAL_RADIX	// Hash accessors, presumably consumed by lib/sorter/array.h included below
+#    ifdef SORT_COPY_HASH
+#      define ASORT_HASH(x) (x).hash	// Use the copy stored in the item
+#    else	// Not reached at present: SORT_INTERNAL_RADIX always defines SORT_COPY_HASH earlier in this file
+#      define ASORT_HASH(x) P(hash)((x).key)	// Recompute the hash from the key
+#    endif
+#    ifdef SORT_LONG_HASH
+#      define ASORT_LONG_HASH	// Pass the long-hash setting through under the ASORT_ name
+#    endif
+#endif
+#include "lib/sorter/array.h"
 
 /*
  *  The big_buf has the following layout:
@@ -66,8 +89,8 @@ static inline size_t P(internal_workspace)(P(key) *key UNUSED)
 #ifdef SORT_UNIFY_WORKSPACE
   ws += SORT_UNIFY_WORKSPACE(*key);
 #endif
-#if 0						/* FIXME: Shadow copy if radix-sorting */
-  ws = MAX(ws, sizeof(P(key) *));
+#ifdef SORT_INTERNAL_RADIX
+  ws = MAX(ws, sizeof(P(internal_item_t)));
 #endif
   return ws;
 }
@@ -92,7 +115,7 @@ static int P(internal)(struct sort_context *ctx, struct sort_bucket *bin, struct
 #ifdef SORT_VAR_DATA
   if (sizeof(key) + 2*CPU_PAGE_SIZE + SORT_DATA_SIZE(key) + P(internal_workspace)(&key) > bufsize)
     {
-      SORT_XTRACE(3, "s-internal: Generating a giant run");
+      SORT_XTRACE(4, "s-internal: Generating a giant run");
       struct fastbuf *out = sbuck_write(bout);
       P(copy_data)(&key, in, out);
       bout->runs++;
@@ -100,7 +123,7 @@ static int P(internal)(struct sort_context *ctx, struct sort_bucket *bin, struct
     }
 #endif
 
-  SORT_XTRACE(4, "s-internal: Reading");
+  SORT_XTRACE(5, "s-internal: Reading");
   P(internal_item_t) *item_array = ctx->big_buf, *item = item_array, *last_item;
   byte *end = (byte *) ctx->big_buf + bufsize;
   size_t remains = bufsize - CPU_PAGE_SIZE;
@@ -132,6 +155,9 @@ static int P(internal)(struct sort_context *ctx, struct sort_bucket *bin, struct
       breadb(in, end + ksize_aligned, dsize);
 #endif
       item->key = (P(key)*) end;
+#ifdef SORT_COPY_HASH
+      item->hash = P(hash)(item->key);
+#endif
       item++;
     }
   while (P(read_key)(in, &key));
@@ -139,17 +165,22 @@ static int P(internal)(struct sort_context *ctx, struct sort_bucket *bin, struct
 
   uns count = last_item - item_array;
   void *workspace UNUSED = ALIGN_PTR(last_item, CPU_PAGE_SIZE);
-  SORT_XTRACE(3, "s-internal: Read %u items (%s items, %s workspace, %s data)",
+  SORT_XTRACE(4, "s-internal: Read %u items (%s items, %s workspace, %s data)",
 	count,
 	stk_fsize((byte*)last_item - (byte*)item_array),
 	stk_fsize(end - (byte*)last_item - remains),
 	stk_fsize((byte*)ctx->big_buf + bufsize - end));
   timestamp_t timer;
   init_timer(&timer);
-  P(array_sort)(count, item_array);
+  item_array = P(array_sort)(item_array, count
+#ifdef SORT_INTERNAL_RADIX
+    , workspace, bin->hash_bits
+#endif
+    );
+  last_item = item_array + count;
   ctx->total_int_time += get_timer(&timer);
 
-  SORT_XTRACE(4, "s-internal: Writing");
+  SORT_XTRACE(5, "s-internal: Writing");
   if (!ctx->more_keys)
     bout = bout_only;
   struct fastbuf *out = sbuck_write(bout);
@@ -187,7 +218,7 @@ static int P(internal)(struct sort_context *ctx, struct sort_bucket *bin, struct
 #endif
     }
 #ifdef SORT_UNIFY
-  SORT_XTRACE(3, "Merging reduced %u records", merged);
+  SORT_XTRACE(4, "Merging reduced %u records", merged);
 #endif
 
   return ctx->more_keys;
@@ -196,15 +227,24 @@ static int P(internal)(struct sort_context *ctx, struct sort_bucket *bin, struct
 static u64
 P(internal_estimate)(struct sort_context *ctx, struct sort_bucket *b UNUSED)
 {
+  // Estimates how many bytes of keys fit into big_buf at once; most of the numbers below are wild guesses
 #ifdef SORT_VAR_KEY
-  uns avg = ALIGN_TO(sizeof(P(key))/4, CPU_STRUCT_ALIGN);	// Wild guess...
+  uns avg = ALIGN_TO(sizeof(P(key))/4, CPU_STRUCT_ALIGN);	// Variable-size keys: assume a quarter of sizeof(key) on average
 #else
   uns avg = ALIGN_TO(sizeof(P(key)), CPU_STRUCT_ALIGN);
 #endif
-  // We ignore the data part of records, it probably won't make the estimate much worse
-  size_t bufsize = ctx->big_buf_size;
-#ifdef SORT_UNIFY_WORKSPACE		// FIXME: Or if radix-sorting
-  bufsize /= 2;
+  uns ws = 0;	// Estimated per-item workspace
+#ifdef SORT_UNIFY
+  ws += sizeof(void *);
+#endif
+#ifdef SORT_UNIFY_WORKSPACE
+  ws += avg;
+#endif
+#ifdef SORT_INTERNAL_RADIX
+  ws = MAX(ws, sizeof(P(internal_item_t)));	// Presumably room for the radix sort's second copy of each item -- confirm against array.h
 #endif
-  return (bufsize / (avg + sizeof(P(internal_item_t))) * avg);
+  // We ignore the data part of records; it probably won't make the estimate much worse
+  return (ctx->big_buf_size / (avg + ws + sizeof(P(internal_item_t))) * avg);	// (items that fit) * (average key size)
 }
+
+#undef SORT_COPY_HASH