From c0544008a494710146ac6cf88b61b5fd2d46f51b Mon Sep 17 00:00:00 2001 From: Martin Mares Date: Fri, 7 Sep 2007 15:34:05 +0200 Subject: [PATCH] Save cache misses by keeping a copy of the hash value next to the pointer. This trades cache efficiency for extra memory, but it seems to be very well worth it (e.g., graph sorting in sort-test is 8 times faster now). --- lib/sorter/s-internal.h | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/lib/sorter/s-internal.h b/lib/sorter/s-internal.h index e581cecb..a2f9e06b 100644 --- a/lib/sorter/s-internal.h +++ b/lib/sorter/s-internal.h @@ -9,16 +9,35 @@ #include "lib/stkstring.h" +#ifdef SORT_INTERNAL_RADIX +/* Keep copies of the items' hashes to save cache misses */ +#define SORT_COPY_HASH +#endif + typedef struct { P(key) *key; - // FIXME: Add the hash here to save cache misses +#ifdef SORT_COPY_HASH + P(hash_t) hash; +#endif } P(internal_item_t); #define ASORT_PREFIX(x) SORT_PREFIX(array_##x) #define ASORT_KEY_TYPE P(internal_item_t) -#define ASORT_LT(x,y) (P(compare)((x).key, (y).key) < 0) +#ifdef SORT_COPY_HASH +# ifdef SORT_INT +# define ASORT_LT(x,y) ((x).hash < (y).hash) // In this mode, the hash is the value +# else +# define ASORT_LT(x,y) ((x).hash < (y).hash || (x).hash == (y).hash && P(compare)((x).key, (y).key) < 0) +# endif +#else +# define ASORT_LT(x,y) (P(compare)((x).key, (y).key) < 0) +#endif #ifdef SORT_INTERNAL_RADIX -# define ASORT_HASH(x) P(hash)((x).key) +# ifdef SORT_COPY_HASH +# define ASORT_HASH(x) (x).hash +# else +# define ASORT_HASH(x) P(hash)((x).key) +# endif # ifdef SORT_LONG_HASH # define ASORT_LONG_HASH # endif @@ -136,6 +155,9 @@ static int P(internal)(struct sort_context *ctx, struct sort_bucket *bin, struct breadb(in, end + ksize_aligned, dsize); #endif item->key = (P(key)*) end; +#ifdef SORT_COPY_HASH + item->hash = P(hash)(item->key); +#endif item++; } while (P(read_key)(in, &key)); @@ -218,3 +240,5 @@ P(internal_estimate)(struct sort_context *ctx, struct sort_bucket *b UNUSED) #endif return (bufsize / (avg + sizeof(P(internal_item_t))) * avg); } + +#undef SORT_COPY_HASH -- 2.39.2