From a1d5f704b6961b195a2a8592177f1eecc5f1228c Mon Sep 17 00:00:00 2001 From: Pavel Charvat Date: Tue, 9 Jun 2020 12:14:16 +0200 Subject: [PATCH] Random: Implemented new pseudo-random interface. --- ucw/Makefile | 7 +- ucw/random-fast.c | 776 ++++++++++++++++++++++++++++++++++++++++++++++ ucw/random-test.c | 202 ++++++++++++ ucw/random-test.t | 3 + ucw/random.h | 97 ++++++ 5 files changed, 1083 insertions(+), 2 deletions(-) create mode 100644 ucw/random-fast.c create mode 100644 ucw/random-test.c create mode 100644 ucw/random-test.t create mode 100644 ucw/random.h diff --git a/ucw/Makefile b/ucw/Makefile index b9bff392..6fdb641f 100644 --- a/ucw/Makefile +++ b/ucw/Makefile @@ -22,7 +22,7 @@ LIBUCW_MODS= \ char-cat char-upper char-lower unicode varint stkstring \ wildmatch regex \ prime primetable \ - random-legacy \ + random-legacy random-fast \ time-stamp time-timer time-conf \ bit-ffs bit-fls bit-array \ url \ @@ -54,6 +54,7 @@ LIBUCW_MAIN_INCLUDES= \ heap.h binheap.h binheap-node.h \ redblack.h \ prime.h \ + random.h \ bitops.h \ conf.h getopt.h ipaccess.h \ fastbuf.h io.h ff-unicode.h ff-varint.h ff-binary.h fb-socket.h fw-hex.h \ @@ -139,6 +140,7 @@ $(o)/ucw/table-test: $(o)/ucw/table-test.o $(LIBUCW) $(o)/ucw/table-test-2: $(o)/ucw/table-test-2.o $(LIBUCW) $(o)/ucw/table-test-align: $(o)/ucw/table-test-align.o $(LIBUCW) $(o)/ucw/xtypes-test: $(o)/ucw/xtypes-test.o $(LIBUCW) +$(o)/ucw/random-test: $(o)/ucw/random-test.o $(LIBUCW) TESTS+=$(addprefix $(o)/ucw/,regex.test unicode.test hash-test.test mempool.test stkstring.test \ slists.test bbuf.test kmp-test.test getopt.test ff-unicode.test eltpool.test \ @@ -147,7 +149,7 @@ TESTS+=$(addprefix $(o)/ucw/,regex.test unicode.test hash-test.test mempool.test fb-mem.test fb-buffer.test fb-mmap.test fb-multi.test fb-null.test \ redblack-test.test url.test strtonum-test.test \ gary.test time.test crc.test signames.test md5.test bitops.test opt.test \ - table.test table-test.test table-test-2.test table-test-align.test xtypes-test.test) + table.test table-test.test table-test-2.test table-test-align.test xtypes-test.test random-test.test) $(o)/ucw/varint.test: $(o)/ucw/varint-t $(o)/ucw/regex.test: $(o)/ucw/regex-t @@ -184,6 +186,7 @@ $(o)/ucw/table-test.test: $(o)/ucw/table-test $(o)/ucw/table-test-2.test: $(o)/ucw/table-test-2 $(o)/ucw/table-test-align.test: $(o)/ucw/table-test-align $(o)/ucw/xtypes-test.test: $(o)/ucw/xtypes-test +$(o)/ucw/random-test.test: $(o)/ucw/random-test ifdef CONFIG_UCW_THREADS TESTS+=$(addprefix $(o)/ucw/,asio.test) diff --git a/ucw/random-fast.c b/ucw/random-fast.c new file mode 100644 index 00000000..6c647ab2 --- /dev/null +++ b/ucw/random-fast.c @@ -0,0 +1,776 @@ +/* + * UCW Library -- Fast pseudo-random generator + * + * (c) 2020--2022 Pavel Charvat + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#undef LOCAL_DEBUG + +#include +#include +#include + +#include +#include +#include +#include + +// Select exactly one of these pseudo-random generators: +//#define FASTRAND_USE_PCG +//#define FASTRAND_USE_SPLITMIX +#define FASTRAND_USE_XOROSHIRO +//#define FASTRAND_USE_LIBC_ISO +//#define FASTRAND_USE_LIBC_BSD +//#define FASTRAND_USE_LIBC_SVID + +/** + * FASTRAND_USE_PCG + * + * References: + * -- https://en.wikipedia.org/wiki/Permuted_congruential_generator + * -- https://www.pcg-random.org/ + * -- "PCG: A Family of Simple Fast Space-Efficient Statistically Good Algorithms for Random Number Generation", + * 2014, Melissa E. O’Neill + * + * Notes: + * -- Simple and fast generator with good statistical properties. + * -- We use a constant increment and 64->32bit XSH RR output function. + **/ + +#define FASTRAND_PCG_MUL 6364136223846793005LLU +#define FASTRAND_PCG_INC 1442695040888963407LLU + +/** + * FASTRAND_USE_SPLITMIX + * + * References: + * -- http://prng.di.unimi.it/splitmix64.c (public, no copyright) + * -- http://docs.oracle.com/javase/8/docs/api/java/util/SplittableRandom.html + * -- "Fast Splittable Pseudorandom Number Generators", + * 2014, Guy L. Steele Jr., Doug Lea, Christine H. Flood + * + * Notes: + * -- Each 64 bit number is generated exactly once in a cycle, + * so we only use half of it -> 32 bit output. + **/ + +/** + * FASTRAND_USE_XOROSHIRO + * + * References: + * -- http://prng.di.unimi.it/ (examples are public, no copyright) + * -- "Scrambled Linear Pseudorandom Number Generators", + * 2019, David Blackman, Sebastiano Vigna + * + * Notes: + * -- A family of very fast generators. Good statistical properties, + * some variants just have problems with lowest bits. + * -- We support more xo(ro)shiro variants, see the options below. + **/ + +// Engine: xoroshiro 128, xoshiro 256 +#define FASTRAND_XOROSHIRO_SIZE 128 + +// Scrambler: 1 (+), 2 (++), 3 (**) +#define FASTRAND_XOROSHIRO_SCRAMBLER 1 + +// If defined, we only use the highest 32 bits from each 64 bit number; +// useful especially for the weakest '+' scrambler. +// XXX: We could instead cut less bits or use full 64 bits and prefer +// the higher ones only where trivial, for example in 32 bit fastrand_max(). +#define FASTRAND_XOROSHIRO_ONLY_HIGHER + +/** + * FASTRAND_USE_LIBC_ISO + * -- rand_r() + * -- libc ISO generator + * -- usually 31 bit output (RAND_MAX) + * -- very weak + **/ + +/** + * FASTRAND_USE_LIBC_BSD + * -- random_r() + * -- libc BSD generator + * -- usually 31 bit output (RAND_MAX) + **/ + +/** + * FASTRAND_USE_LIBC_SVID + * -- mrand48_r() + * -- libc SVID generator + * -- 32 bit output + **/ + +struct fastrand { +#ifdef FASTRAND_USE_PCG + u64 pcg_state; // PCG generator's current state +#endif + +#ifdef FASTRAND_USE_SPLITMIX + u64 splitmix_state; +#endif + +#ifdef FASTRAND_USE_XOROSHIRO + u64 xoroshiro_state[FASTRAND_XOROSHIRO_SIZE / 64]; +#endif + +#ifdef FASTRAND_USE_LIBC_ISO + uint iso_state; // ISO generator's current seed +#endif + +#ifdef FASTRAND_USE_LIBC_BSD + struct random_data bsd_data; // BSD generator's state structure + u64 bsd_state[128 / 8]; // Used "statebuf" in initstate_r(); see manual pages or glibc sources for useful sizes; + // glibc typecasts the (char *) to (int32_t *), so we rather make sure it's aligned +#endif + +#ifdef FASTRAND_USE_LIBC_SVID + struct drand48_data svid_data; // SVID generator's state structure +#endif + + bool inited; // Is seeded? + fastrand_seed_t seed_value; // Initial seed value; useful for reproducible debugging +}; + +/*** Common helpers ***/ + +#if defined(FASTRAND_USE_LIBC_ISO) || defined(FASTRAND_USE_LIBC_BSD) +# if RAND_MAX == (1U << 30) - 1 +# define FASTRAND_RAND_MAX_BITS 30 +# elif RAND_MAX == (1U << 31) - 1 +# define FASTRAND_RAND_MAX_BITS 31 +# else +# error "Unsupported value of RAND_MAX" +# endif +#endif + +// GCC should optimize these to a single instruction + +static inline u32 fastrand_ror32(u32 x, uint r) +{ + return (x >> r) | (x << (-r & 31)); +} + +static inline u64 fastrand_rol64(u64 x, uint r) +{ + return (x << r) | (x >> (-r & 63)); +} + +/*** PCG random generator ***/ + +#ifdef FASTRAND_USE_PCG +#define FASTRAND_ONE_BITS 32 + +static inline u32 fastrand_one(struct fastrand *c) +{ + u64 s = c->pcg_state; + uint r = s >> 59; + c->pcg_state = s * FASTRAND_PCG_MUL + FASTRAND_PCG_INC; + u32 x = ((s >> 18) ^ s) >> 27; + return fastrand_ror32(x, r); +} + +static void fastrand_init(struct fastrand *c) +{ + c->pcg_state = c->seed_value + FASTRAND_PCG_INC; + (void) fastrand_one(c); + DBG("RANDOM: Initialized PCG random generator, seed=0x%jx", (uintmax_t)c->seed_value); +} +#endif + +/*** splitmix64 generator ***/ + +// A helper; used also for seeding xoroshiro +static inline u64 fastrand_splitmix64_next(u64 *state) +{ + u64 z = (*state += 0x9e3779b97f4a7c15LLU); + z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9LLU; + z = (z ^ (z >> 27)) * 0x94d049bb133111ebLLU; + return z ^ (z >> 31); +} + +#ifdef FASTRAND_USE_SPLITMIX +#define FASTRAND_ONE_BITS 32 + +static void fastrand_init(struct fastrand *c) +{ + c->splitmix_state = c->seed_value; + DBG("RANDOM: Initialized splitmix64 random generator, seed=0x%jx", (uintmax_t)c->seed_value); +} + +static inline u32 fastrand_one(struct fastrand *c) +{ + return fastrand_splitmix64_next(&c->splitmix_state); +} +#endif + +/*** xoroshiro variants ***/ + +#ifdef FASTRAND_USE_XOROSHIRO +#ifdef FASTRAND_XOROSHIRO_ONLY_HIGHER +# define FASTRAND_ONE_BITS 32 +#else +# define FASTRAND_ONE_BITS 64 +#endif + +static void fastrand_init(struct fastrand *c) +{ + u64 x = c->seed_value; + for (uint i = 0; i < ARRAY_SIZE(c->xoroshiro_state); i++) + c->xoroshiro_state[i] = fastrand_splitmix64_next(&x); + if (unlikely(!c->xoroshiro_state[0])) + c->xoroshiro_state[0] = 1; + DBG("RANDOM: Initialized xoroshiro random generator, seed=0x%jx, size=%u, scrambler=%u", + (uintmax_t)c->seed_value, FASTRAND_XOROSHIRO_SIZE, FASTRAND_XOROSHIRO_SCRAMBLER); +} + +static inline +#ifdef FASTRAND_XOROSHIRO_ONLY_HIGHER +u32 +#else +u64 +#endif +fastrand_one(struct fastrand *c) +{ + u64 result; + u64 *s = c->xoroshiro_state; + +#if FASTRAND_XOROSHIRO_SIZE == 128 + u64 s0 = s[0]; + u64 s1 = s[1]; +#if FASTRAND_XOROSHIRO_SCRAMBLER == 1 + result = s0 + s1; + uint ra = 24, rb = 16, rc = 37; +#elif FASTRAND_XOROSHIRO_SCRAMBLER == 2 + result = fastrand_rol64(s0 + s1, 17) + s0; + uint ra = 49, rb = 21, rc = 28; +#elif FASTRAND_XOROSHIRO_SCRAMBLER == 3 + result = fastrand_rol64(s0 * 5, 7) * 9; + uint ra = 24, rb = 16, rc = 37; +#else +# error "Unsupported xoroshiro parameters" +#endif + s[0] = fastrand_rol64(s0, ra) ^ s1 ^ (s1 << rb); + s[1] = fastrand_rol64(s1, rc); + +#elif FASTRAND_XOROSHIRO_SIZE == 256 +#if FASTRAND_XOROSHIRO_SCRAMBLER == 1 + result = s[0] + s[3]; +#elif FASTRAND_XOROSHIRO_SCRAMBLER == 2 + result = fastrand_rol64(s[0] + s[3], 23) + s[0]; +#elif FASTRAND_XOROSHIRO_SCRAMBLER == 3 + result = fastrand_rol64(s[1] * 5, 7) * 9; +#else +# error "Unsupported xoroshiro parameters" +#endif + u64 t = s[1] << 17; + s[2] ^= s[0]; + s[3] ^= s[1]; + s[1] ^= s[2]; + s[0] ^= s[3]; + s[2] ^= t; + s[3] = fastrand_rol64(s[3], 45); +#else +# error "Unsupported xoroshiro parameters" +#endif + +#ifdef FASTRAND_XOROSHIRO_ONLY_HIGHER + result >>= 32; +#endif + return result; +} + +#endif + +/*** Libc ISO generator ***/ + +#ifdef FASTRAND_USE_LIBC_ISO +#define FASTRAND_ONE_BITS FASTRAND_RAND_MAX_BITS + +static void fastrand_init(struct fastrand *c) +{ + c->iso_state = c->seed_value; + DBG("RANDOM: Initialized libc ISO random generator, seed=0x%jx, bits=%u", (uintmax_t)c->seed_value, FASTRAND_ONE_BITS); +} + +static inline u32 fastrand_one(struct fastrand *c) +{ + return rand_r(&c->iso_state); +} + +#endif + +/*** Libc BSD generator ***/ + +#ifdef FASTRAND_USE_LIBC_BSD +#define FASTRAND_ONE_BITS FASTRAND_RAND_MAX_BITS + +static void fastrand_init(struct fastrand *c) +{ + DBG("RANDOM: Initialized libc BSD random generator, seed=0x%jx, statelen=%zu, bits=%u", (uintmax_t)c->seed_value, sizeof(c->bsd_state), FASTRAND_ONE_BITS); + c->bsd_data.state = NULL; // Required by initstate_r() + if (initstate_r(c->seed_value, (char *)c->bsd_state, sizeof(c->bsd_state), &c->bsd_data) < 0) + die("RANDOM: Failed to call initstate_r(): %m"); +} + +#ifdef LOCAL_DEBUG +static NONRET void fastrand_random_r_failed(void) +{ + die("RANDOM: Failed to call random_r(): %m"); +} +#endif + +static inline u32 fastrand_one(struct fastrand *c) +{ + int32_t r; +#ifdef LOCAL_DEBUG + if (unlikely(random_r(&c->bsd_data, &r) < 0)) + fastrand_random_r_failed(); +#else + (void) random_r(&c->bsd_data, &r); +#endif + return r; +} + +#endif + +/*** Libc SVID generator ***/ + +#ifdef FASTRAND_USE_LIBC_SVID +#define FASTRAND_ONE_BITS 32 + +static void fastrand_init(struct fastrand *c) +{ + DBG("RANDOM: Initialized libc SVID random generator, seed=0x%jx", (uintmax_t)c->seed_value); + if (srand48_r(c->seed_value, &c->svid_data) < 0) + die("RANDOM: Failed to call srand48_r(): %m"); +} + +#ifdef LOCAL_DEBUG +static NONRET void fastrand_mrand48_r_failed(void) +{ + die("RANDOM: Failed to call mrand48_r(): %m"); +} +#endif + +static inline u32 fastrand_one(struct fastrand *c) +{ + long int r; +#ifdef LOCAL_DEBUG + if (unlikely(mrand48_r(&c->svid_data, &r) < 0)) + fastrand_mrand48_r_failed(); +#else + (void) mrand48_r(&c->svid_data, &r); +#endif + return r; +} + +#endif + +/*** Interface for creating/deleting/setting contexts ***/ + +struct fastrand *fastrand_new(void) +{ + struct fastrand *c = xmalloc_zero(sizeof(*c)); + return c; +} + +void fastrand_delete(struct fastrand *c) +{ + ASSERT(c); + xfree(c); +} + +void fastrand_set_seed(struct fastrand *c, fastrand_seed_t seed) +{ + c->seed_value = seed; + c->inited = true; + fastrand_init(c); +} + +fastrand_seed_t fastrand_gen_seed_value(void) +{ + // Generate a seed value as a mixture of: + // -- current time + // -- process id to deal with frequent fork() or startups + // -- counter to deal with multiple contexts, for example in threaded application + u64 val; + static u64 static_counter; + ucwlib_lock(); + val = (static_counter += 0x1927a7639274162dLLU); + ucwlib_unlock(); + val += getpid(); + struct timeval tv; + if (gettimeofday(&tv, NULL) < 0) + die("RANDOM: Failed to call gettimeofday(): %m"); + val += (u64)(tv.tv_sec * 1000000LL + tv.tv_usec) * 0x5a03173d83f78347LLU; + return val; +} + +fastrand_seed_t fastrand_gen_seed(struct fastrand *c) +{ + fastrand_seed_t seed = fastrand_gen_seed_value(); + fastrand_set_seed(c, seed); + return seed; +} + +bool fastrand_has_seed(struct fastrand *c) +{ + return c->inited; +} + +void fastrand_reset(struct fastrand *c) +{ + c->inited = false; +} + +#if 1 +// XXX: Adds some overhead, but probably worth it +#define FASTRAND_CHECK_SEED(c) do { if (unlikely(!(c)->inited)) fastrand_check_seed_failed(); } while (0) +static NONRET void fastrand_check_seed_failed(void) +{ + ASSERT(0); +} +#else +#define FASTRAND_CHECK_SEED(c) do { } while (0) +#endif + +/*** Reading interface ***/ + +// We assume either 64bit or 24-32bit generator +COMPILE_ASSERT(fastrand_one_range, (FASTRAND_ONE_BITS >= 24 && FASTRAND_ONE_BITS <= 32) || FASTRAND_ONE_BITS == 64); + +#if FASTRAND_ONE_BITS < 64 +#define FASTRAND_ONE_MAX (UINT32_MAX >> (32 - FASTRAND_ONE_BITS)) +#else +#define FASTRAND_ONE_MAX UINT64_MAX +#endif + +#if FASTRAND_ONE_BITS < 64 +#define FASTRAND_TWO_BITS (FASTRAND_ONE_BITS * 2) +#define FASTRAND_TWO_MAX (UINT64_MAX >> (64 - FASTRAND_TWO_BITS)) +static inline u64 fastrand_two(struct fastrand *c) +{ + return ((u64)fastrand_one(c) << FASTRAND_ONE_BITS) | fastrand_one(c); +} +#endif + +#if FASTRAND_ONE_BITS < 32 +#define FASTRAND_THREE_BITS 64 +#define FASTRAND_THREE_MAX UINT64_MAX +static inline u64 fastrand_three(struct fastrand *c) +{ + return ((u64)fastrand_one(c) << (FASTRAND_ONE_BITS * 2)) | ((u64)fastrand_one(c) << FASTRAND_ONE_BITS) | fastrand_one(c); +} +#endif + +u32 fastrand_u32(struct fastrand *c) +{ + FASTRAND_CHECK_SEED(c); +#if FASTRAND_ONE_BITS < 32 + return fastrand_two(c); +#else + return fastrand_one(c); +#endif +} + +u64 fastrand_u64(struct fastrand *c) +{ + FASTRAND_CHECK_SEED(c); +#if FASTRAND_ONE_BITS < 32 + return fastrand_three(c); +#elif FASTRAND_ONE_BITS < 64 + return fastrand_two(c); +#else + return fastrand_one(c); +#endif +} + +u64 fastrand_bits_u64(struct fastrand *c, uint bits) +{ +#ifdef LOCAL_DEBUG + ASSERT(bits <= 64); +#endif + // Notice that we never rotate an integer by all its bits (forbidden by C standard). + FASTRAND_CHECK_SEED(c); +#if FASTRAND_ONE_BITS == 64 + if (!bits) + return 0; + u64 r = fastrand_one(c); + return r >> (FASTRAND_ONE_BITS - bits); +#else + byte n = bits; + u64 r = fastrand_one(c); + if (n <= FASTRAND_ONE_BITS) + return r >> (FASTRAND_ONE_BITS - n); + while (1) + { + n -= FASTRAND_ONE_BITS; + u64 x = fastrand_one(c); + if (n <= FASTRAND_ONE_BITS) + return (r << n) | (x >> (FASTRAND_ONE_BITS - n)); + r = (r << FASTRAND_ONE_BITS) | x; + } +#endif +} + +/* + * fastrand_max() and variants + * + * Algorithm 1: + * + * We can return r % bound from uniformly generated i-bit number r, + * but only if: + * r - r % bound + bound <= 2^i + * r - r % bound <= 2^i-1 - bound + 1 -- no risk of integer overflow + * r - r % bound <= (2^i-1 + 1) - bound -- can temporarily overflow + * + * Then r fits to a complete interval in the i-bit range + * and results should be uniformly distributed. + * + * If r is too high, we simply generate a different r and repeat + * the whole process. The probability is < 50% for any bound <= 2^i + * (with worst case bound == 2^i / 2 + 1), so the average number + * of iterations is < 2. + * + * + * Algorithm 2 (without any division in many cases, but needs big multiplication): + * ("Fast Random Integer Generation in an Interval", 2019, Daniel Lemier) + * + * Let bound < 2^i. Then for j in [0, bound) each interval [j * 2^i + 2^i % bound, (j + 1) * 2^i) + * contains the same number of multiples [0, 1 * bound, 2 * bound, ..., 2^i * bound), + * exactly 2^i / bound (rounded down) of them. + * + * Therefore if we have a random i-bit number r and r * bound fits to one of these intervals, + * we can immediately return (r * bound) / 2^i (index of that interval) and results should be + * correctly distributed. If we are lucky and r fits to even smaller interval + * [j * 2^i + bound, (j + 1) * 2^i], we can check that very quickly without any division. + * + * Probability of multiple iterations is again < 50%. + * Probability of division-less check is (2^i - bound) / 2^i. + * + * + * Algorithm 3 (always without divisions or multiplications, but needs more iterations): + * + * Let 2^(i-1) <= bound < 2^i. Then i-bit random number r has at least 50% probability + * to be lower than bound, when we can simply return it. + * Can be useful for very fast fastrand_one() like xoroshiro128+. + */ + +#define FASTRAND_MAX_ONE_U32_ALGORITHM 2 +#define FASTRAND_MAX_MORE_ALGORITHM 3 + +static inline u32 fastrand_max_one_u32(struct fastrand *c, u32 bound) +{ + // Variant for bound <= MIN(UINT32_MAX, FASTRAND_ONE_MAX). + // This is the most common case -> we optimize it and inline the code to both fastrand_max() and fastrand_max_u64(). + +#if FASTRAND_MAX_ONE_U32_ALGORITHM == 1 + while (1) + { + u32 r = fastrand_one(c); + u32 x = r % bound; + if (r - x <= (u32)FASTRAND_ONE_MAX + 1 - bound) + return x; +#if 1 + // Possible optimization for unlucky cases + uint y = r - x; + do + r = fastrand_one(c); + while (r >= y); + return r % bound; +#endif + } + +#elif FASTRAND_MAX_ONE_U32_ALGORITHM == 2 + // Algorithm 2: + + u32 r = fastrand_one(c); + u64 m = (u64)r * bound; + u32 l = m & (u32)FASTRAND_ONE_MAX; + if (l < bound) + { + u32 t = (u32)FASTRAND_ONE_MAX + 1 - bound; +#if 1 + // This condition can decrease the probability of division to <= 50% even in worst case and also optimize bad cases. + if (t < bound) + { + // Implies l < t % bound, so we don't need to check it before switching to completely different loop. + do + r = fastrand_one(c); + while (r >= bound); + return r; + } +#endif + t = t % bound; + while (l < t) + { + r = fastrand_one(c); + m = (u64)r * bound; + l = m & (u32)FASTRAND_ONE_MAX; + } + } + return m >> MIN(FASTRAND_ONE_BITS, 32); + +#elif FASTRAND_MAX_ONE_U32_ALGORITHM == 3 + // XXX: Probably could be optimized with __builtin_clz() + u32 r, y = bound - 1; + y |= y >> 1; + y |= y >> 2; + y |= y >> 4; + y |= y >> 8; + y |= y >> 16; + do + r = fastrand_one(c) & y; + while (r >= bound); + return r; + +#else +# error Invalid FASTRAND_MAX_ONE_U32_ALGORITHM +#endif +} + +static u64 fastrand_max_more(struct fastrand *c, u64 bound) +{ + // Less common case -> compromise between speed and size +#if FASTRAND_MAX_MORE_ALGORITHM == 1 + while (1) + { + u64 r = fastrand_one(c); + u64 m = FASTRAND_ONE_MAX; +#if FASTRAND_ONE_BITS < 64 +#if FASTRAND_ONE_BITS < 32 + while (bound > m + 1) +#else + if (bound > m + 1) +#endif + { + r = (r << FASTRAND_ONE_BITS) | fastrand_one(c); + m = (m << FASTRAND_ONE_BITS) | FASTRAND_ONE_MAX; + } +#endif + u64 x = r % bound; + if (r - x <= m + 1 - bound) + return x; + } + +#elif FASTRAND_MAX_MORE_ALGORITHM == 3 + u64 r, y = bound - 1; + y |= y >> 1; + y |= y >> 2; + y |= y >> 4; + y |= y >> 8; + y |= y >> 16; + y |= y >> 32; + do + { + r = fastrand_one(c); + u64 m = FASTRAND_ONE_MAX; +#if FASTRAND_ONE_BITS < 64 +#if FASTRAND_ONE_BITS < 32 + while (y > m) +#else + if (y > m) +#endif + { + r = (r << FASTRAND_ONE_BITS) | fastrand_one(c); + m = (m << FASTRAND_ONE_BITS) | FASTRAND_ONE_MAX; + } +#endif + r &= y; + } + while (r >= bound); + return r; +#else +# error Invalid FASTRAND_MAX_MORE_ALGORITHM +#endif +} + +uint fastrand_max(struct fastrand *c, uint bound) +{ + FASTRAND_CHECK_SEED(c); +#if (UINT32_MAX & FASTRAND_ONE_MAX) < UINT_MAX + if (bound <= (u32)(FASTRAND_ONE_MAX) + 1) + return fastrand_max_one_u32(c, bound); + else + return fastrand_max_more(c, bound); +#else + return fastrand_max_one_u32(c, bound); +#endif +} + +u64 fastrand_max_u64(struct fastrand *c, u64 bound) +{ + FASTRAND_CHECK_SEED(c); + if (bound <= (u64)(u32)(FASTRAND_ONE_MAX) + 1) + return fastrand_max_one_u32(c, bound); + else + return fastrand_max_more(c, bound); +} + +void fastrand_mem(struct fastrand *c, void *ptr, size_t size) +{ + FASTRAND_CHECK_SEED(c); + byte *p = ptr; + if (size > 3) + { + if ((intptr_t)p & 3) + { + uint x = fastrand_one(c); + while ((intptr_t)p & 3) + { + *p++ = x; + x >>= 8; + size--; + } + } +#if FASTRAND_ONE_BITS == 64 + for (size_t n = size >> 3; n--; ) + { + u64 x = fastrand_one(c); + ((u32 *)p)[0] = x; + ((u32 *)p)[1] = x >> 32; + p += 8; + } + if (size & 4) + { + *(u32 *)p = fastrand_one(c); + p += 4; + } +#else +#if FASTRAND_ONE_BITS < 32 + uint y, y_n = 0; +#endif + for (size_t n = size >> 2; n--; ) + { +#if FASTRAND_ONE_BITS < 32 + if (!y_n--) + { + y_n = FASTRAND_ONE_BITS / (32 - FASTRAND_ONE_BITS) - 1; + y = fastrand_one(c); + } + uint x = fastrand_one(c) | (y << (FASTRAND_ONE_BITS - 24)); + y >>= 32 - FASTRAND_ONE_BITS; +#else + uint x = fastrand_one(c); +#endif + *(u32 *)p = x; + p += 4; + } +#endif + size &= 3; + } + if (size) + { + uint x = fastrand_one(c); + while (size--) + { + *p++ = x; + x >>= 8; + } + } +} diff --git a/ucw/random-test.c b/ucw/random-test.c new file mode 100644 index 00000000..5f76423f --- /dev/null +++ b/ucw/random-test.c @@ -0,0 +1,202 @@ +/* + * UCW Library -- Random numbers: Testing + * + * (c) 2020--2022 Pavel Charvat + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +static int o_test_fast; + +static double o_bench_scale = 1.0; +static int o_bench_only_safe; +static int o_bench_all; +static int o_bench_libs; +static int o_bench_legacy; +static int o_bench_fast; + +static struct opt_section options = { + OPT_ITEMS { + OPT_HELP("Usage: random-test "), + OPT_HELP(""), + OPT_HELP("Options:"), + OPT_HELP_OPTION, + OPT_BOOL(0, "test-fast", o_test_fast, 0, "\tTest fast random generator"), + OPT_BOOL(0, "bench-all", o_bench_all, 0, "\tBench everything"), + OPT_BOOL(0, "bench-libs", o_bench_libs, 0, "\tBench libc"), + OPT_BOOL(0, "bench-legacy", o_bench_legacy, 0, "\tBench legacy interface"), + OPT_BOOL(0, "bench-fast", o_bench_fast, 0, "\tBench fast random generator"), + OPT_DOUBLE(0, "bench-scale", o_bench_scale, OPT_REQUIRED_VALUE, "\tIncrease/decrease the length of benchmark (default: 1.0)"), + OPT_BOOL(0, "bench-only-safe", o_bench_only_safe, 0, "\tDon't benchmark too verbose or risky functions"), + OPT_END, + } +}; + +#define TEST(x) do { if (unlikely(!(x))) test_failed(#x, __LINE__); } while (0) +static NONRET void test_failed(const char *check, uint line) +{ + msg(L_FATAL, "TEST: Failed at line %u: TEST(%s)", line, check); + abort(); +} + +static void test_fast(void) +{ + struct fastrand *r = fastrand_new(); + fastrand_gen_seed(r); + for (uint i = 0; i < 5000; i++) + { + u64 x, y; + + TEST((x = fastrand_max(r, 33)) <= 32); + TEST((y = fastrand_bits(r, x)) < (1LLU << x)); + + TEST((x = fastrand_max(r, 65)) <= 64); + TEST((y = fastrand_bits_u64(r, x)) <= ((x == 64) ? UINT64_MAX : (1LLU << x) - 1)); + + x = fastrand_u32(r) ? : 1; + TEST((y = fastrand_max(r, x)) < x); + x = fastrand_u64(r) ? : 1; + TEST((y = fastrand_max_u64(r, x)) < x); + } + fastrand_delete(r); + printf("OK\n"); +} + +#define BENCH(func) BENCH_SLOW(1, func) +#define BENCH_SLOW(mult, func) \ + do { \ + timestamp_t bench_timer; \ + init_timer(&bench_timer); \ + for (uint bench_i = 50000000 * o_bench_scale / (mult) / 4; bench_i--; ) \ + { func; func; func; func; } \ + msg(L_INFO, "%7u %s", (mult) * get_timer(&bench_timer), #func); \ + } while (0) + +static void bench_libs(void) +{ + uint seed = time(NULL) + getpid(); + + uint iso_state = seed; + srand(seed); + + u64 bsd_buf[128 / 8]; + struct random_data bsd_data; + bsd_data.state = NULL; + if (initstate_r(seed, (char *)bsd_buf, sizeof(bsd_buf), &bsd_data) < 0) + die("initstate_r(): %m"); + + u64 bsd_buf2[128 / 8]; + errno = 0; + initstate(seed, (char *)bsd_buf2, sizeof(bsd_buf2)); + if (errno) + die("initstate(): %m"); + + struct drand48_data svid_data; + if (srand48_r(seed, &svid_data) < 0) + die("srand48_r(): %m"); + srand48(seed); + + int32_t int32_res; + long int long_res; + + msg(L_INFO, "Benchmarking libc ISO generator"); + BENCH_SLOW(100, srand(seed)); + BENCH(rand()); + BENCH(rand_r(&iso_state)); + + msg(L_INFO, "Benchmarking libc BSD generator"); + BENCH_SLOW(100, srandom(seed)); + BENCH(random()); + BENCH(random_r(&bsd_data, &int32_res)); + + msg(L_INFO, "Benchmarking libc SVID generator"); + BENCH(srand48(seed)); + BENCH(mrand48()); + BENCH(mrand48_r(&svid_data, &long_res)); +} + +static void bench_legacy(void) +{ + msg(L_INFO, "Benchmarking ucw legacy generator"); + srand(time(NULL) + getpid()); + BENCH(random_u32()); + BENCH(random_u64()); + BENCH(random_max(1000000)); + BENCH(random_max_u64(1000000)); + BENCH_SLOW(2, random_max_u64(1000000000000LL)); + BENCH_SLOW(2, random_max_u64((1LLU << 63) - 1)); // It uses slightly suboptimal intervals, so +0 is still a bad case + BENCH_SLOW(4, random_max_u64((1LLU << 63) + 1)); + BENCH_SLOW(2, random_max(1 + random_max(1000000))); + BENCH_SLOW(4, random_max_u64(1 + random_max_u64(1000000000000LL))); +} + +static void bench_fast(void) +{ + msg(L_INFO, "Benchmarking fast random generator"); + struct fastrand *r = fastrand_new(); + + if (!o_bench_only_safe) + { + // Can produce huge log with LOCAL_DEBUG + uint seed = time(NULL) + getpid(); + BENCH_SLOW(100, fastrand_gen_seed(r)); + BENCH_SLOW(100, fastrand_set_seed(r, seed)); + } + + fastrand_gen_seed(r); + + BENCH(fastrand_bits(r, 1)); + BENCH(fastrand_bits(r, 32)); + BENCH(fastrand_bits_u64(r, 64)); + BENCH(fastrand_u32(r)); + BENCH(fastrand_u64(r)); + BENCH(fastrand_max(r, 1000000)); + BENCH(fastrand_max(r, (1U << 30) + 0)); + BENCH(fastrand_max(r, (1U << 30) + 1)); // Worst case for 31 bit generators (typical RAND_MAX) + BENCH(fastrand_max(r, (1U << 31) + 0)); + BENCH(fastrand_max(r, (1U << 31) + 1)); // Worst case for 32 bit generators + BENCH(fastrand_max_u64(r, 1000000)); + BENCH_SLOW(2, fastrand_max_u64(r, 1000000000000LL)); + BENCH_SLOW(2, fastrand_max_u64(r, (1LLU << 63) + 0)); + BENCH_SLOW(4, fastrand_max_u64(r, (1LLU << 63) + 1)); + BENCH_SLOW(2, fastrand_max(r, 1 + fastrand_max(r, 1000000))); + BENCH_SLOW(4, fastrand_max_u64(r, 1 + fastrand_max_u64(r, 1000000000000LL))); + + byte buf[1000]; + BENCH(fastrand_mem(r, buf, 1)); + BENCH(fastrand_mem(r, buf, 4)); + BENCH_SLOW(10, fastrand_mem(r, buf, 100)); + BENCH_SLOW(100, fastrand_mem(r, buf, 1000)); + + fastrand_delete(r); +} + +int main(int argc UNUSED, char **argv) +{ + cf_def_file = INSTALL_CONFIG_DIR "/common"; + opt_parse(&options, argv + 1); + + if (o_test_fast) + test_fast(); + + if (o_bench_all || o_bench_libs) + bench_libs(); + if (o_bench_all || o_bench_legacy) + bench_legacy(); + if (o_bench_all || o_bench_fast) + bench_fast(); + + return 0; +} diff --git a/ucw/random-test.t b/ucw/random-test.t new file mode 100644 index 00000000..0f866cd0 --- /dev/null +++ b/ucw/random-test.t @@ -0,0 +1,3 @@ +Name: random-fast +Run: ../obj/ucw/random-test --test-fast +Out: OK diff --git a/ucw/random.h b/ucw/random.h new file mode 100644 index 00000000..93c6b28f --- /dev/null +++ b/ucw/random.h @@ -0,0 +1,97 @@ +/* + * The UCW Library -- Random numbers + * + * (c) 2020--2022 Pavel Charvat + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#ifndef _UCW_RANDOM_H +#define _UCW_RANDOM_H + +#ifdef CONFIG_UCW_CLEAN_ABI +#define fastrand_new ucw_fastrand_new +#define fastrand_delete ucw_fastrand_delete +#define fastrand_set_seed ucw_fastrand_set_seed +#define fastrand_gen_seed_value ucw_fastrand_gen_seed_value +#define fastrand_gen_seed ucw_fastrand_gen_seed +#define fastrand_has_seed ucw_fastrand_has_seed +#define fastrand_reset ucw_fastrand_reset +#define fastrand_u32 ucw_fastrand_u32 +#define fastrand_u64 ucw_fastrand_u64 +#define fastrand_bits_u64 ucw_fastrand_bits_u64 +#define fastrand_max ucw_fastrand_max +#define fastrand_max_u64 ucw_fastrand_max_u64 +#define fastrand_mem ucw_fastrand_mem +#endif + +/* random-fast.c */ + +/** + * Context structure for a fast pseudo-random generator. + * Useful for generating numbers in randomized algorithms, but not in critical things like cryptography. + * The library is generally thread-safe, but each context must be used by at most one thread at one time. + * Also be careful with fork() if you want to avoid the same numbers in subprocesses. + **/ +struct fastrand; + +/** + * Type for a seed value. Beware that some generators are only able to use the lowest 32 bits. + **/ +typedef u64 fastrand_seed_t; + +/** + * Allocate a new context. Should be paired with fastrand_delete(). + * Don't forget to supply a seed before started to read randomness. + **/ +struct fastrand *fastrand_new(void); +void fastrand_delete(struct fastrand *c); + +/** (Re)seed the context with given value. **/ +void fastrand_set_seed(struct fastrand *c, fastrand_seed_t seed); + +/** + * Generate a seed for fastrand_set_seed(). + * + * We currently mix these things (you can add more if needed): + * -- current time + * -- process id to deal with frequent fork() or startups + * -- counter to deal with multiple contexts, for example in threaded application + **/ +fastrand_seed_t fastrand_gen_seed_value(void); + +/** (Re)seed the context with fastrand_gen_seed_value(). **/ +fastrand_seed_t fastrand_gen_seed(struct fastrand *c); + +/** Is the context seeded? **/ +bool fastrand_has_seed(struct fastrand *c); + +/** Reset the context to its initial state. Useful after a fork() to assert forgotten reseed. **/ +void fastrand_reset(struct fastrand *c); + +/** Uniformly generate 32 random bits. **/ +u32 fastrand_u32(struct fastrand *c); + +/** Uniformly generate 64 random bits. **/ +u64 fastrand_u64(struct fastrand *c); + +/** Uniformly generate [0..64] random bits. **/ +u64 fastrand_bits_u64(struct fastrand *c, uint bits); + +/** Uniformly generate [0, 8*sizeof(uint)] random bits. **/ +static inline uint fastrand_bits(struct fastrand *c, uint bits) +{ + return fastrand_bits_u64(c, bits); +} + +/** Uniformly generate a random number from range [0, bound-1], where @bound is [1, UINT_MAX]. **/ +uint fastrand_max(struct fastrand *c, uint bound); + +/** Uniformly generate a random number from range [0, bound-1], where @bound is [1, UINT64_MAX]. **/ +u64 fastrand_max_u64(struct fastrand *c, u64 bound); + +/** Generate @size random bytes. **/ +void fastrand_mem(struct fastrand *c, void *ptr, size_t size); + +#endif -- 2.39.2