--- /dev/null
+/*
+ * UCW Library -- Fast pseudo-random generator
+ *
+ * (c) 2020--2022 Pavel Charvat <pchar@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#undef LOCAL_DEBUG
+
+#include <ucw/lib.h>
+#include <ucw/random.h>
+#include <ucw/threads.h>
+
+#include <limits.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/time.h>
+
+// Select exactly one of these pseudo-random generators:
+//#define FASTRAND_USE_PCG
+//#define FASTRAND_USE_SPLITMIX
+#define FASTRAND_USE_XOROSHIRO
+//#define FASTRAND_USE_LIBC_ISO
+//#define FASTRAND_USE_LIBC_BSD
+//#define FASTRAND_USE_LIBC_SVID
+
+/**
+ * FASTRAND_USE_PCG
+ *
+ * References:
+ * -- https://en.wikipedia.org/wiki/Permuted_congruential_generator
+ * -- https://www.pcg-random.org/
+ * -- "PCG: A Family of Simple Fast Space-Efficient Statistically Good Algorithms for Random Number Generation",
+ * 2014, Melissa E. O’Neill
+ *
+ * Notes:
+ * -- Simple and fast generator with good statistical properties.
+ * -- We use a constant increment and 64->32bit XSH RR output function.
+ **/
+
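+// Multiplier and increment of Knuth's MMIX LCG; the PCG reference implementation
+// uses the same multiplier.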
+#define FASTRAND_PCG_MUL 6364136223846793005LLU
+#define FASTRAND_PCG_INC 1442695040888963407LLU
+
+/**
+ * FASTRAND_USE_SPLITMIX
+ *
+ * References:
+ * -- http://prng.di.unimi.it/splitmix64.c (public domain)
+ * -- http://docs.oracle.com/javase/8/docs/api/java/util/SplittableRandom.html
+ * -- "Fast Splittable Pseudorandom Number Generators",
+ * 2014, Guy L. Steele Jr., Doug Lea, Christine H. Flood
+ *
+ * Notes:
+ * -- Each 64 bit number is generated exactly once per cycle (the output function
+ *    is a bijection applied to a simple counter), so we only use half of it -> 32 bit output.
+ **/
+
+/**
+ * FASTRAND_USE_XOROSHIRO
+ *
+ * References:
+ * -- http://prng.di.unimi.it/ (the example code is in the public domain)
+ * -- "Scrambled Linear Pseudorandom Number Generators",
+ * 2019, David Blackman, Sebastiano Vigna
+ *
+ * Notes:
+ * -- A family of very fast generators with good statistical properties;
+ *    some variants only have problems in their lowest bits.
+ * -- We support several xo(ro)shiro variants, see the options below.
+ **/
+
+// Engine: xoroshiro 128, xoshiro 256
+#define FASTRAND_XOROSHIRO_SIZE 128
+
+// Scrambler: 1 (+), 2 (++), 3 (**)
+#define FASTRAND_XOROSHIRO_SCRAMBLER 1
+
+// If defined, we only use the highest 32 bits of each 64 bit number;
+// useful especially for the weakest '+' scrambler, whose lowest bits are the weak ones.
+// XXX: We could instead cut fewer bits, or use the full 64 bits and prefer
+// the higher ones only where it is trivial, for example in the 32 bit fastrand_max().
+#define FASTRAND_XOROSHIRO_ONLY_HIGHER
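+
+// For reference, the option combinations above correspond to the usual generator names:
+// (128, 1) = xoroshiro128+, (128, 2) = xoroshiro128++, (128, 3) = xoroshiro128**,
+// (256, 1) = xoshiro256+,   (256, 2) = xoshiro256++,   (256, 3) = xoshiro256**.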
+
+/**
+ * FASTRAND_USE_LIBC_ISO
+ * -- rand_r()
+ * -- libc ISO generator
+ * -- usually 31 bit output (RAND_MAX)
+ * -- very weak
+ **/
+
+/**
+ * FASTRAND_USE_LIBC_BSD
+ * -- random_r()
+ * -- libc BSD generator
+ * -- usually 31 bit output (RAND_MAX)
+ **/
+
+/**
+ * FASTRAND_USE_LIBC_SVID
+ * -- mrand48_r()
+ * -- libc SVID generator
+ * -- 32 bit output
+ **/
+
+struct fastrand {
+#ifdef FASTRAND_USE_PCG
+ u64 pcg_state; // PCG generator's current state
+#endif
+
+#ifdef FASTRAND_USE_SPLITMIX
+ u64 splitmix_state;
+#endif
+
+#ifdef FASTRAND_USE_XOROSHIRO
+ u64 xoroshiro_state[FASTRAND_XOROSHIRO_SIZE / 64];
+#endif
+
+#ifdef FASTRAND_USE_LIBC_ISO
+ uint iso_state; // ISO generator's current seed
+#endif
+
+#ifdef FASTRAND_USE_LIBC_BSD
+ struct random_data bsd_data; // BSD generator's state structure
+  u64 bsd_state[128 / 8];		// Used as the "statebuf" for initstate_r(); see the manual pages or glibc sources for useful sizes;
+					// glibc casts the (char *) buffer to (int32_t *), so we make sure it is suitably aligned
+#endif
+
+#ifdef FASTRAND_USE_LIBC_SVID
+ struct drand48_data svid_data; // SVID generator's state structure
+#endif
+
+ bool inited; // Is seeded?
+ fastrand_seed_t seed_value; // Initial seed value; useful for reproducible debugging
+};
+
+/*** Common helpers ***/
+
+#if defined(FASTRAND_USE_LIBC_ISO) || defined(FASTRAND_USE_LIBC_BSD)
+# if RAND_MAX == (1U << 30) - 1
+# define FASTRAND_RAND_MAX_BITS 30
+# elif RAND_MAX == (1U << 31) - 1
+# define FASTRAND_RAND_MAX_BITS 31
+# else
+# error "Unsupported value of RAND_MAX"
+# endif
+#endif
+
+// GCC should optimize these to single rotate instructions;
+// the (-r & mask) form keeps the shift counts valid even for r == 0.
+
+static inline u32 fastrand_ror32(u32 x, uint r)
+{
+ return (x >> r) | (x << (-r & 31));
+}
+
+static inline u64 fastrand_rol64(u64 x, uint r)
+{
+ return (x << r) | (x >> (-r & 63));
+}
+
+/*** PCG random generator ***/
+
+#ifdef FASTRAND_USE_PCG
+#define FASTRAND_ONE_BITS 32
+
+static inline u32 fastrand_one(struct fastrand *c)
+{
+ u64 s = c->pcg_state;
+ uint r = s >> 59;
+ c->pcg_state = s * FASTRAND_PCG_MUL + FASTRAND_PCG_INC;
+ u32 x = ((s >> 18) ^ s) >> 27;
+ return fastrand_ror32(x, r);
+}
+
+static void fastrand_init(struct fastrand *c)
+{
+ c->pcg_state = c->seed_value + FASTRAND_PCG_INC;
+ (void) fastrand_one(c);
+ DBG("RANDOM: Initialized PCG random generator, seed=0x%jx", (uintmax_t)c->seed_value);
+}
+#endif
+
+/*** splitmix64 generator ***/
+
+// A helper; used also for seeding xoroshiro
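+// The constants below come from the splitmix64 reference implementation;
+// the increment 0x9e3779b97f4a7c15 is (approximately) 2^64 / golden ratio.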
+static inline u64 fastrand_splitmix64_next(u64 *state)
+{
+ u64 z = (*state += 0x9e3779b97f4a7c15LLU);
+ z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9LLU;
+ z = (z ^ (z >> 27)) * 0x94d049bb133111ebLLU;
+ return z ^ (z >> 31);
+}
+
+#ifdef FASTRAND_USE_SPLITMIX
+#define FASTRAND_ONE_BITS 32
+
+static void fastrand_init(struct fastrand *c)
+{
+ c->splitmix_state = c->seed_value;
+ DBG("RANDOM: Initialized splitmix64 random generator, seed=0x%jx", (uintmax_t)c->seed_value);
+}
+
+static inline u32 fastrand_one(struct fastrand *c)
+{
+ return fastrand_splitmix64_next(&c->splitmix_state);
+}
+#endif
+
+/*** xoroshiro variants ***/
+
+#ifdef FASTRAND_USE_XOROSHIRO
+#ifdef FASTRAND_XOROSHIRO_ONLY_HIGHER
+# define FASTRAND_ONE_BITS 32
+#else
+# define FASTRAND_ONE_BITS 64
+#endif
+
+static void fastrand_init(struct fastrand *c)
+{
+ u64 x = c->seed_value;
+ for (uint i = 0; i < ARRAY_SIZE(c->xoroshiro_state); i++)
+ c->xoroshiro_state[i] = fastrand_splitmix64_next(&x);
+ if (unlikely(!c->xoroshiro_state[0]))
+ c->xoroshiro_state[0] = 1;
+ DBG("RANDOM: Initialized xoroshiro random generator, seed=0x%jx, size=%u, scrambler=%u",
+ (uintmax_t)c->seed_value, FASTRAND_XOROSHIRO_SIZE, FASTRAND_XOROSHIRO_SCRAMBLER);
+}
+
+static inline
+#ifdef FASTRAND_XOROSHIRO_ONLY_HIGHER
+u32
+#else
+u64
+#endif
+fastrand_one(struct fastrand *c)
+{
+ u64 result;
+ u64 *s = c->xoroshiro_state;
+
+#if FASTRAND_XOROSHIRO_SIZE == 128
+ u64 s0 = s[0];
+ u64 s1 = s[1];
+#if FASTRAND_XOROSHIRO_SCRAMBLER == 1
+ result = s0 + s1;
+ uint ra = 24, rb = 16, rc = 37;
+#elif FASTRAND_XOROSHIRO_SCRAMBLER == 2
+ result = fastrand_rol64(s0 + s1, 17) + s0;
+ uint ra = 49, rb = 21, rc = 28;
+#elif FASTRAND_XOROSHIRO_SCRAMBLER == 3
+ result = fastrand_rol64(s0 * 5, 7) * 9;
+ uint ra = 24, rb = 16, rc = 37;
+#else
+# error "Unsupported xoroshiro parameters"
+#endif
+ s[0] = fastrand_rol64(s0, ra) ^ s1 ^ (s1 << rb);
+ s[1] = fastrand_rol64(s1, rc);
+
+#elif FASTRAND_XOROSHIRO_SIZE == 256
+#if FASTRAND_XOROSHIRO_SCRAMBLER == 1
+ result = s[0] + s[3];
+#elif FASTRAND_XOROSHIRO_SCRAMBLER == 2
+ result = fastrand_rol64(s[0] + s[3], 23) + s[0];
+#elif FASTRAND_XOROSHIRO_SCRAMBLER == 3
+ result = fastrand_rol64(s[1] * 5, 7) * 9;
+#else
+# error "Unsupported xoroshiro parameters"
+#endif
+ u64 t = s[1] << 17;
+ s[2] ^= s[0];
+ s[3] ^= s[1];
+ s[1] ^= s[2];
+ s[0] ^= s[3];
+ s[2] ^= t;
+ s[3] = fastrand_rol64(s[3], 45);
+#else
+# error "Unsupported xoroshiro parameters"
+#endif
+
+#ifdef FASTRAND_XOROSHIRO_ONLY_HIGHER
+ result >>= 32;
+#endif
+ return result;
+}
+
+#endif
+
+/*** Libc ISO generator ***/
+
+#ifdef FASTRAND_USE_LIBC_ISO
+#define FASTRAND_ONE_BITS FASTRAND_RAND_MAX_BITS
+
+static void fastrand_init(struct fastrand *c)
+{
+ c->iso_state = c->seed_value;
+ DBG("RANDOM: Initialized libc ISO random generator, seed=0x%jx, bits=%u", (uintmax_t)c->seed_value, FASTRAND_ONE_BITS);
+}
+
+static inline u32 fastrand_one(struct fastrand *c)
+{
+ return rand_r(&c->iso_state);
+}
+
+#endif
+
+/*** Libc BSD generator ***/
+
+#ifdef FASTRAND_USE_LIBC_BSD
+#define FASTRAND_ONE_BITS FASTRAND_RAND_MAX_BITS
+
+static void fastrand_init(struct fastrand *c)
+{
+ DBG("RANDOM: Initialized libc BSD random generator, seed=0x%jx, statelen=%zu, bits=%u", (uintmax_t)c->seed_value, sizeof(c->bsd_state), FASTRAND_ONE_BITS);
+ c->bsd_data.state = NULL; // Required by initstate_r()
+ if (initstate_r(c->seed_value, (char *)c->bsd_state, sizeof(c->bsd_state), &c->bsd_data) < 0)
+ die("RANDOM: Failed to call initstate_r(): %m");
+}
+
+#ifdef LOCAL_DEBUG
+static NONRET void fastrand_random_r_failed(void)
+{
+ die("RANDOM: Failed to call random_r(): %m");
+}
+#endif
+
+static inline u32 fastrand_one(struct fastrand *c)
+{
+ int32_t r;
+#ifdef LOCAL_DEBUG
+ if (unlikely(random_r(&c->bsd_data, &r) < 0))
+ fastrand_random_r_failed();
+#else
+ (void) random_r(&c->bsd_data, &r);
+#endif
+ return r;
+}
+
+#endif
+
+/*** Libc SVID generator ***/
+
+#ifdef FASTRAND_USE_LIBC_SVID
+#define FASTRAND_ONE_BITS 32
+
+static void fastrand_init(struct fastrand *c)
+{
+ DBG("RANDOM: Initialized libc SVID random generator, seed=0x%jx", (uintmax_t)c->seed_value);
+ if (srand48_r(c->seed_value, &c->svid_data) < 0)
+ die("RANDOM: Failed to call srand48_r(): %m");
+}
+
+#ifdef LOCAL_DEBUG
+static NONRET void fastrand_mrand48_r_failed(void)
+{
+ die("RANDOM: Failed to call mrand48_r(): %m");
+}
+#endif
+
+static inline u32 fastrand_one(struct fastrand *c)
+{
+ long int r;
+#ifdef LOCAL_DEBUG
+ if (unlikely(mrand48_r(&c->svid_data, &r) < 0))
+ fastrand_mrand48_r_failed();
+#else
+ (void) mrand48_r(&c->svid_data, &r);
+#endif
+ return r;
+}
+
+#endif
+
+/*** Interface for creating/deleting/setting contexts ***/
+
+struct fastrand *fastrand_new(void)
+{
+ struct fastrand *c = xmalloc_zero(sizeof(*c));
+ return c;
+}
+
+void fastrand_delete(struct fastrand *c)
+{
+ ASSERT(c);
+ xfree(c);
+}
+
+void fastrand_set_seed(struct fastrand *c, fastrand_seed_t seed)
+{
+ c->seed_value = seed;
+ c->inited = true;
+ fastrand_init(c);
+}
+
+fastrand_seed_t fastrand_gen_seed_value(void)
+{
+ // Generate a seed value as a mixture of:
+ // -- current time
+  // -- process id to deal with frequent fork()s or program startups
+  // -- a counter to deal with multiple contexts, for example in a threaded application
+ u64 val;
+ static u64 static_counter;
+ ucwlib_lock();
+ val = (static_counter += 0x1927a7639274162dLLU);
+ ucwlib_unlock();
+ val += getpid();
+ struct timeval tv;
+ if (gettimeofday(&tv, NULL) < 0)
+ die("RANDOM: Failed to call gettimeofday(): %m");
+ val += (u64)(tv.tv_sec * 1000000LL + tv.tv_usec) * 0x5a03173d83f78347LLU;
+ return val;
+}
+
+fastrand_seed_t fastrand_gen_seed(struct fastrand *c)
+{
+ fastrand_seed_t seed = fastrand_gen_seed_value();
+ fastrand_set_seed(c, seed);
+ return seed;
+}
+
+bool fastrand_has_seed(struct fastrand *c)
+{
+ return c->inited;
+}
+
+void fastrand_reset(struct fastrand *c)
+{
+ c->inited = false;
+}
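+
+/*
+ * A minimal usage sketch (illustrative only; based purely on the interface in this file):
+ *
+ *   struct fastrand *r = fastrand_new();
+ *   fastrand_gen_seed(r);                  // or fastrand_set_seed(r, fixed_seed) for reproducible runs
+ *   u32 dice = 1 + fastrand_max(r, 6);     // uniform in [1, 6]
+ *   u64 big = fastrand_u64(r);
+ *   fastrand_delete(r);
+ */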
+
+#if 1
+// XXX: Adds some overhead, but probably worth it
+#define FASTRAND_CHECK_SEED(c) do { if (unlikely(!(c)->inited)) fastrand_check_seed_failed(); } while (0)
+static NONRET void fastrand_check_seed_failed(void)
+{
+ ASSERT(0);
+}
+#else
+#define FASTRAND_CHECK_SEED(c) do { } while (0)
+#endif
+
+/*** Reading interface ***/
+
+// We assume either a 64-bit generator or a 24..32-bit one
+COMPILE_ASSERT(fastrand_one_range, (FASTRAND_ONE_BITS >= 24 && FASTRAND_ONE_BITS <= 32) || FASTRAND_ONE_BITS == 64);
+
+#if FASTRAND_ONE_BITS < 64
+#define FASTRAND_ONE_MAX (UINT32_MAX >> (32 - FASTRAND_ONE_BITS))
+#else
+#define FASTRAND_ONE_MAX UINT64_MAX
+#endif
+
+#if FASTRAND_ONE_BITS < 64
+#define FASTRAND_TWO_BITS (FASTRAND_ONE_BITS * 2)
+#define FASTRAND_TWO_MAX (UINT64_MAX >> (64 - FASTRAND_TWO_BITS))
+static inline u64 fastrand_two(struct fastrand *c)
+{
+ return ((u64)fastrand_one(c) << FASTRAND_ONE_BITS) | fastrand_one(c);
+}
+#endif
+
+#if FASTRAND_ONE_BITS < 32
+#define FASTRAND_THREE_BITS 64
+#define FASTRAND_THREE_MAX UINT64_MAX
+static inline u64 fastrand_three(struct fastrand *c)
+{
+ return ((u64)fastrand_one(c) << (FASTRAND_ONE_BITS * 2)) | ((u64)fastrand_one(c) << FASTRAND_ONE_BITS) | fastrand_one(c);
+}
+#endif
+
+u32 fastrand_u32(struct fastrand *c)
+{
+ FASTRAND_CHECK_SEED(c);
+#if FASTRAND_ONE_BITS < 32
+ return fastrand_two(c);
+#else
+ return fastrand_one(c);
+#endif
+}
+
+u64 fastrand_u64(struct fastrand *c)
+{
+ FASTRAND_CHECK_SEED(c);
+#if FASTRAND_ONE_BITS < 32
+ return fastrand_three(c);
+#elif FASTRAND_ONE_BITS < 64
+ return fastrand_two(c);
+#else
+ return fastrand_one(c);
+#endif
+}
+
+u64 fastrand_bits_u64(struct fastrand *c, uint bits)
+{
+#ifdef LOCAL_DEBUG
+ ASSERT(bits <= 64);
+#endif
+  // Note that we never shift an integer by its full width (which would be undefined by the C standard).
+ FASTRAND_CHECK_SEED(c);
+#if FASTRAND_ONE_BITS == 64
+ if (!bits)
+ return 0;
+ u64 r = fastrand_one(c);
+ return r >> (FASTRAND_ONE_BITS - bits);
+#else
+ byte n = bits;
+ u64 r = fastrand_one(c);
+ if (n <= FASTRAND_ONE_BITS)
+ return r >> (FASTRAND_ONE_BITS - n);
+ while (1)
+ {
+ n -= FASTRAND_ONE_BITS;
+ u64 x = fastrand_one(c);
+ if (n <= FASTRAND_ONE_BITS)
+ return (r << n) | (x >> (FASTRAND_ONE_BITS - n));
+ r = (r << FASTRAND_ONE_BITS) | x;
+ }
+#endif
+}
+
+/*
+ * fastrand_max() and variants
+ *
+ * Algorithm 1:
+ *
+ * We can return r % bound from a uniformly generated i-bit number r,
+ * but only if:
+ *     r - r % bound + bound <= 2^i
+ *     r - r % bound <= (2^i - 1) - bound + 1   -- no risk of integer overflow
+ *     r - r % bound <= ((2^i - 1) + 1) - bound -- can temporarily overflow
+ *
+ * Then r falls into one of the complete intervals of length bound
+ * in the i-bit range, so the results are uniformly distributed.
+ *
+ * If r is too high, we simply generate a different r and repeat
+ * the whole process. The probability of rejection is < 50% for any
+ * bound <= 2^i (with the worst case at bound == 2^i / 2 + 1), so the
+ * average number of iterations is < 2.
+ *
+ *
+ * Algorithm 2 (no division in many cases, but needs a wide multiplication):
+ * ("Fast Random Integer Generation in an Interval", 2019, Daniel Lemire)
+ *
+ * Let bound < 2^i. Then for every j in [0, bound) the interval [j * 2^i + 2^i % bound, (j + 1) * 2^i)
+ * contains the same number of the multiples 0, 1 * bound, 2 * bound, ..., (2^i - 1) * bound,
+ * namely exactly 2^i / bound (rounded down) of them.
+ *
+ * Therefore if we have a random i-bit number r and r * bound falls into one of these intervals,
+ * we can immediately return (r * bound) / 2^i (the index of that interval) and the results are
+ * correctly distributed. If we are lucky and r * bound falls into the even smaller interval
+ * [j * 2^i + bound, (j + 1) * 2^i), we can verify that very quickly without any division.
+ *
+ * The probability of needing more than one iteration is again < 50%.
+ * The probability that the division-free check alone suffices is (2^i - bound) / 2^i.
+ *
+ *
+ * Algorithm 3 (always without divisions or multiplications, but needs more iterations):
+ *
+ * Let 2^(i-1) <= bound < 2^i. Then an i-bit random number r is lower than bound
+ * with probability of at least 50%, in which case we can simply return it.
+ * This can be useful for a very fast fastrand_one() like xoroshiro128+.
+ */
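+
+/*
+ * A small worked example of Algorithm 2 (illustration only), with i = 4 and bound = 6:
+ * 2^i = 16, 2^i % bound = 4. For r in [0, 16), r * bound runs over 0, 6, 12, ..., 90.
+ * Each interval [j * 16 + 4, (j + 1) * 16) for j in [0, 6) contains exactly
+ * floor(16 / 6) = 2 of these multiples, e.g. {6, 12} for j = 0 and {84, 90} for j = 5.
+ * So whenever (r * bound) % 16 >= 4 we may return (r * bound) / 16 directly, and the
+ * quick division-free check (r * bound) % 16 >= bound already accepts most of these cases.
+ */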
+
+#define FASTRAND_MAX_ONE_U32_ALGORITHM 2
+#define FASTRAND_MAX_MORE_ALGORITHM 3
+
+static inline u32 fastrand_max_one_u32(struct fastrand *c, u32 bound)
+{
+ // Variant for bound <= MIN(UINT32_MAX, FASTRAND_ONE_MAX).
+ // This is the most common case -> we optimize it and inline the code to both fastrand_max() and fastrand_max_u64().
+
+#if FASTRAND_MAX_ONE_U32_ALGORITHM == 1
+ while (1)
+ {
+ u32 r = fastrand_one(c);
+ u32 x = r % bound;
+ if (r - x <= (u32)FASTRAND_ONE_MAX + 1 - bound)
+ return x;
+#if 1
+ // Possible optimization for unlucky cases
+ uint y = r - x;
+ do
+ r = fastrand_one(c);
+ while (r >= y);
+ return r % bound;
+#endif
+ }
+
+#elif FASTRAND_MAX_ONE_U32_ALGORITHM == 2
+ // Algorithm 2:
+
+ u32 r = fastrand_one(c);
+ u64 m = (u64)r * bound;
+ u32 l = m & (u32)FASTRAND_ONE_MAX;
+ if (l < bound)
+ {
+ u32 t = (u32)FASTRAND_ONE_MAX + 1 - bound;
+#if 1
+      // When t < bound, then t % bound == t, so the rejection check needs no division at all.
+      if (t < bound)
+        {
+          if (l >= t)
+            return m >> MIN(FASTRAND_ONE_BITS, 32);
+          // After a genuine rejection (l < t) we can switch to a plain rejection loop;
+          // bound > 2^i / 2 here, so each extra draw is accepted with probability > 50%.
+          do
+            r = fastrand_one(c);
+          while (r >= bound);
+          return r;
+        }
+#endif
+ t = t % bound;
+ while (l < t)
+ {
+ r = fastrand_one(c);
+ m = (u64)r * bound;
+ l = m & (u32)FASTRAND_ONE_MAX;
+ }
+ }
+ return m >> MIN(FASTRAND_ONE_BITS, 32);
+
+#elif FASTRAND_MAX_ONE_U32_ALGORITHM == 3
+ // XXX: Probably could be optimized with __builtin_clz()
+ u32 r, y = bound - 1;
+ y |= y >> 1;
+ y |= y >> 2;
+ y |= y >> 4;
+ y |= y >> 8;
+ y |= y >> 16;
+ do
+ r = fastrand_one(c) & y;
+ while (r >= bound);
+ return r;
+
+#else
+# error Invalid FASTRAND_MAX_ONE_U32_ALGORITHM
+#endif
+}
+
+static u64 fastrand_max_more(struct fastrand *c, u64 bound)
+{
+ // Less common case -> compromise between speed and size
+#if FASTRAND_MAX_MORE_ALGORITHM == 1
+ while (1)
+ {
+ u64 r = fastrand_one(c);
+ u64 m = FASTRAND_ONE_MAX;
+#if FASTRAND_ONE_BITS < 64
+#if FASTRAND_ONE_BITS < 32
+ while (bound > m + 1)
+#else
+ if (bound > m + 1)
+#endif
+ {
+ r = (r << FASTRAND_ONE_BITS) | fastrand_one(c);
+ m = (m << FASTRAND_ONE_BITS) | FASTRAND_ONE_MAX;
+ }
+#endif
+ u64 x = r % bound;
+ if (r - x <= m + 1 - bound)
+ return x;
+ }
+
+#elif FASTRAND_MAX_MORE_ALGORITHM == 3
+ u64 r, y = bound - 1;
+ y |= y >> 1;
+ y |= y >> 2;
+ y |= y >> 4;
+ y |= y >> 8;
+ y |= y >> 16;
+ y |= y >> 32;
+ do
+ {
+ r = fastrand_one(c);
+ u64 m = FASTRAND_ONE_MAX;
+#if FASTRAND_ONE_BITS < 64
+#if FASTRAND_ONE_BITS < 32
+ while (y > m)
+#else
+ if (y > m)
+#endif
+ {
+ r = (r << FASTRAND_ONE_BITS) | fastrand_one(c);
+ m = (m << FASTRAND_ONE_BITS) | FASTRAND_ONE_MAX;
+ }
+#endif
+ r &= y;
+ }
+ while (r >= bound);
+ return r;
+#else
+# error Invalid FASTRAND_MAX_MORE_ALGORITHM
+#endif
+}
+
+uint fastrand_max(struct fastrand *c, uint bound)
+{
+ FASTRAND_CHECK_SEED(c);
+#if (UINT32_MAX & FASTRAND_ONE_MAX) < UINT_MAX
+ if (bound <= (u32)(FASTRAND_ONE_MAX) + 1)
+ return fastrand_max_one_u32(c, bound);
+ else
+ return fastrand_max_more(c, bound);
+#else
+ return fastrand_max_one_u32(c, bound);
+#endif
+}
+
+u64 fastrand_max_u64(struct fastrand *c, u64 bound)
+{
+ FASTRAND_CHECK_SEED(c);
+  // bound == FASTRAND_ONE_MAX + 1 would not fit into the u32 parameter of
+  // fastrand_max_one_u32() on 32/64 bit generators, so it goes to fastrand_max_more().
+  if (bound <= (u64)(u32)(FASTRAND_ONE_MAX))
+ return fastrand_max_one_u32(c, bound);
+ else
+ return fastrand_max_more(c, bound);
+}
+
+void fastrand_mem(struct fastrand *c, void *ptr, size_t size)
+{
+ FASTRAND_CHECK_SEED(c);
+ byte *p = ptr;
+ if (size > 3)
+ {
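+      // First align the output pointer to a 32-bit boundary, spending one random word byte by byte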
+ if ((intptr_t)p & 3)
+ {
+ uint x = fastrand_one(c);
+ while ((intptr_t)p & 3)
+ {
+ *p++ = x;
+ x >>= 8;
+ size--;
+ }
+ }
+#if FASTRAND_ONE_BITS == 64
+ for (size_t n = size >> 3; n--; )
+ {
+ u64 x = fastrand_one(c);
+ ((u32 *)p)[0] = x;
+ ((u32 *)p)[1] = x >> 32;
+ p += 8;
+ }
+ if (size & 4)
+ {
+ *(u32 *)p = fastrand_one(c);
+ p += 4;
+ }
+#else
+#if FASTRAND_ONE_BITS < 32
+ uint y, y_n = 0;
+#endif
+ for (size_t n = size >> 2; n--; )
+ {
+#if FASTRAND_ONE_BITS < 32
+ if (!y_n--)
+ {
+ y_n = FASTRAND_ONE_BITS / (32 - FASTRAND_ONE_BITS) - 1;
+ y = fastrand_one(c);
+ }
+      uint x = fastrand_one(c) | (y << FASTRAND_ONE_BITS);
+ y >>= 32 - FASTRAND_ONE_BITS;
+#else
+ uint x = fastrand_one(c);
+#endif
+ *(u32 *)p = x;
+ p += 4;
+ }
+#endif
+ size &= 3;
+ }
+ if (size)
+ {
+ uint x = fastrand_one(c);
+ while (size--)
+ {
+ *p++ = x;
+ x >>= 8;
+ }
+ }
+}
--- /dev/null
+/*
+ * UCW Library -- Random numbers: Testing
+ *
+ * (c) 2020--2022 Pavel Charvat <pchar@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include <ucw/lib.h>
+#include <ucw/random.h>
+#include <ucw/opt.h>
+#include <ucw/time.h>
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <unistd.h>
+
+static int o_test_fast;
+
+static double o_bench_scale = 1.0;
+static int o_bench_only_safe;
+static int o_bench_all;
+static int o_bench_libs;
+static int o_bench_legacy;
+static int o_bench_fast;
+
+static struct opt_section options = {
+ OPT_ITEMS {
+ OPT_HELP("Usage: random-test <options>"),
+ OPT_HELP(""),
+ OPT_HELP("Options:"),
+ OPT_HELP_OPTION,
+ OPT_BOOL(0, "test-fast", o_test_fast, 0, "\tTest fast random generator"),
+ OPT_BOOL(0, "bench-all", o_bench_all, 0, "\tBench everything"),
+ OPT_BOOL(0, "bench-libs", o_bench_libs, 0, "\tBench libc"),
+ OPT_BOOL(0, "bench-legacy", o_bench_legacy, 0, "\tBench legacy interface"),
+ OPT_BOOL(0, "bench-fast", o_bench_fast, 0, "\tBench fast random generator"),
+ OPT_DOUBLE(0, "bench-scale", o_bench_scale, OPT_REQUIRED_VALUE, "<scale>\tIncrease/decrease the length of benchmark (default: 1.0)"),
+    OPT_BOOL(0, "bench-only-safe", o_bench_only_safe, 0, "\tSkip benchmarking of functions that are too verbose or risky"),
+ OPT_END,
+ }
+};
+
+#define TEST(x) do { if (unlikely(!(x))) test_failed(#x, __LINE__); } while (0)
+static NONRET void test_failed(const char *check, uint line)
+{
+ msg(L_FATAL, "TEST: Failed at line %u: TEST(%s)", line, check);
+ abort();
+}
+
+static void test_fast(void)
+{
+ struct fastrand *r = fastrand_new();
+ fastrand_gen_seed(r);
+ for (uint i = 0; i < 5000; i++)
+ {
+ u64 x, y;
+
+ TEST((x = fastrand_max(r, 33)) <= 32);
+ TEST((y = fastrand_bits(r, x)) < (1LLU << x));
+
+ TEST((x = fastrand_max(r, 65)) <= 64);
+ TEST((y = fastrand_bits_u64(r, x)) <= ((x == 64) ? UINT64_MAX : (1LLU << x) - 1));
+
+ x = fastrand_u32(r) ? : 1;
+ TEST((y = fastrand_max(r, x)) < x);
+ x = fastrand_u64(r) ? : 1;
+ TEST((y = fastrand_max_u64(r, x)) < x);
+ }
+ fastrand_delete(r);
+ printf("OK\n");
+}
+
+#define BENCH(func) BENCH_SLOW(1, func)
+#define BENCH_SLOW(mult, func) \
+ do { \
+ timestamp_t bench_timer; \
+ init_timer(&bench_timer); \
+ for (uint bench_i = 50000000 * o_bench_scale / (mult) / 4; bench_i--; ) \
+ { func; func; func; func; } \
+ msg(L_INFO, "%7u %s", (mult) * get_timer(&bench_timer), #func); \
+ } while (0)
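+
+// Note: BENCH_SLOW() runs about 50M * bench-scale / mult calls and scales the measured
+// time back up by mult, so the printed numbers are directly comparable between rows.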
+
+static void bench_libs(void)
+{
+ uint seed = time(NULL) + getpid();
+
+ uint iso_state = seed;
+ srand(seed);
+
+ u64 bsd_buf[128 / 8];
+ struct random_data bsd_data;
+ bsd_data.state = NULL;
+ if (initstate_r(seed, (char *)bsd_buf, sizeof(bsd_buf), &bsd_data) < 0)
+ die("initstate_r(): %m");
+
+ u64 bsd_buf2[128 / 8];
+ errno = 0;
+ initstate(seed, (char *)bsd_buf2, sizeof(bsd_buf2));
+ if (errno)
+ die("initstate(): %m");
+
+ struct drand48_data svid_data;
+ if (srand48_r(seed, &svid_data) < 0)
+ die("srand48_r(): %m");
+ srand48(seed);
+
+ int32_t int32_res;
+ long int long_res;
+
+ msg(L_INFO, "Benchmarking libc ISO generator");
+ BENCH_SLOW(100, srand(seed));
+ BENCH(rand());
+ BENCH(rand_r(&iso_state));
+
+ msg(L_INFO, "Benchmarking libc BSD generator");
+ BENCH_SLOW(100, srandom(seed));
+ BENCH(random());
+ BENCH(random_r(&bsd_data, &int32_res));
+
+ msg(L_INFO, "Benchmarking libc SVID generator");
+ BENCH(srand48(seed));
+ BENCH(mrand48());
+ BENCH(mrand48_r(&svid_data, &long_res));
+}
+
+static void bench_legacy(void)
+{
+ msg(L_INFO, "Benchmarking ucw legacy generator");
+ srand(time(NULL) + getpid());
+ BENCH(random_u32());
+ BENCH(random_u64());
+ BENCH(random_max(1000000));
+ BENCH(random_max_u64(1000000));
+ BENCH_SLOW(2, random_max_u64(1000000000000LL));
+ BENCH_SLOW(2, random_max_u64((1LLU << 63) - 1)); // It uses slightly suboptimal intervals, so +0 is still a bad case
+ BENCH_SLOW(4, random_max_u64((1LLU << 63) + 1));
+ BENCH_SLOW(2, random_max(1 + random_max(1000000)));
+ BENCH_SLOW(4, random_max_u64(1 + random_max_u64(1000000000000LL)));
+}
+
+static void bench_fast(void)
+{
+ msg(L_INFO, "Benchmarking fast random generator");
+ struct fastrand *r = fastrand_new();
+
+ if (!o_bench_only_safe)
+ {
+      // Can produce a huge log when LOCAL_DEBUG is enabled
+ uint seed = time(NULL) + getpid();
+ BENCH_SLOW(100, fastrand_gen_seed(r));
+ BENCH_SLOW(100, fastrand_set_seed(r, seed));
+ }
+
+ fastrand_gen_seed(r);
+
+ BENCH(fastrand_bits(r, 1));
+ BENCH(fastrand_bits(r, 32));
+ BENCH(fastrand_bits_u64(r, 64));
+ BENCH(fastrand_u32(r));
+ BENCH(fastrand_u64(r));
+ BENCH(fastrand_max(r, 1000000));
+ BENCH(fastrand_max(r, (1U << 30) + 0));
+ BENCH(fastrand_max(r, (1U << 30) + 1)); // Worst case for 31 bit generators (typical RAND_MAX)
+ BENCH(fastrand_max(r, (1U << 31) + 0));
+ BENCH(fastrand_max(r, (1U << 31) + 1)); // Worst case for 32 bit generators
+ BENCH(fastrand_max_u64(r, 1000000));
+ BENCH_SLOW(2, fastrand_max_u64(r, 1000000000000LL));
+ BENCH_SLOW(2, fastrand_max_u64(r, (1LLU << 63) + 0));
+ BENCH_SLOW(4, fastrand_max_u64(r, (1LLU << 63) + 1));
+ BENCH_SLOW(2, fastrand_max(r, 1 + fastrand_max(r, 1000000)));
+ BENCH_SLOW(4, fastrand_max_u64(r, 1 + fastrand_max_u64(r, 1000000000000LL)));
+
+ byte buf[1000];
+ BENCH(fastrand_mem(r, buf, 1));
+ BENCH(fastrand_mem(r, buf, 4));
+ BENCH_SLOW(10, fastrand_mem(r, buf, 100));
+ BENCH_SLOW(100, fastrand_mem(r, buf, 1000));
+
+ fastrand_delete(r);
+}
+
+int main(int argc UNUSED, char **argv)
+{
+ cf_def_file = INSTALL_CONFIG_DIR "/common";
+ opt_parse(&options, argv + 1);
+
+ if (o_test_fast)
+ test_fast();
+
+ if (o_bench_all || o_bench_libs)
+ bench_libs();
+ if (o_bench_all || o_bench_legacy)
+ bench_legacy();
+ if (o_bench_all || o_bench_fast)
+ bench_fast();
+
+ return 0;
+}