Add functions to generate random numbers in a specified range.

This adds 3 new variants of the random() function:

    random(min integer, max integer) returns integer
    random(min bigint, max bigint) returns bigint
    random(min numeric, max numeric) returns numeric

Each returns a random number x in the range min <= x <= max.

For the numeric function, the number of digits after the decimal point
is equal to the number of digits that "min" or "max" has after the
decimal point, whichever has more.

The main entry points for these functions are in a new C source file.
The existing random(), random_normal(), and setseed() functions are
moved there too, so that they can all share the same PRNG state, which
is kept private to that file.

Dean Rasheed, reviewed by Jian He, David Zhang, Aleksander Alekseev,
and Tomas Vondra.

Discussion: https://postgr.es/m/CAEZATCV89Vxuq93xQdmc0t-0Y2zeeNQTdsjbmV7dyFBPykbV4Q@mail.gmail.com
This commit is contained in:
Dean Rasheed 2024-03-27 10:12:39 +00:00
parent 818861eb57
commit e6341323a8
13 changed files with 1022 additions and 101 deletions

View File

@ -1862,6 +1862,39 @@ SELECT NOT(ROW(table.*) IS NOT NULL) FROM TABLE; -- detect at least one null in
</para></entry>
</row>
<row>
<entry role="func_table_entry"><para role="func_signature">
<indexterm>
<primary>random</primary>
</indexterm>
<function>random</function> ( <parameter>min</parameter> <type>integer</type>, <parameter>max</parameter> <type>integer</type> )
<returnvalue>integer</returnvalue>
</para>
<para role="func_signature">
<function>random</function> ( <parameter>min</parameter> <type>bigint</type>, <parameter>max</parameter> <type>bigint</type> )
<returnvalue>bigint</returnvalue>
</para>
<para role="func_signature">
<function>random</function> ( <parameter>min</parameter> <type>numeric</type>, <parameter>max</parameter> <type>numeric</type> )
<returnvalue>numeric</returnvalue>
</para>
<para>
Returns a random value in the range
<parameter>min</parameter> &lt;= x &lt;= <parameter>max</parameter>.
For type <type>numeric</type>, the result will have the same number of
fractional decimal digits as <parameter>min</parameter> or
<parameter>max</parameter>, whichever has more.
</para>
<para>
<literal>random(1, 10)</literal>
<returnvalue>7</returnvalue>
</para>
<para>
<literal>random(-0.499, 0.499)</literal>
<returnvalue>0.347</returnvalue>
</para></entry>
</row>
<row>
<entry role="func_table_entry"><para role="func_signature">
<indexterm>
@ -1906,19 +1939,19 @@ SELECT NOT(ROW(table.*) IS NOT NULL) FROM TABLE; -- detect at least one null in
</table>
<para>
The <function>random()</function> function uses a deterministic
pseudo-random number generator.
The <function>random()</function> and <function>random_normal()</function>
functions listed in <xref linkend="functions-math-random-table"/> use a
deterministic pseudo-random number generator.
It is fast but not suitable for cryptographic
applications; see the <xref linkend="pgcrypto"/> module for a more
secure alternative.
If <function>setseed()</function> is called, the series of results of
subsequent <function>random()</function> calls in the current session
subsequent calls to these functions in the current session
can be repeated by re-issuing <function>setseed()</function> with the same
argument.
Without any prior <function>setseed()</function> call in the same
session, the first <function>random()</function> call obtains a seed
session, the first call to any of these functions obtains a seed
from a platform-dependent source of random bits.
These remarks hold equally for <function>random_normal()</function>.
</para>
<para>

View File

@ -82,6 +82,7 @@ OBJS = \
pg_lsn.o \
pg_upgrade_support.o \
pgstatfuncs.o \
pseudorandomfuncs.o \
pseudotypes.o \
quote.o \
rangetypes.o \

View File

@ -21,10 +21,8 @@
#include "catalog/pg_type.h"
#include "common/int.h"
#include "common/pg_prng.h"
#include "common/shortest_dec.h"
#include "libpq/pqformat.h"
#include "miscadmin.h"
#include "utils/array.h"
#include "utils/float.h"
#include "utils/fmgrprotos.h"
@ -64,10 +62,6 @@ float8 degree_c_sixty = 60.0;
float8 degree_c_one_half = 0.5;
float8 degree_c_one = 1.0;
/* State for drandom() and setseed() */
static bool drandom_seed_set = false;
static pg_prng_state drandom_seed;
/* Local function prototypes */
static double sind_q1(double x);
static double cosd_q1(double x);
@ -2785,95 +2779,6 @@ derfc(PG_FUNCTION_ARGS)
}
/* ========== RANDOM FUNCTIONS ========== */
/*
* initialize_drandom_seed - initialize drandom_seed if not yet done
*/
static void
initialize_drandom_seed(void)
{
/* Initialize random seed, if not done yet in this process */
if (unlikely(!drandom_seed_set))
{
/*
* If possible, initialize the seed using high-quality random bits.
* Should that fail for some reason, we fall back on a lower-quality
* seed based on current time and PID.
*/
if (unlikely(!pg_prng_strong_seed(&drandom_seed)))
{
TimestampTz now = GetCurrentTimestamp();
uint64 iseed;
/* Mix the PID with the most predictable bits of the timestamp */
iseed = (uint64) now ^ ((uint64) MyProcPid << 32);
pg_prng_seed(&drandom_seed, iseed);
}
drandom_seed_set = true;
}
}
/*
* drandom - returns a random number
*/
Datum
drandom(PG_FUNCTION_ARGS)
{
float8 result;
initialize_drandom_seed();
/* pg_prng_double produces desired result range [0.0 - 1.0) */
result = pg_prng_double(&drandom_seed);
PG_RETURN_FLOAT8(result);
}
/*
* drandom_normal - returns a random number from a normal distribution
*/
Datum
drandom_normal(PG_FUNCTION_ARGS)
{
float8 mean = PG_GETARG_FLOAT8(0);
float8 stddev = PG_GETARG_FLOAT8(1);
float8 result,
z;
initialize_drandom_seed();
/* Get random value from standard normal(mean = 0.0, stddev = 1.0) */
z = pg_prng_double_normal(&drandom_seed);
/* Transform the normal standard variable (z) */
/* using the target normal distribution parameters */
result = (stddev * z) + mean;
PG_RETURN_FLOAT8(result);
}
/*
* setseed - set seed for the random number generator
*/
Datum
setseed(PG_FUNCTION_ARGS)
{
float8 seed = PG_GETARG_FLOAT8(0);
if (seed < -1 || seed > 1 || isnan(seed))
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("setseed parameter %g is out of allowed range [-1,1]",
seed)));
pg_prng_fseed(&drandom_seed, seed);
drandom_seed_set = true;
PG_RETURN_VOID();
}
/*
* =========================

View File

@ -69,6 +69,7 @@ backend_sources += files(
'pg_lsn.c',
'pg_upgrade_support.c',
'pgstatfuncs.c',
'pseudorandomfuncs.c',
'pseudotypes.c',
'quote.c',
'rangetypes.c',

View File

@ -583,6 +583,8 @@ static void power_var(const NumericVar *base, const NumericVar *exp,
static void power_var_int(const NumericVar *base, int exp, int exp_dscale,
NumericVar *result);
static void power_ten_int(int exp, NumericVar *result);
static void random_var(pg_prng_state *state, const NumericVar *rmin,
const NumericVar *rmax, NumericVar *result);
static int cmp_abs(const NumericVar *var1, const NumericVar *var2);
static int cmp_abs_common(const NumericDigit *var1digits, int var1ndigits,
@ -4219,6 +4221,56 @@ numeric_trim_scale(PG_FUNCTION_ARGS)
PG_RETURN_NUMERIC(res);
}
/*
* Return a random numeric value in the range [rmin, rmax].
*/
Numeric
random_numeric(pg_prng_state *state, Numeric rmin, Numeric rmax)
{
NumericVar rmin_var;
NumericVar rmax_var;
NumericVar result;
Numeric res;
/* Range bounds must not be NaN/infinity */
if (NUMERIC_IS_SPECIAL(rmin))
{
if (NUMERIC_IS_NAN(rmin))
ereport(ERROR,
errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("lower bound cannot be NaN"));
else
ereport(ERROR,
errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("lower bound cannot be infinity"));
}
if (NUMERIC_IS_SPECIAL(rmax))
{
if (NUMERIC_IS_NAN(rmax))
ereport(ERROR,
errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("upper bound cannot be NaN"));
else
ereport(ERROR,
errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("upper bound cannot be infinity"));
}
/* Return a random value in the range [rmin, rmax] */
init_var_from_num(rmin, &rmin_var);
init_var_from_num(rmax, &rmax_var);
init_var(&result);
random_var(state, &rmin_var, &rmax_var, &result);
res = make_result(&result);
free_var(&result);
return res;
}
/* ----------------------------------------------------------------------
*
@ -11262,6 +11314,173 @@ power_ten_int(int exp, NumericVar *result)
result->digits[0] *= 10;
}
/*
* random_var() - return a random value in the range [rmin, rmax].
*/
static void
random_var(pg_prng_state *state, const NumericVar *rmin,
const NumericVar *rmax, NumericVar *result)
{
int rscale;
NumericVar rlen;
int res_ndigits;
int n;
int pow10;
int i;
uint64 rlen64;
int rlen64_ndigits;
rscale = Max(rmin->dscale, rmax->dscale);
/* Compute rlen = rmax - rmin and check the range bounds */
init_var(&rlen);
sub_var(rmax, rmin, &rlen);
if (rlen.sign == NUMERIC_NEG)
ereport(ERROR,
errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("lower bound must be less than or equal to upper bound"));
/* Special case for an empty range */
if (rlen.ndigits == 0)
{
set_var_from_var(rmin, result);
result->dscale = rscale;
free_var(&rlen);
return;
}
/*
* Otherwise, select a random value in the range [0, rlen = rmax - rmin],
* and shift it to the required range by adding rmin.
*/
/* Required result digits */
res_ndigits = rlen.weight + 1 + (rscale + DEC_DIGITS - 1) / DEC_DIGITS;
/*
* To get the required rscale, the final result digit must be a multiple
* of pow10 = 10^n, where n = (-rscale) mod DEC_DIGITS.
*/
n = ((rscale + DEC_DIGITS - 1) / DEC_DIGITS) * DEC_DIGITS - rscale;
pow10 = 1;
for (i = 0; i < n; i++)
pow10 *= 10;
/*
* To choose a random value uniformly from the range [0, rlen], we choose
* from the slightly larger range [0, rlen2], where rlen2 is formed from
* rlen by copying the first 4 NBASE digits, and setting all remaining
* decimal digits to "9".
*
* Without loss of generality, we can ignore the weight of rlen2 and treat
* it as a pure integer for the purposes of this discussion. The process
* above gives rlen2 + 1 = rlen64 * 10^N, for some integer N, where rlen64
* is a 64-bit integer formed from the first 4 NBASE digits copied from
* rlen. Since this trivially factors into smaller pieces that fit in
* 64-bit integers, the task of choosing a random value uniformly from the
* rlen2 + 1 possible values in [0, rlen2] is much simpler.
*
* If the random value selected is too large, it is rejected, and we try
* again until we get a result <= rlen, ensuring that the overall result
* is uniform (no particular value is any more likely than any other).
*
* Since rlen64 holds 4 NBASE digits from rlen, it contains at least
* DEC_DIGITS * 3 + 1 decimal digits (i.e., at least 13 decimal digits,
* when DEC_DIGITS is 4). Therefore the probability of needing to reject
* the value chosen and retry is less than 1e-13.
*/
rlen64 = (uint64) rlen.digits[0];
rlen64_ndigits = 1;
while (rlen64_ndigits < res_ndigits && rlen64_ndigits < 4)
{
rlen64 *= NBASE;
if (rlen64_ndigits < rlen.ndigits)
rlen64 += rlen.digits[rlen64_ndigits];
rlen64_ndigits++;
}
/* Loop until we get a result <= rlen */
do
{
NumericDigit *res_digits;
uint64 rand;
int whole_ndigits;
alloc_var(result, res_ndigits);
result->sign = NUMERIC_POS;
result->weight = rlen.weight;
result->dscale = rscale;
res_digits = result->digits;
/*
* Set the first rlen64_ndigits using a random value in [0, rlen64].
*
* If this is the whole result, and rscale is not a multiple of
* DEC_DIGITS (pow10 from above is not 1), then we need this to be a
* multiple of pow10.
*/
if (rlen64_ndigits == res_ndigits && pow10 != 1)
rand = pg_prng_uint64_range(state, 0, rlen64 / pow10) * pow10;
else
rand = pg_prng_uint64_range(state, 0, rlen64);
for (i = rlen64_ndigits - 1; i >= 0; i--)
{
res_digits[i] = (NumericDigit) (rand % NBASE);
rand = rand / NBASE;
}
/*
* Set the remaining digits to random values in range [0, NBASE),
* noting that the last digit needs to be a multiple of pow10.
*/
whole_ndigits = res_ndigits;
if (pow10 != 1)
whole_ndigits--;
/* Set whole digits in groups of 4 for best performance */
i = rlen64_ndigits;
while (i < whole_ndigits - 3)
{
rand = pg_prng_uint64_range(state, 0,
(uint64) NBASE * NBASE * NBASE * NBASE - 1);
res_digits[i++] = (NumericDigit) (rand % NBASE);
rand = rand / NBASE;
res_digits[i++] = (NumericDigit) (rand % NBASE);
rand = rand / NBASE;
res_digits[i++] = (NumericDigit) (rand % NBASE);
rand = rand / NBASE;
res_digits[i++] = (NumericDigit) rand;
}
/* Remaining whole digits */
while (i < whole_ndigits)
{
rand = pg_prng_uint64_range(state, 0, NBASE - 1);
res_digits[i++] = (NumericDigit) rand;
}
/* Final partial digit (multiple of pow10) */
if (i < res_ndigits)
{
rand = pg_prng_uint64_range(state, 0, NBASE / pow10 - 1) * pow10;
res_digits[i] = (NumericDigit) rand;
}
/* Remove leading/trailing zeroes */
strip_var(result);
/* If result > rlen, try again */
} while (cmp_var(result, &rlen) > 0);
/* Offset the result to the required range */
add_var(result, rmin, result);
free_var(&rlen);
}
/* ----------------------------------------------------------------------
*

View File

@ -0,0 +1,185 @@
/*-------------------------------------------------------------------------
*
* pseudorandomfuncs.c
* Functions giving SQL access to a pseudorandom number generator.
*
* Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* src/backend/utils/adt/pseudorandomfuncs.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include <math.h>
#include "common/pg_prng.h"
#include "miscadmin.h"
#include "utils/fmgrprotos.h"
#include "utils/numeric.h"
#include "utils/timestamp.h"
/* Shared PRNG state used by all the random functions */
static pg_prng_state prng_state;
static bool prng_seed_set = false;
/*
* initialize_prng() -
*
* Initialize (seed) the PRNG, if not done yet in this process.
*/
static void
initialize_prng(void)
{
if (unlikely(!prng_seed_set))
{
/*
* If possible, seed the PRNG using high-quality random bits. Should
* that fail for some reason, we fall back on a lower-quality seed
* based on current time and PID.
*/
if (unlikely(!pg_prng_strong_seed(&prng_state)))
{
TimestampTz now = GetCurrentTimestamp();
uint64 iseed;
/* Mix the PID with the most predictable bits of the timestamp */
iseed = (uint64) now ^ ((uint64) MyProcPid << 32);
pg_prng_seed(&prng_state, iseed);
}
prng_seed_set = true;
}
}
/*
* setseed() -
*
* Seed the PRNG from a specified value in the range [-1.0, 1.0].
*/
Datum
setseed(PG_FUNCTION_ARGS)
{
float8 seed = PG_GETARG_FLOAT8(0);
if (seed < -1 || seed > 1 || isnan(seed))
ereport(ERROR,
errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("setseed parameter %g is out of allowed range [-1,1]",
seed));
pg_prng_fseed(&prng_state, seed);
prng_seed_set = true;
PG_RETURN_VOID();
}
/*
* drandom() -
*
* Returns a random number chosen uniformly in the range [0.0, 1.0).
*/
Datum
drandom(PG_FUNCTION_ARGS)
{
float8 result;
initialize_prng();
/* pg_prng_double produces desired result range [0.0, 1.0) */
result = pg_prng_double(&prng_state);
PG_RETURN_FLOAT8(result);
}
/*
* drandom_normal() -
*
* Returns a random number from a normal distribution.
*/
Datum
drandom_normal(PG_FUNCTION_ARGS)
{
float8 mean = PG_GETARG_FLOAT8(0);
float8 stddev = PG_GETARG_FLOAT8(1);
float8 result,
z;
initialize_prng();
/* Get random value from standard normal(mean = 0.0, stddev = 1.0) */
z = pg_prng_double_normal(&prng_state);
/* Transform the normal standard variable (z) */
/* using the target normal distribution parameters */
result = (stddev * z) + mean;
PG_RETURN_FLOAT8(result);
}
/*
* int4random() -
*
* Returns a random 32-bit integer chosen uniformly in the specified range.
*/
Datum
int4random(PG_FUNCTION_ARGS)
{
int32 rmin = PG_GETARG_INT32(0);
int32 rmax = PG_GETARG_INT32(1);
int32 result;
if (rmin > rmax)
ereport(ERROR,
errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("lower bound must be less than or equal to upper bound"));
initialize_prng();
result = (int32) pg_prng_int64_range(&prng_state, rmin, rmax);
PG_RETURN_INT32(result);
}
/*
* int8random() -
*
* Returns a random 64-bit integer chosen uniformly in the specified range.
*/
Datum
int8random(PG_FUNCTION_ARGS)
{
int64 rmin = PG_GETARG_INT64(0);
int64 rmax = PG_GETARG_INT64(1);
int64 result;
if (rmin > rmax)
ereport(ERROR,
errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("lower bound must be less than or equal to upper bound"));
initialize_prng();
result = pg_prng_int64_range(&prng_state, rmin, rmax);
PG_RETURN_INT64(result);
}
/*
* numeric_random() -
*
* Returns a random numeric value chosen uniformly in the specified range.
*/
Datum
numeric_random(PG_FUNCTION_ARGS)
{
Numeric rmin = PG_GETARG_NUMERIC(0);
Numeric rmax = PG_GETARG_NUMERIC(1);
Numeric result;
initialize_prng();
result = random_numeric(&prng_state, rmin, rmax);
PG_RETURN_NUMERIC(result);
}

View File

@ -184,6 +184,42 @@ pg_prng_int64p(pg_prng_state *state)
return (int64) (xoroshiro128ss(state) & UINT64CONST(0x7FFFFFFFFFFFFFFF));
}
/*
* Select a random int64 uniformly from the range [rmin, rmax].
* If the range is empty, rmin is always produced.
*/
int64
pg_prng_int64_range(pg_prng_state *state, int64 rmin, int64 rmax)
{
int64 val;
if (likely(rmax > rmin))
{
uint64 uval;
/*
* Use pg_prng_uint64_range(). Can't simply pass it rmin and rmax,
* since (uint64) rmin will be larger than (uint64) rmax if rmin < 0.
*/
uval = (uint64) rmin +
pg_prng_uint64_range(state, 0, (uint64) rmax - (uint64) rmin);
/*
* Safely convert back to int64, avoiding implementation-defined
* behavior for values larger than PG_INT64_MAX. Modern compilers
* will reduce this to a simple assignment.
*/
if (uval > PG_INT64_MAX)
val = (int64) (uval - PG_INT64_MIN) + PG_INT64_MIN;
else
val = (int64) uval;
}
else
val = rmin;
return val;
}
/*
* Select a random uint32 uniformly from the range [0, PG_UINT32_MAX].
*/

View File

@ -57,6 +57,6 @@
*/
/* yyyymmddN */
#define CATALOG_VERSION_NO 202403271
#define CATALOG_VERSION_NO 202403272
#endif

View File

@ -3381,6 +3381,18 @@
proname => 'random_normal', provolatile => 'v', proparallel => 'r',
prorettype => 'float8', proargtypes => 'float8 float8',
prosrc => 'drandom_normal' },
{ oid => '9719', descr => 'random integer in range',
proname => 'random', provolatile => 'v', proparallel => 'r',
prorettype => 'int4', proargtypes => 'int4 int4',
proargnames => '{min,max}', prosrc => 'int4random' },
{ oid => '9720', descr => 'random bigint in range',
proname => 'random', provolatile => 'v', proparallel => 'r',
prorettype => 'int8', proargtypes => 'int8 int8',
proargnames => '{min,max}', prosrc => 'int8random' },
{ oid => '9721', descr => 'random numeric in range',
proname => 'random', provolatile => 'v', proparallel => 'r',
prorettype => 'numeric', proargtypes => 'numeric numeric',
proargnames => '{min,max}', prosrc => 'numeric_random' },
{ oid => '1599', descr => 'set random seed',
proname => 'setseed', provolatile => 'v', proparallel => 'r',
prorettype => 'void', proargtypes => 'float8', prosrc => 'setseed' },

View File

@ -51,6 +51,7 @@ extern uint64 pg_prng_uint64(pg_prng_state *state);
extern uint64 pg_prng_uint64_range(pg_prng_state *state, uint64 rmin, uint64 rmax);
extern int64 pg_prng_int64(pg_prng_state *state);
extern int64 pg_prng_int64p(pg_prng_state *state);
extern int64 pg_prng_int64_range(pg_prng_state *state, int64 rmin, int64 rmax);
extern uint32 pg_prng_uint32(pg_prng_state *state);
extern int32 pg_prng_int32(pg_prng_state *state);
extern int32 pg_prng_int32p(pg_prng_state *state);

View File

@ -14,6 +14,7 @@
#ifndef _PG_NUMERIC_H_
#define _PG_NUMERIC_H_
#include "common/pg_prng.h"
#include "fmgr.h"
/*
@ -103,4 +104,7 @@ extern Numeric numeric_mod_opt_error(Numeric num1, Numeric num2,
extern int32 numeric_int4_opt_error(Numeric num, bool *have_error);
extern int64 numeric_int8_opt_error(Numeric num, bool *have_error);
extern Numeric random_numeric(pg_prng_state *state,
Numeric rmin, Numeric rmax);
#endif /* _PG_NUMERIC_H_ */

View File

@ -120,6 +120,229 @@ SELECT ks_test_normal_random() OR
t
(1 row)
-- Test random(min, max)
-- invalid range bounds
SELECT random(1, 0);
ERROR: lower bound must be less than or equal to upper bound
SELECT random(1000000000001, 1000000000000);
ERROR: lower bound must be less than or equal to upper bound
SELECT random(-2.0, -3.0);
ERROR: lower bound must be less than or equal to upper bound
SELECT random('NaN'::numeric, 10);
ERROR: lower bound cannot be NaN
SELECT random('-Inf'::numeric, 0);
ERROR: lower bound cannot be infinity
SELECT random(0, 'NaN'::numeric);
ERROR: upper bound cannot be NaN
SELECT random(0, 'Inf'::numeric);
ERROR: upper bound cannot be infinity
-- empty range is OK
SELECT random(101, 101);
random
--------
101
(1 row)
SELECT random(1000000000001, 1000000000001);
random
---------------
1000000000001
(1 row)
SELECT random(3.14, 3.14);
random
--------
3.14
(1 row)
-- There should be no triple duplicates in 1000 full-range 32-bit random()
-- values. (Each of the C(1000, 3) choices of triplets from the 1000 values
-- has a probability of 1/(2^32)^2 of being a triple duplicate, so the
-- average number of triple duplicates is 1000 * 999 * 998 / 6 / 2^64, which
-- is roughly 9e-12.)
SELECT r, count(*)
FROM (SELECT random(-2147483648, 2147483647) r
FROM generate_series(1, 1000)) ss
GROUP BY r HAVING count(*) > 2;
r | count
---+-------
(0 rows)
-- There should be no duplicates in 1000 full-range 64-bit random() values.
SELECT r, count(*)
FROM (SELECT random_normal(-9223372036854775808, 9223372036854775807) r
FROM generate_series(1, 1000)) ss
GROUP BY r HAVING count(*) > 1;
r | count
---+-------
(0 rows)
-- There should be no duplicates in 1000 15-digit random() numeric values.
SELECT r, count(*)
FROM (SELECT random_normal(0, 1 - 1e-15) r
FROM generate_series(1, 1000)) ss
GROUP BY r HAVING count(*) > 1;
r | count
---+-------
(0 rows)
-- Expect at least one out of 2000 random values to be in the lowest and
-- highest 1% of the range.
SELECT (count(*) FILTER (WHERE r < -2104533975)) > 0 AS has_small,
(count(*) FILTER (WHERE r > 2104533974)) > 0 AS has_large
FROM (SELECT random(-2147483648, 2147483647) r FROM generate_series(1, 2000)) ss;
has_small | has_large
-----------+-----------
t | t
(1 row)
SELECT count(*) FILTER (WHERE r < -1500000000 OR r > 1500000000) AS out_of_range,
(count(*) FILTER (WHERE r < -1470000000)) > 0 AS has_small,
(count(*) FILTER (WHERE r > 1470000000)) > 0 AS has_large
FROM (SELECT random(-1500000000, 1500000000) r FROM generate_series(1, 2000)) ss;
out_of_range | has_small | has_large
--------------+-----------+-----------
0 | t | t
(1 row)
SELECT (count(*) FILTER (WHERE r < -9038904596117680292)) > 0 AS has_small,
(count(*) FILTER (WHERE r > 9038904596117680291)) > 0 AS has_large
FROM (SELECT random(-9223372036854775808, 9223372036854775807) r
FROM generate_series(1, 2000)) ss;
has_small | has_large
-----------+-----------
t | t
(1 row)
SELECT count(*) FILTER (WHERE r < -1500000000000000 OR r > 1500000000000000) AS out_of_range,
(count(*) FILTER (WHERE r < -1470000000000000)) > 0 AS has_small,
(count(*) FILTER (WHERE r > 1470000000000000)) > 0 AS has_large
FROM (SELECT random(-1500000000000000, 1500000000000000) r
FROM generate_series(1, 2000)) ss;
out_of_range | has_small | has_large
--------------+-----------+-----------
0 | t | t
(1 row)
SELECT count(*) FILTER (WHERE r < -1.5 OR r > 1.5) AS out_of_range,
(count(*) FILTER (WHERE r < -1.47)) > 0 AS has_small,
(count(*) FILTER (WHERE r > 1.47)) > 0 AS has_large
FROM (SELECT random(-1.500000000000000, 1.500000000000000) r
FROM generate_series(1, 2000)) ss;
out_of_range | has_small | has_large
--------------+-----------+-----------
0 | t | t
(1 row)
-- Every possible value should occur at least once in 2500 random() values
-- chosen from a range with 100 distinct values.
SELECT min(r), max(r), count(r) FROM (
SELECT DISTINCT random(-50, 49) r FROM generate_series(1, 2500));
min | max | count
-----+-----+-------
-50 | 49 | 100
(1 row)
SELECT min(r), max(r), count(r) FROM (
SELECT DISTINCT random(123000000000, 123000000099) r
FROM generate_series(1, 2500));
min | max | count
--------------+--------------+-------
123000000000 | 123000000099 | 100
(1 row)
SELECT min(r), max(r), count(r) FROM (
SELECT DISTINCT random(-0.5, 0.49) r FROM generate_series(1, 2500));
min | max | count
-------+------+-------
-0.50 | 0.49 | 100
(1 row)
-- Check for uniform distribution using the Kolmogorov-Smirnov test.
CREATE FUNCTION ks_test_uniform_random_int_in_range()
RETURNS boolean AS
$$
DECLARE
n int := 1000; -- Number of samples
c float8 := 1.94947; -- Critical value for 99.9% confidence
ok boolean;
BEGIN
ok := (
WITH samples AS (
SELECT random(0, 999999) / 1000000.0 r FROM generate_series(1, n) ORDER BY 1
), indexed_samples AS (
SELECT (row_number() OVER())-1.0 i, r FROM samples
)
SELECT max(abs(i/n-r)) < c / sqrt(n) FROM indexed_samples
);
RETURN ok;
END
$$
LANGUAGE plpgsql;
SELECT ks_test_uniform_random_int_in_range() OR
ks_test_uniform_random_int_in_range() OR
ks_test_uniform_random_int_in_range() AS uniform_int;
uniform_int
-------------
t
(1 row)
CREATE FUNCTION ks_test_uniform_random_bigint_in_range()
RETURNS boolean AS
$$
DECLARE
n int := 1000; -- Number of samples
c float8 := 1.94947; -- Critical value for 99.9% confidence
ok boolean;
BEGIN
ok := (
WITH samples AS (
SELECT random(0, 999999999999) / 1000000000000.0 r FROM generate_series(1, n) ORDER BY 1
), indexed_samples AS (
SELECT (row_number() OVER())-1.0 i, r FROM samples
)
SELECT max(abs(i/n-r)) < c / sqrt(n) FROM indexed_samples
);
RETURN ok;
END
$$
LANGUAGE plpgsql;
SELECT ks_test_uniform_random_bigint_in_range() OR
ks_test_uniform_random_bigint_in_range() OR
ks_test_uniform_random_bigint_in_range() AS uniform_bigint;
uniform_bigint
----------------
t
(1 row)
CREATE FUNCTION ks_test_uniform_random_numeric_in_range()
RETURNS boolean AS
$$
DECLARE
n int := 1000; -- Number of samples
c float8 := 1.94947; -- Critical value for 99.9% confidence
ok boolean;
BEGIN
ok := (
WITH samples AS (
SELECT random(0, 0.999999) r FROM generate_series(1, n) ORDER BY 1
), indexed_samples AS (
SELECT (row_number() OVER())-1.0 i, r FROM samples
)
SELECT max(abs(i/n-r)) < c / sqrt(n) FROM indexed_samples
);
RETURN ok;
END
$$
LANGUAGE plpgsql;
SELECT ks_test_uniform_random_numeric_in_range() OR
ks_test_uniform_random_numeric_in_range() OR
ks_test_uniform_random_numeric_in_range() AS uniform_numeric;
uniform_numeric
-----------------
t
(1 row)
-- setseed() should produce a reproducible series of random() values.
SELECT setseed(0.5);
setseed
@ -176,3 +399,140 @@ SELECT random_normal(mean => 1, stddev => 0.1) r FROM generate_series(1, 10);
0.96403105557543
(10 rows)
-- Reproducible random(min, max) values.
SELECT random(1, 6) FROM generate_series(1, 10);
random
--------
5
4
5
1
6
1
1
3
6
5
(10 rows)
SELECT random(-2147483648, 2147483647) FROM generate_series(1, 10);
random
-------------
-84380014
1287883594
-1927252904
13516867
-1902961616
-1824286201
-871264469
-1225880415
229836730
-116039023
(10 rows)
SELECT random(-9223372036854775808, 9223372036854775807) FROM generate_series(1, 10);
random
----------------------
-6205280962992680052
-3583519428011353337
511801786318122700
4672737727839409655
-6674868801536280768
-7816052100626646489
-4340613370136007199
-5873174504107419786
-2249910101649817824
-4493828993910792325
(10 rows)
SELECT random(-1e30, 1e30) FROM generate_series(1, 10);
random
---------------------------------
-732116469803315942112255539315
794641423514877972798449289857
-576932746026123093304638334719
420625067723533225139761854757
-339227806779403187811001078919
-77667951539418104959241732636
239810941795708162629328071599
820784371155896967052141946697
-377084684544126871150439048352
-979773225250716295007225086726
(10 rows)
SELECT random(-0.4, 0.4) FROM generate_series(1, 10);
random
--------
0.1
0.0
0.4
-0.2
0.1
0.2
0.3
0.0
-0.2
0.2
(10 rows)
SELECT random(0, 1 - 1e-30) FROM generate_series(1, 10);
random
----------------------------------
0.676442053784930109917469287265
0.221310454098356723569995592911
0.060101338174419259555193956224
0.509960354695248239243002172364
0.248680813394555793693952296993
0.353262552880008646603494668901
0.760692600450339509843044233719
0.554987655310094483449494782510
0.330890988458592995280347745733
0.665435298280470361228607881507
(10 rows)
SELECT n, random(0, trim_scale(abs(1 - 10.0^(-n)))) FROM generate_series(-20, 20) n;
n | random
-----+------------------------
-20 | 94174615760837282445
-19 | 6692559888531296894
-18 | 801114552709125931
-17 | 44091460959939971
-16 | 2956109297383113
-15 | 783332278684523
-14 | 81534303241440
-13 | 2892623140500
-12 | 269397605141
-11 | 13027512296
-10 | 9178377775
-9 | 323534150
-8 | 91897803
-7 | 6091383
-6 | 13174
-5 | 92714
-4 | 8079
-3 | 429
-2 | 30
-1 | 3
0 | 0
1 | 0.1
2 | 0.69
3 | 0.492
4 | 0.7380
5 | 0.77078
6 | 0.738142
7 | 0.1808815
8 | 0.14908933
9 | 0.222654042
10 | 0.2281295170
11 | 0.73655782966
12 | 0.056357256884
13 | 0.8998407524375
14 | 0.28198400530206
15 | 0.713478222805230
16 | 0.0415046850936909
17 | 0.45946350291315119
18 | 0.310966980367873753
19 | 0.4967623661709676512
20 | 0.60795101234744211935
(41 rows)

View File

@ -100,6 +100,161 @@ SELECT ks_test_normal_random() OR
ks_test_normal_random() OR
ks_test_normal_random() AS standard_normal;
-- Test random(min, max)
-- invalid range bounds
SELECT random(1, 0);
SELECT random(1000000000001, 1000000000000);
SELECT random(-2.0, -3.0);
SELECT random('NaN'::numeric, 10);
SELECT random('-Inf'::numeric, 0);
SELECT random(0, 'NaN'::numeric);
SELECT random(0, 'Inf'::numeric);
-- empty range is OK
SELECT random(101, 101);
SELECT random(1000000000001, 1000000000001);
SELECT random(3.14, 3.14);
-- There should be no triple duplicates in 1000 full-range 32-bit random()
-- values. (Each of the C(1000, 3) choices of triplets from the 1000 values
-- has a probability of 1/(2^32)^2 of being a triple duplicate, so the
-- average number of triple duplicates is 1000 * 999 * 998 / 6 / 2^64, which
-- is roughly 9e-12.)
SELECT r, count(*)
FROM (SELECT random(-2147483648, 2147483647) r
FROM generate_series(1, 1000)) ss
GROUP BY r HAVING count(*) > 2;
-- There should be no duplicates in 1000 full-range 64-bit random() values.
SELECT r, count(*)
FROM (SELECT random_normal(-9223372036854775808, 9223372036854775807) r
FROM generate_series(1, 1000)) ss
GROUP BY r HAVING count(*) > 1;
-- There should be no duplicates in 1000 15-digit random() numeric values.
SELECT r, count(*)
FROM (SELECT random_normal(0, 1 - 1e-15) r
FROM generate_series(1, 1000)) ss
GROUP BY r HAVING count(*) > 1;
-- Expect at least one out of 2000 random values to be in the lowest and
-- highest 1% of the range.
SELECT (count(*) FILTER (WHERE r < -2104533975)) > 0 AS has_small,
(count(*) FILTER (WHERE r > 2104533974)) > 0 AS has_large
FROM (SELECT random(-2147483648, 2147483647) r FROM generate_series(1, 2000)) ss;
SELECT count(*) FILTER (WHERE r < -1500000000 OR r > 1500000000) AS out_of_range,
(count(*) FILTER (WHERE r < -1470000000)) > 0 AS has_small,
(count(*) FILTER (WHERE r > 1470000000)) > 0 AS has_large
FROM (SELECT random(-1500000000, 1500000000) r FROM generate_series(1, 2000)) ss;
SELECT (count(*) FILTER (WHERE r < -9038904596117680292)) > 0 AS has_small,
(count(*) FILTER (WHERE r > 9038904596117680291)) > 0 AS has_large
FROM (SELECT random(-9223372036854775808, 9223372036854775807) r
FROM generate_series(1, 2000)) ss;
SELECT count(*) FILTER (WHERE r < -1500000000000000 OR r > 1500000000000000) AS out_of_range,
(count(*) FILTER (WHERE r < -1470000000000000)) > 0 AS has_small,
(count(*) FILTER (WHERE r > 1470000000000000)) > 0 AS has_large
FROM (SELECT random(-1500000000000000, 1500000000000000) r
FROM generate_series(1, 2000)) ss;
SELECT count(*) FILTER (WHERE r < -1.5 OR r > 1.5) AS out_of_range,
(count(*) FILTER (WHERE r < -1.47)) > 0 AS has_small,
(count(*) FILTER (WHERE r > 1.47)) > 0 AS has_large
FROM (SELECT random(-1.500000000000000, 1.500000000000000) r
FROM generate_series(1, 2000)) ss;
-- Every possible value should occur at least once in 2500 random() values
-- chosen from a range with 100 distinct values.
SELECT min(r), max(r), count(r) FROM (
SELECT DISTINCT random(-50, 49) r FROM generate_series(1, 2500));
SELECT min(r), max(r), count(r) FROM (
SELECT DISTINCT random(123000000000, 123000000099) r
FROM generate_series(1, 2500));
SELECT min(r), max(r), count(r) FROM (
SELECT DISTINCT random(-0.5, 0.49) r FROM generate_series(1, 2500));
-- Check for uniform distribution using the Kolmogorov-Smirnov test.
CREATE FUNCTION ks_test_uniform_random_int_in_range()
RETURNS boolean AS
$$
DECLARE
n int := 1000; -- Number of samples
c float8 := 1.94947; -- Critical value for 99.9% confidence
ok boolean;
BEGIN
ok := (
WITH samples AS (
SELECT random(0, 999999) / 1000000.0 r FROM generate_series(1, n) ORDER BY 1
), indexed_samples AS (
SELECT (row_number() OVER())-1.0 i, r FROM samples
)
SELECT max(abs(i/n-r)) < c / sqrt(n) FROM indexed_samples
);
RETURN ok;
END
$$
LANGUAGE plpgsql;
SELECT ks_test_uniform_random_int_in_range() OR
ks_test_uniform_random_int_in_range() OR
ks_test_uniform_random_int_in_range() AS uniform_int;
CREATE FUNCTION ks_test_uniform_random_bigint_in_range()
RETURNS boolean AS
$$
DECLARE
n int := 1000; -- Number of samples
c float8 := 1.94947; -- Critical value for 99.9% confidence
ok boolean;
BEGIN
ok := (
WITH samples AS (
SELECT random(0, 999999999999) / 1000000000000.0 r FROM generate_series(1, n) ORDER BY 1
), indexed_samples AS (
SELECT (row_number() OVER())-1.0 i, r FROM samples
)
SELECT max(abs(i/n-r)) < c / sqrt(n) FROM indexed_samples
);
RETURN ok;
END
$$
LANGUAGE plpgsql;
SELECT ks_test_uniform_random_bigint_in_range() OR
ks_test_uniform_random_bigint_in_range() OR
ks_test_uniform_random_bigint_in_range() AS uniform_bigint;
CREATE FUNCTION ks_test_uniform_random_numeric_in_range()
RETURNS boolean AS
$$
DECLARE
n int := 1000; -- Number of samples
c float8 := 1.94947; -- Critical value for 99.9% confidence
ok boolean;
BEGIN
ok := (
WITH samples AS (
SELECT random(0, 0.999999) r FROM generate_series(1, n) ORDER BY 1
), indexed_samples AS (
SELECT (row_number() OVER())-1.0 i, r FROM samples
)
SELECT max(abs(i/n-r)) < c / sqrt(n) FROM indexed_samples
);
RETURN ok;
END
$$
LANGUAGE plpgsql;
SELECT ks_test_uniform_random_numeric_in_range() OR
ks_test_uniform_random_numeric_in_range() OR
ks_test_uniform_random_numeric_in_range() AS uniform_numeric;
-- setseed() should produce a reproducible series of random() values.
SELECT setseed(0.5);
@ -113,3 +268,12 @@ SET extra_float_digits = -1;
SELECT random_normal() FROM generate_series(1, 10);
SELECT random_normal(mean => 1, stddev => 0.1) r FROM generate_series(1, 10);
-- Reproducible random(min, max) values.
SELECT random(1, 6) FROM generate_series(1, 10);
SELECT random(-2147483648, 2147483647) FROM generate_series(1, 10);
SELECT random(-9223372036854775808, 9223372036854775807) FROM generate_series(1, 10);
SELECT random(-1e30, 1e30) FROM generate_series(1, 10);
SELECT random(-0.4, 0.4) FROM generate_series(1, 10);
SELECT random(0, 1 - 1e-30) FROM generate_series(1, 10);
SELECT n, random(0, trim_scale(abs(1 - 10.0^(-n)))) FROM generate_series(-20, 20) n;