624 lines
17 KiB
C
624 lines
17 KiB
C
/*--------------------------------------------------------------------------
|
|
*
|
|
* test_integerset.c
|
|
* Test integer set data structure.
|
|
*
|
|
* Copyright (c) 2019, PostgreSQL Global Development Group
|
|
*
|
|
* IDENTIFICATION
|
|
* src/test/modules/test_integerset/test_integerset.c
|
|
*
|
|
* -------------------------------------------------------------------------
|
|
*/
|
|
#include "postgres.h"
|
|
|
|
#include "fmgr.h"
|
|
#include "lib/integerset.h"
|
|
#include "miscadmin.h"
|
|
#include "nodes/bitmapset.h"
|
|
#include "storage/block.h"
|
|
#include "storage/itemptr.h"
|
|
#include "utils/memutils.h"
|
|
#include "utils/timestamp.h"
|
|
|
|
/*
|
|
* If you enable this, the "pattern" tests will print information about
|
|
* how long populating, probing, and iterating the test set takes, and
|
|
* how much memory the test set consumed. That can be used as
|
|
* micro-benchmark of various operations and input patterns (you might
|
|
* want to increase the number of values used in each of the test, if
|
|
* you do that, to reduce noise).
|
|
*
|
|
* The information is printed to the server's stderr, mostly because
|
|
* that's where MemoryContextStats() output goes.
|
|
*/
|
|
static const bool intset_test_stats = false;
|
|
|
|
PG_MODULE_MAGIC;
|
|
|
|
PG_FUNCTION_INFO_V1(test_integerset);
|
|
|
|
/*
|
|
* A struct to define a pattern of integers, for use with the test_pattern()
|
|
* function.
|
|
*/
|
|
typedef struct
|
|
{
|
|
char *test_name; /* short name of the test, for humans */
|
|
char *pattern_str; /* a bit pattern */
|
|
uint64 spacing; /* pattern repeats at this interval */
|
|
uint64 num_values; /* number of integers to set in total */
|
|
} test_spec;
|
|
|
|
static const test_spec test_specs[] = {
|
|
{
|
|
"all ones", "1111111111",
|
|
10, 10000000
|
|
},
|
|
{
|
|
"alternating bits", "0101010101",
|
|
10, 10000000
|
|
},
|
|
{
|
|
"clusters of ten", "1111111111",
|
|
10000, 10000000
|
|
},
|
|
{
|
|
"clusters of hundred",
|
|
"1111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111",
|
|
10000, 100000000
|
|
},
|
|
{
|
|
"one-every-64k", "1",
|
|
65536, 10000000
|
|
},
|
|
{
|
|
"sparse", "100000000000000000000000000000001",
|
|
10000000, 10000000
|
|
},
|
|
{
|
|
"single values, distance > 2^32", "1",
|
|
UINT64CONST(10000000000), 1000000
|
|
},
|
|
{
|
|
"clusters, distance > 2^32", "10101010",
|
|
UINT64CONST(10000000000), 10000000
|
|
},
|
|
{
|
|
"clusters, distance > 2^60", "10101010",
|
|
UINT64CONST(2000000000000000000),
|
|
23 /* can't be much higher than this, or we
|
|
* overflow uint64 */
|
|
}
|
|
};
|
|
|
|
static void test_pattern(const test_spec *spec);
|
|
static void test_empty(void);
|
|
static void test_single_value(uint64 value);
|
|
static void check_with_filler(IntegerSet *intset, uint64 x, uint64 value, uint64 filler_min, uint64 filler_max);
|
|
static void test_single_value_and_filler(uint64 value, uint64 filler_min, uint64 filler_max);
|
|
static void test_huge_distances(void);
|
|
|
|
/*
|
|
* SQL-callable entry point to perform all tests.
|
|
*/
|
|
Datum
|
|
test_integerset(PG_FUNCTION_ARGS)
|
|
{
|
|
/* Tests for various corner cases */
|
|
test_empty();
|
|
test_huge_distances();
|
|
test_single_value(0);
|
|
test_single_value(1);
|
|
test_single_value(PG_UINT64_MAX - 1);
|
|
test_single_value(PG_UINT64_MAX);
|
|
test_single_value_and_filler(0, 1000, 2000);
|
|
test_single_value_and_filler(1, 1000, 2000);
|
|
test_single_value_and_filler(1, 1000, 2000000);
|
|
test_single_value_and_filler(PG_UINT64_MAX - 1, 1000, 2000);
|
|
test_single_value_and_filler(PG_UINT64_MAX, 1000, 2000);
|
|
|
|
/* Test different test patterns, with lots of entries */
|
|
for (int i = 0; i < lengthof(test_specs); i++)
|
|
{
|
|
test_pattern(&test_specs[i]);
|
|
}
|
|
|
|
PG_RETURN_VOID();
|
|
}
|
|
|
|
/*
|
|
* Test with a repeating pattern, defined by the 'spec'.
|
|
*/
|
|
static void
|
|
test_pattern(const test_spec *spec)
|
|
{
|
|
IntegerSet *intset;
|
|
MemoryContext intset_ctx;
|
|
MemoryContext old_ctx;
|
|
TimestampTz starttime;
|
|
TimestampTz endtime;
|
|
uint64 n;
|
|
uint64 last_int;
|
|
int patternlen;
|
|
uint64 *pattern_values;
|
|
uint64 pattern_num_values;
|
|
|
|
elog(NOTICE, "testing intset with pattern \"%s\"", spec->test_name);
|
|
if (intset_test_stats)
|
|
fprintf(stderr, "-----\ntesting intset with pattern \"%s\"\n", spec->test_name);
|
|
|
|
/* Pre-process the pattern, creating an array of integers from it. */
|
|
patternlen = strlen(spec->pattern_str);
|
|
pattern_values = palloc(patternlen * sizeof(uint64));
|
|
pattern_num_values = 0;
|
|
for (int i = 0; i < patternlen; i++)
|
|
{
|
|
if (spec->pattern_str[i] == '1')
|
|
pattern_values[pattern_num_values++] = i;
|
|
}
|
|
|
|
/*
|
|
* Allocate the integer set.
|
|
*
|
|
* Allocate it in a separate memory context, so that we can print its
|
|
* memory usage easily. (intset_create() creates a memory context of its
|
|
* own, too, but we don't have direct access to it, so we cannot call
|
|
* MemoryContextStats() on it directly).
|
|
*/
|
|
intset_ctx = AllocSetContextCreate(CurrentMemoryContext,
|
|
"intset test",
|
|
ALLOCSET_SMALL_SIZES);
|
|
MemoryContextSetIdentifier(intset_ctx, spec->test_name);
|
|
old_ctx = MemoryContextSwitchTo(intset_ctx);
|
|
intset = intset_create();
|
|
MemoryContextSwitchTo(old_ctx);
|
|
|
|
/*
|
|
* Add values to the set.
|
|
*/
|
|
starttime = GetCurrentTimestamp();
|
|
|
|
n = 0;
|
|
last_int = 0;
|
|
while (n < spec->num_values)
|
|
{
|
|
uint64 x = 0;
|
|
|
|
for (int i = 0; i < pattern_num_values && n < spec->num_values; i++)
|
|
{
|
|
x = last_int + pattern_values[i];
|
|
|
|
intset_add_member(intset, x);
|
|
n++;
|
|
}
|
|
last_int += spec->spacing;
|
|
}
|
|
|
|
endtime = GetCurrentTimestamp();
|
|
|
|
if (intset_test_stats)
|
|
fprintf(stderr, "added " UINT64_FORMAT " values in %d ms\n",
|
|
spec->num_values, (int) (endtime - starttime) / 1000);
|
|
|
|
/*
|
|
* Print stats on the amount of memory used.
|
|
*
|
|
* We print the usage reported by intset_memory_usage(), as well as the
|
|
* stats from the memory context. They should be in the same ballpark,
|
|
* but it's hard to automate testing that, so if you're making changes to
|
|
* the implementation, just observe that manually.
|
|
*/
|
|
if (intset_test_stats)
|
|
{
|
|
uint64 mem_usage;
|
|
|
|
/*
|
|
* Also print memory usage as reported by intset_memory_usage(). It
|
|
* should be in the same ballpark as the usage reported by
|
|
* MemoryContextStats().
|
|
*/
|
|
mem_usage = intset_memory_usage(intset);
|
|
fprintf(stderr, "intset_memory_usage() reported " UINT64_FORMAT " (%0.2f bytes / integer)\n",
|
|
mem_usage, (double) mem_usage / spec->num_values);
|
|
|
|
MemoryContextStats(intset_ctx);
|
|
}
|
|
|
|
/* Check that intset_get_num_entries works */
|
|
n = intset_num_entries(intset);
|
|
if (n != spec->num_values)
|
|
elog(ERROR, "intset_num_entries returned " UINT64_FORMAT ", expected " UINT64_FORMAT, n, spec->num_values);
|
|
|
|
/*
|
|
* Test random-access probes with intset_is_member()
|
|
*/
|
|
starttime = GetCurrentTimestamp();
|
|
|
|
for (n = 0; n < 100000; n++)
|
|
{
|
|
bool b;
|
|
bool expected;
|
|
uint64 x;
|
|
|
|
/*
|
|
* Pick next value to probe at random. We limit the probes to the
|
|
* last integer that we added to the set, plus an arbitrary constant
|
|
* (1000). There's no point in probing the whole 0 - 2^64 range, if
|
|
* only a small part of the integer space is used. We would very
|
|
* rarely hit values that are actually in the set.
|
|
*/
|
|
x = (pg_lrand48() << 31) | pg_lrand48();
|
|
x = x % (last_int + 1000);
|
|
|
|
/* Do we expect this value to be present in the set? */
|
|
if (x >= last_int)
|
|
expected = false;
|
|
else
|
|
{
|
|
uint64 idx = x % spec->spacing;
|
|
|
|
if (idx >= patternlen)
|
|
expected = false;
|
|
else if (spec->pattern_str[idx] == '1')
|
|
expected = true;
|
|
else
|
|
expected = false;
|
|
}
|
|
|
|
/* Is it present according to intset_is_member() ? */
|
|
b = intset_is_member(intset, x);
|
|
|
|
if (b != expected)
|
|
elog(ERROR, "mismatch at " UINT64_FORMAT ": %d vs %d", x, b, expected);
|
|
}
|
|
endtime = GetCurrentTimestamp();
|
|
if (intset_test_stats)
|
|
fprintf(stderr, "probed " UINT64_FORMAT " values in %d ms\n",
|
|
n, (int) (endtime - starttime) / 1000);
|
|
|
|
/*
|
|
* Test iterator
|
|
*/
|
|
starttime = GetCurrentTimestamp();
|
|
|
|
intset_begin_iterate(intset);
|
|
n = 0;
|
|
last_int = 0;
|
|
while (n < spec->num_values)
|
|
{
|
|
for (int i = 0; i < pattern_num_values && n < spec->num_values; i++)
|
|
{
|
|
uint64 expected = last_int + pattern_values[i];
|
|
uint64 x;
|
|
|
|
if (!intset_iterate_next(intset, &x))
|
|
break;
|
|
|
|
if (x != expected)
|
|
elog(ERROR, "iterate returned wrong value; got " UINT64_FORMAT ", expected " UINT64_FORMAT, x, expected);
|
|
n++;
|
|
}
|
|
last_int += spec->spacing;
|
|
}
|
|
endtime = GetCurrentTimestamp();
|
|
if (intset_test_stats)
|
|
fprintf(stderr, "iterated " UINT64_FORMAT " values in %d ms\n",
|
|
n, (int) (endtime - starttime) / 1000);
|
|
|
|
if (n < spec->num_values)
|
|
elog(ERROR, "iterator stopped short after " UINT64_FORMAT " entries, expected " UINT64_FORMAT, n, spec->num_values);
|
|
if (n > spec->num_values)
|
|
elog(ERROR, "iterator returned " UINT64_FORMAT " entries, " UINT64_FORMAT " was expected", n, spec->num_values);
|
|
|
|
MemoryContextDelete(intset_ctx);
|
|
}
|
|
|
|
/*
|
|
* Test with a set containing a single integer.
|
|
*/
|
|
static void
|
|
test_single_value(uint64 value)
|
|
{
|
|
IntegerSet *intset;
|
|
uint64 x;
|
|
uint64 num_entries;
|
|
bool found;
|
|
|
|
elog(NOTICE, "testing intset with single value " UINT64_FORMAT, value);
|
|
|
|
/* Create the set. */
|
|
intset = intset_create();
|
|
intset_add_member(intset, value);
|
|
|
|
/* Test intset_get_num_entries() */
|
|
num_entries = intset_num_entries(intset);
|
|
if (num_entries != 1)
|
|
elog(ERROR, "intset_num_entries returned " UINT64_FORMAT ", expected 1", num_entries);
|
|
|
|
/*
|
|
* Test intset_is_member() at various special values, like 0 and maximum
|
|
* possible 64-bit integer, as well as the value itself.
|
|
*/
|
|
if (intset_is_member(intset, 0) != (value == 0))
|
|
elog(ERROR, "intset_is_member failed for 0");
|
|
if (intset_is_member(intset, 1) != (value == 1))
|
|
elog(ERROR, "intset_is_member failed for 1");
|
|
if (intset_is_member(intset, PG_UINT64_MAX) != (value == PG_UINT64_MAX))
|
|
elog(ERROR, "intset_is_member failed for PG_UINT64_MAX");
|
|
if (intset_is_member(intset, value) != true)
|
|
elog(ERROR, "intset_is_member failed for the tested value");
|
|
|
|
/*
|
|
* Test iterator
|
|
*/
|
|
intset_begin_iterate(intset);
|
|
found = intset_iterate_next(intset, &x);
|
|
if (!found || x != value)
|
|
elog(ERROR, "intset_iterate_next failed for " UINT64_FORMAT, x);
|
|
|
|
found = intset_iterate_next(intset, &x);
|
|
if (found)
|
|
elog(ERROR, "intset_iterate_next failed " UINT64_FORMAT, x);
|
|
}
|
|
|
|
/*
|
|
* Test with an integer set that contains:
|
|
*
|
|
* - a given single 'value', and
|
|
* - all integers between 'filler_min' and 'filler_max'.
|
|
*
|
|
* This exercises different codepaths than testing just with a single value,
|
|
* because the implementation buffers newly-added values. If we add just a
|
|
* single value to the set, we won't test the internal B-tree code at all,
|
|
* just the code that deals with the buffer.
|
|
*/
|
|
static void
|
|
test_single_value_and_filler(uint64 value, uint64 filler_min, uint64 filler_max)
|
|
{
|
|
IntegerSet *intset;
|
|
uint64 x;
|
|
bool found;
|
|
uint64 *iter_expected;
|
|
uint64 n = 0;
|
|
uint64 num_entries = 0;
|
|
uint64 mem_usage;
|
|
|
|
elog(NOTICE, "testing intset with value " UINT64_FORMAT ", and all between " UINT64_FORMAT " and " UINT64_FORMAT,
|
|
value, filler_min, filler_max);
|
|
|
|
intset = intset_create();
|
|
|
|
iter_expected = palloc(sizeof(uint64) * (filler_max - filler_min + 1));
|
|
if (value < filler_min)
|
|
{
|
|
intset_add_member(intset, value);
|
|
iter_expected[n++] = value;
|
|
}
|
|
|
|
for (x = filler_min; x < filler_max; x++)
|
|
{
|
|
intset_add_member(intset, x);
|
|
iter_expected[n++] = x;
|
|
}
|
|
|
|
if (value >= filler_max)
|
|
{
|
|
intset_add_member(intset, value);
|
|
iter_expected[n++] = value;
|
|
}
|
|
|
|
/* Test intset_get_num_entries() */
|
|
num_entries = intset_num_entries(intset);
|
|
if (num_entries != n)
|
|
elog(ERROR, "intset_num_entries returned " UINT64_FORMAT ", expected " UINT64_FORMAT, num_entries, n);
|
|
|
|
/*
|
|
* Test intset_is_member() at various spots, at and around the values that
|
|
* we expect to be set, as well as 0 and the maximum possible value.
|
|
*/
|
|
check_with_filler(intset, 0,
|
|
value, filler_min, filler_max);
|
|
check_with_filler(intset, 1,
|
|
value, filler_min, filler_max);
|
|
check_with_filler(intset, filler_min - 1,
|
|
value, filler_min, filler_max);
|
|
check_with_filler(intset, filler_min,
|
|
value, filler_min, filler_max);
|
|
check_with_filler(intset, filler_min + 1,
|
|
value, filler_min, filler_max);
|
|
check_with_filler(intset, value - 1,
|
|
value, filler_min, filler_max);
|
|
check_with_filler(intset, value,
|
|
value, filler_min, filler_max);
|
|
check_with_filler(intset, value + 1,
|
|
value, filler_min, filler_max);
|
|
check_with_filler(intset, filler_max - 1,
|
|
value, filler_min, filler_max);
|
|
check_with_filler(intset, filler_max,
|
|
value, filler_min, filler_max);
|
|
check_with_filler(intset, filler_max + 1,
|
|
value, filler_min, filler_max);
|
|
check_with_filler(intset, PG_UINT64_MAX - 1,
|
|
value, filler_min, filler_max);
|
|
check_with_filler(intset, PG_UINT64_MAX,
|
|
value, filler_min, filler_max);
|
|
|
|
intset_begin_iterate(intset);
|
|
for (uint64 i = 0; i < n; i++)
|
|
{
|
|
found = intset_iterate_next(intset, &x);
|
|
if (!found || x != iter_expected[i])
|
|
elog(ERROR, "intset_iterate_next failed for " UINT64_FORMAT, x);
|
|
}
|
|
found = intset_iterate_next(intset, &x);
|
|
if (found)
|
|
elog(ERROR, "intset_iterate_next failed " UINT64_FORMAT, x);
|
|
|
|
mem_usage = intset_memory_usage(intset);
|
|
if (mem_usage < 5000 || mem_usage > 500000000)
|
|
elog(ERROR, "intset_memory_usage() reported suspicious value: " UINT64_FORMAT, mem_usage);
|
|
}
|
|
|
|
/*
|
|
* Helper function for test_single_value_and_filler.
|
|
*
|
|
* Calls intset_is_member() for value 'x', and checks that the result is what
|
|
* we expect.
|
|
*/
|
|
static void
|
|
check_with_filler(IntegerSet *intset, uint64 x,
|
|
uint64 value, uint64 filler_min, uint64 filler_max)
|
|
{
|
|
bool expected;
|
|
bool actual;
|
|
|
|
expected = (x == value || (filler_min <= x && x < filler_max));
|
|
|
|
actual = intset_is_member(intset, x);
|
|
|
|
if (actual != expected)
|
|
elog(ERROR, "intset_is_member failed for " UINT64_FORMAT, x);
|
|
}
|
|
|
|
/*
|
|
* Test empty set
|
|
*/
|
|
static void
|
|
test_empty(void)
|
|
{
|
|
IntegerSet *intset;
|
|
uint64 x;
|
|
|
|
elog(NOTICE, "testing intset with empty set");
|
|
|
|
intset = intset_create();
|
|
|
|
/* Test intset_is_member() */
|
|
if (intset_is_member(intset, 0) != false)
|
|
elog(ERROR, "intset_is_member on empty set returned true");
|
|
if (intset_is_member(intset, 1) != false)
|
|
elog(ERROR, "intset_is_member on empty set returned true");
|
|
if (intset_is_member(intset, PG_UINT64_MAX) != false)
|
|
elog(ERROR, "intset_is_member on empty set returned true");
|
|
|
|
/* Test iterator */
|
|
intset_begin_iterate(intset);
|
|
if (intset_iterate_next(intset, &x))
|
|
elog(ERROR, "intset_iterate_next on empty set returned a value (" UINT64_FORMAT ")", x);
|
|
}
|
|
|
|
/*
|
|
* Test with integers that are more than 2^60 apart.
|
|
*
|
|
* The Simple-8b encoding used by the set implementation can only encode
|
|
* values up to 2^60. That makes large differences like this interesting
|
|
* to test.
|
|
*/
|
|
static void
|
|
test_huge_distances(void)
|
|
{
|
|
IntegerSet *intset;
|
|
uint64 values[1000];
|
|
int num_values = 0;
|
|
uint64 val = 0;
|
|
bool found;
|
|
uint64 x;
|
|
|
|
elog(NOTICE, "testing intset with distances > 2^60 between values");
|
|
|
|
val = 0;
|
|
values[num_values++] = val;
|
|
|
|
/* Test differences on both sides of the 2^60 boundary. */
|
|
val += UINT64CONST(1152921504606846976) - 1; /* 2^60 - 1 */
|
|
values[num_values++] = val;
|
|
|
|
val += UINT64CONST(1152921504606846976) - 1; /* 2^60 - 1 */
|
|
values[num_values++] = val;
|
|
|
|
val += UINT64CONST(1152921504606846976); /* 2^60 */
|
|
values[num_values++] = val;
|
|
|
|
val += UINT64CONST(1152921504606846976); /* 2^60 */
|
|
values[num_values++] = val;
|
|
|
|
val += UINT64CONST(1152921504606846976); /* 2^60 */
|
|
values[num_values++] = val;
|
|
|
|
val += UINT64CONST(1152921504606846976) + 1; /* 2^60 + 1 */
|
|
values[num_values++] = val;
|
|
|
|
val += UINT64CONST(1152921504606846976) + 1; /* 2^60 + 1 */
|
|
values[num_values++] = val;
|
|
|
|
val += UINT64CONST(1152921504606846976) + 1; /* 2^60 + 1 */
|
|
values[num_values++] = val;
|
|
|
|
val += UINT64CONST(1152921504606846976) + 2; /* 2^60 + 2 */
|
|
values[num_values++] = val;
|
|
|
|
val += UINT64CONST(1152921504606846976) + 2; /* 2^60 + 2 */
|
|
values[num_values++] = val;
|
|
|
|
val += UINT64CONST(1152921504606846976); /* 2^60 */
|
|
values[num_values++] = val;
|
|
|
|
/*
|
|
* We're now very close to 2^64, so can't add large values anymore. But
|
|
* add more smaller values to the end, to make sure that all the above
|
|
* values get flushed and packed into the tree structure.
|
|
*/
|
|
while (num_values < 1000)
|
|
{
|
|
val += pg_lrand48();
|
|
values[num_values++] = val;
|
|
}
|
|
|
|
/* Create an IntegerSet using these values */
|
|
intset = intset_create();
|
|
for (int i = 0; i < num_values; i++)
|
|
intset_add_member(intset, values[i]);
|
|
|
|
/*
|
|
* Test intset_is_member() around each of these values
|
|
*/
|
|
for (int i = 0; i < num_values; i++)
|
|
{
|
|
uint64 x = values[i];
|
|
bool expected;
|
|
bool result;
|
|
|
|
if (x > 0)
|
|
{
|
|
expected = (values[i - 1] == x - 1);
|
|
result = intset_is_member(intset, x - 1);
|
|
if (result != expected)
|
|
elog(ERROR, "intset_is_member failed for " UINT64_FORMAT, x - 1);
|
|
}
|
|
|
|
result = intset_is_member(intset, x);
|
|
if (result != true)
|
|
elog(ERROR, "intset_is_member failed for " UINT64_FORMAT, x);
|
|
|
|
expected = (i != num_values - 1) ? (values[i + 1] == x + 1) : false;
|
|
result = intset_is_member(intset, x + 1);
|
|
if (result != expected)
|
|
elog(ERROR, "intset_is_member failed for " UINT64_FORMAT, x + 1);
|
|
}
|
|
|
|
/*
|
|
* Test iterator
|
|
*/
|
|
intset_begin_iterate(intset);
|
|
for (int i = 0; i < num_values; i++)
|
|
{
|
|
found = intset_iterate_next(intset, &x);
|
|
if (!found || x != values[i])
|
|
elog(ERROR, "intset_iterate_next failed for " UINT64_FORMAT, x);
|
|
}
|
|
found = intset_iterate_next(intset, &x);
|
|
if (found)
|
|
elog(ERROR, "intset_iterate_next failed " UINT64_FORMAT, x);
|
|
}
|