Add regression test for BRIN parallel builds

Adds a regression test for parallel CREATE INDEX for BRIN indexes, to
improve coverage for BRIN code, particularly code to allow parallel
index builds introduced by b437571714.

The test is added to pageinspect, as that allows comparing the index to
one built without parallelism. Another option would be to just build the
index with parallelism and then check that it produces correct query
results. But checking that the index is exactly the same as one built
without parallelism makes such query checks unnecessary.

Discussion: https://postgr.es/m/1df00a66-db5a-4e66-809a-99b386a06d86%40enterprisedb.com
This commit is contained in:
Tomas Vondra 2024-04-14 18:58:30 +02:00
parent bb616ed3e6
commit 8225c2fd40
2 changed files with 192 additions and 0 deletions

View File

@ -108,3 +108,104 @@ SELECT (COUNT(*) = (SELECT relpages FROM pg_class WHERE relname = 'test2')) AS r
DROP TABLE test1;
DROP TABLE test2;
-- Test that parallel index build produces the same BRIN index as serial build.
CREATE TABLE brin_parallel_test (a int, b text, c bigint) WITH (fillfactor=40);
-- Generate a table with a mix of NULLs and non-NULL values (and data suitable
-- for the different opclasses we build later).
INSERT INTO brin_parallel_test
SELECT (CASE WHEN (mod(i,231) = 0) OR (i BETWEEN 3500 AND 4000) THEN NULL ELSE i END),
(CASE WHEN (mod(i,233) = 0) OR (i BETWEEN 3750 AND 4250) THEN NULL ELSE md5(i::text) END),
(CASE WHEN (mod(i,233) = 0) OR (i BETWEEN 3850 AND 4500) THEN NULL ELSE (i/100) + mod(i,8) END)
FROM generate_series(1,5000) S(i);
-- Delete a couple pages, to make the ranges empty.
DELETE FROM brin_parallel_test WHERE a BETWEEN 1000 AND 1500;
-- Vacuum to remove the tuples and make the ranges actually empty.
VACUUM brin_parallel_test;
-- Build an index with different opclasses - minmax, bloom and minmax-multi.
--
-- For the minmax and bloom opclasses this is simple, but for minmax-multi we
-- need to be careful, because the result depends on the order in which values
-- are added to the summary, which in turn affects how the values are merged
-- etc. The order of merging results from workers has a similar effect. All
-- those summaries should produce correct query results, but it means we can't
-- compare them using equality (which is what EXCEPT does). To work around this
-- issue, we generated the data to only have a very small number of distinct
-- values per range, so that no merging is needed. This makes the results
-- deterministic.
-- build index without parallelism
SET max_parallel_maintenance_workers = 0;
CREATE INDEX brin_test_serial_idx ON brin_parallel_test
USING brin (a int4_minmax_ops, a int4_bloom_ops, b, c int8_minmax_multi_ops)
WITH (pages_per_range=7);
-- build index using parallelism
--
-- Set a couple parameters to force parallel build for small table. There's a
-- requirement for table size, so disable that. Also, plan_create_index_workers
-- assumes each worker will use work_mem=32MB for sorting (which works for btree,
-- but not really for BRIN), so we set maintenance_work_mem for 4 workers.
SET min_parallel_table_scan_size = 0;
SET max_parallel_maintenance_workers = 4;
SET maintenance_work_mem = '128MB';
CREATE INDEX brin_test_parallel_idx ON brin_parallel_test
USING brin (a int4_minmax_ops, a int4_bloom_ops, b, c int8_minmax_multi_ops)
WITH (pages_per_range=7);
-- Both indexes are expected to have the same number of pages.
SELECT relname, relpages
FROM pg_class
WHERE relname IN ('brin_test_serial_idx', 'brin_test_parallel_idx')
ORDER BY relname;
relname | relpages
------------------------+----------
brin_test_parallel_idx | 3
brin_test_serial_idx | 3
(2 rows)
-- Check that (A except B) and (B except A) is empty, which means the indexes
-- are the same.
SELECT * FROM brin_page_items(get_raw_page('brin_test_parallel_idx', 2), 'brin_test_parallel_idx')
EXCEPT
SELECT * FROM brin_page_items(get_raw_page('brin_test_serial_idx', 2), 'brin_test_serial_idx');
itemoffset | blknum | attnum | allnulls | hasnulls | placeholder | empty | value
------------+--------+--------+----------+----------+-------------+-------+-------
(0 rows)
SELECT * FROM brin_page_items(get_raw_page('brin_test_serial_idx', 2), 'brin_test_serial_idx')
EXCEPT
SELECT * FROM brin_page_items(get_raw_page('brin_test_parallel_idx', 2), 'brin_test_parallel_idx');
itemoffset | blknum | attnum | allnulls | hasnulls | placeholder | empty | value
------------+--------+--------+----------+----------+-------------+-------+-------
(0 rows)
DROP INDEX brin_test_parallel_idx;
-- force parallel build, but don't allow starting parallel workers to force
-- fallback to serial build, and repeat the checks
SET max_parallel_workers = 0;
CREATE INDEX brin_test_parallel_idx ON brin_parallel_test
USING brin (a int4_minmax_ops, a int4_bloom_ops, b, c int8_minmax_multi_ops)
WITH (pages_per_range=7);
-- Both indexes are expected to have the same number of pages.
SELECT relname, relpages
FROM pg_class
WHERE relname IN ('brin_test_serial_idx', 'brin_test_parallel_idx')
ORDER BY relname;
relname | relpages
------------------------+----------
brin_test_parallel_idx | 3
brin_test_serial_idx | 3
(2 rows)
SELECT * FROM brin_page_items(get_raw_page('brin_test_parallel_idx', 2), 'brin_test_parallel_idx')
EXCEPT
SELECT * FROM brin_page_items(get_raw_page('brin_test_serial_idx', 2), 'brin_test_serial_idx');
itemoffset | blknum | attnum | allnulls | hasnulls | placeholder | empty | value
------------+--------+--------+----------+----------+-------------+-------+-------
(0 rows)
SELECT * FROM brin_page_items(get_raw_page('brin_test_serial_idx', 2), 'brin_test_serial_idx')
EXCEPT
SELECT * FROM brin_page_items(get_raw_page('brin_test_parallel_idx', 2), 'brin_test_parallel_idx');
itemoffset | blknum | attnum | allnulls | hasnulls | placeholder | empty | value
------------+--------+--------+----------+----------+-------------+-------+-------
(0 rows)
DROP TABLE brin_parallel_test;
-- Restore every setting changed by this test (including max_parallel_workers),
-- so that nothing leaks into tests added after this point.
RESET min_parallel_table_scan_size;
RESET max_parallel_maintenance_workers;
RESET max_parallel_workers;
RESET maintenance_work_mem;

View File

@ -54,3 +54,94 @@ SELECT (COUNT(*) = (SELECT relpages FROM pg_class WHERE relname = 'test2')) AS r
DROP TABLE test1;
DROP TABLE test2;
-- Test that parallel index build produces the same BRIN index as serial build.
CREATE TABLE brin_parallel_test (a int, b text, c bigint) WITH (fillfactor=40);
-- Generate a table with a mix of NULLs and non-NULL values (and data suitable
-- for the different opclasses we build later).
INSERT INTO brin_parallel_test
SELECT (CASE WHEN (mod(i,231) = 0) OR (i BETWEEN 3500 AND 4000) THEN NULL ELSE i END),
(CASE WHEN (mod(i,233) = 0) OR (i BETWEEN 3750 AND 4250) THEN NULL ELSE md5(i::text) END),
(CASE WHEN (mod(i,233) = 0) OR (i BETWEEN 3850 AND 4500) THEN NULL ELSE (i/100) + mod(i,8) END)
FROM generate_series(1,5000) S(i);
-- Delete a couple pages, to make the ranges empty.
DELETE FROM brin_parallel_test WHERE a BETWEEN 1000 AND 1500;
-- Vacuum to remove the tuples and make the ranges actually empty.
VACUUM brin_parallel_test;
-- Build an index with different opclasses - minmax, bloom and minmax-multi.
--
-- For the minmax and bloom opclasses this is simple, but for minmax-multi we
-- need to be careful, because the result depends on the order in which values
-- are added to the summary, which in turn affects how the values are merged
-- etc. The order of merging results from workers has a similar effect. All
-- those summaries should produce correct query results, but it means we can't
-- compare them using equality (which is what EXCEPT does). To work around this
-- issue, we generated the data to only have a very small number of distinct
-- values per range, so that no merging is needed. This makes the results
-- deterministic.
-- build index without parallelism
SET max_parallel_maintenance_workers = 0;
CREATE INDEX brin_test_serial_idx ON brin_parallel_test
USING brin (a int4_minmax_ops, a int4_bloom_ops, b, c int8_minmax_multi_ops)
WITH (pages_per_range=7);
-- build index using parallelism
--
-- Set a couple parameters to force parallel build for small table. There's a
-- requirement for table size, so disable that. Also, plan_create_index_workers
-- assumes each worker will use work_mem=32MB for sorting (which works for btree,
-- but not really for BRIN), so we set maintenance_work_mem for 4 workers.
SET min_parallel_table_scan_size = 0;
SET max_parallel_maintenance_workers = 4;
SET maintenance_work_mem = '128MB';
CREATE INDEX brin_test_parallel_idx ON brin_parallel_test
USING brin (a int4_minmax_ops, a int4_bloom_ops, b, c int8_minmax_multi_ops)
WITH (pages_per_range=7);
-- Both indexes are expected to have the same number of pages.
SELECT relname, relpages
FROM pg_class
WHERE relname IN ('brin_test_serial_idx', 'brin_test_parallel_idx')
ORDER BY relname;
-- Check that (A except B) and (B except A) is empty, which means the indexes
-- are the same.
SELECT * FROM brin_page_items(get_raw_page('brin_test_parallel_idx', 2), 'brin_test_parallel_idx')
EXCEPT
SELECT * FROM brin_page_items(get_raw_page('brin_test_serial_idx', 2), 'brin_test_serial_idx');
SELECT * FROM brin_page_items(get_raw_page('brin_test_serial_idx', 2), 'brin_test_serial_idx')
EXCEPT
SELECT * FROM brin_page_items(get_raw_page('brin_test_parallel_idx', 2), 'brin_test_parallel_idx');
DROP INDEX brin_test_parallel_idx;
-- force parallel build, but don't allow starting parallel workers to force
-- fallback to serial build, and repeat the checks
SET max_parallel_workers = 0;
CREATE INDEX brin_test_parallel_idx ON brin_parallel_test
USING brin (a int4_minmax_ops, a int4_bloom_ops, b, c int8_minmax_multi_ops)
WITH (pages_per_range=7);
-- Both indexes are expected to have the same number of pages.
SELECT relname, relpages
FROM pg_class
WHERE relname IN ('brin_test_serial_idx', 'brin_test_parallel_idx')
ORDER BY relname;
SELECT * FROM brin_page_items(get_raw_page('brin_test_parallel_idx', 2), 'brin_test_parallel_idx')
EXCEPT
SELECT * FROM brin_page_items(get_raw_page('brin_test_serial_idx', 2), 'brin_test_serial_idx');
SELECT * FROM brin_page_items(get_raw_page('brin_test_serial_idx', 2), 'brin_test_serial_idx')
EXCEPT
SELECT * FROM brin_page_items(get_raw_page('brin_test_parallel_idx', 2), 'brin_test_parallel_idx');
DROP TABLE brin_parallel_test;
-- Restore every setting changed by this test (including max_parallel_workers),
-- so that nothing leaks into tests added after this point.
RESET min_parallel_table_scan_size;
RESET max_parallel_maintenance_workers;
RESET max_parallel_workers;
RESET maintenance_work_mem;