Add TIDStore, to store sets of TIDs (ItemPointerData) efficiently.

TIDStore is a data structure designed to efficiently store large sets
of TIDs. For TID storage, it employs a radix tree, where the key is
a block number, and the value is a bitmap representing offset
numbers. The TIDStore can be created on a DSA area and used by
multiple backend processes simultaneously.

There are potential future users such as tidbitmap.c, though it's very
likely the interface will need to evolve as we come to understand the
needs of different kinds of users. For example, we can support
updating the offset bitmap of existing values.

Currently, the TIDStore is not used for anything yet, aside from the
test code. But an upcoming patch will use it.

This includes a unit test module, in src/test/modules/test_tidstore.

Co-authored-by: John Naylor
Discussion: https://postgr.es/m/CAD21AoAfOZvmfR0j8VmZorZjL7RhTiQdVttNuC4W-Shdc2a-AA%40mail.gmail.com
This commit is contained in:
Masahiko Sawada 2024-03-21 10:08:42 +09:00
parent 995e0fbc1c
commit 30e144287a
15 changed files with 1091 additions and 0 deletions

View File

@ -25,6 +25,7 @@ OBJS = \
scankey.o \
session.o \
syncscan.o \
tidstore.o \
toast_compression.o \
toast_internals.o \
tupconvert.o \

View File

@ -13,6 +13,7 @@ backend_sources += files(
'scankey.c',
'session.c',
'syncscan.c',
'tidstore.c',
'toast_compression.c',
'toast_internals.c',
'tupconvert.c',

View File

@ -0,0 +1,463 @@
/*-------------------------------------------------------------------------
*
* tidstore.c
* TID (ItemPointerData) storage implementation.
*
* TidStore is a in-memory data structure to store TIDs (ItemPointerData).
* Internally it uses a radix tree as the storage for TIDs. The key is the
* BlockNumber and the value is a bitmap of offsets, BlocktableEntry.
*
* TidStore can be shared among parallel worker processes by passing DSA area
* to TidStoreCreate(). Other backends can attach to the shared TidStore by
* TidStoreAttach().
*
* Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* src/backend/access/common/tidstore.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/tidstore.h"
#include "miscadmin.h"
#include "nodes/bitmapset.h"
#include "storage/lwlock.h"
#include "utils/dsa.h"
#define WORDNUM(x) ((x) / BITS_PER_BITMAPWORD)
#define BITNUM(x) ((x) % BITS_PER_BITMAPWORD)
/* number of active words for a page: */
#define WORDS_PER_PAGE(n) ((n) / BITS_PER_BITMAPWORD + 1)
/*
* This is named similarly to PagetableEntry in tidbitmap.c
* because the two have a similar function.
*/
typedef struct BlocktableEntry
{
uint16 nwords;
bitmapword words[FLEXIBLE_ARRAY_MEMBER];
} BlocktableEntry;
#define MaxBlocktableEntrySize \
offsetof(BlocktableEntry, words) + \
(sizeof(bitmapword) * WORDS_PER_PAGE(MaxOffsetNumber))
#define RT_PREFIX local_ts
#define RT_SCOPE static
#define RT_DECLARE
#define RT_DEFINE
#define RT_VALUE_TYPE BlocktableEntry
#define RT_VARLEN_VALUE_SIZE(page) \
(offsetof(BlocktableEntry, words) + \
sizeof(bitmapword) * (page)->nwords)
#include "lib/radixtree.h"
#define RT_PREFIX shared_ts
#define RT_SHMEM
#define RT_SCOPE static
#define RT_DECLARE
#define RT_DEFINE
#define RT_VALUE_TYPE BlocktableEntry
#define RT_VARLEN_VALUE_SIZE(page) \
(offsetof(BlocktableEntry, words) + \
sizeof(bitmapword) * (page)->nwords)
#include "lib/radixtree.h"
/* Per-backend state for a TidStore */
struct TidStore
{
/* MemoryContext where the TidStore is allocated */
MemoryContext context;
/* MemoryContext that the radix tree uses */
MemoryContext rt_context;
/* Storage for TIDs. Use either one depending on TidStoreIsShared() */
union
{
local_ts_radix_tree *local;
shared_ts_radix_tree *shared;
} tree;
/* DSA area for TidStore if using shared memory */
dsa_area *area;
};
#define TidStoreIsShared(ts) ((ts)->area != NULL)
/* Iterator for TidStore */
struct TidStoreIter
{
TidStore *ts;
/* iterator of radix tree. Use either one depending on TidStoreIsShared() */
union
{
shared_ts_iter *shared;
local_ts_iter *local;
} tree_iter;
/* output for the caller */
TidStoreIterResult output;
};
static void tidstore_iter_extract_tids(TidStoreIter *iter, BlockNumber blkno,
BlocktableEntry *page);
/*
* Create a TidStore. The TidStore will live in the memory context that is
* CurrentMemoryContext at the time of this call. The TID storage, backed
* by a radix tree, will live in its child memory context, rt_context. The
* TidStore will be limited to (approximately) max_bytes total memory
* consumption. If the 'area' is non-NULL, the radix tree is created in the
* DSA area.
*
* The returned object is allocated in backend-local memory.
*/
TidStore *
TidStoreCreate(size_t max_bytes, dsa_area *area, int tranche_id)
{
TidStore *ts;
size_t initBlockSize = ALLOCSET_DEFAULT_INITSIZE;
size_t minContextSize = ALLOCSET_DEFAULT_MINSIZE;
size_t maxBlockSize = ALLOCSET_DEFAULT_MAXSIZE;
ts = palloc0(sizeof(TidStore));
ts->context = CurrentMemoryContext;
/* choose the maxBlockSize to be no larger than 1/16 of max_bytes */
while (16 * maxBlockSize > max_bytes * 1024L)
maxBlockSize >>= 1;
if (maxBlockSize < ALLOCSET_DEFAULT_INITSIZE)
maxBlockSize = ALLOCSET_DEFAULT_INITSIZE;
/* Create a memory context for the TID storage */
ts->rt_context = AllocSetContextCreate(CurrentMemoryContext,
"TID storage",
minContextSize,
initBlockSize,
maxBlockSize);
if (area != NULL)
{
ts->tree.shared = shared_ts_create(ts->rt_context, area,
tranche_id);
ts->area = area;
}
else
ts->tree.local = local_ts_create(ts->rt_context);
return ts;
}
/*
* Attach to the shared TidStore using the given handle. The returned object
* is allocated in backend-local memory using the CurrentMemoryContext.
*/
TidStore *
TidStoreAttach(dsa_area *area, dsa_pointer handle)
{
TidStore *ts;
Assert(area != NULL);
Assert(DsaPointerIsValid(handle));
/* create per-backend state */
ts = palloc0(sizeof(TidStore));
/* Find the shared the shared radix tree */
ts->tree.shared = shared_ts_attach(area, handle);
ts->area = area;
return ts;
}
/*
* Detach from a TidStore. This detaches from radix tree and frees the
* backend-local resources. The radix tree will continue to exist until
* it is either explicitly destroyed, or the area that backs it is returned
* to the operating system.
*/
void
TidStoreDetach(TidStore *ts)
{
Assert(TidStoreIsShared(ts));
shared_ts_detach(ts->tree.shared);
pfree(ts);
}
/*
* Lock support functions.
*
* We can use the radix tree's lock for shared TidStore as the data we
* need to protect is only the shared radix tree.
*/
void
TidStoreLockExclusive(TidStore *ts)
{
if (TidStoreIsShared(ts))
shared_ts_lock_exclusive(ts->tree.shared);
}
void
TidStoreLockShare(TidStore *ts)
{
if (TidStoreIsShared(ts))
shared_ts_lock_share(ts->tree.shared);
}
void
TidStoreUnlock(TidStore *ts)
{
if (TidStoreIsShared(ts))
shared_ts_unlock(ts->tree.shared);
}
/*
* Destroy a TidStore, returning all memory.
*
* Note that the caller must be certain that no other backend will attempt to
* access the TidStore before calling this function. Other backend must
* explicitly call TidStoreDetach() to free up backend-local memory associated
* with the TidStore. The backend that calls TidStoreDestroy() must not call
* TidStoreDetach().
*/
void
TidStoreDestroy(TidStore *ts)
{
/* Destroy underlying radix tree */
if (TidStoreIsShared(ts))
shared_ts_free(ts->tree.shared);
else
local_ts_free(ts->tree.local);
MemoryContextDelete(ts->rt_context);
pfree(ts);
}
/*
* Create or replace an entry for the given block and array of offsets.
*
* NB: This function is designed and optimized for vacuum's heap scanning
* phase, so has some limitations:
*
* - The offset numbers "offsets" must be sorted in ascending order.
* - If the block number already exists, the entry will be replaced --
* there is no way to add or remove offsets from an entry.
*/
void
TidStoreSetBlockOffsets(TidStore *ts, BlockNumber blkno, OffsetNumber *offsets,
int num_offsets)
{
char data[MaxBlocktableEntrySize];
BlocktableEntry *page = (BlocktableEntry *) data;
bitmapword word;
int wordnum;
int next_word_threshold;
int idx = 0;
Assert(num_offsets > 0);
/* Check if the given offset numbers are ordered */
for (int i = 1; i < num_offsets; i++)
Assert(offsets[i] > offsets[i - 1]);
for (wordnum = 0, next_word_threshold = BITS_PER_BITMAPWORD;
wordnum <= WORDNUM(offsets[num_offsets - 1]);
wordnum++, next_word_threshold += BITS_PER_BITMAPWORD)
{
word = 0;
while (idx < num_offsets)
{
OffsetNumber off = offsets[idx];
/* safety check to ensure we don't overrun bit array bounds */
if (!OffsetNumberIsValid(off))
elog(ERROR, "tuple offset out of range: %u", off);
if (off >= next_word_threshold)
break;
word |= ((bitmapword) 1 << BITNUM(off));
idx++;
}
/* write out offset bitmap for this wordnum */
page->words[wordnum] = word;
}
page->nwords = wordnum;
Assert(page->nwords == WORDS_PER_PAGE(offsets[num_offsets - 1]));
if (TidStoreIsShared(ts))
shared_ts_set(ts->tree.shared, blkno, page);
else
local_ts_set(ts->tree.local, blkno, page);
}
/* Return true if the given TID is present in the TidStore */
bool
TidStoreIsMember(TidStore *ts, ItemPointer tid)
{
int wordnum;
int bitnum;
BlocktableEntry *page;
BlockNumber blk = ItemPointerGetBlockNumber(tid);
OffsetNumber off = ItemPointerGetOffsetNumber(tid);
if (TidStoreIsShared(ts))
page = shared_ts_find(ts->tree.shared, blk);
else
page = local_ts_find(ts->tree.local, blk);
/* no entry for the blk */
if (page == NULL)
return false;
wordnum = WORDNUM(off);
bitnum = BITNUM(off);
/* no bitmap for the off */
if (wordnum >= page->nwords)
return false;
return (page->words[wordnum] & ((bitmapword) 1 << bitnum)) != 0;
}
/*
* Prepare to iterate through a TidStore.
*
* The TidStoreIter struct is created in the caller's memory context, and it
* will be freed in TidStoreEndIterate.
*
* The caller is responsible for locking TidStore until the iteration is
* finished.
*/
TidStoreIter *
TidStoreBeginIterate(TidStore *ts)
{
TidStoreIter *iter;
iter = palloc0(sizeof(TidStoreIter));
iter->ts = ts;
/*
* We start with an array large enough to contain at least the offsets
* from one completely full bitmap element.
*/
iter->output.max_offset = 2 * BITS_PER_BITMAPWORD;
iter->output.offsets = palloc(sizeof(OffsetNumber) * iter->output.max_offset);
if (TidStoreIsShared(ts))
iter->tree_iter.shared = shared_ts_begin_iterate(ts->tree.shared);
else
iter->tree_iter.local = local_ts_begin_iterate(ts->tree.local);
return iter;
}
/*
* Scan the TidStore and return the TIDs of the next block. The offsets in
* each iteration result are ordered, as are the block numbers over all
* iterations.
*/
TidStoreIterResult *
TidStoreIterateNext(TidStoreIter *iter)
{
uint64 key;
BlocktableEntry *page;
if (TidStoreIsShared(iter->ts))
page = shared_ts_iterate_next(iter->tree_iter.shared, &key);
else
page = local_ts_iterate_next(iter->tree_iter.local, &key);
if (page == NULL)
return NULL;
/* Collect TIDs from the key-value pair */
tidstore_iter_extract_tids(iter, (BlockNumber) key, page);
return &(iter->output);
}
/*
* Finish the iteration on TidStore.
*
* The caller is responsible for releasing any locks.
*/
void
TidStoreEndIterate(TidStoreIter *iter)
{
if (TidStoreIsShared(iter->ts))
shared_ts_end_iterate(iter->tree_iter.shared);
else
local_ts_end_iterate(iter->tree_iter.local);
pfree(iter->output.offsets);
pfree(iter);
}
/*
* Return the memory usage of TidStore.
*/
size_t
TidStoreMemoryUsage(TidStore *ts)
{
if (TidStoreIsShared(ts))
return shared_ts_memory_usage(ts->tree.shared);
else
return local_ts_memory_usage(ts->tree.local);
}
dsa_pointer
TidStoreGetHandle(TidStore *ts)
{
Assert(TidStoreIsShared(ts));
return (dsa_pointer) shared_ts_get_handle(ts->tree.shared);
}
/* Extract TIDs from the given key-value pair */
static void
tidstore_iter_extract_tids(TidStoreIter *iter, BlockNumber blkno,
BlocktableEntry *page)
{
TidStoreIterResult *result = (&iter->output);
int wordnum;
result->num_offsets = 0;
result->blkno = blkno;
for (wordnum = 0; wordnum < page->nwords; wordnum++)
{
bitmapword w = page->words[wordnum];
int off = wordnum * BITS_PER_BITMAPWORD;
/* Make sure there is enough space to add offsets */
if ((result->num_offsets + BITS_PER_BITMAPWORD) > result->max_offset)
{
result->max_offset *= 2;
result->offsets = repalloc(result->offsets,
sizeof(OffsetNumber) * result->max_offset);
}
while (w != 0)
{
if (w & 1)
result->offsets[result->num_offsets++] = (OffsetNumber) off;
off++;
w >>= 1;
}
}
}

View File

@ -0,0 +1,49 @@
/*-------------------------------------------------------------------------
*
* tidstore.h
* TidStore interface.
*
*
* Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/include/access/tidstore.h
*
*-------------------------------------------------------------------------
*/
#ifndef TIDSTORE_H
#define TIDSTORE_H
#include "storage/itemptr.h"
#include "utils/dsa.h"
typedef struct TidStore TidStore;
typedef struct TidStoreIter TidStoreIter;
/* Result struct for TidStoreIterateNext */
typedef struct TidStoreIterResult
{
BlockNumber blkno;
int max_offset;
int num_offsets;
OffsetNumber *offsets;
} TidStoreIterResult;
extern TidStore *TidStoreCreate(size_t max_bytes, dsa_area *dsa,
int tranche_id);
extern TidStore *TidStoreAttach(dsa_area *dsa, dsa_pointer rt_dp);
extern void TidStoreDetach(TidStore *ts);
extern void TidStoreLockExclusive(TidStore *ts);
extern void TidStoreLockShare(TidStore *ts);
extern void TidStoreUnlock(TidStore *ts);
extern void TidStoreDestroy(TidStore *ts);
extern void TidStoreSetBlockOffsets(TidStore *ts, BlockNumber blkno, OffsetNumber *offsets,
int num_offsets);
extern bool TidStoreIsMember(TidStore *ts, ItemPointer tid);
extern TidStoreIter *TidStoreBeginIterate(TidStore *ts);
extern TidStoreIterResult *TidStoreIterateNext(TidStoreIter *iter);
extern void TidStoreEndIterate(TidStoreIter *iter);
extern size_t TidStoreMemoryUsage(TidStore *ts);
extern dsa_pointer TidStoreGetHandle(TidStore *ts);
#endif /* TIDSTORE_H */

View File

@ -35,6 +35,7 @@ SUBDIRS = \
test_rls_hooks \
test_shm_mq \
test_slru \
test_tidstore \
unsafe_tests \
worker_spi \
xid_wraparound

View File

@ -34,6 +34,7 @@ subdir('test_resowner')
subdir('test_rls_hooks')
subdir('test_shm_mq')
subdir('test_slru')
subdir('test_tidstore')
subdir('unsafe_tests')
subdir('worker_spi')
subdir('xid_wraparound')

View File

@ -0,0 +1,4 @@
# Generated subdirectories
/log/
/results/
/tmp_check/

View File

@ -0,0 +1,23 @@
# src/test/modules/test_tidstore/Makefile
MODULE_big = test_tidstore
OBJS = \
$(WIN32RES) \
test_tidstore.o
PGFILEDESC = "test_tidstore - test code for src/backend/access/common/tidstore.c"
EXTENSION = test_tidstore
DATA = test_tidstore--1.0.sql
REGRESS = test_tidstore
ifdef USE_PGXS
PG_CONFIG = pg_config
PGXS := $(shell $(PG_CONFIG) --pgxs)
include $(PGXS)
else
subdir = src/test/modules/test_tidstore
top_builddir = ../../../..
include $(top_builddir)/src/Makefile.global
include $(top_srcdir)/contrib/contrib-global.mk
endif

View File

@ -0,0 +1,97 @@
-- Note: The test code use an array of TIDs for verification similar
-- to vacuum's dead item array pre-PG17. To avoid adding duplicates,
-- each call to do_set_block_offsets() should use different block
-- numbers.
CREATE EXTENSION test_tidstore;
-- To hide the output of do_set_block_offsets()
CREATE TEMP TABLE hideblocks(blockno bigint);
-- Constant values used in the tests.
\set maxblkno 4294967295
-- The maximum number of heap tuples (MaxHeapTuplesPerPage) in 8kB block is 291.
-- We use a higher number to test tidstore.
\set maxoffset 512
SELECT test_create(false);
test_create
-------------
(1 row)
-- Test on empty tidstore.
SELECT test_is_full();
test_is_full
--------------
f
(1 row)
SELECT check_set_block_offsets();
check_set_block_offsets
-------------------------
(1 row)
-- Add TIDs.
INSERT INTO hideblocks (blockno)
SELECT do_set_block_offsets(blk, array_agg(off)::int2[])
FROM
(VALUES (0), (1), (:maxblkno / 2), (:maxblkno - 1), (:maxblkno)) AS blocks(blk),
(VALUES (1), (2), (:maxoffset / 2), (:maxoffset - 1), (:maxoffset)) AS offsets(off)
GROUP BY blk;
-- Add enough TIDs to cause the store to appear "full", compared
-- to the allocated memory it started out with. This is easier
-- with memory contexts in local memory.
INSERT INTO hideblocks (blockno)
SELECT do_set_block_offsets(blk, ARRAY[1,31,32,63,64,200]::int2[])
FROM generate_series(1000, 2000, 1) blk;
-- Zero offset not allowed
SELECT do_set_block_offsets(1, ARRAY[0]::int2[]);
ERROR: tuple offset out of range: 0
-- Check TIDs we've added to the store.
SELECT check_set_block_offsets();
check_set_block_offsets
-------------------------
(1 row)
SELECT test_is_full();
test_is_full
--------------
t
(1 row)
-- Re-create the TID store for randommized tests.
SELECT test_destroy();
test_destroy
--------------
(1 row)
-- Use shared memory this time. We can't do that in test_radixtree.sql,
-- because unused static functions would raise warnings there.
SELECT test_create(true);
test_create
-------------
(1 row)
-- Random TIDs test. The offset numbers are randomized and must be
-- unique and ordered.
INSERT INTO hideblocks (blockno)
SELECT do_set_block_offsets(blkno, array_agg(DISTINCT greatest((random() * :maxoffset)::int, 1))::int2[])
FROM generate_series(1, 100) num_offsets,
generate_series(1000, 1100, 1) blkno
GROUP BY blkno;
-- Check TIDs we've added to the store.
SELECT check_set_block_offsets();
check_set_block_offsets
-------------------------
(1 row)
-- cleanup
SELECT test_destroy();
test_destroy
--------------
(1 row)
DROP TABLE hideblocks;

View File

@ -0,0 +1,33 @@
# Copyright (c) 2024, PostgreSQL Global Development Group
test_tidstore_sources = files(
'test_tidstore.c',
)
if host_system == 'windows'
test_tidstore_sources += rc_lib_gen.process(win32ver_rc, extra_args: [
'--NAME', 'test_tidstore',
'--FILEDESC', 'test_tidstore - test code for src/backend/access/common/tidstore.c',])
endif
test_tidstore = shared_module('test_tidstore',
test_tidstore_sources,
kwargs: pg_test_mod_args,
)
test_install_libs += test_tidstore
test_install_data += files(
'test_tidstore.control',
'test_tidstore--1.0.sql',
)
tests += {
'name': 'test_tidstore',
'sd': meson.current_source_dir(),
'bd': meson.current_build_dir(),
'regress': {
'sql': [
'test_tidstore',
],
},
}

View File

@ -0,0 +1,65 @@
-- Note: The test code use an array of TIDs for verification similar
-- to vacuum's dead item array pre-PG17. To avoid adding duplicates,
-- each call to do_set_block_offsets() should use different block
-- numbers.
CREATE EXTENSION test_tidstore;
-- To hide the output of do_set_block_offsets()
CREATE TEMP TABLE hideblocks(blockno bigint);
-- Constant values used in the tests.
\set maxblkno 4294967295
-- The maximum number of heap tuples (MaxHeapTuplesPerPage) in 8kB block is 291.
-- We use a higher number to test tidstore.
\set maxoffset 512
SELECT test_create(false);
-- Test on empty tidstore.
SELECT test_is_full();
SELECT check_set_block_offsets();
-- Add TIDs.
INSERT INTO hideblocks (blockno)
SELECT do_set_block_offsets(blk, array_agg(off)::int2[])
FROM
(VALUES (0), (1), (:maxblkno / 2), (:maxblkno - 1), (:maxblkno)) AS blocks(blk),
(VALUES (1), (2), (:maxoffset / 2), (:maxoffset - 1), (:maxoffset)) AS offsets(off)
GROUP BY blk;
-- Add enough TIDs to cause the store to appear "full", compared
-- to the allocated memory it started out with. This is easier
-- with memory contexts in local memory.
INSERT INTO hideblocks (blockno)
SELECT do_set_block_offsets(blk, ARRAY[1,31,32,63,64,200]::int2[])
FROM generate_series(1000, 2000, 1) blk;
-- Zero offset not allowed
SELECT do_set_block_offsets(1, ARRAY[0]::int2[]);
-- Check TIDs we've added to the store.
SELECT check_set_block_offsets();
SELECT test_is_full();
-- Re-create the TID store for randommized tests.
SELECT test_destroy();
-- Use shared memory this time. We can't do that in test_radixtree.sql,
-- because unused static functions would raise warnings there.
SELECT test_create(true);
-- Random TIDs test. The offset numbers are randomized and must be
-- unique and ordered.
INSERT INTO hideblocks (blockno)
SELECT do_set_block_offsets(blkno, array_agg(DISTINCT greatest((random() * :maxoffset)::int, 1))::int2[])
FROM generate_series(1, 100) num_offsets,
generate_series(1000, 1100, 1) blkno
GROUP BY blkno;
-- Check TIDs we've added to the store.
SELECT check_set_block_offsets();
-- cleanup
SELECT test_destroy();
DROP TABLE hideblocks;

View File

@ -0,0 +1,27 @@
/* src/test/modules/test_tidstore/test_tidstore--1.0.sql */
-- complain if script is sourced in psql, rather than via CREATE EXTENSION
\echo Use "CREATE EXTENSION test_tidstore" to load this file. \quit
CREATE FUNCTION test_create(
shared bool)
RETURNS void STRICT PARALLEL UNSAFE
AS 'MODULE_PATHNAME' LANGUAGE C;
CREATE FUNCTION do_set_block_offsets(
blkno bigint,
offsets int2[])
RETURNS bigint STRICT PARALLEL UNSAFE
AS 'MODULE_PATHNAME' LANGUAGE C;
CREATE FUNCTION check_set_block_offsets()
RETURNS void STRICT PARALLEL UNSAFE
AS 'MODULE_PATHNAME' LANGUAGE C;
CREATE FUNCTION test_is_full()
RETURNS bool STRICT PARALLEL UNSAFE
AS 'MODULE_PATHNAME' LANGUAGE C;
CREATE FUNCTION test_destroy()
RETURNS void STRICT PARALLEL UNSAFE
AS 'MODULE_PATHNAME' LANGUAGE C;

View File

@ -0,0 +1,317 @@
/*--------------------------------------------------------------------------
*
* test_tidstore.c
* Test TidStore data structure.
*
* Note: all locking in this test module is useless since there is only
* a single process to use the TidStore. It is meant to be an example of
* usage.
*
* Copyright (c) 2024, PostgreSQL Global Development Group
*
* IDENTIFICATION
* src/test/modules/test_tidstore/test_tidstore.c
*
* -------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/tidstore.h"
#include "fmgr.h"
#include "funcapi.h"
#include "storage/block.h"
#include "storage/itemptr.h"
#include "storage/lwlock.h"
#include "utils/array.h"
#include "utils/memutils.h"
PG_MODULE_MAGIC;
PG_FUNCTION_INFO_V1(test_create);
PG_FUNCTION_INFO_V1(do_set_block_offsets);
PG_FUNCTION_INFO_V1(check_set_block_offsets);
PG_FUNCTION_INFO_V1(test_is_full);
PG_FUNCTION_INFO_V1(test_destroy);
static TidStore *tidstore = NULL;
static dsa_area *dsa = NULL;
static size_t tidstore_empty_size;
/* array for verification of some tests */
typedef struct ItemArray
{
ItemPointerData *insert_tids;
ItemPointerData *lookup_tids;
ItemPointerData *iter_tids;
int max_tids;
int num_tids;
} ItemArray;
static ItemArray items;
/* comparator routine for ItemPointer */
static int
itemptr_cmp(const void *left, const void *right)
{
BlockNumber lblk,
rblk;
OffsetNumber loff,
roff;
lblk = ItemPointerGetBlockNumber((ItemPointer) left);
rblk = ItemPointerGetBlockNumber((ItemPointer) right);
if (lblk < rblk)
return -1;
if (lblk > rblk)
return 1;
loff = ItemPointerGetOffsetNumber((ItemPointer) left);
roff = ItemPointerGetOffsetNumber((ItemPointer) right);
if (loff < roff)
return -1;
if (loff > roff)
return 1;
return 0;
}
/*
* Create a TidStore. If shared is false, the tidstore is created
* on TopMemoryContext, otherwise on DSA. Although the tidstore
* is created on DSA, only the same process can subsequently use
* the tidstore. The tidstore handle is not shared anywhere.
*/
Datum
test_create(PG_FUNCTION_ARGS)
{
bool shared = PG_GETARG_BOOL(0);
MemoryContext old_ctx;
/* doesn't really matter, since it's just a hint */
size_t tidstore_max_size = 2 * 1024 * 1024;
size_t array_init_size = 1024;
Assert(tidstore == NULL);
Assert(dsa == NULL);
/*
* Create the TidStore on TopMemoryContext so that the same process use it
* for subsequent tests.
*/
old_ctx = MemoryContextSwitchTo(TopMemoryContext);
if (shared)
{
int tranche_id;
tranche_id = LWLockNewTrancheId();
LWLockRegisterTranche(tranche_id, "test_tidstore");
dsa = dsa_create(tranche_id);
/*
* Remain attached until end of backend or explicitly detached so that
* the same process use the tidstore for subsequent tests.
*/
dsa_pin_mapping(dsa);
tidstore = TidStoreCreate(tidstore_max_size, dsa, tranche_id);
}
else
tidstore = TidStoreCreate(tidstore_max_size, NULL, 0);
tidstore_empty_size = TidStoreMemoryUsage(tidstore);
items.num_tids = 0;
items.max_tids = array_init_size / sizeof(ItemPointerData);
items.insert_tids = (ItemPointerData *) palloc0(array_init_size);
items.lookup_tids = (ItemPointerData *) palloc0(array_init_size);
items.iter_tids = (ItemPointerData *) palloc0(array_init_size);
MemoryContextSwitchTo(old_ctx);
PG_RETURN_VOID();
}
static void
sanity_check_array(ArrayType *ta)
{
if (ARR_HASNULL(ta) && array_contains_nulls(ta))
ereport(ERROR,
(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
errmsg("array must not contain nulls")));
if (ARR_NDIM(ta) > 1)
ereport(ERROR,
(errcode(ERRCODE_DATA_EXCEPTION),
errmsg("argument must be empty or one-dimensional array")));
}
/* Set the given block and offsets pairs */
Datum
do_set_block_offsets(PG_FUNCTION_ARGS)
{
BlockNumber blkno = PG_GETARG_INT64(0);
ArrayType *ta = PG_GETARG_ARRAYTYPE_P_COPY(1);
OffsetNumber *offs;
int noffs;
sanity_check_array(ta);
noffs = ArrayGetNItems(ARR_NDIM(ta), ARR_DIMS(ta));
offs = ((OffsetNumber *) ARR_DATA_PTR(ta));
/* Set TIDs in the store */
TidStoreLockExclusive(tidstore);
TidStoreSetBlockOffsets(tidstore, blkno, offs, noffs);
TidStoreUnlock(tidstore);
/* Set TIDs in verification array */
for (int i = 0; i < noffs; i++)
{
ItemPointer tid;
int idx = items.num_tids + i;
/* Enlarge the TID arrays if necessary */
if (idx >= items.max_tids)
{
items.max_tids *= 2;
items.insert_tids = repalloc(items.insert_tids, sizeof(ItemPointerData) * items.max_tids);
items.lookup_tids = repalloc(items.lookup_tids, sizeof(ItemPointerData) * items.max_tids);
items.iter_tids = repalloc(items.iter_tids, sizeof(ItemPointerData) * items.max_tids);
}
tid = &(items.insert_tids[idx]);
ItemPointerSet(tid, blkno, offs[i]);
}
/* Update statistics */
items.num_tids += noffs;
PG_RETURN_INT64(blkno);
}
/*
* Verify TIDs in store against the array.
*/
Datum
check_set_block_offsets(PG_FUNCTION_ARGS)
{
TidStoreIter *iter;
TidStoreIterResult *iter_result;
int num_iter_tids = 0;
int num_lookup_tids = 0;
BlockNumber prevblkno = 0;;
/* lookup each member in the verification array */
for (int i = 0; i < items.num_tids; i++)
if (!TidStoreIsMember(tidstore, &items.insert_tids[i]))
elog(ERROR, "missing TID with block %u, offset %u",
ItemPointerGetBlockNumber(&items.insert_tids[i]),
ItemPointerGetOffsetNumber(&items.insert_tids[i]));
/*
* Lookup all possible TIDs for each distinct block in the verification
* array and save successful lookups in the lookup array.
*/
for (int i = 0; i < items.num_tids; i++)
{
BlockNumber blkno = ItemPointerGetBlockNumber(&items.insert_tids[i]);
if (i > 0 && blkno == prevblkno)
continue;
for (OffsetNumber offset = FirstOffsetNumber; offset < MaxOffsetNumber; offset++)
{
ItemPointerData tid;
ItemPointerSet(&tid, blkno, offset);
TidStoreLockShare(tidstore);
if (TidStoreIsMember(tidstore, &tid))
ItemPointerSet(&items.lookup_tids[num_lookup_tids++], blkno, offset);
TidStoreUnlock(tidstore);
}
prevblkno = blkno;
}
/* Collect TIDs stored in the tidstore, in order */
TidStoreLockShare(tidstore);
iter = TidStoreBeginIterate(tidstore);
while ((iter_result = TidStoreIterateNext(iter)) != NULL)
{
for (int i = 0; i < iter_result->num_offsets; i++)
ItemPointerSet(&(items.iter_tids[num_iter_tids++]), iter_result->blkno,
iter_result->offsets[i]);
}
TidStoreEndIterate(iter);
TidStoreUnlock(tidstore);
/*
* Sort verification and lookup arrays and test that all arrays are the
* same.
*/
if (num_lookup_tids != items.num_tids)
elog(ERROR, "should have %d TIDs, have %d", items.num_tids, num_lookup_tids);
if (num_iter_tids != items.num_tids)
elog(ERROR, "should have %d TIDs, have %d", items.num_tids, num_iter_tids);
qsort(items.insert_tids, items.num_tids, sizeof(ItemPointerData), itemptr_cmp);
qsort(items.lookup_tids, items.num_tids, sizeof(ItemPointerData), itemptr_cmp);
for (int i = 0; i < items.num_tids; i++)
{
if (itemptr_cmp((const void *) &items.insert_tids[i], (const void *) &items.iter_tids[i]) != 0)
elog(ERROR, "TID iter array doesn't match verification array, got (%u,%u) expected (%u,%u)",
ItemPointerGetBlockNumber(&items.iter_tids[i]),
ItemPointerGetOffsetNumber(&items.iter_tids[i]),
ItemPointerGetBlockNumber(&items.insert_tids[i]),
ItemPointerGetOffsetNumber(&items.insert_tids[i]));
if (itemptr_cmp((const void *) &items.insert_tids[i], (const void *) &items.lookup_tids[i]) != 0)
elog(ERROR, "TID lookup array doesn't match verification array, got (%u,%u) expected (%u,%u)",
ItemPointerGetBlockNumber(&items.lookup_tids[i]),
ItemPointerGetOffsetNumber(&items.lookup_tids[i]),
ItemPointerGetBlockNumber(&items.insert_tids[i]),
ItemPointerGetOffsetNumber(&items.insert_tids[i]));
}
PG_RETURN_VOID();
}
/*
* In real world use, we care if the memory usage is greater than
* some configured limit. Here we just want to verify that
* TidStoreMemoryUsage is not broken.
*/
Datum
test_is_full(PG_FUNCTION_ARGS)
{
bool is_full;
is_full = (TidStoreMemoryUsage(tidstore) > tidstore_empty_size);
PG_RETURN_BOOL(is_full);
}
/* Free the tidstore */
Datum
test_destroy(PG_FUNCTION_ARGS)
{
TidStoreDestroy(tidstore);
tidstore = NULL;
items.num_tids = 0;
pfree(items.insert_tids);
pfree(items.lookup_tids);
pfree(items.iter_tids);
if (dsa)
dsa_detach(dsa);
dsa = NULL;
PG_RETURN_VOID();
}

View File

@ -0,0 +1,4 @@
comment = 'Test code for tidstore'
default_version = '1.0'
module_pathname = '$libdir/test_tidstore'
relocatable = true

View File

@ -4065,3 +4065,8 @@ rfile
ws_options
ws_file_info
PathKeyInfo
TidStore
TidStoreIter
TidStoreIterResult
BlocktableEntry
ItemArray