Generalize relation analyze in table AM interface

Currently, there is just one algorithm for sampling tuples from a table written
in acquire_sample_rows().  A custom table AM can only redefine the way to get the
next block/tuple by implementing scan_analyze_next_block() and
scan_analyze_next_tuple() API functions.

This approach doesn't seem general enough.  For instance, it's unclear how to
sample index-organized tables this way.  This commit allows a table AM to
encapsulate the whole sampling algorithm (currently implemented in
acquire_sample_rows()) into the relation_analyze() API function.

Discussion: https://postgr.es/m/CAPpHfdurb9ycV8udYqM%3Do0sPS66PJ4RCBM1g-bBpvzUfogY0EA%40mail.gmail.com
Reviewed-by: Pavel Borisov, Matthias van de Meent
This commit is contained in:
Alexander Korotkov 2024-03-30 22:34:04 +02:00
parent b154d8a6d0
commit 27bc1772fc
7 changed files with 100 additions and 125 deletions

View File

@ -50,7 +50,6 @@ static TM_Result heapam_tuple_lock(Relation relation, ItemPointer tid,
CommandId cid, LockTupleMode mode,
LockWaitPolicy wait_policy, uint8 flags,
TM_FailureData *tmfd);
static void reform_and_rewrite_tuple(HeapTuple tuple,
Relation OldHeap, Relation NewHeap,
Datum *values, bool *isnull, RewriteState rwstate);
@ -1052,7 +1051,15 @@ heapam_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap,
pfree(isnull);
}
static bool
/*
* Prepare to analyze block `blockno` of `scan`. The scan has been started
* with SO_TYPE_ANALYZE option.
*
* This routine holds a buffer pin and lock on the heap page. They are held
* until heapam_scan_analyze_next_tuple() returns false, that is, until all the
* items of the heap page are analyzed.
*/
void
heapam_scan_analyze_next_block(TableScanDesc scan, BlockNumber blockno,
BufferAccessStrategy bstrategy)
{
@ -1072,12 +1079,19 @@ heapam_scan_analyze_next_block(TableScanDesc scan, BlockNumber blockno,
hscan->rs_cbuf = ReadBufferExtended(scan->rs_rd, MAIN_FORKNUM,
blockno, RBM_NORMAL, bstrategy);
LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
/* in heap all blocks can contain tuples, so always return true */
return true;
}
static bool
/*
* Iterate over tuples in the block selected with
* heapam_scan_analyze_next_block(). If a tuple that's suitable for sampling
* is found, true is returned and a tuple is stored in `slot`. When there are
* no more tuples for sampling, false is returned and the pin and lock acquired
* by heapam_scan_analyze_next_block() are released.
*
* *liverows and *deadrows are incremented according to the encountered
* tuples.
*/
bool
heapam_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin,
double *liverows, double *deadrows,
TupleTableSlot *slot)
@ -2637,10 +2651,9 @@ static const TableAmRoutine heapam_methods = {
.relation_copy_data = heapam_relation_copy_data,
.relation_copy_for_cluster = heapam_relation_copy_for_cluster,
.relation_vacuum = heap_vacuum_rel,
.scan_analyze_next_block = heapam_scan_analyze_next_block,
.scan_analyze_next_tuple = heapam_scan_analyze_next_tuple,
.index_build_range_scan = heapam_index_build_range_scan,
.index_validate_scan = heapam_index_validate_scan,
.relation_analyze = heapam_analyze,
.free_rd_amcache = NULL,
.relation_size = table_block_relation_size,

View File

@ -81,8 +81,6 @@ GetTableAmRoutine(Oid amhandler)
Assert(routine->relation_copy_data != NULL);
Assert(routine->relation_copy_for_cluster != NULL);
Assert(routine->relation_vacuum != NULL);
Assert(routine->scan_analyze_next_block != NULL);
Assert(routine->scan_analyze_next_tuple != NULL);
Assert(routine->index_build_range_scan != NULL);
Assert(routine->index_validate_scan != NULL);

View File

@ -17,6 +17,7 @@
#include <math.h>
#include "access/detoast.h"
#include "access/heapam.h"
#include "access/genam.h"
#include "access/multixact.h"
#include "access/relation.h"
@ -190,10 +191,9 @@ analyze_rel(Oid relid, RangeVar *relation,
if (onerel->rd_rel->relkind == RELKIND_RELATION ||
onerel->rd_rel->relkind == RELKIND_MATVIEW)
{
/* Regular table, so we'll use the regular row acquisition function */
acquirefunc = acquire_sample_rows;
/* Also get regular table's size */
relpages = RelationGetNumberOfBlocks(onerel);
/* Use row acquisition function provided by table AM */
table_relation_analyze(onerel, &acquirefunc,
&relpages, vac_strategy);
}
else if (onerel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
{
@ -1103,15 +1103,15 @@ examine_attribute(Relation onerel, int attnum, Node *index_expr)
}
/*
* acquire_sample_rows -- acquire a random sample of rows from the table
* acquire_sample_rows -- acquire a random sample of rows from the heap
*
* Selected rows are returned in the caller-allocated array rows[], which
* must have at least targrows entries.
* The actual number of rows selected is returned as the function result.
* We also estimate the total numbers of live and dead rows in the table,
* We also estimate the total numbers of live and dead rows in the heap,
* and return them into *totalrows and *totaldeadrows, respectively.
*
* The returned list of tuples is in order by physical position in the table.
* The returned list of tuples is in order by physical position in the heap.
* (We will rely on this later to derive correlation estimates.)
*
* As of May 2004 we use a new two-stage method: Stage one selects up
@ -1133,7 +1133,7 @@ examine_attribute(Relation onerel, int attnum, Node *index_expr)
* look at a statistically unbiased set of blocks, we should get
* unbiased estimates of the average numbers of live and dead rows per
* block. The previous sampling method put too much credence in the row
* density near the start of the table.
* density near the start of the heap.
*/
static int
acquire_sample_rows(Relation onerel, int elevel,
@ -1184,7 +1184,7 @@ acquire_sample_rows(Relation onerel, int elevel,
/* Prepare for sampling rows */
reservoir_init_selection_state(&rstate, targrows);
scan = table_beginscan_analyze(onerel);
scan = heap_beginscan(onerel, NULL, 0, NULL, NULL, SO_TYPE_ANALYZE);
slot = table_slot_create(onerel, NULL);
#ifdef USE_PREFETCH
@ -1214,7 +1214,6 @@ acquire_sample_rows(Relation onerel, int elevel,
/* Outer loop over blocks to sample */
while (BlockSampler_HasMore(&bs))
{
bool block_accepted;
BlockNumber targblock = BlockSampler_Next(&bs);
#ifdef USE_PREFETCH
BlockNumber prefetch_targblock = InvalidBlockNumber;
@ -1230,29 +1229,19 @@ acquire_sample_rows(Relation onerel, int elevel,
vacuum_delay_point();
block_accepted = table_scan_analyze_next_block(scan, targblock, vac_strategy);
heapam_scan_analyze_next_block(scan, targblock, vac_strategy);
#ifdef USE_PREFETCH
/*
* When pre-fetching, after we get a block, tell the kernel about the
* next one we will want, if there's any left.
*
* We want to do this even if the table_scan_analyze_next_block() call
* above decides against analyzing the block it picked.
*/
if (prefetch_maximum && prefetch_targblock != InvalidBlockNumber)
PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, prefetch_targblock);
#endif
/*
* Don't analyze if table_scan_analyze_next_block() indicated this
* block is unsuitable for analyzing.
*/
if (!block_accepted)
continue;
while (table_scan_analyze_next_tuple(scan, OldestXmin, &liverows, &deadrows, slot))
while (heapam_scan_analyze_next_tuple(scan, OldestXmin, &liverows, &deadrows, slot))
{
/*
* The first targrows sample rows are simply copied into the
@ -1302,7 +1291,7 @@ acquire_sample_rows(Relation onerel, int elevel,
}
ExecDropSingleTupleTableSlot(slot);
table_endscan(scan);
heap_endscan(scan);
/*
* If we didn't find as many tuples as we wanted then we're done. No sort
@ -1373,6 +1362,19 @@ compare_rows(const void *a, const void *b, void *arg)
return 0;
}
/*
* heapam_analyze -- implementation of relation_analyze() table access method
* callback for heap
*
* This routine does not sample any rows itself; it only reports how the
* relation should be sampled:
*
*   *func       receives acquire_sample_rows, the heap row-sampling routine.
*   *totalpages receives the value returned by RelationGetNumberOfBlocks()
*               for `relation`.
*
* The buffer access strategy is saved into vac_strategy (declared outside
* this function), from where acquire_sample_rows() later passes it to
* heapam_scan_analyze_next_block().
*/
void
heapam_analyze(Relation relation, AcquireSampleRowsFunc *func,
BlockNumber *totalpages, BufferAccessStrategy bstrategy)
{
*func = acquire_sample_rows;
*totalpages = RelationGetNumberOfBlocks(relation);
vac_strategy = bstrategy;
}
/*
* acquire_inherited_sample_rows -- acquire sample rows from inheritance tree
@ -1462,9 +1464,9 @@ acquire_inherited_sample_rows(Relation onerel, int elevel,
if (childrel->rd_rel->relkind == RELKIND_RELATION ||
childrel->rd_rel->relkind == RELKIND_MATVIEW)
{
/* Regular table, so use the regular row acquisition function */
acquirefunc = acquire_sample_rows;
relpages = RelationGetNumberOfBlocks(childrel);
/* Use row acquisition function provided by table AM */
table_relation_analyze(childrel, &acquirefunc,
&relpages, vac_strategy);
}
else if (childrel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
{

View File

@ -369,6 +369,15 @@ extern bool HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple);
extern bool HeapTupleIsSurelyDead(HeapTuple htup,
struct GlobalVisState *vistest);
/* in heap/heapam_handler.c */
extern void heapam_scan_analyze_next_block(TableScanDesc scan,
BlockNumber blockno,
BufferAccessStrategy bstrategy);
extern bool heapam_scan_analyze_next_tuple(TableScanDesc scan,
TransactionId OldestXmin,
double *liverows, double *deadrows,
TupleTableSlot *slot);
/*
* To avoid leaking too much knowledge about reorderbuffer implementation
* details this is implemented in reorderbuffer.c not heapam_visibility.c

View File

@ -20,6 +20,7 @@
#include "access/relscan.h"
#include "access/sdir.h"
#include "access/xact.h"
#include "commands/vacuum.h"
#include "executor/tuptable.h"
#include "utils/rel.h"
#include "utils/snapshot.h"
@ -658,41 +659,6 @@ typedef struct TableAmRoutine
struct VacuumParams *params,
BufferAccessStrategy bstrategy);
/*
* Prepare to analyze block `blockno` of `scan`. The scan has been started
* with table_beginscan_analyze(). See also
* table_scan_analyze_next_block().
*
* The callback may acquire resources like locks that are held until
* table_scan_analyze_next_tuple() returns false. It e.g. can make sense
* to hold a lock until all tuples on a block have been analyzed by
* scan_analyze_next_tuple.
*
* The callback can return false if the block is not suitable for
* sampling, e.g. because it's a metapage that could never contain tuples.
*
* XXX: This obviously is primarily suited for block-based AMs. It's not
* clear what a good interface for non block based AMs would be, so there
* isn't one yet.
*/
bool (*scan_analyze_next_block) (TableScanDesc scan,
BlockNumber blockno,
BufferAccessStrategy bstrategy);
/*
* See table_scan_analyze_next_tuple().
*
* Not every AM might have a meaningful concept of dead rows, in which
* case it's OK to not increment *deadrows - but note that that may
* influence autovacuum scheduling (see comment for relation_vacuum
* callback).
*/
bool (*scan_analyze_next_tuple) (TableScanDesc scan,
TransactionId OldestXmin,
double *liverows,
double *deadrows,
TupleTableSlot *slot);
/* see table_index_build_range_scan for reference about parameters */
double (*index_build_range_scan) (Relation table_rel,
Relation index_rel,
@ -713,6 +679,12 @@ typedef struct TableAmRoutine
Snapshot snapshot,
struct ValidateIndexState *state);
/* See table_relation_analyze() */
void (*relation_analyze) (Relation relation,
AcquireSampleRowsFunc *func,
BlockNumber *totalpages,
BufferAccessStrategy bstrategy);
/* ------------------------------------------------------------------------
* Miscellaneous functions.
@ -1008,19 +980,6 @@ table_beginscan_tid(Relation rel, Snapshot snapshot)
return rel->rd_tableam->scan_begin(rel, snapshot, 0, NULL, NULL, flags);
}
/*
* table_beginscan_analyze is an alternative entry point for setting up a
* TableScanDesc for an ANALYZE scan. As with bitmap scans, it's worth using
* the same data structure although the behavior is rather different.
*/
static inline TableScanDesc
table_beginscan_analyze(Relation rel)
{
uint32 flags = SO_TYPE_ANALYZE;
return rel->rd_tableam->scan_begin(rel, NULL, 0, NULL, NULL, flags);
}
/*
* End relation scan.
*/
@ -1746,42 +1705,6 @@ table_relation_vacuum(Relation rel, struct VacuumParams *params,
rel->rd_tableam->relation_vacuum(rel, params, bstrategy);
}
/*
* Prepare to analyze block `blockno` of `scan`. The scan needs to have been
* started with table_beginscan_analyze(). Note that this routine might
* acquire resources like locks that are held until
* table_scan_analyze_next_tuple() returns false.
*
* Returns false if block is unsuitable for sampling, true otherwise.
*/
static inline bool
table_scan_analyze_next_block(TableScanDesc scan, BlockNumber blockno,
BufferAccessStrategy bstrategy)
{
return scan->rs_rd->rd_tableam->scan_analyze_next_block(scan, blockno,
bstrategy);
}
/*
* Iterate over tuples in the block selected with
* table_scan_analyze_next_block() (which needs to have returned true, and
* this routine may not have returned false for the same block before). If a
* tuple that's suitable for sampling is found, true is returned and a tuple
* is stored in `slot`.
*
* *liverows and *deadrows are incremented according to the encountered
* tuples.
*/
static inline bool
table_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin,
double *liverows, double *deadrows,
TupleTableSlot *slot)
{
return scan->rs_rd->rd_tableam->scan_analyze_next_tuple(scan, OldestXmin,
liverows, deadrows,
slot);
}
/*
* table_index_build_scan - scan the table to find tuples to be indexed
*
@ -1887,6 +1810,21 @@ table_index_validate_scan(Relation table_rel,
state);
}
/*
* table_relation_analyze - fill the information for a sampling statistics
* acquisition
*
* The pointer to a function that will collect sample rows from the table
* should be stored to `*func`, plus the estimated size of the table in pages
* should be stored to `*totalpages`.
*
* This is a thin wrapper: all arguments are forwarded unchanged to the
* relation_analyze callback of the relation's table AM (rd_tableam).
*
* NOTE(review): the callback is invoked unconditionally here; no NULL check
* is visible in this wrapper -- confirm every table AM provides it.
*/
static inline void
table_relation_analyze(Relation relation, AcquireSampleRowsFunc *func,
BlockNumber *totalpages, BufferAccessStrategy bstrategy)
{
relation->rd_tableam->relation_analyze(relation, func,
totalpages, bstrategy);
}
/* ----------------------------------------------------------------------------
* Miscellaneous functionality

View File

@ -175,6 +175,21 @@ typedef struct VacAttrStats
int rowstride;
} VacAttrStats;
/*
* AcquireSampleRowsFunc - a function for the sampling statistics collection.
*
* A random sample of up to `targrows` rows should be collected from the
* table and stored into the caller-provided `rows` array. The actual number
* of rows collected must be returned. In addition, a function should store
* estimates of the total numbers of live and dead rows in the table into the
* output parameters `*totalrows` and `*totaldeadrows`. (Set `*totaldeadrows`
* to zero if the storage does not have any concept of dead rows.)
*/
typedef int (*AcquireSampleRowsFunc) (Relation relation, int elevel,
HeapTuple *rows, int targrows,
double *totalrows,
double *totaldeadrows);
/* flag bits for VacuumParams->options */
#define VACOPT_VACUUM 0x01 /* do VACUUM */
#define VACOPT_ANALYZE 0x02 /* do ANALYZE */
@ -380,6 +395,10 @@ extern void parallel_vacuum_main(dsm_segment *seg, shm_toc *toc);
extern void analyze_rel(Oid relid, RangeVar *relation,
VacuumParams *params, List *va_cols, bool in_outer_xact,
BufferAccessStrategy bstrategy);
extern void heapam_analyze(Relation relation, AcquireSampleRowsFunc *func,
BlockNumber *totalpages,
BufferAccessStrategy bstrategy);
extern bool std_typanalyze(VacAttrStats *stats);
/* in utils/misc/sampling.c --- duplicate of declarations in utils/sampling.h */

View File

@ -13,6 +13,7 @@
#define FDWAPI_H
#include "access/parallel.h"
#include "commands/vacuum.h"
#include "nodes/execnodes.h"
#include "nodes/pathnodes.h"
@ -148,11 +149,6 @@ typedef void (*ExplainForeignModify_function) (ModifyTableState *mtstate,
typedef void (*ExplainDirectModify_function) (ForeignScanState *node,
struct ExplainState *es);
typedef int (*AcquireSampleRowsFunc) (Relation relation, int elevel,
HeapTuple *rows, int targrows,
double *totalrows,
double *totaldeadrows);
typedef bool (*AnalyzeForeignTable_function) (Relation relation,
AcquireSampleRowsFunc *func,
BlockNumber *totalpages);