/*
 * Source listing: postgresql/src/backend/access/tablesample/system.c
 * (258 lines, 7.2 KiB, C)
 */

/*-------------------------------------------------------------------------
*
* system.c
* support routines for SYSTEM tablesample method
*
* To ensure repeatability of samples, it is necessary that selection of a
* given tuple be history-independent; otherwise syncscanning would break
* repeatability, to say nothing of logically-irrelevant maintenance such
* as physical extension or shortening of the relation.
*
* To achieve that, we proceed by hashing each candidate block number together
* with the active seed, and then selecting it if the hash is less than the
* cutoff value computed from the selection probability by BeginSampleScan.
*
*
* Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* src/backend/access/tablesample/system.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include <math.h>
#include "access/relscan.h"
#include "access/tsmapi.h"
#include "catalog/pg_type.h"
#include "optimizer/optimizer.h"
#include "utils/builtins.h"
#include "utils/hashutils.h"
/*
 * Private per-scan state for the SYSTEM sampling method.
 *
 * The struct is allocated zero-filled by system_initsamplescan and its
 * fields are (re)initialized by system_beginsamplescan.
 */
typedef struct
{
/*
 * Select blocks whose 32-bit hash is less than this.  It is a uint64
 * because the value can be as large as PG_UINT32_MAX + 1 (100% sampling).
 */
uint64 cutoff;
uint32 seed; /* random seed; hashed together with each block number */
BlockNumber nextblock; /* next block to consider sampling */
/*
 * Last tuple offset returned from the current block; InvalidOffsetNumber
 * when nothing has been returned yet or the block has been exhausted.
 */
OffsetNumber lt;
} SystemSamplerData;
/*
 * Forward declarations of the file-local callbacks that
 * tsm_system_handler installs into the TsmRoutine descriptor.
 */
static void system_samplescangetsamplesize(PlannerInfo *root,
										   RelOptInfo *baserel,
										   List *paramexprs,
										   BlockNumber *pages,
										   double *tuples);
static void system_initsamplescan(SampleScanState *node, int eflags);
static void system_beginsamplescan(SampleScanState *node,
								   Datum *params,
								   int nparams,
								   uint32 seed);
static BlockNumber system_nextsampleblock(SampleScanState *node,
										  BlockNumber nblocks);
static OffsetNumber system_nextsampletuple(SampleScanState *node,
										   BlockNumber blockno,
										   OffsetNumber maxoffset);
/*
* Create a TsmRoutine descriptor for the SYSTEM method.
*/
Datum
tsm_system_handler(PG_FUNCTION_ARGS)
{
TsmRoutine *tsm = makeNode(TsmRoutine);
tsm->parameterTypes = list_make1_oid(FLOAT4OID);
tsm->repeatable_across_queries = true;
tsm->repeatable_across_scans = true;
tsm->SampleScanGetSampleSize = system_samplescangetsamplesize;
tsm->InitSampleScan = system_initsamplescan;
tsm->BeginSampleScan = system_beginsamplescan;
tsm->NextSampleBlock = system_nextsampleblock;
tsm->NextSampleTuple = system_nextsampletuple;
tsm->EndSampleScan = NULL;
PG_RETURN_POINTER(tsm);
}
/*
 * system_samplescangetsamplesize
 *		Estimate how many pages and tuples a sample scan will visit.
 *
 * The first element of paramexprs is the sample percentage expression.  If
 * estimate_expression_value reduces it to a non-null Const whose value lies
 * in [0, 100] (and is not NaN), that percentage is used; in every other case
 * a sampling fraction of 0.1 is assumed.  The results are stored into
 * *pages and *tuples.
 */
static void
system_samplescangetsamplesize(PlannerInfo *root,
							   RelOptInfo *baserel,
							   List *paramexprs,
							   BlockNumber *pages,
							   double *tuples)
{
	/* Fallback fraction, used unless a usable constant percentage is found */
	float4		samplefract = 0.1f;
	Node	   *pctexpr;

	/* See whether the percentage can be estimated at plan time */
	pctexpr = estimate_expression_value(root, (Node *) linitial(paramexprs));

	if (IsA(pctexpr, Const))
	{
		Const	   *pctconst = (Const *) pctexpr;

		if (!pctconst->constisnull)
		{
			float4		pct = DatumGetFloat4(pctconst->constvalue);

			/* Out-of-range or NaN values keep the fallback fraction */
			if (!isnan(pct) && pct >= 0 && pct <= 100)
				samplefract = pct / 100.0f;
		}
	}

	/* Scale the relation's page count by the sampling fraction ... */
	*pages = clamp_row_est(baserel->pages * samplefract);

	/* ... and assume the tuple count scales the same way */
	*tuples = clamp_row_est(baserel->tuples * samplefract);
}
/*
 * system_initsamplescan
 *		Executor-setup hook: allocate the zero-filled private sampler state
 *		and attach it to the scan node.  eflags is not consulted.
 */
static void
system_initsamplescan(SampleScanState *node, int eflags)
{
	SystemSamplerData *sampler;

	sampler = (SystemSamplerData *) palloc0(sizeof(SystemSamplerData));
	node->tsm_state = sampler;
}
/*
 * system_beginsamplescan
 *		Validate the sample percentage and set up state for a new scan.
 *
 * params[0] is the float4 sample percentage; seed is the random seed to hash
 * block numbers with.  Raises ERRCODE_INVALID_TABLESAMPLE_ARGUMENT if the
 * percentage is NaN or outside [0, 100].
 */
static void
system_beginsamplescan(SampleScanState *node,
					   Datum *params,
					   int nparams,
					   uint32 seed)
{
	SystemSamplerData *state = (SystemSamplerData *) node->tsm_state;
	const double hashrange = (double) PG_UINT32_MAX + 1;
	double		pct = DatumGetFloat4(params[0]);
	double		scaled;

	if (isnan(pct) || pct < 0 || pct > 100)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_TABLESAMPLE_ARGUMENT),
				 errmsg("sample percentage must be between 0 and 100")));

	/*
	 * A block is selected when its 32-bit hash is below the cutoff, so the
	 * cutoff is the selection probability scaled to the number of possible
	 * hash values.  That can be as large as PG_UINT32_MAX + 1, hence the
	 * uint64 field.  This gives exact behavior at both 0% (nothing selected)
	 * and 100% (everything selected).
	 */
	scaled = rint(hashrange * pct / 100);
	state->cutoff = (uint64) scaled;

	/* Start the scan from the first block with no tuple returned yet */
	state->seed = seed;
	state->nextblock = 0;
	state->lt = InvalidOffsetNumber;

	/*
	 * Every tuple on each selected page is scanned, so pagemode visibility
	 * checking is appropriate.  A bulkread buffer strategy is requested
	 * unless only a very small fraction of the table is sampled; the 1%
	 * threshold is a guess.
	 */
	node->use_pagemode = true;
	node->use_bulkread = (pct >= 1);
}
/*
 * system_nextsampleblock
 *		Return the next block number to sample, or InvalidBlockNumber when
 *		no block below nblocks remains.
 *
 * Starting at the remembered position, each candidate block number is hashed
 * together with the seed; the first block whose hash is below the cutoff is
 * returned and the position after it is remembered for the next call.
 */
static BlockNumber
system_nextsampleblock(SampleScanState *node, BlockNumber nblocks)
{
	SystemSamplerData *state = (SystemSamplerData *) node->tsm_state;
	BlockNumber blkno;
	uint32		key[2];

	/*
	 * The hash key is a pair of uint32's: { block number, seed }, fed to
	 * hash_any.  That is cheap to set up, and with the current hash_any
	 * implementation the results are machine-independent, which is handy for
	 * regression testing.  The seed word never changes during the search, so
	 * fill it in once up front.
	 */
	key[1] = state->seed;

	for (blkno = state->nextblock; blkno < nblocks; blkno++)
	{
		uint32		hashval;

		key[0] = blkno;
		hashval = DatumGetUInt32(hash_any((const unsigned char *) key,
										  (int) sizeof(key)));

		if (hashval < state->cutoff)
		{
			/* Selected: resume from the following block on the next call */
			state->nextblock = blkno + 1;
			return blkno;
		}
	}

	/* Ran off the end of the relation; reset the position for safety */
	state->nextblock = 0;
	return InvalidBlockNumber;
}
/*
 * system_nextsampletuple
 *		Return the next tuple offset to sample within the current block.
 *
 * Block sampling returns every offset on each selected block, in order.
 * The offset is handed back without checking that a visible tuple (or any
 * tuple at all) lives there; nodeSamplescan.c takes care of that.
 *
 * Once maxoffset has been passed, InvalidOffsetNumber is returned, which
 * tells SampleScan to move on to the next block.
 */
static OffsetNumber
system_nextsampletuple(SampleScanState *node,
					   BlockNumber blockno,
					   OffsetNumber maxoffset)
{
	SystemSamplerData *state = (SystemSamplerData *) node->tsm_state;
	OffsetNumber prev = state->lt;
	OffsetNumber next;

	/* Begin at the first offset, otherwise step past the previous one */
	next = (prev == InvalidOffsetNumber)
		? FirstOffsetNumber
		: (OffsetNumber) (prev + 1);

	/* Past the end of the page?  Then this block is finished. */
	if (next > maxoffset)
		next = InvalidOffsetNumber;

	state->lt = next;
	return next;
}