From 554106b1163853757b72ce14d7db5050c32bfa6a Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Mon, 8 Jul 2019 14:51:53 -0400 Subject: [PATCH] tableam: Provide helper functions for relation sizing. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Most block-based table AMs will need the exact same implementation of the relation_size callback as the heap, and if they use a standard page layout, they will likely need an implementation of the relation_estimate_size callback that is very similar to that of the heap. Rearrange to facilitate code reuse. Patch by me, reviewed by Michael Paquier, Daniel Gustafsson, and Álvaro Herrera. Discussion: http://postgr.es/m/CA+TgmoZ6DBPnP1E-vRpQZUJQijJFD54F+SR_pxGiAAS-MyrigA@mail.gmail.com --- src/backend/access/heap/heapam_handler.c | 129 ++---------------- src/backend/access/table/tableam.c | 161 +++++++++++++++++++++++ src/include/access/tableam.h | 14 ++ 3 files changed, 185 insertions(+), 119 deletions(-) diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c index fc19f40a2e..09bc6fe98a 100644 --- a/src/backend/access/heap/heapam_handler.c +++ b/src/backend/access/heap/heapam_handler.c @@ -19,8 +19,6 @@ */ #include "postgres.h" -#include - #include "miscadmin.h" #include "access/genam.h" @@ -37,7 +35,6 @@ #include "catalog/storage_xlog.h" #include "commands/progress.h" #include "executor/executor.h" -#include "optimizer/plancat.h" #include "pgstat.h" #include "storage/bufmgr.h" #include "storage/bufpage.h" @@ -1991,26 +1988,6 @@ heapam_scan_get_blocks_done(HeapScanDesc hscan) * ------------------------------------------------------------------------ */ -static uint64 -heapam_relation_size(Relation rel, ForkNumber forkNumber) -{ - uint64 nblocks = 0; - - /* Open it at the smgr level if not already done */ - RelationOpenSmgr(rel); - - /* InvalidForkNumber indicates returning the size for all forks */ - if (forkNumber == InvalidForkNumber) - { - for (int i = 0; i < MAX_FORKNUM; i++) - nblocks += smgrnblocks(rel->rd_smgr, i); - } - else - nblocks = smgrnblocks(rel->rd_smgr, forkNumber); - - return nblocks * BLCKSZ; -} - /* * Check to see whether the table needs a TOAST table. It does only if * (1) there are any toastable attributes, and (2) the maximum length @@ -2068,106 +2045,20 @@ heapam_relation_needs_toast_table(Relation rel) * ------------------------------------------------------------------------ */ +#define HEAP_OVERHEAD_BYTES_PER_TUPLE \ + (MAXALIGN(SizeofHeapTupleHeader) + sizeof(ItemIdData)) +#define HEAP_USABLE_BYTES_PER_PAGE \ + (BLCKSZ - SizeOfPageHeaderData) + static void heapam_estimate_rel_size(Relation rel, int32 *attr_widths, BlockNumber *pages, double *tuples, double *allvisfrac) { - BlockNumber curpages; - BlockNumber relpages; - double reltuples; - BlockNumber relallvisible; - double density; - - /* it has storage, ok to call the smgr */ - curpages = RelationGetNumberOfBlocks(rel); - - /* coerce values in pg_class to more desirable types */ - relpages = (BlockNumber) rel->rd_rel->relpages; - reltuples = (double) rel->rd_rel->reltuples; - relallvisible = (BlockNumber) rel->rd_rel->relallvisible; - - /* - * HACK: if the relation has never yet been vacuumed, use a minimum size - * estimate of 10 pages. The idea here is to avoid assuming a - * newly-created table is really small, even if it currently is, because - * that may not be true once some data gets loaded into it. Once a vacuum - * or analyze cycle has been done on it, it's more reasonable to believe - * the size is somewhat stable. - * - * (Note that this is only an issue if the plan gets cached and used again - * after the table has been filled. What we're trying to avoid is using a - * nestloop-type plan on a table that has grown substantially since the - * plan was made. Normally, autovacuum/autoanalyze will occur once enough - * inserts have happened and cause cached-plan invalidation; but that - * doesn't happen instantaneously, and it won't happen at all for cases - * such as temporary tables.) - * - * We approximate "never vacuumed" by "has relpages = 0", which means this - * will also fire on genuinely empty relations. Not great, but - * fortunately that's a seldom-seen case in the real world, and it - * shouldn't degrade the quality of the plan too much anyway to err in - * this direction. - * - * If the table has inheritance children, we don't apply this heuristic. - * Totally empty parent tables are quite common, so we should be willing - * to believe that they are empty. - */ - if (curpages < 10 && - relpages == 0 && - !rel->rd_rel->relhassubclass) - curpages = 10; - - /* report estimated # pages */ - *pages = curpages; - /* quick exit if rel is clearly empty */ - if (curpages == 0) - { - *tuples = 0; - *allvisfrac = 0; - return; - } - - /* estimate number of tuples from previous tuple density */ - if (relpages > 0) - density = reltuples / (double) relpages; - else - { - /* - * When we have no data because the relation was truncated, estimate - * tuple width from attribute datatypes. We assume here that the - * pages are completely full, which is OK for tables (since they've - * presumably not been VACUUMed yet) but is probably an overestimate - * for indexes. Fortunately get_relation_info() can clamp the - * overestimate to the parent table's size. - * - * Note: this code intentionally disregards alignment considerations, - * because (a) that would be gilding the lily considering how crude - * the estimate is, and (b) it creates platform dependencies in the - * default plans which are kind of a headache for regression testing. - */ - int32 tuple_width; - - tuple_width = get_rel_data_width(rel, attr_widths); - tuple_width += MAXALIGN(SizeofHeapTupleHeader); - tuple_width += sizeof(ItemIdData); - /* note: integer division is intentional here */ - density = (BLCKSZ - SizeOfPageHeaderData) / tuple_width; - } - *tuples = rint(density * (double) curpages); - - /* - * We use relallvisible as-is, rather than scaling it up like we do for - * the pages and tuples counts, on the theory that any pages added since - * the last VACUUM are most likely not marked all-visible. But costsize.c - * wants it converted to a fraction. - */ - if (relallvisible == 0 || curpages <= 0) - *allvisfrac = 0; - else if ((double) relallvisible >= curpages) - *allvisfrac = 1; - else - *allvisfrac = (double) relallvisible / curpages; + table_block_relation_estimate_size(rel, attr_widths, pages, + tuples, allvisfrac, + HEAP_OVERHEAD_BYTES_PER_TUPLE, + HEAP_USABLE_BYTES_PER_PAGE); } @@ -2644,7 +2535,7 @@ static const TableAmRoutine heapam_methods = { .index_build_range_scan = heapam_index_build_range_scan, .index_validate_scan = heapam_index_validate_scan, - .relation_size = heapam_relation_size, + .relation_size = table_block_relation_size, .relation_needs_toast_table = heapam_relation_needs_toast_table, .relation_estimate_size = heapam_estimate_rel_size, diff --git a/src/backend/access/table/tableam.c b/src/backend/access/table/tableam.c index 9842579c0b..5d3f5c3f54 100644 --- a/src/backend/access/table/tableam.c +++ b/src/backend/access/table/tableam.c @@ -19,11 +19,15 @@ */ #include "postgres.h" +#include + #include "access/heapam.h" /* for ss_* */ #include "access/tableam.h" #include "access/xact.h" +#include "optimizer/plancat.h" #include "storage/bufmgr.h" #include "storage/shmem.h" +#include "storage/smgr.h" /* GUC variables */ @@ -486,3 +490,160 @@ table_block_parallelscan_nextpage(Relation rel, ParallelBlockTableScanDesc pbsca return page; } + +/* ---------------------------------------------------------------------------- + * Helper functions to implement relation sizing for block oriented AMs. + * ---------------------------------------------------------------------------- + */ + +/* + * table_block_relation_size + * + * If a table AM uses the various relation forks as the sole place where data + * is stored, and if it uses them in the expected manner (e.g. the actual data + * is in the main fork rather than some other), it can use this implementation + * of the relation_size callback rather than implementing its own. + */ +uint64 +table_block_relation_size(Relation rel, ForkNumber forkNumber) +{ + uint64 nblocks = 0; + + /* Open it at the smgr level if not already done */ + RelationOpenSmgr(rel); + + /* InvalidForkNumber indicates returning the size for all forks */ + if (forkNumber == InvalidForkNumber) + { + for (int i = 0; i < MAX_FORKNUM; i++) + nblocks += smgrnblocks(rel->rd_smgr, i); + } + else + nblocks = smgrnblocks(rel->rd_smgr, forkNumber); + + return nblocks * BLCKSZ; +} + +/* + * table_block_relation_estimate_size + * + * This function can't be directly used as the implementation of the + * relation_estimate_size callback, because it has a few additional parameters. + * Instead, it is intended to be used as a helper function; the caller can + * pass through the arguments to its relation_estimate_size function plus the + * additional values required here. + * + * overhead_bytes_per_tuple should contain the approximate number of bytes + * of storage required to store a tuple above and beyond what is required for + * the tuple data proper. Typically, this would include things like the + * size of the tuple header and item pointer. This is only used for query + * planning, so a table AM where the value is not constant could choose to + * pass a "best guess". + * + * usable_bytes_per_page should contain the approximate number of bytes per + * page usable for tuple data, excluding the page header and any anticipated + * special space. + */ +void +table_block_relation_estimate_size(Relation rel, int32 *attr_widths, + BlockNumber *pages, double *tuples, + double *allvisfrac, + Size overhead_bytes_per_tuple, + Size usable_bytes_per_page) +{ + BlockNumber curpages; + BlockNumber relpages; + double reltuples; + BlockNumber relallvisible; + double density; + + /* it should have storage, so we can call the smgr */ + curpages = RelationGetNumberOfBlocks(rel); + + /* coerce values in pg_class to more desirable types */ + relpages = (BlockNumber) rel->rd_rel->relpages; + reltuples = (double) rel->rd_rel->reltuples; + relallvisible = (BlockNumber) rel->rd_rel->relallvisible; + + /* + * HACK: if the relation has never yet been vacuumed, use a minimum size + * estimate of 10 pages. The idea here is to avoid assuming a + * newly-created table is really small, even if it currently is, because + * that may not be true once some data gets loaded into it. Once a vacuum + * or analyze cycle has been done on it, it's more reasonable to believe + * the size is somewhat stable. + * + * (Note that this is only an issue if the plan gets cached and used again + * after the table has been filled. What we're trying to avoid is using a + * nestloop-type plan on a table that has grown substantially since the + * plan was made. Normally, autovacuum/autoanalyze will occur once enough + * inserts have happened and cause cached-plan invalidation; but that + * doesn't happen instantaneously, and it won't happen at all for cases + * such as temporary tables.) + * + * We approximate "never vacuumed" by "has relpages = 0", which means this + * will also fire on genuinely empty relations. Not great, but + * fortunately that's a seldom-seen case in the real world, and it + * shouldn't degrade the quality of the plan too much anyway to err in + * this direction. + * + * If the table has inheritance children, we don't apply this heuristic. + * Totally empty parent tables are quite common, so we should be willing + * to believe that they are empty. + */ + if (curpages < 10 && + relpages == 0 && + !rel->rd_rel->relhassubclass) + curpages = 10; + + /* report estimated # pages */ + *pages = curpages; + /* quick exit if rel is clearly empty */ + if (curpages == 0) + { + *tuples = 0; + *allvisfrac = 0; + return; + } + + /* estimate number of tuples from previous tuple density */ + if (relpages > 0) + density = reltuples / (double) relpages; + else + { + /* + * When we have no data because the relation was truncated, estimate + * tuple width from attribute datatypes. We assume here that the + * pages are completely full, which is OK for tables (since they've + * presumably not been VACUUMed yet) but is probably an overestimate + * for indexes. Fortunately get_relation_info() can clamp the + * overestimate to the parent table's size. + * + * Note: this code intentionally disregards alignment considerations, + * because (a) that would be gilding the lily considering how crude + * the estimate is, (b) it creates platform dependencies in the + * default plans which are kind of a headache for regression testing, + * and (c) different table AMs might use different padding schemes. + */ + int32 tuple_width; + + tuple_width = get_rel_data_width(rel, attr_widths); + tuple_width += overhead_bytes_per_tuple; + /* note: integer division is intentional here */ + density = usable_bytes_per_page / tuple_width; + } + *tuples = rint(density * (double) curpages); + + /* + * We use relallvisible as-is, rather than scaling it up like we do for + * the pages and tuples counts, on the theory that any pages added since + * the last VACUUM are most likely not marked all-visible. But costsize.c + * wants it converted to a fraction. + */ + if (relallvisible == 0 || curpages <= 0) + *allvisfrac = 0; + else if ((double) relallvisible >= curpages) + *allvisfrac = 1; + else + *allvisfrac = (double) relallvisible / curpages; +} diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h index fd07773b74..7edfcf3ef9 100644 --- a/src/include/access/tableam.h +++ b/src/include/access/tableam.h @@ -1727,6 +1727,20 @@ extern void table_block_parallelscan_startblock_init(Relation rel, ParallelBlockTableScanDesc pbscan); +/* ---------------------------------------------------------------------------- + * Helper functions to implement relation sizing for block oriented AMs. + * ---------------------------------------------------------------------------- + */ + +extern uint64 table_block_relation_size(Relation rel, ForkNumber forkNumber); +extern void table_block_relation_estimate_size(Relation rel, + int32 *attr_widths, + BlockNumber *pages, + double *tuples, + double *allvisfrac, + Size overhead_bytes_per_tuple, + Size usable_bytes_per_page); + /* ---------------------------------------------------------------------------- * Functions in tableamapi.c * ----------------------------------------------------------------------------