/*-------------------------------------------------------------------------
*
* execAmi.c
* miscellaneous executor access method routines
*
* Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/backend/executor/execAmi.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/amapi.h"
#include "access/htup_details.h"
#include "executor/execdebug.h"
#include "executor/nodeAgg.h"
#include "executor/nodeAppend.h"
#include "executor/nodeBitmapAnd.h"
#include "executor/nodeBitmapHeapscan.h"
#include "executor/nodeBitmapIndexscan.h"
#include "executor/nodeBitmapOr.h"
#include "executor/nodeCtescan.h"
#include "executor/nodeCustom.h"
#include "executor/nodeForeignscan.h"
#include "executor/nodeFunctionscan.h"
#include "executor/nodeGather.h"
#include "executor/nodeGatherMerge.h"
#include "executor/nodeGroup.h"
#include "executor/nodeHash.h"
#include "executor/nodeHashjoin.h"
#include "executor/nodeIndexonlyscan.h"
#include "executor/nodeIndexscan.h"
#include "executor/nodeLimit.h"
#include "executor/nodeLockRows.h"
#include "executor/nodeMaterial.h"
#include "executor/nodeMergeAppend.h"
#include "executor/nodeMergejoin.h"
#include "executor/nodeModifyTable.h"
#include "executor/nodeNamedtuplestorescan.h"
#include "executor/nodeNestloop.h"
#include "executor/nodeProjectSet.h"
#include "executor/nodeRecursiveunion.h"
#include "executor/nodeResult.h"
#include "executor/nodeSamplescan.h"
#include "executor/nodeSeqscan.h"
#include "executor/nodeSetOp.h"
#include "executor/nodeSort.h"
#include "executor/nodeSubplan.h"
#include "executor/nodeSubqueryscan.h"
#include "executor/nodeTableFuncscan.h"
#include "executor/nodeTidscan.h"
#include "executor/nodeUnique.h"
#include "executor/nodeValuesscan.h"
#include "executor/nodeWindowAgg.h"
#include "executor/nodeWorktablescan.h"
#include "nodes/extensible.h"
#include "nodes/nodeFuncs.h"
#include "nodes/pathnodes.h"
#include "utils/rel.h"
#include "utils/syscache.h"
static bool IndexSupportsBackwardScan(Oid indexid);
/*
* ExecReScan
* Reset a plan node so that its output can be re-scanned.
*
* Note that if the plan node has parameters that have changed value,
* the output might be different from last time.
*/
void
ExecReScan(PlanState *node)
{
/* If collecting timing stats, update them */
if (node->instrument)
InstrEndLoop(node->instrument);
/*
* If we have changed parameters, propagate that info.
*
* Note: ExecReScanSetParamPlan() can add bits to node->chgParam,
* corresponding to the output param(s) that the InitPlan will update.
* Since we make only one pass over the list, that means that an InitPlan
* can depend on the output param(s) of a sibling InitPlan only if that
* sibling appears earlier in the list. This is workable for now given
* the limited ways in which one InitPlan could depend on another, but
* eventually we might need to work harder (or else make the planner
* enlarge the extParam/allParam sets to include the params of depended-on
* InitPlans).
*/
if (node->chgParam != NULL)
{
ListCell *l;
foreach(l, node->initPlan)
{
SubPlanState *sstate = (SubPlanState *) lfirst(l);
PlanState *splan = sstate->planstate;
if (splan->plan->extParam != NULL) /* don't care about child
* local Params */
UpdateChangedParamSet(splan, node->chgParam);
if (splan->chgParam != NULL)
ExecReScanSetParamPlan(sstate, node);
}
foreach(l, node->subPlan)
{
SubPlanState *sstate = (SubPlanState *) lfirst(l);
PlanState *splan = sstate->planstate;
if (splan->plan->extParam != NULL)
UpdateChangedParamSet(splan, node->chgParam);
}
/* Well. Now set chgParam for left/right trees. */
if (node->lefttree != NULL)
UpdateChangedParamSet(node->lefttree, node->chgParam);
if (node->righttree != NULL)
UpdateChangedParamSet(node->righttree, node->chgParam);
}
/* Call expression callbacks */
if (node->ps_ExprContext)
ReScanExprContext(node->ps_ExprContext);
/* And do node-type-specific processing */
switch (nodeTag(node))
{
case T_ResultState:
ExecReScanResult((ResultState *) node);
break;
case T_ProjectSetState:
ExecReScanProjectSet((ProjectSetState *) node);
break;
case T_ModifyTableState:
ExecReScanModifyTable((ModifyTableState *) node);
break;
case T_AppendState:
ExecReScanAppend((AppendState *) node);
break;
case T_MergeAppendState:
ExecReScanMergeAppend((MergeAppendState *) node);
break;
case T_RecursiveUnionState:
ExecReScanRecursiveUnion((RecursiveUnionState *) node);
break;
case T_BitmapAndState:
ExecReScanBitmapAnd((BitmapAndState *) node);
break;
case T_BitmapOrState:
ExecReScanBitmapOr((BitmapOrState *) node);
break;
case T_SeqScanState:
ExecReScanSeqScan((SeqScanState *) node);
break;
case T_SampleScanState:
ExecReScanSampleScan((SampleScanState *) node);
break;
case T_GatherState:
ExecReScanGather((GatherState *) node);
break;
case T_GatherMergeState:
ExecReScanGatherMerge((GatherMergeState *) node);
break;
case T_IndexScanState:
ExecReScanIndexScan((IndexScanState *) node);
break;
case T_IndexOnlyScanState:
ExecReScanIndexOnlyScan((IndexOnlyScanState *) node);
break;
case T_BitmapIndexScanState:
ExecReScanBitmapIndexScan((BitmapIndexScanState *) node);
break;
case T_BitmapHeapScanState:
ExecReScanBitmapHeapScan((BitmapHeapScanState *) node);
break;
case T_TidScanState:
ExecReScanTidScan((TidScanState *) node);
break;
case T_SubqueryScanState:
ExecReScanSubqueryScan((SubqueryScanState *) node);
break;
case T_FunctionScanState:
ExecReScanFunctionScan((FunctionScanState *) node);
break;
case T_TableFuncScanState:
ExecReScanTableFuncScan((TableFuncScanState *) node);
break;
case T_ValuesScanState:
ExecReScanValuesScan((ValuesScanState *) node);
break;
case T_CteScanState:
ExecReScanCteScan((CteScanState *) node);
break;
case T_NamedTuplestoreScanState:
ExecReScanNamedTuplestoreScan((NamedTuplestoreScanState *) node);
break;
case T_WorkTableScanState:
ExecReScanWorkTableScan((WorkTableScanState *) node);
break;
case T_ForeignScanState:
ExecReScanForeignScan((ForeignScanState *) node);
break;
case T_CustomScanState:
ExecReScanCustomScan((CustomScanState *) node);
break;
case T_NestLoopState:
ExecReScanNestLoop((NestLoopState *) node);
break;
case T_MergeJoinState:
ExecReScanMergeJoin((MergeJoinState *) node);
break;
case T_HashJoinState:
ExecReScanHashJoin((HashJoinState *) node);
break;
case T_MaterialState:
ExecReScanMaterial((MaterialState *) node);
break;
case T_SortState:
ExecReScanSort((SortState *) node);
break;
case T_GroupState:
ExecReScanGroup((GroupState *) node);
break;
case T_AggState:
ExecReScanAgg((AggState *) node);
break;
case T_WindowAggState:
ExecReScanWindowAgg((WindowAggState *) node);
break;
case T_UniqueState:
ExecReScanUnique((UniqueState *) node);
break;
case T_HashState:
ExecReScanHash((HashState *) node);
break;
case T_SetOpState:
ExecReScanSetOp((SetOpState *) node);
break;
case T_LockRowsState:
ExecReScanLockRows((LockRowsState *) node);
break;
case T_LimitState:
ExecReScanLimit((LimitState *) node);
break;
default:
elog(ERROR, "unrecognized node type: %d", (int) nodeTag(node));
break;
}
if (node->chgParam != NULL)
{
bms_free(node->chgParam);
node->chgParam = NULL;
}
}
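
/*
 * Illustrative sketch only (not part of the original file): the typical way
 * a parent node drives a re-scan of a child after changing an executor
 * parameter, much as nodeNestloop.c does for its inner side.  The function
 * name and the "paramno" argument are hypothetical; real callers take them
 * from their plan nodes.
 */
#ifdef NOT_USED
static void
ExampleRescanChildWithNewParam(PlanState *childPlan, int paramno)
{
	/* Flag the parameter as changed so descendants know to recompute it */
	childPlan->chgParam = bms_add_member(childPlan->chgParam, paramno);

	/* Reset the child; its next ExecProcNode() restarts the scan */
	ExecReScan(childPlan);
}
#endif							/* NOT_USED */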
/*
* ExecMarkPos
*
* Marks the current scan position.
*
* NOTE: mark/restore capability is currently needed only for plan nodes
* that are the immediate inner child of a MergeJoin node. Since MergeJoin
* requires sorted input, there is never any need to support mark/restore in
* node types that cannot produce sorted output. There are some cases in
* which a node can pass through sorted data from its child; if we don't
* implement mark/restore for such a node type, the planner compensates by
* inserting a Material node above that node.
*/
void
ExecMarkPos(PlanState *node)
{
switch (nodeTag(node))
{
case T_IndexScanState:
ExecIndexMarkPos((IndexScanState *) node);
break;
case T_IndexOnlyScanState:
ExecIndexOnlyMarkPos((IndexOnlyScanState *) node);
break;
case T_CustomScanState:
ExecCustomMarkPos((CustomScanState *) node);
break;
case T_MaterialState:
ExecMaterialMarkPos((MaterialState *) node);
break;
case T_SortState:
ExecSortMarkPos((SortState *) node);
break;
case T_ResultState:
ExecResultMarkPos((ResultState *) node);
break;
default:
/* don't make hard error unless caller asks to restore... */
elog(DEBUG2, "unrecognized node type: %d", (int) nodeTag(node));
break;
}
}
/*
* ExecRestrPos
*
* restores the scan position previously saved with ExecMarkPos()
*
* NOTE: the semantics of this are that the first ExecProcNode following
* the restore operation will yield the same tuple as the first one following
* the mark operation. It is unspecified what happens to the plan node's
* result TupleTableSlot. (In most cases the result slot is unchanged by
* a restore, but the node may choose to clear it or to load it with the
* restored-to tuple.) Hence the caller should discard any previously
* returned TupleTableSlot after doing a restore.
*/
void
ExecRestrPos(PlanState *node)
{
switch (nodeTag(node))
{
case T_IndexScanState:
ExecIndexRestrPos((IndexScanState *) node);
break;
case T_IndexOnlyScanState:
ExecIndexOnlyRestrPos((IndexOnlyScanState *) node);
break;
case T_CustomScanState:
ExecCustomRestrPos((CustomScanState *) node);
break;
case T_MaterialState:
ExecMaterialRestrPos((MaterialState *) node);
break;
case T_SortState:
ExecSortRestrPos((SortState *) node);
break;
case T_ResultState:
ExecResultRestrPos((ResultState *) node);
break;
default:
elog(ERROR, "unrecognized node type: %d", (int) nodeTag(node));
break;
}
}
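
/*
 * Illustrative sketch only (not part of the original file): how a
 * merge-join-like caller exercises the mark/restore contract described
 * above.  "innerPlan" is a hypothetical inner child already known to
 * support mark/restore (see ExecSupportsMarkRestore below).
 */
#ifdef NOT_USED
static void
ExampleMarkAndRestore(PlanState *innerPlan)
{
	TupleTableSlot *slot;

	/* Remember the child's current scan position */
	ExecMarkPos(innerPlan);

	/* Advance the child past some tuples */
	slot = ExecProcNode(innerPlan);

	/*
	 * Rewind to the mark.  The next ExecProcNode() call returns the same
	 * tuple that followed the mark; per the contract above, the previously
	 * returned slot must be treated as invalid after the restore.
	 */
	ExecRestrPos(innerPlan);
	slot = ExecProcNode(innerPlan);
	(void) slot;				/* silence unused-variable warnings */
}
#endif							/* NOT_USED */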
/*
* ExecSupportsMarkRestore - does a Path support mark/restore?
*
* This is used during planning and so must accept a Path, not a Plan.
* We keep it here to be adjacent to the routines above, which also must
* know which plan types support mark/restore.
*/
bool
ExecSupportsMarkRestore(Path *pathnode)
{
/*
* For consistency with the routines above, we do not examine the nodeTag
* but rather the pathtype, which is the Plan node type the Path would
* produce.
*/
switch (pathnode->pathtype)
{
case T_IndexScan:
case T_IndexOnlyScan:
case T_Material:
case T_Sort:
return true;
case T_CustomScan:
{
CustomPath *customPath = castNode(CustomPath, pathnode);
if (customPath->flags & CUSTOMPATH_SUPPORT_MARK_RESTORE)
return true;
return false;
}
case T_Result:
/*
* Result supports mark/restore iff it has a child plan that does.
*
* We have to be careful here because there is more than one Path
* type that can produce a Result plan node.
*/
if (IsA(pathnode, ProjectionPath))
return ExecSupportsMarkRestore(((ProjectionPath *) pathnode)->subpath);
else if (IsA(pathnode, MinMaxAggPath))
return false; /* childless Result */
else if (IsA(pathnode, GroupResultPath))
return false; /* childless Result */
else
{
/* Simple RTE_RESULT base relation */
Assert(IsA(pathnode, Path));
return false; /* childless Result */
}
case T_Append:
{
AppendPath *appendPath = castNode(AppendPath, pathnode);
/*
* If there's exactly one child, then there will be no Append
* in the final plan, so we can handle mark/restore if the
* child plan node can.
*/
if (list_length(appendPath->subpaths) == 1)
return ExecSupportsMarkRestore((Path *) linitial(appendPath->subpaths));
/* Otherwise, Append can't handle it */
return false;
}
case T_MergeAppend:
{
MergeAppendPath *mapath = castNode(MergeAppendPath, pathnode);
/*
* Like the Append case above, single-subpath MergeAppends
* won't be in the final plan, so just return the child's
* mark/restore ability.
*/
if (list_length(mapath->subpaths) == 1)
return ExecSupportsMarkRestore((Path *) linitial(mapath->subpaths));
/* Otherwise, MergeAppend can't handle it */
return false;
}
default:
break;
}
return false;
}
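
/*
 * Illustrative sketch only (not part of the original file): roughly the
 * kind of check the planner makes with ExecSupportsMarkRestore() when
 * building a merge join (cf. create_mergejoin_plan in createplan.c).  If
 * the chosen inner path cannot mark/restore its position, a Material node
 * has to be interposed above it.  The function name and argument are
 * hypothetical.
 */
#ifdef NOT_USED
static bool
ExampleInnerNeedsMaterial(Path *inner_path)
{
	/* Materialize the inner side unless it can mark/restore by itself */
	return !ExecSupportsMarkRestore(inner_path);
}
#endif							/* NOT_USED */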
/*
* ExecSupportsBackwardScan - does a plan type support backwards scanning?
*
* Ideally, all plan types would support backwards scan, but that seems
* unlikely to happen soon. In some cases, a plan node passes the backwards
* scan down to its children, and so supports backwards scan only if its
* children do. Therefore, this routine must be passed a complete plan tree.
*/
bool
ExecSupportsBackwardScan(Plan *node)
{
if (node == NULL)
return false;
/*
* Parallel-aware nodes return a subset of the tuples in each worker, and
* in general we can't expect to have enough bookkeeping state to know
* which ones we returned in this worker as opposed to some other worker.
*/
if (node->parallel_aware)
return false;
switch (nodeTag(node))
{
case T_Result:
if (outerPlan(node) != NULL)
return ExecSupportsBackwardScan(outerPlan(node));
else
return false;
case T_Append:
{
ListCell *l;
foreach(l, ((Append *) node)->appendplans)
{
if (!ExecSupportsBackwardScan((Plan *) lfirst(l)))
return false;
}
/* need not check tlist because Append doesn't evaluate it */
return true;
}
case T_SampleScan:
/* Simplify life for tablesample methods by disallowing this */
return false;
case T_Gather:
return false;
case T_IndexScan:
return IndexSupportsBackwardScan(((IndexScan *) node)->indexid);
case T_IndexOnlyScan:
return IndexSupportsBackwardScan(((IndexOnlyScan *) node)->indexid);
case T_SubqueryScan:
return ExecSupportsBackwardScan(((SubqueryScan *) node)->subplan);
case T_CustomScan:
{
uint32 flags = ((CustomScan *) node)->flags;
if (flags & CUSTOMPATH_SUPPORT_BACKWARD_SCAN)
return true;
}
return false;
case T_SeqScan:
case T_TidScan:
case T_FunctionScan:
case T_ValuesScan:
case T_CteScan:
case T_Material:
case T_Sort:
return true;
case T_LockRows:
case T_Limit:
return ExecSupportsBackwardScan(outerPlan(node));
default:
return false;
}
}
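
/*
 * Illustrative sketch only (not part of the original file): the kind of
 * test a caller can make with ExecSupportsBackwardScan(), e.g. when the
 * planner decides whether a scrollable cursor's plan needs an extra
 * Material node on top (cf. standard_planner in planner.c).  The function
 * name and arguments are hypothetical.
 */
#ifdef NOT_USED
static bool
ExampleScrollableCursorNeedsMaterial(Plan *top_plan, int cursorOptions)
{
	/* A scrollable cursor must be able to fetch backwards from its plan */
	return (cursorOptions & CURSOR_OPT_SCROLL) != 0 &&
		!ExecSupportsBackwardScan(top_plan);
}
#endif							/* NOT_USED */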
/*
* An IndexScan or IndexOnlyScan node supports backward scan only if the
* index's AM does.
*/
static bool
IndexSupportsBackwardScan(Oid indexid)
{
bool result;
HeapTuple ht_idxrel;
Form_pg_class idxrelrec;
IndexAmRoutine *amroutine;
/* Fetch the pg_class tuple of the index relation */
ht_idxrel = SearchSysCache1(RELOID, ObjectIdGetDatum(indexid));
if (!HeapTupleIsValid(ht_idxrel))
elog(ERROR, "cache lookup failed for relation %u", indexid);
idxrelrec = (Form_pg_class) GETSTRUCT(ht_idxrel);
/* Fetch the index AM's API struct */
amroutine = GetIndexAmRoutineByAmId(idxrelrec->relam, false);
result = amroutine->amcanbackward;
pfree(amroutine);
ReleaseSysCache(ht_idxrel);
return result;
}
/*
* ExecMaterializesOutput - does a plan type materialize its output?
*
* Returns true if the plan node type is one that automatically materializes
* its output (typically by keeping it in a tuplestore). For such plans,
* a rescan without any parameter change will have zero startup cost and
* very low per-tuple cost.
*/
bool
ExecMaterializesOutput(NodeTag plantype)
{
switch (plantype)
{
case T_Material:
case T_FunctionScan:
case T_TableFuncScan:
case T_CteScan:
case T_NamedTuplestoreScan:
case T_WorkTableScan:
case T_Sort:
return true;
default:
break;
}
return false;
}
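
/*
 * Illustrative sketch only (not part of the original file): the kind of
 * test the planner applies with ExecMaterializesOutput() when considering
 * an explicit Material node over a join's inner path (cf. joinpath.c).  If
 * the inner plan type already materializes its output, adding another
 * Material node would not make rescans any cheaper.  The function name and
 * argument are hypothetical.
 */
#ifdef NOT_USED
static bool
ExampleWorthMaterializingInner(Path *inner_path)
{
	return !ExecMaterializesOutput(inner_path->pathtype);
}
#endif							/* NOT_USED */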