Propagate pathkeys from CTEs up to the outer query.

If we know the sort order of a CTE's output, and it is relevant
to the outer query, label the CTE's outer-query access path using
those pathkeys.  This may enable optimizations such as avoiding
a sort in the outer query.

The code for hoisting pathkeys into the outer query already exists
for regular RTE_SUBQUERY subqueries, but it wasn't getting used for
CTEs, possibly out of concern for maintaining an optimization fence
between the CTE and the outer query.  However, on the same arguments
used for commit f7816aec2, there seems no harm in letting the outer
query know what the inner query decided to do.

In support of this, we now remember the best Path as well as Plan
for each subquery for the rest of the planner run.  There may be
future applications for having that at hand, and it surely costs
little to build one more List.

Richard Guo (minor mods by me)

Discussion: https://postgr.es/m/CAMbWs49xYd3f8CrE8-WW3--dV1zH_sDSDn-vs2DzHj81Wcnsew@mail.gmail.com
This commit is contained in:
Tom Lane 2024-03-26 13:05:49 -04:00
parent e648e77e25
commit a65724dfa7
8 changed files with 64 additions and 15 deletions

View File

@ -2872,16 +2872,19 @@ set_tablefunc_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
static void
set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
{
Path *ctepath;
Plan *cteplan;
PlannerInfo *cteroot;
Index levelsup;
List *pathkeys;
int ndx;
ListCell *lc;
int plan_id;
Relids required_outer;
/*
* Find the referenced CTE, and locate the plan previously made for it.
* Find the referenced CTE, and locate the path and plan previously made
* for it.
*/
levelsup = rte->ctelevelsup;
cteroot = root;
@ -2913,11 +2916,20 @@ set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
plan_id = list_nth_int(cteroot->cte_plan_ids, ndx);
if (plan_id <= 0)
elog(ERROR, "no plan was made for CTE \"%s\"", rte->ctename);
Assert(list_length(root->glob->subpaths) == list_length(root->glob->subplans));
ctepath = (Path *) list_nth(root->glob->subpaths, plan_id - 1);
cteplan = (Plan *) list_nth(root->glob->subplans, plan_id - 1);
/* Mark rel with estimated output rows, width, etc */
set_cte_size_estimates(root, rel, cteplan->plan_rows);
/* Convert the ctepath's pathkeys to outer query's representation */
pathkeys = convert_subquery_pathkeys(root,
rel,
ctepath->pathkeys,
cteplan->targetlist);
/*
* We don't support pushing join clauses into the quals of a CTE scan, but
* it could still have required parameterization due to LATERAL refs in
@ -2926,7 +2938,7 @@ set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
required_outer = rel->lateral_relids;
/* Generate appropriate path */
add_path(rel, create_ctescan_path(root, rel, required_outer));
add_path(rel, create_ctescan_path(root, rel, pathkeys, required_outer));
}
/*

View File

@ -306,6 +306,7 @@ standard_planner(Query *parse, const char *query_string, int cursorOptions,
glob->boundParams = boundParams;
glob->subplans = NIL;
glob->subpaths = NIL;
glob->subroots = NIL;
glob->rewindPlanIDs = NULL;
glob->finalrtable = NIL;

View File

@ -65,8 +65,8 @@ typedef struct inline_cte_walker_context
} inline_cte_walker_context;
static Node *build_subplan(PlannerInfo *root, Plan *plan, PlannerInfo *subroot,
List *plan_params,
static Node *build_subplan(PlannerInfo *root, Plan *plan, Path *path,
PlannerInfo *subroot, List *plan_params,
SubLinkType subLinkType, int subLinkId,
Node *testexpr, List *testexpr_paramids,
bool unknownEqFalse);
@ -236,7 +236,8 @@ make_subplan(PlannerInfo *root, Query *orig_subquery,
plan = create_plan(subroot, best_path);
/* And convert to SubPlan or InitPlan format. */
result = build_subplan(root, plan, subroot, plan_params,
result = build_subplan(root, plan, best_path,
subroot, plan_params,
subLinkType, subLinkId,
testexpr, NIL, isTopQual);
@ -288,8 +289,8 @@ make_subplan(PlannerInfo *root, Query *orig_subquery,
/* ... and convert to SubPlan format */
hashplan = castNode(SubPlan,
build_subplan(root, plan, subroot,
plan_params,
build_subplan(root, plan, best_path,
subroot, plan_params,
ANY_SUBLINK, 0,
newtestexpr,
paramIds,
@ -317,8 +318,8 @@ make_subplan(PlannerInfo *root, Query *orig_subquery,
* make it an InitPlan, as explained in the comments for make_subplan.
*/
static Node *
build_subplan(PlannerInfo *root, Plan *plan, PlannerInfo *subroot,
List *plan_params,
build_subplan(PlannerInfo *root, Plan *plan, Path *path,
PlannerInfo *subroot, List *plan_params,
SubLinkType subLinkType, int subLinkId,
Node *testexpr, List *testexpr_paramids,
bool unknownEqFalse)
@ -539,9 +540,10 @@ build_subplan(PlannerInfo *root, Plan *plan, PlannerInfo *subroot,
}
/*
* Add the subplan and its PlannerInfo to the global lists.
* Add the subplan, its path, and its PlannerInfo to the global lists.
*/
root->glob->subplans = lappend(root->glob->subplans, plan);
root->glob->subpaths = lappend(root->glob->subpaths, path);
root->glob->subroots = lappend(root->glob->subroots, subroot);
splan->plan_id = list_length(root->glob->subplans);
@ -1029,9 +1031,10 @@ SS_process_ctes(PlannerInfo *root)
splan->setParam = list_make1_int(paramid);
/*
* Add the subplan and its PlannerInfo to the global lists.
* Add the subplan, its path, and its PlannerInfo to the global lists.
*/
root->glob->subplans = lappend(root->glob->subplans, plan);
root->glob->subpaths = lappend(root->glob->subpaths, best_path);
root->glob->subroots = lappend(root->glob->subroots, subroot);
splan->plan_id = list_length(root->glob->subplans);
@ -3021,9 +3024,14 @@ SS_make_initplan_from_plan(PlannerInfo *root,
SubPlan *node;
/*
* Add the subplan and its PlannerInfo to the global lists.
* Add the subplan and its PlannerInfo, as well as a dummy path entry, to
* the global lists. Ideally we'd save a real path, but right now our
* sole caller doesn't build a path that exactly matches the plan. Since
* we're not currently going to need the path for an initplan, it's not
* worth requiring construction of such a path.
*/
root->glob->subplans = lappend(root->glob->subplans, plan);
root->glob->subpaths = lappend(root->glob->subpaths, NULL);
root->glob->subroots = lappend(root->glob->subroots, subroot);
/*

View File

@ -2121,7 +2121,8 @@ create_valuesscan_path(PlannerInfo *root, RelOptInfo *rel,
* returning the pathnode.
*/
Path *
create_ctescan_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer)
create_ctescan_path(PlannerInfo *root, RelOptInfo *rel,
List *pathkeys, Relids required_outer)
{
Path *pathnode = makeNode(Path);
@ -2133,7 +2134,7 @@ create_ctescan_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer)
pathnode->parallel_aware = false;
pathnode->parallel_safe = rel->consider_parallel;
pathnode->parallel_workers = 0;
pathnode->pathkeys = NIL; /* XXX for now, result is always unordered */
pathnode->pathkeys = pathkeys;
cost_ctescan(pathnode, root, rel, pathnode->param_info);

View File

@ -104,6 +104,9 @@ typedef struct PlannerGlobal
/* Plans for SubPlan nodes */
List *subplans;
/* Paths from which the SubPlan Plans were made */
List *subpaths;
/* PlannerInfos for SubPlan nodes */
List *subroots pg_node_attr(read_write_ignore);

View File

@ -115,7 +115,7 @@ extern Path *create_valuesscan_path(PlannerInfo *root, RelOptInfo *rel,
extern Path *create_tablefuncscan_path(PlannerInfo *root, RelOptInfo *rel,
Relids required_outer);
extern Path *create_ctescan_path(PlannerInfo *root, RelOptInfo *rel,
Relids required_outer);
List *pathkeys, Relids required_outer);
extern Path *create_namedtuplestorescan_path(PlannerInfo *root, RelOptInfo *rel,
Relids required_outer);
extern Path *create_resultscan_path(PlannerInfo *root, RelOptInfo *rel,

View File

@ -672,6 +672,23 @@ select count(*) from tenk1 a
Index Cond: (unique1 = x.unique1)
(10 rows)
-- test that pathkeys from a materialized CTE are propagated up to the
-- outer query
explain (costs off)
with x as materialized (select unique1 from tenk1 b order by unique1)
select count(*) from tenk1 a
where unique1 in (select * from x);
QUERY PLAN
------------------------------------------------------------
Aggregate
CTE x
-> Index Only Scan using tenk1_unique1 on tenk1 b
-> Merge Semi Join
Merge Cond: (a.unique1 = x.unique1)
-> Index Only Scan using tenk1_unique1 on tenk1 a
-> CTE Scan on x
(7 rows)
-- SEARCH clause
create temp table graph0( f int, t int, label text );
insert into graph0 values

View File

@ -359,6 +359,13 @@ with x as materialized (insert into tenk1 default values returning unique1)
select count(*) from tenk1 a
where unique1 in (select * from x);
-- test that pathkeys from a materialized CTE are propagated up to the
-- outer query
explain (costs off)
with x as materialized (select unique1 from tenk1 b order by unique1)
select count(*) from tenk1 a
where unique1 in (select * from x);
-- SEARCH clause
create temp table graph0( f int, t int, label text );