Apply band-aid fix for an oversight in reparameterize_path_by_child.

The path we wish to reparameterize is not a standalone object:
in particular, it implicitly references baserestrictinfo clauses
in the associated RelOptInfo, and if it's a SampleScan path then
there is also the TableSampleClause in the RTE to worry about.
Both of those could contain lateral references to the join partner
relation, which would need to be modified to refer to its child.
Since we aren't doing that, affected queries can give wrong answers,
or odd failures such as "variable not found in subplan target list",
or executor crashes.  But we can't just summarily modify those
expressions, because they are shared with other paths for the rel.
We'd break things if we modify them and then end up using some
non-partitioned-join path.

In HEAD, we plan to fix this by postponing reparameterization
until create_plan(), when we know that those other paths are
no longer of interest, and then adjusting those expressions along
with the ones in the path itself.  That seems like too big a change
for stable branches however.  In the back branches, let's just detect
whether any troublesome lateral references actually exist in those
expressions, and fail reparameterization if so.  This will result in
not performing a partitioned join in such cases.  Given the lack of
field complaints, nobody's likely to miss the optimization.

Report and patch by Richard Guo.  Apply to 12-16 only, since
the intended fix for HEAD looks quite different.  We're not quite
ready to push the HEAD fix, but with back-branch releases coming
up soon, it seems wise to get this stopgap fix in place there.

Discussion: https://postgr.es/m/CAMbWs496+N=UAjOc=rcD3P7B6oJe4rZw08e_TZRUsWbPxZW3Tw@mail.gmail.com
This commit is contained in:
Tom Lane 2024-02-01 12:34:21 -05:00
parent 88be3f076c
commit 62f1202031
3 changed files with 390 additions and 0 deletions

View File

@ -26,6 +26,7 @@
#include "optimizer/optimizer.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
#include "optimizer/placeholder.h"
#include "optimizer/planmain.h"
#include "optimizer/prep.h"
#include "optimizer/restrictinfo.h"
@ -56,6 +57,10 @@ static int append_startup_cost_compare(const ListCell *a, const ListCell *b);
static List *reparameterize_pathlist_by_child(PlannerInfo *root,
List *pathlist,
RelOptInfo *child_rel);
static bool contain_references_to(PlannerInfo *root, Node *clause,
Relids relids);
static bool ris_contain_references_to(PlannerInfo *root, List *rinfos,
Relids relids);
/*****************************************************************************
@ -4103,6 +4108,40 @@ do { \
switch (nodeTag(path))
{
case T_Path:
/*
* If the path's restriction clauses contain lateral references to
* the other relation, we can't reparameterize, because we must
* not change the RelOptInfo's contents here. (Doing so would
* break things if we end up using a non-partitionwise join.)
*/
if (ris_contain_references_to(root,
path->parent->baserestrictinfo,
child_rel->top_parent_relids))
return NULL;
/*
* If it's a SampleScan with tablesample parameters referencing
* the other relation, we can't reparameterize, because we must
* not change the RTE's contents here. (Doing so would break
* things if we end up using a non-partitionwise join.)
*/
if (path->pathtype == T_SampleScan)
{
Index scan_relid = path->parent->relid;
RangeTblEntry *rte;
/* it should be a base rel with a tablesample clause... */
Assert(scan_relid > 0);
rte = planner_rt_fetch(scan_relid, root);
Assert(rte->rtekind == RTE_RELATION);
Assert(rte->tablesample != NULL);
if (contain_references_to(root, (Node *) rte->tablesample,
child_rel->top_parent_relids))
return NULL;
}
FLAT_COPY_PATH(new_path, path, Path);
break;
@ -4110,6 +4149,18 @@ do { \
{
IndexPath *ipath;
/*
* If the path's restriction clauses contain lateral
* references to the other relation, we can't reparameterize,
* because we must not change the IndexOptInfo's contents
* here. (Doing so would break things if we end up using a
* non-partitionwise join.)
*/
if (ris_contain_references_to(root,
path->parent->baserestrictinfo,
child_rel->top_parent_relids))
return NULL;
FLAT_COPY_PATH(ipath, path, IndexPath);
ADJUST_CHILD_ATTRS(ipath->indexclauses);
new_path = (Path *) ipath;
@ -4120,6 +4171,18 @@ do { \
{
BitmapHeapPath *bhpath;
/*
* If the path's restriction clauses contain lateral
* references to the other relation, we can't reparameterize,
* because we must not change the RelOptInfo's contents here.
* (Doing so would break things if we end up using a
* non-partitionwise join.)
*/
if (ris_contain_references_to(root,
path->parent->baserestrictinfo,
child_rel->top_parent_relids))
return NULL;
FLAT_COPY_PATH(bhpath, path, BitmapHeapPath);
REPARAMETERIZE_CHILD_PATH(bhpath->bitmapqual);
new_path = (Path *) bhpath;
@ -4151,6 +4214,18 @@ do { \
ForeignPath *fpath;
ReparameterizeForeignPathByChild_function rfpc_func;
/*
* If the path's restriction clauses contain lateral
* references to the other relation, we can't reparameterize,
* because we must not change the RelOptInfo's contents here.
* (Doing so would break things if we end up using a
* non-partitionwise join.)
*/
if (ris_contain_references_to(root,
path->parent->baserestrictinfo,
child_rel->top_parent_relids))
return NULL;
FLAT_COPY_PATH(fpath, path, ForeignPath);
if (fpath->fdw_outerpath)
REPARAMETERIZE_CHILD_PATH(fpath->fdw_outerpath);
@ -4169,6 +4244,18 @@ do { \
{
CustomPath *cpath;
/*
* If the path's restriction clauses contain lateral
* references to the other relation, we can't reparameterize,
* because we must not change the RelOptInfo's contents here.
* (Doing so would break things if we end up using a
* non-partitionwise join.)
*/
if (ris_contain_references_to(root,
path->parent->baserestrictinfo,
child_rel->top_parent_relids))
return NULL;
FLAT_COPY_PATH(cpath, path, CustomPath);
REPARAMETERIZE_CHILD_PATH_LIST(cpath->custom_paths);
if (cpath->methods &&
@ -4358,3 +4445,91 @@ reparameterize_pathlist_by_child(PlannerInfo *root,
return result;
}
/*
* contain_references_to
* Detect whether any Vars or PlaceHolderVars in the given clause contain
* lateral references to the given 'relids'.
*/
static bool
contain_references_to(PlannerInfo *root, Node *clause, Relids relids)
{
bool ret = false;
List *vars;
ListCell *lc;
/*
* Examine all Vars and PlaceHolderVars used in the clause.
*
* By omitting the relevant flags, this also gives us a cheap sanity check
* that no aggregates or window functions appear in the clause. We don't
* expect any of those in scan-level restrictions or tablesamples.
*/
vars = pull_var_clause(clause, PVC_INCLUDE_PLACEHOLDERS);
foreach(lc, vars)
{
Node *node = (Node *) lfirst(lc);
if (IsA(node, Var))
{
Var *var = (Var *) node;
if (bms_is_member(var->varno, relids))
{
ret = true;
break;
}
}
else if (IsA(node, PlaceHolderVar))
{
PlaceHolderVar *phv = (PlaceHolderVar *) node;
PlaceHolderInfo *phinfo = find_placeholder_info(root, phv);
/*
* We should check both ph_eval_at (in case the PHV is to be
* computed at the other relation and then laterally referenced
* here) and ph_lateral (in case the PHV is to be evaluated here
* but contains lateral references to the other relation). The
* former case should not occur in baserestrictinfo clauses, but
* it can occur in tablesample clauses.
*/
if (bms_overlap(phinfo->ph_eval_at, relids) ||
bms_overlap(phinfo->ph_lateral, relids))
{
ret = true;
break;
}
}
else
Assert(false);
}
list_free(vars);
return ret;
}
/*
* ris_contain_references_to
* Apply contain_references_to() to a list of RestrictInfos.
*
* We need extra code for this because pull_var_clause() can't descend
* through RestrictInfos.
*/
static bool
ris_contain_references_to(PlannerInfo *root, List *rinfos, Relids relids)
{
ListCell *lc;
foreach(lc, rinfos)
{
RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc);
/* Pseudoconstant clauses can't contain any Vars or PHVs */
if (rinfo->pseudoconstant)
continue;
if (contain_references_to(root, (Node *) rinfo->clause, relids))
return true;
}
return false;
}

View File

@ -505,6 +505,99 @@ SELECT t1.a, ss.t2a, ss.t2c FROM prt1 t1 LEFT JOIN LATERAL
550 | |
(12 rows)
SET max_parallel_workers_per_gather = 0;
-- If there are lateral references to the other relation in sample scan,
-- we cannot generate a partitionwise join.
EXPLAIN (COSTS OFF)
SELECT * FROM prt1 t1 JOIN LATERAL
(SELECT * FROM prt1 t2 TABLESAMPLE SYSTEM (t1.a) REPEATABLE(t1.b)) s
ON t1.a = s.a;
QUERY PLAN
---------------------------------------------------------
Nested Loop
-> Append
-> Seq Scan on prt1_p1 t1_1
-> Seq Scan on prt1_p2 t1_2
-> Seq Scan on prt1_p3 t1_3
-> Append
-> Sample Scan on prt1_p1 t2_1
Sampling: system (t1.a) REPEATABLE (t1.b)
Filter: (t1.a = a)
-> Sample Scan on prt1_p2 t2_2
Sampling: system (t1.a) REPEATABLE (t1.b)
Filter: (t1.a = a)
-> Sample Scan on prt1_p3 t2_3
Sampling: system (t1.a) REPEATABLE (t1.b)
Filter: (t1.a = a)
(15 rows)
-- If there are lateral references to the other relation in scan's restriction
-- clauses, we cannot generate a partitionwise join.
EXPLAIN (COSTS OFF)
SELECT count(*) FROM prt1 t1 LEFT JOIN LATERAL
(SELECT t1.b AS t1b, t2.* FROM prt2 t2) s
ON t1.a = s.b WHERE s.t1b = s.a;
QUERY PLAN
---------------------------------------------------------------
Aggregate
-> Nested Loop
-> Append
-> Seq Scan on prt1_p1 t1_1
-> Seq Scan on prt1_p2 t1_2
-> Seq Scan on prt1_p3 t1_3
-> Append
-> Index Scan using iprt2_p1_b on prt2_p1 t2_1
Index Cond: (b = t1.a)
Filter: (t1.b = a)
-> Index Scan using iprt2_p2_b on prt2_p2 t2_2
Index Cond: (b = t1.a)
Filter: (t1.b = a)
-> Index Scan using iprt2_p3_b on prt2_p3 t2_3
Index Cond: (b = t1.a)
Filter: (t1.b = a)
(16 rows)
SELECT count(*) FROM prt1 t1 LEFT JOIN LATERAL
(SELECT t1.b AS t1b, t2.* FROM prt2 t2) s
ON t1.a = s.b WHERE s.t1b = s.a;
count
-------
100
(1 row)
EXPLAIN (COSTS OFF)
SELECT count(*) FROM prt1 t1 LEFT JOIN LATERAL
(SELECT t1.b AS t1b, t2.* FROM prt2 t2) s
ON t1.a = s.b WHERE s.t1b = s.b;
QUERY PLAN
--------------------------------------------------------------------
Aggregate
-> Nested Loop
-> Append
-> Seq Scan on prt1_p1 t1_1
-> Seq Scan on prt1_p2 t1_2
-> Seq Scan on prt1_p3 t1_3
-> Append
-> Index Only Scan using iprt2_p1_b on prt2_p1 t2_1
Index Cond: (b = t1.a)
Filter: (b = t1.b)
-> Index Only Scan using iprt2_p2_b on prt2_p2 t2_2
Index Cond: (b = t1.a)
Filter: (b = t1.b)
-> Index Only Scan using iprt2_p3_b on prt2_p3 t2_3
Index Cond: (b = t1.a)
Filter: (b = t1.b)
(16 rows)
SELECT count(*) FROM prt1 t1 LEFT JOIN LATERAL
(SELECT t1.b AS t1b, t2.* FROM prt2 t2) s
ON t1.a = s.b WHERE s.t1b = s.b;
count
-------
5
(1 row)
RESET max_parallel_workers_per_gather;
-- bug with inadequate sort key representation
SET enable_partitionwise_aggregate TO true;
SET enable_hashjoin TO false;
@ -1944,6 +2037,80 @@ SELECT * FROM prt1_l t1 LEFT JOIN LATERAL
550 | 0 | 0002 | | | | |
(12 rows)
SET max_parallel_workers_per_gather = 0;
-- If there are lateral references to the other relation in sample scan,
-- we cannot generate a partitionwise join.
EXPLAIN (COSTS OFF)
SELECT * FROM prt1_l t1 JOIN LATERAL
(SELECT * FROM prt1_l t2 TABLESAMPLE SYSTEM (t1.a) REPEATABLE(t1.b)) s
ON t1.a = s.a AND t1.b = s.b AND t1.c = s.c;
QUERY PLAN
----------------------------------------------------------------------------------
Nested Loop
-> Append
-> Seq Scan on prt1_l_p1 t1_1
-> Seq Scan on prt1_l_p2_p1 t1_2
-> Seq Scan on prt1_l_p2_p2 t1_3
-> Seq Scan on prt1_l_p3_p1 t1_4
-> Seq Scan on prt1_l_p3_p2 t1_5
-> Append
-> Sample Scan on prt1_l_p1 t2_1
Sampling: system (t1.a) REPEATABLE (t1.b)
Filter: ((t1.a = a) AND (t1.b = b) AND ((t1.c)::text = (c)::text))
-> Sample Scan on prt1_l_p2_p1 t2_2
Sampling: system (t1.a) REPEATABLE (t1.b)
Filter: ((t1.a = a) AND (t1.b = b) AND ((t1.c)::text = (c)::text))
-> Sample Scan on prt1_l_p2_p2 t2_3
Sampling: system (t1.a) REPEATABLE (t1.b)
Filter: ((t1.a = a) AND (t1.b = b) AND ((t1.c)::text = (c)::text))
-> Sample Scan on prt1_l_p3_p1 t2_4
Sampling: system (t1.a) REPEATABLE (t1.b)
Filter: ((t1.a = a) AND (t1.b = b) AND ((t1.c)::text = (c)::text))
-> Sample Scan on prt1_l_p3_p2 t2_5
Sampling: system (t1.a) REPEATABLE (t1.b)
Filter: ((t1.a = a) AND (t1.b = b) AND ((t1.c)::text = (c)::text))
(23 rows)
-- If there are lateral references to the other relation in scan's restriction
-- clauses, we cannot generate a partitionwise join.
EXPLAIN (COSTS OFF)
SELECT COUNT(*) FROM prt1_l t1 LEFT JOIN LATERAL
(SELECT t1.b AS t1b, t2.* FROM prt2_l t2) s
ON t1.a = s.b AND t1.b = s.a AND t1.c = s.c
WHERE s.t1b = s.a;
QUERY PLAN
-------------------------------------------------------------------------------------------------------
Aggregate
-> Nested Loop
-> Append
-> Seq Scan on prt1_l_p1 t1_1
-> Seq Scan on prt1_l_p2_p1 t1_2
-> Seq Scan on prt1_l_p2_p2 t1_3
-> Seq Scan on prt1_l_p3_p1 t1_4
-> Seq Scan on prt1_l_p3_p2 t1_5
-> Append
-> Seq Scan on prt2_l_p1 t2_1
Filter: ((a = t1.b) AND (t1.a = b) AND (t1.b = a) AND ((t1.c)::text = (c)::text))
-> Seq Scan on prt2_l_p2_p1 t2_2
Filter: ((a = t1.b) AND (t1.a = b) AND (t1.b = a) AND ((t1.c)::text = (c)::text))
-> Seq Scan on prt2_l_p2_p2 t2_3
Filter: ((a = t1.b) AND (t1.a = b) AND (t1.b = a) AND ((t1.c)::text = (c)::text))
-> Seq Scan on prt2_l_p3_p1 t2_4
Filter: ((a = t1.b) AND (t1.a = b) AND (t1.b = a) AND ((t1.c)::text = (c)::text))
-> Seq Scan on prt2_l_p3_p2 t2_5
Filter: ((a = t1.b) AND (t1.a = b) AND (t1.b = a) AND ((t1.c)::text = (c)::text))
(19 rows)
SELECT COUNT(*) FROM prt1_l t1 LEFT JOIN LATERAL
(SELECT t1.b AS t1b, t2.* FROM prt2_l t2) s
ON t1.a = s.b AND t1.b = s.a AND t1.c = s.c
WHERE s.t1b = s.a;
count
-------
100
(1 row)
RESET max_parallel_workers_per_gather;
-- join with one side empty
EXPLAIN (COSTS OFF)
SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1_l WHERE a = 1 AND a = 2) t1 RIGHT JOIN prt2_l t2 ON t1.a = t2.b AND t1.b = t2.a AND t1.c = t2.c;

View File

@ -100,6 +100,33 @@ SELECT t1.a, ss.t2a, ss.t2c FROM prt1 t1 LEFT JOIN LATERAL
(SELECT t2.a AS t2a, t3.a AS t3a, t2.b t2b, t2.c t2c, least(t1.a,t2.a,t3.a) FROM prt1 t2 JOIN prt2 t3 ON (t2.a = t3.b)) ss
ON t1.c = ss.t2c WHERE (t1.b + coalesce(ss.t2b, 0)) = 0 ORDER BY t1.a;
SET max_parallel_workers_per_gather = 0;
-- If there are lateral references to the other relation in sample scan,
-- we cannot generate a partitionwise join.
EXPLAIN (COSTS OFF)
SELECT * FROM prt1 t1 JOIN LATERAL
(SELECT * FROM prt1 t2 TABLESAMPLE SYSTEM (t1.a) REPEATABLE(t1.b)) s
ON t1.a = s.a;
-- If there are lateral references to the other relation in scan's restriction
-- clauses, we cannot generate a partitionwise join.
EXPLAIN (COSTS OFF)
SELECT count(*) FROM prt1 t1 LEFT JOIN LATERAL
(SELECT t1.b AS t1b, t2.* FROM prt2 t2) s
ON t1.a = s.b WHERE s.t1b = s.a;
SELECT count(*) FROM prt1 t1 LEFT JOIN LATERAL
(SELECT t1.b AS t1b, t2.* FROM prt2 t2) s
ON t1.a = s.b WHERE s.t1b = s.a;
EXPLAIN (COSTS OFF)
SELECT count(*) FROM prt1 t1 LEFT JOIN LATERAL
(SELECT t1.b AS t1b, t2.* FROM prt2 t2) s
ON t1.a = s.b WHERE s.t1b = s.b;
SELECT count(*) FROM prt1 t1 LEFT JOIN LATERAL
(SELECT t1.b AS t1b, t2.* FROM prt2 t2) s
ON t1.a = s.b WHERE s.t1b = s.b;
RESET max_parallel_workers_per_gather;
-- bug with inadequate sort key representation
SET enable_partitionwise_aggregate TO true;
SET enable_hashjoin TO false;
@ -387,6 +414,27 @@ SELECT * FROM prt1_l t1 LEFT JOIN LATERAL
(SELECT t2.a AS t2a, t2.c AS t2c, t2.b AS t2b, t3.b AS t3b, least(t1.a,t2.a,t3.b) FROM prt1_l t2 JOIN prt2_l t3 ON (t2.a = t3.b AND t2.c = t3.c)) ss
ON t1.a = ss.t2a AND t1.c = ss.t2c WHERE t1.b = 0 ORDER BY t1.a;
SET max_parallel_workers_per_gather = 0;
-- If there are lateral references to the other relation in sample scan,
-- we cannot generate a partitionwise join.
EXPLAIN (COSTS OFF)
SELECT * FROM prt1_l t1 JOIN LATERAL
(SELECT * FROM prt1_l t2 TABLESAMPLE SYSTEM (t1.a) REPEATABLE(t1.b)) s
ON t1.a = s.a AND t1.b = s.b AND t1.c = s.c;
-- If there are lateral references to the other relation in scan's restriction
-- clauses, we cannot generate a partitionwise join.
EXPLAIN (COSTS OFF)
SELECT COUNT(*) FROM prt1_l t1 LEFT JOIN LATERAL
(SELECT t1.b AS t1b, t2.* FROM prt2_l t2) s
ON t1.a = s.b AND t1.b = s.a AND t1.c = s.c
WHERE s.t1b = s.a;
SELECT COUNT(*) FROM prt1_l t1 LEFT JOIN LATERAL
(SELECT t1.b AS t1b, t2.* FROM prt2_l t2) s
ON t1.a = s.b AND t1.b = s.a AND t1.c = s.c
WHERE s.t1b = s.a;
RESET max_parallel_workers_per_gather;
-- join with one side empty
EXPLAIN (COSTS OFF)
SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1_l WHERE a = 1 AND a = 2) t1 RIGHT JOIN prt2_l t2 ON t1.a = t2.b AND t1.b = t2.a AND t1.c = t2.c;