Generate EquivalenceClass members for partitionwise child join rels.

Commit d25ea0127 got rid of what I thought were entirely unnecessary
derived child expressions in EquivalenceClasses for EC members that
mention multiple baserels.  But it turns out that some of the child
expressions that code created are necessary for partitionwise joins,
else we fail to find matching pathkeys for Sort nodes.  (This happens
only for certain shapes of the resulting plan; it may be that
partitionwise aggregation is also necessary to show the failure,
though I'm not sure of that.)

Reverting that commit entirely would be quite painful performance-wise
for large partition sets.  So instead, add code that explicitly
generates child expressions that match only partitionwise child join
rels we have actually generated.

Per report from Justin Pryzby.  (Amit Langote noticed the problem
earlier, though it's not clear if he recognized then that it could
result in a planner error, not merely failure to exploit partitionwise
join, in the code as-committed.)  Back-patch to v12 where commit
d25ea0127 came in.

Amit Langote, with lots of kibitzing from me

Discussion: https://postgr.es/m/CA+HiwqG2WVUGmLJqtR0tPFhniO=H=9qQ+Z3L_ZC+Y3-EVQHFGg@mail.gmail.com
Discussion: https://postgr.es/m/20191011143703.GN10470@telsasoft.com
This commit is contained in:
Tom Lane 2019-11-05 11:42:24 -05:00
parent 2a4d96ebbd
commit 529ebb20aa
5 changed files with 259 additions and 14 deletions

View File

@ -1132,7 +1132,7 @@ generate_join_implied_equalities(PlannerInfo *root,
Relids inner_relids = inner_rel->relids;
Relids nominal_inner_relids;
Relids nominal_join_relids;
Bitmapset * matching_ecs;
Bitmapset *matching_ecs;
int i;
/* If inner rel is a child, extra setup work is needed */
@ -2209,7 +2209,7 @@ match_eclasses_to_foreign_key_col(PlannerInfo *root,
/*
* add_child_rel_equivalences
* Search for EC members that reference the parent_rel, and
* Search for EC members that reference the root parent of child_rel, and
* add transformed members referencing the child_rel.
*
* Note that this function won't be called at all unless we have at least some
@ -2217,6 +2217,12 @@ match_eclasses_to_foreign_key_col(PlannerInfo *root,
*
* parent_rel and child_rel could be derived from appinfo, but since the
* caller has already computed them, we might as well just pass them in.
*
* The passed-in AppendRelInfo is not used when the parent_rel is not a
* top-level baserel, since it shows the mapping from the parent_rel but
* we need to translate EC expressions that refer to the top-level parent.
* Using it is faster than using adjust_appendrel_attrs_multilevel(), though,
* so we prefer it when we can.
*/
void
add_child_rel_equivalences(PlannerInfo *root,
@ -2224,6 +2230,8 @@ add_child_rel_equivalences(PlannerInfo *root,
RelOptInfo *parent_rel,
RelOptInfo *child_rel)
{
Relids top_parent_relids = child_rel->top_parent_relids;
Relids child_relids = child_rel->relids;
int i;
/*
@ -2248,7 +2256,7 @@ add_child_rel_equivalences(PlannerInfo *root,
continue;
/* Sanity check eclass_indexes only contain ECs for parent_rel */
Assert(bms_is_subset(child_rel->top_parent_relids, cur_ec->ec_relids));
Assert(bms_is_subset(top_parent_relids, cur_ec->ec_relids));
/*
* We don't use foreach() here because there's no point in scanning
@ -2268,13 +2276,14 @@ add_child_rel_equivalences(PlannerInfo *root,
* already-transformed child members. Otherwise, if some original
* member expression references more than one appendrel, we'd get
* an O(N^2) explosion of useless derived expressions for
* combinations of children.
* combinations of children. (But add_child_join_rel_equivalences
* may add targeted combinations for partitionwise-join purposes.)
*/
if (cur_em->em_is_child)
continue; /* ignore children here */
/* Does this member reference child's topmost parent rel? */
if (bms_overlap(cur_em->em_relids, child_rel->top_parent_relids))
if (bms_overlap(cur_em->em_relids, top_parent_relids))
{
/* Yes, generate transformed child version */
Expr *child_expr;
@ -2295,8 +2304,8 @@ add_child_rel_equivalences(PlannerInfo *root,
child_expr = (Expr *)
adjust_appendrel_attrs_multilevel(root,
(Node *) cur_em->em_expr,
child_rel->relids,
child_rel->top_parent_relids);
child_relids,
top_parent_relids);
}
/*
@ -2306,21 +2315,20 @@ add_child_rel_equivalences(PlannerInfo *root,
* don't want the child member to be marked as constant.
*/
new_relids = bms_difference(cur_em->em_relids,
child_rel->top_parent_relids);
new_relids = bms_add_members(new_relids, child_rel->relids);
top_parent_relids);
new_relids = bms_add_members(new_relids, child_relids);
/*
* And likewise for nullable_relids. Note this code assumes
* parent and child relids are singletons.
*/
new_nullable_relids = cur_em->em_nullable_relids;
if (bms_overlap(new_nullable_relids,
child_rel->top_parent_relids))
if (bms_overlap(new_nullable_relids, top_parent_relids))
{
new_nullable_relids = bms_difference(new_nullable_relids,
child_rel->top_parent_relids);
top_parent_relids);
new_nullable_relids = bms_add_members(new_nullable_relids,
child_rel->relids);
child_relids);
}
(void) add_eq_member(cur_ec, child_expr,
@ -2334,6 +2342,133 @@ add_child_rel_equivalences(PlannerInfo *root,
}
}
/*
* add_child_join_rel_equivalences
* Like add_child_rel_equivalences(), but for joinrels
*
* Here we find the ECs relevant to the top parent joinrel and add transformed
* member expressions that refer to this child joinrel.
*
* Note that this function won't be called at all unless we have at least some
* reason to believe that the EC members it generates will be useful.
*/
void
add_child_join_rel_equivalences(PlannerInfo *root,
int nappinfos, AppendRelInfo **appinfos,
RelOptInfo *parent_joinrel,
RelOptInfo *child_joinrel)
{
Relids top_parent_relids = child_joinrel->top_parent_relids;
Relids child_relids = child_joinrel->relids;
Bitmapset *matching_ecs;
int i;
Assert(IS_JOIN_REL(child_joinrel) && IS_JOIN_REL(parent_joinrel));
/* We need consider only ECs that mention the parent joinrel */
matching_ecs = get_eclass_indexes_for_relids(root, top_parent_relids);
i = -1;
while ((i = bms_next_member(matching_ecs, i)) >= 0)
{
EquivalenceClass *cur_ec = (EquivalenceClass *) list_nth(root->eq_classes, i);
int num_members;
/*
* If this EC contains a volatile expression, then generating child
* EMs would be downright dangerous, so skip it. We rely on a
* volatile EC having only one EM.
*/
if (cur_ec->ec_has_volatile)
continue;
/* Sanity check on get_eclass_indexes_for_relids result */
Assert(bms_overlap(top_parent_relids, cur_ec->ec_relids));
/*
* We don't use foreach() here because there's no point in scanning
* newly-added child members, so we can stop after the last
* pre-existing EC member.
*/
num_members = list_length(cur_ec->ec_members);
for (int pos = 0; pos < num_members; pos++)
{
EquivalenceMember *cur_em = (EquivalenceMember *) list_nth(cur_ec->ec_members, pos);
if (cur_em->em_is_const)
continue; /* ignore consts here */
/*
* We consider only original EC members here, not
* already-transformed child members.
*/
if (cur_em->em_is_child)
continue; /* ignore children here */
/*
* We may ignore expressions that reference a single baserel,
* because add_child_rel_equivalences should have handled them.
*/
if (bms_membership(cur_em->em_relids) != BMS_MULTIPLE)
continue;
/* Does this member reference child's topmost parent rel? */
if (bms_overlap(cur_em->em_relids, top_parent_relids))
{
/* Yes, generate transformed child version */
Expr *child_expr;
Relids new_relids;
Relids new_nullable_relids;
if (parent_joinrel->reloptkind == RELOPT_JOINREL)
{
/* Simple single-level transformation */
child_expr = (Expr *)
adjust_appendrel_attrs(root,
(Node *) cur_em->em_expr,
nappinfos, appinfos);
}
else
{
/* Must do multi-level transformation */
Assert(parent_joinrel->reloptkind == RELOPT_OTHER_JOINREL);
child_expr = (Expr *)
adjust_appendrel_attrs_multilevel(root,
(Node *) cur_em->em_expr,
child_relids,
top_parent_relids);
}
/*
* Transform em_relids to match. Note we do *not* do
* pull_varnos(child_expr) here, as for example the
* transformation might have substituted a constant, but we
* don't want the child member to be marked as constant.
*/
new_relids = bms_difference(cur_em->em_relids,
top_parent_relids);
new_relids = bms_add_members(new_relids, child_relids);
/*
* For nullable_relids, we must selectively replace parent
* nullable relids with child ones.
*/
new_nullable_relids = cur_em->em_nullable_relids;
if (bms_overlap(new_nullable_relids, top_parent_relids))
new_nullable_relids =
adjust_child_relids_multilevel(root,
new_nullable_relids,
child_relids,
top_parent_relids);
(void) add_eq_member(cur_ec, child_expr,
new_relids, new_nullable_relids,
true, cur_em->em_datatype);
}
}
}
}
/*
* generate_implied_equalities_for_column

View File

@ -843,6 +843,7 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel,
/* Compute information relevant to foreign relations. */
set_foreign_rel_properties(joinrel, outer_rel, inner_rel);
/* Compute information needed for mapping Vars to the child rel */
appinfos = find_appinfos_by_relids(root, joinrel->relids, &nappinfos);
/* Set up reltarget struct */
@ -854,7 +855,6 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel,
(Node *) parent_joinrel->joininfo,
nappinfos,
appinfos);
pfree(appinfos);
/*
* Lateral relids referred in child join will be same as that referred in
@ -886,6 +886,19 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel,
/* Add the relation to the PlannerInfo. */
add_join_rel(root, joinrel);
/*
* We might need EquivalenceClass members corresponding to the child join,
* so that we can represent sort pathkeys for it. As with children of
* baserels, we shouldn't need this unless there are relevant eclass joins
* (implying that a merge join might be possible) or pathkeys to sort by.
*/
if (joinrel->has_eclass_joins || has_useful_pathkeys(root, parent_joinrel))
add_child_join_rel_equivalences(root,
nappinfos, appinfos,
parent_joinrel, joinrel);
pfree(appinfos);
return joinrel;
}

View File

@ -153,6 +153,11 @@ extern void add_child_rel_equivalences(PlannerInfo *root,
AppendRelInfo *appinfo,
RelOptInfo *parent_rel,
RelOptInfo *child_rel);
extern void add_child_join_rel_equivalences(PlannerInfo *root,
int nappinfos,
AppendRelInfo **appinfos,
RelOptInfo *parent_rel,
RelOptInfo *child_rel);
extern List *generate_implied_equalities_for_column(PlannerInfo *root,
RelOptInfo *rel,
ec_matches_callback_type callback,

View File

@ -459,6 +459,83 @@ SELECT t1.a, ss.t2a, ss.t2c FROM prt1 t1 LEFT JOIN LATERAL
550 | |
(12 rows)
-- bug with inadequate sort key representation
SET enable_partitionwise_aggregate TO true;
SET enable_hashjoin TO false;
EXPLAIN (COSTS OFF)
SELECT a, b FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b)
WHERE a BETWEEN 490 AND 510
GROUP BY 1, 2 ORDER BY 1, 2;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------
Group
Group Key: (COALESCE(prt1_p1.a, p2.a)), (COALESCE(prt1_p1.b, p2.b))
-> Merge Append
Sort Key: (COALESCE(prt1_p1.a, p2.a)), (COALESCE(prt1_p1.b, p2.b))
-> Group
Group Key: (COALESCE(prt1_p1.a, p2.a)), (COALESCE(prt1_p1.b, p2.b))
-> Sort
Sort Key: (COALESCE(prt1_p1.a, p2.a)), (COALESCE(prt1_p1.b, p2.b))
-> Merge Full Join
Merge Cond: ((prt1_p1.a = p2.a) AND (prt1_p1.b = p2.b))
Filter: ((COALESCE(prt1_p1.a, p2.a) >= 490) AND (COALESCE(prt1_p1.a, p2.a) <= 510))
-> Sort
Sort Key: prt1_p1.a, prt1_p1.b
-> Seq Scan on prt1_p1
-> Sort
Sort Key: p2.a, p2.b
-> Seq Scan on prt2_p1 p2
-> Group
Group Key: (COALESCE(prt1_p2.a, p2_1.a)), (COALESCE(prt1_p2.b, p2_1.b))
-> Sort
Sort Key: (COALESCE(prt1_p2.a, p2_1.a)), (COALESCE(prt1_p2.b, p2_1.b))
-> Merge Full Join
Merge Cond: ((prt1_p2.a = p2_1.a) AND (prt1_p2.b = p2_1.b))
Filter: ((COALESCE(prt1_p2.a, p2_1.a) >= 490) AND (COALESCE(prt1_p2.a, p2_1.a) <= 510))
-> Sort
Sort Key: prt1_p2.a, prt1_p2.b
-> Seq Scan on prt1_p2
-> Sort
Sort Key: p2_1.a, p2_1.b
-> Seq Scan on prt2_p2 p2_1
-> Group
Group Key: (COALESCE(prt1_p3.a, p2_2.a)), (COALESCE(prt1_p3.b, p2_2.b))
-> Sort
Sort Key: (COALESCE(prt1_p3.a, p2_2.a)), (COALESCE(prt1_p3.b, p2_2.b))
-> Merge Full Join
Merge Cond: ((prt1_p3.a = p2_2.a) AND (prt1_p3.b = p2_2.b))
Filter: ((COALESCE(prt1_p3.a, p2_2.a) >= 490) AND (COALESCE(prt1_p3.a, p2_2.a) <= 510))
-> Sort
Sort Key: prt1_p3.a, prt1_p3.b
-> Seq Scan on prt1_p3
-> Sort
Sort Key: p2_2.a, p2_2.b
-> Seq Scan on prt2_p3 p2_2
(43 rows)
SELECT a, b FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b)
WHERE a BETWEEN 490 AND 510
GROUP BY 1, 2 ORDER BY 1, 2;
a | b
-----+----
490 | 15
492 | 17
494 | 19
495 | 20
496 | 21
498 | 23
500 | 0
501 | 1
502 | 2
504 | 4
506 | 6
507 | 7
508 | 8
510 | 10
(14 rows)
RESET enable_partitionwise_aggregate;
RESET enable_hashjoin;
--
-- partitioned by expression
--

View File

@ -91,6 +91,21 @@ SELECT t1.a, ss.t2a, ss.t2c FROM prt1 t1 LEFT JOIN LATERAL
(SELECT t2.a AS t2a, t3.a AS t3a, t2.b t2b, t2.c t2c, least(t1.a,t2.a,t3.a) FROM prt1 t2 JOIN prt2 t3 ON (t2.a = t3.b)) ss
ON t1.c = ss.t2c WHERE (t1.b + coalesce(ss.t2b, 0)) = 0 ORDER BY t1.a;
-- bug with inadequate sort key representation
SET enable_partitionwise_aggregate TO true;
SET enable_hashjoin TO false;
EXPLAIN (COSTS OFF)
SELECT a, b FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b)
WHERE a BETWEEN 490 AND 510
GROUP BY 1, 2 ORDER BY 1, 2;
SELECT a, b FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b)
WHERE a BETWEEN 490 AND 510
GROUP BY 1, 2 ORDER BY 1, 2;
RESET enable_partitionwise_aggregate;
RESET enable_hashjoin;
--
-- partitioned by expression
--