Fix join-removal logic for pseudoconstant and outerjoin-delayed quals.

In these cases a qual can get marked with the removable rel in its
required_relids, but this is just to schedule its evaluation correctly, not
because it really depends on the rel.  We were assuming that, in effect,
we could throw away *all* quals so marked, which is nonsense.  Tighten up
the logic to be a little more paranoid about which quals belong to the
outer join being considered for removal, and arrange for all quals that
don't belong to be updated so they will still get evaluated correctly.

Also fix another problem that happened to be exposed by this test case,
which was that make_join_rel() was failing to notice some cases where
a constant-false qual could be used to prove a join relation empty.  If it's
a pushed-down constant false, then the relation is empty even if it's an
outer join, because the qual applies after the outer join expansion.

Per report from Nathan Grange.  Back-patch into 9.0.
This commit is contained in:
Tom Lane 2010-09-14 23:15:29 +00:00
parent 3522217b63
commit 4e97631e6a
6 changed files with 187 additions and 31 deletions

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/path/joinrels.c,v 1.105 2010/02/26 02:00:45 momjian Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/path/joinrels.c,v 1.106 2010/09/14 23:15:29 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -29,7 +29,8 @@ static bool has_join_restriction(PlannerInfo *root, RelOptInfo *rel);
static bool has_legal_joinclause(PlannerInfo *root, RelOptInfo *rel);
static bool is_dummy_rel(RelOptInfo *rel);
static void mark_dummy_rel(RelOptInfo *rel);
static bool restriction_is_constant_false(List *restrictlist);
static bool restriction_is_constant_false(List *restrictlist,
bool only_pushed_down);
/*
@ -603,7 +604,10 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
*
* Also, a provably constant-false join restriction typically means that
* we can skip evaluating one or both sides of the join. We do this by
* marking the appropriate rel as dummy.
* marking the appropriate rel as dummy. For outer joins, a constant-false
* restriction that is pushed down still means the whole join is dummy,
* while a non-pushed-down one means that no inner rows will join so we
* can treat the inner rel as dummy.
*
* We need only consider the jointypes that appear in join_info_list, plus
* JOIN_INNER.
@ -612,7 +616,7 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
{
case JOIN_INNER:
if (is_dummy_rel(rel1) || is_dummy_rel(rel2) ||
restriction_is_constant_false(restrictlist))
restriction_is_constant_false(restrictlist, false))
{
mark_dummy_rel(joinrel);
break;
@ -625,12 +629,13 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
restrictlist);
break;
case JOIN_LEFT:
if (is_dummy_rel(rel1))
if (is_dummy_rel(rel1) ||
restriction_is_constant_false(restrictlist, true))
{
mark_dummy_rel(joinrel);
break;
}
if (restriction_is_constant_false(restrictlist) &&
if (restriction_is_constant_false(restrictlist, false) &&
bms_is_subset(rel2->relids, sjinfo->syn_righthand))
mark_dummy_rel(rel2);
add_paths_to_joinrel(root, joinrel, rel1, rel2,
@ -641,7 +646,8 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
restrictlist);
break;
case JOIN_FULL:
if (is_dummy_rel(rel1) && is_dummy_rel(rel2))
if ((is_dummy_rel(rel1) && is_dummy_rel(rel2)) ||
restriction_is_constant_false(restrictlist, true))
{
mark_dummy_rel(joinrel);
break;
@ -665,7 +671,7 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
bms_is_subset(sjinfo->min_righthand, rel2->relids))
{
if (is_dummy_rel(rel1) || is_dummy_rel(rel2) ||
restriction_is_constant_false(restrictlist))
restriction_is_constant_false(restrictlist, false))
{
mark_dummy_rel(joinrel);
break;
@ -687,6 +693,12 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
create_unique_path(root, rel2, rel2->cheapest_total_path,
sjinfo) != NULL)
{
if (is_dummy_rel(rel1) || is_dummy_rel(rel2) ||
restriction_is_constant_false(restrictlist, false))
{
mark_dummy_rel(joinrel);
break;
}
add_paths_to_joinrel(root, joinrel, rel1, rel2,
JOIN_UNIQUE_INNER, sjinfo,
restrictlist);
@ -696,12 +708,13 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
}
break;
case JOIN_ANTI:
if (is_dummy_rel(rel1))
if (is_dummy_rel(rel1) ||
restriction_is_constant_false(restrictlist, true))
{
mark_dummy_rel(joinrel);
break;
}
if (restriction_is_constant_false(restrictlist) &&
if (restriction_is_constant_false(restrictlist, false) &&
bms_is_subset(rel2->relids, sjinfo->syn_righthand))
mark_dummy_rel(rel2);
add_paths_to_joinrel(root, joinrel, rel1, rel2,
@ -947,9 +960,11 @@ mark_dummy_rel(RelOptInfo *rel)
* join situations this will leave us computing cartesian products only to
* decide there's no match for an outer row, which is pretty stupid. So,
* we need to detect the case.
*
* If only_pushed_down is TRUE, then consider only pushed-down quals.
*/
static bool
restriction_is_constant_false(List *restrictlist)
restriction_is_constant_false(List *restrictlist, bool only_pushed_down)
{
ListCell *lc;
@ -964,6 +979,9 @@ restriction_is_constant_false(List *restrictlist)
RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
Assert(IsA(rinfo, RestrictInfo));
if (only_pushed_down && !rinfo->is_pushed_down)
continue;
if (rinfo->clause && IsA(rinfo->clause, Const))
{
Const *con = (Const *) rinfo->clause;

View File

@ -16,19 +16,21 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/plan/analyzejoins.c,v 1.3 2010/07/06 19:18:56 momjian Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/plan/analyzejoins.c,v 1.4 2010/09/14 23:15:29 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "optimizer/joininfo.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
#include "optimizer/planmain.h"
/* local functions */
static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo);
static void remove_rel_from_query(PlannerInfo *root, int relid);
static void remove_rel_from_query(PlannerInfo *root, int relid,
Relids joinrelids);
static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved);
@ -67,7 +69,9 @@ restart:
*/
innerrelid = bms_singleton_member(sjinfo->min_righthand);
remove_rel_from_query(root, innerrelid);
remove_rel_from_query(root, innerrelid,
bms_union(sjinfo->min_lefthand,
sjinfo->min_righthand));
/* We verify that exactly one reference gets removed from joinlist */
nremoved = 0;
@ -216,19 +220,25 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
{
RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
/* Ignore clauses not pertinent to this join */
if (!bms_is_subset(restrictinfo->required_relids, joinrelids))
continue;
/*
* If we find a pushed-down clause, it must have come from above the
* outer join and it must contain references to the inner rel. (If it
* had only outer-rel variables, it'd have been pushed down into the
* outer rel.) Therefore, we can conclude that join removal is unsafe
* without any examination of the clause contents.
* If it's not a join clause for this outer join, we can't use it.
* Note that if the clause is pushed-down, then it is logically from
* above the outer join, even if it references no other rels (it might
* be from WHERE, for example).
*/
if (restrictinfo->is_pushed_down)
return false;
if (restrictinfo->is_pushed_down ||
!bms_equal(restrictinfo->required_relids, joinrelids))
{
/*
* If such a clause actually references the inner rel then
* join removal has to be disallowed. We have to check this
* despite the previous attr_needed checks because of the
* possibility of pushed-down clauses referencing the rel.
*/
if (bms_is_member(innerrelid, restrictinfo->clause_relids))
return false;
continue; /* else, ignore; not useful here */
}
/* Ignore if it's not a mergejoinable clause */
if (!restrictinfo->can_join ||
@ -299,14 +309,14 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
* We are not terribly thorough here. We must make sure that the rel is
* no longer treated as a baserel, and that attributes of other baserels
* are no longer marked as being needed at joins involving this rel.
* In particular, we don't bother removing join quals involving the rel from
* the joininfo lists; they'll just get ignored, since we will never form a
* join relation at which they could be evaluated.
* Also, join quals involving the rel have to be removed from the joininfo
* lists, but only if they belong to the outer join identified by joinrelids.
*/
static void
remove_rel_from_query(PlannerInfo *root, int relid)
remove_rel_from_query(PlannerInfo *root, int relid, Relids joinrelids)
{
RelOptInfo *rel = find_base_rel(root, relid);
List *joininfos;
Index rti;
ListCell *l;
@ -379,6 +389,43 @@ remove_rel_from_query(PlannerInfo *root, int relid)
phinfo->ph_needed = bms_del_member(phinfo->ph_needed, relid);
}
/*
* Remove any joinquals referencing the rel from the joininfo lists.
*
* In some cases, a joinqual has to be put back after deleting its
* reference to the target rel. This can occur for pseudoconstant and
* outerjoin-delayed quals, which can get marked as requiring the rel in
* order to force them to be evaluated at or above the join. We can't
* just discard them, though. Only quals that logically belonged to the
* outer join being discarded should be removed from the query.
*
* We must make a copy of the rel's old joininfo list before starting the
* loop, because otherwise remove_join_clause_from_rels would destroy the
* list while we're scanning it.
*/
joininfos = list_copy(rel->joininfo);
foreach(l, joininfos)
{
RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
remove_join_clause_from_rels(root, rinfo, rinfo->required_relids);
if (rinfo->is_pushed_down ||
!bms_equal(rinfo->required_relids, joinrelids))
{
/* Recheck that qual doesn't actually reference the target rel */
Assert(!bms_is_member(relid, rinfo->clause_relids));
/*
* The required_relids probably aren't shared with anything else,
* but let's copy them just to be sure.
*/
rinfo->required_relids = bms_copy(rinfo->required_relids);
rinfo->required_relids = bms_del_member(rinfo->required_relids,
relid);
distribute_restrictinfo_to_rels(root, rinfo);
}
}
}
/*

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/util/joininfo.c,v 1.52 2010/01/02 16:57:48 momjian Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/util/joininfo.c,v 1.53 2010/09/14 23:15:29 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -98,3 +98,37 @@ add_join_clause_to_rels(PlannerInfo *root,
}
bms_free(tmprelids);
}
/*
* remove_join_clause_from_rels
* Delete 'restrictinfo' from all the joininfo lists it is in
*
* This reverses the effect of add_join_clause_to_rels. It's used when we
* discover that a relation need not be joined at all.
*
* 'restrictinfo' describes the join clause
* 'join_relids' is the list of relations participating in the join clause
* (there must be more than one)
*/
void
remove_join_clause_from_rels(PlannerInfo *root,
RestrictInfo *restrictinfo,
Relids join_relids)
{
Relids tmprelids;
int cur_relid;
tmprelids = bms_copy(join_relids);
while ((cur_relid = bms_first_member(tmprelids)) >= 0)
{
RelOptInfo *rel = find_base_rel(root, cur_relid);
/*
* Remove the restrictinfo from the list. Pointer comparison is
* sufficient.
*/
Assert(list_member_ptr(rel->joininfo, restrictinfo));
rel->joininfo = list_delete_ptr(rel->joininfo, restrictinfo);
}
bms_free(tmprelids);
}

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/optimizer/joininfo.h,v 1.38 2010/01/02 16:58:07 momjian Exp $
* $PostgreSQL: pgsql/src/include/optimizer/joininfo.h,v 1.39 2010/09/14 23:15:29 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -23,5 +23,8 @@ extern bool have_relevant_joinclause(PlannerInfo *root,
extern void add_join_clause_to_rels(PlannerInfo *root,
RestrictInfo *restrictinfo,
Relids join_relids);
extern void remove_join_clause_from_rels(PlannerInfo *root,
RestrictInfo *restrictinfo,
Relids join_relids);
#endif /* JOININFO_H */

View File

@ -2586,6 +2586,43 @@ explain (costs off)
-> Seq Scan on child c
(5 rows)
-- check for a 9.0rc1 bug: join removal breaks pseudoconstant qual handling
select p.* from
parent p left join child c on (p.k = c.k)
where p.k = 1 and p.k = 2;
k | pd
---+----
(0 rows)
explain (costs off)
select p.* from
parent p left join child c on (p.k = c.k)
where p.k = 1 and p.k = 2;
QUERY PLAN
------------------------------------------------
Result
One-Time Filter: false
-> Index Scan using parent_pkey on parent p
Index Cond: (k = 1)
(4 rows)
select p.* from
(parent p left join child c on (p.k = c.k)) join parent x on p.k = x.k
where p.k = 1 and p.k = 2;
k | pd
---+----
(0 rows)
explain (costs off)
select p.* from
(parent p left join child c on (p.k = c.k)) join parent x on p.k = x.k
where p.k = 1 and p.k = 2;
QUERY PLAN
--------------------------
Result
One-Time Filter: false
(2 rows)
-- bug 5255: this is not optimizable by join removal
begin;
CREATE TEMP TABLE a (id int PRIMARY KEY);

View File

@ -615,6 +615,23 @@ explain (costs off)
left join (select c.*, true as linked from child c) as ss
on (p.k = ss.k);
-- check for a 9.0rc1 bug: join removal breaks pseudoconstant qual handling
select p.* from
parent p left join child c on (p.k = c.k)
where p.k = 1 and p.k = 2;
explain (costs off)
select p.* from
parent p left join child c on (p.k = c.k)
where p.k = 1 and p.k = 2;
select p.* from
(parent p left join child c on (p.k = c.k)) join parent x on p.k = x.k
where p.k = 1 and p.k = 2;
explain (costs off)
select p.* from
(parent p left join child c on (p.k = c.k)) join parent x on p.k = x.k
where p.k = 1 and p.k = 2;
-- bug 5255: this is not optimizable by join removal
begin;