Use parameterized paths to generate inner indexscans more flexibly.

This patch fixes the planner so that it can generate nestloop-with-
inner-indexscan plans even with one or more levels of joining between
the indexscan and the nestloop join that is supplying the parameter.
The executor was fixed to handle such cases some time ago, but the
planner was not ready.  This should improve our plans in many situations
where join ordering restrictions formerly forced complete table scans.

There is probably a fair amount of tuning work yet to be done, because
of various heuristics that have been added to limit the number of
parameterized paths considered.  However, we are not going to find out
what needs to be adjusted until the code gets some real-world use, so
it's time to get it in there where it can be tested easily.

Note API change for index AM amcostestimate functions.  I'm not aware of
any non-core index AMs, but if there are any, they will need minor
adjustments.
This commit is contained in:
Tom Lane 2012-01-27 19:26:38 -05:00
parent b376ec6fa5
commit e2fa76d80b
26 changed files with 3884 additions and 2292 deletions

View File

@ -289,7 +289,7 @@ amcanreturn (Relation indexRelation);
void
amcostestimate (PlannerInfo *root,
IndexPath *path,
RelOptInfo *outer_rel,
double loop_count,
Cost *indexStartupCost,
Cost *indexTotalCost,
Selectivity *indexSelectivity,
@ -928,7 +928,7 @@ amrestrpos (IndexScanDesc scan);
void
amcostestimate (PlannerInfo *root,
IndexPath *path,
RelOptInfo *outer_rel,
double loop_count,
Cost *indexStartupCost,
Cost *indexTotalCost,
Selectivity *indexSelectivity,
@ -958,16 +958,15 @@ amcostestimate (PlannerInfo *root,
</varlistentry>
<varlistentry>
<term><parameter>outer_rel</></term>
<term><parameter>loop_count</></term>
<listitem>
<para>
If the index is being considered for use in a join inner indexscan,
the planner's information about the outer side of the join. Otherwise
<symbol>NULL</>. When non-<symbol>NULL</>, some of the qual clauses
will be join clauses for joins
with this rel rather than being simple restriction clauses. Also,
the cost estimator should expect that the index scan will be repeated
for each row of the outer rel.
The number of repetitions of the index scan that should be factored
into the cost estimates. This will typically be greater than one when
considering a parameterized scan for use in the inside of a nestloop
join. Note that the cost estimates should still be for just one scan;
a larger <parameter>loop_count</> means that it may be appropriate
to allow for some caching effects across multiple scans.
</para>
</listitem>
</varlistentry>
@ -1062,8 +1061,8 @@ amcostestimate (PlannerInfo *root,
</para>
<para>
In the join case, the returned numbers should be averages expected for
any one scan of the index.
When <parameter>loop_count</> is greater than one, the returned numbers
should be averages expected for any one scan of the index.
</para>
<procedure>
@ -1121,7 +1120,7 @@ cost_qual_eval(&amp;index_qual_cost, path-&gt;indexquals, root);
</programlisting>
However, the above does not account for amortization of index reads
across repeated index scans in the join case.
across repeated index scans.
</para>
</step>

View File

@ -335,6 +335,83 @@ bms_is_subset(const Bitmapset *a, const Bitmapset *b)
return true;
}
/*
 * bms_subset_compare - classify the subset relationship between A and B
 *
 * Result is one of:
 *		BMS_EQUAL		both sets have the same members
 *		BMS_SUBSET1		A is a proper subset of B
 *		BMS_SUBSET2		B is a proper subset of A
 *		BMS_DIFFERENT	each set has a member the other lacks
 *
 * A NULL pointer is treated as an empty set.  Doing the classification in
 * one pass is cheaper than calling bms_is_subset once in each direction.
 */
BMS_Comparison
bms_subset_compare(const Bitmapset *a, const Bitmapset *b)
{
	BMS_Comparison status = BMS_EQUAL;	/* verdict accumulated so far */
	BMS_Comparison tail_verdict;
	BMS_Comparison tail_conflict;
	const Bitmapset *longer;
	int			ncommon;
	int			wordnum;

	/* Dispose of NULL inputs first; the other side may still be all-zero */
	if (a == NULL && b == NULL)
		return BMS_EQUAL;
	if (a == NULL)
		return bms_is_empty(b) ? BMS_EQUAL : BMS_SUBSET1;
	if (b == NULL)
		return bms_is_empty(a) ? BMS_EQUAL : BMS_SUBSET2;

	/* Compare the words that both sets physically have */
	ncommon = Min(a->nwords, b->nwords);
	for (wordnum = 0; wordnum < ncommon; wordnum++)
	{
		bitmapword	only_in_a = a->words[wordnum] & ~b->words[wordnum];
		bitmapword	only_in_b = b->words[wordnum] & ~a->words[wordnum];

		if (only_in_a != 0)
		{
			/* A has a member B lacks, so A cannot be a subset of B */
			if (status == BMS_SUBSET1)
				return BMS_DIFFERENT;
			status = BMS_SUBSET2;
		}
		if (only_in_b != 0)
		{
			/* B has a member A lacks, so B cannot be a subset of A */
			if (status == BMS_SUBSET2)
				return BMS_DIFFERENT;
			status = BMS_SUBSET1;
		}
	}

	/* Same physical length: nothing further to inspect */
	if (a->nwords == b->nwords)
		return status;

	/*
	 * One set has extra words.  Any nonzero extra word is a member the
	 * shorter set cannot contain, which pushes the verdict toward "the
	 * shorter set is the subset" or contradicts what we found earlier.
	 */
	if (a->nwords > b->nwords)
	{
		longer = a;
		tail_verdict = BMS_SUBSET2;
		tail_conflict = BMS_SUBSET1;
	}
	else
	{
		longer = b;
		tail_verdict = BMS_SUBSET1;
		tail_conflict = BMS_SUBSET2;
	}

	for (; wordnum < longer->nwords; wordnum++)
	{
		if (longer->words[wordnum] == 0)
			continue;
		if (status == tail_conflict)
			return BMS_DIFFERENT;
		status = tail_verdict;
	}

	return status;
}
/*
* bms_is_member - is X a member of A?
*/

View File

@ -1475,9 +1475,12 @@ _outPathInfo(StringInfo str, const Path *node)
WRITE_ENUM_FIELD(pathtype, NodeTag);
appendStringInfo(str, " :parent_relids ");
_outBitmapset(str, node->parent->relids);
WRITE_FLOAT_FIELD(rows, "%.0f");
WRITE_FLOAT_FIELD(startup_cost, "%.2f");
WRITE_FLOAT_FIELD(total_cost, "%.2f");
WRITE_NODE_FIELD(pathkeys);
WRITE_BITMAPSET_FIELD(required_outer);
WRITE_NODE_FIELD(param_clauses);
}
/*
@ -1515,11 +1518,9 @@ _outIndexPath(StringInfo str, const IndexPath *node)
WRITE_NODE_FIELD(indexqualcols);
WRITE_NODE_FIELD(indexorderbys);
WRITE_NODE_FIELD(indexorderbycols);
WRITE_BOOL_FIELD(isjoininner);
WRITE_ENUM_FIELD(indexscandir, ScanDirection);
WRITE_FLOAT_FIELD(indextotalcost, "%.2f");
WRITE_FLOAT_FIELD(indexselectivity, "%.4f");
WRITE_FLOAT_FIELD(rows, "%.0f");
}
static void
@ -1530,8 +1531,6 @@ _outBitmapHeapPath(StringInfo str, const BitmapHeapPath *node)
_outPathInfo(str, (const Path *) node);
WRITE_NODE_FIELD(bitmapqual);
WRITE_BOOL_FIELD(isjoininner);
WRITE_FLOAT_FIELD(rows, "%.0f");
}
static void
@ -1628,7 +1627,6 @@ _outUniquePath(StringInfo str, const UniquePath *node)
WRITE_ENUM_FIELD(umethod, UniquePathMethod);
WRITE_NODE_FIELD(in_operators);
WRITE_NODE_FIELD(uniq_exprs);
WRITE_FLOAT_FIELD(rows, "%.0f");
}
static void
@ -1691,6 +1689,7 @@ _outPlannerInfo(StringInfo str, const PlannerInfo *node)
WRITE_NODE_FIELD(parse);
WRITE_NODE_FIELD(glob);
WRITE_UINT_FIELD(query_level);
WRITE_BITMAPSET_FIELD(all_baserels);
WRITE_NODE_FIELD(join_rel_list);
WRITE_INT_FIELD(join_cur_level);
WRITE_NODE_FIELD(init_plans);
@ -1738,6 +1737,7 @@ _outRelOptInfo(StringInfo str, const RelOptInfo *node)
WRITE_NODE_FIELD(cheapest_startup_path);
WRITE_NODE_FIELD(cheapest_total_path);
WRITE_NODE_FIELD(cheapest_unique_path);
WRITE_NODE_FIELD(cheapest_parameterized_paths);
WRITE_UINT_FIELD(relid);
WRITE_UINT_FIELD(reltablespace);
WRITE_ENUM_FIELD(rtekind, RTEKind);
@ -1752,8 +1752,6 @@ _outRelOptInfo(StringInfo str, const RelOptInfo *node)
WRITE_NODE_FIELD(baserestrictinfo);
WRITE_NODE_FIELD(joininfo);
WRITE_BOOL_FIELD(has_eclass_joins);
WRITE_BITMAPSET_FIELD(index_outer_relids);
WRITE_NODE_FIELD(index_inner_paths);
}
static void
@ -1854,16 +1852,6 @@ _outRestrictInfo(StringInfo str, const RestrictInfo *node)
WRITE_OID_FIELD(hashjoinoperator);
}
static void
_outInnerIndexscanInfo(StringInfo str, const InnerIndexscanInfo *node)
{
WRITE_NODE_TYPE("INNERINDEXSCANINFO");
WRITE_BITMAPSET_FIELD(other_relids);
WRITE_BOOL_FIELD(isouterjoin);
WRITE_NODE_FIELD(cheapest_startup_innerpath);
WRITE_NODE_FIELD(cheapest_total_innerpath);
}
static void
_outPlaceHolderVar(StringInfo str, const PlaceHolderVar *node)
{
@ -3015,9 +3003,6 @@ _outNode(StringInfo str, const void *obj)
case T_RestrictInfo:
_outRestrictInfo(str, obj);
break;
case T_InnerIndexscanInfo:
_outInnerIndexscanInfo(str, obj);
break;
case T_PlaceHolderVar:
_outPlaceHolderVar(str, obj);
break;

View File

@ -78,10 +78,10 @@ ways. All the Paths made for a given relation are placed in its
RelOptInfo.pathlist. (Actually, we discard Paths that are obviously
inferior alternatives before they ever get into the pathlist --- what
ends up in the pathlist is the cheapest way of generating each potentially
useful sort ordering of the relation.) Also create a RelOptInfo.joininfo
list including all the join clauses that involve this relation. For
example, the WHERE clause "tab1.col1 = tab2.col1" generates entries in
both tab1 and tab2's joininfo lists.
useful sort ordering and parameterization of the relation.) Also create a
RelOptInfo.joininfo list including all the join clauses that involve this
relation. For example, the WHERE clause "tab1.col1 = tab2.col1" generates
entries in both tab1 and tab2's joininfo lists.
If we have only a single base relation in the query, we are done.
Otherwise we have to figure out how to join the base relations into a
@ -173,12 +173,12 @@ for it or the cheapest path with the desired ordering (if that's cheaper
than applying a sort to the cheapest other path).
If the query contains one-sided outer joins (LEFT or RIGHT joins), or
IN or EXISTS WHERE clauses that were converted to joins, then some of
the possible join orders may be illegal. These are excluded by having
join_is_legal consult a side list of such "special" joins to see
whether a proposed join is illegal. (The same consultation allows it
to see which join style should be applied for a valid join, ie,
JOIN_INNER, JOIN_LEFT, etc.)
IN or EXISTS WHERE clauses that were converted to semijoins or antijoins,
then some of the possible join orders may be illegal. These are excluded
by having join_is_legal consult a side list of such "special" joins to see
whether a proposed join is illegal. (The same consultation allows it to
see which join style should be applied for a valid join, ie, JOIN_INNER,
JOIN_LEFT, etc.)
Valid OUTER JOIN Optimizations
@ -526,12 +526,11 @@ multi-column index generates a list with one element per index column.
are two possible sort orders and two possible PathKey lists it can
generate.)
Note that a bitmap scan or multi-pass indexscan (OR clause scan) has NIL
pathkeys since we can say nothing about the overall order of its result.
Also, an indexscan on an unordered type of index generates NIL pathkeys.
However, we can always create a pathkey by doing an explicit sort. The
pathkeys for a Sort plan's output just represent the sort key fields and
the ordering operators used.
Note that a bitmap scan has NIL pathkeys since we can say nothing about
the overall order of its result. Also, an indexscan on an unordered type
of index generates NIL pathkeys. However, we can always create a pathkey
by doing an explicit sort. The pathkeys for a Sort plan's output just
represent the sort key fields and the ordering operators used.
Things get more interesting when we consider joins. Suppose we do a
mergejoin between A and B using the mergeclause A.X = B.Y. The output
@ -668,4 +667,102 @@ Currently this happens only for queries involving multiple window functions
with different orderings, for which extra sorts are needed anyway.
Parameterized Paths
-------------------
The naive way to join two relations using a clause like WHERE A.X = B.Y
is to generate a nestloop plan like this:
	NestLoop
		Filter: A.X = B.Y
		-> Seq Scan on A
		-> Seq Scan on B
We can make this better by using a merge or hash join, but it still
requires scanning all of both input relations. If A is very small and B is
very large, but there is an index on B.Y, it can be enormously better to do
something like this:
	NestLoop
		-> Seq Scan on A
		-> Index Scan using B_Y_IDX on B
			Index Condition: B.Y = A.X
Here, we are expecting that for each row scanned from A, the nestloop
plan node will pass down the current value of A.X into the scan of B.
That allows the indexscan to treat A.X as a constant for any one
invocation, and thereby use it as an index key. This is the only plan type
that can avoid fetching all of B, and for small numbers of rows coming from
A, that will dominate every other consideration. (As A gets larger, this
gets less attractive, and eventually a merge or hash join will win instead.
So we have to cost out all the alternatives to decide what to do.)
It can be useful for the parameter value to be passed down through
intermediate layers of joins, for example:
	NestLoop
		-> Seq Scan on A
		-> Hash Join
			Join Condition: B.Y = C.W
			-> Seq Scan on B
			-> Index Scan using C_Z_IDX on C
				Index Condition: C.Z = A.X
If all joins are plain inner joins then this is unnecessary, because
it's always possible to reorder the joins so that a parameter is used
immediately below the nestloop node that provides it. But in the
presence of outer joins, join reordering may not be possible, and then
this option can be critical. Before version 9.2, Postgres used ad-hoc
methods for planning and executing such queries, and those methods could
not handle passing parameters down through multiple join levels.
To plan such queries, we now use a notion of a "parameterized path",
which is a path that makes use of a join clause to a relation that's not
scanned by the path. In the example just above, we would construct a
path representing the possibility of doing this:
	-> Index Scan using C_Z_IDX on C
		Index Condition: C.Z = A.X
This path will be marked as being parameterized by relation A. (Note that
this is only one of the possible access paths for C; we'd still have a
plain unparameterized seqscan, and perhaps other possibilities.) The
parameterization marker does not prevent joining the path to B, so one of
the paths generated for the joinrel {B C} will represent
	Hash Join
		Join Condition: B.Y = C.W
		-> Seq Scan on B
		-> Index Scan using C_Z_IDX on C
			Index Condition: C.Z = A.X
This path is still marked as being parameterized by A. When we attempt to
join {B C} to A to form the complete join tree, such a path can only be
used as the inner side of a nestloop join: it will be ignored for other
possible join types. So we will form a join path representing the query
plan shown above, and it will compete in the usual way with paths built
from non-parameterized scans.
To limit planning time, we have to avoid generating an unreasonably large
number of parameterized paths. We do this by only generating parameterized
relation scan paths for index scans, and then only for indexes for which
suitable join clauses are available. There are also heuristics in join
planning that try to limit the number of parameterized paths considered.
In particular, there's been a deliberate policy decision to favor hash
joins over merge joins for parameterized join steps (those occurring below
a nestloop that provides parameters to the lower join's inputs). While we
do not ignore merge joins entirely, joinpath.c does not fully explore the
space of potential merge joins with parameterized inputs. Also, add_path
treats parameterized paths as having no pathkeys, so that they compete
only on cost and don't get preference for producing a special sort order.
This creates additional bias against merge joins, since we might discard
a path that could have been useful for performing a merge without an
explicit sort step. Since a parameterized path must ultimately be used
on the inside of a nestloop, where its sort order is uninteresting, these
choices do not affect any requirement for the final output order of a
query --- they only make it harder to use a merge join at a lower level.
The savings in planning work justify that.
-- bjm & tgl

View File

@ -46,13 +46,28 @@ int geqo_threshold;
join_search_hook_type join_search_hook = NULL;
static void set_base_rel_sizes(PlannerInfo *root);
static void set_base_rel_pathlists(PlannerInfo *root);
static void set_rel_size(PlannerInfo *root, RelOptInfo *rel,
Index rti, RangeTblEntry *rte);
static void set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
Index rti, RangeTblEntry *rte);
static void set_plain_rel_size(PlannerInfo *root, RelOptInfo *rel,
RangeTblEntry *rte);
static void set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
RangeTblEntry *rte);
static void set_foreign_size(PlannerInfo *root, RelOptInfo *rel,
RangeTblEntry *rte);
static void set_foreign_pathlist(PlannerInfo *root, RelOptInfo *rel,
RangeTblEntry *rte);
static void set_append_rel_size(PlannerInfo *root, RelOptInfo *rel,
Index rti, RangeTblEntry *rte);
static void set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
Index rti, RangeTblEntry *rte);
static void generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel,
List *live_childrels,
List *all_child_pathkeys,
Relids required_outer);
static List *accumulate_append_subpath(List *subpaths, Path *path);
static void set_dummy_rel_pathlist(RelOptInfo *rel);
static void set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
@ -65,8 +80,6 @@ static void set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel,
RangeTblEntry *rte);
static void set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel,
RangeTblEntry *rte);
static void set_foreign_pathlist(PlannerInfo *root, RelOptInfo *rel,
RangeTblEntry *rte);
static RelOptInfo *make_rel_from_joinlist(PlannerInfo *root, List *joinlist);
static bool subquery_is_pushdown_safe(Query *subquery, Query *topquery,
bool *differentTypes);
@ -91,10 +104,33 @@ RelOptInfo *
make_one_rel(PlannerInfo *root, List *joinlist)
{
RelOptInfo *rel;
Index rti;
/*
* Construct the all_baserels Relids set.
*/
root->all_baserels = NULL;
for (rti = 1; rti < root->simple_rel_array_size; rti++)
{
RelOptInfo *brel = root->simple_rel_array[rti];
/* there may be empty slots corresponding to non-baserel RTEs */
if (brel == NULL)
continue;
Assert(brel->relid == rti); /* sanity check on array */
/* ignore RTEs that are "other rels" */
if (brel->reloptkind != RELOPT_BASEREL)
continue;
root->all_baserels = bms_add_member(root->all_baserels, brel->relid);
}
/*
* Generate access paths for the base rels.
*/
set_base_rel_sizes(root);
set_base_rel_pathlists(root);
/*
@ -105,35 +141,41 @@ make_one_rel(PlannerInfo *root, List *joinlist)
/*
* The result should join all and only the query's base rels.
*/
#ifdef USE_ASSERT_CHECKING
{
int num_base_rels = 0;
Index rti;
for (rti = 1; rti < root->simple_rel_array_size; rti++)
{
RelOptInfo *brel = root->simple_rel_array[rti];
if (brel == NULL)
continue;
Assert(brel->relid == rti); /* sanity check on array */
/* ignore RTEs that are "other rels" */
if (brel->reloptkind != RELOPT_BASEREL)
continue;
Assert(bms_is_member(rti, rel->relids));
num_base_rels++;
}
Assert(bms_num_members(rel->relids) == num_base_rels);
}
#endif
Assert(bms_equal(rel->relids, root->all_baserels));
return rel;
}
/*
 * set_base_rel_sizes
 *	  Compute size estimates (rows and widths) for every base relation.
 *
 * This is deliberately a pass of its own, run before any paths are built,
 * so that rowcount estimates are already on hand when parameterized paths
 * are generated.
 *
 * Walks root->simple_rel_array from slot 1 upward and hands each
 * RELOPT_BASEREL entry, together with its RTE, to set_rel_size.
 */
static void
set_base_rel_sizes(PlannerInfo *root)
{
	Index		relindex;

	for (relindex = 1; relindex < root->simple_rel_array_size; relindex++)
	{
		RelOptInfo *baserel = root->simple_rel_array[relindex];

		/* slots belonging to non-baserel RTEs are left empty */
		if (baserel != NULL)
		{
			/* the array must be indexed by relid */
			Assert(baserel->relid == relindex);

			/* "other rels" are not handled by this pass */
			if (baserel->reloptkind == RELOPT_BASEREL)
				set_rel_size(root, baserel, relindex,
							 root->simple_rte_array[relindex]);
		}
	}
}
/*
* set_base_rel_pathlists
* Finds all paths available for scanning each base-relation entry.
@ -164,11 +206,11 @@ set_base_rel_pathlists(PlannerInfo *root)
}
/*
* set_rel_pathlist
* Build access paths for a base relation
* set_rel_size
* Set size estimates for a base relation
*/
static void
set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
set_rel_size(PlannerInfo *root, RelOptInfo *rel,
Index rti, RangeTblEntry *rte)
{
if (rel->reloptkind == RELOPT_BASEREL &&
@ -179,10 +221,78 @@ set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
* so set up a single dummy path for it. Here we only check this for
* regular baserels; if it's an otherrel, CE was already checked in
* set_append_rel_pathlist().
*
* In this case, we go ahead and set up the relation's path right away
* instead of leaving it for set_rel_pathlist to do. This is because
* we don't have a convention for marking a rel as dummy except by
* assigning a dummy path to it.
*/
set_dummy_rel_pathlist(rel);
}
else if (rte->inh)
{
/* It's an "append relation", process accordingly */
set_append_rel_size(root, rel, rti, rte);
}
else
{
switch (rel->rtekind)
{
case RTE_RELATION:
if (rte->relkind == RELKIND_FOREIGN_TABLE)
{
/* Foreign table */
set_foreign_size(root, rel, rte);
}
else
{
/* Plain relation */
set_plain_rel_size(root, rel, rte);
}
break;
case RTE_SUBQUERY:
/*
* Subqueries don't support parameterized paths, so just go
* ahead and build their paths immediately.
*/
set_subquery_pathlist(root, rel, rti, rte);
break;
case RTE_FUNCTION:
set_function_size_estimates(root, rel);
break;
case RTE_VALUES:
set_values_size_estimates(root, rel);
break;
case RTE_CTE:
/*
* CTEs don't support parameterized paths, so just go ahead
* and build their paths immediately.
*/
if (rte->self_reference)
set_worktable_pathlist(root, rel, rte);
else
set_cte_pathlist(root, rel, rte);
break;
default:
elog(ERROR, "unexpected rtekind: %d", (int) rel->rtekind);
break;
}
}
}
/*
* set_rel_pathlist
* Build access paths for a base relation
*/
static void
set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
Index rti, RangeTblEntry *rte)
{
if (IS_DUMMY_REL(rel))
{
/* We already proved the relation empty, so nothing more to do */
}
else if (rte->inh)
{
/* It's an "append relation", process accordingly */
set_append_rel_pathlist(root, rel, rti, rte);
@ -204,23 +314,18 @@ set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
}
break;
case RTE_SUBQUERY:
/* Subquery --- generate a separate plan for it */
set_subquery_pathlist(root, rel, rti, rte);
/* Subquery --- fully handled during set_rel_size */
break;
case RTE_FUNCTION:
/* RangeFunction --- generate a suitable path for it */
/* RangeFunction */
set_function_pathlist(root, rel, rte);
break;
case RTE_VALUES:
/* Values list --- generate a suitable path for it */
/* Values list */
set_values_pathlist(root, rel, rte);
break;
case RTE_CTE:
/* CTE reference --- generate a suitable path for it */
if (rte->self_reference)
set_worktable_pathlist(root, rel, rte);
else
set_cte_pathlist(root, rel, rte);
/* CTE reference --- fully handled during set_rel_size */
break;
default:
elog(ERROR, "unexpected rtekind: %d", (int) rel->rtekind);
@ -234,11 +339,11 @@ set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
}
/*
* set_plain_rel_pathlist
* Build access paths for a plain relation (no subquery, no inheritance)
* set_plain_rel_size
* Set size estimates for a plain relation (no subquery, no inheritance)
*/
static void
set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
set_plain_rel_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
{
/*
* Test any partial indexes of rel for applicability. We must do this
@ -259,15 +364,15 @@ set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
check_partial_indexes(root, rel);
set_baserel_size_estimates(root, rel);
}
}
/*
* Generate paths and add them to the rel's pathlist.
*
* Note: add_path() will discard any paths that are dominated by another
* available path, keeping only those paths that are superior along at
* least one dimension of cost or sortedness.
*/
/*
* set_plain_rel_pathlist
* Build access paths for a plain relation (no subquery, no inheritance)
*/
static void
set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
{
/* Consider sequential scan */
add_path(rel, create_seqscan_path(root, rel));
@ -282,8 +387,33 @@ set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
}
/*
* set_append_rel_pathlist
* Build access paths for an "append relation"
* set_foreign_size
* Set size estimates for a foreign table RTE
*/
static void
set_foreign_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
{
	/*
	 * Sizing is delegated entirely to set_foreign_size_estimates, which
	 * marks the rel with its estimated output rows, width, etc.  The rte
	 * argument is accepted but not consulted here.
	 */
	set_foreign_size_estimates(root, rel);
}
/*
 * set_foreign_pathlist
 *		Create the one and only access path for a foreign table RTE
 *
 * The rte argument is accepted but not consulted here.
 */
static void
set_foreign_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
{
	Path	   *scanpath;

	/* A foreign scan is the single candidate path offered for this rel */
	scanpath = (Path *) create_foreignscan_path(root, rel);
	add_path(rel, scanpath);

	/* Pick the cheapest path (trivial with just one candidate) */
	set_cheapest(rel);
}
/*
* set_append_rel_size
* Set size estimates for an "append relation"
*
* The passed-in rel and RTE represent the entire append relation. The
* relation's contents are computed by appending together the output of
@ -293,13 +423,10 @@ set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
* a good thing because their outputs are not the same size.
*/
static void
set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
Index rti, RangeTblEntry *rte)
set_append_rel_size(PlannerInfo *root, RelOptInfo *rel,
Index rti, RangeTblEntry *rte)
{
int parentRTindex = rti;
List *live_childrels = NIL;
List *subpaths = NIL;
List *all_child_pathkeys = NIL;
double parent_rows;
double parent_size;
double *parent_attrsizes;
@ -325,10 +452,6 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
nattrs = rel->max_attr - rel->min_attr + 1;
parent_attrsizes = (double *) palloc0(nattrs * sizeof(double));
/*
* Generate access paths for each member relation, and pick the cheapest
* path for each one.
*/
foreach(l, root->append_rel_list)
{
AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l);
@ -337,7 +460,6 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
RelOptInfo *childrel;
List *childquals;
Node *childqual;
ListCell *lcp;
ListCell *parentvars;
ListCell *childvars;
@ -394,8 +516,7 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
{
/*
* This child need not be scanned, so we can omit it from the
* appendrel. Mark it with a dummy cheapest-path though, in case
* best_appendrel_indexscan() looks at it later.
* appendrel.
*/
set_dummy_rel_pathlist(childrel);
continue;
@ -438,65 +559,20 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
*/
/*
* Compute the child's access paths.
* Compute the child's size.
*/
set_rel_pathlist(root, childrel, childRTindex, childRTE);
set_rel_size(root, childrel, childRTindex, childRTE);
/*
* It is possible that constraint exclusion detected a contradiction
* within a child subquery, even though we didn't prove one above.
* If what we got back was a dummy path, we can skip this child.
* If so, we can skip this child.
*/
if (IS_DUMMY_PATH(childrel->cheapest_total_path))
if (IS_DUMMY_REL(childrel))
continue;
/*
* Child is live, so add its cheapest access path to the Append path
* we are constructing for the parent.
*/
subpaths = accumulate_append_subpath(subpaths,
childrel->cheapest_total_path);
/* Remember which childrels are live, for MergeAppend logic below */
live_childrels = lappend(live_childrels, childrel);
/*
* Collect a list of all the available path orderings for all the
* children. We use this as a heuristic to indicate which sort
* orderings we should build MergeAppend paths for.
*/
foreach(lcp, childrel->pathlist)
{
Path *childpath = (Path *) lfirst(lcp);
List *childkeys = childpath->pathkeys;
ListCell *lpk;
bool found = false;
/* Ignore unsorted paths */
if (childkeys == NIL)
continue;
/* Have we already seen this ordering? */
foreach(lpk, all_child_pathkeys)
{
List *existing_pathkeys = (List *) lfirst(lpk);
if (compare_pathkeys(existing_pathkeys,
childkeys) == PATHKEYS_EQUAL)
{
found = true;
break;
}
}
if (!found)
{
/* No, so add it to all_child_pathkeys */
all_child_pathkeys = lappend(all_child_pathkeys, childkeys);
}
}
/*
* Accumulate size information from each child.
* Accumulate size information from each live child.
*/
if (childrel->rows > 0)
{
@ -560,24 +636,208 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
rel->tuples = parent_rows;
pfree(parent_attrsizes);
}
/*
* set_append_rel_pathlist
* Build access paths for an "append relation"
*/
static void
set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
Index rti, RangeTblEntry *rte)
{
int parentRTindex = rti;
List *live_childrels = NIL;
List *subpaths = NIL;
List *all_child_pathkeys = NIL;
List *all_child_outers = NIL;
ListCell *l;
/*
* Next, build an unordered Append path for the rel. (Note: this is
* correct even if we have zero or one live subpath due to constraint
* exclusion.)
* Generate access paths for each member relation, and remember the
* cheapest path for each one. Also, identify all pathkeys (orderings)
* and parameterizations (required_outer sets) available for the member
* relations.
*/
foreach(l, root->append_rel_list)
{
AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l);
int childRTindex;
RangeTblEntry *childRTE;
RelOptInfo *childrel;
ListCell *lcp;
/* append_rel_list contains all append rels; ignore others */
if (appinfo->parent_relid != parentRTindex)
continue;
/* Re-locate the child RTE and RelOptInfo */
childRTindex = appinfo->child_relid;
childRTE = root->simple_rte_array[childRTindex];
childrel = root->simple_rel_array[childRTindex];
/*
* Compute the child's access paths.
*/
set_rel_pathlist(root, childrel, childRTindex, childRTE);
/*
* If child is dummy, ignore it.
*/
if (IS_DUMMY_REL(childrel))
continue;
/*
* Child is live, so add its cheapest access path to the Append path
* we are constructing for the parent.
*/
subpaths = accumulate_append_subpath(subpaths,
childrel->cheapest_total_path);
/* Remember which childrels are live, for logic below */
live_childrels = lappend(live_childrels, childrel);
/*
* Collect lists of all the available path orderings and
* parameterizations for all the children. We use these as a
* heuristic to indicate which sort orderings and parameterizations we
* should build Append and MergeAppend paths for.
*/
foreach(lcp, childrel->pathlist)
{
Path *childpath = (Path *) lfirst(lcp);
List *childkeys = childpath->pathkeys;
Relids childouter = childpath->required_outer;
/* Unsorted paths don't contribute to pathkey list */
if (childkeys != NIL)
{
ListCell *lpk;
bool found = false;
/* Have we already seen this ordering? */
foreach(lpk, all_child_pathkeys)
{
List *existing_pathkeys = (List *) lfirst(lpk);
if (compare_pathkeys(existing_pathkeys,
childkeys) == PATHKEYS_EQUAL)
{
found = true;
break;
}
}
if (!found)
{
/* No, so add it to all_child_pathkeys */
all_child_pathkeys = lappend(all_child_pathkeys,
childkeys);
}
}
/* Unparameterized paths don't contribute to param-set list */
if (childouter)
{
ListCell *lco;
bool found = false;
/* Have we already seen this param set? */
foreach(lco, all_child_outers)
{
Relids existing_outers = (Relids) lfirst(lco);
if (bms_equal(existing_outers, childouter))
{
found = true;
break;
}
}
if (!found)
{
/* No, so add it to all_child_outers */
all_child_outers = lappend(all_child_outers,
childouter);
}
}
}
}
/*
* Next, build an unordered, unparameterized Append path for the rel.
* (Note: this is correct even if we have zero or one live subpath due to
* constraint exclusion.)
*/
add_path(rel, (Path *) create_append_path(rel, subpaths));
/*
* Next, build MergeAppend paths based on the collected list of child
* pathkeys. We consider both cheapest-startup and cheapest-total cases,
* ie, for each interesting ordering, collect all the cheapest startup
* subpaths and all the cheapest total paths, and build a MergeAppend path
* for each list.
* Build unparameterized MergeAppend paths based on the collected list of
* child pathkeys.
*/
foreach(l, all_child_pathkeys)
generate_mergeappend_paths(root, rel, live_childrels,
all_child_pathkeys, NULL);
/*
* Build Append and MergeAppend paths for each parameterization seen
* among the child rels. (This may look pretty expensive, but in most
* cases of practical interest, the child relations will tend to expose
* the same parameterizations and pathkeys, so that not that many cases
* actually get considered here.)
*/
foreach(l, all_child_outers)
{
List *pathkeys = (List *) lfirst(l);
Relids required_outer = (Relids) lfirst(l);
ListCell *lcr;
/* Select the child paths for an Append with this parameterization */
subpaths = NIL;
foreach(lcr, live_childrels)
{
RelOptInfo *childrel = (RelOptInfo *) lfirst(lcr);
Path *cheapest_total;
cheapest_total =
get_cheapest_path_for_pathkeys(childrel->pathlist,
NIL,
required_outer,
TOTAL_COST);
Assert(cheapest_total != NULL);
subpaths = accumulate_append_subpath(subpaths, cheapest_total);
}
add_path(rel, (Path *) create_append_path(rel, subpaths));
/* And build parameterized MergeAppend paths */
generate_mergeappend_paths(root, rel, live_childrels,
all_child_pathkeys, required_outer);
}
/* Select cheapest paths */
set_cheapest(rel);
}
/*
* generate_mergeappend_paths
* Generate MergeAppend paths for an append relation
*
* Generate a path for each ordering (pathkey list) appearing in
* all_child_pathkeys. If required_outer isn't NULL, accept paths having
* those relations as required outer relations.
*
* We consider both cheapest-startup and cheapest-total cases, ie, for each
* interesting ordering, collect all the cheapest startup subpaths and all the
* cheapest total paths, and build a MergeAppend path for each case.
*/
static void
generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel,
List *live_childrels,
List *all_child_pathkeys,
Relids required_outer)
{
ListCell *lcp;
foreach(lcp, all_child_pathkeys)
{
List *pathkeys = (List *) lfirst(lcp);
List *startup_subpaths = NIL;
List *total_subpaths = NIL;
bool startup_neq_total = false;
@ -594,20 +854,32 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
cheapest_startup =
get_cheapest_path_for_pathkeys(childrel->pathlist,
pathkeys,
required_outer,
STARTUP_COST);
cheapest_total =
get_cheapest_path_for_pathkeys(childrel->pathlist,
pathkeys,
required_outer,
TOTAL_COST);
/*
* If we can't find any paths with the right order just add the
* cheapest-total path; we'll have to sort it.
* If we can't find any paths with the right order just use the
* cheapest-total path; we'll have to sort it later. We can
* use the cheapest path for the parameterization, though.
*/
if (cheapest_startup == NULL)
cheapest_startup = childrel->cheapest_total_path;
if (cheapest_total == NULL)
cheapest_total = childrel->cheapest_total_path;
if (cheapest_startup == NULL || cheapest_total == NULL)
{
if (required_outer)
cheapest_startup = cheapest_total =
get_cheapest_path_for_pathkeys(childrel->pathlist,
NIL,
required_outer,
TOTAL_COST);
else
cheapest_startup = cheapest_total =
childrel->cheapest_total_path;
Assert(cheapest_total != NULL);
}
/*
* Notice whether we actually have different paths for the
@ -634,9 +906,6 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
total_subpaths,
pathkeys));
}
/* Select cheapest path */
set_cheapest(rel);
}
/*
@ -667,7 +936,7 @@ accumulate_append_subpath(List *subpaths, Path *path)
* Build a dummy path for a relation that's been excluded by constraints
*
* Rather than inventing a special "dummy" path type, we represent this as an
* AppendPath with no members (see also IS_DUMMY_PATH macro).
* AppendPath with no members (see also IS_DUMMY_PATH/IS_DUMMY_REL macros).
*/
static void
set_dummy_rel_pathlist(RelOptInfo *rel)
@ -676,6 +945,9 @@ set_dummy_rel_pathlist(RelOptInfo *rel)
rel->rows = 0;
rel->width = 0;
/* Discard any pre-existing paths; no further need for them */
rel->pathlist = NIL;
add_path(rel, (Path *) create_append_path(rel, NIL));
/* Select cheapest path (pretty easy in this case...) */
@ -707,6 +979,9 @@ has_multiple_baserels(PlannerInfo *root)
/*
* set_subquery_pathlist
* Build the (single) access path for a subquery RTE
*
* There's no need for a separate set_subquery_size phase, since we don't
* support parameterized paths for subqueries.
*/
static void
set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
@ -847,9 +1122,6 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
static void
set_function_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
{
/* Mark rel with estimated output rows, width, etc */
set_function_size_estimates(root, rel);
/* Generate appropriate path */
add_path(rel, create_functionscan_path(root, rel));
@ -864,9 +1136,6 @@ set_function_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
static void
set_values_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
{
/* Mark rel with estimated output rows, width, etc */
set_values_size_estimates(root, rel);
/* Generate appropriate path */
add_path(rel, create_valuesscan_path(root, rel));
@ -877,6 +1146,9 @@ set_values_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
/*
* set_cte_pathlist
* Build the (single) access path for a non-self-reference CTE RTE
*
* There's no need for a separate set_cte_size phase, since we don't
* support parameterized paths for CTEs.
*/
static void
set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
@ -935,6 +1207,9 @@ set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
/*
* set_worktable_pathlist
* Build the (single) access path for a self-reference CTE RTE
*
* There's no need for a separate set_worktable_size phase, since we don't
* support parameterized paths for CTEs.
*/
static void
set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
@ -973,23 +1248,6 @@ set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
set_cheapest(rel);
}
/*
 * set_foreign_pathlist
 * Build the (single) access path for a foreign table RTE
 *
 * The work is done in three steps: fill in the rel's size estimates,
 * submit one foreign-scan path via add_path(), and then let set_cheapest()
 * pick the rel's cheapest path(s).
 *
 * 'root' is the planner's global state, passed through to the helpers.
 * 'rel' is the relation whose pathlist is being built.
 * 'rte' is not referenced in this body.
 */
static void
set_foreign_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
{
/* Mark rel with estimated output rows, width, etc */
set_foreign_size_estimates(root, rel);
/* Generate appropriate path (only one candidate is ever submitted here) */
add_path(rel, (Path *) create_foreignscan_path(root, rel));
/* Select cheapest path (pretty easy in this case...) */
set_cheapest(rel);
}
/*
* make_rel_from_joinlist
* Build access paths using a "joinlist" to guide the join path search.

File diff suppressed because it is too large Load Diff

View File

@ -59,6 +59,7 @@ static bool reconsider_outer_join_clause(PlannerInfo *root,
bool outer_on_left);
static bool reconsider_full_join_clause(PlannerInfo *root,
RestrictInfo *rinfo);
static Index get_parent_relid(PlannerInfo *root, RelOptInfo *rel);
/*
@ -892,7 +893,7 @@ generate_base_implied_equalities_broken(PlannerInfo *root,
*
* The results are sufficient for use in merge, hash, and plain nestloop join
* methods. We do not worry here about selecting clauses that are optimal
* for use in a nestloop-with-inner-indexscan join, however. indxpath.c makes
* for use in a nestloop-with-parameterized-inner-scan. indxpath.c makes
* its own selections of clauses to use, and if the ones we pick here are
* redundant with those, the extras will be eliminated in createplan.c.
*
@ -1858,21 +1859,40 @@ mutate_eclass_expressions(PlannerInfo *root,
/*
* find_eclass_clauses_for_index_join
* Create joinclauses usable for a nestloop-with-inner-indexscan
* scanning the given inner rel with the specified set of outer rels.
* generate_implied_equalities_for_indexcol
* Create EC-derived joinclauses usable with a specific index column.
*
* We assume that any given index column could appear in only one EC.
* (This should be true in all but the most pathological cases, and if it
* isn't, we stop on the first match anyway.) Therefore, what we return
* is a redundant list of clauses equating the index column to each of
* the other-relation values it is known to be equal to. Any one of
* these clauses can be used to create a parameterized indexscan, and there
* is no value in using more than one. (But it *is* worthwhile to create
* a separate parameterized path for each one, since that leads to different
* join orders.)
*/
List *
find_eclass_clauses_for_index_join(PlannerInfo *root, RelOptInfo *rel,
Relids outer_relids)
generate_implied_equalities_for_indexcol(PlannerInfo *root,
IndexOptInfo *index,
int indexcol)
{
List *result = NIL;
RelOptInfo *rel = index->rel;
bool is_child_rel = (rel->reloptkind == RELOPT_OTHER_MEMBER_REL);
Index parent_relid;
ListCell *lc1;
/* If it's a child rel, we'll need to know what its parent is */
if (is_child_rel)
parent_relid = get_parent_relid(root, rel);
else
parent_relid = 0; /* not used, but keep compiler quiet */
foreach(lc1, root->eq_classes)
{
EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1);
EquivalenceMember *cur_em;
ListCell *lc2;
/*
@ -1889,71 +1909,94 @@ find_eclass_clauses_for_index_join(PlannerInfo *root, RelOptInfo *rel,
if (!is_child_rel &&
!bms_is_subset(rel->relids, cur_ec->ec_relids))
continue;
/* ... nor if no overlap with outer_relids */
if (!bms_overlap(outer_relids, cur_ec->ec_relids))
continue;
/* Scan members, looking for indexable columns */
/* Scan members, looking for a match to the indexable column */
cur_em = NULL;
foreach(lc2, cur_ec->ec_members)
{
EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2);
EquivalenceMember *best_outer_em = NULL;
Oid best_eq_op = InvalidOid;
ListCell *lc3;
cur_em = (EquivalenceMember *) lfirst(lc2);
if (bms_equal(cur_em->em_relids, rel->relids) &&
eclass_member_matches_indexcol(cur_ec, cur_em,
index, indexcol))
break;
cur_em = NULL;
}
if (!bms_equal(cur_em->em_relids, rel->relids) ||
!eclass_matches_any_index(cur_ec, cur_em, rel))
if (!cur_em)
continue;
/*
* Found our match. Scan the other EC members and attempt to generate
* joinclauses.
*/
foreach(lc2, cur_ec->ec_members)
{
EquivalenceMember *other_em = (EquivalenceMember *) lfirst(lc2);
Oid eq_op;
RestrictInfo *rinfo;
/* Make sure it'll be a join to a different rel */
if (other_em == cur_em ||
bms_overlap(other_em->em_relids, rel->relids))
continue;
/*
* Found one, so try to generate a join clause. This is like
* generate_join_implied_equalities_normal, except simpler since
* our only preference item is to pick a Var on the outer side. We
* only need one join clause per index col.
* Also, if this is a child rel, avoid generating a useless join
* to its parent rel.
*/
foreach(lc3, cur_ec->ec_members)
{
EquivalenceMember *outer_em = (EquivalenceMember *) lfirst(lc3);
Oid eq_op;
if (is_child_rel &&
bms_is_member(parent_relid, other_em->em_relids))
continue;
if (!bms_is_subset(outer_em->em_relids, outer_relids))
continue;
eq_op = select_equality_operator(cur_ec,
cur_em->em_datatype,
outer_em->em_datatype);
if (!OidIsValid(eq_op))
continue;
best_outer_em = outer_em;
best_eq_op = eq_op;
if (IsA(outer_em->em_expr, Var) ||
(IsA(outer_em->em_expr, RelabelType) &&
IsA(((RelabelType *) outer_em->em_expr)->arg, Var)))
break; /* no need to look further */
}
eq_op = select_equality_operator(cur_ec,
cur_em->em_datatype,
other_em->em_datatype);
if (!OidIsValid(eq_op))
continue;
if (best_outer_em)
{
/* Found a suitable joinclause */
RestrictInfo *rinfo;
/* set parent_ec to mark as redundant with other joinclauses */
rinfo = create_join_clause(root, cur_ec, eq_op,
cur_em, other_em,
cur_ec);
/* set parent_ec to mark as redundant with other joinclauses */
rinfo = create_join_clause(root, cur_ec, best_eq_op,
cur_em, best_outer_em,
cur_ec);
result = lappend(result, rinfo);
/*
* Note: we keep scanning here because we want to provide a
* clause for every possible indexcol.
*/
}
result = lappend(result, rinfo);
}
/*
* If somehow we failed to create any join clauses, we might as well
* keep scanning the ECs for another match. But if we did make any,
* we're done, because we don't want to return non-redundant clauses.
*/
if (result)
break;
}
return result;
}
/*
 * get_parent_relid
 *	  Look up the parent appendrel's relid for a given child rel.
 *
 * Walks root->append_rel_list and returns the parent_relid of the first
 * AppendRelInfo whose child_relid equals rel->relid.  If no entry matches,
 * an ERROR is reported via elog().
 *
 * This lives here only because nothing else needs it at the moment; it
 * could reasonably be moved to a more general location.
 */
static Index
get_parent_relid(PlannerInfo *root, RelOptInfo *rel)
{
	ListCell   *cell;

	foreach(cell, root->append_rel_list)
	{
		AppendRelInfo *info = (AppendRelInfo *) lfirst(cell);

		/* Not the entry for our child rel?  Keep looking. */
		if (info->child_relid != rel->relid)
			continue;

		return info->parent_relid;
	}

	/* A child rel is expected to always have an append_rel_list entry */
	elog(ERROR, "child rel not found in append_rel_list");
	return 0;					/* follows the elog(ERROR) call */
}
/*
* have_relevant_eclass_joinclause

File diff suppressed because it is too large Load Diff

View File

@ -25,17 +25,20 @@
static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
RelOptInfo *outerrel, RelOptInfo *innerrel,
List *restrictlist, List *mergeclause_list,
JoinType jointype, SpecialJoinInfo *sjinfo);
JoinType jointype, SpecialJoinInfo *sjinfo,
Relids param_source_rels);
static void match_unsorted_outer(PlannerInfo *root, RelOptInfo *joinrel,
RelOptInfo *outerrel, RelOptInfo *innerrel,
List *restrictlist, List *mergeclause_list,
JoinType jointype, SpecialJoinInfo *sjinfo);
JoinType jointype, SpecialJoinInfo *sjinfo,
SemiAntiJoinFactors *semifactors,
Relids param_source_rels);
static void hash_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
RelOptInfo *outerrel, RelOptInfo *innerrel,
List *restrictlist,
JoinType jointype, SpecialJoinInfo *sjinfo);
static Path *best_appendrel_indexscan(PlannerInfo *root, RelOptInfo *rel,
RelOptInfo *outer_rel, JoinType jointype);
JoinType jointype, SpecialJoinInfo *sjinfo,
SemiAntiJoinFactors *semifactors,
Relids param_source_rels);
static List *select_mergejoin_clauses(PlannerInfo *root,
RelOptInfo *joinrel,
RelOptInfo *outerrel,
@ -79,6 +82,9 @@ add_paths_to_joinrel(PlannerInfo *root,
{
List *mergeclause_list = NIL;
bool mergejoin_allowed = true;
SemiAntiJoinFactors semifactors;
Relids param_source_rels = NULL;
ListCell *lc;
/*
* Find potential mergejoin clauses. We can skip this if we are not
@ -95,13 +101,60 @@ add_paths_to_joinrel(PlannerInfo *root,
jointype,
&mergejoin_allowed);
/*
* If it's SEMI or ANTI join, compute correction factors for cost
* estimation. These will be the same for all paths.
*/
if (jointype == JOIN_SEMI || jointype == JOIN_ANTI)
compute_semi_anti_join_factors(root, outerrel, innerrel,
jointype, sjinfo, restrictlist,
&semifactors);
/*
* Decide whether it's sensible to generate parameterized paths for this
* joinrel, and if so, which relations such paths should require. There
* is no need to create a parameterized result path unless there is a join
* order restriction that prevents joining one of our input rels directly
* to the parameter source rel instead of joining to the other input rel.
* This restriction reduces the number of parameterized paths we have to
* deal with at higher join levels, without compromising the quality of
* the resulting plan. We express the restriction as a Relids set that
* must overlap the parameterization of any proposed join path.
*/
foreach(lc, root->join_info_list)
{
SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
/*
* SJ is relevant to this join if we have some part of its RHS
* (possibly not all of it), and haven't yet joined to its LHS. (This
* test is pretty simplistic, but should be sufficient considering the
* join has already been proven legal.) If the SJ is relevant, it
* presents constraints for joining to anything not in its RHS.
*/
if (bms_overlap(joinrel->relids, sjinfo->min_righthand) &&
!bms_overlap(joinrel->relids, sjinfo->min_lefthand))
param_source_rels = bms_join(param_source_rels,
bms_difference(root->all_baserels,
sjinfo->min_righthand));
/* full joins constrain both sides symmetrically */
if (sjinfo->jointype == JOIN_FULL &&
bms_overlap(joinrel->relids, sjinfo->min_lefthand) &&
!bms_overlap(joinrel->relids, sjinfo->min_righthand))
param_source_rels = bms_join(param_source_rels,
bms_difference(root->all_baserels,
sjinfo->min_lefthand));
}
/*
* 1. Consider mergejoin paths where both relations must be explicitly
* sorted. Skip this if we can't mergejoin.
*/
if (mergejoin_allowed)
sort_inner_and_outer(root, joinrel, outerrel, innerrel,
restrictlist, mergeclause_list, jointype, sjinfo);
restrictlist, mergeclause_list, jointype,
sjinfo, param_source_rels);
/*
* 2. Consider paths where the outer relation need not be explicitly
@ -112,7 +165,8 @@ add_paths_to_joinrel(PlannerInfo *root,
*/
if (mergejoin_allowed)
match_unsorted_outer(root, joinrel, outerrel, innerrel,
restrictlist, mergeclause_list, jointype, sjinfo);
restrictlist, mergeclause_list, jointype,
sjinfo, &semifactors, param_source_rels);
#ifdef NOT_USED
@ -129,7 +183,8 @@ add_paths_to_joinrel(PlannerInfo *root,
*/
if (mergejoin_allowed)
match_unsorted_inner(root, joinrel, outerrel, innerrel,
restrictlist, mergeclause_list, jointype, sjinfo);
restrictlist, mergeclause_list, jointype,
sjinfo, &semifactors, param_source_rels);
#endif
/*
@ -139,7 +194,226 @@ add_paths_to_joinrel(PlannerInfo *root,
*/
if (enable_hashjoin || jointype == JOIN_FULL)
hash_inner_and_outer(root, joinrel, outerrel, innerrel,
restrictlist, jointype, sjinfo);
restrictlist, jointype,
sjinfo, &semifactors, param_source_rels);
}
/*
 * try_nestloop_path
 *	  Evaluate one candidate nestloop join of outer_path with inner_path and,
 *	  if it looks worthwhile, add it to the joinrel's pathlist via add_path().
 *
 * 'joinrel' is the join relation receiving the path.
 * 'jointype', 'sjinfo' and 'semifactors' are passed through to the costing
 *		and path-construction routines.
 * 'param_source_rels' is the set of rels that a still-parameterized result
 *		path must overlap in order to be accepted.
 * 'outer_path' and 'inner_path' are the two input paths.
 * 'restrict_clauses' and 'pathkeys' are the join clauses and the output
 *		ordering to attach to the new path.
 */
static void
try_nestloop_path(PlannerInfo *root,
				  RelOptInfo *joinrel,
				  JoinType jointype,
				  SpecialJoinInfo *sjinfo,
				  SemiAntiJoinFactors *semifactors,
				  Relids param_source_rels,
				  Path *outer_path,
				  Path *inner_path,
				  List *restrict_clauses,
				  List *pathkeys)
{
	JoinCostWorkspace workspace;
	Relids		required_outer;
	Path	   *nestpath;

	/*
	 * Work out what the join would still need from outside.  A nonempty
	 * requirement is only acceptable when it overlaps param_source_rels;
	 * anything else is not a sensible parameterization, so give up at once.
	 */
	required_outer = calc_nestloop_required_outer(outer_path, inner_path);
	if (required_outer != NULL &&
		!bms_overlap(required_outer, param_source_rels))
	{
		/* Release the set so a rejected candidate costs no memory */
		bms_free(required_outer);
		return;
	}

	/*
	 * Two-phase costing: first get an inexpensive lower bound on the path's
	 * cost, and ask add_path_precheck() whether a path with those costs could
	 * possibly survive against what the joinrel already has.  Only if so do
	 * we pay for building a complete path node and running add_path(), since
	 * those steps are the expensive part.
	 */
	initial_cost_nestloop(root, &workspace, jointype,
						  outer_path, inner_path,
						  sjinfo, semifactors);

	if (!add_path_precheck(joinrel,
						   workspace.startup_cost, workspace.total_cost,
						   pathkeys, required_outer))
	{
		/* Clearly dominated; again, leave no garbage behind */
		bms_free(required_outer);
		return;
	}

	nestpath = (Path *) create_nestloop_path(root,
											 joinrel,
											 jointype,
											 &workspace,
											 sjinfo,
											 semifactors,
											 outer_path,
											 inner_path,
											 restrict_clauses,
											 pathkeys,
											 required_outer);
	add_path(joinrel, nestpath);
}
/*
 * try_mergejoin_path
 *	  Evaluate one candidate merge join of outer_path with inner_path and,
 *	  if it looks worthwhile, add it to the joinrel's pathlist via add_path().
 *
 * 'param_source_rels' is the set of rels that a still-parameterized result
 *		path must overlap in order to be accepted.
 * 'mergeclauses' are the clauses to merge on.
 * 'outersortkeys' and 'innersortkeys' are the orderings the inputs must be
 *		sorted into; either one is dropped (set to NIL) here when the
 *		corresponding input path already delivers that ordering.
 * The remaining arguments are passed through to the costing and
 * path-construction routines.
 */
static void
try_mergejoin_path(PlannerInfo *root,
				   RelOptInfo *joinrel,
				   JoinType jointype,
				   SpecialJoinInfo *sjinfo,
				   Relids param_source_rels,
				   Path *outer_path,
				   Path *inner_path,
				   List *restrict_clauses,
				   List *pathkeys,
				   List *mergeclauses,
				   List *outersortkeys,
				   List *innersortkeys)
{
	JoinCostWorkspace workspace;
	Relids		required_outer;
	Path	   *mergepath;

	/*
	 * Work out what the join would still need from outside.  A nonempty
	 * requirement is only acceptable when it overlaps param_source_rels;
	 * anything else is not a sensible parameterization, so give up at once.
	 */
	required_outer = calc_non_nestloop_required_outer(outer_path, inner_path);
	if (required_outer != NULL &&
		!bms_overlap(required_outer, param_source_rels))
	{
		/* Release the set so a rejected candidate costs no memory */
		bms_free(required_outer);
		return;
	}

	/*
	 * An input whose existing pathkeys already cover the requested sort keys
	 * needs no explicit sort, so forget the sort keys for that side.
	 */
	if (outersortkeys != NIL &&
		pathkeys_contained_in(outersortkeys, outer_path->pathkeys))
		outersortkeys = NIL;
	if (innersortkeys != NIL &&
		pathkeys_contained_in(innersortkeys, inner_path->pathkeys))
		innersortkeys = NIL;

	/*
	 * Same two-phase scheme as try_nestloop_path(): cheap preliminary cost
	 * estimate first, full path construction only if the precheck passes.
	 */
	initial_cost_mergejoin(root, &workspace, jointype, mergeclauses,
						   outer_path, inner_path,
						   outersortkeys, innersortkeys,
						   sjinfo);

	if (!add_path_precheck(joinrel,
						   workspace.startup_cost, workspace.total_cost,
						   pathkeys, required_outer))
	{
		/* Clearly dominated; again, leave no garbage behind */
		bms_free(required_outer);
		return;
	}

	mergepath = (Path *) create_mergejoin_path(root,
											   joinrel,
											   jointype,
											   &workspace,
											   sjinfo,
											   outer_path,
											   inner_path,
											   restrict_clauses,
											   pathkeys,
											   required_outer,
											   mergeclauses,
											   outersortkeys,
											   innersortkeys);
	add_path(joinrel, mergepath);
}
/*
 * try_hashjoin_path
 *	  Evaluate one candidate hash join of outer_path with inner_path and,
 *	  if it looks worthwhile, add it to the joinrel's pathlist via add_path().
 *
 * 'param_source_rels' is the set of rels that a still-parameterized result
 *		path must overlap in order to be accepted.
 * 'hashclauses' are the clauses to use as hash keys.
 * The remaining arguments are passed through to the costing and
 * path-construction routines.
 */
static void
try_hashjoin_path(PlannerInfo *root,
				  RelOptInfo *joinrel,
				  JoinType jointype,
				  SpecialJoinInfo *sjinfo,
				  SemiAntiJoinFactors *semifactors,
				  Relids param_source_rels,
				  Path *outer_path,
				  Path *inner_path,
				  List *restrict_clauses,
				  List *hashclauses)
{
	JoinCostWorkspace workspace;
	Relids		required_outer;
	Path	   *hashpath;

	/*
	 * Work out what the join would still need from outside.  A nonempty
	 * requirement is only acceptable when it overlaps param_source_rels;
	 * anything else is not a sensible parameterization, so give up at once.
	 */
	required_outer = calc_non_nestloop_required_outer(outer_path, inner_path);
	if (required_outer != NULL &&
		!bms_overlap(required_outer, param_source_rels))
	{
		/* Release the set so a rejected candidate costs no memory */
		bms_free(required_outer);
		return;
	}

	/*
	 * Same two-phase scheme as try_nestloop_path().  The precheck is given
	 * NIL for the pathkeys because a hashjoin path never has output pathkeys
	 * (see the comments in create_hashjoin_path).
	 */
	initial_cost_hashjoin(root, &workspace, jointype, hashclauses,
						  outer_path, inner_path,
						  sjinfo, semifactors);

	if (!add_path_precheck(joinrel,
						   workspace.startup_cost, workspace.total_cost,
						   NIL, required_outer))
	{
		/* Clearly dominated; again, leave no garbage behind */
		bms_free(required_outer);
		return;
	}

	hashpath = (Path *) create_hashjoin_path(root,
											 joinrel,
											 jointype,
											 &workspace,
											 sjinfo,
											 semifactors,
											 outer_path,
											 inner_path,
											 restrict_clauses,
											 required_outer,
											 hashclauses);
	add_path(joinrel, hashpath);
}
/*
@ -187,6 +461,7 @@ clause_sides_match_join(RestrictInfo *rinfo, RelOptInfo *outerrel,
* mergejoin clauses in this join
* 'jointype' is the type of join to do
* 'sjinfo' is extra info about the join for selectivity estimation
* 'param_source_rels' are OK targets for parameterization of result paths
*/
static void
sort_inner_and_outer(PlannerInfo *root,
@ -196,7 +471,8 @@ sort_inner_and_outer(PlannerInfo *root,
List *restrictlist,
List *mergeclause_list,
JoinType jointype,
SpecialJoinInfo *sjinfo)
SpecialJoinInfo *sjinfo,
Relids param_source_rels)
{
Path *outer_path;
Path *inner_path;
@ -209,6 +485,13 @@ sort_inner_and_outer(PlannerInfo *root,
* cheapest-startup-cost input paths later, and only if they don't need a
* sort.
*
* This function intentionally does not consider parameterized input paths
* (implicit in the fact that it only looks at cheapest_total_path, which
* is always unparameterized). If we did so, we'd have a combinatorial
* explosion of mergejoin paths of dubious value. This interacts with
* decisions elsewhere that also discriminate against mergejoins with
* parameterized inputs; see comments in src/backend/optimizer/README.
*
* If unique-ification is requested, do it and then handle as a plain
* inner join.
*/
@ -299,21 +582,21 @@ sort_inner_and_outer(PlannerInfo *root,
* And now we can make the path.
*
* Note: it's possible that the cheapest paths will already be sorted
* properly. create_mergejoin_path will detect that case and suppress
* properly. try_mergejoin_path will detect that case and suppress
* an explicit sort step, so we needn't do so here.
*/
add_path(joinrel, (Path *)
create_mergejoin_path(root,
joinrel,
jointype,
sjinfo,
outer_path,
inner_path,
restrictlist,
merge_pathkeys,
cur_mergeclauses,
outerkeys,
innerkeys));
try_mergejoin_path(root,
joinrel,
jointype,
sjinfo,
param_source_rels,
outer_path,
inner_path,
restrictlist,
merge_pathkeys,
cur_mergeclauses,
outerkeys,
innerkeys);
}
}
@ -350,6 +633,8 @@ sort_inner_and_outer(PlannerInfo *root,
* mergejoin clauses in this join
* 'jointype' is the type of join to do
* 'sjinfo' is extra info about the join for selectivity estimation
* 'semifactors' contains valid data if jointype is SEMI or ANTI
* 'param_source_rels' are OK targets for parameterization of result paths
*/
static void
match_unsorted_outer(PlannerInfo *root,
@ -359,17 +644,16 @@ match_unsorted_outer(PlannerInfo *root,
List *restrictlist,
List *mergeclause_list,
JoinType jointype,
SpecialJoinInfo *sjinfo)
SpecialJoinInfo *sjinfo,
SemiAntiJoinFactors *semifactors,
Relids param_source_rels)
{
JoinType save_jointype = jointype;
bool nestjoinOK;
bool useallclauses;
Path *inner_cheapest_startup = innerrel->cheapest_startup_path;
Path *inner_cheapest_total = innerrel->cheapest_total_path;
Path *matpath = NULL;
Path *index_cheapest_startup = NULL;
Path *index_cheapest_total = NULL;
ListCell *l;
ListCell *lc1;
/*
* Nestloop only supports inner, left, semi, and anti joins. Also, if we
@ -408,14 +692,13 @@ match_unsorted_outer(PlannerInfo *root,
/*
* If we need to unique-ify the inner path, we will consider only the
* cheapest inner.
* cheapest-total inner.
*/
if (save_jointype == JOIN_UNIQUE_INNER)
{
inner_cheapest_total = (Path *)
create_unique_path(root, innerrel, inner_cheapest_total, sjinfo);
Assert(inner_cheapest_total);
inner_cheapest_startup = inner_cheapest_total;
}
else if (nestjoinOK)
{
@ -428,28 +711,11 @@ match_unsorted_outer(PlannerInfo *root,
!ExecMaterializesOutput(inner_cheapest_total->pathtype))
matpath = (Path *)
create_material_path(innerrel, inner_cheapest_total);
/*
* Get the best innerjoin indexpaths (if any) for this outer rel.
* They're the same for all outer paths.
*/
if (innerrel->reloptkind != RELOPT_JOINREL)
{
if (IsA(inner_cheapest_total, AppendPath))
index_cheapest_total = best_appendrel_indexscan(root,
innerrel,
outerrel,
jointype);
else if (innerrel->rtekind == RTE_RELATION)
best_inner_indexscan(root, innerrel, outerrel, jointype,
&index_cheapest_startup,
&index_cheapest_total);
}
}
foreach(l, outerrel->pathlist)
foreach(lc1, outerrel->pathlist)
{
Path *outerpath = (Path *) lfirst(l);
Path *outerpath = (Path *) lfirst(lc1);
List *merge_pathkeys;
List *mergeclauses;
List *innersortkeys;
@ -459,9 +725,16 @@ match_unsorted_outer(PlannerInfo *root,
int num_sortkeys;
int sortkeycnt;
/*
* We cannot use an outer path that is parameterized by the inner rel.
*/
if (bms_overlap(outerpath->required_outer, innerrel->relids))
continue;
/*
* If we need to unique-ify the outer path, it's pointless to consider
* any but the cheapest outer.
* any but the cheapest outer. (XXX we don't consider parameterized
* outers, nor inners, for unique-ified cases. Should we?)
*/
if (save_jointype == JOIN_UNIQUE_OUTER)
{
@ -480,65 +753,61 @@ match_unsorted_outer(PlannerInfo *root,
merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
outerpath->pathkeys);
if (nestjoinOK)
if (save_jointype == JOIN_UNIQUE_INNER)
{
/*
* Always consider a nestloop join with this outer and
* cheapest-total-cost inner. When appropriate, also consider
* using the materialized form of the cheapest inner, the
* cheapest-startup-cost inner path, and the cheapest innerjoin
* indexpaths.
* Consider nestloop join, but only with the unique-ified cheapest
* inner path
*/
add_path(joinrel, (Path *)
create_nestloop_path(root,
joinrel,
jointype,
sjinfo,
outerpath,
inner_cheapest_total,
restrictlist,
merge_pathkeys));
try_nestloop_path(root,
joinrel,
jointype,
sjinfo,
semifactors,
param_source_rels,
outerpath,
inner_cheapest_total,
restrictlist,
merge_pathkeys);
}
else if (nestjoinOK)
{
/*
* Consider nestloop joins using this outer path and various
* available paths for the inner relation. We consider the
* cheapest-total paths for each available parameterization of
* the inner relation, including the unparameterized case.
*/
ListCell *lc2;
foreach(lc2, innerrel->cheapest_parameterized_paths)
{
Path *innerpath = (Path *) lfirst(lc2);
try_nestloop_path(root,
joinrel,
jointype,
sjinfo,
semifactors,
param_source_rels,
outerpath,
innerpath,
restrictlist,
merge_pathkeys);
}
/* Also consider materialized form of the cheapest inner path */
if (matpath != NULL)
add_path(joinrel, (Path *)
create_nestloop_path(root,
joinrel,
jointype,
sjinfo,
outerpath,
matpath,
restrictlist,
merge_pathkeys));
if (inner_cheapest_startup != inner_cheapest_total)
add_path(joinrel, (Path *)
create_nestloop_path(root,
joinrel,
jointype,
sjinfo,
outerpath,
inner_cheapest_startup,
restrictlist,
merge_pathkeys));
if (index_cheapest_total != NULL)
add_path(joinrel, (Path *)
create_nestloop_path(root,
joinrel,
jointype,
sjinfo,
outerpath,
index_cheapest_total,
restrictlist,
merge_pathkeys));
if (index_cheapest_startup != NULL &&
index_cheapest_startup != index_cheapest_total)
add_path(joinrel, (Path *)
create_nestloop_path(root,
joinrel,
jointype,
sjinfo,
outerpath,
index_cheapest_startup,
restrictlist,
merge_pathkeys));
try_nestloop_path(root,
joinrel,
jointype,
sjinfo,
semifactors,
param_source_rels,
outerpath,
matpath,
restrictlist,
merge_pathkeys);
}
/* Can't do anything else if outer path needs to be unique'd */
@ -578,21 +847,21 @@ match_unsorted_outer(PlannerInfo *root,
/*
* Generate a mergejoin on the basis of sorting the cheapest inner.
* Since a sort will be needed, only cheapest total cost matters. (But
* create_mergejoin_path will do the right thing if
* try_mergejoin_path will do the right thing if
* inner_cheapest_total is already correctly sorted.)
*/
add_path(joinrel, (Path *)
create_mergejoin_path(root,
joinrel,
jointype,
sjinfo,
outerpath,
inner_cheapest_total,
restrictlist,
merge_pathkeys,
mergeclauses,
NIL,
innersortkeys));
try_mergejoin_path(root,
joinrel,
jointype,
sjinfo,
param_source_rels,
outerpath,
inner_cheapest_total,
restrictlist,
merge_pathkeys,
mergeclauses,
NIL,
innersortkeys);
/* Can't do anything else if inner path needs to be unique'd */
if (save_jointype == JOIN_UNIQUE_INNER)
@ -604,6 +873,11 @@ match_unsorted_outer(PlannerInfo *root,
* mergejoin using a subset of the merge clauses. Here, we consider
* both cheap startup cost and cheap total cost.
*
* Currently we do not consider parameterized inner paths here.
* This interacts with decisions elsewhere that also discriminate
* against mergejoins with parameterized inputs; see comments in
* src/backend/optimizer/README.
*
* As we shorten the sortkey list, we should consider only paths that
* are strictly cheaper than (in particular, not the same as) any path
* found in an earlier iteration. Otherwise we'd be intentionally
@ -654,6 +928,7 @@ match_unsorted_outer(PlannerInfo *root,
trialsortkeys = list_truncate(trialsortkeys, sortkeycnt);
innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
trialsortkeys,
NULL,
TOTAL_COST);
if (innerpath != NULL &&
(cheapest_total_inner == NULL ||
@ -673,23 +948,24 @@ match_unsorted_outer(PlannerInfo *root,
}
else
newclauses = mergeclauses;
add_path(joinrel, (Path *)
create_mergejoin_path(root,
joinrel,
jointype,
sjinfo,
outerpath,
innerpath,
restrictlist,
merge_pathkeys,
newclauses,
NIL,
NIL));
try_mergejoin_path(root,
joinrel,
jointype,
sjinfo,
param_source_rels,
outerpath,
innerpath,
restrictlist,
merge_pathkeys,
newclauses,
NIL,
NIL);
cheapest_total_inner = innerpath;
}
/* Same on the basis of cheapest startup cost ... */
innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
trialsortkeys,
NULL,
STARTUP_COST);
if (innerpath != NULL &&
(cheapest_startup_inner == NULL ||
@ -717,18 +993,18 @@ match_unsorted_outer(PlannerInfo *root,
else
newclauses = mergeclauses;
}
add_path(joinrel, (Path *)
create_mergejoin_path(root,
joinrel,
jointype,
sjinfo,
outerpath,
innerpath,
restrictlist,
merge_pathkeys,
newclauses,
NIL,
NIL));
try_mergejoin_path(root,
joinrel,
jointype,
sjinfo,
param_source_rels,
outerpath,
innerpath,
restrictlist,
merge_pathkeys,
newclauses,
NIL,
NIL);
}
cheapest_startup_inner = innerpath;
}
@ -754,6 +1030,8 @@ match_unsorted_outer(PlannerInfo *root,
* clauses that apply to this join
* 'jointype' is the type of join to do
* 'sjinfo' is extra info about the join for selectivity estimation
* 'semifactors' contains valid data if jointype is SEMI or ANTI
* 'param_source_rels' are OK targets for parameterization of result paths
*/
static void
hash_inner_and_outer(PlannerInfo *root,
@ -762,15 +1040,17 @@ hash_inner_and_outer(PlannerInfo *root,
RelOptInfo *innerrel,
List *restrictlist,
JoinType jointype,
SpecialJoinInfo *sjinfo)
SpecialJoinInfo *sjinfo,
SemiAntiJoinFactors *semifactors,
Relids param_source_rels)
{
bool isouterjoin = IS_OUTER_JOIN(jointype);
List *hashclauses;
ListCell *l;
/*
* We need to build only one hashpath for any given pair of outer and
* inner relations; all of the hashable clauses will be used as keys.
* We need to build only one hashclauses list for any given pair of outer
* and inner relations; all of the hashable clauses will be used as keys.
*
* Scan the join's restrictinfo list to find hashjoinable clauses that are
* usable with this pair of sub-relations.
@ -800,7 +1080,7 @@ hash_inner_and_outer(PlannerInfo *root,
hashclauses = lappend(hashclauses, restrictinfo);
}
/* If we found any usable hashclauses, make a path */
/* If we found any usable hashclauses, make paths */
if (hashclauses)
{
/*
@ -812,15 +1092,25 @@ hash_inner_and_outer(PlannerInfo *root,
Path *cheapest_total_outer = outerrel->cheapest_total_path;
Path *cheapest_total_inner = innerrel->cheapest_total_path;
/* Unique-ify if need be */
/* Unique-ify if need be; we ignore parameterized possibilities */
if (jointype == JOIN_UNIQUE_OUTER)
{
cheapest_total_outer = (Path *)
create_unique_path(root, outerrel,
cheapest_total_outer, sjinfo);
Assert(cheapest_total_outer);
cheapest_startup_outer = cheapest_total_outer;
jointype = JOIN_INNER;
try_hashjoin_path(root,
joinrel,
jointype,
sjinfo,
semifactors,
param_source_rels,
cheapest_total_outer,
cheapest_total_inner,
restrictlist,
hashclauses);
/* no possibility of cheap startup here */
}
else if (jointype == JOIN_UNIQUE_INNER)
{
@ -829,99 +1119,94 @@ hash_inner_and_outer(PlannerInfo *root,
cheapest_total_inner, sjinfo);
Assert(cheapest_total_inner);
jointype = JOIN_INNER;
try_hashjoin_path(root,
joinrel,
jointype,
sjinfo,
semifactors,
param_source_rels,
cheapest_total_outer,
cheapest_total_inner,
restrictlist,
hashclauses);
if (cheapest_startup_outer != cheapest_total_outer)
try_hashjoin_path(root,
joinrel,
jointype,
sjinfo,
semifactors,
param_source_rels,
cheapest_startup_outer,
cheapest_total_inner,
restrictlist,
hashclauses);
}
else
{
/*
* For other jointypes, we consider the cheapest startup outer
* together with the cheapest total inner, and then consider
* pairings of cheapest-total paths including parameterized ones.
* There is no use in generating parameterized paths on the basis
* of possibly cheap startup cost, so this is sufficient.
*/
ListCell *lc1;
ListCell *lc2;
add_path(joinrel, (Path *)
create_hashjoin_path(root,
try_hashjoin_path(root,
joinrel,
jointype,
sjinfo,
semifactors,
param_source_rels,
cheapest_startup_outer,
cheapest_total_inner,
restrictlist,
hashclauses);
foreach(lc1, outerrel->cheapest_parameterized_paths)
{
Path *outerpath = (Path *) lfirst(lc1);
/*
* We cannot use an outer path that is parameterized by the
* inner rel.
*/
if (bms_overlap(outerpath->required_outer, innerrel->relids))
continue;
foreach(lc2, innerrel->cheapest_parameterized_paths)
{
Path *innerpath = (Path *) lfirst(lc2);
/*
* We cannot use an inner path that is parameterized by
* the outer rel, either.
*/
if (bms_overlap(innerpath->required_outer,
outerrel->relids))
continue;
if (outerpath == cheapest_startup_outer &&
innerpath == cheapest_total_inner)
continue; /* already tried it */
try_hashjoin_path(root,
joinrel,
jointype,
sjinfo,
cheapest_total_outer,
cheapest_total_inner,
semifactors,
param_source_rels,
outerpath,
innerpath,
restrictlist,
hashclauses));
if (cheapest_startup_outer != cheapest_total_outer)
add_path(joinrel, (Path *)
create_hashjoin_path(root,
joinrel,
jointype,
sjinfo,
cheapest_startup_outer,
cheapest_total_inner,
restrictlist,
hashclauses));
hashclauses);
}
}
}
}
}
/*
* best_appendrel_indexscan
* Finds the best available set of inner indexscans for a nestloop join
* with the given append relation on the inside and the given outer_rel
* outside. Returns an AppendPath comprising the best inner scans, or
* NULL if there are no possible inner indexscans.
*
* 'root' is the planner context; its append_rel_list is scanned to find
* the children of the append relation
* 'rel' is the append relation (the parent); children are identified by
* comparing each AppendRelInfo's parent_relid against rel->relid
* 'outer_rel' and 'jointype' are passed through unchanged to
* best_inner_indexscan for each child
*
* Note that we currently consider only cheapest-total-cost. It's not
* very clear what cheapest-startup-cost might mean for an AppendPath.
*/
static Path *
best_appendrel_indexscan(PlannerInfo *root, RelOptInfo *rel,
RelOptInfo *outer_rel, JoinType jointype)
{
int parentRTindex = rel->relid;
List *append_paths = NIL;
bool found_indexscan = false;
ListCell *l;
/* Visit every append-rel entry, collecting one path per live child */
foreach(l, root->append_rel_list)
{
AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l);
int childRTindex;
RelOptInfo *childrel;
/* filled in by best_inner_indexscan; not otherwise used here */
Path *index_cheapest_startup;
Path *index_cheapest_total;
/* append_rel_list contains all append rels; ignore others */
if (appinfo->parent_relid != parentRTindex)
continue;
childRTindex = appinfo->child_relid;
childrel = find_base_rel(root, childRTindex);
Assert(childrel->reloptkind == RELOPT_OTHER_MEMBER_REL);
/*
* Check to see if child was rejected by constraint exclusion. If so,
* it will have a cheapest_total_path that's a "dummy" path.
*/
if (IS_DUMMY_PATH(childrel->cheapest_total_path))
continue; /* OK, we can ignore it */
/*
* Get the best innerjoin indexpaths (if any) for this child rel.
*/
best_inner_indexscan(root, childrel, outer_rel, jointype,
&index_cheapest_startup, &index_cheapest_total);
/*
* If no luck on an indexpath for this rel, we'll still consider an
* Append substituting the cheapest-total inner path. However we must
* find at least one indexpath, else there's not going to be any
* improvement over the base path for the appendrel.
*/
if (index_cheapest_total)
found_indexscan = true;
else
index_cheapest_total = childrel->cheapest_total_path;
/* Either way, this child contributes exactly one subpath */
append_paths = lappend(append_paths, index_cheapest_total);
}
/* No child produced an indexscan: report "no inner indexscan" */
if (!found_indexscan)
return NULL;
/* Form and return the completed Append path. */
return (Path *) create_append_path(rel, append_paths);
}
/*
* select_mergejoin_clauses
* Select mergejoin clauses that are usable for a particular join.

View File

@ -935,14 +935,11 @@ has_legal_joinclause(PlannerInfo *root, RelOptInfo *rel)
/*
* is_dummy_rel --- has relation been proven empty?
*
* If so, it will have a single path that is dummy.
*
* Returns true when rel has a non-NULL cheapest_total_path and that path
* satisfies IS_DUMMY_PATH.
*/
static bool
is_dummy_rel(RelOptInfo *rel)
{
return (rel->cheapest_total_path != NULL &&
IS_DUMMY_PATH(rel->cheapest_total_path));
/*
* NOTE(review): as written this second return is unreachable. It looks
* like the macro-based replacement for the open-coded test above --
* confirm which of the two statements is meant to remain.
*/
return IS_DUMMY_REL(rel);
}
/*
@ -981,7 +978,7 @@ mark_dummy_rel(RelOptInfo *rel)
/* Set up the dummy path */
add_path(rel, (Path *) create_append_path(rel, NIL));
/* Set or update cheapest_total_path */
/* Set or update cheapest_total_path and related fields */
set_cheapest(rel);
MemoryContextSwitchTo(oldcontext);

View File

@ -88,6 +88,10 @@ create_or_index_quals(PlannerInfo *root, RelOptInfo *rel)
orig_selec;
ListCell *i;
/* Skip the whole mess if no indexes */
if (rel->indexlist == NIL)
return false;
/*
* Find potentially interesting OR joinclauses.
*
@ -114,8 +118,8 @@ create_or_index_quals(PlannerInfo *root, RelOptInfo *rel)
* Use the generate_bitmap_or_paths() machinery to estimate the
* value of each OR clause. We can use regular restriction
* clauses along with the OR clause contents to generate
* indexquals. We pass outer_rel = NULL so that sub-clauses that
* are actually joins will be ignored.
* indexquals. We pass restriction_only = true so that any
* sub-clauses that are actually joins will be ignored.
*/
List *orpaths;
ListCell *k;
@ -123,7 +127,7 @@ create_or_index_quals(PlannerInfo *root, RelOptInfo *rel)
orpaths = generate_bitmap_or_paths(root, rel,
list_make1(rinfo),
rel->baserestrictinfo,
NULL);
true);
/* Locate the cheapest OR path */
foreach(k, orpaths)

View File

@ -403,14 +403,17 @@ pathkeys_contained_in(List *keys1, List *keys2)
/*
* get_cheapest_path_for_pathkeys
* Find the cheapest path (according to the specified criterion) that
* satisfies the given pathkeys. Return NULL if no such path.
* satisfies the given pathkeys and parameterization.
* Return NULL if no such path.
*
* 'paths' is a list of possible paths that all generate the same relation
* 'pathkeys' represents a required ordering (already canonicalized!)
* 'required_outer' denotes allowable outer relations for parameterized paths
* 'cost_criterion' is STARTUP_COST or TOTAL_COST
*/
Path *
get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
Relids required_outer,
CostSelector cost_criterion)
{
Path *matched_path = NULL;
@ -428,7 +431,8 @@ get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
compare_path_costs(matched_path, path, cost_criterion) <= 0)
continue;
if (pathkeys_contained_in(pathkeys, path->pathkeys))
if (pathkeys_contained_in(pathkeys, path->pathkeys) &&
bms_is_subset(path->required_outer, required_outer))
matched_path = path;
}
return matched_path;
@ -437,7 +441,7 @@ get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
/*
* get_cheapest_fractional_path_for_pathkeys
* Find the cheapest path (for retrieving a specified fraction of all
* the tuples) that satisfies the given pathkeys.
* the tuples) that satisfies the given pathkeys and parameterization.
* Return NULL if no such path.
*
* See compare_fractional_path_costs() for the interpretation of the fraction
@ -445,11 +449,13 @@ get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
*
* 'paths' is a list of possible paths that all generate the same relation
* 'pathkeys' represents a required ordering (already canonicalized!)
* 'required_outer' denotes allowable outer relations for parameterized paths
* 'fraction' is the fraction of the total tuples expected to be retrieved
*/
Path *
get_cheapest_fractional_path_for_pathkeys(List *paths,
List *pathkeys,
Relids required_outer,
double fraction)
{
Path *matched_path = NULL;
@ -461,13 +467,14 @@ get_cheapest_fractional_path_for_pathkeys(List *paths,
/*
* Since cost comparison is a lot cheaper than pathkey comparison, do
* that first.
* that first. (XXX is that still true?)
*/
if (matched_path != NULL &&
compare_fractional_path_costs(matched_path, path, fraction) <= 0)
continue;
if (pathkeys_contained_in(pathkeys, path->pathkeys))
if (pathkeys_contained_in(pathkeys, path->pathkeys) &&
bms_is_subset(path->required_outer, required_outer))
matched_path = path;
}
return matched_path;

View File

@ -932,7 +932,7 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path)
long numGroups;
Oid *groupOperators;
numGroups = (long) Min(best_path->rows, (double) LONG_MAX);
numGroups = (long) Min(best_path->path.rows, (double) LONG_MAX);
/*
* Get the hashable equality operators for the Agg node to use.
@ -1018,7 +1018,7 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path)
}
/* Adjust output size estimate (other fields should be OK already) */
plan->plan_rows = best_path->rows;
plan->plan_rows = best_path->path.rows;
return plan;
}
@ -1112,7 +1112,7 @@ create_indexscan_plan(PlannerInfo *root,
fixed_indexorderbys = fix_indexorderby_references(root, best_path);
/*
* If this is an innerjoin scan, the indexclauses will contain join
* If this is a parameterized scan, the indexclauses will contain join
* clauses that are not present in scan_clauses (since the passed-in value
* is just the rel's baserestrictinfo list). We must add these clauses to
* scan_clauses to ensure they get checked. In most cases we will remove
@ -1122,7 +1122,7 @@ create_indexscan_plan(PlannerInfo *root,
* Note: pointer comparison should be enough to determine RestrictInfo
* matches.
*/
if (best_path->isjoininner)
if (best_path->path.required_outer)
scan_clauses = list_union_ptr(scan_clauses, best_path->indexclauses);
/*
@ -1189,7 +1189,7 @@ create_indexscan_plan(PlannerInfo *root,
* it'd break the comparisons to predicates above ... (or would it? Those
* wouldn't have outer refs)
*/
if (best_path->isjoininner)
if (best_path->path.required_outer)
{
stripped_indexquals = (List *)
replace_nestloop_params(root, (Node *) stripped_indexquals);
@ -1221,8 +1221,6 @@ create_indexscan_plan(PlannerInfo *root,
best_path->indexscandir);
copy_path_costsize(&scan_plan->plan, &best_path->path);
/* use the indexscan-specific rows estimate, not the parent rel's */
scan_plan->plan.plan_rows = best_path->rows;
return scan_plan;
}
@ -1258,14 +1256,14 @@ create_bitmap_scan_plan(PlannerInfo *root,
scan_clauses = extract_actual_clauses(scan_clauses, false);
/*
* If this is a innerjoin scan, the indexclauses will contain join clauses
* If this is a parameterized scan, the indexclauses will contain join clauses
* that are not present in scan_clauses (since the passed-in value is just
* the rel's baserestrictinfo list). We must add these clauses to
* scan_clauses to ensure they get checked. In most cases we will remove
* the join clauses again below, but if a join clause contains a special
* operator, we need to make sure it gets into the scan_clauses.
*/
if (best_path->isjoininner)
if (best_path->path.required_outer)
{
scan_clauses = list_concat_unique(scan_clauses, bitmapqualorig);
}
@ -1328,8 +1326,6 @@ create_bitmap_scan_plan(PlannerInfo *root,
baserelid);
copy_path_costsize(&scan_plan->scan.plan, &best_path->path);
/* use the indexscan-specific rows estimate, not the parent rel's */
scan_plan->scan.plan.plan_rows = best_path->rows;
return scan_plan;
}
@ -1510,7 +1506,7 @@ create_bitmap_subplan(PlannerInfo *root, Path *bitmapqual,
* Replace outer-relation variables with nestloop params, but only
* after doing the above comparisons to index predicates.
*/
if (ipath->isjoininner)
if (ipath->path.required_outer)
{
*qual = (List *)
replace_nestloop_params(root, (Node *) *qual);
@ -1883,14 +1879,13 @@ create_nestloop_plan(PlannerInfo *root,
ListCell *next;
/*
* If the inner path is a nestloop inner indexscan, it might be using some
* of the join quals as index quals, in which case we don't have to check
* them again at the join node. Remove any join quals that are redundant.
* If the inner path is parameterized, it might have already used some of
* the join quals, in which case we don't have to check them again at the
* join node. Remove any join quals that are redundant.
*/
joinrestrictclauses =
select_nonredundant_join_clauses(root,
joinrestrictclauses,
best_path->innerjoinpath);
select_nonredundant_join_clauses(joinrestrictclauses,
best_path->innerjoinpath->param_clauses);
/* Sort join qual clauses into best execution order */
joinrestrictclauses = order_qual_clauses(root, joinrestrictclauses);
@ -2054,7 +2049,7 @@ create_mergejoin_plan(PlannerInfo *root,
/*
* We assume the materialize will not spill to disk, and therefore
* charge just cpu_operator_cost per tuple. (Keep this estimate in
* sync with cost_mergejoin.)
* sync with final_cost_mergejoin.)
*/
copy_plan_costsize(matplan, inner_plan);
matplan->total_cost += cpu_operator_cost * matplan->plan_rows;
@ -2885,7 +2880,7 @@ copy_path_costsize(Plan *dest, Path *src)
{
dest->startup_cost = src->startup_cost;
dest->total_cost = src->total_cost;
dest->plan_rows = src->parent->rows;
dest->plan_rows = src->rows;
dest->plan_width = src->parent->width;
}
else

View File

@ -375,6 +375,7 @@ query_planner(PlannerInfo *root, List *tlist,
sortedpath =
get_cheapest_fractional_path_for_pathkeys(final_rel->pathlist,
root->query_pathkeys,
NULL,
tuple_fraction);
/* Don't return same path in both guises; just wastes effort */

View File

@ -3297,7 +3297,8 @@ plan_cluster_use_sort(Oid tableOid, Oid indexOid)
/* Estimate the cost of index scan */
indexScanPath = create_index_path(root, indexInfo,
NIL, NIL, NIL, NIL, NIL,
ForwardScanDirection, false, NULL);
ForwardScanDirection, false,
NULL, 1.0);
return (seqScanAndSortPath.total_cost < indexScanPath->path.total_cost);
}

File diff suppressed because it is too large Load Diff

View File

@ -103,6 +103,7 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptKind reloptkind)
rel->cheapest_startup_path = NULL;
rel->cheapest_total_path = NULL;
rel->cheapest_unique_path = NULL;
rel->cheapest_parameterized_paths = NIL;
rel->relid = relid;
rel->rtekind = rte->rtekind;
/* min_attr, max_attr, attr_needed, attr_widths are set below */
@ -117,8 +118,6 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptKind reloptkind)
rel->baserestrictcost.per_tuple = 0;
rel->joininfo = NIL;
rel->has_eclass_joins = false;
rel->index_outer_relids = NULL;
rel->index_inner_paths = NIL;
/* Check type of rtable entry */
switch (rte->rtekind)
@ -354,6 +353,7 @@ build_join_rel(PlannerInfo *root,
joinrel->cheapest_startup_path = NULL;
joinrel->cheapest_total_path = NULL;
joinrel->cheapest_unique_path = NULL;
joinrel->cheapest_parameterized_paths = NIL;
joinrel->relid = 0; /* indicates not a baserel */
joinrel->rtekind = RTE_JOIN;
joinrel->min_attr = 0;
@ -371,8 +371,6 @@ build_join_rel(PlannerInfo *root,
joinrel->baserestrictcost.per_tuple = 0;
joinrel->joininfo = NIL;
joinrel->has_eclass_joins = false;
joinrel->index_outer_relids = NULL;
joinrel->index_inner_paths = NIL;
/*
* Create a new tlist containing just the vars that need to be output from

View File

@ -33,8 +33,6 @@ static Expr *make_sub_restrictinfos(Expr *clause,
bool pseudoconstant,
Relids required_relids,
Relids nullable_relids);
static List *select_nonredundant_join_list(List *restrictinfo_list,
List *reference_list);
static bool join_clause_is_redundant(RestrictInfo *rinfo,
List *reference_list);
@ -623,11 +621,14 @@ extract_actual_join_clauses(List *restrictinfo_list,
/*
* select_nonredundant_join_clauses
* Select the members of restrictinfo_list that are not redundant with
* any member of reference_list.
*
* Given a list of RestrictInfo clauses that are to be applied in a join,
* select the ones that are not redundant with any clause that's enforced
* by the inner_path. This is used for nestloop joins, wherein any clause
* being used in an inner indexscan need not be checked again at the join.
* select the ones that are not redundant with any clause that's listed in
* reference_list. This is used, for example, to avoid checking joinclauses
* again at a nestloop join when they've already been enforced by a
* parameterized inner path.
*
* "Redundant" means either equal() or derived from the same EquivalenceClass.
* We have to check the latter because indxpath.c may select different derived
@ -637,78 +638,16 @@ extract_actual_join_clauses(List *restrictinfo_list,
* restrictinfo_list; that should have been handled elsewhere.
*/
List *
select_nonredundant_join_clauses(PlannerInfo *root,
List *restrictinfo_list,
Path *inner_path)
{
if (IsA(inner_path, IndexPath))
{
/*
* Check the index quals to see if any of them are join clauses.
*
* We can skip this if the index path is an ordinary indexpath and not
* a special innerjoin path, since it then wouldn't be using any join
* clauses.
*/
IndexPath *innerpath = (IndexPath *) inner_path;
if (innerpath->isjoininner)
restrictinfo_list =
select_nonredundant_join_list(restrictinfo_list,
innerpath->indexclauses);
}
else if (IsA(inner_path, BitmapHeapPath))
{
/*
* Same deal for bitmapped index scans.
*
* Note: both here and above, we ignore any implicit index
* restrictions associated with the use of partial indexes. This is
* OK because we're only trying to prove we can dispense with some
* join quals; failing to prove that doesn't result in an incorrect
* plan. It's quite unlikely that a join qual could be proven
* redundant by an index predicate anyway. (Also, if we did manage to
* prove it, we'd have to have a special case for update targets; see
* notes about EvalPlanQual testing in create_indexscan_plan().)
*/
BitmapHeapPath *innerpath = (BitmapHeapPath *) inner_path;
if (innerpath->isjoininner)
{
List *bitmapclauses;
bitmapclauses =
make_restrictinfo_from_bitmapqual(innerpath->bitmapqual,
true,
false);
restrictinfo_list =
select_nonredundant_join_list(restrictinfo_list,
bitmapclauses);
}
}
/*
* XXX the inner path of a nestloop could also be an append relation whose
* elements use join quals. However, they might each use different quals;
* we could only remove join quals that are enforced by all the appendrel
* members. For the moment we don't bother to try.
*/
return restrictinfo_list;
}
/*
* select_nonredundant_join_list
* Select the members of restrictinfo_list that are not redundant with
* any member of reference_list. See above for more info.
*/
static List *
select_nonredundant_join_list(List *restrictinfo_list,
List *reference_list)
select_nonredundant_join_clauses(List *restrictinfo_list,
List *reference_list)
{
List *result = NIL;
ListCell *item;
/* Quick out if nothing could be removed */
if (reference_list == NIL)
return restrictinfo_list;
foreach(item, restrictinfo_list)
{
RestrictInfo *rinfo = (RestrictInfo *) lfirst(item);

View File

@ -5971,7 +5971,7 @@ string_to_bytea_const(const char *str, size_t str_len)
static void
genericcostestimate(PlannerInfo *root,
IndexPath *path,
RelOptInfo *outer_rel,
double loop_count,
double numIndexTuples,
Cost *indexStartupCost,
Cost *indexTotalCost,
@ -6119,16 +6119,8 @@ genericcostestimate(PlannerInfo *root,
* Note that we are counting pages not tuples anymore, so we take N = T =
* index size, as if there were one "tuple" per page.
*/
if (outer_rel != NULL && outer_rel->rows > 1)
{
num_outer_scans = outer_rel->rows;
num_scans = num_sa_scans * num_outer_scans;
}
else
{
num_outer_scans = 1;
num_scans = num_sa_scans;
}
num_outer_scans = loop_count;
num_scans = num_sa_scans * num_outer_scans;
if (num_scans > 1)
{
@ -6234,7 +6226,7 @@ btcostestimate(PG_FUNCTION_ARGS)
{
PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
IndexPath *path = (IndexPath *) PG_GETARG_POINTER(1);
RelOptInfo *outer_rel = (RelOptInfo *) PG_GETARG_POINTER(2);
double loop_count = PG_GETARG_FLOAT8(2);
Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(3);
Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(4);
Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5);
@ -6410,7 +6402,7 @@ btcostestimate(PG_FUNCTION_ARGS)
numIndexTuples = rint(numIndexTuples / num_sa_scans);
}
genericcostestimate(root, path, outer_rel,
genericcostestimate(root, path, loop_count,
numIndexTuples,
indexStartupCost, indexTotalCost,
indexSelectivity, indexCorrelation);
@ -6527,13 +6519,13 @@ hashcostestimate(PG_FUNCTION_ARGS)
{
PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
IndexPath *path = (IndexPath *) PG_GETARG_POINTER(1);
RelOptInfo *outer_rel = (RelOptInfo *) PG_GETARG_POINTER(2);
double loop_count = PG_GETARG_FLOAT8(2);
Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(3);
Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(4);
Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5);
double *indexCorrelation = (double *) PG_GETARG_POINTER(6);
genericcostestimate(root, path, outer_rel, 0.0,
genericcostestimate(root, path, loop_count, 0.0,
indexStartupCost, indexTotalCost,
indexSelectivity, indexCorrelation);
@ -6545,13 +6537,13 @@ gistcostestimate(PG_FUNCTION_ARGS)
{
PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
IndexPath *path = (IndexPath *) PG_GETARG_POINTER(1);
RelOptInfo *outer_rel = (RelOptInfo *) PG_GETARG_POINTER(2);
double loop_count = PG_GETARG_FLOAT8(2);
Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(3);
Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(4);
Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5);
double *indexCorrelation = (double *) PG_GETARG_POINTER(6);
genericcostestimate(root, path, outer_rel, 0.0,
genericcostestimate(root, path, loop_count, 0.0,
indexStartupCost, indexTotalCost,
indexSelectivity, indexCorrelation);
@ -6563,13 +6555,13 @@ spgcostestimate(PG_FUNCTION_ARGS)
{
PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
IndexPath *path = (IndexPath *) PG_GETARG_POINTER(1);
RelOptInfo *outer_rel = (RelOptInfo *) PG_GETARG_POINTER(2);
double loop_count = PG_GETARG_FLOAT8(2);
Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(3);
Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(4);
Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5);
double *indexCorrelation = (double *) PG_GETARG_POINTER(6);
genericcostestimate(root, path, outer_rel, 0.0,
genericcostestimate(root, path, loop_count, 0.0,
indexStartupCost, indexTotalCost,
indexSelectivity, indexCorrelation);
@ -6884,7 +6876,7 @@ gincostestimate(PG_FUNCTION_ARGS)
{
PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
IndexPath *path = (IndexPath *) PG_GETARG_POINTER(1);
RelOptInfo *outer_rel = (RelOptInfo *) PG_GETARG_POINTER(2);
double loop_count = PG_GETARG_FLOAT8(2);
Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(3);
Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(4);
Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5);
@ -7051,10 +7043,7 @@ gincostestimate(PG_FUNCTION_ARGS)
}
/* Will we have more than one iteration of a nestloop scan? */
if (outer_rel != NULL && outer_rel->rows > 1)
outer_scans = outer_rel->rows;
else
outer_scans = 1;
outer_scans = loop_count;
/*
* Compute cost to begin scan, first of all, pay attention to pending list.

View File

@ -36,6 +36,15 @@ typedef struct Bitmapset
} Bitmapset; /* VARIABLE LENGTH STRUCT */
/*
* result of bms_subset_compare
*
* Describes how the two sets passed to bms_subset_compare relate to each
* other. "first" and "second" presumably refer to that function's two
* arguments in order -- TODO confirm against its definition.
*/
typedef enum
{
BMS_EQUAL, /* sets are equal */
BMS_SUBSET1, /* first set is a subset of the second */
BMS_SUBSET2, /* second set is a subset of the first */
BMS_DIFFERENT /* neither set is a subset of the other */
} BMS_Comparison;
/* result of bms_membership */
typedef enum
{
@ -58,6 +67,7 @@ extern Bitmapset *bms_union(const Bitmapset *a, const Bitmapset *b);
extern Bitmapset *bms_intersect(const Bitmapset *a, const Bitmapset *b);
extern Bitmapset *bms_difference(const Bitmapset *a, const Bitmapset *b);
extern bool bms_is_subset(const Bitmapset *a, const Bitmapset *b);
extern BMS_Comparison bms_subset_compare(const Bitmapset *a, const Bitmapset *b);
extern bool bms_is_member(int x, const Bitmapset *a);
extern bool bms_overlap(const Bitmapset *a, const Bitmapset *b);
extern bool bms_nonempty_difference(const Bitmapset *a, const Bitmapset *b);

View File

@ -232,7 +232,6 @@ typedef enum NodeTag
T_EquivalenceMember,
T_PathKey,
T_RestrictInfo,
T_InnerIndexscanInfo,
T_PlaceHolderVar,
T_SpecialJoinInfo,
T_AppendRelInfo,

View File

@ -145,6 +145,13 @@ typedef struct PlannerInfo
*/
RangeTblEntry **simple_rte_array; /* rangetable as an array */
/*
* all_baserels is a Relids set of all base relids (but not "other"
* relids) in the query; that is, the Relids identifier of the final
* join we need to form.
*/
Relids all_baserels;
/*
* join_rel_list is a list of all join-relation RelOptInfos we have
* considered in this planning run. For small problems we just scan the
@ -298,11 +305,16 @@ typedef struct PlannerInfo
* pathlist - List of Path nodes, one for each potentially useful
* method of generating the relation
* cheapest_startup_path - the pathlist member with lowest startup cost
* (regardless of its ordering)
* (regardless of its ordering; but must be
* unparameterized)
* cheapest_total_path - the pathlist member with lowest total cost
* (regardless of its ordering)
* (regardless of its ordering; but must be
* unparameterized)
* cheapest_unique_path - for caching cheapest path to produce unique
* (no duplicates) output from relation
* cheapest_parameterized_paths - paths with cheapest total costs for
* their parameterizations; always includes
* cheapest_total_path
*
* If the relation is a base relation it will have these fields set:
*
@ -343,11 +355,6 @@ typedef struct PlannerInfo
* note this excludes clauses that might be derivable from
* EquivalenceClasses)
* has_eclass_joins - flag that EquivalenceClass joins are possible
* index_outer_relids - only used for base rels; set of outer relids
* that participate in indexable joinclauses for this rel
* index_inner_paths - only used for base rels; list of InnerIndexscanInfo
* nodes showing best indexpaths for various subsets of
* index_outer_relids.
*
* Note: Keeping a restrictinfo list in the RelOptInfo is useful only for
* base rels, because for a join rel the set of clauses that are treated as
@ -393,6 +400,7 @@ typedef struct RelOptInfo
struct Path *cheapest_startup_path;
struct Path *cheapest_total_path;
struct Path *cheapest_unique_path;
List *cheapest_parameterized_paths;
/* information about a base rel (not set for join rels!) */
Index relid;
@ -416,18 +424,6 @@ typedef struct RelOptInfo
List *joininfo; /* RestrictInfo structures for join clauses
* involving this rel */
bool has_eclass_joins; /* T means joininfo is incomplete */
/* cached info about inner indexscan paths for relation: */
Relids index_outer_relids; /* other relids in indexable join
* clauses */
List *index_inner_paths; /* InnerIndexscanInfo nodes */
/*
* Inner indexscans are not in the main pathlist because they are not
* usable except in specific join contexts. We use the index_inner_paths
* list just to avoid recomputing the best inner indexscan repeatedly for
* similar outer relations. See comments for InnerIndexscanInfo.
*/
} RelOptInfo;
/*
@ -609,7 +605,6 @@ typedef struct EquivalenceMember
* BTGreaterStrategyNumber (for DESC). We assume that all ordering-capable
* index types will use btree-compatible strategy numbers.
*/
typedef struct PathKey
{
NodeTag type;
@ -625,12 +620,31 @@ typedef struct PathKey
* simple plan types that we don't need any extra information in the path for.
* For other path types it is the first component of a larger struct.
*
* Note: "pathtype" is the NodeTag of the Plan node we could build from this
* Path. It is partially redundant with the Path's NodeTag, but allows us
* to use the same Path type for multiple Plan types where there is no need
* to distinguish the Plan type during path processing.
* "pathtype" is the NodeTag of the Plan node we could build from this Path.
* It is partially redundant with the Path's NodeTag, but allows us to use
* the same Path type for multiple Plan types when there is no need to
* distinguish the Plan type during path processing.
*
* "rows" is the same as parent->rows in simple paths, but in parameterized
* paths and UniquePaths it can be less than parent->rows, reflecting the
* fact that we've filtered by extra join conditions or removed duplicates.
*
* "pathkeys" is a List of PathKey nodes (see above), describing the sort
* ordering of the path's output rows.
*
* "required_outer", if not NULL, contains the relids of one or more relations
* that must provide parameter values to each scan of this path, because the
* path relies on join clauses using those rels. That means this path can only
* be joined to those rels by means of nestloop joins with this path on the
* inside. Note: for a normal unparameterized path, required_outer must be
* NULL, not an empty-but-not-null Bitmapset.
*
* "param_clauses" is a List of RestrictInfo nodes, containing the join
* clauses used by a parameterized path. Ideally param_clauses should be NIL
* if and only if required_outer is NULL. XXX for the moment, however, we do
* not compute param_clauses for Append and MergeAppend paths, so the list
* is inaccurate in those paths and possibly paths above them.
*/
typedef struct Path
{
NodeTag type;
@ -639,12 +653,15 @@ typedef struct Path
RelOptInfo *parent; /* the relation this path can build */
/* estimated execution costs for path (see costsize.c for more info) */
/* estimated size/costs for path (see costsize.c for more info) */
double rows; /* estimated number of result tuples */
Cost startup_cost; /* cost expended before fetching any tuples */
Cost total_cost; /* total cost (assuming all tuples fetched) */
List *pathkeys; /* sort ordering of path's output */
/* pathkeys is a List of PathKey nodes; see above */
Relids required_outer; /* rels supplying parameters used by path */
List *param_clauses; /* join clauses that use such parameters */
} Path;
/*----------
@ -685,12 +702,6 @@ typedef struct Path
* ORDER BY expression is meant to be used with. (There is no restriction
* on which index column each ORDER BY can be used with.)
*
* 'isjoininner' is TRUE if the path is a nestloop inner scan (that is,
* some of the index conditions are join rather than restriction clauses).
* Note that the path costs will be calculated differently from a plain
* indexscan in this case, and in addition there's a special 'rows' value
* different from the parent RelOptInfo's (see below).
*
* 'indexscandir' is one of:
* ForwardScanDirection: forward scan of an ordered index
* BackwardScanDirection: backward scan of an ordered index
@ -703,12 +714,6 @@ typedef struct Path
* we need not recompute them when considering using the same index in a
* bitmap index/heap scan (see BitmapHeapPath). The costs of the IndexPath
* itself represent the costs of an IndexScan or IndexOnlyScan plan type.
*
* 'rows' is the estimated result tuple count for the indexscan. This
* is the same as path.parent->rows for a simple indexscan, but it is
* different for a nestloop inner scan, because the additional indexquals
* coming from join clauses make the scan more selective than the parent
* rel's restrict clauses alone would do.
*----------
*/
typedef struct IndexPath
@ -720,11 +725,9 @@ typedef struct IndexPath
List *indexqualcols;
List *indexorderbys;
List *indexorderbycols;
bool isjoininner;
ScanDirection indexscandir;
Cost indextotalcost;
Selectivity indexselectivity;
double rows; /* estimated number of result tuples */
} IndexPath;
/*
@ -743,16 +746,11 @@ typedef struct IndexPath
* always represent the costs to use it as a regular (or index-only)
* IndexScan. The costs of a BitmapIndexScan can be computed using the
* IndexPath's indextotalcost and indexselectivity.
*
* BitmapHeapPaths can be nestloop inner indexscans. The isjoininner and
* rows fields serve the same purpose as for plain IndexPaths.
*/
typedef struct BitmapHeapPath
{
Path path;
Path *bitmapqual; /* IndexPath, BitmapAndPath, BitmapOrPath */
bool isjoininner; /* T if it's a nestloop inner scan */
double rows; /* estimated number of result tuples */
} BitmapHeapPath;
/*
@ -822,6 +820,11 @@ typedef struct AppendPath
/*
* IS_DUMMY_PATH(p) is true when p is an AppendPath whose subpaths list
* is NIL. The argument appears more than once in the expansion, so pass
* an expression that has no side effects.
*/
#define IS_DUMMY_PATH(p) \
(IsA((p), AppendPath) && ((AppendPath *) (p))->subpaths == NIL)
/*
* A relation that's been proven empty will have one path that is dummy.
* IS_DUMMY_REL(r) checks r's cheapest_total_path for that condition: it is
* false when cheapest_total_path is NULL, and otherwise reports whether
* that path satisfies IS_DUMMY_PATH. The argument appears more than once
* in the expansion, so pass an expression that has no side effects.
*/
#define IS_DUMMY_REL(r) \
((r)->cheapest_total_path != NULL && \
IS_DUMMY_PATH((r)->cheapest_total_path))
/*
* MergeAppendPath represents a MergeAppend plan, ie, the merging of sorted
* results from several member plans to produce similarly-sorted output.
@ -885,7 +888,6 @@ typedef struct UniquePath
UniquePathMethod umethod;
List *in_operators; /* equality operators of the IN clause */
List *uniq_exprs; /* expressions to be made unique */
double rows; /* estimated number of result tuples */
} UniquePath;
/*
@ -1172,42 +1174,6 @@ typedef struct MergeScanSelCache
Selectivity rightendsel; /* last-join fraction for clause right side */
} MergeScanSelCache;
/*
* Inner indexscan info.
*
* An inner indexscan is one that uses one or more joinclauses as index
* conditions (perhaps in addition to plain restriction clauses). So it
* can only be used as the inner path of a nestloop join where the outer
* relation includes all other relids appearing in those joinclauses.
* The set of usable joinclauses, and thus the best inner indexscan,
* thus varies depending on which outer relation we consider; so we have
* to recompute the best such paths for every join. To avoid lots of
* redundant computation, we cache the results of such searches. For
* each relation we compute the set of possible otherrelids (all relids
* appearing in joinquals that could become indexquals for this table).
* Two outer relations whose relids have the same intersection with this
* set will have the same set of available joinclauses and thus the same
* best inner indexscans for the inner relation. By taking the intersection
* before scanning the cache, we avoid recomputing when considering
* join rels that differ only by the inclusion of irrelevant other rels.
*
* The search key also includes a bool showing whether the join being
* considered is an outer join. Since we constrain the join order for
* outer joins, I believe that this bool can only have one possible value
* for any particular lookup key; but store it anyway to avoid confusion.
*/
typedef struct InnerIndexscanInfo
{
NodeTag type;
/* The lookup key: */
Relids other_relids; /* a set of relevant other relids */
bool isouterjoin; /* true if join is outer */
/* Best paths for this lookup key (NULL if no available indexscans): */
Path *cheapest_startup_innerpath; /* cheapest startup cost */
Path *cheapest_total_innerpath; /* cheapest total cost */
} InnerIndexscanInfo;
/*
* Placeholder node for an expression to be evaluated below the top level
* of a plan tree. This is used during planning to represent the contained
@ -1490,4 +1456,64 @@ typedef struct PlannerParamItem
Index abslevel; /* its absolute query level */
} PlannerParamItem;
/*
* When making cost estimates for a SEMI or ANTI join, there are some
* correction factors that are needed in both nestloop and hash joins
* to account for the fact that the executor can stop scanning inner rows
* as soon as it finds a match to the current outer row. These numbers
* depend only on the selected outer and inner join relations, not on the
* particular paths used for them, so it's worthwhile to calculate them
* just once per relation pair, not once per considered path. This struct
* is filled by compute_semi_anti_join_factors and must be passed along
* to the join cost estimation functions.
*
* outer_match_frac is the fraction of the outer tuples that are
* expected to have at least one match.
* match_count is the average number of matches expected for
* outer tuples that have at least one match.
*/
typedef struct SemiAntiJoinFactors
{
Selectivity outer_match_frac; /* fraction of outer tuples with a match */
/* average matches per matched outer tuple: a count, not a fraction */
Selectivity match_count;
} SemiAntiJoinFactors;
/*
* For speed reasons, cost estimation for join paths is performed in two
* phases: the first phase tries to quickly derive a lower bound for the
* join cost, and then we check if that's sufficient to reject the path.
* If not, we come back for a more refined cost estimate. The first phase
* fills a JoinCostWorkspace struct with its preliminary cost estimates
* and possibly additional intermediate values. The second phase takes
* these values as inputs to avoid repeating work.
*
* Because the preliminary estimates are used as lower bounds when deciding
* whether a path can be rejected, they must never exceed the final costs.
*
* (Ideally we'd declare this in cost.h, but it's also needed in pathnode.h,
* so it seems best to put it here.)
*/
typedef struct JoinCostWorkspace
{
/* Preliminary cost estimates --- must not be larger than final ones! */
Cost startup_cost; /* cost expended before fetching any tuples */
Cost total_cost; /* total cost (assuming all tuples fetched) */
/*
* Fields below here should be treated as private to costsize.c; their
* exact meaning is defined by the code there that fills and reads them.
*/
Cost run_cost; /* non-startup cost components */
/* private for cost_nestloop code */
Cost inner_rescan_run_cost;
double outer_matched_rows;
Selectivity inner_scan_frac;
/* private for cost_mergejoin code */
Cost inner_run_cost;
double outer_rows;
double inner_rows;
double outer_skip_rows;
double inner_skip_rows;
/* private for cost_hashjoin code */
int numbuckets;
int numbatches;
} JoinCostWorkspace;
#endif /* RELATION_H */

View File

@ -68,9 +68,9 @@ extern double index_pages_fetched(double tuples_fetched, BlockNumber pages,
double index_pages, PlannerInfo *root);
extern void cost_seqscan(Path *path, PlannerInfo *root, RelOptInfo *baserel);
extern void cost_index(IndexPath *path, PlannerInfo *root,
RelOptInfo *outer_rel);
double loop_count);
extern void cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel,
Path *bitmapqual, RelOptInfo *outer_rel);
Path *bitmapqual, double loop_count);
extern void cost_bitmap_and_node(BitmapAndPath *path, PlannerInfo *root);
extern void cost_bitmap_or_node(BitmapOrPath *path, PlannerInfo *root);
extern void cost_bitmap_tree_node(Path *path, Cost *cost, Selectivity *selec);
@ -107,15 +107,47 @@ extern void cost_group(Path *path, PlannerInfo *root,
int numGroupCols, double numGroups,
Cost input_startup_cost, Cost input_total_cost,
double input_tuples);
extern void cost_nestloop(NestPath *path, PlannerInfo *root,
SpecialJoinInfo *sjinfo);
extern void cost_mergejoin(MergePath *path, PlannerInfo *root,
SpecialJoinInfo *sjinfo);
extern void cost_hashjoin(HashPath *path, PlannerInfo *root,
SpecialJoinInfo *sjinfo);
/*
* Join costing entry points. For each join type there is an initial_cost_*
* function, which takes a JoinCostWorkspace pointer along with the candidate
* outer and inner paths, and a final_cost_* function, which takes the join
* path node plus a JoinCostWorkspace pointer.
* NOTE(review): presumably these are the two estimation phases described
* with JoinCostWorkspace in relation.h --- confirm against costsize.c.
*/
extern void initial_cost_nestloop(PlannerInfo *root,
JoinCostWorkspace *workspace,
JoinType jointype,
Path *outer_path, Path *inner_path,
SpecialJoinInfo *sjinfo,
SemiAntiJoinFactors *semifactors);
extern void final_cost_nestloop(PlannerInfo *root, NestPath *path,
JoinCostWorkspace *workspace,
SpecialJoinInfo *sjinfo,
SemiAntiJoinFactors *semifactors);
extern void initial_cost_mergejoin(PlannerInfo *root,
JoinCostWorkspace *workspace,
JoinType jointype,
List *mergeclauses,
Path *outer_path, Path *inner_path,
List *outersortkeys, List *innersortkeys,
SpecialJoinInfo *sjinfo);
extern void final_cost_mergejoin(PlannerInfo *root, MergePath *path,
JoinCostWorkspace *workspace,
SpecialJoinInfo *sjinfo);
extern void initial_cost_hashjoin(PlannerInfo *root,
JoinCostWorkspace *workspace,
JoinType jointype,
List *hashclauses,
Path *outer_path, Path *inner_path,
SpecialJoinInfo *sjinfo,
SemiAntiJoinFactors *semifactors);
extern void final_cost_hashjoin(PlannerInfo *root, HashPath *path,
JoinCostWorkspace *workspace,
SpecialJoinInfo *sjinfo,
SemiAntiJoinFactors *semifactors);
extern void cost_subplan(PlannerInfo *root, SubPlan *subplan, Plan *plan);
extern void cost_qual_eval(QualCost *cost, List *quals, PlannerInfo *root);
extern void cost_qual_eval_node(QualCost *cost, Node *qual, PlannerInfo *root);
extern void compute_semi_anti_join_factors(PlannerInfo *root,
RelOptInfo *outerrel,
RelOptInfo *innerrel,
JoinType jointype,
SpecialJoinInfo *sjinfo,
List *restrictlist,
SemiAntiJoinFactors *semifactors);
extern void set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel);
extern void set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel,
RelOptInfo *outer_rel,

View File

@ -26,6 +26,9 @@ extern int compare_fractional_path_costs(Path *path1, Path *path2,
double fraction);
extern void set_cheapest(RelOptInfo *parent_rel);
extern void add_path(RelOptInfo *parent_rel, Path *new_path);
extern bool add_path_precheck(RelOptInfo *parent_rel,
Cost startup_cost, Cost total_cost,
List *pathkeys, Relids required_outer);
extern Path *create_seqscan_path(PlannerInfo *root, RelOptInfo *rel);
extern IndexPath *create_index_path(PlannerInfo *root,
@ -37,11 +40,12 @@ extern IndexPath *create_index_path(PlannerInfo *root,
List *pathkeys,
ScanDirection indexscandir,
bool indexonly,
RelOptInfo *outer_rel);
Relids required_outer,
double loop_count);
extern BitmapHeapPath *create_bitmap_heap_path(PlannerInfo *root,
RelOptInfo *rel,
Path *bitmapqual,
RelOptInfo *outer_rel);
double loop_count);
extern BitmapAndPath *create_bitmap_and_path(PlannerInfo *root,
RelOptInfo *rel,
List *bitmapquals);
@ -66,23 +70,31 @@ extern Path *create_ctescan_path(PlannerInfo *root, RelOptInfo *rel);
extern Path *create_worktablescan_path(PlannerInfo *root, RelOptInfo *rel);
extern ForeignPath *create_foreignscan_path(PlannerInfo *root, RelOptInfo *rel);
extern Relids calc_nestloop_required_outer(Path *outer_path, Path *inner_path);
extern Relids calc_non_nestloop_required_outer(Path *outer_path, Path *inner_path);
extern NestPath *create_nestloop_path(PlannerInfo *root,
RelOptInfo *joinrel,
JoinType jointype,
JoinCostWorkspace *workspace,
SpecialJoinInfo *sjinfo,
SemiAntiJoinFactors *semifactors,
Path *outer_path,
Path *inner_path,
List *restrict_clauses,
List *pathkeys);
List *pathkeys,
Relids required_outer);
extern MergePath *create_mergejoin_path(PlannerInfo *root,
RelOptInfo *joinrel,
JoinType jointype,
JoinCostWorkspace *workspace,
SpecialJoinInfo *sjinfo,
Path *outer_path,
Path *inner_path,
List *restrict_clauses,
List *pathkeys,
Relids required_outer,
List *mergeclauses,
List *outersortkeys,
List *innersortkeys);
@ -90,10 +102,13 @@ extern MergePath *create_mergejoin_path(PlannerInfo *root,
extern HashPath *create_hashjoin_path(PlannerInfo *root,
RelOptInfo *joinrel,
JoinType jointype,
JoinCostWorkspace *workspace,
SpecialJoinInfo *sjinfo,
SemiAntiJoinFactors *semifactors,
Path *outer_path,
Path *inner_path,
List *restrict_clauses,
Relids required_outer,
List *hashclauses);
/*

View File

@ -44,17 +44,14 @@ extern void debug_print_rel(PlannerInfo *root, RelOptInfo *rel);
*/
extern void create_index_paths(PlannerInfo *root, RelOptInfo *rel);
extern List *generate_bitmap_or_paths(PlannerInfo *root, RelOptInfo *rel,
List *clauses, List *outer_clauses,
RelOptInfo *outer_rel);
extern void best_inner_indexscan(PlannerInfo *root, RelOptInfo *rel,
RelOptInfo *outer_rel, JoinType jointype,
Path **cheapest_startup, Path **cheapest_total);
List *clauses, List *other_clauses,
bool restriction_only);
extern bool relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel,
List *restrictlist,
List *exprlist, List *oprlist);
extern bool eclass_matches_any_index(EquivalenceClass *ec,
EquivalenceMember *em,
RelOptInfo *rel);
extern bool eclass_member_matches_indexcol(EquivalenceClass *ec,
EquivalenceMember *em,
IndexOptInfo *index, int indexcol);
extern bool match_index_to_operand(Node *operand, int indexcol,
IndexOptInfo *index);
extern void expand_indexqual_conditions(IndexOptInfo *index,
@ -127,9 +124,9 @@ extern void add_child_rel_equivalences(PlannerInfo *root,
extern void mutate_eclass_expressions(PlannerInfo *root,
Node *(*mutator) (),
void *context);
extern List *find_eclass_clauses_for_index_join(PlannerInfo *root,
RelOptInfo *rel,
Relids outer_relids);
extern List *generate_implied_equalities_for_indexcol(PlannerInfo *root,
IndexOptInfo *index,
int indexcol);
extern bool have_relevant_eclass_joinclause(PlannerInfo *root,
RelOptInfo *rel1, RelOptInfo *rel2);
extern bool has_relevant_eclass_joinclause(PlannerInfo *root,
@ -153,9 +150,11 @@ extern List *canonicalize_pathkeys(PlannerInfo *root, List *pathkeys);
extern PathKeysComparison compare_pathkeys(List *keys1, List *keys2);
extern bool pathkeys_contained_in(List *keys1, List *keys2);
extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
Relids required_outer,
CostSelector cost_criterion);
extern Path *get_cheapest_fractional_path_for_pathkeys(List *paths,
List *pathkeys,
Relids required_outer,
double fraction);
extern List *build_index_pathkeys(PlannerInfo *root, IndexOptInfo *index,
ScanDirection scandir);

View File

@ -40,8 +40,7 @@ extern List *extract_actual_clauses(List *restrictinfo_list,
extern void extract_actual_join_clauses(List *restrictinfo_list,
List **joinquals,
List **otherquals);
extern List *select_nonredundant_join_clauses(PlannerInfo *root,
List *restrictinfo_list,
Path *inner_path);
extern List *select_nonredundant_join_clauses(List *restrictinfo_list,
List *reference_list);
#endif /* RESTRICTINFO_H */