Repair planning bugs caused by my misguided removal of restrictinfo link
fields in JoinPaths --- turns out that we do need that after all :-(.
Also, rearrange planner so that only one RelOptInfo is created for a
particular set of joined base relations, no matter how many different
subsets of relations it can be created from.  This saves memory and
processing time compared to the old method of making a bunch of RelOptInfos
and then removing the duplicates.  Clean up the jointree iteration logic;
not sure if it's better, but I sure find it more readable and plausible
now, particularly for the case of 'bushy plans'.
This commit is contained in:
Tom Lane 2000-02-07 04:41:04 +00:00
parent 2bda7a4406
commit d8733ce674
24 changed files with 1224 additions and 1106 deletions

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v 1.103 2000/01/27 18:11:27 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v 1.104 2000/02/07 04:40:56 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -977,7 +977,7 @@ _copyRelOptInfo(RelOptInfo *from)
newnode->pages = from->pages;
newnode->tuples = from->tuples;
Node_Copy(from, newnode, restrictinfo);
Node_Copy(from, newnode, baserestrictinfo);
Node_Copy(from, newnode, joininfo);
Node_Copy(from, newnode, innerjoin);
@ -1137,6 +1137,7 @@ CopyJoinPathFields(JoinPath *from, JoinPath *newnode)
{
Node_Copy(from, newnode, outerjoinpath);
Node_Copy(from, newnode, innerjoinpath);
Node_Copy(from, newnode, joinrestrictinfo);
}
/* ----------------

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/nodes/equalfuncs.c,v 1.58 2000/01/31 01:21:39 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/nodes/equalfuncs.c,v 1.59 2000/02/07 04:40:57 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -374,6 +374,8 @@ _equalJoinPath(JoinPath *a, JoinPath *b)
return false;
if (!equal(a->innerjoinpath, b->innerjoinpath))
return false;
if (!equal(a->joinrestrictinfo, b->joinrestrictinfo))
return false;
return true;
}

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/nodes/Attic/freefuncs.c,v 1.33 2000/01/27 18:11:28 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/nodes/Attic/freefuncs.c,v 1.34 2000/02/07 04:40:57 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -735,7 +735,7 @@ _freeRelOptInfo(RelOptInfo *node)
*/
freeObject(node->cheapestpath);
freeObject(node->restrictinfo);
freeObject(node->baserestrictinfo);
freeObject(node->joininfo);
freeObject(node->innerjoin);
@ -853,6 +853,10 @@ FreeJoinPathFields(JoinPath *node)
{
freeObject(node->outerjoinpath);
freeObject(node->innerjoinpath);
/* XXX probably wrong, since ordinarily a JoinPath would share its
* restrictinfo list with other paths made for the same join?
*/
freeObject(node->joinrestrictinfo);
}
/* ----------------

View File

@ -6,7 +6,7 @@
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Header: /cvsroot/pgsql/src/backend/nodes/outfuncs.c,v 1.105 2000/01/27 18:11:28 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/nodes/outfuncs.c,v 1.106 2000/02/07 04:40:57 tgl Exp $
*
* NOTES
* Every (plan) node in POSTGRES has an associated "out" routine which
@ -915,10 +915,10 @@ _outRelOptInfo(StringInfo str, RelOptInfo *node)
*/
appendStringInfo(str,
" :cheapestpath @ 0x%x :pruneable %s :restrictinfo ",
" :cheapestpath @ 0x%x :pruneable %s :baserestrictinfo ",
(int) node->cheapestpath,
node->pruneable ? "true" : "false");
_outNode(str, node->restrictinfo);
_outNode(str, node->baserestrictinfo);
appendStringInfo(str, " :joininfo ");
_outNode(str, node->joininfo);
@ -1035,16 +1035,12 @@ _outNestPath(StringInfo str, NestPath *node)
node->path.pathtype,
node->path.path_cost);
_outNode(str, node->path.pathkeys);
/*
* Not sure if these are nodes; they're declared as "struct path *".
* For now, i'll just print the addresses.
*/
appendStringInfo(str,
" :outerjoinpath @ 0x%x :innerjoinpath @ 0x%x ",
(int) node->outerjoinpath,
(int) node->innerjoinpath);
appendStringInfo(str, " :outerjoinpath ");
_outNode(str, node->outerjoinpath);
appendStringInfo(str, " :innerjoinpath ");
_outNode(str, node->innerjoinpath);
appendStringInfo(str, " :joinrestrictinfo ");
_outNode(str, node->joinrestrictinfo);
}
/*
@ -1058,16 +1054,12 @@ _outMergePath(StringInfo str, MergePath *node)
node->jpath.path.pathtype,
node->jpath.path.path_cost);
_outNode(str, node->jpath.path.pathkeys);
/*
* Not sure if these are nodes; they're declared as "struct path *".
* For now, i'll just print the addresses.
*/
appendStringInfo(str,
" :outerjoinpath @ 0x%x :innerjoinpath @ 0x%x ",
(int) node->jpath.outerjoinpath,
(int) node->jpath.innerjoinpath);
appendStringInfo(str, " :outerjoinpath ");
_outNode(str, node->jpath.outerjoinpath);
appendStringInfo(str, " :innerjoinpath ");
_outNode(str, node->jpath.innerjoinpath);
appendStringInfo(str, " :joinrestrictinfo ");
_outNode(str, node->jpath.joinrestrictinfo);
appendStringInfo(str, " :path_mergeclauses ");
_outNode(str, node->path_mergeclauses);
@ -1090,16 +1082,12 @@ _outHashPath(StringInfo str, HashPath *node)
node->jpath.path.pathtype,
node->jpath.path.path_cost);
_outNode(str, node->jpath.path.pathkeys);
/*
* Not sure if these are nodes; they're declared as "struct path *".
* For now, i'll just print the addresses.
*/
appendStringInfo(str,
" :outerjoinpath @ 0x%x :innerjoinpath @ 0x%x ",
(int) node->jpath.outerjoinpath,
(int) node->jpath.innerjoinpath);
appendStringInfo(str, " :outerjoinpath ");
_outNode(str, node->jpath.outerjoinpath);
appendStringInfo(str, " :innerjoinpath ");
_outNode(str, node->jpath.innerjoinpath);
appendStringInfo(str, " :joinrestrictinfo ");
_outNode(str, node->jpath.joinrestrictinfo);
appendStringInfo(str, " :path_hashclauses ");
_outNode(str, node->path_hashclauses);

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/nodes/readfuncs.c,v 1.81 2000/01/27 18:11:28 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/nodes/readfuncs.c,v 1.82 2000/02/07 04:40:57 tgl Exp $
*
* NOTES
* Most of the read functions for plan nodes are tested. (In fact, they
@ -1288,8 +1288,8 @@ _readRelOptInfo()
sscanf(token, "%x", (unsigned int *) &local_node->cheapestpath);
token = lsptok(NULL, &length); /* get :restrictinfo */
local_node->restrictinfo = nodeRead(true); /* now read it */
token = lsptok(NULL, &length); /* get :baserestrictinfo */
local_node->baserestrictinfo = nodeRead(true); /* now read it */
token = lsptok(NULL, &length); /* get :joininfo */
local_node->joininfo = nodeRead(true); /* now read it */
@ -1518,25 +1518,14 @@ _readNestPath()
token = lsptok(NULL, &length); /* get :pathkeys */
local_node->path.pathkeys = nodeRead(true); /* now read it */
/*
* Not sure if these are nodes; they're declared as "struct path *".
* For now, i'll just print the addresses.
*
* GJK: Since I am parsing this stuff, I'll just ignore the addresses,
* and initialize these pointers to NULL.
*/
token = lsptok(NULL, &length); /* get :outerjoinpath */
token = lsptok(NULL, &length); /* get @ */
token = lsptok(NULL, &length); /* now read it */
local_node->outerjoinpath = NULL;
local_node->outerjoinpath = nodeRead(true); /* now read it */
token = lsptok(NULL, &length); /* get :innerjoinpath */
token = lsptok(NULL, &length); /* get @ */
token = lsptok(NULL, &length); /* now read it */
local_node->innerjoinpath = nodeRead(true); /* now read it */
local_node->innerjoinpath = NULL;
token = lsptok(NULL, &length); /* get :joinrestrictinfo */
local_node->joinrestrictinfo = nodeRead(true); /* now read it */
return local_node;
}
@ -1569,25 +1558,14 @@ _readMergePath()
token = lsptok(NULL, &length); /* get :pathkeys */
local_node->jpath.path.pathkeys = nodeRead(true); /* now read it */
/*
* Not sure if these are nodes; they're declared as "struct path *".
* For now, i'll just print the addresses.
*
* GJK: Since I am parsing this stuff, I'll just ignore the addresses,
* and initialize these pointers to NULL.
*/
token = lsptok(NULL, &length); /* get :outerjoinpath */
token = lsptok(NULL, &length); /* get @ */
token = lsptok(NULL, &length); /* now read it */
local_node->jpath.outerjoinpath = NULL;
local_node->jpath.outerjoinpath = nodeRead(true); /* now read it */
token = lsptok(NULL, &length); /* get :innerjoinpath */
token = lsptok(NULL, &length); /* get @ */
token = lsptok(NULL, &length); /* now read it */
local_node->jpath.innerjoinpath = nodeRead(true); /* now read it */
local_node->jpath.innerjoinpath = NULL;
token = lsptok(NULL, &length); /* get :joinrestrictinfo */
local_node->jpath.joinrestrictinfo = nodeRead(true); /* now read it */
token = lsptok(NULL, &length); /* get :path_mergeclauses */
local_node->path_mergeclauses = nodeRead(true); /* now read it */
@ -1629,25 +1607,14 @@ _readHashPath()
token = lsptok(NULL, &length); /* get :pathkeys */
local_node->jpath.path.pathkeys = nodeRead(true); /* now read it */
/*
* Not sure if these are nodes; they're declared as "struct path *".
* For now, i'll just print the addresses.
*
* GJK: Since I am parsing this stuff, I'll just ignore the addresses,
* and initialize these pointers to NULL.
*/
token = lsptok(NULL, &length); /* get :outerjoinpath */
token = lsptok(NULL, &length); /* get @ */
token = lsptok(NULL, &length); /* now read it */
local_node->jpath.outerjoinpath = NULL;
local_node->jpath.outerjoinpath = nodeRead(true); /* now read it */
token = lsptok(NULL, &length); /* get :innerjoinpath */
token = lsptok(NULL, &length); /* get @ */
token = lsptok(NULL, &length); /* now read it */
local_node->jpath.innerjoinpath = nodeRead(true); /* now read it */
local_node->jpath.innerjoinpath = NULL;
token = lsptok(NULL, &length); /* get :joinrestrictinfo */
local_node->jpath.joinrestrictinfo = nodeRead(true); /* now read it */
token = lsptok(NULL, &length); /* get :path_hashclauses */
local_node->path_hashclauses = nodeRead(true); /* now read it */

View File

@ -1,6 +1,20 @@
Summary
-------
These directories take the Query structure returned by the parser, and
generate a plan used by the executor. The /plan directory generates the
actual output plan, the /path code generates all possible ways to join the
tables, and /prep handles special cases like inheritance. /util is utility
stuff. /geqo is the separate "genetic optimization" planner --- it does
a semi-random search through the join tree space, rather than exhaustively
considering all possible join trees. (But each join considered by geqo
is given to /path to create paths for, so we consider all possible
implementation paths for each specific join even in GEQO mode.)
Join Tree Construction
----------------------
The optimizer generates optimal query plans by doing a more-or-less
exhaustive search through the ways of executing the query. During
the planning/optimizing process, we build "Path" trees representing
@ -26,8 +40,17 @@ the WHERE clause "tab1.col1 = tab2.col1" generates a JoinInfo for tab1
listing tab2 as an unjoined relation, and also one for tab2 showing tab1
as an unjoined relation.
If we have only a single base relation in the query, we are done here.
Otherwise we have to figure out how to join the base relations into a
single join relation.
2) Consider joining each RelOptInfo to each other RelOptInfo specified in
its RelOptInfo.joininfo, and generate a Path for each possible join method.
(If we have a RelOptInfo with no join clauses, we have no choice but to
generate a clauseless Cartesian-product join; so we consider joining that
rel to each other available rel. But in the presence of join clauses we
will only consider joins that use available join clauses.)
At this stage each input RelOptInfo is a single relation, so we are joining
every relation to the other relations as joined in the WHERE clause. We
generate a new "join" RelOptInfo for each possible combination of two
@ -53,9 +76,17 @@ that represent the scan methods used for the two input relations.
3) If we only had two base relations, we are done: we just pick the
cheapest path for the join RelOptInfo. If we had more than two, we now
need to consider ways of joining join RelOptInfos to each other to make
join RelOptInfos that represent more than two base relations. This process
is repeated until we have finally built a RelOptInfo that represents all
the base relations in the query. Then we pick its cheapest Path.
join RelOptInfos that represent more than two base relations.
The join tree is constructed using a "dynamic programming" algorithm:
in the first pass (already described) we consider ways to create join rels
representing exactly two base relations. The second pass considers ways
to make join rels that represent exactly three base relations; the next pass,
four relations, etc. The last pass considers how to make the final join
relation that includes all base rels --- obviously there can be only one
join rel at this top level, whereas there can be more than one join rel
at lower levels. At each level we use joins that follow available join
clauses, if possible, just as described for the first level.
For example:
@ -69,6 +100,7 @@ For example:
{1 2},{2 3},{3 4}
{1 2 3},{2 3 4}
{1 2 3 4}
(other possibilities will be excluded for lack of join clauses)
SELECT *
FROM tab1, tab2, tab3, tab4
@ -78,56 +110,43 @@ For example:
Tables 1, 2, 3, and 4 are joined as:
{1 2},{1 3},{1 4}
{1 2 3},{1 3 4},{1,2,4}
{1 2 3},{1 3 4},{1 2 4}
{1 2 3 4}
In the default left-handed joins, each RelOptInfo adds one
single-relation RelOptInfo in each join pass, and the added RelOptInfo
is always the inner relation in the join. In right-handed joins, the
added RelOptInfo is the outer relation in the join. In bushy plans,
multi-relation RelOptInfo's can be joined to other multi-relation
RelOptInfo's.
We consider left-handed plans (the outer rel of an upper join is a joinrel,
but the inner is always a base rel); right-handed plans (outer rel is always
a base rel); and bushy plans (both inner and outer can be joins themselves).
For example, when building {1 2 3 4} we consider joining {1 2 3} to {4}
(left-handed), {4} to {1 2 3} (right-handed), and {1 2} to {3 4} (bushy),
among other choices. Although the jointree scanning code produces these
potential join combinations one at a time, all the ways to produce the
same set of joined base rels will share the same RelOptInfo, so the paths
produced from different join combinations that produce equivalent joinrels
will compete in add_pathlist.
Once we have built the final join rel, we use either the cheapest path
for it or the cheapest path with the desired ordering (if that's cheaper
than applying a sort to the cheapest other path).
Optimizer Functions
-------------------
These directories take the Query structure returned by the parser, and
generate a plan used by the executor. The /plan directory generates the
actual output plan, the /path code generates all possible ways to join the
tables, and /prep handles special cases like inheritance. /util is utility
stuff. /geqo is the separate "genetic optimization" planner --- it does
a semi-random search rather than exhaustively considering all possible
join trees.
planner()
handle inheritance by processing separately
-init_query_planner()
preprocess target list
preprocess qualifications(WHERE)
--query_planner()
cnfify()
Summary:
Simple cases with all AND's are handled by removing the AND's:
convert: a = 1 AND b = 2 AND c = 3
to: a = 1, b = 2, c = 3
Qualifications with OR's are handled differently. OR's inside AND
clauses are not modified drastically:
convert: a = 1 AND b = 2 AND (c = 3 OR d = 4)
to: a = 1, b = 2, c = 3 OR d = 4
OR's in the upper level are more complex to handle:
convert: (a = 1 AND b = 2) OR c = 3
to: (a = 1 OR c = 3) AND (b = 2 OR c = 3)
finally: (a = 1 OR c = 3), (b = 2 OR c = 3)
These clauses all have to be true for a result to be returned,
so the optimizer can choose the most restrictive clauses.
simplify constant subexpressions
canonicalize_qual()
Attempt to reduce WHERE clause to either CNF or DNF canonical form.
CNF (top-level-AND) is preferred, since the optimizer can then use
any of the AND subclauses to filter tuples; but quals that are in
or close to DNF form will suffer exponential expansion if we try to
force them to CNF. In pathological cases either transform may expand
the qual unreasonably; so we may have to leave it un-normalized,
thereby reducing the accuracy of selectivity estimates.
pull out constants from target list
get a target list that only contains column names, no expressions
if none, then return
@ -142,20 +161,14 @@ planner()
find selectivity of columns used in joins
-----make_one_rel_by_joins()
jump to geqo if needed
again:
make_rels_by_joins():
for each joinrel:
make_rels_by_clause_joins()
for each rel's joininfo list:
if a join from the join clause adds only one relation, do the join
or make_rels_by_clauseless_joins()
update_rels_pathlist_for_joins()
generate nested,merge,hash join paths for new rel's created above
merge_rels_with_same_relids()
merge RelOptInfo paths that have the same relids because of joins
rels_set_cheapest()
set cheapest path
if all relations in one RelOptInfo, return
else call make_rels_by_joins() for each level of join tree needed
make_rels_by_joins():
For each joinrel of the prior level, do make_rels_by_clause_joins()
if it has join clauses, or make_rels_by_clauseless_joins() if not.
Also generate "bushy plan" joins between joinrels of lower levels.
Back at make_one_rel_by_joins(), apply set_cheapest() to extract the
cheapest path for each newly constructed joinrel.
Loop back if this wasn't the top join level.
do group(GROUP)
do aggregate
put back constants
@ -164,7 +177,6 @@ planner()
make sort(ORDER BY)
Optimizer Data Structures
-------------------------
@ -174,13 +186,13 @@ RelOptInfo - a relation or joined relations
JoinInfo - join clauses, including the relids needed for the join
Path - every way to generate a RelOptInfo (sequential, index, joins)
SeqScan - a plain Path node with nodeTag = T_SeqScan
SeqScan - a plain Path node with nodeTag = T_SeqScan
IndexPath - index scans
NestPath - nested-loop joins
MergePath - merge joins
HashPath - hash joins
PathKeys - a data structure representing the ordering of a path
PathKeys - a data structure representing the ordering of a path
The optimizer spends a good deal of its time worrying about the ordering
of the tuples returned by a path. The reason this is useful is that by
@ -192,21 +204,19 @@ generated during the optimization process are marked with their sort order
(to the extent that it is known) for possible use by a higher-level merge.
It is also possible to avoid an explicit sort step to implement a user's
ORDER BY clause if the final path has the right ordering already.
Currently, this is not very well implemented: we avoid generating a
redundant sort if the chosen path has the desired order, but we do not do
anything to encourage the selection of such a path --- so we only avoid the
sort if the path that would be chosen anyway (because it is cheapest
without regard to its ordering) is properly sorted. The path winnowing
process needs to be aware of the desired output order and account for the
cost of doing an explicit sort while it is choosing the best path.
ORDER BY clause if the final path has the right ordering already, so the
sort ordering is of interest even at the top level. subplanner() will
look for the cheapest path with a sort order matching the desired order,
and will compare its cost to the cost of using the cheapest-overall path
and doing an explicit sort.
When we are generating paths for a particular RelOptInfo, we discard a path
if it is more expensive than another known path that has the same or better
sort order. We will never discard a path that is the only known way to
achieve a given sort order. In this way, the next level up will have the
maximum freedom to build mergejoins without sorting, since it can pick from
any of the paths retained for its inputs.
achieve a given sort order (without an explicit sort, that is). In this
way, the next level up will have the maximum freedom to build mergejoins
without sorting, since it can pick from any of the paths retained for its
inputs.
See path/pathkeys.c for an explanation of the PathKeys data structure that
represents what is known about the sort order of a particular Path.

View File

@ -6,7 +6,7 @@
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Id: geqo_eval.c,v 1.46 2000/01/26 05:56:33 momjian Exp $
* $Id: geqo_eval.c,v 1.47 2000/02/07 04:40:58 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -32,6 +32,7 @@
#include "optimizer/cost.h"
#include "optimizer/geqo.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
#include "utils/portal.h"
@ -121,7 +122,6 @@ gimme_tree(Query *root, Gene *tour, int rel_count, int num_gene, RelOptInfo *old
{
RelOptInfo *inner_rel; /* current relation */
int base_rel_index;
List *new_rels;
RelOptInfo *new_rel;
if (rel_count < num_gene)
@ -130,7 +130,7 @@ gimme_tree(Query *root, Gene *tour, int rel_count, int num_gene, RelOptInfo *old
/* tour[0] = 3; tour[1] = 1; tour[2] = 2 */
base_rel_index = (int) tour[rel_count];
inner_rel = (RelOptInfo *) nth(base_rel_index - 1, root->base_rel_list);
inner_rel = (RelOptInfo *) nth(base_rel_index-1, root->base_rel_list);
if (rel_count == 0)
{ /* processing first join with
@ -140,54 +140,23 @@ gimme_tree(Query *root, Gene *tour, int rel_count, int num_gene, RelOptInfo *old
}
else
{ /* tree main part */
if (!(new_rels = make_rels_by_clause_joins(root, old_rel,
old_rel->joininfo,
inner_rel->relids)))
List *acceptable_rels = lcons(inner_rel, NIL);
new_rel = make_rels_by_clause_joins(root, old_rel,
acceptable_rels);
if (! new_rel)
{
new_rels = make_rels_by_clauseless_joins(old_rel,
lcons(inner_rel, NIL));
/*
* we don't do bushy plans in geqo, do we? bjm 02/18/1999
* new_rels = append(new_rels,
* make_rels_by_clauseless_joins(old_rel,
* lcons(old_rel,NIL));
*/
new_rel = make_rels_by_clauseless_joins(root, old_rel,
acceptable_rels);
if (! new_rel)
elog(ERROR, "gimme_tree: failed to construct join rel");
}
/* process new_rel->pathlist */
update_rels_pathlist_for_joins(root, new_rels);
/* prune new_rels */
/* MAU: is this necessary? */
/*
* what's the matter if more than one new rel is left till
* now?
*/
/*
* joinrels in newrels with different ordering of relids are
* not possible
*/
if (length(new_rels) > 1)
merge_rels_with_same_relids(new_rels);
if (length(new_rels) > 1)
{ /* should never be reached ... */
elog(DEBUG, "gimme_tree: still %d relations left", length(new_rels));
}
rels_set_cheapest(root, new_rels);
/* get essential new relation */
new_rel = (RelOptInfo *) lfirst(new_rels);
rel_count++;
Assert(length(new_rel->relids) == rel_count);
/* processing of other new_rel attributes */
set_rel_rows_width(root, new_rel);
root->join_rel_list = lcons(new_rel, NIL);
/* Find and save the cheapest path for this rel */
set_cheapest(new_rel, new_rel->pathlist);
return gimme_tree(root, tour, rel_count, num_gene, new_rel);
}

View File

@ -6,7 +6,7 @@
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Id: geqo_misc.c,v 1.26 2000/01/26 05:56:33 momjian Exp $
* $Id: geqo_misc.c,v 1.27 2000/02/07 04:40:58 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -188,7 +188,7 @@ geqo_print_path(Query *root, Path *path, int indent)
for (i = 0; i < indent + 1; i++)
printf("\t");
printf(" clauses=(");
geqo_print_joinclauses(root, path->parent->restrictinfo);
geqo_print_joinclauses(root, jp->joinrestrictinfo);
printf(")\n");
if (nodeTag(path) == T_MergePath)

View File

@ -4,7 +4,7 @@
# Makefile for optimizer/path
#
# IDENTIFICATION
# $Header: /cvsroot/pgsql/src/backend/optimizer/path/Makefile,v 1.12 1999/12/13 22:32:52 momjian Exp $
# $Header: /cvsroot/pgsql/src/backend/optimizer/path/Makefile,v 1.13 2000/02/07 04:40:59 tgl Exp $
#
#-------------------------------------------------------------------------
@ -14,8 +14,7 @@ include ../../../Makefile.global
CFLAGS += -I../..
OBJS = allpaths.o clausesel.o costsize.o indxpath.o \
joinpath.o joinrels.o orindxpath.o pathkeys.o prune.o \
tidpath.o
joinpath.o joinrels.o orindxpath.o pathkeys.o tidpath.o
all: SUBSYS.o

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/allpaths.c,v 1.57 2000/01/26 05:56:34 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/allpaths.c,v 1.58 2000/02/07 04:40:59 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -21,6 +21,7 @@
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
#ifdef GEQO
bool enable_geqo = true;
#else
@ -30,31 +31,28 @@ bool enable_geqo = false;
int geqo_rels = GEQO_RELS;
static void set_base_rel_pathlist(Query *root, List *rels);
static RelOptInfo *make_one_rel_by_joins(Query *root, List *rels,
int levels_needed);
static void set_base_rel_pathlist(Query *root);
static RelOptInfo *make_one_rel_by_joins(Query *root, int levels_needed);
#ifdef OPTIMIZER_DEBUG
static void debug_print_rel(Query *root, RelOptInfo *rel);
#endif
/*
* make_one_rel
* Finds all possible access paths for executing a query, returning a
* single rel.
*
* 'rels' is the list of single relation entries appearing in the query
* single rel that represents the join of all base rels in the query.
*/
RelOptInfo *
make_one_rel(Query *root, List *rels)
make_one_rel(Query *root)
{
int levels_needed;
/*
* Set the number of join (not nesting) levels yet to be processed.
*/
levels_needed = length(rels);
levels_needed = length(root->base_rel_list);
if (levels_needed <= 0)
return NULL;
@ -62,159 +60,162 @@ make_one_rel(Query *root, List *rels)
/*
* Generate access paths for the base rels.
*/
set_base_rel_pathlist(root, rels);
set_base_rel_pathlist(root);
if (levels_needed <= 1)
if (levels_needed == 1)
{
/*
* Single relation, no more processing is required.
*/
return lfirst(rels);
return (RelOptInfo *) lfirst(root->base_rel_list);
}
else
{
/*
* Generate join tree.
*/
return make_one_rel_by_joins(root, rels, levels_needed);
return make_one_rel_by_joins(root, levels_needed);
}
}
/*
* set_base_rel_pathlist
* Finds all paths available for scanning each relation entry in
* 'rels'. Sequential scan and any available indices are considered
* if possible (indices are not considered for lower nesting levels).
* All useful paths are attached to the relation's 'pathlist' field.
*
* MODIFIES: rels
* Finds all paths available for scanning each base-relation entry.
* Sequential scan and any available indices are considered.
* Each useful path is attached to its relation's 'pathlist' field.
*/
static void
set_base_rel_pathlist(Query *root, List *rels)
set_base_rel_pathlist(Query *root)
{
List *temp;
List *rellist;
foreach(temp, rels)
foreach(rellist, root->base_rel_list)
{
RelOptInfo *rel = (RelOptInfo *) lfirst(temp);
RelOptInfo *rel = (RelOptInfo *) lfirst(rellist);
List *indices = find_relation_indices(root, rel);
List *sequential_scan_list;
List *rel_index_scan_list;
List *or_index_scan_list;
List *tidscan_pathlist;
sequential_scan_list = lcons(create_seqscan_path(rel), NIL);
/* Tid Scan Pathlist add */
tidscan_pathlist = create_tidscan_paths(root, rel);
if (tidscan_pathlist)
sequential_scan_list = nconc(sequential_scan_list,
tidscan_pathlist);
rel_index_scan_list = create_index_paths(root,
rel,
indices,
rel->restrictinfo,
rel->joininfo);
/* Mark rel with estimated output rows, width, etc */
set_baserel_size_estimates(root, rel);
/*
* Generate paths and add them to the rel's pathlist.
*
* add_path/add_pathlist will discard any paths that are dominated
* by another available path, keeping only those paths that are
* superior along at least one dimension of cost or sortedness.
*/
/* Consider sequential scan */
add_path(rel, create_seqscan_path(rel));
/* Consider TID scans */
add_pathlist(rel, create_tidscan_paths(root, rel));
/* Consider index paths for both simple and OR index clauses */
add_pathlist(rel, create_index_paths(root,
rel,
indices,
rel->baserestrictinfo,
rel->joininfo));
/* Note: create_or_index_paths depends on create_index_paths
* to have marked OR restriction clauses with relevant indices;
* this is why it doesn't need to be given the full list of indices.
*/
or_index_scan_list = create_or_index_paths(root, rel,
rel->restrictinfo);
add_pathlist(rel, create_or_index_paths(root, rel,
rel->baserestrictinfo));
/* add_pathlist will discard any paths that are dominated by
* another available path, keeping only those paths that are
* superior along at least one dimension of cost or sortedness.
*/
rel->pathlist = add_pathlist(rel,
sequential_scan_list,
nconc(rel_index_scan_list,
or_index_scan_list));
/* Now find the cheapest of the paths */
/* Now find the cheapest of the paths for this rel */
set_cheapest(rel, rel->pathlist);
/* Mark rel with estimated output rows and width */
set_rel_rows_width(root, rel);
}
}
/*
* make_one_rel_by_joins
* Find all possible joinpaths for a query by successively finding ways
* to join single relations into join relations.
* to join component relations into join relations.
*
* Find all possible joinpaths(bushy trees) for a query by systematically
* finding ways to join relations(both original and derived) together.
*
* 'rels' is the current list of relations for which join paths
* are to be found, i.e., the current list of relations that
* have already been derived.
* 'levels_needed' is the number of iterations needed
* 'levels_needed' is the number of iterations needed, i.e., the number of
* base relations present in the query
*
* Returns the final level of join relations, i.e., the relation that is
* the result of joining all the original relations together.
*/
static RelOptInfo *
make_one_rel_by_joins(Query *root, List *rels, int levels_needed)
make_one_rel_by_joins(Query *root, int levels_needed)
{
List *x;
List *joined_rels = NIL;
int lev;
RelOptInfo *rel;
/*******************************************
* genetic query optimizer entry point *
* <utesch@aut.tu-freiberg.de> *
*******************************************/
if (enable_geqo && length(root->base_rel_list) >= geqo_rels)
return geqo(root);
/*******************************************
* rest will be skipped in case of GEQO *
*******************************************/
if (enable_geqo && levels_needed >= geqo_rels)
return geqo(root);
while (--levels_needed)
/*
* We employ a simple "dynamic programming" algorithm: we first
* find all ways to build joins of two base relations, then all ways
* to build joins of three base relations (from two-base-rel joins
* and other base rels), then four-base-rel joins, and so on until
* we have considered all ways to join all N relations into one rel.
*/
for (lev = 2; lev <= levels_needed; lev++)
{
List *first_old_rel = root->join_rel_list;
List *x;
/*
* Determine all possible pairs of relations to be joined at this
* level. Determine paths for joining these relation pairs and
* modify 'joined_rels' accordingly, then eliminate redundant join
* relations.
* level, and build paths for making each one from every available
* pair of lower-level relations. Results are prepended to
* root->join_rel_list.
*/
joined_rels = make_rels_by_joins(root, rels);
update_rels_pathlist_for_joins(root, joined_rels);
merge_rels_with_same_relids(joined_rels);
root->join_rel_list = rels = joined_rels;
#ifdef NOT_USED
make_rels_by_joins(root, lev);
/*
* * for each expensive predicate in each path in each distinct
* rel, * consider doing pullup -- JMH
* The relations created at the current level will appear at the
* front of root->join_rel_list.
*/
if (XfuncMode != XFUNC_NOPULL && XfuncMode != XFUNC_OFF)
foreach(x, joined_rels)
xfunc_trypullup((RelOptInfo *) lfirst(x));
#endif
rels_set_cheapest(root, joined_rels);
foreach(x, joined_rels)
foreach(x, root->join_rel_list)
{
if (x == first_old_rel)
break; /* no more rels added at this level */
rel = (RelOptInfo *) lfirst(x);
#ifdef NOT_USED
/*
* * for each expensive predicate in each path in each distinct
* rel, * consider doing pullup -- JMH
*/
if (XfuncMode != XFUNC_NOPULL && XfuncMode != XFUNC_OFF)
xfunc_trypullup(rel);
#endif
/* Find and save the cheapest path for this rel */
set_cheapest(rel, rel->pathlist);
#ifdef OPTIMIZER_DEBUG
printf("levels left: %d\n", levels_needed);
debug_print_rel(root, rel);
#endif
}
}
return get_cheapest_complete_rel(rels);
/*
* Now, the front of the join_rel_list should be the single rel
* representing the join of all the base rels.
*/
Assert(length(root->join_rel_list) > 0);
rel = (RelOptInfo *) lfirst(root->join_rel_list);
Assert(length(rel->relids) == levels_needed);
Assert(length(root->join_rel_list) == 1 ||
length(((RelOptInfo *) lsecond(root->join_rel_list))->relids) < levels_needed);
return rel;
}
/*****************************************************************************
@ -222,6 +223,7 @@ make_one_rel_by_joins(Query *root, List *rels, int levels_needed)
*****************************************************************************/
#ifdef OPTIMIZER_DEBUG
static void
print_joinclauses(Query *root, List *clauses)
{
@ -286,7 +288,7 @@ print_path(Query *root, Path *path, int indent)
for (i = 0; i < indent + 1; i++)
printf("\t");
printf(" clauses=(");
print_joinclauses(root, jp->path.parent->restrictinfo);
print_joinclauses(root, jp->joinrestrictinfo);
printf(")\n");
if (nodeTag(path) == T_MergePath)

View File

@ -19,7 +19,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.50 2000/01/26 05:56:34 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.51 2000/02/07 04:40:59 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -447,22 +447,26 @@ cost_hashjoin(Path *outer_path,
}
/*
* set_rel_rows_width
* Set the 'rows' and 'width' estimates for the given base relation.
* set_baserel_size_estimates
* Set the size estimates for the given base relation.
*
* 'rows' is the estimated number of output tuples (after applying
* restriction clauses).
* 'width' is the estimated average output tuple width in bytes.
* The rel's targetlist and restrictinfo list must have been constructed
* already.
*
* We set the following fields of the rel node:
* rows: the estimated number of output tuples (after applying
* restriction clauses).
* width: the estimated average output tuple width in bytes.
*/
void
set_rel_rows_width(Query *root, RelOptInfo *rel)
set_baserel_size_estimates(Query *root, RelOptInfo *rel)
{
/* Should only be applied to base relations */
Assert(length(rel->relids) == 1);
rel->rows = rel->tuples *
restrictlist_selectivity(root,
rel->restrictinfo,
rel->baserestrictinfo,
lfirsti(rel->relids));
Assert(rel->rows >= 0);
@ -470,28 +474,56 @@ set_rel_rows_width(Query *root, RelOptInfo *rel)
}
/*
* set_joinrel_rows_width
* Set the 'rows' and 'width' estimates for the given join relation.
* set_joinrel_size_estimates
* Set the size estimates for the given join relation.
*
* The rel's targetlist must have been constructed already, and a
* restriction clause list that matches the given component rels must
* be provided.
*
* Since there is more than one way to make a joinrel for more than two
* base relations, the results we get here could depend on which component
* rel pair is provided. In theory we should get the same answers no matter
* which pair is provided; in practice, since the selectivity estimation
* routines don't handle all cases equally well, we might not. But there's
* not much to be done about it. (Would it make sense to repeat the
* calculations for each pair of input rels that's encountered, and somehow
* average the results? Probably way more trouble than it's worth.)
*
* We set the same relnode fields as set_baserel_size_estimates() does.
*/
void
set_joinrel_rows_width(Query *root, RelOptInfo *rel,
JoinPath *joinpath)
set_joinrel_size_estimates(Query *root, RelOptInfo *rel,
RelOptInfo *outer_rel,
RelOptInfo *inner_rel,
List *restrictlist)
{
double temp;
/* cartesian product */
temp = joinpath->outerjoinpath->parent->rows *
joinpath->innerjoinpath->parent->rows;
temp = outer_rel->rows * inner_rel->rows;
/* apply join restrictivity */
/*
* Apply join restrictivity. Note that we are only considering clauses
* that become restriction clauses at this join level; we are not
* double-counting them because they were not considered in estimating
* the sizes of the component rels.
*/
temp *= restrictlist_selectivity(root,
joinpath->path.parent->restrictinfo,
restrictlist,
0);
Assert(temp >= 0);
rel->rows = temp;
set_rel_width(root, rel);
/*
* We could apply set_rel_width() to compute the output tuple width
* from scratch, but at present it's always just the sum of the input
* widths, so why work harder than necessary? If relnode.c is ever
* taught to remove unneeded columns from join targetlists, go back
* to using set_rel_width here.
*/
rel->width = outer_rel->width + inner_rel->width;
}
/*
@ -516,6 +548,7 @@ set_rel_width(Query *root, RelOptInfo *rel)
*
* If a field is variable-length, we make a default assumption. Would be
* better if VACUUM recorded some stats about the average field width...
* also, we have access to the atttypmod, but fail to use it...
*/
static int
compute_attribute_width(TargetEntry *tlistentry)

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.50 2000/02/06 03:27:32 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.51 2000/02/07 04:40:59 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -31,126 +31,108 @@ static Path *best_innerjoin(List *join_paths, List *outer_relid);
static List *sort_inner_and_outer(RelOptInfo *joinrel,
RelOptInfo *outerrel,
RelOptInfo *innerrel,
List *restrictlist,
List *mergeclause_list);
static List *match_unsorted_outer(RelOptInfo *joinrel, RelOptInfo *outerrel,
RelOptInfo *innerrel, List *outerpath_list,
Path *cheapest_inner, Path *best_innerjoin,
RelOptInfo *innerrel, List *restrictlist,
List *outerpath_list, Path *cheapest_inner,
Path *best_innerjoin,
List *mergeclause_list);
static List *match_unsorted_inner(RelOptInfo *joinrel, RelOptInfo *outerrel,
RelOptInfo *innerrel, List *innerpath_list,
RelOptInfo *innerrel, List *restrictlist,
List *innerpath_list,
List *mergeclause_list);
static List *hash_inner_and_outer(Query *root, RelOptInfo *joinrel,
RelOptInfo *outerrel, RelOptInfo *innerrel);
RelOptInfo *outerrel, RelOptInfo *innerrel,
List *restrictlist);
static Selectivity estimate_disbursion(Query *root, Var *var);
static List *select_mergejoin_clauses(List *restrictinfo_list);
static List *select_mergejoin_clauses(RelOptInfo *joinrel,
RelOptInfo *outerrel,
RelOptInfo *innerrel,
List *restrictlist);
/*
* update_rels_pathlist_for_joins
* Creates all possible ways to process joins for each of the join
* relations in the list 'joinrels.' Each unique path will be included
* in the join relation's 'pathlist' field.
* add_paths_to_joinrel
* Given a join relation and two component rels from which it can be made,
* consider all possible paths that use the two component rels as outer
* and inner rel respectively. Add these paths to the join rel's pathlist
* if they survive comparison with other paths (and remove any existing
* paths that are dominated by these paths).
*
* 'joinrels' is the list of relation entries to be joined
*
* Modifies the pathlist field of each joinrel node to contain
* the unique join paths.
* Modifies the pathlist field of the joinrel node to contain the best
* paths found so far.
*/
void
update_rels_pathlist_for_joins(Query *root, List *joinrels)
add_paths_to_joinrel(Query *root,
RelOptInfo *joinrel,
RelOptInfo *outerrel,
RelOptInfo *innerrel,
List *restrictlist)
{
List *j;
Path *bestinnerjoin;
List *mergeclause_list = NIL;
foreach(j, joinrels)
{
RelOptInfo *joinrel = (RelOptInfo *) lfirst(j);
Relids innerrelids;
Relids outerrelids;
RelOptInfo *innerrel;
RelOptInfo *outerrel;
Path *bestinnerjoin;
List *pathlist;
List *mergeclause_list = NIL;
/*
* Get the best inner join for match_unsorted_outer().
*/
bestinnerjoin = best_innerjoin(innerrel->innerjoin, outerrel->relids);
/*
* On entry, joinrel->relids is a list of two sublists of relids,
* namely the outer and inner member relids. Extract these sublists
* and change joinrel->relids to a flattened single list.
* (Use listCopy so as not to damage the member lists...)
*/
outerrelids = lfirst(joinrel->relids);
innerrelids = lsecond(joinrel->relids);
/*
* Find potential mergejoin clauses.
*/
if (enable_mergejoin)
mergeclause_list = select_mergejoin_clauses(joinrel,
outerrel,
innerrel,
restrictlist);
joinrel->relids = nconc(listCopy(outerrelids),
listCopy(innerrelids));
/*
* 1. Consider mergejoin paths where both relations must be
* explicitly sorted.
*/
add_pathlist(joinrel, sort_inner_and_outer(joinrel,
outerrel,
innerrel,
restrictlist,
mergeclause_list));
/*
* Get the corresponding RelOptInfos for the outer and inner sides.
* Base relation id is an integer and join relation relid is a
* list of integers.
*/
innerrel = (length(innerrelids) == 1) ?
get_base_rel(root, lfirsti(innerrelids)) :
get_join_rel(root, innerrelids);
outerrel = (length(outerrelids) == 1) ?
get_base_rel(root, lfirsti(outerrelids)) :
get_join_rel(root, outerrelids);
/*
* 2. Consider paths where the outer relation need not be
* explicitly sorted. This includes both nestloops and
* mergejoins where the outer path is already ordered.
*/
add_pathlist(joinrel, match_unsorted_outer(joinrel,
outerrel,
innerrel,
restrictlist,
outerrel->pathlist,
innerrel->cheapestpath,
bestinnerjoin,
mergeclause_list));
/*
* Get the best inner join for match_unsorted_outer().
*/
bestinnerjoin = best_innerjoin(innerrel->innerjoin, outerrel->relids);
/*
* 3. Consider paths where the inner relation need not be
* explicitly sorted. This includes mergejoins only
* (nestloops were already built in match_unsorted_outer).
*/
add_pathlist(joinrel, match_unsorted_inner(joinrel,
outerrel,
innerrel,
restrictlist,
innerrel->pathlist,
mergeclause_list));
/*
* Find potential mergejoin clauses.
*/
if (enable_mergejoin)
mergeclause_list = select_mergejoin_clauses(joinrel->restrictinfo);
/*
* 1. Consider mergejoin paths where both relations must be
* explicitly sorted.
*/
pathlist = sort_inner_and_outer(joinrel, outerrel,
innerrel, mergeclause_list);
/*
* 2. Consider paths where the outer relation need not be
* explicitly sorted. This includes both nestloops and
* mergejoins where the outer path is already ordered.
*/
pathlist = add_pathlist(joinrel, pathlist,
match_unsorted_outer(joinrel,
outerrel,
innerrel,
outerrel->pathlist,
innerrel->cheapestpath,
bestinnerjoin,
mergeclause_list));
/*
* 3. Consider paths where the inner relation need not be
* explicitly sorted. This includes mergejoins only
* (nestloops were already built in match_unsorted_outer).
*/
pathlist = add_pathlist(joinrel, pathlist,
match_unsorted_inner(joinrel, outerrel,
innerrel,
innerrel->pathlist,
mergeclause_list));
/*
* 4. Consider paths where both outer and inner relations must be
* hashed before being joined.
*/
if (enable_hashjoin)
pathlist = add_pathlist(joinrel, pathlist,
hash_inner_and_outer(root, joinrel,
outerrel,
innerrel));
/* Save the completed pathlist in the join rel */
joinrel->pathlist = pathlist;
}
/*
* 4. Consider paths where both outer and inner relations must be
* hashed before being joined.
*/
if (enable_hashjoin)
add_pathlist(joinrel, hash_inner_and_outer(root,
joinrel,
outerrel,
innerrel,
restrictlist));
}
/*
@ -197,8 +179,10 @@ best_innerjoin(List *join_paths, Relids outer_relids)
* 'joinrel' is the join relation
* 'outerrel' is the outer join relation
* 'innerrel' is the inner join relation
* 'restrictlist' contains all of the RestrictInfo nodes for restriction
* clauses that apply to this join
* 'mergeclause_list' is a list of RestrictInfo nodes for available
* mergejoin clauses between these two relations
* mergejoin clauses in this join
*
* Returns a list of mergejoin paths.
*/
@ -206,6 +190,7 @@ static List *
sort_inner_and_outer(RelOptInfo *joinrel,
RelOptInfo *outerrel,
RelOptInfo *innerrel,
List *restrictlist,
List *mergeclause_list)
{
List *path_list = NIL;
@ -255,12 +240,14 @@ sort_inner_and_outer(RelOptInfo *joinrel,
innerkeys = make_pathkeys_for_mergeclauses(curclause_list,
innerrel->targetlist);
/* Build pathkeys representing output sort order. */
merge_pathkeys = build_join_pathkeys(outerkeys, joinrel->targetlist,
merge_pathkeys = build_join_pathkeys(outerkeys,
joinrel->targetlist,
curclause_list);
/* And now we can make the path. */
path_node = create_mergejoin_path(joinrel,
outerrel->cheapestpath,
innerrel->cheapestpath,
restrictlist,
merge_pathkeys,
get_actual_clauses(curclause_list),
outerkeys,
@ -301,11 +288,13 @@ sort_inner_and_outer(RelOptInfo *joinrel,
* 'joinrel' is the join relation
* 'outerrel' is the outer join relation
* 'innerrel' is the inner join relation
* 'restrictlist' contains all of the RestrictInfo nodes for restriction
* clauses that apply to this join
* 'outerpath_list' is the list of possible outer paths
* 'cheapest_inner' is the cheapest inner path
* 'best_innerjoin' is the best inner index path (if any)
* 'mergeclause_list' is a list of RestrictInfo nodes for available
* mergejoin clauses between these two relations
* mergejoin clauses in this join
*
* Returns a list of possible join path nodes.
*/
@ -313,6 +302,7 @@ static List *
match_unsorted_outer(RelOptInfo *joinrel,
RelOptInfo *outerrel,
RelOptInfo *innerrel,
List *restrictlist,
List *outerpath_list,
Path *cheapest_inner,
Path *best_innerjoin,
@ -358,6 +348,7 @@ match_unsorted_outer(RelOptInfo *joinrel,
create_nestloop_path(joinrel,
outerpath,
nestinnerpath,
restrictlist,
merge_pathkeys));
/* Done with this outer path if no chance for a mergejoin */
@ -425,6 +416,7 @@ match_unsorted_outer(RelOptInfo *joinrel,
create_mergejoin_path(joinrel,
outerpath,
mergeinnerpath,
restrictlist,
merge_pathkeys,
mergeclauses,
NIL,
@ -442,9 +434,11 @@ match_unsorted_outer(RelOptInfo *joinrel,
* 'joinrel' is the join result relation
* 'outerrel' is the outer join relation
* 'innerrel' is the inner join relation
* 'restrictlist' contains all of the RestrictInfo nodes for restriction
* clauses that apply to this join
* 'innerpath_list' is the list of possible inner join paths
* 'mergeclause_list' is a list of RestrictInfo nodes for available
* mergejoin clauses between these two relations
* mergejoin clauses in this join
*
* Returns a list of possible merge paths.
*/
@ -452,6 +446,7 @@ static List *
match_unsorted_inner(RelOptInfo *joinrel,
RelOptInfo *outerrel,
RelOptInfo *innerrel,
List *restrictlist,
List *innerpath_list,
List *mergeclause_list)
{
@ -510,6 +505,7 @@ match_unsorted_inner(RelOptInfo *joinrel,
create_mergejoin_path(joinrel,
mergeouterpath,
innerpath,
restrictlist,
merge_pathkeys,
mergeclauses,
outersortkeys,
@ -528,6 +524,8 @@ match_unsorted_inner(RelOptInfo *joinrel,
* 'joinrel' is the join relation
* 'outerrel' is the outer join relation
* 'innerrel' is the inner join relation
* 'restrictlist' contains all of the RestrictInfo nodes for restriction
* clauses that apply to this join
*
* Returns a list of hashjoin paths.
*/
@ -535,39 +533,62 @@ static List *
hash_inner_and_outer(Query *root,
RelOptInfo *joinrel,
RelOptInfo *outerrel,
RelOptInfo *innerrel)
RelOptInfo *innerrel,
List *restrictlist)
{
List *hpath_list = NIL;
Relids outerrelids = outerrel->relids;
Relids innerrelids = innerrel->relids;
List *i;
foreach(i, joinrel->restrictinfo)
/*
* Scan the join's restrictinfo list to find hashjoinable clauses
* that are usable with this pair of sub-relations. Since we currently
* accept only var-op-var clauses as hashjoinable, we need only check
* the membership of the vars to determine whether a particular clause
* can be used with this pair of sub-relations. This code would need
* to be upgraded if we wanted to allow more-complex expressions in
* hash joins.
*/
foreach(i, restrictlist)
{
RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(i);
Expr *clause;
Var *left,
*right,
*inner;
Selectivity innerdisbursion;
HashPath *hash_path;
/* we consider only clauses previously marked hashjoinable */
if (restrictinfo->hashjoinoperator)
{
Expr *clause = restrictinfo->clause;
Var *leftop = get_leftop(clause);
Var *rightop = get_rightop(clause);
Var *innerop;
Selectivity innerdisbursion;
HashPath *hash_path;
if (restrictinfo->hashjoinoperator == InvalidOid)
continue; /* not hashjoinable */
/* find the inner var and estimate its disbursion */
if (intMember(leftop->varno, innerrel->relids))
innerop = leftop;
else
innerop = rightop;
innerdisbursion = estimate_disbursion(root, innerop);
clause = restrictinfo->clause;
/* these must be OK, since check_hashjoinable accepted the clause */
left = get_leftop(clause);
right = get_rightop(clause);
hash_path = create_hashjoin_path(joinrel,
outerrel->cheapestpath,
innerrel->cheapestpath,
lcons(clause, NIL),
innerdisbursion);
hpath_list = lappend(hpath_list, hash_path);
}
/* check if clause is usable with these sub-rels, find inner var */
if (intMember(left->varno, outerrelids) &&
intMember(right->varno, innerrelids))
inner = right;
else if (intMember(left->varno, innerrelids) &&
intMember(right->varno, outerrelids))
inner = left;
else
continue; /* no good for these input relations */
/* estimate disbursion of inner var for costing purposes */
innerdisbursion = estimate_disbursion(root, inner);
hash_path = create_hashjoin_path(joinrel,
outerrel->cheapestpath,
innerrel->cheapestpath,
restrictlist,
lcons(clause, NIL),
innerdisbursion);
hpath_list = lappend(hpath_list, hash_path);
}
return hpath_list;
@ -600,28 +621,47 @@ estimate_disbursion(Query *root, Var *var)
* Select mergejoin clauses that are usable for a particular join.
* Returns a list of RestrictInfo nodes for those clauses.
*
* Currently, all we need is the restrictinfo list of the joinrel.
* By definition, any mergejoinable clause in that list will work ---
* it must involve only vars in the join, or it wouldn't have been
* in the restrict list, and it must involve vars on both sides of
* the join, or it wouldn't have made it up to this level of join.
* Since we currently allow only simple Vars as the left and right
* sides of mergejoin clauses, that means the mergejoin clauses must
* be usable for this join. If we ever allow more complex expressions
* containing multiple Vars, we would need to check that each side
* of a potential joinclause uses only vars from one side of the join.
* We examine each restrictinfo clause known for the join to see
* if it is mergejoinable and involves vars from the two sub-relations
* currently of interest.
*
* Since we currently allow only plain Vars as the left and right sides
* of mergejoin clauses, this test is relatively simple. This routine
* would need to be upgraded to support more-complex expressions
* as sides of mergejoins. In theory, we could allow arbitrarily complex
* expressions in mergejoins, so long as one side uses only vars from one
* sub-relation and the other side uses only vars from the other.
*/
static List *
select_mergejoin_clauses(List *restrictinfo_list)
select_mergejoin_clauses(RelOptInfo *joinrel,
RelOptInfo *outerrel,
RelOptInfo *innerrel,
List *restrictlist)
{
List *result_list = NIL;
Relids outerrelids = outerrel->relids;
Relids innerrelids = innerrel->relids;
List *i;
foreach(i, restrictinfo_list)
foreach(i, restrictlist)
{
RestrictInfo *restrictinfo = lfirst(i);
RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(i);
Expr *clause;
Var *left,
*right;
if (restrictinfo->mergejoinoperator != InvalidOid)
if (restrictinfo->mergejoinoperator == InvalidOid)
continue; /* not mergejoinable */
clause = restrictinfo->clause;
/* these must be OK, since check_mergejoinable accepted the clause */
left = get_leftop(clause);
right = get_rightop(clause);
if ((intMember(left->varno, outerrelids) &&
intMember(right->varno, innerrelids)) ||
(intMember(left->varno, innerrelids) &&
intMember(right->varno, outerrelids)))
result_list = lcons(restrictinfo, result_list);
}

View File

@ -8,399 +8,263 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinrels.c,v 1.42 2000/02/06 03:27:32 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinrels.c,v 1.43 2000/02/07 04:40:59 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#ifdef HAVE_LIMITS_H
#include <limits.h>
#ifndef MAXINT
#define MAXINT INT_MAX
#endif
#else
#ifdef HAVE_VALUES_H
#include <values.h>
#endif
#endif
#include "optimizer/cost.h"
#include "optimizer/joininfo.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
#include "optimizer/tlist.h"
static RelOptInfo *make_join_rel(RelOptInfo *outer_rel, RelOptInfo *inner_rel);
static List *new_join_tlist(List *tlist, int first_resdomno);
static void build_joinrel_restrict_and_join(RelOptInfo *joinrel,
List *joininfo_list,
Relids join_relids);
static RelOptInfo *make_join_rel(Query *root, RelOptInfo *rel1,
RelOptInfo *rel2);
/*
* make_rels_by_joins
* Find all possible joins for each of the outer join relations in
* 'old_rels'. A rel node is created for each possible join relation,
* and the resulting list of nodes is returned. If at all possible, only
* those relations for which join clauses exist are considered. If none
* of these exist for a given relation, all remaining possibilities are
* considered.
* Consider ways to produce join relations containing exactly 'level'
* base relations. (This is one step of the dynamic-programming method
* embodied in make_one_rel_by_joins.) Join rel nodes for each feasible
* combination of base rels are created and added to the front of the
* query's join_rel_list. Implementation paths are created for each
* such joinrel, too.
*
* Returns a list of rel nodes corresponding to the new join relations.
* Returns nothing, but adds entries to root->join_rel_list.
*/
List *
make_rels_by_joins(Query *root, List *old_rels)
void
make_rels_by_joins(Query *root, int level)
{
List *join_list = NIL;
List *r;
foreach(r, old_rels)
/*
* First, consider left-sided and right-sided plans, in which rels of
* exactly level-1 member relations are joined against base relations.
* We prefer to join using join clauses, but if we find a rel of level-1
* members that has no join clauses, we will generate Cartesian-product
* joins against all base rels not already contained in it.
*
* In the first pass (level == 2), we try to join each base rel to each
* base rel that appears later in base_rel_list. (The mirror-image
* joins are handled automatically by make_join_rel.) In later passes,
* we try to join rels of size level-1 from join_rel_list to each
* base rel in base_rel_list.
*
* We assume that the rels already present in join_rel_list appear in
* decreasing order of level (number of members). This should be true
* since we always add new higher-level rels to the front of the list.
*/
if (level == 2)
r = root->base_rel_list; /* level-1 is base rels */
else
r = root->join_rel_list;
for (; r != NIL; r = lnext(r))
{
RelOptInfo *old_rel = (RelOptInfo *) lfirst(r);
List *joined_rels;
int old_level = length(old_rel->relids);
List *other_rels;
if (!(joined_rels = make_rels_by_clause_joins(root, old_rel,
old_rel->joininfo,
NIL)))
if (old_level != level-1)
break;
if (level == 2)
other_rels = lnext(r); /* only consider remaining base rels */
else
other_rels = root->base_rel_list; /* consider all base rels */
if (old_rel->joininfo != NIL)
{
/*
* Note that if all available join clauses for this rel require
* more than one other rel, we will fail to make any joins against
* it here. That's OK; it'll be considered by "bushy plan" join
* code in a higher-level pass.
*/
make_rels_by_clause_joins(root,
old_rel,
other_rels);
}
else
{
/*
* Oops, we have a relation that is not joined to any other
* relation. Cartesian product time.
*/
joined_rels = make_rels_by_clauseless_joins(old_rel,
root->base_rel_list);
joined_rels = nconc(joined_rels,
make_rels_by_clauseless_joins(old_rel,
old_rels));
make_rels_by_clauseless_joins(root,
old_rel,
other_rels);
}
join_list = nconc(join_list, joined_rels);
}
return join_list;
}
/*
* make_rels_by_clause_joins
* Build joins between an outer relation 'old_rel' and relations
* within old_rel's joininfo nodes
* (i.e., relations that participate in join clauses that 'old_rel'
* also participates in).
*
* 'old_rel' is the relation entry for the outer relation
* 'joininfo_list' is a list of join clauses which 'old_rel'
* participates in
* 'only_relids': if not NIL, only joins against base rels mentioned in
* only_relids are allowable.
*
* Returns a list of new join relations.
*/
List *
make_rels_by_clause_joins(Query *root, RelOptInfo *old_rel,
List *joininfo_list, Relids only_relids)
{
List *join_list = NIL;
List *i;
foreach(i, joininfo_list)
/*
* Now, consider "bushy plans" in which relations of k base rels are
* joined to relations of level-k base rels, for 2 <= k <= level-2.
* The previous loop left r pointing to the first rel of level level-2.
*
* We only consider bushy-plan joins for pairs of rels where there is
* a suitable join clause, in order to avoid unreasonable growth of
* planning time.
*/
for (; r != NIL; r = lnext(r))
{
JoinInfo *joininfo = (JoinInfo *) lfirst(i);
Relids unjoined_relids = joininfo->unjoined_relids;
RelOptInfo *joined_rel;
RelOptInfo *old_rel = (RelOptInfo *) lfirst(r);
int old_level = length(old_rel->relids);
List *r2;
if (unjoined_relids == NIL)
continue; /* probably can't happen */
/* We can quit once past the halfway point (make_join_rel took care
* of making the opposite-direction joins)
*/
if (old_level * 2 < level)
break;
if (length(unjoined_relids) == 1 &&
(only_relids == NIL ||
/* geqo only wants certain relids to be joined to old_rel */
intMember(lfirsti(unjoined_relids), only_relids)))
if (old_rel->joininfo == NIL)
continue; /* we ignore clauseless joins here */
foreach(r2, lnext(r))
{
RelOptInfo *base_rel = get_base_rel(root,
lfirsti(unjoined_relids));
RelOptInfo *new_rel = (RelOptInfo *) lfirst(r2);
int new_level = length(new_rel->relids);
/* Left-sided join of outer rel against a single base rel */
joined_rel = make_join_rel(old_rel, base_rel);
join_list = lappend(join_list, joined_rel);
/* Consider right-sided plan as well */
if (length(old_rel->relids) > 1)
if (old_level + new_level > level)
continue; /* scan down to new_rels of right size */
if (old_level + new_level < level)
break; /* no more new_rels of right size */
if (nonoverlap_setsi(old_rel->relids, new_rel->relids))
{
joined_rel = make_join_rel(base_rel, old_rel);
join_list = lappend(join_list, joined_rel);
}
}
List *i;
if (only_relids == NIL) /* no bushy plans for geqo */
{
List *r;
/* Build "bushy" plans: join old_rel against all pre-existing
* joins of rels it doesn't already contain, if there is a
* suitable join clause.
*/
foreach(r, root->join_rel_list)
{
RelOptInfo *join_rel = lfirst(r);
Assert(length(join_rel->relids) > 1);
if (is_subseti(unjoined_relids, join_rel->relids) &&
nonoverlap_setsi(old_rel->relids, join_rel->relids))
/* OK, we can build a rel of the right level from this pair of
* rels. Do so if there is at least one usable join clause.
*/
foreach(i, old_rel->joininfo)
{
joined_rel = make_join_rel(old_rel, join_rel);
join_list = lappend(join_list, joined_rel);
JoinInfo *joininfo = (JoinInfo *) lfirst(i);
if (is_subseti(joininfo->unjoined_relids, new_rel->relids))
{
make_join_rel(root, old_rel, new_rel);
break;
}
}
}
}
}
}
return join_list;
/*
* make_rels_by_clause_joins
* Build joins between the given relation 'old_rel' and other relations
* that are mentioned within old_rel's joininfo nodes (i.e., relations
* that participate in join clauses that 'old_rel' also participates in).
* The join rel nodes are added to root->join_rel_list.
*
* 'old_rel' is the relation entry for the relation to be joined
* 'other_rels': other rels to be considered for joining
*
* Currently, this is only used with base rels in other_rels, but it would
* work for joining to joinrels too, if the caller ensures there is no
* membership overlap between old_rel and the rels in other_rels. (We need
* no extra test for overlap for base rels, since the is_subset test can
* only succeed when other_rel is not already part of old_rel.)
*
* Returns NULL if no suitable joins were found, else the last suitable
* joinrel processed. (The only caller who checks the return value is
* geqo_eval.c, and it sets things up so there can be no more than one
* "suitable" joinrel; so we don't bother with returning a list.)
*/
RelOptInfo *
make_rels_by_clause_joins(Query *root,
						  RelOptInfo *old_rel,
						  List *other_rels)
{
	RelOptInfo *last_joinrel = NULL;	/* stays NULL if nothing qualifies */
	List	   *jinfo_cell;

	/* Examine each JoinInfo node attached to old_rel. */
	for (jinfo_cell = old_rel->joininfo;
		 jinfo_cell != NIL;
		 jinfo_cell = lnext(jinfo_cell))
	{
		JoinInfo   *jinfo = (JoinInfo *) lfirst(jinfo_cell);
		Relids		needed_relids = jinfo->unjoined_relids;
		List	   *cand_cell;

		/*
		 * Try every candidate rel; build a join with each one whose
		 * relids pass the is_subseti test against this JoinInfo's
		 * unjoined_relids.
		 */
		for (cand_cell = other_rels;
			 cand_cell != NIL;
			 cand_cell = lnext(cand_cell))
		{
			RelOptInfo *candidate = (RelOptInfo *) lfirst(cand_cell);

			if (!is_subseti(needed_relids, candidate->relids))
				continue;		/* this clause group doesn't fit candidate */

			/* Remember only the most recent joinrel produced. */
			last_joinrel = make_join_rel(root, old_rel, candidate);
		}
	}

	return last_joinrel;
}
/*
* make_rels_by_clauseless_joins
* Given an outer relation 'old_rel' and a list of inner relations
* 'inner_rels', create a join relation between 'old_rel' and each
* member of 'inner_rels' that isn't already included in 'old_rel'.
* Given a relation 'old_rel' and a list of other relations
* 'other_rels', create a join relation between 'old_rel' and each
* member of 'other_rels' that isn't already included in 'old_rel'.
*
* Returns a list of new join relations.
* 'old_rel' is the relation entry for the relation to be joined
* 'other_rels': other rels to be considered for joining
*
* Currently, this is only used with base rels in other_rels, but it would
* work for joining to joinrels too.
*
* Returns NULL if no suitable joins were found, else the last suitable
* joinrel processed. (The only caller who checks the return value is
* geqo_eval.c, and it sets things up so there can be no more than one
* "suitable" joinrel; so we don't bother with returning a list.)
*/
List *
make_rels_by_clauseless_joins(RelOptInfo *old_rel, List *inner_rels)
RelOptInfo *
make_rels_by_clauseless_joins(Query *root,
RelOptInfo *old_rel,
List *other_rels)
{
List *join_list = NIL;
RelOptInfo *result = NULL;
List *i;
foreach(i, inner_rels)
foreach(i, other_rels)
{
RelOptInfo *inner_rel = (RelOptInfo *) lfirst(i);
RelOptInfo *other_rel = (RelOptInfo *) lfirst(i);
if (nonoverlap_setsi(inner_rel->relids, old_rel->relids))
{
join_list = lappend(join_list,
make_join_rel(old_rel, inner_rel));
}
if (nonoverlap_setsi(other_rel->relids, old_rel->relids))
result = make_join_rel(root, old_rel, other_rel);
}
return join_list;
return result;
}
/*
* make_join_rel
* Creates and initializes a new join relation.
*
* 'outer_rel' and 'inner_rel' are relation nodes for the relations to be
* joined
*
* Returns the new join relation node.
* Find or create a join RelOptInfo that represents the join of
* the two given rels, and add to it path information for paths
* created with the two rels as outer and inner rel.
* (The join rel may already contain paths generated from other
* pairs of rels that add up to the same set of base rels.)
* The join rel is stored in the query's join_rel_list.
*/
static RelOptInfo *
make_join_rel(RelOptInfo *outer_rel, RelOptInfo *inner_rel)
make_join_rel(Query *root, RelOptInfo *rel1, RelOptInfo *rel2)
{
RelOptInfo *joinrel = makeNode(RelOptInfo);
List *new_outer_tlist;
List *new_inner_tlist;
RelOptInfo *joinrel;
List *restrictlist;
/*
* This function uses a trick to pass inner/outer rels as two sublists.
* The list will be flattened out in update_rels_pathlist_for_joins().
* Find or build the join RelOptInfo, and compute the restrictlist
* that goes with this particular joining.
*/
joinrel->relids = lcons(outer_rel->relids, lcons(inner_rel->relids, NIL));
joinrel->rows = 0;
joinrel->width = 0;
joinrel->targetlist = NIL;
joinrel->pathlist = NIL;
joinrel->cheapestpath = (Path *) NULL;
joinrel->pruneable = true;
joinrel->indexed = false;
joinrel->pages = 0;
joinrel->tuples = 0;
joinrel->restrictinfo = NIL;
joinrel->joininfo = NIL;
joinrel->innerjoin = NIL;
joinrel = get_join_rel(root, rel1, rel2, &restrictlist);
/*
* Create a new tlist by removing irrelevant elements from both tlists
* of the outer and inner join relations and then merging the results
* together.
* We consider paths using each rel as both outer and inner.
*/
new_outer_tlist = new_join_tlist(outer_rel->targetlist, 1);
new_inner_tlist = new_join_tlist(inner_rel->targetlist,
length(new_outer_tlist) + 1);
joinrel->targetlist = nconc(new_outer_tlist, new_inner_tlist);
/*
* Construct restrict and join clause lists for the new joinrel.
*
* nconc(listCopy(x), y) is an idiom for making a new list without
* changing either input list.
*/
build_joinrel_restrict_and_join(joinrel,
nconc(listCopy(outer_rel->joininfo),
inner_rel->joininfo),
nconc(listCopy(outer_rel->relids),
inner_rel->relids));
add_paths_to_joinrel(root, joinrel, rel1, rel2, restrictlist);
add_paths_to_joinrel(root, joinrel, rel2, rel1, restrictlist);
return joinrel;
}
/*
 * new_join_tlist
 *	  Builds one input relation's contribution to a join relation's
 *	  target list.
 *
 *	  Every entry of 'tlist' is carried over: a new target list element is
 *	  created from each entry's expression, and the new elements are
 *	  numbered consecutively starting at 'first_resdomno'.
 *
 *	  This routine used to carry a test that was meant to keep only the
 *	  entries still needed in the final target list or for future joins.
 *	  The test compared a local list that was never modified against NIL,
 *	  so it was always true; it has been removed along with the unused
 *	  local.  The resulting list is the same as before.
 *
 * 'tlist' is the target list of one of the join relations
 * 'first_resdomno' is the resdom number to use for the first created
 *		target list entry
 *
 * Returns the new target list.
 */
static List *
new_join_tlist(List *tlist,
			   int first_resdomno)
{
	int			resdomno = first_resdomno - 1;
	List	   *t_list = NIL;
	List	   *i;

	foreach(i, tlist)
	{
		TargetEntry *xtl = lfirst(i);

		/* number the copied entries first_resdomno, first_resdomno+1, ... */
		resdomno += 1;
		t_list = lappend(t_list,
						 create_tl_element(get_expr(xtl), resdomno));
	}

	return t_list;
}
/*
 * build_joinrel_restrict_and_join
 *	  Sorts the clauses carried by the input relations' JoinInfo nodes
 *	  into the new join relation's restrictinfo and joininfo lists.
 *
 *	  For each input JoinInfo we subtract the joinrel's relids from its
 *	  unjoined_relids.  If something is left over, the clauses still
 *	  reference rels outside the join, so they are merged into the
 *	  joinrel's JoinInfo node for the leftover relid set.  If nothing is
 *	  left over, the clauses reference only rels inside the join, so they
 *	  are merged into the joinrel's restrictinfo list and need not be
 *	  considered further up the join tree.
 *
 *	  LispUnion is used for both merges so that a clause arriving from
 *	  both input relations is stored only once.
 *
 * 'joinrel' is the join relation whose lists are being filled in
 * 'joininfo_list' is a list of joininfo nodes from the relations being joined
 * 'join_relids' is a list of all base relids in the new join relation
 *
 * NB: the original RestrictInfo nodes are added to the joinrel's lists;
 * they are not copied.
 */
static void
build_joinrel_restrict_and_join(RelOptInfo *joinrel,
								List *joininfo_list,
								Relids join_relids)
{
	List	   *cell;

	foreach(cell, joininfo_list)
	{
		JoinInfo   *src_info = (JoinInfo *) lfirst(cell);
		Relids		still_unjoined;
		JoinInfo   *dest_info;

		still_unjoined = set_differencei(src_info->unjoined_relids,
										 join_relids);

		if (still_unjoined != NIL)
		{
			/*
			 * Still a join clause at this level: find or make the
			 * joinrel's JoinInfo for the remaining rels and merge the
			 * clauses into it, dropping duplicates.
			 */
			dest_info = find_joininfo_node(joinrel, still_unjoined);
			dest_info->jinfo_restrictinfo =
				LispUnion(dest_info->jinfo_restrictinfo,
						  src_info->jinfo_restrictinfo);
			continue;
		}

		/*
		 * No outside rels are referenced any more, so these clauses
		 * become restriction clauses of the joinrel.  Duplicates are
		 * dropped, since the same clauses arrive from both inputs.
		 */
		joinrel->restrictinfo =
			LispUnion(joinrel->restrictinfo,
					  src_info->jinfo_restrictinfo);
	}
}
/*
 * get_cheapest_complete_rel
 *	  Among the "final" relations in 'join_rel_list', pick the one whose
 *	  cheapestpath is cheapest.
 *
 *	  A relation is treated as final when none of its JoinInfo nodes has
 *	  a non-nil unjoined_relids list, i.e. it has no further joins to do.
 *
 * 'join_rel_list' is a list of join relations.
 *
 * Returns a single RelOptInfo, or NULL if no relation in the list
 * qualifies.
 */
RelOptInfo *
get_cheapest_complete_rel(List *join_rel_list)
{
	RelOptInfo *best_rel = NULL;
	List	   *rel_cell;

	/*
	 * XXX A JoinInfo presumably should never have nil unjoined_relids, so
	 * this test amounts to "rel has no JoinInfo nodes at all".  It is not
	 * obvious that this is the right test; checking for a complete relids
	 * list might be more correct (think of cartesian-product cases where
	 * sub-relations have empty JoinInfo lists).  Behavior is kept as-is.
	 */
	foreach(rel_cell, join_rel_list)
	{
		RelOptInfo *candidate = (RelOptInfo *) lfirst(rel_cell);
		bool		has_pending_joins = false;
		List	   *info_cell;

		foreach(info_cell, candidate->joininfo)
		{
			JoinInfo   *info = (JoinInfo *) lfirst(info_cell);

			if (info->unjoined_relids != NIL)
			{
				has_pending_joins = true;
				break;
			}
		}

		/* rels that still have joins to do are not candidates */
		if (has_pending_joins)
			continue;

		/* the first final rel wins by default; later ones must be cheaper */
		if (best_rel == NULL ||
			path_is_cheaper(candidate->cheapestpath, best_rel->cheapestpath))
		{
			best_rel = candidate;
		}
	}

	return best_rel;
}

View File

@ -1,109 +0,0 @@
/*-------------------------------------------------------------------------
*
* prune.c
* Routines to prune redundant paths and relations
*
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/Attic/prune.c,v 1.46 2000/02/06 03:27:32 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "optimizer/cost.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
static List *merge_rel_with_same_relids(RelOptInfo *rel, List *unmerged_rels);
/*
 * merge_rels_with_same_relids
 *	  Removes redundant relation entries from 'rel_list' in place.
 *
 *	  Each entry in turn is kept, and the remainder of the list after it
 *	  is replaced by whatever merge_rel_with_same_relids() returns for
 *	  that entry and that remainder.
 *
 * The list is modified destructively and nothing is returned.
 */
void
merge_rels_with_same_relids(List *rel_list)
{
	List	   *cell;

	/*
	 * The list can get shorter while we walk it, because the tail after
	 * the current cell is replaced on every iteration.  The first entry
	 * can't be a duplicate of anything before it, so the head cell, and
	 * hence the caller's list pointer, stays valid.
	 */
	foreach(cell, rel_list)
	{
		RelOptInfo *keeper = (RelOptInfo *) lfirst(cell);
		List	   *remainder = lnext(cell);

		lnext(cell) = merge_rel_with_same_relids(keeper, remainder);
	}
}
/*
 * merge_rel_with_same_relids
 *	  Splits 'unmerged_rels' into entries that duplicate 'rel' and entries
 *	  that do not.
 *
 *	  An entry duplicates 'rel' when sameseti() reports that the two
 *	  relids lists are the same set.  The pathlist of each duplicate is
 *	  folded into rel->pathlist through add_pathlist(); the duplicate
 *	  itself is left out of the result.
 *
 * Returns a list of the non-duplicate entries, in their original order,
 * and updates the pathlist field of 'rel'.
 */
static List *
merge_rel_with_same_relids(RelOptInfo *rel, List *unmerged_rels)
{
	List	   *survivors = NIL;
	List	   *cell;

	foreach(cell, unmerged_rels)
	{
		RelOptInfo *other = (RelOptInfo *) lfirst(cell);

		if (!sameseti(rel->relids, other->relids))
		{
			/* a different set of base relations: keep it */
			survivors = lappend(survivors, other);
			continue;
		}

		/*
		 * Same set of base relations, so get the best of both pathlists.
		 * We assume a path can be handed over to the other RelOptInfo
		 * with nothing more than a change of its parent pointer
		 * (cf. pathnode.c).
		 */
		rel->pathlist = add_pathlist(rel,
									 rel->pathlist,
									 other->pathlist);
	}

	return survivors;
}
/*
 * rels_set_cheapest
 *	  Walks 'rel_list' (expected to hold only join relations) and, for
 *	  each entry, calls set_cheapest() on its pathlist and then
 *	  set_joinrel_rows_width() with the path that call returned.
 *
 *	  If the returned path is not a JoinPath, an error is reported
 *	  instead.
 */
void
rels_set_cheapest(Query *root, List *rel_list)
{
	List	   *cell;

	foreach(cell, rel_list)
	{
		RelOptInfo *joinrel = (RelOptInfo *) lfirst(cell);
		JoinPath   *best = (JoinPath *) set_cheapest(joinrel,
													 joinrel->pathlist);

		if (IsA_JoinPath(best))
		{
			set_joinrel_rows_width(root, joinrel, best);
		}
		else
		{
			elog(ERROR, "rels_set_cheapest: non JoinPath found");
		}
	}
}

View File

@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/tidpath.c,v 1.3 2000/01/26 05:56:34 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/tidpath.c,v 1.4 2000/02/07 04:40:59 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -37,7 +37,7 @@
#include "utils/lsyscache.h"
static List *create_tidscan_joinpaths(RelOptInfo *);
static List *TidqualFromRestrictinfo(List *relids, List * restrictinfo);
static List *TidqualFromRestrictinfo(List *relids, List *restrictinfo);
static bool isEvaluable(int varno, Node *node);
static Node *TidequalClause(int varno, Expr *node);
static List *TidqualFromExpr(int varno, Expr *expr);
@ -209,16 +209,17 @@ List *TidqualFromExpr(int varno, Expr *expr)
return rlst;
}
static
List *TidqualFromRestrictinfo(List *relids, List * restrictinfo)
static List *
TidqualFromRestrictinfo(List *relids, List *restrictinfo)
{
List *lst, *rlst = NIL;
int varno;
Node *node;
Expr *expr;
if (length(relids)>1) return NIL;
varno = (int)lfirst(relids);
if (length(relids) != 1)
return NIL;
varno = lfirsti(relids);
foreach (lst, restrictinfo)
{
node = lfirst(lst);
@ -226,9 +227,7 @@ List *TidqualFromRestrictinfo(List *relids, List * restrictinfo)
expr = ((RestrictInfo *)node)->clause;
rlst = TidqualFromExpr(varno, expr);
if (rlst)
{
break;
}
}
return rlst;
}
@ -281,8 +280,9 @@ List *
create_tidscan_paths(Query *root, RelOptInfo *rel)
{
List *rlst = NIL;
TidPath *pathnode = (TidPath *)0;
List *tideval = TidqualFromRestrictinfo(rel->relids, rel->restrictinfo);
TidPath *pathnode = (TidPath *) NULL;
List *tideval = TidqualFromRestrictinfo(rel->relids,
rel->baserestrictinfo);
if (tideval)
pathnode = create_tidscan_path(rel, tideval);

View File

@ -10,7 +10,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.83 2000/02/03 06:12:18 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.84 2000/02/07 04:41:00 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -147,7 +147,7 @@ create_scan_node(Query *root, Path *best_path, List *tlist)
* Extract the relevant restriction clauses from the parent relation;
* the executor must apply all these restrictions during the scan.
*/
scan_clauses = get_actual_clauses(best_path->parent->restrictinfo);
scan_clauses = get_actual_clauses(best_path->parent->baserestrictinfo);
switch (best_path->pathtype)
{
@ -203,7 +203,7 @@ create_join_node(Query *root, JoinPath *best_path, List *tlist)
inner_node = create_plan(root, best_path->innerjoinpath);
inner_tlist = inner_node->targetlist;
clauses = get_actual_clauses(best_path->path.parent->restrictinfo);
clauses = get_actual_clauses(best_path->joinrestrictinfo);
switch (best_path->path.pathtype)
{

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/plan/initsplan.c,v 1.43 2000/01/26 05:56:37 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/optimizer/plan/initsplan.c,v 1.44 2000/02/07 04:41:00 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -179,7 +179,8 @@ add_restrict_and_join_to_rel(Query *root, Node *clause)
*/
RelOptInfo *rel = get_base_rel(root, lfirsti(relids));
rel->restrictinfo = lcons(restrictinfo, rel->restrictinfo);
rel->baserestrictinfo = lcons(restrictinfo,
rel->baserestrictinfo);
}
else
{

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planmain.c,v 1.50 2000/01/26 05:56:37 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planmain.c,v 1.51 2000/02/07 04:41:00 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -219,7 +219,7 @@ subplanner(Query *root,
add_restrict_and_join_to_rels(root, qual);
add_missing_rels_to_query(root);
final_rel = make_one_rel(root, root->base_rel_list);
final_rel = make_one_rel(root);
if (! final_rel)
{

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.58 2000/01/26 05:56:40 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.59 2000/02/07 04:41:01 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -76,101 +76,104 @@ set_cheapest(RelOptInfo *parent_rel, List *pathlist)
/*
* add_pathlist
* Construct an output path list by adding to old_paths each path in
* new_paths that is worth considering --- that is, it has either a
* better sort order (better pathkeys) or cheaper cost than any of the
* existing old paths.
*
* Unless parent_rel->pruneable is false, we also remove from the output
* pathlist any old paths that are dominated by added path(s) --- that is,
* some new path is both cheaper and at least as well ordered.
*
* Note: the list old_paths is destructively modified, and in fact is
* turned into the output list.
*
* 'parent_rel' is the relation entry to which these paths correspond.
* 'old_paths' is the list of previously accepted paths for parent_rel.
* 'new_paths' is a list of potential new paths.
*
* Returns the updated list of interesting pathnodes.
* Consider each path given in new_paths, and add it to the parent rel's
* pathlist if it seems worthy.
*/
List *
add_pathlist(RelOptInfo *parent_rel, List *old_paths, List *new_paths)
void
add_pathlist(RelOptInfo *parent_rel, List *new_paths)
{
List *p1;
foreach(p1, new_paths)
{
Path *new_path = (Path *) lfirst(p1);
bool accept_new = true; /* unless we find a superior old path */
List *p2_prev = NIL;
List *p2;
/*
* Loop to check proposed new path against old paths. Note it is
* possible for more than one old path to be tossed out because
* new_path dominates it.
*/
foreach(p2, old_paths)
add_path(parent_rel, new_path);
}
}
/*
* add_path
* Consider a potential implementation path for the specified parent rel,
* and add it to the rel's pathlist if it is worthy of consideration.
* A path is worthy if it has either a better sort order (better pathkeys)
* or cheaper cost than any of the existing old paths.
*
* Unless parent_rel->pruneable is false, we also remove from the rel's
* pathlist any old paths that are dominated by new_path --- that is,
* new_path is both cheaper and at least as well ordered.
*
* 'parent_rel' is the relation entry to which the path corresponds.
* 'new_path' is a potential path for parent_rel.
*
* Returns nothing, but modifies parent_rel->pathlist.
*/
void
add_path(RelOptInfo *parent_rel, Path *new_path)
{
bool accept_new = true; /* unless we find a superior old path */
List *p1_prev = NIL;
List *p1;
/*
* Loop to check proposed new path against old paths. Note it is
* possible for more than one old path to be tossed out because
* new_path dominates it.
*/
foreach(p1, parent_rel->pathlist)
{
Path *old_path = (Path *) lfirst(p1);
bool remove_old = false; /* unless new proves superior */
switch (compare_pathkeys(new_path->pathkeys, old_path->pathkeys))
{
Path *old_path = (Path *) lfirst(p2);
bool remove_old = false; /* unless new proves superior */
switch (compare_pathkeys(new_path->pathkeys, old_path->pathkeys))
{
case PATHKEYS_EQUAL:
if (new_path->path_cost < old_path->path_cost)
remove_old = true; /* new dominates old */
else
accept_new = false; /* old equals or dominates new */
break;
case PATHKEYS_BETTER1:
if (new_path->path_cost <= old_path->path_cost)
remove_old = true; /* new dominates old */
break;
case PATHKEYS_BETTER2:
if (new_path->path_cost >= old_path->path_cost)
accept_new = false; /* old dominates new */
break;
case PATHKEYS_DIFFERENT:
/* keep both paths, since they have different ordering */
break;
}
/*
* Remove current element from old_list if dominated by new,
* unless xfunc told us not to remove any paths.
*/
if (remove_old && parent_rel->pruneable)
{
if (p2_prev)
lnext(p2_prev) = lnext(p2);
case PATHKEYS_EQUAL:
if (new_path->path_cost < old_path->path_cost)
remove_old = true; /* new dominates old */
else
old_paths = lnext(p2);
}
else
p2_prev = p2;
/*
* If we found an old path that dominates new_path, we can quit
* scanning old_paths; we will not add new_path, and we assume
* new_path cannot dominate any other elements of old_paths.
*/
if (! accept_new)
accept_new = false; /* old equals or dominates new */
break;
case PATHKEYS_BETTER1:
if (new_path->path_cost <= old_path->path_cost)
remove_old = true; /* new dominates old */
break;
case PATHKEYS_BETTER2:
if (new_path->path_cost >= old_path->path_cost)
accept_new = false; /* old dominates new */
break;
case PATHKEYS_DIFFERENT:
/* keep both paths, since they have different ordering */
break;
}
if (accept_new)
/*
* Remove current element from pathlist if dominated by new,
* unless xfunc told us not to remove any paths.
*/
if (remove_old && parent_rel->pruneable)
{
/* Accept the path. Note that it will now be eligible to be
* compared against the additional elements of new_paths...
*/
new_path->parent = parent_rel; /* not redundant, see prune.c */
old_paths = lcons(new_path, old_paths);
if (p1_prev)
lnext(p1_prev) = lnext(p1);
else
parent_rel->pathlist = lnext(p1);
}
else
p1_prev = p1;
/*
* If we found an old path that dominates new_path, we can quit
* scanning the pathlist; we will not add new_path, and we assume
* new_path cannot dominate any other elements of the pathlist.
*/
if (! accept_new)
break;
}
return old_paths;
if (accept_new)
{
/* Accept the path */
parent_rel->pathlist = lcons(new_path, parent_rel->pathlist);
}
}
@ -271,6 +274,7 @@ create_tidscan_path(RelOptInfo *rel, List *tideval)
* 'joinrel' is the join relation.
* 'outer_path' is the outer path
* 'inner_path' is the inner path
* 'restrict_clauses' are the RestrictInfo nodes to apply at the join
* 'pathkeys' are the path keys of the new join path
*
* Returns the resulting path node.
@ -280,6 +284,7 @@ NestPath *
create_nestloop_path(RelOptInfo *joinrel,
Path *outer_path,
Path *inner_path,
List *restrict_clauses,
List *pathkeys)
{
NestPath *pathnode = makeNode(NestPath);
@ -288,6 +293,7 @@ create_nestloop_path(RelOptInfo *joinrel,
pathnode->path.parent = joinrel;
pathnode->outerjoinpath = outer_path;
pathnode->innerjoinpath = inner_path;
pathnode->joinrestrictinfo = restrict_clauses;
pathnode->path.pathkeys = pathkeys;
pathnode->path.path_cost = cost_nestloop(outer_path,
@ -305,6 +311,7 @@ create_nestloop_path(RelOptInfo *joinrel,
* 'joinrel' is the join relation
* 'outer_path' is the outer path
* 'inner_path' is the inner path
* 'restrict_clauses' are the RestrictInfo nodes to apply at the join
* 'pathkeys' are the path keys of the new join path
* 'mergeclauses' are the applicable join/restriction clauses
* 'outersortkeys' are the sort varkeys for the outer relation
@ -315,6 +322,7 @@ MergePath *
create_mergejoin_path(RelOptInfo *joinrel,
Path *outer_path,
Path *inner_path,
List *restrict_clauses,
List *pathkeys,
List *mergeclauses,
List *outersortkeys,
@ -337,6 +345,7 @@ create_mergejoin_path(RelOptInfo *joinrel,
pathnode->jpath.path.parent = joinrel;
pathnode->jpath.outerjoinpath = outer_path;
pathnode->jpath.innerjoinpath = inner_path;
pathnode->jpath.joinrestrictinfo = restrict_clauses;
pathnode->jpath.path.pathkeys = pathkeys;
pathnode->path_mergeclauses = mergeclauses;
pathnode->outersortkeys = outersortkeys;
@ -356,6 +365,7 @@ create_mergejoin_path(RelOptInfo *joinrel,
* 'joinrel' is the join relation
* 'outer_path' is the cheapest outer path
* 'inner_path' is the cheapest inner path
* 'restrict_clauses' are the RestrictInfo nodes to apply at the join
* 'hashclauses' is a list of the hash join clause (always a 1-element list)
* 'innerdisbursion' is an estimate of the disbursion of the inner hash key
*
@ -364,6 +374,7 @@ HashPath *
create_hashjoin_path(RelOptInfo *joinrel,
Path *outer_path,
Path *inner_path,
List *restrict_clauses,
List *hashclauses,
Selectivity innerdisbursion)
{
@ -373,6 +384,7 @@ create_hashjoin_path(RelOptInfo *joinrel,
pathnode->jpath.path.parent = joinrel;
pathnode->jpath.outerjoinpath = outer_path;
pathnode->jpath.innerjoinpath = inner_path;
pathnode->jpath.joinrestrictinfo = restrict_clauses;
/* A hashjoin never has pathkeys, since its ordering is unpredictable */
pathnode->jpath.path.pathkeys = NIL;
pathnode->path_hashclauses = hashclauses;

View File

@ -1,109 +1,408 @@
/*-------------------------------------------------------------------------
*
* relnode.c
* Relation manipulation routines
* Relation-node lookup/construction routines
*
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/util/relnode.c,v 1.22 2000/02/06 03:27:33 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/optimizer/util/relnode.c,v 1.23 2000/02/07 04:41:02 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "optimizer/cost.h"
#include "optimizer/internal.h"
#include "optimizer/joininfo.h"
#include "optimizer/pathnode.h"
#include "optimizer/plancat.h"
#include "optimizer/tlist.h"
static List *new_join_tlist(List *tlist, int first_resdomno);
static List *build_joinrel_restrictlist(RelOptInfo *joinrel,
RelOptInfo *outer_rel,
RelOptInfo *inner_rel);
static void build_joinrel_joinlist(RelOptInfo *joinrel,
RelOptInfo *outer_rel,
RelOptInfo *inner_rel);
static List *subbuild_joinrel_restrictlist(RelOptInfo *joinrel,
List *joininfo_list);
static void subbuild_joinrel_joinlist(RelOptInfo *joinrel,
List *joininfo_list);
/*
* get_base_rel
* Returns relation entry corresponding to 'relid', creating a new one if
* necessary. This is for base relations.
*
* Returns relation entry corresponding to 'relid', creating a new one
* if necessary. This is for base relations.
*/
RelOptInfo *
get_base_rel(Query *root, int relid)
{
Relids relids = lconsi(relid, NIL);
List *baserels;
RelOptInfo *rel;
rel = rel_member(relids, root->base_rel_list);
if (rel == NULL)
foreach(baserels, root->base_rel_list)
{
rel = makeNode(RelOptInfo);
rel->relids = relids;
rel->rows = 0;
rel->width = 0;
rel->targetlist = NIL;
rel->pathlist = NIL;
rel->cheapestpath = (Path *) NULL;
rel->pruneable = true;
rel->indexed = false;
rel->pages = 0;
rel->tuples = 0;
rel->restrictinfo = NIL;
rel->joininfo = NIL;
rel->innerjoin = NIL;
rel = (RelOptInfo *) lfirst(baserels);
root->base_rel_list = lcons(rel, root->base_rel_list);
if (relid < 0)
{
/*
* If the relation is a materialized relation, assume
* constants for sizes.
*/
rel->pages = _NONAME_RELATION_PAGES_;
rel->tuples = _NONAME_RELATION_TUPLES_;
}
else
{
/*
* Otherwise, retrieve relation statistics from the
* system catalogs.
*/
relation_info(root, relid,
&rel->indexed, &rel->pages, &rel->tuples);
}
/* We know length(rel->relids) == 1 for all members of base_rel_list */
if (lfirsti(rel->relids) == relid)
return rel;
}
/* No existing RelOptInfo for this base rel, so make a new one */
rel = makeNode(RelOptInfo);
rel->relids = lconsi(relid, NIL);
rel->rows = 0;
rel->width = 0;
rel->targetlist = NIL;
rel->pathlist = NIL;
rel->cheapestpath = (Path *) NULL;
rel->pruneable = true;
rel->indexed = false;
rel->pages = 0;
rel->tuples = 0;
rel->baserestrictinfo = NIL;
rel->joininfo = NIL;
rel->innerjoin = NIL;
if (relid < 0)
{
/*
* If the relation is a materialized relation, assume
* constants for sizes.
*/
rel->pages = _NONAME_RELATION_PAGES_;
rel->tuples = _NONAME_RELATION_TUPLES_;
}
else
{
/*
* Otherwise, retrieve relation statistics from the
* system catalogs.
*/
relation_info(root, relid,
&rel->indexed, &rel->pages, &rel->tuples);
}
root->base_rel_list = lcons(rel, root->base_rel_list);
return rel;
}
/*
* get_join_rel
* Returns relation entry corresponding to 'relid' (a list of relids),
* or NULL.
*/
RelOptInfo *
get_join_rel(Query *root, Relids relid)
{
return rel_member(relid, root->join_rel_list);
}
/*
* rel_member
* Determines whether a relation of id 'relid' is contained within a list
* 'rels'.
*
* Returns the corresponding entry in 'rels' if it is there.
* find_join_rel
* Returns relation entry corresponding to 'relids' (a list of RT indexes),
* or NULL if none exists. This is for join relations.
*
* Note: there is probably no good reason for this to be called from
* anywhere except get_join_rel, but keep it as a separate routine
* just in case.
*/
RelOptInfo *
rel_member(Relids relids, List *rels)
static RelOptInfo *
find_join_rel(Query *root, Relids relids)
{
List *temp;
List *joinrels;
foreach(temp, rels)
foreach(joinrels, root->join_rel_list)
{
RelOptInfo *rel = (RelOptInfo *) lfirst(temp);
RelOptInfo *rel = (RelOptInfo *) lfirst(joinrels);
if (sameseti(rel->relids, relids))
return rel;
}
return NULL;
}
/*
 * get_join_rel
 *	  Looks up the join relation that covers the union of the two given
 *	  rels' relids, building and registering a new one if the query does
 *	  not have it yet.
 *
 * 'outer_rel' and 'inner_rel' are relation nodes for the relations to be
 *		joined; their relid sets must not overlap
 * 'restrictlist_ptr': optional output.  When it is not NULL,
 *		*restrictlist_ptr is set to the list of RestrictInfo nodes that
 *		apply when joining this particular pair of relations.
 *
 * Returning the restrictlist through a pointer is not pretty, but it
 * spares the caller from computing the same list a second time.
 */
RelOptInfo *
get_join_rel(Query *root,
			 RelOptInfo *outer_rel,
			 RelOptInfo *inner_rel,
			 List **restrictlist_ptr)
{
	List	   *all_relids;
	RelOptInfo *joinrel;
	List	   *pair_clauses;
	List	   *outer_part;
	List	   *inner_part;

	/* Joining two overlapping sets of rels would be a caller bug. */
	Assert(nonoverlap_setsi(outer_rel->relids, inner_rel->relids));

	/*
	 * Form the combined relid list.  nconc(listCopy(x), y) builds a new
	 * list without modifying either input list.
	 */
	all_relids = nconc(listCopy(outer_rel->relids), inner_rel->relids);

	/*
	 * If a joinrel for this set of base rels exists already, the only
	 * work left is the restrictlist for this specific pair of inputs,
	 * and then only if the caller asked for it.
	 */
	joinrel = find_join_rel(root, all_relids);
	if (joinrel != NULL)
	{
		if (restrictlist_ptr != NULL)
		{
			*restrictlist_ptr = build_joinrel_restrictlist(joinrel,
														   outer_rel,
														   inner_rel);
		}
		return joinrel;
	}

	/*
	 * No such joinrel yet: create an empty one for the combined relids.
	 */
	joinrel = makeNode(RelOptInfo);
	joinrel->relids = all_relids;
	joinrel->rows = 0;
	joinrel->width = 0;
	joinrel->targetlist = NIL;
	joinrel->pathlist = NIL;
	joinrel->cheapestpath = (Path *) NULL;
	joinrel->pruneable = true;
	joinrel->indexed = false;
	joinrel->pages = 0;
	joinrel->tuples = 0;
	joinrel->baserestrictinfo = NIL;
	joinrel->joininfo = NIL;
	joinrel->innerjoin = NIL;

	/*
	 * The join's tlist is the outer rel's entries followed by the inner
	 * rel's entries, renumbered consecutively from 1.
	 *
	 * NOTE: the entry order therefore depends on which outer/inner pair
	 * happens to build the joinrel first, though the contents should be
	 * the same in any case.
	 *
	 * XXX someday: consider pruning vars that are needed only for this
	 * join's restriction clauses and are never used higher in the plan.
	 */
	outer_part = new_join_tlist(outer_rel->targetlist, 1);
	inner_part = new_join_tlist(inner_rel->targetlist,
								length(outer_part) + 1);
	joinrel->targetlist = nconc(outer_part, inner_part);

	/*
	 * Build the restriction and join clause lists.  The restrictlist is
	 * computed even if the caller does not want it, because the size
	 * estimation below needs it.
	 */
	pair_clauses = build_joinrel_restrictlist(joinrel, outer_rel, inner_rel);
	if (restrictlist_ptr != NULL)
		*restrictlist_ptr = pair_clauses;
	build_joinrel_joinlist(joinrel, outer_rel, inner_rel);

	/* Estimate the size of the joinrel. */
	set_joinrel_size_estimates(root, joinrel, outer_rel, inner_rel,
							   pair_clauses);

	/*
	 * Register the new joinrel at the FRONT of the query's joinrel list;
	 * allpaths.c depends on this ordering.
	 */
	root->join_rel_list = lcons(joinrel, root->join_rel_list);

	return joinrel;
}
/*
 * new_join_tlist
 *	  Builds one input relation's share of a join relation's target list.
 *
 *	  A new target list element is made from the expression of every
 *	  entry in 'tlist'; the new elements are numbered consecutively,
 *	  beginning with 'first_resdomno'.
 *
 *	  XXX no pruning is done here: every entry present in the input tlist
 *	  is emitted in the join's tlist.
 *
 * 'tlist' is the target list of one of the join relations
 * 'first_resdomno' is the resdom number to use for the first created
 *		target list entry
 *
 * Returns the new target list.
 */
static List *
new_join_tlist(List *tlist,
			   int first_resdomno)
{
	List	   *result = NIL;
	int			next_resdomno = first_resdomno;
	List	   *cell;

	foreach(cell, tlist)
	{
		TargetEntry *entry = lfirst(cell);

		result = lappend(result,
						 create_tl_element(get_expr(entry), next_resdomno));
		next_resdomno++;
	}

	return result;
}
/*
* build_joinrel_restrictlist
* build_joinrel_joinlist
* These routines build lists of restriction and join clauses for a
* join relation from the joininfo lists of the relations it joins.
*
* These routines are separate because the restriction list must be
* built afresh for each pair of input sub-relations we consider, whereas
* the join lists need only be computed once for any join RelOptInfo.
* The join lists are fully determined by the set of rels making up the
* joinrel, so we should get the same results (up to ordering) from any
* candidate pair of sub-relations. But the restriction list is whatever
* is not handled in the sub-relations, so it depends on which
* sub-relations are considered.
*
* If a join clause from an input relation refers to base rels still not
* present in the joinrel, then it is still a join clause for the joinrel;
* we put it into an appropriate JoinInfo list for the joinrel. Otherwise,
* the clause is now a restrict clause for the joined relation, and we
* return it to the caller of build_joinrel_restrictlist() to be stored in
* join paths made from this pair of sub-relations. (It will not need to
* be considered further up the join tree.)
*
* 'joinrel' is a join relation node
* 'outer_rel' and 'inner_rel' are a pair of relations that can be joined
* to form joinrel.
*
* build_joinrel_restrictlist() returns a list of relevant restrictinfos,
* whereas build_joinrel_joinlist() stores its results in the joinrel's
* joininfo lists. One or the other must accept each given clause!
*
* NB: Formerly, we made deep(!) copies of each input RestrictInfo to pass
* up to the join relation. I believe this is no longer necessary, because
* RestrictInfo nodes are no longer context-dependent. Instead, just include
* the original nodes in the lists made for the join relation.
*/
/*
 * Collect the clauses that turn into restriction clauses when outer_rel
 * and inner_rel are joined to form joinrel.
 *
 * Each input relation's joininfo list is scanned separately, and the two
 * partial results are merged with LispUnion so that a clause reported by
 * both inputs shows up only once in the returned list.
 */
static List *
build_joinrel_restrictlist(RelOptInfo *joinrel,
						   RelOptInfo *outer_rel,
						   RelOptInfo *inner_rel)
{
	List	   *from_outer;
	List	   *from_inner;

	from_outer = subbuild_joinrel_restrictlist(joinrel, outer_rel->joininfo);
	from_inner = subbuild_joinrel_restrictlist(joinrel, inner_rel->joininfo);

	/* The same clauses arrive from both sides; the union drops duplicates. */
	return LispUnion(from_outer, from_inner);
}
/*
 * Populate joinrel's joininfo lists from the joininfo lists of the two
 * relations being joined.  Nothing is returned; the results are stored
 * in joinrel by subbuild_joinrel_joinlist.
 */
static void
build_joinrel_joinlist(RelOptInfo *joinrel,
					   RelOptInfo *outer_rel,
					   RelOptInfo *inner_rel)
{
	List	   *input_joininfo;

	/* Outer side first ... */
	input_joininfo = outer_rel->joininfo;
	subbuild_joinrel_joinlist(joinrel, input_joininfo);

	/* ... then the inner side. */
	input_joininfo = inner_rel->joininfo;
	subbuild_joinrel_joinlist(joinrel, input_joininfo);
}
/*
 * Scan one input relation's joininfo list and return the clauses that
 * become restriction clauses at the level of joinrel.
 *
 * A JoinInfo entry qualifies when none of its unjoined_relids remain
 * outside joinrel->relids.  Entries that still mention outside rels are
 * join clauses at this level and are skipped here.
 */
static List *
subbuild_joinrel_restrictlist(RelOptInfo *joinrel,
							  List *joininfo_list)
{
	List	   *result = NIL;
	List	   *cell;

	foreach(cell, joininfo_list)
	{
		JoinInfo   *jinfo = (JoinInfo *) lfirst(cell);
		Relids		still_unjoined;

		still_unjoined = set_differencei(jinfo->unjoined_relids,
										 joinrel->relids);

		/* Still refers to rels outside the joinrel: not our business. */
		if (still_unjoined != NIL)
			continue;

		/*
		 * All referenced rels are now inside the joinrel, so these
		 * clauses are restrictions for it.  Append a shallow copy of
		 * the list so the input relation's own list is left undisturbed.
		 */
		result = nconc(result, listCopy(jinfo->jinfo_restrictinfo));
	}

	return result;
}
/*
 * Scan one input relation's joininfo list and transfer to joinrel every
 * group of clauses that is still a join clause at joinrel's level, i.e.
 * that still references rels not contained in joinrel->relids.
 *
 * Groups whose unjoined rels are all covered by joinrel have become
 * restriction clauses and are ignored by this routine.
 */
static void
subbuild_joinrel_joinlist(RelOptInfo *joinrel,
						  List *joininfo_list)
{
	List	   *cell;

	foreach(cell, joininfo_list)
	{
		JoinInfo   *src_jinfo = (JoinInfo *) lfirst(cell);
		JoinInfo   *dst_jinfo;
		Relids		still_unjoined;

		still_unjoined = set_differencei(src_jinfo->unjoined_relids,
										 joinrel->relids);

		/* Nothing left outside the joinrel: these are restrictions now. */
		if (still_unjoined == NIL)
			continue;

		/*
		 * Find or make the joinrel's JoinInfo item for the remaining
		 * outside rels, and merge the clauses into it without
		 * introducing duplicates.
		 */
		dst_jinfo = find_joininfo_node(joinrel, still_unjoined);
		dst_jinfo->jinfo_restrictinfo =
			LispUnion(dst_jinfo->jinfo_restrictinfo,
					  src_jinfo->jinfo_restrictinfo);
	}
}

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Id: relation.h,v 1.42 2000/01/26 05:58:17 momjian Exp $
* $Id: relation.h,v 1.43 2000/02/07 04:41:02 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -54,13 +54,26 @@ typedef List *Relids;
* * The presence of the remaining fields depends on the restrictions
* and joins that the relation participates in:
*
* restrictinfo - List of RestrictInfo nodes, containing info about each
* qualification clause in which this relation participates
* baserestrictinfo - List of RestrictInfo nodes, containing info about
* each qualification clause in which this relation
* participates (only used for base rels)
* joininfo - List of JoinInfo nodes, containing info about each join
* clause in which this relation participates
* innerjoin - List of Path nodes that represent indices that may be used
* as inner paths of nestloop joins. This field is non-null
* only for base rels, since join rels have no indices.
*
* Note: Keeping a restrictinfo list in the RelOptInfo is useful only for
* base rels, because for a join rel the set of clauses that are treated as
* restrict clauses varies depending on which sub-relations we choose to join.
* (For example, in a 3-base-rel join, a clause relating rels 1 and 2 must be
* treated as a restrictclause if we join {1} and {2 3} to make {1 2 3}; but
* if we join {1 2} and {3} then that clause will be a restrictclause in {1 2}
* and should not be processed again at the level of {1 2 3}.) Therefore,
* the restrictinfo list in the join case appears in individual JoinPaths
* (field joinrestrictinfo), not in the parent relation. But it's OK for
* the RelOptInfo to store the joininfo lists, because those are the same
* for a given rel no matter how we form it.
*/
typedef struct RelOptInfo
@ -86,7 +99,7 @@ typedef struct RelOptInfo
double tuples;
/* used by various scans and joins: */
List *restrictinfo; /* RestrictInfo structures */
List *baserestrictinfo; /* RestrictInfo structures (if base rel) */
List *joininfo; /* JoinInfo structures */
List *innerjoin; /* potential indexscans for nestloop joins */
/* innerjoin indexscans are not in the main pathlist because they are
@ -235,6 +248,10 @@ typedef struct JoinPath
Path *outerjoinpath; /* path for the outer side of the join */
Path *innerjoinpath; /* path for the inner side of the join */
List *joinrestrictinfo; /* RestrictInfos to apply to join */
/* See the notes for RelOptInfo to understand why joinrestrictinfo is
* needed in JoinPath, and can't be merged into the parent RelOptInfo.
*/
} JoinPath;
/*
@ -289,18 +306,29 @@ typedef struct HashPath
* without having to evaluate the rest. The RestrictInfo node itself stores
* data used by the optimizer while choosing the best query plan.
*
* A restriction clause will appear in the restrictinfo list of a RelOptInfo
* that describes exactly the set of base relations referenced by the
* restriction clause. It is not possible to apply the clause at any lower
* nesting level, and there is little point in delaying its evaluation to
* higher nesting levels. (The "predicate migration" code was once intended
* to push restriction clauses up and down the plan tree, but it's dead code
* and is unlikely to be resurrected in the foreseeable future.)
* If a restriction clause references a single base relation, it will appear
* in the baserestrictinfo list of the RelOptInfo for that base rel.
*
* If a restriction clause references more than one base rel, it will also
* appear in the JoinInfo list of every RelOptInfo that describes a strict
* If a restriction clause references more than one base rel, it will
* appear in the JoinInfo lists of every RelOptInfo that describes a strict
* subset of the base rels mentioned in the clause. The JoinInfo lists are
* used to drive join tree building by selecting plausible join candidates.
* The clause cannot actually be applied until we have built a join rel
* containing all the base rels it references, however.
*
* When we construct a join rel that describes exactly the set of base rels
* referenced in a multi-relation restriction clause, we place that clause
* into the joinrestrictinfo lists of paths for the join rel. It will be
* applied at that join level, and will not propagate any further up the
* join tree. (Note: the "predicate migration" code was once intended to
* push restriction clauses up and down the plan tree based on evaluation
* costs, but it's dead code and is unlikely to be resurrected in the
* foreseeable future.)
*
* Note that in the presence of more than two rels, a multi-rel restriction
* might reach different heights in the join tree depending on the join
* sequence we use. So, these clauses cannot be associated directly with
* the join RelOptInfo, but must be kept track of on a per-join-path basis.
*
* In general, the referenced clause might be arbitrarily complex. The
* kinds of clauses we can handle as indexscan quals, mergejoin clauses,

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Id: cost.h,v 1.28 2000/01/26 05:58:20 momjian Exp $
* $Id: cost.h,v 1.29 2000/02/07 04:41:04 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -55,9 +55,11 @@ extern Cost cost_mergejoin(Path *outer_path, Path *inner_path,
List *outersortkeys, List *innersortkeys);
extern Cost cost_hashjoin(Path *outer_path, Path *inner_path,
Selectivity innerdisbursion);
extern void set_rel_rows_width(Query *root, RelOptInfo *rel);
extern void set_joinrel_rows_width(Query *root, RelOptInfo *rel,
JoinPath *joinpath);
extern void set_baserel_size_estimates(Query *root, RelOptInfo *rel);
extern void set_joinrel_size_estimates(Query *root, RelOptInfo *rel,
RelOptInfo *outer_rel,
RelOptInfo *inner_rel,
List *restrictlist);
/*
* prototypes for clausesel.c

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Id: pathnode.h,v 1.24 2000/01/26 05:58:20 momjian Exp $
* $Id: pathnode.h,v 1.25 2000/02/07 04:41:04 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -21,8 +21,8 @@
*/
extern bool path_is_cheaper(Path *path1, Path *path2);
extern Path *set_cheapest(RelOptInfo *parent_rel, List *pathlist);
extern List *add_pathlist(RelOptInfo *parent_rel, List *old_paths,
List *new_paths);
extern void add_path(RelOptInfo *parent_rel, Path *new_path);
extern void add_pathlist(RelOptInfo *parent_rel, List *new_paths);
extern Path *create_seqscan_path(RelOptInfo *rel);
extern IndexPath *create_index_path(Query *root, RelOptInfo *rel,
@ -31,25 +31,34 @@ extern IndexPath *create_index_path(Query *root, RelOptInfo *rel,
extern TidPath *create_tidscan_path(RelOptInfo *rel, List *tideval);
extern NestPath *create_nestloop_path(RelOptInfo *joinrel,
Path *outer_path, Path *inner_path,
Path *outer_path,
Path *inner_path,
List *restrict_clauses,
List *pathkeys);
extern MergePath *create_mergejoin_path(RelOptInfo *joinrel, Path *outer_path,
Path *inner_path, List *pathkeys,
extern MergePath *create_mergejoin_path(RelOptInfo *joinrel,
Path *outer_path,
Path *inner_path,
List *restrict_clauses,
List *pathkeys,
List *mergeclauses,
List *outersortkeys,
List *innersortkeys);
extern HashPath *create_hashjoin_path(RelOptInfo *joinrel, Path *outer_path,
Path *inner_path, List *hashclauses,
extern HashPath *create_hashjoin_path(RelOptInfo *joinrel,
Path *outer_path,
Path *inner_path,
List *restrict_clauses,
List *hashclauses,
Selectivity innerdisbursion);
/*
* prototypes for rel.c
* prototypes for relnode.c
*/
extern RelOptInfo *rel_member(Relids relid, List *rels);
extern RelOptInfo *get_base_rel(Query *root, int relid);
extern RelOptInfo *get_join_rel(Query *root, Relids relid);
extern RelOptInfo *get_join_rel(Query *root, RelOptInfo *outer_rel,
RelOptInfo *inner_rel,
List **restrictlist_ptr);
/*
* prototypes for indexnode.h

View File

@ -8,7 +8,7 @@
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Id: paths.h,v 1.41 2000/02/06 03:27:35 tgl Exp $
* $Id: paths.h,v 1.42 2000/02/07 04:41:04 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -27,15 +27,15 @@
extern bool enable_geqo;
extern int geqo_rels;
extern RelOptInfo *make_one_rel(Query *root, List *rels);
extern RelOptInfo *make_one_rel(Query *root);
/*
* indxpath.c
* routines to generate index paths
*/
extern List *create_index_paths(Query *root, RelOptInfo *rel, List *indices,
List *restrictinfo_list,
List *joininfo_list);
List *restrictinfo_list,
List *joininfo_list);
extern Oid indexable_operator(Expr *clause, Oid opclass, Oid relam,
bool indexkey_on_left);
extern List *extract_or_indexqual_conditions(RelOptInfo *rel,
@ -60,7 +60,22 @@ extern List *create_tidscan_paths(Query *root, RelOptInfo *rel);
* joinpath.c
* routines to create join paths
*/
extern void update_rels_pathlist_for_joins(Query *root, List *joinrels);
extern void add_paths_to_joinrel(Query *root, RelOptInfo *joinrel,
RelOptInfo *outerrel,
RelOptInfo *innerrel,
List *restrictlist);
/*
* joinrels.c
* routines to determine which relations to join
*/
extern void make_rels_by_joins(Query *root, int level);
extern RelOptInfo *make_rels_by_clause_joins(Query *root,
RelOptInfo *old_rel,
List *other_rels);
extern RelOptInfo *make_rels_by_clauseless_joins(Query *root,
RelOptInfo *old_rel,
List *other_rels);
/*
* pathkeys.c
@ -90,22 +105,4 @@ extern List *find_mergeclauses_for_pathkeys(List *pathkeys,
extern List *make_pathkeys_for_mergeclauses(List *mergeclauses,
List *tlist);
/*
* joinrels.c
* routines to determine which relations to join
*/
extern List *make_rels_by_joins(Query *root, List *old_rels);
extern List *make_rels_by_clause_joins(Query *root, RelOptInfo *old_rel,
List *joininfo_list, Relids only_relids);
extern List *make_rels_by_clauseless_joins(RelOptInfo *old_rel,
List *inner_rels);
extern RelOptInfo *get_cheapest_complete_rel(List *join_rel_list);
/*
* prune.c
*/
extern void merge_rels_with_same_relids(List *rel_list);
extern void rels_set_cheapest(Query *root, List *rel_list);
extern List *del_rels_all_bushy_inactive(List *old_rels);
#endif /* PATHS_H */