Repair logic flaw in cost estimator: cost_nestloop() was estimating CPU

costs using the inner path's parent->rows count as the number of tuples
processed per inner scan iteration.  This is wrong when we are using an
inner indexscan with indexquals based on join clauses, because the rows
count in a Relation node reflects the selectivity of the restriction
clauses for that rel only.  Upshot was that if join clause was very
selective, we'd drastically overestimate the true cost of the join.
Fix is to calculate correct output-rows estimate for an inner indexscan
when the IndexPath node is created and save it in the path node.
Change of path node doesn't require initdb, since path nodes don't
appear in saved rules.
This commit is contained in:
Tom Lane 2000-03-22 22:08:35 +00:00
parent d825e55c13
commit 1d5e7a6f46
11 changed files with 83 additions and 63 deletions

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v 1.109 2000/03/14 23:06:27 thomas Exp $
* $Header: /cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v 1.110 2000/03/22 22:08:32 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -1135,6 +1135,7 @@ _copyIndexPath(IndexPath *from)
Node_Copy(from, newnode, indexqual);
newnode->indexscandir = from->indexscandir;
newnode->joinrelids = listCopy(from->joinrelids);
newnode->rows = from->rows;
return newnode;
}

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/nodes/equalfuncs.c,v 1.64 2000/03/01 18:47:43 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/nodes/equalfuncs.c,v 1.65 2000/03/22 22:08:32 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -375,6 +375,9 @@ _equalIndexPath(IndexPath *a, IndexPath *b)
return false;
if (!equali(a->joinrelids, b->joinrelids))
return false;
/* Skip 'rows' because of possibility of floating-point roundoff error.
* It should be derivable from the other fields anyway.
*/
return true;
}

View File

@ -6,7 +6,7 @@
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Header: /cvsroot/pgsql/src/backend/nodes/outfuncs.c,v 1.111 2000/03/17 02:36:12 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/nodes/outfuncs.c,v 1.112 2000/03/22 22:08:32 tgl Exp $
*
* NOTES
* Every (plan) node in POSTGRES has an associated "out" routine which
@ -1019,6 +1019,9 @@ _outIndexPath(StringInfo str, IndexPath *node)
appendStringInfo(str, " :indexscandir %d :joinrelids ",
(int) node->indexscandir);
_outIntList(str, node->joinrelids);
appendStringInfo(str, " :rows %.2f ",
node->rows);
}
/*

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/nodes/readfuncs.c,v 1.86 2000/03/17 02:36:12 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/nodes/readfuncs.c,v 1.87 2000/03/22 22:08:32 tgl Exp $
*
* NOTES
* Most of the read functions for plan nodes are tested. (In fact, they
@ -1510,6 +1510,10 @@ _readIndexPath()
token = lsptok(NULL, &length); /* get :joinrelids */
local_node->joinrelids = toIntList(nodeRead(true));
token = lsptok(NULL, &length); /* get :rows */
token = lsptok(NULL, &length); /* now read it */
local_node->rows = atof(token);
return local_node;
}

View File

@ -42,7 +42,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.53 2000/03/14 02:23:14 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.54 2000/03/22 22:08:33 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -202,7 +202,7 @@ cost_nonsequential_access(double relpages)
* 'index' is the index to be used
* 'indexQuals' is the list of applicable qual clauses (implicit AND semantics)
* 'is_injoin' is T if we are considering using the index scan as the inside
* of a nestloop join.
* of a nestloop join (hence, some of the indexQuals are join clauses)
*
* NOTE: 'indexQuals' must contain only clauses usable as index restrictions.
* Any additional quals evaluated as qpquals may reduce the number of returned
@ -281,12 +281,15 @@ cost_index(Path *path, Query *root,
/* CPU costs */
cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost;
/*
* Assume that the indexquals will be removed from the list of
* restriction clauses that we actually have to evaluate as qpquals.
* This is not completely right, but it's close.
* For a lossy index, however, we will have to recheck all the quals.
* Normally the indexquals will be removed from the list of restriction
* clauses that we have to evaluate as qpquals, so we should subtract
* their costs from baserestrictcost. For a lossy index, however, we
* will have to recheck all the quals and so mustn't subtract anything.
* Also, if we are doing a join then some of the indexquals are join
* clauses and shouldn't be subtracted. Rather than work out exactly
* how much to subtract, we don't subtract anything in that case either.
*/
if (! index->lossy)
if (! index->lossy && ! is_injoin)
cpu_per_tuple -= cost_qual_eval(indexQuals);
run_cost += cpu_per_tuple * tuples_fetched;
@ -418,15 +421,12 @@ cost_sort(Path *path, List *pathkeys, double tuples, int width)
* 'outer_path' is the path for the outer relation
* 'inner_path' is the path for the inner relation
* 'restrictlist' are the RestrictInfo nodes to be applied at the join
* 'is_indexjoin' is true if we are using an indexscan for the inner relation
* (not currently needed here; the indexscan adjusts its cost...)
*/
void
cost_nestloop(Path *path,
Path *outer_path,
Path *inner_path,
List *restrictlist,
bool is_indexjoin)
List *restrictlist)
{
Cost startup_cost = 0;
Cost run_cost = 0;
@ -447,8 +447,15 @@ cost_nestloop(Path *path,
run_cost += outer_path->parent->rows *
(inner_path->total_cost - inner_path->startup_cost);
/* number of tuples processed (not number emitted!) */
ntuples = outer_path->parent->rows * inner_path->parent->rows;
/* Number of tuples processed (not number emitted!). If inner path is
* an indexscan, be sure to use its estimated output row count, which
* may be lower than the restriction-clause-only row count of its parent.
*/
if (IsA(inner_path, IndexPath))
ntuples = ((IndexPath *) inner_path)->rows;
else
ntuples = inner_path->parent->rows;
ntuples *= outer_path->parent->rows;
/* CPU costs */
cpu_per_tuple = cpu_tuple_cost + cost_qual_eval(restrictlist);

View File

@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.80 2000/02/15 20:49:16 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.81 2000/03/22 22:08:33 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -1454,6 +1454,26 @@ index_innerjoin(Query *root, RelOptInfo *rel, IndexOptInfo *index,
/* joinrelids saves the rels needed on the outer side of the join */
pathnode->joinrelids = lfirst(outerrelids_list);
/*
* We must compute the estimated number of output rows for the
* indexscan. This is less than rel->rows because of the additional
* selectivity of the join clauses. Since clausegroup may contain
* both restriction and join clauses, we have to do a set union to
* get the full set of clauses that must be considered to compute
* the correct selectivity. (We can't just nconc the two lists;
* then we might have some restriction clauses appearing twice,
* which'd mislead restrictlist_selectivity into double-counting
* their selectivity.)
*/
pathnode->rows = rel->tuples *
restrictlist_selectivity(root,
LispUnion(rel->baserestrictinfo,
clausegroup),
lfirsti(rel->relids));
/* Like costsize.c, force estimate to be at least one row */
if (pathnode->rows < 1.0)
pathnode->rows = 1.0;
cost_index(&pathnode->path, root, rel, index, indexquals, true);
path_list = lappend(path_list, pathnode);

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/orindxpath.c,v 1.37 2000/02/15 20:49:17 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/orindxpath.c,v 1.38 2000/03/22 22:08:33 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -99,7 +99,9 @@ create_or_index_paths(Query *root,
/* We don't actually care what order the index scans in ... */
pathnode->indexscandir = NoMovementScanDirection;
/* This isn't a nestloop innerjoin, so: */
pathnode->joinrelids = NIL; /* no join clauses here */
pathnode->rows = rel->rows;
best_or_subclause_indices(root,
rel,

View File

@ -10,7 +10,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.86 2000/02/18 23:47:21 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.87 2000/03/22 22:08:34 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -314,7 +314,6 @@ create_indexscan_node(Query *root,
List *ixid;
IndexScan *scan_node;
bool lossy = false;
double plan_rows;
/* there should be exactly one base rel involved... */
Assert(length(best_path->path.parent->relids) == 1);
@ -350,20 +349,7 @@ create_indexscan_node(Query *root,
* given by scan_clauses, there will normally be some duplications
* between the lists. We get rid of the duplicates, then add back
* if lossy.
*
* If this indexscan is a nestloop-join inner indexscan (as indicated
* by having nonempty joinrelids), then it uses indexqual conditions
* that are not part of the relation's restriction clauses. The rows
* estimate stored in the relation's RelOptInfo will be an overestimate
* because it did not take these extra conditions into account. So,
* in this case we recompute the selectivity of the whole scan ---
* considering both indexqual and qpqual --- rather than using the
* RelOptInfo's rows value. Since clausesel.c assumes it's working on
* minimized (no duplicates) expressions, we have to do that while we
* have the duplicate-free qpqual available.
*/
plan_rows = best_path->path.parent->rows; /* OK unless nestloop inner */
if (length(indxqual) > 1)
{
/*
@ -383,15 +369,6 @@ create_indexscan_node(Query *root,
qpqual = set_difference(scan_clauses,
lcons(indxqual_expr, NIL));
if (best_path->joinrelids)
{
/* recompute output row estimate using all available quals */
plan_rows = best_path->path.parent->tuples *
clauselist_selectivity(root,
lcons(indxqual_expr, qpqual),
baserelid);
}
if (lossy)
qpqual = lappend(qpqual, copyObject(indxqual_expr));
}
@ -404,15 +381,6 @@ create_indexscan_node(Query *root,
qpqual = set_difference(scan_clauses, indxqual_list);
if (best_path->joinrelids)
{
/* recompute output row estimate using all available quals */
plan_rows = best_path->path.parent->tuples *
clauselist_selectivity(root,
nconc(listCopy(indxqual_list), qpqual),
baserelid);
}
if (lossy)
qpqual = nconc(qpqual, (List *) copyObject(indxqual_list));
}
@ -433,10 +401,8 @@ create_indexscan_node(Query *root,
best_path->indexscandir);
copy_path_costsize(&scan_node->scan.plan, &best_path->path);
/* set up rows estimate (just to make EXPLAIN output reasonable) */
if (plan_rows < 1.0)
plan_rows = 1.0;
scan_node->scan.plan.plan_rows = plan_rows;
/* use the indexscan-specific rows estimate, not the parent rel's */
scan_node->scan.plan.plan_rows = best_path->rows;
return scan_node;
}

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.61 2000/02/18 23:47:30 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.62 2000/03/22 22:08:35 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -337,8 +337,16 @@ create_index_path(Query *root,
*/
pathnode->indexid = lconsi(index->indexoid, NIL);
pathnode->indexqual = lcons(indexquals, NIL);
pathnode->indexscandir = indexscandir;
/*
* This routine is only used to generate "standalone" indexpaths,
* not nestloop inner indexpaths. So joinrelids is always NIL
* and the number of rows is the same as the parent rel's estimate.
*/
pathnode->joinrelids = NIL; /* no join clauses here */
pathnode->rows = rel->rows;
cost_index(&pathnode->path, root, rel, index, indexquals, false);
@ -400,8 +408,7 @@ create_nestloop_path(RelOptInfo *joinrel,
pathnode->joinrestrictinfo = restrict_clauses;
pathnode->path.pathkeys = pathkeys;
cost_nestloop(&pathnode->path, outer_path, inner_path,
restrict_clauses, IsA(inner_path, IndexPath));
cost_nestloop(&pathnode->path, outer_path, inner_path, restrict_clauses);
return pathnode;
}

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Id: relation.h,v 1.45 2000/02/18 23:47:17 tgl Exp $
* $Id: relation.h,v 1.46 2000/03/22 22:08:29 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -252,6 +252,12 @@ typedef struct Path
* as the inner path of a nestloop join. These paths have indexquals
* that refer to values of other rels, so those other rels must be
* included in the outer joinrel in order to make a usable join.
*
* 'rows' is the estimated result tuple count for the indexscan. This
* is the same as path.parent->rows for a simple indexscan, but it is
* different for a nestloop inner path, because the additional indexquals
* coming from join clauses make the scan more selective than the parent
* rel's restrict clauses alone would do.
*----------
*/
typedef struct IndexPath
@ -260,7 +266,8 @@ typedef struct IndexPath
List *indexid;
List *indexqual;
ScanDirection indexscandir;
Relids joinrelids; /* other rels mentioned in indexqual */
Relids joinrelids; /* other rels mentioned in indexqual */
double rows; /* estimated number of result tuples */
} IndexPath;
typedef struct TidPath

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Id: cost.h,v 1.30 2000/02/15 20:49:25 tgl Exp $
* $Id: cost.h,v 1.31 2000/03/22 22:08:30 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -58,7 +58,7 @@ extern void cost_index(Path *path, Query *root,
extern void cost_tidscan(Path *path, RelOptInfo *baserel, List *tideval);
extern void cost_sort(Path *path, List *pathkeys, double tuples, int width);
extern void cost_nestloop(Path *path, Path *outer_path, Path *inner_path,
List *restrictlist, bool is_indexjoin);
List *restrictlist);
extern void cost_mergejoin(Path *path, Path *outer_path, Path *inner_path,
List *restrictlist,
List *outersortkeys, List *innersortkeys);