Fix some bogosities in the code that deals with estimating the fraction
of tuples we are going to retrieve from a sub-SELECT.  Must have been
half asleep when I did this code the first time :-(
2000-03-14 02:23:15 +00:00 · 2000-03-14 02:23:15 +00:00 · 6217a8c7ba
parent a1642089bf
commit 6217a8c7ba
3 changed files with 42 additions and 29 deletions
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -42,7 +42,7 @@
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.52 2000/02/15 20:49:16 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.53 2000/03/14 02:23:14 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -687,8 +687,8 @@ cost_qual_eval_walker(Node *node, Cost *total)
 				 * (We assume that sub-selects that can be executed as
 				 * InitPlans have already been removed from the expression.)
 				 *
-				 * NOTE: this logic should agree with make_subplan in
-				 * subselect.c. 
+				 * NOTE: this logic should agree with the estimates used by
+				 * make_subplan() in plan/subselect.c. 
 				 */
 				{
 					SubPlan	   *subplan = (SubPlan *) expr->oper;
@@ -701,16 +701,18 @@ cost_qual_eval_walker(Node *node, Cost *total)
 						subcost = plan->startup_cost +
 							(plan->total_cost - plan->startup_cost) / plan->plan_rows;
 					}
-					else if (subplan->sublink->subLinkType == EXPR_SUBLINK)
-					{
-						/* assume we need all tuples */
-						subcost = plan->total_cost;
-					}
-					else
+					else if (subplan->sublink->subLinkType == ALL_SUBLINK ||
+							 subplan->sublink->subLinkType == ANY_SUBLINK)
 					{
 						/* assume we need 50% of the tuples */
 						subcost = plan->startup_cost +
 							0.50 * (plan->total_cost - plan->startup_cost);
+						/* XXX what if subplan has been materialized? */
+					}
+					else
+					{
+						/* assume we need all tuples */
+						subcost = plan->total_cost;
 					}
 					*total += subcost;
 				}
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.76 2000/02/21 01:13:04 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.77 2000/03/14 02:23:15 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -360,11 +360,14 @@ union_planner(Query *parse,
 			 * In GROUP BY mode, we have the little problem that we don't
 			 * really know how many input tuples will be needed to make a
 			 * group, so we can't translate an output LIMIT count into an
-			 * input count.  For lack of a better idea, assume 10% of the
+			 * input count.  For lack of a better idea, assume 25% of the
 			 * input data will be processed if there is any output limit.
+			 * However, if the caller gave us a fraction rather than an
+			 * absolute count, we can keep using that fraction (which amounts
+			 * to assuming that all the groups are about the same size).
 			 */
-			if (tuple_fraction > 0.0)
-				tuple_fraction = 0.10;
+			if (tuple_fraction >= 1.0)
+				tuple_fraction = 0.25;
 			/*
 			 * If both GROUP BY and ORDER BY are specified, we will need
 			 * two levels of sort --- and, therefore, certainly need to
@@ -386,11 +389,10 @@ union_planner(Query *parse,
 		{
 			/*
 			 * SELECT DISTINCT, like GROUP, will absorb an unpredictable
-			 * number of input tuples per output tuple.  So, fall back to
-			 * our same old 10% default...
+			 * number of input tuples per output tuple.  Handle the same way.
 			 */
-			if (tuple_fraction > 0.0)
-				tuple_fraction = 0.10;
+			if (tuple_fraction >= 1.0)
+				tuple_fraction = 0.25;
 		}

 		/* Generate the (sub) plan */
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -7,7 +7,7 @@
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/plan/subselect.c,v 1.30 2000/03/11 23:53:41 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/plan/subselect.c,v 1.31 2000/03/14 02:23:15 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -134,25 +134,34 @@ make_subplan(SubLink *slink)

 	PlannerInitPlan = NULL;

-	PlannerQueryLevel++;		/* we becomes child */
+	PlannerQueryLevel++;		/* we become child */

 	/*
 	 * For an EXISTS subplan, tell lower-level planner to expect that
-	 * only the first tuple will be retrieved.  For ALL, ANY, and MULTIEXPR
-	 * subplans, we will be able to stop evaluating if the test condition
-	 * fails, so very often not all the tuples will be retrieved; for lack
-	 * of a better idea, specify 50% retrieval.  For EXPR_SUBLINK use default
-	 * behavior.
+	 * only the first tuple will be retrieved.  For ALL and ANY subplans,
+	 * we will be able to stop evaluating if the test condition fails,
+	 * so very often not all the tuples will be retrieved; for lack of a
+	 * better idea, specify 50% retrieval.  For EXPR and MULTIEXPR subplans,
+	 * use default behavior (we're only expecting one row out, anyway).
 	 *
-	 * NOTE: if you change these numbers, also change cost_qual_eval_walker
-	 * in costsize.c.
+	 * NOTE: if you change these numbers, also change cost_qual_eval_walker()
+	 * in path/costsize.c.
+	 *
+	 * XXX If an ALL/ANY subplan is uncorrelated, we may decide to materialize
+	 * its result below.  In that case it would've been better to specify
+	 * full retrieval.  At present, however, we can only detect correlation
+	 * or lack of it after we've made the subplan :-(.  Perhaps detection
+	 * of correlation should be done as a separate step.  Meanwhile, we don't
+	 * want to be too optimistic about the percentage of tuples retrieved,
+	 * for fear of selecting a plan that's bad for the materialization case.
 	 */
 	if (slink->subLinkType == EXISTS_SUBLINK)
 		tuple_fraction = 1.0;	/* just like a LIMIT 1 */
-	else if (slink->subLinkType == EXPR_SUBLINK)
-		tuple_fraction = -1.0;	/* default behavior */
-	else
+	else if (slink->subLinkType == ALL_SUBLINK ||
+			 slink->subLinkType == ANY_SUBLINK)
 		tuple_fraction = 0.5;	/* 50% */
+	else
+		tuple_fraction = -1.0;	/* default behavior */

 	node->plan = plan = union_planner(subquery, tuple_fraction);