Tighten coding in new_join_pathkey, which seems to be a hotspot

for GEQO ...
1999-05-17 00:26:33 +00:00 · 1999-05-17 00:26:33 +00:00 · 0b8b1fe3aa
parent 1332c1e144
commit 0b8b1fe3aa
1 changed files with 53 additions and 96 deletions
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@ -7,7 +7,7 @@
 *
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/pathkeys.c,v 1.8 1999/04/30 03:59:06 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/pathkeys.c,v 1.9 1999/05/17 00:26:33 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -29,30 +29,34 @@ static int match_pathkey_joinkeys(List *pathkey, List *joinkeys,
 						int outer_or_inner);
 static List *new_join_pathkey(List *pathkeys, List *join_rel_tlist,
 							  List *joinclauses);
-static List *get_joinvars_for_var(Var *pathkey, List **considered_pathkeys,
-					 	List *join_rel_tlist, List *joinclauses);


-/*
+/*--------------------
 *	Explanation of Path.pathkeys
 *
 *	Path.pathkeys is a List of List of Var nodes that represent the sort
 *	order of the result generated by the Path.
 *
 *	In single/base relation RelOptInfo's, the Path's represent various ways
- *	of generate the relation.  Sequential scan Paths have a NIL pathkeys.
- *	Index scans have Path.pathkeys that represent the chosen index.  A
- *	single-key index pathkeys would be { {tab1_indexkey1} }.  The pathkeys
- *	entry for a multi-key index would be { {tab1_indexkey1}, {tab1_indexkey2} }.
+ *	of generating the relation and the resulting ordering of the tuples.
+ *	Sequential scan Paths have NIL pathkeys, indicating no known ordering.
+ *	Index scans have Path.pathkeys that represent the chosen index.
+ *	A single-key index pathkeys would be { {tab1_indexkey1} }.  For a
+ *	multi-key index pathkeys would be { {tab1_indexkey1}, {tab1_indexkey2} },
+ *	indicating major sort by indexkey1 and minor sort by indexkey2.
 *
 *	Multi-relation RelOptInfo Path's are more complicated.  Mergejoins are
- *	only performed with equajoins("=").  Because of this, the multi-relation
+ *	only performed with equijoins ("=").  Because of this, the multi-relation
 *	path actually has more than one primary Var key.  For example, a
 *	mergejoin Path of "tab1.col1 = tab2.col1" would generate a pathkeys of
- *	{ {tab1.col1, tab2.col1} }.  This allows future joins to use either Var
- *	as a pre-sorted key to prevent Mergejoins from having to re-sort the Path.
- *	They are equal, so they are both primary sort keys.  This is why pathkeys
- *	is a List of Lists.
+ *	{ {tab1.col1, tab2.col1} }, indicating that the major sort order of the
+ *	Path can be taken to be *either* tab1.col1 or tab2.col1.
+ *	They are equal, so they are both primary sort keys.  This allows future
+ *	joins to use either Var as a pre-sorted key to prevent upper Mergejoins
+ *	from having to re-sort the Path.  This is why pathkeys is a List of Lists.
+ *
+ *	Note that while the order of the top list is meaningful (primary vs.
+ *	secondary sort key), the order of each sublist is arbitrary.
 *
 *	For multi-key sorts, if the outer is sorted by a multi-key index, the
 *	multi-key index remains after the join.  If the inner has a multi-key
@ -60,11 +64,16 @@ static List *get_joinvars_for_var(Var *pathkey, List **considered_pathkeys,
 *	Mergejoins only join on the primary key.  Currently, non-primary keys
 *	in the pathkeys List are of limited value.
 *
- *	HashJoin has similar functionality.  NestJoin does not perform sorting,
- *	and allows non-equajoins, so it does not allow useful pathkeys.
+ *	Although Hashjoins also work only with equijoin operators, it is *not*
+ *	safe to consider the output of a Hashjoin to be sorted in any particular
+ *	order --- not even the outer path's order.  This is true because the
+ *	executor might have to split the join into multiple batches.
+ *
+ *	NestJoin does not perform sorting, and allows non-equijoins, so it does
+ *	not allow useful pathkeys.  (But couldn't we use the outer path's order?)
 *
 *	-- bjm
- *	
+ *--------------------
 */
 
 /****************************************************************************
@ -228,7 +237,7 @@ get_cheapest_path_for_joinkeys(List *joinkeys,
 								 int outer_or_inner)
 {
 	Path	   *matched_path = NULL;
-	List	   *i = NIL;
+	List	   *i;

 	foreach(i, paths)
 	{
@ -337,16 +346,16 @@ new_join_pathkeys(List *outer_pathkeys,
 				  List *join_rel_tlist,
 				  List *joinclauses)
 {
-	List	   *outer_pathkey = NIL;
 	List	   *final_pathkeys = NIL;
-	List	   *new_pathkey;
-	List	   *i = NIL;
+	List	   *i;

 	foreach(i, outer_pathkeys)
 	{
-		outer_pathkey = lfirst(i);
+		List	   *outer_pathkey = lfirst(i);
+		List	   *new_pathkey;
+
 		new_pathkey = new_join_pathkey(outer_pathkey, join_rel_tlist,
-										joinclauses);
+									   joinclauses);
 		if (new_pathkey != NIL)
 			final_pathkeys = lappend(final_pathkeys, new_pathkey);
 	}
@ -355,19 +364,18 @@ new_join_pathkeys(List *outer_pathkeys,

 /*
 * new_join_pathkey
- *	  Finds new vars that become pathkeys due to qualification clauses that
- *	  contain any previously considered pathkeys.  These new pathkeys plus the
- *	  pathkeys from 'pathkeys' form a new pathkey for the join relation.
+ *	  Generate an individual pathkey sublist, consisting of the outer vars
+ *	  already mentioned in 'pathkey' plus any inner vars that are joined to
+ *	  them (and thus can now also be considered path keys, per discussion
+ *	  at the top of this file).
 *
 *	  Note that each returned pathkey is the var node found in
 *	  'join_rel_tlist' rather than the joinclause var node.
+ *	  (Is this important?)  Also, we return a fully copied list
+ *	  that does not share any subnodes with existing data structures.
+ *	  (Is that important, either?)
 *
- * 'pathkeys' is a list of pathkeys for which matching pathkeys are to be
- *		found
- * 'considered_pathkeys' is the current list of all pathkeys corresponding
- *		to a given pathkey
- *
- * Returns a new pathkey(list of pathkeys).
+ * Returns a new pathkey (list of pathkey variables).
 *
 */
 static List *
@ -375,84 +383,33 @@ new_join_pathkey(List *pathkey,
 				 List *join_rel_tlist,
 				 List *joinclauses)
 {
-	List	   *final_pathkey = NIL;
-	List	   *i = NIL;
-	List	   *considered_pathkeys = NIL;
+	List	   *new_pathkey = NIL;
+	List	   *i,
+			   *j;

 	foreach(i, pathkey)
 	{
 		Var		   *key = (Var *) lfirst(i);
-		List	   *joined_keys;
 		Expr	   *tlist_key;

 		Assert(key);
-		joined_keys = get_joinvars_for_var(key, &considered_pathkeys,
-										join_rel_tlist, joinclauses);
-		if (joined_keys)
-		{
-			considered_pathkeys =  nconc(considered_pathkeys, joined_keys);
-			final_pathkey = nconc(final_pathkey, joined_keys);
-		}
 									
 		tlist_key = matching_tlist_var(key, join_rel_tlist);
-		if (tlist_key && !member(tlist_key, considered_pathkeys))
+		if (tlist_key && !member(tlist_key, new_pathkey))
+			new_pathkey = lcons(copyObject(tlist_key), new_pathkey);
+
+		foreach(j, joinclauses)
 		{
-			/*
-			 *	If pathkey is in the target list, and not considered,
-			 *	add it
-			 */
-			considered_pathkeys =  lcons(tlist_key, considered_pathkeys);
-			final_pathkey = lcons(tlist_key, final_pathkey);
-		}
-	}
-	return copyObject(final_pathkey);
-}
+			Expr	   *joinclause = lfirst(j);
+			Expr	   *tlist_other_var;

-/*
- * get_joinvars_for_var
- *	  Returns a list of new pathkeys:
- *	  (1) which are not listed in 'considered_pathkeys'
- *	  (2) for which the "other" variable in some clause in 'joinclauses' is
- *		  'pathkey'
- *	  (3) which are mentioned in 'join_rel_tlist'
- *
- *	  Note that each returned pathkey is the var node found in
- *	  'join_rel_tlist' rather than the joinclause var node.
- *
- * 'pathkey' is the var node for which we are trying to find matching
- *		clauses
- *
- * Returns a list of new pathkeys.
- *
- */
-static List *
-get_joinvars_for_var(Var *key,
-					 List **considered_pathkeys,
-					 List *join_rel_tlist,
-					 List *joinclauses)
-{
-	List	   *final_pathkey = NIL;
-	Expr	   *joinclause;
-	List	   *i;
-	Expr	   *tlist_other_var;
-
-	foreach(i, joinclauses)
-	{
-		joinclause = lfirst(i);
-
-		tlist_other_var = matching_tlist_var(
+			tlist_other_var = matching_tlist_var(
 								other_join_clause_var(key, joinclause),
 								join_rel_tlist);
-		if (tlist_other_var &&
-			!member(tlist_other_var, *considered_pathkeys))
-		{
-			/*
-			 *	The key has a join variable that is in the target list,
-			 *	and has not been considered.
-			 */
-			*considered_pathkeys = lcons(tlist_other_var, *considered_pathkeys);
-			final_pathkey = lcons(tlist_other_var, final_pathkey);
+			if (tlist_other_var && !member(tlist_other_var, new_pathkey))
+				new_pathkey = lcons(copyObject(tlist_other_var), new_pathkey);
 		}
 	}
-	return final_pathkey;
+
+	return new_pathkey;
 }