Fix list-munging bug that broke SQL function result coercions.

Since commit 913bbd88d, check_sql_fn_retval() can either insert type coercion steps in-line in the Query that produces the SQL function's results, or generate a new top-level Query to perform the coercions, if modifying the Query's output in-place wouldn't be safe. However, it appears that the latter case has never actually worked, because the code tried to inject the new Query back into the query list it was passed ... which is not the list that will be used for later processing when we execute the SQL function "normally" (without inlining it). So we ended up with no coercion happening at run-time, leading to wrong results or crashes depending on the datatypes involved. While the regression tests look like they cover this area well enough, through a huge bit of bad luck all the test cases that exercise the separate-Query path were checking either inline-able cases (which accidentally didn't have the bug) or cases that are no-ops at runtime (e.g., varchar to text), so that the failure to perform the coercion wasn't obvious. The fact that the cases that don't work weren't allowed at all before v13 probably contributed to not noticing the problem sooner, too. To fix, get rid of the separate "flat" list of Query nodes and instead pass the real two-level list that is going to be used later. I chose to make the same change in check_sql_fn_statements(), although that has no actual bug, just so that we don't need that data structure at all. This is an API change, as evidenced by the adjustments needed to callers outside functions.c. That's a bit scary to be doing in a released branch, but so far as I can tell from a quick search, there are no outside callers of these functions (and they are sufficiently specific to our semantics for SQL-language functions that it's not apparent why any extension would need to call them). In any case, v13 already changed the API of check_sql_fn_retval() compared to prior branches. Per report from pinker. Back-patch to v13 where this code came in. Discussion: https://postgr.es/m/1603050466566-0.post@n3.nabble.com
2020-10-19 14:33:01 -04:00 · 2020-10-19 14:33:01 -04:00 · c8ab970179
parent c5f42daa60
commit c8ab970179
6 changed files with 131 additions and 59 deletions
--- a/src/backend/catalog/pg_proc.c
+++ b/src/backend/catalog/pg_proc.c
@ -913,8 +913,8 @@ fmgr_sql_validator(PG_FUNCTION_ARGS)
 																  (ParserSetupHook) sql_fn_parser_setup,
 																  pinfo,
 																  NULL);
-				querytree_list = list_concat(querytree_list,
-											 querytree_sublist);
+				querytree_list = lappend(querytree_list,
+										 querytree_sublist);
 			}

 			check_sql_fn_statements(querytree_list);
--- a/src/backend/executor/functions.c
+++ b/src/backend/executor/functions.c
@ -609,7 +609,6 @@ init_sql_fcache(FunctionCallInfo fcinfo, Oid collation, bool lazyEvalOK)
 	SQLFunctionCachePtr fcache;
 	List	   *raw_parsetree_list;
 	List	   *queryTree_list;
-	List	   *flat_query_list;
 	List	   *resulttlist;
 	ListCell   *lc;
 	Datum		tmp;
@ -689,13 +688,7 @@ init_sql_fcache(FunctionCallInfo fcinfo, Oid collation, bool lazyEvalOK)

 	/*
 	 * Parse and rewrite the queries in the function text.  Use sublists to
-	 * keep track of the original query boundaries.  But we also build a
-	 * "flat" list of the rewritten queries to pass to check_sql_fn_retval.
-	 * This is because the last canSetTag query determines the result type
-	 * independently of query boundaries --- and it might not be in the last
-	 * sublist, for example if the last query rewrites to DO INSTEAD NOTHING.
-	 * (It might not be unreasonable to throw an error in such a case, but
-	 * this is the historical behavior and it doesn't seem worth changing.)
+	 * keep track of the original query boundaries.
 	 *
 	 * Note: since parsing and planning is done in fcontext, we will generate
 	 * a lot of cruft that lives as long as the fcache does.  This is annoying
@ -705,7 +698,6 @@ init_sql_fcache(FunctionCallInfo fcinfo, Oid collation, bool lazyEvalOK)
 	raw_parsetree_list = pg_parse_query(fcache->src);

 	queryTree_list = NIL;
-	flat_query_list = NIL;
 	foreach(lc, raw_parsetree_list)
 	{
 		RawStmt    *parsetree = lfirst_node(RawStmt, lc);
@ -717,10 +709,12 @@ init_sql_fcache(FunctionCallInfo fcinfo, Oid collation, bool lazyEvalOK)
 														  fcache->pinfo,
 														  NULL);
 		queryTree_list = lappend(queryTree_list, queryTree_sublist);
-		flat_query_list = list_concat(flat_query_list, queryTree_sublist);
 	}

-	check_sql_fn_statements(flat_query_list);
+	/*
+	 * Check that there are no statements we don't want to allow.
+	 */
+	check_sql_fn_statements(queryTree_list);

 	/*
 	 * Check that the function returns the type it claims to.  Although in
@ -740,7 +734,7 @@ init_sql_fcache(FunctionCallInfo fcinfo, Oid collation, bool lazyEvalOK)
 	 * the rowtype column into multiple columns, since we have no way to
 	 * notify the caller that it should do that.)
 	 */
-	fcache->returnsTuple = check_sql_fn_retval(flat_query_list,
+	fcache->returnsTuple = check_sql_fn_retval(queryTree_list,
 											   rettype,
 											   rettupdesc,
 											   false,
@ -1510,51 +1504,63 @@ ShutdownSQLFunction(Datum arg)
 * is not acceptable.
 */
 void
-check_sql_fn_statements(List *queryTreeList)
+check_sql_fn_statements(List *queryTreeLists)
 {
 	ListCell   *lc;

-	foreach(lc, queryTreeList)
+	/* We are given a list of sublists of Queries */
+	foreach(lc, queryTreeLists)
 	{
-		Query	   *query = lfirst_node(Query, lc);
+		List	   *sublist = lfirst_node(List, lc);
+		ListCell   *lc2;

-		/*
-		 * Disallow procedures with output arguments.  The current
-		 * implementation would just throw the output values away, unless the
-		 * statement is the last one.  Per SQL standard, we should assign the
-		 * output values by name.  By disallowing this here, we preserve an
-		 * opportunity for future improvement.
-		 */
-		if (query->commandType == CMD_UTILITY &&
-			IsA(query->utilityStmt, CallStmt))
+		foreach(lc2, sublist)
 		{
-			CallStmt   *stmt = castNode(CallStmt, query->utilityStmt);
-			HeapTuple	tuple;
-			int			numargs;
-			Oid		   *argtypes;
-			char	  **argnames;
-			char	   *argmodes;
-			int			i;
+			Query	   *query = lfirst_node(Query, lc2);

-			tuple = SearchSysCache1(PROCOID, ObjectIdGetDatum(stmt->funcexpr->funcid));
-			if (!HeapTupleIsValid(tuple))
-				elog(ERROR, "cache lookup failed for function %u", stmt->funcexpr->funcid);
-			numargs = get_func_arg_info(tuple, &argtypes, &argnames, &argmodes);
-			ReleaseSysCache(tuple);
-
-			for (i = 0; i < numargs; i++)
+			/*
+			 * Disallow procedures with output arguments.  The current
+			 * implementation would just throw the output values away, unless
+			 * the statement is the last one.  Per SQL standard, we should
+			 * assign the output values by name.  By disallowing this here, we
+			 * preserve an opportunity for future improvement.
+			 */
+			if (query->commandType == CMD_UTILITY &&
+				IsA(query->utilityStmt, CallStmt))
 			{
-				if (argmodes && (argmodes[i] == PROARGMODE_INOUT || argmodes[i] == PROARGMODE_OUT))
-					ereport(ERROR,
-							(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-							 errmsg("calling procedures with output arguments is not supported in SQL functions")));
+				CallStmt   *stmt = castNode(CallStmt, query->utilityStmt);
+				HeapTuple	tuple;
+				int			numargs;
+				Oid		   *argtypes;
+				char	  **argnames;
+				char	   *argmodes;
+				int			i;
+
+				tuple = SearchSysCache1(PROCOID,
+										ObjectIdGetDatum(stmt->funcexpr->funcid));
+				if (!HeapTupleIsValid(tuple))
+					elog(ERROR, "cache lookup failed for function %u",
+						 stmt->funcexpr->funcid);
+				numargs = get_func_arg_info(tuple,
+											&argtypes, &argnames, &argmodes);
+				ReleaseSysCache(tuple);
+
+				for (i = 0; i < numargs; i++)
+				{
+					if (argmodes && (argmodes[i] == PROARGMODE_INOUT ||
+									 argmodes[i] == PROARGMODE_OUT))
+						ereport(ERROR,
+								(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+								 errmsg("calling procedures with output arguments is not supported in SQL functions")));
+				}
 			}
 		}
 	}
 }

 /*
- * check_sql_fn_retval() -- check return value of a list of sql parse trees.
+ * check_sql_fn_retval()
+ *		Check return value of a list of lists of sql parse trees.
 *
 * The return value of a sql function is the value returned by the last
 * canSetTag query in the function.  We do some ad-hoc type checking and
@ -1592,7 +1598,7 @@ check_sql_fn_statements(List *queryTreeList)
 * function is defined to return VOID then *resultTargetList is set to NIL.
 */
 bool
-check_sql_fn_retval(List *queryTreeList,
+check_sql_fn_retval(List *queryTreeLists,
 					Oid rettype, TupleDesc rettupdesc,
 					bool insertDroppedCols,
 					List **resultTargetList)
@ -1619,20 +1625,30 @@ check_sql_fn_retval(List *queryTreeList,
 		return false;

 	/*
-	 * Find the last canSetTag query in the list.  This isn't necessarily the
-	 * last parsetree, because rule rewriting can insert queries after what
-	 * the user wrote.
+	 * Find the last canSetTag query in the function body (which is presented
+	 * to us as a list of sublists of Query nodes).  This isn't necessarily
+	 * the last parsetree, because rule rewriting can insert queries after
+	 * what the user wrote.  Note that it might not even be in the last
+	 * sublist, for example if the last query rewrites to DO INSTEAD NOTHING.
+	 * (It might not be unreasonable to throw an error in such a case, but
+	 * this is the historical behavior and it doesn't seem worth changing.)
 	 */
 	parse = NULL;
 	parse_cell = NULL;
-	foreach(lc, queryTreeList)
+	foreach(lc, queryTreeLists)
 	{
-		Query	   *q = lfirst_node(Query, lc);
+		List	   *sublist = lfirst_node(List, lc);
+		ListCell   *lc2;

-		if (q->canSetTag)
+		foreach(lc2, sublist)
 		{
-			parse = q;
-			parse_cell = lc;
+			Query	   *q = lfirst_node(Query, lc2);
+
+			if (q->canSetTag)
+			{
+				parse = q;
+				parse_cell = lc2;
+			}
 		}
 	}

--- a/src/backend/optimizer/util/clauses.c
+++ b/src/backend/optimizer/util/clauses.c
@ -4522,7 +4522,8 @@ inline_function(Oid funcid, Oid result_type, Oid result_collid,
 	 * needed; that's probably not important, but let's be careful.
 	 */
 	querytree_list = list_make1(querytree);
-	if (check_sql_fn_retval(querytree_list, result_type, rettupdesc,
+	if (check_sql_fn_retval(list_make1(querytree_list),
+							result_type, rettupdesc,
 							false, NULL))
 		goto fail;				/* reject whole-tuple-result cases */

@ -5040,7 +5041,7 @@ inline_set_returning_function(PlannerInfo *root, RangeTblEntry *rte)
 	 * shows it's returning a whole tuple result; otherwise what it's
 	 * returning is a single composite column which is not what we need.
 	 */
-	if (!check_sql_fn_retval(querytree_list,
+	if (!check_sql_fn_retval(list_make1(querytree_list),
 							 fexpr->funcresulttype, rettupdesc,
 							 true, NULL) &&
 		(functypclass == TYPEFUNC_COMPOSITE ||
@ -5052,7 +5053,7 @@ inline_set_returning_function(PlannerInfo *root, RangeTblEntry *rte)
 	 * check_sql_fn_retval might've inserted a projection step, but that's
 	 * fine; just make sure we use the upper Query.
 	 */
-	querytree = linitial(querytree_list);
+	querytree = linitial_node(Query, querytree_list);

 	/*
 	 * Looks good --- substitute parameters into the query.
--- a/src/include/executor/functions.h
+++ b/src/include/executor/functions.h
@ -29,9 +29,9 @@ extern SQLFunctionParseInfoPtr prepare_sql_fn_parse_info(HeapTuple procedureTupl
 extern void sql_fn_parser_setup(struct ParseState *pstate,
 								SQLFunctionParseInfoPtr pinfo);

-extern void check_sql_fn_statements(List *queryTreeList);
+extern void check_sql_fn_statements(List *queryTreeLists);

-extern bool check_sql_fn_retval(List *queryTreeList,
+extern bool check_sql_fn_retval(List *queryTreeLists,
 								Oid rettype, TupleDesc rettupdesc,
 								bool insertDroppedCols,
 								List **resultTargetList);
--- a/src/test/regress/expected/rangefuncs.out
+++ b/src/test/regress/expected/rangefuncs.out
@ -2109,6 +2109,50 @@ select * from testrngfunc();
 7.136178 | 7.14
 (1 row)

+create or replace function testrngfunc() returns setof rngfunc_type as $$
+  select 1, 2 union select 3, 4 order by 1;
+$$ language sql immutable;
+explain (verbose, costs off)
+select testrngfunc();
+       QUERY PLAN        
+-------------------------
+ ProjectSet
+   Output: testrngfunc()
+   ->  Result
+(3 rows)
+
+select testrngfunc();
+   testrngfunc   
+-----------------
+ (1.000000,2.00)
+ (3.000000,4.00)
+(2 rows)
+
+explain (verbose, costs off)
+select * from testrngfunc();
+                        QUERY PLAN                        
+----------------------------------------------------------
+ Subquery Scan on "*SELECT*"
+   Output: "*SELECT*"."?column?", "*SELECT*"."?column?_1"
+   ->  Unique
+         Output: (1), (2)
+         ->  Sort
+               Output: (1), (2)
+               Sort Key: (1), (2)
+               ->  Append
+                     ->  Result
+                           Output: 1, 2
+                     ->  Result
+                           Output: 3, 4
+(12 rows)
+
+select * from testrngfunc();
+    f1    |  f2  
+----------+------
+ 1.000000 | 2.00
+ 3.000000 | 4.00
+(2 rows)
+
 -- Check a couple of error cases while we're here
 select * from testrngfunc() as t(f1 int8,f2 int8);  -- fail, composite result
 ERROR:  a column definition list is redundant for a function returning a named composite type
--- a/src/test/regress/sql/rangefuncs.sql
+++ b/src/test/regress/sql/rangefuncs.sql
@ -629,6 +629,17 @@ explain (verbose, costs off)
 select * from testrngfunc();
 select * from testrngfunc();

+create or replace function testrngfunc() returns setof rngfunc_type as $$
+  select 1, 2 union select 3, 4 order by 1;
+$$ language sql immutable;
+
+explain (verbose, costs off)
+select testrngfunc();
+select testrngfunc();
+explain (verbose, costs off)
+select * from testrngfunc();
+select * from testrngfunc();
+
 -- Check a couple of error cases while we're here
 select * from testrngfunc() as t(f1 int8,f2 int8);  -- fail, composite result
 select * from pg_get_keywords() as t(f1 int8,f2 int8);  -- fail, OUT params