Fix plpgsql to allow new-style SQL CREATE FUNCTION as a SQL command.

plpgsql fails on new-style CREATE FUNCTION/PROCEDURE commands within
a routine or DO block, because make_execsql_stmt believes that a
semicolon token always terminates a SQL command.  Now, that's actually
been wrong since the day it was written, because CREATE RULE has long
allowed multiple rule actions separated by semicolons.  But there are
few enough people using multi-action rules that there was never an
attempt to fix it.  New-style SQL functions, though, are popular.

psql has this same problem of "does this semicolon really terminate
the command?".  It deals with CREATE RULE by counting parenthesis
nesting depth: a semicolon within parens doesn't end a command.
Commits e717a9a18 and 029c5ac03 created a similar heuristic to count
matching BEGIN/END pairs (but only within CREATEs, so as not to be
fooled by plain BEGIN).  That's survived several releases now without
trouble reports, so let's just absorb those heuristics into plpgsql.

Per report from Samuel Dussault.  Back-patch to v14 where new-style
SQL function syntax came in.

Discussion: https://postgr.es/m/YT2PR01MB88552C3E9AD40A6C038774A781722@YT2PR01MB8855.CANPRD01.PROD.OUTLOOK.COM
This commit is contained in:
Tom Lane 2024-01-18 16:10:57 -05:00
parent e313a61137
commit 57b440ec11
5 changed files with 122 additions and 11 deletions

View File

@ -32,8 +32,9 @@ DATA = plpgsql.control plpgsql--1.0.sql
REGRESS_OPTS = --dbname=$(PL_TESTDB)
REGRESS = plpgsql_array plpgsql_call plpgsql_control plpgsql_copy plpgsql_domain \
plpgsql_record plpgsql_cache plpgsql_simple plpgsql_transaction \
REGRESS = plpgsql_array plpgsql_cache plpgsql_call plpgsql_control \
plpgsql_copy plpgsql_domain plpgsql_misc \
plpgsql_record plpgsql_simple plpgsql_transaction \
plpgsql_trap plpgsql_trigger plpgsql_varprops
# where to find gen_keywordlist.pl and subsidiary files

View File

@ -0,0 +1,31 @@
--
-- Miscellaneous topics
--
-- Verify that we can parse new-style CREATE FUNCTION/PROCEDURE
do
$$
declare procedure int; -- check we still recognize non-keywords as vars
begin
create function test1() returns int
begin atomic
select 2 + 2;
end;
create or replace procedure test2(x int)
begin atomic
select x + 2;
end;
end
$$;
\sf test1
CREATE OR REPLACE FUNCTION public.test1()
RETURNS integer
LANGUAGE sql
BEGIN ATOMIC
SELECT (2 + 2);
END
\sf test2
CREATE OR REPLACE PROCEDURE public.test2(IN x integer)
LANGUAGE sql
BEGIN ATOMIC
SELECT (x + 2);
END

View File

@ -76,12 +76,13 @@ tests += {
'regress': {
'sql': [
'plpgsql_array',
'plpgsql_cache',
'plpgsql_call',
'plpgsql_control',
'plpgsql_copy',
'plpgsql_domain',
'plpgsql_misc',
'plpgsql_record',
'plpgsql_cache',
'plpgsql_simple',
'plpgsql_transaction',
'plpgsql_trap',

View File

@ -76,7 +76,8 @@ static PLpgSQL_expr *read_sql_expression2(int until, int until2,
int *endtoken);
static PLpgSQL_expr *read_sql_stmt(void);
static PLpgSQL_type *read_datatype(int tok);
static PLpgSQL_stmt *make_execsql_stmt(int firsttoken, int location);
static PLpgSQL_stmt *make_execsql_stmt(int firsttoken, int location,
PLword *word);
static PLpgSQL_stmt_fetch *read_fetch_direction(void);
static void complete_direction(PLpgSQL_stmt_fetch *fetch,
bool *check_FROM);
@ -1972,15 +1973,15 @@ loop_body : proc_sect K_END K_LOOP opt_label ';'
*/
stmt_execsql : K_IMPORT
{
$$ = make_execsql_stmt(K_IMPORT, @1);
$$ = make_execsql_stmt(K_IMPORT, @1, NULL);
}
| K_INSERT
{
$$ = make_execsql_stmt(K_INSERT, @1);
$$ = make_execsql_stmt(K_INSERT, @1, NULL);
}
| K_MERGE
{
$$ = make_execsql_stmt(K_MERGE, @1);
$$ = make_execsql_stmt(K_MERGE, @1, NULL);
}
| T_WORD
{
@ -1991,7 +1992,7 @@ stmt_execsql : K_IMPORT
if (tok == '=' || tok == COLON_EQUALS ||
tok == '[' || tok == '.')
word_is_not_variable(&($1), @1);
$$ = make_execsql_stmt(T_WORD, @1);
$$ = make_execsql_stmt(T_WORD, @1, &($1));
}
| T_CWORD
{
@ -2002,7 +2003,7 @@ stmt_execsql : K_IMPORT
if (tok == '=' || tok == COLON_EQUALS ||
tok == '[' || tok == '.')
cword_is_not_variable(&($1), @1);
$$ = make_execsql_stmt(T_CWORD, @1);
$$ = make_execsql_stmt(T_CWORD, @1, NULL);
}
;
@ -2947,8 +2948,13 @@ read_datatype(int tok)
return result;
}
/*
* Read a generic SQL statement. We have already read its first token;
* firsttoken is that token's code and location its starting location.
* If firsttoken == T_WORD, pass its yylval value as "word", else pass NULL.
*/
static PLpgSQL_stmt *
make_execsql_stmt(int firsttoken, int location)
make_execsql_stmt(int firsttoken, int location, PLword *word)
{
StringInfoData ds;
IdentifierLookup save_IdentifierLookup;
@ -2961,9 +2967,16 @@ make_execsql_stmt(int firsttoken, int location)
bool have_strict = false;
int into_start_loc = -1;
int into_end_loc = -1;
int paren_depth = 0;
int begin_depth = 0;
bool in_routine_definition = false;
int token_count = 0;
char tokens[4]; /* records the first few tokens */
initStringInfo(&ds);
memset(tokens, 0, sizeof(tokens));
/* special lookup mode for identifiers within the SQL text */
save_IdentifierLookup = plpgsql_IdentifierLookup;
plpgsql_IdentifierLookup = IDENTIFIER_LOOKUP_EXPR;
@ -2972,6 +2985,12 @@ make_execsql_stmt(int firsttoken, int location)
* Scan to the end of the SQL command. Identify any INTO-variables
* clause lurking within it, and parse that via read_into_target().
*
* The end of the statement is defined by a semicolon ... except that
* semicolons within parentheses or BEGIN/END blocks don't terminate a
* statement. We follow psql's lead in not recognizing BEGIN/END except
* after CREATE [OR REPLACE] {FUNCTION|PROCEDURE}. END can also appear
* within a CASE construct, so we treat CASE/END like BEGIN/END.
*
* Because INTO is sometimes used in the main SQL grammar, we have to be
* careful not to take any such usage of INTO as a PL/pgSQL INTO clause.
* There are currently three such cases:
@ -2997,13 +3016,50 @@ make_execsql_stmt(int firsttoken, int location)
* break this logic again ... beware!
*/
tok = firsttoken;
if (tok == T_WORD && strcmp(word->ident, "create") == 0)
tokens[token_count] = 'c';
token_count++;
for (;;)
{
prev_tok = tok;
tok = yylex();
if (have_into && into_end_loc < 0)
into_end_loc = yylloc; /* token after the INTO part */
if (tok == ';')
/* Detect CREATE [OR REPLACE] {FUNCTION|PROCEDURE} */
if (tokens[0] == 'c' && token_count < sizeof(tokens))
{
if (tok == K_OR)
tokens[token_count] = 'o';
else if (tok == T_WORD &&
strcmp(yylval.word.ident, "replace") == 0)
tokens[token_count] = 'r';
else if (tok == T_WORD &&
strcmp(yylval.word.ident, "function") == 0)
tokens[token_count] = 'f';
else if (tok == T_WORD &&
strcmp(yylval.word.ident, "procedure") == 0)
tokens[token_count] = 'f'; /* treat same as "function" */
if (tokens[1] == 'f' ||
(tokens[1] == 'o' && tokens[2] == 'r' && tokens[3] == 'f'))
in_routine_definition = true;
token_count++;
}
/* Track paren nesting (needed for CREATE RULE syntax) */
if (tok == '(')
paren_depth++;
else if (tok == ')' && paren_depth > 0)
paren_depth--;
/* We need track BEGIN/END nesting only in a routine definition */
if (in_routine_definition && paren_depth == 0)
{
if (tok == K_BEGIN || tok == K_CASE)
begin_depth++;
else if (tok == K_END && begin_depth > 0)
begin_depth--;
}
/* Command-ending semicolon? */
if (tok == ';' && paren_depth == 0 && begin_depth == 0)
break;
if (tok == 0)
yyerror("unexpected end of function definition");

View File

@ -0,0 +1,22 @@
--
-- Miscellaneous topics
--
-- Verify that we can parse new-style CREATE FUNCTION/PROCEDURE
do
$$
declare procedure int; -- check we still recognize non-keywords as vars
begin
create function test1() returns int
begin atomic
select 2 + 2;
end;
create or replace procedure test2(x int)
begin atomic
select x + 2;
end;
end
$$;
\sf test1
\sf test2