postgresql/src/bin/psql/psqlscanslash.l

864 lines
20 KiB
Plaintext

%top{
/*-------------------------------------------------------------------------
*
* psqlscanslash.l
* lexical scanner for psql backslash commands
*
* XXX Avoid creating backtracking cases --- see the backend lexer for info.
*
* See fe_utils/psqlscan_int.h for additional commentary.
*
* Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* src/bin/psql/psqlscanslash.l
*
*-------------------------------------------------------------------------
*/
#include "postgres_fe.h"
#include <ctype.h>
#include "common.h"
#include "psqlscanslash.h"
#include "common/logging.h"
#include "fe_utils/conditional.h"
#include "libpq-fe.h"
}
%{
#include "fe_utils/psqlscan_int.h"
/*
* We must have a typedef YYSTYPE for yylex's first argument, but this lexer
* doesn't presently make use of that argument, so just declare it as int.
* (The entry points in this file simply pass NULL for it.)
*/
typedef int YYSTYPE;
/*
* Set the type of yyextra; we use it as a pointer back to the containing
* PsqlScanState.
*/
#define YY_EXTRA_TYPE PsqlScanState
/*
* These variables do not need to be saved across calls. Yeah, it's a bit
* of a hack, but putting them into PsqlScanStateData would be klugy too.
*
* psql_scan_slash_option() initializes option_type, option_quote and
* unquoted_option_chars just before it calls yylex(); the rule actions
* then read or update them. Being file-level statics, they are shared by
* every scan state that goes through this lexer.
*/
/* option type requested by the caller; decides whether a leading vertical bar is special */
static enum slash_option_type option_type;
/* where the rule actions record the most recent quoting symbol seen in the argument */
static char *option_quote;
/* number of characters at the end of the argument that were not quoted */
static int unquoted_option_chars;
/* output_buf length when an opening backtick was seen, i.e. where the command text starts */
static int backtick_start_offset;
/* Return values from yylex() */
#define LEXRES_EOL 0 /* end of input */
#define LEXRES_OK 1 /* OK completion of backslash argument */
/* Runs the command collected between backticks; defined after the rules section */
static void evaluate_backtick(PsqlScanState state);
/*
* Redefine ECHO so that matched text is handed to psqlscan_emit() for the
* current scan state. This depends on the cur_state local variable that is
* declared at the top of yylex().
*/
#define ECHO psqlscan_emit(cur_state, yytext, yyleng)
/*
* Work around a bug in flex 2.5.35: it emits a couple of functions that
* it forgets to emit declarations for. Since we use -Wmissing-prototypes,
* this would cause warnings. Providing our own declarations should be
* harmless even when the bug gets fixed.
*/
extern int slash_yyget_column(yyscan_t yyscanner);
extern void slash_yyset_column(int column_no, yyscan_t yyscanner);
/* LCOV_EXCL_START */
%}
/* Except for the prefix, these options should match psqlscan.l */
%option reentrant
%option bison-bridge
%option 8bit
%option never-interactive
%option nodefault
%option noinput
%option nounput
%option noyywrap
%option warn
%option prefix="slash_yy"
/*
* OK, here is a short description of lex/flex rules behavior.
* The longest pattern which matches an input string is always chosen.
* For equal-length patterns, the first occurring in the rules list is chosen.
* INITIAL is the starting state, to which all non-conditional rules apply.
* Exclusive states change parsing rules while the state is active. When in
* an exclusive state, only those rules defined for that state apply.
*/
/*
* Exclusive states for lexing backslash commands
*
* xslashcmd lexing the backslash command name
* xslashargstart skipping whitespace ahead of an argument
* xslasharg unquoted text of an argument
* xslashquote inside single-quoted text of an argument
* xslashbackquote inside backticked text of an argument
* xslashdquote inside double-quoted text of an argument
* xslashwholeline copying the rest of the input line as one argument
* xslashend looking for a double backslash after the command
*/
%x xslashcmd
%x xslashargstart
%x xslasharg
%x xslashquote
%x xslashbackquote
%x xslashdquote
%x xslashwholeline
%x xslashend
/*
* Assorted character class definitions that should match psqlscan.l.
*
* space one whitespace character, newline included
* xeoctesc backslash followed by one to three octal digits
* xehexesc backslash, the letter x, then one or two hex digits
* xqdouble two single quotes in a row
* variable_char a character allowed in a psql variable name: ASCII
* letters, digits, underscore, or any byte with the
* high bit set
* other any single character other than newline
*/
space [ \t\n\r\f\v]
quote '
xeoctesc [\\][0-7]{1,3}
xehexesc [\\]x[0-9A-Fa-f]{1,2}
xqdouble {quote}{quote}
dquote \"
variable_char [A-Za-z\200-\377_0-9]
other .
%%
%{
/* Declare some local variables inside yylex(), for convenience */
PsqlScanState cur_state = yyextra;
/*
* Destination for emitted text: whatever the calling entry point stored
* in the scan state. psql_scan_slash_command_end() stores NULL there,
* since the rules it uses never emit anything.
*/
PQExpBuffer output_buf = cur_state->output_buf;
/*
* Force flex into the state indicated by start_state. This has a
* couple of purposes: it lets some of the functions below set a new
* starting state without ugly direct access to flex variables, and it
* allows us to transition from one flex lexer to another so that we
* can lex different parts of the source string using separate lexers.
*/
BEGIN(cur_state->start_state);
%}
/*
* We don't really expect to be invoked in the INITIAL state in this
* lexer; but if we are, just spit data to the output_buf until EOF.
*/
{other}|\n { ECHO; }
/*
* Exclusive lexer states to handle backslash command lexing
*/
<xslashcmd>{
/*
* command name ends at whitespace or backslash; eat all else
*
* The terminating character is given back with yyless(0), so it is still
* in the input for whichever lexer call comes next. The current start
* condition is saved in the scan state before returning.
*/
{space}|"\\" {
yyless(0);
cur_state->start_state = YY_START;
return LEXRES_OK;
}
/* any other character is part of the command name */
{other} { ECHO; }
}
<xslashargstart>{
/*
* Discard any whitespace before argument, then go to xslasharg state.
* An exception is that "|" is only special at start of argument, so we
* check for it here.
*
* If input ends while still in this state, psql_scan_slash_option()
* treats that as an empty argument.
*/
{space}+ { }
"|" {
if (option_type == OT_FILEPIPE)
{
/*
* treat like whole-string case
*
* The bar itself is emitted first, so output_buf is no longer
* empty and xslashwholeline will keep whitespace that follows.
*/
ECHO;
BEGIN(xslashwholeline);
}
else
{
/* vertical bar is not special otherwise */
yyless(0);
BEGIN(xslasharg);
}
}
/* first character of the argument: give it back and lex it as xslasharg */
{other} {
yyless(0);
BEGIN(xslasharg);
}
}
<xslasharg>{
/*
* Default processing of text in a slash command's argument.
*
* Note: unquoted_option_chars counts the number of characters at the
* end of the argument that were not subject to any form of quoting.
* psql_scan_slash_option needs this to strip trailing semicolons safely.
*
* Summary of the rules in this state:
* - unquoted whitespace or a backslash ends the argument;
* - a single quote, backtick or double quote switches to the matching
* quoting state, records the quote symbol through option_quote and
* resets unquoted_option_chars;
* - a colon followed by a variable name (bare, single-quoted,
* double-quoted, or written as {?name}) is handled as a psql
* variable reference;
* - an incomplete variable reference gives back everything after the
* colon with yyless(1) and emits just the colon as ordinary text;
* - any other character is emitted and counted as unquoted.
*/
{space}|"\\" {
/*
* Unquoted space is end of arg; do not eat. Likewise
* backslash is end of command or next command, do not eat
*
* XXX this means we can't conveniently accept options
* that include unquoted backslashes; therefore, option
* processing that encourages use of backslashes is rather
* broken.
*/
yyless(0);
cur_state->start_state = YY_START;
return LEXRES_OK;
}
{quote} {
/* the single quote itself is not copied to the output */
*option_quote = '\'';
unquoted_option_chars = 0;
BEGIN(xslashquote);
}
"`" {
/* remember where the command text begins, for evaluate_backtick() */
backtick_start_offset = output_buf->len;
*option_quote = '`';
unquoted_option_chars = 0;
BEGIN(xslashbackquote);
}
{dquote} {
/* unlike the other quote kinds, the double quote is kept in the output */
ECHO;
*option_quote = '"';
unquoted_option_chars = 0;
BEGIN(xslashdquote);
}
:{variable_char}+ {
/*
* Possible psql variable substitution
*
* Without a lookup callback the reference is emitted as-is, and
* option_quote is left alone in that case.
*/
if (cur_state->callbacks->get_variable == NULL)
ECHO;
else
{
char *varname;
char *value;
/* skip the leading colon to get the variable name */
varname = psqlscan_extract_substring(cur_state,
yytext + 1,
yyleng - 1);
value = cur_state->callbacks->get_variable(varname,
PQUOTE_PLAIN,
cur_state->cb_passthrough);
free(varname);
/*
* The variable value is just emitted without any
* further examination. This is consistent with the
* pre-8.0 code behavior, if not with the way that
* variables are handled outside backslash commands.
* Note that we needn't guard against recursion here.
*/
if (value)
{
appendPQExpBufferStr(output_buf, value);
free(value);
}
else
ECHO;
*option_quote = ':';
}
unquoted_option_chars = 0;
}
:'{variable_char}+' {
psqlscan_escape_variable(cur_state, yytext, yyleng,
PQUOTE_SQL_LITERAL);
*option_quote = ':';
unquoted_option_chars = 0;
}
:\"{variable_char}+\" {
psqlscan_escape_variable(cur_state, yytext, yyleng,
PQUOTE_SQL_IDENT);
*option_quote = ':';
unquoted_option_chars = 0;
}
:\{\?{variable_char}+\} {
psqlscan_test_variable(cur_state, yytext, yyleng);
}
:'{variable_char}* {
/* Throw back everything but the colon */
yyless(1);
unquoted_option_chars++;
ECHO;
}
:\"{variable_char}* {
/* Throw back everything but the colon */
yyless(1);
unquoted_option_chars++;
ECHO;
}
:\{\?{variable_char}* {
/* Throw back everything but the colon */
yyless(1);
unquoted_option_chars++;
ECHO;
}
:\{ {
/* Throw back everything but the colon */
yyless(1);
unquoted_option_chars++;
ECHO;
}
{other} {
unquoted_option_chars++;
ECHO;
}
}
<xslashquote>{
/*
* single-quoted text: copy literally except for '' and backslash
* sequences
*
* Neither the opening nor the closing quote is copied to the output.
* unquoted_option_chars was already zeroed when the opening quote was
* seen and is not touched by any rule in this state.
*/
/* closing quote: back to ordinary argument text */
{quote} { BEGIN(xslasharg); }
/* doubled quote stands for one literal quote */
{xqdouble} { appendPQExpBufferChar(output_buf, '\''); }
/* single-letter backslash escapes */
"\\n" { appendPQExpBufferChar(output_buf, '\n'); }
"\\t" { appendPQExpBufferChar(output_buf, '\t'); }
"\\b" { appendPQExpBufferChar(output_buf, '\b'); }
"\\r" { appendPQExpBufferChar(output_buf, '\r'); }
"\\f" { appendPQExpBufferChar(output_buf, '\f'); }
{xeoctesc} {
/* octal case: digits start after the backslash; value is cast to char */
appendPQExpBufferChar(output_buf,
(char) strtol(yytext + 1, NULL, 8));
}
{xehexesc} {
/* hex case: digits start after the backslash and the x; value is cast to char */
appendPQExpBufferChar(output_buf,
(char) strtol(yytext + 2, NULL, 16));
}
/* backslash before any other non-newline character: emit just that character */
"\\". { psqlscan_emit(cur_state, yytext + 1, 1); }
/* everything else, newlines included, is copied as-is */
{other}|\n { ECHO; }
}
<xslashbackquote>{
/*
* backticked text: copy everything until next backquote (expanding
* variable references, but doing nought else), then evaluate.
*
* The command text accumulates in output_buf starting at
* backtick_start_offset, which was recorded when the opening backtick
* was seen. The backticks themselves are not copied.
*/
"`" {
/*
* In an inactive \if branch, don't evaluate the command
*
* When evaluation is skipped, the collected command text simply
* stays in output_buf.
*/
if (cur_state->cb_passthrough == NULL ||
conditional_active((ConditionalStack) cur_state->cb_passthrough))
evaluate_backtick(cur_state);
BEGIN(xslasharg);
}
:{variable_char}+ {
/*
* Possible psql variable substitution
*
* Same lookup as in xslasharg, except that option_quote and
* unquoted_option_chars are not touched here.
*/
if (cur_state->callbacks->get_variable == NULL)
ECHO;
else
{
char *varname;
char *value;
/* skip the leading colon to get the variable name */
varname = psqlscan_extract_substring(cur_state,
yytext + 1,
yyleng - 1);
value = cur_state->callbacks->get_variable(varname,
PQUOTE_PLAIN,
cur_state->cb_passthrough);
free(varname);
/* an unknown variable leaves the reference text in place */
if (value)
{
appendPQExpBufferStr(output_buf, value);
free(value);
}
else
ECHO;
}
}
/* quoted variable reference: substituted using the PQUOTE_SHELL_ARG quoting mode */
:'{variable_char}+' {
psqlscan_escape_variable(cur_state, yytext, yyleng,
PQUOTE_SHELL_ARG);
}
:'{variable_char}* {
/* Throw back everything but the colon */
yyless(1);
ECHO;
}
{other}|\n { ECHO; }
}
<xslashdquote>{
/*
* double-quoted text: copy verbatim, including the double quotes
*
* The quotes stay in the output here; psql_scan_slash_option() removes
* them afterwards only for the SQL-identifier option types.
*/
{dquote} {
/* closing quote: emit it and return to ordinary argument text */
ECHO;
BEGIN(xslasharg);
}
{other}|\n { ECHO; }
}
<xslashwholeline>{
/* copy everything until end of input line */
/* but suppress leading whitespace */
/*
* Whitespace counts as leading only while output_buf is still empty.
* There are no quoting rules and no return statements in this state,
* so lexing continues until the end-of-input rule fires.
*/
{space}+ {
if (output_buf->len > 0)
ECHO;
}
{other} { ECHO; }
}
<xslashend>{
/*
* at end of command, eat a double backslash, but not anything else
*
* Either way the lexer returns after looking at a single token, and
* nothing is emitted (output_buf is NULL while in this state).
*/
"\\\\" {
/* the double backslash is consumed */
cur_state->start_state = YY_START;
return LEXRES_OK;
}
{other}|\n {
/* anything else is given back untouched */
yyless(0);
cur_state->start_state = YY_START;
return LEXRES_OK;
}
}
/*
* End of the current input buffer. The rule carries no start-condition
* prefix; flex applies such an EOF rule to every start condition that has
* no EOF action of its own, and no state in this file defines one.
*/
<<EOF>> {
if (cur_state->buffer_stack == NULL)
{
/*
* Save the state we ended in; psql_scan_slash_option()
* uses it to detect input that ended inside quotes.
*/
cur_state->start_state = YY_START;
return LEXRES_EOL; /* end of input reached */
}
/*
* We were expanding a variable, so pop the inclusion
* stack and keep lexing
*/
psqlscan_pop_buffer_stack(cur_state);
psqlscan_select_top_buffer(cur_state);
}
%%
/* LCOV_EXCL_STOP */
/*
 * Scan the command name of a psql backslash command.  This should be called
 * after psql_scan() returns PSCAN_BACKSLASH.  It is assumed that the input
 * has been consumed through the leading backslash.
 *
 * state: scan state that is currently scanning an input string.
 *
 * The return value is a malloc'd copy of the command name, as parsed off
 * from the input.  The caller is responsible for freeing it.
 *
 * If memory runs out while the name is being collected, an error is
 * reported and the program exits; the function never returns a pointer that
 * would be unsafe to free.
 */
char *
psql_scan_slash_command(PsqlScanState state)
{
	PQExpBufferData mybuf;

	/* Must be scanning already */
	Assert(state->scanbufhandle != NULL);

	/* Build a local buffer that we'll return the data of */
	initPQExpBuffer(&mybuf);

	/* Set current output target */
	state->output_buf = &mybuf;

	/* Set input source */
	if (state->buffer_stack != NULL)
		yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
	else
		yy_switch_to_buffer(state->scanbufhandle, state->scanner);

	/*
	 * Set lexer start state.  Note that this is sufficient to switch
	 * state->scanner over to using the tables in this lexer file.
	 */
	state->start_state = xslashcmd;

	/* And lex. */
	yylex(NULL, state->scanner);

	/* There are no possible syntax errors in this lex state... */

	/*
	 * In case the caller returns to using the regular SQL lexer, reselect the
	 * appropriate initial state.
	 */
	psql_scan_reselect_sql_lexer(state);

	/*
	 * ... but we could have run out of memory.  A broken PQExpBuffer's data
	 * pointer refers to a static empty string rather than malloc'd storage,
	 * so handing it back as "malloc'd" would make the caller free() memory
	 * that was never allocated.  Give up cleanly instead.
	 */
	if (PQExpBufferDataBroken(mybuf))
	{
		pg_log_error("out of memory");
		exit(1);
	}

	return mybuf.data;
}
/*
 * Parse off the next argument for a backslash command, and return it as a
 * malloc'd string.  If there are no more arguments, returns NULL.
 *
 * state is the scan state, which must currently be scanning an input string.
 *
 * type tells what processing, if any, to perform on the option string;
 * for example, if it's a SQL identifier, we want to downcase any unquoted
 * letters.
 *
 * if quote is not NULL, *quote is set to 0 if no quoting was found, else
 * the last quote symbol used in the argument.
 *
 * if semicolon is true, unquoted trailing semicolon(s) that would otherwise
 * be taken as part of the option string will be stripped.
 *
 * NOTE: the only possible syntax errors for backslash options are unmatched
 * quotes, which are detected when we run out of input.  Therefore, on a
 * syntax error we just throw away the string and return NULL; there is no
 * need to worry about flushing remaining input.
 *
 * Running out of memory while the argument is being collected is not
 * treated as a syntax error: it is reported and the program exits, so that
 * a truncated argument is never mistaken for "no more arguments" and a
 * non-malloc'd pointer is never returned.
 */
char *
psql_scan_slash_option(PsqlScanState state,
					   enum slash_option_type type,
					   char *quote,
					   bool semicolon)
{
	PQExpBufferData mybuf;
	int			lexresult PG_USED_FOR_ASSERTS_ONLY;
	int			final_state;
	char		local_quote;

	/* Must be scanning already */
	Assert(state->scanbufhandle != NULL);

	/* Let the lexer rules store the quote symbol unconditionally */
	if (quote == NULL)
		quote = &local_quote;
	*quote = 0;

	/* Build a local buffer that we'll return the data of */
	initPQExpBuffer(&mybuf);

	/* Set up static variables that will be used by yylex */
	option_type = type;
	option_quote = quote;
	unquoted_option_chars = 0;

	/* Set current output target */
	state->output_buf = &mybuf;

	/* Set input source */
	if (state->buffer_stack != NULL)
		yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
	else
		yy_switch_to_buffer(state->scanbufhandle, state->scanner);

	/* Set lexer start state */
	if (type == OT_WHOLE_LINE)
		state->start_state = xslashwholeline;
	else
		state->start_state = xslashargstart;

	/* And lex. */
	lexresult = yylex(NULL, state->scanner);

	/* Save final state for a moment... */
	final_state = state->start_state;

	/*
	 * In case the caller returns to using the regular SQL lexer, reselect the
	 * appropriate initial state.
	 */
	psql_scan_reselect_sql_lexer(state);

	/*
	 * If the buffer ran out of memory, its contents are gone and its data
	 * pointer refers to a static empty string.  Carrying on would either
	 * report a nonempty argument as missing or return a pointer the caller
	 * must not free, so give up here.
	 */
	if (PQExpBufferDataBroken(mybuf))
	{
		pg_log_error("out of memory");
		exit(1);
	}

	/*
	 * Check the lex result: we should have gotten back either LEXRES_OK or
	 * LEXRES_EOL (the latter indicating end of string).  If we were inside a
	 * quoted string, as indicated by final_state, EOL is an error.
	 */
	Assert(lexresult == LEXRES_EOL || lexresult == LEXRES_OK);

	switch (final_state)
	{
		case xslashargstart:
			/* empty arg */
			break;
		case xslasharg:
			/* Strip any unquoted trailing semicolons if requested */
			if (semicolon)
			{
				while (unquoted_option_chars-- > 0 &&
					   mybuf.len > 0 &&
					   mybuf.data[mybuf.len - 1] == ';')
				{
					mybuf.data[--mybuf.len] = '\0';
				}
			}

			/*
			 * If SQL identifier processing was requested, then we strip out
			 * excess double quotes and optionally downcase unquoted letters.
			 */
			if (type == OT_SQLID || type == OT_SQLIDHACK)
			{
				dequote_downcase_identifier(mybuf.data,
											(type != OT_SQLIDHACK),
											state->encoding);
				/* update mybuf.len for possible shortening */
				mybuf.len = strlen(mybuf.data);
			}
			break;
		case xslashquote:
		case xslashbackquote:
		case xslashdquote:
			/* must have hit EOL inside quotes */
			pg_log_error("unterminated quoted string");
			termPQExpBuffer(&mybuf);
			return NULL;
		case xslashwholeline:

			/*
			 * In whole-line mode, we interpret semicolon = true as stripping
			 * trailing whitespace as well as semicolons; this gives the
			 * nearest equivalent to what semicolon = true does in normal
			 * mode.  Note there's no concept of quoting in this mode.
			 */
			if (semicolon)
			{
				while (mybuf.len > 0 &&
					   (mybuf.data[mybuf.len - 1] == ';' ||
						(isascii((unsigned char) mybuf.data[mybuf.len - 1]) &&
						 isspace((unsigned char) mybuf.data[mybuf.len - 1]))))
				{
					mybuf.data[--mybuf.len] = '\0';
				}
			}
			break;
		default:
			/* can't get here */
			fprintf(stderr, "invalid YY_START\n");
			exit(1);
	}

	/*
	 * An unquoted empty argument isn't possible unless we are at end of
	 * command.  Return NULL instead.
	 */
	if (mybuf.len == 0 && *quote == 0)
	{
		termPQExpBuffer(&mybuf);
		return NULL;
	}

	/* Else return the completed string. */
	return mybuf.data;
}
/*
 * Finish off a backslash command: if the remaining input starts with a
 * double backslash, consume it; anything else is left in place.
 *
 * Nothing is emitted while doing so, and the scan state is left ready for
 * the regular SQL lexer.
 */
void
psql_scan_slash_command_end(PsqlScanState state)
{
	/* A scan must already be in progress */
	Assert(state->scanbufhandle != NULL);

	/* This pass produces no output, so there is no output target */
	state->output_buf = NULL;

	/* Start the lexer in the end-of-command state */
	state->start_state = xslashend;

	/* Read from the innermost stacked buffer if any, else the main one */
	yy_switch_to_buffer(state->buffer_stack != NULL ?
						state->buffer_stack->buf :
						state->scanbufhandle,
						state->scanner);

	/* Lex; this state has no error cases to report */
	yylex(NULL, state->scanner);

	/*
	 * The caller is expected to go back to the regular SQL lexer, so
	 * reselect its initial state.
	 */
	psql_scan_reselect_sql_lexer(state);
}
/*
 * Report the parenthesis nesting depth currently recorded in the scan state.
 */
int
psql_scan_get_paren_depth(PsqlScanState state)
{
	const int	depth = state->paren_depth;

	return depth;
}
/*
 * Overwrite the parenthesis nesting depth recorded in the scan state.
 *
 * depth must not be negative (checked in assert-enabled builds).
 */
void
psql_scan_set_paren_depth(PsqlScanState state, int depth)
{
	Assert(!(depth < 0));

	state->paren_depth = depth;
}
/*
 * Strip double quotes from a SQL identifier and, if asked, fold its
 * unquoted letters to lower case.
 *
 * str is rewritten in place; the result is never longer than the input.
 * downcase selects whether characters outside double quotes are passed
 * through pg_tolower().  encoding is used to step over multibyte characters
 * as units.
 *
 * Inside double quotes, a doubled double quote yields one literal double
 * quote.  Every other double quote just toggles quoted mode and is dropped.
 *
 * Ideally the folding matches the backend's downcase_identifier(), but it
 * could differ if LC_CTYPE is different in the frontend.
 *
 * A string like FOO"BAR"BAZ comes out as fooBARbaz.  The SQL spec would
 * rather see several identifiers there, but psql wants something like
 * "foo"."bar" to stay a single option, and this routine has no say over
 * token boundaries anyway.
 */
void
dequote_downcase_identifier(char *str, bool downcase, int encoding)
{
	bool		quoted = false;
	char	   *rd = str;		/* next input byte to examine */
	char	   *wr = str;		/* next output position, never ahead of rd */

	while (*rd != '\0')
	{
		int			chlen;

		if (*rd == '"')
		{
			if (quoted && rd[1] == '"')
			{
				/* doubled quote inside quotes: keep one, skip both */
				*wr++ = '"';
				rd += 2;
			}
			else
			{
				/* opening or closing quote: drop it and flip the mode */
				quoted = !quoted;
				rd++;
			}
			continue;
		}

		/* Fold the lead byte before measuring, then copy the whole character */
		if (downcase && !quoted)
			*rd = pg_tolower((unsigned char) *rd);

		chlen = PQmblenBounded(rd, encoding);
		memmove(wr, rd, chlen);
		wr += chlen;
		rd += chlen;
	}

	*wr = '\0';
}
/*
 * Evaluate a backticked substring of a slash command's argument.
 *
 * The portion of output_buf starting at backtick_start_offset is evaluated
 * as a shell command and then replaced by the command's output, minus one
 * trailing newline if present.  If the command cannot be run, its output
 * cannot be read, pclose() returns -1, or memory runs out while collecting
 * the output, an error is logged and the command text is simply removed.
 *
 * The shell result variables are set on every path: from pclose()'s result
 * when the command was started, and from -1 when it was not.
 */
static void
evaluate_backtick(PsqlScanState state)
{
	PQExpBuffer output_buf = state->output_buf;
	char	   *cmd;
	PQExpBufferData cmd_output;
	FILE	   *fd;
	bool		error = false;
	int			exit_code = 0;
	char		buf[512];
	size_t		result;

	/*
	 * If output_buf ran out of memory while the command text was being
	 * collected, the text is gone: the buffer's data pointer now refers to a
	 * static empty string and its length is zero.  Adding
	 * backtick_start_offset to that pointer, or storing a terminator through
	 * it, would step outside that string, so don't try to run anything.
	 */
	if (PQExpBufferDataBroken(*output_buf))
	{
		pg_log_error("out of memory");
		SetShellResultVariables(-1);
		return;
	}

	cmd = output_buf->data + backtick_start_offset;

	initPQExpBuffer(&cmd_output);

	/* Flush all output streams before starting the child process */
	fflush(NULL);
	fd = popen(cmd, "r");
	if (!fd)
	{
		pg_log_error("%s: %m", cmd);
		error = true;
		exit_code = -1;
	}

	if (!error)
	{
		/* Collect everything the command writes to its stdout */
		do
		{
			result = fread(buf, 1, sizeof(buf), fd);
			if (ferror(fd))
			{
				pg_log_error("%s: %m", cmd);
				error = true;
				break;
			}
			appendBinaryPQExpBuffer(&cmd_output, buf, result);
		} while (!feof(fd));
	}

	if (fd)
	{
		/*
		 * Although pclose's result always sets the shell result variables, we
		 * historically have abandoned the backtick substitution only if it
		 * returns -1.
		 */
		exit_code = pclose(fd);
		if (exit_code == -1)
		{
			pg_log_error("%s: %m", cmd);
			error = true;
		}
	}

	if (PQExpBufferDataBroken(cmd_output))
	{
		pg_log_error("%s: out of memory", cmd);
		error = true;
	}

	/* Now done with cmd, delete it from output_buf */
	output_buf->len = backtick_start_offset;
	output_buf->data[output_buf->len] = '\0';

	/* If no error, transfer result to output_buf */
	if (!error)
	{
		/* strip any trailing newline (but only one) */
		if (cmd_output.len > 0 &&
			cmd_output.data[cmd_output.len - 1] == '\n')
			cmd_output.len--;
		appendBinaryPQExpBuffer(output_buf, cmd_output.data, cmd_output.len);
	}

	/* And finally, set the shell result variables */
	SetShellResultVariables(exit_code);

	termPQExpBuffer(&cmd_output);
}