464 lines
11 KiB
Plaintext
464 lines
11 KiB
Plaintext
%{
|
|
/*-------------------------------------------------------------------------
|
|
*
|
|
* exprscan.l
|
|
* lexical scanner for pgbench backslash commands
|
|
*
|
|
* This lexer supports two operating modes:
|
|
*
|
|
* In INITIAL state, just parse off whitespace-separated words (this mode
|
|
* is basically equivalent to strtok(), which is what we used to use).
|
|
*
|
|
* In EXPR state, lex for the simple expression syntax of exprparse.y.
|
|
*
|
|
* In either mode, stop upon hitting newline or end of string.
|
|
*
|
|
* Note that this lexer operates within the framework created by psqlscan.l.
|
|
*
|
|
* Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
|
*
|
|
* src/bin/pgbench/exprscan.l
|
|
*
|
|
*-------------------------------------------------------------------------
|
|
*/
|
|
|
|
#include "fe_utils/psqlscan_int.h"
|
|
|
|
/* context information for reporting errors in expressions */
|
|
static const char *expr_source = NULL;
|
|
static int expr_lineno = 0;
|
|
static int expr_start_offset = 0;
|
|
static const char *expr_command = NULL;
|
|
|
|
/* indicates whether last yylex() call read a newline */
|
|
static bool last_was_newline = false;
|
|
|
|
/*
|
|
* Work around a bug in flex 2.5.35: it emits a couple of functions that
|
|
* it forgets to emit declarations for. Since we use -Wmissing-prototypes,
|
|
* this would cause warnings. Providing our own declarations should be
|
|
* harmless even when the bug gets fixed.
|
|
*/
|
|
extern int expr_yyget_column(yyscan_t yyscanner);
|
|
extern void expr_yyset_column(int column_no, yyscan_t yyscanner);
|
|
|
|
/* LCOV_EXCL_START */
|
|
|
|
%}
|
|
|
|
/* Except for the prefix, these options should match psqlscan.l */
|
|
%option reentrant
|
|
%option bison-bridge
|
|
%option 8bit
|
|
%option never-interactive
|
|
%option nodefault
|
|
%option noinput
|
|
%option nounput
|
|
%option noyywrap
|
|
%option warn
|
|
%option prefix="expr_yy"
|
|
|
|
/* Character classes */
|
|
alpha           [A-Za-z_\200-\377]
digit           [0-9]
alnum           [A-Za-z_0-9\200-\377]
/* Between them, {space}, {nonspace} and {newline} should match any character */
space           [ \t\v\f\r]
nonspace        [^ \t\v\f\r\n]
newline         [\n]

/* A backslash directly before the line end (optionally CR LF) continues the line */
continuation    \\\r?{newline}

/* Keywords, spelled letter by letter so that they match in any case */
and             [Aa][Nn][Dd]
or              [Oo][Rr]
not             [Nn][Oo][Tt]
case            [Cc][Aa][Ss][Ee]
when            [Ww][Hh][Ee][Nn]
then            [Tt][Hh][Ee][Nn]
else            [Ee][Ll][Ss][Ee]
end             [Ee][Nn][Dd]
true            [Tt][Rr][Uu][Ee]
false           [Ff][Aa][Ll][Ss][Ee]
null            [Nn][Uu][Ll][Ll]
is              [Ii][Ss]
isnull          [Ii][Ss][Nn][Uu][Ll][Ll]
notnull         [Nn][Oo][Tt][Nn][Uu][Ll][Ll]
|
|
|
|
/* Exclusive states */
|
|
%x EXPR
|
|
|
|
%%
|
|
|
|
%{
        /* Convenience handle on the scan state attached to this scanner */
        PsqlScanState cur_state = yyextra;

        /* Every yylex() call starts out not having seen a newline */
        last_was_newline = false;

        /*
         * Put flex into the start condition recorded in start_state.  That
         * lets the C functions below pick the lexing mode without touching
         * flex internals, and lets separate flex lexers take turns on
         * different parts of the same source string.
         */
        BEGIN(cur_state->start_state);
%}
|
|
|
|
/* INITIAL state */
|
|
|
|
{nonspace}+     {
                    /* A complete word: append it to the output and report it */
                    psqlscan_emit(cur_state, yytext, yyleng);
                    return 1;
                }

    /*
     * A word immediately followed by a continuation marker.  Without this
     * rule the trailing backslash would be returned as part of the word
     * ("word\") instead of being recognized as a continuation.
     */
{nonspace}+{continuation}   {
                    /* Matched "word\\\r?\n"; strip the marker and emit "word" */
                    int     marker_len = (yytext[yyleng - 2] == '\r') ? 3 : 2;
                    int     wordlen = yyleng - marker_len;

                    Assert(yytext[wordlen] == '\\');
                    psqlscan_emit(cur_state, yytext, wordlen);
                    return 1;
                }

{space}+        { /* whitespace between words is skipped */ }

{continuation}  { /* a bare continuation marker is skipped */ }

{newline}       {
                    /* end of command: remember the newline and stop */
                    last_was_newline = true;
                    return 0;
                }
|
|
|
|
/* EXPR state */
|
|
|
|
<EXPR>{
|
|
|
|
"+"             { return '+'; }
"-"             { return '-'; }
"*"             { return '*'; }
"/"             { return '/'; }
"%"             { /* C spelling, also accepted by Pg SQL */ return '%'; }
"="             { return '='; }
"<>"            { return NE_OP; }
"!="            { /* C spelling, also accepted by Pg SQL */ return NE_OP; }
"<="            { return LE_OP; }
">="            { return GE_OP; }
"<<"            { return LS_OP; }
">>"            { return RS_OP; }
"<"             { return '<'; }
">"             { return '>'; }
"|"             { return '|'; }
"&"             { return '&'; }
"#"             { return '#'; }
"~"             { return '~'; }

"("             { return '('; }
")"             { return ')'; }
","             { return ','; }

{and}           { return AND_OP; }
{or}            { return OR_OP; }
{not}           { return NOT_OP; }
{is}            { return IS_OP; }
{isnull}        { return ISNULL_OP; }
{notnull}       { return NOTNULL_OP; }

{case}          { return CASE_KW; }
{when}          { return WHEN_KW; }
{then}          { return THEN_KW; }
{else}          { return ELSE_KW; }
{end}           { return END_KW; }

:{alnum}+       {
                    /* variable reference: return the name without its colon */
                    yylval->str = pg_strdup(&yytext[1]);
                    return VARIABLE;
                }

{null}          { return NULL_CONST; }
{true}          {
                    yylval->bval = true;
                    return BOOLEAN_CONST;
                }
{false}         {
                    yylval->bval = false;
                    return BOOLEAN_CONST;
                }
|
|
"9223372036854775808"   {
                    /*
                     * Special handling for PG_INT64_MIN, which can't
                     * accurately be represented here, as the minus sign is
                     * lexed separately and INT64_MIN can't be represented as
                     * a positive integer.
                     */
                    return MAXINT_PLUS_ONE_CONST;
                }

    /*
     * In the three rules below the offending text is copied before it is
     * handed to expr_yyerror_more(), because that function keeps lexing to
     * the end of the line and yytext changes while it does so.  The copy is
     * made with pg_strdup(), like everywhere else in this lexer, rather than
     * with bare strdup(), whose NULL result on out-of-memory went unchecked.
     */

    /* Integer constant; listed ahead of the double rule, which also matches plain digits */
{digit}+        {
                    if (!strtoint64(yytext, true, &yylval->ival))
                        expr_yyerror_more(yyscanner, "bigint constant overflow",
                                          pg_strdup(yytext));
                    return INTEGER_CONST;
                }

    /* Double constant with leading digits, optional fraction and optional exponent */
{digit}+(\.{digit}*)?([eE][-+]?{digit}+)?   {
                    if (!strtodouble(yytext, true, &yylval->dval))
                        expr_yyerror_more(yyscanner, "double constant overflow",
                                          pg_strdup(yytext));
                    return DOUBLE_CONST;
                }

    /* Double constant that starts with the decimal point */
\.{digit}+([eE][-+]?{digit}+)?  {
                    if (!strtodouble(yytext, true, &yylval->dval))
                        expr_yyerror_more(yyscanner, "double constant overflow",
                                          pg_strdup(yytext));
                    return DOUBLE_CONST;
                }
|
|
{alpha}{alnum}* {
                    /* any other identifier is returned as a function name */
                    yylval->str = pg_strdup(yytext);
                    return FUNCTION;
                }

{space}+        { /* whitespace is skipped */ }

{continuation}  { /* continuation markers are skipped */ }

{newline}       {
                    /* end of command: remember the newline and stop */
                    last_was_newline = true;
                    return 0;
                }

.               {
                    /*
                     * Copy yytext first: expr_yyerror_more lexes on to the
                     * end of the line, which would otherwise change the text
                     * being reported.
                     */
                    char   *offending = pg_strdup(yytext);

                    expr_yyerror_more(yyscanner, "unexpected character",
                                      offending);
                    /* NOTREACHED, syntax_error calls exit() */
                    return 0;
                }
|
|
|
|
}
|
|
|
|
<<EOF>>         {
                    if (cur_state->buffer_stack != NULL)
                    {
                        /*
                         * The buffer that just ran out was a variable
                         * expansion: drop it from the inclusion stack and
                         * carry on lexing from the buffer now on top.
                         */
                        psqlscan_pop_buffer_stack(cur_state);
                        psqlscan_select_top_buffer(cur_state);
                    }
                    else
                        return 0;       /* end of input reached */
                }
|
|
|
|
%%
|
|
|
|
/* LCOV_EXCL_STOP */
|
|
|
|
void
|
|
expr_yyerror_more(yyscan_t yyscanner, const char *message, const char *more)
|
|
{
|
|
PsqlScanState state = yyget_extra(yyscanner);
|
|
int error_detection_offset = expr_scanner_offset(state) - 1;
|
|
YYSTYPE lval;
|
|
char *full_line;
|
|
|
|
/*
|
|
* While parsing an expression, we may not have collected the whole line
|
|
* yet from the input source. Lex till EOL so we can report whole line.
|
|
* (If we're at EOF, it's okay to call yylex() an extra time.)
|
|
*/
|
|
if (!last_was_newline)
|
|
{
|
|
while (yylex(&lval, yyscanner))
|
|
/* skip */ ;
|
|
}
|
|
|
|
/* Extract the line, trimming trailing newline if any */
|
|
full_line = expr_scanner_get_substring(state,
|
|
expr_start_offset,
|
|
expr_scanner_offset(state),
|
|
true);
|
|
|
|
syntax_error(expr_source, expr_lineno, full_line, expr_command,
|
|
message, more, error_detection_offset - expr_start_offset);
|
|
}
|
|
|
|
void
|
|
expr_yyerror(yyscan_t yyscanner, const char *message)
|
|
{
|
|
expr_yyerror_more(yyscanner, message, NULL);
|
|
}
|
|
|
|
/*
|
|
* Collect a space-separated word from a backslash command and return it
|
|
* in word_buf, along with its starting string offset in *offset.
|
|
* Returns true if successful, false if at end of command.
|
|
*/
|
|
bool
|
|
expr_lex_one_word(PsqlScanState state, PQExpBuffer word_buf, int *offset)
|
|
{
|
|
int lexresult;
|
|
YYSTYPE lval;
|
|
|
|
/* Must be scanning already */
|
|
Assert(state->scanbufhandle != NULL);
|
|
|
|
/* Set current output target */
|
|
state->output_buf = word_buf;
|
|
resetPQExpBuffer(word_buf);
|
|
|
|
/* Set input source */
|
|
if (state->buffer_stack != NULL)
|
|
yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
|
|
else
|
|
yy_switch_to_buffer(state->scanbufhandle, state->scanner);
|
|
|
|
/* Set start state */
|
|
state->start_state = INITIAL;
|
|
|
|
/* And lex. */
|
|
lexresult = yylex(&lval, state->scanner);
|
|
|
|
/*
|
|
* Save start offset of word, if any. We could do this more efficiently,
|
|
* but for now this seems fine.
|
|
*/
|
|
if (lexresult)
|
|
*offset = expr_scanner_offset(state) - word_buf->len;
|
|
else
|
|
*offset = -1;
|
|
|
|
/*
|
|
* In case the caller returns to using the regular SQL lexer, reselect the
|
|
* appropriate initial state.
|
|
*/
|
|
psql_scan_reselect_sql_lexer(state);
|
|
|
|
return (bool) lexresult;
|
|
}
|
|
|
|
/*
|
|
* Prepare to lex an expression via expr_yyparse().
|
|
*
|
|
* Returns the yyscan_t that is to be passed to expr_yyparse().
|
|
* (This is just state->scanner, but callers don't need to know that.)
|
|
*/
|
|
yyscan_t
|
|
expr_scanner_init(PsqlScanState state,
|
|
const char *source, int lineno, int start_offset,
|
|
const char *command)
|
|
{
|
|
/* Save error context info */
|
|
expr_source = source;
|
|
expr_lineno = lineno;
|
|
expr_start_offset = start_offset;
|
|
expr_command = command;
|
|
|
|
/* Must be scanning already */
|
|
Assert(state->scanbufhandle != NULL);
|
|
|
|
/* Set current output target */
|
|
state->output_buf = NULL;
|
|
|
|
/* Set input source */
|
|
if (state->buffer_stack != NULL)
|
|
yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
|
|
else
|
|
yy_switch_to_buffer(state->scanbufhandle, state->scanner);
|
|
|
|
/* Set start state */
|
|
state->start_state = EXPR;
|
|
|
|
return state->scanner;
|
|
}
|
|
|
|
/*
|
|
* Finish lexing an expression.
|
|
*/
|
|
void
|
|
expr_scanner_finish(yyscan_t yyscanner)
|
|
{
|
|
PsqlScanState state = yyget_extra(yyscanner);
|
|
|
|
/*
|
|
* Reselect appropriate initial state for SQL lexer.
|
|
*/
|
|
psql_scan_reselect_sql_lexer(state);
|
|
}
|
|
|
|
/*
|
|
* Get offset from start of string to end of current lexer token.
|
|
*
|
|
* We rely on the knowledge that flex modifies the scan buffer by storing
|
|
* a NUL at the end of the current token (yytext). Note that this might
|
|
* not work quite right if we were parsing a sub-buffer, but since pgbench
|
|
* never invokes that functionality, it doesn't matter.
|
|
*/
|
|
int
|
|
expr_scanner_offset(PsqlScanState state)
|
|
{
|
|
return strlen(state->scanbuf);
|
|
}
|
|
|
|
/*
|
|
* Get a malloc'd copy of the lexer input string from start_offset
|
|
* to just before end_offset. If chomp is true, drop any trailing
|
|
* newline(s).
|
|
*/
|
|
char *
|
|
expr_scanner_get_substring(PsqlScanState state,
|
|
int start_offset, int end_offset,
|
|
bool chomp)
|
|
{
|
|
char *result;
|
|
const char *scanptr = state->scanbuf + start_offset;
|
|
int slen = end_offset - start_offset;
|
|
|
|
Assert(slen >= 0);
|
|
Assert(end_offset <= strlen(state->scanbuf));
|
|
|
|
if (chomp)
|
|
{
|
|
while (slen > 0 &&
|
|
(scanptr[slen - 1] == '\n' || scanptr[slen - 1] == '\r'))
|
|
slen--;
|
|
}
|
|
|
|
result = (char *) pg_malloc(slen + 1);
|
|
memcpy(result, scanptr, slen);
|
|
result[slen] = '\0';
|
|
|
|
return result;
|
|
}
|
|
|
|
/*
|
|
* Get the line number associated with the given string offset
|
|
* (which must not be past the end of where we've lexed to).
|
|
*/
|
|
int
|
|
expr_scanner_get_lineno(PsqlScanState state, int offset)
|
|
{
|
|
int lineno = 1;
|
|
const char *p = state->scanbuf;
|
|
|
|
while (*p && offset > 0)
|
|
{
|
|
if (*p == '\n')
|
|
lineno++;
|
|
p++, offset--;
|
|
}
|
|
return lineno;
|
|
}
|