postgresql/src/bin/pgbench/exprscan.l

464 lines
11 KiB
Plaintext

%{
/*-------------------------------------------------------------------------
*
* exprscan.l
* lexical scanner for pgbench backslash commands
*
* This lexer supports two operating modes:
*
* In INITIAL state, just parse off whitespace-separated words (this mode
* is basically equivalent to strtok(), which is what we used to use).
*
* In EXPR state, lex for the simple expression syntax of exprparse.y.
*
* In either mode, stop upon hitting newline or end of string.
*
* Note that this lexer operates within the framework created by psqlscan.l
* (see fe_utils/psqlscan_int.h).
*
* Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/bin/pgbench/exprscan.l
*
*-------------------------------------------------------------------------
*/
#include "fe_utils/psqlscan_int.h"
/*
 * Context information for reporting errors in expressions.
 *
 * These four values are stored by expr_scanner_init() and later handed to
 * syntax_error() by expr_yyerror_more(); expr_start_offset is also used
 * there as the start of the text extracted for the error report.
 */
static const char *expr_source = NULL;
static int expr_lineno = 0;
static int expr_start_offset = 0;
static const char *expr_command = NULL;
/*
 * Indicates whether the last yylex() call read a newline.
 *
 * yylex() clears this flag on entry and the {newline} rules set it;
 * expr_yyerror_more() tests it to decide whether it still has to lex
 * ahead to the end of the line.
 */
static bool last_was_newline = false;
/*
 * Work around a bug in flex 2.5.35: it emits a couple of functions that
 * it forgets to emit declarations for.  Since we use -Wmissing-prototypes,
 * this would cause warnings.  Providing our own declarations should be
 * harmless even when the bug gets fixed.
 */
extern int expr_yyget_column(yyscan_t yyscanner);
extern void expr_yyset_column(int column_no, yyscan_t yyscanner);
/* LCOV_EXCL_START */
%}
/*
 * Scanner options.  Except for the prefix, these options should match
 * psqlscan.l.  The prefix makes the generated symbols start with "expr_yy"
 * (compare the expr_yyget_column()/expr_yyset_column() declarations above).
 */
%option reentrant
%option bison-bridge
%option 8bit
%option never-interactive
%option nodefault
%option noinput
%option nounput
%option noyywrap
%option warn
%option prefix="expr_yy"
/*
 * Character classes.  Besides ASCII letters, alpha and alnum accept '_'
 * and every byte in the range \200-\377 (high bit set).
 */
alpha [a-zA-Z\200-\377_]
digit [0-9]
alnum [A-Za-z\200-\377_0-9]
/* {space} + {nonspace} + {newline} should cover all characters */
space [ \t\r\f\v]
nonspace [^ \t\r\f\v\n]
newline [\n]
/*
 * Line continuation marker: a backslash, an optional carriage return,
 * then a newline.
 */
continuation \\\r?{newline}
/*
 * Case insensitive keywords.  Every letter is written as a two-case
 * bracket expression, so any mixture of upper and lower case matches.
 */
and [Aa][Nn][Dd]
or [Oo][Rr]
not [Nn][Oo][Tt]
case [Cc][Aa][Ss][Ee]
when [Ww][Hh][Ee][Nn]
then [Tt][Hh][Ee][Nn]
else [Ee][Ll][Ss][Ee]
end [Ee][Nn][Dd]
true [Tt][Rr][Uu][Ee]
false [Ff][Aa][Ll][Ss][Ee]
null [Nn][Uu][Ll][Ll]
is [Ii][Ss]
isnull [Ii][Ss][Nn][Uu][Ll][Ll]
notnull [Nn][Oo][Tt][Nn][Uu][Ll][Ll]
/*
 * Exclusive states.  EXPR is the expression-lexing mode; its rules are
 * grouped in the <EXPR>{ ... } block of the rules section.
 */
%x EXPR
%%
%{
/*
 * Code placed here, ahead of the first rule, becomes the opening of
 * yylex().
 *
 * Declare some local variables inside yylex(), for convenience.
 */
PsqlScanState cur_state = yyextra;
/*
 * Force flex into the state indicated by start_state.  This has a
 * couple of purposes: it lets some of the functions below set a new
 * starting state without ugly direct access to flex variables, and it
 * allows us to transition from one flex lexer to another so that we
 * can lex different parts of the source string using separate lexers.
 */
BEGIN(cur_state->start_state);
/*
 * Reset was-newline flag.  Only the {newline} rules set it again, so once
 * yylex() returns it tells whether this call stopped on a newline.
 */
last_was_newline = false;
%}
/*
 * INITIAL state: split the input into whitespace-separated words.
 * A rule returns 1 after emitting a word into the current output buffer
 * and 0 when the end of the command (a newline) is reached.
 */
{nonspace}+ {
/* Found a word, emit and return it */
psqlscan_emit(cur_state, yytext, yyleng);
return 1;
}
/*
 * We need this rule to avoid returning "word\" instead of recognizing
 * a continuation marker just after a word:
 */
{nonspace}+{continuation} {
/* Found "word\\\r?\n", emit and return just "word" */
/*
 * yytext[yyleng - 1] is the newline, so yyleng - 2 indexes the byte
 * before it: either the backslash itself or the optional '\r'.
 */
int wordlen = yyleng - 2;
if (yytext[wordlen] == '\r')
wordlen--;
/* wordlen now indexes the backslash, i.e. it equals the word's length */
Assert(yytext[wordlen] == '\\');
psqlscan_emit(cur_state, yytext, wordlen);
return 1;
}
{space}+ { /* ignore whitespace between words */ }
{continuation} { /* ignore a continuation marker standing on its own */ }
{newline} {
/* report end of command */
last_was_newline = true;
return 0;
}
/* EXPR state */

<EXPR>{

	/*
	 * Token rules for the simple expression syntax of exprparse.y.  Each
	 * rule returns a token code; rules whose token carries a value store
	 * it in *yylval first.
	 *
	 * flex prefers the longest match and, between equally long matches,
	 * the rule listed first.  The ordering below relies on that: the
	 * keywords come before the generic {alpha}{alnum}* rule, the literal
	 * "9223372036854775808" comes before {digit}+, and {digit}+ comes
	 * before the double-constant pattern that also matches plain digits.
	 */

"+"				{ return '+'; }
"-"				{ return '-'; }
"*"				{ return '*'; }
"/"				{ return '/'; }
"%"				{ return '%'; } /* C version, also in Pg SQL */
"="				{ return '='; }
"<>"			{ return NE_OP; }
"!="			{ return NE_OP; } /* C version, also in Pg SQL */
"<="			{ return LE_OP; }
">="			{ return GE_OP; }
"<<"			{ return LS_OP; }
">>"			{ return RS_OP; }
"<"				{ return '<'; }
">"				{ return '>'; }
"|"				{ return '|'; }
"&"				{ return '&'; }
"#"				{ return '#'; }
"~"				{ return '~'; }

"("				{ return '('; }
")"				{ return ')'; }
","				{ return ','; }

	/* Keywords; matched case-insensitively by the named patterns */
{and}			{ return AND_OP; }
{or}			{ return OR_OP; }
{not}			{ return NOT_OP; }
{is}			{ return IS_OP; }
{isnull}		{ return ISNULL_OP; }
{notnull}		{ return NOTNULL_OP; }

{case}			{ return CASE_KW; }
{when}			{ return WHEN_KW; }
{then}			{ return THEN_KW; }
{else}			{ return ELSE_KW; }
{end}			{ return END_KW; }

	/* Variable reference ":name"; the leading colon is not copied */
:{alnum}+		{
					yylval->str = pg_strdup(yytext + 1);
					return VARIABLE;
				}

{null}			{ return NULL_CONST; }
{true}			{
					yylval->bval = true;
					return BOOLEAN_CONST;
				}
{false}			{
					yylval->bval = false;
					return BOOLEAN_CONST;
				}
"9223372036854775808"	{
					/*
					 * Special handling for PG_INT64_MIN, which can't
					 * accurately be represented here, as the minus sign is
					 * lexed separately and INT64_MIN can't be represented as
					 * a positive integer.
					 */
					return MAXINT_PLUS_ONE_CONST;
				}
{digit}+		{
					/*
					 * If the conversion fails, report the offending text.  It
					 * must be copied because expr_yyerror_more() lexes on to
					 * the end of the line, which changes yytext.  Use
					 * pg_strdup(), like the other rules in this file, rather
					 * than plain strdup(), whose NULL result on allocation
					 * failure used to be passed along unchecked.
					 */
					if (!strtoint64(yytext, true, &yylval->ival))
						expr_yyerror_more(yyscanner, "bigint constant overflow",
										  pg_strdup(yytext));
					return INTEGER_CONST;
				}
{digit}+(\.{digit}*)?([eE][-+]?{digit}+)?	{
					/* copy yytext for the same reason as in the {digit}+ rule */
					if (!strtodouble(yytext, true, &yylval->dval))
						expr_yyerror_more(yyscanner, "double constant overflow",
										  pg_strdup(yytext));
					return DOUBLE_CONST;
				}
\.{digit}+([eE][-+]?{digit}+)?	{
					/* copy yytext for the same reason as in the {digit}+ rule */
					if (!strtodouble(yytext, true, &yylval->dval))
						expr_yyerror_more(yyscanner, "double constant overflow",
										  pg_strdup(yytext));
					return DOUBLE_CONST;
				}

	/* Any other identifier is returned as a function name */
{alpha}{alnum}*	{
					yylval->str = pg_strdup(yytext);
					return FUNCTION;
				}

{space}+		{ /* ignore */ }

{continuation}	{ /* ignore */ }

{newline}		{
					/* report end of command */
					last_was_newline = true;
					return 0;
				}

.				{
					/*
					 * must strdup yytext so that expr_yyerror_more doesn't
					 * change it while finding end of line
					 */
					expr_yyerror_more(yyscanner, "unexpected character",
									  pg_strdup(yytext));
					/* NOTREACHED, syntax_error calls exit() */
					return 0;
				}

}

	/*
	 * End of the current input buffer.  The rule carries no start-condition
	 * prefix, so it is not limited to the EXPR state.
	 */
<<EOF>>			{
					if (cur_state->buffer_stack == NULL)
						return 0; /* end of input reached */

					/*
					 * We were expanding a variable, so pop the inclusion
					 * stack and keep lexing
					 */
					psqlscan_pop_buffer_stack(cur_state);
					psqlscan_select_top_buffer(cur_state);
				}
%%
/* LCOV_EXCL_STOP */
/*
 * Report an error detected while lexing or parsing an expression.
 *
 * "message" is the error text and "more" an optional detail string (may be
 * NULL); both are handed to syntax_error() together with the context saved
 * by expr_scanner_init() and the text of the offending line.  The '.' rule
 * above notes that syntax_error() calls exit(), so this is not expected to
 * return.
 */
void
expr_yyerror_more(yyscan_t yyscanner, const char *message, const char *more)
{
	PsqlScanState scan_state = yyget_extra(yyscanner);

	/*
	 * Position of the last character of the current token.  This has to be
	 * taken before lexing ahead below, since that moves the scanner offset.
	 */
	int			err_offset = expr_scanner_offset(scan_state) - 1;
	int			line_end;
	char	   *line_text;

	/*
	 * The rest of the line may not have been consumed yet.  Unless the last
	 * yylex() call already stopped on a newline, keep pulling tokens until
	 * yylex() reports end of command, so the whole line can be shown.  (At
	 * EOF one extra yylex() call is harmless.)
	 */
	if (!last_was_newline)
	{
		YYSTYPE		discarded;
		int			token;

		do
		{
			token = yylex(&discarded, yyscanner);
		} while (token != 0);
	}

	/* Copy out the line, without any trailing newline */
	line_end = expr_scanner_offset(scan_state);
	line_text = expr_scanner_get_substring(scan_state,
										   expr_start_offset,
										   line_end,
										   true);

	syntax_error(expr_source, expr_lineno, line_text, expr_command,
				 message, more, err_offset - expr_start_offset);
}
/*
 * Report an expression error that has no extra detail string.
 *
 * Simply forwards to expr_yyerror_more() with "more" set to NULL.
 */
void
expr_yyerror(yyscan_t yyscanner, const char *message)
{
expr_yyerror_more(yyscanner, message, NULL);
}
/*
 * Fetch the next whitespace-separated word of a backslash command.
 *
 * The word is stored in word_buf (which is reset first), and *offset
 * receives the string offset at which the word starts, or -1 if there is
 * no word.  Returns true if a word was found, false at end of command.
 */
bool
expr_lex_one_word(PsqlScanState state, PQExpBuffer word_buf, int *offset)
{
	YYSTYPE		unused_lval;
	int			got_word;

	/* A scan must already be in progress */
	Assert(state->scanbufhandle != NULL);

	/* Words are emitted into the caller's buffer, starting out empty */
	resetPQExpBuffer(word_buf);
	state->output_buf = word_buf;

	/* Read from the top of the buffer stack if any, else the main buffer */
	yy_switch_to_buffer(state->buffer_stack != NULL ?
						state->buffer_stack->buf :
						state->scanbufhandle,
						state->scanner);

	/* Lex exactly one token in word-splitting mode */
	state->start_state = INITIAL;
	got_word = yylex(&unused_lval, state->scanner);

	/*
	 * The scanner offset is now at the end of the word, so backing up by
	 * the word's length gives its start.  This could be done more
	 * efficiently, but for now this seems fine.
	 */
	*offset = -1;
	if (got_word)
		*offset = expr_scanner_offset(state) - word_buf->len;

	/*
	 * The caller may go back to the regular SQL lexer next, so reselect its
	 * initial state.
	 */
	psql_scan_reselect_sql_lexer(state);

	return (bool) got_word;
}
/*
 * Get ready to lex an expression by way of expr_yyparse().
 *
 * source, lineno, start_offset and command are remembered as context for
 * later error reports.  The return value is the yyscan_t to hand to
 * expr_yyparse(); it is simply state->scanner, though callers need not
 * rely on that.
 */
yyscan_t
expr_scanner_init(PsqlScanState state,
				  const char *source, int lineno, int start_offset,
				  const char *command)
{
	/* A scan must already be in progress */
	Assert(state->scanbufhandle != NULL);

	/* Remember the error-reporting context */
	expr_command = command;
	expr_start_offset = start_offset;
	expr_lineno = lineno;
	expr_source = source;

	/* No output buffer is used while lexing an expression */
	state->output_buf = NULL;

	/* Read from the top of the buffer stack if any, else the main buffer */
	yy_switch_to_buffer(state->buffer_stack != NULL ?
						state->buffer_stack->buf :
						state->scanbufhandle,
						state->scanner);

	/* Subsequent yylex() calls start out in expression mode */
	state->start_state = EXPR;

	return state->scanner;
}
/*
 * Wrap up after lexing an expression: put the SQL lexer belonging to this
 * scanner back into its appropriate initial state.
 */
void
expr_scanner_finish(yyscan_t yyscanner)
{
	psql_scan_reselect_sql_lexer(yyget_extra(yyscanner));
}
/*
 * Return the offset, counted from the start of the scan string, of the end
 * of the current lexer token.
 *
 * This depends on flex having overwritten the character just past the
 * current token (yytext) with a NUL in the scan buffer, so that the
 * buffer's string length is the wanted offset.  That might not be quite
 * right while parsing a sub-buffer, but pgbench never invokes that
 * functionality, so it doesn't matter.
 */
int
expr_scanner_offset(PsqlScanState state)
{
	size_t		token_end = strlen(state->scanbuf);

	return (int) token_end;
}
/*
 * Return a freshly malloc'd, NUL-terminated copy of the lexer input from
 * start_offset up to, but not including, end_offset.  With chomp set, any
 * trailing '\n' and '\r' characters are left out of the copy.
 */
char *
expr_scanner_get_substring(PsqlScanState state,
						   int start_offset, int end_offset,
						   bool chomp)
{
	const char *src = state->scanbuf + start_offset;
	int			nbytes = end_offset - start_offset;
	char	   *copy;

	Assert(nbytes >= 0);
	Assert(end_offset <= strlen(state->scanbuf));

	/* Strip line terminators off the end, if asked to */
	while (chomp && nbytes > 0)
	{
		char		last = src[nbytes - 1];

		if (last != '\n' && last != '\r')
			break;
		nbytes--;
	}

	copy = (char *) pg_malloc(nbytes + 1);
	memcpy(copy, src, nbytes);
	copy[nbytes] = '\0';

	return copy;
}
/*
 * Return the 1-based line number of the given string offset, which must
 * not lie beyond the point lexed so far.  The result is one plus the
 * number of newlines found in the first "offset" bytes of the scan buffer
 * (stopping early at a NUL).
 */
int
expr_scanner_get_lineno(PsqlScanState state, int offset)
{
	const char *cursor;
	int			newlines_seen = 0;

	for (cursor = state->scanbuf;
		 *cursor != '\0' && offset > 0;
		 cursor++, offset--)
	{
		if (*cursor == '\n')
			newlines_seen++;
	}

	return newlines_seen + 1;
}