postgresql/src/bin/pgbench/exprscan.l

%{
/*-------------------------------------------------------------------------
 *
 * exprscan.l
 *	  lexical scanner for pgbench backslash commands
 *
 * This lexer supports two operating modes:
 *
 * In INITIAL state, just parse off whitespace-separated words (this mode
 * is basically equivalent to strtok(), which is what we used to use).
 *
 * In EXPR state, lex for the simple expression syntax of exprparse.y.
 *
 * In either mode, stop upon hitting newline or end of string.
 *
 * Note that this lexer operates within the framework created by psqlscan.l,
 *
 * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * src/bin/pgbench/exprscan.l
 *
 *-------------------------------------------------------------------------
 */

#include "fe_utils/psqlscan_int.h"

/* context information for reporting errors in expressions */
static const char *expr_source = NULL;
static int	expr_lineno = 0;
static int	expr_start_offset = 0;
static const char *expr_command = NULL;

/* indicates whether last yylex() call read a newline */
static bool last_was_newline = false;

/*
 * Work around a bug in flex 2.5.35: it emits a couple of functions that
 * it forgets to emit declarations for.  Since we use -Wmissing-prototypes,
 * this would cause warnings.  Providing our own declarations should be
 * harmless even when the bug gets fixed.
 */
extern int	expr_yyget_column(yyscan_t yyscanner);
extern void expr_yyset_column(int column_no, yyscan_t yyscanner);

/* LCOV_EXCL_START */

%}

/* Except for the prefix, these options should match psqlscan.l */
%option reentrant
%option bison-bridge
%option 8bit
%option never-interactive
%option nodefault
%option noinput
%option nounput
%option noyywrap
%option warn
%option prefix="expr_yy"

/* Character classes */
alpha			[a-zA-Z\200-\377_]
digit			[0-9]
alnum			[A-Za-z\200-\377_0-9]
/* {space} + {nonspace} + {newline} should cover all characters */
space			[ \t\r\f\v]
nonspace		[^ \t\r\f\v\n]
newline			[\n]

/* Line continuation marker */
continuation	\\\r?{newline}

/* case insensitive keywords */
and				[Aa][Nn][Dd]
or				[Oo][Rr]
not				[Nn][Oo][Tt]
case			[Cc][Aa][Ss][Ee]
when			[Ww][Hh][Ee][Nn]
then			[Tt][Hh][Ee][Nn]
else			[Ee][Ll][Ss][Ee]
end				[Ee][Nn][Dd]
true			[Tt][Rr][Uu][Ee]
false			[Ff][Aa][Ll][Ss][Ee]
null			[Nn][Uu][Ll][Ll]
is				[Ii][Ss]
isnull			[Ii][Ss][Nn][Uu][Ll][Ll]
notnull			[Nn][Oo][Tt][Nn][Uu][Ll][Ll]

/* Exclusive states */
%x EXPR

%%

%{
		/* Declare some local variables inside yylex(), for convenience */
		PsqlScanState cur_state = yyextra;

		/*
		 * Force flex into the state indicated by start_state.  This has a
		 * couple of purposes: it lets some of the functions below set a new
		 * starting state without ugly direct access to flex variables, and it
		 * allows us to transition from one flex lexer to another so that we
		 * can lex different parts of the source string using separate lexers.
		 */
		BEGIN(cur_state->start_state);

		/* Reset was-newline flag */
		last_was_newline = false;
%}

	/* INITIAL state */

{nonspace}+		{
					/* Found a word, emit and return it */
					psqlscan_emit(cur_state, yytext, yyleng);
					return 1;
				}

	/*
	 * We need this rule to avoid returning "word\" instead of recognizing
	 * a continuation marker just after a word:
	 */
{nonspace}+{continuation}	{
					/* Found "word\\\r?\n", emit and return just "word" */
					int		wordlen = yyleng - 2;
					if (yytext[wordlen] == '\r')
						wordlen--;
					Assert(yytext[wordlen] == '\\');
					psqlscan_emit(cur_state, yytext, wordlen);
					return 1;
				}

{space}+		{ /* ignore */ }

{continuation}	{ /* ignore */ }

{newline}		{
					/* report end of command */
					last_was_newline = true;
					return 0;
				}

	/* EXPR state */

<EXPR>{

"+"				{ return '+'; }
"-"				{ return '-'; }
"*"				{ return '*'; }
"/"				{ return '/'; }
"%"				{ return '%'; } /* C version, also in Pg SQL */
"="				{ return '='; }
"<>"			{ return NE_OP; }
"!="			{ return NE_OP; } /* C version, also in Pg SQL */
"<="			{ return LE_OP; }
">="			{ return GE_OP; }
"<<"			{ return LS_OP; }
">>"			{ return RS_OP; }
"<"				{ return '<'; }
">"				{ return '>'; }
"|"				{ return '|'; }
"&"				{ return '&'; }
"#"				{ return '#'; }
"~"				{ return '~'; }

"("				{ return '('; }
")"				{ return ')'; }
","				{ return ','; }

{and}			{ return AND_OP; }
{or}			{ return OR_OP; }
{not}			{ return NOT_OP; }
{is}			{ return IS_OP; }
{isnull}		{ return ISNULL_OP; }
{notnull}		{ return NOTNULL_OP; }

{case}			{ return CASE_KW; }
{when}			{ return WHEN_KW; }
{then}			{ return THEN_KW; }
{else}			{ return ELSE_KW; }
{end}			{ return END_KW; }

:{alnum}+		{
					yylval->str = pg_strdup(yytext + 1);
					return VARIABLE;
				}

{null}			{ return NULL_CONST; }
{true}			{
					yylval->bval = true;
					return BOOLEAN_CONST;
				}
{false}			{
					yylval->bval = false;
					return BOOLEAN_CONST;
				}
"9223372036854775808" {
					/*
					 * Special handling for PG_INT64_MIN, which can't
					 * accurately be represented here, as the minus sign is
					 * lexed separately and INT64_MIN can't be represented as
					 * a positive integer.
					 */
					return MAXINT_PLUS_ONE_CONST;
				}
{digit}+		{
					if (!strtoint64(yytext, true, &yylval->ival))
						expr_yyerror_more(yyscanner, "bigint constant overflow",
										  strdup(yytext));
					return INTEGER_CONST;
				}
{digit}+(\.{digit}*)?([eE][-+]?{digit}+)?	{
					if (!strtodouble(yytext, true, &yylval->dval))
						expr_yyerror_more(yyscanner, "double constant overflow",
										  strdup(yytext));
					return DOUBLE_CONST;
				}
\.{digit}+([eE][-+]?{digit}+)?	{
					if (!strtodouble(yytext, true, &yylval->dval))
						expr_yyerror_more(yyscanner, "double constant overflow",
										  strdup(yytext));
					return DOUBLE_CONST;
				}
{alpha}{alnum}*	{
					yylval->str = pg_strdup(yytext);
					return FUNCTION;
				}

{space}+		{ /* ignore */ }

{continuation}	{ /* ignore */ }

{newline}		{
					/* report end of command */
					last_was_newline = true;
					return 0;
				}

.				{
					/*
					 * must strdup yytext so that expr_yyerror_more doesn't
					 * change it while finding end of line
					 */
					expr_yyerror_more(yyscanner, "unexpected character",
									  pg_strdup(yytext));
					/* NOTREACHED, syntax_error calls exit() */
					return 0;
				}

}

<<EOF>>			{
					if (cur_state->buffer_stack == NULL)
						return 0;			/* end of input reached */

					/*
					 * We were expanding a variable, so pop the inclusion
					 * stack and keep lexing
					 */
					psqlscan_pop_buffer_stack(cur_state);
					psqlscan_select_top_buffer(cur_state);
				}

%%

/* LCOV_EXCL_STOP */

void
expr_yyerror_more(yyscan_t yyscanner, const char *message, const char *more)
{
	PsqlScanState state = yyget_extra(yyscanner);
	int			error_detection_offset = expr_scanner_offset(state) - 1;
	YYSTYPE		lval;
	char	   *full_line;

	/*
	 * While parsing an expression, we may not have collected the whole line
	 * yet from the input source.  Lex till EOL so we can report whole line.
	 * (If we're at EOF, it's okay to call yylex() an extra time.)
	 */
	if (!last_was_newline)
	{
		while (yylex(&lval, yyscanner))
			 /* skip */ ;
	}

	/* Extract the line, trimming trailing newline if any */
	full_line = expr_scanner_get_substring(state,
										   expr_start_offset,
										   expr_scanner_offset(state),
										   true);

	syntax_error(expr_source, expr_lineno, full_line, expr_command,
				 message, more, error_detection_offset - expr_start_offset);
}

void
expr_yyerror(yyscan_t yyscanner, const char *message)
{
	expr_yyerror_more(yyscanner, message, NULL);
}

/*
 * Collect a space-separated word from a backslash command and return it
 * in word_buf, along with its starting string offset in *offset.
 * Returns true if successful, false if at end of command.
 */
bool
expr_lex_one_word(PsqlScanState state, PQExpBuffer word_buf, int *offset)
{
	int			lexresult;
	YYSTYPE		lval;

	/* Must be scanning already */
	Assert(state->scanbufhandle != NULL);

	/* Set current output target */
	state->output_buf = word_buf;
	resetPQExpBuffer(word_buf);

	/* Set input source */
	if (state->buffer_stack != NULL)
		yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
	else
		yy_switch_to_buffer(state->scanbufhandle, state->scanner);

	/* Set start state */
	state->start_state = INITIAL;

	/* And lex. */
	lexresult = yylex(&lval, state->scanner);

	/*
	 * Save start offset of word, if any.  We could do this more efficiently,
	 * but for now this seems fine.
	 */
	if (lexresult)
		*offset = expr_scanner_offset(state) - word_buf->len;
	else
		*offset = -1;

	/*
	 * In case the caller returns to using the regular SQL lexer, reselect the
	 * appropriate initial state.
	 */
	psql_scan_reselect_sql_lexer(state);

	return (bool) lexresult;
}

/*
 * Prepare to lex an expression via expr_yyparse().
 *
 * Returns the yyscan_t that is to be passed to expr_yyparse().
 * (This is just state->scanner, but callers don't need to know that.)
 */
yyscan_t
expr_scanner_init(PsqlScanState state,
				  const char *source, int lineno, int start_offset,
				  const char *command)
{
	/* Save error context info */
	expr_source = source;
	expr_lineno = lineno;
	expr_start_offset = start_offset;
	expr_command = command;

	/* Must be scanning already */
	Assert(state->scanbufhandle != NULL);

	/* Set current output target */
	state->output_buf = NULL;

	/* Set input source */
	if (state->buffer_stack != NULL)
		yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
	else
		yy_switch_to_buffer(state->scanbufhandle, state->scanner);

	/* Set start state */
	state->start_state = EXPR;

	return state->scanner;
}

/*
 * Finish lexing an expression.
 */
void
expr_scanner_finish(yyscan_t yyscanner)
{
	PsqlScanState state = yyget_extra(yyscanner);

	/*
	 * Reselect appropriate initial state for SQL lexer.
	 */
	psql_scan_reselect_sql_lexer(state);
}

/*
 * Get offset from start of string to end of current lexer token.
 *
 * We rely on the knowledge that flex modifies the scan buffer by storing
 * a NUL at the end of the current token (yytext).  Note that this might
 * not work quite right if we were parsing a sub-buffer, but since pgbench
 * never invokes that functionality, it doesn't matter.
 */
int
expr_scanner_offset(PsqlScanState state)
{
	return strlen(state->scanbuf);
}

/*
 * Get a malloc'd copy of the lexer input string from start_offset
 * to just before end_offset.  If chomp is true, drop any trailing
 * newline(s).
 */
char *
expr_scanner_get_substring(PsqlScanState state,
						   int start_offset, int end_offset,
						   bool chomp)
{
	char	   *result;
	const char *scanptr = state->scanbuf + start_offset;
	int			slen = end_offset - start_offset;

	Assert(slen >= 0);
	Assert(end_offset <= strlen(state->scanbuf));

	if (chomp)
	{
		while (slen > 0 &&
			   (scanptr[slen - 1] == '\n' || scanptr[slen - 1] == '\r'))
			slen--;
	}

	result = (char *) pg_malloc(slen + 1);
	memcpy(result, scanptr, slen);
	result[slen] = '\0';

	return result;
}

/*
 * Get the line number associated with the given string offset
 * (which must not be past the end of where we've lexed to).
 */
int
expr_scanner_get_lineno(PsqlScanState state, int offset)
{
	int			lineno = 1;
	const char *p = state->scanbuf;

	while (*p && offset > 0)
	{
		if (*p == '\n')
			lineno++;
		p++, offset--;
	}
	return lineno;
}