postgresql/src/interfaces/ecpg/preproc/pgc.l

%top{
/*-------------------------------------------------------------------------
*
* pgc.l
* lexical scanner for ecpg
*
* This is a modified version of src/backend/parser/scan.l
*
* The ecpg scanner is not backup-free, so the fail rules are
* only here to simplify syncing this file with scan.l.
*
*
* Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* src/interfaces/ecpg/preproc/pgc.l
*
*-------------------------------------------------------------------------
*/
#include "postgres_fe.h"
#include <ctype.h>
#include <limits.h>
#include "common/string.h"
#include "preproc_extern.h"
#include "preproc.h"
}
%{
/* LCOV_EXCL_START */
extern YYSTYPE base_yylval;
static int xcdepth = 0; /* depth of nesting in slash-star comments */
static char *dolqstart = NULL; /* current $foo$ quote start string */
/*
* literalbuf is used to accumulate literal values when multiple rules
* are needed to parse a single literal. Call startlit to reset buffer
* to empty, addlit to add text. Note that the buffer is permanently
* malloc'd to the largest size needed so far in the current run.
*/
static char *literalbuf = NULL; /* expandable buffer */
static int literallen; /* actual current length */
static int literalalloc; /* current allocated buffer size */
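
/*
 * Illustrative calling pattern (a sketch only; the helpers are declared
 * below and defined with the other support routines):
 *
 *		startlit();				// reset the buffer for a new literal
 *		addlit(yytext, yyleng);	// append the current match
 *		addlitchar('\'');		// append one character
 */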
/* Used for detecting global state together with braces_open */
static int parenths_open;
/* Used to tell parse_include() whether the command was #include or #include_next */
static bool include_next;
#define startlit() (literalbuf[0] = '\0', literallen = 0)
static void addlit(char *ytext, int yleng);
static void addlitchar(unsigned char);
static int process_integer_literal(const char *token, YYSTYPE *lval);
static void parse_include(void);
static bool ecpg_isspace(char ch);
static bool isdefine(void);
static bool isinformixdefine(void);
char *token_start;
/* vars to keep track of start conditions when scanning literals */
static int state_before_str_start;
static int state_before_str_stop;
struct _yy_buffer
{
YY_BUFFER_STATE buffer;
long lineno;
char *filename;
struct _yy_buffer *next;
} *yy_buffer = NULL;
static char *old;
#define MAX_NESTED_IF 128
static short preproc_tos;
static short ifcond;
static struct _if_value
{
short condition;
short else_branch;
} stacked_if_value[MAX_NESTED_IF];
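
/*
 * Illustrative (hypothetical) input showing the construct this stack
 * tracks:
 *
 *		EXEC SQL IFDEF FEATURE_X;
 *			EXEC SQL INCLUDE sqlca;
 *		EXEC SQL ELSE;
 *			EXEC SQL DEFINE FEATURE_Y;
 *		EXEC SQL ENDIF;
 */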
%}
%option 8bit
%option never-interactive
%option nodefault
%option noinput
%option noyywrap
%option warn
%option yylineno
%option prefix="base_yy"
/*
* OK, here is a short description of lex/flex rules behavior.
* The longest pattern which matches an input string is always chosen.
* For equal-length patterns, the first occurring in the rules list is chosen.
* INITIAL is the starting state, to which all non-conditional rules apply.
* Exclusive states change parsing rules while the state is active. When in
* an exclusive state, only those rules defined for that state apply.
*
* We use exclusive states for quoted strings, extended comments,
* and to eliminate parsing troubles for numeric strings.
* Exclusive states:
* <xb> bit string literal
* <xc> extended C-style comments
* <xd> delimited identifiers (double-quoted identifiers)
* <xdc> double-quoted strings in C
* <xh> hexadecimal numeric string
* <xn> national character quoted strings
* <xq> standard quoted strings
* <xqs> quote stop (detect continued strings)
* <xe> extended quoted strings (support backslash escape sequences)
* <xqc> single-quoted strings in C
* <xdolq> $foo$ quoted strings
* <xui> quoted identifier with Unicode escapes
* <xus> quoted string with Unicode escapes
* <xcond> condition of an EXEC SQL IFDEF construct
* <xskip> skipping the inactive part of an EXEC SQL IFDEF construct
*
* Note: we intentionally don't mimic the backend's <xeu> state; we have
* no need to distinguish it from <xe> state.
*
* Remember to add an <<EOF>> case whenever you add a new exclusive state!
* The default one is probably not the right thing.
*/
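/*
 * For instance (an illustrative sketch), the input B'1010' is scanned
 * roughly as: {xbstart} enters <xb>, {xbinside} absorbs "1010", and the
 * closing quote switches to <xqs> to check for a continuation string.
 */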
%x xb
%x xc
%x xd
%x xdc
%x xh
%x xn
%x xq
%x xqs
%x xe
%x xqc
%x xdolq
%x xui
%x xus
%x xcond
%x xskip
/* Additional exclusive states that are specific to ECPG */
%x C SQL incl def def_ident undef
/*
* In order to make the world safe for Windows and Mac clients as well as
* Unix ones, we accept either \n or \r as a newline. A DOS-style \r\n
* sequence will be seen as two successive newlines, but that doesn't cause
* any problems. SQL-style comments, which start with -- and extend to the
* next newline, are treated as equivalent to a single whitespace character.
*
* NOTE a fine point: if there is no newline following --, we will absorb
* everything to the end of the input as a comment. This is correct. Older
* versions of Postgres failed to recognize -- as a comment if the input
* did not end with a newline.
*
* XXX perhaps \f (formfeed) should be treated as a newline as well?
*
* XXX if you change the set of whitespace characters, fix ecpg_isspace()
* to agree.
*/
space [ \t\n\r\f]
horiz_space [ \t\f]
newline [\n\r]
non_newline [^\n\r]
comment ("--"{non_newline}*)
whitespace ({space}+|{comment})
/*
* SQL requires at least one newline in the whitespace separating
* string literals that are to be concatenated. Silly, but who are we
* to argue? Note that {whitespace_with_newline} should not have * after
* it, whereas {whitespace} should generally have a * after it...
*/
horiz_whitespace ({horiz_space}|{comment})
whitespace_with_newline ({horiz_whitespace}*{newline}{whitespace}*)
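/*
 * For example, the two literals in
 *		SELECT 'foo'
 *		'bar';
 * are separated by {whitespace_with_newline} and therefore continue one
 * another, yielding the single literal 'foobar'.
 */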
quote '
/* If we see {quote} then {quotecontinue}, the quoted string continues */
quotecontinue {whitespace_with_newline}{quote}
/*
* {quotecontinuefail} is needed to avoid lexer backup when we fail to match
* {quotecontinue}. It might seem that this could just be {whitespace}*,
* but if there's a dash after {whitespace_with_newline}, it must be consumed
* to see if there's another dash --- which would start a {comment} and thus
* allow continuation of the {quotecontinue} token.
*/
quotecontinuefail {whitespace}*"-"?
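/*
 * Illustrative input:
 *		'foo'
 *		-- comment
 *		'bar'
 * The dash after the newline must be consumed to discover the comment,
 * after which {quotecontinue} can still match and join the two literals.
 */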
/* Bit string */
xbstart [bB]{quote}
xbinside [^']*
/* Hexadecimal number */
xhstart [xX]{quote}
xhinside [^']*
/* National character */
xnstart [nN]{quote}
/* Quoted string that allows backslash escapes */
xestart [eE]{quote}
xeinside [^\\']+
xeescape [\\][^0-7]
xeoctesc [\\][0-7]{1,3}
xehexesc [\\]x[0-9A-Fa-f]{1,2}
xeunicode [\\](u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})
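/*
 * Illustrative inputs: in E'a\tb' the "\t" matches {xeescape}; E'\101'
 * matches {xeoctesc}, E'\x41' {xehexesc}, and E'\u0041' {xeunicode} ---
 * the last three all denote the letter A.
 */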
/* Extended quote
* xqdouble implements embedded quote, ''''
*/
xqstart {quote}
xqdouble {quote}{quote}
xqcquote [\\]{quote}
xqinside [^']+
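/*
 * Illustrative: 'don''t' uses {xqdouble} for the embedded quote, while a
 * single-quoted string in C code may instead use {xqcquote}, as in
 * 'don\'t'.
 */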
/* $foo$ style quotes ("dollar quoting")
* The quoted string starts with $foo$ where "foo" is an optional string
* in the form of an identifier, except that it may not contain "$",
* and extends to the first occurrence of an identical string.
* There is *no* processing of the quoted text.
*
* {dolqfailed} is an error rule to avoid scanner backup when {dolqdelim}
* fails to match its trailing "$".
*/
dolq_start [A-Za-z\200-\377_]
dolq_cont [A-Za-z\200-\377_0-9]
dolqdelim \$({dolq_start}{dolq_cont}*)?\$
dolqfailed \${dolq_start}{dolq_cont}*
dolqinside [^$]+
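/*
 * Illustrative (hypothetical) inputs:
 *		$$Dianne's horse$$
 *		$tag$contains $$ and \' unchanged$tag$
 * Everything between the matching delimiters is taken verbatim.
 */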
/* Double quote
* Allows embedded spaces and other special characters into identifiers.
*/
dquote \"
xdstart {dquote}
xdstop {dquote}
xddouble {dquote}{dquote}
xdinside [^"]+
/* Quoted identifier with Unicode escapes */
xuistart [uU]&{dquote}
/* Quoted string with Unicode escapes */
xusstart [uU]&{quote}
/* special stuff for C strings */
xdcqq \\\\
xdcqdq \\\"
xdcother [^"]
xdcinside ({xdcqq}|{xdcqdq}|{xdcother})
/* C-style comments
*
* The "extended comment" syntax closely resembles allowable operator syntax.
* The tricky part here is to get lex to recognize a string starting with
* slash-star as a comment, when interpreting it as an operator would produce
* a longer match --- remember lex will prefer a longer match! Also, if we
* have something like plus-slash-star, lex will think this is a 3-character
* operator whereas we want to see it as a + operator and a comment start.
* The solution is two-fold:
* 1. append {op_chars}* to xcstart so that it matches as much text as
* {operator} would. Then the tie-breaker (first matching rule of same
* length) ensures xcstart wins. We put back the extra stuff with yyless()
* in case it contains a star-slash that should terminate the comment.
* 2. In the operator rule, check for slash-star within the operator, and
* if found throw it back with yyless(). This handles the plus-slash-star
* problem.
* Dash-dash comments have similar interactions with the operator rule.
*/
xcstart \/\*{op_chars}*
xcstop \*+\/
xcinside [^*/]+
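/*
 * Illustrative: for the input plus-slash-star, {operator} would match all
 * three characters, but the operator action detects the embedded comment
 * start and throws it back with yyless(), so the scanner emits a "+"
 * operator and then begins a comment.
 */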
digit [0-9]
ident_start [A-Za-z\200-\377_]
ident_cont [A-Za-z\200-\377_0-9\$]
identifier {ident_start}{ident_cont}*
array ({ident_cont}|{whitespace}|[\[\]\+\-\*\%\/\(\)\>\.])*
/* Assorted special-case operators and operator-like tokens */
typecast "::"
dot_dot \.\.
colon_equals ":="
/*
* These operator-like tokens (unlike the above ones) also match the {operator}
* rule, which means that they might be overridden by a longer match if they
* are followed by a comment start or a + or - character. Accordingly, if you
* add to this list, you must also add corresponding code to the {operator}
* block to return the correct token in such cases. (This is not needed in
* psqlscan.l since the token value is ignored there.)
*/
equals_greater "=>"
less_equals "<="
greater_equals ">="
less_greater "<>"
not_equals "!="
/*
* "self" is the set of chars that should be returned as single-character
* tokens. "op_chars" is the set of chars that can make up "Op" tokens,
* which can be one or more characters long (but if a single-char token
* appears in the "self" set, it is not to be returned as an Op). Note
* that the sets overlap, but each has some chars that are not in the other.
*
* If you change either set, adjust the character lists appearing in the
* rule for "operator"!
*/
self [,()\[\].;\:\+\-\*\/\%\^\<\>\=]
op_chars [\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=]
operator {op_chars}+
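/*
 * Illustrative: "=" is in {self} and is returned as a single-character
 * token, "<>" matches {less_greater} above, and something like "<->"
 * falls through to {operator} and is returned as a generic Op.
 */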
/* we no longer allow unary minus in numbers.
* instead we pass it separately to parser. there it gets
* coerced via doNegate() -- Leon aug 20 1999
*
* {decimalfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
*
* {realfail1} and {realfail2} are added to prevent the need for scanner
* backup when the {real} rule fails to match completely.
*/
integer {digit}+
decimal (({digit}*\.{digit}+)|({digit}+\.{digit}*))
decimalfail {digit}+\.\.
real ({integer}|{decimal})[Ee][-+]?{digit}+
realfail1 ({integer}|{decimal})[Ee]
realfail2 ({integer}|{decimal})[Ee][-+]
param \${integer}
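/*
 * Illustrative: 42 matches {integer}; 4.2 and .42 match {decimal};
 * 4e2 matches {real}; and 1..10 hits {decimalfail} first, so it is lexed
 * as the three tokens 1, dot_dot, 10.
 */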
/* special characters for other dbms */
/* we have to react differently in compat mode */
informix_special [\$]
other .
/*
* Dollar quoted strings are totally opaque, and no escaping is done on them.
* Other quoted strings must allow some special characters such as single-quote
* and newline.
* Embedded single-quotes are implemented both in the SQL standard
* style of two adjacent single quotes "''" and in the Postgres/Java style
* of escaped-quote "\'".
* Other embedded escaped characters are matched explicitly and the leading
* backslash is dropped from the string.
* Note that xcstart must appear before operator, as explained above!
* Also whitespace (comment) must appear before operator.
*/
/* some stuff needed for ecpg */
exec [eE][xX][eE][cC]
sql [sS][qQ][lL]
define [dD][eE][fF][iI][nN][eE]
include [iI][nN][cC][lL][uU][dD][eE]
include_next [iI][nN][cC][lL][uU][dD][eE]_[nN][eE][xX][tT]
import [iI][mM][pP][oO][rR][tT]
undef [uU][nN][dD][eE][fF]
/* C version of hex number */
xch 0[xX][0-9A-Fa-f]*
ccomment "//".*\n
if [iI][fF]
ifdef [iI][fF][dD][eE][fF]
ifndef [iI][fF][nN][dD][eE][fF]
else [eE][lL][sS][eE]
elif [eE][lL][iI][fF]
endif [eE][nN][dD][iI][fF]
struct [sS][tT][rR][uU][cC][tT]
exec_sql {exec}{space}*{sql}{space}*
ipdigit ({digit}|{digit}{digit}|{digit}{digit}{digit})
ip {ipdigit}\.{ipdigit}\.{ipdigit}\.{ipdigit}
/* we might want to parse all cpp include files */
cppinclude {space}*#{include}{space}*
cppinclude_next {space}*#{include_next}{space}*
/* take care of cpp lines, they may also be continued */
/* first a general line for all commands not starting with "i" */
/* and then the other commands starting with "i", we have to add these
* separately because the cppline production would match on "include" too
*/
cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+\/)|.|\\{space}*{newline})*{newline}
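/*
 * Illustrative (hypothetical) inputs: "#define FOO 1" and "#if FOO" match
 * {cppline}, including continuation lines written with a trailing
 * backslash, while "#include <sqlca.h>" is picked up by {cppinclude}
 * instead so that parse_include() can examine it.
 */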
%%
%{
	/* code to execute during start of each call of yylex() */
	token_start = NULL;
%}
<SQL>{
{whitespace} {
	/* ignore */
}
} /* <SQL> */

<C,SQL>{
{xcstart} {
	token_start = yytext;
	state_before_str_start = YYSTATE;
	xcdepth = 0;
	BEGIN(xc);
	/* Put back any characters past slash-star; see above */
	yyless(2);
	fputs("/*", yyout);
}
} /* <C,SQL> */

<xc>{
{xcstart} {
if (state_before_str_start == SQL)
{
xcdepth++;
/* Put back any characters past slash-star; see above */
yyless(2);
fputs("/_*", yyout);
}
else if (state_before_str_start == C)
{
ECHO;
}
}
{xcstop} {
if (state_before_str_start == SQL)
{
if (xcdepth <= 0)
{
ECHO;
BEGIN(SQL);
token_start = NULL;
}
else
{
xcdepth--;
fputs("*_/", yyout);
}
}
else if (state_before_str_start == C)
{
ECHO;
BEGIN(C);
token_start = NULL;
}
}
{xcinside} {
ECHO;
}
{op_chars} {
ECHO;
}
\*+ {
ECHO;
}
<<EOF>> {
mmfatal(PARSE_ERROR, "unterminated /* comment");
}
} /* <xc> */
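/*
 * Illustrative note on the rules above: C comments do not nest, so when
 * a nested SQL comment is copied through in SQL mode, the inner
 * delimiters are rewritten as "/_*" and "*_/".  The C compiler then
 * sees a single slash-star ... star-slash comment, while the true
 * nesting depth is tracked in xcdepth.
 */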
<SQL>{
{xbstart} {
token_start = yytext;
BEGIN(xb);
startlit();
addlitchar('b');
}
} /* <SQL> */
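/*
 * Example: B'0101' is scanned here; literalbuf ends up containing
 * "b0101", and the <xqs> lookahead rules below return it as BCONST
 * once the closing quote proves final.
 */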
<xh>{xhinside} |
<xb>{xbinside} {
addlit(yytext, yyleng);
}
<xb><<EOF>> { mmfatal(PARSE_ERROR, "unterminated bit string literal"); }
<SQL>{xhstart} {
token_start = yytext;
BEGIN(xh);
startlit();
addlitchar('x');
}
<xh><<EOF>> { mmfatal(PARSE_ERROR, "unterminated hexadecimal string literal"); }
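/*
 * Example: X'1FF' is accumulated as "x1FF" and returned as XCONST by
 * the <xqs> rules below.
 */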
<C>{xqstart} {
token_start = yytext;
state_before_str_start = YYSTATE;
BEGIN(xqc);
startlit();
}
<SQL>{
{xnstart} {
/*
 * National character.
 * Transfer it as-is to the backend.
 */
token_start = yytext;
state_before_str_start = YYSTATE;
BEGIN(xn);
startlit();
}
{xqstart} {
token_start = yytext;
state_before_str_start = YYSTATE;
BEGIN(xq);
startlit();
}
{xestart} {
token_start = yytext;
state_before_str_start = YYSTATE;
BEGIN(xe);
startlit();
}
{xusstart} {
token_start = yytext;
state_before_str_start = YYSTATE;
BEGIN(xus);
startlit();
}
} /* <SQL> */
<xb,xh,xq,xqc,xe,xn,xus>{quote} {
/*
* When we are scanning a quoted string and see an end
* quote, we must look ahead for a possible continuation.
* If we don't see one, we know the end quote was in fact
* the end of the string. To reduce the lexer table size,
* we use a single "xqs" state to do the lookahead for all
* types of strings.
*/
state_before_str_stop = YYSTATE;
BEGIN(xqs);
}
<xqs>{quotecontinue} {
/*
* Found a quote continuation, so return to the in-quote
* state and continue scanning the literal. Nothing is
* added to the literal's contents.
*/
BEGIN(state_before_str_stop);
}
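/*
 * Example: the two literals in
 *     'foo'
 *     'bar'
 * are joined through this rule into a single SCONST containing
 * "foobar", per SQL's literal-continuation syntax (the whitespace
 * between them must include a newline).
 */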
<xqs>{quotecontinuefail} |
<xqs>{other} |
<xqs><<EOF>> {
/*
* Failed to see a quote continuation. Throw back
* everything after the end quote, and handle the string
* according to the state we were in previously.
*/
yyless(0);
BEGIN(state_before_str_start);
switch (state_before_str_stop)
{
case xb:
if (literalbuf[strspn(literalbuf + 1, "01") + 1] != '\0')
mmerror(PARSE_ERROR, ET_ERROR, "invalid bit string literal");
base_yylval.str = mm_strdup(literalbuf);
return BCONST;
case xh:
base_yylval.str = mm_strdup(literalbuf);
return XCONST;
case xq:
/* fallthrough */
case xqc:
base_yylval.str = psprintf("'%s'", literalbuf);
return SCONST;
case xe:
base_yylval.str = psprintf("E'%s'", literalbuf);
return SCONST;
case xn:
base_yylval.str = psprintf("N'%s'", literalbuf);
return SCONST;
case xus:
base_yylval.str = psprintf("U&'%s'", literalbuf);
return USCONST;
default:
mmfatal(PARSE_ERROR, "unhandled previous state in xqs");
}
}
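/*
 * Illustration: when 'abc' is followed by, say, a comma, the <xqs>
 * lookahead fails to find a continuation; yyless(0) pushes back
 * everything after the end quote, SCONST "'abc'" is returned, and
 * scanning resumes in the state that was active before the literal
 * started.
 */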
<xq,xe,xn,xus>{xqdouble} { addlitchar('\''); }
<xqc>{xqcquote} {
addlitchar('\\');
addlitchar('\'');
}
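/*
 * In <xqc> (a single-quoted string inside C code), a backslash-quote
 * sequence is stored as both characters, so the escape survives when
 * the literal is emitted back into the generated C file.
 */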
<xq,xqc,xn,xus>{xqinside} { addlit(yytext, yyleng); }
<xe>{xeinside} {
addlit(yytext, yyleng);
}
<xe>{xeunicode} {
addlit(yytext, yyleng);
}
<xe>{xeescape} {
addlit(yytext, yyleng);
}
<xe>{xeoctesc} {
addlit(yytext, yyleng);
}
<xe>{xehexesc} {
addlit(yytext, yyleng);
}
<xe>. {
/* This is only needed for \ just before EOF */
addlitchar(yytext[0]);
}
<xq,xqc,xe,xn,xus><<EOF>> { mmfatal(PARSE_ERROR, "unterminated quoted string"); }
<SQL>{
{dolqdelim} {
token_start = yytext;
if (dolqstart)
free(dolqstart);
dolqstart = mm_strdup(yytext);
BEGIN(xdolq);
startlit();
addlit(yytext, yyleng);
}
{dolqfailed} {
/* throw back all but the initial "$" */
yyless(1);
/* and treat it as {other} */
return yytext[0];
}
} /* <SQL> */
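/*
 * Example: $tag$Dianne's horse$tag$ enters <xdolq> here with dolqstart
 * set to "$tag$"; the body is collected verbatim until the identical
 * delimiter appears again.
 */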
<xdolq>{dolqdelim} {
if (strcmp(yytext, dolqstart) == 0)
{
addlit(yytext, yyleng);
free(dolqstart);
dolqstart = NULL;
BEGIN(SQL);
base_yylval.str = mm_strdup(literalbuf);
return SCONST;
}
else
{
/*
* When we fail to match $...$ to dolqstart, transfer
* the $... part to the output, but put back the final
* $ for rescanning. Consider $delim$...$junk$delim$
*/
addlit(yytext, yyleng - 1);
yyless(yyleng - 1);
}
}
<xdolq>{dolqinside} {
addlit(yytext, yyleng);
}
<xdolq>{dolqfailed} {
addlit(yytext, yyleng);
}
<xdolq>. {
/* single quote or dollar sign */
addlitchar(yytext[0]);
}
<xdolq><<EOF>> { mmfatal(PARSE_ERROR, "unterminated dollar-quoted string"); }
<SQL>{
{xdstart} {
state_before_str_start = YYSTATE;
BEGIN(xd);
startlit();
}
{xuistart} {
state_before_str_start = YYSTATE;
BEGIN(xui);
startlit();
}
} /* <SQL> */
<xd>{xdstop} {
BEGIN(state_before_str_start);
if (literallen == 0)
mmerror(PARSE_ERROR, ET_ERROR, "zero-length delimited identifier");
/* The backend will truncate the identifier here. We do not, as it does not change the result. */
base_yylval.str = mm_strdup(literalbuf);
return CSTRING;
}
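/*
 * Example: "My Table" (a delimited identifier) is returned as CSTRING
 * with the surrounding quotes stripped and any doubled quotes reduced
 * by the xddouble rule below; no NAMEDATALEN truncation is applied.
 */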
<xdc>{xdstop} {
BEGIN(state_before_str_start);
base_yylval.str = mm_strdup(literalbuf);
return CSTRING;
}
<xui>{dquote} {
BEGIN(state_before_str_start);
if (literallen == 0)
mmerror(PARSE_ERROR, ET_ERROR, "zero-length delimited identifier");
/* The backend will truncate the identifier here. We do not, as it does not change the result. */
base_yylval.str = psprintf("U&\"%s\"", literalbuf);
return UIDENT;
}
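/*
 * Example: U&"d\0061t" is handed back as UIDENT with the U&"..."
 * wrapping reconstructed around the raw contents; decoding of the
 * escape sequences is left to the filter code in parser.c.
 */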
<xd,xui>{xddouble} {
addlitchar('"');
}
<xd,xui>{xdinside} {
addlit(yytext, yyleng);
}
<xd,xui><<EOF>> { mmfatal(PARSE_ERROR, "unterminated quoted identifier"); }
<C>{xdstart} {
state_before_str_start = YYSTATE;
BEGIN(xdc);
startlit();
}
<xdc>{xdcinside} {
addlit(yytext, yyleng);
}
<xdc><<EOF>> { mmfatal(PARSE_ERROR, "unterminated quoted string"); }
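/*
 * A double-quoted string in C code, e.g. "hello", is collected in
 * <xdc> and returned by the <xdc>{xdstop} rule above as a CSTRING
 * token whose value omits the quotes.
 */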
<SQL>{
{typecast} {
return TYPECAST;
}
{dot_dot} {
return DOT_DOT;
}
{colon_equals} {
return COLON_EQUALS;
}
{equals_greater} {
return EQUALS_GREATER;
}
{less_equals} {
return LESS_EQUALS;
}
{greater_equals} {
return GREATER_EQUALS;
}
{less_greater} {
/* We accept both "<>" and "!=" as meaning NOT_EQUALS */
return NOT_EQUALS;
}
{not_equals} {
/* We accept both "<>" and "!=" as meaning NOT_EQUALS */
return NOT_EQUALS;
}
{informix_special} {
/* are we simulating Informix? */
if (INFORMIX_MODE)
{
unput(':');
}
else
return yytext[0];
}
{self} {
/*
* We may find a ';' inside a structure
* definition in a TYPE or VAR statement.
* This is not an EOL marker.
*/
if (yytext[0] == ';' && struct_level == 0)
BEGIN(C);
return yytext[0];
}
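/*
 * Example: the ";" terminating "EXEC SQL COMMIT;" switches the scanner
 * back to <C>, whereas a ";" separating members inside a struct
 * declaration (struct_level > 0) is passed through without leaving
 * SQL mode.
 */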
{operator} {
/*
* Check for embedded slash-star or dash-dash; those
* are comment starts, so operator must stop there.
* Note that slash-star or dash-dash at the first
* character will match a prior rule, not this one.
*/
int nchars = yyleng;
char *slashstar = strstr(yytext, "/*");
char *dashdash = strstr(yytext, "--");
if (slashstar && dashdash)
{
/* if both appear, take the first one */
if (slashstar > dashdash)
slashstar = dashdash;
}
else if (!slashstar)
slashstar = dashdash;
if (slashstar)
nchars = slashstar - yytext;
/*
* For SQL compatibility, '+' and '-' cannot be the
* last char of a multi-char operator unless the operator
* contains chars that are not in SQL operators.
* The idea is to lex '=-' as two operators, but not
* to forbid operator names like '?-' that could not be
* sequences of SQL operators.
*/
if (nchars > 1 &&
(yytext[nchars - 1] == '+' ||
yytext[nchars - 1] == '-'))
{
int ic;
for (ic = nchars - 2; ic >= 0; ic--)
{
char c = yytext[ic];
if (c == '~' || c == '!' || c == '@' ||
c == '#' || c == '^' || c == '&' ||
c == '|' || c == '`' || c == '?' ||
c == '%')
break;
}
if (ic < 0)
{
/*
* didn't find a qualifying character, so remove
* all trailing [+-]
*/
do {
nchars--;
} while (nchars > 1 &&
(yytext[nchars - 1] == '+' ||
yytext[nchars - 1] == '-'));
}
}
if (nchars < yyleng)
{
/* Strip the unwanted chars from the token */
yyless(nchars);
/*
* If what we have left is only one char, and it's
* one of the characters matching "self", then
* return it as a character token the same way
* that the "self" rule would have.
*/
if (nchars == 1 &&
strchr(",()[].;:+-*/%^<>=", yytext[0]))
return yytext[0];
/*
* Likewise, if what we have left is two chars, and
* those match the tokens ">=", "<=", "=>", "<>" or
* "!=", then we must return the appropriate token
* rather than the generic Op.
*/
if (nchars == 2)
{
if (yytext[0] == '=' && yytext[1] == '>')
return EQUALS_GREATER;
if (yytext[0] == '>' && yytext[1] == '=')
return GREATER_EQUALS;
if (yytext[0] == '<' && yytext[1] == '=')
return LESS_EQUALS;
if (yytext[0] == '<' && yytext[1] == '>')
return NOT_EQUALS;
if (yytext[0] == '!' && yytext[1] == '=')
return NOT_EQUALS;
}
}
base_yylval.str = mm_strdup(yytext);
return Op;
}
{param} {
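/* "$n" parameter: convert the digits after the '$' (e.g. "$3" -> 3) */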
base_yylval.ival = atol(yytext+1);
return PARAM;
}
{ip} {
base_yylval.str = mm_strdup(yytext);
return IP;
}
} /* <SQL> */
<C,SQL>{
{integer} {
return process_integer_literal(yytext, &base_yylval);
}
{decimal} {
base_yylval.str = mm_strdup(yytext);
return FCONST;
}
{decimalfail} {
/* throw back the .., and treat as integer */
yyless(yyleng - 2);
return process_integer_literal(yytext, &base_yylval);
}
{real} {
base_yylval.str = mm_strdup(yytext);
return FCONST;
}
{realfail1} {
/*
* throw back the [Ee], and figure out whether what
* remains is an {integer} or {decimal}.
*/
yyless(yyleng - 1);
return process_integer_literal(yytext, &base_yylval);
}
{realfail2} {
/* throw back the [Ee][+-], and proceed as above */
yyless(yyleng - 2);
return process_integer_literal(yytext, &base_yylval);
}
} /* <C,SQL> */
<SQL>{
:{identifier}((("->"|\.){identifier})|(\[{array}\]))* {
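/* C host-variable reference; strip the leading ':' */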
base_yylval.str = mm_strdup(yytext+1);
return CVARIABLE;
}
{identifier} {
if (!isdefine())
{
int kwvalue;
/* Is it an SQL/ECPG keyword? */
kwvalue = ScanECPGKeywordLookup(yytext);
if (kwvalue >= 0)
return kwvalue;
/* Is it a C keyword? */
kwvalue = ScanCKeywordLookup(yytext);
if (kwvalue >= 0)
return kwvalue;
/*
* None of the above. Return it as an identifier.
*
* The backend will attempt to truncate and case-fold
* the identifier, but I see no good reason for ecpg
* to do so; that's just another way that ecpg could get
* out of step with the backend.
*/
base_yylval.str = mm_strdup(yytext);
return IDENT;
}
}
{other} {
return yytext[0];
}
} /* <SQL> */
/*
* Begin ECPG-specific rules
*/
<C>{exec_sql} { BEGIN(SQL); return SQL_START; }
<C>{informix_special} {
/* are we simulating Informix? */
if (INFORMIX_MODE)
{
BEGIN(SQL);
return SQL_START;
}
else
return S_ANYTHING;
}
<C>{ccomment} { ECHO; }
<C>{xch} {
char *endptr;
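/*
* A hex literal that strtoul() consumes completely is returned as
* ICONST; on overflow or trailing junk the raw text is passed
* through as SCONST instead.
*/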
errno = 0;
base_yylval.ival = strtoul((char *) yytext, &endptr, 16);
if (*endptr != '\0' || errno == ERANGE)
{
errno = 0;
base_yylval.str = mm_strdup(yytext);
return SCONST;
}
return ICONST;
}
<C>{cppinclude} {
if (system_includes)
{
include_next = false;
BEGIN(incl);
}
else
{
base_yylval.str = mm_strdup(yytext);
return CPP_LINE;
}
}
<C>{cppinclude_next} {
if (system_includes)
{
include_next = true;
BEGIN(incl);
}
else
{
base_yylval.str = mm_strdup(yytext);
return CPP_LINE;
}
}
<C,SQL>{cppline} {
base_yylval.str = mm_strdup(yytext);
return CPP_LINE;
}
<C>{identifier} {
/*
* Try to detect a function name:
* look for identifiers at the global scope;
* keep the last identifier before the first '(' and '{'
*/
if (braces_open == 0 && parenths_open == 0)
{
if (current_function)
free(current_function);
current_function = mm_strdup(yytext);
}
/* Informix uses SQL defines only in SQL space */
/* however, some defines have to be taken care of for compatibility */
if ((!INFORMIX_MODE || !isinformixdefine()) && !isdefine())
{
int kwvalue;
kwvalue = ScanCKeywordLookup(yytext);
if (kwvalue >= 0)
return kwvalue;
else
{
base_yylval.str = mm_strdup(yytext);
return IDENT;
}
}
}
<C>{xcstop} { mmerror(PARSE_ERROR, ET_ERROR, "nested /* ... */ comments"); }
<C>":" { return ':'; }
<C>";" { return ';'; }
<C>"," { return ','; }
<C>"*" { return '*'; }
<C>"%" { return '%'; }
<C>"/" { return '/'; }
<C>"+" { return '+'; }
<C>"-" { return '-'; }
<C>"(" { parenths_open++; return '('; }
<C>")" { parenths_open--; return ')'; }
<C,xskip>{space} { ECHO; }
<C>\{ { return '{'; }
<C>\} { return '}'; }
<C>\[ { return '['; }
<C>\] { return ']'; }
<C>\= { return '='; }
<C>"->" { return S_MEMBER; }
<C>">>" { return S_RSHIFT; }
<C>"<<" { return S_LSHIFT; }
<C>"||" { return S_OR; }
<C>"&&" { return S_AND; }
<C>"++" { return S_INC; }
<C>"--" { return S_DEC; }
<C>"==" { return S_EQUAL; }
<C>"!=" { return S_NEQUAL; }
<C>"+=" { return S_ADD; }
<C>"-=" { return S_SUB; }
<C>"*=" { return S_MUL; }
<C>"/=" { return S_DIV; }
<C>"%=" { return S_MOD; }
<C>"->*" { return S_MEMPOINT; }
<C>".*" { return S_DOTPOINT; }
<C>{other} { return S_ANYTHING; }
<C>{exec_sql}{define}{space}* { BEGIN(def_ident); }
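/*
* EXEC SQL DEFINE: the <def_ident> rule below collects the name and
* the <def> rules collect the replacement text, e.g. (illustrative
* example only): EXEC SQL DEFINE MYLENGTH 20;
*/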
<C>{informix_special}{define}{space}* {
/* are we simulating Informix? */
if (INFORMIX_MODE)
{
BEGIN(def_ident);
}
else
{
yyless(1);
return S_ANYTHING;
}
}
<C>{exec_sql}{undef}{space}* { BEGIN(undef); }
<C>{informix_special}{undef}{space}* {
/* are we simulating Informix? */
if (INFORMIX_MODE)
{
BEGIN(undef);
}
else
{
yyless(1);
return S_ANYTHING;
}
}
<undef>{identifier}{space}*";" {
struct _defines *ptr, *ptr2 = NULL;
int i;
/*
* Skip the ";" and trailing whitespace. Note that yytext
* contains at least one non-space character plus the ";"
*/
for (i = strlen(yytext)-2;
i > 0 && ecpg_isspace(yytext[i]);
i-- )
;
yytext[i+1] = '\0';
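/* unlink the matching definition from the list and free it */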
for (ptr = defines; ptr != NULL; ptr2 = ptr, ptr = ptr->next)
{
if (strcmp(yytext, ptr->olddef) == 0)
{
if (ptr2 == NULL)
defines = ptr->next;
else
ptr2->next = ptr->next;
free(ptr->newdef);
free(ptr->olddef);
free(ptr);
break;
}
}
BEGIN(C);
}
<undef>{other}|\n {
mmfatal(PARSE_ERROR, "missing identifier in EXEC SQL UNDEF command");
yyterminate();
}
<C>{exec_sql}{include}{space}* { BEGIN(incl); }
<C>{informix_special}{include}{space}* {
/* are we simulating Informix? */
if (INFORMIX_MODE)
{
BEGIN(incl);
}
else
{
yyless(1);
return S_ANYTHING;
}
}
<C,xskip>{exec_sql}{ifdef}{space}* { ifcond = true; BEGIN(xcond); }
<C,xskip>{informix_special}{ifdef}{space}* {
/* are we simulating Informix? */
if (INFORMIX_MODE)
{
ifcond = true;
BEGIN(xcond);
}
else
{
yyless(1);
return S_ANYTHING;
}
}
<C,xskip>{exec_sql}{ifndef}{space}* { ifcond = false; BEGIN(xcond); }
<C,xskip>{informix_special}{ifndef}{space}* {
/* are we simulating Informix? */
if (INFORMIX_MODE)
{
ifcond = false;
BEGIN(xcond);
}
else
{
yyless(1);
return S_ANYTHING;
}
}
<C,xskip>{exec_sql}{elif}{space}* { /* pop stack */
if ( preproc_tos == 0 ) {
mmfatal(PARSE_ERROR, "missing matching \"EXEC SQL IFDEF\" / \"EXEC SQL IFNDEF\"");
}
else if ( stacked_if_value[preproc_tos].else_branch )
mmfatal(PARSE_ERROR, "missing \"EXEC SQL ENDIF;\"");
else
preproc_tos--;
ifcond = true; BEGIN(xcond);
}
<C,xskip>{informix_special}{elif}{space}* {
/* are we simulating Informix? */
if (INFORMIX_MODE)
{
if (preproc_tos == 0)
mmfatal(PARSE_ERROR, "missing matching \"EXEC SQL IFDEF\" / \"EXEC SQL IFNDEF\"");
else if (stacked_if_value[preproc_tos].else_branch)
mmfatal(PARSE_ERROR, "missing \"EXEC SQL ENDIF;\"");
else
preproc_tos--;
ifcond = true;
BEGIN(xcond);
}
else
{
yyless(1);
return S_ANYTHING;
}
}
<C,xskip>{exec_sql}{else}{space}*";" { /* only EXEC SQL ENDIF pops the stack, so guard against a duplicate ELSE */
if (stacked_if_value[preproc_tos].else_branch)
mmfatal(PARSE_ERROR, "more than one EXEC SQL ELSE");
else
{
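/*
* The ELSE branch is live only if the enclosing level is live
* and the preceding IF/ELIF branch was not taken.
*/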
stacked_if_value[preproc_tos].else_branch = true;
stacked_if_value[preproc_tos].condition =
(stacked_if_value[preproc_tos-1].condition &&
!stacked_if_value[preproc_tos].condition);
if (stacked_if_value[preproc_tos].condition)
BEGIN(C);
else
BEGIN(xskip);
}
}
<C,xskip>{informix_special}{else}{space}*";" {
/* are we simulating Informix? */
if (INFORMIX_MODE)
{
if (stacked_if_value[preproc_tos].else_branch)
mmfatal(PARSE_ERROR, "more than one EXEC SQL ELSE");
else
{
stacked_if_value[preproc_tos].else_branch = true;
stacked_if_value[preproc_tos].condition =
(stacked_if_value[preproc_tos-1].condition &&
!stacked_if_value[preproc_tos].condition);
if (stacked_if_value[preproc_tos].condition)
BEGIN(C);
else
BEGIN(xskip);
}
}
else
{
yyless(1);
return S_ANYTHING;
}
}
<C,xskip>{exec_sql}{endif}{space}*";" {
if (preproc_tos == 0)
mmfatal(PARSE_ERROR, "unmatched EXEC SQL ENDIF");
else
preproc_tos--;
if (stacked_if_value[preproc_tos].condition)
BEGIN(C);
else
BEGIN(xskip);
}
<C,xskip>{informix_special}{endif}{space}*";" {
/* are we simulating Informix? */
if (INFORMIX_MODE)
{
if (preproc_tos == 0)
mmfatal(PARSE_ERROR, "unmatched EXEC SQL ENDIF");
else
preproc_tos--;
if (stacked_if_value[preproc_tos].condition)
BEGIN(C);
else
BEGIN(xskip);
}
else
{
yyless(1);
return S_ANYTHING;
}
}
<xskip>{other} { /* ignore */ }
<xcond>{identifier}{space}*";" {
if (preproc_tos >= MAX_NESTED_IF-1)
mmfatal(PARSE_ERROR, "too many nested EXEC SQL IFDEF conditions");
else
{
struct _defines *defptr;
unsigned int i;
/*
* Skip the ";" and trailing whitespace. Note that yytext
* contains at least one non-space character plus the ";"
*/
for (i = strlen(yytext)-2;
i > 0 && ecpg_isspace(yytext[i]);
i-- )
;
yytext[i+1] = '\0';
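/*
* Look the name up in the defines list; ifcond records whether
* this level came from IFDEF (true) or IFNDEF (false).
*/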
for (defptr = defines;
defptr != NULL &&
strcmp(yytext, defptr->olddef) != 0;
defptr = defptr->next)
/* skip */ ;
preproc_tos++;
stacked_if_value[preproc_tos].else_branch = false;
stacked_if_value[preproc_tos].condition =
(defptr ? ifcond : !ifcond) && stacked_if_value[preproc_tos-1].condition;
}
if (stacked_if_value[preproc_tos].condition)
BEGIN(C);
else
BEGIN(xskip);
}
<xcond>{other}|\n {
mmfatal(PARSE_ERROR, "missing identifier in EXEC SQL IFDEF command");
yyterminate();
}
<def_ident>{identifier} {
old = mm_strdup(yytext);
BEGIN(def);
startlit();
}
<def_ident>{other}|\n {
mmfatal(PARSE_ERROR, "missing identifier in EXEC SQL DEFINE command");
yyterminate();
}
<def>{space}*";" {
struct _defines *ptr, *this;
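/*
* If the name is already defined, replace its expansion;
* otherwise prepend a new entry to the defines list.
*/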
for (ptr = defines; ptr != NULL; ptr = ptr->next)
{
if (strcmp(old, ptr->olddef) == 0)
{
free(ptr->newdef);
ptr->newdef = mm_strdup(literalbuf);
break; /* stop here so that ptr == NULL below really means "not found" */
}
}
if (ptr == NULL)
{
this = (struct _defines *) mm_alloc(sizeof(struct _defines));
/* initial definition */
this->olddef = old;
this->newdef = mm_strdup(literalbuf);
this->next = defines;
this->used = NULL;
defines = this;
}
BEGIN(C);
}
<def>[^;] { addlit(yytext, yyleng); }
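/* An include file name may be <bracketed>, "quoted", or bare (up to the ';') */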
<incl>\<[^\>]+\>{space}*";"? { parse_include(); }
<incl>{dquote}{xdinside}{dquote}{space}*";"? { parse_include(); }
<incl>[^;\<\>\"]+";" { parse_include(); }
<incl>{other}|\n {
mmfatal(PARSE_ERROR, "syntax error in EXEC SQL INCLUDE command");
yyterminate();
}
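/*
 * At EOF, pop the include/define buffer stack if it is non-empty;
 * otherwise complain about any unterminated EXEC SQL IFDEF and stop.
 */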
<<EOF>> {
if (yy_buffer == NULL)
{
if (preproc_tos > 0)
{
preproc_tos = 0;
mmfatal(PARSE_ERROR, "missing \"EXEC SQL ENDIF;\"");
}
yyterminate();
}
else
{
struct _yy_buffer *yb = yy_buffer;
int i;
struct _defines *ptr;
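/* make the define that was expanding into this buffer usable again */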
for (ptr = defines; ptr; ptr = ptr->next)
if (ptr->used == yy_buffer)
{
ptr->used = NULL;
break;
}
if (yyin != NULL)
fclose(yyin);
yy_delete_buffer(YY_CURRENT_BUFFER);
yy_switch_to_buffer(yy_buffer->buffer);
yylineno = yy_buffer->lineno;
/* We have to output the filename only if we change files here */
i = strcmp(input_filename, yy_buffer->filename);
free(input_filename);
input_filename = yy_buffer->filename;
yy_buffer = yy_buffer->next;
free(yb);
if (i != 0)
output_line_number();
}
}
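/* catch-all: anything that reaches this rule indicates a scanner bug */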
<INITIAL>{other}|\n { mmfatal(PARSE_ERROR, "internal error: unreachable state; please report this to <pgsql-bugs@lists.postgresql.org>"); }
%%
/* LCOV_EXCL_STOP */
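/*
 * Reset the scanner's per-file state: brace/parenthesis tracking, the
 * EXEC SQL IFDEF stack, and the literal buffer; start out in C mode.
 */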
void
lex_init(void)
{
braces_open = 0;
parenths_open = 0;
current_function = NULL;
preproc_tos = 0;
yylineno = 1;
ifcond = true;
stacked_if_value[preproc_tos].condition = ifcond;
stacked_if_value[preproc_tos].else_branch = false;
/* initialize literal buffer to a reasonable but expansible size */
if (literalbuf == NULL)
{
literalalloc = 1024;
literalbuf = (char *) mm_alloc(literalalloc);
}
startlit();
BEGIN(C);
}
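/* append a string to the literal buffer, enlarging it as needed */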
static void
addlit(char *ytext, int yleng)
{
/* enlarge buffer if needed */
if ((literallen+yleng) >= literalalloc)
{
do
literalalloc *= 2;
while ((literallen+yleng) >= literalalloc);
literalbuf = (char *) realloc(literalbuf, literalalloc);
}
/* append new data, add trailing null */
memcpy(literalbuf+literallen, ytext, yleng);
literallen += yleng;
literalbuf[literallen] = '\0';
}
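/* append a single character to the literal buffer */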
static void
addlitchar(unsigned char ychar)
{
/* enlarge buffer if needed */
if ((literallen+1) >= literalalloc)
{
literalalloc *= 2;
literalbuf = (char *) realloc(literalbuf, literalalloc);
}
/* append new data, add trailing null */
literalbuf[literallen] = ychar;
literallen += 1;
literalbuf[literallen] = '\0';
}
/*
* Process {integer}. Note this will also do the right thing with {decimal},
* ie digits and a decimal point.
*/
static int
process_integer_literal(const char *token, YYSTYPE *lval)
{
int val;
char *endptr;
errno = 0;
val = strtoint(token, &endptr, 10);
if (*endptr != '\0' || errno == ERANGE)
{
/* integer too large (or contains decimal pt), treat it as a float */
lval->str = mm_strdup(token);
return FCONST;
}
lval->ival = val;
return ICONST;
}
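/*
 * Handle EXEC SQL INCLUDE: save the current scanner context on the buffer
 * stack, extract the file name from yytext, locate and open the file
 * (appending ".h" if need be), and switch the scanner to it.
 */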
static void
parse_include(void)
{
/* got the include file name */
struct _yy_buffer *yb;
struct _include_path *ip;
char inc_file[MAXPGPATH];
unsigned int i;
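/* push the current scanner context so <<EOF>> can pop back to it */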
yb = mm_alloc(sizeof(struct _yy_buffer));
yb->buffer = YY_CURRENT_BUFFER;
yb->lineno = yylineno;
yb->filename = input_filename;
yb->next = yy_buffer;
yy_buffer = yb;
/*
 * Strip the trailing ";", if there is one, along with any trailing
 * whitespace.  Note that yytext contains at least one non-space
 * character in addition to the ";".
 */
for (i = strlen(yytext)-2;
i > 0 && ecpg_isspace(yytext[i]);
i--)
;
if (yytext[i] == ';')
i--;
yytext[i+1] = '\0';
yyin = NULL;
/* If the file name is enclosed in '"', strip the quotes and look only in '.'. */
/* In Informix mode, however, all include paths are searched unless the name starts with '/'. */
if (yytext[0] == '"' && yytext[i] == '"' &&
((compat != ECPG_COMPAT_INFORMIX && compat != ECPG_COMPAT_INFORMIX_SE) || yytext[1] == '/'))
{
yytext[i] = '\0';
memmove(yytext, yytext+1, strlen(yytext));
strlcpy(inc_file, yytext, sizeof(inc_file));
yyin = fopen(inc_file, "r");
if (!yyin)
{
if (strlen(inc_file) <= 2 || strcmp(inc_file + strlen(inc_file) - 2, ".h") != 0)
{
strcat(inc_file, ".h");
yyin = fopen(inc_file, "r");
}
}
}
else
{
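/* strip any surrounding quotes or angle brackets, then search the include path */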
if ((yytext[0] == '"' && yytext[i] == '"') || (yytext[0] == '<' && yytext[i] == '>'))
{
yytext[i] = '\0';
memmove(yytext, yytext+1, strlen(yytext));
}
for (ip = include_paths; yyin == NULL && ip != NULL; ip = ip->next)
{
if (strlen(ip->path) + strlen(yytext) + 4 > MAXPGPATH)
{
fprintf(stderr, _("Error: include path \"%s/%s\" is too long on line %d, skipping\n"), ip->path, yytext, yylineno);
continue;
}
snprintf(inc_file, sizeof(inc_file), "%s/%s", ip->path, yytext);
yyin = fopen(inc_file, "r");
if (!yyin)
{
if (strcmp(inc_file + strlen(inc_file) - 2, ".h") != 0)
{
strcat(inc_file, ".h");
yyin = fopen(inc_file, "r");
}
}
/* if the command was "include_next" we have to disregard the first hit */
if (yyin && include_next)
{
fclose(yyin);
yyin = NULL;
include_next = false;
}
}
}
if (!yyin)
mmfatal(NO_INCLUDE_FILE, "could not open include file \"%s\" on line %d", yytext, yylineno);
input_filename = mm_strdup(inc_file);
yy_switch_to_buffer(yy_create_buffer(yyin, YY_BUF_SIZE));
yylineno = 1;
output_line_number();
BEGIN(C);
}
/*
* ecpg_isspace() --- return true if flex scanner considers char whitespace
*/
static bool
ecpg_isspace(char ch)
{
if (ch == ' ' ||
ch == '\t' ||
ch == '\n' ||
ch == '\r' ||
ch == '\f')
return true;
return false;
}
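/*
 * Check whether yytext matches an EXEC SQL DEFINE that is not already
 * being expanded; if so, push the current buffer and rescan the
 * replacement text.  Returns true if a substitution was started.
 */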
static bool
isdefine(void)
{
struct _defines *ptr;
/* is it a define? */
for (ptr = defines; ptr; ptr = ptr->next)
{
if (strcmp(yytext, ptr->olddef) == 0 && ptr->used == NULL)
{
struct _yy_buffer *yb;
yb = mm_alloc(sizeof(struct _yy_buffer));
yb->buffer = YY_CURRENT_BUFFER;
yb->lineno = yylineno;
yb->filename = mm_strdup(input_filename);
yb->next = yy_buffer;
ptr->used = yy_buffer = yb;
yy_scan_string(ptr->newdef);
return true;
}
}
return false;
}
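/*
 * Substitute the PostgreSQL equivalents for the Informix type names dec_t,
 * intrvl_t, and dtime_t by pushing the current buffer and rescanning the
 * replacement text.  Returns true if a substitution was started.
 */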
static bool
isinformixdefine(void)
{
const char *new = NULL;
if (strcmp(yytext, "dec_t") == 0)
new = "decimal";
else if (strcmp(yytext, "intrvl_t") == 0)
new = "interval";
else if (strcmp(yytext, "dtime_t") == 0)
new = "timestamp";
if (new)
{
struct _yy_buffer *yb;
yb = mm_alloc(sizeof(struct _yy_buffer));
yb->buffer = YY_CURRENT_BUFFER;
yb->lineno = yylineno;
yb->filename = mm_strdup(input_filename);
yb->next = yy_buffer;
yy_buffer = yb;
yy_scan_string(new);
return true;
}
return false;
}