Avoid premature de-doubling of quote marks in ECPG strings.

If you write the literal 'abc''def' in an EXEC SQL command, that will come out the other end as 'abc'def', triggering a syntax error in the backend. Likewise, "abc""def" is reduced to "abc"def" which is wrong syntax for a quoted identifier. The cause is that the lexer thinks it should emit just one quote mark, whereas what it really should do is keep the string as-is. Add some docs and test cases, too. Although this seems clearly a bug, I fear users wouldn't appreciate changing it in minor releases. Some may well be working around it by applying an extra doubling of affected quotes, as for example sql/dyntest.pgc has been doing. Per investigation of a report from 1250kv, although this isn't exactly what he/she was on about. Discussion: https://postgr.es/m/673825.1603223178@sss.pgh.pa.us
2020-10-22 17:34:32 -04:00 · 2020-10-22 17:34:32 -04:00 · 3dfb1942d9
parent 8bb0c9770e
commit 3dfb1942d9
7 changed files with 28 additions and 20 deletions
--- a/doc/src/sgml/ecpg.sgml
+++ b/doc/src/sgml/ecpg.sgml
@ -31,7 +31,7 @@
   specially marked sections.  To build the program, the source code (<filename>*.pgc</filename>)
   is first passed through the embedded SQL preprocessor, which converts it
   to an ordinary C program (<filename>*.c</filename>), and afterwards it can be processed by a C
-   compiler.  (For details about the compiling and linking see <xref linkend="ecpg-process"/>).
+   compiler.  (For details about the compiling and linking see <xref linkend="ecpg-process"/>.)
   Converted ECPG applications call functions in the libpq library
   through the embedded SQL library (ecpglib), and communicate with
   the PostgreSQL server using the normal frontend-backend protocol.
@ -63,11 +63,22 @@ EXEC SQL ...;
 </programlisting>
   These statements syntactically take the place of a C statement.
   Depending on the particular statement, they can appear at the
-   global level or within a function.  Embedded
+   global level or within a function.
+  </para>
+
+  <para>
+   Embedded
   <acronym>SQL</acronym> statements follow the case-sensitivity rules of
   normal <acronym>SQL</acronym> code, and not those of C. Also they allow nested
-   C-style comments that are part of the SQL standard. The C part of the
+   C-style comments as per the SQL standard. The C part of the
   program, however, follows the C standard of not accepting nested comments.
+   Embedded <acronym>SQL</acronym> statements likewise use SQL rules, not
+   C rules, for parsing quoted strings and identifiers.
+   (See <xref linkend="sql-syntax-strings"/> and
+   <xref linkend="sql-syntax-identifiers"/> respectively.  Note that
+   ECPG assumes that <varname>standard_conforming_strings</varname>
+   is <literal>on</literal>.)
+   Of course, the C part of the program follows C quoting rules.
  </para>

  <para>
--- a/src/interfaces/ecpg/preproc/pgc.l
+++ b/src/interfaces/ecpg/preproc/pgc.l
@ -623,11 +623,8 @@ cppline			{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
 					}
 				}

-<xq,xe,xn,xus>{xqdouble}	{ addlitchar('\''); }
-<xqc>{xqcquote}	{
-					addlitchar('\\');
-					addlitchar('\'');
-				}
+<xq,xe,xn,xus>{xqdouble}	{ addlit(yytext, yyleng); }
+<xqc>{xqcquote}				{ addlit(yytext, yyleng); }
 <xq,xqc,xn,xus>{xqinside}	{ addlit(yytext, yyleng); }
 <xe>{xeinside}  {
 					addlit(yytext, yyleng);
@ -736,7 +733,7 @@ cppline			{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
 					return UIDENT;
 				}
 <xd,xui>{xddouble}	{
-					addlitchar('"');
+					addlit(yytext, yyleng);
 				}
 <xd,xui>{xdinside}	{
 					addlit(yytext, yyleng);
--- a/src/interfaces/ecpg/test/expected/preproc-strings.c
+++ b/src/interfaces/ecpg/test/expected/preproc-strings.c
@ -45,7 +45,7 @@ int main(void)
 #line 13 "strings.pgc"


-  { ECPGdo(__LINE__, 0, 1, NULL, 0, ECPGst_normal, "select 'abcdef' , N'abcdef' as foo , E'abc\\bdef' as \"foo\" , U&'d\\0061t\\0061' as U&\"foo\" , U&'d!+000061t!+000061' UESCAPE '!' , $foo$abc$def$foo$", ECPGt_EOIT, 
+  { ECPGdo(__LINE__, 0, 1, NULL, 0, ECPGst_normal, "select 'abc''d\\ef' , N'abc''d\\ef' as foo , E'abc''d\\\\ef' as \"foo\"\"bar\" , U&'d\\0061t\\0061' as U&\"foo\"\"bar\" , U&'d!+000061t!+000061' UESCAPE '!' , $foo$abc$def$foo$", ECPGt_EOIT, 
 	ECPGt_char,&(s1),(long)0,(long)1,(1)*sizeof(char), 
 	ECPGt_NO_INDICATOR, NULL , 0L, 0L, 0L, 
 	ECPGt_char,&(s2),(long)0,(long)1,(1)*sizeof(char), 
--- a/src/interfaces/ecpg/test/expected/preproc-strings.stderr
+++ b/src/interfaces/ecpg/test/expected/preproc-strings.stderr
@ -8,7 +8,7 @@
 [NO_PID]: sqlca: code: 0, state: 00000
 [NO_PID]: ecpg_process_output on line 13: OK: SET
 [NO_PID]: sqlca: code: 0, state: 00000
-[NO_PID]: ecpg_execute on line 15: query: select 'abcdef' , N'abcdef' as foo , E'abc\bdef' as "foo" , U&'d\0061t\0061' as U&"foo" , U&'d!+000061t!+000061' UESCAPE '!' , $foo$abc$def$foo$; with 0 parameter(s) on connection ecpg1_regression
+[NO_PID]: ecpg_execute on line 15: query: select 'abc''d\ef' , N'abc''d\ef' as foo , E'abc''d\\ef' as "foo""bar" , U&'d\0061t\0061' as U&"foo""bar" , U&'d!+000061t!+000061' UESCAPE '!' , $foo$abc$def$foo$; with 0 parameter(s) on connection ecpg1_regression
 [NO_PID]: sqlca: code: 0, state: 00000
 [NO_PID]: ecpg_execute on line 15: using PQexec
 [NO_PID]: sqlca: code: 0, state: 00000
@ -16,15 +16,15 @@
 [NO_PID]: sqlca: code: 0, state: 00000
 [NO_PID]: ecpg_store_result on line 15: allocating memory for 1 tuples
 [NO_PID]: sqlca: code: 0, state: 00000
-[NO_PID]: ecpg_get_data on line 15: RESULT: abcdef offset: -1; array: no
+[NO_PID]: ecpg_get_data on line 15: RESULT: abc'd\ef offset: -1; array: no
 [NO_PID]: sqlca: code: 0, state: 00000
 [NO_PID]: ecpg_store_result on line 15: allocating memory for 1 tuples
 [NO_PID]: sqlca: code: 0, state: 00000
-[NO_PID]: ecpg_get_data on line 15: RESULT: abcdef offset: -1; array: no
+[NO_PID]: ecpg_get_data on line 15: RESULT: abc'd\ef offset: -1; array: no
 [NO_PID]: sqlca: code: 0, state: 00000
 [NO_PID]: ecpg_store_result on line 15: allocating memory for 1 tuples
 [NO_PID]: sqlca: code: 0, state: 00000
-[NO_PID]: ecpg_get_data on line 15: RESULT: abcdef offset: -1; array: no
+[NO_PID]: ecpg_get_data on line 15: RESULT: abc'd\ef offset: -1; array: no
 [NO_PID]: sqlca: code: 0, state: 00000
 [NO_PID]: ecpg_store_result on line 15: allocating memory for 1 tuples
 [NO_PID]: sqlca: code: 0, state: 00000
--- a/src/interfaces/ecpg/test/expected/preproc-strings.stdout
+++ b/src/interfaces/ecpg/test/expected/preproc-strings.stdout
@ -1 +1 @@
-abcdef abcdef abcdef data data abc$def
+abc'd\ef abc'd\ef abc'd\ef data data abc$def
--- a/src/interfaces/ecpg/test/preproc/strings.pgc
+++ b/src/interfaces/ecpg/test/preproc/strings.pgc
@ -12,10 +12,10 @@ int main(void)

  exec sql set standard_conforming_strings to on;

-  exec sql select 'abcdef',
-                  N'abcdef' AS foo,
-                  E'abc\bdef' AS "foo",
-                  U&'d\0061t\0061' AS U&"foo",
+  exec sql select 'abc''d\ef',
+                  N'abc''d\ef' AS foo,
+                  E'abc''d\\ef' AS "foo""bar",
+                  U&'d\0061t\0061' AS U&"foo""bar",
                  U&'d!+000061t!+000061' uescape '!',
                  $foo$abc$def$foo$
                  into :s1, :s2, :s3, :s4, :s5, :s6;
--- a/src/interfaces/ecpg/test/sql/dyntest.pgc
+++ b/src/interfaces/ecpg/test/sql/dyntest.pgc
@ -51,7 +51,7 @@ main ()
  exec sql create table dyntest (name char (14), d float8, i int,
 				 bignumber int8, b boolean, comment text,
 				 day date);
-  exec sql insert into dyntest values ('first entry', 14.7, 14, 123045607890, true, 'The world''''s most advanced open source database.', '1987-07-14');
+  exec sql insert into dyntest values ('first entry', 14.7, 14, 123045607890, true, 'The world''s most advanced open source database.', '1987-07-14');
  exec sql insert into dyntest values ('second entry', 1407.87, 1407, 987065403210, false, 'The elephant never forgets.', '1999-11-5');

  exec sql prepare MYQUERY from :QUERY;