Make postgres.bki use the same literal-string syntax as postgresql.conf.

The BKI file's string quoting conventions were previously quite weird,
perhaps as a result of repurposing a function built to scan
single-quoted strings to scan double-quoted ones.  Change to use the
same rules as we use in GUC files, allowing some simplifications in
genbki.pl and initdb.c.

While at it, completely remove the backend's scanstr() function, which
was essentially a duplicate of the string dequoting code in guc-file.l.
Instead export that one (under a less generic name than it had) and let
bootscanner.l use it.  Now we can clarify that scansup.c exists only to
support the main lexer. We could alternatively have removed GUC_scanstr,
but this way seems better since the previous arrangement could mislead
a reader into thinking that scanstr() had something to do with the main
lexer's handling of string literals.  Maybe it did once, but if so it
was a long time ago.

This patch does not bump catversion, since the initially-installed
catalog contents don't change.  Note, however, that a successful initdb
after applying this patch will require an up-to-date postgres.bki as
well as up-to-date postgres and initdb executables.

In passing, remove a bunch of very-long-obsolete #include directives in
bootparse.y and bootscanner.l.

John Naylor

Discussion: https://postgr.es/m/CACPNZCtDpd18T0KATTmCggO2GdVC4ow86ypiq5ENff1VnauL8g@mail.gmail.com
This commit is contained in:
Tom Lane 2020-10-04 16:09:55 -04:00
parent 9081bddbd7
commit 97b6144826
9 changed files with 31 additions and 182 deletions

View File

@ -752,8 +752,8 @@ $ perl rewrite_dat_with_prokind.pl pg_proc.dat
next token that syntactically cannot belong to the preceding next token that syntactically cannot belong to the preceding
command starts a new one. (Usually you would put a new command on command starts a new one. (Usually you would put a new command on
a new line, for clarity.) Tokens can be certain key words, special a new line, for clarity.) Tokens can be certain key words, special
characters (parentheses, commas, etc.), numbers, or double-quoted characters (parentheses, commas, etc.), identifiers, numbers, or
strings. Everything is case sensitive. single-quoted strings. Everything is case sensitive.
</para> </para>
<para> <para>
@ -876,7 +876,9 @@ $ perl rewrite_dat_with_prokind.pl pg_proc.dat
<para> <para>
NULL values can be specified using the special key word NULL values can be specified using the special key word
<literal>_null_</literal>. Values that do not look like <literal>_null_</literal>. Values that do not look like
identifiers or digit strings must be double quoted. identifiers or digit strings must be single-quoted.
(To include a single quote in a value, write it twice.
Escape-string-style backslash escapes are allowed in the string, too.)
</para> </para>
</listitem> </listitem>
</varlistentry> </varlistentry>
@ -1046,7 +1048,7 @@ $ perl rewrite_dat_with_prokind.pl pg_proc.dat
<programlisting> <programlisting>
create test_table 420 (oid = oid, cola = int4, colb = text) create test_table 420 (oid = oid, cola = int4, colb = text)
open test_table open test_table
insert ( 421 1 "value1" ) insert ( 421 1 'value 1' )
insert ( 422 2 _null_ ) insert ( 422 2 _null_ )
close test_table close test_table
</programlisting> </programlisting>

View File

@ -18,16 +18,10 @@
#include <unistd.h> #include <unistd.h>
#include "access/attnum.h"
#include "access/htup.h"
#include "access/itup.h"
#include "access/tupdesc.h"
#include "bootstrap/bootstrap.h" #include "bootstrap/bootstrap.h"
#include "catalog/catalog.h"
#include "catalog/heap.h" #include "catalog/heap.h"
#include "catalog/namespace.h" #include "catalog/namespace.h"
#include "catalog/pg_am.h" #include "catalog/pg_am.h"
#include "catalog/pg_attribute.h"
#include "catalog/pg_authid.h" #include "catalog/pg_authid.h"
#include "catalog/pg_class.h" #include "catalog/pg_class.h"
#include "catalog/pg_namespace.h" #include "catalog/pg_namespace.h"
@ -36,20 +30,7 @@
#include "commands/defrem.h" #include "commands/defrem.h"
#include "miscadmin.h" #include "miscadmin.h"
#include "nodes/makefuncs.h" #include "nodes/makefuncs.h"
#include "nodes/nodes.h"
#include "nodes/parsenodes.h"
#include "nodes/pg_list.h"
#include "nodes/primnodes.h"
#include "rewrite/prs2lock.h"
#include "storage/block.h"
#include "storage/fd.h"
#include "storage/ipc.h"
#include "storage/itemptr.h"
#include "storage/off.h"
#include "storage/smgr.h"
#include "tcop/dest.h"
#include "utils/memutils.h" #include "utils/memutils.h"
#include "utils/rel.h"
/* /*

View File

@ -15,25 +15,8 @@
*/ */
#include "postgres.h" #include "postgres.h"
#include "access/attnum.h"
#include "access/htup.h"
#include "access/itup.h"
#include "access/tupdesc.h"
#include "bootstrap/bootstrap.h" #include "bootstrap/bootstrap.h"
#include "catalog/pg_am.h" #include "utils/guc.h"
#include "catalog/pg_attribute.h"
#include "catalog/pg_class.h"
#include "nodes/nodes.h"
#include "nodes/parsenodes.h"
#include "nodes/pg_list.h"
#include "nodes/primnodes.h"
#include "parser/scansup.h"
#include "rewrite/prs2lock.h"
#include "storage/block.h"
#include "storage/fd.h"
#include "storage/itemptr.h"
#include "storage/off.h"
#include "utils/rel.h"
/* Not needed now that this file is compiled as part of bootparse. */ /* Not needed now that this file is compiled as part of bootparse. */
/* #include "bootparse.h" */ /* #include "bootparse.h" */
@ -66,7 +49,7 @@ static int yyline = 1; /* line number for error reporting */
id [-A-Za-z0-9_]+ id [-A-Za-z0-9_]+
sid \"([^\"])*\" sid \'([^']|\'\')*\'
/* /*
* Keyword tokens return the keyword text (as a constant string) in yylval.kw, * Keyword tokens return the keyword text (as a constant string) in yylval.kw,
@ -120,14 +103,12 @@ NOT { yylval.kw = "NOT"; return XNOT; }
NULL { yylval.kw = "NULL"; return XNULL; } NULL { yylval.kw = "NULL"; return XNULL; }
{id} { {id} {
yylval.str = scanstr(yytext); yylval.str = pstrdup(yytext);
return ID; return ID;
} }
{sid} { {sid} {
/* leading and trailing quotes are not passed to scanstr */ /* strip quotes and escapes */
yytext[strlen(yytext) - 1] = '\0'; yylval.str = DeescapeQuotedString(yytext);
yylval.str = scanstr(yytext+1);
yytext[strlen(yytext)] = '"'; /* restore yytext */
return ID; return ID;
} }

View File

@ -845,17 +845,15 @@ sub print_bki_insert
# since that represents a NUL char in C code. # since that represents a NUL char in C code.
$bki_value = '' if $bki_value eq '\0'; $bki_value = '' if $bki_value eq '\0';
# Handle single quotes by doubling them, and double quotes by # Handle single quotes by doubling them, because that's what the
# converting them to octal escapes, because that's what the
# bootstrap scanner requires. We do not process backslashes # bootstrap scanner requires. We do not process backslashes
# specially; this allows escape-string-style backslash escapes # specially; this allows escape-string-style backslash escapes
# to be used in catalog data. # to be used in catalog data.
$bki_value =~ s/'/''/g; $bki_value =~ s/'/''/g;
$bki_value =~ s/"/\\042/g;
# Quote value if needed. We need not quote values that satisfy # Quote value if needed. We need not quote values that satisfy
# the "id" pattern in bootscanner.l, currently "[-A-Za-z0-9_]+". # the "id" pattern in bootscanner.l, currently "[-A-Za-z0-9_]+".
$bki_value = sprintf(qq'"%s"', $bki_value) $bki_value = sprintf("'%s'", $bki_value)
if length($bki_value) == 0 if length($bki_value) == 0
or $bki_value =~ /[^-A-Za-z0-9_]/; or $bki_value =~ /[^-A-Za-z0-9_]/;

View File

@ -1,8 +1,7 @@
/*------------------------------------------------------------------------- /*-------------------------------------------------------------------------
* *
* scansup.c * scansup.c
* support routines for the lex/flex scanner, used by both the normal * scanner support routines used by the core lexer
* backend as well as the bootstrap backend
* *
* Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
@ -20,98 +19,6 @@
#include "mb/pg_wchar.h" #include "mb/pg_wchar.h"
#include "parser/scansup.h" #include "parser/scansup.h"
/* ----------------
* scanstr
*
* if the string passed in has escaped codes, map the escape codes to actual
* chars
*
* the string returned is palloc'd and should eventually be pfree'd by the
* caller!
* ----------------
*/
char *
scanstr(const char *s)
{
char *newStr;
int len,
i,
j;
if (s == NULL || s[0] == '\0')
return pstrdup("");
len = strlen(s);
newStr = palloc(len + 1); /* string cannot get longer */
for (i = 0, j = 0; i < len; i++)
{
if (s[i] == '\'')
{
/*
* Note: if scanner is working right, unescaped quotes can only
* appear in pairs, so there should be another character.
*/
i++;
/* The bootstrap parser is not as smart, so check here. */
Assert(s[i] == '\'');
newStr[j] = s[i];
}
else if (s[i] == '\\')
{
i++;
switch (s[i])
{
case 'b':
newStr[j] = '\b';
break;
case 'f':
newStr[j] = '\f';
break;
case 'n':
newStr[j] = '\n';
break;
case 'r':
newStr[j] = '\r';
break;
case 't':
newStr[j] = '\t';
break;
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
{
int k;
long octVal = 0;
for (k = 0;
s[i + k] >= '0' && s[i + k] <= '7' && k < 3;
k++)
octVal = (octVal << 3) + (s[i + k] - '0');
i += k - 1;
newStr[j] = ((char) octVal);
}
break;
default:
newStr[j] = s[i];
break;
} /* switch */
} /* s[i] == '\\' */
else
newStr[j] = s[i];
j++;
}
newStr[j] = '\0';
return newStr;
}
/* /*
* downcase_truncate_identifier() --- do appropriate downcasing and * downcase_truncate_identifier() --- do appropriate downcasing and

View File

@ -55,7 +55,6 @@ static void record_config_file_error(const char *errmsg,
ConfigVariable **tail_p); ConfigVariable **tail_p);
static int GUC_flex_fatal(const char *msg); static int GUC_flex_fatal(const char *msg);
static char *GUC_scanstr(const char *s);
/* LCOV_EXCL_START */ /* LCOV_EXCL_START */
@ -797,7 +796,7 @@ ParseConfigFp(FILE *fp, const char *config_file, int depth, int elevel,
token != GUC_UNQUOTED_STRING) token != GUC_UNQUOTED_STRING)
goto parse_error; goto parse_error;
if (token == GUC_STRING) /* strip quotes and escapes */ if (token == GUC_STRING) /* strip quotes and escapes */
opt_value = GUC_scanstr(yytext); opt_value = DeescapeQuotedString(yytext);
else else
opt_value = pstrdup(yytext); opt_value = pstrdup(yytext);
@ -1132,22 +1131,25 @@ FreeConfigVariable(ConfigVariable *item)
/* /*
* scanstr * DeescapeQuotedString
* *
* Strip the quotes surrounding the given string, and collapse any embedded * Strip the quotes surrounding the given string, and collapse any embedded
* '' sequences and backslash escapes. * '' sequences and backslash escapes.
* *
* the string returned is palloc'd and should eventually be pfree'd by the * The string returned is palloc'd and should eventually be pfree'd by the
* caller. * caller.
*
* This is exported because it is also used by the bootstrap scanner.
*/ */
static char * char *
GUC_scanstr(const char *s) DeescapeQuotedString(const char *s)
{ {
char *newStr; char *newStr;
int len, int len,
i, i,
j; j;
/* We just Assert that there are leading and trailing quotes */
Assert(s != NULL && s[0] == '\''); Assert(s != NULL && s[0] == '\'');
len = strlen(s); len = strlen(s);
Assert(len >= 2); Assert(len >= 2);

View File

@ -331,12 +331,9 @@ escape_quotes(const char *src)
/* /*
* Escape a field value to be inserted into the BKI data. * Escape a field value to be inserted into the BKI data.
* Here, we first run the value through escape_quotes (which * Run the value through escape_quotes (which will be inverted
* will be inverted by the backend's scanstr() function) and * by the backend's DeescapeQuotedString() function), then wrap
* then overlay special processing of double quotes, which * the value in single quotes, even if that isn't strictly necessary.
* bootscanner.l will only accept as data if converted to octal
* representation ("\042"). We always wrap the value in double
* quotes, even if that isn't strictly necessary.
*/ */
static char * static char *
escape_quotes_bki(const char *src) escape_quotes_bki(const char *src)
@ -345,30 +342,13 @@ escape_quotes_bki(const char *src)
char *data = escape_quotes(src); char *data = escape_quotes(src);
char *resultp; char *resultp;
char *datap; char *datap;
int nquotes = 0;
/* count double quotes in data */ result = (char *) pg_malloc(strlen(data) + 3);
datap = data;
while ((datap = strchr(datap, '"')) != NULL)
{
nquotes++;
datap++;
}
result = (char *) pg_malloc(strlen(data) + 3 + nquotes * 3);
resultp = result; resultp = result;
*resultp++ = '"'; *resultp++ = '\'';
for (datap = data; *datap; datap++) for (datap = data; *datap; datap++)
{ *resultp++ = *datap;
if (*datap == '"') *resultp++ = '\'';
{
strcpy(resultp, "\\042");
resultp += 4;
}
else
*resultp++ = *datap;
}
*resultp++ = '"';
*resultp = '\0'; *resultp = '\0';
free(data); free(data);

View File

@ -1,8 +1,7 @@
/*------------------------------------------------------------------------- /*-------------------------------------------------------------------------
* *
* scansup.h * scansup.h
* scanner support routines. used by both the bootstrap lexer * scanner support routines used by the core lexer
* as well as the normal lexer
* *
* Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
@ -15,8 +14,6 @@
#ifndef SCANSUP_H #ifndef SCANSUP_H
#define SCANSUP_H #define SCANSUP_H
extern char *scanstr(const char *s);
extern char *downcase_truncate_identifier(const char *ident, int len, extern char *downcase_truncate_identifier(const char *ident, int len,
bool warn); bool warn);

View File

@ -155,6 +155,7 @@ extern bool ParseConfigDirectory(const char *includedir,
ConfigVariable **head_p, ConfigVariable **head_p,
ConfigVariable **tail_p); ConfigVariable **tail_p);
extern void FreeConfigVariables(ConfigVariable *list); extern void FreeConfigVariables(ConfigVariable *list);
extern char *DeescapeQuotedString(const char *s);
/* /*
* The possible values of an enum variable are specified by an array of * The possible values of an enum variable are specified by an array of