Rationalize handling of single and double quotes in bootstrap data.

Change things around so that proper quoting of values interpolated into
the BKI data by initdb is the responsibility of initdb, not something
we half-heartedly handle by putting double quotes into the raw BKI data.
(Note: experimentation shows that it still doesn't work to put a double
quote into the initial superuser username, but that's the fault of
inadequate quoting while interpolating the name into SQL scripts;
the BKI aspect of it works fine now.)

Having done that, we can remove the special-case handling of values
that look like "something" from genbki.pl, and instead teach it to
escape double --- and single --- quotes properly.  This removes the
nowhere-documented need to treat those specially in the BKI source
data; whatever you write will be passed through unchanged into the
inserted data value, modulo Perl's rules about single-quoted strings.

Add documentation explaining the (pre-existing) handling of backslashes
in the BKI data.

Per an earlier discussion with John Naylor.

Discussion: https://postgr.es/m/CAJVSVGUNao=-Q2-vAN3PYcdF5tnL5JAHwGwzZGuYHtq+Mk_9ng@mail.gmail.com
This commit is contained in:
Tom Lane 2018-04-17 19:53:50 -04:00
parent 9ffcccdb95
commit 55d26ff638
6 changed files with 89 additions and 38 deletions

View File

@ -184,13 +184,11 @@
<programlisting>
[
# LC_COLLATE and LC_CTYPE will be replaced at initdb time with user choices
# that might contain non-word characters, so we must double-quote them.
# A comment could appear here.
{ oid =&gt; '1', oid_symbol =&gt; 'TemplateDbOid',
descr =&gt; 'database\'s default template',
datname =&gt; 'template1', datdba =&gt; 'PGUID', encoding =&gt; 'ENCODING',
datcollate =&gt; '"LC_COLLATE"', datctype =&gt; '"LC_CTYPE"', datistemplate =&gt; 't',
datcollate =&gt; 'LC_COLLATE', datctype =&gt; 'LC_CTYPE', datistemplate =&gt; 't',
datallowconn =&gt; 't', datconnlimit =&gt; '-1', datlastsysoid =&gt; '0',
datfrozenxid =&gt; '0', datminmxid =&gt; '1', dattablespace =&gt; '1663',
datacl =&gt; '_null_' },
@ -234,10 +232,16 @@
<listitem>
<para>
All values must be single-quoted. Escape single quotes used within
a value with a backslash. (Backslashes meant as data need not be
doubled, however; this follows Perl's rules for simple quoted
literals.)
All values must be single-quoted. Escape single quotes used within a
value with a backslash. Backslashes meant as data can, but need not,
be doubled; this follows Perl's rules for simple quoted literals.
Note that backslashes appearing as data will be treated as escapes by
the bootstrap scanner, according to the same rules as for escape string
constants (see <xref linkend="sql-syntax-strings-escape"/>); for
example <literal>\t</literal> converts to a tab character. If you
actually want a backslash in the final value, you will need to write
four of them: Perl strips two, leaving <literal>\\</literal> for the
bootstrap scanner to see.
</para>
</listitem>
@ -247,15 +251,6 @@
</para>
</listitem>
<listitem>
<para>
If a value is a macro to be expanded
by <application>initdb</application>, it should also contain double
quotes as shown above, unless we know that no special characters can
appear within the string that will be substituted.
</para>
</listitem>
<listitem>
<para>
Comments are preceded by <literal>#</literal>, and must be on their

View File

@ -660,12 +660,19 @@ sub print_bki_insert
# since that represents a NUL char in C code.
$bki_value = '' if $bki_value eq '\0';
# Handle single quotes by doubling them, and double quotes by
# converting them to octal escapes, because that's what the
# bootstrap scanner requires. We do not process backslashes
# specially; this allows escape-string-style backslash escapes
# to be used in catalog data.
$bki_value =~ s/'/''/g;
$bki_value =~ s/"/\\042/g;
# Quote value if needed. We need not quote values that satisfy
# the "id" pattern in bootscanner.l, currently "[-A-Za-z0-9_]+".
$bki_value = sprintf(qq'"%s"', $bki_value)
if $bki_value !~ /^"[^"]+"$/
and ( length($bki_value) == 0
or $bki_value =~ /[^-A-Za-z0-9_]/);
if length($bki_value) == 0
or $bki_value =~ /[^-A-Za-z0-9_]/;
push @bki_values, $bki_value;
}

View File

@ -265,6 +265,7 @@ static void make_postgres(FILE *cmdfd);
static void trapsig(int signum);
static void check_ok(void);
static char *escape_quotes(const char *src);
static char *escape_quotes_bki(const char *src);
static int locale_date_order(const char *locale);
static void check_locale_name(int category, const char *locale,
char **canonname);
@ -324,6 +325,10 @@ do { \
output_failed = true, output_errno = errno; \
} while (0)
/*
* Escape single quotes and backslashes, suitably for insertions into
* configuration files or SQL E'' strings.
*/
static char *
escape_quotes(const char *src)
{
@ -337,6 +342,52 @@ escape_quotes(const char *src)
return result;
}
/*
 * Prepare a field value for interpolation into the BKI data.
 *
 * The value is first passed through escape_quotes(); the backend's
 * scanstr() function undoes that step.  On top of that, every double
 * quote is rewritten as the octal escape "\042", since that is the only
 * form in which bootscanner.l will accept a double quote as data.
 * The output is unconditionally wrapped in double quotes, even when
 * the value would not strictly require it.
 *
 * Returns a newly allocated string.
 */
static char *
escape_quotes_bki(const char *src)
{
	char	   *escaped = escape_quotes(src);
	char	   *out;
	char	   *wp;
	const char *rp;
	int			dquote_count = 0;

	/* first pass: find out how many double quotes need expanding */
	for (rp = escaped; (rp = strchr(rp, '"')) != NULL; rp++)
		dquote_count++;

	/*
	 * Room for the escaped text, the two enclosing quotes and the
	 * terminator, plus three extra bytes per double quote (one byte
	 * becomes the four bytes of \042).
	 */
	out = (char *) pg_malloc(strlen(escaped) + 3 + dquote_count * 3);

	/* second pass: emit the wrapped, octal-escaped value */
	wp = out;
	*wp++ = '"';
	rp = escaped;
	while (*rp != '\0')
	{
		char		ch = *rp++;

		if (ch != '"')
		{
			*wp++ = ch;
			continue;
		}
		strcpy(wp, "\\042");
		wp += 4;
	}
	*wp++ = '"';
	*wp = '\0';

	free(escaped);
	return out;
}
}
/*
* make a copy of the array of lines, with token replaced by replacement
* the first time it occurs on each line.
@ -1368,13 +1419,17 @@ bootstrap_template1(void)
bki_lines = replace_token(bki_lines, "FLOAT8PASSBYVAL",
FLOAT8PASSBYVAL ? "true" : "false");
bki_lines = replace_token(bki_lines, "POSTGRES", escape_quotes(username));
bki_lines = replace_token(bki_lines, "POSTGRES",
escape_quotes_bki(username));
bki_lines = replace_token(bki_lines, "ENCODING", encodingid_to_string(encodingid));
bki_lines = replace_token(bki_lines, "ENCODING",
encodingid_to_string(encodingid));
bki_lines = replace_token(bki_lines, "LC_COLLATE", escape_quotes(lc_collate));
bki_lines = replace_token(bki_lines, "LC_COLLATE",
escape_quotes_bki(lc_collate));
bki_lines = replace_token(bki_lines, "LC_CTYPE", escape_quotes(lc_ctype));
bki_lines = replace_token(bki_lines, "LC_CTYPE",
escape_quotes_bki(lc_ctype));
/*
* Pass correct LC_xxx environment to bootstrap.

View File

@ -12,14 +12,11 @@
[
# POSTGRES will be replaced at initdb time with a user choice that might
# contain non-word characters, so we must double-quote it.
# The C code typically refers to these roles using the #define symbols,
# so make sure every entry has an oid_symbol value.
{ oid => '10', oid_symbol => 'BOOTSTRAP_SUPERUSERID',
rolname => '"POSTGRES"', rolsuper => 't', rolinherit => 't',
rolname => 'POSTGRES', rolsuper => 't', rolinherit => 't',
rolcreaterole => 't', rolcreatedb => 't', rolcanlogin => 't',
rolreplication => 't', rolbypassrls => 't', rolconnlimit => '-1',
rolpassword => '_null_', rolvaliduntil => '_null_' },

View File

@ -12,13 +12,10 @@
[
# LC_COLLATE and LC_CTYPE will be replaced at initdb time with user choices
# that might contain non-word characters, so we must double-quote them.
{ oid => '1', oid_symbol => 'TemplateDbOid',
descr => 'default template for new databases',
datname => 'template1', datdba => 'PGUID', encoding => 'ENCODING',
datcollate => '"LC_COLLATE"', datctype => '"LC_CTYPE"', datistemplate => 't',
datcollate => 'LC_COLLATE', datctype => 'LC_CTYPE', datistemplate => 't',
datallowconn => 't', datconnlimit => '-1', datlastsysoid => '0',
datfrozenxid => '0', datminmxid => '1', dattablespace => '1663',
datacl => '_null_' },

View File

@ -2417,7 +2417,7 @@
{ oid => '1216', descr => 'get description for table column',
proname => 'col_description', prolang => '14', procost => '100',
provolatile => 's', prorettype => 'text', proargtypes => 'oid int4',
prosrc => 'select description from pg_catalog.pg_description where objoid = $1 and classoid = \'\'pg_catalog.pg_class\'\'::pg_catalog.regclass and objsubid = $2' },
prosrc => 'select description from pg_catalog.pg_description where objoid = $1 and classoid = \'pg_catalog.pg_class\'::pg_catalog.regclass and objsubid = $2' },
{ oid => '1993',
descr => 'get description for object id and shared catalog name',
proname => 'shobj_description', prolang => '14', procost => '100',
@ -3483,11 +3483,11 @@
{ oid => '879', descr => 'left-pad string to length',
proname => 'lpad', prolang => '14', prorettype => 'text',
proargtypes => 'text int4',
prosrc => 'select pg_catalog.lpad($1, $2, \'\' \'\')' },
prosrc => 'select pg_catalog.lpad($1, $2, \' \')' },
{ oid => '880', descr => 'right-pad string to length',
proname => 'rpad', prolang => '14', prorettype => 'text',
proargtypes => 'text int4',
prosrc => 'select pg_catalog.rpad($1, $2, \'\' \'\')' },
prosrc => 'select pg_catalog.rpad($1, $2, \' \')' },
{ oid => '881', descr => 'trim spaces from left end of string',
proname => 'ltrim', prorettype => 'text', proargtypes => 'text',
prosrc => 'ltrim1' },
@ -6930,7 +6930,7 @@
descr => 'disk space usage for the main fork of the specified table or index',
proname => 'pg_relation_size', prolang => '14', provolatile => 'v',
prorettype => 'int8', proargtypes => 'regclass',
prosrc => 'select pg_catalog.pg_relation_size($1, \'\'main\'\')' },
prosrc => 'select pg_catalog.pg_relation_size($1, \'main\')' },
{ oid => '2332',
descr => 'disk space usage for the specified fork of a table or index',
proname => 'pg_relation_size', provolatile => 'v', prorettype => 'int8',
@ -8168,7 +8168,7 @@
{ oid => '2932', descr => 'evaluate XPath expression',
proname => 'xpath', prolang => '14', prorettype => '_xml',
proargtypes => 'text xml',
prosrc => 'select pg_catalog.xpath($1, $2, \'\'{}\'\'::pg_catalog.text[])' },
prosrc => 'select pg_catalog.xpath($1, $2, \'{}\'::pg_catalog.text[])' },
{ oid => '2614', descr => 'test XML value against XPath expression',
proname => 'xmlexists', prorettype => 'bool', proargtypes => 'text xml',
@ -8181,7 +8181,7 @@
{ oid => '3050', descr => 'test XML value against XPath expression',
proname => 'xpath_exists', prolang => '14', prorettype => 'bool',
proargtypes => 'text xml',
prosrc => 'select pg_catalog.xpath_exists($1, $2, \'\'{}\'\'::pg_catalog.text[])' },
prosrc => 'select pg_catalog.xpath_exists($1, $2, \'{}\'::pg_catalog.text[])' },
{ oid => '3051', descr => 'determine if a string is well formed XML',
proname => 'xml_is_well_formed', provolatile => 's', prorettype => 'bool',
proargtypes => 'text', prosrc => 'xml_is_well_formed' },