Rationalize handling of single and double quotes in bootstrap data.

Change things around so that proper quoting of values interpolated into the BKI data by initdb is the responsibility of initdb, not something we half-heartedly handle by putting double quotes into the raw BKI data. (Note: experimentation shows that it still doesn't work to put a double quote into the initial superuser username, but that's the fault of inadequate quoting while interpolating the name into SQL scripts; the BKI aspect of it works fine now.)

Having done that, we can remove the special-case handling of values that look like "something" from genbki.pl, and instead teach it to escape double --- and single --- quotes properly. This removes the nowhere-documented need to treat those specially in the BKI source data; whatever you write will be passed through unchanged into the inserted data value, modulo Perl's rules about single-quoted strings.

Add documentation explaining the (pre-existing) handling of backslashes in the BKI data.

Per an earlier discussion with John Naylor.

Discussion: https://postgr.es/m/CAJVSVGUNao=-Q2-vAN3PYcdF5tnL5JAHwGwzZGuYHtq+Mk_9ng@mail.gmail.com
2018-04-17 19:53:50 -04:00 · 2018-04-17 19:53:50 -04:00 · 55d26ff638
parent 9ffcccdb95
commit 55d26ff638
6 changed files with 89 additions and 38 deletions
--- a/doc/src/sgml/bki.sgml
+++ b/doc/src/sgml/bki.sgml
@@ -184,13 +184,11 @@
 <programlisting>
 [

-# LC_COLLATE and LC_CTYPE will be replaced at initdb time with user choices
-# that might contain non-word characters, so we must double-quote them.
-
+# A comment could appear here.
 { oid =&gt; '1', oid_symbol =&gt; 'TemplateDbOid',
  descr =&gt; 'database\'s default template',
  datname =&gt; 'template1', datdba =&gt; 'PGUID', encoding =&gt; 'ENCODING',
-  datcollate =&gt; '"LC_COLLATE"', datctype =&gt; '"LC_CTYPE"', datistemplate =&gt; 't',
+  datcollate =&gt; 'LC_COLLATE', datctype =&gt; 'LC_CTYPE', datistemplate =&gt; 't',
  datallowconn =&gt; 't', datconnlimit =&gt; '-1', datlastsysoid =&gt; '0',
  datfrozenxid =&gt; '0', datminmxid =&gt; '1', dattablespace =&gt; '1663',
  datacl =&gt; '_null_' },
@@ -234,10 +232,16 @@

    <listitem>
     <para>
-      All values must be single-quoted.  Escape single quotes used within
-      a value with a backslash.  (Backslashes meant as data need not be
-      doubled, however; this follows Perl's rules for simple quoted
-      literals.)
+      All values must be single-quoted.  Escape single quotes used within a
+      value with a backslash.  Backslashes meant as data can, but need not,
+      be doubled; this follows Perl's rules for simple quoted literals.
+      Note that backslashes appearing as data will be treated as escapes by
+      the bootstrap scanner, according to the same rules as for escape string
+      constants (see <xref linkend="sql-syntax-strings-escape"/>); for
+      example <literal>\t</literal> converts to a tab character.  If you
+      actually want a backslash in the final value, you will need to write
+      four of them: Perl strips two, leaving <literal>\\</literal> for the
+      bootstrap scanner to see.
     </para>
    </listitem>

@@ -247,15 +251,6 @@
     </para>
    </listitem>

-    <listitem>
-     <para>
-      If a value is a macro to be expanded
-      by <application>initdb</application>, it should also contain double
-      quotes as shown above, unless we know that no special characters can
-      appear within the string that will be substituted.
-     </para>
-    </listitem>
-
    <listitem>
     <para>
      Comments are preceded by <literal>#</literal>, and must be on their
--- a/src/backend/catalog/genbki.pl
+++ b/src/backend/catalog/genbki.pl
@@ -660,12 +660,19 @@ sub print_bki_insert
 		# since that represents a NUL char in C code.
 		$bki_value = '' if $bki_value eq '\0';

+		# Handle single quotes by doubling them, and double quotes by
+		# converting them to octal escapes, because that's what the
+		# bootstrap scanner requires.  We do not process backslashes
+		# specially; this allows escape-string-style backslash escapes
+		# to be used in catalog data.
+		$bki_value =~ s/'/''/g;
+		$bki_value =~ s/"/\\042/g;
+
 		# Quote value if needed.  We need not quote values that satisfy
 		# the "id" pattern in bootscanner.l, currently "[-A-Za-z0-9_]+".
 		$bki_value = sprintf(qq'"%s"', $bki_value)
-		  if $bki_value !~ /^"[^"]+"$/
-		  and ( length($bki_value) == 0
-				or $bki_value =~ /[^-A-Za-z0-9_]/);
+		  if length($bki_value) == 0
+			 or $bki_value =~ /[^-A-Za-z0-9_]/;

 		push @bki_values, $bki_value;
 	}
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -265,6 +265,7 @@ static void make_postgres(FILE *cmdfd);
 static void trapsig(int signum);
 static void check_ok(void);
 static char *escape_quotes(const char *src);
+static char *escape_quotes_bki(const char *src);
 static int	locale_date_order(const char *locale);
 static void check_locale_name(int category, const char *locale,
 				  char **canonname);
@@ -324,6 +325,10 @@ do { \
 		output_failed = true, output_errno = errno; \
 } while (0)

+/*
+ * Escape single quotes and backslashes, suitably for insertions into
+ * configuration files or SQL E'' strings.
+ */
 static char *
 escape_quotes(const char *src)
 {
@@ -337,6 +342,52 @@ escape_quotes(const char *src)
 	return result;
 }

+/*
+ * Escape a field value to be inserted into the BKI data.
+ * Here, we first run the value through escape_quotes (which
+ * will be inverted by the backend's scanstr() function) and
+ * then overlay special processing of double quotes, which
+ * bootscanner.l will only accept as data if converted to octal
+ * representation ("\042").  We always wrap the value in double
+ * quotes, even if that isn't strictly necessary.
+ */
+static char *
+escape_quotes_bki(const char *src)
+{
+	char	   *result;
+	char	   *data = escape_quotes(src);
+	char	   *resultp;
+	char	   *datap;
+	int			nquotes = 0;
+
+	/* count double quotes in data */
+	datap = data;
+	while ((datap = strchr(datap, '"')) != NULL)
+	{
+		nquotes++;
+		datap++;
+	}
+
+	result = (char *) pg_malloc(strlen(data) + 3 + nquotes * 3);
+	resultp = result;
+	*resultp++ = '"';
+	for (datap = data; *datap; datap++)
+	{
+		if (*datap == '"')
+		{
+			strcpy(resultp, "\\042");
+			resultp += 4;
+		}
+		else
+			*resultp++ = *datap;
+	}
+	*resultp++ = '"';
+	*resultp = '\0';
+
+	free(data);
+	return result;
+}
+
 /*
 * make a copy of the array of lines, with token replaced by replacement
 * the first time it occurs on each line.
@@ -1368,13 +1419,17 @@ bootstrap_template1(void)
 	bki_lines = replace_token(bki_lines, "FLOAT8PASSBYVAL",
 							  FLOAT8PASSBYVAL ? "true" : "false");

-	bki_lines = replace_token(bki_lines, "POSTGRES", escape_quotes(username));
+	bki_lines = replace_token(bki_lines, "POSTGRES",
+							  escape_quotes_bki(username));

-	bki_lines = replace_token(bki_lines, "ENCODING", encodingid_to_string(encodingid));
+	bki_lines = replace_token(bki_lines, "ENCODING",
+							  encodingid_to_string(encodingid));

-	bki_lines = replace_token(bki_lines, "LC_COLLATE", escape_quotes(lc_collate));
+	bki_lines = replace_token(bki_lines, "LC_COLLATE",
+							  escape_quotes_bki(lc_collate));

-	bki_lines = replace_token(bki_lines, "LC_CTYPE", escape_quotes(lc_ctype));
+	bki_lines = replace_token(bki_lines, "LC_CTYPE",
+							  escape_quotes_bki(lc_ctype));

 	/*
 	 * Pass correct LC_xxx environment to bootstrap.
--- a/src/include/catalog/pg_authid.dat
+++ b/src/include/catalog/pg_authid.dat
@@ -12,14 +12,11 @@

 [

-# POSTGRES will be replaced at initdb time with a user choice that might
-# contain non-word characters, so we must double-quote it.
-
 # The C code typically refers to these roles using the #define symbols,
 # so make sure every entry has an oid_symbol value.

 { oid => '10', oid_symbol => 'BOOTSTRAP_SUPERUSERID',
-  rolname => '"POSTGRES"', rolsuper => 't', rolinherit => 't',
+  rolname => 'POSTGRES', rolsuper => 't', rolinherit => 't',
  rolcreaterole => 't', rolcreatedb => 't', rolcanlogin => 't',
  rolreplication => 't', rolbypassrls => 't', rolconnlimit => '-1',
  rolpassword => '_null_', rolvaliduntil => '_null_' },
--- a/src/include/catalog/pg_database.dat
+++ b/src/include/catalog/pg_database.dat
@@ -12,13 +12,10 @@

 [

-# LC_COLLATE and LC_CTYPE will be replaced at initdb time with user choices
-# that might contain non-word characters, so we must double-quote them.
-
 { oid => '1', oid_symbol => 'TemplateDbOid',
  descr => 'default template for new databases',
  datname => 'template1', datdba => 'PGUID', encoding => 'ENCODING',
-  datcollate => '"LC_COLLATE"', datctype => '"LC_CTYPE"', datistemplate => 't',
+  datcollate => 'LC_COLLATE', datctype => 'LC_CTYPE', datistemplate => 't',
  datallowconn => 't', datconnlimit => '-1', datlastsysoid => '0',
  datfrozenxid => '0', datminmxid => '1', dattablespace => '1663',
  datacl => '_null_' },
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -2417,7 +2417,7 @@
 { oid => '1216', descr => 'get description for table column',
  proname => 'col_description', prolang => '14', procost => '100',
  provolatile => 's', prorettype => 'text', proargtypes => 'oid int4',
-  prosrc => 'select description from pg_catalog.pg_description where objoid = $1 and classoid = \'\'pg_catalog.pg_class\'\'::pg_catalog.regclass and objsubid = $2' },
+  prosrc => 'select description from pg_catalog.pg_description where objoid = $1 and classoid = \'pg_catalog.pg_class\'::pg_catalog.regclass and objsubid = $2' },
 { oid => '1993',
  descr => 'get description for object id and shared catalog name',
  proname => 'shobj_description', prolang => '14', procost => '100',
@@ -3483,11 +3483,11 @@
 { oid => '879', descr => 'left-pad string to length',
  proname => 'lpad', prolang => '14', prorettype => 'text',
  proargtypes => 'text int4',
-  prosrc => 'select pg_catalog.lpad($1, $2, \'\' \'\')' },
+  prosrc => 'select pg_catalog.lpad($1, $2, \' \')' },
 { oid => '880', descr => 'right-pad string to length',
  proname => 'rpad', prolang => '14', prorettype => 'text',
  proargtypes => 'text int4',
-  prosrc => 'select pg_catalog.rpad($1, $2, \'\' \'\')' },
+  prosrc => 'select pg_catalog.rpad($1, $2, \' \')' },
 { oid => '881', descr => 'trim spaces from left end of string',
  proname => 'ltrim', prorettype => 'text', proargtypes => 'text',
  prosrc => 'ltrim1' },
@@ -6930,7 +6930,7 @@
  descr => 'disk space usage for the main fork of the specified table or index',
  proname => 'pg_relation_size', prolang => '14', provolatile => 'v',
  prorettype => 'int8', proargtypes => 'regclass',
-  prosrc => 'select pg_catalog.pg_relation_size($1, \'\'main\'\')' },
+  prosrc => 'select pg_catalog.pg_relation_size($1, \'main\')' },
 { oid => '2332',
  descr => 'disk space usage for the specified fork of a table or index',
  proname => 'pg_relation_size', provolatile => 'v', prorettype => 'int8',
@@ -8168,7 +8168,7 @@
 { oid => '2932', descr => 'evaluate XPath expression',
  proname => 'xpath', prolang => '14', prorettype => '_xml',
  proargtypes => 'text xml',
-  prosrc => 'select pg_catalog.xpath($1, $2, \'\'{}\'\'::pg_catalog.text[])' },
+  prosrc => 'select pg_catalog.xpath($1, $2, \'{}\'::pg_catalog.text[])' },

 { oid => '2614', descr => 'test XML value against XPath expression',
  proname => 'xmlexists', prorettype => 'bool', proargtypes => 'text xml',
@@ -8181,7 +8181,7 @@
 { oid => '3050', descr => 'test XML value against XPath expression',
  proname => 'xpath_exists', prolang => '14', prorettype => 'bool',
  proargtypes => 'text xml',
-  prosrc => 'select pg_catalog.xpath_exists($1, $2, \'\'{}\'\'::pg_catalog.text[])' },
+  prosrc => 'select pg_catalog.xpath_exists($1, $2, \'{}\'::pg_catalog.text[])' },
 { oid => '3051', descr => 'determine if a string is well formed XML',
  proname => 'xml_is_well_formed', provolatile => 's', prorettype => 'bool',
  proargtypes => 'text', prosrc => 'xml_is_well_formed' },