diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index c878a0ba4d..f2e545ed87 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -4121,6 +4121,14 @@ cast(-44 as bit(12)) 111111010100
special meaning of underscore and percent signs in the pattern.
+
+ According to the SQL standard, omitting ESCAPE
+ means there is no escape character (rather than defaulting to a
+ backslash), and a zero-length ESCAPE value is
+ disallowed. PostgreSQL's behavior in
+ this regard is therefore slightly nonstandard.
+
+
The key word ILIKE can be used instead of
LIKE to make the match case-insensitive according
@@ -4139,9 +4147,9 @@ cast(-44 as bit(12)) 111111010100
- There is also the prefix operator ^@ and corresponding
- starts_with function which covers cases when only
- searching by beginning of the string is needed.
+ Also see the prefix operator ^@ and corresponding
+ starts_with function, which are useful in cases
+ where simply matching the beginning of a string is needed.
@@ -4172,7 +4180,7 @@ cast(-44 as bit(12)) 111111010100
It is similar to LIKE, except that it
interprets the pattern using the SQL standard's definition of a
regular expression. SQL regular expressions are a curious cross
- between LIKE notation and common regular
+ between LIKE notation and common (POSIX) regular
expression notation.
@@ -4256,18 +4264,38 @@ cast(-44 as bit(12)) 111111010100
- As with LIKE, a backslash disables the special meaning
- of any of these metacharacters; or a different escape character can
- be specified with ESCAPE.
+ As with LIKE, a backslash disables the special
+ meaning of any of these metacharacters. A different escape character
+ can be specified with ESCAPE, or the escape
+ capability can be disabled by writing ESCAPE ''.
+
+
+
+ According to the SQL standard, omitting ESCAPE
+ means there is no escape character (rather than defaulting to a
+ backslash), and a zero-length ESCAPE value is
+ disallowed. PostgreSQL's behavior in
+ this regard is therefore slightly nonstandard.
+
+
+
+ Another nonstandard extension is that following the escape character
+ with a letter or digit provides access to the escape sequences
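+ defined for POSIX regular expressions; see
+ <xref linkend="posix-character-entry-escapes-table"/>,
+ <xref linkend="posix-class-shorthand-escapes-table"/>, and
+ <xref linkend="posix-constraint-escapes-table"/> below.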
Some examples:
-'abc' SIMILAR TO 'abc' true
-'abc' SIMILAR TO 'a' false
-'abc' SIMILAR TO '%(b|d)%' true
-'abc' SIMILAR TO '(b|c)%' false
+'abc' SIMILAR TO 'abc' true
+'abc' SIMILAR TO 'a' false
+'abc' SIMILAR TO '%(b|d)%' true
+'abc' SIMILAR TO '(b|c)%' false
+'-abc-' SIMILAR TO '%\mabc\M%' true
+'xabcy' SIMILAR TO '%\mabc\M%' false
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index c97bb367f8..a954acf509 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -13073,15 +13073,15 @@ a_expr: c_expr { $$ = $1; }
| a_expr SIMILAR TO a_expr %prec SIMILAR
{
- FuncCall *n = makeFuncCall(SystemFuncName("similar_escape"),
- list_make2($4, makeNullAConst(-1)),
+ FuncCall *n = makeFuncCall(SystemFuncName("similar_to_escape"),
+ list_make1($4),
@2);
$$ = (Node *) makeSimpleA_Expr(AEXPR_SIMILAR, "~",
$1, (Node *) n, @2);
}
| a_expr SIMILAR TO a_expr ESCAPE a_expr %prec SIMILAR
{
- FuncCall *n = makeFuncCall(SystemFuncName("similar_escape"),
+ FuncCall *n = makeFuncCall(SystemFuncName("similar_to_escape"),
list_make2($4, $6),
@2);
$$ = (Node *) makeSimpleA_Expr(AEXPR_SIMILAR, "~",
@@ -13089,15 +13089,15 @@ a_expr: c_expr { $$ = $1; }
}
| a_expr NOT_LA SIMILAR TO a_expr %prec NOT_LA
{
- FuncCall *n = makeFuncCall(SystemFuncName("similar_escape"),
- list_make2($5, makeNullAConst(-1)),
+ FuncCall *n = makeFuncCall(SystemFuncName("similar_to_escape"),
+ list_make1($5),
@2);
$$ = (Node *) makeSimpleA_Expr(AEXPR_SIMILAR, "!~",
$1, (Node *) n, @2);
}
| a_expr NOT_LA SIMILAR TO a_expr ESCAPE a_expr %prec NOT_LA
{
- FuncCall *n = makeFuncCall(SystemFuncName("similar_escape"),
+ FuncCall *n = makeFuncCall(SystemFuncName("similar_to_escape"),
list_make2($5, $7),
@2);
$$ = (Node *) makeSimpleA_Expr(AEXPR_SIMILAR, "!~",
@@ -14323,9 +14323,9 @@ subquery_Op:
| NOT_LA ILIKE
{ $$ = list_make1(makeString("!~~*")); }
/* cannot put SIMILAR TO here, because SIMILAR TO is a hack.
- * the regular expression is preprocessed by a function (similar_escape),
+ * the regular expression is preprocessed by a function (similar_to_escape),
* and the ~ operator for posix regular expressions is used.
- * x SIMILAR TO y -> x ~ similar_escape(y)
+ * x SIMILAR TO y -> x ~ similar_to_escape(y)
* this transformation is made on the fly by the parser upwards.
* however the SubLink structure which handles any/some/all stuff
* is not ready for such a thing.
diff --git a/src/backend/utils/adt/regexp.c b/src/backend/utils/adt/regexp.c
index 90a9197792..3d38aef820 100644
--- a/src/backend/utils/adt/regexp.c
+++ b/src/backend/utils/adt/regexp.c
@@ -654,15 +654,18 @@ textregexreplace(PG_FUNCTION_ARGS)
}
/*
- * similar_escape()
- * Convert a SQL:2008 regexp pattern to POSIX style, so it can be used by
- * our regexp engine.
+ * similar_to_escape(), similar_escape()
+ *
+ * Convert a SQL "SIMILAR TO" regexp pattern to POSIX style, so it can be
+ * used by our regexp engine.
+ *
+ * similar_escape_internal() is the common workhorse for three SQL-exposed
+ * functions. esc_text can be passed as NULL to select the default escape
+ * (which is '\'), or as an empty string to select no escape character.
*/
-Datum
-similar_escape(PG_FUNCTION_ARGS)
+static text *
+similar_escape_internal(text *pat_text, text *esc_text)
{
- text *pat_text;
- text *esc_text;
text *result;
char *p,
*e,
@@ -673,13 +676,9 @@ similar_escape(PG_FUNCTION_ARGS)
bool incharclass = false;
int nquotes = 0;
- /* This function is not strict, so must test explicitly */
- if (PG_ARGISNULL(0))
- PG_RETURN_NULL();
- pat_text = PG_GETARG_TEXT_PP(0);
p = VARDATA_ANY(pat_text);
plen = VARSIZE_ANY_EXHDR(pat_text);
- if (PG_ARGISNULL(1))
+ if (esc_text == NULL)
{
/* No ESCAPE clause provided; default to backslash as escape */
e = "\\";
@@ -687,12 +686,11 @@ similar_escape(PG_FUNCTION_ARGS)
}
else
{
- esc_text = PG_GETARG_TEXT_PP(1);
e = VARDATA_ANY(esc_text);
elen = VARSIZE_ANY_EXHDR(esc_text);
if (elen == 0)
e = NULL; /* no escape character */
- else
+ else if (elen > 1)
{
int escape_mblen = pg_mbstrlen_with_len(e, elen);
@@ -898,6 +896,65 @@ similar_escape(PG_FUNCTION_ARGS)
SET_VARSIZE(result, r - ((char *) result));
+ return result;
+}
+
+/*
+ * similar_to_escape(pattern, escape)
+ *
+ * Two-argument SQL entry point: fetches the pattern and the explicit
+ * escape string and hands both to similar_escape_internal().
+ */
+Datum
+similar_to_escape_2(PG_FUNCTION_ARGS)
+{
+	text	   *pattern = PG_GETARG_TEXT_PP(0);
+	text	   *escape = PG_GETARG_TEXT_PP(1);
+
+	PG_RETURN_TEXT_P(similar_escape_internal(pattern, escape));
+}
+
+/*
+ * similar_to_escape(pattern)
+ *
+ * One-argument SQL entry point.  No escape string is supplied, so NULL
+ * is passed to similar_escape_internal() to select the default escape.
+ */
+Datum
+similar_to_escape_1(PG_FUNCTION_ARGS)
+{
+	text	   *pattern = PG_GETARG_TEXT_PP(0);
+
+	PG_RETURN_TEXT_P(similar_escape_internal(pattern, NULL));
+}
+
+/*
+ * similar_escape(pattern, escape)
+ *
+ * Legacy entry point, kept so that views stored with the pre-v13
+ * expansion of SIMILAR TO keep working.  It is not strict: a NULL
+ * pattern yields NULL, but a NULL escape selects the default escape
+ * character, which is not the per-spec handling of "ESCAPE NULL".
+ */
+Datum
+similar_escape(PG_FUNCTION_ARGS)
+{
+	text	   *pat_text;
+	text	   *esc_text = NULL;	/* NULL means: use default escape */
+	text	   *result;
+
+	/* Not declared strict, so a NULL pattern has to be caught here */
+	if (PG_ARGISNULL(0))
+		PG_RETURN_NULL();
+	pat_text = PG_GETARG_TEXT_PP(0);
+
+	/* Only fetch the escape argument when one was actually supplied */
+	if (!PG_ARGISNULL(1))
+		esc_text = PG_GETARG_TEXT_PP(1);
+
+	result = similar_escape_internal(pat_text, esc_text);
+
PG_RETURN_TEXT_P(result);
}
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index 80def7d401..00cc71dcd1 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -53,6 +53,6 @@
*/
/* yyyymmddN */
-#define CATALOG_VERSION_NO 201908012
+#define CATALOG_VERSION_NO 201909071
#endif
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index cf1f409351..e6645f139c 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -3346,9 +3346,15 @@
proname => 'repeat', prorettype => 'text', proargtypes => 'text int4',
prosrc => 'repeat' },
-{ oid => '1623', descr => 'convert SQL99 regexp pattern to POSIX style',
+{ oid => '1623', descr => 'convert SQL regexp pattern to POSIX style',
proname => 'similar_escape', proisstrict => 'f', prorettype => 'text',
proargtypes => 'text text', prosrc => 'similar_escape' },
+{ oid => '1986', descr => 'convert SQL regexp pattern to POSIX style',
+ proname => 'similar_to_escape', prorettype => 'text',
+ proargtypes => 'text text', prosrc => 'similar_to_escape_2' },
+{ oid => '1987', descr => 'convert SQL regexp pattern to POSIX style',
+ proname => 'similar_to_escape', prorettype => 'text', proargtypes => 'text',
+ prosrc => 'similar_to_escape_1' },
{ oid => '1624',
proname => 'mul_d_interval', prorettype => 'interval',
@@ -5771,10 +5777,10 @@
{ oid => '2073', descr => 'extract text matching regular expression',
proname => 'substring', prorettype => 'text', proargtypes => 'text text',
prosrc => 'textregexsubstr' },
-{ oid => '2074', descr => 'extract text matching SQL99 regular expression',
+{ oid => '2074', descr => 'extract text matching SQL regular expression',
proname => 'substring', prolang => 'sql', prorettype => 'text',
proargtypes => 'text text text',
- prosrc => 'select pg_catalog.substring($1, pg_catalog.similar_escape($2, $3))' },
+ prosrc => 'select pg_catalog.substring($1, pg_catalog.similar_to_escape($2, $3))' },
{ oid => '2075', descr => 'convert int8 to bitstring',
proname => 'bit', prorettype => 'bit', proargtypes => 'int8 int4',
@@ -10554,8 +10560,7 @@
proparallel => 'r', prorettype => 'void', proargtypes => '',
prosrc => 'pg_replication_origin_xact_reset' },
-{ oid => '6012',
- descr => 'advance replication origin to specific location',
+{ oid => '6012', descr => 'advance replication origin to specific location',
proname => 'pg_replication_origin_advance', provolatile => 'v',
proparallel => 'u', prorettype => 'void', proargtypes => 'text pg_lsn',
prosrc => 'pg_replication_origin_advance' },
diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out
index 486c00b3b3..2483966576 100644
--- a/src/test/regress/expected/strings.out
+++ b/src/test/regress/expected/strings.out
@@ -410,7 +410,56 @@ SELECT SUBSTRING('abcdefg' FROM 'b(.*)f') AS "cde";
cde
(1 row)
--- PostgreSQL extension to allow using back reference in replace string;
+-- Check behavior of SIMILAR TO, which uses largely the same regexp variant
+SELECT 'abcdefg' SIMILAR TO '_bcd%' AS true;
+ true
+------
+ t
+(1 row)
+
+SELECT 'abcdefg' SIMILAR TO 'bcd%' AS false;
+ false
+-------
+ f
+(1 row)
+
+SELECT 'abcdefg' SIMILAR TO '_bcd#%' ESCAPE '#' AS false;
+ false
+-------
+ f
+(1 row)
+
+SELECT 'abcd%' SIMILAR TO '_bcd#%' ESCAPE '#' AS true;
+ true
+------
+ t
+(1 row)
+
+-- Postgres uses '\' as the default escape character, which is not per spec
+SELECT 'abcdefg' SIMILAR TO '_bcd\%' AS false;
+ false
+-------
+ f
+(1 row)
+
+-- and an empty string to mean "no escape", which is also not per spec
+SELECT 'abcd\efg' SIMILAR TO '_bcd\%' ESCAPE '' AS true;
+ true
+------
+ t
+(1 row)
+
+-- these behaviors are per spec, though:
+SELECT 'abcdefg' SIMILAR TO '_bcd%' ESCAPE NULL AS null;
+ null
+------
+
+(1 row)
+
+SELECT 'abcdefg' SIMILAR TO '_bcd#%' ESCAPE '##' AS error;
+ERROR: invalid escape string
+HINT: Escape string must be empty or one character.
+-- Test back reference in regexp_replace
SELECT regexp_replace('1112223333', E'(\\d{3})(\\d{3})(\\d{4})', E'(\\1) \\2-\\3');
regexp_replace
----------------
diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql
index 5744c9f800..b5e75c344f 100644
--- a/src/test/regress/sql/strings.sql
+++ b/src/test/regress/sql/strings.sql
@@ -144,7 +144,20 @@ SELECT SUBSTRING('abcdefg' FROM 'c.e') AS "cde";
-- With a parenthesized subexpression, return only what matches the subexpr
SELECT SUBSTRING('abcdefg' FROM 'b(.*)f') AS "cde";
--- PostgreSQL extension to allow using back reference in replace string;
+-- Check behavior of SIMILAR TO, which uses largely the same regexp variant
+SELECT 'abcdefg' SIMILAR TO '_bcd%' AS true;
+SELECT 'abcdefg' SIMILAR TO 'bcd%' AS false;
+SELECT 'abcdefg' SIMILAR TO '_bcd#%' ESCAPE '#' AS false;
+SELECT 'abcd%' SIMILAR TO '_bcd#%' ESCAPE '#' AS true;
+-- Postgres uses '\' as the default escape character, which is not per spec
+SELECT 'abcdefg' SIMILAR TO '_bcd\%' AS false;
+-- and an empty string to mean "no escape", which is also not per spec
+SELECT 'abcd\efg' SIMILAR TO '_bcd\%' ESCAPE '' AS true;
+-- these behaviors are per spec, though:
+SELECT 'abcdefg' SIMILAR TO '_bcd%' ESCAPE NULL AS null;
+SELECT 'abcdefg' SIMILAR TO '_bcd#%' ESCAPE '##' AS error;
+
+-- Test back reference in regexp_replace
SELECT regexp_replace('1112223333', E'(\\d{3})(\\d{3})(\\d{4})', E'(\\1) \\2-\\3');
SELECT regexp_replace('AAA BBB CCC ', E'\\s+', ' ', 'g');
SELECT regexp_replace('AAA', '^|$', 'Z', 'g');