From 5e1963fb764e9cc092e0f7b58b28985c311431d9 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Fri, 22 Mar 2019 12:09:32 +0100 Subject: [PATCH] Collations with nondeterministic comparison This adds a flag "deterministic" to collations. If that is false, such a collation disables various optimizations that assume that strings are equal only if they are byte-wise equal. That then allows use cases such as case-insensitive or accent-insensitive comparisons or handling of strings with different Unicode normal forms. This functionality is only supported with the ICU provider. At least glibc doesn't appear to have any locales that work in a nondeterministic way, so it's not worth supporting this for the libc provider. The term "deterministic comparison" in this context is from Unicode Technical Standard #10 (https://unicode.org/reports/tr10/#Deterministic_Comparison). This patch makes changes in three areas: - CREATE COLLATION DDL changes and system catalog changes to support this new flag. - Many executor nodes and auxiliary code are extended to track collations. Previously, this code would just throw away collation information, because the eventually-called user-defined functions didn't use it since they only cared about equality, which didn't need collation information. - String data type functions that do equality comparisons and hashing are changed to take the (non-)deterministic flag into account. For comparison, this just means skipping various shortcuts and tie breakers that use byte-wise comparison. For hashing, we first need to convert the input string to a canonical "sort key" using the ICU analogue of strxfrm(). Reviewed-by: Daniel Verite Reviewed-by: Peter Geoghegan Discussion: https://www.postgresql.org/message-id/flat/1ccc668f-4cbc-0bef-af67-450b47cdfee7@2ndquadrant.com --- contrib/bloom/bloom.h | 1 + contrib/bloom/blutils.c | 3 +- doc/src/sgml/catalogs.sgml | 7 + doc/src/sgml/charset.sgml | 61 +- doc/src/sgml/citext.sgml | 21 + doc/src/sgml/func.sgml | 6 + doc/src/sgml/ref/create_collation.sgml | 22 + src/backend/access/hash/hashfunc.c | 100 ++- src/backend/access/spgist/spgtextproc.c | 3 +- src/backend/catalog/pg_collation.c | 2 + src/backend/commands/collationcmds.c | 25 +- src/backend/commands/extension.c | 6 +- src/backend/executor/execExpr.c | 4 +- src/backend/executor/execGrouping.c | 14 +- src/backend/executor/execPartition.c | 1 + src/backend/executor/execReplication.c | 5 +- src/backend/executor/nodeAgg.c | 9 +- src/backend/executor/nodeGroup.c | 1 + src/backend/executor/nodeHash.c | 14 +- src/backend/executor/nodeHashjoin.c | 5 + src/backend/executor/nodeRecursiveunion.c | 1 + src/backend/executor/nodeSetOp.c | 2 + src/backend/executor/nodeSubplan.c | 14 +- src/backend/executor/nodeUnique.c | 1 + src/backend/executor/nodeWindowAgg.c | 2 + src/backend/nodes/copyfuncs.c | 7 + src/backend/nodes/outfuncs.c | 7 + src/backend/nodes/readfuncs.c | 7 + src/backend/optimizer/plan/createplan.c | 54 +- src/backend/optimizer/util/tlist.c | 25 + src/backend/partitioning/partbounds.c | 4 +- src/backend/partitioning/partprune.c | 3 +- src/backend/regex/regc_pg_locale.c | 5 + src/backend/utils/adt/arrayfuncs.c | 2 +- src/backend/utils/adt/like.c | 27 +- src/backend/utils/adt/like_support.c | 14 + src/backend/utils/adt/name.c | 38 +- src/backend/utils/adt/orderedsetaggs.c | 3 +- src/backend/utils/adt/pg_locale.c | 1 + src/backend/utils/adt/ri_triggers.c | 33 +- src/backend/utils/adt/varchar.c | 194 ++++- src/backend/utils/adt/varlena.c | 333 +++++--- src/backend/utils/cache/catcache.c | 9 +- src/backend/utils/cache/lsyscache.c | 16 + src/bin/initdb/initdb.c | 4 +- src/bin/pg_dump/pg_dump.c | 39 +- src/bin/psql/describe.c | 17 +- src/include/catalog/catversion.h | 2 +- src/include/catalog/pg_collation.h | 2 + src/include/executor/executor.h | 4 + src/include/executor/hashjoin.h | 1 + src/include/executor/nodeHash.h | 2 +- src/include/nodes/execnodes.h | 3 + src/include/nodes/plannodes.h | 7 + src/include/optimizer/planmain.h | 2 +- src/include/optimizer/tlist.h | 1 + src/include/partitioning/partbounds.h | 1 + src/include/utils/lsyscache.h | 1 + src/include/utils/pg_locale.h | 1 + .../regress/expected/collate.icu.utf8.out | 710 ++++++++++++++++++ .../regress/expected/collate.linux.utf8.out | 5 + src/test/regress/expected/collate.out | 15 + src/test/regress/expected/subselect.out | 19 + src/test/regress/sql/collate.icu.utf8.sql | 250 ++++++ src/test/regress/sql/collate.linux.utf8.sql | 7 + src/test/regress/sql/collate.sql | 5 + src/test/regress/sql/subselect.sql | 17 + src/test/subscription/Makefile | 2 + src/test/subscription/t/012_collation.pl | 103 +++ 69 files changed, 2090 insertions(+), 242 deletions(-) create mode 100644 src/test/subscription/t/012_collation.pl diff --git a/contrib/bloom/bloom.h b/contrib/bloom/bloom.h index d641361aef..7c18eaa508 100644 --- a/contrib/bloom/bloom.h +++ b/contrib/bloom/bloom.h @@ -137,6 +137,7 @@ typedef struct BloomMetaPageData typedef struct BloomState { FmgrInfo hashFn[INDEX_MAX_KEYS]; + Oid collations[INDEX_MAX_KEYS]; BloomOptions opts; /* copy of options on index's metapage */ int32 nColumns; diff --git a/contrib/bloom/blutils.c b/contrib/bloom/blutils.c index 6458376578..d078dfbd46 100644 --- a/contrib/bloom/blutils.c +++ b/contrib/bloom/blutils.c @@ -163,6 +163,7 @@ initBloomState(BloomState *state, Relation index) fmgr_info_copy(&(state->hashFn[i]), index_getprocinfo(index, i + 1, BLOOM_HASH_PROC), CurrentMemoryContext); + state->collations[i] = index->rd_indcollation[i]; } /* Initialize amcache if needed with options from metapage */ @@ -267,7 +268,7 @@ signValue(BloomState *state, BloomSignatureWord *sign, Datum value, int attno) * different columns will be mapped into different bits because of step * above */ - hashVal = DatumGetInt32(FunctionCall1(&state->hashFn[attno], value)); + hashVal = DatumGetInt32(FunctionCall1Coll(&state->hashFn[attno], state->collations[attno], value)); mySrand(hashVal ^ myRand()); for (j = 0; j < state->opts.bitSize[attno]; j++) diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index 0fd792ff1a..45ed077654 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -2077,6 +2077,13 @@ SCRAM-SHA-256$<iteration count>:&l default, c = libc, i = icu + + collisdeterministic + bool + + Is the collation deterministic? + + collencoding int4 diff --git a/doc/src/sgml/charset.sgml b/doc/src/sgml/charset.sgml index a6143ef8a7..555d1b4ac6 100644 --- a/doc/src/sgml/charset.sgml +++ b/doc/src/sgml/charset.sgml @@ -847,11 +847,13 @@ CREATE COLLATION german (provider = libc, locale = 'de_DE'); Note that while this system allows creating collations that ignore - case or ignore accents or similar (using - the ks key), PostgreSQL does not at the moment allow - such collations to act in a truly case- or accent-insensitive manner. Any - strings that compare equal according to the collation but are not - byte-wise equal will be sorted according to their byte values. + case or ignore accents or similar (using the + ks key), in order for such collations to act in a + truly case- or accent-insensitive manner, they also need to be declared as not + deterministic in CREATE COLLATION; + see . + Otherwise, any strings that compare equal according to the collation but + are not byte-wise equal will be sorted according to their byte values. @@ -883,6 +885,55 @@ CREATE COLLATION french FROM "fr-x-icu"; + + + Nondeterminstic Collations + + + A collation is either deterministic or + nondeterministic. A deterministic collation uses + deterministic comparisons, which means that it considers strings to be + equal only if they consist of the same byte sequence. Nondeterministic + comparison may determine strings to be equal even if they consist of + different bytes. Typical situations include case-insensitive comparison, + accent-insensitive comparison, as well as comparion of strings in + different Unicode normal forms. It is up to the collation provider to + actually implement such insensitive comparisons; the deterministic flag + only determines whether ties are to be broken using bytewise comparison. + See also Unicode Technical + Standard 10 for more information on the terminology. + + + + To create a nondeterministic collation, specify the property + deterministic = false to CREATE + COLLATION, for example: + +CREATE COLLATION ndcoll (provider = icu, locale = 'und', deterministic = false); + + This example would use the standard Unicode collation in a + nondeterministic way. In particular, this would allow strings in + different normal forms to be compared correctly. More interesting + examples make use of the ICU customization facilities explained above. + For example: + +CREATE COLLATION case_insensitive (provider = icu, locale = 'und-u-ks-level2', deterministic = false); +CREATE COLLATION ignore_accents (provider = icu, locale = 'und-u-ks-level1-kc-true', deterministic = false); + + + + + All standard and predefined collations are deterministic, all + user-defined collations are deterministic by default. While + nondeterministic collations give a more correct behavior, + especially when considering the full power of Unicode and its many + special cases, they also have some drawbacks. Foremost, their use leads + to a performance penalty. Also, certain operations are not possible with + nondeterministic collations, such as pattern matching operations. + Therefore, they should be used only in cases where they are specifically + wanted. + + diff --git a/doc/src/sgml/citext.sgml b/doc/src/sgml/citext.sgml index b1fe7101b2..85aa339d8b 100644 --- a/doc/src/sgml/citext.sgml +++ b/doc/src/sgml/citext.sgml @@ -14,6 +14,16 @@ exactly like text. + + + Consider using nondeterministic collations (see + ) instead of this module. They + can be used for case-insensitive comparisons, accent-insensitive + comparisons, and other combinations, and they handle more Unicode special + cases correctly. + + + Rationale @@ -246,6 +256,17 @@ SELECT * FROM users WHERE nick = 'Larry'; will be invoked instead. + + + + The approach of lower-casing strings for comparison does not handle some + Unicode special cases correctly, for example when one upper-case letter + has two lower-case letter equivalents. Unicode distinguishes between + case mapping and case + folding for this reason. Use nondeterministic collations + instead of citext to handle that correctly. + + diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 3a99e209a2..1a01473291 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -4065,6 +4065,12 @@ cast(-44 as bit(12)) 111111010100 + + The pattern matching operators of all three kinds do not support + nondeterministic collations. If required, apply a different collation to + the expression to work around this limitation. + + <function>LIKE</function> diff --git a/doc/src/sgml/ref/create_collation.sgml b/doc/src/sgml/ref/create_collation.sgml index 038797fce1..def4dda6e8 100644 --- a/doc/src/sgml/ref/create_collation.sgml +++ b/doc/src/sgml/ref/create_collation.sgml @@ -23,6 +23,7 @@ CREATE COLLATION [ IF NOT EXISTS ] name ( [ LC_COLLATE = lc_collate, ] [ LC_CTYPE = lc_ctype, ] [ PROVIDER = provider, ] + [ DETERMINISTIC = boolean, ] [ VERSION = version ] ) CREATE COLLATION [ IF NOT EXISTS ] name FROM existing_collation @@ -124,6 +125,27 @@ CREATE COLLATION [ IF NOT EXISTS ] name FROM + + DETERMINISTIC + + + + Specifies whether the collation should use deterministic comparisons. + The default is true. A deterministic comparison considers strings that + are not byte-wise equal to be unequal even if they are considered + logically equal by the comparison. PostgreSQL breaks ties using a + byte-wise comparison. Comparison that is not deterministic can make the + collation be, say, case- or accent-insensitive. For that, you need to + choose an appropriate LC_COLLATE setting + and set the collation to not deterministic here. + + + + Nondeterministic collations are only supported with the ICU provider. + + + + version diff --git a/src/backend/access/hash/hashfunc.c b/src/backend/access/hash/hashfunc.c index e5f3d42e04..0bf15ae723 100644 --- a/src/backend/access/hash/hashfunc.c +++ b/src/backend/access/hash/hashfunc.c @@ -27,8 +27,10 @@ #include "postgres.h" #include "access/hash.h" +#include "catalog/pg_collation.h" #include "utils/builtins.h" #include "utils/hashutils.h" +#include "utils/pg_locale.h" /* * Datatype-specific hash functions. @@ -243,15 +245,51 @@ Datum hashtext(PG_FUNCTION_ARGS) { text *key = PG_GETARG_TEXT_PP(0); + Oid collid = PG_GET_COLLATION(); + pg_locale_t mylocale = 0; Datum result; - /* - * Note: this is currently identical in behavior to hashvarlena, but keep - * it as a separate function in case we someday want to do something - * different in non-C locales. (See also hashbpchar, if so.) - */ - result = hash_any((unsigned char *) VARDATA_ANY(key), - VARSIZE_ANY_EXHDR(key)); + if (!collid) + ereport(ERROR, + (errcode(ERRCODE_INDETERMINATE_COLLATION), + errmsg("could not determine which collation to use for string hashing"), + errhint("Use the COLLATE clause to set the collation explicitly."))); + + if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID) + mylocale = pg_newlocale_from_collation(collid); + + if (!mylocale || mylocale->deterministic) + { + result = hash_any((unsigned char *) VARDATA_ANY(key), + VARSIZE_ANY_EXHDR(key)); + } + else + { +#ifdef USE_ICU + if (mylocale->provider == COLLPROVIDER_ICU) + { + int32_t ulen = -1; + UChar *uchar = NULL; + Size bsize; + uint8_t *buf; + + ulen = icu_to_uchar(&uchar, VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key)); + + bsize = ucol_getSortKey(mylocale->info.icu.ucol, + uchar, ulen, NULL, 0); + buf = palloc(bsize); + ucol_getSortKey(mylocale->info.icu.ucol, + uchar, ulen, buf, bsize); + + result = hash_any(buf, bsize); + + pfree(buf); + } + else +#endif + /* shouldn't happen */ + elog(ERROR, "unsupported collprovider: %c", mylocale->provider); + } /* Avoid leaking memory for toasted inputs */ PG_FREE_IF_COPY(key, 0); @@ -263,12 +301,52 @@ Datum hashtextextended(PG_FUNCTION_ARGS) { text *key = PG_GETARG_TEXT_PP(0); + Oid collid = PG_GET_COLLATION(); + pg_locale_t mylocale = 0; Datum result; - /* Same approach as hashtext */ - result = hash_any_extended((unsigned char *) VARDATA_ANY(key), - VARSIZE_ANY_EXHDR(key), - PG_GETARG_INT64(1)); + if (!collid) + ereport(ERROR, + (errcode(ERRCODE_INDETERMINATE_COLLATION), + errmsg("could not determine which collation to use for string hashing"), + errhint("Use the COLLATE clause to set the collation explicitly."))); + + if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID) + mylocale = pg_newlocale_from_collation(collid); + + if (!mylocale || mylocale->deterministic) + { + result = hash_any_extended((unsigned char *) VARDATA_ANY(key), + VARSIZE_ANY_EXHDR(key), + PG_GETARG_INT64(1)); + } + else + { +#ifdef USE_ICU + if (mylocale->provider == COLLPROVIDER_ICU) + { + int32_t ulen = -1; + UChar *uchar = NULL; + Size bsize; + uint8_t *buf; + + ulen = icu_to_uchar(&uchar, VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key)); + + bsize = ucol_getSortKey(mylocale->info.icu.ucol, + uchar, ulen, NULL, 0); + buf = palloc(bsize); + ucol_getSortKey(mylocale->info.icu.ucol, + uchar, ulen, buf, bsize); + + result = hash_any_extended(buf, bsize, PG_GETARG_INT64(1)); + + pfree(buf); + } + else +#endif + /* shouldn't happen */ + elog(ERROR, "unsupported collprovider: %c", mylocale->provider); + } PG_FREE_IF_COPY(key, 0); diff --git a/src/backend/access/spgist/spgtextproc.c b/src/backend/access/spgist/spgtextproc.c index 39cd391529..d22998c54b 100644 --- a/src/backend/access/spgist/spgtextproc.c +++ b/src/backend/access/spgist/spgtextproc.c @@ -630,7 +630,8 @@ spg_text_leaf_consistent(PG_FUNCTION_ARGS) * query (prefix) string, so we don't need to check it again. */ res = (level >= queryLen) || - DatumGetBool(DirectFunctionCall2(text_starts_with, + DatumGetBool(DirectFunctionCall2Coll(text_starts_with, + PG_GET_COLLATION(), out->leafValue, PointerGetDatum(query))); diff --git a/src/backend/catalog/pg_collation.c b/src/backend/catalog/pg_collation.c index 74e1e82cb9..dd99d53547 100644 --- a/src/backend/catalog/pg_collation.c +++ b/src/backend/catalog/pg_collation.c @@ -46,6 +46,7 @@ Oid CollationCreate(const char *collname, Oid collnamespace, Oid collowner, char collprovider, + bool collisdeterministic, int32 collencoding, const char *collcollate, const char *collctype, const char *collversion, @@ -160,6 +161,7 @@ CollationCreate(const char *collname, Oid collnamespace, values[Anum_pg_collation_collnamespace - 1] = ObjectIdGetDatum(collnamespace); values[Anum_pg_collation_collowner - 1] = ObjectIdGetDatum(collowner); values[Anum_pg_collation_collprovider - 1] = CharGetDatum(collprovider); + values[Anum_pg_collation_collisdeterministic - 1] = BoolGetDatum(collisdeterministic); values[Anum_pg_collation_collencoding - 1] = Int32GetDatum(collencoding); namestrcpy(&name_collate, collcollate); values[Anum_pg_collation_collcollate - 1] = NameGetDatum(&name_collate); diff --git a/src/backend/commands/collationcmds.c b/src/backend/commands/collationcmds.c index ed3f1c12e5..919e092483 100644 --- a/src/backend/commands/collationcmds.c +++ b/src/backend/commands/collationcmds.c @@ -59,10 +59,12 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e DefElem *lccollateEl = NULL; DefElem *lcctypeEl = NULL; DefElem *providerEl = NULL; + DefElem *deterministicEl = NULL; DefElem *versionEl = NULL; char *collcollate = NULL; char *collctype = NULL; char *collproviderstr = NULL; + bool collisdeterministic = true; int collencoding = 0; char collprovider = 0; char *collversion = NULL; @@ -91,6 +93,8 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e defelp = &lcctypeEl; else if (strcmp(defel->defname, "provider") == 0) defelp = &providerEl; + else if (strcmp(defel->defname, "deterministic") == 0) + defelp = &deterministicEl; else if (strcmp(defel->defname, "version") == 0) defelp = &versionEl; else @@ -125,6 +129,7 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e collcollate = pstrdup(NameStr(((Form_pg_collation) GETSTRUCT(tp))->collcollate)); collctype = pstrdup(NameStr(((Form_pg_collation) GETSTRUCT(tp))->collctype)); collprovider = ((Form_pg_collation) GETSTRUCT(tp))->collprovider; + collisdeterministic = ((Form_pg_collation) GETSTRUCT(tp))->collisdeterministic; collencoding = ((Form_pg_collation) GETSTRUCT(tp))->collencoding; ReleaseSysCache(tp); @@ -157,6 +162,9 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e if (providerEl) collproviderstr = defGetString(providerEl); + if (deterministicEl) + collisdeterministic = defGetBoolean(deterministicEl); + if (versionEl) collversion = defGetString(versionEl); @@ -185,6 +193,16 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), errmsg("parameter \"lc_ctype\" must be specified"))); + /* + * Nondeterministic collations are currently only supported with ICU + * because that's the only case where it can actually make a difference. + * So we can save writing the code for the other providers. + */ + if (!collisdeterministic && collprovider != COLLPROVIDER_ICU) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("nondeterministic collations not supported with this provider"))); + if (!fromEl) { if (collprovider == COLLPROVIDER_ICU) @@ -203,6 +221,7 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e collNamespace, GetUserId(), collprovider, + collisdeterministic, collencoding, collcollate, collctype, @@ -586,7 +605,7 @@ pg_import_system_collations(PG_FUNCTION_ARGS) * about existing ones. */ collid = CollationCreate(localebuf, nspid, GetUserId(), - COLLPROVIDER_LIBC, enc, + COLLPROVIDER_LIBC, true, enc, localebuf, localebuf, get_collation_actual_version(COLLPROVIDER_LIBC, localebuf), true, true); @@ -647,7 +666,7 @@ pg_import_system_collations(PG_FUNCTION_ARGS) int enc = aliases[i].enc; collid = CollationCreate(alias, nspid, GetUserId(), - COLLPROVIDER_LIBC, enc, + COLLPROVIDER_LIBC, true, enc, locale, locale, get_collation_actual_version(COLLPROVIDER_LIBC, locale), true, true); @@ -709,7 +728,7 @@ pg_import_system_collations(PG_FUNCTION_ARGS) collid = CollationCreate(psprintf("%s-x-icu", langtag), nspid, GetUserId(), - COLLPROVIDER_ICU, -1, + COLLPROVIDER_ICU, true, -1, collcollate, collcollate, get_collation_actual_version(COLLPROVIDER_ICU, collcollate), true, true); diff --git a/src/backend/commands/extension.c b/src/backend/commands/extension.c index daf3f51636..d4723fced8 100644 --- a/src/backend/commands/extension.c +++ b/src/backend/commands/extension.c @@ -901,7 +901,8 @@ execute_extension_script(Oid extensionOid, ExtensionControlFile *control, { const char *qSchemaName = quote_identifier(schemaName); - t_sql = DirectFunctionCall3(replace_text, + t_sql = DirectFunctionCall3Coll(replace_text, + C_COLLATION_OID, t_sql, CStringGetTextDatum("@extschema@"), CStringGetTextDatum(qSchemaName)); @@ -913,7 +914,8 @@ execute_extension_script(Oid extensionOid, ExtensionControlFile *control, */ if (control->module_pathname) { - t_sql = DirectFunctionCall3(replace_text, + t_sql = DirectFunctionCall3Coll(replace_text, + C_COLLATION_OID, t_sql, CStringGetTextDatum("MODULE_PATHNAME"), CStringGetTextDatum(control->module_pathname)); diff --git a/src/backend/executor/execExpr.c b/src/backend/executor/execExpr.c index 7cbf9d3bc1..0fb31f5c3d 100644 --- a/src/backend/executor/execExpr.c +++ b/src/backend/executor/execExpr.c @@ -3317,6 +3317,7 @@ ExecBuildGroupingEqual(TupleDesc ldesc, TupleDesc rdesc, int numCols, const AttrNumber *keyColIdx, const Oid *eqfunctions, + const Oid *collations, PlanState *parent) { ExprState *state = makeNode(ExprState); @@ -3377,6 +3378,7 @@ ExecBuildGroupingEqual(TupleDesc ldesc, TupleDesc rdesc, Form_pg_attribute latt = TupleDescAttr(ldesc, attno - 1); Form_pg_attribute ratt = TupleDescAttr(rdesc, attno - 1); Oid foid = eqfunctions[natt]; + Oid collid = collations[natt]; FmgrInfo *finfo; FunctionCallInfo fcinfo; AclResult aclresult; @@ -3394,7 +3396,7 @@ ExecBuildGroupingEqual(TupleDesc ldesc, TupleDesc rdesc, fmgr_info(foid, finfo); fmgr_info_set_expr(NULL, finfo); InitFunctionCallInfoData(*fcinfo, finfo, 2, - InvalidOid, NULL, NULL); + collid, NULL, NULL); /* left arg */ scratch.opcode = EEOP_INNER_VAR; diff --git a/src/backend/executor/execGrouping.c b/src/backend/executor/execGrouping.c index 417e971ec8..14ee8db3f9 100644 --- a/src/backend/executor/execGrouping.c +++ b/src/backend/executor/execGrouping.c @@ -60,6 +60,7 @@ execTuplesMatchPrepare(TupleDesc desc, int numCols, const AttrNumber *keyColIdx, const Oid *eqOperators, + const Oid *collations, PlanState *parent) { Oid *eqFunctions = (Oid *) palloc(numCols * sizeof(Oid)); @@ -75,7 +76,7 @@ execTuplesMatchPrepare(TupleDesc desc, /* build actual expression */ expr = ExecBuildGroupingEqual(desc, desc, NULL, NULL, - numCols, keyColIdx, eqFunctions, + numCols, keyColIdx, eqFunctions, collations, parent); return expr; @@ -155,6 +156,7 @@ BuildTupleHashTableExt(PlanState *parent, int numCols, AttrNumber *keyColIdx, const Oid *eqfuncoids, FmgrInfo *hashfunctions, + Oid *collations, long nbuckets, Size additionalsize, MemoryContext metacxt, MemoryContext tablecxt, @@ -177,6 +179,7 @@ BuildTupleHashTableExt(PlanState *parent, hashtable->numCols = numCols; hashtable->keyColIdx = keyColIdx; hashtable->tab_hash_funcs = hashfunctions; + hashtable->tab_collations = collations; hashtable->tablecxt = tablecxt; hashtable->tempcxt = tempcxt; hashtable->entrysize = entrysize; @@ -212,7 +215,7 @@ BuildTupleHashTableExt(PlanState *parent, hashtable->tab_eq_func = ExecBuildGroupingEqual(inputDesc, inputDesc, &TTSOpsMinimalTuple, &TTSOpsMinimalTuple, numCols, - keyColIdx, eqfuncoids, + keyColIdx, eqfuncoids, collations, NULL); /* @@ -240,6 +243,7 @@ BuildTupleHashTable(PlanState *parent, int numCols, AttrNumber *keyColIdx, const Oid *eqfuncoids, FmgrInfo *hashfunctions, + Oid *collations, long nbuckets, Size additionalsize, MemoryContext tablecxt, MemoryContext tempcxt, @@ -250,6 +254,7 @@ BuildTupleHashTable(PlanState *parent, numCols, keyColIdx, eqfuncoids, hashfunctions, + collations, nbuckets, additionalsize, tablecxt, tablecxt, @@ -421,8 +426,9 @@ TupleHashTableHash(struct tuplehash_hash *tb, const MinimalTuple tuple) { uint32 hkey; - hkey = DatumGetUInt32(FunctionCall1(&hashfunctions[i], - attr)); + hkey = DatumGetUInt32(FunctionCall1Coll(&hashfunctions[i], + hashtable->tab_collations[i], + attr)); hashkey ^= hkey; } } diff --git a/src/backend/executor/execPartition.c b/src/backend/executor/execPartition.c index 37e96a6013..cfad8a38f0 100644 --- a/src/backend/executor/execPartition.c +++ b/src/backend/executor/execPartition.c @@ -1246,6 +1246,7 @@ get_partition_for_tuple(PartitionDispatch pd, Datum *values, bool *isnull) greatest_modulus = get_hash_partition_greatest_modulus(boundinfo); rowHash = compute_partition_hash_value(key->partnatts, key->partsupfunc, + key->partcollation, values, isnull); part_index = boundinfo->indexes[rowHash % greatest_modulus]; diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c index 95dfc4987d..c539bb5a3f 100644 --- a/src/backend/executor/execReplication.c +++ b/src/backend/executor/execReplication.c @@ -96,6 +96,8 @@ build_replindex_scan_key(ScanKey skey, Relation rel, Relation idxrel, regop, searchslot->tts_values[mainattno - 1]); + skey[attoff].sk_collation = idxrel->rd_indcollation[attoff]; + /* Check for null value. */ if (searchslot->tts_isnull[mainattno - 1]) { @@ -262,7 +264,8 @@ tuples_equal(TupleTableSlot *slot1, TupleTableSlot *slot2) errmsg("could not identify an equality operator for type %s", format_type_be(att->atttypid)))); - if (!DatumGetBool(FunctionCall2(&typentry->eq_opr_finfo, + if (!DatumGetBool(FunctionCall2Coll(&typentry->eq_opr_finfo, + att->attcollation, slot1->tts_values[attrnum], slot2->tts_values[attrnum]))) return false; diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c index bae7989a42..47161afbd4 100644 --- a/src/backend/executor/nodeAgg.c +++ b/src/backend/executor/nodeAgg.c @@ -746,15 +746,14 @@ process_ordered_aggregate_single(AggState *aggstate, /* * If DISTINCT mode, and not distinct from prior, skip it. - * - * Note: we assume equality functions don't care about collation. */ if (isDistinct && haveOldVal && ((oldIsNull && *isNull) || (!oldIsNull && !*isNull && oldAbbrevVal == newAbbrevVal && - DatumGetBool(FunctionCall2(&pertrans->equalfnOne, + DatumGetBool(FunctionCall2Coll(&pertrans->equalfnOne, + pertrans->aggCollation, oldVal, *newVal))))) { /* equal to prior, so forget this one */ @@ -1287,6 +1286,7 @@ build_hash_table(AggState *aggstate) perhash->hashGrpColIdxHash, perhash->eqfuncoids, perhash->hashfunctions, + perhash->aggnode->grpCollations, perhash->aggnode->numGroups, additionalsize, aggstate->ss.ps.state->es_query_cxt, @@ -2381,6 +2381,7 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) length, aggnode->grpColIdx, aggnode->grpOperators, + aggnode->grpCollations, (PlanState *) aggstate); } @@ -2392,6 +2393,7 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) aggnode->numCols, aggnode->grpColIdx, aggnode->grpOperators, + aggnode->grpCollations, (PlanState *) aggstate); } } @@ -3155,6 +3157,7 @@ build_pertrans_for_aggref(AggStatePerTrans pertrans, numDistinctCols, pertrans->sortColIdx, ops, + pertrans->sortCollations, &aggstate->ss.ps); pfree(ops); } diff --git a/src/backend/executor/nodeGroup.c b/src/backend/executor/nodeGroup.c index 655084d7b5..05f1d33150 100644 --- a/src/backend/executor/nodeGroup.c +++ b/src/backend/executor/nodeGroup.c @@ -212,6 +212,7 @@ ExecInitGroup(Group *node, EState *estate, int eflags) node->numCols, node->grpColIdx, node->grpOperators, + node->grpCollations, &grpstate->ss.ps); return grpstate; diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c index 856daf6a7f..64eec91f8b 100644 --- a/src/backend/executor/nodeHash.c +++ b/src/backend/executor/nodeHash.c @@ -425,7 +425,7 @@ ExecEndHash(HashState *node) * ---------------------------------------------------------------- */ HashJoinTable -ExecHashTableCreate(HashState *state, List *hashOperators, bool keepNulls) +ExecHashTableCreate(HashState *state, List *hashOperators, List *hashCollations, bool keepNulls) { Hash *node; HashJoinTable hashtable; @@ -439,6 +439,7 @@ ExecHashTableCreate(HashState *state, List *hashOperators, bool keepNulls) int nkeys; int i; ListCell *ho; + ListCell *hc; MemoryContext oldcxt; /* @@ -541,8 +542,9 @@ ExecHashTableCreate(HashState *state, List *hashOperators, bool keepNulls) hashtable->inner_hashfunctions = (FmgrInfo *) palloc(nkeys * sizeof(FmgrInfo)); hashtable->hashStrict = (bool *) palloc(nkeys * sizeof(bool)); + hashtable->collations = (Oid *) palloc(nkeys * sizeof(Oid)); i = 0; - foreach(ho, hashOperators) + forboth(ho, hashOperators, hc, hashCollations) { Oid hashop = lfirst_oid(ho); Oid left_hashfn; @@ -554,6 +556,7 @@ ExecHashTableCreate(HashState *state, List *hashOperators, bool keepNulls) fmgr_info(left_hashfn, &hashtable->outer_hashfunctions[i]); fmgr_info(right_hashfn, &hashtable->inner_hashfunctions[i]); hashtable->hashStrict[i] = op_strict(hashop); + hashtable->collations[i] = lfirst_oid(hc); i++; } @@ -1847,7 +1850,7 @@ ExecHashGetHashValue(HashJoinTable hashtable, /* Compute the hash function */ uint32 hkey; - hkey = DatumGetUInt32(FunctionCall1(&hashfunctions[i], keyval)); + hkey = DatumGetUInt32(FunctionCall1Coll(&hashfunctions[i], hashtable->collations[i], keyval)); hashkey ^= hkey; } @@ -2303,8 +2306,9 @@ ExecHashBuildSkewHash(HashJoinTable hashtable, Hash *node, int mcvsToUse) uint32 hashvalue; int bucket; - hashvalue = DatumGetUInt32(FunctionCall1(&hashfunctions[0], - sslot.values[i])); + hashvalue = DatumGetUInt32(FunctionCall1Coll(&hashfunctions[0], + hashtable->collations[0], + sslot.values[i])); /* * While we have not hit a hole in the hashtable and have not hit diff --git a/src/backend/executor/nodeHashjoin.c b/src/backend/executor/nodeHashjoin.c index 2098708864..aa43296e26 100644 --- a/src/backend/executor/nodeHashjoin.c +++ b/src/backend/executor/nodeHashjoin.c @@ -278,6 +278,7 @@ ExecHashJoinImpl(PlanState *pstate, bool parallel) */ hashtable = ExecHashTableCreate(hashNode, node->hj_HashOperators, + node->hj_Collations, HJ_FILL_INNER(node)); node->hj_HashTable = hashtable; @@ -603,6 +604,7 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags) List *rclauses; List *rhclauses; List *hoperators; + List *hcollations; TupleDesc outerDesc, innerDesc; ListCell *l; @@ -738,6 +740,7 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags) rclauses = NIL; rhclauses = NIL; hoperators = NIL; + hcollations = NIL; foreach(l, node->hashclauses) { OpExpr *hclause = lfirst_node(OpExpr, l); @@ -749,10 +752,12 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags) rhclauses = lappend(rhclauses, ExecInitExpr(lsecond(hclause->args), innerPlanState(hjstate))); hoperators = lappend_oid(hoperators, hclause->opno); + hcollations = lappend_oid(hcollations, hclause->inputcollid); } hjstate->hj_OuterHashKeys = lclauses; hjstate->hj_InnerHashKeys = rclauses; hjstate->hj_HashOperators = hoperators; + hjstate->hj_Collations = hcollations; /* child Hash node needs to evaluate inner hash keys, too */ ((HashState *) innerPlanState(hjstate))->hashkeys = rhclauses; diff --git a/src/backend/executor/nodeRecursiveunion.c b/src/backend/executor/nodeRecursiveunion.c index 9b74ed3208..9c5eed7def 100644 --- a/src/backend/executor/nodeRecursiveunion.c +++ b/src/backend/executor/nodeRecursiveunion.c @@ -43,6 +43,7 @@ build_hash_table(RecursiveUnionState *rustate) node->dupColIdx, rustate->eqfuncoids, rustate->hashfunctions, + node->dupCollations, node->numGroups, 0, rustate->ps.state->es_query_cxt, diff --git a/src/backend/executor/nodeSetOp.c b/src/backend/executor/nodeSetOp.c index 26aeaee083..044246aa09 100644 --- a/src/backend/executor/nodeSetOp.c +++ b/src/backend/executor/nodeSetOp.c @@ -132,6 +132,7 @@ build_hash_table(SetOpState *setopstate) node->dupColIdx, setopstate->eqfuncoids, setopstate->hashfunctions, + node->dupCollations, node->numGroups, 0, setopstate->ps.state->es_query_cxt, @@ -554,6 +555,7 @@ ExecInitSetOp(SetOp *node, EState *estate, int eflags) node->numCols, node->dupColIdx, node->dupOperators, + node->dupCollations, &setopstate->ps); if (node->strategy == SETOP_HASHED) diff --git a/src/backend/executor/nodeSubplan.c b/src/backend/executor/nodeSubplan.c index d7d076758c..749b4eced3 100644 --- a/src/backend/executor/nodeSubplan.c +++ b/src/backend/executor/nodeSubplan.c @@ -514,6 +514,7 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext) node->keyColIdx, node->tab_eq_funcoids, node->tab_hash_funcs, + node->tab_collations, nbuckets, 0, node->planstate->state->es_query_cxt, @@ -541,6 +542,7 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext) node->keyColIdx, node->tab_eq_funcoids, node->tab_hash_funcs, + node->tab_collations, nbuckets, 0, node->planstate->state->es_query_cxt, @@ -642,6 +644,7 @@ execTuplesUnequal(TupleTableSlot *slot1, int numCols, AttrNumber *matchColIdx, FmgrInfo *eqfunctions, + const Oid *collations, MemoryContext evalContext) { MemoryContext oldContext; @@ -679,8 +682,8 @@ execTuplesUnequal(TupleTableSlot *slot1, continue; /* can't prove anything here */ /* Apply the type-specific equality function */ - - if (!DatumGetBool(FunctionCall2(&eqfunctions[i], + if (!DatumGetBool(FunctionCall2Coll(&eqfunctions[i], + collations[i], attr1, attr2))) { result = true; /* they are unequal */ @@ -722,6 +725,7 @@ findPartialMatch(TupleHashTable hashtable, TupleTableSlot *slot, if (!execTuplesUnequal(slot, hashtable->tableslot, numCols, keyColIdx, eqfunctions, + hashtable->tab_collations, hashtable->tempcxt)) { TermTupleHashIterator(&hashiter); @@ -817,6 +821,7 @@ ExecInitSubPlan(SubPlan *subplan, PlanState *parent) sstate->tab_eq_funcoids = NULL; sstate->tab_hash_funcs = NULL; sstate->tab_eq_funcs = NULL; + sstate->tab_collations = NULL; sstate->lhs_hash_funcs = NULL; sstate->cur_eq_funcs = NULL; @@ -915,6 +920,7 @@ ExecInitSubPlan(SubPlan *subplan, PlanState *parent) sstate->tab_eq_funcoids = (Oid *) palloc(ncols * sizeof(Oid)); sstate->tab_hash_funcs = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo)); sstate->tab_eq_funcs = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo)); + sstate->tab_collations = (Oid *) palloc(ncols * sizeof(Oid)); sstate->lhs_hash_funcs = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo)); sstate->cur_eq_funcs = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo)); i = 1; @@ -965,6 +971,9 @@ ExecInitSubPlan(SubPlan *subplan, PlanState *parent) fmgr_info(left_hashfn, &sstate->lhs_hash_funcs[i - 1]); fmgr_info(right_hashfn, &sstate->tab_hash_funcs[i - 1]); + /* Set collation */ + sstate->tab_collations[i - 1] = opexpr->inputcollid; + i++; } @@ -1001,6 +1010,7 @@ ExecInitSubPlan(SubPlan *subplan, PlanState *parent) ncols, sstate->keyColIdx, sstate->tab_eq_funcoids, + sstate->tab_collations, parent); } diff --git a/src/backend/executor/nodeUnique.c b/src/backend/executor/nodeUnique.c index ad7039937d..c553f150b8 100644 --- a/src/backend/executor/nodeUnique.c +++ b/src/backend/executor/nodeUnique.c @@ -152,6 +152,7 @@ ExecInitUnique(Unique *node, EState *estate, int eflags) node->numCols, node->uniqColIdx, node->uniqOperators, + node->uniqCollations, &uniquestate->ps); return uniquestate; diff --git a/src/backend/executor/nodeWindowAgg.c b/src/backend/executor/nodeWindowAgg.c index 157ac042b8..b090828c01 100644 --- a/src/backend/executor/nodeWindowAgg.c +++ b/src/backend/executor/nodeWindowAgg.c @@ -2370,6 +2370,7 @@ ExecInitWindowAgg(WindowAgg *node, EState *estate, int eflags) node->partNumCols, node->partColIdx, node->partOperators, + node->partCollations, &winstate->ss.ps); if (node->ordNumCols > 0) @@ -2378,6 +2379,7 @@ ExecInitWindowAgg(WindowAgg *node, EState *estate, int eflags) node->ordNumCols, node->ordColIdx, node->ordOperators, + node->ordCollations, &winstate->ss.ps); /* diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index c68bd7bcf7..1ea6b84561 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -297,6 +297,7 @@ _copyRecursiveUnion(const RecursiveUnion *from) { COPY_POINTER_FIELD(dupColIdx, from->numCols * sizeof(AttrNumber)); COPY_POINTER_FIELD(dupOperators, from->numCols * sizeof(Oid)); + COPY_POINTER_FIELD(dupCollations, from->numCols * sizeof(Oid)); } COPY_SCALAR_FIELD(numGroups); @@ -956,6 +957,7 @@ _copyGroup(const Group *from) COPY_SCALAR_FIELD(numCols); COPY_POINTER_FIELD(grpColIdx, from->numCols * sizeof(AttrNumber)); COPY_POINTER_FIELD(grpOperators, from->numCols * sizeof(Oid)); + COPY_POINTER_FIELD(grpCollations, from->numCols * sizeof(Oid)); return newnode; } @@ -977,6 +979,7 @@ _copyAgg(const Agg *from) { COPY_POINTER_FIELD(grpColIdx, from->numCols * sizeof(AttrNumber)); COPY_POINTER_FIELD(grpOperators, from->numCols * sizeof(Oid)); + COPY_POINTER_FIELD(grpCollations, from->numCols * sizeof(Oid)); } COPY_SCALAR_FIELD(numGroups); COPY_BITMAPSET_FIELD(aggParams); @@ -1002,12 +1005,14 @@ _copyWindowAgg(const WindowAgg *from) { COPY_POINTER_FIELD(partColIdx, from->partNumCols * sizeof(AttrNumber)); COPY_POINTER_FIELD(partOperators, from->partNumCols * sizeof(Oid)); + COPY_POINTER_FIELD(partCollations, from->partNumCols * sizeof(Oid)); } COPY_SCALAR_FIELD(ordNumCols); if (from->ordNumCols > 0) { COPY_POINTER_FIELD(ordColIdx, from->ordNumCols * sizeof(AttrNumber)); COPY_POINTER_FIELD(ordOperators, from->ordNumCols * sizeof(Oid)); + COPY_POINTER_FIELD(ordCollations, from->ordNumCols * sizeof(Oid)); } COPY_SCALAR_FIELD(frameOptions); COPY_NODE_FIELD(startOffset); @@ -1040,6 +1045,7 @@ _copyUnique(const Unique *from) COPY_SCALAR_FIELD(numCols); COPY_POINTER_FIELD(uniqColIdx, from->numCols * sizeof(AttrNumber)); COPY_POINTER_FIELD(uniqOperators, from->numCols * sizeof(Oid)); + COPY_POINTER_FIELD(uniqCollations, from->numCols * sizeof(Oid)); return newnode; } @@ -1089,6 +1095,7 @@ _copySetOp(const SetOp *from) COPY_SCALAR_FIELD(numCols); COPY_POINTER_FIELD(dupColIdx, from->numCols * sizeof(AttrNumber)); COPY_POINTER_FIELD(dupOperators, from->numCols * sizeof(Oid)); + COPY_POINTER_FIELD(dupCollations, from->numCols * sizeof(Oid)); COPY_SCALAR_FIELD(flagColIdx); COPY_SCALAR_FIELD(firstFlag); COPY_SCALAR_FIELD(numGroups); diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 69179a07c3..910a738c20 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -463,6 +463,7 @@ _outRecursiveUnion(StringInfo str, const RecursiveUnion *node) WRITE_INT_FIELD(numCols); WRITE_ATTRNUMBER_ARRAY(dupColIdx, node->numCols); WRITE_OID_ARRAY(dupOperators, node->numCols); + WRITE_OID_ARRAY(dupCollations, node->numCols); WRITE_LONG_FIELD(numGroups); } @@ -774,6 +775,7 @@ _outAgg(StringInfo str, const Agg *node) WRITE_INT_FIELD(numCols); WRITE_ATTRNUMBER_ARRAY(grpColIdx, node->numCols); WRITE_OID_ARRAY(grpOperators, node->numCols); + WRITE_OID_ARRAY(grpCollations, node->numCols); WRITE_LONG_FIELD(numGroups); WRITE_BITMAPSET_FIELD(aggParams); WRITE_NODE_FIELD(groupingSets); @@ -791,9 +793,11 @@ _outWindowAgg(StringInfo str, const WindowAgg *node) WRITE_INT_FIELD(partNumCols); WRITE_ATTRNUMBER_ARRAY(partColIdx, node->partNumCols); WRITE_OID_ARRAY(partOperators, node->partNumCols); + WRITE_OID_ARRAY(partCollations, node->partNumCols); WRITE_INT_FIELD(ordNumCols); WRITE_ATTRNUMBER_ARRAY(ordColIdx, node->ordNumCols); WRITE_OID_ARRAY(ordOperators, node->ordNumCols); + WRITE_OID_ARRAY(ordCollations, node->ordNumCols); WRITE_INT_FIELD(frameOptions); WRITE_NODE_FIELD(startOffset); WRITE_NODE_FIELD(endOffset); @@ -814,6 +818,7 @@ _outGroup(StringInfo str, const Group *node) WRITE_INT_FIELD(numCols); WRITE_ATTRNUMBER_ARRAY(grpColIdx, node->numCols); WRITE_OID_ARRAY(grpOperators, node->numCols); + WRITE_OID_ARRAY(grpCollations, node->numCols); } static void @@ -848,6 +853,7 @@ _outUnique(StringInfo str, const Unique *node) WRITE_INT_FIELD(numCols); WRITE_ATTRNUMBER_ARRAY(uniqColIdx, node->numCols); WRITE_OID_ARRAY(uniqOperators, node->numCols); + WRITE_OID_ARRAY(uniqCollations, node->numCols); } static void @@ -875,6 +881,7 @@ _outSetOp(StringInfo str, const SetOp *node) WRITE_INT_FIELD(numCols); WRITE_ATTRNUMBER_ARRAY(dupColIdx, node->numCols); WRITE_OID_ARRAY(dupOperators, node->numCols); + WRITE_OID_ARRAY(dupCollations, node->numCols); WRITE_INT_FIELD(flagColIdx); WRITE_INT_FIELD(firstFlag); WRITE_LONG_FIELD(numGroups); diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index 4b845b1bb7..eff98febf1 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -1677,6 +1677,7 @@ _readRecursiveUnion(void) READ_INT_FIELD(numCols); READ_ATTRNUMBER_ARRAY(dupColIdx, local_node->numCols); READ_OID_ARRAY(dupOperators, local_node->numCols); + READ_OID_ARRAY(dupCollations, local_node->numCols); READ_LONG_FIELD(numGroups); READ_DONE(); @@ -2143,6 +2144,7 @@ _readGroup(void) READ_INT_FIELD(numCols); READ_ATTRNUMBER_ARRAY(grpColIdx, local_node->numCols); READ_OID_ARRAY(grpOperators, local_node->numCols); + READ_OID_ARRAY(grpCollations, local_node->numCols); READ_DONE(); } @@ -2162,6 +2164,7 @@ _readAgg(void) READ_INT_FIELD(numCols); READ_ATTRNUMBER_ARRAY(grpColIdx, local_node->numCols); READ_OID_ARRAY(grpOperators, local_node->numCols); + READ_OID_ARRAY(grpCollations, local_node->numCols); READ_LONG_FIELD(numGroups); READ_BITMAPSET_FIELD(aggParams); READ_NODE_FIELD(groupingSets); @@ -2184,9 +2187,11 @@ _readWindowAgg(void) READ_INT_FIELD(partNumCols); READ_ATTRNUMBER_ARRAY(partColIdx, local_node->partNumCols); READ_OID_ARRAY(partOperators, local_node->partNumCols); + READ_OID_ARRAY(partCollations, local_node->partNumCols); READ_INT_FIELD(ordNumCols); READ_ATTRNUMBER_ARRAY(ordColIdx, local_node->ordNumCols); READ_OID_ARRAY(ordOperators, local_node->ordNumCols); + READ_OID_ARRAY(ordCollations, local_node->ordNumCols); READ_INT_FIELD(frameOptions); READ_NODE_FIELD(startOffset); READ_NODE_FIELD(endOffset); @@ -2212,6 +2217,7 @@ _readUnique(void) READ_INT_FIELD(numCols); READ_ATTRNUMBER_ARRAY(uniqColIdx, local_node->numCols); READ_OID_ARRAY(uniqOperators, local_node->numCols); + READ_OID_ARRAY(uniqCollations, local_node->numCols); READ_DONE(); } @@ -2290,6 +2296,7 @@ _readSetOp(void) READ_INT_FIELD(numCols); READ_ATTRNUMBER_ARRAY(dupColIdx, local_node->numCols); READ_OID_ARRAY(dupOperators, local_node->numCols); + READ_OID_ARRAY(dupCollations, local_node->numCols); READ_INT_FIELD(flagColIdx); READ_INT_FIELD(firstFlag); READ_LONG_FIELD(numGroups); diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 9fbe5b2a5f..93c56c657c 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -260,14 +260,14 @@ static Sort *make_sort_from_groupcols(List *groupcls, Plan *lefttree); static Material *make_material(Plan *lefttree); static WindowAgg *make_windowagg(List *tlist, Index winref, - int partNumCols, AttrNumber *partColIdx, Oid *partOperators, - int ordNumCols, AttrNumber *ordColIdx, Oid *ordOperators, + int partNumCols, AttrNumber *partColIdx, Oid *partOperators, Oid *partCollations, + int ordNumCols, AttrNumber *ordColIdx, Oid *ordOperators, Oid *ordCollations, int frameOptions, Node *startOffset, Node *endOffset, Oid startInRangeFunc, Oid endInRangeFunc, Oid inRangeColl, bool inRangeAsc, bool inRangeNullsFirst, Plan *lefttree); static Group *make_group(List *tlist, List *qual, int numGroupCols, - AttrNumber *grpColIdx, Oid *grpOperators, + AttrNumber *grpColIdx, Oid *grpOperators, Oid *grpCollations, Plan *lefttree); static Unique *make_unique_from_sortclauses(Plan *lefttree, List *distinctList); static Unique *make_unique_from_pathkeys(Plan *lefttree, @@ -1387,6 +1387,7 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path, int flags) bool newitems; int numGroupCols; AttrNumber *groupColIdx; + Oid *groupCollations; int groupColPos; ListCell *l; @@ -1453,6 +1454,7 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path, int flags) newtlist = subplan->targetlist; numGroupCols = list_length(uniq_exprs); groupColIdx = (AttrNumber *) palloc(numGroupCols * sizeof(AttrNumber)); + groupCollations = (Oid *) palloc(numGroupCols * sizeof(Oid)); groupColPos = 0; foreach(l, uniq_exprs) @@ -1463,7 +1465,9 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path, int flags) tle = tlist_member(uniqexpr, newtlist); if (!tle) /* shouldn't happen */ elog(ERROR, "failed to find unique expression in subplan tlist"); - groupColIdx[groupColPos++] = tle->resno; + groupColIdx[groupColPos] = tle->resno; + groupCollations[groupColPos] = exprCollation((Node *) tle->expr); + groupColPos++; } if (best_path->umethod == UNIQUE_PATH_HASH) @@ -1501,6 +1505,7 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path, int flags) numGroupCols, groupColIdx, groupOperators, + groupCollations, NIL, NIL, best_path->path.rows, @@ -1883,6 +1888,8 @@ create_group_plan(PlannerInfo *root, GroupPath *best_path) extract_grouping_cols(best_path->groupClause, subplan->targetlist), extract_grouping_ops(best_path->groupClause), + extract_grouping_collations(best_path->groupClause, + subplan->targetlist), subplan); copy_generic_path_info(&plan->plan, (Path *) best_path); @@ -1949,6 +1956,8 @@ create_agg_plan(PlannerInfo *root, AggPath *best_path) extract_grouping_cols(best_path->groupClause, subplan->targetlist), extract_grouping_ops(best_path->groupClause), + extract_grouping_collations(best_path->groupClause, + subplan->targetlist), NIL, NIL, best_path->numGroups, @@ -2110,6 +2119,7 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path) list_length((List *) linitial(rollup->gsets)), new_grpColIdx, extract_grouping_ops(rollup->groupClause), + extract_grouping_collations(rollup->groupClause, subplan->targetlist), rollup->gsets, NIL, rollup->numGroups, @@ -2147,6 +2157,7 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path) numGroupCols, top_grpColIdx, extract_grouping_ops(rollup->groupClause), + extract_grouping_collations(rollup->groupClause, subplan->targetlist), rollup->gsets, chain, rollup->numGroups, @@ -2246,9 +2257,11 @@ create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path) int partNumCols; AttrNumber *partColIdx; Oid *partOperators; + Oid *partCollations; int ordNumCols; AttrNumber *ordColIdx; Oid *ordOperators; + Oid *ordCollations; ListCell *lc; /* @@ -2270,6 +2283,7 @@ create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path) */ partColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numPart); partOperators = (Oid *) palloc(sizeof(Oid) * numPart); + partCollations = (Oid *) palloc(sizeof(Oid) * numPart); partNumCols = 0; foreach(lc, wc->partitionClause) @@ -2280,11 +2294,13 @@ create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path) Assert(OidIsValid(sgc->eqop)); partColIdx[partNumCols] = tle->resno; partOperators[partNumCols] = sgc->eqop; + partCollations[partNumCols] = exprCollation((Node *) tle->expr); partNumCols++; } ordColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numOrder); ordOperators = (Oid *) palloc(sizeof(Oid) * numOrder); + ordCollations = (Oid *) palloc(sizeof(Oid) * numOrder); ordNumCols = 0; foreach(lc, wc->orderClause) @@ -2295,6 +2311,7 @@ create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path) Assert(OidIsValid(sgc->eqop)); ordColIdx[ordNumCols] = tle->resno; ordOperators[ordNumCols] = sgc->eqop; + ordCollations[ordNumCols] = exprCollation((Node *) tle->expr); ordNumCols++; } @@ -2304,9 +2321,11 @@ create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path) partNumCols, partColIdx, partOperators, + partCollations, ordNumCols, ordColIdx, ordOperators, + ordCollations, wc->frameOptions, wc->startOffset, wc->endOffset, @@ -5326,10 +5345,12 @@ make_recursive_union(List *tlist, int keyno = 0; AttrNumber *dupColIdx; Oid *dupOperators; + Oid *dupCollations; ListCell *slitem; dupColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols); dupOperators = (Oid *) palloc(sizeof(Oid) * numCols); + dupCollations = (Oid *) palloc(sizeof(Oid) * numCols); foreach(slitem, distinctList) { @@ -5339,11 +5360,13 @@ make_recursive_union(List *tlist, dupColIdx[keyno] = tle->resno; dupOperators[keyno] = sortcl->eqop; + dupCollations[keyno] = exprCollation((Node *) tle->expr); Assert(OidIsValid(dupOperators[keyno])); keyno++; } node->dupColIdx = dupColIdx; node->dupOperators = dupOperators; + node->dupCollations = dupCollations; } node->numGroups = numGroups; @@ -6015,7 +6038,7 @@ materialize_finished_plan(Plan *subplan) Agg * make_agg(List *tlist, List *qual, AggStrategy aggstrategy, AggSplit aggsplit, - int numGroupCols, AttrNumber *grpColIdx, Oid *grpOperators, + int numGroupCols, AttrNumber *grpColIdx, Oid *grpOperators, Oid *grpCollations, List *groupingSets, List *chain, double dNumGroups, Plan *lefttree) { @@ -6031,6 +6054,7 @@ make_agg(List *tlist, List *qual, node->numCols = numGroupCols; node->grpColIdx = grpColIdx; node->grpOperators = grpOperators; + node->grpCollations = grpCollations; node->numGroups = numGroups; node->aggParams = NULL; /* SS_finalize_plan() will fill this */ node->groupingSets = groupingSets; @@ -6046,8 +6070,8 @@ make_agg(List *tlist, List *qual, static WindowAgg * make_windowagg(List *tlist, Index winref, - int partNumCols, AttrNumber *partColIdx, Oid *partOperators, - int ordNumCols, AttrNumber *ordColIdx, Oid *ordOperators, + int partNumCols, AttrNumber *partColIdx, Oid *partOperators, Oid *partCollations, + int ordNumCols, AttrNumber *ordColIdx, Oid *ordOperators, Oid *ordCollations, int frameOptions, Node *startOffset, Node *endOffset, Oid startInRangeFunc, Oid endInRangeFunc, Oid inRangeColl, bool inRangeAsc, bool inRangeNullsFirst, @@ -6060,9 +6084,11 @@ make_windowagg(List *tlist, Index winref, node->partNumCols = partNumCols; node->partColIdx = partColIdx; node->partOperators = partOperators; + node->partCollations = partCollations; node->ordNumCols = ordNumCols; node->ordColIdx = ordColIdx; node->ordOperators = ordOperators; + node->ordCollations = ordCollations; node->frameOptions = frameOptions; node->startOffset = startOffset; node->endOffset = endOffset; @@ -6087,6 +6113,7 @@ make_group(List *tlist, int numGroupCols, AttrNumber *grpColIdx, Oid *grpOperators, + Oid *grpCollations, Plan *lefttree) { Group *node = makeNode(Group); @@ -6095,6 +6122,7 @@ make_group(List *tlist, node->numCols = numGroupCols; node->grpColIdx = grpColIdx; node->grpOperators = grpOperators; + node->grpCollations = grpCollations; plan->qual = qual; plan->targetlist = tlist; @@ -6118,6 +6146,7 @@ make_unique_from_sortclauses(Plan *lefttree, List *distinctList) int keyno = 0; AttrNumber *uniqColIdx; Oid *uniqOperators; + Oid *uniqCollations; ListCell *slitem; plan->targetlist = lefttree->targetlist; @@ -6132,6 +6161,7 @@ make_unique_from_sortclauses(Plan *lefttree, List *distinctList) Assert(numCols > 0); uniqColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols); uniqOperators = (Oid *) palloc(sizeof(Oid) * numCols); + uniqCollations = (Oid *) palloc(sizeof(Oid) * numCols); foreach(slitem, distinctList) { @@ -6140,6 +6170,7 @@ make_unique_from_sortclauses(Plan *lefttree, List *distinctList) uniqColIdx[keyno] = tle->resno; uniqOperators[keyno] = sortcl->eqop; + uniqCollations[keyno] = exprCollation((Node *) tle->expr); Assert(OidIsValid(uniqOperators[keyno])); keyno++; } @@ -6147,6 +6178,7 @@ make_unique_from_sortclauses(Plan *lefttree, List *distinctList) node->numCols = numCols; node->uniqColIdx = uniqColIdx; node->uniqOperators = uniqOperators; + node->uniqCollations = uniqCollations; return node; } @@ -6162,6 +6194,7 @@ make_unique_from_pathkeys(Plan *lefttree, List *pathkeys, int numCols) int keyno = 0; AttrNumber *uniqColIdx; Oid *uniqOperators; + Oid *uniqCollations; ListCell *lc; plan->targetlist = lefttree->targetlist; @@ -6177,6 +6210,7 @@ make_unique_from_pathkeys(Plan *lefttree, List *pathkeys, int numCols) Assert(numCols >= 0 && numCols <= list_length(pathkeys)); uniqColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols); uniqOperators = (Oid *) palloc(sizeof(Oid) * numCols); + uniqCollations = (Oid *) palloc(sizeof(Oid) * numCols); foreach(lc, pathkeys) { @@ -6245,6 +6279,7 @@ make_unique_from_pathkeys(Plan *lefttree, List *pathkeys, int numCols) uniqColIdx[keyno] = tle->resno; uniqOperators[keyno] = eqop; + uniqCollations[keyno] = ec->ec_collation; keyno++; } @@ -6252,6 +6287,7 @@ make_unique_from_pathkeys(Plan *lefttree, List *pathkeys, int numCols) node->numCols = numCols; node->uniqColIdx = uniqColIdx; node->uniqOperators = uniqOperators; + node->uniqCollations = uniqCollations; return node; } @@ -6296,6 +6332,7 @@ make_setop(SetOpCmd cmd, SetOpStrategy strategy, Plan *lefttree, int keyno = 0; AttrNumber *dupColIdx; Oid *dupOperators; + Oid *dupCollations; ListCell *slitem; plan->targetlist = lefttree->targetlist; @@ -6309,6 +6346,7 @@ make_setop(SetOpCmd cmd, SetOpStrategy strategy, Plan *lefttree, */ dupColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols); dupOperators = (Oid *) palloc(sizeof(Oid) * numCols); + dupCollations = (Oid *) palloc(sizeof(Oid) * numCols); foreach(slitem, distinctList) { @@ -6317,6 +6355,7 @@ make_setop(SetOpCmd cmd, SetOpStrategy strategy, Plan *lefttree, dupColIdx[keyno] = tle->resno; dupOperators[keyno] = sortcl->eqop; + dupCollations[keyno] = exprCollation((Node *) tle->expr); Assert(OidIsValid(dupOperators[keyno])); keyno++; } @@ -6326,6 +6365,7 @@ make_setop(SetOpCmd cmd, SetOpStrategy strategy, Plan *lefttree, node->numCols = numCols; node->dupColIdx = dupColIdx; node->dupOperators = dupOperators; + node->dupCollations = dupCollations; node->flagColIdx = flagColIdx; node->firstFlag = firstFlag; node->numGroups = numGroups; diff --git a/src/backend/optimizer/util/tlist.c b/src/backend/optimizer/util/tlist.c index 14d1c67a94..bb3b7969f2 100644 --- a/src/backend/optimizer/util/tlist.c +++ b/src/backend/optimizer/util/tlist.c @@ -503,6 +503,31 @@ extract_grouping_ops(List *groupClause) return groupOperators; } +/* + * extract_grouping_collations - make an array of the grouping column collations + * for a SortGroupClause list + */ +Oid * +extract_grouping_collations(List *groupClause, List *tlist) +{ + int numCols = list_length(groupClause); + int colno = 0; + Oid *grpCollations; + ListCell *glitem; + + grpCollations = (Oid *) palloc(sizeof(Oid) * numCols); + + foreach(glitem, groupClause) + { + SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem); + TargetEntry *tle = get_sortgroupclause_tle(groupcl, tlist); + + grpCollations[colno++] = exprCollation((Node *) tle->expr); + } + + return grpCollations; +} + /* * extract_grouping_cols - make an array of the grouping column resnos * for a SortGroupClause list diff --git a/src/backend/partitioning/partbounds.c b/src/backend/partitioning/partbounds.c index 5b897d50ee..803c23aaf5 100644 --- a/src/backend/partitioning/partbounds.c +++ b/src/backend/partitioning/partbounds.c @@ -2657,7 +2657,7 @@ get_range_nulltest(PartitionKey key) * Compute the hash value for given partition key values. */ uint64 -compute_partition_hash_value(int partnatts, FmgrInfo *partsupfunc, +compute_partition_hash_value(int partnatts, FmgrInfo *partsupfunc, Oid *partcollation, Datum *values, bool *isnull) { int i; @@ -2678,7 +2678,7 @@ compute_partition_hash_value(int partnatts, FmgrInfo *partsupfunc, * datatype-specific hash functions of each partition key * attribute. */ - hash = FunctionCall2(&partsupfunc[i], values[i], seed); + hash = FunctionCall2Coll(&partsupfunc[i], partcollation[i], values[i], seed); /* Form a single 64-bit hash value */ rowHash = hash_combine64(rowHash, DatumGetUInt64(hash)); diff --git a/src/backend/partitioning/partprune.c b/src/backend/partitioning/partprune.c index b5c0889935..31e0164ea9 100644 --- a/src/backend/partitioning/partprune.c +++ b/src/backend/partitioning/partprune.c @@ -2159,6 +2159,7 @@ get_matching_hash_bounds(PartitionPruneContext *context, int i; uint64 rowHash; int greatest_modulus; + Oid *partcollation = context->partcollation; Assert(context->strategy == PARTITION_STRATEGY_HASH); @@ -2179,7 +2180,7 @@ get_matching_hash_bounds(PartitionPruneContext *context, isnull[i] = bms_is_member(i, nullkeys); greatest_modulus = get_hash_partition_greatest_modulus(boundinfo); - rowHash = compute_partition_hash_value(partnatts, partsupfunc, + rowHash = compute_partition_hash_value(partnatts, partsupfunc, partcollation, values, isnull); if (partindices[rowHash % greatest_modulus] >= 0) diff --git a/src/backend/regex/regc_pg_locale.c b/src/backend/regex/regc_pg_locale.c index a8c0b156fa..4a808b7606 100644 --- a/src/backend/regex/regc_pg_locale.c +++ b/src/backend/regex/regc_pg_locale.c @@ -263,6 +263,11 @@ pg_set_regex_collation(Oid collation) errhint("Use the COLLATE clause to set the collation explicitly."))); } + if (pg_regex_locale && !pg_regex_locale->deterministic) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("nondeterministic collations are not supported for regular expressions"))); + #ifdef USE_ICU if (pg_regex_locale && pg_regex_locale->provider == COLLPROVIDER_ICU) pg_regex_strategy = PG_REGEX_LOCALE_ICU; diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c index a34605ac94..9cef018c0b 100644 --- a/src/backend/utils/adt/arrayfuncs.c +++ b/src/backend/utils/adt/arrayfuncs.c @@ -3957,7 +3957,7 @@ hash_array(PG_FUNCTION_ARGS) * apply the hash function to each array element. */ InitFunctionCallInfoData(*locfcinfo, &typentry->hash_proc_finfo, 1, - InvalidOid, NULL, NULL); + PG_GET_COLLATION(), NULL, NULL); /* Loop over source data */ nitems = ArrayGetNItems(ndims, dims); diff --git a/src/backend/utils/adt/like.c b/src/backend/utils/adt/like.c index 853c9c01e9..704e5720cf 100644 --- a/src/backend/utils/adt/like.c +++ b/src/backend/utils/adt/like.c @@ -45,7 +45,7 @@ static int UTF8_MatchText(const char *t, int tlen, const char *p, int plen, static int SB_IMatchText(const char *t, int tlen, const char *p, int plen, pg_locale_t locale, bool locale_is_c); -static int GenericMatchText(const char *s, int slen, const char *p, int plen); +static int GenericMatchText(const char *s, int slen, const char *p, int plen, Oid collation); static int Generic_Text_IC_like(text *str, text *pat, Oid collation); /*-------------------- @@ -148,8 +148,18 @@ SB_lower_char(unsigned char c, pg_locale_t locale, bool locale_is_c) /* Generic for all cases not requiring inline case-folding */ static inline int -GenericMatchText(const char *s, int slen, const char *p, int plen) +GenericMatchText(const char *s, int slen, const char *p, int plen, Oid collation) { + if (collation && !lc_ctype_is_c(collation) && collation != DEFAULT_COLLATION_OID) + { + pg_locale_t locale = pg_newlocale_from_collation(collation); + + if (locale && !locale->deterministic) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("nondeterministic collations are not supported for LIKE"))); + } + if (pg_database_encoding_max_length() == 1) return SB_MatchText(s, slen, p, plen, 0, true); else if (GetDatabaseEncoding() == PG_UTF8) @@ -184,6 +194,11 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation) errhint("Use the COLLATE clause to set the collation explicitly."))); } locale = pg_newlocale_from_collation(collation); + + if (locale && !locale->deterministic) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("nondeterministic collations are not supported for ILIKE"))); } /* @@ -240,7 +255,7 @@ namelike(PG_FUNCTION_ARGS) p = VARDATA_ANY(pat); plen = VARSIZE_ANY_EXHDR(pat); - result = (GenericMatchText(s, slen, p, plen) == LIKE_TRUE); + result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) == LIKE_TRUE); PG_RETURN_BOOL(result); } @@ -261,7 +276,7 @@ namenlike(PG_FUNCTION_ARGS) p = VARDATA_ANY(pat); plen = VARSIZE_ANY_EXHDR(pat); - result = (GenericMatchText(s, slen, p, plen) != LIKE_TRUE); + result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) != LIKE_TRUE); PG_RETURN_BOOL(result); } @@ -282,7 +297,7 @@ textlike(PG_FUNCTION_ARGS) p = VARDATA_ANY(pat); plen = VARSIZE_ANY_EXHDR(pat); - result = (GenericMatchText(s, slen, p, plen) == LIKE_TRUE); + result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) == LIKE_TRUE); PG_RETURN_BOOL(result); } @@ -303,7 +318,7 @@ textnlike(PG_FUNCTION_ARGS) p = VARDATA_ANY(pat); plen = VARSIZE_ANY_EXHDR(pat); - result = (GenericMatchText(s, slen, p, plen) != LIKE_TRUE); + result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) != LIKE_TRUE); PG_RETURN_BOOL(result); } diff --git a/src/backend/utils/adt/like_support.c b/src/backend/utils/adt/like_support.c index 69509811ef..a65e63736c 100644 --- a/src/backend/utils/adt/like_support.c +++ b/src/backend/utils/adt/like_support.c @@ -257,6 +257,20 @@ match_pattern_prefix(Node *leftop, return NIL; patt = (Const *) rightop; + /* + * Not supported if the expression collation is nondeterministic. The + * optimized equality or prefix tests use bytewise comparisons, which is + * not consistent with nondeterministic collations. The actual + * pattern-matching implementation functions will later error out that + * pattern-matching is not supported with nondeterministic collations. + * (We could also error out here, but by doing it later we get more + * precise error messages.) (It should be possible to support at least + * Pattern_Prefix_Exact, but no point as along as the actual + * pattern-matching implementations don't support it.) + */ + if (!get_collation_isdeterministic(expr_coll)) + return NIL; + /* * Try to extract a fixed prefix from the pattern. */ diff --git a/src/backend/utils/adt/name.c b/src/backend/utils/adt/name.c index 3a7887d455..54425925ed 100644 --- a/src/backend/utils/adt/name.c +++ b/src/backend/utils/adt/name.c @@ -131,26 +131,6 @@ namesend(PG_FUNCTION_ARGS) * have a '\0' terminator. Whatever might be past the terminator is not * considered relevant to comparisons. */ -Datum -nameeq(PG_FUNCTION_ARGS) -{ - Name arg1 = PG_GETARG_NAME(0); - Name arg2 = PG_GETARG_NAME(1); - - /* Collation doesn't matter: equal only if bitwise-equal */ - PG_RETURN_BOOL(strncmp(NameStr(*arg1), NameStr(*arg2), NAMEDATALEN) == 0); -} - -Datum -namene(PG_FUNCTION_ARGS) -{ - Name arg1 = PG_GETARG_NAME(0); - Name arg2 = PG_GETARG_NAME(1); - - /* Collation doesn't matter: equal only if bitwise-equal */ - PG_RETURN_BOOL(strncmp(NameStr(*arg1), NameStr(*arg2), NAMEDATALEN) != 0); -} - static int namecmp(Name arg1, Name arg2, Oid collid) { @@ -164,6 +144,24 @@ namecmp(Name arg1, Name arg2, Oid collid) collid); } +Datum +nameeq(PG_FUNCTION_ARGS) +{ + Name arg1 = PG_GETARG_NAME(0); + Name arg2 = PG_GETARG_NAME(1); + + PG_RETURN_BOOL(namecmp(arg1, arg2, PG_GET_COLLATION()) == 0); +} + +Datum +namene(PG_FUNCTION_ARGS) +{ + Name arg1 = PG_GETARG_NAME(0); + Name arg2 = PG_GETARG_NAME(1); + + PG_RETURN_BOOL(namecmp(arg1, arg2, PG_GET_COLLATION()) != 0); +} + Datum namelt(PG_FUNCTION_ARGS) { diff --git a/src/backend/utils/adt/orderedsetaggs.c b/src/backend/utils/adt/orderedsetaggs.c index 2d384a9944..4db2d0d0e1 100644 --- a/src/backend/utils/adt/orderedsetaggs.c +++ b/src/backend/utils/adt/orderedsetaggs.c @@ -1084,7 +1084,7 @@ mode_final(PG_FUNCTION_ARGS) last_abbrev_val = abbrev_val; } else if (abbrev_val == last_abbrev_val && - DatumGetBool(FunctionCall2(equalfn, val, last_val))) + DatumGetBool(FunctionCall2Coll(equalfn, PG_GET_COLLATION(), val, last_val))) { /* value equal to previous value, count it */ if (last_val_is_mode) @@ -1345,6 +1345,7 @@ hypothetical_dense_rank_final(PG_FUNCTION_ARGS) numDistinctCols, sortColIdx, osastate->qstate->eqOperators, + osastate->qstate->sortCollations, NULL); MemoryContextSwitchTo(oldContext); osastate->qstate->compareTuple = compareTuple; diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c index 7fe10e284a..6e33d65340 100644 --- a/src/backend/utils/adt/pg_locale.c +++ b/src/backend/utils/adt/pg_locale.c @@ -1312,6 +1312,7 @@ pg_newlocale_from_collation(Oid collid) /* We'll fill in the result struct locally before allocating memory */ memset(&result, 0, sizeof(result)); result.provider = collform->collprovider; + result.deterministic = collform->collisdeterministic; if (collform->collprovider == COLLPROVIDER_LIBC) { diff --git a/src/backend/utils/adt/ri_triggers.c b/src/backend/utils/adt/ri_triggers.c index 6d443db7e2..72f8a9d69c 100644 --- a/src/backend/utils/adt/ri_triggers.c +++ b/src/backend/utils/adt/ri_triggers.c @@ -676,6 +676,8 @@ ri_restrict(TriggerData *trigdata, bool is_no_action) { Oid pk_type = RIAttType(pk_rel, riinfo->pk_attnums[i]); Oid fk_type = RIAttType(fk_rel, riinfo->fk_attnums[i]); + Oid pk_coll = RIAttCollation(pk_rel, riinfo->pk_attnums[i]); + Oid fk_coll = RIAttCollation(fk_rel, riinfo->fk_attnums[i]); quoteOneName(attname, RIAttName(fk_rel, riinfo->fk_attnums[i])); @@ -684,6 +686,8 @@ ri_restrict(TriggerData *trigdata, bool is_no_action) paramname, pk_type, riinfo->pf_eq_oprs[i], attname, fk_type); + if (pk_coll != fk_coll && !get_collation_isdeterministic(pk_coll)) + ri_GenerateQualCollation(&querybuf, pk_coll); querysep = "AND"; queryoids[i] = pk_type; } @@ -778,6 +782,8 @@ RI_FKey_cascade_del(PG_FUNCTION_ARGS) { Oid pk_type = RIAttType(pk_rel, riinfo->pk_attnums[i]); Oid fk_type = RIAttType(fk_rel, riinfo->fk_attnums[i]); + Oid pk_coll = RIAttCollation(pk_rel, riinfo->pk_attnums[i]); + Oid fk_coll = RIAttCollation(fk_rel, riinfo->fk_attnums[i]); quoteOneName(attname, RIAttName(fk_rel, riinfo->fk_attnums[i])); @@ -786,6 +792,8 @@ RI_FKey_cascade_del(PG_FUNCTION_ARGS) paramname, pk_type, riinfo->pf_eq_oprs[i], attname, fk_type); + if (pk_coll != fk_coll && !get_collation_isdeterministic(pk_coll)) + ri_GenerateQualCollation(&querybuf, pk_coll); querysep = "AND"; queryoids[i] = pk_type; } @@ -890,6 +898,8 @@ RI_FKey_cascade_upd(PG_FUNCTION_ARGS) { Oid pk_type = RIAttType(pk_rel, riinfo->pk_attnums[i]); Oid fk_type = RIAttType(fk_rel, riinfo->fk_attnums[i]); + Oid pk_coll = RIAttCollation(pk_rel, riinfo->pk_attnums[i]); + Oid fk_coll = RIAttCollation(fk_rel, riinfo->fk_attnums[i]); quoteOneName(attname, RIAttName(fk_rel, riinfo->fk_attnums[i])); @@ -901,6 +911,8 @@ RI_FKey_cascade_upd(PG_FUNCTION_ARGS) paramname, pk_type, riinfo->pf_eq_oprs[i], attname, fk_type); + if (pk_coll != fk_coll && !get_collation_isdeterministic(pk_coll)) + ri_GenerateQualCollation(&querybuf, pk_coll); querysep = ","; qualsep = "AND"; queryoids[i] = pk_type; @@ -1065,6 +1077,8 @@ ri_set(TriggerData *trigdata, bool is_set_null) { Oid pk_type = RIAttType(pk_rel, riinfo->pk_attnums[i]); Oid fk_type = RIAttType(fk_rel, riinfo->fk_attnums[i]); + Oid pk_coll = RIAttCollation(pk_rel, riinfo->pk_attnums[i]); + Oid fk_coll = RIAttCollation(fk_rel, riinfo->fk_attnums[i]); quoteOneName(attname, RIAttName(fk_rel, riinfo->fk_attnums[i])); @@ -1077,6 +1091,8 @@ ri_set(TriggerData *trigdata, bool is_set_null) paramname, pk_type, riinfo->pf_eq_oprs[i], attname, fk_type); + if (pk_coll != fk_coll && !get_collation_isdeterministic(pk_coll)) + ri_GenerateQualCollation(&querybuf, pk_coll); querysep = ","; qualsep = "AND"; queryoids[i] = pk_type; @@ -2496,11 +2512,20 @@ ri_AttributesEqual(Oid eq_opr, Oid typeid, } /* - * Apply the comparison operator. We assume it doesn't care about - * collations. + * Apply the comparison operator. + * + * Note: This function is part of a call stack that determines whether an + * update to a row is significant enough that it needs checking or action + * on the other side of a foreign-key constraint. Therefore, the + * comparison here would need to be done with the collation of the *other* + * table. For simplicity (e.g., we might not even have the other table + * open), we'll just use the default collation here, which could lead to + * some false negatives. All this would break if we ever allow + * database-wide collations to be nondeterministic. */ - return DatumGetBool(FunctionCall2(&entry->eq_opr_finfo, - oldvalue, newvalue)); + return DatumGetBool(FunctionCall2Coll(&entry->eq_opr_finfo, + DEFAULT_COLLATION_OID, + oldvalue, newvalue)); } /* diff --git a/src/backend/utils/adt/varchar.c b/src/backend/utils/adt/varchar.c index 440fc8ed66..4003631d8f 100644 --- a/src/backend/utils/adt/varchar.c +++ b/src/backend/utils/adt/varchar.c @@ -23,6 +23,8 @@ #include "utils/array.h" #include "utils/builtins.h" #include "utils/hashutils.h" +#include "utils/lsyscache.h" +#include "utils/pg_locale.h" #include "utils/varlena.h" #include "mb/pg_wchar.h" @@ -717,6 +719,22 @@ bpcharoctetlen(PG_FUNCTION_ARGS) * need to be so careful. *****************************************************************************/ +static void +check_collation_set(Oid collid) +{ + if (!OidIsValid(collid)) + { + /* + * This typically means that the parser could not resolve a conflict + * of implicit collations, so report it that way. + */ + ereport(ERROR, + (errcode(ERRCODE_INDETERMINATE_COLLATION), + errmsg("could not determine which collation to use for string comparison"), + errhint("Use the COLLATE clause to set the collation explicitly."))); + } +} + Datum bpchareq(PG_FUNCTION_ARGS) { @@ -725,18 +743,31 @@ bpchareq(PG_FUNCTION_ARGS) int len1, len2; bool result; + Oid collid = PG_GET_COLLATION(); + + check_collation_set(collid); len1 = bcTruelen(arg1); len2 = bcTruelen(arg2); - /* - * Since we only care about equality or not-equality, we can avoid all the - * expense of strcoll() here, and just do bitwise comparison. - */ - if (len1 != len2) - result = false; + if (lc_collate_is_c(collid) || + collid == DEFAULT_COLLATION_OID || + pg_newlocale_from_collation(collid)->deterministic) + { + /* + * Since we only care about equality or not-equality, we can avoid all the + * expense of strcoll() here, and just do bitwise comparison. + */ + if (len1 != len2) + result = false; + else + result = (memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), len1) == 0); + } else - result = (memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), len1) == 0); + { + result = (varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2, + collid) == 0); + } PG_FREE_IF_COPY(arg1, 0); PG_FREE_IF_COPY(arg2, 1); @@ -752,18 +783,29 @@ bpcharne(PG_FUNCTION_ARGS) int len1, len2; bool result; + Oid collid = PG_GET_COLLATION(); len1 = bcTruelen(arg1); len2 = bcTruelen(arg2); - /* - * Since we only care about equality or not-equality, we can avoid all the - * expense of strcoll() here, and just do bitwise comparison. - */ - if (len1 != len2) - result = true; + if (lc_collate_is_c(collid) || + collid == DEFAULT_COLLATION_OID || + pg_newlocale_from_collation(collid)->deterministic) + { + /* + * Since we only care about equality or not-equality, we can avoid all the + * expense of strcoll() here, and just do bitwise comparison. + */ + if (len1 != len2) + result = true; + else + result = (memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), len1) != 0); + } else - result = (memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), len1) != 0); + { + result = (varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2, + collid) != 0); + } PG_FREE_IF_COPY(arg1, 0); PG_FREE_IF_COPY(arg2, 1); @@ -933,23 +975,60 @@ bpchar_smaller(PG_FUNCTION_ARGS) /* * bpchar needs a specialized hash function because we want to ignore * trailing blanks in comparisons. - * - * Note: currently there is no need for locale-specific behavior here, - * but if we ever change the semantics of bpchar comparison to trust - * strcoll() completely, we'd need to do something different in non-C locales. */ Datum hashbpchar(PG_FUNCTION_ARGS) { BpChar *key = PG_GETARG_BPCHAR_PP(0); + Oid collid = PG_GET_COLLATION(); char *keydata; int keylen; + pg_locale_t mylocale = 0; Datum result; + if (!collid) + ereport(ERROR, + (errcode(ERRCODE_INDETERMINATE_COLLATION), + errmsg("could not determine which collation to use for string hashing"), + errhint("Use the COLLATE clause to set the collation explicitly."))); + keydata = VARDATA_ANY(key); keylen = bcTruelen(key); - result = hash_any((unsigned char *) keydata, keylen); + if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID) + mylocale = pg_newlocale_from_collation(collid); + + if (!mylocale || mylocale->deterministic) + { + result = hash_any((unsigned char *) keydata, keylen); + } + else + { +#ifdef USE_ICU + if (mylocale->provider == COLLPROVIDER_ICU) + { + int32_t ulen = -1; + UChar *uchar = NULL; + Size bsize; + uint8_t *buf; + + ulen = icu_to_uchar(&uchar, keydata, keylen); + + bsize = ucol_getSortKey(mylocale->info.icu.ucol, + uchar, ulen, NULL, 0); + buf = palloc(bsize); + ucol_getSortKey(mylocale->info.icu.ucol, + uchar, ulen, buf, bsize); + + result = hash_any(buf, bsize); + + pfree(buf); + } + else +#endif + /* shouldn't happen */ + elog(ERROR, "unsupported collprovider: %c", mylocale->provider); + } /* Avoid leaking memory for toasted inputs */ PG_FREE_IF_COPY(key, 0); @@ -961,15 +1040,56 @@ Datum hashbpcharextended(PG_FUNCTION_ARGS) { BpChar *key = PG_GETARG_BPCHAR_PP(0); + Oid collid = PG_GET_COLLATION(); char *keydata; int keylen; + pg_locale_t mylocale = 0; Datum result; + if (!collid) + ereport(ERROR, + (errcode(ERRCODE_INDETERMINATE_COLLATION), + errmsg("could not determine which collation to use for string hashing"), + errhint("Use the COLLATE clause to set the collation explicitly."))); + keydata = VARDATA_ANY(key); keylen = bcTruelen(key); - result = hash_any_extended((unsigned char *) keydata, keylen, - PG_GETARG_INT64(1)); + if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID) + mylocale = pg_newlocale_from_collation(collid); + + if (!mylocale || mylocale->deterministic) + { + result = hash_any_extended((unsigned char *) keydata, keylen, + PG_GETARG_INT64(1)); + } + else + { +#ifdef USE_ICU + if (mylocale->provider == COLLPROVIDER_ICU) + { + int32_t ulen = -1; + UChar *uchar = NULL; + Size bsize; + uint8_t *buf; + + ulen = icu_to_uchar(&uchar, VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key)); + + bsize = ucol_getSortKey(mylocale->info.icu.ucol, + uchar, ulen, NULL, 0); + buf = palloc(bsize); + ucol_getSortKey(mylocale->info.icu.ucol, + uchar, ulen, buf, bsize); + + result = hash_any_extended(buf, bsize, PG_GETARG_INT64(1)); + + pfree(buf); + } + else +#endif + /* shouldn't happen */ + elog(ERROR, "unsupported collprovider: %c", mylocale->provider); + } PG_FREE_IF_COPY(key, 0); @@ -985,12 +1105,23 @@ hashbpcharextended(PG_FUNCTION_ARGS) */ static int -internal_bpchar_pattern_compare(BpChar *arg1, BpChar *arg2) +internal_bpchar_pattern_compare(BpChar *arg1, BpChar *arg2, Oid collid) { int result; int len1, len2; + check_collation_set(collid); + + /* + * see internal_text_pattern_compare() + */ + if (!get_collation_isdeterministic(collid)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("nondeterministic collations are not supported for operator class \"%s\"", + "bpchar_pattern_ops"))); + len1 = bcTruelen(arg1); len2 = bcTruelen(arg2); @@ -1013,7 +1144,7 @@ bpchar_pattern_lt(PG_FUNCTION_ARGS) BpChar *arg2 = PG_GETARG_BPCHAR_PP(1); int result; - result = internal_bpchar_pattern_compare(arg1, arg2); + result = internal_bpchar_pattern_compare(arg1, arg2, PG_GET_COLLATION()); PG_FREE_IF_COPY(arg1, 0); PG_FREE_IF_COPY(arg2, 1); @@ -1029,7 +1160,7 @@ bpchar_pattern_le(PG_FUNCTION_ARGS) BpChar *arg2 = PG_GETARG_BPCHAR_PP(1); int result; - result = internal_bpchar_pattern_compare(arg1, arg2); + result = internal_bpchar_pattern_compare(arg1, arg2, PG_GET_COLLATION()); PG_FREE_IF_COPY(arg1, 0); PG_FREE_IF_COPY(arg2, 1); @@ -1045,7 +1176,7 @@ bpchar_pattern_ge(PG_FUNCTION_ARGS) BpChar *arg2 = PG_GETARG_BPCHAR_PP(1); int result; - result = internal_bpchar_pattern_compare(arg1, arg2); + result = internal_bpchar_pattern_compare(arg1, arg2, PG_GET_COLLATION()); PG_FREE_IF_COPY(arg1, 0); PG_FREE_IF_COPY(arg2, 1); @@ -1061,7 +1192,7 @@ bpchar_pattern_gt(PG_FUNCTION_ARGS) BpChar *arg2 = PG_GETARG_BPCHAR_PP(1); int result; - result = internal_bpchar_pattern_compare(arg1, arg2); + result = internal_bpchar_pattern_compare(arg1, arg2, PG_GET_COLLATION()); PG_FREE_IF_COPY(arg1, 0); PG_FREE_IF_COPY(arg2, 1); @@ -1077,7 +1208,7 @@ btbpchar_pattern_cmp(PG_FUNCTION_ARGS) BpChar *arg2 = PG_GETARG_BPCHAR_PP(1); int result; - result = internal_bpchar_pattern_compare(arg1, arg2); + result = internal_bpchar_pattern_compare(arg1, arg2, PG_GET_COLLATION()); PG_FREE_IF_COPY(arg1, 0); PG_FREE_IF_COPY(arg2, 1); @@ -1090,8 +1221,17 @@ Datum btbpchar_pattern_sortsupport(PG_FUNCTION_ARGS) { SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0); + Oid collid = ssup->ssup_collation; MemoryContext oldcontext; + check_collation_set(collid); + + if (!get_collation_isdeterministic(collid)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("nondeterministic collations are not supported for operator class \"%s\"", + "bpchar_pattern_ops"))); + oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt); /* Use generic string SortSupport, forcing "C" collation */ diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index 39c394331b..68a6e49aeb 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -122,13 +122,14 @@ static text *text_substring(Datum str, int32 length, bool length_not_specified); static text *text_overlay(text *t1, text *t2, int sp, int sl); -static int text_position(text *t1, text *t2); -static void text_position_setup(text *t1, text *t2, TextPositionState *state); +static int text_position(text *t1, text *t2, Oid collid); +static void text_position_setup(text *t1, text *t2, Oid collid, TextPositionState *state); static bool text_position_next(TextPositionState *state); static char *text_position_next_internal(char *start_ptr, TextPositionState *state); static char *text_position_get_match_ptr(TextPositionState *state); static int text_position_get_match_pos(TextPositionState *state); static void text_position_cleanup(TextPositionState *state); +static void check_collation_set(Oid collid); static int text_cmp(text *arg1, text *arg2, Oid collid); static bytea *bytea_catenate(bytea *t1, bytea *t2); static bytea *bytea_substring(Datum str, @@ -1094,7 +1095,7 @@ textpos(PG_FUNCTION_ARGS) text *str = PG_GETARG_TEXT_PP(0); text *search_str = PG_GETARG_TEXT_PP(1); - PG_RETURN_INT32((int32) text_position(str, search_str)); + PG_RETURN_INT32((int32) text_position(str, search_str, PG_GET_COLLATION())); } /* @@ -1112,7 +1113,7 @@ textpos(PG_FUNCTION_ARGS) * functions. */ static int -text_position(text *t1, text *t2) +text_position(text *t1, text *t2, Oid collid) { TextPositionState state; int result; @@ -1120,7 +1121,7 @@ text_position(text *t1, text *t2) if (VARSIZE_ANY_EXHDR(t1) < 1 || VARSIZE_ANY_EXHDR(t2) < 1) return 0; - text_position_setup(t1, t2, &state); + text_position_setup(t1, t2, collid, &state); if (!text_position_next(&state)) result = 0; else @@ -1147,10 +1148,21 @@ text_position(text *t1, text *t2) */ static void -text_position_setup(text *t1, text *t2, TextPositionState *state) +text_position_setup(text *t1, text *t2, Oid collid, TextPositionState *state) { int len1 = VARSIZE_ANY_EXHDR(t1); int len2 = VARSIZE_ANY_EXHDR(t2); + pg_locale_t mylocale = 0; + + check_collation_set(collid); + + if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID) + mylocale = pg_newlocale_from_collation(collid); + + if (mylocale && !mylocale->deterministic) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("nondeterministic collations are not supported for substring searches"))); Assert(len1 > 0); Assert(len2 > 0); @@ -1429,6 +1441,22 @@ text_position_cleanup(TextPositionState *state) /* no cleanup needed */ } +static void +check_collation_set(Oid collid) +{ + if (!OidIsValid(collid)) + { + /* + * This typically means that the parser could not resolve a conflict + * of implicit collations, so report it that way. + */ + ereport(ERROR, + (errcode(ERRCODE_INDETERMINATE_COLLATION), + errmsg("could not determine which collation to use for string comparison"), + errhint("Use the COLLATE clause to set the collation explicitly."))); + } +} + /* varstr_cmp() * Comparison function for text strings with given lengths. * Includes locale support, but must copy strings to temporary memory @@ -1441,6 +1469,8 @@ varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid) { int result; + check_collation_set(collid); + /* * Unfortunately, there is no strncoll(), so in the non-C locale case we * have to do some memory copying. This turns out to be significantly @@ -1462,20 +1492,7 @@ varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid) pg_locale_t mylocale = 0; if (collid != DEFAULT_COLLATION_OID) - { - if (!OidIsValid(collid)) - { - /* - * This typically means that the parser could not resolve a - * conflict of implicit collations, so report it that way. - */ - ereport(ERROR, - (errcode(ERRCODE_INDETERMINATE_COLLATION), - errmsg("could not determine which collation to use for string comparison"), - errhint("Use the COLLATE clause to set the collation explicitly."))); - } mylocale = pg_newlocale_from_collation(collid); - } /* * memcmp() can't tell us which of two unequal strings sorts first, @@ -1558,13 +1575,9 @@ varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid) ereport(ERROR, (errmsg("could not compare Unicode strings: %m"))); - /* - * In some locales wcscoll() can claim that nonidentical strings - * are equal. Believing that would be bad news for a number of - * reasons, so we follow Perl's lead and sort "equal" strings - * according to strcmp (on the UTF-8 representation). - */ - if (result == 0) + /* Break tie if necessary. */ + if (result == 0 && + (!mylocale || mylocale->deterministic)) { result = memcmp(arg1, arg2, Min(len1, len2)); if ((result == 0) && (len1 != len2)) @@ -1649,13 +1662,9 @@ varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid) else result = strcoll(a1p, a2p); - /* - * In some locales strcoll() can claim that nonidentical strings are - * equal. Believing that would be bad news for a number of reasons, - * so we follow Perl's lead and sort "equal" strings according to - * strcmp(). - */ - if (result == 0) + /* Break tie if necessary. */ + if (result == 0 && + (!mylocale || mylocale->deterministic)) result = strcmp(a1p, a2p); if (a1p != a1buf) @@ -1699,33 +1708,52 @@ text_cmp(text *arg1, text *arg2, Oid collid) Datum texteq(PG_FUNCTION_ARGS) { - Datum arg1 = PG_GETARG_DATUM(0); - Datum arg2 = PG_GETARG_DATUM(1); + Oid collid = PG_GET_COLLATION(); bool result; - Size len1, - len2; - /* - * Since we only care about equality or not-equality, we can avoid all the - * expense of strcoll() here, and just do bitwise comparison. In fact, we - * don't even have to do a bitwise comparison if we can show the lengths - * of the strings are unequal; which might save us from having to detoast - * one or both values. - */ - len1 = toast_raw_datum_size(arg1); - len2 = toast_raw_datum_size(arg2); - if (len1 != len2) - result = false; + check_collation_set(collid); + + if (lc_collate_is_c(collid) || + collid == DEFAULT_COLLATION_OID || + pg_newlocale_from_collation(collid)->deterministic) + { + Datum arg1 = PG_GETARG_DATUM(0); + Datum arg2 = PG_GETARG_DATUM(1); + Size len1, + len2; + + /* + * Since we only care about equality or not-equality, we can avoid all the + * expense of strcoll() here, and just do bitwise comparison. In fact, we + * don't even have to do a bitwise comparison if we can show the lengths + * of the strings are unequal; which might save us from having to detoast + * one or both values. + */ + len1 = toast_raw_datum_size(arg1); + len2 = toast_raw_datum_size(arg2); + if (len1 != len2) + result = false; + else + { + text *targ1 = DatumGetTextPP(arg1); + text *targ2 = DatumGetTextPP(arg2); + + result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2), + len1 - VARHDRSZ) == 0); + + PG_FREE_IF_COPY(targ1, 0); + PG_FREE_IF_COPY(targ2, 1); + } + } else { - text *targ1 = DatumGetTextPP(arg1); - text *targ2 = DatumGetTextPP(arg2); + text *arg1 = PG_GETARG_TEXT_PP(0); + text *arg2 = PG_GETARG_TEXT_PP(1); - result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2), - len1 - VARHDRSZ) == 0); + result = (text_cmp(arg1, arg2, collid) == 0); - PG_FREE_IF_COPY(targ1, 0); - PG_FREE_IF_COPY(targ2, 1); + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); } PG_RETURN_BOOL(result); @@ -1734,27 +1762,46 @@ texteq(PG_FUNCTION_ARGS) Datum textne(PG_FUNCTION_ARGS) { - Datum arg1 = PG_GETARG_DATUM(0); - Datum arg2 = PG_GETARG_DATUM(1); + Oid collid = PG_GET_COLLATION(); bool result; - Size len1, - len2; - /* See comment in texteq() */ - len1 = toast_raw_datum_size(arg1); - len2 = toast_raw_datum_size(arg2); - if (len1 != len2) - result = true; + check_collation_set(collid); + + if (lc_collate_is_c(collid) || + collid == DEFAULT_COLLATION_OID || + pg_newlocale_from_collation(collid)->deterministic) + { + Datum arg1 = PG_GETARG_DATUM(0); + Datum arg2 = PG_GETARG_DATUM(1); + Size len1, + len2; + + /* See comment in texteq() */ + len1 = toast_raw_datum_size(arg1); + len2 = toast_raw_datum_size(arg2); + if (len1 != len2) + result = true; + else + { + text *targ1 = DatumGetTextPP(arg1); + text *targ2 = DatumGetTextPP(arg2); + + result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2), + len1 - VARHDRSZ) != 0); + + PG_FREE_IF_COPY(targ1, 0); + PG_FREE_IF_COPY(targ2, 1); + } + } else { - text *targ1 = DatumGetTextPP(arg1); - text *targ2 = DatumGetTextPP(arg2); + text *arg1 = PG_GETARG_TEXT_PP(0); + text *arg2 = PG_GETARG_TEXT_PP(1); - result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2), - len1 - VARHDRSZ) != 0); + result = (text_cmp(arg1, arg2, collid) != 0); - PG_FREE_IF_COPY(targ1, 0); - PG_FREE_IF_COPY(targ2, 1); + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); } PG_RETURN_BOOL(result); @@ -1825,10 +1872,22 @@ text_starts_with(PG_FUNCTION_ARGS) { Datum arg1 = PG_GETARG_DATUM(0); Datum arg2 = PG_GETARG_DATUM(1); + Oid collid = PG_GET_COLLATION(); + pg_locale_t mylocale = 0; bool result; Size len1, len2; + check_collation_set(collid); + + if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID) + mylocale = pg_newlocale_from_collation(collid); + + if (mylocale && !mylocale->deterministic) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("nondeterministic collations are not supported for substring searches"))); + len1 = toast_raw_datum_size(arg1); len2 = toast_raw_datum_size(arg2); if (len2 > len1) @@ -1898,6 +1957,8 @@ varstr_sortsupport(SortSupport ssup, Oid typid, Oid collid) VarStringSortSupport *sss; pg_locale_t locale = 0; + check_collation_set(collid); + /* * If possible, set ssup->comparator to a function which can be used to * directly compare two datums. If we can do this, we'll avoid the @@ -1934,20 +1995,7 @@ varstr_sortsupport(SortSupport ssup, Oid typid, Oid collid) * result. */ if (collid != DEFAULT_COLLATION_OID) - { - if (!OidIsValid(collid)) - { - /* - * This typically means that the parser could not resolve a - * conflict of implicit collations, so report it that way. - */ - ereport(ERROR, - (errcode(ERRCODE_INDETERMINATE_COLLATION), - errmsg("could not determine which collation to use for string comparison"), - errhint("Use the COLLATE clause to set the collation explicitly."))); - } locale = pg_newlocale_from_collation(collid); - } /* * There is a further exception on Windows. When the database @@ -2328,12 +2376,9 @@ varstrfastcmp_locale(char *a1p, int len1, char *a2p, int len2, SortSupport ssup) else result = strcoll(sss->buf1, sss->buf2); - /* - * In some locales strcoll() can claim that nonidentical strings are - * equal. Believing that would be bad news for a number of reasons, so we - * follow Perl's lead and sort "equal" strings according to strcmp(). - */ - if (result == 0) + /* Break tie if necessary. */ + if (result == 0 && + (!sss->locale || sss->locale->deterministic)) result = strcmp(sss->buf1, sss->buf2); /* Cache result, perhaps saving an expensive strcoll() call next time */ @@ -2760,10 +2805,18 @@ nameeqtext(PG_FUNCTION_ARGS) text *arg2 = PG_GETARG_TEXT_PP(1); size_t len1 = strlen(NameStr(*arg1)); size_t len2 = VARSIZE_ANY_EXHDR(arg2); + Oid collid = PG_GET_COLLATION(); bool result; - result = (len1 == len2 && - memcmp(NameStr(*arg1), VARDATA_ANY(arg2), len1) == 0); + check_collation_set(collid); + + if (collid == C_COLLATION_OID) + result = (len1 == len2 && + memcmp(NameStr(*arg1), VARDATA_ANY(arg2), len1) == 0); + else + result = (varstr_cmp(NameStr(*arg1), len1, + VARDATA_ANY(arg2), len2, + collid) == 0); PG_FREE_IF_COPY(arg2, 1); @@ -2777,10 +2830,18 @@ texteqname(PG_FUNCTION_ARGS) Name arg2 = PG_GETARG_NAME(1); size_t len1 = VARSIZE_ANY_EXHDR(arg1); size_t len2 = strlen(NameStr(*arg2)); + Oid collid = PG_GET_COLLATION(); bool result; - result = (len1 == len2 && - memcmp(VARDATA_ANY(arg1), NameStr(*arg2), len1) == 0); + check_collation_set(collid); + + if (collid == C_COLLATION_OID) + result = (len1 == len2 && + memcmp(VARDATA_ANY(arg1), NameStr(*arg2), len1) == 0); + else + result = (varstr_cmp(VARDATA_ANY(arg1), len1, + NameStr(*arg2), len2, + collid) == 0); PG_FREE_IF_COPY(arg1, 0); @@ -2794,10 +2855,18 @@ namenetext(PG_FUNCTION_ARGS) text *arg2 = PG_GETARG_TEXT_PP(1); size_t len1 = strlen(NameStr(*arg1)); size_t len2 = VARSIZE_ANY_EXHDR(arg2); + Oid collid = PG_GET_COLLATION(); bool result; - result = !(len1 == len2 && - memcmp(NameStr(*arg1), VARDATA_ANY(arg2), len1) == 0); + check_collation_set(collid); + + if (collid == C_COLLATION_OID) + result = !(len1 == len2 && + memcmp(NameStr(*arg1), VARDATA_ANY(arg2), len1) == 0); + else + result = !(varstr_cmp(NameStr(*arg1), len1, + VARDATA_ANY(arg2), len2, + collid) == 0); PG_FREE_IF_COPY(arg2, 1); @@ -2811,10 +2880,18 @@ textnename(PG_FUNCTION_ARGS) Name arg2 = PG_GETARG_NAME(1); size_t len1 = VARSIZE_ANY_EXHDR(arg1); size_t len2 = strlen(NameStr(*arg2)); + Oid collid = PG_GET_COLLATION(); bool result; - result = !(len1 == len2 && - memcmp(VARDATA_ANY(arg1), NameStr(*arg2), len1) == 0); + check_collation_set(collid); + + if (collid == C_COLLATION_OID) + result = !(len1 == len2 && + memcmp(VARDATA_ANY(arg1), NameStr(*arg2), len1) == 0); + else + result = !(varstr_cmp(VARDATA_ANY(arg1), len1, + NameStr(*arg2), len2, + collid) == 0); PG_FREE_IF_COPY(arg1, 0); @@ -2919,12 +2996,34 @@ textgename(PG_FUNCTION_ARGS) */ static int -internal_text_pattern_compare(text *arg1, text *arg2) +internal_text_pattern_compare(text *arg1, text *arg2, Oid collid) { int result; int len1, len2; + check_collation_set(collid); + + /* + * XXX We cannot use a text_pattern_ops index for nondeterministic + * collations, because these operators intentionally ignore the collation. + * However, the planner has no way to know that, so it might choose such + * an index for an "=" clause, which would lead to wrong results. This + * check here doesn't prevent choosing the index, but it will at least + * error out if the index is chosen. A text_pattern_ops index on a column + * with nondeterministic collation is pretty useless anyway, since LIKE + * etc. won't work there either. A future possibility would be to + * annotate the operator class or its members in the catalog to avoid the + * index. Another alternative is to stay away from the *_pattern_ops + * operator classes and prefer creating LIKE-supporting indexes with + * COLLATE "C". + */ + if (!get_collation_isdeterministic(collid)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("nondeterministic collations are not supported for operator class \"%s\"", + "text_pattern_ops"))); + len1 = VARSIZE_ANY_EXHDR(arg1); len2 = VARSIZE_ANY_EXHDR(arg2); @@ -2947,7 +3046,7 @@ text_pattern_lt(PG_FUNCTION_ARGS) text *arg2 = PG_GETARG_TEXT_PP(1); int result; - result = internal_text_pattern_compare(arg1, arg2); + result = internal_text_pattern_compare(arg1, arg2, PG_GET_COLLATION()); PG_FREE_IF_COPY(arg1, 0); PG_FREE_IF_COPY(arg2, 1); @@ -2963,7 +3062,7 @@ text_pattern_le(PG_FUNCTION_ARGS) text *arg2 = PG_GETARG_TEXT_PP(1); int result; - result = internal_text_pattern_compare(arg1, arg2); + result = internal_text_pattern_compare(arg1, arg2, PG_GET_COLLATION()); PG_FREE_IF_COPY(arg1, 0); PG_FREE_IF_COPY(arg2, 1); @@ -2979,7 +3078,7 @@ text_pattern_ge(PG_FUNCTION_ARGS) text *arg2 = PG_GETARG_TEXT_PP(1); int result; - result = internal_text_pattern_compare(arg1, arg2); + result = internal_text_pattern_compare(arg1, arg2, PG_GET_COLLATION()); PG_FREE_IF_COPY(arg1, 0); PG_FREE_IF_COPY(arg2, 1); @@ -2995,7 +3094,7 @@ text_pattern_gt(PG_FUNCTION_ARGS) text *arg2 = PG_GETARG_TEXT_PP(1); int result; - result = internal_text_pattern_compare(arg1, arg2); + result = internal_text_pattern_compare(arg1, arg2, PG_GET_COLLATION()); PG_FREE_IF_COPY(arg1, 0); PG_FREE_IF_COPY(arg2, 1); @@ -3011,7 +3110,7 @@ bttext_pattern_cmp(PG_FUNCTION_ARGS) text *arg2 = PG_GETARG_TEXT_PP(1); int result; - result = internal_text_pattern_compare(arg1, arg2); + result = internal_text_pattern_compare(arg1, arg2, PG_GET_COLLATION()); PG_FREE_IF_COPY(arg1, 0); PG_FREE_IF_COPY(arg2, 1); @@ -3024,8 +3123,17 @@ Datum bttext_pattern_sortsupport(PG_FUNCTION_ARGS) { SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0); + Oid collid = ssup->ssup_collation; MemoryContext oldcontext; + check_collation_set(collid); + + if (!get_collation_isdeterministic(collid)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("nondeterministic collations are not supported for operator class \"%s\"", + "text_pattern_ops"))); + oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt); /* Use generic string SortSupport, forcing "C" collation */ @@ -4121,7 +4229,7 @@ replace_text(PG_FUNCTION_ARGS) PG_RETURN_TEXT_P(src_text); } - text_position_setup(src_text, from_sub_text, &state); + text_position_setup(src_text, from_sub_text, PG_GET_COLLATION(), &state); found = text_position_next(&state); @@ -4482,7 +4590,7 @@ split_text(PG_FUNCTION_ARGS) PG_RETURN_TEXT_P(cstring_to_text("")); } - text_position_setup(inputstring, fldsep, &state); + text_position_setup(inputstring, fldsep, PG_GET_COLLATION(), &state); /* identify bounds of first field */ start_ptr = VARDATA_ANY(inputstring); @@ -4538,11 +4646,12 @@ split_text(PG_FUNCTION_ARGS) * Convenience function to return true when two text params are equal. */ static bool -text_isequal(text *txt1, text *txt2) +text_isequal(text *txt1, text *txt2, Oid collid) { - return DatumGetBool(DirectFunctionCall2(texteq, - PointerGetDatum(txt1), - PointerGetDatum(txt2))); + return DatumGetBool(DirectFunctionCall2Coll(texteq, + collid, + PointerGetDatum(txt1), + PointerGetDatum(txt2))); } /* @@ -4633,7 +4742,7 @@ text_to_array_internal(PG_FUNCTION_ARGS) int lbs[1]; /* single element can be a NULL too */ - is_null = null_string ? text_isequal(inputstring, null_string) : false; + is_null = null_string ? text_isequal(inputstring, null_string, PG_GET_COLLATION()) : false; elems[0] = PointerGetDatum(inputstring); nulls[0] = is_null; @@ -4645,7 +4754,7 @@ text_to_array_internal(PG_FUNCTION_ARGS) TEXTOID, -1, false, 'i')); } - text_position_setup(inputstring, fldsep, &state); + text_position_setup(inputstring, fldsep, PG_GET_COLLATION(), &state); start_ptr = VARDATA_ANY(inputstring); @@ -4673,7 +4782,7 @@ text_to_array_internal(PG_FUNCTION_ARGS) /* must build a temp text datum to pass to accumArrayResult */ result_text = cstring_to_text_with_len(start_ptr, chunk_len); - is_null = null_string ? text_isequal(result_text, null_string) : false; + is_null = null_string ? text_isequal(result_text, null_string, PG_GET_COLLATION()) : false; /* stash away this field */ astate = accumArrayResult(astate, @@ -4715,7 +4824,7 @@ text_to_array_internal(PG_FUNCTION_ARGS) /* must build a temp text datum to pass to accumArrayResult */ result_text = cstring_to_text_with_len(start_ptr, chunk_len); - is_null = null_string ? text_isequal(result_text, null_string) : false; + is_null = null_string ? text_isequal(result_text, null_string, PG_GET_COLLATION()) : false; /* stash away this field */ astate = accumArrayResult(astate, diff --git a/src/backend/utils/cache/catcache.c b/src/backend/utils/cache/catcache.c index 07e1cd7696..d05930bc4c 100644 --- a/src/backend/utils/cache/catcache.c +++ b/src/backend/utils/cache/catcache.c @@ -170,13 +170,18 @@ int4hashfast(Datum datum) static bool texteqfast(Datum a, Datum b) { - return DatumGetBool(DirectFunctionCall2(texteq, a, b)); + /* + * The use of DEFAULT_COLLATION_OID is fairly arbitrary here. We just + * want to take the fast "deterministic" path in texteq(). + */ + return DatumGetBool(DirectFunctionCall2Coll(texteq, DEFAULT_COLLATION_OID, a, b)); } static uint32 texthashfast(Datum datum) { - return DatumGetInt32(DirectFunctionCall1(hashtext, datum)); + /* analogously here as in texteqfast() */ + return DatumGetInt32(DirectFunctionCall1Coll(hashtext, DEFAULT_COLLATION_OID, datum)); } static bool diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c index e88c45d268..59e6bcd856 100644 --- a/src/backend/utils/cache/lsyscache.c +++ b/src/backend/utils/cache/lsyscache.c @@ -908,6 +908,22 @@ get_collation_name(Oid colloid) return NULL; } +bool +get_collation_isdeterministic(Oid colloid) +{ + HeapTuple tp; + Form_pg_collation colltup; + bool result; + + tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(colloid)); + if (!HeapTupleIsValid(tp)) + elog(ERROR, "cache lookup failed for collation %u", colloid); + colltup = (Form_pg_collation) GETSTRUCT(tp); + result = colltup->collisdeterministic; + ReleaseSysCache(tp); + return result; +} + /* ---------- CONSTRAINT CACHE ---------- */ /* diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c index fd50a809ea..4886090132 100644 --- a/src/bin/initdb/initdb.c +++ b/src/bin/initdb/initdb.c @@ -1765,8 +1765,8 @@ setup_collation(FILE *cmdfd) * in pg_collation.h. But add it before reading system collations, so * that it wins if libc defines a locale named ucs_basic. */ - PG_CMD_PRINTF3("INSERT INTO pg_collation (oid, collname, collnamespace, collowner, collprovider, collencoding, collcollate, collctype)" - "VALUES (pg_nextoid('pg_catalog.pg_collation', 'oid', 'pg_catalog.pg_collation_oid_index'), 'ucs_basic', 'pg_catalog'::regnamespace, %u, '%c', %d, 'C', 'C');\n\n", + PG_CMD_PRINTF3("INSERT INTO pg_collation (oid, collname, collnamespace, collowner, collprovider, collisdeterministic, collencoding, collcollate, collctype)" + "VALUES (pg_nextoid('pg_catalog.pg_collation', 'oid', 'pg_catalog.pg_collation_oid_index'), 'ucs_basic', 'pg_catalog'::regnamespace, %u, '%c', true, %d, 'C', 'C');\n\n", BOOTSTRAP_SUPERUSERID, COLLPROVIDER_LIBC, PG_UTF8); /* Now import all collations we can find in the operating system */ diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 4c98ae4d7f..63699932c1 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -13417,6 +13417,7 @@ dumpCollation(Archive *fout, CollInfo *collinfo) char *qcollname; PGresult *res; int i_collprovider; + int i_collisdeterministic; int i_collcollate; int i_collctype; const char *collprovider; @@ -13434,28 +13435,35 @@ dumpCollation(Archive *fout, CollInfo *collinfo) qcollname = pg_strdup(fmtId(collinfo->dobj.name)); /* Get collation-specific details */ + appendPQExpBuffer(query, "SELECT "); + if (fout->remoteVersion >= 100000) - appendPQExpBuffer(query, "SELECT " + appendPQExpBuffer(query, "collprovider, " - "collcollate, " - "collctype, " - "collversion " - "FROM pg_catalog.pg_collation c " - "WHERE c.oid = '%u'::pg_catalog.oid", - collinfo->dobj.catId.oid); + "collversion, "); else - appendPQExpBuffer(query, "SELECT " + appendPQExpBuffer(query, "'c' AS collprovider, " - "collcollate, " - "collctype, " - "NULL AS collversion " - "FROM pg_catalog.pg_collation c " - "WHERE c.oid = '%u'::pg_catalog.oid", - collinfo->dobj.catId.oid); + "NULL AS collversion, "); + + if (fout->remoteVersion >= 120000) + appendPQExpBuffer(query, + "collisdeterministic, "); + else + appendPQExpBuffer(query, + "true AS collisdeterministic, "); + + appendPQExpBuffer(query, + "collcollate, " + "collctype " + "FROM pg_catalog.pg_collation c " + "WHERE c.oid = '%u'::pg_catalog.oid", + collinfo->dobj.catId.oid); res = ExecuteSqlQueryForSingleRow(fout, query->data); i_collprovider = PQfnumber(res, "collprovider"); + i_collisdeterministic = PQfnumber(res, "collisdeterministic"); i_collcollate = PQfnumber(res, "collcollate"); i_collctype = PQfnumber(res, "collctype"); @@ -13482,6 +13490,9 @@ dumpCollation(Archive *fout, CollInfo *collinfo) "unrecognized collation provider: %s\n", collprovider); + if (strcmp(PQgetvalue(res, 0, i_collisdeterministic), "f") == 0) + appendPQExpBufferStr(q, ", deterministic = false"); + if (strcmp(collcollate, collctype) == 0) { appendPQExpBufferStr(q, ", locale = "); diff --git a/src/bin/psql/describe.c b/src/bin/psql/describe.c index 779e48437c..fd8ebee8cd 100644 --- a/src/bin/psql/describe.c +++ b/src/bin/psql/describe.c @@ -4106,7 +4106,7 @@ listCollations(const char *pattern, bool verbose, bool showSystem) PQExpBufferData buf; PGresult *res; printQueryOpt myopt = pset.popt; - static const bool translate_columns[] = {false, false, false, false, false, false}; + static const bool translate_columns[] = {false, false, false, false, false, true, false}; if (pset.sversion < 90100) { @@ -4134,6 +4134,21 @@ listCollations(const char *pattern, bool verbose, bool showSystem) appendPQExpBuffer(&buf, ",\n CASE c.collprovider WHEN 'd' THEN 'default' WHEN 'c' THEN 'libc' WHEN 'i' THEN 'icu' END AS \"%s\"", gettext_noop("Provider")); + else + appendPQExpBuffer(&buf, + ",\n 'libc' AS \"%s\"", + gettext_noop("Provider")); + + if (pset.sversion >= 120000) + appendPQExpBuffer(&buf, + ",\n CASE WHEN c.collisdeterministic THEN '%s' ELSE '%s' END AS \"%s\"", + gettext_noop("yes"), gettext_noop("no"), + gettext_noop("Deterministic?")); + else + appendPQExpBuffer(&buf, + ",\n '%s' AS \"%s\"", + gettext_noop("yes"), + gettext_noop("Deterministic?")); if (verbose) appendPQExpBuffer(&buf, diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index 45c33b63d6..d4dfe237c9 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 201903211 +#define CATALOG_VERSION_NO 201903221 #endif diff --git a/src/include/catalog/pg_collation.h b/src/include/catalog/pg_collation.h index 10fe711a91..4d2fcb3858 100644 --- a/src/include/catalog/pg_collation.h +++ b/src/include/catalog/pg_collation.h @@ -33,6 +33,7 @@ CATALOG(pg_collation,3456,CollationRelationId) Oid collnamespace; /* OID of namespace containing collation */ Oid collowner; /* owner of collation */ char collprovider; /* see constants below */ + bool collisdeterministic BKI_DEFAULT(t); int32 collencoding; /* encoding for this collation; -1 = "all" */ NameData collcollate; /* LC_COLLATE setting */ NameData collctype; /* LC_CTYPE setting */ @@ -61,6 +62,7 @@ typedef FormData_pg_collation *Form_pg_collation; extern Oid CollationCreate(const char *collname, Oid collnamespace, Oid collowner, char collprovider, + bool collisdeterministic, int32 collencoding, const char *collcollate, const char *collctype, const char *collversion, diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h index 9003f2ce58..0cf7aa3495 100644 --- a/src/include/executor/executor.h +++ b/src/include/executor/executor.h @@ -111,6 +111,7 @@ extern ExprState *execTuplesMatchPrepare(TupleDesc desc, int numCols, const AttrNumber *keyColIdx, const Oid *eqOperators, + const Oid *collations, PlanState *parent); extern void execTuplesHashPrepare(int numCols, const Oid *eqOperators, @@ -121,6 +122,7 @@ extern TupleHashTable BuildTupleHashTable(PlanState *parent, int numCols, AttrNumber *keyColIdx, const Oid *eqfuncoids, FmgrInfo *hashfunctions, + Oid *collations, long nbuckets, Size additionalsize, MemoryContext tablecxt, MemoryContext tempcxt, bool use_variable_hash_iv); @@ -129,6 +131,7 @@ extern TupleHashTable BuildTupleHashTableExt(PlanState *parent, int numCols, AttrNumber *keyColIdx, const Oid *eqfuncoids, FmgrInfo *hashfunctions, + Oid *collations, long nbuckets, Size additionalsize, MemoryContext metacxt, MemoryContext tablecxt, @@ -257,6 +260,7 @@ extern ExprState *ExecBuildGroupingEqual(TupleDesc ldesc, TupleDesc rdesc, int numCols, const AttrNumber *keyColIdx, const Oid *eqfunctions, + const Oid *collations, PlanState *parent); extern ProjectionInfo *ExecBuildProjectionInfo(List *targetList, ExprContext *econtext, diff --git a/src/include/executor/hashjoin.h b/src/include/executor/hashjoin.h index e7bf158c1b..2c94b926d3 100644 --- a/src/include/executor/hashjoin.h +++ b/src/include/executor/hashjoin.h @@ -337,6 +337,7 @@ typedef struct HashJoinTableData FmgrInfo *outer_hashfunctions; /* lookup data for hash functions */ FmgrInfo *inner_hashfunctions; /* lookup data for hash functions */ bool *hashStrict; /* is each hash join operator strict? */ + Oid *collations; Size spaceUsed; /* memory space currently used by tuples */ Size spaceAllowed; /* upper limit for space used */ diff --git a/src/include/executor/nodeHash.h b/src/include/executor/nodeHash.h index 1309b32b90..1233766023 100644 --- a/src/include/executor/nodeHash.h +++ b/src/include/executor/nodeHash.h @@ -24,7 +24,7 @@ extern Node *MultiExecHash(HashState *node); extern void ExecEndHash(HashState *node); extern void ExecReScanHash(HashState *node); -extern HashJoinTable ExecHashTableCreate(HashState *state, List *hashOperators, +extern HashJoinTable ExecHashTableCreate(HashState *state, List *hashOperators, List *hashCollations, bool keepNulls); extern void ExecParallelHashTableAlloc(HashJoinTable hashtable, int batchno); diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 62eb1a06ee..869c303e15 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -693,6 +693,7 @@ typedef struct TupleHashTableData AttrNumber *keyColIdx; /* attr numbers of key columns */ FmgrInfo *tab_hash_funcs; /* hash functions for table datatype(s) */ ExprState *tab_eq_func; /* comparator for table datatype(s) */ + Oid *tab_collations; /* collations for hash and comparison */ MemoryContext tablecxt; /* memory context containing table */ MemoryContext tempcxt; /* context for function evaluations */ Size entrysize; /* actual size to make each hash entry */ @@ -862,6 +863,7 @@ typedef struct SubPlanState AttrNumber *keyColIdx; /* control data for hash tables */ Oid *tab_eq_funcoids; /* equality func oids for table * datatype(s) */ + Oid *tab_collations; /* collations for hash and comparison */ FmgrInfo *tab_hash_funcs; /* hash functions for table datatype(s) */ FmgrInfo *tab_eq_funcs; /* equality functions for table datatype(s) */ FmgrInfo *lhs_hash_funcs; /* hash functions for lefthand datatype(s) */ @@ -1872,6 +1874,7 @@ typedef struct HashJoinState List *hj_OuterHashKeys; /* list of ExprState nodes */ List *hj_InnerHashKeys; /* list of ExprState nodes */ List *hj_HashOperators; /* list of operator OIDs */ + List *hj_Collations; HashJoinTable hj_HashTable; uint32 hj_CurHashValue; int hj_CurBucketNo; diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index d66a187a53..24740c31e3 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -297,6 +297,7 @@ typedef struct RecursiveUnion * duplicate-ness */ AttrNumber *dupColIdx; /* their indexes in the target list */ Oid *dupOperators; /* equality operators to compare with */ + Oid *dupCollations; long numGroups; /* estimated number of groups in input */ } RecursiveUnion; @@ -773,6 +774,7 @@ typedef struct Group int numCols; /* number of grouping columns */ AttrNumber *grpColIdx; /* their indexes in the target list */ Oid *grpOperators; /* equality operators to compare with */ + Oid *grpCollations; } Group; /* --------------- @@ -797,6 +799,7 @@ typedef struct Agg int numCols; /* number of grouping columns */ AttrNumber *grpColIdx; /* their indexes in the target list */ Oid *grpOperators; /* equality operators to compare with */ + Oid *grpCollations; long numGroups; /* estimated number of groups in input */ Bitmapset *aggParams; /* IDs of Params used in Aggref inputs */ /* Note: planner provides numGroups & aggParams only in HASHED/MIXED case */ @@ -815,9 +818,11 @@ typedef struct WindowAgg int partNumCols; /* number of columns in partition clause */ AttrNumber *partColIdx; /* their indexes in the target list */ Oid *partOperators; /* equality operators for partition columns */ + Oid *partCollations; /* collations for partition columns */ int ordNumCols; /* number of columns in ordering clause */ AttrNumber *ordColIdx; /* their indexes in the target list */ Oid *ordOperators; /* equality operators for ordering columns */ + Oid *ordCollations; /* collations for ordering columns */ int frameOptions; /* frame_clause options, see WindowDef */ Node *startOffset; /* expression for starting bound, if any */ Node *endOffset; /* expression for ending bound, if any */ @@ -839,6 +844,7 @@ typedef struct Unique int numCols; /* number of columns to check for uniqueness */ AttrNumber *uniqColIdx; /* their indexes in the target list */ Oid *uniqOperators; /* equality operators to compare with */ + Oid *uniqCollations; /* collations for equality comparisons */ } Unique; /* ------------ @@ -913,6 +919,7 @@ typedef struct SetOp * duplicate-ness */ AttrNumber *dupColIdx; /* their indexes in the target list */ Oid *dupOperators; /* equality operators to compare with */ + Oid *dupCollations; AttrNumber flagColIdx; /* where is the flag column, if any */ int firstFlag; /* flag value for first input relation */ long numGroups; /* estimated number of groups in input */ diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h index 3bbdb5e2f7..b093a3c8ac 100644 --- a/src/include/optimizer/planmain.h +++ b/src/include/optimizer/planmain.h @@ -53,7 +53,7 @@ extern bool is_projection_capable_plan(Plan *plan); extern Sort *make_sort_from_sortclauses(List *sortcls, Plan *lefttree); extern Agg *make_agg(List *tlist, List *qual, AggStrategy aggstrategy, AggSplit aggsplit, - int numGroupCols, AttrNumber *grpColIdx, Oid *grpOperators, + int numGroupCols, AttrNumber *grpColIdx, Oid *grpOperators, Oid *grpCollations, List *groupingSets, List *chain, double dNumGroups, Plan *lefttree); extern Limit *make_limit(Plan *lefttree, Node *limitOffset, Node *limitCount); diff --git a/src/include/optimizer/tlist.h b/src/include/optimizer/tlist.h index 58db79203b..46d614f4fb 100644 --- a/src/include/optimizer/tlist.h +++ b/src/include/optimizer/tlist.h @@ -32,6 +32,7 @@ extern bool tlist_same_collations(List *tlist, List *colCollations, bool junkOK) extern void apply_tlist_labeling(List *dest_tlist, List *src_tlist); extern Oid *extract_grouping_ops(List *groupClause); +extern Oid *extract_grouping_collations(List *groupClause, List *tlist); extern AttrNumber *extract_grouping_cols(List *groupClause, List *tlist); extern bool grouping_is_sortable(List *groupClause); extern bool grouping_is_hashable(List *groupClause); diff --git a/src/include/partitioning/partbounds.h b/src/include/partitioning/partbounds.h index b1ae39ad63..683e1574ea 100644 --- a/src/include/partitioning/partbounds.h +++ b/src/include/partitioning/partbounds.h @@ -77,6 +77,7 @@ typedef struct PartitionBoundInfoData extern int get_hash_partition_greatest_modulus(PartitionBoundInfo b); extern uint64 compute_partition_hash_value(int partnatts, FmgrInfo *partsupfunc, + Oid *partcollation, Datum *values, bool *isnull); extern List *get_qual_from_partbound(Relation rel, Relation parent, PartitionBoundSpec *spec); diff --git a/src/include/utils/lsyscache.h b/src/include/utils/lsyscache.h index 16b0b1d2dc..b9a9ecb7cc 100644 --- a/src/include/utils/lsyscache.h +++ b/src/include/utils/lsyscache.h @@ -90,6 +90,7 @@ extern Oid get_atttype(Oid relid, AttrNumber attnum); extern void get_atttypetypmodcoll(Oid relid, AttrNumber attnum, Oid *typid, int32 *typmod, Oid *collid); extern char *get_collation_name(Oid colloid); +extern bool get_collation_isdeterministic(Oid colloid); extern char *get_constraint_name(Oid conoid); extern char *get_language_name(Oid langoid, bool missing_ok); extern Oid get_opclass_family(Oid opclass); diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h index 606952afd7..a342a62549 100644 --- a/src/include/utils/pg_locale.h +++ b/src/include/utils/pg_locale.h @@ -82,6 +82,7 @@ extern void cache_locale_time(void); struct pg_locale_struct { char provider; + bool deterministic; union { #ifdef HAVE_LOCALE_T diff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out index b66193d1be..23d48f4ea3 100644 --- a/src/test/regress/expected/collate.icu.utf8.out +++ b/src/test/regress/expected/collate.icu.utf8.out @@ -1149,6 +1149,716 @@ SELECT 'Goldmann' < 'Götz' COLLATE "de-x-icu", 'Goldmann' > 'Götz' COLLATE tes t | t (1 row) +-- nondeterministic collations +CREATE COLLATION ctest_det (provider = icu, locale = 'und', deterministic = true); +CREATE COLLATION ctest_nondet (provider = icu, locale = 'und', deterministic = false); +CREATE TABLE test6 (a int, b text); +-- same string in different normal forms +INSERT INTO test6 VALUES (1, U&'\00E4bc'); +INSERT INTO test6 VALUES (2, U&'\0061\0308bc'); +SELECT * FROM test6; + a | b +---+----- + 1 | äbc + 2 | äbc +(2 rows) + +SELECT * FROM test6 WHERE b = 'äbc' COLLATE ctest_det; + a | b +---+----- + 1 | äbc +(1 row) + +SELECT * FROM test6 WHERE b = 'äbc' COLLATE ctest_nondet; + a | b +---+----- + 1 | äbc + 2 | äbc +(2 rows) + +CREATE COLLATION case_sensitive (provider = icu, locale = 'und'); +CREATE COLLATION case_insensitive (provider = icu, locale = 'und-u-ks-level2', deterministic = false); +SELECT 'abc' <= 'ABC' COLLATE case_sensitive, 'abc' >= 'ABC' COLLATE case_sensitive; + ?column? | ?column? +----------+---------- + t | f +(1 row) + +SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_insensitive; + ?column? | ?column? +----------+---------- + t | t +(1 row) + +CREATE TABLE test1cs (x text COLLATE case_sensitive); +CREATE TABLE test2cs (x text COLLATE case_sensitive); +CREATE TABLE test3cs (x text COLLATE case_sensitive); +INSERT INTO test1cs VALUES ('abc'), ('def'), ('ghi'); +INSERT INTO test2cs VALUES ('ABC'), ('ghi'); +INSERT INTO test3cs VALUES ('abc'), ('ABC'), ('def'), ('ghi'); +SELECT x FROM test3cs WHERE x = 'abc'; + x +----- + abc +(1 row) + +SELECT x FROM test3cs WHERE x <> 'abc'; + x +----- + ABC + def + ghi +(3 rows) + +SELECT x FROM test3cs WHERE x LIKE 'a%'; + x +----- + abc +(1 row) + +SELECT x FROM test3cs WHERE x ILIKE 'a%'; + x +----- + abc + ABC +(2 rows) + +SELECT x FROM test3cs WHERE x SIMILAR TO 'a%'; + x +----- + abc +(1 row) + +SELECT x FROM test3cs WHERE x ~ 'a'; + x +----- + abc +(1 row) + +SELECT x FROM test1cs UNION SELECT x FROM test2cs ORDER BY x; + x +----- + abc + ABC + def + ghi +(4 rows) + +SELECT x FROM test2cs UNION SELECT x FROM test1cs ORDER BY x; + x +----- + abc + ABC + def + ghi +(4 rows) + +SELECT x FROM test1cs INTERSECT SELECT x FROM test2cs; + x +----- + ghi +(1 row) + +SELECT x FROM test2cs INTERSECT SELECT x FROM test1cs; + x +----- + ghi +(1 row) + +SELECT x FROM test1cs EXCEPT SELECT x FROM test2cs; + x +----- + abc + def +(2 rows) + +SELECT x FROM test2cs EXCEPT SELECT x FROM test1cs; + x +----- + ABC +(1 row) + +SELECT DISTINCT x FROM test3cs ORDER BY x; + x +----- + abc + ABC + def + ghi +(4 rows) + +SELECT count(DISTINCT x) FROM test3cs; + count +------- + 4 +(1 row) + +SELECT x, count(*) FROM test3cs GROUP BY x ORDER BY x; + x | count +-----+------- + abc | 1 + ABC | 1 + def | 1 + ghi | 1 +(4 rows) + +SELECT x, row_number() OVER (ORDER BY x), rank() OVER (ORDER BY x) FROM test3cs ORDER BY x; + x | row_number | rank +-----+------------+------ + abc | 1 | 1 + ABC | 2 | 2 + def | 3 | 3 + ghi | 4 | 4 +(4 rows) + +CREATE UNIQUE INDEX ON test1cs (x); -- ok +INSERT INTO test1cs VALUES ('ABC'); -- ok +CREATE UNIQUE INDEX ON test3cs (x); -- ok +SELECT string_to_array('ABC,DEF,GHI' COLLATE case_sensitive, ',', 'abc'); + string_to_array +----------------- + {ABC,DEF,GHI} +(1 row) + +SELECT string_to_array('ABCDEFGHI' COLLATE case_sensitive, NULL, 'b'); + string_to_array +--------------------- + {A,B,C,D,E,F,G,H,I} +(1 row) + +CREATE TABLE test1ci (x text COLLATE case_insensitive); +CREATE TABLE test2ci (x text COLLATE case_insensitive); +CREATE TABLE test3ci (x text COLLATE case_insensitive); +CREATE INDEX ON test3ci (x text_pattern_ops); -- error +ERROR: nondeterministic collations are not supported for operator class "text_pattern_ops" +INSERT INTO test1ci VALUES ('abc'), ('def'), ('ghi'); +INSERT INTO test2ci VALUES ('ABC'), ('ghi'); +INSERT INTO test3ci VALUES ('abc'), ('ABC'), ('def'), ('ghi'); +SELECT x FROM test3ci WHERE x = 'abc'; + x +----- + abc + ABC +(2 rows) + +SELECT x FROM test3ci WHERE x <> 'abc'; + x +----- + def + ghi +(2 rows) + +SELECT x FROM test3ci WHERE x LIKE 'a%'; +ERROR: nondeterministic collations are not supported for LIKE +SELECT x FROM test3ci WHERE x ILIKE 'a%'; +ERROR: nondeterministic collations are not supported for ILIKE +SELECT x FROM test3ci WHERE x SIMILAR TO 'a%'; +ERROR: nondeterministic collations are not supported for regular expressions +SELECT x FROM test3ci WHERE x ~ 'a'; +ERROR: nondeterministic collations are not supported for regular expressions +SELECT x FROM test1ci UNION SELECT x FROM test2ci ORDER BY x; + x +----- + abc + def + ghi +(3 rows) + +SELECT x FROM test2ci UNION SELECT x FROM test1ci ORDER BY x; + x +----- + ABC + def + ghi +(3 rows) + +SELECT x FROM test1ci INTERSECT SELECT x FROM test2ci; + x +----- + ghi + abc +(2 rows) + +SELECT x FROM test2ci INTERSECT SELECT x FROM test1ci; + x +----- + ghi + ABC +(2 rows) + +SELECT x FROM test1ci EXCEPT SELECT x FROM test2ci; + x +----- + def +(1 row) + +SELECT x FROM test2ci EXCEPT SELECT x FROM test1ci; + x +--- +(0 rows) + +SELECT DISTINCT x FROM test3ci ORDER BY x; + x +----- + abc + def + ghi +(3 rows) + +SELECT count(DISTINCT x) FROM test3ci; + count +------- + 3 +(1 row) + +SELECT x, count(*) FROM test3ci GROUP BY x ORDER BY x; + x | count +-----+------- + abc | 2 + def | 1 + ghi | 1 +(3 rows) + +SELECT x, row_number() OVER (ORDER BY x), rank() OVER (ORDER BY x) FROM test3ci ORDER BY x; + x | row_number | rank +-----+------------+------ + abc | 1 | 1 + ABC | 2 | 1 + def | 3 | 3 + ghi | 4 | 4 +(4 rows) + +CREATE UNIQUE INDEX ON test1ci (x); -- ok +INSERT INTO test1ci VALUES ('ABC'); -- error +ERROR: duplicate key value violates unique constraint "test1ci_x_idx" +DETAIL: Key (x)=(ABC) already exists. +CREATE UNIQUE INDEX ON test3ci (x); -- error +ERROR: could not create unique index "test3ci_x_idx" +DETAIL: Key (x)=(abc) is duplicated. +SELECT string_to_array('ABC,DEF,GHI' COLLATE case_insensitive, ',', 'abc'); +ERROR: nondeterministic collations are not supported for substring searches +SELECT string_to_array('ABCDEFGHI' COLLATE case_insensitive, NULL, 'b'); + string_to_array +------------------------ + {A,NULL,C,D,E,F,G,H,I} +(1 row) + +-- bpchar +CREATE TABLE test1bpci (x char(3) COLLATE case_insensitive); +CREATE TABLE test2bpci (x char(3) COLLATE case_insensitive); +CREATE TABLE test3bpci (x char(3) COLLATE case_insensitive); +CREATE INDEX ON test3bpci (x bpchar_pattern_ops); -- error +ERROR: nondeterministic collations are not supported for operator class "bpchar_pattern_ops" +INSERT INTO test1bpci VALUES ('abc'), ('def'), ('ghi'); +INSERT INTO test2bpci VALUES ('ABC'), ('ghi'); +INSERT INTO test3bpci VALUES ('abc'), ('ABC'), ('def'), ('ghi'); +SELECT x FROM test3bpci WHERE x = 'abc'; + x +----- + abc + ABC +(2 rows) + +SELECT x FROM test3bpci WHERE x <> 'abc'; + x +----- + def + ghi +(2 rows) + +SELECT x FROM test3bpci WHERE x LIKE 'a%'; +ERROR: nondeterministic collations are not supported for LIKE +SELECT x FROM test3bpci WHERE x ILIKE 'a%'; +ERROR: nondeterministic collations are not supported for ILIKE +SELECT x FROM test3bpci WHERE x SIMILAR TO 'a%'; +ERROR: nondeterministic collations are not supported for regular expressions +SELECT x FROM test3bpci WHERE x ~ 'a'; +ERROR: nondeterministic collations are not supported for regular expressions +SELECT x FROM test1bpci UNION SELECT x FROM test2bpci ORDER BY x; + x +----- + abc + def + ghi +(3 rows) + +SELECT x FROM test2bpci UNION SELECT x FROM test1bpci ORDER BY x; + x +----- + ABC + def + ghi +(3 rows) + +SELECT x FROM test1bpci INTERSECT SELECT x FROM test2bpci; + x +----- + ghi + abc +(2 rows) + +SELECT x FROM test2bpci INTERSECT SELECT x FROM test1bpci; + x +----- + ghi + ABC +(2 rows) + +SELECT x FROM test1bpci EXCEPT SELECT x FROM test2bpci; + x +----- + def +(1 row) + +SELECT x FROM test2bpci EXCEPT SELECT x FROM test1bpci; + x +--- +(0 rows) + +SELECT DISTINCT x FROM test3bpci ORDER BY x; + x +----- + abc + def + ghi +(3 rows) + +SELECT count(DISTINCT x) FROM test3bpci; + count +------- + 3 +(1 row) + +SELECT x, count(*) FROM test3bpci GROUP BY x ORDER BY x; + x | count +-----+------- + abc | 2 + def | 1 + ghi | 1 +(3 rows) + +SELECT x, row_number() OVER (ORDER BY x), rank() OVER (ORDER BY x) FROM test3bpci ORDER BY x; + x | row_number | rank +-----+------------+------ + abc | 1 | 1 + ABC | 2 | 1 + def | 3 | 3 + ghi | 4 | 4 +(4 rows) + +CREATE UNIQUE INDEX ON test1bpci (x); -- ok +INSERT INTO test1bpci VALUES ('ABC'); -- error +ERROR: duplicate key value violates unique constraint "test1bpci_x_idx" +DETAIL: Key (x)=(ABC) already exists. +CREATE UNIQUE INDEX ON test3bpci (x); -- error +ERROR: could not create unique index "test3bpci_x_idx" +DETAIL: Key (x)=(abc) is duplicated. +SELECT string_to_array('ABC,DEF,GHI'::char(11) COLLATE case_insensitive, ',', 'abc'); +ERROR: nondeterministic collations are not supported for substring searches +SELECT string_to_array('ABCDEFGHI'::char(9) COLLATE case_insensitive, NULL, 'b'); + string_to_array +------------------------ + {A,NULL,C,D,E,F,G,H,I} +(1 row) + +-- This tests the issue described in match_pattern_prefix(). In the +-- absence of that check, the case_insensitive tests below would +-- return no rows where they should logically return one. +CREATE TABLE test4c (x text COLLATE "C"); +INSERT INTO test4c VALUES ('abc'); +CREATE INDEX ON test4c (x); +SET enable_seqscan = off; +SELECT x FROM test4c WHERE x LIKE 'ABC' COLLATE case_sensitive; -- ok, no rows + x +--- +(0 rows) + +SELECT x FROM test4c WHERE x LIKE 'ABC%' COLLATE case_sensitive; -- ok, no rows + x +--- +(0 rows) + +SELECT x FROM test4c WHERE x LIKE 'ABC' COLLATE case_insensitive; -- error +ERROR: nondeterministic collations are not supported for LIKE +SELECT x FROM test4c WHERE x LIKE 'ABC%' COLLATE case_insensitive; -- error +ERROR: nondeterministic collations are not supported for LIKE +RESET enable_seqscan; +-- Unicode special case: different variants of Greek lower case sigma. +-- A naive implementation like citext that just does lower(x) = +-- lower(y) will do the wrong thing here, because lower('Σ') is 'σ' +-- but upper('ς') is 'Σ'. +SELECT 'ὀδυσσεύς' = 'ὈΔΥΣΣΕΎΣ' COLLATE case_sensitive; + ?column? +---------- + f +(1 row) + +SELECT 'ὀδυσσεύς' = 'ὈΔΥΣΣΕΎΣ' COLLATE case_insensitive; + ?column? +---------- + t +(1 row) + +-- name vs. text comparison operators +SELECT relname FROM pg_class WHERE relname = 'PG_CLASS'::text COLLATE case_insensitive; + relname +---------- + pg_class +(1 row) + +SELECT relname FROM pg_class WHERE 'PG_CLASS'::text = relname COLLATE case_insensitive; + relname +---------- + pg_class +(1 row) + +SELECT typname FROM pg_type WHERE typname LIKE 'int_' AND typname <> 'INT2'::text COLLATE case_insensitive; + typname +--------- + int4 + int8 +(2 rows) + +SELECT typname FROM pg_type WHERE typname LIKE 'int_' AND 'INT2'::text <> typname COLLATE case_insensitive;; + typname +--------- + int4 + int8 +(2 rows) + +-- test case adapted from subselect.sql +CREATE TEMP TABLE outer_text (f1 text COLLATE case_insensitive, f2 text); +INSERT INTO outer_text VALUES ('a', 'a'); +INSERT INTO outer_text VALUES ('b', 'a'); +INSERT INTO outer_text VALUES ('A', NULL); +INSERT INTO outer_text VALUES ('B', NULL); +CREATE TEMP TABLE inner_text (c1 text COLLATE case_insensitive, c2 text); +INSERT INTO inner_text VALUES ('a', NULL); +SELECT * FROM outer_text WHERE (f1, f2) NOT IN (SELECT * FROM inner_text); + f1 | f2 +----+---- + b | a + B | +(2 rows) + +-- accents +CREATE COLLATION ignore_accents (provider = icu, locale = 'und-u-ks-level1-kc-true', deterministic = false); +CREATE TABLE test4 (a int, b text); +INSERT INTO test4 VALUES (1, 'cote'), (2, 'côte'), (3, 'coté'), (4, 'côté'); +SELECT * FROM test4 WHERE b = 'cote'; + a | b +---+------ + 1 | cote +(1 row) + +SELECT * FROM test4 WHERE b = 'cote' COLLATE ignore_accents; + a | b +---+------ + 1 | cote + 2 | côte + 3 | coté + 4 | côté +(4 rows) + +SELECT * FROM test4 WHERE b = 'Cote' COLLATE ignore_accents; -- still case-sensitive + a | b +---+--- +(0 rows) + +SELECT * FROM test4 WHERE b = 'Cote' COLLATE case_insensitive; + a | b +---+------ + 1 | cote +(1 row) + +-- foreign keys (should use collation of primary key) +-- PK is case-sensitive, FK is case-insensitive +CREATE TABLE test10pk (x text COLLATE case_sensitive PRIMARY KEY); +INSERT INTO test10pk VALUES ('abc'), ('def'), ('ghi'); +CREATE TABLE test10fk (x text COLLATE case_insensitive REFERENCES test10pk (x) ON UPDATE CASCADE ON DELETE CASCADE); +INSERT INTO test10fk VALUES ('abc'); -- ok +INSERT INTO test10fk VALUES ('ABC'); -- error +ERROR: insert or update on table "test10fk" violates foreign key constraint "test10fk_x_fkey" +DETAIL: Key (x)=(ABC) is not present in table "test10pk". +INSERT INTO test10fk VALUES ('xyz'); -- error +ERROR: insert or update on table "test10fk" violates foreign key constraint "test10fk_x_fkey" +DETAIL: Key (x)=(xyz) is not present in table "test10pk". +SELECT * FROM test10pk; + x +----- + abc + def + ghi +(3 rows) + +SELECT * FROM test10fk; + x +----- + abc +(1 row) + +-- restrict update even though the values are "equal" in the FK table +UPDATE test10fk SET x = 'ABC' WHERE x = 'abc'; -- error +ERROR: insert or update on table "test10fk" violates foreign key constraint "test10fk_x_fkey" +DETAIL: Key (x)=(ABC) is not present in table "test10pk". +SELECT * FROM test10fk; + x +----- + abc +(1 row) + +DELETE FROM test10pk WHERE x = 'abc'; +SELECT * FROM test10pk; + x +----- + def + ghi +(2 rows) + +SELECT * FROM test10fk; + x +--- +(0 rows) + +-- PK is case-insensitive, FK is case-sensitive +CREATE TABLE test11pk (x text COLLATE case_insensitive PRIMARY KEY); +INSERT INTO test11pk VALUES ('abc'), ('def'), ('ghi'); +CREATE TABLE test11fk (x text COLLATE case_sensitive REFERENCES test11pk (x) ON UPDATE CASCADE ON DELETE CASCADE); +INSERT INTO test11fk VALUES ('abc'); -- ok +INSERT INTO test11fk VALUES ('ABC'); -- ok +INSERT INTO test11fk VALUES ('xyz'); -- error +ERROR: insert or update on table "test11fk" violates foreign key constraint "test11fk_x_fkey" +DETAIL: Key (x)=(xyz) is not present in table "test11pk". +SELECT * FROM test11pk; + x +----- + abc + def + ghi +(3 rows) + +SELECT * FROM test11fk; + x +----- + abc + ABC +(2 rows) + +-- cascade update even though the values are "equal" in the PK table +UPDATE test11pk SET x = 'ABC' WHERE x = 'abc'; +SELECT * FROM test11fk; + x +----- + ABC + ABC +(2 rows) + +DELETE FROM test11pk WHERE x = 'abc'; +SELECT * FROM test11pk; + x +----- + def + ghi +(2 rows) + +SELECT * FROM test11fk; + x +--- +(0 rows) + +-- partitioning +CREATE TABLE test20 (a int, b text COLLATE case_insensitive) PARTITION BY LIST (b); +CREATE TABLE test20_1 PARTITION OF test20 FOR VALUES IN ('abc'); +INSERT INTO test20 VALUES (1, 'abc'); +INSERT INTO test20 VALUES (2, 'ABC'); +SELECT * FROM test20_1; + a | b +---+----- + 1 | abc + 2 | ABC +(2 rows) + +CREATE TABLE test21 (a int, b text COLLATE case_insensitive) PARTITION BY RANGE (b); +CREATE TABLE test21_1 PARTITION OF test21 FOR VALUES FROM ('ABC') TO ('DEF'); +INSERT INTO test21 VALUES (1, 'abc'); +INSERT INTO test21 VALUES (2, 'ABC'); +SELECT * FROM test21_1; + a | b +---+----- + 1 | abc + 2 | ABC +(2 rows) + +CREATE TABLE test22 (a int, b text COLLATE case_sensitive) PARTITION BY HASH (b); +CREATE TABLE test22_0 PARTITION OF test22 FOR VALUES WITH (MODULUS 2, REMAINDER 0); +CREATE TABLE test22_1 PARTITION OF test22 FOR VALUES WITH (MODULUS 2, REMAINDER 1); +INSERT INTO test22 VALUES (1, 'def'); +INSERT INTO test22 VALUES (2, 'DEF'); +-- they end up in different partitions +SELECT (SELECT count(*) FROM test22_0) = (SELECT count(*) FROM test22_1); + ?column? +---------- + t +(1 row) + +CREATE TABLE test23 (a int, b text COLLATE case_insensitive) PARTITION BY HASH (b); +CREATE TABLE test23_0 PARTITION OF test23 FOR VALUES WITH (MODULUS 2, REMAINDER 0); +CREATE TABLE test23_1 PARTITION OF test23 FOR VALUES WITH (MODULUS 2, REMAINDER 1); +INSERT INTO test23 VALUES (1, 'def'); +INSERT INTO test23 VALUES (2, 'DEF'); +-- they end up in the same partition (but it's platform-dependent which one) +SELECT (SELECT count(*) FROM test23_0) <> (SELECT count(*) FROM test23_1); + ?column? +---------- + t +(1 row) + +CREATE TABLE test30 (a int, b char(3) COLLATE case_insensitive) PARTITION BY LIST (b); +CREATE TABLE test30_1 PARTITION OF test30 FOR VALUES IN ('abc'); +INSERT INTO test30 VALUES (1, 'abc'); +INSERT INTO test30 VALUES (2, 'ABC'); +SELECT * FROM test30_1; + a | b +---+----- + 1 | abc + 2 | ABC +(2 rows) + +CREATE TABLE test31 (a int, b char(3) COLLATE case_insensitive) PARTITION BY RANGE (b); +CREATE TABLE test31_1 PARTITION OF test31 FOR VALUES FROM ('ABC') TO ('DEF'); +INSERT INTO test31 VALUES (1, 'abc'); +INSERT INTO test31 VALUES (2, 'ABC'); +SELECT * FROM test31_1; + a | b +---+----- + 1 | abc + 2 | ABC +(2 rows) + +CREATE TABLE test32 (a int, b char(3) COLLATE case_sensitive) PARTITION BY HASH (b); +CREATE TABLE test32_0 PARTITION OF test32 FOR VALUES WITH (MODULUS 2, REMAINDER 0); +CREATE TABLE test32_1 PARTITION OF test32 FOR VALUES WITH (MODULUS 2, REMAINDER 1); +INSERT INTO test32 VALUES (1, 'def'); +INSERT INTO test32 VALUES (2, 'DEF'); +-- they end up in different partitions +SELECT (SELECT count(*) FROM test32_0) = (SELECT count(*) FROM test32_1); + ?column? +---------- + t +(1 row) + +CREATE TABLE test33 (a int, b char(3) COLLATE case_insensitive) PARTITION BY HASH (b); +CREATE TABLE test33_0 PARTITION OF test33 FOR VALUES WITH (MODULUS 2, REMAINDER 0); +CREATE TABLE test33_1 PARTITION OF test33 FOR VALUES WITH (MODULUS 2, REMAINDER 1); +INSERT INTO test33 VALUES (1, 'def'); +INSERT INTO test33 VALUES (2, 'DEF'); +-- they end up in the same partition (but it's platform-dependent which one) +SELECT (SELECT count(*) FROM test33_0) <> (SELECT count(*) FROM test33_1); + ?column? +---------- + t +(1 row) + -- cleanup SET client_min_messages TO warning; DROP SCHEMA collate_tests CASCADE; diff --git a/src/test/regress/expected/collate.linux.utf8.out b/src/test/regress/expected/collate.linux.utf8.out index d33f04a3b5..15b3222239 100644 --- a/src/test/regress/expected/collate.linux.utf8.out +++ b/src/test/regress/expected/collate.linux.utf8.out @@ -1117,6 +1117,11 @@ select textrange_en_us('A','Z') @> 'b'::text; drop type textrange_c; drop type textrange_en_us; +-- nondeterministic collations +-- (not supported with libc provider) +CREATE COLLATION ctest_det (locale = 'en_US.utf8', deterministic = true); +CREATE COLLATION ctest_nondet (locale = 'en_US.utf8', deterministic = false); +ERROR: nondeterministic collations not supported with this provider -- cleanup SET client_min_messages TO warning; DROP SCHEMA collate_tests CASCADE; diff --git a/src/test/regress/expected/collate.out b/src/test/regress/expected/collate.out index fcbe3a5cc8..dbfa5c9348 100644 --- a/src/test/regress/expected/collate.out +++ b/src/test/regress/expected/collate.out @@ -498,6 +498,21 @@ SELECT a, b, a < b as lt FROM A | b | t (2 rows) +-- collation mismatch in subselects +SELECT * FROM collate_test10 WHERE (x, y) NOT IN (SELECT y, x FROM collate_test10); +ERROR: could not determine which collation to use for string hashing +HINT: Use the COLLATE clause to set the collation explicitly. +-- now it works with overrides +SELECT * FROM collate_test10 WHERE (x COLLATE "POSIX", y COLLATE "C") NOT IN (SELECT y, x FROM collate_test10); + a | x | y +---+---+--- +(0 rows) + +SELECT * FROM collate_test10 WHERE (x, y) NOT IN (SELECT y COLLATE "C", x COLLATE "POSIX" FROM collate_test10); + a | x | y +---+---+--- +(0 rows) + -- casting SELECT CAST('42' AS text COLLATE "C"); ERROR: syntax error at or near "COLLATE" diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out index fe5fc64480..4a54104182 100644 --- a/src/test/regress/expected/subselect.out +++ b/src/test/regress/expected/subselect.out @@ -745,6 +745,25 @@ select * from outer_7597 where (f1, f2) not in (select * from inner_7597); 1 | (2 rows) +-- +-- Similar test case using text that verifies that collation +-- information is passed through by execTuplesEqual() in nodeSubplan.c +-- (otherwise it would error in texteq()) +-- +create temp table outer_text (f1 text, f2 text); +insert into outer_text values ('a', 'a'); +insert into outer_text values ('b', 'a'); +insert into outer_text values ('a', null); +insert into outer_text values ('b', null); +create temp table inner_text (c1 text, c2 text); +insert into inner_text values ('a', null); +select * from outer_text where (f1, f2) not in (select * from inner_text); + f1 | f2 +----+---- + b | a + b | +(2 rows) + -- -- Test case for premature memory release during hashing of subplan output -- diff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql index 68c2d69659..42fb491df7 100644 --- a/src/test/regress/sql/collate.icu.utf8.sql +++ b/src/test/regress/sql/collate.icu.utf8.sql @@ -453,6 +453,256 @@ CREATE COLLATION testcoll_de_phonebook (provider = icu, locale = 'de@collation=p SELECT 'Goldmann' < 'Götz' COLLATE "de-x-icu", 'Goldmann' > 'Götz' COLLATE testcoll_de_phonebook; +-- nondeterministic collations + +CREATE COLLATION ctest_det (provider = icu, locale = 'und', deterministic = true); +CREATE COLLATION ctest_nondet (provider = icu, locale = 'und', deterministic = false); + +CREATE TABLE test6 (a int, b text); +-- same string in different normal forms +INSERT INTO test6 VALUES (1, U&'\00E4bc'); +INSERT INTO test6 VALUES (2, U&'\0061\0308bc'); +SELECT * FROM test6; +SELECT * FROM test6 WHERE b = 'äbc' COLLATE ctest_det; +SELECT * FROM test6 WHERE b = 'äbc' COLLATE ctest_nondet; + +CREATE COLLATION case_sensitive (provider = icu, locale = 'und'); +CREATE COLLATION case_insensitive (provider = icu, locale = 'und-u-ks-level2', deterministic = false); + +SELECT 'abc' <= 'ABC' COLLATE case_sensitive, 'abc' >= 'ABC' COLLATE case_sensitive; +SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_insensitive; + +CREATE TABLE test1cs (x text COLLATE case_sensitive); +CREATE TABLE test2cs (x text COLLATE case_sensitive); +CREATE TABLE test3cs (x text COLLATE case_sensitive); +INSERT INTO test1cs VALUES ('abc'), ('def'), ('ghi'); +INSERT INTO test2cs VALUES ('ABC'), ('ghi'); +INSERT INTO test3cs VALUES ('abc'), ('ABC'), ('def'), ('ghi'); + +SELECT x FROM test3cs WHERE x = 'abc'; +SELECT x FROM test3cs WHERE x <> 'abc'; +SELECT x FROM test3cs WHERE x LIKE 'a%'; +SELECT x FROM test3cs WHERE x ILIKE 'a%'; +SELECT x FROM test3cs WHERE x SIMILAR TO 'a%'; +SELECT x FROM test3cs WHERE x ~ 'a'; +SELECT x FROM test1cs UNION SELECT x FROM test2cs ORDER BY x; +SELECT x FROM test2cs UNION SELECT x FROM test1cs ORDER BY x; +SELECT x FROM test1cs INTERSECT SELECT x FROM test2cs; +SELECT x FROM test2cs INTERSECT SELECT x FROM test1cs; +SELECT x FROM test1cs EXCEPT SELECT x FROM test2cs; +SELECT x FROM test2cs EXCEPT SELECT x FROM test1cs; +SELECT DISTINCT x FROM test3cs ORDER BY x; +SELECT count(DISTINCT x) FROM test3cs; +SELECT x, count(*) FROM test3cs GROUP BY x ORDER BY x; +SELECT x, row_number() OVER (ORDER BY x), rank() OVER (ORDER BY x) FROM test3cs ORDER BY x; +CREATE UNIQUE INDEX ON test1cs (x); -- ok +INSERT INTO test1cs VALUES ('ABC'); -- ok +CREATE UNIQUE INDEX ON test3cs (x); -- ok +SELECT string_to_array('ABC,DEF,GHI' COLLATE case_sensitive, ',', 'abc'); +SELECT string_to_array('ABCDEFGHI' COLLATE case_sensitive, NULL, 'b'); + +CREATE TABLE test1ci (x text COLLATE case_insensitive); +CREATE TABLE test2ci (x text COLLATE case_insensitive); +CREATE TABLE test3ci (x text COLLATE case_insensitive); +CREATE INDEX ON test3ci (x text_pattern_ops); -- error +INSERT INTO test1ci VALUES ('abc'), ('def'), ('ghi'); +INSERT INTO test2ci VALUES ('ABC'), ('ghi'); +INSERT INTO test3ci VALUES ('abc'), ('ABC'), ('def'), ('ghi'); + +SELECT x FROM test3ci WHERE x = 'abc'; +SELECT x FROM test3ci WHERE x <> 'abc'; +SELECT x FROM test3ci WHERE x LIKE 'a%'; +SELECT x FROM test3ci WHERE x ILIKE 'a%'; +SELECT x FROM test3ci WHERE x SIMILAR TO 'a%'; +SELECT x FROM test3ci WHERE x ~ 'a'; +SELECT x FROM test1ci UNION SELECT x FROM test2ci ORDER BY x; +SELECT x FROM test2ci UNION SELECT x FROM test1ci ORDER BY x; +SELECT x FROM test1ci INTERSECT SELECT x FROM test2ci; +SELECT x FROM test2ci INTERSECT SELECT x FROM test1ci; +SELECT x FROM test1ci EXCEPT SELECT x FROM test2ci; +SELECT x FROM test2ci EXCEPT SELECT x FROM test1ci; +SELECT DISTINCT x FROM test3ci ORDER BY x; +SELECT count(DISTINCT x) FROM test3ci; +SELECT x, count(*) FROM test3ci GROUP BY x ORDER BY x; +SELECT x, row_number() OVER (ORDER BY x), rank() OVER (ORDER BY x) FROM test3ci ORDER BY x; +CREATE UNIQUE INDEX ON test1ci (x); -- ok +INSERT INTO test1ci VALUES ('ABC'); -- error +CREATE UNIQUE INDEX ON test3ci (x); -- error +SELECT string_to_array('ABC,DEF,GHI' COLLATE case_insensitive, ',', 'abc'); +SELECT string_to_array('ABCDEFGHI' COLLATE case_insensitive, NULL, 'b'); + +-- bpchar +CREATE TABLE test1bpci (x char(3) COLLATE case_insensitive); +CREATE TABLE test2bpci (x char(3) COLLATE case_insensitive); +CREATE TABLE test3bpci (x char(3) COLLATE case_insensitive); +CREATE INDEX ON test3bpci (x bpchar_pattern_ops); -- error +INSERT INTO test1bpci VALUES ('abc'), ('def'), ('ghi'); +INSERT INTO test2bpci VALUES ('ABC'), ('ghi'); +INSERT INTO test3bpci VALUES ('abc'), ('ABC'), ('def'), ('ghi'); + +SELECT x FROM test3bpci WHERE x = 'abc'; +SELECT x FROM test3bpci WHERE x <> 'abc'; +SELECT x FROM test3bpci WHERE x LIKE 'a%'; +SELECT x FROM test3bpci WHERE x ILIKE 'a%'; +SELECT x FROM test3bpci WHERE x SIMILAR TO 'a%'; +SELECT x FROM test3bpci WHERE x ~ 'a'; +SELECT x FROM test1bpci UNION SELECT x FROM test2bpci ORDER BY x; +SELECT x FROM test2bpci UNION SELECT x FROM test1bpci ORDER BY x; +SELECT x FROM test1bpci INTERSECT SELECT x FROM test2bpci; +SELECT x FROM test2bpci INTERSECT SELECT x FROM test1bpci; +SELECT x FROM test1bpci EXCEPT SELECT x FROM test2bpci; +SELECT x FROM test2bpci EXCEPT SELECT x FROM test1bpci; +SELECT DISTINCT x FROM test3bpci ORDER BY x; +SELECT count(DISTINCT x) FROM test3bpci; +SELECT x, count(*) FROM test3bpci GROUP BY x ORDER BY x; +SELECT x, row_number() OVER (ORDER BY x), rank() OVER (ORDER BY x) FROM test3bpci ORDER BY x; +CREATE UNIQUE INDEX ON test1bpci (x); -- ok +INSERT INTO test1bpci VALUES ('ABC'); -- error +CREATE UNIQUE INDEX ON test3bpci (x); -- error +SELECT string_to_array('ABC,DEF,GHI'::char(11) COLLATE case_insensitive, ',', 'abc'); +SELECT string_to_array('ABCDEFGHI'::char(9) COLLATE case_insensitive, NULL, 'b'); + +-- This tests the issue described in match_pattern_prefix(). In the +-- absence of that check, the case_insensitive tests below would +-- return no rows where they should logically return one. +CREATE TABLE test4c (x text COLLATE "C"); +INSERT INTO test4c VALUES ('abc'); +CREATE INDEX ON test4c (x); +SET enable_seqscan = off; +SELECT x FROM test4c WHERE x LIKE 'ABC' COLLATE case_sensitive; -- ok, no rows +SELECT x FROM test4c WHERE x LIKE 'ABC%' COLLATE case_sensitive; -- ok, no rows +SELECT x FROM test4c WHERE x LIKE 'ABC' COLLATE case_insensitive; -- error +SELECT x FROM test4c WHERE x LIKE 'ABC%' COLLATE case_insensitive; -- error +RESET enable_seqscan; + +-- Unicode special case: different variants of Greek lower case sigma. +-- A naive implementation like citext that just does lower(x) = +-- lower(y) will do the wrong thing here, because lower('Σ') is 'σ' +-- but upper('ς') is 'Σ'. +SELECT 'ὀδυσσεύς' = 'ὈΔΥΣΣΕΎΣ' COLLATE case_sensitive; +SELECT 'ὀδυσσεύς' = 'ὈΔΥΣΣΕΎΣ' COLLATE case_insensitive; + +-- name vs. text comparison operators +SELECT relname FROM pg_class WHERE relname = 'PG_CLASS'::text COLLATE case_insensitive; +SELECT relname FROM pg_class WHERE 'PG_CLASS'::text = relname COLLATE case_insensitive; + +SELECT typname FROM pg_type WHERE typname LIKE 'int_' AND typname <> 'INT2'::text COLLATE case_insensitive; +SELECT typname FROM pg_type WHERE typname LIKE 'int_' AND 'INT2'::text <> typname COLLATE case_insensitive;; + +-- test case adapted from subselect.sql +CREATE TEMP TABLE outer_text (f1 text COLLATE case_insensitive, f2 text); +INSERT INTO outer_text VALUES ('a', 'a'); +INSERT INTO outer_text VALUES ('b', 'a'); +INSERT INTO outer_text VALUES ('A', NULL); +INSERT INTO outer_text VALUES ('B', NULL); + +CREATE TEMP TABLE inner_text (c1 text COLLATE case_insensitive, c2 text); +INSERT INTO inner_text VALUES ('a', NULL); + +SELECT * FROM outer_text WHERE (f1, f2) NOT IN (SELECT * FROM inner_text); + +-- accents +CREATE COLLATION ignore_accents (provider = icu, locale = 'und-u-ks-level1-kc-true', deterministic = false); + +CREATE TABLE test4 (a int, b text); +INSERT INTO test4 VALUES (1, 'cote'), (2, 'côte'), (3, 'coté'), (4, 'côté'); +SELECT * FROM test4 WHERE b = 'cote'; +SELECT * FROM test4 WHERE b = 'cote' COLLATE ignore_accents; +SELECT * FROM test4 WHERE b = 'Cote' COLLATE ignore_accents; -- still case-sensitive +SELECT * FROM test4 WHERE b = 'Cote' COLLATE case_insensitive; + +-- foreign keys (should use collation of primary key) + +-- PK is case-sensitive, FK is case-insensitive +CREATE TABLE test10pk (x text COLLATE case_sensitive PRIMARY KEY); +INSERT INTO test10pk VALUES ('abc'), ('def'), ('ghi'); +CREATE TABLE test10fk (x text COLLATE case_insensitive REFERENCES test10pk (x) ON UPDATE CASCADE ON DELETE CASCADE); +INSERT INTO test10fk VALUES ('abc'); -- ok +INSERT INTO test10fk VALUES ('ABC'); -- error +INSERT INTO test10fk VALUES ('xyz'); -- error +SELECT * FROM test10pk; +SELECT * FROM test10fk; +-- restrict update even though the values are "equal" in the FK table +UPDATE test10fk SET x = 'ABC' WHERE x = 'abc'; -- error +SELECT * FROM test10fk; +DELETE FROM test10pk WHERE x = 'abc'; +SELECT * FROM test10pk; +SELECT * FROM test10fk; + +-- PK is case-insensitive, FK is case-sensitive +CREATE TABLE test11pk (x text COLLATE case_insensitive PRIMARY KEY); +INSERT INTO test11pk VALUES ('abc'), ('def'), ('ghi'); +CREATE TABLE test11fk (x text COLLATE case_sensitive REFERENCES test11pk (x) ON UPDATE CASCADE ON DELETE CASCADE); +INSERT INTO test11fk VALUES ('abc'); -- ok +INSERT INTO test11fk VALUES ('ABC'); -- ok +INSERT INTO test11fk VALUES ('xyz'); -- error +SELECT * FROM test11pk; +SELECT * FROM test11fk; +-- cascade update even though the values are "equal" in the PK table +UPDATE test11pk SET x = 'ABC' WHERE x = 'abc'; +SELECT * FROM test11fk; +DELETE FROM test11pk WHERE x = 'abc'; +SELECT * FROM test11pk; +SELECT * FROM test11fk; + +-- partitioning +CREATE TABLE test20 (a int, b text COLLATE case_insensitive) PARTITION BY LIST (b); +CREATE TABLE test20_1 PARTITION OF test20 FOR VALUES IN ('abc'); +INSERT INTO test20 VALUES (1, 'abc'); +INSERT INTO test20 VALUES (2, 'ABC'); +SELECT * FROM test20_1; + +CREATE TABLE test21 (a int, b text COLLATE case_insensitive) PARTITION BY RANGE (b); +CREATE TABLE test21_1 PARTITION OF test21 FOR VALUES FROM ('ABC') TO ('DEF'); +INSERT INTO test21 VALUES (1, 'abc'); +INSERT INTO test21 VALUES (2, 'ABC'); +SELECT * FROM test21_1; + +CREATE TABLE test22 (a int, b text COLLATE case_sensitive) PARTITION BY HASH (b); +CREATE TABLE test22_0 PARTITION OF test22 FOR VALUES WITH (MODULUS 2, REMAINDER 0); +CREATE TABLE test22_1 PARTITION OF test22 FOR VALUES WITH (MODULUS 2, REMAINDER 1); +INSERT INTO test22 VALUES (1, 'def'); +INSERT INTO test22 VALUES (2, 'DEF'); +-- they end up in different partitions +SELECT (SELECT count(*) FROM test22_0) = (SELECT count(*) FROM test22_1); + +CREATE TABLE test23 (a int, b text COLLATE case_insensitive) PARTITION BY HASH (b); +CREATE TABLE test23_0 PARTITION OF test23 FOR VALUES WITH (MODULUS 2, REMAINDER 0); +CREATE TABLE test23_1 PARTITION OF test23 FOR VALUES WITH (MODULUS 2, REMAINDER 1); +INSERT INTO test23 VALUES (1, 'def'); +INSERT INTO test23 VALUES (2, 'DEF'); +-- they end up in the same partition (but it's platform-dependent which one) +SELECT (SELECT count(*) FROM test23_0) <> (SELECT count(*) FROM test23_1); + +CREATE TABLE test30 (a int, b char(3) COLLATE case_insensitive) PARTITION BY LIST (b); +CREATE TABLE test30_1 PARTITION OF test30 FOR VALUES IN ('abc'); +INSERT INTO test30 VALUES (1, 'abc'); +INSERT INTO test30 VALUES (2, 'ABC'); +SELECT * FROM test30_1; + +CREATE TABLE test31 (a int, b char(3) COLLATE case_insensitive) PARTITION BY RANGE (b); +CREATE TABLE test31_1 PARTITION OF test31 FOR VALUES FROM ('ABC') TO ('DEF'); +INSERT INTO test31 VALUES (1, 'abc'); +INSERT INTO test31 VALUES (2, 'ABC'); +SELECT * FROM test31_1; + +CREATE TABLE test32 (a int, b char(3) COLLATE case_sensitive) PARTITION BY HASH (b); +CREATE TABLE test32_0 PARTITION OF test32 FOR VALUES WITH (MODULUS 2, REMAINDER 0); +CREATE TABLE test32_1 PARTITION OF test32 FOR VALUES WITH (MODULUS 2, REMAINDER 1); +INSERT INTO test32 VALUES (1, 'def'); +INSERT INTO test32 VALUES (2, 'DEF'); +-- they end up in different partitions +SELECT (SELECT count(*) FROM test32_0) = (SELECT count(*) FROM test32_1); + +CREATE TABLE test33 (a int, b char(3) COLLATE case_insensitive) PARTITION BY HASH (b); +CREATE TABLE test33_0 PARTITION OF test33 FOR VALUES WITH (MODULUS 2, REMAINDER 0); +CREATE TABLE test33_1 PARTITION OF test33 FOR VALUES WITH (MODULUS 2, REMAINDER 1); +INSERT INTO test33 VALUES (1, 'def'); +INSERT INTO test33 VALUES (2, 'DEF'); +-- they end up in the same partition (but it's platform-dependent which one) +SELECT (SELECT count(*) FROM test33_0) <> (SELECT count(*) FROM test33_1); + + -- cleanup SET client_min_messages TO warning; DROP SCHEMA collate_tests CASCADE; diff --git a/src/test/regress/sql/collate.linux.utf8.sql b/src/test/regress/sql/collate.linux.utf8.sql index e882153244..4ca02b821d 100644 --- a/src/test/regress/sql/collate.linux.utf8.sql +++ b/src/test/regress/sql/collate.linux.utf8.sql @@ -428,6 +428,13 @@ drop type textrange_c; drop type textrange_en_us; +-- nondeterministic collations +-- (not supported with libc provider) + +CREATE COLLATION ctest_det (locale = 'en_US.utf8', deterministic = true); +CREATE COLLATION ctest_nondet (locale = 'en_US.utf8', deterministic = false); + + -- cleanup SET client_min_messages TO warning; DROP SCHEMA collate_tests CASCADE; diff --git a/src/test/regress/sql/collate.sql b/src/test/regress/sql/collate.sql index 4ddde95a5e..cb2bc22155 100644 --- a/src/test/regress/sql/collate.sql +++ b/src/test/regress/sql/collate.sql @@ -163,6 +163,11 @@ SELECT * FROM foo; SELECT a, b, a < b as lt FROM (VALUES ('a', 'B'), ('A', 'b' COLLATE "C")) v(a,b); +-- collation mismatch in subselects +SELECT * FROM collate_test10 WHERE (x, y) NOT IN (SELECT y, x FROM collate_test10); +-- now it works with overrides +SELECT * FROM collate_test10 WHERE (x COLLATE "POSIX", y COLLATE "C") NOT IN (SELECT y, x FROM collate_test10); +SELECT * FROM collate_test10 WHERE (x, y) NOT IN (SELECT y COLLATE "C", x COLLATE "POSIX" FROM collate_test10); -- casting diff --git a/src/test/regress/sql/subselect.sql b/src/test/regress/sql/subselect.sql index b5931ee700..856bbff732 100644 --- a/src/test/regress/sql/subselect.sql +++ b/src/test/regress/sql/subselect.sql @@ -435,6 +435,23 @@ insert into inner_7597 values(0, null); select * from outer_7597 where (f1, f2) not in (select * from inner_7597); +-- +-- Similar test case using text that verifies that collation +-- information is passed through by execTuplesEqual() in nodeSubplan.c +-- (otherwise it would error in texteq()) +-- + +create temp table outer_text (f1 text, f2 text); +insert into outer_text values ('a', 'a'); +insert into outer_text values ('b', 'a'); +insert into outer_text values ('a', null); +insert into outer_text values ('b', null); + +create temp table inner_text (c1 text, c2 text); +insert into inner_text values ('a', null); + +select * from outer_text where (f1, f2) not in (select * from inner_text); + -- -- Test case for premature memory release during hashing of subplan output -- diff --git a/src/test/subscription/Makefile b/src/test/subscription/Makefile index e7bbb454c7..4378819530 100644 --- a/src/test/subscription/Makefile +++ b/src/test/subscription/Makefile @@ -15,6 +15,8 @@ include $(top_builddir)/src/Makefile.global EXTRA_INSTALL = contrib/hstore +export with_icu + check: $(prove_check) diff --git a/src/test/subscription/t/012_collation.pl b/src/test/subscription/t/012_collation.pl new file mode 100644 index 0000000000..6c480de9c1 --- /dev/null +++ b/src/test/subscription/t/012_collation.pl @@ -0,0 +1,103 @@ +# Test collations, in particular nondeterministic ones +# (only works with ICU) +use strict; +use warnings; +use PostgresNode; +use TestLib; +use Test::More; + +if ($ENV{with_icu} eq 'yes') +{ + plan tests => 2; +} +else +{ + plan skip_all => 'ICU not supported by this build'; +} + +my $node_publisher = get_new_node('publisher'); +$node_publisher->init(allows_streaming => 'logical'); +$node_publisher->start; + +my $node_subscriber = get_new_node('subscriber'); +$node_subscriber->init(allows_streaming => 'logical'); +$node_subscriber->start; + +my $publisher_connstr = $node_publisher->connstr . ' dbname=postgres'; + +# Test plan: Create a table with a nondeterministic collation in the +# primary key column. Pre-insert rows on the publisher and subscriber +# that are collation-wise equal but byte-wise different. (We use a +# string in different normal forms for that.) Set up publisher and +# subscriber. Update the row on the publisher, but don't change the +# primary key column. The subscriber needs to find the row to be +# updated using the nondeterministic collation semantics. We need to +# test for both a replica identity index and for replica identity +# full, since those have different code paths internally. + +$node_subscriber->safe_psql('postgres', + q{CREATE COLLATION ctest_nondet (provider = icu, locale = 'und', deterministic = false)}); + +# table with replica identity index + +$node_publisher->safe_psql('postgres', + q{CREATE TABLE tab1 (a text PRIMARY KEY, b text)}); + +$node_publisher->safe_psql('postgres', + q{INSERT INTO tab1 VALUES (U&'\00E4bc', 'foo')}); + +$node_subscriber->safe_psql('postgres', + q{CREATE TABLE tab1 (a text COLLATE ctest_nondet PRIMARY KEY, b text)}); + +$node_subscriber->safe_psql('postgres', + q{INSERT INTO tab1 VALUES (U&'\0061\0308bc', 'foo')}); + +# table with replica identity full + +$node_publisher->safe_psql('postgres', + q{CREATE TABLE tab2 (a text, b text)}); +$node_publisher->safe_psql('postgres', + q{ALTER TABLE tab2 REPLICA IDENTITY FULL}); + +$node_publisher->safe_psql('postgres', + q{INSERT INTO tab2 VALUES (U&'\00E4bc', 'foo')}); + +$node_subscriber->safe_psql('postgres', + q{CREATE TABLE tab2 (a text COLLATE ctest_nondet, b text)}); +$node_subscriber->safe_psql('postgres', + q{ALTER TABLE tab2 REPLICA IDENTITY FULL}); + +$node_subscriber->safe_psql('postgres', + q{INSERT INTO tab2 VALUES (U&'\0061\0308bc', 'foo')}); + +# set up publication, subscription + +$node_publisher->safe_psql('postgres', + q{CREATE PUBLICATION pub1 FOR ALL TABLES}); + +$node_subscriber->safe_psql('postgres', + qq{CREATE SUBSCRIPTION sub1 CONNECTION '$publisher_connstr' PUBLICATION pub1 WITH (copy_data = false)}); + +$node_publisher->wait_for_catchup('sub1'); + +# test with replica identity index + +$node_publisher->safe_psql('postgres', + q{UPDATE tab1 SET b = 'bar' WHERE b = 'foo'}); + +$node_publisher->wait_for_catchup('sub1'); + +is($node_subscriber->safe_psql('postgres', q{SELECT b FROM tab1}), + qq(bar), + 'update with primary key with nondeterministic collation'); + +# test with replica identity full + +$node_publisher->safe_psql('postgres', + q{UPDATE tab2 SET b = 'bar' WHERE b = 'foo'}); + +$node_publisher->wait_for_catchup('sub1'); + +is($node_subscriber->safe_psql('postgres', q{SELECT b FROM tab2}), + qq(bar), + 'update with replica identity full with nondeterministic collation');