hstore: Tighten key/value parsing check for whitespaces

isspace() can be locale-sensitive depending on the platform, causing
hstore to treat as whitespace some characters that it should not.
For example, U+0105, which is encoded as 0xC4 0x85 in UTF-8, would be
discarded from the given input.

This problem is similar to 9ae2661, though it was missed that hstore
can also manipulate non-ASCII inputs, so replace the existing isspace()
calls with scanner_isspace().

This problem has existed for a long time, so backpatch all the way down.

Author: Evan Jones
Discussion: https://postgr.es/m/CA+HWA9awUW0+RV_gO9r1ABZwGoZxPztcJxPy8vMFSTbTfi4jig@mail.gmail.com
Backpatch-through: 11
This commit is contained in:
Michael Paquier 2023-06-12 09:14:03 +09:00
parent d088ba5a5a
commit d522b05c8c
6 changed files with 70 additions and 5 deletions

View File

@ -22,7 +22,7 @@ PGFILEDESC = "hstore - key/value pair data type"
HEADERS = hstore.h
REGRESS = hstore
REGRESS = hstore hstore_utf8
ifdef USE_PGXS
PG_CONFIG = pg_config

View File

@ -0,0 +1,36 @@
/*
* This test must be run in a database with UTF-8 encoding,
* because other encodings don't support all the characters used.
*/
SELECT getdatabaseencoding() <> 'UTF8'
AS skip_test \gset
\if :skip_test
\quit
\endif
SET client_encoding = utf8;
-- UTF-8 locale bug on macOS: isspace(0x85) returns true. \u0105 encodes
-- as 0xc4 0x85 in UTF-8; the 0x85 was interpreted here as a whitespace.
SELECT E'key\u0105=>value\u0105'::hstore;
hstore
------------------
"keyą"=>"valueą"
(1 row)
SELECT 'keyą=>valueą'::hstore;
hstore
------------------
"keyą"=>"valueą"
(1 row)
SELECT 'ą=>ą'::hstore;
hstore
----------
"ą"=>"ą"
(1 row)
SELECT 'keyąfoo=>valueą'::hstore;
hstore
---------------------
"keyąfoo"=>"valueą"
(1 row)

View File

@ -0,0 +1,8 @@
/*
* This test must be run in a database with UTF-8 encoding,
* because other encodings don't support all the characters used.
*/
SELECT getdatabaseencoding() <> 'UTF8'
AS skip_test \gset
\if :skip_test
\quit

View File

@ -13,6 +13,7 @@
#include "lib/stringinfo.h"
#include "libpq/pqformat.h"
#include "nodes/miscnodes.h"
#include "parser/scansup.h"
#include "utils/builtins.h"
#include "utils/json.h"
#include "utils/jsonb.h"
@ -118,7 +119,7 @@ get_val(HSParser *state, bool ignoreeq, bool *escaped)
{
st = GV_WAITESCIN;
}
else if (!isspace((unsigned char) *(state->ptr)))
else if (!scanner_isspace((unsigned char) *(state->ptr)))
{
*(state->cur) = *(state->ptr);
state->cur++;
@ -141,7 +142,7 @@ get_val(HSParser *state, bool ignoreeq, bool *escaped)
state->ptr--;
return true;
}
else if (isspace((unsigned char) *(state->ptr)))
else if (scanner_isspace((unsigned char) *(state->ptr)))
{
return true;
}
@ -255,7 +256,7 @@ parse_hstore(HSParser *state)
{
PRSEOF;
}
else if (!isspace((unsigned char) *(state->ptr)))
else if (!scanner_isspace((unsigned char) *(state->ptr)))
{
PRSSYNTAXERROR;
}
@ -309,7 +310,7 @@ parse_hstore(HSParser *state)
{
return true;
}
else if (!isspace((unsigned char) *(state->ptr)))
else if (!scanner_isspace((unsigned char) *(state->ptr)))
{
PRSSYNTAXERROR;
}

View File

@ -50,6 +50,7 @@ tests += {
'regress': {
'sql': [
'hstore',
'hstore_utf8',
],
},
}

View File

@ -0,0 +1,19 @@
/*
* This test must be run in a database with UTF-8 encoding,
* because other encodings don't support all the characters used.
*/
SELECT getdatabaseencoding() <> 'UTF8'
AS skip_test \gset
\if :skip_test
\quit
\endif
SET client_encoding = utf8;
-- UTF-8 locale bug on macOS: isspace(0x85) returns true. \u0105 encodes
-- as 0xc4 0x85 in UTF-8; the 0x85 was interpreted here as a whitespace.
SELECT E'key\u0105=>value\u0105'::hstore;
SELECT 'keyą=>valueą'::hstore;
SELECT 'ą=>ą'::hstore;
SELECT 'keyąfoo=>valueą'::hstore;