Add three-parameter forms of array_to_string and string_to_array, to allow

better handling of NULL elements within the arrays.  The third parameter
is a string that should be used to represent a NULL element, or should
be translated into a NULL element, respectively.  If the third parameter
is NULL it behaves the same as the two-parameter form.

There are two incompatible changes in the behavior of the two-parameter form
of string_to_array.  First, it will return an empty (zero-element) array
rather than NULL when the input string is of zero length.  Second, if the
field separator is NULL, the function splits the string into individual
characters, rather than returning NULL as before.  These two changes make
this form fully compatible with the behavior of the new three-parameter form.

Pavel Stehule, reviewed by Brendan Jurd
This commit is contained in:
Tom Lane 2010-08-10 21:51:00 +00:00
parent 5148a04636
commit 33f43725fb
9 changed files with 390 additions and 95 deletions

View File

@ -1,4 +1,4 @@
<!-- $PostgreSQL: pgsql/doc/src/sgml/func.sgml,v 1.525 2010/08/08 19:15:27 tgl Exp $ -->
<!-- $PostgreSQL: pgsql/doc/src/sgml/func.sgml,v 1.526 2010/08/10 21:51:00 tgl Exp $ -->
<chapter id="functions">
<title>Functions and Operators</title>
@ -9736,13 +9736,14 @@ SELECT NULLIF(value, '(none)') ...
<row>
<entry>
<literal>
<function>array_to_string</function>(<type>anyarray</type>, <type>text</type>)
<function>array_to_string</function>(<type>anyarray</type>, <type>text</type> <optional>, <type>text</type></optional>)
</literal>
</entry>
<entry><type>text</type></entry>
<entry>concatenates array elements using supplied delimiter</entry>
<entry><literal>array_to_string(ARRAY[1, 2, 3], '~^~')</literal></entry>
<entry><literal>1~^~2~^~3</literal></entry>
<entry>concatenates array elements using supplied delimiter and
optional null string</entry>
<entry><literal>array_to_string(ARRAY[1, 2, 3, NULL, 5], ',', '*')</literal></entry>
<entry><literal>1,2,3,*,5</literal></entry>
</row>
<row>
<entry>
@ -9758,13 +9759,14 @@ SELECT NULLIF(value, '(none)') ...
<row>
<entry>
<literal>
<function>string_to_array</function>(<type>text</type>, <type>text</type>)
<function>string_to_array</function>(<type>text</type>, <type>text</type> <optional>, <type>text</type></optional>)
</literal>
</entry>
<entry><type>text[]</type></entry>
<entry>splits string into array elements using supplied delimiter</entry>
<entry><literal>string_to_array('xx~^~yy~^~zz', '~^~')</literal></entry>
<entry><literal>{xx,yy,zz}</literal></entry>
<entry>splits string into array elements using supplied delimiter and
optional null string</entry>
<entry><literal>string_to_array('xx~^~yy~^~zz', '~^~', 'yy')</literal></entry>
<entry><literal>{xx,NULL,zz}</literal></entry>
</row>
<row>
<entry>
@ -9781,6 +9783,34 @@ SELECT NULLIF(value, '(none)') ...
</tgroup>
</table>
<para>
In <function>string_to_array</function>, if the delimiter parameter is
NULL, each character in the input string will become a separate element in
the resulting array. If the delimiter is an empty string, then the entire
input string is returned as a one-element array. Otherwise the input
string is split at each occurrence of the delimiter string.
</para>
<para>
In <function>string_to_array</function>, if the null-string parameter
is omitted or NULL, none of the substrings of the input will be replaced
by NULL.
In <function>array_to_string</function>, if the null-string parameter
is omitted or NULL, any null elements in the array are simply skipped
and not represented in the output string.
</para>
<note>
<para>
There are two differences in the behavior of <function>string_to_array</>
from pre-9.1 versions of <productname>PostgreSQL</>.
First, it will return an empty (zero-element) array rather than NULL when
the input string is of zero length. Second, if the delimiter string is
NULL, the function splits the input into individual characters, rather
than returning NULL as before.
</para>
</note>
<para>
See also <xref linkend="functions-aggregate"> about the aggregate
function <function>array_agg</function> for use with arrays.

View File

@ -6,7 +6,7 @@
* Copyright (c) 2003-2010, PostgreSQL Global Development Group
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/array_userfuncs.c,v 1.35 2010/02/26 02:01:06 momjian Exp $
* $PostgreSQL: pgsql/src/backend/utils/adt/array_userfuncs.c,v 1.36 2010/08/10 21:51:00 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -407,9 +407,11 @@ ArrayType *
create_singleton_array(FunctionCallInfo fcinfo,
Oid element_type,
Datum element,
bool isNull,
int ndims)
{
Datum dvalues[1];
bool nulls[1];
int16 typlen;
bool typbyval;
char typalign;
@ -429,6 +431,7 @@ create_singleton_array(FunctionCallInfo fcinfo,
ndims, MAXDIM)));
dvalues[0] = element;
nulls[0] = isNull;
for (i = 0; i < ndims; i++)
{
@ -462,7 +465,7 @@ create_singleton_array(FunctionCallInfo fcinfo,
typbyval = my_extra->typbyval;
typalign = my_extra->typalign;
return construct_md_array(dvalues, NULL, ndims, dims, lbs, element_type,
return construct_md_array(dvalues, nulls, ndims, dims, lbs, element_type,
typlen, typbyval, typalign);
}

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/varlena.c,v 1.178 2010/08/05 18:21:17 tgl Exp $
* $PostgreSQL: pgsql/src/backend/utils/adt/varlena.c,v 1.179 2010/08/10 21:51:00 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -75,6 +75,10 @@ static bytea *bytea_substring(Datum str,
static bytea *bytea_overlay(bytea *t1, bytea *t2, int sp, int sl);
static StringInfo makeStringAggState(FunctionCallInfo fcinfo);
static Datum text_to_array_internal(PG_FUNCTION_ARGS);
static text *array_to_text_internal(FunctionCallInfo fcinfo, ArrayType *v,
char *fldsep, char *null_string);
/*****************************************************************************
* CONVERSION ROUTINES EXPORTED FOR USE BY C CODE *
@ -2964,99 +2968,205 @@ split_text(PG_FUNCTION_ARGS)
PG_RETURN_TEXT_P(result_text);
}
/*
* Convenience function to return true when two text params are equal.
*/
static bool
text_isequal(text *txt1, text *txt2)
{
return DatumGetBool(DirectFunctionCall2(texteq,
PointerGetDatum(txt1),
PointerGetDatum(txt2)));
}
/*
* text_to_array
* parse input string
* return text array of elements
* parse input string and return text array of elements,
* based on provided field separator
*/
Datum
text_to_array(PG_FUNCTION_ARGS)
{
text *inputstring = PG_GETARG_TEXT_PP(0);
text *fldsep = PG_GETARG_TEXT_PP(1);
return text_to_array_internal(fcinfo);
}
/*
* text_to_array_null
* parse input string and return text array of elements,
* based on provided field separator and null string
*
* This is a separate entry point only to prevent the regression tests from
* complaining about different argument sets for the same internal function.
*/
Datum
text_to_array_null(PG_FUNCTION_ARGS)
{
return text_to_array_internal(fcinfo);
}
/*
* common code for text_to_array and text_to_array_null functions
*
* These are not strict so we have to test for null inputs explicitly.
*/
static Datum
text_to_array_internal(PG_FUNCTION_ARGS)
{
text *inputstring;
text *fldsep;
text *null_string;
int inputstring_len;
int fldsep_len;
TextPositionState state;
int fldnum;
int start_posn;
int end_posn;
int chunk_len;
char *start_ptr;
text *result_text;
bool is_null;
ArrayBuildState *astate = NULL;
text_position_setup(inputstring, fldsep, &state);
/*
* Note: we check the converted string length, not the original, because
* they could be different if the input contained invalid encoding.
*/
inputstring_len = state.len1;
fldsep_len = state.len2;
/* return NULL for empty input string */
if (inputstring_len < 1)
{
text_position_cleanup(&state);
/* when input string is NULL, then result is NULL too */
if (PG_ARGISNULL(0))
PG_RETURN_NULL();
}
/*
* empty field separator return one element, 1D, array using the input
* string
*/
if (fldsep_len < 1)
inputstring = PG_GETARG_TEXT_PP(0);
/* fldsep can be NULL */
if (!PG_ARGISNULL(1))
fldsep = PG_GETARG_TEXT_PP(1);
else
fldsep = NULL;
/* null_string can be NULL or omitted */
if (PG_NARGS() > 2 && !PG_ARGISNULL(2))
null_string = PG_GETARG_TEXT_PP(2);
else
null_string = NULL;
if (fldsep != NULL)
{
/*
* Normal case with non-null fldsep. Use the text_position machinery
* to search for occurrences of fldsep.
*/
TextPositionState state;
int fldnum;
int start_posn;
int end_posn;
int chunk_len;
text_position_setup(inputstring, fldsep, &state);
/*
* Note: we check the converted string length, not the original,
* because they could be different if the input contained invalid
* encoding.
*/
inputstring_len = state.len1;
fldsep_len = state.len2;
/* return empty array for empty input string */
if (inputstring_len < 1)
{
text_position_cleanup(&state);
PG_RETURN_ARRAYTYPE_P(construct_empty_array(TEXTOID));
}
/*
* empty field separator: return the input string as a one-element
* array
*/
if (fldsep_len < 1)
{
text_position_cleanup(&state);
/* single element can be a NULL too */
is_null = null_string ? text_isequal(inputstring, null_string) : false;
PG_RETURN_ARRAYTYPE_P(create_singleton_array(fcinfo, TEXTOID,
PointerGetDatum(inputstring),
is_null, 1));
}
start_posn = 1;
/* start_ptr points to the start_posn'th character of inputstring */
start_ptr = VARDATA_ANY(inputstring);
for (fldnum = 1;; fldnum++) /* field number is 1 based */
{
CHECK_FOR_INTERRUPTS();
end_posn = text_position_next(start_posn, &state);
if (end_posn == 0)
{
/* fetch last field */
chunk_len = ((char *) inputstring + VARSIZE_ANY(inputstring)) - start_ptr;
}
else
{
/* fetch non-last field */
chunk_len = charlen_to_bytelen(start_ptr, end_posn - start_posn);
}
/* must build a temp text datum to pass to accumArrayResult */
result_text = cstring_to_text_with_len(start_ptr, chunk_len);
is_null = null_string ? text_isequal(result_text, null_string) : false;
/* stash away this field */
astate = accumArrayResult(astate,
PointerGetDatum(result_text),
is_null,
TEXTOID,
CurrentMemoryContext);
pfree(result_text);
if (end_posn == 0)
break;
start_posn = end_posn;
start_ptr += chunk_len;
start_posn += fldsep_len;
start_ptr += charlen_to_bytelen(start_ptr, fldsep_len);
}
text_position_cleanup(&state);
PG_RETURN_ARRAYTYPE_P(create_singleton_array(fcinfo, TEXTOID,
PointerGetDatum(inputstring), 1));
}
start_posn = 1;
/* start_ptr points to the start_posn'th character of inputstring */
start_ptr = VARDATA_ANY(inputstring);
for (fldnum = 1;; fldnum++) /* field number is 1 based */
else
{
CHECK_FOR_INTERRUPTS();
end_posn = text_position_next(start_posn, &state);
if (end_posn == 0)
/*
* When fldsep is NULL, each character in the inputstring becomes an
* element in the result array. The separator is effectively the space
* between characters.
*/
inputstring_len = VARSIZE_ANY_EXHDR(inputstring);
/* return empty array for empty input string */
if (inputstring_len < 1)
PG_RETURN_ARRAYTYPE_P(construct_empty_array(TEXTOID));
start_ptr = VARDATA_ANY(inputstring);
while (inputstring_len > 0)
{
/* fetch last field */
chunk_len = ((char *) inputstring + VARSIZE_ANY(inputstring)) - start_ptr;
int chunk_len = pg_mblen(start_ptr);
CHECK_FOR_INTERRUPTS();
/* must build a temp text datum to pass to accumArrayResult */
result_text = cstring_to_text_with_len(start_ptr, chunk_len);
is_null = null_string ? text_isequal(result_text, null_string) : false;
/* stash away this field */
astate = accumArrayResult(astate,
PointerGetDatum(result_text),
is_null,
TEXTOID,
CurrentMemoryContext);
pfree(result_text);
start_ptr += chunk_len;
inputstring_len -= chunk_len;
}
else
{
/* fetch non-last field */
chunk_len = charlen_to_bytelen(start_ptr, end_posn - start_posn);
}
/* must build a temp text datum to pass to accumArrayResult */
result_text = cstring_to_text_with_len(start_ptr, chunk_len);
/* stash away this field */
astate = accumArrayResult(astate,
PointerGetDatum(result_text),
false,
TEXTOID,
CurrentMemoryContext);
pfree(result_text);
if (end_posn == 0)
break;
start_posn = end_posn;
start_ptr += chunk_len;
start_posn += fldsep_len;
start_ptr += charlen_to_bytelen(start_ptr, fldsep_len);
}
text_position_cleanup(&state);
PG_RETURN_ARRAYTYPE_P(makeArrayResult(astate,
CurrentMemoryContext));
}
@ -3071,6 +3181,48 @@ array_to_text(PG_FUNCTION_ARGS)
{
ArrayType *v = PG_GETARG_ARRAYTYPE_P(0);
char *fldsep = text_to_cstring(PG_GETARG_TEXT_PP(1));
PG_RETURN_TEXT_P(array_to_text_internal(fcinfo, v, fldsep, NULL));
}
/*
* array_to_text_null
* concatenate Cstring representation of input array elements
* using provided field separator and null string
*
* This version is not strict so we have to test for null inputs explicitly.
*/
Datum
array_to_text_null(PG_FUNCTION_ARGS)
{
ArrayType *v;
char *fldsep;
char *null_string;
/* returns NULL when first or second parameter is NULL */
if (PG_ARGISNULL(0) || PG_ARGISNULL(1))
PG_RETURN_NULL();
v = PG_GETARG_ARRAYTYPE_P(0);
fldsep = text_to_cstring(PG_GETARG_TEXT_PP(1));
/* NULL null string is passed through as a null pointer */
if (!PG_ARGISNULL(2))
null_string = text_to_cstring(PG_GETARG_TEXT_PP(2));
else
null_string = NULL;
PG_RETURN_TEXT_P(array_to_text_internal(fcinfo, v, fldsep, null_string));
}
/*
* common code for array_to_text and array_to_text_null functions
*/
static text *
array_to_text_internal(FunctionCallInfo fcinfo, ArrayType *v,
char *fldsep, char *null_string)
{
text *result;
int nitems,
*dims,
ndims;
@ -3092,7 +3244,7 @@ array_to_text(PG_FUNCTION_ARGS)
/* if there are no elements, return an empty string */
if (nitems == 0)
PG_RETURN_TEXT_P(cstring_to_text(""));
return cstring_to_text_with_len("", 0);
element_type = ARR_ELEMTYPE(v);
initStringInfo(&buf);
@ -3140,7 +3292,15 @@ array_to_text(PG_FUNCTION_ARGS)
/* Get source element, checking for NULL */
if (bitmap && (*bitmap & bitmask) == 0)
{
/* we ignore nulls */
/* if null_string is NULL, we just ignore null elements */
if (null_string != NULL)
{
if (printed)
appendStringInfo(&buf, "%s%s", fldsep, null_string);
else
appendStringInfoString(&buf, null_string);
printed = true;
}
}
else
{
@ -3169,8 +3329,11 @@ array_to_text(PG_FUNCTION_ARGS)
}
}
}
result = cstring_to_text_with_len(buf.data, buf.len);
pfree(buf.data);
PG_RETURN_TEXT_P(cstring_to_text_with_len(buf.data, buf.len));
return result;
}
#define HEXBASE 16

View File

@ -37,7 +37,7 @@
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.593 2010/08/08 19:15:27 tgl Exp $
* $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.594 2010/08/10 21:51:00 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -53,6 +53,6 @@
*/
/* yyyymmddN */
#define CATALOG_VERSION_NO 201008081
#define CATALOG_VERSION_NO 201008101
#endif

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.577 2010/08/08 19:15:27 tgl Exp $
* $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.578 2010/08/10 21:51:00 tgl Exp $
*
* NOTES
* The script catalog/genbki.pl reads this file and generates .bki
@ -1018,10 +1018,14 @@ DATA(insert OID = 379 ( array_prepend PGNSP PGUID 12 1 0 0 f f f f f i 2 0 2
DESCR("prepend element onto front of array");
DATA(insert OID = 383 ( array_cat PGNSP PGUID 12 1 0 0 f f f f f i 2 0 2277 "2277 2277" _null_ _null_ _null_ _null_ array_cat _null_ _null_ _null_ ));
DESCR("concatenate two arrays");
DATA(insert OID = 394 ( string_to_array PGNSP PGUID 12 1 0 0 f f f t f i 2 0 1009 "25 25" _null_ _null_ _null_ _null_ text_to_array _null_ _null_ _null_ ));
DATA(insert OID = 394 ( string_to_array PGNSP PGUID 12 1 0 0 f f f f f i 2 0 1009 "25 25" _null_ _null_ _null_ _null_ text_to_array _null_ _null_ _null_ ));
DESCR("split delimited text into text[]");
DATA(insert OID = 395 ( array_to_string PGNSP PGUID 12 1 0 0 f f f t f s 2 0 25 "2277 25" _null_ _null_ _null_ _null_ array_to_text _null_ _null_ _null_ ));
DESCR("concatenate array elements, using delimiter, into text");
DATA(insert OID = 376 ( string_to_array PGNSP PGUID 12 1 0 0 f f f f f i 3 0 1009 "25 25 25" _null_ _null_ _null_ _null_ text_to_array_null _null_ _null_ _null_ ));
DESCR("split delimited text into text[], with null string");
DATA(insert OID = 384 ( array_to_string PGNSP PGUID 12 1 0 0 f f f f f s 3 0 25 "2277 25 25" _null_ _null_ _null_ _null_ array_to_text_null _null_ _null_ _null_ ));
DESCR("concatenate array elements, using delimiter and null string, into text");
DATA(insert OID = 515 ( array_larger PGNSP PGUID 12 1 0 0 f f f t f i 2 0 2277 "2277 2277" _null_ _null_ _null_ _null_ array_larger _null_ _null_ _null_ ));
DESCR("larger of two");
DATA(insert OID = 516 ( array_smaller PGNSP PGUID 12 1 0 0 f f f t f i 2 0 2277 "2277 2277" _null_ _null_ _null_ _null_ array_smaller _null_ _null_ _null_ ));

View File

@ -49,7 +49,7 @@
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/utils/array.h,v 1.77 2010/01/02 16:58:09 momjian Exp $
* $PostgreSQL: pgsql/src/include/utils/array.h,v 1.78 2010/08/10 21:51:00 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -274,6 +274,7 @@ extern Datum array_cat(PG_FUNCTION_ARGS);
extern ArrayType *create_singleton_array(FunctionCallInfo fcinfo,
Oid element_type,
Datum element,
bool isNull,
int ndims);
extern Datum array_agg_transfn(PG_FUNCTION_ARGS);

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/utils/builtins.h,v 1.353 2010/08/05 18:21:19 tgl Exp $
* $PostgreSQL: pgsql/src/include/utils/builtins.h,v 1.354 2010/08/10 21:51:00 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -716,6 +716,8 @@ extern text *replace_text_regexp(text *src_text, void *regexp,
extern Datum split_text(PG_FUNCTION_ARGS);
extern Datum text_to_array(PG_FUNCTION_ARGS);
extern Datum array_to_text(PG_FUNCTION_ARGS);
extern Datum text_to_array_null(PG_FUNCTION_ARGS);
extern Datum array_to_text_null(PG_FUNCTION_ARGS);
extern Datum to_hex32(PG_FUNCTION_ARGS);
extern Datum to_hex64(PG_FUNCTION_ARGS);
extern Datum md5_text(PG_FUNCTION_ARGS);

View File

@ -1055,21 +1055,99 @@ select string_to_array('1|2|3', '');
select string_to_array('', '|');
string_to_array
-----------------
{}
(1 row)
select string_to_array('1|2|3', NULL);
string_to_array
-----------------
{1,|,2,|,3}
(1 row)
select string_to_array(NULL, '|') IS NULL;
?column?
----------
t
(1 row)
select string_to_array('abc', '');
string_to_array
-----------------
{abc}
(1 row)
select string_to_array('abc', '', 'abc');
string_to_array
-----------------
{NULL}
(1 row)
select string_to_array('abc', ',');
string_to_array
-----------------
{abc}
(1 row)
select string_to_array('abc', ',', 'abc');
string_to_array
-----------------
{NULL}
(1 row)
select string_to_array('1,2,3,4,,6', ',');
string_to_array
-----------------
{1,2,3,4,"",6}
(1 row)
select string_to_array('1,2,3,4,,6', ',', '');
string_to_array
------------------
{1,2,3,4,NULL,6}
(1 row)
select string_to_array('1,2,3,4,*,6', ',', '*');
string_to_array
------------------
{1,2,3,4,NULL,6}
(1 row)
select array_to_string(NULL::int4[], ',') IS NULL;
?column?
----------
t
(1 row)
select array_to_string('{}'::int4[], ',');
array_to_string
-----------------
(1 row)
select string_to_array(NULL, '|');
string_to_array
select array_to_string(array[1,2,3,4,NULL,6], ',');
array_to_string
-----------------
1,2,3,4,6
(1 row)
select array_to_string(array[1,2,3,4,NULL,6], ',', '*');
array_to_string
-----------------
1,2,3,4,*,6
(1 row)
select array_to_string(array[1,2,3,4,NULL,6], NULL);
array_to_string
-----------------
(1 row)
select array_to_string(array[1,2,3,4,NULL,6], ',', NULL);
array_to_string
-----------------
1,2,3,4,6
(1 row)
select array_to_string(string_to_array('1|2|3', '|'), '|');
array_to_string
-----------------

View File

@ -383,7 +383,21 @@ select string_to_array('1||2|3||', '||');
select string_to_array('1|2|3', '');
select string_to_array('', '|');
select string_to_array('1|2|3', NULL);
select string_to_array(NULL, '|');
select string_to_array(NULL, '|') IS NULL;
select string_to_array('abc', '');
select string_to_array('abc', '', 'abc');
select string_to_array('abc', ',');
select string_to_array('abc', ',', 'abc');
select string_to_array('1,2,3,4,,6', ',');
select string_to_array('1,2,3,4,,6', ',', '');
select string_to_array('1,2,3,4,*,6', ',', '*');
select array_to_string(NULL::int4[], ',') IS NULL;
select array_to_string('{}'::int4[], ',');
select array_to_string(array[1,2,3,4,NULL,6], ',');
select array_to_string(array[1,2,3,4,NULL,6], ',', '*');
select array_to_string(array[1,2,3,4,NULL,6], NULL);
select array_to_string(array[1,2,3,4,NULL,6], ',', NULL);
select array_to_string(string_to_array('1|2|3', '|'), '|');