Use wide-character library routines, if available, for upper/lower/initcap
functions.  This allows these functions to work correctly with Unicode and
other multibyte encodings.  Per prior discussion.

Also, revert my earlier change to move installation path mashing from
Makefile.global to configure.  It turned out not to work well because the
configure script works with unexpanded variables, and so fails to match in
cases where it should match.
This commit is contained in:
Tom Lane 2004-05-22 00:34:51 +00:00
parent add8b70dda
commit 3983869439
5 changed files with 249 additions and 117 deletions

63
configure vendored
View File

@ -6955,7 +6955,9 @@ done
for ac_header in crypt.h dld.h endian.h fp_class.h getopt.h ieeefp.h poll.h pwd.h sys/ipc.h sys/poll.h sys/pstat.h sys/select.h sys/sem.h sys/socket.h sys/shm.h sys/un.h termios.h utime.h kernel/OS.h kernel/image.h SupportDefs.h
for ac_header in crypt.h dld.h endian.h fp_class.h getopt.h ieeefp.h poll.h pwd.h sys/ipc.h sys/poll.h sys/pstat.h sys/select.h sys/sem.h sys/socket.h sys/shm.h sys/un.h termios.h utime.h wchar.h wctype.h kernel/OS.h kernel/image.h SupportDefs.h
do
as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh`
if eval "test \"\${$as_ac_Header+set}\" = set"; then
@ -10968,7 +10970,9 @@ test $ac_cv_func_memcmp_working = no && LIBOBJS="$LIBOBJS memcmp.$ac_objext"
for ac_func in cbrt dlopen fcvt fdatasync getpeereid memmove poll pstat setproctitle setsid sigprocmask symlink sysconf utime utimes waitpid
for ac_func in cbrt dlopen fcvt fdatasync getpeereid memmove poll pstat setproctitle setsid sigprocmask symlink sysconf towlower utime utimes waitpid wcstombs
do
as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
echo "$as_me:$LINENO: checking for $ac_func" >&5
@ -17939,59 +17943,6 @@ CFLAGS="$_CFLAGS"
LIBS="$_LIBS"
fi
# Adjust installation directories.
#
# These are initially set by the equivalent --xxxdir configure options.
# We append "postgresql" to some of them, if the string does not already
# contain "pgsql" or "postgres", in order to avoid directory clutter.
if echo "$libexecdir" | egrep 'pgsql|postgres' >/dev/null 2>&1
then
:
else
libexecdir="$libexecdir/postgresql"
fi
if echo "$datadir" | egrep 'pgsql|postgres' >/dev/null 2>&1
then
:
else
datadir="$datadir/postgresql"
fi
if echo "$sysconfdir" | egrep 'pgsql|postgres' >/dev/null 2>&1
then
:
else
sysconfdir="$sysconfdir/postgresql"
fi
pkglibdir="$libdir"
if echo "$pkglibdir" | egrep 'pgsql|postgres' >/dev/null 2>&1
then
:
else
pkglibdir="$pkglibdir/postgresql"
fi
pkgincludedir="$includedir"
if echo "$pkgincludedir" | egrep 'pgsql|postgres' >/dev/null 2>&1
then
:
else
pkgincludedir="$pkgincludedir/postgresql"
fi
if echo "$docdir" | egrep 'pgsql|postgres' >/dev/null 2>&1
then
:
else
docdir="$docdir/postgresql"
fi
# prepare build tree if outside source tree
# Note 1: test -ef might not exist, but it's more reliable than `pwd`.
# Note 2: /bin/pwd might be better than shell's built-in at getting
@ -18692,8 +18643,6 @@ s,@have_docbook@,$have_docbook,;t t
s,@DOCBOOKSTYLE@,$DOCBOOKSTYLE,;t t
s,@COLLATEINDEX@,$COLLATEINDEX,;t t
s,@SGMLSPL@,$SGMLSPL,;t t
s,@pkglibdir@,$pkglibdir,;t t
s,@pkgincludedir@,$pkgincludedir,;t t
s,@vpath_build@,$vpath_build,;t t
CEOF

View File

@ -1,5 +1,5 @@
dnl Process this file with autoconf to produce a configure script.
dnl $PostgreSQL: pgsql/configure.in,v 1.357 2004/05/21 20:56:47 tgl Exp $
dnl $PostgreSQL: pgsql/configure.in,v 1.358 2004/05/22 00:34:49 tgl Exp $
dnl
dnl Developers, please strive to achieve this order:
dnl
@ -670,7 +670,7 @@ fi
##
dnl sys/socket.h is required by AC_FUNC_ACCEPT_ARGTYPES
AC_CHECK_HEADERS([crypt.h dld.h endian.h fp_class.h getopt.h ieeefp.h poll.h pwd.h sys/ipc.h sys/poll.h sys/pstat.h sys/select.h sys/sem.h sys/socket.h sys/shm.h sys/un.h termios.h utime.h kernel/OS.h kernel/image.h SupportDefs.h])
AC_CHECK_HEADERS([crypt.h dld.h endian.h fp_class.h getopt.h ieeefp.h poll.h pwd.h sys/ipc.h sys/poll.h sys/pstat.h sys/select.h sys/sem.h sys/socket.h sys/shm.h sys/un.h termios.h utime.h wchar.h wctype.h kernel/OS.h kernel/image.h SupportDefs.h])
# At least on IRIX, cpp test for netinet/tcp.h will fail unless
# netinet/in.h is included first.
@ -795,7 +795,7 @@ PGAC_FUNC_GETTIMEOFDAY_1ARG
# SunOS doesn't handle negative byte comparisons properly with +/- return
AC_FUNC_MEMCMP
AC_CHECK_FUNCS([cbrt dlopen fcvt fdatasync getpeereid memmove poll pstat setproctitle setsid sigprocmask symlink sysconf utime utimes waitpid])
AC_CHECK_FUNCS([cbrt dlopen fcvt fdatasync getpeereid memmove poll pstat setproctitle setsid sigprocmask symlink sysconf towlower utime utimes waitpid wcstombs])
AC_CHECK_DECLS(fdatasync, [], [], [#include <unistd.h>])
@ -1202,59 +1202,6 @@ CFLAGS="$_CFLAGS"
LIBS="$_LIBS"
fi
# Adjust installation directories.
#
# These are initially set by the equivalent --xxxdir configure options.
# We append "postgresql" to some of them, if the string does not already
# contain "pgsql" or "postgres", in order to avoid directory clutter.
if echo "$libexecdir" | egrep 'pgsql|postgres' >/dev/null 2>&1
then
:
else
libexecdir="$libexecdir/postgresql"
fi
if echo "$datadir" | egrep 'pgsql|postgres' >/dev/null 2>&1
then
:
else
datadir="$datadir/postgresql"
fi
if echo "$sysconfdir" | egrep 'pgsql|postgres' >/dev/null 2>&1
then
:
else
sysconfdir="$sysconfdir/postgresql"
fi
pkglibdir="$libdir"
if echo "$pkglibdir" | egrep 'pgsql|postgres' >/dev/null 2>&1
then
:
else
pkglibdir="$pkglibdir/postgresql"
fi
AC_SUBST(pkglibdir)
pkgincludedir="$includedir"
if echo "$pkgincludedir" | egrep 'pgsql|postgres' >/dev/null 2>&1
then
:
else
pkgincludedir="$pkgincludedir/postgresql"
fi
AC_SUBST(pkgincludedir)
if echo "$docdir" | egrep 'pgsql|postgres' >/dev/null 2>&1
then
:
else
docdir="$docdir/postgresql"
fi
# prepare build tree if outside source tree
# Note 1: test -ef might not exist, but it's more reliable than `pwd`.
# Note 2: /bin/pwd might be better than shell's built-in at getting

View File

@ -1,5 +1,5 @@
# -*-makefile-*-
# $PostgreSQL: pgsql/src/Makefile.global.in,v 1.186 2004/05/21 20:56:48 tgl Exp $
# $PostgreSQL: pgsql/src/Makefile.global.in,v 1.187 2004/05/22 00:34:49 tgl Exp $
#------------------------------------------------------------------------------
# All PostgreSQL makefiles include this file and use the variables it sets,
@ -51,22 +51,53 @@ configure_args = @configure_args@
##########################################################################
#
# Installation directories
#
# These are set by the equivalent --xxxdir configure options. We
# append "postgresql" to some of them, if the string does not already
# contain "pgsql" or "postgres", in order to avoid directory clutter.
prefix := @prefix@
exec_prefix := @exec_prefix@
bindir := @bindir@
sbindir := @sbindir@
libexecdir := @libexecdir@
ifeq "$(findstring pgsql, $(libexecdir))" ""
ifeq "$(findstring postgres, $(libexecdir))" ""
override libexecdir := $(libexecdir)/postgresql
endif
endif
datadir := @datadir@
ifeq "$(findstring pgsql, $(datadir))" ""
ifeq "$(findstring postgres, $(datadir))" ""
override datadir := $(datadir)/postgresql
endif
endif
sysconfdir := @sysconfdir@
ifeq "$(findstring pgsql, $(sysconfdir))" ""
ifeq "$(findstring postgres, $(sysconfdir))" ""
override sysconfdir := $(sysconfdir)/postgresql
endif
endif
libdir := @libdir@
pkglibdir := @pkglibdir@
pkglibdir = $(libdir)
ifeq "$(findstring pgsql, $(pkglibdir))" ""
ifeq "$(findstring postgres, $(pkglibdir))" ""
override pkglibdir := $(pkglibdir)/postgresql
endif
endif
includedir := @includedir@
pkgincludedir := @pkgincludedir@
pkgincludedir = $(includedir)
ifeq "$(findstring pgsql, $(pkgincludedir))" ""
ifeq "$(findstring postgres, $(pkgincludedir))" ""
override pkgincludedir := $(pkgincludedir)/postgresql
endif
endif
includedir_server = $(pkgincludedir)/server
includedir_internal = $(pkgincludedir)/internal
@ -74,6 +105,11 @@ mandir := @mandir@
sqlmansect_dummy = l
docdir := @docdir@
ifeq "$(findstring pgsql, $(docdir))" ""
ifeq "$(findstring postgres, $(docdir))" ""
override docdir := $(docdir)/postgresql
endif
endif
localedir := @localedir@

View File

@ -9,23 +9,144 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/oracle_compat.c,v 1.50 2004/02/27 03:59:23 neilc Exp $
* $PostgreSQL: pgsql/src/backend/utils/adt/oracle_compat.c,v 1.51 2004/05/22 00:34:50 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
/*
* towlower() and friends should be in <wctype.h>, but some pre-C99 systems
* declare them in <wchar.h>.
*/
#include <ctype.h>
#ifdef HAVE_WCHAR_H
#include <wchar.h>
#endif
#ifdef HAVE_WCTYPE_H
#include <wctype.h>
#endif
#include "utils/builtins.h"
#include "mb/pg_wchar.h"
/*
* If the system provides the needed functions for wide-character manipulation
* (which are all standardized by C99), then we implement upper/lower/initcap
* using wide-character functions. Otherwise we use the traditional <ctype.h>
* functions, which of course will not work as desired in multibyte character
* sets. Note that in either case we are effectively assuming that the
* database character encoding matches the encoding implied by LC_CTYPE.
*
* We assume if we have these two functions, we have their friends too, and
* can use the wide-character method.
*/
#if defined(HAVE_WCSTOMBS) && defined(HAVE_TOWLOWER)
#define USE_WIDE_UPPER_LOWER
#endif
static text *dotrim(const char *string, int stringlen,
const char *set, int setlen,
bool doltrim, bool dortrim);
#ifdef USE_WIDE_UPPER_LOWER
/*
 * Convert a TEXT value into a palloc'd, zero-terminated wchar_t string.
 *
 * txt: the input value; its payload (VARSIZE - VARHDRSZ bytes) is copied
 * into a null-terminated scratch buffer because mbstowcs() requires a
 * C string.
 *
 * Returns a palloc'd wide-character string.  Raises ERROR if the input is
 * too large for the workspace size computation, or if mbstowcs() reports
 * an invalid multibyte sequence.
 */
static wchar_t *
texttowcs(const text *txt)
{
	int			nbytes = VARSIZE(txt) - VARHDRSZ;
	char	   *workstr;
	wchar_t    *result;
	size_t		ncodes;

	/* Overflow paranoia */
	if (nbytes < 0 ||
		nbytes > (int) (INT_MAX / sizeof(wchar_t)) - 1)
		ereport(ERROR,
				(errcode(ERRCODE_OUT_OF_MEMORY),
				 errmsg("out of memory")));

	/* Need a null-terminated version of the input */
	workstr = (char *) palloc(nbytes + 1);
	memcpy(workstr, VARDATA(txt), nbytes);
	workstr[nbytes] = '\0';

	/* Output workspace cannot have more codes than input bytes */
	result = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));

	/* Do the conversion */
	ncodes = mbstowcs(result, workstr, nbytes + 1);

	if (ncodes == (size_t) -1)
	{
		/*
		 * Invalid multibyte character encountered.  We try to give a useful
		 * error message by letting pg_verifymbstr check the string.  But
		 * it's possible that the string is OK to us, and not OK to mbstowcs
		 * --- this suggests that the LC_CTYPE locale is different from the
		 * database encoding.  Give a generic error message if verifymbstr
		 * can't find anything wrong.
		 */
		pg_verifymbstr(workstr, nbytes, false);
		ereport(ERROR,
				(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
				 errmsg("invalid multibyte character for locale")));
	}

	Assert(ncodes <= (size_t) nbytes);

	/*
	 * The scratch copy has served its purpose; release it rather than
	 * leaving it allocated alongside the result the caller will pfree.
	 */
	pfree(workstr);

	return result;
}
/*
 * Build a palloc'd TEXT value from a zero-terminated wchar_t string.
 *
 * str:    the wide-character string to convert (must be zero-terminated).
 * ncodes: the number of wide characters in str, not counting the
 *         terminator; callers already know it, so we take it as an
 *         argument and use it to size the output buffer.
 *
 * Raises ERROR if ncodes is out of range for the size computation or if
 * wcstombs() reports a character it cannot convert.
 */
static text *
wcstotext(const wchar_t *str, int ncodes)
{
	int			maxcodes = (int) ((INT_MAX - VARHDRSZ) / MB_CUR_MAX) - 1;
	size_t		bufsize;
	size_t		outlen;
	text	   *out;

	/* Refuse lengths whose byte count could overflow the computation below */
	if (ncodes < 0 || ncodes > maxcodes)
		ereport(ERROR,
				(errcode(ERRCODE_OUT_OF_MEMORY),
				 errmsg("out of memory")));

	/* Worst case: every code, plus the terminator, needs MB_CUR_MAX bytes */
	bufsize = (ncodes + 1) * MB_CUR_MAX;
	out = (text *) palloc(bufsize + VARHDRSZ);

	/* Convert directly into the data area of the new value */
	outlen = wcstombs((char *) VARDATA(out), str, bufsize);

	/* A failure here is not expected for a string we built ourselves */
	if (outlen == (size_t) -1)
	{
		ereport(ERROR,
				(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
				 errmsg("invalid multibyte character for locale")));
	}

	Assert(outlen <= (size_t) (ncodes * MB_CUR_MAX));

	/* Record the actual length, header included */
	VARATT_SIZEP(out) = outlen + VARHDRSZ;

	return out;
}
#endif /* USE_WIDE_UPPER_LOWER */
/********************************************************************
*
* lower
@ -43,6 +164,25 @@ static text *dotrim(const char *string, int stringlen,
Datum
lower(PG_FUNCTION_ARGS)
{
#ifdef USE_WIDE_UPPER_LOWER
text *string = PG_GETARG_TEXT_P(0);
text *result;
wchar_t *workspace;
int i;
workspace = texttowcs(string);
for (i = 0; workspace[i] != 0; i++)
workspace[i] = towlower(workspace[i]);
result = wcstotext(workspace, i);
pfree(workspace);
PG_RETURN_TEXT_P(result);
#else /* !USE_WIDE_UPPER_LOWER */
text *string = PG_GETARG_TEXT_P_COPY(0);
char *ptr;
int m;
@ -58,6 +198,7 @@ lower(PG_FUNCTION_ARGS)
}
PG_RETURN_TEXT_P(string);
#endif /* USE_WIDE_UPPER_LOWER */
}
@ -78,6 +219,25 @@ lower(PG_FUNCTION_ARGS)
Datum
upper(PG_FUNCTION_ARGS)
{
#ifdef USE_WIDE_UPPER_LOWER
text *string = PG_GETARG_TEXT_P(0);
text *result;
wchar_t *workspace;
int i;
workspace = texttowcs(string);
for (i = 0; workspace[i] != 0; i++)
workspace[i] = towupper(workspace[i]);
result = wcstotext(workspace, i);
pfree(workspace);
PG_RETURN_TEXT_P(result);
#else /* !USE_WIDE_UPPER_LOWER */
text *string = PG_GETARG_TEXT_P_COPY(0);
char *ptr;
int m;
@ -93,6 +253,7 @@ upper(PG_FUNCTION_ARGS)
}
PG_RETURN_TEXT_P(string);
#endif /* USE_WIDE_UPPER_LOWER */
}
@ -116,6 +277,32 @@ upper(PG_FUNCTION_ARGS)
Datum
initcap(PG_FUNCTION_ARGS)
{
#ifdef USE_WIDE_UPPER_LOWER
text *string = PG_GETARG_TEXT_P(0);
text *result;
wchar_t *workspace;
int wasalnum = 0;
int i;
workspace = texttowcs(string);
for (i = 0; workspace[i] != 0; i++)
{
if (wasalnum)
workspace[i] = towlower(workspace[i]);
else
workspace[i] = towupper(workspace[i]);
wasalnum = iswalnum(workspace[i]);
}
result = wcstotext(workspace, i);
pfree(workspace);
PG_RETURN_TEXT_P(result);
#else /* !USE_WIDE_UPPER_LOWER */
text *string = PG_GETARG_TEXT_P_COPY(0);
char *ptr;
int m;
@ -142,6 +329,7 @@ initcap(PG_FUNCTION_ARGS)
}
PG_RETURN_TEXT_P(string);
#endif /* USE_WIDE_UPPER_LOWER */
}

View File

@ -509,6 +509,9 @@
`HAVE_STRUCT_TM_TM_ZONE' instead. */
#undef HAVE_TM_ZONE
/* Define to 1 if you have the `towlower' function. */
#undef HAVE_TOWLOWER
/* Define to 1 if you have the external array `tzname'. */
#undef HAVE_TZNAME
@ -545,6 +548,15 @@
/* Define to 1 if you have the `waitpid' function. */
#undef HAVE_WAITPID
/* Define to 1 if you have the <wchar.h> header file. */
#undef HAVE_WCHAR_H
/* Define to 1 if you have the `wcstombs' function. */
#undef HAVE_WCSTOMBS
/* Define to 1 if you have the <wctype.h> header file. */
#undef HAVE_WCTYPE_H
/* Define to the appropriate snprintf format for 64-bit ints, if any. */
#undef INT64_FORMAT