From: Thomas Lockhart <Thomas.G.Lockhart@jpl.nasa.gov>

Subject: [HACKERS] Aggregate function patches

Here are the aggregate function patches I originally sent in last December.
They fix sum() and avg() behavior for ints and floats when NULL values are
involved.

I was waiting to resubmit these until I had a chance to write a v6.0->v6.1
database upgrade script to ensure that existing v6.0 databases which have
not been reloaded for v6.1 do no break with the new aggregate behavior.
These scripts are included below. It's OK with me if someone wants to do
something different with the upgrade strategy, but something like this
was discussed a few weeks ago.

Also, there were a couple of small items which cropped up in doing a clean
install of 970403 (actually 970402 + 970403 changes since the full 970403
tar file appears to be damaged or at least suspect). They are the first
two patches below and can be omitted if desired (although I think they
aren't dangerous :).
This commit is contained in:
Marc G. Fournier 1997-04-03 19:56:47 +00:00
parent 164cd7ab34
commit 9d5c0af586
6 changed files with 127 additions and 21 deletions

View File

@ -368,7 +368,12 @@ ExecAgg(Agg *node)
char *args[2];
AggFuncInfo *aggfns = &aggFuncInfo[i];
if (aggfns->finalfn && nTuplesAgged > 0) {
if (noInitValue[i]) {
/*
* No values found for this agg; return current state.
* This seems to fix behavior for avg() aggregate. -tgl 12/96
*/
} else if (aggfns->finalfn && nTuplesAgged > 0) {
if (aggfns->finalfn_nargs > 1) {
args[0] = (char*)value1[i];
args[1] = (char*)value2[i];

View File

@ -5,7 +5,7 @@
*
* Copyright (c) 1994, Regents of the University of California
*
* $Id: geqo_eval.c,v 1.6 1997/03/03 23:26:45 scrappy Exp $
* $Id: geqo_eval.c,v 1.7 1997/04/03 19:55:35 scrappy Exp $
*
*-------------------------------------------------------------------------
*/
@ -23,7 +23,9 @@
#include <math.h>
#ifdef HAVE_LIMITS_H
# include <limits.h>
# define MAXINT INT_MAX
# ifndef MAXINT
# define MAXINT INT_MAX
# endif
#else
# include <values.h>
#endif

View File

@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/tcop/Attic/aclchk.c,v 1.7 1997/03/12 20:48:17 scrappy Exp $
* $Header: /cvsroot/pgsql/src/backend/tcop/Attic/aclchk.c,v 1.8 1997/04/03 19:55:12 scrappy Exp $
*
* NOTES
* See acl.h.
@ -277,7 +277,7 @@ aclcheck(Acl *acl, AclId id, AclIdType idtype, AclMode mode)
* the system never creates an empty ACL.
*/
if (num < 1) {
#ifdef ACLDEBUG_TRACE || 1
#if ACLDEBUG_TRACE || 1
elog(DEBUG, "aclcheck: zero-length ACL, returning 1");
#endif
return ACLCHECK_OK;

View File

@ -7,7 +7,7 @@
*
* Copyright (c) 1994, Regents of the University of California
*
* $Id: pg_aggregate.h,v 1.4 1997/04/02 18:36:09 scrappy Exp $
* $Id: pg_aggregate.h,v 1.5 1997/04/03 19:56:19 scrappy Exp $
*
* NOTES
* the genbki.sh script reads this file and generates .bki
@ -19,7 +19,7 @@
#define PG_AGGREGATE_H
/* ----------------
* postgres.h contains the system type definintions and the
* postgres.h contains the system type definitions and the
* CATALOG(), BOOTSTRAP and DATA() sugar words so this file
* can be read by both genbki.sh and the C compiler.
* ----------------
@ -87,15 +87,15 @@ typedef FormData_pg_aggregate *Form_pg_aggregate;
* ---------------
*/
DATA(insert OID = 0 ( avg PGUID int4pl int4inc int4div 23 23 23 23 0 0 ));
DATA(insert OID = 0 ( avg PGUID int2pl int2inc int2div 21 21 21 21 0 0 ));
DATA(insert OID = 0 ( avg PGUID float4pl float4inc float4div 700 700 700 700 0.0 0.0 ));
DATA(insert OID = 0 ( avg PGUID float8pl float8inc float8div 701 701 701 701 0.0 0.0 ));
DATA(insert OID = 0 ( avg PGUID int4pl int4inc int4div 23 23 23 23 _null_ 0 ));
DATA(insert OID = 0 ( avg PGUID int2pl int2inc int2div 21 21 21 21 _null_ 0 ));
DATA(insert OID = 0 ( avg PGUID float4pl float4inc float4div 700 700 700 700 _null_ 0.0 ));
DATA(insert OID = 0 ( avg PGUID float8pl float8inc float8div 701 701 701 701 _null_ 0.0 ));
DATA(insert OID = 0 ( sum PGUID int4pl - - 23 23 0 23 0 _null_ ));
DATA(insert OID = 0 ( sum PGUID int2pl - - 21 21 0 21 0 _null_ ));
DATA(insert OID = 0 ( sum PGUID float4pl - - 700 700 0 700 0.0 _null_ ));
DATA(insert OID = 0 ( sum PGUID float8pl - - 701 701 0 701 0.0 _null_ ));
DATA(insert OID = 0 ( sum PGUID int4pl - - 23 23 0 23 _null_ _null_ ));
DATA(insert OID = 0 ( sum PGUID int2pl - - 21 21 0 21 _null_ _null_ ));
DATA(insert OID = 0 ( sum PGUID float4pl - - 700 700 0 700 _null_ _null_ ));
DATA(insert OID = 0 ( sum PGUID float8pl - - 701 701 0 701 _null_ _null_ ));
DATA(insert OID = 0 ( max PGUID int4larger - - 23 23 0 23 _null_ _null_ ));
DATA(insert OID = 0 ( max PGUID int2larger - - 21 21 0 21 _null_ _null_ ));
@ -113,10 +113,10 @@ DATA(insert OID = 0 ( min PGUID int4smaller - - 702 702 0 702 _null_ _nul
DATA(insert OID = 0 ( min PGUID date_smaller - - 1082 1082 0 1082 _null_ _null_ ));
DATA(insert OID = 0 ( min PGUID float8smaller - - 1084 1084 0 1084 _null_ _null_ ));
DATA(insert OID = 0 ( count PGUID - int4inc - 0 0 23 23 _null_ 0 ));
DATA(insert OID = 0 ( count PGUID - int4inc - 0 0 23 23 _null_ 0 ));
/*
* prototypes for fucnctions in pg_aggregate.c
* prototypes for functions in pg_aggregate.c
*/
extern void AggregateCreate(char *aggName,
char *aggtransfn1Name,
@ -131,7 +131,3 @@ extern char *AggNameGetInitVal(char *aggName, Oid basetype,
int xfuncno, bool *isNull);
#endif /* PG_AGGREGATE_H */

56
src/update6_0-6_1.sh Normal file
View File

@ -0,0 +1,56 @@
#!/bin/sh
# update
# Script to apply patches to existing databases
# to upgrade from Postgres v6.0 to v6.1.
echo ""
echo "This utility does a minimal upgrade for existing v6.0 databases."
echo "Note that several new features and functions in Postgres"
echo " will not be available unless the databases are reloaded"
echo " from a clean install of v6.1."
echo ""
echo "This update script is not necessary for new or reloaded databases,"
echo " but will not damage them. You should update existing v6.1beta"
echo " databases created on or before 1997-04-04 (when the patches for"
echo " aggregate functions were applied to the v6.1beta source tree)."
echo ""
echo "Features present with this simple update include:"
echo " - aggregate operators sum() and avg() behave correctly with NULLs"
echo " - the point distance operator '<===>' returns float8 rather than int4"
echo " - some duplicate function OIDs are renumbered to eliminate conflicts"
echo ""
echo "Features unavailable with only this simple update include:"
echo " - new string handling functions a la Oracle/Ingres"
echo " - new date and time data types and expanded functionality"
echo " - some new function overloading to simplify function names"
echo ""
echo "Note that if v6.0 databases are not reloaded from a clean install of v6.1"
echo " or if this update is not applied to existing v6.0 databases:"
echo " - aggregate functions avg() and sum() may divide-by-zero for int4 data types"
#
srcdir=`pwd`
srcsql="update6_0-6_1.sql"
CMDSQL="psql"
SRCSQL="$srcdir/$srcsql"
#
if [ -z $SRCSQL ]; then
echo "unable to locate $SRCSQL"
exit 1
fi
#
echo ""
echo "updating databases found in $PGDATA/base"
echo ""
#
cd $PGDATA/base
for d in *
do
echo "updating $d at `date` ..."
echo "try $CMDSQL $d < $SRCSQL"
$CMDSQL $d < $SRCSQL
echo "completed updating $d at `date`"
done
#
echo ""
echo "completed all updates at `date`"
echo ""
exit

47
src/update6_0-6_1.sql Normal file
View File

@ -0,0 +1,47 @@
-- Aggregate functions
-- Thomas Lockhart
-- This fixes the behavior of built-in aggregate functions avg() and sum().
-- Code tested on postgres95 v1.0.9, postgres v6.0, and postgres v6.1b-970315.
-- Original definitions return zero rather than null for empty set attributes.
-- Postgres source code says that null behavior for aggregates is not correct,
-- but does describe the correct behavior for pass-by-reference data types
-- if it is given null initial values (from pg_aggregate).
-- Note that pass-by-value data types (e.g. int4) require a simple source code
-- change in backend/executor/nodeAgg.c to avoid divide-by-zero results.
-- If this SQL update is applied without making the corresponding source code
-- patch, then floating point types will work correctly but integer types will
-- divide-by-zero.
-- If the source code patch is applied but this SQL update is not, then there
-- will be divide-by-zero results for floating point types.
-- For aggregate attributes, the correct behavior is as follows:
-- count(*) should return a count of all tuples, null or otherwise
-- count(col) should return a count of all non-null values, zero if none
-- avg(col), sum(col), etc should ignore null fields and return null if there
-- are no non-null inputs
-- Ref: the original Date book
update pg_aggregate set agginitval1=null
where aggname = 'avg' or aggname = 'sum';
-- Geometric functions
-- Thomas Lockhart
-- This replaces the distance operator with one returning a floating point number.
-- The original operator 'pointdist' returned an integer.
-- There is no corresponding source code change required for this patch.
update pg_operator set oprresult = 701, oprcode = 'point_distance'::regproc
where oprname = '<===>' and oprresult = 23;
-- Date functions
-- Thomas Lockhart
-- This fixes conflicting OIDs within the date and time declarations.
update pg_proc set oid = 1138::oid where proname = 'date_larger';
update pg_proc set oid = 1139::oid where proname = 'date_smaller';
update pg_proc set oid = 1140::oid where proname = 'date_mi';
update pg_proc set oid = 1141::oid where proname = 'date_pli';
update pg_proc set oid = 1142::oid where proname = 'date_mii';
update pg_proc set oid = 1143::oid where proname = 'timein';
update pg_proc set oid = 1144::oid where proname = 'timeout';
update pg_proc set oid = 1145::oid where proname = 'time_eq';