Recognize functional dependency on primary keys. This allows a table's

other columns to be referenced without listing them in GROUP BY, so long as the primary key column(s) are listed in GROUP BY. Eventually we should also allow functional dependency on a UNIQUE constraint when the columns are marked NOT NULL, but that has to wait until NOT NULL constraints are represented in pg_constraint, because we need to have pg_constraint OIDs for all the conditions needed to ensure functional dependency. Peter Eisentraut, reviewed by Alex Hunsaker and Tom Lane
2010-08-07 02:44:09 +00:00 · 2010-08-07 02:44:09 +00:00 · e49ae8d3bc
parent ce6ce1a09d
commit e49ae8d3bc
16 changed files with 684 additions and 35 deletions
--- a/doc/src/sgml/queries.sgml
+++ b/doc/src/sgml/queries.sgml
@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/queries.sgml,v 1.58 2010/07/03 02:57:46 rhaas Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/queries.sgml,v 1.59 2010/08/07 02:44:05 tgl Exp $ -->

 <chapter id="queries">
 <title>Queries</title>
@ -886,10 +886,7 @@ SELECT product_id, p.name, (sum(s.units) * p.price) AS sales
    In this example, the columns <literal>product_id</literal>,
    <literal>p.name</literal>, and <literal>p.price</literal> must be
    in the <literal>GROUP BY</> clause since they are referenced in
-    the query select list.  (Depending on how the products
-    table is set up, name and price might be fully dependent on the
-    product ID, so the additional groupings could theoretically be
-    unnecessary, though this is not implemented.)  The column
+    the query select list (but see below).  The column
    <literal>s.units</> does not have to be in the <literal>GROUP
    BY</> list since it is only used in an aggregate expression
    (<literal>sum(...)</literal>), which represents the sales
@ -897,6 +894,18 @@ SELECT product_id, p.name, (sum(s.units) * p.price) AS sales
    all sales of the product.
   </para>

+   <para>
+    If the products table is set up so that,
+    say, <literal>product_id</literal> is the primary key, then it
+    would be enough to group by <literal>product_id</literal> in the
+    above example, since name and price would
+    be <firstterm>functionally
+    dependent</firstterm><indexterm><primary>functional
+    dependency</primary></indexterm> on the product ID, and so there
+    would be no ambiguity about which name and price value to return
+    for each product ID group.
+   </para>
+
   <para>
    In strict SQL, <literal>GROUP BY</> can only group by columns of
    the source table but <productname>PostgreSQL</productname> extends
--- a/doc/src/sgml/ref/select.sgml
+++ b/doc/src/sgml/ref/select.sgml
@ -1,5 +1,5 @@
 <!--
-$PostgreSQL: pgsql/doc/src/sgml/ref/select.sgml,v 1.131 2010/06/15 20:04:53 momjian Exp $
+$PostgreSQL: pgsql/doc/src/sgml/ref/select.sgml,v 1.132 2010/08/07 02:44:05 tgl Exp $
 PostgreSQL documentation
 -->

@ -520,9 +520,12 @@ GROUP BY <replaceable class="parameter">expression</replaceable> [, ...]
    produces a single value computed across all the selected rows).
    When <literal>GROUP BY</literal> is present, it is not valid for
    the <command>SELECT</command> list expressions to refer to
-    ungrouped columns except within aggregate functions, since there
-    would be more than one possible value to return for an ungrouped
-    column.
+    ungrouped columns except within aggregate functions or if the
+    ungrouped column is functionally dependent on the grouped columns,
+    since there would otherwise be more than one possible value to
+    return for an ungrouped column.  A functional dependency exists if
+    the grouped columns (or a subset thereof) are the primary key of
+    the table containing the ungrouped column.
   </para>
  </refsect2>

@ -1590,6 +1593,18 @@ SELECT distributors.* WHERE distributors.name = 'Westward';
   </para>
  </refsect2>

+  <refsect2>
+   <title>Functional Dependencies</title>
+
+   <para>
+    <productname>PostgreSQL</productname> recognizes functional dependency
+    (allowing columns to be omitted from <literal>GROUP BY</>) only when
+    a table's primary key is included in the <literal>GROUP BY</> list.
+    The SQL standard specifies additional conditions that should be
+    recognized.
+   </para>
+  </refsect2>
+
  <refsect2>
   <title><literal>WINDOW</literal> Clause Restrictions</title>

--- a/src/backend/catalog/dependency.c
+++ b/src/backend/catalog/dependency.c
@ -8,7 +8,7 @@
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/catalog/dependency.c,v 1.96 2010/02/26 02:00:36 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/catalog/dependency.c,v 1.97 2010/08/07 02:44:06 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -1594,7 +1594,7 @@ find_expr_references_walker(Node *node,
 	{
 		/* Recurse into RTE subquery or not-yet-planned sublink subquery */
 		Query	   *query = (Query *) node;
-		ListCell   *rtable;
+		ListCell   *lc;
 		bool		result;

 		/*
@ -1604,9 +1604,9 @@ find_expr_references_walker(Node *node,
 		 * of recursing into RTE_FUNCTION RTEs, subqueries, etc, so no need to
 		 * do that here.  But keep it from looking at join alias lists.)
 		 */
-		foreach(rtable, query->rtable)
+		foreach(lc, query->rtable)
 		{
-			RangeTblEntry *rte = (RangeTblEntry *) lfirst(rtable);
+			RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc);
 			ListCell   *ct;

 			switch (rte->rtekind)
@ -1627,6 +1627,15 @@ find_expr_references_walker(Node *node,
 			}
 		}

+		/*
+		 * Add dependencies on constraints listed in query's constraintDeps
+		 */
+		foreach(lc, query->constraintDeps)
+		{
+			add_object_address(OCLASS_CONSTRAINT, lfirst_oid(lc), 0,
+							   context->addrs);
+		}
+
 		/* query_tree_walker ignores ORDER BY etc, but we need those opers */
 		find_expr_references_walker((Node *) query->sortClause, context);
 		find_expr_references_walker((Node *) query->groupClause, context);
--- a/src/backend/catalog/pg_constraint.c
+++ b/src/backend/catalog/pg_constraint.c
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/catalog/pg_constraint.c,v 1.54 2010/08/05 15:25:35 rhaas Exp $
+ *	  $PostgreSQL: pgsql/src/backend/catalog/pg_constraint.c,v 1.55 2010/08/07 02:44:06 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -783,3 +783,112 @@ get_constraint_oid(Oid relid, const char *conname, bool missing_ok)

 	return conOid;
 }
+
+/*
+ * Determine whether a relation can be proven functionally dependent on
+ * a set of grouping columns.  If so, return TRUE and add the pg_constraint
+ * OIDs of the constraints needed for the proof to the *constraintDeps list.
+ *
+ * grouping_columns is a list of grouping expressions, in which columns of
+ * the rel of interest are Vars with the indicated varno/varlevelsup.
+ *
+ * Currently we only check to see if the rel has a primary key that is a
+ * subset of the grouping_columns.  We could also use plain unique constraints
+ * if all their columns are known not null, but there's a problem: we need
+ * to be able to represent the not-null-ness as part of the constraints added
+ * to *constraintDeps.  FIXME whenever not-null constraints get represented
+ * in pg_constraint.
+ */
+bool
+check_functional_grouping(Oid relid,
+						  Index varno, Index varlevelsup,
+						  List *grouping_columns,
+						  List **constraintDeps)
+{
+	bool		result = false;
+	Relation	pg_constraint;
+	HeapTuple	tuple;
+	SysScanDesc scan;
+	ScanKeyData skey[1];
+
+	/* Scan pg_constraint for constraints of the target rel */
+	pg_constraint = heap_open(ConstraintRelationId, AccessShareLock);
+
+	ScanKeyInit(&skey[0],
+				Anum_pg_constraint_conrelid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(relid));
+
+	scan = systable_beginscan(pg_constraint, ConstraintRelidIndexId, true,
+							  SnapshotNow, 1, skey);
+
+	while (HeapTupleIsValid(tuple = systable_getnext(scan)))
+	{
+		Form_pg_constraint con = (Form_pg_constraint) GETSTRUCT(tuple);
+		Datum		adatum;
+		bool		isNull;
+		ArrayType  *arr;
+		int16	   *attnums;
+		int			numkeys;
+		int			i;
+		bool		found_col;
+
+		/* Only PK constraints are of interest for now, see comment above */
+		if (con->contype != CONSTRAINT_PRIMARY)
+			continue;
+
+		/* Extract the conkey array, ie, attnums of PK's columns */
+		adatum = heap_getattr(tuple, Anum_pg_constraint_conkey,
+							  RelationGetDescr(pg_constraint), &isNull);
+		if (isNull)
+			elog(ERROR, "null conkey for constraint %u",
+				 HeapTupleGetOid(tuple));
+		arr = DatumGetArrayTypeP(adatum);	/* ensure not toasted */
+		numkeys = ARR_DIMS(arr)[0];
+		if (ARR_NDIM(arr) != 1 ||
+			numkeys < 0 ||
+			ARR_HASNULL(arr) ||
+			ARR_ELEMTYPE(arr) != INT2OID)
+			elog(ERROR, "conkey is not a 1-D smallint array");
+		attnums = (int16 *) ARR_DATA_PTR(arr);
+
+		found_col = false;
+		for (i = 0; i < numkeys; i++)
+		{
+			AttrNumber	attnum = attnums[i];
+			ListCell   *gl;
+
+			found_col = false;
+			foreach(gl, grouping_columns)
+			{
+				Var		   *gvar = (Var *) lfirst(gl);
+
+				if (IsA(gvar, Var) &&
+					gvar->varno == varno &&
+					gvar->varlevelsup == varlevelsup &&
+					gvar->varattno == attnum)
+				{
+					found_col = true;
+					break;
+				}
+			}
+			if (!found_col)
+				break;
+		}
+
+		if (found_col)
+		{
+			/* The PK is a subset of grouping_columns, so we win */
+			*constraintDeps = lappend_oid(*constraintDeps,
+										  HeapTupleGetOid(tuple));
+			result = true;
+			break;
+		}
+	}
+
+	systable_endscan(scan);
+
+	heap_close(pg_constraint, AccessShareLock);
+
+	return result;
+}
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@ -15,7 +15,7 @@
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.466 2010/07/25 23:21:21 rhaas Exp $
+ *	  $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.467 2010/08/07 02:44:06 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -2272,6 +2272,7 @@ _copyQuery(Query *from)
 	COPY_NODE_FIELD(limitCount);
 	COPY_NODE_FIELD(rowMarks);
 	COPY_NODE_FIELD(setOperations);
+	COPY_NODE_FIELD(constraintDeps);

 	return newnode;
 }
--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@ -22,7 +22,7 @@
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/nodes/equalfuncs.c,v 1.386 2010/07/25 23:21:21 rhaas Exp $
+ *	  $PostgreSQL: pgsql/src/backend/nodes/equalfuncs.c,v 1.387 2010/08/07 02:44:06 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -877,6 +877,7 @@ _equalQuery(Query *a, Query *b)
 	COMPARE_NODE_FIELD(limitCount);
 	COMPARE_NODE_FIELD(rowMarks);
 	COMPARE_NODE_FIELD(setOperations);
+	COMPARE_NODE_FIELD(constraintDeps);

 	return true;
 }
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.387 2010/07/25 23:21:21 rhaas Exp $
+ *	  $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.388 2010/08/07 02:44:06 tgl Exp $
 *
 * NOTES
 *	  Every node type that can appear in stored rules' parsetrees *must*
@ -2020,6 +2020,7 @@ _outQuery(StringInfo str, Query *node)
 	WRITE_NODE_FIELD(limitCount);
 	WRITE_NODE_FIELD(rowMarks);
 	WRITE_NODE_FIELD(setOperations);
+	WRITE_NODE_FIELD(constraintDeps);
 }

 static void
--- a/src/backend/nodes/readfuncs.c
+++ b/src/backend/nodes/readfuncs.c
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/nodes/readfuncs.c,v 1.232 2010/02/16 22:34:43 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/nodes/readfuncs.c,v 1.233 2010/08/07 02:44:07 tgl Exp $
 *
 * NOTES
 *	  Path and Plan nodes do not have any readfuncs support, because we
@ -218,6 +218,7 @@ _readQuery(void)
 	READ_NODE_FIELD(limitCount);
 	READ_NODE_FIELD(rowMarks);
 	READ_NODE_FIELD(setOperations);
+	READ_NODE_FIELD(constraintDeps);

 	READ_DONE();
 }
--- a/src/backend/parser/parse_agg.c
+++ b/src/backend/parser/parse_agg.c
@ -8,12 +8,13 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/parser/parse_agg.c,v 1.93 2010/03/17 16:52:38 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/parser/parse_agg.c,v 1.94 2010/08/07 02:44:07 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
 #include "postgres.h"

+#include "catalog/pg_constraint.h"
 #include "nodes/makefuncs.h"
 #include "nodes/nodeFuncs.h"
 #include "optimizer/tlist.h"
@ -29,13 +30,16 @@
 typedef struct
 {
 	ParseState *pstate;
+	Query	   *qry;
 	List	   *groupClauses;
 	bool		have_non_var_grouping;
+	List	  **func_grouped_rels;
 	int			sublevels_up;
 } check_ungrouped_columns_context;

-static void check_ungrouped_columns(Node *node, ParseState *pstate,
-						List *groupClauses, bool have_non_var_grouping);
+static void check_ungrouped_columns(Node *node, ParseState *pstate, Query *qry,
+						List *groupClauses, bool have_non_var_grouping,
+						List **func_grouped_rels);
 static bool check_ungrouped_columns_walker(Node *node,
 							   check_ungrouped_columns_context *context);

@ -293,6 +297,7 @@ parseCheckAggregates(ParseState *pstate, Query *qry)
 {
 	List	   *groupClauses = NIL;
 	bool		have_non_var_grouping;
+	List	   *func_grouped_rels = NIL;
 	ListCell   *l;
 	bool		hasJoinRTEs;
 	bool		hasSelfRefRTEs;
@ -408,14 +413,16 @@ parseCheckAggregates(ParseState *pstate, Query *qry)
 	clause = (Node *) qry->targetList;
 	if (hasJoinRTEs)
 		clause = flatten_join_alias_vars(root, clause);
-	check_ungrouped_columns(clause, pstate,
-							groupClauses, have_non_var_grouping);
+	check_ungrouped_columns(clause, pstate, qry,
+							groupClauses, have_non_var_grouping,
+							&func_grouped_rels);

 	clause = (Node *) qry->havingQual;
 	if (hasJoinRTEs)
 		clause = flatten_join_alias_vars(root, clause);
-	check_ungrouped_columns(clause, pstate,
-							groupClauses, have_non_var_grouping);
+	check_ungrouped_columns(clause, pstate, qry,
+							groupClauses, have_non_var_grouping,
+							&func_grouped_rels);

 	/*
 	 * Per spec, aggregates can't appear in a recursive term.
@ -535,14 +542,17 @@ parseCheckWindowFuncs(ParseState *pstate, Query *qry)
 * way more pain than the feature seems worth.
 */
 static void
-check_ungrouped_columns(Node *node, ParseState *pstate,
-						List *groupClauses, bool have_non_var_grouping)
+check_ungrouped_columns(Node *node, ParseState *pstate, Query *qry,
+						List *groupClauses, bool have_non_var_grouping,
+						List **func_grouped_rels)
 {
 	check_ungrouped_columns_context context;

 	context.pstate = pstate;
+	context.qry = qry;
 	context.groupClauses = groupClauses;
 	context.have_non_var_grouping = have_non_var_grouping;
+	context.func_grouped_rels = func_grouped_rels;
 	context.sublevels_up = 0;
 	check_ungrouped_columns_walker(node, &context);
 }
@ -619,10 +629,43 @@ check_ungrouped_columns_walker(Node *node,
 			}
 		}

-		/* Found an ungrouped local variable; generate error message */
+		/*
+		 * Check whether the Var is known functionally dependent on the GROUP
+		 * BY columns.  If so, we can allow the Var to be used, because the
+		 * grouping is really a no-op for this table.  However, this deduction
+		 * depends on one or more constraints of the table, so we have to add
+		 * those constraints to the query's constraintDeps list, because it's
+		 * not semantically valid anymore if the constraint(s) get dropped.
+		 * (Therefore, this check must be the last-ditch effort before raising
+		 * error: we don't want to add dependencies unnecessarily.)
+		 *
+		 * Because this is a pretty expensive check, and will have the same
+		 * outcome for all columns of a table, we remember which RTEs we've
+		 * already proven functional dependency for in the func_grouped_rels
+		 * list.  This test also prevents us from adding duplicate entries
+		 * to the constraintDeps list.
+		 */
+		if (list_member_int(*context->func_grouped_rels, var->varno))
+			return false;				/* previously proven acceptable */
+
 		Assert(var->varno > 0 &&
 			   (int) var->varno <= list_length(context->pstate->p_rtable));
 		rte = rt_fetch(var->varno, context->pstate->p_rtable);
+		if (rte->rtekind == RTE_RELATION)
+		{
+			if (check_functional_grouping(rte->relid,
+										  var->varno,
+										  0,
+										  context->groupClauses,
+										  &context->qry->constraintDeps))
+			{
+				*context->func_grouped_rels =
+					lappend_int(*context->func_grouped_rels, var->varno);
+				return false;			/* acceptable */
+			}
+		}
+
+		/* Found an ungrouped local variable; generate error message */
 		attname = get_rte_attribute_name(rte, var->varattno);
 		if (context->sublevels_up == 0)
 			ereport(ERROR,
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@ -37,7 +37,7 @@
 * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.590 2010/08/05 18:21:17 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.591 2010/08/07 02:44:07 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -53,6 +53,6 @@
 */

 /*							yyyymmddN */
-#define CATALOG_VERSION_NO	201008052
+#define CATALOG_VERSION_NO	201008061

 #endif
--- a/src/include/catalog/pg_constraint.h
+++ b/src/include/catalog/pg_constraint.h
@ -8,7 +8,7 @@
 * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $PostgreSQL: pgsql/src/include/catalog/pg_constraint.h,v 1.40 2010/08/05 15:25:36 rhaas Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/pg_constraint.h,v 1.41 2010/08/07 02:44:07 tgl Exp $
 *
 * NOTES
 *	  the genbki.pl script reads this file and generates .bki
@ -239,4 +239,9 @@ extern void AlterConstraintNamespaces(Oid ownerId, Oid oldNspId,
 						  Oid newNspId, bool isType);
 extern Oid	get_constraint_oid(Oid relid, const char *conname, bool missing_ok);

+extern bool check_functional_grouping(Oid relid,
+									  Index varno, Index varlevelsup,
+									  List *grouping_columns,
+									  List **constraintDeps);
+
 #endif   /* PG_CONSTRAINT_H */
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@ -13,7 +13,7 @@
 * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $PostgreSQL: pgsql/src/include/nodes/parsenodes.h,v 1.433 2010/07/25 23:21:22 rhaas Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/parsenodes.h,v 1.434 2010/08/07 02:44:07 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -146,6 +146,9 @@ typedef struct Query

 	Node	   *setOperations;	/* set-operation tree if this is top level of
 								 * a UNION/INTERSECT/EXCEPT query */
+
+	List	   *constraintDeps;	/* a list of pg_constraint OIDs that the query
+								 * depends on to be semantically valid */
 } Query;


--- a/src/test/regress/expected/functional_deps.out
+++ b/src/test/regress/expected/functional_deps.out
@ -0,0 +1,241 @@
+-- from http://www.depesz.com/index.php/2010/04/19/getting-unique-elements/
+CREATE TEMP TABLE articles (
+    id int CONSTRAINT articles_pkey PRIMARY KEY,
+    keywords text,
+    title text UNIQUE NOT NULL,
+    body text UNIQUE,
+    created date
+);
+NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index "articles_pkey" for table "articles"
+NOTICE:  CREATE TABLE / UNIQUE will create implicit index "articles_title_key" for table "articles"
+NOTICE:  CREATE TABLE / UNIQUE will create implicit index "articles_body_key" for table "articles"
+CREATE TEMP TABLE articles_in_category (
+    article_id int,
+    category_id int,
+    changed date,
+    PRIMARY KEY (article_id, category_id)
+);
+NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index "articles_in_category_pkey" for table "articles_in_category"
+-- test functional dependencies based on primary keys/unique constraints
+-- base tables
+-- group by primary key (OK)
+SELECT id, keywords, title, body, created
+FROM articles
+GROUP BY id;
+ id | keywords | title | body | created 
+----+----------+-------+------+---------
+(0 rows)
+
+-- group by unique not null (fail/todo)
+SELECT id, keywords, title, body, created
+FROM articles
+GROUP BY title;
+ERROR:  column "articles.id" must appear in the GROUP BY clause or be used in an aggregate function
+LINE 1: SELECT id, keywords, title, body, created
+               ^
+-- group by unique nullable (fail)
+SELECT id, keywords, title, body, created
+FROM articles
+GROUP BY body;
+ERROR:  column "articles.id" must appear in the GROUP BY clause or be used in an aggregate function
+LINE 1: SELECT id, keywords, title, body, created
+               ^
+-- group by something else (fail)
+SELECT id, keywords, title, body, created
+FROM articles
+GROUP BY keywords;
+ERROR:  column "articles.id" must appear in the GROUP BY clause or be used in an aggregate function
+LINE 1: SELECT id, keywords, title, body, created
+               ^
+-- multiple tables
+-- group by primary key (OK)
+SELECT a.id, a.keywords, a.title, a.body, a.created
+FROM articles AS a, articles_in_category AS aic
+WHERE a.id = aic.article_id AND aic.category_id in (14,62,70,53,138)
+GROUP BY a.id;
+ id | keywords | title | body | created 
+----+----------+-------+------+---------
+(0 rows)
+
+-- group by something else (fail)
+SELECT a.id, a.keywords, a.title, a.body, a.created
+FROM articles AS a, articles_in_category AS aic
+WHERE a.id = aic.article_id AND aic.category_id in (14,62,70,53,138)
+GROUP BY aic.article_id, aic.category_id;
+ERROR:  column "a.id" must appear in the GROUP BY clause or be used in an aggregate function
+LINE 1: SELECT a.id, a.keywords, a.title, a.body, a.created
+               ^
+-- JOIN syntax
+-- group by left table's primary key (OK)
+SELECT a.id, a.keywords, a.title, a.body, a.created
+FROM articles AS a JOIN articles_in_category AS aic ON a.id = aic.article_id
+WHERE aic.category_id in (14,62,70,53,138)
+GROUP BY a.id;
+ id | keywords | title | body | created 
+----+----------+-------+------+---------
+(0 rows)
+
+-- group by something else (fail)
+SELECT a.id, a.keywords, a.title, a.body, a.created
+FROM articles AS a JOIN articles_in_category AS aic ON a.id = aic.article_id
+WHERE aic.category_id in (14,62,70,53,138)
+GROUP BY aic.article_id, aic.category_id;
+ERROR:  column "a.id" must appear in the GROUP BY clause or be used in an aggregate function
+LINE 1: SELECT a.id, a.keywords, a.title, a.body, a.created
+               ^
+-- group by right table's (composite) primary key (OK)
+SELECT aic.changed
+FROM articles AS a JOIN articles_in_category AS aic ON a.id = aic.article_id
+WHERE aic.category_id in (14,62,70,53,138)
+GROUP BY aic.category_id, aic.article_id;
+ changed 
+---------
+(0 rows)
+
+-- group by right table's partial primary key (fail)
+SELECT aic.changed
+FROM articles AS a JOIN articles_in_category AS aic ON a.id = aic.article_id
+WHERE aic.category_id in (14,62,70,53,138)
+GROUP BY aic.article_id;
+ERROR:  column "aic.changed" must appear in the GROUP BY clause or be used in an aggregate function
+LINE 1: SELECT aic.changed
+               ^
+-- example from documentation
+CREATE TEMP TABLE products (product_id int, name text, price numeric);
+CREATE TEMP TABLE sales (product_id int, units int);
+-- OK
+SELECT product_id, p.name, (sum(s.units) * p.price) AS sales
+    FROM products p LEFT JOIN sales s USING (product_id)
+    GROUP BY product_id, p.name, p.price;
+ product_id | name | sales 
+------------+------+-------
+(0 rows)
+
+-- fail
+SELECT product_id, p.name, (sum(s.units) * p.price) AS sales
+    FROM products p LEFT JOIN sales s USING (product_id)
+    GROUP BY product_id;
+ERROR:  column "p.name" must appear in the GROUP BY clause or be used in an aggregate function
+LINE 1: SELECT product_id, p.name, (sum(s.units) * p.price) AS sales
+                           ^
+ALTER TABLE products ADD PRIMARY KEY (product_id);
+NOTICE:  ALTER TABLE / ADD PRIMARY KEY will create implicit index "products_pkey" for table "products"
+-- OK now
+SELECT product_id, p.name, (sum(s.units) * p.price) AS sales
+    FROM products p LEFT JOIN sales s USING (product_id)
+    GROUP BY product_id;
+ product_id | name | sales 
+------------+------+-------
+(0 rows)
+
+-- Drupal example, http://drupal.org/node/555530
+CREATE TEMP TABLE node (
+    nid SERIAL,
+    vid integer NOT NULL default '0',
+    type varchar(32) NOT NULL default '',
+    title varchar(128) NOT NULL default '',
+    uid integer NOT NULL default '0',
+    status integer NOT NULL default '1',
+    created integer NOT NULL default '0',
+    -- snip
+    PRIMARY KEY (nid, vid)
+);
+NOTICE:  CREATE TABLE will create implicit sequence "node_nid_seq" for serial column "node.nid"
+NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index "node_pkey" for table "node"
+CREATE TEMP TABLE users (
+    uid integer NOT NULL default '0',
+    name varchar(60) NOT NULL default '',
+    pass varchar(32) NOT NULL default '',
+    -- snip
+    PRIMARY KEY (uid),
+    UNIQUE (name)
+);
+NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index "users_pkey" for table "users"
+NOTICE:  CREATE TABLE / UNIQUE will create implicit index "users_name_key" for table "users"
+-- OK
+SELECT u.uid, u.name FROM node n
+INNER JOIN users u ON u.uid = n.uid
+WHERE n.type = 'blog' AND n.status = 1
+GROUP BY u.uid, u.name;
+ uid | name 
+-----+------
+(0 rows)
+
+-- OK
+SELECT u.uid, u.name FROM node n
+INNER JOIN users u ON u.uid = n.uid
+WHERE n.type = 'blog' AND n.status = 1
+GROUP BY u.uid;
+ uid | name 
+-----+------
+(0 rows)
+
+-- Check views and dependencies
+-- fail
+CREATE TEMP VIEW fdv1 AS
+SELECT id, keywords, title, body, created
+FROM articles
+GROUP BY body;
+ERROR:  column "articles.id" must appear in the GROUP BY clause or be used in an aggregate function
+LINE 2: SELECT id, keywords, title, body, created
+               ^
+-- OK
+CREATE TEMP VIEW fdv1 AS
+SELECT id, keywords, title, body, created
+FROM articles
+GROUP BY id;
+-- fail
+ALTER TABLE articles DROP CONSTRAINT articles_pkey RESTRICT;
+ERROR:  cannot drop constraint articles_pkey on table articles because other objects depend on it
+DETAIL:  view fdv1 depends on constraint articles_pkey on table articles
+HINT:  Use DROP ... CASCADE to drop the dependent objects too.
+DROP VIEW fdv1;
+-- multiple dependencies
+CREATE TEMP VIEW fdv2 AS
+SELECT a.id, a.keywords, a.title, aic.category_id, aic.changed
+FROM articles AS a JOIN articles_in_category AS aic ON a.id = aic.article_id
+WHERE aic.category_id in (14,62,70,53,138)
+GROUP BY a.id, aic.category_id, aic.article_id;
+ALTER TABLE articles DROP CONSTRAINT articles_pkey RESTRICT; -- fail
+ERROR:  cannot drop constraint articles_pkey on table articles because other objects depend on it
+DETAIL:  view fdv2 depends on constraint articles_pkey on table articles
+HINT:  Use DROP ... CASCADE to drop the dependent objects too.
+ALTER TABLE articles_in_category DROP CONSTRAINT articles_in_category_pkey RESTRICT; --fail
+ERROR:  cannot drop constraint articles_in_category_pkey on table articles_in_category because other objects depend on it
+DETAIL:  view fdv2 depends on constraint articles_in_category_pkey on table articles_in_category
+HINT:  Use DROP ... CASCADE to drop the dependent objects too.
+DROP VIEW fdv2;
+-- nested queries
+CREATE TEMP VIEW fdv3 AS
+SELECT id, keywords, title, body, created
+FROM articles
+GROUP BY id
+UNION
+SELECT id, keywords, title, body, created
+FROM articles
+GROUP BY id;
+ALTER TABLE articles DROP CONSTRAINT articles_pkey RESTRICT; -- fail
+ERROR:  cannot drop constraint articles_pkey on table articles because other objects depend on it
+DETAIL:  view fdv3 depends on constraint articles_pkey on table articles
+HINT:  Use DROP ... CASCADE to drop the dependent objects too.
+DROP VIEW fdv3;
+CREATE TEMP VIEW fdv4 AS
+SELECT * FROM articles WHERE title IN (SELECT title FROM articles GROUP BY id);
+ALTER TABLE articles DROP CONSTRAINT articles_pkey RESTRICT; -- fail
+ERROR:  cannot drop constraint articles_pkey on table articles because other objects depend on it
+DETAIL:  view fdv4 depends on constraint articles_pkey on table articles
+HINT:  Use DROP ... CASCADE to drop the dependent objects too.
+DROP VIEW fdv4;
+-- prepared query plans: this results in failure on reuse
+PREPARE foo AS
+  SELECT id, keywords, title, body, created
+  FROM articles
+  GROUP BY id;
+EXECUTE foo;
+ id | keywords | title | body | created 
+----+----------+-------+------+---------
+(0 rows)
+
+ALTER TABLE articles DROP CONSTRAINT articles_pkey RESTRICT;
+EXECUTE foo;  -- fail
+ERROR:  column "articles.keywords" must appear in the GROUP BY clause or be used in an aggregate function
--- a/src/test/regress/parallel_schedule
+++ b/src/test/regress/parallel_schedule
@ -1,5 +1,5 @@
 # ----------
-# $PostgreSQL: pgsql/src/test/regress/parallel_schedule,v 1.61 2010/02/12 17:33:21 tgl Exp $
+# $PostgreSQL: pgsql/src/test/regress/parallel_schedule,v 1.62 2010/08/07 02:44:08 tgl Exp $
 #
 # By convention, we put no more than twenty tests in any one parallel group;
 # this limits the number of connections needed to run the tests.
@ -84,7 +84,7 @@ test: rules
 # ----------
 # Another group of parallel tests
 # ----------
-test: select_views portals_p2 foreign_key cluster dependency guc bitmapops combocid tsearch tsdicts foreign_data window xmlmap
+test: select_views portals_p2 foreign_key cluster dependency guc bitmapops combocid tsearch tsdicts foreign_data window xmlmap functional_deps

 # ----------
 # Another group of parallel tests
--- a/src/test/regress/serial_schedule
+++ b/src/test/regress/serial_schedule
@ -1,4 +1,4 @@
-# $PostgreSQL: pgsql/src/test/regress/serial_schedule,v 1.56 2010/02/12 17:33:21 tgl Exp $
+# $PostgreSQL: pgsql/src/test/regress/serial_schedule,v 1.57 2010/08/07 02:44:08 tgl Exp $
 # This should probably be in an order similar to parallel_schedule.
 test: tablespace
 test: boolean
@ -103,6 +103,7 @@ test: tsdicts
 test: foreign_data
 test: window
 test: xmlmap
+test: functional_deps
 test: plancache
 test: limit
 test: plpgsql
--- a/src/test/regress/sql/functional_deps.sql
+++ b/src/test/regress/sql/functional_deps.sql
@ -0,0 +1,210 @@
+-- from http://www.depesz.com/index.php/2010/04/19/getting-unique-elements/
+
+CREATE TEMP TABLE articles (
+    id int CONSTRAINT articles_pkey PRIMARY KEY,
+    keywords text,
+    title text UNIQUE NOT NULL,
+    body text UNIQUE,
+    created date
+);
+
+CREATE TEMP TABLE articles_in_category (
+    article_id int,
+    category_id int,
+    changed date,
+    PRIMARY KEY (article_id, category_id)
+);
+
+-- test functional dependencies based on primary keys/unique constraints
+
+-- base tables
+
+-- group by primary key (OK)
+SELECT id, keywords, title, body, created
+FROM articles
+GROUP BY id;
+
+-- group by unique not null (fail/todo)
+SELECT id, keywords, title, body, created
+FROM articles
+GROUP BY title;
+
+-- group by unique nullable (fail)
+SELECT id, keywords, title, body, created
+FROM articles
+GROUP BY body;
+
+-- group by something else (fail)
+SELECT id, keywords, title, body, created
+FROM articles
+GROUP BY keywords;
+
+-- multiple tables
+
+-- group by primary key (OK)
+SELECT a.id, a.keywords, a.title, a.body, a.created
+FROM articles AS a, articles_in_category AS aic
+WHERE a.id = aic.article_id AND aic.category_id in (14,62,70,53,138)
+GROUP BY a.id;
+
+-- group by something else (fail)
+SELECT a.id, a.keywords, a.title, a.body, a.created
+FROM articles AS a, articles_in_category AS aic
+WHERE a.id = aic.article_id AND aic.category_id in (14,62,70,53,138)
+GROUP BY aic.article_id, aic.category_id;
+
+-- JOIN syntax
+
+-- group by left table's primary key (OK)
+SELECT a.id, a.keywords, a.title, a.body, a.created
+FROM articles AS a JOIN articles_in_category AS aic ON a.id = aic.article_id
+WHERE aic.category_id in (14,62,70,53,138)
+GROUP BY a.id;
+
+-- group by something else (fail)
+SELECT a.id, a.keywords, a.title, a.body, a.created
+FROM articles AS a JOIN articles_in_category AS aic ON a.id = aic.article_id
+WHERE aic.category_id in (14,62,70,53,138)
+GROUP BY aic.article_id, aic.category_id;
+
+-- group by right table's (composite) primary key (OK)
+SELECT aic.changed
+FROM articles AS a JOIN articles_in_category AS aic ON a.id = aic.article_id
+WHERE aic.category_id in (14,62,70,53,138)
+GROUP BY aic.category_id, aic.article_id;
+
+-- group by right table's partial primary key (fail)
+SELECT aic.changed
+FROM articles AS a JOIN articles_in_category AS aic ON a.id = aic.article_id
+WHERE aic.category_id in (14,62,70,53,138)
+GROUP BY aic.article_id;
+
+
+-- example from documentation
+
+CREATE TEMP TABLE products (product_id int, name text, price numeric);
+CREATE TEMP TABLE sales (product_id int, units int);
+
+-- OK
+SELECT product_id, p.name, (sum(s.units) * p.price) AS sales
+    FROM products p LEFT JOIN sales s USING (product_id)
+    GROUP BY product_id, p.name, p.price;
+
+-- fail
+SELECT product_id, p.name, (sum(s.units) * p.price) AS sales
+    FROM products p LEFT JOIN sales s USING (product_id)
+    GROUP BY product_id;
+
+ALTER TABLE products ADD PRIMARY KEY (product_id);
+
+-- OK now
+SELECT product_id, p.name, (sum(s.units) * p.price) AS sales
+    FROM products p LEFT JOIN sales s USING (product_id)
+    GROUP BY product_id;
+
+
+-- Drupal example, http://drupal.org/node/555530
+
+CREATE TEMP TABLE node (
+    nid SERIAL,
+    vid integer NOT NULL default '0',
+    type varchar(32) NOT NULL default '',
+    title varchar(128) NOT NULL default '',
+    uid integer NOT NULL default '0',
+    status integer NOT NULL default '1',
+    created integer NOT NULL default '0',
+    -- snip
+    PRIMARY KEY (nid, vid)
+);
+
+CREATE TEMP TABLE users (
+    uid integer NOT NULL default '0',
+    name varchar(60) NOT NULL default '',
+    pass varchar(32) NOT NULL default '',
+    -- snip
+    PRIMARY KEY (uid),
+    UNIQUE (name)
+);
+
+-- OK
+SELECT u.uid, u.name FROM node n
+INNER JOIN users u ON u.uid = n.uid
+WHERE n.type = 'blog' AND n.status = 1
+GROUP BY u.uid, u.name;
+
+-- OK
+SELECT u.uid, u.name FROM node n
+INNER JOIN users u ON u.uid = n.uid
+WHERE n.type = 'blog' AND n.status = 1
+GROUP BY u.uid;
+
+
+-- Check views and dependencies
+
+-- fail
+CREATE TEMP VIEW fdv1 AS
+SELECT id, keywords, title, body, created
+FROM articles
+GROUP BY body;
+
+-- OK
+CREATE TEMP VIEW fdv1 AS
+SELECT id, keywords, title, body, created
+FROM articles
+GROUP BY id;
+
+-- fail
+ALTER TABLE articles DROP CONSTRAINT articles_pkey RESTRICT;
+
+DROP VIEW fdv1;
+
+
+-- multiple dependencies
+CREATE TEMP VIEW fdv2 AS
+SELECT a.id, a.keywords, a.title, aic.category_id, aic.changed
+FROM articles AS a JOIN articles_in_category AS aic ON a.id = aic.article_id
+WHERE aic.category_id in (14,62,70,53,138)
+GROUP BY a.id, aic.category_id, aic.article_id;
+
+ALTER TABLE articles DROP CONSTRAINT articles_pkey RESTRICT; -- fail
+ALTER TABLE articles_in_category DROP CONSTRAINT articles_in_category_pkey RESTRICT; --fail
+
+DROP VIEW fdv2;
+
+
+-- nested queries
+
+CREATE TEMP VIEW fdv3 AS
+SELECT id, keywords, title, body, created
+FROM articles
+GROUP BY id
+UNION
+SELECT id, keywords, title, body, created
+FROM articles
+GROUP BY id;
+
+ALTER TABLE articles DROP CONSTRAINT articles_pkey RESTRICT; -- fail
+
+DROP VIEW fdv3;
+
+
+CREATE TEMP VIEW fdv4 AS
+SELECT * FROM articles WHERE title IN (SELECT title FROM articles GROUP BY id);
+
+ALTER TABLE articles DROP CONSTRAINT articles_pkey RESTRICT; -- fail
+
+DROP VIEW fdv4;
+
+
+-- prepared query plans: this results in failure on reuse
+
+PREPARE foo AS
+  SELECT id, keywords, title, body, created
+  FROM articles
+  GROUP BY id;
+
+EXECUTE foo;
+
+ALTER TABLE articles DROP CONSTRAINT articles_pkey RESTRICT;
+
+EXECUTE foo;  -- fail