Teach the system how to use hashing for UNION. (INTERSECT/EXCEPT will follow,
but seem like a separate patch since most of the remaining work is on the
executor side.)  I took the opportunity to push selection of the grouping
operators for set operations into the parser where it belongs.  Otherwise this
is just a small exercise in making prepunion.c consider both alternatives.

As with the recent DISTINCT patch, this means we can UNION on datatypes that
can hash but not sort, and it means that UNION without ORDER BY is no longer
certain to produce sorted output.
This commit is contained in:
Tom Lane 2008-08-07 01:11:52 +00:00
parent 3d40d5e70e
commit 2d1d96b1ce
20 changed files with 462 additions and 212 deletions

View File

@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/catalog/dependency.c,v 1.77 2008/08/02 21:31:59 tgl Exp $
* $PostgreSQL: pgsql/src/backend/catalog/dependency.c,v 1.78 2008/08/07 01:11:46 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -1597,6 +1597,15 @@ find_expr_references_walker(Node *node,
context->rtables = list_delete_first(context->rtables);
return result;
}
else if (IsA(node, SetOperationStmt))
{
SetOperationStmt *setop = (SetOperationStmt *) node;
/* we need to look at the groupClauses for operator references */
find_expr_references_walker((Node *) setop->groupClauses, context);
/* fall through to examine child nodes */
}
return expression_tree_walker(node, find_expr_references_walker,
(void *) context);
}

View File

@ -15,7 +15,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.396 2008/08/02 21:31:59 tgl Exp $
* $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.397 2008/08/07 01:11:46 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -1943,6 +1943,7 @@ _copySetOperationStmt(SetOperationStmt *from)
COPY_NODE_FIELD(rarg);
COPY_NODE_FIELD(colTypes);
COPY_NODE_FIELD(colTypmods);
COPY_NODE_FIELD(groupClauses);
return newnode;
}

View File

@ -18,7 +18,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/nodes/equalfuncs.c,v 1.325 2008/08/02 21:31:59 tgl Exp $
* $PostgreSQL: pgsql/src/backend/nodes/equalfuncs.c,v 1.326 2008/08/07 01:11:47 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -839,6 +839,7 @@ _equalSetOperationStmt(SetOperationStmt *a, SetOperationStmt *b)
COMPARE_NODE_FIELD(rarg);
COMPARE_NODE_FIELD(colTypes);
COMPARE_NODE_FIELD(colTypmods);
COMPARE_NODE_FIELD(groupClauses);
return true;
}

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.330 2008/08/05 02:43:17 tgl Exp $
* $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.331 2008/08/07 01:11:48 tgl Exp $
*
* NOTES
* Every node type that can appear in stored rules' parsetrees *must*
@ -1780,6 +1780,7 @@ _outSetOperationStmt(StringInfo str, SetOperationStmt *node)
WRITE_NODE_FIELD(rarg);
WRITE_NODE_FIELD(colTypes);
WRITE_NODE_FIELD(colTypmods);
WRITE_NODE_FIELD(groupClauses);
}
static void

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/nodes/readfuncs.c,v 1.211 2008/08/02 21:31:59 tgl Exp $
* $PostgreSQL: pgsql/src/backend/nodes/readfuncs.c,v 1.212 2008/08/07 01:11:49 tgl Exp $
*
* NOTES
* Path and Plan nodes do not have any readfuncs support, because we
@ -232,6 +232,7 @@ _readSetOperationStmt(void)
READ_NODE_FIELD(rarg);
READ_NODE_FIELD(colTypes);
READ_NODE_FIELD(colTypmods);
READ_NODE_FIELD(groupClauses);
READ_DONE();
}

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.239 2008/08/05 16:03:10 tgl Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.240 2008/08/07 01:11:50 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -68,10 +68,6 @@ static double preprocess_limit(PlannerInfo *root,
double tuple_fraction,
int64 *offset_est, int64 *count_est);
static void preprocess_groupclause(PlannerInfo *root);
static Oid *extract_grouping_ops(List *groupClause);
static AttrNumber *extract_grouping_cols(List *groupClause, List *tlist);
static bool grouping_is_sortable(List *groupClause);
static bool grouping_is_hashable(List *groupClause);
static bool choose_hashed_grouping(PlannerInfo *root,
double tuple_fraction, double limit_tuples,
Path *cheapest_path, Path *sorted_path,
@ -784,10 +780,9 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
/*
* If there's a top-level ORDER BY, assume we have to fetch all the
* tuples. This might seem too simplistic given all the hackery below
* to possibly avoid the sort ... but a nonzero tuple_fraction is only
* of use to plan_set_operations() when the setop is UNION ALL, and
* the result of UNION ALL is always unsorted.
* tuples. This might be too simplistic given all the hackery below
* to possibly avoid the sort; but the odds of accurate estimates
* here are pretty low anyway.
*/
if (parse->sortClause)
tuple_fraction = 0.0;
@ -818,7 +813,8 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
*/
Assert(parse->commandType == CMD_SELECT);
tlist = postprocess_setop_tlist(result_plan->targetlist, tlist);
tlist = postprocess_setop_tlist(copyObject(result_plan->targetlist),
tlist);
/*
* Can't handle FOR UPDATE/SHARE here (parser should have checked
@ -1714,100 +1710,6 @@ preprocess_groupclause(PlannerInfo *root)
parse->groupClause = new_groupclause;
}
/*
* extract_grouping_ops - make an array of the equality operator OIDs
* for a SortGroupClause list
*
* The result is a freshly palloc'd array holding one eqop per list member,
* in list order.
*/
static Oid *
extract_grouping_ops(List *groupClause)
{
int numCols = list_length(groupClause);
int colno = 0;
Oid *groupOperators;
ListCell *glitem;
groupOperators = (Oid *) palloc(sizeof(Oid) * numCols);
foreach(glitem, groupClause)
{
SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem);
groupOperators[colno] = groupcl->eqop;
/* every grouping clause is expected to carry a valid equality operator */
Assert(OidIsValid(groupOperators[colno]));
colno++;
}
return groupOperators;
}
/*
* extract_grouping_cols - make an array of the grouping column resnos
* for a SortGroupClause list
*
* Each clause is resolved against tlist with get_sortgroupclause_tle(); the
* result is a freshly palloc'd array holding one resno per clause, in list
* order.
*/
static AttrNumber *
extract_grouping_cols(List *groupClause, List *tlist)
{
AttrNumber *grpColIdx;
int numCols = list_length(groupClause);
int colno = 0;
ListCell *glitem;
grpColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols);
foreach(glitem, groupClause)
{
SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem);
TargetEntry *tle = get_sortgroupclause_tle(groupcl, tlist);
grpColIdx[colno++] = tle->resno;
}
return grpColIdx;
}
/*
* grouping_is_sortable - is it possible to implement grouping list by sorting?
*
* This is easy since the parser will have included a sortop if one exists.
*
* Returns false as soon as a clause without a valid sortop is found, and
* true otherwise (so an empty list is reported as sortable).
*/
static bool
grouping_is_sortable(List *groupClause)
{
ListCell *glitem;
foreach(glitem, groupClause)
{
SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem);
if (!OidIsValid(groupcl->sortop))
return false;
}
return true;
}
/*
* grouping_is_hashable - is it possible to implement grouping list by hashing?
*
* We assume hashing is OK if the equality operators are marked oprcanhash.
* (If there isn't actually a supporting hash function, the executor will
* complain at runtime; but this is a misdeclaration of the operator, not
* a system bug.)
*
* Returns false as soon as op_hashjoinable() rejects a clause's eqop, and
* true otherwise (so an empty list is reported as hashable).
*/
static bool
grouping_is_hashable(List *groupClause)
{
ListCell *glitem;
foreach(glitem, groupClause)
{
SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem);
if (!op_hashjoinable(groupcl->eqop))
return false;
}
return true;
}
/*
* choose_hashed_grouping - should we use hashed grouping?
*

View File

@ -22,7 +22,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/prep/prepunion.c,v 1.149 2008/08/02 21:32:00 tgl Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/prep/prepunion.c,v 1.150 2008/08/07 01:11:50 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -32,8 +32,12 @@
#include "access/heapam.h"
#include "catalog/namespace.h"
#include "catalog/pg_type.h"
#include "miscadmin.h"
#include "nodes/makefuncs.h"
#include "optimizer/clauses.h"
#include "optimizer/cost.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
#include "optimizer/plancat.h"
#include "optimizer/planmain.h"
#include "optimizer/planner.h"
@ -61,6 +65,13 @@ static List *recurse_union_children(Node *setOp, PlannerInfo *root,
double tuple_fraction,
SetOperationStmt *top_union,
List *refnames_tlist);
static Plan *make_union_unique(SetOperationStmt *op, Plan *plan,
PlannerInfo *root, double tuple_fraction,
List **sortClauses);
static bool choose_hashed_setop(PlannerInfo *root, List *groupClauses,
Plan *input_plan,
double tuple_fraction, double dNumDistinctRows,
const char *construct);
static List *generate_setop_tlist(List *colTypes, int flag,
Index varno,
bool hack_constants,
@ -69,7 +80,7 @@ static List *generate_setop_tlist(List *colTypes, int flag,
static List *generate_append_tlist(List *colTypes, bool flag,
List *input_plans,
List *refnames_tlist);
static List *generate_setop_sortlist(List *targetlist);
static List *generate_setop_grouplist(SetOperationStmt *op, List *targetlist);
static void expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte,
Index rti);
static void make_inh_translation_lists(Relation oldrelation,
@ -99,7 +110,8 @@ static List *adjust_inherited_tlist(List *tlist,
* top level has already been factored into tuple_fraction.
*
* *sortClauses is an output argument: it is set to a list of SortGroupClauses
* representing the result ordering of the topmost set operation.
* representing the result ordering of the topmost set operation. (This will
* be NIL if the output isn't ordered.)
*/
Plan *
plan_set_operations(PlannerInfo *root, double tuple_fraction,
@ -287,8 +299,8 @@ generate_union_plan(SetOperationStmt *op, PlannerInfo *root,
/*
* If any of my children are identical UNION nodes (same op, all-flag, and
* colTypes) then they can be merged into this node so that we generate
* only one Append and Sort for the lot. Recurse to find such nodes and
* compute their children's plans.
* only one Append and unique-ification for the lot. Recurse to find such
* nodes and compute their children's plans.
*/
planlist = list_concat(recurse_union_children(op->larg, root,
tuple_fraction,
@ -314,22 +326,12 @@ generate_union_plan(SetOperationStmt *op, PlannerInfo *root,
/*
* For UNION ALL, we just need the Append plan. For UNION, need to add
* Sort and Unique nodes to produce unique output.
* node(s) to remove duplicates.
*/
if (!op->all)
{
List *sortList;
sortList = generate_setop_sortlist(tlist);
if (sortList)
{
plan = (Plan *) make_sort_from_sortclauses(root, sortList, plan);
plan = (Plan *) make_unique(plan, sortList);
}
*sortClauses = sortList;
}
if (op->all)
*sortClauses = NIL; /* result of UNION ALL is always unsorted */
else
*sortClauses = NIL;
plan = make_union_unique(op, plan, root, tuple_fraction, sortClauses);
return plan;
}
@ -346,7 +348,7 @@ generate_nonunion_plan(SetOperationStmt *op, PlannerInfo *root,
*rplan,
*plan;
List *tlist,
*sortList,
*groupList,
*planlist,
*child_sortclauses;
SetOpCmd cmd;
@ -381,19 +383,24 @@ generate_nonunion_plan(SetOperationStmt *op, PlannerInfo *root,
*/
plan = (Plan *) make_append(planlist, false, tlist);
/*
* Sort the child results, then add a SetOp plan node to generate the
* correct output.
*/
sortList = generate_setop_sortlist(tlist);
/* Identify the grouping semantics */
groupList = generate_setop_grouplist(op, tlist);
if (sortList == NIL) /* nothing to sort on? */
/* punt if nothing to group on (can this happen?) */
if (groupList == NIL)
{
*sortClauses = NIL;
return plan;
}
plan = (Plan *) make_sort_from_sortclauses(root, sortList, plan);
/*
* Decide whether to hash or sort, and add a sort node if needed.
*/
plan = (Plan *) make_sort_from_sortclauses(root, groupList, plan);
/*
* Finally, add a SetOp plan node to generate the correct output.
*/
switch (op->op)
{
case SETOP_INTERSECT:
@ -403,14 +410,13 @@ generate_nonunion_plan(SetOperationStmt *op, PlannerInfo *root,
cmd = op->all ? SETOPCMD_EXCEPT_ALL : SETOPCMD_EXCEPT;
break;
default:
elog(ERROR, "unrecognized set op: %d",
(int) op->op);
elog(ERROR, "unrecognized set op: %d", (int) op->op);
cmd = SETOPCMD_INTERSECT; /* keep compiler quiet */
break;
}
plan = (Plan *) make_setop(cmd, plan, sortList, list_length(op->colTypes) + 1);
plan = (Plan *) make_setop(cmd, plan, groupList, list_length(op->colTypes) + 1);
*sortClauses = sortList;
*sortClauses = groupList;
return plan;
}
@ -465,6 +471,171 @@ recurse_union_children(Node *setOp, PlannerInfo *root,
&child_sortclauses));
}
/*
 * make_union_unique
 *		Stack duplicate-removal node(s) on top of a UNION's input plan.
 *
 * op is the UNION being planned and plan is the plan producing its
 * (not yet unique-ified) rows.  tuple_fraction is handed through to
 * choose_hashed_setop() to help pick between hashing and sorting.
 *
 * *sortClauses is set to the grouping list when the result comes out sorted
 * (Sort + Unique), or to NIL when it does not (hashed aggregation, or
 * nothing to group on).
 *
 * Returns the new top plan node, or the input plan unchanged if there is
 * nothing to group on.
 */
static Plan *
make_union_unique(SetOperationStmt *op, Plan *plan,
				  PlannerInfo *root, double tuple_fraction,
				  List **sortClauses)
{
	List	   *grouping;
	double		dGroups;
	long		lGroups;
	bool		use_hash;

	/* Work out the grouping semantics for the setop's output columns */
	grouping = generate_setop_grouplist(op, plan->targetlist);

	/* With no grouping columns there is nothing to do (can this happen?) */
	if (grouping == NIL)
	{
		*sortClauses = NIL;
		return plan;
	}

	/*
	 * XXX for now the number of distinct groups is taken to be the whole
	 * input size, i.e. the worst case.  That is overly conservative, but it
	 * avoids the risk of the hashtable overrunning memory, and no better
	 * estimate of the true size is readily available.  Bear in mind too that
	 * novices tend to write UNION instead of UNION ALL even when they expect
	 * no duplicates at all.
	 */
	dGroups = plan->plan_rows;

	/* The same figure as a long, clamped so the conversion can't overflow */
	lGroups = (long) Min(dGroups, (double) LONG_MAX);

	/* Hash or sort? */
	use_hash = choose_hashed_setop(root, grouping, plan,
								   tuple_fraction, dGroups,
								   "UNION");

	if (!use_hash)
	{
		/* Sort, then Unique; the output ordering is therefore known */
		plan = (Plan *) make_sort_from_sortclauses(root, grouping, plan);
		plan = (Plan *) make_unique(plan, grouping);
		plan->plan_rows = dGroups;
		*sortClauses = grouping;
		return plan;
	}
	else
	{
		/* Hashed aggregation needs no sort step */
		AttrNumber *groupCols = extract_grouping_cols(grouping,
													  plan->targetlist);
		Oid		   *groupOps = extract_grouping_ops(grouping);

		plan = (Plan *) make_agg(root,
								 plan->targetlist,
								 NIL,
								 AGG_HASHED,
								 list_length(grouping),
								 groupCols,
								 groupOps,
								 lGroups,
								 0,
								 plan);
	}

	/* A hashed aggregate emits rows in no particular order */
	*sortClauses = NIL;
	return plan;
}
/*
* choose_hashed_setop - should we use hashing for a set operation?
*
* groupClauses is the SortGroupClause list describing the setop's grouping
* columns, and input_plan is the plan whose output is to be unique-ified;
* its cost, row-count, width and targetlist fields are read for estimation.
* dNumDistinctRows is the caller's estimate of the number of groups.
* tuple_fraction is the top-level tuple fraction; a value >= 1.0 is treated
* as an absolute row count and converted to a fraction below.
* construct is the SQL construct's name, used only in the error message.
*
* Returns true to use hashing and false to use sorting.  If the grouping
* columns can be neither all sorted nor all hashed, an error is reported.
*/
static bool
choose_hashed_setop(PlannerInfo *root, List *groupClauses,
Plan *input_plan,
double tuple_fraction, double dNumDistinctRows,
const char *construct)
{
int numDistinctCols = list_length(groupClauses);
bool can_sort;
bool can_hash;
Size hashentrysize;
List *needed_pathkeys;
Path hashed_p;
Path sorted_p;
/* Check whether the operators support sorting or hashing */
can_sort = grouping_is_sortable(groupClauses);
can_hash = grouping_is_hashable(groupClauses);
if (can_hash && can_sort)
{
/* we have a meaningful choice to make, continue ... */
}
else if (can_hash)
return true;
else if (can_sort)
return false;
else
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
/* translator: %s is UNION, INTERSECT, or EXCEPT */
errmsg("could not implement %s", construct),
errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
/* Prefer sorting when enable_hashagg is off */
if (!enable_hashagg)
return false;
/*
* Don't do it if it doesn't look like the hashtable will fit into
* work_mem.
*
* NOTE(review): the "* 1024L" presumably converts work_mem from
* kilobytes to bytes --- confirm against the work_mem definition.
*/
hashentrysize = MAXALIGN(input_plan->plan_width) + MAXALIGN(sizeof(MinimalTupleData));
if (hashentrysize * dNumDistinctRows > work_mem * 1024L)
return false;
/*
* See if the estimated cost is no more than doing it the other way.
*
* We need to consider input_plan + hashagg versus input_plan + sort +
* group. Note that the actual result plan might involve a SetOp or
* Unique node, not Agg or Group, but the cost estimates for Agg and Group
* should be close enough for our purposes here.
*
* These path variables are dummies that just hold cost fields; we don't
* make actual Paths for these steps.
*/
cost_agg(&hashed_p, root, AGG_HASHED, 0,
numDistinctCols, dNumDistinctRows,
input_plan->startup_cost, input_plan->total_cost,
input_plan->plan_rows);
/*
* Now for the sorted case. Note that the input is *always* unsorted,
* since it was made by appending unrelated sub-relations together.
*
* The steps below are order-dependent: sorted_p is seeded with the input
* costs, and its cost fields are then passed as the input costs to
* cost_sort and again to cost_group.
*/
sorted_p.startup_cost = input_plan->startup_cost;
sorted_p.total_cost = input_plan->total_cost;
/* XXX this is more expensive than cost_sort really needs: */
needed_pathkeys = make_pathkeys_for_sortclauses(root,
groupClauses,
input_plan->targetlist,
true);
cost_sort(&sorted_p, root, needed_pathkeys, sorted_p.total_cost,
input_plan->plan_rows, input_plan->plan_width, -1.0);
cost_group(&sorted_p, root, numDistinctCols, dNumDistinctRows,
sorted_p.startup_cost, sorted_p.total_cost,
input_plan->plan_rows);
/*
* Now make the decision using the top-level tuple fraction. First we
* have to convert an absolute count (LIMIT) into fractional form.
*/
if (tuple_fraction >= 1.0)
tuple_fraction /= dNumDistinctRows;
/* hashing wins only if strictly cheaper; a tie falls through to sorting */
if (compare_fractional_path_costs(&hashed_p, &sorted_p,
tuple_fraction) < 0)
{
/* Hashed is cheaper, so use it */
return true;
}
return false;
}
/*
* Generate targetlist for a set-operation plan node
*
@ -677,30 +848,47 @@ generate_append_tlist(List *colTypes, bool flag,
}
/*
* generate_setop_sortlist
* Build a SortGroupClause list enumerating all the non-resjunk
* tlist entries, using default ordering properties.
* generate_setop_grouplist
* Build a SortGroupClause list defining the sort/grouping properties
* of the setop's output columns.
*
* For now, we require all the items to be sortable. Eventually we
* should implement hashing setops and allow hash-only datatypes.
* Parse analysis already determined the properties and built a suitable
* list, except that the entries do not have sortgrouprefs set because
* the parser output representation doesn't include a tlist for each
* setop. So what we need to do here is copy that list and install
* proper sortgrouprefs into it and into the targetlist.
*/
static List *
generate_setop_sortlist(List *targetlist)
generate_setop_grouplist(SetOperationStmt *op, List *targetlist)
{
List *sortlist = NIL;
ListCell *l;
List *grouplist = (List *) copyObject(op->groupClauses);
ListCell *lg;
ListCell *lt;
Index refno = 1;
foreach(l, targetlist)
lg = list_head(grouplist);
foreach(lt, targetlist)
{
TargetEntry *tle = (TargetEntry *) lfirst(l);
TargetEntry *tle = (TargetEntry *) lfirst(lt);
SortGroupClause *sgc;
if (!tle->resjunk)
sortlist = addTargetToGroupList(NULL, tle,
sortlist, targetlist,
true, /* XXX fixme someday */
false);
/* tlist shouldn't have any sortgrouprefs yet */
Assert(tle->ressortgroupref == 0);
if (tle->resjunk)
continue; /* ignore resjunk columns */
/* non-resjunk columns should have grouping clauses */
Assert(lg != NULL);
sgc = (SortGroupClause *) lfirst(lg);
lg = lnext(lg);
Assert(sgc->tleSortGroupRef == 0);
/* we could use assignSortGroupRef here, but seems a bit silly */
sgc->tleSortGroupRef = tle->ressortgroupref = refno++;
}
return sortlist;
Assert(lg == NULL);
return grouplist;
}

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/util/clauses.c,v 1.260 2008/08/02 21:32:00 tgl Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/util/clauses.c,v 1.261 2008/08/07 01:11:50 tgl Exp $
*
* HISTORY
* AUTHOR DATE MAJOR EVENT
@ -3933,6 +3933,8 @@ expression_tree_walker(Node *node,
return true;
if (walker(setop->rarg, context))
return true;
/* groupClauses are deemed uninteresting */
}
break;
case T_InClauseInfo:
@ -4535,6 +4537,7 @@ expression_tree_mutator(Node *node,
FLATCOPY(newnode, setop, SetOperationStmt);
MUTATE(newnode->larg, setop->larg, Node *);
MUTATE(newnode->rarg, setop->rarg, Node *);
/* We do not mutate groupClauses by default */
return (Node *) newnode;
}
break;

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.144 2008/08/02 21:32:00 tgl Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.145 2008/08/07 01:11:50 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -24,7 +24,6 @@
#include "optimizer/paths.h"
#include "optimizer/tlist.h"
#include "parser/parse_expr.h"
#include "parser/parse_oper.h"
#include "parser/parsetree.h"
#include "utils/selfuncs.h"
#include "utils/lsyscache.h"
@ -1003,12 +1002,6 @@ query_is_distinct_for(Query *query, List *colnos, List *opids)
/*
* UNION, INTERSECT, EXCEPT guarantee uniqueness of the whole output row,
* except with ALL.
*
* XXX this code knows that prepunion.c will adopt the default sort/group
* operators for each column datatype to determine uniqueness. It'd
* probably be better if these operators were chosen at parse time and
* stored into the parsetree, instead of leaving bits of the planner to
* decide semantics.
*/
if (query->setOperations)
{
@ -1019,24 +1012,26 @@ query_is_distinct_for(Query *query, List *colnos, List *opids)
if (!topop->all)
{
ListCell *lg;
/* We're good if all the nonjunk output columns are in colnos */
lg = list_head(topop->groupClauses);
foreach(l, query->targetList)
{
TargetEntry *tle = (TargetEntry *) lfirst(l);
Oid tle_eq_opr;
SortGroupClause *sgc;
if (tle->resjunk)
continue; /* ignore resjunk columns */
/* non-resjunk columns should have grouping clauses */
Assert(lg != NULL);
sgc = (SortGroupClause *) lfirst(lg);
lg = lnext(lg);
opid = distinct_col_search(tle->resno, colnos, opids);
if (!OidIsValid(opid))
break; /* exit early if no match */
/* check for compatible semantics */
get_sort_group_operators(exprType((Node *) tle->expr),
false, false, false,
NULL, &tle_eq_opr, NULL);
if (!OidIsValid(tle_eq_opr) ||
!equality_ops_are_compatible(opid, tle_eq_opr))
if (!OidIsValid(opid) ||
!equality_ops_are_compatible(opid, sgc->eqop))
break; /* exit early if no match */
}
if (l == NULL) /* had matches for all? */

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/util/tlist.c,v 1.79 2008/08/02 21:32:00 tgl Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/util/tlist.c,v 1.80 2008/08/07 01:11:50 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -18,6 +18,7 @@
#include "optimizer/tlist.h"
#include "optimizer/var.h"
#include "parser/parse_expr.h"
#include "utils/lsyscache.h"
/*****************************************************************************
@ -202,6 +203,109 @@ get_sortgrouplist_exprs(List *sgClauses, List *targetList)
}
/*****************************************************************************
* Functions to extract data from a list of SortGroupClauses
*
* These don't really belong in tlist.c, but they are sort of related to the
* functions just above, and they don't seem to deserve their own file.
*****************************************************************************/
/*
 * extract_grouping_ops
 *		Collect the equality operator OID of every SortGroupClause in the
 *		given list into a palloc'd array, preserving list order.
 */
Oid *
extract_grouping_ops(List *groupClause)
{
	Oid		   *eqOps;
	ListCell   *lc;
	int			i;

	eqOps = (Oid *) palloc(sizeof(Oid) * list_length(groupClause));

	i = 0;
	foreach(lc, groupClause)
	{
		SortGroupClause *sgc = (SortGroupClause *) lfirst(lc);

		/* each grouping clause is expected to carry a valid equality op */
		Assert(OidIsValid(sgc->eqop));
		eqOps[i++] = sgc->eqop;
	}

	return eqOps;
}
/*
 * extract_grouping_cols
 *		Build a palloc'd array holding, for each SortGroupClause in the list,
 *		the resno of the targetlist entry that the clause refers to.
 *		Entries appear in the same order as the clauses.
 */
AttrNumber *
extract_grouping_cols(List *groupClause, List *tlist)
{
	AttrNumber *colIdx;
	ListCell   *lc;
	int			i = 0;

	colIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * list_length(groupClause));

	foreach(lc, groupClause)
	{
		SortGroupClause *sgc = (SortGroupClause *) lfirst(lc);
		TargetEntry *tle = get_sortgroupclause_tle(sgc, tlist);

		colIdx[i] = tle->resno;
		i++;
	}

	return colIdx;
}
/*
 * grouping_is_sortable
 *		Can the given grouping list be implemented by sorting?
 *
 * The parser stores a sortop in each clause whenever one exists, so it is
 * enough to check that every clause has a valid one.
 */
bool
grouping_is_sortable(List *groupClause)
{
	bool		sortable = true;
	ListCell   *lc;

	foreach(lc, groupClause)
	{
		SortGroupClause *sgc = (SortGroupClause *) lfirst(lc);

		if (!OidIsValid(sgc->sortop))
		{
			/* one unsortable column spoils the whole list */
			sortable = false;
			break;
		}
	}

	return sortable;
}
/*
 * grouping_is_hashable
 *		Can the given grouping list be implemented by hashing?
 *
 * Hashing is taken to be possible when every equality operator is marked
 * oprcanhash.  Should a supporting hash function turn out to be missing,
 * the executor will complain at runtime; that would be a misdeclared
 * operator rather than a system bug.
 */
bool
grouping_is_hashable(List *groupClause)
{
	bool		hashable = true;
	ListCell   *lc;

	foreach(lc, groupClause)
	{
		SortGroupClause *sgc = (SortGroupClause *) lfirst(lc);

		if (!op_hashjoinable(sgc->eqop))
		{
			/* one unhashable column spoils the whole list */
			hashable = false;
			break;
		}
	}

	return hashable;
}
/*
* Does tlist have same output datatypes as listed in colTypes?
*

View File

@ -17,7 +17,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/backend/parser/analyze.c,v 1.375 2008/08/02 21:32:00 tgl Exp $
* $PostgreSQL: pgsql/src/backend/parser/analyze.c,v 1.376 2008/08/07 01:11:51 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -33,6 +33,7 @@
#include "parser/parse_clause.h"
#include "parser/parse_coerce.h"
#include "parser/parse_expr.h"
#include "parser/parse_oper.h"
#include "parser/parse_relation.h"
#include "parser/parse_target.h"
#include "parser/parsetree.h"
@ -1326,6 +1327,7 @@ transformSetOperationTree(ParseState *pstate, SelectStmt *stmt)
op->colTypes = NIL;
op->colTypmods = NIL;
op->groupClauses = NIL;
/* don't have a "foreach4", so chase two of the lists by hand */
lcm = list_head(lcoltypmods);
rcm = list_head(rcoltypmods);
@ -1349,6 +1351,31 @@ transformSetOperationTree(ParseState *pstate, SelectStmt *stmt)
op->colTypes = lappend_oid(op->colTypes, rescoltype);
op->colTypmods = lappend_int(op->colTypmods, rescoltypmod);
/*
* For all cases except UNION ALL, identify the grouping operators
* (and, if available, sorting operators) that will be used to
* eliminate duplicates.
*/
if (op->op != SETOP_UNION || !op->all)
{
SortGroupClause *grpcl = makeNode(SortGroupClause);
Oid sortop;
Oid eqop;
/* determine the eqop and optional sortop */
get_sort_group_operators(rescoltype,
false, true, false,
&sortop, &eqop, NULL);
/* we don't have a tlist yet, so can't assign sortgrouprefs */
grpcl->tleSortGroupRef = 0;
grpcl->eqop = eqop;
grpcl->sortop = sortop;
grpcl->nulls_first = false; /* OK with or without sortop */
op->groupClauses = lappend(op->groupClauses, grpcl);
}
lcm = lnext(lcm);
rcm = lnext(rcm);
}

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/parser/parse_clause.c,v 1.174 2008/08/05 02:43:17 tgl Exp $
* $PostgreSQL: pgsql/src/backend/parser/parse_clause.c,v 1.175 2008/08/07 01:11:51 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -70,6 +70,9 @@ static List *addTargetToSortList(ParseState *pstate, TargetEntry *tle,
List *sortlist, List *targetlist,
SortByDir sortby_dir, SortByNulls sortby_nulls,
List *sortby_opname, bool resolveUnknown);
static List *addTargetToGroupList(ParseState *pstate, TargetEntry *tle,
List *grouplist, List *targetlist,
bool resolveUnknown);
/*
@ -1355,7 +1358,7 @@ transformGroupClause(ParseState *pstate, List *grouplist,
if (!found)
result = addTargetToGroupList(pstate, tle,
result, *targetlist,
false, true);
true);
}
return result;
@ -1456,7 +1459,7 @@ transformDistinctClause(ParseState *pstate,
continue; /* ignore junk */
result = addTargetToGroupList(pstate, tle,
result, *targetlist,
false, true);
true);
}
return result;
@ -1551,7 +1554,7 @@ transformDistinctOnClause(ParseState *pstate, List *distinctlist,
errmsg("SELECT DISTINCT ON expressions must match initial ORDER BY expressions")));
result = addTargetToGroupList(pstate, tle,
result, *targetlist,
false, true);
true);
}
return result;
@ -1679,10 +1682,6 @@ addTargetToSortList(ParseState *pstate, TargetEntry *tle,
* the TLE is considered "already in the list" if it appears there with any
* sorting semantics.
*
* If requireSortOp is TRUE, we require a sorting operator to be found too.
* XXX this argument should eventually be obsolete, but for now there are
* parts of the system that can't support non-sortable grouping lists.
*
* If resolveUnknown is TRUE, convert TLEs of type UNKNOWN to TEXT. If not,
* do nothing (which implies the search for an equality operator will fail).
* pstate should be provided if resolveUnknown is TRUE, but can be NULL
@ -1690,10 +1689,10 @@ addTargetToSortList(ParseState *pstate, TargetEntry *tle,
*
* Returns the updated SortGroupClause list.
*/
List *
static List *
addTargetToGroupList(ParseState *pstate, TargetEntry *tle,
List *grouplist, List *targetlist,
bool requireSortOp, bool resolveUnknown)
bool resolveUnknown)
{
Oid restype = exprType((Node *) tle->expr);
Oid sortop;
@ -1716,7 +1715,7 @@ addTargetToGroupList(ParseState *pstate, TargetEntry *tle,
/* determine the eqop and optional sortop */
get_sort_group_operators(restype,
requireSortOp, true, false,
false, true, false,
&sortop, &eqop, NULL);
grpcl->tleSortGroupRef = assignSortGroupRef(tle, targetlist);

View File

@ -37,7 +37,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.475 2008/08/05 12:09:30 mha Exp $
* $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.476 2008/08/07 01:11:51 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -53,6 +53,6 @@
*/
/* yyyymmddN */
#define CATALOG_VERSION_NO 200808051
#define CATALOG_VERSION_NO 200808061
#endif

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/nodes/parsenodes.h,v 1.370 2008/08/02 21:32:00 tgl Exp $
* $PostgreSQL: pgsql/src/include/nodes/parsenodes.h,v 1.371 2008/08/07 01:11:51 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -797,7 +797,12 @@ typedef struct SelectStmt
* top-level Query node containing the leaf SELECTs as subqueries in its
* range table. Its setOperations field shows the tree of set operations,
* with leaf SelectStmt nodes replaced by RangeTblRef nodes, and internal
* nodes replaced by SetOperationStmt nodes.
* nodes replaced by SetOperationStmt nodes. Information about the output
* column types is added, too. (Note that the child nodes do not necessarily
* produce these types directly, but we've checked that their output types
* can be coerced to the output column type.) Also, if it's not UNION ALL,
* information about the types' sort/group semantics is provided in the form
* of a SortGroupClause list (same representation as, eg, DISTINCT).
* ----------------------
*/
typedef struct SetOperationStmt
@ -812,6 +817,8 @@ typedef struct SetOperationStmt
/* Fields derived during parse analysis: */
List *colTypes; /* OID list of output column type OIDs */
List *colTypmods; /* integer list of output column typmods */
List *groupClauses; /* a list of SortGroupClause's */
/* groupClauses is NIL if UNION ALL, but must be set otherwise */
} SetOperationStmt;

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/optimizer/tlist.h,v 1.50 2008/08/02 21:32:01 tgl Exp $
* $PostgreSQL: pgsql/src/include/optimizer/tlist.h,v 1.51 2008/08/07 01:11:52 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -32,6 +32,11 @@ extern Node *get_sortgroupclause_expr(SortGroupClause *sgClause,
extern List *get_sortgrouplist_exprs(List *sgClauses,
List *targetList);
extern Oid *extract_grouping_ops(List *groupClause);
extern AttrNumber *extract_grouping_cols(List *groupClause, List *tlist);
extern bool grouping_is_sortable(List *groupClause);
extern bool grouping_is_hashable(List *groupClause);
extern bool tlist_same_datatypes(List *tlist, List *colTypes, bool junkOK);
#endif /* TLIST_H */

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/parser/parse_clause.h,v 1.51 2008/08/02 21:32:01 tgl Exp $
* $PostgreSQL: pgsql/src/include/parser/parse_clause.h,v 1.52 2008/08/07 01:11:52 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -35,9 +35,6 @@ extern List *transformDistinctClause(ParseState *pstate,
extern List *transformDistinctOnClause(ParseState *pstate, List *distinctlist,
List **targetlist, List *sortClause);
extern List *addTargetToGroupList(ParseState *pstate, TargetEntry *tle,
List *grouplist, List *targetlist,
bool requireSortOp, bool resolveUnknown);
extern Index assignSortGroupRef(TargetEntry *tle, List *tlist);
extern bool targetIsInSortList(TargetEntry *tle, Oid sortop, List *sortList);

View File

@ -73,7 +73,7 @@ copy (select * from test1 join test2 using (id)) to stdout;
--
-- Test UNION SELECT
--
copy (select t from test1 where id = 1 UNION select * from v_test1) to stdout;
copy (select t from test1 where id = 1 UNION select * from v_test1 ORDER BY 1) to stdout;
a
v_a
v_b
@ -83,7 +83,7 @@ v_e
--
-- Test subselect
--
copy (select * from (select t from test1 where id = 1 UNION select * from v_test1) t1) to stdout;
copy (select * from (select t from test1 where id = 1 UNION select * from v_test1 ORDER BY 1) t1) to stdout;
a
v_a
v_b

View File

@ -102,7 +102,7 @@ SELECT 1.1 AS three UNION SELECT 2 UNION SELECT 3;
3
(3 rows)
SELECT 1.1::float8 AS two UNION SELECT 2 UNION SELECT 2.0::float8;
SELECT 1.1::float8 AS two UNION SELECT 2 UNION SELECT 2.0::float8 ORDER BY 1;
two
-----
1.1
@ -129,7 +129,8 @@ SELECT 1.1 AS two UNION (SELECT 2 UNION ALL SELECT 2);
--
SELECT f1 AS five FROM FLOAT8_TBL
UNION
SELECT f1 FROM FLOAT8_TBL;
SELECT f1 FROM FLOAT8_TBL
ORDER BY 1;
five
-----------------------
-1.2345678901234e+200
@ -158,7 +159,8 @@ SELECT f1 FROM FLOAT8_TBL;
SELECT f1 AS nine FROM FLOAT8_TBL
UNION
SELECT f1 FROM INT4_TBL;
SELECT f1 FROM INT4_TBL
ORDER BY 1;
nine
-----------------------
-1.2345678901234e+200
@ -205,7 +207,8 @@ SELECT f1 FROM INT4_TBL
SELECT CAST(f1 AS char(4)) AS three FROM VARCHAR_TBL
UNION
SELECT f1 FROM CHAR_TBL;
SELECT f1 FROM CHAR_TBL
ORDER BY 1;
three
-------
a
@ -215,7 +218,8 @@ SELECT f1 FROM CHAR_TBL;
SELECT f1 AS three FROM VARCHAR_TBL
UNION
SELECT CAST(f1 AS varchar) FROM CHAR_TBL;
SELECT CAST(f1 AS varchar) FROM CHAR_TBL
ORDER BY 1;
three
-------
a
@ -242,7 +246,8 @@ SELECT f1 AS five FROM TEXT_TBL
UNION
SELECT f1 FROM VARCHAR_TBL
UNION
SELECT TRIM(TRAILING FROM f1) FROM CHAR_TBL;
SELECT TRIM(TRAILING FROM f1) FROM CHAR_TBL
ORDER BY 1;
five
-------------------
a

View File

@ -53,11 +53,11 @@ copy (select * from test1 join test2 using (id)) to stdout;
--
-- Test UNION SELECT
--
copy (select t from test1 where id = 1 UNION select * from v_test1) to stdout;
copy (select t from test1 where id = 1 UNION select * from v_test1 ORDER BY 1) to stdout;
--
-- Test subselect
--
copy (select * from (select t from test1 where id = 1 UNION select * from v_test1) t1) to stdout;
copy (select * from (select t from test1 where id = 1 UNION select * from v_test1 ORDER BY 1) t1) to stdout;
--
-- Test headers, CSV and quotes
--

View File

@ -34,7 +34,7 @@ SELECT 1.0::float8 AS two UNION ALL SELECT 1;
SELECT 1.1 AS three UNION SELECT 2 UNION SELECT 3;
SELECT 1.1::float8 AS two UNION SELECT 2 UNION SELECT 2.0::float8;
SELECT 1.1::float8 AS two UNION SELECT 2 UNION SELECT 2.0::float8 ORDER BY 1;
SELECT 1.1 AS three UNION SELECT 2 UNION ALL SELECT 2;
@ -46,7 +46,8 @@ SELECT 1.1 AS two UNION (SELECT 2 UNION ALL SELECT 2);
SELECT f1 AS five FROM FLOAT8_TBL
UNION
SELECT f1 FROM FLOAT8_TBL;
SELECT f1 FROM FLOAT8_TBL
ORDER BY 1;
SELECT f1 AS ten FROM FLOAT8_TBL
UNION ALL
@ -54,7 +55,8 @@ SELECT f1 FROM FLOAT8_TBL;
SELECT f1 AS nine FROM FLOAT8_TBL
UNION
SELECT f1 FROM INT4_TBL;
SELECT f1 FROM INT4_TBL
ORDER BY 1;
SELECT f1 AS ten FROM FLOAT8_TBL
UNION ALL
@ -68,11 +70,13 @@ SELECT f1 FROM INT4_TBL
SELECT CAST(f1 AS char(4)) AS three FROM VARCHAR_TBL
UNION
SELECT f1 FROM CHAR_TBL;
SELECT f1 FROM CHAR_TBL
ORDER BY 1;
SELECT f1 AS three FROM VARCHAR_TBL
UNION
SELECT CAST(f1 AS varchar) FROM CHAR_TBL;
SELECT CAST(f1 AS varchar) FROM CHAR_TBL
ORDER BY 1;
SELECT f1 AS eight FROM VARCHAR_TBL
UNION ALL
@ -82,7 +86,8 @@ SELECT f1 AS five FROM TEXT_TBL
UNION
SELECT f1 FROM VARCHAR_TBL
UNION
SELECT TRIM(TRAILING FROM f1) FROM CHAR_TBL;
SELECT TRIM(TRAILING FROM f1) FROM CHAR_TBL
ORDER BY 1;
--
-- INTERSECT and EXCEPT