Allow planner to use Merge Append to efficiently implement UNION

Until now, UNION queries have often been suboptimal as the planner has
only ever considered using an Append node and making the results unique
by either using a Hash Aggregate, or by Sorting the entire Append result
and running it through the Unique operator.  Both of these methods
always require reading all rows from the union subqueries.

Here we adjust the union planner so that it can request that each subquery
produce results in target list order so that these can be Merge Appended
together and made unique with a Unique node.  This can improve performance
significantly as the union child can make use of the likes of btree
indexes and/or Merge Joins to provide the top-level UNION with presorted
input.  This is especially good if the top-level UNION contains a LIMIT
node that limits the output rows to a small subset of the unioned rows as
cheap startup plans can be used.

Author: David Rowley
Reviewed-by: Richard Guo, Andy Fan
Discussion: https://postgr.es/m/CAApHDvpb_63XQodmxKUF8vb9M7CxyUyT4sWvEgqeQU-GB7QFoQ@mail.gmail.com
This commit is contained in:
David Rowley 2024-03-25 14:31:14 +13:00
parent 47f99a407d
commit 66c0185a3d
15 changed files with 707 additions and 253 deletions

View File

@ -11495,6 +11495,10 @@ DROP INDEX base_tbl1_idx;
DROP INDEX base_tbl2_idx;
DROP INDEX async_p3_idx;
-- UNION queries
SET enable_sort TO off;
SET enable_incremental_sort TO off;
-- Adjust fdw_startup_cost so that we get an unordered path in the Append.
ALTER SERVER loopback2 OPTIONS (ADD fdw_startup_cost '0.00');
EXPLAIN (VERBOSE, COSTS OFF)
INSERT INTO result_tbl
(SELECT a, b, 'AAA' || c FROM async_p1 ORDER BY a LIMIT 10)
@ -11576,6 +11580,9 @@ SELECT * FROM result_tbl ORDER BY a;
(12 rows)
DELETE FROM result_tbl;
RESET enable_incremental_sort;
RESET enable_sort;
ALTER SERVER loopback2 OPTIONS (DROP fdw_startup_cost);
-- Disable async execution if we use gating Result nodes for pseudoconstant
-- quals
EXPLAIN (VERBOSE, COSTS OFF)

View File

@ -3877,6 +3877,11 @@ DROP INDEX base_tbl2_idx;
DROP INDEX async_p3_idx;
-- UNION queries
SET enable_sort TO off;
SET enable_incremental_sort TO off;
-- Adjust fdw_startup_cost so that we get an unordered path in the Append.
ALTER SERVER loopback2 OPTIONS (ADD fdw_startup_cost '0.00');
EXPLAIN (VERBOSE, COSTS OFF)
INSERT INTO result_tbl
(SELECT a, b, 'AAA' || c FROM async_p1 ORDER BY a LIMIT 10)
@ -3903,6 +3908,10 @@ UNION ALL
SELECT * FROM result_tbl ORDER BY a;
DELETE FROM result_tbl;
RESET enable_incremental_sort;
RESET enable_sort;
ALTER SERVER loopback2 OPTIONS (DROP fdw_startup_cost);
-- Disable async execution if we use gating Result nodes for pseudoconstant
-- quals
EXPLAIN (VERBOSE, COSTS OFF)

View File

@ -2867,6 +2867,67 @@ add_child_join_rel_equivalences(PlannerInfo *root,
MemoryContextSwitchTo(oldcontext);
}
/*
 * add_setop_child_rel_equivalences
 *		Add equivalence members for each non-resjunk target in 'child_tlist'
 *		to the EquivalenceClass in the corresponding setop_pathkey's
 *		pk_eclass.
 *
 * 'root' is the PlannerInfo belonging to the top-level set operation.
 * 'child_rel' is the RelOptInfo of the child relation we're adding
 * EquivalenceMembers for.
 * 'child_tlist' is the target list for the setop child relation.  The target
 * list expressions are what we add as EquivalenceMembers.
 * 'setop_pathkeys' is a list of PathKeys which must contain an entry for each
 * non-resjunk target in 'child_tlist'.
 */
void
add_setop_child_rel_equivalences(PlannerInfo *root, RelOptInfo *child_rel,
								 List *child_tlist, List *setop_pathkeys)
{
	ListCell   *lc;
	/* lc2 walks setop_pathkeys in step with the non-resjunk tlist entries */
	ListCell   *lc2 = list_head(setop_pathkeys);

	foreach(lc, child_tlist)
	{
		TargetEntry *tle = lfirst_node(TargetEntry, lc);
		EquivalenceMember *parent_em;
		PathKey    *pk;

		/* resjunk columns have no corresponding pathkey; skip them */
		if (tle->resjunk)
			continue;

		/* each non-resjunk target must have a matching pathkey */
		if (lc2 == NULL)
			elog(ERROR, "too few pathkeys for set operation");

		pk = lfirst_node(PathKey, lc2);
		parent_em = linitial(pk->pk_eclass->ec_members);

		/*
		 * We can safely pass the parent member as the first member in the
		 * ec_members list as this is added first in generate_union_paths,
		 * likewise, the JoinDomain can be that of the initial member of the
		 * Pathkey's EquivalenceClass.
		 */
		add_eq_member(pk->pk_eclass,
					  tle->expr,
					  child_rel->relids,
					  parent_em->em_jdomain,
					  parent_em,
					  exprType((Node *) tle->expr));

		lc2 = lnext(setop_pathkeys, lc2);
	}

	/*
	 * transformSetOperationStmt() ensures that the targetlist never contains
	 * any resjunk columns, so all eclasses that exist in 'root' must have
	 * received a new member in the loop above.  Add them to the child_rel's
	 * eclass_indexes.
	 */
	child_rel->eclass_indexes = bms_add_range(child_rel->eclass_indexes, 0,
											  list_length(root->eq_classes) - 1);
}
/*
* generate_implied_equalities_for_column

View File

@ -2191,6 +2191,22 @@ pathkeys_useful_for_grouping(PlannerInfo *root, List *pathkeys)
return n;
}
/*
 * pathkeys_useful_for_setop
 *		Count how many leading pathkeys in 'pathkeys' match the query's
 *		setop_pathkeys, i.e. are useful for producing the sorted input a
 *		set operation may request.
 */
static int
pathkeys_useful_for_setop(PlannerInfo *root, List *pathkeys)
{
	int			ncommon;

	(void) pathkeys_count_contained_in(root->setop_pathkeys, pathkeys,
									   &ncommon);

	return ncommon;
}
/*
* truncate_useless_pathkeys
* Shorten the given pathkey list to just the useful pathkeys.
@ -2208,6 +2224,9 @@ truncate_useless_pathkeys(PlannerInfo *root,
if (nuseful2 > nuseful)
nuseful = nuseful2;
nuseful2 = pathkeys_useful_for_grouping(root, pathkeys);
if (nuseful2 > nuseful)
nuseful = nuseful2;
nuseful2 = pathkeys_useful_for_setop(root, pathkeys);
if (nuseful2 > nuseful)
nuseful = nuseful2;

View File

@ -54,6 +54,7 @@
#include "optimizer/tlist.h"
#include "parser/analyze.h"
#include "parser/parse_agg.h"
#include "parser/parse_clause.h"
#include "parser/parse_relation.h"
#include "parser/parsetree.h"
#include "partitioning/partdesc.h"
@ -119,6 +120,8 @@ typedef struct
{
List *activeWindows; /* active windows, if any */
grouping_sets_data *gset_data; /* grouping sets data, if any */
SetOperationStmt *setop; /* parent set operation or NULL if not a
* subquery belonging to a set operation */
} standard_qp_extra;
/* Local functions */
@ -249,6 +252,8 @@ static bool group_by_has_partkey(RelOptInfo *input_rel,
List *targetList,
List *groupClause);
static int common_prefix_cmp(const void *a, const void *b);
static List *generate_setop_child_grouplist(SetOperationStmt *op,
List *targetlist);
/*****************************************************************************
@ -1500,6 +1505,18 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
qp_extra.activeWindows = activeWindows;
qp_extra.gset_data = gset_data;
/*
* Check if we're a subquery for a set operation. If we are, store
* the SetOperationStmt in qp_extra.
*/
if (root->parent_root != NULL &&
root->parent_root->parse->setOperations != NULL &&
IsA(root->parent_root->parse->setOperations, SetOperationStmt))
qp_extra.setop =
(SetOperationStmt *) root->parent_root->parse->setOperations;
else
qp_extra.setop = NULL;
/*
* Generate the best unsorted and presorted paths for the scan/join
* portion of this Query, ie the processing represented by the
@ -3433,6 +3450,27 @@ standard_qp_callback(PlannerInfo *root, void *extra)
parse->sortClause,
tlist);
/* setting setop_pathkeys might be useful to the union planner */
if (qp_extra->setop != NULL &&
set_operation_ordered_results_useful(qp_extra->setop))
{
List *groupClauses;
bool sortable;
groupClauses = generate_setop_child_grouplist(qp_extra->setop, tlist);
root->setop_pathkeys =
make_pathkeys_for_sortclauses_extended(root,
&groupClauses,
tlist,
false,
&sortable);
if (!sortable)
root->setop_pathkeys = NIL;
}
else
root->setop_pathkeys = NIL;
/*
* Figure out whether we want a sorted result from query_planner.
*
@ -3442,7 +3480,9 @@ standard_qp_callback(PlannerInfo *root, void *extra)
* sortable DISTINCT clause that's more rigorous than the ORDER BY clause,
* we try to produce output that's sufficiently well sorted for the
* DISTINCT. Otherwise, if there is an ORDER BY clause, we want to sort
* by the ORDER BY clause.
* by the ORDER BY clause. Otherwise, if we're a subquery being planned
* for a set operation which can benefit from presorted results and have a
* sortable targetlist, we want to sort by the target list.
*
* Note: if we have both ORDER BY and GROUP BY, and ORDER BY is a superset
* of GROUP BY, it would be tempting to request sort by ORDER BY --- but
@ -3460,6 +3500,8 @@ standard_qp_callback(PlannerInfo *root, void *extra)
root->query_pathkeys = root->distinct_pathkeys;
else if (root->sort_pathkeys)
root->query_pathkeys = root->sort_pathkeys;
else if (root->setop_pathkeys != NIL)
root->query_pathkeys = root->setop_pathkeys;
else
root->query_pathkeys = NIL;
}
@ -7866,3 +7908,44 @@ group_by_has_partkey(RelOptInfo *input_rel,
return true;
}
/*
 * generate_setop_child_grouplist
 *		Build a SortGroupClause list defining the sort/grouping properties
 *		of the child of a set operation.
 *
 * This is similar to generate_setop_grouplist() but differs as the setop
 * child query's targetlist entries may already have a tleSortGroupRef
 * assigned for other purposes, such as GROUP BYs.  Here we keep the
 * SortGroupClause list in the same order as 'op' groupClauses and just adjust
 * the tleSortGroupRef to reference the TargetEntry's 'ressortgroupref'.
 *
 * Note: this modifies the TargetEntries of 'targetlist' in place by setting
 * their ressortgroupref, linking them to the returned SortGroupClauses.
 */
static List *
generate_setop_child_grouplist(SetOperationStmt *op, List *targetlist)
{
	/* copy so we can adjust tleSortGroupRefs without disturbing 'op' */
	List	   *grouplist = copyObject(op->groupClauses);
	ListCell   *lg;
	ListCell   *lt;

	/* lg walks grouplist in step with the non-resjunk entries of targetlist */
	lg = list_head(grouplist);
	foreach(lt, targetlist)
	{
		TargetEntry *tle = (TargetEntry *) lfirst(lt);
		SortGroupClause *sgc;

		/* resjunk columns could have sortgrouprefs.  Leave these alone */
		if (tle->resjunk)
			continue;

		/* we expect every non-resjunk target to have a SortGroupClause */
		Assert(lg != NULL);
		sgc = (SortGroupClause *) lfirst(lg);
		lg = lnext(grouplist, lg);

		/* assign a tleSortGroupRef, or reuse the existing one */
		sgc->tleSortGroupRef = assignSortGroupRef(tle, targetlist);
		tle->ressortgroupref = sgc->tleSortGroupRef;
	}
	/* both lists must have been exhausted together */
	Assert(lg == NULL);
	return grouplist;
}

View File

@ -43,11 +43,15 @@ static RelOptInfo *recurse_set_operations(Node *setOp, PlannerInfo *root,
bool junkOK,
int flag, List *refnames_tlist,
List **pTargetList,
double *pNumGroups);
bool *istrivial_tlist);
static RelOptInfo *generate_recursion_path(SetOperationStmt *setOp,
PlannerInfo *root,
List *refnames_tlist,
List **pTargetList);
static void build_setop_child_paths(PlannerInfo *root, RelOptInfo *rel,
bool trivial_tlist, List *child_tlist,
List *interesting_pathkeys,
double *pNumGroups);
static RelOptInfo *generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
List *refnames_tlist,
List **pTargetList);
@ -57,9 +61,8 @@ static RelOptInfo *generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *ro
static List *plan_union_children(PlannerInfo *root,
SetOperationStmt *top_union,
List *refnames_tlist,
List **tlist_list);
static Path *make_union_unique(SetOperationStmt *op, Path *path, List *tlist,
PlannerInfo *root);
List **tlist_list,
List **istrivial_tlist);
static void postprocess_setop_rel(PlannerInfo *root, RelOptInfo *rel);
static bool choose_hashed_setop(PlannerInfo *root, List *groupClauses,
Path *input_path,
@ -114,10 +117,10 @@ plan_set_operations(PlannerInfo *root)
Assert(parse->distinctClause == NIL);
/*
* In the outer query level, we won't have any true equivalences to deal
* with; but we do want to be able to make pathkeys, which will require
* single-member EquivalenceClasses. Indicate that EC merging is complete
* so that pathkeys.c won't complain.
* In the outer query level, equivalence classes are limited to classes
* which define that the top-level target entry is equivalent to the
* corresponding child target entry. There won't be any equivalence class
* merging. Mark that merging is complete to allow us to make pathkeys.
*/
Assert(root->eq_classes == NIL);
root->ec_merging_done = true;
@ -152,6 +155,8 @@ plan_set_operations(PlannerInfo *root)
}
else
{
bool trivial_tlist;
/*
* Recurse on setOperations tree to generate paths for set ops. The
* final output paths should have just the column types shown as the
@ -163,7 +168,7 @@ plan_set_operations(PlannerInfo *root)
true, -1,
leftmostQuery->targetList,
&top_tlist,
NULL);
&trivial_tlist);
}
/* Must return the built tlist into root->processed_tlist. */
@ -172,6 +177,31 @@ plan_set_operations(PlannerInfo *root)
return setop_rel;
}
/*
 * set_operation_ordered_results_useful
 *		Return true if the given SetOperationStmt can be executed by utilizing
 *		paths that provide sorted input according to the setop's targetlist.
 *		Returns false when sorted paths are not any more useful than unsorted
 *		ones.
 */
bool
set_operation_ordered_results_useful(SetOperationStmt *setop)
{
	/*
	 * Paths sorted by the targetlist are useful for UNION as we can opt to
	 * MergeAppend the sorted paths then Unique them.  Ordered paths are no
	 * more useful than unordered ones for UNION ALL.
	 */
	if (!setop->all && setop->op == SETOP_UNION)
		return true;

	/*
	 * EXCEPT / EXCEPT ALL / INTERSECT / INTERSECT ALL cannot yet utilize
	 * correctly sorted input paths.
	 */
	return false;
}
/*
* recurse_set_operations
* Recursively handle one step in a tree of set operations
@ -184,8 +214,7 @@ plan_set_operations(PlannerInfo *root)
*
* Returns a RelOptInfo for the subtree, as well as these output parameters:
* *pTargetList: receives the fully-fledged tlist for the subtree's top plan
* *pNumGroups: if not NULL, we estimate the number of distinct groups
* in the result, and store it there
* *istrivial_tlist: true iff datatypes between parent and child match.
*
* The pTargetList output parameter is mostly redundant with the pathtarget
* of the returned RelOptInfo, but for the moment we need it because much of
@ -202,9 +231,11 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
bool junkOK,
int flag, List *refnames_tlist,
List **pTargetList,
double *pNumGroups)
bool *istrivial_tlist)
{
RelOptInfo *rel = NULL; /* keep compiler quiet */
RelOptInfo *rel;
*istrivial_tlist = true; /* for now */
/* Guard against stack overflow due to overly complex setop nests */
check_stack_depth();
@ -215,9 +246,6 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
RangeTblEntry *rte = root->simple_rte_array[rtr->rtindex];
Query *subquery = rte->subquery;
PlannerInfo *subroot;
RelOptInfo *final_rel;
Path *subpath;
Path *path;
List *tlist;
bool trivial_tlist;
@ -254,93 +282,7 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
/* Return the fully-fledged tlist to caller, too */
*pTargetList = tlist;
/*
* Mark rel with estimated output rows, width, etc. Note that we have
* to do this before generating outer-query paths, else
* cost_subqueryscan is not happy.
*/
set_subquery_size_estimates(root, rel);
/*
* Since we may want to add a partial path to this relation, we must
* set its consider_parallel flag correctly.
*/
final_rel = fetch_upper_rel(subroot, UPPERREL_FINAL, NULL);
rel->consider_parallel = final_rel->consider_parallel;
/*
* For the moment, we consider only a single Path for the subquery.
* This should change soon (make it look more like
* set_subquery_pathlist).
*/
subpath = get_cheapest_fractional_path(final_rel,
root->tuple_fraction);
/*
* Stick a SubqueryScanPath atop that.
*
* We don't bother to determine the subquery's output ordering since
* it won't be reflected in the set-op result anyhow; so just label
* the SubqueryScanPath with nil pathkeys. (XXX that should change
* soon too, likely.)
*/
path = (Path *) create_subqueryscan_path(root, rel, subpath,
trivial_tlist,
NIL, NULL);
add_path(rel, path);
/*
* If we have a partial path for the child relation, we can use that
* to build a partial path for this relation. But there's no point in
* considering any path but the cheapest.
*/
if (rel->consider_parallel && bms_is_empty(rel->lateral_relids) &&
final_rel->partial_pathlist != NIL)
{
Path *partial_subpath;
Path *partial_path;
partial_subpath = linitial(final_rel->partial_pathlist);
partial_path = (Path *)
create_subqueryscan_path(root, rel, partial_subpath,
trivial_tlist,
NIL, NULL);
add_partial_path(rel, partial_path);
}
/*
* Estimate number of groups if caller wants it. If the subquery used
* grouping or aggregation, its output is probably mostly unique
* anyway; otherwise do statistical estimation.
*
* XXX you don't really want to know about this: we do the estimation
* using the subroot->parse's original targetlist expressions, not the
* subroot->processed_tlist which might seem more appropriate. The
* reason is that if the subquery is itself a setop, it may return a
* processed_tlist containing "varno 0" Vars generated by
* generate_append_tlist, and those would confuse estimate_num_groups
* mightily. We ought to get rid of the "varno 0" hack, but that
* requires a redesign of the parsetree representation of setops, so
* that there can be an RTE corresponding to each setop's output.
* Note, we use this not subquery's targetlist but subroot->parse's
* targetlist, because it was revised by self-join removal. subquery's
* targetlist might contain the references to the removed relids.
*/
if (pNumGroups)
{
if (subquery->groupClause || subquery->groupingSets ||
subquery->distinctClause ||
subroot->hasHavingQual || subquery->hasAggs)
*pNumGroups = subpath->rows;
else
*pNumGroups = estimate_num_groups(subroot,
get_tlist_exprs(subroot->parse->targetList, false),
subpath->rows,
NULL,
NULL);
}
*istrivial_tlist = trivial_tlist;
}
else if (IsA(setOp, SetOperationStmt))
{
@ -355,8 +297,6 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
rel = generate_nonunion_paths(op, root,
refnames_tlist,
pTargetList);
if (pNumGroups)
*pNumGroups = rel->rows;
/*
* If necessary, add a Result node to project the caller-requested
@ -386,6 +326,7 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
*pTargetList,
refnames_tlist,
&trivial_tlist);
*istrivial_tlist = trivial_tlist;
target = create_pathtarget(root, *pTargetList);
/* Apply projection to each path */
@ -416,16 +357,16 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
lfirst(lc) = path;
}
}
postprocess_setop_rel(root, rel);
}
else
{
elog(ERROR, "unrecognized node type: %d",
(int) nodeTag(setOp));
*pTargetList = NIL;
rel = NULL; /* keep compiler quiet */
}
postprocess_setop_rel(root, rel);
return rel;
}
@ -444,7 +385,9 @@ generate_recursion_path(SetOperationStmt *setOp, PlannerInfo *root,
Path *lpath;
Path *rpath;
List *lpath_tlist;
bool lpath_trivial_tlist;
List *rpath_tlist;
bool rpath_trivial_tlist;
List *tlist;
List *groupList;
double dNumGroups;
@ -464,7 +407,10 @@ generate_recursion_path(SetOperationStmt *setOp, PlannerInfo *root,
false, -1,
refnames_tlist,
&lpath_tlist,
NULL);
&lpath_trivial_tlist);
if (lrel->rtekind == RTE_SUBQUERY)
build_setop_child_paths(root, lrel, lpath_trivial_tlist, lpath_tlist,
NIL, NULL);
lpath = lrel->cheapest_total_path;
/* The right path will want to look at the left one ... */
root->non_recursive_path = lpath;
@ -473,7 +419,10 @@ generate_recursion_path(SetOperationStmt *setOp, PlannerInfo *root,
false, -1,
refnames_tlist,
&rpath_tlist,
NULL);
&rpath_trivial_tlist);
if (rrel->rtekind == RTE_SUBQUERY)
build_setop_child_paths(root, rrel, rpath_trivial_tlist, rpath_tlist,
NIL, NULL);
rpath = rrel->cheapest_total_path;
root->non_recursive_path = NULL;
@ -535,6 +484,207 @@ generate_recursion_path(SetOperationStmt *setOp, PlannerInfo *root,
return result_rel;
}
/*
 * build_setop_child_paths
 *		Build paths for the set op child relation denoted by 'rel'.
 *
 * 'trivial_tlist': true if the child's targetlist requires no projection
 * atop the subquery scan (passed through to create_subqueryscan_path).
 * 'child_tlist': the child relation's target list; used to add equivalence
 * members when sorted paths are requested.
 * 'interesting_pathkeys': if not NIL, also include paths that suit these
 * pathkeys, sorting any unsorted paths as required.
 * '*pNumGroups': if not NULL, we estimate the number of distinct groups
 * in the result, and store it there.
 */
static void
build_setop_child_paths(PlannerInfo *root, RelOptInfo *rel,
						bool trivial_tlist, List *child_tlist,
						List *interesting_pathkeys, double *pNumGroups)
{
	RelOptInfo *final_rel;
	/* pathkeys the subquery was asked to sort by, in subquery terms */
	List	   *setop_pathkeys = rel->subroot->setop_pathkeys;
	ListCell   *lc;

	/* it can't be a set op child rel if it's not a subquery */
	Assert(rel->rtekind == RTE_SUBQUERY);

	/* when sorting is needed, add child rel equivalences */
	if (interesting_pathkeys != NIL)
		add_setop_child_rel_equivalences(root,
										 rel,
										 child_tlist,
										 interesting_pathkeys);

	/*
	 * Mark rel with estimated output rows, width, etc.  Note that we have to
	 * do this before generating outer-query paths, else cost_subqueryscan is
	 * not happy.
	 */
	set_subquery_size_estimates(root, rel);

	/*
	 * Since we may want to add a partial path to this relation, we must set
	 * its consider_parallel flag correctly.
	 */
	final_rel = fetch_upper_rel(rel->subroot, UPPERREL_FINAL, NULL);
	rel->consider_parallel = final_rel->consider_parallel;

	/* Generate subquery scan paths for any interesting path in final_rel */
	foreach(lc, final_rel->pathlist)
	{
		Path	   *subpath = (Path *) lfirst(lc);
		List	   *pathkeys;
		Path	   *cheapest_input_path = final_rel->cheapest_total_path;
		bool		is_sorted;
		int			presorted_keys;

		/*
		 * Include the cheapest path as-is so that the set operation can be
		 * cheaply implemented using a method which does not require the input
		 * to be sorted.
		 */
		if (subpath == cheapest_input_path)
		{
			/* Convert subpath's pathkeys to outer representation */
			pathkeys = convert_subquery_pathkeys(root, rel, subpath->pathkeys,
												 make_tlist_from_pathtarget(subpath->pathtarget));

			/* Generate outer path using this subpath */
			add_path(rel, (Path *) create_subqueryscan_path(root,
															rel,
															subpath,
															trivial_tlist,
															pathkeys,
															NULL));
		}

		/* skip dealing with sorted paths if the setop doesn't need them */
		if (interesting_pathkeys == NIL)
			continue;

		/*
		 * Create paths to suit final sort order required for setop_pathkeys.
		 * Here we'll sort the cheapest input path (if not sorted already) and
		 * incremental sort any paths which are partially sorted.
		 */
		is_sorted = pathkeys_count_contained_in(setop_pathkeys,
												subpath->pathkeys,
												&presorted_keys);

		if (!is_sorted)
		{
			double		limittuples = rel->subroot->limit_tuples;

			/*
			 * Try at least sorting the cheapest path and also try
			 * incrementally sorting any path which is partially sorted
			 * already (no need to deal with paths which have presorted keys
			 * when incremental sort is disabled unless it's the cheapest
			 * input path).
			 */
			if (subpath != cheapest_input_path &&
				(presorted_keys == 0 || !enable_incremental_sort))
				continue;

			/*
			 * We've no need to consider both a sort and incremental sort.
			 * We'll just do a sort if there are no presorted keys and an
			 * incremental sort when there are presorted keys.
			 */
			if (presorted_keys == 0 || !enable_incremental_sort)
				subpath = (Path *) create_sort_path(rel->subroot,
													final_rel,
													subpath,
													setop_pathkeys,
													limittuples);
			else
				subpath = (Path *) create_incremental_sort_path(rel->subroot,
																final_rel,
																subpath,
																setop_pathkeys,
																presorted_keys,
																limittuples);
		}

		/*
		 * subpath is now sorted, so add it to the pathlist.  We already added
		 * the cheapest_input_path above, so don't add it again unless we just
		 * sorted it.
		 */
		if (subpath != cheapest_input_path)
		{
			/* Convert subpath's pathkeys to outer representation */
			pathkeys = convert_subquery_pathkeys(root, rel, subpath->pathkeys,
												 make_tlist_from_pathtarget(subpath->pathtarget));

			/* Generate outer path using this subpath */
			add_path(rel, (Path *) create_subqueryscan_path(root,
															rel,
															subpath,
															trivial_tlist,
															pathkeys,
															NULL));
		}
	}

	/* if consider_parallel is false, there should be no partial paths */
	Assert(final_rel->consider_parallel ||
		   final_rel->partial_pathlist == NIL);

	/*
	 * If we have a partial path for the child relation, we can use that to
	 * build a partial path for this relation.  But there's no point in
	 * considering any path but the cheapest.
	 */
	if (rel->consider_parallel && bms_is_empty(rel->lateral_relids) &&
		final_rel->partial_pathlist != NIL)
	{
		Path	   *partial_subpath;
		Path	   *partial_path;

		partial_subpath = linitial(final_rel->partial_pathlist);
		partial_path = (Path *)
			create_subqueryscan_path(root, rel, partial_subpath,
									 trivial_tlist,
									 NIL, NULL);
		add_partial_path(rel, partial_path);
	}

	postprocess_setop_rel(root, rel);

	/*
	 * Estimate number of groups if caller wants it.  If the subquery used
	 * grouping or aggregation, its output is probably mostly unique anyway;
	 * otherwise do statistical estimation.
	 *
	 * XXX you don't really want to know about this: we do the estimation
	 * using the subroot->parse's original targetlist expressions, not the
	 * subroot->processed_tlist which might seem more appropriate.  The reason
	 * is that if the subquery is itself a setop, it may return a
	 * processed_tlist containing "varno 0" Vars generated by
	 * generate_append_tlist, and those would confuse estimate_num_groups
	 * mightily.  We ought to get rid of the "varno 0" hack, but that requires
	 * a redesign of the parsetree representation of setops, so that there can
	 * be an RTE corresponding to each setop's output.  Note, we use this not
	 * subquery's targetlist but subroot->parse's targetlist, because it was
	 * revised by self-join removal.  subquery's targetlist might contain the
	 * references to the removed relids.
	 */
	if (pNumGroups)
	{
		PlannerInfo *subroot = rel->subroot;
		Query	   *subquery = subroot->parse;

		if (subquery->groupClause || subquery->groupingSets ||
			subquery->distinctClause || subroot->hasHavingQual ||
			subquery->hasAggs)
			*pNumGroups = rel->cheapest_total_path->rows;
		else
			*pNumGroups = estimate_num_groups(subroot,
											  get_tlist_exprs(subroot->parse->targetList, false),
											  rel->cheapest_total_path->rows,
											  NULL,
											  NULL);
	}
}
/*
* Generate paths for a UNION or UNION ALL node
*/
@ -545,41 +695,38 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
{
Relids relids = NULL;
RelOptInfo *result_rel;
double save_fraction = root->tuple_fraction;
ListCell *lc;
List *pathlist = NIL;
ListCell *lc2;
ListCell *lc3;
List *cheapest_pathlist = NIL;
List *ordered_pathlist = NIL;
List *partial_pathlist = NIL;
bool partial_paths_valid = true;
bool consider_parallel = true;
List *rellist;
List *tlist_list;
List *trivial_tlist_list;
List *tlist;
Path *path;
/*
* If plain UNION, tell children to fetch all tuples.
*
* Note: in UNION ALL, we pass the top-level tuple_fraction unmodified to
* each arm of the UNION ALL. One could make a case for reducing the
* tuple fraction for later arms (discounting by the expected size of the
* earlier arms' results) but it seems not worth the trouble. The normal
* case where tuple_fraction isn't already zero is a LIMIT at top level,
* and passing it down as-is is usually enough to get the desired result
* of preferring fast-start plans.
*/
if (!op->all)
root->tuple_fraction = 0.0;
List *groupList = NIL;
Path *apath;
Path *gpath = NULL;
bool try_sorted;
List *union_pathkeys = NIL;
/*
* If any of my children are identical UNION nodes (same op, all-flag, and
* colTypes) then they can be merged into this node so that we generate
* only one Append and unique-ification for the lot. Recurse to find such
* nodes and compute their children's paths.
* only one Append/MergeAppend and unique-ification for the lot. Recurse
* to find such nodes.
*/
rellist = plan_union_children(root, op, refnames_tlist, &tlist_list);
rellist = plan_union_children(root,
op,
refnames_tlist,
&tlist_list,
&trivial_tlist_list);
/*
* Generate tlist for Append plan node.
* Generate tlist for Append/MergeAppend plan node.
*
* The tlist for an Append plan isn't important as far as the Append is
* concerned, but we must make it look real anyway for the benefit of the
@ -587,15 +734,68 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
*/
tlist = generate_append_tlist(op->colTypes, op->colCollations, false,
tlist_list, refnames_tlist);
*pTargetList = tlist;
/* For for UNIONs (not UNION ALL), try sorting, if sorting is possible */
try_sorted = !op->all && grouping_is_sortable(op->groupClauses);
if (try_sorted)
{
/* Identify the grouping semantics */
groupList = generate_setop_grouplist(op, tlist);
/* Determine the pathkeys for sorting by the whole target list */
union_pathkeys = make_pathkeys_for_sortclauses(root, groupList, tlist);
root->query_pathkeys = union_pathkeys;
}
/*
* Now that we've got the append target list, we can build the union child
* paths.
*/
forthree(lc, rellist, lc2, trivial_tlist_list, lc3, tlist_list)
{
RelOptInfo *rel = lfirst(lc);
bool trivial_tlist = lfirst_int(lc2);
List *child_tlist = lfirst_node(List, lc3);
/* only build paths for the union children */
if (rel->rtekind == RTE_SUBQUERY)
build_setop_child_paths(root, rel, trivial_tlist, child_tlist,
union_pathkeys, NULL);
}
/* Build path lists and relid set. */
foreach(lc, rellist)
{
RelOptInfo *rel = lfirst(lc);
Path *ordered_path;
pathlist = lappend(pathlist, rel->cheapest_total_path);
cheapest_pathlist = lappend(cheapest_pathlist,
rel->cheapest_total_path);
if (try_sorted)
{
ordered_path = get_cheapest_path_for_pathkeys(rel->pathlist,
union_pathkeys,
NULL,
TOTAL_COST,
false);
if (ordered_path != NULL)
ordered_pathlist = lappend(ordered_pathlist, ordered_path);
else
{
/*
* If we can't find a sorted path, just give up trying to
* generate a list of correctly sorted child paths. This can
* happen when type coercion was added to the targetlist due
* to mismatching types from the union children.
*/
try_sorted = false;
}
}
if (consider_parallel)
{
@ -618,28 +818,21 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
result_rel = fetch_upper_rel(root, UPPERREL_SETOP, relids);
result_rel->reltarget = create_pathtarget(root, tlist);
result_rel->consider_parallel = consider_parallel;
result_rel->consider_startup = (root->tuple_fraction > 0);
/*
* Append the child results together.
* Append the child results together using the cheapest paths from each
* union child.
*/
path = (Path *) create_append_path(root, result_rel, pathlist, NIL,
NIL, NULL, 0, false, -1);
/*
* For UNION ALL, we just need the Append path. For UNION, need to add
* node(s) to remove duplicates.
*/
if (!op->all)
path = make_union_unique(op, path, tlist, root);
add_path(result_rel, path);
apath = (Path *) create_append_path(root, result_rel, cheapest_pathlist,
NIL, NIL, NULL, 0, false, -1);
/*
* Estimate number of groups. For now we just assume the output is unique
* --- this is certainly true for the UNION case, and we want worst-case
* estimates anyway.
*/
result_rel->rows = path->rows;
result_rel->rows = apath->rows;
/*
* Now consider doing the same thing using the partial paths plus Append
@ -647,7 +840,7 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
*/
if (partial_paths_valid)
{
Path *ppath;
Path *papath;
int parallel_workers = 0;
/* Find the highest number of workers requested for any subpath. */
@ -676,21 +869,137 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
}
Assert(parallel_workers > 0);
ppath = (Path *)
papath = (Path *)
create_append_path(root, result_rel, NIL, partial_pathlist,
NIL, NULL,
parallel_workers, enable_parallel_append,
-1);
ppath = (Path *)
create_gather_path(root, result_rel, ppath,
NIL, NULL, parallel_workers,
enable_parallel_append, -1);
gpath = (Path *)
create_gather_path(root, result_rel, papath,
result_rel->reltarget, NULL, NULL);
if (!op->all)
ppath = make_union_unique(op, ppath, tlist, root);
add_path(result_rel, ppath);
}
/* Undo effects of possibly forcing tuple_fraction to 0 */
root->tuple_fraction = save_fraction;
if (!op->all)
{
double dNumGroups;
bool can_sort = grouping_is_sortable(groupList);
bool can_hash = grouping_is_hashable(groupList);
/*
* XXX for the moment, take the number of distinct groups as equal to
* the total input size, i.e., the worst case. This is too
* conservative, but it's not clear how to get a decent estimate of
* the true size. One should note as well the propensity of novices
* to write UNION rather than UNION ALL even when they don't expect
* any duplicates...
*/
dNumGroups = apath->rows;
if (can_hash)
{
Path *path;
/*
* Try a hash aggregate plan on 'apath'. This is the cheapest
* available path containing each append child.
*/
path = (Path *) create_agg_path(root,
result_rel,
apath,
create_pathtarget(root, tlist),
AGG_HASHED,
AGGSPLIT_SIMPLE,
groupList,
NIL,
NULL,
dNumGroups);
add_path(result_rel, path);
/* Try hash aggregate on the Gather path, if valid */
if (gpath != NULL)
{
/* Hashed aggregate plan --- no sort needed */
path = (Path *) create_agg_path(root,
result_rel,
gpath,
create_pathtarget(root, tlist),
AGG_HASHED,
AGGSPLIT_SIMPLE,
groupList,
NIL,
NULL,
dNumGroups);
add_path(result_rel, path);
}
}
if (can_sort)
{
Path *path = apath;
/* Try Sort -> Unique on the Append path */
if (groupList != NIL)
path = (Path *) create_sort_path(root, result_rel, path,
make_pathkeys_for_sortclauses(root, groupList, tlist),
-1.0);
path = (Path *) create_upper_unique_path(root,
result_rel,
path,
list_length(path->pathkeys),
dNumGroups);
add_path(result_rel, path);
/* Try Sort -> Unique on the Gather path, if set */
if (gpath != NULL)
{
path = gpath;
path = (Path *) create_sort_path(root, result_rel, path,
make_pathkeys_for_sortclauses(root, groupList, tlist),
-1.0);
path = (Path *) create_upper_unique_path(root,
result_rel,
path,
list_length(path->pathkeys),
dNumGroups);
add_path(result_rel, path);
}
}
/*
* Try making a MergeAppend path if we managed to find a path with the
* correct pathkeys in each union child query.
*/
if (try_sorted && groupList != NIL)
{
Path *path;
path = (Path *) create_merge_append_path(root,
result_rel,
ordered_pathlist,
union_pathkeys,
NULL);
/* and make the MergeAppend unique */
path = (Path *) create_upper_unique_path(root,
result_rel,
path,
list_length(tlist),
dNumGroups);
add_path(result_rel, path);
}
}
else
{
/* UNION ALL */
add_path(result_rel, apath);
if (gpath != NULL)
add_path(result_rel, gpath);
}
return result_rel;
}
@ -716,6 +1025,8 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
*tlist,
*groupList,
*pathlist;
bool lpath_trivial_tlist,
rpath_trivial_tlist;
double dLeftGroups,
dRightGroups,
dNumGroups,
@ -735,14 +1046,26 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
false, 0,
refnames_tlist,
&lpath_tlist,
&dLeftGroups);
&lpath_trivial_tlist);
if (lrel->rtekind == RTE_SUBQUERY)
build_setop_child_paths(root, lrel, lpath_trivial_tlist, lpath_tlist,
NIL, &dLeftGroups);
else
dLeftGroups = lrel->rows;
lpath = lrel->cheapest_total_path;
rrel = recurse_set_operations(op->rarg, root,
op->colTypes, op->colCollations,
false, 1,
refnames_tlist,
&rpath_tlist,
&dRightGroups);
&rpath_trivial_tlist);
if (rrel->rtekind == RTE_SUBQUERY)
build_setop_child_paths(root, rrel, rpath_trivial_tlist, rpath_tlist,
NIL, &dRightGroups);
else
dRightGroups = rrel->rows;
rpath = rrel->cheapest_total_path;
/* Undo effects of forcing tuple_fraction to 0 */
@ -879,13 +1202,16 @@ static List *
plan_union_children(PlannerInfo *root,
SetOperationStmt *top_union,
List *refnames_tlist,
List **tlist_list)
List **tlist_list,
List **istrivial_tlist)
{
List *pending_rels = list_make1(top_union);
List *result = NIL;
List *child_tlist;
bool trivial_tlist;
*tlist_list = NIL;
*istrivial_tlist = NIL;
while (pending_rels != NIL)
{
@ -924,75 +1250,14 @@ plan_union_children(PlannerInfo *root,
false, -1,
refnames_tlist,
&child_tlist,
NULL));
&trivial_tlist));
*tlist_list = lappend(*tlist_list, child_tlist);
*istrivial_tlist = lappend_int(*istrivial_tlist, trivial_tlist);
}
return result;
}
/*
 * make_union_unique
 *		Attach whatever nodes are needed to remove duplicate rows from the
 *		result of a UNION (without ALL), and return the resulting path.
 */
static Path *
make_union_unique(SetOperationStmt *op, Path *path, List *tlist,
				  PlannerInfo *root)
{
	RelOptInfo *setop_rel = fetch_upper_rel(root, UPPERREL_SETOP, NULL);
	List	   *grouplist = generate_setop_grouplist(op, tlist);
	double		num_groups;

	/*
	 * XXX for the moment, take the number of distinct groups as equal to the
	 * total input size, ie, the worst case.  This is too conservative, but
	 * it's not clear how to get a decent estimate of the true size.  One
	 * should note as well the propensity of novices to write UNION rather
	 * than UNION ALL even when they don't expect any duplicates...
	 */
	num_groups = path->rows;

	/* Pick between sorted and hashed implementations of duplicate removal */
	if (!choose_hashed_setop(root, grouplist, path,
							 num_groups, num_groups,
							 "UNION"))
	{
		/* Sort the input (when there's anything to sort by), then Unique */
		if (grouplist != NIL)
			path = (Path *)
				create_sort_path(root,
								 setop_rel,
								 path,
								 make_pathkeys_for_sortclauses(root,
															   grouplist,
															   tlist),
								 -1.0);
		path = (Path *) create_upper_unique_path(root,
												 setop_rel,
												 path,
												 list_length(path->pathkeys),
												 num_groups);
	}
	else
	{
		/* Hashed aggregate plan --- no sort needed */
		path = (Path *) create_agg_path(root,
										setop_rel,
										path,
										create_pathtarget(root, tlist),
										AGG_HASHED,
										AGGSPLIT_SIMPLE,
										grouplist,
										NIL,
										NULL,
										num_groups);
	}

	return path;
}
/*
* postprocess_setop_rel - perform steps required after adding paths
*/

View File

@ -1890,7 +1890,8 @@ transformSetOperationStmt(ParseState *pstate, SelectStmt *stmt)
* For now, we don't support resjunk sort clauses on the output of a
* setOperation tree --- you can only use the SQL92-spec options of
* selecting an output column by name or number. Enforce by checking that
* transformSortClause doesn't add any items to tlist.
* transformSortClause doesn't add any items to tlist. Note, if changing
* this, add_setop_child_rel_equivalences() will need to be updated.
*/
tllen = list_length(qry->targetList);

View File

@ -397,6 +397,8 @@ struct PlannerInfo
List *distinct_pathkeys;
/* sortClause pathkeys, if any */
List *sort_pathkeys;
/* set operator pathkeys, if any */
List *setop_pathkeys;
/* Canonicalised partition schemes used in the query. */
List *part_schemes pg_node_attr(read_write_ignore);

View File

@ -174,6 +174,10 @@ extern void add_child_join_rel_equivalences(PlannerInfo *root,
AppendRelInfo **appinfos,
RelOptInfo *parent_joinrel,
RelOptInfo *child_joinrel);
extern void add_setop_child_rel_equivalences(PlannerInfo *root,
RelOptInfo *child_rel,
List *child_tlist,
List *setop_pathkeys);
extern List *generate_implied_equalities_for_column(PlannerInfo *root,
RelOptInfo *rel,
ec_matches_callback_type callback,

View File

@ -53,6 +53,6 @@ extern void preprocess_aggrefs(PlannerInfo *root, Node *clause);
* prototypes for prepunion.c
*/
extern RelOptInfo *plan_set_operations(PlannerInfo *root);
extern bool set_operation_ordered_results_useful(SetOperationStmt *setop);
#endif /* PREP_H */

View File

@ -1396,6 +1396,7 @@ SELECT x FROM test3cs WHERE x ~ 'a';
abc
(1 row)
SET enable_hashagg TO off;
SELECT x FROM test1cs UNION SELECT x FROM test2cs ORDER BY x;
x
-----
@ -1448,6 +1449,7 @@ SELECT DISTINCT x FROM test3cs ORDER BY x;
ghi
(4 rows)
RESET enable_hashagg;
SELECT count(DISTINCT x) FROM test3cs;
count
-------

View File

@ -1472,14 +1472,19 @@ explain (costs off) select * from t union select * from t order by 1,3;
Sort Key: t.a, t.c
Presorted Key: t.a
-> Unique
-> Sort
-> Merge Append
Sort Key: t.a, t.b, t.c
-> Gather
-> Gather Merge
Workers Planned: 2
-> Parallel Append
-> Sort
Sort Key: t.a, t.b, t.c
-> Parallel Seq Scan on t
-> Gather Merge
Workers Planned: 2
-> Sort
Sort Key: t_1.a, t_1.b, t_1.c
-> Parallel Seq Scan on t t_1
(11 rows)
(16 rows)
-- Full sort, not just incremental sort can be pushed below a gather merge path
-- by generate_useful_gather_paths.

View File

@ -412,16 +412,17 @@ set enable_hashagg to off;
explain (costs off)
select count(*) from
( select unique1 from tenk1 union select fivethous from tenk1 ) ss;
QUERY PLAN
----------------------------------------------------------------------
QUERY PLAN
----------------------------------------------------------------
Aggregate
-> Unique
-> Sort
-> Merge Append
Sort Key: tenk1.unique1
-> Append
-> Index Only Scan using tenk1_unique1 on tenk1
-> Index Only Scan using tenk1_unique1 on tenk1
-> Sort
Sort Key: tenk1_1.fivethous
-> Seq Scan on tenk1 tenk1_1
(7 rows)
(8 rows)
select count(*) from
( select unique1 from tenk1 union select fivethous from tenk1 ) ss;
@ -950,16 +951,9 @@ select except select;
-- check hashed implementation
set enable_hashagg = true;
set enable_sort = false;
explain (costs off)
select from generate_series(1,5) union select from generate_series(1,3);
QUERY PLAN
----------------------------------------------------------------
HashAggregate
-> Append
-> Function Scan on generate_series
-> Function Scan on generate_series generate_series_1
(4 rows)
-- We've no way to check hashed UNION as the empty pathkeys in the Append are
-- fine to make use of Unique, which is cheaper than HashAggregate and we've
-- no means to disable Unique.
explain (costs off)
select from generate_series(1,5) intersect select from generate_series(1,3);
QUERY PLAN
@ -972,10 +966,6 @@ select from generate_series(1,5) intersect select from generate_series(1,3);
-> Function Scan on generate_series generate_series_1
(6 rows)
select from generate_series(1,5) union select from generate_series(1,3);
--
(1 row)
select from generate_series(1,5) union all select from generate_series(1,3);
--
(8 rows)
@ -1081,6 +1071,7 @@ INSERT INTO t2 VALUES ('ab'), ('xy');
set enable_seqscan = off;
set enable_indexscan = on;
set enable_bitmapscan = off;
set enable_sort = off;
explain (costs off)
SELECT * FROM
(SELECT a || b AS ab FROM t1
@ -1162,6 +1153,7 @@ explain (costs off)
reset enable_seqscan;
reset enable_indexscan;
reset enable_bitmapscan;
reset enable_sort;
-- This simpler variant of the above test has been observed to fail differently
create table events (event_id int primary key);
create table other_events (event_id int primary key);

View File

@ -555,6 +555,7 @@ SELECT x FROM test3cs WHERE x LIKE 'a%';
SELECT x FROM test3cs WHERE x ILIKE 'a%';
SELECT x FROM test3cs WHERE x SIMILAR TO 'a%';
SELECT x FROM test3cs WHERE x ~ 'a';
SET enable_hashagg TO off;
SELECT x FROM test1cs UNION SELECT x FROM test2cs ORDER BY x;
SELECT x FROM test2cs UNION SELECT x FROM test1cs ORDER BY x;
SELECT x FROM test1cs INTERSECT SELECT x FROM test2cs;
@ -562,6 +563,7 @@ SELECT x FROM test2cs INTERSECT SELECT x FROM test1cs;
SELECT x FROM test1cs EXCEPT SELECT x FROM test2cs;
SELECT x FROM test2cs EXCEPT SELECT x FROM test1cs;
SELECT DISTINCT x FROM test3cs ORDER BY x;
RESET enable_hashagg;
SELECT count(DISTINCT x) FROM test3cs;
SELECT x, count(*) FROM test3cs GROUP BY x ORDER BY x;
SELECT x, row_number() OVER (ORDER BY x), rank() OVER (ORDER BY x) FROM test3cs ORDER BY x;

View File

@ -302,12 +302,12 @@ select except select;
set enable_hashagg = true;
set enable_sort = false;
explain (costs off)
select from generate_series(1,5) union select from generate_series(1,3);
-- We've no way to check hashed UNION as the empty pathkeys in the Append are
-- fine to make use of Unique, which is cheaper than HashAggregate and we've
-- no means to disable Unique.
explain (costs off)
select from generate_series(1,5) intersect select from generate_series(1,3);
select from generate_series(1,5) union select from generate_series(1,3);
select from generate_series(1,5) union all select from generate_series(1,3);
select from generate_series(1,5) intersect select from generate_series(1,3);
select from generate_series(1,5) intersect all select from generate_series(1,3);
@ -361,6 +361,7 @@ INSERT INTO t2 VALUES ('ab'), ('xy');
set enable_seqscan = off;
set enable_indexscan = on;
set enable_bitmapscan = off;
set enable_sort = off;
explain (costs off)
SELECT * FROM
@ -407,6 +408,7 @@ explain (costs off)
reset enable_seqscan;
reset enable_indexscan;
reset enable_bitmapscan;
reset enable_sort;
-- This simpler variant of the above test has been observed to fail differently