diff --git a/src/backend/executor/execProcnode.c b/src/backend/executor/execProcnode.c index 4ab290360a..c227282975 100644 --- a/src/backend/executor/execProcnode.c +++ b/src/backend/executor/execProcnode.c @@ -840,6 +840,19 @@ ExecSetTupleBound(int64 tuples_needed, PlanState *child_node) sortState->bound = tuples_needed; } } + else if (IsA(child_node, AppendState)) + { + /* + * If it is an Append, we can apply the bound to any nodes that are + * children of the Append, since the Append surely need read no more + * than that many tuples from any one input. + */ + AppendState *aState = (AppendState *) child_node; + int i; + + for (i = 0; i < aState->as_nplans; i++) + ExecSetTupleBound(tuples_needed, aState->appendplans[i]); + } else if (IsA(child_node, MergeAppendState)) { /* diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 3282be0e4b..82ca6826ab 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -1847,6 +1847,7 @@ _outAppendPath(StringInfo str, const AppendPath *node) WRITE_NODE_FIELD(partitioned_rels); WRITE_NODE_FIELD(subpaths); WRITE_INT_FIELD(first_partial_path); + WRITE_FLOAT_FIELD(limit_tuples, "%.0f"); } static void diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 727da33338..af05bb7511 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -44,6 +44,7 @@ #include "optimizer/tlist.h" #include "parser/parse_clause.h" #include "parser/parsetree.h" +#include "partitioning/partbounds.h" #include "partitioning/partprune.h" #include "rewrite/rewriteManip.h" #include "utils/lsyscache.h" @@ -96,15 +97,16 @@ static void set_append_rel_size(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTblEntry *rte); static void set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTblEntry *rte); -static void generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel, - List *live_childrels, - List *all_child_pathkeys, - List *partitioned_rels); +static void generate_orderedappend_paths(PlannerInfo *root, RelOptInfo *rel, + List *live_childrels, + List *all_child_pathkeys, + List *partitioned_rels); static Path *get_cheapest_parameterized_child_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer); static void accumulate_append_subpath(Path *path, List **subpaths, List **special_subpaths); +static Path *get_singleton_append_subpath(Path *path); static void set_dummy_rel_pathlist(RelOptInfo *rel); static void set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTblEntry *rte); @@ -1520,7 +1522,7 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, */ if (subpaths_valid) add_path(rel, (Path *) create_append_path(root, rel, subpaths, NIL, - NULL, 0, false, + NIL, NULL, 0, false, partitioned_rels, -1)); /* @@ -1562,7 +1564,7 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, /* Generate a partial append path. */ appendpath = create_append_path(root, rel, NIL, partial_subpaths, - NULL, parallel_workers, + NIL, NULL, parallel_workers, enable_parallel_append, partitioned_rels, -1); @@ -1612,19 +1614,19 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, appendpath = create_append_path(root, rel, pa_nonpartial_subpaths, pa_partial_subpaths, - NULL, parallel_workers, true, + NIL, NULL, parallel_workers, true, partitioned_rels, partial_rows); add_partial_path(rel, (Path *) appendpath); } /* - * Also build unparameterized MergeAppend paths based on the collected + * Also build unparameterized ordered append paths based on the collected * list of child pathkeys. */ if (subpaths_valid) - generate_mergeappend_paths(root, rel, live_childrels, - all_child_pathkeys, - partitioned_rels); + generate_orderedappend_paths(root, rel, live_childrels, + all_child_pathkeys, + partitioned_rels); /* * Build Append paths for each parameterization seen among the child rels. @@ -1674,7 +1676,7 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, if (subpaths_valid) add_path(rel, (Path *) create_append_path(root, rel, subpaths, NIL, - required_outer, 0, false, + NIL, required_outer, 0, false, partitioned_rels, -1)); } @@ -1703,8 +1705,8 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, continue; appendpath = create_append_path(root, rel, NIL, list_make1(path), - NULL, path->parallel_workers, - true, + NIL, NULL, + path->parallel_workers, true, partitioned_rels, partial_rows); add_partial_path(rel, (Path *) appendpath); } @@ -1712,17 +1714,21 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, } /* - * generate_mergeappend_paths - * Generate MergeAppend paths for an append relation + * generate_orderedappend_paths + * Generate ordered append paths for an append relation * - * Generate a path for each ordering (pathkey list) appearing in + * Usually we generate MergeAppend paths here, but there are some special + * cases where we can generate simple Append paths, because the subpaths + * can provide tuples in the required order already. + * + * We generate a path for each ordering (pathkey list) appearing in * all_child_pathkeys. * * We consider both cheapest-startup and cheapest-total cases, ie, for each * interesting ordering, collect all the cheapest startup subpaths and all the - * cheapest total paths, and build a MergeAppend path for each case. + * cheapest total paths, and build a suitable path for each case. * - * We don't currently generate any parameterized MergeAppend paths. While + * We don't currently generate any parameterized ordered paths here. While * it would not take much more code here to do so, it's very unclear that it * is worth the planning cycles to investigate such paths: there's little * use for an ordered path on the inside of a nestloop. In fact, it's likely @@ -1731,17 +1737,52 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, * and a parameterized MergeAppend is going to be more expensive than the * corresponding parameterized Append path. If we ever try harder to support * parameterized mergejoin plans, it might be worth adding support for - * parameterized MergeAppends to feed such joins. (See notes in + * parameterized paths here to feed such joins. (See notes in * optimizer/README for why that might not ever happen, though.) */ static void -generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel, - List *live_childrels, - List *all_child_pathkeys, - List *partitioned_rels) +generate_orderedappend_paths(PlannerInfo *root, RelOptInfo *rel, + List *live_childrels, + List *all_child_pathkeys, + List *partitioned_rels) { ListCell *lcp; + List *partition_pathkeys = NIL; + List *partition_pathkeys_desc = NIL; + bool partition_pathkeys_partial = true; + bool partition_pathkeys_desc_partial = true; + /* + * Some partitioned table setups may allow us to use an Append node + * instead of a MergeAppend. This is possible in cases such as RANGE + * partitioned tables where it's guaranteed that an earlier partition must + * contain rows which come earlier in the sort order. To detect whether + * this is relevant, build pathkey descriptions of the partition ordering, + * for both forward and reverse scans. + */ + if (rel->part_scheme != NULL && IS_SIMPLE_REL(rel) && + partitions_are_ordered(rel->boundinfo, rel->nparts)) + { + partition_pathkeys = build_partition_pathkeys(root, rel, + ForwardScanDirection, + &partition_pathkeys_partial); + + partition_pathkeys_desc = build_partition_pathkeys(root, rel, + BackwardScanDirection, + &partition_pathkeys_desc_partial); + + /* + * You might think we should truncate_useless_pathkeys here, but + * allowing partition keys which are a subset of the query's pathkeys + * can often be useful. For example, consider a table partitioned by + * RANGE (a, b), and a query with ORDER BY a, b, c. If we have child + * paths that can produce the a, b, c ordering (perhaps via indexes on + * (a, b, c)) then it works to consider the appendrel output as + * ordered by a, b, c. + */ + } + + /* Now consider each interesting sort ordering */ foreach(lcp, all_child_pathkeys) { List *pathkeys = (List *) lfirst(lcp); @@ -1749,6 +1790,27 @@ generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel, List *total_subpaths = NIL; bool startup_neq_total = false; ListCell *lcr; + bool match_partition_order; + bool match_partition_order_desc; + + /* + * Determine if this sort ordering matches any partition pathkeys we + * have, for both ascending and descending partition order. If the + * partition pathkeys happen to be contained in pathkeys then it still + * works, as described above, providing that the partition pathkeys + * are complete and not just a prefix of the partition keys. (In such + * cases we'll be relying on the child paths to have sorted the + * lower-order columns of the required pathkeys.) + */ + match_partition_order = + pathkeys_contained_in(pathkeys, partition_pathkeys) || + (!partition_pathkeys_partial && + pathkeys_contained_in(partition_pathkeys, pathkeys)); + + match_partition_order_desc = !match_partition_order && + (pathkeys_contained_in(pathkeys, partition_pathkeys_desc) || + (!partition_pathkeys_desc_partial && + pathkeys_contained_in(partition_pathkeys_desc, pathkeys))); /* Select the child paths for this ordering... */ foreach(lcr, live_childrels) @@ -1791,26 +1853,94 @@ generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel, if (cheapest_startup != cheapest_total) startup_neq_total = true; - accumulate_append_subpath(cheapest_startup, - &startup_subpaths, NULL); - accumulate_append_subpath(cheapest_total, - &total_subpaths, NULL); + /* + * Collect the appropriate child paths. The required logic varies + * for the Append and MergeAppend cases. + */ + if (match_partition_order) + { + /* + * We're going to make a plain Append path. We don't need + * most of what accumulate_append_subpath would do, but we do + * want to cut out child Appends or MergeAppends if they have + * just a single subpath (and hence aren't doing anything + * useful). + */ + cheapest_startup = get_singleton_append_subpath(cheapest_startup); + cheapest_total = get_singleton_append_subpath(cheapest_total); + + startup_subpaths = lappend(startup_subpaths, cheapest_startup); + total_subpaths = lappend(total_subpaths, cheapest_total); + } + else if (match_partition_order_desc) + { + /* + * As above, but we need to reverse the order of the children, + * because nodeAppend.c doesn't know anything about reverse + * ordering and will scan the children in the order presented. + */ + cheapest_startup = get_singleton_append_subpath(cheapest_startup); + cheapest_total = get_singleton_append_subpath(cheapest_total); + + startup_subpaths = lcons(cheapest_startup, startup_subpaths); + total_subpaths = lcons(cheapest_total, total_subpaths); + } + else + { + /* + * Otherwise, rely on accumulate_append_subpath to collect the + * child paths for the MergeAppend. + */ + accumulate_append_subpath(cheapest_startup, + &startup_subpaths, NULL); + accumulate_append_subpath(cheapest_total, + &total_subpaths, NULL); + } } - /* ... and build the MergeAppend paths */ - add_path(rel, (Path *) create_merge_append_path(root, - rel, - startup_subpaths, - pathkeys, - NULL, - partitioned_rels)); - if (startup_neq_total) + /* ... and build the Append or MergeAppend paths */ + if (match_partition_order || match_partition_order_desc) + { + /* We only need Append */ + add_path(rel, (Path *) create_append_path(root, + rel, + startup_subpaths, + NIL, + pathkeys, + NULL, + 0, + false, + partitioned_rels, + -1)); + if (startup_neq_total) + add_path(rel, (Path *) create_append_path(root, + rel, + total_subpaths, + NIL, + pathkeys, + NULL, + 0, + false, + partitioned_rels, + -1)); + } + else + { + /* We need MergeAppend */ add_path(rel, (Path *) create_merge_append_path(root, rel, - total_subpaths, + startup_subpaths, pathkeys, NULL, partitioned_rels)); + if (startup_neq_total) + add_path(rel, (Path *) create_merge_append_path(root, + rel, + total_subpaths, + pathkeys, + NULL, + partitioned_rels)); + } } } @@ -1901,7 +2031,6 @@ get_cheapest_parameterized_child_path(PlannerInfo *root, RelOptInfo *rel, * omitting a sort step, which seems fine: if the parent is to be an Append, * its result would be unsorted anyway, while if the parent is to be a * MergeAppend, there's no point in a separate sort on a child. - * its result would be unsorted anyway. * * Normally, either path is a partial path and subpaths is a list of partial * paths, or else path is a non-partial plan and subpaths is a list of those. @@ -1951,6 +2080,36 @@ accumulate_append_subpath(Path *path, List **subpaths, List **special_subpaths) *subpaths = lappend(*subpaths, path); } +/* + * get_singleton_append_subpath + * Returns the single subpath of an Append/MergeAppend, or just + * return 'path' if it's not a single sub-path Append/MergeAppend. + * + * Note: 'path' must not be a parallel-aware path. + */ +static Path * +get_singleton_append_subpath(Path *path) +{ + Assert(!path->parallel_aware); + + if (IsA(path, AppendPath)) + { + AppendPath *apath = (AppendPath *) path; + + if (list_length(apath->subpaths) == 1) + return (Path *) linitial(apath->subpaths); + } + else if (IsA(path, MergeAppendPath)) + { + MergeAppendPath *mpath = (MergeAppendPath *) path; + + if (list_length(mpath->subpaths) == 1) + return (Path *) linitial(mpath->subpaths); + } + + return path; +} + /* * set_dummy_rel_pathlist * Build a dummy path for a relation that's been excluded by constraints @@ -1975,7 +2134,7 @@ set_dummy_rel_pathlist(RelOptInfo *rel) /* Set up the dummy path */ add_path(rel, (Path *) create_append_path(NULL, rel, NIL, NIL, - rel->lateral_relids, + NIL, rel->lateral_relids, 0, false, NIL, -1)); /* diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 4b9be13f08..afd32884a2 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -1878,27 +1878,83 @@ cost_append(AppendPath *apath) apath->path.startup_cost = 0; apath->path.total_cost = 0; + apath->path.rows = 0; if (apath->subpaths == NIL) return; if (!apath->path.parallel_aware) { - Path *subpath = (Path *) linitial(apath->subpaths); + List *pathkeys = apath->path.pathkeys; - /* - * Startup cost of non-parallel-aware Append is the startup cost of - * first subpath. - */ - apath->path.startup_cost = subpath->startup_cost; - - /* Compute rows and costs as sums of subplan rows and costs. */ - foreach(l, apath->subpaths) + if (pathkeys == NIL) { - Path *subpath = (Path *) lfirst(l); + Path *subpath = (Path *) linitial(apath->subpaths); - apath->path.rows += subpath->rows; - apath->path.total_cost += subpath->total_cost; + /* + * For an unordered, non-parallel-aware Append we take the startup + * cost as the startup cost of the first subpath. + */ + apath->path.startup_cost = subpath->startup_cost; + + /* Compute rows and costs as sums of subplan rows and costs. */ + foreach(l, apath->subpaths) + { + Path *subpath = (Path *) lfirst(l); + + apath->path.rows += subpath->rows; + apath->path.total_cost += subpath->total_cost; + } + } + else + { + /* + * For an ordered, non-parallel-aware Append we take the startup + * cost as the sum of the subpath startup costs. This ensures + * that we don't underestimate the startup cost when a query's + * LIMIT is such that several of the children have to be run to + * satisfy it. This might be overkill --- another plausible hack + * would be to take the Append's startup cost as the maximum of + * the child startup costs. But we don't want to risk believing + * that an ORDER BY LIMIT query can be satisfied at small cost + * when the first child has small startup cost but later ones + * don't. (If we had the ability to deal with nonlinear cost + * interpolation for partial retrievals, we would not need to be + * so conservative about this.) + * + * This case is also different from the above in that we have to + * account for possibly injecting sorts into subpaths that aren't + * natively ordered. + */ + foreach(l, apath->subpaths) + { + Path *subpath = (Path *) lfirst(l); + Path sort_path; /* dummy for result of cost_sort */ + + if (!pathkeys_contained_in(pathkeys, subpath->pathkeys)) + { + /* + * We'll need to insert a Sort node, so include costs for + * that. We can use the parent's LIMIT if any, since we + * certainly won't pull more than that many tuples from + * any child. + */ + cost_sort(&sort_path, + NULL, /* doesn't currently need root */ + pathkeys, + subpath->total_cost, + subpath->rows, + subpath->pathtarget->width, + 0.0, + work_mem, + apath->limit_tuples); + subpath = &sort_path; + } + + apath->path.rows += subpath->rows; + apath->path.startup_cost += subpath->startup_cost; + apath->path.total_cost += subpath->total_cost; + } } } else /* parallel-aware */ @@ -1906,6 +1962,9 @@ cost_append(AppendPath *apath) int i = 0; double parallel_divisor = get_parallel_divisor(&apath->path); + /* Parallel-aware Append never produces ordered output. */ + Assert(apath->path.pathkeys == NIL); + /* Calculate startup cost. */ foreach(l, apath->subpaths) { diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c index 34cc7dacdf..e66cf328be 100644 --- a/src/backend/optimizer/path/joinrels.c +++ b/src/backend/optimizer/path/joinrels.c @@ -1261,7 +1261,7 @@ mark_dummy_rel(RelOptInfo *rel) /* Set up the dummy path */ add_path(rel, (Path *) create_append_path(NULL, rel, NIL, NIL, - rel->lateral_relids, + NIL, rel->lateral_relids, 0, false, NIL, -1)); /* Set or update cheapest_total_path and related fields */ diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c index 56d839bb31..68b2204b3b 100644 --- a/src/backend/optimizer/path/pathkeys.c +++ b/src/backend/optimizer/path/pathkeys.c @@ -18,16 +18,21 @@ #include "postgres.h" #include "access/stratnum.h" +#include "catalog/pg_opfamily.h" #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" #include "nodes/plannodes.h" #include "optimizer/optimizer.h" #include "optimizer/pathnode.h" #include "optimizer/paths.h" +#include "partitioning/partbounds.h" #include "utils/lsyscache.h" static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys); +static bool matches_boolean_partition_clause(RestrictInfo *rinfo, + RelOptInfo *partrel, + int partkeycol); static bool right_merge_direction(PlannerInfo *root, PathKey *pathkey); @@ -546,6 +551,165 @@ build_index_pathkeys(PlannerInfo *root, return retval; } +/* + * partkey_is_bool_constant_for_query + * + * If a partition key column is constrained to have a constant value by the + * query's WHERE conditions, then it's irrelevant for sort-order + * considerations. Usually that means we have a restriction clause + * WHERE partkeycol = constant, which gets turned into an EquivalenceClass + * containing a constant, which is recognized as redundant by + * build_partition_pathkeys(). But if the partition key column is a + * boolean variable (or expression), then we are not going to see such a + * WHERE clause, because expression preprocessing will have simplified it + * to "WHERE partkeycol" or "WHERE NOT partkeycol". So we are not going + * to have a matching EquivalenceClass (unless the query also contains + * "ORDER BY partkeycol"). To allow such cases to work the same as they would + * for non-boolean values, this function is provided to detect whether the + * specified partition key column matches a boolean restriction clause. + */ +static bool +partkey_is_bool_constant_for_query(RelOptInfo *partrel, int partkeycol) +{ + PartitionScheme partscheme = partrel->part_scheme; + ListCell *lc; + + /* If the partkey isn't boolean, we can't possibly get a match */ + if (!IsBooleanOpfamily(partscheme->partopfamily[partkeycol])) + return false; + + /* Check each restriction clause for the partitioned rel */ + foreach(lc, partrel->baserestrictinfo) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + + /* Ignore pseudoconstant quals, they won't match */ + if (rinfo->pseudoconstant) + continue; + + /* See if we can match the clause's expression to the partkey column */ + if (matches_boolean_partition_clause(rinfo, partrel, partkeycol)) + return true; + } + + return false; +} + +/* + * matches_boolean_partition_clause + * Determine if the boolean clause described by rinfo matches + * partrel's partkeycol-th partition key column. + * + * "Matches" can be either an exact match (equivalent to partkey = true), + * or a NOT above an exact match (equivalent to partkey = false). + */ +static bool +matches_boolean_partition_clause(RestrictInfo *rinfo, + RelOptInfo *partrel, int partkeycol) +{ + Node *clause = (Node *) rinfo->clause; + Node *partexpr = (Node *) linitial(partrel->partexprs[partkeycol]); + + /* Direct match? */ + if (equal(partexpr, clause)) + return true; + /* NOT clause? */ + else if (is_notclause(clause)) + { + Node *arg = (Node *) get_notclausearg((Expr *) clause); + + if (equal(partexpr, arg)) + return true; + } + + return false; +} + +/* + * build_partition_pathkeys + * Build a pathkeys list that describes the ordering induced by the + * partitions of partrel, under either forward or backward scan + * as per scandir. + * + * Caller must have checked that the partitions are properly ordered, + * as detected by partitions_are_ordered(). + * + * Sets *partialkeys to true if pathkeys were only built for a prefix of the + * partition key, or false if the pathkeys include all columns of the + * partition key. + */ +List * +build_partition_pathkeys(PlannerInfo *root, RelOptInfo *partrel, + ScanDirection scandir, bool *partialkeys) +{ + List *retval = NIL; + PartitionScheme partscheme = partrel->part_scheme; + int i; + + Assert(partscheme != NULL); + Assert(partitions_are_ordered(partrel->boundinfo, partrel->nparts)); + /* For now, we can only cope with baserels */ + Assert(IS_SIMPLE_REL(partrel)); + + for (i = 0; i < partscheme->partnatts; i++) + { + PathKey *cpathkey; + Expr *keyCol = (Expr *) linitial(partrel->partexprs[i]); + + /* + * Try to make a canonical pathkey for this partkey. + * + * We're considering a baserel scan, so nullable_relids should be + * NULL. Also, we assume the PartitionDesc lists any NULL partition + * last, so we treat the scan like a NULLS LAST index: we have + * nulls_first for backwards scan only. + */ + cpathkey = make_pathkey_from_sortinfo(root, + keyCol, + NULL, + partscheme->partopfamily[i], + partscheme->partopcintype[i], + partscheme->partcollation[i], + ScanDirectionIsBackward(scandir), + ScanDirectionIsBackward(scandir), + 0, + partrel->relids, + false); + + + if (cpathkey) + { + /* + * We found the sort key in an EquivalenceClass, so it's relevant + * for this query. Add it to list, unless it's redundant. + */ + if (!pathkey_is_redundant(cpathkey, retval)) + retval = lappend(retval, cpathkey); + } + else + { + /* + * Boolean partition keys might be redundant even if they do not + * appear in an EquivalenceClass, because of our special treatment + * of boolean equality conditions --- see the comment for + * partkey_is_bool_constant_for_query(). If that applies, we can + * continue to examine lower-order partition keys. Otherwise, the + * sort key is not an interesting sort order for this query, so we + * should stop considering partition columns; any lower-order sort + * keys won't be useful either. + */ + if (!partkey_is_bool_constant_for_query(partrel, i)) + { + *partialkeys = true; + return retval; + } + } + } + + *partialkeys = false; + return retval; +} + /* * build_expression_pathkey * Build a pathkeys list that describes an ordering by a single expression diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index cc222cb06c..efe073a3ee 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -205,8 +205,6 @@ static NamedTuplestoreScan *make_namedtuplestorescan(List *qptlist, List *qpqual Index scanrelid, char *enrname); static WorkTableScan *make_worktablescan(List *qptlist, List *qpqual, Index scanrelid, int wtParam); -static Append *make_append(List *appendplans, int first_partial_plan, - List *tlist, PartitionPruneInfo *partpruneinfo); static RecursiveUnion *make_recursive_union(List *tlist, Plan *lefttree, Plan *righttree, @@ -1060,10 +1058,16 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path) { Append *plan; List *tlist = build_path_tlist(root, &best_path->path); + List *pathkeys = best_path->path.pathkeys; List *subplans = NIL; ListCell *subpaths; RelOptInfo *rel = best_path->path.parent; PartitionPruneInfo *partpruneinfo = NULL; + int nodenumsortkeys = 0; + AttrNumber *nodeSortColIdx = NULL; + Oid *nodeSortOperators = NULL; + Oid *nodeCollations = NULL; + bool *nodeNullsFirst = NULL; /* * The subpaths list could be empty, if every child was proven empty by @@ -1089,6 +1093,37 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path) return plan; } + /* + * Otherwise build an Append plan. Note that if there's just one child, + * the Append is pretty useless; but we wait till setrefs.c to get rid of + * it. Doing so here doesn't work because the varno of the child scan + * plan won't match the parent-rel Vars it'll be asked to emit. + * + * We don't have the actual creation of the Append node split out into a + * separate make_xxx function. This is because we want to run + * prepare_sort_from_pathkeys on it before we do so on the individual + * child plans, to make cross-checking the sort info easier. + */ + plan = makeNode(Append); + plan->plan.targetlist = tlist; + plan->plan.qual = NIL; + plan->plan.lefttree = NULL; + plan->plan.righttree = NULL; + + if (pathkeys != NIL) + { + /* Compute sort column info, and adjust the Append's tlist as needed */ + (void) prepare_sort_from_pathkeys((Plan *) plan, pathkeys, + best_path->path.parent->relids, + NULL, + true, + &nodenumsortkeys, + &nodeSortColIdx, + &nodeSortOperators, + &nodeCollations, + &nodeNullsFirst); + } + /* Build the plan for each child */ foreach(subpaths, best_path->subpaths) { @@ -1098,6 +1133,63 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path) /* Must insist that all children return the same tlist */ subplan = create_plan_recurse(root, subpath, CP_EXACT_TLIST); + /* + * For ordered Appends, we must insert a Sort node if subplan isn't + * sufficiently ordered. + */ + if (pathkeys != NIL) + { + int numsortkeys; + AttrNumber *sortColIdx; + Oid *sortOperators; + Oid *collations; + bool *nullsFirst; + + /* + * Compute sort column info, and adjust subplan's tlist as needed. + * We must apply prepare_sort_from_pathkeys even to subplans that + * don't need an explicit sort, to make sure they are returning + * the same sort key columns the Append expects. + */ + subplan = prepare_sort_from_pathkeys(subplan, pathkeys, + subpath->parent->relids, + nodeSortColIdx, + false, + &numsortkeys, + &sortColIdx, + &sortOperators, + &collations, + &nullsFirst); + + /* + * Check that we got the same sort key information. We just + * Assert that the sortops match, since those depend only on the + * pathkeys; but it seems like a good idea to check the sort + * column numbers explicitly, to ensure the tlists match up. + */ + Assert(numsortkeys == nodenumsortkeys); + if (memcmp(sortColIdx, nodeSortColIdx, + numsortkeys * sizeof(AttrNumber)) != 0) + elog(ERROR, "Append child's targetlist doesn't match Append"); + Assert(memcmp(sortOperators, nodeSortOperators, + numsortkeys * sizeof(Oid)) == 0); + Assert(memcmp(collations, nodeCollations, + numsortkeys * sizeof(Oid)) == 0); + Assert(memcmp(nullsFirst, nodeNullsFirst, + numsortkeys * sizeof(bool)) == 0); + + /* Now, insert a Sort node if subplan isn't sufficiently ordered */ + if (!pathkeys_contained_in(pathkeys, subpath->pathkeys)) + { + Sort *sort = make_sort(subplan, numsortkeys, + sortColIdx, sortOperators, + collations, nullsFirst); + + label_sort_with_costsize(root, sort, best_path->limit_tuples); + subplan = (Plan *) sort; + } + } + subplans = lappend(subplans, subplan); } @@ -1133,15 +1225,9 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path) prunequal); } - /* - * And build the Append plan. Note that if there's just one child, the - * Append is pretty useless; but we wait till setrefs.c to get rid of it. - * Doing so here doesn't work because the varno of the child scan plan - * won't match the parent-rel Vars it'll be asked to emit. - */ - - plan = make_append(subplans, best_path->first_partial_path, - tlist, partpruneinfo); + plan->appendplans = subplans; + plan->first_partial_plan = best_path->first_partial_path; + plan->part_prune_info = partpruneinfo; copy_generic_path_info(&plan->plan, (Path *) best_path); @@ -1266,7 +1352,6 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path) if (best_path->path.param_info) { - List *prmquals = best_path->path.param_info->ppi_clauses; prmquals = extract_actual_clauses(prmquals, false); @@ -5300,23 +5385,6 @@ make_foreignscan(List *qptlist, return node; } -static Append * -make_append(List *appendplans, int first_partial_plan, - List *tlist, PartitionPruneInfo *partpruneinfo) -{ - Append *node = makeNode(Append); - Plan *plan = &node->plan; - - plan->targetlist = tlist; - plan->qual = NIL; - plan->lefttree = NULL; - plan->righttree = NULL; - node->appendplans = appendplans; - node->first_partial_plan = first_partial_plan; - node->part_prune_info = partpruneinfo; - return node; -} - static RecursiveUnion * make_recursive_union(List *tlist, Plan *lefttree, diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index e2cdc83613..cbd3fb8e0e 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -1721,7 +1721,8 @@ inheritance_planner(PlannerInfo *root) /* Make a dummy path, cf set_dummy_rel_pathlist() */ dummy_path = (Path *) create_append_path(NULL, final_rel, NIL, NIL, - NULL, 0, false, NIL, -1); + NIL, NULL, 0, false, + NIL, -1); /* These lists must be nonempty to make a valid ModifyTable node */ subpaths = list_make1(dummy_path); @@ -4003,6 +4004,7 @@ create_degenerate_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, grouped_rel, paths, NIL, + NIL, NULL, 0, false, diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c index eb815c2f12..c1def3823f 100644 --- a/src/backend/optimizer/prep/prepunion.c +++ b/src/backend/optimizer/prep/prepunion.c @@ -617,7 +617,7 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root, * Append the child results together. */ path = (Path *) create_append_path(root, result_rel, pathlist, NIL, - NULL, 0, false, NIL, -1); + NIL, NULL, 0, false, NIL, -1); /* * For UNION ALL, we just need the Append path. For UNION, need to add @@ -672,7 +672,8 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root, ppath = (Path *) create_append_path(root, result_rel, NIL, partial_pathlist, - NULL, parallel_workers, enable_parallel_append, + NIL, NULL, + parallel_workers, enable_parallel_append, NIL, -1); ppath = (Path *) create_gather_path(root, result_rel, ppath, @@ -783,7 +784,7 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root, * Append the child results together. */ path = (Path *) create_append_path(root, result_rel, pathlist, NIL, - NULL, 0, false, NIL, -1); + NIL, NULL, 0, false, NIL, -1); /* Identify the grouping semantics */ groupList = generate_setop_grouplist(op, tlist); diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 1ea89ff54c..36aee35d46 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -1203,12 +1203,13 @@ create_tidscan_path(PlannerInfo *root, RelOptInfo *rel, List *tidquals, * pathnode. * * Note that we must handle subpaths = NIL, representing a dummy access path. + * Also, there are callers that pass root = NULL. */ AppendPath * create_append_path(PlannerInfo *root, RelOptInfo *rel, List *subpaths, List *partial_subpaths, - Relids required_outer, + List *pathkeys, Relids required_outer, int parallel_workers, bool parallel_aware, List *partitioned_rels, double rows) { @@ -1242,6 +1243,7 @@ create_append_path(PlannerInfo *root, pathnode->path.parallel_aware = parallel_aware; pathnode->path.parallel_safe = rel->consider_parallel; pathnode->path.parallel_workers = parallel_workers; + pathnode->path.pathkeys = pathkeys; pathnode->partitioned_rels = list_copy(partitioned_rels); /* @@ -1255,6 +1257,13 @@ create_append_path(PlannerInfo *root, */ if (pathnode->path.parallel_aware) { + /* + * We mustn't fiddle with the order of subpaths when the Append has + * pathkeys. The order they're listed in is critical to keeping the + * pathkeys valid. + */ + Assert(pathkeys == NIL); + subpaths = list_qsort(subpaths, append_total_cost_compare); partial_subpaths = list_qsort(partial_subpaths, append_startup_cost_compare); @@ -1262,6 +1271,15 @@ create_append_path(PlannerInfo *root, pathnode->first_partial_path = list_length(subpaths); pathnode->subpaths = list_concat(subpaths, partial_subpaths); + /* + * Apply query-wide LIMIT if known and path is for sole base relation. + * (Handling this at this low level is a bit klugy.) + */ + if (root != NULL && bms_equal(rel->relids, root->all_baserels)) + pathnode->limit_tuples = root->limit_tuples; + else + pathnode->limit_tuples = -1.0; + foreach(l, pathnode->subpaths) { Path *subpath = (Path *) lfirst(l); @@ -1278,8 +1296,9 @@ create_append_path(PlannerInfo *root, /* * If there's exactly one child path, the Append is a no-op and will be * discarded later (in setrefs.c); therefore, we can inherit the child's - * size, cost, and pathkeys if any. Otherwise, it's unsorted, and we must - * do the normal costsize calculation. + * size and cost, as well as its pathkeys if any (overriding whatever the + * caller might've said). Otherwise, we must do the normal costsize + * calculation. */ if (list_length(pathnode->subpaths) == 1) { @@ -1291,10 +1310,7 @@ create_append_path(PlannerInfo *root, pathnode->path.pathkeys = child->pathkeys; } else - { - pathnode->path.pathkeys = NIL; /* unsorted if more than 1 subpath */ cost_append(pathnode); - } /* If the caller provided a row estimate, override the computed value. */ if (rows >= 0) @@ -3759,7 +3775,7 @@ reparameterize_path(PlannerInfo *root, Path *path, } return (Path *) create_append_path(root, rel, childpaths, partialpaths, - required_outer, + apath->path.pathkeys, required_outer, apath->path.parallel_workers, apath->path.parallel_aware, apath->partitioned_rels, diff --git a/src/backend/partitioning/partbounds.c b/src/backend/partitioning/partbounds.c index bdd0d23854..c8770ccfee 100644 --- a/src/backend/partitioning/partbounds.c +++ b/src/backend/partitioning/partbounds.c @@ -861,6 +861,70 @@ partition_bounds_copy(PartitionBoundInfo src, return dest; } +/* + * partitions_are_ordered + * Determine whether the partitions described by 'boundinfo' are ordered, + * that is partitions appearing earlier in the PartitionDesc sequence + * contain partition keys strictly less than those appearing later. + * Also, if NULL values are possible, they must come in the last + * partition defined in the PartitionDesc. + * + * If out of order, or there is insufficient info to know the order, + * then we return false. + */ +bool +partitions_are_ordered(PartitionBoundInfo boundinfo, int nparts) +{ + Assert(boundinfo != NULL); + + switch (boundinfo->strategy) + { + case PARTITION_STRATEGY_RANGE: + + /* + * RANGE-type partitioning guarantees that the partitions can be + * scanned in the order that they're defined in the PartitionDesc + * to provide sequential, non-overlapping ranges of tuples. + * However, if a DEFAULT partition exists then it doesn't work, as + * that could contain tuples from either below or above the + * defined range, or tuples belonging to gaps between partitions. + */ + if (!partition_bound_has_default(boundinfo)) + return true; + break; + + case PARTITION_STRATEGY_LIST: + + /* + * LIST partitioning can also guarantee ordering, but only if the + * partitions don't accept interleaved values. We could likely + * check for this by looping over the PartitionBound's indexes + * array to check that the indexes are in order. For now, let's + * just keep it simple and just accept LIST partitioning when + * there's no DEFAULT partition, exactly one value per partition, + * and optionally a NULL partition that does not accept any other + * values. Such a NULL partition will come last in the + * PartitionDesc, and the other partitions will be properly + * ordered. This is a cheap test to make as it does not require + * any per-partition processing. Maybe we'd like to handle more + * complex cases in the future. + */ + if (partition_bound_has_default(boundinfo)) + return false; + + if (boundinfo->ndatums + partition_bound_accepts_nulls(boundinfo) + == nparts) + return true; + break; + + default: + /* HASH, or some other strategy */ + break; + } + + return false; +} + /* * check_new_partition_bound * diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index 15314a8cfe..f8429df9e1 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -280,6 +280,11 @@ struct PlannerInfo List *join_info_list; /* list of SpecialJoinInfos */ + /* + * Note: for AppendRelInfos describing partitions of a partitioned table, + * we guarantee that partitions that come earlier in the partitioned + * table's PartitionDesc will appear earlier in append_rel_list. + */ List *append_rel_list; /* list of AppendRelInfos */ List *rowMarks; /* list of PlanRowMarks */ @@ -1363,9 +1368,9 @@ typedef struct AppendPath /* RT indexes of non-leaf tables in a partition tree */ List *partitioned_rels; List *subpaths; /* list of component Paths */ - - /* Index of first partial path in subpaths */ + /* Index of first partial path in subpaths; list_length(subpaths) if none */ int first_partial_path; + double limit_tuples; /* hard limit on output tuples, or -1 */ } AppendPath; #define IS_DUMMY_APPEND(p) \ diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index 5b577c12ec..437250f557 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -65,7 +65,7 @@ extern TidPath *create_tidscan_path(PlannerInfo *root, RelOptInfo *rel, List *tidquals, Relids required_outer); extern AppendPath *create_append_path(PlannerInfo *root, RelOptInfo *rel, List *subpaths, List *partial_subpaths, - Relids required_outer, + List *pathkeys, Relids required_outer, int parallel_workers, bool parallel_aware, List *partitioned_rels, double rows); extern MergeAppendPath *create_merge_append_path(PlannerInfo *root, diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 36d12bc376..0e858097c8 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -194,6 +194,8 @@ extern Path *get_cheapest_fractional_path_for_pathkeys(List *paths, extern Path *get_cheapest_parallel_safe_total_inner(List *paths); extern List *build_index_pathkeys(PlannerInfo *root, IndexOptInfo *index, ScanDirection scandir); +extern List *build_partition_pathkeys(PlannerInfo *root, RelOptInfo *partrel, + ScanDirection scandir, bool *partialkeys); extern List *build_expression_pathkey(PlannerInfo *root, Expr *expr, Relids nullable_relids, Oid opno, Relids rel, bool create_it); diff --git a/src/include/partitioning/partbounds.h b/src/include/partitioning/partbounds.h index 683e1574ea..c954965e4e 100644 --- a/src/include/partitioning/partbounds.h +++ b/src/include/partitioning/partbounds.h @@ -88,6 +88,7 @@ extern bool partition_bounds_equal(int partnatts, int16 *parttyplen, PartitionBoundInfo b2); extern PartitionBoundInfo partition_bounds_copy(PartitionBoundInfo src, PartitionKey key); +extern bool partitions_are_ordered(PartitionBoundInfo boundinfo, int nparts); extern void check_new_partition_bound(char *relname, Relation parent, PartitionBoundSpec *spec); extern void check_default_partition_contents(Relation parent, diff --git a/src/test/regress/expected/inherit.out b/src/test/regress/expected/inherit.out index 7518148df0..4725a15948 100644 --- a/src/test/regress/expected/inherit.out +++ b/src/test/regress/expected/inherit.out @@ -2037,7 +2037,6 @@ explain (costs off) select * from mcrparted where a = 20 and c > 20; -- scans mc Filter: ((c > 20) AND (a = 20)) (9 rows) -drop table mcrparted; -- check that partitioned table Appends cope with being referenced in -- subplans create table parted_minmax (a int, b varchar(16)) partition by range (a); @@ -2065,3 +2064,255 @@ select min(a), max(a) from parted_minmax where b = '12345'; (1 row) drop table parted_minmax; +-- Test code that uses Append nodes in place of MergeAppend when the +-- partition ordering matches the desired ordering. +create index mcrparted_a_abs_c_idx on mcrparted (a, abs(b), c); +-- MergeAppend must be used when a default partition exists +explain (costs off) select * from mcrparted order by a, abs(b), c; + QUERY PLAN +------------------------------------------------------------------- + Merge Append + Sort Key: mcrparted0.a, (abs(mcrparted0.b)), mcrparted0.c + -> Index Scan using mcrparted0_a_abs_c_idx on mcrparted0 + -> Index Scan using mcrparted1_a_abs_c_idx on mcrparted1 + -> Index Scan using mcrparted2_a_abs_c_idx on mcrparted2 + -> Index Scan using mcrparted3_a_abs_c_idx on mcrparted3 + -> Index Scan using mcrparted4_a_abs_c_idx on mcrparted4 + -> Index Scan using mcrparted5_a_abs_c_idx on mcrparted5 + -> Index Scan using mcrparted_def_a_abs_c_idx on mcrparted_def +(9 rows) + +drop table mcrparted_def; +-- Append is used for a RANGE partitioned table with no default +-- and no subpartitions +explain (costs off) select * from mcrparted order by a, abs(b), c; + QUERY PLAN +------------------------------------------------------------- + Append + -> Index Scan using mcrparted0_a_abs_c_idx on mcrparted0 + -> Index Scan using mcrparted1_a_abs_c_idx on mcrparted1 + -> Index Scan using mcrparted2_a_abs_c_idx on mcrparted2 + -> Index Scan using mcrparted3_a_abs_c_idx on mcrparted3 + -> Index Scan using mcrparted4_a_abs_c_idx on mcrparted4 + -> Index Scan using mcrparted5_a_abs_c_idx on mcrparted5 +(7 rows) + +-- Append is used with subpaths in reverse order with backwards index scans +explain (costs off) select * from mcrparted order by a desc, abs(b) desc, c desc; + QUERY PLAN +---------------------------------------------------------------------- + Append + -> Index Scan Backward using mcrparted5_a_abs_c_idx on mcrparted5 + -> Index Scan Backward using mcrparted4_a_abs_c_idx on mcrparted4 + -> Index Scan Backward using mcrparted3_a_abs_c_idx on mcrparted3 + -> Index Scan Backward using mcrparted2_a_abs_c_idx on mcrparted2 + -> Index Scan Backward using mcrparted1_a_abs_c_idx on mcrparted1 + -> Index Scan Backward using mcrparted0_a_abs_c_idx on mcrparted0 +(7 rows) + +-- check that Append plan is used containing a MergeAppend for sub-partitions +-- that are unordered. +drop table mcrparted5; +create table mcrparted5 partition of mcrparted for values from (20, 20, 20) to (maxvalue, maxvalue, maxvalue) partition by list (a); +create table mcrparted5a partition of mcrparted5 for values in(20); +create table mcrparted5_def partition of mcrparted5 default; +explain (costs off) select * from mcrparted order by a, abs(b), c; + QUERY PLAN +--------------------------------------------------------------------------- + Append + -> Index Scan using mcrparted0_a_abs_c_idx on mcrparted0 + -> Index Scan using mcrparted1_a_abs_c_idx on mcrparted1 + -> Index Scan using mcrparted2_a_abs_c_idx on mcrparted2 + -> Index Scan using mcrparted3_a_abs_c_idx on mcrparted3 + -> Index Scan using mcrparted4_a_abs_c_idx on mcrparted4 + -> Merge Append + Sort Key: mcrparted5a.a, (abs(mcrparted5a.b)), mcrparted5a.c + -> Index Scan using mcrparted5a_a_abs_c_idx on mcrparted5a + -> Index Scan using mcrparted5_def_a_abs_c_idx on mcrparted5_def +(10 rows) + +drop table mcrparted5_def; +-- check that an Append plan is used and the sub-partitions are flattened +-- into the main Append when the sub-partition is unordered but contains +-- just a single sub-partition. +explain (costs off) select a, abs(b) from mcrparted order by a, abs(b), c; + QUERY PLAN +--------------------------------------------------------------- + Append + -> Index Scan using mcrparted0_a_abs_c_idx on mcrparted0 + -> Index Scan using mcrparted1_a_abs_c_idx on mcrparted1 + -> Index Scan using mcrparted2_a_abs_c_idx on mcrparted2 + -> Index Scan using mcrparted3_a_abs_c_idx on mcrparted3 + -> Index Scan using mcrparted4_a_abs_c_idx on mcrparted4 + -> Index Scan using mcrparted5a_a_abs_c_idx on mcrparted5a +(7 rows) + +-- check that Append is used when the sub-partitioned tables are pruned +-- during planning. +explain (costs off) select * from mcrparted where a < 20 order by a, abs(b), c; + QUERY PLAN +------------------------------------------------------------- + Append + -> Index Scan using mcrparted0_a_abs_c_idx on mcrparted0 + Index Cond: (a < 20) + -> Index Scan using mcrparted1_a_abs_c_idx on mcrparted1 + Index Cond: (a < 20) + -> Index Scan using mcrparted2_a_abs_c_idx on mcrparted2 + Index Cond: (a < 20) + -> Index Scan using mcrparted3_a_abs_c_idx on mcrparted3 + Index Cond: (a < 20) +(9 rows) + +create table mclparted (a int) partition by list(a); +create table mclparted1 partition of mclparted for values in(1); +create table mclparted2 partition of mclparted for values in(2); +create index on mclparted (a); +-- Ensure an Append is used for a list partition with an order by. +explain (costs off) select * from mclparted order by a; + QUERY PLAN +------------------------------------------------------------ + Append + -> Index Only Scan using mclparted1_a_idx on mclparted1 + -> Index Only Scan using mclparted2_a_idx on mclparted2 +(3 rows) + +-- Ensure a MergeAppend is used when a partition exists with interleaved +-- datums in the partition bound. +create table mclparted3_5 partition of mclparted for values in(3,5); +create table mclparted4 partition of mclparted for values in(4); +explain (costs off) select * from mclparted order by a; + QUERY PLAN +---------------------------------------------------------------- + Merge Append + Sort Key: mclparted1.a + -> Index Only Scan using mclparted1_a_idx on mclparted1 + -> Index Only Scan using mclparted2_a_idx on mclparted2 + -> Index Only Scan using mclparted3_5_a_idx on mclparted3_5 + -> Index Only Scan using mclparted4_a_idx on mclparted4 +(6 rows) + +drop table mclparted; +-- Ensure subplans which don't have a path with the correct pathkeys get +-- sorted correctly. +drop index mcrparted_a_abs_c_idx; +create index on mcrparted1 (a, abs(b), c); +create index on mcrparted2 (a, abs(b), c); +create index on mcrparted3 (a, abs(b), c); +create index on mcrparted4 (a, abs(b), c); +explain (costs off) select * from mcrparted where a < 20 order by a, abs(b), c limit 1; + QUERY PLAN +------------------------------------------------------------------------- + Limit + -> Append + -> Sort + Sort Key: mcrparted0.a, (abs(mcrparted0.b)), mcrparted0.c + -> Seq Scan on mcrparted0 + Filter: (a < 20) + -> Index Scan using mcrparted1_a_abs_c_idx on mcrparted1 + Index Cond: (a < 20) + -> Index Scan using mcrparted2_a_abs_c_idx on mcrparted2 + Index Cond: (a < 20) + -> Index Scan using mcrparted3_a_abs_c_idx on mcrparted3 + Index Cond: (a < 20) +(12 rows) + +set enable_bitmapscan = 0; +-- Ensure Append node can be used when the partition is ordered by some +-- pathkeys which were deemed redundant. +explain (costs off) select * from mcrparted where a = 10 order by a, abs(b), c; + QUERY PLAN +------------------------------------------------------------- + Append + -> Index Scan using mcrparted1_a_abs_c_idx on mcrparted1 + Index Cond: (a = 10) + -> Index Scan using mcrparted2_a_abs_c_idx on mcrparted2 + Index Cond: (a = 10) +(5 rows) + +reset enable_bitmapscan; +drop table mcrparted; +-- Ensure LIST partitions allow an Append to be used instead of a MergeAppend +create table bool_lp (b bool) partition by list(b); +create table bool_lp_true partition of bool_lp for values in(true); +create table bool_lp_false partition of bool_lp for values in(false); +create index on bool_lp (b); +explain (costs off) select * from bool_lp order by b; + QUERY PLAN +------------------------------------------------------------------ + Append + -> Index Only Scan using bool_lp_false_b_idx on bool_lp_false + -> Index Only Scan using bool_lp_true_b_idx on bool_lp_true +(3 rows) + +drop table bool_lp; +-- Ensure const bool quals can be properly detected as redundant +create table bool_rp (b bool, a int) partition by range(b,a); +create table bool_rp_false_1k partition of bool_rp for values from (false,0) to (false,1000); +create table bool_rp_true_1k partition of bool_rp for values from (true,0) to (true,1000); +create table bool_rp_false_2k partition of bool_rp for values from (false,1000) to (false,2000); +create table bool_rp_true_2k partition of bool_rp for values from (true,1000) to (true,2000); +create index on bool_rp (b,a); +explain (costs off) select * from bool_rp where b = true order by b,a; + QUERY PLAN +------------------------------------------------------------------------ + Append + -> Index Only Scan using bool_rp_true_1k_b_a_idx on bool_rp_true_1k + Index Cond: (b = true) + -> Index Only Scan using bool_rp_true_2k_b_a_idx on bool_rp_true_2k + Index Cond: (b = true) +(5 rows) + +explain (costs off) select * from bool_rp where b = false order by b,a; + QUERY PLAN +-------------------------------------------------------------------------- + Append + -> Index Only Scan using bool_rp_false_1k_b_a_idx on bool_rp_false_1k + Index Cond: (b = false) + -> Index Only Scan using bool_rp_false_2k_b_a_idx on bool_rp_false_2k + Index Cond: (b = false) +(5 rows) + +explain (costs off) select * from bool_rp where b = true order by a; + QUERY PLAN +------------------------------------------------------------------------ + Append + -> Index Only Scan using bool_rp_true_1k_b_a_idx on bool_rp_true_1k + Index Cond: (b = true) + -> Index Only Scan using bool_rp_true_2k_b_a_idx on bool_rp_true_2k + Index Cond: (b = true) +(5 rows) + +explain (costs off) select * from bool_rp where b = false order by a; + QUERY PLAN +-------------------------------------------------------------------------- + Append + -> Index Only Scan using bool_rp_false_1k_b_a_idx on bool_rp_false_1k + Index Cond: (b = false) + -> Index Only Scan using bool_rp_false_2k_b_a_idx on bool_rp_false_2k + Index Cond: (b = false) +(5 rows) + +drop table bool_rp; +-- Ensure an Append scan is chosen when the partition order is a subset of +-- the required order. +create table range_parted (a int, b int, c int) partition by range(a, b); +create table range_parted1 partition of range_parted for values from (0,0) to (10,10); +create table range_parted2 partition of range_parted for values from (10,10) to (20,20); +create index on range_parted (a,b,c); +explain (costs off) select * from range_parted order by a,b,c; + QUERY PLAN +---------------------------------------------------------------------- + Append + -> Index Only Scan using range_parted1_a_b_c_idx on range_parted1 + -> Index Only Scan using range_parted2_a_b_c_idx on range_parted2 +(3 rows) + +explain (costs off) select * from range_parted order by a desc,b desc,c desc; + QUERY PLAN +------------------------------------------------------------------------------- + Append + -> Index Only Scan Backward using range_parted2_a_b_c_idx on range_parted2 + -> Index Only Scan Backward using range_parted1_a_b_c_idx on range_parted1 +(3 rows) + +drop table range_parted; diff --git a/src/test/regress/expected/partition_prune.out b/src/test/regress/expected/partition_prune.out index 7806ba1d47..0789b316eb 100644 --- a/src/test/regress/expected/partition_prune.out +++ b/src/test/regress/expected/partition_prune.out @@ -3078,14 +3078,14 @@ drop table boolp; -- set enable_seqscan = off; set enable_sort = off; -create table ma_test (a int) partition by range (a); +create table ma_test (a int, b int) partition by range (a); create table ma_test_p1 partition of ma_test for values from (0) to (10); create table ma_test_p2 partition of ma_test for values from (10) to (20); create table ma_test_p3 partition of ma_test for values from (20) to (30); -insert into ma_test select x from generate_series(0,29) t(x); -create index on ma_test (a); +insert into ma_test select x,x from generate_series(0,29) t(x); +create index on ma_test (b); analyze ma_test; -prepare mt_q1 (int) as select * from ma_test where a >= $1 and a % 10 = 5 order by a; +prepare mt_q1 (int) as select a from ma_test where a >= $1 and a % 10 = 5 order by b; -- Execute query 5 times to allow choose_custom_plan -- to start considering a generic plan. execute mt_q1(0); @@ -3132,17 +3132,15 @@ explain (analyze, costs off, summary off, timing off) execute mt_q1(15); QUERY PLAN ------------------------------------------------------------------------------- Merge Append (actual rows=2 loops=1) - Sort Key: ma_test_p2.a + Sort Key: ma_test_p2.b Subplans Removed: 1 - -> Index Scan using ma_test_p2_a_idx on ma_test_p2 (actual rows=1 loops=1) - Index Cond: (a >= $1) - Filter: ((a % 10) = 5) - Rows Removed by Filter: 4 - -> Index Scan using ma_test_p3_a_idx on ma_test_p3 (actual rows=1 loops=1) - Index Cond: (a >= $1) - Filter: ((a % 10) = 5) + -> Index Scan using ma_test_p2_b_idx on ma_test_p2 (actual rows=1 loops=1) + Filter: ((a >= $1) AND ((a % 10) = 5)) Rows Removed by Filter: 9 -(11 rows) + -> Index Scan using ma_test_p3_b_idx on ma_test_p3 (actual rows=1 loops=1) + Filter: ((a >= $1) AND ((a % 10) = 5)) + Rows Removed by Filter: 9 +(9 rows) execute mt_q1(15); a @@ -3155,13 +3153,12 @@ explain (analyze, costs off, summary off, timing off) execute mt_q1(25); QUERY PLAN ------------------------------------------------------------------------------- Merge Append (actual rows=1 loops=1) - Sort Key: ma_test_p3.a + Sort Key: ma_test_p3.b Subplans Removed: 2 - -> Index Scan using ma_test_p3_a_idx on ma_test_p3 (actual rows=1 loops=1) - Index Cond: (a >= $1) - Filter: ((a % 10) = 5) - Rows Removed by Filter: 4 -(7 rows) + -> Index Scan using ma_test_p3_b_idx on ma_test_p3 (actual rows=1 loops=1) + Filter: ((a >= $1) AND ((a % 10) = 5)) + Rows Removed by Filter: 9 +(6 rows) execute mt_q1(25); a @@ -3174,12 +3171,11 @@ explain (analyze, costs off, summary off, timing off) execute mt_q1(35); QUERY PLAN ------------------------------------------------------------------------ Merge Append (actual rows=0 loops=1) - Sort Key: ma_test_p1.a + Sort Key: ma_test_p1.b Subplans Removed: 2 - -> Index Scan using ma_test_p1_a_idx on ma_test_p1 (never executed) - Index Cond: (a >= $1) - Filter: ((a % 10) = 5) -(6 rows) + -> Index Scan using ma_test_p1_b_idx on ma_test_p1 (never executed) + Filter: ((a >= $1) AND ((a % 10) = 5)) +(5 rows) execute mt_q1(35); a @@ -3188,23 +3184,23 @@ execute mt_q1(35); deallocate mt_q1; -- ensure initplan params properly prune partitions -explain (analyze, costs off, summary off, timing off) select * from ma_test where a >= (select min(a) from ma_test_p2) order by a; +explain (analyze, costs off, summary off, timing off) select * from ma_test where a >= (select min(b) from ma_test_p2) order by b; QUERY PLAN ------------------------------------------------------------------------------------------------------------ Merge Append (actual rows=20 loops=1) - Sort Key: ma_test_p1.a + Sort Key: ma_test_p1.b InitPlan 2 (returns $1) -> Result (actual rows=1 loops=1) InitPlan 1 (returns $0) -> Limit (actual rows=1 loops=1) - -> Index Scan using ma_test_p2_a_idx on ma_test_p2 ma_test_p2_1 (actual rows=1 loops=1) - Index Cond: (a IS NOT NULL) - -> Index Scan using ma_test_p1_a_idx on ma_test_p1 (never executed) - Index Cond: (a >= $1) - -> Index Scan using ma_test_p2_a_idx on ma_test_p2 (actual rows=10 loops=1) - Index Cond: (a >= $1) - -> Index Scan using ma_test_p3_a_idx on ma_test_p3 (actual rows=10 loops=1) - Index Cond: (a >= $1) + -> Index Scan using ma_test_p2_b_idx on ma_test_p2 ma_test_p2_1 (actual rows=1 loops=1) + Index Cond: (b IS NOT NULL) + -> Index Scan using ma_test_p1_b_idx on ma_test_p1 (never executed) + Filter: (a >= $1) + -> Index Scan using ma_test_p2_b_idx on ma_test_p2 (actual rows=10 loops=1) + Filter: (a >= $1) + -> Index Scan using ma_test_p3_b_idx on ma_test_p3 (actual rows=10 loops=1) + Filter: (a >= $1) (14 rows) reset enable_seqscan; diff --git a/src/test/regress/sql/inherit.sql b/src/test/regress/sql/inherit.sql index 5480fe7db4..e91f61bc37 100644 --- a/src/test/regress/sql/inherit.sql +++ b/src/test/regress/sql/inherit.sql @@ -715,7 +715,6 @@ explain (costs off) select * from mcrparted where abs(b) = 5; -- scans all parti explain (costs off) select * from mcrparted where a > -1; -- scans all partitions explain (costs off) select * from mcrparted where a = 20 and abs(b) = 10 and c > 10; -- scans mcrparted4 explain (costs off) select * from mcrparted where a = 20 and c > 20; -- scans mcrparted3, mcrparte4, mcrparte5, mcrparted_def -drop table mcrparted; -- check that partitioned table Appends cope with being referenced in -- subplans @@ -726,3 +725,111 @@ insert into parted_minmax values (1,'12345'); explain (costs off) select min(a), max(a) from parted_minmax where b = '12345'; select min(a), max(a) from parted_minmax where b = '12345'; drop table parted_minmax; + +-- Test code that uses Append nodes in place of MergeAppend when the +-- partition ordering matches the desired ordering. + +create index mcrparted_a_abs_c_idx on mcrparted (a, abs(b), c); + +-- MergeAppend must be used when a default partition exists +explain (costs off) select * from mcrparted order by a, abs(b), c; + +drop table mcrparted_def; + +-- Append is used for a RANGE partitioned table with no default +-- and no subpartitions +explain (costs off) select * from mcrparted order by a, abs(b), c; + +-- Append is used with subpaths in reverse order with backwards index scans +explain (costs off) select * from mcrparted order by a desc, abs(b) desc, c desc; + +-- check that Append plan is used containing a MergeAppend for sub-partitions +-- that are unordered. +drop table mcrparted5; +create table mcrparted5 partition of mcrparted for values from (20, 20, 20) to (maxvalue, maxvalue, maxvalue) partition by list (a); +create table mcrparted5a partition of mcrparted5 for values in(20); +create table mcrparted5_def partition of mcrparted5 default; + +explain (costs off) select * from mcrparted order by a, abs(b), c; + +drop table mcrparted5_def; + +-- check that an Append plan is used and the sub-partitions are flattened +-- into the main Append when the sub-partition is unordered but contains +-- just a single sub-partition. +explain (costs off) select a, abs(b) from mcrparted order by a, abs(b), c; + +-- check that Append is used when the sub-partitioned tables are pruned +-- during planning. +explain (costs off) select * from mcrparted where a < 20 order by a, abs(b), c; + +create table mclparted (a int) partition by list(a); +create table mclparted1 partition of mclparted for values in(1); +create table mclparted2 partition of mclparted for values in(2); +create index on mclparted (a); + +-- Ensure an Append is used for a list partition with an order by. +explain (costs off) select * from mclparted order by a; + +-- Ensure a MergeAppend is used when a partition exists with interleaved +-- datums in the partition bound. +create table mclparted3_5 partition of mclparted for values in(3,5); +create table mclparted4 partition of mclparted for values in(4); + +explain (costs off) select * from mclparted order by a; + +drop table mclparted; + +-- Ensure subplans which don't have a path with the correct pathkeys get +-- sorted correctly. +drop index mcrparted_a_abs_c_idx; +create index on mcrparted1 (a, abs(b), c); +create index on mcrparted2 (a, abs(b), c); +create index on mcrparted3 (a, abs(b), c); +create index on mcrparted4 (a, abs(b), c); + +explain (costs off) select * from mcrparted where a < 20 order by a, abs(b), c limit 1; + +set enable_bitmapscan = 0; +-- Ensure Append node can be used when the partition is ordered by some +-- pathkeys which were deemed redundant. +explain (costs off) select * from mcrparted where a = 10 order by a, abs(b), c; +reset enable_bitmapscan; + +drop table mcrparted; + +-- Ensure LIST partitions allow an Append to be used instead of a MergeAppend +create table bool_lp (b bool) partition by list(b); +create table bool_lp_true partition of bool_lp for values in(true); +create table bool_lp_false partition of bool_lp for values in(false); +create index on bool_lp (b); + +explain (costs off) select * from bool_lp order by b; + +drop table bool_lp; + +-- Ensure const bool quals can be properly detected as redundant +create table bool_rp (b bool, a int) partition by range(b,a); +create table bool_rp_false_1k partition of bool_rp for values from (false,0) to (false,1000); +create table bool_rp_true_1k partition of bool_rp for values from (true,0) to (true,1000); +create table bool_rp_false_2k partition of bool_rp for values from (false,1000) to (false,2000); +create table bool_rp_true_2k partition of bool_rp for values from (true,1000) to (true,2000); +create index on bool_rp (b,a); +explain (costs off) select * from bool_rp where b = true order by b,a; +explain (costs off) select * from bool_rp where b = false order by b,a; +explain (costs off) select * from bool_rp where b = true order by a; +explain (costs off) select * from bool_rp where b = false order by a; + +drop table bool_rp; + +-- Ensure an Append scan is chosen when the partition order is a subset of +-- the required order. +create table range_parted (a int, b int, c int) partition by range(a, b); +create table range_parted1 partition of range_parted for values from (0,0) to (10,10); +create table range_parted2 partition of range_parted for values from (10,10) to (20,20); +create index on range_parted (a,b,c); + +explain (costs off) select * from range_parted order by a,b,c; +explain (costs off) select * from range_parted order by a desc,b desc,c desc; + +drop table range_parted; diff --git a/src/test/regress/sql/partition_prune.sql b/src/test/regress/sql/partition_prune.sql index 2e4d2b483d..c30e58eef7 100644 --- a/src/test/regress/sql/partition_prune.sql +++ b/src/test/regress/sql/partition_prune.sql @@ -775,15 +775,15 @@ drop table boolp; -- set enable_seqscan = off; set enable_sort = off; -create table ma_test (a int) partition by range (a); +create table ma_test (a int, b int) partition by range (a); create table ma_test_p1 partition of ma_test for values from (0) to (10); create table ma_test_p2 partition of ma_test for values from (10) to (20); create table ma_test_p3 partition of ma_test for values from (20) to (30); -insert into ma_test select x from generate_series(0,29) t(x); -create index on ma_test (a); +insert into ma_test select x,x from generate_series(0,29) t(x); +create index on ma_test (b); analyze ma_test; -prepare mt_q1 (int) as select * from ma_test where a >= $1 and a % 10 = 5 order by a; +prepare mt_q1 (int) as select a from ma_test where a >= $1 and a % 10 = 5 order by b; -- Execute query 5 times to allow choose_custom_plan -- to start considering a generic plan. @@ -804,7 +804,7 @@ execute mt_q1(35); deallocate mt_q1; -- ensure initplan params properly prune partitions -explain (analyze, costs off, summary off, timing off) select * from ma_test where a >= (select min(a) from ma_test_p2) order by a; +explain (analyze, costs off, summary off, timing off) select * from ma_test where a >= (select min(b) from ma_test_p2) order by b; reset enable_seqscan; reset enable_sort;