Improve performance of ORDER BY / DISTINCT aggregates

ORDER BY / DISTINCT aggreagtes have, since implemented in Postgres, been executed by always performing a sort in nodeAgg.c to sort the tuples in the current group into the correct order before calling the transition function on the sorted tuples. This was not great as often there might be an index that could have provided pre-sorted input and allowed the transition functions to be called as the rows come in, rather than having to store them in a tuplestore in order to sort them once all the tuples for the group have arrived. Here we change the planner so it requests a path with a sort order which supports the most amount of ORDER BY / DISTINCT aggregate functions and add new code to the executor to allow it to support the processing of ORDER BY / DISTINCT aggregates where the tuples are already sorted in the correct order. Since there can be many ORDER BY / DISTINCT aggregates in any given query level, it's very possible that we can't find an order that suits all of these aggregates. The sort order that the planner chooses is simply the one that suits the most aggregate functions. We take the most strictly sorted variation of each order and see how many aggregate functions can use that, then we try again with the order of the remaining aggregates to see if another order would suit more aggregate functions. For example: SELECT agg(a ORDER BY a),agg2(a ORDER BY a,b) ... would request the sort order to be {a, b} because {a} is a subset of the sort order of {a,b}, but; SELECT agg(a ORDER BY a),agg2(a ORDER BY c) ... would just pick a plan ordered by {a} (we give precedence to aggregates which are earlier in the targetlist). SELECT agg(a ORDER BY a),agg2(a ORDER BY b),agg3(a ORDER BY b) ... would choose to order by {b} since two aggregates suit that vs just one that requires input ordered by {a}. Author: David Rowley Reviewed-by: Ronan Dunklau, James Coleman, Ranier Vilela, Richard Guo, Tom Lane Discussion: https://postgr.es/m/CAApHDvpHzfo92%3DR4W0%2BxVua3BUYCKMckWAmo-2t_KiXN-wYH%3Dw%40mail.gmail.com
2022-08-02 23:11:45 +12:00 · 2022-08-02 23:11:45 +12:00 · 1349d2790b
parent a69959fab2
commit 1349d2790b
24 changed files with 850 additions and 139 deletions
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@ -3295,15 +3295,18 @@ create operator class my_op_class for type int using btree family my_op_family a
 -- extension yet.
 explain (verbose, costs off)
 select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 6 and c1 < 100 group by c2;
-                                         QUERY PLAN                                         
--------------------------------------------------------------------------------------------
+                                            QUERY PLAN                                            
+--------------------------------------------------------------------------------------------------
 GroupAggregate
   Output: array_agg(c1 ORDER BY c1 USING <^ NULLS LAST), c2
   Group Key: ft2.c2
-   ->  Foreign Scan on public.ft2
-         Output: c1, c2
-         Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE (("C 1" < 100)) AND ((c2 = 6))
-(6 rows)
+   ->  Sort
+         Output: c2, c1
+         Sort Key: ft2.c1 USING <^
+         ->  Foreign Scan on public.ft2
+               Output: c2, c1
+               Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE (("C 1" < 100)) AND ((c2 = 6))
+(9 rows)

 -- This should not be pushed either.
 explain (verbose, costs off)
@ -3329,6 +3332,7 @@ alter extension postgres_fdw add operator public.=^(int, int);
 alter extension postgres_fdw add operator public.>^(int, int);
 alter server loopback options (set extensions 'postgres_fdw');
 -- Now this will be pushed as sort operator is part of the extension.
+alter server loopback options (add fdw_tuple_cost '0.5');
 explain (verbose, costs off)
 select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 6 and c1 < 100 group by c2;
                                                                           QUERY PLAN                                                                           
@ -3345,6 +3349,7 @@ select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 6
 {6,16,26,36,46,56,66,76,86,96}
 (1 row)

+alter server loopback options (drop fdw_tuple_cost);
 -- This should be pushed too.
 explain (verbose, costs off)
 select * from ft2 order by c1 using operator(public.<^);
@ -3366,15 +3371,18 @@ alter server loopback options (set extensions 'postgres_fdw');
 -- This will not be pushed as sort operator is now removed from the extension.
 explain (verbose, costs off)
 select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 6 and c1 < 100 group by c2;
-                                         QUERY PLAN                                         
--------------------------------------------------------------------------------------------
+                                            QUERY PLAN                                            
+--------------------------------------------------------------------------------------------------
 GroupAggregate
   Output: array_agg(c1 ORDER BY c1 USING <^ NULLS LAST), c2
   Group Key: ft2.c2
-   ->  Foreign Scan on public.ft2
-         Output: c1, c2
-         Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE (("C 1" < 100)) AND ((c2 = 6))
-(6 rows)
+   ->  Sort
+         Output: c2, c1
+         Sort Key: ft2.c1 USING <^
+         ->  Foreign Scan on public.ft2
+               Output: c2, c1
+               Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE (("C 1" < 100)) AND ((c2 = 6))
+(9 rows)

 -- Cleanup
 drop operator class my_op_class using btree;
--- a/contrib/postgres_fdw/sql/postgres_fdw.sql
+++ b/contrib/postgres_fdw/sql/postgres_fdw.sql
@ -943,9 +943,11 @@ alter extension postgres_fdw add operator public.>^(int, int);
 alter server loopback options (set extensions 'postgres_fdw');

 -- Now this will be pushed as sort operator is part of the extension.
+alter server loopback options (add fdw_tuple_cost '0.5');
 explain (verbose, costs off)
 select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 6 and c1 < 100 group by c2;
 select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 6 and c1 < 100 group by c2;
+alter server loopback options (drop fdw_tuple_cost);

 -- This should be pushed too.
 explain (verbose, costs off)
--- a/src/backend/executor/execExpr.c
+++ b/src/backend/executor/execExpr.c
@ -3666,13 +3666,17 @@ ExecBuildAggTrans(AggState *aggstate, AggStatePerPhase phase,
 				scratch.resnull = &state->resnull;
 			}
 			argno++;
+
+			Assert(pertrans->numInputs == argno);
 		}
-		else if (pertrans->numSortCols == 0)
+		else if (!pertrans->aggsortrequired)
 		{
 			ListCell   *arg;

 			/*
-			 * Normal transition function without ORDER BY / DISTINCT.
+			 * Normal transition function without ORDER BY / DISTINCT or with
+			 * ORDER BY / DISTINCT but the planner has given us pre-sorted
+			 * input.
 			 */
 			strictargs = trans_fcinfo->args + 1;

@ -3680,6 +3684,13 @@ ExecBuildAggTrans(AggState *aggstate, AggStatePerPhase phase,
 			{
 				TargetEntry *source_tle = (TargetEntry *) lfirst(arg);

+				/*
+				 * Don't initialize args for any ORDER BY clause that might
+				 * exist in a presorted aggregate.
+				 */
+				if (argno == pertrans->numTransInputs)
+					break;
+
 				/*
 				 * Start from 1, since the 0th arg will be the transition
 				 * value
@ -3689,11 +3700,13 @@ ExecBuildAggTrans(AggState *aggstate, AggStatePerPhase phase,
 								&trans_fcinfo->args[argno + 1].isnull);
 				argno++;
 			}
+			Assert(pertrans->numTransInputs == argno);
 		}
 		else if (pertrans->numInputs == 1)
 		{
 			/*
-			 * DISTINCT and/or ORDER BY case, with a single column sorted on.
+			 * Non-presorted DISTINCT and/or ORDER BY case, with a single
+			 * column sorted on.
 			 */
 			TargetEntry *source_tle =
 			(TargetEntry *) linitial(pertrans->aggref->args);
@ -3705,11 +3718,14 @@ ExecBuildAggTrans(AggState *aggstate, AggStatePerPhase phase,
 							&state->resnull);
 			strictnulls = &state->resnull;
 			argno++;
+
+			Assert(pertrans->numInputs == argno);
 		}
 		else
 		{
 			/*
-			 * DISTINCT and/or ORDER BY case, with multiple columns sorted on.
+			 * Non-presorted DISTINCT and/or ORDER BY case, with multiple
+			 * columns sorted on.
 			 */
 			Datum	   *values = pertrans->sortslot->tts_values;
 			bool	   *nulls = pertrans->sortslot->tts_isnull;
@ -3725,8 +3741,8 @@ ExecBuildAggTrans(AggState *aggstate, AggStatePerPhase phase,
 								&values[argno], &nulls[argno]);
 				argno++;
 			}
+			Assert(pertrans->numInputs == argno);
 		}
-		Assert(pertrans->numInputs == argno);

 		/*
 		 * For a strict transfn, nothing happens when there's a NULL input; we
@ -3748,6 +3764,21 @@ ExecBuildAggTrans(AggState *aggstate, AggStatePerPhase phase,
 										 state->steps_len - 1);
 		}

+		/* Handle DISTINCT aggregates which have pre-sorted input */
+		if (pertrans->numDistinctCols > 0 && !pertrans->aggsortrequired)
+		{
+			if (pertrans->numDistinctCols > 1)
+				scratch.opcode = EEOP_AGG_PRESORTED_DISTINCT_MULTI;
+			else
+				scratch.opcode = EEOP_AGG_PRESORTED_DISTINCT_SINGLE;
+
+			scratch.d.agg_presorted_distinctcheck.pertrans = pertrans;
+			scratch.d.agg_presorted_distinctcheck.jumpdistinct = -1;	/* adjust later */
+			ExprEvalPushStep(state, &scratch);
+			adjust_bailout = lappend_int(adjust_bailout,
+										 state->steps_len - 1);
+		}
+
 		/*
 		 * Call transition function (once for each concurrently evaluated
 		 * grouping set). Do so for both sort and hash based computations, as
@ -3808,6 +3839,12 @@ ExecBuildAggTrans(AggState *aggstate, AggStatePerPhase phase,
 				Assert(as->d.agg_deserialize.jumpnull == -1);
 				as->d.agg_deserialize.jumpnull = state->steps_len;
 			}
+			else if (as->opcode == EEOP_AGG_PRESORTED_DISTINCT_SINGLE ||
+					 as->opcode == EEOP_AGG_PRESORTED_DISTINCT_MULTI)
+			{
+				Assert(as->d.agg_presorted_distinctcheck.jumpdistinct == -1);
+				as->d.agg_presorted_distinctcheck.jumpdistinct = state->steps_len;
+			}
 			else
 				Assert(false);
 		}
@ -3857,7 +3894,8 @@ ExecBuildAggTransCall(ExprState *state, AggState *aggstate,
 	/*
 	 * Determine appropriate transition implementation.
 	 *
-	 * For non-ordered aggregates:
+	 * For non-ordered aggregates and ORDER BY / DISTINCT aggregates with
+	 * presorted input:
 	 *
 	 * If the initial value for the transition state doesn't exist in the
 	 * pg_aggregate table then we will let the first non-NULL value returned
@ -3887,7 +3925,7 @@ ExecBuildAggTransCall(ExprState *state, AggState *aggstate,
 	 * process_ordered_aggregate_{single, multi} and
 	 * advance_transition_function.
 	 */
-	if (pertrans->numSortCols == 0)
+	if (!pertrans->aggsortrequired)
 	{
 		if (pertrans->transtypeByVal)
 		{
--- a/src/backend/executor/execExprInterp.c
+++ b/src/backend/executor/execExprInterp.c
@ -502,6 +502,8 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull)
 		&&CASE_EEOP_AGG_PLAIN_TRANS_INIT_STRICT_BYREF,
 		&&CASE_EEOP_AGG_PLAIN_TRANS_STRICT_BYREF,
 		&&CASE_EEOP_AGG_PLAIN_TRANS_BYREF,
+		&&CASE_EEOP_AGG_PRESORTED_DISTINCT_SINGLE,
+		&&CASE_EEOP_AGG_PRESORTED_DISTINCT_MULTI,
 		&&CASE_EEOP_AGG_ORDERED_TRANS_DATUM,
 		&&CASE_EEOP_AGG_ORDERED_TRANS_TUPLE,
 		&&CASE_EEOP_LAST
@ -1786,6 +1788,28 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull)
 			EEO_NEXT();
 		}

+		EEO_CASE(EEOP_AGG_PRESORTED_DISTINCT_SINGLE)
+		{
+			AggStatePerTrans pertrans = op->d.agg_presorted_distinctcheck.pertrans;
+			AggState   *aggstate = castNode(AggState, state->parent);
+
+			if (ExecEvalPreOrderedDistinctSingle(aggstate, pertrans))
+				EEO_NEXT();
+			else
+				EEO_JUMP(op->d.agg_presorted_distinctcheck.jumpdistinct);
+		}
+
+		EEO_CASE(EEOP_AGG_PRESORTED_DISTINCT_MULTI)
+		{
+			AggState   *aggstate = castNode(AggState, state->parent);
+			AggStatePerTrans pertrans = op->d.agg_presorted_distinctcheck.pertrans;
+
+			if (ExecEvalPreOrderedDistinctMulti(aggstate, pertrans))
+				EEO_NEXT();
+			else
+				EEO_JUMP(op->d.agg_presorted_distinctcheck.jumpdistinct);
+		}
+
 		/* process single-column ordered aggregate datum */
 		EEO_CASE(EEOP_AGG_ORDERED_TRANS_DATUM)
 		{
@ -4402,6 +4426,84 @@ ExecAggTransReparent(AggState *aggstate, AggStatePerTrans pertrans,
 	return newValue;
 }

+/*
+ * ExecEvalPreOrderedDistinctSingle
+ *		Returns true when the aggregate transition value Datum is distinct
+ *		from the previous input Datum and returns false when the input Datum
+ *		matches the previous input Datum.
+ */
+bool
+ExecEvalPreOrderedDistinctSingle(AggState *aggstate, AggStatePerTrans pertrans)
+{
+	Datum		value = pertrans->transfn_fcinfo->args[1].value;
+	bool		isnull = pertrans->transfn_fcinfo->args[1].isnull;
+
+	if (!pertrans->haslast ||
+		pertrans->lastisnull != isnull ||
+		!DatumGetBool(FunctionCall2Coll(&pertrans->equalfnOne,
+										pertrans->aggCollation,
+										pertrans->lastdatum, value)))
+	{
+		if (pertrans->haslast && !pertrans->inputtypeByVal)
+			pfree(DatumGetPointer(pertrans->lastdatum));
+
+		pertrans->haslast = true;
+		if (!isnull)
+		{
+			MemoryContext oldContext;
+
+			oldContext = MemoryContextSwitchTo(aggstate->curaggcontext->ecxt_per_tuple_memory);
+
+			pertrans->lastdatum = datumCopy(value, pertrans->inputtypeByVal,
+											pertrans->inputtypeLen);
+
+			MemoryContextSwitchTo(oldContext);
+		}
+		else
+			pertrans->lastdatum = (Datum) 0;
+		pertrans->lastisnull = isnull;
+		return true;
+	}
+
+	return false;
+}
+
+/*
+ * ExecEvalPreOrderedDistinctMulti
+ *		Returns true when the aggregate input is distinct from the previous
+ *		input and returns false when the input matches the previous input.
+ */
+bool
+ExecEvalPreOrderedDistinctMulti(AggState *aggstate, AggStatePerTrans pertrans)
+{
+	ExprContext *tmpcontext = aggstate->tmpcontext;
+
+	for (int i = 0; i < pertrans->numTransInputs; i++)
+	{
+		pertrans->sortslot->tts_values[i] = pertrans->transfn_fcinfo->args[i + 1].value;
+		pertrans->sortslot->tts_isnull[i] = pertrans->transfn_fcinfo->args[i + 1].isnull;
+	}
+
+	ExecClearTuple(pertrans->sortslot);
+	pertrans->sortslot->tts_nvalid = pertrans->numInputs;
+	ExecStoreVirtualTuple(pertrans->sortslot);
+
+	tmpcontext->ecxt_outertuple = pertrans->sortslot;
+	tmpcontext->ecxt_innertuple = pertrans->uniqslot;
+
+	if (!pertrans->haslast ||
+		!ExecQual(pertrans->equalfnMulti, tmpcontext))
+	{
+		if (pertrans->haslast)
+			ExecClearTuple(pertrans->uniqslot);
+
+		pertrans->haslast = true;
+		ExecCopySlot(pertrans->uniqslot, pertrans->sortslot);
+		return true;
+	}
+	return false;
+}
+
 /*
 * Invoke ordered transition function, with a datum argument.
 */
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@ -583,7 +583,7 @@ initialize_aggregate(AggState *aggstate, AggStatePerTrans pertrans,
 	/*
 	 * Start a fresh sort operation for each DISTINCT/ORDER BY aggregate.
 	 */
-	if (pertrans->numSortCols > 0)
+	if (pertrans->aggsortrequired)
 	{
 		/*
 		 * In case of rescan, maybe there could be an uncompleted sort
@ -1309,7 +1309,7 @@ finalize_aggregates(AggState *aggstate,

 		pergroupstate = &pergroup[transno];

-		if (pertrans->numSortCols > 0)
+		if (pertrans->aggsortrequired)
 		{
 			Assert(aggstate->aggstrategy != AGG_HASHED &&
 				   aggstate->aggstrategy != AGG_MIXED);
@ -1323,6 +1323,21 @@ finalize_aggregates(AggState *aggstate,
 												pertrans,
 												pergroupstate);
 		}
+		else if (pertrans->numDistinctCols > 0 && pertrans->haslast)
+		{
+			pertrans->haslast = false;
+
+			if (pertrans->numDistinctCols == 1)
+			{
+				if (!pertrans->inputtypeByVal && !pertrans->lastisnull)
+					pfree(DatumGetPointer(pertrans->lastdatum));
+
+				pertrans->lastisnull = false;
+				pertrans->lastdatum = (Datum) 0;
+			}
+			else
+				ExecClearTuple(pertrans->uniqslot);
+		}
 	}

 	/*
@ -4127,6 +4142,12 @@ build_pertrans_for_aggref(AggStatePerTrans pertrans,
 	 * stick them into arrays.  We ignore ORDER BY for an ordered-set agg,
 	 * however; the agg's transfn and finalfn are responsible for that.
 	 *
+	 * When the planner has set the aggpresorted flag, the input to the
+	 * aggregate is already correctly sorted.  For ORDER BY aggregates we can
+	 * simply treat these as normal aggregates.  For presorted DISTINCT
+	 * aggregates an extra step must be added to remove duplicate consecutive
+	 * inputs.
+	 *
 	 * Note that by construction, if there is a DISTINCT clause then the ORDER
 	 * BY clause is a prefix of it (see transformDistinctClause).
 	 */
@ -4134,18 +4155,27 @@ build_pertrans_for_aggref(AggStatePerTrans pertrans,
 	{
 		sortlist = NIL;
 		numSortCols = numDistinctCols = 0;
+		pertrans->aggsortrequired = false;
+	}
+	else if (aggref->aggpresorted && aggref->aggdistinct == NIL)
+	{
+		sortlist = NIL;
+		numSortCols = numDistinctCols = 0;
+		pertrans->aggsortrequired = false;
 	}
 	else if (aggref->aggdistinct)
 	{
 		sortlist = aggref->aggdistinct;
 		numSortCols = numDistinctCols = list_length(sortlist);
 		Assert(numSortCols >= list_length(aggref->aggorder));
+		pertrans->aggsortrequired = !aggref->aggpresorted;
 	}
 	else
 	{
 		sortlist = aggref->aggorder;
 		numSortCols = list_length(sortlist);
 		numDistinctCols = 0;
+		pertrans->aggsortrequired = (numSortCols > 0);
 	}

 	pertrans->numSortCols = numSortCols;
--- a/src/backend/jit/llvm/llvmjit_expr.c
+++ b/src/backend/jit/llvm/llvmjit_expr.c
@ -2335,6 +2335,54 @@ llvm_compile_expr(ExprState *state)
 					break;
 				}

+			case EEOP_AGG_PRESORTED_DISTINCT_SINGLE:
+				{
+					AggState   *aggstate = castNode(AggState, state->parent);
+					AggStatePerTrans pertrans = op->d.agg_presorted_distinctcheck.pertrans;
+					int			jumpdistinct = op->d.agg_presorted_distinctcheck.jumpdistinct;
+
+					LLVMValueRef v_fn = llvm_pg_func(mod, "ExecEvalPreOrderedDistinctSingle");
+					LLVMValueRef v_args[2];
+					LLVMValueRef v_ret;
+
+					v_args[0] = l_ptr_const(aggstate, l_ptr(StructAggState));
+					v_args[1] = l_ptr_const(pertrans, l_ptr(StructAggStatePerTransData));
+
+					v_ret = LLVMBuildCall(b, v_fn, v_args, 2, "");
+					v_ret = LLVMBuildZExt(b, v_ret, TypeStorageBool, "");
+
+					LLVMBuildCondBr(b,
+									LLVMBuildICmp(b, LLVMIntEQ, v_ret,
+												  l_sbool_const(1), ""),
+									opblocks[opno + 1],
+									opblocks[jumpdistinct]);
+					break;
+				}
+
+			case EEOP_AGG_PRESORTED_DISTINCT_MULTI:
+				{
+					AggState   *aggstate = castNode(AggState, state->parent);
+					AggStatePerTrans pertrans = op->d.agg_presorted_distinctcheck.pertrans;
+					int			jumpdistinct = op->d.agg_presorted_distinctcheck.jumpdistinct;
+
+					LLVMValueRef v_fn = llvm_pg_func(mod, "ExecEvalPreOrderedDistinctMulti");
+					LLVMValueRef v_args[2];
+					LLVMValueRef v_ret;
+
+					v_args[0] = l_ptr_const(aggstate, l_ptr(StructAggState));
+					v_args[1] = l_ptr_const(pertrans, l_ptr(StructAggStatePerTransData));
+
+					v_ret = LLVMBuildCall(b, v_fn, v_args, 2, "");
+					v_ret = LLVMBuildZExt(b, v_ret, TypeStorageBool, "");
+
+					LLVMBuildCondBr(b,
+									LLVMBuildICmp(b, LLVMIntEQ, v_ret,
+												  l_sbool_const(1), ""),
+									opblocks[opno + 1],
+									opblocks[jumpdistinct]);
+					break;
+				}
+
 			case EEOP_AGG_ORDERED_TRANS_DATUM:
 				build_EvalXFunc(b, mod, "ExecEvalAggOrderedTransDatum",
 								v_state, op, v_econtext);
--- a/src/backend/jit/llvm/llvmjit_types.c
+++ b/src/backend/jit/llvm/llvmjit_types.c
@ -103,6 +103,8 @@ void	   *referenced_functions[] =
 {
 	ExecAggInitGroup,
 	ExecAggTransReparent,
+	ExecEvalPreOrderedDistinctSingle,
+	ExecEvalPreOrderedDistinctMulti,
 	ExecEvalAggOrderedTransDatum,
 	ExecEvalAggOrderedTransTuple,
 	ExecEvalArrayCoerce,
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@ -103,6 +103,28 @@ make_canonical_pathkey(PlannerInfo *root,
 	return pk;
 }

+/*
+ * append_pathkeys
+ *		Append all non-redundant PathKeys in 'source' onto 'target' and
+ *		returns the updated 'target' list.
+ */
+List *
+append_pathkeys(List *target, List *source)
+{
+	ListCell   *lc;
+
+	Assert(target != NIL);
+
+	foreach(lc, source)
+	{
+		PathKey    *pk = lfirst_node(PathKey, lc);
+
+		if (!pathkey_is_redundant(pk, target))
+			target = lappend(target, pk);
+	}
+	return target;
+}
+
 /*
 * pathkey_is_redundant
 *	   Is a pathkey redundant with one already in the given list?
@ -746,7 +768,8 @@ get_cheapest_group_keys_order(PlannerInfo *root, double nrows,
 List *
 get_useful_group_keys_orderings(PlannerInfo *root, double nrows,
 								List *path_pathkeys,
-								List *group_pathkeys, List *group_clauses)
+								List *group_pathkeys, List *group_clauses,
+								List *aggregate_pathkeys)
 {
 	Query	   *parse = root->parse;
 	List	   *infos = NIL;
@ -758,7 +781,10 @@ get_useful_group_keys_orderings(PlannerInfo *root, double nrows,

 	/* always return at least the original pathkeys/clauses */
 	info = makeNode(PathKeyInfo);
-	info->pathkeys = pathkeys;
+	if (aggregate_pathkeys != NIL)
+		info->pathkeys = list_concat_copy(pathkeys, aggregate_pathkeys);
+	else
+		info->pathkeys = pathkeys;
 	info->clauses = clauses;

 	infos = lappend(infos, info);
@ -783,7 +809,10 @@ get_useful_group_keys_orderings(PlannerInfo *root, double nrows,
 									  n_preordered))
 	{
 		info = makeNode(PathKeyInfo);
-		info->pathkeys = pathkeys;
+		if (aggregate_pathkeys != NIL)
+			info->pathkeys = list_concat_copy(pathkeys, aggregate_pathkeys);
+		else
+			info->pathkeys = pathkeys;
 		info->clauses = clauses;

 		infos = lappend(infos, info);
@ -822,7 +851,10 @@ get_useful_group_keys_orderings(PlannerInfo *root, double nrows,
 		 * still want to keep the keys reordered to path_pathkeys.
 		 */
 		info = makeNode(PathKeyInfo);
-		info->pathkeys = pathkeys;
+		if (aggregate_pathkeys != NIL)
+			info->pathkeys = list_concat_copy(pathkeys, aggregate_pathkeys);
+		else
+			info->pathkeys = pathkeys;
 		info->clauses = clauses;

 		infos = lappend(infos, info);
@ -850,7 +882,10 @@ get_useful_group_keys_orderings(PlannerInfo *root, double nrows,

 		/* keep the group keys reordered to match ordering of input path */
 		info = makeNode(PathKeyInfo);
-		info->pathkeys = pathkeys;
+		if (aggregate_pathkeys != NIL)
+			info->pathkeys = list_concat_copy(pathkeys, aggregate_pathkeys);
+		else
+			info->pathkeys = pathkeys;
 		info->clauses = clauses;

 		infos = lappend(infos, info);
--- a/src/backend/optimizer/plan/planagg.c
+++ b/src/backend/optimizer/plan/planagg.c
@ -245,7 +245,7 @@ can_minmax_aggs(PlannerInfo *root, List **context)
 	foreach(lc, root->agginfos)
 	{
 		AggInfo    *agginfo = lfirst_node(AggInfo, lc);
-		Aggref	   *aggref = agginfo->representative_aggref;
+		Aggref	   *aggref = linitial_node(Aggref, agginfo->aggrefs);
 		Oid			aggsortop;
 		TargetEntry *curTarget;
 		MinMaxAggInfo *mminfo;
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@ -24,6 +24,7 @@
 #include "access/sysattr.h"
 #include "access/table.h"
 #include "access/xact.h"
+#include "catalog/pg_aggregate.h"
 #include "catalog/pg_constraint.h"
 #include "catalog/pg_inherits.h"
 #include "catalog/pg_proc.h"
@ -3125,6 +3126,217 @@ reorder_grouping_sets(List *groupingsets, List *sortclause)
 	return result;
 }

+/*
+ * make_pathkeys_for_groupagg
+ *		Determine the pathkeys for the GROUP BY clause and/or any ordered
+ *		aggregates.  We expect at least one of these here.
+ *
+ * Building the pathkeys for the GROUP BY is simple.  Most of the complexity
+ * involved here comes from calculating the best pathkeys for ordered
+ * aggregates.  We define "best" as the pathkeys that suit the most number of
+ * aggregate functions.  We find these by looking at the first ORDER BY /
+ * DISTINCT aggregate and take the pathkeys for that before searching for
+ * other aggregates that require the same or a more strict variation of the
+ * same pathkeys.  We then repeat that process for any remaining aggregates
+ * with different pathkeys and if we find another set of pathkeys that suits a
+ * larger number of aggregates then we return those pathkeys instead.
+ *
+ * *number_groupby_pathkeys gets set to the number of elements in the returned
+ * list that belong to the GROUP BY clause.  Any elements above this number
+ * must belong to ORDER BY / DISTINCT aggregates.
+ *
+ * When the best pathkeys are found we also mark each Aggref that can use
+ * those pathkeys as aggpresorted = true.
+ */
+static List *
+make_pathkeys_for_groupagg(PlannerInfo *root, List *groupClause, List *tlist,
+						   int *number_groupby_pathkeys)
+{
+	List	   *grouppathkeys = NIL;
+	List	   *bestpathkeys;
+	Bitmapset  *bestaggs;
+	Bitmapset  *unprocessed_aggs;
+	ListCell   *lc;
+	int			i;
+
+	Assert(groupClause != NIL || root->numOrderedAggs > 0);
+
+	if (groupClause != NIL)
+	{
+		/* no pathkeys possible if there's an unsortable GROUP BY */
+		if (!grouping_is_sortable(groupClause))
+		{
+			*number_groupby_pathkeys = 0;
+			return NIL;
+		}
+
+		grouppathkeys = make_pathkeys_for_sortclauses(root, groupClause,
+													  tlist);
+		*number_groupby_pathkeys = list_length(grouppathkeys);
+	}
+	else
+		*number_groupby_pathkeys = 0;
+
+	/*
+	 * We can't add pathkeys for ordered aggregates if there are any grouping
+	 * sets.  All handling specific to ordered aggregates must be done by the
+	 * executor in that case.
+	 */
+	if (root->numOrderedAggs == 0 || root->parse->groupingSets != NIL)
+		return grouppathkeys;
+
+	/*
+	 * Make a first pass over all AggInfos to collect a Bitmapset containing
+	 * the indexes of all AggInfos to be processed below.
+	 */
+	unprocessed_aggs = NULL;
+	foreach(lc, root->agginfos)
+	{
+		AggInfo    *agginfo = lfirst_node(AggInfo, lc);
+		Aggref	   *aggref = linitial_node(Aggref, agginfo->aggrefs);
+
+		if (AGGKIND_IS_ORDERED_SET(aggref->aggkind))
+			continue;
+
+		/* only add aggregates with a DISTINCT or ORDER BY */
+		if (aggref->aggdistinct != NIL || aggref->aggorder != NIL)
+			unprocessed_aggs = bms_add_member(unprocessed_aggs,
+											  foreach_current_index(lc));
+	}
+
+	/*
+	 * Now process all the unprocessed_aggs to find the best set of pathkeys
+	 * for the given set of aggregates.
+	 *
+	 * On the first outer loop here 'bestaggs' will be empty.   We'll populate
+	 * this during the first loop using the pathkeys for the very first
+	 * AggInfo then taking any stronger pathkeys from any other AggInfos with
+	 * a more strict set of compatible pathkeys.  Once the outer loop is
+	 * complete, we mark off all the aggregates with compatible pathkeys then
+	 * remove those from the unprocessed_aggs and repeat the process to try to
+	 * find another set of pathkeys that are suitable for a larger number of
+	 * aggregates.  The outer loop will stop when there are not enough
+	 * unprocessed aggregates for it to be possible to find a set of pathkeys
+	 * to suit a larger number of aggregates.
+	 */
+	bestpathkeys = NIL;
+	bestaggs = NULL;
+	while (bms_num_members(unprocessed_aggs) > bms_num_members(bestaggs))
+	{
+		Bitmapset  *aggindexes = NULL;
+		List	   *currpathkeys = NIL;
+
+		i = -1;
+		while ((i = bms_next_member(unprocessed_aggs, i)) >= 0)
+		{
+			AggInfo    *agginfo = list_nth_node(AggInfo, root->agginfos, i);
+			Aggref	   *aggref = linitial_node(Aggref, agginfo->aggrefs);
+			List	   *sortlist;
+
+			if (aggref->aggdistinct != NIL)
+				sortlist = aggref->aggdistinct;
+			else
+				sortlist = aggref->aggorder;
+
+			/*
+			 * When not set yet, take the pathkeys from the first unprocessed
+			 * aggregate.
+			 */
+			if (currpathkeys == NIL)
+			{
+				currpathkeys = make_pathkeys_for_sortclauses(root, sortlist,
+															 aggref->args);
+
+				/* include the GROUP BY pathkeys, if they exist */
+				if (grouppathkeys != NIL)
+					currpathkeys = append_pathkeys(list_copy(grouppathkeys),
+												   currpathkeys);
+
+				/* record that we found pathkeys for this aggregate */
+				aggindexes = bms_add_member(aggindexes, i);
+			}
+			else
+			{
+				List	   *pathkeys;
+
+				/* now look for a stronger set of matching pathkeys */
+				pathkeys = make_pathkeys_for_sortclauses(root, sortlist,
+														 aggref->args);
+
+				/* include the GROUP BY pathkeys, if they exist */
+				if (grouppathkeys != NIL)
+					pathkeys = append_pathkeys(list_copy(grouppathkeys),
+											   pathkeys);
+
+				/* are 'pathkeys' compatible or better than 'currpathkeys'? */
+				switch (compare_pathkeys(currpathkeys, pathkeys))
+				{
+					case PATHKEYS_BETTER2:
+						/* 'pathkeys' are stronger, use these ones instead */
+						currpathkeys = pathkeys;
+						/* FALLTHROUGH */
+
+					case PATHKEYS_BETTER1:
+						/* 'pathkeys' are less strict */
+						/* FALLTHROUGH */
+
+					case PATHKEYS_EQUAL:
+						/* mark this aggregate as covered by 'currpathkeys' */
+						aggindexes = bms_add_member(aggindexes, i);
+						break;
+
+					case PATHKEYS_DIFFERENT:
+						break;
+				}
+			}
+		}
+
+		/* remove the aggregates that we've just processed */
+		unprocessed_aggs = bms_del_members(unprocessed_aggs, aggindexes);
+
+		/*
+		 * If this pass included more aggregates than the previous best then
+		 * use these ones as the best set.
+		 */
+		if (bms_num_members(aggindexes) > bms_num_members(bestaggs))
+		{
+			bestaggs = aggindexes;
+			bestpathkeys = currpathkeys;
+		}
+	}
+
+	/*
+	 * Now that we've found the best set of aggregates we can set the
+	 * presorted flag to indicate to the executor that it needn't bother
+	 * performing a sort for these Aggrefs.  We're able to do this now as
+	 * there's no chance of a Hash Aggregate plan as create_grouping_paths
+	 * will not mark the GROUP BY as GROUPING_CAN_USE_HASH due to the presence
+	 * of ordered aggregates.
+	 */
+	i = -1;
+	while ((i = bms_next_member(bestaggs, i)) >= 0)
+	{
+		AggInfo    *agginfo = list_nth_node(AggInfo, root->agginfos, i);
+
+		foreach(lc, agginfo->aggrefs)
+		{
+			Aggref	   *aggref = lfirst_node(Aggref, lc);
+
+			aggref->aggpresorted = true;
+		}
+	}
+
+	/*
+	 * bestpathkeys includes the GROUP BY pathkeys, so if we found any ordered
+	 * aggregates, then return bestpathkeys, otherwise return the
+	 * grouppathkeys.
+	 */
+	if (bestpathkeys != NIL)
+		return bestpathkeys;
+
+	return grouppathkeys;
+}
+
 /*
 * Compute query_pathkeys and other pathkeys during plan generation
 */
@ -3137,18 +3349,19 @@ standard_qp_callback(PlannerInfo *root, void *extra)
 	List	   *activeWindows = qp_extra->activeWindows;

 	/*
-	 * Calculate pathkeys that represent grouping/ordering requirements.  The
-	 * sortClause is certainly sort-able, but GROUP BY and DISTINCT might not
-	 * be, in which case we just leave their pathkeys empty.
+	 * Calculate pathkeys that represent grouping/ordering and/or ordered
+	 * aggregate requirements.
 	 */
-	if (qp_extra->groupClause &&
-		grouping_is_sortable(qp_extra->groupClause))
-		root->group_pathkeys =
-			make_pathkeys_for_sortclauses(root,
-										  qp_extra->groupClause,
-										  tlist);
+	if (qp_extra->groupClause != NIL || root->numOrderedAggs > 0)
+		root->group_pathkeys = make_pathkeys_for_groupagg(root,
+														  qp_extra->groupClause,
+														  tlist,
+														  &root->num_groupby_pathkeys);
 	else
+	{
 		root->group_pathkeys = NIL;
+		root->num_groupby_pathkeys = 0;
+	}

 	/* We consider only the first (bottom) window in pathkeys logic */
 	if (activeWindows != NIL)
@ -6222,6 +6435,26 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,

 	if (can_sort)
 	{
+		List	   *group_pathkeys;
+		List	   *orderAggPathkeys;
+		int			numAggPathkeys;
+
+		numAggPathkeys = list_length(root->group_pathkeys) -
+			root->num_groupby_pathkeys;
+
+		if (numAggPathkeys > 0)
+		{
+			group_pathkeys = list_copy_head(root->group_pathkeys,
+											root->num_groupby_pathkeys);
+			orderAggPathkeys = list_copy_tail(root->group_pathkeys,
+											  root->num_groupby_pathkeys);
+		}
+		else
+		{
+			group_pathkeys = root->group_pathkeys;
+			orderAggPathkeys = NIL;
+		}
+
 		/*
 		 * Use any available suitably-sorted path as input, and also consider
 		 * sorting the cheapest-total path.
@ -6234,7 +6467,6 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,

 			List	   *pathkey_orderings = NIL;

-			List	   *group_pathkeys = root->group_pathkeys;
 			List	   *group_clauses = parse->groupClause;

 			/* generate alternative group orderings that might be useful */
@ -6242,7 +6474,8 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 																path->rows,
 																path->pathkeys,
 																group_pathkeys,
-																group_clauses);
+																group_clauses,
+																orderAggPathkeys);

 			Assert(list_length(pathkey_orderings) > 0);

@ -6414,7 +6647,8 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 																	path->rows,
 																	path->pathkeys,
 																	group_pathkeys,
-																	group_clauses);
+																	group_clauses,
+																	orderAggPathkeys);

 				Assert(list_length(pathkey_orderings) > 0);

@ -6717,6 +6951,26 @@ create_partial_grouping_paths(PlannerInfo *root,

 	if (can_sort && cheapest_total_path != NULL)
 	{
+		List	   *group_pathkeys;
+		List	   *orderAggPathkeys;
+		int			numAggPathkeys;
+
+		numAggPathkeys = list_length(root->group_pathkeys) -
+			root->num_groupby_pathkeys;
+
+		if (numAggPathkeys > 0)
+		{
+			group_pathkeys = list_copy_head(root->group_pathkeys,
+											root->num_groupby_pathkeys);
+			orderAggPathkeys = list_copy_tail(root->group_pathkeys,
+											  root->num_groupby_pathkeys);
+		}
+		else
+		{
+			group_pathkeys = root->group_pathkeys;
+			orderAggPathkeys = NIL;
+		}
+
 		/* This should have been checked previously */
 		Assert(parse->hasAggs || parse->groupClause);

@ -6729,10 +6983,7 @@ create_partial_grouping_paths(PlannerInfo *root,
 			ListCell   *lc2;
 			Path	   *path = (Path *) lfirst(lc);
 			Path	   *path_save = path;
-
 			List	   *pathkey_orderings = NIL;
-
-			List	   *group_pathkeys = root->group_pathkeys;
 			List	   *group_clauses = parse->groupClause;

 			/* generate alternative group orderings that might be useful */
@ -6740,7 +6991,8 @@ create_partial_grouping_paths(PlannerInfo *root,
 																path->rows,
 																path->pathkeys,
 																group_pathkeys,
-																group_clauses);
+																group_clauses,
+																orderAggPathkeys);

 			Assert(list_length(pathkey_orderings) > 0);

@ -6856,16 +7108,33 @@ create_partial_grouping_paths(PlannerInfo *root,

 	if (can_sort && cheapest_partial_path != NULL)
 	{
+		List	   *group_pathkeys;
+		List	   *orderAggPathkeys;
+		int			numAggPathkeys;
+
+		numAggPathkeys = list_length(root->group_pathkeys) -
+			root->num_groupby_pathkeys;
+
+		if (numAggPathkeys > 0)
+		{
+			group_pathkeys = list_copy_head(root->group_pathkeys,
+											root->num_groupby_pathkeys);
+			orderAggPathkeys = list_copy_tail(root->group_pathkeys,
+											  root->num_groupby_pathkeys);
+		}
+		else
+		{
+			group_pathkeys = root->group_pathkeys;
+			orderAggPathkeys = NIL;
+		}
+
 		/* Similar to above logic, but for partial paths. */
 		foreach(lc, input_rel->partial_pathlist)
 		{
 			ListCell   *lc2;
 			Path	   *path = (Path *) lfirst(lc);
 			Path	   *path_original = path;
-
 			List	   *pathkey_orderings = NIL;
-
-			List	   *group_pathkeys = root->group_pathkeys;
 			List	   *group_clauses = parse->groupClause;

 			/* generate alternative group orderings that might be useful */
@ -6873,7 +7142,8 @@ create_partial_grouping_paths(PlannerInfo *root,
 																path->rows,
 																path->pathkeys,
 																group_pathkeys,
-																group_clauses);
+																group_clauses,
+																orderAggPathkeys);

 			Assert(list_length(pathkey_orderings) > 0);

--- a/src/backend/optimizer/prep/prepagg.c
+++ b/src/backend/optimizer/prep/prepagg.c
@ -225,6 +225,7 @@ preprocess_aggref(Aggref *aggref, PlannerInfo *root)
 	{
 		AggInfo    *agginfo = list_nth_node(AggInfo, root->agginfos, aggno);

+		agginfo->aggrefs = lappend(agginfo->aggrefs, aggref);
 		transno = agginfo->transno;
 	}
 	else
@ -232,7 +233,7 @@ preprocess_aggref(Aggref *aggref, PlannerInfo *root)
 		AggInfo    *agginfo = makeNode(AggInfo);

 		agginfo->finalfn_oid = aggfinalfn;
-		agginfo->representative_aggref = aggref;
+		agginfo->aggrefs = list_make1(aggref);
 		agginfo->shareable = shareable;

 		aggno = list_length(root->agginfos);
@ -386,7 +387,7 @@ find_compatible_agg(PlannerInfo *root, Aggref *newagg,

 		aggno++;

-		existingRef = agginfo->representative_aggref;
+		existingRef = linitial_node(Aggref, agginfo->aggrefs);

 		/* all of the following must be the same or it's no match */
 		if (newagg->inputcollid != existingRef->inputcollid ||
@ -648,7 +649,7 @@ get_agg_clause_costs(PlannerInfo *root, AggSplit aggsplit, AggClauseCosts *costs
 	foreach(lc, root->agginfos)
 	{
 		AggInfo    *agginfo = lfirst_node(AggInfo, lc);
-		Aggref	   *aggref = agginfo->representative_aggref;
+		Aggref	   *aggref = linitial_node(Aggref, agginfo->aggrefs);

 		/*
 		 * Add the appropriate component function execution costs to
--- a/src/backend/parser/parse_expr.c
+++ b/src/backend/parser/parse_expr.c
@ -3816,6 +3816,7 @@ transformJsonAggConstructor(ParseState *pstate, JsonAggConstructor *agg_ctor,
 		aggref->aggstar = false;
 		aggref->aggvariadic = false;
 		aggref->aggkind = AGGKIND_NORMAL;
+		aggref->aggpresorted = false;
 		/* agglevelsup will be set by transformAggregateCall */
 		aggref->aggsplit = AGGSPLIT_SIMPLE; /* planner might change this */
 		aggref->location = agg_ctor->location;
--- a/src/backend/parser/parse_func.c
+++ b/src/backend/parser/parse_func.c
@ -774,6 +774,7 @@ ParseFuncOrColumn(ParseState *pstate, List *funcname, List *fargs,
 		aggref->aggstar = agg_star;
 		aggref->aggvariadic = func_variadic;
 		aggref->aggkind = aggkind;
+		aggref->aggpresorted = false;
 		/* agglevelsup will be set by transformAggregateCall */
 		aggref->aggsplit = AGGSPLIT_SIMPLE; /* planner might change this */
 		aggref->aggno = -1;		/* planner will set aggno and aggtransno */
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@ -57,6 +57,6 @@
 */

 /*							yyyymmddN */
-#define CATALOG_VERSION_NO	202207291
+#define CATALOG_VERSION_NO	202208021

 #endif
--- a/src/include/executor/execExpr.h
+++ b/src/include/executor/execExpr.h
@ -258,6 +258,8 @@ typedef enum ExprEvalOp
 	EEOP_AGG_PLAIN_TRANS_INIT_STRICT_BYREF,
 	EEOP_AGG_PLAIN_TRANS_STRICT_BYREF,
 	EEOP_AGG_PLAIN_TRANS_BYREF,
+	EEOP_AGG_PRESORTED_DISTINCT_SINGLE,
+	EEOP_AGG_PRESORTED_DISTINCT_MULTI,
 	EEOP_AGG_ORDERED_TRANS_DATUM,
 	EEOP_AGG_ORDERED_TRANS_TUPLE,

@ -659,6 +661,17 @@ typedef struct ExprEvalStep
 			int			jumpnull;
 		}			agg_plain_pergroup_nullcheck;

+		/* for EEOP_AGG_PRESORTED_DISTINCT_{SINGLE,MULTI} */
+		struct
+		{
+			AggStatePerTrans pertrans;
+			ExprContext *aggcontext;
+			int			setno;
+			int			transno;
+			int			setoff;
+			int			jumpdistinct;
+		}			agg_presorted_distinctcheck;
+
 		/* for EEOP_AGG_PLAIN_TRANS_[INIT_][STRICT_]{BYVAL,BYREF} */
 		/* for EEOP_AGG_ORDERED_TRANS_{DATUM,TUPLE} */
 		struct
@ -868,6 +881,10 @@ extern void ExecAggInitGroup(AggState *aggstate, AggStatePerTrans pertrans, AggS
 extern Datum ExecAggTransReparent(AggState *aggstate, AggStatePerTrans pertrans,
 								  Datum newValue, bool newValueIsNull,
 								  Datum oldValue, bool oldValueIsNull);
+extern bool ExecEvalPreOrderedDistinctSingle(AggState *aggstate,
+											 AggStatePerTrans pertrans);
+extern bool ExecEvalPreOrderedDistinctMulti(AggState *aggstate,
+											AggStatePerTrans pertrans);
 extern void ExecEvalAggOrderedTransDatum(ExprState *state, ExprEvalStep *op,
 										 ExprContext *econtext);
 extern void ExecEvalAggOrderedTransTuple(ExprState *state, ExprEvalStep *op,
--- a/src/include/executor/nodeAgg.h
+++ b/src/include/executor/nodeAgg.h
@ -48,6 +48,11 @@ typedef struct AggStatePerTransData
 	 */
 	bool		aggshared;

+	/*
+	 * True for ORDER BY and DISTINCT Aggrefs that are not aggpresorted.
+	 */
+	bool		aggsortrequired;
+
 	/*
 	 * Number of aggregated input columns.  This includes ORDER BY expressions
 	 * in both the plain-agg and ordered-set cases.  Ordered-set direct args
@ -136,6 +141,9 @@ typedef struct AggStatePerTransData
 	TupleTableSlot *sortslot;	/* current input tuple */
 	TupleTableSlot *uniqslot;	/* used for multi-column DISTINCT */
 	TupleDesc	sortdesc;		/* descriptor of input tuples */
+	Datum		lastdatum;		/* used for single-column DISTINCT */
+	bool		lastisnull;		/* used for single-column DISTINCT */
+	bool		haslast;		/* got a last value for DISTINCT check */

 	/*
 	 * These values are working state that is initialized at the start of an
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@ -365,6 +365,14 @@ struct PlannerInfo

 	/* groupClause pathkeys, if any */
 	List	   *group_pathkeys;
+
+	/*
+	 * The number of elements in the group_pathkeys list which belong to the
+	 * GROUP BY clause.  Additional ones belong to ORDER BY / DISTINCT
+	 * aggregates.
+	 */
+	int			num_groupby_pathkeys;
+
 	/* pathkeys of bottom window, if any */
 	List	   *window_pathkeys;
 	/* distinctClause pathkeys, if any */
@ -3134,12 +3142,12 @@ typedef struct AggInfo
 	NodeTag		type;

 	/*
-	 * Link to an Aggref expr this state value is for.
+	 * List of Aggref exprs that this state value is for.
 	 *
-	 * There can be multiple identical Aggref's sharing the same per-agg. This
-	 * points to the first one of them.
+	 * There will always be at least one, but there can be multiple identical
+	 * Aggref's sharing the same per-agg.
 	 */
-	Aggref	   *representative_aggref;
+	List	   *aggrefs;

 	/* Transition state number for this aggregate */
 	int			transno;
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@ -357,6 +357,10 @@ typedef struct Param
 * replaced with a single argument representing the partial-aggregate
 * transition values.
 *
+ * aggpresorted is set by the query planner for ORDER BY and DISTINCT
+ * aggregates where the chosen plan provides presorted input for this
+ * aggregate during execution.
+ *
 * aggsplit indicates the expected partial-aggregation mode for the Aggref's
 * parent plan node.  It's always set to AGGSPLIT_SIMPLE in the parser, but
 * the planner might change it to something else.  We use this mainly as
@ -422,6 +426,9 @@ typedef struct Aggref
 	/* aggregate kind (see pg_aggregate.h) */
 	char		aggkind;

+	/* aggregate input already sorted */
+	bool		aggpresorted pg_node_attr(equal_ignore);
+
 	/* > 0 if agg belongs to outer query */
 	Index		agglevelsup;

--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@ -208,7 +208,8 @@ extern int	group_keys_reorder_by_pathkeys(List *pathkeys,
 										   List **group_clauses);
 extern List *get_useful_group_keys_orderings(PlannerInfo *root, double nrows,
 											 List *path_pathkeys,
-											 List *group_pathkeys, List *group_clauses);
+											 List *group_pathkeys, List *group_clauses,
+											 List *aggregate_pathkeys);
 extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
 											Relids required_outer,
 											CostSelector cost_criterion,
@ -255,6 +256,7 @@ extern List *truncate_useless_pathkeys(PlannerInfo *root,
 									   RelOptInfo *rel,
 									   List *pathkeys);
 extern bool has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel);
+extern List *append_pathkeys(List *target, List *source);
 extern PathKey *make_canonical_pathkey(PlannerInfo *root,
 									   EquivalenceClass *eclass, Oid opfamily,
 									   int strategy, bool nulls_first);
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@ -1392,6 +1392,84 @@ ERROR:  column "t1.f1" must appear in the GROUP BY clause or be used in an aggre
 LINE 1: select t1.f1 from t1 left join t2 using (f1) group by f1;
               ^
 drop table t1, t2;
+--
+-- Test planner's selection of pathkeys for ORDER BY aggregates
+--
+-- Ensure we order by four.  This suits the most aggregate functions.
+explain (costs off)
+select sum(two order by two),max(four order by four), min(four order by four)
+from tenk1;
+          QUERY PLAN           
+-------------------------------
+ Aggregate
+   ->  Sort
+         Sort Key: four
+         ->  Seq Scan on tenk1
+(4 rows)
+
+-- Ensure we order by two.  It's a tie between ordering by two and four but
+-- we tiebreak on the aggregate's position.
+explain (costs off)
+select
+  sum(two order by two), max(four order by four),
+  min(four order by four), max(two order by two)
+from tenk1;
+          QUERY PLAN           
+-------------------------------
+ Aggregate
+   ->  Sort
+         Sort Key: two
+         ->  Seq Scan on tenk1
+(4 rows)
+
+-- Similar to above, but tiebreak on ordering by four
+explain (costs off)
+select
+  max(four order by four), sum(two order by two),
+  min(four order by four), max(two order by two)
+from tenk1;
+          QUERY PLAN           
+-------------------------------
+ Aggregate
+   ->  Sort
+         Sort Key: four
+         ->  Seq Scan on tenk1
+(4 rows)
+
+-- Ensure this one orders by ten since there are 3 aggregates that require ten
+-- vs two that suit two and four.
+explain (costs off)
+select
+  max(four order by four), sum(two order by two),
+  min(four order by four), max(two order by two),
+  sum(ten order by ten), min(ten order by ten), max(ten order by ten)
+from tenk1;
+          QUERY PLAN           
+-------------------------------
+ Aggregate
+   ->  Sort
+         Sort Key: ten
+         ->  Seq Scan on tenk1
+(4 rows)
+
+-- Try a case involving a GROUP BY clause where the GROUP BY column is also
+-- part of an aggregate's ORDER BY clause.  We want a sort order that works
+-- for the GROUP BY along with the first and the last aggregate.
+explain (costs off)
+select
+  sum(unique1 order by ten, two), sum(unique1 order by four),
+  sum(unique1 order by two, four)
+from tenk1
+group by ten;
+            QUERY PLAN            
+----------------------------------
+ GroupAggregate
+   Group Key: ten
+   ->  Sort
+         Sort Key: ten, two, four
+         ->  Seq Scan on tenk1
+(5 rows)
+
 --
 -- Test combinations of DISTINCT and/or ORDER BY
 --
@ -2263,8 +2341,8 @@ NOTICE:  avg_transfn called with 3
 -- shouldn't share states due to the distinctness not matching.
 select my_avg(distinct one),my_sum(one) from (values(1),(3)) t(one);
 NOTICE:  avg_transfn called with 1
-NOTICE:  avg_transfn called with 3
 NOTICE:  avg_transfn called with 1
+NOTICE:  avg_transfn called with 3
 NOTICE:  avg_transfn called with 3
 my_avg | my_sum 
 --------+--------
--- a/src/test/regress/expected/partition_aggregate.out
+++ b/src/test/regress/expected/partition_aggregate.out
@ -367,17 +367,17 @@ SELECT c, sum(b order by a) FROM pagg_tab GROUP BY c ORDER BY 1, 2;
         ->  GroupAggregate
               Group Key: pagg_tab.c
               ->  Sort
-                     Sort Key: pagg_tab.c
+                     Sort Key: pagg_tab.c, pagg_tab.a
                     ->  Seq Scan on pagg_tab_p1 pagg_tab
         ->  GroupAggregate
               Group Key: pagg_tab_1.c
               ->  Sort
-                     Sort Key: pagg_tab_1.c
+                     Sort Key: pagg_tab_1.c, pagg_tab_1.a
                     ->  Seq Scan on pagg_tab_p2 pagg_tab_1
         ->  GroupAggregate
               Group Key: pagg_tab_2.c
               ->  Sort
-                     Sort Key: pagg_tab_2.c
+                     Sort Key: pagg_tab_2.c, pagg_tab_2.a
                     ->  Seq Scan on pagg_tab_p3 pagg_tab_2
 (18 rows)

@ -948,34 +948,36 @@ SET max_parallel_workers_per_gather TO 2;
 -- is not partial agg safe.
 EXPLAIN (COSTS OFF)
 SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3;
-                                      QUERY PLAN                                      
--------------------------------------------------------------------------------------
- Sort
-   Sort Key: pagg_tab_ml.a, (sum(pagg_tab_ml.b)), (array_agg(DISTINCT pagg_tab_ml.c))
-   ->  Append
-         ->  GroupAggregate
-               Group Key: pagg_tab_ml.a
-               Filter: (avg(pagg_tab_ml.b) < '3'::numeric)
-               ->  Sort
-                     Sort Key: pagg_tab_ml.a
-                     ->  Seq Scan on pagg_tab_ml_p1 pagg_tab_ml
-         ->  GroupAggregate
-               Group Key: pagg_tab_ml_2.a
-               Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric)
-               ->  Sort
-                     Sort Key: pagg_tab_ml_2.a
-                     ->  Append
-                           ->  Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2
-                           ->  Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3
-         ->  GroupAggregate
-               Group Key: pagg_tab_ml_5.a
-               Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric)
-               ->  Sort
-                     Sort Key: pagg_tab_ml_5.a
-                     ->  Append
-                           ->  Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5
-                           ->  Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6
-(25 rows)
+                                         QUERY PLAN                                         
+--------------------------------------------------------------------------------------------
+ Gather Merge
+   Workers Planned: 2
+   ->  Sort
+         Sort Key: pagg_tab_ml.a, (sum(pagg_tab_ml.b)), (array_agg(DISTINCT pagg_tab_ml.c))
+         ->  Parallel Append
+               ->  GroupAggregate
+                     Group Key: pagg_tab_ml.a
+                     Filter: (avg(pagg_tab_ml.b) < '3'::numeric)
+                     ->  Sort
+                           Sort Key: pagg_tab_ml.a, pagg_tab_ml.c
+                           ->  Seq Scan on pagg_tab_ml_p1 pagg_tab_ml
+               ->  GroupAggregate
+                     Group Key: pagg_tab_ml_5.a
+                     Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric)
+                     ->  Sort
+                           Sort Key: pagg_tab_ml_5.a, pagg_tab_ml_5.c
+                           ->  Append
+                                 ->  Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5
+                                 ->  Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6
+               ->  GroupAggregate
+                     Group Key: pagg_tab_ml_2.a
+                     Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric)
+                     ->  Sort
+                           Sort Key: pagg_tab_ml_2.a, pagg_tab_ml_2.c
+                           ->  Append
+                                 ->  Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2
+                                 ->  Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3
+(27 rows)

 SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3;
 a  | sum  |  array_agg  | count 
@ -994,32 +996,34 @@ SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HA
 -- Without ORDER BY clause, to test Gather at top-most path
 EXPLAIN (COSTS OFF)
 SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3;
-                             QUERY PLAN                              
---------------------------------------------------------------------
- Append
-   ->  GroupAggregate
-         Group Key: pagg_tab_ml.a
-         Filter: (avg(pagg_tab_ml.b) < '3'::numeric)
-         ->  Sort
-               Sort Key: pagg_tab_ml.a
-               ->  Seq Scan on pagg_tab_ml_p1 pagg_tab_ml
-   ->  GroupAggregate
-         Group Key: pagg_tab_ml_2.a
-         Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric)
-         ->  Sort
-               Sort Key: pagg_tab_ml_2.a
-               ->  Append
-                     ->  Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2
-                     ->  Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3
-   ->  GroupAggregate
-         Group Key: pagg_tab_ml_5.a
-         Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric)
-         ->  Sort
-               Sort Key: pagg_tab_ml_5.a
-               ->  Append
-                     ->  Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5
-                     ->  Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6
-(23 rows)
+                                QUERY PLAN                                 
+---------------------------------------------------------------------------
+ Gather
+   Workers Planned: 2
+   ->  Parallel Append
+         ->  GroupAggregate
+               Group Key: pagg_tab_ml.a
+               Filter: (avg(pagg_tab_ml.b) < '3'::numeric)
+               ->  Sort
+                     Sort Key: pagg_tab_ml.a, pagg_tab_ml.c
+                     ->  Seq Scan on pagg_tab_ml_p1 pagg_tab_ml
+         ->  GroupAggregate
+               Group Key: pagg_tab_ml_5.a
+               Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric)
+               ->  Sort
+                     Sort Key: pagg_tab_ml_5.a, pagg_tab_ml_5.c
+                     ->  Append
+                           ->  Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5
+                           ->  Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6
+         ->  GroupAggregate
+               Group Key: pagg_tab_ml_2.a
+               Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric)
+               ->  Sort
+                     Sort Key: pagg_tab_ml_2.a, pagg_tab_ml_2.c
+                     ->  Append
+                           ->  Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2
+                           ->  Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3
+(25 rows)

 -- Full aggregation at level 1 as GROUP BY clause matches with PARTITION KEY
 -- for level 1 only. For subpartitions, GROUP BY clause does not match with
--- a/src/test/regress/expected/sqljson.out
+++ b/src/test/regress/expected/sqljson.out
@ -866,14 +866,14 @@ FROM
 	(VALUES (NULL), (3), (1), (NULL), (NULL), (5), (2), (4), (NULL)) foo(bar);
  json_arrayagg  |  json_arrayagg  |  json_arrayagg  |  json_arrayagg  |              json_arrayagg              |              json_arrayagg              |  json_arrayagg  |                                                      json_arrayagg                                                       | json_arrayagg |            json_arrayagg             
 -----------------+-----------------+-----------------+-----------------+-----------------------------------------+-----------------------------------------+-----------------+--------------------------------------------------------------------------------------------------------------------------+---------------+--------------------------------------
- [3, 1, 5, 2, 4] | [3, 1, 5, 2, 4] | [3, 1, 5, 2, 4] | [3, 1, 5, 2, 4] | [null, 3, 1, null, null, 5, 2, 4, null] | [null, 3, 1, null, null, 5, 2, 4, null] | [{"bar":null}, +| [{"bar": null}, {"bar": 3}, {"bar": 1}, {"bar": null}, {"bar": null}, {"bar": 5}, {"bar": 2}, {"bar": 4}, {"bar": null}] | [{"bar":3},  +| [{"bar": 3}, {"bar": 4}, {"bar": 5}]
-                 |                 |                 |                 |                                         |                                         |  {"bar":3},    +|                                                                                                                          |  {"bar":4},  +| 
-                 |                 |                 |                 |                                         |                                         |  {"bar":1},    +|                                                                                                                          |  {"bar":5}]   | 
-                 |                 |                 |                 |                                         |                                         |  {"bar":null}, +|                                                                                                                          |               | 
-                 |                 |                 |                 |                                         |                                         |  {"bar":null}, +|                                                                                                                          |               | 
-                 |                 |                 |                 |                                         |                                         |  {"bar":5},    +|                                                                                                                          |               | 
-                 |                 |                 |                 |                                         |                                         |  {"bar":2},    +|                                                                                                                          |               | 
+ [1, 2, 3, 4, 5] | [1, 2, 3, 4, 5] | [1, 2, 3, 4, 5] | [1, 2, 3, 4, 5] | [1, 2, 3, 4, 5, null, null, null, null] | [1, 2, 3, 4, 5, null, null, null, null] | [{"bar":1},    +| [{"bar": 1}, {"bar": 2}, {"bar": 3}, {"bar": 4}, {"bar": 5}, {"bar": null}, {"bar": null}, {"bar": null}, {"bar": null}] | [{"bar":3},  +| [{"bar": 3}, {"bar": 4}, {"bar": 5}]
+                 |                 |                 |                 |                                         |                                         |  {"bar":2},    +|                                                                                                                          |  {"bar":4},  +| 
+                 |                 |                 |                 |                                         |                                         |  {"bar":3},    +|                                                                                                                          |  {"bar":5}]   | 
                 |                 |                 |                 |                                         |                                         |  {"bar":4},    +|                                                                                                                          |               | 
+                 |                 |                 |                 |                                         |                                         |  {"bar":5},    +|                                                                                                                          |               | 
+                 |                 |                 |                 |                                         |                                         |  {"bar":null}, +|                                                                                                                          |               | 
+                 |                 |                 |                 |                                         |                                         |  {"bar":null}, +|                                                                                                                          |               | 
+                 |                 |                 |                 |                                         |                                         |  {"bar":null}, +|                                                                                                                          |               | 
                 |                 |                 |                 |                                         |                                         |  {"bar":null}]  |                                                                                                                          |               | 
 (1 row)

--- a/src/test/regress/expected/tuplesort.out
+++ b/src/test/regress/expected/tuplesort.out
@ -622,15 +622,18 @@ EXPLAIN (COSTS OFF) :qry;
         ->  GroupAggregate
               Group Key: a.col12
               Filter: (count(*) > 1)
-               ->  Merge Join
-                     Merge Cond: (a.col12 = b.col12)
-                     ->  Sort
-                           Sort Key: a.col12 DESC
-                           ->  Seq Scan on test_mark_restore a
-                     ->  Sort
-                           Sort Key: b.col12 DESC
-                           ->  Seq Scan on test_mark_restore b
-(14 rows)
+               ->  Incremental Sort
+                     Sort Key: a.col12 DESC, a.col1
+                     Presorted Key: a.col12
+                     ->  Merge Join
+                           Merge Cond: (a.col12 = b.col12)
+                           ->  Sort
+                                 Sort Key: a.col12 DESC
+                                 ->  Seq Scan on test_mark_restore a
+                           ->  Sort
+                                 Sort Key: b.col12 DESC
+                                 ->  Seq Scan on test_mark_restore b
+(17 rows)

 :qry;
 col12 | count | count | count | count | count 
@ -658,15 +661,18 @@ EXPLAIN (COSTS OFF) :qry;
         ->  GroupAggregate
               Group Key: a.col12
               Filter: (count(*) > 1)
-               ->  Merge Join
-                     Merge Cond: (a.col12 = b.col12)
-                     ->  Sort
-                           Sort Key: a.col12 DESC
-                           ->  Seq Scan on test_mark_restore a
-                     ->  Sort
-                           Sort Key: b.col12 DESC
-                           ->  Seq Scan on test_mark_restore b
-(14 rows)
+               ->  Incremental Sort
+                     Sort Key: a.col12 DESC, a.col1
+                     Presorted Key: a.col12
+                     ->  Merge Join
+                           Merge Cond: (a.col12 = b.col12)
+                           ->  Sort
+                                 Sort Key: a.col12 DESC
+                                 ->  Seq Scan on test_mark_restore a
+                           ->  Sort
+                                 Sort Key: b.col12 DESC
+                                 ->  Seq Scan on test_mark_restore b
+(17 rows)

 :qry;
 col12 | count | count | count | count | count 
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@ -503,6 +503,49 @@ select t1.f1 from t1 left join t2 using (f1) group by f1;

 drop table t1, t2;

+--
+-- Test planner's selection of pathkeys for ORDER BY aggregates
+--
+
+-- Ensure we order by four.  This suits the most aggregate functions.
+explain (costs off)
+select sum(two order by two),max(four order by four), min(four order by four)
+from tenk1;
+
+-- Ensure we order by two.  It's a tie between ordering by two and four but
+-- we tiebreak on the aggregate's position.
+explain (costs off)
+select
+  sum(two order by two), max(four order by four),
+  min(four order by four), max(two order by two)
+from tenk1;
+
+-- Similar to above, but tiebreak on ordering by four
+explain (costs off)
+select
+  max(four order by four), sum(two order by two),
+  min(four order by four), max(two order by two)
+from tenk1;
+
+-- Ensure this one orders by ten since there are 3 aggregates that require ten
+-- vs two that suit two and four.
+explain (costs off)
+select
+  max(four order by four), sum(two order by two),
+  min(four order by four), max(two order by two),
+  sum(ten order by ten), min(ten order by ten), max(ten order by ten)
+from tenk1;
+
+-- Try a case involving a GROUP BY clause where the GROUP BY column is also
+-- part of an aggregate's ORDER BY clause.  We want a sort order that works
+-- for the GROUP BY along with the first and the last aggregate.
+explain (costs off)
+select
+  sum(unique1 order by ten, two), sum(unique1 order by four),
+  sum(unique1 order by two, four)
+from tenk1
+group by ten;
+
 --
 -- Test combinations of DISTINCT and/or ORDER BY
 --