From 2000b6c10aa6777929f1a8b613f30426bb90f849 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Wed, 16 Sep 2020 14:28:11 -0400 Subject: [PATCH] Don't fetch partition check expression during InitResultRelInfo. Since there is only one place that actually needs the partition check expression, namely ExecPartitionCheck, it's better to fetch it from the relcache there. In this way we will never fetch it at all if the query never has use for it, and we still fetch it just once when we do need it. The reason for taking an interest in this is that if the relcache doesn't already have the check expression cached, fetching it requires obtaining AccessShareLock on the partition root. That means that operations that look like they should only touch the partition itself will also take a lock on the root. In particular we observed that TRUNCATE on a partition may take a lock on the partition's root, contributing to a deadlock situation in parallel pg_restore. As written, this patch does have a small cost, which is that we are microscopically reducing efficiency for the case where a partition has an empty check expression. ExecPartitionCheck will be called, and will go through the motions of setting up and checking an empty qual, where before it would not have been called at all. We could avoid that by adding a separate boolean flag to track whether there is a partition expression to test. However, this case only arises for a default partition with no siblings, which surely is not an interesting case in practice. Hence adding complexity for it does not seem like a good trade-off. Amit Langote, per a suggestion by me Discussion: https://postgr.es/m/VI1PR03MB31670CA1BD9625C3A8C5DD05EB230@VI1PR03MB3167.eurprd03.prod.outlook.com --- src/backend/commands/copy.c | 2 +- src/backend/executor/execMain.c | 41 +++++++++--------------- src/backend/executor/execPartition.c | 2 +- src/backend/executor/execReplication.c | 4 +-- src/backend/executor/nodeModifyTable.c | 4 +-- src/backend/replication/logical/worker.c | 2 +- src/include/nodes/execnodes.h | 11 +++---- 7 files changed, 26 insertions(+), 40 deletions(-) diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index db7d24a511..2047557e52 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -3236,7 +3236,7 @@ CopyFrom(CopyState cstate) * we don't need to if there's no BR trigger defined on the * partition. */ - if (resultRelInfo->ri_PartitionCheck && + if (resultRelInfo->ri_RelationDesc->rd_rel->relispartition && (proute == NULL || has_before_insert_row_trig)) ExecPartitionCheck(resultRelInfo, myslot, estate, true); diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index 4fdffad6f3..2e27e26ba4 100644 --- a/src/backend/executor/execMain.c +++ b/src/backend/executor/execMain.c @@ -1280,8 +1280,6 @@ InitResultRelInfo(ResultRelInfo *resultRelInfo, Relation partition_root, int instrument_options) { - List *partition_check = NIL; - MemSet(resultRelInfo, 0, sizeof(ResultRelInfo)); resultRelInfo->type = T_ResultRelInfo; resultRelInfo->ri_RangeTableIndex = resultRelationIndex; @@ -1325,23 +1323,6 @@ InitResultRelInfo(ResultRelInfo *resultRelInfo, resultRelInfo->ri_ReturningSlot = NULL; resultRelInfo->ri_TrigOldSlot = NULL; resultRelInfo->ri_TrigNewSlot = NULL; - - /* - * Partition constraint, which also includes the partition constraint of - * all the ancestors that are partitions. Note that it will be checked - * even in the case of tuple-routing where this table is the target leaf - * partition, if there any BR triggers defined on the table. Although - * tuple-routing implicitly preserves the partition constraint of the - * target partition for a given row, the BR triggers may change the row - * such that the constraint is no longer satisfied, which we must fail for - * by checking it explicitly. - * - * If this is a partitioned table, the partition constraint (if any) of a - * given row will be checked just before performing tuple-routing. - */ - partition_check = RelationGetPartitionQual(resultRelationDesc); - - resultRelInfo->ri_PartitionCheck = partition_check; resultRelInfo->ri_PartitionRoot = partition_root; resultRelInfo->ri_PartitionInfo = NULL; /* may be set later */ resultRelInfo->ri_CopyMultiInsertBuffer = NULL; @@ -1776,7 +1757,7 @@ ExecRelCheck(ResultRelInfo *resultRelInfo, * ExecPartitionCheck --- check that tuple meets the partition constraint. * * Returns true if it meets the partition constraint. If the constraint - * fails and we're asked to emit to error, do so and don't return; otherwise + * fails and we're asked to emit an error, do so and don't return; otherwise * return false. */ bool @@ -1788,14 +1769,22 @@ ExecPartitionCheck(ResultRelInfo *resultRelInfo, TupleTableSlot *slot, /* * If first time through, build expression state tree for the partition - * check expression. Keep it in the per-query memory context so they'll - * survive throughout the query. + * check expression. (In the corner case where the partition check + * expression is empty, ie there's a default partition and nothing else, + * we'll be fooled into executing this code each time through. But it's + * pretty darn cheap in that case, so we don't worry about it.) */ if (resultRelInfo->ri_PartitionCheckExpr == NULL) { - List *qual = resultRelInfo->ri_PartitionCheck; + /* + * Ensure that the qual tree and prepared expression are in the + * query-lifespan context. + */ + MemoryContext oldcxt = MemoryContextSwitchTo(estate->es_query_cxt); + List *qual = RelationGetPartitionQual(resultRelInfo->ri_RelationDesc); resultRelInfo->ri_PartitionCheckExpr = ExecPrepareCheck(qual, estate); + MemoryContextSwitchTo(oldcxt); } /* @@ -1904,9 +1893,9 @@ ExecConstraints(ResultRelInfo *resultRelInfo, Bitmapset *insertedCols; Bitmapset *updatedCols; - Assert(constr || resultRelInfo->ri_PartitionCheck); + Assert(constr); /* we should not be called otherwise */ - if (constr && constr->has_not_null) + if (constr->has_not_null) { int natts = tupdesc->natts; int attrChk; @@ -1967,7 +1956,7 @@ ExecConstraints(ResultRelInfo *resultRelInfo, } } - if (constr && constr->num_check > 0) + if (constr->num_check > 0) { const char *failed; diff --git a/src/backend/executor/execPartition.c b/src/backend/executor/execPartition.c index bd2ea25804..33d2c6f63d 100644 --- a/src/backend/executor/execPartition.c +++ b/src/backend/executor/execPartition.c @@ -299,7 +299,7 @@ ExecFindPartition(ModifyTableState *mtstate, * First check the root table's partition constraint, if any. No point in * routing the tuple if it doesn't belong in the root table itself. */ - if (rootResultRelInfo->ri_PartitionCheck) + if (rootResultRelInfo->ri_RelationDesc->rd_rel->relispartition) ExecPartitionCheck(rootResultRelInfo, slot, estate, true); /* start with the root partitioned table */ diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c index 8f474faed0..b29db7bf4f 100644 --- a/src/backend/executor/execReplication.c +++ b/src/backend/executor/execReplication.c @@ -435,7 +435,7 @@ ExecSimpleRelationInsert(EState *estate, TupleTableSlot *slot) /* Check the constraints of the tuple */ if (rel->rd_att->constr) ExecConstraints(resultRelInfo, slot, estate); - if (resultRelInfo->ri_PartitionCheck) + if (rel->rd_rel->relispartition) ExecPartitionCheck(resultRelInfo, slot, estate, true); /* OK, store the tuple and create index entries for it */ @@ -501,7 +501,7 @@ ExecSimpleRelationUpdate(EState *estate, EPQState *epqstate, /* Check the constraints of the tuple */ if (rel->rd_att->constr) ExecConstraints(resultRelInfo, slot, estate); - if (resultRelInfo->ri_PartitionCheck) + if (rel->rd_rel->relispartition) ExecPartitionCheck(resultRelInfo, slot, estate, true); simple_table_tuple_update(rel, tid, slot, estate->es_snapshot, diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c index 20a4c474cc..9812089161 100644 --- a/src/backend/executor/nodeModifyTable.c +++ b/src/backend/executor/nodeModifyTable.c @@ -491,7 +491,7 @@ ExecInsert(ModifyTableState *mtstate, * one; except that if we got here via tuple-routing, we don't need to * if there's no BR trigger defined on the partition. */ - if (resultRelInfo->ri_PartitionCheck && + if (resultRelationDesc->rd_rel->relispartition && (resultRelInfo->ri_PartitionRoot == NULL || (resultRelInfo->ri_TrigDesc && resultRelInfo->ri_TrigDesc->trig_insert_before_row))) @@ -1181,7 +1181,7 @@ lreplace:; * row. So skip the WCO checks if the partition constraint fails. */ partition_constraint_failed = - resultRelInfo->ri_PartitionCheck && + resultRelationDesc->rd_rel->relispartition && !ExecPartitionCheck(resultRelInfo, slot, estate, false); if (!partition_constraint_failed && diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c index c37aafed0d..d239d28c09 100644 --- a/src/backend/replication/logical/worker.c +++ b/src/backend/replication/logical/worker.c @@ -1676,7 +1676,7 @@ apply_handle_tuple_routing(ResultRelInfo *relinfo, * Does the updated tuple still satisfy the current * partition's constraint? */ - if (partrelinfo->ri_PartitionCheck == NULL || + if (!partrel->rd_rel->relispartition || ExecPartitionCheck(partrelinfo, remoteslot_part, estate, false)) { diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 0b42dd6f94..a5ab1aed14 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -477,19 +477,16 @@ typedef struct ResultRelInfo /* ON CONFLICT evaluation state */ OnConflictSetState *ri_onConflict; - /* partition check expression */ - List *ri_PartitionCheck; - - /* partition check expression state */ + /* partition check expression state (NULL if not set up yet) */ ExprState *ri_PartitionCheckExpr; - /* relation descriptor for root partitioned table */ + /* relation descriptor for partitioned table's root, if any */ Relation ri_PartitionRoot; - /* Additional information specific to partition tuple routing */ + /* info for partition tuple routing (NULL if not set up yet) */ struct PartitionRoutingInfo *ri_PartitionInfo; - /* For use by copy.c when performing multi-inserts */ + /* for use by copy.c when performing multi-inserts */ struct CopyMultiInsertBuffer *ri_CopyMultiInsertBuffer; } ResultRelInfo;