Fix concurrent indexing operations with temporary tables

Attempting to use CREATE INDEX, DROP INDEX or REINDEX with CONCURRENTLY
on a temporary relation with ON COMMIT actions triggered unexpected
errors because those operations use multiple transactions internally to
complete their work.  Here is for example one confusing error when using
ON COMMIT DELETE ROWS:
ERROR:  index "foo" already contains data

Issues related to temporary relations and concurrent indexing are fixed
in this commit by enforcing the non-concurrent path to be taken for
temporary relations even if using CONCURRENTLY, transparently to the
user.  Using a non-concurrent path does not matter in practice as locks
cannot be taken on a temporary relation by a session different than the
one owning the relation, and the non-concurrent operation is more
effective.

The problem exists with REINDEX since v12 with the introduction of
CONCURRENTLY, and with CREATE/DROP INDEX since CONCURRENTLY exists for
those commands.  In all supported versions, this caused only confusing
error messages to be generated.  Note that with REINDEX, it was also
possible to issue a REINDEX CONCURRENTLY for a temporary relation owned
by a different session, leading to a server crash.

The idea to enforce transparently the non-concurrent code path for
temporary relations comes originally from Andres Freund.

Reported-by: Manuel Rigger
Author: Michael Paquier, Heikki Linnakangas
Reviewed-by: Andres Freund, Álvaro Herrera, Heikki Linnakangas
Discussion: https://postgr.es/m/CA+u7OA6gP7YAeCguyseusYcc=uR8+ypjCcgDDCTzjQ+k6S9ksQ@mail.gmail.com
Backpatch-through: 9.4
This commit is contained in:
Michael Paquier 2020-01-22 09:49:18 +09:00
parent 9b9c5f279e
commit a904abe2e2
8 changed files with 235 additions and 13 deletions

View File

@ -129,6 +129,11 @@ CREATE [ UNIQUE ] INDEX [ CONCURRENTLY ] [ [ IF NOT EXISTS ] <replaceable class=
&mdash; see <xref linkend="sql-createindex-concurrently"
endterm="sql-createindex-concurrently-title"/>.
</para>
<para>
For temporary tables, <command>CREATE INDEX</command> is always
non-concurrent, as no other session can access them, and
non-concurrent index creation is cheaper.
</para>
</listitem>
</varlistentry>

View File

@ -58,6 +58,11 @@ DROP INDEX [ CONCURRENTLY ] [ IF EXISTS ] <replaceable class="parameter">name</r
performed within a transaction block, but
<command>DROP INDEX CONCURRENTLY</command> cannot.
</para>
<para>
For temporary tables, <command>DROP INDEX</command> is always
non-concurrent, as no other session can access them, and
non-concurrent index drop is cheaper.
</para>
</listitem>
</varlistentry>

View File

@ -166,6 +166,11 @@ REINDEX [ ( <replaceable class="parameter">option</replaceable> [, ...] ) ] { IN
&mdash; see <xref linkend="sql-reindex-concurrently"
endterm="sql-reindex-concurrently-title"/>.
</para>
<para>
For temporary tables, <command>REINDEX</command> is always
non-concurrent, as no other session can access them, and
non-concurrent reindex is cheaper.
</para>
</listitem>
</varlistentry>

View File

@ -2016,6 +2016,15 @@ index_drop(Oid indexId, bool concurrent, bool concurrent_lock_mode)
LOCKTAG heaplocktag;
LOCKMODE lockmode;
/*
* A temporary relation uses a non-concurrent DROP. Other backends can't
* access a temporary relation, so there's no harm in grabbing a stronger
* lock (see comments in RemoveRelations), and a non-concurrent DROP is
* more efficient.
*/
Assert(get_rel_persistence(indexId) != RELPERSISTENCE_TEMP ||
(!concurrent && !concurrent_lock_mode));
/*
* To drop an index safely, we must grab exclusive lock on its parent
* table. Exclusive lock on the index alone is insufficient because

View File

@ -438,6 +438,7 @@ DefineIndex(Oid relationId,
bool skip_build,
bool quiet)
{
bool concurrent;
char *indexRelationName;
char *accessMethodName;
Oid *typeObjectId;
@ -485,6 +486,18 @@ DefineIndex(Oid relationId,
GUC_ACTION_SAVE, true, 0, false);
}
/*
* Force non-concurrent build on temporary relations, even if CONCURRENTLY
* was requested. Other backends can't access a temporary relation, so
* there's no harm in grabbing a stronger lock, and a non-concurrent DROP
* is more efficient. Do this before any use of the concurrent option is
* done.
*/
if (stmt->concurrent && get_rel_persistence(relationId) != RELPERSISTENCE_TEMP)
concurrent = true;
else
concurrent = false;
/*
* Start progress report. If we're building a partition, this was already
* done.
@ -494,7 +507,7 @@ DefineIndex(Oid relationId,
pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX,
relationId);
pgstat_progress_update_param(PROGRESS_CREATEIDX_COMMAND,
stmt->concurrent ?
concurrent ?
PROGRESS_CREATEIDX_COMMAND_CREATE_CONCURRENTLY :
PROGRESS_CREATEIDX_COMMAND_CREATE);
}
@ -547,7 +560,7 @@ DefineIndex(Oid relationId,
* parallel workers under the control of certain particular ambuild
* functions will need to be updated, too.
*/
lockmode = stmt->concurrent ? ShareUpdateExclusiveLock : ShareLock;
lockmode = concurrent ? ShareUpdateExclusiveLock : ShareLock;
rel = table_open(relationId, lockmode);
namespaceId = RelationGetNamespace(rel);
@ -590,6 +603,12 @@ DefineIndex(Oid relationId,
partitioned = rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE;
if (partitioned)
{
/*
* Note: we check 'stmt->concurrent' rather than 'concurrent', so that
* the error is thrown also for temporary tables. Seems better to be
* consistent, even though we could do it on temporary table because
* we're not actually doing it concurrently.
*/
if (stmt->concurrent)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
@ -781,8 +800,8 @@ DefineIndex(Oid relationId,
NIL, /* expressions, NIL for now */
make_ands_implicit((Expr *) stmt->whereClause),
stmt->unique,
!stmt->concurrent,
stmt->concurrent);
!concurrent,
concurrent);
typeObjectId = (Oid *) palloc(numberOfAttributes * sizeof(Oid));
collationObjectId = (Oid *) palloc(numberOfAttributes * sizeof(Oid));
@ -944,7 +963,7 @@ DefineIndex(Oid relationId,
* A valid stmt->oldNode implies that we already have a built form of the
* index. The caller should also decline any index build.
*/
Assert(!OidIsValid(stmt->oldNode) || (skip_build && !stmt->concurrent));
Assert(!OidIsValid(stmt->oldNode) || (skip_build && !concurrent));
/*
* Make the catalog entries for the index, including constraints. This
@ -955,11 +974,11 @@ DefineIndex(Oid relationId,
flags = constr_flags = 0;
if (stmt->isconstraint)
flags |= INDEX_CREATE_ADD_CONSTRAINT;
if (skip_build || stmt->concurrent || partitioned)
if (skip_build || concurrent || partitioned)
flags |= INDEX_CREATE_SKIP_BUILD;
if (stmt->if_not_exists)
flags |= INDEX_CREATE_IF_NOT_EXISTS;
if (stmt->concurrent)
if (concurrent)
flags |= INDEX_CREATE_CONCURRENT;
if (partitioned)
flags |= INDEX_CREATE_PARTITIONED;
@ -1253,7 +1272,7 @@ DefineIndex(Oid relationId,
return address;
}
if (!stmt->concurrent)
if (!concurrent)
{
/* Close the heap and we're done, in the non-concurrent case */
table_close(rel, NoLock);
@ -2323,6 +2342,11 @@ ReindexIndex(RangeVar *indexRelation, int options, bool concurrent)
* Find and lock index, and check permissions on table; use callback to
* obtain lock on table first, to avoid deadlock hazard. The lock level
* used here must match the index lock obtained in reindex_index().
*
* If it's a temporary index, we will perform a non-concurrent reindex,
* even if CONCURRENTLY was requested. In that case, reindex_index() will
* upgrade the lock, but that's OK, because other sessions can't hold
* locks on our temporary table.
*/
state.concurrent = concurrent;
state.locked_table_oid = InvalidOid;
@ -2347,7 +2371,7 @@ ReindexIndex(RangeVar *indexRelation, int options, bool concurrent)
persistence = irel->rd_rel->relpersistence;
index_close(irel, NoLock);
if (concurrent)
if (concurrent && persistence != RELPERSISTENCE_TEMP)
ReindexRelationConcurrently(indOid, options);
else
reindex_index(indOid, false, persistence,
@ -2434,13 +2458,20 @@ ReindexTable(RangeVar *relation, int options, bool concurrent)
Oid heapOid;
bool result;
/* The lock level used here should match reindex_relation(). */
/*
* The lock level used here should match reindex_relation().
*
* If it's a temporary table, we will perform a non-concurrent reindex,
* even if CONCURRENTLY was requested. In that case, reindex_relation()
* will upgrade the lock, but that's OK, because other sessions can't hold
* locks on our temporary table.
*/
heapOid = RangeVarGetRelidExtended(relation,
concurrent ? ShareUpdateExclusiveLock : ShareLock,
0,
RangeVarCallbackOwnsTable, NULL);
if (concurrent)
if (concurrent && get_rel_persistence(heapOid) != RELPERSISTENCE_TEMP)
{
result = ReindexRelationConcurrently(heapOid, options);
@ -2646,7 +2677,7 @@ ReindexMultipleTables(const char *objectName, ReindexObjectType objectKind,
/* functions in indexes may want a snapshot set */
PushActiveSnapshot(GetTransactionSnapshot());
if (concurrent)
if (concurrent && get_rel_persistence(relid) != RELPERSISTENCE_TEMP)
{
(void) ReindexRelationConcurrently(relid, options);
/* ReindexRelationConcurrently() does the verbose output */
@ -2694,6 +2725,12 @@ ReindexMultipleTables(const char *objectName, ReindexObjectType objectKind,
*
* Returns true if any indexes have been rebuilt (including toast table's
* indexes, when relevant), otherwise returns false.
*
* NOTE: This cannot be used on temporary relations. A concurrent build would
* cause issues with ON COMMIT actions triggered by the transactions of the
* concurrent build. Temporary relations are not subject to concurrent
* concerns, so there's no need for the more complicated concurrent build,
* anyway, and a non-concurrent reindex is more efficient.
*/
static bool
ReindexRelationConcurrently(Oid relationOid, int options)
@ -2937,6 +2974,10 @@ ReindexRelationConcurrently(Oid relationOid, int options)
heapRel = table_open(indexRel->rd_index->indrelid,
ShareUpdateExclusiveLock);
/* This function shouldn't be called for temporary relations. */
if (indexRel->rd_rel->relpersistence == RELPERSISTENCE_TEMP)
elog(ERROR, "cannot reindex a temporary table concurrently");
pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX,
RelationGetRelid(heapRel));
pgstat_progress_update_param(PROGRESS_CREATEIDX_COMMAND,

View File

@ -1260,7 +1260,11 @@ RemoveRelations(DropStmt *drop)
/* DROP CONCURRENTLY uses a weaker lock, and has some restrictions */
if (drop->concurrent)
{
flags |= PERFORM_DELETION_CONCURRENTLY;
/*
* Note that for temporary relations this lock may get upgraded
* later on, but as no other session can access a temporary
* relation, this is actually fine.
*/
lockmode = ShareUpdateExclusiveLock;
Assert(drop->removeType == OBJECT_INDEX);
if (list_length(drop->objects) != 1)
@ -1351,6 +1355,18 @@ RemoveRelations(DropStmt *drop)
continue;
}
/*
* Decide if concurrent mode needs to be used here or not. The
* relation persistence cannot be known without its OID.
*/
if (drop->concurrent &&
get_rel_persistence(relOid) != RELPERSISTENCE_TEMP)
{
Assert(list_length(drop->objects) == 1 &&
drop->removeType == OBJECT_INDEX);
flags |= PERFORM_DELETION_CONCURRENTLY;
}
/* OK, we're ready to delete this one */
obj.classId = RelationRelationId;
obj.objectId = relOid;

View File

@ -1435,6 +1435,31 @@ Indexes:
"concur_index5" btree (f2) WHERE f1 = 'x'::text
"std_index" btree (f2)
-- Temporary tables with concurrent builds and on-commit actions
-- CONCURRENTLY used with CREATE INDEX and DROP INDEX is ignored.
-- PRESERVE ROWS, the default.
CREATE TEMP TABLE concur_temp (f1 int, f2 text)
ON COMMIT PRESERVE ROWS;
INSERT INTO concur_temp VALUES (1, 'foo'), (2, 'bar');
CREATE INDEX CONCURRENTLY concur_temp_ind ON concur_temp(f1);
DROP INDEX CONCURRENTLY concur_temp_ind;
DROP TABLE concur_temp;
-- ON COMMIT DROP
BEGIN;
CREATE TEMP TABLE concur_temp (f1 int, f2 text)
ON COMMIT DROP;
INSERT INTO concur_temp VALUES (1, 'foo'), (2, 'bar');
-- Fails when running in a transaction.
CREATE INDEX CONCURRENTLY concur_temp_ind ON concur_temp(f1);
ERROR: CREATE INDEX CONCURRENTLY cannot run inside a transaction block
COMMIT;
-- ON COMMIT DELETE ROWS
CREATE TEMP TABLE concur_temp (f1 int, f2 text)
ON COMMIT DELETE ROWS;
INSERT INTO concur_temp VALUES (1, 'foo'), (2, 'bar');
CREATE INDEX CONCURRENTLY concur_temp_ind ON concur_temp(f1);
DROP INDEX CONCURRENTLY concur_temp_ind;
DROP TABLE concur_temp;
--
-- Try some concurrent index drops
--
@ -2418,6 +2443,55 @@ SELECT pg_get_indexdef('concur_exprs_index_pred_2'::regclass);
(1 row)
DROP TABLE concur_exprs_tab;
-- Temporary tables and on-commit actions, where CONCURRENTLY is ignored.
-- ON COMMIT PRESERVE ROWS, the default.
CREATE TEMP TABLE concur_temp_tab_1 (c1 int, c2 text)
ON COMMIT PRESERVE ROWS;
INSERT INTO concur_temp_tab_1 VALUES (1, 'foo'), (2, 'bar');
CREATE INDEX concur_temp_ind_1 ON concur_temp_tab_1(c2);
REINDEX TABLE CONCURRENTLY concur_temp_tab_1;
REINDEX INDEX CONCURRENTLY concur_temp_ind_1;
-- Still fails in transaction blocks
BEGIN;
REINDEX INDEX CONCURRENTLY concur_temp_ind_1;
ERROR: REINDEX CONCURRENTLY cannot run inside a transaction block
COMMIT;
-- ON COMMIT DELETE ROWS
CREATE TEMP TABLE concur_temp_tab_2 (c1 int, c2 text)
ON COMMIT DELETE ROWS;
CREATE INDEX concur_temp_ind_2 ON concur_temp_tab_2(c2);
REINDEX TABLE CONCURRENTLY concur_temp_tab_2;
REINDEX INDEX CONCURRENTLY concur_temp_ind_2;
-- ON COMMIT DROP
BEGIN;
CREATE TEMP TABLE concur_temp_tab_3 (c1 int, c2 text)
ON COMMIT PRESERVE ROWS;
INSERT INTO concur_temp_tab_3 VALUES (1, 'foo'), (2, 'bar');
CREATE INDEX concur_temp_ind_3 ON concur_temp_tab_3(c2);
-- Fails when running in a transaction
REINDEX INDEX CONCURRENTLY concur_temp_ind_3;
ERROR: REINDEX CONCURRENTLY cannot run inside a transaction block
COMMIT;
-- REINDEX SCHEMA processes all temporary relations
CREATE TABLE reindex_temp_before AS
SELECT oid, relname, relfilenode, relkind, reltoastrelid
FROM pg_class
WHERE relname IN ('concur_temp_ind_1', 'concur_temp_ind_2');
SELECT pg_my_temp_schema()::regnamespace as temp_schema_name \gset
REINDEX SCHEMA CONCURRENTLY :temp_schema_name;
SELECT b.relname,
b.relkind,
CASE WHEN a.relfilenode = b.relfilenode THEN 'relfilenode is unchanged'
ELSE 'relfilenode has changed' END
FROM reindex_temp_before b JOIN pg_class a ON b.oid = a.oid
ORDER BY 1;
relname | relkind | case
-------------------+---------+-------------------------
concur_temp_ind_1 | i | relfilenode has changed
concur_temp_ind_2 | i | relfilenode has changed
(2 rows)
DROP TABLE concur_temp_tab_1, concur_temp_tab_2, reindex_temp_before;
--
-- REINDEX SCHEMA
--

View File

@ -501,6 +501,31 @@ VACUUM FULL concur_heap;
REINDEX TABLE concur_heap;
\d concur_heap
-- Temporary tables with concurrent builds and on-commit actions
-- CONCURRENTLY used with CREATE INDEX and DROP INDEX is ignored.
-- PRESERVE ROWS, the default.
CREATE TEMP TABLE concur_temp (f1 int, f2 text)
ON COMMIT PRESERVE ROWS;
INSERT INTO concur_temp VALUES (1, 'foo'), (2, 'bar');
CREATE INDEX CONCURRENTLY concur_temp_ind ON concur_temp(f1);
DROP INDEX CONCURRENTLY concur_temp_ind;
DROP TABLE concur_temp;
-- ON COMMIT DROP
BEGIN;
CREATE TEMP TABLE concur_temp (f1 int, f2 text)
ON COMMIT DROP;
INSERT INTO concur_temp VALUES (1, 'foo'), (2, 'bar');
-- Fails when running in a transaction.
CREATE INDEX CONCURRENTLY concur_temp_ind ON concur_temp(f1);
COMMIT;
-- ON COMMIT DELETE ROWS
CREATE TEMP TABLE concur_temp (f1 int, f2 text)
ON COMMIT DELETE ROWS;
INSERT INTO concur_temp VALUES (1, 'foo'), (2, 'bar');
CREATE INDEX CONCURRENTLY concur_temp_ind ON concur_temp(f1);
DROP INDEX CONCURRENTLY concur_temp_ind;
DROP TABLE concur_temp;
--
-- Try some concurrent index drops
--
@ -972,6 +997,48 @@ SELECT pg_get_indexdef('concur_exprs_index_pred'::regclass);
SELECT pg_get_indexdef('concur_exprs_index_pred_2'::regclass);
DROP TABLE concur_exprs_tab;
-- Temporary tables and on-commit actions, where CONCURRENTLY is ignored.
-- ON COMMIT PRESERVE ROWS, the default.
CREATE TEMP TABLE concur_temp_tab_1 (c1 int, c2 text)
ON COMMIT PRESERVE ROWS;
INSERT INTO concur_temp_tab_1 VALUES (1, 'foo'), (2, 'bar');
CREATE INDEX concur_temp_ind_1 ON concur_temp_tab_1(c2);
REINDEX TABLE CONCURRENTLY concur_temp_tab_1;
REINDEX INDEX CONCURRENTLY concur_temp_ind_1;
-- Still fails in transaction blocks
BEGIN;
REINDEX INDEX CONCURRENTLY concur_temp_ind_1;
COMMIT;
-- ON COMMIT DELETE ROWS
CREATE TEMP TABLE concur_temp_tab_2 (c1 int, c2 text)
ON COMMIT DELETE ROWS;
CREATE INDEX concur_temp_ind_2 ON concur_temp_tab_2(c2);
REINDEX TABLE CONCURRENTLY concur_temp_tab_2;
REINDEX INDEX CONCURRENTLY concur_temp_ind_2;
-- ON COMMIT DROP
BEGIN;
CREATE TEMP TABLE concur_temp_tab_3 (c1 int, c2 text)
ON COMMIT PRESERVE ROWS;
INSERT INTO concur_temp_tab_3 VALUES (1, 'foo'), (2, 'bar');
CREATE INDEX concur_temp_ind_3 ON concur_temp_tab_3(c2);
-- Fails when running in a transaction
REINDEX INDEX CONCURRENTLY concur_temp_ind_3;
COMMIT;
-- REINDEX SCHEMA processes all temporary relations
CREATE TABLE reindex_temp_before AS
SELECT oid, relname, relfilenode, relkind, reltoastrelid
FROM pg_class
WHERE relname IN ('concur_temp_ind_1', 'concur_temp_ind_2');
SELECT pg_my_temp_schema()::regnamespace as temp_schema_name \gset
REINDEX SCHEMA CONCURRENTLY :temp_schema_name;
SELECT b.relname,
b.relkind,
CASE WHEN a.relfilenode = b.relfilenode THEN 'relfilenode is unchanged'
ELSE 'relfilenode has changed' END
FROM reindex_temp_before b JOIN pg_class a ON b.oid = a.oid
ORDER BY 1;
DROP TABLE concur_temp_tab_1, concur_temp_tab_2, reindex_temp_before;
--
-- REINDEX SCHEMA
--