reindexdb: Add the index-level REINDEX with multiple jobs

Straight-forward index-level REINDEX is not supported with multiple jobs as
we cannot control the concurrent processing of multiple indexes depending on
the same relation.  Instead, we dedicate the whole table to certain reindex
job.  Thus, if indexes in the lists belong to different tables, that gives us
a fair level of parallelism.

This commit teaches get_parallel_object_list() to fetch table names for
indexes in the case of index-level REINDEX.  The same tables are grouped
together in the output order, and the list of indexes is also rebuilt to
match that order.  Later during processingof that list, we push indexes
belonging to the same table into the same job.

Discussion: https://postgr.es/m/CACG%3DezZU_VwDi-1PN8RUSE6mcYG%2BYx1NH_rJO4%2BKe-mKqLp%3DNw%40mail.gmail.com
Author: Maxim Orlov, Svetlana Derevyanko, Alexander Korotkov
Reviewed-by: Michael Paquier
This commit is contained in:
Alexander Korotkov 2024-03-25 02:07:14 +02:00
parent 503c0ad976
commit 47f99a407d
3 changed files with 130 additions and 27 deletions

View File

@ -179,8 +179,7 @@ PostgreSQL documentation
setting is high enough to accommodate all connections.
</para>
<para>
Note that this option is incompatible with the <option>--index</option>
and <option>--system</option> options.
Note that this option is incompatible with the <option>--system</option> option.
</para>
</listitem>
</varlistentry>

View File

@ -206,19 +206,8 @@ main(int argc, char *argv[])
setup_cancel_handler(NULL);
if (concurrentCons > 1)
{
/*
* Index-level REINDEX is not supported with multiple jobs as we
* cannot control the concurrent processing of multiple indexes
* depending on the same relation.
*/
if (indexes.head != NULL)
pg_fatal("cannot use multiple jobs to reindex indexes");
if (syscatalog)
pg_fatal("cannot use multiple jobs to reindex system catalogs");
}
if (concurrentCons > 1 && syscatalog)
pg_fatal("cannot use multiple jobs to reindex system catalogs");
if (alldb)
{
@ -258,7 +247,7 @@ main(int argc, char *argv[])
if (indexes.head != NULL)
reindex_one_database(&cparams, REINDEX_INDEX, &indexes,
progname, echo, verbose,
concurrently, 1, tablespace);
concurrently, concurrentCons, tablespace);
if (tables.head != NULL)
reindex_one_database(&cparams, REINDEX_TABLE, &tables,
@ -288,12 +277,16 @@ reindex_one_database(ConnParams *cparams, ReindexType type,
{
PGconn *conn;
SimpleStringListCell *cell;
SimpleStringListCell *indices_tables_cell;
bool parallel = concurrentCons > 1;
SimpleStringList *process_list = user_list;
SimpleStringList *indices_tables_list = NULL;
ReindexType process_type = type;
ParallelSlotArray *sa;
bool failed = false;
int items_count = 0;
char *prev_index_table_name = NULL;
ParallelSlot *free_slot = NULL;
conn = connectDatabase(cparams, progname, echo, false, true);
@ -363,8 +356,25 @@ reindex_one_database(ConnParams *cparams, ReindexType type,
return;
break;
case REINDEX_SYSTEM:
case REINDEX_INDEX:
Assert(user_list != NULL);
/*
* Build a list of relations from the indices. This will
* accordingly reorder the list of indices too.
*/
indices_tables_list = get_parallel_object_list(conn, process_type,
user_list, echo);
if (indices_tables_list)
indices_tables_cell = indices_tables_list->head;
/* Bail out if nothing to process */
if (process_list == NULL)
return;
break;
case REINDEX_SYSTEM:
/* not supported */
Assert(false);
break;
@ -404,7 +414,7 @@ reindex_one_database(ConnParams *cparams, ReindexType type,
do
{
const char *objname = cell->val;
ParallelSlot *free_slot = NULL;
bool need_new_slot = true;
if (CancelRequested)
{
@ -412,14 +422,33 @@ reindex_one_database(ConnParams *cparams, ReindexType type,
goto finish;
}
free_slot = ParallelSlotsGetIdle(sa, NULL);
if (!free_slot)
/*
* For parallel index-level REINDEX, the indices of the same table are
* ordered together and they are to be processed by the same job. So,
* we don't switch the job as soon as the index belongs to the same
* table as the previous one.
*/
if (parallel && process_type == REINDEX_INDEX)
{
failed = true;
goto finish;
if (prev_index_table_name != NULL &&
strcmp(prev_index_table_name, indices_tables_cell->val) == 0)
need_new_slot = false;
prev_index_table_name = indices_tables_cell->val;
indices_tables_cell = indices_tables_cell->next;
}
if (need_new_slot)
{
free_slot = ParallelSlotsGetIdle(sa, NULL);
if (!free_slot)
{
failed = true;
goto finish;
}
ParallelSlotSetHandler(free_slot, TableCommandResultHandler, NULL);
}
ParallelSlotSetHandler(free_slot, TableCommandResultHandler, NULL);
run_reindex_command(free_slot->connection, process_type, objname,
echo, verbose, concurrently, true, tablespace);
@ -436,6 +465,12 @@ finish:
pg_free(process_list);
}
if (indices_tables_list)
{
simple_string_list_destroy(indices_tables_list);
pg_free(indices_tables_list);
}
ParallelSlotsTerminate(sa);
pfree(sa);
@ -647,8 +682,61 @@ get_parallel_object_list(PGconn *conn, ReindexType type,
}
break;
case REINDEX_SYSTEM:
case REINDEX_INDEX:
{
SimpleStringListCell *cell;
Assert(user_list != NULL);
/*
* Straight-forward index-level REINDEX is not supported with
* multiple jobs as we cannot control the concurrent
* processing of multiple indexes depending on the same
* relation. But we can extract the appropriate table name
* for the index and put REINDEX INDEX commands into different
* jobs, according to the parent tables.
*
* We will order the results to group the same tables
* together. We fetch index names as well to build a new list
* of them with matching order.
*/
appendPQExpBufferStr(&catalog_query,
"SELECT t.relname, n.nspname, i.relname\n"
"FROM pg_catalog.pg_index x\n"
"JOIN pg_catalog.pg_class t ON t.oid = x.indrelid\n"
"JOIN pg_catalog.pg_class i ON i.oid = x.indexrelid\n"
"LEFT JOIN pg_catalog.pg_namespace n ON n.oid = t.relnamespace\n"
"WHERE x.indexrelid OPERATOR(pg_catalog.=) ANY(ARRAY['");
for (cell = user_list->head; cell; cell = cell->next)
{
if (cell != user_list->head)
appendPQExpBufferStr(&catalog_query, "', '");
appendQualifiedRelation(&catalog_query, cell->val, conn, echo);
}
/*
* Order tables by the size of its greatest index. Within the
* table, order indexes by their sizes.
*/
appendPQExpBufferStr(&catalog_query,
"']::pg_catalog.regclass[])\n"
"ORDER BY max(i.relpages) OVER \n"
" (PARTITION BY n.nspname, t.relname),\n"
" n.nspname, t.relname, i.relpages;\n");
/*
* We're going to re-order the user_list to match the order of
* tables. So, empty the user_list to fill it from the query
* result.
*/
simple_string_list_destroy(user_list);
user_list->head = user_list->tail = NULL;
}
break;
case REINDEX_SYSTEM:
case REINDEX_TABLE:
Assert(false);
break;
@ -680,6 +768,20 @@ get_parallel_object_list(PGconn *conn, ReindexType type,
simple_string_list_append(tables, buf.data);
resetPQExpBuffer(&buf);
if (type == REINDEX_INDEX)
{
/*
* For index-level REINDEX, rebuild the list of indexes to match
* the order of tables list.
*/
appendPQExpBufferStr(&buf,
fmtQualifiedId(PQgetvalue(res, i, 1),
PQgetvalue(res, i, 2)));
simple_string_list_append(user_list, buf.data);
resetPQExpBuffer(&buf);
}
}
termPQExpBuffer(&buf);
PQclear(res);

View File

@ -236,9 +236,11 @@ $node->safe_psql(
CREATE SCHEMA s1;
CREATE TABLE s1.t1(id integer);
CREATE INDEX ON s1.t1(id);
CREATE INDEX i1 ON s1.t1(id);
CREATE SCHEMA s2;
CREATE TABLE s2.t2(id integer);
CREATE INDEX ON s2.t2(id);
CREATE INDEX i2 ON s2.t2(id);
-- empty schema
CREATE SCHEMA s3;
|);
@ -246,9 +248,9 @@ $node->safe_psql(
$node->command_fails(
[ 'reindexdb', '-j', '2', '-s', 'postgres' ],
'parallel reindexdb cannot process system catalogs');
$node->command_fails(
[ 'reindexdb', '-j', '2', '-i', 'i1', 'postgres' ],
'parallel reindexdb cannot process indexes');
$node->command_ok(
[ 'reindexdb', '-j', '2', '-i', 's1.i1', '-i', 's2.i2', 'postgres' ],
'parallel reindexdb for indices');
# Note that the ordering of the commands is not stable, so the second
# command for s2.t2 is not checked after.
$node->issues_sql_like(