Have pg_rewind run crash recovery before rewinding

If we don't do this, the rewind fails if the server wasn't cleanly shut
down, which seems unhelpful serving no purpose.

Also provide a new option --no-ensure-shutdown to suppress this
behavior, for alleged advanced usage that prefers to avoid the crash
recovery.

Authors: Paul Guo, Jimmy Yih, Ashwin Agrawal
Reviewed-by: Álvaro Herrera
Discussion: https://postgr.es/m/CAEET0ZEffUkXc48pg2iqARQgGRYDiiVxDu+yYek_bTwJF+q=Uw@mail.gmail.com
This commit is contained in:
Alvaro Herrera 2019-09-27 16:40:01 -03:00
parent c967e13f40
commit 5adafaf176
2 changed files with 95 additions and 0 deletions

View File

@ -165,6 +165,21 @@ PostgreSQL documentation
</listitem>
</varlistentry>
<varlistentry>
<term><option>--no-ensure-shutdown</option></term>
<listitem>
<para>
<application>pg_rewind</application> verifies that the target server
is cleanly shutdown before rewinding; by default, if it isn't, it
starts the server in single-user mode to complete crash recovery.
By passing this option, <application>pg_rewind</application> skips
this and errors out immediately if the server is not cleanly shut
down. Users are expected to handle the situation themselves in that
case.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>-n</option></term>
<term><option>--dry-run</option></term>

View File

@ -40,6 +40,7 @@ static void digestControlFile(ControlFileData *ControlFile, char *source,
static void syncTargetDirectory(void);
static void sanityChecks(void);
static void findCommonAncestorTimeline(XLogRecPtr *recptr, int *tliIndex);
static void ensureCleanShutdown(const char *argv0);
static ControlFileData ControlFile_target;
static ControlFileData ControlFile_source;
@ -79,6 +80,7 @@ usage(const char *progname)
printf(_(" -N, --no-sync do not wait for changes to be written\n"
" safely to disk\n"));
printf(_(" -P, --progress write progress messages\n"));
printf(_(" --no-ensure-shutdown do not automatically fix unclean shutdown\n"));
printf(_(" --debug write a lot of debug messages\n"));
printf(_(" -V, --version output version information, then exit\n"));
printf(_(" -?, --help show this help, then exit\n"));
@ -94,6 +96,7 @@ main(int argc, char **argv)
{"target-pgdata", required_argument, NULL, 'D'},
{"source-pgdata", required_argument, NULL, 1},
{"source-server", required_argument, NULL, 2},
{"no-ensure-shutdown", no_argument, NULL, 44},
{"version", no_argument, NULL, 'V'},
{"dry-run", no_argument, NULL, 'n'},
{"no-sync", no_argument, NULL, 'N'},
@ -110,6 +113,7 @@ main(int argc, char **argv)
XLogRecPtr chkptredo;
size_t size;
char *buffer;
bool no_ensure_shutdown = false;
bool rewind_needed;
XLogRecPtr endrec;
TimeLineID endtli;
@ -169,6 +173,9 @@ main(int argc, char **argv)
case 2: /* --source-server */
connstr_source = pg_strdup(optarg);
break;
case 4:
no_ensure_shutdown = true;
break;
}
}
@ -241,6 +248,24 @@ main(int argc, char **argv)
digestControlFile(&ControlFile_target, buffer, size);
pg_free(buffer);
/*
* If the target instance was not cleanly shut down, run a single-user
* postgres session really quickly and reload the control file to get the
* new state. Note if no_ensure_shutdown is specified, pg_rewind won't do
* that automatically. That means users need to do themselves in advance,
* else pg_rewind will soon quit, see sanityChecks().
*/
if (!no_ensure_shutdown &&
ControlFile_target.state != DB_SHUTDOWNED &&
ControlFile_target.state != DB_SHUTDOWNED_IN_RECOVERY)
{
ensureCleanShutdown(argv[0]);
buffer = slurpFile(datadir_target, "global/pg_control", &size);
digestControlFile(&ControlFile_target, buffer, size);
pg_free(buffer);
}
buffer = fetchFile("global/pg_control", &size);
digestControlFile(&ControlFile_source, buffer, size);
pg_free(buffer);
@ -748,3 +773,58 @@ syncTargetDirectory(void)
fsync_pgdata(datadir_target, PG_VERSION_NUM);
}
/*
* Ensure clean shutdown of target instance by launching single-user mode
* postgres to do crash recovery.
*/
static void
ensureCleanShutdown(const char *argv0)
{
int ret;
#define MAXCMDLEN (2 * MAXPGPATH)
char exec_path[MAXPGPATH];
char cmd[MAXCMDLEN];
/* locate postgres binary */
if ((ret = find_other_exec(argv0, "postgres",
PG_BACKEND_VERSIONSTR,
exec_path)) < 0)
{
char full_path[MAXPGPATH];
if (find_my_exec(argv0, full_path) < 0)
strlcpy(full_path, progname, sizeof(full_path));
if (ret == -1)
pg_fatal("The program \"%s\" is needed by %s but was\n"
"not found in the same directory as \"%s\".\n"
"Check your installation.",
"postgres", progname, full_path);
else
pg_fatal("The program \"%s\" was found by \"%s\" but was\n"
"not the same version as %s.\n"
"Check your installation.",
"postgres", full_path, progname);
}
pg_log_info("executing \"%s\" for target server to complete crash recovery",
exec_path);
/*
* Skip processing if requested, but only after ensuring presence of
* postgres.
*/
if (dry_run)
return;
/* finally run postgres in single-user mode */
snprintf(cmd, MAXCMDLEN, "\"%s\" --single -D \"%s\" template1 < \"%s\"",
exec_path, datadir_target, DEVNULL);
if (system(cmd) != 0)
{
pg_log_error("postgres single-user mode of target instance failed");
pg_fatal("Command was: %s", cmd);
}
}