Hack pg_ctl to report postmaster's exit status.

Temporarily change pg_ctl so that the postmaster's exit status will
be printed (to the postmaster's stdout).  This is to help identify
the cause of intermittent "postmaster exited during a parallel
transaction" failures seen on a couple of buildfarm members.  This
change degrades pg_ctl's functionality in a couple of minor ways,
so we'll revert it once we've obtained the desired info.

Discussion: https://postgr.es/m/18537.1570421268@sss.pgh.pa.us
This commit is contained in:
Tom Lane 2019-10-07 10:39:07 -04:00
parent cc4ec2d29a
commit 6a5084eed4
1 changed file with 14 additions and 17 deletions

View File

@ -106,6 +106,7 @@ static char promote_file[MAXPGPATH];
static char logrotate_file[MAXPGPATH];
static volatile pgpid_t postmasterPID = -1;
static pgpid_t old_postmaster_pid = 0;
#ifdef WIN32
static DWORD pgctl_start_type = SERVICE_AUTO_START;
@ -490,16 +491,17 @@ start_postmaster(void)
/*
* Since there might be quotes to handle here, it is easier simply to pass
* everything to a shell to process them. Use exec so that the postmaster
* has the same PID as the current child process.
* everything to a shell to process them.
*
* Since we aren't telling the shell to directly exec the postmaster,
* the returned PID is a parent process, the same as on Windows.
*/
if (log_file != NULL)
snprintf(cmd, MAXPGPATH, "exec \"%s\" %s%s < \"%s\" >> \"%s\" 2>&1",
exec_path, pgdata_opt, post_opts,
DEVNULL, log_file);
snprintf(cmd, MAXPGPATH, "exec < \"%s\" >> \"%s\" 2>&1; \"%s\" %s%s; echo postmaster exit status is $?",
DEVNULL, log_file, exec_path, pgdata_opt, post_opts);
else
snprintf(cmd, MAXPGPATH, "exec \"%s\" %s%s < \"%s\" 2>&1",
exec_path, pgdata_opt, post_opts, DEVNULL);
snprintf(cmd, MAXPGPATH, "exec < \"%s\" 2>&1; \"%s\" %s%s; echo postmaster exit status is $?",
DEVNULL, exec_path, pgdata_opt, post_opts);
(void) execl("/bin/sh", "/bin/sh", "-c", cmd, (char *) NULL);
@ -586,12 +588,8 @@ wait_for_postmaster(pgpid_t pm_pid, bool do_checkpoint)
pmpid = atol(optlines[LOCK_FILE_LINE_PID - 1]);
pmstart = atol(optlines[LOCK_FILE_LINE_START_TIME - 1]);
if (pmstart >= start_time - 2 &&
#ifndef WIN32
pmpid == pm_pid
#else
/* Windows can only reject standalone-backend PIDs */
pmpid > 0
#endif
/* If pid is the value we saw before starting, assume it's stale */
pmpid > 0 && pmpid != old_postmaster_pid
)
{
/*
@ -621,7 +619,7 @@ wait_for_postmaster(pgpid_t pm_pid, bool do_checkpoint)
* Check whether the child postmaster process is still alive. This
* lets us exit early if the postmaster fails during startup.
*
* On Windows, we may be checking the postmaster's parent shell, but
* We may be checking the postmaster's parent shell, but
* that's fine for this purpose.
*/
#ifndef WIN32
@ -823,13 +821,12 @@ do_init(void)
static void
do_start(void)
{
pgpid_t old_pid = 0;
pgpid_t pm_pid;
if (ctl_command != RESTART_COMMAND)
{
old_pid = get_pgpid(false);
if (old_pid != 0)
old_postmaster_pid = get_pgpid(false);
if (old_postmaster_pid != 0)
write_stderr(_("%s: another server might be running; "
"trying to start server anyway\n"),
progname);